liblinear-ruby-swig 0.2.0

data/ext/linear.cpp ADDED
@@ -0,0 +1,2096 @@
+ #include <math.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <stdarg.h>
+ #include "linear.h"
+ #include "tron.h"
+ typedef signed char schar;
+ template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
+ #ifndef min
+ template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
+ #endif
+ #ifndef max
+ template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
+ #endif
+ template <class S, class T> static inline void clone(T*& dst, S* src, int n)
+ {
+ 	dst = new T[n];
+ 	memcpy((void *)dst,(void *)src,sizeof(T)*n);
+ }
+ #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
+ #define INF HUGE_VAL
+
+ static void print_string_stdout(const char *s)
+ {
+ 	fputs(s,stdout);
+ 	fflush(stdout);
+ }
+
+ void (*liblinear_print_string) (const char *) = &print_string_stdout;
+
+ #if 1
+ int info_on = 0;
+ static void info(const char *fmt,...)
+ {
+ 	char buf[BUFSIZ];
+ 	va_list ap;
+ 	if (info_on==1) {
+ 		va_start(ap,fmt);
+ 		vsprintf(buf,fmt,ap);
+ 		va_end(ap);
+ 		(*liblinear_print_string)(buf);
+ 	}
+ }
+ #else
+ static void info(const char *fmt,...) {}
+ #endif
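+ // Note: with info_on = 0 (the default above) the solvers run silently;
+ // setting info_on = 1 re-enables LIBLINEAR's progress output.  The text is
+ // emitted through the liblinear_print_string hook, so a host application
+ // can also redirect it; a minimal sketch (print_to_log is an illustrative
+ // name, not something defined in this file):
+ //
+ //     static void print_to_log(const char *s) { /* forward s anywhere */ }
+ //     liblinear_print_string = &print_to_log;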
+
+ class l2r_lr_fun : public function
+ {
+ public:
+ 	l2r_lr_fun(const problem *prob, double Cp, double Cn);
+ 	~l2r_lr_fun();
+
+ 	double fun(double *w);
+ 	void grad(double *w, double *g);
+ 	void Hv(double *s, double *Hs);
+
+ 	int get_nr_variable(void);
+
+ private:
+ 	void Xv(double *v, double *Xv);
+ 	void XTv(double *v, double *XTv);
+
+ 	double *C;
+ 	double *z;
+ 	double *D;
+ 	const problem *prob;
+ };
+
+ l2r_lr_fun::l2r_lr_fun(const problem *prob, double Cp, double Cn)
+ {
+ 	int i;
+ 	int l=prob->l;
+ 	int *y=prob->y;
+
+ 	this->prob = prob;
+
+ 	z = new double[l];
+ 	D = new double[l];
+ 	C = new double[l];
+
+ 	for (i=0; i<l; i++)
+ 	{
+ 		if (y[i] == 1)
+ 			C[i] = Cp;
+ 		else
+ 			C[i] = Cn;
+ 	}
+ }
+
+ l2r_lr_fun::~l2r_lr_fun()
+ {
+ 	delete[] z;
+ 	delete[] D;
+ 	delete[] C;
+ }
+
+
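+ // fun() below evaluates the L2-regularized logistic-regression primal
+ // objective  f(w) = 0.5*w^T*w + sum_i C[i]*log(1+exp(-y_i*w^T*x_i)).
+ // The branch on the sign of yz is the usual numerically stable form:
+ // for yz < 0, log(1+exp(-yz)) is rewritten as -yz + log(1+exp(yz)) so
+ // that exp() is never called with a large positive argument.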
+ double l2r_lr_fun::fun(double *w)
+ {
+ 	int i;
+ 	double f=0;
+ 	int *y=prob->y;
+ 	int l=prob->l;
+ 	int w_size=get_nr_variable();
+
+ 	Xv(w, z);
+ 	for(i=0;i<l;i++)
+ 	{
+ 		double yz = y[i]*z[i];
+ 		if (yz >= 0)
+ 			f += C[i]*log(1 + exp(-yz));
+ 		else
+ 			f += C[i]*(-yz+log(1 + exp(yz)));
+ 	}
+ 	f = 2*f;
+ 	for(i=0;i<w_size;i++)
+ 		f += w[i]*w[i];
+ 	f /= 2.0;
+
+ 	return(f);
+ }
+
+ void l2r_lr_fun::grad(double *w, double *g)
+ {
+ 	int i;
+ 	int *y=prob->y;
+ 	int l=prob->l;
+ 	int w_size=get_nr_variable();
+
+ 	for(i=0;i<l;i++)
+ 	{
+ 		z[i] = 1/(1 + exp(-y[i]*z[i]));
+ 		D[i] = z[i]*(1-z[i]);
+ 		z[i] = C[i]*(z[i]-1)*y[i];
+ 	}
+ 	XTv(z, g);
+
+ 	for(i=0;i<w_size;i++)
+ 		g[i] = w[i] + g[i];
+ }
+
+ int l2r_lr_fun::get_nr_variable(void)
+ {
+ 	return prob->n;
+ }
+
+ void l2r_lr_fun::Hv(double *s, double *Hs)
+ {
+ 	int i;
+ 	int l=prob->l;
+ 	int w_size=get_nr_variable();
+ 	double *wa = new double[l];
+
+ 	Xv(s, wa);
+ 	for(i=0;i<l;i++)
+ 		wa[i] = C[i]*D[i]*wa[i];
+
+ 	XTv(wa, Hs);
+ 	for(i=0;i<w_size;i++)
+ 		Hs[i] = s[i] + Hs[i];
+ 	delete[] wa;
+ }
+
+ void l2r_lr_fun::Xv(double *v, double *Xv)
+ {
+ 	int i;
+ 	int l=prob->l;
+ 	feature_node **x=prob->x;
+
+ 	for(i=0;i<l;i++)
+ 	{
+ 		feature_node *s=x[i];
+ 		Xv[i]=0;
+ 		while(s->index!=-1)
+ 		{
+ 			Xv[i]+=v[s->index-1]*s->value;
+ 			s++;
+ 		}
+ 	}
+ }
+
+ void l2r_lr_fun::XTv(double *v, double *XTv)
+ {
+ 	int i;
+ 	int l=prob->l;
+ 	int w_size=get_nr_variable();
+ 	feature_node **x=prob->x;
+
+ 	for(i=0;i<w_size;i++)
+ 		XTv[i]=0;
+ 	for(i=0;i<l;i++)
+ 	{
+ 		feature_node *s=x[i];
+ 		while(s->index!=-1)
+ 		{
+ 			XTv[s->index-1]+=v[i]*s->value;
+ 			s++;
+ 		}
+ 	}
+ }
+
+ class l2r_l2_svc_fun : public function
+ {
+ public:
+ 	l2r_l2_svc_fun(const problem *prob, double Cp, double Cn);
+ 	~l2r_l2_svc_fun();
+
+ 	double fun(double *w);
+ 	void grad(double *w, double *g);
+ 	void Hv(double *s, double *Hs);
+
+ 	int get_nr_variable(void);
+
+ private:
+ 	void Xv(double *v, double *Xv);
+ 	void subXv(double *v, double *Xv);
+ 	void subXTv(double *v, double *XTv);
+
+ 	double *C;
+ 	double *z;
+ 	double *D;
+ 	int *I;
+ 	int sizeI;
+ 	const problem *prob;
+ };
+
+ l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double Cp, double Cn)
+ {
+ 	int i;
+ 	int l=prob->l;
+ 	int *y=prob->y;
+
+ 	this->prob = prob;
+
+ 	z = new double[l];
+ 	D = new double[l];
+ 	C = new double[l];
+ 	I = new int[l];
+
+ 	for (i=0; i<l; i++)
+ 	{
+ 		if (y[i] == 1)
+ 			C[i] = Cp;
+ 		else
+ 			C[i] = Cn;
+ 	}
+ }
+
+ l2r_l2_svc_fun::~l2r_l2_svc_fun()
+ {
+ 	delete[] z;
+ 	delete[] D;
+ 	delete[] C;
+ 	delete[] I;
+ }
+
+ double l2r_l2_svc_fun::fun(double *w)
+ {
+ 	int i;
+ 	double f=0;
+ 	int *y=prob->y;
+ 	int l=prob->l;
+ 	int w_size=get_nr_variable();
+
+ 	Xv(w, z);
+ 	for(i=0;i<l;i++)
+ 	{
+ 		z[i] = y[i]*z[i];
+ 		double d = 1-z[i];
+ 		if (d > 0)
+ 			f += C[i]*d*d;
+ 	}
+ 	f = 2*f;
+ 	for(i=0;i<w_size;i++)
+ 		f += w[i]*w[i];
+ 	f /= 2.0;
+
+ 	return(f);
+ }
+
+ void l2r_l2_svc_fun::grad(double *w, double *g)
+ {
+ 	int i;
+ 	int *y=prob->y;
+ 	int l=prob->l;
+ 	int w_size=get_nr_variable();
+
+ 	sizeI = 0;
+ 	for (i=0;i<l;i++)
+ 		if (z[i] < 1)
+ 		{
+ 			z[sizeI] = C[i]*y[i]*(z[i]-1);
+ 			I[sizeI] = i;
+ 			sizeI++;
+ 		}
+ 	subXTv(z, g);
+
+ 	for(i=0;i<w_size;i++)
+ 		g[i] = w[i] + 2*g[i];
+ }
+
+ int l2r_l2_svc_fun::get_nr_variable(void)
+ {
+ 	return prob->n;
+ }
+
+ void l2r_l2_svc_fun::Hv(double *s, double *Hs)
+ {
+ 	int i;
+ 	int l=prob->l;
+ 	int w_size=get_nr_variable();
+ 	double *wa = new double[l];
+
+ 	subXv(s, wa);
+ 	for(i=0;i<sizeI;i++)
+ 		wa[i] = C[I[i]]*wa[i];
+
+ 	subXTv(wa, Hs);
+ 	for(i=0;i<w_size;i++)
+ 		Hs[i] = s[i] + 2*Hs[i];
+ 	delete[] wa;
+ }
+
+ void l2r_l2_svc_fun::Xv(double *v, double *Xv)
+ {
+ 	int i;
+ 	int l=prob->l;
+ 	feature_node **x=prob->x;
+
+ 	for(i=0;i<l;i++)
+ 	{
+ 		feature_node *s=x[i];
+ 		Xv[i]=0;
+ 		while(s->index!=-1)
+ 		{
+ 			Xv[i]+=v[s->index-1]*s->value;
+ 			s++;
+ 		}
+ 	}
+ }
+
+ void l2r_l2_svc_fun::subXv(double *v, double *Xv)
+ {
+ 	int i;
+ 	feature_node **x=prob->x;
+
+ 	for(i=0;i<sizeI;i++)
+ 	{
+ 		feature_node *s=x[I[i]];
+ 		Xv[i]=0;
+ 		while(s->index!=-1)
+ 		{
+ 			Xv[i]+=v[s->index-1]*s->value;
+ 			s++;
+ 		}
+ 	}
+ }
+
+ void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
+ {
+ 	int i;
+ 	int w_size=get_nr_variable();
+ 	feature_node **x=prob->x;
+
+ 	for(i=0;i<w_size;i++)
+ 		XTv[i]=0;
+ 	for(i=0;i<sizeI;i++)
+ 	{
+ 		feature_node *s=x[I[i]];
+ 		while(s->index!=-1)
+ 		{
+ 			XTv[s->index-1]+=v[i]*s->value;
+ 			s++;
+ 		}
+ 	}
+ }
+
+ // A coordinate descent algorithm for
+ // multi-class support vector machines by Crammer and Singer
+ //
+ //  min_{\alpha}  0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
+ //    s.t.     \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
+ //
+ //  where e^m_i = 0 if y_i  = m,
+ //        e^m_i = 1 if y_i != m,
+ //        C^m_i = C if m  = y_i,
+ //        C^m_i = 0 if m != y_i,
+ //  and w_m(\alpha) = \sum_i \alpha^m_i x_i
+ //
+ // Given:
+ // x, y, C
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+
+ #define GETI(i) (prob->y[i])
+ // To support weights for instances, use GETI(i) (i)
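+ // Note on solve_sub_problem() below (a restatement of the code, not extra
+ // functionality): for one example i it solves the quadratic sub-problem
+ // over alpha^m_i in closed form.  A copy of B (with A_i*C_yi added to the
+ // y_i entry) is sorted in decreasing order, the threshold is
+ // beta = (sum of the r largest entries - A_i*C_yi)/r for the r found by the
+ // loop, and the update is alpha_new[m] = min(C^m_i, (beta - B[m])/A_i),
+ // which realizes the constrained minimization stated in the comment above.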
+
+ class Solver_MCSVM_CS
+ {
+ public:
+ 	Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
+ 	~Solver_MCSVM_CS();
+ 	void Solve(double *w);
+ private:
+ 	void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
+ 	bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
+ 	double *B, *C, *G;
+ 	int w_size, l;
+ 	int nr_class;
+ 	int max_iter;
+ 	double eps;
+ 	const problem *prob;
+ };
+
+ Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
+ {
+ 	this->w_size = prob->n;
+ 	this->l = prob->l;
+ 	this->nr_class = nr_class;
+ 	this->eps = eps;
+ 	this->max_iter = max_iter;
+ 	this->prob = prob;
+ 	this->C = weighted_C;
+ 	this->B = new double[nr_class];
+ 	this->G = new double[nr_class];
+ }
+
+ Solver_MCSVM_CS::~Solver_MCSVM_CS()
+ {
+ 	delete[] B;
+ 	delete[] G;
+ }
+
+ int compare_double(const void *a, const void *b)
+ {
+ 	if(*(double *)a > *(double *)b)
+ 		return -1;
+ 	if(*(double *)a < *(double *)b)
+ 		return 1;
+ 	return 0;
+ }
+
+ void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
+ {
+ 	int r;
+ 	double *D;
+
+ 	clone(D, B, active_i);
+ 	if(yi < active_i)
+ 		D[yi] += A_i*C_yi;
+ 	qsort(D, active_i, sizeof(double), compare_double);
+
+ 	double beta = D[0] - A_i*C_yi;
+ 	for(r=1;r<active_i && beta<r*D[r];r++)
+ 		beta += D[r];
+
+ 	beta /= r;
+ 	for(r=0;r<active_i;r++)
+ 	{
+ 		if(r == yi)
+ 			alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
+ 		else
+ 			alpha_new[r] = min((double)0, (beta - B[r])/A_i);
+ 	}
+ 	delete[] D;
+ }
+
+ bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
+ {
+ 	double bound = 0;
+ 	if(m == yi)
+ 		bound = C[GETI(i)];
+ 	if(alpha_i == bound && G[m] < minG)
+ 		return true;
+ 	return false;
+ }
+
+ void Solver_MCSVM_CS::Solve(double *w)
+ {
+ 	int i, m, s;
+ 	int iter = 0;
+ 	double *alpha = new double[l*nr_class];
+ 	double *alpha_new = new double[nr_class];
+ 	int *index = new int[l];
+ 	double *QD = new double[l];
+ 	int *d_ind = new int[nr_class];
+ 	double *d_val = new double[nr_class];
+ 	int *alpha_index = new int[nr_class*l];
+ 	int *y_index = new int[l];
+ 	int active_size = l;
+ 	int *active_size_i = new int[l];
+ 	double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
+ 	bool start_from_all = true;
+ 	// initial
+ 	for(i=0;i<l*nr_class;i++)
+ 		alpha[i] = 0;
+ 	for(i=0;i<w_size*nr_class;i++)
+ 		w[i] = 0;
+ 	for(i=0;i<l;i++)
+ 	{
+ 		for(m=0;m<nr_class;m++)
+ 			alpha_index[i*nr_class+m] = m;
+ 		feature_node *xi = prob->x[i];
+ 		QD[i] = 0;
+ 		while(xi->index != -1)
+ 		{
+ 			QD[i] += (xi->value)*(xi->value);
+ 			xi++;
+ 		}
+ 		active_size_i[i] = nr_class;
+ 		y_index[i] = prob->y[i];
+ 		index[i] = i;
+ 	}
+
+ 	while(iter < max_iter)
+ 	{
+ 		double stopping = -INF;
+ 		for(i=0;i<active_size;i++)
+ 		{
+ 			int j = i+rand()%(active_size-i);
+ 			swap(index[i], index[j]);
+ 		}
+ 		for(s=0;s<active_size;s++)
+ 		{
+ 			i = index[s];
+ 			double Ai = QD[i];
+ 			double *alpha_i = &alpha[i*nr_class];
+ 			int *alpha_index_i = &alpha_index[i*nr_class];
+
+ 			if(Ai > 0)
+ 			{
+ 				for(m=0;m<active_size_i[i];m++)
+ 					G[m] = 1;
+ 				if(y_index[i] < active_size_i[i])
+ 					G[y_index[i]] = 0;
+
+ 				feature_node *xi = prob->x[i];
+ 				while(xi->index!= -1)
+ 				{
+ 					double *w_i = &w[(xi->index-1)*nr_class];
+ 					for(m=0;m<active_size_i[i];m++)
+ 						G[m] += w_i[alpha_index_i[m]]*(xi->value);
+ 					xi++;
+ 				}
+
+ 				double minG = INF;
+ 				double maxG = -INF;
+ 				for(m=0;m<active_size_i[i];m++)
+ 				{
+ 					if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
+ 						minG = G[m];
+ 					if(G[m] > maxG)
+ 						maxG = G[m];
+ 				}
+ 				if(y_index[i] < active_size_i[i])
+ 					if(alpha_i[prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
+ 						minG = G[y_index[i]];
+
+ 				for(m=0;m<active_size_i[i];m++)
+ 				{
+ 					if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
+ 					{
+ 						active_size_i[i]--;
+ 						while(active_size_i[i]>m)
+ 						{
+ 							if(!be_shrunk(i, active_size_i[i], y_index[i],
+ 											alpha_i[alpha_index_i[active_size_i[i]]], minG))
+ 							{
+ 								swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
+ 								swap(G[m], G[active_size_i[i]]);
+ 								if(y_index[i] == active_size_i[i])
+ 									y_index[i] = m;
+ 								else if(y_index[i] == m)
+ 									y_index[i] = active_size_i[i];
+ 								break;
+ 							}
+ 							active_size_i[i]--;
+ 						}
+ 					}
+ 				}
+
+ 				if(active_size_i[i] <= 1)
+ 				{
+ 					active_size--;
+ 					swap(index[s], index[active_size]);
+ 					s--;
+ 					continue;
+ 				}
+
+ 				if(maxG-minG <= 1e-12)
+ 					continue;
+ 				else
+ 					stopping = max(maxG - minG, stopping);
+
+ 				for(m=0;m<active_size_i[i];m++)
+ 					B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
+
+ 				solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
+ 				int nz_d = 0;
+ 				for(m=0;m<active_size_i[i];m++)
+ 				{
+ 					double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
+ 					alpha_i[alpha_index_i[m]] = alpha_new[m];
+ 					if(fabs(d) >= 1e-12)
+ 					{
+ 						d_ind[nz_d] = alpha_index_i[m];
+ 						d_val[nz_d] = d;
+ 						nz_d++;
+ 					}
+ 				}
+
+ 				xi = prob->x[i];
+ 				while(xi->index != -1)
+ 				{
+ 					double *w_i = &w[(xi->index-1)*nr_class];
+ 					for(m=0;m<nz_d;m++)
+ 						w_i[d_ind[m]] += d_val[m]*xi->value;
+ 					xi++;
+ 				}
+ 			}
+ 		}
+
+ 		iter++;
+ 		if(iter % 10 == 0)
+ 		{
+ 			info(".");
+ 		}
+
+ 		if(stopping < eps_shrink)
+ 		{
+ 			if(stopping < eps && start_from_all == true)
+ 				break;
+ 			else
+ 			{
+ 				active_size = l;
+ 				for(i=0;i<l;i++)
+ 					active_size_i[i] = nr_class;
+ 				info("*");
+ 				eps_shrink = max(eps_shrink/2, eps);
+ 				start_from_all = true;
+ 			}
+ 		}
+ 		else
+ 			start_from_all = false;
+ 	}
+
+ 	info("\noptimization finished, #iter = %d\n",iter);
+ 	if (iter >= max_iter)
+ 		info("Warning: reaching max number of iterations\n");
+
+ 	// calculate objective value
+ 	double v = 0;
+ 	int nSV = 0;
+ 	for(i=0;i<w_size*nr_class;i++)
+ 		v += w[i]*w[i];
+ 	v = 0.5*v;
+ 	for(i=0;i<l*nr_class;i++)
+ 	{
+ 		v += alpha[i];
+ 		if(fabs(alpha[i]) > 0)
+ 			nSV++;
+ 	}
+ 	for(i=0;i<l;i++)
+ 		v -= alpha[i*nr_class+prob->y[i]];
+ 	info("Objective value = %lf\n",v);
+ 	info("nSV = %d\n",nSV);
+
+ 	delete [] alpha;
+ 	delete [] alpha_new;
+ 	delete [] index;
+ 	delete [] QD;
+ 	delete [] d_ind;
+ 	delete [] d_val;
+ 	delete [] alpha_index;
+ 	delete [] y_index;
+ 	delete [] active_size_i;
+ }
+
+ // A coordinate descent algorithm for
+ // L1-loss and L2-loss SVM dual problems
+ //
+ //  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
+ //    s.t.      0 <= alpha_i <= upper_bound_i,
+ //
+ //  where Qij = yi yj xi^T xj and
+ //  D is a diagonal matrix
+ //
+ // In L1-SVM case:
+ // 		upper_bound_i = Cp if y_i = 1
+ // 		upper_bound_i = Cn if y_i = -1
+ // 		D_ii = 0
+ // In L2-SVM case:
+ // 		upper_bound_i = INF
+ // 		D_ii = 1/(2*Cp)	if y_i = 1
+ // 		D_ii = 1/(2*Cn)	if y_i = -1
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
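+ // Sketch of one coordinate step in solve_l2r_l1l2_svc() below (restating
+ // the code):
+ //
+ //     G      = y_i * w^T x_i - 1 + D_ii * alpha_i          // gradient
+ //     alpha' = min(max(alpha_i - G/(Q_ii + D_ii), 0), U)   // clipped Newton step
+ //     w     += (alpha' - alpha_i) * y_i * x_i              // keep w = sum_i alpha_i y_i x_i
+ //
+ // where U and D_ii come from the upper_bound/diag tables selected by
+ // solver_type, and QD[i] caches Q_ii + D_ii.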
+
+ static void solve_l2r_l1l2_svc(
+ 	const problem *prob, double *w, double eps,
+ 	double Cp, double Cn, int solver_type)
+ {
+ 	int l = prob->l;
+ 	int w_size = prob->n;
+ 	int i, s, iter = 0;
+ 	double C, d, G;
+ 	double *QD = new double[l];
+ 	int max_iter = 1000;
+ 	int *index = new int[l];
+ 	double *alpha = new double[l];
+ 	schar *y = new schar[l];
+ 	int active_size = l;
+
+ 	// PG: projected gradient, for shrinking and stopping
+ 	double PG;
+ 	double PGmax_old = INF;
+ 	double PGmin_old = -INF;
+ 	double PGmax_new, PGmin_new;
+
+ 	// default solver_type: L2R_L2LOSS_SVC_DUAL
+ 	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
+ 	double upper_bound[3] = {INF, 0, INF};
+ 	if(solver_type == L2R_L1LOSS_SVC_DUAL)
+ 	{
+ 		diag[0] = 0;
+ 		diag[2] = 0;
+ 		upper_bound[0] = Cn;
+ 		upper_bound[2] = Cp;
+ 	}
+
+ 	for(i=0; i<w_size; i++)
+ 		w[i] = 0;
+ 	for(i=0; i<l; i++)
+ 	{
+ 		alpha[i] = 0;
+ 		if(prob->y[i] > 0)
+ 		{
+ 			y[i] = +1;
+ 		}
+ 		else
+ 		{
+ 			y[i] = -1;
+ 		}
+ 		QD[i] = diag[GETI(i)];
+
+ 		feature_node *xi = prob->x[i];
+ 		while (xi->index != -1)
+ 		{
+ 			QD[i] += (xi->value)*(xi->value);
+ 			xi++;
+ 		}
+ 		index[i] = i;
+ 	}
+
+ 	while (iter < max_iter)
+ 	{
+ 		PGmax_new = -INF;
+ 		PGmin_new = INF;
+
+ 		for (i=0; i<active_size; i++)
+ 		{
+ 			int j = i+rand()%(active_size-i);
+ 			swap(index[i], index[j]);
+ 		}
+
+ 		for (s=0;s<active_size;s++)
+ 		{
+ 			i = index[s];
+ 			G = 0;
+ 			schar yi = y[i];
+
+ 			feature_node *xi = prob->x[i];
+ 			while(xi->index!= -1)
+ 			{
+ 				G += w[xi->index-1]*(xi->value);
+ 				xi++;
+ 			}
+ 			G = G*yi-1;
+
+ 			C = upper_bound[GETI(i)];
+ 			G += alpha[i]*diag[GETI(i)];
+
+ 			PG = 0;
+ 			if (alpha[i] == 0)
+ 			{
+ 				if (G > PGmax_old)
+ 				{
+ 					active_size--;
+ 					swap(index[s], index[active_size]);
+ 					s--;
+ 					continue;
+ 				}
+ 				else if (G < 0)
+ 					PG = G;
+ 			}
+ 			else if (alpha[i] == C)
+ 			{
+ 				if (G < PGmin_old)
+ 				{
+ 					active_size--;
+ 					swap(index[s], index[active_size]);
+ 					s--;
+ 					continue;
+ 				}
+ 				else if (G > 0)
+ 					PG = G;
+ 			}
+ 			else
+ 				PG = G;
+
+ 			PGmax_new = max(PGmax_new, PG);
+ 			PGmin_new = min(PGmin_new, PG);
+
+ 			if(fabs(PG) > 1.0e-12)
+ 			{
+ 				double alpha_old = alpha[i];
+ 				alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
+ 				d = (alpha[i] - alpha_old)*yi;
+ 				xi = prob->x[i];
+ 				while (xi->index != -1)
+ 				{
+ 					w[xi->index-1] += d*xi->value;
+ 					xi++;
+ 				}
+ 			}
+ 		}
+
+ 		iter++;
+ 		if(iter % 10 == 0)
+ 			info(".");
+
+ 		if(PGmax_new - PGmin_new <= eps)
+ 		{
+ 			if(active_size == l)
+ 				break;
+ 			else
+ 			{
+ 				active_size = l;
+ 				info("*");
+ 				PGmax_old = INF;
+ 				PGmin_old = -INF;
+ 				continue;
+ 			}
+ 		}
+ 		PGmax_old = PGmax_new;
+ 		PGmin_old = PGmin_new;
+ 		if (PGmax_old <= 0)
+ 			PGmax_old = INF;
+ 		if (PGmin_old >= 0)
+ 			PGmin_old = -INF;
+ 	}
+
+ 	info("\noptimization finished, #iter = %d\n",iter);
+ 	if (iter >= max_iter)
+ 		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
+
+ 	// calculate objective value
+
+ 	double v = 0;
+ 	int nSV = 0;
+ 	for(i=0; i<w_size; i++)
+ 		v += w[i]*w[i];
+ 	for(i=0; i<l; i++)
+ 	{
+ 		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
+ 		if(alpha[i] > 0)
+ 			++nSV;
+ 	}
+ 	info("Objective value = %lf\n",v/2);
+ 	info("nSV = %d\n",nSV);
+
+ 	delete [] QD;
+ 	delete [] alpha;
+ 	delete [] y;
+ 	delete [] index;
+ }
+
+ // A coordinate descent algorithm for
+ // L1-regularized L2-loss support vector classification
+ //
+ //  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
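+ // Sketch of the per-feature step in solve_l1r_l2_svc() below (restating
+ // the code): with G and H the first and second derivative of the loss term
+ // in w_j, the Newton direction for the objective |w_j| + loss is
+ //
+ //     d = -Gp/H  if Gp <= H*w_j     (Gp = G+1, right derivative of |w_j|)
+ //     d = -Gn/H  if Gn >= H*w_j     (Gn = G-1, left derivative)
+ //     d = -w_j   otherwise          (step exactly to zero)
+ //
+ // followed by a backtracking line search (sigma = 0.01, halving d) on the
+ // sufficient-decrease condition |w_j+d|-|w_j| + change in loss.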
+
+ static void solve_l1r_l2_svc(
+ 	problem *prob_col, double *w, double eps,
+ 	double Cp, double Cn)
+ {
+ 	int l = prob_col->l;
+ 	int w_size = prob_col->n;
+ 	int j, s, iter = 0;
+ 	int max_iter = 1000;
+ 	int active_size = w_size;
+ 	int max_num_linesearch = 20;
+
+ 	double sigma = 0.01;
+ 	double d, G_loss, G, H;
+ 	double Gmax_old = INF;
+ 	double Gmax_new;
+ 	double Gmax_init;
+ 	double d_old, d_diff;
+ 	double loss_old, loss_new;
+ 	double appxcond, cond;
+
+ 	int *index = new int[w_size];
+ 	schar *y = new schar[l];
+ 	double *b = new double[l]; // b = 1-ywTx
+ 	double *xj_sq = new double[w_size];
+ 	feature_node *x;
+
+ 	double C[3] = {Cn,0,Cp};
+
+ 	for(j=0; j<l; j++)
+ 	{
+ 		b[j] = 1;
+ 		if(prob_col->y[j] > 0)
+ 			y[j] = 1;
+ 		else
+ 			y[j] = -1;
+ 	}
+ 	for(j=0; j<w_size; j++)
+ 	{
+ 		w[j] = 0;
+ 		index[j] = j;
+ 		xj_sq[j] = 0;
+ 		x = prob_col->x[j];
+ 		while(x->index != -1)
+ 		{
+ 			int ind = x->index-1;
+ 			double val = x->value;
+ 			x->value *= y[ind]; // x->value stores yi*xij
+ 			xj_sq[j] += C[GETI(ind)]*val*val;
+ 			x++;
+ 		}
+ 	}
+
+ 	while(iter < max_iter)
+ 	{
+ 		Gmax_new = 0;
+
+ 		for(j=0; j<active_size; j++)
+ 		{
+ 			int i = j+rand()%(active_size-j);
+ 			swap(index[i], index[j]);
+ 		}
+
+ 		for(s=0; s<active_size; s++)
+ 		{
+ 			j = index[s];
+ 			G_loss = 0;
+ 			H = 0;
+
+ 			x = prob_col->x[j];
+ 			while(x->index != -1)
+ 			{
+ 				int ind = x->index-1;
+ 				if(b[ind] > 0)
+ 				{
+ 					double val = x->value;
+ 					double tmp = C[GETI(ind)]*val;
+ 					G_loss -= tmp*b[ind];
+ 					H += tmp*val;
+ 				}
+ 				x++;
+ 			}
+ 			G_loss *= 2;
+
+ 			G = G_loss;
+ 			H *= 2;
+ 			H = max(H, 1e-12);
+
+ 			double Gp = G+1;
+ 			double Gn = G-1;
+ 			double violation = 0;
+ 			if(w[j] == 0)
+ 			{
+ 				if(Gp < 0)
+ 					violation = -Gp;
+ 				else if(Gn > 0)
+ 					violation = Gn;
+ 				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
+ 				{
+ 					active_size--;
+ 					swap(index[s], index[active_size]);
+ 					s--;
+ 					continue;
+ 				}
+ 			}
+ 			else if(w[j] > 0)
+ 				violation = fabs(Gp);
+ 			else
+ 				violation = fabs(Gn);
+
+ 			Gmax_new = max(Gmax_new, violation);
+
+ 			// obtain Newton direction d
+ 			if(Gp <= H*w[j])
+ 				d = -Gp/H;
+ 			else if(Gn >= H*w[j])
+ 				d = -Gn/H;
+ 			else
+ 				d = -w[j];
+
+ 			if(fabs(d) < 1.0e-12)
+ 				continue;
+
+ 			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
+ 			d_old = 0;
+ 			int num_linesearch;
+ 			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
+ 			{
+ 				d_diff = d_old - d;
+ 				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
+
+ 				appxcond = xj_sq[j]*d*d + G_loss*d + cond;
+ 				if(appxcond <= 0)
+ 				{
+ 					x = prob_col->x[j];
+ 					while(x->index != -1)
+ 					{
+ 						b[x->index-1] += d_diff*x->value;
+ 						x++;
+ 					}
+ 					break;
+ 				}
+
+ 				if(num_linesearch == 0)
+ 				{
+ 					loss_old = 0;
+ 					loss_new = 0;
+ 					x = prob_col->x[j];
+ 					while(x->index != -1)
+ 					{
+ 						int ind = x->index-1;
+ 						if(b[ind] > 0)
+ 							loss_old += C[GETI(ind)]*b[ind]*b[ind];
+ 						double b_new = b[ind] + d_diff*x->value;
+ 						b[ind] = b_new;
+ 						if(b_new > 0)
+ 							loss_new += C[GETI(ind)]*b_new*b_new;
+ 						x++;
+ 					}
+ 				}
+ 				else
+ 				{
+ 					loss_new = 0;
+ 					x = prob_col->x[j];
+ 					while(x->index != -1)
+ 					{
+ 						int ind = x->index-1;
+ 						double b_new = b[ind] + d_diff*x->value;
+ 						b[ind] = b_new;
+ 						if(b_new > 0)
+ 							loss_new += C[GETI(ind)]*b_new*b_new;
+ 						x++;
+ 					}
+ 				}
+
+ 				cond = cond + loss_new - loss_old;
+ 				if(cond <= 0)
+ 					break;
+ 				else
+ 				{
+ 					d_old = d;
+ 					d *= 0.5;
+ 					delta *= 0.5;
+ 				}
+ 			}
+
+ 			w[j] += d;
+
+ 			// recompute b[] if line search takes too many steps
+ 			if(num_linesearch >= max_num_linesearch)
+ 			{
+ 				info("#");
+ 				for(int i=0; i<l; i++)
+ 					b[i] = 1;
+
+ 				for(int i=0; i<w_size; i++)
+ 				{
+ 					if(w[i]==0) continue;
+ 					x = prob_col->x[i];
+ 					while(x->index != -1)
+ 					{
+ 						b[x->index-1] -= w[i]*x->value;
+ 						x++;
+ 					}
+ 				}
+ 			}
+ 		}
+
+ 		if(iter == 0)
+ 			Gmax_init = Gmax_new;
+ 		iter++;
+ 		if(iter % 10 == 0)
+ 			info(".");
+
+ 		if(Gmax_new <= eps*Gmax_init)
+ 		{
+ 			if(active_size == w_size)
+ 				break;
+ 			else
+ 			{
+ 				active_size = w_size;
+ 				info("*");
+ 				Gmax_old = INF;
+ 				continue;
+ 			}
+ 		}
+
+ 		Gmax_old = Gmax_new;
+ 	}
+
+ 	info("\noptimization finished, #iter = %d\n", iter);
+ 	if(iter >= max_iter)
+ 		info("\nWARNING: reaching max number of iterations\n");
+
+ 	// calculate objective value
+
+ 	double v = 0;
+ 	int nnz = 0;
+ 	for(j=0; j<w_size; j++)
+ 	{
+ 		x = prob_col->x[j];
+ 		while(x->index != -1)
+ 		{
+ 			x->value *= prob_col->y[x->index-1]; // restore x->value
+ 			x++;
+ 		}
+ 		if(w[j] != 0)
+ 		{
+ 			v += fabs(w[j]);
+ 			nnz++;
+ 		}
+ 	}
+ 	for(j=0; j<l; j++)
+ 		if(b[j] > 0)
+ 			v += C[GETI(j)]*b[j]*b[j];
+
+ 	info("Objective value = %lf\n", v);
+ 	info("#nonzeros/#features = %d/%d\n", nnz, w_size);
+
+ 	delete [] index;
+ 	delete [] y;
+ 	delete [] b;
+ 	delete [] xj_sq;
+ }
+
+ // A coordinate descent algorithm for
+ // L1-regularized logistic regression problems
+ //
+ //  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
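+ // solve_l1r_lr() below follows the same one-variable Newton + line-search
+ // pattern as solve_l1r_l2_svc(), but for logistic loss: it maintains
+ // exp_wTx[i] = exp(w^T x_i), so each line-search trial only needs
+ // multiplications by exp(d*x_ij) rather than recomputing w^T x_i.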
+
+ static void solve_l1r_lr(
+ 	const problem *prob_col, double *w, double eps,
+ 	double Cp, double Cn)
+ {
+ 	int l = prob_col->l;
+ 	int w_size = prob_col->n;
+ 	int j, s, iter = 0;
+ 	int max_iter = 1000;
+ 	int active_size = w_size;
+ 	int max_num_linesearch = 20;
+
+ 	double x_min = 0;
+ 	double sigma = 0.01;
+ 	double d, G, H;
+ 	double Gmax_old = INF;
+ 	double Gmax_new;
+ 	double Gmax_init;
+ 	double sum1, appxcond1;
+ 	double sum2, appxcond2;
+ 	double cond;
+
+ 	int *index = new int[w_size];
+ 	schar *y = new schar[l];
+ 	double *exp_wTx = new double[l];
+ 	double *exp_wTx_new = new double[l];
+ 	double *xj_max = new double[w_size];
+ 	double *C_sum = new double[w_size];
+ 	double *xjneg_sum = new double[w_size];
+ 	double *xjpos_sum = new double[w_size];
+ 	feature_node *x;
+
+ 	double C[3] = {Cn,0,Cp};
+
+ 	for(j=0; j<l; j++)
+ 	{
+ 		exp_wTx[j] = 1;
+ 		if(prob_col->y[j] > 0)
+ 			y[j] = 1;
+ 		else
+ 			y[j] = -1;
+ 	}
+ 	for(j=0; j<w_size; j++)
+ 	{
+ 		w[j] = 0;
+ 		index[j] = j;
+ 		xj_max[j] = 0;
+ 		C_sum[j] = 0;
+ 		xjneg_sum[j] = 0;
+ 		xjpos_sum[j] = 0;
+ 		x = prob_col->x[j];
+ 		while(x->index != -1)
+ 		{
+ 			int ind = x->index-1;
+ 			double val = x->value;
+ 			x_min = min(x_min, val);
+ 			xj_max[j] = max(xj_max[j], val);
+ 			C_sum[j] += C[GETI(ind)];
+ 			if(y[ind] == -1)
+ 				xjneg_sum[j] += C[GETI(ind)]*val;
+ 			else
+ 				xjpos_sum[j] += C[GETI(ind)]*val;
+ 			x++;
+ 		}
+ 	}
+
+ 	while(iter < max_iter)
+ 	{
+ 		Gmax_new = 0;
+
+ 		for(j=0; j<active_size; j++)
+ 		{
+ 			int i = j+rand()%(active_size-j);
+ 			swap(index[i], index[j]);
+ 		}
+
+ 		for(s=0; s<active_size; s++)
+ 		{
+ 			j = index[s];
+ 			sum1 = 0;
+ 			sum2 = 0;
+ 			H = 0;
+
+ 			x = prob_col->x[j];
+ 			while(x->index != -1)
+ 			{
+ 				int ind = x->index-1;
+ 				double exp_wTxind = exp_wTx[ind];
+ 				double tmp1 = x->value/(1+exp_wTxind);
+ 				double tmp2 = C[GETI(ind)]*tmp1;
+ 				double tmp3 = tmp2*exp_wTxind;
+ 				sum2 += tmp2;
+ 				sum1 += tmp3;
+ 				H += tmp1*tmp3;
+ 				x++;
+ 			}
+
+ 			G = -sum2 + xjneg_sum[j];
+
+ 			double Gp = G+1;
+ 			double Gn = G-1;
+ 			double violation = 0;
+ 			if(w[j] == 0)
+ 			{
+ 				if(Gp < 0)
+ 					violation = -Gp;
+ 				else if(Gn > 0)
+ 					violation = Gn;
+ 				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
+ 				{
+ 					active_size--;
+ 					swap(index[s], index[active_size]);
+ 					s--;
+ 					continue;
+ 				}
+ 			}
+ 			else if(w[j] > 0)
+ 				violation = fabs(Gp);
+ 			else
+ 				violation = fabs(Gn);
+
+ 			Gmax_new = max(Gmax_new, violation);
+
+ 			// obtain Newton direction d
+ 			if(Gp <= H*w[j])
+ 				d = -Gp/H;
+ 			else if(Gn >= H*w[j])
+ 				d = -Gn/H;
+ 			else
+ 				d = -w[j];
+
+ 			if(fabs(d) < 1.0e-12)
+ 				continue;
+
+ 			d = min(max(d,-10.0),10.0);
+
+ 			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
+ 			int num_linesearch;
+ 			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
+ 			{
+ 				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
+
+ 				if(x_min >= 0)
+ 				{
+ 					double tmp = exp(d*xj_max[j]);
+ 					appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
+ 					appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
+ 					if(min(appxcond1,appxcond2) <= 0)
+ 					{
+ 						x = prob_col->x[j];
+ 						while(x->index != -1)
+ 						{
+ 							exp_wTx[x->index-1] *= exp(d*x->value);
+ 							x++;
+ 						}
+ 						break;
+ 					}
+ 				}
+
+ 				cond += d*xjneg_sum[j];
+
+ 				int i = 0;
+ 				x = prob_col->x[j];
+ 				while(x->index != -1)
+ 				{
+ 					int ind = x->index-1;
+ 					double exp_dx = exp(d*x->value);
+ 					exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
+ 					cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
+ 					x++; i++;
+ 				}
+
+ 				if(cond <= 0)
+ 				{
+ 					int i = 0;
+ 					x = prob_col->x[j];
+ 					while(x->index != -1)
+ 					{
+ 						int ind = x->index-1;
+ 						exp_wTx[ind] = exp_wTx_new[i];
+ 						x++; i++;
+ 					}
+ 					break;
+ 				}
+ 				else
+ 				{
+ 					d *= 0.5;
+ 					delta *= 0.5;
+ 				}
+ 			}
+
+ 			w[j] += d;
+
+ 			// recompute exp_wTx[] if line search takes too many steps
+ 			if(num_linesearch >= max_num_linesearch)
+ 			{
+ 				info("#");
+ 				for(int i=0; i<l; i++)
+ 					exp_wTx[i] = 0;
+
+ 				for(int i=0; i<w_size; i++)
+ 				{
+ 					if(w[i]==0) continue;
+ 					x = prob_col->x[i];
+ 					while(x->index != -1)
+ 					{
+ 						exp_wTx[x->index-1] += w[i]*x->value;
+ 						x++;
+ 					}
+ 				}
+
+ 				for(int i=0; i<l; i++)
+ 					exp_wTx[i] = exp(exp_wTx[i]);
+ 			}
+ 		}
+
+ 		if(iter == 0)
+ 			Gmax_init = Gmax_new;
+ 		iter++;
+ 		if(iter % 10 == 0)
+ 			info(".");
+
+ 		if(Gmax_new <= eps*Gmax_init)
+ 		{
+ 			if(active_size == w_size)
+ 				break;
+ 			else
+ 			{
+ 				active_size = w_size;
+ 				info("*");
+ 				Gmax_old = INF;
+ 				continue;
+ 			}
+ 		}
+
+ 		Gmax_old = Gmax_new;
+ 	}
+
+ 	info("\noptimization finished, #iter = %d\n", iter);
+ 	if(iter >= max_iter)
+ 		info("\nWARNING: reaching max number of iterations\n");
+
+ 	// calculate objective value
+
+ 	double v = 0;
+ 	int nnz = 0;
+ 	for(j=0; j<w_size; j++)
+ 		if(w[j] != 0)
+ 		{
+ 			v += fabs(w[j]);
+ 			nnz++;
+ 		}
+ 	for(j=0; j<l; j++)
+ 		if(y[j] == 1)
+ 			v += C[GETI(j)]*log(1+1/exp_wTx[j]);
+ 		else
+ 			v += C[GETI(j)]*log(1+exp_wTx[j]);
+
+ 	info("Objective value = %lf\n", v);
+ 	info("#nonzeros/#features = %d/%d\n", nnz, w_size);
+
+ 	delete [] index;
+ 	delete [] y;
+ 	delete [] exp_wTx;
+ 	delete [] exp_wTx_new;
+ 	delete [] xj_max;
+ 	delete [] C_sum;
+ 	delete [] xjneg_sum;
+ 	delete [] xjpos_sum;
+ }
+
+ // transpose matrix X from row format to column format
+ static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
+ {
+ 	int i;
+ 	int l = prob->l;
+ 	int n = prob->n;
+ 	int nnz = 0;
+ 	int *col_ptr = new int[n+1];
+ 	feature_node *x_space;
+ 	prob_col->l = l;
+ 	prob_col->n = n;
+ 	prob_col->y = new int[l];
+ 	prob_col->x = new feature_node*[n];
+
+ 	for(i=0; i<l; i++)
+ 		prob_col->y[i] = prob->y[i];
+
+ 	for(i=0; i<n+1; i++)
+ 		col_ptr[i] = 0;
+ 	for(i=0; i<l; i++)
+ 	{
+ 		feature_node *x = prob->x[i];
+ 		while(x->index != -1)
+ 		{
+ 			nnz++;
+ 			col_ptr[x->index]++;
+ 			x++;
+ 		}
+ 	}
+ 	for(i=1; i<n+1; i++)
+ 		col_ptr[i] += col_ptr[i-1] + 1;
+
+ 	x_space = new feature_node[nnz+n];
+ 	for(i=0; i<n; i++)
+ 		prob_col->x[i] = &x_space[col_ptr[i]];
+
+ 	for(i=0; i<l; i++)
+ 	{
+ 		feature_node *x = prob->x[i];
+ 		while(x->index != -1)
+ 		{
+ 			int ind = x->index-1;
+ 			x_space[col_ptr[ind]].index = i+1; // starts from 1
+ 			x_space[col_ptr[ind]].value = x->value;
+ 			col_ptr[ind]++;
+ 			x++;
+ 		}
+ 	}
+ 	for(i=0; i<n; i++)
+ 		x_space[col_ptr[i]].index = -1;
+
+ 	*x_space_ret = x_space;
+
+ 	delete [] col_ptr;
+ }
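+ // Example of what transpose() produces (illustrative values): for the
+ // 2x3 row-format matrix
+ //
+ //     row 1: (1,0.5) (3,2.0) (-1 end)
+ //     row 2: (2,1.0) (3,4.0) (-1 end)
+ //
+ // prob_col->x[0] holds column 1 as (1,0.5)(-1), x[1] holds column 2 as
+ // (2,1.0)(-1), and x[2] holds column 3 as (1,2.0)(2,4.0)(-1); in column
+ // format the stored index is the 1-based row number.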
+
+ // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
+ // perm, length l, must be allocated before calling this subroutine
+ static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
+ {
+ 	int l = prob->l;
+ 	int max_nr_class = 16;
+ 	int nr_class = 0;
+ 	int *label = Malloc(int,max_nr_class);
+ 	int *count = Malloc(int,max_nr_class);
+ 	int *data_label = Malloc(int,l);
+ 	int i;
+
+ 	for(i=0;i<l;i++)
+ 	{
+ 		int this_label = prob->y[i];
+ 		int j;
+ 		for(j=0;j<nr_class;j++)
+ 		{
+ 			if(this_label == label[j])
+ 			{
+ 				++count[j];
+ 				break;
+ 			}
+ 		}
+ 		data_label[i] = j;
+ 		if(j == nr_class)
+ 		{
+ 			if(nr_class == max_nr_class)
+ 			{
+ 				max_nr_class *= 2;
+ 				label = (int *)realloc(label,max_nr_class*sizeof(int));
+ 				count = (int *)realloc(count,max_nr_class*sizeof(int));
+ 			}
+ 			label[nr_class] = this_label;
+ 			count[nr_class] = 1;
+ 			++nr_class;
+ 		}
+ 	}
+
+ 	int *start = Malloc(int,nr_class);
+ 	start[0] = 0;
+ 	for(i=1;i<nr_class;i++)
+ 		start[i] = start[i-1]+count[i-1];
+ 	for(i=0;i<l;i++)
+ 	{
+ 		perm[start[data_label[i]]] = i;
+ 		++start[data_label[i]];
+ 	}
+ 	start[0] = 0;
+ 	for(i=1;i<nr_class;i++)
+ 		start[i] = start[i-1]+count[i-1];
+
+ 	*nr_class_ret = nr_class;
+ 	*label_ret = label;
+ 	*start_ret = start;
+ 	*count_ret = count;
+ 	free(data_label);
+ }
+
+ static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
+ {
+ 	double eps=param->eps;
+ 	int pos = 0;
+ 	int neg = 0;
+ 	for(int i=0;i<prob->l;i++)
+ 		if(prob->y[i]==+1)
+ 			pos++;
+ 	neg = prob->l - pos;
+
+ 	function *fun_obj=NULL;
+ 	switch(param->solver_type)
+ 	{
+ 		case L2R_LR:
+ 		{
+ 			fun_obj=new l2r_lr_fun(prob, Cp, Cn);
+ 			TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
+ 			tron_obj.set_print_string(liblinear_print_string);
+ 			tron_obj.tron(w);
+ 			delete fun_obj;
+ 			break;
+ 		}
+ 		case L2R_L2LOSS_SVC:
+ 		{
+ 			fun_obj=new l2r_l2_svc_fun(prob, Cp, Cn);
+ 			TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
+ 			tron_obj.set_print_string(liblinear_print_string);
+ 			tron_obj.tron(w);
+ 			delete fun_obj;
+ 			break;
+ 		}
+ 		case L2R_L2LOSS_SVC_DUAL:
+ 			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
+ 			break;
+ 		case L2R_L1LOSS_SVC_DUAL:
+ 			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
+ 			break;
+ 		case L1R_L2LOSS_SVC:
+ 		{
+ 			problem prob_col;
+ 			feature_node *x_space = NULL;
+ 			transpose(prob, &x_space ,&prob_col);
+ 			solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
+ 			delete [] prob_col.y;
+ 			delete [] prob_col.x;
+ 			delete [] x_space;
+ 			break;
+ 		}
+ 		case L1R_LR:
+ 		{
+ 			problem prob_col;
+ 			feature_node *x_space = NULL;
+ 			transpose(prob, &x_space ,&prob_col);
+ 			solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
+ 			delete [] prob_col.y;
+ 			delete [] prob_col.x;
+ 			delete [] x_space;
+ 			break;
+ 		}
+ 		default:
+ 			fprintf(stderr, "Error: unknown solver_type\n");
+ 			break;
+ 	}
+ }
+
+ //
+ // Interface functions
+ //
+ model* train(const problem *prob, const parameter *param)
+ {
+ 	int i,j;
+ 	int l = prob->l;
+ 	int n = prob->n;
+ 	int w_size = prob->n;
+ 	model *model_ = Malloc(model,1);
+
+ 	if(prob->bias>=0)
+ 		model_->nr_feature=n-1;
+ 	else
+ 		model_->nr_feature=n;
+ 	model_->param = *param;
+ 	model_->bias = prob->bias;
+
+ 	int nr_class;
+ 	int *label = NULL;
+ 	int *start = NULL;
+ 	int *count = NULL;
+ 	int *perm = Malloc(int,l);
+
+ 	// group training data of the same class
+ 	group_classes(prob,&nr_class,&label,&start,&count,perm);
+
+ 	model_->nr_class=nr_class;
+ 	model_->label = Malloc(int,nr_class);
+ 	for(i=0;i<nr_class;i++)
+ 		model_->label[i] = label[i];
+
+ 	// calculate weighted C
+ 	double *weighted_C = Malloc(double, nr_class);
+ 	for(i=0;i<nr_class;i++)
+ 		weighted_C[i] = param->C;
+ 	for(i=0;i<param->nr_weight;i++)
+ 	{
+ 		for(j=0;j<nr_class;j++)
+ 			if(param->weight_label[i] == label[j])
+ 				break;
+ 		if(j == nr_class)
+ 			fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
+ 		else
+ 			weighted_C[j] *= param->weight[i];
+ 	}
+
+ 	// constructing the subproblem
+ 	feature_node **x = Malloc(feature_node *,l);
+ 	for(i=0;i<l;i++)
+ 		x[i] = prob->x[perm[i]];
+
+ 	int k;
+ 	problem sub_prob;
+ 	sub_prob.l = l;
+ 	sub_prob.n = n;
+ 	sub_prob.x = Malloc(feature_node *,sub_prob.l);
+ 	sub_prob.y = Malloc(int,sub_prob.l);
+
+ 	for(k=0; k<sub_prob.l; k++)
+ 		sub_prob.x[k] = x[k];
+
+ 	// multi-class svm by Crammer and Singer
+ 	if(param->solver_type == MCSVM_CS)
+ 	{
+ 		model_->w=Malloc(double, n*nr_class);
+ 		for(i=0;i<nr_class;i++)
+ 			for(j=start[i];j<start[i]+count[i];j++)
+ 				sub_prob.y[j] = i;
+ 		Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
+ 		Solver.Solve(model_->w);
+ 	}
+ 	else
+ 	{
+ 		if(nr_class == 2)
+ 		{
+ 			model_->w=Malloc(double, w_size);
+
+ 			int e0 = start[0]+count[0];
+ 			k=0;
+ 			for(; k<e0; k++)
+ 				sub_prob.y[k] = +1;
+ 			for(; k<sub_prob.l; k++)
+ 				sub_prob.y[k] = -1;
+
+ 			train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
+ 		}
+ 		else
+ 		{
+ 			model_->w=Malloc(double, w_size*nr_class);
+ 			double *w=Malloc(double, w_size);
+ 			for(i=0;i<nr_class;i++)
+ 			{
+ 				int si = start[i];
+ 				int ei = si+count[i];
+
+ 				k=0;
+ 				for(; k<si; k++)
+ 					sub_prob.y[k] = -1;
+ 				for(; k<ei; k++)
+ 					sub_prob.y[k] = +1;
+ 				for(; k<sub_prob.l; k++)
+ 					sub_prob.y[k] = -1;
+
+ 				train_one(&sub_prob, param, w, weighted_C[i], param->C);
+
+ 				for(int j=0;j<w_size;j++)
+ 					model_->w[j*nr_class+i] = w[j];
+ 			}
+ 			free(w);
+ 		}
+
+ 	}
+
+ 	free(x);
+ 	free(label);
+ 	free(start);
+ 	free(count);
+ 	free(perm);
+ 	free(sub_prob.x);
+ 	free(sub_prob.y);
+ 	free(weighted_C);
+ 	return model_;
+ }
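+ // Minimal end-to-end usage sketch for the interface above (a hand-written
+ // example, not part of the library; it assumes the declarations of
+ // feature_node, problem and parameter from linear.h as used in this file):
+ //
+ //     // two 2-feature training points: x1=(1,1) labeled +1, x2=(-1,-1) labeled -1
+ //     feature_node x1[] = {{1,1.0},{2,1.0},{-1,0.0}};
+ //     feature_node x2[] = {{1,-1.0},{2,-1.0},{-1,0.0}};
+ //     feature_node *rows[] = {x1, x2};
+ //     int labels[] = {+1, -1};
+ //
+ //     problem prob;
+ //     prob.l = 2; prob.n = 2; prob.x = rows; prob.y = labels;
+ //     prob.bias = -1;                 // <0: no bias feature appended to rows
+ //
+ //     parameter param;
+ //     param.solver_type = L2R_L2LOSS_SVC_DUAL;
+ //     param.C = 1; param.eps = 0.1;
+ //     param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL;
+ //
+ //     if(check_parameter(&prob, &param) == NULL)
+ //     {
+ //         model *m = train(&prob, &param);
+ //         int pred = predict(m, x1);  // should come back +1 on this separable data
+ //         destroy_model(m);
+ //     }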
+
+ void destroy_model(struct model *model_)
+ {
+ 	if(model_->w != NULL)
+ 		free(model_->w);
+ 	if(model_->label != NULL)
+ 		free(model_->label);
+ 	free(model_);
+ }
+
+ static const char *solver_type_table[]=
+ {
+ 	"L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC","L2R_L1LOSS_SVC_DUAL","MCSVM_CS", "L1R_L2LOSS_SVC","L1R_LR", NULL
+ };
+
+ int save_model(const char *model_file_name, const struct model *model_)
+ {
+ 	int i;
+ 	int nr_feature=model_->nr_feature;
+ 	int n;
+ 	const parameter& param = model_->param;
+
+ 	if(model_->bias>=0)
+ 		n=nr_feature+1;
+ 	else
+ 		n=nr_feature;
+ 	int w_size = n;
+ 	FILE *fp = fopen(model_file_name,"w");
+ 	if(fp==NULL) return -1;
+
+ 	int nr_w;
+ 	if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
+ 		nr_w=1;
+ 	else
+ 		nr_w=model_->nr_class;
+
+ 	fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
+ 	fprintf(fp, "nr_class %d\n", model_->nr_class);
+ 	fprintf(fp, "label");
+ 	for(i=0; i<model_->nr_class; i++)
+ 		fprintf(fp, " %d", model_->label[i]);
+ 	fprintf(fp, "\n");
+
+ 	fprintf(fp, "nr_feature %d\n", nr_feature);
+
+ 	fprintf(fp, "bias %.16g\n", model_->bias);
+
+ 	fprintf(fp, "w\n");
+ 	for(i=0; i<w_size; i++)
+ 	{
+ 		int j;
+ 		for(j=0; j<nr_w; j++)
+ 			fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
+ 		fprintf(fp, "\n");
+ 	}
+
+ 	if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
+ 	else return 0;
+ }
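+ // The resulting model file is plain text, written exactly in the order
+ // above: a "solver_type <name>" line, "nr_class", a "label" line listing
+ // the class labels, "nr_feature", "bias", then a "w" line followed by
+ // w_size rows of nr_w coefficients (one column per class, or a single
+ // column for binary non-MCSVM_CS models).  load_model() below parses the
+ // same layout.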
+
+ struct model *load_model(const char *model_file_name)
+ {
+ 	FILE *fp = fopen(model_file_name,"r");
+ 	if(fp==NULL) return NULL;
+
+ 	int i;
+ 	int nr_feature;
+ 	int n;
+ 	int nr_class;
+ 	double bias;
+ 	model *model_ = Malloc(model,1);
+ 	parameter& param = model_->param;
+
+ 	model_->label = NULL;
+
+ 	char cmd[81];
+ 	while(1)
+ 	{
+ 		fscanf(fp,"%80s",cmd);
+ 		if(strcmp(cmd,"solver_type")==0)
+ 		{
+ 			fscanf(fp,"%80s",cmd);
+ 			int i;
+ 			for(i=0;solver_type_table[i];i++)
+ 			{
+ 				if(strcmp(solver_type_table[i],cmd)==0)
+ 				{
+ 					param.solver_type=i;
+ 					break;
+ 				}
+ 			}
+ 			if(solver_type_table[i] == NULL)
+ 			{
+ 				fprintf(stderr,"unknown solver type.\n");
+ 				free(model_->label);
+ 				free(model_);
+ 				return NULL;
+ 			}
+ 		}
+ 		else if(strcmp(cmd,"nr_class")==0)
+ 		{
+ 			fscanf(fp,"%d",&nr_class);
+ 			model_->nr_class=nr_class;
+ 		}
+ 		else if(strcmp(cmd,"nr_feature")==0)
+ 		{
+ 			fscanf(fp,"%d",&nr_feature);
+ 			model_->nr_feature=nr_feature;
+ 		}
+ 		else if(strcmp(cmd,"bias")==0)
+ 		{
+ 			fscanf(fp,"%lf",&bias);
+ 			model_->bias=bias;
+ 		}
+ 		else if(strcmp(cmd,"w")==0)
+ 		{
+ 			break;
+ 		}
+ 		else if(strcmp(cmd,"label")==0)
+ 		{
+ 			int nr_class = model_->nr_class;
+ 			model_->label = Malloc(int,nr_class);
+ 			for(int i=0;i<nr_class;i++)
+ 				fscanf(fp,"%d",&model_->label[i]);
+ 		}
+ 		else
+ 		{
+ 			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
+ 			free(model_);
+ 			return NULL;
+ 		}
+ 	}
+
+ 	nr_feature=model_->nr_feature;
+ 	if(model_->bias>=0)
+ 		n=nr_feature+1;
+ 	else
+ 		n=nr_feature;
+ 	int w_size = n;
+ 	int nr_w;
+ 	if(nr_class==2 && param.solver_type != MCSVM_CS)
+ 		nr_w = 1;
+ 	else
+ 		nr_w = nr_class;
+
+ 	model_->w=Malloc(double, w_size*nr_w);
+ 	for(i=0; i<w_size; i++)
+ 	{
+ 		int j;
+ 		for(j=0; j<nr_w; j++)
+ 			fscanf(fp, "%lf ", &model_->w[i*nr_w+j]);
+ 		fscanf(fp, "\n");
+ 	}
+ 	if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
+
+ 	return model_;
+ }
+
+ int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
+ {
+ 	int idx;
+ 	int n;
+ 	if(model_->bias>=0)
+ 		n=model_->nr_feature+1;
+ 	else
+ 		n=model_->nr_feature;
+ 	double *w=model_->w;
+ 	int nr_class=model_->nr_class;
+ 	int i;
+ 	int nr_w;
+ 	if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
+ 		nr_w = 1;
+ 	else
+ 		nr_w = nr_class;
+
+ 	const feature_node *lx=x;
+ 	for(i=0;i<nr_w;i++)
+ 		dec_values[i] = 0;
+ 	for(; (idx=lx->index)!=-1; lx++)
+ 	{
+ 		// the dimension of testing data may exceed that of training
+ 		if(idx<=n)
+ 			for(i=0;i<nr_w;i++)
+ 				dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
+ 	}
+
+ 	if(nr_class==2)
+ 		return (dec_values[0]>0)?model_->label[0]:model_->label[1];
+ 	else
+ 	{
+ 		int dec_max_idx = 0;
+ 		for(i=1;i<nr_class;i++)
+ 		{
+ 			if(dec_values[i] > dec_values[dec_max_idx])
+ 				dec_max_idx = i;
+ 		}
+ 		return model_->label[dec_max_idx];
+ 	}
+ }
+
+ int predict(const model *model_, const feature_node *x)
+ {
+ 	double *dec_values = Malloc(double, model_->nr_class);
+ 	int label=predict_values(model_, x, dec_values);
+ 	free(dec_values);
+ 	return label;
+ }
+
+ int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
+ {
+ 	if(model_->param.solver_type==L2R_LR)
+ 	{
+ 		int i;
+ 		int nr_class=model_->nr_class;
+ 		int nr_w;
+ 		if(nr_class==2)
+ 			nr_w = 1;
+ 		else
+ 			nr_w = nr_class;
+
+ 		int label=predict_values(model_, x, prob_estimates);
+ 		for(i=0;i<nr_w;i++)
+ 			prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
+
+ 		if(nr_class==2) // for binary classification
+ 			prob_estimates[1]=1.-prob_estimates[0];
+ 		else
+ 		{
+ 			double sum=0;
+ 			for(i=0; i<nr_class; i++)
+ 				sum+=prob_estimates[i];
+
+ 			for(i=0; i<nr_class; i++)
+ 				prob_estimates[i]=prob_estimates[i]/sum;
+ 		}
+
+ 		return label;
+ 	}
+ 	else
+ 		return 0;
+ }
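+ // Note: predict_probability() only produces real probabilities for the
+ // L2R_LR solver (logistic regression), where 1/(1+exp(-dec)) is the
+ // model's actual posterior; for every other solver_type it returns 0
+ // without filling prob_estimates.  In the multiclass case the per-class
+ // sigmoid outputs are simply renormalized to sum to 1.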
+
+ void destroy_param(parameter* param)
+ {
+ 	if(param->weight_label != NULL)
+ 		free(param->weight_label);
+ 	if(param->weight != NULL)
+ 		free(param->weight);
+ }
+
+ const char *check_parameter(const problem *prob, const parameter *param)
+ {
+ 	if(param->eps <= 0)
+ 		return "eps <= 0";
+
+ 	if(param->C <= 0)
+ 		return "C <= 0";
+
+ 	if(param->solver_type != L2R_LR
+ 		&& param->solver_type != L2R_L2LOSS_SVC_DUAL
+ 		&& param->solver_type != L2R_L2LOSS_SVC
+ 		&& param->solver_type != L2R_L1LOSS_SVC_DUAL
+ 		&& param->solver_type != MCSVM_CS
+ 		&& param->solver_type != L1R_L2LOSS_SVC
+ 		&& param->solver_type != L1R_LR)
+ 		return "unknown solver type";
+
+ 	return NULL;
+ }
+
+ void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target)
+ {
+ 	int i;
+ 	int *fold_start = Malloc(int,nr_fold+1);
+ 	int l = prob->l;
+ 	int *perm = Malloc(int,l);
+
+ 	for(i=0;i<l;i++) perm[i]=i;
+ 	for(i=0;i<l;i++)
+ 	{
+ 		int j = i+rand()%(l-i);
+ 		swap(perm[i],perm[j]);
+ 	}
+ 	for(i=0;i<=nr_fold;i++)
+ 		fold_start[i]=i*l/nr_fold;
+
+ 	for(i=0;i<nr_fold;i++)
+ 	{
+ 		int begin = fold_start[i];
+ 		int end = fold_start[i+1];
+ 		int j,k;
+ 		struct problem subprob;
+
+ 		subprob.bias = prob->bias;
+ 		subprob.n = prob->n;
+ 		subprob.l = l-(end-begin);
+ 		subprob.x = Malloc(struct feature_node*,subprob.l);
+ 		subprob.y = Malloc(int,subprob.l);
+
+ 		k=0;
+ 		for(j=0;j<begin;j++)
+ 		{
+ 			subprob.x[k] = prob->x[perm[j]];
+ 			subprob.y[k] = prob->y[perm[j]];
+ 			++k;
+ 		}
+ 		for(j=end;j<l;j++)
+ 		{
+ 			subprob.x[k] = prob->x[perm[j]];
+ 			subprob.y[k] = prob->y[perm[j]];
+ 			++k;
+ 		}
+ 		struct model *submodel = train(&subprob,param);
+ 		for(j=begin;j<end;j++)
+ 			target[perm[j]] = predict(submodel,prob->x[perm[j]]);
+ 		destroy_model(submodel);
+ 		free(subprob.x);
+ 		free(subprob.y);
+ 	}
+ 	free(fold_start);
+ 	free(perm);
+ }
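+ // Usage sketch for cross_validation() (illustrative, the names are not
+ // defined in this file): the caller supplies target = new int[prob.l];
+ // after cross_validation(&prob, &param, 5, target), target[i] holds the
+ // label predicted for instance i by a model trained on the other folds,
+ // so accuracy is just the fraction of i with target[i] == prob.y[i].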
+
+ int get_nr_feature(const model *model_)
+ {
+ 	return model_->nr_feature;
+ }
+
+ int get_nr_class(const model *model_)
+ {
+ 	return model_->nr_class;
+ }
+
+ void get_labels(const model *model_, int* label)
+ {
+ 	if (model_->label != NULL)
+ 		for(int i=0;i<model_->nr_class;i++)
+ 			label[i] = model_->label[i];
+ }