liblinear-ruby-swig 0.2.0

data/ext/linear.cpp ADDED
@@ -0,0 +1,2096 @@
+ #include <math.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <stdarg.h>
+ #include "linear.h"
+ #include "tron.h"
+ typedef signed char schar;
+ template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
+ #ifndef min
+ template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
+ #endif
+ #ifndef max
+ template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
+ #endif
+ template <class S, class T> static inline void clone(T*& dst, S* src, int n)
+ {
+ dst = new T[n];
+ memcpy((void *)dst,(void *)src,sizeof(T)*n);
+ }
+ #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
+ #define INF HUGE_VAL
+
+ static void print_string_stdout(const char *s)
+ {
+ fputs(s,stdout);
+ fflush(stdout);
+ }
+
+ void (*liblinear_print_string) (const char *) = &print_string_stdout;
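+
+ // Callers can silence or redirect solver messages by pointing
+ // liblinear_print_string at their own handler. Illustrative sketch only,
+ // not part of the packaged source:
+ //   static void print_null(const char *s) {}  /* swallow all output */
+ //   liblinear_print_string = &print_null;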
+
+ #if 1
+ int info_on = 0;
+ static void info(const char *fmt,...)
+ {
+ char buf[BUFSIZ];
+ va_list ap;
+ if (info_on==1) {
+ va_start(ap,fmt);
+ vsprintf(buf,fmt,ap);
+ va_end(ap);
+ (*liblinear_print_string)(buf);
+ }
+ }
+ #else
+ static void info(const char *fmt,...) {}
+ #endif
+
+ class l2r_lr_fun : public function
+ {
+ public:
+ l2r_lr_fun(const problem *prob, double Cp, double Cn);
+ ~l2r_lr_fun();
+
+ double fun(double *w);
+ void grad(double *w, double *g);
+ void Hv(double *s, double *Hs);
+
+ int get_nr_variable(void);
+
+ private:
+ void Xv(double *v, double *Xv);
+ void XTv(double *v, double *XTv);
+
+ double *C;
+ double *z;
+ double *D;
+ const problem *prob;
+ };
+
+ l2r_lr_fun::l2r_lr_fun(const problem *prob, double Cp, double Cn)
+ {
+ int i;
+ int l=prob->l;
+ int *y=prob->y;
+
+ this->prob = prob;
+
+ z = new double[l];
+ D = new double[l];
+ C = new double[l];
+
+ for (i=0; i<l; i++)
+ {
+ if (y[i] == 1)
+ C[i] = Cp;
+ else
+ C[i] = Cn;
+ }
+ }
+
+ l2r_lr_fun::~l2r_lr_fun()
+ {
+ delete[] z;
+ delete[] D;
+ delete[] C;
+ }
+
+
+ double l2r_lr_fun::fun(double *w)
+ {
+ int i;
+ double f=0;
+ int *y=prob->y;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+
+ Xv(w, z);
+ for(i=0;i<l;i++)
+ {
+ double yz = y[i]*z[i];
+ if (yz >= 0)
+ f += C[i]*log(1 + exp(-yz));
+ else
+ f += C[i]*(-yz+log(1 + exp(yz)));
+ }
+ f = 2*f;
+ for(i=0;i<w_size;i++)
+ f += w[i]*w[i];
+ f /= 2.0;
+
+ return(f);
+ }
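+
+ // Descriptive note: the branch above computes C_i*log(1+exp(-y_i*w^T*x_i))
+ // without overflowing exp(): for yz < 0 it uses the identity
+ // log(1+exp(-yz)) = -yz + log(1+exp(yz)), so exp() never receives a large
+ // positive argument. The value returned is
+ //   f(w) = 0.5*w^T*w + sum_i C_i*log(1 + exp(-y_i*w^T*x_i)).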
+
+ void l2r_lr_fun::grad(double *w, double *g)
+ {
+ int i;
+ int *y=prob->y;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+
+ for(i=0;i<l;i++)
+ {
+ z[i] = 1/(1 + exp(-y[i]*z[i]));
+ D[i] = z[i]*(1-z[i]);
+ z[i] = C[i]*(z[i]-1)*y[i];
+ }
+ XTv(z, g);
+
+ for(i=0;i<w_size;i++)
+ g[i] = w[i] + g[i];
+ }
+
+ int l2r_lr_fun::get_nr_variable(void)
+ {
+ return prob->n;
+ }
+
+ void l2r_lr_fun::Hv(double *s, double *Hs)
+ {
+ int i;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+ double *wa = new double[l];
+
+ Xv(s, wa);
+ for(i=0;i<l;i++)
+ wa[i] = C[i]*D[i]*wa[i];
+
+ XTv(wa, Hs);
+ for(i=0;i<w_size;i++)
+ Hs[i] = s[i] + Hs[i];
+ delete[] wa;
+ }
+
+ void l2r_lr_fun::Xv(double *v, double *Xv)
+ {
+ int i;
+ int l=prob->l;
+ feature_node **x=prob->x;
+
+ for(i=0;i<l;i++)
+ {
+ feature_node *s=x[i];
+ Xv[i]=0;
+ while(s->index!=-1)
+ {
+ Xv[i]+=v[s->index-1]*s->value;
+ s++;
+ }
+ }
+ }
+
+ void l2r_lr_fun::XTv(double *v, double *XTv)
+ {
+ int i;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+ feature_node **x=prob->x;
+
+ for(i=0;i<w_size;i++)
+ XTv[i]=0;
+ for(i=0;i<l;i++)
+ {
+ feature_node *s=x[i];
+ while(s->index!=-1)
+ {
+ XTv[s->index-1]+=v[i]*s->value;
+ s++;
+ }
+ }
+ }
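+
+ // Descriptive note: Xv computes X*v and XTv computes X^T*v over the sparse
+ // row lists; each row is a feature_node array with 1-based indices
+ // terminated by index == -1, hence the v[s->index-1] offset.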
+
+ class l2r_l2_svc_fun : public function
+ {
+ public:
+ l2r_l2_svc_fun(const problem *prob, double Cp, double Cn);
+ ~l2r_l2_svc_fun();
+
+ double fun(double *w);
+ void grad(double *w, double *g);
+ void Hv(double *s, double *Hs);
+
+ int get_nr_variable(void);
+
+ private:
+ void Xv(double *v, double *Xv);
+ void subXv(double *v, double *Xv);
+ void subXTv(double *v, double *XTv);
+
+ double *C;
+ double *z;
+ double *D;
+ int *I;
+ int sizeI;
+ const problem *prob;
+ };
+
+ l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double Cp, double Cn)
+ {
+ int i;
+ int l=prob->l;
+ int *y=prob->y;
+
+ this->prob = prob;
+
+ z = new double[l];
+ D = new double[l];
+ C = new double[l];
+ I = new int[l];
+
+ for (i=0; i<l; i++)
+ {
+ if (y[i] == 1)
+ C[i] = Cp;
+ else
+ C[i] = Cn;
+ }
+ }
+
+ l2r_l2_svc_fun::~l2r_l2_svc_fun()
+ {
+ delete[] z;
+ delete[] D;
+ delete[] C;
+ delete[] I;
+ }
+
+ double l2r_l2_svc_fun::fun(double *w)
+ {
+ int i;
+ double f=0;
+ int *y=prob->y;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+
+ Xv(w, z);
+ for(i=0;i<l;i++)
+ {
+ z[i] = y[i]*z[i];
+ double d = 1-z[i];
+ if (d > 0)
+ f += C[i]*d*d;
+ }
+ f = 2*f;
+ for(i=0;i<w_size;i++)
+ f += w[i]*w[i];
+ f /= 2.0;
+
+ return(f);
+ }
+
+ void l2r_l2_svc_fun::grad(double *w, double *g)
+ {
+ int i;
+ int *y=prob->y;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+
+ sizeI = 0;
+ for (i=0;i<l;i++)
+ if (z[i] < 1)
+ {
+ z[sizeI] = C[i]*y[i]*(z[i]-1);
+ I[sizeI] = i;
+ sizeI++;
+ }
+ subXTv(z, g);
+
+ for(i=0;i<w_size;i++)
+ g[i] = w[i] + 2*g[i];
+ }
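+
+ // Descriptive note: the squared hinge loss max(0, 1 - y_i*w^T*x_i)^2 has a
+ // zero gradient outside the active set I = { i : y_i*w^T*x_i < 1 }, so grad
+ // and Hv below only touch the sizeI rows collected in I via subXv/subXTv.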
+
+ int l2r_l2_svc_fun::get_nr_variable(void)
+ {
+ return prob->n;
+ }
+
+ void l2r_l2_svc_fun::Hv(double *s, double *Hs)
+ {
+ int i;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+ double *wa = new double[l];
+
+ subXv(s, wa);
+ for(i=0;i<sizeI;i++)
+ wa[i] = C[I[i]]*wa[i];
+
+ subXTv(wa, Hs);
+ for(i=0;i<w_size;i++)
+ Hs[i] = s[i] + 2*Hs[i];
+ delete[] wa;
+ }
+
+ void l2r_l2_svc_fun::Xv(double *v, double *Xv)
+ {
+ int i;
+ int l=prob->l;
+ feature_node **x=prob->x;
+
+ for(i=0;i<l;i++)
+ {
+ feature_node *s=x[i];
+ Xv[i]=0;
+ while(s->index!=-1)
+ {
+ Xv[i]+=v[s->index-1]*s->value;
+ s++;
+ }
+ }
+ }
+
+ void l2r_l2_svc_fun::subXv(double *v, double *Xv)
+ {
+ int i;
+ feature_node **x=prob->x;
+
+ for(i=0;i<sizeI;i++)
+ {
+ feature_node *s=x[I[i]];
+ Xv[i]=0;
+ while(s->index!=-1)
+ {
+ Xv[i]+=v[s->index-1]*s->value;
+ s++;
+ }
+ }
+ }
+
+ void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
+ {
+ int i;
+ int w_size=get_nr_variable();
+ feature_node **x=prob->x;
+
+ for(i=0;i<w_size;i++)
+ XTv[i]=0;
+ for(i=0;i<sizeI;i++)
+ {
+ feature_node *s=x[I[i]];
+ while(s->index!=-1)
+ {
+ XTv[s->index-1]+=v[i]*s->value;
+ s++;
+ }
+ }
+ }
+
+ // A coordinate descent algorithm for
+ // multi-class support vector machines by Crammer and Singer
+ //
+ // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
+ // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
+ //
+ // where e^m_i = 0 if y_i = m,
+ // e^m_i = 1 if y_i != m,
+ // C^m_i = C if m = y_i,
+ // C^m_i = 0 if m != y_i,
+ // and w_m(\alpha) = \sum_i \alpha^m_i x_i
+ //
+ // Given:
+ // x, y, C
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+
+ #define GETI(i) (prob->y[i])
+ // To support weights for instances, use GETI(i) (i)
+
+ class Solver_MCSVM_CS
+ {
+ public:
+ Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
+ ~Solver_MCSVM_CS();
+ void Solve(double *w);
+ private:
+ void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
+ bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
+ double *B, *C, *G;
+ int w_size, l;
+ int nr_class;
+ int max_iter;
+ double eps;
+ const problem *prob;
+ };
+
+ Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
+ {
+ this->w_size = prob->n;
+ this->l = prob->l;
+ this->nr_class = nr_class;
+ this->eps = eps;
+ this->max_iter = max_iter;
+ this->prob = prob;
+ this->C = weighted_C;
+ this->B = new double[nr_class];
+ this->G = new double[nr_class];
+ }
+
+ Solver_MCSVM_CS::~Solver_MCSVM_CS()
+ {
+ delete[] B;
+ delete[] G;
+ }
+
+ int compare_double(const void *a, const void *b)
+ {
+ if(*(double *)a > *(double *)b)
+ return -1;
+ if(*(double *)a < *(double *)b)
+ return 1;
+ return 0;
+ }
+
+ void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
+ {
+ int r;
+ double *D;
+
+ clone(D, B, active_i);
+ if(yi < active_i)
+ D[yi] += A_i*C_yi;
+ qsort(D, active_i, sizeof(double), compare_double);
+
+ double beta = D[0] - A_i*C_yi;
+ for(r=1;r<active_i && beta<r*D[r];r++)
+ beta += D[r];
+
+ beta /= r;
+ for(r=0;r<active_i;r++)
+ {
+ if(r == yi)
+ alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
+ else
+ alpha_new[r] = min((double)0, (beta - B[r])/A_i);
+ }
+ delete[] D;
+ }
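+
+ // Descriptive note: compare_double sorts descending, so D[0] is the largest
+ // entry. The loop determines the multiplier beta of the equality constraint
+ // sum_m alpha^m_i = 0; each alpha_new[m] = min(C^m_i, (beta - B[m])/A_i) is
+ // that KKT solution clipped at its upper bound (C_yi for m = y_i, 0 otherwise).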
+
+ bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
+ {
+ double bound = 0;
+ if(m == yi)
+ bound = C[GETI(i)];
+ if(alpha_i == bound && G[m] < minG)
+ return true;
+ return false;
+ }
+
+ void Solver_MCSVM_CS::Solve(double *w)
+ {
+ int i, m, s;
+ int iter = 0;
+ double *alpha = new double[l*nr_class];
+ double *alpha_new = new double[nr_class];
+ int *index = new int[l];
+ double *QD = new double[l];
+ int *d_ind = new int[nr_class];
+ double *d_val = new double[nr_class];
+ int *alpha_index = new int[nr_class*l];
+ int *y_index = new int[l];
+ int active_size = l;
+ int *active_size_i = new int[l];
+ double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
+ bool start_from_all = true;
+ // initial
+ for(i=0;i<l*nr_class;i++)
+ alpha[i] = 0;
+ for(i=0;i<w_size*nr_class;i++)
+ w[i] = 0;
+ for(i=0;i<l;i++)
+ {
+ for(m=0;m<nr_class;m++)
+ alpha_index[i*nr_class+m] = m;
+ feature_node *xi = prob->x[i];
+ QD[i] = 0;
+ while(xi->index != -1)
+ {
+ QD[i] += (xi->value)*(xi->value);
+ xi++;
+ }
+ active_size_i[i] = nr_class;
+ y_index[i] = prob->y[i];
+ index[i] = i;
+ }
+
+ while(iter < max_iter)
+ {
+ double stopping = -INF;
+ for(i=0;i<active_size;i++)
+ {
+ int j = i+rand()%(active_size-i);
+ swap(index[i], index[j]);
+ }
+ for(s=0;s<active_size;s++)
+ {
+ i = index[s];
+ double Ai = QD[i];
+ double *alpha_i = &alpha[i*nr_class];
+ int *alpha_index_i = &alpha_index[i*nr_class];
+
+ if(Ai > 0)
+ {
+ for(m=0;m<active_size_i[i];m++)
+ G[m] = 1;
+ if(y_index[i] < active_size_i[i])
+ G[y_index[i]] = 0;
+
+ feature_node *xi = prob->x[i];
+ while(xi->index!= -1)
+ {
+ double *w_i = &w[(xi->index-1)*nr_class];
+ for(m=0;m<active_size_i[i];m++)
+ G[m] += w_i[alpha_index_i[m]]*(xi->value);
+ xi++;
+ }
+
+ double minG = INF;
+ double maxG = -INF;
+ for(m=0;m<active_size_i[i];m++)
+ {
+ if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
+ minG = G[m];
+ if(G[m] > maxG)
+ maxG = G[m];
+ }
+ if(y_index[i] < active_size_i[i])
+ if(alpha_i[prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
+ minG = G[y_index[i]];
+
+ for(m=0;m<active_size_i[i];m++)
+ {
+ if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
+ {
+ active_size_i[i]--;
+ while(active_size_i[i]>m)
+ {
+ if(!be_shrunk(i, active_size_i[i], y_index[i],
+ alpha_i[alpha_index_i[active_size_i[i]]], minG))
+ {
+ swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
+ swap(G[m], G[active_size_i[i]]);
+ if(y_index[i] == active_size_i[i])
+ y_index[i] = m;
+ else if(y_index[i] == m)
+ y_index[i] = active_size_i[i];
+ break;
+ }
+ active_size_i[i]--;
+ }
+ }
+ }
+
+ if(active_size_i[i] <= 1)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+
+ if(maxG-minG <= 1e-12)
+ continue;
+ else
+ stopping = max(maxG - minG, stopping);
+
+ for(m=0;m<active_size_i[i];m++)
+ B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
+
+ solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
+ int nz_d = 0;
+ for(m=0;m<active_size_i[i];m++)
+ {
+ double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
+ alpha_i[alpha_index_i[m]] = alpha_new[m];
+ if(fabs(d) >= 1e-12)
+ {
+ d_ind[nz_d] = alpha_index_i[m];
+ d_val[nz_d] = d;
+ nz_d++;
+ }
+ }
+
+ xi = prob->x[i];
+ while(xi->index != -1)
+ {
+ double *w_i = &w[(xi->index-1)*nr_class];
+ for(m=0;m<nz_d;m++)
+ w_i[d_ind[m]] += d_val[m]*xi->value;
+ xi++;
+ }
+ }
+ }
+
+ iter++;
+ if(iter % 10 == 0)
+ {
+ info(".");
+ }
+
+ if(stopping < eps_shrink)
+ {
+ if(stopping < eps && start_from_all == true)
+ break;
+ else
+ {
+ active_size = l;
+ for(i=0;i<l;i++)
+ active_size_i[i] = nr_class;
+ info("*");
+ eps_shrink = max(eps_shrink/2, eps);
+ start_from_all = true;
+ }
+ }
+ else
+ start_from_all = false;
+ }
+
+ info("\noptimization finished, #iter = %d\n",iter);
+ if (iter >= max_iter)
+ info("Warning: reaching max number of iterations\n");
+
+ // calculate objective value
+ double v = 0;
+ int nSV = 0;
+ for(i=0;i<w_size*nr_class;i++)
+ v += w[i]*w[i];
+ v = 0.5*v;
+ for(i=0;i<l*nr_class;i++)
+ {
+ v += alpha[i];
+ if(fabs(alpha[i]) > 0)
+ nSV++;
+ }
+ for(i=0;i<l;i++)
+ v -= alpha[i*nr_class+prob->y[i]];
+ info("Objective value = %lf\n",v);
+ info("nSV = %d\n",nSV);
+
+ delete [] alpha;
+ delete [] alpha_new;
+ delete [] index;
+ delete [] QD;
+ delete [] d_ind;
+ delete [] d_val;
+ delete [] alpha_index;
+ delete [] y_index;
+ delete [] active_size_i;
+ }
+
+ // A coordinate descent algorithm for
+ // L1-loss and L2-loss SVM dual problems
+ //
+ // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
+ // s.t. 0 <= alpha_i <= upper_bound_i,
+ //
+ // where Qij = yi yj xi^T xj and
+ // D is a diagonal matrix
+ //
+ // In L1-SVM case:
+ // upper_bound_i = Cp if y_i = 1
+ // upper_bound_i = Cn if y_i = -1
+ // D_ii = 0
+ // In L2-SVM case:
+ // upper_bound_i = INF
+ // D_ii = 1/(2*Cp) if y_i = 1
+ // D_ii = 1/(2*Cn) if y_i = -1
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
+
+ static void solve_l2r_l1l2_svc(
+ const problem *prob, double *w, double eps,
+ double Cp, double Cn, int solver_type)
+ {
+ int l = prob->l;
+ int w_size = prob->n;
+ int i, s, iter = 0;
+ double C, d, G;
+ double *QD = new double[l];
+ int max_iter = 1000;
+ int *index = new int[l];
+ double *alpha = new double[l];
+ schar *y = new schar[l];
+ int active_size = l;
+
+ // PG: projected gradient, for shrinking and stopping
+ double PG;
+ double PGmax_old = INF;
+ double PGmin_old = -INF;
+ double PGmax_new, PGmin_new;
+
+ // default solver_type: L2R_L2LOSS_SVC_DUAL
+ double diag[3] = {0.5/Cn, 0, 0.5/Cp};
+ double upper_bound[3] = {INF, 0, INF};
+ if(solver_type == L2R_L1LOSS_SVC_DUAL)
+ {
+ diag[0] = 0;
+ diag[2] = 0;
+ upper_bound[0] = Cn;
+ upper_bound[2] = Cp;
+ }
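+
+ // Descriptive note: GETI(i) = y[i]+1 maps the label y[i] in {-1,+1} to
+ // {0,2}, so diag[] and upper_bound[] act as 3-entry lookup tables indexed
+ // by label (the middle entry is never used).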
+
+ for(i=0; i<w_size; i++)
+ w[i] = 0;
+ for(i=0; i<l; i++)
+ {
+ alpha[i] = 0;
+ if(prob->y[i] > 0)
+ {
+ y[i] = +1;
+ }
+ else
+ {
+ y[i] = -1;
+ }
+ QD[i] = diag[GETI(i)];
+
+ feature_node *xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ QD[i] += (xi->value)*(xi->value);
+ xi++;
+ }
+ index[i] = i;
+ }
+
+ while (iter < max_iter)
+ {
+ PGmax_new = -INF;
+ PGmin_new = INF;
+
+ for (i=0; i<active_size; i++)
+ {
+ int j = i+rand()%(active_size-i);
+ swap(index[i], index[j]);
+ }
+
+ for (s=0;s<active_size;s++)
+ {
+ i = index[s];
+ G = 0;
+ schar yi = y[i];
+
+ feature_node *xi = prob->x[i];
+ while(xi->index!= -1)
+ {
+ G += w[xi->index-1]*(xi->value);
+ xi++;
+ }
+ G = G*yi-1;
+
+ C = upper_bound[GETI(i)];
+ G += alpha[i]*diag[GETI(i)];
+
+ PG = 0;
+ if (alpha[i] == 0)
+ {
+ if (G > PGmax_old)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ else if (G < 0)
+ PG = G;
+ }
+ else if (alpha[i] == C)
+ {
+ if (G < PGmin_old)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ else if (G > 0)
+ PG = G;
+ }
+ else
+ PG = G;
+
+ PGmax_new = max(PGmax_new, PG);
+ PGmin_new = min(PGmin_new, PG);
+
+ if(fabs(PG) > 1.0e-12)
+ {
+ double alpha_old = alpha[i];
+ alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
+ d = (alpha[i] - alpha_old)*yi;
+ xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ w[xi->index-1] += d*xi->value;
+ xi++;
+ }
+ }
+ }
+
+ iter++;
+ if(iter % 10 == 0)
+ info(".");
+
+ if(PGmax_new - PGmin_new <= eps)
+ {
+ if(active_size == l)
+ break;
+ else
+ {
+ active_size = l;
+ info("*");
+ PGmax_old = INF;
+ PGmin_old = -INF;
+ continue;
+ }
+ }
+ PGmax_old = PGmax_new;
+ PGmin_old = PGmin_new;
+ if (PGmax_old <= 0)
+ PGmax_old = INF;
+ if (PGmin_old >= 0)
+ PGmin_old = -INF;
+ }
+
+ info("\noptimization finished, #iter = %d\n",iter);
+ if (iter >= max_iter)
+ info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
+
+ // calculate objective value
+
+ double v = 0;
+ int nSV = 0;
+ for(i=0; i<w_size; i++)
+ v += w[i]*w[i];
+ for(i=0; i<l; i++)
+ {
+ v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
+ if(alpha[i] > 0)
+ ++nSV;
+ }
+ info("Objective value = %lf\n",v/2);
+ info("nSV = %d\n",nSV);
+
+ delete [] QD;
+ delete [] alpha;
+ delete [] y;
+ delete [] index;
+ }
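+
+ // Descriptive note: the dual solver above stops once the projected-gradient
+ // spread PGmax_new - PGmin_new falls below eps over the full variable set;
+ // variables at a bound with an unpromising gradient are shrunk out of
+ // active_size and revisited only if the reduced problem converges first.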
+
+ // A coordinate descent algorithm for
+ // L1-regularized L2-loss support vector classification
+ //
+ // min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
+
+ static void solve_l1r_l2_svc(
+ problem *prob_col, double *w, double eps,
+ double Cp, double Cn)
+ {
+ int l = prob_col->l;
+ int w_size = prob_col->n;
+ int j, s, iter = 0;
+ int max_iter = 1000;
+ int active_size = w_size;
+ int max_num_linesearch = 20;
+
+ double sigma = 0.01;
+ double d, G_loss, G, H;
+ double Gmax_old = INF;
+ double Gmax_new;
+ double Gmax_init;
+ double d_old, d_diff;
+ double loss_old, loss_new;
+ double appxcond, cond;
+
+ int *index = new int[w_size];
+ schar *y = new schar[l];
+ double *b = new double[l]; // b = 1-ywTx
+ double *xj_sq = new double[w_size];
+ feature_node *x;
+
+ double C[3] = {Cn,0,Cp};
+
+ for(j=0; j<l; j++)
+ {
+ b[j] = 1;
+ if(prob_col->y[j] > 0)
+ y[j] = 1;
+ else
+ y[j] = -1;
+ }
+ for(j=0; j<w_size; j++)
+ {
+ w[j] = 0;
+ index[j] = j;
+ xj_sq[j] = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ double val = x->value;
+ x->value *= y[ind]; // x->value stores yi*xij
+ xj_sq[j] += C[GETI(ind)]*val*val;
+ x++;
+ }
+ }
+
+ while(iter < max_iter)
+ {
+ Gmax_new = 0;
+
+ for(j=0; j<active_size; j++)
+ {
+ int i = j+rand()%(active_size-j);
+ swap(index[i], index[j]);
+ }
+
+ for(s=0; s<active_size; s++)
+ {
+ j = index[s];
+ G_loss = 0;
+ H = 0;
+
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ if(b[ind] > 0)
+ {
+ double val = x->value;
+ double tmp = C[GETI(ind)]*val;
+ G_loss -= tmp*b[ind];
+ H += tmp*val;
+ }
+ x++;
+ }
+ G_loss *= 2;
+
+ G = G_loss;
+ H *= 2;
+ H = max(H, 1e-12);
+
+ double Gp = G+1;
+ double Gn = G-1;
+ double violation = 0;
+ if(w[j] == 0)
+ {
+ if(Gp < 0)
+ violation = -Gp;
+ else if(Gn > 0)
+ violation = Gn;
+ else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ }
+ else if(w[j] > 0)
+ violation = fabs(Gp);
+ else
+ violation = fabs(Gn);
+
+ Gmax_new = max(Gmax_new, violation);
+
+ // obtain Newton direction d
+ if(Gp <= H*w[j])
+ d = -Gp/H;
+ else if(Gn >= H*w[j])
+ d = -Gn/H;
+ else
+ d = -w[j];
+
+ if(fabs(d) < 1.0e-12)
+ continue;
+
+ double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
+ d_old = 0;
+ int num_linesearch;
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
+ {
+ d_diff = d_old - d;
+ cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
+
+ appxcond = xj_sq[j]*d*d + G_loss*d + cond;
+ if(appxcond <= 0)
+ {
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ b[x->index-1] += d_diff*x->value;
+ x++;
+ }
+ break;
+ }
+
+ if(num_linesearch == 0)
+ {
+ loss_old = 0;
+ loss_new = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ if(b[ind] > 0)
+ loss_old += C[GETI(ind)]*b[ind]*b[ind];
+ double b_new = b[ind] + d_diff*x->value;
+ b[ind] = b_new;
+ if(b_new > 0)
+ loss_new += C[GETI(ind)]*b_new*b_new;
+ x++;
+ }
+ }
+ else
+ {
+ loss_new = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ double b_new = b[ind] + d_diff*x->value;
+ b[ind] = b_new;
+ if(b_new > 0)
+ loss_new += C[GETI(ind)]*b_new*b_new;
+ x++;
+ }
+ }
+
+ cond = cond + loss_new - loss_old;
+ if(cond <= 0)
+ break;
+ else
+ {
+ d_old = d;
+ d *= 0.5;
+ delta *= 0.5;
+ }
+ }
+
+ w[j] += d;
+
+ // recompute b[] if line search takes too many steps
+ if(num_linesearch >= max_num_linesearch)
+ {
+ info("#");
+ for(int i=0; i<l; i++)
+ b[i] = 1;
+
+ for(int i=0; i<w_size; i++)
+ {
+ if(w[i]==0) continue;
+ x = prob_col->x[i];
+ while(x->index != -1)
+ {
+ b[x->index-1] -= w[i]*x->value;
+ x++;
+ }
+ }
+ }
+ }
+
+ if(iter == 0)
+ Gmax_init = Gmax_new;
+ iter++;
+ if(iter % 10 == 0)
+ info(".");
+
+ if(Gmax_new <= eps*Gmax_init)
+ {
+ if(active_size == w_size)
+ break;
+ else
+ {
+ active_size = w_size;
+ info("*");
+ Gmax_old = INF;
+ continue;
+ }
+ }
+
+ Gmax_old = Gmax_new;
+ }
+
+ info("\noptimization finished, #iter = %d\n", iter);
+ if(iter >= max_iter)
+ info("\nWARNING: reaching max number of iterations\n");
+
+ // calculate objective value
+
+ double v = 0;
+ int nnz = 0;
+ for(j=0; j<w_size; j++)
+ {
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ x->value *= prob_col->y[x->index-1]; // restore x->value
+ x++;
+ }
+ if(w[j] != 0)
+ {
+ v += fabs(w[j]);
+ nnz++;
+ }
+ }
+ for(j=0; j<l; j++)
+ if(b[j] > 0)
+ v += C[GETI(j)]*b[j]*b[j];
+
+ info("Objective value = %lf\n", v);
+ info("#nonzeros/#features = %d/%d\n", nnz, w_size);
+
+ delete [] index;
+ delete [] y;
+ delete [] b;
+ delete [] xj_sq;
+ }
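+
+ // Descriptive note: throughout the solver above, b[i] caches 1 - y_i*w^T*x_i
+ // and is updated incrementally after each coordinate step; it is rebuilt
+ // from scratch only when a line search exhausts max_num_linesearch steps.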
+
+ // A coordinate descent algorithm for
+ // L1-regularized logistic regression problems
+ //
+ // min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
+
+ static void solve_l1r_lr(
+ const problem *prob_col, double *w, double eps,
+ double Cp, double Cn)
+ {
+ int l = prob_col->l;
+ int w_size = prob_col->n;
+ int j, s, iter = 0;
+ int max_iter = 1000;
+ int active_size = w_size;
+ int max_num_linesearch = 20;
+
+ double x_min = 0;
+ double sigma = 0.01;
+ double d, G, H;
+ double Gmax_old = INF;
+ double Gmax_new;
+ double Gmax_init;
+ double sum1, appxcond1;
+ double sum2, appxcond2;
+ double cond;
+
+ int *index = new int[w_size];
+ schar *y = new schar[l];
+ double *exp_wTx = new double[l];
+ double *exp_wTx_new = new double[l];
+ double *xj_max = new double[w_size];
+ double *C_sum = new double[w_size];
+ double *xjneg_sum = new double[w_size];
+ double *xjpos_sum = new double[w_size];
+ feature_node *x;
+
+ double C[3] = {Cn,0,Cp};
+
+ for(j=0; j<l; j++)
+ {
+ exp_wTx[j] = 1;
+ if(prob_col->y[j] > 0)
+ y[j] = 1;
+ else
+ y[j] = -1;
+ }
+ for(j=0; j<w_size; j++)
+ {
+ w[j] = 0;
+ index[j] = j;
+ xj_max[j] = 0;
+ C_sum[j] = 0;
+ xjneg_sum[j] = 0;
+ xjpos_sum[j] = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ double val = x->value;
+ x_min = min(x_min, val);
+ xj_max[j] = max(xj_max[j], val);
+ C_sum[j] += C[GETI(ind)];
+ if(y[ind] == -1)
+ xjneg_sum[j] += C[GETI(ind)]*val;
+ else
+ xjpos_sum[j] += C[GETI(ind)]*val;
+ x++;
+ }
+ }
+
+ while(iter < max_iter)
+ {
+ Gmax_new = 0;
+
+ for(j=0; j<active_size; j++)
+ {
+ int i = j+rand()%(active_size-j);
+ swap(index[i], index[j]);
+ }
+
+ for(s=0; s<active_size; s++)
+ {
+ j = index[s];
+ sum1 = 0;
+ sum2 = 0;
+ H = 0;
+
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ double exp_wTxind = exp_wTx[ind];
+ double tmp1 = x->value/(1+exp_wTxind);
+ double tmp2 = C[GETI(ind)]*tmp1;
+ double tmp3 = tmp2*exp_wTxind;
+ sum2 += tmp2;
+ sum1 += tmp3;
+ H += tmp1*tmp3;
+ x++;
+ }
+
+ G = -sum2 + xjneg_sum[j];
+
+ double Gp = G+1;
+ double Gn = G-1;
+ double violation = 0;
+ if(w[j] == 0)
+ {
+ if(Gp < 0)
+ violation = -Gp;
+ else if(Gn > 0)
+ violation = Gn;
+ else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ }
+ else if(w[j] > 0)
+ violation = fabs(Gp);
+ else
+ violation = fabs(Gn);
+
+ Gmax_new = max(Gmax_new, violation);
+
+ // obtain Newton direction d
+ if(Gp <= H*w[j])
+ d = -Gp/H;
+ else if(Gn >= H*w[j])
+ d = -Gn/H;
+ else
+ d = -w[j];
+
+ if(fabs(d) < 1.0e-12)
+ continue;
+
+ d = min(max(d,-10.0),10.0);
+
+ double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
+ int num_linesearch;
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
+ {
+ cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
+
+ if(x_min >= 0)
+ {
+ double tmp = exp(d*xj_max[j]);
+ appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
+ appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
+ if(min(appxcond1,appxcond2) <= 0)
+ {
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ exp_wTx[x->index-1] *= exp(d*x->value);
+ x++;
+ }
+ break;
+ }
+ }
+
+ cond += d*xjneg_sum[j];
+
+ int i = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ double exp_dx = exp(d*x->value);
+ exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
+ cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
+ x++; i++;
+ }
+
+ if(cond <= 0)
+ {
+ int i = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ exp_wTx[ind] = exp_wTx_new[i];
+ x++; i++;
+ }
+ break;
+ }
+ else
+ {
+ d *= 0.5;
+ delta *= 0.5;
+ }
+ }
+
+ w[j] += d;
+
+ // recompute exp_wTx[] if line search takes too many steps
+ if(num_linesearch >= max_num_linesearch)
+ {
+ info("#");
+ for(int i=0; i<l; i++)
+ exp_wTx[i] = 0;
+
+ for(int i=0; i<w_size; i++)
+ {
+ if(w[i]==0) continue;
+ x = prob_col->x[i];
+ while(x->index != -1)
+ {
+ exp_wTx[x->index-1] += w[i]*x->value;
+ x++;
+ }
+ }
+
+ for(int i=0; i<l; i++)
+ exp_wTx[i] = exp(exp_wTx[i]);
+ }
+ }
+
+ if(iter == 0)
+ Gmax_init = Gmax_new;
+ iter++;
+ if(iter % 10 == 0)
+ info(".");
+
+ if(Gmax_new <= eps*Gmax_init)
+ {
+ if(active_size == w_size)
+ break;
+ else
+ {
+ active_size = w_size;
+ info("*");
+ Gmax_old = INF;
+ continue;
+ }
+ }
+
+ Gmax_old = Gmax_new;
+ }
+
+ info("\noptimization finished, #iter = %d\n", iter);
+ if(iter >= max_iter)
+ info("\nWARNING: reaching max number of iterations\n");
+
+ // calculate objective value
+
+ double v = 0;
+ int nnz = 0;
+ for(j=0; j<w_size; j++)
+ if(w[j] != 0)
+ {
+ v += fabs(w[j]);
+ nnz++;
+ }
+ for(j=0; j<l; j++)
+ if(y[j] == 1)
+ v += C[GETI(j)]*log(1+1/exp_wTx[j]);
+ else
+ v += C[GETI(j)]*log(1+exp_wTx[j]);
+
+ info("Objective value = %lf\n", v);
+ info("#nonzeros/#features = %d/%d\n", nnz, w_size);
+
+ delete [] index;
+ delete [] y;
+ delete [] exp_wTx;
+ delete [] exp_wTx_new;
+ delete [] xj_max;
+ delete [] C_sum;
+ delete [] xjneg_sum;
+ delete [] xjpos_sum;
+ }
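+
+ // Descriptive note: exp_wTx[i] caches exp(w^T*x_i) so each coordinate step
+ // costs one pass over column j. When all feature values are nonnegative
+ // (x_min >= 0), the cheaper bounds appxcond1/appxcond2 can accept a step
+ // without evaluating the exact logistic loss difference.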
+
+ // transpose matrix X from row format to column format
+ static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
+ {
+ int i;
+ int l = prob->l;
+ int n = prob->n;
+ int nnz = 0;
+ int *col_ptr = new int[n+1];
+ feature_node *x_space;
+ prob_col->l = l;
+ prob_col->n = n;
+ prob_col->y = new int[l];
+ prob_col->x = new feature_node*[n];
+
+ for(i=0; i<l; i++)
+ prob_col->y[i] = prob->y[i];
+
+ for(i=0; i<n+1; i++)
+ col_ptr[i] = 0;
+ for(i=0; i<l; i++)
+ {
+ feature_node *x = prob->x[i];
+ while(x->index != -1)
+ {
+ nnz++;
+ col_ptr[x->index]++;
+ x++;
+ }
+ }
+ for(i=1; i<n+1; i++)
+ col_ptr[i] += col_ptr[i-1] + 1;
+
+ x_space = new feature_node[nnz+n];
+ for(i=0; i<n; i++)
+ prob_col->x[i] = &x_space[col_ptr[i]];
+
+ for(i=0; i<l; i++)
+ {
+ feature_node *x = prob->x[i];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ x_space[col_ptr[ind]].index = i+1; // starts from 1
+ x_space[col_ptr[ind]].value = x->value;
+ col_ptr[ind]++;
+ x++;
+ }
+ }
+ for(i=0; i<n; i++)
+ x_space[col_ptr[i]].index = -1;
+
+ *x_space_ret = x_space;
+
+ delete [] col_ptr;
+ }
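+
+ // Descriptive note: transpose builds a column-major copy of the row-major
+ // data. Each of the n columns gets one extra terminating node with
+ // index == -1, which is why x_space holds nnz+n nodes and each col_ptr[i]
+ // accumulates count+1 slots.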
+
+ // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
+ // perm, length l, must be allocated before calling this subroutine
+ static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
+ {
+ int l = prob->l;
+ int max_nr_class = 16;
+ int nr_class = 0;
+ int *label = Malloc(int,max_nr_class);
+ int *count = Malloc(int,max_nr_class);
+ int *data_label = Malloc(int,l);
+ int i;
+
+ for(i=0;i<l;i++)
+ {
+ int this_label = prob->y[i];
+ int j;
+ for(j=0;j<nr_class;j++)
+ {
+ if(this_label == label[j])
+ {
+ ++count[j];
+ break;
+ }
+ }
+ data_label[i] = j;
+ if(j == nr_class)
+ {
+ if(nr_class == max_nr_class)
+ {
+ max_nr_class *= 2;
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
+ }
+ label[nr_class] = this_label;
+ count[nr_class] = 1;
+ ++nr_class;
+ }
+ }
+
+ int *start = Malloc(int,nr_class);
+ start[0] = 0;
+ for(i=1;i<nr_class;i++)
+ start[i] = start[i-1]+count[i-1];
+ for(i=0;i<l;i++)
+ {
+ perm[start[data_label[i]]] = i;
+ ++start[data_label[i]];
+ }
+ start[0] = 0;
+ for(i=1;i<nr_class;i++)
+ start[i] = start[i-1]+count[i-1];
+
+ *nr_class_ret = nr_class;
+ *label_ret = label;
+ *start_ret = start;
+ *count_ret = count;
+ free(data_label);
+ }
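+
+ // Descriptive note: after group_classes returns, perm lists instance
+ // indices reordered so that each class occupies a contiguous block;
+ // start[k] is the block offset of class k and count[k] its size (start is
+ // rebuilt after the scatter loop because the scatter advances it).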
+
+ static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
+ {
+ double eps=param->eps;
+ int pos = 0;
+ int neg = 0;
+ for(int i=0;i<prob->l;i++)
+ if(prob->y[i]==+1)
+ pos++;
+ neg = prob->l - pos;
+
+ function *fun_obj=NULL;
+ switch(param->solver_type)
+ {
+ case L2R_LR:
+ {
+ fun_obj=new l2r_lr_fun(prob, Cp, Cn);
+ TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
+ tron_obj.set_print_string(liblinear_print_string);
+ tron_obj.tron(w);
+ delete fun_obj;
+ break;
+ }
+ case L2R_L2LOSS_SVC:
+ {
+ fun_obj=new l2r_l2_svc_fun(prob, Cp, Cn);
+ TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
+ tron_obj.set_print_string(liblinear_print_string);
+ tron_obj.tron(w);
+ delete fun_obj;
+ break;
+ }
+ case L2R_L2LOSS_SVC_DUAL:
+ solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
+ break;
+ case L2R_L1LOSS_SVC_DUAL:
+ solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
+ break;
+ case L1R_L2LOSS_SVC:
+ {
+ problem prob_col;
+ feature_node *x_space = NULL;
+ transpose(prob, &x_space ,&prob_col);
+ solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
+ delete [] prob_col.y;
+ delete [] prob_col.x;
+ delete [] x_space;
+ break;
+ }
+ case L1R_LR:
+ {
+ problem prob_col;
+ feature_node *x_space = NULL;
+ transpose(prob, &x_space ,&prob_col);
+ solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
+ delete [] prob_col.y;
+ delete [] prob_col.x;
+ delete [] x_space;
+ break;
+ }
+ default:
+ fprintf(stderr, "Error: unknown solver_type\n");
+ break;
+ }
+ }
+
+ //
+ // Interface functions
+ //
+ model* train(const problem *prob, const parameter *param)
+ {
+ int i,j;
+ int l = prob->l;
+ int n = prob->n;
+ int w_size = prob->n;
+ model *model_ = Malloc(model,1);
+
+ if(prob->bias>=0)
+ model_->nr_feature=n-1;
+ else
+ model_->nr_feature=n;
+ model_->param = *param;
+ model_->bias = prob->bias;
+
+ int nr_class;
+ int *label = NULL;
+ int *start = NULL;
+ int *count = NULL;
+ int *perm = Malloc(int,l);
+
+ // group training data of the same class
+ group_classes(prob,&nr_class,&label,&start,&count,perm);
+
+ model_->nr_class=nr_class;
+ model_->label = Malloc(int,nr_class);
+ for(i=0;i<nr_class;i++)
+ model_->label[i] = label[i];
+
+ // calculate weighted C
+ double *weighted_C = Malloc(double, nr_class);
+ for(i=0;i<nr_class;i++)
+ weighted_C[i] = param->C;
+ for(i=0;i<param->nr_weight;i++)
+ {
+ for(j=0;j<nr_class;j++)
+ if(param->weight_label[i] == label[j])
+ break;
+ if(j == nr_class)
+ fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
+ else
+ weighted_C[j] *= param->weight[i];
+ }
+
+ // constructing the subproblem
+ feature_node **x = Malloc(feature_node *,l);
+ for(i=0;i<l;i++)
+ x[i] = prob->x[perm[i]];
+
+ int k;
+ problem sub_prob;
+ sub_prob.l = l;
+ sub_prob.n = n;
+ sub_prob.x = Malloc(feature_node *,sub_prob.l);
+ sub_prob.y = Malloc(int,sub_prob.l);
+
+ for(k=0; k<sub_prob.l; k++)
+ sub_prob.x[k] = x[k];
+
+ // multi-class svm by Crammer and Singer
+ if(param->solver_type == MCSVM_CS)
+ {
+ model_->w=Malloc(double, n*nr_class);
+ for(i=0;i<nr_class;i++)
+ for(j=start[i];j<start[i]+count[i];j++)
+ sub_prob.y[j] = i;
+ Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
+ Solver.Solve(model_->w);
+ }
+ else
+ {
+ if(nr_class == 2)
+ {
+ model_->w=Malloc(double, w_size);
+
+ int e0 = start[0]+count[0];
+ k=0;
+ for(; k<e0; k++)
+ sub_prob.y[k] = +1;
+ for(; k<sub_prob.l; k++)
+ sub_prob.y[k] = -1;
+
+ train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
+ }
+ else
+ {
+ model_->w=Malloc(double, w_size*nr_class);
+ double *w=Malloc(double, w_size);
+ for(i=0;i<nr_class;i++)
+ {
+ int si = start[i];
+ int ei = si+count[i];
+
+ k=0;
+ for(; k<si; k++)
+ sub_prob.y[k] = -1;
+ for(; k<ei; k++)
+ sub_prob.y[k] = +1;
+ for(; k<sub_prob.l; k++)
+ sub_prob.y[k] = -1;
+
+ train_one(&sub_prob, param, w, weighted_C[i], param->C);
+
+ for(int j=0;j<w_size;j++)
+ model_->w[j*nr_class+i] = w[j];
+ }
+ free(w);
+ }
+
+ }
+
+ free(x);
+ free(label);
+ free(start);
+ free(count);
+ free(perm);
+ free(sub_prob.x);
+ free(sub_prob.y);
+ free(weighted_C);
+ return model_;
+ }
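+
+ // Typical driver sequence around these interface functions (an illustrative
+ // sketch only, not shipped code; it assumes a populated problem/parameter
+ // pair and a hypothetical feature vector some_feature_vector):
+ //   const char *err = check_parameter(&prob, &param);
+ //   if (err == NULL) {
+ //       struct model *m = train(&prob, &param);
+ //       int predicted = predict(m, some_feature_vector);
+ //       save_model("model.txt", m);
+ //       destroy_model(m);
+ //   }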
+
+ void destroy_model(struct model *model_)
+ {
+ if(model_->w != NULL)
+ free(model_->w);
+ if(model_->label != NULL)
+ free(model_->label);
+ free(model_);
+ }
+
+ static const char *solver_type_table[]=
+ {
+ "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC","L2R_L1LOSS_SVC_DUAL","MCSVM_CS", "L1R_L2LOSS_SVC","L1R_LR", NULL
+ };
+
+ int save_model(const char *model_file_name, const struct model *model_)
+ {
+ int i;
+ int nr_feature=model_->nr_feature;
+ int n;
+ const parameter& param = model_->param;
+
+ if(model_->bias>=0)
+ n=nr_feature+1;
+ else
+ n=nr_feature;
+ int w_size = n;
+ FILE *fp = fopen(model_file_name,"w");
+ if(fp==NULL) return -1;
+
+ int nr_w;
+ if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
+ nr_w=1;
+ else
+ nr_w=model_->nr_class;
+
+ fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
+ fprintf(fp, "nr_class %d\n", model_->nr_class);
+ fprintf(fp, "label");
+ for(i=0; i<model_->nr_class; i++)
+ fprintf(fp, " %d", model_->label[i]);
+ fprintf(fp, "\n");
+
+ fprintf(fp, "nr_feature %d\n", nr_feature);
+
+ fprintf(fp, "bias %.16g\n", model_->bias);
+
+ fprintf(fp, "w\n");
+ for(i=0; i<w_size; i++)
+ {
+ int j;
+ for(j=0; j<nr_w; j++)
+ fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
+ fprintf(fp, "\n");
+ }
+
+ if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
+ else return 0;
+ }
+
+ struct model *load_model(const char *model_file_name)
+ {
+ FILE *fp = fopen(model_file_name,"r");
+ if(fp==NULL) return NULL;
+
+ int i;
+ int nr_feature;
+ int n;
+ int nr_class;
+ double bias;
+ model *model_ = Malloc(model,1);
+ parameter& param = model_->param;
+
+ model_->label = NULL;
+
+ char cmd[81];
+ while(1)
+ {
+ fscanf(fp,"%80s",cmd);
+ if(strcmp(cmd,"solver_type")==0)
+ {
+ fscanf(fp,"%80s",cmd);
+ int i;
+ for(i=0;solver_type_table[i];i++)
+ {
+ if(strcmp(solver_type_table[i],cmd)==0)
+ {
+ param.solver_type=i;
+ break;
+ }
+ }
+ if(solver_type_table[i] == NULL)
+ {
+ fprintf(stderr,"unknown solver type.\n");
+ free(model_->label);
+ free(model_);
+ return NULL;
+ }
+ }
+ else if(strcmp(cmd,"nr_class")==0)
+ {
+ fscanf(fp,"%d",&nr_class);
+ model_->nr_class=nr_class;
+ }
+ else if(strcmp(cmd,"nr_feature")==0)
+ {
+ fscanf(fp,"%d",&nr_feature);
+ model_->nr_feature=nr_feature;
+ }
+ else if(strcmp(cmd,"bias")==0)
+ {
+ fscanf(fp,"%lf",&bias);
+ model_->bias=bias;
+ }
+ else if(strcmp(cmd,"w")==0)
+ {
+ break;
+ }
+ else if(strcmp(cmd,"label")==0)
+ {
+ int nr_class = model_->nr_class;
+ model_->label = Malloc(int,nr_class);
+ for(int i=0;i<nr_class;i++)
+ fscanf(fp,"%d",&model_->label[i]);
+ }
+ else
+ {
+ fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
+ free(model_);
+ return NULL;
+ }
+ }
+
+ nr_feature=model_->nr_feature;
+ if(model_->bias>=0)
+ n=nr_feature+1;
+ else
+ n=nr_feature;
+ int w_size = n;
+ int nr_w;
+ if(nr_class==2 && param.solver_type != MCSVM_CS)
+ nr_w = 1;
+ else
+ nr_w = nr_class;
+
+ model_->w=Malloc(double, w_size*nr_w);
+ for(i=0; i<w_size; i++)
+ {
+ int j;
+ for(j=0; j<nr_w; j++)
+ fscanf(fp, "%lf ", &model_->w[i*nr_w+j]);
+ fscanf(fp, "\n");
+ }
+ if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
+
+ return model_;
+ }
+
+ int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
+ {
+ int idx;
+ int n;
+ if(model_->bias>=0)
+ n=model_->nr_feature+1;
+ else
+ n=model_->nr_feature;
+ double *w=model_->w;
+ int nr_class=model_->nr_class;
+ int i;
+ int nr_w;
+ if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
+ nr_w = 1;
+ else
+ nr_w = nr_class;
+
+ const feature_node *lx=x;
+ for(i=0;i<nr_w;i++)
+ dec_values[i] = 0;
+ for(; (idx=lx->index)!=-1; lx++)
+ {
+ // the dimension of testing data may exceed that of training
+ if(idx<=n)
+ for(i=0;i<nr_w;i++)
+ dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
+ }
+
+ if(nr_class==2)
+ return (dec_values[0]>0)?model_->label[0]:model_->label[1];
+ else
+ {
+ int dec_max_idx = 0;
+ for(i=1;i<nr_class;i++)
+ {
+ if(dec_values[i] > dec_values[dec_max_idx])
+ dec_max_idx = i;
+ }
+ return model_->label[dec_max_idx];
+ }
+ }
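+
+ // Descriptive note: binary models from the one-vs-rest solvers store a
+ // single weight vector (nr_w == 1), so the sign of dec_values[0] selects
+ // label[0] versus label[1]; multi-class models keep nr_w values per
+ // feature and the largest decision value wins.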
+
+ int predict(const model *model_, const feature_node *x)
+ {
+ double *dec_values = Malloc(double, model_->nr_class);
+ int label=predict_values(model_, x, dec_values);
+ free(dec_values);
+ return label;
+ }
+
+ int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
+ {
+ if(model_->param.solver_type==L2R_LR)
+ {
+ int i;
+ int nr_class=model_->nr_class;
+ int nr_w;
+ if(nr_class==2)
+ nr_w = 1;
+ else
+ nr_w = nr_class;
+
+ int label=predict_values(model_, x, prob_estimates);
+ for(i=0;i<nr_w;i++)
+ prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
+
+ if(nr_class==2) // for binary classification
+ prob_estimates[1]=1.-prob_estimates[0];
+ else
+ {
+ double sum=0;
+ for(i=0; i<nr_class; i++)
+ sum+=prob_estimates[i];
+
+ for(i=0; i<nr_class; i++)
+ prob_estimates[i]=prob_estimates[i]/sum;
+ }
+
+ return label;
+ }
+ else
+ return 0;
+ }
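+
+ // Descriptive note: probability outputs are only defined for L2R_LR. Each
+ // decision value is passed through the sigmoid 1/(1+exp(-t)); the binary
+ // case uses the complement for the second class, while the multi-class
+ // one-vs-rest estimates are normalized to sum to 1. Other solver types
+ // return 0 without filling prob_estimates.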
+
+ void destroy_param(parameter* param)
+ {
+ if(param->weight_label != NULL)
+ free(param->weight_label);
+ if(param->weight != NULL)
+ free(param->weight);
+ }
+
+ const char *check_parameter(const problem *prob, const parameter *param)
+ {
+ if(param->eps <= 0)
+ return "eps <= 0";
+
+ if(param->C <= 0)
+ return "C <= 0";
+
+ if(param->solver_type != L2R_LR
+ && param->solver_type != L2R_L2LOSS_SVC_DUAL
+ && param->solver_type != L2R_L2LOSS_SVC
+ && param->solver_type != L2R_L1LOSS_SVC_DUAL
+ && param->solver_type != MCSVM_CS
+ && param->solver_type != L1R_L2LOSS_SVC
+ && param->solver_type != L1R_LR)
+ return "unknown solver type";
+
+ return NULL;
+ }
+
+ void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target)
+ {
+ int i;
+ int *fold_start = Malloc(int,nr_fold+1);
+ int l = prob->l;
+ int *perm = Malloc(int,l);
+
+ for(i=0;i<l;i++) perm[i]=i;
+ for(i=0;i<l;i++)
+ {
+ int j = i+rand()%(l-i);
+ swap(perm[i],perm[j]);
+ }
+ for(i=0;i<=nr_fold;i++)
+ fold_start[i]=i*l/nr_fold;
+
+ for(i=0;i<nr_fold;i++)
+ {
+ int begin = fold_start[i];
+ int end = fold_start[i+1];
+ int j,k;
+ struct problem subprob;
+
+ subprob.bias = prob->bias;
+ subprob.n = prob->n;
+ subprob.l = l-(end-begin);
+ subprob.x = Malloc(struct feature_node*,subprob.l);
+ subprob.y = Malloc(int,subprob.l);
+
+ k=0;
+ for(j=0;j<begin;j++)
+ {
+ subprob.x[k] = prob->x[perm[j]];
+ subprob.y[k] = prob->y[perm[j]];
+ ++k;
+ }
+ for(j=end;j<l;j++)
+ {
+ subprob.x[k] = prob->x[perm[j]];
+ subprob.y[k] = prob->y[perm[j]];
+ ++k;
+ }
+ struct model *submodel = train(&subprob,param);
+ for(j=begin;j<end;j++)
+ target[perm[j]] = predict(submodel,prob->x[perm[j]]);
+ destroy_model(submodel);
+ free(subprob.x);
+ free(subprob.y);
+ }
+ free(fold_start);
+ free(perm);
+ }
+
+ int get_nr_feature(const model *model_)
+ {
+ return model_->nr_feature;
+ }
+
+ int get_nr_class(const model *model_)
+ {
+ return model_->nr_class;
+ }
+
+ void get_labels(const model *model_, int* label)
+ {
+ if (model_->label != NULL)
+ for(int i=0;i<model_->nr_class;i++)
+ label[i] = model_->label[i];
+ }
+