liblinear-ruby 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +19 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +46 -0
  6. data/Rakefile +1 -0
  7. data/ext/Makefile +237 -0
  8. data/ext/blas.h +25 -0
  9. data/ext/blasp.h +430 -0
  10. data/ext/daxpy.c +49 -0
  11. data/ext/ddot.c +50 -0
  12. data/ext/dnrm2.c +62 -0
  13. data/ext/dscal.c +44 -0
  14. data/ext/extconf.rb +12 -0
  15. data/ext/liblinear_wrap.cxx +4646 -0
  16. data/ext/linear.cpp +2811 -0
  17. data/ext/linear.h +74 -0
  18. data/ext/linear.rb +357 -0
  19. data/ext/tron.cpp +235 -0
  20. data/ext/tron.h +34 -0
  21. data/lib/liblinear.rb +89 -0
  22. data/lib/liblinear/error.rb +4 -0
  23. data/lib/liblinear/model.rb +66 -0
  24. data/lib/liblinear/parameter.rb +42 -0
  25. data/lib/liblinear/problem.rb +55 -0
  26. data/lib/liblinear/version.rb +3 -0
  27. data/liblinear-1.93/COPYRIGHT +31 -0
  28. data/liblinear-1.93/Makefile +37 -0
  29. data/liblinear-1.93/Makefile.win +30 -0
  30. data/liblinear-1.93/README +531 -0
  31. data/liblinear-1.93/blas/Makefile +22 -0
  32. data/liblinear-1.93/blas/blas.a +0 -0
  33. data/liblinear-1.93/blas/blas.h +25 -0
  34. data/liblinear-1.93/blas/blasp.h +430 -0
  35. data/liblinear-1.93/blas/daxpy.c +49 -0
  36. data/liblinear-1.93/blas/daxpy.o +0 -0
  37. data/liblinear-1.93/blas/ddot.c +50 -0
  38. data/liblinear-1.93/blas/ddot.o +0 -0
  39. data/liblinear-1.93/blas/dnrm2.c +62 -0
  40. data/liblinear-1.93/blas/dnrm2.o +0 -0
  41. data/liblinear-1.93/blas/dscal.c +44 -0
  42. data/liblinear-1.93/blas/dscal.o +0 -0
  43. data/liblinear-1.93/heart_scale +270 -0
  44. data/liblinear-1.93/linear.cpp +2811 -0
  45. data/liblinear-1.93/linear.def +18 -0
  46. data/liblinear-1.93/linear.h +74 -0
  47. data/liblinear-1.93/linear.o +0 -0
  48. data/liblinear-1.93/matlab/Makefile +58 -0
  49. data/liblinear-1.93/matlab/README +197 -0
  50. data/liblinear-1.93/matlab/libsvmread.c +212 -0
  51. data/liblinear-1.93/matlab/libsvmwrite.c +106 -0
  52. data/liblinear-1.93/matlab/linear_model_matlab.c +176 -0
  53. data/liblinear-1.93/matlab/linear_model_matlab.h +2 -0
  54. data/liblinear-1.93/matlab/make.m +21 -0
  55. data/liblinear-1.93/matlab/predict.c +331 -0
  56. data/liblinear-1.93/matlab/train.c +418 -0
  57. data/liblinear-1.93/predict +0 -0
  58. data/liblinear-1.93/predict.c +245 -0
  59. data/liblinear-1.93/python/Makefile +4 -0
  60. data/liblinear-1.93/python/README +343 -0
  61. data/liblinear-1.93/python/liblinear.py +277 -0
  62. data/liblinear-1.93/python/liblinearutil.py +250 -0
  63. data/liblinear-1.93/ruby/liblinear.i +41 -0
  64. data/liblinear-1.93/ruby/liblinear_wrap.cxx +4646 -0
  65. data/liblinear-1.93/ruby/linear.h +74 -0
  66. data/liblinear-1.93/ruby/linear.o +0 -0
  67. data/liblinear-1.93/train +0 -0
  68. data/liblinear-1.93/train.c +399 -0
  69. data/liblinear-1.93/tron.cpp +235 -0
  70. data/liblinear-1.93/tron.h +34 -0
  71. data/liblinear-1.93/tron.o +0 -0
  72. data/liblinear-1.93/windows/liblinear.dll +0 -0
  73. data/liblinear-1.93/windows/libsvmread.mexw64 +0 -0
  74. data/liblinear-1.93/windows/libsvmwrite.mexw64 +0 -0
  75. data/liblinear-1.93/windows/predict.exe +0 -0
  76. data/liblinear-1.93/windows/predict.mexw64 +0 -0
  77. data/liblinear-1.93/windows/train.exe +0 -0
  78. data/liblinear-1.93/windows/train.mexw64 +0 -0
  79. data/liblinear-ruby.gemspec +24 -0
  80. metadata +152 -0
data/ext/linear.cpp ADDED
@@ -0,0 +1,2811 @@
1
+ #include <math.h>
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <string.h>
5
+ #include <stdarg.h>
6
+ #include <locale.h>
7
+ #include "linear.h"
8
+ #include "tron.h"
9
// Shorthand for signed char; used for the +1/-1 label arrays in the dual solvers.
typedef signed char schar;
// Generic in-place swap of two values.
template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
// Allocate a new T[n] into dst and bit-copy n elements from src.
// NOTE(review): memcpy uses sizeof(T) while reading through an S*, so S and
// T must have the same size; callers in this file use matching types.
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
{
	dst = new T[n];
	memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
// C-style allocation of n objects of the given type (result unchecked by callers).
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
// Positive infinity used as an "unbounded" sentinel throughout the solvers.
#define INF HUGE_VAL
24
+
25
// Default output sink: write the message to stdout and flush immediately
// so progress characters ('.', '*') appear as they are produced.
static void print_string_stdout(const char *s)
{
	fprintf(stdout, "%s", s);
	fflush(stdout);
}

// Active output hook used by info(); points at the stdout sink by default
// and can be repointed to another sink.
static void (*liblinear_print_string)(const char *) = print_string_stdout;
32
+
33
+ #if 1
34
+ static void info(const char *fmt,...)
35
+ {
36
+ char buf[BUFSIZ];
37
+ va_list ap;
38
+ va_start(ap,fmt);
39
+ vsprintf(buf,fmt,ap);
40
+ va_end(ap);
41
+ (*liblinear_print_string)(buf);
42
+ }
43
+ #else
44
+ static void info(const char *fmt,...) {}
45
+ #endif
46
+
47
// Objective for L2-regularized logistic regression (primal), implementing
// the abstract `function` interface consumed by the TRON Newton solver
// (see tron.h): value, gradient and Hessian-vector product.
class l2r_lr_fun: public function
{
public:
	// prob: training data; C: per-instance penalty array of length prob->l
	// (borrowed — caller keeps it alive for this object's lifetime).
	l2r_lr_fun(const problem *prob, double *C);
	~l2r_lr_fun();

	double fun(double *w);            // objective value at w; caches z = X*w
	void grad(double *w, double *g);  // gradient; must follow fun(w) (uses cached z)
	void Hv(double *s, double *Hs);   // Hessian-vector product Hs = H*s

	int get_nr_variable(void);

private:
	void Xv(double *v, double *Xv);   // Xv = X*v (sparse rows)
	void XTv(double *v, double *XTv); // XTv = X^T*v

	double *C;          // per-instance costs (not owned)
	double *z;          // cached X*w from the last fun() call
	double *D;          // cached sigma*(1-sigma), filled by grad(), read by Hv()
	const problem *prob;
};
68
+
69
+ l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C)
70
+ {
71
+ int l=prob->l;
72
+
73
+ this->prob = prob;
74
+
75
+ z = new double[l];
76
+ D = new double[l];
77
+ this->C = C;
78
+ }
79
+
80
+ l2r_lr_fun::~l2r_lr_fun()
81
+ {
82
+ delete[] z;
83
+ delete[] D;
84
+ }
85
+
86
+
87
// Objective: 0.5*||w||^2 + sum_i C_i * log(1 + exp(-y_i w^T x_i)).
// Side effect: caches z = X*w for the subsequent grad() call.
double l2r_lr_fun::fun(double *w)
{
	int i;
	double f=0;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);

	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;
	for(i=0;i<l;i++)
	{
		double yz = y[i]*z[i];
		// Two algebraically equal forms of log(1+exp(-yz)), chosen by the
		// sign of yz so the exp() argument is never positive (no overflow).
		if (yz >= 0)
			f += C[i]*log(1 + exp(-yz));
		else
			f += C[i]*(-yz+log(1 + exp(yz)));
	}

	return(f);
}
111
+
112
// Gradient: g = w + X^T (C_i * (sigma(y_i w^T x_i) - 1) * y_i).
// Relies on z = X*w cached by the immediately preceding fun(w) call, and
// overwrites z in place; also fills D = sigma*(1-sigma) for later Hv() calls.
void l2r_lr_fun::grad(double *w, double *g)
{
	int i;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	for(i=0;i<l;i++)
	{
		z[i] = 1/(1 + exp(-y[i]*z[i]));  // sigma(y_i w^T x_i)
		D[i] = z[i]*(1-z[i]);
		z[i] = C[i]*(z[i]-1)*y[i];
	}
	XTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + g[i];
}
130
+
131
+ int l2r_lr_fun::get_nr_variable(void)
132
+ {
133
+ return prob->n;
134
+ }
135
+
136
// Hessian-vector product: Hs = s + X^T (C .* D .* (X s)), with D cached by
// grad(). Called repeatedly by TRON's conjugate-gradient inner iterations.
void l2r_lr_fun::Hv(double *s, double *Hs)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	double *wa = new double[l];

	Xv(s, wa);
	for(i=0;i<l;i++)
		wa[i] = C[i]*D[i]*wa[i];

	XTv(wa, Hs);
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + Hs[i];
	delete[] wa;
}
152
+
153
+ void l2r_lr_fun::Xv(double *v, double *Xv)
154
+ {
155
+ int i;
156
+ int l=prob->l;
157
+ feature_node **x=prob->x;
158
+
159
+ for(i=0;i<l;i++)
160
+ {
161
+ feature_node *s=x[i];
162
+ Xv[i]=0;
163
+ while(s->index!=-1)
164
+ {
165
+ Xv[i]+=v[s->index-1]*s->value;
166
+ s++;
167
+ }
168
+ }
169
+ }
170
+
171
+ void l2r_lr_fun::XTv(double *v, double *XTv)
172
+ {
173
+ int i;
174
+ int l=prob->l;
175
+ int w_size=get_nr_variable();
176
+ feature_node **x=prob->x;
177
+
178
+ for(i=0;i<w_size;i++)
179
+ XTv[i]=0;
180
+ for(i=0;i<l;i++)
181
+ {
182
+ feature_node *s=x[i];
183
+ while(s->index!=-1)
184
+ {
185
+ XTv[s->index-1]+=v[i]*s->value;
186
+ s++;
187
+ }
188
+ }
189
+ }
190
+
191
// Objective for L2-regularized L2-loss SVC (primal), for the TRON Newton
// solver. Maintains an active index set I of margin-violating instances so
// grad/Hv only touch the rows that contribute to the generalized Hessian.
class l2r_l2_svc_fun: public function
{
public:
	// prob: training data; C: per-instance penalties of length prob->l (borrowed).
	l2r_l2_svc_fun(const problem *prob, double *C);
	~l2r_l2_svc_fun();

	double fun(double *w);            // objective; caches z_i = y_i w^T x_i
	void grad(double *w, double *g);  // gradient; must follow fun(w); records I
	void Hv(double *s, double *Hs);   // generalized Hessian-vector product

	int get_nr_variable(void);

protected:
	void Xv(double *v, double *Xv);    // Xv = X*v
	void subXv(double *v, double *Xv); // restricted to rows in I
	void subXTv(double *v, double *XTv); // transpose, restricted to rows in I

	double *C;   // per-instance costs (not owned)
	double *z;   // y_i * (w^T x_i) cached by fun(), reused by grad()
	double *D;   // NOTE(review): allocated but never read in this class
	int *I;      // indices of margin-violating instances (z_i < 1)
	int sizeI;   // number of valid entries in I
	const problem *prob;
};
215
+
216
+ l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C)
217
+ {
218
+ int l=prob->l;
219
+
220
+ this->prob = prob;
221
+
222
+ z = new double[l];
223
+ D = new double[l];
224
+ I = new int[l];
225
+ this->C = C;
226
+ }
227
+
228
+ l2r_l2_svc_fun::~l2r_l2_svc_fun()
229
+ {
230
+ delete[] z;
231
+ delete[] D;
232
+ delete[] I;
233
+ }
234
+
235
// Objective: 0.5*||w||^2 + sum_i C_i * max(0, 1 - y_i w^T x_i)^2.
// Side effect: caches z_i = y_i * (w^T x_i) for the subsequent grad() call.
double l2r_l2_svc_fun::fun(double *w)
{
	int i;
	double f=0;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);

	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;
	for(i=0;i<l;i++)
	{
		z[i] = y[i]*z[i];
		double d = 1-z[i];
		if (d > 0)  // only margin violations contribute to the loss
			f += C[i]*d*d;
	}

	return(f);
}
258
+
259
// Gradient: g = w + 2 * X_I^T (C_i y_i (z_i - 1)) over instances inside the
// margin (z_i < 1). Relies on z cached by fun(w), compacts the contributing
// terms into the front of z, and records the active set I/sizeI for Hv().
void l2r_l2_svc_fun::grad(double *w, double *g)
{
	int i;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	sizeI = 0;
	for (i=0;i<l;i++)
		if (z[i] < 1)
		{
			z[sizeI] = C[i]*y[i]*(z[i]-1);
			I[sizeI] = i;
			sizeI++;
		}
	subXTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + 2*g[i];
}
279
+
280
+ int l2r_l2_svc_fun::get_nr_variable(void)
281
+ {
282
+ return prob->n;
283
+ }
284
+
285
// Generalized Hessian-vector product restricted to the active set I recorded
// by grad(): Hs = s + 2 * X_I^T (C .* (X_I s)).
void l2r_l2_svc_fun::Hv(double *s, double *Hs)
{
	int i;
	int w_size=get_nr_variable();
	double *wa = new double[sizeI];

	subXv(s, wa);
	for(i=0;i<sizeI;i++)
		wa[i] = C[I[i]]*wa[i];

	subXTv(wa, Hs);
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + 2*Hs[i];
	delete[] wa;
}
300
+
301
+ void l2r_l2_svc_fun::Xv(double *v, double *Xv)
302
+ {
303
+ int i;
304
+ int l=prob->l;
305
+ feature_node **x=prob->x;
306
+
307
+ for(i=0;i<l;i++)
308
+ {
309
+ feature_node *s=x[i];
310
+ Xv[i]=0;
311
+ while(s->index!=-1)
312
+ {
313
+ Xv[i]+=v[s->index-1]*s->value;
314
+ s++;
315
+ }
316
+ }
317
+ }
318
+
319
+ void l2r_l2_svc_fun::subXv(double *v, double *Xv)
320
+ {
321
+ int i;
322
+ feature_node **x=prob->x;
323
+
324
+ for(i=0;i<sizeI;i++)
325
+ {
326
+ feature_node *s=x[I[i]];
327
+ Xv[i]=0;
328
+ while(s->index!=-1)
329
+ {
330
+ Xv[i]+=v[s->index-1]*s->value;
331
+ s++;
332
+ }
333
+ }
334
+ }
335
+
336
+ void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
337
+ {
338
+ int i;
339
+ int w_size=get_nr_variable();
340
+ feature_node **x=prob->x;
341
+
342
+ for(i=0;i<w_size;i++)
343
+ XTv[i]=0;
344
+ for(i=0;i<sizeI;i++)
345
+ {
346
+ feature_node *s=x[I[i]];
347
+ while(s->index!=-1)
348
+ {
349
+ XTv[s->index-1]+=v[i]*s->value;
350
+ s++;
351
+ }
352
+ }
353
+ }
354
+
355
// Objective for L2-regularized L2-loss epsilon-SVR (primal). Reuses the SVC
// machinery (Xv/subXv/subXTv, buffers, active set) from l2r_l2_svc_fun and
// overrides only the loss value and gradient; p is the epsilon-tube width.
class l2r_l2_svr_fun: public l2r_l2_svc_fun
{
public:
	l2r_l2_svr_fun(const problem *prob, double *C, double p);

	double fun(double *w);
	void grad(double *w, double *g);

private:
	double p; // epsilon-insensitive tube half-width
};

l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *C, double p):
	l2r_l2_svc_fun(prob, C)
{
	this->p = p;
}
372
+
373
// Objective: 0.5*||w||^2 + sum_i C_i * max(0, |w^T x_i - y_i| - p)^2
// (epsilon-insensitive squared loss). Caches z = X*w for grad().
double l2r_l2_svr_fun::fun(double *w)
{
	int i;
	double f=0;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();
	double d;

	Xv(w, z);

	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2;
	for(i=0;i<l;i++)
	{
		d = z[i] - y[i];
		// Only residuals outside the [-p, p] tube are penalized.
		if(d < -p)
			f += C[i]*(d+p)*(d+p);
		else if(d > p)
			f += C[i]*(d-p)*(d-p);
	}

	return(f);
}
398
+
399
// Gradient: g = w + 2 * X_I^T (C_i * (d_i +/- p)) over residuals outside the
// tube. Relies on z = X*w cached by fun(w); compacts contributing terms into
// the front of z and records the active set I/sizeI (used by inherited Hv()).
void l2r_l2_svr_fun::grad(double *w, double *g)
{
	int i;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();
	double d;

	sizeI = 0;
	for(i=0;i<l;i++)
	{
		d = z[i] - y[i];

		// generate index set I
		if(d < -p)
		{
			z[sizeI] = C[i]*(d+p);
			I[sizeI] = i;
			sizeI++;
		}
		else if(d > p)
		{
			z[sizeI] = C[i]*(d-p);
			I[sizeI] = i;
			sizeI++;
		}

	}
	subXTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + 2*g[i];
}
432
+
433
+ // A coordinate descent algorithm for
434
+ // multi-class support vector machines by Crammer and Singer
435
+ //
436
+ // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
437
+ // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
438
+ //
439
+ // where e^m_i = 0 if y_i = m,
440
+ // e^m_i = 1 if y_i != m,
441
+ // C^m_i = C if m = y_i,
442
+ // C^m_i = 0 if m != y_i,
443
+ // and w_m(\alpha) = \sum_i \alpha^m_i x_i
444
+ //
445
+ // Given:
446
+ // x, y, C
447
+ // eps is the stopping tolerance
448
+ //
449
+ // solution will be put in w
450
+ //
451
+ // See Appendix of LIBLINEAR paper, Fan et al. (2008)
452
+
453
// Map instance i to its cost-array slot; this solver indexes the per-class
// cost array C by the (integer) class label of instance i.
#define GETI(i) ((int) prob->y[i])
// To support weights for instances, use GETI(i) (i)

// Dual coordinate-descent solver for the Crammer-Singer multi-class SVM
// (formulation in the comment block above; Appendix of Fan et al. 2008).
class Solver_MCSVM_CS
{
public:
	Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
	~Solver_MCSVM_CS();
	void Solve(double *w);
private:
	// Exactly solve the one-instance subproblem; result placed in alpha_new.
	void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
	// Shrinking test: may dual variable (i, m) leave the active set?
	bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
	double *B, *C, *G; // per-class scratch (B), costs (C, borrowed), gradient (G)
	int w_size, l;     // feature count and instance count
	int nr_class;
	int max_iter;
	double eps;        // stopping tolerance
	const problem *prob;
};
472
+
473
+ Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
474
+ {
475
+ this->w_size = prob->n;
476
+ this->l = prob->l;
477
+ this->nr_class = nr_class;
478
+ this->eps = eps;
479
+ this->max_iter = max_iter;
480
+ this->prob = prob;
481
+ this->B = new double[nr_class];
482
+ this->G = new double[nr_class];
483
+ this->C = weighted_C;
484
+ }
485
+
486
+ Solver_MCSVM_CS::~Solver_MCSVM_CS()
487
+ {
488
+ delete[] B;
489
+ delete[] G;
490
+ }
491
+
492
// qsort comparator producing DESCENDING order of doubles.
int compare_double(const void *a, const void *b)
{
	double lhs = *(const double *)a;
	double rhs = *(const double *)b;

	if(lhs > rhs)
		return -1;
	if(lhs < rhs)
		return 1;
	return 0;
}
500
+
501
// Exactly solve the single-instance subproblem of the Crammer-Singer dual
// (Appendix of the LIBLINEAR paper): find the threshold beta by scanning the
// descending-sorted copy of B (shifted at the true class yi), then clip each
// new alpha against its bound (C_yi for the true class, 0 otherwise).
void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
	int r;
	double *D;

	clone(D, B, active_i);  // work on a copy; B is reused across instances
	if(yi < active_i)
		D[yi] += A_i*C_yi;
	qsort(D, active_i, sizeof(double), compare_double);  // descending

	double beta = D[0] - A_i*C_yi;
	for(r=1;r<active_i && beta<r*D[r];r++)
		beta += D[r];
	beta /= r;

	for(r=0;r<active_i;r++)
	{
		if(r == yi)
			alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
		else
			alpha_new[r] = min((double)0, (beta - B[r])/A_i);
	}
	delete[] D;
}
525
+
526
+ bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
527
+ {
528
+ double bound = 0;
529
+ if(m == yi)
530
+ bound = C[GETI(i)];
531
+ if(alpha_i == bound && G[m] < minG)
532
+ return true;
533
+ return false;
534
+ }
535
+
536
// Main Crammer-Singer dual coordinate-descent loop. w has layout
// w[(feature)*nr_class + class]. Uses two-level shrinking: per-instance
// class lists (active_size_i / alpha_index) and a global instance list
// (active_size / index). On convergence of the shrunk problem it restarts
// once over all variables before declaring convergence.
void Solver_MCSVM_CS::Solve(double *w)
{
	int i, m, s;
	int iter = 0;
	double *alpha = new double[l*nr_class];
	double *alpha_new = new double[nr_class];
	int *index = new int[l];
	double *QD = new double[l];            // squared norms ||x_i||^2
	int *d_ind = new int[nr_class];
	double *d_val = new double[nr_class];
	int *alpha_index = new int[nr_class*l];
	int *y_index = new int[l];
	int active_size = l;
	int *active_size_i = new int[l];
	double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
	bool start_from_all = true;

	// Initial alpha can be set here. Note that
	// sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l-1
	// alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
	// alpha[i*nr_class+m] <= 0 if prob->y[i] != m
	// If initial alpha isn't zero, uncomment the for loop below to initialize w
	for(i=0;i<l*nr_class;i++)
		alpha[i] = 0;

	for(i=0;i<w_size*nr_class;i++)
		w[i] = 0;
	for(i=0;i<l;i++)
	{
		for(m=0;m<nr_class;m++)
			alpha_index[i*nr_class+m] = m;
		feature_node *xi = prob->x[i];
		QD[i] = 0;
		while(xi->index != -1)
		{
			double val = xi->value;
			QD[i] += val*val;

			// Uncomment the for loop if initial alpha isn't zero
			// for(m=0; m<nr_class; m++)
			//	w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
			xi++;
		}
		active_size_i[i] = nr_class;
		y_index[i] = (int)prob->y[i];
		index[i] = i;
	}

	while(iter < max_iter)
	{
		double stopping = -INF;
		// Randomly permute the active instances (Fisher-Yates).
		for(i=0;i<active_size;i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}
		for(s=0;s<active_size;s++)
		{
			i = index[s];
			double Ai = QD[i];
			double *alpha_i = &alpha[i*nr_class];
			int *alpha_index_i = &alpha_index[i*nr_class];

			if(Ai > 0)
			{
				// Gradient over the active classes of instance i.
				for(m=0;m<active_size_i[i];m++)
					G[m] = 1;
				if(y_index[i] < active_size_i[i])
					G[y_index[i]] = 0;

				feature_node *xi = prob->x[i];
				while(xi->index!= -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<active_size_i[i];m++)
						G[m] += w_i[alpha_index_i[m]]*(xi->value);
					xi++;
				}

				double minG = INF;
				double maxG = -INF;
				for(m=0;m<active_size_i[i];m++)
				{
					if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
						minG = G[m];
					if(G[m] > maxG)
						maxG = G[m];
				}
				if(y_index[i] < active_size_i[i])
					if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
						minG = G[y_index[i]];

				// Shrink classes of instance i that can no longer move,
				// compacting the survivors to the front of alpha_index_i.
				for(m=0;m<active_size_i[i];m++)
				{
					if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
					{
						active_size_i[i]--;
						while(active_size_i[i]>m)
						{
							if(!be_shrunk(i, active_size_i[i], y_index[i],
											alpha_i[alpha_index_i[active_size_i[i]]], minG))
							{
								swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
								swap(G[m], G[active_size_i[i]]);
								if(y_index[i] == active_size_i[i])
									y_index[i] = m;
								else if(y_index[i] == m)
									y_index[i] = active_size_i[i];
								break;
							}
							active_size_i[i]--;
						}
					}
				}

				// With at most one active class left, the instance is done;
				// drop it from the global active set.
				if(active_size_i[i] <= 1)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}

				if(maxG-minG <= 1e-12)
					continue;
				else
					stopping = max(maxG - minG, stopping);

				for(m=0;m<active_size_i[i];m++)
					B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;

				solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
				// Apply only the alphas that changed (sparse update of w).
				int nz_d = 0;
				for(m=0;m<active_size_i[i];m++)
				{
					double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
					alpha_i[alpha_index_i[m]] = alpha_new[m];
					if(fabs(d) >= 1e-12)
					{
						d_ind[nz_d] = alpha_index_i[m];
						d_val[nz_d] = d;
						nz_d++;
					}
				}

				xi = prob->x[i];
				while(xi->index != -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<nz_d;m++)
						w_i[d_ind[m]] += d_val[m]*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
		{
			info(".");
		}

		if(stopping < eps_shrink)
		{
			// Converged on the shrunk problem: if this pass already covered
			// all variables, stop; otherwise restart over everything with a
			// tighter shrinking tolerance to confirm convergence.
			if(stopping < eps && start_from_all == true)
				break;
			else
			{
				active_size = l;
				for(i=0;i<l;i++)
					active_size_i[i] = nr_class;
				info("*");
				eps_shrink = max(eps_shrink/2, eps);
				start_from_all = true;
			}
		}
		else
			start_from_all = false;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0;i<w_size*nr_class;i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0;i<l*nr_class;i++)
	{
		v += alpha[i];
		if(fabs(alpha[i]) > 0)
			nSV++;
	}
	for(i=0;i<l;i++)
		v -= alpha[i*nr_class+(int)prob->y[i]];
	info("Objective value = %lf\n",v);
	info("nSV = %d\n",nSV);

	delete [] alpha;
	delete [] alpha_new;
	delete [] index;
	delete [] QD;
	delete [] d_ind;
	delete [] d_val;
	delete [] alpha_index;
	delete [] y_index;
	delete [] active_size_i;
}
747
+
748
+ // A coordinate descent algorithm for
749
+ // L1-loss and L2-loss SVM dual problems
750
+ //
751
+ // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
752
+ // s.t. 0 <= \alpha_i <= upper_bound_i,
753
+ //
754
+ // where Qij = yi yj xi^T xj and
755
+ // D is a diagonal matrix
756
+ //
757
+ // In L1-SVM case:
758
+ // upper_bound_i = Cp if y_i = 1
759
+ // upper_bound_i = Cn if y_i = -1
760
+ // D_ii = 0
761
+ // In L2-SVM case:
762
+ // upper_bound_i = INF
763
+ // D_ii = 1/(2*Cp) if y_i = 1
764
+ // D_ii = 1/(2*Cn) if y_i = -1
765
+ //
766
+ // Given:
767
+ // x, y, Cp, Cn
768
+ // eps is the stopping tolerance
769
+ //
770
+ // solution will be put in w
771
+ //
772
+ // See Algorithm 3 of Hsieh et al., ICML 2008
773
+
774
#undef GETI
// Map label y[i] in {-1,+1} to slot 0 or 2 of the diag/upper_bound tables.
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

// Dual coordinate descent for L1-loss / L2-loss SVC (Algorithm 3 of
// Hsieh et al., ICML 2008); formulation in the comment block above.
// Solution is written into w; uses projected-gradient shrinking.
static void solve_l2r_l1l2_svc(
	const problem *prob, double *w, double eps,
	double Cp, double Cn, int solver_type)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double C, d, G;
	double *QD = new double[l];  // diag term + ||x_i||^2
	int max_iter = 1000;
	int *index = new int[l];
	double *alpha = new double[l];
	schar *y = new schar[l];
	int active_size = l;

	// PG: projected gradient, for shrinking and stopping
	double PG;
	double PGmax_old = INF;
	double PGmin_old = -INF;
	double PGmax_new, PGmin_new;

	// default solver_type: L2R_L2LOSS_SVC_DUAL
	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
	double upper_bound[3] = {INF, 0, INF};
	if(solver_type == L2R_L1LOSS_SVC_DUAL)
	{
		diag[0] = 0;
		diag[2] = 0;
		upper_bound[0] = Cn;
		upper_bound[2] = Cp;
	}

	// Binarize the labels.
	for(i=0; i<l; i++)
	{
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
	}

	// Initial alpha can be set here. Note that
	// 0 <= alpha[i] <= upper_bound[GETI(i)]
	for(i=0; i<l; i++)
		alpha[i] = 0;

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		QD[i] = diag[GETI(i)];

		feature_node *xi = prob->x[i];
		while (xi->index != -1)
		{
			double val = xi->value;
			QD[i] += val*val;
			w[xi->index-1] += y[i]*alpha[i]*val;
			xi++;
		}
		index[i] = i;
	}

	while (iter < max_iter)
	{
		PGmax_new = -INF;
		PGmin_new = INF;

		// Randomly permute the active instances (Fisher-Yates).
		for (i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}

		for (s=0; s<active_size; s++)
		{
			i = index[s];
			G = 0;
			schar yi = y[i];

			feature_node *xi = prob->x[i];
			while(xi->index!= -1)
			{
				G += w[xi->index-1]*(xi->value);
				xi++;
			}
			G = G*yi-1;

			C = upper_bound[GETI(i)];
			G += alpha[i]*diag[GETI(i)];

			// Projected gradient; variables pinned at a bound with a
			// sufficiently wrong gradient are shrunk out.
			PG = 0;
			if (alpha[i] == 0)
			{
				if (G > PGmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
					PG = G;
			}
			else if (alpha[i] == C)
			{
				if (G < PGmin_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
					PG = G;
			}
			else
				PG = G;

			PGmax_new = max(PGmax_new, PG);
			PGmin_new = min(PGmin_new, PG);

			if(fabs(PG) > 1.0e-12)
			{
				// Single-variable Newton step, clipped to [0, C];
				// then update w by the change in y_i * alpha_i.
				double alpha_old = alpha[i];
				alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
				d = (alpha[i] - alpha_old)*yi;
				xi = prob->x[i];
				while (xi->index != -1)
				{
					w[xi->index-1] += d*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
			info(".");

		if(PGmax_new - PGmin_new <= eps)
		{
			if(active_size == l)
				break;
			else
			{
				// Converged on the shrunk problem: re-activate everything
				// and loosen the shrinking bounds to confirm convergence.
				active_size = l;
				info("*");
				PGmax_old = INF;
				PGmin_old = -INF;
				continue;
			}
		}
		PGmax_old = PGmax_new;
		PGmin_old = PGmin_new;
		if (PGmax_old <= 0)
			PGmax_old = INF;
		if (PGmin_old >= 0)
			PGmin_old = -INF;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");

	// calculate objective value

	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	for(i=0; i<l; i++)
	{
		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
		if(alpha[i] > 0)
			++nSV;
	}
	info("Objective value = %lf\n",v/2);
	info("nSV = %d\n",nSV);

	delete [] QD;
	delete [] alpha;
	delete [] y;
	delete [] index;
}
966
+
967
+
968
+ // A coordinate descent algorithm for
969
+ // L1-loss and L2-loss epsilon-SVR dual problem
970
+ //
971
+ // min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
972
+ // s.t. -upper_bound_i <= \beta_i <= upper_bound_i,
973
+ //
974
+ // where Qij = xi^T xj and
975
+ // D is a diagonal matrix
976
+ //
977
+ // In L1-SVM case:
978
+ // upper_bound_i = C
979
+ // lambda_i = 0
980
+ // In L2-SVM case:
981
+ // upper_bound_i = INF
982
+ // lambda_i = 1/(2*C)
983
+ //
984
+ // Given:
985
+ // x, y, p, C
986
+ // eps is the stopping tolerance
987
+ //
988
+ // solution will be put in w
989
+ //
990
+ // See Algorithm 4 of Ho and Lin, 2012
991
+
992
#undef GETI
// All instances share one lambda/upper_bound slot in this solver.
#define GETI(i) (0)
// To support weights for instances, use GETI(i) (i)

// Dual coordinate descent for L1-loss / L2-loss epsilon-SVR (Algorithm 4 of
// Ho and Lin, 2012); formulation in the comment block above. beta_i are the
// (signed) dual variables; solution is written into w.
static void solve_l2r_l1l2_svr(
	const problem *prob, double *w, const parameter *param,
	int solver_type)
{
	int l = prob->l;
	double C = param->C;
	double p = param->p;  // epsilon-tube half-width
	int w_size = prob->n;
	double eps = param->eps;
	int i, s, iter = 0;
	int max_iter = 1000;
	int active_size = l;
	int *index = new int[l];

	double d, G, H;
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init;  // set on the first pass; stopping is relative to it
	double *beta = new double[l];
	double *QD = new double[l];  // ||x_i||^2
	double *y = prob->y;

	// L2R_L2LOSS_SVR_DUAL
	double lambda[1], upper_bound[1];
	lambda[0] = 0.5/C;
	upper_bound[0] = INF;

	if(solver_type == L2R_L1LOSS_SVR_DUAL)
	{
		lambda[0] = 0;
		upper_bound[0] = C;
	}

	// Initial beta can be set here. Note that
	// -upper_bound <= beta[i] <= upper_bound
	for(i=0; i<l; i++)
		beta[i] = 0;

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		QD[i] = 0;
		feature_node *xi = prob->x[i];
		while(xi->index != -1)
		{
			double val = xi->value;
			QD[i] += val*val;
			w[xi->index-1] += beta[i]*val;
			xi++;
		}

		index[i] = i;
	}


	while(iter < max_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;

		// Randomly permute the active instances (Fisher-Yates).
		for(i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			i = index[s];
			G = -y[i] + lambda[GETI(i)]*beta[i];
			H = QD[i] + lambda[GETI(i)];

			feature_node *xi = prob->x[i];
			while(xi->index != -1)
			{
				int ind = xi->index-1;
				double val = xi->value;
				G += val*w[ind];
				xi++;
			}

			// Gp/Gn: subgradients of the |beta_i| term from the right/left.
			double Gp = G+p;
			double Gn = G-p;
			double violation = 0;
			if(beta[i] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old && Gn<-Gmax_old)
				{
					// Stuck at zero with both-sided slack: shrink.
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] >= upper_bound[GETI(i)])
			{
				if(Gp > 0)
					violation = Gp;
				else if(Gp < -Gmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] <= -upper_bound[GETI(i)])
			{
				if(Gn < 0)
					violation = -Gn;
				else if(Gn > Gmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;

			// obtain Newton direction d
			if(Gp < H*beta[i])
				d = -Gp/H;
			else if(Gn > H*beta[i])
				d = -Gn/H;
			else
				d = -beta[i];

			if(fabs(d) < 1.0e-12)
				continue;

			double beta_old = beta[i];
			beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
			d = beta[i]-beta_old;

			if(d != 0)
			{
				xi = prob->x[i];
				while(xi->index != -1)
				{
					w[xi->index-1] += d*xi->value;
					xi++;
				}
			}
		}

		if(iter == 0)
			Gnorm1_init = Gnorm1_new;
		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == l)
				break;
			else
			{
				// Converged on the shrunk problem: re-activate everything
				// and relax the shrinking bound to confirm convergence.
				active_size = l;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	info("\noptimization finished, #iter = %d\n", iter);
	if(iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0; i<l; i++)
	{
		v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
		if(beta[i] != 0)
			nSV++;
	}

	info("Objective value = %lf\n", v);
	info("nSV = %d\n",nSV);

	delete [] beta;
	delete [] QD;
	delete [] index;
}
1199
+
1200
+
1201
+ // A coordinate descent algorithm for
1202
+ // the dual of L2-regularized logistic regression problems
1203
+ //
1204
+ // min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
1205
+ // s.t. 0 <= \alpha_i <= upper_bound_i,
1206
+ //
1207
+ // where Qij = yi yj xi^T xj and
1208
+ // upper_bound_i = Cp if y_i = 1
1209
+ // upper_bound_i = Cn if y_i = -1
1210
+ //
1211
+ // Given:
1212
+ // x, y, Cp, Cn
1213
+ // eps is the stopping tolerance
1214
+ //
1215
+ // solution will be put in w
1216
+ //
1217
+ // See Algorithm 5 of Yu et al., MLJ 2010
1218
+
1219
+ #undef GETI
1220
+ #define GETI(i) (y[i]+1)
1221
+ // To support weights for instances, use GETI(i) (i)
1222
+
1223
+ void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
1224
+ {
1225
+ int l = prob->l;
1226
+ int w_size = prob->n;
1227
+ int i, s, iter = 0;
1228
+ double *xTx = new double[l];
1229
+ int max_iter = 1000;
1230
+ int *index = new int[l];
1231
+ double *alpha = new double[2*l]; // store alpha and C - alpha
1232
+ schar *y = new schar[l];
1233
+ int max_inner_iter = 100; // for inner Newton
1234
+ double innereps = 1e-2;
1235
+ double innereps_min = min(1e-8, eps);
1236
+ double upper_bound[3] = {Cn, 0, Cp};
1237
+
1238
+ for(i=0; i<l; i++)
1239
+ {
1240
+ if(prob->y[i] > 0)
1241
+ {
1242
+ y[i] = +1;
1243
+ }
1244
+ else
1245
+ {
1246
+ y[i] = -1;
1247
+ }
1248
+ }
1249
+
1250
+ // Initial alpha can be set here. Note that
1251
+ // 0 < alpha[i] < upper_bound[GETI(i)]
1252
+ // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
1253
+ for(i=0; i<l; i++)
1254
+ {
1255
+ alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
1256
+ alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
1257
+ }
1258
+
1259
+ for(i=0; i<w_size; i++)
1260
+ w[i] = 0;
1261
+ for(i=0; i<l; i++)
1262
+ {
1263
+ xTx[i] = 0;
1264
+ feature_node *xi = prob->x[i];
1265
+ while (xi->index != -1)
1266
+ {
1267
+ double val = xi->value;
1268
+ xTx[i] += val*val;
1269
+ w[xi->index-1] += y[i]*alpha[2*i]*val;
1270
+ xi++;
1271
+ }
1272
+ index[i] = i;
1273
+ }
1274
+
1275
+ while (iter < max_iter)
1276
+ {
1277
+ for (i=0; i<l; i++)
1278
+ {
1279
+ int j = i+rand()%(l-i);
1280
+ swap(index[i], index[j]);
1281
+ }
1282
+ int newton_iter = 0;
1283
+ double Gmax = 0;
1284
+ for (s=0; s<l; s++)
1285
+ {
1286
+ i = index[s];
1287
+ schar yi = y[i];
1288
+ double C = upper_bound[GETI(i)];
1289
+ double ywTx = 0, xisq = xTx[i];
1290
+ feature_node *xi = prob->x[i];
1291
+ while (xi->index != -1)
1292
+ {
1293
+ ywTx += w[xi->index-1]*xi->value;
1294
+ xi++;
1295
+ }
1296
+ ywTx *= y[i];
1297
+ double a = xisq, b = ywTx;
1298
+
1299
+ // Decide to minimize g_1(z) or g_2(z)
1300
+ int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
1301
+ if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
1302
+ {
1303
+ ind1 = 2*i+1;
1304
+ ind2 = 2*i;
1305
+ sign = -1;
1306
+ }
1307
+
1308
+ // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
1309
+ double alpha_old = alpha[ind1];
1310
+ double z = alpha_old;
1311
+ if(C - z < 0.5 * C)
1312
+ z = 0.1*z;
1313
+ double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1314
+ Gmax = max(Gmax, fabs(gp));
1315
+
1316
+ // Newton method on the sub-problem
1317
+ const double eta = 0.1; // xi in the paper
1318
+ int inner_iter = 0;
1319
+ while (inner_iter <= max_inner_iter)
1320
+ {
1321
+ if(fabs(gp) < innereps)
1322
+ break;
1323
+ double gpp = a + C/(C-z)/z;
1324
+ double tmpz = z - gp/gpp;
1325
+ if(tmpz <= 0)
1326
+ z *= eta;
1327
+ else // tmpz in (0, C)
1328
+ z = tmpz;
1329
+ gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1330
+ newton_iter++;
1331
+ inner_iter++;
1332
+ }
1333
+
1334
+ if(inner_iter > 0) // update w
1335
+ {
1336
+ alpha[ind1] = z;
1337
+ alpha[ind2] = C-z;
1338
+ xi = prob->x[i];
1339
+ while (xi->index != -1)
1340
+ {
1341
+ w[xi->index-1] += sign*(z-alpha_old)*yi*xi->value;
1342
+ xi++;
1343
+ }
1344
+ }
1345
+ }
1346
+
1347
+ iter++;
1348
+ if(iter % 10 == 0)
1349
+ info(".");
1350
+
1351
+ if(Gmax < eps)
1352
+ break;
1353
+
1354
+ if(newton_iter <= l/10)
1355
+ innereps = max(innereps_min, 0.1*innereps);
1356
+
1357
+ }
1358
+
1359
+ info("\noptimization finished, #iter = %d\n",iter);
1360
+ if (iter >= max_iter)
1361
+ info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
1362
+
1363
+ // calculate objective value
1364
+
1365
+ double v = 0;
1366
+ for(i=0; i<w_size; i++)
1367
+ v += w[i] * w[i];
1368
+ v *= 0.5;
1369
+ for(i=0; i<l; i++)
1370
+ v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
1371
+ - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
1372
+ info("Objective value = %lf\n", v);
1373
+
1374
+ delete [] xTx;
1375
+ delete [] alpha;
1376
+ delete [] y;
1377
+ delete [] index;
1378
+ }
1379
+
1380
// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
// min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
// GETI maps label -1 -> 0 and +1 -> 2, indexing the C[] table below.
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

static void solve_l1r_l2_svc(
	problem *prob_col, double *w, double eps,
	double Cp, double Cn)
{
	// NOTE: prob_col is expected in COLUMN format (see transpose()); x->value
	// is temporarily scaled by the label below and restored before returning.
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, iter = 0;
	int max_iter = 1000;
	int active_size = w_size;             // shrinking active set over features
	int max_num_linesearch = 20;

	double sigma = 0.01;                  // sufficient-decrease constant of the line search
	double d, G_loss, G, H;
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init;                   // set on the first outer iteration
	double d_old, d_diff;
	double loss_old, loss_new;
	double appxcond, cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *b = new double[l]; // b = 1-ywTx
	double *xj_sq = new double[w_size];   // C-weighted squared norm of each column
	feature_node *x;

	// Indexed via GETI: slot 0 holds Cn (y=-1), slot 2 holds Cp (y=+1).
	double C[3] = {Cn,0,Cp};

	// Initial w can be set here.
	for(j=0; j<w_size; j++)
		w[j] = 0;

	for(j=0; j<l; j++)
	{
		b[j] = 1;
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;
	}
	for(j=0; j<w_size; j++)
	{
		index[j] = j;
		xj_sq[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			x->value *= y[ind]; // x->value stores yi*xij
			double val = x->value;
			b[ind] -= w[j]*val;
			xj_sq[j] += C[GETI(ind)]*val*val;
			x++;
		}
	}

	while(iter < max_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;

		// Random shuffle of the active features.
		for(j=0; j<active_size; j++)
		{
			int i = j+rand()%(active_size-j);
			swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			G_loss = 0;
			H = 0;

			// Gradient/Hessian of the loss part; only instances with positive
			// margin violation (b[ind] > 0) contribute.
			x = prob_col->x[j];
			while(x->index != -1)
			{
				int ind = x->index-1;
				if(b[ind] > 0)
				{
					double val = x->value;
					double tmp = C[GETI(ind)]*val;
					G_loss -= tmp*b[ind];
					H += tmp*val;
				}
				x++;
			}
			G_loss *= 2;

			G = G_loss;
			H *= 2;
			H = max(H, 1e-12);

			// Projected gradient of |w_j| + loss: Gp/Gn are the right/left derivatives.
			double Gp = G+1;
			double Gn = G-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				// Shrink: feature unlikely to move away from 0 this round.
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;

			// obtain Newton direction d
			if(Gp < H*w[j])
				d = -Gp/H;
			else if(Gn > H*w[j])
				d = -Gn/H;
			else
				d = -w[j];

			if(fabs(d) < 1.0e-12)
				continue;

			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
			d_old = 0;
			int num_linesearch;
			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
			{
				d_diff = d_old - d;
				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

				// Cheap upper bound on the true condition; if it already holds,
				// accept the step without recomputing the loss.
				appxcond = xj_sq[j]*d*d + G_loss*d + cond;
				if(appxcond <= 0)
				{
					x = prob_col->x[j];
					while(x->index != -1)
					{
						b[x->index-1] += d_diff*x->value;
						x++;
					}
					break;
				}

				// Exact check: update b[] and compare old/new loss. loss_old is
				// only needed once, on the first line-search step.
				if(num_linesearch == 0)
				{
					loss_old = 0;
					loss_new = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						if(b[ind] > 0)
							loss_old += C[GETI(ind)]*b[ind]*b[ind];
						double b_new = b[ind] + d_diff*x->value;
						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}
				else
				{
					loss_new = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						double b_new = b[ind] + d_diff*x->value;
						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}

				cond = cond + loss_new - loss_old;
				if(cond <= 0)
					break;
				else
				{
					d_old = d;
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w[j] += d;

			// recompute b[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				info("#");
				for(int i=0; i<l; i++)
					b[i] = 1;

				for(int i=0; i<w_size; i++)
				{
					if(w[i]==0) continue;
					x = prob_col->x[i];
					while(x->index != -1)
					{
						b[x->index-1] -= w[i]*x->value;
						x++;
					}
				}
			}
		}

		if(iter == 0)
			Gnorm1_init = Gnorm1_new;
		iter++;
		if(iter % 10 == 0)
			info(".");

		// Relative stopping test; if it fires on a shrunken set, re-activate
		// all features and run one more certifying pass.
		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				active_size = w_size;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	info("\noptimization finished, #iter = %d\n", iter);
	if(iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value

	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
	{
		x = prob_col->x[j];
		while(x->index != -1)
		{
			x->value *= prob_col->y[x->index-1]; // restore x->value
			x++;
		}
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	}
	for(j=0; j<l; j++)
		if(b[j] > 0)
			v += C[GETI(j)]*b[j]*b[j];

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] b;
	delete [] xj_sq;
}
1666
+
1667
// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
// min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
// GETI maps label -1 -> 0 and +1 -> 2, indexing the C[] table below.
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

static void solve_l1r_lr(
	const problem *prob_col, double *w, double eps,
	double Cp, double Cn)
{
	// NOTE: prob_col is expected in COLUMN format (see transpose()).
	// Outer loop: Newton steps on a quadratic model; inner loop: coordinate
	// descent on that QP; then a backtracking line search on the true objective.
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, newton_iter=0, iter=0;
	int max_newton_iter = 100;
	int max_iter = 1000;                  // CD iterations per Newton step
	int max_num_linesearch = 20;
	int active_size;
	int QP_active_size;

	double nu = 1e-12;                    // damping added to the Hessian diagonal
	double inner_eps = 1;                 // QP tolerance, tightened after the first step
	double sigma = 0.01;                  // sufficient-decrease constant
	double w_norm, w_norm_new;
	double z, G, H;
	double Gnorm1_init;                   // set on the first Newton iteration
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double QP_Gmax_old = INF;
	double QP_Gmax_new, QP_Gnorm1_new;
	double delta, negsum_xTd, cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *Hdiag = new double[w_size];   // diagonal of the quadratic model
	double *Grad = new double[w_size];    // gradient of the loss at w
	double *wpd = new double[w_size];     // w plus the tentative Newton direction
	double *xjneg_sum = new double[w_size];
	double *xTd = new double[l];          // x_i^T d for the current direction d
	double *exp_wTx = new double[l];      // exp(w^T x_i), kept in sync with w
	double *exp_wTx_new = new double[l];
	double *tau = new double[l];          // C_i * sigmoid(-w^T x_i)
	double *D = new double[l];            // C_i * sigmoid'(w^T x_i)
	feature_node *x;

	// Indexed via GETI: slot 0 holds Cn (y=-1), slot 2 holds Cp (y=+1).
	double C[3] = {Cn,0,Cp};

	// Initial w can be set here.
	for(j=0; j<w_size; j++)
		w[j] = 0;

	for(j=0; j<l; j++)
	{
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;

		exp_wTx[j] = 0;
	}

	w_norm = 0;
	for(j=0; j<w_size; j++)
	{
		w_norm += fabs(w[j]);
		wpd[j] = w[j];
		index[j] = j;
		xjneg_sum[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			double val = x->value;
			exp_wTx[ind] += w[j]*val;
			if(y[ind] == -1)
				xjneg_sum[j] += C[GETI(ind)]*val;
			x++;
		}
	}
	for(j=0; j<l; j++)
	{
		exp_wTx[j] = exp(exp_wTx[j]);
		double tau_tmp = 1/(1+exp_wTx[j]);
		tau[j] = C[GETI(j)]*tau_tmp;
		D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp;
	}

	while(newton_iter < max_newton_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;
		active_size = w_size;

		// Build gradient and Hessian diagonal; shrink features that are firmly 0.
		for(s=0; s<active_size; s++)
		{
			j = index[s];
			Hdiag[j] = nu;
			Grad[j] = 0;

			double tmp = 0;
			x = prob_col->x[j];
			while(x->index != -1)
			{
				int ind = x->index-1;
				Hdiag[j] += x->value*x->value*D[ind];
				tmp += x->value*tau[ind];
				x++;
			}
			Grad[j] = -tmp + xjneg_sum[j];

			double Gp = Grad[j]+1;
			double Gn = Grad[j]-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				//outer-level shrinking
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;
		}

		if(newton_iter == 0)
			Gnorm1_init = Gnorm1_new;

		if(Gnorm1_new <= eps*Gnorm1_init)
			break;

		iter = 0;
		QP_Gmax_old = INF;
		QP_active_size = active_size;

		for(int i=0; i<l; i++)
			xTd[i] = 0;

		// optimize QP over wpd
		while(iter < max_iter)
		{
			QP_Gmax_new = 0;
			QP_Gnorm1_new = 0;

			// Random shuffle of the QP-active features.
			for(j=0; j<QP_active_size; j++)
			{
				int i = j+rand()%(QP_active_size-j);
				swap(index[i], index[j]);
			}

			for(s=0; s<QP_active_size; s++)
			{
				j = index[s];
				H = Hdiag[j];

				x = prob_col->x[j];
				G = Grad[j] + (wpd[j]-w[j])*nu;
				while(x->index != -1)
				{
					int ind = x->index-1;
					G += x->value*D[ind]*xTd[ind];
					x++;
				}

				double Gp = G+1;
				double Gn = G-1;
				double violation = 0;
				if(wpd[j] == 0)
				{
					if(Gp < 0)
						violation = -Gp;
					else if(Gn > 0)
						violation = Gn;
					//inner-level shrinking
					else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
					{
						QP_active_size--;
						swap(index[s], index[QP_active_size]);
						s--;
						continue;
					}
				}
				else if(wpd[j] > 0)
					violation = fabs(Gp);
				else
					violation = fabs(Gn);

				QP_Gmax_new = max(QP_Gmax_new, violation);
				QP_Gnorm1_new += violation;

				// obtain solution of one-variable problem
				if(Gp < H*wpd[j])
					z = -Gp/H;
				else if(Gn > H*wpd[j])
					z = -Gn/H;
				else
					z = -wpd[j];

				if(fabs(z) < 1.0e-12)
					continue;
				// Clip the step to keep exp() in the line search well-behaved.
				z = min(max(z,-10.0),10.0);

				wpd[j] += z;

				x = prob_col->x[j];
				while(x->index != -1)
				{
					int ind = x->index-1;
					xTd[ind] += x->value*z;
					x++;
				}
			}

			iter++;

			if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
			{
				//inner stopping
				if(QP_active_size == active_size)
					break;
				//active set reactivation
				else
				{
					QP_active_size = active_size;
					QP_Gmax_old = INF;
					continue;
				}
			}

			QP_Gmax_old = QP_Gmax_new;
		}

		if(iter >= max_iter)
			info("WARNING: reaching max number of inner iterations\n");

		// delta = expected decrease predicted by the model, used in the
		// sufficient-decrease test below.
		delta = 0;
		w_norm_new = 0;
		for(j=0; j<w_size; j++)
		{
			delta += Grad[j]*(wpd[j]-w[j]);
			if(wpd[j] != 0)
				w_norm_new += fabs(wpd[j]);
		}
		delta += (w_norm_new-w_norm);

		negsum_xTd = 0;
		for(int i=0; i<l; i++)
			if(y[i] == -1)
				negsum_xTd += C[GETI(i)]*xTd[i];

		// Backtracking line search on the true L1-regularized logistic loss.
		int num_linesearch;
		for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
		{
			cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;

			for(int i=0; i<l; i++)
			{
				double exp_xTd = exp(xTd[i]);
				exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
				cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
			}

			if(cond <= 0)
			{
				// Step accepted: commit wpd and refresh the cached quantities.
				w_norm = w_norm_new;
				for(j=0; j<w_size; j++)
					w[j] = wpd[j];
				for(int i=0; i<l; i++)
				{
					exp_wTx[i] = exp_wTx_new[i];
					double tau_tmp = 1/(1+exp_wTx[i]);
					tau[i] = C[GETI(i)]*tau_tmp;
					D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
				}
				break;
			}
			else
			{
				// Halve the step: wpd, delta, negsum_xTd and xTd all scale by 0.5.
				w_norm_new = 0;
				for(j=0; j<w_size; j++)
				{
					wpd[j] = (w[j]+wpd[j])*0.5;
					if(wpd[j] != 0)
						w_norm_new += fabs(wpd[j]);
				}
				delta *= 0.5;
				negsum_xTd *= 0.5;
				for(int i=0; i<l; i++)
					xTd[i] *= 0.5;
			}
		}

		// Recompute some info due to too many line search steps
		if(num_linesearch >= max_num_linesearch)
		{
			for(int i=0; i<l; i++)
				exp_wTx[i] = 0;

			for(int i=0; i<w_size; i++)
			{
				if(w[i]==0) continue;
				x = prob_col->x[i];
				while(x->index != -1)
				{
					exp_wTx[x->index-1] += w[i]*x->value;
					x++;
				}
			}

			for(int i=0; i<l; i++)
				exp_wTx[i] = exp(exp_wTx[i]);
		}

		// QP converged in a single pass -> tighten the inner tolerance.
		if(iter == 1)
			inner_eps *= 0.25;

		newton_iter++;
		Gmax_old = Gmax_new;

		info("iter %3d #CD cycles %d\n", newton_iter, iter);
	}

	info("=========================\n");
	info("optimization finished, #iter = %d\n", newton_iter);
	if(newton_iter >= max_newton_iter)
		info("WARNING: reaching max number of iterations\n");

	// calculate objective value

	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	for(j=0; j<l; j++)
		if(y[j] == 1)
			v += C[GETI(j)]*log(1+1/exp_wTx[j]);
		else
			v += C[GETI(j)]*log(1+exp_wTx[j]);

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] Hdiag;
	delete [] Grad;
	delete [] wpd;
	delete [] xjneg_sum;
	delete [] xTd;
	delete [] exp_wTx;
	delete [] exp_wTx_new;
	delete [] tau;
	delete [] D;
}
2047
+
2048
+ // transpose matrix X from row format to column format
2049
+ static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
2050
+ {
2051
+ int i;
2052
+ int l = prob->l;
2053
+ int n = prob->n;
2054
+ int nnz = 0;
2055
+ int *col_ptr = new int[n+1];
2056
+ feature_node *x_space;
2057
+ prob_col->l = l;
2058
+ prob_col->n = n;
2059
+ prob_col->y = new double[l];
2060
+ prob_col->x = new feature_node*[n];
2061
+
2062
+ for(i=0; i<l; i++)
2063
+ prob_col->y[i] = prob->y[i];
2064
+
2065
+ for(i=0; i<n+1; i++)
2066
+ col_ptr[i] = 0;
2067
+ for(i=0; i<l; i++)
2068
+ {
2069
+ feature_node *x = prob->x[i];
2070
+ while(x->index != -1)
2071
+ {
2072
+ nnz++;
2073
+ col_ptr[x->index]++;
2074
+ x++;
2075
+ }
2076
+ }
2077
+ for(i=1; i<n+1; i++)
2078
+ col_ptr[i] += col_ptr[i-1] + 1;
2079
+
2080
+ x_space = new feature_node[nnz+n];
2081
+ for(i=0; i<n; i++)
2082
+ prob_col->x[i] = &x_space[col_ptr[i]];
2083
+
2084
+ for(i=0; i<l; i++)
2085
+ {
2086
+ feature_node *x = prob->x[i];
2087
+ while(x->index != -1)
2088
+ {
2089
+ int ind = x->index-1;
2090
+ x_space[col_ptr[ind]].index = i+1; // starts from 1
2091
+ x_space[col_ptr[ind]].value = x->value;
2092
+ col_ptr[ind]++;
2093
+ x++;
2094
+ }
2095
+ }
2096
+ for(i=0; i<n; i++)
2097
+ x_space[col_ptr[i]].index = -1;
2098
+
2099
+ *x_space_ret = x_space;
2100
+
2101
+ delete [] col_ptr;
2102
+ }
2103
+
2104
+ // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2105
+ // perm, length l, must be allocated before calling this subroutine
2106
+ static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2107
+ {
2108
+ int l = prob->l;
2109
+ int max_nr_class = 16;
2110
+ int nr_class = 0;
2111
+ int *label = Malloc(int,max_nr_class);
2112
+ int *count = Malloc(int,max_nr_class);
2113
+ int *data_label = Malloc(int,l);
2114
+ int i;
2115
+
2116
+ for(i=0;i<l;i++)
2117
+ {
2118
+ int this_label = (int)prob->y[i];
2119
+ int j;
2120
+ for(j=0;j<nr_class;j++)
2121
+ {
2122
+ if(this_label == label[j])
2123
+ {
2124
+ ++count[j];
2125
+ break;
2126
+ }
2127
+ }
2128
+ data_label[i] = j;
2129
+ if(j == nr_class)
2130
+ {
2131
+ if(nr_class == max_nr_class)
2132
+ {
2133
+ max_nr_class *= 2;
2134
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
2135
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
2136
+ }
2137
+ label[nr_class] = this_label;
2138
+ count[nr_class] = 1;
2139
+ ++nr_class;
2140
+ }
2141
+ }
2142
+
2143
+ int *start = Malloc(int,nr_class);
2144
+ start[0] = 0;
2145
+ for(i=1;i<nr_class;i++)
2146
+ start[i] = start[i-1]+count[i-1];
2147
+ for(i=0;i<l;i++)
2148
+ {
2149
+ perm[start[data_label[i]]] = i;
2150
+ ++start[data_label[i]];
2151
+ }
2152
+ start[0] = 0;
2153
+ for(i=1;i<nr_class;i++)
2154
+ start[i] = start[i-1]+count[i-1];
2155
+
2156
+ *nr_class_ret = nr_class;
2157
+ *label_ret = label;
2158
+ *start_ret = start;
2159
+ *count_ret = count;
2160
+ free(data_label);
2161
+ }
2162
+
2163
+ static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
2164
+ {
2165
+ double eps=param->eps;
2166
+ int pos = 0;
2167
+ int neg = 0;
2168
+ for(int i=0;i<prob->l;i++)
2169
+ if(prob->y[i] > 0)
2170
+ pos++;
2171
+ neg = prob->l - pos;
2172
+
2173
+ double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
2174
+
2175
+ function *fun_obj=NULL;
2176
+ switch(param->solver_type)
2177
+ {
2178
+ case L2R_LR:
2179
+ {
2180
+ double *C = new double[prob->l];
2181
+ for(int i = 0; i < prob->l; i++)
2182
+ {
2183
+ if(prob->y[i] > 0)
2184
+ C[i] = Cp;
2185
+ else
2186
+ C[i] = Cn;
2187
+ }
2188
+ fun_obj=new l2r_lr_fun(prob, C);
2189
+ TRON tron_obj(fun_obj, primal_solver_tol);
2190
+ tron_obj.set_print_string(liblinear_print_string);
2191
+ tron_obj.tron(w);
2192
+ delete fun_obj;
2193
+ delete C;
2194
+ break;
2195
+ }
2196
+ case L2R_L2LOSS_SVC:
2197
+ {
2198
+ double *C = new double[prob->l];
2199
+ for(int i = 0; i < prob->l; i++)
2200
+ {
2201
+ if(prob->y[i] > 0)
2202
+ C[i] = Cp;
2203
+ else
2204
+ C[i] = Cn;
2205
+ }
2206
+ fun_obj=new l2r_l2_svc_fun(prob, C);
2207
+ TRON tron_obj(fun_obj, primal_solver_tol);
2208
+ tron_obj.set_print_string(liblinear_print_string);
2209
+ tron_obj.tron(w);
2210
+ delete fun_obj;
2211
+ delete C;
2212
+ break;
2213
+ }
2214
+ case L2R_L2LOSS_SVC_DUAL:
2215
+ solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
2216
+ break;
2217
+ case L2R_L1LOSS_SVC_DUAL:
2218
+ solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
2219
+ break;
2220
+ case L1R_L2LOSS_SVC:
2221
+ {
2222
+ problem prob_col;
2223
+ feature_node *x_space = NULL;
2224
+ transpose(prob, &x_space ,&prob_col);
2225
+ solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn);
2226
+ delete [] prob_col.y;
2227
+ delete [] prob_col.x;
2228
+ delete [] x_space;
2229
+ break;
2230
+ }
2231
+ case L1R_LR:
2232
+ {
2233
+ problem prob_col;
2234
+ feature_node *x_space = NULL;
2235
+ transpose(prob, &x_space ,&prob_col);
2236
+ solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn);
2237
+ delete [] prob_col.y;
2238
+ delete [] prob_col.x;
2239
+ delete [] x_space;
2240
+ break;
2241
+ }
2242
+ case L2R_LR_DUAL:
2243
+ solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
2244
+ break;
2245
+ case L2R_L2LOSS_SVR:
2246
+ {
2247
+ double *C = new double[prob->l];
2248
+ for(int i = 0; i < prob->l; i++)
2249
+ C[i] = param->C;
2250
+
2251
+ fun_obj=new l2r_l2_svr_fun(prob, C, param->p);
2252
+ TRON tron_obj(fun_obj, param->eps);
2253
+ tron_obj.set_print_string(liblinear_print_string);
2254
+ tron_obj.tron(w);
2255
+ delete fun_obj;
2256
+ delete C;
2257
+ break;
2258
+
2259
+ }
2260
+ case L2R_L1LOSS_SVR_DUAL:
2261
+ solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL);
2262
+ break;
2263
+ case L2R_L2LOSS_SVR_DUAL:
2264
+ solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL);
2265
+ break;
2266
+ default:
2267
+ fprintf(stderr, "ERROR: unknown solver_type\n");
2268
+ break;
2269
+ }
2270
+ }
2271
+
2272
//
// Interface functions
//
// Train a model on prob with the settings in param.  Returns a heap-allocated
// model (free with free_and_destroy_model).  For regression solvers a single
// weight vector is trained; for classification, data is grouped by class and
// one-vs-the-rest (or Crammer-Singer) training is performed.
model* train(const problem *prob, const parameter *param)
{
	int i,j;
	int l = prob->l;
	int n = prob->n;
	int w_size = prob->n;
	model *model_ = Malloc(model,1);

	// With a bias term, the last feature column is the bias and is not
	// counted in nr_feature.
	if(prob->bias>=0)
		model_->nr_feature=n-1;
	else
		model_->nr_feature=n;
	model_->param = *param;
	model_->bias = prob->bias;

	if(param->solver_type == L2R_L2LOSS_SVR ||
	   param->solver_type == L2R_L1LOSS_SVR_DUAL ||
	   param->solver_type == L2R_L2LOSS_SVR_DUAL)
	{
		// Regression: one weight vector, no class labels.
		model_->w = Malloc(double, w_size);
		model_->nr_class = 2;
		model_->label = NULL;
		train_one(prob, param, &model_->w[0], 0, 0);
	}
	else
	{
		int nr_class;
		int *label = NULL;
		int *start = NULL;
		int *count = NULL;
		int *perm = Malloc(int,l);

		// group training data of the same class
		group_classes(prob,&nr_class,&label,&start,&count,perm);

		model_->nr_class=nr_class;
		model_->label = Malloc(int,nr_class);
		for(i=0;i<nr_class;i++)
			model_->label[i] = label[i];

		// calculate weighted C
		double *weighted_C = Malloc(double, nr_class);
		for(i=0;i<nr_class;i++)
			weighted_C[i] = param->C;
		for(i=0;i<param->nr_weight;i++)
		{
			for(j=0;j<nr_class;j++)
				if(param->weight_label[i] == label[j])
					break;
			if(j == nr_class)
				fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
			else
				weighted_C[j] *= param->weight[i];
		}

		// constructing the subproblem
		// x holds the instances re-ordered so each class is contiguous.
		feature_node **x = Malloc(feature_node *,l);
		for(i=0;i<l;i++)
			x[i] = prob->x[perm[i]];

		int k;
		problem sub_prob;
		sub_prob.l = l;
		sub_prob.n = n;
		sub_prob.x = Malloc(feature_node *,sub_prob.l);
		sub_prob.y = Malloc(double,sub_prob.l);

		for(k=0; k<sub_prob.l; k++)
			sub_prob.x[k] = x[k];

		// multi-class svm by Crammer and Singer
		if(param->solver_type == MCSVM_CS)
		{
			model_->w=Malloc(double, n*nr_class);
			for(i=0;i<nr_class;i++)
				for(j=start[i];j<start[i]+count[i];j++)
					sub_prob.y[j] = i;
			Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
			Solver.Solve(model_->w);
		}
		else
		{
			if(nr_class == 2)
			{
				// Binary case: single weight vector; the first grouped class
				// is labeled +1, the second -1.
				model_->w=Malloc(double, w_size);

				int e0 = start[0]+count[0];
				k=0;
				for(; k<e0; k++)
					sub_prob.y[k] = +1;
				for(; k<sub_prob.l; k++)
					sub_prob.y[k] = -1;

				train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
			}
			else
			{
				// One-vs-the-rest: train each class against all others; the
				// per-class vectors are interleaved into model_->w.
				model_->w=Malloc(double, w_size*nr_class);
				double *w=Malloc(double, w_size);
				for(i=0;i<nr_class;i++)
				{
					int si = start[i];
					int ei = si+count[i];

					k=0;
					for(; k<si; k++)
						sub_prob.y[k] = -1;
					for(; k<ei; k++)
						sub_prob.y[k] = +1;
					for(; k<sub_prob.l; k++)
						sub_prob.y[k] = -1;

					train_one(&sub_prob, param, w, weighted_C[i], param->C);

					for(int j=0;j<w_size;j++)
						model_->w[j*nr_class+i] = w[j];
				}
				free(w);
			}

		}

		free(x);
		free(label);
		free(start);
		free(count);
		free(perm);
		free(sub_prob.x);
		free(sub_prob.y);
		free(weighted_C);
	}
	return model_;
}
2408
+
2409
+ void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target)
2410
+ {
2411
+ int i;
2412
+ int *fold_start = Malloc(int,nr_fold+1);
2413
+ int l = prob->l;
2414
+ int *perm = Malloc(int,l);
2415
+
2416
+ for(i=0;i<l;i++) perm[i]=i;
2417
+ for(i=0;i<l;i++)
2418
+ {
2419
+ int j = i+rand()%(l-i);
2420
+ swap(perm[i],perm[j]);
2421
+ }
2422
+ for(i=0;i<=nr_fold;i++)
2423
+ fold_start[i]=i*l/nr_fold;
2424
+
2425
+ for(i=0;i<nr_fold;i++)
2426
+ {
2427
+ int begin = fold_start[i];
2428
+ int end = fold_start[i+1];
2429
+ int j,k;
2430
+ struct problem subprob;
2431
+
2432
+ subprob.bias = prob->bias;
2433
+ subprob.n = prob->n;
2434
+ subprob.l = l-(end-begin);
2435
+ subprob.x = Malloc(struct feature_node*,subprob.l);
2436
+ subprob.y = Malloc(double,subprob.l);
2437
+
2438
+ k=0;
2439
+ for(j=0;j<begin;j++)
2440
+ {
2441
+ subprob.x[k] = prob->x[perm[j]];
2442
+ subprob.y[k] = prob->y[perm[j]];
2443
+ ++k;
2444
+ }
2445
+ for(j=end;j<l;j++)
2446
+ {
2447
+ subprob.x[k] = prob->x[perm[j]];
2448
+ subprob.y[k] = prob->y[perm[j]];
2449
+ ++k;
2450
+ }
2451
+ struct model *submodel = train(&subprob,param);
2452
+ for(j=begin;j<end;j++)
2453
+ target[perm[j]] = predict(submodel,prob->x[perm[j]]);
2454
+ free_and_destroy_model(&submodel);
2455
+ free(subprob.x);
2456
+ free(subprob.y);
2457
+ }
2458
+ free(fold_start);
2459
+ free(perm);
2460
+ }
2461
+
2462
+ double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
2463
+ {
2464
+ int idx;
2465
+ int n;
2466
+ if(model_->bias>=0)
2467
+ n=model_->nr_feature+1;
2468
+ else
2469
+ n=model_->nr_feature;
2470
+ double *w=model_->w;
2471
+ int nr_class=model_->nr_class;
2472
+ int i;
2473
+ int nr_w;
2474
+ if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
2475
+ nr_w = 1;
2476
+ else
2477
+ nr_w = nr_class;
2478
+
2479
+ const feature_node *lx=x;
2480
+ for(i=0;i<nr_w;i++)
2481
+ dec_values[i] = 0;
2482
+ for(; (idx=lx->index)!=-1; lx++)
2483
+ {
2484
+ // the dimension of testing data may exceed that of training
2485
+ if(idx<=n)
2486
+ for(i=0;i<nr_w;i++)
2487
+ dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
2488
+ }
2489
+
2490
+ if(nr_class==2)
2491
+ {
2492
+ if(model_->param.solver_type == L2R_L2LOSS_SVR ||
2493
+ model_->param.solver_type == L2R_L1LOSS_SVR_DUAL ||
2494
+ model_->param.solver_type == L2R_L2LOSS_SVR_DUAL)
2495
+ return dec_values[0];
2496
+ else
2497
+ return (dec_values[0]>0)?model_->label[0]:model_->label[1];
2498
+ }
2499
+ else
2500
+ {
2501
+ int dec_max_idx = 0;
2502
+ for(i=1;i<nr_class;i++)
2503
+ {
2504
+ if(dec_values[i] > dec_values[dec_max_idx])
2505
+ dec_max_idx = i;
2506
+ }
2507
+ return model_->label[dec_max_idx];
2508
+ }
2509
+ }
2510
+
2511
+ double predict(const model *model_, const feature_node *x)
2512
+ {
2513
+ double *dec_values = Malloc(double, model_->nr_class);
2514
+ double label=predict_values(model_, x, dec_values);
2515
+ free(dec_values);
2516
+ return label;
2517
+ }
2518
+
2519
+ double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
2520
+ {
2521
+ if(check_probability_model(model_))
2522
+ {
2523
+ int i;
2524
+ int nr_class=model_->nr_class;
2525
+ int nr_w;
2526
+ if(nr_class==2)
2527
+ nr_w = 1;
2528
+ else
2529
+ nr_w = nr_class;
2530
+
2531
+ double label=predict_values(model_, x, prob_estimates);
2532
+ for(i=0;i<nr_w;i++)
2533
+ prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
2534
+
2535
+ if(nr_class==2) // for binary classification
2536
+ prob_estimates[1]=1.-prob_estimates[0];
2537
+ else
2538
+ {
2539
+ double sum=0;
2540
+ for(i=0; i<nr_class; i++)
2541
+ sum+=prob_estimates[i];
2542
+
2543
+ for(i=0; i<nr_class; i++)
2544
+ prob_estimates[i]=prob_estimates[i]/sum;
2545
+ }
2546
+
2547
+ return label;
2548
+ }
2549
+ else
2550
+ return 0;
2551
+ }
2552
+
2553
// Solver names as written to / read from model files, indexed by the
// solver_type enum value. The empty strings keep indices 8-10 aligned
// with unused enum slots; the trailing NULL is the sentinel that
// terminates the lookup loop in load_model().
static const char *solver_type_table[]=
{
	"L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
	"L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
	"", "", "",
	"L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL", NULL
};
2560
+
2561
+ int save_model(const char *model_file_name, const struct model *model_)
2562
+ {
2563
+ int i;
2564
+ int nr_feature=model_->nr_feature;
2565
+ int n;
2566
+ const parameter& param = model_->param;
2567
+
2568
+ if(model_->bias>=0)
2569
+ n=nr_feature+1;
2570
+ else
2571
+ n=nr_feature;
2572
+ int w_size = n;
2573
+ FILE *fp = fopen(model_file_name,"w");
2574
+ if(fp==NULL) return -1;
2575
+
2576
+ char *old_locale = strdup(setlocale(LC_ALL, NULL));
2577
+ setlocale(LC_ALL, "C");
2578
+
2579
+ int nr_w;
2580
+ if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
2581
+ nr_w=1;
2582
+ else
2583
+ nr_w=model_->nr_class;
2584
+
2585
+ fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
2586
+ fprintf(fp, "nr_class %d\n", model_->nr_class);
2587
+
2588
+ if(model_->label)
2589
+ {
2590
+ fprintf(fp, "label");
2591
+ for(i=0; i<model_->nr_class; i++)
2592
+ fprintf(fp, " %d", model_->label[i]);
2593
+ fprintf(fp, "\n");
2594
+ }
2595
+
2596
+ fprintf(fp, "nr_feature %d\n", nr_feature);
2597
+
2598
+ fprintf(fp, "bias %.16g\n", model_->bias);
2599
+
2600
+ fprintf(fp, "w\n");
2601
+ for(i=0; i<w_size; i++)
2602
+ {
2603
+ int j;
2604
+ for(j=0; j<nr_w; j++)
2605
+ fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
2606
+ fprintf(fp, "\n");
2607
+ }
2608
+
2609
+ setlocale(LC_ALL, old_locale);
2610
+ free(old_locale);
2611
+
2612
+ if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2613
+ else return 0;
2614
+ }
2615
+
2616
+ struct model *load_model(const char *model_file_name)
2617
+ {
2618
+ FILE *fp = fopen(model_file_name,"r");
2619
+ if(fp==NULL) return NULL;
2620
+
2621
+ int i;
2622
+ int nr_feature;
2623
+ int n;
2624
+ int nr_class;
2625
+ double bias;
2626
+ model *model_ = Malloc(model,1);
2627
+ parameter& param = model_->param;
2628
+
2629
+ model_->label = NULL;
2630
+
2631
+ char *old_locale = strdup(setlocale(LC_ALL, NULL));
2632
+ setlocale(LC_ALL, "C");
2633
+
2634
+ char cmd[81];
2635
+ while(1)
2636
+ {
2637
+ fscanf(fp,"%80s",cmd);
2638
+ if(strcmp(cmd,"solver_type")==0)
2639
+ {
2640
+ fscanf(fp,"%80s",cmd);
2641
+ int i;
2642
+ for(i=0;solver_type_table[i];i++)
2643
+ {
2644
+ if(strcmp(solver_type_table[i],cmd)==0)
2645
+ {
2646
+ param.solver_type=i;
2647
+ break;
2648
+ }
2649
+ }
2650
+ if(solver_type_table[i] == NULL)
2651
+ {
2652
+ fprintf(stderr,"unknown solver type.\n");
2653
+
2654
+ setlocale(LC_ALL, old_locale);
2655
+ free(model_->label);
2656
+ free(model_);
2657
+ free(old_locale);
2658
+ return NULL;
2659
+ }
2660
+ }
2661
+ else if(strcmp(cmd,"nr_class")==0)
2662
+ {
2663
+ fscanf(fp,"%d",&nr_class);
2664
+ model_->nr_class=nr_class;
2665
+ }
2666
+ else if(strcmp(cmd,"nr_feature")==0)
2667
+ {
2668
+ fscanf(fp,"%d",&nr_feature);
2669
+ model_->nr_feature=nr_feature;
2670
+ }
2671
+ else if(strcmp(cmd,"bias")==0)
2672
+ {
2673
+ fscanf(fp,"%lf",&bias);
2674
+ model_->bias=bias;
2675
+ }
2676
+ else if(strcmp(cmd,"w")==0)
2677
+ {
2678
+ break;
2679
+ }
2680
+ else if(strcmp(cmd,"label")==0)
2681
+ {
2682
+ int nr_class = model_->nr_class;
2683
+ model_->label = Malloc(int,nr_class);
2684
+ for(int i=0;i<nr_class;i++)
2685
+ fscanf(fp,"%d",&model_->label[i]);
2686
+ }
2687
+ else
2688
+ {
2689
+ fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2690
+ setlocale(LC_ALL, old_locale);
2691
+ free(model_->label);
2692
+ free(model_);
2693
+ free(old_locale);
2694
+ return NULL;
2695
+ }
2696
+ }
2697
+
2698
+ nr_feature=model_->nr_feature;
2699
+ if(model_->bias>=0)
2700
+ n=nr_feature+1;
2701
+ else
2702
+ n=nr_feature;
2703
+ int w_size = n;
2704
+ int nr_w;
2705
+ if(nr_class==2 && param.solver_type != MCSVM_CS)
2706
+ nr_w = 1;
2707
+ else
2708
+ nr_w = nr_class;
2709
+
2710
+ model_->w=Malloc(double, w_size*nr_w);
2711
+ for(i=0; i<w_size; i++)
2712
+ {
2713
+ int j;
2714
+ for(j=0; j<nr_w; j++)
2715
+ fscanf(fp, "%lf ", &model_->w[i*nr_w+j]);
2716
+ fscanf(fp, "\n");
2717
+ }
2718
+
2719
+ setlocale(LC_ALL, old_locale);
2720
+ free(old_locale);
2721
+
2722
+ if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
2723
+
2724
+ return model_;
2725
+ }
2726
+
2727
// Accessor: the number of features the model was trained on
// (excluding any bias feature).
int get_nr_feature(const model *model_)
{
	return model_->nr_feature;
}
2731
+
2732
// Accessor: the number of classes in the model (2 for regression models).
int get_nr_class(const model *model_)
{
	return model_->nr_class;
}
2736
+
2737
+ void get_labels(const model *model_, int* label)
2738
+ {
2739
+ if (model_->label != NULL)
2740
+ for(int i=0;i<model_->nr_class;i++)
2741
+ label[i] = model_->label[i];
2742
+ }
2743
+
2744
+ void free_model_content(struct model *model_ptr)
2745
+ {
2746
+ if(model_ptr->w != NULL)
2747
+ free(model_ptr->w);
2748
+ if(model_ptr->label != NULL)
2749
+ free(model_ptr->label);
2750
+ }
2751
+
2752
+ void free_and_destroy_model(struct model **model_ptr_ptr)
2753
+ {
2754
+ struct model *model_ptr = *model_ptr_ptr;
2755
+ if(model_ptr != NULL)
2756
+ {
2757
+ free_model_content(model_ptr);
2758
+ free(model_ptr);
2759
+ }
2760
+ }
2761
+
2762
+ void destroy_param(parameter* param)
2763
+ {
2764
+ if(param->weight_label != NULL)
2765
+ free(param->weight_label);
2766
+ if(param->weight != NULL)
2767
+ free(param->weight);
2768
+ }
2769
+
2770
+ const char *check_parameter(const problem *prob, const parameter *param)
2771
+ {
2772
+ if(param->eps <= 0)
2773
+ return "eps <= 0";
2774
+
2775
+ if(param->C <= 0)
2776
+ return "C <= 0";
2777
+
2778
+ if(param->p < 0)
2779
+ return "p < 0";
2780
+
2781
+ if(param->solver_type != L2R_LR
2782
+ && param->solver_type != L2R_L2LOSS_SVC_DUAL
2783
+ && param->solver_type != L2R_L2LOSS_SVC
2784
+ && param->solver_type != L2R_L1LOSS_SVC_DUAL
2785
+ && param->solver_type != MCSVM_CS
2786
+ && param->solver_type != L1R_L2LOSS_SVC
2787
+ && param->solver_type != L1R_LR
2788
+ && param->solver_type != L2R_LR_DUAL
2789
+ && param->solver_type != L2R_L2LOSS_SVR
2790
+ && param->solver_type != L2R_L2LOSS_SVR_DUAL
2791
+ && param->solver_type != L2R_L1LOSS_SVR_DUAL)
2792
+ return "unknown solver type";
2793
+
2794
+ return NULL;
2795
+ }
2796
+
2797
+ int check_probability_model(const struct model *model_)
2798
+ {
2799
+ return (model_->param.solver_type==L2R_LR ||
2800
+ model_->param.solver_type==L2R_LR_DUAL ||
2801
+ model_->param.solver_type==L1R_LR);
2802
+ }
2803
+
2804
+ void set_print_string_function(void (*print_func)(const char*))
2805
+ {
2806
+ if (print_func == NULL)
2807
+ liblinear_print_string = &print_string_stdout;
2808
+ else
2809
+ liblinear_print_string = print_func;
2810
+ }
2811
+