liblinear-ruby 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +7 -0
  2. data/.gitignore +19 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +46 -0
  6. data/Rakefile +1 -0
  7. data/ext/Makefile +237 -0
  8. data/ext/blas.h +25 -0
  9. data/ext/blasp.h +430 -0
  10. data/ext/daxpy.c +49 -0
  11. data/ext/ddot.c +50 -0
  12. data/ext/dnrm2.c +62 -0
  13. data/ext/dscal.c +44 -0
  14. data/ext/extconf.rb +12 -0
  15. data/ext/liblinear_wrap.cxx +4646 -0
  16. data/ext/linear.cpp +2811 -0
  17. data/ext/linear.h +74 -0
  18. data/ext/linear.rb +357 -0
  19. data/ext/tron.cpp +235 -0
  20. data/ext/tron.h +34 -0
  21. data/lib/liblinear.rb +89 -0
  22. data/lib/liblinear/error.rb +4 -0
  23. data/lib/liblinear/model.rb +66 -0
  24. data/lib/liblinear/parameter.rb +42 -0
  25. data/lib/liblinear/problem.rb +55 -0
  26. data/lib/liblinear/version.rb +3 -0
  27. data/liblinear-1.93/COPYRIGHT +31 -0
  28. data/liblinear-1.93/Makefile +37 -0
  29. data/liblinear-1.93/Makefile.win +30 -0
  30. data/liblinear-1.93/README +531 -0
  31. data/liblinear-1.93/blas/Makefile +22 -0
  32. data/liblinear-1.93/blas/blas.a +0 -0
  33. data/liblinear-1.93/blas/blas.h +25 -0
  34. data/liblinear-1.93/blas/blasp.h +430 -0
  35. data/liblinear-1.93/blas/daxpy.c +49 -0
  36. data/liblinear-1.93/blas/daxpy.o +0 -0
  37. data/liblinear-1.93/blas/ddot.c +50 -0
  38. data/liblinear-1.93/blas/ddot.o +0 -0
  39. data/liblinear-1.93/blas/dnrm2.c +62 -0
  40. data/liblinear-1.93/blas/dnrm2.o +0 -0
  41. data/liblinear-1.93/blas/dscal.c +44 -0
  42. data/liblinear-1.93/blas/dscal.o +0 -0
  43. data/liblinear-1.93/heart_scale +270 -0
  44. data/liblinear-1.93/linear.cpp +2811 -0
  45. data/liblinear-1.93/linear.def +18 -0
  46. data/liblinear-1.93/linear.h +74 -0
  47. data/liblinear-1.93/linear.o +0 -0
  48. data/liblinear-1.93/matlab/Makefile +58 -0
  49. data/liblinear-1.93/matlab/README +197 -0
  50. data/liblinear-1.93/matlab/libsvmread.c +212 -0
  51. data/liblinear-1.93/matlab/libsvmwrite.c +106 -0
  52. data/liblinear-1.93/matlab/linear_model_matlab.c +176 -0
  53. data/liblinear-1.93/matlab/linear_model_matlab.h +2 -0
  54. data/liblinear-1.93/matlab/make.m +21 -0
  55. data/liblinear-1.93/matlab/predict.c +331 -0
  56. data/liblinear-1.93/matlab/train.c +418 -0
  57. data/liblinear-1.93/predict +0 -0
  58. data/liblinear-1.93/predict.c +245 -0
  59. data/liblinear-1.93/python/Makefile +4 -0
  60. data/liblinear-1.93/python/README +343 -0
  61. data/liblinear-1.93/python/liblinear.py +277 -0
  62. data/liblinear-1.93/python/liblinearutil.py +250 -0
  63. data/liblinear-1.93/ruby/liblinear.i +41 -0
  64. data/liblinear-1.93/ruby/liblinear_wrap.cxx +4646 -0
  65. data/liblinear-1.93/ruby/linear.h +74 -0
  66. data/liblinear-1.93/ruby/linear.o +0 -0
  67. data/liblinear-1.93/train +0 -0
  68. data/liblinear-1.93/train.c +399 -0
  69. data/liblinear-1.93/tron.cpp +235 -0
  70. data/liblinear-1.93/tron.h +34 -0
  71. data/liblinear-1.93/tron.o +0 -0
  72. data/liblinear-1.93/windows/liblinear.dll +0 -0
  73. data/liblinear-1.93/windows/libsvmread.mexw64 +0 -0
  74. data/liblinear-1.93/windows/libsvmwrite.mexw64 +0 -0
  75. data/liblinear-1.93/windows/predict.exe +0 -0
  76. data/liblinear-1.93/windows/predict.mexw64 +0 -0
  77. data/liblinear-1.93/windows/train.exe +0 -0
  78. data/liblinear-1.93/windows/train.mexw64 +0 -0
  79. data/liblinear-ruby.gemspec +24 -0
  80. metadata +152 -0
@@ -0,0 +1,2811 @@
+ #include <math.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <stdarg.h>
+ #include <locale.h>
+ #include "linear.h"
+ #include "tron.h"
+ typedef signed char schar;
+ template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
+ #ifndef min
+ template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
+ #endif
+ #ifndef max
+ template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
+ #endif
+ template <class S, class T> static inline void clone(T*& dst, S* src, int n)
+ {
+ dst = new T[n];
+ memcpy((void *)dst,(void *)src,sizeof(T)*n);
+ }
+ #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
+ #define INF HUGE_VAL
+
+ static void print_string_stdout(const char *s)
+ {
+ fputs(s,stdout);
+ fflush(stdout);
+ }
+
+ static void (*liblinear_print_string) (const char *) = &print_string_stdout;
+
+ #if 1
+ static void info(const char *fmt,...)
+ {
+ char buf[BUFSIZ];
+ va_list ap;
+ va_start(ap,fmt);
+ vsprintf(buf,fmt,ap);
+ va_end(ap);
+ (*liblinear_print_string)(buf);
+ }
+ #else
+ static void info(const char *fmt,...) {}
+ #endif
+
+ class l2r_lr_fun: public function
+ {
+ public:
+ l2r_lr_fun(const problem *prob, double *C);
+ ~l2r_lr_fun();
+
+ double fun(double *w);
+ void grad(double *w, double *g);
+ void Hv(double *s, double *Hs);
+
+ int get_nr_variable(void);
+
+ private:
+ void Xv(double *v, double *Xv);
+ void XTv(double *v, double *XTv);
+
+ double *C;
+ double *z;
+ double *D;
+ const problem *prob;
+ };
+
+ l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C)
+ {
+ int l=prob->l;
+
+ this->prob = prob;
+
+ z = new double[l];
+ D = new double[l];
+ this->C = C;
+ }
+
+ l2r_lr_fun::~l2r_lr_fun()
+ {
+ delete[] z;
+ delete[] D;
+ }
+
+
+ double l2r_lr_fun::fun(double *w)
+ {
+ int i;
+ double f=0;
+ double *y=prob->y;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+
+ Xv(w, z);
+
+ for(i=0;i<w_size;i++)
+ f += w[i]*w[i];
+ f /= 2.0;
+ for(i=0;i<l;i++)
+ {
+ double yz = y[i]*z[i];
+ if (yz >= 0)
+ f += C[i]*log(1 + exp(-yz));
+ else
+ f += C[i]*(-yz+log(1 + exp(yz)));
+ }
+
+ return(f);
+ }
+
+ void l2r_lr_fun::grad(double *w, double *g)
+ {
+ int i;
+ double *y=prob->y;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+
+ for(i=0;i<l;i++)
+ {
+ z[i] = 1/(1 + exp(-y[i]*z[i]));
+ D[i] = z[i]*(1-z[i]);
+ z[i] = C[i]*(z[i]-1)*y[i];
+ }
+ XTv(z, g);
+
+ for(i=0;i<w_size;i++)
+ g[i] = w[i] + g[i];
+ }
+
+ int l2r_lr_fun::get_nr_variable(void)
+ {
+ return prob->n;
+ }
+
+ void l2r_lr_fun::Hv(double *s, double *Hs)
+ {
+ int i;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+ double *wa = new double[l];
+
+ Xv(s, wa);
+ for(i=0;i<l;i++)
+ wa[i] = C[i]*D[i]*wa[i];
+
+ XTv(wa, Hs);
+ for(i=0;i<w_size;i++)
+ Hs[i] = s[i] + Hs[i];
+ delete[] wa;
+ }
+
+ void l2r_lr_fun::Xv(double *v, double *Xv)
+ {
+ int i;
+ int l=prob->l;
+ feature_node **x=prob->x;
+
+ for(i=0;i<l;i++)
+ {
+ feature_node *s=x[i];
+ Xv[i]=0;
+ while(s->index!=-1)
+ {
+ Xv[i]+=v[s->index-1]*s->value;
+ s++;
+ }
+ }
+ }
+
+ void l2r_lr_fun::XTv(double *v, double *XTv)
+ {
+ int i;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+ feature_node **x=prob->x;
+
+ for(i=0;i<w_size;i++)
+ XTv[i]=0;
+ for(i=0;i<l;i++)
+ {
+ feature_node *s=x[i];
+ while(s->index!=-1)
+ {
+ XTv[s->index-1]+=v[i]*s->value;
+ s++;
+ }
+ }
+ }
+
+ class l2r_l2_svc_fun: public function
+ {
+ public:
+ l2r_l2_svc_fun(const problem *prob, double *C);
+ ~l2r_l2_svc_fun();
+
+ double fun(double *w);
+ void grad(double *w, double *g);
+ void Hv(double *s, double *Hs);
+
+ int get_nr_variable(void);
+
+ protected:
+ void Xv(double *v, double *Xv);
+ void subXv(double *v, double *Xv);
+ void subXTv(double *v, double *XTv);
+
+ double *C;
+ double *z;
+ double *D;
+ int *I;
+ int sizeI;
+ const problem *prob;
+ };
+
+ l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C)
+ {
+ int l=prob->l;
+
+ this->prob = prob;
+
+ z = new double[l];
+ D = new double[l];
+ I = new int[l];
+ this->C = C;
+ }
+
+ l2r_l2_svc_fun::~l2r_l2_svc_fun()
+ {
+ delete[] z;
+ delete[] D;
+ delete[] I;
+ }
+
+ double l2r_l2_svc_fun::fun(double *w)
+ {
+ int i;
+ double f=0;
+ double *y=prob->y;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+
+ Xv(w, z);
+
+ for(i=0;i<w_size;i++)
+ f += w[i]*w[i];
+ f /= 2.0;
+ for(i=0;i<l;i++)
+ {
+ z[i] = y[i]*z[i];
+ double d = 1-z[i];
+ if (d > 0)
+ f += C[i]*d*d;
+ }
+
+ return(f);
+ }
+
+ void l2r_l2_svc_fun::grad(double *w, double *g)
+ {
+ int i;
+ double *y=prob->y;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+
+ sizeI = 0;
+ for (i=0;i<l;i++)
+ if (z[i] < 1)
+ {
+ z[sizeI] = C[i]*y[i]*(z[i]-1);
+ I[sizeI] = i;
+ sizeI++;
+ }
+ subXTv(z, g);
+
+ for(i=0;i<w_size;i++)
+ g[i] = w[i] + 2*g[i];
+ }
+
+ int l2r_l2_svc_fun::get_nr_variable(void)
+ {
+ return prob->n;
+ }
+
+ void l2r_l2_svc_fun::Hv(double *s, double *Hs)
+ {
+ int i;
+ int w_size=get_nr_variable();
+ double *wa = new double[sizeI];
+
+ subXv(s, wa);
+ for(i=0;i<sizeI;i++)
+ wa[i] = C[I[i]]*wa[i];
+
+ subXTv(wa, Hs);
+ for(i=0;i<w_size;i++)
+ Hs[i] = s[i] + 2*Hs[i];
+ delete[] wa;
+ }
+
+ void l2r_l2_svc_fun::Xv(double *v, double *Xv)
+ {
+ int i;
+ int l=prob->l;
+ feature_node **x=prob->x;
+
+ for(i=0;i<l;i++)
+ {
+ feature_node *s=x[i];
+ Xv[i]=0;
+ while(s->index!=-1)
+ {
+ Xv[i]+=v[s->index-1]*s->value;
+ s++;
+ }
+ }
+ }
+
+ void l2r_l2_svc_fun::subXv(double *v, double *Xv)
+ {
+ int i;
+ feature_node **x=prob->x;
+
+ for(i=0;i<sizeI;i++)
+ {
+ feature_node *s=x[I[i]];
+ Xv[i]=0;
+ while(s->index!=-1)
+ {
+ Xv[i]+=v[s->index-1]*s->value;
+ s++;
+ }
+ }
+ }
+
+ void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
+ {
+ int i;
+ int w_size=get_nr_variable();
+ feature_node **x=prob->x;
+
+ for(i=0;i<w_size;i++)
+ XTv[i]=0;
+ for(i=0;i<sizeI;i++)
+ {
+ feature_node *s=x[I[i]];
+ while(s->index!=-1)
+ {
+ XTv[s->index-1]+=v[i]*s->value;
+ s++;
+ }
+ }
+ }
+
+ class l2r_l2_svr_fun: public l2r_l2_svc_fun
+ {
+ public:
+ l2r_l2_svr_fun(const problem *prob, double *C, double p);
+
+ double fun(double *w);
+ void grad(double *w, double *g);
+
+ private:
+ double p;
+ };
+
+ l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *C, double p):
+ l2r_l2_svc_fun(prob, C)
+ {
+ this->p = p;
+ }
+
+ double l2r_l2_svr_fun::fun(double *w)
+ {
+ int i;
+ double f=0;
+ double *y=prob->y;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+ double d;
+
+ Xv(w, z);
+
+ for(i=0;i<w_size;i++)
+ f += w[i]*w[i];
+ f /= 2;
+ for(i=0;i<l;i++)
+ {
+ d = z[i] - y[i];
+ if(d < -p)
+ f += C[i]*(d+p)*(d+p);
+ else if(d > p)
+ f += C[i]*(d-p)*(d-p);
+ }
+
+ return(f);
+ }
+
+ void l2r_l2_svr_fun::grad(double *w, double *g)
+ {
+ int i;
+ double *y=prob->y;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+ double d;
+
+ sizeI = 0;
+ for(i=0;i<l;i++)
+ {
+ d = z[i] - y[i];
+
+ // generate index set I
+ if(d < -p)
+ {
+ z[sizeI] = C[i]*(d+p);
+ I[sizeI] = i;
+ sizeI++;
+ }
+ else if(d > p)
+ {
+ z[sizeI] = C[i]*(d-p);
+ I[sizeI] = i;
+ sizeI++;
+ }
+
+ }
+ subXTv(z, g);
+
+ for(i=0;i<w_size;i++)
+ g[i] = w[i] + 2*g[i];
+ }
+
+ // A coordinate descent algorithm for
+ // multi-class support vector machines by Crammer and Singer
+ //
+ // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
+ // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
+ //
+ // where e^m_i = 0 if y_i = m,
+ // e^m_i = 1 if y_i != m,
+ // C^m_i = C if m = y_i,
+ // C^m_i = 0 if m != y_i,
+ // and w_m(\alpha) = \sum_i \alpha^m_i x_i
+ //
+ // Given:
+ // x, y, C
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Appendix of LIBLINEAR paper, Fan et al. (2008)
+
+ #define GETI(i) ((int) prob->y[i])
+ // To support weights for instances, use GETI(i) (i)
+
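Before the solver class itself, a minimal usage sketch may help orient readers: the Crammer-Singer path is reached through the public train() API declared in linear.h with solver_type MCSVM_CS. This sketch is not part of the release; the toy data, labels, and settings are illustrative only.

#include <cstdio>
#include "linear.h"

int main()
{
    // Each row is a sparse vector of 1-based (index, value) pairs,
    // terminated by index = -1.
    feature_node r0[] = {{1, 1.0}, {-1, 0.0}};
    feature_node r1[] = {{2, 1.0}, {-1, 0.0}};
    feature_node r2[] = {{1, -1.0}, {-1, 0.0}};
    feature_node *rows[] = {r0, r1, r2};
    double labels[] = {0, 1, 2};      // three classes

    problem prob;
    prob.l = 3;                       // number of instances
    prob.n = 2;                       // number of features
    prob.y = labels;
    prob.x = rows;
    prob.bias = -1;                   // no bias term

    parameter param;
    param.solver_type = MCSVM_CS;     // the solver implemented below
    param.C = 1;
    param.eps = 0.1;
    param.nr_weight = 0;              // no per-class weight overrides
    param.weight_label = NULL;
    param.weight = NULL;
    param.p = 0.1;                    // SVR-only parameter, unused here

    model *m = train(&prob, &param);
    printf("predicted label: %g\n", predict(m, r0));
    free_and_destroy_model(&m);
    return 0;
}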
+ class Solver_MCSVM_CS
+ {
+ public:
+ Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
+ ~Solver_MCSVM_CS();
+ void Solve(double *w);
+ private:
+ void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
+ bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
+ double *B, *C, *G;
+ int w_size, l;
+ int nr_class;
+ int max_iter;
+ double eps;
+ const problem *prob;
+ };
+
+ Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
+ {
+ this->w_size = prob->n;
+ this->l = prob->l;
+ this->nr_class = nr_class;
+ this->eps = eps;
+ this->max_iter = max_iter;
+ this->prob = prob;
+ this->B = new double[nr_class];
+ this->G = new double[nr_class];
+ this->C = weighted_C;
+ }
+
+ Solver_MCSVM_CS::~Solver_MCSVM_CS()
+ {
+ delete[] B;
+ delete[] G;
+ }
+
+ int compare_double(const void *a, const void *b)
+ {
+ if(*(double *)a > *(double *)b)
+ return -1;
+ if(*(double *)a < *(double *)b)
+ return 1;
+ return 0;
+ }
+
+ void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
+ {
+ int r;
+ double *D;
+
+ clone(D, B, active_i);
+ if(yi < active_i)
+ D[yi] += A_i*C_yi;
+ qsort(D, active_i, sizeof(double), compare_double);
+
+ double beta = D[0] - A_i*C_yi;
+ for(r=1;r<active_i && beta<r*D[r];r++)
+ beta += D[r];
+ beta /= r;
+
+ for(r=0;r<active_i;r++)
+ {
+ if(r == yi)
+ alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
+ else
+ alpha_new[r] = min((double)0, (beta - B[r])/A_i);
+ }
+ delete[] D;
+ }
+
+ bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
+ {
+ double bound = 0;
+ if(m == yi)
+ bound = C[GETI(i)];
+ if(alpha_i == bound && G[m] < minG)
+ return true;
+ return false;
+ }
+
+ void Solver_MCSVM_CS::Solve(double *w)
+ {
+ int i, m, s;
+ int iter = 0;
+ double *alpha = new double[l*nr_class];
+ double *alpha_new = new double[nr_class];
+ int *index = new int[l];
+ double *QD = new double[l];
+ int *d_ind = new int[nr_class];
+ double *d_val = new double[nr_class];
+ int *alpha_index = new int[nr_class*l];
+ int *y_index = new int[l];
+ int active_size = l;
+ int *active_size_i = new int[l];
+ double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
+ bool start_from_all = true;
+
+ // Initial alpha can be set here. Note that
+ // sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l-1
+ // alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
+ // alpha[i*nr_class+m] <= 0 if prob->y[i] != m
+ // If initial alpha isn't zero, uncomment the for loop below to initialize w
+ for(i=0;i<l*nr_class;i++)
+ alpha[i] = 0;
+
+ for(i=0;i<w_size*nr_class;i++)
+ w[i] = 0;
+ for(i=0;i<l;i++)
+ {
+ for(m=0;m<nr_class;m++)
+ alpha_index[i*nr_class+m] = m;
+ feature_node *xi = prob->x[i];
+ QD[i] = 0;
+ while(xi->index != -1)
+ {
+ double val = xi->value;
+ QD[i] += val*val;
+
+ // Uncomment the for loop if initial alpha isn't zero
+ // for(m=0; m<nr_class; m++)
+ // w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
+ xi++;
+ }
+ active_size_i[i] = nr_class;
+ y_index[i] = (int)prob->y[i];
+ index[i] = i;
+ }
+
+ while(iter < max_iter)
+ {
+ double stopping = -INF;
+ for(i=0;i<active_size;i++)
+ {
+ int j = i+rand()%(active_size-i);
+ swap(index[i], index[j]);
+ }
+ for(s=0;s<active_size;s++)
+ {
+ i = index[s];
+ double Ai = QD[i];
+ double *alpha_i = &alpha[i*nr_class];
+ int *alpha_index_i = &alpha_index[i*nr_class];
+
+ if(Ai > 0)
+ {
+ for(m=0;m<active_size_i[i];m++)
+ G[m] = 1;
+ if(y_index[i] < active_size_i[i])
+ G[y_index[i]] = 0;
+
+ feature_node *xi = prob->x[i];
+ while(xi->index!= -1)
+ {
+ double *w_i = &w[(xi->index-1)*nr_class];
+ for(m=0;m<active_size_i[i];m++)
+ G[m] += w_i[alpha_index_i[m]]*(xi->value);
+ xi++;
+ }
+
+ double minG = INF;
+ double maxG = -INF;
+ for(m=0;m<active_size_i[i];m++)
+ {
+ if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
+ minG = G[m];
+ if(G[m] > maxG)
+ maxG = G[m];
+ }
+ if(y_index[i] < active_size_i[i])
+ if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
+ minG = G[y_index[i]];
+
+ for(m=0;m<active_size_i[i];m++)
+ {
+ if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
+ {
+ active_size_i[i]--;
+ while(active_size_i[i]>m)
+ {
+ if(!be_shrunk(i, active_size_i[i], y_index[i],
+ alpha_i[alpha_index_i[active_size_i[i]]], minG))
+ {
+ swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
+ swap(G[m], G[active_size_i[i]]);
+ if(y_index[i] == active_size_i[i])
+ y_index[i] = m;
+ else if(y_index[i] == m)
+ y_index[i] = active_size_i[i];
+ break;
+ }
+ active_size_i[i]--;
+ }
+ }
+ }
+
+ if(active_size_i[i] <= 1)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+
+ if(maxG-minG <= 1e-12)
+ continue;
+ else
+ stopping = max(maxG - minG, stopping);
+
+ for(m=0;m<active_size_i[i];m++)
+ B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
+
+ solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
+ int nz_d = 0;
+ for(m=0;m<active_size_i[i];m++)
+ {
+ double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
+ alpha_i[alpha_index_i[m]] = alpha_new[m];
+ if(fabs(d) >= 1e-12)
+ {
+ d_ind[nz_d] = alpha_index_i[m];
+ d_val[nz_d] = d;
+ nz_d++;
+ }
+ }
+
+ xi = prob->x[i];
+ while(xi->index != -1)
+ {
+ double *w_i = &w[(xi->index-1)*nr_class];
+ for(m=0;m<nz_d;m++)
+ w_i[d_ind[m]] += d_val[m]*xi->value;
+ xi++;
+ }
+ }
+ }
+
+ iter++;
+ if(iter % 10 == 0)
+ {
+ info(".");
+ }
+
+ if(stopping < eps_shrink)
+ {
+ if(stopping < eps && start_from_all == true)
+ break;
+ else
+ {
+ active_size = l;
+ for(i=0;i<l;i++)
+ active_size_i[i] = nr_class;
+ info("*");
+ eps_shrink = max(eps_shrink/2, eps);
+ start_from_all = true;
+ }
+ }
+ else
+ start_from_all = false;
+ }
+
+ info("\noptimization finished, #iter = %d\n",iter);
+ if (iter >= max_iter)
+ info("\nWARNING: reaching max number of iterations\n");
+
+ // calculate objective value
+ double v = 0;
+ int nSV = 0;
+ for(i=0;i<w_size*nr_class;i++)
+ v += w[i]*w[i];
+ v = 0.5*v;
+ for(i=0;i<l*nr_class;i++)
+ {
+ v += alpha[i];
+ if(fabs(alpha[i]) > 0)
+ nSV++;
+ }
+ for(i=0;i<l;i++)
+ v -= alpha[i*nr_class+(int)prob->y[i]];
+ info("Objective value = %lf\n",v);
+ info("nSV = %d\n",nSV);
+
+ delete [] alpha;
+ delete [] alpha_new;
+ delete [] index;
+ delete [] QD;
+ delete [] d_ind;
+ delete [] d_val;
+ delete [] alpha_index;
+ delete [] y_index;
+ delete [] active_size_i;
+ }
+
+ // A coordinate descent algorithm for
+ // L1-loss and L2-loss SVM dual problems
+ //
+ // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
+ // s.t. 0 <= \alpha_i <= upper_bound_i,
+ //
+ // where Qij = yi yj xi^T xj and
+ // D is a diagonal matrix
+ //
+ // In L1-SVM case:
+ // upper_bound_i = Cp if y_i = 1
+ // upper_bound_i = Cn if y_i = -1
+ // D_ii = 0
+ // In L2-SVM case:
+ // upper_bound_i = INF
+ // D_ii = 1/(2*Cp) if y_i = 1
+ // D_ii = 1/(2*Cn) if y_i = -1
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Algorithm 3 of Hsieh et al., ICML 2008
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
+
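Each step of the solver below minimizes the dual in a single alpha_i with all other variables fixed: the partial gradient is G = y_i w^T x_i - 1 + D_ii alpha_i, and the exact one-variable minimizer is the projected update alpha_i <- min(max(alpha_i - G/(Q_ii + D_ii), 0), U), with w = sum_i y_i alpha_i x_i maintained incrementally. A dense, self-contained sketch of one such pass for the L1-loss case (no shrinking or random permutation; all names are illustrative):

#include <algorithm>
#include <vector>

// One epoch of dual coordinate descent for L1-loss SVC on dense data,
// mirroring the alpha update in solve_l2r_l1l2_svc (D_ii = 0, box [0, C]).
void cd_epoch(const std::vector<std::vector<double> > &X,
              const std::vector<int> &y, double C,
              std::vector<double> &alpha, std::vector<double> &w)
{
    for (size_t i = 0; i < X.size(); i++)
    {
        double wx = 0, QD = 0;
        for (size_t j = 0; j < w.size(); j++)
        {
            wx += w[j] * X[i][j];        // w^T x_i
            QD += X[i][j] * X[i][j];     // Q_ii = x_i^T x_i
        }
        if (QD <= 0) continue;           // skip all-zero rows
        double G = y[i] * wx - 1;        // dual gradient in alpha_i
        double a_old = alpha[i];
        alpha[i] = std::min(std::max(a_old - G / QD, 0.0), C);
        double d = (alpha[i] - a_old) * y[i];
        for (size_t j = 0; j < w.size(); j++)
            w[j] += d * X[i][j];         // keep w = sum_i y_i alpha_i x_i
    }
}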
+ static void solve_l2r_l1l2_svc(
+ const problem *prob, double *w, double eps,
+ double Cp, double Cn, int solver_type)
+ {
+ int l = prob->l;
+ int w_size = prob->n;
+ int i, s, iter = 0;
+ double C, d, G;
+ double *QD = new double[l];
+ int max_iter = 1000;
+ int *index = new int[l];
+ double *alpha = new double[l];
+ schar *y = new schar[l];
+ int active_size = l;
+
+ // PG: projected gradient, for shrinking and stopping
+ double PG;
+ double PGmax_old = INF;
+ double PGmin_old = -INF;
+ double PGmax_new, PGmin_new;
+
+ // default solver_type: L2R_L2LOSS_SVC_DUAL
+ double diag[3] = {0.5/Cn, 0, 0.5/Cp};
+ double upper_bound[3] = {INF, 0, INF};
+ if(solver_type == L2R_L1LOSS_SVC_DUAL)
+ {
+ diag[0] = 0;
+ diag[2] = 0;
+ upper_bound[0] = Cn;
+ upper_bound[2] = Cp;
+ }
+
+ for(i=0; i<l; i++)
+ {
+ if(prob->y[i] > 0)
+ {
+ y[i] = +1;
+ }
+ else
+ {
+ y[i] = -1;
+ }
+ }
+
+ // Initial alpha can be set here. Note that
+ // 0 <= alpha[i] <= upper_bound[GETI(i)]
+ for(i=0; i<l; i++)
+ alpha[i] = 0;
+
+ for(i=0; i<w_size; i++)
+ w[i] = 0;
+ for(i=0; i<l; i++)
+ {
+ QD[i] = diag[GETI(i)];
+
+ feature_node *xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ double val = xi->value;
+ QD[i] += val*val;
+ w[xi->index-1] += y[i]*alpha[i]*val;
+ xi++;
+ }
+ index[i] = i;
+ }
+
+ while (iter < max_iter)
+ {
+ PGmax_new = -INF;
+ PGmin_new = INF;
+
+ for (i=0; i<active_size; i++)
+ {
+ int j = i+rand()%(active_size-i);
+ swap(index[i], index[j]);
+ }
+
+ for (s=0; s<active_size; s++)
+ {
+ i = index[s];
+ G = 0;
+ schar yi = y[i];
+
+ feature_node *xi = prob->x[i];
+ while(xi->index!= -1)
+ {
+ G += w[xi->index-1]*(xi->value);
+ xi++;
+ }
+ G = G*yi-1;
+
+ C = upper_bound[GETI(i)];
+ G += alpha[i]*diag[GETI(i)];
+
+ PG = 0;
+ if (alpha[i] == 0)
+ {
+ if (G > PGmax_old)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ else if (G < 0)
+ PG = G;
+ }
+ else if (alpha[i] == C)
+ {
+ if (G < PGmin_old)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ else if (G > 0)
+ PG = G;
+ }
+ else
+ PG = G;
+
+ PGmax_new = max(PGmax_new, PG);
+ PGmin_new = min(PGmin_new, PG);
+
+ if(fabs(PG) > 1.0e-12)
+ {
+ double alpha_old = alpha[i];
+ alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
+ d = (alpha[i] - alpha_old)*yi;
+ xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ w[xi->index-1] += d*xi->value;
+ xi++;
+ }
+ }
+ }
+
+ iter++;
+ if(iter % 10 == 0)
+ info(".");
+
+ if(PGmax_new - PGmin_new <= eps)
+ {
+ if(active_size == l)
+ break;
+ else
+ {
+ active_size = l;
+ info("*");
+ PGmax_old = INF;
+ PGmin_old = -INF;
+ continue;
+ }
+ }
+ PGmax_old = PGmax_new;
+ PGmin_old = PGmin_new;
+ if (PGmax_old <= 0)
+ PGmax_old = INF;
+ if (PGmin_old >= 0)
+ PGmin_old = -INF;
+ }
+
+ info("\noptimization finished, #iter = %d\n",iter);
+ if (iter >= max_iter)
+ info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
+
+ // calculate objective value
+
+ double v = 0;
+ int nSV = 0;
+ for(i=0; i<w_size; i++)
+ v += w[i]*w[i];
+ for(i=0; i<l; i++)
+ {
+ v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
+ if(alpha[i] > 0)
+ ++nSV;
+ }
+ info("Objective value = %lf\n",v/2);
+ info("nSV = %d\n",nSV);
+
+ delete [] QD;
+ delete [] alpha;
+ delete [] y;
+ delete [] index;
+ }
+
+
+ // A coordinate descent algorithm for
+ // L1-loss and L2-loss epsilon-SVR dual problem
+ //
+ // min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
+ // s.t. -upper_bound_i <= \beta_i <= upper_bound_i,
+ //
+ // where Qij = xi^T xj and
+ // D is a diagonal matrix
+ //
+ // In L1-SVM case:
+ // upper_bound_i = C
+ // lambda_i = 0
+ // In L2-SVM case:
+ // upper_bound_i = INF
+ // lambda_i = 1/(2*C)
+ //
+ // Given:
+ // x, y, p, C
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Algorithm 4 of Ho and Lin, 2012
+
+ #undef GETI
+ #define GETI(i) (0)
+ // To support weights for instances, use GETI(i) (i)
+
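The "Newton direction d" computed inside the function below has a closed form: for the one-variable model f(z) = 0.5*H*z^2 + G*z + p*|beta_i + z| with H > 0, the optimality conditions on the two smooth pieces and at the kink reduce to exactly three branches on Gp = G + p and Gn = G - p. A standalone sketch of that case split (function name is illustrative):

// Minimizer direction for f(z) = 0.5*H*z^2 + G*z + p*|beta + z|, H > 0:
// the same three branches solve_l2r_l1l2_svr applies per coordinate.
double svr_newton_direction(double G, double H, double p, double beta)
{
    double Gp = G + p, Gn = G - p;
    if (Gp < H * beta) return -Gp / H;   // optimum lies where beta + z > 0
    if (Gn > H * beta) return -Gn / H;   // optimum lies where beta + z < 0
    return -beta;                        // optimum at the kink, beta + z = 0
}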
+ static void solve_l2r_l1l2_svr(
+ const problem *prob, double *w, const parameter *param,
+ int solver_type)
+ {
+ int l = prob->l;
+ double C = param->C;
+ double p = param->p;
+ int w_size = prob->n;
+ double eps = param->eps;
+ int i, s, iter = 0;
+ int max_iter = 1000;
+ int active_size = l;
+ int *index = new int[l];
+
+ double d, G, H;
+ double Gmax_old = INF;
+ double Gmax_new, Gnorm1_new;
+ double Gnorm1_init;
+ double *beta = new double[l];
+ double *QD = new double[l];
+ double *y = prob->y;
+
+ // L2R_L2LOSS_SVR_DUAL
+ double lambda[1], upper_bound[1];
+ lambda[0] = 0.5/C;
+ upper_bound[0] = INF;
+
+ if(solver_type == L2R_L1LOSS_SVR_DUAL)
+ {
+ lambda[0] = 0;
+ upper_bound[0] = C;
+ }
+
+ // Initial beta can be set here. Note that
+ // -upper_bound <= beta[i] <= upper_bound
+ for(i=0; i<l; i++)
+ beta[i] = 0;
+
+ for(i=0; i<w_size; i++)
+ w[i] = 0;
+ for(i=0; i<l; i++)
+ {
+ QD[i] = 0;
+ feature_node *xi = prob->x[i];
+ while(xi->index != -1)
+ {
+ double val = xi->value;
+ QD[i] += val*val;
+ w[xi->index-1] += beta[i]*val;
+ xi++;
+ }
+
+ index[i] = i;
+ }
+
+
+ while(iter < max_iter)
+ {
+ Gmax_new = 0;
+ Gnorm1_new = 0;
+
+ for(i=0; i<active_size; i++)
+ {
+ int j = i+rand()%(active_size-i);
+ swap(index[i], index[j]);
+ }
+
+ for(s=0; s<active_size; s++)
+ {
+ i = index[s];
+ G = -y[i] + lambda[GETI(i)]*beta[i];
+ H = QD[i] + lambda[GETI(i)];
+
+ feature_node *xi = prob->x[i];
+ while(xi->index != -1)
+ {
+ int ind = xi->index-1;
+ double val = xi->value;
+ G += val*w[ind];
+ xi++;
+ }
+
+ double Gp = G+p;
+ double Gn = G-p;
+ double violation = 0;
+ if(beta[i] == 0)
+ {
+ if(Gp < 0)
+ violation = -Gp;
+ else if(Gn > 0)
+ violation = Gn;
+ else if(Gp>Gmax_old && Gn<-Gmax_old)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ }
+ else if(beta[i] >= upper_bound[GETI(i)])
+ {
+ if(Gp > 0)
+ violation = Gp;
+ else if(Gp < -Gmax_old)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ }
+ else if(beta[i] <= -upper_bound[GETI(i)])
+ {
+ if(Gn < 0)
+ violation = -Gn;
+ else if(Gn > Gmax_old)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ }
+ else if(beta[i] > 0)
+ violation = fabs(Gp);
+ else
+ violation = fabs(Gn);
+
+ Gmax_new = max(Gmax_new, violation);
+ Gnorm1_new += violation;
+
+ // obtain Newton direction d
+ if(Gp < H*beta[i])
+ d = -Gp/H;
+ else if(Gn > H*beta[i])
+ d = -Gn/H;
+ else
+ d = -beta[i];
+
+ if(fabs(d) < 1.0e-12)
+ continue;
+
+ double beta_old = beta[i];
+ beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
+ d = beta[i]-beta_old;
+
+ if(d != 0)
+ {
+ xi = prob->x[i];
+ while(xi->index != -1)
+ {
+ w[xi->index-1] += d*xi->value;
+ xi++;
+ }
+ }
+ }
+
+ if(iter == 0)
+ Gnorm1_init = Gnorm1_new;
+ iter++;
+ if(iter % 10 == 0)
+ info(".");
+
+ if(Gnorm1_new <= eps*Gnorm1_init)
+ {
+ if(active_size == l)
+ break;
+ else
+ {
+ active_size = l;
+ info("*");
+ Gmax_old = INF;
+ continue;
+ }
+ }
+
+ Gmax_old = Gmax_new;
+ }
+
+ info("\noptimization finished, #iter = %d\n", iter);
+ if(iter >= max_iter)
+ info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n");
+
+ // calculate objective value
+ double v = 0;
+ int nSV = 0;
+ for(i=0; i<w_size; i++)
+ v += w[i]*w[i];
+ v = 0.5*v;
+ for(i=0; i<l; i++)
+ {
+ v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
+ if(beta[i] != 0)
+ nSV++;
+ }
+
+ info("Objective value = %lf\n", v);
+ info("nSV = %d\n",nSV);
+
+ delete [] beta;
+ delete [] QD;
+ delete [] index;
+ }
+
+
+ // A coordinate descent algorithm for
+ // the dual of L2-regularized logistic regression problems
+ //
+ // min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
+ // s.t. 0 <= \alpha_i <= upper_bound_i,
+ //
+ // where Qij = yi yj xi^T xj and
+ // upper_bound_i = Cp if y_i = 1
+ // upper_bound_i = Cn if y_i = -1
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Algorithm 5 of Yu et al., MLJ 2010
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
+
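Per coordinate, the solver below minimizes g(z) = z*log(z) + (C-z)*log(C-z) + 0.5*a*(z-alpha_old)^2 + b*(z-alpha_old) over (0, C), where b already carries the sign chosen between g_1 and g_2, using a damped Newton iteration with g'(z) = a*(z-alpha_old) + b + log(z/(C-z)) and g''(z) = a + C/((C-z)*z). A standalone sketch of that inner loop (illustrative name; tolerances mirror the defaults used below):

#include <cmath>

// Damped Newton method for g'(z) = a*(z - alpha_old) + b + log(z/(C-z))
// on the open interval (0, C); multiplying z by 0.1 when a step would
// leave the domain mirrors the eta damping in solve_l2r_lr_dual.
double inner_newton(double a, double b, double C, double alpha_old,
                    double innereps = 1e-2, int max_inner_iter = 100)
{
    double z = alpha_old;
    if (C - z < 0.5 * C)
        z = 0.1 * z;                          // start away from the C boundary
    double gp = a * (z - alpha_old) + b + std::log(z / (C - z));
    for (int it = 0; it < max_inner_iter && std::fabs(gp) >= innereps; it++)
    {
        double gpp = a + C / ((C - z) * z);   // g''(z) > 0 on (0, C)
        double tmpz = z - gp / gpp;
        z = (tmpz <= 0) ? 0.1 * z : tmpz;     // damp steps that leave (0, C)
        gp = a * (z - alpha_old) + b + std::log(z / (C - z));
    }
    return z;
}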
+ void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
+ {
+ int l = prob->l;
+ int w_size = prob->n;
+ int i, s, iter = 0;
+ double *xTx = new double[l];
+ int max_iter = 1000;
+ int *index = new int[l];
+ double *alpha = new double[2*l]; // store alpha and C - alpha
+ schar *y = new schar[l];
+ int max_inner_iter = 100; // for inner Newton
+ double innereps = 1e-2;
+ double innereps_min = min(1e-8, eps);
+ double upper_bound[3] = {Cn, 0, Cp};
+
+ for(i=0; i<l; i++)
+ {
+ if(prob->y[i] > 0)
+ {
+ y[i] = +1;
+ }
+ else
+ {
+ y[i] = -1;
+ }
+ }
+
+ // Initial alpha can be set here. Note that
+ // 0 < alpha[i] < upper_bound[GETI(i)]
+ // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
+ for(i=0; i<l; i++)
+ {
+ alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
+ alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
+ }
+
+ for(i=0; i<w_size; i++)
+ w[i] = 0;
+ for(i=0; i<l; i++)
+ {
+ xTx[i] = 0;
+ feature_node *xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ double val = xi->value;
+ xTx[i] += val*val;
+ w[xi->index-1] += y[i]*alpha[2*i]*val;
+ xi++;
+ }
+ index[i] = i;
+ }
+
+ while (iter < max_iter)
+ {
+ for (i=0; i<l; i++)
+ {
+ int j = i+rand()%(l-i);
+ swap(index[i], index[j]);
+ }
+ int newton_iter = 0;
+ double Gmax = 0;
+ for (s=0; s<l; s++)
+ {
+ i = index[s];
+ schar yi = y[i];
+ double C = upper_bound[GETI(i)];
+ double ywTx = 0, xisq = xTx[i];
+ feature_node *xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ ywTx += w[xi->index-1]*xi->value;
+ xi++;
+ }
+ ywTx *= y[i];
+ double a = xisq, b = ywTx;
+
+ // Decide to minimize g_1(z) or g_2(z)
+ int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
+ if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
+ {
+ ind1 = 2*i+1;
+ ind2 = 2*i;
+ sign = -1;
+ }
+
+ // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
+ double alpha_old = alpha[ind1];
+ double z = alpha_old;
+ if(C - z < 0.5 * C)
+ z = 0.1*z;
+ double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
+ Gmax = max(Gmax, fabs(gp));
+
+ // Newton method on the sub-problem
+ const double eta = 0.1; // xi in the paper
+ int inner_iter = 0;
+ while (inner_iter <= max_inner_iter)
+ {
+ if(fabs(gp) < innereps)
+ break;
+ double gpp = a + C/(C-z)/z;
+ double tmpz = z - gp/gpp;
+ if(tmpz <= 0)
+ z *= eta;
+ else // tmpz in (0, C)
+ z = tmpz;
+ gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
+ newton_iter++;
+ inner_iter++;
+ }
+
+ if(inner_iter > 0) // update w
+ {
+ alpha[ind1] = z;
+ alpha[ind2] = C-z;
+ xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ w[xi->index-1] += sign*(z-alpha_old)*yi*xi->value;
+ xi++;
+ }
+ }
+ }
+
+ iter++;
+ if(iter % 10 == 0)
+ info(".");
+
+ if(Gmax < eps)
+ break;
+
+ if(newton_iter <= l/10)
+ innereps = max(innereps_min, 0.1*innereps);
+
+ }
+
+ info("\noptimization finished, #iter = %d\n",iter);
+ if (iter >= max_iter)
+ info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
+
+ // calculate objective value
+
+ double v = 0;
+ for(i=0; i<w_size; i++)
+ v += w[i] * w[i];
+ v *= 0.5;
+ for(i=0; i<l; i++)
+ v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
+ - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
+ info("Objective value = %lf\n", v);
+
+ delete [] xTx;
+ delete [] alpha;
+ delete [] y;
+ delete [] index;
+ }
+
+ // A coordinate descent algorithm for
+ // L1-regularized L2-loss support vector classification
+ //
+ // min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
+
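For reference, the objective this solver minimizes and reports as "Objective value" can be recomputed independently; a small dense sketch (illustrative names; Ci holds the per-instance cost, Cp or Cn by label):

#include <cmath>
#include <vector>

// Primal objective of L1-regularized L2-loss SVC:
//   f(w) = ||w||_1 + sum_i Ci * max(0, 1 - y_i * w^T x_i)^2
double l1r_l2svc_objective(const std::vector<std::vector<double> > &X,
                           const std::vector<int> &y,
                           const std::vector<double> &Ci,
                           const std::vector<double> &w)
{
    double f = 0;
    for (size_t j = 0; j < w.size(); j++)
        f += std::fabs(w[j]);                    // ||w||_1
    for (size_t i = 0; i < X.size(); i++)
    {
        double wx = 0;
        for (size_t j = 0; j < w.size(); j++)
            wx += w[j] * X[i][j];
        double b = 1 - y[i] * wx;                // b = 1 - y w^T x, as below
        if (b > 0)
            f += Ci[i] * b * b;                  // squared hinge
    }
    return f;
}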
+ static void solve_l1r_l2_svc(
+ problem *prob_col, double *w, double eps,
+ double Cp, double Cn)
+ {
+ int l = prob_col->l;
+ int w_size = prob_col->n;
+ int j, s, iter = 0;
+ int max_iter = 1000;
+ int active_size = w_size;
+ int max_num_linesearch = 20;
+
+ double sigma = 0.01;
+ double d, G_loss, G, H;
+ double Gmax_old = INF;
+ double Gmax_new, Gnorm1_new;
+ double Gnorm1_init;
+ double d_old, d_diff;
+ double loss_old, loss_new;
+ double appxcond, cond;
+
+ int *index = new int[w_size];
+ schar *y = new schar[l];
+ double *b = new double[l]; // b = 1-ywTx
+ double *xj_sq = new double[w_size];
+ feature_node *x;
+
+ double C[3] = {Cn,0,Cp};
+
+ // Initial w can be set here.
+ for(j=0; j<w_size; j++)
+ w[j] = 0;
+
+ for(j=0; j<l; j++)
+ {
+ b[j] = 1;
+ if(prob_col->y[j] > 0)
+ y[j] = 1;
+ else
+ y[j] = -1;
+ }
+ for(j=0; j<w_size; j++)
+ {
+ index[j] = j;
+ xj_sq[j] = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ x->value *= y[ind]; // x->value stores yi*xij
+ double val = x->value;
+ b[ind] -= w[j]*val;
+ xj_sq[j] += C[GETI(ind)]*val*val;
+ x++;
+ }
+ }
+
+ while(iter < max_iter)
+ {
+ Gmax_new = 0;
+ Gnorm1_new = 0;
+
+ for(j=0; j<active_size; j++)
+ {
+ int i = j+rand()%(active_size-j);
+ swap(index[i], index[j]);
+ }
+
+ for(s=0; s<active_size; s++)
+ {
+ j = index[s];
+ G_loss = 0;
+ H = 0;
+
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ if(b[ind] > 0)
+ {
+ double val = x->value;
+ double tmp = C[GETI(ind)]*val;
+ G_loss -= tmp*b[ind];
+ H += tmp*val;
+ }
+ x++;
+ }
+ G_loss *= 2;
+
+ G = G_loss;
+ H *= 2;
+ H = max(H, 1e-12);
+
+ double Gp = G+1;
+ double Gn = G-1;
+ double violation = 0;
+ if(w[j] == 0)
+ {
+ if(Gp < 0)
+ violation = -Gp;
+ else if(Gn > 0)
+ violation = Gn;
+ else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ }
+ else if(w[j] > 0)
+ violation = fabs(Gp);
+ else
+ violation = fabs(Gn);
+
+ Gmax_new = max(Gmax_new, violation);
+ Gnorm1_new += violation;
+
+ // obtain Newton direction d
+ if(Gp < H*w[j])
+ d = -Gp/H;
+ else if(Gn > H*w[j])
+ d = -Gn/H;
+ else
+ d = -w[j];
+
+ if(fabs(d) < 1.0e-12)
+ continue;
+
+ double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
+ d_old = 0;
+ int num_linesearch;
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
+ {
+ d_diff = d_old - d;
+ cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
+
+ appxcond = xj_sq[j]*d*d + G_loss*d + cond;
+ if(appxcond <= 0)
+ {
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ b[x->index-1] += d_diff*x->value;
+ x++;
+ }
+ break;
+ }
+
+ if(num_linesearch == 0)
+ {
+ loss_old = 0;
+ loss_new = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ if(b[ind] > 0)
+ loss_old += C[GETI(ind)]*b[ind]*b[ind];
+ double b_new = b[ind] + d_diff*x->value;
+ b[ind] = b_new;
+ if(b_new > 0)
+ loss_new += C[GETI(ind)]*b_new*b_new;
+ x++;
+ }
+ }
+ else
+ {
+ loss_new = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ double b_new = b[ind] + d_diff*x->value;
+ b[ind] = b_new;
+ if(b_new > 0)
+ loss_new += C[GETI(ind)]*b_new*b_new;
+ x++;
+ }
+ }
+
+ cond = cond + loss_new - loss_old;
+ if(cond <= 0)
+ break;
+ else
+ {
+ d_old = d;
+ d *= 0.5;
+ delta *= 0.5;
+ }
+ }
+
+ w[j] += d;
+
+ // recompute b[] if line search takes too many steps
+ if(num_linesearch >= max_num_linesearch)
+ {
+ info("#");
+ for(int i=0; i<l; i++)
+ b[i] = 1;
+
+ for(int i=0; i<w_size; i++)
+ {
+ if(w[i]==0) continue;
+ x = prob_col->x[i];
+ while(x->index != -1)
+ {
+ b[x->index-1] -= w[i]*x->value;
+ x++;
+ }
+ }
+ }
+ }
+
+ if(iter == 0)
+ Gnorm1_init = Gnorm1_new;
+ iter++;
+ if(iter % 10 == 0)
+ info(".");
+
+ if(Gnorm1_new <= eps*Gnorm1_init)
+ {
+ if(active_size == w_size)
+ break;
+ else
+ {
+ active_size = w_size;
+ info("*");
+ Gmax_old = INF;
+ continue;
+ }
+ }
+
+ Gmax_old = Gmax_new;
+ }
+
+ info("\noptimization finished, #iter = %d\n", iter);
+ if(iter >= max_iter)
+ info("\nWARNING: reaching max number of iterations\n");
+
+ // calculate objective value
+
+ double v = 0;
+ int nnz = 0;
+ for(j=0; j<w_size; j++)
+ {
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ x->value *= prob_col->y[x->index-1]; // restore x->value
+ x++;
+ }
+ if(w[j] != 0)
+ {
+ v += fabs(w[j]);
+ nnz++;
+ }
+ }
+ for(j=0; j<l; j++)
+ if(b[j] > 0)
+ v += C[GETI(j)]*b[j]*b[j];
+
+ info("Objective value = %lf\n", v);
+ info("#nonzeros/#features = %d/%d\n", nnz, w_size);
+
+ delete [] index;
+ delete [] y;
+ delete [] b;
+ delete [] xj_sq;
+ }
+
+ // A coordinate descent algorithm for
+ // L1-regularized logistic regression problems
+ //
+ // min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
+
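The outer Newton step of the solver below needs two per-instance quantities built from t_i = w^T x_i (the label enters separately, through the fixed xjneg_sum term): tau_i = C_i/(1 + e^{t_i}), which drives the gradient, and D_i = C_i e^{t_i}/(1 + e^{t_i})^2, the Hessian diagonal factor. A small sketch of just that computation (illustrative names; no overflow guard for large t_i):

#include <cmath>
#include <vector>

// Per-instance gradient and curvature factors used by solve_l1r_lr:
//   tau_i = Ci * 1/(1 + e^{t_i}),  D_i = Ci * e^{t_i}/(1 + e^{t_i})^2
void lr_tau_D(const std::vector<double> &wTx,   // t_i = w^T x_i
              const std::vector<double> &Ci,
              std::vector<double> &tau, std::vector<double> &D)
{
    for (size_t i = 0; i < wTx.size(); i++)
    {
        double e = std::exp(wTx[i]);
        double s = 1.0 / (1.0 + e);
        tau[i] = Ci[i] * s;
        D[i] = Ci[i] * e * s * s;
    }
}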
+ static void solve_l1r_lr(
+ const problem *prob_col, double *w, double eps,
+ double Cp, double Cn)
+ {
+ int l = prob_col->l;
+ int w_size = prob_col->n;
+ int j, s, newton_iter=0, iter=0;
+ int max_newton_iter = 100;
+ int max_iter = 1000;
+ int max_num_linesearch = 20;
+ int active_size;
+ int QP_active_size;
+
+ double nu = 1e-12;
+ double inner_eps = 1;
+ double sigma = 0.01;
+ double w_norm, w_norm_new;
+ double z, G, H;
+ double Gnorm1_init;
+ double Gmax_old = INF;
+ double Gmax_new, Gnorm1_new;
+ double QP_Gmax_old = INF;
+ double QP_Gmax_new, QP_Gnorm1_new;
+ double delta, negsum_xTd, cond;
+
+ int *index = new int[w_size];
+ schar *y = new schar[l];
+ double *Hdiag = new double[w_size];
+ double *Grad = new double[w_size];
+ double *wpd = new double[w_size];
+ double *xjneg_sum = new double[w_size];
+ double *xTd = new double[l];
+ double *exp_wTx = new double[l];
+ double *exp_wTx_new = new double[l];
+ double *tau = new double[l];
+ double *D = new double[l];
+ feature_node *x;
+
+ double C[3] = {Cn,0,Cp};
+
+ // Initial w can be set here.
+ for(j=0; j<w_size; j++)
+ w[j] = 0;
+
+ for(j=0; j<l; j++)
+ {
+ if(prob_col->y[j] > 0)
+ y[j] = 1;
+ else
+ y[j] = -1;
+
+ exp_wTx[j] = 0;
+ }
+
+ w_norm = 0;
+ for(j=0; j<w_size; j++)
+ {
+ w_norm += fabs(w[j]);
+ wpd[j] = w[j];
+ index[j] = j;
+ xjneg_sum[j] = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ double val = x->value;
+ exp_wTx[ind] += w[j]*val;
+ if(y[ind] == -1)
+ xjneg_sum[j] += C[GETI(ind)]*val;
+ x++;
+ }
+ }
+ for(j=0; j<l; j++)
+ {
+ exp_wTx[j] = exp(exp_wTx[j]);
+ double tau_tmp = 1/(1+exp_wTx[j]);
+ tau[j] = C[GETI(j)]*tau_tmp;
+ D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp;
+ }
+
+ while(newton_iter < max_newton_iter)
+ {
+ Gmax_new = 0;
+ Gnorm1_new = 0;
+ active_size = w_size;
+
+ for(s=0; s<active_size; s++)
+ {
+ j = index[s];
+ Hdiag[j] = nu;
+ Grad[j] = 0;
+
+ double tmp = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ Hdiag[j] += x->value*x->value*D[ind];
+ tmp += x->value*tau[ind];
+ x++;
+ }
+ Grad[j] = -tmp + xjneg_sum[j];
+
+ double Gp = Grad[j]+1;
+ double Gn = Grad[j]-1;
+ double violation = 0;
+ if(w[j] == 0)
+ {
+ if(Gp < 0)
+ violation = -Gp;
+ else if(Gn > 0)
+ violation = Gn;
+ //outer-level shrinking
+ else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ }
+ else if(w[j] > 0)
+ violation = fabs(Gp);
+ else
+ violation = fabs(Gn);
+
+ Gmax_new = max(Gmax_new, violation);
+ Gnorm1_new += violation;
+ }
+
+ if(newton_iter == 0)
+ Gnorm1_init = Gnorm1_new;
+
+ if(Gnorm1_new <= eps*Gnorm1_init)
+ break;
+
+ iter = 0;
+ QP_Gmax_old = INF;
+ QP_active_size = active_size;
+
+ for(int i=0; i<l; i++)
+ xTd[i] = 0;
+
+ // optimize QP over wpd
+ while(iter < max_iter)
+ {
+ QP_Gmax_new = 0;
+ QP_Gnorm1_new = 0;
+
+ for(j=0; j<QP_active_size; j++)
+ {
+ int i = j+rand()%(QP_active_size-j);
+ swap(index[i], index[j]);
+ }
+
+ for(s=0; s<QP_active_size; s++)
+ {
+ j = index[s];
+ H = Hdiag[j];
+
+ x = prob_col->x[j];
+ G = Grad[j] + (wpd[j]-w[j])*nu;
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ G += x->value*D[ind]*xTd[ind];
+ x++;
+ }
+
+ double Gp = G+1;
+ double Gn = G-1;
+ double violation = 0;
+ if(wpd[j] == 0)
+ {
+ if(Gp < 0)
+ violation = -Gp;
+ else if(Gn > 0)
+ violation = Gn;
+ //inner-level shrinking
+ else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
+ {
+ QP_active_size--;
+ swap(index[s], index[QP_active_size]);
+ s--;
+ continue;
+ }
+ }
+ else if(wpd[j] > 0)
+ violation = fabs(Gp);
+ else
+ violation = fabs(Gn);
+
+ QP_Gmax_new = max(QP_Gmax_new, violation);
+ QP_Gnorm1_new += violation;
+
+ // obtain solution of one-variable problem
+ if(Gp < H*wpd[j])
+ z = -Gp/H;
+ else if(Gn > H*wpd[j])
+ z = -Gn/H;
+ else
+ z = -wpd[j];
+
+ if(fabs(z) < 1.0e-12)
+ continue;
+ z = min(max(z,-10.0),10.0);
+
+ wpd[j] += z;
+
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ xTd[ind] += x->value*z;
+ x++;
+ }
+ }
+
+ iter++;
+
+ if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
+ {
+ //inner stopping
+ if(QP_active_size == active_size)
+ break;
+ //active set reactivation
+ else
+ {
+ QP_active_size = active_size;
+ QP_Gmax_old = INF;
+ continue;
+ }
+ }
+
+ QP_Gmax_old = QP_Gmax_new;
+ }
+
+ if(iter >= max_iter)
+ info("WARNING: reaching max number of inner iterations\n");
+
+ delta = 0;
+ w_norm_new = 0;
+ for(j=0; j<w_size; j++)
+ {
+ delta += Grad[j]*(wpd[j]-w[j]);
+ if(wpd[j] != 0)
+ w_norm_new += fabs(wpd[j]);
+ }
+ delta += (w_norm_new-w_norm);
+
+ negsum_xTd = 0;
+ for(int i=0; i<l; i++)
+ if(y[i] == -1)
+ negsum_xTd += C[GETI(i)]*xTd[i];
+
+ int num_linesearch;
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
+ {
+ cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;
+
+ for(int i=0; i<l; i++)
+ {
+ double exp_xTd = exp(xTd[i]);
+ exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
+ cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
+ }
+
+ if(cond <= 0)
+ {
+ w_norm = w_norm_new;
+ for(j=0; j<w_size; j++)
+ w[j] = wpd[j];
+ for(int i=0; i<l; i++)
+ {
+ exp_wTx[i] = exp_wTx_new[i];
+ double tau_tmp = 1/(1+exp_wTx[i]);
+ tau[i] = C[GETI(i)]*tau_tmp;
+ D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
+ }
+ break;
+ }
+ else
+ {
+ w_norm_new = 0;
+ for(j=0; j<w_size; j++)
+ {
+ wpd[j] = (w[j]+wpd[j])*0.5;
+ if(wpd[j] != 0)
+ w_norm_new += fabs(wpd[j]);
+ }
+ delta *= 0.5;
+ negsum_xTd *= 0.5;
+ for(int i=0; i<l; i++)
+ xTd[i] *= 0.5;
+ }
+ }
+
+ // Recompute some info due to too many line search steps
+ if(num_linesearch >= max_num_linesearch)
+ {
+ for(int i=0; i<l; i++)
+ exp_wTx[i] = 0;
+
+ for(int i=0; i<w_size; i++)
+ {
+ if(w[i]==0) continue;
+ x = prob_col->x[i];
+ while(x->index != -1)
+ {
+ exp_wTx[x->index-1] += w[i]*x->value;
+ x++;
+ }
+ }
+
+ for(int i=0; i<l; i++)
+ exp_wTx[i] = exp(exp_wTx[i]);
+ }
+
+ if(iter == 1)
+ inner_eps *= 0.25;
+
+ newton_iter++;
+ Gmax_old = Gmax_new;
+
+ info("iter %3d #CD cycles %d\n", newton_iter, iter);
+ }
+
+ info("=========================\n");
+ info("optimization finished, #iter = %d\n", newton_iter);
+ if(newton_iter >= max_newton_iter)
+ info("WARNING: reaching max number of iterations\n");
+
+ // calculate objective value
+
+ double v = 0;
+ int nnz = 0;
+ for(j=0; j<w_size; j++)
+ if(w[j] != 0)
+ {
+ v += fabs(w[j]);
+ nnz++;
+ }
+ for(j=0; j<l; j++)
+ if(y[j] == 1)
+ v += C[GETI(j)]*log(1+1/exp_wTx[j]);
+ else
+ v += C[GETI(j)]*log(1+exp_wTx[j]);
+
+ info("Objective value = %lf\n", v);
+ info("#nonzeros/#features = %d/%d\n", nnz, w_size);
+
+ delete [] index;
+ delete [] y;
+ delete [] Hdiag;
+ delete [] Grad;
+ delete [] wpd;
+ delete [] xjneg_sum;
+ delete [] xTd;
+ delete [] exp_wTx;
+ delete [] exp_wTx_new;
+ delete [] tau;
+ delete [] D;
+ }
+
+ // transpose matrix X from row format to column format
+ static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
+ {
+ int i;
+ int l = prob->l;
+ int n = prob->n;
+ int nnz = 0;
+ int *col_ptr = new int[n+1];
+ feature_node *x_space;
+ prob_col->l = l;
+ prob_col->n = n;
+ prob_col->y = new double[l];
+ prob_col->x = new feature_node*[n];
+
+ for(i=0; i<l; i++)
+ prob_col->y[i] = prob->y[i];
+
+ for(i=0; i<n+1; i++)
+ col_ptr[i] = 0;
+ for(i=0; i<l; i++)
+ {
+ feature_node *x = prob->x[i];
+ while(x->index != -1)
+ {
+ nnz++;
+ col_ptr[x->index]++;
+ x++;
+ }
+ }
+ for(i=1; i<n+1; i++)
+ col_ptr[i] += col_ptr[i-1] + 1;
+
+ x_space = new feature_node[nnz+n];
+ for(i=0; i<n; i++)
+ prob_col->x[i] = &x_space[col_ptr[i]];
+
+ for(i=0; i<l; i++)
+ {
+ feature_node *x = prob->x[i];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ x_space[col_ptr[ind]].index = i+1; // starts from 1
+ x_space[col_ptr[ind]].value = x->value;
+ col_ptr[ind]++;
+ x++;
+ }
+ }
+ for(i=0; i<n; i++)
+ x_space[col_ptr[i]].index = -1;
+
+ *x_space_ret = x_space;
+
+ delete [] col_ptr;
+ }
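transpose() above is a counting-sort style conversion: count entries per column, prefix-sum the counts into column start offsets, then scatter each (row, value) entry into its column (with 1-based row indices and a -1 sentinel closing each column). The same pattern on plain 0-based (index, value) pairs, for reference (illustrative names):

#include <utility>
#include <vector>

// Row-major to column-major scatter over sparse (column, value) pairs;
// n is the number of columns, indices assumed in [0, n).
std::vector<std::vector<std::pair<int, double> > >
to_columns(const std::vector<std::vector<std::pair<int, double> > > &rows, int n)
{
    std::vector<std::vector<std::pair<int, double> > > cols(n);
    std::vector<int> count(n, 0);
    for (size_t i = 0; i < rows.size(); i++)
        for (size_t k = 0; k < rows[i].size(); k++)
            count[rows[i][k].first]++;               // entries per column
    for (int j = 0; j < n; j++)
        cols[j].reserve(count[j]);                   // plays the col_ptr role
    for (size_t i = 0; i < rows.size(); i++)
        for (size_t k = 0; k < rows[i].size(); k++)
            cols[rows[i][k].first].push_back(
                std::make_pair((int)i, rows[i][k].second)); // (row, value)
    return cols;
}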
2103
+
2104
+ // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2105
+ // perm, length l, must be allocated before calling this subroutine
2106
+ static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2107
+ {
2108
+ int l = prob->l;
2109
+ int max_nr_class = 16;
2110
+ int nr_class = 0;
2111
+ int *label = Malloc(int,max_nr_class);
2112
+ int *count = Malloc(int,max_nr_class);
2113
+ int *data_label = Malloc(int,l);
2114
+ int i;
2115
+
2116
+ for(i=0;i<l;i++)
2117
+ {
2118
+ int this_label = (int)prob->y[i];
2119
+ int j;
2120
+ for(j=0;j<nr_class;j++)
2121
+ {
2122
+ if(this_label == label[j])
2123
+ {
2124
+ ++count[j];
2125
+ break;
2126
+ }
2127
+ }
2128
+ data_label[i] = j;
2129
+ if(j == nr_class)
2130
+ {
2131
+ if(nr_class == max_nr_class)
2132
+ {
2133
+ max_nr_class *= 2;
2134
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
2135
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
2136
+ }
2137
+ label[nr_class] = this_label;
2138
+ count[nr_class] = 1;
2139
+ ++nr_class;
2140
+ }
2141
+ }
2142
+
2143
+ int *start = Malloc(int,nr_class);
2144
+ start[0] = 0;
2145
+ for(i=1;i<nr_class;i++)
2146
+ start[i] = start[i-1]+count[i-1];
2147
+ for(i=0;i<l;i++)
2148
+ {
2149
+ perm[start[data_label[i]]] = i;
2150
+ ++start[data_label[i]];
2151
+ }
2152
+ start[0] = 0;
2153
+ for(i=1;i<nr_class;i++)
2154
+ start[i] = start[i-1]+count[i-1];
2155
+
2156
+ *nr_class_ret = nr_class;
2157
+ *label_ret = label;
2158
+ *start_ret = start;
2159
+ *count_ret = count;
2160
+ free(data_label);
2161
+ }
2162
+
2163
+ static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
+ {
+     double eps=param->eps;
+     int pos = 0;
+     int neg = 0;
+     for(int i=0;i<prob->l;i++)
+         if(prob->y[i] > 0)
+             pos++;
+     neg = prob->l - pos;
+
+     double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
+
+     function *fun_obj=NULL;
+     switch(param->solver_type)
+     {
+         case L2R_LR:
+         {
+             double *C = new double[prob->l];
+             for(int i = 0; i < prob->l; i++)
+             {
+                 if(prob->y[i] > 0)
+                     C[i] = Cp;
+                 else
+                     C[i] = Cn;
+             }
+             fun_obj=new l2r_lr_fun(prob, C);
+             TRON tron_obj(fun_obj, primal_solver_tol);
+             tron_obj.set_print_string(liblinear_print_string);
+             tron_obj.tron(w);
+             delete fun_obj;
+             delete[] C; // C was allocated with new[], so delete[] is required
+             break;
+         }
+         case L2R_L2LOSS_SVC:
+         {
+             double *C = new double[prob->l];
+             for(int i = 0; i < prob->l; i++)
+             {
+                 if(prob->y[i] > 0)
+                     C[i] = Cp;
+                 else
+                     C[i] = Cn;
+             }
+             fun_obj=new l2r_l2_svc_fun(prob, C);
+             TRON tron_obj(fun_obj, primal_solver_tol);
+             tron_obj.set_print_string(liblinear_print_string);
+             tron_obj.tron(w);
+             delete fun_obj;
+             delete[] C;
+             break;
+         }
+         case L2R_L2LOSS_SVC_DUAL:
+             solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
+             break;
+         case L2R_L1LOSS_SVC_DUAL:
+             solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
+             break;
+         case L1R_L2LOSS_SVC:
+         {
+             problem prob_col;
+             feature_node *x_space = NULL;
+             transpose(prob, &x_space, &prob_col);
+             solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn);
+             delete [] prob_col.y;
+             delete [] prob_col.x;
+             delete [] x_space;
+             break;
+         }
+         case L1R_LR:
+         {
+             problem prob_col;
+             feature_node *x_space = NULL;
+             transpose(prob, &x_space, &prob_col);
+             solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn);
+             delete [] prob_col.y;
+             delete [] prob_col.x;
+             delete [] x_space;
+             break;
+         }
+         case L2R_LR_DUAL:
+             solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
+             break;
+         case L2R_L2LOSS_SVR:
+         {
+             double *C = new double[prob->l];
+             for(int i = 0; i < prob->l; i++)
+                 C[i] = param->C;
+
+             fun_obj=new l2r_l2_svr_fun(prob, C, param->p);
+             TRON tron_obj(fun_obj, param->eps);
+             tron_obj.set_print_string(liblinear_print_string);
+             tron_obj.tron(w);
+             delete fun_obj;
+             delete[] C;
+             break;
+         }
+         case L2R_L1LOSS_SVR_DUAL:
+             solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL);
+             break;
+         case L2R_L2LOSS_SVR_DUAL:
+             solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL);
+             break;
+         default:
+             fprintf(stderr, "ERROR: unknown solver_type\n");
+             break;
+     }
+ }
+
+ //
+ // Interface functions
+ //
+ model* train(const problem *prob, const parameter *param)
+ {
+     int i,j;
+     int l = prob->l;
+     int n = prob->n;
+     int w_size = prob->n;
+     model *model_ = Malloc(model,1);
+
+     if(prob->bias>=0)
+         model_->nr_feature=n-1;
+     else
+         model_->nr_feature=n;
+     model_->param = *param;
+     model_->bias = prob->bias;
+
+     if(param->solver_type == L2R_L2LOSS_SVR ||
+        param->solver_type == L2R_L1LOSS_SVR_DUAL ||
+        param->solver_type == L2R_L2LOSS_SVR_DUAL)
+     {
+         model_->w = Malloc(double, w_size);
+         model_->nr_class = 2;
+         model_->label = NULL;
+         train_one(prob, param, &model_->w[0], 0, 0);
+     }
+     else
+     {
+         int nr_class;
+         int *label = NULL;
+         int *start = NULL;
+         int *count = NULL;
+         int *perm = Malloc(int,l);
+
+         // group training data of the same class
+         group_classes(prob,&nr_class,&label,&start,&count,perm);
+
+         model_->nr_class=nr_class;
+         model_->label = Malloc(int,nr_class);
+         for(i=0;i<nr_class;i++)
+             model_->label[i] = label[i];
+
+         // calculate weighted C
+         double *weighted_C = Malloc(double, nr_class);
+         for(i=0;i<nr_class;i++)
+             weighted_C[i] = param->C;
+         for(i=0;i<param->nr_weight;i++)
+         {
+             for(j=0;j<nr_class;j++)
+                 if(param->weight_label[i] == label[j])
+                     break;
+             if(j == nr_class)
+                 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
+             else
+                 weighted_C[j] *= param->weight[i];
+         }
+
+         // constructing the subproblem
+         feature_node **x = Malloc(feature_node *,l);
+         for(i=0;i<l;i++)
+             x[i] = prob->x[perm[i]];
+
+         int k;
+         problem sub_prob;
+         sub_prob.l = l;
+         sub_prob.n = n;
+         sub_prob.x = Malloc(feature_node *,sub_prob.l);
+         sub_prob.y = Malloc(double,sub_prob.l);
+
+         for(k=0; k<sub_prob.l; k++)
+             sub_prob.x[k] = x[k];
+
+         // multi-class svm by Crammer and Singer
+         if(param->solver_type == MCSVM_CS)
+         {
+             model_->w=Malloc(double, n*nr_class);
+             for(i=0;i<nr_class;i++)
+                 for(j=start[i];j<start[i]+count[i];j++)
+                     sub_prob.y[j] = i;
+             Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
+             Solver.Solve(model_->w);
+         }
+         else
+         {
+             if(nr_class == 2)
+             {
+                 model_->w=Malloc(double, w_size);
+
+                 int e0 = start[0]+count[0];
+                 k=0;
+                 for(; k<e0; k++)
+                     sub_prob.y[k] = +1;
+                 for(; k<sub_prob.l; k++)
+                     sub_prob.y[k] = -1;
+
+                 train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
+             }
+             else
+             {
+                 // one-vs-the-rest: one weight vector per class, trained with
+                 // that class as +1 and all remaining classes as -1
+                 model_->w=Malloc(double, w_size*nr_class);
+                 double *w=Malloc(double, w_size);
+                 for(i=0;i<nr_class;i++)
+                 {
+                     int si = start[i];
+                     int ei = si+count[i];
+
+                     k=0;
+                     for(; k<si; k++)
+                         sub_prob.y[k] = -1;
+                     for(; k<ei; k++)
+                         sub_prob.y[k] = +1;
+                     for(; k<sub_prob.l; k++)
+                         sub_prob.y[k] = -1;
+
+                     train_one(&sub_prob, param, w, weighted_C[i], param->C);
+
+                     for(int j=0;j<w_size;j++)
+                         model_->w[j*nr_class+i] = w[j];
+                 }
+                 free(w);
+             }
+         }
+
+         free(x);
+         free(label);
+         free(start);
+         free(count);
+         free(perm);
+         free(sub_prob.x);
+         free(sub_prob.y);
+         free(weighted_C);
+     }
+     return model_;
+ }
+
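+ // Usage sketch (editor's illustration, not part of the original source;
+ // fields are assigned one by one to stay independent of struct layout):
+ //
+ //   struct feature_node x1[] = { {1, 1.0}, {-1, 0.0} };   // index -1 ends a row
+ //   struct feature_node x2[] = { {1, -1.0}, {-1, 0.0} };
+ //   struct feature_node *rows[] = { x1, x2 };
+ //   double labels[] = { +1, -1 };
+ //   struct problem prob;
+ //   prob.l = 2; prob.n = 1; prob.x = rows; prob.y = labels; prob.bias = -1;
+ //   struct parameter param;
+ //   param.solver_type = L2R_L2LOSS_SVC_DUAL; param.eps = 0.1; param.C = 1;
+ //   param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL;
+ //   param.p = 0.1;
+ //   if(check_parameter(&prob, &param) == NULL)
+ //   {
+ //       struct model *m = train(&prob, &param);
+ //       double yhat = predict(m, x1);   // expected +1
+ //       free_and_destroy_model(&m);
+ //   }
+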
+ void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target)
+ {
+     int i;
+     int *fold_start = Malloc(int,nr_fold+1);
+     int l = prob->l;
+     int *perm = Malloc(int,l);
+
+     for(i=0;i<l;i++) perm[i]=i;
+     for(i=0;i<l;i++)
+     {
+         int j = i+rand()%(l-i);
+         swap(perm[i],perm[j]);
+     }
+     for(i=0;i<=nr_fold;i++)
+         fold_start[i]=i*l/nr_fold;
+
+     for(i=0;i<nr_fold;i++)
+     {
+         int begin = fold_start[i];
+         int end = fold_start[i+1];
+         int j,k;
+         struct problem subprob;
+
+         subprob.bias = prob->bias;
+         subprob.n = prob->n;
+         subprob.l = l-(end-begin);
+         subprob.x = Malloc(struct feature_node*,subprob.l);
+         subprob.y = Malloc(double,subprob.l);
+
+         k=0;
+         for(j=0;j<begin;j++)
+         {
+             subprob.x[k] = prob->x[perm[j]];
+             subprob.y[k] = prob->y[perm[j]];
+             ++k;
+         }
+         for(j=end;j<l;j++)
+         {
+             subprob.x[k] = prob->x[perm[j]];
+             subprob.y[k] = prob->y[perm[j]];
+             ++k;
+         }
+         struct model *submodel = train(&subprob,param);
+         for(j=begin;j<end;j++)
+             target[perm[j]] = predict(submodel,prob->x[perm[j]]);
+         free_and_destroy_model(&submodel);
+         free(subprob.x);
+         free(subprob.y);
+     }
+     free(fold_start);
+     free(perm);
+ }
+
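+ // Example (editor's illustration): target must be caller-allocated with
+ // prob->l entries; each fold fills in the predictions for its held-out part:
+ //
+ //   double *target = (double *) malloc(prob.l * sizeof(double));
+ //   cross_validation(&prob, &param, 5, target);
+ //   int correct = 0;
+ //   for(int i = 0; i < prob.l; i++)
+ //       if(target[i] == prob.y[i]) ++correct;
+ //   printf("CV accuracy = %g%%\n", 100.0*correct/prob.l);
+ //   free(target);
+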
+ double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
+ {
+     int idx;
+     int n;
+     if(model_->bias>=0)
+         n=model_->nr_feature+1;
+     else
+         n=model_->nr_feature;
+     double *w=model_->w;
+     int nr_class=model_->nr_class;
+     int i;
+     int nr_w;
+     if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
+         nr_w = 1;
+     else
+         nr_w = nr_class;
+
+     const feature_node *lx=x;
+     for(i=0;i<nr_w;i++)
+         dec_values[i] = 0;
+     for(; (idx=lx->index)!=-1; lx++)
+     {
+         // the dimension of testing data may exceed that of training
+         if(idx<=n)
+             for(i=0;i<nr_w;i++)
+                 // w is stored feature-major: w[(idx-1)*nr_w+i] is the weight
+                 // of feature idx in the i-th weight vector
+                 dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
+     }
+
+     if(nr_class==2)
+     {
+         if(model_->param.solver_type == L2R_L2LOSS_SVR ||
+            model_->param.solver_type == L2R_L1LOSS_SVR_DUAL ||
+            model_->param.solver_type == L2R_L2LOSS_SVR_DUAL)
+             return dec_values[0];
+         else
+             return (dec_values[0]>0)?model_->label[0]:model_->label[1];
+     }
+     else
+     {
+         int dec_max_idx = 0;
+         for(i=1;i<nr_class;i++)
+         {
+             if(dec_values[i] > dec_values[dec_max_idx])
+                 dec_max_idx = i;
+         }
+         return model_->label[dec_max_idx];
+     }
+ }
+
+ double predict(const model *model_, const feature_node *x)
+ {
+     double *dec_values = Malloc(double, model_->nr_class);
+     double label=predict_values(model_, x, dec_values);
+     free(dec_values);
+     return label;
+ }
+
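+ // Note (editor's illustration): x is a sparse row terminated by index -1,
+ // and when the model was trained with bias >= 0 the caller supplies the
+ // bias feature (index nr_feature+1, value = bias) before the terminator:
+ //
+ //   struct feature_node x[] = { {1, 0.5}, {3, -2.0},
+ //                               {get_nr_feature(m)+1, m->bias},  // only if bias >= 0
+ //                               {-1, 0.0} };
+ //   double yhat = predict(m, x);
+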
+ double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
+ {
+     if(check_probability_model(model_))
+     {
+         int i;
+         int nr_class=model_->nr_class;
+         int nr_w;
+         if(nr_class==2)
+             nr_w = 1;
+         else
+             nr_w = nr_class;
+
+         double label=predict_values(model_, x, prob_estimates);
+         for(i=0;i<nr_w;i++)
+             prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
+
+         if(nr_class==2) // for binary classification
+             prob_estimates[1]=1.-prob_estimates[0];
+         else
+         {
+             double sum=0;
+             for(i=0; i<nr_class; i++)
+                 sum+=prob_estimates[i];
+
+             for(i=0; i<nr_class; i++)
+                 prob_estimates[i]=prob_estimates[i]/sum;
+         }
+
+         return label;
+     }
+     else
+         return 0;
+ }
+
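+ // Example (editor's illustration): probability estimates are only defined
+ // for the logistic-regression solvers, and prob_estimates[i] belongs to the
+ // class stored in slot i of get_labels():
+ //
+ //   if(check_probability_model(m))
+ //   {
+ //       int k = get_nr_class(m);
+ //       double *pe = (double *) malloc(k * sizeof(double));
+ //       int *labs = (int *) malloc(k * sizeof(int));
+ //       get_labels(m, labs);
+ //       double yhat = predict_probability(m, x, pe);
+ //       for(int i = 0; i < k; i++)
+ //           printf("P(y=%d) = %g\n", labs[i], pe[i]);
+ //       free(pe); free(labs);
+ //   }
+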
+ static const char *solver_type_table[]=
+ {
+     "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
+     "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
+     "", "", "",
+     "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL", NULL
+ };
+
+ int save_model(const char *model_file_name, const struct model *model_)
+ {
+     int i;
+     int nr_feature=model_->nr_feature;
+     int n;
+     const parameter& param = model_->param;
+
+     if(model_->bias>=0)
+         n=nr_feature+1;
+     else
+         n=nr_feature;
+     int w_size = n;
+     FILE *fp = fopen(model_file_name,"w");
+     if(fp==NULL) return -1;
+
+     char *old_locale = strdup(setlocale(LC_ALL, NULL));
+     setlocale(LC_ALL, "C");
+
+     int nr_w;
+     if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
+         nr_w=1;
+     else
+         nr_w=model_->nr_class;
+
+     fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
+     fprintf(fp, "nr_class %d\n", model_->nr_class);
+
+     if(model_->label)
+     {
+         fprintf(fp, "label");
+         for(i=0; i<model_->nr_class; i++)
+             fprintf(fp, " %d", model_->label[i]);
+         fprintf(fp, "\n");
+     }
+
+     fprintf(fp, "nr_feature %d\n", nr_feature);
+
+     fprintf(fp, "bias %.16g\n", model_->bias);
+
+     fprintf(fp, "w\n");
+     for(i=0; i<w_size; i++)
+     {
+         int j;
+         for(j=0; j<nr_w; j++)
+             fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
+         fprintf(fp, "\n");
+     }
+
+     setlocale(LC_ALL, old_locale);
+     free(old_locale);
+
+     if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
+     else return 0;
+ }
+
+ struct model *load_model(const char *model_file_name)
+ {
+     FILE *fp = fopen(model_file_name,"r");
+     if(fp==NULL) return NULL;
+
+     int i;
+     int nr_feature;
+     int n;
+     int nr_class;
+     double bias;
+     model *model_ = Malloc(model,1);
+     parameter& param = model_->param;
+
+     model_->label = NULL;
+
+     char *old_locale = strdup(setlocale(LC_ALL, NULL));
+     setlocale(LC_ALL, "C");
+
+     char cmd[81];
+     while(1)
+     {
+         fscanf(fp,"%80s",cmd);
+         if(strcmp(cmd,"solver_type")==0)
+         {
+             fscanf(fp,"%80s",cmd);
+             int i;
+             for(i=0;solver_type_table[i];i++)
+             {
+                 if(strcmp(solver_type_table[i],cmd)==0)
+                 {
+                     param.solver_type=i;
+                     break;
+                 }
+             }
+             if(solver_type_table[i] == NULL)
+             {
+                 fprintf(stderr,"unknown solver type.\n");
+
+                 setlocale(LC_ALL, old_locale);
+                 free(model_->label);
+                 free(model_);
+                 free(old_locale);
+                 fclose(fp); // close the file on the error path too
+                 return NULL;
+             }
+         }
+         else if(strcmp(cmd,"nr_class")==0)
+         {
+             fscanf(fp,"%d",&nr_class);
+             model_->nr_class=nr_class;
+         }
+         else if(strcmp(cmd,"nr_feature")==0)
+         {
+             fscanf(fp,"%d",&nr_feature);
+             model_->nr_feature=nr_feature;
+         }
+         else if(strcmp(cmd,"bias")==0)
+         {
+             fscanf(fp,"%lf",&bias);
+             model_->bias=bias;
+         }
+         else if(strcmp(cmd,"w")==0)
+         {
+             break;
+         }
+         else if(strcmp(cmd,"label")==0)
+         {
+             int nr_class = model_->nr_class;
+             model_->label = Malloc(int,nr_class);
+             for(int i=0;i<nr_class;i++)
+                 fscanf(fp,"%d",&model_->label[i]);
+         }
+         else
+         {
+             fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
+             setlocale(LC_ALL, old_locale);
+             free(model_->label);
+             free(model_);
+             free(old_locale);
+             fclose(fp); // close the file on the error path too
+             return NULL;
+         }
+     }
+
+     nr_feature=model_->nr_feature;
+     if(model_->bias>=0)
+         n=nr_feature+1;
+     else
+         n=nr_feature;
+     int w_size = n;
+     int nr_w;
+     if(nr_class==2 && param.solver_type != MCSVM_CS)
+         nr_w = 1;
+     else
+         nr_w = nr_class;
+
+     model_->w=Malloc(double, w_size*nr_w);
+     for(i=0; i<w_size; i++)
+     {
+         int j;
+         for(j=0; j<nr_w; j++)
+             fscanf(fp, "%lf ", &model_->w[i*nr_w+j]);
+         fscanf(fp, "\n");
+     }
+
+     setlocale(LC_ALL, old_locale);
+     free(old_locale);
+
+     if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
+
+     return model_;
+ }
+
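+ // Round-trip sketch (editor's illustration): save_model returns 0 on
+ // success and -1 on failure; load_model returns NULL on failure:
+ //
+ //   if(save_model("example.model", m) == 0)
+ //   {
+ //       struct model *m2 = load_model("example.model");
+ //       if(m2 != NULL)
+ //       {
+ //           double yhat = predict(m2, x);
+ //           free_and_destroy_model(&m2);
+ //       }
+ //   }
+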
+ int get_nr_feature(const model *model_)
+ {
+     return model_->nr_feature;
+ }
+
+ int get_nr_class(const model *model_)
+ {
+     return model_->nr_class;
+ }
+
+ void get_labels(const model *model_, int* label)
+ {
+     if (model_->label != NULL)
+         for(int i=0;i<model_->nr_class;i++)
+             label[i] = model_->label[i];
+ }
+
+ void free_model_content(struct model *model_ptr)
+ {
+     if(model_ptr->w != NULL)
+         free(model_ptr->w);
+     if(model_ptr->label != NULL)
+         free(model_ptr->label);
+ }
+
+ void free_and_destroy_model(struct model **model_ptr_ptr)
+ {
+     struct model *model_ptr = *model_ptr_ptr;
+     if(model_ptr != NULL)
+     {
+         free_model_content(model_ptr);
+         free(model_ptr);
+     }
+ }
+
+ void destroy_param(parameter* param)
+ {
+     if(param->weight_label != NULL)
+         free(param->weight_label);
+     if(param->weight != NULL)
+         free(param->weight);
+ }
+
+ const char *check_parameter(const problem *prob, const parameter *param)
+ {
+     if(param->eps <= 0)
+         return "eps <= 0";
+
+     if(param->C <= 0)
+         return "C <= 0";
+
+     if(param->p < 0)
+         return "p < 0";
+
+     if(param->solver_type != L2R_LR
+        && param->solver_type != L2R_L2LOSS_SVC_DUAL
+        && param->solver_type != L2R_L2LOSS_SVC
+        && param->solver_type != L2R_L1LOSS_SVC_DUAL
+        && param->solver_type != MCSVM_CS
+        && param->solver_type != L1R_L2LOSS_SVC
+        && param->solver_type != L1R_LR
+        && param->solver_type != L2R_LR_DUAL
+        && param->solver_type != L2R_L2LOSS_SVR
+        && param->solver_type != L2R_L2LOSS_SVR_DUAL
+        && param->solver_type != L2R_L1LOSS_SVR_DUAL)
+         return "unknown solver type";
+
+     return NULL;
+ }
+
+ int check_probability_model(const struct model *model_)
+ {
+     return (model_->param.solver_type==L2R_LR ||
+             model_->param.solver_type==L2R_LR_DUAL ||
+             model_->param.solver_type==L1R_LR);
+ }
+
+ void set_print_string_function(void (*print_func)(const char*))
+ {
+     if (print_func == NULL)
+         liblinear_print_string = &print_string_stdout;
+     else
+         liblinear_print_string = print_func;
+ }
+
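+ // Example (editor's illustration): route or silence the solver's progress
+ // output; passing NULL restores printing to stdout:
+ //
+ //   static void quiet(const char *s) { (void)s; }
+ //   set_print_string_function(&quiet);   // silence training output
+ //   set_print_string_function(NULL);     // back to stdout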