liblinear-ruby 1.0.1 → 1.0.2

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/blasp.h +8 -8
  4. data/ext/daxpy.c +3 -3
  5. data/ext/ddot.c +3 -3
  6. data/ext/dnrm2.c +7 -7
  7. data/ext/dscal.c +4 -4
  8. data/ext/liblinear_wrap.cxx +382 -382
  9. data/ext/linear.cpp +44 -55
  10. data/ext/linear.h +5 -1
  11. data/ext/tron.cpp +13 -5
  12. data/ext/tron.h +1 -1
  13. data/lib/liblinear.rb +2 -0
  14. data/lib/liblinear/version.rb +1 -1
  15. metadata +2 -41
  16. data/liblinear-2.1/COPYRIGHT +0 -31
  17. data/liblinear-2.1/Makefile +0 -37
  18. data/liblinear-2.1/Makefile.win +0 -24
  19. data/liblinear-2.1/README +0 -600
  20. data/liblinear-2.1/blas/Makefile +0 -22
  21. data/liblinear-2.1/blas/blas.h +0 -25
  22. data/liblinear-2.1/blas/blasp.h +0 -438
  23. data/liblinear-2.1/blas/daxpy.c +0 -57
  24. data/liblinear-2.1/blas/ddot.c +0 -58
  25. data/liblinear-2.1/blas/dnrm2.c +0 -70
  26. data/liblinear-2.1/blas/dscal.c +0 -52
  27. data/liblinear-2.1/heart_scale +0 -270
  28. data/liblinear-2.1/linear.cpp +0 -3053
  29. data/liblinear-2.1/linear.def +0 -22
  30. data/liblinear-2.1/linear.h +0 -79
  31. data/liblinear-2.1/matlab/Makefile +0 -49
  32. data/liblinear-2.1/matlab/README +0 -208
  33. data/liblinear-2.1/matlab/libsvmread.c +0 -212
  34. data/liblinear-2.1/matlab/libsvmwrite.c +0 -119
  35. data/liblinear-2.1/matlab/linear_model_matlab.c +0 -176
  36. data/liblinear-2.1/matlab/linear_model_matlab.h +0 -2
  37. data/liblinear-2.1/matlab/make.m +0 -22
  38. data/liblinear-2.1/matlab/predict.c +0 -341
  39. data/liblinear-2.1/matlab/train.c +0 -492
  40. data/liblinear-2.1/predict.c +0 -243
  41. data/liblinear-2.1/python/Makefile +0 -4
  42. data/liblinear-2.1/python/README +0 -380
  43. data/liblinear-2.1/python/liblinear.py +0 -323
  44. data/liblinear-2.1/python/liblinearutil.py +0 -270
  45. data/liblinear-2.1/train.c +0 -449
  46. data/liblinear-2.1/tron.cpp +0 -241
  47. data/liblinear-2.1/tron.h +0 -35
  48. data/liblinear-2.1/windows/liblinear.dll +0 -0
  49. data/liblinear-2.1/windows/libsvmread.mexw64 +0 -0
  50. data/liblinear-2.1/windows/libsvmwrite.mexw64 +0 -0
  51. data/liblinear-2.1/windows/predict.exe +0 -0
  52. data/liblinear-2.1/windows/predict.mexw64 +0 -0
  53. data/liblinear-2.1/windows/train.exe +0 -0
  54. data/liblinear-2.1/windows/train.mexw64 +0 -0
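A minimal sketch, assuming the gem is managed through Bundler: pin the updated release in the project's Gemfile (the version constraint shown is illustrative, not taken from this diff).

    # Gemfile (illustrative): request the 1.0.2 release of the gem
    gem 'liblinear-ruby', '~> 1.0.2'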
data/liblinear-2.1/linear.cpp
@@ -1,3053 +0,0 @@
1
- #include <math.h>
2
- #include <stdio.h>
3
- #include <stdlib.h>
4
- #include <string.h>
5
- #include <stdarg.h>
6
- #include <locale.h>
7
- #include "linear.h"
8
- #include "tron.h"
9
- typedef signed char schar;
10
- template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
11
- #ifndef min
12
- template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
13
- #endif
14
- #ifndef max
15
- template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
16
- #endif
17
- template <class S, class T> static inline void clone(T*& dst, S* src, int n)
18
- {
19
- dst = new T[n];
20
- memcpy((void *)dst,(void *)src,sizeof(T)*n);
21
- }
22
- #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
23
- #define INF HUGE_VAL
24
-
25
- static void print_string_stdout(const char *s)
26
- {
27
- fputs(s,stdout);
28
- fflush(stdout);
29
- }
30
- static void print_null(const char *s) {}
31
-
32
- static void (*liblinear_print_string) (const char *) = &print_string_stdout;
33
-
34
- #if 1
35
- static void info(const char *fmt,...)
36
- {
37
- char buf[BUFSIZ];
38
- va_list ap;
39
- va_start(ap,fmt);
40
- vsprintf(buf,fmt,ap);
41
- va_end(ap);
42
- (*liblinear_print_string)(buf);
43
- }
44
- #else
45
- static void info(const char *fmt,...) {}
46
- #endif
47
- class sparse_operator
48
- {
49
- public:
50
- static double nrm2_sq(const feature_node *x)
51
- {
52
- double ret = 0;
53
- while(x->index != -1)
54
- {
55
- ret += x->value*x->value;
56
- x++;
57
- }
58
- return (ret);
59
- }
60
-
61
- static double dot(const double *s, const feature_node *x)
62
- {
63
- double ret = 0;
64
- while(x->index != -1)
65
- {
66
- ret += s[x->index-1]*x->value;
67
- x++;
68
- }
69
- return (ret);
70
- }
71
-
72
- static void axpy(const double a, const feature_node *x, double *y)
73
- {
74
- while(x->index != -1)
75
- {
76
- y[x->index-1] += a*x->value;
77
- x++;
78
- }
79
- }
80
- };
81
-
82
- class l2r_lr_fun: public function
83
- {
84
- public:
85
- l2r_lr_fun(const problem *prob, double *C);
86
- ~l2r_lr_fun();
87
-
88
- double fun(double *w);
89
- void grad(double *w, double *g);
90
- void Hv(double *s, double *Hs);
91
-
92
- int get_nr_variable(void);
93
-
94
- private:
95
- void Xv(double *v, double *Xv);
96
- void XTv(double *v, double *XTv);
97
-
98
- double *C;
99
- double *z;
100
- double *D;
101
- const problem *prob;
102
- };
103
-
104
- l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C)
105
- {
106
- int l=prob->l;
107
-
108
- this->prob = prob;
109
-
110
- z = new double[l];
111
- D = new double[l];
112
- this->C = C;
113
- }
114
-
115
- l2r_lr_fun::~l2r_lr_fun()
116
- {
117
- delete[] z;
118
- delete[] D;
119
- }
120
-
121
-
122
- double l2r_lr_fun::fun(double *w)
123
- {
124
- int i;
125
- double f=0;
126
- double *y=prob->y;
127
- int l=prob->l;
128
- int w_size=get_nr_variable();
129
-
130
- Xv(w, z);
131
-
132
- for(i=0;i<w_size;i++)
133
- f += w[i]*w[i];
134
- f /= 2.0;
135
- for(i=0;i<l;i++)
136
- {
137
- double yz = y[i]*z[i];
138
- if (yz >= 0)
139
- f += C[i]*log(1 + exp(-yz));
140
- else
141
- f += C[i]*(-yz+log(1 + exp(yz)));
142
- }
143
-
144
- return(f);
145
- }
146
-
147
- void l2r_lr_fun::grad(double *w, double *g)
148
- {
149
- int i;
150
- double *y=prob->y;
151
- int l=prob->l;
152
- int w_size=get_nr_variable();
153
-
154
- for(i=0;i<l;i++)
155
- {
156
- z[i] = 1/(1 + exp(-y[i]*z[i]));
157
- D[i] = z[i]*(1-z[i]);
158
- z[i] = C[i]*(z[i]-1)*y[i];
159
- }
160
- XTv(z, g);
161
-
162
- for(i=0;i<w_size;i++)
163
- g[i] = w[i] + g[i];
164
- }
165
-
166
- int l2r_lr_fun::get_nr_variable(void)
167
- {
168
- return prob->n;
169
- }
170
-
171
- void l2r_lr_fun::Hv(double *s, double *Hs)
172
- {
173
- int i;
174
- int l=prob->l;
175
- int w_size=get_nr_variable();
176
- double *wa = new double[l];
177
- feature_node **x=prob->x;
178
-
179
- for(i=0;i<w_size;i++)
180
- Hs[i] = 0;
181
- for(i=0;i<l;i++)
182
- {
183
- feature_node * const xi=x[i];
184
- wa[i] = sparse_operator::dot(s, xi);
185
-
186
- wa[i] = C[i]*D[i]*wa[i];
187
-
188
- sparse_operator::axpy(wa[i], xi, Hs);
189
- }
190
- for(i=0;i<w_size;i++)
191
- Hs[i] = s[i] + Hs[i];
192
- delete[] wa;
193
- }
194
-
195
- void l2r_lr_fun::Xv(double *v, double *Xv)
196
- {
197
- int i;
198
- int l=prob->l;
199
- feature_node **x=prob->x;
200
-
201
- for(i=0;i<l;i++)
202
- Xv[i]=sparse_operator::dot(v, x[i]);
203
- }
204
-
205
- void l2r_lr_fun::XTv(double *v, double *XTv)
206
- {
207
- int i;
208
- int l=prob->l;
209
- int w_size=get_nr_variable();
210
- feature_node **x=prob->x;
211
-
212
- for(i=0;i<w_size;i++)
213
- XTv[i]=0;
214
- for(i=0;i<l;i++)
215
- sparse_operator::axpy(v[i], x[i], XTv);
216
- }
217
-
218
- class l2r_l2_svc_fun: public function
219
- {
220
- public:
221
- l2r_l2_svc_fun(const problem *prob, double *C);
222
- ~l2r_l2_svc_fun();
223
-
224
- double fun(double *w);
225
- void grad(double *w, double *g);
226
- void Hv(double *s, double *Hs);
227
-
228
- int get_nr_variable(void);
229
-
230
- protected:
231
- void Xv(double *v, double *Xv);
232
- void subXTv(double *v, double *XTv);
233
-
234
- double *C;
235
- double *z;
236
- double *D;
237
- int *I;
238
- int sizeI;
239
- const problem *prob;
240
- };
241
-
242
- l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C)
243
- {
244
- int l=prob->l;
245
-
246
- this->prob = prob;
247
-
248
- z = new double[l];
249
- D = new double[l];
250
- I = new int[l];
251
- this->C = C;
252
- }
253
-
254
- l2r_l2_svc_fun::~l2r_l2_svc_fun()
255
- {
256
- delete[] z;
257
- delete[] D;
258
- delete[] I;
259
- }
260
-
261
- double l2r_l2_svc_fun::fun(double *w)
262
- {
263
- int i;
264
- double f=0;
265
- double *y=prob->y;
266
- int l=prob->l;
267
- int w_size=get_nr_variable();
268
-
269
- Xv(w, z);
270
-
271
- for(i=0;i<w_size;i++)
272
- f += w[i]*w[i];
273
- f /= 2.0;
274
- for(i=0;i<l;i++)
275
- {
276
- z[i] = y[i]*z[i];
277
- double d = 1-z[i];
278
- if (d > 0)
279
- f += C[i]*d*d;
280
- }
281
-
282
- return(f);
283
- }
284
-
285
- void l2r_l2_svc_fun::grad(double *w, double *g)
286
- {
287
- int i;
288
- double *y=prob->y;
289
- int l=prob->l;
290
- int w_size=get_nr_variable();
291
-
292
- sizeI = 0;
293
- for (i=0;i<l;i++)
294
- if (z[i] < 1)
295
- {
296
- z[sizeI] = C[i]*y[i]*(z[i]-1);
297
- I[sizeI] = i;
298
- sizeI++;
299
- }
300
- subXTv(z, g);
301
-
302
- for(i=0;i<w_size;i++)
303
- g[i] = w[i] + 2*g[i];
304
- }
305
-
306
- int l2r_l2_svc_fun::get_nr_variable(void)
307
- {
308
- return prob->n;
309
- }
310
-
311
- void l2r_l2_svc_fun::Hv(double *s, double *Hs)
312
- {
313
- int i;
314
- int w_size=get_nr_variable();
315
- double *wa = new double[sizeI];
316
- feature_node **x=prob->x;
317
-
318
- for(i=0;i<w_size;i++)
319
- Hs[i]=0;
320
- for(i=0;i<sizeI;i++)
321
- {
322
- feature_node * const xi=x[I[i]];
323
- wa[i] = sparse_operator::dot(s, xi);
324
-
325
- wa[i] = C[I[i]]*wa[i];
326
-
327
- sparse_operator::axpy(wa[i], xi, Hs);
328
- }
329
- for(i=0;i<w_size;i++)
330
- Hs[i] = s[i] + 2*Hs[i];
331
- delete[] wa;
332
- }
333
-
334
- void l2r_l2_svc_fun::Xv(double *v, double *Xv)
335
- {
336
- int i;
337
- int l=prob->l;
338
- feature_node **x=prob->x;
339
-
340
- for(i=0;i<l;i++)
341
- Xv[i]=sparse_operator::dot(v, x[i]);
342
- }
343
-
344
- void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
345
- {
346
- int i;
347
- int w_size=get_nr_variable();
348
- feature_node **x=prob->x;
349
-
350
- for(i=0;i<w_size;i++)
351
- XTv[i]=0;
352
- for(i=0;i<sizeI;i++)
353
- sparse_operator::axpy(v[i], x[I[i]], XTv);
354
- }
355
-
356
- class l2r_l2_svr_fun: public l2r_l2_svc_fun
357
- {
358
- public:
359
- l2r_l2_svr_fun(const problem *prob, double *C, double p);
360
-
361
- double fun(double *w);
362
- void grad(double *w, double *g);
363
-
364
- private:
365
- double p;
366
- };
367
-
368
- l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *C, double p):
369
- l2r_l2_svc_fun(prob, C)
370
- {
371
- this->p = p;
372
- }
373
-
374
- double l2r_l2_svr_fun::fun(double *w)
375
- {
376
- int i;
377
- double f=0;
378
- double *y=prob->y;
379
- int l=prob->l;
380
- int w_size=get_nr_variable();
381
- double d;
382
-
383
- Xv(w, z);
384
-
385
- for(i=0;i<w_size;i++)
386
- f += w[i]*w[i];
387
- f /= 2;
388
- for(i=0;i<l;i++)
389
- {
390
- d = z[i] - y[i];
391
- if(d < -p)
392
- f += C[i]*(d+p)*(d+p);
393
- else if(d > p)
394
- f += C[i]*(d-p)*(d-p);
395
- }
396
-
397
- return(f);
398
- }
399
-
400
- void l2r_l2_svr_fun::grad(double *w, double *g)
401
- {
402
- int i;
403
- double *y=prob->y;
404
- int l=prob->l;
405
- int w_size=get_nr_variable();
406
- double d;
407
-
408
- sizeI = 0;
409
- for(i=0;i<l;i++)
410
- {
411
- d = z[i] - y[i];
412
-
413
- // generate index set I
414
- if(d < -p)
415
- {
416
- z[sizeI] = C[i]*(d+p);
417
- I[sizeI] = i;
418
- sizeI++;
419
- }
420
- else if(d > p)
421
- {
422
- z[sizeI] = C[i]*(d-p);
423
- I[sizeI] = i;
424
- sizeI++;
425
- }
426
-
427
- }
428
- subXTv(z, g);
429
-
430
- for(i=0;i<w_size;i++)
431
- g[i] = w[i] + 2*g[i];
432
- }
433
-
434
- // A coordinate descent algorithm for
435
- // multi-class support vector machines by Crammer and Singer
436
- //
437
- // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
438
- // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
439
- //
440
- // where e^m_i = 0 if y_i = m,
441
- // e^m_i = 1 if y_i != m,
442
- // C^m_i = C if m = y_i,
443
- // C^m_i = 0 if m != y_i,
444
- // and w_m(\alpha) = \sum_i \alpha^m_i x_i
445
- //
446
- // Given:
447
- // x, y, C
448
- // eps is the stopping tolerance
449
- //
450
- // solution will be put in w
451
- //
452
- // See Appendix of LIBLINEAR paper, Fan et al. (2008)
453
-
454
- #define GETI(i) ((int) prob->y[i])
455
- // To support weights for instances, use GETI(i) (i)
456
-
457
- class Solver_MCSVM_CS
458
- {
459
- public:
460
- Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
461
- ~Solver_MCSVM_CS();
462
- void Solve(double *w);
463
- private:
464
- void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
465
- bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
466
- double *B, *C, *G;
467
- int w_size, l;
468
- int nr_class;
469
- int max_iter;
470
- double eps;
471
- const problem *prob;
472
- };
473
-
474
- Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
475
- {
476
- this->w_size = prob->n;
477
- this->l = prob->l;
478
- this->nr_class = nr_class;
479
- this->eps = eps;
480
- this->max_iter = max_iter;
481
- this->prob = prob;
482
- this->B = new double[nr_class];
483
- this->G = new double[nr_class];
484
- this->C = weighted_C;
485
- }
486
-
487
- Solver_MCSVM_CS::~Solver_MCSVM_CS()
488
- {
489
- delete[] B;
490
- delete[] G;
491
- }
492
-
493
- int compare_double(const void *a, const void *b)
494
- {
495
- if(*(double *)a > *(double *)b)
496
- return -1;
497
- if(*(double *)a < *(double *)b)
498
- return 1;
499
- return 0;
500
- }
501
-
502
- void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
503
- {
504
- int r;
505
- double *D;
506
-
507
- clone(D, B, active_i);
508
- if(yi < active_i)
509
- D[yi] += A_i*C_yi;
510
- qsort(D, active_i, sizeof(double), compare_double);
511
-
512
- double beta = D[0] - A_i*C_yi;
513
- for(r=1;r<active_i && beta<r*D[r];r++)
514
- beta += D[r];
515
- beta /= r;
516
-
517
- for(r=0;r<active_i;r++)
518
- {
519
- if(r == yi)
520
- alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
521
- else
522
- alpha_new[r] = min((double)0, (beta - B[r])/A_i);
523
- }
524
- delete[] D;
525
- }
526
-
527
- bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
528
- {
529
- double bound = 0;
530
- if(m == yi)
531
- bound = C[GETI(i)];
532
- if(alpha_i == bound && G[m] < minG)
533
- return true;
534
- return false;
535
- }
536
-
537
- void Solver_MCSVM_CS::Solve(double *w)
538
- {
539
- int i, m, s;
540
- int iter = 0;
541
- double *alpha = new double[l*nr_class];
542
- double *alpha_new = new double[nr_class];
543
- int *index = new int[l];
544
- double *QD = new double[l];
545
- int *d_ind = new int[nr_class];
546
- double *d_val = new double[nr_class];
547
- int *alpha_index = new int[nr_class*l];
548
- int *y_index = new int[l];
549
- int active_size = l;
550
- int *active_size_i = new int[l];
551
- double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
552
- bool start_from_all = true;
553
-
554
- // Initial alpha can be set here. Note that
555
- // sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l-1
556
- // alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
557
- // alpha[i*nr_class+m] <= 0 if prob->y[i] != m
558
- // If initial alpha isn't zero, uncomment the for loop below to initialize w
559
- for(i=0;i<l*nr_class;i++)
560
- alpha[i] = 0;
561
-
562
- for(i=0;i<w_size*nr_class;i++)
563
- w[i] = 0;
564
- for(i=0;i<l;i++)
565
- {
566
- for(m=0;m<nr_class;m++)
567
- alpha_index[i*nr_class+m] = m;
568
- feature_node *xi = prob->x[i];
569
- QD[i] = 0;
570
- while(xi->index != -1)
571
- {
572
- double val = xi->value;
573
- QD[i] += val*val;
574
-
575
- // Uncomment the for loop if initial alpha isn't zero
576
- // for(m=0; m<nr_class; m++)
577
- // w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
578
- xi++;
579
- }
580
- active_size_i[i] = nr_class;
581
- y_index[i] = (int)prob->y[i];
582
- index[i] = i;
583
- }
584
-
585
- while(iter < max_iter)
586
- {
587
- double stopping = -INF;
588
- for(i=0;i<active_size;i++)
589
- {
590
- int j = i+rand()%(active_size-i);
591
- swap(index[i], index[j]);
592
- }
593
- for(s=0;s<active_size;s++)
594
- {
595
- i = index[s];
596
- double Ai = QD[i];
597
- double *alpha_i = &alpha[i*nr_class];
598
- int *alpha_index_i = &alpha_index[i*nr_class];
599
-
600
- if(Ai > 0)
601
- {
602
- for(m=0;m<active_size_i[i];m++)
603
- G[m] = 1;
604
- if(y_index[i] < active_size_i[i])
605
- G[y_index[i]] = 0;
606
-
607
- feature_node *xi = prob->x[i];
608
- while(xi->index!= -1)
609
- {
610
- double *w_i = &w[(xi->index-1)*nr_class];
611
- for(m=0;m<active_size_i[i];m++)
612
- G[m] += w_i[alpha_index_i[m]]*(xi->value);
613
- xi++;
614
- }
615
-
616
- double minG = INF;
617
- double maxG = -INF;
618
- for(m=0;m<active_size_i[i];m++)
619
- {
620
- if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
621
- minG = G[m];
622
- if(G[m] > maxG)
623
- maxG = G[m];
624
- }
625
- if(y_index[i] < active_size_i[i])
626
- if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
627
- minG = G[y_index[i]];
628
-
629
- for(m=0;m<active_size_i[i];m++)
630
- {
631
- if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
632
- {
633
- active_size_i[i]--;
634
- while(active_size_i[i]>m)
635
- {
636
- if(!be_shrunk(i, active_size_i[i], y_index[i],
637
- alpha_i[alpha_index_i[active_size_i[i]]], minG))
638
- {
639
- swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
640
- swap(G[m], G[active_size_i[i]]);
641
- if(y_index[i] == active_size_i[i])
642
- y_index[i] = m;
643
- else if(y_index[i] == m)
644
- y_index[i] = active_size_i[i];
645
- break;
646
- }
647
- active_size_i[i]--;
648
- }
649
- }
650
- }
651
-
652
- if(active_size_i[i] <= 1)
653
- {
654
- active_size--;
655
- swap(index[s], index[active_size]);
656
- s--;
657
- continue;
658
- }
659
-
660
- if(maxG-minG <= 1e-12)
661
- continue;
662
- else
663
- stopping = max(maxG - minG, stopping);
664
-
665
- for(m=0;m<active_size_i[i];m++)
666
- B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
667
-
668
- solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
669
- int nz_d = 0;
670
- for(m=0;m<active_size_i[i];m++)
671
- {
672
- double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
673
- alpha_i[alpha_index_i[m]] = alpha_new[m];
674
- if(fabs(d) >= 1e-12)
675
- {
676
- d_ind[nz_d] = alpha_index_i[m];
677
- d_val[nz_d] = d;
678
- nz_d++;
679
- }
680
- }
681
-
682
- xi = prob->x[i];
683
- while(xi->index != -1)
684
- {
685
- double *w_i = &w[(xi->index-1)*nr_class];
686
- for(m=0;m<nz_d;m++)
687
- w_i[d_ind[m]] += d_val[m]*xi->value;
688
- xi++;
689
- }
690
- }
691
- }
692
-
693
- iter++;
694
- if(iter % 10 == 0)
695
- {
696
- info(".");
697
- }
698
-
699
- if(stopping < eps_shrink)
700
- {
701
- if(stopping < eps && start_from_all == true)
702
- break;
703
- else
704
- {
705
- active_size = l;
706
- for(i=0;i<l;i++)
707
- active_size_i[i] = nr_class;
708
- info("*");
709
- eps_shrink = max(eps_shrink/2, eps);
710
- start_from_all = true;
711
- }
712
- }
713
- else
714
- start_from_all = false;
715
- }
716
-
717
- info("\noptimization finished, #iter = %d\n",iter);
718
- if (iter >= max_iter)
719
- info("\nWARNING: reaching max number of iterations\n");
720
-
721
- // calculate objective value
722
- double v = 0;
723
- int nSV = 0;
724
- for(i=0;i<w_size*nr_class;i++)
725
- v += w[i]*w[i];
726
- v = 0.5*v;
727
- for(i=0;i<l*nr_class;i++)
728
- {
729
- v += alpha[i];
730
- if(fabs(alpha[i]) > 0)
731
- nSV++;
732
- }
733
- for(i=0;i<l;i++)
734
- v -= alpha[i*nr_class+(int)prob->y[i]];
735
- info("Objective value = %lf\n",v);
736
- info("nSV = %d\n",nSV);
737
-
738
- delete [] alpha;
739
- delete [] alpha_new;
740
- delete [] index;
741
- delete [] QD;
742
- delete [] d_ind;
743
- delete [] d_val;
744
- delete [] alpha_index;
745
- delete [] y_index;
746
- delete [] active_size_i;
747
- }
748
-
749
- // A coordinate descent algorithm for
750
- // L1-loss and L2-loss SVM dual problems
751
- //
752
- // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
753
- // s.t. 0 <= \alpha_i <= upper_bound_i,
754
- //
755
- // where Qij = yi yj xi^T xj and
756
- // D is a diagonal matrix
757
- //
758
- // In L1-SVM case:
759
- // upper_bound_i = Cp if y_i = 1
760
- // upper_bound_i = Cn if y_i = -1
761
- // D_ii = 0
762
- // In L2-SVM case:
763
- // upper_bound_i = INF
764
- // D_ii = 1/(2*Cp) if y_i = 1
765
- // D_ii = 1/(2*Cn) if y_i = -1
766
- //
767
- // Given:
768
- // x, y, Cp, Cn
769
- // eps is the stopping tolerance
770
- //
771
- // solution will be put in w
772
- //
773
- // See Algorithm 3 of Hsieh et al., ICML 2008
774
-
775
- #undef GETI
776
- #define GETI(i) (y[i]+1)
777
- // To support weights for instances, use GETI(i) (i)
778
-
779
- static void solve_l2r_l1l2_svc(
780
- const problem *prob, double *w, double eps,
781
- double Cp, double Cn, int solver_type)
782
- {
783
- int l = prob->l;
784
- int w_size = prob->n;
785
- int i, s, iter = 0;
786
- double C, d, G;
787
- double *QD = new double[l];
788
- int max_iter = 1000;
789
- int *index = new int[l];
790
- double *alpha = new double[l];
791
- schar *y = new schar[l];
792
- int active_size = l;
793
-
794
- // PG: projected gradient, for shrinking and stopping
795
- double PG;
796
- double PGmax_old = INF;
797
- double PGmin_old = -INF;
798
- double PGmax_new, PGmin_new;
799
-
800
- // default solver_type: L2R_L2LOSS_SVC_DUAL
801
- double diag[3] = {0.5/Cn, 0, 0.5/Cp};
802
- double upper_bound[3] = {INF, 0, INF};
803
- if(solver_type == L2R_L1LOSS_SVC_DUAL)
804
- {
805
- diag[0] = 0;
806
- diag[2] = 0;
807
- upper_bound[0] = Cn;
808
- upper_bound[2] = Cp;
809
- }
810
-
811
- for(i=0; i<l; i++)
812
- {
813
- if(prob->y[i] > 0)
814
- {
815
- y[i] = +1;
816
- }
817
- else
818
- {
819
- y[i] = -1;
820
- }
821
- }
822
-
823
- // Initial alpha can be set here. Note that
824
- // 0 <= alpha[i] <= upper_bound[GETI(i)]
825
- for(i=0; i<l; i++)
826
- alpha[i] = 0;
827
-
828
- for(i=0; i<w_size; i++)
829
- w[i] = 0;
830
- for(i=0; i<l; i++)
831
- {
832
- QD[i] = diag[GETI(i)];
833
-
834
- feature_node * const xi = prob->x[i];
835
- QD[i] += sparse_operator::nrm2_sq(xi);
836
- sparse_operator::axpy(y[i]*alpha[i], xi, w);
837
-
838
- index[i] = i;
839
- }
840
-
841
- while (iter < max_iter)
842
- {
843
- PGmax_new = -INF;
844
- PGmin_new = INF;
845
-
846
- for (i=0; i<active_size; i++)
847
- {
848
- int j = i+rand()%(active_size-i);
849
- swap(index[i], index[j]);
850
- }
851
-
852
- for (s=0; s<active_size; s++)
853
- {
854
- i = index[s];
855
- const schar yi = y[i];
856
- feature_node * const xi = prob->x[i];
857
-
858
- G = yi*sparse_operator::dot(w, xi)-1;
859
-
860
- C = upper_bound[GETI(i)];
861
- G += alpha[i]*diag[GETI(i)];
862
-
863
- PG = 0;
864
- if (alpha[i] == 0)
865
- {
866
- if (G > PGmax_old)
867
- {
868
- active_size--;
869
- swap(index[s], index[active_size]);
870
- s--;
871
- continue;
872
- }
873
- else if (G < 0)
874
- PG = G;
875
- }
876
- else if (alpha[i] == C)
877
- {
878
- if (G < PGmin_old)
879
- {
880
- active_size--;
881
- swap(index[s], index[active_size]);
882
- s--;
883
- continue;
884
- }
885
- else if (G > 0)
886
- PG = G;
887
- }
888
- else
889
- PG = G;
890
-
891
- PGmax_new = max(PGmax_new, PG);
892
- PGmin_new = min(PGmin_new, PG);
893
-
894
- if(fabs(PG) > 1.0e-12)
895
- {
896
- double alpha_old = alpha[i];
897
- alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
898
- d = (alpha[i] - alpha_old)*yi;
899
- sparse_operator::axpy(d, xi, w);
900
- }
901
- }
902
-
903
- iter++;
904
- if(iter % 10 == 0)
905
- info(".");
906
-
907
- if(PGmax_new - PGmin_new <= eps)
908
- {
909
- if(active_size == l)
910
- break;
911
- else
912
- {
913
- active_size = l;
914
- info("*");
915
- PGmax_old = INF;
916
- PGmin_old = -INF;
917
- continue;
918
- }
919
- }
920
- PGmax_old = PGmax_new;
921
- PGmin_old = PGmin_new;
922
- if (PGmax_old <= 0)
923
- PGmax_old = INF;
924
- if (PGmin_old >= 0)
925
- PGmin_old = -INF;
926
- }
927
-
928
- info("\noptimization finished, #iter = %d\n",iter);
929
- if (iter >= max_iter)
930
- info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
931
-
932
- // calculate objective value
933
-
934
- double v = 0;
935
- int nSV = 0;
936
- for(i=0; i<w_size; i++)
937
- v += w[i]*w[i];
938
- for(i=0; i<l; i++)
939
- {
940
- v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
941
- if(alpha[i] > 0)
942
- ++nSV;
943
- }
944
- info("Objective value = %lf\n",v/2);
945
- info("nSV = %d\n",nSV);
946
-
947
- delete [] QD;
948
- delete [] alpha;
949
- delete [] y;
950
- delete [] index;
951
- }
952
-
953
-
954
- // A coordinate descent algorithm for
955
- // L1-loss and L2-loss epsilon-SVR dual problem
956
- //
957
- // min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
958
- // s.t. -upper_bound_i <= \beta_i <= upper_bound_i,
959
- //
960
- // where Qij = xi^T xj and
961
- // D is a diagonal matrix
962
- //
963
- // In L1-SVM case:
964
- // upper_bound_i = C
965
- // lambda_i = 0
966
- // In L2-SVM case:
967
- // upper_bound_i = INF
968
- // lambda_i = 1/(2*C)
969
- //
970
- // Given:
971
- // x, y, p, C
972
- // eps is the stopping tolerance
973
- //
974
- // solution will be put in w
975
- //
976
- // See Algorithm 4 of Ho and Lin, 2012
977
-
978
- #undef GETI
979
- #define GETI(i) (0)
980
- // To support weights for instances, use GETI(i) (i)
981
-
982
- static void solve_l2r_l1l2_svr(
983
- const problem *prob, double *w, const parameter *param,
984
- int solver_type)
985
- {
986
- int l = prob->l;
987
- double C = param->C;
988
- double p = param->p;
989
- int w_size = prob->n;
990
- double eps = param->eps;
991
- int i, s, iter = 0;
992
- int max_iter = 1000;
993
- int active_size = l;
994
- int *index = new int[l];
995
-
996
- double d, G, H;
997
- double Gmax_old = INF;
998
- double Gmax_new, Gnorm1_new;
999
- double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1000
- double *beta = new double[l];
1001
- double *QD = new double[l];
1002
- double *y = prob->y;
1003
-
1004
- // L2R_L2LOSS_SVR_DUAL
1005
- double lambda[1], upper_bound[1];
1006
- lambda[0] = 0.5/C;
1007
- upper_bound[0] = INF;
1008
-
1009
- if(solver_type == L2R_L1LOSS_SVR_DUAL)
1010
- {
1011
- lambda[0] = 0;
1012
- upper_bound[0] = C;
1013
- }
1014
-
1015
- // Initial beta can be set here. Note that
1016
- // -upper_bound <= beta[i] <= upper_bound
1017
- for(i=0; i<l; i++)
1018
- beta[i] = 0;
1019
-
1020
- for(i=0; i<w_size; i++)
1021
- w[i] = 0;
1022
- for(i=0; i<l; i++)
1023
- {
1024
- feature_node * const xi = prob->x[i];
1025
- QD[i] = sparse_operator::nrm2_sq(xi);
1026
- sparse_operator::axpy(beta[i], xi, w);
1027
-
1028
- index[i] = i;
1029
- }
1030
-
1031
-
1032
- while(iter < max_iter)
1033
- {
1034
- Gmax_new = 0;
1035
- Gnorm1_new = 0;
1036
-
1037
- for(i=0; i<active_size; i++)
1038
- {
1039
- int j = i+rand()%(active_size-i);
1040
- swap(index[i], index[j]);
1041
- }
1042
-
1043
- for(s=0; s<active_size; s++)
1044
- {
1045
- i = index[s];
1046
- G = -y[i] + lambda[GETI(i)]*beta[i];
1047
- H = QD[i] + lambda[GETI(i)];
1048
-
1049
- feature_node * const xi = prob->x[i];
1050
- G += sparse_operator::dot(w, xi);
1051
-
1052
- double Gp = G+p;
1053
- double Gn = G-p;
1054
- double violation = 0;
1055
- if(beta[i] == 0)
1056
- {
1057
- if(Gp < 0)
1058
- violation = -Gp;
1059
- else if(Gn > 0)
1060
- violation = Gn;
1061
- else if(Gp>Gmax_old && Gn<-Gmax_old)
1062
- {
1063
- active_size--;
1064
- swap(index[s], index[active_size]);
1065
- s--;
1066
- continue;
1067
- }
1068
- }
1069
- else if(beta[i] >= upper_bound[GETI(i)])
1070
- {
1071
- if(Gp > 0)
1072
- violation = Gp;
1073
- else if(Gp < -Gmax_old)
1074
- {
1075
- active_size--;
1076
- swap(index[s], index[active_size]);
1077
- s--;
1078
- continue;
1079
- }
1080
- }
1081
- else if(beta[i] <= -upper_bound[GETI(i)])
1082
- {
1083
- if(Gn < 0)
1084
- violation = -Gn;
1085
- else if(Gn > Gmax_old)
1086
- {
1087
- active_size--;
1088
- swap(index[s], index[active_size]);
1089
- s--;
1090
- continue;
1091
- }
1092
- }
1093
- else if(beta[i] > 0)
1094
- violation = fabs(Gp);
1095
- else
1096
- violation = fabs(Gn);
1097
-
1098
- Gmax_new = max(Gmax_new, violation);
1099
- Gnorm1_new += violation;
1100
-
1101
- // obtain Newton direction d
1102
- if(Gp < H*beta[i])
1103
- d = -Gp/H;
1104
- else if(Gn > H*beta[i])
1105
- d = -Gn/H;
1106
- else
1107
- d = -beta[i];
1108
-
1109
- if(fabs(d) < 1.0e-12)
1110
- continue;
1111
-
1112
- double beta_old = beta[i];
1113
- beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
1114
- d = beta[i]-beta_old;
1115
-
1116
- if(d != 0)
1117
- sparse_operator::axpy(d, xi, w);
1118
- }
1119
-
1120
- if(iter == 0)
1121
- Gnorm1_init = Gnorm1_new;
1122
- iter++;
1123
- if(iter % 10 == 0)
1124
- info(".");
1125
-
1126
- if(Gnorm1_new <= eps*Gnorm1_init)
1127
- {
1128
- if(active_size == l)
1129
- break;
1130
- else
1131
- {
1132
- active_size = l;
1133
- info("*");
1134
- Gmax_old = INF;
1135
- continue;
1136
- }
1137
- }
1138
-
1139
- Gmax_old = Gmax_new;
1140
- }
1141
-
1142
- info("\noptimization finished, #iter = %d\n", iter);
1143
- if(iter >= max_iter)
1144
- info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n");
1145
-
1146
- // calculate objective value
1147
- double v = 0;
1148
- int nSV = 0;
1149
- for(i=0; i<w_size; i++)
1150
- v += w[i]*w[i];
1151
- v = 0.5*v;
1152
- for(i=0; i<l; i++)
1153
- {
1154
- v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
1155
- if(beta[i] != 0)
1156
- nSV++;
1157
- }
1158
-
1159
- info("Objective value = %lf\n", v);
1160
- info("nSV = %d\n",nSV);
1161
-
1162
- delete [] beta;
1163
- delete [] QD;
1164
- delete [] index;
1165
- }
1166
-
1167
-
1168
- // A coordinate descent algorithm for
1169
- // the dual of L2-regularized logistic regression problems
1170
- //
1171
- // min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
1172
- // s.t. 0 <= \alpha_i <= upper_bound_i,
1173
- //
1174
- // where Qij = yi yj xi^T xj and
1175
- // upper_bound_i = Cp if y_i = 1
1176
- // upper_bound_i = Cn if y_i = -1
1177
- //
1178
- // Given:
1179
- // x, y, Cp, Cn
1180
- // eps is the stopping tolerance
1181
- //
1182
- // solution will be put in w
1183
- //
1184
- // See Algorithm 5 of Yu et al., MLJ 2010
1185
-
1186
- #undef GETI
1187
- #define GETI(i) (y[i]+1)
1188
- // To support weights for instances, use GETI(i) (i)
1189
-
1190
- void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
1191
- {
1192
- int l = prob->l;
1193
- int w_size = prob->n;
1194
- int i, s, iter = 0;
1195
- double *xTx = new double[l];
1196
- int max_iter = 1000;
1197
- int *index = new int[l];
1198
- double *alpha = new double[2*l]; // store alpha and C - alpha
1199
- schar *y = new schar[l];
1200
- int max_inner_iter = 100; // for inner Newton
1201
- double innereps = 1e-2;
1202
- double innereps_min = min(1e-8, eps);
1203
- double upper_bound[3] = {Cn, 0, Cp};
1204
-
1205
- for(i=0; i<l; i++)
1206
- {
1207
- if(prob->y[i] > 0)
1208
- {
1209
- y[i] = +1;
1210
- }
1211
- else
1212
- {
1213
- y[i] = -1;
1214
- }
1215
- }
1216
-
1217
- // Initial alpha can be set here. Note that
1218
- // 0 < alpha[i] < upper_bound[GETI(i)]
1219
- // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
1220
- for(i=0; i<l; i++)
1221
- {
1222
- alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
1223
- alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
1224
- }
1225
-
1226
- for(i=0; i<w_size; i++)
1227
- w[i] = 0;
1228
- for(i=0; i<l; i++)
1229
- {
1230
- feature_node * const xi = prob->x[i];
1231
- xTx[i] = sparse_operator::nrm2_sq(xi);
1232
- sparse_operator::axpy(y[i]*alpha[2*i], xi, w);
1233
- index[i] = i;
1234
- }
1235
-
1236
- while (iter < max_iter)
1237
- {
1238
- for (i=0; i<l; i++)
1239
- {
1240
- int j = i+rand()%(l-i);
1241
- swap(index[i], index[j]);
1242
- }
1243
- int newton_iter = 0;
1244
- double Gmax = 0;
1245
- for (s=0; s<l; s++)
1246
- {
1247
- i = index[s];
1248
- const schar yi = y[i];
1249
- double C = upper_bound[GETI(i)];
1250
- double ywTx = 0, xisq = xTx[i];
1251
- feature_node * const xi = prob->x[i];
1252
- ywTx = yi*sparse_operator::dot(w, xi);
1253
- double a = xisq, b = ywTx;
1254
-
1255
- // Decide to minimize g_1(z) or g_2(z)
1256
- int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
1257
- if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
1258
- {
1259
- ind1 = 2*i+1;
1260
- ind2 = 2*i;
1261
- sign = -1;
1262
- }
1263
-
1264
- // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
1265
- double alpha_old = alpha[ind1];
1266
- double z = alpha_old;
1267
- if(C - z < 0.5 * C)
1268
- z = 0.1*z;
1269
- double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1270
- Gmax = max(Gmax, fabs(gp));
1271
-
1272
- // Newton method on the sub-problem
1273
- const double eta = 0.1; // xi in the paper
1274
- int inner_iter = 0;
1275
- while (inner_iter <= max_inner_iter)
1276
- {
1277
- if(fabs(gp) < innereps)
1278
- break;
1279
- double gpp = a + C/(C-z)/z;
1280
- double tmpz = z - gp/gpp;
1281
- if(tmpz <= 0)
1282
- z *= eta;
1283
- else // tmpz in (0, C)
1284
- z = tmpz;
1285
- gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1286
- newton_iter++;
1287
- inner_iter++;
1288
- }
1289
-
1290
- if(inner_iter > 0) // update w
1291
- {
1292
- alpha[ind1] = z;
1293
- alpha[ind2] = C-z;
1294
- sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w);
1295
- }
1296
- }
1297
-
1298
- iter++;
1299
- if(iter % 10 == 0)
1300
- info(".");
1301
-
1302
- if(Gmax < eps)
1303
- break;
1304
-
1305
- if(newton_iter <= l/10)
1306
- innereps = max(innereps_min, 0.1*innereps);
1307
-
1308
- }
1309
-
1310
- info("\noptimization finished, #iter = %d\n",iter);
1311
- if (iter >= max_iter)
1312
- info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
1313
-
1314
- // calculate objective value
1315
-
1316
- double v = 0;
1317
- for(i=0; i<w_size; i++)
1318
- v += w[i] * w[i];
1319
- v *= 0.5;
1320
- for(i=0; i<l; i++)
1321
- v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
1322
- - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
1323
- info("Objective value = %lf\n", v);
1324
-
1325
- delete [] xTx;
1326
- delete [] alpha;
1327
- delete [] y;
1328
- delete [] index;
1329
- }
1330
-
1331
- // A coordinate descent algorithm for
1332
- // L1-regularized L2-loss support vector classification
1333
- //
1334
- // min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
1335
- //
1336
- // Given:
1337
- // x, y, Cp, Cn
1338
- // eps is the stopping tolerance
1339
- //
1340
- // solution will be put in w
1341
- //
1342
- // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
1343
-
1344
- #undef GETI
1345
- #define GETI(i) (y[i]+1)
1346
- // To support weights for instances, use GETI(i) (i)
1347
-
1348
- static void solve_l1r_l2_svc(
1349
- problem *prob_col, double *w, double eps,
1350
- double Cp, double Cn)
1351
- {
1352
- int l = prob_col->l;
1353
- int w_size = prob_col->n;
1354
- int j, s, iter = 0;
1355
- int max_iter = 1000;
1356
- int active_size = w_size;
1357
- int max_num_linesearch = 20;
1358
-
1359
- double sigma = 0.01;
1360
- double d, G_loss, G, H;
1361
- double Gmax_old = INF;
1362
- double Gmax_new, Gnorm1_new;
1363
- double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1364
- double d_old, d_diff;
1365
- double loss_old, loss_new;
1366
- double appxcond, cond;
1367
-
1368
- int *index = new int[w_size];
1369
- schar *y = new schar[l];
1370
- double *b = new double[l]; // b = 1-ywTx
1371
- double *xj_sq = new double[w_size];
1372
- feature_node *x;
1373
-
1374
- double C[3] = {Cn,0,Cp};
1375
-
1376
- // Initial w can be set here.
1377
- for(j=0; j<w_size; j++)
1378
- w[j] = 0;
1379
-
1380
- for(j=0; j<l; j++)
1381
- {
1382
- b[j] = 1;
1383
- if(prob_col->y[j] > 0)
1384
- y[j] = 1;
1385
- else
1386
- y[j] = -1;
1387
- }
1388
- for(j=0; j<w_size; j++)
1389
- {
1390
- index[j] = j;
1391
- xj_sq[j] = 0;
1392
- x = prob_col->x[j];
1393
- while(x->index != -1)
1394
- {
1395
- int ind = x->index-1;
1396
- x->value *= y[ind]; // x->value stores yi*xij
1397
- double val = x->value;
1398
- b[ind] -= w[j]*val;
1399
- xj_sq[j] += C[GETI(ind)]*val*val;
1400
- x++;
1401
- }
1402
- }
1403
-
1404
- while(iter < max_iter)
1405
- {
1406
- Gmax_new = 0;
1407
- Gnorm1_new = 0;
1408
-
1409
- for(j=0; j<active_size; j++)
1410
- {
1411
- int i = j+rand()%(active_size-j);
1412
- swap(index[i], index[j]);
1413
- }
1414
-
1415
- for(s=0; s<active_size; s++)
1416
- {
1417
- j = index[s];
1418
- G_loss = 0;
1419
- H = 0;
1420
-
1421
- x = prob_col->x[j];
1422
- while(x->index != -1)
1423
- {
1424
- int ind = x->index-1;
1425
- if(b[ind] > 0)
1426
- {
1427
- double val = x->value;
1428
- double tmp = C[GETI(ind)]*val;
1429
- G_loss -= tmp*b[ind];
1430
- H += tmp*val;
1431
- }
1432
- x++;
1433
- }
1434
- G_loss *= 2;
1435
-
1436
- G = G_loss;
1437
- H *= 2;
1438
- H = max(H, 1e-12);
1439
-
1440
- double Gp = G+1;
1441
- double Gn = G-1;
1442
- double violation = 0;
1443
- if(w[j] == 0)
1444
- {
1445
- if(Gp < 0)
1446
- violation = -Gp;
1447
- else if(Gn > 0)
1448
- violation = Gn;
1449
- else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1450
- {
1451
- active_size--;
1452
- swap(index[s], index[active_size]);
1453
- s--;
1454
- continue;
1455
- }
1456
- }
1457
- else if(w[j] > 0)
1458
- violation = fabs(Gp);
1459
- else
1460
- violation = fabs(Gn);
1461
-
1462
- Gmax_new = max(Gmax_new, violation);
1463
- Gnorm1_new += violation;
1464
-
1465
- // obtain Newton direction d
1466
- if(Gp < H*w[j])
1467
- d = -Gp/H;
1468
- else if(Gn > H*w[j])
1469
- d = -Gn/H;
1470
- else
1471
- d = -w[j];
1472
-
1473
- if(fabs(d) < 1.0e-12)
1474
- continue;
1475
-
1476
- double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
1477
- d_old = 0;
1478
- int num_linesearch;
1479
- for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
1480
- {
1481
- d_diff = d_old - d;
1482
- cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
1483
-
1484
- appxcond = xj_sq[j]*d*d + G_loss*d + cond;
1485
- if(appxcond <= 0)
1486
- {
1487
- x = prob_col->x[j];
1488
- sparse_operator::axpy(d_diff, x, b);
1489
- break;
1490
- }
1491
-
1492
- if(num_linesearch == 0)
1493
- {
1494
- loss_old = 0;
1495
- loss_new = 0;
1496
- x = prob_col->x[j];
1497
- while(x->index != -1)
1498
- {
1499
- int ind = x->index-1;
1500
- if(b[ind] > 0)
1501
- loss_old += C[GETI(ind)]*b[ind]*b[ind];
1502
- double b_new = b[ind] + d_diff*x->value;
1503
- b[ind] = b_new;
1504
- if(b_new > 0)
1505
- loss_new += C[GETI(ind)]*b_new*b_new;
1506
- x++;
1507
- }
1508
- }
1509
- else
1510
- {
1511
- loss_new = 0;
1512
- x = prob_col->x[j];
1513
- while(x->index != -1)
1514
- {
1515
- int ind = x->index-1;
1516
- double b_new = b[ind] + d_diff*x->value;
1517
- b[ind] = b_new;
1518
- if(b_new > 0)
1519
- loss_new += C[GETI(ind)]*b_new*b_new;
1520
- x++;
1521
- }
1522
- }
1523
-
1524
- cond = cond + loss_new - loss_old;
1525
- if(cond <= 0)
1526
- break;
1527
- else
1528
- {
1529
- d_old = d;
1530
- d *= 0.5;
1531
- delta *= 0.5;
1532
- }
1533
- }
1534
-
1535
- w[j] += d;
1536
-
1537
- // recompute b[] if line search takes too many steps
1538
- if(num_linesearch >= max_num_linesearch)
1539
- {
1540
- info("#");
1541
- for(int i=0; i<l; i++)
1542
- b[i] = 1;
1543
-
1544
- for(int i=0; i<w_size; i++)
1545
- {
1546
- if(w[i]==0) continue;
1547
- x = prob_col->x[i];
1548
- sparse_operator::axpy(-w[i], x, b);
1549
- }
1550
- }
1551
- }
1552
-
1553
- if(iter == 0)
1554
- Gnorm1_init = Gnorm1_new;
1555
- iter++;
1556
- if(iter % 10 == 0)
1557
- info(".");
1558
-
1559
- if(Gnorm1_new <= eps*Gnorm1_init)
1560
- {
1561
- if(active_size == w_size)
1562
- break;
1563
- else
1564
- {
1565
- active_size = w_size;
1566
- info("*");
1567
- Gmax_old = INF;
1568
- continue;
1569
- }
1570
- }
1571
-
1572
- Gmax_old = Gmax_new;
1573
- }
1574
-
1575
- info("\noptimization finished, #iter = %d\n", iter);
1576
- if(iter >= max_iter)
1577
- info("\nWARNING: reaching max number of iterations\n");
1578
-
1579
- // calculate objective value
1580
-
1581
- double v = 0;
1582
- int nnz = 0;
1583
- for(j=0; j<w_size; j++)
1584
- {
1585
- x = prob_col->x[j];
1586
- while(x->index != -1)
1587
- {
1588
- x->value *= prob_col->y[x->index-1]; // restore x->value
1589
- x++;
1590
- }
1591
- if(w[j] != 0)
1592
- {
1593
- v += fabs(w[j]);
1594
- nnz++;
1595
- }
1596
- }
1597
- for(j=0; j<l; j++)
1598
- if(b[j] > 0)
1599
- v += C[GETI(j)]*b[j]*b[j];
1600
-
1601
- info("Objective value = %lf\n", v);
1602
- info("#nonzeros/#features = %d/%d\n", nnz, w_size);
1603
-
1604
- delete [] index;
1605
- delete [] y;
1606
- delete [] b;
1607
- delete [] xj_sq;
1608
- }
1609
-
1610
- // A coordinate descent algorithm for
1611
- // L1-regularized logistic regression problems
1612
- //
1613
- // min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
1614
- //
1615
- // Given:
1616
- // x, y, Cp, Cn
1617
- // eps is the stopping tolerance
1618
- //
1619
- // solution will be put in w
1620
- //
1621
- // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
1622
-
1623
- #undef GETI
1624
- #define GETI(i) (y[i]+1)
1625
- // To support weights for instances, use GETI(i) (i)
1626
-
1627
- static void solve_l1r_lr(
1628
- const problem *prob_col, double *w, double eps,
1629
- double Cp, double Cn)
1630
- {
1631
- int l = prob_col->l;
1632
- int w_size = prob_col->n;
1633
- int j, s, newton_iter=0, iter=0;
1634
- int max_newton_iter = 100;
1635
- int max_iter = 1000;
1636
- int max_num_linesearch = 20;
1637
- int active_size;
1638
- int QP_active_size;
1639
-
1640
- double nu = 1e-12;
1641
- double inner_eps = 1;
1642
- double sigma = 0.01;
1643
- double w_norm, w_norm_new;
1644
- double z, G, H;
1645
- double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1646
- double Gmax_old = INF;
1647
- double Gmax_new, Gnorm1_new;
1648
- double QP_Gmax_old = INF;
1649
- double QP_Gmax_new, QP_Gnorm1_new;
1650
- double delta, negsum_xTd, cond;
1651
-
1652
- int *index = new int[w_size];
1653
- schar *y = new schar[l];
1654
- double *Hdiag = new double[w_size];
1655
- double *Grad = new double[w_size];
1656
- double *wpd = new double[w_size];
1657
- double *xjneg_sum = new double[w_size];
1658
- double *xTd = new double[l];
1659
- double *exp_wTx = new double[l];
1660
- double *exp_wTx_new = new double[l];
1661
- double *tau = new double[l];
1662
- double *D = new double[l];
1663
- feature_node *x;
1664
-
1665
- double C[3] = {Cn,0,Cp};
1666
-
1667
- // Initial w can be set here.
1668
- for(j=0; j<w_size; j++)
1669
- w[j] = 0;
1670
-
1671
- for(j=0; j<l; j++)
1672
- {
1673
- if(prob_col->y[j] > 0)
1674
- y[j] = 1;
1675
- else
1676
- y[j] = -1;
1677
-
1678
- exp_wTx[j] = 0;
1679
- }
1680
-
1681
- w_norm = 0;
1682
- for(j=0; j<w_size; j++)
1683
- {
1684
- w_norm += fabs(w[j]);
1685
- wpd[j] = w[j];
1686
- index[j] = j;
1687
- xjneg_sum[j] = 0;
1688
- x = prob_col->x[j];
1689
- while(x->index != -1)
1690
- {
1691
- int ind = x->index-1;
1692
- double val = x->value;
1693
- exp_wTx[ind] += w[j]*val;
1694
- if(y[ind] == -1)
1695
- xjneg_sum[j] += C[GETI(ind)]*val;
1696
- x++;
1697
- }
1698
- }
1699
- for(j=0; j<l; j++)
1700
- {
1701
- exp_wTx[j] = exp(exp_wTx[j]);
1702
- double tau_tmp = 1/(1+exp_wTx[j]);
1703
- tau[j] = C[GETI(j)]*tau_tmp;
1704
- D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp;
1705
- }
1706
-
1707
- while(newton_iter < max_newton_iter)
1708
- {
1709
- Gmax_new = 0;
1710
- Gnorm1_new = 0;
1711
- active_size = w_size;
1712
-
1713
- for(s=0; s<active_size; s++)
1714
- {
1715
- j = index[s];
1716
- Hdiag[j] = nu;
1717
- Grad[j] = 0;
1718
-
1719
- double tmp = 0;
1720
- x = prob_col->x[j];
1721
- while(x->index != -1)
1722
- {
1723
- int ind = x->index-1;
1724
- Hdiag[j] += x->value*x->value*D[ind];
1725
- tmp += x->value*tau[ind];
1726
- x++;
1727
- }
1728
- Grad[j] = -tmp + xjneg_sum[j];
1729
-
1730
- double Gp = Grad[j]+1;
1731
- double Gn = Grad[j]-1;
1732
- double violation = 0;
1733
- if(w[j] == 0)
1734
- {
1735
- if(Gp < 0)
1736
- violation = -Gp;
1737
- else if(Gn > 0)
1738
- violation = Gn;
1739
- //outer-level shrinking
1740
- else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1741
- {
1742
- active_size--;
1743
- swap(index[s], index[active_size]);
1744
- s--;
1745
- continue;
1746
- }
1747
- }
1748
- else if(w[j] > 0)
1749
- violation = fabs(Gp);
1750
- else
1751
- violation = fabs(Gn);
1752
-
1753
- Gmax_new = max(Gmax_new, violation);
1754
- Gnorm1_new += violation;
1755
- }
1756
-
1757
- if(newton_iter == 0)
1758
- Gnorm1_init = Gnorm1_new;
1759
-
1760
- if(Gnorm1_new <= eps*Gnorm1_init)
1761
- break;
1762
-
1763
- iter = 0;
1764
- QP_Gmax_old = INF;
1765
- QP_active_size = active_size;
1766
-
1767
- for(int i=0; i<l; i++)
1768
- xTd[i] = 0;
1769
-
1770
- // optimize QP over wpd
1771
- while(iter < max_iter)
1772
- {
1773
- QP_Gmax_new = 0;
1774
- QP_Gnorm1_new = 0;
1775
-
1776
- for(j=0; j<QP_active_size; j++)
1777
- {
1778
- int i = j+rand()%(QP_active_size-j);
1779
- swap(index[i], index[j]);
1780
- }
1781
-
1782
- for(s=0; s<QP_active_size; s++)
1783
- {
1784
- j = index[s];
1785
- H = Hdiag[j];
1786
-
1787
- x = prob_col->x[j];
1788
- G = Grad[j] + (wpd[j]-w[j])*nu;
1789
- while(x->index != -1)
1790
- {
1791
- int ind = x->index-1;
1792
- G += x->value*D[ind]*xTd[ind];
1793
- x++;
1794
- }
1795
-
1796
- double Gp = G+1;
1797
- double Gn = G-1;
1798
- double violation = 0;
1799
- if(wpd[j] == 0)
1800
- {
1801
- if(Gp < 0)
1802
- violation = -Gp;
1803
- else if(Gn > 0)
1804
- violation = Gn;
1805
- //inner-level shrinking
1806
- else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
1807
- {
1808
- QP_active_size--;
1809
- swap(index[s], index[QP_active_size]);
1810
- s--;
1811
- continue;
1812
- }
1813
- }
1814
- else if(wpd[j] > 0)
1815
- violation = fabs(Gp);
1816
- else
1817
- violation = fabs(Gn);
1818
-
1819
- QP_Gmax_new = max(QP_Gmax_new, violation);
1820
- QP_Gnorm1_new += violation;
1821
-
1822
- // obtain solution of one-variable problem
1823
- if(Gp < H*wpd[j])
1824
- z = -Gp/H;
1825
- else if(Gn > H*wpd[j])
1826
- z = -Gn/H;
1827
- else
1828
- z = -wpd[j];
1829
-
1830
- if(fabs(z) < 1.0e-12)
1831
- continue;
1832
- z = min(max(z,-10.0),10.0);
1833
-
1834
- wpd[j] += z;
1835
-
1836
- x = prob_col->x[j];
1837
- sparse_operator::axpy(z, x, xTd);
1838
- }
1839
-
1840
- iter++;
1841
-
1842
- if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
1843
- {
1844
- //inner stopping
1845
- if(QP_active_size == active_size)
1846
- break;
1847
- //active set reactivation
1848
- else
1849
- {
1850
- QP_active_size = active_size;
1851
- QP_Gmax_old = INF;
1852
- continue;
1853
- }
1854
- }
1855
-
1856
- QP_Gmax_old = QP_Gmax_new;
1857
- }
1858
-
1859
- if(iter >= max_iter)
1860
- info("WARNING: reaching max number of inner iterations\n");
1861
-
1862
- delta = 0;
1863
- w_norm_new = 0;
1864
- for(j=0; j<w_size; j++)
1865
- {
1866
- delta += Grad[j]*(wpd[j]-w[j]);
1867
- if(wpd[j] != 0)
1868
- w_norm_new += fabs(wpd[j]);
1869
- }
1870
- delta += (w_norm_new-w_norm);
1871
-
1872
- negsum_xTd = 0;
1873
- for(int i=0; i<l; i++)
1874
- if(y[i] == -1)
1875
- negsum_xTd += C[GETI(i)]*xTd[i];
1876
-
1877
- int num_linesearch;
1878
- for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
1879
- {
1880
- cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;
1881
-
1882
- for(int i=0; i<l; i++)
1883
- {
1884
- double exp_xTd = exp(xTd[i]);
1885
- exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
1886
- cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
1887
- }
1888
-
1889
- if(cond <= 0)
1890
- {
1891
- w_norm = w_norm_new;
1892
- for(j=0; j<w_size; j++)
1893
- w[j] = wpd[j];
1894
- for(int i=0; i<l; i++)
1895
- {
1896
- exp_wTx[i] = exp_wTx_new[i];
1897
- double tau_tmp = 1/(1+exp_wTx[i]);
1898
- tau[i] = C[GETI(i)]*tau_tmp;
1899
- D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
1900
- }
1901
- break;
1902
- }
1903
- else
1904
- {
1905
- w_norm_new = 0;
1906
- for(j=0; j<w_size; j++)
1907
- {
1908
- wpd[j] = (w[j]+wpd[j])*0.5;
1909
- if(wpd[j] != 0)
1910
- w_norm_new += fabs(wpd[j]);
1911
- }
1912
- delta *= 0.5;
1913
- negsum_xTd *= 0.5;
1914
- for(int i=0; i<l; i++)
1915
- xTd[i] *= 0.5;
1916
- }
1917
- }
1918
-
1919
- // Recompute some info due to too many line search steps
1920
- if(num_linesearch >= max_num_linesearch)
1921
- {
1922
- for(int i=0; i<l; i++)
1923
- exp_wTx[i] = 0;
1924
-
1925
- for(int i=0; i<w_size; i++)
1926
- {
1927
- if(w[i]==0) continue;
1928
- x = prob_col->x[i];
1929
- sparse_operator::axpy(w[i], x, exp_wTx);
1930
- }
1931
-
1932
- for(int i=0; i<l; i++)
1933
- exp_wTx[i] = exp(exp_wTx[i]);
1934
- }
1935
-
1936
- if(iter == 1)
1937
- inner_eps *= 0.25;
1938
-
1939
- newton_iter++;
1940
- Gmax_old = Gmax_new;
1941
-
1942
- info("iter %3d #CD cycles %d\n", newton_iter, iter);
1943
- }
1944
-
1945
- info("=========================\n");
1946
- info("optimization finished, #iter = %d\n", newton_iter);
1947
- if(newton_iter >= max_newton_iter)
1948
- info("WARNING: reaching max number of iterations\n");
1949
-
1950
- // calculate objective value
1951
-
1952
- double v = 0;
1953
- int nnz = 0;
1954
- for(j=0; j<w_size; j++)
1955
- if(w[j] != 0)
1956
- {
1957
- v += fabs(w[j]);
1958
- nnz++;
1959
- }
1960
- for(j=0; j<l; j++)
1961
- if(y[j] == 1)
1962
- v += C[GETI(j)]*log(1+1/exp_wTx[j]);
1963
- else
1964
- v += C[GETI(j)]*log(1+exp_wTx[j]);
1965
-
1966
- info("Objective value = %lf\n", v);
1967
- info("#nonzeros/#features = %d/%d\n", nnz, w_size);
1968
-
1969
- delete [] index;
1970
- delete [] y;
1971
- delete [] Hdiag;
1972
- delete [] Grad;
1973
- delete [] wpd;
1974
- delete [] xjneg_sum;
1975
- delete [] xTd;
1976
- delete [] exp_wTx;
1977
- delete [] exp_wTx_new;
1978
- delete [] tau;
1979
- delete [] D;
1980
- }
1981
-
1982
- // transpose matrix X from row format to column format
1983
- static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
1984
- {
1985
- int i;
1986
- int l = prob->l;
1987
- int n = prob->n;
1988
- size_t nnz = 0;
1989
- size_t *col_ptr = new size_t [n+1];
1990
- feature_node *x_space;
1991
- prob_col->l = l;
1992
- prob_col->n = n;
1993
- prob_col->y = new double[l];
1994
- prob_col->x = new feature_node*[n];
1995
-
1996
- for(i=0; i<l; i++)
1997
- prob_col->y[i] = prob->y[i];
1998
-
1999
- for(i=0; i<n+1; i++)
2000
- col_ptr[i] = 0;
2001
- for(i=0; i<l; i++)
2002
- {
2003
- feature_node *x = prob->x[i];
2004
- while(x->index != -1)
2005
- {
2006
- nnz++;
2007
- col_ptr[x->index]++;
2008
- x++;
2009
- }
2010
- }
2011
- for(i=1; i<n+1; i++)
2012
- col_ptr[i] += col_ptr[i-1] + 1;
2013
-
2014
- x_space = new feature_node[nnz+n];
2015
- for(i=0; i<n; i++)
2016
- prob_col->x[i] = &x_space[col_ptr[i]];
2017
-
2018
- for(i=0; i<l; i++)
2019
- {
2020
- feature_node *x = prob->x[i];
2021
- while(x->index != -1)
2022
- {
2023
- int ind = x->index-1;
2024
- x_space[col_ptr[ind]].index = i+1; // starts from 1
2025
- x_space[col_ptr[ind]].value = x->value;
2026
- col_ptr[ind]++;
2027
- x++;
2028
- }
2029
- }
2030
- for(i=0; i<n; i++)
2031
- x_space[col_ptr[i]].index = -1;
2032
-
2033
- *x_space_ret = x_space;
2034
-
2035
- delete [] col_ptr;
2036
- }
2037
-
2038
- // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2039
- // perm, length l, must be allocated before calling this subroutine
2040
- static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2041
- {
2042
- int l = prob->l;
2043
- int max_nr_class = 16;
2044
- int nr_class = 0;
2045
- int *label = Malloc(int,max_nr_class);
2046
- int *count = Malloc(int,max_nr_class);
2047
- int *data_label = Malloc(int,l);
2048
- int i;
2049
-
2050
- for(i=0;i<l;i++)
2051
- {
2052
- int this_label = (int)prob->y[i];
2053
- int j;
2054
- for(j=0;j<nr_class;j++)
2055
- {
2056
- if(this_label == label[j])
2057
- {
2058
- ++count[j];
2059
- break;
2060
- }
2061
- }
2062
- data_label[i] = j;
2063
- if(j == nr_class)
2064
- {
2065
- if(nr_class == max_nr_class)
2066
- {
2067
- max_nr_class *= 2;
2068
- label = (int *)realloc(label,max_nr_class*sizeof(int));
2069
- count = (int *)realloc(count,max_nr_class*sizeof(int));
2070
- }
2071
- label[nr_class] = this_label;
2072
- count[nr_class] = 1;
2073
- ++nr_class;
2074
- }
2075
- }
2076
-
2077
- //
2078
- // Labels are ordered by their first occurrence in the training set.
2079
- // However, for two-class sets with -1/+1 labels and -1 appears first,
2080
- // we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
2081
- //
2082
- if (nr_class == 2 && label[0] == -1 && label[1] == 1)
2083
- {
2084
- swap(label[0],label[1]);
2085
- swap(count[0],count[1]);
2086
- for(i=0;i<l;i++)
2087
- {
2088
- if(data_label[i] == 0)
2089
- data_label[i] = 1;
2090
- else
2091
- data_label[i] = 0;
2092
- }
2093
- }
2094
-
2095
- int *start = Malloc(int,nr_class);
2096
- start[0] = 0;
2097
- for(i=1;i<nr_class;i++)
2098
- start[i] = start[i-1]+count[i-1];
2099
- for(i=0;i<l;i++)
2100
- {
2101
- perm[start[data_label[i]]] = i;
2102
- ++start[data_label[i]];
2103
- }
2104
- start[0] = 0;
2105
- for(i=1;i<nr_class;i++)
2106
- start[i] = start[i-1]+count[i-1];
2107
-
2108
- *nr_class_ret = nr_class;
2109
- *label_ret = label;
2110
- *start_ret = start;
2111
- *count_ret = count;
2112
- free(data_label);
2113
- }
2114
-
2115
- static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
2116
- {
2117
- //inner and outer tolerances for TRON
2118
- double eps = param->eps;
2119
- double eps_cg = 0.1;
2120
- if(param->init_sol != NULL)
2121
- eps_cg = 0.5;
2122
-
2123
- int pos = 0;
2124
- int neg = 0;
2125
- for(int i=0;i<prob->l;i++)
2126
- if(prob->y[i] > 0)
2127
- pos++;
2128
- neg = prob->l - pos;
2129
- double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
2130
-
2131
- function *fun_obj=NULL;
2132
- switch(param->solver_type)
2133
- {
2134
- case L2R_LR:
2135
- {
2136
- double *C = new double[prob->l];
2137
- for(int i = 0; i < prob->l; i++)
2138
- {
2139
- if(prob->y[i] > 0)
2140
- C[i] = Cp;
2141
- else
2142
- C[i] = Cn;
2143
- }
2144
- fun_obj=new l2r_lr_fun(prob, C);
2145
- TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
2146
- tron_obj.set_print_string(liblinear_print_string);
2147
- tron_obj.tron(w);
2148
- delete fun_obj;
2149
- delete[] C;
2150
- break;
2151
- }
2152
- case L2R_L2LOSS_SVC:
2153
- {
2154
- double *C = new double[prob->l];
2155
- for(int i = 0; i < prob->l; i++)
2156
- {
2157
- if(prob->y[i] > 0)
2158
- C[i] = Cp;
2159
- else
2160
- C[i] = Cn;
2161
- }
2162
- fun_obj=new l2r_l2_svc_fun(prob, C);
2163
- TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
2164
- tron_obj.set_print_string(liblinear_print_string);
2165
- tron_obj.tron(w);
2166
- delete fun_obj;
2167
- delete[] C;
2168
- break;
2169
- }
2170
- case L2R_L2LOSS_SVC_DUAL:
2171
- solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
2172
- break;
2173
- case L2R_L1LOSS_SVC_DUAL:
2174
- solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
2175
- break;
2176
- case L1R_L2LOSS_SVC:
2177
- {
2178
- problem prob_col;
2179
- feature_node *x_space = NULL;
2180
- transpose(prob, &x_space ,&prob_col);
2181
- solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn);
2182
- delete [] prob_col.y;
2183
- delete [] prob_col.x;
2184
- delete [] x_space;
2185
- break;
2186
- }
2187
- case L1R_LR:
2188
- {
2189
- problem prob_col;
2190
- feature_node *x_space = NULL;
2191
- transpose(prob, &x_space ,&prob_col);
2192
- solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn);
2193
- delete [] prob_col.y;
2194
- delete [] prob_col.x;
2195
- delete [] x_space;
2196
- break;
2197
- }
2198
- case L2R_LR_DUAL:
2199
- solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
2200
- break;
2201
- case L2R_L2LOSS_SVR:
2202
- {
2203
- double *C = new double[prob->l];
2204
- for(int i = 0; i < prob->l; i++)
2205
- C[i] = param->C;
2206
-
2207
- fun_obj=new l2r_l2_svr_fun(prob, C, param->p);
2208
- TRON tron_obj(fun_obj, param->eps);
2209
- tron_obj.set_print_string(liblinear_print_string);
2210
- tron_obj.tron(w);
2211
- delete fun_obj;
2212
- delete[] C;
2213
- break;
2214
-
2215
- }
2216
- case L2R_L1LOSS_SVR_DUAL:
2217
- solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL);
2218
- break;
2219
- case L2R_L2LOSS_SVR_DUAL:
2220
- solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL);
2221
- break;
2222
- default:
2223
- fprintf(stderr, "ERROR: unknown solver_type\n");
2224
- break;
2225
- }
2226
- }
2227
-
2228
- // Calculate the initial C for parameter selection
2229
- static double calc_start_C(const problem *prob, const parameter *param)
2230
- {
2231
- int i;
2232
- double xTx,max_xTx;
2233
- max_xTx = 0;
2234
- for(i=0; i<prob->l; i++)
2235
- {
2236
- xTx = 0;
2237
- feature_node *xi=prob->x[i];
2238
- while(xi->index != -1)
2239
- {
2240
- double val = xi->value;
2241
- xTx += val*val;
2242
- xi++;
2243
- }
2244
- if(xTx > max_xTx)
2245
- max_xTx = xTx;
2246
- }
2247
-
2248
- double min_C = 1.0;
2249
- if(param->solver_type == L2R_LR)
2250
- min_C = 1.0 / (prob->l * max_xTx);
2251
- else if(param->solver_type == L2R_L2LOSS_SVC)
2252
- min_C = 1.0 / (2 * prob->l * max_xTx);
2253
-
2254
- return pow( 2, floor(log(min_C) / log(2.0)) );
2255
- }
2256
-
2257
-
2258
- //
2259
- // Interface functions
2260
- //
2261
- model* train(const problem *prob, const parameter *param)
2262
- {
2263
- int i,j;
2264
- int l = prob->l;
2265
- int n = prob->n;
2266
- int w_size = prob->n;
2267
- model *model_ = Malloc(model,1);
2268
-
2269
- if(prob->bias>=0)
2270
- model_->nr_feature=n-1;
2271
- else
2272
- model_->nr_feature=n;
2273
- model_->param = *param;
2274
- model_->bias = prob->bias;
2275
-
2276
- if(check_regression_model(model_))
2277
- {
2278
- model_->w = Malloc(double, w_size);
2279
- for(i=0; i<w_size; i++)
2280
- model_->w[i] = 0;
2281
- model_->nr_class = 2;
2282
- model_->label = NULL;
2283
- train_one(prob, param, model_->w, 0, 0);
2284
- }
2285
- else
2286
- {
2287
- int nr_class;
2288
- int *label = NULL;
2289
- int *start = NULL;
2290
- int *count = NULL;
2291
- int *perm = Malloc(int,l);
2292
-
2293
- // group training data of the same class
2294
- group_classes(prob,&nr_class,&label,&start,&count,perm);
2295
-
2296
- model_->nr_class=nr_class;
2297
- model_->label = Malloc(int,nr_class);
2298
- for(i=0;i<nr_class;i++)
2299
- model_->label[i] = label[i];
2300
-
2301
- // calculate weighted C
2302
- double *weighted_C = Malloc(double, nr_class);
2303
- for(i=0;i<nr_class;i++)
2304
- weighted_C[i] = param->C;
2305
- for(i=0;i<param->nr_weight;i++)
2306
- {
2307
- for(j=0;j<nr_class;j++)
2308
- if(param->weight_label[i] == label[j])
2309
- break;
2310
- if(j == nr_class)
2311
- fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2312
- else
2313
- weighted_C[j] *= param->weight[i];
2314
- }
2315
-
2316
- // constructing the subproblem
2317
- feature_node **x = Malloc(feature_node *,l);
2318
- for(i=0;i<l;i++)
2319
- x[i] = prob->x[perm[i]];
2320
-
2321
- int k;
2322
- problem sub_prob;
2323
- sub_prob.l = l;
2324
- sub_prob.n = n;
2325
- sub_prob.x = Malloc(feature_node *,sub_prob.l);
2326
- sub_prob.y = Malloc(double,sub_prob.l);
2327
-
2328
- for(k=0; k<sub_prob.l; k++)
2329
- sub_prob.x[k] = x[k];
2330
-
2331
- // multi-class svm by Crammer and Singer
2332
- if(param->solver_type == MCSVM_CS)
2333
- {
2334
- model_->w=Malloc(double, n*nr_class);
2335
- for(i=0;i<nr_class;i++)
2336
- for(j=start[i];j<start[i]+count[i];j++)
2337
- sub_prob.y[j] = i;
2338
- Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
2339
- Solver.Solve(model_->w);
2340
- }
2341
- else
2342
- {
2343
- if(nr_class == 2)
2344
- {
2345
- model_->w=Malloc(double, w_size);
2346
-
2347
- int e0 = start[0]+count[0];
2348
- k=0;
2349
- for(; k<e0; k++)
2350
- sub_prob.y[k] = +1;
2351
- for(; k<sub_prob.l; k++)
2352
- sub_prob.y[k] = -1;
2353
-
2354
- if(param->init_sol != NULL)
2355
- for(i=0;i<w_size;i++)
2356
- model_->w[i] = param->init_sol[i];
2357
- else
2358
- for(i=0;i<w_size;i++)
2359
- model_->w[i] = 0;
2360
-
2361
- train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
2362
- }
2363
- else
2364
- {
2365
- model_->w=Malloc(double, w_size*nr_class);
2366
- double *w=Malloc(double, w_size);
2367
- for(i=0;i<nr_class;i++)
2368
- {
2369
- int si = start[i];
2370
- int ei = si+count[i];
2371
-
2372
- k=0;
2373
- for(; k<si; k++)
2374
- sub_prob.y[k] = -1;
2375
- for(; k<ei; k++)
2376
- sub_prob.y[k] = +1;
2377
- for(; k<sub_prob.l; k++)
2378
- sub_prob.y[k] = -1;
2379
-
2380
- if(param->init_sol != NULL)
2381
- for(j=0;j<w_size;j++)
2382
- w[j] = param->init_sol[j*nr_class+i];
2383
- else
2384
- for(j=0;j<w_size;j++)
2385
- w[j] = 0;
2386
-
2387
- train_one(&sub_prob, param, w, weighted_C[i], param->C);
2388
-
2389
- for(int j=0;j<w_size;j++)
2390
- model_->w[j*nr_class+i] = w[j];
2391
- }
2392
- free(w);
2393
- }
2394
-
2395
- }
2396
-
2397
- free(x);
2398
- free(label);
2399
- free(start);
2400
- free(count);
2401
- free(perm);
2402
- free(sub_prob.x);
2403
- free(sub_prob.y);
2404
- free(weighted_C);
2405
- }
2406
- return model_;
2407
- }
2408
-
2409
- void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target)
2410
- {
2411
- int i;
2412
- int *fold_start;
2413
- int l = prob->l;
2414
- int *perm = Malloc(int,l);
2415
- if (nr_fold > l)
2416
- {
2417
- nr_fold = l;
2418
- fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
2419
- }
2420
- fold_start = Malloc(int,nr_fold+1);
2421
- for(i=0;i<l;i++) perm[i]=i;
2422
- for(i=0;i<l;i++)
2423
- {
2424
- int j = i+rand()%(l-i);
2425
- swap(perm[i],perm[j]);
2426
- }
2427
- for(i=0;i<=nr_fold;i++)
2428
- fold_start[i]=i*l/nr_fold;
2429
-
2430
- for(i=0;i<nr_fold;i++)
2431
- {
2432
- int begin = fold_start[i];
2433
- int end = fold_start[i+1];
2434
- int j,k;
2435
- struct problem subprob;
2436
-
2437
- subprob.bias = prob->bias;
2438
- subprob.n = prob->n;
2439
- subprob.l = l-(end-begin);
2440
- subprob.x = Malloc(struct feature_node*,subprob.l);
2441
- subprob.y = Malloc(double,subprob.l);
2442
-
2443
- k=0;
2444
- for(j=0;j<begin;j++)
2445
- {
2446
- subprob.x[k] = prob->x[perm[j]];
2447
- subprob.y[k] = prob->y[perm[j]];
2448
- ++k;
2449
- }
2450
- for(j=end;j<l;j++)
2451
- {
2452
- subprob.x[k] = prob->x[perm[j]];
2453
- subprob.y[k] = prob->y[perm[j]];
2454
- ++k;
2455
- }
2456
- struct model *submodel = train(&subprob,param);
2457
- for(j=begin;j<end;j++)
2458
- target[perm[j]] = predict(submodel,prob->x[perm[j]]);
2459
- free_and_destroy_model(&submodel);
2460
- free(subprob.x);
2461
- free(subprob.y);
2462
- }
2463
- free(fold_start);
2464
- free(perm);
2465
- }
2466
-
2467
- void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
2468
- {
2469
- // variables for CV
2470
- int i;
2471
- int *fold_start;
2472
- int l = prob->l;
2473
- int *perm = Malloc(int, l);
2474
- double *target = Malloc(double, prob->l);
2475
- struct problem *subprob = Malloc(problem,nr_fold);
2476
-
2477
- // variables for warm start
2478
- double ratio = 2;
2479
- double **prev_w = Malloc(double*, nr_fold);
2480
- for(i = 0; i < nr_fold; i++)
2481
- prev_w[i] = NULL;
2482
- int num_unchanged_w = 0;
2483
- struct parameter param1 = *param;
2484
- void (*default_print_string) (const char *) = liblinear_print_string;
2485
-
2486
- if (nr_fold > l)
2487
- {
2488
- nr_fold = l;
2489
- fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
2490
- }
2491
- fold_start = Malloc(int,nr_fold+1);
2492
- for(i=0;i<l;i++) perm[i]=i;
2493
- for(i=0;i<l;i++)
2494
- {
2495
- int j = i+rand()%(l-i);
2496
- swap(perm[i],perm[j]);
2497
- }
2498
- for(i=0;i<=nr_fold;i++)
2499
- fold_start[i]=i*l/nr_fold;
2500
-
2501
- for(i=0;i<nr_fold;i++)
2502
- {
2503
- int begin = fold_start[i];
2504
- int end = fold_start[i+1];
2505
- int j,k;
2506
-
2507
- subprob[i].bias = prob->bias;
2508
- subprob[i].n = prob->n;
2509
- subprob[i].l = l-(end-begin);
2510
- subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
2511
- subprob[i].y = Malloc(double,subprob[i].l);
2512
-
2513
- k=0;
2514
- for(j=0;j<begin;j++)
2515
- {
2516
- subprob[i].x[k] = prob->x[perm[j]];
2517
- subprob[i].y[k] = prob->y[perm[j]];
2518
- ++k;
2519
- }
2520
- for(j=end;j<l;j++)
2521
- {
2522
- subprob[i].x[k] = prob->x[perm[j]];
2523
- subprob[i].y[k] = prob->y[perm[j]];
2524
- ++k;
2525
- }
2526
-
2527
- }
2528
-
2529
- *best_rate = 0;
2530
- if(start_C <= 0)
2531
- start_C = calc_start_C(prob,param);
2532
- param1.C = start_C;
2533
-
2534
- while(param1.C <= max_C)
2535
- {
2536
- //Output disabled for running CV at a particular C
2537
- set_print_string_function(&print_null);
2538
-
2539
- for(i=0; i<nr_fold; i++)
2540
- {
2541
- int j;
2542
- int begin = fold_start[i];
2543
- int end = fold_start[i+1];
2544
-
2545
- param1.init_sol = prev_w[i];
2546
- struct model *submodel = train(&subprob[i],&param1);
2547
-
2548
- int total_w_size;
2549
- if(submodel->nr_class == 2)
2550
- total_w_size = subprob[i].n;
2551
- else
2552
- total_w_size = subprob[i].n * submodel->nr_class;
2553
-
2554
- if(prev_w[i] == NULL)
2555
- {
2556
- prev_w[i] = Malloc(double, total_w_size);
2557
- for(j=0; j<total_w_size; j++)
2558
- prev_w[i][j] = submodel->w[j];
2559
- }
2560
- else if(num_unchanged_w >= 0)
2561
- {
2562
- double norm_w_diff = 0;
2563
- for(j=0; j<total_w_size; j++)
2564
- {
2565
- norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
2566
- prev_w[i][j] = submodel->w[j];
2567
- }
2568
- norm_w_diff = sqrt(norm_w_diff);
2569
-
2570
- if(norm_w_diff > 1e-15)
2571
- num_unchanged_w = -1;
2572
- }
2573
- else
2574
- {
2575
- for(j=0; j<total_w_size; j++)
2576
- prev_w[i][j] = submodel->w[j];
2577
- }
2578
-
2579
- for(j=begin; j<end; j++)
2580
- target[perm[j]] = predict(submodel,prob->x[perm[j]]);
2581
-
2582
- free_and_destroy_model(&submodel);
2583
- }
2584
- set_print_string_function(default_print_string);
2585
-
2586
- int total_correct = 0;
2587
- for(i=0; i<prob->l; i++)
2588
- if(target[i] == prob->y[i])
2589
- ++total_correct;
2590
- double current_rate = (double)total_correct/prob->l;
2591
- if(current_rate > *best_rate)
2592
- {
2593
- *best_C = param1.C;
2594
- *best_rate = current_rate;
2595
- }
2596
-
2597
- info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
2598
- num_unchanged_w++;
2599
- if(num_unchanged_w == 3)
2600
- break;
2601
- param1.C = param1.C*ratio;
2602
- }
2603
-
2604
- if(param1.C > max_C && max_C > start_C)
2605
- info("warning: maximum C reached.\n");
2606
- free(fold_start);
2607
- free(perm);
2608
- free(target);
2609
- for(i=0; i<nr_fold; i++)
2610
- {
2611
- free(subprob[i].x);
2612
- free(subprob[i].y);
2613
- free(prev_w[i]);
2614
- }
2615
- free(prev_w);
2616
- free(subprob);
2617
- }
2618
-
2619
- double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
2620
- {
2621
- int idx;
2622
- int n;
2623
- if(model_->bias>=0)
2624
- n=model_->nr_feature+1;
2625
- else
2626
- n=model_->nr_feature;
2627
- double *w=model_->w;
2628
- int nr_class=model_->nr_class;
2629
- int i;
2630
- int nr_w;
2631
- if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
2632
- nr_w = 1;
2633
- else
2634
- nr_w = nr_class;
2635
-
2636
- const feature_node *lx=x;
2637
- for(i=0;i<nr_w;i++)
2638
- dec_values[i] = 0;
2639
- for(; (idx=lx->index)!=-1; lx++)
2640
- {
2641
- // the dimension of testing data may exceed that of training
2642
- if(idx<=n)
2643
- for(i=0;i<nr_w;i++)
2644
- dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
2645
- }
2646
-
2647
- if(nr_class==2)
2648
- {
2649
- if(check_regression_model(model_))
2650
- return dec_values[0];
2651
- else
2652
- return (dec_values[0]>0)?model_->label[0]:model_->label[1];
2653
- }
2654
- else
2655
- {
2656
- int dec_max_idx = 0;
2657
- for(i=1;i<nr_class;i++)
2658
- {
2659
- if(dec_values[i] > dec_values[dec_max_idx])
2660
- dec_max_idx = i;
2661
- }
2662
- return model_->label[dec_max_idx];
2663
- }
2664
- }
2665
-
2666
- double predict(const model *model_, const feature_node *x)
2667
- {
2668
- double *dec_values = Malloc(double, model_->nr_class);
2669
- double label=predict_values(model_, x, dec_values);
2670
- free(dec_values);
2671
- return label;
2672
- }
2673
-
2674
- double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
2675
- {
2676
- if(check_probability_model(model_))
2677
- {
2678
- int i;
2679
- int nr_class=model_->nr_class;
2680
- int nr_w;
2681
- if(nr_class==2)
2682
- nr_w = 1;
2683
- else
2684
- nr_w = nr_class;
2685
-
2686
- double label=predict_values(model_, x, prob_estimates);
2687
- for(i=0;i<nr_w;i++)
2688
- prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
2689
-
2690
- if(nr_class==2) // for binary classification
2691
- prob_estimates[1]=1.-prob_estimates[0];
2692
- else
2693
- {
2694
- double sum=0;
2695
- for(i=0; i<nr_class; i++)
2696
- sum+=prob_estimates[i];
2697
-
2698
- for(i=0; i<nr_class; i++)
2699
- prob_estimates[i]=prob_estimates[i]/sum;
2700
- }
2701
-
2702
- return label;
2703
- }
2704
- else
2705
- return 0;
2706
- }
2707
-
2708
- static const char *solver_type_table[]=
2709
- {
2710
- "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
2711
- "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
2712
- "", "", "",
2713
- "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL", NULL
2714
- };
2715
-
2716
- int save_model(const char *model_file_name, const struct model *model_)
2717
- {
2718
- int i;
2719
- int nr_feature=model_->nr_feature;
2720
- int n;
2721
- const parameter& param = model_->param;
2722
-
2723
- if(model_->bias>=0)
2724
- n=nr_feature+1;
2725
- else
2726
- n=nr_feature;
2727
- int w_size = n;
2728
- FILE *fp = fopen(model_file_name,"w");
2729
- if(fp==NULL) return -1;
2730
-
2731
- char *old_locale = setlocale(LC_ALL, NULL);
2732
- if (old_locale)
2733
- {
2734
- old_locale = strdup(old_locale);
2735
- }
2736
- setlocale(LC_ALL, "C");
2737
-
2738
- int nr_w;
2739
- if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
2740
- nr_w=1;
2741
- else
2742
- nr_w=model_->nr_class;
2743
-
2744
- fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
2745
- fprintf(fp, "nr_class %d\n", model_->nr_class);
2746
-
2747
- if(model_->label)
2748
- {
2749
- fprintf(fp, "label");
2750
- for(i=0; i<model_->nr_class; i++)
2751
- fprintf(fp, " %d", model_->label[i]);
2752
- fprintf(fp, "\n");
2753
- }
2754
-
2755
- fprintf(fp, "nr_feature %d\n", nr_feature);
2756
-
2757
- fprintf(fp, "bias %.16g\n", model_->bias);
2758
-
2759
- fprintf(fp, "w\n");
2760
- for(i=0; i<w_size; i++)
2761
- {
2762
- int j;
2763
- for(j=0; j<nr_w; j++)
2764
- fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
2765
- fprintf(fp, "\n");
2766
- }
2767
-
2768
- setlocale(LC_ALL, old_locale);
2769
- free(old_locale);
2770
-
2771
- if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2772
- else return 0;
2773
- }
2774
-
2775
- //
2776
- // FSCANF helps to handle fscanf failures.
2777
- // Its do-while block avoids the ambiguity when
2778
- // if (...)
2779
- // FSCANF();
2780
- // is used
2781
- //
2782
- #define FSCANF(_stream, _format, _var)do\
2783
- {\
2784
- if (fscanf(_stream, _format, _var) != 1)\
2785
- {\
2786
- fprintf(stderr, "ERROR: fscanf failed to read the model\n");\
2787
- EXIT_LOAD_MODEL()\
2788
- }\
2789
- }while(0)
2790
- // EXIT_LOAD_MODEL should NOT end with a semicolon.
2791
- #define EXIT_LOAD_MODEL()\
2792
- {\
2793
- setlocale(LC_ALL, old_locale);\
2794
- free(model_->label);\
2795
- free(model_);\
2796
- free(old_locale);\
2797
- return NULL;\
2798
- }
2799
- struct model *load_model(const char *model_file_name)
2800
- {
2801
- FILE *fp = fopen(model_file_name,"r");
2802
- if(fp==NULL) return NULL;
2803
-
2804
- int i;
2805
- int nr_feature;
2806
- int n;
2807
- int nr_class;
2808
- double bias;
2809
- model *model_ = Malloc(model,1);
2810
- parameter& param = model_->param;
2811
-
2812
- model_->label = NULL;
2813
-
2814
- char *old_locale = setlocale(LC_ALL, NULL);
2815
- if (old_locale)
2816
- {
2817
- old_locale = strdup(old_locale);
2818
- }
2819
- setlocale(LC_ALL, "C");
2820
-
2821
- char cmd[81];
2822
- while(1)
2823
- {
2824
- FSCANF(fp,"%80s",cmd);
2825
- if(strcmp(cmd,"solver_type")==0)
2826
- {
2827
- FSCANF(fp,"%80s",cmd);
2828
- int i;
2829
- for(i=0;solver_type_table[i];i++)
2830
- {
2831
- if(strcmp(solver_type_table[i],cmd)==0)
2832
- {
2833
- param.solver_type=i;
2834
- break;
2835
- }
2836
- }
2837
- if(solver_type_table[i] == NULL)
2838
- {
2839
- fprintf(stderr,"unknown solver type.\n");
2840
- EXIT_LOAD_MODEL()
2841
- }
2842
- }
2843
- else if(strcmp(cmd,"nr_class")==0)
2844
- {
2845
- FSCANF(fp,"%d",&nr_class);
2846
- model_->nr_class=nr_class;
2847
- }
2848
- else if(strcmp(cmd,"nr_feature")==0)
2849
- {
2850
- FSCANF(fp,"%d",&nr_feature);
2851
- model_->nr_feature=nr_feature;
2852
- }
2853
- else if(strcmp(cmd,"bias")==0)
2854
- {
2855
- FSCANF(fp,"%lf",&bias);
2856
- model_->bias=bias;
2857
- }
2858
- else if(strcmp(cmd,"w")==0)
2859
- {
2860
- break;
2861
- }
2862
- else if(strcmp(cmd,"label")==0)
2863
- {
2864
- int nr_class = model_->nr_class;
2865
- model_->label = Malloc(int,nr_class);
2866
- for(int i=0;i<nr_class;i++)
2867
- FSCANF(fp,"%d",&model_->label[i]);
2868
- }
2869
- else
2870
- {
2871
- fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2872
- EXIT_LOAD_MODEL()
2873
- }
2874
- }
2875
-
2876
- nr_feature=model_->nr_feature;
2877
- if(model_->bias>=0)
2878
- n=nr_feature+1;
2879
- else
2880
- n=nr_feature;
2881
- int w_size = n;
2882
- int nr_w;
2883
- if(nr_class==2 && param.solver_type != MCSVM_CS)
2884
- nr_w = 1;
2885
- else
2886
- nr_w = nr_class;
2887
-
2888
- model_->w=Malloc(double, w_size*nr_w);
2889
- for(i=0; i<w_size; i++)
2890
- {
2891
- int j;
2892
- for(j=0; j<nr_w; j++)
2893
- FSCANF(fp, "%lf ", &model_->w[i*nr_w+j]);
2894
- if (fscanf(fp, "\n") !=0)
2895
- {
2896
- fprintf(stderr, "ERROR: fscanf failed to read the model\n");
2897
- EXIT_LOAD_MODEL()
2898
- }
2899
- }
2900
-
2901
- setlocale(LC_ALL, old_locale);
2902
- free(old_locale);
2903
-
2904
- if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
2905
-
2906
- return model_;
2907
- }
2908
-
2909
- int get_nr_feature(const model *model_)
2910
- {
2911
- return model_->nr_feature;
2912
- }
2913
-
2914
- int get_nr_class(const model *model_)
2915
- {
2916
- return model_->nr_class;
2917
- }
2918
-
2919
- void get_labels(const model *model_, int* label)
2920
- {
2921
- if (model_->label != NULL)
2922
- for(int i=0;i<model_->nr_class;i++)
2923
- label[i] = model_->label[i];
2924
- }
2925
-
2926
- // use inline here for better performance (around 20% faster than the non-inline one)
2927
- static inline double get_w_value(const struct model *model_, int idx, int label_idx)
2928
- {
2929
- int nr_class = model_->nr_class;
2930
- int solver_type = model_->param.solver_type;
2931
- const double *w = model_->w;
2932
-
2933
- if(idx < 0 || idx > model_->nr_feature)
2934
- return 0;
2935
- if(check_regression_model(model_))
2936
- return w[idx];
2937
- else
2938
- {
2939
- if(label_idx < 0 || label_idx >= nr_class)
2940
- return 0;
2941
- if(nr_class == 2 && solver_type != MCSVM_CS)
2942
- {
2943
- if(label_idx == 0)
2944
- return w[idx];
2945
- else
2946
- return -w[idx];
2947
- }
2948
- else
2949
- return w[idx*nr_class+label_idx];
2950
- }
2951
- }
2952
-
2953
- // feat_idx: starting from 1 to nr_feature
2954
- // label_idx: starting from 0 to nr_class-1 for classification models;
2955
- // for regression models, label_idx is ignored.
2956
- double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
2957
- {
2958
- if(feat_idx > model_->nr_feature)
2959
- return 0;
2960
- return get_w_value(model_, feat_idx-1, label_idx);
2961
- }
2962
-
2963
- double get_decfun_bias(const struct model *model_, int label_idx)
2964
- {
2965
- int bias_idx = model_->nr_feature;
2966
- double bias = model_->bias;
2967
- if(bias <= 0)
2968
- return 0;
2969
- else
2970
- return bias*get_w_value(model_, bias_idx, label_idx);
2971
- }
2972
-
2973
- void free_model_content(struct model *model_ptr)
2974
- {
2975
- if(model_ptr->w != NULL)
2976
- free(model_ptr->w);
2977
- if(model_ptr->label != NULL)
2978
- free(model_ptr->label);
2979
- }
2980
-
2981
- void free_and_destroy_model(struct model **model_ptr_ptr)
2982
- {
2983
- struct model *model_ptr = *model_ptr_ptr;
2984
- if(model_ptr != NULL)
2985
- {
2986
- free_model_content(model_ptr);
2987
- free(model_ptr);
2988
- }
2989
- }
2990
-
2991
- void destroy_param(parameter* param)
2992
- {
2993
- if(param->weight_label != NULL)
2994
- free(param->weight_label);
2995
- if(param->weight != NULL)
2996
- free(param->weight);
2997
- if(param->init_sol != NULL)
2998
- free(param->init_sol);
2999
- }
3000
-
3001
- const char *check_parameter(const problem *prob, const parameter *param)
3002
- {
3003
- if(param->eps <= 0)
3004
- return "eps <= 0";
3005
-
3006
- if(param->C <= 0)
3007
- return "C <= 0";
3008
-
3009
- if(param->p < 0)
3010
- return "p < 0";
3011
-
3012
- if(param->solver_type != L2R_LR
3013
- && param->solver_type != L2R_L2LOSS_SVC_DUAL
3014
- && param->solver_type != L2R_L2LOSS_SVC
3015
- && param->solver_type != L2R_L1LOSS_SVC_DUAL
3016
- && param->solver_type != MCSVM_CS
3017
- && param->solver_type != L1R_L2LOSS_SVC
3018
- && param->solver_type != L1R_LR
3019
- && param->solver_type != L2R_LR_DUAL
3020
- && param->solver_type != L2R_L2LOSS_SVR
3021
- && param->solver_type != L2R_L2LOSS_SVR_DUAL
3022
- && param->solver_type != L2R_L1LOSS_SVR_DUAL)
3023
- return "unknown solver type";
3024
-
3025
- if(param->init_sol != NULL
3026
- && param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
3027
- return "Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC";
3028
-
3029
- return NULL;
3030
- }
3031
-
3032
- int check_probability_model(const struct model *model_)
3033
- {
3034
- return (model_->param.solver_type==L2R_LR ||
3035
- model_->param.solver_type==L2R_LR_DUAL ||
3036
- model_->param.solver_type==L1R_LR);
3037
- }
3038
-
3039
- int check_regression_model(const struct model *model_)
3040
- {
3041
- return (model_->param.solver_type==L2R_L2LOSS_SVR ||
3042
- model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
3043
- model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);
3044
- }
3045
-
3046
- void set_print_string_function(void (*print_func)(const char*))
3047
- {
3048
- if (print_func == NULL)
3049
- liblinear_print_string = &print_string_stdout;
3050
- else
3051
- liblinear_print_string = print_func;
3052
- }
3053
-
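
For orientation, below is a minimal sketch of how the interface functions listed in this removed linear.cpp (train, predict, check_parameter, save_model, free_and_destroy_model, destroy_param) are typically driven. It is illustrative only: the two-sample problem, the feature values, and the chosen solver/parameter values are assumptions for the sketch, not content of the diff, and the struct fields used are only those that appear in the listing above.

// Hypothetical driver for the LIBLINEAR C API shown above (assumes linear.h
// from the same bundled copy). Data and parameter values are made up.
#include <stdio.h>
#include "linear.h"

int main()
{
	// Two training samples with two features each; index -1 terminates a row.
	feature_node x0[] = {{1, 0.5}, {2, 1.0}, {-1, 0.0}};
	feature_node x1[] = {{1, -0.5}, {2, -1.0}, {-1, 0.0}};
	feature_node *rows[] = {x0, x1};
	double y[] = {+1, -1};

	problem prob;
	prob.l = 2;        // number of samples
	prob.n = 2;        // number of features
	prob.x = rows;
	prob.y = y;
	prob.bias = -1;    // no bias term appended

	parameter param;
	param.solver_type = L2R_L2LOSS_SVC_DUAL;
	param.C = 1;
	param.eps = 0.1;
	param.p = 0.1;
	param.nr_weight = 0;
	param.weight_label = NULL;
	param.weight = NULL;
	param.init_sol = NULL;

	// check_parameter returns NULL on success, or an error string otherwise.
	const char *err = check_parameter(&prob, &param);
	if (err) { fprintf(stderr, "%s\n", err); return 1; }

	model *m = train(&prob, &param);
	double pred = predict(m, x0);      // predicted label for the first sample
	printf("prediction: %g\n", pred);

	save_model("example.model", m);    // returns 0 on success, -1 on failure
	free_and_destroy_model(&m);
	destroy_param(&param);             // frees weight/init_sol arrays if set
	return 0;
}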