liblinear-ruby 1.0.1 → 1.0.2

Files changed (54)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/blasp.h +8 -8
  4. data/ext/daxpy.c +3 -3
  5. data/ext/ddot.c +3 -3
  6. data/ext/dnrm2.c +7 -7
  7. data/ext/dscal.c +4 -4
  8. data/ext/liblinear_wrap.cxx +382 -382
  9. data/ext/linear.cpp +44 -55
  10. data/ext/linear.h +5 -1
  11. data/ext/tron.cpp +13 -5
  12. data/ext/tron.h +1 -1
  13. data/lib/liblinear.rb +2 -0
  14. data/lib/liblinear/version.rb +1 -1
  15. metadata +2 -41
  16. data/liblinear-2.1/COPYRIGHT +0 -31
  17. data/liblinear-2.1/Makefile +0 -37
  18. data/liblinear-2.1/Makefile.win +0 -24
  19. data/liblinear-2.1/README +0 -600
  20. data/liblinear-2.1/blas/Makefile +0 -22
  21. data/liblinear-2.1/blas/blas.h +0 -25
  22. data/liblinear-2.1/blas/blasp.h +0 -438
  23. data/liblinear-2.1/blas/daxpy.c +0 -57
  24. data/liblinear-2.1/blas/ddot.c +0 -58
  25. data/liblinear-2.1/blas/dnrm2.c +0 -70
  26. data/liblinear-2.1/blas/dscal.c +0 -52
  27. data/liblinear-2.1/heart_scale +0 -270
  28. data/liblinear-2.1/linear.cpp +0 -3053
  29. data/liblinear-2.1/linear.def +0 -22
  30. data/liblinear-2.1/linear.h +0 -79
  31. data/liblinear-2.1/matlab/Makefile +0 -49
  32. data/liblinear-2.1/matlab/README +0 -208
  33. data/liblinear-2.1/matlab/libsvmread.c +0 -212
  34. data/liblinear-2.1/matlab/libsvmwrite.c +0 -119
  35. data/liblinear-2.1/matlab/linear_model_matlab.c +0 -176
  36. data/liblinear-2.1/matlab/linear_model_matlab.h +0 -2
  37. data/liblinear-2.1/matlab/make.m +0 -22
  38. data/liblinear-2.1/matlab/predict.c +0 -341
  39. data/liblinear-2.1/matlab/train.c +0 -492
  40. data/liblinear-2.1/predict.c +0 -243
  41. data/liblinear-2.1/python/Makefile +0 -4
  42. data/liblinear-2.1/python/README +0 -380
  43. data/liblinear-2.1/python/liblinear.py +0 -323
  44. data/liblinear-2.1/python/liblinearutil.py +0 -270
  45. data/liblinear-2.1/train.c +0 -449
  46. data/liblinear-2.1/tron.cpp +0 -241
  47. data/liblinear-2.1/tron.h +0 -35
  48. data/liblinear-2.1/windows/liblinear.dll +0 -0
  49. data/liblinear-2.1/windows/libsvmread.mexw64 +0 -0
  50. data/liblinear-2.1/windows/libsvmwrite.mexw64 +0 -0
  51. data/liblinear-2.1/windows/predict.exe +0 -0
  52. data/liblinear-2.1/windows/predict.mexw64 +0 -0
  53. data/liblinear-2.1/windows/train.exe +0 -0
  54. data/liblinear-2.1/windows/train.mexw64 +0 -0
data/liblinear-2.1/linear.cpp (deleted)
@@ -1,3053 +0,0 @@
1
- #include <math.h>
2
- #include <stdio.h>
3
- #include <stdlib.h>
4
- #include <string.h>
5
- #include <stdarg.h>
6
- #include <locale.h>
7
- #include "linear.h"
8
- #include "tron.h"
9
- typedef signed char schar;
10
- template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
11
- #ifndef min
12
- template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
13
- #endif
14
- #ifndef max
15
- template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
16
- #endif
17
- template <class S, class T> static inline void clone(T*& dst, S* src, int n)
18
- {
19
- dst = new T[n];
20
- memcpy((void *)dst,(void *)src,sizeof(T)*n);
21
- }
22
- #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
23
- #define INF HUGE_VAL
24
-
25
- static void print_string_stdout(const char *s)
26
- {
27
- fputs(s,stdout);
28
- fflush(stdout);
29
- }
30
- static void print_null(const char *s) {}
31
-
32
- static void (*liblinear_print_string) (const char *) = &print_string_stdout;
33
-
34
- #if 1
35
- static void info(const char *fmt,...)
36
- {
37
- char buf[BUFSIZ];
38
- va_list ap;
39
- va_start(ap,fmt);
40
- vsprintf(buf,fmt,ap);
41
- va_end(ap);
42
- (*liblinear_print_string)(buf);
43
- }
44
- #else
45
- static void info(const char *fmt,...) {}
46
- #endif
47
- class sparse_operator
48
- {
49
- public:
50
- static double nrm2_sq(const feature_node *x)
51
- {
52
- double ret = 0;
53
- while(x->index != -1)
54
- {
55
- ret += x->value*x->value;
56
- x++;
57
- }
58
- return (ret);
59
- }
60
-
61
- static double dot(const double *s, const feature_node *x)
62
- {
63
- double ret = 0;
64
- while(x->index != -1)
65
- {
66
- ret += s[x->index-1]*x->value;
67
- x++;
68
- }
69
- return (ret);
70
- }
71
-
72
- static void axpy(const double a, const feature_node *x, double *y)
73
- {
74
- while(x->index != -1)
75
- {
76
- y[x->index-1] += a*x->value;
77
- x++;
78
- }
79
- }
80
- };
81
-
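The sparse_operator helpers above walk LIBLINEAR's sparse row format: an array of feature_node entries (1-based feature index plus value) terminated by an entry whose index is -1. Below is a minimal standalone sketch of that convention; the feature_node struct is a stand-in mirroring the declaration in linear.h, and the loop reproduces what sparse_operator::dot and sparse_operator::nrm2_sq compute.

#include <cstdio>

// Stand-in for the struct declared in linear.h: a (1-based index, value) pair.
struct feature_node { int index; double value; };

int main()
{
	// x = (0.5, 0, 2.0) stored sparsely; index -1 marks the end of the row.
	feature_node x[] = { {1, 0.5}, {3, 2.0}, {-1, 0.0} };
	double s[] = { 1.0, 1.0, 1.0 };          // dense vector; entry index-1 is accessed

	double dot = 0, nrm2_sq = 0;
	for (const feature_node *p = x; p->index != -1; p++)
	{
		dot += s[p->index - 1] * p->value;   // same as sparse_operator::dot(s, x)
		nrm2_sq += p->value * p->value;      // same as sparse_operator::nrm2_sq(x)
	}
	printf("dot = %g, ||x||^2 = %g\n", dot, nrm2_sq);  // prints 2.5 and 4.25
	return 0;
}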
82
- class l2r_lr_fun: public function
83
- {
84
- public:
85
- l2r_lr_fun(const problem *prob, double *C);
86
- ~l2r_lr_fun();
87
-
88
- double fun(double *w);
89
- void grad(double *w, double *g);
90
- void Hv(double *s, double *Hs);
91
-
92
- int get_nr_variable(void);
93
-
94
- private:
95
- void Xv(double *v, double *Xv);
96
- void XTv(double *v, double *XTv);
97
-
98
- double *C;
99
- double *z;
100
- double *D;
101
- const problem *prob;
102
- };
103
-
104
- l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C)
105
- {
106
- int l=prob->l;
107
-
108
- this->prob = prob;
109
-
110
- z = new double[l];
111
- D = new double[l];
112
- this->C = C;
113
- }
114
-
115
- l2r_lr_fun::~l2r_lr_fun()
116
- {
117
- delete[] z;
118
- delete[] D;
119
- }
120
-
121
-
122
- double l2r_lr_fun::fun(double *w)
123
- {
124
- int i;
125
- double f=0;
126
- double *y=prob->y;
127
- int l=prob->l;
128
- int w_size=get_nr_variable();
129
-
130
- Xv(w, z);
131
-
132
- for(i=0;i<w_size;i++)
133
- f += w[i]*w[i];
134
- f /= 2.0;
135
- for(i=0;i<l;i++)
136
- {
137
- double yz = y[i]*z[i];
138
- if (yz >= 0)
139
- f += C[i]*log(1 + exp(-yz));
140
- else
141
- f += C[i]*(-yz+log(1 + exp(yz)));
142
- }
143
-
144
- return(f);
145
- }
146
-
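A note on the branch inside fun() above: both cases evaluate the same logistic loss term, written two ways so that exp() is never called with a large positive argument:

\[
C_i\log\bigl(1+e^{-y_i z_i}\bigr) =
\begin{cases}
C_i\log\bigl(1+e^{-y_i z_i}\bigr) & y_i z_i \ge 0,\\
C_i\bigl(-y_i z_i + \log(1+e^{\,y_i z_i})\bigr) & y_i z_i < 0,
\end{cases}
\qquad z_i = w^T x_i .
\]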
147
- void l2r_lr_fun::grad(double *w, double *g)
148
- {
149
- int i;
150
- double *y=prob->y;
151
- int l=prob->l;
152
- int w_size=get_nr_variable();
153
-
154
- for(i=0;i<l;i++)
155
- {
156
- z[i] = 1/(1 + exp(-y[i]*z[i]));
157
- D[i] = z[i]*(1-z[i]);
158
- z[i] = C[i]*(z[i]-1)*y[i];
159
- }
160
- XTv(z, g);
161
-
162
- for(i=0;i<w_size;i++)
163
- g[i] = w[i] + g[i];
164
- }
165
-
166
- int l2r_lr_fun::get_nr_variable(void)
167
- {
168
- return prob->n;
169
- }
170
-
171
- void l2r_lr_fun::Hv(double *s, double *Hs)
172
- {
173
- int i;
174
- int l=prob->l;
175
- int w_size=get_nr_variable();
176
- double *wa = new double[l];
177
- feature_node **x=prob->x;
178
-
179
- for(i=0;i<w_size;i++)
180
- Hs[i] = 0;
181
- for(i=0;i<l;i++)
182
- {
183
- feature_node * const xi=x[i];
184
- wa[i] = sparse_operator::dot(s, xi);
185
-
186
- wa[i] = C[i]*D[i]*wa[i];
187
-
188
- sparse_operator::axpy(wa[i], xi, Hs);
189
- }
190
- for(i=0;i<w_size;i++)
191
- Hs[i] = s[i] + Hs[i];
192
- delete[] wa;
193
- }
194
-
195
- void l2r_lr_fun::Xv(double *v, double *Xv)
196
- {
197
- int i;
198
- int l=prob->l;
199
- feature_node **x=prob->x;
200
-
201
- for(i=0;i<l;i++)
202
- Xv[i]=sparse_operator::dot(v, x[i]);
203
- }
204
-
205
- void l2r_lr_fun::XTv(double *v, double *XTv)
206
- {
207
- int i;
208
- int l=prob->l;
209
- int w_size=get_nr_variable();
210
- feature_node **x=prob->x;
211
-
212
- for(i=0;i<w_size;i++)
213
- XTv[i]=0;
214
- for(i=0;i<l;i++)
215
- sparse_operator::axpy(v[i], x[i], XTv);
216
- }
217
-
218
- class l2r_l2_svc_fun: public function
219
- {
220
- public:
221
- l2r_l2_svc_fun(const problem *prob, double *C);
222
- ~l2r_l2_svc_fun();
223
-
224
- double fun(double *w);
225
- void grad(double *w, double *g);
226
- void Hv(double *s, double *Hs);
227
-
228
- int get_nr_variable(void);
229
-
230
- protected:
231
- void Xv(double *v, double *Xv);
232
- void subXTv(double *v, double *XTv);
233
-
234
- double *C;
235
- double *z;
236
- double *D;
237
- int *I;
238
- int sizeI;
239
- const problem *prob;
240
- };
241
-
242
- l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C)
243
- {
244
- int l=prob->l;
245
-
246
- this->prob = prob;
247
-
248
- z = new double[l];
249
- D = new double[l];
250
- I = new int[l];
251
- this->C = C;
252
- }
253
-
254
- l2r_l2_svc_fun::~l2r_l2_svc_fun()
255
- {
256
- delete[] z;
257
- delete[] D;
258
- delete[] I;
259
- }
260
-
261
- double l2r_l2_svc_fun::fun(double *w)
262
- {
263
- int i;
264
- double f=0;
265
- double *y=prob->y;
266
- int l=prob->l;
267
- int w_size=get_nr_variable();
268
-
269
- Xv(w, z);
270
-
271
- for(i=0;i<w_size;i++)
272
- f += w[i]*w[i];
273
- f /= 2.0;
274
- for(i=0;i<l;i++)
275
- {
276
- z[i] = y[i]*z[i];
277
- double d = 1-z[i];
278
- if (d > 0)
279
- f += C[i]*d*d;
280
- }
281
-
282
- return(f);
283
- }
284
-
285
- void l2r_l2_svc_fun::grad(double *w, double *g)
286
- {
287
- int i;
288
- double *y=prob->y;
289
- int l=prob->l;
290
- int w_size=get_nr_variable();
291
-
292
- sizeI = 0;
293
- for (i=0;i<l;i++)
294
- if (z[i] < 1)
295
- {
296
- z[sizeI] = C[i]*y[i]*(z[i]-1);
297
- I[sizeI] = i;
298
- sizeI++;
299
- }
300
- subXTv(z, g);
301
-
302
- for(i=0;i<w_size;i++)
303
- g[i] = w[i] + 2*g[i];
304
- }
305
-
306
- int l2r_l2_svc_fun::get_nr_variable(void)
307
- {
308
- return prob->n;
309
- }
310
-
311
- void l2r_l2_svc_fun::Hv(double *s, double *Hs)
312
- {
313
- int i;
314
- int w_size=get_nr_variable();
315
- double *wa = new double[sizeI];
316
- feature_node **x=prob->x;
317
-
318
- for(i=0;i<w_size;i++)
319
- Hs[i]=0;
320
- for(i=0;i<sizeI;i++)
321
- {
322
- feature_node * const xi=x[I[i]];
323
- wa[i] = sparse_operator::dot(s, xi);
324
-
325
- wa[i] = C[I[i]]*wa[i];
326
-
327
- sparse_operator::axpy(wa[i], xi, Hs);
328
- }
329
- for(i=0;i<w_size;i++)
330
- Hs[i] = s[i] + 2*Hs[i];
331
- delete[] wa;
332
- }
333
-
334
- void l2r_l2_svc_fun::Xv(double *v, double *Xv)
335
- {
336
- int i;
337
- int l=prob->l;
338
- feature_node **x=prob->x;
339
-
340
- for(i=0;i<l;i++)
341
- Xv[i]=sparse_operator::dot(v, x[i]);
342
- }
343
-
344
- void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
345
- {
346
- int i;
347
- int w_size=get_nr_variable();
348
- feature_node **x=prob->x;
349
-
350
- for(i=0;i<w_size;i++)
351
- XTv[i]=0;
352
- for(i=0;i<sizeI;i++)
353
- sparse_operator::axpy(v[i], x[I[i]], XTv);
354
- }
355
-
356
- class l2r_l2_svr_fun: public l2r_l2_svc_fun
357
- {
358
- public:
359
- l2r_l2_svr_fun(const problem *prob, double *C, double p);
360
-
361
- double fun(double *w);
362
- void grad(double *w, double *g);
363
-
364
- private:
365
- double p;
366
- };
367
-
368
- l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *C, double p):
369
- l2r_l2_svc_fun(prob, C)
370
- {
371
- this->p = p;
372
- }
373
-
374
- double l2r_l2_svr_fun::fun(double *w)
375
- {
376
- int i;
377
- double f=0;
378
- double *y=prob->y;
379
- int l=prob->l;
380
- int w_size=get_nr_variable();
381
- double d;
382
-
383
- Xv(w, z);
384
-
385
- for(i=0;i<w_size;i++)
386
- f += w[i]*w[i];
387
- f /= 2;
388
- for(i=0;i<l;i++)
389
- {
390
- d = z[i] - y[i];
391
- if(d < -p)
392
- f += C[i]*(d+p)*(d+p);
393
- else if(d > p)
394
- f += C[i]*(d-p)*(d-p);
395
- }
396
-
397
- return(f);
398
- }
399
-
400
- void l2r_l2_svr_fun::grad(double *w, double *g)
401
- {
402
- int i;
403
- double *y=prob->y;
404
- int l=prob->l;
405
- int w_size=get_nr_variable();
406
- double d;
407
-
408
- sizeI = 0;
409
- for(i=0;i<l;i++)
410
- {
411
- d = z[i] - y[i];
412
-
413
- // generate index set I
414
- if(d < -p)
415
- {
416
- z[sizeI] = C[i]*(d+p);
417
- I[sizeI] = i;
418
- sizeI++;
419
- }
420
- else if(d > p)
421
- {
422
- z[sizeI] = C[i]*(d-p);
423
- I[sizeI] = i;
424
- sizeI++;
425
- }
426
-
427
- }
428
- subXTv(z, g);
429
-
430
- for(i=0;i<w_size;i++)
431
- g[i] = w[i] + 2*g[i];
432
- }
433
-
434
- // A coordinate descent algorithm for
435
- // multi-class support vector machines by Crammer and Singer
436
- //
437
- // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
438
- // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
439
- //
440
- // where e^m_i = 0 if y_i = m,
441
- // e^m_i = 1 if y_i != m,
442
- // C^m_i = C if m = y_i,
443
- // C^m_i = 0 if m != y_i,
444
- // and w_m(\alpha) = \sum_i \alpha^m_i x_i
445
- //
446
- // Given:
447
- // x, y, C
448
- // eps is the stopping tolerance
449
- //
450
- // solution will be put in w
451
- //
452
- // See Appendix of LIBLINEAR paper, Fan et al. (2008)
453
-
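Typeset, the Crammer–Singer dual described in the comment above is

\[
\min_{\alpha}\ \frac{1}{2}\sum_m \lVert w_m(\alpha)\rVert^2 + \sum_i\sum_m e_i^m\,\alpha_i^m
\qquad \text{s.t.}\ \alpha_i^m \le C_i^m\ \ \forall m,i,\quad \sum_m \alpha_i^m = 0\ \ \forall i,
\]

where \(e_i^m = 0\) if \(y_i = m\) and \(1\) otherwise, \(C_i^m = C\) if \(m = y_i\) and \(0\) otherwise, and \(w_m(\alpha) = \sum_i \alpha_i^m x_i\).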
454
- #define GETI(i) ((int) prob->y[i])
455
- // To support weights for instances, use GETI(i) (i)
456
-
457
- class Solver_MCSVM_CS
458
- {
459
- public:
460
- Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
461
- ~Solver_MCSVM_CS();
462
- void Solve(double *w);
463
- private:
464
- void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
465
- bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
466
- double *B, *C, *G;
467
- int w_size, l;
468
- int nr_class;
469
- int max_iter;
470
- double eps;
471
- const problem *prob;
472
- };
473
-
474
- Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
475
- {
476
- this->w_size = prob->n;
477
- this->l = prob->l;
478
- this->nr_class = nr_class;
479
- this->eps = eps;
480
- this->max_iter = max_iter;
481
- this->prob = prob;
482
- this->B = new double[nr_class];
483
- this->G = new double[nr_class];
484
- this->C = weighted_C;
485
- }
486
-
487
- Solver_MCSVM_CS::~Solver_MCSVM_CS()
488
- {
489
- delete[] B;
490
- delete[] G;
491
- }
492
-
493
- int compare_double(const void *a, const void *b)
494
- {
495
- if(*(double *)a > *(double *)b)
496
- return -1;
497
- if(*(double *)a < *(double *)b)
498
- return 1;
499
- return 0;
500
- }
501
-
502
- void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
503
- {
504
- int r;
505
- double *D;
506
-
507
- clone(D, B, active_i);
508
- if(yi < active_i)
509
- D[yi] += A_i*C_yi;
510
- qsort(D, active_i, sizeof(double), compare_double);
511
-
512
- double beta = D[0] - A_i*C_yi;
513
- for(r=1;r<active_i && beta<r*D[r];r++)
514
- beta += D[r];
515
- beta /= r;
516
-
517
- for(r=0;r<active_i;r++)
518
- {
519
- if(r == yi)
520
- alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
521
- else
522
- alpha_new[r] = min((double)0, (beta - B[r])/A_i);
523
- }
524
- delete[] D;
525
- }
526
-
527
- bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
528
- {
529
- double bound = 0;
530
- if(m == yi)
531
- bound = C[GETI(i)];
532
- if(alpha_i == bound && G[m] < minG)
533
- return true;
534
- return false;
535
- }
536
-
537
- void Solver_MCSVM_CS::Solve(double *w)
538
- {
539
- int i, m, s;
540
- int iter = 0;
541
- double *alpha = new double[l*nr_class];
542
- double *alpha_new = new double[nr_class];
543
- int *index = new int[l];
544
- double *QD = new double[l];
545
- int *d_ind = new int[nr_class];
546
- double *d_val = new double[nr_class];
547
- int *alpha_index = new int[nr_class*l];
548
- int *y_index = new int[l];
549
- int active_size = l;
550
- int *active_size_i = new int[l];
551
- double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
552
- bool start_from_all = true;
553
-
554
- // Initial alpha can be set here. Note that
555
- // sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l-1
556
- // alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
557
- // alpha[i*nr_class+m] <= 0 if prob->y[i] != m
558
- // If initial alpha isn't zero, uncomment the for loop below to initialize w
559
- for(i=0;i<l*nr_class;i++)
560
- alpha[i] = 0;
561
-
562
- for(i=0;i<w_size*nr_class;i++)
563
- w[i] = 0;
564
- for(i=0;i<l;i++)
565
- {
566
- for(m=0;m<nr_class;m++)
567
- alpha_index[i*nr_class+m] = m;
568
- feature_node *xi = prob->x[i];
569
- QD[i] = 0;
570
- while(xi->index != -1)
571
- {
572
- double val = xi->value;
573
- QD[i] += val*val;
574
-
575
- // Uncomment the for loop if initial alpha isn't zero
576
- // for(m=0; m<nr_class; m++)
577
- // w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
578
- xi++;
579
- }
580
- active_size_i[i] = nr_class;
581
- y_index[i] = (int)prob->y[i];
582
- index[i] = i;
583
- }
584
-
585
- while(iter < max_iter)
586
- {
587
- double stopping = -INF;
588
- for(i=0;i<active_size;i++)
589
- {
590
- int j = i+rand()%(active_size-i);
591
- swap(index[i], index[j]);
592
- }
593
- for(s=0;s<active_size;s++)
594
- {
595
- i = index[s];
596
- double Ai = QD[i];
597
- double *alpha_i = &alpha[i*nr_class];
598
- int *alpha_index_i = &alpha_index[i*nr_class];
599
-
600
- if(Ai > 0)
601
- {
602
- for(m=0;m<active_size_i[i];m++)
603
- G[m] = 1;
604
- if(y_index[i] < active_size_i[i])
605
- G[y_index[i]] = 0;
606
-
607
- feature_node *xi = prob->x[i];
608
- while(xi->index!= -1)
609
- {
610
- double *w_i = &w[(xi->index-1)*nr_class];
611
- for(m=0;m<active_size_i[i];m++)
612
- G[m] += w_i[alpha_index_i[m]]*(xi->value);
613
- xi++;
614
- }
615
-
616
- double minG = INF;
617
- double maxG = -INF;
618
- for(m=0;m<active_size_i[i];m++)
619
- {
620
- if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
621
- minG = G[m];
622
- if(G[m] > maxG)
623
- maxG = G[m];
624
- }
625
- if(y_index[i] < active_size_i[i])
626
- if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
627
- minG = G[y_index[i]];
628
-
629
- for(m=0;m<active_size_i[i];m++)
630
- {
631
- if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
632
- {
633
- active_size_i[i]--;
634
- while(active_size_i[i]>m)
635
- {
636
- if(!be_shrunk(i, active_size_i[i], y_index[i],
637
- alpha_i[alpha_index_i[active_size_i[i]]], minG))
638
- {
639
- swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
640
- swap(G[m], G[active_size_i[i]]);
641
- if(y_index[i] == active_size_i[i])
642
- y_index[i] = m;
643
- else if(y_index[i] == m)
644
- y_index[i] = active_size_i[i];
645
- break;
646
- }
647
- active_size_i[i]--;
648
- }
649
- }
650
- }
651
-
652
- if(active_size_i[i] <= 1)
653
- {
654
- active_size--;
655
- swap(index[s], index[active_size]);
656
- s--;
657
- continue;
658
- }
659
-
660
- if(maxG-minG <= 1e-12)
661
- continue;
662
- else
663
- stopping = max(maxG - minG, stopping);
664
-
665
- for(m=0;m<active_size_i[i];m++)
666
- B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
667
-
668
- solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
669
- int nz_d = 0;
670
- for(m=0;m<active_size_i[i];m++)
671
- {
672
- double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
673
- alpha_i[alpha_index_i[m]] = alpha_new[m];
674
- if(fabs(d) >= 1e-12)
675
- {
676
- d_ind[nz_d] = alpha_index_i[m];
677
- d_val[nz_d] = d;
678
- nz_d++;
679
- }
680
- }
681
-
682
- xi = prob->x[i];
683
- while(xi->index != -1)
684
- {
685
- double *w_i = &w[(xi->index-1)*nr_class];
686
- for(m=0;m<nz_d;m++)
687
- w_i[d_ind[m]] += d_val[m]*xi->value;
688
- xi++;
689
- }
690
- }
691
- }
692
-
693
- iter++;
694
- if(iter % 10 == 0)
695
- {
696
- info(".");
697
- }
698
-
699
- if(stopping < eps_shrink)
700
- {
701
- if(stopping < eps && start_from_all == true)
702
- break;
703
- else
704
- {
705
- active_size = l;
706
- for(i=0;i<l;i++)
707
- active_size_i[i] = nr_class;
708
- info("*");
709
- eps_shrink = max(eps_shrink/2, eps);
710
- start_from_all = true;
711
- }
712
- }
713
- else
714
- start_from_all = false;
715
- }
716
-
717
- info("\noptimization finished, #iter = %d\n",iter);
718
- if (iter >= max_iter)
719
- info("\nWARNING: reaching max number of iterations\n");
720
-
721
- // calculate objective value
722
- double v = 0;
723
- int nSV = 0;
724
- for(i=0;i<w_size*nr_class;i++)
725
- v += w[i]*w[i];
726
- v = 0.5*v;
727
- for(i=0;i<l*nr_class;i++)
728
- {
729
- v += alpha[i];
730
- if(fabs(alpha[i]) > 0)
731
- nSV++;
732
- }
733
- for(i=0;i<l;i++)
734
- v -= alpha[i*nr_class+(int)prob->y[i]];
735
- info("Objective value = %lf\n",v);
736
- info("nSV = %d\n",nSV);
737
-
738
- delete [] alpha;
739
- delete [] alpha_new;
740
- delete [] index;
741
- delete [] QD;
742
- delete [] d_ind;
743
- delete [] d_val;
744
- delete [] alpha_index;
745
- delete [] y_index;
746
- delete [] active_size_i;
747
- }
748
-
749
- // A coordinate descent algorithm for
750
- // L1-loss and L2-loss SVM dual problems
751
- //
752
- // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
753
- // s.t. 0 <= \alpha_i <= upper_bound_i,
754
- //
755
- // where Qij = yi yj xi^T xj and
756
- // D is a diagonal matrix
757
- //
758
- // In L1-SVM case:
759
- // upper_bound_i = Cp if y_i = 1
760
- // upper_bound_i = Cn if y_i = -1
761
- // D_ii = 0
762
- // In L2-SVM case:
763
- // upper_bound_i = INF
764
- // D_ii = 1/(2*Cp) if y_i = 1
765
- // D_ii = 1/(2*Cn) if y_i = -1
766
- //
767
- // Given:
768
- // x, y, Cp, Cn
769
- // eps is the stopping tolerance
770
- //
771
- // solution will be put in w
772
- //
773
- // See Algorithm 3 of Hsieh et al., ICML 2008
774
-
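Typeset, the dual problem in the comment above is

\[
\min_{\alpha}\ \frac{1}{2}\alpha^T(Q+D)\alpha - e^T\alpha
\qquad \text{s.t.}\ 0 \le \alpha_i \le U_i,
\]

with \(Q_{ij} = y_i y_j x_i^T x_j\); for L1-loss SVM, \(U_i = C_p\) (or \(C_n\)) and \(D_{ii}=0\), while for L2-loss SVM, \(U_i = \infty\) and \(D_{ii} = 1/(2C_p)\) (or \(1/(2C_n)\)), matching the per-label bounds listed above.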
775
- #undef GETI
776
- #define GETI(i) (y[i]+1)
777
- // To support weights for instances, use GETI(i) (i)
778
-
779
- static void solve_l2r_l1l2_svc(
780
- const problem *prob, double *w, double eps,
781
- double Cp, double Cn, int solver_type)
782
- {
783
- int l = prob->l;
784
- int w_size = prob->n;
785
- int i, s, iter = 0;
786
- double C, d, G;
787
- double *QD = new double[l];
788
- int max_iter = 1000;
789
- int *index = new int[l];
790
- double *alpha = new double[l];
791
- schar *y = new schar[l];
792
- int active_size = l;
793
-
794
- // PG: projected gradient, for shrinking and stopping
795
- double PG;
796
- double PGmax_old = INF;
797
- double PGmin_old = -INF;
798
- double PGmax_new, PGmin_new;
799
-
800
- // default solver_type: L2R_L2LOSS_SVC_DUAL
801
- double diag[3] = {0.5/Cn, 0, 0.5/Cp};
802
- double upper_bound[3] = {INF, 0, INF};
803
- if(solver_type == L2R_L1LOSS_SVC_DUAL)
804
- {
805
- diag[0] = 0;
806
- diag[2] = 0;
807
- upper_bound[0] = Cn;
808
- upper_bound[2] = Cp;
809
- }
810
-
811
- for(i=0; i<l; i++)
812
- {
813
- if(prob->y[i] > 0)
814
- {
815
- y[i] = +1;
816
- }
817
- else
818
- {
819
- y[i] = -1;
820
- }
821
- }
822
-
823
- // Initial alpha can be set here. Note that
824
- // 0 <= alpha[i] <= upper_bound[GETI(i)]
825
- for(i=0; i<l; i++)
826
- alpha[i] = 0;
827
-
828
- for(i=0; i<w_size; i++)
829
- w[i] = 0;
830
- for(i=0; i<l; i++)
831
- {
832
- QD[i] = diag[GETI(i)];
833
-
834
- feature_node * const xi = prob->x[i];
835
- QD[i] += sparse_operator::nrm2_sq(xi);
836
- sparse_operator::axpy(y[i]*alpha[i], xi, w);
837
-
838
- index[i] = i;
839
- }
840
-
841
- while (iter < max_iter)
842
- {
843
- PGmax_new = -INF;
844
- PGmin_new = INF;
845
-
846
- for (i=0; i<active_size; i++)
847
- {
848
- int j = i+rand()%(active_size-i);
849
- swap(index[i], index[j]);
850
- }
851
-
852
- for (s=0; s<active_size; s++)
853
- {
854
- i = index[s];
855
- const schar yi = y[i];
856
- feature_node * const xi = prob->x[i];
857
-
858
- G = yi*sparse_operator::dot(w, xi)-1;
859
-
860
- C = upper_bound[GETI(i)];
861
- G += alpha[i]*diag[GETI(i)];
862
-
863
- PG = 0;
864
- if (alpha[i] == 0)
865
- {
866
- if (G > PGmax_old)
867
- {
868
- active_size--;
869
- swap(index[s], index[active_size]);
870
- s--;
871
- continue;
872
- }
873
- else if (G < 0)
874
- PG = G;
875
- }
876
- else if (alpha[i] == C)
877
- {
878
- if (G < PGmin_old)
879
- {
880
- active_size--;
881
- swap(index[s], index[active_size]);
882
- s--;
883
- continue;
884
- }
885
- else if (G > 0)
886
- PG = G;
887
- }
888
- else
889
- PG = G;
890
-
891
- PGmax_new = max(PGmax_new, PG);
892
- PGmin_new = min(PGmin_new, PG);
893
-
894
- if(fabs(PG) > 1.0e-12)
895
- {
896
- double alpha_old = alpha[i];
897
- alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
898
- d = (alpha[i] - alpha_old)*yi;
899
- sparse_operator::axpy(d, xi, w);
900
- }
901
- }
902
-
903
- iter++;
904
- if(iter % 10 == 0)
905
- info(".");
906
-
907
- if(PGmax_new - PGmin_new <= eps)
908
- {
909
- if(active_size == l)
910
- break;
911
- else
912
- {
913
- active_size = l;
914
- info("*");
915
- PGmax_old = INF;
916
- PGmin_old = -INF;
917
- continue;
918
- }
919
- }
920
- PGmax_old = PGmax_new;
921
- PGmin_old = PGmin_new;
922
- if (PGmax_old <= 0)
923
- PGmax_old = INF;
924
- if (PGmin_old >= 0)
925
- PGmin_old = -INF;
926
- }
927
-
928
- info("\noptimization finished, #iter = %d\n",iter);
929
- if (iter >= max_iter)
930
- info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
931
-
932
- // calculate objective value
933
-
934
- double v = 0;
935
- int nSV = 0;
936
- for(i=0; i<w_size; i++)
937
- v += w[i]*w[i];
938
- for(i=0; i<l; i++)
939
- {
940
- v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
941
- if(alpha[i] > 0)
942
- ++nSV;
943
- }
944
- info("Objective value = %lf\n",v/2);
945
- info("nSV = %d\n",nSV);
946
-
947
- delete [] QD;
948
- delete [] alpha;
949
- delete [] y;
950
- delete [] index;
951
- }
952
-
953
-
954
- // A coordinate descent algorithm for
955
- // L1-loss and L2-loss epsilon-SVR dual problem
956
- //
957
- // min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
958
- // s.t. -upper_bound_i <= \beta_i <= upper_bound_i,
959
- //
960
- // where Qij = xi^T xj and
961
- // D is a diagonal matrix
962
- //
963
- // In L1-SVM case:
964
- // upper_bound_i = C
965
- // lambda_i = 0
966
- // In L2-SVM case:
967
- // upper_bound_i = INF
968
- // lambda_i = 1/(2*C)
969
- //
970
- // Given:
971
- // x, y, p, C
972
- // eps is the stopping tolerance
973
- //
974
- // solution will be put in w
975
- //
976
- // See Algorithm 4 of Ho and Lin, 2012
977
-
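Typeset, the epsilon-SVR dual in the comment above is

\[
\min_{\beta}\ \frac{1}{2}\beta^T\bigl(Q+\operatorname{diag}(\lambda)\bigr)\beta
- p\sum_{i=1}^{l}\lvert\beta_i\rvert + \sum_{i=1}^{l} y_i\beta_i
\qquad \text{s.t.}\ -U \le \beta_i \le U,
\]

with \(Q_{ij} = x_i^T x_j\); for L1-loss SVR, \(U = C\) and \(\lambda_i = 0\), while for L2-loss SVR, \(U = \infty\) and \(\lambda_i = 1/(2C)\).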
978
- #undef GETI
979
- #define GETI(i) (0)
980
- // To support weights for instances, use GETI(i) (i)
981
-
982
- static void solve_l2r_l1l2_svr(
983
- const problem *prob, double *w, const parameter *param,
984
- int solver_type)
985
- {
986
- int l = prob->l;
987
- double C = param->C;
988
- double p = param->p;
989
- int w_size = prob->n;
990
- double eps = param->eps;
991
- int i, s, iter = 0;
992
- int max_iter = 1000;
993
- int active_size = l;
994
- int *index = new int[l];
995
-
996
- double d, G, H;
997
- double Gmax_old = INF;
998
- double Gmax_new, Gnorm1_new;
999
- double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1000
- double *beta = new double[l];
1001
- double *QD = new double[l];
1002
- double *y = prob->y;
1003
-
1004
- // L2R_L2LOSS_SVR_DUAL
1005
- double lambda[1], upper_bound[1];
1006
- lambda[0] = 0.5/C;
1007
- upper_bound[0] = INF;
1008
-
1009
- if(solver_type == L2R_L1LOSS_SVR_DUAL)
1010
- {
1011
- lambda[0] = 0;
1012
- upper_bound[0] = C;
1013
- }
1014
-
1015
- // Initial beta can be set here. Note that
1016
- // -upper_bound <= beta[i] <= upper_bound
1017
- for(i=0; i<l; i++)
1018
- beta[i] = 0;
1019
-
1020
- for(i=0; i<w_size; i++)
1021
- w[i] = 0;
1022
- for(i=0; i<l; i++)
1023
- {
1024
- feature_node * const xi = prob->x[i];
1025
- QD[i] = sparse_operator::nrm2_sq(xi);
1026
- sparse_operator::axpy(beta[i], xi, w);
1027
-
1028
- index[i] = i;
1029
- }
1030
-
1031
-
1032
- while(iter < max_iter)
1033
- {
1034
- Gmax_new = 0;
1035
- Gnorm1_new = 0;
1036
-
1037
- for(i=0; i<active_size; i++)
1038
- {
1039
- int j = i+rand()%(active_size-i);
1040
- swap(index[i], index[j]);
1041
- }
1042
-
1043
- for(s=0; s<active_size; s++)
1044
- {
1045
- i = index[s];
1046
- G = -y[i] + lambda[GETI(i)]*beta[i];
1047
- H = QD[i] + lambda[GETI(i)];
1048
-
1049
- feature_node * const xi = prob->x[i];
1050
- G += sparse_operator::dot(w, xi);
1051
-
1052
- double Gp = G+p;
1053
- double Gn = G-p;
1054
- double violation = 0;
1055
- if(beta[i] == 0)
1056
- {
1057
- if(Gp < 0)
1058
- violation = -Gp;
1059
- else if(Gn > 0)
1060
- violation = Gn;
1061
- else if(Gp>Gmax_old && Gn<-Gmax_old)
1062
- {
1063
- active_size--;
1064
- swap(index[s], index[active_size]);
1065
- s--;
1066
- continue;
1067
- }
1068
- }
1069
- else if(beta[i] >= upper_bound[GETI(i)])
1070
- {
1071
- if(Gp > 0)
1072
- violation = Gp;
1073
- else if(Gp < -Gmax_old)
1074
- {
1075
- active_size--;
1076
- swap(index[s], index[active_size]);
1077
- s--;
1078
- continue;
1079
- }
1080
- }
1081
- else if(beta[i] <= -upper_bound[GETI(i)])
1082
- {
1083
- if(Gn < 0)
1084
- violation = -Gn;
1085
- else if(Gn > Gmax_old)
1086
- {
1087
- active_size--;
1088
- swap(index[s], index[active_size]);
1089
- s--;
1090
- continue;
1091
- }
1092
- }
1093
- else if(beta[i] > 0)
1094
- violation = fabs(Gp);
1095
- else
1096
- violation = fabs(Gn);
1097
-
1098
- Gmax_new = max(Gmax_new, violation);
1099
- Gnorm1_new += violation;
1100
-
1101
- // obtain Newton direction d
1102
- if(Gp < H*beta[i])
1103
- d = -Gp/H;
1104
- else if(Gn > H*beta[i])
1105
- d = -Gn/H;
1106
- else
1107
- d = -beta[i];
1108
-
1109
- if(fabs(d) < 1.0e-12)
1110
- continue;
1111
-
1112
- double beta_old = beta[i];
1113
- beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
1114
- d = beta[i]-beta_old;
1115
-
1116
- if(d != 0)
1117
- sparse_operator::axpy(d, xi, w);
1118
- }
1119
-
1120
- if(iter == 0)
1121
- Gnorm1_init = Gnorm1_new;
1122
- iter++;
1123
- if(iter % 10 == 0)
1124
- info(".");
1125
-
1126
- if(Gnorm1_new <= eps*Gnorm1_init)
1127
- {
1128
- if(active_size == l)
1129
- break;
1130
- else
1131
- {
1132
- active_size = l;
1133
- info("*");
1134
- Gmax_old = INF;
1135
- continue;
1136
- }
1137
- }
1138
-
1139
- Gmax_old = Gmax_new;
1140
- }
1141
-
1142
- info("\noptimization finished, #iter = %d\n", iter);
1143
- if(iter >= max_iter)
1144
- info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n");
1145
-
1146
- // calculate objective value
1147
- double v = 0;
1148
- int nSV = 0;
1149
- for(i=0; i<w_size; i++)
1150
- v += w[i]*w[i];
1151
- v = 0.5*v;
1152
- for(i=0; i<l; i++)
1153
- {
1154
- v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
1155
- if(beta[i] != 0)
1156
- nSV++;
1157
- }
1158
-
1159
- info("Objective value = %lf\n", v);
1160
- info("nSV = %d\n",nSV);
1161
-
1162
- delete [] beta;
1163
- delete [] QD;
1164
- delete [] index;
1165
- }
1166
-
1167
-
1168
- // A coordinate descent algorithm for
1169
- // the dual of L2-regularized logistic regression problems
1170
- //
1171
- // min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
1172
- // s.t. 0 <= \alpha_i <= upper_bound_i,
1173
- //
1174
- // where Qij = yi yj xi^T xj and
1175
- // upper_bound_i = Cp if y_i = 1
1176
- // upper_bound_i = Cn if y_i = -1
1177
- //
1178
- // Given:
1179
- // x, y, Cp, Cn
1180
- // eps is the stopping tolerance
1181
- //
1182
- // solution will be put in w
1183
- //
1184
- // See Algorithm 5 of Yu et al., MLJ 2010
1185
-
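Typeset, the dual of L2-regularized logistic regression in the comment above is

\[
\min_{\alpha}\ \frac{1}{2}\alpha^T Q\alpha
+ \sum_i \Bigl[\alpha_i\log\alpha_i + (U_i-\alpha_i)\log(U_i-\alpha_i)\Bigr]
\qquad \text{s.t.}\ 0 \le \alpha_i \le U_i,
\]

with \(Q_{ij} = y_i y_j x_i^T x_j\), \(U_i = C_p\) if \(y_i = 1\) and \(U_i = C_n\) if \(y_i = -1\).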
1186
- #undef GETI
1187
- #define GETI(i) (y[i]+1)
1188
- // To support weights for instances, use GETI(i) (i)
1189
-
1190
- void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
1191
- {
1192
- int l = prob->l;
1193
- int w_size = prob->n;
1194
- int i, s, iter = 0;
1195
- double *xTx = new double[l];
1196
- int max_iter = 1000;
1197
- int *index = new int[l];
1198
- double *alpha = new double[2*l]; // store alpha and C - alpha
1199
- schar *y = new schar[l];
1200
- int max_inner_iter = 100; // for inner Newton
1201
- double innereps = 1e-2;
1202
- double innereps_min = min(1e-8, eps);
1203
- double upper_bound[3] = {Cn, 0, Cp};
1204
-
1205
- for(i=0; i<l; i++)
1206
- {
1207
- if(prob->y[i] > 0)
1208
- {
1209
- y[i] = +1;
1210
- }
1211
- else
1212
- {
1213
- y[i] = -1;
1214
- }
1215
- }
1216
-
1217
- // Initial alpha can be set here. Note that
1218
- // 0 < alpha[i] < upper_bound[GETI(i)]
1219
- // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
1220
- for(i=0; i<l; i++)
1221
- {
1222
- alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
1223
- alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
1224
- }
1225
-
1226
- for(i=0; i<w_size; i++)
1227
- w[i] = 0;
1228
- for(i=0; i<l; i++)
1229
- {
1230
- feature_node * const xi = prob->x[i];
1231
- xTx[i] = sparse_operator::nrm2_sq(xi);
1232
- sparse_operator::axpy(y[i]*alpha[2*i], xi, w);
1233
- index[i] = i;
1234
- }
1235
-
1236
- while (iter < max_iter)
1237
- {
1238
- for (i=0; i<l; i++)
1239
- {
1240
- int j = i+rand()%(l-i);
1241
- swap(index[i], index[j]);
1242
- }
1243
- int newton_iter = 0;
1244
- double Gmax = 0;
1245
- for (s=0; s<l; s++)
1246
- {
1247
- i = index[s];
1248
- const schar yi = y[i];
1249
- double C = upper_bound[GETI(i)];
1250
- double ywTx = 0, xisq = xTx[i];
1251
- feature_node * const xi = prob->x[i];
1252
- ywTx = yi*sparse_operator::dot(w, xi);
1253
- double a = xisq, b = ywTx;
1254
-
1255
- // Decide to minimize g_1(z) or g_2(z)
1256
- int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
1257
- if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
1258
- {
1259
- ind1 = 2*i+1;
1260
- ind2 = 2*i;
1261
- sign = -1;
1262
- }
1263
-
1264
- // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
1265
- double alpha_old = alpha[ind1];
1266
- double z = alpha_old;
1267
- if(C - z < 0.5 * C)
1268
- z = 0.1*z;
1269
- double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1270
- Gmax = max(Gmax, fabs(gp));
1271
-
1272
- // Newton method on the sub-problem
1273
- const double eta = 0.1; // xi in the paper
1274
- int inner_iter = 0;
1275
- while (inner_iter <= max_inner_iter)
1276
- {
1277
- if(fabs(gp) < innereps)
1278
- break;
1279
- double gpp = a + C/(C-z)/z;
1280
- double tmpz = z - gp/gpp;
1281
- if(tmpz <= 0)
1282
- z *= eta;
1283
- else // tmpz in (0, C)
1284
- z = tmpz;
1285
- gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1286
- newton_iter++;
1287
- inner_iter++;
1288
- }
1289
-
1290
- if(inner_iter > 0) // update w
1291
- {
1292
- alpha[ind1] = z;
1293
- alpha[ind2] = C-z;
1294
- sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w);
1295
- }
1296
- }
1297
-
1298
- iter++;
1299
- if(iter % 10 == 0)
1300
- info(".");
1301
-
1302
- if(Gmax < eps)
1303
- break;
1304
-
1305
- if(newton_iter <= l/10)
1306
- innereps = max(innereps_min, 0.1*innereps);
1307
-
1308
- }
1309
-
1310
- info("\noptimization finished, #iter = %d\n",iter);
1311
- if (iter >= max_iter)
1312
- info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
1313
-
1314
- // calculate objective value
1315
-
1316
- double v = 0;
1317
- for(i=0; i<w_size; i++)
1318
- v += w[i] * w[i];
1319
- v *= 0.5;
1320
- for(i=0; i<l; i++)
1321
- v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
1322
- - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
1323
- info("Objective value = %lf\n", v);
1324
-
1325
- delete [] xTx;
1326
- delete [] alpha;
1327
- delete [] y;
1328
- delete [] index;
1329
- }
1330
-
1331
- // A coordinate descent algorithm for
1332
- // L1-regularized L2-loss support vector classification
1333
- //
1334
- // min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
1335
- //
1336
- // Given:
1337
- // x, y, Cp, Cn
1338
- // eps is the stopping tolerance
1339
- //
1340
- // solution will be put in w
1341
- //
1342
- // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
1343
-
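Typeset, the L1-regularized L2-loss SVC objective in the comment above is

\[
\min_{w}\ \sum_j \lvert w_j\rvert + C\sum_i \max\bigl(0,\,1 - y_i w^T x_i\bigr)^2 .
\]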
1344
- #undef GETI
1345
- #define GETI(i) (y[i]+1)
1346
- // To support weights for instances, use GETI(i) (i)
1347
-
1348
- static void solve_l1r_l2_svc(
1349
- problem *prob_col, double *w, double eps,
1350
- double Cp, double Cn)
1351
- {
1352
- int l = prob_col->l;
1353
- int w_size = prob_col->n;
1354
- int j, s, iter = 0;
1355
- int max_iter = 1000;
1356
- int active_size = w_size;
1357
- int max_num_linesearch = 20;
1358
-
1359
- double sigma = 0.01;
1360
- double d, G_loss, G, H;
1361
- double Gmax_old = INF;
1362
- double Gmax_new, Gnorm1_new;
1363
- double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1364
- double d_old, d_diff;
1365
- double loss_old, loss_new;
1366
- double appxcond, cond;
1367
-
1368
- int *index = new int[w_size];
1369
- schar *y = new schar[l];
1370
- double *b = new double[l]; // b = 1-ywTx
1371
- double *xj_sq = new double[w_size];
1372
- feature_node *x;
1373
-
1374
- double C[3] = {Cn,0,Cp};
1375
-
1376
- // Initial w can be set here.
1377
- for(j=0; j<w_size; j++)
1378
- w[j] = 0;
1379
-
1380
- for(j=0; j<l; j++)
1381
- {
1382
- b[j] = 1;
1383
- if(prob_col->y[j] > 0)
1384
- y[j] = 1;
1385
- else
1386
- y[j] = -1;
1387
- }
1388
- for(j=0; j<w_size; j++)
1389
- {
1390
- index[j] = j;
1391
- xj_sq[j] = 0;
1392
- x = prob_col->x[j];
1393
- while(x->index != -1)
1394
- {
1395
- int ind = x->index-1;
1396
- x->value *= y[ind]; // x->value stores yi*xij
1397
- double val = x->value;
1398
- b[ind] -= w[j]*val;
1399
- xj_sq[j] += C[GETI(ind)]*val*val;
1400
- x++;
1401
- }
1402
- }
1403
-
1404
- while(iter < max_iter)
1405
- {
1406
- Gmax_new = 0;
1407
- Gnorm1_new = 0;
1408
-
1409
- for(j=0; j<active_size; j++)
1410
- {
1411
- int i = j+rand()%(active_size-j);
1412
- swap(index[i], index[j]);
1413
- }
1414
-
1415
- for(s=0; s<active_size; s++)
1416
- {
1417
- j = index[s];
1418
- G_loss = 0;
1419
- H = 0;
1420
-
1421
- x = prob_col->x[j];
1422
- while(x->index != -1)
1423
- {
1424
- int ind = x->index-1;
1425
- if(b[ind] > 0)
1426
- {
1427
- double val = x->value;
1428
- double tmp = C[GETI(ind)]*val;
1429
- G_loss -= tmp*b[ind];
1430
- H += tmp*val;
1431
- }
1432
- x++;
1433
- }
1434
- G_loss *= 2;
1435
-
1436
- G = G_loss;
1437
- H *= 2;
1438
- H = max(H, 1e-12);
1439
-
1440
- double Gp = G+1;
1441
- double Gn = G-1;
1442
- double violation = 0;
1443
- if(w[j] == 0)
1444
- {
1445
- if(Gp < 0)
1446
- violation = -Gp;
1447
- else if(Gn > 0)
1448
- violation = Gn;
1449
- else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1450
- {
1451
- active_size--;
1452
- swap(index[s], index[active_size]);
1453
- s--;
1454
- continue;
1455
- }
1456
- }
1457
- else if(w[j] > 0)
1458
- violation = fabs(Gp);
1459
- else
1460
- violation = fabs(Gn);
1461
-
1462
- Gmax_new = max(Gmax_new, violation);
1463
- Gnorm1_new += violation;
1464
-
1465
- // obtain Newton direction d
1466
- if(Gp < H*w[j])
1467
- d = -Gp/H;
1468
- else if(Gn > H*w[j])
1469
- d = -Gn/H;
1470
- else
1471
- d = -w[j];
1472
-
1473
- if(fabs(d) < 1.0e-12)
1474
- continue;
1475
-
1476
- double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
1477
- d_old = 0;
1478
- int num_linesearch;
1479
- for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
1480
- {
1481
- d_diff = d_old - d;
1482
- cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
1483
-
1484
- appxcond = xj_sq[j]*d*d + G_loss*d + cond;
1485
- if(appxcond <= 0)
1486
- {
1487
- x = prob_col->x[j];
1488
- sparse_operator::axpy(d_diff, x, b);
1489
- break;
1490
- }
1491
-
1492
- if(num_linesearch == 0)
1493
- {
1494
- loss_old = 0;
1495
- loss_new = 0;
1496
- x = prob_col->x[j];
1497
- while(x->index != -1)
1498
- {
1499
- int ind = x->index-1;
1500
- if(b[ind] > 0)
1501
- loss_old += C[GETI(ind)]*b[ind]*b[ind];
1502
- double b_new = b[ind] + d_diff*x->value;
1503
- b[ind] = b_new;
1504
- if(b_new > 0)
1505
- loss_new += C[GETI(ind)]*b_new*b_new;
1506
- x++;
1507
- }
1508
- }
1509
- else
1510
- {
1511
- loss_new = 0;
1512
- x = prob_col->x[j];
1513
- while(x->index != -1)
1514
- {
1515
- int ind = x->index-1;
1516
- double b_new = b[ind] + d_diff*x->value;
1517
- b[ind] = b_new;
1518
- if(b_new > 0)
1519
- loss_new += C[GETI(ind)]*b_new*b_new;
1520
- x++;
1521
- }
1522
- }
1523
-
1524
- cond = cond + loss_new - loss_old;
1525
- if(cond <= 0)
1526
- break;
1527
- else
1528
- {
1529
- d_old = d;
1530
- d *= 0.5;
1531
- delta *= 0.5;
1532
- }
1533
- }
1534
-
1535
- w[j] += d;
1536
-
1537
- // recompute b[] if line search takes too many steps
1538
- if(num_linesearch >= max_num_linesearch)
1539
- {
1540
- info("#");
1541
- for(int i=0; i<l; i++)
1542
- b[i] = 1;
1543
-
1544
- for(int i=0; i<w_size; i++)
1545
- {
1546
- if(w[i]==0) continue;
1547
- x = prob_col->x[i];
1548
- sparse_operator::axpy(-w[i], x, b);
1549
- }
1550
- }
1551
- }
1552
-
1553
- if(iter == 0)
1554
- Gnorm1_init = Gnorm1_new;
1555
- iter++;
1556
- if(iter % 10 == 0)
1557
- info(".");
1558
-
1559
- if(Gnorm1_new <= eps*Gnorm1_init)
1560
- {
1561
- if(active_size == w_size)
1562
- break;
1563
- else
1564
- {
1565
- active_size = w_size;
1566
- info("*");
1567
- Gmax_old = INF;
1568
- continue;
1569
- }
1570
- }
1571
-
1572
- Gmax_old = Gmax_new;
1573
- }
1574
-
1575
- info("\noptimization finished, #iter = %d\n", iter);
1576
- if(iter >= max_iter)
1577
- info("\nWARNING: reaching max number of iterations\n");
1578
-
1579
- // calculate objective value
1580
-
1581
- double v = 0;
1582
- int nnz = 0;
1583
- for(j=0; j<w_size; j++)
1584
- {
1585
- x = prob_col->x[j];
1586
- while(x->index != -1)
1587
- {
1588
- x->value *= prob_col->y[x->index-1]; // restore x->value
1589
- x++;
1590
- }
1591
- if(w[j] != 0)
1592
- {
1593
- v += fabs(w[j]);
1594
- nnz++;
1595
- }
1596
- }
1597
- for(j=0; j<l; j++)
1598
- if(b[j] > 0)
1599
- v += C[GETI(j)]*b[j]*b[j];
1600
-
1601
- info("Objective value = %lf\n", v);
1602
- info("#nonzeros/#features = %d/%d\n", nnz, w_size);
1603
-
1604
- delete [] index;
1605
- delete [] y;
1606
- delete [] b;
1607
- delete [] xj_sq;
1608
- }
1609
-
1610
- // A coordinate descent algorithm for
1611
- // L1-regularized logistic regression problems
1612
- //
1613
- // min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
1614
- //
1615
- // Given:
1616
- // x, y, Cp, Cn
1617
- // eps is the stopping tolerance
1618
- //
1619
- // solution will be put in w
1620
- //
1621
- // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
1622
-
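Typeset, the L1-regularized logistic regression objective in the comment above is

\[
\min_{w}\ \sum_j \lvert w_j\rvert + C\sum_i \log\bigl(1 + e^{-y_i w^T x_i}\bigr).
\]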
1623
- #undef GETI
1624
- #define GETI(i) (y[i]+1)
1625
- // To support weights for instances, use GETI(i) (i)
1626
-
1627
- static void solve_l1r_lr(
1628
- const problem *prob_col, double *w, double eps,
1629
- double Cp, double Cn)
1630
- {
1631
- int l = prob_col->l;
1632
- int w_size = prob_col->n;
1633
- int j, s, newton_iter=0, iter=0;
1634
- int max_newton_iter = 100;
1635
- int max_iter = 1000;
1636
- int max_num_linesearch = 20;
1637
- int active_size;
1638
- int QP_active_size;
1639
-
1640
- double nu = 1e-12;
1641
- double inner_eps = 1;
1642
- double sigma = 0.01;
1643
- double w_norm, w_norm_new;
1644
- double z, G, H;
1645
- double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1646
- double Gmax_old = INF;
1647
- double Gmax_new, Gnorm1_new;
1648
- double QP_Gmax_old = INF;
1649
- double QP_Gmax_new, QP_Gnorm1_new;
1650
- double delta, negsum_xTd, cond;
1651
-
1652
- int *index = new int[w_size];
1653
- schar *y = new schar[l];
1654
- double *Hdiag = new double[w_size];
1655
- double *Grad = new double[w_size];
1656
- double *wpd = new double[w_size];
1657
- double *xjneg_sum = new double[w_size];
1658
- double *xTd = new double[l];
1659
- double *exp_wTx = new double[l];
1660
- double *exp_wTx_new = new double[l];
1661
- double *tau = new double[l];
1662
- double *D = new double[l];
1663
- feature_node *x;
1664
-
1665
- double C[3] = {Cn,0,Cp};
1666
-
1667
- // Initial w can be set here.
1668
- for(j=0; j<w_size; j++)
1669
- w[j] = 0;
1670
-
1671
- for(j=0; j<l; j++)
1672
- {
1673
- if(prob_col->y[j] > 0)
1674
- y[j] = 1;
1675
- else
1676
- y[j] = -1;
1677
-
1678
- exp_wTx[j] = 0;
1679
- }
1680
-
1681
- w_norm = 0;
1682
- for(j=0; j<w_size; j++)
1683
- {
1684
- w_norm += fabs(w[j]);
1685
- wpd[j] = w[j];
1686
- index[j] = j;
1687
- xjneg_sum[j] = 0;
1688
- x = prob_col->x[j];
1689
- while(x->index != -1)
1690
- {
1691
- int ind = x->index-1;
1692
- double val = x->value;
1693
- exp_wTx[ind] += w[j]*val;
1694
- if(y[ind] == -1)
1695
- xjneg_sum[j] += C[GETI(ind)]*val;
1696
- x++;
1697
- }
1698
- }
1699
- for(j=0; j<l; j++)
1700
- {
1701
- exp_wTx[j] = exp(exp_wTx[j]);
1702
- double tau_tmp = 1/(1+exp_wTx[j]);
1703
- tau[j] = C[GETI(j)]*tau_tmp;
1704
- D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp;
1705
- }
1706
-
1707
- while(newton_iter < max_newton_iter)
1708
- {
1709
- Gmax_new = 0;
1710
- Gnorm1_new = 0;
1711
- active_size = w_size;
1712
-
1713
- for(s=0; s<active_size; s++)
1714
- {
1715
- j = index[s];
1716
- Hdiag[j] = nu;
1717
- Grad[j] = 0;
1718
-
1719
- double tmp = 0;
1720
- x = prob_col->x[j];
1721
- while(x->index != -1)
1722
- {
1723
- int ind = x->index-1;
1724
- Hdiag[j] += x->value*x->value*D[ind];
1725
- tmp += x->value*tau[ind];
1726
- x++;
1727
- }
1728
- Grad[j] = -tmp + xjneg_sum[j];
1729
-
1730
- double Gp = Grad[j]+1;
1731
- double Gn = Grad[j]-1;
1732
- double violation = 0;
1733
- if(w[j] == 0)
1734
- {
1735
- if(Gp < 0)
1736
- violation = -Gp;
1737
- else if(Gn > 0)
1738
- violation = Gn;
1739
- //outer-level shrinking
1740
- else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1741
- {
1742
- active_size--;
1743
- swap(index[s], index[active_size]);
1744
- s--;
1745
- continue;
1746
- }
1747
- }
1748
- else if(w[j] > 0)
1749
- violation = fabs(Gp);
1750
- else
1751
- violation = fabs(Gn);
1752
-
1753
- Gmax_new = max(Gmax_new, violation);
1754
- Gnorm1_new += violation;
1755
- }
1756
-
1757
- if(newton_iter == 0)
1758
- Gnorm1_init = Gnorm1_new;
1759
-
1760
- if(Gnorm1_new <= eps*Gnorm1_init)
1761
- break;
1762
-
1763
- iter = 0;
1764
- QP_Gmax_old = INF;
1765
- QP_active_size = active_size;
1766
-
1767
- for(int i=0; i<l; i++)
1768
- xTd[i] = 0;
1769
-
1770
- // optimize QP over wpd
1771
- while(iter < max_iter)
1772
- {
1773
- QP_Gmax_new = 0;
1774
- QP_Gnorm1_new = 0;
1775
-
1776
- for(j=0; j<QP_active_size; j++)
1777
- {
1778
- int i = j+rand()%(QP_active_size-j);
1779
- swap(index[i], index[j]);
1780
- }
1781
-
1782
- for(s=0; s<QP_active_size; s++)
1783
- {
1784
- j = index[s];
1785
- H = Hdiag[j];
1786
-
1787
- x = prob_col->x[j];
1788
- G = Grad[j] + (wpd[j]-w[j])*nu;
1789
- while(x->index != -1)
1790
- {
1791
- int ind = x->index-1;
1792
- G += x->value*D[ind]*xTd[ind];
1793
- x++;
1794
- }
1795
-
1796
- double Gp = G+1;
1797
- double Gn = G-1;
1798
- double violation = 0;
1799
- if(wpd[j] == 0)
1800
- {
1801
- if(Gp < 0)
1802
- violation = -Gp;
1803
- else if(Gn > 0)
1804
- violation = Gn;
1805
- //inner-level shrinking
1806
- else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
1807
- {
1808
- QP_active_size--;
1809
- swap(index[s], index[QP_active_size]);
1810
- s--;
1811
- continue;
1812
- }
1813
- }
1814
- else if(wpd[j] > 0)
1815
- violation = fabs(Gp);
1816
- else
1817
- violation = fabs(Gn);
1818
-
1819
- QP_Gmax_new = max(QP_Gmax_new, violation);
1820
- QP_Gnorm1_new += violation;
1821
-
1822
- // obtain solution of one-variable problem
1823
- if(Gp < H*wpd[j])
1824
- z = -Gp/H;
1825
- else if(Gn > H*wpd[j])
1826
- z = -Gn/H;
1827
- else
1828
- z = -wpd[j];
1829
-
1830
- if(fabs(z) < 1.0e-12)
1831
- continue;
1832
- z = min(max(z,-10.0),10.0);
1833
-
1834
- wpd[j] += z;
1835
-
1836
- x = prob_col->x[j];
1837
- sparse_operator::axpy(z, x, xTd);
1838
- }
1839
-
1840
- iter++;
1841
-
1842
- if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
1843
- {
1844
- //inner stopping
1845
- if(QP_active_size == active_size)
1846
- break;
1847
- //active set reactivation
1848
- else
1849
- {
1850
- QP_active_size = active_size;
1851
- QP_Gmax_old = INF;
1852
- continue;
1853
- }
1854
- }
1855
-
1856
- QP_Gmax_old = QP_Gmax_new;
1857
- }
1858
-
1859
- if(iter >= max_iter)
1860
- info("WARNING: reaching max number of inner iterations\n");
1861
-
1862
- delta = 0;
1863
- w_norm_new = 0;
1864
- for(j=0; j<w_size; j++)
1865
- {
1866
- delta += Grad[j]*(wpd[j]-w[j]);
1867
- if(wpd[j] != 0)
1868
- w_norm_new += fabs(wpd[j]);
1869
- }
1870
- delta += (w_norm_new-w_norm);
1871
-
1872
- negsum_xTd = 0;
1873
- for(int i=0; i<l; i++)
1874
- if(y[i] == -1)
1875
- negsum_xTd += C[GETI(i)]*xTd[i];
1876
-
1877
- int num_linesearch;
1878
- for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
1879
- {
1880
- cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;
1881
-
1882
- for(int i=0; i<l; i++)
1883
- {
1884
- double exp_xTd = exp(xTd[i]);
1885
- exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
1886
- cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
1887
- }
1888
-
1889
- if(cond <= 0)
1890
- {
1891
- w_norm = w_norm_new;
1892
- for(j=0; j<w_size; j++)
1893
- w[j] = wpd[j];
1894
- for(int i=0; i<l; i++)
1895
- {
1896
- exp_wTx[i] = exp_wTx_new[i];
1897
- double tau_tmp = 1/(1+exp_wTx[i]);
1898
- tau[i] = C[GETI(i)]*tau_tmp;
1899
- D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
1900
- }
1901
- break;
1902
- }
1903
- else
1904
- {
1905
- w_norm_new = 0;
1906
- for(j=0; j<w_size; j++)
1907
- {
1908
- wpd[j] = (w[j]+wpd[j])*0.5;
1909
- if(wpd[j] != 0)
1910
- w_norm_new += fabs(wpd[j]);
1911
- }
1912
- delta *= 0.5;
1913
- negsum_xTd *= 0.5;
1914
- for(int i=0; i<l; i++)
1915
- xTd[i] *= 0.5;
1916
- }
1917
- }
1918
-
1919
- // Recompute some info due to too many line search steps
1920
- if(num_linesearch >= max_num_linesearch)
1921
- {
1922
- for(int i=0; i<l; i++)
1923
- exp_wTx[i] = 0;
1924
-
1925
- for(int i=0; i<w_size; i++)
1926
- {
1927
- if(w[i]==0) continue;
1928
- x = prob_col->x[i];
1929
- sparse_operator::axpy(w[i], x, exp_wTx);
1930
- }
1931
-
1932
- for(int i=0; i<l; i++)
1933
- exp_wTx[i] = exp(exp_wTx[i]);
1934
- }
1935
-
1936
- if(iter == 1)
1937
- inner_eps *= 0.25;
1938
-
1939
- newton_iter++;
1940
- Gmax_old = Gmax_new;
1941
-
1942
- info("iter %3d #CD cycles %d\n", newton_iter, iter);
1943
- }
1944
-
1945
- info("=========================\n");
1946
- info("optimization finished, #iter = %d\n", newton_iter);
1947
- if(newton_iter >= max_newton_iter)
1948
- info("WARNING: reaching max number of iterations\n");
1949
-
1950
- // calculate objective value
1951
-
1952
- double v = 0;
1953
- int nnz = 0;
1954
- for(j=0; j<w_size; j++)
1955
- if(w[j] != 0)
1956
- {
1957
- v += fabs(w[j]);
1958
- nnz++;
1959
- }
1960
- for(j=0; j<l; j++)
1961
- if(y[j] == 1)
1962
- v += C[GETI(j)]*log(1+1/exp_wTx[j]);
1963
- else
1964
- v += C[GETI(j)]*log(1+exp_wTx[j]);
1965
-
1966
- info("Objective value = %lf\n", v);
1967
- info("#nonzeros/#features = %d/%d\n", nnz, w_size);
1968
-
1969
- delete [] index;
1970
- delete [] y;
1971
- delete [] Hdiag;
1972
- delete [] Grad;
1973
- delete [] wpd;
1974
- delete [] xjneg_sum;
1975
- delete [] xTd;
1976
- delete [] exp_wTx;
1977
- delete [] exp_wTx_new;
1978
- delete [] tau;
1979
- delete [] D;
1980
- }
1981
-
1982
- // transpose matrix X from row format to column format
1983
- static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
1984
- {
1985
- int i;
1986
- int l = prob->l;
1987
- int n = prob->n;
1988
- size_t nnz = 0;
1989
- size_t *col_ptr = new size_t [n+1];
1990
- feature_node *x_space;
1991
- prob_col->l = l;
1992
- prob_col->n = n;
1993
- prob_col->y = new double[l];
1994
- prob_col->x = new feature_node*[n];
1995
-
1996
- for(i=0; i<l; i++)
1997
- prob_col->y[i] = prob->y[i];
1998
-
1999
- for(i=0; i<n+1; i++)
2000
- col_ptr[i] = 0;
2001
- for(i=0; i<l; i++)
2002
- {
2003
- feature_node *x = prob->x[i];
2004
- while(x->index != -1)
2005
- {
2006
- nnz++;
2007
- col_ptr[x->index]++;
2008
- x++;
2009
- }
2010
- }
2011
- for(i=1; i<n+1; i++)
2012
- col_ptr[i] += col_ptr[i-1] + 1;
2013
-
2014
- x_space = new feature_node[nnz+n];
2015
- for(i=0; i<n; i++)
2016
- prob_col->x[i] = &x_space[col_ptr[i]];
2017
-
2018
- for(i=0; i<l; i++)
2019
- {
2020
- feature_node *x = prob->x[i];
2021
- while(x->index != -1)
2022
- {
2023
- int ind = x->index-1;
2024
- x_space[col_ptr[ind]].index = i+1; // starts from 1
2025
- x_space[col_ptr[ind]].value = x->value;
2026
- col_ptr[ind]++;
2027
- x++;
2028
- }
2029
- }
2030
- for(i=0; i<n; i++)
2031
- x_space[col_ptr[i]].index = -1;
2032
-
2033
- *x_space_ret = x_space;
2034
-
2035
- delete [] col_ptr;
2036
- }
2037
-
2038
- // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2039
- // perm, length l, must be allocated before calling this subroutine
2040
- static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2041
- {
2042
- int l = prob->l;
2043
- int max_nr_class = 16;
2044
- int nr_class = 0;
2045
- int *label = Malloc(int,max_nr_class);
2046
- int *count = Malloc(int,max_nr_class);
2047
- int *data_label = Malloc(int,l);
2048
- int i;
2049
-
2050
- for(i=0;i<l;i++)
2051
- {
2052
- int this_label = (int)prob->y[i];
2053
- int j;
2054
- for(j=0;j<nr_class;j++)
2055
- {
2056
- if(this_label == label[j])
2057
- {
2058
- ++count[j];
2059
- break;
2060
- }
2061
- }
2062
- data_label[i] = j;
2063
- if(j == nr_class)
2064
- {
2065
- if(nr_class == max_nr_class)
2066
- {
2067
- max_nr_class *= 2;
2068
- label = (int *)realloc(label,max_nr_class*sizeof(int));
2069
- count = (int *)realloc(count,max_nr_class*sizeof(int));
2070
- }
2071
- label[nr_class] = this_label;
2072
- count[nr_class] = 1;
2073
- ++nr_class;
2074
- }
2075
- }
2076
-
2077
- //
2078
- // Labels are ordered by their first occurrence in the training set.
2079
- // However, for two-class sets with -1/+1 labels and -1 appears first,
2080
- // we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
2081
- //
2082
- if (nr_class == 2 && label[0] == -1 && label[1] == 1)
2083
- {
2084
- swap(label[0],label[1]);
2085
- swap(count[0],count[1]);
2086
- for(i=0;i<l;i++)
2087
- {
2088
- if(data_label[i] == 0)
2089
- data_label[i] = 1;
2090
- else
2091
- data_label[i] = 0;
2092
- }
2093
- }
2094
-
2095
- int *start = Malloc(int,nr_class);
2096
- start[0] = 0;
2097
- for(i=1;i<nr_class;i++)
2098
- start[i] = start[i-1]+count[i-1];
2099
- for(i=0;i<l;i++)
2100
- {
2101
- perm[start[data_label[i]]] = i;
2102
- ++start[data_label[i]];
2103
- }
2104
- start[0] = 0;
2105
- for(i=1;i<nr_class;i++)
2106
- start[i] = start[i-1]+count[i-1];
2107
-
2108
- *nr_class_ret = nr_class;
2109
- *label_ret = label;
2110
- *start_ret = start;
2111
- *count_ret = count;
2112
- free(data_label);
2113
- }
2114
-
2115
- static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
2116
- {
2117
- //inner and outer tolerances for TRON
2118
- double eps = param->eps;
2119
- double eps_cg = 0.1;
2120
- if(param->init_sol != NULL)
2121
- eps_cg = 0.5;
2122
-
2123
- int pos = 0;
2124
- int neg = 0;
2125
- for(int i=0;i<prob->l;i++)
2126
- if(prob->y[i] > 0)
2127
- pos++;
2128
- neg = prob->l - pos;
2129
- double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
2130
-
2131
- function *fun_obj=NULL;
2132
- switch(param->solver_type)
2133
- {
2134
- case L2R_LR:
2135
- {
2136
- double *C = new double[prob->l];
2137
- for(int i = 0; i < prob->l; i++)
2138
- {
2139
- if(prob->y[i] > 0)
2140
- C[i] = Cp;
2141
- else
2142
- C[i] = Cn;
2143
- }
2144
- fun_obj=new l2r_lr_fun(prob, C);
2145
- TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
2146
- tron_obj.set_print_string(liblinear_print_string);
2147
- tron_obj.tron(w);
2148
- delete fun_obj;
2149
- delete[] C;
2150
- break;
2151
- }
2152
- case L2R_L2LOSS_SVC:
2153
- {
2154
- double *C = new double[prob->l];
2155
- for(int i = 0; i < prob->l; i++)
2156
- {
2157
- if(prob->y[i] > 0)
2158
- C[i] = Cp;
2159
- else
2160
- C[i] = Cn;
2161
- }
2162
- fun_obj=new l2r_l2_svc_fun(prob, C);
2163
- TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
2164
- tron_obj.set_print_string(liblinear_print_string);
2165
- tron_obj.tron(w);
2166
- delete fun_obj;
2167
- delete[] C;
2168
- break;
2169
- }
2170
- case L2R_L2LOSS_SVC_DUAL:
2171
- solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
2172
- break;
2173
- case L2R_L1LOSS_SVC_DUAL:
2174
- solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
2175
- break;
2176
- case L1R_L2LOSS_SVC:
2177
- {
2178
- problem prob_col;
2179
- feature_node *x_space = NULL;
2180
- transpose(prob, &x_space ,&prob_col);
2181
- solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn);
2182
- delete [] prob_col.y;
2183
- delete [] prob_col.x;
2184
- delete [] x_space;
2185
- break;
2186
- }
2187
- case L1R_LR:
2188
- {
2189
- problem prob_col;
2190
- feature_node *x_space = NULL;
2191
- transpose(prob, &x_space ,&prob_col);
2192
- solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn);
2193
- delete [] prob_col.y;
2194
- delete [] prob_col.x;
2195
- delete [] x_space;
2196
- break;
2197
- }
2198
- case L2R_LR_DUAL:
2199
- solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
2200
- break;
2201
- case L2R_L2LOSS_SVR:
2202
- {
2203
- double *C = new double[prob->l];
2204
- for(int i = 0; i < prob->l; i++)
2205
- C[i] = param->C;
2206
-
2207
- fun_obj=new l2r_l2_svr_fun(prob, C, param->p);
2208
- TRON tron_obj(fun_obj, param->eps);
2209
- tron_obj.set_print_string(liblinear_print_string);
2210
- tron_obj.tron(w);
2211
- delete fun_obj;
2212
- delete[] C;
2213
- break;
2214
-
2215
- }
2216
- case L2R_L1LOSS_SVR_DUAL:
2217
- solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL);
2218
- break;
2219
- case L2R_L2LOSS_SVR_DUAL:
2220
- solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL);
2221
- break;
2222
- default:
2223
- fprintf(stderr, "ERROR: unknown solver_type\n");
2224
- break;
2225
- }
2226
- }
2227
-
2228
- // Calculate the initial C for parameter selection
2229
- static double calc_start_C(const problem *prob, const parameter *param)
2230
- {
2231
- int i;
2232
- double xTx,max_xTx;
2233
- max_xTx = 0;
2234
- for(i=0; i<prob->l; i++)
2235
- {
2236
- xTx = 0;
2237
- feature_node *xi=prob->x[i];
2238
- while(xi->index != -1)
2239
- {
2240
- double val = xi->value;
2241
- xTx += val*val;
2242
- xi++;
2243
- }
2244
- if(xTx > max_xTx)
2245
- max_xTx = xTx;
2246
- }
2247
-
2248
- double min_C = 1.0;
2249
- if(param->solver_type == L2R_LR)
2250
- min_C = 1.0 / (prob->l * max_xTx);
2251
- else if(param->solver_type == L2R_L2LOSS_SVC)
2252
- min_C = 1.0 / (2 * prob->l * max_xTx);
2253
-
2254
- return pow( 2, floor(log(min_C) / log(2.0)) );
2255
- }
2256
-
2257
-
2258
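calc_start_C picks the smallest C worth evaluating in a parameter search from the largest squared instance norm: min_C = 1 / (l * max_i ||x_i||^2) for L2R_LR, 1 / (2 * l * max_i ||x_i||^2) for L2R_L2LOSS_SVC, and 1 otherwise, then rounds it down to a power of two. A worked example with illustrative numbers: for l = 1000 instances and max_i ||x_i||^2 = 4, the L2R_LR value is 1/4000 = 0.00025, and the returned start is 2^floor(log2(0.00025)) = 2^-12, roughly 0.000244. Callers normally reach it indirectly by passing start_C <= 0 to find_parameter_C below.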
- //
2259
- // Interface functions
2260
- //
2261
- model* train(const problem *prob, const parameter *param)
2262
- {
2263
- int i,j;
2264
- int l = prob->l;
2265
- int n = prob->n;
2266
- int w_size = prob->n;
2267
- model *model_ = Malloc(model,1);
2268
-
2269
- if(prob->bias>=0)
2270
- model_->nr_feature=n-1;
2271
- else
2272
- model_->nr_feature=n;
2273
- model_->param = *param;
2274
- model_->bias = prob->bias;
2275
-
2276
- if(check_regression_model(model_))
2277
- {
2278
- model_->w = Malloc(double, w_size);
2279
- for(i=0; i<w_size; i++)
2280
- model_->w[i] = 0;
2281
- model_->nr_class = 2;
2282
- model_->label = NULL;
2283
- train_one(prob, param, model_->w, 0, 0);
2284
- }
2285
- else
2286
- {
2287
- int nr_class;
2288
- int *label = NULL;
2289
- int *start = NULL;
2290
- int *count = NULL;
2291
- int *perm = Malloc(int,l);
2292
-
2293
- // group training data of the same class
2294
- group_classes(prob,&nr_class,&label,&start,&count,perm);
2295
-
2296
- model_->nr_class=nr_class;
2297
- model_->label = Malloc(int,nr_class);
2298
- for(i=0;i<nr_class;i++)
2299
- model_->label[i] = label[i];
2300
-
2301
- // calculate weighted C
2302
- double *weighted_C = Malloc(double, nr_class);
2303
- for(i=0;i<nr_class;i++)
2304
- weighted_C[i] = param->C;
2305
- for(i=0;i<param->nr_weight;i++)
2306
- {
2307
- for(j=0;j<nr_class;j++)
2308
- if(param->weight_label[i] == label[j])
2309
- break;
2310
- if(j == nr_class)
2311
- fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2312
- else
2313
- weighted_C[j] *= param->weight[i];
2314
- }
2315
-
2316
- // constructing the subproblem
2317
- feature_node **x = Malloc(feature_node *,l);
2318
- for(i=0;i<l;i++)
2319
- x[i] = prob->x[perm[i]];
2320
-
2321
- int k;
2322
- problem sub_prob;
2323
- sub_prob.l = l;
2324
- sub_prob.n = n;
2325
- sub_prob.x = Malloc(feature_node *,sub_prob.l);
2326
- sub_prob.y = Malloc(double,sub_prob.l);
2327
-
2328
- for(k=0; k<sub_prob.l; k++)
2329
- sub_prob.x[k] = x[k];
2330
-
2331
- // multi-class svm by Crammer and Singer
2332
- if(param->solver_type == MCSVM_CS)
2333
- {
2334
- model_->w=Malloc(double, n*nr_class);
2335
- for(i=0;i<nr_class;i++)
2336
- for(j=start[i];j<start[i]+count[i];j++)
2337
- sub_prob.y[j] = i;
2338
- Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
2339
- Solver.Solve(model_->w);
2340
- }
2341
- else
2342
- {
2343
- if(nr_class == 2)
2344
- {
2345
- model_->w=Malloc(double, w_size);
2346
-
2347
- int e0 = start[0]+count[0];
2348
- k=0;
2349
- for(; k<e0; k++)
2350
- sub_prob.y[k] = +1;
2351
- for(; k<sub_prob.l; k++)
2352
- sub_prob.y[k] = -1;
2353
-
2354
- if(param->init_sol != NULL)
2355
- for(i=0;i<w_size;i++)
2356
- model_->w[i] = param->init_sol[i];
2357
- else
2358
- for(i=0;i<w_size;i++)
2359
- model_->w[i] = 0;
2360
-
2361
- train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
2362
- }
2363
- else
2364
- {
2365
- model_->w=Malloc(double, w_size*nr_class);
2366
- double *w=Malloc(double, w_size);
2367
- for(i=0;i<nr_class;i++)
2368
- {
2369
- int si = start[i];
2370
- int ei = si+count[i];
2371
-
2372
- k=0;
2373
- for(; k<si; k++)
2374
- sub_prob.y[k] = -1;
2375
- for(; k<ei; k++)
2376
- sub_prob.y[k] = +1;
2377
- for(; k<sub_prob.l; k++)
2378
- sub_prob.y[k] = -1;
2379
-
2380
- if(param->init_sol != NULL)
2381
- for(j=0;j<w_size;j++)
2382
- w[j] = param->init_sol[j*nr_class+i];
2383
- else
2384
- for(j=0;j<w_size;j++)
2385
- w[j] = 0;
2386
-
2387
- train_one(&sub_prob, param, w, weighted_C[i], param->C);
2388
-
2389
- for(int j=0;j<w_size;j++)
2390
- model_->w[j*nr_class+i] = w[j];
2391
- }
2392
- free(w);
2393
- }
2394
-
2395
- }
2396
-
2397
- free(x);
2398
- free(label);
2399
- free(start);
2400
- free(count);
2401
- free(perm);
2402
- free(sub_prob.x);
2403
- free(sub_prob.y);
2404
- free(weighted_C);
2405
- }
2406
- return model_;
2407
- }
2408
-
2409
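For context, a minimal end-to-end use of the train entry point above. The struct layouts are assumed from linear.h as they are used throughout this file (feature_node{index, value} rows terminated by index -1, problem{l, n, y, x, bias}, and the parameter fields); the data and settings are purely illustrative:

    #include <cstdio>
    #include "linear.h"

    int main()
    {
        // Two training instances in two dimensions, labels +1 / -1.
        feature_node x0[] = { {1, 0.5}, {2, 1.0}, {-1, 0.0} };   // index -1 ends a row
        feature_node x1[] = { {1, -1.0}, {2, 0.25}, {-1, 0.0} };
        feature_node *rows[] = { x0, x1 };
        double labels[] = { +1, -1 };

        problem prob;
        prob.l = 2;            // number of instances
        prob.n = 2;            // number of features (no bias feature appended)
        prob.x = rows;
        prob.y = labels;
        prob.bias = -1;        // < 0: no bias term

        parameter param;
        param.solver_type = L2R_L2LOSS_SVC;
        param.eps = 0.01;
        param.C = 1;
        param.p = 0.1;         // only used by the SVR solvers
        param.nr_weight = 0;   // no per-class C rescaling
        param.weight_label = NULL;
        param.weight = NULL;
        param.init_sol = NULL;

        const char *err = check_parameter(&prob, &param);
        if (err) { fprintf(stderr, "%s\n", err); return 1; }

        model *m = train(&prob, &param);
        printf("classes: %d, features: %d\n", get_nr_class(m), get_nr_feature(m));

        free_and_destroy_model(&m);
        destroy_param(&param);   // safe here: all pointers in param are NULL
        return 0;
    }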
- void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target)
2410
- {
2411
- int i;
2412
- int *fold_start;
2413
- int l = prob->l;
2414
- int *perm = Malloc(int,l);
2415
- if (nr_fold > l)
2416
- {
2417
- nr_fold = l;
2418
- fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
2419
- }
2420
- fold_start = Malloc(int,nr_fold+1);
2421
- for(i=0;i<l;i++) perm[i]=i;
2422
- for(i=0;i<l;i++)
2423
- {
2424
- int j = i+rand()%(l-i);
2425
- swap(perm[i],perm[j]);
2426
- }
2427
- for(i=0;i<=nr_fold;i++)
2428
- fold_start[i]=i*l/nr_fold;
2429
-
2430
- for(i=0;i<nr_fold;i++)
2431
- {
2432
- int begin = fold_start[i];
2433
- int end = fold_start[i+1];
2434
- int j,k;
2435
- struct problem subprob;
2436
-
2437
- subprob.bias = prob->bias;
2438
- subprob.n = prob->n;
2439
- subprob.l = l-(end-begin);
2440
- subprob.x = Malloc(struct feature_node*,subprob.l);
2441
- subprob.y = Malloc(double,subprob.l);
2442
-
2443
- k=0;
2444
- for(j=0;j<begin;j++)
2445
- {
2446
- subprob.x[k] = prob->x[perm[j]];
2447
- subprob.y[k] = prob->y[perm[j]];
2448
- ++k;
2449
- }
2450
- for(j=end;j<l;j++)
2451
- {
2452
- subprob.x[k] = prob->x[perm[j]];
2453
- subprob.y[k] = prob->y[perm[j]];
2454
- ++k;
2455
- }
2456
- struct model *submodel = train(&subprob,param);
2457
- for(j=begin;j<end;j++)
2458
- target[perm[j]] = predict(submodel,prob->x[perm[j]]);
2459
- free_and_destroy_model(&submodel);
2460
- free(subprob.x);
2461
- free(subprob.y);
2462
- }
2463
- free(fold_start);
2464
- free(perm);
2465
- }
2466
-
2467
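cross_validation only fills target with the held-out predictions, indexed by the original instance order; scoring is left to the caller. A small sketch of the usual accuracy computation on top of it (the helper name cv_accuracy is illustrative; the exact-equality comparison is the same one find_parameter_C uses below, so it is only meaningful for classification solvers):

    #include <cstdio>
    #include <cstdlib>
    #include "linear.h"

    // prob and param are assumed to be already set up by the caller.
    double cv_accuracy(const problem *prob, const parameter *param, int nr_fold)
    {
        double *target = (double *)malloc(sizeof(double) * prob->l);
        cross_validation(prob, param, nr_fold, target);

        int correct = 0;
        for (int i = 0; i < prob->l; i++)
            if (target[i] == prob->y[i])
                ++correct;

        free(target);
        return (double)correct / prob->l;
    }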
- void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
2468
- {
2469
- // variables for CV
2470
- int i;
2471
- int *fold_start;
2472
- int l = prob->l;
2473
- int *perm = Malloc(int, l);
2474
- double *target = Malloc(double, prob->l);
2475
- struct problem *subprob = Malloc(problem,nr_fold);
2476
-
2477
- // variables for warm start
2478
- double ratio = 2;
2479
- double **prev_w = Malloc(double*, nr_fold);
2480
- for(i = 0; i < nr_fold; i++)
2481
- prev_w[i] = NULL;
2482
- int num_unchanged_w = 0;
2483
- struct parameter param1 = *param;
2484
- void (*default_print_string) (const char *) = liblinear_print_string;
2485
-
2486
- if (nr_fold > l)
2487
- {
2488
- nr_fold = l;
2489
- fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
2490
- }
2491
- fold_start = Malloc(int,nr_fold+1);
2492
- for(i=0;i<l;i++) perm[i]=i;
2493
- for(i=0;i<l;i++)
2494
- {
2495
- int j = i+rand()%(l-i);
2496
- swap(perm[i],perm[j]);
2497
- }
2498
- for(i=0;i<=nr_fold;i++)
2499
- fold_start[i]=i*l/nr_fold;
2500
-
2501
- for(i=0;i<nr_fold;i++)
2502
- {
2503
- int begin = fold_start[i];
2504
- int end = fold_start[i+1];
2505
- int j,k;
2506
-
2507
- subprob[i].bias = prob->bias;
2508
- subprob[i].n = prob->n;
2509
- subprob[i].l = l-(end-begin);
2510
- subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
2511
- subprob[i].y = Malloc(double,subprob[i].l);
2512
-
2513
- k=0;
2514
- for(j=0;j<begin;j++)
2515
- {
2516
- subprob[i].x[k] = prob->x[perm[j]];
2517
- subprob[i].y[k] = prob->y[perm[j]];
2518
- ++k;
2519
- }
2520
- for(j=end;j<l;j++)
2521
- {
2522
- subprob[i].x[k] = prob->x[perm[j]];
2523
- subprob[i].y[k] = prob->y[perm[j]];
2524
- ++k;
2525
- }
2526
-
2527
- }
2528
-
2529
- *best_rate = 0;
2530
- if(start_C <= 0)
2531
- start_C = calc_start_C(prob,param);
2532
- param1.C = start_C;
2533
-
2534
- while(param1.C <= max_C)
2535
- {
2536
- //Output disabled for running CV at a particular C
2537
- set_print_string_function(&print_null);
2538
-
2539
- for(i=0; i<nr_fold; i++)
2540
- {
2541
- int j;
2542
- int begin = fold_start[i];
2543
- int end = fold_start[i+1];
2544
-
2545
- param1.init_sol = prev_w[i];
2546
- struct model *submodel = train(&subprob[i],&param1);
2547
-
2548
- int total_w_size;
2549
- if(submodel->nr_class == 2)
2550
- total_w_size = subprob[i].n;
2551
- else
2552
- total_w_size = subprob[i].n * submodel->nr_class;
2553
-
2554
- if(prev_w[i] == NULL)
2555
- {
2556
- prev_w[i] = Malloc(double, total_w_size);
2557
- for(j=0; j<total_w_size; j++)
2558
- prev_w[i][j] = submodel->w[j];
2559
- }
2560
- else if(num_unchanged_w >= 0)
2561
- {
2562
- double norm_w_diff = 0;
2563
- for(j=0; j<total_w_size; j++)
2564
- {
2565
- norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
2566
- prev_w[i][j] = submodel->w[j];
2567
- }
2568
- norm_w_diff = sqrt(norm_w_diff);
2569
-
2570
- if(norm_w_diff > 1e-15)
2571
- num_unchanged_w = -1;
2572
- }
2573
- else
2574
- {
2575
- for(j=0; j<total_w_size; j++)
2576
- prev_w[i][j] = submodel->w[j];
2577
- }
2578
-
2579
- for(j=begin; j<end; j++)
2580
- target[perm[j]] = predict(submodel,prob->x[perm[j]]);
2581
-
2582
- free_and_destroy_model(&submodel);
2583
- }
2584
- set_print_string_function(default_print_string);
2585
-
2586
- int total_correct = 0;
2587
- for(i=0; i<prob->l; i++)
2588
- if(target[i] == prob->y[i])
2589
- ++total_correct;
2590
- double current_rate = (double)total_correct/prob->l;
2591
- if(current_rate > *best_rate)
2592
- {
2593
- *best_C = param1.C;
2594
- *best_rate = current_rate;
2595
- }
2596
-
2597
- info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
2598
- num_unchanged_w++;
2599
- if(num_unchanged_w == 3)
2600
- break;
2601
- param1.C = param1.C*ratio;
2602
- }
2603
-
2604
- if(param1.C > max_C && max_C > start_C)
2605
- info("warning: maximum C reached.\n");
2606
- free(fold_start);
2607
- free(perm);
2608
- free(target);
2609
- for(i=0; i<nr_fold; i++)
2610
- {
2611
- free(subprob[i].x);
2612
- free(subprob[i].y);
2613
- free(prev_w[i]);
2614
- }
2615
- free(prev_w);
2616
- free(subprob);
2617
- }
2618
-
2619
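find_parameter_C doubles C at each step (ratio = 2), warm-starts each fold from that fold's solution at the previous C through param1.init_sol, and stops early once the fold weight vectors have stopped changing for three consecutive C values or max_C is exceeded. A usage sketch with illustrative fold count and max_C (per check_parameter further below, the warm start is only supported for L2R_LR and L2R_L2LOSS_SVC):

    #include <cstdio>
    #include "linear.h"

    void pick_C(const problem *prob, parameter *param)
    {
        double best_C, best_rate;
        // start_C <= 0 lets the library compute the starting value itself.
        find_parameter_C(prob, param, 5, /*start_C=*/-1.0, /*max_C=*/1024.0, &best_C, &best_rate);
        printf("best C = %g, CV accuracy = %g%%\n", best_C, 100.0 * best_rate);
        param->C = best_C;   // train the final model with the selected C
    }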
- double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
2620
- {
2621
- int idx;
2622
- int n;
2623
- if(model_->bias>=0)
2624
- n=model_->nr_feature+1;
2625
- else
2626
- n=model_->nr_feature;
2627
- double *w=model_->w;
2628
- int nr_class=model_->nr_class;
2629
- int i;
2630
- int nr_w;
2631
- if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
2632
- nr_w = 1;
2633
- else
2634
- nr_w = nr_class;
2635
-
2636
- const feature_node *lx=x;
2637
- for(i=0;i<nr_w;i++)
2638
- dec_values[i] = 0;
2639
- for(; (idx=lx->index)!=-1; lx++)
2640
- {
2641
- // the dimension of testing data may exceed that of training
2642
- if(idx<=n)
2643
- for(i=0;i<nr_w;i++)
2644
- dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
2645
- }
2646
-
2647
- if(nr_class==2)
2648
- {
2649
- if(check_regression_model(model_))
2650
- return dec_values[0];
2651
- else
2652
- return (dec_values[0]>0)?model_->label[0]:model_->label[1];
2653
- }
2654
- else
2655
- {
2656
- int dec_max_idx = 0;
2657
- for(i=1;i<nr_class;i++)
2658
- {
2659
- if(dec_values[i] > dec_values[dec_max_idx])
2660
- dec_max_idx = i;
2661
- }
2662
- return model_->label[dec_max_idx];
2663
- }
2664
- }
2665
-
2666
- double predict(const model *model_, const feature_node *x)
2667
- {
2668
- double *dec_values = Malloc(double, model_->nr_class);
2669
- double label=predict_values(model_, x, dec_values);
2670
- free(dec_values);
2671
- return label;
2672
- }
2673
-
2674
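predict_values writes nr_w decision values (one for a binary model trained with anything other than MCSVM_CS, nr_class otherwise; w is stored feature-major as w[(idx-1)*nr_w+i]) and returns the predicted label, while predict is the convenience wrapper that allocates and discards that buffer itself. A sketch that keeps the raw values, assuming a classification model (helper name illustrative):

    #include <cstdio>
    #include <cstdlib>
    #include "linear.h"

    void classify_one(const model *m, const feature_node *x)
    {
        int nr_class = get_nr_class(m);
        // nr_class doubles is always enough; only dec[0] is used for binary models.
        double *dec = (double *)malloc(sizeof(double) * nr_class);
        double label = predict_values(m, x, dec);
        printf("predicted label %g, first decision value %g\n", label, dec[0]);
        free(dec);
    }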
- double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
2675
- {
2676
- if(check_probability_model(model_))
2677
- {
2678
- int i;
2679
- int nr_class=model_->nr_class;
2680
- int nr_w;
2681
- if(nr_class==2)
2682
- nr_w = 1;
2683
- else
2684
- nr_w = nr_class;
2685
-
2686
- double label=predict_values(model_, x, prob_estimates);
2687
- for(i=0;i<nr_w;i++)
2688
- prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
2689
-
2690
- if(nr_class==2) // for binary classification
2691
- prob_estimates[1]=1.-prob_estimates[0];
2692
- else
2693
- {
2694
- double sum=0;
2695
- for(i=0; i<nr_class; i++)
2696
- sum+=prob_estimates[i];
2697
-
2698
- for(i=0; i<nr_class; i++)
2699
- prob_estimates[i]=prob_estimates[i]/sum;
2700
- }
2701
-
2702
- return label;
2703
- }
2704
- else
2705
- return 0;
2706
- }
2707
-
2708
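Probability estimates are only defined for the logistic-regression solvers (see check_probability_model near the end of this file): each decision value goes through the sigmoid 1/(1+exp(-z)) and, with more than two classes, the per-class values are renormalised to sum to one; for unsupported models the function simply returns 0. A guarded usage sketch (helper name illustrative; est[i] corresponds to model->label[i]):

    #include <cstdio>
    #include <cstdlib>
    #include "linear.h"

    void predict_with_probability(const model *m, const feature_node *x)
    {
        if (!check_probability_model(m))   // only L2R_LR, L2R_LR_DUAL, L1R_LR
        {
            fprintf(stderr, "model does not support probability output\n");
            return;
        }
        int nr_class = get_nr_class(m);
        double *est = (double *)malloc(sizeof(double) * nr_class);
        double label = predict_probability(m, x, est);
        printf("label %g, probability of first class %g\n", label, est[0]);
        free(est);
    }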
- static const char *solver_type_table[]=
2709
- {
2710
- "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
2711
- "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
2712
- "", "", "",
2713
- "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL", NULL
2714
- };
2715
-
2716
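The position of each string in solver_type_table is the numeric value of the corresponding solver constant: save_model below writes solver_type_table[param.solver_type], load_model maps the name back to its index by linear search, and the three empty entries keep the regression solvers at indices 11 to 13.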
- int save_model(const char *model_file_name, const struct model *model_)
2717
- {
2718
- int i;
2719
- int nr_feature=model_->nr_feature;
2720
- int n;
2721
- const parameter& param = model_->param;
2722
-
2723
- if(model_->bias>=0)
2724
- n=nr_feature+1;
2725
- else
2726
- n=nr_feature;
2727
- int w_size = n;
2728
- FILE *fp = fopen(model_file_name,"w");
2729
- if(fp==NULL) return -1;
2730
-
2731
- char *old_locale = setlocale(LC_ALL, NULL);
2732
- if (old_locale)
2733
- {
2734
- old_locale = strdup(old_locale);
2735
- }
2736
- setlocale(LC_ALL, "C");
2737
-
2738
- int nr_w;
2739
- if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
2740
- nr_w=1;
2741
- else
2742
- nr_w=model_->nr_class;
2743
-
2744
- fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
2745
- fprintf(fp, "nr_class %d\n", model_->nr_class);
2746
-
2747
- if(model_->label)
2748
- {
2749
- fprintf(fp, "label");
2750
- for(i=0; i<model_->nr_class; i++)
2751
- fprintf(fp, " %d", model_->label[i]);
2752
- fprintf(fp, "\n");
2753
- }
2754
-
2755
- fprintf(fp, "nr_feature %d\n", nr_feature);
2756
-
2757
- fprintf(fp, "bias %.16g\n", model_->bias);
2758
-
2759
- fprintf(fp, "w\n");
2760
- for(i=0; i<w_size; i++)
2761
- {
2762
- int j;
2763
- for(j=0; j<nr_w; j++)
2764
- fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
2765
- fprintf(fp, "\n");
2766
- }
2767
-
2768
- setlocale(LC_ALL, old_locale);
2769
- free(old_locale);
2770
-
2771
- if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2772
- else return 0;
2773
- }
2774
-
2775
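The resulting file is plain text: a header of solver_type, nr_class, an optional label line, nr_feature and bias, then a line containing just w followed by w_size rows of nr_w coefficients each, all written under the "C" locale so the decimal separator is always a period. As a purely hypothetical illustration, a binary L2R_LR model with three features and no bias term could look like:

    solver_type L2R_LR
    nr_class 2
    label 1 -1
    nr_feature 3
    bias -1
    w
    0.25
    -1.5
    0.0625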
- //
2776
- // FSCANF helps to handle fscanf failures.
2777
- // Its do-while block avoids the ambiguity when
2778
- // if (...)
2779
- // FSCANF();
2780
- // is used
2781
- //
2782
- #define FSCANF(_stream, _format, _var)do\
2783
- {\
2784
- if (fscanf(_stream, _format, _var) != 1)\
2785
- {\
2786
- fprintf(stderr, "ERROR: fscanf failed to read the model\n");\
2787
- EXIT_LOAD_MODEL()\
2788
- }\
2789
- }while(0)
2790
- // EXIT_LOAD_MODEL should NOT end with a semicolon.
2791
- #define EXIT_LOAD_MODEL()\
2792
- {\
2793
- setlocale(LC_ALL, old_locale);\
2794
- free(model_->label);\
2795
- free(model_);\
2796
- free(old_locale);\
2797
- return NULL;\
2798
- }
2799
- struct model *load_model(const char *model_file_name)
2800
- {
2801
- FILE *fp = fopen(model_file_name,"r");
2802
- if(fp==NULL) return NULL;
2803
-
2804
- int i;
2805
- int nr_feature;
2806
- int n;
2807
- int nr_class;
2808
- double bias;
2809
- model *model_ = Malloc(model,1);
2810
- parameter& param = model_->param;
2811
-
2812
- model_->label = NULL;
2813
-
2814
- char *old_locale = setlocale(LC_ALL, NULL);
2815
- if (old_locale)
2816
- {
2817
- old_locale = strdup(old_locale);
2818
- }
2819
- setlocale(LC_ALL, "C");
2820
-
2821
- char cmd[81];
2822
- while(1)
2823
- {
2824
- FSCANF(fp,"%80s",cmd);
2825
- if(strcmp(cmd,"solver_type")==0)
2826
- {
2827
- FSCANF(fp,"%80s",cmd);
2828
- int i;
2829
- for(i=0;solver_type_table[i];i++)
2830
- {
2831
- if(strcmp(solver_type_table[i],cmd)==0)
2832
- {
2833
- param.solver_type=i;
2834
- break;
2835
- }
2836
- }
2837
- if(solver_type_table[i] == NULL)
2838
- {
2839
- fprintf(stderr,"unknown solver type.\n");
2840
- EXIT_LOAD_MODEL()
2841
- }
2842
- }
2843
- else if(strcmp(cmd,"nr_class")==0)
2844
- {
2845
- FSCANF(fp,"%d",&nr_class);
2846
- model_->nr_class=nr_class;
2847
- }
2848
- else if(strcmp(cmd,"nr_feature")==0)
2849
- {
2850
- FSCANF(fp,"%d",&nr_feature);
2851
- model_->nr_feature=nr_feature;
2852
- }
2853
- else if(strcmp(cmd,"bias")==0)
2854
- {
2855
- FSCANF(fp,"%lf",&bias);
2856
- model_->bias=bias;
2857
- }
2858
- else if(strcmp(cmd,"w")==0)
2859
- {
2860
- break;
2861
- }
2862
- else if(strcmp(cmd,"label")==0)
2863
- {
2864
- int nr_class = model_->nr_class;
2865
- model_->label = Malloc(int,nr_class);
2866
- for(int i=0;i<nr_class;i++)
2867
- FSCANF(fp,"%d",&model_->label[i]);
2868
- }
2869
- else
2870
- {
2871
- fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2872
- EXIT_LOAD_MODEL()
2873
- }
2874
- }
2875
-
2876
- nr_feature=model_->nr_feature;
2877
- if(model_->bias>=0)
2878
- n=nr_feature+1;
2879
- else
2880
- n=nr_feature;
2881
- int w_size = n;
2882
- int nr_w;
2883
- if(nr_class==2 && param.solver_type != MCSVM_CS)
2884
- nr_w = 1;
2885
- else
2886
- nr_w = nr_class;
2887
-
2888
- model_->w=Malloc(double, w_size*nr_w);
2889
- for(i=0; i<w_size; i++)
2890
- {
2891
- int j;
2892
- for(j=0; j<nr_w; j++)
2893
- FSCANF(fp, "%lf ", &model_->w[i*nr_w+j]);
2894
- if (fscanf(fp, "\n") !=0)
2895
- {
2896
- fprintf(stderr, "ERROR: fscanf failed to read the model\n");
2897
- EXIT_LOAD_MODEL()
2898
- }
2899
- }
2900
-
2901
- setlocale(LC_ALL, old_locale);
2902
- free(old_locale);
2903
-
2904
- if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
2905
-
2906
- return model_;
2907
- }
2908
-
2909
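load_model returns NULL on any read or parse failure (the FSCANF/EXIT_LOAD_MODEL pair above releases the partially built model first), so the result must be checked before use. A round-trip sketch; the file name is illustrative:

    #include <cstdio>
    #include "linear.h"

    int roundtrip(const model *m)
    {
        if (save_model("model.txt", m) != 0)       // returns -1 on I/O failure
        {
            fprintf(stderr, "could not save model\n");
            return -1;
        }
        model *loaded = load_model("model.txt");   // NULL on parse or I/O failure
        if (loaded == NULL)
        {
            fprintf(stderr, "could not load model\n");
            return -1;
        }
        free_and_destroy_model(&loaded);
        return 0;
    }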
- int get_nr_feature(const model *model_)
2910
- {
2911
- return model_->nr_feature;
2912
- }
2913
-
2914
- int get_nr_class(const model *model_)
2915
- {
2916
- return model_->nr_class;
2917
- }
2918
-
2919
- void get_labels(const model *model_, int* label)
2920
- {
2921
- if (model_->label != NULL)
2922
- for(int i=0;i<model_->nr_class;i++)
2923
- label[i] = model_->label[i];
2924
- }
2925
-
2926
- // use inline here for better performance (around 20% faster than the non-inline one)
2927
- static inline double get_w_value(const struct model *model_, int idx, int label_idx)
2928
- {
2929
- int nr_class = model_->nr_class;
2930
- int solver_type = model_->param.solver_type;
2931
- const double *w = model_->w;
2932
-
2933
- if(idx < 0 || idx > model_->nr_feature)
2934
- return 0;
2935
- if(check_regression_model(model_))
2936
- return w[idx];
2937
- else
2938
- {
2939
- if(label_idx < 0 || label_idx >= nr_class)
2940
- return 0;
2941
- if(nr_class == 2 && solver_type != MCSVM_CS)
2942
- {
2943
- if(label_idx == 0)
2944
- return w[idx];
2945
- else
2946
- return -w[idx];
2947
- }
2948
- else
2949
- return w[idx*nr_class+label_idx];
2950
- }
2951
- }
2952
-
2953
- // feat_idx: starting from 1 to nr_feature
2954
- // label_idx: starting from 0 to nr_class-1 for classification models;
2955
- // for regression models, label_idx is ignored.
2956
- double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
2957
- {
2958
- if(feat_idx > model_->nr_feature)
2959
- return 0;
2960
- return get_w_value(model_, feat_idx-1, label_idx);
2961
- }
2962
-
2963
- double get_decfun_bias(const struct model *model_, int label_idx)
2964
- {
2965
- int bias_idx = model_->nr_feature;
2966
- double bias = model_->bias;
2967
- if(bias <= 0)
2968
- return 0;
2969
- else
2970
- return bias*get_w_value(model_, bias_idx, label_idx);
2971
- }
2972
-
2973
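get_decfun_coef and get_decfun_bias expose the learned decision function without reaching into model->w: feature indices are 1-based, label_idx selects the class (and is ignored for regression models), and the bias term is 0 unless the model was trained with bias >= 0. A sketch that reconstructs w.x + b for one class from a dense feature vector (helper name illustrative):

    #include "linear.h"

    // Decision value of class label_idx for a dense vector x[0 .. nr_feature-1].
    double decision_value(const model *m, const double *x, int label_idx)
    {
        double s = get_decfun_bias(m, label_idx);        // 0 unless trained with bias >= 0
        for (int f = 1; f <= get_nr_feature(m); f++)     // feat_idx is 1-based
            s += get_decfun_coef(m, f, label_idx) * x[f - 1];
        return s;
    }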
- void free_model_content(struct model *model_ptr)
2974
- {
2975
- if(model_ptr->w != NULL)
2976
- free(model_ptr->w);
2977
- if(model_ptr->label != NULL)
2978
- free(model_ptr->label);
2979
- }
2980
-
2981
- void free_and_destroy_model(struct model **model_ptr_ptr)
2982
- {
2983
- struct model *model_ptr = *model_ptr_ptr;
2984
- if(model_ptr != NULL)
2985
- {
2986
- free_model_content(model_ptr);
2987
- free(model_ptr);
2988
- }
2989
- }
2990
-
2991
- void destroy_param(parameter* param)
2992
- {
2993
- if(param->weight_label != NULL)
2994
- free(param->weight_label);
2995
- if(param->weight != NULL)
2996
- free(param->weight);
2997
- if(param->init_sol != NULL)
2998
- free(param->init_sol);
2999
- }
3000
-
3001
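Ownership is split between the three helpers above: free_and_destroy_model releases the arrays inside the model and the model struct itself (it does not reset the caller's pointer to NULL), while destroy_param only frees the weight_label, weight and init_sol arrays owned by a parameter, so a parameter whose pointers are all NULL is safe to pass to it.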
- const char *check_parameter(const problem *prob, const parameter *param)
3002
- {
3003
- if(param->eps <= 0)
3004
- return "eps <= 0";
3005
-
3006
- if(param->C <= 0)
3007
- return "C <= 0";
3008
-
3009
- if(param->p < 0)
3010
- return "p < 0";
3011
-
3012
- if(param->solver_type != L2R_LR
3013
- && param->solver_type != L2R_L2LOSS_SVC_DUAL
3014
- && param->solver_type != L2R_L2LOSS_SVC
3015
- && param->solver_type != L2R_L1LOSS_SVC_DUAL
3016
- && param->solver_type != MCSVM_CS
3017
- && param->solver_type != L1R_L2LOSS_SVC
3018
- && param->solver_type != L1R_LR
3019
- && param->solver_type != L2R_LR_DUAL
3020
- && param->solver_type != L2R_L2LOSS_SVR
3021
- && param->solver_type != L2R_L2LOSS_SVR_DUAL
3022
- && param->solver_type != L2R_L1LOSS_SVR_DUAL)
3023
- return "unknown solver type";
3024
-
3025
- if(param->init_sol != NULL
3026
- && param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
3027
- return "Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC";
3028
-
3029
- return NULL;
3030
- }
3031
-
3032
- int check_probability_model(const struct model *model_)
3033
- {
3034
- return (model_->param.solver_type==L2R_LR ||
3035
- model_->param.solver_type==L2R_LR_DUAL ||
3036
- model_->param.solver_type==L1R_LR);
3037
- }
3038
-
3039
- int check_regression_model(const struct model *model_)
3040
- {
3041
- return (model_->param.solver_type==L2R_L2LOSS_SVR ||
3042
- model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
3043
- model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);
3044
- }
3045
-
3046
- void set_print_string_function(void (*print_func)(const char*))
3047
- {
3048
- if (print_func == NULL)
3049
- liblinear_print_string = &print_string_stdout;
3050
- else
3051
- liblinear_print_string = print_func;
3052
- }
3053
-
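set_print_string_function is how callers silence or redirect the solver's progress output; passing NULL restores the default stdout printer, and find_parameter_C above uses the same mechanism internally to mute the per-C runs. A minimal sketch, with the no-op printer supplied by the caller:

    #include "linear.h"

    static void quiet(const char *) {}   // discard all liblinear progress messages

    void silence_liblinear()
    {
        set_print_string_function(&quiet);
        // set_print_string_function(NULL) switches back to printing on stdout.
    }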