numo-liblinear 0.2.0 → 1.1.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in the respective public registries.
@@ -0,0 +1,57 @@
+ #include "blas.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy,
+ int *incy)
+ {
+ long int i, m, ix, iy, nn, iincx, iincy;
+ register double ssa;
+
+ /* constant times a vector plus a vector.
+ uses unrolled loop for increments equal to one.
+ jack dongarra, linpack, 3/11/78.
+ modified 12/3/93, array(1) declarations changed to array(*) */
+
+ /* Dereference inputs */
+ nn = *n;
+ ssa = *sa;
+ iincx = *incx;
+ iincy = *incy;
+
+ if( nn > 0 && ssa != 0.0 )
+ {
+ if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */
+ {
+ m = nn-3;
+ for (i = 0; i < m; i += 4)
+ {
+ sy[i] += ssa * sx[i];
+ sy[i+1] += ssa * sx[i+1];
+ sy[i+2] += ssa * sx[i+2];
+ sy[i+3] += ssa * sx[i+3];
+ }
+ for ( ; i < nn; ++i) /* clean-up loop */
+ sy[i] += ssa * sx[i];
+ }
+ else /* code for unequal increments or equal increments not equal to 1 */
+ {
+ ix = iincx >= 0 ? 0 : (1 - nn) * iincx;
+ iy = iincy >= 0 ? 0 : (1 - nn) * iincy;
+ for (i = 0; i < nn; i++)
+ {
+ sy[iy] += ssa * sx[ix];
+ ix += iincx;
+ iy += iincy;
+ }
+ }
+ }
+
+ return 0;
+ } /* daxpy_ */
+
+ #ifdef __cplusplus
+ }
+ #endif
@@ -0,0 +1,58 @@
+ #include "blas.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ double ddot_(int *n, double *sx, int *incx, double *sy, int *incy)
+ {
+ long int i, m, nn, iincx, iincy;
+ double stemp;
+ long int ix, iy;
+
+ /* forms the dot product of two vectors.
+ uses unrolled loops for increments equal to one.
+ jack dongarra, linpack, 3/11/78.
+ modified 12/3/93, array(1) declarations changed to array(*) */
+
+ /* Dereference inputs */
+ nn = *n;
+ iincx = *incx;
+ iincy = *incy;
+
+ stemp = 0.0;
+ if (nn > 0)
+ {
+ if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */
+ {
+ m = nn-4;
+ for (i = 0; i < m; i += 5)
+ stemp += sx[i] * sy[i] + sx[i+1] * sy[i+1] + sx[i+2] * sy[i+2] +
+ sx[i+3] * sy[i+3] + sx[i+4] * sy[i+4];
+
+ for ( ; i < nn; i++) /* clean-up loop */
+ stemp += sx[i] * sy[i];
+ }
+ else /* code for unequal increments or equal increments not equal to 1 */
+ {
+ ix = 0;
+ iy = 0;
+ if (iincx < 0)
+ ix = (1 - nn) * iincx;
+ if (iincy < 0)
+ iy = (1 - nn) * iincy;
+ for (i = 0; i < nn; i++)
+ {
+ stemp += sx[ix] * sy[iy];
+ ix += iincx;
+ iy += iincy;
+ }
+ }
+ }
+
+ return stemp;
+ } /* ddot_ */
+
+ #ifdef __cplusplus
+ }
+ #endif
@@ -0,0 +1,70 @@
+ #include <math.h> /* Needed for fabs() and sqrt() */
+ #include "blas.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ double dnrm2_(int *n, double *x, int *incx)
+ {
+ long int ix, nn, iincx;
+ double norm, scale, absxi, ssq, temp;
+
+ /* DNRM2 returns the euclidean norm of a vector via the function
+ name, so that
+
+ DNRM2 := sqrt( x'*x )
+
+ -- This version written on 25-October-1982.
+ Modified on 14-October-1993 to inline the call to SLASSQ.
+ Sven Hammarling, Nag Ltd. */
+
+ /* Dereference inputs */
+ nn = *n;
+ iincx = *incx;
+
+ if( nn > 0 && iincx > 0 )
+ {
+ if (nn == 1)
+ {
+ norm = fabs(x[0]);
+ }
+ else
+ {
+ scale = 0.0;
+ ssq = 1.0;
+
+ /* The following loop is equivalent to this call to the LAPACK
+ auxiliary routine: CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */
+
+ for (ix=(nn-1)*iincx; ix>=0; ix-=iincx)
+ {
+ if (x[ix] != 0.0)
+ {
+ absxi = fabs(x[ix]);
+ if (scale < absxi)
+ {
+ temp = scale / absxi;
+ ssq = ssq * (temp * temp) + 1.0;
+ scale = absxi;
+ }
+ else
+ {
+ temp = absxi / scale;
+ ssq += temp * temp;
+ }
+ }
+ }
+ norm = scale * sqrt(ssq);
+ }
+ }
+ else
+ norm = 0.0;
+
+ return norm;
+
+ } /* dnrm2_ */
+
+ #ifdef __cplusplus
+ }
+ #endif
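(Editor's note, not part of the diff.) The loop above inlines LAPACK's SLASSQ update to avoid overflow and underflow: it maintains the invariant that the sum of squares processed so far equals scale^2 * ssq, where scale is the largest magnitude seen. In LaTeX form,

\[
\sum_k x_k^2 = \mathrm{scale}^2\,\mathrm{ssq},\qquad
(\mathrm{scale},\mathrm{ssq}) \leftarrow
\begin{cases}
\bigl(|x_i|,\; 1 + \mathrm{ssq}\,(\mathrm{scale}/|x_i|)^2\bigr) & \text{if } |x_i| > \mathrm{scale},\\
\bigl(\mathrm{scale},\; \mathrm{ssq} + (|x_i|/\mathrm{scale})^2\bigr) & \text{otherwise,}
\end{cases}
\]

so the function returns scale * sqrt(ssq) rather than accumulating x_i^2 directly.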
@@ -0,0 +1,52 @@
+ #include "blas.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ int dscal_(int *n, double *sa, double *sx, int *incx)
+ {
+ long int i, m, nincx, nn, iincx;
+ double ssa;
+
+ /* scales a vector by a constant.
+ uses unrolled loops for increment equal to 1.
+ jack dongarra, linpack, 3/11/78.
+ modified 3/93 to return if incx .le. 0.
+ modified 12/3/93, array(1) declarations changed to array(*) */
+
+ /* Dereference inputs */
+ nn = *n;
+ iincx = *incx;
+ ssa = *sa;
+
+ if (nn > 0 && iincx > 0)
+ {
+ if (iincx == 1) /* code for increment equal to 1 */
+ {
+ m = nn-4;
+ for (i = 0; i < m; i += 5)
+ {
+ sx[i] = ssa * sx[i];
+ sx[i+1] = ssa * sx[i+1];
+ sx[i+2] = ssa * sx[i+2];
+ sx[i+3] = ssa * sx[i+3];
+ sx[i+4] = ssa * sx[i+4];
+ }
+ for ( ; i < nn; ++i) /* clean-up loop */
+ sx[i] = ssa * sx[i];
+ }
+ else /* code for increment not equal to 1 */
+ {
+ nincx = nn * iincx;
+ for (i = 0; i < nincx; i += iincx)
+ sx[i] = ssa * sx[i];
+ }
+ }
+
+ return 0;
+ } /* dscal_ */
+
+ #ifdef __cplusplus
+ }
+ #endif
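(Editor's note, not part of the diff.) The four translated LINPACK routines above keep the Fortran calling convention: every argument, including scalar lengths and strides, is passed by pointer. A minimal usage sketch in C, assuming the declarations from the bundled blas.h:

#include <stdio.h>
#include "blas.h"

int main(void)
{
    int n = 3, inc = 1;
    double a = 2.0;
    double x[3] = {1.0, 2.0, 3.0};
    double y[3] = {4.0, 5.0, 6.0};

    daxpy_(&n, &a, x, &inc, y, &inc);           /* y = a*x + y  -> {6, 9, 12} */
    double dot = ddot_(&n, x, &inc, y, &inc);   /* 1*6 + 2*9 + 3*12 = 60 */
    double nrm = dnrm2_(&n, x, &inc);           /* sqrt(1 + 4 + 9) ~ 3.7417 */
    dscal_(&n, &a, x, &inc);                    /* x -> {2, 4, 6} */

    printf("dot = %f, nrm = %f\n", dot, nrm);
    return 0;
}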
@@ -0,0 +1,3725 @@
1
+ #include <math.h>
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <string.h>
5
+ #include <stdarg.h>
6
+ #include <locale.h>
7
+ #include "linear.h"
8
+ #include "newton.h"
9
+ int liblinear_version = LIBLINEAR_VERSION;
10
+ typedef signed char schar;
11
+ template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
12
+ #ifndef min
13
+ template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
14
+ #endif
15
+ #ifndef max
16
+ template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
17
+ #endif
18
+ template <class S, class T> static inline void clone(T*& dst, S* src, int n)
19
+ {
20
+ dst = new T[n];
21
+ memcpy((void *)dst,(void *)src,sizeof(T)*n);
22
+ }
23
+ #define INF HUGE_VAL
24
+ #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
25
+
26
+ static void print_string_stdout(const char *s)
27
+ {
28
+ fputs(s,stdout);
29
+ fflush(stdout);
30
+ }
31
+ static void print_null(const char *s) {}
32
+
33
+ static void (*liblinear_print_string) (const char *) = &print_string_stdout;
34
+
35
+ #if 1
36
+ static void info(const char *fmt,...)
37
+ {
38
+ char buf[BUFSIZ];
39
+ va_list ap;
40
+ va_start(ap,fmt);
41
+ vsprintf(buf,fmt,ap);
42
+ va_end(ap);
43
+ (*liblinear_print_string)(buf);
44
+ }
45
+ #else
46
+ static void info(const char *fmt,...) {}
47
+ #endif
48
+ class sparse_operator
49
+ {
50
+ public:
51
+ static double nrm2_sq(const feature_node *x)
52
+ {
53
+ double ret = 0;
54
+ while(x->index != -1)
55
+ {
56
+ ret += x->value*x->value;
57
+ x++;
58
+ }
59
+ return (ret);
60
+ }
61
+
62
+ static double dot(const double *s, const feature_node *x)
63
+ {
64
+ double ret = 0;
65
+ while(x->index != -1)
66
+ {
67
+ ret += s[x->index-1]*x->value;
68
+ x++;
69
+ }
70
+ return (ret);
71
+ }
72
+
73
+ static double sparse_dot(const feature_node *x1, const feature_node *x2)
74
+ {
75
+ double ret = 0;
76
+ while(x1->index != -1 && x2->index != -1)
77
+ {
78
+ if(x1->index == x2->index)
79
+ {
80
+ ret += x1->value * x2->value;
81
+ ++x1;
82
+ ++x2;
83
+ }
84
+ else
85
+ {
86
+ if(x1->index > x2->index)
87
+ ++x2;
88
+ else
89
+ ++x1;
90
+ }
91
+ }
92
+ return (ret);
93
+ }
94
+
95
+ static void axpy(const double a, const feature_node *x, double *y)
96
+ {
97
+ while(x->index != -1)
98
+ {
99
+ y[x->index-1] += a*x->value;
100
+ x++;
101
+ }
102
+ }
103
+ };
104
+
105
+ // L2-regularized empirical risk minimization
106
+ // min_w w^Tw/2 + \sum C_i \xi(w^Tx_i), where \xi() is the loss
107
+
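(Editor's note, not part of the diff.) Concretely, the subclasses below supply the loss \xi: l2r_lr_fun uses the logistic loss, l2r_l2_svc_fun the squared hinge loss, and l2r_l2_svr_fun the squared epsilon-insensitive loss,

\[
\min_w\; \tfrac{1}{2}w^Tw + \sum_i C_i\,\xi_i(w^Tx_i),\qquad
\xi_i =
\begin{cases}
\log\bigl(1+e^{-y_i w^Tx_i}\bigr) & \text{logistic regression},\\
\max\bigl(0,\,1-y_i w^Tx_i\bigr)^2 & \text{L2-loss SVC},\\
\max\bigl(0,\,|w^Tx_i-y_i|-p\bigr)^2 & \text{L2-loss SVR}.
\end{cases}
\]

When regularize_bias == 0, the last component of w (the bias) is excluded from the w^Tw term, as fun() below does explicitly.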
108
+ class l2r_erm_fun: public function
109
+ {
110
+ public:
111
+ l2r_erm_fun(const problem *prob, const parameter *param, double *C);
112
+ ~l2r_erm_fun();
113
+
114
+ double fun(double *w);
115
+ double linesearch_and_update(double *w, double *d, double *f, double *g, double alpha);
116
+ int get_nr_variable(void);
117
+
118
+ protected:
119
+ virtual double C_times_loss(int i, double wx_i) = 0;
120
+ void Xv(double *v, double *Xv);
121
+ void XTv(double *v, double *XTv);
122
+
123
+ double *C;
124
+ const problem *prob;
125
+ double *wx;
126
+ double *tmp; // a working array
127
+ double wTw;
128
+ int regularize_bias;
129
+ };
130
+
131
+ l2r_erm_fun::l2r_erm_fun(const problem *prob, const parameter *param, double *C)
132
+ {
133
+ int l=prob->l;
134
+
135
+ this->prob = prob;
136
+
137
+ wx = new double[l];
138
+ tmp = new double[l];
139
+ this->C = C;
140
+ this->regularize_bias = param->regularize_bias;
141
+ }
142
+
143
+ l2r_erm_fun::~l2r_erm_fun()
144
+ {
145
+ delete[] wx;
146
+ delete[] tmp;
147
+ }
148
+
149
+ double l2r_erm_fun::fun(double *w)
150
+ {
151
+ int i;
152
+ double f=0;
153
+ int l=prob->l;
154
+ int w_size=get_nr_variable();
155
+
156
+ wTw = 0;
157
+ Xv(w, wx);
158
+
159
+ for(i=0;i<w_size;i++)
160
+ wTw += w[i]*w[i];
161
+ if(regularize_bias == 0)
162
+ wTw -= w[w_size-1]*w[w_size-1];
163
+ for(i=0;i<l;i++)
164
+ f += C_times_loss(i, wx[i]);
165
+ f = f + 0.5 * wTw;
166
+
167
+ return(f);
168
+ }
169
+
170
+ int l2r_erm_fun::get_nr_variable(void)
171
+ {
172
+ return prob->n;
173
+ }
174
+
175
+ // On entry *f must be the function value of w
176
+ // On exit w is updated and *f is the new function value
177
+ double l2r_erm_fun::linesearch_and_update(double *w, double *s, double *f, double *g, double alpha)
178
+ {
179
+ int i;
180
+ int l = prob->l;
181
+ double sTs = 0;
182
+ double wTs = 0;
183
+ double gTs = 0;
184
+ double eta = 0.01;
185
+ int w_size = get_nr_variable();
186
+ int max_num_linesearch = 20;
187
+ double fold = *f;
188
+ Xv(s, tmp);
189
+
190
+ for (i=0;i<w_size;i++)
191
+ {
192
+ sTs += s[i] * s[i];
193
+ wTs += s[i] * w[i];
194
+ gTs += s[i] * g[i];
195
+ }
196
+ if(regularize_bias == 0)
197
+ {
198
+ // bias not used in calculating (w + \alpha s)^T (w + \alpha s)
199
+ sTs -= s[w_size-1] * s[w_size-1];
200
+ wTs -= s[w_size-1] * w[w_size-1];
201
+ }
202
+
203
+ int num_linesearch = 0;
204
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
205
+ {
206
+ double loss = 0;
207
+ for(i=0;i<l;i++)
208
+ {
209
+ double inner_product = tmp[i] * alpha + wx[i];
210
+ loss += C_times_loss(i, inner_product);
211
+ }
212
+ *f = loss + (alpha * alpha * sTs + wTw) / 2.0 + alpha * wTs;
213
+ if (*f - fold <= eta * alpha * gTs)
214
+ {
215
+ for (i=0;i<l;i++)
216
+ wx[i] += alpha * tmp[i];
217
+ break;
218
+ }
219
+ else
220
+ alpha *= 0.5;
221
+ }
222
+
223
+ if (num_linesearch >= max_num_linesearch)
224
+ {
225
+ *f = fold;
226
+ return 0;
227
+ }
228
+ else
229
+ for (i=0;i<w_size;i++)
230
+ w[i] += alpha * s[i];
231
+
232
+ wTw += alpha * alpha * sTs + 2* alpha * wTs;
233
+ return alpha;
234
+ }
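(Editor's note, not part of the diff.) The backtracking loop above accepts a step length alpha once the Armijo-type sufficient-decrease condition

\[ f(w+\alpha s) - f(w) \le \eta\,\alpha\,g^Ts, \qquad \eta = 0.01, \]

holds, halving alpha otherwise and giving up after 20 trials (in which case 0 is returned and both w and *f are left unchanged). Each trial is cheap because Xs was precomputed into tmp, so only the losses are re-evaluated, not the matrix-vector product.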
235
+
236
+ void l2r_erm_fun::Xv(double *v, double *Xv)
237
+ {
238
+ int i;
239
+ int l=prob->l;
240
+ feature_node **x=prob->x;
241
+
242
+ for(i=0;i<l;i++)
243
+ Xv[i]=sparse_operator::dot(v, x[i]);
244
+ }
245
+
246
+ void l2r_erm_fun::XTv(double *v, double *XTv)
247
+ {
248
+ int i;
249
+ int l=prob->l;
250
+ int w_size=get_nr_variable();
251
+ feature_node **x=prob->x;
252
+
253
+ for(i=0;i<w_size;i++)
254
+ XTv[i]=0;
255
+ for(i=0;i<l;i++)
256
+ sparse_operator::axpy(v[i], x[i], XTv);
257
+ }
258
+
259
+ class l2r_lr_fun: public l2r_erm_fun
260
+ {
261
+ public:
262
+ l2r_lr_fun(const problem *prob, const parameter *param, double *C);
263
+ ~l2r_lr_fun();
264
+
265
+ void grad(double *w, double *g);
266
+ void Hv(double *s, double *Hs);
267
+
268
+ void get_diag_preconditioner(double *M);
269
+
270
+ private:
271
+ double *D;
272
+ double C_times_loss(int i, double wx_i);
273
+ };
274
+
275
+ l2r_lr_fun::l2r_lr_fun(const problem *prob, const parameter *param, double *C):
276
+ l2r_erm_fun(prob, param, C)
277
+ {
278
+ int l=prob->l;
279
+ D = new double[l];
280
+ }
281
+
282
+ l2r_lr_fun::~l2r_lr_fun()
283
+ {
284
+ delete[] D;
285
+ }
286
+
287
+ double l2r_lr_fun::C_times_loss(int i, double wx_i)
288
+ {
289
+ double ywx_i = wx_i * prob->y[i];
290
+ if (ywx_i >= 0)
291
+ return C[i]*log(1 + exp(-ywx_i));
292
+ else
293
+ return C[i]*(-ywx_i + log(1 + exp(ywx_i)));
294
+ }
295
+
296
+ void l2r_lr_fun::grad(double *w, double *g)
297
+ {
298
+ int i;
299
+ double *y=prob->y;
300
+ int l=prob->l;
301
+ int w_size=get_nr_variable();
302
+
303
+ for(i=0;i<l;i++)
304
+ {
305
+ tmp[i] = 1/(1 + exp(-y[i]*wx[i]));
306
+ D[i] = tmp[i]*(1-tmp[i]);
307
+ tmp[i] = C[i]*(tmp[i]-1)*y[i];
308
+ }
309
+ XTv(tmp, g);
310
+
311
+ for(i=0;i<w_size;i++)
312
+ g[i] = w[i] + g[i];
313
+ if(regularize_bias == 0)
314
+ g[w_size-1] -= w[w_size-1];
315
+ }
316
+
317
+ void l2r_lr_fun::get_diag_preconditioner(double *M)
318
+ {
319
+ int i;
320
+ int l = prob->l;
321
+ int w_size=get_nr_variable();
322
+ feature_node **x = prob->x;
323
+
324
+ for (i=0; i<w_size; i++)
325
+ M[i] = 1;
326
+ if(regularize_bias == 0)
327
+ M[w_size-1] = 0;
328
+
329
+ for (i=0; i<l; i++)
330
+ {
331
+ feature_node *xi = x[i];
332
+ while (xi->index!=-1)
333
+ {
334
+ M[xi->index-1] += xi->value*xi->value*C[i]*D[i];
335
+ xi++;
336
+ }
337
+ }
338
+ }
339
+
340
+ void l2r_lr_fun::Hv(double *s, double *Hs)
341
+ {
342
+ int i;
343
+ int l=prob->l;
344
+ int w_size=get_nr_variable();
345
+ feature_node **x=prob->x;
346
+
347
+ for(i=0;i<w_size;i++)
348
+ Hs[i] = 0;
349
+ for(i=0;i<l;i++)
350
+ {
351
+ feature_node * const xi=x[i];
352
+ double xTs = sparse_operator::dot(s, xi);
353
+
354
+ xTs = C[i]*D[i]*xTs;
355
+
356
+ sparse_operator::axpy(xTs, xi, Hs);
357
+ }
358
+ for(i=0;i<w_size;i++)
359
+ Hs[i] = s[i] + Hs[i];
360
+ if(regularize_bias == 0)
361
+ Hs[w_size-1] -= s[w_size-1];
362
+ }
363
+
364
+ class l2r_l2_svc_fun: public l2r_erm_fun
365
+ {
366
+ public:
367
+ l2r_l2_svc_fun(const problem *prob, const parameter *param, double *C);
368
+ ~l2r_l2_svc_fun();
369
+
370
+ void grad(double *w, double *g);
371
+ void Hv(double *s, double *Hs);
372
+
373
+ void get_diag_preconditioner(double *M);
374
+
375
+ protected:
376
+ void subXTv(double *v, double *XTv);
377
+
378
+ int *I;
379
+ int sizeI;
380
+
381
+ private:
382
+ double C_times_loss(int i, double wx_i);
383
+ };
384
+
385
+ l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, const parameter *param, double *C):
386
+ l2r_erm_fun(prob, param, C)
387
+ {
388
+ I = new int[prob->l];
389
+ }
390
+
391
+ l2r_l2_svc_fun::~l2r_l2_svc_fun()
392
+ {
393
+ delete[] I;
394
+ }
395
+
396
+ double l2r_l2_svc_fun::C_times_loss(int i, double wx_i)
397
+ {
398
+ double d = 1 - prob->y[i] * wx_i;
399
+ if (d > 0)
400
+ return C[i] * d * d;
401
+ else
402
+ return 0;
403
+ }
404
+
405
+ void l2r_l2_svc_fun::grad(double *w, double *g)
406
+ {
407
+ int i;
408
+ double *y=prob->y;
409
+ int l=prob->l;
410
+ int w_size=get_nr_variable();
411
+
412
+ sizeI = 0;
413
+ for (i=0;i<l;i++)
414
+ {
415
+ tmp[i] = wx[i] * y[i];
416
+ if (tmp[i] < 1)
417
+ {
418
+ tmp[sizeI] = C[i]*y[i]*(tmp[i]-1);
419
+ I[sizeI] = i;
420
+ sizeI++;
421
+ }
422
+ }
423
+ subXTv(tmp, g);
424
+
425
+ for(i=0;i<w_size;i++)
426
+ g[i] = w[i] + 2*g[i];
427
+ if(regularize_bias == 0)
428
+ g[w_size-1] -= w[w_size-1];
429
+ }
430
+
431
+ void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
432
+ {
433
+ int i;
434
+ int w_size=get_nr_variable();
435
+ feature_node **x = prob->x;
436
+
437
+ for (i=0; i<w_size; i++)
438
+ M[i] = 1;
439
+ if(regularize_bias == 0)
440
+ M[w_size-1] = 0;
441
+
442
+ for (i=0; i<sizeI; i++)
443
+ {
444
+ int idx = I[i];
445
+ feature_node *xi = x[idx];
446
+ while (xi->index!=-1)
447
+ {
448
+ M[xi->index-1] += xi->value*xi->value*C[idx]*2;
449
+ xi++;
450
+ }
451
+ }
452
+ }
453
+
454
+ void l2r_l2_svc_fun::Hv(double *s, double *Hs)
455
+ {
456
+ int i;
457
+ int w_size=get_nr_variable();
458
+ feature_node **x=prob->x;
459
+
460
+ for(i=0;i<w_size;i++)
461
+ Hs[i]=0;
462
+ for(i=0;i<sizeI;i++)
463
+ {
464
+ feature_node * const xi=x[I[i]];
465
+ double xTs = sparse_operator::dot(s, xi);
466
+
467
+ xTs = C[I[i]]*xTs;
468
+
469
+ sparse_operator::axpy(xTs, xi, Hs);
470
+ }
471
+ for(i=0;i<w_size;i++)
472
+ Hs[i] = s[i] + 2*Hs[i];
473
+ if(regularize_bias == 0)
474
+ Hs[w_size-1] -= s[w_size-1];
475
+ }
476
+
477
+ void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
478
+ {
479
+ int i;
480
+ int w_size=get_nr_variable();
481
+ feature_node **x=prob->x;
482
+
483
+ for(i=0;i<w_size;i++)
484
+ XTv[i]=0;
485
+ for(i=0;i<sizeI;i++)
486
+ sparse_operator::axpy(v[i], x[I[i]], XTv);
487
+ }
488
+
489
+ class l2r_l2_svr_fun: public l2r_l2_svc_fun
490
+ {
491
+ public:
492
+ l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C);
493
+
494
+ void grad(double *w, double *g);
495
+
496
+ private:
497
+ double C_times_loss(int i, double wx_i);
498
+ double p;
499
+ };
500
+
501
+ l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C):
502
+ l2r_l2_svc_fun(prob, param, C)
503
+ {
504
+ this->p = param->p;
505
+ this->regularize_bias = param->regularize_bias;
506
+ }
507
+
508
+ double l2r_l2_svr_fun::C_times_loss(int i, double wx_i)
509
+ {
510
+ double d = wx_i - prob->y[i];
511
+ if(d < -p)
512
+ return C[i]*(d+p)*(d+p);
513
+ else if(d > p)
514
+ return C[i]*(d-p)*(d-p);
515
+ return 0;
516
+ }
517
+
518
+ void l2r_l2_svr_fun::grad(double *w, double *g)
519
+ {
520
+ int i;
521
+ double *y=prob->y;
522
+ int l=prob->l;
523
+ int w_size=get_nr_variable();
524
+ double d;
525
+
526
+ sizeI = 0;
527
+ for(i=0;i<l;i++)
528
+ {
529
+ d = wx[i] - y[i];
530
+
531
+ // generate index set I
532
+ if(d < -p)
533
+ {
534
+ tmp[sizeI] = C[i]*(d+p);
535
+ I[sizeI] = i;
536
+ sizeI++;
537
+ }
538
+ else if(d > p)
539
+ {
540
+ tmp[sizeI] = C[i]*(d-p);
541
+ I[sizeI] = i;
542
+ sizeI++;
543
+ }
544
+
545
+ }
546
+ subXTv(tmp, g);
547
+
548
+ for(i=0;i<w_size;i++)
549
+ g[i] = w[i] + 2*g[i];
550
+ if(regularize_bias == 0)
551
+ g[w_size-1] -= w[w_size-1];
552
+ }
553
+
554
+ // A coordinate descent algorithm for
555
+ // multi-class support vector machines by Crammer and Singer
556
+ //
557
+ // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
558
+ // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
559
+ //
560
+ // where e^m_i = 0 if y_i = m,
561
+ // e^m_i = 1 if y_i != m,
562
+ // C^m_i = C if m = y_i,
563
+ // C^m_i = 0 if m != y_i,
564
+ // and w_m(\alpha) = \sum_i \alpha^m_i x_i
565
+ //
566
+ // Given:
567
+ // x, y, C
568
+ // eps is the stopping tolerance
569
+ //
570
+ // solution will be put in w
571
+ //
572
+ // See Appendix of LIBLINEAR paper, Fan et al. (2008)
573
+
574
+ #define GETI(i) ((int) prob->y[i])
575
+ // To support weights for instances, use GETI(i) (i)
576
+
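(Editor's note, not part of the diff.) The recurring comment "To support weights for instances, use GETI(i) (i)" is terse; it means that per-instance cost weights could be supported by redefining the macro so that the C array is indexed by the example itself rather than derived from its label, roughly as follows (hypothetical sketch, not enabled in this code):

// Hypothetical per-instance-weight variant (not enabled in this code):
// #undef  GETI
// #define GETI(i) (i)   /* C[] would then hold one entry per training instance */

With the definition actually used here, GETI(i) is the class label of example i, so C[GETI(i)] selects a per-class penalty.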
577
+ class Solver_MCSVM_CS
578
+ {
579
+ public:
580
+ Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
581
+ ~Solver_MCSVM_CS();
582
+ void Solve(double *w);
583
+ private:
584
+ void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
585
+ bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
586
+ double *B, *C, *G;
587
+ int w_size, l;
588
+ int nr_class;
589
+ int max_iter;
590
+ double eps;
591
+ const problem *prob;
592
+ };
593
+
594
+ Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
595
+ {
596
+ this->w_size = prob->n;
597
+ this->l = prob->l;
598
+ this->nr_class = nr_class;
599
+ this->eps = eps;
600
+ this->max_iter = max_iter;
601
+ this->prob = prob;
602
+ this->B = new double[nr_class];
603
+ this->G = new double[nr_class];
604
+ this->C = weighted_C;
605
+ }
606
+
607
+ Solver_MCSVM_CS::~Solver_MCSVM_CS()
608
+ {
609
+ delete[] B;
610
+ delete[] G;
611
+ }
612
+
613
+ int compare_double(const void *a, const void *b)
614
+ {
615
+ if(*(double *)a > *(double *)b)
616
+ return -1;
617
+ if(*(double *)a < *(double *)b)
618
+ return 1;
619
+ return 0;
620
+ }
621
+
622
+ void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
623
+ {
624
+ int r;
625
+ double *D;
626
+
627
+ clone(D, B, active_i);
628
+ if(yi < active_i)
629
+ D[yi] += A_i*C_yi;
630
+ qsort(D, active_i, sizeof(double), compare_double);
631
+
632
+ double beta = D[0] - A_i*C_yi;
633
+ for(r=1;r<active_i && beta<r*D[r];r++)
634
+ beta += D[r];
635
+ beta /= r;
636
+
637
+ for(r=0;r<active_i;r++)
638
+ {
639
+ if(r == yi)
640
+ alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
641
+ else
642
+ alpha_new[r] = min((double)0, (beta - B[r])/A_i);
643
+ }
644
+ delete[] D;
645
+ }
646
+
647
+ bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
648
+ {
649
+ double bound = 0;
650
+ if(m == yi)
651
+ bound = C[GETI(i)];
652
+ if(alpha_i == bound && G[m] < minG)
653
+ return true;
654
+ return false;
655
+ }
656
+
657
+ void Solver_MCSVM_CS::Solve(double *w)
658
+ {
659
+ int i, m, s;
660
+ int iter = 0;
661
+ double *alpha = new double[l*nr_class];
662
+ double *alpha_new = new double[nr_class];
663
+ int *index = new int[l];
664
+ double *QD = new double[l];
665
+ int *d_ind = new int[nr_class];
666
+ double *d_val = new double[nr_class];
667
+ int *alpha_index = new int[nr_class*l];
668
+ int *y_index = new int[l];
669
+ int active_size = l;
670
+ int *active_size_i = new int[l];
671
+ double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
672
+ bool start_from_all = true;
673
+
674
+ // Initial alpha can be set here. Note that
675
+ // sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l-1
676
+ // alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
677
+ // alpha[i*nr_class+m] <= 0 if prob->y[i] != m
678
+ // If initial alpha isn't zero, uncomment the for loop below to initialize w
679
+ for(i=0;i<l*nr_class;i++)
680
+ alpha[i] = 0;
681
+
682
+ for(i=0;i<w_size*nr_class;i++)
683
+ w[i] = 0;
684
+ for(i=0;i<l;i++)
685
+ {
686
+ for(m=0;m<nr_class;m++)
687
+ alpha_index[i*nr_class+m] = m;
688
+ feature_node *xi = prob->x[i];
689
+ QD[i] = 0;
690
+ while(xi->index != -1)
691
+ {
692
+ double val = xi->value;
693
+ QD[i] += val*val;
694
+
695
+ // Uncomment the for loop if initial alpha isn't zero
696
+ // for(m=0; m<nr_class; m++)
697
+ // w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
698
+ xi++;
699
+ }
700
+ active_size_i[i] = nr_class;
701
+ y_index[i] = (int)prob->y[i];
702
+ index[i] = i;
703
+ }
704
+
705
+ while(iter < max_iter)
706
+ {
707
+ double stopping = -INF;
708
+ for(i=0;i<active_size;i++)
709
+ {
710
+ int j = i+rand()%(active_size-i);
711
+ swap(index[i], index[j]);
712
+ }
713
+ for(s=0;s<active_size;s++)
714
+ {
715
+ i = index[s];
716
+ double Ai = QD[i];
717
+ double *alpha_i = &alpha[i*nr_class];
718
+ int *alpha_index_i = &alpha_index[i*nr_class];
719
+
720
+ if(Ai > 0)
721
+ {
722
+ for(m=0;m<active_size_i[i];m++)
723
+ G[m] = 1;
724
+ if(y_index[i] < active_size_i[i])
725
+ G[y_index[i]] = 0;
726
+
727
+ feature_node *xi = prob->x[i];
728
+ while(xi->index!= -1)
729
+ {
730
+ double *w_i = &w[(xi->index-1)*nr_class];
731
+ for(m=0;m<active_size_i[i];m++)
732
+ G[m] += w_i[alpha_index_i[m]]*(xi->value);
733
+ xi++;
734
+ }
735
+
736
+ double minG = INF;
737
+ double maxG = -INF;
738
+ for(m=0;m<active_size_i[i];m++)
739
+ {
740
+ if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
741
+ minG = G[m];
742
+ if(G[m] > maxG)
743
+ maxG = G[m];
744
+ }
745
+ if(y_index[i] < active_size_i[i])
746
+ if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
747
+ minG = G[y_index[i]];
748
+
749
+ for(m=0;m<active_size_i[i];m++)
750
+ {
751
+ if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
752
+ {
753
+ active_size_i[i]--;
754
+ while(active_size_i[i]>m)
755
+ {
756
+ if(!be_shrunk(i, active_size_i[i], y_index[i],
757
+ alpha_i[alpha_index_i[active_size_i[i]]], minG))
758
+ {
759
+ swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
760
+ swap(G[m], G[active_size_i[i]]);
761
+ if(y_index[i] == active_size_i[i])
762
+ y_index[i] = m;
763
+ else if(y_index[i] == m)
764
+ y_index[i] = active_size_i[i];
765
+ break;
766
+ }
767
+ active_size_i[i]--;
768
+ }
769
+ }
770
+ }
771
+
772
+ if(active_size_i[i] <= 1)
773
+ {
774
+ active_size--;
775
+ swap(index[s], index[active_size]);
776
+ s--;
777
+ continue;
778
+ }
779
+
780
+ if(maxG-minG <= 1e-12)
781
+ continue;
782
+ else
783
+ stopping = max(maxG - minG, stopping);
784
+
785
+ for(m=0;m<active_size_i[i];m++)
786
+ B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
787
+
788
+ solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
789
+ int nz_d = 0;
790
+ for(m=0;m<active_size_i[i];m++)
791
+ {
792
+ double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
793
+ alpha_i[alpha_index_i[m]] = alpha_new[m];
794
+ if(fabs(d) >= 1e-12)
795
+ {
796
+ d_ind[nz_d] = alpha_index_i[m];
797
+ d_val[nz_d] = d;
798
+ nz_d++;
799
+ }
800
+ }
801
+
802
+ xi = prob->x[i];
803
+ while(xi->index != -1)
804
+ {
805
+ double *w_i = &w[(xi->index-1)*nr_class];
806
+ for(m=0;m<nz_d;m++)
807
+ w_i[d_ind[m]] += d_val[m]*xi->value;
808
+ xi++;
809
+ }
810
+ }
811
+ }
812
+
813
+ iter++;
814
+ if(iter % 10 == 0)
815
+ {
816
+ info(".");
817
+ }
818
+
819
+ if(stopping < eps_shrink)
820
+ {
821
+ if(stopping < eps && start_from_all == true)
822
+ break;
823
+ else
824
+ {
825
+ active_size = l;
826
+ for(i=0;i<l;i++)
827
+ active_size_i[i] = nr_class;
828
+ info("*");
829
+ eps_shrink = max(eps_shrink/2, eps);
830
+ start_from_all = true;
831
+ }
832
+ }
833
+ else
834
+ start_from_all = false;
835
+ }
836
+
837
+ info("\noptimization finished, #iter = %d\n",iter);
838
+ if (iter >= max_iter)
839
+ info("\nWARNING: reaching max number of iterations\n");
840
+
841
+ // calculate objective value
842
+ double v = 0;
843
+ int nSV = 0;
844
+ for(i=0;i<w_size*nr_class;i++)
845
+ v += w[i]*w[i];
846
+ v = 0.5*v;
847
+ for(i=0;i<l*nr_class;i++)
848
+ {
849
+ v += alpha[i];
850
+ if(fabs(alpha[i]) > 0)
851
+ nSV++;
852
+ }
853
+ for(i=0;i<l;i++)
854
+ v -= alpha[i*nr_class+(int)prob->y[i]];
855
+ info("Objective value = %lf\n",v);
856
+ info("nSV = %d\n",nSV);
857
+
858
+ delete [] alpha;
859
+ delete [] alpha_new;
860
+ delete [] index;
861
+ delete [] QD;
862
+ delete [] d_ind;
863
+ delete [] d_val;
864
+ delete [] alpha_index;
865
+ delete [] y_index;
866
+ delete [] active_size_i;
867
+ }
868
+
869
+ // A coordinate descent algorithm for
870
+ // L1-loss and L2-loss SVM dual problems
871
+ //
872
+ // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
873
+ // s.t. 0 <= \alpha_i <= upper_bound_i,
874
+ //
875
+ // where Qij = yi yj xi^T xj and
876
+ // D is a diagonal matrix
877
+ //
878
+ // In L1-SVM case:
879
+ // upper_bound_i = Cp if y_i = 1
880
+ // upper_bound_i = Cn if y_i = -1
881
+ // D_ii = 0
882
+ // In L2-SVM case:
883
+ // upper_bound_i = INF
884
+ // D_ii = 1/(2*Cp) if y_i = 1
885
+ // D_ii = 1/(2*Cn) if y_i = -1
886
+ //
887
+ // Given:
888
+ // x, y, Cp, Cn
889
+ // eps is the stopping tolerance
890
+ //
891
+ // solution will be put in w
892
+ //
893
+ // See Algorithm 3 of Hsieh et al., ICML 2008
894
+
895
+ #undef GETI
896
+ #define GETI(i) (y[i]+1)
897
+ // To support weights for instances, use GETI(i) (i)
898
+
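(Editor's note, not part of the diff.) For one coordinate i the solver below takes a projected Newton step that solves the one-variable sub-problem exactly: with

\[ G_i = y_i\,w^Tx_i - 1 + D_{ii}\alpha_i, \qquad \bar Q_{ii} = x_i^Tx_i + D_{ii}, \]

it sets \alpha_i \leftarrow \min(\max(\alpha_i - G_i/\bar Q_{ii},\,0),\,U_i), where U_i is the upper bound (Cp or Cn), and then updates w \leftarrow w + (\alpha_i^{new}-\alpha_i^{old})\,y_i x_i, so w = \sum_i \alpha_i y_i x_i is maintained incrementally. The projected gradient PG drives both the stopping test (PGmax_new - PGmin_new <= eps) and the shrinking of variables stuck at their bounds.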
899
+ static void solve_l2r_l1l2_svc(
900
+ const problem *prob, double *w, double eps,
901
+ double Cp, double Cn, int solver_type)
902
+ {
903
+ int l = prob->l;
904
+ int w_size = prob->n;
905
+ int i, s, iter = 0;
906
+ double C, d, G;
907
+ double *QD = new double[l];
908
+ int max_iter = 1000;
909
+ int *index = new int[l];
910
+ double *alpha = new double[l];
911
+ schar *y = new schar[l];
912
+ int active_size = l;
913
+
914
+ // PG: projected gradient, for shrinking and stopping
915
+ double PG;
916
+ double PGmax_old = INF;
917
+ double PGmin_old = -INF;
918
+ double PGmax_new, PGmin_new;
919
+
920
+ // default solver_type: L2R_L2LOSS_SVC_DUAL
921
+ double diag[3] = {0.5/Cn, 0, 0.5/Cp};
922
+ double upper_bound[3] = {INF, 0, INF};
923
+ if(solver_type == L2R_L1LOSS_SVC_DUAL)
924
+ {
925
+ diag[0] = 0;
926
+ diag[2] = 0;
927
+ upper_bound[0] = Cn;
928
+ upper_bound[2] = Cp;
929
+ }
930
+
931
+ for(i=0; i<l; i++)
932
+ {
933
+ if(prob->y[i] > 0)
934
+ {
935
+ y[i] = +1;
936
+ }
937
+ else
938
+ {
939
+ y[i] = -1;
940
+ }
941
+ }
942
+
943
+ // Initial alpha can be set here. Note that
944
+ // 0 <= alpha[i] <= upper_bound[GETI(i)]
945
+ for(i=0; i<l; i++)
946
+ alpha[i] = 0;
947
+
948
+ for(i=0; i<w_size; i++)
949
+ w[i] = 0;
950
+ for(i=0; i<l; i++)
951
+ {
952
+ QD[i] = diag[GETI(i)];
953
+
954
+ feature_node * const xi = prob->x[i];
955
+ QD[i] += sparse_operator::nrm2_sq(xi);
956
+ sparse_operator::axpy(y[i]*alpha[i], xi, w);
957
+
958
+ index[i] = i;
959
+ }
960
+
961
+ while (iter < max_iter)
962
+ {
963
+ PGmax_new = -INF;
964
+ PGmin_new = INF;
965
+
966
+ for (i=0; i<active_size; i++)
967
+ {
968
+ int j = i+rand()%(active_size-i);
969
+ swap(index[i], index[j]);
970
+ }
971
+
972
+ for (s=0; s<active_size; s++)
973
+ {
974
+ i = index[s];
975
+ const schar yi = y[i];
976
+ feature_node * const xi = prob->x[i];
977
+
978
+ G = yi*sparse_operator::dot(w, xi)-1;
979
+
980
+ C = upper_bound[GETI(i)];
981
+ G += alpha[i]*diag[GETI(i)];
982
+
983
+ PG = 0;
984
+ if (alpha[i] == 0)
985
+ {
986
+ if (G > PGmax_old)
987
+ {
988
+ active_size--;
989
+ swap(index[s], index[active_size]);
990
+ s--;
991
+ continue;
992
+ }
993
+ else if (G < 0)
994
+ PG = G;
995
+ }
996
+ else if (alpha[i] == C)
997
+ {
998
+ if (G < PGmin_old)
999
+ {
1000
+ active_size--;
1001
+ swap(index[s], index[active_size]);
1002
+ s--;
1003
+ continue;
1004
+ }
1005
+ else if (G > 0)
1006
+ PG = G;
1007
+ }
1008
+ else
1009
+ PG = G;
1010
+
1011
+ PGmax_new = max(PGmax_new, PG);
1012
+ PGmin_new = min(PGmin_new, PG);
1013
+
1014
+ if(fabs(PG) > 1.0e-12)
1015
+ {
1016
+ double alpha_old = alpha[i];
1017
+ alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
1018
+ d = (alpha[i] - alpha_old)*yi;
1019
+ sparse_operator::axpy(d, xi, w);
1020
+ }
1021
+ }
1022
+
1023
+ iter++;
1024
+ if(iter % 10 == 0)
1025
+ info(".");
1026
+
1027
+ if(PGmax_new - PGmin_new <= eps)
1028
+ {
1029
+ if(active_size == l)
1030
+ break;
1031
+ else
1032
+ {
1033
+ active_size = l;
1034
+ info("*");
1035
+ PGmax_old = INF;
1036
+ PGmin_old = -INF;
1037
+ continue;
1038
+ }
1039
+ }
1040
+ PGmax_old = PGmax_new;
1041
+ PGmin_old = PGmin_new;
1042
+ if (PGmax_old <= 0)
1043
+ PGmax_old = INF;
1044
+ if (PGmin_old >= 0)
1045
+ PGmin_old = -INF;
1046
+ }
1047
+
1048
+ info("\noptimization finished, #iter = %d\n",iter);
1049
+ if (iter >= max_iter)
1050
+ info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
1051
+
1052
+ // calculate objective value
1053
+
1054
+ double v = 0;
1055
+ int nSV = 0;
1056
+ for(i=0; i<w_size; i++)
1057
+ v += w[i]*w[i];
1058
+ for(i=0; i<l; i++)
1059
+ {
1060
+ v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
1061
+ if(alpha[i] > 0)
1062
+ ++nSV;
1063
+ }
1064
+ info("Objective value = %lf\n",v/2);
1065
+ info("nSV = %d\n",nSV);
1066
+
1067
+ delete [] QD;
1068
+ delete [] alpha;
1069
+ delete [] y;
1070
+ delete [] index;
1071
+ }
1072
+
1073
+
1074
+ // A coordinate descent algorithm for
1075
+ // L1-loss and L2-loss epsilon-SVR dual problem
1076
+ //
1077
+ // min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
1078
+ // s.t. -upper_bound_i <= \beta_i <= upper_bound_i,
1079
+ //
1080
+ // where Qij = xi^T xj and
1081
+ // D is a diagonal matrix
1082
+ //
1083
+ // In L1-SVM case:
1084
+ // upper_bound_i = C
1085
+ // lambda_i = 0
1086
+ // In L2-SVM case:
1087
+ // upper_bound_i = INF
1088
+ // lambda_i = 1/(2*C)
1089
+ //
1090
+ // Given:
1091
+ // x, y, p, C
1092
+ // eps is the stopping tolerance
1093
+ //
1094
+ // solution will be put in w
1095
+ //
1096
+ // See Algorithm 4 of Ho and Lin, 2012
1097
+
1098
+ #undef GETI
1099
+ #define GETI(i) (0)
1100
+ // To support weights for instances, use GETI(i) (i)
1101
+
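(Editor's note, not part of the diff.) Because of the p|\beta_i| term, each one-variable sub-problem below is piecewise quadratic. With G the gradient of the smooth part and H = Q_{ii} + \lambda_i, its minimizer is the soft-threshold-style Newton direction

\[
d = \begin{cases}
-(G+p)/H & \text{if } G+p < H\beta_i,\\
-(G-p)/H & \text{if } G-p > H\beta_i,\\
-\beta_i & \text{otherwise (move } \beta_i \text{ to } 0),
\end{cases}
\]

after which \beta_i is clipped to [-U_i, U_i] and w is updated by (\beta_i^{new}-\beta_i^{old})\,x_i.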
1102
+ static void solve_l2r_l1l2_svr(
1103
+ const problem *prob, double *w, const parameter *param,
1104
+ int solver_type)
1105
+ {
1106
+ int l = prob->l;
1107
+ double C = param->C;
1108
+ double p = param->p;
1109
+ int w_size = prob->n;
1110
+ double eps = param->eps;
1111
+ int i, s, iter = 0;
1112
+ int max_iter = 1000;
1113
+ int active_size = l;
1114
+ int *index = new int[l];
1115
+
1116
+ double d, G, H;
1117
+ double Gmax_old = INF;
1118
+ double Gmax_new, Gnorm1_new;
1119
+ double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1120
+ double *beta = new double[l];
1121
+ double *QD = new double[l];
1122
+ double *y = prob->y;
1123
+
1124
+ // L2R_L2LOSS_SVR_DUAL
1125
+ double lambda[1], upper_bound[1];
1126
+ lambda[0] = 0.5/C;
1127
+ upper_bound[0] = INF;
1128
+
1129
+ if(solver_type == L2R_L1LOSS_SVR_DUAL)
1130
+ {
1131
+ lambda[0] = 0;
1132
+ upper_bound[0] = C;
1133
+ }
1134
+
1135
+ // Initial beta can be set here. Note that
1136
+ // -upper_bound <= beta[i] <= upper_bound
1137
+ for(i=0; i<l; i++)
1138
+ beta[i] = 0;
1139
+
1140
+ for(i=0; i<w_size; i++)
1141
+ w[i] = 0;
1142
+ for(i=0; i<l; i++)
1143
+ {
1144
+ feature_node * const xi = prob->x[i];
1145
+ QD[i] = sparse_operator::nrm2_sq(xi);
1146
+ sparse_operator::axpy(beta[i], xi, w);
1147
+
1148
+ index[i] = i;
1149
+ }
1150
+
1151
+
1152
+ while(iter < max_iter)
1153
+ {
1154
+ Gmax_new = 0;
1155
+ Gnorm1_new = 0;
1156
+
1157
+ for(i=0; i<active_size; i++)
1158
+ {
1159
+ int j = i+rand()%(active_size-i);
1160
+ swap(index[i], index[j]);
1161
+ }
1162
+
1163
+ for(s=0; s<active_size; s++)
1164
+ {
1165
+ i = index[s];
1166
+ G = -y[i] + lambda[GETI(i)]*beta[i];
1167
+ H = QD[i] + lambda[GETI(i)];
1168
+
1169
+ feature_node * const xi = prob->x[i];
1170
+ G += sparse_operator::dot(w, xi);
1171
+
1172
+ double Gp = G+p;
1173
+ double Gn = G-p;
1174
+ double violation = 0;
1175
+ if(beta[i] == 0)
1176
+ {
1177
+ if(Gp < 0)
1178
+ violation = -Gp;
1179
+ else if(Gn > 0)
1180
+ violation = Gn;
1181
+ else if(Gp>Gmax_old && Gn<-Gmax_old)
1182
+ {
1183
+ active_size--;
1184
+ swap(index[s], index[active_size]);
1185
+ s--;
1186
+ continue;
1187
+ }
1188
+ }
1189
+ else if(beta[i] >= upper_bound[GETI(i)])
1190
+ {
1191
+ if(Gp > 0)
1192
+ violation = Gp;
1193
+ else if(Gp < -Gmax_old)
1194
+ {
1195
+ active_size--;
1196
+ swap(index[s], index[active_size]);
1197
+ s--;
1198
+ continue;
1199
+ }
1200
+ }
1201
+ else if(beta[i] <= -upper_bound[GETI(i)])
1202
+ {
1203
+ if(Gn < 0)
1204
+ violation = -Gn;
1205
+ else if(Gn > Gmax_old)
1206
+ {
1207
+ active_size--;
1208
+ swap(index[s], index[active_size]);
1209
+ s--;
1210
+ continue;
1211
+ }
1212
+ }
1213
+ else if(beta[i] > 0)
1214
+ violation = fabs(Gp);
1215
+ else
1216
+ violation = fabs(Gn);
1217
+
1218
+ Gmax_new = max(Gmax_new, violation);
1219
+ Gnorm1_new += violation;
1220
+
1221
+ // obtain Newton direction d
1222
+ if(Gp < H*beta[i])
1223
+ d = -Gp/H;
1224
+ else if(Gn > H*beta[i])
1225
+ d = -Gn/H;
1226
+ else
1227
+ d = -beta[i];
1228
+
1229
+ if(fabs(d) < 1.0e-12)
1230
+ continue;
1231
+
1232
+ double beta_old = beta[i];
1233
+ beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
1234
+ d = beta[i]-beta_old;
1235
+
1236
+ if(d != 0)
1237
+ sparse_operator::axpy(d, xi, w);
1238
+ }
1239
+
1240
+ if(iter == 0)
1241
+ Gnorm1_init = Gnorm1_new;
1242
+ iter++;
1243
+ if(iter % 10 == 0)
1244
+ info(".");
1245
+
1246
+ if(Gnorm1_new <= eps*Gnorm1_init)
1247
+ {
1248
+ if(active_size == l)
1249
+ break;
1250
+ else
1251
+ {
1252
+ active_size = l;
1253
+ info("*");
1254
+ Gmax_old = INF;
1255
+ continue;
1256
+ }
1257
+ }
1258
+
1259
+ Gmax_old = Gmax_new;
1260
+ }
1261
+
1262
+ info("\noptimization finished, #iter = %d\n", iter);
1263
+ if(iter >= max_iter)
1264
+ info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n");
1265
+
1266
+ // calculate objective value
1267
+ double v = 0;
1268
+ int nSV = 0;
1269
+ for(i=0; i<w_size; i++)
1270
+ v += w[i]*w[i];
1271
+ v = 0.5*v;
1272
+ for(i=0; i<l; i++)
1273
+ {
1274
+ v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
1275
+ if(beta[i] != 0)
1276
+ nSV++;
1277
+ }
1278
+
1279
+ info("Objective value = %lf\n", v);
1280
+ info("nSV = %d\n",nSV);
1281
+
1282
+ delete [] beta;
1283
+ delete [] QD;
1284
+ delete [] index;
1285
+ }
1286
+
1287
+
1288
+ // A coordinate descent algorithm for
1289
+ // the dual of L2-regularized logistic regression problems
1290
+ //
1291
+ // min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
1292
+ // s.t. 0 <= \alpha_i <= upper_bound_i,
1293
+ //
1294
+ // where Qij = yi yj xi^T xj and
1295
+ // upper_bound_i = Cp if y_i = 1
1296
+ // upper_bound_i = Cn if y_i = -1
1297
+ //
1298
+ // Given:
1299
+ // x, y, Cp, Cn
1300
+ // eps is the stopping tolerance
1301
+ //
1302
+ // solution will be put in w
1303
+ //
1304
+ // See Algorithm 5 of Yu et al., MLJ 2010
1305
+
1306
+ #undef GETI
1307
+ #define GETI(i) (y[i]+1)
1308
+ // To support weights for instances, use GETI(i) (i)
1309
+
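(Editor's note, not part of the diff.) Each coordinate step below minimizes a one-variable function of z, one of the paired dual variables (the other stays at C - z). Writing a = x_i^Tx_i, b = y_i w^Tx_i, s = ±1 for the chosen direction and \alpha_{old} for the current value, the inner Newton iteration uses

\[ g'(z) = a\,(z-\alpha_{old}) + s\,b + \log\frac{z}{C-z}, \qquad
   g''(z) = a + \frac{C}{z\,(C-z)}, \]

and keeps z inside (0, C) by shrinking it (z <- 0.1 z) whenever a full Newton step would leave the interval. innereps controls how accurately each sub-problem is solved and is tightened (down to innereps_min) once a sweep needs only a few inner Newton steps.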
1310
+ void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
1311
+ {
1312
+ int l = prob->l;
1313
+ int w_size = prob->n;
1314
+ int i, s, iter = 0;
1315
+ double *xTx = new double[l];
1316
+ int max_iter = 1000;
1317
+ int *index = new int[l];
1318
+ double *alpha = new double[2*l]; // store alpha and C - alpha
1319
+ schar *y = new schar[l];
1320
+ int max_inner_iter = 100; // for inner Newton
1321
+ double innereps = 1e-2;
1322
+ double innereps_min = min(1e-8, eps);
1323
+ double upper_bound[3] = {Cn, 0, Cp};
1324
+
1325
+ for(i=0; i<l; i++)
1326
+ {
1327
+ if(prob->y[i] > 0)
1328
+ {
1329
+ y[i] = +1;
1330
+ }
1331
+ else
1332
+ {
1333
+ y[i] = -1;
1334
+ }
1335
+ }
1336
+
1337
+ // Initial alpha can be set here. Note that
1338
+ // 0 < alpha[i] < upper_bound[GETI(i)]
1339
+ // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
1340
+ for(i=0; i<l; i++)
1341
+ {
1342
+ alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
1343
+ alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
1344
+ }
1345
+
1346
+ for(i=0; i<w_size; i++)
1347
+ w[i] = 0;
1348
+ for(i=0; i<l; i++)
1349
+ {
1350
+ feature_node * const xi = prob->x[i];
1351
+ xTx[i] = sparse_operator::nrm2_sq(xi);
1352
+ sparse_operator::axpy(y[i]*alpha[2*i], xi, w);
1353
+ index[i] = i;
1354
+ }
1355
+
1356
+ while (iter < max_iter)
1357
+ {
1358
+ for (i=0; i<l; i++)
1359
+ {
1360
+ int j = i+rand()%(l-i);
1361
+ swap(index[i], index[j]);
1362
+ }
1363
+ int newton_iter = 0;
1364
+ double Gmax = 0;
1365
+ for (s=0; s<l; s++)
1366
+ {
1367
+ i = index[s];
1368
+ const schar yi = y[i];
1369
+ double C = upper_bound[GETI(i)];
1370
+ double ywTx = 0, xisq = xTx[i];
1371
+ feature_node * const xi = prob->x[i];
1372
+ ywTx = yi*sparse_operator::dot(w, xi);
1373
+ double a = xisq, b = ywTx;
1374
+
1375
+ // Decide to minimize g_1(z) or g_2(z)
1376
+ int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
1377
+ if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
1378
+ {
1379
+ ind1 = 2*i+1;
1380
+ ind2 = 2*i;
1381
+ sign = -1;
1382
+ }
1383
+
1384
+ // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
1385
+ double alpha_old = alpha[ind1];
1386
+ double z = alpha_old;
1387
+ if(C - z < 0.5 * C)
1388
+ z = 0.1*z;
1389
+ double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1390
+ Gmax = max(Gmax, fabs(gp));
1391
+
1392
+ // Newton method on the sub-problem
1393
+ const double eta = 0.1; // xi in the paper
1394
+ int inner_iter = 0;
1395
+ while (inner_iter <= max_inner_iter)
1396
+ {
1397
+ if(fabs(gp) < innereps)
1398
+ break;
1399
+ double gpp = a + C/(C-z)/z;
1400
+ double tmpz = z - gp/gpp;
1401
+ if(tmpz <= 0)
1402
+ z *= eta;
1403
+ else // tmpz in (0, C)
1404
+ z = tmpz;
1405
+ gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1406
+ newton_iter++;
1407
+ inner_iter++;
1408
+ }
1409
+
1410
+ if(inner_iter > 0) // update w
1411
+ {
1412
+ alpha[ind1] = z;
1413
+ alpha[ind2] = C-z;
1414
+ sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w);
1415
+ }
1416
+ }
1417
+
1418
+ iter++;
1419
+ if(iter % 10 == 0)
1420
+ info(".");
1421
+
1422
+ if(Gmax < eps)
1423
+ break;
1424
+
1425
+ if(newton_iter <= l/10)
1426
+ innereps = max(innereps_min, 0.1*innereps);
1427
+
1428
+ }
1429
+
1430
+ info("\noptimization finished, #iter = %d\n",iter);
1431
+ if (iter >= max_iter)
1432
+ info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
1433
+
1434
+ // calculate objective value
1435
+
1436
+ double v = 0;
1437
+ for(i=0; i<w_size; i++)
1438
+ v += w[i] * w[i];
1439
+ v *= 0.5;
1440
+ for(i=0; i<l; i++)
1441
+ v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
1442
+ - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
1443
+ info("Objective value = %lf\n", v);
1444
+
1445
+ delete [] xTx;
1446
+ delete [] alpha;
1447
+ delete [] y;
1448
+ delete [] index;
1449
+ }
1450
+
1451
+ // A coordinate descent algorithm for
1452
+ // L1-regularized L2-loss support vector classification
1453
+ //
1454
+ // min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
1455
+ //
1456
+ // Given:
1457
+ // x, y, Cp, Cn
1458
+ // eps is the stopping tolerance
1459
+ //
1460
+ // solution will be put in w
1461
+ //
1462
+ // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
1463
+ //
1464
+ // To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
1465
+ // must have been added to the original data. (see -B and -R option)
1466
+
1467
+ #undef GETI
1468
+ #define GETI(i) (y[i]+1)
1469
+ // To support weights for instances, use GETI(i) (i)
1470
+
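(Editor's note, not part of the diff.) For the two L1-regularized solvers below, the quantity accumulated in Gnorm1_new is the 1-norm of the minimum-norm subgradient of the nondifferentiable objective over the active coordinates: with G_j the gradient of the smooth part,

\[
v_j = \begin{cases}
|G_j+1| & w_j > 0,\\
|G_j-1| & w_j < 0,\\
\max(G_j-1,\; -(G_j+1),\; 0) & w_j = 0,
\end{cases}
\]

and the outer loop stops once \sum_j v_j \le \mathrm{eps}\cdot Gnorm1_init, the same measure taken at the first iteration (an unregularized bias term, when -R is combined with -B, contributes |G_j| instead). Zero coordinates whose gradient lies comfortably inside (-1, 1), judged against Gmax_old/l, are shrunk from the active set.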
1471
+ static void solve_l1r_l2_svc(
1472
+ problem *prob_col, double *w, double eps,
1473
+ double Cp, double Cn, int regularize_bias)
1474
+ {
1475
+ int l = prob_col->l;
1476
+ int w_size = prob_col->n;
1477
+ int j, s, iter = 0;
1478
+ int max_iter = 1000;
1479
+ int active_size = w_size;
1480
+ int max_num_linesearch = 20;
1481
+
1482
+ double sigma = 0.01;
1483
+ double d, G_loss, G, H;
1484
+ double Gmax_old = INF;
1485
+ double Gmax_new, Gnorm1_new;
1486
+ double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1487
+ double d_old, d_diff;
1488
+ double loss_old = 0, loss_new;
1489
+ double appxcond, cond;
1490
+
1491
+ int *index = new int[w_size];
1492
+ schar *y = new schar[l];
1493
+ double *b = new double[l]; // b = 1-ywTx
1494
+ double *xj_sq = new double[w_size];
1495
+ feature_node *x;
1496
+
1497
+ double C[3] = {Cn,0,Cp};
1498
+
1499
+ // Initial w can be set here.
1500
+ for(j=0; j<w_size; j++)
1501
+ w[j] = 0;
1502
+
1503
+ for(j=0; j<l; j++)
1504
+ {
1505
+ b[j] = 1;
1506
+ if(prob_col->y[j] > 0)
1507
+ y[j] = 1;
1508
+ else
1509
+ y[j] = -1;
1510
+ }
1511
+ for(j=0; j<w_size; j++)
1512
+ {
1513
+ index[j] = j;
1514
+ xj_sq[j] = 0;
1515
+ x = prob_col->x[j];
1516
+ while(x->index != -1)
1517
+ {
1518
+ int ind = x->index-1;
1519
+ x->value *= y[ind]; // x->value stores yi*xij
1520
+ double val = x->value;
1521
+ b[ind] -= w[j]*val;
1522
+ xj_sq[j] += C[GETI(ind)]*val*val;
1523
+ x++;
1524
+ }
1525
+ }
1526
+
1527
+ while(iter < max_iter)
1528
+ {
1529
+ Gmax_new = 0;
1530
+ Gnorm1_new = 0;
1531
+
1532
+ for(j=0; j<active_size; j++)
1533
+ {
1534
+ int i = j+rand()%(active_size-j);
1535
+ swap(index[i], index[j]);
1536
+ }
1537
+
1538
+ for(s=0; s<active_size; s++)
1539
+ {
1540
+ j = index[s];
1541
+ G_loss = 0;
1542
+ H = 0;
1543
+
1544
+ x = prob_col->x[j];
1545
+ while(x->index != -1)
1546
+ {
1547
+ int ind = x->index-1;
1548
+ if(b[ind] > 0)
1549
+ {
1550
+ double val = x->value;
1551
+ double tmp = C[GETI(ind)]*val;
1552
+ G_loss -= tmp*b[ind];
1553
+ H += tmp*val;
1554
+ }
1555
+ x++;
1556
+ }
1557
+ G_loss *= 2;
1558
+
1559
+ G = G_loss;
1560
+ H *= 2;
1561
+ H = max(H, 1e-12);
1562
+
1563
+ double violation = 0;
1564
+ double Gp = 0, Gn = 0;
1565
+ if(j == w_size-1 && regularize_bias == 0)
1566
+ violation = fabs(G);
1567
+ else
1568
+ {
1569
+ Gp = G+1;
1570
+ Gn = G-1;
1571
+ if(w[j] == 0)
1572
+ {
1573
+ if(Gp < 0)
1574
+ violation = -Gp;
1575
+ else if(Gn > 0)
1576
+ violation = Gn;
1577
+ else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1578
+ {
1579
+ active_size--;
1580
+ swap(index[s], index[active_size]);
1581
+ s--;
1582
+ continue;
1583
+ }
1584
+ }
1585
+ else if(w[j] > 0)
1586
+ violation = fabs(Gp);
1587
+ else
1588
+ violation = fabs(Gn);
1589
+ }
1590
+ Gmax_new = max(Gmax_new, violation);
1591
+ Gnorm1_new += violation;
1592
+
1593
+ // obtain Newton direction d
1594
+ if(j == w_size-1 && regularize_bias == 0)
1595
+ d = -G/H;
1596
+ else
1597
+ {
1598
+ if(Gp < H*w[j])
1599
+ d = -Gp/H;
1600
+ else if(Gn > H*w[j])
1601
+ d = -Gn/H;
1602
+ else
1603
+ d = -w[j];
1604
+ }
1605
+
1606
+ if(fabs(d) < 1.0e-12)
1607
+ continue;
1608
+
1609
+ double delta;
1610
+ if(j == w_size-1 && regularize_bias == 0)
1611
+ delta = G*d;
1612
+ else
1613
+ delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
1614
+ d_old = 0;
1615
+ int num_linesearch;
1616
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
1617
+ {
1618
+ d_diff = d_old - d;
1619
+ if(j == w_size-1 && regularize_bias == 0)
1620
+ cond = -sigma*delta;
1621
+ else
1622
+ cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
1623
+
1624
+ appxcond = xj_sq[j]*d*d + G_loss*d + cond;
1625
+ if(appxcond <= 0)
1626
+ {
1627
+ x = prob_col->x[j];
1628
+ sparse_operator::axpy(d_diff, x, b);
1629
+ break;
1630
+ }
1631
+
1632
+ if(num_linesearch == 0)
1633
+ {
1634
+ loss_old = 0;
1635
+ loss_new = 0;
1636
+ x = prob_col->x[j];
1637
+ while(x->index != -1)
1638
+ {
1639
+ int ind = x->index-1;
1640
+ if(b[ind] > 0)
1641
+ loss_old += C[GETI(ind)]*b[ind]*b[ind];
1642
+ double b_new = b[ind] + d_diff*x->value;
1643
+ b[ind] = b_new;
1644
+ if(b_new > 0)
1645
+ loss_new += C[GETI(ind)]*b_new*b_new;
1646
+ x++;
1647
+ }
1648
+ }
1649
+ else
1650
+ {
1651
+ loss_new = 0;
1652
+ x = prob_col->x[j];
1653
+ while(x->index != -1)
1654
+ {
1655
+ int ind = x->index-1;
1656
+ double b_new = b[ind] + d_diff*x->value;
1657
+ b[ind] = b_new;
1658
+ if(b_new > 0)
1659
+ loss_new += C[GETI(ind)]*b_new*b_new;
1660
+ x++;
1661
+ }
1662
+ }
1663
+
1664
+ cond = cond + loss_new - loss_old;
1665
+ if(cond <= 0)
1666
+ break;
1667
+ else
1668
+ {
1669
+ d_old = d;
1670
+ d *= 0.5;
1671
+ delta *= 0.5;
1672
+ }
1673
+ }
1674
+
1675
+ w[j] += d;
1676
+
1677
+ // recompute b[] if line search takes too many steps
1678
+ if(num_linesearch >= max_num_linesearch)
1679
+ {
1680
+ info("#");
1681
+ for(int i=0; i<l; i++)
1682
+ b[i] = 1;
1683
+
1684
+ for(int i=0; i<w_size; i++)
1685
+ {
1686
+ if(w[i]==0) continue;
1687
+ x = prob_col->x[i];
1688
+ sparse_operator::axpy(-w[i], x, b);
1689
+ }
1690
+ }
1691
+ }
1692
+
1693
+ if(iter == 0)
1694
+ Gnorm1_init = Gnorm1_new;
1695
+ iter++;
1696
+ if(iter % 10 == 0)
1697
+ info(".");
1698
+
1699
+ if(Gnorm1_new <= eps*Gnorm1_init)
1700
+ {
1701
+ if(active_size == w_size)
1702
+ break;
1703
+ else
1704
+ {
1705
+ active_size = w_size;
1706
+ info("*");
1707
+ Gmax_old = INF;
1708
+ continue;
1709
+ }
1710
+ }
1711
+
1712
+ Gmax_old = Gmax_new;
1713
+ }
1714
+
1715
+ info("\noptimization finished, #iter = %d\n", iter);
1716
+ if(iter >= max_iter)
1717
+ info("\nWARNING: reaching max number of iterations\n");
1718
+
1719
+ // calculate objective value
1720
+
1721
+ double v = 0;
1722
+ int nnz = 0;
1723
+ for(j=0; j<w_size; j++)
1724
+ {
1725
+ x = prob_col->x[j];
1726
+ while(x->index != -1)
1727
+ {
1728
+ x->value *= prob_col->y[x->index-1]; // restore x->value
1729
+ x++;
1730
+ }
1731
+ if(w[j] != 0)
1732
+ {
1733
+ v += fabs(w[j]);
1734
+ nnz++;
1735
+ }
1736
+ }
1737
+ if (regularize_bias == 0)
1738
+ v -= fabs(w[w_size-1]);
1739
+ for(j=0; j<l; j++)
1740
+ if(b[j] > 0)
1741
+ v += C[GETI(j)]*b[j]*b[j];
1742
+
1743
+ info("Objective value = %lf\n", v);
1744
+ info("#nonzeros/#features = %d/%d\n", nnz, w_size);
1745
+
1746
+ delete [] index;
1747
+ delete [] y;
1748
+ delete [] b;
1749
+ delete [] xj_sq;
1750
+ }
1751
+
1752
+ // A coordinate descent algorithm for
1753
+ // L1-regularized logistic regression problems
1754
+ //
1755
+ // min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
1756
+ //
1757
+ // Given:
1758
+ // x, y, Cp, Cn
1759
+ // eps is the stopping tolerance
1760
+ //
1761
+ // solution will be put in w
1762
+ //
1763
+ // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
1764
+ //
1765
+ // To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
1766
+ // must have been added to the original data. (see -B and -R option)
1767
+
1768
+ #undef GETI
1769
+ #define GETI(i) (y[i]+1)
1770
+ // To support weights for instances, use GETI(i) (i)
1771
+
1772
+ static void solve_l1r_lr(
1773
+ const problem *prob_col, double *w, double eps,
1774
+ double Cp, double Cn, int regularize_bias)
1775
+ {
1776
+ int l = prob_col->l;
1777
+ int w_size = prob_col->n;
1778
+ int j, s, newton_iter=0, iter=0;
1779
+ int max_newton_iter = 100;
1780
+ int max_iter = 1000;
1781
+ int max_num_linesearch = 20;
1782
+ int active_size;
1783
+ int QP_active_size;
1784
+
1785
+ double nu = 1e-12;
1786
+ double inner_eps = 1;
1787
+ double sigma = 0.01;
1788
+ double w_norm, w_norm_new;
1789
+ double z, G, H;
1790
+ double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1791
+ double Gmax_old = INF;
1792
+ double Gmax_new, Gnorm1_new;
1793
+ double QP_Gmax_old = INF;
1794
+ double QP_Gmax_new, QP_Gnorm1_new;
1795
+ double delta, negsum_xTd, cond;
1796
+
1797
+ int *index = new int[w_size];
1798
+ schar *y = new schar[l];
1799
+ double *Hdiag = new double[w_size];
1800
+ double *Grad = new double[w_size];
1801
+ double *wpd = new double[w_size];
1802
+ double *xjneg_sum = new double[w_size];
1803
+ double *xTd = new double[l];
1804
+ double *exp_wTx = new double[l];
1805
+ double *exp_wTx_new = new double[l];
1806
+ double *tau = new double[l];
1807
+ double *D = new double[l];
1808
+ feature_node *x;
1809
+
1810
+ double C[3] = {Cn,0,Cp};
1811
+
1812
+ // Initial w can be set here.
1813
+ for(j=0; j<w_size; j++)
1814
+ w[j] = 0;
1815
+
1816
+ for(j=0; j<l; j++)
1817
+ {
1818
+ if(prob_col->y[j] > 0)
1819
+ y[j] = 1;
1820
+ else
1821
+ y[j] = -1;
1822
+
1823
+ exp_wTx[j] = 0;
1824
+ }
1825
+
1826
+ w_norm = 0;
1827
+ for(j=0; j<w_size; j++)
1828
+ {
1829
+ w_norm += fabs(w[j]);
1830
+ wpd[j] = w[j];
1831
+ index[j] = j;
1832
+ xjneg_sum[j] = 0;
1833
+ x = prob_col->x[j];
1834
+ while(x->index != -1)
1835
+ {
1836
+ int ind = x->index-1;
1837
+ double val = x->value;
1838
+ exp_wTx[ind] += w[j]*val;
1839
+ if(y[ind] == -1)
1840
+ xjneg_sum[j] += C[GETI(ind)]*val;
1841
+ x++;
1842
+ }
1843
+ }
1844
+ if (regularize_bias == 0)
1845
+ w_norm -= fabs(w[w_size-1]);
1846
+
1847
+ for(j=0; j<l; j++)
1848
+ {
1849
+ exp_wTx[j] = exp(exp_wTx[j]);
1850
+ double tau_tmp = 1/(1+exp_wTx[j]);
1851
+ tau[j] = C[GETI(j)]*tau_tmp;
1852
+ D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp;
1853
+ }
1854
+
1855
+ while(newton_iter < max_newton_iter)
1856
+ {
1857
+ Gmax_new = 0;
1858
+ Gnorm1_new = 0;
1859
+ active_size = w_size;
1860
+
1861
+ for(s=0; s<active_size; s++)
1862
+ {
1863
+ j = index[s];
1864
+ Hdiag[j] = nu;
1865
+ Grad[j] = 0;
1866
+
1867
+ double tmp = 0;
1868
+ x = prob_col->x[j];
1869
+ while(x->index != -1)
1870
+ {
1871
+ int ind = x->index-1;
1872
+ Hdiag[j] += x->value*x->value*D[ind];
1873
+ tmp += x->value*tau[ind];
1874
+ x++;
1875
+ }
1876
+ Grad[j] = -tmp + xjneg_sum[j];
1877
+
1878
+ double violation = 0;
1879
+ if (j == w_size-1 && regularize_bias == 0)
1880
+ violation = fabs(Grad[j]);
1881
+ else
1882
+ {
1883
+ double Gp = Grad[j]+1;
1884
+ double Gn = Grad[j]-1;
1885
+ if(w[j] == 0)
1886
+ {
1887
+ if(Gp < 0)
1888
+ violation = -Gp;
1889
+ else if(Gn > 0)
1890
+ violation = Gn;
1891
+ //outer-level shrinking
1892
+ else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1893
+ {
1894
+ active_size--;
1895
+ swap(index[s], index[active_size]);
1896
+ s--;
1897
+ continue;
1898
+ }
1899
+ }
1900
+ else if(w[j] > 0)
1901
+ violation = fabs(Gp);
1902
+ else
1903
+ violation = fabs(Gn);
1904
+ }
1905
+ Gmax_new = max(Gmax_new, violation);
1906
+ Gnorm1_new += violation;
1907
+ }
1908
+
1909
+ if(newton_iter == 0)
1910
+ Gnorm1_init = Gnorm1_new;
1911
+
1912
+ if(Gnorm1_new <= eps*Gnorm1_init)
1913
+ break;
1914
+
1915
+ iter = 0;
1916
+ QP_Gmax_old = INF;
1917
+ QP_active_size = active_size;
1918
+
1919
+ for(int i=0; i<l; i++)
1920
+ xTd[i] = 0;
1921
+
1922
+ // optimize QP over wpd
1923
+ while(iter < max_iter)
1924
+ {
1925
+ QP_Gmax_new = 0;
1926
+ QP_Gnorm1_new = 0;
1927
+
1928
+ for(j=0; j<QP_active_size; j++)
1929
+ {
1930
+ int i = j+rand()%(QP_active_size-j);
1931
+ swap(index[i], index[j]);
1932
+ }
1933
+
1934
+ for(s=0; s<QP_active_size; s++)
1935
+ {
1936
+ j = index[s];
1937
+ H = Hdiag[j];
1938
+
1939
+ x = prob_col->x[j];
1940
+ G = Grad[j] + (wpd[j]-w[j])*nu;
1941
+ while(x->index != -1)
1942
+ {
1943
+ int ind = x->index-1;
1944
+ G += x->value*D[ind]*xTd[ind];
1945
+ x++;
1946
+ }
1947
+
1948
+ double violation = 0;
1949
+ if (j == w_size-1 && regularize_bias == 0)
1950
+ {
1951
+ // bias term not shrunken
1952
+ violation = fabs(G);
1953
+ z = -G/H;
1954
+ }
1955
+ else
1956
+ {
1957
+ double Gp = G+1;
1958
+ double Gn = G-1;
1959
+ if(wpd[j] == 0)
1960
+ {
1961
+ if(Gp < 0)
1962
+ violation = -Gp;
1963
+ else if(Gn > 0)
1964
+ violation = Gn;
1965
+ //inner-level shrinking
1966
+ else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
1967
+ {
1968
+ QP_active_size--;
1969
+ swap(index[s], index[QP_active_size]);
1970
+ s--;
1971
+ continue;
1972
+ }
1973
+ }
1974
+ else if(wpd[j] > 0)
1975
+ violation = fabs(Gp);
1976
+ else
1977
+ violation = fabs(Gn);
1978
+
1979
+ // obtain solution of one-variable problem
1980
+ if(Gp < H*wpd[j])
1981
+ z = -Gp/H;
1982
+ else if(Gn > H*wpd[j])
1983
+ z = -Gn/H;
1984
+ else
1985
+ z = -wpd[j];
1986
+ }
1987
+ QP_Gmax_new = max(QP_Gmax_new, violation);
1988
+ QP_Gnorm1_new += violation;
1989
+
1990
+ if(fabs(z) < 1.0e-12)
1991
+ continue;
1992
+ z = min(max(z,-10.0),10.0);
1993
+
1994
+ wpd[j] += z;
1995
+
1996
+ x = prob_col->x[j];
1997
+ sparse_operator::axpy(z, x, xTd);
1998
+ }
1999
+
2000
+ iter++;
2001
+
2002
+ if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
2003
+ {
2004
+ //inner stopping
2005
+ if(QP_active_size == active_size)
2006
+ break;
2007
+ //active set reactivation
2008
+ else
2009
+ {
2010
+ QP_active_size = active_size;
2011
+ QP_Gmax_old = INF;
2012
+ continue;
2013
+ }
2014
+ }
2015
+
2016
+ QP_Gmax_old = QP_Gmax_new;
2017
+ }
2018
+
2019
+ if(iter >= max_iter)
2020
+ info("WARNING: reaching max number of inner iterations\n");
2021
+
2022
+ delta = 0;
2023
+ w_norm_new = 0;
2024
+ for(j=0; j<w_size; j++)
2025
+ {
2026
+ delta += Grad[j]*(wpd[j]-w[j]);
2027
+ if(wpd[j] != 0)
2028
+ w_norm_new += fabs(wpd[j]);
2029
+ }
2030
+ if (regularize_bias == 0)
2031
+ w_norm_new -= fabs(wpd[w_size-1]);
2032
+ delta += (w_norm_new-w_norm);
2033
+
2034
+ negsum_xTd = 0;
2035
+ for(int i=0; i<l; i++)
2036
+ if(y[i] == -1)
2037
+ negsum_xTd += C[GETI(i)]*xTd[i];
2038
+
2039
+ int num_linesearch;
2040
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
2041
+ {
2042
+ cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;
2043
+
2044
+ for(int i=0; i<l; i++)
2045
+ {
2046
+ double exp_xTd = exp(xTd[i]);
2047
+ exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
2048
+ cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
2049
+ }
2050
+
2051
+ if(cond <= 0)
2052
+ {
2053
+ w_norm = w_norm_new;
2054
+ for(j=0; j<w_size; j++)
2055
+ w[j] = wpd[j];
2056
+ for(int i=0; i<l; i++)
2057
+ {
2058
+ exp_wTx[i] = exp_wTx_new[i];
2059
+ double tau_tmp = 1/(1+exp_wTx[i]);
2060
+ tau[i] = C[GETI(i)]*tau_tmp;
2061
+ D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
2062
+ }
2063
+ break;
2064
+ }
2065
+ else
2066
+ {
2067
+ w_norm_new = 0;
2068
+ for(j=0; j<w_size; j++)
2069
+ {
2070
+ wpd[j] = (w[j]+wpd[j])*0.5;
2071
+ if(wpd[j] != 0)
2072
+ w_norm_new += fabs(wpd[j]);
2073
+ }
2074
+ if (regularize_bias == 0)
2075
+ w_norm_new -= fabs(wpd[w_size-1]);
2076
+ delta *= 0.5;
2077
+ negsum_xTd *= 0.5;
2078
+ for(int i=0; i<l; i++)
2079
+ xTd[i] *= 0.5;
2080
+ }
2081
+ }
2082
+
2083
+ // Recompute some info due to too many line search steps
2084
+ if(num_linesearch >= max_num_linesearch)
2085
+ {
2086
+ for(int i=0; i<l; i++)
2087
+ exp_wTx[i] = 0;
2088
+
2089
+ for(int i=0; i<w_size; i++)
2090
+ {
2091
+ if(w[i]==0) continue;
2092
+ x = prob_col->x[i];
2093
+ sparse_operator::axpy(w[i], x, exp_wTx);
2094
+ }
2095
+
2096
+ for(int i=0; i<l; i++)
2097
+ exp_wTx[i] = exp(exp_wTx[i]);
2098
+ }
2099
+
2100
+ if(iter == 1)
2101
+ inner_eps *= 0.25;
2102
+
2103
+ newton_iter++;
2104
+ Gmax_old = Gmax_new;
2105
+
2106
+ info("iter %3d #CD cycles %d\n", newton_iter, iter);
2107
+ }
2108
+
2109
+ info("=========================\n");
2110
+ info("optimization finished, #iter = %d\n", newton_iter);
2111
+ if(newton_iter >= max_newton_iter)
2112
+ info("WARNING: reaching max number of iterations\n");
2113
+
2114
+ // calculate objective value
2115
+
2116
+ double v = 0;
2117
+ int nnz = 0;
2118
+ for(j=0; j<w_size; j++)
2119
+ if(w[j] != 0)
2120
+ {
2121
+ v += fabs(w[j]);
2122
+ nnz++;
2123
+ }
2124
+ if (regularize_bias == 0)
2125
+ v -= fabs(w[w_size-1]);
2126
+ for(j=0; j<l; j++)
2127
+ if(y[j] == 1)
2128
+ v += C[GETI(j)]*log(1+1/exp_wTx[j]);
2129
+ else
2130
+ v += C[GETI(j)]*log(1+exp_wTx[j]);
2131
+
2132
+ info("Objective value = %lf\n", v);
2133
+ info("#nonzeros/#features = %d/%d\n", nnz, w_size);
2134
+
2135
+ delete [] index;
2136
+ delete [] y;
2137
+ delete [] Hdiag;
2138
+ delete [] Grad;
2139
+ delete [] wpd;
2140
+ delete [] xjneg_sum;
2141
+ delete [] xTd;
2142
+ delete [] exp_wTx;
2143
+ delete [] exp_wTx_new;
2144
+ delete [] tau;
2145
+ delete [] D;
2146
+ }
2147
+
2148
+ struct heap {
2149
+ enum HEAP_TYPE { MIN, MAX };
2150
+ int _size;
2151
+ HEAP_TYPE _type;
2152
+ feature_node* a;
2153
+
2154
+ heap(int max_size, HEAP_TYPE type)
2155
+ {
2156
+ _size = 0;
2157
+ a = new feature_node[max_size];
2158
+ _type = type;
2159
+ }
2160
+ ~heap()
2161
+ {
2162
+ delete [] a;
2163
+ }
2164
+ bool cmp(const feature_node& left, const feature_node& right)
2165
+ {
2166
+ if(_type == MIN)
2167
+ return left.value > right.value;
2168
+ else
2169
+ return left.value < right.value;
2170
+ }
2171
+ int size()
2172
+ {
2173
+ return _size;
2174
+ }
2175
+ void push(feature_node node)
2176
+ {
2177
+ a[_size] = node;
2178
+ _size++;
2179
+ int i = _size-1;
2180
+ while(i)
2181
+ {
2182
+ int p = (i-1)/2;
2183
+ if(cmp(a[p], a[i]))
2184
+ {
2185
+ swap(a[i], a[p]);
2186
+ i = p;
2187
+ }
2188
+ else
2189
+ break;
2190
+ }
2191
+ }
2192
+ void pop()
2193
+ {
2194
+ _size--;
2195
+ a[0] = a[_size];
2196
+ int i = 0;
2197
+ while(i*2+1 < _size)
2198
+ {
2199
+ int l = i*2+1;
2200
+ int r = i*2+2;
2201
+ if(r < _size && cmp(a[l], a[r]))
2202
+ l = r;
2203
+ if(cmp(a[i], a[l]))
2204
+ {
2205
+ swap(a[i], a[l]);
2206
+ i = l;
2207
+ }
2208
+ else
2209
+ break;
2210
+ }
2211
+ }
2212
+ feature_node top()
2213
+ {
2214
+ return a[0];
2215
+ }
2216
+ };
2217
+
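+ // Usage sketch (illustrative only, not part of the library): a MIN heap of
+ // capacity k keeps the k largest values pushed into it by replacing the top
+ // whenever a larger value arrives. The names num_values and values[] below
+ // are hypothetical.
+ //
+ //   heap topk(3, heap::MIN);
+ //   feature_node nd;
+ //   for(int idx = 0; idx < num_values; idx++)
+ //   {
+ //       nd.index = idx;
+ //       nd.value = values[idx];
+ //       if(topk.size() < 3)
+ //           topk.push(nd);
+ //       else if(topk.top().value < nd.value)
+ //       {
+ //           topk.pop();
+ //           topk.push(nd);
+ //       }
+ //   }
+ //   // topk now holds the 3 largest values; top() is the smallest of them.
+ //
+ // solve_oneclass_svm below uses this pattern with a MIN heap for candidate
+ // indices i and a MAX heap for candidate indices j.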
2218
+ // A two-level coordinate descent algorithm for
2219
+ // a scaled one-class SVM dual problem
2220
+ //
2221
+ // min_\alpha 0.5(\alpha^T Q \alpha),
2222
+ // s.t. 0 <= \alpha_i <= 1 and
2223
+ // e^T \alpha = \nu l
2224
+ //
2225
+ // where Qij = xi^T xj
2226
+ //
2227
+ // Given:
2228
+ // x, nu
2229
+ // eps is the stopping tolerance
2230
+ //
2231
+ // solution will be put in w and rho
2232
+ //
2233
+ // See Algorithm 7 in supplementary materials of Chou et al., SDM 2020.
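+ //
+ // (Descriptive note: the two levels are (1) an outer loop that shrinks the
+ // active set and stops when negGmax - negGmin < eps, and (2) an inner loop
+ // that uses the heaps above to pick up to max(active_size/10, 1) most
+ // violating index pairs (i, j) and performs two-variable updates that keep
+ // alpha_i + alpha_j fixed while clipping both variables to [0, 1].)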
2234
+
2235
+ static void solve_oneclass_svm(const problem *prob, double *w, double *rho, double eps, double nu)
2236
+ {
2237
+ int l = prob->l;
2238
+ int w_size = prob->n;
2239
+ int i, j, s, iter = 0;
2240
+ double Gi, Gj;
2241
+ double Qij, quad_coef, delta, sum;
2242
+ double old_alpha_i;
2243
+ double *QD = new double[l];
2244
+ double *G = new double[l];
2245
+ int *index = new int[l];
2246
+ double *alpha = new double[l];
2247
+ int max_inner_iter;
2248
+ int max_iter = 1000;
2249
+ int active_size = l;
2250
+
2251
+ double negGmax; // max { -grad(f)_i | alpha_i < 1 }
2252
+ double negGmin; // min { -grad(f)_i | alpha_i > 0 }
2253
+
2254
+ int *most_violating_i = new int[l];
2255
+ int *most_violating_j = new int[l];
2256
+
2257
+ int n = (int)(nu*l); // # of alpha's at upper bound
2258
+ for(i=0; i<n; i++)
2259
+ alpha[i] = 1;
2260
+ if (n<l)
2261
+ alpha[i] = nu*l-n;
2262
+ for(i=n+1; i<l; i++)
2263
+ alpha[i] = 0;
2264
+
2265
+ for(i=0; i<w_size; i++)
2266
+ w[i] = 0;
2267
+ for(i=0; i<l; i++)
2268
+ {
2269
+ feature_node * const xi = prob->x[i];
2270
+ QD[i] = sparse_operator::nrm2_sq(xi);
2271
+ sparse_operator::axpy(alpha[i], xi, w);
2272
+
2273
+ index[i] = i;
2274
+ }
2275
+
2276
+ while (iter < max_iter)
2277
+ {
2278
+ negGmax = -INF;
2279
+ negGmin = INF;
2280
+
2281
+ for (s=0; s<active_size; s++)
2282
+ {
2283
+ i = index[s];
2284
+ feature_node * const xi = prob->x[i];
2285
+ G[i] = sparse_operator::dot(w, xi);
2286
+ if (alpha[i] < 1)
2287
+ negGmax = max(negGmax, -G[i]);
2288
+ if (alpha[i] > 0)
2289
+ negGmin = min(negGmin, -G[i]);
2290
+ }
2291
+
2292
+ if (negGmax - negGmin < eps)
2293
+ {
2294
+ if (active_size == l)
2295
+ break;
2296
+ else
2297
+ {
2298
+ active_size = l;
2299
+ info("*");
2300
+ continue;
2301
+ }
2302
+ }
2303
+
2304
+ for(s=0; s<active_size; s++)
2305
+ {
2306
+ i = index[s];
2307
+ if ((alpha[i] == 1 && -G[i] > negGmax) ||
2308
+ (alpha[i] == 0 && -G[i] < negGmin))
2309
+ {
2310
+ active_size--;
2311
+ swap(index[s], index[active_size]);
2312
+ s--;
2313
+ }
2314
+ }
2315
+
2316
+ max_inner_iter = max(active_size/10, 1);
2317
+ struct heap min_heap = heap(max_inner_iter, heap::MIN);
2318
+ struct heap max_heap = heap(max_inner_iter, heap::MAX);
2319
+ struct feature_node node;
2320
+ for(s=0; s<active_size; s++)
2321
+ {
2322
+ i = index[s];
2323
+ node.index = i;
2324
+ node.value = -G[i];
2325
+
2326
+ if (alpha[i] < 1)
2327
+ {
2328
+ if (min_heap.size() < max_inner_iter)
2329
+ min_heap.push(node);
2330
+ else if (min_heap.top().value < node.value)
2331
+ {
2332
+ min_heap.pop();
2333
+ min_heap.push(node);
2334
+ }
2335
+ }
2336
+
2337
+ if (alpha[i] > 0)
2338
+ {
2339
+ if (max_heap.size() < max_inner_iter)
2340
+ max_heap.push(node);
2341
+ else if (max_heap.top().value > node.value)
2342
+ {
2343
+ max_heap.pop();
2344
+ max_heap.push(node);
2345
+ }
2346
+ }
2347
+ }
2348
+ max_inner_iter = min(min_heap.size(), max_heap.size());
2349
+ while (max_heap.size() > max_inner_iter)
2350
+ max_heap.pop();
2351
+ while (min_heap.size() > max_inner_iter)
2352
+ min_heap.pop();
2353
+
2354
+ for (s=max_inner_iter-1; s>=0; s--)
2355
+ {
2356
+ most_violating_i[s] = min_heap.top().index;
2357
+ most_violating_j[s] = max_heap.top().index;
2358
+ min_heap.pop();
2359
+ max_heap.pop();
2360
+ }
2361
+
2362
+ for (s=0; s<max_inner_iter; s++)
2363
+ {
2364
+ i = most_violating_i[s];
2365
+ j = most_violating_j[s];
2366
+
2367
+ if ((alpha[i] == 0 && alpha[j] == 0) ||
2368
+ (alpha[i] == 1 && alpha[j] == 1))
2369
+ continue;
2370
+
2371
+ feature_node const * xi = prob->x[i];
2372
+ feature_node const * xj = prob->x[j];
2373
+
2374
+ Gi = sparse_operator::dot(w, xi);
2375
+ Gj = sparse_operator::dot(w, xj);
2376
+
2377
+ int violating_pair = 0;
2378
+ if (alpha[i] < 1 && alpha[j] > 0 && -Gj + 1e-12 < -Gi)
2379
+ violating_pair = 1;
2380
+ else
2381
+ if (alpha[i] > 0 && alpha[j] < 1 && -Gi + 1e-12 < -Gj)
2382
+ violating_pair = 1;
2383
+ if (violating_pair == 0)
2384
+ continue;
2385
+
2386
+ Qij = sparse_operator::sparse_dot(xi, xj);
2387
+ quad_coef = QD[i] + QD[j] - 2*Qij;
2388
+ if(quad_coef <= 0)
2389
+ quad_coef = 1e-12;
2390
+ delta = (Gi - Gj) / quad_coef;
2391
+ old_alpha_i = alpha[i];
2392
+ sum = alpha[i] + alpha[j];
2393
+ alpha[i] = alpha[i] - delta;
2394
+ alpha[j] = alpha[j] + delta;
2395
+ if (sum > 1)
2396
+ {
2397
+ if (alpha[i] > 1)
2398
+ {
2399
+ alpha[i] = 1;
2400
+ alpha[j] = sum - 1;
2401
+ }
2402
+ }
2403
+ else
2404
+ {
2405
+ if (alpha[j] < 0)
2406
+ {
2407
+ alpha[j] = 0;
2408
+ alpha[i] = sum;
2409
+ }
2410
+ }
2411
+ if (sum > 1)
2412
+ {
2413
+ if (alpha[j] > 1)
2414
+ {
2415
+ alpha[j] = 1;
2416
+ alpha[i] = sum - 1;
2417
+ }
2418
+ }
2419
+ else
2420
+ {
2421
+ if (alpha[i] < 0)
2422
+ {
2423
+ alpha[i] = 0;
2424
+ alpha[j] = sum;
2425
+ }
2426
+ }
2427
+ delta = alpha[i] - old_alpha_i;
2428
+ sparse_operator::axpy(delta, xi, w);
2429
+ sparse_operator::axpy(-delta, xj, w);
2430
+ }
2431
+ iter++;
2432
+ if (iter % 10 == 0)
2433
+ info(".");
2434
+ }
2435
+ info("\noptimization finished, #iter = %d\n",iter);
2436
+ if (iter >= max_iter)
2437
+ info("\nWARNING: reaching max number of iterations\n\n");
2438
+
2439
+ // calculate objective value
2440
+ double v = 0;
2441
+ for(i=0; i<w_size; i++)
2442
+ v += w[i]*w[i];
2443
+ int nSV = 0;
2444
+ for(i=0; i<l; i++)
2445
+ {
2446
+ if (alpha[i] > 0)
2447
+ ++nSV;
2448
+ }
2449
+ info("Objective value = %lf\n", v/2);
2450
+ info("nSV = %d\n", nSV);
2451
+
2452
+ // calculate rho
2453
+ double nr_free = 0;
2454
+ double ub = INF, lb = -INF, sum_free = 0;
2455
+ for(i=0; i<l; i++)
2456
+ {
2457
+ double G = sparse_operator::dot(w, prob->x[i]);
2458
+ if (alpha[i] == 1)
2459
+ lb = max(lb, G);
2460
+ else if (alpha[i] == 0)
2461
+ ub = min(ub, G);
2462
+ else
2463
+ {
2464
+ ++nr_free;
2465
+ sum_free += G;
2466
+ }
2467
+ }
2468
+
2469
+ if (nr_free > 0)
2470
+ *rho = sum_free/nr_free;
2471
+ else
2472
+ *rho = (ub + lb)/2;
2473
+
2474
+ info("rho = %lf\n", *rho);
2475
+
2476
+ delete [] QD;
2477
+ delete [] G;
2478
+ delete [] index;
2479
+ delete [] alpha;
2480
+ delete [] most_violating_i;
2481
+ delete [] most_violating_j;
2482
+ }
2483
+
2484
+ // transpose matrix X from row format to column format
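+ // (Descriptive note: col_ptr below is built as a prefix sum over per-column
+ // nonzero counts, with one extra slot per column reserved for the index = -1
+ // terminator, so x_space holds nnz + n feature_node entries.)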
2485
+ static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
2486
+ {
2487
+ int i;
2488
+ int l = prob->l;
2489
+ int n = prob->n;
2490
+ size_t nnz = 0;
2491
+ size_t *col_ptr = new size_t [n+1];
2492
+ feature_node *x_space;
2493
+ prob_col->l = l;
2494
+ prob_col->n = n;
2495
+ prob_col->y = new double[l];
2496
+ prob_col->x = new feature_node*[n];
2497
+
2498
+ for(i=0; i<l; i++)
2499
+ prob_col->y[i] = prob->y[i];
2500
+
2501
+ for(i=0; i<n+1; i++)
2502
+ col_ptr[i] = 0;
2503
+ for(i=0; i<l; i++)
2504
+ {
2505
+ feature_node *x = prob->x[i];
2506
+ while(x->index != -1)
2507
+ {
2508
+ nnz++;
2509
+ col_ptr[x->index]++;
2510
+ x++;
2511
+ }
2512
+ }
2513
+ for(i=1; i<n+1; i++)
2514
+ col_ptr[i] += col_ptr[i-1] + 1;
2515
+
2516
+ x_space = new feature_node[nnz+n];
2517
+ for(i=0; i<n; i++)
2518
+ prob_col->x[i] = &x_space[col_ptr[i]];
2519
+
2520
+ for(i=0; i<l; i++)
2521
+ {
2522
+ feature_node *x = prob->x[i];
2523
+ while(x->index != -1)
2524
+ {
2525
+ int ind = x->index-1;
2526
+ x_space[col_ptr[ind]].index = i+1; // starts from 1
2527
+ x_space[col_ptr[ind]].value = x->value;
2528
+ col_ptr[ind]++;
2529
+ x++;
2530
+ }
2531
+ }
2532
+ for(i=0; i<n; i++)
2533
+ x_space[col_ptr[i]].index = -1;
2534
+
2535
+ *x_space_ret = x_space;
2536
+
2537
+ delete [] col_ptr;
2538
+ }
2539
+
2540
+ // label: label name, start: beginning index of each class, count: #data of each class, perm: indices to the original data
2541
+ // perm, length l, must be allocated before calling this subroutine
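+ //
+ // Illustrative example (hypothetical toy data, not part of the library): for
+ // labels y = [2, 1, 2, 2, 1], the routine yields
+ //   nr_class = 2, label = [2, 1], count = [3, 2], start = [0, 3],
+ //   perm = [0, 2, 3, 1, 4]   // indices of class 2 first, then class 1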
2542
+ static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2543
+ {
2544
+ int l = prob->l;
2545
+ int max_nr_class = 16;
2546
+ int nr_class = 0;
2547
+ int *label = Malloc(int,max_nr_class);
2548
+ int *count = Malloc(int,max_nr_class);
2549
+ int *data_label = Malloc(int,l);
2550
+ int i;
2551
+
2552
+ for(i=0;i<l;i++)
2553
+ {
2554
+ int this_label = (int)prob->y[i];
2555
+ int j;
2556
+ for(j=0;j<nr_class;j++)
2557
+ {
2558
+ if(this_label == label[j])
2559
+ {
2560
+ ++count[j];
2561
+ break;
2562
+ }
2563
+ }
2564
+ data_label[i] = j;
2565
+ if(j == nr_class)
2566
+ {
2567
+ if(nr_class == max_nr_class)
2568
+ {
2569
+ max_nr_class *= 2;
2570
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
2571
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
2572
+ }
2573
+ label[nr_class] = this_label;
2574
+ count[nr_class] = 1;
2575
+ ++nr_class;
2576
+ }
2577
+ }
2578
+
2579
+ //
2580
+ // Labels are ordered by their first occurrence in the training set.
2581
+ // However, for two-class sets with -1/+1 labels where -1 appears first,
2582
+ // we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
2583
+ //
2584
+ if (nr_class == 2 && label[0] == -1 && label[1] == 1)
2585
+ {
2586
+ swap(label[0],label[1]);
2587
+ swap(count[0],count[1]);
2588
+ for(i=0;i<l;i++)
2589
+ {
2590
+ if(data_label[i] == 0)
2591
+ data_label[i] = 1;
2592
+ else
2593
+ data_label[i] = 0;
2594
+ }
2595
+ }
2596
+
2597
+ int *start = Malloc(int,nr_class);
2598
+ start[0] = 0;
2599
+ for(i=1;i<nr_class;i++)
2600
+ start[i] = start[i-1]+count[i-1];
2601
+ for(i=0;i<l;i++)
2602
+ {
2603
+ perm[start[data_label[i]]] = i;
2604
+ ++start[data_label[i]];
2605
+ }
2606
+ start[0] = 0;
2607
+ for(i=1;i<nr_class;i++)
2608
+ start[i] = start[i-1]+count[i-1];
2609
+
2610
+ *nr_class_ret = nr_class;
2611
+ *label_ret = label;
2612
+ *start_ret = start;
2613
+ *count_ret = count;
2614
+ free(data_label);
2615
+ }
2616
+
2617
+ static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
2618
+ {
2619
+ double eps = param->eps;
2620
+
2621
+ int pos = 0;
2622
+ int neg = 0;
2623
+ for(int i=0;i<prob->l;i++)
2624
+ if(prob->y[i] > 0)
2625
+ pos++;
2626
+ neg = prob->l - pos;
2627
+ double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
2628
+
2629
+ function *fun_obj=NULL;
2630
+ switch(param->solver_type)
2631
+ {
2632
+ case L2R_LR:
2633
+ {
2634
+ double *C = new double[prob->l];
2635
+ for(int i = 0; i < prob->l; i++)
2636
+ {
2637
+ if(prob->y[i] > 0)
2638
+ C[i] = Cp;
2639
+ else
2640
+ C[i] = Cn;
2641
+ }
2642
+ fun_obj=new l2r_lr_fun(prob, param, C);
2643
+ NEWTON newton_obj(fun_obj, primal_solver_tol);
2644
+ newton_obj.set_print_string(liblinear_print_string);
2645
+ newton_obj.newton(w);
2646
+ delete fun_obj;
2647
+ delete[] C;
2648
+ break;
2649
+ }
2650
+ case L2R_L2LOSS_SVC:
2651
+ {
2652
+ double *C = new double[prob->l];
2653
+ for(int i = 0; i < prob->l; i++)
2654
+ {
2655
+ if(prob->y[i] > 0)
2656
+ C[i] = Cp;
2657
+ else
2658
+ C[i] = Cn;
2659
+ }
2660
+ fun_obj=new l2r_l2_svc_fun(prob, param, C);
2661
+ NEWTON newton_obj(fun_obj, primal_solver_tol);
2662
+ newton_obj.set_print_string(liblinear_print_string);
2663
+ newton_obj.newton(w);
2664
+ delete fun_obj;
2665
+ delete[] C;
2666
+ break;
2667
+ }
2668
+ case L2R_L2LOSS_SVC_DUAL:
2669
+ solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
2670
+ break;
2671
+ case L2R_L1LOSS_SVC_DUAL:
2672
+ solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
2673
+ break;
2674
+ case L1R_L2LOSS_SVC:
2675
+ {
2676
+ problem prob_col;
2677
+ feature_node *x_space = NULL;
2678
+ transpose(prob, &x_space ,&prob_col);
2679
+ solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias);
2680
+ delete [] prob_col.y;
2681
+ delete [] prob_col.x;
2682
+ delete [] x_space;
2683
+ break;
2684
+ }
2685
+ case L1R_LR:
2686
+ {
2687
+ problem prob_col;
2688
+ feature_node *x_space = NULL;
2689
+ transpose(prob, &x_space ,&prob_col);
2690
+ solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias);
2691
+ delete [] prob_col.y;
2692
+ delete [] prob_col.x;
2693
+ delete [] x_space;
2694
+ break;
2695
+ }
2696
+ case L2R_LR_DUAL:
2697
+ solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
2698
+ break;
2699
+ case L2R_L2LOSS_SVR:
2700
+ {
2701
+ double *C = new double[prob->l];
2702
+ for(int i = 0; i < prob->l; i++)
2703
+ C[i] = param->C;
2704
+
2705
+ fun_obj=new l2r_l2_svr_fun(prob, param, C);
2706
+ NEWTON newton_obj(fun_obj, param->eps);
2707
+ newton_obj.set_print_string(liblinear_print_string);
2708
+ newton_obj.newton(w);
2709
+ delete fun_obj;
2710
+ delete[] C;
2711
+ break;
2712
+
2713
+ }
2714
+ case L2R_L1LOSS_SVR_DUAL:
2715
+ solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL);
2716
+ break;
2717
+ case L2R_L2LOSS_SVR_DUAL:
2718
+ solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL);
2719
+ break;
2720
+ default:
2721
+ fprintf(stderr, "ERROR: unknown solver_type\n");
2722
+ break;
2723
+ }
2724
+ }
2725
+
2726
+ // Calculate the initial C for parameter selection
2727
+ static double calc_start_C(const problem *prob, const parameter *param)
2728
+ {
2729
+ int i;
2730
+ double xTx, max_xTx;
2731
+ max_xTx = 0;
2732
+ for(i=0; i<prob->l; i++)
2733
+ {
2734
+ xTx = 0;
2735
+ feature_node *xi=prob->x[i];
2736
+ while(xi->index != -1)
2737
+ {
2738
+ double val = xi->value;
2739
+ xTx += val*val;
2740
+ xi++;
2741
+ }
2742
+ if(xTx > max_xTx)
2743
+ max_xTx = xTx;
2744
+ }
2745
+
2746
+ double min_C = 1.0;
2747
+ if(param->solver_type == L2R_LR)
2748
+ min_C = 1.0 / (prob->l * max_xTx);
2749
+ else if(param->solver_type == L2R_L2LOSS_SVC)
2750
+ min_C = 1.0 / (2 * prob->l * max_xTx);
2751
+ else if(param->solver_type == L2R_L2LOSS_SVR)
2752
+ {
2753
+ double sum_y, loss, y_abs;
2754
+ double delta2 = 0.1;
2755
+ sum_y = 0, loss = 0;
2756
+ for(i=0; i<prob->l; i++)
2757
+ {
2758
+ y_abs = fabs(prob->y[i]);
2759
+ sum_y += y_abs;
2760
+ loss += max(y_abs - param->p, 0.0) * max(y_abs - param->p, 0.0);
2761
+ }
2762
+ if(loss > 0)
2763
+ min_C = delta2 * delta2 * loss / (8 * sum_y * sum_y * max_xTx);
2764
+ else
2765
+ min_C = INF;
2766
+ }
2767
+
2768
+ return pow( 2, floor(log(min_C) / log(2.0)) );
2769
+ }
2770
+
2771
+ static double calc_max_p(const problem *prob, const parameter *param)
2772
+ {
2773
+ int i;
2774
+ double max_p = 0.0;
2775
+ for(i = 0; i < prob->l; i++)
2776
+ max_p = max(max_p, fabs(prob->y[i]));
2777
+
2778
+ return max_p;
2779
+ }
2780
+
2781
+ static void find_parameter_C(const problem *prob, parameter *param_tmp, double start_C, double max_C, double *best_C, double *best_score, const int *fold_start, const int *perm, const problem *subprob, int nr_fold)
2782
+ {
2783
+ // variables for CV
2784
+ int i;
2785
+ double *target = Malloc(double, prob->l);
2786
+
2787
+ // variables for warm start
2788
+ double ratio = 2;
2789
+ double **prev_w = Malloc(double*, nr_fold);
2790
+ for(i = 0; i < nr_fold; i++)
2791
+ prev_w[i] = NULL;
2792
+ int num_unchanged_w = 0;
2793
+ void (*default_print_string) (const char *) = liblinear_print_string;
2794
+
2795
+ if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
2796
+ *best_score = 0.0;
2797
+ else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
2798
+ *best_score = INF;
2799
+ *best_C = start_C;
2800
+
2801
+ param_tmp->C = start_C;
2802
+ while(param_tmp->C <= max_C)
2803
+ {
2804
+ // Output is disabled while running CV at a particular C
2805
+ set_print_string_function(&print_null);
2806
+
2807
+ for(i=0; i<nr_fold; i++)
2808
+ {
2809
+ int j;
2810
+ int begin = fold_start[i];
2811
+ int end = fold_start[i+1];
2812
+
2813
+ param_tmp->init_sol = prev_w[i];
2814
+ struct model *submodel = train(&subprob[i],param_tmp);
2815
+
2816
+ int total_w_size;
2817
+ if(submodel->nr_class == 2)
2818
+ total_w_size = subprob[i].n;
2819
+ else
2820
+ total_w_size = subprob[i].n * submodel->nr_class;
2821
+
2822
+ if(prev_w[i] == NULL)
2823
+ {
2824
+ prev_w[i] = Malloc(double, total_w_size);
2825
+ for(j=0; j<total_w_size; j++)
2826
+ prev_w[i][j] = submodel->w[j];
2827
+ }
2828
+ else if(num_unchanged_w >= 0)
2829
+ {
2830
+ double norm_w_diff = 0;
2831
+ for(j=0; j<total_w_size; j++)
2832
+ {
2833
+ norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
2834
+ prev_w[i][j] = submodel->w[j];
2835
+ }
2836
+ norm_w_diff = sqrt(norm_w_diff);
2837
+
2838
+ if(norm_w_diff > 1e-15)
2839
+ num_unchanged_w = -1;
2840
+ }
2841
+ else
2842
+ {
2843
+ for(j=0; j<total_w_size; j++)
2844
+ prev_w[i][j] = submodel->w[j];
2845
+ }
2846
+
2847
+ for(j=begin; j<end; j++)
2848
+ target[perm[j]] = predict(submodel,prob->x[perm[j]]);
2849
+
2850
+ free_and_destroy_model(&submodel);
2851
+ }
2852
+ set_print_string_function(default_print_string);
2853
+
2854
+ if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
2855
+ {
2856
+ int total_correct = 0;
2857
+ for(i=0; i<prob->l; i++)
2858
+ if(target[i] == prob->y[i])
2859
+ ++total_correct;
2860
+ double current_rate = (double)total_correct/prob->l;
2861
+ if(current_rate > *best_score)
2862
+ {
2863
+ *best_C = param_tmp->C;
2864
+ *best_score = current_rate;
2865
+ }
2866
+
2867
+ info("log2c=%7.2f\trate=%g\n",log(param_tmp->C)/log(2.0),100.0*current_rate);
2868
+ }
2869
+ else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
2870
+ {
2871
+ double total_error = 0.0;
2872
+ for(i=0; i<prob->l; i++)
2873
+ {
2874
+ double y = prob->y[i];
2875
+ double v = target[i];
2876
+ total_error += (v-y)*(v-y);
2877
+ }
2878
+ double current_error = total_error/prob->l;
2879
+ if(current_error < *best_score)
2880
+ {
2881
+ *best_C = param_tmp->C;
2882
+ *best_score = current_error;
2883
+ }
2884
+
2885
+ info("log2c=%7.2f\tp=%7.2f\tMean squared error=%g\n",log(param_tmp->C)/log(2.0),param_tmp->p,current_error);
2886
+ }
2887
+
2888
+ num_unchanged_w++;
2889
+ if(num_unchanged_w == 5)
2890
+ break;
2891
+ param_tmp->C = param_tmp->C*ratio;
2892
+ }
2893
+
2894
+ if(param_tmp->C > max_C)
2895
+ info("WARNING: maximum C reached.\n");
2896
+ free(target);
2897
+ for(i=0; i<nr_fold; i++)
2898
+ free(prev_w[i]);
2899
+ free(prev_w);
2900
+ }
2901
+
2902
+
2903
+ //
2904
+ // Interface functions
2905
+ //
2906
+ model* train(const problem *prob, const parameter *param)
2907
+ {
2908
+ int i,j;
2909
+ int l = prob->l;
2910
+ int n = prob->n;
2911
+ int w_size = prob->n;
2912
+ model *model_ = Malloc(model,1);
2913
+
2914
+ if(prob->bias>=0)
2915
+ model_->nr_feature=n-1;
2916
+ else
2917
+ model_->nr_feature=n;
2918
+ model_->param = *param;
2919
+ model_->bias = prob->bias;
2920
+
2921
+ if(check_regression_model(model_))
2922
+ {
2923
+ model_->w = Malloc(double, w_size);
2924
+
2925
+ if(param->init_sol != NULL)
2926
+ for(i=0;i<w_size;i++)
2927
+ model_->w[i] = param->init_sol[i];
2928
+ else
2929
+ for(i=0;i<w_size;i++)
2930
+ model_->w[i] = 0;
2931
+
2932
+ model_->nr_class = 2;
2933
+ model_->label = NULL;
2934
+ train_one(prob, param, model_->w, 0, 0);
2935
+ }
2936
+ else if(check_oneclass_model(model_))
2937
+ {
2938
+ model_->w = Malloc(double, w_size);
2939
+ model_->nr_class = 2;
2940
+ model_->label = NULL;
2941
+ solve_oneclass_svm(prob, model_->w, &(model_->rho), param->eps, param->nu);
2942
+ }
2943
+ else
2944
+ {
2945
+ int nr_class;
2946
+ int *label = NULL;
2947
+ int *start = NULL;
2948
+ int *count = NULL;
2949
+ int *perm = Malloc(int,l);
2950
+
2951
+ // group training data of the same class
2952
+ group_classes(prob,&nr_class,&label,&start,&count,perm);
2953
+
2954
+ model_->nr_class=nr_class;
2955
+ model_->label = Malloc(int,nr_class);
2956
+ for(i=0;i<nr_class;i++)
2957
+ model_->label[i] = label[i];
2958
+
2959
+ // calculate weighted C
2960
+ double *weighted_C = Malloc(double, nr_class);
2961
+ for(i=0;i<nr_class;i++)
2962
+ weighted_C[i] = param->C;
2963
+ for(i=0;i<param->nr_weight;i++)
2964
+ {
2965
+ for(j=0;j<nr_class;j++)
2966
+ if(param->weight_label[i] == label[j])
2967
+ break;
2968
+ if(j == nr_class)
2969
+ fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2970
+ else
2971
+ weighted_C[j] *= param->weight[i];
2972
+ }
2973
+
2974
+ // constructing the subproblem
2975
+ feature_node **x = Malloc(feature_node *,l);
2976
+ for(i=0;i<l;i++)
2977
+ x[i] = prob->x[perm[i]];
2978
+
2979
+ int k;
2980
+ problem sub_prob;
2981
+ sub_prob.l = l;
2982
+ sub_prob.n = n;
2983
+ sub_prob.x = Malloc(feature_node *,sub_prob.l);
2984
+ sub_prob.y = Malloc(double,sub_prob.l);
2985
+
2986
+ for(k=0; k<sub_prob.l; k++)
2987
+ sub_prob.x[k] = x[k];
2988
+
2989
+ // multi-class svm by Crammer and Singer
2990
+ if(param->solver_type == MCSVM_CS)
2991
+ {
2992
+ model_->w=Malloc(double, n*nr_class);
2993
+ for(i=0;i<nr_class;i++)
2994
+ for(j=start[i];j<start[i]+count[i];j++)
2995
+ sub_prob.y[j] = i;
2996
+ Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
2997
+ Solver.Solve(model_->w);
2998
+ }
2999
+ else
3000
+ {
3001
+ if(nr_class == 2)
3002
+ {
3003
+ model_->w=Malloc(double, w_size);
3004
+
3005
+ int e0 = start[0]+count[0];
3006
+ k=0;
3007
+ for(; k<e0; k++)
3008
+ sub_prob.y[k] = +1;
3009
+ for(; k<sub_prob.l; k++)
3010
+ sub_prob.y[k] = -1;
3011
+
3012
+ if(param->init_sol != NULL)
3013
+ for(i=0;i<w_size;i++)
3014
+ model_->w[i] = param->init_sol[i];
3015
+ else
3016
+ for(i=0;i<w_size;i++)
3017
+ model_->w[i] = 0;
3018
+
3019
+ train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
3020
+ }
3021
+ else
3022
+ {
3023
+ model_->w=Malloc(double, w_size*nr_class);
3024
+ double *w=Malloc(double, w_size);
3025
+ for(i=0;i<nr_class;i++)
3026
+ {
3027
+ int si = start[i];
3028
+ int ei = si+count[i];
3029
+
3030
+ k=0;
3031
+ for(; k<si; k++)
3032
+ sub_prob.y[k] = -1;
3033
+ for(; k<ei; k++)
3034
+ sub_prob.y[k] = +1;
3035
+ for(; k<sub_prob.l; k++)
3036
+ sub_prob.y[k] = -1;
3037
+
3038
+ if(param->init_sol != NULL)
3039
+ for(j=0;j<w_size;j++)
3040
+ w[j] = param->init_sol[j*nr_class+i];
3041
+ else
3042
+ for(j=0;j<w_size;j++)
3043
+ w[j] = 0;
3044
+
3045
+ train_one(&sub_prob, param, w, weighted_C[i], param->C);
3046
+
3047
+ for(j=0;j<w_size;j++)
3048
+ model_->w[j*nr_class+i] = w[j];
3049
+ }
3050
+ free(w);
3051
+ }
3052
+
3053
+ }
3054
+
3055
+ free(x);
3056
+ free(label);
3057
+ free(start);
3058
+ free(count);
3059
+ free(perm);
3060
+ free(sub_prob.x);
3061
+ free(sub_prob.y);
3062
+ free(weighted_C);
3063
+ }
3064
+ return model_;
3065
+ }
3066
+
3067
+ void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target)
3068
+ {
3069
+ int i;
3070
+ int *fold_start;
3071
+ int l = prob->l;
3072
+ int *perm = Malloc(int,l);
3073
+ if (nr_fold > l)
3074
+ {
3075
+ nr_fold = l;
3076
+ fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
3077
+ }
3078
+ fold_start = Malloc(int,nr_fold+1);
3079
+ for(i=0;i<l;i++) perm[i]=i;
3080
+ for(i=0;i<l;i++)
3081
+ {
3082
+ int j = i+rand()%(l-i);
3083
+ swap(perm[i],perm[j]);
3084
+ }
3085
+ for(i=0;i<=nr_fold;i++)
3086
+ fold_start[i]=i*l/nr_fold;
3087
+
3088
+ for(i=0;i<nr_fold;i++)
3089
+ {
3090
+ int begin = fold_start[i];
3091
+ int end = fold_start[i+1];
3092
+ int j,k;
3093
+ struct problem subprob;
3094
+
3095
+ subprob.bias = prob->bias;
3096
+ subprob.n = prob->n;
3097
+ subprob.l = l-(end-begin);
3098
+ subprob.x = Malloc(struct feature_node*,subprob.l);
3099
+ subprob.y = Malloc(double,subprob.l);
3100
+
3101
+ k=0;
3102
+ for(j=0;j<begin;j++)
3103
+ {
3104
+ subprob.x[k] = prob->x[perm[j]];
3105
+ subprob.y[k] = prob->y[perm[j]];
3106
+ ++k;
3107
+ }
3108
+ for(j=end;j<l;j++)
3109
+ {
3110
+ subprob.x[k] = prob->x[perm[j]];
3111
+ subprob.y[k] = prob->y[perm[j]];
3112
+ ++k;
3113
+ }
3114
+ struct model *submodel = train(&subprob,param);
3115
+ for(j=begin;j<end;j++)
3116
+ target[perm[j]] = predict(submodel,prob->x[perm[j]]);
3117
+ free_and_destroy_model(&submodel);
3118
+ free(subprob.x);
3119
+ free(subprob.y);
3120
+ }
3121
+ free(fold_start);
3122
+ free(perm);
3123
+ }
3124
+
3125
+
3126
+ void find_parameters(const problem *prob, const parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score)
3127
+ {
3128
+ // prepare CV folds
3129
+
3130
+ int i;
3131
+ int *fold_start;
3132
+ int l = prob->l;
3133
+ int *perm = Malloc(int, l);
3134
+ struct problem *subprob = Malloc(problem,nr_fold);
3135
+
3136
+ if (nr_fold > l)
3137
+ {
3138
+ nr_fold = l;
3139
+ fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
3140
+ }
3141
+ fold_start = Malloc(int,nr_fold+1);
3142
+ for(i=0;i<l;i++) perm[i]=i;
3143
+ for(i=0;i<l;i++)
3144
+ {
3145
+ int j = i+rand()%(l-i);
3146
+ swap(perm[i],perm[j]);
3147
+ }
3148
+ for(i=0;i<=nr_fold;i++)
3149
+ fold_start[i]=i*l/nr_fold;
3150
+
3151
+ for(i=0;i<nr_fold;i++)
3152
+ {
3153
+ int begin = fold_start[i];
3154
+ int end = fold_start[i+1];
3155
+ int j,k;
3156
+
3157
+ subprob[i].bias = prob->bias;
3158
+ subprob[i].n = prob->n;
3159
+ subprob[i].l = l-(end-begin);
3160
+ subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
3161
+ subprob[i].y = Malloc(double,subprob[i].l);
3162
+
3163
+ k=0;
3164
+ for(j=0;j<begin;j++)
3165
+ {
3166
+ subprob[i].x[k] = prob->x[perm[j]];
3167
+ subprob[i].y[k] = prob->y[perm[j]];
3168
+ ++k;
3169
+ }
3170
+ for(j=end;j<l;j++)
3171
+ {
3172
+ subprob[i].x[k] = prob->x[perm[j]];
3173
+ subprob[i].y[k] = prob->y[perm[j]];
3174
+ ++k;
3175
+ }
3176
+
3177
+ }
3178
+
3179
+ struct parameter param_tmp = *param;
3180
+ *best_p = -1;
3181
+ if(param->solver_type == L2R_LR || param->solver_type == L2R_L2LOSS_SVC)
3182
+ {
3183
+ if(start_C <= 0)
3184
+ start_C = calc_start_C(prob, &param_tmp);
3185
+ double max_C = 1024;
3186
+ start_C = min(start_C, max_C);
3187
+ double best_C_tmp, best_score_tmp;
3188
+
3189
+ find_parameter_C(prob, &param_tmp, start_C, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
3190
+
3191
+ *best_C = best_C_tmp;
3192
+ *best_score = best_score_tmp;
3193
+ }
3194
+ else if(param->solver_type == L2R_L2LOSS_SVR)
3195
+ {
3196
+ double max_p = calc_max_p(prob, &param_tmp);
3197
+ int num_p_steps = 20;
3198
+ double max_C = 1048576;
3199
+ *best_score = INF;
3200
+
3201
+ i = num_p_steps-1;
3202
+ if(start_p > 0)
3203
+ i = min((int)(start_p/(max_p/num_p_steps)), i);
3204
+ for(; i >= 0; i--)
3205
+ {
3206
+ param_tmp.p = i*max_p/num_p_steps;
3207
+ double start_C_tmp;
3208
+ if(start_C <= 0)
3209
+ start_C_tmp = calc_start_C(prob, &param_tmp);
3210
+ else
3211
+ start_C_tmp = start_C;
3212
+ start_C_tmp = min(start_C_tmp, max_C);
3213
+ double best_C_tmp, best_score_tmp;
3214
+
3215
+ find_parameter_C(prob, &param_tmp, start_C_tmp, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
3216
+
3217
+ if(best_score_tmp < *best_score)
3218
+ {
3219
+ *best_p = param_tmp.p;
3220
+ *best_C = best_C_tmp;
3221
+ *best_score = best_score_tmp;
3222
+ }
3223
+ }
3224
+ }
3225
+
3226
+ free(fold_start);
3227
+ free(perm);
3228
+ for(i=0; i<nr_fold; i++)
3229
+ {
3230
+ free(subprob[i].x);
3231
+ free(subprob[i].y);
3232
+ }
3233
+ free(subprob);
3234
+ }
3235
+
3236
+ double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
3237
+ {
3238
+ int idx;
3239
+ int n;
3240
+ if(model_->bias>=0)
3241
+ n=model_->nr_feature+1;
3242
+ else
3243
+ n=model_->nr_feature;
3244
+ double *w=model_->w;
3245
+ int nr_class=model_->nr_class;
3246
+ int i;
3247
+ int nr_w;
3248
+ if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
3249
+ nr_w = 1;
3250
+ else
3251
+ nr_w = nr_class;
3252
+
3253
+ const feature_node *lx=x;
3254
+ for(i=0;i<nr_w;i++)
3255
+ dec_values[i] = 0;
3256
+ for(; (idx=lx->index)!=-1; lx++)
3257
+ {
3258
+ // the dimension of testing data may exceed that of training
3259
+ if(idx<=n)
3260
+ for(i=0;i<nr_w;i++)
3261
+ dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
3262
+ }
3263
+ if(check_oneclass_model(model_))
3264
+ dec_values[0] -= model_->rho;
3265
+
3266
+ if(nr_class==2)
3267
+ {
3268
+ if(check_regression_model(model_))
3269
+ return dec_values[0];
3270
+ else if(check_oneclass_model(model_))
3271
+ return (dec_values[0]>0)?1:-1;
3272
+ else
3273
+ return (dec_values[0]>0)?model_->label[0]:model_->label[1];
3274
+ }
3275
+ else
3276
+ {
3277
+ int dec_max_idx = 0;
3278
+ for(i=1;i<nr_class;i++)
3279
+ {
3280
+ if(dec_values[i] > dec_values[dec_max_idx])
3281
+ dec_max_idx = i;
3282
+ }
3283
+ return model_->label[dec_max_idx];
3284
+ }
3285
+ }
3286
+
3287
+ double predict(const model *model_, const feature_node *x)
3288
+ {
3289
+ double *dec_values = Malloc(double, model_->nr_class);
3290
+ double label=predict_values(model_, x, dec_values);
3291
+ free(dec_values);
3292
+ return label;
3293
+ }
3294
+
3295
+ double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
3296
+ {
3297
+ if(check_probability_model(model_))
3298
+ {
3299
+ int i;
3300
+ int nr_class=model_->nr_class;
3301
+ int nr_w;
3302
+ if(nr_class==2)
3303
+ nr_w = 1;
3304
+ else
3305
+ nr_w = nr_class;
3306
+
3307
+ double label=predict_values(model_, x, prob_estimates);
3308
+ for(i=0;i<nr_w;i++)
3309
+ prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
3310
+
3311
+ if(nr_class==2) // for binary classification
3312
+ prob_estimates[1]=1.-prob_estimates[0];
3313
+ else
3314
+ {
3315
+ double sum=0;
3316
+ for(i=0; i<nr_class; i++)
3317
+ sum+=prob_estimates[i];
3318
+
3319
+ for(i=0; i<nr_class; i++)
3320
+ prob_estimates[i]=prob_estimates[i]/sum;
3321
+ }
3322
+
3323
+ return label;
3324
+ }
3325
+ else
3326
+ return 0;
3327
+ }
3328
+
3329
+ static const char *solver_type_table[]=
3330
+ {
3331
+ "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
3332
+ "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
3333
+ "", "", "",
3334
+ "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL",
3335
+ "", "", "", "", "", "", "",
3336
+ "ONECLASS_SVM", NULL
3337
+ };
3338
+
3339
+ int save_model(const char *model_file_name, const struct model *model_)
3340
+ {
3341
+ int i;
3342
+ int nr_feature=model_->nr_feature;
3343
+ int n;
3344
+ const parameter& param = model_->param;
3345
+
3346
+ if(model_->bias>=0)
3347
+ n=nr_feature+1;
3348
+ else
3349
+ n=nr_feature;
3350
+ int w_size = n;
3351
+ FILE *fp = fopen(model_file_name,"w");
3352
+ if(fp==NULL) return -1;
3353
+
3354
+ char *old_locale = setlocale(LC_ALL, NULL);
3355
+ if (old_locale)
3356
+ {
3357
+ old_locale = strdup(old_locale);
3358
+ }
3359
+ setlocale(LC_ALL, "C");
3360
+
3361
+ int nr_w;
3362
+ if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
3363
+ nr_w=1;
3364
+ else
3365
+ nr_w=model_->nr_class;
3366
+
3367
+ fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
3368
+ fprintf(fp, "nr_class %d\n", model_->nr_class);
3369
+
3370
+ if(model_->label)
3371
+ {
3372
+ fprintf(fp, "label");
3373
+ for(i=0; i<model_->nr_class; i++)
3374
+ fprintf(fp, " %d", model_->label[i]);
3375
+ fprintf(fp, "\n");
3376
+ }
3377
+
3378
+ fprintf(fp, "nr_feature %d\n", nr_feature);
3379
+
3380
+ fprintf(fp, "bias %.17g\n", model_->bias);
3381
+
3382
+ if(check_oneclass_model(model_))
3383
+ fprintf(fp, "rho %.17g\n", model_->rho);
3384
+
3385
+ fprintf(fp, "w\n");
3386
+ for(i=0; i<w_size; i++)
3387
+ {
3388
+ int j;
3389
+ for(j=0; j<nr_w; j++)
3390
+ fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
3391
+ fprintf(fp, "\n");
3392
+ }
3393
+
3394
+ setlocale(LC_ALL, old_locale);
3395
+ free(old_locale);
3396
+
3397
+ if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
3398
+ else return 0;
3399
+ }
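+
+ // Illustrative example (hypothetical weight values): a binary L2R_LR model
+ // with 3 features and bias >= 0 is written by save_model in roughly this form:
+ //
+ //   solver_type L2R_LR
+ //   nr_class 2
+ //   label 1 -1
+ //   nr_feature 3
+ //   bias 1
+ //   w
+ //   0.25
+ //   -1.5
+ //   0.0625
+ //   0.5
+ //
+ // One weight line is printed per feature (plus one for the bias term when
+ // bias >= 0); multiclass models print nr_class values per line, and one-class
+ // SVM models additionally store a rho line. load_model parses this layout.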
3400
+
3401
+ //
3402
+ // FSCANF helps to handle fscanf failures.
3403
+ // Its do-while block avoids the ambiguity when
3404
+ // if (...)
3405
+ // FSCANF();
3406
+ // is used
3407
+ //
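+ // Illustrative note (added example, not in the original sources): without the
+ // do-while(0) wrapper, a plain brace-wrapped macro followed by a semicolon
+ // leaves an empty statement that breaks if/else pairing:
+ //
+ //   if(header_found)
+ //       FSCANF(fp, "%d", &nr_class);   // would expand to { ... };
+ //   else                               // -> "else without a previous if"
+ //       return NULL;
+ //
+ // With do { ... } while(0), the macro behaves as a single statement that
+ // consumes the trailing semicolon, so the else binds correctly.
+ //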
3408
+ #define FSCANF(_stream, _format, _var)do\
3409
+ {\
3410
+ if (fscanf(_stream, _format, _var) != 1)\
3411
+ {\
3412
+ fprintf(stderr, "ERROR: fscanf failed to read the model\n");\
3413
+ EXIT_LOAD_MODEL()\
3414
+ }\
3415
+ }while(0)
3416
+ // EXIT_LOAD_MODEL should NOT end with a semicolon.
3417
+ #define EXIT_LOAD_MODEL()\
3418
+ {\
3419
+ setlocale(LC_ALL, old_locale);\
3420
+ free(model_->label);\
3421
+ free(model_);\
3422
+ free(old_locale);\
3423
+ return NULL;\
3424
+ }
3425
+ struct model *load_model(const char *model_file_name)
3426
+ {
3427
+ FILE *fp = fopen(model_file_name,"r");
3428
+ if(fp==NULL) return NULL;
3429
+
3430
+ int i;
3431
+ int nr_feature;
3432
+ int n;
3433
+ int nr_class;
3434
+ double bias;
3435
+ double rho;
3436
+ model *model_ = Malloc(model,1);
3437
+ parameter& param = model_->param;
3438
+ // parameters used only for training are not assigned here, but the array pointers are set to NULL for safety
3439
+ param.nr_weight = 0;
3440
+ param.weight_label = NULL;
3441
+ param.weight = NULL;
3442
+ param.init_sol = NULL;
3443
+
3444
+ model_->label = NULL;
3445
+
3446
+ char *old_locale = setlocale(LC_ALL, NULL);
3447
+ if (old_locale)
3448
+ {
3449
+ old_locale = strdup(old_locale);
3450
+ }
3451
+ setlocale(LC_ALL, "C");
3452
+
3453
+ char cmd[81];
3454
+ while(1)
3455
+ {
3456
+ FSCANF(fp,"%80s",cmd);
3457
+ if(strcmp(cmd,"solver_type")==0)
3458
+ {
3459
+ FSCANF(fp,"%80s",cmd);
3460
+ int i;
3461
+ for(i=0;solver_type_table[i];i++)
3462
+ {
3463
+ if(strcmp(solver_type_table[i],cmd)==0)
3464
+ {
3465
+ param.solver_type=i;
3466
+ break;
3467
+ }
3468
+ }
3469
+ if(solver_type_table[i] == NULL)
3470
+ {
3471
+ fprintf(stderr,"unknown solver type.\n");
3472
+ EXIT_LOAD_MODEL()
3473
+ }
3474
+ }
3475
+ else if(strcmp(cmd,"nr_class")==0)
3476
+ {
3477
+ FSCANF(fp,"%d",&nr_class);
3478
+ model_->nr_class=nr_class;
3479
+ }
3480
+ else if(strcmp(cmd,"nr_feature")==0)
3481
+ {
3482
+ FSCANF(fp,"%d",&nr_feature);
3483
+ model_->nr_feature=nr_feature;
3484
+ }
3485
+ else if(strcmp(cmd,"bias")==0)
3486
+ {
3487
+ FSCANF(fp,"%lf",&bias);
3488
+ model_->bias=bias;
3489
+ }
3490
+ else if(strcmp(cmd,"rho")==0)
3491
+ {
3492
+ FSCANF(fp,"%lf",&rho);
3493
+ model_->rho=rho;
3494
+ }
3495
+ else if(strcmp(cmd,"w")==0)
3496
+ {
3497
+ break;
3498
+ }
3499
+ else if(strcmp(cmd,"label")==0)
3500
+ {
3501
+ int nr_class = model_->nr_class;
3502
+ model_->label = Malloc(int,nr_class);
3503
+ for(int i=0;i<nr_class;i++)
3504
+ FSCANF(fp,"%d",&model_->label[i]);
3505
+ }
3506
+ else
3507
+ {
3508
+ fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
3509
+ EXIT_LOAD_MODEL()
3510
+ }
3511
+ }
3512
+
3513
+ nr_feature=model_->nr_feature;
3514
+ if(model_->bias>=0)
3515
+ n=nr_feature+1;
3516
+ else
3517
+ n=nr_feature;
3518
+ int w_size = n;
3519
+ int nr_w;
3520
+ if(nr_class==2 && param.solver_type != MCSVM_CS)
3521
+ nr_w = 1;
3522
+ else
3523
+ nr_w = nr_class;
3524
+
3525
+ model_->w=Malloc(double, w_size*nr_w);
3526
+ for(i=0; i<w_size; i++)
3527
+ {
3528
+ int j;
3529
+ for(j=0; j<nr_w; j++)
3530
+ FSCANF(fp, "%lf ", &model_->w[i*nr_w+j]);
3531
+ }
3532
+
3533
+ setlocale(LC_ALL, old_locale);
3534
+ free(old_locale);
3535
+
3536
+ if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
3537
+
3538
+ return model_;
3539
+ }
3540
+
3541
+ int get_nr_feature(const model *model_)
3542
+ {
3543
+ return model_->nr_feature;
3544
+ }
3545
+
3546
+ int get_nr_class(const model *model_)
3547
+ {
3548
+ return model_->nr_class;
3549
+ }
3550
+
3551
+ void get_labels(const model *model_, int* label)
3552
+ {
3553
+ if (model_->label != NULL)
3554
+ for(int i=0;i<model_->nr_class;i++)
3555
+ label[i] = model_->label[i];
3556
+ }
3557
+
3558
+ // use inline here for better performance (around 20% faster than the non-inline version)
3559
+ static inline double get_w_value(const struct model *model_, int idx, int label_idx)
3560
+ {
3561
+ int nr_class = model_->nr_class;
3562
+ int solver_type = model_->param.solver_type;
3563
+ const double *w = model_->w;
3564
+
3565
+ if(idx < 0 || idx > model_->nr_feature)
3566
+ return 0;
3567
+ if(check_regression_model(model_) || check_oneclass_model(model_))
3568
+ return w[idx];
3569
+ else
3570
+ {
3571
+ if(label_idx < 0 || label_idx >= nr_class)
3572
+ return 0;
3573
+ if(nr_class == 2 && solver_type != MCSVM_CS)
3574
+ {
3575
+ if(label_idx == 0)
3576
+ return w[idx];
3577
+ else
3578
+ return -w[idx];
3579
+ }
3580
+ else
3581
+ return w[idx*nr_class+label_idx];
3582
+ }
3583
+ }
3584
+
3585
+ // feat_idx: ranges from 1 to nr_feature
3586
+ // label_idx: ranges from 0 to nr_class-1 for classification models;
3587
+ // for regression and one-class SVM models, label_idx is
3588
+ // ignored.
3589
+ double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
3590
+ {
3591
+ if(feat_idx > model_->nr_feature)
3592
+ return 0;
3593
+ return get_w_value(model_, feat_idx-1, label_idx);
3594
+ }
3595
+
3596
+ double get_decfun_bias(const struct model *model_, int label_idx)
3597
+ {
3598
+ if(check_oneclass_model(model_))
3599
+ {
3600
+ fprintf(stderr, "ERROR: get_decfun_bias can not be called for a one-class SVM model\n");
3601
+ return 0;
3602
+ }
3603
+ int bias_idx = model_->nr_feature;
3604
+ double bias = model_->bias;
3605
+ if(bias <= 0)
3606
+ return 0;
3607
+ else
3608
+ return bias*get_w_value(model_, bias_idx, label_idx);
3609
+ }
3610
+
3611
+ double get_decfun_rho(const struct model *model_)
3612
+ {
3613
+ if(check_oneclass_model(model_))
3614
+ return model_->rho;
3615
+ else
3616
+ {
3617
+ fprintf(stderr, "ERROR: get_decfun_rho can be called only for a one-class SVM model\n");
3618
+ return 0;
3619
+ }
3620
+ }
3621
+
3622
+ void free_model_content(struct model *model_ptr)
3623
+ {
3624
+ if(model_ptr->w != NULL)
3625
+ free(model_ptr->w);
3626
+ if(model_ptr->label != NULL)
3627
+ free(model_ptr->label);
3628
+ }
3629
+
3630
+ void free_and_destroy_model(struct model **model_ptr_ptr)
3631
+ {
3632
+ struct model *model_ptr = *model_ptr_ptr;
3633
+ if(model_ptr != NULL)
3634
+ {
3635
+ free_model_content(model_ptr);
3636
+ free(model_ptr);
3637
+ }
3638
+ }
3639
+
3640
+ void destroy_param(parameter* param)
3641
+ {
3642
+ if(param->weight_label != NULL)
3643
+ free(param->weight_label);
3644
+ if(param->weight != NULL)
3645
+ free(param->weight);
3646
+ if(param->init_sol != NULL)
3647
+ free(param->init_sol);
3648
+ }
3649
+
3650
+ const char *check_parameter(const problem *prob, const parameter *param)
3651
+ {
3652
+ if(param->eps <= 0)
3653
+ return "eps <= 0";
3654
+
3655
+ if(param->C <= 0)
3656
+ return "C <= 0";
3657
+
3658
+ if(param->p < 0)
3659
+ return "p < 0";
3660
+
3661
+ if(prob->bias >= 0 && param->solver_type == ONECLASS_SVM)
3662
+ return "prob->bias >=0, but this is ignored in ONECLASS_SVM";
3663
+
3664
+ if(param->regularize_bias == 0)
3665
+ {
3666
+ if(prob->bias != 1.0)
3667
+ return "To not regularize bias, must specify -B 1 along with -R";
3668
+ if(param->solver_type != L2R_LR
3669
+ && param->solver_type != L2R_L2LOSS_SVC
3670
+ && param->solver_type != L1R_L2LOSS_SVC
3671
+ && param->solver_type != L1R_LR
3672
+ && param->solver_type != L2R_L2LOSS_SVR)
3673
+ return "-R option supported only for solver L2R_LR, L2R_L2LOSS_SVC, L1R_L2LOSS_SVC, L1R_LR, and L2R_L2LOSS_SVR";
3674
+ }
3675
+
3676
+ if(param->solver_type != L2R_LR
3677
+ && param->solver_type != L2R_L2LOSS_SVC_DUAL
3678
+ && param->solver_type != L2R_L2LOSS_SVC
3679
+ && param->solver_type != L2R_L1LOSS_SVC_DUAL
3680
+ && param->solver_type != MCSVM_CS
3681
+ && param->solver_type != L1R_L2LOSS_SVC
3682
+ && param->solver_type != L1R_LR
3683
+ && param->solver_type != L2R_LR_DUAL
3684
+ && param->solver_type != L2R_L2LOSS_SVR
3685
+ && param->solver_type != L2R_L2LOSS_SVR_DUAL
3686
+ && param->solver_type != L2R_L1LOSS_SVR_DUAL
3687
+ && param->solver_type != ONECLASS_SVM)
3688
+ return "unknown solver type";
3689
+
3690
+ if(param->init_sol != NULL
3691
+ && param->solver_type != L2R_LR
3692
+ && param->solver_type != L2R_L2LOSS_SVC
3693
+ && param->solver_type != L2R_L2LOSS_SVR)
3694
+ return "Initial-solution specification supported only for solvers L2R_LR, L2R_L2LOSS_SVC, and L2R_L2LOSS_SVR";
3695
+
3696
+ return NULL;
3697
+ }
3698
+
3699
+ int check_probability_model(const struct model *model_)
3700
+ {
3701
+ return (model_->param.solver_type==L2R_LR ||
3702
+ model_->param.solver_type==L2R_LR_DUAL ||
3703
+ model_->param.solver_type==L1R_LR);
3704
+ }
3705
+
3706
+ int check_regression_model(const struct model *model_)
3707
+ {
3708
+ return (model_->param.solver_type==L2R_L2LOSS_SVR ||
3709
+ model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
3710
+ model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);
3711
+ }
3712
+
3713
+ int check_oneclass_model(const struct model *model_)
3714
+ {
3715
+ return model_->param.solver_type == ONECLASS_SVM;
3716
+ }
3717
+
3718
+ void set_print_string_function(void (*print_func)(const char*))
3719
+ {
3720
+ if (print_func == NULL)
3721
+ liblinear_print_string = &print_string_stdout;
3722
+ else
3723
+ liblinear_print_string = print_func;
3724
+ }
3725
+