numo-liblinear 0.3.0 → 1.1.1

@@ -0,0 +1,57 @@
1
+ #include "blas.h"
2
+
3
+ #ifdef __cplusplus
4
+ extern "C" {
5
+ #endif
6
+
7
+ int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy,
8
+ int *incy)
9
+ {
10
+ long int i, m, ix, iy, nn, iincx, iincy;
11
+ register double ssa;
12
+
13
+ /* constant times a vector plus a vector.
14
+ uses unrolled loop for increments equal to one.
15
+ jack dongarra, linpack, 3/11/78.
16
+ modified 12/3/93, array(1) declarations changed to array(*) */
17
+
18
+ /* Dereference inputs */
19
+ nn = *n;
20
+ ssa = *sa;
21
+ iincx = *incx;
22
+ iincy = *incy;
23
+
24
+ if( nn > 0 && ssa != 0.0 )
25
+ {
26
+ if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */
27
+ {
28
+ m = nn-3;
29
+ for (i = 0; i < m; i += 4)
30
+ {
31
+ sy[i] += ssa * sx[i];
32
+ sy[i+1] += ssa * sx[i+1];
33
+ sy[i+2] += ssa * sx[i+2];
34
+ sy[i+3] += ssa * sx[i+3];
35
+ }
36
+ for ( ; i < nn; ++i) /* clean-up loop */
37
+ sy[i] += ssa * sx[i];
38
+ }
39
+ else /* code for unequal increments or equal increments not equal to 1 */
40
+ {
41
+ ix = iincx >= 0 ? 0 : (1 - nn) * iincx;
42
+ iy = iincy >= 0 ? 0 : (1 - nn) * iincy;
43
+ for (i = 0; i < nn; i++)
44
+ {
45
+ sy[iy] += ssa * sx[ix];
46
+ ix += iincx;
47
+ iy += iincy;
48
+ }
49
+ }
50
+ }
51
+
52
+ return 0;
53
+ } /* daxpy_ */
54
+
55
+ #ifdef __cplusplus
56
+ }
57
+ #endif
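The daxpy_ wrapper above keeps the Fortran BLAS calling convention: every argument, scalars included, is passed by pointer, and the trailing underscore matches the symbol name a Fortran compiler would emit. A minimal caller sketch (illustrative only, assuming the bundled blas.h declares daxpy_):

/* Illustrative caller, not part of the gem sources: computes y := 2.0*x + y */
#include <stdio.h>
#include "blas.h"

int main(void)
{
    double x[3] = {1.0, 2.0, 3.0};
    double y[3] = {10.0, 20.0, 30.0};
    int n = 3, inc = 1;
    double a = 2.0;

    daxpy_(&n, &a, x, &inc, y, &inc);   /* y becomes {12, 24, 36} */
    printf("%g %g %g\n", y[0], y[1], y[2]);
    return 0;
}
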
@@ -0,0 +1,58 @@
1
+ #include "blas.h"
2
+
3
+ #ifdef __cplusplus
4
+ extern "C" {
5
+ #endif
6
+
7
+ double ddot_(int *n, double *sx, int *incx, double *sy, int *incy)
8
+ {
9
+ long int i, m, nn, iincx, iincy;
10
+ double stemp;
11
+ long int ix, iy;
12
+
13
+ /* forms the dot product of two vectors.
14
+ uses unrolled loops for increments equal to one.
15
+ jack dongarra, linpack, 3/11/78.
16
+ modified 12/3/93, array(1) declarations changed to array(*) */
17
+
18
+ /* Dereference inputs */
19
+ nn = *n;
20
+ iincx = *incx;
21
+ iincy = *incy;
22
+
23
+ stemp = 0.0;
24
+ if (nn > 0)
25
+ {
26
+ if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */
27
+ {
28
+ m = nn-4;
29
+ for (i = 0; i < m; i += 5)
30
+ stemp += sx[i] * sy[i] + sx[i+1] * sy[i+1] + sx[i+2] * sy[i+2] +
31
+ sx[i+3] * sy[i+3] + sx[i+4] * sy[i+4];
32
+
33
+ for ( ; i < nn; i++) /* clean-up loop */
34
+ stemp += sx[i] * sy[i];
35
+ }
36
+ else /* code for unequal increments or equal increments not equal to 1 */
37
+ {
38
+ ix = 0;
39
+ iy = 0;
40
+ if (iincx < 0)
41
+ ix = (1 - nn) * iincx;
42
+ if (iincy < 0)
43
+ iy = (1 - nn) * iincy;
44
+ for (i = 0; i < nn; i++)
45
+ {
46
+ stemp += sx[ix] * sy[iy];
47
+ ix += iincx;
48
+ iy += iincy;
49
+ }
50
+ }
51
+ }
52
+
53
+ return stemp;
54
+ } /* ddot_ */
55
+
56
+ #ifdef __cplusplus
57
+ }
58
+ #endif
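ddot_ follows the same by-pointer convention and returns the accumulated product directly. A short sketch, again assuming the declaration in the bundled blas.h:

/* Illustrative caller, not part of the gem sources */
#include <stdio.h>
#include "blas.h"

int main(void)
{
    double x[4] = {1.0, 2.0, 3.0, 4.0};
    double y[4] = {4.0, 3.0, 2.0, 1.0};
    int n = 4, inc = 1;

    double d = ddot_(&n, x, &inc, y, &inc);  /* 1*4 + 2*3 + 3*2 + 4*1 = 20 */
    printf("dot = %g\n", d);
    return 0;
}
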
@@ -0,0 +1,70 @@
1
+ #include <math.h> /* Needed for fabs() and sqrt() */
2
+ #include "blas.h"
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ double dnrm2_(int *n, double *x, int *incx)
9
+ {
10
+ long int ix, nn, iincx;
11
+ double norm, scale, absxi, ssq, temp;
12
+
13
+ /* DNRM2 returns the euclidean norm of a vector via the function
14
+ name, so that
15
+
16
+ DNRM2 := sqrt( x'*x )
17
+
18
+ -- This version written on 25-October-1982.
19
+ Modified on 14-October-1993 to inline the call to SLASSQ.
20
+ Sven Hammarling, Nag Ltd. */
21
+
22
+ /* Dereference inputs */
23
+ nn = *n;
24
+ iincx = *incx;
25
+
26
+ if( nn > 0 && iincx > 0 )
27
+ {
28
+ if (nn == 1)
29
+ {
30
+ norm = fabs(x[0]);
31
+ }
32
+ else
33
+ {
34
+ scale = 0.0;
35
+ ssq = 1.0;
36
+
37
+ /* The following loop is equivalent to this call to the LAPACK
38
+ auxiliary routine: CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */
39
+
40
+ for (ix=(nn-1)*iincx; ix>=0; ix-=iincx)
41
+ {
42
+ if (x[ix] != 0.0)
43
+ {
44
+ absxi = fabs(x[ix]);
45
+ if (scale < absxi)
46
+ {
47
+ temp = scale / absxi;
48
+ ssq = ssq * (temp * temp) + 1.0;
49
+ scale = absxi;
50
+ }
51
+ else
52
+ {
53
+ temp = absxi / scale;
54
+ ssq += temp * temp;
55
+ }
56
+ }
57
+ }
58
+ norm = scale * sqrt(ssq);
59
+ }
60
+ }
61
+ else
62
+ norm = 0.0;
63
+
64
+ return norm;
65
+
66
+ } /* dnrm2_ */
67
+
68
+ #ifdef __cplusplus
69
+ }
70
+ #endif
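dnrm2_ computes the Euclidean norm with the inlined SLASSQ scale/ssq recurrence, so the intermediate squares cannot overflow or underflow even for extreme entries; for moderate values it agrees with the square root of the self dot product. A sketch under the same blas.h assumption:

/* Illustrative caller, not part of the gem sources */
#include <math.h>
#include <stdio.h>
#include "blas.h"

int main(void)
{
    double x[3] = {3.0, 4.0, 12.0};
    int n = 3, inc = 1;

    double nrm = dnrm2_(&n, x, &inc);            /* sqrt(9 + 16 + 144) = 13 */
    double dot = ddot_(&n, x, &inc, x, &inc);    /* 169; sqrt(dot) can overflow for huge entries */
    printf("dnrm2 = %g, sqrt(ddot) = %g\n", nrm, sqrt(dot));
    return 0;
}
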
@@ -0,0 +1,52 @@
1
+ #include "blas.h"
2
+
3
+ #ifdef __cplusplus
4
+ extern "C" {
5
+ #endif
6
+
7
+ int dscal_(int *n, double *sa, double *sx, int *incx)
8
+ {
9
+ long int i, m, nincx, nn, iincx;
10
+ double ssa;
11
+
12
+ /* scales a vector by a constant.
13
+ uses unrolled loops for increment equal to 1.
14
+ jack dongarra, linpack, 3/11/78.
15
+ modified 3/93 to return if incx .le. 0.
16
+ modified 12/3/93, array(1) declarations changed to array(*) */
17
+
18
+ /* Dereference inputs */
19
+ nn = *n;
20
+ iincx = *incx;
21
+ ssa = *sa;
22
+
23
+ if (nn > 0 && iincx > 0)
24
+ {
25
+ if (iincx == 1) /* code for increment equal to 1 */
26
+ {
27
+ m = nn-4;
28
+ for (i = 0; i < m; i += 5)
29
+ {
30
+ sx[i] = ssa * sx[i];
31
+ sx[i+1] = ssa * sx[i+1];
32
+ sx[i+2] = ssa * sx[i+2];
33
+ sx[i+3] = ssa * sx[i+3];
34
+ sx[i+4] = ssa * sx[i+4];
35
+ }
36
+ for ( ; i < nn; ++i) /* clean-up loop */
37
+ sx[i] = ssa * sx[i];
38
+ }
39
+ else /* code for increment not equal to 1 */
40
+ {
41
+ nincx = nn * iincx;
42
+ for (i = 0; i < nincx; i += iincx)
43
+ sx[i] = ssa * sx[i];
44
+ }
45
+ }
46
+
47
+ return 0;
48
+ } /* dscal_ */
49
+
50
+ #ifdef __cplusplus
51
+ }
52
+ #endif
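dscal_ scales a vector in place; combined with dnrm2_ it gives the usual normalization idiom. An illustrative sketch (not part of the diff, assuming the bundled blas.h declarations):

/* Illustrative caller, not part of the gem sources */
#include <stdio.h>
#include "blas.h"

int main(void)
{
    double w[4] = {2.0, 0.0, -2.0, 1.0};
    int n = 4, inc = 1;

    double nrm = dnrm2_(&n, w, &inc);   /* sqrt(4 + 0 + 4 + 1) = 3 */
    if (nrm > 0.0)
    {
        double inv = 1.0 / nrm;
        dscal_(&n, &inv, w, &inc);      /* w now has unit Euclidean norm */
    }
    printf("%g %g %g %g\n", w[0], w[1], w[2], w[3]);
    return 0;
}
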
@@ -0,0 +1,3725 @@
1
+ #include <math.h>
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <string.h>
5
+ #include <stdarg.h>
6
+ #include <locale.h>
7
+ #include "linear.h"
8
+ #include "newton.h"
9
+ int liblinear_version = LIBLINEAR_VERSION;
10
+ typedef signed char schar;
11
+ template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
12
+ #ifndef min
13
+ template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
14
+ #endif
15
+ #ifndef max
16
+ template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
17
+ #endif
18
+ template <class S, class T> static inline void clone(T*& dst, S* src, int n)
19
+ {
20
+ dst = new T[n];
21
+ memcpy((void *)dst,(void *)src,sizeof(T)*n);
22
+ }
23
+ #define INF HUGE_VAL
24
+ #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
25
+
26
+ static void print_string_stdout(const char *s)
27
+ {
28
+ fputs(s,stdout);
29
+ fflush(stdout);
30
+ }
31
+ static void print_null(const char *s) {}
32
+
33
+ static void (*liblinear_print_string) (const char *) = &print_string_stdout;
34
+
35
+ #if 1
36
+ static void info(const char *fmt,...)
37
+ {
38
+ char buf[BUFSIZ];
39
+ va_list ap;
40
+ va_start(ap,fmt);
41
+ vsprintf(buf,fmt,ap);
42
+ va_end(ap);
43
+ (*liblinear_print_string)(buf);
44
+ }
45
+ #else
46
+ static void info(const char *fmt,...) {}
47
+ #endif
48
+ class sparse_operator
49
+ {
50
+ public:
51
+ static double nrm2_sq(const feature_node *x)
52
+ {
53
+ double ret = 0;
54
+ while(x->index != -1)
55
+ {
56
+ ret += x->value*x->value;
57
+ x++;
58
+ }
59
+ return (ret);
60
+ }
61
+
62
+ static double dot(const double *s, const feature_node *x)
63
+ {
64
+ double ret = 0;
65
+ while(x->index != -1)
66
+ {
67
+ ret += s[x->index-1]*x->value;
68
+ x++;
69
+ }
70
+ return (ret);
71
+ }
72
+
73
+ static double sparse_dot(const feature_node *x1, const feature_node *x2)
74
+ {
75
+ double ret = 0;
76
+ while(x1->index != -1 && x2->index != -1)
77
+ {
78
+ if(x1->index == x2->index)
79
+ {
80
+ ret += x1->value * x2->value;
81
+ ++x1;
82
+ ++x2;
83
+ }
84
+ else
85
+ {
86
+ if(x1->index > x2->index)
87
+ ++x2;
88
+ else
89
+ ++x1;
90
+ }
91
+ }
92
+ return (ret);
93
+ }
94
+
95
+ static void axpy(const double a, const feature_node *x, double *y)
96
+ {
97
+ while(x->index != -1)
98
+ {
99
+ y[x->index-1] += a*x->value;
100
+ x++;
101
+ }
102
+ }
103
+ };
104
+
105
+ // L2-regularized empirical risk minimization
106
+ // min_w w^Tw/2 + \sum C_i \xi(w^Tx_i), where \xi() is the loss
107
+
108
+ class l2r_erm_fun: public function
109
+ {
110
+ public:
111
+ l2r_erm_fun(const problem *prob, const parameter *param, double *C);
112
+ ~l2r_erm_fun();
113
+
114
+ double fun(double *w);
115
+ double linesearch_and_update(double *w, double *d, double *f, double *g, double alpha);
116
+ int get_nr_variable(void);
117
+
118
+ protected:
119
+ virtual double C_times_loss(int i, double wx_i) = 0;
120
+ void Xv(double *v, double *Xv);
121
+ void XTv(double *v, double *XTv);
122
+
123
+ double *C;
124
+ const problem *prob;
125
+ double *wx;
126
+ double *tmp; // a working array
127
+ double wTw;
128
+ int regularize_bias;
129
+ };
130
+
131
+ l2r_erm_fun::l2r_erm_fun(const problem *prob, const parameter *param, double *C)
132
+ {
133
+ int l=prob->l;
134
+
135
+ this->prob = prob;
136
+
137
+ wx = new double[l];
138
+ tmp = new double[l];
139
+ this->C = C;
140
+ this->regularize_bias = param->regularize_bias;
141
+ }
142
+
143
+ l2r_erm_fun::~l2r_erm_fun()
144
+ {
145
+ delete[] wx;
146
+ delete[] tmp;
147
+ }
148
+
149
+ double l2r_erm_fun::fun(double *w)
150
+ {
151
+ int i;
152
+ double f=0;
153
+ int l=prob->l;
154
+ int w_size=get_nr_variable();
155
+
156
+ wTw = 0;
157
+ Xv(w, wx);
158
+
159
+ for(i=0;i<w_size;i++)
160
+ wTw += w[i]*w[i];
161
+ if(regularize_bias == 0)
162
+ wTw -= w[w_size-1]*w[w_size-1];
163
+ for(i=0;i<l;i++)
164
+ f += C_times_loss(i, wx[i]);
165
+ f = f + 0.5 * wTw;
166
+
167
+ return(f);
168
+ }
169
+
170
+ int l2r_erm_fun::get_nr_variable(void)
171
+ {
172
+ return prob->n;
173
+ }
174
+
175
+ // On entry *f must be the function value of w
176
+ // On exit w is updated and *f is the new function value
177
+ double l2r_erm_fun::linesearch_and_update(double *w, double *s, double *f, double *g, double alpha)
178
+ {
179
+ int i;
180
+ int l = prob->l;
181
+ double sTs = 0;
182
+ double wTs = 0;
183
+ double gTs = 0;
184
+ double eta = 0.01;
185
+ int w_size = get_nr_variable();
186
+ int max_num_linesearch = 20;
187
+ double fold = *f;
188
+ Xv(s, tmp);
189
+
190
+ for (i=0;i<w_size;i++)
191
+ {
192
+ sTs += s[i] * s[i];
193
+ wTs += s[i] * w[i];
194
+ gTs += s[i] * g[i];
195
+ }
196
+ if(regularize_bias == 0)
197
+ {
198
+ // bias not used in calculating (w + \alpha s)^T (w + \alpha s)
199
+ sTs -= s[w_size-1] * s[w_size-1];
200
+ wTs -= s[w_size-1] * w[w_size-1];
201
+ }
202
+
203
+ int num_linesearch = 0;
204
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
205
+ {
206
+ double loss = 0;
207
+ for(i=0;i<l;i++)
208
+ {
209
+ double inner_product = tmp[i] * alpha + wx[i];
210
+ loss += C_times_loss(i, inner_product);
211
+ }
212
+ *f = loss + (alpha * alpha * sTs + wTw) / 2.0 + alpha * wTs;
213
+ if (*f - fold <= eta * alpha * gTs)
214
+ {
215
+ for (i=0;i<l;i++)
216
+ wx[i] += alpha * tmp[i];
217
+ break;
218
+ }
219
+ else
220
+ alpha *= 0.5;
221
+ }
222
+
223
+ if (num_linesearch >= max_num_linesearch)
224
+ {
225
+ *f = fold;
226
+ return 0;
227
+ }
228
+ else
229
+ for (i=0;i<w_size;i++)
230
+ w[i] += alpha * s[i];
231
+
232
+ wTw += alpha * alpha * sTs + 2* alpha * wTs;
233
+ return alpha;
234
+ }
235
+
236
+ void l2r_erm_fun::Xv(double *v, double *Xv)
237
+ {
238
+ int i;
239
+ int l=prob->l;
240
+ feature_node **x=prob->x;
241
+
242
+ for(i=0;i<l;i++)
243
+ Xv[i]=sparse_operator::dot(v, x[i]);
244
+ }
245
+
246
+ void l2r_erm_fun::XTv(double *v, double *XTv)
247
+ {
248
+ int i;
249
+ int l=prob->l;
250
+ int w_size=get_nr_variable();
251
+ feature_node **x=prob->x;
252
+
253
+ for(i=0;i<w_size;i++)
254
+ XTv[i]=0;
255
+ for(i=0;i<l;i++)
256
+ sparse_operator::axpy(v[i], x[i], XTv);
257
+ }
258
+
259
+ class l2r_lr_fun: public l2r_erm_fun
260
+ {
261
+ public:
262
+ l2r_lr_fun(const problem *prob, const parameter *param, double *C);
263
+ ~l2r_lr_fun();
264
+
265
+ void grad(double *w, double *g);
266
+ void Hv(double *s, double *Hs);
267
+
268
+ void get_diag_preconditioner(double *M);
269
+
270
+ private:
271
+ double *D;
272
+ double C_times_loss(int i, double wx_i);
273
+ };
274
+
275
+ l2r_lr_fun::l2r_lr_fun(const problem *prob, const parameter *param, double *C):
276
+ l2r_erm_fun(prob, param, C)
277
+ {
278
+ int l=prob->l;
279
+ D = new double[l];
280
+ }
281
+
282
+ l2r_lr_fun::~l2r_lr_fun()
283
+ {
284
+ delete[] D;
285
+ }
286
+
287
+ double l2r_lr_fun::C_times_loss(int i, double wx_i)
288
+ {
289
+ double ywx_i = wx_i * prob->y[i];
290
+ if (ywx_i >= 0)
291
+ return C[i]*log(1 + exp(-ywx_i));
292
+ else
293
+ return C[i]*(-ywx_i + log(1 + exp(ywx_i)));
294
+ }
295
+
296
+ void l2r_lr_fun::grad(double *w, double *g)
297
+ {
298
+ int i;
299
+ double *y=prob->y;
300
+ int l=prob->l;
301
+ int w_size=get_nr_variable();
302
+
303
+ for(i=0;i<l;i++)
304
+ {
305
+ tmp[i] = 1/(1 + exp(-y[i]*wx[i]));
306
+ D[i] = tmp[i]*(1-tmp[i]);
307
+ tmp[i] = C[i]*(tmp[i]-1)*y[i];
308
+ }
309
+ XTv(tmp, g);
310
+
311
+ for(i=0;i<w_size;i++)
312
+ g[i] = w[i] + g[i];
313
+ if(regularize_bias == 0)
314
+ g[w_size-1] -= w[w_size-1];
315
+ }
316
+
317
+ void l2r_lr_fun::get_diag_preconditioner(double *M)
318
+ {
319
+ int i;
320
+ int l = prob->l;
321
+ int w_size=get_nr_variable();
322
+ feature_node **x = prob->x;
323
+
324
+ for (i=0; i<w_size; i++)
325
+ M[i] = 1;
326
+ if(regularize_bias == 0)
327
+ M[w_size-1] = 0;
328
+
329
+ for (i=0; i<l; i++)
330
+ {
331
+ feature_node *xi = x[i];
332
+ while (xi->index!=-1)
333
+ {
334
+ M[xi->index-1] += xi->value*xi->value*C[i]*D[i];
335
+ xi++;
336
+ }
337
+ }
338
+ }
339
+
340
+ void l2r_lr_fun::Hv(double *s, double *Hs)
341
+ {
342
+ int i;
343
+ int l=prob->l;
344
+ int w_size=get_nr_variable();
345
+ feature_node **x=prob->x;
346
+
347
+ for(i=0;i<w_size;i++)
348
+ Hs[i] = 0;
349
+ for(i=0;i<l;i++)
350
+ {
351
+ feature_node * const xi=x[i];
352
+ double xTs = sparse_operator::dot(s, xi);
353
+
354
+ xTs = C[i]*D[i]*xTs;
355
+
356
+ sparse_operator::axpy(xTs, xi, Hs);
357
+ }
358
+ for(i=0;i<w_size;i++)
359
+ Hs[i] = s[i] + Hs[i];
360
+ if(regularize_bias == 0)
361
+ Hs[w_size-1] -= s[w_size-1];
362
+ }
363
+
364
+ class l2r_l2_svc_fun: public l2r_erm_fun
365
+ {
366
+ public:
367
+ l2r_l2_svc_fun(const problem *prob, const parameter *param, double *C);
368
+ ~l2r_l2_svc_fun();
369
+
370
+ void grad(double *w, double *g);
371
+ void Hv(double *s, double *Hs);
372
+
373
+ void get_diag_preconditioner(double *M);
374
+
375
+ protected:
376
+ void subXTv(double *v, double *XTv);
377
+
378
+ int *I;
379
+ int sizeI;
380
+
381
+ private:
382
+ double C_times_loss(int i, double wx_i);
383
+ };
384
+
385
+ l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, const parameter *param, double *C):
386
+ l2r_erm_fun(prob, param, C)
387
+ {
388
+ I = new int[prob->l];
389
+ }
390
+
391
+ l2r_l2_svc_fun::~l2r_l2_svc_fun()
392
+ {
393
+ delete[] I;
394
+ }
395
+
396
+ double l2r_l2_svc_fun::C_times_loss(int i, double wx_i)
397
+ {
398
+ double d = 1 - prob->y[i] * wx_i;
399
+ if (d > 0)
400
+ return C[i] * d * d;
401
+ else
402
+ return 0;
403
+ }
404
+
405
+ void l2r_l2_svc_fun::grad(double *w, double *g)
406
+ {
407
+ int i;
408
+ double *y=prob->y;
409
+ int l=prob->l;
410
+ int w_size=get_nr_variable();
411
+
412
+ sizeI = 0;
413
+ for (i=0;i<l;i++)
414
+ {
415
+ tmp[i] = wx[i] * y[i];
416
+ if (tmp[i] < 1)
417
+ {
418
+ tmp[sizeI] = C[i]*y[i]*(tmp[i]-1);
419
+ I[sizeI] = i;
420
+ sizeI++;
421
+ }
422
+ }
423
+ subXTv(tmp, g);
424
+
425
+ for(i=0;i<w_size;i++)
426
+ g[i] = w[i] + 2*g[i];
427
+ if(regularize_bias == 0)
428
+ g[w_size-1] -= w[w_size-1];
429
+ }
430
+
431
+ void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
432
+ {
433
+ int i;
434
+ int w_size=get_nr_variable();
435
+ feature_node **x = prob->x;
436
+
437
+ for (i=0; i<w_size; i++)
438
+ M[i] = 1;
439
+ if(regularize_bias == 0)
440
+ M[w_size-1] = 0;
441
+
442
+ for (i=0; i<sizeI; i++)
443
+ {
444
+ int idx = I[i];
445
+ feature_node *xi = x[idx];
446
+ while (xi->index!=-1)
447
+ {
448
+ M[xi->index-1] += xi->value*xi->value*C[idx]*2;
449
+ xi++;
450
+ }
451
+ }
452
+ }
453
+
454
+ void l2r_l2_svc_fun::Hv(double *s, double *Hs)
455
+ {
456
+ int i;
457
+ int w_size=get_nr_variable();
458
+ feature_node **x=prob->x;
459
+
460
+ for(i=0;i<w_size;i++)
461
+ Hs[i]=0;
462
+ for(i=0;i<sizeI;i++)
463
+ {
464
+ feature_node * const xi=x[I[i]];
465
+ double xTs = sparse_operator::dot(s, xi);
466
+
467
+ xTs = C[I[i]]*xTs;
468
+
469
+ sparse_operator::axpy(xTs, xi, Hs);
470
+ }
471
+ for(i=0;i<w_size;i++)
472
+ Hs[i] = s[i] + 2*Hs[i];
473
+ if(regularize_bias == 0)
474
+ Hs[w_size-1] -= s[w_size-1];
475
+ }
476
+
477
+ void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
478
+ {
479
+ int i;
480
+ int w_size=get_nr_variable();
481
+ feature_node **x=prob->x;
482
+
483
+ for(i=0;i<w_size;i++)
484
+ XTv[i]=0;
485
+ for(i=0;i<sizeI;i++)
486
+ sparse_operator::axpy(v[i], x[I[i]], XTv);
487
+ }
488
+
489
+ class l2r_l2_svr_fun: public l2r_l2_svc_fun
490
+ {
491
+ public:
492
+ l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C);
493
+
494
+ void grad(double *w, double *g);
495
+
496
+ private:
497
+ double C_times_loss(int i, double wx_i);
498
+ double p;
499
+ };
500
+
501
+ l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C):
502
+ l2r_l2_svc_fun(prob, param, C)
503
+ {
504
+ this->p = param->p;
505
+ this->regularize_bias = param->regularize_bias;
506
+ }
507
+
508
+ double l2r_l2_svr_fun::C_times_loss(int i, double wx_i)
509
+ {
510
+ double d = wx_i - prob->y[i];
511
+ if(d < -p)
512
+ return C[i]*(d+p)*(d+p);
513
+ else if(d > p)
514
+ return C[i]*(d-p)*(d-p);
515
+ return 0;
516
+ }
517
+
518
+ void l2r_l2_svr_fun::grad(double *w, double *g)
519
+ {
520
+ int i;
521
+ double *y=prob->y;
522
+ int l=prob->l;
523
+ int w_size=get_nr_variable();
524
+ double d;
525
+
526
+ sizeI = 0;
527
+ for(i=0;i<l;i++)
528
+ {
529
+ d = wx[i] - y[i];
530
+
531
+ // generate index set I
532
+ if(d < -p)
533
+ {
534
+ tmp[sizeI] = C[i]*(d+p);
535
+ I[sizeI] = i;
536
+ sizeI++;
537
+ }
538
+ else if(d > p)
539
+ {
540
+ tmp[sizeI] = C[i]*(d-p);
541
+ I[sizeI] = i;
542
+ sizeI++;
543
+ }
544
+
545
+ }
546
+ subXTv(tmp, g);
547
+
548
+ for(i=0;i<w_size;i++)
549
+ g[i] = w[i] + 2*g[i];
550
+ if(regularize_bias == 0)
551
+ g[w_size-1] -= w[w_size-1];
552
+ }
553
+
554
+ // A coordinate descent algorithm for
555
+ // multi-class support vector machines by Crammer and Singer
556
+ //
557
+ // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
558
+ // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
559
+ //
560
+ // where e^m_i = 0 if y_i = m,
561
+ // e^m_i = 1 if y_i != m,
562
+ // C^m_i = C if m = y_i,
563
+ // C^m_i = 0 if m != y_i,
564
+ // and w_m(\alpha) = \sum_i \alpha^m_i x_i
565
+ //
566
+ // Given:
567
+ // x, y, C
568
+ // eps is the stopping tolerance
569
+ //
570
+ // solution will be put in w
571
+ //
572
+ // See Appendix of LIBLINEAR paper, Fan et al. (2008)
573
+
574
+ #define GETI(i) ((int) prob->y[i])
575
+ // To support weights for instances, use GETI(i) (i)
576
+
577
+ class Solver_MCSVM_CS
578
+ {
579
+ public:
580
+ Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
581
+ ~Solver_MCSVM_CS();
582
+ void Solve(double *w);
583
+ private:
584
+ void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
585
+ bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
586
+ double *B, *C, *G;
587
+ int w_size, l;
588
+ int nr_class;
589
+ int max_iter;
590
+ double eps;
591
+ const problem *prob;
592
+ };
593
+
594
+ Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
595
+ {
596
+ this->w_size = prob->n;
597
+ this->l = prob->l;
598
+ this->nr_class = nr_class;
599
+ this->eps = eps;
600
+ this->max_iter = max_iter;
601
+ this->prob = prob;
602
+ this->B = new double[nr_class];
603
+ this->G = new double[nr_class];
604
+ this->C = weighted_C;
605
+ }
606
+
607
+ Solver_MCSVM_CS::~Solver_MCSVM_CS()
608
+ {
609
+ delete[] B;
610
+ delete[] G;
611
+ }
612
+
613
+ int compare_double(const void *a, const void *b)
614
+ {
615
+ if(*(double *)a > *(double *)b)
616
+ return -1;
617
+ if(*(double *)a < *(double *)b)
618
+ return 1;
619
+ return 0;
620
+ }
621
+
622
+ void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
623
+ {
624
+ int r;
625
+ double *D;
626
+
627
+ clone(D, B, active_i);
628
+ if(yi < active_i)
629
+ D[yi] += A_i*C_yi;
630
+ qsort(D, active_i, sizeof(double), compare_double);
631
+
632
+ double beta = D[0] - A_i*C_yi;
633
+ for(r=1;r<active_i && beta<r*D[r];r++)
634
+ beta += D[r];
635
+ beta /= r;
636
+
637
+ for(r=0;r<active_i;r++)
638
+ {
639
+ if(r == yi)
640
+ alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
641
+ else
642
+ alpha_new[r] = min((double)0, (beta - B[r])/A_i);
643
+ }
644
+ delete[] D;
645
+ }
646
+
647
+ bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
648
+ {
649
+ double bound = 0;
650
+ if(m == yi)
651
+ bound = C[GETI(i)];
652
+ if(alpha_i == bound && G[m] < minG)
653
+ return true;
654
+ return false;
655
+ }
656
+
657
+ void Solver_MCSVM_CS::Solve(double *w)
658
+ {
659
+ int i, m, s;
660
+ int iter = 0;
661
+ double *alpha = new double[l*nr_class];
662
+ double *alpha_new = new double[nr_class];
663
+ int *index = new int[l];
664
+ double *QD = new double[l];
665
+ int *d_ind = new int[nr_class];
666
+ double *d_val = new double[nr_class];
667
+ int *alpha_index = new int[nr_class*l];
668
+ int *y_index = new int[l];
669
+ int active_size = l;
670
+ int *active_size_i = new int[l];
671
+ double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
672
+ bool start_from_all = true;
673
+
674
+ // Initial alpha can be set here. Note that
675
+ // sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l-1
676
+ // alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
677
+ // alpha[i*nr_class+m] <= 0 if prob->y[i] != m
678
+ // If initial alpha isn't zero, uncomment the for loop below to initialize w
679
+ for(i=0;i<l*nr_class;i++)
680
+ alpha[i] = 0;
681
+
682
+ for(i=0;i<w_size*nr_class;i++)
683
+ w[i] = 0;
684
+ for(i=0;i<l;i++)
685
+ {
686
+ for(m=0;m<nr_class;m++)
687
+ alpha_index[i*nr_class+m] = m;
688
+ feature_node *xi = prob->x[i];
689
+ QD[i] = 0;
690
+ while(xi->index != -1)
691
+ {
692
+ double val = xi->value;
693
+ QD[i] += val*val;
694
+
695
+ // Uncomment the for loop if initial alpha isn't zero
696
+ // for(m=0; m<nr_class; m++)
697
+ // w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
698
+ xi++;
699
+ }
700
+ active_size_i[i] = nr_class;
701
+ y_index[i] = (int)prob->y[i];
702
+ index[i] = i;
703
+ }
704
+
705
+ while(iter < max_iter)
706
+ {
707
+ double stopping = -INF;
708
+ for(i=0;i<active_size;i++)
709
+ {
710
+ int j = i+rand()%(active_size-i);
711
+ swap(index[i], index[j]);
712
+ }
713
+ for(s=0;s<active_size;s++)
714
+ {
715
+ i = index[s];
716
+ double Ai = QD[i];
717
+ double *alpha_i = &alpha[i*nr_class];
718
+ int *alpha_index_i = &alpha_index[i*nr_class];
719
+
720
+ if(Ai > 0)
721
+ {
722
+ for(m=0;m<active_size_i[i];m++)
723
+ G[m] = 1;
724
+ if(y_index[i] < active_size_i[i])
725
+ G[y_index[i]] = 0;
726
+
727
+ feature_node *xi = prob->x[i];
728
+ while(xi->index!= -1)
729
+ {
730
+ double *w_i = &w[(xi->index-1)*nr_class];
731
+ for(m=0;m<active_size_i[i];m++)
732
+ G[m] += w_i[alpha_index_i[m]]*(xi->value);
733
+ xi++;
734
+ }
735
+
736
+ double minG = INF;
737
+ double maxG = -INF;
738
+ for(m=0;m<active_size_i[i];m++)
739
+ {
740
+ if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
741
+ minG = G[m];
742
+ if(G[m] > maxG)
743
+ maxG = G[m];
744
+ }
745
+ if(y_index[i] < active_size_i[i])
746
+ if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
747
+ minG = G[y_index[i]];
748
+
749
+ for(m=0;m<active_size_i[i];m++)
750
+ {
751
+ if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
752
+ {
753
+ active_size_i[i]--;
754
+ while(active_size_i[i]>m)
755
+ {
756
+ if(!be_shrunk(i, active_size_i[i], y_index[i],
757
+ alpha_i[alpha_index_i[active_size_i[i]]], minG))
758
+ {
759
+ swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
760
+ swap(G[m], G[active_size_i[i]]);
761
+ if(y_index[i] == active_size_i[i])
762
+ y_index[i] = m;
763
+ else if(y_index[i] == m)
764
+ y_index[i] = active_size_i[i];
765
+ break;
766
+ }
767
+ active_size_i[i]--;
768
+ }
769
+ }
770
+ }
771
+
772
+ if(active_size_i[i] <= 1)
773
+ {
774
+ active_size--;
775
+ swap(index[s], index[active_size]);
776
+ s--;
777
+ continue;
778
+ }
779
+
780
+ if(maxG-minG <= 1e-12)
781
+ continue;
782
+ else
783
+ stopping = max(maxG - minG, stopping);
784
+
785
+ for(m=0;m<active_size_i[i];m++)
786
+ B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
787
+
788
+ solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
789
+ int nz_d = 0;
790
+ for(m=0;m<active_size_i[i];m++)
791
+ {
792
+ double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
793
+ alpha_i[alpha_index_i[m]] = alpha_new[m];
794
+ if(fabs(d) >= 1e-12)
795
+ {
796
+ d_ind[nz_d] = alpha_index_i[m];
797
+ d_val[nz_d] = d;
798
+ nz_d++;
799
+ }
800
+ }
801
+
802
+ xi = prob->x[i];
803
+ while(xi->index != -1)
804
+ {
805
+ double *w_i = &w[(xi->index-1)*nr_class];
806
+ for(m=0;m<nz_d;m++)
807
+ w_i[d_ind[m]] += d_val[m]*xi->value;
808
+ xi++;
809
+ }
810
+ }
811
+ }
812
+
813
+ iter++;
814
+ if(iter % 10 == 0)
815
+ {
816
+ info(".");
817
+ }
818
+
819
+ if(stopping < eps_shrink)
820
+ {
821
+ if(stopping < eps && start_from_all == true)
822
+ break;
823
+ else
824
+ {
825
+ active_size = l;
826
+ for(i=0;i<l;i++)
827
+ active_size_i[i] = nr_class;
828
+ info("*");
829
+ eps_shrink = max(eps_shrink/2, eps);
830
+ start_from_all = true;
831
+ }
832
+ }
833
+ else
834
+ start_from_all = false;
835
+ }
836
+
837
+ info("\noptimization finished, #iter = %d\n",iter);
838
+ if (iter >= max_iter)
839
+ info("\nWARNING: reaching max number of iterations\n");
840
+
841
+ // calculate objective value
842
+ double v = 0;
843
+ int nSV = 0;
844
+ for(i=0;i<w_size*nr_class;i++)
845
+ v += w[i]*w[i];
846
+ v = 0.5*v;
847
+ for(i=0;i<l*nr_class;i++)
848
+ {
849
+ v += alpha[i];
850
+ if(fabs(alpha[i]) > 0)
851
+ nSV++;
852
+ }
853
+ for(i=0;i<l;i++)
854
+ v -= alpha[i*nr_class+(int)prob->y[i]];
855
+ info("Objective value = %lf\n",v);
856
+ info("nSV = %d\n",nSV);
857
+
858
+ delete [] alpha;
859
+ delete [] alpha_new;
860
+ delete [] index;
861
+ delete [] QD;
862
+ delete [] d_ind;
863
+ delete [] d_val;
864
+ delete [] alpha_index;
865
+ delete [] y_index;
866
+ delete [] active_size_i;
867
+ }
868
+
869
+ // A coordinate descent algorithm for
870
+ // L1-loss and L2-loss SVM dual problems
871
+ //
872
+ // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
873
+ // s.t. 0 <= \alpha_i <= upper_bound_i,
874
+ //
875
+ // where Qij = yi yj xi^T xj and
876
+ // D is a diagonal matrix
877
+ //
878
+ // In L1-SVM case:
879
+ // upper_bound_i = Cp if y_i = 1
880
+ // upper_bound_i = Cn if y_i = -1
881
+ // D_ii = 0
882
+ // In L2-SVM case:
883
+ // upper_bound_i = INF
884
+ // D_ii = 1/(2*Cp) if y_i = 1
885
+ // D_ii = 1/(2*Cn) if y_i = -1
886
+ //
887
+ // Given:
888
+ // x, y, Cp, Cn
889
+ // eps is the stopping tolerance
890
+ //
891
+ // solution will be put in w
892
+ //
893
+ // See Algorithm 3 of Hsieh et al., ICML 2008
894
+
895
+ #undef GETI
896
+ #define GETI(i) (y[i]+1)
897
+ // To support weights for instances, use GETI(i) (i)
898
+
899
+ static void solve_l2r_l1l2_svc(
900
+ const problem *prob, double *w, double eps,
901
+ double Cp, double Cn, int solver_type)
902
+ {
903
+ int l = prob->l;
904
+ int w_size = prob->n;
905
+ int i, s, iter = 0;
906
+ double C, d, G;
907
+ double *QD = new double[l];
908
+ int max_iter = 1000;
909
+ int *index = new int[l];
910
+ double *alpha = new double[l];
911
+ schar *y = new schar[l];
912
+ int active_size = l;
913
+
914
+ // PG: projected gradient, for shrinking and stopping
915
+ double PG;
916
+ double PGmax_old = INF;
917
+ double PGmin_old = -INF;
918
+ double PGmax_new, PGmin_new;
919
+
920
+ // default solver_type: L2R_L2LOSS_SVC_DUAL
921
+ double diag[3] = {0.5/Cn, 0, 0.5/Cp};
922
+ double upper_bound[3] = {INF, 0, INF};
923
+ if(solver_type == L2R_L1LOSS_SVC_DUAL)
924
+ {
925
+ diag[0] = 0;
926
+ diag[2] = 0;
927
+ upper_bound[0] = Cn;
928
+ upper_bound[2] = Cp;
929
+ }
930
+
931
+ for(i=0; i<l; i++)
932
+ {
933
+ if(prob->y[i] > 0)
934
+ {
935
+ y[i] = +1;
936
+ }
937
+ else
938
+ {
939
+ y[i] = -1;
940
+ }
941
+ }
942
+
943
+ // Initial alpha can be set here. Note that
944
+ // 0 <= alpha[i] <= upper_bound[GETI(i)]
945
+ for(i=0; i<l; i++)
946
+ alpha[i] = 0;
947
+
948
+ for(i=0; i<w_size; i++)
949
+ w[i] = 0;
950
+ for(i=0; i<l; i++)
951
+ {
952
+ QD[i] = diag[GETI(i)];
953
+
954
+ feature_node * const xi = prob->x[i];
955
+ QD[i] += sparse_operator::nrm2_sq(xi);
956
+ sparse_operator::axpy(y[i]*alpha[i], xi, w);
957
+
958
+ index[i] = i;
959
+ }
960
+
961
+ while (iter < max_iter)
962
+ {
963
+ PGmax_new = -INF;
964
+ PGmin_new = INF;
965
+
966
+ for (i=0; i<active_size; i++)
967
+ {
968
+ int j = i+rand()%(active_size-i);
969
+ swap(index[i], index[j]);
970
+ }
971
+
972
+ for (s=0; s<active_size; s++)
973
+ {
974
+ i = index[s];
975
+ const schar yi = y[i];
976
+ feature_node * const xi = prob->x[i];
977
+
978
+ G = yi*sparse_operator::dot(w, xi)-1;
979
+
980
+ C = upper_bound[GETI(i)];
981
+ G += alpha[i]*diag[GETI(i)];
982
+
983
+ PG = 0;
984
+ if (alpha[i] == 0)
985
+ {
986
+ if (G > PGmax_old)
987
+ {
988
+ active_size--;
989
+ swap(index[s], index[active_size]);
990
+ s--;
991
+ continue;
992
+ }
993
+ else if (G < 0)
994
+ PG = G;
995
+ }
996
+ else if (alpha[i] == C)
997
+ {
998
+ if (G < PGmin_old)
999
+ {
1000
+ active_size--;
1001
+ swap(index[s], index[active_size]);
1002
+ s--;
1003
+ continue;
1004
+ }
1005
+ else if (G > 0)
1006
+ PG = G;
1007
+ }
1008
+ else
1009
+ PG = G;
1010
+
1011
+ PGmax_new = max(PGmax_new, PG);
1012
+ PGmin_new = min(PGmin_new, PG);
1013
+
1014
+ if(fabs(PG) > 1.0e-12)
1015
+ {
1016
+ double alpha_old = alpha[i];
1017
+ alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
1018
+ d = (alpha[i] - alpha_old)*yi;
1019
+ sparse_operator::axpy(d, xi, w);
1020
+ }
1021
+ }
1022
+
1023
+ iter++;
1024
+ if(iter % 10 == 0)
1025
+ info(".");
1026
+
1027
+ if(PGmax_new - PGmin_new <= eps)
1028
+ {
1029
+ if(active_size == l)
1030
+ break;
1031
+ else
1032
+ {
1033
+ active_size = l;
1034
+ info("*");
1035
+ PGmax_old = INF;
1036
+ PGmin_old = -INF;
1037
+ continue;
1038
+ }
1039
+ }
1040
+ PGmax_old = PGmax_new;
1041
+ PGmin_old = PGmin_new;
1042
+ if (PGmax_old <= 0)
1043
+ PGmax_old = INF;
1044
+ if (PGmin_old >= 0)
1045
+ PGmin_old = -INF;
1046
+ }
1047
+
1048
+ info("\noptimization finished, #iter = %d\n",iter);
1049
+ if (iter >= max_iter)
1050
+ info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
1051
+
1052
+ // calculate objective value
1053
+
1054
+ double v = 0;
1055
+ int nSV = 0;
1056
+ for(i=0; i<w_size; i++)
1057
+ v += w[i]*w[i];
1058
+ for(i=0; i<l; i++)
1059
+ {
1060
+ v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
1061
+ if(alpha[i] > 0)
1062
+ ++nSV;
1063
+ }
1064
+ info("Objective value = %lf\n",v/2);
1065
+ info("nSV = %d\n",nSV);
1066
+
1067
+ delete [] QD;
1068
+ delete [] alpha;
1069
+ delete [] y;
1070
+ delete [] index;
1071
+ }
1072
+
1073
+
1074
+ // A coordinate descent algorithm for
1075
+ // L1-loss and L2-loss epsilon-SVR dual problem
1076
+ //
1077
+ // min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
1078
+ // s.t. -upper_bound_i <= \beta_i <= upper_bound_i,
1079
+ //
1080
+ // where Qij = xi^T xj and
1081
+ // D is a diagonal matrix
1082
+ //
1083
+ // In L1-SVM case:
1084
+ // upper_bound_i = C
1085
+ // lambda_i = 0
1086
+ // In L2-SVM case:
1087
+ // upper_bound_i = INF
1088
+ // lambda_i = 1/(2*C)
1089
+ //
1090
+ // Given:
1091
+ // x, y, p, C
1092
+ // eps is the stopping tolerance
1093
+ //
1094
+ // solution will be put in w
1095
+ //
1096
+ // See Algorithm 4 of Ho and Lin, 2012
1097
+
1098
+ #undef GETI
1099
+ #define GETI(i) (0)
1100
+ // To support weights for instances, use GETI(i) (i)
1101
+
1102
+ static void solve_l2r_l1l2_svr(
1103
+ const problem *prob, double *w, const parameter *param,
1104
+ int solver_type)
1105
+ {
1106
+ int l = prob->l;
1107
+ double C = param->C;
1108
+ double p = param->p;
1109
+ int w_size = prob->n;
1110
+ double eps = param->eps;
1111
+ int i, s, iter = 0;
1112
+ int max_iter = 1000;
1113
+ int active_size = l;
1114
+ int *index = new int[l];
1115
+
1116
+ double d, G, H;
1117
+ double Gmax_old = INF;
1118
+ double Gmax_new, Gnorm1_new;
1119
+ double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1120
+ double *beta = new double[l];
1121
+ double *QD = new double[l];
1122
+ double *y = prob->y;
1123
+
1124
+ // L2R_L2LOSS_SVR_DUAL
1125
+ double lambda[1], upper_bound[1];
1126
+ lambda[0] = 0.5/C;
1127
+ upper_bound[0] = INF;
1128
+
1129
+ if(solver_type == L2R_L1LOSS_SVR_DUAL)
1130
+ {
1131
+ lambda[0] = 0;
1132
+ upper_bound[0] = C;
1133
+ }
1134
+
1135
+ // Initial beta can be set here. Note that
1136
+ // -upper_bound <= beta[i] <= upper_bound
1137
+ for(i=0; i<l; i++)
1138
+ beta[i] = 0;
1139
+
1140
+ for(i=0; i<w_size; i++)
1141
+ w[i] = 0;
1142
+ for(i=0; i<l; i++)
1143
+ {
1144
+ feature_node * const xi = prob->x[i];
1145
+ QD[i] = sparse_operator::nrm2_sq(xi);
1146
+ sparse_operator::axpy(beta[i], xi, w);
1147
+
1148
+ index[i] = i;
1149
+ }
1150
+
1151
+
1152
+ while(iter < max_iter)
1153
+ {
1154
+ Gmax_new = 0;
1155
+ Gnorm1_new = 0;
1156
+
1157
+ for(i=0; i<active_size; i++)
1158
+ {
1159
+ int j = i+rand()%(active_size-i);
1160
+ swap(index[i], index[j]);
1161
+ }
1162
+
1163
+ for(s=0; s<active_size; s++)
1164
+ {
1165
+ i = index[s];
1166
+ G = -y[i] + lambda[GETI(i)]*beta[i];
1167
+ H = QD[i] + lambda[GETI(i)];
1168
+
1169
+ feature_node * const xi = prob->x[i];
1170
+ G += sparse_operator::dot(w, xi);
1171
+
1172
+ double Gp = G+p;
1173
+ double Gn = G-p;
1174
+ double violation = 0;
1175
+ if(beta[i] == 0)
1176
+ {
1177
+ if(Gp < 0)
1178
+ violation = -Gp;
1179
+ else if(Gn > 0)
1180
+ violation = Gn;
1181
+ else if(Gp>Gmax_old && Gn<-Gmax_old)
1182
+ {
1183
+ active_size--;
1184
+ swap(index[s], index[active_size]);
1185
+ s--;
1186
+ continue;
1187
+ }
1188
+ }
1189
+ else if(beta[i] >= upper_bound[GETI(i)])
1190
+ {
1191
+ if(Gp > 0)
1192
+ violation = Gp;
1193
+ else if(Gp < -Gmax_old)
1194
+ {
1195
+ active_size--;
1196
+ swap(index[s], index[active_size]);
1197
+ s--;
1198
+ continue;
1199
+ }
1200
+ }
1201
+ else if(beta[i] <= -upper_bound[GETI(i)])
1202
+ {
1203
+ if(Gn < 0)
1204
+ violation = -Gn;
1205
+ else if(Gn > Gmax_old)
1206
+ {
1207
+ active_size--;
1208
+ swap(index[s], index[active_size]);
1209
+ s--;
1210
+ continue;
1211
+ }
1212
+ }
1213
+ else if(beta[i] > 0)
1214
+ violation = fabs(Gp);
1215
+ else
1216
+ violation = fabs(Gn);
1217
+
1218
+ Gmax_new = max(Gmax_new, violation);
1219
+ Gnorm1_new += violation;
1220
+
1221
+ // obtain Newton direction d
1222
+ if(Gp < H*beta[i])
1223
+ d = -Gp/H;
1224
+ else if(Gn > H*beta[i])
1225
+ d = -Gn/H;
1226
+ else
1227
+ d = -beta[i];
1228
+
1229
+ if(fabs(d) < 1.0e-12)
1230
+ continue;
1231
+
1232
+ double beta_old = beta[i];
1233
+ beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
1234
+ d = beta[i]-beta_old;
1235
+
1236
+ if(d != 0)
1237
+ sparse_operator::axpy(d, xi, w);
1238
+ }
1239
+
1240
+ if(iter == 0)
1241
+ Gnorm1_init = Gnorm1_new;
1242
+ iter++;
1243
+ if(iter % 10 == 0)
1244
+ info(".");
1245
+
1246
+ if(Gnorm1_new <= eps*Gnorm1_init)
1247
+ {
1248
+ if(active_size == l)
1249
+ break;
1250
+ else
1251
+ {
1252
+ active_size = l;
1253
+ info("*");
1254
+ Gmax_old = INF;
1255
+ continue;
1256
+ }
1257
+ }
1258
+
1259
+ Gmax_old = Gmax_new;
1260
+ }
1261
+
1262
+ info("\noptimization finished, #iter = %d\n", iter);
1263
+ if(iter >= max_iter)
1264
+ info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n");
1265
+
1266
+ // calculate objective value
1267
+ double v = 0;
1268
+ int nSV = 0;
1269
+ for(i=0; i<w_size; i++)
1270
+ v += w[i]*w[i];
1271
+ v = 0.5*v;
1272
+ for(i=0; i<l; i++)
1273
+ {
1274
+ v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
1275
+ if(beta[i] != 0)
1276
+ nSV++;
1277
+ }
1278
+
1279
+ info("Objective value = %lf\n", v);
1280
+ info("nSV = %d\n",nSV);
1281
+
1282
+ delete [] beta;
1283
+ delete [] QD;
1284
+ delete [] index;
1285
+ }
1286
+
1287
+
1288
+ // A coordinate descent algorithm for
1289
+ // the dual of L2-regularized logistic regression problems
1290
+ //
1291
+ // min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
1292
+ // s.t. 0 <= \alpha_i <= upper_bound_i,
1293
+ //
1294
+ // where Qij = yi yj xi^T xj and
1295
+ // upper_bound_i = Cp if y_i = 1
1296
+ // upper_bound_i = Cn if y_i = -1
1297
+ //
1298
+ // Given:
1299
+ // x, y, Cp, Cn
1300
+ // eps is the stopping tolerance
1301
+ //
1302
+ // solution will be put in w
1303
+ //
1304
+ // See Algorithm 5 of Yu et al., MLJ 2010
1305
+
1306
+ #undef GETI
1307
+ #define GETI(i) (y[i]+1)
1308
+ // To support weights for instances, use GETI(i) (i)
1309
+
1310
+ void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
1311
+ {
1312
+ int l = prob->l;
1313
+ int w_size = prob->n;
1314
+ int i, s, iter = 0;
1315
+ double *xTx = new double[l];
1316
+ int max_iter = 1000;
1317
+ int *index = new int[l];
1318
+ double *alpha = new double[2*l]; // store alpha and C - alpha
1319
+ schar *y = new schar[l];
1320
+ int max_inner_iter = 100; // for inner Newton
1321
+ double innereps = 1e-2;
1322
+ double innereps_min = min(1e-8, eps);
1323
+ double upper_bound[3] = {Cn, 0, Cp};
1324
+
1325
+ for(i=0; i<l; i++)
1326
+ {
1327
+ if(prob->y[i] > 0)
1328
+ {
1329
+ y[i] = +1;
1330
+ }
1331
+ else
1332
+ {
1333
+ y[i] = -1;
1334
+ }
1335
+ }
1336
+
1337
+ // Initial alpha can be set here. Note that
1338
+ // 0 < alpha[i] < upper_bound[GETI(i)]
1339
+ // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
1340
+ for(i=0; i<l; i++)
1341
+ {
1342
+ alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
1343
+ alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
1344
+ }
1345
+
1346
+ for(i=0; i<w_size; i++)
1347
+ w[i] = 0;
1348
+ for(i=0; i<l; i++)
1349
+ {
1350
+ feature_node * const xi = prob->x[i];
1351
+ xTx[i] = sparse_operator::nrm2_sq(xi);
1352
+ sparse_operator::axpy(y[i]*alpha[2*i], xi, w);
1353
+ index[i] = i;
1354
+ }
1355
+
1356
+ while (iter < max_iter)
1357
+ {
1358
+ for (i=0; i<l; i++)
1359
+ {
1360
+ int j = i+rand()%(l-i);
1361
+ swap(index[i], index[j]);
1362
+ }
1363
+ int newton_iter = 0;
1364
+ double Gmax = 0;
1365
+ for (s=0; s<l; s++)
1366
+ {
1367
+ i = index[s];
1368
+ const schar yi = y[i];
1369
+ double C = upper_bound[GETI(i)];
1370
+ double ywTx = 0, xisq = xTx[i];
1371
+ feature_node * const xi = prob->x[i];
1372
+ ywTx = yi*sparse_operator::dot(w, xi);
1373
+ double a = xisq, b = ywTx;
1374
+
1375
+ // Decide to minimize g_1(z) or g_2(z)
1376
+ int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
1377
+ if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
1378
+ {
1379
+ ind1 = 2*i+1;
1380
+ ind2 = 2*i;
1381
+ sign = -1;
1382
+ }
1383
+
1384
+ // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
1385
+ double alpha_old = alpha[ind1];
1386
+ double z = alpha_old;
1387
+ if(C - z < 0.5 * C)
1388
+ z = 0.1*z;
1389
+ double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1390
+ Gmax = max(Gmax, fabs(gp));
1391
+
1392
+ // Newton method on the sub-problem
1393
+ const double eta = 0.1; // xi in the paper
1394
+ int inner_iter = 0;
1395
+ while (inner_iter <= max_inner_iter)
1396
+ {
1397
+ if(fabs(gp) < innereps)
1398
+ break;
1399
+ double gpp = a + C/(C-z)/z;
1400
+ double tmpz = z - gp/gpp;
1401
+ if(tmpz <= 0)
1402
+ z *= eta;
1403
+ else // tmpz in (0, C)
1404
+ z = tmpz;
1405
+ gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1406
+ newton_iter++;
1407
+ inner_iter++;
1408
+ }
1409
+
1410
+ if(inner_iter > 0) // update w
1411
+ {
1412
+ alpha[ind1] = z;
1413
+ alpha[ind2] = C-z;
1414
+ sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w);
1415
+ }
1416
+ }
1417
+
1418
+ iter++;
1419
+ if(iter % 10 == 0)
1420
+ info(".");
1421
+
1422
+ if(Gmax < eps)
1423
+ break;
1424
+
1425
+ if(newton_iter <= l/10)
1426
+ innereps = max(innereps_min, 0.1*innereps);
1427
+
1428
+ }
1429
+
1430
+ info("\noptimization finished, #iter = %d\n",iter);
1431
+ if (iter >= max_iter)
1432
+ info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
1433
+
1434
+ // calculate objective value
1435
+
1436
+ double v = 0;
1437
+ for(i=0; i<w_size; i++)
1438
+ v += w[i] * w[i];
1439
+ v *= 0.5;
1440
+ for(i=0; i<l; i++)
1441
+ v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
1442
+ - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
1443
+ info("Objective value = %lf\n", v);
1444
+
1445
+ delete [] xTx;
1446
+ delete [] alpha;
1447
+ delete [] y;
1448
+ delete [] index;
1449
+ }
1450
+
1451
+ // A coordinate descent algorithm for
1452
+ // L1-regularized L2-loss support vector classification
1453
+ //
1454
+ // min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
1455
+ //
1456
+ // Given:
1457
+ // x, y, Cp, Cn
1458
+ // eps is the stopping tolerance
1459
+ //
1460
+ // solution will be put in w
1461
+ //
1462
+ // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
1463
+ //
1464
+ // To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
1465
+ // must have been added to the original data. (see -B and -R option)
1466
+
1467
+ #undef GETI
1468
+ #define GETI(i) (y[i]+1)
1469
+ // To support weights for instances, use GETI(i) (i)
1470
+
1471
+ static void solve_l1r_l2_svc(
1472
+ problem *prob_col, double *w, double eps,
1473
+ double Cp, double Cn, int regularize_bias)
1474
+ {
1475
+ int l = prob_col->l;
1476
+ int w_size = prob_col->n;
1477
+ int j, s, iter = 0;
1478
+ int max_iter = 1000;
1479
+ int active_size = w_size;
1480
+ int max_num_linesearch = 20;
1481
+
1482
+ double sigma = 0.01;
1483
+ double d, G_loss, G, H;
1484
+ double Gmax_old = INF;
1485
+ double Gmax_new, Gnorm1_new;
1486
+ double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1487
+ double d_old, d_diff;
1488
+ double loss_old = 0, loss_new;
1489
+ double appxcond, cond;
1490
+
1491
+ int *index = new int[w_size];
1492
+ schar *y = new schar[l];
1493
+ double *b = new double[l]; // b = 1-ywTx
1494
+ double *xj_sq = new double[w_size];
1495
+ feature_node *x;
1496
+
1497
+ double C[3] = {Cn,0,Cp};
1498
+
1499
+ // Initial w can be set here.
1500
+ for(j=0; j<w_size; j++)
1501
+ w[j] = 0;
1502
+
1503
+ for(j=0; j<l; j++)
1504
+ {
1505
+ b[j] = 1;
1506
+ if(prob_col->y[j] > 0)
1507
+ y[j] = 1;
1508
+ else
1509
+ y[j] = -1;
1510
+ }
1511
+ for(j=0; j<w_size; j++)
1512
+ {
1513
+ index[j] = j;
1514
+ xj_sq[j] = 0;
1515
+ x = prob_col->x[j];
1516
+ while(x->index != -1)
1517
+ {
1518
+ int ind = x->index-1;
1519
+ x->value *= y[ind]; // x->value stores yi*xij
1520
+ double val = x->value;
1521
+ b[ind] -= w[j]*val;
1522
+ xj_sq[j] += C[GETI(ind)]*val*val;
1523
+ x++;
1524
+ }
1525
+ }
1526
+
1527
+ while(iter < max_iter)
1528
+ {
1529
+ Gmax_new = 0;
1530
+ Gnorm1_new = 0;
1531
+
1532
+ for(j=0; j<active_size; j++)
1533
+ {
1534
+ int i = j+rand()%(active_size-j);
1535
+ swap(index[i], index[j]);
1536
+ }
1537
+
1538
+ for(s=0; s<active_size; s++)
1539
+ {
1540
+ j = index[s];
1541
+ G_loss = 0;
1542
+ H = 0;
1543
+
1544
+ x = prob_col->x[j];
1545
+ while(x->index != -1)
1546
+ {
1547
+ int ind = x->index-1;
1548
+ if(b[ind] > 0)
1549
+ {
1550
+ double val = x->value;
1551
+ double tmp = C[GETI(ind)]*val;
1552
+ G_loss -= tmp*b[ind];
1553
+ H += tmp*val;
1554
+ }
1555
+ x++;
1556
+ }
1557
+ G_loss *= 2;
1558
+
1559
+ G = G_loss;
1560
+ H *= 2;
1561
+ H = max(H, 1e-12);
1562
+
1563
+ double violation = 0;
1564
+ double Gp = 0, Gn = 0;
1565
+ if(j == w_size-1 && regularize_bias == 0)
1566
+ violation = fabs(G);
1567
+ else
1568
+ {
1569
+ Gp = G+1;
1570
+ Gn = G-1;
1571
+ if(w[j] == 0)
1572
+ {
1573
+ if(Gp < 0)
1574
+ violation = -Gp;
1575
+ else if(Gn > 0)
1576
+ violation = Gn;
1577
+ else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1578
+ {
1579
+ active_size--;
1580
+ swap(index[s], index[active_size]);
1581
+ s--;
1582
+ continue;
1583
+ }
1584
+ }
1585
+ else if(w[j] > 0)
1586
+ violation = fabs(Gp);
1587
+ else
1588
+ violation = fabs(Gn);
1589
+ }
1590
+ Gmax_new = max(Gmax_new, violation);
1591
+ Gnorm1_new += violation;
1592
+
1593
+ // obtain Newton direction d
1594
+ if(j == w_size-1 && regularize_bias == 0)
1595
+ d = -G/H;
1596
+ else
1597
+ {
1598
+ if(Gp < H*w[j])
1599
+ d = -Gp/H;
1600
+ else if(Gn > H*w[j])
1601
+ d = -Gn/H;
1602
+ else
1603
+ d = -w[j];
1604
+ }
1605
+
1606
+ if(fabs(d) < 1.0e-12)
1607
+ continue;
1608
+
1609
+ double delta;
1610
+ if(j == w_size-1 && regularize_bias == 0)
1611
+ delta = G*d;
1612
+ else
1613
+ delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
1614
+ d_old = 0;
1615
+ int num_linesearch;
1616
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
1617
+ {
1618
+ d_diff = d_old - d;
1619
+ if(j == w_size-1 && regularize_bias == 0)
1620
+ cond = -sigma*delta;
1621
+ else
1622
+ cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
1623
+
1624
+ appxcond = xj_sq[j]*d*d + G_loss*d + cond;
1625
+ if(appxcond <= 0)
1626
+ {
1627
+ x = prob_col->x[j];
1628
+ sparse_operator::axpy(d_diff, x, b);
1629
+ break;
1630
+ }
1631
+
1632
+ if(num_linesearch == 0)
1633
+ {
1634
+ loss_old = 0;
1635
+ loss_new = 0;
1636
+ x = prob_col->x[j];
1637
+ while(x->index != -1)
1638
+ {
1639
+ int ind = x->index-1;
1640
+ if(b[ind] > 0)
1641
+ loss_old += C[GETI(ind)]*b[ind]*b[ind];
1642
+ double b_new = b[ind] + d_diff*x->value;
1643
+ b[ind] = b_new;
1644
+ if(b_new > 0)
1645
+ loss_new += C[GETI(ind)]*b_new*b_new;
1646
+ x++;
1647
+ }
1648
+ }
1649
+ else
1650
+ {
1651
+ loss_new = 0;
1652
+ x = prob_col->x[j];
1653
+ while(x->index != -1)
1654
+ {
1655
+ int ind = x->index-1;
1656
+ double b_new = b[ind] + d_diff*x->value;
1657
+ b[ind] = b_new;
1658
+ if(b_new > 0)
1659
+ loss_new += C[GETI(ind)]*b_new*b_new;
1660
+ x++;
1661
+ }
1662
+ }
1663
+
1664
+ cond = cond + loss_new - loss_old;
1665
+ if(cond <= 0)
1666
+ break;
1667
+ else
1668
+ {
1669
+ d_old = d;
1670
+ d *= 0.5;
1671
+ delta *= 0.5;
1672
+ }
1673
+ }
1674
+
1675
+ w[j] += d;
1676
+
1677
+ // recompute b[] if line search takes too many steps
1678
+ if(num_linesearch >= max_num_linesearch)
1679
+ {
1680
+ info("#");
1681
+ for(int i=0; i<l; i++)
1682
+ b[i] = 1;
1683
+
1684
+ for(int i=0; i<w_size; i++)
1685
+ {
1686
+ if(w[i]==0) continue;
1687
+ x = prob_col->x[i];
1688
+ sparse_operator::axpy(-w[i], x, b);
1689
+ }
1690
+ }
1691
+ }
1692
+
1693
+ if(iter == 0)
1694
+ Gnorm1_init = Gnorm1_new;
1695
+ iter++;
1696
+ if(iter % 10 == 0)
1697
+ info(".");
1698
+
1699
+ if(Gnorm1_new <= eps*Gnorm1_init)
1700
+ {
1701
+ if(active_size == w_size)
1702
+ break;
1703
+ else
1704
+ {
1705
+ active_size = w_size;
1706
+ info("*");
1707
+ Gmax_old = INF;
1708
+ continue;
1709
+ }
1710
+ }
1711
+
1712
+ Gmax_old = Gmax_new;
1713
+ }
1714
+
1715
+ info("\noptimization finished, #iter = %d\n", iter);
1716
+ if(iter >= max_iter)
1717
+ info("\nWARNING: reaching max number of iterations\n");
1718
+
1719
+ // calculate objective value
1720
+
1721
+ double v = 0;
1722
+ int nnz = 0;
1723
+ for(j=0; j<w_size; j++)
1724
+ {
1725
+ x = prob_col->x[j];
1726
+ while(x->index != -1)
1727
+ {
1728
+ x->value *= prob_col->y[x->index-1]; // restore x->value
1729
+ x++;
1730
+ }
1731
+ if(w[j] != 0)
1732
+ {
1733
+ v += fabs(w[j]);
1734
+ nnz++;
1735
+ }
1736
+ }
1737
+ if (regularize_bias == 0)
1738
+ v -= fabs(w[w_size-1]);
1739
+ for(j=0; j<l; j++)
1740
+ if(b[j] > 0)
1741
+ v += C[GETI(j)]*b[j]*b[j];
1742
+
1743
+ info("Objective value = %lf\n", v);
1744
+ info("#nonzeros/#features = %d/%d\n", nnz, w_size);
1745
+
1746
+ delete [] index;
1747
+ delete [] y;
1748
+ delete [] b;
1749
+ delete [] xj_sq;
1750
+ }
1751
+
1752
+ // A coordinate descent algorithm for
1753
+ // L1-regularized logistic regression problems
1754
+ //
1755
+ // min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
1756
+ //
1757
+ // Given:
1758
+ // x, y, Cp, Cn
1759
+ // eps is the stopping tolerance
1760
+ //
1761
+ // solution will be put in w
1762
+ //
1763
+ // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
1764
+ //
1765
+ // To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
1766
+ // must have been added to the original data. (see -B and -R option)
1767
+
1768
+ #undef GETI
1769
+ #define GETI(i) (y[i]+1)
1770
+ // To support weights for instances, use GETI(i) (i)
1771
+
1772
+ static void solve_l1r_lr(
1773
+ const problem *prob_col, double *w, double eps,
1774
+ double Cp, double Cn, int regularize_bias)
1775
+ {
1776
+ int l = prob_col->l;
1777
+ int w_size = prob_col->n;
1778
+ int j, s, newton_iter=0, iter=0;
1779
+ int max_newton_iter = 100;
1780
+ int max_iter = 1000;
1781
+ int max_num_linesearch = 20;
1782
+ int active_size;
1783
+ int QP_active_size;
1784
+
1785
+ double nu = 1e-12;
1786
+ double inner_eps = 1;
1787
+ double sigma = 0.01;
1788
+ double w_norm, w_norm_new;
1789
+ double z, G, H;
1790
+ double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1791
+ double Gmax_old = INF;
1792
+ double Gmax_new, Gnorm1_new;
1793
+ double QP_Gmax_old = INF;
1794
+ double QP_Gmax_new, QP_Gnorm1_new;
1795
+ double delta, negsum_xTd, cond;
1796
+
1797
+ int *index = new int[w_size];
1798
+ schar *y = new schar[l];
1799
+ double *Hdiag = new double[w_size];
1800
+ double *Grad = new double[w_size];
1801
+ double *wpd = new double[w_size];
1802
+ double *xjneg_sum = new double[w_size];
1803
+ double *xTd = new double[l];
1804
+ double *exp_wTx = new double[l];
1805
+ double *exp_wTx_new = new double[l];
1806
+ double *tau = new double[l];
1807
+ double *D = new double[l];
1808
+ feature_node *x;
1809
+
1810
+ double C[3] = {Cn,0,Cp};
1811
+
1812
+ // Initial w can be set here.
1813
+ for(j=0; j<w_size; j++)
1814
+ w[j] = 0;
1815
+
1816
+ for(j=0; j<l; j++)
1817
+ {
1818
+ if(prob_col->y[j] > 0)
1819
+ y[j] = 1;
1820
+ else
1821
+ y[j] = -1;
1822
+
1823
+ exp_wTx[j] = 0;
1824
+ }
1825
+
1826
+ w_norm = 0;
1827
+ for(j=0; j<w_size; j++)
1828
+ {
1829
+ w_norm += fabs(w[j]);
1830
+ wpd[j] = w[j];
1831
+ index[j] = j;
1832
+ xjneg_sum[j] = 0;
1833
+ x = prob_col->x[j];
1834
+ while(x->index != -1)
1835
+ {
1836
+ int ind = x->index-1;
1837
+ double val = x->value;
1838
+ exp_wTx[ind] += w[j]*val;
1839
+ if(y[ind] == -1)
1840
+ xjneg_sum[j] += C[GETI(ind)]*val;
1841
+ x++;
1842
+ }
1843
+ }
1844
+ if (regularize_bias == 0)
1845
+ w_norm -= fabs(w[w_size-1]);
1846
+
1847
+ for(j=0; j<l; j++)
1848
+ {
1849
+ exp_wTx[j] = exp(exp_wTx[j]);
1850
+ double tau_tmp = 1/(1+exp_wTx[j]);
1851
+ tau[j] = C[GETI(j)]*tau_tmp;
1852
+ D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp;
1853
+ }
1854
+
1855
+ while(newton_iter < max_newton_iter)
1856
+ {
1857
+ Gmax_new = 0;
1858
+ Gnorm1_new = 0;
1859
+ active_size = w_size;
1860
+
1861
+ for(s=0; s<active_size; s++)
1862
+ {
1863
+ j = index[s];
1864
+ Hdiag[j] = nu;
1865
+ Grad[j] = 0;
1866
+
1867
+ double tmp = 0;
1868
+ x = prob_col->x[j];
1869
+ while(x->index != -1)
1870
+ {
1871
+ int ind = x->index-1;
1872
+ Hdiag[j] += x->value*x->value*D[ind];
1873
+ tmp += x->value*tau[ind];
1874
+ x++;
1875
+ }
1876
+ Grad[j] = -tmp + xjneg_sum[j];
1877
+
1878
+ double violation = 0;
1879
+ if (j == w_size-1 && regularize_bias == 0)
1880
+ violation = fabs(Grad[j]);
1881
+ else
1882
+ {
1883
+ double Gp = Grad[j]+1;
1884
+ double Gn = Grad[j]-1;
1885
+ if(w[j] == 0)
1886
+ {
1887
+ if(Gp < 0)
1888
+ violation = -Gp;
1889
+ else if(Gn > 0)
1890
+ violation = Gn;
1891
+ //outer-level shrinking
1892
+ else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1893
+ {
1894
+ active_size--;
1895
+ swap(index[s], index[active_size]);
1896
+ s--;
1897
+ continue;
1898
+ }
1899
+ }
1900
+ else if(w[j] > 0)
1901
+ violation = fabs(Gp);
1902
+ else
1903
+ violation = fabs(Gn);
1904
+ }
1905
+ Gmax_new = max(Gmax_new, violation);
1906
+ Gnorm1_new += violation;
1907
+ }
1908
+
1909
+ if(newton_iter == 0)
1910
+ Gnorm1_init = Gnorm1_new;
1911
+
1912
+ if(Gnorm1_new <= eps*Gnorm1_init)
1913
+ break;
1914
+
1915
+ iter = 0;
1916
+ QP_Gmax_old = INF;
1917
+ QP_active_size = active_size;
1918
+
1919
+ for(int i=0; i<l; i++)
1920
+ xTd[i] = 0;
1921
+
1922
+ // optimize QP over wpd
1923
+ while(iter < max_iter)
1924
+ {
1925
+ QP_Gmax_new = 0;
1926
+ QP_Gnorm1_new = 0;
1927
+
1928
+ for(j=0; j<QP_active_size; j++)
1929
+ {
1930
+ int i = j+rand()%(QP_active_size-j);
1931
+ swap(index[i], index[j]);
1932
+ }
1933
+
1934
+ for(s=0; s<QP_active_size; s++)
1935
+ {
1936
+ j = index[s];
1937
+ H = Hdiag[j];
1938
+
1939
+ x = prob_col->x[j];
1940
+ G = Grad[j] + (wpd[j]-w[j])*nu;
1941
+ while(x->index != -1)
1942
+ {
1943
+ int ind = x->index-1;
1944
+ G += x->value*D[ind]*xTd[ind];
1945
+ x++;
1946
+ }
1947
+
1948
+ double violation = 0;
1949
+ if (j == w_size-1 && regularize_bias == 0)
1950
+ {
1951
+ // bias term not shrunken
1952
+ violation = fabs(G);
1953
+ z = -G/H;
1954
+ }
1955
+ else
1956
+ {
1957
+ double Gp = G+1;
1958
+ double Gn = G-1;
1959
+ if(wpd[j] == 0)
1960
+ {
1961
+ if(Gp < 0)
1962
+ violation = -Gp;
1963
+ else if(Gn > 0)
1964
+ violation = Gn;
1965
+ //inner-level shrinking
1966
+ else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
1967
+ {
1968
+ QP_active_size--;
1969
+ swap(index[s], index[QP_active_size]);
1970
+ s--;
1971
+ continue;
1972
+ }
1973
+ }
1974
+ else if(wpd[j] > 0)
1975
+ violation = fabs(Gp);
1976
+ else
1977
+ violation = fabs(Gn);
1978
+
1979
+ // obtain solution of one-variable problem
1980
+ if(Gp < H*wpd[j])
1981
+ z = -Gp/H;
1982
+ else if(Gn > H*wpd[j])
1983
+ z = -Gn/H;
1984
+ else
1985
+ z = -wpd[j];
1986
+ }
1987
+ QP_Gmax_new = max(QP_Gmax_new, violation);
1988
+ QP_Gnorm1_new += violation;
1989
+
1990
+ if(fabs(z) < 1.0e-12)
1991
+ continue;
1992
+ z = min(max(z,-10.0),10.0);
1993
+
1994
+ wpd[j] += z;
1995
+
1996
+ x = prob_col->x[j];
1997
+ sparse_operator::axpy(z, x, xTd);
1998
+ }
1999
+
2000
+ iter++;
2001
+
2002
+ if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
2003
+ {
2004
+ //inner stopping
2005
+ if(QP_active_size == active_size)
2006
+ break;
2007
+ //active set reactivation
2008
+ else
2009
+ {
2010
+ QP_active_size = active_size;
2011
+ QP_Gmax_old = INF;
2012
+ continue;
2013
+ }
2014
+ }
2015
+
2016
+ QP_Gmax_old = QP_Gmax_new;
2017
+ }
2018
+
2019
+ if(iter >= max_iter)
2020
+ info("WARNING: reaching max number of inner iterations\n");
2021
+
2022
+ delta = 0;
2023
+ w_norm_new = 0;
2024
+ for(j=0; j<w_size; j++)
2025
+ {
2026
+ delta += Grad[j]*(wpd[j]-w[j]);
2027
+ if(wpd[j] != 0)
2028
+ w_norm_new += fabs(wpd[j]);
2029
+ }
2030
+ if (regularize_bias == 0)
2031
+ w_norm_new -= fabs(wpd[w_size-1]);
2032
+ delta += (w_norm_new-w_norm);
2033
+
2034
+ negsum_xTd = 0;
2035
+ for(int i=0; i<l; i++)
2036
+ if(y[i] == -1)
2037
+ negsum_xTd += C[GETI(i)]*xTd[i];
2038
+
2039
+ int num_linesearch;
2040
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
2041
+ {
2042
+ cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;
2043
+
2044
+ for(int i=0; i<l; i++)
2045
+ {
2046
+ double exp_xTd = exp(xTd[i]);
2047
+ exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
2048
+ cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
2049
+ }
2050
+
2051
+ if(cond <= 0)
2052
+ {
2053
+ w_norm = w_norm_new;
2054
+ for(j=0; j<w_size; j++)
2055
+ w[j] = wpd[j];
2056
+ for(int i=0; i<l; i++)
2057
+ {
2058
+ exp_wTx[i] = exp_wTx_new[i];
2059
+ double tau_tmp = 1/(1+exp_wTx[i]);
2060
+ tau[i] = C[GETI(i)]*tau_tmp;
2061
+ D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
2062
+ }
2063
+ break;
2064
+ }
2065
+ else
2066
+ {
2067
+ w_norm_new = 0;
2068
+ for(j=0; j<w_size; j++)
2069
+ {
2070
+ wpd[j] = (w[j]+wpd[j])*0.5;
2071
+ if(wpd[j] != 0)
2072
+ w_norm_new += fabs(wpd[j]);
2073
+ }
2074
+ if (regularize_bias == 0)
2075
+ w_norm_new -= fabs(wpd[w_size-1]);
2076
+ delta *= 0.5;
2077
+ negsum_xTd *= 0.5;
2078
+ for(int i=0; i<l; i++)
2079
+ xTd[i] *= 0.5;
2080
+ }
2081
+ }
2082
+
2083
+ // Recompute some info due to too many line search steps
2084
+ if(num_linesearch >= max_num_linesearch)
2085
+ {
2086
+ for(int i=0; i<l; i++)
2087
+ exp_wTx[i] = 0;
2088
+
2089
+ for(int i=0; i<w_size; i++)
2090
+ {
2091
+ if(w[i]==0) continue;
2092
+ x = prob_col->x[i];
2093
+ sparse_operator::axpy(w[i], x, exp_wTx);
2094
+ }
2095
+
2096
+ for(int i=0; i<l; i++)
2097
+ exp_wTx[i] = exp(exp_wTx[i]);
2098
+ }
2099
+
2100
+ if(iter == 1)
2101
+ inner_eps *= 0.25;
2102
+
2103
+ newton_iter++;
2104
+ Gmax_old = Gmax_new;
2105
+
2106
+ info("iter %3d #CD cycles %d\n", newton_iter, iter);
2107
+ }
2108
+
2109
+ info("=========================\n");
2110
+ info("optimization finished, #iter = %d\n", newton_iter);
2111
+ if(newton_iter >= max_newton_iter)
2112
+ info("WARNING: reaching max number of iterations\n");
2113
+
2114
+ // calculate objective value
2115
+
2116
+ double v = 0;
2117
+ int nnz = 0;
2118
+ for(j=0; j<w_size; j++)
2119
+ if(w[j] != 0)
2120
+ {
2121
+ v += fabs(w[j]);
2122
+ nnz++;
2123
+ }
2124
+ if (regularize_bias == 0)
2125
+ v -= fabs(w[w_size-1]);
2126
+ for(j=0; j<l; j++)
2127
+ if(y[j] == 1)
2128
+ v += C[GETI(j)]*log(1+1/exp_wTx[j]);
2129
+ else
2130
+ v += C[GETI(j)]*log(1+exp_wTx[j]);
2131
+
2132
+ info("Objective value = %lf\n", v);
2133
+ info("#nonzeros/#features = %d/%d\n", nnz, w_size);
2134
+
2135
+ delete [] index;
2136
+ delete [] y;
2137
+ delete [] Hdiag;
2138
+ delete [] Grad;
2139
+ delete [] wpd;
2140
+ delete [] xjneg_sum;
2141
+ delete [] xTd;
2142
+ delete [] exp_wTx;
2143
+ delete [] exp_wTx_new;
2144
+ delete [] tau;
2145
+ delete [] D;
2146
+ }
2147
+
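+ // Fixed-capacity binary heap used by solve_oneclass_svm() below to keep
+ // the most-violating indices: with k = max_inner_iter, a MIN heap retains
+ // the k largest -G values (its root, the smallest kept value, is evicted
+ // first) and a MAX heap retains the k smallest.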
2148
+ struct heap {
2149
+ enum HEAP_TYPE { MIN, MAX };
2150
+ int _size;
2151
+ HEAP_TYPE _type;
2152
+ feature_node* a;
2153
+
2154
+ heap(int max_size, HEAP_TYPE type)
2155
+ {
2156
+ _size = 0;
2157
+ a = new feature_node[max_size];
2158
+ _type = type;
2159
+ }
2160
+ ~heap()
2161
+ {
2162
+ delete [] a;
2163
+ }
2164
+ bool cmp(const feature_node& left, const feature_node& right)
2165
+ {
2166
+ if(_type == MIN)
2167
+ return left.value > right.value;
2168
+ else
2169
+ return left.value < right.value;
2170
+ }
2171
+ int size()
2172
+ {
2173
+ return _size;
2174
+ }
2175
+ void push(feature_node node)
2176
+ {
2177
+ a[_size] = node;
2178
+ _size++;
2179
+ int i = _size-1;
2180
+ while(i)
2181
+ {
2182
+ int p = (i-1)/2;
2183
+ if(cmp(a[p], a[i]))
2184
+ {
2185
+ swap(a[i], a[p]);
2186
+ i = p;
2187
+ }
2188
+ else
2189
+ break;
2190
+ }
2191
+ }
2192
+ void pop()
2193
+ {
2194
+ _size--;
2195
+ a[0] = a[_size];
2196
+ int i = 0;
2197
+ while(i*2+1 < _size)
2198
+ {
2199
+ int l = i*2+1;
2200
+ int r = i*2+2;
2201
+ if(r < _size && cmp(a[l], a[r]))
2202
+ l = r;
2203
+ if(cmp(a[i], a[l]))
2204
+ {
2205
+ swap(a[i], a[l]);
2206
+ i = l;
2207
+ }
2208
+ else
2209
+ break;
2210
+ }
2211
+ }
2212
+ feature_node top()
2213
+ {
2214
+ return a[0];
2215
+ }
2216
+ };
2217
+
2218
+ // A two-level coordinate descent algorithm for
2219
+ // a scaled one-class SVM dual problem
2220
+ //
2221
+ // min_\alpha 0.5(\alpha^T Q \alpha),
2222
+ // s.t. 0 <= \alpha_i <= 1 and
2223
+ // e^T \alpha = \nu l
2224
+ //
2225
+ // where Qij = xi^T xj
2226
+ //
2227
+ // Given:
2228
+ // x, nu
2229
+ // eps is the stopping tolerance
2230
+ //
2231
+ // solution will be put in w and rho
2232
+ //
2233
+ // See Algorithm 7 in supplementary materials of Chou et al., SDM 2020.
2234
+
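+ // For each selected pair (i, j) the inner loop applies the two-variable
+ // update
+ //   delta = (G_i - G_j) / (Q_ii + Q_jj - 2 Q_ij),
+ //   alpha_i <- alpha_i - delta,  alpha_j <- alpha_j + delta,
+ // then clips both variables back into [0, 1] while keeping
+ // alpha_i + alpha_j fixed, so e^T alpha = nu*l is preserved.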
2235
+ static void solve_oneclass_svm(const problem *prob, double *w, double *rho, double eps, double nu)
2236
+ {
2237
+ int l = prob->l;
2238
+ int w_size = prob->n;
2239
+ int i, j, s, iter = 0;
2240
+ double Gi, Gj;
2241
+ double Qij, quad_coef, delta, sum;
2242
+ double old_alpha_i;
2243
+ double *QD = new double[l];
2244
+ double *G = new double[l];
2245
+ int *index = new int[l];
2246
+ double *alpha = new double[l];
2247
+ int max_inner_iter;
2248
+ int max_iter = 1000;
2249
+ int active_size = l;
2250
+
2251
+ double negGmax; // max { -grad(f)_i | alpha_i < 1 }
2252
+ double negGmin; // min { -grad(f)_i | alpha_i > 0 }
2253
+
2254
+ int *most_violating_i = new int[l];
2255
+ int *most_violating_j = new int[l];
2256
+
2257
+ int n = (int)(nu*l); // # of alpha's at upper bound
2258
+ for(i=0; i<n; i++)
2259
+ alpha[i] = 1;
2260
+ if (n<l)
2261
+ alpha[i] = nu*l-n;
2262
+ for(i=n+1; i<l; i++)
2263
+ alpha[i] = 0;
2264
+
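+ // At this point sum_i alpha_i = n + (nu*l - n) = nu*l: the first n
+ // entries are 1, one entry holds the fractional remainder, the rest are
+ // 0, so the equality constraint holds at the starting point.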
2265
+ for(i=0; i<w_size; i++)
2266
+ w[i] = 0;
2267
+ for(i=0; i<l; i++)
2268
+ {
2269
+ feature_node * const xi = prob->x[i];
2270
+ QD[i] = sparse_operator::nrm2_sq(xi);
2271
+ sparse_operator::axpy(alpha[i], xi, w);
2272
+
2273
+ index[i] = i;
2274
+ }
2275
+
2276
+ while (iter < max_iter)
2277
+ {
2278
+ negGmax = -INF;
2279
+ negGmin = INF;
2280
+
2281
+ for (s=0; s<active_size; s++)
2282
+ {
2283
+ i = index[s];
2284
+ feature_node * const xi = prob->x[i];
2285
+ G[i] = sparse_operator::dot(w, xi);
2286
+ if (alpha[i] < 1)
2287
+ negGmax = max(negGmax, -G[i]);
2288
+ if (alpha[i] > 0)
2289
+ negGmin = min(negGmin, -G[i]);
2290
+ }
2291
+
2292
+ if (negGmax - negGmin < eps)
2293
+ {
2294
+ if (active_size == l)
2295
+ break;
2296
+ else
2297
+ {
2298
+ active_size = l;
2299
+ info("*");
2300
+ continue;
2301
+ }
2302
+ }
2303
+
2304
+ for(s=0; s<active_size; s++)
2305
+ {
2306
+ i = index[s];
2307
+ if ((alpha[i] == 1 && -G[i] > negGmax) ||
2308
+ (alpha[i] == 0 && -G[i] < negGmin))
2309
+ {
2310
+ active_size--;
2311
+ swap(index[s], index[active_size]);
2312
+ s--;
2313
+ }
2314
+ }
2315
+
2316
+ max_inner_iter = max(active_size/10, 1);
2317
+ struct heap min_heap = heap(max_inner_iter, heap::MIN);
2318
+ struct heap max_heap = heap(max_inner_iter, heap::MAX);
2319
+ struct feature_node node;
2320
+ for(s=0; s<active_size; s++)
2321
+ {
2322
+ i = index[s];
2323
+ node.index = i;
2324
+ node.value = -G[i];
2325
+
2326
+ if (alpha[i] < 1)
2327
+ {
2328
+ if (min_heap.size() < max_inner_iter)
2329
+ min_heap.push(node);
2330
+ else if (min_heap.top().value < node.value)
2331
+ {
2332
+ min_heap.pop();
2333
+ min_heap.push(node);
2334
+ }
2335
+ }
2336
+
2337
+ if (alpha[i] > 0)
2338
+ {
2339
+ if (max_heap.size() < max_inner_iter)
2340
+ max_heap.push(node);
2341
+ else if (max_heap.top().value > node.value)
2342
+ {
2343
+ max_heap.pop();
2344
+ max_heap.push(node);
2345
+ }
2346
+ }
2347
+ }
2348
+ max_inner_iter = min(min_heap.size(), max_heap.size());
2349
+ while (max_heap.size() > max_inner_iter)
2350
+ max_heap.pop();
2351
+ while (min_heap.size() > max_inner_iter)
2352
+ min_heap.pop();
2353
+
2354
+ for (s=max_inner_iter-1; s>=0; s--)
2355
+ {
2356
+ most_violating_i[s] = min_heap.top().index;
2357
+ most_violating_j[s] = max_heap.top().index;
2358
+ min_heap.pop();
2359
+ max_heap.pop();
2360
+ }
2361
+
2362
+ for (s=0; s<max_inner_iter; s++)
2363
+ {
2364
+ i = most_violating_i[s];
2365
+ j = most_violating_j[s];
2366
+
2367
+ if ((alpha[i] == 0 && alpha[j] == 0) ||
2368
+ (alpha[i] == 1 && alpha[j] == 1))
2369
+ continue;
2370
+
2371
+ feature_node const * xi = prob->x[i];
2372
+ feature_node const * xj = prob->x[j];
2373
+
2374
+ Gi = sparse_operator::dot(w, xi);
2375
+ Gj = sparse_operator::dot(w, xj);
2376
+
2377
+ int violating_pair = 0;
2378
+ if (alpha[i] < 1 && alpha[j] > 0 && -Gj + 1e-12 < -Gi)
2379
+ violating_pair = 1;
2380
+ else
2381
+ if (alpha[i] > 0 && alpha[j] < 1 && -Gi + 1e-12 < -Gj)
2382
+ violating_pair = 1;
2383
+ if (violating_pair == 0)
2384
+ continue;
2385
+
2386
+ Qij = sparse_operator::sparse_dot(xi, xj);
2387
+ quad_coef = QD[i] + QD[j] - 2*Qij;
2388
+ if(quad_coef <= 0)
2389
+ quad_coef = 1e-12;
2390
+ delta = (Gi - Gj) / quad_coef;
2391
+ old_alpha_i = alpha[i];
2392
+ sum = alpha[i] + alpha[j];
2393
+ alpha[i] = alpha[i] - delta;
2394
+ alpha[j] = alpha[j] + delta;
2395
+ if (sum > 1)
2396
+ {
2397
+ if (alpha[i] > 1)
2398
+ {
2399
+ alpha[i] = 1;
2400
+ alpha[j] = sum - 1;
2401
+ }
2402
+ }
2403
+ else
2404
+ {
2405
+ if (alpha[j] < 0)
2406
+ {
2407
+ alpha[j] = 0;
2408
+ alpha[i] = sum;
2409
+ }
2410
+ }
2411
+ if (sum > 1)
2412
+ {
2413
+ if (alpha[j] > 1)
2414
+ {
2415
+ alpha[j] = 1;
2416
+ alpha[i] = sum - 1;
2417
+ }
2418
+ }
2419
+ else
2420
+ {
2421
+ if (alpha[i] < 0)
2422
+ {
2423
+ alpha[i] = 0;
2424
+ alpha[j] = sum;
2425
+ }
2426
+ }
2427
+ delta = alpha[i] - old_alpha_i;
2428
+ sparse_operator::axpy(delta, xi, w);
2429
+ sparse_operator::axpy(-delta, xj, w);
2430
+ }
2431
+ iter++;
2432
+ if (iter % 10 == 0)
2433
+ info(".");
2434
+ }
2435
+ info("\noptimization finished, #iter = %d\n",iter);
2436
+ if (iter >= max_iter)
2437
+ info("\nWARNING: reaching max number of iterations\n\n");
2438
+
2439
+ // calculate objective value
2440
+ double v = 0;
2441
+ for(i=0; i<w_size; i++)
2442
+ v += w[i]*w[i];
2443
+ int nSV = 0;
2444
+ for(i=0; i<l; i++)
2445
+ {
2446
+ if (alpha[i] > 0)
2447
+ ++nSV;
2448
+ }
2449
+ info("Objective value = %lf\n", v/2);
2450
+ info("nSV = %d\n", nSV);
2451
+
2452
+ // calculate rho
2453
+ double nr_free = 0;
2454
+ double ub = INF, lb = -INF, sum_free = 0;
2455
+ for(i=0; i<l; i++)
2456
+ {
2457
+ double G = sparse_operator::dot(w, prob->x[i]);
2458
+ if (alpha[i] == 1)
2459
+ lb = max(lb, G);
2460
+ else if (alpha[i] == 0)
2461
+ ub = min(ub, G);
2462
+ else
2463
+ {
2464
+ ++nr_free;
2465
+ sum_free += G;
2466
+ }
2467
+ }
2468
+
2469
+ if (nr_free > 0)
2470
+ *rho = sum_free/nr_free;
2471
+ else
2472
+ *rho = (ub + lb)/2;
2473
+
2474
+ info("rho = %lf\n", *rho);
2475
+
2476
+ delete [] QD;
2477
+ delete [] G;
2478
+ delete [] index;
2479
+ delete [] alpha;
2480
+ delete [] most_violating_i;
2481
+ delete [] most_violating_j;
2482
+ }
2483
+
2484
+ // transpose matrix X from row format to column format
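+ // Each column is terminated by a sentinel node with index == -1, which is
+ // why the prefix-sum loop below adds one extra slot per column and
+ // x_space is allocated with nnz + n nodes.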
2485
+ static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
2486
+ {
2487
+ int i;
2488
+ int l = prob->l;
2489
+ int n = prob->n;
2490
+ size_t nnz = 0;
2491
+ size_t *col_ptr = new size_t [n+1];
2492
+ feature_node *x_space;
2493
+ prob_col->l = l;
2494
+ prob_col->n = n;
2495
+ prob_col->y = new double[l];
2496
+ prob_col->x = new feature_node*[n];
2497
+
2498
+ for(i=0; i<l; i++)
2499
+ prob_col->y[i] = prob->y[i];
2500
+
2501
+ for(i=0; i<n+1; i++)
2502
+ col_ptr[i] = 0;
2503
+ for(i=0; i<l; i++)
2504
+ {
2505
+ feature_node *x = prob->x[i];
2506
+ while(x->index != -1)
2507
+ {
2508
+ nnz++;
2509
+ col_ptr[x->index]++;
2510
+ x++;
2511
+ }
2512
+ }
2513
+ for(i=1; i<n+1; i++)
2514
+ col_ptr[i] += col_ptr[i-1] + 1;
2515
+
2516
+ x_space = new feature_node[nnz+n];
2517
+ for(i=0; i<n; i++)
2518
+ prob_col->x[i] = &x_space[col_ptr[i]];
2519
+
2520
+ for(i=0; i<l; i++)
2521
+ {
2522
+ feature_node *x = prob->x[i];
2523
+ while(x->index != -1)
2524
+ {
2525
+ int ind = x->index-1;
2526
+ x_space[col_ptr[ind]].index = i+1; // starts from 1
2527
+ x_space[col_ptr[ind]].value = x->value;
2528
+ col_ptr[ind]++;
2529
+ x++;
2530
+ }
2531
+ }
2532
+ for(i=0; i<n; i++)
2533
+ x_space[col_ptr[i]].index = -1;
2534
+
2535
+ *x_space_ret = x_space;
2536
+
2537
+ delete [] col_ptr;
2538
+ }
2539
+
2540
+ // label: label name, start: starting index of each class, count: number of instances in each class, perm: indices to the original data
2541
+ // perm, length l, must be allocated before calling this subroutine
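+ // Illustrative example: for y = [2, 1, 2, 1] this yields nr_class = 2,
+ // label = {2, 1}, count = {2, 2}, start = {0, 2} and perm = {0, 2, 1, 3},
+ // i.e. all instances of the first-seen class come first.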
2542
+ static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2543
+ {
2544
+ int l = prob->l;
2545
+ int max_nr_class = 16;
2546
+ int nr_class = 0;
2547
+ int *label = Malloc(int,max_nr_class);
2548
+ int *count = Malloc(int,max_nr_class);
2549
+ int *data_label = Malloc(int,l);
2550
+ int i;
2551
+
2552
+ for(i=0;i<l;i++)
2553
+ {
2554
+ int this_label = (int)prob->y[i];
2555
+ int j;
2556
+ for(j=0;j<nr_class;j++)
2557
+ {
2558
+ if(this_label == label[j])
2559
+ {
2560
+ ++count[j];
2561
+ break;
2562
+ }
2563
+ }
2564
+ data_label[i] = j;
2565
+ if(j == nr_class)
2566
+ {
2567
+ if(nr_class == max_nr_class)
2568
+ {
2569
+ max_nr_class *= 2;
2570
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
2571
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
2572
+ }
2573
+ label[nr_class] = this_label;
2574
+ count[nr_class] = 1;
2575
+ ++nr_class;
2576
+ }
2577
+ }
2578
+
2579
+ //
2580
+ // Labels are ordered by their first occurrence in the training set.
2581
+ // However, for two-class sets with -1/+1 labels where -1 appears first,
2582
+ // we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
2583
+ //
2584
+ if (nr_class == 2 && label[0] == -1 && label[1] == 1)
2585
+ {
2586
+ swap(label[0],label[1]);
2587
+ swap(count[0],count[1]);
2588
+ for(i=0;i<l;i++)
2589
+ {
2590
+ if(data_label[i] == 0)
2591
+ data_label[i] = 1;
2592
+ else
2593
+ data_label[i] = 0;
2594
+ }
2595
+ }
2596
+
2597
+ int *start = Malloc(int,nr_class);
2598
+ start[0] = 0;
2599
+ for(i=1;i<nr_class;i++)
2600
+ start[i] = start[i-1]+count[i-1];
2601
+ for(i=0;i<l;i++)
2602
+ {
2603
+ perm[start[data_label[i]]] = i;
2604
+ ++start[data_label[i]];
2605
+ }
2606
+ start[0] = 0;
2607
+ for(i=1;i<nr_class;i++)
2608
+ start[i] = start[i-1]+count[i-1];
2609
+
2610
+ *nr_class_ret = nr_class;
2611
+ *label_ret = label;
2612
+ *start_ret = start;
2613
+ *count_ret = count;
2614
+ free(data_label);
2615
+ }
2616
+
2617
+ static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
2618
+ {
2619
+ double eps = param->eps;
2620
+
2621
+ int pos = 0;
2622
+ int neg = 0;
2623
+ for(int i=0;i<prob->l;i++)
2624
+ if(prob->y[i] > 0)
2625
+ pos++;
2626
+ neg = prob->l - pos;
2627
+ double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
2628
+
2629
+ function *fun_obj=NULL;
2630
+ switch(param->solver_type)
2631
+ {
2632
+ case L2R_LR:
2633
+ {
2634
+ double *C = new double[prob->l];
2635
+ for(int i = 0; i < prob->l; i++)
2636
+ {
2637
+ if(prob->y[i] > 0)
2638
+ C[i] = Cp;
2639
+ else
2640
+ C[i] = Cn;
2641
+ }
2642
+ fun_obj=new l2r_lr_fun(prob, param, C);
2643
+ NEWTON newton_obj(fun_obj, primal_solver_tol);
2644
+ newton_obj.set_print_string(liblinear_print_string);
2645
+ newton_obj.newton(w);
2646
+ delete fun_obj;
2647
+ delete[] C;
2648
+ break;
2649
+ }
2650
+ case L2R_L2LOSS_SVC:
2651
+ {
2652
+ double *C = new double[prob->l];
2653
+ for(int i = 0; i < prob->l; i++)
2654
+ {
2655
+ if(prob->y[i] > 0)
2656
+ C[i] = Cp;
2657
+ else
2658
+ C[i] = Cn;
2659
+ }
2660
+ fun_obj=new l2r_l2_svc_fun(prob, param, C);
2661
+ NEWTON newton_obj(fun_obj, primal_solver_tol);
2662
+ newton_obj.set_print_string(liblinear_print_string);
2663
+ newton_obj.newton(w);
2664
+ delete fun_obj;
2665
+ delete[] C;
2666
+ break;
2667
+ }
2668
+ case L2R_L2LOSS_SVC_DUAL:
2669
+ solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
2670
+ break;
2671
+ case L2R_L1LOSS_SVC_DUAL:
2672
+ solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
2673
+ break;
2674
+ case L1R_L2LOSS_SVC:
2675
+ {
2676
+ problem prob_col;
2677
+ feature_node *x_space = NULL;
2678
+ transpose(prob, &x_space ,&prob_col);
2679
+ solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias);
2680
+ delete [] prob_col.y;
2681
+ delete [] prob_col.x;
2682
+ delete [] x_space;
2683
+ break;
2684
+ }
2685
+ case L1R_LR:
2686
+ {
2687
+ problem prob_col;
2688
+ feature_node *x_space = NULL;
2689
+ transpose(prob, &x_space ,&prob_col);
2690
+ solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias);
2691
+ delete [] prob_col.y;
2692
+ delete [] prob_col.x;
2693
+ delete [] x_space;
2694
+ break;
2695
+ }
2696
+ case L2R_LR_DUAL:
2697
+ solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
2698
+ break;
2699
+ case L2R_L2LOSS_SVR:
2700
+ {
2701
+ double *C = new double[prob->l];
2702
+ for(int i = 0; i < prob->l; i++)
2703
+ C[i] = param->C;
2704
+
2705
+ fun_obj=new l2r_l2_svr_fun(prob, param, C);
2706
+ NEWTON newton_obj(fun_obj, param->eps);
2707
+ newton_obj.set_print_string(liblinear_print_string);
2708
+ newton_obj.newton(w);
2709
+ delete fun_obj;
2710
+ delete[] C;
2711
+ break;
2712
+
2713
+ }
2714
+ case L2R_L1LOSS_SVR_DUAL:
2715
+ solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL);
2716
+ break;
2717
+ case L2R_L2LOSS_SVR_DUAL:
2718
+ solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL);
2719
+ break;
2720
+ default:
2721
+ fprintf(stderr, "ERROR: unknown solver_type\n");
2722
+ break;
2723
+ }
2724
+ }
2725
+
2726
+ // Calculate the initial C for parameter selection
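+ // The result is the largest power of two not exceeding a solver-dependent
+ // lower bound: 1/(l * max_i ||x_i||^2) for L2R_LR,
+ // 1/(2 * l * max_i ||x_i||^2) for L2R_L2LOSS_SVC, and an
+ // epsilon-insensitive estimate for L2R_L2LOSS_SVR (see below).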
2727
+ static double calc_start_C(const problem *prob, const parameter *param)
2728
+ {
2729
+ int i;
2730
+ double xTx, max_xTx;
2731
+ max_xTx = 0;
2732
+ for(i=0; i<prob->l; i++)
2733
+ {
2734
+ xTx = 0;
2735
+ feature_node *xi=prob->x[i];
2736
+ while(xi->index != -1)
2737
+ {
2738
+ double val = xi->value;
2739
+ xTx += val*val;
2740
+ xi++;
2741
+ }
2742
+ if(xTx > max_xTx)
2743
+ max_xTx = xTx;
2744
+ }
2745
+
2746
+ double min_C = 1.0;
2747
+ if(param->solver_type == L2R_LR)
2748
+ min_C = 1.0 / (prob->l * max_xTx);
2749
+ else if(param->solver_type == L2R_L2LOSS_SVC)
2750
+ min_C = 1.0 / (2 * prob->l * max_xTx);
2751
+ else if(param->solver_type == L2R_L2LOSS_SVR)
2752
+ {
2753
+ double sum_y, loss, y_abs;
2754
+ double delta2 = 0.1;
2755
+ sum_y = 0, loss = 0;
2756
+ for(i=0; i<prob->l; i++)
2757
+ {
2758
+ y_abs = fabs(prob->y[i]);
2759
+ sum_y += y_abs;
2760
+ loss += max(y_abs - param->p, 0.0) * max(y_abs - param->p, 0.0);
2761
+ }
2762
+ if(loss > 0)
2763
+ min_C = delta2 * delta2 * loss / (8 * sum_y * sum_y * max_xTx);
2764
+ else
2765
+ min_C = INF;
2766
+ }
2767
+
2768
+ return pow( 2, floor(log(min_C) / log(2.0)) );
2769
+ }
2770
+
2771
+ static double calc_max_p(const problem *prob, const parameter *param)
2772
+ {
2773
+ int i;
2774
+ double max_p = 0.0;
2775
+ for(i = 0; i < prob->l; i++)
2776
+ max_p = max(max_p, fabs(prob->y[i]));
2777
+
2778
+ return max_p;
2779
+ }
2780
+
2781
+ static void find_parameter_C(const problem *prob, parameter *param_tmp, double start_C, double max_C, double *best_C, double *best_score, const int *fold_start, const int *perm, const problem *subprob, int nr_fold)
2782
+ {
2783
+ // variables for CV
2784
+ int i;
2785
+ double *target = Malloc(double, prob->l);
2786
+
2787
+ // variables for warm start
2788
+ double ratio = 2;
2789
+ double **prev_w = Malloc(double*, nr_fold);
2790
+ for(i = 0; i < nr_fold; i++)
2791
+ prev_w[i] = NULL;
2792
+ int num_unchanged_w = 0;
2793
+ void (*default_print_string) (const char *) = liblinear_print_string;
2794
+
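+ // Warm start: prev_w[i] stores fold i's solution and seeds init_sol for
+ // the next (larger) C. If the solutions of every fold change by less than
+ // 1e-15 for 5 consecutive C values, the search stops early.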
2795
+ if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
2796
+ *best_score = 0.0;
2797
+ else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
2798
+ *best_score = INF;
2799
+ *best_C = start_C;
2800
+
2801
+ param_tmp->C = start_C;
2802
+ while(param_tmp->C <= max_C)
2803
+ {
2804
+ // Output is disabled while running CV at a particular C
2805
+ set_print_string_function(&print_null);
2806
+
2807
+ for(i=0; i<nr_fold; i++)
2808
+ {
2809
+ int j;
2810
+ int begin = fold_start[i];
2811
+ int end = fold_start[i+1];
2812
+
2813
+ param_tmp->init_sol = prev_w[i];
2814
+ struct model *submodel = train(&subprob[i],param_tmp);
2815
+
2816
+ int total_w_size;
2817
+ if(submodel->nr_class == 2)
2818
+ total_w_size = subprob[i].n;
2819
+ else
2820
+ total_w_size = subprob[i].n * submodel->nr_class;
2821
+
2822
+ if(prev_w[i] == NULL)
2823
+ {
2824
+ prev_w[i] = Malloc(double, total_w_size);
2825
+ for(j=0; j<total_w_size; j++)
2826
+ prev_w[i][j] = submodel->w[j];
2827
+ }
2828
+ else if(num_unchanged_w >= 0)
2829
+ {
2830
+ double norm_w_diff = 0;
2831
+ for(j=0; j<total_w_size; j++)
2832
+ {
2833
+ norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
2834
+ prev_w[i][j] = submodel->w[j];
2835
+ }
2836
+ norm_w_diff = sqrt(norm_w_diff);
2837
+
2838
+ if(norm_w_diff > 1e-15)
2839
+ num_unchanged_w = -1;
2840
+ }
2841
+ else
2842
+ {
2843
+ for(j=0; j<total_w_size; j++)
2844
+ prev_w[i][j] = submodel->w[j];
2845
+ }
2846
+
2847
+ for(j=begin; j<end; j++)
2848
+ target[perm[j]] = predict(submodel,prob->x[perm[j]]);
2849
+
2850
+ free_and_destroy_model(&submodel);
2851
+ }
2852
+ set_print_string_function(default_print_string);
2853
+
2854
+ if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
2855
+ {
2856
+ int total_correct = 0;
2857
+ for(i=0; i<prob->l; i++)
2858
+ if(target[i] == prob->y[i])
2859
+ ++total_correct;
2860
+ double current_rate = (double)total_correct/prob->l;
2861
+ if(current_rate > *best_score)
2862
+ {
2863
+ *best_C = param_tmp->C;
2864
+ *best_score = current_rate;
2865
+ }
2866
+
2867
+ info("log2c=%7.2f\trate=%g\n",log(param_tmp->C)/log(2.0),100.0*current_rate);
2868
+ }
2869
+ else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
2870
+ {
2871
+ double total_error = 0.0;
2872
+ for(i=0; i<prob->l; i++)
2873
+ {
2874
+ double y = prob->y[i];
2875
+ double v = target[i];
2876
+ total_error += (v-y)*(v-y);
2877
+ }
2878
+ double current_error = total_error/prob->l;
2879
+ if(current_error < *best_score)
2880
+ {
2881
+ *best_C = param_tmp->C;
2882
+ *best_score = current_error;
2883
+ }
2884
+
2885
+ info("log2c=%7.2f\tp=%7.2f\tMean squared error=%g\n",log(param_tmp->C)/log(2.0),param_tmp->p,current_error);
2886
+ }
2887
+
2888
+ num_unchanged_w++;
2889
+ if(num_unchanged_w == 5)
2890
+ break;
2891
+ param_tmp->C = param_tmp->C*ratio;
2892
+ }
2893
+
2894
+ if(param_tmp->C > max_C)
2895
+ info("WARNING: maximum C reached.\n");
2896
+ free(target);
2897
+ for(i=0; i<nr_fold; i++)
2898
+ free(prev_w[i]);
2899
+ free(prev_w);
2900
+ }
2901
+
2902
+
2903
+ //
2904
+ // Interface functions
2905
+ //
2906
+ model* train(const problem *prob, const parameter *param)
2907
+ {
2908
+ int i,j;
2909
+ int l = prob->l;
2910
+ int n = prob->n;
2911
+ int w_size = prob->n;
2912
+ model *model_ = Malloc(model,1);
2913
+
2914
+ if(prob->bias>=0)
2915
+ model_->nr_feature=n-1;
2916
+ else
2917
+ model_->nr_feature=n;
2918
+ model_->param = *param;
2919
+ model_->bias = prob->bias;
2920
+
2921
+ if(check_regression_model(model_))
2922
+ {
2923
+ model_->w = Malloc(double, w_size);
2924
+
2925
+ if(param->init_sol != NULL)
2926
+ for(i=0;i<w_size;i++)
2927
+ model_->w[i] = param->init_sol[i];
2928
+ else
2929
+ for(i=0;i<w_size;i++)
2930
+ model_->w[i] = 0;
2931
+
2932
+ model_->nr_class = 2;
2933
+ model_->label = NULL;
2934
+ train_one(prob, param, model_->w, 0, 0);
2935
+ }
2936
+ else if(check_oneclass_model(model_))
2937
+ {
2938
+ model_->w = Malloc(double, w_size);
2939
+ model_->nr_class = 2;
2940
+ model_->label = NULL;
2941
+ solve_oneclass_svm(prob, model_->w, &(model_->rho), param->eps, param->nu);
2942
+ }
2943
+ else
2944
+ {
2945
+ int nr_class;
2946
+ int *label = NULL;
2947
+ int *start = NULL;
2948
+ int *count = NULL;
2949
+ int *perm = Malloc(int,l);
2950
+
2951
+ // group training data of the same class
2952
+ group_classes(prob,&nr_class,&label,&start,&count,perm);
2953
+
2954
+ model_->nr_class=nr_class;
2955
+ model_->label = Malloc(int,nr_class);
2956
+ for(i=0;i<nr_class;i++)
2957
+ model_->label[i] = label[i];
2958
+
2959
+ // calculate weighted C
2960
+ double *weighted_C = Malloc(double, nr_class);
2961
+ for(i=0;i<nr_class;i++)
2962
+ weighted_C[i] = param->C;
2963
+ for(i=0;i<param->nr_weight;i++)
2964
+ {
2965
+ for(j=0;j<nr_class;j++)
2966
+ if(param->weight_label[i] == label[j])
2967
+ break;
2968
+ if(j == nr_class)
2969
+ fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2970
+ else
2971
+ weighted_C[j] *= param->weight[i];
2972
+ }
2973
+
2974
+ // constructing the subproblem
2975
+ feature_node **x = Malloc(feature_node *,l);
2976
+ for(i=0;i<l;i++)
2977
+ x[i] = prob->x[perm[i]];
2978
+
2979
+ int k;
2980
+ problem sub_prob;
2981
+ sub_prob.l = l;
2982
+ sub_prob.n = n;
2983
+ sub_prob.x = Malloc(feature_node *,sub_prob.l);
2984
+ sub_prob.y = Malloc(double,sub_prob.l);
2985
+
2986
+ for(k=0; k<sub_prob.l; k++)
2987
+ sub_prob.x[k] = x[k];
2988
+
2989
+ // multi-class svm by Crammer and Singer
2990
+ if(param->solver_type == MCSVM_CS)
2991
+ {
2992
+ model_->w=Malloc(double, n*nr_class);
2993
+ for(i=0;i<nr_class;i++)
2994
+ for(j=start[i];j<start[i]+count[i];j++)
2995
+ sub_prob.y[j] = i;
2996
+ Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
2997
+ Solver.Solve(model_->w);
2998
+ }
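+ // One-vs-rest training otherwise: a single weight vector when
+ // nr_class == 2, or nr_class vectors stored interleaved as
+ // w[j*nr_class+i] for class i and feature j.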
2999
+ else
3000
+ {
3001
+ if(nr_class == 2)
3002
+ {
3003
+ model_->w=Malloc(double, w_size);
3004
+
3005
+ int e0 = start[0]+count[0];
3006
+ k=0;
3007
+ for(; k<e0; k++)
3008
+ sub_prob.y[k] = +1;
3009
+ for(; k<sub_prob.l; k++)
3010
+ sub_prob.y[k] = -1;
3011
+
3012
+ if(param->init_sol != NULL)
3013
+ for(i=0;i<w_size;i++)
3014
+ model_->w[i] = param->init_sol[i];
3015
+ else
3016
+ for(i=0;i<w_size;i++)
3017
+ model_->w[i] = 0;
3018
+
3019
+ train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
3020
+ }
3021
+ else
3022
+ {
3023
+ model_->w=Malloc(double, w_size*nr_class);
3024
+ double *w=Malloc(double, w_size);
3025
+ for(i=0;i<nr_class;i++)
3026
+ {
3027
+ int si = start[i];
3028
+ int ei = si+count[i];
3029
+
3030
+ k=0;
3031
+ for(; k<si; k++)
3032
+ sub_prob.y[k] = -1;
3033
+ for(; k<ei; k++)
3034
+ sub_prob.y[k] = +1;
3035
+ for(; k<sub_prob.l; k++)
3036
+ sub_prob.y[k] = -1;
3037
+
3038
+ if(param->init_sol != NULL)
3039
+ for(j=0;j<w_size;j++)
3040
+ w[j] = param->init_sol[j*nr_class+i];
3041
+ else
3042
+ for(j=0;j<w_size;j++)
3043
+ w[j] = 0;
3044
+
3045
+ train_one(&sub_prob, param, w, weighted_C[i], param->C);
3046
+
3047
+ for(j=0;j<w_size;j++)
3048
+ model_->w[j*nr_class+i] = w[j];
3049
+ }
3050
+ free(w);
3051
+ }
3052
+
3053
+ }
3054
+
3055
+ free(x);
3056
+ free(label);
3057
+ free(start);
3058
+ free(count);
3059
+ free(perm);
3060
+ free(sub_prob.x);
3061
+ free(sub_prob.y);
3062
+ free(weighted_C);
3063
+ }
3064
+ return model_;
3065
+ }
3066
+
3067
+ void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target)
3068
+ {
3069
+ int i;
3070
+ int *fold_start;
3071
+ int l = prob->l;
3072
+ int *perm = Malloc(int,l);
3073
+ if (nr_fold > l)
3074
+ {
3075
+ nr_fold = l;
3076
+ fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
3077
+ }
3078
+ fold_start = Malloc(int,nr_fold+1);
3079
+ for(i=0;i<l;i++) perm[i]=i;
3080
+ for(i=0;i<l;i++)
3081
+ {
3082
+ int j = i+rand()%(l-i);
3083
+ swap(perm[i],perm[j]);
3084
+ }
3085
+ for(i=0;i<=nr_fold;i++)
3086
+ fold_start[i]=i*l/nr_fold;
3087
+
3088
+ for(i=0;i<nr_fold;i++)
3089
+ {
3090
+ int begin = fold_start[i];
3091
+ int end = fold_start[i+1];
3092
+ int j,k;
3093
+ struct problem subprob;
3094
+
3095
+ subprob.bias = prob->bias;
3096
+ subprob.n = prob->n;
3097
+ subprob.l = l-(end-begin);
3098
+ subprob.x = Malloc(struct feature_node*,subprob.l);
3099
+ subprob.y = Malloc(double,subprob.l);
3100
+
3101
+ k=0;
3102
+ for(j=0;j<begin;j++)
3103
+ {
3104
+ subprob.x[k] = prob->x[perm[j]];
3105
+ subprob.y[k] = prob->y[perm[j]];
3106
+ ++k;
3107
+ }
3108
+ for(j=end;j<l;j++)
3109
+ {
3110
+ subprob.x[k] = prob->x[perm[j]];
3111
+ subprob.y[k] = prob->y[perm[j]];
3112
+ ++k;
3113
+ }
3114
+ struct model *submodel = train(&subprob,param);
3115
+ for(j=begin;j<end;j++)
3116
+ target[perm[j]] = predict(submodel,prob->x[perm[j]]);
3117
+ free_and_destroy_model(&submodel);
3118
+ free(subprob.x);
3119
+ free(subprob.y);
3120
+ }
3121
+ free(fold_start);
3122
+ free(perm);
3123
+ }
3124
+
3125
+
3126
+ void find_parameters(const problem *prob, const parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score)
3127
+ {
3128
+ // prepare CV folds
3129
+
3130
+ int i;
3131
+ int *fold_start;
3132
+ int l = prob->l;
3133
+ int *perm = Malloc(int, l);
3134
+ struct problem *subprob = Malloc(problem,nr_fold);
3135
+
3136
+ if (nr_fold > l)
3137
+ {
3138
+ nr_fold = l;
3139
+ fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
3140
+ }
3141
+ fold_start = Malloc(int,nr_fold+1);
3142
+ for(i=0;i<l;i++) perm[i]=i;
3143
+ for(i=0;i<l;i++)
3144
+ {
3145
+ int j = i+rand()%(l-i);
3146
+ swap(perm[i],perm[j]);
3147
+ }
3148
+ for(i=0;i<=nr_fold;i++)
3149
+ fold_start[i]=i*l/nr_fold;
3150
+
3151
+ for(i=0;i<nr_fold;i++)
3152
+ {
3153
+ int begin = fold_start[i];
3154
+ int end = fold_start[i+1];
3155
+ int j,k;
3156
+
3157
+ subprob[i].bias = prob->bias;
3158
+ subprob[i].n = prob->n;
3159
+ subprob[i].l = l-(end-begin);
3160
+ subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
3161
+ subprob[i].y = Malloc(double,subprob[i].l);
3162
+
3163
+ k=0;
3164
+ for(j=0;j<begin;j++)
3165
+ {
3166
+ subprob[i].x[k] = prob->x[perm[j]];
3167
+ subprob[i].y[k] = prob->y[perm[j]];
3168
+ ++k;
3169
+ }
3170
+ for(j=end;j<l;j++)
3171
+ {
3172
+ subprob[i].x[k] = prob->x[perm[j]];
3173
+ subprob[i].y[k] = prob->y[perm[j]];
3174
+ ++k;
3175
+ }
3176
+
3177
+ }
3178
+
3179
+ struct parameter param_tmp = *param;
3180
+ *best_p = -1;
3181
+ if(param->solver_type == L2R_LR || param->solver_type == L2R_L2LOSS_SVC)
3182
+ {
3183
+ if(start_C <= 0)
3184
+ start_C = calc_start_C(prob, &param_tmp);
3185
+ double max_C = 1024;
3186
+ start_C = min(start_C, max_C);
3187
+ double best_C_tmp, best_score_tmp;
3188
+
3189
+ find_parameter_C(prob, &param_tmp, start_C, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
3190
+
3191
+ *best_C = best_C_tmp;
3192
+ *best_score = best_score_tmp;
3193
+ }
3194
+ else if(param->solver_type == L2R_L2LOSS_SVR)
3195
+ {
3196
+ double max_p = calc_max_p(prob, &param_tmp);
3197
+ int num_p_steps = 20;
3198
+ double max_C = 1048576;
3199
+ *best_score = INF;
3200
+
3201
+ i = num_p_steps-1;
3202
+ if(start_p > 0)
3203
+ i = min((int)(start_p/(max_p/num_p_steps)), i);
3204
+ for(; i >= 0; i--)
3205
+ {
3206
+ param_tmp.p = i*max_p/num_p_steps;
3207
+ double start_C_tmp;
3208
+ if(start_C <= 0)
3209
+ start_C_tmp = calc_start_C(prob, &param_tmp);
3210
+ else
3211
+ start_C_tmp = start_C;
3212
+ start_C_tmp = min(start_C_tmp, max_C);
3213
+ double best_C_tmp, best_score_tmp;
3214
+
3215
+ find_parameter_C(prob, &param_tmp, start_C_tmp, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
3216
+
3217
+ if(best_score_tmp < *best_score)
3218
+ {
3219
+ *best_p = param_tmp.p;
3220
+ *best_C = best_C_tmp;
3221
+ *best_score = best_score_tmp;
3222
+ }
3223
+ }
3224
+ }
3225
+
3226
+ free(fold_start);
3227
+ free(perm);
3228
+ for(i=0; i<nr_fold; i++)
3229
+ {
3230
+ free(subprob[i].x);
3231
+ free(subprob[i].y);
3232
+ }
3233
+ free(subprob);
3234
+ }
3235
+
3236
+ double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
3237
+ {
3238
+ int idx;
3239
+ int n;
3240
+ if(model_->bias>=0)
3241
+ n=model_->nr_feature+1;
3242
+ else
3243
+ n=model_->nr_feature;
3244
+ double *w=model_->w;
3245
+ int nr_class=model_->nr_class;
3246
+ int i;
3247
+ int nr_w;
3248
+ if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
3249
+ nr_w = 1;
3250
+ else
3251
+ nr_w = nr_class;
3252
+
3253
+ const feature_node *lx=x;
3254
+ for(i=0;i<nr_w;i++)
3255
+ dec_values[i] = 0;
3256
+ for(; (idx=lx->index)!=-1; lx++)
3257
+ {
3258
+ // the dimension of testing data may exceed that of training
3259
+ if(idx<=n)
3260
+ for(i=0;i<nr_w;i++)
3261
+ dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
3262
+ }
3263
+ if(check_oneclass_model(model_))
3264
+ dec_values[0] -= model_->rho;
3265
+
3266
+ if(nr_class==2)
3267
+ {
3268
+ if(check_regression_model(model_))
3269
+ return dec_values[0];
3270
+ else if(check_oneclass_model(model_))
3271
+ return (dec_values[0]>0)?1:-1;
3272
+ else
3273
+ return (dec_values[0]>0)?model_->label[0]:model_->label[1];
3274
+ }
3275
+ else
3276
+ {
3277
+ int dec_max_idx = 0;
3278
+ for(i=1;i<nr_class;i++)
3279
+ {
3280
+ if(dec_values[i] > dec_values[dec_max_idx])
3281
+ dec_max_idx = i;
3282
+ }
3283
+ return model_->label[dec_max_idx];
3284
+ }
3285
+ }
3286
+
3287
+ double predict(const model *model_, const feature_node *x)
3288
+ {
3289
+ double *dec_values = Malloc(double, model_->nr_class);
3290
+ double label=predict_values(model_, x, dec_values);
3291
+ free(dec_values);
3292
+ return label;
3293
+ }
3294
+
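+ // Decision values are mapped to probabilities with the logistic function
+ // 1/(1+exp(-dec)); for binary models the second estimate is 1 minus the
+ // first, and for more classes the estimates are renormalized to sum to 1.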
3295
+ double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
3296
+ {
3297
+ if(check_probability_model(model_))
3298
+ {
3299
+ int i;
3300
+ int nr_class=model_->nr_class;
3301
+ int nr_w;
3302
+ if(nr_class==2)
3303
+ nr_w = 1;
3304
+ else
3305
+ nr_w = nr_class;
3306
+
3307
+ double label=predict_values(model_, x, prob_estimates);
3308
+ for(i=0;i<nr_w;i++)
3309
+ prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
3310
+
3311
+ if(nr_class==2) // for binary classification
3312
+ prob_estimates[1]=1.-prob_estimates[0];
3313
+ else
3314
+ {
3315
+ double sum=0;
3316
+ for(i=0; i<nr_class; i++)
3317
+ sum+=prob_estimates[i];
3318
+
3319
+ for(i=0; i<nr_class; i++)
3320
+ prob_estimates[i]=prob_estimates[i]/sum;
3321
+ }
3322
+
3323
+ return label;
3324
+ }
3325
+ else
3326
+ return 0;
3327
+ }
3328
+
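+ // Table indices must match the numeric values of the solver_type enum;
+ // the empty strings fill unused enum values so that
+ // solver_type_table[param.solver_type] stays valid.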
3329
+ static const char *solver_type_table[]=
3330
+ {
3331
+ "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
3332
+ "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
3333
+ "", "", "",
3334
+ "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL",
3335
+ "", "", "", "", "", "", "",
3336
+ "ONECLASS_SVM", NULL
3337
+ };
3338
+
3339
+ int save_model(const char *model_file_name, const struct model *model_)
3340
+ {
3341
+ int i;
3342
+ int nr_feature=model_->nr_feature;
3343
+ int n;
3344
+ const parameter& param = model_->param;
3345
+
3346
+ if(model_->bias>=0)
3347
+ n=nr_feature+1;
3348
+ else
3349
+ n=nr_feature;
3350
+ int w_size = n;
3351
+ FILE *fp = fopen(model_file_name,"w");
3352
+ if(fp==NULL) return -1;
3353
+
3354
+ char *old_locale = setlocale(LC_ALL, NULL);
3355
+ if (old_locale)
3356
+ {
3357
+ old_locale = strdup(old_locale);
3358
+ }
3359
+ setlocale(LC_ALL, "C");
3360
+
3361
+ int nr_w;
3362
+ if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
3363
+ nr_w=1;
3364
+ else
3365
+ nr_w=model_->nr_class;
3366
+
3367
+ fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
3368
+ fprintf(fp, "nr_class %d\n", model_->nr_class);
3369
+
3370
+ if(model_->label)
3371
+ {
3372
+ fprintf(fp, "label");
3373
+ for(i=0; i<model_->nr_class; i++)
3374
+ fprintf(fp, " %d", model_->label[i]);
3375
+ fprintf(fp, "\n");
3376
+ }
3377
+
3378
+ fprintf(fp, "nr_feature %d\n", nr_feature);
3379
+
3380
+ fprintf(fp, "bias %.17g\n", model_->bias);
3381
+
3382
+ if(check_oneclass_model(model_))
3383
+ fprintf(fp, "rho %.17g\n", model_->rho);
3384
+
3385
+ fprintf(fp, "w\n");
3386
+ for(i=0; i<w_size; i++)
3387
+ {
3388
+ int j;
3389
+ for(j=0; j<nr_w; j++)
3390
+ fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
3391
+ fprintf(fp, "\n");
3392
+ }
3393
+
3394
+ setlocale(LC_ALL, old_locale);
3395
+ free(old_locale);
3396
+
3397
+ if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
3398
+ else return 0;
3399
+ }
3400
+
3401
+ //
3402
+ // FSCANF helps to handle fscanf failures.
3403
+ // Its do-while block avoids the ambiguity when
3404
+ // if (...)
3405
+ // FSCANF();
3406
+ // is used
3407
+ //
3408
+ #define FSCANF(_stream, _format, _var)do\
3409
+ {\
3410
+ if (fscanf(_stream, _format, _var) != 1)\
3411
+ {\
3412
+ fprintf(stderr, "ERROR: fscanf failed to read the model\n");\
3413
+ EXIT_LOAD_MODEL()\
3414
+ }\
3415
+ }while(0)
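+ // Usage sketch (illustrative): FSCANF(fp, "%d", &nr_class); expands to a
+ // do-while(0) block and bails out of load_model() via EXIT_LOAD_MODEL()
+ // when the read fails.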
3416
+ // EXIT_LOAD_MODEL should NOT end with a semicolon.
3417
+ #define EXIT_LOAD_MODEL()\
3418
+ {\
3419
+ setlocale(LC_ALL, old_locale);\
3420
+ free(model_->label);\
3421
+ free(model_);\
3422
+ free(old_locale);\
3423
+ return NULL;\
3424
+ }
3425
+ struct model *load_model(const char *model_file_name)
3426
+ {
3427
+ FILE *fp = fopen(model_file_name,"r");
3428
+ if(fp==NULL) return NULL;
3429
+
3430
+ int i;
3431
+ int nr_feature;
3432
+ int n;
3433
+ int nr_class;
3434
+ double bias;
3435
+ double rho;
3436
+ model *model_ = Malloc(model,1);
3437
+ parameter& param = model_->param;
3438
+ // training-only parameters are not stored in the model file and are left unset; the array members are set to NULL for safety
3439
+ param.nr_weight = 0;
3440
+ param.weight_label = NULL;
3441
+ param.weight = NULL;
3442
+ param.init_sol = NULL;
3443
+
3444
+ model_->label = NULL;
3445
+
3446
+ char *old_locale = setlocale(LC_ALL, NULL);
3447
+ if (old_locale)
3448
+ {
3449
+ old_locale = strdup(old_locale);
3450
+ }
3451
+ setlocale(LC_ALL, "C");
3452
+
3453
+ char cmd[81];
3454
+ while(1)
3455
+ {
3456
+ FSCANF(fp,"%80s",cmd);
3457
+ if(strcmp(cmd,"solver_type")==0)
3458
+ {
3459
+ FSCANF(fp,"%80s",cmd);
3460
+ int i;
3461
+ for(i=0;solver_type_table[i];i++)
3462
+ {
3463
+ if(strcmp(solver_type_table[i],cmd)==0)
3464
+ {
3465
+ param.solver_type=i;
3466
+ break;
3467
+ }
3468
+ }
3469
+ if(solver_type_table[i] == NULL)
3470
+ {
3471
+ fprintf(stderr,"unknown solver type.\n");
3472
+ EXIT_LOAD_MODEL()
3473
+ }
3474
+ }
3475
+ else if(strcmp(cmd,"nr_class")==0)
3476
+ {
3477
+ FSCANF(fp,"%d",&nr_class);
3478
+ model_->nr_class=nr_class;
3479
+ }
3480
+ else if(strcmp(cmd,"nr_feature")==0)
3481
+ {
3482
+ FSCANF(fp,"%d",&nr_feature);
3483
+ model_->nr_feature=nr_feature;
3484
+ }
3485
+ else if(strcmp(cmd,"bias")==0)
3486
+ {
3487
+ FSCANF(fp,"%lf",&bias);
3488
+ model_->bias=bias;
3489
+ }
3490
+ else if(strcmp(cmd,"rho")==0)
3491
+ {
3492
+ FSCANF(fp,"%lf",&rho);
3493
+ model_->rho=rho;
3494
+ }
3495
+ else if(strcmp(cmd,"w")==0)
3496
+ {
3497
+ break;
3498
+ }
3499
+ else if(strcmp(cmd,"label")==0)
3500
+ {
3501
+ int nr_class = model_->nr_class;
3502
+ model_->label = Malloc(int,nr_class);
3503
+ for(int i=0;i<nr_class;i++)
3504
+ FSCANF(fp,"%d",&model_->label[i]);
3505
+ }
3506
+ else
3507
+ {
3508
+ fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
3509
+ EXIT_LOAD_MODEL()
3510
+ }
3511
+ }
3512
+
3513
+ nr_feature=model_->nr_feature;
3514
+ if(model_->bias>=0)
3515
+ n=nr_feature+1;
3516
+ else
3517
+ n=nr_feature;
3518
+ int w_size = n;
3519
+ int nr_w;
3520
+ if(nr_class==2 && param.solver_type != MCSVM_CS)
3521
+ nr_w = 1;
3522
+ else
3523
+ nr_w = nr_class;
3524
+
3525
+ model_->w=Malloc(double, w_size*nr_w);
3526
+ for(i=0; i<w_size; i++)
3527
+ {
3528
+ int j;
3529
+ for(j=0; j<nr_w; j++)
3530
+ FSCANF(fp, "%lf ", &model_->w[i*nr_w+j]);
3531
+ }
3532
+
3533
+ setlocale(LC_ALL, old_locale);
3534
+ free(old_locale);
3535
+
3536
+ if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
3537
+
3538
+ return model_;
3539
+ }
3540
+
3541
+ int get_nr_feature(const model *model_)
3542
+ {
3543
+ return model_->nr_feature;
3544
+ }
3545
+
3546
+ int get_nr_class(const model *model_)
3547
+ {
3548
+ return model_->nr_class;
3549
+ }
3550
+
3551
+ void get_labels(const model *model_, int* label)
3552
+ {
3553
+ if (model_->label != NULL)
3554
+ for(int i=0;i<model_->nr_class;i++)
3555
+ label[i] = model_->label[i];
3556
+ }
3557
+
3558
+ // use inline here for better performance (around 20% faster than the non-inline one)
3559
+ static inline double get_w_value(const struct model *model_, int idx, int label_idx)
3560
+ {
3561
+ int nr_class = model_->nr_class;
3562
+ int solver_type = model_->param.solver_type;
3563
+ const double *w = model_->w;
3564
+
3565
+ if(idx < 0 || idx > model_->nr_feature)
3566
+ return 0;
3567
+ if(check_regression_model(model_) || check_oneclass_model(model_))
3568
+ return w[idx];
3569
+ else
3570
+ {
3571
+ if(label_idx < 0 || label_idx >= nr_class)
3572
+ return 0;
3573
+ if(nr_class == 2 && solver_type != MCSVM_CS)
3574
+ {
3575
+ if(label_idx == 0)
3576
+ return w[idx];
3577
+ else
3578
+ return -w[idx];
3579
+ }
3580
+ else
3581
+ return w[idx*nr_class+label_idx];
3582
+ }
3583
+ }
3584
+
3585
+ // feat_idx: starting from 1 to nr_feature
3586
+ // label_idx: starting from 0 to nr_class-1 for classification models;
3587
+ // for regression and one-class SVM models, label_idx is
3588
+ // ignored.
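+ // Illustrative example: get_decfun_coef(model_, 3, 0) returns the
+ // coefficient of feature 3 in the decision function oriented toward
+ // model_->label[0]; for a two-class, non-MCSVM_CS model the coefficient
+ // for label index 1 is simply its negation.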
3589
+ double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
3590
+ {
3591
+ if(feat_idx > model_->nr_feature)
3592
+ return 0;
3593
+ return get_w_value(model_, feat_idx-1, label_idx);
3594
+ }
3595
+
3596
+ double get_decfun_bias(const struct model *model_, int label_idx)
3597
+ {
3598
+ if(check_oneclass_model(model_))
3599
+ {
3600
+ fprintf(stderr, "ERROR: get_decfun_bias can not be called for a one-class SVM model\n");
3601
+ return 0;
3602
+ }
3603
+ int bias_idx = model_->nr_feature;
3604
+ double bias = model_->bias;
3605
+ if(bias <= 0)
3606
+ return 0;
3607
+ else
3608
+ return bias*get_w_value(model_, bias_idx, label_idx);
3609
+ }
3610
+
3611
+ double get_decfun_rho(const struct model *model_)
3612
+ {
3613
+ if(check_oneclass_model(model_))
3614
+ return model_->rho;
3615
+ else
3616
+ {
3617
+ fprintf(stderr, "ERROR: get_decfun_rho can be called only for a one-class SVM model\n");
3618
+ return 0;
3619
+ }
3620
+ }
3621
+
3622
+ void free_model_content(struct model *model_ptr)
3623
+ {
3624
+ if(model_ptr->w != NULL)
3625
+ free(model_ptr->w);
3626
+ if(model_ptr->label != NULL)
3627
+ free(model_ptr->label);
3628
+ }
3629
+
3630
+ void free_and_destroy_model(struct model **model_ptr_ptr)
3631
+ {
3632
+ struct model *model_ptr = *model_ptr_ptr;
3633
+ if(model_ptr != NULL)
3634
+ {
3635
+ free_model_content(model_ptr);
3636
+ free(model_ptr);
3637
+ }
3638
+ }
3639
+
3640
+ void destroy_param(parameter* param)
3641
+ {
3642
+ if(param->weight_label != NULL)
3643
+ free(param->weight_label);
3644
+ if(param->weight != NULL)
3645
+ free(param->weight);
3646
+ if(param->init_sol != NULL)
3647
+ free(param->init_sol);
3648
+ }
3649
+
3650
+ const char *check_parameter(const problem *prob, const parameter *param)
3651
+ {
3652
+ if(param->eps <= 0)
3653
+ return "eps <= 0";
3654
+
3655
+ if(param->C <= 0)
3656
+ return "C <= 0";
3657
+
3658
+ if(param->p < 0)
3659
+ return "p < 0";
3660
+
3661
+ if(prob->bias >= 0 && param->solver_type == ONECLASS_SVM)
3662
+ return "prob->bias >=0, but this is ignored in ONECLASS_SVM";
3663
+
3664
+ if(param->regularize_bias == 0)
3665
+ {
3666
+ if(prob->bias != 1.0)
3667
+ return "To not regularize bias, must specify -B 1 along with -R";
3668
+ if(param->solver_type != L2R_LR
3669
+ && param->solver_type != L2R_L2LOSS_SVC
3670
+ && param->solver_type != L1R_L2LOSS_SVC
3671
+ && param->solver_type != L1R_LR
3672
+ && param->solver_type != L2R_L2LOSS_SVR)
3673
+ return "-R option supported only for solver L2R_LR, L2R_L2LOSS_SVC, L1R_L2LOSS_SVC, L1R_LR, and L2R_L2LOSS_SVR";
3674
+ }
3675
+
3676
+ if(param->solver_type != L2R_LR
3677
+ && param->solver_type != L2R_L2LOSS_SVC_DUAL
3678
+ && param->solver_type != L2R_L2LOSS_SVC
3679
+ && param->solver_type != L2R_L1LOSS_SVC_DUAL
3680
+ && param->solver_type != MCSVM_CS
3681
+ && param->solver_type != L1R_L2LOSS_SVC
3682
+ && param->solver_type != L1R_LR
3683
+ && param->solver_type != L2R_LR_DUAL
3684
+ && param->solver_type != L2R_L2LOSS_SVR
3685
+ && param->solver_type != L2R_L2LOSS_SVR_DUAL
3686
+ && param->solver_type != L2R_L1LOSS_SVR_DUAL
3687
+ && param->solver_type != ONECLASS_SVM)
3688
+ return "unknown solver type";
3689
+
3690
+ if(param->init_sol != NULL
3691
+ && param->solver_type != L2R_LR
3692
+ && param->solver_type != L2R_L2LOSS_SVC
3693
+ && param->solver_type != L2R_L2LOSS_SVR)
3694
+ return "Initial-solution specification supported only for solvers L2R_LR, L2R_L2LOSS_SVC, and L2R_L2LOSS_SVR";
3695
+
3696
+ return NULL;
3697
+ }
3698
+
3699
+ int check_probability_model(const struct model *model_)
3700
+ {
3701
+ return (model_->param.solver_type==L2R_LR ||
3702
+ model_->param.solver_type==L2R_LR_DUAL ||
3703
+ model_->param.solver_type==L1R_LR);
3704
+ }
3705
+
3706
+ int check_regression_model(const struct model *model_)
3707
+ {
3708
+ return (model_->param.solver_type==L2R_L2LOSS_SVR ||
3709
+ model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
3710
+ model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);
3711
+ }
3712
+
3713
+ int check_oneclass_model(const struct model *model_)
3714
+ {
3715
+ return model_->param.solver_type == ONECLASS_SVM;
3716
+ }
3717
+
3718
+ void set_print_string_function(void (*print_func)(const char*))
3719
+ {
3720
+ if (print_func == NULL)
3721
+ liblinear_print_string = &print_string_stdout;
3722
+ else
3723
+ liblinear_print_string = print_func;
3724
+ }
3725
+