ruby_linear 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,49 @@
+ #include "blas.h"
+
+ int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy,
+ 	   int *incy)
+ {
+ 	long int i, m, ix, iy, nn, iincx, iincy;
+ 	register double ssa;
+
+ 	/* constant times a vector plus a vector.
+ 	   uses unrolled loop for increments equal to one.
+ 	   jack dongarra, linpack, 3/11/78.
+ 	   modified 12/3/93, array(1) declarations changed to array(*) */
+
+ 	/* Dereference inputs */
+ 	nn = *n;
+ 	ssa = *sa;
+ 	iincx = *incx;
+ 	iincy = *incy;
+
+ 	if( nn > 0 && ssa != 0.0 )
+ 	{
+ 		if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */
+ 		{
+ 			m = nn-3;
+ 			for (i = 0; i < m; i += 4)
+ 			{
+ 				sy[i] += ssa * sx[i];
+ 				sy[i+1] += ssa * sx[i+1];
+ 				sy[i+2] += ssa * sx[i+2];
+ 				sy[i+3] += ssa * sx[i+3];
+ 			}
+ 			for ( ; i < nn; ++i) /* clean-up loop */
+ 				sy[i] += ssa * sx[i];
+ 		}
+ 		else /* code for unequal increments or equal increments not equal to 1 */
+ 		{
+ 			ix = iincx >= 0 ? 0 : (1 - nn) * iincx;
+ 			iy = iincy >= 0 ? 0 : (1 - nn) * iincy;
+ 			for (i = 0; i < nn; i++)
+ 			{
+ 				sy[iy] += ssa * sx[ix];
+ 				ix += iincx;
+ 				iy += iincy;
+ 			}
+ 		}
+ 	}
+
+ 	return 0;
+ } /* daxpy_ */
@@ -0,0 +1,50 @@
+ #include "blas.h"
+
+ double ddot_(int *n, double *sx, int *incx, double *sy, int *incy)
+ {
+ 	long int i, m, nn, iincx, iincy;
+ 	double stemp;
+ 	long int ix, iy;
+
+ 	/* forms the dot product of two vectors.
+ 	   uses unrolled loops for increments equal to one.
+ 	   jack dongarra, linpack, 3/11/78.
+ 	   modified 12/3/93, array(1) declarations changed to array(*) */
+
+ 	/* Dereference inputs */
+ 	nn = *n;
+ 	iincx = *incx;
+ 	iincy = *incy;
+
+ 	stemp = 0.0;
+ 	if (nn > 0)
+ 	{
+ 		if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */
+ 		{
+ 			m = nn-4;
+ 			for (i = 0; i < m; i += 5)
+ 				stemp += sx[i] * sy[i] + sx[i+1] * sy[i+1] + sx[i+2] * sy[i+2] +
+ 					 sx[i+3] * sy[i+3] + sx[i+4] * sy[i+4];
+
+ 			for ( ; i < nn; i++) /* clean-up loop */
+ 				stemp += sx[i] * sy[i];
+ 		}
+ 		else /* code for unequal increments or equal increments not equal to 1 */
+ 		{
+ 			ix = 0;
+ 			iy = 0;
+ 			if (iincx < 0)
+ 				ix = (1 - nn) * iincx;
+ 			if (iincy < 0)
+ 				iy = (1 - nn) * iincy;
+ 			for (i = 0; i < nn; i++)
+ 			{
+ 				stemp += sx[ix] * sy[iy];
+ 				ix += iincx;
+ 				iy += iincy;
+ 			}
+ 		}
+ 	}
+
+ 	return stemp;
+ } /* ddot_ */
@@ -0,0 +1,62 @@
+ #include <math.h> /* Needed for fabs() and sqrt() */
+ #include "blas.h"
+
+ double dnrm2_(int *n, double *x, int *incx)
+ {
+ 	long int ix, nn, iincx;
+ 	double norm, scale, absxi, ssq, temp;
+
+ 	/* DNRM2 returns the euclidean norm of a vector via the function
+ 	   name, so that
+
+ 	   DNRM2 := sqrt( x'*x )
+
+ 	   -- This version written on 25-October-1982.
+ 	   Modified on 14-October-1993 to inline the call to SLASSQ.
+ 	   Sven Hammarling, Nag Ltd. */
+
+ 	/* Dereference inputs */
+ 	nn = *n;
+ 	iincx = *incx;
+
+ 	if( nn > 0 && iincx > 0 )
+ 	{
+ 		if (nn == 1)
+ 		{
+ 			norm = fabs(x[0]);
+ 		}
+ 		else
+ 		{
+ 			scale = 0.0;
+ 			ssq = 1.0;
+
+ 			/* The following loop is equivalent to this call to the LAPACK
+ 			   auxiliary routine: CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */
+
+ 			for (ix=(nn-1)*iincx; ix>=0; ix-=iincx)
+ 			{
+ 				if (x[ix] != 0.0)
+ 				{
+ 					absxi = fabs(x[ix]);
+ 					if (scale < absxi)
+ 					{
+ 						temp = scale / absxi;
+ 						ssq = ssq * (temp * temp) + 1.0;
+ 						scale = absxi;
+ 					}
+ 					else
+ 					{
+ 						temp = absxi / scale;
+ 						ssq += temp * temp;
+ 					}
+ 				}
+ 			}
+ 			norm = scale * sqrt(ssq);
+ 		}
+ 	}
+ 	else
+ 		norm = 0.0;
+
+ 	return norm;
+
+ } /* dnrm2_ */
@@ -0,0 +1,44 @@
+ #include "blas.h"
+
+ int dscal_(int *n, double *sa, double *sx, int *incx)
+ {
+ 	long int i, m, nincx, nn, iincx;
+ 	double ssa;
+
+ 	/* scales a vector by a constant.
+ 	   uses unrolled loops for increment equal to 1.
+ 	   jack dongarra, linpack, 3/11/78.
+ 	   modified 3/93 to return if incx .le. 0.
+ 	   modified 12/3/93, array(1) declarations changed to array(*) */
+
+ 	/* Dereference inputs */
+ 	nn = *n;
+ 	iincx = *incx;
+ 	ssa = *sa;
+
+ 	if (nn > 0 && iincx > 0)
+ 	{
+ 		if (iincx == 1) /* code for increment equal to 1 */
+ 		{
+ 			m = nn-4;
+ 			for (i = 0; i < m; i += 5)
+ 			{
+ 				sx[i] = ssa * sx[i];
+ 				sx[i+1] = ssa * sx[i+1];
+ 				sx[i+2] = ssa * sx[i+2];
+ 				sx[i+3] = ssa * sx[i+3];
+ 				sx[i+4] = ssa * sx[i+4];
+ 			}
+ 			for ( ; i < nn; ++i) /* clean-up loop */
+ 				sx[i] = ssa * sx[i];
+ 		}
+ 		else /* code for increment not equal to 1 */
+ 		{
+ 			nincx = nn * iincx;
+ 			for (i = 0; i < nincx; i += iincx)
+ 				sx[i] = ssa * sx[i];
+ 		}
+ 	}
+
+ 	return 0;
+ } /* dscal_ */
@@ -0,0 +1,4 @@
+ require 'mkmf'
+ CONFIG["LDSHARED"] = "g++ -shared"
+ $CFLAGS = "#{ENV['CFLAGS']} -Wall -O3"
+ create_makefile('rubylinear_native')
@@ -0,0 +1,2385 @@
+ #include <math.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <stdarg.h>
+ #include "linear.h"
+ #include "tron.h"
+ typedef signed char schar;
+ template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
+ #ifndef min
+ template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
+ #endif
+ #ifndef max
+ template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
+ #endif
+ template <class S, class T> static inline void clone(T*& dst, S* src, int n)
+ {
+ 	dst = new T[n];
+ 	memcpy((void *)dst,(void *)src,sizeof(T)*n);
+ }
+ #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
+ #define INF HUGE_VAL
+
+ static void print_string_stdout(const char *s)
+ {
+ 	fputs(s,stdout);
+ 	fflush(stdout);
+ }
+
+ static void (*liblinear_print_string) (const char *) = &print_string_stdout;
+
+ #if 1
+ int info_on = 0;
+ static void info(const char *fmt,...)
+ {
+ 	char buf[BUFSIZ];
+ 	va_list ap;
+ 	if (info_on==1) {
+ 		va_start(ap,fmt);
+ 		vsprintf(buf,fmt,ap);
+ 		va_end(ap);
+ 		(*liblinear_print_string)(buf);
+ 	}
+ }
+ #else
+ static void info(const char *fmt,...) {}
+ #endif
+
+ class l2r_lr_fun : public function
+ {
+ public:
+ 	l2r_lr_fun(const problem *prob, double Cp, double Cn);
+ 	~l2r_lr_fun();
+
+ 	double fun(double *w);
+ 	void grad(double *w, double *g);
+ 	void Hv(double *s, double *Hs);
+
+ 	int get_nr_variable(void);
+
+ private:
+ 	void Xv(double *v, double *Xv);
+ 	void XTv(double *v, double *XTv);
+
+ 	double *C;
+ 	double *z;
+ 	double *D;
+ 	const problem *prob;
+ };
+
+ l2r_lr_fun::l2r_lr_fun(const problem *prob, double Cp, double Cn)
+ {
+ 	int i;
+ 	int l=prob->l;
+ 	int *y=prob->y;
+
+ 	this->prob = prob;
+
+ 	z = new double[l];
+ 	D = new double[l];
+ 	C = new double[l];
+
+ 	for (i=0; i<l; i++)
+ 	{
+ 		if (y[i] == 1)
+ 			C[i] = Cp;
+ 		else
+ 			C[i] = Cn;
+ 	}
+ }
+
+ l2r_lr_fun::~l2r_lr_fun()
+ {
+ 	delete[] z;
+ 	delete[] D;
+ 	delete[] C;
+ }
+
+
+ double l2r_lr_fun::fun(double *w)
+ {
+ 	int i;
+ 	double f=0;
+ 	int *y=prob->y;
+ 	int l=prob->l;
+ 	int w_size=get_nr_variable();
+
+ 	Xv(w, z);
+ 	for(i=0;i<l;i++)
+ 	{
+ 		double yz = y[i]*z[i];
+ 		if (yz >= 0)
+ 			f += C[i]*log(1 + exp(-yz));
+ 		else
+ 			f += C[i]*(-yz+log(1 + exp(yz)));
+ 	}
+ 	f = 2*f;
+ 	for(i=0;i<w_size;i++)
+ 		f += w[i]*w[i];
+ 	f /= 2.0;
+
+ 	return(f);
+ }
+
+ void l2r_lr_fun::grad(double *w, double *g)
+ {
+ 	int i;
+ 	int *y=prob->y;
+ 	int l=prob->l;
+ 	int w_size=get_nr_variable();
+
+ 	for(i=0;i<l;i++)
+ 	{
+ 		z[i] = 1/(1 + exp(-y[i]*z[i]));
+ 		D[i] = z[i]*(1-z[i]);
+ 		z[i] = C[i]*(z[i]-1)*y[i];
+ 	}
+ 	XTv(z, g);
+
+ 	for(i=0;i<w_size;i++)
+ 		g[i] = w[i] + g[i];
+ }
+
+ int l2r_lr_fun::get_nr_variable(void)
+ {
+ 	return prob->n;
+ }
+
+ void l2r_lr_fun::Hv(double *s, double *Hs)
+ {
+ 	int i;
+ 	int l=prob->l;
+ 	int w_size=get_nr_variable();
+ 	double *wa = new double[l];
+
+ 	Xv(s, wa);
+ 	for(i=0;i<l;i++)
+ 		wa[i] = C[i]*D[i]*wa[i];
+
+ 	XTv(wa, Hs);
+ 	for(i=0;i<w_size;i++)
+ 		Hs[i] = s[i] + Hs[i];
+ 	delete[] wa;
+ }
+
+ void l2r_lr_fun::Xv(double *v, double *Xv)
+ {
+ 	int i;
+ 	int l=prob->l;
+ 	feature_node **x=prob->x;
+
+ 	for(i=0;i<l;i++)
+ 	{
+ 		feature_node *s=x[i];
+ 		Xv[i]=0;
+ 		while(s->index!=-1)
+ 		{
+ 			Xv[i]+=v[s->index-1]*s->value;
+ 			s++;
+ 		}
+ 	}
+ }
+
+ void l2r_lr_fun::XTv(double *v, double *XTv)
+ {
+ 	int i;
+ 	int l=prob->l;
+ 	int w_size=get_nr_variable();
+ 	feature_node **x=prob->x;
+
+ 	for(i=0;i<w_size;i++)
+ 		XTv[i]=0;
+ 	for(i=0;i<l;i++)
+ 	{
+ 		feature_node *s=x[i];
+ 		while(s->index!=-1)
+ 		{
+ 			XTv[s->index-1]+=v[i]*s->value;
+ 			s++;
+ 		}
+ 	}
+ }
+
+ class l2r_l2_svc_fun : public function
+ {
+ public:
+ 	l2r_l2_svc_fun(const problem *prob, double Cp, double Cn);
+ 	~l2r_l2_svc_fun();
+
+ 	double fun(double *w);
+ 	void grad(double *w, double *g);
+ 	void Hv(double *s, double *Hs);
+
+ 	int get_nr_variable(void);
+
+ private:
+ 	void Xv(double *v, double *Xv);
+ 	void subXv(double *v, double *Xv);
+ 	void subXTv(double *v, double *XTv);
+
+ 	double *C;
+ 	double *z;
+ 	double *D;
+ 	int *I;
+ 	int sizeI;
+ 	const problem *prob;
+ };
+
+ l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double Cp, double Cn)
+ {
+ 	int i;
+ 	int l=prob->l;
+ 	int *y=prob->y;
+
+ 	this->prob = prob;
+
+ 	z = new double[l];
+ 	D = new double[l];
+ 	C = new double[l];
+ 	I = new int[l];
+
+ 	for (i=0; i<l; i++)
+ 	{
+ 		if (y[i] == 1)
+ 			C[i] = Cp;
+ 		else
+ 			C[i] = Cn;
+ 	}
+ }
+
+ l2r_l2_svc_fun::~l2r_l2_svc_fun()
+ {
+ 	delete[] z;
+ 	delete[] D;
+ 	delete[] C;
+ 	delete[] I;
+ }
+
+ double l2r_l2_svc_fun::fun(double *w)
+ {
+ 	int i;
+ 	double f=0;
+ 	int *y=prob->y;
+ 	int l=prob->l;
+ 	int w_size=get_nr_variable();
+
+ 	Xv(w, z);
+ 	for(i=0;i<l;i++)
+ 	{
+ 		z[i] = y[i]*z[i];
+ 		double d = 1-z[i];
+ 		if (d > 0)
+ 			f += C[i]*d*d;
+ 	}
+ 	f = 2*f;
+ 	for(i=0;i<w_size;i++)
+ 		f += w[i]*w[i];
+ 	f /= 2.0;
+
+ 	return(f);
+ }
+
+ void l2r_l2_svc_fun::grad(double *w, double *g)
+ {
+ 	int i;
+ 	int *y=prob->y;
+ 	int l=prob->l;
+ 	int w_size=get_nr_variable();
+
+ 	sizeI = 0;
+ 	for (i=0;i<l;i++)
+ 		if (z[i] < 1)
+ 		{
+ 			z[sizeI] = C[i]*y[i]*(z[i]-1);
+ 			I[sizeI] = i;
+ 			sizeI++;
+ 		}
+ 	subXTv(z, g);
+
+ 	for(i=0;i<w_size;i++)
+ 		g[i] = w[i] + 2*g[i];
+ }
+
+ int l2r_l2_svc_fun::get_nr_variable(void)
+ {
+ 	return prob->n;
+ }
+
+ void l2r_l2_svc_fun::Hv(double *s, double *Hs)
+ {
+ 	int i;
+ 	int l=prob->l;
+ 	int w_size=get_nr_variable();
+ 	double *wa = new double[l];
+
+ 	subXv(s, wa);
+ 	for(i=0;i<sizeI;i++)
+ 		wa[i] = C[I[i]]*wa[i];
+
+ 	subXTv(wa, Hs);
+ 	for(i=0;i<w_size;i++)
+ 		Hs[i] = s[i] + 2*Hs[i];
+ 	delete[] wa;
+ }
+
+ void l2r_l2_svc_fun::Xv(double *v, double *Xv)
+ {
+ 	int i;
+ 	int l=prob->l;
+ 	feature_node **x=prob->x;
+
+ 	for(i=0;i<l;i++)
+ 	{
+ 		feature_node *s=x[i];
+ 		Xv[i]=0;
+ 		while(s->index!=-1)
+ 		{
+ 			Xv[i]+=v[s->index-1]*s->value;
+ 			s++;
+ 		}
+ 	}
+ }
+
+ void l2r_l2_svc_fun::subXv(double *v, double *Xv)
+ {
+ 	int i;
+ 	feature_node **x=prob->x;
+
+ 	for(i=0;i<sizeI;i++)
+ 	{
+ 		feature_node *s=x[I[i]];
+ 		Xv[i]=0;
+ 		while(s->index!=-1)
+ 		{
+ 			Xv[i]+=v[s->index-1]*s->value;
+ 			s++;
+ 		}
+ 	}
+ }
+
+ void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
+ {
+ 	int i;
+ 	int w_size=get_nr_variable();
+ 	feature_node **x=prob->x;
+
+ 	for(i=0;i<w_size;i++)
+ 		XTv[i]=0;
+ 	for(i=0;i<sizeI;i++)
+ 	{
+ 		feature_node *s=x[I[i]];
+ 		while(s->index!=-1)
+ 		{
+ 			XTv[s->index-1]+=v[i]*s->value;
+ 			s++;
+ 		}
+ 	}
+ }
+
+ // A coordinate descent algorithm for
+ // multi-class support vector machines by Crammer and Singer
+ //
+ // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
+ // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
+ //
+ // where e^m_i = 0 if y_i = m,
+ // e^m_i = 1 if y_i != m,
+ // C^m_i = C if m = y_i,
+ // C^m_i = 0 if m != y_i,
+ // and w_m(\alpha) = \sum_i \alpha^m_i x_i
+ //
+ // Given:
+ // x, y, C
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Appendix of LIBLINEAR paper, Fan et al. (2008)
+
+ #define GETI(i) (prob->y[i])
+ // To support weights for instances, use GETI(i) (i)
+
+ class Solver_MCSVM_CS
+ {
+ public:
+ 	Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
+ 	~Solver_MCSVM_CS();
+ 	void Solve(double *w);
+ private:
+ 	void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
+ 	bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
+ 	double *B, *C, *G;
+ 	int w_size, l;
+ 	int nr_class;
+ 	int max_iter;
+ 	double eps;
+ 	const problem *prob;
+ };
+
+ Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
+ {
+ 	this->w_size = prob->n;
+ 	this->l = prob->l;
+ 	this->nr_class = nr_class;
+ 	this->eps = eps;
+ 	this->max_iter = max_iter;
+ 	this->prob = prob;
+ 	this->B = new double[nr_class];
+ 	this->G = new double[nr_class];
+ 	this->C = weighted_C;
+ }
+
+ Solver_MCSVM_CS::~Solver_MCSVM_CS()
+ {
+ 	delete[] B;
+ 	delete[] G;
+ }
+
+ int compare_double(const void *a, const void *b)
+ {
+ 	if(*(double *)a > *(double *)b)
+ 		return -1;
+ 	if(*(double *)a < *(double *)b)
+ 		return 1;
+ 	return 0;
+ }
+
+ void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
+ {
+ 	int r;
+ 	double *D;
+
+ 	clone(D, B, active_i);
+ 	if(yi < active_i)
+ 		D[yi] += A_i*C_yi;
+ 	qsort(D, active_i, sizeof(double), compare_double);
+
+ 	double beta = D[0] - A_i*C_yi;
+ 	for(r=1;r<active_i && beta<r*D[r];r++)
+ 		beta += D[r];
+
+ 	beta /= r;
+ 	for(r=0;r<active_i;r++)
+ 	{
+ 		if(r == yi)
+ 			alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
+ 		else
+ 			alpha_new[r] = min((double)0, (beta - B[r])/A_i);
+ 	}
+ 	delete[] D;
+ }
+
+ bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
+ {
+ 	double bound = 0;
+ 	if(m == yi)
+ 		bound = C[GETI(i)];
+ 	if(alpha_i == bound && G[m] < minG)
+ 		return true;
+ 	return false;
+ }
+
+ void Solver_MCSVM_CS::Solve(double *w)
+ {
+ 	int i, m, s;
+ 	int iter = 0;
+ 	double *alpha = new double[l*nr_class];
+ 	double *alpha_new = new double[nr_class];
+ 	int *index = new int[l];
+ 	double *QD = new double[l];
+ 	int *d_ind = new int[nr_class];
+ 	double *d_val = new double[nr_class];
+ 	int *alpha_index = new int[nr_class*l];
+ 	int *y_index = new int[l];
+ 	int active_size = l;
+ 	int *active_size_i = new int[l];
+ 	double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
+ 	bool start_from_all = true;
+ 	// initial
+ 	for(i=0;i<l*nr_class;i++)
+ 		alpha[i] = 0;
+ 	for(i=0;i<w_size*nr_class;i++)
+ 		w[i] = 0;
+ 	for(i=0;i<l;i++)
+ 	{
+ 		for(m=0;m<nr_class;m++)
+ 			alpha_index[i*nr_class+m] = m;
+ 		feature_node *xi = prob->x[i];
+ 		QD[i] = 0;
+ 		while(xi->index != -1)
+ 		{
+ 			QD[i] += (xi->value)*(xi->value);
+ 			xi++;
+ 		}
+ 		active_size_i[i] = nr_class;
+ 		y_index[i] = prob->y[i];
+ 		index[i] = i;
+ 	}
+
+ 	while(iter < max_iter)
+ 	{
+ 		double stopping = -INF;
+ 		for(i=0;i<active_size;i++)
+ 		{
+ 			int j = i+rand()%(active_size-i);
+ 			swap(index[i], index[j]);
+ 		}
+ 		for(s=0;s<active_size;s++)
+ 		{
+ 			i = index[s];
+ 			double Ai = QD[i];
+ 			double *alpha_i = &alpha[i*nr_class];
+ 			int *alpha_index_i = &alpha_index[i*nr_class];
+
+ 			if(Ai > 0)
+ 			{
+ 				for(m=0;m<active_size_i[i];m++)
+ 					G[m] = 1;
+ 				if(y_index[i] < active_size_i[i])
+ 					G[y_index[i]] = 0;
+
+ 				feature_node *xi = prob->x[i];
+ 				while(xi->index!= -1)
+ 				{
+ 					double *w_i = &w[(xi->index-1)*nr_class];
+ 					for(m=0;m<active_size_i[i];m++)
+ 						G[m] += w_i[alpha_index_i[m]]*(xi->value);
+ 					xi++;
+ 				}
+
+ 				double minG = INF;
+ 				double maxG = -INF;
+ 				for(m=0;m<active_size_i[i];m++)
+ 				{
+ 					if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
+ 						minG = G[m];
+ 					if(G[m] > maxG)
+ 						maxG = G[m];
+ 				}
+ 				if(y_index[i] < active_size_i[i])
+ 					if(alpha_i[prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
+ 						minG = G[y_index[i]];
+
+ 				for(m=0;m<active_size_i[i];m++)
+ 				{
+ 					if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
+ 					{
+ 						active_size_i[i]--;
+ 						while(active_size_i[i]>m)
+ 						{
+ 							if(!be_shrunk(i, active_size_i[i], y_index[i],
+ 									alpha_i[alpha_index_i[active_size_i[i]]], minG))
+ 							{
+ 								swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
+ 								swap(G[m], G[active_size_i[i]]);
+ 								if(y_index[i] == active_size_i[i])
+ 									y_index[i] = m;
+ 								else if(y_index[i] == m)
+ 									y_index[i] = active_size_i[i];
+ 								break;
+ 							}
+ 							active_size_i[i]--;
+ 						}
+ 					}
+ 				}
+
+ 				if(active_size_i[i] <= 1)
+ 				{
+ 					active_size--;
+ 					swap(index[s], index[active_size]);
+ 					s--;
+ 					continue;
+ 				}
+
+ 				if(maxG-minG <= 1e-12)
+ 					continue;
+ 				else
+ 					stopping = max(maxG - minG, stopping);
+
+ 				for(m=0;m<active_size_i[i];m++)
+ 					B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
+
+ 				solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
+ 				int nz_d = 0;
+ 				for(m=0;m<active_size_i[i];m++)
+ 				{
+ 					double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
+ 					alpha_i[alpha_index_i[m]] = alpha_new[m];
+ 					if(fabs(d) >= 1e-12)
+ 					{
+ 						d_ind[nz_d] = alpha_index_i[m];
+ 						d_val[nz_d] = d;
+ 						nz_d++;
+ 					}
+ 				}
+
+ 				xi = prob->x[i];
+ 				while(xi->index != -1)
+ 				{
+ 					double *w_i = &w[(xi->index-1)*nr_class];
+ 					for(m=0;m<nz_d;m++)
+ 						w_i[d_ind[m]] += d_val[m]*xi->value;
+ 					xi++;
+ 				}
+ 			}
+ 		}
+
+ 		iter++;
+ 		if(iter % 10 == 0)
+ 		{
+ 			info(".");
+ 		}
+
+ 		if(stopping < eps_shrink)
+ 		{
+ 			if(stopping < eps && start_from_all == true)
+ 				break;
+ 			else
+ 			{
+ 				active_size = l;
+ 				for(i=0;i<l;i++)
+ 					active_size_i[i] = nr_class;
+ 				info("*");
+ 				eps_shrink = max(eps_shrink/2, eps);
+ 				start_from_all = true;
+ 			}
+ 		}
+ 		else
+ 			start_from_all = false;
+ 	}
+
+ 	info("\noptimization finished, #iter = %d\n",iter);
+ 	if (iter >= max_iter)
+ 		info("\nWARNING: reaching max number of iterations\n");
+
+ 	// calculate objective value
+ 	double v = 0;
+ 	int nSV = 0;
+ 	for(i=0;i<w_size*nr_class;i++)
+ 		v += w[i]*w[i];
+ 	v = 0.5*v;
+ 	for(i=0;i<l*nr_class;i++)
+ 	{
+ 		v += alpha[i];
+ 		if(fabs(alpha[i]) > 0)
+ 			nSV++;
+ 	}
+ 	for(i=0;i<l;i++)
+ 		v -= alpha[i*nr_class+prob->y[i]];
+ 	info("Objective value = %lf\n",v);
+ 	info("nSV = %d\n",nSV);
+
+ 	delete [] alpha;
+ 	delete [] alpha_new;
+ 	delete [] index;
+ 	delete [] QD;
+ 	delete [] d_ind;
+ 	delete [] d_val;
+ 	delete [] alpha_index;
+ 	delete [] y_index;
+ 	delete [] active_size_i;
+ }
+
+ // A coordinate descent algorithm for
+ // L1-loss and L2-loss SVM dual problems
+ //
+ // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
+ // s.t. 0 <= alpha_i <= upper_bound_i,
+ //
+ // where Qij = yi yj xi^T xj and
+ // D is a diagonal matrix
+ //
+ // In L1-SVM case:
+ // upper_bound_i = Cp if y_i = 1
+ // upper_bound_i = Cn if y_i = -1
+ // D_ii = 0
+ // In L2-SVM case:
+ // upper_bound_i = INF
+ // D_ii = 1/(2*Cp) if y_i = 1
+ // D_ii = 1/(2*Cn) if y_i = -1
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Algorithm 3 of Hsieh et al., ICML 2008
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
+
+ static void solve_l2r_l1l2_svc(
+ 	const problem *prob, double *w, double eps,
+ 	double Cp, double Cn, int solver_type)
+ {
+ 	int l = prob->l;
+ 	int w_size = prob->n;
+ 	int i, s, iter = 0;
+ 	double C, d, G;
+ 	double *QD = new double[l];
+ 	int max_iter = 1000;
+ 	int *index = new int[l];
+ 	double *alpha = new double[l];
+ 	schar *y = new schar[l];
+ 	int active_size = l;
+
+ 	// PG: projected gradient, for shrinking and stopping
+ 	double PG;
+ 	double PGmax_old = INF;
+ 	double PGmin_old = -INF;
+ 	double PGmax_new, PGmin_new;
+
+ 	// default solver_type: L2R_L2LOSS_SVC_DUAL
+ 	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
+ 	double upper_bound[3] = {INF, 0, INF};
+ 	if(solver_type == L2R_L1LOSS_SVC_DUAL)
+ 	{
+ 		diag[0] = 0;
+ 		diag[2] = 0;
+ 		upper_bound[0] = Cn;
+ 		upper_bound[2] = Cp;
+ 	}
+
+ 	for(i=0; i<w_size; i++)
+ 		w[i] = 0;
+ 	for(i=0; i<l; i++)
+ 	{
+ 		alpha[i] = 0;
+ 		if(prob->y[i] > 0)
+ 		{
+ 			y[i] = +1;
+ 		}
+ 		else
+ 		{
+ 			y[i] = -1;
+ 		}
+ 		QD[i] = diag[GETI(i)];
+
+ 		feature_node *xi = prob->x[i];
+ 		while (xi->index != -1)
+ 		{
+ 			QD[i] += (xi->value)*(xi->value);
+ 			xi++;
+ 		}
+ 		index[i] = i;
+ 	}
+
+ 	while (iter < max_iter)
+ 	{
+ 		PGmax_new = -INF;
+ 		PGmin_new = INF;
+
+ 		for (i=0; i<active_size; i++)
+ 		{
+ 			int j = i+rand()%(active_size-i);
+ 			swap(index[i], index[j]);
+ 		}
+
+ 		for (s=0; s<active_size; s++)
+ 		{
+ 			i = index[s];
+ 			G = 0;
+ 			schar yi = y[i];
+
+ 			feature_node *xi = prob->x[i];
+ 			while(xi->index!= -1)
+ 			{
+ 				G += w[xi->index-1]*(xi->value);
+ 				xi++;
+ 			}
+ 			G = G*yi-1;
+
+ 			C = upper_bound[GETI(i)];
+ 			G += alpha[i]*diag[GETI(i)];
+
+ 			PG = 0;
+ 			if (alpha[i] == 0)
+ 			{
+ 				if (G > PGmax_old)
+ 				{
+ 					active_size--;
+ 					swap(index[s], index[active_size]);
+ 					s--;
+ 					continue;
+ 				}
+ 				else if (G < 0)
+ 					PG = G;
+ 			}
+ 			else if (alpha[i] == C)
+ 			{
+ 				if (G < PGmin_old)
+ 				{
+ 					active_size--;
+ 					swap(index[s], index[active_size]);
+ 					s--;
+ 					continue;
+ 				}
+ 				else if (G > 0)
+ 					PG = G;
+ 			}
+ 			else
+ 				PG = G;
+
+ 			PGmax_new = max(PGmax_new, PG);
+ 			PGmin_new = min(PGmin_new, PG);
+
+ 			if(fabs(PG) > 1.0e-12)
+ 			{
+ 				double alpha_old = alpha[i];
+ 				alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
+ 				d = (alpha[i] - alpha_old)*yi;
+ 				xi = prob->x[i];
+ 				while (xi->index != -1)
+ 				{
+ 					w[xi->index-1] += d*xi->value;
+ 					xi++;
+ 				}
+ 			}
+ 		}
+
+ 		iter++;
+ 		if(iter % 10 == 0)
+ 			info(".");
+
+ 		if(PGmax_new - PGmin_new <= eps)
+ 		{
+ 			if(active_size == l)
+ 				break;
+ 			else
+ 			{
+ 				active_size = l;
+ 				info("*");
+ 				PGmax_old = INF;
+ 				PGmin_old = -INF;
+ 				continue;
+ 			}
+ 		}
+ 		PGmax_old = PGmax_new;
+ 		PGmin_old = PGmin_new;
+ 		if (PGmax_old <= 0)
+ 			PGmax_old = INF;
+ 		if (PGmin_old >= 0)
+ 			PGmin_old = -INF;
+ 	}
+
+ 	info("\noptimization finished, #iter = %d\n",iter);
+ 	if (iter >= max_iter)
+ 		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
+
+ 	// calculate objective value
+
+ 	double v = 0;
+ 	int nSV = 0;
+ 	for(i=0; i<w_size; i++)
+ 		v += w[i]*w[i];
+ 	for(i=0; i<l; i++)
+ 	{
+ 		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
+ 		if(alpha[i] > 0)
+ 			++nSV;
+ 	}
+ 	info("Objective value = %lf\n",v/2);
+ 	info("nSV = %d\n",nSV);
+
+ 	delete [] QD;
+ 	delete [] alpha;
+ 	delete [] y;
+ 	delete [] index;
+ }
+
+ // A coordinate descent algorithm for
+ // the dual of L2-regularized logistic regression problems
+ //
+ // min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - alpha_i) log (upper_bound_i - alpha_i) ,
+ // s.t. 0 <= alpha_i <= upper_bound_i,
+ //
+ // where Qij = yi yj xi^T xj and
+ // upper_bound_i = Cp if y_i = 1
+ // upper_bound_i = Cn if y_i = -1
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Algorithm 5 of Yu et al., MLJ 2010
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
+
+ void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
+ {
+ 	int l = prob->l;
+ 	int w_size = prob->n;
+ 	int i, s, iter = 0;
+ 	double *xTx = new double[l];
+ 	int max_iter = 1000;
+ 	int *index = new int[l];
+ 	double *alpha = new double[2*l]; // store alpha and C - alpha
+ 	schar *y = new schar[l];
+ 	int max_inner_iter = 100; // for inner Newton
+ 	double innereps = 1e-2;
+ 	double innereps_min = min(1e-8, eps);
+ 	double upper_bound[3] = {Cn, 0, Cp};
+
+ 	for(i=0; i<w_size; i++)
+ 		w[i] = 0;
+ 	for(i=0; i<l; i++)
+ 	{
+ 		if(prob->y[i] > 0)
+ 		{
+ 			y[i] = +1;
+ 		}
+ 		else
+ 		{
+ 			y[i] = -1;
+ 		}
+ 		alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
+ 		alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
+
+ 		xTx[i] = 0;
+ 		feature_node *xi = prob->x[i];
+ 		while (xi->index != -1)
+ 		{
+ 			xTx[i] += (xi->value)*(xi->value);
+ 			w[xi->index-1] += y[i]*alpha[2*i]*xi->value;
+ 			xi++;
+ 		}
+ 		index[i] = i;
+ 	}
+
+ 	while (iter < max_iter)
+ 	{
+ 		for (i=0; i<l; i++)
+ 		{
+ 			int j = i+rand()%(l-i);
+ 			swap(index[i], index[j]);
+ 		}
+ 		int newton_iter = 0;
+ 		double Gmax = 0;
+ 		for (s=0; s<l; s++)
+ 		{
+ 			i = index[s];
+ 			schar yi = y[i];
+ 			double C = upper_bound[GETI(i)];
+ 			double ywTx = 0, xisq = xTx[i];
+ 			feature_node *xi = prob->x[i];
+ 			while (xi->index != -1)
+ 			{
+ 				ywTx += w[xi->index-1]*xi->value;
+ 				xi++;
+ 			}
+ 			ywTx *= y[i];
+ 			double a = xisq, b = ywTx;
+
+ 			// Decide to minimize g_1(z) or g_2(z)
+ 			int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
+ 			if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
+ 			{
+ 				ind1 = 2*i+1;
+ 				ind2 = 2*i;
+ 				sign = -1;
+ 			}
+
+ 			// g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
+ 			double alpha_old = alpha[ind1];
+ 			double z = alpha_old;
+ 			if(C - z < 0.5 * C)
+ 				z = 0.1*z;
+ 			double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
+ 			Gmax = max(Gmax, fabs(gp));
+
+ 			// Newton method on the sub-problem
+ 			const double eta = 0.1; // xi in the paper
+ 			int inner_iter = 0;
+ 			while (inner_iter <= max_inner_iter)
+ 			{
+ 				if(fabs(gp) < innereps)
+ 					break;
+ 				double gpp = a + C/(C-z)/z;
+ 				double tmpz = z - gp/gpp;
+ 				if(tmpz <= 0)
+ 					z *= eta;
+ 				else // tmpz in (0, C)
+ 					z = tmpz;
+ 				gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
+ 				newton_iter++;
+ 				inner_iter++;
+ 			}
+
+ 			if(inner_iter > 0) // update w
+ 			{
+ 				alpha[ind1] = z;
+ 				alpha[ind2] = C-z;
+ 				xi = prob->x[i];
+ 				while (xi->index != -1)
+ 				{
+ 					w[xi->index-1] += sign*(z-alpha_old)*yi*xi->value;
+ 					xi++;
+ 				}
+ 			}
+ 		}
+
+ 		iter++;
+ 		if(iter % 10 == 0)
+ 			info(".");
+
+ 		if(Gmax < eps)
+ 			break;
+
+ 		if(newton_iter <= l/10)
+ 			innereps = max(innereps_min, 0.1*innereps);
+
+ 	}
+
+ 	info("\noptimization finished, #iter = %d\n",iter);
+ 	if (iter >= max_iter)
+ 		info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
+
+ 	// calculate objective value
+
+ 	double v = 0;
+ 	for(i=0; i<w_size; i++)
+ 		v += w[i] * w[i];
+ 	v *= 0.5;
+ 	for(i=0; i<l; i++)
+ 		v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
+ 			- upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
+ 	info("Objective value = %lf\n", v);
+
+ 	delete [] xTx;
+ 	delete [] alpha;
+ 	delete [] y;
+ 	delete [] index;
+ }
+
+ // A coordinate descent algorithm for
+ // L1-regularized L2-loss support vector classification
+ //
+ // min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
+
+ static void solve_l1r_l2_svc(
+ 	problem *prob_col, double *w, double eps,
+ 	double Cp, double Cn)
+ {
+ 	int l = prob_col->l;
+ 	int w_size = prob_col->n;
+ 	int j, s, iter = 0;
+ 	int max_iter = 1000;
+ 	int active_size = w_size;
+ 	int max_num_linesearch = 20;
+
+ 	double sigma = 0.01;
+ 	double d, G_loss, G, H;
+ 	double Gmax_old = INF;
+ 	double Gmax_new, Gnorm1_new;
+ 	double Gnorm1_init;
+ 	double d_old, d_diff;
+ 	double loss_old, loss_new;
+ 	double appxcond, cond;
+
+ 	int *index = new int[w_size];
+ 	schar *y = new schar[l];
+ 	double *b = new double[l]; // b = 1-ywTx
+ 	double *xj_sq = new double[w_size];
+ 	feature_node *x;
+
+ 	double C[3] = {Cn,0,Cp};
+
+ 	for(j=0; j<l; j++)
+ 	{
+ 		b[j] = 1;
+ 		if(prob_col->y[j] > 0)
+ 			y[j] = 1;
+ 		else
+ 			y[j] = -1;
+ 	}
+ 	for(j=0; j<w_size; j++)
+ 	{
+ 		w[j] = 0;
+ 		index[j] = j;
+ 		xj_sq[j] = 0;
+ 		x = prob_col->x[j];
+ 		while(x->index != -1)
+ 		{
+ 			int ind = x->index-1;
+ 			double val = x->value;
+ 			x->value *= y[ind]; // x->value stores yi*xij
+ 			xj_sq[j] += C[GETI(ind)]*val*val;
+ 			x++;
+ 		}
+ 	}
+
+ 	while(iter < max_iter)
+ 	{
+ 		Gmax_new = 0;
+ 		Gnorm1_new = 0;
+
+ 		for(j=0; j<active_size; j++)
+ 		{
+ 			int i = j+rand()%(active_size-j);
+ 			swap(index[i], index[j]);
+ 		}
+
+ 		for(s=0; s<active_size; s++)
+ 		{
+ 			j = index[s];
+ 			G_loss = 0;
+ 			H = 0;
+
+ 			x = prob_col->x[j];
+ 			while(x->index != -1)
+ 			{
+ 				int ind = x->index-1;
+ 				if(b[ind] > 0)
+ 				{
+ 					double val = x->value;
+ 					double tmp = C[GETI(ind)]*val;
+ 					G_loss -= tmp*b[ind];
+ 					H += tmp*val;
+ 				}
+ 				x++;
+ 			}
+ 			G_loss *= 2;
+
+ 			G = G_loss;
+ 			H *= 2;
+ 			H = max(H, 1e-12);
+
+ 			double Gp = G+1;
+ 			double Gn = G-1;
+ 			double violation = 0;
+ 			if(w[j] == 0)
+ 			{
+ 				if(Gp < 0)
+ 					violation = -Gp;
+ 				else if(Gn > 0)
+ 					violation = Gn;
+ 				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
+ 				{
+ 					active_size--;
+ 					swap(index[s], index[active_size]);
+ 					s--;
+ 					continue;
+ 				}
+ 			}
+ 			else if(w[j] > 0)
+ 				violation = fabs(Gp);
+ 			else
+ 				violation = fabs(Gn);
+
+ 			Gmax_new = max(Gmax_new, violation);
+ 			Gnorm1_new += violation;
+
+ 			// obtain Newton direction d
+ 			if(Gp <= H*w[j])
+ 				d = -Gp/H;
+ 			else if(Gn >= H*w[j])
+ 				d = -Gn/H;
+ 			else
+ 				d = -w[j];
+
+ 			if(fabs(d) < 1.0e-12)
+ 				continue;
+
+ 			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
+ 			d_old = 0;
+ 			int num_linesearch;
+ 			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
+ 			{
+ 				d_diff = d_old - d;
+ 				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
+
+ 				appxcond = xj_sq[j]*d*d + G_loss*d + cond;
+ 				if(appxcond <= 0)
+ 				{
+ 					x = prob_col->x[j];
+ 					while(x->index != -1)
+ 					{
+ 						b[x->index-1] += d_diff*x->value;
+ 						x++;
+ 					}
+ 					break;
+ 				}
+
+ 				if(num_linesearch == 0)
+ 				{
+ 					loss_old = 0;
+ 					loss_new = 0;
+ 					x = prob_col->x[j];
+ 					while(x->index != -1)
+ 					{
+ 						int ind = x->index-1;
+ 						if(b[ind] > 0)
+ 							loss_old += C[GETI(ind)]*b[ind]*b[ind];
+ 						double b_new = b[ind] + d_diff*x->value;
+ 						b[ind] = b_new;
+ 						if(b_new > 0)
+ 							loss_new += C[GETI(ind)]*b_new*b_new;
+ 						x++;
+ 					}
+ 				}
+ 				else
+ 				{
+ 					loss_new = 0;
+ 					x = prob_col->x[j];
+ 					while(x->index != -1)
+ 					{
+ 						int ind = x->index-1;
+ 						double b_new = b[ind] + d_diff*x->value;
+ 						b[ind] = b_new;
+ 						if(b_new > 0)
+ 							loss_new += C[GETI(ind)]*b_new*b_new;
+ 						x++;
+ 					}
+ 				}
+
+ 				cond = cond + loss_new - loss_old;
+ 				if(cond <= 0)
+ 					break;
+ 				else
+ 				{
+ 					d_old = d;
+ 					d *= 0.5;
+ 					delta *= 0.5;
+ 				}
+ 			}
+
+ 			w[j] += d;
+
+ 			// recompute b[] if line search takes too many steps
+ 			if(num_linesearch >= max_num_linesearch)
+ 			{
+ 				info("#");
+ 				for(int i=0; i<l; i++)
+ 					b[i] = 1;
+
+ 				for(int i=0; i<w_size; i++)
+ 				{
+ 					if(w[i]==0) continue;
+ 					x = prob_col->x[i];
+ 					while(x->index != -1)
+ 					{
+ 						b[x->index-1] -= w[i]*x->value;
+ 						x++;
+ 					}
+ 				}
+ 			}
+ 		}
+
+ 		if(iter == 0)
+ 			Gnorm1_init = Gnorm1_new;
+ 		iter++;
+ 		if(iter % 10 == 0)
+ 			info(".");
+
+ 		if(Gnorm1_new <= eps*Gnorm1_init)
+ 		{
+ 			if(active_size == w_size)
+ 				break;
+ 			else
+ 			{
+ 				active_size = w_size;
+ 				info("*");
+ 				Gmax_old = INF;
+ 				continue;
+ 			}
+ 		}
+
+ 		Gmax_old = Gmax_new;
+ 	}
+
+ 	info("\noptimization finished, #iter = %d\n", iter);
+ 	if(iter >= max_iter)
+ 		info("\nWARNING: reaching max number of iterations\n");
+
+ 	// calculate objective value
+
+ 	double v = 0;
+ 	int nnz = 0;
+ 	for(j=0; j<w_size; j++)
+ 	{
+ 		x = prob_col->x[j];
+ 		while(x->index != -1)
+ 		{
+ 			x->value *= prob_col->y[x->index-1]; // restore x->value
+ 			x++;
+ 		}
+ 		if(w[j] != 0)
+ 		{
+ 			v += fabs(w[j]);
+ 			nnz++;
+ 		}
+ 	}
+ 	for(j=0; j<l; j++)
+ 		if(b[j] > 0)
+ 			v += C[GETI(j)]*b[j]*b[j];
+
+ 	info("Objective value = %lf\n", v);
+ 	info("#nonzeros/#features = %d/%d\n", nnz, w_size);
+
+ 	delete [] index;
+ 	delete [] y;
+ 	delete [] b;
+ 	delete [] xj_sq;
+ }
+
+ // A coordinate descent algorithm for
+ // L1-regularized logistic regression problems
+ //
+ // min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
+
+ static void solve_l1r_lr(
+ 	const problem *prob_col, double *w, double eps,
+ 	double Cp, double Cn)
+ {
+ 	int l = prob_col->l;
+ 	int w_size = prob_col->n;
+ 	int j, s, newton_iter=0, iter=0;
+ 	int max_newton_iter = 100;
+ 	int max_iter = 1000;
+ 	int max_num_linesearch = 20;
+ 	int active_size;
+ 	int QP_active_size;
+
+ 	double nu = 1e-12;
+ 	double inner_eps = 1;
+ 	double sigma = 0.01;
+ 	double w_norm=0, w_norm_new;
+ 	double z, G, H;
+ 	double Gnorm1_init;
+ 	double Gmax_old = INF;
+ 	double Gmax_new, Gnorm1_new;
+ 	double QP_Gmax_old = INF;
+ 	double QP_Gmax_new, QP_Gnorm1_new;
+ 	double delta, negsum_xTd, cond;
+
+ 	int *index = new int[w_size];
+ 	schar *y = new schar[l];
+ 	double *Hdiag = new double[w_size];
+ 	double *Grad = new double[w_size];
+ 	double *wpd = new double[w_size];
+ 	double *xjneg_sum = new double[w_size];
+ 	double *xTd = new double[l];
+ 	double *exp_wTx = new double[l];
+ 	double *exp_wTx_new = new double[l];
+ 	double *tau = new double[l];
+ 	double *D = new double[l];
+ 	feature_node *x;
+
+ 	double C[3] = {Cn,0,Cp};
+
+ 	for(j=0; j<l; j++)
+ 	{
+ 		if(prob_col->y[j] > 0)
+ 			y[j] = 1;
+ 		else
+ 			y[j] = -1;
+
+ 		// assume initial w is 0
+ 		exp_wTx[j] = 1;
+ 		tau[j] = C[GETI(j)]*0.5;
+ 		D[j] = C[GETI(j)]*0.25;
+ 	}
+ 	for(j=0; j<w_size; j++)
+ 	{
+ 		w[j] = 0;
+ 		wpd[j] = w[j];
+ 		index[j] = j;
+ 		xjneg_sum[j] = 0;
+ 		x = prob_col->x[j];
+ 		while(x->index != -1)
+ 		{
+ 			int ind = x->index-1;
+ 			if(y[ind] == -1)
+ 				xjneg_sum[j] += C[GETI(ind)]*x->value;
+ 			x++;
+ 		}
+ 	}
+
+ 	while(newton_iter < max_newton_iter)
+ 	{
+ 		Gmax_new = 0;
+ 		Gnorm1_new = 0;
+ 		active_size = w_size;
+
+ 		for(s=0; s<active_size; s++)
+ 		{
+ 			j = index[s];
+ 			Hdiag[j] = nu;
+ 			Grad[j] = 0;
+
+ 			double tmp = 0;
+ 			x = prob_col->x[j];
+ 			while(x->index != -1)
+ 			{
+ 				int ind = x->index-1;
+ 				Hdiag[j] += x->value*x->value*D[ind];
+ 				tmp += x->value*tau[ind];
+ 				x++;
+ 			}
+ 			Grad[j] = -tmp + xjneg_sum[j];
+
+ 			double Gp = Grad[j]+1;
+ 			double Gn = Grad[j]-1;
+ 			double violation = 0;
+ 			if(w[j] == 0)
+ 			{
+ 				if(Gp < 0)
+ 					violation = -Gp;
+ 				else if(Gn > 0)
+ 					violation = Gn;
+ 				//outer-level shrinking
+ 				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
+ 				{
+ 					active_size--;
+ 					swap(index[s], index[active_size]);
+ 					s--;
+ 					continue;
+ 				}
+ 			}
+ 			else if(w[j] > 0)
+ 				violation = fabs(Gp);
+ 			else
+ 				violation = fabs(Gn);
+
+ 			Gmax_new = max(Gmax_new, violation);
+ 			Gnorm1_new += violation;
+ 		}
+
+ 		if(newton_iter == 0)
+ 			Gnorm1_init = Gnorm1_new;
+
+ 		if(Gnorm1_new <= eps*Gnorm1_init)
+ 			break;
+
+ 		iter = 0;
+ 		QP_Gmax_old = INF;
+ 		QP_active_size = active_size;
+
+ 		for(int i=0; i<l; i++)
+ 			xTd[i] = 0;
+
+ 		// optimize QP over wpd
+ 		while(iter < max_iter)
+ 		{
+ 			QP_Gmax_new = 0;
+ 			QP_Gnorm1_new = 0;
+
+ 			for(j=0; j<QP_active_size; j++)
+ 			{
+ 				int i = j+rand()%(QP_active_size-j);
+ 				swap(index[i], index[j]);
+ 			}
+
+ 			for(s=0; s<QP_active_size; s++)
+ 			{
+ 				j = index[s];
+ 				H = Hdiag[j];
+
+ 				x = prob_col->x[j];
+ 				G = Grad[j] + (wpd[j]-w[j])*nu;
+ 				while(x->index != -1)
+ 				{
+ 					int ind = x->index-1;
+ 					G += x->value*D[ind]*xTd[ind];
+ 					x++;
+ 				}
+
+ 				double Gp = G+1;
+ 				double Gn = G-1;
+ 				double violation = 0;
+ 				if(wpd[j] == 0)
+ 				{
+ 					if(Gp < 0)
+ 						violation = -Gp;
+ 					else if(Gn > 0)
+ 						violation = Gn;
+ 					//inner-level shrinking
+ 					else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
+ 					{
+ 						QP_active_size--;
+ 						swap(index[s], index[QP_active_size]);
+ 						s--;
+ 						continue;
+ 					}
+ 				}
+ 				else if(wpd[j] > 0)
+ 					violation = fabs(Gp);
+ 				else
+ 					violation = fabs(Gn);
+
+ 				QP_Gmax_new = max(QP_Gmax_new, violation);
+ 				QP_Gnorm1_new += violation;
+
+ 				// obtain solution of one-variable problem
+ 				if(Gp <= H*wpd[j])
+ 					z = -Gp/H;
+ 				else if(Gn >= H*wpd[j])
+ 					z = -Gn/H;
+ 				else
+ 					z = -wpd[j];
+
+ 				if(fabs(z) < 1.0e-12)
+ 					continue;
+ 				z = min(max(z,-10.0),10.0);
+
+ 				wpd[j] += z;
+
+ 				x = prob_col->x[j];
+ 				while(x->index != -1)
+ 				{
+ 					int ind = x->index-1;
+ 					xTd[ind] += x->value*z;
+ 					x++;
+ 				}
+ 			}
+
+ 			iter++;
+
+ 			if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
+ 			{
+ 				//inner stopping
+ 				if(QP_active_size == active_size)
+ 					break;
+ 				//active set reactivation
+ 				else
+ 				{
+ 					QP_active_size = active_size;
+ 					QP_Gmax_old = INF;
+ 					continue;
+ 				}
+ 			}
+
+ 			QP_Gmax_old = QP_Gmax_new;
+ 		}
+
+ 		if(iter >= max_iter)
+ 			info("WARNING: reaching max number of inner iterations\n");
+
+ 		delta = 0;
+ 		w_norm_new = 0;
+ 		for(j=0; j<w_size; j++)
+ 		{
+ 			delta += Grad[j]*(wpd[j]-w[j]);
+ 			if(wpd[j] != 0)
+ 				w_norm_new += fabs(wpd[j]);
+ 		}
+ 		delta += (w_norm_new-w_norm);
+
+ 		negsum_xTd = 0;
+ 		for(int i=0; i<l; i++)
+ 			if(y[i] == -1)
+ 				negsum_xTd += C[GETI(i)]*xTd[i];
+
+ 		int num_linesearch;
+ 		for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
+ 		{
+ 			cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;
+
+ 			for(int i=0; i<l; i++)
+ 			{
+ 				double exp_xTd = exp(xTd[i]);
+ 				exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
+ 				cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
+ 			}
+
+ 			if(cond <= 0)
+ 			{
+ 				w_norm = w_norm_new;
+ 				for(j=0; j<w_size; j++)
+ 					w[j] = wpd[j];
+ 				for(int i=0; i<l; i++)
+ 				{
+ 					exp_wTx[i] = exp_wTx_new[i];
+ 					double tau_tmp = 1/(1+exp_wTx[i]);
+ 					tau[i] = C[GETI(i)]*tau_tmp;
+ 					D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
+ 				}
+ 				break;
+ 			}
+ 			else
+ 			{
+ 				w_norm_new = 0;
+ 				for(j=0; j<w_size; j++)
+ 				{
+ 					wpd[j] = (w[j]+wpd[j])*0.5;
+ 					if(wpd[j] != 0)
+ 						w_norm_new += fabs(wpd[j]);
+ 				}
+ 				delta *= 0.5;
+ 				negsum_xTd *= 0.5;
+ 				for(int i=0; i<l; i++)
+ 					xTd[i] *= 0.5;
+ 			}
+ 		}
+
+ 		// Recompute some info due to too many line search steps
+ 		if(num_linesearch >= max_num_linesearch)
+ 		{
+ 			for(int i=0; i<l; i++)
+ 				exp_wTx[i] = 0;
+
+ 			for(int i=0; i<w_size; i++)
+ 			{
+ 				if(w[i]==0) continue;
+ 				x = prob_col->x[i];
+ 				while(x->index != -1)
+ 				{
+ 					exp_wTx[x->index-1] += w[i]*x->value;
+ 					x++;
+ 				}
+ 			}
+
+ 			for(int i=0; i<l; i++)
+ 				exp_wTx[i] = exp(exp_wTx[i]);
+ 		}
+
+ 		if(iter == 1)
+ 			inner_eps *= 0.25;
+
+ 		newton_iter++;
+ 		Gmax_old = Gmax_new;
+
+ 		info("iter %3d #CD cycles %d\n", newton_iter, iter);
+ 	}
+
+ 	info("=========================\n");
+ 	info("optimization finished, #iter = %d\n", newton_iter);
+ 	if(newton_iter >= max_newton_iter)
+ 		info("WARNING: reaching max number of iterations\n");
+
+ 	// calculate objective value
+
+ 	double v = 0;
+ 	int nnz = 0;
+ 	for(j=0; j<w_size; j++)
+ 		if(w[j] != 0)
+ 		{
+ 			v += fabs(w[j]);
+ 			nnz++;
+ 		}
+ 	for(j=0; j<l; j++)
+ 		if(y[j] == 1)
+ 			v += C[GETI(j)]*log(1+1/exp_wTx[j]);
+ 		else
+ 			v += C[GETI(j)]*log(1+exp_wTx[j]);
+
+ 	info("Objective value = %lf\n", v);
+ 	info("#nonzeros/#features = %d/%d\n", nnz, w_size);
+
+ 	delete [] index;
+ 	delete [] y;
+ 	delete [] Hdiag;
+ 	delete [] Grad;
+ 	delete [] wpd;
+ 	delete [] xjneg_sum;
+ 	delete [] xTd;
+ 	delete [] exp_wTx;
+ 	delete [] exp_wTx_new;
+ 	delete [] tau;
+ 	delete [] D;
+ }
+
+ // transpose matrix X from row format to column format
+ static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
+ {
+ 	int i;
+ 	int l = prob->l;
+ 	int n = prob->n;
+ 	int nnz = 0;
+ 	int *col_ptr = new int[n+1];
+ 	feature_node *x_space;
+ 	prob_col->l = l;
+ 	prob_col->n = n;
+ 	prob_col->y = new int[l];
+ 	prob_col->x = new feature_node*[n];
+
+ 	for(i=0; i<l; i++)
+ 		prob_col->y[i] = prob->y[i];
+
+ 	for(i=0; i<n+1; i++)
+ 		col_ptr[i] = 0;
+ 	for(i=0; i<l; i++)
+ 	{
+ 		feature_node *x = prob->x[i];
+ 		while(x->index != -1)
+ 		{
+ 			nnz++;
+ 			col_ptr[x->index]++;
+ 			x++;
+ 		}
+ 	}
+ 	for(i=1; i<n+1; i++)
+ 		col_ptr[i] += col_ptr[i-1] + 1;
+
+ 	x_space = new feature_node[nnz+n];
+ 	for(i=0; i<n; i++)
+ 		prob_col->x[i] = &x_space[col_ptr[i]];
+
+ 	for(i=0; i<l; i++)
+ 	{
+ 		feature_node *x = prob->x[i];
+ 		while(x->index != -1)
+ 		{
+ 			int ind = x->index-1;
+ 			x_space[col_ptr[ind]].index = i+1; // starts from 1
+ 			x_space[col_ptr[ind]].value = x->value;
+ 			col_ptr[ind]++;
+ 			x++;
+ 		}
+ 	}
+ 	for(i=0; i<n; i++)
+ 		x_space[col_ptr[i]].index = -1;
+
+ 	*x_space_ret = x_space;
+
+ 	delete [] col_ptr;
+ }
+
+ // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
+ // perm, length l, must be allocated before calling this subroutine
+ static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
+ {
+ 	int l = prob->l;
+ 	int max_nr_class = 16;
+ 	int nr_class = 0;
+ 	int *label = Malloc(int,max_nr_class);
+ 	int *count = Malloc(int,max_nr_class);
+ 	int *data_label = Malloc(int,l);
+ 	int i;
+
+ 	for(i=0;i<l;i++)
+ 	{
+ 		int this_label = prob->y[i];
+ 		int j;
+ 		for(j=0;j<nr_class;j++)
+ 		{
+ 			if(this_label == label[j])
+ 			{
+ 				++count[j];
+ 				break;
+ 			}
+ 		}
+ 		data_label[i] = j;
+ 		if(j == nr_class)
+ 		{
+ 			if(nr_class == max_nr_class)
+ 			{
+ 				max_nr_class *= 2;
+ 				label = (int *)realloc(label,max_nr_class*sizeof(int));
+ 				count = (int *)realloc(count,max_nr_class*sizeof(int));
+ 			}
+ 			label[nr_class] = this_label;
+ 			count[nr_class] = 1;
+ 			++nr_class;
+ 		}
+ 	}
+
+ 	int *start = Malloc(int,nr_class);
+ 	start[0] = 0;
+ 	for(i=1;i<nr_class;i++)
+ 		start[i] = start[i-1]+count[i-1];
+ 	for(i=0;i<l;i++)
+ 	{
+ 		perm[start[data_label[i]]] = i;
+ 		++start[data_label[i]];
+ 	}
+ 	start[0] = 0;
+ 	for(i=1;i<nr_class;i++)
+ 		start[i] = start[i-1]+count[i-1];
+
+ 	*nr_class_ret = nr_class;
+ 	*label_ret = label;
+ 	*start_ret = start;
+ 	*count_ret = count;
+ 	free(data_label);
+ }
+
+ static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
+ {
+ 	double eps=param->eps;
+ 	int pos = 0;
+ 	int neg = 0;
+ 	for(int i=0;i<prob->l;i++)
+ 		if(prob->y[i]==+1)
+ 			pos++;
+ 	neg = prob->l - pos;
+
+ 	function *fun_obj=NULL;
+ 	switch(param->solver_type)
+ 	{
+ 		case L2R_LR:
+ 		{
+ 			fun_obj=new l2r_lr_fun(prob, Cp, Cn);
+ 			TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
+ 			tron_obj.set_print_string(liblinear_print_string);
+ 			tron_obj.tron(w);
+ 			delete fun_obj;
+ 			break;
+ 		}
+ 		case L2R_L2LOSS_SVC:
+ 		{
+ 			fun_obj=new l2r_l2_svc_fun(prob, Cp, Cn);
+ 			TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
+ 			tron_obj.set_print_string(liblinear_print_string);
+ 			tron_obj.tron(w);
+ 			delete fun_obj;
+ 			break;
+ 		}
+ 		case L2R_L2LOSS_SVC_DUAL:
+ 			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
+ 			break;
+ 		case L2R_L1LOSS_SVC_DUAL:
+ 			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
+ 			break;
+ 		case L1R_L2LOSS_SVC:
+ 		{
+ 			problem prob_col;
+ 			feature_node *x_space = NULL;
+ 			transpose(prob, &x_space ,&prob_col);
+ 			solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
+ 			delete [] prob_col.y;
+ 			delete [] prob_col.x;
+ 			delete [] x_space;
+ 			break;
+ 		}
+ 		case L1R_LR:
+ 		{
+ 			problem prob_col;
+ 			feature_node *x_space = NULL;
+ 			transpose(prob, &x_space ,&prob_col);
+ 			solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
+ 			delete [] prob_col.y;
+ 			delete [] prob_col.x;
+ 			delete [] x_space;
+ 			break;
+ 		}
+ 		case L2R_LR_DUAL:
+ 			solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
+ 			break;
+ 		default:
+ 			fprintf(stderr, "Error: unknown solver_type\n");
+ 			break;
+ 	}
+ }
+
+ //
+ // Interface functions
+ //
+ model* train(const problem *prob, const parameter *param)
+ {
+ 	int i,j;
+ 	int l = prob->l;
+ 	int n = prob->n;
+ 	int w_size = prob->n;
+ 	model *model_ = Malloc(model,1);
+
+ 	if(prob->bias>=0)
+ 		model_->nr_feature=n-1;
+ 	else
+ 		model_->nr_feature=n;
+ 	model_->param = *param;
+ 	model_->bias = prob->bias;
+
+ 	int nr_class;
+ 	int *label = NULL;
+ 	int *start = NULL;
+ 	int *count = NULL;
+ 	int *perm = Malloc(int,l);
+
+ 	// group training data of the same class
+ 	group_classes(prob,&nr_class,&label,&start,&count,perm);
+
+ 	model_->nr_class=nr_class;
+ 	model_->label = Malloc(int,nr_class);
+ 	for(i=0;i<nr_class;i++)
+ 		model_->label[i] = label[i];
+
+ 	// calculate weighted C
+ 	double *weighted_C = Malloc(double, nr_class);
+ 	for(i=0;i<nr_class;i++)
+ 		weighted_C[i] = param->C;
+ 	for(i=0;i<param->nr_weight;i++)
+ 	{
+ 		for(j=0;j<nr_class;j++)
+ 			if(param->weight_label[i] == label[j])
+ 				break;
+ 		if(j == nr_class)
+ 			fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
+ 		else
+ 			weighted_C[j] *= param->weight[i];
+ 	}
+
+ 	// constructing the subproblem
+ 	feature_node **x = Malloc(feature_node *,l);
+ 	for(i=0;i<l;i++)
+ 		x[i] = prob->x[perm[i]];
+
+ 	int k;
+ 	problem sub_prob;
+ 	sub_prob.l = l;
+ 	sub_prob.n = n;
+ 	sub_prob.x = Malloc(feature_node *,sub_prob.l);
+ 	sub_prob.y = Malloc(int,sub_prob.l);
+
+ 	for(k=0; k<sub_prob.l; k++)
+ 		sub_prob.x[k] = x[k];
+
+ 	// multi-class svm by Crammer and Singer
+ 	if(param->solver_type == MCSVM_CS)
+ 	{
+ 		model_->w=Malloc(double, n*nr_class);
+ 		for(i=0;i<nr_class;i++)
+ 			for(j=start[i];j<start[i]+count[i];j++)
+ 				sub_prob.y[j] = i;
+ 		Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
+ 		Solver.Solve(model_->w);
+ 	}
+ 	else
+ 	{
+ 		if(nr_class == 2)
+ 		{
+ 			model_->w=Malloc(double, w_size);
+
+ 			int e0 = start[0]+count[0];
+ 			k=0;
+ 			for(; k<e0; k++)
+ 				sub_prob.y[k] = +1;
+ 			for(; k<sub_prob.l; k++)
+ 				sub_prob.y[k] = -1;
+
+ 			train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
+ 		}
+ 		else
+ 		{
+ 			model_->w=Malloc(double, w_size*nr_class);
+ 			double *w=Malloc(double, w_size);
+ 			for(i=0;i<nr_class;i++)
+ 			{
+ 				int si = start[i];
+ 				int ei = si+count[i];
+
+ 				k=0;
+ 				for(; k<si; k++)
+ 					sub_prob.y[k] = -1;
+ 				for(; k<ei; k++)
+ 					sub_prob.y[k] = +1;
+ 				for(; k<sub_prob.l; k++)
+ 					sub_prob.y[k] = -1;
+
+ 				train_one(&sub_prob, param, w, weighted_C[i], param->C);
+
+ 				for(int j=0;j<w_size;j++)
+ 					model_->w[j*nr_class+i] = w[j];
+ 			}
+ 			free(w);
+ 		}
+
+ 	}
+
+ 	free(x);
+ 	free(label);
+ 	free(start);
+ 	free(count);
+ 	free(perm);
+ 	free(sub_prob.x);
+ 	free(sub_prob.y);
+ 	free(weighted_C);
+ 	return model_;
+ }
+
+ void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target)
+ {
+ 	int i;
+ 	int *fold_start = Malloc(int,nr_fold+1);
+ 	int l = prob->l;
+ 	int *perm = Malloc(int,l);
+
+ 	for(i=0;i<l;i++) perm[i]=i;
+ 	for(i=0;i<l;i++)
+ 	{
+ 		int j = i+rand()%(l-i);
+ 		swap(perm[i],perm[j]);
+ 	}
+ 	for(i=0;i<=nr_fold;i++)
+ 		fold_start[i]=i*l/nr_fold;
+
+ 	for(i=0;i<nr_fold;i++)
+ 	{
+ 		int begin = fold_start[i];
+ 		int end = fold_start[i+1];
+ 		int j,k;
+ 		struct problem subprob;
+
+ 		subprob.bias = prob->bias;
+ 		subprob.n = prob->n;
+ 		subprob.l = l-(end-begin);
+ 		subprob.x = Malloc(struct feature_node*,subprob.l);
+ 		subprob.y = Malloc(int,subprob.l);
+
+ 		k=0;
+ 		for(j=0;j<begin;j++)
+ 		{
+ 			subprob.x[k] = prob->x[perm[j]];
+ 			subprob.y[k] = prob->y[perm[j]];
+ 			++k;
+ 		}
+ 		for(j=end;j<l;j++)
+ 		{
+ 			subprob.x[k] = prob->x[perm[j]];
+ 			subprob.y[k] = prob->y[perm[j]];
+ 			++k;
+ 		}
+ 		struct model *submodel = train(&subprob,param);
+ 		for(j=begin;j<end;j++)
+ 			target[perm[j]] = predict(submodel,prob->x[perm[j]]);
+ 		free_and_destroy_model(&submodel);
+ 		free(subprob.x);
+ 		free(subprob.y);
+ 	}
+ 	free(fold_start);
+ 	free(perm);
+ }
+
+ int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
+ {
+ 	int idx;
+ 	int n;
+ 	if(model_->bias>=0)
+ 		n=model_->nr_feature+1;
+ 	else
+ 		n=model_->nr_feature;
+ 	double *w=model_->w;
+ 	int nr_class=model_->nr_class;
+ 	int i;
+ 	int nr_w;
+ 	if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
+ 		nr_w = 1;
+ 	else
+ 		nr_w = nr_class;
+
+ 	const feature_node *lx=x;
+ 	for(i=0;i<nr_w;i++)
+ 		dec_values[i] = 0;
+ 	for(; (idx=lx->index)!=-1; lx++)
+ 	{
+ 		// the dimension of testing data may exceed that of training
+ 		if(idx<=n)
+ 			for(i=0;i<nr_w;i++)
+ 				dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
+ 	}
+
+ 	if(nr_class==2)
+ 		return (dec_values[0]>0)?model_->label[0]:model_->label[1];
+ 	else
+ 	{
+ 		int dec_max_idx = 0;
+ 		for(i=1;i<nr_class;i++)
+ 		{
+ 			if(dec_values[i] > dec_values[dec_max_idx])
+ 				dec_max_idx = i;
+ 		}
+ 		return model_->label[dec_max_idx];
+ 	}
+ }
+
+ int predict(const model *model_, const feature_node *x)
+ {
+ 	double *dec_values = Malloc(double, model_->nr_class);
+ 	int label=predict_values(model_, x, dec_values);
+ 	free(dec_values);
+ 	return label;
+ }
+
+ int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
+ {
+ 	if(check_probability_model(model_))
+ 	{
+ 		int i;
+ 		int nr_class=model_->nr_class;
+ 		int nr_w;
+ 		if(nr_class==2)
+ 			nr_w = 1;
+ 		else
+ 			nr_w = nr_class;
+
+ 		int label=predict_values(model_, x, prob_estimates);
+ 		for(i=0;i<nr_w;i++)
+ 			prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
+
+ 		if(nr_class==2) // for binary classification
+ 			prob_estimates[1]=1.-prob_estimates[0];
+ 		else
+ 		{
+ 			double sum=0;
+ 			for(i=0; i<nr_class; i++)
+ 				sum+=prob_estimates[i];
+
+ 			for(i=0; i<nr_class; i++)
+ 				prob_estimates[i]=prob_estimates[i]/sum;
+ 		}
+
+ 		return label;
+ 	}
+ 	else
+ 		return 0;
+ }
+
+ static const char *solver_type_table[]=
+ {
+ 	"L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
+ 	"L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL", NULL
+ };
+
+ int save_model(const char *model_file_name, const struct model *model_)
+ {
+ 	int i;
+ 	int nr_feature=model_->nr_feature;
+ 	int n;
+ 	const parameter& param = model_->param;
+
+ 	if(model_->bias>=0)
+ 		n=nr_feature+1;
+ 	else
+ 		n=nr_feature;
+ 	int w_size = n;
+ 	FILE *fp = fopen(model_file_name,"w");
+ 	if(fp==NULL) return -1;
+
+ 	int nr_w;
+ 	if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
+ 		nr_w=1;
+ 	else
+ 		nr_w=model_->nr_class;
+
+ 	fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
+ 	fprintf(fp, "nr_class %d\n", model_->nr_class);
+ 	fprintf(fp, "label");
+ 	for(i=0; i<model_->nr_class; i++)
+ 		fprintf(fp, " %d", model_->label[i]);
+ 	fprintf(fp, "\n");
+
+ 	fprintf(fp, "nr_feature %d\n", nr_feature);
+
+ 	fprintf(fp, "bias %.16g\n", model_->bias);
+
+ 	fprintf(fp, "w\n");
+ 	for(i=0; i<w_size; i++)
+ 	{
+ 		int j;
+ 		for(j=0; j<nr_w; j++)
+ 			fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
+ 		fprintf(fp, "\n");
+ 	}
+
+ 	if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
+ 	else return 0;
+ }
+
+ struct model *load_model(const char *model_file_name)
+ {
+ 	FILE *fp = fopen(model_file_name,"r");
+ 	if(fp==NULL) return NULL;
+
+ 	int i;
+ 	int nr_feature;
+ 	int n;
+ 	int nr_class;
+ 	double bias;
+ 	model *model_ = Malloc(model,1);
+ 	parameter& param = model_->param;
+
+ 	model_->label = NULL;
+
+ 	char cmd[81];
+ 	while(1)
+ 	{
+ 		fscanf(fp,"%80s",cmd);
+ 		if(strcmp(cmd,"solver_type")==0)
+ 		{
+ 			fscanf(fp,"%80s",cmd);
+ 			int i;
+ 			for(i=0;solver_type_table[i];i++)
+ 			{
+ 				if(strcmp(solver_type_table[i],cmd)==0)
+ 				{
+ 					param.solver_type=i;
+ 					break;
+ 				}
+ 			}
+ 			if(solver_type_table[i] == NULL)
+ 			{
+ 				fprintf(stderr,"unknown solver type.\n");
+ 				free(model_->label);
+ 				free(model_);
+ 				return NULL;
+ 			}
+ 		}
+ 		else if(strcmp(cmd,"nr_class")==0)
+ 		{
+ 			fscanf(fp,"%d",&nr_class);
+ 			model_->nr_class=nr_class;
+ 		}
+ 		else if(strcmp(cmd,"nr_feature")==0)
+ 		{
+ 			fscanf(fp,"%d",&nr_feature);
+ 			model_->nr_feature=nr_feature;
+ 		}
+ 		else if(strcmp(cmd,"bias")==0)
+ 		{
+ 			fscanf(fp,"%lf",&bias);
+ 			model_->bias=bias;
+ 		}
+ 		else if(strcmp(cmd,"w")==0)
+ 		{
+ 			break;
+ 		}
+ 		else if(strcmp(cmd,"label")==0)
+ 		{
+ 			int nr_class = model_->nr_class;
+ 			model_->label = Malloc(int,nr_class);
+ 			for(int i=0;i<nr_class;i++)
+ 				fscanf(fp,"%d",&model_->label[i]);
+ 		}
+ 		else
+ 		{
+ 			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
+ 			free(model_);
+ 			return NULL;
+ 		}
+ 	}
+
+ 	nr_feature=model_->nr_feature;
+ 	if(model_->bias>=0)
+ 		n=nr_feature+1;
+ 	else
+ 		n=nr_feature;
+ 	int w_size = n;
+ 	int nr_w;
+ 	if(nr_class==2 && param.solver_type != MCSVM_CS)
+ 		nr_w = 1;
+ 	else
+ 		nr_w = nr_class;
+
+ 	model_->w=Malloc(double, w_size*nr_w);
+ 	for(i=0; i<w_size; i++)
+ 	{
+ 		int j;
+ 		for(j=0; j<nr_w; j++)
+ 			fscanf(fp, "%lf ", &model_->w[i*nr_w+j]);
+ 		fscanf(fp, "\n");
+ 	}
+ 	if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
+
+ 	return model_;
+ }
+
+ int get_nr_feature(const model *model_)
+ {
+ 	return model_->nr_feature;
+ }
+
+ int get_nr_class(const model *model_)
+ {
+ 	return model_->nr_class;
+ }
+
+ void get_labels(const model *model_, int* label)
+ {
+ 	if (model_->label != NULL)
+ 		for(int i=0;i<model_->nr_class;i++)
+ 			label[i] = model_->label[i];
+ }
+
+ void free_model_content(struct model *model_ptr)
+ {
+ 	if(model_ptr->w != NULL)
+ 		free(model_ptr->w);
+ 	if(model_ptr->label != NULL)
+ 		free(model_ptr->label);
+ }
+
+ void free_and_destroy_model(struct model **model_ptr_ptr)
+ {
+ 	struct model *model_ptr = *model_ptr_ptr;
+ 	if(model_ptr != NULL)
+ 	{
+ 		free_model_content(model_ptr);
+ 		free(model_ptr);
+ 	}
+ }
+
+ void destroy_param(parameter* param)
+ {
+ 	if(param->weight_label != NULL)
+ 		free(param->weight_label);
+ 	if(param->weight != NULL)
+ 		free(param->weight);
+ }
+
+ const char *check_parameter(const problem *prob, const parameter *param)
+ {
+ 	if(param->eps <= 0)
+ 		return "eps <= 0";
+
+ 	if(param->C <= 0)
+ 		return "C <= 0";
+
+ 	if(param->solver_type != L2R_LR
+ 		&& param->solver_type != L2R_L2LOSS_SVC_DUAL
+ 		&& param->solver_type != L2R_L2LOSS_SVC
+ 		&& param->solver_type != L2R_L1LOSS_SVC_DUAL
+ 		&& param->solver_type != MCSVM_CS
+ 		&& param->solver_type != L1R_L2LOSS_SVC
+ 		&& param->solver_type != L1R_LR
+ 		&& param->solver_type != L2R_LR_DUAL)
+ 		return "unknown solver type";
+
+ 	return NULL;
+ }
+
+ int check_probability_model(const struct model *model_)
+ {
+ 	return (model_->param.solver_type==L2R_LR ||
+ 			model_->param.solver_type==L2R_LR_DUAL ||
+ 			model_->param.solver_type==L1R_LR);
+ }
+
+ void set_print_string_function(void (*print_func)(const char*))
+ {
+ 	if (print_func == NULL)
+ 		liblinear_print_string = &print_string_stdout;
+ 	else
+ 		liblinear_print_string = print_func;
+ }
+