ruby_linear 0.1.0

@@ -0,0 +1,49 @@
+ #include "blas.h"
+
+ int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy,
+ int *incy)
+ {
+ long int i, m, ix, iy, nn, iincx, iincy;
+ register double ssa;
+
+ /* constant times a vector plus a vector.
+ uses unrolled loop for increments equal to one.
+ jack dongarra, linpack, 3/11/78.
+ modified 12/3/93, array(1) declarations changed to array(*) */
+
+ /* Dereference inputs */
+ nn = *n;
+ ssa = *sa;
+ iincx = *incx;
+ iincy = *incy;
+
+ if( nn > 0 && ssa != 0.0 )
+ {
+ if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */
+ {
+ m = nn-3;
+ for (i = 0; i < m; i += 4)
+ {
+ sy[i] += ssa * sx[i];
+ sy[i+1] += ssa * sx[i+1];
+ sy[i+2] += ssa * sx[i+2];
+ sy[i+3] += ssa * sx[i+3];
+ }
+ for ( ; i < nn; ++i) /* clean-up loop */
+ sy[i] += ssa * sx[i];
+ }
+ else /* code for unequal increments or equal increments not equal to 1 */
+ {
+ ix = iincx >= 0 ? 0 : (1 - nn) * iincx;
+ iy = iincy >= 0 ? 0 : (1 - nn) * iincy;
+ for (i = 0; i < nn; i++)
+ {
+ sy[iy] += ssa * sx[ix];
+ ix += iincx;
+ iy += iincy;
+ }
+ }
+ }
+
+ return 0;
+ } /* daxpy_ */
@@ -0,0 +1,50 @@
+ #include "blas.h"
+
+ double ddot_(int *n, double *sx, int *incx, double *sy, int *incy)
+ {
+ long int i, m, nn, iincx, iincy;
+ double stemp;
+ long int ix, iy;
+
+ /* forms the dot product of two vectors.
+ uses unrolled loops for increments equal to one.
+ jack dongarra, linpack, 3/11/78.
+ modified 12/3/93, array(1) declarations changed to array(*) */
+
+ /* Dereference inputs */
+ nn = *n;
+ iincx = *incx;
+ iincy = *incy;
+
+ stemp = 0.0;
+ if (nn > 0)
+ {
+ if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */
+ {
+ m = nn-4;
+ for (i = 0; i < m; i += 5)
+ stemp += sx[i] * sy[i] + sx[i+1] * sy[i+1] + sx[i+2] * sy[i+2] +
+ sx[i+3] * sy[i+3] + sx[i+4] * sy[i+4];
+
+ for ( ; i < nn; i++) /* clean-up loop */
+ stemp += sx[i] * sy[i];
+ }
+ else /* code for unequal increments or equal increments not equal to 1 */
+ {
+ ix = 0;
+ iy = 0;
+ if (iincx < 0)
+ ix = (1 - nn) * iincx;
+ if (iincy < 0)
+ iy = (1 - nn) * iincy;
+ for (i = 0; i < nn; i++)
+ {
+ stemp += sx[ix] * sy[iy];
+ ix += iincx;
+ iy += iincy;
+ }
+ }
+ }
+
+ return stemp;
+ } /* ddot_ */
@@ -0,0 +1,62 @@
+ #include <math.h> /* Needed for fabs() and sqrt() */
+ #include "blas.h"
+
+ double dnrm2_(int *n, double *x, int *incx)
+ {
+ long int ix, nn, iincx;
+ double norm, scale, absxi, ssq, temp;
+
+ /* DNRM2 returns the euclidean norm of a vector via the function
+ name, so that
+
+ DNRM2 := sqrt( x'*x )
+
+ -- This version written on 25-October-1982.
+ Modified on 14-October-1993 to inline the call to SLASSQ.
+ Sven Hammarling, Nag Ltd. */
+
+ /* Dereference inputs */
+ nn = *n;
+ iincx = *incx;
+
+ if( nn > 0 && iincx > 0 )
+ {
+ if (nn == 1)
+ {
+ norm = fabs(x[0]);
+ }
+ else
+ {
+ scale = 0.0;
+ ssq = 1.0;
+
+ /* The following loop is equivalent to this call to the LAPACK
+ auxiliary routine: CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */
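+
+ /* Invariant maintained by the loop: the sum of x[k]*x[k] over the
+ elements processed so far always equals scale*scale*ssq, with
+ scale tracking the largest |x[k]| seen. Neither factor can
+ overflow where a naive sum of squares would. */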
+
+ for (ix=(nn-1)*iincx; ix>=0; ix-=iincx)
+ {
+ if (x[ix] != 0.0)
+ {
+ absxi = fabs(x[ix]);
+ if (scale < absxi)
+ {
+ temp = scale / absxi;
+ ssq = ssq * (temp * temp) + 1.0;
+ scale = absxi;
+ }
+ else
+ {
+ temp = absxi / scale;
+ ssq += temp * temp;
+ }
+ }
+ }
+ norm = scale * sqrt(ssq);
+ }
+ }
+ else
+ norm = 0.0;
+
+ return norm;
+
+ } /* dnrm2_ */
@@ -0,0 +1,44 @@
+ #include "blas.h"
+
+ int dscal_(int *n, double *sa, double *sx, int *incx)
+ {
+ long int i, m, nincx, nn, iincx;
+ double ssa;
+
+ /* scales a vector by a constant.
+ uses unrolled loops for increment equal to 1.
+ jack dongarra, linpack, 3/11/78.
+ modified 3/93 to return if incx .le. 0.
+ modified 12/3/93, array(1) declarations changed to array(*) */
+
+ /* Dereference inputs */
+ nn = *n;
+ iincx = *incx;
+ ssa = *sa;
+
+ if (nn > 0 && iincx > 0)
+ {
+ if (iincx == 1) /* code for increment equal to 1 */
+ {
+ m = nn-4;
+ for (i = 0; i < m; i += 5)
+ {
+ sx[i] = ssa * sx[i];
+ sx[i+1] = ssa * sx[i+1];
+ sx[i+2] = ssa * sx[i+2];
+ sx[i+3] = ssa * sx[i+3];
+ sx[i+4] = ssa * sx[i+4];
+ }
+ for ( ; i < nn; ++i) /* clean-up loop */
+ sx[i] = ssa * sx[i];
+ }
+ else /* code for increment not equal to 1 */
+ {
+ nincx = nn * iincx;
+ for (i = 0; i < nincx; i += iincx)
+ sx[i] = ssa * sx[i];
+ }
+ }
+
+ return 0;
+ } /* dscal_ */
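
Taken together, these four kernels cover the vector operations the solvers in the C++ file below rely on. A minimal usage sketch in C, assuming the bundled blas.h declares exactly the prototypes defined above (the pointer-argument, trailing-underscore convention is inherited from the Fortran BLAS interface):

    #include <stdio.h>
    #include "blas.h"

    int main(void)
    {
        double x[3] = {1.0, 2.0, 2.0};
        double y[3] = {1.0, 1.0, 1.0};
        int n = 3, inc = 1;
        double a = 2.0;

        daxpy_(&n, &a, x, &inc, y, &inc);  /* y := a*x + y = {3, 5, 5} */
        printf("dot  = %f\n", ddot_(&n, x, &inc, y, &inc)); /* 3+10+10 = 23 */
        printf("norm = %f\n", dnrm2_(&n, x, &inc));         /* sqrt(9) = 3 */
        dscal_(&n, &a, x, &inc);           /* x := a*x = {2, 4, 4} */
        return 0;
    }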
@@ -0,0 +1,4 @@
+ require 'mkmf'
+ CONFIG["LDSHARED"] = "g++ -shared"
+ $CFLAGS = "#{ENV['CFLAGS']} -Wall -O3"
+ create_makefile('rubylinear_native')
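
This is the standard mkmf recipe: running ruby extconf.rb writes a Makefile, and make then compiles and links the rubylinear_native extension. Overriding CONFIG["LDSHARED"] forces the final link through g++, which pulls in the C++ runtime needed by the C++ sources below; the $CFLAGS line simply appends -Wall -O3 to whatever the environment already supplies.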
@@ -0,0 +1,2385 @@
+ #include <math.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <stdarg.h>
+ #include "linear.h"
+ #include "tron.h"
+ typedef signed char schar;
+ template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
+ #ifndef min
+ template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
+ #endif
+ #ifndef max
+ template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
+ #endif
+ template <class S, class T> static inline void clone(T*& dst, S* src, int n)
+ {
+ dst = new T[n];
+ memcpy((void *)dst,(void *)src,sizeof(T)*n);
+ }
+ #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
+ #define INF HUGE_VAL
+
+ static void print_string_stdout(const char *s)
+ {
+ fputs(s,stdout);
+ fflush(stdout);
+ }
+
+ static void (*liblinear_print_string) (const char *) = &print_string_stdout;
+
+ #if 1
+ int info_on = 0;
+ static void info(const char *fmt,...)
+ {
+ char buf[BUFSIZ];
+ va_list ap;
+ if (info_on==1) {
+ va_start(ap,fmt);
+ vsnprintf(buf,sizeof(buf),fmt,ap); /* bounded write; plain vsprintf could overflow buf */
+ va_end(ap);
+ (*liblinear_print_string)(buf);
+ }
+ }
+ #else
+ static void info(const char *fmt,...) {}
+ #endif
+
+ class l2r_lr_fun : public function
+ {
+ public:
+ l2r_lr_fun(const problem *prob, double Cp, double Cn);
+ ~l2r_lr_fun();
+
+ double fun(double *w);
+ void grad(double *w, double *g);
+ void Hv(double *s, double *Hs);
+
+ int get_nr_variable(void);
+
+ private:
+ void Xv(double *v, double *Xv);
+ void XTv(double *v, double *XTv);
+
+ double *C;
+ double *z;
+ double *D;
+ const problem *prob;
+ };
+
+ l2r_lr_fun::l2r_lr_fun(const problem *prob, double Cp, double Cn)
+ {
+ int i;
+ int l=prob->l;
+ int *y=prob->y;
+
+ this->prob = prob;
+
+ z = new double[l];
+ D = new double[l];
+ C = new double[l];
+
+ for (i=0; i<l; i++)
+ {
+ if (y[i] == 1)
+ C[i] = Cp;
+ else
+ C[i] = Cn;
+ }
+ }
+
+ l2r_lr_fun::~l2r_lr_fun()
+ {
+ delete[] z;
+ delete[] D;
+ delete[] C;
+ }
+
+
+ double l2r_lr_fun::fun(double *w)
+ {
+ int i;
+ double f=0;
+ int *y=prob->y;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+
+ Xv(w, z);
+ for(i=0;i<l;i++)
+ {
+ double yz = y[i]*z[i];
+ if (yz >= 0)
+ f += C[i]*log(1 + exp(-yz));
+ else
+ f += C[i]*(-yz+log(1 + exp(yz)));
+ }
+ f = 2*f;
+ for(i=0;i<w_size;i++)
+ f += w[i]*w[i];
+ f /= 2.0;
+
+ return(f);
+ }
+
+ void l2r_lr_fun::grad(double *w, double *g)
+ {
+ int i;
+ int *y=prob->y;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+
+ for(i=0;i<l;i++)
+ {
+ z[i] = 1/(1 + exp(-y[i]*z[i]));
+ D[i] = z[i]*(1-z[i]);
+ z[i] = C[i]*(z[i]-1)*y[i];
+ }
+ XTv(z, g);
+
+ for(i=0;i<w_size;i++)
+ g[i] = w[i] + g[i];
+ }
+
+ int l2r_lr_fun::get_nr_variable(void)
+ {
+ return prob->n;
+ }
+
+ void l2r_lr_fun::Hv(double *s, double *Hs)
+ {
+ int i;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+ double *wa = new double[l];
+
+ Xv(s, wa);
+ for(i=0;i<l;i++)
+ wa[i] = C[i]*D[i]*wa[i];
+
+ XTv(wa, Hs);
+ for(i=0;i<w_size;i++)
+ Hs[i] = s[i] + Hs[i];
+ delete[] wa;
+ }
+
+ void l2r_lr_fun::Xv(double *v, double *Xv)
+ {
+ int i;
+ int l=prob->l;
+ feature_node **x=prob->x;
+
+ for(i=0;i<l;i++)
+ {
+ feature_node *s=x[i];
+ Xv[i]=0;
+ while(s->index!=-1)
+ {
+ Xv[i]+=v[s->index-1]*s->value;
+ s++;
+ }
+ }
+ }
+
+ void l2r_lr_fun::XTv(double *v, double *XTv)
+ {
+ int i;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+ feature_node **x=prob->x;
+
+ for(i=0;i<w_size;i++)
+ XTv[i]=0;
+ for(i=0;i<l;i++)
+ {
+ feature_node *s=x[i];
+ while(s->index!=-1)
+ {
+ XTv[s->index-1]+=v[i]*s->value;
+ s++;
+ }
+ }
+ }
+
+ class l2r_l2_svc_fun : public function
+ {
+ public:
+ l2r_l2_svc_fun(const problem *prob, double Cp, double Cn);
+ ~l2r_l2_svc_fun();
+
+ double fun(double *w);
+ void grad(double *w, double *g);
+ void Hv(double *s, double *Hs);
+
+ int get_nr_variable(void);
+
+ private:
+ void Xv(double *v, double *Xv);
+ void subXv(double *v, double *Xv);
+ void subXTv(double *v, double *XTv);
+
+ double *C;
+ double *z;
+ double *D;
+ int *I;
+ int sizeI;
+ const problem *prob;
+ };
+
+ l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double Cp, double Cn)
+ {
+ int i;
+ int l=prob->l;
+ int *y=prob->y;
+
+ this->prob = prob;
+
+ z = new double[l];
+ D = new double[l];
+ C = new double[l];
+ I = new int[l];
+
+ for (i=0; i<l; i++)
+ {
+ if (y[i] == 1)
+ C[i] = Cp;
+ else
+ C[i] = Cn;
+ }
+ }
+
+ l2r_l2_svc_fun::~l2r_l2_svc_fun()
+ {
+ delete[] z;
+ delete[] D;
+ delete[] C;
+ delete[] I;
+ }
+
+ double l2r_l2_svc_fun::fun(double *w)
+ {
+ int i;
+ double f=0;
+ int *y=prob->y;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+
+ Xv(w, z);
+ for(i=0;i<l;i++)
+ {
+ z[i] = y[i]*z[i];
+ double d = 1-z[i];
+ if (d > 0)
+ f += C[i]*d*d;
+ }
+ f = 2*f;
+ for(i=0;i<w_size;i++)
+ f += w[i]*w[i];
+ f /= 2.0;
+
+ return(f);
+ }
+
+ void l2r_l2_svc_fun::grad(double *w, double *g)
+ {
+ int i;
+ int *y=prob->y;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+
+ sizeI = 0;
+ for (i=0;i<l;i++)
+ if (z[i] < 1)
+ {
+ z[sizeI] = C[i]*y[i]*(z[i]-1);
+ I[sizeI] = i;
+ sizeI++;
+ }
+ subXTv(z, g);
+
+ for(i=0;i<w_size;i++)
+ g[i] = w[i] + 2*g[i];
+ }
+
+ int l2r_l2_svc_fun::get_nr_variable(void)
+ {
+ return prob->n;
+ }
+
+ void l2r_l2_svc_fun::Hv(double *s, double *Hs)
+ {
+ int i;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+ double *wa = new double[l];
+
+ subXv(s, wa);
+ for(i=0;i<sizeI;i++)
+ wa[i] = C[I[i]]*wa[i];
+
+ subXTv(wa, Hs);
+ for(i=0;i<w_size;i++)
+ Hs[i] = s[i] + 2*Hs[i];
+ delete[] wa;
+ }
+
+ void l2r_l2_svc_fun::Xv(double *v, double *Xv)
+ {
+ int i;
+ int l=prob->l;
+ feature_node **x=prob->x;
+
+ for(i=0;i<l;i++)
+ {
+ feature_node *s=x[i];
+ Xv[i]=0;
+ while(s->index!=-1)
+ {
+ Xv[i]+=v[s->index-1]*s->value;
+ s++;
+ }
+ }
+ }
+
+ void l2r_l2_svc_fun::subXv(double *v, double *Xv)
+ {
+ int i;
+ feature_node **x=prob->x;
+
+ for(i=0;i<sizeI;i++)
+ {
+ feature_node *s=x[I[i]];
+ Xv[i]=0;
+ while(s->index!=-1)
+ {
+ Xv[i]+=v[s->index-1]*s->value;
+ s++;
+ }
+ }
+ }
+
+ void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
+ {
+ int i;
+ int w_size=get_nr_variable();
+ feature_node **x=prob->x;
+
+ for(i=0;i<w_size;i++)
+ XTv[i]=0;
+ for(i=0;i<sizeI;i++)
+ {
+ feature_node *s=x[I[i]];
+ while(s->index!=-1)
+ {
+ XTv[s->index-1]+=v[i]*s->value;
+ s++;
+ }
+ }
+ }
+
+ // A coordinate descent algorithm for
+ // multi-class support vector machines by Crammer and Singer
+ //
+ // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
+ // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
+ //
+ // where e^m_i = 0 if y_i = m,
+ // e^m_i = 1 if y_i != m,
+ // C^m_i = C if m = y_i,
+ // C^m_i = 0 if m != y_i,
+ // and w_m(\alpha) = \sum_i \alpha^m_i x_i
+ //
+ // Given:
+ // x, y, C
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Appendix of LIBLINEAR paper, Fan et al. (2008)
+
+ #define GETI(i) (prob->y[i])
+ // To support weights for instances, use GETI(i) (i)
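+ // Here GETI(i) is the class label of instance i, so C[GETI(i)] in the solver
+ // below picks the (possibly class-weighted) upper bound for that instance;
+ // redefining GETI(i) as (i) would index a per-instance weight array instead.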
+
+ class Solver_MCSVM_CS
+ {
+ public:
+ Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
+ ~Solver_MCSVM_CS();
+ void Solve(double *w);
+ private:
+ void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
+ bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
+ double *B, *C, *G;
+ int w_size, l;
+ int nr_class;
+ int max_iter;
+ double eps;
+ const problem *prob;
+ };
+
+ Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
+ {
+ this->w_size = prob->n;
+ this->l = prob->l;
+ this->nr_class = nr_class;
+ this->eps = eps;
+ this->max_iter = max_iter;
+ this->prob = prob;
+ this->B = new double[nr_class];
+ this->G = new double[nr_class];
+ this->C = weighted_C;
+ }
+
+ Solver_MCSVM_CS::~Solver_MCSVM_CS()
+ {
+ delete[] B;
+ delete[] G;
+ }
+
+ int compare_double(const void *a, const void *b)
+ {
+ if(*(double *)a > *(double *)b)
+ return -1;
+ if(*(double *)a < *(double *)b)
+ return 1;
+ return 0;
+ }
+
+ void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
+ {
+ int r;
+ double *D;
+
+ clone(D, B, active_i);
+ if(yi < active_i)
+ D[yi] += A_i*C_yi;
+ qsort(D, active_i, sizeof(double), compare_double);
+
+ double beta = D[0] - A_i*C_yi;
+ for(r=1;r<active_i && beta<r*D[r];r++)
+ beta += D[r];
+
+ beta /= r;
+ for(r=0;r<active_i;r++)
+ {
+ if(r == yi)
+ alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
+ else
+ alpha_new[r] = min((double)0, (beta - B[r])/A_i);
+ }
+ delete[] D;
+ }
+
+ bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
+ {
+ double bound = 0;
+ if(m == yi)
+ bound = C[GETI(i)];
+ if(alpha_i == bound && G[m] < minG)
+ return true;
+ return false;
+ }
+
+ void Solver_MCSVM_CS::Solve(double *w)
+ {
+ int i, m, s;
+ int iter = 0;
+ double *alpha = new double[l*nr_class];
+ double *alpha_new = new double[nr_class];
+ int *index = new int[l];
+ double *QD = new double[l];
+ int *d_ind = new int[nr_class];
+ double *d_val = new double[nr_class];
+ int *alpha_index = new int[nr_class*l];
+ int *y_index = new int[l];
+ int active_size = l;
+ int *active_size_i = new int[l];
+ double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
+ bool start_from_all = true;
+ // initial
+ for(i=0;i<l*nr_class;i++)
+ alpha[i] = 0;
+ for(i=0;i<w_size*nr_class;i++)
+ w[i] = 0;
+ for(i=0;i<l;i++)
+ {
+ for(m=0;m<nr_class;m++)
+ alpha_index[i*nr_class+m] = m;
+ feature_node *xi = prob->x[i];
+ QD[i] = 0;
+ while(xi->index != -1)
+ {
+ QD[i] += (xi->value)*(xi->value);
+ xi++;
+ }
+ active_size_i[i] = nr_class;
+ y_index[i] = prob->y[i];
+ index[i] = i;
+ }
+
+ while(iter < max_iter)
+ {
+ double stopping = -INF;
+ for(i=0;i<active_size;i++)
+ {
+ int j = i+rand()%(active_size-i);
+ swap(index[i], index[j]);
+ }
+ for(s=0;s<active_size;s++)
+ {
+ i = index[s];
+ double Ai = QD[i];
+ double *alpha_i = &alpha[i*nr_class];
+ int *alpha_index_i = &alpha_index[i*nr_class];
+
+ if(Ai > 0)
+ {
+ for(m=0;m<active_size_i[i];m++)
+ G[m] = 1;
+ if(y_index[i] < active_size_i[i])
+ G[y_index[i]] = 0;
+
+ feature_node *xi = prob->x[i];
+ while(xi->index!= -1)
+ {
+ double *w_i = &w[(xi->index-1)*nr_class];
+ for(m=0;m<active_size_i[i];m++)
+ G[m] += w_i[alpha_index_i[m]]*(xi->value);
+ xi++;
+ }
+
+ double minG = INF;
+ double maxG = -INF;
+ for(m=0;m<active_size_i[i];m++)
+ {
+ if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
+ minG = G[m];
+ if(G[m] > maxG)
+ maxG = G[m];
+ }
+ if(y_index[i] < active_size_i[i])
+ if(alpha_i[prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
+ minG = G[y_index[i]];
+
+ for(m=0;m<active_size_i[i];m++)
+ {
+ if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
+ {
+ active_size_i[i]--;
+ while(active_size_i[i]>m)
+ {
+ if(!be_shrunk(i, active_size_i[i], y_index[i],
+ alpha_i[alpha_index_i[active_size_i[i]]], minG))
+ {
+ swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
+ swap(G[m], G[active_size_i[i]]);
+ if(y_index[i] == active_size_i[i])
+ y_index[i] = m;
+ else if(y_index[i] == m)
+ y_index[i] = active_size_i[i];
+ break;
+ }
+ active_size_i[i]--;
+ }
+ }
+ }
+
+ if(active_size_i[i] <= 1)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+
+ if(maxG-minG <= 1e-12)
+ continue;
+ else
+ stopping = max(maxG - minG, stopping);
+
+ for(m=0;m<active_size_i[i];m++)
+ B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]];
+
+ solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
+ int nz_d = 0;
+ for(m=0;m<active_size_i[i];m++)
+ {
+ double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
+ alpha_i[alpha_index_i[m]] = alpha_new[m];
+ if(fabs(d) >= 1e-12)
+ {
+ d_ind[nz_d] = alpha_index_i[m];
+ d_val[nz_d] = d;
+ nz_d++;
+ }
+ }
+
+ xi = prob->x[i];
+ while(xi->index != -1)
+ {
+ double *w_i = &w[(xi->index-1)*nr_class];
+ for(m=0;m<nz_d;m++)
+ w_i[d_ind[m]] += d_val[m]*xi->value;
+ xi++;
+ }
+ }
+ }
+
+ iter++;
+ if(iter % 10 == 0)
+ {
+ info(".");
+ }
+
+ if(stopping < eps_shrink)
+ {
+ if(stopping < eps && start_from_all == true)
+ break;
+ else
+ {
+ active_size = l;
+ for(i=0;i<l;i++)
+ active_size_i[i] = nr_class;
+ info("*");
+ eps_shrink = max(eps_shrink/2, eps);
+ start_from_all = true;
+ }
+ }
+ else
+ start_from_all = false;
+ }
+
+ info("\noptimization finished, #iter = %d\n",iter);
+ if (iter >= max_iter)
+ info("\nWARNING: reaching max number of iterations\n");
+
+ // calculate objective value
+ double v = 0;
+ int nSV = 0;
+ for(i=0;i<w_size*nr_class;i++)
+ v += w[i]*w[i];
+ v = 0.5*v;
+ for(i=0;i<l*nr_class;i++)
+ {
+ v += alpha[i];
+ if(fabs(alpha[i]) > 0)
+ nSV++;
+ }
+ for(i=0;i<l;i++)
+ v -= alpha[i*nr_class+prob->y[i]];
+ info("Objective value = %lf\n",v);
+ info("nSV = %d\n",nSV);
+
+ delete [] alpha;
+ delete [] alpha_new;
+ delete [] index;
+ delete [] QD;
+ delete [] d_ind;
+ delete [] d_val;
+ delete [] alpha_index;
+ delete [] y_index;
+ delete [] active_size_i;
+ }
+
+ // A coordinate descent algorithm for
+ // L1-loss and L2-loss SVM dual problems
+ //
+ // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
+ // s.t. 0 <= alpha_i <= upper_bound_i,
+ //
+ // where Qij = yi yj xi^T xj and
+ // D is a diagonal matrix
+ //
+ // In L1-SVM case:
+ // upper_bound_i = Cp if y_i = 1
+ // upper_bound_i = Cn if y_i = -1
+ // D_ii = 0
+ // In L2-SVM case:
+ // upper_bound_i = INF
+ // D_ii = 1/(2*Cp) if y_i = 1
+ // D_ii = 1/(2*Cn) if y_i = -1
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Algorithm 3 of Hsieh et al., ICML 2008
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
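+ // With y[i] in {-1,+1}, GETI(i) evaluates to 0 or 2, selecting the negative-
+ // or positive-class entry of the 3-element diag[] and upper_bound[] tables
+ // declared below; slot 1 is deliberately unused.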
+
+ static void solve_l2r_l1l2_svc(
+ const problem *prob, double *w, double eps,
+ double Cp, double Cn, int solver_type)
+ {
+ int l = prob->l;
+ int w_size = prob->n;
+ int i, s, iter = 0;
+ double C, d, G;
+ double *QD = new double[l];
+ int max_iter = 1000;
+ int *index = new int[l];
+ double *alpha = new double[l];
+ schar *y = new schar[l];
+ int active_size = l;
+
+ // PG: projected gradient, for shrinking and stopping
+ double PG;
+ double PGmax_old = INF;
+ double PGmin_old = -INF;
+ double PGmax_new, PGmin_new;
+
+ // default solver_type: L2R_L2LOSS_SVC_DUAL
+ double diag[3] = {0.5/Cn, 0, 0.5/Cp};
+ double upper_bound[3] = {INF, 0, INF};
+ if(solver_type == L2R_L1LOSS_SVC_DUAL)
+ {
+ diag[0] = 0;
+ diag[2] = 0;
+ upper_bound[0] = Cn;
+ upper_bound[2] = Cp;
+ }
+
+ for(i=0; i<w_size; i++)
+ w[i] = 0;
+ for(i=0; i<l; i++)
+ {
+ alpha[i] = 0;
+ if(prob->y[i] > 0)
+ {
+ y[i] = +1;
+ }
+ else
+ {
+ y[i] = -1;
+ }
+ QD[i] = diag[GETI(i)];
+
+ feature_node *xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ QD[i] += (xi->value)*(xi->value);
+ xi++;
+ }
+ index[i] = i;
+ }
+
+ while (iter < max_iter)
+ {
+ PGmax_new = -INF;
+ PGmin_new = INF;
+
+ for (i=0; i<active_size; i++)
+ {
+ int j = i+rand()%(active_size-i);
+ swap(index[i], index[j]);
+ }
+
+ for (s=0; s<active_size; s++)
+ {
+ i = index[s];
+ G = 0;
+ schar yi = y[i];
+
+ feature_node *xi = prob->x[i];
+ while(xi->index!= -1)
+ {
+ G += w[xi->index-1]*(xi->value);
+ xi++;
+ }
+ G = G*yi-1;
+
+ C = upper_bound[GETI(i)];
+ G += alpha[i]*diag[GETI(i)];
+
+ PG = 0;
+ if (alpha[i] == 0)
+ {
+ if (G > PGmax_old)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ else if (G < 0)
+ PG = G;
+ }
+ else if (alpha[i] == C)
+ {
+ if (G < PGmin_old)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ else if (G > 0)
+ PG = G;
+ }
+ else
+ PG = G;
+
+ PGmax_new = max(PGmax_new, PG);
+ PGmin_new = min(PGmin_new, PG);
+
+ if(fabs(PG) > 1.0e-12)
+ {
+ double alpha_old = alpha[i];
+ alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
+ d = (alpha[i] - alpha_old)*yi;
+ xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ w[xi->index-1] += d*xi->value;
+ xi++;
+ }
+ }
+ }
+
+ iter++;
+ if(iter % 10 == 0)
+ info(".");
+
+ if(PGmax_new - PGmin_new <= eps)
+ {
+ if(active_size == l)
+ break;
+ else
+ {
+ active_size = l;
+ info("*");
+ PGmax_old = INF;
+ PGmin_old = -INF;
+ continue;
+ }
+ }
+ PGmax_old = PGmax_new;
+ PGmin_old = PGmin_new;
+ if (PGmax_old <= 0)
+ PGmax_old = INF;
+ if (PGmin_old >= 0)
+ PGmin_old = -INF;
+ }
+
+ info("\noptimization finished, #iter = %d\n",iter);
+ if (iter >= max_iter)
+ info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
+
+ // calculate objective value
+
+ double v = 0;
+ int nSV = 0;
+ for(i=0; i<w_size; i++)
+ v += w[i]*w[i];
+ for(i=0; i<l; i++)
+ {
+ v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
+ if(alpha[i] > 0)
+ ++nSV;
+ }
+ info("Objective value = %lf\n",v/2);
+ info("nSV = %d\n",nSV);
+
+ delete [] QD;
+ delete [] alpha;
+ delete [] y;
+ delete [] index;
+ }
+
+ // A coordinate descent algorithm for
+ // the dual of L2-regularized logistic regression problems
+ //
+ // min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - alpha_i) log (upper_bound_i - alpha_i) ,
+ // s.t. 0 <= alpha_i <= upper_bound_i,
+ //
+ // where Qij = yi yj xi^T xj and
+ // upper_bound_i = Cp if y_i = 1
+ // upper_bound_i = Cn if y_i = -1
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Algorithm 5 of Yu et al., MLJ 2010
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
+
+ void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
+ {
+ int l = prob->l;
+ int w_size = prob->n;
+ int i, s, iter = 0;
+ double *xTx = new double[l];
+ int max_iter = 1000;
+ int *index = new int[l];
+ double *alpha = new double[2*l]; // store alpha and C - alpha
+ schar *y = new schar[l];
+ int max_inner_iter = 100; // for inner Newton
+ double innereps = 1e-2;
+ double innereps_min = min(1e-8, eps);
+ double upper_bound[3] = {Cn, 0, Cp};
+
+ for(i=0; i<w_size; i++)
+ w[i] = 0;
+ for(i=0; i<l; i++)
+ {
+ if(prob->y[i] > 0)
+ {
+ y[i] = +1;
+ }
+ else
+ {
+ y[i] = -1;
+ }
+ alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
+ alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
+
+ xTx[i] = 0;
+ feature_node *xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ xTx[i] += (xi->value)*(xi->value);
+ w[xi->index-1] += y[i]*alpha[2*i]*xi->value;
+ xi++;
+ }
+ index[i] = i;
+ }
+
+ while (iter < max_iter)
+ {
+ for (i=0; i<l; i++)
+ {
+ int j = i+rand()%(l-i);
+ swap(index[i], index[j]);
+ }
+ int newton_iter = 0;
+ double Gmax = 0;
+ for (s=0; s<l; s++)
+ {
+ i = index[s];
+ schar yi = y[i];
+ double C = upper_bound[GETI(i)];
+ double ywTx = 0, xisq = xTx[i];
+ feature_node *xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ ywTx += w[xi->index-1]*xi->value;
+ xi++;
+ }
+ ywTx *= y[i];
+ double a = xisq, b = ywTx;
+
+ // Decide to minimize g_1(z) or g_2(z)
+ int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
+ if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
+ {
+ ind1 = 2*i+1;
+ ind2 = 2*i;
+ sign = -1;
+ }
+
+ // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
+ double alpha_old = alpha[ind1];
+ double z = alpha_old;
+ if(C - z < 0.5 * C)
+ z = 0.1*z;
+ double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
+ Gmax = max(Gmax, fabs(gp));
+
+ // Newton method on the sub-problem
+ const double eta = 0.1; // xi in the paper
+ int inner_iter = 0;
+ while (inner_iter <= max_inner_iter)
+ {
+ if(fabs(gp) < innereps)
+ break;
+ double gpp = a + C/(C-z)/z;
+ double tmpz = z - gp/gpp;
+ if(tmpz <= 0)
+ z *= eta;
+ else // tmpz in (0, C)
+ z = tmpz;
+ gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
+ newton_iter++;
+ inner_iter++;
+ }
+
+ if(inner_iter > 0) // update w
+ {
+ alpha[ind1] = z;
+ alpha[ind2] = C-z;
+ xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ w[xi->index-1] += sign*(z-alpha_old)*yi*xi->value;
+ xi++;
+ }
+ }
+ }
+
+ iter++;
+ if(iter % 10 == 0)
+ info(".");
+
+ if(Gmax < eps)
+ break;
+
+ if(newton_iter <= l/10)
+ innereps = max(innereps_min, 0.1*innereps);
+
+ }
+
+ info("\noptimization finished, #iter = %d\n",iter);
+ if (iter >= max_iter)
+ info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
+
+ // calculate objective value
+
+ double v = 0;
+ for(i=0; i<w_size; i++)
+ v += w[i] * w[i];
+ v *= 0.5;
+ for(i=0; i<l; i++)
+ v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
+ - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
+ info("Objective value = %lf\n", v);
+
+ delete [] xTx;
+ delete [] alpha;
+ delete [] y;
+ delete [] index;
+ }
+
+ // A coordinate descent algorithm for
+ // L1-regularized L2-loss support vector classification
+ //
+ // min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
+
+ static void solve_l1r_l2_svc(
+ problem *prob_col, double *w, double eps,
+ double Cp, double Cn)
+ {
+ int l = prob_col->l;
+ int w_size = prob_col->n;
+ int j, s, iter = 0;
+ int max_iter = 1000;
+ int active_size = w_size;
+ int max_num_linesearch = 20;
+
+ double sigma = 0.01;
+ double d, G_loss, G, H;
+ double Gmax_old = INF;
+ double Gmax_new, Gnorm1_new;
+ double Gnorm1_init;
+ double d_old, d_diff;
+ double loss_old, loss_new;
+ double appxcond, cond;
+
+ int *index = new int[w_size];
+ schar *y = new schar[l];
+ double *b = new double[l]; // b = 1-ywTx
+ double *xj_sq = new double[w_size];
+ feature_node *x;
+
+ double C[3] = {Cn,0,Cp};
+
+ for(j=0; j<l; j++)
+ {
+ b[j] = 1;
+ if(prob_col->y[j] > 0)
+ y[j] = 1;
+ else
+ y[j] = -1;
+ }
+ for(j=0; j<w_size; j++)
+ {
+ w[j] = 0;
+ index[j] = j;
+ xj_sq[j] = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ double val = x->value;
+ x->value *= y[ind]; // x->value stores yi*xij
+ xj_sq[j] += C[GETI(ind)]*val*val;
+ x++;
+ }
+ }
+
+ while(iter < max_iter)
+ {
+ Gmax_new = 0;
+ Gnorm1_new = 0;
+
+ for(j=0; j<active_size; j++)
+ {
+ int i = j+rand()%(active_size-j);
+ swap(index[i], index[j]);
+ }
+
+ for(s=0; s<active_size; s++)
+ {
+ j = index[s];
+ G_loss = 0;
+ H = 0;
+
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ if(b[ind] > 0)
+ {
+ double val = x->value;
+ double tmp = C[GETI(ind)]*val;
+ G_loss -= tmp*b[ind];
+ H += tmp*val;
+ }
+ x++;
+ }
+ G_loss *= 2;
+
+ G = G_loss;
+ H *= 2;
+ H = max(H, 1e-12);
+
+ double Gp = G+1;
+ double Gn = G-1;
+ double violation = 0;
+ if(w[j] == 0)
+ {
+ if(Gp < 0)
+ violation = -Gp;
+ else if(Gn > 0)
+ violation = Gn;
+ else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ }
+ else if(w[j] > 0)
+ violation = fabs(Gp);
+ else
+ violation = fabs(Gn);
+
+ Gmax_new = max(Gmax_new, violation);
+ Gnorm1_new += violation;
+
+ // obtain Newton direction d
+ if(Gp <= H*w[j])
+ d = -Gp/H;
+ else if(Gn >= H*w[j])
+ d = -Gn/H;
+ else
+ d = -w[j];
+
+ if(fabs(d) < 1.0e-12)
+ continue;
+
+ double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
+ d_old = 0;
+ int num_linesearch;
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
+ {
+ d_diff = d_old - d;
+ cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
+
+ appxcond = xj_sq[j]*d*d + G_loss*d + cond;
+ if(appxcond <= 0)
+ {
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ b[x->index-1] += d_diff*x->value;
+ x++;
+ }
+ break;
+ }
+
+ if(num_linesearch == 0)
+ {
+ loss_old = 0;
+ loss_new = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ if(b[ind] > 0)
+ loss_old += C[GETI(ind)]*b[ind]*b[ind];
+ double b_new = b[ind] + d_diff*x->value;
+ b[ind] = b_new;
+ if(b_new > 0)
+ loss_new += C[GETI(ind)]*b_new*b_new;
+ x++;
+ }
+ }
+ else
+ {
+ loss_new = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ double b_new = b[ind] + d_diff*x->value;
+ b[ind] = b_new;
+ if(b_new > 0)
+ loss_new += C[GETI(ind)]*b_new*b_new;
+ x++;
+ }
+ }
+
+ cond = cond + loss_new - loss_old;
+ if(cond <= 0)
+ break;
+ else
+ {
+ d_old = d;
+ d *= 0.5;
+ delta *= 0.5;
+ }
+ }
+
+ w[j] += d;
+
+ // recompute b[] if line search takes too many steps
+ if(num_linesearch >= max_num_linesearch)
+ {
+ info("#");
+ for(int i=0; i<l; i++)
+ b[i] = 1;
+
+ for(int i=0; i<w_size; i++)
+ {
+ if(w[i]==0) continue;
+ x = prob_col->x[i];
+ while(x->index != -1)
+ {
+ b[x->index-1] -= w[i]*x->value;
+ x++;
+ }
+ }
+ }
+ }
+
+ if(iter == 0)
+ Gnorm1_init = Gnorm1_new;
+ iter++;
+ if(iter % 10 == 0)
+ info(".");
+
+ if(Gnorm1_new <= eps*Gnorm1_init)
+ {
+ if(active_size == w_size)
+ break;
+ else
+ {
+ active_size = w_size;
+ info("*");
+ Gmax_old = INF;
+ continue;
+ }
+ }
+
+ Gmax_old = Gmax_new;
+ }
+
+ info("\noptimization finished, #iter = %d\n", iter);
+ if(iter >= max_iter)
+ info("\nWARNING: reaching max number of iterations\n");
+
+ // calculate objective value
+
+ double v = 0;
+ int nnz = 0;
+ for(j=0; j<w_size; j++)
+ {
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ x->value *= prob_col->y[x->index-1]; // restore x->value
+ x++;
+ }
+ if(w[j] != 0)
+ {
+ v += fabs(w[j]);
+ nnz++;
+ }
+ }
+ for(j=0; j<l; j++)
+ if(b[j] > 0)
+ v += C[GETI(j)]*b[j]*b[j];
+
+ info("Objective value = %lf\n", v);
+ info("#nonzeros/#features = %d/%d\n", nnz, w_size);
+
+ delete [] index;
+ delete [] y;
+ delete [] b;
+ delete [] xj_sq;
+ }
+
+ // A coordinate descent algorithm for
+ // L1-regularized logistic regression problems
+ //
+ // min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
+ //
+ // Given:
+ // x, y, Cp, Cn
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w
+ //
+ // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
+
+ #undef GETI
+ #define GETI(i) (y[i]+1)
+ // To support weights for instances, use GETI(i) (i)
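+ // The solver below is a two-level scheme: each outer Newton iteration builds
+ // a quadratic approximation of the loss (Hdiag, Grad), an inner coordinate
+ // descent loop minimizes that approximation over wpd with its own shrinking,
+ // and a backtracking line search on the true objective (sigma*delta test)
+ // accepts the step or keeps halving it.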
+
+ static void solve_l1r_lr(
+ const problem *prob_col, double *w, double eps,
+ double Cp, double Cn)
+ {
+ int l = prob_col->l;
+ int w_size = prob_col->n;
+ int j, s, newton_iter=0, iter=0;
+ int max_newton_iter = 100;
+ int max_iter = 1000;
+ int max_num_linesearch = 20;
+ int active_size;
+ int QP_active_size;
+
+ double nu = 1e-12;
+ double inner_eps = 1;
+ double sigma = 0.01;
+ double w_norm=0, w_norm_new;
+ double z, G, H;
+ double Gnorm1_init;
+ double Gmax_old = INF;
+ double Gmax_new, Gnorm1_new;
+ double QP_Gmax_old = INF;
+ double QP_Gmax_new, QP_Gnorm1_new;
+ double delta, negsum_xTd, cond;
+
+ int *index = new int[w_size];
+ schar *y = new schar[l];
+ double *Hdiag = new double[w_size];
+ double *Grad = new double[w_size];
+ double *wpd = new double[w_size];
+ double *xjneg_sum = new double[w_size];
+ double *xTd = new double[l];
+ double *exp_wTx = new double[l];
+ double *exp_wTx_new = new double[l];
+ double *tau = new double[l];
+ double *D = new double[l];
+ feature_node *x;
+
+ double C[3] = {Cn,0,Cp};
+
+ for(j=0; j<l; j++)
+ {
+ if(prob_col->y[j] > 0)
+ y[j] = 1;
+ else
+ y[j] = -1;
+
+ // assume initial w is 0
+ exp_wTx[j] = 1;
+ tau[j] = C[GETI(j)]*0.5;
+ D[j] = C[GETI(j)]*0.25;
+ }
+ for(j=0; j<w_size; j++)
+ {
+ w[j] = 0;
+ wpd[j] = w[j];
+ index[j] = j;
+ xjneg_sum[j] = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ if(y[ind] == -1)
+ xjneg_sum[j] += C[GETI(ind)]*x->value;
+ x++;
+ }
+ }
+
+ while(newton_iter < max_newton_iter)
+ {
+ Gmax_new = 0;
+ Gnorm1_new = 0;
+ active_size = w_size;
+
+ for(s=0; s<active_size; s++)
+ {
+ j = index[s];
+ Hdiag[j] = nu;
+ Grad[j] = 0;
+
+ double tmp = 0;
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ Hdiag[j] += x->value*x->value*D[ind];
+ tmp += x->value*tau[ind];
+ x++;
+ }
+ Grad[j] = -tmp + xjneg_sum[j];
+
+ double Gp = Grad[j]+1;
+ double Gn = Grad[j]-1;
+ double violation = 0;
+ if(w[j] == 0)
+ {
+ if(Gp < 0)
+ violation = -Gp;
+ else if(Gn > 0)
+ violation = Gn;
+ //outer-level shrinking
+ else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
+ }
+ else if(w[j] > 0)
+ violation = fabs(Gp);
+ else
+ violation = fabs(Gn);
+
+ Gmax_new = max(Gmax_new, violation);
+ Gnorm1_new += violation;
+ }
+
+ if(newton_iter == 0)
+ Gnorm1_init = Gnorm1_new;
+
+ if(Gnorm1_new <= eps*Gnorm1_init)
+ break;
+
+ iter = 0;
+ QP_Gmax_old = INF;
+ QP_active_size = active_size;
+
+ for(int i=0; i<l; i++)
+ xTd[i] = 0;
+
+ // optimize QP over wpd
+ while(iter < max_iter)
+ {
+ QP_Gmax_new = 0;
+ QP_Gnorm1_new = 0;
+
+ for(j=0; j<QP_active_size; j++)
+ {
+ int i = j+rand()%(QP_active_size-j);
+ swap(index[i], index[j]);
+ }
+
+ for(s=0; s<QP_active_size; s++)
+ {
+ j = index[s];
+ H = Hdiag[j];
+
+ x = prob_col->x[j];
+ G = Grad[j] + (wpd[j]-w[j])*nu;
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ G += x->value*D[ind]*xTd[ind];
+ x++;
+ }
+
+ double Gp = G+1;
+ double Gn = G-1;
+ double violation = 0;
+ if(wpd[j] == 0)
+ {
+ if(Gp < 0)
+ violation = -Gp;
+ else if(Gn > 0)
+ violation = Gn;
+ //inner-level shrinking
+ else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
+ {
+ QP_active_size--;
+ swap(index[s], index[QP_active_size]);
+ s--;
+ continue;
+ }
+ }
+ else if(wpd[j] > 0)
+ violation = fabs(Gp);
+ else
+ violation = fabs(Gn);
+
+ QP_Gmax_new = max(QP_Gmax_new, violation);
+ QP_Gnorm1_new += violation;
+
+ // obtain solution of one-variable problem
+ if(Gp <= H*wpd[j])
+ z = -Gp/H;
+ else if(Gn >= H*wpd[j])
+ z = -Gn/H;
+ else
+ z = -wpd[j];
+
+ if(fabs(z) < 1.0e-12)
+ continue;
+ z = min(max(z,-10.0),10.0);
+
+ wpd[j] += z;
+
+ x = prob_col->x[j];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ xTd[ind] += x->value*z;
+ x++;
+ }
+ }
+
+ iter++;
+
+ if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
+ {
+ //inner stopping
+ if(QP_active_size == active_size)
+ break;
+ //active set reactivation
+ else
+ {
+ QP_active_size = active_size;
+ QP_Gmax_old = INF;
+ continue;
+ }
+ }
+
+ QP_Gmax_old = QP_Gmax_new;
+ }
+
+ if(iter >= max_iter)
+ info("WARNING: reaching max number of inner iterations\n");
+
+ delta = 0;
+ w_norm_new = 0;
+ for(j=0; j<w_size; j++)
+ {
+ delta += Grad[j]*(wpd[j]-w[j]);
+ if(wpd[j] != 0)
+ w_norm_new += fabs(wpd[j]);
+ }
+ delta += (w_norm_new-w_norm);
+
+ negsum_xTd = 0;
+ for(int i=0; i<l; i++)
+ if(y[i] == -1)
+ negsum_xTd += C[GETI(i)]*xTd[i];
+
+ int num_linesearch;
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
+ {
+ cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;
+
+ for(int i=0; i<l; i++)
+ {
+ double exp_xTd = exp(xTd[i]);
+ exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
+ cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
+ }
+
+ if(cond <= 0)
+ {
+ w_norm = w_norm_new;
+ for(j=0; j<w_size; j++)
+ w[j] = wpd[j];
+ for(int i=0; i<l; i++)
+ {
+ exp_wTx[i] = exp_wTx_new[i];
+ double tau_tmp = 1/(1+exp_wTx[i]);
+ tau[i] = C[GETI(i)]*tau_tmp;
+ D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
+ }
+ break;
+ }
+ else
+ {
+ w_norm_new = 0;
+ for(j=0; j<w_size; j++)
+ {
+ wpd[j] = (w[j]+wpd[j])*0.5;
+ if(wpd[j] != 0)
+ w_norm_new += fabs(wpd[j]);
+ }
+ delta *= 0.5;
+ negsum_xTd *= 0.5;
+ for(int i=0; i<l; i++)
+ xTd[i] *= 0.5;
+ }
+ }
+
+ // Recompute some info due to too many line search steps
+ if(num_linesearch >= max_num_linesearch)
+ {
+ for(int i=0; i<l; i++)
+ exp_wTx[i] = 0;
+
+ for(int i=0; i<w_size; i++)
+ {
+ if(w[i]==0) continue;
+ x = prob_col->x[i];
+ while(x->index != -1)
+ {
+ exp_wTx[x->index-1] += w[i]*x->value;
+ x++;
+ }
+ }
+
+ for(int i=0; i<l; i++)
+ exp_wTx[i] = exp(exp_wTx[i]);
+ }
+
+ if(iter == 1)
+ inner_eps *= 0.25;
+
+ newton_iter++;
+ Gmax_old = Gmax_new;
+
+ info("iter %3d #CD cycles %d\n", newton_iter, iter);
+ }
+
+ info("=========================\n");
+ info("optimization finished, #iter = %d\n", newton_iter);
+ if(newton_iter >= max_newton_iter)
+ info("WARNING: reaching max number of iterations\n");
+
+ // calculate objective value
+
+ double v = 0;
+ int nnz = 0;
+ for(j=0; j<w_size; j++)
+ if(w[j] != 0)
+ {
+ v += fabs(w[j]);
+ nnz++;
+ }
+ for(j=0; j<l; j++)
+ if(y[j] == 1)
+ v += C[GETI(j)]*log(1+1/exp_wTx[j]);
+ else
+ v += C[GETI(j)]*log(1+exp_wTx[j]);
+
+ info("Objective value = %lf\n", v);
+ info("#nonzeros/#features = %d/%d\n", nnz, w_size);
+
+ delete [] index;
+ delete [] y;
+ delete [] Hdiag;
+ delete [] Grad;
+ delete [] wpd;
+ delete [] xjneg_sum;
+ delete [] xTd;
+ delete [] exp_wTx;
+ delete [] exp_wTx_new;
+ delete [] tau;
+ delete [] D;
+ }
+
+ // transpose matrix X from row format to column format
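+ // Two passes over the row-format data: the first counts the entries in each
+ // column to lay out col_ptr (the extra +1 per column leaves room for a -1
+ // sentinel node), the second writes each entry into its column while bumping
+ // col_ptr, so afterwards col_ptr[i] points at column i's sentinel slot.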
+ static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
+ {
+ int i;
+ int l = prob->l;
+ int n = prob->n;
+ int nnz = 0;
+ int *col_ptr = new int[n+1];
+ feature_node *x_space;
+ prob_col->l = l;
+ prob_col->n = n;
+ prob_col->y = new int[l];
+ prob_col->x = new feature_node*[n];
+
+ for(i=0; i<l; i++)
+ prob_col->y[i] = prob->y[i];
+
+ for(i=0; i<n+1; i++)
+ col_ptr[i] = 0;
+ for(i=0; i<l; i++)
+ {
+ feature_node *x = prob->x[i];
+ while(x->index != -1)
+ {
+ nnz++;
+ col_ptr[x->index]++;
+ x++;
+ }
+ }
+ for(i=1; i<n+1; i++)
+ col_ptr[i] += col_ptr[i-1] + 1;
+
+ x_space = new feature_node[nnz+n];
+ for(i=0; i<n; i++)
+ prob_col->x[i] = &x_space[col_ptr[i]];
+
+ for(i=0; i<l; i++)
+ {
+ feature_node *x = prob->x[i];
+ while(x->index != -1)
+ {
+ int ind = x->index-1;
+ x_space[col_ptr[ind]].index = i+1; // starts from 1
+ x_space[col_ptr[ind]].value = x->value;
+ col_ptr[ind]++;
+ x++;
+ }
+ }
+ for(i=0; i<n; i++)
+ x_space[col_ptr[i]].index = -1;
+
+ *x_space_ret = x_space;
+
+ delete [] col_ptr;
+ }
+
+ // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
+ // perm, length l, must be allocated before calling this subroutine
+ static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
+ {
+ int l = prob->l;
+ int max_nr_class = 16;
+ int nr_class = 0;
+ int *label = Malloc(int,max_nr_class);
+ int *count = Malloc(int,max_nr_class);
+ int *data_label = Malloc(int,l);
+ int i;
+
+ for(i=0;i<l;i++)
+ {
+ int this_label = prob->y[i];
+ int j;
+ for(j=0;j<nr_class;j++)
+ {
+ if(this_label == label[j])
+ {
+ ++count[j];
+ break;
+ }
+ }
+ data_label[i] = j;
+ if(j == nr_class)
+ {
+ if(nr_class == max_nr_class)
+ {
+ max_nr_class *= 2;
+ label = (int *)realloc(label,max_nr_class*sizeof(int));
+ count = (int *)realloc(count,max_nr_class*sizeof(int));
+ }
+ label[nr_class] = this_label;
+ count[nr_class] = 1;
+ ++nr_class;
+ }
+ }
+
+ int *start = Malloc(int,nr_class);
+ start[0] = 0;
+ for(i=1;i<nr_class;i++)
+ start[i] = start[i-1]+count[i-1];
+ for(i=0;i<l;i++)
+ {
+ perm[start[data_label[i]]] = i;
+ ++start[data_label[i]];
+ }
+ start[0] = 0;
+ for(i=1;i<nr_class;i++)
+ start[i] = start[i-1]+count[i-1];
+
+ *nr_class_ret = nr_class;
+ *label_ret = label;
+ *start_ret = start;
+ *count_ret = count;
+ free(data_label);
+ }
+
+ static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
+ {
+ double eps=param->eps;
+ int pos = 0;
+ int neg = 0;
+ for(int i=0;i<prob->l;i++)
+ if(prob->y[i]==+1)
+ pos++;
+ neg = prob->l - pos;
+
+ function *fun_obj=NULL;
+ switch(param->solver_type)
+ {
+ case L2R_LR:
+ {
+ fun_obj=new l2r_lr_fun(prob, Cp, Cn);
+ TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
+ tron_obj.set_print_string(liblinear_print_string);
+ tron_obj.tron(w);
+ delete fun_obj;
+ break;
+ }
+ case L2R_L2LOSS_SVC:
+ {
+ fun_obj=new l2r_l2_svc_fun(prob, Cp, Cn);
+ TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
+ tron_obj.set_print_string(liblinear_print_string);
+ tron_obj.tron(w);
+ delete fun_obj;
+ break;
+ }
+ case L2R_L2LOSS_SVC_DUAL:
+ solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
+ break;
+ case L2R_L1LOSS_SVC_DUAL:
+ solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
+ break;
+ case L1R_L2LOSS_SVC:
+ {
+ problem prob_col;
+ feature_node *x_space = NULL;
+ transpose(prob, &x_space, &prob_col);
+ solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
+ delete [] prob_col.y;
+ delete [] prob_col.x;
+ delete [] x_space;
+ break;
+ }
+ case L1R_LR:
+ {
+ problem prob_col;
+ feature_node *x_space = NULL;
+ transpose(prob, &x_space, &prob_col);
+ solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
+ delete [] prob_col.y;
+ delete [] prob_col.x;
+ delete [] x_space;
+ break;
+ }
+ case L2R_LR_DUAL:
+ solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
+ break;
+ default:
+ fprintf(stderr, "Error: unknown solver_type\n");
+ break;
+ }
+ }
+
+ //
+ // Interface functions
+ //
+ model* train(const problem *prob, const parameter *param)
+ {
+ int i,j;
+ int l = prob->l;
+ int n = prob->n;
+ int w_size = prob->n;
+ model *model_ = Malloc(model,1);
+
+ if(prob->bias>=0)
+ model_->nr_feature=n-1;
+ else
+ model_->nr_feature=n;
+ model_->param = *param;
+ model_->bias = prob->bias;
+
+ int nr_class;
+ int *label = NULL;
+ int *start = NULL;
+ int *count = NULL;
+ int *perm = Malloc(int,l);
+
+ // group training data of the same class
+ group_classes(prob,&nr_class,&label,&start,&count,perm);
+
+ model_->nr_class=nr_class;
+ model_->label = Malloc(int,nr_class);
+ for(i=0;i<nr_class;i++)
+ model_->label[i] = label[i];
+
+ // calculate weighted C
+ double *weighted_C = Malloc(double, nr_class);
+ for(i=0;i<nr_class;i++)
+ weighted_C[i] = param->C;
+ for(i=0;i<param->nr_weight;i++)
+ {
+ for(j=0;j<nr_class;j++)
+ if(param->weight_label[i] == label[j])
+ break;
+ if(j == nr_class)
+ fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
+ else
+ weighted_C[j] *= param->weight[i];
+ }
+
+ // constructing the subproblem
+ feature_node **x = Malloc(feature_node *,l);
+ for(i=0;i<l;i++)
+ x[i] = prob->x[perm[i]];
+
+ int k;
+ problem sub_prob;
+ sub_prob.l = l;
+ sub_prob.n = n;
+ sub_prob.x = Malloc(feature_node *,sub_prob.l);
+ sub_prob.y = Malloc(int,sub_prob.l);
+
+ for(k=0; k<sub_prob.l; k++)
+ sub_prob.x[k] = x[k];
+
+ // multi-class svm by Crammer and Singer
+ if(param->solver_type == MCSVM_CS)
+ {
+ model_->w=Malloc(double, n*nr_class);
+ for(i=0;i<nr_class;i++)
+ for(j=start[i];j<start[i]+count[i];j++)
+ sub_prob.y[j] = i;
+ Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
+ Solver.Solve(model_->w);
+ }
+ else
+ {
+ if(nr_class == 2)
+ {
+ model_->w=Malloc(double, w_size);
+
+ int e0 = start[0]+count[0];
+ k=0;
+ for(; k<e0; k++)
+ sub_prob.y[k] = +1;
+ for(; k<sub_prob.l; k++)
+ sub_prob.y[k] = -1;
+
+ train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
+ }
+ else
+ {
+ model_->w=Malloc(double, w_size*nr_class);
+ double *w=Malloc(double, w_size);
+ for(i=0;i<nr_class;i++)
+ {
+ int si = start[i];
+ int ei = si+count[i];
+
+ k=0;
+ for(; k<si; k++)
+ sub_prob.y[k] = -1;
+ for(; k<ei; k++)
+ sub_prob.y[k] = +1;
+ for(; k<sub_prob.l; k++)
+ sub_prob.y[k] = -1;
+
+ train_one(&sub_prob, param, w, weighted_C[i], param->C);
+
+ for(int j=0;j<w_size;j++)
+ model_->w[j*nr_class+i] = w[j];
+ }
+ free(w);
+ }
+
+ }
+
+ free(x);
+ free(label);
+ free(start);
+ free(count);
+ free(perm);
+ free(sub_prob.x);
+ free(sub_prob.y);
+ free(weighted_C);
+ return model_;
+ }
+
+ void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target)
+ {
+ int i;
+ int *fold_start = Malloc(int,nr_fold+1);
+ int l = prob->l;
+ int *perm = Malloc(int,l);
+
+ for(i=0;i<l;i++) perm[i]=i;
+ for(i=0;i<l;i++)
+ {
+ int j = i+rand()%(l-i);
+ swap(perm[i],perm[j]);
+ }
+ for(i=0;i<=nr_fold;i++)
+ fold_start[i]=i*l/nr_fold;
+
+ for(i=0;i<nr_fold;i++)
+ {
+ int begin = fold_start[i];
+ int end = fold_start[i+1];
+ int j,k;
+ struct problem subprob;
+
+ subprob.bias = prob->bias;
+ subprob.n = prob->n;
+ subprob.l = l-(end-begin);
+ subprob.x = Malloc(struct feature_node*,subprob.l);
+ subprob.y = Malloc(int,subprob.l);
+
+ k=0;
+ for(j=0;j<begin;j++)
+ {
+ subprob.x[k] = prob->x[perm[j]];
+ subprob.y[k] = prob->y[perm[j]];
+ ++k;
+ }
+ for(j=end;j<l;j++)
+ {
+ subprob.x[k] = prob->x[perm[j]];
+ subprob.y[k] = prob->y[perm[j]];
+ ++k;
+ }
+ struct model *submodel = train(&subprob,param);
+ for(j=begin;j<end;j++)
+ target[perm[j]] = predict(submodel,prob->x[perm[j]]);
+ free_and_destroy_model(&submodel);
+ free(subprob.x);
+ free(subprob.y);
+ }
+ free(fold_start);
+ free(perm);
+ }
+
+ int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
+ {
+ int idx;
+ int n;
+ if(model_->bias>=0)
+ n=model_->nr_feature+1;
+ else
+ n=model_->nr_feature;
+ double *w=model_->w;
+ int nr_class=model_->nr_class;
+ int i;
+ int nr_w;
+ if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
+ nr_w = 1;
+ else
+ nr_w = nr_class;
+
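+ // model_->w is stored feature-major: the nr_w class scores for feature idx
+ // sit contiguously at w[(idx-1)*nr_w .. (idx-1)*nr_w + nr_w - 1], so a single
+ // pass over the sparse input accumulates every class score at once.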
+ const feature_node *lx=x;
+ for(i=0;i<nr_w;i++)
+ dec_values[i] = 0;
+ for(; (idx=lx->index)!=-1; lx++)
+ {
+ // the dimension of testing data may exceed that of training
+ if(idx<=n)
+ for(i=0;i<nr_w;i++)
+ dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
+ }
+
+ if(nr_class==2)
+ return (dec_values[0]>0)?model_->label[0]:model_->label[1];
+ else
+ {
+ int dec_max_idx = 0;
+ for(i=1;i<nr_class;i++)
+ {
+ if(dec_values[i] > dec_values[dec_max_idx])
+ dec_max_idx = i;
+ }
+ return model_->label[dec_max_idx];
+ }
+ }
+
+ int predict(const model *model_, const feature_node *x)
+ {
+ double *dec_values = Malloc(double, model_->nr_class);
+ int label=predict_values(model_, x, dec_values);
+ free(dec_values);
+ return label;
+ }
+
+ int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
+ {
+ if(check_probability_model(model_))
+ {
+ int i;
+ int nr_class=model_->nr_class;
+ int nr_w;
+ if(nr_class==2)
+ nr_w = 1;
+ else
+ nr_w = nr_class;
+
+ int label=predict_values(model_, x, prob_estimates);
+ for(i=0;i<nr_w;i++)
+ prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
+
+ if(nr_class==2) // for binary classification
+ prob_estimates[1]=1.-prob_estimates[0];
+ else
+ {
+ double sum=0;
+ for(i=0; i<nr_class; i++)
+ sum+=prob_estimates[i];
+
+ for(i=0; i<nr_class; i++)
+ prob_estimates[i]=prob_estimates[i]/sum;
+ }
+
+ return label;
+ }
+ else
+ return 0;
+ }
+
+ static const char *solver_type_table[]=
+ {
+ "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
+ "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL", NULL
+ };
+
+ int save_model(const char *model_file_name, const struct model *model_)
+ {
+ int i;
+ int nr_feature=model_->nr_feature;
+ int n;
+ const parameter& param = model_->param;
+
+ if(model_->bias>=0)
+ n=nr_feature+1;
+ else
+ n=nr_feature;
+ int w_size = n;
+ FILE *fp = fopen(model_file_name,"w");
+ if(fp==NULL) return -1;
+
+ int nr_w;
+ if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
+ nr_w=1;
+ else
+ nr_w=model_->nr_class;
+
+ fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
+ fprintf(fp, "nr_class %d\n", model_->nr_class);
+ fprintf(fp, "label");
+ for(i=0; i<model_->nr_class; i++)
+ fprintf(fp, " %d", model_->label[i]);
+ fprintf(fp, "\n");
+
+ fprintf(fp, "nr_feature %d\n", nr_feature);
+
+ fprintf(fp, "bias %.16g\n", model_->bias);
+
+ fprintf(fp, "w\n");
+ for(i=0; i<w_size; i++)
+ {
+ int j;
+ for(j=0; j<nr_w; j++)
+ fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
+ fprintf(fp, "\n");
+ }
+
+ if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
+ else return 0;
+ }
+
+ struct model *load_model(const char *model_file_name)
+ {
+ FILE *fp = fopen(model_file_name,"r");
+ if(fp==NULL) return NULL;
+
+ int i;
+ int nr_feature;
+ int n;
+ int nr_class;
+ double bias;
+ model *model_ = Malloc(model,1);
+ parameter& param = model_->param;
+
+ model_->label = NULL;
+
+ char cmd[81];
+ while(1)
+ {
+ fscanf(fp,"%80s",cmd);
+ if(strcmp(cmd,"solver_type")==0)
+ {
+ fscanf(fp,"%80s",cmd);
+ int i;
+ for(i=0;solver_type_table[i];i++)
+ {
+ if(strcmp(solver_type_table[i],cmd)==0)
+ {
+ param.solver_type=i;
+ break;
+ }
+ }
+ if(solver_type_table[i] == NULL)
+ {
+ fprintf(stderr,"unknown solver type.\n");
+ free(model_->label);
+ free(model_);
+ return NULL;
+ }
+ }
+ else if(strcmp(cmd,"nr_class")==0)
+ {
+ fscanf(fp,"%d",&nr_class);
+ model_->nr_class=nr_class;
+ }
+ else if(strcmp(cmd,"nr_feature")==0)
+ {
+ fscanf(fp,"%d",&nr_feature);
+ model_->nr_feature=nr_feature;
+ }
+ else if(strcmp(cmd,"bias")==0)
+ {
+ fscanf(fp,"%lf",&bias);
+ model_->bias=bias;
+ }
+ else if(strcmp(cmd,"w")==0)
+ {
+ break;
+ }
+ else if(strcmp(cmd,"label")==0)
+ {
+ int nr_class = model_->nr_class;
+ model_->label = Malloc(int,nr_class);
+ for(int i=0;i<nr_class;i++)
+ fscanf(fp,"%d",&model_->label[i]);
+ }
+ else
+ {
+ fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
+ free(model_->label); // free(NULL) is a no-op if no label line was read
+ free(model_);
+ return NULL;
+ }
+ }
+
+ nr_feature=model_->nr_feature;
+ if(model_->bias>=0)
+ n=nr_feature+1;
+ else
+ n=nr_feature;
+ int w_size = n;
+ int nr_w;
+ if(nr_class==2 && param.solver_type != MCSVM_CS)
+ nr_w = 1;
+ else
+ nr_w = nr_class;
+
+ model_->w=Malloc(double, w_size*nr_w);
+ for(i=0; i<w_size; i++)
+ {
+ int j;
+ for(j=0; j<nr_w; j++)
+ fscanf(fp, "%lf ", &model_->w[i*nr_w+j]);
+ fscanf(fp, "\n");
+ }
+ if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
+
+ return model_;
+ }
+
+ int get_nr_feature(const model *model_)
+ {
+ return model_->nr_feature;
+ }
+
+ int get_nr_class(const model *model_)
+ {
+ return model_->nr_class;
+ }
+
+ void get_labels(const model *model_, int* label)
+ {
+ if (model_->label != NULL)
+ for(int i=0;i<model_->nr_class;i++)
+ label[i] = model_->label[i];
+ }
+
+ void free_model_content(struct model *model_ptr)
+ {
+ if(model_ptr->w != NULL)
+ free(model_ptr->w);
+ if(model_ptr->label != NULL)
+ free(model_ptr->label);
+ }
+
+ void free_and_destroy_model(struct model **model_ptr_ptr)
+ {
+ struct model *model_ptr = *model_ptr_ptr;
+ if(model_ptr != NULL)
+ {
+ free_model_content(model_ptr);
+ free(model_ptr);
+ }
+ }
+
+ void destroy_param(parameter* param)
+ {
+ if(param->weight_label != NULL)
+ free(param->weight_label);
+ if(param->weight != NULL)
+ free(param->weight);
+ }
+
+ const char *check_parameter(const problem *prob, const parameter *param)
+ {
+ if(param->eps <= 0)
+ return "eps <= 0";
+
+ if(param->C <= 0)
+ return "C <= 0";
+
+ if(param->solver_type != L2R_LR
+ && param->solver_type != L2R_L2LOSS_SVC_DUAL
+ && param->solver_type != L2R_L2LOSS_SVC
+ && param->solver_type != L2R_L1LOSS_SVC_DUAL
+ && param->solver_type != MCSVM_CS
+ && param->solver_type != L1R_L2LOSS_SVC
+ && param->solver_type != L1R_LR
+ && param->solver_type != L2R_LR_DUAL)
+ return "unknown solver type";
+
+ return NULL;
+ }
+
+ int check_probability_model(const struct model *model_)
+ {
+ return (model_->param.solver_type==L2R_LR ||
+ model_->param.solver_type==L2R_LR_DUAL ||
+ model_->param.solver_type==L1R_LR);
+ }
+
+ void set_print_string_function(void (*print_func)(const char*))
+ {
+ if (print_func == NULL)
+ liblinear_print_string = &print_string_stdout;
+ else
+ liblinear_print_string = print_func;
+ }
+
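
The interface functions above (train, predict, check_parameter, free_and_destroy_model, ...) are what the Ruby bindings wrap. A minimal end-to-end sketch in C, assuming linear.h lays out feature_node as {int index; double value;} and declares problem and parameter with exactly the fields used throughout this file (l/n/y/x/bias and solver_type/eps/C/nr_weight/weight_label/weight); the field order shown is an assumption:

    #include <stdio.h>
    #include "linear.h"

    int main(void)
    {
        /* Four training points in two dimensions; each sparse row ends in index -1. */
        struct feature_node x1[] = {{1, 1.0}, {2, 1.0}, {-1, 0.0}};
        struct feature_node x2[] = {{1, 2.0}, {-1, 0.0}};
        struct feature_node x3[] = {{1, -1.0}, {2, -1.0}, {-1, 0.0}};
        struct feature_node x4[] = {{2, -2.0}, {-1, 0.0}};
        struct feature_node *rows[] = {x1, x2, x3, x4};
        int labels[] = {1, 1, -1, -1};

        struct problem prob;
        prob.l = 4;      /* #instances */
        prob.n = 2;      /* #features */
        prob.x = rows;
        prob.y = labels;
        prob.bias = -1;  /* <0: no bias term appended */

        struct parameter param;
        param.solver_type = L2R_L2LOSS_SVC_DUAL;
        param.eps = 0.1;
        param.C = 1;
        param.nr_weight = 0; /* no per-class weighting */
        param.weight_label = NULL;
        param.weight = NULL;

        const char *err = check_parameter(&prob, &param);
        if (err) { fprintf(stderr, "%s\n", err); return 1; }

        struct model *m = train(&prob, &param);
        struct feature_node q[] = {{1, 1.5}, {-1, 0.0}};
        printf("predicted label: %d\n", predict(m, q));
        free_and_destroy_model(&m);
        return 0;
    }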