numo-liblinear 1.0.0 → 1.1.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
- SHA1:
- metadata.gz: e97d55247e6ea6bb0e6c98c5ad93445adc0f37fa
- data.tar.gz: c164048e2db697e01269f48e816c4e6adac2ecfb
+ SHA256:
+ metadata.gz: 4e69ca89df0a86fdc4be7d1a3074a4722ead3997a6f7ba2fff84b48c6d1c36ea
+ data.tar.gz: 992d8f8606dbf272ac18c1f6502e923d69ce3242d6cb35a460b3856cace29671
  SHA512:
- metadata.gz: 4240c57da4b083bc433293ec2c9b0716893f2523ab4180d8cf41f3bdd6d769173abc45ba0ce08607615b4de34849949527b1a97ce43cfaa5dd4a1f21a0a0e67c
- data.tar.gz: ba830acf8df33fec1efffb35a041bf6f08bf65f368b4d03fa6fc086e5d8e6a046b99629e313126100cea57ae7b655410871f85e8e63a6ba1c6234e03a1d62a2f
+ metadata.gz: 46f453f5b9ee23640a7131d92d3f666806e182265654cec321ebae22c97ae6af06a5b35726e4ab0cfde96407d0fb828391a2c5578c3e27644c487df66432cce8
+ data.tar.gz: d1e0fc15d4227491823fe4ed8f631c491e6cd4c8fb9c078ccf230785e2881fa12134dbfca9e84069bd8d995c140523bfc0572cbaa38761b114d162015dea5be1
@@ -8,6 +8,7 @@ rvm:
  - '2.4'
  - '2.5'
  - '2.6'
+ - '2.7'

  before_install:
  - gem install bundler -v 2.0.2
@@ -1,3 +1,7 @@
+ # 1.1.0
+ - Update bundled LIBLINEAR version to 2.4.1.
+ - Support one-class SVM implemented on LIBLINEAR ver. 2.4.0.
+
  # 1.0.0
  ## Breaking change
  - For easy installation, Numo::LIBLINEAR bundles LIBLINEAR codes.
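A short illustration of the new one-class SVM support noted above may help gem users. This is a minimal sketch only: it assumes the gem keeps its module-level `train`/`predict` API and exposes the new solver as `Numo::Liblinear::SolverType::ONECLASS_SVM` with a `nu` parameter; those names are inferred from the LIBLINEAR change and are not stated in this diff.

```ruby
require 'numo/narray'
require 'numo/liblinear'

# Fit the one-class SVM to "normal" samples only; nu bounds the outlier fraction.
x_train = Numo::DFloat.new(200, 2).rand
param = {
  solver_type: Numo::Liblinear::SolverType::ONECLASS_SVM, # assumed constant name
  nu: 0.05,
  eps: 0.01
}
# The label array is unused by the one-class solver (assumption), so zeros suffice.
model = Numo::Liblinear.train(x_train, Numo::DFloat.zeros(200), param)

# Predicted labels are +1 for inliers and -1 for outliers (see predict_values below).
x_test = Numo::DFloat.new(10, 2).rand
labels = Numo::Liblinear.predict(x_test, param, model)
```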
@@ -1,4 +1,4 @@
- Copyright (c) 2019 Atsushi Tatsuma
+ Copyright (c) 2019-2020 Atsushi Tatsuma
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -3,7 +3,7 @@
  [![Build Status](https://travis-ci.org/yoshoku/numo-liblinear.svg?branch=master)](https://travis-ci.org/yoshoku/numo-liblinear)
  [![Gem Version](https://badge.fury.io/rb/numo-liblinear.svg)](https://badge.fury.io/rb/numo-liblinear)
  [![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/numo-liblinear/blob/master/LICENSE.txt)
- [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/numo-liblinear/0.4.0)
+ [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://yoshoku.github.io/numo-liblinear/doc/)

  Numo::Liblinear is a Ruby gem binding to the [LIBLINEAR](https://www.csie.ntu.edu.tw/~cjlin/liblinear/) library.
  LIBLINEAR is one of the famous libraries for large-scale regularized linear classification and regression.
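The README excerpt above only carries badges and a one-line description; for orientation, here is a minimal classification sketch in the gem's style. The `train`/`predict` signatures, the solver constant, and the parameter key names are assumptions, not taken from this diff.

```ruby
require 'numo/narray'
require 'numo/liblinear'

# Toy binary classification problem.
x = Numo::DFloat[[0.1, 0.2], [0.9, 0.8], [0.2, 0.1], [0.8, 0.9]]
y = Numo::DFloat[1, -1, 1, -1]

# Parameter hash mirroring LIBLINEAR's parameter struct (key names assumed).
param = { solver_type: Numo::Liblinear::SolverType::L2R_L2LOSS_SVC_DUAL, c: 1.0 }
model = Numo::Liblinear.train(x, y, param)
predicted = Numo::Liblinear.predict(x, param, model)
```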
@@ -29,7 +29,7 @@ end
  $LDFLAGS << ' -lstdc++ '

  $srcs = Dir.glob("#{$srcdir}/*.c").map { |path| File.basename(path) }
- $srcs.concat(%w[linear.cpp tron.cpp daxpy.c ddot.c dnrm2.c dscal.c])
+ $srcs.concat(%w[linear.cpp newton.cpp daxpy.c ddot.c dnrm2.c dscal.c])

  $INCFLAGS << " -I$(srcdir)/liblinear"
  $VPATH << "$(srcdir)/liblinear"
@@ -5,7 +5,7 @@
  #include <stdarg.h>
  #include <locale.h>
  #include "linear.h"
- #include "tron.h"
+ #include "newton.h"
  int liblinear_version = LIBLINEAR_VERSION;
  typedef signed char schar;
  template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
@@ -70,6 +70,28 @@ public:
  return (ret);
  }

+ static double sparse_dot(const feature_node *x1, const feature_node *x2)
+ {
+ double ret = 0;
+ while(x1->index != -1 && x2->index != -1)
+ {
+ if(x1->index == x2->index)
+ {
+ ret += x1->value * x2->value;
+ ++x1;
+ ++x2;
+ }
+ else
+ {
+ if(x1->index > x2->index)
+ ++x2;
+ else
+ ++x1;
+ }
+ }
+ return (ret);
+ }
+
  static void axpy(const double a, const feature_node *x, double *y)
  {
  while(x->index != -1)
@@ -80,70 +102,195 @@ public:
  }
  };

- class l2r_lr_fun: public function
+ // L2-regularized empirical risk minimization
+ // min_w w^Tw/2 + \sum C_i \xi(w^Tx_i), where \xi() is the loss
+
+ class l2r_erm_fun: public function
  {
  public:
- l2r_lr_fun(const problem *prob, double *C);
- ~l2r_lr_fun();
+ l2r_erm_fun(const problem *prob, const parameter *param, double *C);
+ ~l2r_erm_fun();

  double fun(double *w);
- void grad(double *w, double *g);
- void Hv(double *s, double *Hs);
-
+ double linesearch_and_update(double *w, double *d, double *f, double *g, double alpha);
  int get_nr_variable(void);
- void get_diag_preconditioner(double *M);

- private:
+ protected:
+ virtual double C_times_loss(int i, double wx_i) = 0;
  void Xv(double *v, double *Xv);
  void XTv(double *v, double *XTv);

  double *C;
- double *z;
- double *D;
  const problem *prob;
+ double *wx;
+ double *tmp; // a working array
+ double wTw;
+ int regularize_bias;
  };

- l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C)
+ l2r_erm_fun::l2r_erm_fun(const problem *prob, const parameter *param, double *C)
  {
  int l=prob->l;

  this->prob = prob;

- z = new double[l];
- D = new double[l];
+ wx = new double[l];
+ tmp = new double[l];
  this->C = C;
+ this->regularize_bias = param->regularize_bias;
  }

- l2r_lr_fun::~l2r_lr_fun()
+ l2r_erm_fun::~l2r_erm_fun()
  {
- delete[] z;
- delete[] D;
+ delete[] wx;
+ delete[] tmp;
  }

-
- double l2r_lr_fun::fun(double *w)
+ double l2r_erm_fun::fun(double *w)
  {
  int i;
  double f=0;
- double *y=prob->y;
  int l=prob->l;
  int w_size=get_nr_variable();

- Xv(w, z);
+ wTw = 0;
+ Xv(w, wx);

  for(i=0;i<w_size;i++)
- f += w[i]*w[i];
- f /= 2.0;
+ wTw += w[i]*w[i];
+ if(regularize_bias == 0)
+ wTw -= w[w_size-1]*w[w_size-1];
  for(i=0;i<l;i++)
+ f += C_times_loss(i, wx[i]);
+ f = f + 0.5 * wTw;
+
+ return(f);
+ }
+
+ int l2r_erm_fun::get_nr_variable(void)
+ {
+ return prob->n;
+ }
+
+ // On entry *f must be the function value of w
+ // On exit w is updated and *f is the new function value
+ double l2r_erm_fun::linesearch_and_update(double *w, double *s, double *f, double *g, double alpha)
+ {
+ int i;
+ int l = prob->l;
+ double sTs = 0;
+ double wTs = 0;
+ double gTs = 0;
+ double eta = 0.01;
+ int w_size = get_nr_variable();
+ int max_num_linesearch = 20;
+ double fold = *f;
+ Xv(s, tmp);
+
+ for (i=0;i<w_size;i++)
+ {
+ sTs += s[i] * s[i];
+ wTs += s[i] * w[i];
+ gTs += s[i] * g[i];
+ }
+ if(regularize_bias == 0)
+ {
+ // bias not used in calculating (w + \alpha s)^T (w + \alpha s)
+ sTs -= s[w_size-1] * s[w_size-1];
+ wTs -= s[w_size-1] * w[w_size-1];
+ }
+
+ int num_linesearch = 0;
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
  {
- double yz = y[i]*z[i];
- if (yz >= 0)
- f += C[i]*log(1 + exp(-yz));
+ double loss = 0;
+ for(i=0;i<l;i++)
+ {
+ double inner_product = tmp[i] * alpha + wx[i];
+ loss += C_times_loss(i, inner_product);
+ }
+ *f = loss + (alpha * alpha * sTs + wTw) / 2.0 + alpha * wTs;
+ if (*f - fold <= eta * alpha * gTs)
+ {
+ for (i=0;i<l;i++)
+ wx[i] += alpha * tmp[i];
+ break;
+ }
  else
- f += C[i]*(-yz+log(1 + exp(yz)));
+ alpha *= 0.5;
  }

- return(f);
+ if (num_linesearch >= max_num_linesearch)
+ {
+ *f = fold;
+ return 0;
+ }
+ else
+ for (i=0;i<w_size;i++)
+ w[i] += alpha * s[i];
+
+ wTw += alpha * alpha * sTs + 2* alpha * wTs;
+ return alpha;
+ }
+
+ void l2r_erm_fun::Xv(double *v, double *Xv)
+ {
+ int i;
+ int l=prob->l;
+ feature_node **x=prob->x;
+
+ for(i=0;i<l;i++)
+ Xv[i]=sparse_operator::dot(v, x[i]);
+ }
+
+ void l2r_erm_fun::XTv(double *v, double *XTv)
+ {
+ int i;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+ feature_node **x=prob->x;
+
+ for(i=0;i<w_size;i++)
+ XTv[i]=0;
+ for(i=0;i<l;i++)
+ sparse_operator::axpy(v[i], x[i], XTv);
+ }
+
+ class l2r_lr_fun: public l2r_erm_fun
+ {
+ public:
+ l2r_lr_fun(const problem *prob, const parameter *param, double *C);
+ ~l2r_lr_fun();
+
+ void grad(double *w, double *g);
+ void Hv(double *s, double *Hs);
+
+ void get_diag_preconditioner(double *M);
+
+ private:
+ double *D;
+ double C_times_loss(int i, double wx_i);
+ };
+
+ l2r_lr_fun::l2r_lr_fun(const problem *prob, const parameter *param, double *C):
+ l2r_erm_fun(prob, param, C)
+ {
+ int l=prob->l;
+ D = new double[l];
+ }
+
+ l2r_lr_fun::~l2r_lr_fun()
+ {
+ delete[] D;
+ }
+
+ double l2r_lr_fun::C_times_loss(int i, double wx_i)
+ {
+ double ywx_i = wx_i * prob->y[i];
+ if (ywx_i >= 0)
+ return C[i]*log(1 + exp(-ywx_i));
+ else
+ return C[i]*(-ywx_i + log(1 + exp(ywx_i)));
  }

  void l2r_lr_fun::grad(double *w, double *g)
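To summarize the refactor in this hunk: the new `l2r_erm_fun` base class owns the shared objective, the `Xv`/`XTv` plumbing, and the line search, while each subclass supplies only `C_times_loss`. Written out (my rendering of the code comment and of the three `C_times_loss` bodies that appear in this diff):

```latex
\min_{w}\ \tfrac{1}{2}\, w^{\top} w \;+\; \sum_{i} C_i\, \xi\!\left(w^{\top} x_i\right),
\qquad
\xi(z) \;=\;
\begin{cases}
\log\!\left(1 + e^{-y_i z}\right) & \text{logistic regression (l2r\_lr\_fun)}\\
\max\!\left(0,\, 1 - y_i z\right)^{2} & \text{squared hinge (l2r\_l2\_svc\_fun)}\\
\max\!\left(0,\, |z - y_i| - p\right)^{2} & \text{squared $\epsilon$-insensitive loss (l2r\_l2\_svr\_fun)}
\end{cases}
```

When `regularize_bias == 0`, the last component of `w` (the constant bias feature) is excluded from the `w^T w` term, which is exactly what the `wTw -= w[w_size-1]*w[w_size-1]` lines above implement.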
@@ -155,19 +302,16 @@ void l2r_lr_fun::grad(double *w, double *g)
155
302
 
156
303
  for(i=0;i<l;i++)
157
304
  {
158
- z[i] = 1/(1 + exp(-y[i]*z[i]));
159
- D[i] = z[i]*(1-z[i]);
160
- z[i] = C[i]*(z[i]-1)*y[i];
305
+ tmp[i] = 1/(1 + exp(-y[i]*wx[i]));
306
+ D[i] = tmp[i]*(1-tmp[i]);
307
+ tmp[i] = C[i]*(tmp[i]-1)*y[i];
161
308
  }
162
- XTv(z, g);
309
+ XTv(tmp, g);
163
310
 
164
311
  for(i=0;i<w_size;i++)
165
312
  g[i] = w[i] + g[i];
166
- }
167
-
168
- int l2r_lr_fun::get_nr_variable(void)
169
- {
170
- return prob->n;
313
+ if(regularize_bias == 0)
314
+ g[w_size-1] -= w[w_size-1];
171
315
  }
172
316
 
173
317
  void l2r_lr_fun::get_diag_preconditioner(double *M)
@@ -179,14 +323,16 @@ void l2r_lr_fun::get_diag_preconditioner(double *M)
179
323
 
180
324
  for (i=0; i<w_size; i++)
181
325
  M[i] = 1;
326
+ if(regularize_bias == 0)
327
+ M[w_size-1] = 0;
182
328
 
183
329
  for (i=0; i<l; i++)
184
330
  {
185
- feature_node *s = x[i];
186
- while (s->index!=-1)
331
+ feature_node *xi = x[i];
332
+ while (xi->index!=-1)
187
333
  {
188
- M[s->index-1] += s->value*s->value*C[i]*D[i];
189
- s++;
334
+ M[xi->index-1] += xi->value*xi->value*C[i]*D[i];
335
+ xi++;
190
336
  }
191
337
  }
192
338
  }
@@ -211,94 +357,49 @@ void l2r_lr_fun::Hv(double *s, double *Hs)
211
357
  }
212
358
  for(i=0;i<w_size;i++)
213
359
  Hs[i] = s[i] + Hs[i];
360
+ if(regularize_bias == 0)
361
+ Hs[w_size-1] -= s[w_size-1];
214
362
  }
215
363
 
216
- void l2r_lr_fun::Xv(double *v, double *Xv)
217
- {
218
- int i;
219
- int l=prob->l;
220
- feature_node **x=prob->x;
221
-
222
- for(i=0;i<l;i++)
223
- Xv[i]=sparse_operator::dot(v, x[i]);
224
- }
225
-
226
- void l2r_lr_fun::XTv(double *v, double *XTv)
227
- {
228
- int i;
229
- int l=prob->l;
230
- int w_size=get_nr_variable();
231
- feature_node **x=prob->x;
232
-
233
- for(i=0;i<w_size;i++)
234
- XTv[i]=0;
235
- for(i=0;i<l;i++)
236
- sparse_operator::axpy(v[i], x[i], XTv);
237
- }
238
-
239
- class l2r_l2_svc_fun: public function
364
+ class l2r_l2_svc_fun: public l2r_erm_fun
240
365
  {
241
366
  public:
242
- l2r_l2_svc_fun(const problem *prob, double *C);
367
+ l2r_l2_svc_fun(const problem *prob, const parameter *param, double *C);
243
368
  ~l2r_l2_svc_fun();
244
369
 
245
- double fun(double *w);
246
370
  void grad(double *w, double *g);
247
371
  void Hv(double *s, double *Hs);
248
372
 
249
- int get_nr_variable(void);
250
373
  void get_diag_preconditioner(double *M);
251
374
 
252
375
  protected:
253
- void Xv(double *v, double *Xv);
254
376
  void subXTv(double *v, double *XTv);
255
377
 
256
- double *C;
257
- double *z;
258
378
  int *I;
259
379
  int sizeI;
260
- const problem *prob;
380
+
381
+ private:
382
+ double C_times_loss(int i, double wx_i);
261
383
  };
262
384
 
263
- l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C)
385
+ l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, const parameter *param, double *C):
386
+ l2r_erm_fun(prob, param, C)
264
387
  {
265
- int l=prob->l;
266
-
267
- this->prob = prob;
268
-
269
- z = new double[l];
270
- I = new int[l];
271
- this->C = C;
388
+ I = new int[prob->l];
272
389
  }
273
390
 
274
391
  l2r_l2_svc_fun::~l2r_l2_svc_fun()
275
392
  {
276
- delete[] z;
277
393
  delete[] I;
278
394
  }
279
395
 
280
- double l2r_l2_svc_fun::fun(double *w)
396
+ double l2r_l2_svc_fun::C_times_loss(int i, double wx_i)
281
397
  {
282
- int i;
283
- double f=0;
284
- double *y=prob->y;
285
- int l=prob->l;
286
- int w_size=get_nr_variable();
287
-
288
- Xv(w, z);
289
-
290
- for(i=0;i<w_size;i++)
291
- f += w[i]*w[i];
292
- f /= 2.0;
293
- for(i=0;i<l;i++)
294
- {
295
- z[i] = y[i]*z[i];
296
- double d = 1-z[i];
297
- if (d > 0)
298
- f += C[i]*d*d;
299
- }
300
-
301
- return(f);
398
+ double d = 1 - prob->y[i] * wx_i;
399
+ if (d > 0)
400
+ return C[i] * d * d;
401
+ else
402
+ return 0;
302
403
  }
303
404
 
304
405
  void l2r_l2_svc_fun::grad(double *w, double *g)
@@ -310,21 +411,21 @@ void l2r_l2_svc_fun::grad(double *w, double *g)
310
411
 
311
412
  sizeI = 0;
312
413
  for (i=0;i<l;i++)
313
- if (z[i] < 1)
414
+ {
415
+ tmp[i] = wx[i] * y[i];
416
+ if (tmp[i] < 1)
314
417
  {
315
- z[sizeI] = C[i]*y[i]*(z[i]-1);
418
+ tmp[sizeI] = C[i]*y[i]*(tmp[i]-1);
316
419
  I[sizeI] = i;
317
420
  sizeI++;
318
421
  }
319
- subXTv(z, g);
422
+ }
423
+ subXTv(tmp, g);
320
424
 
321
425
  for(i=0;i<w_size;i++)
322
426
  g[i] = w[i] + 2*g[i];
323
- }
324
-
325
- int l2r_l2_svc_fun::get_nr_variable(void)
326
- {
327
- return prob->n;
427
+ if(regularize_bias == 0)
428
+ g[w_size-1] -= w[w_size-1];
328
429
  }
329
430
 
330
431
  void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
@@ -335,15 +436,17 @@ void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
335
436
 
336
437
  for (i=0; i<w_size; i++)
337
438
  M[i] = 1;
439
+ if(regularize_bias == 0)
440
+ M[w_size-1] = 0;
338
441
 
339
442
  for (i=0; i<sizeI; i++)
340
443
  {
341
444
  int idx = I[i];
342
- feature_node *s = x[idx];
343
- while (s->index!=-1)
445
+ feature_node *xi = x[idx];
446
+ while (xi->index!=-1)
344
447
  {
345
- M[s->index-1] += s->value*s->value*C[idx]*2;
346
- s++;
448
+ M[xi->index-1] += xi->value*xi->value*C[idx]*2;
449
+ xi++;
347
450
  }
348
451
  }
349
452
  }
@@ -367,16 +470,8 @@ void l2r_l2_svc_fun::Hv(double *s, double *Hs)
367
470
  }
368
471
  for(i=0;i<w_size;i++)
369
472
  Hs[i] = s[i] + 2*Hs[i];
370
- }
371
-
372
- void l2r_l2_svc_fun::Xv(double *v, double *Xv)
373
- {
374
- int i;
375
- int l=prob->l;
376
- feature_node **x=prob->x;
377
-
378
- for(i=0;i<l;i++)
379
- Xv[i]=sparse_operator::dot(v, x[i]);
473
+ if(regularize_bias == 0)
474
+ Hs[w_size-1] -= s[w_size-1];
380
475
  }
381
476
 
382
477
  void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
@@ -394,45 +489,30 @@ void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
394
489
  class l2r_l2_svr_fun: public l2r_l2_svc_fun
395
490
  {
396
491
  public:
397
- l2r_l2_svr_fun(const problem *prob, double *C, double p);
492
+ l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C);
398
493
 
399
- double fun(double *w);
400
494
  void grad(double *w, double *g);
401
495
 
402
496
  private:
497
+ double C_times_loss(int i, double wx_i);
403
498
  double p;
404
499
  };
405
500
 
406
- l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *C, double p):
407
- l2r_l2_svc_fun(prob, C)
501
+ l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C):
502
+ l2r_l2_svc_fun(prob, param, C)
408
503
  {
409
- this->p = p;
504
+ this->p = param->p;
505
+ this->regularize_bias = param->regularize_bias;
410
506
  }
411
507
 
412
- double l2r_l2_svr_fun::fun(double *w)
508
+ double l2r_l2_svr_fun::C_times_loss(int i, double wx_i)
413
509
  {
414
- int i;
415
- double f=0;
416
- double *y=prob->y;
417
- int l=prob->l;
418
- int w_size=get_nr_variable();
419
- double d;
420
-
421
- Xv(w, z);
422
-
423
- for(i=0;i<w_size;i++)
424
- f += w[i]*w[i];
425
- f /= 2;
426
- for(i=0;i<l;i++)
427
- {
428
- d = z[i] - y[i];
429
- if(d < -p)
430
- f += C[i]*(d+p)*(d+p);
431
- else if(d > p)
432
- f += C[i]*(d-p)*(d-p);
433
- }
434
-
435
- return(f);
510
+ double d = wx_i - prob->y[i];
511
+ if(d < -p)
512
+ return C[i]*(d+p)*(d+p);
513
+ else if(d > p)
514
+ return C[i]*(d-p)*(d-p);
515
+ return 0;
436
516
  }
437
517
 
438
518
  void l2r_l2_svr_fun::grad(double *w, double *g)
@@ -446,27 +526,29 @@ void l2r_l2_svr_fun::grad(double *w, double *g)
446
526
  sizeI = 0;
447
527
  for(i=0;i<l;i++)
448
528
  {
449
- d = z[i] - y[i];
529
+ d = wx[i] - y[i];
450
530
 
451
531
  // generate index set I
452
532
  if(d < -p)
453
533
  {
454
- z[sizeI] = C[i]*(d+p);
534
+ tmp[sizeI] = C[i]*(d+p);
455
535
  I[sizeI] = i;
456
536
  sizeI++;
457
537
  }
458
538
  else if(d > p)
459
539
  {
460
- z[sizeI] = C[i]*(d-p);
540
+ tmp[sizeI] = C[i]*(d-p);
461
541
  I[sizeI] = i;
462
542
  sizeI++;
463
543
  }
464
544
 
465
545
  }
466
- subXTv(z, g);
546
+ subXTv(tmp, g);
467
547
 
468
548
  for(i=0;i<w_size;i++)
469
549
  g[i] = w[i] + 2*g[i];
550
+ if(regularize_bias == 0)
551
+ g[w_size-1] -= w[w_size-1];
470
552
  }
471
553
 
472
554
  // A coordinate descent algorithm for
@@ -1378,6 +1460,9 @@ void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, do
1378
1460
  // solution will be put in w
1379
1461
  //
1380
1462
  // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
1463
+ //
1464
+ // To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
1465
+ // must have been added to the original data. (see -B and -R option)
1381
1466
 
1382
1467
  #undef GETI
1383
1468
  #define GETI(i) (y[i]+1)
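Concretely, the comment added in the hunk above means that with `-R` (and `-B 1`, which appends a constant feature stored as the last coordinate of `w`), `solve_l1r_l2_svc` keeps the bias out of the L1 penalty. A rendering of the resulting problem, inferred from the code rather than quoted from it:

```latex
\min_{w,\,b}\ \sum_{j=1}^{n-1} |w_j| \;+\; \sum_{i} C_i \,\max\!\left(0,\ 1 - y_i \left(w^{\top} x_i + b\right)\right)^{2},
\qquad b = w_n \text{ left unpenalized.}
```

The `v -= fabs(w[w_size-1])` corrections to the reported objective value later in this diff are the bookkeeping for that exclusion.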
@@ -1385,7 +1470,7 @@ void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, do
1385
1470
 
1386
1471
  static void solve_l1r_l2_svc(
1387
1472
  problem *prob_col, double *w, double eps,
1388
- double Cp, double Cn)
1473
+ double Cp, double Cn, int regularize_bias)
1389
1474
  {
1390
1475
  int l = prob_col->l;
1391
1476
  int w_size = prob_col->n;
@@ -1475,49 +1560,66 @@ static void solve_l1r_l2_svc(
1475
1560
  H *= 2;
1476
1561
  H = max(H, 1e-12);
1477
1562
 
1478
- double Gp = G+1;
1479
- double Gn = G-1;
1480
1563
  double violation = 0;
1481
- if(w[j] == 0)
1564
+ double Gp = 0, Gn = 0;
1565
+ if(j == w_size-1 && regularize_bias == 0)
1566
+ violation = fabs(G);
1567
+ else
1482
1568
  {
1483
- if(Gp < 0)
1484
- violation = -Gp;
1485
- else if(Gn > 0)
1486
- violation = Gn;
1487
- else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1569
+ Gp = G+1;
1570
+ Gn = G-1;
1571
+ if(w[j] == 0)
1488
1572
  {
1489
- active_size--;
1490
- swap(index[s], index[active_size]);
1491
- s--;
1492
- continue;
1573
+ if(Gp < 0)
1574
+ violation = -Gp;
1575
+ else if(Gn > 0)
1576
+ violation = Gn;
1577
+ else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1578
+ {
1579
+ active_size--;
1580
+ swap(index[s], index[active_size]);
1581
+ s--;
1582
+ continue;
1583
+ }
1493
1584
  }
1585
+ else if(w[j] > 0)
1586
+ violation = fabs(Gp);
1587
+ else
1588
+ violation = fabs(Gn);
1494
1589
  }
1495
- else if(w[j] > 0)
1496
- violation = fabs(Gp);
1497
- else
1498
- violation = fabs(Gn);
1499
-
1500
1590
  Gmax_new = max(Gmax_new, violation);
1501
1591
  Gnorm1_new += violation;
1502
1592
 
1503
1593
  // obtain Newton direction d
1504
- if(Gp < H*w[j])
1505
- d = -Gp/H;
1506
- else if(Gn > H*w[j])
1507
- d = -Gn/H;
1594
+ if(j == w_size-1 && regularize_bias == 0)
1595
+ d = -G/H;
1508
1596
  else
1509
- d = -w[j];
1597
+ {
1598
+ if(Gp < H*w[j])
1599
+ d = -Gp/H;
1600
+ else if(Gn > H*w[j])
1601
+ d = -Gn/H;
1602
+ else
1603
+ d = -w[j];
1604
+ }
1510
1605
 
1511
1606
  if(fabs(d) < 1.0e-12)
1512
1607
  continue;
1513
1608
 
1514
- double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
1609
+ double delta;
1610
+ if(j == w_size-1 && regularize_bias == 0)
1611
+ delta = G*d;
1612
+ else
1613
+ delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
1515
1614
  d_old = 0;
1516
1615
  int num_linesearch;
1517
1616
  for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
1518
1617
  {
1519
1618
  d_diff = d_old - d;
1520
- cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
1619
+ if(j == w_size-1 && regularize_bias == 0)
1620
+ cond = -sigma*delta;
1621
+ else
1622
+ cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
1521
1623
 
1522
1624
  appxcond = xj_sq[j]*d*d + G_loss*d + cond;
1523
1625
  if(appxcond <= 0)
@@ -1632,6 +1734,8 @@ static void solve_l1r_l2_svc(
1632
1734
  nnz++;
1633
1735
  }
1634
1736
  }
1737
+ if (regularize_bias == 0)
1738
+ v -= fabs(w[w_size-1]);
1635
1739
  for(j=0; j<l; j++)
1636
1740
  if(b[j] > 0)
1637
1741
  v += C[GETI(j)]*b[j]*b[j];
@@ -1657,6 +1761,9 @@ static void solve_l1r_l2_svc(
1657
1761
  // solution will be put in w
1658
1762
  //
1659
1763
  // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
1764
+ //
1765
+ // To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
1766
+ // must have been added to the original data. (see -B and -R option)
1660
1767
 
1661
1768
  #undef GETI
1662
1769
  #define GETI(i) (y[i]+1)
@@ -1664,7 +1771,7 @@ static void solve_l1r_l2_svc(
1664
1771
 
1665
1772
  static void solve_l1r_lr(
1666
1773
  const problem *prob_col, double *w, double eps,
1667
- double Cp, double Cn)
1774
+ double Cp, double Cn, int regularize_bias)
1668
1775
  {
1669
1776
  int l = prob_col->l;
1670
1777
  int w_size = prob_col->n;
@@ -1734,6 +1841,9 @@ static void solve_l1r_lr(
1734
1841
  x++;
1735
1842
  }
1736
1843
  }
1844
+ if (regularize_bias == 0)
1845
+ w_norm -= fabs(w[w_size-1]);
1846
+
1737
1847
  for(j=0; j<l; j++)
1738
1848
  {
1739
1849
  exp_wTx[j] = exp(exp_wTx[j]);
@@ -1765,29 +1875,33 @@ static void solve_l1r_lr(
1765
1875
  }
1766
1876
  Grad[j] = -tmp + xjneg_sum[j];
1767
1877
 
1768
- double Gp = Grad[j]+1;
1769
- double Gn = Grad[j]-1;
1770
1878
  double violation = 0;
1771
- if(w[j] == 0)
1879
+ if (j == w_size-1 && regularize_bias == 0)
1880
+ violation = fabs(Grad[j]);
1881
+ else
1772
1882
  {
1773
- if(Gp < 0)
1774
- violation = -Gp;
1775
- else if(Gn > 0)
1776
- violation = Gn;
1777
- //outer-level shrinking
1778
- else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1883
+ double Gp = Grad[j]+1;
1884
+ double Gn = Grad[j]-1;
1885
+ if(w[j] == 0)
1779
1886
  {
1780
- active_size--;
1781
- swap(index[s], index[active_size]);
1782
- s--;
1783
- continue;
1887
+ if(Gp < 0)
1888
+ violation = -Gp;
1889
+ else if(Gn > 0)
1890
+ violation = Gn;
1891
+ //outer-level shrinking
1892
+ else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1893
+ {
1894
+ active_size--;
1895
+ swap(index[s], index[active_size]);
1896
+ s--;
1897
+ continue;
1898
+ }
1784
1899
  }
1900
+ else if(w[j] > 0)
1901
+ violation = fabs(Gp);
1902
+ else
1903
+ violation = fabs(Gn);
1785
1904
  }
1786
- else if(w[j] > 0)
1787
- violation = fabs(Gp);
1788
- else
1789
- violation = fabs(Gn);
1790
-
1791
1905
  Gmax_new = max(Gmax_new, violation);
1792
1906
  Gnorm1_new += violation;
1793
1907
  }
@@ -1831,40 +1945,48 @@ static void solve_l1r_lr(
1831
1945
  x++;
1832
1946
  }
1833
1947
 
1834
- double Gp = G+1;
1835
- double Gn = G-1;
1836
1948
  double violation = 0;
1837
- if(wpd[j] == 0)
1949
+ if (j == w_size-1 && regularize_bias == 0)
1838
1950
  {
1839
- if(Gp < 0)
1840
- violation = -Gp;
1841
- else if(Gn > 0)
1842
- violation = Gn;
1843
- //inner-level shrinking
1844
- else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
1845
- {
1846
- QP_active_size--;
1847
- swap(index[s], index[QP_active_size]);
1848
- s--;
1849
- continue;
1850
- }
1951
+ // bias term not shrunken
1952
+ violation = fabs(G);
1953
+ z = -G/H;
1851
1954
  }
1852
- else if(wpd[j] > 0)
1853
- violation = fabs(Gp);
1854
1955
  else
1855
- violation = fabs(Gn);
1956
+ {
1957
+ double Gp = G+1;
1958
+ double Gn = G-1;
1959
+ if(wpd[j] == 0)
1960
+ {
1961
+ if(Gp < 0)
1962
+ violation = -Gp;
1963
+ else if(Gn > 0)
1964
+ violation = Gn;
1965
+ //inner-level shrinking
1966
+ else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
1967
+ {
1968
+ QP_active_size--;
1969
+ swap(index[s], index[QP_active_size]);
1970
+ s--;
1971
+ continue;
1972
+ }
1973
+ }
1974
+ else if(wpd[j] > 0)
1975
+ violation = fabs(Gp);
1976
+ else
1977
+ violation = fabs(Gn);
1856
1978
 
1979
+ // obtain solution of one-variable problem
1980
+ if(Gp < H*wpd[j])
1981
+ z = -Gp/H;
1982
+ else if(Gn > H*wpd[j])
1983
+ z = -Gn/H;
1984
+ else
1985
+ z = -wpd[j];
1986
+ }
1857
1987
  QP_Gmax_new = max(QP_Gmax_new, violation);
1858
1988
  QP_Gnorm1_new += violation;
1859
1989
 
1860
- // obtain solution of one-variable problem
1861
- if(Gp < H*wpd[j])
1862
- z = -Gp/H;
1863
- else if(Gn > H*wpd[j])
1864
- z = -Gn/H;
1865
- else
1866
- z = -wpd[j];
1867
-
1868
1990
  if(fabs(z) < 1.0e-12)
1869
1991
  continue;
1870
1992
  z = min(max(z,-10.0),10.0);
@@ -1905,6 +2027,8 @@ static void solve_l1r_lr(
1905
2027
  if(wpd[j] != 0)
1906
2028
  w_norm_new += fabs(wpd[j]);
1907
2029
  }
2030
+ if (regularize_bias == 0)
2031
+ w_norm_new -= fabs(wpd[w_size-1]);
1908
2032
  delta += (w_norm_new-w_norm);
1909
2033
 
1910
2034
  negsum_xTd = 0;
@@ -1947,6 +2071,8 @@ static void solve_l1r_lr(
1947
2071
  if(wpd[j] != 0)
1948
2072
  w_norm_new += fabs(wpd[j]);
1949
2073
  }
2074
+ if (regularize_bias == 0)
2075
+ w_norm_new -= fabs(wpd[w_size-1]);
1950
2076
  delta *= 0.5;
1951
2077
  negsum_xTd *= 0.5;
1952
2078
  for(int i=0; i<l; i++)
@@ -1995,6 +2121,8 @@ static void solve_l1r_lr(
1995
2121
  v += fabs(w[j]);
1996
2122
  nnz++;
1997
2123
  }
2124
+ if (regularize_bias == 0)
2125
+ v -= fabs(w[w_size-1]);
1998
2126
  for(j=0; j<l; j++)
1999
2127
  if(y[j] == 1)
2000
2128
  v += C[GETI(j)]*log(1+1/exp_wTx[j]);
@@ -2017,6 +2145,342 @@ static void solve_l1r_lr(
2017
2145
  delete [] D;
2018
2146
  }
2019
2147
 
2148
+ struct heap {
2149
+ enum HEAP_TYPE { MIN, MAX };
2150
+ int _size;
2151
+ HEAP_TYPE _type;
2152
+ feature_node* a;
2153
+
2154
+ heap(int max_size, HEAP_TYPE type)
2155
+ {
2156
+ _size = 0;
2157
+ a = new feature_node[max_size];
2158
+ _type = type;
2159
+ }
2160
+ ~heap()
2161
+ {
2162
+ delete [] a;
2163
+ }
2164
+ bool cmp(const feature_node& left, const feature_node& right)
2165
+ {
2166
+ if(_type == MIN)
2167
+ return left.value > right.value;
2168
+ else
2169
+ return left.value < right.value;
2170
+ }
2171
+ int size()
2172
+ {
2173
+ return _size;
2174
+ }
2175
+ void push(feature_node node)
2176
+ {
2177
+ a[_size] = node;
2178
+ _size++;
2179
+ int i = _size-1;
2180
+ while(i)
2181
+ {
2182
+ int p = (i-1)/2;
2183
+ if(cmp(a[p], a[i]))
2184
+ {
2185
+ swap(a[i], a[p]);
2186
+ i = p;
2187
+ }
2188
+ else
2189
+ break;
2190
+ }
2191
+ }
2192
+ void pop()
2193
+ {
2194
+ _size--;
2195
+ a[0] = a[_size];
2196
+ int i = 0;
2197
+ while(i*2+1 < _size)
2198
+ {
2199
+ int l = i*2+1;
2200
+ int r = i*2+2;
2201
+ if(r < _size && cmp(a[l], a[r]))
2202
+ l = r;
2203
+ if(cmp(a[i], a[l]))
2204
+ {
2205
+ swap(a[i], a[l]);
2206
+ i = l;
2207
+ }
2208
+ else
2209
+ break;
2210
+ }
2211
+ }
2212
+ feature_node top()
2213
+ {
2214
+ return a[0];
2215
+ }
2216
+ };
2217
+
2218
+ // A two-level coordinate descent algorithm for
2219
+ // a scaled one-class SVM dual problem
2220
+ //
2221
+ // min_\alpha 0.5(\alpha^T Q \alpha),
2222
+ // s.t. 0 <= \alpha_i <= 1 and
2223
+ // e^T \alpha = \nu l
2224
+ //
2225
+ // where Qij = xi^T xj
2226
+ //
2227
+ // Given:
2228
+ // x, nu
2229
+ // eps is the stopping tolerance
2230
+ //
2231
+ // solution will be put in w and rho
2232
+ //
2233
+ // See Algorithm 7 in supplementary materials of Chou et al., SDM 2020.
2234
+
2235
+ static void solve_oneclass_svm(const problem *prob, double *w, double *rho, double eps, double nu)
2236
+ {
2237
+ int l = prob->l;
2238
+ int w_size = prob->n;
2239
+ int i, j, s, iter = 0;
2240
+ double Gi, Gj;
2241
+ double Qij, quad_coef, delta, sum;
2242
+ double old_alpha_i;
2243
+ double *QD = new double[l];
2244
+ double *G = new double[l];
2245
+ int *index = new int[l];
2246
+ double *alpha = new double[l];
2247
+ int max_inner_iter;
2248
+ int max_iter = 1000;
2249
+ int active_size = l;
2250
+
2251
+ double negGmax; // max { -grad(f)_i | alpha_i < 1 }
2252
+ double negGmin; // min { -grad(f)_i | alpha_i > 0 }
2253
+
2254
+ int *most_violating_i = new int[l];
2255
+ int *most_violating_j = new int[l];
2256
+
2257
+ int n = (int)(nu*l); // # of alpha's at upper bound
2258
+ for(i=0; i<n; i++)
2259
+ alpha[i] = 1;
2260
+ if (n<l)
2261
+ alpha[i] = nu*l-n;
2262
+ for(i=n+1; i<l; i++)
2263
+ alpha[i] = 0;
2264
+
2265
+ for(i=0; i<w_size; i++)
2266
+ w[i] = 0;
2267
+ for(i=0; i<l; i++)
2268
+ {
2269
+ feature_node * const xi = prob->x[i];
2270
+ QD[i] = sparse_operator::nrm2_sq(xi);
2271
+ sparse_operator::axpy(alpha[i], xi, w);
2272
+
2273
+ index[i] = i;
2274
+ }
2275
+
2276
+ while (iter < max_iter)
2277
+ {
2278
+ negGmax = -INF;
2279
+ negGmin = INF;
2280
+
2281
+ for (s=0; s<active_size; s++)
2282
+ {
2283
+ i = index[s];
2284
+ feature_node * const xi = prob->x[i];
2285
+ G[i] = sparse_operator::dot(w, xi);
2286
+ if (alpha[i] < 1)
2287
+ negGmax = max(negGmax, -G[i]);
2288
+ if (alpha[i] > 0)
2289
+ negGmin = min(negGmin, -G[i]);
2290
+ }
2291
+
2292
+ if (negGmax - negGmin < eps)
2293
+ {
2294
+ if (active_size == l)
2295
+ break;
2296
+ else
2297
+ {
2298
+ active_size = l;
2299
+ info("*");
2300
+ continue;
2301
+ }
2302
+ }
2303
+
2304
+ for(s=0; s<active_size; s++)
2305
+ {
2306
+ i = index[s];
2307
+ if ((alpha[i] == 1 && -G[i] > negGmax) ||
2308
+ (alpha[i] == 0 && -G[i] < negGmin))
2309
+ {
2310
+ active_size--;
2311
+ swap(index[s], index[active_size]);
2312
+ s--;
2313
+ }
2314
+ }
2315
+
2316
+ max_inner_iter = max(active_size/10, 1);
2317
+ struct heap min_heap = heap(max_inner_iter, heap::MIN);
2318
+ struct heap max_heap = heap(max_inner_iter, heap::MAX);
2319
+ struct feature_node node;
2320
+ for(s=0; s<active_size; s++)
2321
+ {
2322
+ i = index[s];
2323
+ node.index = i;
2324
+ node.value = -G[i];
2325
+
2326
+ if (alpha[i] < 1)
2327
+ {
2328
+ if (min_heap.size() < max_inner_iter)
2329
+ min_heap.push(node);
2330
+ else if (min_heap.top().value < node.value)
2331
+ {
2332
+ min_heap.pop();
2333
+ min_heap.push(node);
2334
+ }
2335
+ }
2336
+
2337
+ if (alpha[i] > 0)
2338
+ {
2339
+ if (max_heap.size() < max_inner_iter)
2340
+ max_heap.push(node);
2341
+ else if (max_heap.top().value > node.value)
2342
+ {
2343
+ max_heap.pop();
2344
+ max_heap.push(node);
2345
+ }
2346
+ }
2347
+ }
2348
+ max_inner_iter = min(min_heap.size(), max_heap.size());
2349
+ while (max_heap.size() > max_inner_iter)
2350
+ max_heap.pop();
2351
+ while (min_heap.size() > max_inner_iter)
2352
+ min_heap.pop();
2353
+
2354
+ for (s=max_inner_iter-1; s>=0; s--)
2355
+ {
2356
+ most_violating_i[s] = min_heap.top().index;
2357
+ most_violating_j[s] = max_heap.top().index;
2358
+ min_heap.pop();
2359
+ max_heap.pop();
2360
+ }
2361
+
2362
+ for (s=0; s<max_inner_iter; s++)
2363
+ {
2364
+ i = most_violating_i[s];
2365
+ j = most_violating_j[s];
2366
+
2367
+ if ((alpha[i] == 0 && alpha[j] == 0) ||
2368
+ (alpha[i] == 1 && alpha[j] == 1))
2369
+ continue;
2370
+
2371
+ feature_node const * xi = prob->x[i];
2372
+ feature_node const * xj = prob->x[j];
2373
+
2374
+ Gi = sparse_operator::dot(w, xi);
2375
+ Gj = sparse_operator::dot(w, xj);
2376
+
2377
+ int violating_pair = 0;
2378
+ if (alpha[i] < 1 && alpha[j] > 0 && -Gj + 1e-12 < -Gi)
2379
+ violating_pair = 1;
2380
+ else
2381
+ if (alpha[i] > 0 && alpha[j] < 1 && -Gi + 1e-12 < -Gj)
2382
+ violating_pair = 1;
2383
+ if (violating_pair == 0)
2384
+ continue;
2385
+
2386
+ Qij = sparse_operator::sparse_dot(xi, xj);
2387
+ quad_coef = QD[i] + QD[j] - 2*Qij;
2388
+ if(quad_coef <= 0)
2389
+ quad_coef = 1e-12;
2390
+ delta = (Gi - Gj) / quad_coef;
2391
+ old_alpha_i = alpha[i];
2392
+ sum = alpha[i] + alpha[j];
2393
+ alpha[i] = alpha[i] - delta;
2394
+ alpha[j] = alpha[j] + delta;
2395
+ if (sum > 1)
2396
+ {
2397
+ if (alpha[i] > 1)
2398
+ {
2399
+ alpha[i] = 1;
2400
+ alpha[j] = sum - 1;
2401
+ }
2402
+ }
2403
+ else
2404
+ {
2405
+ if (alpha[j] < 0)
2406
+ {
2407
+ alpha[j] = 0;
2408
+ alpha[i] = sum;
2409
+ }
2410
+ }
2411
+ if (sum > 1)
2412
+ {
2413
+ if (alpha[j] > 1)
2414
+ {
2415
+ alpha[j] = 1;
2416
+ alpha[i] = sum - 1;
2417
+ }
2418
+ }
2419
+ else
2420
+ {
2421
+ if (alpha[i] < 0)
2422
+ {
2423
+ alpha[i] = 0;
2424
+ alpha[j] = sum;
2425
+ }
2426
+ }
2427
+ delta = alpha[i] - old_alpha_i;
2428
+ sparse_operator::axpy(delta, xi, w);
2429
+ sparse_operator::axpy(-delta, xj, w);
2430
+ }
2431
+ iter++;
2432
+ if (iter % 10 == 0)
2433
+ info(".");
2434
+ }
2435
+ info("\noptimization finished, #iter = %d\n",iter);
2436
+ if (iter >= max_iter)
2437
+ info("\nWARNING: reaching max number of iterations\n\n");
2438
+
2439
+ // calculate object value
2440
+ double v = 0;
2441
+ for(i=0; i<w_size; i++)
2442
+ v += w[i]*w[i];
2443
+ int nSV = 0;
2444
+ for(i=0; i<l; i++)
2445
+ {
2446
+ if (alpha[i] > 0)
2447
+ ++nSV;
2448
+ }
2449
+ info("Objective value = %lf\n", v/2);
2450
+ info("nSV = %d\n", nSV);
2451
+
2452
+ // calculate rho
2453
+ double nr_free = 0;
2454
+ double ub = INF, lb = -INF, sum_free = 0;
2455
+ for(i=0; i<l; i++)
2456
+ {
2457
+ double G = sparse_operator::dot(w, prob->x[i]);
2458
+ if (alpha[i] == 1)
2459
+ lb = max(lb, G);
2460
+ else if (alpha[i] == 0)
2461
+ ub = min(ub, G);
2462
+ else
2463
+ {
2464
+ ++nr_free;
2465
+ sum_free += G;
2466
+ }
2467
+ }
2468
+
2469
+ if (nr_free > 0)
2470
+ *rho = sum_free/nr_free;
2471
+ else
2472
+ *rho = (ub + lb)/2;
2473
+
2474
+ info("rho = %lf\n", *rho);
2475
+
2476
+ delete [] QD;
2477
+ delete [] G;
2478
+ delete [] index;
2479
+ delete [] alpha;
2480
+ delete [] most_violating_i;
2481
+ delete [] most_violating_j;
2482
+ }
2483
+
2020
2484
  // transpose matrix X from row format to column format
2021
2485
  static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
2022
2486
  {
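The `solve_oneclass_svm` routine added in the hunk above solves the scaled one-class SVM dual stated in its header comment and stores `w` and `rho` in the model. Paraphrasing the code (not the cited paper), the quantities it produces are:

```latex
\min_{\alpha}\ \tfrac{1}{2}\,\alpha^{\top} Q \alpha
\quad \text{s.t. } 0 \le \alpha_i \le 1,\ e^{\top}\alpha = \nu l,
\qquad w = \sum_i \alpha_i x_i,
\qquad
\rho = \frac{1}{|F|} \sum_{i \in F} w^{\top} x_i,\ \ F = \{i : 0 < \alpha_i < 1\}
\ \ \text{(midpoint of the bound values if } F = \emptyset\text{)},
\qquad \hat{y}(x) = \operatorname{sign}\!\left(w^{\top} x - \rho\right).
```

This matches the later `predict_values` change, which subtracts `rho` from the decision value and returns +1/-1 for one-class models.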
@@ -2152,11 +2616,7 @@ static void group_classes(const problem *prob, int *nr_class_ret, int **label_re

  static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
  {
- //inner and outer tolerances for TRON
  double eps = param->eps;
- double eps_cg = 0.1;
- if(param->init_sol != NULL)
- eps_cg = 0.5;

  int pos = 0;
  int neg = 0;
@@ -2179,10 +2639,10 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
  else
  C[i] = Cn;
  }
- fun_obj=new l2r_lr_fun(prob, C);
- TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
- tron_obj.set_print_string(liblinear_print_string);
- tron_obj.tron(w);
+ fun_obj=new l2r_lr_fun(prob, param, C);
+ NEWTON newton_obj(fun_obj, primal_solver_tol);
+ newton_obj.set_print_string(liblinear_print_string);
+ newton_obj.newton(w);
  delete fun_obj;
  delete[] C;
  break;
@@ -2197,10 +2657,10 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
2197
2657
  else
2198
2658
  C[i] = Cn;
2199
2659
  }
2200
- fun_obj=new l2r_l2_svc_fun(prob, C);
2201
- TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
2202
- tron_obj.set_print_string(liblinear_print_string);
2203
- tron_obj.tron(w);
2660
+ fun_obj=new l2r_l2_svc_fun(prob, param, C);
2661
+ NEWTON newton_obj(fun_obj, primal_solver_tol);
2662
+ newton_obj.set_print_string(liblinear_print_string);
2663
+ newton_obj.newton(w);
2204
2664
  delete fun_obj;
2205
2665
  delete[] C;
2206
2666
  break;
@@ -2216,7 +2676,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
2216
2676
  problem prob_col;
2217
2677
  feature_node *x_space = NULL;
2218
2678
  transpose(prob, &x_space ,&prob_col);
2219
- solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn);
2679
+ solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias);
2220
2680
  delete [] prob_col.y;
2221
2681
  delete [] prob_col.x;
2222
2682
  delete [] x_space;
@@ -2227,7 +2687,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
2227
2687
  problem prob_col;
2228
2688
  feature_node *x_space = NULL;
2229
2689
  transpose(prob, &x_space ,&prob_col);
2230
- solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn);
2690
+ solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias);
2231
2691
  delete [] prob_col.y;
2232
2692
  delete [] prob_col.x;
2233
2693
  delete [] x_space;
@@ -2242,10 +2702,10 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
2242
2702
  for(int i = 0; i < prob->l; i++)
2243
2703
  C[i] = param->C;
2244
2704
 
2245
- fun_obj=new l2r_l2_svr_fun(prob, C, param->p);
2246
- TRON tron_obj(fun_obj, param->eps);
2247
- tron_obj.set_print_string(liblinear_print_string);
2248
- tron_obj.tron(w);
2705
+ fun_obj=new l2r_l2_svr_fun(prob, param, C);
2706
+ NEWTON newton_obj(fun_obj, param->eps);
2707
+ newton_obj.set_print_string(liblinear_print_string);
2708
+ newton_obj.newton(w);
2249
2709
  delete fun_obj;
2250
2710
  delete[] C;
2251
2711
  break;
@@ -2432,7 +2892,7 @@ static void find_parameter_C(const problem *prob, parameter *param_tmp, double s
2432
2892
  }
2433
2893
 
2434
2894
  if(param_tmp->C > max_C)
2435
- info("warning: maximum C reached.\n");
2895
+ info("WARNING: maximum C reached.\n");
2436
2896
  free(target);
2437
2897
  for(i=0; i<nr_fold; i++)
2438
2898
  free(prev_w[i]);
@@ -2473,6 +2933,13 @@ model* train(const problem *prob, const parameter *param)
2473
2933
  model_->label = NULL;
2474
2934
  train_one(prob, param, model_->w, 0, 0);
2475
2935
  }
2936
+ else if(check_oneclass_model(model_))
2937
+ {
2938
+ model_->w = Malloc(double, w_size);
2939
+ model_->nr_class = 2;
2940
+ model_->label = NULL;
2941
+ solve_oneclass_svm(prob, model_->w, &(model_->rho), param->eps, param->nu);
2942
+ }
2476
2943
  else
2477
2944
  {
2478
2945
  int nr_class;
@@ -2716,11 +3183,11 @@ void find_parameters(const problem *prob, const parameter *param, int nr_fold, d
2716
3183
  if(start_C <= 0)
2717
3184
  start_C = calc_start_C(prob, &param_tmp);
2718
3185
  double max_C = 1024;
2719
- start_C = min(start_C, max_C);
3186
+ start_C = min(start_C, max_C);
2720
3187
  double best_C_tmp, best_score_tmp;
2721
-
3188
+
2722
3189
  find_parameter_C(prob, &param_tmp, start_C, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
2723
-
3190
+
2724
3191
  *best_C = best_C_tmp;
2725
3192
  *best_score = best_score_tmp;
2726
3193
  }
@@ -2744,9 +3211,9 @@ void find_parameters(const problem *prob, const parameter *param, int nr_fold, d
2744
3211
  start_C_tmp = start_C;
2745
3212
  start_C_tmp = min(start_C_tmp, max_C);
2746
3213
  double best_C_tmp, best_score_tmp;
2747
-
3214
+
2748
3215
  find_parameter_C(prob, &param_tmp, start_C_tmp, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
2749
-
3216
+
2750
3217
  if(best_score_tmp < *best_score)
2751
3218
  {
2752
3219
  *best_p = param_tmp.p;
@@ -2793,11 +3260,15 @@ double predict_values(const struct model *model_, const struct feature_node *x,
2793
3260
  for(i=0;i<nr_w;i++)
2794
3261
  dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
2795
3262
  }
3263
+ if(check_oneclass_model(model_))
3264
+ dec_values[0] -= model_->rho;
2796
3265
 
2797
3266
  if(nr_class==2)
2798
3267
  {
2799
3268
  if(check_regression_model(model_))
2800
3269
  return dec_values[0];
3270
+ else if(check_oneclass_model(model_))
3271
+ return (dec_values[0]>0)?1:-1;
2801
3272
  else
2802
3273
  return (dec_values[0]>0)?model_->label[0]:model_->label[1];
2803
3274
  }
@@ -2860,7 +3331,9 @@ static const char *solver_type_table[]=
  "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
  "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
  "", "", "",
- "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL", NULL
+ "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL",
+ "", "", "", "", "", "", "",
+ "ONECLASS_SVM", NULL
  };

  int save_model(const char *model_file_name, const struct model *model_)
@@ -2906,6 +3379,9 @@ int save_model(const char *model_file_name, const struct model *model_)
2906
3379
 
2907
3380
  fprintf(fp, "bias %.17g\n", model_->bias);
2908
3381
 
3382
+ if(check_oneclass_model(model_))
3383
+ fprintf(fp, "rho %.17g\n", model_->rho);
3384
+
2909
3385
  fprintf(fp, "w\n");
2910
3386
  for(i=0; i<w_size; i++)
2911
3387
  {
@@ -2956,12 +3432,13 @@ struct model *load_model(const char *model_file_name)
2956
3432
  int n;
2957
3433
  int nr_class;
2958
3434
  double bias;
3435
+ double rho;
2959
3436
  model *model_ = Malloc(model,1);
2960
3437
  parameter& param = model_->param;
2961
3438
  // parameters for training only won't be assigned, but arrays are assigned as NULL for safety
2962
3439
  param.nr_weight = 0;
2963
3440
  param.weight_label = NULL;
2964
- param.weight = NULL;
3441
+ param.weight = NULL;
2965
3442
  param.init_sol = NULL;
2966
3443
 
2967
3444
  model_->label = NULL;
@@ -3010,6 +3487,11 @@ struct model *load_model(const char *model_file_name)
3010
3487
  FSCANF(fp,"%lf",&bias);
3011
3488
  model_->bias=bias;
3012
3489
  }
3490
+ else if(strcmp(cmd,"rho")==0)
3491
+ {
3492
+ FSCANF(fp,"%lf",&rho);
3493
+ model_->rho=rho;
3494
+ }
3013
3495
  else if(strcmp(cmd,"w")==0)
3014
3496
  {
3015
3497
  break;
@@ -3082,7 +3564,7 @@ static inline double get_w_value(const struct model *model_, int idx, int label_
3082
3564
 
3083
3565
  if(idx < 0 || idx > model_->nr_feature)
3084
3566
  return 0;
3085
- if(check_regression_model(model_))
3567
+ if(check_regression_model(model_) || check_oneclass_model(model_))
3086
3568
  return w[idx];
3087
3569
  else
3088
3570
  {
@@ -3102,7 +3584,8 @@ static inline double get_w_value(const struct model *model_, int idx, int label_
3102
3584
 
3103
3585
  // feat_idx: starting from 1 to nr_feature
3104
3586
  // label_idx: starting from 0 to nr_class-1 for classification models;
3105
- // for regression models, label_idx is ignored.
3587
+ // for regression and one-class SVM models, label_idx is
3588
+ // ignored.
3106
3589
  double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
3107
3590
  {
3108
3591
  if(feat_idx > model_->nr_feature)
@@ -3112,6 +3595,11 @@ double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
3112
3595
 
3113
3596
  double get_decfun_bias(const struct model *model_, int label_idx)
3114
3597
  {
3598
+ if(check_oneclass_model(model_))
3599
+ {
3600
+ fprintf(stderr, "ERROR: get_decfun_bias can not be called for a one-class SVM model\n");
3601
+ return 0;
3602
+ }
3115
3603
  int bias_idx = model_->nr_feature;
3116
3604
  double bias = model_->bias;
3117
3605
  if(bias <= 0)
@@ -3120,6 +3608,17 @@ double get_decfun_bias(const struct model *model_, int label_idx)
3120
3608
  return bias*get_w_value(model_, bias_idx, label_idx);
3121
3609
  }
3122
3610
 
3611
+ double get_decfun_rho(const struct model *model_)
3612
+ {
3613
+ if(check_oneclass_model(model_))
3614
+ return model_->rho;
3615
+ else
3616
+ {
3617
+ fprintf(stderr, "ERROR: get_decfun_rho can be called only for a one-class SVM model\n");
3618
+ return 0;
3619
+ }
3620
+ }
3621
+
3123
3622
  void free_model_content(struct model *model_ptr)
3124
3623
  {
3125
3624
  if(model_ptr->w != NULL)
@@ -3159,6 +3658,21 @@ const char *check_parameter(const problem *prob, const parameter *param)
  if(param->p < 0)
  return "p < 0";

+ if(prob->bias >= 0 && param->solver_type == ONECLASS_SVM)
+ return "prob->bias >=0, but this is ignored in ONECLASS_SVM";
+
+ if(param->regularize_bias == 0)
+ {
+ if(prob->bias != 1.0)
+ return "To not regularize bias, must specify -B 1 along with -R";
+ if(param->solver_type != L2R_LR
+ && param->solver_type != L2R_L2LOSS_SVC
+ && param->solver_type != L1R_L2LOSS_SVC
+ && param->solver_type != L1R_LR
+ && param->solver_type != L2R_L2LOSS_SVR)
+ return "-R option supported only for solver L2R_LR, L2R_L2LOSS_SVC, L1R_L2LOSS_SVC, L1R_LR, and L2R_L2LOSS_SVR";
+ }
+
  if(param->solver_type != L2R_LR
  && param->solver_type != L2R_L2LOSS_SVC_DUAL
  && param->solver_type != L2R_L2LOSS_SVC
@@ -3169,12 +3683,15 @@ const char *check_parameter(const problem *prob, const parameter *param)
  && param->solver_type != L2R_LR_DUAL
  && param->solver_type != L2R_L2LOSS_SVR
  && param->solver_type != L2R_L2LOSS_SVR_DUAL
- && param->solver_type != L2R_L1LOSS_SVR_DUAL)
+ && param->solver_type != L2R_L1LOSS_SVR_DUAL
+ && param->solver_type != ONECLASS_SVM)
  return "unknown solver type";

  if(param->init_sol != NULL
- && param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
- return "Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC";
+ && param->solver_type != L2R_LR
+ && param->solver_type != L2R_L2LOSS_SVC
+ && param->solver_type != L2R_L2LOSS_SVR)
+ return "Initial-solution specification supported only for solvers L2R_LR, L2R_L2LOSS_SVC, and L2R_L2LOSS_SVR";

  return NULL;
  }
@@ -3193,6 +3710,11 @@ int check_regression_model(const struct model *model_)
  model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);
  }

+ int check_oneclass_model(const struct model *model_)
+ {
+ return model_->param.solver_type == ONECLASS_SVM;
+ }
+
  void set_print_string_function(void (*print_func)(const char*))
  {
  if (print_func == NULL)