numo-liblinear 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
- SHA1:
- metadata.gz: e97d55247e6ea6bb0e6c98c5ad93445adc0f37fa
- data.tar.gz: c164048e2db697e01269f48e816c4e6adac2ecfb
+ SHA256:
+ metadata.gz: 4e69ca89df0a86fdc4be7d1a3074a4722ead3997a6f7ba2fff84b48c6d1c36ea
+ data.tar.gz: 992d8f8606dbf272ac18c1f6502e923d69ce3242d6cb35a460b3856cace29671
  SHA512:
- metadata.gz: 4240c57da4b083bc433293ec2c9b0716893f2523ab4180d8cf41f3bdd6d769173abc45ba0ce08607615b4de34849949527b1a97ce43cfaa5dd4a1f21a0a0e67c
- data.tar.gz: ba830acf8df33fec1efffb35a041bf6f08bf65f368b4d03fa6fc086e5d8e6a046b99629e313126100cea57ae7b655410871f85e8e63a6ba1c6234e03a1d62a2f
+ metadata.gz: 46f453f5b9ee23640a7131d92d3f666806e182265654cec321ebae22c97ae6af06a5b35726e4ab0cfde96407d0fb828391a2c5578c3e27644c487df66432cce8
+ data.tar.gz: d1e0fc15d4227491823fe4ed8f631c491e6cd4c8fb9c078ccf230785e2881fa12134dbfca9e84069bd8d995c140523bfc0572cbaa38761b114d162015dea5be1
@@ -8,6 +8,7 @@ rvm:
  - '2.4'
  - '2.5'
  - '2.6'
+ - '2.7'
 
  before_install:
  - gem install bundler -v 2.0.2
@@ -1,3 +1,7 @@
+ # 1.1.0
+ - Update bundled LIBLINEAR version to 2.4.1.
+ - Support one-class SVM implemented on LIBLINEAR ver. 2.4.0.
+
  # 1.0.0
  ## Breaking change
  - For easy installation, Numo::LIBLINEAR bundles LIBLINEAR codes.
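
The "Support one-class SVM" entry above is the user-facing change behind most of the C++ diff below. A minimal usage sketch from Ruby follows; it is an assumption-laden illustration rather than text from this release: the SolverType::ONECLASS_SVM constant, the nu/eps parameter keys, and the exact train/predict signatures should be checked against the gem's documentation.

require 'numo/narray'
require 'numo/liblinear'

# Hypothetical one-class SVM training sketch (names below are assumptions).
x = Numo::DFloat.new(200, 3).rand                  # training samples, one row per example
param = {
  solver_type: Numo::Liblinear::SolverType::ONECLASS_SVM, # assumed solver constant
  nu: 0.05,                                               # assumed bound on the outlier fraction
  eps: 0.01                                               # stopping tolerance
}
y = Numo::DFloat.zeros(200)                        # labels are ignored by the one-class solver
model = Numo::Liblinear.train(x, y, param)
labels = Numo::Liblinear.predict(x, param, model)  # +1 for inliers, -1 for outliers
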
@@ -1,4 +1,4 @@
- Copyright (c) 2019 Atsushi Tatsuma
+ Copyright (c) 2019-2020 Atsushi Tatsuma
  All rights reserved.
 
  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -3,7 +3,7 @@
  [![Build Status](https://travis-ci.org/yoshoku/numo-liblinear.svg?branch=master)](https://travis-ci.org/yoshoku/numo-liblinear)
  [![Gem Version](https://badge.fury.io/rb/numo-liblinear.svg)](https://badge.fury.io/rb/numo-liblinear)
  [![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/numo-liblinear/blob/master/LICENSE.txt)
- [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/numo-liblinear/0.4.0)
+ [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://yoshoku.github.io/numo-liblinear/doc/)
 
  Numo::Liblinear is a Ruby gem binding to the [LIBLINEAR](https://www.csie.ntu.edu.tw/~cjlin/liblinear/) library.
  LIBLINEAR is one of the famous libraries for large-scale regularized linear classification and regression.
@@ -29,7 +29,7 @@ end
  $LDFLAGS << ' -lstdc++ '
 
  $srcs = Dir.glob("#{$srcdir}/*.c").map { |path| File.basename(path) }
- $srcs.concat(%w[linear.cpp tron.cpp daxpy.c ddot.c dnrm2.c dscal.c])
+ $srcs.concat(%w[linear.cpp newton.cpp daxpy.c ddot.c dnrm2.c dscal.c])
 
  $INCFLAGS << " -I$(srcdir)/liblinear"
  $VPATH << "$(srcdir)/liblinear"
@@ -5,7 +5,7 @@
  #include <stdarg.h>
  #include <locale.h>
  #include "linear.h"
- #include "tron.h"
+ #include "newton.h"
  int liblinear_version = LIBLINEAR_VERSION;
  typedef signed char schar;
  template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
@@ -70,6 +70,28 @@ public:
  return (ret);
  }
 
+ static double sparse_dot(const feature_node *x1, const feature_node *x2)
+ {
+ double ret = 0;
+ while(x1->index != -1 && x2->index != -1)
+ {
+ if(x1->index == x2->index)
+ {
+ ret += x1->value * x2->value;
+ ++x1;
+ ++x2;
+ }
+ else
+ {
+ if(x1->index > x2->index)
+ ++x2;
+ else
+ ++x1;
+ }
+ }
+ return (ret);
+ }
+
  static void axpy(const double a, const feature_node *x, double *y)
  {
  while(x->index != -1)
@@ -80,70 +102,195 @@ public:
  }
  };
 
- class l2r_lr_fun: public function
+ // L2-regularized empirical risk minimization
+ // min_w w^Tw/2 + \sum C_i \xi(w^Tx_i), where \xi() is the loss
+
+ class l2r_erm_fun: public function
  {
  public:
- l2r_lr_fun(const problem *prob, double *C);
- ~l2r_lr_fun();
+ l2r_erm_fun(const problem *prob, const parameter *param, double *C);
+ ~l2r_erm_fun();
 
  double fun(double *w);
- void grad(double *w, double *g);
- void Hv(double *s, double *Hs);
-
+ double linesearch_and_update(double *w, double *d, double *f, double *g, double alpha);
  int get_nr_variable(void);
- void get_diag_preconditioner(double *M);
 
- private:
+ protected:
+ virtual double C_times_loss(int i, double wx_i) = 0;
  void Xv(double *v, double *Xv);
  void XTv(double *v, double *XTv);
 
  double *C;
- double *z;
- double *D;
  const problem *prob;
+ double *wx;
+ double *tmp; // a working array
+ double wTw;
+ int regularize_bias;
  };
 
- l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C)
+ l2r_erm_fun::l2r_erm_fun(const problem *prob, const parameter *param, double *C)
  {
  int l=prob->l;
 
  this->prob = prob;
 
- z = new double[l];
- D = new double[l];
+ wx = new double[l];
+ tmp = new double[l];
  this->C = C;
+ this->regularize_bias = param->regularize_bias;
  }
 
- l2r_lr_fun::~l2r_lr_fun()
+ l2r_erm_fun::~l2r_erm_fun()
  {
- delete[] z;
- delete[] D;
+ delete[] wx;
+ delete[] tmp;
  }
 
-
- double l2r_lr_fun::fun(double *w)
+ double l2r_erm_fun::fun(double *w)
  {
  int i;
  double f=0;
- double *y=prob->y;
  int l=prob->l;
  int w_size=get_nr_variable();
 
- Xv(w, z);
+ wTw = 0;
+ Xv(w, wx);
 
  for(i=0;i<w_size;i++)
- f += w[i]*w[i];
- f /= 2.0;
+ wTw += w[i]*w[i];
+ if(regularize_bias == 0)
+ wTw -= w[w_size-1]*w[w_size-1];
  for(i=0;i<l;i++)
+ f += C_times_loss(i, wx[i]);
+ f = f + 0.5 * wTw;
+
+ return(f);
+ }
+
+ int l2r_erm_fun::get_nr_variable(void)
+ {
+ return prob->n;
+ }
+
+ // On entry *f must be the function value of w
+ // On exit w is updated and *f is the new function value
+ double l2r_erm_fun::linesearch_and_update(double *w, double *s, double *f, double *g, double alpha)
+ {
+ int i;
+ int l = prob->l;
+ double sTs = 0;
+ double wTs = 0;
+ double gTs = 0;
+ double eta = 0.01;
+ int w_size = get_nr_variable();
+ int max_num_linesearch = 20;
+ double fold = *f;
+ Xv(s, tmp);
+
+ for (i=0;i<w_size;i++)
+ {
+ sTs += s[i] * s[i];
+ wTs += s[i] * w[i];
+ gTs += s[i] * g[i];
+ }
+ if(regularize_bias == 0)
+ {
+ // bias not used in calculating (w + \alpha s)^T (w + \alpha s)
+ sTs -= s[w_size-1] * s[w_size-1];
+ wTs -= s[w_size-1] * w[w_size-1];
+ }
+
+ int num_linesearch = 0;
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
  {
- double yz = y[i]*z[i];
- if (yz >= 0)
- f += C[i]*log(1 + exp(-yz));
+ double loss = 0;
+ for(i=0;i<l;i++)
+ {
+ double inner_product = tmp[i] * alpha + wx[i];
+ loss += C_times_loss(i, inner_product);
+ }
+ *f = loss + (alpha * alpha * sTs + wTw) / 2.0 + alpha * wTs;
+ if (*f - fold <= eta * alpha * gTs)
+ {
+ for (i=0;i<l;i++)
+ wx[i] += alpha * tmp[i];
+ break;
+ }
  else
- f += C[i]*(-yz+log(1 + exp(yz)));
+ alpha *= 0.5;
  }
 
- return(f);
+ if (num_linesearch >= max_num_linesearch)
+ {
+ *f = fold;
+ return 0;
+ }
+ else
+ for (i=0;i<w_size;i++)
+ w[i] += alpha * s[i];
+
+ wTw += alpha * alpha * sTs + 2* alpha * wTs;
+ return alpha;
+ }
+
+ void l2r_erm_fun::Xv(double *v, double *Xv)
+ {
+ int i;
+ int l=prob->l;
+ feature_node **x=prob->x;
+
+ for(i=0;i<l;i++)
+ Xv[i]=sparse_operator::dot(v, x[i]);
+ }
+
+ void l2r_erm_fun::XTv(double *v, double *XTv)
+ {
+ int i;
+ int l=prob->l;
+ int w_size=get_nr_variable();
+ feature_node **x=prob->x;
+
+ for(i=0;i<w_size;i++)
+ XTv[i]=0;
+ for(i=0;i<l;i++)
+ sparse_operator::axpy(v[i], x[i], XTv);
+ }
+
+ class l2r_lr_fun: public l2r_erm_fun
+ {
+ public:
+ l2r_lr_fun(const problem *prob, const parameter *param, double *C);
+ ~l2r_lr_fun();
+
+ void grad(double *w, double *g);
+ void Hv(double *s, double *Hs);
+
+ void get_diag_preconditioner(double *M);
+
+ private:
+ double *D;
+ double C_times_loss(int i, double wx_i);
+ };
+
+ l2r_lr_fun::l2r_lr_fun(const problem *prob, const parameter *param, double *C):
+ l2r_erm_fun(prob, param, C)
+ {
+ int l=prob->l;
+ D = new double[l];
+ }
+
+ l2r_lr_fun::~l2r_lr_fun()
+ {
+ delete[] D;
+ }
+
+ double l2r_lr_fun::C_times_loss(int i, double wx_i)
+ {
+ double ywx_i = wx_i * prob->y[i];
+ if (ywx_i >= 0)
+ return C[i]*log(1 + exp(-ywx_i));
+ else
+ return C[i]*(-ywx_i + log(1 + exp(ywx_i)));
  }
 
  void l2r_lr_fun::grad(double *w, double *g)
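
For reference, the refactor above replaces the separate fun() implementations with a shared base class: l2r_erm_fun evaluates the L2-regularized empirical risk named in its comment, and each subclass only supplies C_times_loss. Restated from those comments and the three C_times_loss bodies (the LR/SVC/SVR labels are editorial):

\min_{w}\ \tfrac{1}{2}\,w^{\top}w + \sum_{i=1}^{l} C_i\,\xi(w^{\top}x_i), \qquad
\xi_{\mathrm{LR}} = \log\!\bigl(1+e^{-y_i w^{\top}x_i}\bigr), \quad
\xi_{\mathrm{SVC}} = \max\bigl(0,\,1-y_i w^{\top}x_i\bigr)^{2}, \quad
\xi_{\mathrm{SVR}} = \max\bigl(0,\,\lvert w^{\top}x_i - y_i\rvert - p\bigr)^{2}

When regularize_bias is 0, the last component of w is excluded from the w^T w term, which is why the new grad/Hv/preconditioner code subtracts the bias entry again. fun() also caches wx = Xw and wTw so that linesearch_and_update() can re-evaluate the objective along a direction s without extra matrix-vector products.
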
@@ -155,19 +302,16 @@ void l2r_lr_fun::grad(double *w, double *g)
 
  for(i=0;i<l;i++)
  {
- z[i] = 1/(1 + exp(-y[i]*z[i]));
- D[i] = z[i]*(1-z[i]);
- z[i] = C[i]*(z[i]-1)*y[i];
+ tmp[i] = 1/(1 + exp(-y[i]*wx[i]));
+ D[i] = tmp[i]*(1-tmp[i]);
+ tmp[i] = C[i]*(tmp[i]-1)*y[i];
  }
- XTv(z, g);
+ XTv(tmp, g);
 
  for(i=0;i<w_size;i++)
  g[i] = w[i] + g[i];
- }
-
- int l2r_lr_fun::get_nr_variable(void)
- {
- return prob->n;
+ if(regularize_bias == 0)
+ g[w_size-1] -= w[w_size-1];
  }
 
  void l2r_lr_fun::get_diag_preconditioner(double *M)
@@ -179,14 +323,16 @@ void l2r_lr_fun::get_diag_preconditioner(double *M)
 
  for (i=0; i<w_size; i++)
  M[i] = 1;
+ if(regularize_bias == 0)
+ M[w_size-1] = 0;
 
  for (i=0; i<l; i++)
  {
- feature_node *s = x[i];
- while (s->index!=-1)
+ feature_node *xi = x[i];
+ while (xi->index!=-1)
  {
- M[s->index-1] += s->value*s->value*C[i]*D[i];
- s++;
+ M[xi->index-1] += xi->value*xi->value*C[i]*D[i];
+ xi++;
  }
  }
  }
@@ -211,94 +357,49 @@ void l2r_lr_fun::Hv(double *s, double *Hs)
  }
  for(i=0;i<w_size;i++)
  Hs[i] = s[i] + Hs[i];
+ if(regularize_bias == 0)
+ Hs[w_size-1] -= s[w_size-1];
  }
 
- void l2r_lr_fun::Xv(double *v, double *Xv)
- {
- int i;
- int l=prob->l;
- feature_node **x=prob->x;
-
- for(i=0;i<l;i++)
- Xv[i]=sparse_operator::dot(v, x[i]);
- }
-
- void l2r_lr_fun::XTv(double *v, double *XTv)
- {
- int i;
- int l=prob->l;
- int w_size=get_nr_variable();
- feature_node **x=prob->x;
-
- for(i=0;i<w_size;i++)
- XTv[i]=0;
- for(i=0;i<l;i++)
- sparse_operator::axpy(v[i], x[i], XTv);
- }
-
- class l2r_l2_svc_fun: public function
+ class l2r_l2_svc_fun: public l2r_erm_fun
  {
  public:
- l2r_l2_svc_fun(const problem *prob, double *C);
+ l2r_l2_svc_fun(const problem *prob, const parameter *param, double *C);
  ~l2r_l2_svc_fun();
 
- double fun(double *w);
  void grad(double *w, double *g);
  void Hv(double *s, double *Hs);
 
- int get_nr_variable(void);
  void get_diag_preconditioner(double *M);
 
  protected:
- void Xv(double *v, double *Xv);
  void subXTv(double *v, double *XTv);
 
- double *C;
- double *z;
  int *I;
  int sizeI;
- const problem *prob;
+
+ private:
+ double C_times_loss(int i, double wx_i);
  };
 
- l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C)
+ l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, const parameter *param, double *C):
+ l2r_erm_fun(prob, param, C)
  {
- int l=prob->l;
-
- this->prob = prob;
-
- z = new double[l];
- I = new int[l];
- this->C = C;
+ I = new int[prob->l];
  }
 
  l2r_l2_svc_fun::~l2r_l2_svc_fun()
  {
- delete[] z;
  delete[] I;
  }
 
- double l2r_l2_svc_fun::fun(double *w)
+ double l2r_l2_svc_fun::C_times_loss(int i, double wx_i)
  {
- int i;
- double f=0;
- double *y=prob->y;
- int l=prob->l;
- int w_size=get_nr_variable();
-
- Xv(w, z);
-
- for(i=0;i<w_size;i++)
- f += w[i]*w[i];
- f /= 2.0;
- for(i=0;i<l;i++)
- {
- z[i] = y[i]*z[i];
- double d = 1-z[i];
- if (d > 0)
- f += C[i]*d*d;
- }
-
- return(f);
+ double d = 1 - prob->y[i] * wx_i;
+ if (d > 0)
+ return C[i] * d * d;
+ else
+ return 0;
  }
 
  void l2r_l2_svc_fun::grad(double *w, double *g)
@@ -310,21 +411,21 @@ void l2r_l2_svc_fun::grad(double *w, double *g)
 
  sizeI = 0;
  for (i=0;i<l;i++)
- if (z[i] < 1)
+ {
+ tmp[i] = wx[i] * y[i];
+ if (tmp[i] < 1)
  {
- z[sizeI] = C[i]*y[i]*(z[i]-1);
+ tmp[sizeI] = C[i]*y[i]*(tmp[i]-1);
  I[sizeI] = i;
  sizeI++;
  }
- subXTv(z, g);
+ }
+ subXTv(tmp, g);
 
  for(i=0;i<w_size;i++)
  g[i] = w[i] + 2*g[i];
- }
-
- int l2r_l2_svc_fun::get_nr_variable(void)
- {
- return prob->n;
+ if(regularize_bias == 0)
+ g[w_size-1] -= w[w_size-1];
  }
 
  void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
@@ -335,15 +436,17 @@ void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
 
  for (i=0; i<w_size; i++)
  M[i] = 1;
+ if(regularize_bias == 0)
+ M[w_size-1] = 0;
 
  for (i=0; i<sizeI; i++)
  {
  int idx = I[i];
- feature_node *s = x[idx];
- while (s->index!=-1)
+ feature_node *xi = x[idx];
+ while (xi->index!=-1)
  {
- M[s->index-1] += s->value*s->value*C[idx]*2;
- s++;
+ M[xi->index-1] += xi->value*xi->value*C[idx]*2;
+ xi++;
  }
  }
  }
@@ -367,16 +470,8 @@ void l2r_l2_svc_fun::Hv(double *s, double *Hs)
  }
  for(i=0;i<w_size;i++)
  Hs[i] = s[i] + 2*Hs[i];
- }
-
- void l2r_l2_svc_fun::Xv(double *v, double *Xv)
- {
- int i;
- int l=prob->l;
- feature_node **x=prob->x;
-
- for(i=0;i<l;i++)
- Xv[i]=sparse_operator::dot(v, x[i]);
+ if(regularize_bias == 0)
+ Hs[w_size-1] -= s[w_size-1];
  }
 
  void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
@@ -394,45 +489,30 @@ void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
  class l2r_l2_svr_fun: public l2r_l2_svc_fun
  {
  public:
- l2r_l2_svr_fun(const problem *prob, double *C, double p);
+ l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C);
 
- double fun(double *w);
  void grad(double *w, double *g);
 
  private:
+ double C_times_loss(int i, double wx_i);
  double p;
  };
 
- l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *C, double p):
- l2r_l2_svc_fun(prob, C)
+ l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C):
+ l2r_l2_svc_fun(prob, param, C)
  {
- this->p = p;
+ this->p = param->p;
+ this->regularize_bias = param->regularize_bias;
  }
 
- double l2r_l2_svr_fun::fun(double *w)
+ double l2r_l2_svr_fun::C_times_loss(int i, double wx_i)
  {
- int i;
- double f=0;
- double *y=prob->y;
- int l=prob->l;
- int w_size=get_nr_variable();
- double d;
-
- Xv(w, z);
-
- for(i=0;i<w_size;i++)
- f += w[i]*w[i];
- f /= 2;
- for(i=0;i<l;i++)
- {
- d = z[i] - y[i];
- if(d < -p)
- f += C[i]*(d+p)*(d+p);
- else if(d > p)
- f += C[i]*(d-p)*(d-p);
- }
-
- return(f);
+ double d = wx_i - prob->y[i];
+ if(d < -p)
+ return C[i]*(d+p)*(d+p);
+ else if(d > p)
+ return C[i]*(d-p)*(d-p);
+ return 0;
  }
 
  void l2r_l2_svr_fun::grad(double *w, double *g)
@@ -446,27 +526,29 @@ void l2r_l2_svr_fun::grad(double *w, double *g)
  sizeI = 0;
  for(i=0;i<l;i++)
  {
- d = z[i] - y[i];
+ d = wx[i] - y[i];
 
  // generate index set I
  if(d < -p)
  {
- z[sizeI] = C[i]*(d+p);
+ tmp[sizeI] = C[i]*(d+p);
  I[sizeI] = i;
  sizeI++;
  }
  else if(d > p)
  {
- z[sizeI] = C[i]*(d-p);
+ tmp[sizeI] = C[i]*(d-p);
  I[sizeI] = i;
  sizeI++;
  }
 
  }
- subXTv(z, g);
+ subXTv(tmp, g);
 
  for(i=0;i<w_size;i++)
  g[i] = w[i] + 2*g[i];
+ if(regularize_bias == 0)
+ g[w_size-1] -= w[w_size-1];
  }
 
  // A coordinate descent algorithm for
@@ -1378,6 +1460,9 @@ void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, do
  // solution will be put in w
  //
  // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
+ //
+ // To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
+ // must have been added to the original data. (see -B and -R option)
 
  #undef GETI
  #define GETI(i) (y[i]+1)
@@ -1385,7 +1470,7 @@
 
  static void solve_l1r_l2_svc(
  problem *prob_col, double *w, double eps,
- double Cp, double Cn)
+ double Cp, double Cn, int regularize_bias)
  {
  int l = prob_col->l;
  int w_size = prob_col->n;
@@ -1475,49 +1560,66 @@ static void solve_l1r_l2_svc(
  H *= 2;
  H = max(H, 1e-12);
 
- double Gp = G+1;
- double Gn = G-1;
  double violation = 0;
- if(w[j] == 0)
+ double Gp = 0, Gn = 0;
+ if(j == w_size-1 && regularize_bias == 0)
+ violation = fabs(G);
+ else
  {
- if(Gp < 0)
- violation = -Gp;
- else if(Gn > 0)
- violation = Gn;
- else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
+ Gp = G+1;
+ Gn = G-1;
+ if(w[j] == 0)
  {
- active_size--;
- swap(index[s], index[active_size]);
- s--;
- continue;
+ if(Gp < 0)
+ violation = -Gp;
+ else if(Gn > 0)
+ violation = Gn;
+ else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
  }
+ else if(w[j] > 0)
+ violation = fabs(Gp);
+ else
+ violation = fabs(Gn);
  }
- else if(w[j] > 0)
- violation = fabs(Gp);
- else
- violation = fabs(Gn);
-
  Gmax_new = max(Gmax_new, violation);
  Gnorm1_new += violation;
 
  // obtain Newton direction d
- if(Gp < H*w[j])
- d = -Gp/H;
- else if(Gn > H*w[j])
- d = -Gn/H;
+ if(j == w_size-1 && regularize_bias == 0)
+ d = -G/H;
  else
- d = -w[j];
+ {
+ if(Gp < H*w[j])
+ d = -Gp/H;
+ else if(Gn > H*w[j])
+ d = -Gn/H;
+ else
+ d = -w[j];
+ }
 
  if(fabs(d) < 1.0e-12)
  continue;
 
- double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
+ double delta;
+ if(j == w_size-1 && regularize_bias == 0)
+ delta = G*d;
+ else
+ delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
  d_old = 0;
  int num_linesearch;
  for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
  {
  d_diff = d_old - d;
- cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
+ if(j == w_size-1 && regularize_bias == 0)
+ cond = -sigma*delta;
+ else
+ cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
 
  appxcond = xj_sq[j]*d*d + G_loss*d + cond;
  if(appxcond <= 0)
@@ -1632,6 +1734,8 @@ static void solve_l1r_l2_svc(
  nnz++;
  }
  }
+ if (regularize_bias == 0)
+ v -= fabs(w[w_size-1]);
  for(j=0; j<l; j++)
  if(b[j] > 0)
  v += C[GETI(j)]*b[j]*b[j];
@@ -1657,6 +1761,9 @@ static void solve_l1r_l2_svc(
  // solution will be put in w
  //
  // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
+ //
+ // To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
+ // must have been added to the original data. (see -B and -R option)
 
  #undef GETI
  #define GETI(i) (y[i]+1)
@@ -1664,7 +1771,7 @@ static void solve_l1r_l2_svc(
 
  static void solve_l1r_lr(
  const problem *prob_col, double *w, double eps,
- double Cp, double Cn)
+ double Cp, double Cn, int regularize_bias)
  {
  int l = prob_col->l;
  int w_size = prob_col->n;
@@ -1734,6 +1841,9 @@ static void solve_l1r_lr(
  x++;
  }
  }
+ if (regularize_bias == 0)
+ w_norm -= fabs(w[w_size-1]);
+
  for(j=0; j<l; j++)
  {
  exp_wTx[j] = exp(exp_wTx[j]);
@@ -1765,29 +1875,33 @@ static void solve_l1r_lr(
  }
  Grad[j] = -tmp + xjneg_sum[j];
 
- double Gp = Grad[j]+1;
- double Gn = Grad[j]-1;
  double violation = 0;
- if(w[j] == 0)
+ if (j == w_size-1 && regularize_bias == 0)
+ violation = fabs(Grad[j]);
+ else
  {
- if(Gp < 0)
- violation = -Gp;
- else if(Gn > 0)
- violation = Gn;
- //outer-level shrinking
- else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
+ double Gp = Grad[j]+1;
+ double Gn = Grad[j]-1;
+ if(w[j] == 0)
  {
- active_size--;
- swap(index[s], index[active_size]);
- s--;
- continue;
+ if(Gp < 0)
+ violation = -Gp;
+ else if(Gn > 0)
+ violation = Gn;
+ //outer-level shrinking
+ else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ continue;
+ }
  }
+ else if(w[j] > 0)
+ violation = fabs(Gp);
+ else
+ violation = fabs(Gn);
  }
- else if(w[j] > 0)
- violation = fabs(Gp);
- else
- violation = fabs(Gn);
-
  Gmax_new = max(Gmax_new, violation);
  Gnorm1_new += violation;
  }
@@ -1831,40 +1945,48 @@ static void solve_l1r_lr(
  x++;
  }
 
- double Gp = G+1;
- double Gn = G-1;
  double violation = 0;
- if(wpd[j] == 0)
+ if (j == w_size-1 && regularize_bias == 0)
  {
- if(Gp < 0)
- violation = -Gp;
- else if(Gn > 0)
- violation = Gn;
- //inner-level shrinking
- else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
- {
- QP_active_size--;
- swap(index[s], index[QP_active_size]);
- s--;
- continue;
- }
+ // bias term not shrunken
+ violation = fabs(G);
+ z = -G/H;
  }
- else if(wpd[j] > 0)
- violation = fabs(Gp);
  else
- violation = fabs(Gn);
+ {
+ double Gp = G+1;
+ double Gn = G-1;
+ if(wpd[j] == 0)
+ {
+ if(Gp < 0)
+ violation = -Gp;
+ else if(Gn > 0)
+ violation = Gn;
+ //inner-level shrinking
+ else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
+ {
+ QP_active_size--;
+ swap(index[s], index[QP_active_size]);
+ s--;
+ continue;
+ }
+ }
+ else if(wpd[j] > 0)
+ violation = fabs(Gp);
+ else
+ violation = fabs(Gn);
 
+ // obtain solution of one-variable problem
+ if(Gp < H*wpd[j])
+ z = -Gp/H;
+ else if(Gn > H*wpd[j])
+ z = -Gn/H;
+ else
+ z = -wpd[j];
+ }
  QP_Gmax_new = max(QP_Gmax_new, violation);
  QP_Gnorm1_new += violation;
 
- // obtain solution of one-variable problem
- if(Gp < H*wpd[j])
- z = -Gp/H;
- else if(Gn > H*wpd[j])
- z = -Gn/H;
- else
- z = -wpd[j];
-
  if(fabs(z) < 1.0e-12)
  continue;
  z = min(max(z,-10.0),10.0);
@@ -1905,6 +2027,8 @@ static void solve_l1r_lr(
  if(wpd[j] != 0)
  w_norm_new += fabs(wpd[j]);
  }
+ if (regularize_bias == 0)
+ w_norm_new -= fabs(wpd[w_size-1]);
  delta += (w_norm_new-w_norm);
 
  negsum_xTd = 0;
@@ -1947,6 +2071,8 @@ static void solve_l1r_lr(
  if(wpd[j] != 0)
  w_norm_new += fabs(wpd[j]);
  }
+ if (regularize_bias == 0)
+ w_norm_new -= fabs(wpd[w_size-1]);
  delta *= 0.5;
  negsum_xTd *= 0.5;
  for(int i=0; i<l; i++)
@@ -1995,6 +2121,8 @@ static void solve_l1r_lr(
  v += fabs(w[j]);
  nnz++;
  }
+ if (regularize_bias == 0)
+ v -= fabs(w[w_size-1]);
  for(j=0; j<l; j++)
  if(y[j] == 1)
  v += C[GETI(j)]*log(1+1/exp_wTx[j]);
@@ -2017,6 +2145,342 @@ static void solve_l1r_lr(
  delete [] D;
  }
 
+ struct heap {
+ enum HEAP_TYPE { MIN, MAX };
+ int _size;
+ HEAP_TYPE _type;
+ feature_node* a;
+
+ heap(int max_size, HEAP_TYPE type)
+ {
+ _size = 0;
+ a = new feature_node[max_size];
+ _type = type;
+ }
+ ~heap()
+ {
+ delete [] a;
+ }
+ bool cmp(const feature_node& left, const feature_node& right)
+ {
+ if(_type == MIN)
+ return left.value > right.value;
+ else
+ return left.value < right.value;
+ }
+ int size()
+ {
+ return _size;
+ }
+ void push(feature_node node)
+ {
+ a[_size] = node;
+ _size++;
+ int i = _size-1;
+ while(i)
+ {
+ int p = (i-1)/2;
+ if(cmp(a[p], a[i]))
+ {
+ swap(a[i], a[p]);
+ i = p;
+ }
+ else
+ break;
+ }
+ }
+ void pop()
+ {
+ _size--;
+ a[0] = a[_size];
+ int i = 0;
+ while(i*2+1 < _size)
+ {
+ int l = i*2+1;
+ int r = i*2+2;
+ if(r < _size && cmp(a[l], a[r]))
+ l = r;
+ if(cmp(a[i], a[l]))
+ {
+ swap(a[i], a[l]);
+ i = l;
+ }
+ else
+ break;
+ }
+ }
+ feature_node top()
+ {
+ return a[0];
+ }
+ };
+
+ // A two-level coordinate descent algorithm for
+ // a scaled one-class SVM dual problem
+ //
+ // min_\alpha 0.5(\alpha^T Q \alpha),
+ // s.t. 0 <= \alpha_i <= 1 and
+ // e^T \alpha = \nu l
+ //
+ // where Qij = xi^T xj
+ //
+ // Given:
+ // x, nu
+ // eps is the stopping tolerance
+ //
+ // solution will be put in w and rho
+ //
+ // See Algorithm 7 in supplementary materials of Chou et al., SDM 2020.
+
+ static void solve_oneclass_svm(const problem *prob, double *w, double *rho, double eps, double nu)
+ {
+ int l = prob->l;
+ int w_size = prob->n;
+ int i, j, s, iter = 0;
+ double Gi, Gj;
+ double Qij, quad_coef, delta, sum;
+ double old_alpha_i;
+ double *QD = new double[l];
+ double *G = new double[l];
+ int *index = new int[l];
+ double *alpha = new double[l];
+ int max_inner_iter;
+ int max_iter = 1000;
+ int active_size = l;
+
+ double negGmax; // max { -grad(f)_i | alpha_i < 1 }
+ double negGmin; // min { -grad(f)_i | alpha_i > 0 }
+
+ int *most_violating_i = new int[l];
+ int *most_violating_j = new int[l];
+
+ int n = (int)(nu*l); // # of alpha's at upper bound
+ for(i=0; i<n; i++)
+ alpha[i] = 1;
+ if (n<l)
+ alpha[i] = nu*l-n;
+ for(i=n+1; i<l; i++)
+ alpha[i] = 0;
+
+ for(i=0; i<w_size; i++)
+ w[i] = 0;
+ for(i=0; i<l; i++)
+ {
+ feature_node * const xi = prob->x[i];
+ QD[i] = sparse_operator::nrm2_sq(xi);
+ sparse_operator::axpy(alpha[i], xi, w);
+
+ index[i] = i;
+ }
+
+ while (iter < max_iter)
+ {
+ negGmax = -INF;
+ negGmin = INF;
+
+ for (s=0; s<active_size; s++)
+ {
+ i = index[s];
+ feature_node * const xi = prob->x[i];
+ G[i] = sparse_operator::dot(w, xi);
+ if (alpha[i] < 1)
+ negGmax = max(negGmax, -G[i]);
+ if (alpha[i] > 0)
+ negGmin = min(negGmin, -G[i]);
+ }
+
+ if (negGmax - negGmin < eps)
+ {
+ if (active_size == l)
+ break;
+ else
+ {
+ active_size = l;
+ info("*");
+ continue;
+ }
+ }
+
+ for(s=0; s<active_size; s++)
+ {
+ i = index[s];
+ if ((alpha[i] == 1 && -G[i] > negGmax) ||
+ (alpha[i] == 0 && -G[i] < negGmin))
+ {
+ active_size--;
+ swap(index[s], index[active_size]);
+ s--;
+ }
+ }
+
+ max_inner_iter = max(active_size/10, 1);
+ struct heap min_heap = heap(max_inner_iter, heap::MIN);
+ struct heap max_heap = heap(max_inner_iter, heap::MAX);
+ struct feature_node node;
+ for(s=0; s<active_size; s++)
+ {
+ i = index[s];
+ node.index = i;
+ node.value = -G[i];
+
+ if (alpha[i] < 1)
+ {
+ if (min_heap.size() < max_inner_iter)
+ min_heap.push(node);
+ else if (min_heap.top().value < node.value)
+ {
+ min_heap.pop();
+ min_heap.push(node);
+ }
+ }
+
+ if (alpha[i] > 0)
+ {
+ if (max_heap.size() < max_inner_iter)
+ max_heap.push(node);
+ else if (max_heap.top().value > node.value)
+ {
+ max_heap.pop();
+ max_heap.push(node);
+ }
+ }
+ }
+ max_inner_iter = min(min_heap.size(), max_heap.size());
+ while (max_heap.size() > max_inner_iter)
+ max_heap.pop();
+ while (min_heap.size() > max_inner_iter)
+ min_heap.pop();
+
+ for (s=max_inner_iter-1; s>=0; s--)
+ {
+ most_violating_i[s] = min_heap.top().index;
+ most_violating_j[s] = max_heap.top().index;
+ min_heap.pop();
+ max_heap.pop();
+ }
+
+ for (s=0; s<max_inner_iter; s++)
+ {
+ i = most_violating_i[s];
+ j = most_violating_j[s];
+
+ if ((alpha[i] == 0 && alpha[j] == 0) ||
+ (alpha[i] == 1 && alpha[j] == 1))
+ continue;
+
+ feature_node const * xi = prob->x[i];
+ feature_node const * xj = prob->x[j];
+
+ Gi = sparse_operator::dot(w, xi);
+ Gj = sparse_operator::dot(w, xj);
+
+ int violating_pair = 0;
+ if (alpha[i] < 1 && alpha[j] > 0 && -Gj + 1e-12 < -Gi)
+ violating_pair = 1;
+ else
+ if (alpha[i] > 0 && alpha[j] < 1 && -Gi + 1e-12 < -Gj)
+ violating_pair = 1;
+ if (violating_pair == 0)
+ continue;
+
+ Qij = sparse_operator::sparse_dot(xi, xj);
+ quad_coef = QD[i] + QD[j] - 2*Qij;
+ if(quad_coef <= 0)
+ quad_coef = 1e-12;
+ delta = (Gi - Gj) / quad_coef;
+ old_alpha_i = alpha[i];
+ sum = alpha[i] + alpha[j];
+ alpha[i] = alpha[i] - delta;
+ alpha[j] = alpha[j] + delta;
+ if (sum > 1)
+ {
+ if (alpha[i] > 1)
+ {
+ alpha[i] = 1;
+ alpha[j] = sum - 1;
+ }
+ }
+ else
+ {
+ if (alpha[j] < 0)
+ {
+ alpha[j] = 0;
+ alpha[i] = sum;
+ }
+ }
+ if (sum > 1)
+ {
+ if (alpha[j] > 1)
+ {
+ alpha[j] = 1;
+ alpha[i] = sum - 1;
+ }
+ }
+ else
+ {
+ if (alpha[i] < 0)
+ {
+ alpha[i] = 0;
+ alpha[j] = sum;
+ }
+ }
+ delta = alpha[i] - old_alpha_i;
+ sparse_operator::axpy(delta, xi, w);
+ sparse_operator::axpy(-delta, xj, w);
+ }
+ iter++;
+ if (iter % 10 == 0)
+ info(".");
+ }
+ info("\noptimization finished, #iter = %d\n",iter);
+ if (iter >= max_iter)
+ info("\nWARNING: reaching max number of iterations\n\n");
+
+ // calculate object value
+ double v = 0;
+ for(i=0; i<w_size; i++)
+ v += w[i]*w[i];
+ int nSV = 0;
+ for(i=0; i<l; i++)
+ {
+ if (alpha[i] > 0)
+ ++nSV;
+ }
+ info("Objective value = %lf\n", v/2);
+ info("nSV = %d\n", nSV);
+
+ // calculate rho
+ double nr_free = 0;
+ double ub = INF, lb = -INF, sum_free = 0;
+ for(i=0; i<l; i++)
+ {
+ double G = sparse_operator::dot(w, prob->x[i]);
+ if (alpha[i] == 1)
+ lb = max(lb, G);
+ else if (alpha[i] == 0)
+ ub = min(ub, G);
+ else
+ {
+ ++nr_free;
+ sum_free += G;
+ }
+ }
+
+ if (nr_free > 0)
+ *rho = sum_free/nr_free;
+ else
+ *rho = (ub + lb)/2;
+
+ info("rho = %lf\n", *rho);
+
+ delete [] QD;
+ delete [] G;
+ delete [] index;
+ delete [] alpha;
+ delete [] most_violating_i;
+ delete [] most_violating_j;
+ }
+
  // transpose matrix X from row format to column format
  static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
  {
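
The solver added above works on the scaled one-class SVM dual written in its header comment; together with the rho handling further down (save_model writes it, predict_values subtracts it and thresholds at zero), the problem and decision rule can be summarized as:

\min_{\alpha}\ \tfrac{1}{2}\,\alpha^{\top}Q\alpha \quad \text{s.t.}\ 0 \le \alpha_i \le 1,\ e^{\top}\alpha = \nu l, \qquad Q_{ij} = x_i^{\top}x_j, \qquad
f(x) = \operatorname{sign}\bigl(w^{\top}x - \rho\bigr) \ \text{with}\ w = \sum_i \alpha_i x_i

rho itself is recovered exactly as in the "calculate rho" block: the mean of w^T x_i over the free alphas (0 < alpha_i < 1), falling back to the midpoint between the tightest bounds when no alpha is free.
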
@@ -2152,11 +2616,7 @@ static void group_classes(const problem *prob, int *nr_class_ret, int **label_re
 
  static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
  {
- //inner and outer tolerances for TRON
  double eps = param->eps;
- double eps_cg = 0.1;
- if(param->init_sol != NULL)
- eps_cg = 0.5;
 
  int pos = 0;
  int neg = 0;
@@ -2179,10 +2639,10 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
  else
  C[i] = Cn;
  }
- fun_obj=new l2r_lr_fun(prob, C);
- TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
- tron_obj.set_print_string(liblinear_print_string);
- tron_obj.tron(w);
+ fun_obj=new l2r_lr_fun(prob, param, C);
+ NEWTON newton_obj(fun_obj, primal_solver_tol);
+ newton_obj.set_print_string(liblinear_print_string);
+ newton_obj.newton(w);
  delete fun_obj;
  delete[] C;
  break;
@@ -2197,10 +2657,10 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
  else
  C[i] = Cn;
  }
- fun_obj=new l2r_l2_svc_fun(prob, C);
- TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
- tron_obj.set_print_string(liblinear_print_string);
- tron_obj.tron(w);
+ fun_obj=new l2r_l2_svc_fun(prob, param, C);
+ NEWTON newton_obj(fun_obj, primal_solver_tol);
+ newton_obj.set_print_string(liblinear_print_string);
+ newton_obj.newton(w);
  delete fun_obj;
  delete[] C;
  break;
@@ -2216,7 +2676,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
  problem prob_col;
  feature_node *x_space = NULL;
  transpose(prob, &x_space ,&prob_col);
- solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn);
+ solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias);
  delete [] prob_col.y;
  delete [] prob_col.x;
  delete [] x_space;
@@ -2227,7 +2687,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
  problem prob_col;
  feature_node *x_space = NULL;
  transpose(prob, &x_space ,&prob_col);
- solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn);
+ solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias);
  delete [] prob_col.y;
  delete [] prob_col.x;
  delete [] x_space;
@@ -2242,10 +2702,10 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
  for(int i = 0; i < prob->l; i++)
  C[i] = param->C;
 
- fun_obj=new l2r_l2_svr_fun(prob, C, param->p);
- TRON tron_obj(fun_obj, param->eps);
- tron_obj.set_print_string(liblinear_print_string);
- tron_obj.tron(w);
+ fun_obj=new l2r_l2_svr_fun(prob, param, C);
+ NEWTON newton_obj(fun_obj, param->eps);
+ newton_obj.set_print_string(liblinear_print_string);
+ newton_obj.newton(w);
  delete fun_obj;
  delete[] C;
  break;
@@ -2432,7 +2892,7 @@ static void find_parameter_C(const problem *prob, parameter *param_tmp, double s
  }
 
  if(param_tmp->C > max_C)
- info("warning: maximum C reached.\n");
+ info("WARNING: maximum C reached.\n");
  free(target);
  for(i=0; i<nr_fold; i++)
  free(prev_w[i]);
@@ -2473,6 +2933,13 @@ model* train(const problem *prob, const parameter *param)
  model_->label = NULL;
  train_one(prob, param, model_->w, 0, 0);
  }
+ else if(check_oneclass_model(model_))
+ {
+ model_->w = Malloc(double, w_size);
+ model_->nr_class = 2;
+ model_->label = NULL;
+ solve_oneclass_svm(prob, model_->w, &(model_->rho), param->eps, param->nu);
+ }
  else
  {
  int nr_class;
@@ -2716,11 +3183,11 @@ void find_parameters(const problem *prob, const parameter *param, int nr_fold, d
  if(start_C <= 0)
  start_C = calc_start_C(prob, &param_tmp);
  double max_C = 1024;
- start_C = min(start_C, max_C);
+ start_C = min(start_C, max_C);
  double best_C_tmp, best_score_tmp;
-
+
  find_parameter_C(prob, &param_tmp, start_C, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
-
+
  *best_C = best_C_tmp;
  *best_score = best_score_tmp;
  }
@@ -2744,9 +3211,9 @@ void find_parameters(const problem *prob, const parameter *param, int nr_fold, d
  start_C_tmp = start_C;
  start_C_tmp = min(start_C_tmp, max_C);
  double best_C_tmp, best_score_tmp;
-
+
  find_parameter_C(prob, &param_tmp, start_C_tmp, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
-
+
  if(best_score_tmp < *best_score)
  {
  *best_p = param_tmp.p;
@@ -2793,11 +3260,15 @@ double predict_values(const struct model *model_, const struct feature_node *x,
  for(i=0;i<nr_w;i++)
  dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
  }
+ if(check_oneclass_model(model_))
+ dec_values[0] -= model_->rho;
 
  if(nr_class==2)
  {
  if(check_regression_model(model_))
  return dec_values[0];
+ else if(check_oneclass_model(model_))
+ return (dec_values[0]>0)?1:-1;
  else
  return (dec_values[0]>0)?model_->label[0]:model_->label[1];
  }
@@ -2860,7 +3331,9 @@ static const char *solver_type_table[]=
  "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
  "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
  "", "", "",
- "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL", NULL
+ "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL",
+ "", "", "", "", "", "", "",
+ "ONECLASS_SVM", NULL
  };
 
  int save_model(const char *model_file_name, const struct model *model_)
@@ -2906,6 +3379,9 @@ int save_model(const char *model_file_name, const struct model *model_)
 
  fprintf(fp, "bias %.17g\n", model_->bias);
 
+ if(check_oneclass_model(model_))
+ fprintf(fp, "rho %.17g\n", model_->rho);
+
  fprintf(fp, "w\n");
  for(i=0; i<w_size; i++)
  {
@@ -2956,12 +3432,13 @@ struct model *load_model(const char *model_file_name)
  int n;
  int nr_class;
  double bias;
+ double rho;
  model *model_ = Malloc(model,1);
  parameter& param = model_->param;
  // parameters for training only won't be assigned, but arrays are assigned as NULL for safety
  param.nr_weight = 0;
  param.weight_label = NULL;
- param.weight = NULL;
+ param.weight = NULL;
  param.init_sol = NULL;
 
  model_->label = NULL;
@@ -3010,6 +3487,11 @@ struct model *load_model(const char *model_file_name)
  FSCANF(fp,"%lf",&bias);
  model_->bias=bias;
  }
+ else if(strcmp(cmd,"rho")==0)
+ {
+ FSCANF(fp,"%lf",&rho);
+ model_->rho=rho;
+ }
  else if(strcmp(cmd,"w")==0)
  {
  break;
@@ -3082,7 +3564,7 @@ static inline double get_w_value(const struct model *model_, int idx, int label_
 
  if(idx < 0 || idx > model_->nr_feature)
  return 0;
- if(check_regression_model(model_))
+ if(check_regression_model(model_) || check_oneclass_model(model_))
  return w[idx];
  else
  {
@@ -3102,7 +3584,8 @@
 
  // feat_idx: starting from 1 to nr_feature
  // label_idx: starting from 0 to nr_class-1 for classification models;
- // for regression models, label_idx is ignored.
+ // for regression and one-class SVM models, label_idx is
+ // ignored.
  double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
  {
  if(feat_idx > model_->nr_feature)
@@ -3112,6 +3595,11 @@ double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
 
  double get_decfun_bias(const struct model *model_, int label_idx)
  {
+ if(check_oneclass_model(model_))
+ {
+ fprintf(stderr, "ERROR: get_decfun_bias can not be called for a one-class SVM model\n");
+ return 0;
+ }
  int bias_idx = model_->nr_feature;
  double bias = model_->bias;
  if(bias <= 0)
@@ -3120,6 +3608,17 @@ double get_decfun_bias(const struct model *model_, int label_idx)
  return bias*get_w_value(model_, bias_idx, label_idx);
  }
 
+ double get_decfun_rho(const struct model *model_)
+ {
+ if(check_oneclass_model(model_))
+ return model_->rho;
+ else
+ {
+ fprintf(stderr, "ERROR: get_decfun_rho can be called only for a one-class SVM model\n");
+ return 0;
+ }
+ }
+
  void free_model_content(struct model *model_ptr)
  {
  if(model_ptr->w != NULL)
@@ -3159,6 +3658,21 @@ const char *check_parameter(const problem *prob, const parameter *param)
  if(param->p < 0)
  return "p < 0";
 
+ if(prob->bias >= 0 && param->solver_type == ONECLASS_SVM)
+ return "prob->bias >=0, but this is ignored in ONECLASS_SVM";
+
+ if(param->regularize_bias == 0)
+ {
+ if(prob->bias != 1.0)
+ return "To not regularize bias, must specify -B 1 along with -R";
+ if(param->solver_type != L2R_LR
+ && param->solver_type != L2R_L2LOSS_SVC
+ && param->solver_type != L1R_L2LOSS_SVC
+ && param->solver_type != L1R_LR
+ && param->solver_type != L2R_L2LOSS_SVR)
+ return "-R option supported only for solver L2R_LR, L2R_L2LOSS_SVC, L1R_L2LOSS_SVC, L1R_LR, and L2R_L2LOSS_SVR";
+ }
+
  if(param->solver_type != L2R_LR
  && param->solver_type != L2R_L2LOSS_SVC_DUAL
  && param->solver_type != L2R_L2LOSS_SVC
@@ -3169,12 +3683,15 @@ const char *check_parameter(const problem *prob, const parameter *param)
  && param->solver_type != L2R_LR_DUAL
  && param->solver_type != L2R_L2LOSS_SVR
  && param->solver_type != L2R_L2LOSS_SVR_DUAL
- && param->solver_type != L2R_L1LOSS_SVR_DUAL)
+ && param->solver_type != L2R_L1LOSS_SVR_DUAL
+ && param->solver_type != ONECLASS_SVM)
  return "unknown solver type";
 
  if(param->init_sol != NULL
- && param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
- return "Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC";
+ && param->solver_type != L2R_LR
+ && param->solver_type != L2R_L2LOSS_SVC
+ && param->solver_type != L2R_L2LOSS_SVR)
+ return "Initial-solution specification supported only for solvers L2R_LR, L2R_L2LOSS_SVC, and L2R_L2LOSS_SVR";
 
  return NULL;
  }
@@ -3193,6 +3710,11 @@ int check_regression_model(const struct model *model_)
  model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);
  }
 
+ int check_oneclass_model(const struct model *model_)
+ {
+ return model_->param.solver_type == ONECLASS_SVM;
+ }
+
  void set_print_string_function(void (*print_func)(const char*))
  {
  if (print_func == NULL)