numo-liblinear 0.5.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,88 @@
1
+ #ifndef _LIBLINEAR_H
2
+ #define _LIBLINEAR_H
3
+
4
+ #define LIBLINEAR_VERSION 241
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ extern int liblinear_version;
11
+
12
+ struct feature_node
13
+ {
14
+ int index;
15
+ double value;
16
+ };
17
+
18
+ struct problem
19
+ {
20
+ int l, n;
21
+ double *y;
22
+ struct feature_node **x;
23
+ double bias; /* < 0 if no bias term */
24
+ };
25
+
26
+ enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL, ONECLASS_SVM = 21 }; /* solver_type */
27
+
28
+ struct parameter
29
+ {
30
+ int solver_type;
31
+
32
+ /* these are for training only */
33
+ double eps; /* stopping criteria */
34
+ double C;
35
+ int nr_weight;
36
+ int *weight_label;
37
+ double* weight;
38
+ double p;
39
+ double nu;
40
+ double *init_sol;
41
+ int regularize_bias;
42
+ };
43
+
44
+ struct model
45
+ {
46
+ struct parameter param;
47
+ int nr_class; /* number of classes */
48
+ int nr_feature;
49
+ double *w;
50
+ int *label; /* label of each class */
51
+ double bias;
52
+ double rho; /* one-class SVM only */
53
+ };
54
+
55
+ struct model* train(const struct problem *prob, const struct parameter *param);
56
+ void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
57
+ void find_parameters(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score);
58
+
59
+ double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
60
+ double predict(const struct model *model_, const struct feature_node *x);
61
+ double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);
62
+
63
+ int save_model(const char *model_file_name, const struct model *model_);
64
+ struct model *load_model(const char *model_file_name);
65
+
66
+ int get_nr_feature(const struct model *model_);
67
+ int get_nr_class(const struct model *model_);
68
+ void get_labels(const struct model *model_, int* label);
69
+ double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx);
70
+ double get_decfun_bias(const struct model *model_, int label_idx);
71
+ double get_decfun_rho(const struct model *model_);
72
+
73
+ void free_model_content(struct model *model_ptr);
74
+ void free_and_destroy_model(struct model **model_ptr_ptr);
75
+ void destroy_param(struct parameter *param);
76
+
77
+ const char *check_parameter(const struct problem *prob, const struct parameter *param);
78
+ int check_probability_model(const struct model *model);
79
+ int check_regression_model(const struct model *model);
80
+ int check_oneclass_model(const struct model *model);
81
+ void set_print_string_function(void (*print_func) (const char*));
82
+
83
+ #ifdef __cplusplus
84
+ }
85
+ #endif
86
+
87
+ #endif /* _LIBLINEAR_H */
88
+
@@ -0,0 +1,245 @@
1
+ #include <math.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+ #include <stdarg.h>
5
+ #include "newton.h"
6
+
7
+ #ifndef min
8
+ template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
9
+ #endif
10
+
11
+ #ifndef max
12
+ template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
13
+ #endif
14
+
15
+ #ifdef __cplusplus
16
+ extern "C" {
17
+ #endif
18
+
19
+ extern double dnrm2_(int *, double *, int *);
20
+ extern double ddot_(int *, double *, int *, double *, int *);
21
+ extern int daxpy_(int *, double *, double *, int *, double *, int *);
22
+ extern int dscal_(int *, double *, double *, int *);
23
+
24
+ #ifdef __cplusplus
25
+ }
26
+ #endif
27
+
28
+ static void default_print(const char *buf)
29
+ {
30
+ fputs(buf,stdout);
31
+ fflush(stdout);
32
+ }
33
+
34
+ // On entry *f must be the function value of w
35
+ // On exit w is updated and *f is the new function value
36
+ double function::linesearch_and_update(double *w, double *s, double *f, double *g, double alpha)
37
+ {
38
+ double gTs = 0;
39
+ double eta = 0.01;
40
+ int n = get_nr_variable();
41
+ int max_num_linesearch = 20;
42
+ double *w_new = new double[n];
43
+ double fold = *f;
44
+
45
+ for (int i=0;i<n;i++)
46
+ gTs += s[i] * g[i];
47
+
48
+ int num_linesearch = 0;
49
+ for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
50
+ {
51
+ for (int i=0;i<n;i++)
52
+ w_new[i] = w[i] + alpha*s[i];
53
+ *f = fun(w_new);
54
+ if (*f - fold <= eta * alpha * gTs)
55
+ break;
56
+ else
57
+ alpha *= 0.5;
58
+ }
59
+
60
+ if (num_linesearch >= max_num_linesearch)
61
+ {
62
+ *f = fold;
63
+ return 0;
64
+ }
65
+ else
66
+ memcpy(w, w_new, sizeof(double)*n);
67
+
68
+ delete [] w_new;
69
+ return alpha;
70
+ }
71
+
72
+ void NEWTON::info(const char *fmt,...)
73
+ {
74
+ char buf[BUFSIZ];
75
+ va_list ap;
76
+ va_start(ap,fmt);
77
+ vsprintf(buf,fmt,ap);
78
+ va_end(ap);
79
+ (*newton_print_string)(buf);
80
+ }
81
+
82
+ NEWTON::NEWTON(const function *fun_obj, double eps, double eps_cg, int max_iter)
83
+ {
84
+ this->fun_obj=const_cast<function *>(fun_obj);
85
+ this->eps=eps;
86
+ this->eps_cg=eps_cg;
87
+ this->max_iter=max_iter;
88
+ newton_print_string = default_print;
89
+ }
90
+
91
+ NEWTON::~NEWTON()
92
+ {
93
+ }
94
+
95
+ void NEWTON::newton(double *w)
96
+ {
97
+ int n = fun_obj->get_nr_variable();
98
+ int i, cg_iter;
99
+ double step_size;
100
+ double f, fold, actred;
101
+ double init_step_size = 1;
102
+ int search = 1, iter = 1, inc = 1;
103
+ double *s = new double[n];
104
+ double *r = new double[n];
105
+ double *g = new double[n];
106
+
107
+ const double alpha_pcg = 0.01;
108
+ double *M = new double[n];
109
+
110
+ // calculate gradient norm at w=0 for stopping condition.
111
+ double *w0 = new double[n];
112
+ for (i=0; i<n; i++)
113
+ w0[i] = 0;
114
+ fun_obj->fun(w0);
115
+ fun_obj->grad(w0, g);
116
+ double gnorm0 = dnrm2_(&n, g, &inc);
117
+ delete [] w0;
118
+
119
+ f = fun_obj->fun(w);
120
+ info("init f %5.3e\n", f);
121
+ fun_obj->grad(w, g);
122
+ double gnorm = dnrm2_(&n, g, &inc);
123
+
124
+ if (gnorm <= eps*gnorm0)
125
+ search = 0;
126
+
127
+ double *w_new = new double[n];
128
+ while (iter <= max_iter && search)
129
+ {
130
+ fun_obj->get_diag_preconditioner(M);
131
+ for(i=0; i<n; i++)
132
+ M[i] = (1-alpha_pcg) + alpha_pcg*M[i];
133
+ cg_iter = pcg(g, M, s, r);
134
+
135
+ fold = f;
136
+ step_size = fun_obj->linesearch_and_update(w, s, & f, g, init_step_size);
137
+
138
+ if (step_size == 0)
139
+ {
140
+ info("WARNING: line search fails\n");
141
+ break;
142
+ }
143
+
144
+ info("iter %2d f %5.3e |g| %5.3e CG %3d step_size %4.2e \n", iter, f, gnorm, cg_iter, step_size);
145
+
146
+ actred = fold - f;
147
+ iter++;
148
+
149
+ fun_obj->grad(w, g);
150
+
151
+ gnorm = dnrm2_(&n, g, &inc);
152
+ if (gnorm <= eps*gnorm0)
153
+ break;
154
+ if (f < -1.0e+32)
155
+ {
156
+ info("WARNING: f < -1.0e+32\n");
157
+ break;
158
+ }
159
+ if (fabs(actred) <= 1.0e-12*fabs(f))
160
+ {
161
+ info("WARNING: actred too small\n");
162
+ break;
163
+ }
164
+ }
165
+
166
+ delete[] g;
167
+ delete[] r;
168
+ delete[] w_new;
169
+ delete[] s;
170
+ delete[] M;
171
+ }
172
+
173
+ int NEWTON::pcg(double *g, double *M, double *s, double *r)
174
+ {
175
+ int i, inc = 1;
176
+ int n = fun_obj->get_nr_variable();
177
+ double one = 1;
178
+ double *d = new double[n];
179
+ double *Hd = new double[n];
180
+ double zTr, znewTrnew, alpha, beta, cgtol;
181
+ double *z = new double[n];
182
+ double Q = 0, newQ, Qdiff;
183
+
184
+ for (i=0; i<n; i++)
185
+ {
186
+ s[i] = 0;
187
+ r[i] = -g[i];
188
+ z[i] = r[i] / M[i];
189
+ d[i] = z[i];
190
+ }
191
+
192
+ zTr = ddot_(&n, z, &inc, r, &inc);
193
+ double gMinv_norm = sqrt(zTr);
194
+ cgtol = min(eps_cg, sqrt(gMinv_norm));
195
+ int cg_iter = 0;
196
+ int max_cg_iter = max(n, 5);
197
+
198
+ while (cg_iter < max_cg_iter)
199
+ {
200
+ cg_iter++;
201
+ fun_obj->Hv(d, Hd);
202
+
203
+ alpha = zTr/ddot_(&n, d, &inc, Hd, &inc);
204
+ daxpy_(&n, &alpha, d, &inc, s, &inc);
205
+ alpha = -alpha;
206
+ daxpy_(&n, &alpha, Hd, &inc, r, &inc);
207
+
208
+ // Using quadratic approximation as CG stopping criterion
209
+ newQ = -0.5*(ddot_(&n, s, &inc, r, &inc) - ddot_(&n, s, &inc, g, &inc));
210
+ Qdiff = newQ - Q;
211
+ if (newQ <= 0 && Qdiff <= 0)
212
+ {
213
+ if (cg_iter * Qdiff >= cgtol * newQ)
214
+ break;
215
+ }
216
+ else
217
+ {
218
+ info("WARNING: quadratic approximation > 0 or increasing in CG\n");
219
+ break;
220
+ }
221
+ Q = newQ;
222
+
223
+ for (i=0; i<n; i++)
224
+ z[i] = r[i] / M[i];
225
+ znewTrnew = ddot_(&n, z, &inc, r, &inc);
226
+ beta = znewTrnew/zTr;
227
+ dscal_(&n, &beta, d, &inc);
228
+ daxpy_(&n, &one, z, &inc, d, &inc);
229
+ zTr = znewTrnew;
230
+ }
231
+
232
+ if (cg_iter == max_cg_iter)
233
+ info("WARNING: reaching maximal number of CG steps\n");
234
+
235
+ delete[] d;
236
+ delete[] Hd;
237
+ delete[] z;
238
+
239
+ return(cg_iter);
240
+ }
241
+
242
+ void NEWTON::set_print_string(void (*print_string) (const char *buf))
243
+ {
244
+ newton_print_string = print_string;
245
+ }
@@ -0,0 +1,37 @@
1
+ #ifndef _NEWTON_H
2
+ #define _NEWTON_H
3
+
4
+ class function
5
+ {
6
+ public:
7
+ virtual double fun(double *w) = 0 ;
8
+ virtual void grad(double *w, double *g) = 0 ;
9
+ virtual void Hv(double *s, double *Hs) = 0 ;
10
+ virtual int get_nr_variable(void) = 0 ;
11
+ virtual void get_diag_preconditioner(double *M) = 0 ;
12
+ virtual ~function(void){}
13
+
14
+ // base implementation in newton.cpp
15
+ virtual double linesearch_and_update(double *w, double *s, double *f, double *g, double alpha);
16
+ };
17
+
18
+ class NEWTON
19
+ {
20
+ public:
21
+ NEWTON(const function *fun_obj, double eps = 0.1, double eps_cg = 0.5, int max_iter = 1000);
22
+ ~NEWTON();
23
+
24
+ void newton(double *w);
25
+ void set_print_string(void (*i_print) (const char *buf));
26
+
27
+ private:
28
+ int pcg(double *g, double *M, double *s, double *r);
29
+
30
+ double eps;
31
+ double eps_cg;
32
+ int max_iter;
33
+ function *fun_obj;
34
+ void info(const char *fmt,...);
35
+ void (*newton_print_string)(const char *buf);
36
+ };
37
+ #endif
@@ -112,6 +112,9 @@ VALUE numo_liblinear_train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_has
112
112
  xfree_problem(problem);
113
113
  xfree_parameter(param);
114
114
 
115
+ RB_GC_GUARD(x_val);
116
+ RB_GC_GUARD(y_val);
117
+
115
118
  return model_hash;
116
119
  }
117
120
 
@@ -225,6 +228,9 @@ VALUE numo_liblinear_cross_validation(VALUE self, VALUE x_val, VALUE y_val, VALU
225
228
  xfree_problem(problem);
226
229
  xfree_parameter(param);
227
230
 
231
+ RB_GC_GUARD(x_val);
232
+ RB_GC_GUARD(y_val);
233
+
228
234
  return t_val;
229
235
  }
230
236
 
@@ -291,6 +297,8 @@ VALUE numo_liblinear_predict(VALUE self, VALUE x_val, VALUE param_hash, VALUE mo
291
297
  xfree_model(model);
292
298
  xfree_parameter(param);
293
299
 
300
+ RB_GC_GUARD(x_val);
301
+
294
302
  return y_val;
295
303
  }
296
304
 
@@ -381,6 +389,8 @@ VALUE numo_liblinear_decision_function(VALUE self, VALUE x_val, VALUE param_hash
381
389
  xfree_model(model);
382
390
  xfree_parameter(param);
383
391
 
392
+ RB_GC_GUARD(x_val);
393
+
384
394
  return y_val;
385
395
  }
386
396
 
@@ -457,6 +467,8 @@ VALUE numo_liblinear_predict_proba(VALUE self, VALUE x_val, VALUE param_hash, VA
457
467
  xfree_model(model);
458
468
  xfree_parameter(param);
459
469
 
470
+ RB_GC_GUARD(x_val);
471
+
460
472
  return y_val;
461
473
  }
462
474
 
@@ -492,6 +504,8 @@ VALUE numo_liblinear_load_model(VALUE self, VALUE filename)
492
504
  rb_ary_store(res, 0, param_hash);
493
505
  rb_ary_store(res, 1, model_hash);
494
506
 
507
+ RB_GC_GUARD(filename);
508
+
495
509
  return res;
496
510
  }
497
511
 
@@ -526,6 +540,8 @@ VALUE numo_liblinear_save_model(VALUE self, VALUE filename, VALUE param_hash, VA
526
540
  return Qfalse;
527
541
  }
528
542
 
543
+ RB_GC_GUARD(filename);
544
+
529
545
  return Qtrue;
530
546
  }
531
547
 
@@ -545,6 +561,9 @@ void Init_liblinearext()
545
561
  */
546
562
  mLiblinear = rb_define_module_under(mNumo, "Liblinear");
547
563
 
564
+ /* The version of LIBLINEAR used in backgroud library. */
565
+ rb_define_const(mLiblinear, "LIBLINEAR_VERSION", INT2NUM(LIBLINEAR_VERSION));
566
+
548
567
  rb_define_module_function(mLiblinear, "train", numo_liblinear_train, 3);
549
568
  rb_define_module_function(mLiblinear, "cv", numo_liblinear_cross_validation, 4);
550
569
  rb_define_module_function(mLiblinear, "predict", numo_liblinear_predict, 3);
@@ -14,6 +14,8 @@ struct model* rb_hash_to_model(VALUE model_hash)
14
14
  model->label = nary_to_int_vec(el);
15
15
  el = rb_hash_aref(model_hash, ID2SYM(rb_intern("bias")));
16
16
  model->bias = NUM2DBL(el);
17
+ el = rb_hash_aref(model_hash, ID2SYM(rb_intern("rho")));
18
+ model->rho = NUM2DBL(el);
17
19
  return model;
18
20
  }
19
21
 
@@ -29,6 +31,7 @@ VALUE model_to_rb_hash(struct model* const model)
29
31
  rb_hash_aset(model_hash, ID2SYM(rb_intern("label")),
30
32
  model->label ? int_vec_to_nary(model->label, model->nr_class) : Qnil);
31
33
  rb_hash_aset(model_hash, ID2SYM(rb_intern("bias")), DBL2NUM(model->bias));
34
+ rb_hash_aset(model_hash, ID2SYM(rb_intern("rho")), DBL2NUM(model->rho));
32
35
  return model_hash;
33
36
  }
34
37