numo-liblinear 0.5.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/build.yml +28 -0
- data/.gitmodules +3 -0
- data/CHANGELOG.md +20 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +1 -1
- data/README.md +6 -19
- data/Steepfile +20 -0
- data/ext/numo/liblinear/converter.c +6 -0
- data/ext/numo/liblinear/extconf.rb +7 -8
- data/ext/numo/liblinear/liblinear/blas/blas.h +25 -0
- data/ext/numo/liblinear/liblinear/blas/blasp.h +438 -0
- data/ext/numo/liblinear/liblinear/blas/daxpy.c +57 -0
- data/ext/numo/liblinear/liblinear/blas/ddot.c +58 -0
- data/ext/numo/liblinear/liblinear/blas/dnrm2.c +70 -0
- data/ext/numo/liblinear/liblinear/blas/dscal.c +52 -0
- data/ext/numo/liblinear/liblinear/linear.cpp +3725 -0
- data/ext/numo/liblinear/liblinear/linear.h +88 -0
- data/ext/numo/liblinear/liblinear/newton.cpp +245 -0
- data/ext/numo/liblinear/liblinear/newton.h +37 -0
- data/ext/numo/liblinear/liblinearext.c +19 -0
- data/ext/numo/liblinear/model.c +3 -0
- data/ext/numo/liblinear/parameter.c +36 -29
- data/ext/numo/liblinear/problem.c +3 -0
- data/ext/numo/liblinear/solver_type.c +8 -6
- data/lib/numo/liblinear/version.rb +1 -1
- data/numo-liblinear.gemspec +7 -4
- data/sig/numo/liblinear.rbs +52 -0
- data/sig/patch.rbs +8 -0
- metadata +18 -61
- data/.travis.yml +0 -14
@@ -0,0 +1,88 @@
|
|
1
|
+
#ifndef _LIBLINEAR_H
|
2
|
+
#define _LIBLINEAR_H
|
3
|
+
|
4
|
+
#define LIBLINEAR_VERSION 241
|
5
|
+
|
6
|
+
#ifdef __cplusplus
|
7
|
+
extern "C" {
|
8
|
+
#endif
|
9
|
+
|
10
|
+
extern int liblinear_version;
|
11
|
+
|
12
|
+
/*
 * A feature node: an integer index paired with a double value.
 * struct problem refers to these through its `x` member, and the
 * predict*() functions take a pointer to them.
 */
struct feature_node
{
	int    index;	/* integer index of this node */
	double value;	/* value stored for that index */
};
|
17
|
+
|
18
|
+
/*
 * Input handed to train(), cross_validation(), find_parameters()
 * and check_parameter().
 *
 * NOTE(review): `l` and `n` are presumably the number of instances and
 * the number of features -- confirm against linear.cpp.
 */
struct problem
{
	int l;
	int n;
	double *y;			/* presumably one target per instance -- TODO confirm */
	struct feature_node **x;	/* presumably one node array per instance -- TODO confirm */
	double bias;			/* < 0 if no bias term */
};
|
25
|
+
|
26
|
+
/* solver_type */
enum {
	L2R_LR,				/* 0 */
	L2R_L2LOSS_SVC_DUAL,		/* 1 */
	L2R_L2LOSS_SVC,			/* 2 */
	L2R_L1LOSS_SVC_DUAL,		/* 3 */
	MCSVM_CS,			/* 4 */
	L1R_L2LOSS_SVC,			/* 5 */
	L1R_LR,				/* 6 */
	L2R_LR_DUAL,			/* 7 */
	L2R_L2LOSS_SVR = 11,		/* numbering jumps to 11 here */
	L2R_L2LOSS_SVR_DUAL,		/* 12 */
	L2R_L1LOSS_SVR_DUAL,		/* 13 */
	ONECLASS_SVM = 21
};
|
27
|
+
|
28
|
+
/*
 * Solver selection plus training settings.  Passed to train(),
 * cross_validation(), find_parameters() and check_parameter(), and
 * embedded by value in struct model.
 */
struct parameter
{
	int solver_type;	/* presumably one of the solver_type enum constants -- confirm */

	/* these are for training only */
	double eps;		/* stopping criteria */
	double C;
	int nr_weight;
	int *weight_label;	/* presumably nr_weight entries -- TODO confirm */
	double *weight;		/* presumably nr_weight entries -- TODO confirm */
	double p;
	double nu;
	double *init_sol;
	int regularize_bias;
};
|
43
|
+
|
44
|
+
struct model
|
45
|
+
{
|
46
|
+
struct parameter param;
|
47
|
+
int nr_class; /* number of classes */
|
48
|
+
int nr_feature;
|
49
|
+
double *w;
|
50
|
+
int *label; /* label of each class */
|
51
|
+
double bias;
|
52
|
+
double rho; /* one-class SVM only */
|
53
|
+
};
|
54
|
+
|
55
|
+
struct model* train(const struct problem *prob, const struct parameter *param);
|
56
|
+
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
|
57
|
+
void find_parameters(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score);
|
58
|
+
|
59
|
+
double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
|
60
|
+
double predict(const struct model *model_, const struct feature_node *x);
|
61
|
+
double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);
|
62
|
+
|
63
|
+
int save_model(const char *model_file_name, const struct model *model_);
|
64
|
+
struct model *load_model(const char *model_file_name);
|
65
|
+
|
66
|
+
int get_nr_feature(const struct model *model_);
|
67
|
+
int get_nr_class(const struct model *model_);
|
68
|
+
void get_labels(const struct model *model_, int* label);
|
69
|
+
double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx);
|
70
|
+
double get_decfun_bias(const struct model *model_, int label_idx);
|
71
|
+
double get_decfun_rho(const struct model *model_);
|
72
|
+
|
73
|
+
void free_model_content(struct model *model_ptr);
|
74
|
+
void free_and_destroy_model(struct model **model_ptr_ptr);
|
75
|
+
void destroy_param(struct parameter *param);
|
76
|
+
|
77
|
+
const char *check_parameter(const struct problem *prob, const struct parameter *param);
|
78
|
+
int check_probability_model(const struct model *model);
|
79
|
+
int check_regression_model(const struct model *model);
|
80
|
+
int check_oneclass_model(const struct model *model);
|
81
|
+
void set_print_string_function(void (*print_func) (const char*));
|
82
|
+
|
83
|
+
#ifdef __cplusplus
|
84
|
+
}
|
85
|
+
#endif
|
86
|
+
|
87
|
+
#endif /* _LIBLINEAR_H */
|
88
|
+
|
@@ -0,0 +1,245 @@
|
|
1
|
+
#include <math.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
#include <stdarg.h>
|
5
|
+
#include "newton.h"
|
6
|
+
|
7
|
+
#ifndef min
|
8
|
+
// Returns x when x < y holds, otherwise y.
template <class T> static inline T min(T x, T y)
{
	if (x < y)
		return x;
	return y;
}
|
9
|
+
#endif
|
10
|
+
|
11
|
+
#ifndef max
|
12
|
+
// Returns x when x > y holds, otherwise y.
template <class T> static inline T max(T x, T y)
{
	if (x > y)
		return x;
	return y;
}
|
13
|
+
#endif
|
14
|
+
|
15
|
+
#ifdef __cplusplus
|
16
|
+
extern "C" {
|
17
|
+
#endif
|
18
|
+
|
19
|
+
extern double dnrm2_(int *, double *, int *);
|
20
|
+
extern double ddot_(int *, double *, int *, double *, int *);
|
21
|
+
extern int daxpy_(int *, double *, double *, int *, double *, int *);
|
22
|
+
extern int dscal_(int *, double *, double *, int *);
|
23
|
+
|
24
|
+
#ifdef __cplusplus
|
25
|
+
}
|
26
|
+
#endif
|
27
|
+
|
28
|
+
// Default message sink installed by the NEWTON constructor:
// writes buf to stdout and flushes right away.
static void default_print(const char *buf)
{
	FILE *out = stdout;

	fputs(buf, out);
	fflush(out);
}
|
33
|
+
|
34
|
+
// On entry *f must be the function value of w
|
35
|
+
// On exit w is updated and *f is the new function value
|
36
|
+
double function::linesearch_and_update(double *w, double *s, double *f, double *g, double alpha)
|
37
|
+
{
|
38
|
+
double gTs = 0;
|
39
|
+
double eta = 0.01;
|
40
|
+
int n = get_nr_variable();
|
41
|
+
int max_num_linesearch = 20;
|
42
|
+
double *w_new = new double[n];
|
43
|
+
double fold = *f;
|
44
|
+
|
45
|
+
for (int i=0;i<n;i++)
|
46
|
+
gTs += s[i] * g[i];
|
47
|
+
|
48
|
+
int num_linesearch = 0;
|
49
|
+
for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
|
50
|
+
{
|
51
|
+
for (int i=0;i<n;i++)
|
52
|
+
w_new[i] = w[i] + alpha*s[i];
|
53
|
+
*f = fun(w_new);
|
54
|
+
if (*f - fold <= eta * alpha * gTs)
|
55
|
+
break;
|
56
|
+
else
|
57
|
+
alpha *= 0.5;
|
58
|
+
}
|
59
|
+
|
60
|
+
if (num_linesearch >= max_num_linesearch)
|
61
|
+
{
|
62
|
+
*f = fold;
|
63
|
+
return 0;
|
64
|
+
}
|
65
|
+
else
|
66
|
+
memcpy(w, w_new, sizeof(double)*n);
|
67
|
+
|
68
|
+
delete [] w_new;
|
69
|
+
return alpha;
|
70
|
+
}
|
71
|
+
|
72
|
+
void NEWTON::info(const char *fmt,...)
|
73
|
+
{
|
74
|
+
char buf[BUFSIZ];
|
75
|
+
va_list ap;
|
76
|
+
va_start(ap,fmt);
|
77
|
+
vsprintf(buf,fmt,ap);
|
78
|
+
va_end(ap);
|
79
|
+
(*newton_print_string)(buf);
|
80
|
+
}
|
81
|
+
|
82
|
+
// Stores the objective and the solver settings, and installs
// default_print as the message callback.  The const qualifier of fun_obj
// is cast away because the stored pointer is used to call non-const
// members of `function`.
NEWTON::NEWTON(const function *fun_obj, double eps, double eps_cg, int max_iter)
	: eps(eps),
	  eps_cg(eps_cg),
	  max_iter(max_iter),
	  fun_obj(const_cast<function *>(fun_obj)),
	  newton_print_string(default_print)
{
}
|
90
|
+
|
91
|
+
// Intentionally empty.
NEWTON::~NEWTON() {}
|
94
|
+
|
95
|
+
void NEWTON::newton(double *w)
|
96
|
+
{
|
97
|
+
int n = fun_obj->get_nr_variable();
|
98
|
+
int i, cg_iter;
|
99
|
+
double step_size;
|
100
|
+
double f, fold, actred;
|
101
|
+
double init_step_size = 1;
|
102
|
+
int search = 1, iter = 1, inc = 1;
|
103
|
+
double *s = new double[n];
|
104
|
+
double *r = new double[n];
|
105
|
+
double *g = new double[n];
|
106
|
+
|
107
|
+
const double alpha_pcg = 0.01;
|
108
|
+
double *M = new double[n];
|
109
|
+
|
110
|
+
// calculate gradient norm at w=0 for stopping condition.
|
111
|
+
double *w0 = new double[n];
|
112
|
+
for (i=0; i<n; i++)
|
113
|
+
w0[i] = 0;
|
114
|
+
fun_obj->fun(w0);
|
115
|
+
fun_obj->grad(w0, g);
|
116
|
+
double gnorm0 = dnrm2_(&n, g, &inc);
|
117
|
+
delete [] w0;
|
118
|
+
|
119
|
+
f = fun_obj->fun(w);
|
120
|
+
info("init f %5.3e\n", f);
|
121
|
+
fun_obj->grad(w, g);
|
122
|
+
double gnorm = dnrm2_(&n, g, &inc);
|
123
|
+
|
124
|
+
if (gnorm <= eps*gnorm0)
|
125
|
+
search = 0;
|
126
|
+
|
127
|
+
double *w_new = new double[n];
|
128
|
+
while (iter <= max_iter && search)
|
129
|
+
{
|
130
|
+
fun_obj->get_diag_preconditioner(M);
|
131
|
+
for(i=0; i<n; i++)
|
132
|
+
M[i] = (1-alpha_pcg) + alpha_pcg*M[i];
|
133
|
+
cg_iter = pcg(g, M, s, r);
|
134
|
+
|
135
|
+
fold = f;
|
136
|
+
step_size = fun_obj->linesearch_and_update(w, s, & f, g, init_step_size);
|
137
|
+
|
138
|
+
if (step_size == 0)
|
139
|
+
{
|
140
|
+
info("WARNING: line search fails\n");
|
141
|
+
break;
|
142
|
+
}
|
143
|
+
|
144
|
+
info("iter %2d f %5.3e |g| %5.3e CG %3d step_size %4.2e \n", iter, f, gnorm, cg_iter, step_size);
|
145
|
+
|
146
|
+
actred = fold - f;
|
147
|
+
iter++;
|
148
|
+
|
149
|
+
fun_obj->grad(w, g);
|
150
|
+
|
151
|
+
gnorm = dnrm2_(&n, g, &inc);
|
152
|
+
if (gnorm <= eps*gnorm0)
|
153
|
+
break;
|
154
|
+
if (f < -1.0e+32)
|
155
|
+
{
|
156
|
+
info("WARNING: f < -1.0e+32\n");
|
157
|
+
break;
|
158
|
+
}
|
159
|
+
if (fabs(actred) <= 1.0e-12*fabs(f))
|
160
|
+
{
|
161
|
+
info("WARNING: actred too small\n");
|
162
|
+
break;
|
163
|
+
}
|
164
|
+
}
|
165
|
+
|
166
|
+
delete[] g;
|
167
|
+
delete[] r;
|
168
|
+
delete[] w_new;
|
169
|
+
delete[] s;
|
170
|
+
delete[] M;
|
171
|
+
}
|
172
|
+
|
173
|
+
int NEWTON::pcg(double *g, double *M, double *s, double *r)
|
174
|
+
{
|
175
|
+
int i, inc = 1;
|
176
|
+
int n = fun_obj->get_nr_variable();
|
177
|
+
double one = 1;
|
178
|
+
double *d = new double[n];
|
179
|
+
double *Hd = new double[n];
|
180
|
+
double zTr, znewTrnew, alpha, beta, cgtol;
|
181
|
+
double *z = new double[n];
|
182
|
+
double Q = 0, newQ, Qdiff;
|
183
|
+
|
184
|
+
for (i=0; i<n; i++)
|
185
|
+
{
|
186
|
+
s[i] = 0;
|
187
|
+
r[i] = -g[i];
|
188
|
+
z[i] = r[i] / M[i];
|
189
|
+
d[i] = z[i];
|
190
|
+
}
|
191
|
+
|
192
|
+
zTr = ddot_(&n, z, &inc, r, &inc);
|
193
|
+
double gMinv_norm = sqrt(zTr);
|
194
|
+
cgtol = min(eps_cg, sqrt(gMinv_norm));
|
195
|
+
int cg_iter = 0;
|
196
|
+
int max_cg_iter = max(n, 5);
|
197
|
+
|
198
|
+
while (cg_iter < max_cg_iter)
|
199
|
+
{
|
200
|
+
cg_iter++;
|
201
|
+
fun_obj->Hv(d, Hd);
|
202
|
+
|
203
|
+
alpha = zTr/ddot_(&n, d, &inc, Hd, &inc);
|
204
|
+
daxpy_(&n, &alpha, d, &inc, s, &inc);
|
205
|
+
alpha = -alpha;
|
206
|
+
daxpy_(&n, &alpha, Hd, &inc, r, &inc);
|
207
|
+
|
208
|
+
// Using quadratic approximation as CG stopping criterion
|
209
|
+
newQ = -0.5*(ddot_(&n, s, &inc, r, &inc) - ddot_(&n, s, &inc, g, &inc));
|
210
|
+
Qdiff = newQ - Q;
|
211
|
+
if (newQ <= 0 && Qdiff <= 0)
|
212
|
+
{
|
213
|
+
if (cg_iter * Qdiff >= cgtol * newQ)
|
214
|
+
break;
|
215
|
+
}
|
216
|
+
else
|
217
|
+
{
|
218
|
+
info("WARNING: quadratic approximation > 0 or increasing in CG\n");
|
219
|
+
break;
|
220
|
+
}
|
221
|
+
Q = newQ;
|
222
|
+
|
223
|
+
for (i=0; i<n; i++)
|
224
|
+
z[i] = r[i] / M[i];
|
225
|
+
znewTrnew = ddot_(&n, z, &inc, r, &inc);
|
226
|
+
beta = znewTrnew/zTr;
|
227
|
+
dscal_(&n, &beta, d, &inc);
|
228
|
+
daxpy_(&n, &one, z, &inc, d, &inc);
|
229
|
+
zTr = znewTrnew;
|
230
|
+
}
|
231
|
+
|
232
|
+
if (cg_iter == max_cg_iter)
|
233
|
+
info("WARNING: reaching maximal number of CG steps\n");
|
234
|
+
|
235
|
+
delete[] d;
|
236
|
+
delete[] Hd;
|
237
|
+
delete[] z;
|
238
|
+
|
239
|
+
return(cg_iter);
|
240
|
+
}
|
241
|
+
|
242
|
+
void NEWTON::set_print_string(void (*print_string) (const char *buf))
|
243
|
+
{
|
244
|
+
newton_print_string = print_string;
|
245
|
+
}
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#ifndef _NEWTON_H
|
2
|
+
#define _NEWTON_H
|
3
|
+
|
4
|
+
// Abstract objective interface driven by the NEWTON solver.
class function
{
public:
	// Function value at w.
	virtual double fun(double *w) = 0;

	// Gradient at w, written into g.
	virtual void grad(double *w, double *g) = 0;

	// Writes a result for vector s into Hs (presumably the Hessian-vector
	// product -- confirm in the implementations).
	virtual void Hv(double *s, double *Hs) = 0;

	// Number of variables; the solver sizes its work arrays with it.
	virtual int get_nr_variable() = 0;

	// Fills M with the diagonal preconditioner entries.
	virtual void get_diag_preconditioner(double *M) = 0;

	virtual ~function() {}

	// base implementation in newton.cpp
	virtual double linesearch_and_update(double *w, double *s, double *f, double *g, double alpha);
};
|
17
|
+
|
18
|
+
class NEWTON
|
19
|
+
{
|
20
|
+
public:
|
21
|
+
NEWTON(const function *fun_obj, double eps = 0.1, double eps_cg = 0.5, int max_iter = 1000);
|
22
|
+
~NEWTON();
|
23
|
+
|
24
|
+
void newton(double *w);
|
25
|
+
void set_print_string(void (*i_print) (const char *buf));
|
26
|
+
|
27
|
+
private:
|
28
|
+
int pcg(double *g, double *M, double *s, double *r);
|
29
|
+
|
30
|
+
double eps;
|
31
|
+
double eps_cg;
|
32
|
+
int max_iter;
|
33
|
+
function *fun_obj;
|
34
|
+
void info(const char *fmt,...);
|
35
|
+
void (*newton_print_string)(const char *buf);
|
36
|
+
};
|
37
|
+
#endif
|
@@ -112,6 +112,9 @@ VALUE numo_liblinear_train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_has
|
|
112
112
|
xfree_problem(problem);
|
113
113
|
xfree_parameter(param);
|
114
114
|
|
115
|
+
RB_GC_GUARD(x_val);
|
116
|
+
RB_GC_GUARD(y_val);
|
117
|
+
|
115
118
|
return model_hash;
|
116
119
|
}
|
117
120
|
|
@@ -225,6 +228,9 @@ VALUE numo_liblinear_cross_validation(VALUE self, VALUE x_val, VALUE y_val, VALU
|
|
225
228
|
xfree_problem(problem);
|
226
229
|
xfree_parameter(param);
|
227
230
|
|
231
|
+
RB_GC_GUARD(x_val);
|
232
|
+
RB_GC_GUARD(y_val);
|
233
|
+
|
228
234
|
return t_val;
|
229
235
|
}
|
230
236
|
|
@@ -291,6 +297,8 @@ VALUE numo_liblinear_predict(VALUE self, VALUE x_val, VALUE param_hash, VALUE mo
|
|
291
297
|
xfree_model(model);
|
292
298
|
xfree_parameter(param);
|
293
299
|
|
300
|
+
RB_GC_GUARD(x_val);
|
301
|
+
|
294
302
|
return y_val;
|
295
303
|
}
|
296
304
|
|
@@ -381,6 +389,8 @@ VALUE numo_liblinear_decision_function(VALUE self, VALUE x_val, VALUE param_hash
|
|
381
389
|
xfree_model(model);
|
382
390
|
xfree_parameter(param);
|
383
391
|
|
392
|
+
RB_GC_GUARD(x_val);
|
393
|
+
|
384
394
|
return y_val;
|
385
395
|
}
|
386
396
|
|
@@ -457,6 +467,8 @@ VALUE numo_liblinear_predict_proba(VALUE self, VALUE x_val, VALUE param_hash, VA
|
|
457
467
|
xfree_model(model);
|
458
468
|
xfree_parameter(param);
|
459
469
|
|
470
|
+
RB_GC_GUARD(x_val);
|
471
|
+
|
460
472
|
return y_val;
|
461
473
|
}
|
462
474
|
|
@@ -492,6 +504,8 @@ VALUE numo_liblinear_load_model(VALUE self, VALUE filename)
|
|
492
504
|
rb_ary_store(res, 0, param_hash);
|
493
505
|
rb_ary_store(res, 1, model_hash);
|
494
506
|
|
507
|
+
RB_GC_GUARD(filename);
|
508
|
+
|
495
509
|
return res;
|
496
510
|
}
|
497
511
|
|
@@ -526,6 +540,8 @@ VALUE numo_liblinear_save_model(VALUE self, VALUE filename, VALUE param_hash, VA
|
|
526
540
|
return Qfalse;
|
527
541
|
}
|
528
542
|
|
543
|
+
RB_GC_GUARD(filename);
|
544
|
+
|
529
545
|
return Qtrue;
|
530
546
|
}
|
531
547
|
|
@@ -545,6 +561,9 @@ void Init_liblinearext()
|
|
545
561
|
*/
|
546
562
|
mLiblinear = rb_define_module_under(mNumo, "Liblinear");
|
547
563
|
|
564
|
+
/* The version of LIBLINEAR used in background library. */
|
565
|
+
rb_define_const(mLiblinear, "LIBLINEAR_VERSION", INT2NUM(LIBLINEAR_VERSION));
|
566
|
+
|
548
567
|
rb_define_module_function(mLiblinear, "train", numo_liblinear_train, 3);
|
549
568
|
rb_define_module_function(mLiblinear, "cv", numo_liblinear_cross_validation, 4);
|
550
569
|
rb_define_module_function(mLiblinear, "predict", numo_liblinear_predict, 3);
|
data/ext/numo/liblinear/model.c
CHANGED
@@ -14,6 +14,8 @@ struct model* rb_hash_to_model(VALUE model_hash)
|
|
14
14
|
model->label = nary_to_int_vec(el);
|
15
15
|
el = rb_hash_aref(model_hash, ID2SYM(rb_intern("bias")));
|
16
16
|
model->bias = NUM2DBL(el);
|
17
|
+
el = rb_hash_aref(model_hash, ID2SYM(rb_intern("rho")));
|
18
|
+
model->rho = NUM2DBL(el);
|
17
19
|
return model;
|
18
20
|
}
|
19
21
|
|
@@ -29,6 +31,7 @@ VALUE model_to_rb_hash(struct model* const model)
|
|
29
31
|
rb_hash_aset(model_hash, ID2SYM(rb_intern("label")),
|
30
32
|
model->label ? int_vec_to_nary(model->label, model->nr_class) : Qnil);
|
31
33
|
rb_hash_aset(model_hash, ID2SYM(rb_intern("bias")), DBL2NUM(model->bias));
|
34
|
+
rb_hash_aset(model_hash, ID2SYM(rb_intern("rho")), DBL2NUM(model->rho));
|
32
35
|
return model_hash;
|
33
36
|
}
|
34
37
|
|