numo-liblinear 0.5.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitmodules +3 -0
- data/.travis.yml +0 -1
- data/CHANGELOG.md +5 -0
- data/README.md +1 -9
- data/ext/numo/liblinear/extconf.rb +7 -8
- data/ext/numo/liblinear/liblinear/blas/blas.h +25 -0
- data/ext/numo/liblinear/liblinear/blas/blasp.h +438 -0
- data/ext/numo/liblinear/liblinear/blas/daxpy.c +57 -0
- data/ext/numo/liblinear/liblinear/blas/ddot.c +58 -0
- data/ext/numo/liblinear/liblinear/blas/dnrm2.c +70 -0
- data/ext/numo/liblinear/liblinear/blas/dscal.c +52 -0
- data/ext/numo/liblinear/liblinear/linear.cpp +3203 -0
- data/ext/numo/liblinear/liblinear/linear.h +83 -0
- data/ext/numo/liblinear/liblinear/tron.cpp +288 -0
- data/ext/numo/liblinear/liblinear/tron.h +36 -0
- data/ext/numo/liblinear/liblinearext.c +3 -0
- data/lib/numo/liblinear/version.rb +1 -1
- data/numo-liblinear.gemspec +8 -0
- metadata +13 -2
@@ -0,0 +1,83 @@
|
|
1
|
+
#ifndef _LIBLINEAR_H
|
2
|
+
#define _LIBLINEAR_H
|
3
|
+
|
4
|
+
#define LIBLINEAR_VERSION 230
|
5
|
+
|
6
|
+
#ifdef __cplusplus
|
7
|
+
extern "C" {
|
8
|
+
#endif
|
9
|
+
|
10
|
+
extern int liblinear_version;
|
11
|
+
|
12
|
+
/*
 * One (index, value) pair.
 * NOTE(review): presumably a single entry of a sparse feature vector;
 * confirm the indexing convention against linear.cpp.
 */
struct feature_node {
	int index;      /* position of the entry */
	double value;   /* value stored at that position */
};
|
17
|
+
|
18
|
+
/*
 * Data set handed to train(), cross_validation() and find_parameters().
 * NOTE(review): l and n are presumably the instance and feature counts;
 * confirm against linear.cpp.
 */
struct problem {
	int l;
	int n;
	double *y;                  /* array of doubles; presumably one target per instance - confirm */
	struct feature_node **x;    /* array of pointers to feature_node data */
	double bias;                /* < 0 if no bias term */
};
|
25
|
+
|
26
|
+
/* solver_type: values stored in parameter.solver_type. */
enum {
	L2R_LR,                 /* 0 */
	L2R_L2LOSS_SVC_DUAL,    /* 1 */
	L2R_L2LOSS_SVC,         /* 2 */
	L2R_L1LOSS_SVC_DUAL,    /* 3 */
	MCSVM_CS,               /* 4 */
	L1R_L2LOSS_SVC,         /* 5 */
	L1R_LR,                 /* 6 */
	L2R_LR_DUAL,            /* 7 */
	/* values 8-10 are not used by this enum; numbering resumes at 11 */
	L2R_L2LOSS_SVR = 11,
	L2R_L2LOSS_SVR_DUAL,    /* 12 */
	L2R_L1LOSS_SVR_DUAL     /* 13 */
};
|
27
|
+
|
28
|
+
/*
 * Solver configuration.  Everything after solver_type is used for
 * training only.
 */
struct parameter {
	int solver_type;        /* one of the solver_type enum constants */

	/* these are for training only */
	double eps;             /* stopping criteria */
	double C;
	int nr_weight;          /* presumably the length of weight_label/weight - confirm */
	int *weight_label;
	double *weight;
	double p;
	double *init_sol;
};
|
41
|
+
|
42
|
+
struct model
|
43
|
+
{
|
44
|
+
struct parameter param;
|
45
|
+
int nr_class; /* number of classes */
|
46
|
+
int nr_feature;
|
47
|
+
double *w;
|
48
|
+
int *label; /* label of each class */
|
49
|
+
double bias;
|
50
|
+
};
|
51
|
+
|
52
|
+
struct model* train(const struct problem *prob, const struct parameter *param);
|
53
|
+
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
|
54
|
+
void find_parameters(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score);
|
55
|
+
|
56
|
+
double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
|
57
|
+
double predict(const struct model *model_, const struct feature_node *x);
|
58
|
+
double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);
|
59
|
+
|
60
|
+
int save_model(const char *model_file_name, const struct model *model_);
|
61
|
+
struct model *load_model(const char *model_file_name);
|
62
|
+
|
63
|
+
int get_nr_feature(const struct model *model_);
|
64
|
+
int get_nr_class(const struct model *model_);
|
65
|
+
void get_labels(const struct model *model_, int* label);
|
66
|
+
double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx);
|
67
|
+
double get_decfun_bias(const struct model *model_, int label_idx);
|
68
|
+
|
69
|
+
void free_model_content(struct model *model_ptr);
|
70
|
+
void free_and_destroy_model(struct model **model_ptr_ptr);
|
71
|
+
void destroy_param(struct parameter *param);
|
72
|
+
|
73
|
+
const char *check_parameter(const struct problem *prob, const struct parameter *param);
|
74
|
+
int check_probability_model(const struct model *model);
|
75
|
+
int check_regression_model(const struct model *model);
|
76
|
+
void set_print_string_function(void (*print_func) (const char*));
|
77
|
+
|
78
|
+
#ifdef __cplusplus
|
79
|
+
}
|
80
|
+
#endif
|
81
|
+
|
82
|
+
#endif /* _LIBLINEAR_H */
|
83
|
+
|
@@ -0,0 +1,288 @@
|
|
1
|
+
#include <math.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
#include <stdarg.h>
|
5
|
+
#include "tron.h"
|
6
|
+
|
7
|
+
#ifndef min
|
8
|
+
// Returns x when x < y, otherwise y (so y is returned for equal arguments).
template <class T>
static inline T min(T x, T y)
{
	if (x < y)
		return x;
	return y;
}
|
9
|
+
#endif
|
10
|
+
|
11
|
+
#ifndef max
|
12
|
+
// Returns x when x > y, otherwise y (so y is returned for equal arguments).
template <class T>
static inline T max(T x, T y)
{
	if (x > y)
		return x;
	return y;
}
|
13
|
+
#endif
|
14
|
+
|
15
|
+
#ifdef __cplusplus
|
16
|
+
extern "C" {
|
17
|
+
#endif
|
18
|
+
|
19
|
+
extern double dnrm2_(int *, double *, int *);
|
20
|
+
extern double ddot_(int *, double *, int *, double *, int *);
|
21
|
+
extern int daxpy_(int *, double *, double *, int *, double *, int *);
|
22
|
+
extern int dscal_(int *, double *, double *, int *);
|
23
|
+
|
24
|
+
#ifdef __cplusplus
|
25
|
+
}
|
26
|
+
#endif
|
27
|
+
|
28
|
+
// Default output sink: writes buf to stdout and flushes immediately so the
// message is not left sitting in the stdio buffer.
static void default_print(const char *buf)
{
	fputs(buf, stdout);
	fflush(stdout);
}
|
33
|
+
|
34
|
+
// Returns sum over i in [0, n) of u[i]*M[i]*v[i], i.e. u^T diag(M) v with M
// holding the diagonal.  The main loop handles five elements per pass; the
// trailing loop picks up the remaining (at most four) elements.
static double uTMv(int n, double *u, double *M, double *v)
{
	const int unrolled_end = n - 4;	// last start index for a full group of five
	double total = 0;
	int k = 0;

	while (k < unrolled_end)
	{
		total += u[k]*M[k]*v[k]+u[k+1]*M[k+1]*v[k+1]+u[k+2]*M[k+2]*v[k+2]+
			u[k+3]*M[k+3]*v[k+3]+u[k+4]*M[k+4]*v[k+4];
		k += 5;
	}

	while (k < n)
	{
		total += u[k]*M[k]*v[k];
		k++;
	}

	return total;
}
|
46
|
+
|
47
|
+
void TRON::info(const char *fmt,...)
|
48
|
+
{
|
49
|
+
char buf[BUFSIZ];
|
50
|
+
va_list ap;
|
51
|
+
va_start(ap,fmt);
|
52
|
+
vsprintf(buf,fmt,ap);
|
53
|
+
va_end(ap);
|
54
|
+
(*tron_print_string)(buf);
|
55
|
+
}
|
56
|
+
|
57
|
+
// Stores the objective function and the solver settings.  Output is routed
// to default_print until set_print_string() installs a different callback.
//
// fun_obj   objective to minimise; kept as a non-const pointer because the
//           solver calls it through a `function *` member
// eps       tolerance for the outer stopping test in tron()
// eps_cg    tolerance for the conjugate gradient loop in trpcg()
// max_iter  upper bound on the iteration counter in tron()
TRON::TRON(const function *fun_obj, double eps, double eps_cg, int max_iter)
	: eps(eps),
	  eps_cg(eps_cg),
	  max_iter(max_iter),
	  fun_obj(const_cast<function *>(fun_obj)),
	  tron_print_string(default_print)
{
}
|
65
|
+
|
66
|
+
// Nothing to release: the constructor only stores the fun_obj pointer and
// this destructor does not free it.
TRON::~TRON() {}
|
69
|
+
|
70
|
+
void TRON::tron(double *w)
|
71
|
+
{
|
72
|
+
// Parameters for updating the iterates.
|
73
|
+
double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75;
|
74
|
+
|
75
|
+
// Parameters for updating the trust region size delta.
|
76
|
+
double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4;
|
77
|
+
|
78
|
+
int n = fun_obj->get_nr_variable();
|
79
|
+
int i, cg_iter;
|
80
|
+
double delta=0, sMnorm, one=1.0;
|
81
|
+
double alpha, f, fnew, prered, actred, gs;
|
82
|
+
int search = 1, iter = 1, inc = 1;
|
83
|
+
double *s = new double[n];
|
84
|
+
double *r = new double[n];
|
85
|
+
double *g = new double[n];
|
86
|
+
|
87
|
+
const double alpha_pcg = 0.01;
|
88
|
+
double *M = new double[n];
|
89
|
+
|
90
|
+
// calculate gradient norm at w=0 for stopping condition.
|
91
|
+
double *w0 = new double[n];
|
92
|
+
for (i=0; i<n; i++)
|
93
|
+
w0[i] = 0;
|
94
|
+
fun_obj->fun(w0);
|
95
|
+
fun_obj->grad(w0, g);
|
96
|
+
double gnorm0 = dnrm2_(&n, g, &inc);
|
97
|
+
delete [] w0;
|
98
|
+
|
99
|
+
f = fun_obj->fun(w);
|
100
|
+
fun_obj->grad(w, g);
|
101
|
+
double gnorm = dnrm2_(&n, g, &inc);
|
102
|
+
|
103
|
+
if (gnorm <= eps*gnorm0)
|
104
|
+
search = 0;
|
105
|
+
|
106
|
+
fun_obj->get_diag_preconditioner(M);
|
107
|
+
for(i=0; i<n; i++)
|
108
|
+
M[i] = (1-alpha_pcg) + alpha_pcg*M[i];
|
109
|
+
delta = sqrt(uTMv(n, g, M, g));
|
110
|
+
|
111
|
+
double *w_new = new double[n];
|
112
|
+
bool reach_boundary;
|
113
|
+
bool delta_adjusted = false;
|
114
|
+
while (iter <= max_iter && search)
|
115
|
+
{
|
116
|
+
cg_iter = trpcg(delta, g, M, s, r, &reach_boundary);
|
117
|
+
|
118
|
+
memcpy(w_new, w, sizeof(double)*n);
|
119
|
+
daxpy_(&n, &one, s, &inc, w_new, &inc);
|
120
|
+
|
121
|
+
gs = ddot_(&n, g, &inc, s, &inc);
|
122
|
+
prered = -0.5*(gs-ddot_(&n, s, &inc, r, &inc));
|
123
|
+
fnew = fun_obj->fun(w_new);
|
124
|
+
|
125
|
+
// Compute the actual reduction.
|
126
|
+
actred = f - fnew;
|
127
|
+
|
128
|
+
// On the first iteration, adjust the initial step bound.
|
129
|
+
sMnorm = sqrt(uTMv(n, s, M, s));
|
130
|
+
if (iter == 1 && !delta_adjusted)
|
131
|
+
{
|
132
|
+
delta = min(delta, sMnorm);
|
133
|
+
delta_adjusted = true;
|
134
|
+
}
|
135
|
+
|
136
|
+
// Compute prediction alpha*sMnorm of the step.
|
137
|
+
if (fnew - f - gs <= 0)
|
138
|
+
alpha = sigma3;
|
139
|
+
else
|
140
|
+
alpha = max(sigma1, -0.5*(gs/(fnew - f - gs)));
|
141
|
+
|
142
|
+
// Update the trust region bound according to the ratio of actual to predicted reduction.
|
143
|
+
if (actred < eta0*prered)
|
144
|
+
delta = min(alpha*sMnorm, sigma2*delta);
|
145
|
+
else if (actred < eta1*prered)
|
146
|
+
delta = max(sigma1*delta, min(alpha*sMnorm, sigma2*delta));
|
147
|
+
else if (actred < eta2*prered)
|
148
|
+
delta = max(sigma1*delta, min(alpha*sMnorm, sigma3*delta));
|
149
|
+
else
|
150
|
+
{
|
151
|
+
if (reach_boundary)
|
152
|
+
delta = sigma3*delta;
|
153
|
+
else
|
154
|
+
delta = max(delta, min(alpha*sMnorm, sigma3*delta));
|
155
|
+
}
|
156
|
+
|
157
|
+
info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter);
|
158
|
+
|
159
|
+
if (actred > eta0*prered)
|
160
|
+
{
|
161
|
+
iter++;
|
162
|
+
memcpy(w, w_new, sizeof(double)*n);
|
163
|
+
f = fnew;
|
164
|
+
fun_obj->grad(w, g);
|
165
|
+
fun_obj->get_diag_preconditioner(M);
|
166
|
+
for(i=0; i<n; i++)
|
167
|
+
M[i] = (1-alpha_pcg) + alpha_pcg*M[i];
|
168
|
+
|
169
|
+
gnorm = dnrm2_(&n, g, &inc);
|
170
|
+
if (gnorm <= eps*gnorm0)
|
171
|
+
break;
|
172
|
+
}
|
173
|
+
if (f < -1.0e+32)
|
174
|
+
{
|
175
|
+
info("WARNING: f < -1.0e+32\n");
|
176
|
+
break;
|
177
|
+
}
|
178
|
+
if (prered <= 0)
|
179
|
+
{
|
180
|
+
info("WARNING: prered <= 0\n");
|
181
|
+
break;
|
182
|
+
}
|
183
|
+
if (fabs(actred) <= 1.0e-12*fabs(f) &&
|
184
|
+
fabs(prered) <= 1.0e-12*fabs(f))
|
185
|
+
{
|
186
|
+
info("WARNING: actred and prered too small\n");
|
187
|
+
break;
|
188
|
+
}
|
189
|
+
}
|
190
|
+
|
191
|
+
delete[] g;
|
192
|
+
delete[] r;
|
193
|
+
delete[] w_new;
|
194
|
+
delete[] s;
|
195
|
+
delete[] M;
|
196
|
+
}
|
197
|
+
|
198
|
+
int TRON::trpcg(double delta, double *g, double *M, double *s, double *r, bool *reach_boundary)
|
199
|
+
{
|
200
|
+
int i, inc = 1;
|
201
|
+
int n = fun_obj->get_nr_variable();
|
202
|
+
double one = 1;
|
203
|
+
double *d = new double[n];
|
204
|
+
double *Hd = new double[n];
|
205
|
+
double zTr, znewTrnew, alpha, beta, cgtol;
|
206
|
+
double *z = new double[n];
|
207
|
+
|
208
|
+
*reach_boundary = false;
|
209
|
+
for (i=0; i<n; i++)
|
210
|
+
{
|
211
|
+
s[i] = 0;
|
212
|
+
r[i] = -g[i];
|
213
|
+
z[i] = r[i] / M[i];
|
214
|
+
d[i] = z[i];
|
215
|
+
}
|
216
|
+
|
217
|
+
zTr = ddot_(&n, z, &inc, r, &inc);
|
218
|
+
cgtol = eps_cg*sqrt(zTr);
|
219
|
+
int cg_iter = 0;
|
220
|
+
int max_cg_iter = max(n, 5);
|
221
|
+
|
222
|
+
while (cg_iter < max_cg_iter)
|
223
|
+
{
|
224
|
+
if (sqrt(zTr) <= cgtol)
|
225
|
+
break;
|
226
|
+
cg_iter++;
|
227
|
+
fun_obj->Hv(d, Hd);
|
228
|
+
|
229
|
+
alpha = zTr/ddot_(&n, d, &inc, Hd, &inc);
|
230
|
+
daxpy_(&n, &alpha, d, &inc, s, &inc);
|
231
|
+
|
232
|
+
double sMnorm = sqrt(uTMv(n, s, M, s));
|
233
|
+
if (sMnorm > delta)
|
234
|
+
{
|
235
|
+
info("cg reaches trust region boundary\n");
|
236
|
+
*reach_boundary = true;
|
237
|
+
alpha = -alpha;
|
238
|
+
daxpy_(&n, &alpha, d, &inc, s, &inc);
|
239
|
+
|
240
|
+
double sTMd = uTMv(n, s, M, d);
|
241
|
+
double sTMs = uTMv(n, s, M, s);
|
242
|
+
double dTMd = uTMv(n, d, M, d);
|
243
|
+
double dsq = delta*delta;
|
244
|
+
double rad = sqrt(sTMd*sTMd + dTMd*(dsq-sTMs));
|
245
|
+
if (sTMd >= 0)
|
246
|
+
alpha = (dsq - sTMs)/(sTMd + rad);
|
247
|
+
else
|
248
|
+
alpha = (rad - sTMd)/dTMd;
|
249
|
+
daxpy_(&n, &alpha, d, &inc, s, &inc);
|
250
|
+
alpha = -alpha;
|
251
|
+
daxpy_(&n, &alpha, Hd, &inc, r, &inc);
|
252
|
+
break;
|
253
|
+
}
|
254
|
+
alpha = -alpha;
|
255
|
+
daxpy_(&n, &alpha, Hd, &inc, r, &inc);
|
256
|
+
|
257
|
+
for (i=0; i<n; i++)
|
258
|
+
z[i] = r[i] / M[i];
|
259
|
+
znewTrnew = ddot_(&n, z, &inc, r, &inc);
|
260
|
+
beta = znewTrnew/zTr;
|
261
|
+
dscal_(&n, &beta, d, &inc);
|
262
|
+
daxpy_(&n, &one, z, &inc, d, &inc);
|
263
|
+
zTr = znewTrnew;
|
264
|
+
}
|
265
|
+
|
266
|
+
if (cg_iter == max_cg_iter)
|
267
|
+
info("WARNING: reaching maximal number of CG steps\n");
|
268
|
+
|
269
|
+
delete[] d;
|
270
|
+
delete[] Hd;
|
271
|
+
delete[] z;
|
272
|
+
|
273
|
+
return(cg_iter);
|
274
|
+
}
|
275
|
+
|
276
|
+
// Returns the largest absolute value among x[0..n-1].
//
// n  number of elements in x
// x  array to scan (not modified)
//
// For n <= 0 there is nothing to read, so 0 is returned instead of touching
// x[0].
double TRON::norm_inf(int n, double *x)
{
	if (n <= 0)
		return 0;

	double dmax = fabs(x[0]);
	for (int i=1; i<n; i++)
	{
		const double magnitude = fabs(x[i]);
		if (magnitude >= dmax)
			dmax = magnitude;
	}
	return(dmax);
}
|
284
|
+
|
285
|
+
void TRON::set_print_string(void (*print_string) (const char *buf))
|
286
|
+
{
|
287
|
+
tron_print_string = print_string;
|
288
|
+
}
|
@@ -0,0 +1,36 @@
|
|
1
|
+
#ifndef _TRON_H
|
2
|
+
#define _TRON_H
|
3
|
+
|
4
|
+
// Abstract objective interface consumed by the TRON solver.  Concrete
// objectives implement every pure virtual member below.
class function
{
public:
	// Objective value at w.
	virtual double fun(double *w) = 0;
	// Writes the gradient at w into g.
	virtual void grad(double *w, double *g) = 0;
	// Writes into Hs the product applied to s (NOTE(review): presumably a
	// Hessian-vector product at the last evaluated point; confirm in the
	// implementations).
	virtual void Hv(double *s, double *Hs) = 0;

	// Number of variables; the solver sizes all of its work arrays with it.
	virtual int get_nr_variable() = 0;
	// Writes the diagonal preconditioner into M.
	virtual void get_diag_preconditioner(double *M) = 0;

	virtual ~function() {}
};
|
15
|
+
|
16
|
+
class TRON
|
17
|
+
{
|
18
|
+
public:
|
19
|
+
TRON(const function *fun_obj, double eps = 0.1, double eps_cg = 0.1, int max_iter = 1000);
|
20
|
+
~TRON();
|
21
|
+
|
22
|
+
void tron(double *w);
|
23
|
+
void set_print_string(void (*i_print) (const char *buf));
|
24
|
+
|
25
|
+
private:
|
26
|
+
int trpcg(double delta, double *g, double *M, double *s, double *r, bool *reach_boundary);
|
27
|
+
double norm_inf(int n, double *x);
|
28
|
+
|
29
|
+
double eps;
|
30
|
+
double eps_cg;
|
31
|
+
int max_iter;
|
32
|
+
function *fun_obj;
|
33
|
+
void info(const char *fmt,...);
|
34
|
+
void (*tron_print_string)(const char *buf);
|
35
|
+
};
|
36
|
+
#endif
|
@@ -545,6 +545,9 @@ void Init_liblinearext()
|
|
545
545
|
*/
|
546
546
|
mLiblinear = rb_define_module_under(mNumo, "Liblinear");
|
547
547
|
|
548
|
+
/* The version of LIBLINEAR used in the background library. */
|
549
|
+
rb_define_const(mLiblinear, "LIBLINEAR_VERSION", INT2NUM(LIBLINEAR_VERSION));
|
550
|
+
|
548
551
|
rb_define_module_function(mLiblinear, "train", numo_liblinear_train, 3);
|
549
552
|
rb_define_module_function(mLiblinear, "cv", numo_liblinear_cross_validation, 4);
|
550
553
|
rb_define_module_function(mLiblinear, "predict", numo_liblinear_predict, 3);
|
data/numo-liblinear.gemspec
CHANGED
@@ -27,6 +27,13 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
28
28
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
29
29
|
end
|
30
|
+
|
31
|
+
gem_dir = File.expand_path(__dir__) + '/'
|
32
|
+
submodule_path = `git submodule --quiet foreach pwd`.split($OUTPUT_RECORD_SEPARATOR).first
|
33
|
+
submodule_relative_path = submodule_path.sub gem_dir, ''
|
34
|
+
liblinear_files = %w[linear.cpp linear.h tron.cpp tron.h blas/blas.h blas/blasp.h blas/daxpy.c blas/ddot.c blas/dnrm2.c blas/dscal.c]
|
35
|
+
liblinear_files.each { |liblinf| spec.files << "#{submodule_relative_path}/#{liblinf}" }
|
36
|
+
|
30
37
|
spec.bindir = 'exe'
|
31
38
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
32
39
|
spec.require_paths = ['lib']
|
@@ -39,6 +46,7 @@ Gem::Specification.new do |spec|
|
|
39
46
|
}
|
40
47
|
|
41
48
|
spec.add_runtime_dependency 'numo-narray', '~> 0.9.1'
|
49
|
+
|
42
50
|
spec.add_development_dependency 'bundler', '~> 2.0'
|
43
51
|
spec.add_development_dependency 'rake', '~> 10.0'
|
44
52
|
spec.add_development_dependency 'rake-compiler', '~> 1.0'
|