numo-liblinear 0.5.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,83 @@
1
+ #ifndef _LIBLINEAR_H
2
+ #define _LIBLINEAR_H
3
+
4
+ #define LIBLINEAR_VERSION 230
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ extern int liblinear_version;
11
+
12
/*
 * One (index, value) entry of a feature vector.
 * NOTE(review): presumably vectors are stored sparsely as arrays of these
 * entries -- confirm the indexing base and terminator convention against
 * the implementation, which is not visible in this header.
 */
struct feature_node {
	int    index;	/* feature index of this entry */
	double value;	/* feature value of this entry */
};
17
+
18
/*
 * Description of a data set handed to the training routines.
 * NOTE(review): l and n are presumably the number of instances and the
 * number of features respectively -- confirm against the implementation.
 */
struct problem {
	int l;
	int n;
	double *y;			/* target values (one per instance, presumably) */
	struct feature_node **x;	/* feature vectors (one per instance, presumably) */
	double bias;			/* < 0 if no bias term */
};
25
+
26
/* solver_type: identifiers stored in parameter.solver_type. */
enum {
	L2R_LR              = 0,
	L2R_L2LOSS_SVC_DUAL = 1,
	L2R_L2LOSS_SVC      = 2,
	L2R_L1LOSS_SVC_DUAL = 3,
	MCSVM_CS            = 4,
	L1R_L2LOSS_SVC      = 5,
	L1R_LR              = 6,
	L2R_LR_DUAL         = 7,
	/* values 8..10 are intentionally left unassigned */
	L2R_L2LOSS_SVR      = 11,
	L2R_L2LOSS_SVR_DUAL = 12,
	L2R_L1LOSS_SVR_DUAL = 13
};
27
+
28
/*
 * Solver configuration.  Only solver_type is outside the "training only"
 * group; every other field is consulted during training only.
 */
struct parameter {
	int solver_type;	/* one of the solver_type enumerators */

	/* these are for training only */
	double  eps;		/* stopping criteria */
	double  C;
	int     nr_weight;	/* presumably the length of weight_label / weight -- confirm */
	int    *weight_label;
	double *weight;
	double  p;
	double *init_sol;
};
41
+
42
+ struct model
43
+ {
44
+ struct parameter param;
45
+ int nr_class; /* number of classes */
46
+ int nr_feature;
47
+ double *w;
48
+ int *label; /* label of each class */
49
+ double bias;
50
+ };
51
+
52
/* ---- training and model selection ---- */
struct model *train(const struct problem *prob,
                    const struct parameter *param);
void cross_validation(const struct problem *prob,
                      const struct parameter *param,
                      int nr_fold, double *target);
void find_parameters(const struct problem *prob,
                     const struct parameter *param,
                     int nr_fold, double start_C, double start_p,
                     double *best_C, double *best_p, double *best_score);

/* ---- prediction ---- */
double predict_values(const struct model *model_,
                      const struct feature_node *x, double *dec_values);
double predict(const struct model *model_, const struct feature_node *x);
double predict_probability(const struct model *model_,
                           const struct feature_node *x,
                           double *prob_estimates);

/* ---- model persistence ---- */
int save_model(const char *model_file_name, const struct model *model_);
struct model *load_model(const char *model_file_name);

/* ---- model accessors ---- */
int get_nr_feature(const struct model *model_);
int get_nr_class(const struct model *model_);
void get_labels(const struct model *model_, int *label);
double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx);
double get_decfun_bias(const struct model *model_, int label_idx);

/* ---- resource release ---- */
void free_model_content(struct model *model_ptr);
void free_and_destroy_model(struct model **model_ptr_ptr);
void destroy_param(struct parameter *param);

/* ---- validation and logging hook ---- */
const char *check_parameter(const struct problem *prob,
                            const struct parameter *param);
int check_probability_model(const struct model *model);
int check_regression_model(const struct model *model);
void set_print_string_function(void (*print_func)(const char *));
77
+
78
+ #ifdef __cplusplus
79
+ }
80
+ #endif
81
+
82
+ #endif /* _LIBLINEAR_H */
83
+
@@ -0,0 +1,288 @@
1
+ #include <math.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+ #include <stdarg.h>
5
+ #include "tron.h"
6
+
7
#ifndef min
// Smaller of two values of the same type; yields y when neither compares less.
template <class T> static inline T min(T x,T y)
{
	if (x < y)
		return x;
	return y;
}
#endif
10
+
11
#ifndef max
// Larger of two values of the same type; yields y when x is not greater.
template <class T> static inline T max(T x,T y)
{
	if (x > y)
		return x;
	return y;
}
#endif
14
+
15
#ifdef __cplusplus
extern "C" {
#endif

// Externally provided level-1 BLAS routines. Every argument, including
// scalars, is passed by pointer.
// NOTE(review): parameter names follow the reference BLAS convention;
// confirm against the bundled blas/ sources.
extern double dnrm2_(int *n, double *x, int *incx);
extern double ddot_(int *n, double *sx, int *incx, double *sy, int *incy);
extern int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy, int *incy);
extern int dscal_(int *n, double *sa, double *sx, int *incx);

#ifdef __cplusplus
}
#endif
27
+
28
// Default message sink: writes buf to stdout and flushes immediately.
static void default_print(const char *buf)
{
	FILE *out = stdout;
	fputs(buf, out);
	fflush(out);
}
33
+
34
// Returns sum_k u[k]*M[k]*v[k] for k in [0, n), i.e. u' * diag(M) * v.
// The bulk of the range is processed five terms per pass; the remainder is
// handled one term at a time. The grouping of the additions is kept as is
// so the floating-point result does not change.
static double uTMv(int n, double *u, double *M, double *v)
{
	double total = 0;
	int k = 0;

	while (k < n-4)
	{
		total += u[k]*M[k]*v[k]+u[k+1]*M[k+1]*v[k+1]+u[k+2]*M[k+2]*v[k+2]+
			u[k+3]*M[k+3]*v[k+3]+u[k+4]*M[k+4]*v[k+4];
		k += 5;
	}
	while (k < n)
	{
		total += u[k]*M[k]*v[k];
		++k;
	}
	return total;
}
46
+
47
+ void TRON::info(const char *fmt,...)
48
+ {
49
+ char buf[BUFSIZ];
50
+ va_list ap;
51
+ va_start(ap,fmt);
52
+ vsprintf(buf,fmt,ap);
53
+ va_end(ap);
54
+ (*tron_print_string)(buf);
55
+ }
56
+
57
+ TRON::TRON(const function *fun_obj, double eps, double eps_cg, int max_iter)
58
+ {
59
+ this->fun_obj=const_cast<function *>(fun_obj);
60
+ this->eps=eps;
61
+ this->eps_cg=eps_cg;
62
+ this->max_iter=max_iter;
63
+ tron_print_string = default_print;
64
+ }
65
+
66
+ TRON::~TRON()
67
+ {
68
+ }
69
+
70
+ void TRON::tron(double *w)
71
+ {
72
+ // Parameters for updating the iterates.
73
+ double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75;
74
+
75
+ // Parameters for updating the trust region size delta.
76
+ double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4;
77
+
78
+ int n = fun_obj->get_nr_variable();
79
+ int i, cg_iter;
80
+ double delta=0, sMnorm, one=1.0;
81
+ double alpha, f, fnew, prered, actred, gs;
82
+ int search = 1, iter = 1, inc = 1;
83
+ double *s = new double[n];
84
+ double *r = new double[n];
85
+ double *g = new double[n];
86
+
87
+ const double alpha_pcg = 0.01;
88
+ double *M = new double[n];
89
+
90
+ // calculate gradient norm at w=0 for stopping condition.
91
+ double *w0 = new double[n];
92
+ for (i=0; i<n; i++)
93
+ w0[i] = 0;
94
+ fun_obj->fun(w0);
95
+ fun_obj->grad(w0, g);
96
+ double gnorm0 = dnrm2_(&n, g, &inc);
97
+ delete [] w0;
98
+
99
+ f = fun_obj->fun(w);
100
+ fun_obj->grad(w, g);
101
+ double gnorm = dnrm2_(&n, g, &inc);
102
+
103
+ if (gnorm <= eps*gnorm0)
104
+ search = 0;
105
+
106
+ fun_obj->get_diag_preconditioner(M);
107
+ for(i=0; i<n; i++)
108
+ M[i] = (1-alpha_pcg) + alpha_pcg*M[i];
109
+ delta = sqrt(uTMv(n, g, M, g));
110
+
111
+ double *w_new = new double[n];
112
+ bool reach_boundary;
113
+ bool delta_adjusted = false;
114
+ while (iter <= max_iter && search)
115
+ {
116
+ cg_iter = trpcg(delta, g, M, s, r, &reach_boundary);
117
+
118
+ memcpy(w_new, w, sizeof(double)*n);
119
+ daxpy_(&n, &one, s, &inc, w_new, &inc);
120
+
121
+ gs = ddot_(&n, g, &inc, s, &inc);
122
+ prered = -0.5*(gs-ddot_(&n, s, &inc, r, &inc));
123
+ fnew = fun_obj->fun(w_new);
124
+
125
+ // Compute the actual reduction.
126
+ actred = f - fnew;
127
+
128
+ // On the first iteration, adjust the initial step bound.
129
+ sMnorm = sqrt(uTMv(n, s, M, s));
130
+ if (iter == 1 && !delta_adjusted)
131
+ {
132
+ delta = min(delta, sMnorm);
133
+ delta_adjusted = true;
134
+ }
135
+
136
+ // Compute prediction alpha*sMnorm of the step.
137
+ if (fnew - f - gs <= 0)
138
+ alpha = sigma3;
139
+ else
140
+ alpha = max(sigma1, -0.5*(gs/(fnew - f - gs)));
141
+
142
+ // Update the trust region bound according to the ratio of actual to predicted reduction.
143
+ if (actred < eta0*prered)
144
+ delta = min(alpha*sMnorm, sigma2*delta);
145
+ else if (actred < eta1*prered)
146
+ delta = max(sigma1*delta, min(alpha*sMnorm, sigma2*delta));
147
+ else if (actred < eta2*prered)
148
+ delta = max(sigma1*delta, min(alpha*sMnorm, sigma3*delta));
149
+ else
150
+ {
151
+ if (reach_boundary)
152
+ delta = sigma3*delta;
153
+ else
154
+ delta = max(delta, min(alpha*sMnorm, sigma3*delta));
155
+ }
156
+
157
+ info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter);
158
+
159
+ if (actred > eta0*prered)
160
+ {
161
+ iter++;
162
+ memcpy(w, w_new, sizeof(double)*n);
163
+ f = fnew;
164
+ fun_obj->grad(w, g);
165
+ fun_obj->get_diag_preconditioner(M);
166
+ for(i=0; i<n; i++)
167
+ M[i] = (1-alpha_pcg) + alpha_pcg*M[i];
168
+
169
+ gnorm = dnrm2_(&n, g, &inc);
170
+ if (gnorm <= eps*gnorm0)
171
+ break;
172
+ }
173
+ if (f < -1.0e+32)
174
+ {
175
+ info("WARNING: f < -1.0e+32\n");
176
+ break;
177
+ }
178
+ if (prered <= 0)
179
+ {
180
+ info("WARNING: prered <= 0\n");
181
+ break;
182
+ }
183
+ if (fabs(actred) <= 1.0e-12*fabs(f) &&
184
+ fabs(prered) <= 1.0e-12*fabs(f))
185
+ {
186
+ info("WARNING: actred and prered too small\n");
187
+ break;
188
+ }
189
+ }
190
+
191
+ delete[] g;
192
+ delete[] r;
193
+ delete[] w_new;
194
+ delete[] s;
195
+ delete[] M;
196
+ }
197
+
198
+ int TRON::trpcg(double delta, double *g, double *M, double *s, double *r, bool *reach_boundary)
199
+ {
200
+ int i, inc = 1;
201
+ int n = fun_obj->get_nr_variable();
202
+ double one = 1;
203
+ double *d = new double[n];
204
+ double *Hd = new double[n];
205
+ double zTr, znewTrnew, alpha, beta, cgtol;
206
+ double *z = new double[n];
207
+
208
+ *reach_boundary = false;
209
+ for (i=0; i<n; i++)
210
+ {
211
+ s[i] = 0;
212
+ r[i] = -g[i];
213
+ z[i] = r[i] / M[i];
214
+ d[i] = z[i];
215
+ }
216
+
217
+ zTr = ddot_(&n, z, &inc, r, &inc);
218
+ cgtol = eps_cg*sqrt(zTr);
219
+ int cg_iter = 0;
220
+ int max_cg_iter = max(n, 5);
221
+
222
+ while (cg_iter < max_cg_iter)
223
+ {
224
+ if (sqrt(zTr) <= cgtol)
225
+ break;
226
+ cg_iter++;
227
+ fun_obj->Hv(d, Hd);
228
+
229
+ alpha = zTr/ddot_(&n, d, &inc, Hd, &inc);
230
+ daxpy_(&n, &alpha, d, &inc, s, &inc);
231
+
232
+ double sMnorm = sqrt(uTMv(n, s, M, s));
233
+ if (sMnorm > delta)
234
+ {
235
+ info("cg reaches trust region boundary\n");
236
+ *reach_boundary = true;
237
+ alpha = -alpha;
238
+ daxpy_(&n, &alpha, d, &inc, s, &inc);
239
+
240
+ double sTMd = uTMv(n, s, M, d);
241
+ double sTMs = uTMv(n, s, M, s);
242
+ double dTMd = uTMv(n, d, M, d);
243
+ double dsq = delta*delta;
244
+ double rad = sqrt(sTMd*sTMd + dTMd*(dsq-sTMs));
245
+ if (sTMd >= 0)
246
+ alpha = (dsq - sTMs)/(sTMd + rad);
247
+ else
248
+ alpha = (rad - sTMd)/dTMd;
249
+ daxpy_(&n, &alpha, d, &inc, s, &inc);
250
+ alpha = -alpha;
251
+ daxpy_(&n, &alpha, Hd, &inc, r, &inc);
252
+ break;
253
+ }
254
+ alpha = -alpha;
255
+ daxpy_(&n, &alpha, Hd, &inc, r, &inc);
256
+
257
+ for (i=0; i<n; i++)
258
+ z[i] = r[i] / M[i];
259
+ znewTrnew = ddot_(&n, z, &inc, r, &inc);
260
+ beta = znewTrnew/zTr;
261
+ dscal_(&n, &beta, d, &inc);
262
+ daxpy_(&n, &one, z, &inc, d, &inc);
263
+ zTr = znewTrnew;
264
+ }
265
+
266
+ if (cg_iter == max_cg_iter)
267
+ info("WARNING: reaching maximal number of CG steps\n");
268
+
269
+ delete[] d;
270
+ delete[] Hd;
271
+ delete[] z;
272
+
273
+ return(cg_iter);
274
+ }
275
+
276
+ double TRON::norm_inf(int n, double *x)
277
+ {
278
+ double dmax = fabs(x[0]);
279
+ for (int i=1; i<n; i++)
280
+ if (fabs(x[i]) >= dmax)
281
+ dmax = fabs(x[i]);
282
+ return(dmax);
283
+ }
284
+
285
+ void TRON::set_print_string(void (*print_string) (const char *buf))
286
+ {
287
+ tron_print_string = print_string;
288
+ }
@@ -0,0 +1,36 @@
1
+ #ifndef _TRON_H
2
+ #define _TRON_H
3
+
4
// Abstract objective handed to TRON. Implementations supply the value,
// gradient and Hessian-vector product, plus the problem dimension and a
// diagonal preconditioner.
class function
{
public:
	// Objective value at w.
	virtual double fun(double *w) = 0;

	// Writes the gradient at w into g.
	virtual void grad(double *w, double *g) = 0;

	// Writes the Hessian-vector product for s into Hs.
	virtual void Hv(double *s, double *Hs) = 0;

	// Number of variables, i.e. the length of the arrays above.
	virtual int get_nr_variable() = 0;

	// Writes a diagonal preconditioner into M.
	virtual void get_diag_preconditioner(double *M) = 0;

	virtual ~function() {}
};
15
+
16
+ class TRON
17
+ {
18
+ public:
19
+ TRON(const function *fun_obj, double eps = 0.1, double eps_cg = 0.1, int max_iter = 1000);
20
+ ~TRON();
21
+
22
+ void tron(double *w);
23
+ void set_print_string(void (*i_print) (const char *buf));
24
+
25
+ private:
26
+ int trpcg(double delta, double *g, double *M, double *s, double *r, bool *reach_boundary);
27
+ double norm_inf(int n, double *x);
28
+
29
+ double eps;
30
+ double eps_cg;
31
+ int max_iter;
32
+ function *fun_obj;
33
+ void info(const char *fmt,...);
34
+ void (*tron_print_string)(const char *buf);
35
+ };
36
+ #endif
@@ -545,6 +545,9 @@ void Init_liblinearext()
545
545
  */
546
546
  mLiblinear = rb_define_module_under(mNumo, "Liblinear");
547
547
 
548
+ /* The version of LIBLINEAR used in background library. */
549
+ rb_define_const(mLiblinear, "LIBLINEAR_VERSION", INT2NUM(LIBLINEAR_VERSION));
550
+
548
551
  rb_define_module_function(mLiblinear, "train", numo_liblinear_train, 3);
549
552
  rb_define_module_function(mLiblinear, "cv", numo_liblinear_cross_validation, 4);
550
553
  rb_define_module_function(mLiblinear, "predict", numo_liblinear_predict, 3);
@@ -3,6 +3,6 @@
3
3
  module Numo
4
4
  module Liblinear
5
5
  # The version of Numo::Liblinear you are using.
6
- VERSION = '0.5.0'
6
+ VERSION = '1.0.0'
7
7
  end
8
8
  end
@@ -27,6 +27,13 @@ Gem::Specification.new do |spec|
27
27
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
28
28
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
29
29
  end
30
+
31
+ gem_dir = File.expand_path(__dir__) + '/'
32
+ submodule_path = `git submodule --quiet foreach pwd`.split($OUTPUT_RECORD_SEPARATOR).first
33
+ submodule_relative_path = submodule_path.sub gem_dir, ''
34
+ liblinear_files = %w[linear.cpp linear.h tron.cpp tron.h blas/blas.h blas/blasp.h blas/daxpy.c blas/ddot.c blas/dnrm2.c blas/dscal.c]
35
+ liblinear_files.each { |liblinf| spec.files << "#{submodule_relative_path}/#{liblinf}" }
36
+
30
37
  spec.bindir = 'exe'
31
38
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
32
39
  spec.require_paths = ['lib']
@@ -39,6 +46,7 @@ Gem::Specification.new do |spec|
39
46
  }
40
47
 
41
48
  spec.add_runtime_dependency 'numo-narray', '~> 0.9.1'
49
+
42
50
  spec.add_development_dependency 'bundler', '~> 2.0'
43
51
  spec.add_development_dependency 'rake', '~> 10.0'
44
52
  spec.add_development_dependency 'rake-compiler', '~> 1.0'