numo-liblinear 0.5.0 → 1.0.0

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry; it is provided for informational purposes only.
@@ -0,0 +1,83 @@
+ #ifndef _LIBLINEAR_H
+ #define _LIBLINEAR_H
+
+ #define LIBLINEAR_VERSION 230
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ extern int liblinear_version;
+
+ struct feature_node
+ {
+     int index;
+     double value;
+ };
+
+ struct problem
+ {
+     int l, n;
+     double *y;
+     struct feature_node **x;
+     double bias; /* < 0 if no bias term */
+ };
+
+ enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL }; /* solver_type */
+
+ struct parameter
+ {
+     int solver_type;
+
+     /* these are for training only */
+     double eps;       /* stopping criteria */
+     double C;
+     int nr_weight;
+     int *weight_label;
+     double* weight;
+     double p;
+     double *init_sol;
+ };
+
+ struct model
+ {
+     struct parameter param;
+     int nr_class;     /* number of classes */
+     int nr_feature;
+     double *w;
+     int *label;       /* label of each class */
+     double bias;
+ };
+
+ struct model* train(const struct problem *prob, const struct parameter *param);
+ void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
+ void find_parameters(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score);
+
+ double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
+ double predict(const struct model *model_, const struct feature_node *x);
+ double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);
+
+ int save_model(const char *model_file_name, const struct model *model_);
+ struct model *load_model(const char *model_file_name);
+
+ int get_nr_feature(const struct model *model_);
+ int get_nr_class(const struct model *model_);
+ void get_labels(const struct model *model_, int* label);
+ double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx);
+ double get_decfun_bias(const struct model *model_, int label_idx);
+
+ void free_model_content(struct model *model_ptr);
+ void free_and_destroy_model(struct model **model_ptr_ptr);
+ void destroy_param(struct parameter *param);
+
+ const char *check_parameter(const struct problem *prob, const struct parameter *param);
+ int check_probability_model(const struct model *model);
+ int check_regression_model(const struct model *model);
+ void set_print_string_function(void (*print_func) (const char*));
+
+ #ifdef __cplusplus
+ }
+ #endif
+
+ #endif /* _LIBLINEAR_H */
+
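
The header above is LIBLINEAR's linear.h (version 230), which the gem now vendors; the gemspec change further down adds it, along with tron.cpp, tron.h, and the BLAS sources, to the packaged files. As a rough usage sketch of this C API — not code from the gem — assuming the header is reachable as linear.h and the library is linked: each feature vector is a feature_node array terminated by a sentinel node with index -1, a problem bundles the training set, and a parameter selects the solver. The toy data, solver choice, and tolerances below are assumptions made for the example.

#include <cstddef>   // for NULL
#include "linear.h"  // the header shown above; include path assumed

int main()
{
    // Two training examples with two features each; every row ends with a
    // sentinel node whose index is -1, as LIBLINEAR expects.
    struct feature_node x0[] = { {1, 1.0}, {2, 0.0}, {-1, 0.0} };
    struct feature_node x1[] = { {1, 0.0}, {2, 1.0}, {-1, 0.0} };
    struct feature_node *rows[] = { x0, x1 };
    double labels[] = { 1.0, -1.0 };

    struct problem prob;
    prob.l = 2;        // number of training examples
    prob.n = 2;        // number of features
    prob.y = labels;
    prob.x = rows;
    prob.bias = -1;    // < 0: no bias term

    struct parameter param;
    param.solver_type = L2R_L2LOSS_SVC_DUAL;
    param.eps = 0.1;
    param.C = 1.0;
    param.nr_weight = 0;       // no per-class weights
    param.weight_label = NULL;
    param.weight = NULL;
    param.p = 0.1;             // only used by the SVR solvers
    param.init_sol = NULL;

    if (check_parameter(&prob, &param) == NULL)  // NULL means the settings are consistent
    {
        struct model *m = train(&prob, &param);
        double label = predict(m, x0);           // predicted class for the first row
        free_and_destroy_model(&m);
        destroy_param(&param);
        (void)label;
    }
    return 0;
}

check_parameter returns NULL when the settings are valid and an error string otherwise; free_and_destroy_model and destroy_param release what train and the parameter setup allocated.
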
@@ -0,0 +1,288 @@
+ #include <math.h>
+ #include <stdio.h>
+ #include <string.h>
+ #include <stdarg.h>
+ #include "tron.h"
+
+ #ifndef min
+ template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
+ #endif
+
+ #ifndef max
+ template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
+ #endif
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ extern double dnrm2_(int *, double *, int *);
+ extern double ddot_(int *, double *, int *, double *, int *);
+ extern int daxpy_(int *, double *, double *, int *, double *, int *);
+ extern int dscal_(int *, double *, double *, int *);
+
+ #ifdef __cplusplus
+ }
+ #endif
+
+ static void default_print(const char *buf)
+ {
+     fputs(buf,stdout);
+     fflush(stdout);
+ }
+
+ static double uTMv(int n, double *u, double *M, double *v)
+ {
+     const int m = n-4;
+     double res = 0;
+     int i;
+     for (i=0; i<m; i+=5)
+         res += u[i]*M[i]*v[i]+u[i+1]*M[i+1]*v[i+1]+u[i+2]*M[i+2]*v[i+2]+
+             u[i+3]*M[i+3]*v[i+3]+u[i+4]*M[i+4]*v[i+4];
+     for (; i<n; i++)
+         res += u[i]*M[i]*v[i];
+     return res;
+ }
+
+ void TRON::info(const char *fmt,...)
+ {
+     char buf[BUFSIZ];
+     va_list ap;
+     va_start(ap,fmt);
+     vsprintf(buf,fmt,ap);
+     va_end(ap);
+     (*tron_print_string)(buf);
+ }
+
+ TRON::TRON(const function *fun_obj, double eps, double eps_cg, int max_iter)
+ {
+     this->fun_obj=const_cast<function *>(fun_obj);
+     this->eps=eps;
+     this->eps_cg=eps_cg;
+     this->max_iter=max_iter;
+     tron_print_string = default_print;
+ }
+
+ TRON::~TRON()
+ {
+ }
+
+ void TRON::tron(double *w)
+ {
+     // Parameters for updating the iterates.
+     double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75;
+
+     // Parameters for updating the trust region size delta.
+     double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4;
+
+     int n = fun_obj->get_nr_variable();
+     int i, cg_iter;
+     double delta=0, sMnorm, one=1.0;
+     double alpha, f, fnew, prered, actred, gs;
+     int search = 1, iter = 1, inc = 1;
+     double *s = new double[n];
+     double *r = new double[n];
+     double *g = new double[n];
+
+     const double alpha_pcg = 0.01;
+     double *M = new double[n];
+
+     // calculate gradient norm at w=0 for stopping condition.
+     double *w0 = new double[n];
+     for (i=0; i<n; i++)
+         w0[i] = 0;
+     fun_obj->fun(w0);
+     fun_obj->grad(w0, g);
+     double gnorm0 = dnrm2_(&n, g, &inc);
+     delete [] w0;
+
+     f = fun_obj->fun(w);
+     fun_obj->grad(w, g);
+     double gnorm = dnrm2_(&n, g, &inc);
+
+     if (gnorm <= eps*gnorm0)
+         search = 0;
+
+     fun_obj->get_diag_preconditioner(M);
+     for(i=0; i<n; i++)
+         M[i] = (1-alpha_pcg) + alpha_pcg*M[i];
+     delta = sqrt(uTMv(n, g, M, g));
+
+     double *w_new = new double[n];
+     bool reach_boundary;
+     bool delta_adjusted = false;
+     while (iter <= max_iter && search)
+     {
+         cg_iter = trpcg(delta, g, M, s, r, &reach_boundary);
+
+         memcpy(w_new, w, sizeof(double)*n);
+         daxpy_(&n, &one, s, &inc, w_new, &inc);
+
+         gs = ddot_(&n, g, &inc, s, &inc);
+         prered = -0.5*(gs-ddot_(&n, s, &inc, r, &inc));
+         fnew = fun_obj->fun(w_new);
+
+         // Compute the actual reduction.
+         actred = f - fnew;
+
+         // On the first iteration, adjust the initial step bound.
+         sMnorm = sqrt(uTMv(n, s, M, s));
+         if (iter == 1 && !delta_adjusted)
+         {
+             delta = min(delta, sMnorm);
+             delta_adjusted = true;
+         }
+
+         // Compute prediction alpha*sMnorm of the step.
+         if (fnew - f - gs <= 0)
+             alpha = sigma3;
+         else
+             alpha = max(sigma1, -0.5*(gs/(fnew - f - gs)));
+
+         // Update the trust region bound according to the ratio of actual to predicted reduction.
+         if (actred < eta0*prered)
+             delta = min(alpha*sMnorm, sigma2*delta);
+         else if (actred < eta1*prered)
+             delta = max(sigma1*delta, min(alpha*sMnorm, sigma2*delta));
+         else if (actred < eta2*prered)
+             delta = max(sigma1*delta, min(alpha*sMnorm, sigma3*delta));
+         else
+         {
+             if (reach_boundary)
+                 delta = sigma3*delta;
+             else
+                 delta = max(delta, min(alpha*sMnorm, sigma3*delta));
+         }
+
+         info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter);
+
+         if (actred > eta0*prered)
+         {
+             iter++;
+             memcpy(w, w_new, sizeof(double)*n);
+             f = fnew;
+             fun_obj->grad(w, g);
+             fun_obj->get_diag_preconditioner(M);
+             for(i=0; i<n; i++)
+                 M[i] = (1-alpha_pcg) + alpha_pcg*M[i];
+
+             gnorm = dnrm2_(&n, g, &inc);
+             if (gnorm <= eps*gnorm0)
+                 break;
+         }
+         if (f < -1.0e+32)
+         {
+             info("WARNING: f < -1.0e+32\n");
+             break;
+         }
+         if (prered <= 0)
+         {
+             info("WARNING: prered <= 0\n");
+             break;
+         }
+         if (fabs(actred) <= 1.0e-12*fabs(f) &&
+             fabs(prered) <= 1.0e-12*fabs(f))
+         {
+             info("WARNING: actred and prered too small\n");
+             break;
+         }
+     }
+
+     delete[] g;
+     delete[] r;
+     delete[] w_new;
+     delete[] s;
+     delete[] M;
+ }
+
+ int TRON::trpcg(double delta, double *g, double *M, double *s, double *r, bool *reach_boundary)
+ {
+     int i, inc = 1;
+     int n = fun_obj->get_nr_variable();
+     double one = 1;
+     double *d = new double[n];
+     double *Hd = new double[n];
+     double zTr, znewTrnew, alpha, beta, cgtol;
+     double *z = new double[n];
+
+     *reach_boundary = false;
+     for (i=0; i<n; i++)
+     {
+         s[i] = 0;
+         r[i] = -g[i];
+         z[i] = r[i] / M[i];
+         d[i] = z[i];
+     }
+
+     zTr = ddot_(&n, z, &inc, r, &inc);
+     cgtol = eps_cg*sqrt(zTr);
+     int cg_iter = 0;
+     int max_cg_iter = max(n, 5);
+
+     while (cg_iter < max_cg_iter)
+     {
+         if (sqrt(zTr) <= cgtol)
+             break;
+         cg_iter++;
+         fun_obj->Hv(d, Hd);
+
+         alpha = zTr/ddot_(&n, d, &inc, Hd, &inc);
+         daxpy_(&n, &alpha, d, &inc, s, &inc);
+
+         double sMnorm = sqrt(uTMv(n, s, M, s));
+         if (sMnorm > delta)
+         {
+             info("cg reaches trust region boundary\n");
+             *reach_boundary = true;
+             alpha = -alpha;
+             daxpy_(&n, &alpha, d, &inc, s, &inc);
+
+             double sTMd = uTMv(n, s, M, d);
+             double sTMs = uTMv(n, s, M, s);
+             double dTMd = uTMv(n, d, M, d);
+             double dsq = delta*delta;
+             double rad = sqrt(sTMd*sTMd + dTMd*(dsq-sTMs));
+             if (sTMd >= 0)
+                 alpha = (dsq - sTMs)/(sTMd + rad);
+             else
+                 alpha = (rad - sTMd)/dTMd;
+             daxpy_(&n, &alpha, d, &inc, s, &inc);
+             alpha = -alpha;
+             daxpy_(&n, &alpha, Hd, &inc, r, &inc);
+             break;
+         }
+         alpha = -alpha;
+         daxpy_(&n, &alpha, Hd, &inc, r, &inc);
+
+         for (i=0; i<n; i++)
+             z[i] = r[i] / M[i];
+         znewTrnew = ddot_(&n, z, &inc, r, &inc);
+         beta = znewTrnew/zTr;
+         dscal_(&n, &beta, d, &inc);
+         daxpy_(&n, &one, z, &inc, d, &inc);
+         zTr = znewTrnew;
+     }
+
+     if (cg_iter == max_cg_iter)
+         info("WARNING: reaching maximal number of CG steps\n");
+
+     delete[] d;
+     delete[] Hd;
+     delete[] z;
+
+     return(cg_iter);
+ }
+
+ double TRON::norm_inf(int n, double *x)
+ {
+     double dmax = fabs(x[0]);
+     for (int i=1; i<n; i++)
+         if (fabs(x[i]) >= dmax)
+             dmax = fabs(x[i]);
+     return(dmax);
+ }
+
+ void TRON::set_print_string(void (*print_string) (const char *buf))
+ {
+     tron_print_string = print_string;
+ }
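
One step in the hunk above that is easy to misread is the predicted reduction. Assuming r holds the conjugate-gradient residual -(g + Hs) when trpcg returns (which is what the updates r ← r - alpha*Hd compute), the line prered = -0.5*(gs - ddot_(&n, s, &inc, r, &inc)) is the decrease predicted by the quadratic model of the objective around w:

\[
\mathrm{prered} \;=\; -\Big(g^\top s + \tfrac{1}{2}\, s^\top H s\Big)
\;=\; -\tfrac{1}{2}\big(g^\top s - s^\top r\big),
\qquad r = -(g + H s).
\]

The solver then compares actred (the true decrease f - fnew) against the eta0/eta1/eta2 multiples of this prediction to decide whether to accept w_new and how to shrink or enlarge the trust region radius delta.
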
@@ -0,0 +1,36 @@
+ #ifndef _TRON_H
+ #define _TRON_H
+
+ class function
+ {
+ public:
+     virtual double fun(double *w) = 0 ;
+     virtual void grad(double *w, double *g) = 0 ;
+     virtual void Hv(double *s, double *Hs) = 0 ;
+
+     virtual int get_nr_variable(void) = 0 ;
+     virtual void get_diag_preconditioner(double *M) = 0 ;
+     virtual ~function(void){}
+ };
+
+ class TRON
+ {
+ public:
+     TRON(const function *fun_obj, double eps = 0.1, double eps_cg = 0.1, int max_iter = 1000);
+     ~TRON();
+
+     void tron(double *w);
+     void set_print_string(void (*i_print) (const char *buf));
+
+ private:
+     int trpcg(double delta, double *g, double *M, double *s, double *r, bool *reach_boundary);
+     double norm_inf(int n, double *x);
+
+     double eps;
+     double eps_cg;
+     int max_iter;
+     function *fun_obj;
+     void info(const char *fmt,...);
+     void (*tron_print_string)(const char *buf);
+ };
+ #endif
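
tron.h declares the interface the solver in tron.cpp drives: a function object must supply the objective value, gradient, Hessian-vector product, variable count, and diagonal preconditioner. The following is an illustrative sketch, not code from the gem or from LIBLINEAR, assuming tron.cpp and the vendored BLAS sources are compiled and linked; the class name quadratic, the target point c, and the tolerance are assumptions made for the example.

#include <cstdio>
#include "tron.h"   // the header shown above; include path assumed

// Toy objective: f(w) = 0.5 * ||w - c||^2 with c = (1, 2, 3).
// The gradient is (w - c), the Hessian is the identity, so Hv(s) = s and
// the diagonal preconditioner is all ones.
class quadratic : public function
{
public:
    double fun(double *w)
    {
        double f = 0;
        for (int i = 0; i < 3; i++)
            f += 0.5 * (w[i] - c[i]) * (w[i] - c[i]);
        return f;
    }
    void grad(double *w, double *g)
    {
        for (int i = 0; i < 3; i++)
            g[i] = w[i] - c[i];
    }
    void Hv(double *s, double *Hs)
    {
        for (int i = 0; i < 3; i++)
            Hs[i] = s[i];
    }
    int get_nr_variable(void) { return 3; }
    void get_diag_preconditioner(double *M)
    {
        for (int i = 0; i < 3; i++)
            M[i] = 1.0;
    }
private:
    double c[3] = {1.0, 2.0, 3.0};
};

int main()
{
    quadratic q;
    TRON solver(&q, 0.01);            // tighter eps than the 0.1 default
    double w[3] = {0.0, 0.0, 0.0};    // tron() refines w in place
    solver.tron(w);
    printf("w = (%g, %g, %g)\n", w[0], w[1], w[2]);  // expect roughly (1, 2, 3)
    return 0;
}

With the identity Hessian the inner preconditioned CG step solves the model exactly, so the solver reaches w ≈ c within a few outer iterations; set_print_string can be used to redirect or silence the per-iteration info() output.
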
@@ -545,6 +545,9 @@ void Init_liblinearext()
   */
  mLiblinear = rb_define_module_under(mNumo, "Liblinear");

+ /* The version of LIBLINEAR used in the background library. */
+ rb_define_const(mLiblinear, "LIBLINEAR_VERSION", INT2NUM(LIBLINEAR_VERSION));
+
  rb_define_module_function(mLiblinear, "train", numo_liblinear_train, 3);
  rb_define_module_function(mLiblinear, "cv", numo_liblinear_cross_validation, 4);
  rb_define_module_function(mLiblinear, "predict", numo_liblinear_predict, 3);
@@ -3,6 +3,6 @@
  module Numo
    module Liblinear
      # The version of Numo::Liblinear you are using.
-     VERSION = '0.5.0'
+     VERSION = '1.0.0'
    end
  end
@@ -27,6 +27,13 @@ Gem::Specification.new do |spec|
    spec.files = Dir.chdir(File.expand_path(__dir__)) do
      `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
    end
+
+   gem_dir = File.expand_path(__dir__) + '/'
+   submodule_path = `git submodule --quiet foreach pwd`.split($OUTPUT_RECORD_SEPARATOR).first
+   submodule_relative_path = submodule_path.sub gem_dir, ''
+   liblinear_files = %w[linear.cpp linear.h tron.cpp tron.h blas/blas.h blas/blasp.h blas/daxpy.c blas/ddot.c blas/dnrm2.c blas/dscal.c]
+   liblinear_files.each { |liblinf| spec.files << "#{submodule_relative_path}/#{liblinf}" }
+
    spec.bindir = 'exe'
    spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
    spec.require_paths = ['lib']
@@ -39,6 +46,7 @@ Gem::Specification.new do |spec|
    }

    spec.add_runtime_dependency 'numo-narray', '~> 0.9.1'
+
    spec.add_development_dependency 'bundler', '~> 2.0'
    spec.add_development_dependency 'rake', '~> 10.0'
    spec.add_development_dependency 'rake-compiler', '~> 1.0'