numo-libsvm 0.2.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,104 @@
1
+ #ifndef _LIBSVM_H
2
+ #define _LIBSVM_H
3
+
4
+ #define LIBSVM_VERSION 324
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ extern int libsvm_version;
11
+
12
+ struct svm_node
13
+ {
14
+ int index;
15
+ double value;
16
+ };
17
+
18
+ struct svm_problem
19
+ {
20
+ int l;
21
+ double *y;
22
+ struct svm_node **x;
23
+ };
24
+
25
+ enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR }; /* svm_type */
26
+ enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */
27
+
28
+ struct svm_parameter
29
+ {
30
+ int svm_type;
31
+ int kernel_type;
32
+ int degree; /* for poly */
33
+ double gamma; /* for poly/rbf/sigmoid */
34
+ double coef0; /* for poly/sigmoid */
35
+
36
+ /* these are for training only */
37
+ double cache_size; /* in MB */
38
+ double eps; /* stopping criteria */
39
+ double C; /* for C_SVC, EPSILON_SVR and NU_SVR */
40
+ int nr_weight; /* for C_SVC */
41
+ int *weight_label; /* for C_SVC */
42
+ double* weight; /* for C_SVC */
43
+ double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */
44
+ double p; /* for EPSILON_SVR */
45
+ int shrinking; /* use the shrinking heuristics */
46
+ int probability; /* do probability estimates */
47
+ };
48
+
49
+ //
50
+ // svm_model
51
+ //
52
+ struct svm_model
53
+ {
54
+ struct svm_parameter param; /* parameter */
55
+ int nr_class; /* number of classes, = 2 in regression/one class svm */
56
+ int l; /* total #SV */
57
+ struct svm_node **SV; /* SVs (SV[l]) */
58
+ double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
59
+ double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
60
+ double *probA; /* pairwise probability information */
61
+ double *probB;
62
+ int *sv_indices; /* sv_indices[0,...,nSV-1] are values in [1,...,num_training_data] to indicate SVs in the training set */
63
+
64
+ /* for classification only */
65
+
66
+ int *label; /* label of each class (label[k]) */
67
+ int *nSV; /* number of SVs for each class (nSV[k]) */
68
+ /* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
69
+ /* XXX */
70
+ int free_sv; /* 1 if svm_model is created by svm_load_model */
71
+ /* 0 if svm_model is created by svm_train */
72
+ };
73
+
74
+ struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param);
75
+ void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target);
76
+
77
+ int svm_save_model(const char *model_file_name, const struct svm_model *model);
78
+ struct svm_model *svm_load_model(const char *model_file_name);
79
+
80
+ int svm_get_svm_type(const struct svm_model *model);
81
+ int svm_get_nr_class(const struct svm_model *model);
82
+ void svm_get_labels(const struct svm_model *model, int *label);
83
+ void svm_get_sv_indices(const struct svm_model *model, int *sv_indices);
84
+ int svm_get_nr_sv(const struct svm_model *model);
85
+ double svm_get_svr_probability(const struct svm_model *model);
86
+
87
+ double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values);
88
+ double svm_predict(const struct svm_model *model, const struct svm_node *x);
89
+ double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates);
90
+
91
+ void svm_free_model_content(struct svm_model *model_ptr);
92
+ void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
93
+ void svm_destroy_param(struct svm_parameter *param);
94
+
95
+ const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param);
96
+ int svm_check_probability_model(const struct svm_model *model);
97
+
98
+ void svm_set_print_string_function(void (*print_func)(const char *));
99
+
100
+ #ifdef __cplusplus
101
+ }
102
+ #endif
103
+
104
+ #endif /* _LIBSVM_H */
@@ -16,6 +16,30 @@ void print_null(const char *s) {}
16
16
  * @param y [Numo::DFloat] (shape: [n_samples]) The labels or target values for samples.
17
17
  * @param param [Hash] The parameters of an SVM model.
18
18
  *
19
+ * @example
20
+ * require 'numo/libsvm'
21
+ *
22
+ * # Prepare XOR data.
23
+ * x = Numo::DFloat[[-0.8, -0.7], [0.9, 0.8], [-0.7, 0.9], [0.8, -0.9]]
24
+ * y = Numo::Int32[-1, -1, 1, 1]
25
+ *
26
+ * # Train C-Support Vector Classifier with RBF kernel.
27
+ * param = {
28
+ * svm_type: Numo::Libsvm::SvmType::C_SVC,
29
+ * kernel_type: Numo::Libsvm::KernelType::RBF,
30
+ * gamma: 2.0,
31
+ * C: 1,
32
+ * random_seed: 1
33
+ * }
34
+ * model = Numo::Libsvm.train(x, y, param)
35
+ *
36
+ * # Predict labels of test data.
37
+ * x_test = Numo::DFloat[[-0.4, -0.5], [0.5, -0.4]]
38
+ * result = Numo::Libsvm.predict(x_test, param, model)
39
+ * p result
40
+ * # Numo::DFloat#shape=[2]
41
+ * # [-1, 1]
42
+ *
19
43
  * @raise [ArgumentError] If the sample array is not 2-dimensional, the label array is not 1-dimensional,
20
44
  * the sample array and label array do not have the same number of samples, or
21
45
  * the hyperparameter has an invalid value, this error is raised.
@@ -30,6 +54,8 @@ VALUE train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash)
30
54
  narray_t* x_nary;
31
55
  narray_t* y_nary;
32
56
  char* err_msg;
57
+ VALUE random_seed;
58
+ VALUE verbose;
33
59
  VALUE model_hash;
34
60
 
35
61
  if (CLASS_OF(x_val) != numo_cDFloat) {
@@ -60,6 +86,11 @@ VALUE train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash)
60
86
  return Qnil;
61
87
  }
62
88
 
89
+ random_seed = rb_hash_aref(param_hash, ID2SYM(rb_intern("random_seed")));
90
+ if (!NIL_P(random_seed)) {
91
+ srand(NUM2UINT(random_seed));
92
+ }
93
+
63
94
  param = rb_hash_to_svm_parameter(param_hash);
64
95
  problem = dataset_to_svm_problem(x_val, y_val);
65
96
 
@@ -71,7 +102,11 @@ VALUE train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash)
71
102
  return Qnil;
72
103
  }
73
104
 
74
- svm_set_print_string_function(print_null);
105
+ verbose = rb_hash_aref(param_hash, ID2SYM(rb_intern("verbose")));
106
+ if (verbose != Qtrue) {
107
+ svm_set_print_string_function(print_null);
108
+ }
109
+
75
110
  model = svm_train(problem, param);
76
111
  model_hash = svm_model_to_rb_hash(model);
77
112
  svm_free_and_destroy_model(&model);
@@ -79,6 +114,9 @@ VALUE train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash)
79
114
  xfree_svm_problem(problem);
80
115
  xfree_svm_parameter(param);
81
116
 
117
+ RB_GC_GUARD(x_val);
118
+ RB_GC_GUARD(y_val);
119
+
82
120
  return model_hash;
83
121
  }
84
122
 
@@ -92,6 +130,30 @@ VALUE train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash)
92
130
  * @param param [Hash] The parameters of an SVM model.
93
131
  * @param n_folds [Integer] The number of folds.
94
132
  *
133
+ * @example
134
+ * require 'numo/libsvm'
135
+ *
136
+ * # x: samples
137
+ * # y: labels
138
+ *
139
+ * # Define parameters of C-SVC with RBF Kernel.
140
+ * param = {
141
+ * svm_type: Numo::Libsvm::SvmType::C_SVC,
142
+ * kernel_type: Numo::Libsvm::KernelType::RBF,
143
+ * gamma: 1.0,
144
+ * C: 1,
145
+ * random_seed: 1,
146
+ * verbose: true
147
+ * }
148
+ *
149
+ * # Perform 5-fold cross validation.
150
+ * n_folds = 5
151
+ * res = Numo::Libsvm.cv(x, y, param, n_folds)
152
+ *
153
+ * # Print mean accuracy.
154
+ * mean_accuracy = y.eq(res).count.fdiv(y.size)
155
+ * puts "Accuracy: %.1f %%" % (100 * mean_accuracy)
156
+ *
95
157
  * @raise [ArgumentError] If the sample array is not 2-dimensional, the label array is not 1-dimensional,
96
158
  * the sample array and label array do not have the same number of samples, or
97
159
  * the hyperparameter has an invalid value, this error is raised.
@@ -107,6 +169,8 @@ VALUE cross_validation(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash, V
107
169
  narray_t* x_nary;
108
170
  narray_t* y_nary;
109
171
  char* err_msg;
172
+ VALUE random_seed;
173
+ VALUE verbose;
110
174
  struct svm_problem* problem;
111
175
  struct svm_parameter* param;
112
176
 
@@ -138,6 +202,11 @@ VALUE cross_validation(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash, V
138
202
  return Qnil;
139
203
  }
140
204
 
205
+ random_seed = rb_hash_aref(param_hash, ID2SYM(rb_intern("random_seed")));
206
+ if (!NIL_P(random_seed)) {
207
+ srand(NUM2UINT(random_seed));
208
+ }
209
+
141
210
  param = rb_hash_to_svm_parameter(param_hash);
142
211
  problem = dataset_to_svm_problem(x_val, y_val);
143
212
 
@@ -153,12 +222,19 @@ VALUE cross_validation(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash, V
153
222
  t_val = rb_narray_new(numo_cDFloat, 1, t_shape);
154
223
  t_pt = (double*)na_get_pointer_for_write(t_val);
155
224
 
156
- svm_set_print_string_function(print_null);
225
+ verbose = rb_hash_aref(param_hash, ID2SYM(rb_intern("verbose")));
226
+ if (verbose != Qtrue) {
227
+ svm_set_print_string_function(print_null);
228
+ }
229
+
157
230
  svm_cross_validation(problem, param, n_folds, t_pt);
158
231
 
159
232
  xfree_svm_problem(problem);
160
233
  xfree_svm_parameter(param);
161
234
 
235
+ RB_GC_GUARD(x_val);
236
+ RB_GC_GUARD(y_val);
237
+
162
238
  return t_val;
163
239
  }
164
240
 
@@ -184,9 +260,10 @@ VALUE predict(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash)
184
260
  size_t y_shape[1];
185
261
  VALUE y_val;
186
262
  double* y_pt;
187
- int i, j;
263
+ int i, j, k;
188
264
  int n_samples;
189
265
  int n_features;
266
+ int n_nonzero_features;
190
267
 
191
268
  /* Obtain C data structures. */
192
269
  if (CLASS_OF(x_val) != numo_cDFloat) {
@@ -215,21 +292,17 @@ VALUE predict(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash)
215
292
  x_pt = (double*)na_get_pointer_for_read(x_val);
216
293
 
217
294
  /* Predict values. */
218
- x_nodes = ALLOC_N(struct svm_node, n_features + 1);
219
- x_nodes[n_features].index = -1;
220
- x_nodes[n_features].value = 0.0;
221
295
  for (i = 0; i < n_samples; i++) {
222
- for (j = 0; j < n_features; j++) {
223
- x_nodes[j].index = j + 1;
224
- x_nodes[j].value = (double)x_pt[i * n_features + j];
225
- }
296
+ x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
226
297
  y_pt[i] = svm_predict(model, x_nodes);
298
+ xfree(x_nodes);
227
299
  }
228
300
 
229
- xfree(x_nodes);
230
301
  xfree_svm_model(model);
231
302
  xfree_svm_parameter(param);
232
303
 
304
+ RB_GC_GUARD(x_val);
305
+
233
306
  return y_val;
234
307
  }
235
308
 
@@ -298,40 +371,30 @@ VALUE decision_function(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_h
298
371
 
299
372
  /* Predict values. */
300
373
  if (model->param.svm_type == ONE_CLASS || model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) {
301
- x_nodes = ALLOC_N(struct svm_node, n_features + 1);
302
- x_nodes[n_features].index = -1;
303
- x_nodes[n_features].value = 0.0;
304
374
  for (i = 0; i < n_samples; i++) {
305
- for (j = 0; j < n_features; j++) {
306
- x_nodes[j].index = j + 1;
307
- x_nodes[j].value = (double)x_pt[i * n_features + j];
308
- }
375
+ x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
309
376
  svm_predict_values(model, x_nodes, &y_pt[i]);
377
+ xfree(x_nodes);
310
378
  }
311
- xfree(x_nodes);
312
379
  } else {
313
380
  y_cols = (int)y_shape[1];
314
381
  dec_values = ALLOC_N(double, y_cols);
315
- x_nodes = ALLOC_N(struct svm_node, n_features + 1);
316
- x_nodes[n_features].index = -1;
317
- x_nodes[n_features].value = 0.0;
318
382
  for (i = 0; i < n_samples; i++) {
319
- for (j = 0; j < n_features; j++) {
320
- x_nodes[j].index = j + 1;
321
- x_nodes[j].value = (double)x_pt[i * n_features + j];
322
- }
383
+ x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
323
384
  svm_predict_values(model, x_nodes, dec_values);
385
+ xfree(x_nodes);
324
386
  for (j = 0; j < y_cols; j++) {
325
387
  y_pt[i * y_cols + j] = dec_values[j];
326
388
  }
327
389
  }
328
- xfree(x_nodes);
329
390
  xfree(dec_values);
330
391
  }
331
392
 
332
393
  xfree_svm_model(model);
333
394
  xfree_svm_parameter(param);
334
395
 
396
+ RB_GC_GUARD(x_val);
397
+
335
398
  return y_val;
336
399
  }
337
400
 
@@ -393,26 +456,22 @@ VALUE predict_proba(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash)
393
456
 
394
457
  /* Predict values. */
395
458
  probs = ALLOC_N(double, model->nr_class);
396
- x_nodes = ALLOC_N(struct svm_node, n_features + 1);
397
- x_nodes[n_features].index = -1;
398
- x_nodes[n_features].value = 0.0;
399
459
  for (i = 0; i < n_samples; i++) {
400
- for (j = 0; j < n_features; j++) {
401
- x_nodes[j].index = j + 1;
402
- x_nodes[j].value = (double)x_pt[i * n_features + j];
403
- }
460
+ x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
404
461
  svm_predict_probability(model, x_nodes, probs);
462
+ xfree(x_nodes);
405
463
  for (j = 0; j < model->nr_class; j++) {
406
464
  y_pt[i * model->nr_class + j] = probs[j];
407
465
  }
408
466
  }
409
- xfree(x_nodes);
410
467
  xfree(probs);
411
468
  }
412
469
 
413
470
  xfree_svm_model(model);
414
471
  xfree_svm_parameter(param);
415
472
 
473
+ RB_GC_GUARD(x_val);
474
+
416
475
  return y_val;
417
476
  }
418
477
 
@@ -29,9 +29,12 @@ struct svm_problem* dataset_to_svm_problem(VALUE x_val, VALUE y_val)
29
29
  narray_t* x_nary;
30
30
  double* x_pt;
31
31
  double* y_pt;
32
- int i, j;
32
+ int i, j, k;
33
33
  int n_samples;
34
34
  int n_features;
35
+ int n_nonzero_features;
36
+ int is_padded;
37
+ int last_feature_id;
35
38
 
36
39
  GetNArray(x_val, x_nary);
37
40
  n_samples = (int)NA_SHAPE(x_nary)[0];
@@ -43,16 +46,45 @@ struct svm_problem* dataset_to_svm_problem(VALUE x_val, VALUE y_val)
43
46
  problem->l = n_samples;
44
47
  problem->x = ALLOC_N(struct svm_node*, n_samples);
45
48
  problem->y = ALLOC_N(double, n_samples);
49
+
50
+ is_padded = 0;
46
51
  for (i = 0; i < n_samples; i++) {
47
- problem->x[i] = ALLOC_N(struct svm_node, n_features + 1);
52
+ n_nonzero_features = 0;
48
53
  for (j = 0; j < n_features; j++) {
49
- problem->x[i][j].index = j + 1;
50
- problem->x[i][j].value = x_pt[i * n_features + j];
54
+ if (x_pt[i * n_features + j] != 0.0) {
55
+ n_nonzero_features += 1;
56
+ last_feature_id = j + 1;
57
+ }
58
+ }
59
+ if (is_padded == 0 && last_feature_id == n_features) {
60
+ is_padded = 1;
61
+ }
62
+ if (is_padded == 1) {
63
+ problem->x[i] = ALLOC_N(struct svm_node, n_nonzero_features + 1);
64
+ } else {
65
+ problem->x[i] = ALLOC_N(struct svm_node, n_nonzero_features + 2);
66
+ }
67
+ for (j = 0, k = 0; j < n_features; j++) {
68
+ if (x_pt[i * n_features + j] != 0.0) {
69
+ problem->x[i][k].index = j + 1;
70
+ problem->x[i][k].value = (double)x_pt[i * n_features + j];
71
+ k++;
72
+ }
73
+ }
74
+ if (is_padded == 1) {
75
+ problem->x[i][n_nonzero_features].index = -1;
76
+ problem->x[i][n_nonzero_features].value = 0.0;
77
+ } else {
78
+ problem->x[i][n_nonzero_features].index = n_features;
79
+ problem->x[i][n_nonzero_features].value = 0.0;
80
+ problem->x[i][n_nonzero_features + 1].index = -1;
81
+ problem->x[i][n_nonzero_features + 1].value = 0.0;
51
82
  }
52
- problem->x[i][n_features].index = -1;
53
- problem->x[i][n_features].value = 0.0;
54
83
  problem->y[i] = y_pt[i];
55
84
  }
56
85
 
86
+ RB_GC_GUARD(x_val);
87
+ RB_GC_GUARD(y_val);
88
+
57
89
  return problem;
58
90
  }
@@ -3,6 +3,6 @@
3
3
  module Numo
4
4
  module Libsvm
5
5
  # The version of Numo::Libsvm you are using.
6
- VERSION = '0.2.0'
6
+ VERSION = '1.0.1'
7
7
  end
8
8
  end
@@ -28,14 +28,28 @@ Gem::Specification.new do |spec|
28
28
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
29
29
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
30
30
  end
31
+
32
+ gem_dir = File.expand_path(__dir__) + '/'
33
+ submodule_path = `git submodule --quiet foreach pwd`.split($OUTPUT_RECORD_SEPARATOR).first
34
+ submodule_relative_path = submodule_path.sub gem_dir, ''
35
+ spec.files << "#{submodule_relative_path}/svm.cpp"
36
+ spec.files << "#{submodule_relative_path}/svm.h"
37
+
31
38
  spec.bindir = 'exe'
32
39
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
33
40
  spec.require_paths = ['lib']
34
41
  spec.extensions = ['ext/numo/libsvm/extconf.rb']
35
42
 
43
+ spec.metadata = {
44
+ 'homepage_uri' => 'https://github.com/yoshoku/numo-libsvm',
45
+ 'source_code_uri' => 'https://github.com/yoshoku/numo-libsvm',
46
+ 'documentation_uri' => 'https://yoshoku.github.io/numo-libsvm/doc/'
47
+ }
48
+
36
49
  spec.add_runtime_dependency 'numo-narray', '~> 0.9.1'
50
+
37
51
  spec.add_development_dependency 'bundler', '~> 2.0'
38
- spec.add_development_dependency 'rake', '~> 10.0'
52
+ spec.add_development_dependency 'rake', '~> 12.0'
39
53
  spec.add_development_dependency 'rake-compiler', '~> 1.0'
40
54
  spec.add_development_dependency 'rspec', '~> 3.0'
41
55
  end