numo-libsvm 0.2.0 → 1.0.1

This diff shows the changes between publicly released versions of the package, as they appear in its public registry, and is provided for informational purposes only.
@@ -0,0 +1,104 @@
+ #ifndef _LIBSVM_H
+ #define _LIBSVM_H
+
+ #define LIBSVM_VERSION 324
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ extern int libsvm_version;
+
+ struct svm_node
+ {
+   int index;
+   double value;
+ };
+
+ struct svm_problem
+ {
+   int l;
+   double *y;
+   struct svm_node **x;
+ };
+
+ enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR };  /* svm_type */
+ enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED };        /* kernel_type */
+
+ struct svm_parameter
+ {
+   int svm_type;
+   int kernel_type;
+   int degree;         /* for poly */
+   double gamma;       /* for poly/rbf/sigmoid */
+   double coef0;       /* for poly/sigmoid */
+
+   /* these are for training only */
+   double cache_size;  /* in MB */
+   double eps;         /* stopping criteria */
+   double C;           /* for C_SVC, EPSILON_SVR and NU_SVR */
+   int nr_weight;      /* for C_SVC */
+   int *weight_label;  /* for C_SVC */
+   double* weight;     /* for C_SVC */
+   double nu;          /* for NU_SVC, ONE_CLASS, and NU_SVR */
+   double p;           /* for EPSILON_SVR */
+   int shrinking;      /* use the shrinking heuristics */
+   int probability;    /* do probability estimates */
+ };
+
+ //
+ // svm_model
+ //
+ struct svm_model
+ {
+   struct svm_parameter param;  /* parameter */
+   int nr_class;       /* number of classes, = 2 in regression/one class svm */
+   int l;              /* total #SV */
+   struct svm_node **SV;  /* SVs (SV[l]) */
+   double **sv_coef;   /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
+   double *rho;        /* constants in decision functions (rho[k*(k-1)/2]) */
+   double *probA;      /* pairwise probability information */
+   double *probB;
+   int *sv_indices;    /* sv_indices[0,...,nSV-1] are values in [1,...,num_training_data] to indicate SVs in the training set */
+
+   /* for classification only */
+
+   int *label;         /* label of each class (label[k]) */
+   int *nSV;           /* number of SVs for each class (nSV[k]) */
+                       /* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
+   /* XXX */
+   int free_sv;        /* 1 if svm_model is created by svm_load_model */
+                       /* 0 if svm_model is created by svm_train */
+ };
+
+ struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param);
+ void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target);
+
+ int svm_save_model(const char *model_file_name, const struct svm_model *model);
+ struct svm_model *svm_load_model(const char *model_file_name);
+
+ int svm_get_svm_type(const struct svm_model *model);
+ int svm_get_nr_class(const struct svm_model *model);
+ void svm_get_labels(const struct svm_model *model, int *label);
+ void svm_get_sv_indices(const struct svm_model *model, int *sv_indices);
+ int svm_get_nr_sv(const struct svm_model *model);
+ double svm_get_svr_probability(const struct svm_model *model);
+
+ double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values);
+ double svm_predict(const struct svm_model *model, const struct svm_node *x);
+ double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates);
+
+ void svm_free_model_content(struct svm_model *model_ptr);
+ void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
+ void svm_destroy_param(struct svm_parameter *param);
+
+ const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param);
+ int svm_check_probability_model(const struct svm_model *model);
+
+ void svm_set_print_string_function(void (*print_func)(const char *));
+
+ #ifdef __cplusplus
+ }
+ #endif
+
+ #endif /* _LIBSVM_H */
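
The hunk above vendors the upstream LIBSVM 3.24 header that the extension compiles against. For orientation, here is a minimal, self-contained C sketch of how those declarations fit together; it is not code from this gem, and the data and parameter values are made up for illustration. A problem is a list of label/row pairs, each row is an svm_node array terminated by index -1, and training, prediction, and cleanup go through svm_train, svm_predict, and svm_free_and_destroy_model.

    #include <stdio.h>
    #include "svm.h"

    int main(void) {
      /* Two toy samples with one feature each; every row ends with a terminator node (index = -1). */
      struct svm_node row0[] = { {1, 0.0}, {-1, 0.0} };
      struct svm_node row1[] = { {1, 1.0}, {-1, 0.0} };
      struct svm_node *rows[] = { row0, row1 };
      double labels[] = { -1.0, 1.0 };
      struct svm_problem prob = { 2, labels, rows };  /* fields: l, y, x */

      struct svm_parameter param = { 0 };  /* zero everything, then set what C_SVC/RBF needs */
      param.svm_type = C_SVC;
      param.kernel_type = RBF;
      param.gamma = 1.0;
      param.C = 1.0;
      param.cache_size = 100;
      param.eps = 1e-3;

      const char *err = svm_check_parameter(&prob, &param);
      if (err != NULL) { fprintf(stderr, "%s\n", err); return 1; }

      struct svm_model *model = svm_train(&prob, &param);
      printf("prediction for row1: %g\n", svm_predict(model, row1));

      svm_free_and_destroy_model(&model);
      svm_destroy_param(&param);
      return 0;
    }
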
@@ -16,6 +16,30 @@ void print_null(const char *s) {}
  * @param y [Numo::DFloat] (shape: [n_samples]) The labels or target values for samples.
  * @param param [Hash] The parameters of an SVM model.
  *
+ * @example
+ *   require 'numo/libsvm'
+ *
+ *   # Prepare XOR data.
+ *   x = Numo::DFloat[[-0.8, -0.7], [0.9, 0.8], [-0.7, 0.9], [0.8, -0.9]]
+ *   y = Numo::Int32[-1, -1, 1, 1]
+ *
+ *   # Train C-Support Vector Classifier with RBF kernel.
+ *   param = {
+ *     svm_type: Numo::Libsvm::SvmType::C_SVC,
+ *     kernel_type: Numo::Libsvm::KernelType::RBF,
+ *     gamma: 2.0,
+ *     C: 1,
+ *     random_seed: 1
+ *   }
+ *   model = Numo::Libsvm.train(x, y, param)
+ *
+ *   # Predict labels of test data.
+ *   x_test = Numo::DFloat[[-0.4, -0.5], [0.5, -0.4]]
+ *   result = Numo::Libsvm.predict(x_test, param, model)
+ *   p result
+ *   # Numo::DFloat#shape=[2]
+ *   # [-1, 1]
+ *
  * @raise [ArgumentError] If the sample array is not 2-dimensional, the label array is not 1-dimensional,
  *   the sample array and label array do not have the same number of samples, or
  *   the hyperparameter has an invalid value, this error is raised.
@@ -30,6 +54,8 @@ VALUE train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash)
   narray_t* x_nary;
   narray_t* y_nary;
   char* err_msg;
+  VALUE random_seed;
+  VALUE verbose;
   VALUE model_hash;

   if (CLASS_OF(x_val) != numo_cDFloat) {
@@ -60,6 +86,11 @@ VALUE train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash)
     return Qnil;
   }

+  random_seed = rb_hash_aref(param_hash, ID2SYM(rb_intern("random_seed")));
+  if (!NIL_P(random_seed)) {
+    srand(NUM2UINT(random_seed));
+  }
+
   param = rb_hash_to_svm_parameter(param_hash);
   problem = dataset_to_svm_problem(x_val, y_val);

@@ -71,7 +102,11 @@ VALUE train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash)
     return Qnil;
   }

-  svm_set_print_string_function(print_null);
+  verbose = rb_hash_aref(param_hash, ID2SYM(rb_intern("verbose")));
+  if (verbose != Qtrue) {
+    svm_set_print_string_function(print_null);
+  }
+
   model = svm_train(problem, param);
   model_hash = svm_model_to_rb_hash(model);
   svm_free_and_destroy_model(&model);
@@ -79,6 +114,9 @@ VALUE train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash)
   xfree_svm_problem(problem);
   xfree_svm_parameter(param);

+  RB_GC_GUARD(x_val);
+  RB_GC_GUARD(y_val);
+
   return model_hash;
 }

@@ -92,6 +130,30 @@ VALUE train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash)
  * @param param [Hash] The parameters of an SVM model.
  * @param n_folds [Integer] The number of folds.
  *
+ * @example
+ *   require 'numo/libsvm'
+ *
+ *   # x: samples
+ *   # y: labels
+ *
+ *   # Define parameters of C-SVC with RBF kernel.
+ *   param = {
+ *     svm_type: Numo::Libsvm::SvmType::C_SVC,
+ *     kernel_type: Numo::Libsvm::KernelType::RBF,
+ *     gamma: 1.0,
+ *     C: 1,
+ *     random_seed: 1,
+ *     verbose: true
+ *   }
+ *
+ *   # Perform 5-fold cross-validation.
+ *   n_folds = 5
+ *   res = Numo::Libsvm.cv(x, y, param, n_folds)
+ *
+ *   # Print mean accuracy.
+ *   mean_accuracy = y.eq(res).count.fdiv(y.size)
+ *   puts "Accuracy: %.1f %%" % (100 * mean_accuracy)
+ *
  * @raise [ArgumentError] If the sample array is not 2-dimensional, the label array is not 1-dimensional,
  *   the sample array and label array do not have the same number of samples, or
  *   the hyperparameter has an invalid value, this error is raised.
@@ -107,6 +169,8 @@ VALUE cross_validation(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash, V
   narray_t* x_nary;
   narray_t* y_nary;
   char* err_msg;
+  VALUE random_seed;
+  VALUE verbose;
   struct svm_problem* problem;
   struct svm_parameter* param;

@@ -138,6 +202,11 @@ VALUE cross_validation(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash, V
     return Qnil;
   }

+  random_seed = rb_hash_aref(param_hash, ID2SYM(rb_intern("random_seed")));
+  if (!NIL_P(random_seed)) {
+    srand(NUM2UINT(random_seed));
+  }
+
   param = rb_hash_to_svm_parameter(param_hash);
   problem = dataset_to_svm_problem(x_val, y_val);

@@ -153,12 +222,19 @@ VALUE cross_validation(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash, V
   t_val = rb_narray_new(numo_cDFloat, 1, t_shape);
   t_pt = (double*)na_get_pointer_for_write(t_val);

-  svm_set_print_string_function(print_null);
+  verbose = rb_hash_aref(param_hash, ID2SYM(rb_intern("verbose")));
+  if (verbose != Qtrue) {
+    svm_set_print_string_function(print_null);
+  }
+
   svm_cross_validation(problem, param, n_folds, t_pt);

   xfree_svm_problem(problem);
   xfree_svm_parameter(param);

+  RB_GC_GUARD(x_val);
+  RB_GC_GUARD(y_val);
+
   return t_val;
 }

@@ -184,9 +260,10 @@ VALUE predict(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash)
   size_t y_shape[1];
   VALUE y_val;
   double* y_pt;
-  int i, j;
+  int i, j, k;
   int n_samples;
   int n_features;
+  int n_nonzero_features;

   /* Obtain C data structures. */
   if (CLASS_OF(x_val) != numo_cDFloat) {
@@ -215,21 +292,17 @@ VALUE predict(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash)
   x_pt = (double*)na_get_pointer_for_read(x_val);

   /* Predict values. */
-  x_nodes = ALLOC_N(struct svm_node, n_features + 1);
-  x_nodes[n_features].index = -1;
-  x_nodes[n_features].value = 0.0;
   for (i = 0; i < n_samples; i++) {
-    for (j = 0; j < n_features; j++) {
-      x_nodes[j].index = j + 1;
-      x_nodes[j].value = (double)x_pt[i * n_features + j];
-    }
+    x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
     y_pt[i] = svm_predict(model, x_nodes);
+    xfree(x_nodes);
   }

-  xfree(x_nodes);
   xfree_svm_model(model);
   xfree_svm_parameter(param);

+  RB_GC_GUARD(x_val);
+
   return y_val;
 }
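
In the predict path above, the per-sample conversion loop has been replaced by a call to the gem's dbl_vec_to_svm_node helper, whose definition is not part of this diff. A plausible sketch of what such a helper does, assuming it mirrors the zero-skipping conversion introduced in dataset_to_svm_problem further down, would be:

    /* Hypothetical sketch only; the gem's actual helper may differ in detail.
     * Converts one dense row of n_features doubles into a sparse, -1-terminated
     * svm_node array, skipping zero entries. */
    struct svm_node* dbl_vec_to_svm_node(double* vec, int n_features)
    {
      int j, k;
      int n_nonzero = 0;
      struct svm_node* nodes;

      for (j = 0; j < n_features; j++) {
        if (vec[j] != 0.0) n_nonzero++;
      }
      nodes = ALLOC_N(struct svm_node, n_nonzero + 1);
      for (j = 0, k = 0; j < n_features; j++) {
        if (vec[j] != 0.0) {
          nodes[k].index = j + 1;   /* LIBSVM feature indices are 1-based */
          nodes[k].value = vec[j];
          k++;
        }
      }
      nodes[n_nonzero].index = -1;  /* terminator expected by svm_predict */
      nodes[n_nonzero].value = 0.0;
      return nodes;
    }

Whatever its exact body, the calling convention visible in the diff is what matters here: one array is allocated per sample and released with xfree immediately after svm_predict.
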
@@ -298,40 +371,30 @@ VALUE decision_function(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_h

   /* Predict values. */
   if (model->param.svm_type == ONE_CLASS || model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) {
-    x_nodes = ALLOC_N(struct svm_node, n_features + 1);
-    x_nodes[n_features].index = -1;
-    x_nodes[n_features].value = 0.0;
     for (i = 0; i < n_samples; i++) {
-      for (j = 0; j < n_features; j++) {
-        x_nodes[j].index = j + 1;
-        x_nodes[j].value = (double)x_pt[i * n_features + j];
-      }
+      x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
       svm_predict_values(model, x_nodes, &y_pt[i]);
+      xfree(x_nodes);
     }
-    xfree(x_nodes);
   } else {
     y_cols = (int)y_shape[1];
     dec_values = ALLOC_N(double, y_cols);
-    x_nodes = ALLOC_N(struct svm_node, n_features + 1);
-    x_nodes[n_features].index = -1;
-    x_nodes[n_features].value = 0.0;
     for (i = 0; i < n_samples; i++) {
-      for (j = 0; j < n_features; j++) {
-        x_nodes[j].index = j + 1;
-        x_nodes[j].value = (double)x_pt[i * n_features + j];
-      }
+      x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
       svm_predict_values(model, x_nodes, dec_values);
+      xfree(x_nodes);
       for (j = 0; j < y_cols; j++) {
         y_pt[i * y_cols + j] = dec_values[j];
       }
     }
-    xfree(x_nodes);
     xfree(dec_values);
   }

   xfree_svm_model(model);
   xfree_svm_parameter(param);

+  RB_GC_GUARD(x_val);
+
   return y_val;
 }

@@ -393,26 +456,22 @@ VALUE predict_proba(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash)

     /* Predict values. */
     probs = ALLOC_N(double, model->nr_class);
-    x_nodes = ALLOC_N(struct svm_node, n_features + 1);
-    x_nodes[n_features].index = -1;
-    x_nodes[n_features].value = 0.0;
     for (i = 0; i < n_samples; i++) {
-      for (j = 0; j < n_features; j++) {
-        x_nodes[j].index = j + 1;
-        x_nodes[j].value = (double)x_pt[i * n_features + j];
-      }
+      x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
       svm_predict_probability(model, x_nodes, probs);
+      xfree(x_nodes);
       for (j = 0; j < model->nr_class; j++) {
         y_pt[i * model->nr_class + j] = probs[j];
       }
     }
-    xfree(x_nodes);
     xfree(probs);
   }

   xfree_svm_model(model);
   xfree_svm_parameter(param);

+  RB_GC_GUARD(x_val);
+
   return y_val;
 }

@@ -29,9 +29,12 @@ struct svm_problem* dataset_to_svm_problem(VALUE x_val, VALUE y_val)
   narray_t* x_nary;
   double* x_pt;
   double* y_pt;
-  int i, j;
+  int i, j, k;
   int n_samples;
   int n_features;
+  int n_nonzero_features;
+  int is_padded;
+  int last_feature_id;

   GetNArray(x_val, x_nary);
   n_samples = (int)NA_SHAPE(x_nary)[0];
@@ -43,16 +46,45 @@ struct svm_problem* dataset_to_svm_problem(VALUE x_val, VALUE y_val)
   problem->l = n_samples;
   problem->x = ALLOC_N(struct svm_node*, n_samples);
   problem->y = ALLOC_N(double, n_samples);
+
+  is_padded = 0;
   for (i = 0; i < n_samples; i++) {
-    problem->x[i] = ALLOC_N(struct svm_node, n_features + 1);
+    n_nonzero_features = 0;
     for (j = 0; j < n_features; j++) {
-      problem->x[i][j].index = j + 1;
-      problem->x[i][j].value = x_pt[i * n_features + j];
+      if (x_pt[i * n_features + j] != 0.0) {
+        n_nonzero_features += 1;
+        last_feature_id = j + 1;
+      }
+    }
+    if (is_padded == 0 && last_feature_id == n_features) {
+      is_padded = 1;
+    }
+    if (is_padded == 1) {
+      problem->x[i] = ALLOC_N(struct svm_node, n_nonzero_features + 1);
+    } else {
+      problem->x[i] = ALLOC_N(struct svm_node, n_nonzero_features + 2);
+    }
+    for (j = 0, k = 0; j < n_features; j++) {
+      if (x_pt[i * n_features + j] != 0.0) {
+        problem->x[i][k].index = j + 1;
+        problem->x[i][k].value = (double)x_pt[i * n_features + j];
+        k++;
+      }
+    }
+    if (is_padded == 1) {
+      problem->x[i][n_nonzero_features].index = -1;
+      problem->x[i][n_nonzero_features].value = 0.0;
+    } else {
+      problem->x[i][n_nonzero_features].index = n_features;
+      problem->x[i][n_nonzero_features].value = 0.0;
+      problem->x[i][n_nonzero_features + 1].index = -1;
+      problem->x[i][n_nonzero_features + 1].value = 0.0;
     }
-    problem->x[i][n_features].index = -1;
-    problem->x[i][n_features].value = 0.0;
     problem->y[i] = y_pt[i];
   }

+  RB_GC_GUARD(x_val);
+  RB_GC_GUARD(y_val);
+
   return problem;
 }
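
The rewritten loop above stores each sample in LIBSVM's sparse format: only non-zero features are kept, with 1-based indices and a terminating node whose index is -1. Until some row has a non-zero value in the last column (is_padded still 0), each row also carries an explicit zero-valued node at index n_features, presumably so that anything inferring the feature count from the largest stored index still sees the full width. A small trace of that logic for a made-up 4-feature input:

    /* Illustration only (not gem code): node arrays the loop above would build
     * for two hypothetical rows with n_features = 4. */

    /* Row [0.0, 0.5, 0.0, 1.2]: the last feature is non-zero, so is_padded
     * flips to 1 and the row needs n_nonzero_features + 1 = 3 nodes. */
    struct svm_node row_a[] = { {2, 0.5}, {4, 1.2}, {-1, 0.0} };

    /* Row [0.7, 0.0, 0.0, 0.0] processed while is_padded is still 0: one
     * non-zero node, a zero-valued pad at index 4, and the terminator. */
    struct svm_node row_b[] = { {1, 0.7}, {4, 0.0}, {-1, 0.0} };
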
@@ -3,6 +3,6 @@
  module Numo
    module Libsvm
      # The version of Numo::Libsvm you are using.
-     VERSION = '0.2.0'
+     VERSION = '1.0.1'
    end
  end
@@ -28,14 +28,28 @@ Gem::Specification.new do |spec|
    spec.files = Dir.chdir(File.expand_path(__dir__)) do
      `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
    end
+
+   gem_dir = File.expand_path(__dir__) + '/'
+   submodule_path = `git submodule --quiet foreach pwd`.split($OUTPUT_RECORD_SEPARATOR).first
+   submodule_relative_path = submodule_path.sub gem_dir, ''
+   spec.files << "#{submodule_relative_path}/svm.cpp"
+   spec.files << "#{submodule_relative_path}/svm.h"
+
    spec.bindir = 'exe'
    spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
    spec.require_paths = ['lib']
    spec.extensions = ['ext/numo/libsvm/extconf.rb']

+   spec.metadata = {
+     'homepage_uri' => 'https://github.com/yoshoku/numo-libsvm',
+     'source_code_uri' => 'https://github.com/yoshoku/numo-libsvm',
+     'documentation_uri' => 'https://yoshoku.github.io/numo-libsvm/doc/'
+   }
+
    spec.add_runtime_dependency 'numo-narray', '~> 0.9.1'
+
    spec.add_development_dependency 'bundler', '~> 2.0'
-   spec.add_development_dependency 'rake', '~> 10.0'
+   spec.add_development_dependency 'rake', '~> 12.0'
    spec.add_development_dependency 'rake-compiler', '~> 1.0'
    spec.add_development_dependency 'rspec', '~> 3.0'
  end