numo-libsvm 0.4.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,104 @@
1
+ #ifndef _LIBSVM_H
2
+ #define _LIBSVM_H
3
+
4
+ #define LIBSVM_VERSION 324
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ extern int libsvm_version;
11
+
12
+ struct svm_node
13
+ {
14
+ int index;
15
+ double value;
16
+ };
17
+
18
+ struct svm_problem
19
+ {
20
+ int l;
21
+ double *y;
22
+ struct svm_node **x;
23
+ };
24
+
25
+ enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR }; /* svm_type */
26
+ enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */
27
+
28
+ struct svm_parameter
29
+ {
30
+ int svm_type;
31
+ int kernel_type;
32
+ int degree; /* for poly */
33
+ double gamma; /* for poly/rbf/sigmoid */
34
+ double coef0; /* for poly/sigmoid */
35
+
36
+ /* these are for training only */
37
+ double cache_size; /* in MB */
38
+ double eps; /* stopping criteria */
39
+ double C; /* for C_SVC, EPSILON_SVR and NU_SVR */
40
+ int nr_weight; /* for C_SVC */
41
+ int *weight_label; /* for C_SVC */
42
+ double* weight; /* for C_SVC */
43
+ double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */
44
+ double p; /* for EPSILON_SVR */
45
+ int shrinking; /* use the shrinking heuristics */
46
+ int probability; /* do probability estimates */
47
+ };
48
+
49
+ //
50
+ // svm_model
51
+ //
52
+ struct svm_model
53
+ {
54
+ struct svm_parameter param; /* parameter */
55
+ int nr_class; /* number of classes, = 2 in regression/one class svm */
56
+ int l; /* total #SV */
57
+ struct svm_node **SV; /* SVs (SV[l]) */
58
+ double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
59
+ double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
60
+ double *probA; /* pairwise probability information */
61
+ double *probB;
62
+ int *sv_indices; /* sv_indices[0,...,nSV-1] are values in [1,...,num_training_data] to indicate SVs in the training set */
63
+
64
+ /* for classification only */
65
+
66
+ int *label; /* label of each class (label[k]) */
67
+ int *nSV; /* number of SVs for each class (nSV[k]) */
68
+ /* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
69
+ /* XXX */
70
+ int free_sv; /* 1 if svm_model is created by svm_load_model*/
71
+ /* 0 if svm_model is created by svm_train */
72
+ };
73
+
74
+ struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param);
75
+ void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target);
76
+
77
+ int svm_save_model(const char *model_file_name, const struct svm_model *model);
78
+ struct svm_model *svm_load_model(const char *model_file_name);
79
+
80
+ int svm_get_svm_type(const struct svm_model *model);
81
+ int svm_get_nr_class(const struct svm_model *model);
82
+ void svm_get_labels(const struct svm_model *model, int *label);
83
+ void svm_get_sv_indices(const struct svm_model *model, int *sv_indices);
84
+ int svm_get_nr_sv(const struct svm_model *model);
85
+ double svm_get_svr_probability(const struct svm_model *model);
86
+
87
+ double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values);
88
+ double svm_predict(const struct svm_model *model, const struct svm_node *x);
89
+ double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates);
90
+
91
+ void svm_free_model_content(struct svm_model *model_ptr);
92
+ void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
93
+ void svm_destroy_param(struct svm_parameter *param);
94
+
95
+ const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param);
96
+ int svm_check_probability_model(const struct svm_model *model);
97
+
98
+ void svm_set_print_string_function(void (*print_func)(const char *));
99
+
100
+ #ifdef __cplusplus
101
+ }
102
+ #endif
103
+
104
+ #endif /* _LIBSVM_H */
@@ -114,6 +114,9 @@ VALUE train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash)
114
114
  xfree_svm_problem(problem);
115
115
  xfree_svm_parameter(param);
116
116
 
117
+ RB_GC_GUARD(x_val);
118
+ RB_GC_GUARD(y_val);
119
+
117
120
  return model_hash;
118
121
  }
119
122
 
@@ -229,6 +232,9 @@ VALUE cross_validation(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash, V
229
232
  xfree_svm_problem(problem);
230
233
  xfree_svm_parameter(param);
231
234
 
235
+ RB_GC_GUARD(x_val);
236
+ RB_GC_GUARD(y_val);
237
+
232
238
  return t_val;
233
239
  }
234
240
 
@@ -254,9 +260,10 @@ VALUE predict(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash)
254
260
  size_t y_shape[1];
255
261
  VALUE y_val;
256
262
  double* y_pt;
257
- int i, j;
263
+ int i, j, k;
258
264
  int n_samples;
259
265
  int n_features;
266
+ int n_nonzero_features;
260
267
 
261
268
  /* Obtain C data structures. */
262
269
  if (CLASS_OF(x_val) != numo_cDFloat) {
@@ -285,21 +292,17 @@ VALUE predict(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash)
285
292
  x_pt = (double*)na_get_pointer_for_read(x_val);
286
293
 
287
294
  /* Predict values. */
288
- x_nodes = ALLOC_N(struct svm_node, n_features + 1);
289
- x_nodes[n_features].index = -1;
290
- x_nodes[n_features].value = 0.0;
291
295
  for (i = 0; i < n_samples; i++) {
292
- for (j = 0; j < n_features; j++) {
293
- x_nodes[j].index = j + 1;
294
- x_nodes[j].value = (double)x_pt[i * n_features + j];
295
- }
296
+ x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
296
297
  y_pt[i] = svm_predict(model, x_nodes);
298
+ xfree(x_nodes);
297
299
  }
298
300
 
299
- xfree(x_nodes);
300
301
  xfree_svm_model(model);
301
302
  xfree_svm_parameter(param);
302
303
 
304
+ RB_GC_GUARD(x_val);
305
+
303
306
  return y_val;
304
307
  }
305
308
 
@@ -368,40 +371,30 @@ VALUE decision_function(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_h
368
371
 
369
372
  /* Predict values. */
370
373
  if (model->param.svm_type == ONE_CLASS || model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) {
371
- x_nodes = ALLOC_N(struct svm_node, n_features + 1);
372
- x_nodes[n_features].index = -1;
373
- x_nodes[n_features].value = 0.0;
374
374
  for (i = 0; i < n_samples; i++) {
375
- for (j = 0; j < n_features; j++) {
376
- x_nodes[j].index = j + 1;
377
- x_nodes[j].value = (double)x_pt[i * n_features + j];
378
- }
375
+ x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
379
376
  svm_predict_values(model, x_nodes, &y_pt[i]);
377
+ xfree(x_nodes);
380
378
  }
381
- xfree(x_nodes);
382
379
  } else {
383
380
  y_cols = (int)y_shape[1];
384
381
  dec_values = ALLOC_N(double, y_cols);
385
- x_nodes = ALLOC_N(struct svm_node, n_features + 1);
386
- x_nodes[n_features].index = -1;
387
- x_nodes[n_features].value = 0.0;
388
382
  for (i = 0; i < n_samples; i++) {
389
- for (j = 0; j < n_features; j++) {
390
- x_nodes[j].index = j + 1;
391
- x_nodes[j].value = (double)x_pt[i * n_features + j];
392
- }
383
+ x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
393
384
  svm_predict_values(model, x_nodes, dec_values);
385
+ xfree(x_nodes);
394
386
  for (j = 0; j < y_cols; j++) {
395
387
  y_pt[i * y_cols + j] = dec_values[j];
396
388
  }
397
389
  }
398
- xfree(x_nodes);
399
390
  xfree(dec_values);
400
391
  }
401
392
 
402
393
  xfree_svm_model(model);
403
394
  xfree_svm_parameter(param);
404
395
 
396
+ RB_GC_GUARD(x_val);
397
+
405
398
  return y_val;
406
399
  }
407
400
 
@@ -463,26 +456,22 @@ VALUE predict_proba(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash)
463
456
 
464
457
  /* Predict values. */
465
458
  probs = ALLOC_N(double, model->nr_class);
466
- x_nodes = ALLOC_N(struct svm_node, n_features + 1);
467
- x_nodes[n_features].index = -1;
468
- x_nodes[n_features].value = 0.0;
469
459
  for (i = 0; i < n_samples; i++) {
470
- for (j = 0; j < n_features; j++) {
471
- x_nodes[j].index = j + 1;
472
- x_nodes[j].value = (double)x_pt[i * n_features + j];
473
- }
460
+ x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
474
461
  svm_predict_probability(model, x_nodes, probs);
462
+ xfree(x_nodes);
475
463
  for (j = 0; j < model->nr_class; j++) {
476
464
  y_pt[i * model->nr_class + j] = probs[j];
477
465
  }
478
466
  }
479
- xfree(x_nodes);
480
467
  xfree(probs);
481
468
  }
482
469
 
483
470
  xfree_svm_model(model);
484
471
  xfree_svm_parameter(param);
485
472
 
473
+ RB_GC_GUARD(x_val);
474
+
486
475
  return y_val;
487
476
  }
488
477
 
@@ -516,6 +505,8 @@ VALUE load_svm_model(VALUE self, VALUE filename)
516
505
  rb_ary_store(res, 0, param_hash);
517
506
  rb_ary_store(res, 1, model_hash);
518
507
 
508
+ RB_GC_GUARD(filename);
509
+
519
510
  return res;
520
511
  }
521
512
 
@@ -550,6 +541,8 @@ VALUE save_svm_model(VALUE self, VALUE filename, VALUE param_hash, VALUE model_h
550
541
  return Qfalse;
551
542
  }
552
543
 
544
+ RB_GC_GUARD(filename);
545
+
553
546
  return Qtrue;
554
547
  }
555
548
 
@@ -35,13 +35,13 @@ struct svm_parameter* rb_hash_to_svm_parameter(VALUE param_hash)
35
35
  param->weight_label = NULL;
36
36
  if (!NIL_P(el)) {
37
37
  param->weight_label = ALLOC_N(int, param->nr_weight);
38
- memcpy(param->weight_label, (int32_t*)na_get_pointer_for_read(el), param->nr_weight);
38
+ memcpy(param->weight_label, (int32_t*)na_get_pointer_for_read(el), param->nr_weight * sizeof(int32_t));
39
39
  }
40
40
  el = rb_hash_aref(param_hash, ID2SYM(rb_intern("weight")));
41
41
  param->weight = NULL;
42
42
  if (!NIL_P(el)) {
43
43
  param->weight = ALLOC_N(double, param->nr_weight);
44
- memcpy(param->weight, (double*)na_get_pointer_for_read(el), param->nr_weight);
44
+ memcpy(param->weight, (double*)na_get_pointer_for_read(el), param->nr_weight * sizeof(double));
45
45
  }
46
46
  return param;
47
47
  }
@@ -29,9 +29,12 @@ struct svm_problem* dataset_to_svm_problem(VALUE x_val, VALUE y_val)
29
29
  narray_t* x_nary;
30
30
  double* x_pt;
31
31
  double* y_pt;
32
- int i, j;
32
+ int i, j, k;
33
33
  int n_samples;
34
34
  int n_features;
35
+ int n_nonzero_features;
36
+ int is_padded;
37
+ int last_feature_id;
35
38
 
36
39
  GetNArray(x_val, x_nary);
37
40
  n_samples = (int)NA_SHAPE(x_nary)[0];
@@ -43,16 +46,45 @@ struct svm_problem* dataset_to_svm_problem(VALUE x_val, VALUE y_val)
43
46
  problem->l = n_samples;
44
47
  problem->x = ALLOC_N(struct svm_node*, n_samples);
45
48
  problem->y = ALLOC_N(double, n_samples);
49
+
50
+ is_padded = 0;
46
51
  for (i = 0; i < n_samples; i++) {
47
- problem->x[i] = ALLOC_N(struct svm_node, n_features + 1);
52
+ n_nonzero_features = 0;
48
53
  for (j = 0; j < n_features; j++) {
49
- problem->x[i][j].index = j + 1;
50
- problem->x[i][j].value = x_pt[i * n_features + j];
54
+ if (x_pt[i * n_features + j] != 0.0) {
55
+ n_nonzero_features += 1;
56
+ last_feature_id = j + 1;
57
+ }
58
+ }
59
+ if (is_padded == 0 && last_feature_id == n_features) {
60
+ is_padded = 1;
61
+ }
62
+ if (is_padded == 1) {
63
+ problem->x[i] = ALLOC_N(struct svm_node, n_nonzero_features + 1);
64
+ } else {
65
+ problem->x[i] = ALLOC_N(struct svm_node, n_nonzero_features + 2);
66
+ }
67
+ for (j = 0, k = 0; j < n_features; j++) {
68
+ if (x_pt[i * n_features + j] != 0.0) {
69
+ problem->x[i][k].index = j + 1;
70
+ problem->x[i][k].value = (double)x_pt[i * n_features + j];
71
+ k++;
72
+ }
73
+ }
74
+ if (is_padded == 1) {
75
+ problem->x[i][n_nonzero_features].index = -1;
76
+ problem->x[i][n_nonzero_features].value = 0.0;
77
+ } else {
78
+ problem->x[i][n_nonzero_features].index = n_features;
79
+ problem->x[i][n_nonzero_features].value = 0.0;
80
+ problem->x[i][n_nonzero_features + 1].index = -1;
81
+ problem->x[i][n_nonzero_features + 1].value = 0.0;
51
82
  }
52
- problem->x[i][n_features].index = -1;
53
- problem->x[i][n_features].value = 0.0;
54
83
  problem->y[i] = y_pt[i];
55
84
  }
56
85
 
86
+ RB_GC_GUARD(x_val);
87
+ RB_GC_GUARD(y_val);
88
+
57
89
  return problem;
58
90
  }
@@ -3,6 +3,6 @@
3
3
  module Numo
4
4
  module Libsvm
5
5
  # The version of Numo::Libsvm you are using.
6
- VERSION = '0.4.0'
6
+ VERSION = '1.1.0'
7
7
  end
8
8
  end
data/numo-libsvm.gemspec CHANGED
@@ -28,14 +28,23 @@ Gem::Specification.new do |spec|
28
28
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
29
29
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
30
30
  end
31
+
32
+ gem_dir = File.expand_path(__dir__) + '/'
33
+ submodule_path = `git submodule --quiet foreach pwd`.split($OUTPUT_RECORD_SEPARATOR).first
34
+ submodule_relative_path = submodule_path.sub gem_dir, ''
35
+ spec.files << "#{submodule_relative_path}/svm.cpp"
36
+ spec.files << "#{submodule_relative_path}/svm.h"
37
+
31
38
  spec.bindir = 'exe'
32
39
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
33
40
  spec.require_paths = ['lib']
34
41
  spec.extensions = ['ext/numo/libsvm/extconf.rb']
35
42
 
43
+ spec.metadata = {
44
+ 'homepage_uri' => 'https://github.com/yoshoku/numo-libsvm',
45
+ 'source_code_uri' => 'https://github.com/yoshoku/numo-libsvm',
46
+ 'documentation_uri' => 'https://yoshoku.github.io/numo-libsvm/doc/'
47
+ }
48
+
36
49
  spec.add_runtime_dependency 'numo-narray', '~> 0.9.1'
37
- spec.add_development_dependency 'bundler', '~> 2.0'
38
- spec.add_development_dependency 'rake', '~> 10.0'
39
- spec.add_development_dependency 'rake-compiler', '~> 1.0'
40
- spec.add_development_dependency 'rspec', '~> 3.0'
41
50
  end
@@ -0,0 +1,65 @@
1
+ module Numo
2
+ module Libsvm
3
+ module SvmType
4
+ C_SVC: Integer
5
+ NU_SVC: Integer
6
+ ONE_CLASS: Integer
7
+ EPSILON_SVR: Integer
8
+ NU_SVR: Integer
9
+ end
10
+
11
+ module KenelType
12
+ LINEAR: Integer
13
+ POLY: Integer
14
+ RBF: Integer
15
+ SIGMOID: Integer
16
+ PRECOMPUTED: Integer
17
+ end
18
+
19
+ LIBSVM_VERSION: Integer
20
+ VERSION: String
21
+
22
+ type model = {
23
+ nr_class: Integer,
24
+ l: Integer,
25
+ SV: Numo::DFloat,
26
+ sv_coef: Numo::DFloat,
27
+ rho: Numo::DFloat,
28
+ probA: Numo::DFloat,
29
+ probB: Numo::DFloat,
30
+ sv_indices: Numo::Int32,
31
+ label: Numo::Int32,
32
+ nSV: Numo::Int32,
33
+ free_sv: Integer
34
+ }
35
+
36
+ type param = {
37
+ svm_type: Integer?,
38
+ kernel_type: Integer?,
39
+ degree: Integer?,
40
+ gamma: Float?,
41
+ coef0: Float?,
42
+ cache_size: Float?,
43
+ eps: Float?,
44
+ C: Float?,
45
+ nr_weight: Integer?,
46
+ weight_label: Numo::Int32?,
47
+ weight: Numo::DFloat?,
48
+ nu: Float?,
49
+ p: Float?,
50
+ shrinking: bool?,
51
+ probability: bool?,
52
+ verbose: bool?,
53
+ random_seed: Integer?
54
+ }
55
+
56
+ def self?.cv: (Numo::DFloat x, Numo::DFloat y, param, Integer n_folds) -> Numo::DFloat
57
+ def self?.train: (Numo::DFloat x, Numo::DFloat y, param) -> model
58
+ def self?.predict: (Numo::DFloat x, param, model) -> Numo::DFloat
59
+ def self?.predict_proba: (Numo::DFloat x, param, model) -> Numo::DFloat
60
+ def self?.decision_function: (Numo::DFloat x, param, model) -> Numo::DFloat
61
+ def self?.save_svm_model: (String filename, param, model) -> bool
62
+ def self?.load_svm_model: (String filename) -> [param, model]
63
+ end
64
+ end
65
+
data/sig/patch.rbs ADDED
@@ -0,0 +1,8 @@
1
+ module Numo
2
+ class NArray
3
+ end
4
+ class DFloat < NArray
5
+ end
6
+ class Int32 < NArray
7
+ end
8
+ end