numo-libsvm 0.4.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/build.yml +28 -0
- data/.gitmodules +3 -0
- data/CHANGELOG.md +19 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +1 -1
- data/README.md +5 -13
- data/Steepfile +20 -0
- data/ext/numo/libsvm/converter.c +57 -15
- data/ext/numo/libsvm/converter.h +2 -1
- data/ext/numo/libsvm/extconf.rb +7 -11
- data/ext/numo/libsvm/libsvm/svm.cpp +3182 -0
- data/ext/numo/libsvm/libsvm/svm.h +104 -0
- data/ext/numo/libsvm/libsvmext.c +26 -33
- data/ext/numo/libsvm/svm_parameter.c +2 -2
- data/ext/numo/libsvm/svm_problem.c +38 -6
- data/lib/numo/libsvm/version.rb +1 -1
- data/numo-libsvm.gemspec +13 -4
- data/sig/numo/libsvm.rbs +65 -0
- data/sig/patch.rbs +8 -0
- metadata +14 -62
- data/.travis.yml +0 -14
@@ -0,0 +1,104 @@
|
|
1
|
+
#ifndef _LIBSVM_H
|
2
|
+
#define _LIBSVM_H
|
3
|
+
|
4
|
+
#define LIBSVM_VERSION 324
|
5
|
+
|
6
|
+
#ifdef __cplusplus
|
7
|
+
extern "C" {
|
8
|
+
#endif
|
9
|
+
|
10
|
+
extern int libsvm_version;
|
11
|
+
|
12
|
+
struct svm_node
|
13
|
+
{
|
14
|
+
int index;
|
15
|
+
double value;
|
16
|
+
};
|
17
|
+
|
18
|
+
struct svm_problem
|
19
|
+
{
|
20
|
+
int l;
|
21
|
+
double *y;
|
22
|
+
struct svm_node **x;
|
23
|
+
};
|
24
|
+
|
25
|
+
enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR }; /* svm_type */
|
26
|
+
enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */
|
27
|
+
|
28
|
+
struct svm_parameter
|
29
|
+
{
|
30
|
+
int svm_type;
|
31
|
+
int kernel_type;
|
32
|
+
int degree; /* for poly */
|
33
|
+
double gamma; /* for poly/rbf/sigmoid */
|
34
|
+
double coef0; /* for poly/sigmoid */
|
35
|
+
|
36
|
+
/* these are for training only */
|
37
|
+
double cache_size; /* in MB */
|
38
|
+
double eps; /* stopping criteria */
|
39
|
+
double C; /* for C_SVC, EPSILON_SVR and NU_SVR */
|
40
|
+
int nr_weight; /* for C_SVC */
|
41
|
+
int *weight_label; /* for C_SVC */
|
42
|
+
double* weight; /* for C_SVC */
|
43
|
+
double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */
|
44
|
+
double p; /* for EPSILON_SVR */
|
45
|
+
int shrinking; /* use the shrinking heuristics */
|
46
|
+
int probability; /* do probability estimates */
|
47
|
+
};
|
48
|
+
|
49
|
+
//
|
50
|
+
// svm_model
|
51
|
+
//
|
52
|
+
struct svm_model
|
53
|
+
{
|
54
|
+
struct svm_parameter param; /* parameter */
|
55
|
+
int nr_class; /* number of classes, = 2 in regression/one class svm */
|
56
|
+
int l; /* total #SV */
|
57
|
+
struct svm_node **SV; /* SVs (SV[l]) */
|
58
|
+
double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
|
59
|
+
double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
|
60
|
+
double *probA; /* pairwise probability information */
|
61
|
+
double *probB;
|
62
|
+
int *sv_indices; /* sv_indices[0,...,nSV-1] are values in [1,...,num_training_data] to indicate SVs in the training set */
|
63
|
+
|
64
|
+
/* for classification only */
|
65
|
+
|
66
|
+
int *label; /* label of each class (label[k]) */
|
67
|
+
int *nSV; /* number of SVs for each class (nSV[k]) */
|
68
|
+
/* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
|
69
|
+
/* XXX */
|
70
|
+
int free_sv; /* 1 if svm_model is created by svm_load_model */
|
71
|
+
/* 0 if svm_model is created by svm_train */
|
72
|
+
};
|
73
|
+
|
74
|
+
struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param);
|
75
|
+
void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target);
|
76
|
+
|
77
|
+
int svm_save_model(const char *model_file_name, const struct svm_model *model);
|
78
|
+
struct svm_model *svm_load_model(const char *model_file_name);
|
79
|
+
|
80
|
+
int svm_get_svm_type(const struct svm_model *model);
|
81
|
+
int svm_get_nr_class(const struct svm_model *model);
|
82
|
+
void svm_get_labels(const struct svm_model *model, int *label);
|
83
|
+
void svm_get_sv_indices(const struct svm_model *model, int *sv_indices);
|
84
|
+
int svm_get_nr_sv(const struct svm_model *model);
|
85
|
+
double svm_get_svr_probability(const struct svm_model *model);
|
86
|
+
|
87
|
+
double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values);
|
88
|
+
double svm_predict(const struct svm_model *model, const struct svm_node *x);
|
89
|
+
double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates);
|
90
|
+
|
91
|
+
void svm_free_model_content(struct svm_model *model_ptr);
|
92
|
+
void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
|
93
|
+
void svm_destroy_param(struct svm_parameter *param);
|
94
|
+
|
95
|
+
const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param);
|
96
|
+
int svm_check_probability_model(const struct svm_model *model);
|
97
|
+
|
98
|
+
void svm_set_print_string_function(void (*print_func)(const char *));
|
99
|
+
|
100
|
+
#ifdef __cplusplus
|
101
|
+
}
|
102
|
+
#endif
|
103
|
+
|
104
|
+
#endif /* _LIBSVM_H */
|
data/ext/numo/libsvm/libsvmext.c
CHANGED
@@ -114,6 +114,9 @@ VALUE train(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash)
|
|
114
114
|
xfree_svm_problem(problem);
|
115
115
|
xfree_svm_parameter(param);
|
116
116
|
|
117
|
+
RB_GC_GUARD(x_val);
|
118
|
+
RB_GC_GUARD(y_val);
|
119
|
+
|
117
120
|
return model_hash;
|
118
121
|
}
|
119
122
|
|
@@ -229,6 +232,9 @@ VALUE cross_validation(VALUE self, VALUE x_val, VALUE y_val, VALUE param_hash, V
|
|
229
232
|
xfree_svm_problem(problem);
|
230
233
|
xfree_svm_parameter(param);
|
231
234
|
|
235
|
+
RB_GC_GUARD(x_val);
|
236
|
+
RB_GC_GUARD(y_val);
|
237
|
+
|
232
238
|
return t_val;
|
233
239
|
}
|
234
240
|
|
@@ -254,9 +260,10 @@ VALUE predict(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash)
|
|
254
260
|
size_t y_shape[1];
|
255
261
|
VALUE y_val;
|
256
262
|
double* y_pt;
|
257
|
-
int i, j;
|
263
|
+
int i, j, k;
|
258
264
|
int n_samples;
|
259
265
|
int n_features;
|
266
|
+
int n_nonzero_features;
|
260
267
|
|
261
268
|
/* Obtain C data structures. */
|
262
269
|
if (CLASS_OF(x_val) != numo_cDFloat) {
|
@@ -285,21 +292,17 @@ VALUE predict(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash)
|
|
285
292
|
x_pt = (double*)na_get_pointer_for_read(x_val);
|
286
293
|
|
287
294
|
/* Predict values. */
|
288
|
-
x_nodes = ALLOC_N(struct svm_node, n_features + 1);
|
289
|
-
x_nodes[n_features].index = -1;
|
290
|
-
x_nodes[n_features].value = 0.0;
|
291
295
|
for (i = 0; i < n_samples; i++) {
|
292
|
-
|
293
|
-
x_nodes[j].index = j + 1;
|
294
|
-
x_nodes[j].value = (double)x_pt[i * n_features + j];
|
295
|
-
}
|
296
|
+
x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
|
296
297
|
y_pt[i] = svm_predict(model, x_nodes);
|
298
|
+
xfree(x_nodes);
|
297
299
|
}
|
298
300
|
|
299
|
-
xfree(x_nodes);
|
300
301
|
xfree_svm_model(model);
|
301
302
|
xfree_svm_parameter(param);
|
302
303
|
|
304
|
+
RB_GC_GUARD(x_val);
|
305
|
+
|
303
306
|
return y_val;
|
304
307
|
}
|
305
308
|
|
@@ -368,40 +371,30 @@ VALUE decision_function(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_h
|
|
368
371
|
|
369
372
|
/* Predict values. */
|
370
373
|
if (model->param.svm_type == ONE_CLASS || model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) {
|
371
|
-
x_nodes = ALLOC_N(struct svm_node, n_features + 1);
|
372
|
-
x_nodes[n_features].index = -1;
|
373
|
-
x_nodes[n_features].value = 0.0;
|
374
374
|
for (i = 0; i < n_samples; i++) {
|
375
|
-
|
376
|
-
x_nodes[j].index = j + 1;
|
377
|
-
x_nodes[j].value = (double)x_pt[i * n_features + j];
|
378
|
-
}
|
375
|
+
x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
|
379
376
|
svm_predict_values(model, x_nodes, &y_pt[i]);
|
377
|
+
xfree(x_nodes);
|
380
378
|
}
|
381
|
-
xfree(x_nodes);
|
382
379
|
} else {
|
383
380
|
y_cols = (int)y_shape[1];
|
384
381
|
dec_values = ALLOC_N(double, y_cols);
|
385
|
-
x_nodes = ALLOC_N(struct svm_node, n_features + 1);
|
386
|
-
x_nodes[n_features].index = -1;
|
387
|
-
x_nodes[n_features].value = 0.0;
|
388
382
|
for (i = 0; i < n_samples; i++) {
|
389
|
-
|
390
|
-
x_nodes[j].index = j + 1;
|
391
|
-
x_nodes[j].value = (double)x_pt[i * n_features + j];
|
392
|
-
}
|
383
|
+
x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
|
393
384
|
svm_predict_values(model, x_nodes, dec_values);
|
385
|
+
xfree(x_nodes);
|
394
386
|
for (j = 0; j < y_cols; j++) {
|
395
387
|
y_pt[i * y_cols + j] = dec_values[j];
|
396
388
|
}
|
397
389
|
}
|
398
|
-
xfree(x_nodes);
|
399
390
|
xfree(dec_values);
|
400
391
|
}
|
401
392
|
|
402
393
|
xfree_svm_model(model);
|
403
394
|
xfree_svm_parameter(param);
|
404
395
|
|
396
|
+
RB_GC_GUARD(x_val);
|
397
|
+
|
405
398
|
return y_val;
|
406
399
|
}
|
407
400
|
|
@@ -463,26 +456,22 @@ VALUE predict_proba(VALUE self, VALUE x_val, VALUE param_hash, VALUE model_hash)
|
|
463
456
|
|
464
457
|
/* Predict values. */
|
465
458
|
probs = ALLOC_N(double, model->nr_class);
|
466
|
-
x_nodes = ALLOC_N(struct svm_node, n_features + 1);
|
467
|
-
x_nodes[n_features].index = -1;
|
468
|
-
x_nodes[n_features].value = 0.0;
|
469
459
|
for (i = 0; i < n_samples; i++) {
|
470
|
-
|
471
|
-
x_nodes[j].index = j + 1;
|
472
|
-
x_nodes[j].value = (double)x_pt[i * n_features + j];
|
473
|
-
}
|
460
|
+
x_nodes = dbl_vec_to_svm_node(&x_pt[i * n_features], n_features);
|
474
461
|
svm_predict_probability(model, x_nodes, probs);
|
462
|
+
xfree(x_nodes);
|
475
463
|
for (j = 0; j < model->nr_class; j++) {
|
476
464
|
y_pt[i * model->nr_class + j] = probs[j];
|
477
465
|
}
|
478
466
|
}
|
479
|
-
xfree(x_nodes);
|
480
467
|
xfree(probs);
|
481
468
|
}
|
482
469
|
|
483
470
|
xfree_svm_model(model);
|
484
471
|
xfree_svm_parameter(param);
|
485
472
|
|
473
|
+
RB_GC_GUARD(x_val);
|
474
|
+
|
486
475
|
return y_val;
|
487
476
|
}
|
488
477
|
|
@@ -516,6 +505,8 @@ VALUE load_svm_model(VALUE self, VALUE filename)
|
|
516
505
|
rb_ary_store(res, 0, param_hash);
|
517
506
|
rb_ary_store(res, 1, model_hash);
|
518
507
|
|
508
|
+
RB_GC_GUARD(filename);
|
509
|
+
|
519
510
|
return res;
|
520
511
|
}
|
521
512
|
|
@@ -550,6 +541,8 @@ VALUE save_svm_model(VALUE self, VALUE filename, VALUE param_hash, VALUE model_h
|
|
550
541
|
return Qfalse;
|
551
542
|
}
|
552
543
|
|
544
|
+
RB_GC_GUARD(filename);
|
545
|
+
|
553
546
|
return Qtrue;
|
554
547
|
}
|
555
548
|
|
@@ -35,13 +35,13 @@ struct svm_parameter* rb_hash_to_svm_parameter(VALUE param_hash)
|
|
35
35
|
param->weight_label = NULL;
|
36
36
|
if (!NIL_P(el)) {
|
37
37
|
param->weight_label = ALLOC_N(int, param->nr_weight);
|
38
|
-
memcpy(param->weight_label, (int32_t*)na_get_pointer_for_read(el), param->nr_weight);
|
38
|
+
memcpy(param->weight_label, (int32_t*)na_get_pointer_for_read(el), param->nr_weight * sizeof(int32_t));
|
39
39
|
}
|
40
40
|
el = rb_hash_aref(param_hash, ID2SYM(rb_intern("weight")));
|
41
41
|
param->weight = NULL;
|
42
42
|
if (!NIL_P(el)) {
|
43
43
|
param->weight = ALLOC_N(double, param->nr_weight);
|
44
|
-
memcpy(param->weight, (double*)na_get_pointer_for_read(el), param->nr_weight);
|
44
|
+
memcpy(param->weight, (double*)na_get_pointer_for_read(el), param->nr_weight * sizeof(double));
|
45
45
|
}
|
46
46
|
return param;
|
47
47
|
}
|
@@ -29,9 +29,12 @@ struct svm_problem* dataset_to_svm_problem(VALUE x_val, VALUE y_val)
|
|
29
29
|
narray_t* x_nary;
|
30
30
|
double* x_pt;
|
31
31
|
double* y_pt;
|
32
|
-
int i, j;
|
32
|
+
int i, j, k;
|
33
33
|
int n_samples;
|
34
34
|
int n_features;
|
35
|
+
int n_nonzero_features;
|
36
|
+
int is_padded;
|
37
|
+
int last_feature_id;
|
35
38
|
|
36
39
|
GetNArray(x_val, x_nary);
|
37
40
|
n_samples = (int)NA_SHAPE(x_nary)[0];
|
@@ -43,16 +46,45 @@ struct svm_problem* dataset_to_svm_problem(VALUE x_val, VALUE y_val)
|
|
43
46
|
problem->l = n_samples;
|
44
47
|
problem->x = ALLOC_N(struct svm_node*, n_samples);
|
45
48
|
problem->y = ALLOC_N(double, n_samples);
|
49
|
+
|
50
|
+
is_padded = 0;
|
46
51
|
for (i = 0; i < n_samples; i++) {
|
47
|
-
|
52
|
+
n_nonzero_features = 0;
|
48
53
|
for (j = 0; j < n_features; j++) {
|
49
|
-
|
50
|
-
|
54
|
+
if (x_pt[i * n_features + j] != 0.0) {
|
55
|
+
n_nonzero_features += 1;
|
56
|
+
last_feature_id = j + 1;
|
57
|
+
}
|
58
|
+
}
|
59
|
+
if (is_padded == 0 && last_feature_id == n_features) {
|
60
|
+
is_padded = 1;
|
61
|
+
}
|
62
|
+
if (is_padded == 1) {
|
63
|
+
problem->x[i] = ALLOC_N(struct svm_node, n_nonzero_features + 1);
|
64
|
+
} else {
|
65
|
+
problem->x[i] = ALLOC_N(struct svm_node, n_nonzero_features + 2);
|
66
|
+
}
|
67
|
+
for (j = 0, k = 0; j < n_features; j++) {
|
68
|
+
if (x_pt[i * n_features + j] != 0.0) {
|
69
|
+
problem->x[i][k].index = j + 1;
|
70
|
+
problem->x[i][k].value = (double)x_pt[i * n_features + j];
|
71
|
+
k++;
|
72
|
+
}
|
73
|
+
}
|
74
|
+
if (is_padded == 1) {
|
75
|
+
problem->x[i][n_nonzero_features].index = -1;
|
76
|
+
problem->x[i][n_nonzero_features].value = 0.0;
|
77
|
+
} else {
|
78
|
+
problem->x[i][n_nonzero_features].index = n_features;
|
79
|
+
problem->x[i][n_nonzero_features].value = 0.0;
|
80
|
+
problem->x[i][n_nonzero_features + 1].index = -1;
|
81
|
+
problem->x[i][n_nonzero_features + 1].value = 0.0;
|
51
82
|
}
|
52
|
-
problem->x[i][n_features].index = -1;
|
53
|
-
problem->x[i][n_features].value = 0.0;
|
54
83
|
problem->y[i] = y_pt[i];
|
55
84
|
}
|
56
85
|
|
86
|
+
RB_GC_GUARD(x_val);
|
87
|
+
RB_GC_GUARD(y_val);
|
88
|
+
|
57
89
|
return problem;
|
58
90
|
}
|
data/lib/numo/libsvm/version.rb
CHANGED
data/numo-libsvm.gemspec
CHANGED
@@ -28,14 +28,23 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
29
29
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
30
30
|
end
|
31
|
+
|
32
|
+
gem_dir = File.expand_path(__dir__) + '/'
|
33
|
+
submodule_path = `git submodule --quiet foreach pwd`.split($OUTPUT_RECORD_SEPARATOR).first
|
34
|
+
submodule_relative_path = submodule_path.sub gem_dir, ''
|
35
|
+
spec.files << "#{submodule_relative_path}/svm.cpp"
|
36
|
+
spec.files << "#{submodule_relative_path}/svm.h"
|
37
|
+
|
31
38
|
spec.bindir = 'exe'
|
32
39
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
33
40
|
spec.require_paths = ['lib']
|
34
41
|
spec.extensions = ['ext/numo/libsvm/extconf.rb']
|
35
42
|
|
43
|
+
spec.metadata = {
|
44
|
+
'homepage_uri' => 'https://github.com/yoshoku/numo-libsvm',
|
45
|
+
'source_code_uri' => 'https://github.com/yoshoku/numo-libsvm',
|
46
|
+
'documentation_uri' => 'https://yoshoku.github.io/numo-libsvm/doc/'
|
47
|
+
}
|
48
|
+
|
36
49
|
spec.add_runtime_dependency 'numo-narray', '~> 0.9.1'
|
37
|
-
spec.add_development_dependency 'bundler', '~> 2.0'
|
38
|
-
spec.add_development_dependency 'rake', '~> 10.0'
|
39
|
-
spec.add_development_dependency 'rake-compiler', '~> 1.0'
|
40
|
-
spec.add_development_dependency 'rspec', '~> 3.0'
|
41
50
|
end
|
data/sig/numo/libsvm.rbs
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
module Numo
|
2
|
+
module Libsvm
|
3
|
+
module SvmType
|
4
|
+
C_SVC: Integer
|
5
|
+
NU_SVC: Integer
|
6
|
+
ONE_CLASS: Integer
|
7
|
+
EPSILON_SVR: Integer
|
8
|
+
NU_SVR: Integer
|
9
|
+
end
|
10
|
+
|
11
|
+
module KenelType
|
12
|
+
LINEAR: Integer
|
13
|
+
POLY: Integer
|
14
|
+
RBF: Integer
|
15
|
+
SIGMOID: Integer
|
16
|
+
PRECOMPUTED: Integer
|
17
|
+
end
|
18
|
+
|
19
|
+
LIBSVM_VERSION: Integer
|
20
|
+
VERSION: String
|
21
|
+
|
22
|
+
type model = {
|
23
|
+
nr_class: Integer,
|
24
|
+
l: Integer,
|
25
|
+
SV: Numo::DFloat,
|
26
|
+
sv_coef: Numo::DFloat,
|
27
|
+
rho: Numo::DFloat,
|
28
|
+
probA: Numo::DFloat,
|
29
|
+
probB: Numo::DFloat,
|
30
|
+
sv_indices: Numo::Int32,
|
31
|
+
label: Numo::Int32,
|
32
|
+
nSV: Numo::Int32,
|
33
|
+
free_sv: Integer
|
34
|
+
}
|
35
|
+
|
36
|
+
type param = {
|
37
|
+
svm_type: Integer?,
|
38
|
+
kernel_type: Integer?,
|
39
|
+
degree: Integer?,
|
40
|
+
gamma: Float?,
|
41
|
+
coef0: Float?,
|
42
|
+
cache_size: Float?,
|
43
|
+
eps: Float?,
|
44
|
+
C: Float?,
|
45
|
+
nr_weight: Integer?,
|
46
|
+
weight_label: Numo::Int32?,
|
47
|
+
weight: Numo::DFloat?,
|
48
|
+
nu: Float?,
|
49
|
+
p: Float?,
|
50
|
+
shrinking: bool?,
|
51
|
+
probability: bool?,
|
52
|
+
verbose: bool?,
|
53
|
+
random_seed: Integer?
|
54
|
+
}
|
55
|
+
|
56
|
+
def self?.cv: (Numo::DFloat x, Numo::DFloat y, param, Integer n_folds) -> Numo::DFloat
|
57
|
+
def self?.train: (Numo::DFloat x, Numo::DFloat y, param) -> model
|
58
|
+
def self?.predict: (Numo::DFloat x, param, model) -> Numo::DFloat
|
59
|
+
def self?.predict_proba: (Numo::DFloat x, param, model) -> Numo::DFloat
|
60
|
+
def self?.decision_function: (Numo::DFloat x, param, model) -> Numo::DFloat
|
61
|
+
def self?.save_svm_model: (String filename, param, model) -> bool
|
62
|
+
def self?.load_svm_model: (String filename) -> [param, model]
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|