numo-libsvm 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/numo/libsvm/libsvmext.hpp +9 -3
- data/ext/numo/libsvm/src/svm.cpp +134 -18
- data/ext/numo/libsvm/src/svm.h +2 -1
- data/lib/numo/libsvm/version.rb +1 -1
- data/sig/numo/libsvm.rbs +1 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37613a10119b2974024687c3f781d271ebaa30b15ccff6100df17c32642dcd2d
|
4
|
+
data.tar.gz: 210e072576d4fa859364fdb522e6f02c9be12458e5ed0647fcd8bebedfdb3e84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 142e15fe744460f679f7bd7978f1654e2e71a40a49ce3a414c3c0e68b507d6d9c286eec9edd37a10f5b9fc60a940079c42e59d1283ba607cd7748b80d16fa12f
|
7
|
+
data.tar.gz: 67909b309d9a186e77dc716cc2e8b1299d0be3baf10be7be87fc89f2bc67a47b087063827be2f8ba750378d354f745c58dec0feb072f8eea73c883c3c034a669
|
data/CHANGELOG.md
CHANGED
data/ext/numo/libsvm/libsvmext.hpp
CHANGED
@@ -48,6 +48,8 @@ typedef struct svm_problem LibSvmProblem;
|
|
48
48
|
|
49
49
|
/** Print callback that ignores the message it is given; the body is intentionally empty. */
void printNull(const char* s) {
  (void)s; /* unused on purpose */
}
|
50
50
|
|
51
|
+
#define NR_MARKS 10
|
52
|
+
|
51
53
|
/** CONVERTERS */
|
52
54
|
VALUE convertVectorXiToNArray(const int* const arr, const int size) {
|
53
55
|
size_t shape[1] = {(size_t)size};
|
@@ -215,6 +217,8 @@ LibSvmModel* convertHashToLibSvmModel(VALUE model_hash) {
|
|
215
217
|
model->probA = convertNArrayToVectorXd(el);
|
216
218
|
el = rb_hash_aref(model_hash, ID2SYM(rb_intern("probB")));
|
217
219
|
model->probB = convertNArrayToVectorXd(el);
|
220
|
+
el = rb_hash_aref(model_hash, ID2SYM(rb_intern("prob_density_marks")));
|
221
|
+
model->prob_density_marks = convertNArrayToVectorXd(el);
|
218
222
|
el = rb_hash_aref(model_hash, ID2SYM(rb_intern("sv_indices")));
|
219
223
|
model->sv_indices = convertNArrayToVectorXi(el);
|
220
224
|
el = rb_hash_aref(model_hash, ID2SYM(rb_intern("label")));
|
@@ -234,6 +238,7 @@ VALUE convertLibSvmModelToHash(const LibSvmModel* const model) {
|
|
234
238
|
VALUE intercepts = model->rho ? convertVectorXdToNArray(model->rho, n_classes * (n_classes - 1) / 2) : Qnil;
|
235
239
|
VALUE prob_alpha = model->probA ? convertVectorXdToNArray(model->probA, n_classes * (n_classes - 1) / 2) : Qnil;
|
236
240
|
VALUE prob_beta = model->probB ? convertVectorXdToNArray(model->probB, n_classes * (n_classes - 1) / 2) : Qnil;
|
241
|
+
VALUE prob_density_marks = model->prob_density_marks ? convertVectorXdToNArray(model->prob_density_marks, NR_MARKS) : Qnil;
|
237
242
|
VALUE sv_indices = model->sv_indices ? convertVectorXiToNArray(model->sv_indices, n_support_vecs) : Qnil;
|
238
243
|
VALUE labels = model->label ? convertVectorXiToNArray(model->label, n_classes) : Qnil;
|
239
244
|
VALUE n_support_vecs_each_class = model->nSV ? convertVectorXiToNArray(model->nSV, n_classes) : Qnil;
|
@@ -245,6 +250,7 @@ VALUE convertLibSvmModelToHash(const LibSvmModel* const model) {
|
|
245
250
|
rb_hash_aset(model_hash, ID2SYM(rb_intern("rho")), intercepts);
|
246
251
|
rb_hash_aset(model_hash, ID2SYM(rb_intern("probA")), prob_alpha);
|
247
252
|
rb_hash_aset(model_hash, ID2SYM(rb_intern("probB")), prob_beta);
|
253
|
+
rb_hash_aset(model_hash, ID2SYM(rb_intern("prob_density_marks")), prob_density_marks);
|
248
254
|
rb_hash_aset(model_hash, ID2SYM(rb_intern("sv_indices")), sv_indices);
|
249
255
|
rb_hash_aset(model_hash, ID2SYM(rb_intern("label")), labels);
|
250
256
|
rb_hash_aset(model_hash, ID2SYM(rb_intern("nSV")), n_support_vecs_each_class);
|
@@ -377,9 +383,7 @@ bool isSignleOutputModel(LibSvmModel* model) {
|
|
377
383
|
return (model->param.svm_type == ONE_CLASS || model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR);
|
378
384
|
}
|
379
385
|
|
380
|
-
bool isProbabilisticModel(LibSvmModel* model) {
|
381
|
-
return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) && model->probA != NULL && model->probB != NULL);
|
382
|
-
}
|
386
|
+
/**
 * Reports whether `model` carries the information needed for probability
 * estimates, as decided by svm_check_probability_model().
 *
 * @param model model to inspect
 * @return true when svm_check_probability_model() returns non-zero
 */
bool isProbabilisticModel(LibSvmModel* model) {
  const int has_probability_info = svm_check_probability_model(model);
  return has_probability_info != 0;
}
|
383
387
|
|
384
388
|
void deleteLibSvmModel(LibSvmModel* model) {
|
385
389
|
if (model) {
|
@@ -399,6 +403,8 @@ void deleteLibSvmModel(LibSvmModel* model) {
|
|
399
403
|
model->probA = NULL;
|
400
404
|
xfree(model->probB);
|
401
405
|
model->probB = NULL;
|
406
|
+
xfree(model->prob_density_marks);
|
407
|
+
model->prob_density_marks = NULL;
|
402
408
|
xfree(model->sv_indices);
|
403
409
|
model->sv_indices = NULL;
|
404
410
|
xfree(model->label);
|
data/ext/numo/libsvm/src/svm.cpp
CHANGED
@@ -1825,7 +1825,7 @@ static double sigmoid_predict(double decision_value, double A, double B)
|
|
1825
1825
|
return 1.0/(1+exp(fApB)) ;
|
1826
1826
|
}
|
1827
1827
|
|
1828
|
-
// Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
|
1828
|
+
// Method 2 from the multiclass_prob paper by Wu, Lin, and Weng to predict probabilities
|
1829
1829
|
static void multiclass_probability(int k, double **r, double *p)
|
1830
1830
|
{
|
1831
1831
|
int t,j;
|
@@ -1889,7 +1889,7 @@ static void multiclass_probability(int k, double **r, double *p)
|
|
1889
1889
|
free(Qp);
|
1890
1890
|
}
|
1891
1891
|
|
1892
|
-
//
|
1892
|
+
// Using cross-validation decision values to get parameters for SVC probability estimates
|
1893
1893
|
static void svm_binary_svc_probability(
|
1894
1894
|
const svm_problem *prob, const svm_parameter *param,
|
1895
1895
|
double Cp, double Cn, double& probA, double& probB)
|
@@ -1976,6 +1976,83 @@ static void svm_binary_svc_probability(
|
|
1976
1976
|
free(perm);
|
1977
1977
|
}
|
1978
1978
|
|
1979
|
+
// Binning method from the oneclass_prob paper by Que and Lin to predict the probability as a normal instance (i.e., not an outlier)
|
1980
|
+
static double predict_one_class_probability(const svm_model *model, double dec_value)
|
1981
|
+
{
|
1982
|
+
double prob_estimate = 0.0;
|
1983
|
+
int nr_marks = 10;
|
1984
|
+
|
1985
|
+
if(dec_value < model->prob_density_marks[0])
|
1986
|
+
prob_estimate = 0.001;
|
1987
|
+
else if(dec_value > model->prob_density_marks[nr_marks-1])
|
1988
|
+
prob_estimate = 0.999;
|
1989
|
+
else
|
1990
|
+
{
|
1991
|
+
for(int i=1;i<nr_marks;i++)
|
1992
|
+
if(dec_value < model->prob_density_marks[i])
|
1993
|
+
{
|
1994
|
+
prob_estimate = (double)i/nr_marks;
|
1995
|
+
break;
|
1996
|
+
}
|
1997
|
+
}
|
1998
|
+
return prob_estimate;
|
1999
|
+
}
|
2000
|
+
|
2001
|
+
// qsort() comparator that orders doubles ascending.
//
// a, b - pointers to the two doubles being compared.
//
// Returns 1 when *a > *b, -1 when *a < *b and 0 otherwise (which includes the
// case where either value is NaN, since both comparisons are then false).
static int compare_double(const void *a, const void *b)
{
	// Keep the const qualifier of the qsort arguments instead of casting it away.
	const double lhs = *(const double *)a;
	const double rhs = *(const double *)b;

	if(lhs > rhs)
		return 1;
	if(lhs < rhs)
		return -1;
	return 0;
}
|
2009
|
+
|
2010
|
+
// Get parameters for one-class SVM probability estimates
|
2011
|
+
static int svm_one_class_probability(const svm_problem *prob, const svm_model *model, double *prob_density_marks)
|
2012
|
+
{
|
2013
|
+
double *dec_values = Malloc(double,prob->l);
|
2014
|
+
double *pred_results = Malloc(double,prob->l);
|
2015
|
+
int ret = 0;
|
2016
|
+
int nr_marks = 10;
|
2017
|
+
|
2018
|
+
for(int i=0;i<prob->l;i++)
|
2019
|
+
pred_results[i] = svm_predict_values(model,prob->x[i],&dec_values[i]);
|
2020
|
+
qsort(dec_values,prob->l,sizeof(double),compare_double);
|
2021
|
+
|
2022
|
+
int neg_counter=0;
|
2023
|
+
for(int i=0;i<prob->l;i++)
|
2024
|
+
if(dec_values[i]>=0)
|
2025
|
+
{
|
2026
|
+
neg_counter = i;
|
2027
|
+
break;
|
2028
|
+
}
|
2029
|
+
|
2030
|
+
int pos_counter = prob->l-neg_counter;
|
2031
|
+
if(neg_counter<nr_marks/2 || pos_counter<nr_marks/2)
|
2032
|
+
{
|
2033
|
+
fprintf(stderr,"WARNING: number of positive or negative decision values <%d; too few to do a probability estimation.\n",nr_marks/2);
|
2034
|
+
ret = -1;
|
2035
|
+
}
|
2036
|
+
else
|
2037
|
+
{
|
2038
|
+
// Binning by density
|
2039
|
+
double *tmp_marks = Malloc(double,nr_marks+1);
|
2040
|
+
int mid = nr_marks/2;
|
2041
|
+
for(int i=0;i<mid;i++)
|
2042
|
+
tmp_marks[i] = dec_values[i*neg_counter/mid];
|
2043
|
+
tmp_marks[mid] = 0;
|
2044
|
+
for(int i=mid+1;i<nr_marks+1;i++)
|
2045
|
+
tmp_marks[i] = dec_values[neg_counter-1+(i-mid)*pos_counter/mid];
|
2046
|
+
|
2047
|
+
for(int i=0;i<nr_marks;i++)
|
2048
|
+
prob_density_marks[i] = (tmp_marks[i]+tmp_marks[i+1])/2;
|
2049
|
+
free(tmp_marks);
|
2050
|
+
}
|
2051
|
+
free(dec_values);
|
2052
|
+
free(pred_results);
|
2053
|
+
return ret;
|
2054
|
+
}
|
2055
|
+
|
1979
2056
|
// Return parameter of a Laplace distribution
|
1980
2057
|
static double svm_svr_probability(
|
1981
2058
|
const svm_problem *prob, const svm_parameter *param)
|
@@ -2104,16 +2181,9 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
|
|
2104
2181
|
model->label = NULL;
|
2105
2182
|
model->nSV = NULL;
|
2106
2183
|
model->probA = NULL; model->probB = NULL;
|
2184
|
+
model->prob_density_marks = NULL;
|
2107
2185
|
model->sv_coef = Malloc(double *,1);
|
2108
2186
|
|
2109
|
-
if(param->probability &&
|
2110
|
-
(param->svm_type == EPSILON_SVR ||
|
2111
|
-
param->svm_type == NU_SVR))
|
2112
|
-
{
|
2113
|
-
model->probA = Malloc(double,1);
|
2114
|
-
model->probA[0] = svm_svr_probability(prob,param);
|
2115
|
-
}
|
2116
|
-
|
2117
2187
|
decision_function f = svm_train_one(prob,param,0,0);
|
2118
2188
|
model->rho = Malloc(double,1);
|
2119
2189
|
model->rho[0] = f.rho;
|
@@ -2136,6 +2206,26 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
|
|
2136
2206
|
++j;
|
2137
2207
|
}
|
2138
2208
|
|
2209
|
+
if(param->probability &&
|
2210
|
+
(param->svm_type == EPSILON_SVR ||
|
2211
|
+
param->svm_type == NU_SVR))
|
2212
|
+
{
|
2213
|
+
model->probA = Malloc(double,1);
|
2214
|
+
model->probA[0] = svm_svr_probability(prob,param);
|
2215
|
+
}
|
2216
|
+
else if(param->probability && param->svm_type == ONE_CLASS)
|
2217
|
+
{
|
2218
|
+
int nr_marks = 10;
|
2219
|
+
double *prob_density_marks = Malloc(double,nr_marks);
|
2220
|
+
|
2221
|
+
if(svm_one_class_probability(prob,model,prob_density_marks) == 0)
|
2222
|
+
{
|
2223
|
+
model->prob_density_marks = Malloc(double,nr_marks);
|
2224
|
+
for(i=0;i<nr_marks;i++)
|
2225
|
+
model->prob_density_marks[i] = prob_density_marks[i];
|
2226
|
+
}
|
2227
|
+
}
|
2228
|
+
|
2139
2229
|
free(f.alpha);
|
2140
2230
|
}
|
2141
2231
|
else
|
@@ -2253,6 +2343,7 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
|
|
2253
2343
|
model->probA=NULL;
|
2254
2344
|
model->probB=NULL;
|
2255
2345
|
}
|
2346
|
+
model->prob_density_marks=NULL; // for one-class SVM probabilistic outputs only
|
2256
2347
|
|
2257
2348
|
int total_sv = 0;
|
2258
2349
|
int *nz_count = Malloc(int,nr_class);
|
@@ -2630,6 +2721,14 @@ double svm_predict_probability(
|
|
2630
2721
|
free(pairwise_prob);
|
2631
2722
|
return model->label[prob_max_idx];
|
2632
2723
|
}
|
2724
|
+
else if(model->param.svm_type == ONE_CLASS && model->prob_density_marks!=NULL)
|
2725
|
+
{
|
2726
|
+
double dec_value;
|
2727
|
+
double pred_result = svm_predict_values(model,x,&dec_value);
|
2728
|
+
prob_estimates[0] = predict_one_class_probability(model,dec_value);
|
2729
|
+
prob_estimates[1] = 1-prob_estimates[0];
|
2730
|
+
return pred_result;
|
2731
|
+
}
|
2633
2732
|
else
|
2634
2733
|
return svm_predict(model, x);
|
2635
2734
|
}
|
@@ -2703,6 +2802,14 @@ int svm_save_model(const char *model_file_name, const svm_model *model)
|
|
2703
2802
|
fprintf(fp," %.17g",model->probB[i]);
|
2704
2803
|
fprintf(fp, "\n");
|
2705
2804
|
}
|
2805
|
+
if(model->prob_density_marks)
|
2806
|
+
{
|
2807
|
+
fprintf(fp, "prob_density_marks");
|
2808
|
+
int nr_marks=10;
|
2809
|
+
for(int i=0;i<nr_marks;i++)
|
2810
|
+
fprintf(fp," %.17g",model->prob_density_marks[i]);
|
2811
|
+
fprintf(fp, "\n");
|
2812
|
+
}
|
2706
2813
|
|
2707
2814
|
if(model->nSV)
|
2708
2815
|
{
|
@@ -2857,6 +2964,13 @@ bool read_model_header(FILE *fp, svm_model* model)
|
|
2857
2964
|
for(int i=0;i<n;i++)
|
2858
2965
|
FSCANF(fp,"%lf",&model->probB[i]);
|
2859
2966
|
}
|
2967
|
+
else if(strcmp(cmd,"prob_density_marks")==0)
|
2968
|
+
{
|
2969
|
+
int n = 10; // nr_marks
|
2970
|
+
model->prob_density_marks = Malloc(double,n);
|
2971
|
+
for(int i=0;i<n;i++)
|
2972
|
+
FSCANF(fp,"%lf",&model->prob_density_marks[i]);
|
2973
|
+
}
|
2860
2974
|
else if(strcmp(cmd,"nr_sv")==0)
|
2861
2975
|
{
|
2862
2976
|
int n = model->nr_class;
|
@@ -2901,6 +3015,7 @@ svm_model *svm_load_model(const char *model_file_name)
|
|
2901
3015
|
model->rho = NULL;
|
2902
3016
|
model->probA = NULL;
|
2903
3017
|
model->probB = NULL;
|
3018
|
+
model->prob_density_marks = NULL;
|
2904
3019
|
model->sv_indices = NULL;
|
2905
3020
|
model->label = NULL;
|
2906
3021
|
model->nSV = NULL;
|
@@ -3012,13 +3127,16 @@ void svm_free_model_content(svm_model* model_ptr)
|
|
3012
3127
|
model_ptr->rho = NULL;
|
3013
3128
|
|
3014
3129
|
free(model_ptr->label);
|
3015
|
-
model_ptr->label= NULL;
|
3130
|
+
model_ptr->label = NULL;
|
3016
3131
|
|
3017
3132
|
free(model_ptr->probA);
|
3018
3133
|
model_ptr->probA = NULL;
|
3019
3134
|
|
3020
3135
|
free(model_ptr->probB);
|
3021
|
-
model_ptr->probB= NULL;
|
3136
|
+
model_ptr->probB = NULL;
|
3137
|
+
|
3138
|
+
free(model_ptr->prob_density_marks);
|
3139
|
+
model_ptr->prob_density_marks = NULL;
|
3022
3140
|
|
3023
3141
|
free(model_ptr->sv_indices);
|
3024
3142
|
model_ptr->sv_indices = NULL;
|
@@ -3104,10 +3222,6 @@ const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *pa
|
|
3104
3222
|
param->probability != 1)
|
3105
3223
|
return "probability != 0 and probability != 1";
|
3106
3224
|
|
3107
|
-
if(param->probability == 1 &&
|
3108
|
-
svm_type == ONE_CLASS)
|
3109
|
-
return "one-class SVM probability output not supported yet";
|
3110
|
-
|
3111
3225
|
|
3112
3226
|
// check whether nu-svc is feasible
|
3113
3227
|
|
@@ -3167,8 +3281,10 @@ const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *pa
|
|
3167
3281
|
|
3168
3282
|
int svm_check_probability_model(const svm_model *model)
|
3169
3283
|
{
|
3170
|
-
return
|
3171
|
-
model->
|
3284
|
+
return
|
3285
|
+
((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
|
3286
|
+
model->probA!=NULL && model->probB!=NULL) ||
|
3287
|
+
(model->param.svm_type == ONE_CLASS && model->prob_density_marks!=NULL) ||
|
3172
3288
|
((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
|
3173
3289
|
model->probA!=NULL);
|
3174
3290
|
}
|
data/ext/numo/libsvm/src/svm.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#ifndef _LIBSVM_H
|
2
2
|
#define _LIBSVM_H
|
3
3
|
|
4
|
-
#define LIBSVM_VERSION
|
4
|
+
#define LIBSVM_VERSION 330
|
5
5
|
|
6
6
|
#ifdef __cplusplus
|
7
7
|
extern "C" {
|
@@ -59,6 +59,7 @@ struct svm_model
|
|
59
59
|
double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
|
60
60
|
double *probA; /* pariwise probability information */
|
61
61
|
double *probB;
|
62
|
+
double *prob_density_marks; /* probability information for ONE_CLASS */
|
62
63
|
int *sv_indices; /* sv_indices[0,...,nSV-1] are values in [1,...,num_traning_data] to indicate SVs in the training set */
|
63
64
|
|
64
65
|
/* for classification only */
|
data/lib/numo/libsvm/version.rb
CHANGED
data/sig/numo/libsvm.rbs
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: numo-libsvm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|