numo-libsvm 2.0.1 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 132e5e9b95db85b1897e804b3bcd22bb948c9d8200dcbf14752059b98f377199
4
- data.tar.gz: 24ff1957951af9036b41017b4c34e1b06046059e1c57e5e33bc9d1554f87bee5
3
+ metadata.gz: 37613a10119b2974024687c3f781d271ebaa30b15ccff6100df17c32642dcd2d
4
+ data.tar.gz: 210e072576d4fa859364fdb522e6f02c9be12458e5ed0647fcd8bebedfdb3e84
5
5
  SHA512:
6
- metadata.gz: 0a63eb30c744e3531c2de44a7b5ef229261f7fd0e50f84d4e3613701d165ee5cee17cac0026702980571ca25266d99c2d81f1c595ea1a6f215d71b50b26ba830
7
- data.tar.gz: d16ec02c158f15181e37cecb554e6bbd60d02bd60d95bf16644849683073e34d6c06c60503039da9a7ecbee958915495d1241d5482a486056177047c4a13612b
6
+ metadata.gz: 142e15fe744460f679f7bd7978f1654e2e71a40a49ce3a414c3c0e68b507d6d9c286eec9edd37a10f5b9fc60a940079c42e59d1283ba607cd7748b80d16fa12f
7
+ data.tar.gz: 67909b309d9a186e77dc716cc2e8b1299d0be3baf10be7be87fc89f2bc67a47b087063827be2f8ba750378d354f745c58dec0feb072f8eea73c883c3c034a669
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ # 2.1.0
2
+ - Update bundled LIBSVM to 3.30.
3
+ - Support for probabilistic outputs of one-class SVM.
4
+
1
5
  # 2.0.1
2
6
  - Change to use memcpy to copy vector.
3
7
  - Refactor codes and configs with RuboCop.
@@ -48,6 +48,8 @@ typedef struct svm_problem LibSvmProblem;
48
48
 
49
49
  void printNull(const char* s) {}
50
50
 
51
+ #define NR_MARKS 10
52
+
51
53
  /** CONVERTERS */
52
54
  VALUE convertVectorXiToNArray(const int* const arr, const int size) {
53
55
  size_t shape[1] = {(size_t)size};
@@ -215,6 +217,8 @@ LibSvmModel* convertHashToLibSvmModel(VALUE model_hash) {
215
217
  model->probA = convertNArrayToVectorXd(el);
216
218
  el = rb_hash_aref(model_hash, ID2SYM(rb_intern("probB")));
217
219
  model->probB = convertNArrayToVectorXd(el);
220
+ el = rb_hash_aref(model_hash, ID2SYM(rb_intern("prob_density_marks")));
221
+ model->prob_density_marks = convertNArrayToVectorXd(el);
218
222
  el = rb_hash_aref(model_hash, ID2SYM(rb_intern("sv_indices")));
219
223
  model->sv_indices = convertNArrayToVectorXi(el);
220
224
  el = rb_hash_aref(model_hash, ID2SYM(rb_intern("label")));
@@ -234,6 +238,7 @@ VALUE convertLibSvmModelToHash(const LibSvmModel* const model) {
234
238
  VALUE intercepts = model->rho ? convertVectorXdToNArray(model->rho, n_classes * (n_classes - 1) / 2) : Qnil;
235
239
  VALUE prob_alpha = model->probA ? convertVectorXdToNArray(model->probA, n_classes * (n_classes - 1) / 2) : Qnil;
236
240
  VALUE prob_beta = model->probB ? convertVectorXdToNArray(model->probB, n_classes * (n_classes - 1) / 2) : Qnil;
241
+ VALUE prob_density_marks = model->prob_density_marks ? convertVectorXdToNArray(model->prob_density_marks, NR_MARKS) : Qnil;
237
242
  VALUE sv_indices = model->sv_indices ? convertVectorXiToNArray(model->sv_indices, n_support_vecs) : Qnil;
238
243
  VALUE labels = model->label ? convertVectorXiToNArray(model->label, n_classes) : Qnil;
239
244
  VALUE n_support_vecs_each_class = model->nSV ? convertVectorXiToNArray(model->nSV, n_classes) : Qnil;
@@ -245,6 +250,7 @@ VALUE convertLibSvmModelToHash(const LibSvmModel* const model) {
245
250
  rb_hash_aset(model_hash, ID2SYM(rb_intern("rho")), intercepts);
246
251
  rb_hash_aset(model_hash, ID2SYM(rb_intern("probA")), prob_alpha);
247
252
  rb_hash_aset(model_hash, ID2SYM(rb_intern("probB")), prob_beta);
253
+ rb_hash_aset(model_hash, ID2SYM(rb_intern("prob_density_marks")), prob_density_marks);
248
254
  rb_hash_aset(model_hash, ID2SYM(rb_intern("sv_indices")), sv_indices);
249
255
  rb_hash_aset(model_hash, ID2SYM(rb_intern("label")), labels);
250
256
  rb_hash_aset(model_hash, ID2SYM(rb_intern("nSV")), n_support_vecs_each_class);
@@ -377,9 +383,7 @@ bool isSignleOutputModel(LibSvmModel* model) {
377
383
  return (model->param.svm_type == ONE_CLASS || model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR);
378
384
  }
379
385
 
380
- bool isProbabilisticModel(LibSvmModel* model) {
381
- return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) && model->probA != NULL && model->probB != NULL);
382
- }
386
+ bool isProbabilisticModel(LibSvmModel* model) { return svm_check_probability_model(model) != 0; }
383
387
 
384
388
  void deleteLibSvmModel(LibSvmModel* model) {
385
389
  if (model) {
@@ -399,6 +403,8 @@ void deleteLibSvmModel(LibSvmModel* model) {
399
403
  model->probA = NULL;
400
404
  xfree(model->probB);
401
405
  model->probB = NULL;
406
+ xfree(model->prob_density_marks);
407
+ model->prob_density_marks = NULL;
402
408
  xfree(model->sv_indices);
403
409
  model->sv_indices = NULL;
404
410
  xfree(model->label);
@@ -1825,7 +1825,7 @@ static double sigmoid_predict(double decision_value, double A, double B)
1825
1825
  return 1.0/(1+exp(fApB)) ;
1826
1826
  }
1827
1827
 
1828
- // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
1828
+ // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng to predict probabilities
1829
1829
  static void multiclass_probability(int k, double **r, double *p)
1830
1830
  {
1831
1831
  int t,j;
@@ -1889,7 +1889,7 @@ static void multiclass_probability(int k, double **r, double *p)
1889
1889
  free(Qp);
1890
1890
  }
1891
1891
 
1892
- // Cross-validation decision values for probability estimates
1892
+ // Using cross-validation decision values to get parameters for SVC probability estimates
1893
1893
  static void svm_binary_svc_probability(
1894
1894
  const svm_problem *prob, const svm_parameter *param,
1895
1895
  double Cp, double Cn, double& probA, double& probB)
@@ -1976,6 +1976,83 @@ static void svm_binary_svc_probability(
1976
1976
  free(perm);
1977
1977
  }
1978
1978
 
1979
+ // Binning method from the oneclass_prob paper by Que and Lin to predict the probability as a normal instance (i.e., not an outlier)
1980
+ static double predict_one_class_probability(const svm_model *model, double dec_value)
1981
+ {
1982
+ double prob_estimate = 0.0;
1983
+ int nr_marks = 10;
1984
+
1985
+ if(dec_value < model->prob_density_marks[0])
1986
+ prob_estimate = 0.001;
1987
+ else if(dec_value > model->prob_density_marks[nr_marks-1])
1988
+ prob_estimate = 0.999;
1989
+ else
1990
+ {
1991
+ for(int i=1;i<nr_marks;i++)
1992
+ if(dec_value < model->prob_density_marks[i])
1993
+ {
1994
+ prob_estimate = (double)i/nr_marks;
1995
+ break;
1996
+ }
1997
+ }
1998
+ return prob_estimate;
1999
+ }
2000
+
2001
+ static int compare_double(const void *a, const void *b)
2002
+ {
2003
+ if(*(double *)a > *(double *)b)
2004
+ return 1;
2005
+ else if(*(double *)a < *(double *)b)
2006
+ return -1;
2007
+ return 0;
2008
+ }
2009
+
2010
+ // Get parameters for one-class SVM probability estimates
2011
+ static int svm_one_class_probability(const svm_problem *prob, const svm_model *model, double *prob_density_marks)
2012
+ {
2013
+ double *dec_values = Malloc(double,prob->l);
2014
+ double *pred_results = Malloc(double,prob->l);
2015
+ int ret = 0;
2016
+ int nr_marks = 10;
2017
+
2018
+ for(int i=0;i<prob->l;i++)
2019
+ pred_results[i] = svm_predict_values(model,prob->x[i],&dec_values[i]);
2020
+ qsort(dec_values,prob->l,sizeof(double),compare_double);
2021
+
2022
+ int neg_counter=0;
2023
+ for(int i=0;i<prob->l;i++)
2024
+ if(dec_values[i]>=0)
2025
+ {
2026
+ neg_counter = i;
2027
+ break;
2028
+ }
2029
+
2030
+ int pos_counter = prob->l-neg_counter;
2031
+ if(neg_counter<nr_marks/2 || pos_counter<nr_marks/2)
2032
+ {
2033
+ fprintf(stderr,"WARNING: number of positive or negative decision values <%d; too few to do a probability estimation.\n",nr_marks/2);
2034
+ ret = -1;
2035
+ }
2036
+ else
2037
+ {
2038
+ // Binning by density
2039
+ double *tmp_marks = Malloc(double,nr_marks+1);
2040
+ int mid = nr_marks/2;
2041
+ for(int i=0;i<mid;i++)
2042
+ tmp_marks[i] = dec_values[i*neg_counter/mid];
2043
+ tmp_marks[mid] = 0;
2044
+ for(int i=mid+1;i<nr_marks+1;i++)
2045
+ tmp_marks[i] = dec_values[neg_counter-1+(i-mid)*pos_counter/mid];
2046
+
2047
+ for(int i=0;i<nr_marks;i++)
2048
+ prob_density_marks[i] = (tmp_marks[i]+tmp_marks[i+1])/2;
2049
+ free(tmp_marks);
2050
+ }
2051
+ free(dec_values);
2052
+ free(pred_results);
2053
+ return ret;
2054
+ }
2055
+
1979
2056
  // Return parameter of a Laplace distribution
1980
2057
  static double svm_svr_probability(
1981
2058
  const svm_problem *prob, const svm_parameter *param)
@@ -2104,16 +2181,9 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2104
2181
  model->label = NULL;
2105
2182
  model->nSV = NULL;
2106
2183
  model->probA = NULL; model->probB = NULL;
2184
+ model->prob_density_marks = NULL;
2107
2185
  model->sv_coef = Malloc(double *,1);
2108
2186
 
2109
- if(param->probability &&
2110
- (param->svm_type == EPSILON_SVR ||
2111
- param->svm_type == NU_SVR))
2112
- {
2113
- model->probA = Malloc(double,1);
2114
- model->probA[0] = svm_svr_probability(prob,param);
2115
- }
2116
-
2117
2187
  decision_function f = svm_train_one(prob,param,0,0);
2118
2188
  model->rho = Malloc(double,1);
2119
2189
  model->rho[0] = f.rho;
@@ -2136,6 +2206,26 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2136
2206
  ++j;
2137
2207
  }
2138
2208
 
2209
+ if(param->probability &&
2210
+ (param->svm_type == EPSILON_SVR ||
2211
+ param->svm_type == NU_SVR))
2212
+ {
2213
+ model->probA = Malloc(double,1);
2214
+ model->probA[0] = svm_svr_probability(prob,param);
2215
+ }
2216
+ else if(param->probability && param->svm_type == ONE_CLASS)
2217
+ {
2218
+ int nr_marks = 10;
2219
+ double *prob_density_marks = Malloc(double,nr_marks);
2220
+
2221
+ if(svm_one_class_probability(prob,model,prob_density_marks) == 0)
2222
+ {
2223
+ model->prob_density_marks = Malloc(double,nr_marks);
2224
+ for(i=0;i<nr_marks;i++)
2225
+ model->prob_density_marks[i] = prob_density_marks[i];
2226
+ }
2227
+ }
2228
+
2139
2229
  free(f.alpha);
2140
2230
  }
2141
2231
  else
@@ -2253,6 +2343,7 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2253
2343
  model->probA=NULL;
2254
2344
  model->probB=NULL;
2255
2345
  }
2346
+ model->prob_density_marks=NULL; // for one-class SVM probabilistic outputs only
2256
2347
 
2257
2348
  int total_sv = 0;
2258
2349
  int *nz_count = Malloc(int,nr_class);
@@ -2630,6 +2721,14 @@ double svm_predict_probability(
2630
2721
  free(pairwise_prob);
2631
2722
  return model->label[prob_max_idx];
2632
2723
  }
2724
+ else if(model->param.svm_type == ONE_CLASS && model->prob_density_marks!=NULL)
2725
+ {
2726
+ double dec_value;
2727
+ double pred_result = svm_predict_values(model,x,&dec_value);
2728
+ prob_estimates[0] = predict_one_class_probability(model,dec_value);
2729
+ prob_estimates[1] = 1-prob_estimates[0];
2730
+ return pred_result;
2731
+ }
2633
2732
  else
2634
2733
  return svm_predict(model, x);
2635
2734
  }
@@ -2703,6 +2802,14 @@ int svm_save_model(const char *model_file_name, const svm_model *model)
2703
2802
  fprintf(fp," %.17g",model->probB[i]);
2704
2803
  fprintf(fp, "\n");
2705
2804
  }
2805
+ if(model->prob_density_marks)
2806
+ {
2807
+ fprintf(fp, "prob_density_marks");
2808
+ int nr_marks=10;
2809
+ for(int i=0;i<nr_marks;i++)
2810
+ fprintf(fp," %.17g",model->prob_density_marks[i]);
2811
+ fprintf(fp, "\n");
2812
+ }
2706
2813
 
2707
2814
  if(model->nSV)
2708
2815
  {
@@ -2857,6 +2964,13 @@ bool read_model_header(FILE *fp, svm_model* model)
2857
2964
  for(int i=0;i<n;i++)
2858
2965
  FSCANF(fp,"%lf",&model->probB[i]);
2859
2966
  }
2967
+ else if(strcmp(cmd,"prob_density_marks")==0)
2968
+ {
2969
+ int n = 10; // nr_marks
2970
+ model->prob_density_marks = Malloc(double,n);
2971
+ for(int i=0;i<n;i++)
2972
+ FSCANF(fp,"%lf",&model->prob_density_marks[i]);
2973
+ }
2860
2974
  else if(strcmp(cmd,"nr_sv")==0)
2861
2975
  {
2862
2976
  int n = model->nr_class;
@@ -2901,6 +3015,7 @@ svm_model *svm_load_model(const char *model_file_name)
2901
3015
  model->rho = NULL;
2902
3016
  model->probA = NULL;
2903
3017
  model->probB = NULL;
3018
+ model->prob_density_marks = NULL;
2904
3019
  model->sv_indices = NULL;
2905
3020
  model->label = NULL;
2906
3021
  model->nSV = NULL;
@@ -3012,13 +3127,16 @@ void svm_free_model_content(svm_model* model_ptr)
3012
3127
  model_ptr->rho = NULL;
3013
3128
 
3014
3129
  free(model_ptr->label);
3015
- model_ptr->label= NULL;
3130
+ model_ptr->label = NULL;
3016
3131
 
3017
3132
  free(model_ptr->probA);
3018
3133
  model_ptr->probA = NULL;
3019
3134
 
3020
3135
  free(model_ptr->probB);
3021
- model_ptr->probB= NULL;
3136
+ model_ptr->probB = NULL;
3137
+
3138
+ free(model_ptr->prob_density_marks);
3139
+ model_ptr->prob_density_marks = NULL;
3022
3140
 
3023
3141
  free(model_ptr->sv_indices);
3024
3142
  model_ptr->sv_indices = NULL;
@@ -3104,10 +3222,6 @@ const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *pa
3104
3222
  param->probability != 1)
3105
3223
  return "probability != 0 and probability != 1";
3106
3224
 
3107
- if(param->probability == 1 &&
3108
- svm_type == ONE_CLASS)
3109
- return "one-class SVM probability output not supported yet";
3110
-
3111
3225
 
3112
3226
  // check whether nu-svc is feasible
3113
3227
 
@@ -3167,8 +3281,10 @@ const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *pa
3167
3281
 
3168
3282
  int svm_check_probability_model(const svm_model *model)
3169
3283
  {
3170
- return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3171
- model->probA!=NULL && model->probB!=NULL) ||
3284
+ return
3285
+ ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3286
+ model->probA!=NULL && model->probB!=NULL) ||
3287
+ (model->param.svm_type == ONE_CLASS && model->prob_density_marks!=NULL) ||
3172
3288
  ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3173
3289
  model->probA!=NULL);
3174
3290
  }
@@ -1,7 +1,7 @@
1
1
  #ifndef _LIBSVM_H
2
2
  #define _LIBSVM_H
3
3
 
4
- #define LIBSVM_VERSION 324
4
+ #define LIBSVM_VERSION 330
5
5
 
6
6
  #ifdef __cplusplus
7
7
  extern "C" {
@@ -59,6 +59,7 @@ struct svm_model
59
59
  double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
60
60
  double *probA; /* pairwise probability information */
61
61
  double *probB;
62
+ double *prob_density_marks; /* probability information for ONE_CLASS */
62
63
  int *sv_indices; /* sv_indices[0,...,nSV-1] are values in [1,...,num_training_data] to indicate SVs in the training set */
63
64
 
64
65
  /* for classification only */
@@ -3,6 +3,6 @@
3
3
  module Numo
4
4
  module Libsvm
5
5
  # The version of Numo::Libsvm you are using.
6
- VERSION = '2.0.1'
6
+ VERSION = '2.1.0'
7
7
  end
8
8
  end
data/sig/numo/libsvm.rbs CHANGED
@@ -27,6 +27,7 @@ module Numo
27
27
  rho: Numo::DFloat,
28
28
  probA: Numo::DFloat,
29
29
  probB: Numo::DFloat,
30
+ prob_density_marks: Numo::DFloat,
30
31
  sv_indices: Numo::Int32,
31
32
  label: Numo::Int32,
32
33
  nSV: Numo::Int32,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: numo-libsvm
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-07-18 00:00:00.000000000 Z
11
+ date: 2022-09-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray