numo-libsvm 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 132e5e9b95db85b1897e804b3bcd22bb948c9d8200dcbf14752059b98f377199
4
- data.tar.gz: 24ff1957951af9036b41017b4c34e1b06046059e1c57e5e33bc9d1554f87bee5
3
+ metadata.gz: 37613a10119b2974024687c3f781d271ebaa30b15ccff6100df17c32642dcd2d
4
+ data.tar.gz: 210e072576d4fa859364fdb522e6f02c9be12458e5ed0647fcd8bebedfdb3e84
5
5
  SHA512:
6
- metadata.gz: 0a63eb30c744e3531c2de44a7b5ef229261f7fd0e50f84d4e3613701d165ee5cee17cac0026702980571ca25266d99c2d81f1c595ea1a6f215d71b50b26ba830
7
- data.tar.gz: d16ec02c158f15181e37cecb554e6bbd60d02bd60d95bf16644849683073e34d6c06c60503039da9a7ecbee958915495d1241d5482a486056177047c4a13612b
6
+ metadata.gz: 142e15fe744460f679f7bd7978f1654e2e71a40a49ce3a414c3c0e68b507d6d9c286eec9edd37a10f5b9fc60a940079c42e59d1283ba607cd7748b80d16fa12f
7
+ data.tar.gz: 67909b309d9a186e77dc716cc2e8b1299d0be3baf10be7be87fc89f2bc67a47b087063827be2f8ba750378d354f745c58dec0feb072f8eea73c883c3c034a669
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ # 2.1.0
2
+ - Update bundled LIBSVM to 3.30.
3
+ - Support for probabilistic outputs of one-class SVM.
4
+
1
5
  # 2.0.1
2
6
  - Change to use memcpy to copy vector.
3
7
  - Refactor codes and configs with RuboCop.
@@ -48,6 +48,8 @@ typedef struct svm_problem LibSvmProblem;
48
48
 
49
49
  void printNull(const char* s) {}
50
50
 
51
+ #define NR_MARKS 10
52
+
51
53
  /** CONVERTERS */
52
54
  VALUE convertVectorXiToNArray(const int* const arr, const int size) {
53
55
  size_t shape[1] = {(size_t)size};
@@ -215,6 +217,8 @@ LibSvmModel* convertHashToLibSvmModel(VALUE model_hash) {
215
217
  model->probA = convertNArrayToVectorXd(el);
216
218
  el = rb_hash_aref(model_hash, ID2SYM(rb_intern("probB")));
217
219
  model->probB = convertNArrayToVectorXd(el);
220
+ el = rb_hash_aref(model_hash, ID2SYM(rb_intern("prob_density_marks")));
221
+ model->prob_density_marks = convertNArrayToVectorXd(el);
218
222
  el = rb_hash_aref(model_hash, ID2SYM(rb_intern("sv_indices")));
219
223
  model->sv_indices = convertNArrayToVectorXi(el);
220
224
  el = rb_hash_aref(model_hash, ID2SYM(rb_intern("label")));
@@ -234,6 +238,7 @@ VALUE convertLibSvmModelToHash(const LibSvmModel* const model) {
234
238
  VALUE intercepts = model->rho ? convertVectorXdToNArray(model->rho, n_classes * (n_classes - 1) / 2) : Qnil;
235
239
  VALUE prob_alpha = model->probA ? convertVectorXdToNArray(model->probA, n_classes * (n_classes - 1) / 2) : Qnil;
236
240
  VALUE prob_beta = model->probB ? convertVectorXdToNArray(model->probB, n_classes * (n_classes - 1) / 2) : Qnil;
241
+ VALUE prob_density_marks = model->prob_density_marks ? convertVectorXdToNArray(model->prob_density_marks, NR_MARKS) : Qnil;
237
242
  VALUE sv_indices = model->sv_indices ? convertVectorXiToNArray(model->sv_indices, n_support_vecs) : Qnil;
238
243
  VALUE labels = model->label ? convertVectorXiToNArray(model->label, n_classes) : Qnil;
239
244
  VALUE n_support_vecs_each_class = model->nSV ? convertVectorXiToNArray(model->nSV, n_classes) : Qnil;
@@ -245,6 +250,7 @@ VALUE convertLibSvmModelToHash(const LibSvmModel* const model) {
245
250
  rb_hash_aset(model_hash, ID2SYM(rb_intern("rho")), intercepts);
246
251
  rb_hash_aset(model_hash, ID2SYM(rb_intern("probA")), prob_alpha);
247
252
  rb_hash_aset(model_hash, ID2SYM(rb_intern("probB")), prob_beta);
253
+ rb_hash_aset(model_hash, ID2SYM(rb_intern("prob_density_marks")), prob_density_marks);
248
254
  rb_hash_aset(model_hash, ID2SYM(rb_intern("sv_indices")), sv_indices);
249
255
  rb_hash_aset(model_hash, ID2SYM(rb_intern("label")), labels);
250
256
  rb_hash_aset(model_hash, ID2SYM(rb_intern("nSV")), n_support_vecs_each_class);
@@ -377,9 +383,7 @@ bool isSignleOutputModel(LibSvmModel* model) {
377
383
  return (model->param.svm_type == ONE_CLASS || model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR);
378
384
  }
379
385
 
380
- bool isProbabilisticModel(LibSvmModel* model) {
381
- return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) && model->probA != NULL && model->probB != NULL);
382
- }
386
+ bool isProbabilisticModel(LibSvmModel* model) { return svm_check_probability_model(model) != 0; }
383
387
 
384
388
  void deleteLibSvmModel(LibSvmModel* model) {
385
389
  if (model) {
@@ -399,6 +403,8 @@ void deleteLibSvmModel(LibSvmModel* model) {
399
403
  model->probA = NULL;
400
404
  xfree(model->probB);
401
405
  model->probB = NULL;
406
+ xfree(model->prob_density_marks);
407
+ model->prob_density_marks = NULL;
402
408
  xfree(model->sv_indices);
403
409
  model->sv_indices = NULL;
404
410
  xfree(model->label);
@@ -1825,7 +1825,7 @@ static double sigmoid_predict(double decision_value, double A, double B)
1825
1825
  return 1.0/(1+exp(fApB)) ;
1826
1826
  }
1827
1827
 
1828
- // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
1828
+ // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng to predict probabilities
1829
1829
  static void multiclass_probability(int k, double **r, double *p)
1830
1830
  {
1831
1831
  int t,j;
@@ -1889,7 +1889,7 @@ static void multiclass_probability(int k, double **r, double *p)
1889
1889
  free(Qp);
1890
1890
  }
1891
1891
 
1892
- // Cross-validation decision values for probability estimates
1892
+ // Using cross-validation decision values to get parameters for SVC probability estimates
1893
1893
  static void svm_binary_svc_probability(
1894
1894
  const svm_problem *prob, const svm_parameter *param,
1895
1895
  double Cp, double Cn, double& probA, double& probB)
@@ -1976,6 +1976,83 @@ static void svm_binary_svc_probability(
1976
1976
  free(perm);
1977
1977
  }
1978
1978
 
1979
+ // Binning method from the oneclass_prob paper by Que and Lin to predict the probability as a normal instance (i.e., not an outlier)
1980
+ static double predict_one_class_probability(const svm_model *model, double dec_value)
1981
+ {
1982
+ double prob_estimate = 0.0;
1983
+ int nr_marks = 10;
1984
+
1985
+ if(dec_value < model->prob_density_marks[0])
1986
+ prob_estimate = 0.001;
1987
+ else if(dec_value > model->prob_density_marks[nr_marks-1])
1988
+ prob_estimate = 0.999;
1989
+ else
1990
+ {
1991
+ for(int i=1;i<nr_marks;i++)
1992
+ if(dec_value < model->prob_density_marks[i])
1993
+ {
1994
+ prob_estimate = (double)i/nr_marks;
1995
+ break;
1996
+ }
1997
+ }
1998
+ return prob_estimate;
1999
+ }
2000
+
2001
+ static int compare_double(const void *a, const void *b)
2002
+ {
2003
+ if(*(double *)a > *(double *)b)
2004
+ return 1;
2005
+ else if(*(double *)a < *(double *)b)
2006
+ return -1;
2007
+ return 0;
2008
+ }
2009
+
2010
+ // Get parameters for one-class SVM probability estimates
2011
+ static int svm_one_class_probability(const svm_problem *prob, const svm_model *model, double *prob_density_marks)
2012
+ {
2013
+ double *dec_values = Malloc(double,prob->l);
2014
+ double *pred_results = Malloc(double,prob->l);
2015
+ int ret = 0;
2016
+ int nr_marks = 10;
2017
+
2018
+ for(int i=0;i<prob->l;i++)
2019
+ pred_results[i] = svm_predict_values(model,prob->x[i],&dec_values[i]);
2020
+ qsort(dec_values,prob->l,sizeof(double),compare_double);
2021
+
2022
+ int neg_counter=0;
2023
+ for(int i=0;i<prob->l;i++)
2024
+ if(dec_values[i]>=0)
2025
+ {
2026
+ neg_counter = i;
2027
+ break;
2028
+ }
2029
+
2030
+ int pos_counter = prob->l-neg_counter;
2031
+ if(neg_counter<nr_marks/2 || pos_counter<nr_marks/2)
2032
+ {
2033
+ fprintf(stderr,"WARNING: number of positive or negative decision values <%d; too few to do a probability estimation.\n",nr_marks/2);
2034
+ ret = -1;
2035
+ }
2036
+ else
2037
+ {
2038
+ // Binning by density
2039
+ double *tmp_marks = Malloc(double,nr_marks+1);
2040
+ int mid = nr_marks/2;
2041
+ for(int i=0;i<mid;i++)
2042
+ tmp_marks[i] = dec_values[i*neg_counter/mid];
2043
+ tmp_marks[mid] = 0;
2044
+ for(int i=mid+1;i<nr_marks+1;i++)
2045
+ tmp_marks[i] = dec_values[neg_counter-1+(i-mid)*pos_counter/mid];
2046
+
2047
+ for(int i=0;i<nr_marks;i++)
2048
+ prob_density_marks[i] = (tmp_marks[i]+tmp_marks[i+1])/2;
2049
+ free(tmp_marks);
2050
+ }
2051
+ free(dec_values);
2052
+ free(pred_results);
2053
+ return ret;
2054
+ }
2055
+
1979
2056
  // Return parameter of a Laplace distribution
1980
2057
  static double svm_svr_probability(
1981
2058
  const svm_problem *prob, const svm_parameter *param)
@@ -2104,16 +2181,9 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2104
2181
  model->label = NULL;
2105
2182
  model->nSV = NULL;
2106
2183
  model->probA = NULL; model->probB = NULL;
2184
+ model->prob_density_marks = NULL;
2107
2185
  model->sv_coef = Malloc(double *,1);
2108
2186
 
2109
- if(param->probability &&
2110
- (param->svm_type == EPSILON_SVR ||
2111
- param->svm_type == NU_SVR))
2112
- {
2113
- model->probA = Malloc(double,1);
2114
- model->probA[0] = svm_svr_probability(prob,param);
2115
- }
2116
-
2117
2187
  decision_function f = svm_train_one(prob,param,0,0);
2118
2188
  model->rho = Malloc(double,1);
2119
2189
  model->rho[0] = f.rho;
@@ -2136,6 +2206,26 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2136
2206
  ++j;
2137
2207
  }
2138
2208
 
2209
+ if(param->probability &&
2210
+ (param->svm_type == EPSILON_SVR ||
2211
+ param->svm_type == NU_SVR))
2212
+ {
2213
+ model->probA = Malloc(double,1);
2214
+ model->probA[0] = svm_svr_probability(prob,param);
2215
+ }
2216
+ else if(param->probability && param->svm_type == ONE_CLASS)
2217
+ {
2218
+ int nr_marks = 10;
2219
+ double *prob_density_marks = Malloc(double,nr_marks);
2220
+
2221
+ if(svm_one_class_probability(prob,model,prob_density_marks) == 0)
2222
+ {
2223
+ model->prob_density_marks = Malloc(double,nr_marks);
2224
+ for(i=0;i<nr_marks;i++)
2225
+ model->prob_density_marks[i] = prob_density_marks[i];
2226
+ }
2227
+ }
2228
+
2139
2229
  free(f.alpha);
2140
2230
  }
2141
2231
  else
@@ -2253,6 +2343,7 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2253
2343
  model->probA=NULL;
2254
2344
  model->probB=NULL;
2255
2345
  }
2346
+ model->prob_density_marks=NULL; // for one-class SVM probabilistic outputs only
2256
2347
 
2257
2348
  int total_sv = 0;
2258
2349
  int *nz_count = Malloc(int,nr_class);
@@ -2630,6 +2721,14 @@ double svm_predict_probability(
2630
2721
  free(pairwise_prob);
2631
2722
  return model->label[prob_max_idx];
2632
2723
  }
2724
+ else if(model->param.svm_type == ONE_CLASS && model->prob_density_marks!=NULL)
2725
+ {
2726
+ double dec_value;
2727
+ double pred_result = svm_predict_values(model,x,&dec_value);
2728
+ prob_estimates[0] = predict_one_class_probability(model,dec_value);
2729
+ prob_estimates[1] = 1-prob_estimates[0];
2730
+ return pred_result;
2731
+ }
2633
2732
  else
2634
2733
  return svm_predict(model, x);
2635
2734
  }
@@ -2703,6 +2802,14 @@ int svm_save_model(const char *model_file_name, const svm_model *model)
2703
2802
  fprintf(fp," %.17g",model->probB[i]);
2704
2803
  fprintf(fp, "\n");
2705
2804
  }
2805
+ if(model->prob_density_marks)
2806
+ {
2807
+ fprintf(fp, "prob_density_marks");
2808
+ int nr_marks=10;
2809
+ for(int i=0;i<nr_marks;i++)
2810
+ fprintf(fp," %.17g",model->prob_density_marks[i]);
2811
+ fprintf(fp, "\n");
2812
+ }
2706
2813
 
2707
2814
  if(model->nSV)
2708
2815
  {
@@ -2857,6 +2964,13 @@ bool read_model_header(FILE *fp, svm_model* model)
2857
2964
  for(int i=0;i<n;i++)
2858
2965
  FSCANF(fp,"%lf",&model->probB[i]);
2859
2966
  }
2967
+ else if(strcmp(cmd,"prob_density_marks")==0)
2968
+ {
2969
+ int n = 10; // nr_marks
2970
+ model->prob_density_marks = Malloc(double,n);
2971
+ for(int i=0;i<n;i++)
2972
+ FSCANF(fp,"%lf",&model->prob_density_marks[i]);
2973
+ }
2860
2974
  else if(strcmp(cmd,"nr_sv")==0)
2861
2975
  {
2862
2976
  int n = model->nr_class;
@@ -2901,6 +3015,7 @@ svm_model *svm_load_model(const char *model_file_name)
2901
3015
  model->rho = NULL;
2902
3016
  model->probA = NULL;
2903
3017
  model->probB = NULL;
3018
+ model->prob_density_marks = NULL;
2904
3019
  model->sv_indices = NULL;
2905
3020
  model->label = NULL;
2906
3021
  model->nSV = NULL;
@@ -3012,13 +3127,16 @@ void svm_free_model_content(svm_model* model_ptr)
3012
3127
  model_ptr->rho = NULL;
3013
3128
 
3014
3129
  free(model_ptr->label);
3015
- model_ptr->label= NULL;
3130
+ model_ptr->label = NULL;
3016
3131
 
3017
3132
  free(model_ptr->probA);
3018
3133
  model_ptr->probA = NULL;
3019
3134
 
3020
3135
  free(model_ptr->probB);
3021
- model_ptr->probB= NULL;
3136
+ model_ptr->probB = NULL;
3137
+
3138
+ free(model_ptr->prob_density_marks);
3139
+ model_ptr->prob_density_marks = NULL;
3022
3140
 
3023
3141
  free(model_ptr->sv_indices);
3024
3142
  model_ptr->sv_indices = NULL;
@@ -3104,10 +3222,6 @@ const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *pa
3104
3222
  param->probability != 1)
3105
3223
  return "probability != 0 and probability != 1";
3106
3224
 
3107
- if(param->probability == 1 &&
3108
- svm_type == ONE_CLASS)
3109
- return "one-class SVM probability output not supported yet";
3110
-
3111
3225
 
3112
3226
  // check whether nu-svc is feasible
3113
3227
 
@@ -3167,8 +3281,10 @@ const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *pa
3167
3281
 
3168
3282
  int svm_check_probability_model(const svm_model *model)
3169
3283
  {
3170
- return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3171
- model->probA!=NULL && model->probB!=NULL) ||
3284
+ return
3285
+ ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3286
+ model->probA!=NULL && model->probB!=NULL) ||
3287
+ (model->param.svm_type == ONE_CLASS && model->prob_density_marks!=NULL) ||
3172
3288
  ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3173
3289
  model->probA!=NULL);
3174
3290
  }
@@ -1,7 +1,7 @@
1
1
  #ifndef _LIBSVM_H
2
2
  #define _LIBSVM_H
3
3
 
4
- #define LIBSVM_VERSION 324
4
+ #define LIBSVM_VERSION 330
5
5
 
6
6
  #ifdef __cplusplus
7
7
  extern "C" {
@@ -59,6 +59,7 @@ struct svm_model
59
59
  double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
60
60
  double *probA; /* pariwise probability information */
61
61
  double *probB;
62
+ double *prob_density_marks; /* probability information for ONE_CLASS */
62
63
  int *sv_indices; /* sv_indices[0,...,nSV-1] are values in [1,...,num_traning_data] to indicate SVs in the training set */
63
64
 
64
65
  /* for classification only */
@@ -3,6 +3,6 @@
3
3
  module Numo
4
4
  module Libsvm
5
5
  # The version of Numo::Libsvm you are using.
6
- VERSION = '2.0.1'
6
+ VERSION = '2.1.0'
7
7
  end
8
8
  end
data/sig/numo/libsvm.rbs CHANGED
@@ -27,6 +27,7 @@ module Numo
27
27
  rho: Numo::DFloat,
28
28
  probA: Numo::DFloat,
29
29
  probB: Numo::DFloat,
30
+ prob_density_marks: Numo::DFloat,
30
31
  sv_indices: Numo::Int32,
31
32
  label: Numo::Int32,
32
33
  nSV: Numo::Int32,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: numo-libsvm
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-07-18 00:00:00.000000000 Z
11
+ date: 2022-09-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray