liblinear-ruby 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/ext/blasp.h +8 -0
  3. data/ext/daxpy.c +8 -0
  4. data/ext/ddot.c +8 -0
  5. data/ext/dnrm2.c +8 -0
  6. data/ext/dscal.c +8 -0
  7. data/ext/liblinear_wrap.cxx +128 -3
  8. data/ext/linear.cpp +344 -175
  9. data/ext/linear.h +2 -0
  10. data/ext/tron.cpp +14 -8
  11. data/ext/tron.h +2 -1
  12. data/lib/liblinear/version.rb +1 -1
  13. data/{liblinear-1.95 → liblinear-2.1}/COPYRIGHT +1 -1
  14. data/{liblinear-1.95 → liblinear-2.1}/Makefile +1 -1
  15. data/{liblinear-1.95 → liblinear-2.1}/Makefile.win +3 -9
  16. data/{liblinear-1.95 → liblinear-2.1}/README +45 -7
  17. data/{liblinear-1.95 → liblinear-2.1}/blas/Makefile +0 -0
  18. data/{liblinear-1.95 → liblinear-2.1}/blas/blas.h +0 -0
  19. data/{liblinear-1.95 → liblinear-2.1}/blas/blasp.h +0 -0
  20. data/{liblinear-1.95 → liblinear-2.1}/blas/daxpy.c +0 -0
  21. data/{liblinear-1.95 → liblinear-2.1}/blas/ddot.c +0 -0
  22. data/{liblinear-1.95 → liblinear-2.1}/blas/dnrm2.c +0 -0
  23. data/{liblinear-1.95 → liblinear-2.1}/blas/dscal.c +0 -0
  24. data/{liblinear-1.95 → liblinear-2.1}/heart_scale +0 -0
  25. data/{liblinear-1.95 → liblinear-2.1}/linear.cpp +344 -175
  26. data/{liblinear-1.95 → liblinear-2.1}/linear.def +1 -0
  27. data/{liblinear-1.95 → liblinear-2.1}/linear.h +2 -0
  28. data/{liblinear-1.95 → liblinear-2.1}/matlab/Makefile +0 -0
  29. data/{liblinear-1.95 → liblinear-2.1}/matlab/README +12 -2
  30. data/{liblinear-1.95 → liblinear-2.1}/matlab/libsvmread.c +0 -0
  31. data/{liblinear-1.95 → liblinear-2.1}/matlab/libsvmwrite.c +1 -1
  32. data/{liblinear-1.95 → liblinear-2.1}/matlab/linear_model_matlab.c +1 -1
  33. data/{liblinear-1.95 → liblinear-2.1}/matlab/linear_model_matlab.h +0 -0
  34. data/liblinear-2.1/matlab/make.m +22 -0
  35. data/{liblinear-1.95 → liblinear-2.1}/matlab/predict.c +1 -1
  36. data/{liblinear-1.95 → liblinear-2.1}/matlab/train.c +65 -10
  37. data/{liblinear-1.95 → liblinear-2.1}/predict.c +0 -0
  38. data/{liblinear-1.95 → liblinear-2.1}/python/Makefile +0 -0
  39. data/{liblinear-1.95 → liblinear-2.1}/python/README +7 -0
  40. data/{liblinear-1.95 → liblinear-2.1}/python/liblinear.py +27 -8
  41. data/{liblinear-1.95 → liblinear-2.1}/python/liblinearutil.py +16 -2
  42. data/{liblinear-1.95 → liblinear-2.1}/train.c +51 -1
  43. data/{liblinear-1.95 → liblinear-2.1}/tron.cpp +14 -8
  44. data/{liblinear-1.95 → liblinear-2.1}/tron.h +2 -1
  45. data/liblinear-2.1/windows/liblinear.dll +0 -0
  46. data/{liblinear-1.95 → liblinear-2.1}/windows/libsvmread.mexw64 +0 -0
  47. data/{liblinear-1.95 → liblinear-2.1}/windows/libsvmwrite.mexw64 +0 -0
  48. data/liblinear-2.1/windows/predict.exe +0 -0
  49. data/{liblinear-1.95 → liblinear-2.1}/windows/predict.mexw64 +0 -0
  50. data/liblinear-2.1/windows/train.exe +0 -0
  51. data/liblinear-2.1/windows/train.mexw64 +0 -0
  52. data/liblinear-ruby.gemspec +9 -10
  53. metadata +49 -50
  54. data/liblinear-1.95/matlab/make.m +0 -21
  55. data/liblinear-1.95/windows/liblinear.dll +0 -0
  56. data/liblinear-1.95/windows/predict.exe +0 -0
  57. data/liblinear-1.95/windows/train.exe +0 -0
  58. data/liblinear-1.95/windows/train.mexw64 +0 -0
@@ -19,3 +19,4 @@ EXPORTS
19
19
  get_decfun_coef @17
20
20
  get_decfun_bias @18
21
21
  check_regression_model @19
22
+ find_parameter_C @20
@@ -32,6 +32,7 @@ struct parameter
32
32
  int *weight_label;
33
33
  double* weight;
34
34
  double p;
35
+ double *init_sol;
35
36
  };
36
37
 
37
38
  struct model
@@ -46,6 +47,7 @@ struct model
46
47
 
47
48
  struct model* train(const struct problem *prob, const struct parameter *param);
48
49
  void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
50
+ void find_parameter_C(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate);
49
51
 
50
52
  double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
51
53
  double predict(const struct model *model_, const struct feature_node *x);
@@ -117,7 +117,7 @@ The 'train' function returns a model which can be used for future
117
117
  prediction. It is a structure and is organized as [Parameters, nr_class,
118
118
  nr_feature, bias, Label, w]:
119
119
 
120
- -Parameters: Parameters
120
+ -Parameters: Parameters (now only solver type is provided)
121
121
  -nr_class: number of classes; = 2 for regression
122
122
  -nr_feature: number of features in training data (without including the bias term)
123
123
  -bias: If >= 0, we assume one additional feature is added to the end
@@ -131,7 +131,12 @@ nr_feature, bias, Label, w]:
131
131
 
132
132
  If the '-v' option is specified, cross validation is conducted and the
133
133
  returned model is just a scalar: cross-validation accuracy for
134
- classification and mean-squared error for regression.
134
+ classification and mean-squared error for regression. If the '-C' option
135
+ is specified, the best parameter C is found by cross validation. The
136
+ returned model is a two dimensional vector, where the first value is
137
+ the best C and the second value is the corresponding cross-validation
138
+ accuracy. The parameter selection utility is supported by only -s 0
139
+ and -s 2.
135
140
 
136
141
  Result of Prediction
137
142
  ====================
@@ -184,6 +189,11 @@ For probability estimates, you need '-b 1' only in the testing phase:
184
189
 
185
190
  matlab> [predict_label, accuracy, prob_estimates] = predict(heart_scale_label, heart_scale_inst, model, '-b 1');
186
191
 
192
+ Use the best parameter to train (only supported by -s 0 and -s 2):
193
+
194
+ matlab> best = train(heart_scale_label, heart_scale_inst, '-C -s 0');
195
+ matlab> model = train(heart_scale_label, heart_scale_inst, sprintf('-c %f -s 0', best(1))); % use the same solver: -s 0
196
+
187
197
  Additional Information
188
198
  ======================
189
199
 
@@ -72,7 +72,7 @@ void libsvmwrite(const char *filename, const mxArray *label_vec, const mxArray *
72
72
 
73
73
  low = jc[i], high = jc[i+1];
74
74
  for(k=low;k<high;k++)
75
- fprintf(fp," %zu:%g", (size_t)ir[k]+1, samples[k]);
75
+ fprintf(fp," %lu:%g", (size_t)ir[k]+1, samples[k]);
76
76
 
77
77
  fprintf(fp,"\n");
78
78
  }
@@ -1,6 +1,6 @@
1
1
  #include <stdlib.h>
2
2
  #include <string.h>
3
- #include "../linear.h"
3
+ #include "linear.h"
4
4
 
5
5
  #include "mex.h"
6
6
 
@@ -0,0 +1,22 @@
1
+ % This make.m is for MATLAB and OCTAVE under Windows, Mac, and Unix
2
+ function make()
3
+ try
4
+ % This part is for OCTAVE
5
+ if(exist('OCTAVE_VERSION', 'builtin'))
6
+ mex libsvmread.c
7
+ mex libsvmwrite.c
8
+ mex -I.. train.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
9
+ mex -I.. predict.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
10
+ % This part is for MATLAB
11
+ % Add -largeArrayDims on 64-bit machines of MATLAB
12
+ else
13
+ mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmread.c
14
+ mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmwrite.c
15
+ mex CFLAGS="\$CFLAGS -std=c99" -I.. -largeArrayDims train.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
16
+ mex CFLAGS="\$CFLAGS -std=c99" -I.. -largeArrayDims predict.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
17
+ end
18
+ catch err
19
+ fprintf('Error: %s failed (line %d)\n', err.stack(1).file, err.stack(1).line);
20
+ disp(err.message);
21
+ fprintf('=> Please check README for detailed instructions.\n');
22
+ end
@@ -1,7 +1,7 @@
1
1
  #include <stdio.h>
2
2
  #include <stdlib.h>
3
3
  #include <string.h>
4
- #include "../linear.h"
4
+ #include "linear.h"
5
5
 
6
6
  #include "mex.h"
7
7
  #include "linear_model_matlab.h"
@@ -1,9 +1,8 @@
1
- #include <stdio.h>
2
1
  #include <math.h>
3
2
  #include <stdlib.h>
4
3
  #include <string.h>
5
4
  #include <ctype.h>
6
- #include "../linear.h"
5
+ #include "linear.h"
7
6
 
8
7
  #include "mex.h"
9
8
  #include "linear_model_matlab.h"
@@ -60,6 +59,7 @@ void exit_with_help()
60
59
  "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
61
60
  "-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
62
61
  "-v n: n-fold cross validation mode\n"
62
+ "-C : find parameter C (only for -s 0 and 2)\n"
63
63
  "-q : quiet mode (no outputs)\n"
64
64
  "col:\n"
65
65
  " if 'col' is setted, training_instance_matrix is parsed in column format, otherwise is in row format\n"
@@ -71,11 +71,28 @@ struct parameter param; // set by parse_command_line
71
71
  struct problem prob; // set by read_problem
72
72
  struct model *model_;
73
73
  struct feature_node *x_space;
74
- int cross_validation_flag;
74
+ int flag_cross_validation;
75
+ int flag_find_C;
76
+ int flag_C_specified;
77
+ int flag_solver_specified;
75
78
  int col_format_flag;
76
79
  int nr_fold;
77
80
  double bias;
78
81
 
82
+
83
+ void do_find_parameter_C(double *best_C, double *best_rate)
84
+ {
85
+ double start_C;
86
+ double max_C = 1024;
87
+ if (flag_C_specified)
88
+ start_C = param.C;
89
+ else
90
+ start_C = -1.0;
91
+ find_parameter_C(&prob, &param, nr_fold, start_C, max_C, best_C, best_rate);
92
+ mexPrintf("Best C = %lf CV accuracy = %g%%\n", *best_C, 100.0**best_rate);
93
+ }
94
+
95
+
79
96
  double do_cross_validation()
80
97
  {
81
98
  int i;
@@ -101,8 +118,8 @@ double do_cross_validation()
101
118
  sumyy += y*y;
102
119
  sumvy += v*y;
103
120
  }
104
- printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
105
- printf("Cross Validation Squared correlation coefficient = %g\n",
121
+ mexPrintf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
122
+ mexPrintf("Cross Validation Squared correlation coefficient = %g\n",
106
123
  ((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
107
124
  ((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
108
125
  );
@@ -113,7 +130,7 @@ double do_cross_validation()
113
130
  for(i=0;i<prob.l;i++)
114
131
  if(target[i] == prob.y[i])
115
132
  ++total_correct;
116
- printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
133
+ mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
117
134
  retval = 100.0*total_correct/prob.l;
118
135
  }
119
136
 
@@ -137,8 +154,12 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
137
154
  param.nr_weight = 0;
138
155
  param.weight_label = NULL;
139
156
  param.weight = NULL;
140
- cross_validation_flag = 0;
157
+ param.init_sol = NULL;
158
+ flag_cross_validation = 0;
141
159
  col_format_flag = 0;
160
+ flag_C_specified = 0;
161
+ flag_solver_specified = 0;
162
+ flag_find_C = 0;
142
163
  bias = -1;
143
164
 
144
165
 
@@ -166,15 +187,17 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
166
187
  {
167
188
  if(argv[i][0] != '-') break;
168
189
  ++i;
169
- if(i>=argc && argv[i-1][1] != 'q') // since option -q has no parameter
190
+ if(i>=argc && argv[i-1][1] != 'q' && argv[i-1][1] != 'C') // since options -q and -C have no parameter
170
191
  return 1;
171
192
  switch(argv[i-1][1])
172
193
  {
173
194
  case 's':
174
195
  param.solver_type = atoi(argv[i]);
196
+ flag_solver_specified = 1;
175
197
  break;
176
198
  case 'c':
177
199
  param.C = atof(argv[i]);
200
+ flag_C_specified = 1;
178
201
  break;
179
202
  case 'p':
180
203
  param.p = atof(argv[i]);
@@ -186,7 +209,7 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
186
209
  bias = atof(argv[i]);
187
210
  break;
188
211
  case 'v':
189
- cross_validation_flag = 1;
212
+ flag_cross_validation = 1;
190
213
  nr_fold = atoi(argv[i]);
191
214
  if(nr_fold < 2)
192
215
  {
@@ -205,6 +228,10 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
205
228
  print_func = &print_null;
206
229
  i--;
207
230
  break;
231
+ case 'C':
232
+ flag_find_C = 1;
233
+ i--;
234
+ break;
208
235
  default:
209
236
  mexPrintf("unknown option\n");
210
237
  return 1;
@@ -213,6 +240,23 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
213
240
 
214
241
  set_print_string_function(print_func);
215
242
 
243
+ // default solver for parameter selection is L2R_L2LOSS_SVC
244
+ if(flag_find_C)
245
+ {
246
+ if(!flag_cross_validation)
247
+ nr_fold = 5;
248
+ if(!flag_solver_specified)
249
+ {
250
+ mexPrintf("Solver not specified. Using -s 2\n");
251
+ param.solver_type = L2R_L2LOSS_SVC;
252
+ }
253
+ else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
254
+ {
255
+ mexPrintf("Warm-start parameter search only available for -s 0 and -s 2\n");
256
+ return 1;
257
+ }
258
+ }
259
+
216
260
  if(param.eps == INF)
217
261
  {
218
262
  switch(param.solver_type)
@@ -406,7 +450,18 @@ void mexFunction( int nlhs, mxArray *plhs[],
406
450
  return;
407
451
  }
408
452
 
409
- if(cross_validation_flag)
453
+ if (flag_find_C)
454
+ {
455
+ double best_C, best_rate, *ptr;
456
+
457
+ do_find_parameter_C(&best_C, &best_rate);
458
+
459
+ plhs[0] = mxCreateDoubleMatrix(2, 1, mxREAL);
460
+ ptr = mxGetPr(plhs[0]);
461
+ ptr[0] = best_C;
462
+ ptr[1] = best_rate;
463
+ }
464
+ else if(flag_cross_validation)
410
465
  {
411
466
  double *ptr;
412
467
  plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
@@ -277,6 +277,11 @@ The above command loads
277
277
  structure. If '-v' is specified, cross validation is
278
278
  conducted and the returned model is just a scalar: cross-validation
279
279
  accuracy for classification and mean-squared error for regression.
280
+ If the '-C' option is specified, the best parameter C is found
281
+ by cross validation. The returned model is a tuple of the best C
282
+ and the corresponding cross-validation accuracy. The parameter
283
+ selection utility is supported by only -s 0 and -s 2.
284
+
280
285
 
281
286
  To train the same data many times with different
282
287
  parameters, the second and the third ways should be faster..
@@ -290,6 +295,8 @@ The above command loads
290
295
  >>> m = train(prob, '-w1 5 -c 5')
291
296
  >>> m = train(prob, param)
292
297
  >>> CV_ACC = train(y, x, '-v 3')
298
+ >>> best_C, best_rate = train(y, x, '-C -s 0')
299
+ >>> m = train(y, x, '-c {0} -s 0'.format(best_C)) # use the same solver: -s 0
293
300
 
294
301
  - Function: predict
295
302
 
@@ -16,7 +16,7 @@ try:
16
16
  if sys.platform == 'win32':
17
17
  liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll'))
18
18
  else:
19
- liblinear = CDLL(path.join(dirname, '../liblinear.so.2'))
19
+ liblinear = CDLL(path.join(dirname, '../liblinear.so.3'))
20
20
  except:
21
21
  # For unix the prefix 'lib' is not considered.
22
22
  if find_library('linear'):
@@ -127,8 +127,8 @@ class problem(Structure):
127
127
 
128
128
 
129
129
  class parameter(Structure):
130
- _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p"]
131
- _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double]
130
+ _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p", "init_sol"]
131
+ _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double, POINTER(c_double)]
132
132
  _fields_ = genFields(_names, _types)
133
133
 
134
134
  def __init__(self, options = None):
@@ -152,10 +152,14 @@ class parameter(Structure):
152
152
  self.C = 1
153
153
  self.p = 0.1
154
154
  self.nr_weight = 0
155
- self.weight_label = (c_int * 0)()
156
- self.weight = (c_double * 0)()
155
+ self.weight_label = None
156
+ self.weight = None
157
+ self.init_sol = None
157
158
  self.bias = -1
158
- self.cross_validation = False
159
+ self.flag_cross_validation = False
160
+ self.flag_C_specified = False
161
+ self.flag_solver_specified = False
162
+ self.flag_find_C = False
159
163
  self.nr_fold = 0
160
164
  self.print_func = cast(None, PRINT_STRING_FUN)
161
165
 
@@ -176,9 +180,11 @@ class parameter(Structure):
176
180
  if argv[i] == "-s":
177
181
  i = i + 1
178
182
  self.solver_type = int(argv[i])
183
+ self.flag_solver_specified = True
179
184
  elif argv[i] == "-c":
180
185
  i = i + 1
181
186
  self.C = float(argv[i])
187
+ self.flag_C_specified = True
182
188
  elif argv[i] == "-p":
183
189
  i = i + 1
184
190
  self.p = float(argv[i])
@@ -190,18 +196,20 @@ class parameter(Structure):
190
196
  self.bias = float(argv[i])
191
197
  elif argv[i] == "-v":
192
198
  i = i + 1
193
- self.cross_validation = 1
199
+ self.flag_cross_validation = 1
194
200
  self.nr_fold = int(argv[i])
195
201
  if self.nr_fold < 2 :
196
202
  raise ValueError("n-fold cross validation: n must >= 2")
197
203
  elif argv[i].startswith("-w"):
198
204
  i = i + 1
199
205
  self.nr_weight += 1
200
- nr_weight = self.nr_weight
201
206
  weight_label += [int(argv[i-1][2:])]
202
207
  weight += [float(argv[i])]
203
208
  elif argv[i] == "-q":
204
209
  self.print_func = PRINT_STRING_FUN(print_null)
210
+ elif argv[i] == "-C":
211
+ self.flag_find_C = True
212
+
205
213
  else :
206
214
  raise ValueError("Wrong options")
207
215
  i += 1
@@ -213,6 +221,16 @@ class parameter(Structure):
213
221
  self.weight[i] = weight[i]
214
222
  self.weight_label[i] = weight_label[i]
215
223
 
224
+ # default solver for parameter selection is L2R_L2LOSS_SVC
225
+ if self.flag_find_C:
226
+ if not self.flag_cross_validation:
227
+ self.nr_fold = 5
228
+ if not self.flag_solver_specified:
229
+ self.solver_type = L2R_L2LOSS_SVC
230
+ self.flag_solver_specified = True
231
+ elif self.solver_type not in [L2R_LR, L2R_L2LOSS_SVC]:
232
+ raise ValueError("Warm-start parameter search only available for -s 0 and -s 2")
233
+
216
234
  if self.eps == float('inf'):
217
235
  if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]:
218
236
  self.eps = 0.01
@@ -280,6 +298,7 @@ def toPyModel(model_ptr):
280
298
  return m
281
299
 
282
300
  fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)])
301
+ fillprototype(liblinear.find_parameter_C, None, [POINTER(problem), POINTER(parameter), c_int, c_double, c_double, POINTER(c_double), POINTER(c_double)])
283
302
  fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)])
284
303
 
285
304
  fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
@@ -127,7 +127,7 @@ def train(arg1, arg2=None, arg3=None):
127
127
  -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
128
128
  -wi weight: weights adjust the parameter C of different classes (see README for details)
129
129
  -v n: n-fold cross validation mode
130
- -q : quiet mode (no outputs)
130
+ -q : quiet mode (no outputs)
131
131
  """
132
132
  prob, param = None, None
133
133
  if isinstance(arg1, (list, tuple)):
@@ -150,7 +150,21 @@ def train(arg1, arg2=None, arg3=None):
150
150
  if err_msg :
151
151
  raise ValueError('Error: %s' % err_msg)
152
152
 
153
- if param.cross_validation:
153
+ if param.flag_find_C:
154
+ nr_fold = param.nr_fold
155
+ best_C = c_double()
156
+ best_rate = c_double()
157
+ max_C = 1024
158
+ if param.flag_C_specified:
159
+ start_C = param.C
160
+ else:
161
+ start_C = -1.0
162
+ liblinear.find_parameter_C(prob, param, nr_fold, start_C, max_C, best_C, best_rate)
163
+ print("Best C = %lf CV accuracy = %g%%\n"% (best_C.value, 100.0*best_rate.value))
164
+ return best_C.value,best_rate.value
165
+
166
+
167
+ elif param.flag_cross_validation:
154
168
  l, nr_fold = prob.l, param.nr_fold
155
169
  target = (c_double * l)()
156
170
  liblinear.cross_validation(prob, param, nr_fold, target)
@@ -49,6 +49,7 @@ void exit_with_help()
49
49
  "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
50
50
  "-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
51
51
  "-v n: n-fold cross validation mode\n"
52
+ "-C : find parameter C (only for -s 0 and 2)\n"
52
53
  "-q : quiet mode (no outputs)\n"
53
54
  );
54
55
  exit(1);
@@ -84,12 +85,16 @@ static char* readline(FILE *input)
84
85
  void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
85
86
  void read_problem(const char *filename);
86
87
  void do_cross_validation();
88
+ void do_find_parameter_C();
87
89
 
88
90
  struct feature_node *x_space;
89
91
  struct parameter param;
90
92
  struct problem prob;
91
93
  struct model* model_;
92
94
  int flag_cross_validation;
95
+ int flag_find_C;
96
+ int flag_C_specified;
97
+ int flag_solver_specified;
93
98
  int nr_fold;
94
99
  double bias;
95
100
 
@@ -109,7 +114,11 @@ int main(int argc, char **argv)
109
114
  exit(1);
110
115
  }
111
116
 
112
- if(flag_cross_validation)
117
+ if (flag_find_C)
118
+ {
119
+ do_find_parameter_C();
120
+ }
121
+ else if(flag_cross_validation)
113
122
  {
114
123
  do_cross_validation();
115
124
  }
@@ -132,6 +141,19 @@ int main(int argc, char **argv)
132
141
  return 0;
133
142
  }
134
143
 
144
+ void do_find_parameter_C()
145
+ {
146
+ double start_C, best_C, best_rate;
147
+ double max_C = 1024;
148
+ if (flag_C_specified)
149
+ start_C = param.C;
150
+ else
151
+ start_C = -1.0;
152
+ printf("Doing parameter search with %d-fold cross validation.\n", nr_fold);
153
+ find_parameter_C(&prob, &param, nr_fold, start_C, max_C, &best_C, &best_rate);
154
+ printf("Best C = %g CV accuracy = %g%%\n", best_C, 100.0*best_rate);
155
+ }
156
+
135
157
  void do_cross_validation()
136
158
  {
137
159
  int i;
@@ -186,7 +208,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
186
208
  param.nr_weight = 0;
187
209
  param.weight_label = NULL;
188
210
  param.weight = NULL;
211
+ param.init_sol = NULL;
189
212
  flag_cross_validation = 0;
213
+ flag_C_specified = 0;
214
+ flag_solver_specified = 0;
215
+ flag_find_C = 0;
190
216
  bias = -1;
191
217
 
192
218
  // parse options
@@ -199,10 +225,12 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
199
225
  {
200
226
  case 's':
201
227
  param.solver_type = atoi(argv[i]);
228
+ flag_solver_specified = 1;
202
229
  break;
203
230
 
204
231
  case 'c':
205
232
  param.C = atof(argv[i]);
233
+ flag_C_specified = 1;
206
234
  break;
207
235
 
208
236
  case 'p':
@@ -240,6 +268,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
240
268
  i--;
241
269
  break;
242
270
 
271
+ case 'C':
272
+ flag_find_C = 1;
273
+ i--;
274
+ break;
275
+
243
276
  default:
244
277
  fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
245
278
  exit_with_help();
@@ -267,6 +300,23 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
267
300
  sprintf(model_file_name,"%s.model",p);
268
301
  }
269
302
 
303
+ // default solver for parameter selection is L2R_L2LOSS_SVC
304
+ if(flag_find_C)
305
+ {
306
+ if(!flag_cross_validation)
307
+ nr_fold = 5;
308
+ if(!flag_solver_specified)
309
+ {
310
+ fprintf(stderr, "Solver not specified. Using -s 2\n");
311
+ param.solver_type = L2R_L2LOSS_SVC;
312
+ }
313
+ else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
314
+ {
315
+ fprintf(stderr, "Warm-start parameter search only available for -s 0 and -s 2\n");
316
+ exit_with_help();
317
+ }
318
+ }
319
+
270
320
  if(param.eps == INF)
271
321
  {
272
322
  switch(param.solver_type)