liblinear-ruby 0.0.6 → 0.0.7

Files changed (58)
  1. checksums.yaml +4 -4
  2. data/ext/blasp.h +8 -0
  3. data/ext/daxpy.c +8 -0
  4. data/ext/ddot.c +8 -0
  5. data/ext/dnrm2.c +8 -0
  6. data/ext/dscal.c +8 -0
  7. data/ext/liblinear_wrap.cxx +128 -3
  8. data/ext/linear.cpp +344 -175
  9. data/ext/linear.h +2 -0
  10. data/ext/tron.cpp +14 -8
  11. data/ext/tron.h +2 -1
  12. data/lib/liblinear/version.rb +1 -1
  13. data/{liblinear-1.95 → liblinear-2.1}/COPYRIGHT +1 -1
  14. data/{liblinear-1.95 → liblinear-2.1}/Makefile +1 -1
  15. data/{liblinear-1.95 → liblinear-2.1}/Makefile.win +3 -9
  16. data/{liblinear-1.95 → liblinear-2.1}/README +45 -7
  17. data/{liblinear-1.95 → liblinear-2.1}/blas/Makefile +0 -0
  18. data/{liblinear-1.95 → liblinear-2.1}/blas/blas.h +0 -0
  19. data/{liblinear-1.95 → liblinear-2.1}/blas/blasp.h +0 -0
  20. data/{liblinear-1.95 → liblinear-2.1}/blas/daxpy.c +0 -0
  21. data/{liblinear-1.95 → liblinear-2.1}/blas/ddot.c +0 -0
  22. data/{liblinear-1.95 → liblinear-2.1}/blas/dnrm2.c +0 -0
  23. data/{liblinear-1.95 → liblinear-2.1}/blas/dscal.c +0 -0
  24. data/{liblinear-1.95 → liblinear-2.1}/heart_scale +0 -0
  25. data/{liblinear-1.95 → liblinear-2.1}/linear.cpp +344 -175
  26. data/{liblinear-1.95 → liblinear-2.1}/linear.def +1 -0
  27. data/{liblinear-1.95 → liblinear-2.1}/linear.h +2 -0
  28. data/{liblinear-1.95 → liblinear-2.1}/matlab/Makefile +0 -0
  29. data/{liblinear-1.95 → liblinear-2.1}/matlab/README +12 -2
  30. data/{liblinear-1.95 → liblinear-2.1}/matlab/libsvmread.c +0 -0
  31. data/{liblinear-1.95 → liblinear-2.1}/matlab/libsvmwrite.c +1 -1
  32. data/{liblinear-1.95 → liblinear-2.1}/matlab/linear_model_matlab.c +1 -1
  33. data/{liblinear-1.95 → liblinear-2.1}/matlab/linear_model_matlab.h +0 -0
  34. data/liblinear-2.1/matlab/make.m +22 -0
  35. data/{liblinear-1.95 → liblinear-2.1}/matlab/predict.c +1 -1
  36. data/{liblinear-1.95 → liblinear-2.1}/matlab/train.c +65 -10
  37. data/{liblinear-1.95 → liblinear-2.1}/predict.c +0 -0
  38. data/{liblinear-1.95 → liblinear-2.1}/python/Makefile +0 -0
  39. data/{liblinear-1.95 → liblinear-2.1}/python/README +7 -0
  40. data/{liblinear-1.95 → liblinear-2.1}/python/liblinear.py +27 -8
  41. data/{liblinear-1.95 → liblinear-2.1}/python/liblinearutil.py +16 -2
  42. data/{liblinear-1.95 → liblinear-2.1}/train.c +51 -1
  43. data/{liblinear-1.95 → liblinear-2.1}/tron.cpp +14 -8
  44. data/{liblinear-1.95 → liblinear-2.1}/tron.h +2 -1
  45. data/liblinear-2.1/windows/liblinear.dll +0 -0
  46. data/{liblinear-1.95 → liblinear-2.1}/windows/libsvmread.mexw64 +0 -0
  47. data/{liblinear-1.95 → liblinear-2.1}/windows/libsvmwrite.mexw64 +0 -0
  48. data/liblinear-2.1/windows/predict.exe +0 -0
  49. data/{liblinear-1.95 → liblinear-2.1}/windows/predict.mexw64 +0 -0
  50. data/liblinear-2.1/windows/train.exe +0 -0
  51. data/liblinear-2.1/windows/train.mexw64 +0 -0
  52. data/liblinear-ruby.gemspec +9 -10
  53. metadata +49 -50
  54. data/liblinear-1.95/matlab/make.m +0 -21
  55. data/liblinear-1.95/windows/liblinear.dll +0 -0
  56. data/liblinear-1.95/windows/predict.exe +0 -0
  57. data/liblinear-1.95/windows/train.exe +0 -0
  58. data/liblinear-1.95/windows/train.mexw64 +0 -0

data/liblinear-2.1/linear.def
@@ -19,3 +19,4 @@ EXPORTS
     get_decfun_coef @17
     get_decfun_bias @18
     check_regression_model @19
+    find_parameter_C @20

data/ext/linear.h (the same two additions appear in data/liblinear-2.1/linear.h)
@@ -32,6 +32,7 @@ struct parameter
     int *weight_label;
     double* weight;
     double p;
+    double *init_sol;
 };
 
 struct model
@@ -46,6 +47,7 @@ struct model
 
 struct model* train(const struct problem *prob, const struct parameter *param);
 void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
+void find_parameter_C(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate);
 
 double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
 double predict(const struct model *model_, const struct feature_node *x);
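
For readers who want to drive the new entry point directly from C rather than through the Ruby, Python, or MATLAB wrappers: the sketch below is not part of this diff. The four-instance toy problem and every number in it are invented for illustration, and it assumes compiling and linking against the bundled liblinear 2.1 sources (e.g. those under data/liblinear-2.1).

#include <stdio.h>
#include "linear.h"

int main(void)
{
    /* Four 2-feature instances in LIBLINEAR's sparse format;
       an index of -1 terminates each row. */
    struct feature_node x[4][3] = {
        {{1, 1.0}, {2, 0.0}, {-1, 0.0}},
        {{1, 0.9}, {2, 0.1}, {-1, 0.0}},
        {{1, 0.0}, {2, 1.0}, {-1, 0.0}},
        {{1, 0.1}, {2, 0.9}, {-1, 0.0}}
    };
    struct feature_node *rows[4] = { x[0], x[1], x[2], x[3] };
    double y[4] = { +1, +1, -1, -1 };
    struct problem prob;
    struct parameter param;
    double best_C, best_rate;

    prob.l = 4;        /* number of instances */
    prob.n = 2;        /* number of features */
    prob.y = y;
    prob.x = rows;
    prob.bias = -1;    /* no bias feature appended */

    param.solver_type = L2R_L2LOSS_SVC;  /* -s 2, one of the two supported solvers */
    param.eps = 0.1;
    param.C = 1;
    param.p = 0.1;
    param.nr_weight = 0;
    param.weight_label = NULL;
    param.weight = NULL;
    param.init_sol = NULL;               /* the struct field added in this release */

    /* start_C = -1 lets the library pick its own starting point,
       exactly as train.c does when -c is not given on the command line. */
    find_parameter_C(&prob, &param, 2, -1.0, 1024, &best_C, &best_rate);
    printf("Best C = %g CV accuracy = %g%%\n", best_C, 100.0 * best_rate);
    return 0;
}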

data/liblinear-2.1/matlab/README
@@ -117,7 +117,7 @@ The 'train' function returns a model which can be used for future
 prediction. It is a structure and is organized as [Parameters, nr_class,
 nr_feature, bias, Label, w]:
 
-        -Parameters: Parameters
+        -Parameters: Parameters (now only solver type is provided)
         -nr_class: number of classes; = 2 for regression
         -nr_feature: number of features in training data (without including the bias term)
        -bias: If >= 0, we assume one additional feature is added to the end
@@ -131,7 +131,12 @@ nr_feature, bias, Label, w]:
 
 If the '-v' option is specified, cross validation is conducted and the
 returned model is just a scalar: cross-validation accuracy for
-classification and mean-squared error for regression.
+classification and mean-squared error for regression. If the '-C' option
+is specified, the best parameter C is found by cross validation. The
+returned model is a two dimensional vector, where the first value is
+the best C and the second value is the corresponding cross-validation
+accuracy. The parameter selection utility is supported by only -s 0
+and -s 2.
 
 Result of Prediction
 ====================
@@ -184,6 +189,11 @@ For probability estimates, you need '-b 1' only in the testing phase:
 
 matlab> [predict_label, accuracy, prob_estimates] = predict(heart_scale_label, heart_scale_inst, model, '-b 1');
 
+Use the best parameter to train (only supported by -s 0 and -s 2):
+
+matlab> best = train(heart_scale_label, heart_scale_inst, '-C -s 0');
+matlab> model = train(heart_scale_label, heart_scale_inst, sprintf('-c %f -s 0', best(1))); % use the same solver: -s 0
+
 Additional Information
 ======================

data/liblinear-2.1/matlab/libsvmwrite.c
@@ -72,7 +72,7 @@ void libsvmwrite(const char *filename, const mxArray *label_vec, const mxArray *
 
         low = jc[i], high = jc[i+1];
         for(k=low;k<high;k++)
-            fprintf(fp," %zu:%g", (size_t)ir[k]+1, samples[k]);
+            fprintf(fp," %lu:%g", (size_t)ir[k]+1, samples[k]);
 
         fprintf(fp,"\n");
     }

data/liblinear-2.1/matlab/linear_model_matlab.c
@@ -1,6 +1,6 @@
 #include <stdlib.h>
 #include <string.h>
-#include "../linear.h"
+#include "linear.h"
 
 #include "mex.h"
 

data/liblinear-2.1/matlab/make.m (new file)
@@ -0,0 +1,22 @@
+% This make.m is for MATLAB and OCTAVE under Windows, Mac, and Unix
+function make()
+try
+    % This part is for OCTAVE
+    if(exist('OCTAVE_VERSION', 'builtin'))
+        mex libsvmread.c
+        mex libsvmwrite.c
+        mex -I.. train.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
+        mex -I.. predict.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
+    % This part is for MATLAB
+    % Add -largeArrayDims on 64-bit machines of MATLAB
+    else
+        mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmread.c
+        mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmwrite.c
+        mex CFLAGS="\$CFLAGS -std=c99" -I.. -largeArrayDims train.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
+        mex CFLAGS="\$CFLAGS -std=c99" -I.. -largeArrayDims predict.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
+    end
+catch err
+    fprintf('Error: %s failed (line %d)\n', err.stack(1).file, err.stack(1).line);
+    disp(err.message);
+    fprintf('=> Please check README for detailed instructions.\n');
+end
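
The new make.m replaces the 21-line script removed at data/liblinear-1.95/matlab/make.m (entry 54 in the file list). Usage is not spelled out in this diff, but following the conventions of the bundled MATLAB README one would run it from the matlab directory:

matlab> make

which builds the train and predict MEX files together with libsvmread and libsvmwrite.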

data/liblinear-2.1/matlab/predict.c
@@ -1,7 +1,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include "../linear.h"
+#include "linear.h"
 
 #include "mex.h"
 #include "linear_model_matlab.h"

data/liblinear-2.1/matlab/train.c
@@ -1,9 +1,8 @@
-#include <stdio.h>
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
-#include "../linear.h"
+#include "linear.h"
 
 #include "mex.h"
 #include "linear_model_matlab.h"
@@ -60,6 +59,7 @@ void exit_with_help()
     "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
     "-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
     "-v n: n-fold cross validation mode\n"
+    "-C : find parameter C (only for -s 0 and 2)\n"
     "-q : quiet mode (no outputs)\n"
     "col:\n"
     "    if 'col' is setted, training_instance_matrix is parsed in column format, otherwise is in row format\n"
@@ -71,11 +71,28 @@ struct parameter param;        // set by parse_command_line
 struct problem prob;        // set by read_problem
 struct model *model_;
 struct feature_node *x_space;
-int cross_validation_flag;
+int flag_cross_validation;
+int flag_find_C;
+int flag_C_specified;
+int flag_solver_specified;
 int col_format_flag;
 int nr_fold;
 double bias;
 
+
+void do_find_parameter_C(double *best_C, double *best_rate)
+{
+    double start_C;
+    double max_C = 1024;
+    if (flag_C_specified)
+        start_C = param.C;
+    else
+        start_C = -1.0;
+    find_parameter_C(&prob, &param, nr_fold, start_C, max_C, best_C, best_rate);
+    mexPrintf("Best C = %lf CV accuracy = %g%%\n", *best_C, 100.0**best_rate);
+}
+
+
 double do_cross_validation()
 {
     int i;
@@ -101,8 +118,8 @@ double do_cross_validation()
             sumyy += y*y;
             sumvy += v*y;
         }
-        printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
-        printf("Cross Validation Squared correlation coefficient = %g\n",
+        mexPrintf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
+        mexPrintf("Cross Validation Squared correlation coefficient = %g\n",
             ((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
             ((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
             );
@@ -113,7 +130,7 @@ double do_cross_validation()
         for(i=0;i<prob.l;i++)
             if(target[i] == prob.y[i])
                 ++total_correct;
-        printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
+        mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
         retval = 100.0*total_correct/prob.l;
     }
 
@@ -137,8 +154,12 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
     param.nr_weight = 0;
     param.weight_label = NULL;
     param.weight = NULL;
-    cross_validation_flag = 0;
+    param.init_sol = NULL;
+    flag_cross_validation = 0;
     col_format_flag = 0;
+    flag_C_specified = 0;
+    flag_solver_specified = 0;
+    flag_find_C = 0;
     bias = -1;
 
 
@@ -166,15 +187,17 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
     {
         if(argv[i][0] != '-') break;
         ++i;
-        if(i>=argc && argv[i-1][1] != 'q')    // since option -q has no parameter
+        if(i>=argc && argv[i-1][1] != 'q' && argv[i-1][1] != 'C')    // since options -q and -C have no parameter
             return 1;
         switch(argv[i-1][1])
         {
             case 's':
                 param.solver_type = atoi(argv[i]);
+                flag_solver_specified = 1;
                 break;
             case 'c':
                 param.C = atof(argv[i]);
+                flag_C_specified = 1;
                 break;
             case 'p':
                 param.p = atof(argv[i]);
@@ -186,7 +209,7 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
                 bias = atof(argv[i]);
                 break;
             case 'v':
-                cross_validation_flag = 1;
+                flag_cross_validation = 1;
                 nr_fold = atoi(argv[i]);
                 if(nr_fold < 2)
                 {
@@ -205,6 +228,10 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
                 print_func = &print_null;
                 i--;
                 break;
+            case 'C':
+                flag_find_C = 1;
+                i--;
+                break;
             default:
                 mexPrintf("unknown option\n");
                 return 1;
@@ -213,6 +240,23 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
 
     set_print_string_function(print_func);
 
+    // default solver for parameter selection is L2R_L2LOSS_SVC
+    if(flag_find_C)
+    {
+        if(!flag_cross_validation)
+            nr_fold = 5;
+        if(!flag_solver_specified)
+        {
+            mexPrintf("Solver not specified. Using -s 2\n");
+            param.solver_type = L2R_L2LOSS_SVC;
+        }
+        else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
+        {
+            mexPrintf("Warm-start parameter search only available for -s 0 and -s 2\n");
+            return 1;
+        }
+    }
+
     if(param.eps == INF)
     {
         switch(param.solver_type)
@@ -406,7 +450,18 @@ void mexFunction( int nlhs, mxArray *plhs[],
             return;
         }
 
-        if(cross_validation_flag)
+        if (flag_find_C)
+        {
+            double best_C, best_rate, *ptr;
+
+            do_find_parameter_C(&best_C, &best_rate);
+
+            plhs[0] = mxCreateDoubleMatrix(2, 1, mxREAL);
+            ptr = mxGetPr(plhs[0]);
+            ptr[0] = best_C;
+            ptr[1] = best_rate;
+        }
+        else if(flag_cross_validation)
         {
             double *ptr;
             plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);

data/liblinear-2.1/python/README
@@ -277,6 +277,11 @@ The above command loads
     structure. If '-v' is specified, cross validation is
     conducted and the returned model is just a scalar: cross-validation
     accuracy for classification and mean-squared error for regression.
+    If the '-C' option is specified, the best parameter C is found
+    by cross validation. The returned model is a tuple of the best C
+    and the corresponding cross-validation accuracy. The parameter
+    selection utility is supported by only -s 0 and -s 2.
+
 
     To train the same data many times with different
     parameters, the second and the third ways should be faster..
@@ -290,6 +295,8 @@ The above command loads
     >>> m = train(prob, '-w1 5 -c 5')
     >>> m = train(prob, param)
     >>> CV_ACC = train(y, x, '-v 3')
+    >>> best_C, best_rate = train(y, x, '-C -s 0')
+    >>> m = train(y, x, '-c {0} -s 0'.format(best_C)) # use the same solver: -s 0
 
 - Function: predict
 

data/liblinear-2.1/python/liblinear.py
@@ -16,7 +16,7 @@ try:
     if sys.platform == 'win32':
         liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll'))
     else:
-        liblinear = CDLL(path.join(dirname, '../liblinear.so.2'))
+        liblinear = CDLL(path.join(dirname, '../liblinear.so.3'))
 except:
 # For unix the prefix 'lib' is not considered.
     if find_library('linear'):
@@ -127,8 +127,8 @@ class problem(Structure):
 
 
 class parameter(Structure):
-    _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p"]
-    _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double]
+    _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p", "init_sol"]
+    _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double, POINTER(c_double)]
     _fields_ = genFields(_names, _types)
 
     def __init__(self, options = None):
@@ -152,10 +152,14 @@ class parameter(Structure):
         self.C = 1
         self.p = 0.1
         self.nr_weight = 0
-        self.weight_label = (c_int * 0)()
-        self.weight = (c_double * 0)()
+        self.weight_label = None
+        self.weight = None
+        self.init_sol = None
         self.bias = -1
-        self.cross_validation = False
+        self.flag_cross_validation = False
+        self.flag_C_specified = False
+        self.flag_solver_specified = False
+        self.flag_find_C = False
         self.nr_fold = 0
         self.print_func = cast(None, PRINT_STRING_FUN)
 
@@ -176,9 +180,11 @@ class parameter(Structure):
             if argv[i] == "-s":
                 i = i + 1
                 self.solver_type = int(argv[i])
+                self.flag_solver_specified = True
             elif argv[i] == "-c":
                 i = i + 1
                 self.C = float(argv[i])
+                self.flag_C_specified = True
             elif argv[i] == "-p":
                 i = i + 1
                 self.p = float(argv[i])
@@ -190,18 +196,20 @@ class parameter(Structure):
                 self.bias = float(argv[i])
             elif argv[i] == "-v":
                 i = i + 1
-                self.cross_validation = 1
+                self.flag_cross_validation = 1
                 self.nr_fold = int(argv[i])
                 if self.nr_fold < 2 :
                     raise ValueError("n-fold cross validation: n must >= 2")
             elif argv[i].startswith("-w"):
                 i = i + 1
                 self.nr_weight += 1
-                nr_weight = self.nr_weight
                 weight_label += [int(argv[i-1][2:])]
                 weight += [float(argv[i])]
             elif argv[i] == "-q":
                 self.print_func = PRINT_STRING_FUN(print_null)
+            elif argv[i] == "-C":
+                self.flag_find_C = True
+
             else :
                 raise ValueError("Wrong options")
             i += 1
@@ -213,6 +221,16 @@ class parameter(Structure):
             self.weight[i] = weight[i]
             self.weight_label[i] = weight_label[i]
 
+        # default solver for parameter selection is L2R_L2LOSS_SVC
+        if self.flag_find_C:
+            if not self.flag_cross_validation:
+                self.nr_fold = 5
+            if not self.flag_solver_specified:
+                self.solver_type = L2R_L2LOSS_SVC
+                self.flag_solver_specified = True
+            elif self.solver_type not in [L2R_LR, L2R_L2LOSS_SVC]:
+                raise ValueError("Warm-start parameter search only available for -s 0 and -s 2")
+
         if self.eps == float('inf'):
             if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]:
                 self.eps = 0.01
@@ -280,6 +298,7 @@ def toPyModel(model_ptr):
     return m
 
 fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)])
+fillprototype(liblinear.find_parameter_C, None, [POINTER(problem), POINTER(parameter), c_int, c_double, c_double, POINTER(c_double), POINTER(c_double)])
 fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)])
 
 fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])

data/liblinear-2.1/python/liblinearutil.py
@@ -127,7 +127,7 @@ def train(arg1, arg2=None, arg3=None):
         -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
         -wi weight: weights adjust the parameter C of different classes (see README for details)
         -v n: n-fold cross validation mode
-        -q : quiet mode (no outputs)
+        -q : quiet mode (no outputs)
     """
     prob, param = None, None
     if isinstance(arg1, (list, tuple)):
@@ -150,7 +150,21 @@ def train(arg1, arg2=None, arg3=None):
     if err_msg :
         raise ValueError('Error: %s' % err_msg)
 
-    if param.cross_validation:
+    if param.flag_find_C:
+        nr_fold = param.nr_fold
+        best_C = c_double()
+        best_rate = c_double()
+        max_C = 1024
+        if param.flag_C_specified:
+            start_C = param.C
+        else:
+            start_C = -1.0
+        liblinear.find_parameter_C(prob, param, nr_fold, start_C, max_C, best_C, best_rate)
+        print("Best C = %lf CV accuracy = %g%%\n"% (best_C.value, 100.0*best_rate.value))
+        return best_C.value,best_rate.value
+
+
+    elif param.flag_cross_validation:
         l, nr_fold = prob.l, param.nr_fold
         target = (c_double * l)()
         liblinear.cross_validation(prob, param, nr_fold, target)

data/liblinear-2.1/train.c
@@ -49,6 +49,7 @@ void exit_with_help()
     "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
     "-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
     "-v n: n-fold cross validation mode\n"
+    "-C : find parameter C (only for -s 0 and 2)\n"
    "-q : quiet mode (no outputs)\n"
     );
     exit(1);
@@ -84,12 +85,16 @@ static char* readline(FILE *input)
 void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
 void read_problem(const char *filename);
 void do_cross_validation();
+void do_find_parameter_C();
 
 struct feature_node *x_space;
 struct parameter param;
 struct problem prob;
 struct model* model_;
 int flag_cross_validation;
+int flag_find_C;
+int flag_C_specified;
+int flag_solver_specified;
 int nr_fold;
 double bias;
 
@@ -109,7 +114,11 @@ int main(int argc, char **argv)
         exit(1);
     }
 
-    if(flag_cross_validation)
+    if (flag_find_C)
+    {
+        do_find_parameter_C();
+    }
+    else if(flag_cross_validation)
     {
         do_cross_validation();
     }
@@ -132,6 +141,19 @@ int main(int argc, char **argv)
     return 0;
 }
 
+void do_find_parameter_C()
+{
+    double start_C, best_C, best_rate;
+    double max_C = 1024;
+    if (flag_C_specified)
+        start_C = param.C;
+    else
+        start_C = -1.0;
+    printf("Doing parameter search with %d-fold cross validation.\n", nr_fold);
+    find_parameter_C(&prob, &param, nr_fold, start_C, max_C, &best_C, &best_rate);
+    printf("Best C = %g CV accuracy = %g%%\n", best_C, 100.0*best_rate);
+}
+
 void do_cross_validation()
 {
     int i;
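
Combined with the option parsing changes below, the new command-line workflow is, schematically (an illustrative session with placeholder numbers; heart_scale is the sample data set shipped in the tarball):

> train -C -s 2 heart_scale
Doing parameter search with 5-fold cross validation.
Best C = ... CV accuracy = ...%
> train -c <best C from above> -s 2 heart_scale

When -v is not given, the search defaults to 5-fold cross validation and tries C values up to max_C = 1024; retraining with the chosen C should use the same solver, per the bundled READMEs.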
@@ -186,7 +208,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name)
     param.nr_weight = 0;
     param.weight_label = NULL;
     param.weight = NULL;
+    param.init_sol = NULL;
     flag_cross_validation = 0;
+    flag_C_specified = 0;
+    flag_solver_specified = 0;
+    flag_find_C = 0;
     bias = -1;
 
     // parse options
@@ -199,10 +225,12 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name)
     {
         case 's':
             param.solver_type = atoi(argv[i]);
+            flag_solver_specified = 1;
             break;
 
         case 'c':
             param.C = atof(argv[i]);
+            flag_C_specified = 1;
             break;
 
         case 'p':
@@ -240,6 +268,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name)
             i--;
             break;
 
+        case 'C':
+            flag_find_C = 1;
+            i--;
+            break;
+
         default:
             fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
             exit_with_help();
@@ -267,6 +300,23 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name)
         sprintf(model_file_name,"%s.model",p);
     }
 
+    // default solver for parameter selection is L2R_L2LOSS_SVC
+    if(flag_find_C)
+    {
+        if(!flag_cross_validation)
+            nr_fold = 5;
+        if(!flag_solver_specified)
+        {
+            fprintf(stderr, "Solver not specified. Using -s 2\n");
+            param.solver_type = L2R_L2LOSS_SVC;
+        }
+        else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
+        {
+            fprintf(stderr, "Warm-start parameter search only available for -s 0 and -s 2\n");
+            exit_with_help();
+        }
+    }
+
     if(param.eps == INF)
     {
         switch(param.solver_type)