liblinear-ruby 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/blasp.h +8 -0
- data/ext/daxpy.c +8 -0
- data/ext/ddot.c +8 -0
- data/ext/dnrm2.c +8 -0
- data/ext/dscal.c +8 -0
- data/ext/liblinear_wrap.cxx +128 -3
- data/ext/linear.cpp +344 -175
- data/ext/linear.h +2 -0
- data/ext/tron.cpp +14 -8
- data/ext/tron.h +2 -1
- data/lib/liblinear/version.rb +1 -1
- data/{liblinear-1.95 → liblinear-2.1}/COPYRIGHT +1 -1
- data/{liblinear-1.95 → liblinear-2.1}/Makefile +1 -1
- data/{liblinear-1.95 → liblinear-2.1}/Makefile.win +3 -9
- data/{liblinear-1.95 → liblinear-2.1}/README +45 -7
- data/{liblinear-1.95 → liblinear-2.1}/blas/Makefile +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/blas/blas.h +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/blas/blasp.h +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/blas/daxpy.c +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/blas/ddot.c +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/blas/dnrm2.c +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/blas/dscal.c +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/heart_scale +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/linear.cpp +344 -175
- data/{liblinear-1.95 → liblinear-2.1}/linear.def +1 -0
- data/{liblinear-1.95 → liblinear-2.1}/linear.h +2 -0
- data/{liblinear-1.95 → liblinear-2.1}/matlab/Makefile +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/matlab/README +12 -2
- data/{liblinear-1.95 → liblinear-2.1}/matlab/libsvmread.c +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/matlab/libsvmwrite.c +1 -1
- data/{liblinear-1.95 → liblinear-2.1}/matlab/linear_model_matlab.c +1 -1
- data/{liblinear-1.95 → liblinear-2.1}/matlab/linear_model_matlab.h +0 -0
- data/liblinear-2.1/matlab/make.m +22 -0
- data/{liblinear-1.95 → liblinear-2.1}/matlab/predict.c +1 -1
- data/{liblinear-1.95 → liblinear-2.1}/matlab/train.c +65 -10
- data/{liblinear-1.95 → liblinear-2.1}/predict.c +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/python/Makefile +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/python/README +7 -0
- data/{liblinear-1.95 → liblinear-2.1}/python/liblinear.py +27 -8
- data/{liblinear-1.95 → liblinear-2.1}/python/liblinearutil.py +16 -2
- data/{liblinear-1.95 → liblinear-2.1}/train.c +51 -1
- data/{liblinear-1.95 → liblinear-2.1}/tron.cpp +14 -8
- data/{liblinear-1.95 → liblinear-2.1}/tron.h +2 -1
- data/liblinear-2.1/windows/liblinear.dll +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/windows/libsvmread.mexw64 +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/windows/libsvmwrite.mexw64 +0 -0
- data/liblinear-2.1/windows/predict.exe +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/windows/predict.mexw64 +0 -0
- data/liblinear-2.1/windows/train.exe +0 -0
- data/liblinear-2.1/windows/train.mexw64 +0 -0
- data/liblinear-ruby.gemspec +9 -10
- metadata +49 -50
- data/liblinear-1.95/matlab/make.m +0 -21
- data/liblinear-1.95/windows/liblinear.dll +0 -0
- data/liblinear-1.95/windows/predict.exe +0 -0
- data/liblinear-1.95/windows/train.exe +0 -0
- data/liblinear-1.95/windows/train.mexw64 +0 -0
@@ -32,6 +32,7 @@ struct parameter
|
|
32
32
|
int *weight_label;
|
33
33
|
double* weight;
|
34
34
|
double p;
|
35
|
+
double *init_sol;
|
35
36
|
};
|
36
37
|
|
37
38
|
struct model
|
@@ -46,6 +47,7 @@ struct model
|
|
46
47
|
|
47
48
|
struct model* train(const struct problem *prob, const struct parameter *param);
|
48
49
|
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
|
50
|
+
void find_parameter_C(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate);
|
49
51
|
|
50
52
|
double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
|
51
53
|
double predict(const struct model *model_, const struct feature_node *x);
|
File without changes
|
@@ -117,7 +117,7 @@ The 'train' function returns a model which can be used for future
|
|
117
117
|
prediction. It is a structure and is organized as [Parameters, nr_class,
|
118
118
|
nr_feature, bias, Label, w]:
|
119
119
|
|
120
|
-
-Parameters: Parameters
|
120
|
+
-Parameters: Parameters (now only solver type is provided)
|
121
121
|
-nr_class: number of classes; = 2 for regression
|
122
122
|
-nr_feature: number of features in training data (without including the bias term)
|
123
123
|
-bias: If >= 0, we assume one additional feature is added to the end
|
@@ -131,7 +131,12 @@ nr_feature, bias, Label, w]:
|
|
131
131
|
|
132
132
|
If the '-v' option is specified, cross validation is conducted and the
|
133
133
|
returned model is just a scalar: cross-validation accuracy for
|
134
|
-
classification and mean-squared error for regression.
|
134
|
+
classification and mean-squared error for regression. If the '-C' option
|
135
|
+
is specified, the best parameter C is found by cross validation. The
|
136
|
+
returned model is a two dimensional vector, where the first value is
|
137
|
+
the best C and the second value is the corresponding cross-validation
|
138
|
+
accuracy. The parameter selection utility is supported by only -s 0
|
139
|
+
and -s 2.
|
135
140
|
|
136
141
|
Result of Prediction
|
137
142
|
====================
|
@@ -184,6 +189,11 @@ For probability estimates, you need '-b 1' only in the testing phase:
|
|
184
189
|
|
185
190
|
matlab> [predict_label, accuracy, prob_estimates] = predict(heart_scale_label, heart_scale_inst, model, '-b 1');
|
186
191
|
|
192
|
+
Use the best parameter to train (only supported by -s 0 and -s 2):
|
193
|
+
|
194
|
+
matlab> best = train(heart_scale_label, heart_scale_inst, '-C -s 0');
|
195
|
+
matlab> model = train(heart_scale_label, heart_scale_inst, sprintf('-c %f -s 0', best(1))); % use the same solver: -s 0
|
196
|
+
|
187
197
|
Additional Information
|
188
198
|
======================
|
189
199
|
|
File without changes
|
@@ -72,7 +72,7 @@ void libsvmwrite(const char *filename, const mxArray *label_vec, const mxArray *
|
|
72
72
|
|
73
73
|
low = jc[i], high = jc[i+1];
|
74
74
|
for(k=low;k<high;k++)
|
75
|
-
fprintf(fp," %
|
75
|
+
fprintf(fp," %lu:%g", (size_t)ir[k]+1, samples[k]);
|
76
76
|
|
77
77
|
fprintf(fp,"\n");
|
78
78
|
}
|
File without changes
|
@@ -0,0 +1,22 @@
|
|
1
|
+
% This make.m is for MATLAB and OCTAVE under Windows, Mac, and Unix
|
2
|
+
function make()
|
3
|
+
try
|
4
|
+
% This part is for OCTAVE
|
5
|
+
if(exist('OCTAVE_VERSION', 'builtin'))
|
6
|
+
mex libsvmread.c
|
7
|
+
mex libsvmwrite.c
|
8
|
+
mex -I.. train.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
|
9
|
+
mex -I.. predict.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
|
10
|
+
% This part is for MATLAB
|
11
|
+
% Add -largeArrayDims on 64-bit machines of MATLAB
|
12
|
+
else
|
13
|
+
mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmread.c
|
14
|
+
mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmwrite.c
|
15
|
+
mex CFLAGS="\$CFLAGS -std=c99" -I.. -largeArrayDims train.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
|
16
|
+
mex CFLAGS="\$CFLAGS -std=c99" -I.. -largeArrayDims predict.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
|
17
|
+
end
|
18
|
+
catch err
|
19
|
+
fprintf('Error: %s failed (line %d)\n', err.stack(1).file, err.stack(1).line);
|
20
|
+
disp(err.message);
|
21
|
+
fprintf('=> Please check README for detailed instructions.\n');
|
22
|
+
end
|
@@ -1,9 +1,8 @@
|
|
1
|
-
#include <stdio.h>
|
2
1
|
#include <math.h>
|
3
2
|
#include <stdlib.h>
|
4
3
|
#include <string.h>
|
5
4
|
#include <ctype.h>
|
6
|
-
#include "../linear.h"
|
5
|
+
#include "linear.h"
|
7
6
|
|
8
7
|
#include "mex.h"
|
9
8
|
#include "linear_model_matlab.h"
|
@@ -60,6 +59,7 @@ void exit_with_help()
|
|
60
59
|
"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
|
61
60
|
"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
|
62
61
|
"-v n: n-fold cross validation mode\n"
|
62
|
+
"-C : find parameter C (only for -s 0 and 2)\n"
|
63
63
|
"-q : quiet mode (no outputs)\n"
|
64
64
|
"col:\n"
|
65
65
|
" if 'col' is setted, training_instance_matrix is parsed in column format, otherwise is in row format\n"
|
@@ -71,11 +71,28 @@ struct parameter param; // set by parse_command_line
|
|
71
71
|
struct problem prob; // set by read_problem
|
72
72
|
struct model *model_;
|
73
73
|
struct feature_node *x_space;
|
74
|
-
int flag_cross_validation;
|
74
|
+
int flag_cross_validation;
|
75
|
+
int flag_find_C;
|
76
|
+
int flag_C_specified;
|
77
|
+
int flag_solver_specified;
|
75
78
|
int col_format_flag;
|
76
79
|
int nr_fold;
|
77
80
|
double bias;
|
78
81
|
|
82
|
+
|
83
|
+
void do_find_parameter_C(double *best_C, double *best_rate)
|
84
|
+
{
|
85
|
+
double start_C;
|
86
|
+
double max_C = 1024;
|
87
|
+
if (flag_C_specified)
|
88
|
+
start_C = param.C;
|
89
|
+
else
|
90
|
+
start_C = -1.0;
|
91
|
+
find_parameter_C(&prob, ¶m, nr_fold, start_C, max_C, best_C, best_rate);
|
92
|
+
mexPrintf("Best C = %lf CV accuracy = %g%%\n", *best_C, 100.0**best_rate);
|
93
|
+
}
|
94
|
+
|
95
|
+
|
79
96
|
double do_cross_validation()
|
80
97
|
{
|
81
98
|
int i;
|
@@ -101,8 +118,8 @@ double do_cross_validation()
|
|
101
118
|
sumyy += y*y;
|
102
119
|
sumvy += v*y;
|
103
120
|
}
|
104
|
-
|
105
|
-
|
121
|
+
mexPrintf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
|
122
|
+
mexPrintf("Cross Validation Squared correlation coefficient = %g\n",
|
106
123
|
((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
|
107
124
|
((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
|
108
125
|
);
|
@@ -113,7 +130,7 @@ double do_cross_validation()
|
|
113
130
|
for(i=0;i<prob.l;i++)
|
114
131
|
if(target[i] == prob.y[i])
|
115
132
|
++total_correct;
|
116
|
-
|
133
|
+
mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
|
117
134
|
retval = 100.0*total_correct/prob.l;
|
118
135
|
}
|
119
136
|
|
@@ -137,8 +154,12 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
|
|
137
154
|
param.nr_weight = 0;
|
138
155
|
param.weight_label = NULL;
|
139
156
|
param.weight = NULL;
|
140
|
-
|
157
|
+
param.init_sol = NULL;
|
158
|
+
flag_cross_validation = 0;
|
141
159
|
col_format_flag = 0;
|
160
|
+
flag_C_specified = 0;
|
161
|
+
flag_solver_specified = 0;
|
162
|
+
flag_find_C = 0;
|
142
163
|
bias = -1;
|
143
164
|
|
144
165
|
|
@@ -166,15 +187,17 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
|
|
166
187
|
{
|
167
188
|
if(argv[i][0] != '-') break;
|
168
189
|
++i;
|
169
|
-
if(i>=argc && argv[i-1][1] != 'q') // since option -q has no parameter
|
190
|
+
if(i>=argc && argv[i-1][1] != 'q' && argv[i-1][1] != 'C') // since options -q and -C have no parameter
|
170
191
|
return 1;
|
171
192
|
switch(argv[i-1][1])
|
172
193
|
{
|
173
194
|
case 's':
|
174
195
|
param.solver_type = atoi(argv[i]);
|
196
|
+
flag_solver_specified = 1;
|
175
197
|
break;
|
176
198
|
case 'c':
|
177
199
|
param.C = atof(argv[i]);
|
200
|
+
flag_C_specified = 1;
|
178
201
|
break;
|
179
202
|
case 'p':
|
180
203
|
param.p = atof(argv[i]);
|
@@ -186,7 +209,7 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
|
|
186
209
|
bias = atof(argv[i]);
|
187
210
|
break;
|
188
211
|
case 'v':
|
189
|
-
|
212
|
+
flag_cross_validation = 1;
|
190
213
|
nr_fold = atoi(argv[i]);
|
191
214
|
if(nr_fold < 2)
|
192
215
|
{
|
@@ -205,6 +228,10 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
|
|
205
228
|
print_func = &print_null;
|
206
229
|
i--;
|
207
230
|
break;
|
231
|
+
case 'C':
|
232
|
+
flag_find_C = 1;
|
233
|
+
i--;
|
234
|
+
break;
|
208
235
|
default:
|
209
236
|
mexPrintf("unknown option\n");
|
210
237
|
return 1;
|
@@ -213,6 +240,23 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
|
|
213
240
|
|
214
241
|
set_print_string_function(print_func);
|
215
242
|
|
243
|
+
// default solver for parameter selection is L2R_L2LOSS_SVC
|
244
|
+
if(flag_find_C)
|
245
|
+
{
|
246
|
+
if(!flag_cross_validation)
|
247
|
+
nr_fold = 5;
|
248
|
+
if(!flag_solver_specified)
|
249
|
+
{
|
250
|
+
mexPrintf("Solver not specified. Using -s 2\n");
|
251
|
+
param.solver_type = L2R_L2LOSS_SVC;
|
252
|
+
}
|
253
|
+
else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
|
254
|
+
{
|
255
|
+
mexPrintf("Warm-start parameter search only available for -s 0 and -s 2\n");
|
256
|
+
return 1;
|
257
|
+
}
|
258
|
+
}
|
259
|
+
|
216
260
|
if(param.eps == INF)
|
217
261
|
{
|
218
262
|
switch(param.solver_type)
|
@@ -406,7 +450,18 @@ void mexFunction( int nlhs, mxArray *plhs[],
|
|
406
450
|
return;
|
407
451
|
}
|
408
452
|
|
409
|
-
if(flag_cross_validation)
|
453
|
+
if (flag_find_C)
|
454
|
+
{
|
455
|
+
double best_C, best_rate, *ptr;
|
456
|
+
|
457
|
+
do_find_parameter_C(&best_C, &best_rate);
|
458
|
+
|
459
|
+
plhs[0] = mxCreateDoubleMatrix(2, 1, mxREAL);
|
460
|
+
ptr = mxGetPr(plhs[0]);
|
461
|
+
ptr[0] = best_C;
|
462
|
+
ptr[1] = best_rate;
|
463
|
+
}
|
464
|
+
else if(flag_cross_validation)
|
410
465
|
{
|
411
466
|
double *ptr;
|
412
467
|
plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
File without changes
|
File without changes
|
@@ -277,6 +277,11 @@ The above command loads
|
|
277
277
|
structure. If '-v' is specified, cross validation is
|
278
278
|
conducted and the returned model is just a scalar: cross-validation
|
279
279
|
accuracy for classification and mean-squared error for regression.
|
280
|
+
If the '-C' option is specified, the best parameter C is found
|
281
|
+
by cross validation. The returned model is a tuple of the best C
|
282
|
+
and the corresponding cross-validation accuracy. The parameter
|
283
|
+
selection utility is supported by only -s 0 and -s 2.
|
284
|
+
|
280
285
|
|
281
286
|
To train the same data many times with different
|
282
287
|
parameters, the second and the third ways should be faster..
|
@@ -290,6 +295,8 @@ The above command loads
|
|
290
295
|
>>> m = train(prob, '-w1 5 -c 5')
|
291
296
|
>>> m = train(prob, param)
|
292
297
|
>>> CV_ACC = train(y, x, '-v 3')
|
298
|
+
>>> best_C, best_rate = train(y, x, '-C -s 0')
|
299
|
+
>>> m = train(y, x, '-c {0} -s 0'.format(best_C)) # use the same solver: -s 0
|
293
300
|
|
294
301
|
- Function: predict
|
295
302
|
|
@@ -16,7 +16,7 @@ try:
|
|
16
16
|
if sys.platform == 'win32':
|
17
17
|
liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll'))
|
18
18
|
else:
|
19
|
-
liblinear = CDLL(path.join(dirname, '../liblinear.so.2'))
|
19
|
+
liblinear = CDLL(path.join(dirname, '../liblinear.so.3'))
|
20
20
|
except:
|
21
21
|
# For unix the prefix 'lib' is not considered.
|
22
22
|
if find_library('linear'):
|
@@ -127,8 +127,8 @@ class problem(Structure):
|
|
127
127
|
|
128
128
|
|
129
129
|
class parameter(Structure):
|
130
|
-
_names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p"]
|
131
|
-
_types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double]
|
130
|
+
_names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p", "init_sol"]
|
131
|
+
_types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double, POINTER(c_double)]
|
132
132
|
_fields_ = genFields(_names, _types)
|
133
133
|
|
134
134
|
def __init__(self, options = None):
|
@@ -152,10 +152,14 @@ class parameter(Structure):
|
|
152
152
|
self.C = 1
|
153
153
|
self.p = 0.1
|
154
154
|
self.nr_weight = 0
|
155
|
-
self.weight_label =
|
156
|
-
self.weight =
|
155
|
+
self.weight_label = None
|
156
|
+
self.weight = None
|
157
|
+
self.init_sol = None
|
157
158
|
self.bias = -1
|
158
|
-
self.cross_validation = False
|
159
|
+
self.flag_cross_validation = False
|
160
|
+
self.flag_C_specified = False
|
161
|
+
self.flag_solver_specified = False
|
162
|
+
self.flag_find_C = False
|
159
163
|
self.nr_fold = 0
|
160
164
|
self.print_func = cast(None, PRINT_STRING_FUN)
|
161
165
|
|
@@ -176,9 +180,11 @@ class parameter(Structure):
|
|
176
180
|
if argv[i] == "-s":
|
177
181
|
i = i + 1
|
178
182
|
self.solver_type = int(argv[i])
|
183
|
+
self.flag_solver_specified = True
|
179
184
|
elif argv[i] == "-c":
|
180
185
|
i = i + 1
|
181
186
|
self.C = float(argv[i])
|
187
|
+
self.flag_C_specified = True
|
182
188
|
elif argv[i] == "-p":
|
183
189
|
i = i + 1
|
184
190
|
self.p = float(argv[i])
|
@@ -190,18 +196,20 @@ class parameter(Structure):
|
|
190
196
|
self.bias = float(argv[i])
|
191
197
|
elif argv[i] == "-v":
|
192
198
|
i = i + 1
|
193
|
-
self.cross_validation = 1
|
199
|
+
self.flag_cross_validation = 1
|
194
200
|
self.nr_fold = int(argv[i])
|
195
201
|
if self.nr_fold < 2 :
|
196
202
|
raise ValueError("n-fold cross validation: n must >= 2")
|
197
203
|
elif argv[i].startswith("-w"):
|
198
204
|
i = i + 1
|
199
205
|
self.nr_weight += 1
|
200
|
-
nr_weight = self.nr_weight
|
201
206
|
weight_label += [int(argv[i-1][2:])]
|
202
207
|
weight += [float(argv[i])]
|
203
208
|
elif argv[i] == "-q":
|
204
209
|
self.print_func = PRINT_STRING_FUN(print_null)
|
210
|
+
elif argv[i] == "-C":
|
211
|
+
self.flag_find_C = True
|
212
|
+
|
205
213
|
else :
|
206
214
|
raise ValueError("Wrong options")
|
207
215
|
i += 1
|
@@ -213,6 +221,16 @@ class parameter(Structure):
|
|
213
221
|
self.weight[i] = weight[i]
|
214
222
|
self.weight_label[i] = weight_label[i]
|
215
223
|
|
224
|
+
# default solver for parameter selection is L2R_L2LOSS_SVC
|
225
|
+
if self.flag_find_C:
|
226
|
+
if not self.flag_cross_validation:
|
227
|
+
self.nr_fold = 5
|
228
|
+
if not self.flag_solver_specified:
|
229
|
+
self.solver_type = L2R_L2LOSS_SVC
|
230
|
+
self.flag_solver_specified = True
|
231
|
+
elif self.solver_type not in [L2R_LR, L2R_L2LOSS_SVC]:
|
232
|
+
raise ValueError("Warm-start parameter search only available for -s 0 and -s 2")
|
233
|
+
|
216
234
|
if self.eps == float('inf'):
|
217
235
|
if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]:
|
218
236
|
self.eps = 0.01
|
@@ -280,6 +298,7 @@ def toPyModel(model_ptr):
|
|
280
298
|
return m
|
281
299
|
|
282
300
|
fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)])
|
301
|
+
fillprototype(liblinear.find_parameter_C, None, [POINTER(problem), POINTER(parameter), c_int, c_double, c_double, POINTER(c_double), POINTER(c_double)])
|
283
302
|
fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)])
|
284
303
|
|
285
304
|
fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
|
@@ -127,7 +127,7 @@ def train(arg1, arg2=None, arg3=None):
|
|
127
127
|
-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
|
128
128
|
-wi weight: weights adjust the parameter C of different classes (see README for details)
|
129
129
|
-v n: n-fold cross validation mode
|
130
|
-
|
130
|
+
-q : quiet mode (no outputs)
|
131
131
|
"""
|
132
132
|
prob, param = None, None
|
133
133
|
if isinstance(arg1, (list, tuple)):
|
@@ -150,7 +150,21 @@ def train(arg1, arg2=None, arg3=None):
|
|
150
150
|
if err_msg :
|
151
151
|
raise ValueError('Error: %s' % err_msg)
|
152
152
|
|
153
|
-
if param.cross_validation:
|
153
|
+
if param.flag_find_C:
|
154
|
+
nr_fold = param.nr_fold
|
155
|
+
best_C = c_double()
|
156
|
+
best_rate = c_double()
|
157
|
+
max_C = 1024
|
158
|
+
if param.flag_C_specified:
|
159
|
+
start_C = param.C
|
160
|
+
else:
|
161
|
+
start_C = -1.0
|
162
|
+
liblinear.find_parameter_C(prob, param, nr_fold, start_C, max_C, best_C, best_rate)
|
163
|
+
print("Best C = %lf CV accuracy = %g%%\n"% (best_C.value, 100.0*best_rate.value))
|
164
|
+
return best_C.value,best_rate.value
|
165
|
+
|
166
|
+
|
167
|
+
elif param.flag_cross_validation:
|
154
168
|
l, nr_fold = prob.l, param.nr_fold
|
155
169
|
target = (c_double * l)()
|
156
170
|
liblinear.cross_validation(prob, param, nr_fold, target)
|
@@ -49,6 +49,7 @@ void exit_with_help()
|
|
49
49
|
"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
|
50
50
|
"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
|
51
51
|
"-v n: n-fold cross validation mode\n"
|
52
|
+
"-C : find parameter C (only for -s 0 and 2)\n"
|
52
53
|
"-q : quiet mode (no outputs)\n"
|
53
54
|
);
|
54
55
|
exit(1);
|
@@ -84,12 +85,16 @@ static char* readline(FILE *input)
|
|
84
85
|
void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
|
85
86
|
void read_problem(const char *filename);
|
86
87
|
void do_cross_validation();
|
88
|
+
void do_find_parameter_C();
|
87
89
|
|
88
90
|
struct feature_node *x_space;
|
89
91
|
struct parameter param;
|
90
92
|
struct problem prob;
|
91
93
|
struct model* model_;
|
92
94
|
int flag_cross_validation;
|
95
|
+
int flag_find_C;
|
96
|
+
int flag_C_specified;
|
97
|
+
int flag_solver_specified;
|
93
98
|
int nr_fold;
|
94
99
|
double bias;
|
95
100
|
|
@@ -109,7 +114,11 @@ int main(int argc, char **argv)
|
|
109
114
|
exit(1);
|
110
115
|
}
|
111
116
|
|
112
|
-
if(flag_cross_validation)
|
117
|
+
if (flag_find_C)
|
118
|
+
{
|
119
|
+
do_find_parameter_C();
|
120
|
+
}
|
121
|
+
else if(flag_cross_validation)
|
113
122
|
{
|
114
123
|
do_cross_validation();
|
115
124
|
}
|
@@ -132,6 +141,19 @@ int main(int argc, char **argv)
|
|
132
141
|
return 0;
|
133
142
|
}
|
134
143
|
|
144
|
+
void do_find_parameter_C()
|
145
|
+
{
|
146
|
+
double start_C, best_C, best_rate;
|
147
|
+
double max_C = 1024;
|
148
|
+
if (flag_C_specified)
|
149
|
+
start_C = param.C;
|
150
|
+
else
|
151
|
+
start_C = -1.0;
|
152
|
+
printf("Doing parameter search with %d-fold cross validation.\n", nr_fold);
|
153
|
+
find_parameter_C(&prob, ¶m, nr_fold, start_C, max_C, &best_C, &best_rate);
|
154
|
+
printf("Best C = %g CV accuracy = %g%%\n", best_C, 100.0*best_rate);
|
155
|
+
}
|
156
|
+
|
135
157
|
void do_cross_validation()
|
136
158
|
{
|
137
159
|
int i;
|
@@ -186,7 +208,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
|
|
186
208
|
param.nr_weight = 0;
|
187
209
|
param.weight_label = NULL;
|
188
210
|
param.weight = NULL;
|
211
|
+
param.init_sol = NULL;
|
189
212
|
flag_cross_validation = 0;
|
213
|
+
flag_C_specified = 0;
|
214
|
+
flag_solver_specified = 0;
|
215
|
+
flag_find_C = 0;
|
190
216
|
bias = -1;
|
191
217
|
|
192
218
|
// parse options
|
@@ -199,10 +225,12 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
|
|
199
225
|
{
|
200
226
|
case 's':
|
201
227
|
param.solver_type = atoi(argv[i]);
|
228
|
+
flag_solver_specified = 1;
|
202
229
|
break;
|
203
230
|
|
204
231
|
case 'c':
|
205
232
|
param.C = atof(argv[i]);
|
233
|
+
flag_C_specified = 1;
|
206
234
|
break;
|
207
235
|
|
208
236
|
case 'p':
|
@@ -240,6 +268,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
|
|
240
268
|
i--;
|
241
269
|
break;
|
242
270
|
|
271
|
+
case 'C':
|
272
|
+
flag_find_C = 1;
|
273
|
+
i--;
|
274
|
+
break;
|
275
|
+
|
243
276
|
default:
|
244
277
|
fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
|
245
278
|
exit_with_help();
|
@@ -267,6 +300,23 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
|
|
267
300
|
sprintf(model_file_name,"%s.model",p);
|
268
301
|
}
|
269
302
|
|
303
|
+
// default solver for parameter selection is L2R_L2LOSS_SVC
|
304
|
+
if(flag_find_C)
|
305
|
+
{
|
306
|
+
if(!flag_cross_validation)
|
307
|
+
nr_fold = 5;
|
308
|
+
if(!flag_solver_specified)
|
309
|
+
{
|
310
|
+
fprintf(stderr, "Solver not specified. Using -s 2\n");
|
311
|
+
param.solver_type = L2R_L2LOSS_SVC;
|
312
|
+
}
|
313
|
+
else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
|
314
|
+
{
|
315
|
+
fprintf(stderr, "Warm-start parameter search only available for -s 0 and -s 2\n");
|
316
|
+
exit_with_help();
|
317
|
+
}
|
318
|
+
}
|
319
|
+
|
270
320
|
if(param.eps == INF)
|
271
321
|
{
|
272
322
|
switch(param.solver_type)
|