liblinear-ruby 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/blasp.h +8 -0
- data/ext/daxpy.c +8 -0
- data/ext/ddot.c +8 -0
- data/ext/dnrm2.c +8 -0
- data/ext/dscal.c +8 -0
- data/ext/liblinear_wrap.cxx +128 -3
- data/ext/linear.cpp +344 -175
- data/ext/linear.h +2 -0
- data/ext/tron.cpp +14 -8
- data/ext/tron.h +2 -1
- data/lib/liblinear/version.rb +1 -1
- data/{liblinear-1.95 → liblinear-2.1}/COPYRIGHT +1 -1
- data/{liblinear-1.95 → liblinear-2.1}/Makefile +1 -1
- data/{liblinear-1.95 → liblinear-2.1}/Makefile.win +3 -9
- data/{liblinear-1.95 → liblinear-2.1}/README +45 -7
- data/{liblinear-1.95 → liblinear-2.1}/blas/Makefile +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/blas/blas.h +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/blas/blasp.h +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/blas/daxpy.c +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/blas/ddot.c +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/blas/dnrm2.c +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/blas/dscal.c +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/heart_scale +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/linear.cpp +344 -175
- data/{liblinear-1.95 → liblinear-2.1}/linear.def +1 -0
- data/{liblinear-1.95 → liblinear-2.1}/linear.h +2 -0
- data/{liblinear-1.95 → liblinear-2.1}/matlab/Makefile +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/matlab/README +12 -2
- data/{liblinear-1.95 → liblinear-2.1}/matlab/libsvmread.c +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/matlab/libsvmwrite.c +1 -1
- data/{liblinear-1.95 → liblinear-2.1}/matlab/linear_model_matlab.c +1 -1
- data/{liblinear-1.95 → liblinear-2.1}/matlab/linear_model_matlab.h +0 -0
- data/liblinear-2.1/matlab/make.m +22 -0
- data/{liblinear-1.95 → liblinear-2.1}/matlab/predict.c +1 -1
- data/{liblinear-1.95 → liblinear-2.1}/matlab/train.c +65 -10
- data/{liblinear-1.95 → liblinear-2.1}/predict.c +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/python/Makefile +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/python/README +7 -0
- data/{liblinear-1.95 → liblinear-2.1}/python/liblinear.py +27 -8
- data/{liblinear-1.95 → liblinear-2.1}/python/liblinearutil.py +16 -2
- data/{liblinear-1.95 → liblinear-2.1}/train.c +51 -1
- data/{liblinear-1.95 → liblinear-2.1}/tron.cpp +14 -8
- data/{liblinear-1.95 → liblinear-2.1}/tron.h +2 -1
- data/liblinear-2.1/windows/liblinear.dll +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/windows/libsvmread.mexw64 +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/windows/libsvmwrite.mexw64 +0 -0
- data/liblinear-2.1/windows/predict.exe +0 -0
- data/{liblinear-1.95 → liblinear-2.1}/windows/predict.mexw64 +0 -0
- data/liblinear-2.1/windows/train.exe +0 -0
- data/liblinear-2.1/windows/train.mexw64 +0 -0
- data/liblinear-ruby.gemspec +9 -10
- metadata +49 -50
- data/liblinear-1.95/matlab/make.m +0 -21
- data/liblinear-1.95/windows/liblinear.dll +0 -0
- data/liblinear-1.95/windows/predict.exe +0 -0
- data/liblinear-1.95/windows/train.exe +0 -0
- data/liblinear-1.95/windows/train.mexw64 +0 -0
@@ -32,6 +32,7 @@ struct parameter
|
|
32
32
|
int *weight_label;
|
33
33
|
double* weight;
|
34
34
|
double p;
|
35
|
+
double *init_sol;
|
35
36
|
};
|
36
37
|
|
37
38
|
struct model
|
@@ -46,6 +47,7 @@ struct model
|
|
46
47
|
|
47
48
|
struct model* train(const struct problem *prob, const struct parameter *param);
|
48
49
|
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
|
50
|
+
void find_parameter_C(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate);
|
49
51
|
|
50
52
|
double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
|
51
53
|
double predict(const struct model *model_, const struct feature_node *x);
|
File without changes
|
@@ -117,7 +117,7 @@ The 'train' function returns a model which can be used for future
|
|
117
117
|
prediction. It is a structure and is organized as [Parameters, nr_class,
|
118
118
|
nr_feature, bias, Label, w]:
|
119
119
|
|
120
|
-
-Parameters: Parameters
|
120
|
+
-Parameters: Parameters (now only solver type is provided)
|
121
121
|
-nr_class: number of classes; = 2 for regression
|
122
122
|
-nr_feature: number of features in training data (without including the bias term)
|
123
123
|
-bias: If >= 0, we assume one additional feature is added to the end
|
@@ -131,7 +131,12 @@ nr_feature, bias, Label, w]:
|
|
131
131
|
|
132
132
|
If the '-v' option is specified, cross validation is conducted and the
|
133
133
|
returned model is just a scalar: cross-validation accuracy for
|
134
|
-
classification and mean-squared error for regression.
|
134
|
+
classification and mean-squared error for regression. If the '-C' option
|
135
|
+
is specified, the best parameter C is found by cross validation. The
|
136
|
+
returned model is a two dimensional vector, where the first value is
|
137
|
+
the best C and the second value is the corresponding cross-validation
|
138
|
+
accuracy. The parameter selection utility is supported by only -s 0
|
139
|
+
and -s 2.
|
135
140
|
|
136
141
|
Result of Prediction
|
137
142
|
====================
|
@@ -184,6 +189,11 @@ For probability estimates, you need '-b 1' only in the testing phase:
|
|
184
189
|
|
185
190
|
matlab> [predict_label, accuracy, prob_estimates] = predict(heart_scale_label, heart_scale_inst, model, '-b 1');
|
186
191
|
|
192
|
+
Use the best parameter to train (only supported by -s 0 and -s 2):
|
193
|
+
|
194
|
+
matlab> best = train(heart_scale_label, heart_scale_inst, '-C -s 0');
|
195
|
+
matlab> model = train(heart_scale_label, heart_scale_inst, sprintf('-c %f -s 0', best(1))); % use the same solver: -s 0
|
196
|
+
|
187
197
|
Additional Information
|
188
198
|
======================
|
189
199
|
|
File without changes
|
@@ -72,7 +72,7 @@ void libsvmwrite(const char *filename, const mxArray *label_vec, const mxArray *
|
|
72
72
|
|
73
73
|
low = jc[i], high = jc[i+1];
|
74
74
|
for(k=low;k<high;k++)
|
75
|
-
fprintf(fp," %
|
75
|
+
fprintf(fp," %lu:%g", (size_t)ir[k]+1, samples[k]);
|
76
76
|
|
77
77
|
fprintf(fp,"\n");
|
78
78
|
}
|
File without changes
|
@@ -0,0 +1,22 @@
|
|
1
|
+
% This make.m is for MATLAB and OCTAVE under Windows, Mac, and Unix
|
2
|
+
function make()
|
3
|
+
try
|
4
|
+
% This part is for OCTAVE
|
5
|
+
if(exist('OCTAVE_VERSION', 'builtin'))
|
6
|
+
mex libsvmread.c
|
7
|
+
mex libsvmwrite.c
|
8
|
+
mex -I.. train.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
|
9
|
+
mex -I.. predict.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
|
10
|
+
% This part is for MATLAB
|
11
|
+
% Add -largeArrayDims on 64-bit machines of MATLAB
|
12
|
+
else
|
13
|
+
mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmread.c
|
14
|
+
mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmwrite.c
|
15
|
+
mex CFLAGS="\$CFLAGS -std=c99" -I.. -largeArrayDims train.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
|
16
|
+
mex CFLAGS="\$CFLAGS -std=c99" -I.. -largeArrayDims predict.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/daxpy.c ../blas/ddot.c ../blas/dnrm2.c ../blas/dscal.c
|
17
|
+
end
|
18
|
+
catch err
|
19
|
+
fprintf('Error: %s failed (line %d)\n', err.stack(1).file, err.stack(1).line);
|
20
|
+
disp(err.message);
|
21
|
+
fprintf('=> Please check README for detailed instructions.\n');
|
22
|
+
end
|
@@ -1,9 +1,8 @@
|
|
1
|
-
#include <stdio.h>
|
2
1
|
#include <math.h>
|
3
2
|
#include <stdlib.h>
|
4
3
|
#include <string.h>
|
5
4
|
#include <ctype.h>
|
6
|
-
#include "
|
5
|
+
#include "linear.h"
|
7
6
|
|
8
7
|
#include "mex.h"
|
9
8
|
#include "linear_model_matlab.h"
|
@@ -60,6 +59,7 @@ void exit_with_help()
|
|
60
59
|
"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
|
61
60
|
"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
|
62
61
|
"-v n: n-fold cross validation mode\n"
|
62
|
+
"-C : find parameter C (only for -s 0 and 2)\n"
|
63
63
|
"-q : quiet mode (no outputs)\n"
|
64
64
|
"col:\n"
|
65
65
|
" if 'col' is setted, training_instance_matrix is parsed in column format, otherwise is in row format\n"
|
@@ -71,11 +71,28 @@ struct parameter param; // set by parse_command_line
|
|
71
71
|
struct problem prob; // set by read_problem
|
72
72
|
struct model *model_;
|
73
73
|
struct feature_node *x_space;
|
74
|
-
int
|
74
|
+
int flag_cross_validation;
|
75
|
+
int flag_find_C;
|
76
|
+
int flag_C_specified;
|
77
|
+
int flag_solver_specified;
|
75
78
|
int col_format_flag;
|
76
79
|
int nr_fold;
|
77
80
|
double bias;
|
78
81
|
|
82
|
+
|
83
|
+
void do_find_parameter_C(double *best_C, double *best_rate)
|
84
|
+
{
|
85
|
+
double start_C;
|
86
|
+
double max_C = 1024;
|
87
|
+
if (flag_C_specified)
|
88
|
+
start_C = param.C;
|
89
|
+
else
|
90
|
+
start_C = -1.0;
|
91
|
+
find_parameter_C(&prob, &param, nr_fold, start_C, max_C, best_C, best_rate);
|
92
|
+
mexPrintf("Best C = %lf CV accuracy = %g%%\n", *best_C, 100.0**best_rate);
|
93
|
+
}
|
94
|
+
|
95
|
+
|
79
96
|
double do_cross_validation()
|
80
97
|
{
|
81
98
|
int i;
|
@@ -101,8 +118,8 @@ double do_cross_validation()
|
|
101
118
|
sumyy += y*y;
|
102
119
|
sumvy += v*y;
|
103
120
|
}
|
104
|
-
|
105
|
-
|
121
|
+
mexPrintf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
|
122
|
+
mexPrintf("Cross Validation Squared correlation coefficient = %g\n",
|
106
123
|
((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
|
107
124
|
((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
|
108
125
|
);
|
@@ -113,7 +130,7 @@ double do_cross_validation()
|
|
113
130
|
for(i=0;i<prob.l;i++)
|
114
131
|
if(target[i] == prob.y[i])
|
115
132
|
++total_correct;
|
116
|
-
|
133
|
+
mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
|
117
134
|
retval = 100.0*total_correct/prob.l;
|
118
135
|
}
|
119
136
|
|
@@ -137,8 +154,12 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
|
|
137
154
|
param.nr_weight = 0;
|
138
155
|
param.weight_label = NULL;
|
139
156
|
param.weight = NULL;
|
140
|
-
|
157
|
+
param.init_sol = NULL;
|
158
|
+
flag_cross_validation = 0;
|
141
159
|
col_format_flag = 0;
|
160
|
+
flag_C_specified = 0;
|
161
|
+
flag_solver_specified = 0;
|
162
|
+
flag_find_C = 0;
|
142
163
|
bias = -1;
|
143
164
|
|
144
165
|
|
@@ -166,15 +187,17 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
|
|
166
187
|
{
|
167
188
|
if(argv[i][0] != '-') break;
|
168
189
|
++i;
|
169
|
-
if(i>=argc && argv[i-1][1] != 'q') // since option -q has no parameter
|
190
|
+
if(i>=argc && argv[i-1][1] != 'q' && argv[i-1][1] != 'C') // since options -q and -C have no parameter
|
170
191
|
return 1;
|
171
192
|
switch(argv[i-1][1])
|
172
193
|
{
|
173
194
|
case 's':
|
174
195
|
param.solver_type = atoi(argv[i]);
|
196
|
+
flag_solver_specified = 1;
|
175
197
|
break;
|
176
198
|
case 'c':
|
177
199
|
param.C = atof(argv[i]);
|
200
|
+
flag_C_specified = 1;
|
178
201
|
break;
|
179
202
|
case 'p':
|
180
203
|
param.p = atof(argv[i]);
|
@@ -186,7 +209,7 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
|
|
186
209
|
bias = atof(argv[i]);
|
187
210
|
break;
|
188
211
|
case 'v':
|
189
|
-
|
212
|
+
flag_cross_validation = 1;
|
190
213
|
nr_fold = atoi(argv[i]);
|
191
214
|
if(nr_fold < 2)
|
192
215
|
{
|
@@ -205,6 +228,10 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
|
|
205
228
|
print_func = &print_null;
|
206
229
|
i--;
|
207
230
|
break;
|
231
|
+
case 'C':
|
232
|
+
flag_find_C = 1;
|
233
|
+
i--;
|
234
|
+
break;
|
208
235
|
default:
|
209
236
|
mexPrintf("unknown option\n");
|
210
237
|
return 1;
|
@@ -213,6 +240,23 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
|
|
213
240
|
|
214
241
|
set_print_string_function(print_func);
|
215
242
|
|
243
|
+
// default solver for parameter selection is L2R_L2LOSS_SVC
|
244
|
+
if(flag_find_C)
|
245
|
+
{
|
246
|
+
if(!flag_cross_validation)
|
247
|
+
nr_fold = 5;
|
248
|
+
if(!flag_solver_specified)
|
249
|
+
{
|
250
|
+
mexPrintf("Solver not specified. Using -s 2\n");
|
251
|
+
param.solver_type = L2R_L2LOSS_SVC;
|
252
|
+
}
|
253
|
+
else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
|
254
|
+
{
|
255
|
+
mexPrintf("Warm-start parameter search only available for -s 0 and -s 2\n");
|
256
|
+
return 1;
|
257
|
+
}
|
258
|
+
}
|
259
|
+
|
216
260
|
if(param.eps == INF)
|
217
261
|
{
|
218
262
|
switch(param.solver_type)
|
@@ -406,7 +450,18 @@ void mexFunction( int nlhs, mxArray *plhs[],
|
|
406
450
|
return;
|
407
451
|
}
|
408
452
|
|
409
|
-
if(
|
453
|
+
if (flag_find_C)
|
454
|
+
{
|
455
|
+
double best_C, best_rate, *ptr;
|
456
|
+
|
457
|
+
do_find_parameter_C(&best_C, &best_rate);
|
458
|
+
|
459
|
+
plhs[0] = mxCreateDoubleMatrix(2, 1, mxREAL);
|
460
|
+
ptr = mxGetPr(plhs[0]);
|
461
|
+
ptr[0] = best_C;
|
462
|
+
ptr[1] = best_rate;
|
463
|
+
}
|
464
|
+
else if(flag_cross_validation)
|
410
465
|
{
|
411
466
|
double *ptr;
|
412
467
|
plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
File without changes
|
File without changes
|
@@ -277,6 +277,11 @@ The above command loads
|
|
277
277
|
structure. If '-v' is specified, cross validation is
|
278
278
|
conducted and the returned model is just a scalar: cross-validation
|
279
279
|
accuracy for classification and mean-squared error for regression.
|
280
|
+
If the '-C' option is specified, the best parameter C is found
|
281
|
+
by cross validation. The returned model is a tuple of the best C
|
282
|
+
and the corresponding cross-validation accuracy. The parameter
|
283
|
+
selection utility is supported by only -s 0 and -s 2.
|
284
|
+
|
280
285
|
|
281
286
|
To train the same data many times with different
|
282
287
|
parameters, the second and the third ways should be faster..
|
@@ -290,6 +295,8 @@ The above command loads
|
|
290
295
|
>>> m = train(prob, '-w1 5 -c 5')
|
291
296
|
>>> m = train(prob, param)
|
292
297
|
>>> CV_ACC = train(y, x, '-v 3')
|
298
|
+
>>> best_C, best_rate = train(y, x, '-C -s 0')
|
299
|
+
>>> m = train(y, x, '-c {0} -s 0'.format(best_C)) # use the same solver: -s 0
|
293
300
|
|
294
301
|
- Function: predict
|
295
302
|
|
@@ -16,7 +16,7 @@ try:
|
|
16
16
|
if sys.platform == 'win32':
|
17
17
|
liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll'))
|
18
18
|
else:
|
19
|
-
liblinear = CDLL(path.join(dirname, '../liblinear.so.
|
19
|
+
liblinear = CDLL(path.join(dirname, '../liblinear.so.3'))
|
20
20
|
except:
|
21
21
|
# For unix the prefix 'lib' is not considered.
|
22
22
|
if find_library('linear'):
|
@@ -127,8 +127,8 @@ class problem(Structure):
|
|
127
127
|
|
128
128
|
|
129
129
|
class parameter(Structure):
|
130
|
-
_names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p"]
|
131
|
-
_types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double]
|
130
|
+
_names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p", "init_sol"]
|
131
|
+
_types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double, POINTER(c_double)]
|
132
132
|
_fields_ = genFields(_names, _types)
|
133
133
|
|
134
134
|
def __init__(self, options = None):
|
@@ -152,10 +152,14 @@ class parameter(Structure):
|
|
152
152
|
self.C = 1
|
153
153
|
self.p = 0.1
|
154
154
|
self.nr_weight = 0
|
155
|
-
self.weight_label =
|
156
|
-
self.weight =
|
155
|
+
self.weight_label = None
|
156
|
+
self.weight = None
|
157
|
+
self.init_sol = None
|
157
158
|
self.bias = -1
|
158
|
-
self.
|
159
|
+
self.flag_cross_validation = False
|
160
|
+
self.flag_C_specified = False
|
161
|
+
self.flag_solver_specified = False
|
162
|
+
self.flag_find_C = False
|
159
163
|
self.nr_fold = 0
|
160
164
|
self.print_func = cast(None, PRINT_STRING_FUN)
|
161
165
|
|
@@ -176,9 +180,11 @@ class parameter(Structure):
|
|
176
180
|
if argv[i] == "-s":
|
177
181
|
i = i + 1
|
178
182
|
self.solver_type = int(argv[i])
|
183
|
+
self.flag_solver_specified = True
|
179
184
|
elif argv[i] == "-c":
|
180
185
|
i = i + 1
|
181
186
|
self.C = float(argv[i])
|
187
|
+
self.flag_C_specified = True
|
182
188
|
elif argv[i] == "-p":
|
183
189
|
i = i + 1
|
184
190
|
self.p = float(argv[i])
|
@@ -190,18 +196,20 @@ class parameter(Structure):
|
|
190
196
|
self.bias = float(argv[i])
|
191
197
|
elif argv[i] == "-v":
|
192
198
|
i = i + 1
|
193
|
-
self.
|
199
|
+
self.flag_cross_validation = 1
|
194
200
|
self.nr_fold = int(argv[i])
|
195
201
|
if self.nr_fold < 2 :
|
196
202
|
raise ValueError("n-fold cross validation: n must >= 2")
|
197
203
|
elif argv[i].startswith("-w"):
|
198
204
|
i = i + 1
|
199
205
|
self.nr_weight += 1
|
200
|
-
nr_weight = self.nr_weight
|
201
206
|
weight_label += [int(argv[i-1][2:])]
|
202
207
|
weight += [float(argv[i])]
|
203
208
|
elif argv[i] == "-q":
|
204
209
|
self.print_func = PRINT_STRING_FUN(print_null)
|
210
|
+
elif argv[i] == "-C":
|
211
|
+
self.flag_find_C = True
|
212
|
+
|
205
213
|
else :
|
206
214
|
raise ValueError("Wrong options")
|
207
215
|
i += 1
|
@@ -213,6 +221,16 @@ class parameter(Structure):
|
|
213
221
|
self.weight[i] = weight[i]
|
214
222
|
self.weight_label[i] = weight_label[i]
|
215
223
|
|
224
|
+
# default solver for parameter selection is L2R_L2LOSS_SVC
|
225
|
+
if self.flag_find_C:
|
226
|
+
if not self.flag_cross_validation:
|
227
|
+
self.nr_fold = 5
|
228
|
+
if not self.flag_solver_specified:
|
229
|
+
self.solver_type = L2R_L2LOSS_SVC
|
230
|
+
self.flag_solver_specified = True
|
231
|
+
elif self.solver_type not in [L2R_LR, L2R_L2LOSS_SVC]:
|
232
|
+
raise ValueError("Warm-start parameter search only available for -s 0 and -s 2")
|
233
|
+
|
216
234
|
if self.eps == float('inf'):
|
217
235
|
if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]:
|
218
236
|
self.eps = 0.01
|
@@ -280,6 +298,7 @@ def toPyModel(model_ptr):
|
|
280
298
|
return m
|
281
299
|
|
282
300
|
fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)])
|
301
|
+
fillprototype(liblinear.find_parameter_C, None, [POINTER(problem), POINTER(parameter), c_int, c_double, c_double, POINTER(c_double), POINTER(c_double)])
|
283
302
|
fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)])
|
284
303
|
|
285
304
|
fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
|
@@ -127,7 +127,7 @@ def train(arg1, arg2=None, arg3=None):
|
|
127
127
|
-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
|
128
128
|
-wi weight: weights adjust the parameter C of different classes (see README for details)
|
129
129
|
-v n: n-fold cross validation mode
|
130
|
-
|
130
|
+
-q : quiet mode (no outputs)
|
131
131
|
"""
|
132
132
|
prob, param = None, None
|
133
133
|
if isinstance(arg1, (list, tuple)):
|
@@ -150,7 +150,21 @@ def train(arg1, arg2=None, arg3=None):
|
|
150
150
|
if err_msg :
|
151
151
|
raise ValueError('Error: %s' % err_msg)
|
152
152
|
|
153
|
-
if param.
|
153
|
+
if param.flag_find_C:
|
154
|
+
nr_fold = param.nr_fold
|
155
|
+
best_C = c_double()
|
156
|
+
best_rate = c_double()
|
157
|
+
max_C = 1024
|
158
|
+
if param.flag_C_specified:
|
159
|
+
start_C = param.C
|
160
|
+
else:
|
161
|
+
start_C = -1.0
|
162
|
+
liblinear.find_parameter_C(prob, param, nr_fold, start_C, max_C, best_C, best_rate)
|
163
|
+
print("Best C = %lf CV accuracy = %g%%\n"% (best_C.value, 100.0*best_rate.value))
|
164
|
+
return best_C.value,best_rate.value
|
165
|
+
|
166
|
+
|
167
|
+
elif param.flag_cross_validation:
|
154
168
|
l, nr_fold = prob.l, param.nr_fold
|
155
169
|
target = (c_double * l)()
|
156
170
|
liblinear.cross_validation(prob, param, nr_fold, target)
|
@@ -49,6 +49,7 @@ void exit_with_help()
|
|
49
49
|
"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
|
50
50
|
"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
|
51
51
|
"-v n: n-fold cross validation mode\n"
|
52
|
+
"-C : find parameter C (only for -s 0 and 2)\n"
|
52
53
|
"-q : quiet mode (no outputs)\n"
|
53
54
|
);
|
54
55
|
exit(1);
|
@@ -84,12 +85,16 @@ static char* readline(FILE *input)
|
|
84
85
|
void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
|
85
86
|
void read_problem(const char *filename);
|
86
87
|
void do_cross_validation();
|
88
|
+
void do_find_parameter_C();
|
87
89
|
|
88
90
|
struct feature_node *x_space;
|
89
91
|
struct parameter param;
|
90
92
|
struct problem prob;
|
91
93
|
struct model* model_;
|
92
94
|
int flag_cross_validation;
|
95
|
+
int flag_find_C;
|
96
|
+
int flag_C_specified;
|
97
|
+
int flag_solver_specified;
|
93
98
|
int nr_fold;
|
94
99
|
double bias;
|
95
100
|
|
@@ -109,7 +114,11 @@ int main(int argc, char **argv)
|
|
109
114
|
exit(1);
|
110
115
|
}
|
111
116
|
|
112
|
-
if(flag_cross_validation)
|
117
|
+
if (flag_find_C)
|
118
|
+
{
|
119
|
+
do_find_parameter_C();
|
120
|
+
}
|
121
|
+
else if(flag_cross_validation)
|
113
122
|
{
|
114
123
|
do_cross_validation();
|
115
124
|
}
|
@@ -132,6 +141,19 @@ int main(int argc, char **argv)
|
|
132
141
|
return 0;
|
133
142
|
}
|
134
143
|
|
144
|
+
void do_find_parameter_C()
|
145
|
+
{
|
146
|
+
double start_C, best_C, best_rate;
|
147
|
+
double max_C = 1024;
|
148
|
+
if (flag_C_specified)
|
149
|
+
start_C = param.C;
|
150
|
+
else
|
151
|
+
start_C = -1.0;
|
152
|
+
printf("Doing parameter search with %d-fold cross validation.\n", nr_fold);
|
153
|
+
find_parameter_C(&prob, &param, nr_fold, start_C, max_C, &best_C, &best_rate);
|
154
|
+
printf("Best C = %g CV accuracy = %g%%\n", best_C, 100.0*best_rate);
|
155
|
+
}
|
156
|
+
|
135
157
|
void do_cross_validation()
|
136
158
|
{
|
137
159
|
int i;
|
@@ -186,7 +208,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
|
|
186
208
|
param.nr_weight = 0;
|
187
209
|
param.weight_label = NULL;
|
188
210
|
param.weight = NULL;
|
211
|
+
param.init_sol = NULL;
|
189
212
|
flag_cross_validation = 0;
|
213
|
+
flag_C_specified = 0;
|
214
|
+
flag_solver_specified = 0;
|
215
|
+
flag_find_C = 0;
|
190
216
|
bias = -1;
|
191
217
|
|
192
218
|
// parse options
|
@@ -199,10 +225,12 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
|
|
199
225
|
{
|
200
226
|
case 's':
|
201
227
|
param.solver_type = atoi(argv[i]);
|
228
|
+
flag_solver_specified = 1;
|
202
229
|
break;
|
203
230
|
|
204
231
|
case 'c':
|
205
232
|
param.C = atof(argv[i]);
|
233
|
+
flag_C_specified = 1;
|
206
234
|
break;
|
207
235
|
|
208
236
|
case 'p':
|
@@ -240,6 +268,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
|
|
240
268
|
i--;
|
241
269
|
break;
|
242
270
|
|
271
|
+
case 'C':
|
272
|
+
flag_find_C = 1;
|
273
|
+
i--;
|
274
|
+
break;
|
275
|
+
|
243
276
|
default:
|
244
277
|
fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
|
245
278
|
exit_with_help();
|
@@ -267,6 +300,23 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
|
|
267
300
|
sprintf(model_file_name,"%s.model",p);
|
268
301
|
}
|
269
302
|
|
303
|
+
// default solver for parameter selection is L2R_L2LOSS_SVC
|
304
|
+
if(flag_find_C)
|
305
|
+
{
|
306
|
+
if(!flag_cross_validation)
|
307
|
+
nr_fold = 5;
|
308
|
+
if(!flag_solver_specified)
|
309
|
+
{
|
310
|
+
fprintf(stderr, "Solver not specified. Using -s 2\n");
|
311
|
+
param.solver_type = L2R_L2LOSS_SVC;
|
312
|
+
}
|
313
|
+
else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
|
314
|
+
{
|
315
|
+
fprintf(stderr, "Warm-start parameter search only available for -s 0 and -s 2\n");
|
316
|
+
exit_with_help();
|
317
|
+
}
|
318
|
+
}
|
319
|
+
|
270
320
|
if(param.eps == INF)
|
271
321
|
{
|
272
322
|
switch(param.solver_type)
|