liblinear-ruby 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +46 -0
- data/Rakefile +1 -0
- data/ext/Makefile +237 -0
- data/ext/blas.h +25 -0
- data/ext/blasp.h +430 -0
- data/ext/daxpy.c +49 -0
- data/ext/ddot.c +50 -0
- data/ext/dnrm2.c +62 -0
- data/ext/dscal.c +44 -0
- data/ext/extconf.rb +12 -0
- data/ext/liblinear_wrap.cxx +4646 -0
- data/ext/linear.cpp +2811 -0
- data/ext/linear.h +74 -0
- data/ext/linear.rb +357 -0
- data/ext/tron.cpp +235 -0
- data/ext/tron.h +34 -0
- data/lib/liblinear.rb +89 -0
- data/lib/liblinear/error.rb +4 -0
- data/lib/liblinear/model.rb +66 -0
- data/lib/liblinear/parameter.rb +42 -0
- data/lib/liblinear/problem.rb +55 -0
- data/lib/liblinear/version.rb +3 -0
- data/liblinear-1.93/COPYRIGHT +31 -0
- data/liblinear-1.93/Makefile +37 -0
- data/liblinear-1.93/Makefile.win +30 -0
- data/liblinear-1.93/README +531 -0
- data/liblinear-1.93/blas/Makefile +22 -0
- data/liblinear-1.93/blas/blas.a +0 -0
- data/liblinear-1.93/blas/blas.h +25 -0
- data/liblinear-1.93/blas/blasp.h +430 -0
- data/liblinear-1.93/blas/daxpy.c +49 -0
- data/liblinear-1.93/blas/daxpy.o +0 -0
- data/liblinear-1.93/blas/ddot.c +50 -0
- data/liblinear-1.93/blas/ddot.o +0 -0
- data/liblinear-1.93/blas/dnrm2.c +62 -0
- data/liblinear-1.93/blas/dnrm2.o +0 -0
- data/liblinear-1.93/blas/dscal.c +44 -0
- data/liblinear-1.93/blas/dscal.o +0 -0
- data/liblinear-1.93/heart_scale +270 -0
- data/liblinear-1.93/linear.cpp +2811 -0
- data/liblinear-1.93/linear.def +18 -0
- data/liblinear-1.93/linear.h +74 -0
- data/liblinear-1.93/linear.o +0 -0
- data/liblinear-1.93/matlab/Makefile +58 -0
- data/liblinear-1.93/matlab/README +197 -0
- data/liblinear-1.93/matlab/libsvmread.c +212 -0
- data/liblinear-1.93/matlab/libsvmwrite.c +106 -0
- data/liblinear-1.93/matlab/linear_model_matlab.c +176 -0
- data/liblinear-1.93/matlab/linear_model_matlab.h +2 -0
- data/liblinear-1.93/matlab/make.m +21 -0
- data/liblinear-1.93/matlab/predict.c +331 -0
- data/liblinear-1.93/matlab/train.c +418 -0
- data/liblinear-1.93/predict +0 -0
- data/liblinear-1.93/predict.c +245 -0
- data/liblinear-1.93/python/Makefile +4 -0
- data/liblinear-1.93/python/README +343 -0
- data/liblinear-1.93/python/liblinear.py +277 -0
- data/liblinear-1.93/python/liblinearutil.py +250 -0
- data/liblinear-1.93/ruby/liblinear.i +41 -0
- data/liblinear-1.93/ruby/liblinear_wrap.cxx +4646 -0
- data/liblinear-1.93/ruby/linear.h +74 -0
- data/liblinear-1.93/ruby/linear.o +0 -0
- data/liblinear-1.93/train +0 -0
- data/liblinear-1.93/train.c +399 -0
- data/liblinear-1.93/tron.cpp +235 -0
- data/liblinear-1.93/tron.h +34 -0
- data/liblinear-1.93/tron.o +0 -0
- data/liblinear-1.93/windows/liblinear.dll +0 -0
- data/liblinear-1.93/windows/libsvmread.mexw64 +0 -0
- data/liblinear-1.93/windows/libsvmwrite.mexw64 +0 -0
- data/liblinear-1.93/windows/predict.exe +0 -0
- data/liblinear-1.93/windows/predict.mexw64 +0 -0
- data/liblinear-1.93/windows/train.exe +0 -0
- data/liblinear-1.93/windows/train.mexw64 +0 -0
- data/liblinear-ruby.gemspec +24 -0
- metadata +152 -0
@@ -0,0 +1,418 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include <math.h>
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <string.h>
|
5
|
+
#include <ctype.h>
|
6
|
+
#include "../linear.h"
|
7
|
+
|
8
|
+
#include "mex.h"
|
9
|
+
#include "linear_model_matlab.h"
|
10
|
+
|
11
|
+
#ifdef MX_API_VER
|
12
|
+
#if MX_API_VER < 0x07030000
|
13
|
+
typedef int mwIndex;
|
14
|
+
#endif
|
15
|
+
#endif
|
16
|
+
|
17
|
+
#define CMD_LEN 2048
|
18
|
+
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
|
19
|
+
#define INF HUGE_VAL
|
20
|
+
|
21
|
+
/* Print callback that discards its text; parse_command_line installs it for -q. */
void print_null(const char *s)
{
	(void) s;
}
|
22
|
+
/*
 * Default print callback handed to set_print_string_function(): shows s on
 * the MATLAB console.
 *
 * s is passed as an argument to a fixed "%s" format instead of being used as
 * the format itself, so a '%' inside the message is printed literally rather
 * than interpreted as a conversion specification.
 */
void print_string_matlab(const char *s)
{
	mexPrintf("%s", s);
}
|
23
|
+
|
24
|
+
/*
 * Print the usage text of the MATLAB train() interface on the MATLAB console.
 * Despite its name this function only prints; it returns normally and the
 * caller decides what to do next.
 */
void exit_with_help()
{
	// call syntax and the solver types accepted by -s
	mexPrintf(
	"Usage: model = train(training_label_vector, training_instance_matrix, 'liblinear_options', 'col');\n"
	"liblinear_options:\n"
	"-s type : set type of solver (default 1)\n"
	" for multi-class classification\n"
	" 0 -- L2-regularized logistic regression (primal)\n"
	" 1 -- L2-regularized L2-loss support vector classification (dual)\n"
	" 2 -- L2-regularized L2-loss support vector classification (primal)\n"
	" 3 -- L2-regularized L1-loss support vector classification (dual)\n"
	" 4 -- support vector classification by Crammer and Singer\n"
	" 5 -- L1-regularized L2-loss support vector classification\n"
	" 6 -- L1-regularized logistic regression\n"
	" 7 -- L2-regularized logistic regression (dual)\n"
	" for regression\n"
	" 11 -- L2-regularized L2-loss support vector regression (primal)\n"
	" 12 -- L2-regularized L2-loss support vector regression (dual)\n"
	" 13 -- L2-regularized L1-loss support vector regression (dual)\n"
	);

	// cost, SVR epsilon and the per-solver termination tolerance
	mexPrintf(
	"-c cost : set the parameter C (default 1)\n"
	"-p epsilon : set the epsilon in loss function of SVR (default 0.1)\n"
	"-e epsilon : set tolerance of termination criterion\n"
	" -s 0 and 2\n"
	" |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n"
	" where f is the primal function and pos/neg are # of\n"
	" positive/negative data (default 0.01)\n"
	" -s 11\n"
	" |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)\n"
	" -s 1, 3, 4 and 7\n"
	" Dual maximal violation <= eps; similar to libsvm (default 0.1)\n"
	" -s 5 and 6\n"
	" |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n"
	" where f is the primal function (default 0.01)\n"
	" -s 12 and 13\n"
	" |f'(alpha)|_1 <= eps |f'(alpha0)|,\n"
	" where f is the dual function (default 0.1)\n"
	);

	// remaining switches and the optional 'col' argument
	mexPrintf(
	"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
	"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
	"-v n: n-fold cross validation mode\n"
	"-q : quiet mode (no outputs)\n"
	"col:\n"
	" if 'col' is setted, training_instance_matrix is parsed in column format, otherwise is in row format\n"
	);
}
|
68
|
+
|
69
|
+
// liblinear arguments
|
70
|
+
struct parameter param; // set by parse_command_line
|
71
|
+
struct problem prob; // set by read_problem
|
72
|
+
struct model *model_;
|
73
|
+
struct feature_node *x_space;
|
74
|
+
int cross_validation_flag;
|
75
|
+
int col_format_flag;
|
76
|
+
int nr_fold;
|
77
|
+
double bias;
|
78
|
+
|
79
|
+
double do_cross_validation()
|
80
|
+
{
|
81
|
+
int i;
|
82
|
+
int total_correct = 0;
|
83
|
+
double total_error = 0;
|
84
|
+
double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
|
85
|
+
double *target = Malloc(double, prob.l);
|
86
|
+
double retval = 0.0;
|
87
|
+
|
88
|
+
cross_validation(&prob,¶m,nr_fold,target);
|
89
|
+
if(param.solver_type == L2R_L2LOSS_SVR ||
|
90
|
+
param.solver_type == L2R_L1LOSS_SVR_DUAL ||
|
91
|
+
param.solver_type == L2R_L2LOSS_SVR_DUAL)
|
92
|
+
{
|
93
|
+
for(i=0;i<prob.l;i++)
|
94
|
+
{
|
95
|
+
double y = prob.y[i];
|
96
|
+
double v = target[i];
|
97
|
+
total_error += (v-y)*(v-y);
|
98
|
+
sumv += v;
|
99
|
+
sumy += y;
|
100
|
+
sumvv += v*v;
|
101
|
+
sumyy += y*y;
|
102
|
+
sumvy += v*y;
|
103
|
+
}
|
104
|
+
printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
|
105
|
+
printf("Cross Validation Squared correlation coefficient = %g\n",
|
106
|
+
((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
|
107
|
+
((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
|
108
|
+
);
|
109
|
+
retval = total_error/prob.l;
|
110
|
+
}
|
111
|
+
else
|
112
|
+
{
|
113
|
+
for(i=0;i<prob.l;i++)
|
114
|
+
if(target[i] == prob.y[i])
|
115
|
+
++total_correct;
|
116
|
+
printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
|
117
|
+
retval = 100.0*total_correct/prob.l;
|
118
|
+
}
|
119
|
+
|
120
|
+
free(target);
|
121
|
+
return retval;
|
122
|
+
}
|
123
|
+
|
124
|
+
// nrhs should be 3
|
125
|
+
int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
|
126
|
+
{
|
127
|
+
int i, argc = 1;
|
128
|
+
char cmd[CMD_LEN];
|
129
|
+
char *argv[CMD_LEN/2];
|
130
|
+
void (*print_func)(const char *) = print_string_matlab; // default printing to matlab display
|
131
|
+
|
132
|
+
// default values
|
133
|
+
param.solver_type = L2R_L2LOSS_SVC_DUAL;
|
134
|
+
param.C = 1;
|
135
|
+
param.eps = INF; // see setting below
|
136
|
+
param.p = 0.1;
|
137
|
+
param.nr_weight = 0;
|
138
|
+
param.weight_label = NULL;
|
139
|
+
param.weight = NULL;
|
140
|
+
cross_validation_flag = 0;
|
141
|
+
col_format_flag = 0;
|
142
|
+
bias = -1;
|
143
|
+
|
144
|
+
|
145
|
+
if(nrhs <= 1)
|
146
|
+
return 1;
|
147
|
+
|
148
|
+
if(nrhs == 4)
|
149
|
+
{
|
150
|
+
mxGetString(prhs[3], cmd, mxGetN(prhs[3])+1);
|
151
|
+
if(strcmp(cmd, "col") == 0)
|
152
|
+
col_format_flag = 1;
|
153
|
+
}
|
154
|
+
|
155
|
+
// put options in argv[]
|
156
|
+
if(nrhs > 2)
|
157
|
+
{
|
158
|
+
mxGetString(prhs[2], cmd, mxGetN(prhs[2]) + 1);
|
159
|
+
if((argv[argc] = strtok(cmd, " ")) != NULL)
|
160
|
+
while((argv[++argc] = strtok(NULL, " ")) != NULL)
|
161
|
+
;
|
162
|
+
}
|
163
|
+
|
164
|
+
// parse options
|
165
|
+
for(i=1;i<argc;i++)
|
166
|
+
{
|
167
|
+
if(argv[i][0] != '-') break;
|
168
|
+
++i;
|
169
|
+
if(i>=argc && argv[i-1][1] != 'q') // since option -q has no parameter
|
170
|
+
return 1;
|
171
|
+
switch(argv[i-1][1])
|
172
|
+
{
|
173
|
+
case 's':
|
174
|
+
param.solver_type = atoi(argv[i]);
|
175
|
+
break;
|
176
|
+
case 'c':
|
177
|
+
param.C = atof(argv[i]);
|
178
|
+
break;
|
179
|
+
case 'p':
|
180
|
+
param.p = atof(argv[i]);
|
181
|
+
break;
|
182
|
+
case 'e':
|
183
|
+
param.eps = atof(argv[i]);
|
184
|
+
break;
|
185
|
+
case 'B':
|
186
|
+
bias = atof(argv[i]);
|
187
|
+
break;
|
188
|
+
case 'v':
|
189
|
+
cross_validation_flag = 1;
|
190
|
+
nr_fold = atoi(argv[i]);
|
191
|
+
if(nr_fold < 2)
|
192
|
+
{
|
193
|
+
mexPrintf("n-fold cross validation: n must >= 2\n");
|
194
|
+
return 1;
|
195
|
+
}
|
196
|
+
break;
|
197
|
+
case 'w':
|
198
|
+
++param.nr_weight;
|
199
|
+
param.weight_label = (int *) realloc(param.weight_label,sizeof(int)*param.nr_weight);
|
200
|
+
param.weight = (double *) realloc(param.weight,sizeof(double)*param.nr_weight);
|
201
|
+
param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]);
|
202
|
+
param.weight[param.nr_weight-1] = atof(argv[i]);
|
203
|
+
break;
|
204
|
+
case 'q':
|
205
|
+
print_func = &print_null;
|
206
|
+
i--;
|
207
|
+
break;
|
208
|
+
default:
|
209
|
+
mexPrintf("unknown option\n");
|
210
|
+
return 1;
|
211
|
+
}
|
212
|
+
}
|
213
|
+
|
214
|
+
set_print_string_function(print_func);
|
215
|
+
|
216
|
+
if(param.eps == INF)
|
217
|
+
{
|
218
|
+
switch(param.solver_type)
|
219
|
+
{
|
220
|
+
case L2R_LR:
|
221
|
+
case L2R_L2LOSS_SVC:
|
222
|
+
param.eps = 0.01;
|
223
|
+
break;
|
224
|
+
case L2R_L2LOSS_SVR:
|
225
|
+
param.eps = 0.001;
|
226
|
+
break;
|
227
|
+
case L2R_L2LOSS_SVC_DUAL:
|
228
|
+
case L2R_L1LOSS_SVC_DUAL:
|
229
|
+
case MCSVM_CS:
|
230
|
+
case L2R_LR_DUAL:
|
231
|
+
param.eps = 0.1;
|
232
|
+
break;
|
233
|
+
case L1R_L2LOSS_SVC:
|
234
|
+
case L1R_LR:
|
235
|
+
param.eps = 0.01;
|
236
|
+
break;
|
237
|
+
case L2R_L1LOSS_SVR_DUAL:
|
238
|
+
case L2R_L2LOSS_SVR_DUAL:
|
239
|
+
param.eps = 0.1;
|
240
|
+
break;
|
241
|
+
}
|
242
|
+
}
|
243
|
+
return 0;
|
244
|
+
}
|
245
|
+
|
246
|
+
/* Store an empty 0-by-0 real matrix in plhs[0]; mexFunction uses this on its error paths. */
static void fake_answer(mxArray *plhs[])
{
	mxArray *empty_matrix = mxCreateDoubleMatrix(0, 0, mxREAL);

	plhs[0] = empty_matrix;
}
|
250
|
+
|
251
|
+
int read_problem_sparse(const mxArray *label_vec, const mxArray *instance_mat)
|
252
|
+
{
|
253
|
+
int i, j, k, low, high;
|
254
|
+
mwIndex *ir, *jc;
|
255
|
+
int elements, max_index, num_samples, label_vector_row_num;
|
256
|
+
double *samples, *labels;
|
257
|
+
mxArray *instance_mat_col; // instance sparse matrix in column format
|
258
|
+
|
259
|
+
prob.x = NULL;
|
260
|
+
prob.y = NULL;
|
261
|
+
x_space = NULL;
|
262
|
+
|
263
|
+
if(col_format_flag)
|
264
|
+
instance_mat_col = (mxArray *)instance_mat;
|
265
|
+
else
|
266
|
+
{
|
267
|
+
// transpose instance matrix
|
268
|
+
mxArray *prhs[1], *plhs[1];
|
269
|
+
prhs[0] = mxDuplicateArray(instance_mat);
|
270
|
+
if(mexCallMATLAB(1, plhs, 1, prhs, "transpose"))
|
271
|
+
{
|
272
|
+
mexPrintf("Error: cannot transpose training instance matrix\n");
|
273
|
+
return -1;
|
274
|
+
}
|
275
|
+
instance_mat_col = plhs[0];
|
276
|
+
mxDestroyArray(prhs[0]);
|
277
|
+
}
|
278
|
+
|
279
|
+
// the number of instance
|
280
|
+
prob.l = (int) mxGetN(instance_mat_col);
|
281
|
+
label_vector_row_num = (int) mxGetM(label_vec);
|
282
|
+
|
283
|
+
if(label_vector_row_num!=prob.l)
|
284
|
+
{
|
285
|
+
mexPrintf("Length of label vector does not match # of instances.\n");
|
286
|
+
return -1;
|
287
|
+
}
|
288
|
+
|
289
|
+
// each column is one instance
|
290
|
+
labels = mxGetPr(label_vec);
|
291
|
+
samples = mxGetPr(instance_mat_col);
|
292
|
+
ir = mxGetIr(instance_mat_col);
|
293
|
+
jc = mxGetJc(instance_mat_col);
|
294
|
+
|
295
|
+
num_samples = (int) mxGetNzmax(instance_mat_col);
|
296
|
+
|
297
|
+
elements = num_samples + prob.l*2;
|
298
|
+
max_index = (int) mxGetM(instance_mat_col);
|
299
|
+
|
300
|
+
prob.y = Malloc(double, prob.l);
|
301
|
+
prob.x = Malloc(struct feature_node*, prob.l);
|
302
|
+
x_space = Malloc(struct feature_node, elements);
|
303
|
+
|
304
|
+
prob.bias=bias;
|
305
|
+
|
306
|
+
j = 0;
|
307
|
+
for(i=0;i<prob.l;i++)
|
308
|
+
{
|
309
|
+
prob.x[i] = &x_space[j];
|
310
|
+
prob.y[i] = labels[i];
|
311
|
+
low = (int) jc[i], high = (int) jc[i+1];
|
312
|
+
for(k=low;k<high;k++)
|
313
|
+
{
|
314
|
+
x_space[j].index = (int) ir[k]+1;
|
315
|
+
x_space[j].value = samples[k];
|
316
|
+
j++;
|
317
|
+
}
|
318
|
+
if(prob.bias>=0)
|
319
|
+
{
|
320
|
+
x_space[j].index = max_index+1;
|
321
|
+
x_space[j].value = prob.bias;
|
322
|
+
j++;
|
323
|
+
}
|
324
|
+
x_space[j++].index = -1;
|
325
|
+
}
|
326
|
+
|
327
|
+
if(prob.bias>=0)
|
328
|
+
prob.n = max_index+1;
|
329
|
+
else
|
330
|
+
prob.n = max_index;
|
331
|
+
|
332
|
+
return 0;
|
333
|
+
}
|
334
|
+
|
335
|
+
// Interface function of matlab
|
336
|
+
// now assume prhs[0]: label prhs[1]: features
|
337
|
+
void mexFunction( int nlhs, mxArray *plhs[],
|
338
|
+
int nrhs, const mxArray *prhs[] )
|
339
|
+
{
|
340
|
+
const char *error_msg;
|
341
|
+
// fix random seed to have same results for each run
|
342
|
+
// (for cross validation)
|
343
|
+
srand(1);
|
344
|
+
|
345
|
+
// Transform the input Matrix to libsvm format
|
346
|
+
if(nrhs > 1 && nrhs < 5)
|
347
|
+
{
|
348
|
+
int err=0;
|
349
|
+
|
350
|
+
if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) {
|
351
|
+
mexPrintf("Error: label vector and instance matrix must be double\n");
|
352
|
+
fake_answer(plhs);
|
353
|
+
return;
|
354
|
+
}
|
355
|
+
|
356
|
+
if(parse_command_line(nrhs, prhs, NULL))
|
357
|
+
{
|
358
|
+
exit_with_help();
|
359
|
+
destroy_param(¶m);
|
360
|
+
fake_answer(plhs);
|
361
|
+
return;
|
362
|
+
}
|
363
|
+
|
364
|
+
if(mxIsSparse(prhs[1]))
|
365
|
+
err = read_problem_sparse(prhs[0], prhs[1]);
|
366
|
+
else
|
367
|
+
{
|
368
|
+
mexPrintf("Training_instance_matrix must be sparse; "
|
369
|
+
"use sparse(Training_instance_matrix) first\n");
|
370
|
+
destroy_param(¶m);
|
371
|
+
fake_answer(plhs);
|
372
|
+
return;
|
373
|
+
}
|
374
|
+
|
375
|
+
// train's original code
|
376
|
+
error_msg = check_parameter(&prob, ¶m);
|
377
|
+
|
378
|
+
if(err || error_msg)
|
379
|
+
{
|
380
|
+
if (error_msg != NULL)
|
381
|
+
mexPrintf("Error: %s\n", error_msg);
|
382
|
+
destroy_param(¶m);
|
383
|
+
free(prob.y);
|
384
|
+
free(prob.x);
|
385
|
+
free(x_space);
|
386
|
+
fake_answer(plhs);
|
387
|
+
return;
|
388
|
+
}
|
389
|
+
|
390
|
+
if(cross_validation_flag)
|
391
|
+
{
|
392
|
+
double *ptr;
|
393
|
+
plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
394
|
+
ptr = mxGetPr(plhs[0]);
|
395
|
+
ptr[0] = do_cross_validation();
|
396
|
+
}
|
397
|
+
else
|
398
|
+
{
|
399
|
+
const char *error_msg;
|
400
|
+
|
401
|
+
model_ = train(&prob, ¶m);
|
402
|
+
error_msg = model_to_matlab_structure(plhs, model_);
|
403
|
+
if(error_msg)
|
404
|
+
mexPrintf("Error: can't convert libsvm model to matrix structure: %s\n", error_msg);
|
405
|
+
free_and_destroy_model(&model_);
|
406
|
+
}
|
407
|
+
destroy_param(¶m);
|
408
|
+
free(prob.y);
|
409
|
+
free(prob.x);
|
410
|
+
free(x_space);
|
411
|
+
}
|
412
|
+
else
|
413
|
+
{
|
414
|
+
exit_with_help();
|
415
|
+
fake_answer(plhs);
|
416
|
+
return;
|
417
|
+
}
|
418
|
+
}
|
Binary file
|
@@ -0,0 +1,245 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include <ctype.h>
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <string.h>
|
5
|
+
#include <errno.h>
|
6
|
+
#include "linear.h"
|
7
|
+
|
8
|
+
/* printf-compatible sink: ignores the format and every argument and returns 0. */
int print_null(const char *s,...)
{
	(void) s;
	return 0;
}
|
9
|
+
|
10
|
+
// Sink for the summary statistics: printf by default, print_null after -q
static int (*info)(const char *fmt,...) = &printf;

// Feature vector of the instance being predicted; allocated in main and
// grown on demand in do_predict
struct feature_node *x;
// Number of feature_node slots currently available in x
int max_nr_attr = 64;

// Model loaded from the model file in main
struct model* model_;
// Value given with -b; non-zero requests probability estimates
int flag_predict_probability=0;
|
17
|
+
|
18
|
+
/* Report a malformed line of the test file on stderr and end the program with status 1. */
void exit_input_error(int line_num)
{
	const int failure_status = 1;

	fprintf(stderr,"Wrong input format at line %d\n", line_num);
	exit(failure_status);
}
|
23
|
+
|
24
|
+
static char *line = NULL;	// buffer holding the most recently read line
static int max_line_len;	// current capacity of line in bytes

/*
 * Read one complete line from input into the global buffer `line`.
 *
 * The caller must have allocated `line` with max_line_len bytes beforehand.
 * The buffer is doubled until the whole line, including its '\n', fits; the
 * last line of a file is returned as-is when it lacks a trailing newline.
 *
 * Returns `line`, or NULL when nothing could be read (end of file or read
 * error).  If the buffer cannot be enlarged a message is printed on stderr
 * and the program exits with status 1.
 */
static char* readline(FILE *input)
{
	int len;

	if(fgets(line,max_line_len,input) == NULL)
		return NULL;

	while(strrchr(line,'\n') == NULL)
	{
		// go through a temporary so a failed realloc is detected instead of
		// overwriting (and then dereferencing) the only pointer to the data
		char *grown = (char *) realloc(line,(size_t) max_line_len*2);
		if(grown == NULL)
		{
			fprintf(stderr,"can't allocate memory for input line\n");
			exit(1);
		}
		line = grown;
		max_line_len *= 2;

		// continue reading right behind what is already in the buffer
		len = (int) strlen(line);
		if(fgets(line+len,max_line_len-len,input) == NULL)
			break;
	}
	return line;
}
|
44
|
+
|
45
|
+
void do_predict(FILE *input, FILE *output)
|
46
|
+
{
|
47
|
+
int correct = 0;
|
48
|
+
int total = 0;
|
49
|
+
double error = 0;
|
50
|
+
double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;
|
51
|
+
|
52
|
+
int nr_class=get_nr_class(model_);
|
53
|
+
double *prob_estimates=NULL;
|
54
|
+
int j, n;
|
55
|
+
int nr_feature=get_nr_feature(model_);
|
56
|
+
if(model_->bias>=0)
|
57
|
+
n=nr_feature+1;
|
58
|
+
else
|
59
|
+
n=nr_feature;
|
60
|
+
|
61
|
+
if(flag_predict_probability)
|
62
|
+
{
|
63
|
+
int *labels;
|
64
|
+
|
65
|
+
if(!check_probability_model(model_))
|
66
|
+
{
|
67
|
+
fprintf(stderr, "probability output is only supported for logistic regression\n");
|
68
|
+
exit(1);
|
69
|
+
}
|
70
|
+
|
71
|
+
labels=(int *) malloc(nr_class*sizeof(int));
|
72
|
+
get_labels(model_,labels);
|
73
|
+
prob_estimates = (double *) malloc(nr_class*sizeof(double));
|
74
|
+
fprintf(output,"labels");
|
75
|
+
for(j=0;j<nr_class;j++)
|
76
|
+
fprintf(output," %d",labels[j]);
|
77
|
+
fprintf(output,"\n");
|
78
|
+
free(labels);
|
79
|
+
}
|
80
|
+
|
81
|
+
max_line_len = 1024;
|
82
|
+
line = (char *)malloc(max_line_len*sizeof(char));
|
83
|
+
while(readline(input) != NULL)
|
84
|
+
{
|
85
|
+
int i = 0;
|
86
|
+
double target_label, predict_label;
|
87
|
+
char *idx, *val, *label, *endptr;
|
88
|
+
int inst_max_index = 0; // strtol gives 0 if wrong format
|
89
|
+
|
90
|
+
label = strtok(line," \t\n");
|
91
|
+
if(label == NULL) // empty line
|
92
|
+
exit_input_error(total+1);
|
93
|
+
|
94
|
+
target_label = strtod(label,&endptr);
|
95
|
+
if(endptr == label || *endptr != '\0')
|
96
|
+
exit_input_error(total+1);
|
97
|
+
|
98
|
+
while(1)
|
99
|
+
{
|
100
|
+
if(i>=max_nr_attr-2) // need one more for index = -1
|
101
|
+
{
|
102
|
+
max_nr_attr *= 2;
|
103
|
+
x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
|
104
|
+
}
|
105
|
+
|
106
|
+
idx = strtok(NULL,":");
|
107
|
+
val = strtok(NULL," \t");
|
108
|
+
|
109
|
+
if(val == NULL)
|
110
|
+
break;
|
111
|
+
errno = 0;
|
112
|
+
x[i].index = (int) strtol(idx,&endptr,10);
|
113
|
+
if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
|
114
|
+
exit_input_error(total+1);
|
115
|
+
else
|
116
|
+
inst_max_index = x[i].index;
|
117
|
+
|
118
|
+
errno = 0;
|
119
|
+
x[i].value = strtod(val,&endptr);
|
120
|
+
if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
|
121
|
+
exit_input_error(total+1);
|
122
|
+
|
123
|
+
// feature indices larger than those in training are not used
|
124
|
+
if(x[i].index <= nr_feature)
|
125
|
+
++i;
|
126
|
+
}
|
127
|
+
|
128
|
+
if(model_->bias>=0)
|
129
|
+
{
|
130
|
+
x[i].index = n;
|
131
|
+
x[i].value = model_->bias;
|
132
|
+
i++;
|
133
|
+
}
|
134
|
+
x[i].index = -1;
|
135
|
+
|
136
|
+
if(flag_predict_probability)
|
137
|
+
{
|
138
|
+
int j;
|
139
|
+
predict_label = predict_probability(model_,x,prob_estimates);
|
140
|
+
fprintf(output,"%g",predict_label);
|
141
|
+
for(j=0;j<model_->nr_class;j++)
|
142
|
+
fprintf(output," %g",prob_estimates[j]);
|
143
|
+
fprintf(output,"\n");
|
144
|
+
}
|
145
|
+
else
|
146
|
+
{
|
147
|
+
predict_label = predict(model_,x);
|
148
|
+
fprintf(output,"%g\n",predict_label);
|
149
|
+
}
|
150
|
+
|
151
|
+
if(predict_label == target_label)
|
152
|
+
++correct;
|
153
|
+
error += (predict_label-target_label)*(predict_label-target_label);
|
154
|
+
sump += predict_label;
|
155
|
+
sumt += target_label;
|
156
|
+
sumpp += predict_label*predict_label;
|
157
|
+
sumtt += target_label*target_label;
|
158
|
+
sumpt += predict_label*target_label;
|
159
|
+
++total;
|
160
|
+
}
|
161
|
+
if(model_->param.solver_type==L2R_L2LOSS_SVR ||
|
162
|
+
model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
|
163
|
+
model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
|
164
|
+
{
|
165
|
+
info("Mean squared error = %g (regression)\n",error/total);
|
166
|
+
info("Squared correlation coefficient = %g (regression)\n",
|
167
|
+
((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
|
168
|
+
((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
|
169
|
+
);
|
170
|
+
}
|
171
|
+
else
|
172
|
+
info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
|
173
|
+
if(flag_predict_probability)
|
174
|
+
free(prob_estimates);
|
175
|
+
}
|
176
|
+
|
177
|
+
/* Write the command-line usage of predict to standard output and end the program with status 1. */
void exit_with_help()
{
	static const char usage[] =
		"Usage: predict [options] test_file model_file output_file\n"
		"options:\n"
		"-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n"
		"-q : quiet mode (no outputs)\n";

	fputs(usage, stdout);
	exit(1);
}
|
187
|
+
|
188
|
+
int main(int argc, char **argv)
|
189
|
+
{
|
190
|
+
FILE *input, *output;
|
191
|
+
int i;
|
192
|
+
|
193
|
+
// parse options
|
194
|
+
for(i=1;i<argc;i++)
|
195
|
+
{
|
196
|
+
if(argv[i][0] != '-') break;
|
197
|
+
++i;
|
198
|
+
switch(argv[i-1][1])
|
199
|
+
{
|
200
|
+
case 'b':
|
201
|
+
flag_predict_probability = atoi(argv[i]);
|
202
|
+
break;
|
203
|
+
case 'q':
|
204
|
+
info = &print_null;
|
205
|
+
i--;
|
206
|
+
break;
|
207
|
+
default:
|
208
|
+
fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
|
209
|
+
exit_with_help();
|
210
|
+
break;
|
211
|
+
}
|
212
|
+
}
|
213
|
+
if(i>=argc)
|
214
|
+
exit_with_help();
|
215
|
+
|
216
|
+
input = fopen(argv[i],"r");
|
217
|
+
if(input == NULL)
|
218
|
+
{
|
219
|
+
fprintf(stderr,"can't open input file %s\n",argv[i]);
|
220
|
+
exit(1);
|
221
|
+
}
|
222
|
+
|
223
|
+
output = fopen(argv[i+2],"w");
|
224
|
+
if(output == NULL)
|
225
|
+
{
|
226
|
+
fprintf(stderr,"can't open output file %s\n",argv[i+2]);
|
227
|
+
exit(1);
|
228
|
+
}
|
229
|
+
|
230
|
+
if((model_=load_model(argv[i+1]))==0)
|
231
|
+
{
|
232
|
+
fprintf(stderr,"can't open model file %s\n",argv[i+1]);
|
233
|
+
exit(1);
|
234
|
+
}
|
235
|
+
|
236
|
+
x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node));
|
237
|
+
do_predict(input, output);
|
238
|
+
free_and_destroy_model(&model_);
|
239
|
+
free(line);
|
240
|
+
free(x);
|
241
|
+
fclose(input);
|
242
|
+
fclose(output);
|
243
|
+
return 0;
|
244
|
+
}
|
245
|
+
|