liblinear-ruby 1.0.1 → 1.0.2
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/blasp.h +8 -8
- data/ext/daxpy.c +3 -3
- data/ext/ddot.c +3 -3
- data/ext/dnrm2.c +7 -7
- data/ext/dscal.c +4 -4
- data/ext/liblinear_wrap.cxx +382 -382
- data/ext/linear.cpp +44 -55
- data/ext/linear.h +5 -1
- data/ext/tron.cpp +13 -5
- data/ext/tron.h +1 -1
- data/lib/liblinear.rb +2 -0
- data/lib/liblinear/version.rb +1 -1
- metadata +2 -41
- data/liblinear-2.1/COPYRIGHT +0 -31
- data/liblinear-2.1/Makefile +0 -37
- data/liblinear-2.1/Makefile.win +0 -24
- data/liblinear-2.1/README +0 -600
- data/liblinear-2.1/blas/Makefile +0 -22
- data/liblinear-2.1/blas/blas.h +0 -25
- data/liblinear-2.1/blas/blasp.h +0 -438
- data/liblinear-2.1/blas/daxpy.c +0 -57
- data/liblinear-2.1/blas/ddot.c +0 -58
- data/liblinear-2.1/blas/dnrm2.c +0 -70
- data/liblinear-2.1/blas/dscal.c +0 -52
- data/liblinear-2.1/heart_scale +0 -270
- data/liblinear-2.1/linear.cpp +0 -3053
- data/liblinear-2.1/linear.def +0 -22
- data/liblinear-2.1/linear.h +0 -79
- data/liblinear-2.1/matlab/Makefile +0 -49
- data/liblinear-2.1/matlab/README +0 -208
- data/liblinear-2.1/matlab/libsvmread.c +0 -212
- data/liblinear-2.1/matlab/libsvmwrite.c +0 -119
- data/liblinear-2.1/matlab/linear_model_matlab.c +0 -176
- data/liblinear-2.1/matlab/linear_model_matlab.h +0 -2
- data/liblinear-2.1/matlab/make.m +0 -22
- data/liblinear-2.1/matlab/predict.c +0 -341
- data/liblinear-2.1/matlab/train.c +0 -492
- data/liblinear-2.1/predict.c +0 -243
- data/liblinear-2.1/python/Makefile +0 -4
- data/liblinear-2.1/python/README +0 -380
- data/liblinear-2.1/python/liblinear.py +0 -323
- data/liblinear-2.1/python/liblinearutil.py +0 -270
- data/liblinear-2.1/train.c +0 -449
- data/liblinear-2.1/tron.cpp +0 -241
- data/liblinear-2.1/tron.h +0 -35
- data/liblinear-2.1/windows/liblinear.dll +0 -0
- data/liblinear-2.1/windows/libsvmread.mexw64 +0 -0
- data/liblinear-2.1/windows/libsvmwrite.mexw64 +0 -0
- data/liblinear-2.1/windows/predict.exe +0 -0
- data/liblinear-2.1/windows/predict.mexw64 +0 -0
- data/liblinear-2.1/windows/train.exe +0 -0
- data/liblinear-2.1/windows/train.mexw64 +0 -0
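The headline change in 1.0.2, visible from the list above, is that the bundled liblinear-2.1 reference tree (upstream Makefiles, MATLAB and Python interfaces, Windows binaries) is no longer shipped in the gem's data files; only the ext/ wrapper sources and the Ruby layer remain. For orientation, here is a minimal Ruby sketch of driving the gem. The Liblinear.train / Liblinear.predict calls follow the gem's README, but treat the exact signatures and values as illustrative assumptions, not part of this diff.

    require 'liblinear'

    # Hypothetical two-class toy problem; API assumed from the gem README.
    model = Liblinear.train(
      { solver_type: Liblinear::L2R_LR },          # solver parameters
      [1, -1],                                     # labels
      [[1.0, 0.0, 1.0], [-1.0, 0.0, -1.0]]         # examples (dense arrays)
    )
    puts Liblinear.predict(model, [1.0, 0.5, 1.0]) # => predicted label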
data/liblinear-2.1/predict.c
DELETED
@@ -1,243 +0,0 @@
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "linear.h"

int print_null(const char *s,...) {return 0;}

static int (*info)(const char *fmt,...) = &printf;

struct feature_node *x;
int max_nr_attr = 64;

struct model* model_;
int flag_predict_probability=0;

void exit_input_error(int line_num)
{
    fprintf(stderr,"Wrong input format at line %d\n", line_num);
    exit(1);
}

static char *line = NULL;
static int max_line_len;

static char* readline(FILE *input)
{
    int len;

    if(fgets(line,max_line_len,input) == NULL)
        return NULL;

    while(strrchr(line,'\n') == NULL)
    {
        max_line_len *= 2;
        line = (char *) realloc(line,max_line_len);
        len = (int) strlen(line);
        if(fgets(line+len,max_line_len-len,input) == NULL)
            break;
    }
    return line;
}

void do_predict(FILE *input, FILE *output)
{
    int correct = 0;
    int total = 0;
    double error = 0;
    double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

    int nr_class=get_nr_class(model_);
    double *prob_estimates=NULL;
    int j, n;
    int nr_feature=get_nr_feature(model_);
    if(model_->bias>=0)
        n=nr_feature+1;
    else
        n=nr_feature;

    if(flag_predict_probability)
    {
        int *labels;

        if(!check_probability_model(model_))
        {
            fprintf(stderr, "probability output is only supported for logistic regression\n");
            exit(1);
        }

        labels=(int *) malloc(nr_class*sizeof(int));
        get_labels(model_,labels);
        prob_estimates = (double *) malloc(nr_class*sizeof(double));
        fprintf(output,"labels");
        for(j=0;j<nr_class;j++)
            fprintf(output," %d",labels[j]);
        fprintf(output,"\n");
        free(labels);
    }

    max_line_len = 1024;
    line = (char *)malloc(max_line_len*sizeof(char));
    while(readline(input) != NULL)
    {
        int i = 0;
        double target_label, predict_label;
        char *idx, *val, *label, *endptr;
        int inst_max_index = 0; // strtol gives 0 if wrong format

        label = strtok(line," \t\n");
        if(label == NULL) // empty line
            exit_input_error(total+1);

        target_label = strtod(label,&endptr);
        if(endptr == label || *endptr != '\0')
            exit_input_error(total+1);

        while(1)
        {
            if(i>=max_nr_attr-2) // need one more for index = -1
            {
                max_nr_attr *= 2;
                x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
            }

            idx = strtok(NULL,":");
            val = strtok(NULL," \t");

            if(val == NULL)
                break;
            errno = 0;
            x[i].index = (int) strtol(idx,&endptr,10);
            if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
                exit_input_error(total+1);
            else
                inst_max_index = x[i].index;

            errno = 0;
            x[i].value = strtod(val,&endptr);
            if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
                exit_input_error(total+1);

            // feature indices larger than those in training are not used
            if(x[i].index <= nr_feature)
                ++i;
        }

        if(model_->bias>=0)
        {
            x[i].index = n;
            x[i].value = model_->bias;
            i++;
        }
        x[i].index = -1;

        if(flag_predict_probability)
        {
            int j;
            predict_label = predict_probability(model_,x,prob_estimates);
            fprintf(output,"%g",predict_label);
            for(j=0;j<model_->nr_class;j++)
                fprintf(output," %g",prob_estimates[j]);
            fprintf(output,"\n");
        }
        else
        {
            predict_label = predict(model_,x);
            fprintf(output,"%g\n",predict_label);
        }

        if(predict_label == target_label)
            ++correct;
        error += (predict_label-target_label)*(predict_label-target_label);
        sump += predict_label;
        sumt += target_label;
        sumpp += predict_label*predict_label;
        sumtt += target_label*target_label;
        sumpt += predict_label*target_label;
        ++total;
    }
    if(check_regression_model(model_))
    {
        info("Mean squared error = %g (regression)\n",error/total);
        info("Squared correlation coefficient = %g (regression)\n",
            ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
            ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
            );
    }
    else
        info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
    if(flag_predict_probability)
        free(prob_estimates);
}

void exit_with_help()
{
    printf(
    "Usage: predict [options] test_file model_file output_file\n"
    "options:\n"
    "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n"
    "-q : quiet mode (no outputs)\n"
    );
    exit(1);
}

int main(int argc, char **argv)
{
    FILE *input, *output;
    int i;

    // parse options
    for(i=1;i<argc;i++)
    {
        if(argv[i][0] != '-') break;
        ++i;
        switch(argv[i-1][1])
        {
            case 'b':
                flag_predict_probability = atoi(argv[i]);
                break;
            case 'q':
                info = &print_null;
                i--;
                break;
            default:
                fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
                exit_with_help();
                break;
        }
    }
    if(i>=argc)
        exit_with_help();

    input = fopen(argv[i],"r");
    if(input == NULL)
    {
        fprintf(stderr,"can't open input file %s\n",argv[i]);
        exit(1);
    }

    output = fopen(argv[i+2],"w");
    if(output == NULL)
    {
        fprintf(stderr,"can't open output file %s\n",argv[i+2]);
        exit(1);
    }

    if((model_=load_model(argv[i+1]))==0)
    {
        fprintf(stderr,"can't open model file %s\n",argv[i+1]);
        exit(1);
    }

    x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node));
    do_predict(input, output);
    free_and_destroy_model(&model_);
    free(line);
    free(x);
    fclose(input);
    fclose(output);
    return 0;
}
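Each line of test_file that do_predict() reads is in the sparse LIBSVM format "<label> <index>:<value> <index>:<value> ..." with feature indices in increasing order; that is exactly what the strtok()/strtol()/strtod() calls above take apart. As a quick illustration of the same parse in Ruby (the gem's host language; this snippet is not part of the package, and the values are made up):

    # Parse one LIBSVM-format line, e.g. "+1 1:0.708 3:1 5:-0.25".
    line = "+1 1:0.708 3:1 5:-0.25"
    label, *pairs = line.split
    features = pairs.to_h do |pair|
      idx, val = pair.split(":")
      [Integer(idx), Float(val)]
    end
    # label    => "+1"
    # features => {1=>0.708, 3=>1.0, 5=>-0.25}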
data/liblinear-2.1/python/README
DELETED
@@ -1,380 +0,0 @@
-------------------------------------
--- Python interface of LIBLINEAR ---
-------------------------------------

Table of Contents
=================

- Introduction
- Installation
- Quick Start
- Design Description
- Data Structures
- Utility Functions
- Additional Information

Introduction
============

Python (http://www.python.org/) is a programming language suitable for rapid
development. This tool provides a simple Python interface to LIBLINEAR, a library
for support vector machines (http://www.csie.ntu.edu.tw/~cjlin/liblinear). The
interface is very easy to use as the usage is the same as that of LIBLINEAR. The
interface is developed with the built-in Python library "ctypes."

Installation
============

On Unix systems, type

> make

The interface needs only the LIBLINEAR shared library, which is generated by
the above command. We assume that the shared library is in the LIBLINEAR
main directory or in the system path.

For Windows, the shared library liblinear.dll is ready in the directory
`..\windows'. You can also copy it to the system directory (e.g.,
`C:\WINDOWS\system32\' for Windows XP). To regenerate the shared library,
please follow the instructions for building Windows binaries in the LIBLINEAR README.

Quick Start
===========

There are two levels of usage. The high-level one uses utility functions
in liblinearutil.py and the usage is the same as the LIBLINEAR MATLAB interface.

>>> from liblinearutil import *
# Read data in LIBSVM format
>>> y, x = svm_read_problem('../heart_scale')
>>> m = train(y[:200], x[:200], '-c 4')
>>> p_label, p_acc, p_val = predict(y[200:], x[200:], m)

# Construct problem in python format
# Dense data
>>> y, x = [1,-1], [[1,0,1], [-1,0,-1]]
# Sparse data
>>> y, x = [1,-1], [{1:1, 3:1}, {1:-1,3:-1}]
>>> prob = problem(y, x)
>>> param = parameter('-s 0 -c 4 -B 1')
>>> m = train(prob, param)

# Other utility functions
>>> save_model('heart_scale.model', m)
>>> m = load_model('heart_scale.model')
>>> p_label, p_acc, p_val = predict(y, x, m, '-b 1')
>>> ACC, MSE, SCC = evaluations(y, p_label)

# Getting online help
>>> help(train)

The low-level use directly calls C interfaces imported by liblinear.py. Note that
all arguments and return values are in ctypes format. You need to handle them
carefully.

>>> from liblinear import *
>>> prob = problem([1,-1], [{1:1, 3:1}, {1:-1,3:-1}])
>>> param = parameter('-c 4')
>>> m = liblinear.train(prob, param) # m is a ctype pointer to a model
# Convert a Python-format instance to feature_nodearray, a ctypes structure
>>> x0, max_idx = gen_feature_nodearray({1:1, 3:1})
>>> label = liblinear.predict(m, x0)

Design Description
==================

There are two files, liblinear.py and liblinearutil.py, which respectively correspond to
low-level and high-level use of the interface.

In liblinear.py, we adopt the Python built-in library "ctypes," so that
Python can directly access C structures and interface functions defined
in linear.h.

While advanced users can use structures/functions in liblinear.py, to
avoid handling ctypes structures, in liblinearutil.py we provide some easy-to-use
functions. The usage is similar to the LIBLINEAR MATLAB interface.

Data Structures
===============

Three data structures derived from linear.h are node, problem, and
parameter. They all contain fields with the same names as in
linear.h. Access these fields carefully because you directly use a C structure
instead of a Python object. The following description introduces additional
fields and methods.

Before using the data structures, execute the following command to load the
LIBLINEAR shared library:

>>> from liblinear import *

- class feature_node:

    Construct a feature_node.

    >>> node = feature_node(idx, val)

    idx: an integer indicating the feature index.

    val: a float indicating the feature value.

    Show the index and the value of a node.

    >>> print(node)

- Function: gen_feature_nodearray(xi [,feature_max=None [,issparse=True]])

    Generate a feature vector from a Python list/tuple or a dictionary:

    >>> xi, max_idx = gen_feature_nodearray({1:1, 3:1, 5:-2})

    xi: the returned feature_nodearray (a ctypes structure)

    max_idx: the maximal feature index of xi

    issparse: if issparse == True, zero feature values are removed. The default
              value is True for sparsity.

    feature_max: if feature_max is assigned, features with indices larger than
                 feature_max are removed.

- class problem:

    Construct a problem instance

    >>> prob = problem(y, x [,bias=-1])

    y: a Python list/tuple of l labels (type must be int/double).

    x: a Python list/tuple of l data instances. Each element of x must be
       an instance of list/tuple/dictionary type.

    bias: if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term is
          added (default -1)

    You can also modify the bias value by

    >>> prob.set_bias(1)

    Note that if your x contains sparse data (i.e., dictionary), the internal
    ctypes data format is still sparse.

- class parameter:

    Construct a parameter instance

    >>> param = parameter('training_options')

    If 'training_options' is empty, LIBLINEAR default values are applied.

    Set param to LIBLINEAR default values.

    >>> param.set_to_default_values()

    Parse a string of options.

    >>> param.parse_options('training_options')

    Show values of parameters.

    >>> print(param)

- class model:

    There are two ways to obtain an instance of model:

    >>> model_ = train(y, x)
    >>> model_ = load_model('model_file_name')

    Note that the returned structure of the interface functions
    liblinear.train and liblinear.load_model is a ctypes pointer of
    model, which is different from the model object returned
    by train and load_model in liblinearutil.py. We provide a
    function toPyModel for the conversion:

    >>> model_ptr = liblinear.train(prob, param)
    >>> model_ = toPyModel(model_ptr)

    If you obtain a model in a way other than the above approaches,
    handle it carefully to avoid memory leaks or segmentation faults.

    Some interface functions to access LIBLINEAR models are wrapped as
    members of the class model:

    >>> nr_feature = model_.get_nr_feature()
    >>> nr_class = model_.get_nr_class()
    >>> class_labels = model_.get_labels()
    >>> is_prob_model = model_.is_probability_model()
    >>> is_regression_model = model_.is_regression_model()

    The decision function is W*x + b, where
    W is an nr_class-by-nr_feature matrix, and
    b is a vector of size nr_class.
    To access W_kj (i.e., the coefficient for the k-th class and the j-th feature)
    and b_k (i.e., the bias for the k-th class), use the following functions.

    >>> W_kj = model_.get_decfun_coef(feat_idx=j, label_idx=k)
    >>> b_k = model_.get_decfun_bias(label_idx=k)

    We also provide a function to extract w_k (i.e., the k-th row of W) and
    b_k directly as follows.

    >>> [w_k, b_k] = model_.get_decfun(label_idx=k)

    Note that w_k is a Python list of length nr_feature, which means that
    w_k[0] = W_k1.
    For regression models, W is just a vector of length nr_feature. Either
    set label_idx=0 or omit the label_idx parameter to access the coefficients.

    >>> W_j = model_.get_decfun_coef(feat_idx=j)
    >>> b = model_.get_decfun_bias()
    >>> [W, b] = model_.get_decfun()

    Note that in get_decfun_coef, get_decfun_bias, and get_decfun, feat_idx
    starts from 1, while label_idx starts from 0. If label_idx is not in the
    valid range (0 to nr_class-1), then a NaN will be returned; and if feat_idx
    is not in the valid range (1 to nr_feature), then a zero value will be
    returned. For regression models, label_idx is ignored.

Utility Functions
=================

To use utility functions, type

>>> from liblinearutil import *

The above command loads
    train()            : train a linear model
    predict()          : predict testing data
    svm_read_problem() : read the data from a LIBSVM-format file.
    load_model()       : load a LIBLINEAR model.
    save_model()       : save a model to a file.
    evaluations()      : evaluate prediction results.

- Function: train

    There are three ways to call train()

    >>> model = train(y, x [, 'training_options'])
    >>> model = train(prob [, 'training_options'])
    >>> model = train(prob, param)

    y: a list/tuple of l training labels (type must be int/double).

    x: a list/tuple of l training instances. The feature vector of
       each training instance is an instance of list/tuple or dictionary.

    training_options: a string in the same form as that for LIBLINEAR
                      command-line mode.

    prob: a problem instance generated by calling
          problem(y, x).

    param: a parameter instance generated by calling
           parameter('training_options')

    model: the returned model instance. See linear.h for details of this
           structure. If '-v' is specified, cross validation is
           conducted and the returned model is just a scalar: cross-validation
           accuracy for classification and mean-squared error for regression.
           If the '-C' option is specified, the best parameter C is found
           by cross validation. The returned model is a tuple of the best C
           and the corresponding cross-validation accuracy. The parameter
           selection utility is supported only by -s 0 and -s 2.

    To train the same data many times with different
    parameters, the second and the third ways should be faster.

    Examples:

    >>> y, x = svm_read_problem('../heart_scale')
    >>> prob = problem(y, x)
    >>> param = parameter('-s 3 -c 5 -q')
    >>> m = train(y, x, '-c 5')
    >>> m = train(prob, '-w1 5 -c 5')
    >>> m = train(prob, param)
    >>> CV_ACC = train(y, x, '-v 3')
    >>> best_C, best_rate = train(y, x, '-C -s 0')
    >>> m = train(y, x, '-c {0} -s 0'.format(best_C)) # use the same solver: -s 0

- Function: predict

    To predict testing data with a model, use

    >>> p_labs, p_acc, p_vals = predict(y, x, model [,'predicting_options'])

    y: a list/tuple of l true labels (type must be int/double). It is used
       for calculating the accuracy. Use [] if true labels are unavailable.

    x: a list/tuple of l predicting instances. The feature vector of
       each predicting instance is an instance of list/tuple or dictionary.

    predicting_options: a string of predicting options in the same format as
                        that of LIBLINEAR.

    model: a model instance.

    p_labels: a list of predicted labels

    p_acc: a tuple including accuracy (for classification), mean
           squared error, and squared correlation coefficient (for regression).

    p_vals: a list of decision values or probability estimates (if '-b 1'
            is specified). If k is the number of classes, for decision values,
            each element includes results of predicting k binary-class
            SVMs. If k = 2 and the solver is not MCSVM_CS, only one decision value
            is returned. For probabilities, each element contains k values
            indicating the probability that the testing instance is in each class.
            Note that the order of classes here is the same as the 'model.label'
            field in the model structure.

    Example:

    >>> m = train(y, x, '-c 5')
    >>> p_labels, p_acc, p_vals = predict(y, x, m)

- Functions: svm_read_problem/load_model/save_model

    See the usage by examples:

    >>> y, x = svm_read_problem('data.txt')
    >>> m = load_model('model_file')
    >>> save_model('model_file', m)

- Function: evaluations

    Calculate some evaluations using the true values (ty) and predicted
    values (pv):

    >>> (ACC, MSE, SCC) = evaluations(ty, pv)

    ty: a list of true values.

    pv: a list of predicted values.

    ACC: accuracy.

    MSE: mean squared error.

    SCC: squared correlation coefficient.

Additional Information
======================

This interface was written by Hsiang-Fu Yu from the Department of Computer
Science, National Taiwan University. If you find this tool useful, please
cite LIBLINEAR as follows

R.-E. Fan, K.-W. Chang, C.-J. Hsieh, X.-R. Wang, and C.-J. Lin.
LIBLINEAR: A Library for Large Linear Classification, Journal of
Machine Learning Research 9(2008), 1871-1874. Software available at
http://www.csie.ntu.edu.tw/~cjlin/liblinear

For any question, please contact Chih-Jen Lin <cjlin@csie.ntu.edu.tw>,
or check the FAQ page:

http://www.csie.ntu.edu.tw/~cjlin/liblinear/faq.html