liblinear-ruby 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/blasp.h +8 -8
- data/ext/daxpy.c +3 -3
- data/ext/ddot.c +3 -3
- data/ext/dnrm2.c +7 -7
- data/ext/dscal.c +4 -4
- data/ext/liblinear_wrap.cxx +382 -382
- data/ext/linear.cpp +44 -55
- data/ext/linear.h +5 -1
- data/ext/tron.cpp +13 -5
- data/ext/tron.h +1 -1
- data/lib/liblinear.rb +2 -0
- data/lib/liblinear/version.rb +1 -1
- metadata +2 -41
- data/liblinear-2.1/COPYRIGHT +0 -31
- data/liblinear-2.1/Makefile +0 -37
- data/liblinear-2.1/Makefile.win +0 -24
- data/liblinear-2.1/README +0 -600
- data/liblinear-2.1/blas/Makefile +0 -22
- data/liblinear-2.1/blas/blas.h +0 -25
- data/liblinear-2.1/blas/blasp.h +0 -438
- data/liblinear-2.1/blas/daxpy.c +0 -57
- data/liblinear-2.1/blas/ddot.c +0 -58
- data/liblinear-2.1/blas/dnrm2.c +0 -70
- data/liblinear-2.1/blas/dscal.c +0 -52
- data/liblinear-2.1/heart_scale +0 -270
- data/liblinear-2.1/linear.cpp +0 -3053
- data/liblinear-2.1/linear.def +0 -22
- data/liblinear-2.1/linear.h +0 -79
- data/liblinear-2.1/matlab/Makefile +0 -49
- data/liblinear-2.1/matlab/README +0 -208
- data/liblinear-2.1/matlab/libsvmread.c +0 -212
- data/liblinear-2.1/matlab/libsvmwrite.c +0 -119
- data/liblinear-2.1/matlab/linear_model_matlab.c +0 -176
- data/liblinear-2.1/matlab/linear_model_matlab.h +0 -2
- data/liblinear-2.1/matlab/make.m +0 -22
- data/liblinear-2.1/matlab/predict.c +0 -341
- data/liblinear-2.1/matlab/train.c +0 -492
- data/liblinear-2.1/predict.c +0 -243
- data/liblinear-2.1/python/Makefile +0 -4
- data/liblinear-2.1/python/README +0 -380
- data/liblinear-2.1/python/liblinear.py +0 -323
- data/liblinear-2.1/python/liblinearutil.py +0 -270
- data/liblinear-2.1/train.c +0 -449
- data/liblinear-2.1/tron.cpp +0 -241
- data/liblinear-2.1/tron.h +0 -35
- data/liblinear-2.1/windows/liblinear.dll +0 -0
- data/liblinear-2.1/windows/libsvmread.mexw64 +0 -0
- data/liblinear-2.1/windows/libsvmwrite.mexw64 +0 -0
- data/liblinear-2.1/windows/predict.exe +0 -0
- data/liblinear-2.1/windows/predict.mexw64 +0 -0
- data/liblinear-2.1/windows/train.exe +0 -0
- data/liblinear-2.1/windows/train.mexw64 +0 -0
data/liblinear-2.1/predict.c
DELETED
@@ -1,243 +0,0 @@
|
|
1
|
-
#include <stdio.h>
|
2
|
-
#include <ctype.h>
|
3
|
-
#include <stdlib.h>
|
4
|
-
#include <string.h>
|
5
|
-
#include <errno.h>
|
6
|
-
#include "linear.h"
|
7
|
-
|
8
|
-
/*
 * No-op stand-in for printf: accepts a printf-style argument list,
 * prints nothing, and always returns 0. main() installs it as `info`
 * when the -q option is given.
 */
int print_null(const char *s,...)
{
	(void) s;	/* format string is intentionally ignored */
	return 0;
}
|
9
|
-
|
10
|
-
static int (*info)(const char *fmt,...) = &printf;
|
11
|
-
|
12
|
-
struct feature_node *x;
|
13
|
-
int max_nr_attr = 64;
|
14
|
-
|
15
|
-
struct model* model_;
|
16
|
-
int flag_predict_probability=0;
|
17
|
-
|
18
|
-
/*
 * Report a malformed input line on stderr and terminate the program
 * with exit status 1.
 *
 * line_num: number of the offending line, as printed in the message.
 */
void exit_input_error(int line_num)
{
	fprintf(stderr,
	        "Wrong input format at line %d\n",
	        line_num);
	exit(1);
}
|
23
|
-
|
24
|
-
/* Shared line buffer and its current capacity in bytes. The buffer is
 * allocated by do_predict(), grown here on demand, and freed by main(). */
static char *line = NULL;
static int max_line_len;

/*
 * Read one complete line from `input` into the global `line` buffer.
 *
 * The buffer is doubled (and max_line_len updated) until the whole line,
 * including its trailing '\n', fits. A final line without a newline is
 * returned as-is once end-of-file is reached.
 *
 * Returns `line`, or NULL when nothing could be read (EOF or read error).
 * Terminates the program with status 1 if the buffer cannot be grown;
 * the previous code dereferenced the NULL returned by realloc instead.
 */
static char* readline(FILE *input)
{
	if(fgets(line,max_line_len,input) == NULL)
		return NULL;

	while(strrchr(line,'\n') == NULL)
	{
		int len = (int) strlen(line);
		int grown_len = max_line_len * 2;
		/* Keep the old pointer until the reallocation is known to have worked. */
		char *grown = (char *) realloc(line,grown_len);

		if(grown == NULL)
		{
			fprintf(stderr,"can't allocate memory for input line\n");
			exit(1);
		}
		line = grown;
		max_line_len = grown_len;

		/* Continue reading right after what is already buffered. */
		if(fgets(line+len,max_line_len-len,input) == NULL)
			break;
	}
	return line;
}
|
44
|
-
|
45
|
-
void do_predict(FILE *input, FILE *output)
|
46
|
-
{
|
47
|
-
int correct = 0;
|
48
|
-
int total = 0;
|
49
|
-
double error = 0;
|
50
|
-
double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;
|
51
|
-
|
52
|
-
int nr_class=get_nr_class(model_);
|
53
|
-
double *prob_estimates=NULL;
|
54
|
-
int j, n;
|
55
|
-
int nr_feature=get_nr_feature(model_);
|
56
|
-
if(model_->bias>=0)
|
57
|
-
n=nr_feature+1;
|
58
|
-
else
|
59
|
-
n=nr_feature;
|
60
|
-
|
61
|
-
if(flag_predict_probability)
|
62
|
-
{
|
63
|
-
int *labels;
|
64
|
-
|
65
|
-
if(!check_probability_model(model_))
|
66
|
-
{
|
67
|
-
fprintf(stderr, "probability output is only supported for logistic regression\n");
|
68
|
-
exit(1);
|
69
|
-
}
|
70
|
-
|
71
|
-
labels=(int *) malloc(nr_class*sizeof(int));
|
72
|
-
get_labels(model_,labels);
|
73
|
-
prob_estimates = (double *) malloc(nr_class*sizeof(double));
|
74
|
-
fprintf(output,"labels");
|
75
|
-
for(j=0;j<nr_class;j++)
|
76
|
-
fprintf(output," %d",labels[j]);
|
77
|
-
fprintf(output,"\n");
|
78
|
-
free(labels);
|
79
|
-
}
|
80
|
-
|
81
|
-
max_line_len = 1024;
|
82
|
-
line = (char *)malloc(max_line_len*sizeof(char));
|
83
|
-
while(readline(input) != NULL)
|
84
|
-
{
|
85
|
-
int i = 0;
|
86
|
-
double target_label, predict_label;
|
87
|
-
char *idx, *val, *label, *endptr;
|
88
|
-
int inst_max_index = 0; // strtol gives 0 if wrong format
|
89
|
-
|
90
|
-
label = strtok(line," \t\n");
|
91
|
-
if(label == NULL) // empty line
|
92
|
-
exit_input_error(total+1);
|
93
|
-
|
94
|
-
target_label = strtod(label,&endptr);
|
95
|
-
if(endptr == label || *endptr != '\0')
|
96
|
-
exit_input_error(total+1);
|
97
|
-
|
98
|
-
while(1)
|
99
|
-
{
|
100
|
-
if(i>=max_nr_attr-2) // need one more for index = -1
|
101
|
-
{
|
102
|
-
max_nr_attr *= 2;
|
103
|
-
x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
|
104
|
-
}
|
105
|
-
|
106
|
-
idx = strtok(NULL,":");
|
107
|
-
val = strtok(NULL," \t");
|
108
|
-
|
109
|
-
if(val == NULL)
|
110
|
-
break;
|
111
|
-
errno = 0;
|
112
|
-
x[i].index = (int) strtol(idx,&endptr,10);
|
113
|
-
if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
|
114
|
-
exit_input_error(total+1);
|
115
|
-
else
|
116
|
-
inst_max_index = x[i].index;
|
117
|
-
|
118
|
-
errno = 0;
|
119
|
-
x[i].value = strtod(val,&endptr);
|
120
|
-
if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
|
121
|
-
exit_input_error(total+1);
|
122
|
-
|
123
|
-
// feature indices larger than those in training are not used
|
124
|
-
if(x[i].index <= nr_feature)
|
125
|
-
++i;
|
126
|
-
}
|
127
|
-
|
128
|
-
if(model_->bias>=0)
|
129
|
-
{
|
130
|
-
x[i].index = n;
|
131
|
-
x[i].value = model_->bias;
|
132
|
-
i++;
|
133
|
-
}
|
134
|
-
x[i].index = -1;
|
135
|
-
|
136
|
-
if(flag_predict_probability)
|
137
|
-
{
|
138
|
-
int j;
|
139
|
-
predict_label = predict_probability(model_,x,prob_estimates);
|
140
|
-
fprintf(output,"%g",predict_label);
|
141
|
-
for(j=0;j<model_->nr_class;j++)
|
142
|
-
fprintf(output," %g",prob_estimates[j]);
|
143
|
-
fprintf(output,"\n");
|
144
|
-
}
|
145
|
-
else
|
146
|
-
{
|
147
|
-
predict_label = predict(model_,x);
|
148
|
-
fprintf(output,"%g\n",predict_label);
|
149
|
-
}
|
150
|
-
|
151
|
-
if(predict_label == target_label)
|
152
|
-
++correct;
|
153
|
-
error += (predict_label-target_label)*(predict_label-target_label);
|
154
|
-
sump += predict_label;
|
155
|
-
sumt += target_label;
|
156
|
-
sumpp += predict_label*predict_label;
|
157
|
-
sumtt += target_label*target_label;
|
158
|
-
sumpt += predict_label*target_label;
|
159
|
-
++total;
|
160
|
-
}
|
161
|
-
if(check_regression_model(model_))
|
162
|
-
{
|
163
|
-
info("Mean squared error = %g (regression)\n",error/total);
|
164
|
-
info("Squared correlation coefficient = %g (regression)\n",
|
165
|
-
((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
|
166
|
-
((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
|
167
|
-
);
|
168
|
-
}
|
169
|
-
else
|
170
|
-
info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
|
171
|
-
if(flag_predict_probability)
|
172
|
-
free(prob_estimates);
|
173
|
-
}
|
174
|
-
|
175
|
-
/*
 * Print the command-line usage summary to stdout and terminate the
 * program with exit status 1.
 */
void exit_with_help()
{
	static const char usage_text[] =
		"Usage: predict [options] test_file model_file output_file\n"
		"options:\n"
		"-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n"
		"-q : quiet mode (no outputs)\n";

	fputs(usage_text, stdout);
	exit(1);
}
|
185
|
-
|
186
|
-
int main(int argc, char **argv)
|
187
|
-
{
|
188
|
-
FILE *input, *output;
|
189
|
-
int i;
|
190
|
-
|
191
|
-
// parse options
|
192
|
-
for(i=1;i<argc;i++)
|
193
|
-
{
|
194
|
-
if(argv[i][0] != '-') break;
|
195
|
-
++i;
|
196
|
-
switch(argv[i-1][1])
|
197
|
-
{
|
198
|
-
case 'b':
|
199
|
-
flag_predict_probability = atoi(argv[i]);
|
200
|
-
break;
|
201
|
-
case 'q':
|
202
|
-
info = &print_null;
|
203
|
-
i--;
|
204
|
-
break;
|
205
|
-
default:
|
206
|
-
fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
|
207
|
-
exit_with_help();
|
208
|
-
break;
|
209
|
-
}
|
210
|
-
}
|
211
|
-
if(i>=argc)
|
212
|
-
exit_with_help();
|
213
|
-
|
214
|
-
input = fopen(argv[i],"r");
|
215
|
-
if(input == NULL)
|
216
|
-
{
|
217
|
-
fprintf(stderr,"can't open input file %s\n",argv[i]);
|
218
|
-
exit(1);
|
219
|
-
}
|
220
|
-
|
221
|
-
output = fopen(argv[i+2],"w");
|
222
|
-
if(output == NULL)
|
223
|
-
{
|
224
|
-
fprintf(stderr,"can't open output file %s\n",argv[i+2]);
|
225
|
-
exit(1);
|
226
|
-
}
|
227
|
-
|
228
|
-
if((model_=load_model(argv[i+1]))==0)
|
229
|
-
{
|
230
|
-
fprintf(stderr,"can't open model file %s\n",argv[i+1]);
|
231
|
-
exit(1);
|
232
|
-
}
|
233
|
-
|
234
|
-
x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node));
|
235
|
-
do_predict(input, output);
|
236
|
-
free_and_destroy_model(&model_);
|
237
|
-
free(line);
|
238
|
-
free(x);
|
239
|
-
fclose(input);
|
240
|
-
fclose(output);
|
241
|
-
return 0;
|
242
|
-
}
|
243
|
-
|
data/liblinear-2.1/python/README
DELETED
@@ -1,380 +0,0 @@
|
|
1
|
-
-------------------------------------
|
2
|
-
--- Python interface of LIBLINEAR ---
|
3
|
-
-------------------------------------
|
4
|
-
|
5
|
-
Table of Contents
|
6
|
-
=================
|
7
|
-
|
8
|
-
- Introduction
|
9
|
-
- Installation
|
10
|
-
- Quick Start
|
11
|
-
- Design Description
|
12
|
-
- Data Structures
|
13
|
-
- Utility Functions
|
14
|
-
- Additional Information
|
15
|
-
|
16
|
-
Introduction
|
17
|
-
============
|
18
|
-
|
19
|
-
Python (http://www.python.org/) is a programming language suitable for rapid
|
20
|
-
development. This tool provides a simple Python interface to LIBLINEAR, a library
|
21
|
-
for support vector machines (http://www.csie.ntu.edu.tw/~cjlin/liblinear). The
|
22
|
-
interface is very easy to use as the usage is the same as that of LIBLINEAR. The
|
23
|
-
interface is developed with the built-in Python library "ctypes."
|
24
|
-
|
25
|
-
Installation
|
26
|
-
============
|
27
|
-
|
28
|
-
On Unix systems, type
|
29
|
-
|
30
|
-
> make
|
31
|
-
|
32
|
-
The interface needs only the LIBLINEAR shared library, which is generated by
|
33
|
-
the above command. We assume that the shared library is on the LIBLINEAR
|
34
|
-
main directory or in the system path.
|
35
|
-
|
36
|
-
For windows, the shared library liblinear.dll is ready in the directory
|
37
|
-
`..\windows'. You can also copy it to the system directory (e.g.,
|
38
|
-
`C:\WINDOWS\system32\' for Windows XP). To regenerate the shared library,
|
39
|
-
please follow the instructions for building Windows binaries in the LIBLINEAR README.
|
40
|
-
|
41
|
-
Quick Start
|
42
|
-
===========
|
43
|
-
|
44
|
-
There are two levels of usage. The high-level one uses utility functions
|
45
|
-
in liblinearutil.py and the usage is the same as the LIBLINEAR MATLAB interface.
|
46
|
-
|
47
|
-
>>> from liblinearutil import *
|
48
|
-
# Read data in LIBSVM format
|
49
|
-
>>> y, x = svm_read_problem('../heart_scale')
|
50
|
-
>>> m = train(y[:200], x[:200], '-c 4')
|
51
|
-
>>> p_label, p_acc, p_val = predict(y[200:], x[200:], m)
|
52
|
-
|
53
|
-
# Construct problem in python format
|
54
|
-
# Dense data
|
55
|
-
>>> y, x = [1,-1], [[1,0,1], [-1,0,-1]]
|
56
|
-
# Sparse data
|
57
|
-
>>> y, x = [1,-1], [{1:1, 3:1}, {1:-1,3:-1}]
|
58
|
-
>>> prob = problem(y, x)
|
59
|
-
>>> param = parameter('-s 0 -c 4 -B 1')
|
60
|
-
>>> m = train(prob, param)
|
61
|
-
|
62
|
-
# Other utility functions
|
63
|
-
>>> save_model('heart_scale.model', m)
|
64
|
-
>>> m = load_model('heart_scale.model')
|
65
|
-
>>> p_label, p_acc, p_val = predict(y, x, m, '-b 1')
|
66
|
-
>>> ACC, MSE, SCC = evaluations(y, p_label)
|
67
|
-
|
68
|
-
# Getting online help
|
69
|
-
>>> help(train)
|
70
|
-
|
71
|
-
The low-level use directly calls C interfaces imported by liblinear.py. Note that
|
72
|
-
all arguments and return values are in ctypes format. You need to handle them
|
73
|
-
carefully.
|
74
|
-
|
75
|
-
>>> from liblinear import *
|
76
|
-
>>> prob = problem([1,-1], [{1:1, 3:1}, {1:-1,3:-1}])
|
77
|
-
>>> param = parameter('-c 4')
|
78
|
-
>>> m = liblinear.train(prob, param) # m is a ctype pointer to a model
|
79
|
-
# Convert a Python-format instance to feature_nodearray, a ctypes structure
|
80
|
-
>>> x0, max_idx = gen_feature_nodearray({1:1, 3:1})
|
81
|
-
>>> label = liblinear.predict(m, x0)
|
82
|
-
|
83
|
-
Design Description
|
84
|
-
==================
|
85
|
-
|
86
|
-
There are two files liblinear.py and liblinearutil.py, which respectively correspond to
|
87
|
-
low-level and high-level use of the interface.
|
88
|
-
|
89
|
-
In liblinear.py, we adopt the Python built-in library "ctypes," so that
|
90
|
-
Python can directly access C structures and interface functions defined
|
91
|
-
in linear.h.
|
92
|
-
|
93
|
-
While advanced users can use structures/functions in liblinear.py, to
|
94
|
-
avoid handling ctypes structures, in liblinearutil.py we provide some easy-to-use
|
95
|
-
functions. The usage is similar to LIBLINEAR MATLAB interface.
|
96
|
-
|
97
|
-
Data Structures
|
98
|
-
===============
|
99
|
-
|
100
|
-
Three data structures derived from linear.h are node, problem, and
|
101
|
-
parameter. They all contain fields with the same names in
|
102
|
-
linear.h. Access these fields carefully because you directly use a C structure
|
103
|
-
instead of a Python object. The following description introduces additional
|
104
|
-
fields and methods.
|
105
|
-
|
106
|
-
Before using the data structures, execute the following command to load the
|
107
|
-
LIBLINEAR shared library:
|
108
|
-
|
109
|
-
>>> from liblinear import *
|
110
|
-
|
111
|
-
- class feature_node:
|
112
|
-
|
113
|
-
Construct a feature_node.
|
114
|
-
|
115
|
-
>>> node = feature_node(idx, val)
|
116
|
-
|
117
|
-
idx: an integer indicating the feature index.
|
118
|
-
|
119
|
-
val: a float indicating the feature value.
|
120
|
-
|
121
|
-
Show the index and the value of a node.
|
122
|
-
|
123
|
-
>>> print(node)
|
124
|
-
|
125
|
-
- Function: gen_feature_nodearray(xi [,feature_max=None [,issparse=True]])
|
126
|
-
|
127
|
-
Generate a feature vector from a Python list/tuple or a dictionary:
|
128
|
-
|
129
|
-
>>> xi, max_idx = gen_feature_nodearray({1:1, 3:1, 5:-2})
|
130
|
-
|
131
|
-
xi: the returned feature_nodearray (a ctypes structure)
|
132
|
-
|
133
|
-
max_idx: the maximal feature index of xi
|
134
|
-
|
135
|
-
issparse: if issparse == True, zero feature values are removed. The default
|
136
|
-
value is True for the sparsity.
|
137
|
-
|
138
|
-
feature_max: if feature_max is assigned, features with indices larger than
|
139
|
-
feature_max are removed.
|
140
|
-
|
141
|
-
- class problem:
|
142
|
-
|
143
|
-
Construct a problem instance
|
144
|
-
|
145
|
-
>>> prob = problem(y, x [,bias=-1])
|
146
|
-
|
147
|
-
y: a Python list/tuple of l labels (type must be int/double).
|
148
|
-
|
149
|
-
x: a Python list/tuple of l data instances. Each element of x must be
|
150
|
-
an instance of list/tuple/dictionary type.
|
151
|
-
|
152
|
-
bias: if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term
|
153
|
-
added (default -1)
|
154
|
-
|
155
|
-
You can also modify the bias value by
|
156
|
-
|
157
|
-
>>> prob.set_bias(1)
|
158
|
-
|
159
|
-
Note that if your x contains sparse data (i.e., dictionary), the internal
|
160
|
-
ctypes data format is still sparse.
|
161
|
-
|
162
|
-
- class parameter:
|
163
|
-
|
164
|
-
Construct a parameter instance
|
165
|
-
|
166
|
-
>>> param = parameter('training_options')
|
167
|
-
|
168
|
-
If 'training_options' is empty, LIBLINEAR default values are applied.
|
169
|
-
|
170
|
-
Set param to LIBLINEAR default values.
|
171
|
-
|
172
|
-
>>> param.set_to_default_values()
|
173
|
-
|
174
|
-
Parse a string of options.
|
175
|
-
|
176
|
-
>>> param.parse_options('training_options')
|
177
|
-
|
178
|
-
Show values of parameters.
|
179
|
-
|
180
|
-
>>> print(param)
|
181
|
-
|
182
|
-
- class model:
|
183
|
-
|
184
|
-
There are two ways to obtain an instance of model:
|
185
|
-
|
186
|
-
>>> model_ = train(y, x)
|
187
|
-
>>> model_ = load_model('model_file_name')
|
188
|
-
|
189
|
-
Note that the returned structure of interface functions
|
190
|
-
liblinear.train and liblinear.load_model is a ctypes pointer of
|
191
|
-
model, which is different from the model object returned
|
192
|
-
by train and load_model in liblinearutil.py. We provide a
|
193
|
-
function toPyModel for the conversion:
|
194
|
-
|
195
|
-
>>> model_ptr = liblinear.train(prob, param)
|
196
|
-
>>> model_ = toPyModel(model_ptr)
|
197
|
-
|
198
|
-
If you obtain a model in a way other than the above approaches,
|
199
|
-
handle it carefully to avoid memory leak or segmentation fault.
|
200
|
-
|
201
|
-
Some interface functions to access LIBLINEAR models are wrapped as
|
202
|
-
members of the class model:
|
203
|
-
|
204
|
-
>>> nr_feature = model_.get_nr_feature()
|
205
|
-
>>> nr_class = model_.get_nr_class()
|
206
|
-
>>> class_labels = model_.get_labels()
|
207
|
-
>>> is_prob_model = model_.is_probability_model()
|
208
|
-
>>> is_regression_model = model_.is_regression_model()
|
209
|
-
|
210
|
-
The decision function is W*x + b, where
|
211
|
-
W is an nr_class-by-nr_feature matrix, and
|
212
|
-
b is a vector of size nr_class.
|
213
|
-
To access W_kj (i.e., coefficient for the k-th class and the j-th feature)
|
214
|
-
and b_k (i.e., bias for the k-th class), use the following functions.
|
215
|
-
|
216
|
-
>>> W_kj = model_.get_decfun_coef(feat_idx=j, label_idx=k)
|
217
|
-
>>> b_k = model_.get_decfun_bias(label_idx=k)
|
218
|
-
|
219
|
-
We also provide a function to extract w_k (i.e., the k-th row of W) and
|
220
|
-
b_k directly as follows.
|
221
|
-
|
222
|
-
>>> [w_k, b_k] = model_.get_decfun(label_idx=k)
|
223
|
-
|
224
|
-
Note that w_k is a Python list of length nr_feature, which means that
|
225
|
-
w_k[0] = W_k1.
|
226
|
-
For regression models, W is just a vector of length nr_feature. Either
|
227
|
-
set label_idx=0 or omit the label_idx parameter to access the coefficients.
|
228
|
-
|
229
|
-
>>> W_j = model_.get_decfun_coef(feat_idx=j)
|
230
|
-
>>> b = model_.get_decfun_bias()
|
231
|
-
>>> [W, b] = model_.get_decfun()
|
232
|
-
|
233
|
-
Note that in get_decfun_coef, get_decfun_bias, and get_decfun, feat_idx
|
234
|
-
starts from 1, while label_idx starts from 0. If label_idx is not in the
|
235
|
-
valid range (0 to nr_class-1), then a NaN will be returned; and if feat_idx
|
236
|
-
is not in the valid range (1 to nr_feature), then a zero value will be
|
237
|
-
returned. For regression models, label_idx is ignored.
|
238
|
-
|
239
|
-
Utility Functions
|
240
|
-
=================
|
241
|
-
|
242
|
-
To use utility functions, type
|
243
|
-
|
244
|
-
>>> from liblinearutil import *
|
245
|
-
|
246
|
-
The above command loads
|
247
|
-
train() : train a linear model
|
248
|
-
predict() : predict testing data
|
249
|
-
svm_read_problem() : read the data from a LIBSVM-format file.
|
250
|
-
load_model() : load a LIBLINEAR model.
|
251
|
-
save_model() : save model to a file.
|
252
|
-
evaluations() : evaluate prediction results.
|
253
|
-
|
254
|
-
- Function: train
|
255
|
-
|
256
|
-
There are three ways to call train()
|
257
|
-
|
258
|
-
>>> model = train(y, x [, 'training_options'])
|
259
|
-
>>> model = train(prob [, 'training_options'])
|
260
|
-
>>> model = train(prob, param)
|
261
|
-
|
262
|
-
y: a list/tuple of l training labels (type must be int/double).
|
263
|
-
|
264
|
-
x: a list/tuple of l training instances. The feature vector of
|
265
|
-
each training instance is an instance of list/tuple or dictionary.
|
266
|
-
|
267
|
-
training_options: a string in the same form as that for LIBLINEAR command
|
268
|
-
mode.
|
269
|
-
|
270
|
-
prob: a problem instance generated by calling
|
271
|
-
problem(y, x).
|
272
|
-
|
273
|
-
param: a parameter instance generated by calling
|
274
|
-
parameter('training_options')
|
275
|
-
|
276
|
-
model: the returned model instance. See linear.h for details of this
|
277
|
-
structure. If '-v' is specified, cross validation is
|
278
|
-
conducted and the returned model is just a scalar: cross-validation
|
279
|
-
accuracy for classification and mean-squared error for regression.
|
280
|
-
If the '-C' option is specified, the best parameter C is found
|
281
|
-
by cross validation. The returned model is a tuple of the best C
|
282
|
-
and the corresponding cross-validation accuracy. The parameter
|
283
|
-
selection utility is supported by only -s 0 and -s 2.
|
284
|
-
|
285
|
-
|
286
|
-
To train the same data many times with different
|
287
|
-
parameters, the second and the third ways should be faster.
|
288
|
-
|
289
|
-
Examples:
|
290
|
-
|
291
|
-
>>> y, x = svm_read_problem('../heart_scale')
|
292
|
-
>>> prob = problem(y, x)
|
293
|
-
>>> param = parameter('-s 3 -c 5 -q')
|
294
|
-
>>> m = train(y, x, '-c 5')
|
295
|
-
>>> m = train(prob, '-w1 5 -c 5')
|
296
|
-
>>> m = train(prob, param)
|
297
|
-
>>> CV_ACC = train(y, x, '-v 3')
|
298
|
-
>>> best_C, best_rate = train(y, x, '-C -s 0')
|
299
|
-
>>> m = train(y, x, '-c {0} -s 0'.format(best_C)) # use the same solver: -s 0
|
300
|
-
|
301
|
-
- Function: predict
|
302
|
-
|
303
|
-
To predict testing data with a model, use
|
304
|
-
|
305
|
-
>>> p_labels, p_acc, p_vals = predict(y, x, model [,'predicting_options'])
|
306
|
-
|
307
|
-
y: a list/tuple of l true labels (type must be int/double). It is used
|
308
|
-
for calculating the accuracy. Use [] if true labels are
|
309
|
-
unavailable.
|
310
|
-
|
311
|
-
x: a list/tuple of l predicting instances. The feature vector of
|
312
|
-
each predicting instance is an instance of list/tuple or dictionary.
|
313
|
-
|
314
|
-
predicting_options: a string of predicting options in the same format as
|
315
|
-
that of LIBLINEAR.
|
316
|
-
|
317
|
-
model: a model instance.
|
318
|
-
|
319
|
-
p_labels: a list of predicted labels
|
320
|
-
|
321
|
-
p_acc: a tuple including accuracy (for classification), mean
|
322
|
-
squared error, and squared correlation coefficient (for
|
323
|
-
regression).
|
324
|
-
|
325
|
-
p_vals: a list of decision values or probability estimates (if '-b 1'
|
326
|
-
is specified). If k is the number of classes, for decision values,
|
327
|
-
each element includes results of predicting k binary-class
|
328
|
-
SVMs. If k = 2 and solver is not MCSVM_CS, only one decision value
|
329
|
-
is returned. For probabilities, each element contains k values
|
330
|
-
indicating the probability that the testing instance is in each class.
|
331
|
-
Note that the order of classes here is the same as 'model.label'
|
332
|
-
field in the model structure.
|
333
|
-
|
334
|
-
Example:
|
335
|
-
|
336
|
-
>>> m = train(y, x, '-c 5')
|
337
|
-
>>> p_labels, p_acc, p_vals = predict(y, x, m)
|
338
|
-
|
339
|
-
- Functions: svm_read_problem/load_model/save_model
|
340
|
-
|
341
|
-
See the usage by examples:
|
342
|
-
|
343
|
-
>>> y, x = svm_read_problem('data.txt')
|
344
|
-
>>> m = load_model('model_file')
|
345
|
-
>>> save_model('model_file', m)
|
346
|
-
|
347
|
-
- Function: evaluations
|
348
|
-
|
349
|
-
Calculate some evaluations using the true values (ty) and predicted
|
350
|
-
values (pv):
|
351
|
-
|
352
|
-
>>> (ACC, MSE, SCC) = evaluations(ty, pv)
|
353
|
-
|
354
|
-
ty: a list of true values.
|
355
|
-
|
356
|
-
pv: a list of predict values.
|
357
|
-
|
358
|
-
ACC: accuracy.
|
359
|
-
|
360
|
-
MSE: mean squared error.
|
361
|
-
|
362
|
-
SCC: squared correlation coefficient.
|
363
|
-
|
364
|
-
|
365
|
-
Additional Information
|
366
|
-
======================
|
367
|
-
|
368
|
-
This interface was written by Hsiang-Fu Yu from Department of Computer
|
369
|
-
Science, National Taiwan University. If you find this tool useful, please
|
370
|
-
cite LIBLINEAR as follows
|
371
|
-
|
372
|
-
R.-E. Fan, K.-W. Chang, C.-J. Hsieh, X.-R. Wang, and C.-J. Lin.
|
373
|
-
LIBLINEAR: A Library for Large Linear Classification, Journal of
|
374
|
-
Machine Learning Research 9(2008), 1871-1874. Software available at
|
375
|
-
http://www.csie.ntu.edu.tw/~cjlin/liblinear
|
376
|
-
|
377
|
-
For any question, please contact Chih-Jen Lin <cjlin@csie.ntu.edu.tw>,
|
378
|
-
or check the FAQ page:
|
379
|
-
|
380
|
-
http://www.csie.ntu.edu.tw/~cjlin/liblinear/faq.html
|