eluka 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/DOCUMENTATION_STANDARDS +39 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +20 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +69 -0
- data/VERSION +1 -0
- data/examples/example.rb +59 -0
- data/ext/libsvm/COPYRIGHT +31 -0
- data/ext/libsvm/FAQ.html +1749 -0
- data/ext/libsvm/Makefile +25 -0
- data/ext/libsvm/Makefile.win +33 -0
- data/ext/libsvm/README +733 -0
- data/ext/libsvm/extconf.rb +1 -0
- data/ext/libsvm/heart_scale +270 -0
- data/ext/libsvm/java/Makefile +25 -0
- data/ext/libsvm/java/libsvm.jar +0 -0
- data/ext/libsvm/java/libsvm/svm.java +2776 -0
- data/ext/libsvm/java/libsvm/svm.m4 +2776 -0
- data/ext/libsvm/java/libsvm/svm_model.java +21 -0
- data/ext/libsvm/java/libsvm/svm_node.java +6 -0
- data/ext/libsvm/java/libsvm/svm_parameter.java +47 -0
- data/ext/libsvm/java/libsvm/svm_print_interface.java +5 -0
- data/ext/libsvm/java/libsvm/svm_problem.java +7 -0
- data/ext/libsvm/java/svm_predict.java +163 -0
- data/ext/libsvm/java/svm_scale.java +350 -0
- data/ext/libsvm/java/svm_toy.java +471 -0
- data/ext/libsvm/java/svm_train.java +318 -0
- data/ext/libsvm/java/test_applet.html +1 -0
- data/ext/libsvm/python/Makefile +4 -0
- data/ext/libsvm/python/README +331 -0
- data/ext/libsvm/python/svm.py +259 -0
- data/ext/libsvm/python/svmutil.py +242 -0
- data/ext/libsvm/svm-predict.c +226 -0
- data/ext/libsvm/svm-scale.c +353 -0
- data/ext/libsvm/svm-toy/gtk/Makefile +22 -0
- data/ext/libsvm/svm-toy/gtk/callbacks.cpp +423 -0
- data/ext/libsvm/svm-toy/gtk/callbacks.h +54 -0
- data/ext/libsvm/svm-toy/gtk/interface.c +164 -0
- data/ext/libsvm/svm-toy/gtk/interface.h +14 -0
- data/ext/libsvm/svm-toy/gtk/main.c +23 -0
- data/ext/libsvm/svm-toy/gtk/svm-toy.glade +238 -0
- data/ext/libsvm/svm-toy/qt/Makefile +17 -0
- data/ext/libsvm/svm-toy/qt/svm-toy.cpp +413 -0
- data/ext/libsvm/svm-toy/windows/svm-toy.cpp +456 -0
- data/ext/libsvm/svm-train.c +376 -0
- data/ext/libsvm/svm.cpp +3060 -0
- data/ext/libsvm/svm.def +19 -0
- data/ext/libsvm/svm.h +105 -0
- data/ext/libsvm/svm.o +0 -0
- data/ext/libsvm/tools/README +149 -0
- data/ext/libsvm/tools/checkdata.py +108 -0
- data/ext/libsvm/tools/easy.py +79 -0
- data/ext/libsvm/tools/grid.py +359 -0
- data/ext/libsvm/tools/subset.py +146 -0
- data/ext/libsvm/windows/libsvm.dll +0 -0
- data/ext/libsvm/windows/svm-predict.exe +0 -0
- data/ext/libsvm/windows/svm-scale.exe +0 -0
- data/ext/libsvm/windows/svm-toy.exe +0 -0
- data/ext/libsvm/windows/svm-train.exe +0 -0
- data/lib/eluka.rb +10 -0
- data/lib/eluka/bijection.rb +23 -0
- data/lib/eluka/data_point.rb +36 -0
- data/lib/eluka/document.rb +47 -0
- data/lib/eluka/feature_vector.rb +86 -0
- data/lib/eluka/features.rb +31 -0
- data/lib/eluka/model.rb +129 -0
- data/lib/fselect.rb +321 -0
- data/lib/grid.rb +25 -0
- data/test/helper.rb +18 -0
- data/test/test_eluka.rb +7 -0
- metadata +214 -0
@@ -0,0 +1,318 @@
|
|
1
|
+
import libsvm.*;
|
2
|
+
import java.io.*;
|
3
|
+
import java.util.*;
|
4
|
+
|
5
|
+
class svm_train {
|
6
|
+
private svm_parameter param; // set by parse_command_line
|
7
|
+
private svm_problem prob; // set by read_problem
|
8
|
+
private svm_model model;
|
9
|
+
private String input_file_name; // set by parse_command_line
|
10
|
+
private String model_file_name; // set by parse_command_line
|
11
|
+
private String error_msg;
|
12
|
+
private int cross_validation;
|
13
|
+
private int nr_fold;
|
14
|
+
|
15
|
+
private static svm_print_interface svm_print_null = new svm_print_interface()
|
16
|
+
{
|
17
|
+
public void print(String s) {}
|
18
|
+
};
|
19
|
+
|
20
|
+
private static void exit_with_help()
|
21
|
+
{
|
22
|
+
System.out.print(
|
23
|
+
"Usage: svm_train [options] training_set_file [model_file]\n"
|
24
|
+
+"options:\n"
|
25
|
+
+"-s svm_type : set type of SVM (default 0)\n"
|
26
|
+
+" 0 -- C-SVC\n"
|
27
|
+
+" 1 -- nu-SVC\n"
|
28
|
+
+" 2 -- one-class SVM\n"
|
29
|
+
+" 3 -- epsilon-SVR\n"
|
30
|
+
+" 4 -- nu-SVR\n"
|
31
|
+
+"-t kernel_type : set type of kernel function (default 2)\n"
|
32
|
+
+" 0 -- linear: u'*v\n"
|
33
|
+
+" 1 -- polynomial: (gamma*u'*v + coef0)^degree\n"
|
34
|
+
+" 2 -- radial basis function: exp(-gamma*|u-v|^2)\n"
|
35
|
+
+" 3 -- sigmoid: tanh(gamma*u'*v + coef0)\n"
|
36
|
+
+" 4 -- precomputed kernel (kernel values in training_set_file)\n"
|
37
|
+
+"-d degree : set degree in kernel function (default 3)\n"
|
38
|
+
+"-g gamma : set gamma in kernel function (default 1/num_features)\n"
|
39
|
+
+"-r coef0 : set coef0 in kernel function (default 0)\n"
|
40
|
+
+"-c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)\n"
|
41
|
+
+"-n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)\n"
|
42
|
+
+"-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n"
|
43
|
+
+"-m cachesize : set cache memory size in MB (default 100)\n"
|
44
|
+
+"-e epsilon : set tolerance of termination criterion (default 0.001)\n"
|
45
|
+
+"-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n"
|
46
|
+
+"-b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)\n"
|
47
|
+
+"-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)\n"
|
48
|
+
+"-v n : n-fold cross validation mode\n"
|
49
|
+
+"-q : quiet mode (no outputs)\n"
|
50
|
+
);
|
51
|
+
System.exit(1);
|
52
|
+
}
|
53
|
+
|
54
|
+
private void do_cross_validation()
|
55
|
+
{
|
56
|
+
int i;
|
57
|
+
int total_correct = 0;
|
58
|
+
double total_error = 0;
|
59
|
+
double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
|
60
|
+
double[] target = new double[prob.l];
|
61
|
+
|
62
|
+
svm.svm_cross_validation(prob,param,nr_fold,target);
|
63
|
+
if(param.svm_type == svm_parameter.EPSILON_SVR ||
|
64
|
+
param.svm_type == svm_parameter.NU_SVR)
|
65
|
+
{
|
66
|
+
for(i=0;i<prob.l;i++)
|
67
|
+
{
|
68
|
+
double y = prob.y[i];
|
69
|
+
double v = target[i];
|
70
|
+
total_error += (v-y)*(v-y);
|
71
|
+
sumv += v;
|
72
|
+
sumy += y;
|
73
|
+
sumvv += v*v;
|
74
|
+
sumyy += y*y;
|
75
|
+
sumvy += v*y;
|
76
|
+
}
|
77
|
+
System.out.print("Cross Validation Mean squared error = "+total_error/prob.l+"\n");
|
78
|
+
System.out.print("Cross Validation Squared correlation coefficient = "+
|
79
|
+
((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
|
80
|
+
((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))+"\n"
|
81
|
+
);
|
82
|
+
}
|
83
|
+
else
|
84
|
+
{
|
85
|
+
for(i=0;i<prob.l;i++)
|
86
|
+
if(target[i] == prob.y[i])
|
87
|
+
++total_correct;
|
88
|
+
System.out.print("Cross Validation Accuracy = "+100.0*total_correct/prob.l+"%\n");
|
89
|
+
}
|
90
|
+
}
|
91
|
+
|
92
|
+
private void run(String argv[]) throws IOException
|
93
|
+
{
|
94
|
+
parse_command_line(argv);
|
95
|
+
read_problem();
|
96
|
+
error_msg = svm.svm_check_parameter(prob,param);
|
97
|
+
|
98
|
+
if(error_msg != null)
|
99
|
+
{
|
100
|
+
System.err.print("Error: "+error_msg+"\n");
|
101
|
+
System.exit(1);
|
102
|
+
}
|
103
|
+
|
104
|
+
if(cross_validation != 0)
|
105
|
+
{
|
106
|
+
do_cross_validation();
|
107
|
+
}
|
108
|
+
else
|
109
|
+
{
|
110
|
+
model = svm.svm_train(prob,param);
|
111
|
+
svm.svm_save_model(model_file_name,model);
|
112
|
+
}
|
113
|
+
}
|
114
|
+
|
115
|
+
public static void main(String argv[]) throws IOException
|
116
|
+
{
|
117
|
+
svm_train t = new svm_train();
|
118
|
+
t.run(argv);
|
119
|
+
}
|
120
|
+
|
121
|
+
private static double atof(String s)
|
122
|
+
{
|
123
|
+
double d = Double.valueOf(s).doubleValue();
|
124
|
+
if (Double.isNaN(d) || Double.isInfinite(d))
|
125
|
+
{
|
126
|
+
System.err.print("NaN or Infinity in input\n");
|
127
|
+
System.exit(1);
|
128
|
+
}
|
129
|
+
return(d);
|
130
|
+
}
|
131
|
+
|
132
|
+
private static int atoi(String s)
|
133
|
+
{
|
134
|
+
return Integer.parseInt(s);
|
135
|
+
}
|
136
|
+
|
137
|
+
private void parse_command_line(String argv[])
|
138
|
+
{
|
139
|
+
int i;
|
140
|
+
svm_print_interface print_func = null; // default printing to stdout
|
141
|
+
|
142
|
+
param = new svm_parameter();
|
143
|
+
// default values
|
144
|
+
param.svm_type = svm_parameter.C_SVC;
|
145
|
+
param.kernel_type = svm_parameter.RBF;
|
146
|
+
param.degree = 3;
|
147
|
+
param.gamma = 0; // 1/num_features
|
148
|
+
param.coef0 = 0;
|
149
|
+
param.nu = 0.5;
|
150
|
+
param.cache_size = 100;
|
151
|
+
param.C = 1;
|
152
|
+
param.eps = 1e-3;
|
153
|
+
param.p = 0.1;
|
154
|
+
param.shrinking = 1;
|
155
|
+
param.probability = 0;
|
156
|
+
param.nr_weight = 0;
|
157
|
+
param.weight_label = new int[0];
|
158
|
+
param.weight = new double[0];
|
159
|
+
cross_validation = 0;
|
160
|
+
|
161
|
+
// parse options
|
162
|
+
for(i=0;i<argv.length;i++)
|
163
|
+
{
|
164
|
+
if(argv[i].charAt(0) != '-') break;
|
165
|
+
if(++i>=argv.length)
|
166
|
+
exit_with_help();
|
167
|
+
switch(argv[i-1].charAt(1))
|
168
|
+
{
|
169
|
+
case 's':
|
170
|
+
param.svm_type = atoi(argv[i]);
|
171
|
+
break;
|
172
|
+
case 't':
|
173
|
+
param.kernel_type = atoi(argv[i]);
|
174
|
+
break;
|
175
|
+
case 'd':
|
176
|
+
param.degree = atoi(argv[i]);
|
177
|
+
break;
|
178
|
+
case 'g':
|
179
|
+
param.gamma = atof(argv[i]);
|
180
|
+
break;
|
181
|
+
case 'r':
|
182
|
+
param.coef0 = atof(argv[i]);
|
183
|
+
break;
|
184
|
+
case 'n':
|
185
|
+
param.nu = atof(argv[i]);
|
186
|
+
break;
|
187
|
+
case 'm':
|
188
|
+
param.cache_size = atof(argv[i]);
|
189
|
+
break;
|
190
|
+
case 'c':
|
191
|
+
param.C = atof(argv[i]);
|
192
|
+
break;
|
193
|
+
case 'e':
|
194
|
+
param.eps = atof(argv[i]);
|
195
|
+
break;
|
196
|
+
case 'p':
|
197
|
+
param.p = atof(argv[i]);
|
198
|
+
break;
|
199
|
+
case 'h':
|
200
|
+
param.shrinking = atoi(argv[i]);
|
201
|
+
break;
|
202
|
+
case 'b':
|
203
|
+
param.probability = atoi(argv[i]);
|
204
|
+
break;
|
205
|
+
case 'q':
|
206
|
+
print_func = svm_print_null;
|
207
|
+
i--;
|
208
|
+
break;
|
209
|
+
case 'v':
|
210
|
+
cross_validation = 1;
|
211
|
+
nr_fold = atoi(argv[i]);
|
212
|
+
if(nr_fold < 2)
|
213
|
+
{
|
214
|
+
System.err.print("n-fold cross validation: n must >= 2\n");
|
215
|
+
exit_with_help();
|
216
|
+
}
|
217
|
+
break;
|
218
|
+
case 'w':
|
219
|
+
++param.nr_weight;
|
220
|
+
{
|
221
|
+
int[] old = param.weight_label;
|
222
|
+
param.weight_label = new int[param.nr_weight];
|
223
|
+
System.arraycopy(old,0,param.weight_label,0,param.nr_weight-1);
|
224
|
+
}
|
225
|
+
|
226
|
+
{
|
227
|
+
double[] old = param.weight;
|
228
|
+
param.weight = new double[param.nr_weight];
|
229
|
+
System.arraycopy(old,0,param.weight,0,param.nr_weight-1);
|
230
|
+
}
|
231
|
+
|
232
|
+
param.weight_label[param.nr_weight-1] = atoi(argv[i-1].substring(2));
|
233
|
+
param.weight[param.nr_weight-1] = atof(argv[i]);
|
234
|
+
break;
|
235
|
+
default:
|
236
|
+
System.err.print("Unknown option: " + argv[i-1] + "\n");
|
237
|
+
exit_with_help();
|
238
|
+
}
|
239
|
+
}
|
240
|
+
|
241
|
+
svm.svm_set_print_string_function(print_func);
|
242
|
+
|
243
|
+
// determine filenames
|
244
|
+
|
245
|
+
if(i>=argv.length)
|
246
|
+
exit_with_help();
|
247
|
+
|
248
|
+
input_file_name = argv[i];
|
249
|
+
|
250
|
+
if(i<argv.length-1)
|
251
|
+
model_file_name = argv[i+1];
|
252
|
+
else
|
253
|
+
{
|
254
|
+
int p = argv[i].lastIndexOf('/');
|
255
|
+
++p; // whew...
|
256
|
+
model_file_name = argv[i].substring(p)+".model";
|
257
|
+
}
|
258
|
+
}
|
259
|
+
|
260
|
+
// read in a problem (in svmlight format)
|
261
|
+
|
262
|
+
private void read_problem() throws IOException
|
263
|
+
{
|
264
|
+
BufferedReader fp = new BufferedReader(new FileReader(input_file_name));
|
265
|
+
Vector<Double> vy = new Vector<Double>();
|
266
|
+
Vector<svm_node[]> vx = new Vector<svm_node[]>();
|
267
|
+
int max_index = 0;
|
268
|
+
|
269
|
+
while(true)
|
270
|
+
{
|
271
|
+
String line = fp.readLine();
|
272
|
+
if(line == null) break;
|
273
|
+
|
274
|
+
StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
|
275
|
+
|
276
|
+
vy.addElement(atof(st.nextToken()));
|
277
|
+
int m = st.countTokens()/2;
|
278
|
+
svm_node[] x = new svm_node[m];
|
279
|
+
for(int j=0;j<m;j++)
|
280
|
+
{
|
281
|
+
x[j] = new svm_node();
|
282
|
+
x[j].index = atoi(st.nextToken());
|
283
|
+
x[j].value = atof(st.nextToken());
|
284
|
+
}
|
285
|
+
if(m>0) max_index = Math.max(max_index, x[m-1].index);
|
286
|
+
vx.addElement(x);
|
287
|
+
}
|
288
|
+
|
289
|
+
prob = new svm_problem();
|
290
|
+
prob.l = vy.size();
|
291
|
+
prob.x = new svm_node[prob.l][];
|
292
|
+
for(int i=0;i<prob.l;i++)
|
293
|
+
prob.x[i] = vx.elementAt(i);
|
294
|
+
prob.y = new double[prob.l];
|
295
|
+
for(int i=0;i<prob.l;i++)
|
296
|
+
prob.y[i] = vy.elementAt(i);
|
297
|
+
|
298
|
+
if(param.gamma == 0 && max_index > 0)
|
299
|
+
param.gamma = 1.0/max_index;
|
300
|
+
|
301
|
+
if(param.kernel_type == svm_parameter.PRECOMPUTED)
|
302
|
+
for(int i=0;i<prob.l;i++)
|
303
|
+
{
|
304
|
+
if (prob.x[i][0].index != 0)
|
305
|
+
{
|
306
|
+
System.err.print("Wrong kernel matrix: first column must be 0:sample_serial_number\n");
|
307
|
+
System.exit(1);
|
308
|
+
}
|
309
|
+
if ((int)prob.x[i][0].value <= 0 || (int)prob.x[i][0].value > max_index)
|
310
|
+
{
|
311
|
+
System.err.print("Wrong input format: sample_serial_number out of range\n");
|
312
|
+
System.exit(1);
|
313
|
+
}
|
314
|
+
}
|
315
|
+
|
316
|
+
fp.close();
|
317
|
+
}
|
318
|
+
}
|
@@ -0,0 +1 @@
|
|
1
|
+
<APPLET code="svm_toy.class" archive="libsvm.jar" width=300 height=350></APPLET>
|
@@ -0,0 +1,331 @@
|
|
1
|
+
----------------------------------
|
2
|
+
--- Python interface of LIBSVM ---
|
3
|
+
----------------------------------
|
4
|
+
|
5
|
+
Table of Contents
|
6
|
+
=================
|
7
|
+
|
8
|
+
- Introduction
|
9
|
+
- Installation
|
10
|
+
- Quick Start
|
11
|
+
- Design Description
|
12
|
+
- Data Structures
|
13
|
+
- Utility Functions
|
14
|
+
- Additional Information
|
15
|
+
|
16
|
+
Introduction
|
17
|
+
============
|
18
|
+
|
19
|
+
Python (http://www.python.org/) is a programming language suitable for rapid
|
20
|
+
development. This tool provides a simple Python interface to LIBSVM, a library
|
21
|
+
for support vector machines (http://www.csie.ntu.edu.tw/~cjlin/libsvm). The
|
22
|
+
interface is very easy to use as the usage is the same as that of LIBSVM. The
|
23
|
+
interface is developed with the built-in Python library "ctypes."
|
24
|
+
|
25
|
+
Installation
|
26
|
+
============
|
27
|
+
|
28
|
+
On Unix systems, type
|
29
|
+
|
30
|
+
> make
|
31
|
+
|
32
|
+
The interface needs only the LIBSVM shared library, which is generated by
|
33
|
+
the above command. We assume that the shared library is in the LIBSVM
|
34
|
+
main directory or in the system path.
|
35
|
+
|
36
|
+
For windows, the shared library libsvm.dll is ready in the directory
|
37
|
+
`..\windows'. You can also copy it to the system directory (e.g.,
|
38
|
+
`C:\WINDOWS\system32\' for Windows XP). To regenerate the shared library,
|
39
|
+
please follow the instructions for building Windows binaries in the LIBSVM README.
|
40
|
+
|
41
|
+
Quick Start
|
42
|
+
===========
|
43
|
+
|
44
|
+
There are two levels of usage. The high-level one uses utility functions
|
45
|
+
in svmutil.py and the usage is the same as the LIBSVM MATLAB interface.
|
46
|
+
|
47
|
+
>>> from svmutil import *
|
48
|
+
# Read data in LIBSVM format
|
49
|
+
>>> y, x = svm_read_problem('../heart_scale')
|
50
|
+
>>> m = svm_train(y[:200], x[:200], '-c 4')
|
51
|
+
>>> p_label, p_acc, p_val = svm_predict(y[200:], x[200:], m)
|
52
|
+
|
53
|
+
# Construct problem in python format
|
54
|
+
# Dense data
|
55
|
+
>>> y, x = [1,-1], [[1,0,1], [-1,0,-1]]
|
56
|
+
# Sparse data
|
57
|
+
>>> y, x = [1,-1], [{1:1, 3:1}, {1:-1,3:-1}]
|
58
|
+
>>> prob = svm_problem(y, x)
|
59
|
+
>>> param = svm_parameter('-c 4 -b 1')
|
60
|
+
>>> m = svm_train(prob, param)
|
61
|
+
|
62
|
+
# Other utility functions
|
63
|
+
>>> svm_save_model('heart_scale.model', m)
|
64
|
+
>>> m = svm_load_model('heart_scale.model')
|
65
|
+
>>> p_label, p_acc, p_val = svm_predict(y, x, m, '-b 1')
|
66
|
+
>>> ACC, MSE, SCC = evaluations(y, p_val)
|
67
|
+
|
68
|
+
# Getting online help
|
69
|
+
>>> help(svm_train)
|
70
|
+
|
71
|
+
The low-level use directly calls C interfaces imported by svm.py. Note that
|
72
|
+
all arguments and return values are in ctypes format. You need to handle them
|
73
|
+
carefully.
|
74
|
+
|
75
|
+
>>> from svm import *
|
76
|
+
>>> prob = svm_problem([1,-1], [{1:1, 3:1}, {1:-1,3:-1}])
|
77
|
+
>>> param = svm_parameter('-c 4')
|
78
|
+
>>> m = libsvm.svm_train(prob, param) # m is a ctype pointer to an svm_model
|
79
|
+
# Convert a Python-format instance to svm_nodearray, a ctypes structure
|
80
|
+
>>> x0, max_idx = gen_svm_nodearray({1:1, 3:1})
|
81
|
+
>>> label = libsvm.svm_predict(m, x0)
|
82
|
+
|
83
|
+
Design Description
|
84
|
+
==================
|
85
|
+
|
86
|
+
There are two files svm.py and svmutil.py, which respectively correspond to
|
87
|
+
low-level and high-level use of the interface.
|
88
|
+
|
89
|
+
In svm.py, we adopt the Python built-in library "ctypes," so that
|
90
|
+
Python can directly access C structures and interface functions defined
|
91
|
+
in svm.h.
|
92
|
+
|
93
|
+
While advanced users can use structures/functions in svm.py, to
|
94
|
+
avoid handling ctypes structures, in svmutil.py we provide some easy-to-use
|
95
|
+
functions. The usage is similar to LIBSVM MATLAB interface.
|
96
|
+
|
97
|
+
Data Structures
|
98
|
+
===============
|
99
|
+
|
100
|
+
Three data structures derived from svm.h are svm_node, svm_problem, and
|
101
|
+
svm_parameter. They all contain fields with the same names in
|
102
|
+
svm.h. Access these fields carefully because you directly use a C structure
|
103
|
+
instead of a Python object. The following description introduces additional
|
104
|
+
fields and methods.
|
105
|
+
|
106
|
+
Before using the data structures, execute the following command to load the
|
107
|
+
LIBSVM shared library:
|
108
|
+
|
109
|
+
>>> from svm import *
|
110
|
+
|
111
|
+
- class svm_node:
|
112
|
+
|
113
|
+
Construct an svm_node.
|
114
|
+
|
115
|
+
>>> node = svm_node(idx, val)
|
116
|
+
|
117
|
+
idx: an integer indicating the feature index.
|
118
|
+
|
119
|
+
val: a float indicating the feature value.
|
120
|
+
|
121
|
+
- Function: gen_svm_nodearray(xi [,feature_max=None [,issparse=False]])
|
122
|
+
|
123
|
+
Generate a feature vector from a Python list/tuple or a dictionary:
|
124
|
+
|
125
|
+
>>> xi, max_idx = gen_svm_nodearray({1:1, 3:1, 5:-2})
|
126
|
+
|
127
|
+
xi: the returned svm_nodearray (a ctypes structure)
|
128
|
+
|
129
|
+
max_idx: the maximal feature index of xi
|
130
|
+
|
131
|
+
issparse: if issparse == True, zero feature values are removed. The default
|
132
|
+
value is False for supporting the pre-computed kernel.
|
133
|
+
|
134
|
+
feature_max: if feature_max is assigned, features with indices larger than
|
135
|
+
feature_max are removed.
|
136
|
+
|
137
|
+
- class svm_problem:
|
138
|
+
|
139
|
+
Construct an svm_problem instance
|
140
|
+
|
141
|
+
>>> prob = svm_problem(y, x)
|
142
|
+
|
143
|
+
y: a Python list/tuple of l labels (type must be int/double).
|
144
|
+
|
145
|
+
x: a Python list/tuple of l data instances. Each element of x must be
|
146
|
+
an instance of list/tuple/dictionary type.
|
147
|
+
|
148
|
+
Note that if your x contains sparse data (i.e., dictionary), the internal
|
149
|
+
ctypes data format is still sparse.
|
150
|
+
|
151
|
+
- class svm_parameter:
|
152
|
+
|
153
|
+
Construct an svm_parameter instance
|
154
|
+
|
155
|
+
>>> param = svm_parameter('training_options')
|
156
|
+
|
157
|
+
If 'training_options' is empty, LIBSVM default values are applied.
|
158
|
+
|
159
|
+
Set param to LIBSVM default values.
|
160
|
+
|
161
|
+
>>> param.set_to_default_values()
|
162
|
+
|
163
|
+
Parse a string of options.
|
164
|
+
|
165
|
+
>>> param.parse_options('training_options')
|
166
|
+
|
167
|
+
Show values of parameters.
|
168
|
+
|
169
|
+
>>> param.show()
|
170
|
+
|
171
|
+
- class svm_model:
|
172
|
+
|
173
|
+
There are two ways to obtain an instance of svm_model:
|
174
|
+
|
175
|
+
>>> model = svm_train(y, x)
|
176
|
+
>>> model = svm_load_model('model_file_name')
|
177
|
+
|
178
|
+
Note that the returned structure of interface functions
|
179
|
+
libsvm.svm_train and libsvm.svm_load_model is a ctypes pointer of
|
180
|
+
svm_model, which is different from the svm_model object returned
|
181
|
+
by svm_train and svm_load_model in svmutil.py. We provide a
|
182
|
+
function toPyModel for the conversion:
|
183
|
+
|
184
|
+
>>> model_ptr = libsvm.svm_train(prob, param)
|
185
|
+
>>> model = toPyModel(model_ptr)
|
186
|
+
|
187
|
+
If you obtain a model in a way other than the above approaches,
|
188
|
+
handle it carefully to avoid memory leak or segmentation fault.
|
189
|
+
|
190
|
+
Some interface functions to access LIBSVM models are wrapped as
|
191
|
+
members of the class svm_model:
|
192
|
+
|
193
|
+
>>> svm_type = model.get_svm_type()
|
194
|
+
>>> nr_class = model.get_nr_class()
|
195
|
+
>>> svr_probability = model.get_svr_probability()
|
196
|
+
>>> class_labels = model.get_labels()
|
197
|
+
>>> is_prob_model = model.is_probability_model()
|
198
|
+
|
199
|
+
Utility Functions
|
200
|
+
=================
|
201
|
+
|
202
|
+
To use utility functions, type
|
203
|
+
|
204
|
+
>>> from svmutil import *
|
205
|
+
|
206
|
+
The above command loads
|
207
|
+
svm_train() : train an SVM model
|
208
|
+
svm_predict() : predict testing data
|
209
|
+
svm_read_problem() : read the data from a LIBSVM-format file.
|
210
|
+
svm_load_model() : load a LIBSVM model.
|
211
|
+
svm_save_model() : save model to a file.
|
212
|
+
evaluations() : evaluate prediction results.
|
213
|
+
|
214
|
+
- Function: svm_train
|
215
|
+
|
216
|
+
There are three ways to call svm_train()
|
217
|
+
|
218
|
+
>>> model = svm_train(y, x [, 'training_options'])
|
219
|
+
>>> model = svm_train(prob [, 'training_options'])
|
220
|
+
>>> model = svm_train(prob, param)
|
221
|
+
|
222
|
+
y: a list/tuple of l training labels (type must be int/double).
|
223
|
+
|
224
|
+
x: a list/tuple of l training instances. The feature vector of
|
225
|
+
each training instance is an instance of list/tuple or dictionary.
|
226
|
+
|
227
|
+
training_options: a string in the same form as that for LIBSVM command
|
228
|
+
mode.
|
229
|
+
|
230
|
+
prob: an svm_problem instance generated by calling
|
231
|
+
svm_problem(y, x).
|
232
|
+
|
233
|
+
param: an svm_parameter instance generated by calling
|
234
|
+
svm_parameter('training_options')
|
235
|
+
|
236
|
+
model: the returned svm_model instance. See svm.h for details of this
|
237
|
+
structure. If '-v' is specified, cross validation is
|
238
|
+
conducted and the returned model is just a scalar: cross-validation
|
239
|
+
accuracy for classification and mean-squared error for regression.
|
240
|
+
|
241
|
+
To train the same data many times with different
|
242
|
+
parameters, the second and the third ways should be faster.
|
243
|
+
|
244
|
+
Examples:
|
245
|
+
|
246
|
+
>>> y, x = svm_read_problem('../heart_scale')
|
247
|
+
>>> prob = svm_problem(y, x)
|
248
|
+
>>> param = svm_parameter('-s 3 -c 5 -h 0')
|
249
|
+
>>> m = svm_train(y, x, '-c 5')
|
250
|
+
>>> m = svm_train(prob, '-t 2 -c 5')
|
251
|
+
>>> m = svm_train(prob, param)
|
252
|
+
>>> CV_ACC = svm_train(y, x, '-v 3')
|
253
|
+
|
254
|
+
- Function: svm_predict
|
255
|
+
|
256
|
+
To predict testing data with a model, use
|
257
|
+
|
258
|
+
>>> p_labels, p_acc, p_vals = svm_predict(y, x, model [,'predicting_options'])
|
259
|
+
|
260
|
+
y: a list/tuple of l true labels (type must be int/double). It is used
|
261
|
+
for calculating the accuracy. Use [0]*len(x) if true labels are
|
262
|
+
unavailable.
|
263
|
+
|
264
|
+
x: a list/tuple of l predicting instances. The feature vector of
|
265
|
+
each predicting instance is an instance of list/tuple or dictionary.
|
266
|
+
|
267
|
+
predicting_options: a string of predicting options in the same format as
|
268
|
+
that of LIBSVM.
|
269
|
+
|
270
|
+
model: an svm_model instance.
|
271
|
+
|
272
|
+
p_labels: a list of predicted labels
|
273
|
+
|
274
|
+
p_acc: a tuple including accuracy (for classification), mean
|
275
|
+
squared error, and squared correlation coefficient (for
|
276
|
+
regression).
|
277
|
+
|
278
|
+
p_vals: a list of decision values or probability estimates (if '-b 1'
|
279
|
+
is specified). If k is the number of classes, for decision values,
|
280
|
+
each element includes results of predicting k(k-1)/2 binary-class
|
281
|
+
SVMs. For probabilities, each element contains k values indicating
|
282
|
+
the probability that the testing instance is in each class.
|
283
|
+
Note that the order of classes is the same as the 'model.label'
|
284
|
+
field in the model structure.
|
285
|
+
|
286
|
+
Example:
|
287
|
+
|
288
|
+
>>> m = svm_train(y, x, '-c 5')
|
289
|
+
>>> p_labels, p_acc, p_vals = svm_predict(y, x, m)
|
290
|
+
|
291
|
+
- Functions: svm_read_problem/svm_load_model/svm_save_model
|
292
|
+
|
293
|
+
See the usage by examples:
|
294
|
+
|
295
|
+
>>> y, x = svm_read_problem('data.txt')
|
296
|
+
>>> m = svm_load_model('model_file')
|
297
|
+
>>> svm_save_model('model_file', m)
|
298
|
+
|
299
|
+
- Function: evaluations
|
300
|
+
|
301
|
+
Calculate some evaluations using the true values (ty) and predicted
|
302
|
+
values (pv):
|
303
|
+
|
304
|
+
>>> (ACC, MSE, SCC) = evaluations(ty, pv)
|
305
|
+
|
306
|
+
ty: a list of true values.
|
307
|
+
|
308
|
+
pv: a list of predicted values.
|
309
|
+
|
310
|
+
ACC: accuracy.
|
311
|
+
|
312
|
+
MSE: mean squared error.
|
313
|
+
|
314
|
+
SCC: squared correlation coefficient.
|
315
|
+
|
316
|
+
|
317
|
+
Additional Information
|
318
|
+
======================
|
319
|
+
|
320
|
+
This interface was written by Hsiang-Fu Yu from Department of Computer
|
321
|
+
Science, National Taiwan University. If you find this tool useful, please
|
322
|
+
cite LIBSVM as follows
|
323
|
+
|
324
|
+
Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for
|
325
|
+
support vector machines, 2001. Software available at
|
326
|
+
http://www.csie.ntu.edu.tw/~cjlin/libsvm
|
327
|
+
|
328
|
+
For any question, please contact Chih-Jen Lin <cjlin@csie.ntu.edu.tw>,
|
329
|
+
or check the FAQ page:
|
330
|
+
|
331
|
+
http://www.csie.ntu.edu.tw/~cjlin/libsvm/faq.html
|