eluka 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. data/.document +5 -0
  2. data/DOCUMENTATION_STANDARDS +39 -0
  3. data/Gemfile +13 -0
  4. data/Gemfile.lock +20 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +19 -0
  7. data/Rakefile +69 -0
  8. data/VERSION +1 -0
  9. data/examples/example.rb +59 -0
  10. data/ext/libsvm/COPYRIGHT +31 -0
  11. data/ext/libsvm/FAQ.html +1749 -0
  12. data/ext/libsvm/Makefile +25 -0
  13. data/ext/libsvm/Makefile.win +33 -0
  14. data/ext/libsvm/README +733 -0
  15. data/ext/libsvm/extconf.rb +1 -0
  16. data/ext/libsvm/heart_scale +270 -0
  17. data/ext/libsvm/java/Makefile +25 -0
  18. data/ext/libsvm/java/libsvm.jar +0 -0
  19. data/ext/libsvm/java/libsvm/svm.java +2776 -0
  20. data/ext/libsvm/java/libsvm/svm.m4 +2776 -0
  21. data/ext/libsvm/java/libsvm/svm_model.java +21 -0
  22. data/ext/libsvm/java/libsvm/svm_node.java +6 -0
  23. data/ext/libsvm/java/libsvm/svm_parameter.java +47 -0
  24. data/ext/libsvm/java/libsvm/svm_print_interface.java +5 -0
  25. data/ext/libsvm/java/libsvm/svm_problem.java +7 -0
  26. data/ext/libsvm/java/svm_predict.java +163 -0
  27. data/ext/libsvm/java/svm_scale.java +350 -0
  28. data/ext/libsvm/java/svm_toy.java +471 -0
  29. data/ext/libsvm/java/svm_train.java +318 -0
  30. data/ext/libsvm/java/test_applet.html +1 -0
  31. data/ext/libsvm/python/Makefile +4 -0
  32. data/ext/libsvm/python/README +331 -0
  33. data/ext/libsvm/python/svm.py +259 -0
  34. data/ext/libsvm/python/svmutil.py +242 -0
  35. data/ext/libsvm/svm-predict.c +226 -0
  36. data/ext/libsvm/svm-scale.c +353 -0
  37. data/ext/libsvm/svm-toy/gtk/Makefile +22 -0
  38. data/ext/libsvm/svm-toy/gtk/callbacks.cpp +423 -0
  39. data/ext/libsvm/svm-toy/gtk/callbacks.h +54 -0
  40. data/ext/libsvm/svm-toy/gtk/interface.c +164 -0
  41. data/ext/libsvm/svm-toy/gtk/interface.h +14 -0
  42. data/ext/libsvm/svm-toy/gtk/main.c +23 -0
  43. data/ext/libsvm/svm-toy/gtk/svm-toy.glade +238 -0
  44. data/ext/libsvm/svm-toy/qt/Makefile +17 -0
  45. data/ext/libsvm/svm-toy/qt/svm-toy.cpp +413 -0
  46. data/ext/libsvm/svm-toy/windows/svm-toy.cpp +456 -0
  47. data/ext/libsvm/svm-train.c +376 -0
  48. data/ext/libsvm/svm.cpp +3060 -0
  49. data/ext/libsvm/svm.def +19 -0
  50. data/ext/libsvm/svm.h +105 -0
  51. data/ext/libsvm/svm.o +0 -0
  52. data/ext/libsvm/tools/README +149 -0
  53. data/ext/libsvm/tools/checkdata.py +108 -0
  54. data/ext/libsvm/tools/easy.py +79 -0
  55. data/ext/libsvm/tools/grid.py +359 -0
  56. data/ext/libsvm/tools/subset.py +146 -0
  57. data/ext/libsvm/windows/libsvm.dll +0 -0
  58. data/ext/libsvm/windows/svm-predict.exe +0 -0
  59. data/ext/libsvm/windows/svm-scale.exe +0 -0
  60. data/ext/libsvm/windows/svm-toy.exe +0 -0
  61. data/ext/libsvm/windows/svm-train.exe +0 -0
  62. data/lib/eluka.rb +10 -0
  63. data/lib/eluka/bijection.rb +23 -0
  64. data/lib/eluka/data_point.rb +36 -0
  65. data/lib/eluka/document.rb +47 -0
  66. data/lib/eluka/feature_vector.rb +86 -0
  67. data/lib/eluka/features.rb +31 -0
  68. data/lib/eluka/model.rb +129 -0
  69. data/lib/fselect.rb +321 -0
  70. data/lib/grid.rb +25 -0
  71. data/test/helper.rb +18 -0
  72. data/test/test_eluka.rb +7 -0
  73. metadata +214 -0
@@ -0,0 +1,25 @@
# Build the eluka-prefixed libsvm command-line tools (svm-train,
# svm-predict, svm-scale) and, on request, a shared library.
CXX ?= g++
CFLAGS = -Wall -Wconversion -O3 -fPIC
# Shared-library version suffix (libsvm.so.$(SHVER)).
SHVER = 2

# These targets never create files with their own names (the binaries are
# written as eluka-*), so declare them phony to avoid stale-file clashes.
.PHONY: all lib clean install svm-train svm-predict svm-scale

all: svm-train svm-predict svm-scale

# NOTE: the original recipe passed both -shared and -dynamiclib;
# -dynamiclib is a Darwin-only driver flag that GCC on Linux rejects.
# -shared (with the -fPIC object above) is the portable form.
lib: svm.o
	$(CXX) -shared svm.o -o libsvm.so.$(SHVER)

svm-predict: svm-predict.c svm.o
	$(CXX) $(CFLAGS) svm-predict.c svm.o -o eluka-svm-predict -lm

svm-train: svm-train.c svm.o
	$(CXX) $(CFLAGS) svm-train.c svm.o -o eluka-svm-train -lm

svm-scale: svm-scale.c
	$(CXX) $(CFLAGS) svm-scale.c -o eluka-svm-scale

svm.o: svm.cpp svm.h
	$(CXX) $(CFLAGS) -c svm.cpp

clean:
	rm -f *~ svm.o eluka-svm-train eluka-svm-predict eluka-svm-scale libsvm.so.$(SHVER)

# Depend on 'all' so a clean checkout can run `make install` directly
# (the original recipe assumed the binaries already existed).
install: all
	mkdir -p ../../bin
	cp eluka-svm-train ../../bin
	cp eluka-svm-predict ../../bin
	cp eluka-svm-scale ../../bin
@@ -0,0 +1,33 @@
# Makefile.win -- build libsvm tools with the Microsoft toolchain (nmake).
#
# You must ensure nmake.exe, cl.exe, link.exe are in system path.
# Set the environment first (VCVARS32.bat), then from a command prompt run:
#   nmake -f Makefile.win

##########################################
CXX = cl.exe
CFLAGS = -nologo -O2 -EHsc -I. -D __WIN32__ -D _CRT_SECURE_NO_DEPRECATE
# All build products land in this subdirectory.
TARGET = windows

# Default goal: every .exe plus the DLL.
all: $(TARGET)\svm-train.exe $(TARGET)\svm-predict.exe $(TARGET)\svm-scale.exe $(TARGET)\svm-toy.exe lib

$(TARGET)\svm-predict.exe: svm.h svm-predict.c svm.obj
	$(CXX) $(CFLAGS) svm-predict.c svm.obj -Fe$(TARGET)\svm-predict.exe

$(TARGET)\svm-train.exe: svm.h svm-train.c svm.obj
	$(CXX) $(CFLAGS) svm-train.c svm.obj -Fe$(TARGET)\svm-train.exe

# svm-scale has no dependency on svm.obj; it is a standalone C file.
$(TARGET)\svm-scale.exe: svm.h svm-scale.c
	$(CXX) $(CFLAGS) svm-scale.c -Fe$(TARGET)\svm-scale.exe

# The toy GUI links against the Win32 user/GDI/common-dialog libraries.
$(TARGET)\svm-toy.exe: svm.h svm.obj svm-toy\windows\svm-toy.cpp
	$(CXX) $(CFLAGS) svm-toy\windows\svm-toy.cpp svm.obj user32.lib gdi32.lib comdlg32.lib -Fe$(TARGET)\svm-toy.exe

svm.obj: svm.cpp svm.h
	$(CXX) $(CFLAGS) -c svm.cpp

# Build libsvm.dll, exporting the symbols listed in svm.def.
lib: svm.cpp svm.h svm.def
	$(CXX) $(CFLAGS) -LD svm.cpp -Fe$(TARGET)\libsvm -link -DEF:svm.def

# Leading '-' tells nmake to ignore erase failures (e.g. nothing to delete).
clean:
	-erase /Q *.obj $(TARGET)\.
data/ext/libsvm/README ADDED
@@ -0,0 +1,733 @@
1
+ Libsvm is a simple, easy-to-use, and efficient software for SVM
2
+ classification and regression. It solves C-SVM classification, nu-SVM
3
+ classification, one-class-SVM, epsilon-SVM regression, and nu-SVM
4
+ regression. It also provides an automatic model selection tool for
5
+ C-SVM classification. This document explains the use of libsvm.
6
+
7
+ Libsvm is available at
8
+ http://www.csie.ntu.edu.tw/~cjlin/libsvm
9
+ Please read the COPYRIGHT file before using libsvm.
10
+
11
+ Table of Contents
12
+ =================
13
+
14
+ - Quick Start
15
+ - Installation and Data Format
16
+ - `svm-train' Usage
17
+ - `svm-predict' Usage
18
+ - `svm-scale' Usage
19
+ - Tips on Practical Use
20
+ - Examples
21
+ - Precomputed Kernels
22
+ - Library Usage
23
+ - Java Version
24
+ - Building Windows Binaries
25
+ - Additional Tools: Sub-sampling, Parameter Selection, Format checking, etc.
26
+ - Python Interface
27
+ - Additional Information
28
+
29
+ Quick Start
30
+ ===========
31
+
32
+ If you are new to SVM and if the data is not large, please go to
33
+ `tools' directory and use easy.py after installation. It does
34
+ everything automatic -- from data scaling to parameter selection.
35
+
36
+ Usage: easy.py training_file [testing_file]
37
+
38
+ More information about parameter selection can be found in
39
+ `tools/README.'
40
+
41
+ Installation and Data Format
42
+ ============================
43
+
44
+ On Unix systems, type `make' to build the `svm-train' and `svm-predict'
45
+ programs. Run them without arguments to show the usages of them.
46
+
47
+ On other systems, consult `Makefile' to build them (e.g., see
48
+ 'Building Windows binaries' in this file) or use the pre-built
49
+ binaries (Windows binaries are in the directory `windows').
50
+
51
+ The format of training and testing data file is:
52
+
53
+ <label> <index1>:<value1> <index2>:<value2> ...
54
+ .
55
+ .
56
+ .
57
+
58
+ Each line contains an instance and is ended by a '\n' character. For
59
+ classification, <label> is an integer indicating the class label
60
+ (multi-class is supported). For regression, <label> is the target
61
+ value which can be any real number. For one-class SVM, it's not used
62
+ so can be any number. Except using precomputed kernels (explained in
63
+ another section), <index>:<value> gives a feature (attribute) value.
64
+ <index> is an integer starting from 1 and <value> is a real
65
+ number. Indices must be in ASCENDING order. Labels in the testing
66
+ file are only used to calculate accuracy or errors. If they are
67
+ unknown, just fill the first column with any numbers.
68
+
69
+ A sample classification data included in this package is
70
+ `heart_scale'. To check if your data is in a correct form, use
71
+ `tools/checkdata.py' (details in `tools/README').
72
+
73
+ Type `svm-train heart_scale', and the program will read the training
74
+ data and output the model file `heart_scale.model'. If you have a test
75
+ set called heart_scale.t, then type `svm-predict heart_scale.t
76
+ heart_scale.model output' to see the prediction accuracy. The `output'
77
+ file contains the predicted class labels.
78
+
79
+ There are some other useful programs in this package.
80
+
81
+ svm-scale:
82
+
83
+ This is a tool for scaling input data file.
84
+
85
+ svm-toy:
86
+
87
+ This is a simple graphical interface which shows how SVM
88
+ separate data in a plane. You can click in the window to
89
+ draw data points. Use "change" button to choose class
90
+ 1, 2 or 3 (i.e., up to three classes are supported), "load"
91
+ button to load data from a file, "save" button to save data to
92
+ a file, "run" button to obtain an SVM model, and "clear"
93
+ button to clear the window.
94
+
95
+ You can enter options in the bottom of the window, the syntax of
96
+ options is the same as `svm-train'.
97
+
98
+ Note that "load" and "save" consider data in the
99
+ classification but not the regression case. Each data point
100
+ has one label (the color) which must be 1, 2, or 3 and two
101
+ attributes (x-axis and y-axis values) in [0,1].
102
+
103
+ Type `make' in respective directories to build them.
104
+
105
+ You need Qt library to build the Qt version.
106
+ (available from http://www.trolltech.com)
107
+
108
+ You need GTK+ library to build the GTK version.
109
+ (available from http://www.gtk.org)
110
+
111
+ The pre-built Windows binaries are in the `windows'
112
+ directory. We use Visual C++ on a 32-bit machine, so the
113
+ maximal cache size is 2GB.
114
+
115
+ `svm-train' Usage
116
+ =================
117
+
118
+ Usage: svm-train [options] training_set_file [model_file]
119
+ options:
120
+ -s svm_type : set type of SVM (default 0)
121
+ 0 -- C-SVC
122
+ 1 -- nu-SVC
123
+ 2 -- one-class SVM
124
+ 3 -- epsilon-SVR
125
+ 4 -- nu-SVR
126
+ -t kernel_type : set type of kernel function (default 2)
127
+ 0 -- linear: u'*v
128
+ 1 -- polynomial: (gamma*u'*v + coef0)^degree
129
+ 2 -- radial basis function: exp(-gamma*|u-v|^2)
130
+ 3 -- sigmoid: tanh(gamma*u'*v + coef0)
131
+ 4 -- precomputed kernel (kernel values in training_set_file)
132
+ -d degree : set degree in kernel function (default 3)
133
+ -g gamma : set gamma in kernel function (default 1/num_features)
134
+ -r coef0 : set coef0 in kernel function (default 0)
135
+ -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
136
+ -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
137
+ -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
138
+ -m cachesize : set cache memory size in MB (default 100)
139
+ -e epsilon : set tolerance of termination criterion (default 0.001)
140
+ -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
141
+ -b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)
142
+ -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
143
+ -v n: n-fold cross validation mode
144
+ -q : quiet mode (no outputs)
145
+
146
+
147
+ num_features in the -g option means the number of attributes in the input data.
148
+
149
+ option -v randomly splits the data into n parts and calculates cross
150
+ validation accuracy/mean squared error on them.
151
+
152
+ See libsvm FAQ for the meaning of outputs.
153
+
154
+ `svm-predict' Usage
155
+ ===================
156
+
157
+ Usage: svm-predict [options] test_file model_file output_file
158
+ options:
159
+ -b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); for one-class SVM only 0 is supported
160
+
161
+ model_file is the model file generated by svm-train.
162
+ test_file is the test data you want to predict.
163
+ svm-predict will produce output in the output_file.
164
+
165
+ `svm-scale' Usage
166
+ =================
167
+
168
+ Usage: svm-scale [options] data_filename
169
+ options:
170
+ -l lower : x scaling lower limit (default -1)
171
+ -u upper : x scaling upper limit (default +1)
172
+ -y y_lower y_upper : y scaling limits (default: no y scaling)
173
+ -s save_filename : save scaling parameters to save_filename
174
+ -r restore_filename : restore scaling parameters from restore_filename
175
+
176
+ See 'Examples' in this file for examples.
177
+
178
+ Tips on Practical Use
179
+ =====================
180
+
181
+ * Scale your data. For example, scale each attribute to [0,1] or [-1,+1].
182
+ * For C-SVC, consider using the model selection tool in the tools directory.
183
+ * nu in nu-SVC/one-class-SVM/nu-SVR approximates the fraction of training
184
+ errors and support vectors.
185
+ * If data for classification are unbalanced (e.g. many positive and
186
+ few negative), try different penalty parameters C by -wi (see
187
+ examples below).
188
+ * Specify larger cache size (i.e., larger -m) for huge problems.
189
+
190
+ Examples
191
+ ========
192
+
193
+ > svm-scale -l -1 -u 1 -s range train > train.scale
194
+ > svm-scale -r range test > test.scale
195
+
196
+ Scale each feature of the training data to be in [-1,1]. Scaling
197
+ factors are stored in the file range and then used for scaling the
198
+ test data.
199
+
200
+ > svm-train -s 0 -c 5 -t 2 -g 0.5 -e 0.1 data_file
201
+
202
+ Train a classifier with RBF kernel exp(-0.5|u-v|^2), C=5, and
203
+ stopping tolerance 0.1.
204
+
205
+ > svm-train -s 3 -p 0.1 -t 0 data_file
206
+
207
+ Solve SVM regression with linear kernel u'v and epsilon=0.1
208
+ in the loss function.
209
+
210
+ > svm-train -c 10 -w1 1 -w2 5 -w4 2 data_file
211
+
212
+ Train a classifier with penalty 10 = 1 * 10 for class 1, penalty 50 =
213
+ 5 * 10 for class 2, and penalty 20 = 2 * 10 for class 4.
214
+
215
+ > svm-train -s 0 -c 100 -g 0.1 -v 5 data_file
216
+
217
+ Do five-fold cross validation for the classifier using
218
+ the parameters C = 100 and gamma = 0.1
219
+
220
+ > svm-train -s 0 -b 1 data_file
221
+ > svm-predict -b 1 test_file data_file.model output_file
222
+
223
+ Obtain a model with probability information and predict test data with
224
+ probability estimates
225
+
226
+ Precomputed Kernels
227
+ ===================
228
+
229
+ Users may precompute kernel values and input them as training and
230
+ testing files. Then libsvm does not need the original
231
+ training/testing sets.
232
+
233
+ Assume there are L training instances x1, ..., xL.
234
+ Let K(x, y) be the kernel
235
+ value of two instances x and y. The input formats
236
+ are:
237
+
238
+ New training instance for xi:
239
+
240
+ <label> 0:i 1:K(xi,x1) ... L:K(xi,xL)
241
+
242
+ New testing instance for any x:
243
+
244
+ <label> 0:? 1:K(x,x1) ... L:K(x,xL)
245
+
246
+ That is, in the training file the first column must be the "ID" of
247
+ xi. In testing, ? can be any value.
248
+
249
+ All kernel values including ZEROs must be explicitly provided. Any
250
+ permutation or random subsets of the training/testing files are also
251
+ valid (see examples below).
252
+
253
+ Note: the format is slightly different from the precomputed kernel
254
+ package released in libsvmtools earlier.
255
+
256
+ Examples:
257
+
258
+ Assume the original training data has three four-feature
259
+ instances and testing data has one instance:
260
+
261
+ 15 1:1 2:1 3:1 4:1
262
+ 45 2:3 4:3
263
+ 25 3:1
264
+
265
+ 15 1:1 3:1
266
+
267
+ If the linear kernel is used, we have the following new
268
+ training/testing sets:
269
+
270
+ 15 0:1 1:4 2:6 3:1
271
+ 45 0:2 1:6 2:18 3:0
272
+ 25 0:3 1:1 2:0 3:1
273
+
274
+ 15 0:? 1:2 2:0 3:1
275
+
276
+ ? can be any value.
277
+
278
+ Any subset of the above training file is also valid. For example,
279
+
280
+ 25 0:3 1:1 2:0 3:1
281
+ 45 0:2 1:6 2:18 3:0
282
+
283
+ implies that the kernel matrix is
284
+
285
+ [K(2,2) K(2,3)] = [18 0]
286
+ [K(3,2) K(3,3)] = [0 1]
287
+
288
+ Library Usage
289
+ =============
290
+
291
+ These functions and structures are declared in the header file
292
+ `svm.h'. You need to #include "svm.h" in your C/C++ source files and
293
+ link your program with `svm.cpp'. You can see `svm-train.c' and
294
+ `svm-predict.c' for examples showing how to use them. We define
295
+ LIBSVM_VERSION and declare `extern int libsvm_version; ' in svm.h, so
296
+ you can check the version number.
297
+
298
+ Before you classify test data, you need to construct an SVM model
299
+ (`svm_model') using training data. A model can also be saved in
300
+ a file for later use. Once an SVM model is available, you can use it
301
+ to classify new data.
302
+
303
+ - Function: struct svm_model *svm_train(const struct svm_problem *prob,
304
+ const struct svm_parameter *param);
305
+
306
+ This function constructs and returns an SVM model according to
307
+ the given training data and parameters.
308
+
309
+ struct svm_problem describes the problem:
310
+
311
+ struct svm_problem
312
+ {
313
+ int l;
314
+ double *y;
315
+ struct svm_node **x;
316
+ };
317
+
318
+ where `l' is the number of training data, and `y' is an array containing
319
+ their target values. (integers in classification, real numbers in
320
+ regression) `x' is an array of pointers, each of which points to a sparse
321
+ representation (array of svm_node) of one training vector.
322
+
323
+ For example, if we have the following training data:
324
+
325
+ LABEL ATTR1 ATTR2 ATTR3 ATTR4 ATTR5
326
+ ----- ----- ----- ----- ----- -----
327
+ 1 0 0.1 0.2 0 0
328
+ 2 0 0.1 0.3 -1.2 0
329
+ 1 0.4 0 0 0 0
330
+ 2 0 0.1 0 1.4 0.5
331
+ 3 -0.1 -0.2 0.1 1.1 0.1
332
+
333
+ then the components of svm_problem are:
334
+
335
+ l = 5
336
+
337
+ y -> 1 2 1 2 3
338
+
339
+ x -> [ ] -> (2,0.1) (3,0.2) (-1,?)
340
+ [ ] -> (2,0.1) (3,0.3) (4,-1.2) (-1,?)
341
+ [ ] -> (1,0.4) (-1,?)
342
+ [ ] -> (2,0.1) (4,1.4) (5,0.5) (-1,?)
343
+ [ ] -> (1,-0.1) (2,-0.2) (3,0.1) (4,1.1) (5,0.1) (-1,?)
344
+
345
+ where (index,value) is stored in the structure `svm_node':
346
+
347
+ struct svm_node
348
+ {
349
+ int index;
350
+ double value;
351
+ };
352
+
353
+ index = -1 indicates the end of one vector. Note that indices must
354
+ be in ASCENDING order.
355
+
356
+ struct svm_parameter describes the parameters of an SVM model:
357
+
358
+ struct svm_parameter
359
+ {
360
+ int svm_type;
361
+ int kernel_type;
362
+ int degree; /* for poly */
363
+ double gamma; /* for poly/rbf/sigmoid */
364
+ double coef0; /* for poly/sigmoid */
365
+
366
+ /* these are for training only */
367
+ double cache_size; /* in MB */
368
+ double eps; /* stopping criteria */
369
+ double C; /* for C_SVC, EPSILON_SVR, and NU_SVR */
370
+ int nr_weight; /* for C_SVC */
371
+ int *weight_label; /* for C_SVC */
372
+ double* weight; /* for C_SVC */
373
+ double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */
374
+ double p; /* for EPSILON_SVR */
375
+ int shrinking; /* use the shrinking heuristics */
376
+ int probability; /* do probability estimates */
377
+ };
378
+
379
+ svm_type can be one of C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR.
380
+
381
+ C_SVC: C-SVM classification
382
+ NU_SVC: nu-SVM classification
383
+ ONE_CLASS: one-class-SVM
384
+ EPSILON_SVR: epsilon-SVM regression
385
+ NU_SVR: nu-SVM regression
386
+
387
+ kernel_type can be one of LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED.
388
+
389
+ LINEAR: u'*v
390
+ POLY: (gamma*u'*v + coef0)^degree
391
+ RBF: exp(-gamma*|u-v|^2)
392
+ SIGMOID: tanh(gamma*u'*v + coef0)
393
+ PRECOMPUTED: kernel values in training_set_file
394
+
395
+ cache_size is the size of the kernel cache, specified in megabytes.
396
+ C is the cost of constraints violation.
397
+ eps is the stopping criterion. (we usually use 0.00001 in nu-SVC,
398
+ 0.001 in others). nu is the parameter in nu-SVM, nu-SVR, and
399
+ one-class-SVM. p is the epsilon in epsilon-insensitive loss function
400
+ of epsilon-SVM regression. shrinking = 1 means shrinking is conducted;
401
+ = 0 otherwise. probability = 1 means model with probability
402
+ information is obtained; = 0 otherwise.
403
+
404
+ nr_weight, weight_label, and weight are used to change the penalty
405
+ for some classes (If the weight for a class is not changed, it is
406
+ set to 1). This is useful for training classifier using unbalanced
407
+ input data or with asymmetric misclassification cost.
408
+
409
+ nr_weight is the number of elements in the array weight_label and
410
+ weight. Each weight[i] corresponds to weight_label[i], meaning that
411
+ the penalty of class weight_label[i] is scaled by a factor of weight[i].
412
+
413
+ If you do not want to change penalty for any of the classes,
414
+ just set nr_weight to 0.
415
+
416
+ *NOTE* Because svm_model contains pointers to svm_problem, you can
417
+ not free the memory used by svm_problem if you are still using the
418
+ svm_model produced by svm_train().
419
+
420
+ *NOTE* To avoid wrong parameters, svm_check_parameter() should be
421
+ called before svm_train().
422
+
423
+ struct svm_model stores the model obtained from the training procedure.
424
+ It is not recommended to directly access entries in this structure.
425
+ Programmers should use the interface functions to get the values.
426
+
427
+ struct svm_model
428
+ {
429
+ struct svm_parameter param; /* parameter */
430
+ int nr_class; /* number of classes, = 2 in regression/one class svm */
431
+ int l; /* total #SV */
432
+ struct svm_node **SV; /* SVs (SV[l]) */
433
+ double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
434
+ double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
435
+ double *probA; /* pairwise probability information */
436
+ double *probB;
437
+
438
+ /* for classification only */
439
+
440
+ int *label; /* label of each class (label[k]) */
441
+ int *nSV; /* number of SVs for each class (nSV[k]) */
442
+ /* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
443
+ /* XXX */
444
+ int free_sv; /* 1 if svm_model is created by svm_load_model*/
445
+ /* 0 if svm_model is created by svm_train */
446
+ };
447
+
448
+ param describes the parameters used to obtain the model.
449
+
450
+ nr_class is the number of classes. It is 2 for regression and one-class SVM.
451
+
452
+ l is the number of support vectors. SV and sv_coef are support
453
+ vectors and the corresponding coefficients, respectively. Assume there are
454
+ k classes. For data in class j, the corresponding sv_coef includes (k-1) y*alpha vectors,
455
+ where alpha's are solutions of the following two class problems:
456
+ 1 vs j, 2 vs j, ..., j-1 vs j, j vs j+1, j vs j+2, ..., j vs k
457
+ and y=1 for the first j-1 vectors, while y=-1 for the remaining k-j
458
+ vectors. For example, if there are 4 classes, sv_coef and SV are like:
459
+
460
+ +-+-+-+--------------------+
461
+ |1|1|1| |
462
+ |v|v|v| SVs from class 1 |
463
+ |2|3|4| |
464
+ +-+-+-+--------------------+
465
+ |1|2|2| |
466
+ |v|v|v| SVs from class 2 |
467
+ |2|3|4| |
468
+ +-+-+-+--------------------+
469
+ |1|2|3| |
470
+ |v|v|v| SVs from class 3 |
471
+ |3|3|4| |
472
+ +-+-+-+--------------------+
473
+ |1|2|3| |
474
+ |v|v|v| SVs from class 4 |
475
+ |4|4|4| |
476
+ +-+-+-+--------------------+
477
+
478
+ See svm_train() for an example of assigning values to sv_coef.
479
+
480
+ rho is the bias term (-b). probA and probB are parameters used in
481
+ probability outputs. If there are k classes, there are k*(k-1)/2
482
+ binary problems as well as rho, probA, and probB values. They are
483
+ aligned in the order of binary problems:
484
+ 1 vs 2, 1 vs 3, ..., 1 vs k, 2 vs 3, ..., 2 vs k, ..., k-1 vs k.
485
+
486
+ label contains labels in the training data.
487
+
488
+ nSV is the number of support vectors in each class.
489
+
490
+ free_sv is a flag used to determine whether the space of SV should
491
+ be released in free_model_content(struct svm_model*) and
492
+ free_and_destroy_model(struct svm_model**). If the model is
493
+ generated by svm_train(), then SV points to data in svm_problem
494
+ and should not be removed. For example, free_sv is 0 if svm_model
495
+ is created by svm_train, but is 1 if created by svm_load_model.
496
+
497
+ - Function: double svm_predict(const struct svm_model *model,
498
+ const struct svm_node *x);
499
+
500
+ This function does classification or regression on a test vector x
501
+ given a model.
502
+
503
+ For a classification model, the predicted class for x is returned.
504
+ For a regression model, the function value of x calculated using
505
+ the model is returned. For an one-class model, +1 or -1 is
506
+ returned.
507
+
508
+ - Function: void svm_cross_validation(const struct svm_problem *prob,
509
+ const struct svm_parameter *param, int nr_fold, double *target);
510
+
511
+ This function conducts cross validation. Data are separated to
512
+ nr_fold folds. Under given parameters, sequentially each fold is
513
+ validated using the model from training the remaining. Predicted
514
+ labels (of all prob's instances) in the validation process are
515
+ stored in the array called target.
516
+
517
+ The format of svm_prob is same as that for svm_train().
518
+
519
+ - Function: int svm_get_svm_type(const struct svm_model *model);
520
+
521
+ This function gives svm_type of the model. Possible values of
522
+ svm_type are defined in svm.h.
523
+
524
+ - Function: int svm_get_nr_class(const svm_model *model);
525
+
526
+ For a classification model, this function gives the number of
527
+ classes. For a regression or an one-class model, 2 is returned.
528
+
529
+ - Function: void svm_get_labels(const svm_model *model, int* label)
530
+
531
+ For a classification model, this function outputs the name of
532
+ labels into an array called label. For regression and one-class
533
+ models, label is unchanged.
534
+
535
+ - Function: double svm_get_svr_probability(const struct svm_model *model);
536
+
537
+ For a regression model with probability information, this function
538
+ outputs a value sigma > 0. For test data, we consider the
539
+ probability model: target value = predicted value + z, z: Laplace
540
+ distribution e^(-|z|/sigma)/(2sigma)
541
+
542
+ If the model is not for svr or does not contain required
543
+ information, 0 is returned.
544
+
545
+ - Function: double svm_predict_values(const svm_model *model,
546
+ const svm_node *x, double* dec_values)
547
+
548
+ This function gives decision values on a test vector x given a
549
+ model, and return the predicted label (classification) or
550
+ the function value (regression).
551
+
552
+ For a classification model with nr_class classes, this function
553
+ gives nr_class*(nr_class-1)/2 decision values in the array
554
+ dec_values, where nr_class can be obtained from the function
555
+ svm_get_nr_class. The order is label[0] vs. label[1], ...,
556
+ label[0] vs. label[nr_class-1], label[1] vs. label[2], ...,
557
+ label[nr_class-2] vs. label[nr_class-1], where label can be
558
+ obtained from the function svm_get_labels. The returned value is
559
+ the predicted class for x.
560
+
561
+ For a regression model, dec_values[0] and the returned value are
562
+ both the function value of x calculated using the model. For a
563
+ one-class model, dec_values[0] is the decision value of x, while
564
+ the returned value is +1/-1.
565
+
566
+ - Function: double svm_predict_probability(const struct svm_model *model,
567
+ const struct svm_node *x, double* prob_estimates);
568
+
569
+ This function does classification or regression on a test vector x
570
+ given a model with probability information.
571
+
572
+ For a classification model with probability information, this
573
+ function gives nr_class probability estimates in the array
574
+ prob_estimates. nr_class can be obtained from the function
575
+ svm_get_nr_class. The class with the highest probability is
576
+ returned. For regression/one-class SVM, the array prob_estimates
577
+ is unchanged and the returned value is the same as that of
578
+ svm_predict.
579
+
580
+ - Function: const char *svm_check_parameter(const struct svm_problem *prob,
581
+ const struct svm_parameter *param);
582
+
583
+ This function checks whether the parameters are within the feasible
584
+ range of the problem. This function should be called before calling
585
+ svm_train() and svm_cross_validation(). It returns NULL if the
586
+ parameters are feasible, otherwise an error message is returned.
587
+
588
+ - Function: int svm_check_probability_model(const struct svm_model *model);
589
+
590
+ This function checks whether the model contains required
591
+ information to do probability estimates. If so, it returns
592
+ +1. Otherwise, 0 is returned. This function should be called
593
+ before calling svm_get_svr_probability and
594
+ svm_predict_probability.
595
+
596
+ - Function: int svm_save_model(const char *model_file_name,
597
+ const struct svm_model *model);
598
+
599
+ This function saves a model to a file; returns 0 on success, or -1
600
+ if an error occurs.
601
+
602
+ - Function: struct svm_model *svm_load_model(const char *model_file_name);
603
+
604
+ This function returns a pointer to the model read from the file,
605
+ or a null pointer if the model could not be loaded.
606
+
607
+ - Function: void svm_free_model_content(struct svm_model *model_ptr);
608
+
609
+ This function frees the memory used by the entries in a model structure.
610
+
611
+ - Function: void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
612
+
613
+ This function frees the memory used by a model and destroys the model
614
+ structure. It is equivalent to svm_destroy_model, which
615
+ is deprecated after version 3.0.
616
+
617
+ - Function: void svm_destroy_param(struct svm_parameter *param);
618
+
619
+ This function frees the memory used by a parameter set.
620
+
621
+ - Function: void svm_set_print_string_function(void (*print_func)(const char *));
622
+
623
+ Users can specify their output format by a function. Use
624
+ svm_set_print_string_function(NULL);
625
+ for default printing to stdout.
626
+
627
+ Java Version
628
+ ============
629
+
630
+ The pre-compiled java class archive `libsvm.jar' and its source files are
631
+ in the java directory. To run the programs, use
632
+
633
+ java -classpath libsvm.jar svm_train <arguments>
634
+ java -classpath libsvm.jar svm_predict <arguments>
635
+ java -classpath libsvm.jar svm_toy
636
+ java -classpath libsvm.jar svm_scale <arguments>
637
+
638
+ Note that you need Java 1.5 (5.0) or above to run it.
639
+
640
+ You may need to add Java runtime library (like classes.zip) to the classpath.
641
+ You may need to increase maximum Java heap size.
642
+
643
+ Library usages are similar to the C version. These functions are available:
644
+
645
+ public class svm {
646
+ public static final int LIBSVM_VERSION=300;
647
+ public static svm_model svm_train(svm_problem prob, svm_parameter param);
648
+ public static void svm_cross_validation(svm_problem prob, svm_parameter param, int nr_fold, double[] target);
649
+ public static int svm_get_svm_type(svm_model model);
650
+ public static int svm_get_nr_class(svm_model model);
651
+ public static void svm_get_labels(svm_model model, int[] label);
652
+ public static double svm_get_svr_probability(svm_model model);
653
+ public static double svm_predict_values(svm_model model, svm_node[] x, double[] dec_values);
654
+ public static double svm_predict(svm_model model, svm_node[] x);
655
+ public static double svm_predict_probability(svm_model model, svm_node[] x, double[] prob_estimates);
656
+ public static void svm_save_model(String model_file_name, svm_model model) throws IOException
657
+ public static svm_model svm_load_model(String model_file_name) throws IOException
658
+ public static String svm_check_parameter(svm_problem prob, svm_parameter param);
659
+ public static int svm_check_probability_model(svm_model model);
660
+ public static void svm_set_print_string_function(svm_print_interface print_func);
661
+ }
662
+
663
+ The library is in the "libsvm" package.
664
+ Note that in Java version, svm_node[] is not ended with a node whose index = -1.
665
+
666
+ Users can specify their output format by
667
+
668
+ your_print_func = new svm_print_interface()
669
+ {
670
+ public void print(String s)
671
+ {
672
+ // your own format
673
+ }
674
+ };
675
+ svm.svm_set_print_string_function(your_print_func);
676
+
677
+ Building Windows Binaries
678
+ =========================
679
+
680
+ Windows binaries are in the directory `windows'. To build them via
681
+ Visual C++, use the following steps:
682
+
683
+ 1. Open a DOS command box (or Visual Studio Command Prompt) and change
684
+ to libsvm directory. If environment variables of VC++ have not been
685
+ set, type
686
+
687
+ "C:\Program Files\Microsoft Visual Studio 10.0\VC\bin\vcvars32.bat"
688
+
689
+ You may have to modify the above command according which version of
690
+ VC++ or where it is installed.
691
+
692
+ 2. Type
693
+
694
+ nmake -f Makefile.win clean all
695
+
696
+ 3. (optional) To build shared library libsvm.dll, type
697
+
698
+ nmake -f Makefile.win lib
699
+
700
+ Another way is to build them from Visual C++ environment. See details
701
+ in libsvm FAQ.
702
+
703
+ - Additional Tools: Sub-sampling, Parameter Selection, Format checking, etc.
704
+ ============================================================================
705
+
706
+ See the README file in the tools directory.
707
+
708
+ Python Interface
709
+ ================
710
+
711
+ See the README file in python directory.
712
+
713
+ Additional Information
714
+ ======================
715
+
716
+ If you find LIBSVM helpful, please cite it as
717
+
718
+ Chih-Chung Chang and Chih-Jen Lin, LIBSVM: a library for
719
+ support vector machines, 2001.
720
+ Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm
721
+
722
+ LIBSVM implementation document is available at
723
+ http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf
724
+
725
+ For any questions and comments, please email cjlin@csie.ntu.edu.tw
726
+
727
+ Acknowledgments:
728
+ This work was supported in part by the National Science
729
+ Council of Taiwan via the grant NSC 89-2213-E-002-013.
730
+ The authors thank their group members and users
731
+ for many helpful discussions and comments. They are listed in
732
+ http://www.csie.ntu.edu.tw/~cjlin/libsvm/acknowledgements
733
+