eluka 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. data/.document +5 -0
  2. data/DOCUMENTATION_STANDARDS +39 -0
  3. data/Gemfile +13 -0
  4. data/Gemfile.lock +20 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +19 -0
  7. data/Rakefile +69 -0
  8. data/VERSION +1 -0
  9. data/examples/example.rb +59 -0
  10. data/ext/libsvm/COPYRIGHT +31 -0
  11. data/ext/libsvm/FAQ.html +1749 -0
  12. data/ext/libsvm/Makefile +25 -0
  13. data/ext/libsvm/Makefile.win +33 -0
  14. data/ext/libsvm/README +733 -0
  15. data/ext/libsvm/extconf.rb +1 -0
  16. data/ext/libsvm/heart_scale +270 -0
  17. data/ext/libsvm/java/Makefile +25 -0
  18. data/ext/libsvm/java/libsvm.jar +0 -0
  19. data/ext/libsvm/java/libsvm/svm.java +2776 -0
  20. data/ext/libsvm/java/libsvm/svm.m4 +2776 -0
  21. data/ext/libsvm/java/libsvm/svm_model.java +21 -0
  22. data/ext/libsvm/java/libsvm/svm_node.java +6 -0
  23. data/ext/libsvm/java/libsvm/svm_parameter.java +47 -0
  24. data/ext/libsvm/java/libsvm/svm_print_interface.java +5 -0
  25. data/ext/libsvm/java/libsvm/svm_problem.java +7 -0
  26. data/ext/libsvm/java/svm_predict.java +163 -0
  27. data/ext/libsvm/java/svm_scale.java +350 -0
  28. data/ext/libsvm/java/svm_toy.java +471 -0
  29. data/ext/libsvm/java/svm_train.java +318 -0
  30. data/ext/libsvm/java/test_applet.html +1 -0
  31. data/ext/libsvm/python/Makefile +4 -0
  32. data/ext/libsvm/python/README +331 -0
  33. data/ext/libsvm/python/svm.py +259 -0
  34. data/ext/libsvm/python/svmutil.py +242 -0
  35. data/ext/libsvm/svm-predict.c +226 -0
  36. data/ext/libsvm/svm-scale.c +353 -0
  37. data/ext/libsvm/svm-toy/gtk/Makefile +22 -0
  38. data/ext/libsvm/svm-toy/gtk/callbacks.cpp +423 -0
  39. data/ext/libsvm/svm-toy/gtk/callbacks.h +54 -0
  40. data/ext/libsvm/svm-toy/gtk/interface.c +164 -0
  41. data/ext/libsvm/svm-toy/gtk/interface.h +14 -0
  42. data/ext/libsvm/svm-toy/gtk/main.c +23 -0
  43. data/ext/libsvm/svm-toy/gtk/svm-toy.glade +238 -0
  44. data/ext/libsvm/svm-toy/qt/Makefile +17 -0
  45. data/ext/libsvm/svm-toy/qt/svm-toy.cpp +413 -0
  46. data/ext/libsvm/svm-toy/windows/svm-toy.cpp +456 -0
  47. data/ext/libsvm/svm-train.c +376 -0
  48. data/ext/libsvm/svm.cpp +3060 -0
  49. data/ext/libsvm/svm.def +19 -0
  50. data/ext/libsvm/svm.h +105 -0
  51. data/ext/libsvm/svm.o +0 -0
  52. data/ext/libsvm/tools/README +149 -0
  53. data/ext/libsvm/tools/checkdata.py +108 -0
  54. data/ext/libsvm/tools/easy.py +79 -0
  55. data/ext/libsvm/tools/grid.py +359 -0
  56. data/ext/libsvm/tools/subset.py +146 -0
  57. data/ext/libsvm/windows/libsvm.dll +0 -0
  58. data/ext/libsvm/windows/svm-predict.exe +0 -0
  59. data/ext/libsvm/windows/svm-scale.exe +0 -0
  60. data/ext/libsvm/windows/svm-toy.exe +0 -0
  61. data/ext/libsvm/windows/svm-train.exe +0 -0
  62. data/lib/eluka.rb +10 -0
  63. data/lib/eluka/bijection.rb +23 -0
  64. data/lib/eluka/data_point.rb +36 -0
  65. data/lib/eluka/document.rb +47 -0
  66. data/lib/eluka/feature_vector.rb +86 -0
  67. data/lib/eluka/features.rb +31 -0
  68. data/lib/eluka/model.rb +129 -0
  69. data/lib/fselect.rb +321 -0
  70. data/lib/grid.rb +25 -0
  71. data/test/helper.rb +18 -0
  72. data/test/test_eluka.rb +7 -0
  73. metadata +214 -0
data/ext/libsvm/Makefile ADDED
@@ -0,0 +1,25 @@
+ CXX ?= g++
+ CFLAGS = -Wall -Wconversion -O3 -fPIC
+ SHVER = 2
+
+ all: svm-train svm-predict svm-scale
+
+ lib: svm.o
+ 	$(CXX) -shared -dynamiclib svm.o -o libsvm.so.$(SHVER)
+
+ svm-predict: svm-predict.c svm.o
+ 	$(CXX) $(CFLAGS) svm-predict.c svm.o -o eluka-svm-predict -lm
+ svm-train: svm-train.c svm.o
+ 	$(CXX) $(CFLAGS) svm-train.c svm.o -o eluka-svm-train -lm
+ svm-scale: svm-scale.c
+ 	$(CXX) $(CFLAGS) svm-scale.c -o eluka-svm-scale
+ svm.o: svm.cpp svm.h
+ 	$(CXX) $(CFLAGS) -c svm.cpp
+ clean:
+ 	rm -f *~ svm.o eluka-svm-train eluka-svm-predict eluka-svm-scale libsvm.so.$(SHVER)
+
+ install:
+ 	mkdir -p ../../bin
+ 	cp eluka-svm-train ../../bin
+ 	cp eluka-svm-predict ../../bin
+ 	cp eluka-svm-scale ../../bin
data/ext/libsvm/Makefile.win ADDED
@@ -0,0 +1,33 @@
+ #You must ensure nmake.exe, cl.exe, link.exe are in system path.
+ #VCVARS32.bat
+ #Under dosbox prompt
+ #nmake -f Makefile.win
+
+ ##########################################
+ CXX = cl.exe
+ CFLAGS = -nologo -O2 -EHsc -I. -D __WIN32__ -D _CRT_SECURE_NO_DEPRECATE
+ TARGET = windows
+
+ all: $(TARGET)\svm-train.exe $(TARGET)\svm-predict.exe $(TARGET)\svm-scale.exe $(TARGET)\svm-toy.exe lib
+
+ $(TARGET)\svm-predict.exe: svm.h svm-predict.c svm.obj
+ 	$(CXX) $(CFLAGS) svm-predict.c svm.obj -Fe$(TARGET)\svm-predict.exe
+
+ $(TARGET)\svm-train.exe: svm.h svm-train.c svm.obj
+ 	$(CXX) $(CFLAGS) svm-train.c svm.obj -Fe$(TARGET)\svm-train.exe
+
+ $(TARGET)\svm-scale.exe: svm.h svm-scale.c
+ 	$(CXX) $(CFLAGS) svm-scale.c -Fe$(TARGET)\svm-scale.exe
+
+ $(TARGET)\svm-toy.exe: svm.h svm.obj svm-toy\windows\svm-toy.cpp
+ 	$(CXX) $(CFLAGS) svm-toy\windows\svm-toy.cpp svm.obj user32.lib gdi32.lib comdlg32.lib -Fe$(TARGET)\svm-toy.exe
+
+ svm.obj: svm.cpp svm.h
+ 	$(CXX) $(CFLAGS) -c svm.cpp
+
+ lib: svm.cpp svm.h svm.def
+ 	$(CXX) $(CFLAGS) -LD svm.cpp -Fe$(TARGET)\libsvm -link -DEF:svm.def
+
+ clean:
+ 	-erase /Q *.obj $(TARGET)\.
+
data/ext/libsvm/README ADDED
@@ -0,0 +1,733 @@
+ Libsvm is a simple, easy-to-use, and efficient software for SVM
+ classification and regression. It solves C-SVM classification, nu-SVM
+ classification, one-class-SVM, epsilon-SVM regression, and nu-SVM
+ regression. It also provides an automatic model selection tool for
+ C-SVM classification. This document explains the use of libsvm.
+
+ Libsvm is available at
+ http://www.csie.ntu.edu.tw/~cjlin/libsvm
+ Please read the COPYRIGHT file before using libsvm.
+
+ Table of Contents
+ =================
+
+ - Quick Start
+ - Installation and Data Format
+ - `svm-train' Usage
+ - `svm-predict' Usage
+ - `svm-scale' Usage
+ - Tips on Practical Use
+ - Examples
+ - Precomputed Kernels
+ - Library Usage
+ - Java Version
+ - Building Windows Binaries
+ - Additional Tools: Sub-sampling, Parameter Selection, Format checking, etc.
+ - Python Interface
+ - Additional Information
+
+ Quick Start
+ ===========
+
+ If you are new to SVM and your data is not large, please go to the
+ `tools' directory and use easy.py after installation. It does
+ everything automatically -- from data scaling to parameter selection.
+
+ Usage: easy.py training_file [testing_file]
+
+ More information about parameter selection can be found in
+ `tools/README.'
+
+ Installation and Data Format
+ ============================
+
+ On Unix systems, type `make' to build the `svm-train' and `svm-predict'
+ programs. Run them without arguments to show their usage.
+
+ On other systems, consult `Makefile' to build them (e.g., see
+ 'Building Windows binaries' in this file) or use the pre-built
+ binaries (Windows binaries are in the directory `windows').
+
+ The format of training and testing data files is:
+
+ <label> <index1>:<value1> <index2>:<value2> ...
+ .
+ .
+ .
+
+ Each line contains one instance and is ended by a '\n' character. For
+ classification, <label> is an integer indicating the class label
+ (multi-class is supported). For regression, <label> is the target
+ value, which can be any real number. For one-class SVM, it is not
+ used, so it can be any number. Except when using precomputed kernels
+ (explained in another section), each <index>:<value> pair gives a
+ feature (attribute) value: <index> is an integer starting from 1 and
+ <value> is a real number. Indices must be in ASCENDING order. Labels
+ in the testing file are only used to calculate accuracy or errors. If
+ they are unknown, just fill the first column with any numbers.
+
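+ For example, a small two-class training file in this format might look
+ like the following (the values here are purely illustrative):
+
+ +1 1:0.708 2:1 3:1 4:-0.320
+ -1 1:0.583 2:-1 3:0.333 4:-0.603
+ +1 2:1 4:-0.433
+
+ Features whose value is zero (such as indices 1 and 3 in the last
+ line) may simply be omitted; this is what makes the format sparse.
+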
+ A sample classification data set included in this package is
+ `heart_scale'. To check if your data is in a correct form, use
+ `tools/checkdata.py' (details in `tools/README').
+
+ Type `svm-train heart_scale', and the program will read the training
+ data and output the model file `heart_scale.model'. If you have a test
+ set called heart_scale.t, then type `svm-predict heart_scale.t
+ heart_scale.model output' to see the prediction accuracy. The `output'
+ file contains the predicted class labels.
+
+ There are some other useful programs in this package.
+
+ svm-scale:
+
+ This is a tool for scaling input data files.
+
+ svm-toy:
+
+ This is a simple graphical interface which shows how SVM
+ separates data in a plane. You can click in the window to
+ draw data points. Use the "change" button to choose class
+ 1, 2 or 3 (i.e., up to three classes are supported), the "load"
+ button to load data from a file, the "save" button to save data to
+ a file, the "run" button to obtain an SVM model, and the "clear"
+ button to clear the window.
+
+ You can enter options at the bottom of the window; the syntax of
+ options is the same as for `svm-train'.
+
+ Note that "load" and "save" consider data in the
+ classification case but not the regression case. Each data point
+ has one label (the color), which must be 1, 2, or 3, and two
+ attributes (x-axis and y-axis values) in [0,1].
+
+ Type `make' in the respective directories to build them.
+
+ You need the Qt library to build the Qt version.
+ (available from http://www.trolltech.com)
+
+ You need the GTK+ library to build the GTK version.
+ (available from http://www.gtk.org)
+
+ The pre-built Windows binaries are in the `windows'
+ directory. We use Visual C++ on a 32-bit machine, so the
+ maximal cache size is 2GB.
+
+ `svm-train' Usage
+ =================
+
+ Usage: svm-train [options] training_set_file [model_file]
+ options:
+ -s svm_type : set type of SVM (default 0)
+ 	0 -- C-SVC
+ 	1 -- nu-SVC
+ 	2 -- one-class SVM
+ 	3 -- epsilon-SVR
+ 	4 -- nu-SVR
+ -t kernel_type : set type of kernel function (default 2)
+ 	0 -- linear: u'*v
+ 	1 -- polynomial: (gamma*u'*v + coef0)^degree
+ 	2 -- radial basis function: exp(-gamma*|u-v|^2)
+ 	3 -- sigmoid: tanh(gamma*u'*v + coef0)
+ 	4 -- precomputed kernel (kernel values in training_set_file)
+ -d degree : set degree in kernel function (default 3)
+ -g gamma : set gamma in kernel function (default 1/num_features)
+ -r coef0 : set coef0 in kernel function (default 0)
+ -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
+ -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
+ -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
+ -m cachesize : set cache memory size in MB (default 100)
+ -e epsilon : set tolerance of termination criterion (default 0.001)
+ -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
+ -b probability_estimates : whether to train an SVC or SVR model for probability estimates, 0 or 1 (default 0)
+ -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
+ -v n: n-fold cross validation mode
+ -q : quiet mode (no outputs)
+
+
+ num_features in the -g option means the number of attributes in the input data.
+
+ The -v option randomly splits the data into n parts and calculates the cross
+ validation accuracy/mean squared error on them.
+
+ See the libsvm FAQ for the meaning of the outputs.
+
+ `svm-predict' Usage
+ ===================
+
+ Usage: svm-predict [options] test_file model_file output_file
+ options:
+ -b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); for one-class SVM only 0 is supported
+
+ model_file is the model file generated by svm-train.
+ test_file is the test data you want to predict.
+ svm-predict will produce output in the output_file.
+
+ `svm-scale' Usage
+ =================
+
+ Usage: svm-scale [options] data_filename
+ options:
+ -l lower : x scaling lower limit (default -1)
+ -u upper : x scaling upper limit (default +1)
+ -y y_lower y_upper : y scaling limits (default: no y scaling)
+ -s save_filename : save scaling parameters to save_filename
+ -r restore_filename : restore scaling parameters from restore_filename
+
+ See 'Examples' in this file for examples.
+
+ Tips on Practical Use
+ =====================
+
+ * Scale your data. For example, scale each attribute to [0,1] or [-1,+1].
+ * For C-SVC, consider using the model selection tool in the tools directory.
+ * nu in nu-SVC/one-class-SVM/nu-SVR approximates the fraction of training
+   errors and support vectors.
+ * If data for classification are unbalanced (e.g. many positive and
+   few negative), try different penalty parameters C by -wi (see
+   examples below).
+ * Specify a larger cache size (i.e., a larger -m) for huge problems.
+
+ Examples
+ ========
+
+ > svm-scale -l -1 -u 1 -s range train > train.scale
+ > svm-scale -r range test > test.scale
+
+ Scale each feature of the training data to be in [-1,1]. Scaling
+ factors are stored in the file range and then used for scaling the
+ test data.
+
+ > svm-train -s 0 -c 5 -t 2 -g 0.5 -e 0.1 data_file
+
+ Train a classifier with RBF kernel exp(-0.5|u-v|^2), C=5, and
+ stopping tolerance 0.1.
+
+ > svm-train -s 3 -p 0.1 -t 0 data_file
+
+ Solve SVM regression with linear kernel u'v and epsilon=0.1
+ in the loss function.
+
+ > svm-train -c 10 -w1 1 -w2 5 -w4 2 data_file
+
+ Train a classifier with penalty 10 = 1 * 10 for class 1, penalty 50 =
+ 5 * 10 for class 2, and penalty 20 = 2 * 10 for class 4.
+
+ > svm-train -s 0 -c 100 -g 0.1 -v 5 data_file
+
+ Do five-fold cross validation for the classifier using
+ the parameters C = 100 and gamma = 0.1.
+
+ > svm-train -s 0 -b 1 data_file
+ > svm-predict -b 1 test_file data_file.model output_file
+
+ Obtain a model with probability information and predict test data with
+ probability estimates.
+
+ Precomputed Kernels
+ ===================
+
+ Users may precompute kernel values and input them as training and
+ testing files. Then libsvm does not need the original
+ training/testing sets.
+
+ Assume there are L training instances x1, ..., xL.
+ Let K(x, y) be the kernel
+ value of two instances x and y. The input formats
+ are:
+
+ New training instance for xi:
+
+ <label> 0:i 1:K(xi,x1) ... L:K(xi,xL)
+
+ New testing instance for any x:
+
+ <label> 0:? 1:K(x,x1) ... L:K(x,xL)
+
+ That is, in the training file the first column must be the "ID" of
+ xi. In testing, ? can be any value.
+
+ All kernel values, including ZEROs, must be explicitly provided. Any
+ permutation or random subsets of the training/testing files are also
+ valid (see examples below).
+
+ Note: the format is slightly different from the precomputed kernel
+ package released in libsvmtools earlier.
+
+ Examples:
+
+ Assume the original training data has three four-feature
+ instances and the testing data has one instance:
+
+ 15 1:1 2:1 3:1 4:1
+ 45 2:3 4:3
+ 25 3:1
+
+ 15 1:1 3:1
+
+ If the linear kernel is used, we have the following new
+ training/testing sets:
+
+ 15 0:1 1:4 2:6 3:1
+ 45 0:2 1:6 2:18 3:0
+ 25 0:3 1:1 2:0 3:1
+
+ 15 0:? 1:2 2:0 3:1
+
+ ? can be any value.
+
+ Any subset of the above training file is also valid. For example,
+
+ 25 0:3 1:1 2:0 3:1
+ 45 0:2 1:6 2:18 3:0
+
+ implies that the kernel matrix is
+
+ [K(2,2) K(2,3)] = [18 0]
+ [K(3,2) K(3,3)] = [0 1]
+
+ Library Usage
+ =============
+
+ These functions and structures are declared in the header file
+ `svm.h'. You need to #include "svm.h" in your C/C++ source files and
+ link your program with `svm.cpp'. You can see `svm-train.c' and
+ `svm-predict.c' for examples showing how to use them. We define
+ LIBSVM_VERSION and declare `extern int libsvm_version;' in svm.h, so
+ you can check the version number.
+
+ Before you classify test data, you need to construct an SVM model
+ (`svm_model') using training data. A model can also be saved in
+ a file for later use. Once an SVM model is available, you can use it
+ to classify new data.
+
+ - Function: struct svm_model *svm_train(const struct svm_problem *prob,
+ 			const struct svm_parameter *param);
+
+ This function constructs and returns an SVM model according to
+ the given training data and parameters.
+
+ struct svm_problem describes the problem:
+
+ struct svm_problem
+ {
+ 	int l;
+ 	double *y;
+ 	struct svm_node **x;
+ };
+
+ where `l' is the number of training data, and `y' is an array containing
+ their target values (integers in classification, real numbers in
+ regression). `x' is an array of pointers, each of which points to a sparse
+ representation (an array of svm_node) of one training vector.
+
+ For example, if we have the following training data:
+
+ LABEL    ATTR1    ATTR2    ATTR3    ATTR4    ATTR5
+ -----    -----    -----    -----    -----    -----
+     1        0      0.1      0.2        0        0
+     2        0      0.1      0.3     -1.2        0
+     1      0.4        0        0        0        0
+     2        0      0.1        0      1.4      0.5
+     3     -0.1     -0.2      0.1      1.1      0.1
+
+ then the components of svm_problem are:
+
+ l = 5
+
+ y -> 1 2 1 2 3
+
+ x -> [ ] -> (2,0.1) (3,0.2) (-1,?)
+      [ ] -> (2,0.1) (3,0.3) (4,-1.2) (-1,?)
+      [ ] -> (1,0.4) (-1,?)
+      [ ] -> (2,0.1) (4,1.4) (5,0.5) (-1,?)
+      [ ] -> (1,-0.1) (2,-0.2) (3,0.1) (4,1.1) (5,0.1) (-1,?)
+
+ where (index,value) is stored in the structure `svm_node':
+
+ struct svm_node
+ {
+ 	int index;
+ 	double value;
+ };
+
+ index = -1 indicates the end of one vector. Note that indices must
+ be in ASCENDING order.
+
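+ As an illustrative sketch (not part of the library itself), the first
+ training vector above could be filled in like this:
+
+ 	struct svm_node row[3];
+ 	row[0].index = 2; row[0].value = 0.1;
+ 	row[1].index = 3; row[1].value = 0.2;
+ 	row[2].index = -1;		/* terminator; its value is unused */
+
+ 	prob.x[0] = row;		/* prob is the struct svm_problem above */
+ 	prob.y[0] = 1;			/* its class label */
+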
+ struct svm_parameter describes the parameters of an SVM model:
+
+ struct svm_parameter
+ {
+ 	int svm_type;
+ 	int kernel_type;
+ 	int degree;		/* for poly */
+ 	double gamma;		/* for poly/rbf/sigmoid */
+ 	double coef0;		/* for poly/sigmoid */
+
+ 	/* these are for training only */
+ 	double cache_size;	/* in MB */
+ 	double eps;		/* stopping criteria */
+ 	double C;		/* for C_SVC, EPSILON_SVR, and NU_SVR */
+ 	int nr_weight;		/* for C_SVC */
+ 	int *weight_label;	/* for C_SVC */
+ 	double* weight;		/* for C_SVC */
+ 	double nu;		/* for NU_SVC, ONE_CLASS, and NU_SVR */
+ 	double p;		/* for EPSILON_SVR */
+ 	int shrinking;		/* use the shrinking heuristics */
+ 	int probability;	/* do probability estimates */
+ };
+
+ svm_type can be one of C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, or NU_SVR.
+
+ C_SVC: C-SVM classification
+ NU_SVC: nu-SVM classification
+ ONE_CLASS: one-class-SVM
+ EPSILON_SVR: epsilon-SVM regression
+ NU_SVR: nu-SVM regression
+
+ kernel_type can be one of LINEAR, POLY, RBF, SIGMOID, or PRECOMPUTED.
+
+ LINEAR: u'*v
+ POLY: (gamma*u'*v + coef0)^degree
+ RBF: exp(-gamma*|u-v|^2)
+ SIGMOID: tanh(gamma*u'*v + coef0)
+ PRECOMPUTED: kernel values in training_set_file
+
+ cache_size is the size of the kernel cache, specified in megabytes.
+ C is the cost of constraint violation.
+ eps is the stopping criterion (we usually use 0.00001 in nu-SVC,
+ 0.001 in the others). nu is the parameter in nu-SVM, nu-SVR, and
+ one-class-SVM. p is the epsilon in the epsilon-insensitive loss function
+ of epsilon-SVM regression. shrinking = 1 means shrinking is conducted;
+ = 0 otherwise. probability = 1 means a model with probability
+ information is obtained; = 0 otherwise.
+
+ nr_weight, weight_label, and weight are used to change the penalty
+ for some classes (if the weight for a class is not changed, it is
+ set to 1). This is useful for training a classifier using unbalanced
+ input data or with asymmetric misclassification costs.
+
+ nr_weight is the number of elements in the arrays weight_label and
+ weight. Each weight[i] corresponds to weight_label[i], meaning that
+ the penalty of class weight_label[i] is scaled by a factor of weight[i].
+
+ If you do not want to change the penalty for any of the classes,
+ just set nr_weight to 0.
+
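+ As a sketch, a typical C-SVC setup with the RBF kernel, filled with the
+ defaults documented above, might look like this (illustrative only):
+
+ 	struct svm_parameter param;
+ 	param.svm_type = C_SVC;
+ 	param.kernel_type = RBF;
+ 	param.degree = 3;
+ 	param.gamma = 0.5;		/* e.g., 1/num_features */
+ 	param.coef0 = 0;
+ 	param.cache_size = 100;		/* in MB */
+ 	param.eps = 0.001;
+ 	param.C = 1;
+ 	param.nr_weight = 0;		/* keep every class penalty at C */
+ 	param.weight_label = NULL;
+ 	param.weight = NULL;
+ 	param.nu = 0.5;
+ 	param.p = 0.1;
+ 	param.shrinking = 1;
+ 	param.probability = 0;
+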
+ *NOTE* Because svm_model contains pointers to svm_problem, you cannot
+ free the memory used by svm_problem if you are still using the
+ svm_model produced by svm_train().
+
+ *NOTE* To avoid wrong parameters, svm_check_parameter() should be
+ called before svm_train().
+
+ struct svm_model stores the model obtained from the training procedure.
+ It is not recommended to directly access entries in this structure.
+ Programmers should use the interface functions to get the values.
+
+ struct svm_model
+ {
+ 	struct svm_parameter param;	/* parameter */
+ 	int nr_class;		/* number of classes, = 2 in regression/one class svm */
+ 	int l;			/* total #SV */
+ 	struct svm_node **SV;	/* SVs (SV[l]) */
+ 	double **sv_coef;	/* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
+ 	double *rho;		/* constants in decision functions (rho[k*(k-1)/2]) */
+ 	double *probA;		/* pairwise probability information */
+ 	double *probB;
+
+ 	/* for classification only */
+
+ 	int *label;		/* label of each class (label[k]) */
+ 	int *nSV;		/* number of SVs for each class (nSV[k]) */
+ 				/* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
+ 	/* XXX */
+ 	int free_sv;		/* 1 if svm_model is created by svm_load_model */
+ 				/* 0 if svm_model is created by svm_train */
+ };
+
+ param describes the parameters used to obtain the model.
+
+ nr_class is the number of classes. It is 2 for regression and one-class SVM.
+
+ l is the number of support vectors. SV and sv_coef are the support
+ vectors and the corresponding coefficients, respectively. Assume there are
+ k classes. For data in class j, the corresponding sv_coef includes (k-1) y*alpha vectors,
+ where the alphas are solutions of the following two-class problems:
+ 1 vs j, 2 vs j, ..., j-1 vs j, j vs j+1, j vs j+2, ..., j vs k
+ and y=1 for the first j-1 vectors, while y=-1 for the remaining k-j
+ vectors. For example, if there are 4 classes, sv_coef and SV are like:
+
+ +-+-+-+--------------------+
+ |1|1|1|                    |
+ |v|v|v|  SVs from class 1  |
+ |2|3|4|                    |
+ +-+-+-+--------------------+
+ |1|2|2|                    |
+ |v|v|v|  SVs from class 2  |
+ |2|3|4|                    |
+ +-+-+-+--------------------+
+ |1|2|3|                    |
+ |v|v|v|  SVs from class 3  |
+ |3|3|4|                    |
+ +-+-+-+--------------------+
+ |1|2|3|                    |
+ |v|v|v|  SVs from class 4  |
+ |4|4|4|                    |
+ +-+-+-+--------------------+
+
+ See svm_train() for an example of assigning values to sv_coef.
+
+ rho is the bias term (-b). probA and probB are parameters used in
+ probability outputs. If there are k classes, there are k*(k-1)/2
+ binary problems as well as rho, probA, and probB values. They are
+ aligned in the order of binary problems:
+ 1 vs 2, 1 vs 3, ..., 1 vs k, 2 vs 3, ..., 2 vs k, ..., k-1 vs k.
+
+ label contains the labels in the training data.
+
+ nSV is the number of support vectors in each class.
+
+ free_sv is a flag used to determine whether the space of SV should
+ be released in svm_free_model_content(struct svm_model*) and
+ svm_free_and_destroy_model(struct svm_model**). If the model is
+ generated by svm_train(), then SV points to data in svm_problem
+ and should not be removed. For example, free_sv is 0 if svm_model
+ is created by svm_train, but is 1 if created by svm_load_model.
+
+ - Function: double svm_predict(const struct svm_model *model,
+ 			const struct svm_node *x);
+
+ This function does classification or regression on a test vector x
+ given a model.
+
+ For a classification model, the predicted class for x is returned.
+ For a regression model, the function value of x calculated using
+ the model is returned. For a one-class model, +1 or -1 is
+ returned.
+
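+ For example (an illustrative sketch), predicting the label of a test
+ vector with two nonzero features:
+
+ 	struct svm_node x[3];
+ 	x[0].index = 1; x[0].value = 0.5;
+ 	x[1].index = 3; x[1].value = -0.2;
+ 	x[2].index = -1;		/* terminator */
+
+ 	double predicted_label = svm_predict(model, x);
+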
+ - Function: void svm_cross_validation(const struct svm_problem *prob,
+ 			const struct svm_parameter *param, int nr_fold, double *target);
+
+ This function conducts cross validation. Data are separated into
+ nr_fold folds. Under the given parameters, each fold is sequentially
+ validated using the model obtained from training on the remaining
+ folds. The predicted labels (for all of prob's instances) in the
+ validation process are stored in the array called target.
+
+ The format of svm_prob is the same as that for svm_train().
+
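+ A sketch of five-fold cross validation (illustrative only):
+
+ 	double *target = (double *) malloc(prob.l * sizeof(double));
+ 	svm_cross_validation(&prob, &param, 5, target);
+ 	/* compare target[i] against prob.y[i] to compute the accuracy */
+ 	free(target);
+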
+ - Function: int svm_get_svm_type(const struct svm_model *model);
+
+ This function gives the svm_type of the model. Possible values of
+ svm_type are defined in svm.h.
+
+ - Function: int svm_get_nr_class(const svm_model *model);
+
+ For a classification model, this function gives the number of
+ classes. For a regression or a one-class model, 2 is returned.
+
+ - Function: void svm_get_labels(const svm_model *model, int* label)
+
+ For a classification model, this function outputs the names of the
+ labels into an array called label. For regression and one-class
+ models, label is unchanged.
+
+ - Function: double svm_get_svr_probability(const struct svm_model *model);
+
+ For a regression model with probability information, this function
+ outputs a value sigma > 0. For test data, we consider the
+ probability model: target value = predicted value + z, z: Laplace
+ distribution e^(-|z|/sigma)/(2sigma)
+
+ If the model is not for SVR or does not contain the required
+ information, 0 is returned.
+
+ - Function: double svm_predict_values(const svm_model *model,
+ 			const svm_node *x, double* dec_values)
+
+ This function gives decision values on a test vector x given a
+ model, and returns the predicted label (classification) or
+ the function value (regression).
+
+ For a classification model with nr_class classes, this function
+ gives nr_class*(nr_class-1)/2 decision values in the array
+ dec_values, where nr_class can be obtained from the function
+ svm_get_nr_class. The order is label[0] vs. label[1], ...,
+ label[0] vs. label[nr_class-1], label[1] vs. label[2], ...,
+ label[nr_class-2] vs. label[nr_class-1], where label can be
+ obtained from the function svm_get_labels. The returned value is
+ the predicted class for x.
+
+ For a regression model, dec_values[0] and the returned value are
+ both the function value of x calculated using the model. For a
+ one-class model, dec_values[0] is the decision value of x, while
+ the returned value is +1/-1.
+
+ - Function: double svm_predict_probability(const struct svm_model *model,
+ 			const struct svm_node *x, double* prob_estimates);
+
+ This function does classification or regression on a test vector x
+ given a model with probability information.
+
+ For a classification model with probability information, this
+ function gives nr_class probability estimates in the array
+ prob_estimates. nr_class can be obtained from the function
+ svm_get_nr_class. The class with the highest probability is
+ returned. For regression/one-class SVM, the array prob_estimates
+ is unchanged and the returned value is the same as that of
+ svm_predict.
+
+ - Function: const char *svm_check_parameter(const struct svm_problem *prob,
+ 			const struct svm_parameter *param);
+
+ This function checks whether the parameters are within the feasible
+ range of the problem. This function should be called before calling
+ svm_train() and svm_cross_validation(). It returns NULL if the
+ parameters are feasible; otherwise an error message is returned.
+
+ - Function: int svm_check_probability_model(const struct svm_model *model);
+
+ This function checks whether the model contains the required
+ information to do probability estimates. If so, it returns
+ +1. Otherwise, 0 is returned. This function should be called
+ before calling svm_get_svr_probability and
+ svm_predict_probability.
+
+ - Function: int svm_save_model(const char *model_file_name,
+ 			const struct svm_model *model);
+
+ This function saves a model to a file; it returns 0 on success, or -1
+ if an error occurs.
+
+ - Function: struct svm_model *svm_load_model(const char *model_file_name);
+
+ This function returns a pointer to the model read from the file,
+ or a null pointer if the model could not be loaded.
+
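+ For example (a sketch; the file name is illustrative):
+
+ 	if (svm_save_model("heart_scale.model", model) != 0)
+ 		fprintf(stderr, "can't save model\n");
+
+ 	struct svm_model *loaded = svm_load_model("heart_scale.model");
+ 	if (loaded == NULL)
+ 		fprintf(stderr, "can't load model\n");
+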
+ - Function: void svm_free_model_content(struct svm_model *model_ptr);
+
+ This function frees the memory used by the entries in a model structure.
+
+ - Function: void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
+
+ This function frees the memory used by a model and destroys the model
+ structure. It is equivalent to svm_destroy_model, which
+ is deprecated after version 3.0.
+
+ - Function: void svm_destroy_param(struct svm_parameter *param);
+
+ This function frees the memory used by a parameter set.
+
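+ A typical teardown sequence, as a sketch:
+
+ 	svm_free_and_destroy_model(&model);	/* also sets model to NULL */
+ 	svm_destroy_param(&param);
+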
+ - Function: void svm_set_print_string_function(void (*print_func)(const char *));
+
+ Users can specify their own output format by providing a print
+ function. Use
+ svm_set_print_string_function(NULL);
+ for default printing to stdout.
+
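+ For example, to silence all of libsvm's training output (a sketch; this
+ is the same mechanism that svm-train's -q option uses):
+
+ 	static void print_null(const char *s) {}	/* discard messages */
+
+ 	svm_set_print_string_function(&print_null);
+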
+ Java Version
+ ============
+
+ The pre-compiled java class archive `libsvm.jar' and its source files are
+ in the java directory. To run the programs, use
+
+ java -classpath libsvm.jar svm_train <arguments>
+ java -classpath libsvm.jar svm_predict <arguments>
+ java -classpath libsvm.jar svm_toy
+ java -classpath libsvm.jar svm_scale <arguments>
+
+ Note that you need Java 1.5 (5.0) or above to run it.
+
+ You may need to add the Java runtime library (like classes.zip) to the classpath.
+ You may need to increase the maximum Java heap size.
+
+ Library usage is similar to the C version. These functions are available:
+
+ public class svm {
+ 	public static final int LIBSVM_VERSION=300;
+ 	public static svm_model svm_train(svm_problem prob, svm_parameter param);
+ 	public static void svm_cross_validation(svm_problem prob, svm_parameter param, int nr_fold, double[] target);
+ 	public static int svm_get_svm_type(svm_model model);
+ 	public static int svm_get_nr_class(svm_model model);
+ 	public static void svm_get_labels(svm_model model, int[] label);
+ 	public static double svm_get_svr_probability(svm_model model);
+ 	public static double svm_predict_values(svm_model model, svm_node[] x, double[] dec_values);
+ 	public static double svm_predict(svm_model model, svm_node[] x);
+ 	public static double svm_predict_probability(svm_model model, svm_node[] x, double[] prob_estimates);
+ 	public static void svm_save_model(String model_file_name, svm_model model) throws IOException
+ 	public static svm_model svm_load_model(String model_file_name) throws IOException
+ 	public static String svm_check_parameter(svm_problem prob, svm_parameter param);
+ 	public static int svm_check_probability_model(svm_model model);
+ 	public static void svm_set_print_string_function(svm_print_interface print_func);
+ }
+
+ The library is in the "libsvm" package.
+ Note that in the Java version, svm_node[] is not ended with a node whose index = -1.
+
+ Users can specify their output format by
+
+ your_print_func = new svm_print_interface()
+ {
+ 	public void print(String s)
+ 	{
+ 		// your own format
+ 	}
+ };
+ svm.svm_set_print_string_function(your_print_func);
+
+ Building Windows Binaries
+ =========================
+
+ Windows binaries are in the directory `windows'. To build them via
+ Visual C++, use the following steps:
+
+ 1. Open a DOS command box (or Visual Studio Command Prompt) and change
+ to the libsvm directory. If the environment variables of VC++ have not
+ been set, type
+
+ "C:\Program Files\Microsoft Visual Studio 10.0\VC\bin\vcvars32.bat"
+
+ You may have to modify the above command according to the version of
+ VC++ you have and where it is installed.
+
+ 2. Type
+
+ nmake -f Makefile.win clean all
+
+ 3. (optional) To build the shared library libsvm.dll, type
+
+ nmake -f Makefile.win lib
+
+ Another way is to build them from the Visual C++ environment. See details
+ in the libsvm FAQ.
+
+ Additional Tools: Sub-sampling, Parameter Selection, Format checking, etc.
+ ==========================================================================
+
+ See the README file in the tools directory.
+
+ Python Interface
+ ================
+
+ See the README file in the python directory.
+
+ Additional Information
+ ======================
+
+ If you find LIBSVM helpful, please cite it as
+
+ Chih-Chung Chang and Chih-Jen Lin, LIBSVM: a library for
+ support vector machines, 2001.
+ Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm
+
+ The LIBSVM implementation document is available at
+ http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf
+
+ For any questions and comments, please email cjlin@csie.ntu.edu.tw
+
+ Acknowledgments:
+ This work was supported in part by the National Science
+ Council of Taiwan via the grant NSC 89-2213-E-002-013.
+ The authors thank their group members and users
+ for many helpful discussions and comments. They are listed at
+ http://www.csie.ntu.edu.tw/~cjlin/libsvm/acknowledgements
+