liblinear-ruby 1.0.1 → 1.0.2

Files changed (54)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/blasp.h +8 -8
  4. data/ext/daxpy.c +3 -3
  5. data/ext/ddot.c +3 -3
  6. data/ext/dnrm2.c +7 -7
  7. data/ext/dscal.c +4 -4
  8. data/ext/liblinear_wrap.cxx +382 -382
  9. data/ext/linear.cpp +44 -55
  10. data/ext/linear.h +5 -1
  11. data/ext/tron.cpp +13 -5
  12. data/ext/tron.h +1 -1
  13. data/lib/liblinear.rb +2 -0
  14. data/lib/liblinear/version.rb +1 -1
  15. metadata +2 -41
  16. data/liblinear-2.1/COPYRIGHT +0 -31
  17. data/liblinear-2.1/Makefile +0 -37
  18. data/liblinear-2.1/Makefile.win +0 -24
  19. data/liblinear-2.1/README +0 -600
  20. data/liblinear-2.1/blas/Makefile +0 -22
  21. data/liblinear-2.1/blas/blas.h +0 -25
  22. data/liblinear-2.1/blas/blasp.h +0 -438
  23. data/liblinear-2.1/blas/daxpy.c +0 -57
  24. data/liblinear-2.1/blas/ddot.c +0 -58
  25. data/liblinear-2.1/blas/dnrm2.c +0 -70
  26. data/liblinear-2.1/blas/dscal.c +0 -52
  27. data/liblinear-2.1/heart_scale +0 -270
  28. data/liblinear-2.1/linear.cpp +0 -3053
  29. data/liblinear-2.1/linear.def +0 -22
  30. data/liblinear-2.1/linear.h +0 -79
  31. data/liblinear-2.1/matlab/Makefile +0 -49
  32. data/liblinear-2.1/matlab/README +0 -208
  33. data/liblinear-2.1/matlab/libsvmread.c +0 -212
  34. data/liblinear-2.1/matlab/libsvmwrite.c +0 -119
  35. data/liblinear-2.1/matlab/linear_model_matlab.c +0 -176
  36. data/liblinear-2.1/matlab/linear_model_matlab.h +0 -2
  37. data/liblinear-2.1/matlab/make.m +0 -22
  38. data/liblinear-2.1/matlab/predict.c +0 -341
  39. data/liblinear-2.1/matlab/train.c +0 -492
  40. data/liblinear-2.1/predict.c +0 -243
  41. data/liblinear-2.1/python/Makefile +0 -4
  42. data/liblinear-2.1/python/README +0 -380
  43. data/liblinear-2.1/python/liblinear.py +0 -323
  44. data/liblinear-2.1/python/liblinearutil.py +0 -270
  45. data/liblinear-2.1/train.c +0 -449
  46. data/liblinear-2.1/tron.cpp +0 -241
  47. data/liblinear-2.1/tron.h +0 -35
  48. data/liblinear-2.1/windows/liblinear.dll +0 -0
  49. data/liblinear-2.1/windows/libsvmread.mexw64 +0 -0
  50. data/liblinear-2.1/windows/libsvmwrite.mexw64 +0 -0
  51. data/liblinear-2.1/windows/predict.exe +0 -0
  52. data/liblinear-2.1/windows/predict.mexw64 +0 -0
  53. data/liblinear-2.1/windows/train.exe +0 -0
  54. data/liblinear-2.1/windows/train.mexw64 +0 -0
data/liblinear-2.1/COPYRIGHT
@@ -1,31 +0,0 @@
-
- Copyright (c) 2007-2015 The LIBLINEAR Project.
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- 3. Neither name of copyright holders nor the names of its contributors
- may be used to endorse or promote products derived from this software
- without specific prior written permission.
-
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/liblinear-2.1/Makefile
@@ -1,37 +0,0 @@
- CXX ?= g++
- CC ?= gcc
- CFLAGS = -Wall -Wconversion -O3 -fPIC
- LIBS = blas/blas.a
- SHVER = 3
- OS = $(shell uname)
- #LIBS = -lblas
-
- all: train predict
-
- lib: linear.o tron.o blas/blas.a
- if [ "$(OS)" = "Darwin" ]; then \
- SHARED_LIB_FLAG="-dynamiclib -Wl,-install_name,liblinear.so.$(SHVER)"; \
- else \
- SHARED_LIB_FLAG="-shared -Wl,-soname,liblinear.so.$(SHVER)"; \
- fi; \
- $(CXX) $${SHARED_LIB_FLAG} linear.o tron.o blas/blas.a -o liblinear.so.$(SHVER)
-
- train: tron.o linear.o train.c blas/blas.a
- $(CXX) $(CFLAGS) -o train train.c tron.o linear.o $(LIBS)
-
- predict: tron.o linear.o predict.c blas/blas.a
- $(CXX) $(CFLAGS) -o predict predict.c tron.o linear.o $(LIBS)
-
- tron.o: tron.cpp tron.h
- $(CXX) $(CFLAGS) -c -o tron.o tron.cpp
-
- linear.o: linear.cpp linear.h
- $(CXX) $(CFLAGS) -c -o linear.o linear.cpp
-
- blas/blas.a: blas/*.c blas/*.h
- make -C blas OPTFLAGS='$(CFLAGS)' CC='$(CC)';
-
- clean:
- make -C blas clean
- make -C matlab clean
- rm -f *~ tron.o linear.o train predict liblinear.so.$(SHVER)
data/liblinear-2.1/Makefile.win
@@ -1,24 +0,0 @@
- CXX = cl.exe
- CFLAGS = /nologo /O2 /EHsc /I. /D _WIN64 /D _CRT_SECURE_NO_DEPRECATE
- TARGET = windows
-
- all: $(TARGET)\train.exe $(TARGET)\predict.exe lib
-
- $(TARGET)\train.exe: tron.obj linear.obj train.c blas\*.c
- $(CXX) $(CFLAGS) -Fe$(TARGET)\train.exe tron.obj linear.obj train.c blas\*.c
-
- $(TARGET)\predict.exe: tron.obj linear.obj predict.c blas\*.c
- $(CXX) $(CFLAGS) -Fe$(TARGET)\predict.exe tron.obj linear.obj predict.c blas\*.c
-
- linear.obj: linear.cpp linear.h
- $(CXX) $(CFLAGS) -c linear.cpp
-
- tron.obj: tron.cpp tron.h
- $(CXX) $(CFLAGS) -c tron.cpp
-
- lib: linear.cpp linear.h linear.def tron.obj
- $(CXX) $(CFLAGS) -LD linear.cpp tron.obj blas\*.c -Fe$(TARGET)\liblinear -link -DEF:linear.def
-
- clean:
- -erase /Q *.obj $(TARGET)\*.exe $(TARGET)\*.dll $(TARGET)\*.exp $(TARGET)\*.lib
-
data/liblinear-2.1/README
@@ -1,600 +0,0 @@
- LIBLINEAR is a simple package for solving large-scale regularized linear
- classification and regression. It currently supports
- - L2-regularized logistic regression/L2-loss support vector classification/L1-loss support vector classification
- - L1-regularized L2-loss support vector classification/L1-regularized logistic regression
- - L2-regularized L2-loss support vector regression/L1-loss support vector regression.
- This document explains the usage of LIBLINEAR.
-
- To get started, please read the ``Quick Start'' section first.
- For developers, please check the ``Library Usage'' section to learn
- how to integrate LIBLINEAR in your software.
-
- Table of Contents
- =================
-
- - When to use LIBLINEAR but not LIBSVM
- - Quick Start
- - Installation
- - `train' Usage
- - `predict' Usage
- - Examples
- - Library Usage
- - Building Windows Binaries
- - Additional Information
- - MATLAB/OCTAVE interface
- - PYTHON interface
-
- When to use LIBLINEAR but not LIBSVM
- ====================================
-
- There are some large data sets for which, with or without nonlinear
- mappings, performance is similar. Without using kernels, one can
- efficiently train a much larger set via linear classification/regression.
- These data usually have a large number of features. Document classification
- is an example.
-
- Warning: While generally liblinear is very fast, its default solver
- may be slow under certain situations (e.g., data not scaled or C is
- large). See Appendix B of our SVM guide about how to handle such
- cases.
- http://www.csie.ntu.edu.tw/~cjlin/papers/guide/guide.pdf
-
- Warning: If you are a beginner and your data sets are not large, you
- should consider LIBSVM first.
-
- LIBSVM page:
- http://www.csie.ntu.edu.tw/~cjlin/libsvm
-
-
- Quick Start
- ===========
-
- See the section ``Installation'' for installing LIBLINEAR.
-
- After installation, there are programs `train' and `predict' for
- training and testing, respectively.
-
- For the data format, please check the README file of LIBSVM. Note
- that feature indices must start from 1 (not 0).
-
- A sample classification data set included in this package is `heart_scale'.
-
- Type `train heart_scale', and the program will read the training
- data and output the model file `heart_scale.model'. If you have a test
- set called heart_scale.t, then type `predict heart_scale.t
- heart_scale.model output' to see the prediction accuracy. The `output'
- file contains the predicted class labels.
-
- For more information about `train' and `predict', see the sections
- `train' Usage and `predict' Usage.
-
- To obtain good performance, sometimes one needs to scale the
- data. Please check the program `svm-scale' of LIBSVM. For large and
- sparse data, use `-l 0' to keep the sparsity.
-
- Installation
- ============
-
- On Unix systems, type `make' to build the `train' and `predict'
- programs. Run them without arguments to show the usage messages.
-
- On other systems, consult `Makefile' to build them (e.g., see
- 'Building Windows binaries' in this file) or use the pre-built
- binaries (Windows binaries are in the directory `windows').
-
- This software uses some level-1 BLAS subroutines. The needed functions are
- included in this package. If a BLAS library is available on your
- machine, you may use it instead by modifying the Makefile: uncomment the line
-
- #LIBS ?= -lblas
-
- and comment out
-
- LIBS ?= blas/blas.a
-
- `train' Usage
- =============
-
- Usage: train [options] training_set_file [model_file]
- options:
- -s type : set type of solver (default 1)
- for multi-class classification
- 0 -- L2-regularized logistic regression (primal)
- 1 -- L2-regularized L2-loss support vector classification (dual)
- 2 -- L2-regularized L2-loss support vector classification (primal)
- 3 -- L2-regularized L1-loss support vector classification (dual)
- 4 -- support vector classification by Crammer and Singer
- 5 -- L1-regularized L2-loss support vector classification
- 6 -- L1-regularized logistic regression
- 7 -- L2-regularized logistic regression (dual)
- for regression
- 11 -- L2-regularized L2-loss support vector regression (primal)
- 12 -- L2-regularized L2-loss support vector regression (dual)
- 13 -- L2-regularized L1-loss support vector regression (dual)
- -c cost : set the parameter C (default 1)
- -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
- -e epsilon : set tolerance of termination criterion
- -s 0 and 2
- |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,
- where f is the primal function and pos/neg are # of
- positive/negative data (default 0.01)
- -s 11
- |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)
- -s 1, 3, 4 and 7
- Dual maximal violation <= eps; similar to libsvm (default 0.1)
- -s 5 and 6
- |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,
- where f is the primal function (default 0.01)
- -s 12 and 13
- |f'(alpha)|_1 <= eps |f'(alpha0)|_1,
- where f is the dual function (default 0.1)
- -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
- -wi weight: weights adjust the parameter C of different classes (see README for details)
- -v n: n-fold cross validation mode
- -C : find parameter C (only for -s 0 and 2)
- -q : quiet mode (no outputs)
-
- Option -v randomly splits the data into n parts and calculates cross
- validation accuracy on them.
-
- Option -C conducts cross validation under different C values and finds
- the best one. This option is supported only by -s 0 and -s 2. If
- the solver is not specified, -s 2 is used.
-
- Formulations:
-
- For L2-regularized logistic regression (-s 0), we solve
-
- min_w w^Tw/2 + C \sum log(1 + exp(-y_i w^Tx_i))
-
- For L2-regularized L2-loss SVC dual (-s 1), we solve
-
- min_alpha 0.5(alpha^T (Q + I/2/C) alpha) - e^T alpha
- s.t. 0 <= alpha_i,
-
- For L2-regularized L2-loss SVC (-s 2), we solve
-
- min_w w^Tw/2 + C \sum max(0, 1 - y_i w^Tx_i)^2
-
- For L2-regularized L1-loss SVC dual (-s 3), we solve
-
- min_alpha 0.5(alpha^T Q alpha) - e^T alpha
- s.t. 0 <= alpha_i <= C,
-
- For L1-regularized L2-loss SVC (-s 5), we solve
-
- min_w \sum |w_j| + C \sum max(0, 1 - y_i w^Tx_i)^2
-
- For L1-regularized logistic regression (-s 6), we solve
-
- min_w \sum |w_j| + C \sum log(1 + exp(-y_i w^Tx_i))
-
- For L2-regularized logistic regression (-s 7), we solve
-
- min_alpha 0.5(alpha^T Q alpha) + \sum alpha_i*log(alpha_i) + \sum (C-alpha_i)*log(C-alpha_i) - a constant
- s.t. 0 <= alpha_i <= C,
-
- where
-
- Q is a matrix with Q_ij = y_i y_j x_i^T x_j.
-
- For L2-regularized L2-loss SVR (-s 11), we solve
-
- min_w w^Tw/2 + C \sum max(0, |y_i-w^Tx_i|-epsilon)^2
-
- For L2-regularized L2-loss SVR dual (-s 12), we solve
-
- min_beta 0.5(beta^T (Q + lambda I/2/C) beta) - y^T beta + \sum |beta_i|
-
- For L2-regularized L1-loss SVR dual (-s 13), we solve
-
- min_beta 0.5(beta^T Q beta) - y^T beta + \sum |beta_i|
- s.t. -C <= beta_i <= C,
-
- where
-
- Q is a matrix with Q_ij = x_i^T x_j.
-
- If bias >= 0, w becomes [w; w_{n+1}] and x becomes [x; bias].
-
- The primal-dual relationship implies that -s 1 and -s 2 give the same
- model, -s 0 and -s 7 give the same, and -s 11 and -s 12 give the same.
-
- We implement the one-vs-the-rest multi-class strategy for classification.
- In training i vs. non_i, their C parameters are (weight from -wi)*C
- and C, respectively. If there are only two classes, we train only one
- model. Thus weight1*C vs. weight2*C is used. See the examples below.
-
- We also implement multi-class SVM by Crammer and Singer (-s 4):
-
- min_{w_m, \xi_i} 0.5 \sum_m ||w_m||^2 + C \sum_i \xi_i
- s.t. w^T_{y_i} x_i - w^T_m x_i >= e^m_i - \xi_i \forall m,i
-
- where e^m_i = 0 if y_i = m,
- e^m_i = 1 if y_i != m,
-
- Here we solve the dual problem:
-
- min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
- s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i = 0 \forall i
-
- where w_m(\alpha) = \sum_i \alpha^m_i x_i,
- and C^m_i = C if m = y_i,
- C^m_i = 0 if m != y_i.
-
- `predict' Usage
- ===============
-
- Usage: predict [options] test_file model_file output_file
- options:
- -b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only
- -q : quiet mode (no outputs)
-
- Note that -b is only needed in the prediction phase. This is different
- from the setting of LIBSVM.
-
- Examples
- ========
-
- > train data_file
-
- Train a linear SVM with the L2-loss function.
-
- > train -s 0 data_file
-
- Train a logistic regression model.
-
- > train -v 5 -e 0.001 data_file
-
- Do five-fold cross-validation using L2-loss SVM.
- Use a smaller stopping tolerance 0.001 than the default
- 0.1 if you want more accurate solutions.
-
- > train -C data_file
-
- Conduct cross validation many times by L2-loss SVM
- and find the parameter C which achieves the best cross
- validation accuracy.
-
- > train -C -s 0 -v 3 -c 0.5 -e 0.0001 data_file
-
- For parameter selection by -C, users can specify other
- solvers (currently -s 0 and -s 2 are supported) and
- a different number of CV folds. Further, users can use
- the -c option to specify the smallest C value of the
- search range. This setting is useful when users want
- to rerun the parameter selection procedure from a
- specified C under a different setting, such as a stricter
- stopping tolerance -e 0.0001 in the above example.
-
- > train -c 10 -w1 2 -w2 5 -w3 2 four_class_data_file
-
- Train four classifiers:
- positive negative Cp Cn
- class 1 class 2,3,4. 20 10
- class 2 class 1,3,4. 50 10
- class 3 class 1,2,4. 20 10
- class 4 class 1,2,3. 10 10
-
- > train -c 10 -w3 1 -w2 5 two_class_data_file
-
- If there are only two classes, we train ONE model.
- The C values for the two classes are 10 and 50.
-
- > predict -b 1 test_file data_file.model output_file
-
- Output probability estimates (for logistic regression only).
-
- Library Usage
- =============
-
- - Function: model* train(const struct problem *prob,
- const struct parameter *param);
-
- This function constructs and returns a linear classification
- or regression model according to the given training data and
- parameters.
-
- struct problem describes the problem:
-
- struct problem
- {
- int l, n;
- int *y;
- struct feature_node **x;
- double bias;
- };
-
- where `l' is the number of training data. If bias >= 0, we assume
- that one additional feature is added to the end of each data
- instance. `n' is the number of features (including the bias feature
- if bias >= 0). `y' is an array containing the target values (integers
- in classification, real numbers in regression), and `x' is an array
- of pointers, each of which points to a sparse representation (array
- of feature_node) of one training vector.
-
- For example, if we have the following training data:
-
- LABEL ATTR1 ATTR2 ATTR3 ATTR4 ATTR5
- ----- ----- ----- ----- ----- -----
- 1 0 0.1 0.2 0 0
- 2 0 0.1 0.3 -1.2 0
- 1 0.4 0 0 0 0
- 2 0 0.1 0 1.4 0.5
- 3 -0.1 -0.2 0.1 1.1 0.1
-
- and bias = 1, then the components of problem are:
-
- l = 5
- n = 6
-
- y -> 1 2 1 2 3
-
- x -> [ ] -> (2,0.1) (3,0.2) (6,1) (-1,?)
- [ ] -> (2,0.1) (3,0.3) (4,-1.2) (6,1) (-1,?)
- [ ] -> (1,0.4) (6,1) (-1,?)
- [ ] -> (2,0.1) (4,1.4) (5,0.5) (6,1) (-1,?)
- [ ] -> (1,-0.1) (2,-0.2) (3,0.1) (4,1.1) (5,0.1) (6,1) (-1,?)
-
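To make the layout above concrete, here is a minimal C sketch (not part of the package) that populates struct problem with exactly these five instances, following the declarations as printed in this README; note that newer liblinear releases declare `y' as double* rather than int*. Each sparse row is terminated by index -1, and the bias feature (index 6, value 1) is appended because bias = 1.

    #include "linear.h"

    /* Sparse rows; the value stored with index -1 is never read. */
    static struct feature_node r1[] = {{2,0.1},{3,0.2},{6,1},{-1,0}};
    static struct feature_node r2[] = {{2,0.1},{3,0.3},{4,-1.2},{6,1},{-1,0}};
    static struct feature_node r3[] = {{1,0.4},{6,1},{-1,0}};
    static struct feature_node r4[] = {{2,0.1},{4,1.4},{5,0.5},{6,1},{-1,0}};
    static struct feature_node r5[] = {{1,-0.1},{2,-0.2},{3,0.1},{4,1.1},{5,0.1},{6,1},{-1,0}};

    static struct feature_node *rows[] = {r1, r2, r3, r4, r5};
    static int labels[] = {1, 2, 1, 2, 3};

    /* l = 5 instances, n = 6 features (5 attributes + bias), bias = 1 */
    static struct problem prob = {5, 6, labels, rows, 1.0};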
- struct parameter describes the parameters of a linear classification
- or regression model:
-
- struct parameter
- {
- int solver_type;
-
- /* these are for training only */
- double eps; /* stopping criteria */
- double C;
- int nr_weight;
- int *weight_label;
- double* weight;
- double p;
- };
-
- solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL.
- for classification
- L2R_LR L2-regularized logistic regression (primal)
- L2R_L2LOSS_SVC_DUAL L2-regularized L2-loss support vector classification (dual)
- L2R_L2LOSS_SVC L2-regularized L2-loss support vector classification (primal)
- L2R_L1LOSS_SVC_DUAL L2-regularized L1-loss support vector classification (dual)
- MCSVM_CS support vector classification by Crammer and Singer
- L1R_L2LOSS_SVC L1-regularized L2-loss support vector classification
- L1R_LR L1-regularized logistic regression
- L2R_LR_DUAL L2-regularized logistic regression (dual)
- for regression
- L2R_L2LOSS_SVR L2-regularized L2-loss support vector regression (primal)
- L2R_L2LOSS_SVR_DUAL L2-regularized L2-loss support vector regression (dual)
- L2R_L1LOSS_SVR_DUAL L2-regularized L1-loss support vector regression (dual)
-
- C is the cost of constraint violation.
- p is the epsilon in the epsilon-insensitive loss of support vector regression.
- eps is the stopping criterion.
-
- nr_weight, weight_label, and weight are used to change the penalty
- for some classes (if the weight for a class is not changed, it is
- set to 1). This is useful for training a classifier using unbalanced
- input data or with asymmetric misclassification costs.
-
- nr_weight is the number of elements in the arrays weight_label and
- weight. Each weight[i] corresponds to weight_label[i], meaning that
- the penalty of class weight_label[i] is scaled by a factor of weight[i].
-
- If you do not want to change the penalty for any of the classes,
- just set nr_weight to 0.
-
- *NOTE* To avoid wrong parameters, check_parameter() should be
- called before train().
-
- struct model stores the model obtained from the training procedure:
-
- struct model
- {
- struct parameter param;
- int nr_class; /* number of classes */
- int nr_feature;
- double *w;
- int *label; /* label of each class */
- double bias;
- };
-
- param describes the parameters used to obtain the model.
-
- nr_class and nr_feature are the number of classes and features,
- respectively. nr_class = 2 for regression.
-
- The nr_feature*nr_class array w gives feature weights. We use one
- against the rest for multi-class classification, so each feature
- index corresponds to nr_class weight values. Weights are
- organized in the following way:
-
- +------------------+------------------+------------+
- | nr_class weights | nr_class weights | ...
- | for 1st feature | for 2nd feature |
- +------------------+------------------+------------+
-
- If bias >= 0, x becomes [x; bias]. The number of features is
- increased by one, so w is a (nr_feature+1)*nr_class array. The
- value of bias is stored in the variable bias.
-
- The array label stores class labels.
-
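Given that layout, the coefficient for a feature/class pair can be read straight out of w. A hedged sketch for the common case (a one-vs-the-rest model with nr_class > 2 and bias < 0); it computes the same value that the function get_decfun_coef, described below, returns in this case:

    #include "linear.h"

    /* feat_idx is 1-based, label_idx is 0-based, per the accessor
       conventions documented later in this README. */
    double coef_from_w(const struct model *model_, int feat_idx, int label_idx)
    {
        /* nr_class consecutive weights are stored per feature. */
        return model_->w[(feat_idx - 1) * model_->nr_class + label_idx];
    }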
- - Function: void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target);
-
- This function conducts cross validation. Data are separated into
- nr_fold folds. Under given parameters, sequentially each fold is
- validated using the model trained on the remaining folds. Predicted
- labels in the validation process are stored in the array called
- target.
-
- The format of prob is the same as that for train().
-
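As an illustration of the call, a small helper in C that mirrors how the bundled train.c scores classification accuracy from the target array (the helper name cv_accuracy is ours, not part of the library):

    #include <stdlib.h>
    #include "linear.h"

    /* Runs nr_fold-fold cross validation and returns accuracy in percent.
       Assumes a classification problem with labels stored in prob->y. */
    double cv_accuracy(const struct problem *prob,
                       const struct parameter *param, int nr_fold)
    {
        double *target = malloc(prob->l * sizeof(double));
        int i, correct = 0;
        cross_validation(prob, param, nr_fold, target);
        for (i = 0; i < prob->l; i++)
            if (target[i] == prob->y[i])
                ++correct;
        free(target);
        return 100.0 * correct / prob->l;
    }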
- - Function: void find_parameter_C(const struct problem *prob,
- const struct parameter *param, int nr_fold, double start_C,
- double max_C, double *best_C, double *best_rate);
-
- This function is similar to cross_validation. However, instead of
- conducting cross validation under a specified parameter C, it
- conducts cross validation many times under parameters C = start_C,
- 2*start_C, 4*start_C, 8*start_C, ..., and finds the best one with
- the highest cross validation accuracy.
-
- If start_C <= 0, then this procedure calculates a small enough C
- for prob as the start_C. The procedure stops when the models of
- all folds become stable or C reaches max_C. The best C and the
- corresponding accuracy are assigned to *best_C and *best_rate,
- respectively.
-
- - Function: double predict(const model *model_, const feature_node *x);
-
- For a classification model, the predicted class for x is returned.
- For a regression model, the function value of x calculated using
- the model is returned.
-
- - Function: double predict_values(const struct model *model_,
- const struct feature_node *x, double* dec_values);
-
- This function gives nr_w decision values in the array dec_values.
- nr_w=1 if regression is applied or the number of classes is two. An
- exception is multi-class SVM by Crammer and Singer (-s 4), where nr_w = 2
- if there are two classes. For all other situations, nr_w is the
- number of classes.
-
- We implement the one-vs-the-rest multi-class strategy (-s 0,1,2,3,5,6,7)
- and multi-class SVM by Crammer and Singer (-s 4) for multi-class SVM.
- The class with the highest decision value is returned.
-
- - Function: double predict_probability(const struct model *model_,
- const struct feature_node *x, double* prob_estimates);
-
- This function gives nr_class probability estimates in the array
- prob_estimates. nr_class can be obtained from the function
- get_nr_class. The class with the highest probability is
- returned. Currently, we support only the probability outputs of
- logistic regression.
-
- - Function: int get_nr_feature(const model *model_);
-
- The function gives the number of attributes of the model.
-
- - Function: int get_nr_class(const model *model_);
-
- The function gives the number of classes of the model.
- For a regression model, 2 is returned.
-
- - Function: void get_labels(const model *model_, int* label);
-
- This function writes the class labels into an array called label.
- For a regression model, label is unchanged.
-
- - Function: double get_decfun_coef(const struct model *model_, int feat_idx,
- int label_idx);
-
- This function gives the coefficient for the feature with feature index =
- feat_idx and the class with label index = label_idx. Note that feat_idx
- starts from 1, while label_idx starts from 0. If feat_idx is not in the
- valid range (1 to nr_feature), then a zero value will be returned. For
- classification models, if label_idx is not in the valid range (0 to
- nr_class-1), then a zero value will be returned; for regression models,
- label_idx is ignored.
-
- - Function: double get_decfun_bias(const struct model *model_, int label_idx);
-
- This function gives the bias term corresponding to the class with label
- index label_idx. For classification models, if label_idx is not in a valid
- range (0 to nr_class-1), then a zero value will be returned; for regression
- models, label_idx is ignored.
-
- - Function: const char *check_parameter(const struct problem *prob,
- const struct parameter *param);
-
- This function checks whether the parameters are within the feasible
- range of the problem. This function should be called before calling
- train() and cross_validation(). It returns NULL if the
- parameters are feasible, otherwise an error message is returned.
-
- - Function: int check_probability_model(const struct model *model);
-
- This function returns 1 if the model supports probability output;
- otherwise, it returns 0.
-
- - Function: int check_regression_model(const struct model *model);
-
- This function returns 1 if the model is a regression model; otherwise
- it returns 0.
-
- - Function: int save_model(const char *model_file_name,
- const struct model *model_);
-
- This function saves a model to a file; it returns 0 on success, or -1
- if an error occurs.
-
- - Function: struct model *load_model(const char *model_file_name);
-
- This function returns a pointer to the model read from the file,
- or a null pointer if the model could not be loaded.
-
- - Function: void free_model_content(struct model *model_ptr);
-
- This function frees the memory used by the entries in a model structure.
-
- - Function: void free_and_destroy_model(struct model **model_ptr_ptr);
-
- This function frees the memory used by a model and destroys the model
- structure.
-
- - Function: void destroy_param(struct parameter *param);
-
- This function frees the memory used by a parameter set.
-
- - Function: void set_print_string_function(void (*print_func)(const char *));
-
- Users can specify their own output format through a function. Use
- set_print_string_function(NULL);
- for default printing to stdout.
-
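Tying the functions above together, a minimal end-to-end sketch in C of the train/predict lifecycle this section describes (error handling trimmed; `instance' is a feature_node array in the same sparse format as a row of prob.x, and the file name demo.model is ours):

    #include <stdio.h>
    #include "linear.h"

    void train_and_predict(struct problem *prob, struct feature_node *instance)
    {
        struct parameter param = {0};
        param.solver_type = L2R_L2LOSS_SVC_DUAL; /* the default -s 1 solver */
        param.C = 1;          /* -c */
        param.eps = 0.1;      /* -e, the default for -s 1 */
        param.nr_weight = 0;  /* leave every class penalty at 1 */

        const char *err = check_parameter(prob, &param); /* always check first */
        if (err) { fprintf(stderr, "%s\n", err); return; }

        struct model *m = train(prob, &param);
        printf("predicted label: %g\n", predict(m, instance));

        if (save_model("demo.model", m) != 0) /* returns 0 on success */
            fprintf(stderr, "cannot save model\n");

        free_and_destroy_model(&m);
        destroy_param(&param);
    }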
- Building Windows Binaries
- =========================
-
- Windows binaries are in the directory `windows'. To build them via
- Visual C++, use the following steps:
-
- 1. Open a DOS command box and change to the liblinear directory. If
- the environment variables of VC++ have not been set, type
-
- "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\amd64\vcvars64.bat"
-
- You may have to modify the above command according to which version of
- VC++ you use and where it is installed.
-
- 2. Type
-
- nmake -f Makefile.win clean all
-
- 3. (Optional) To build 32-bit windows binaries, you must
- (1) Setup "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\vcvars32.bat" instead of vcvars64.bat
- (2) Change CFLAGS in Makefile.win: /D _WIN64 to /D _WIN32
-
- MATLAB/OCTAVE Interface
- =======================
-
- Please check the file README in the directory `matlab'.
-
- PYTHON Interface
- ================
-
- Please check the file README in the directory `python'.
-
- Additional Information
- ======================
-
- If you find LIBLINEAR helpful, please cite it as
-
- R.-E. Fan, K.-W. Chang, C.-J. Hsieh, X.-R. Wang, and C.-J. Lin.
- LIBLINEAR: A Library for Large Linear Classification, Journal of
- Machine Learning Research 9(2008), 1871-1874. Software available at
- http://www.csie.ntu.edu.tw/~cjlin/liblinear
-
- For any questions and comments, please send your email to
- cjlin@csie.ntu.edu.tw
-
-