liblinear-ruby 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/blasp.h +8 -8
  4. data/ext/daxpy.c +3 -3
  5. data/ext/ddot.c +3 -3
  6. data/ext/dnrm2.c +7 -7
  7. data/ext/dscal.c +4 -4
  8. data/ext/liblinear_wrap.cxx +382 -382
  9. data/ext/linear.cpp +44 -55
  10. data/ext/linear.h +5 -1
  11. data/ext/tron.cpp +13 -5
  12. data/ext/tron.h +1 -1
  13. data/lib/liblinear.rb +2 -0
  14. data/lib/liblinear/version.rb +1 -1
  15. metadata +2 -41
  16. data/liblinear-2.1/COPYRIGHT +0 -31
  17. data/liblinear-2.1/Makefile +0 -37
  18. data/liblinear-2.1/Makefile.win +0 -24
  19. data/liblinear-2.1/README +0 -600
  20. data/liblinear-2.1/blas/Makefile +0 -22
  21. data/liblinear-2.1/blas/blas.h +0 -25
  22. data/liblinear-2.1/blas/blasp.h +0 -438
  23. data/liblinear-2.1/blas/daxpy.c +0 -57
  24. data/liblinear-2.1/blas/ddot.c +0 -58
  25. data/liblinear-2.1/blas/dnrm2.c +0 -70
  26. data/liblinear-2.1/blas/dscal.c +0 -52
  27. data/liblinear-2.1/heart_scale +0 -270
  28. data/liblinear-2.1/linear.cpp +0 -3053
  29. data/liblinear-2.1/linear.def +0 -22
  30. data/liblinear-2.1/linear.h +0 -79
  31. data/liblinear-2.1/matlab/Makefile +0 -49
  32. data/liblinear-2.1/matlab/README +0 -208
  33. data/liblinear-2.1/matlab/libsvmread.c +0 -212
  34. data/liblinear-2.1/matlab/libsvmwrite.c +0 -119
  35. data/liblinear-2.1/matlab/linear_model_matlab.c +0 -176
  36. data/liblinear-2.1/matlab/linear_model_matlab.h +0 -2
  37. data/liblinear-2.1/matlab/make.m +0 -22
  38. data/liblinear-2.1/matlab/predict.c +0 -341
  39. data/liblinear-2.1/matlab/train.c +0 -492
  40. data/liblinear-2.1/predict.c +0 -243
  41. data/liblinear-2.1/python/Makefile +0 -4
  42. data/liblinear-2.1/python/README +0 -380
  43. data/liblinear-2.1/python/liblinear.py +0 -323
  44. data/liblinear-2.1/python/liblinearutil.py +0 -270
  45. data/liblinear-2.1/train.c +0 -449
  46. data/liblinear-2.1/tron.cpp +0 -241
  47. data/liblinear-2.1/tron.h +0 -35
  48. data/liblinear-2.1/windows/liblinear.dll +0 -0
  49. data/liblinear-2.1/windows/libsvmread.mexw64 +0 -0
  50. data/liblinear-2.1/windows/libsvmwrite.mexw64 +0 -0
  51. data/liblinear-2.1/windows/predict.exe +0 -0
  52. data/liblinear-2.1/windows/predict.mexw64 +0 -0
  53. data/liblinear-2.1/windows/train.exe +0 -0
  54. data/liblinear-2.1/windows/train.mexw64 +0 -0
@@ -1,323 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- from ctypes import *
4
- from ctypes.util import find_library
5
- from os import path
6
- import sys
7
-
8
# Names exported by ``from liblinear import *``.
__all__ = [
    # library handle, data structures and helpers
    'liblinear',
    'feature_node',
    'gen_feature_nodearray',
    'problem',
    'parameter',
    'model',
    'toPyModel',
    # solver type constants
    'L2R_LR',
    'L2R_L2LOSS_SVC_DUAL',
    'L2R_L2LOSS_SVC',
    'L2R_L1LOSS_SVC_DUAL',
    'MCSVM_CS',
    'L1R_L2LOSS_SVC',
    'L1R_LR',
    'L2R_LR_DUAL',
    'L2R_L2LOSS_SVR',
    'L2R_L2LOSS_SVR_DUAL',
    'L2R_L1LOSS_SVR_DUAL',
    # no-op print callback
    'print_null',
]
13
-
14
# Load the LIBLINEAR shared library into the module-level name ``liblinear``.
#
# The copy located relative to this file is tried first.  If that fails for
# any ordinary reason, fall back to a library found through
# ctypes.util.find_library.  Only ``Exception`` is caught so that
# KeyboardInterrupt / SystemExit are not swallowed by the fallback.
try:
    dirname = path.dirname(path.abspath(__file__))
    if sys.platform == 'win32':
        liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll'))
    else:
        liblinear = CDLL(path.join(dirname, '../liblinear.so.3'))
except Exception:
    # For unix the prefix 'lib' is not considered.
    _liblinear_path = find_library('linear') or find_library('liblinear')
    if not _liblinear_path:
        raise Exception('LIBLINEAR library not found.')
    liblinear = CDLL(_liblinear_path)
28
-
29
# Solver type identifiers.  parameter.parse_options() stores the integer
# given to the '-s' option in ``solver_type`` and compares it against these.

# Classification solvers: 0 .. 7
(L2R_LR,
 L2R_L2LOSS_SVC_DUAL,
 L2R_L2LOSS_SVC,
 L2R_L1LOSS_SVC_DUAL,
 MCSVM_CS,
 L1R_L2LOSS_SVC,
 L1R_LR,
 L2R_LR_DUAL) = range(8)

# Regression solvers: 11 .. 13
(L2R_L2LOSS_SVR,
 L2R_L2LOSS_SVR_DUAL,
 L2R_L1LOSS_SVR_DUAL) = range(11, 14)
40
-
41
# ctypes callback type: takes one C string (char *), returns nothing.
PRINT_STRING_FUN = CFUNCTYPE(None, c_char_p)


def print_null(s):
    """Ignore ``s`` and return None (used as the '-q' quiet-mode callback)."""
    return None
44
-
45
def genFields(names, types):
    """Return a list of (name, type) pairs, pairing the two sequences in order."""
    return [(field_name, field_type) for field_name, field_type in zip(names, types)]
47
-
48
def fillprototype(f, restype, argtypes):
    """Set ``f.restype`` and then ``f.argtypes`` to the given values."""
    for attr_name, attr_value in (("restype", restype), ("argtypes", argtypes)):
        setattr(f, attr_name, attr_value)
51
-
52
class feature_node(Structure):
    """ctypes structure holding one (index, value) pair of an instance."""
    _names = ["index", "value"]
    _types = [c_int, c_double]
    # (name, type) pairs in declaration order, as ctypes expects.
    _fields_ = list(zip(_names, _types))

    def __str__(self):
        """Format the node as 'index:value'."""
        pair = (self.index, self.value)
        return '%d:%g' % pair
59
-
60
def gen_feature_nodearray(xi, feature_max=None, issparse=True):
    """
    gen_feature_nodearray(xi [, feature_max, issparse]) -> (nodes, max_idx)

    Convert one instance into a ctypes array of feature_node.

    xi: a dict mapping feature index -> value, or a list/tuple of values
        in which the first element becomes feature 1.
    feature_max: if given (and non-zero), features whose index is larger
        are dropped.
    issparse: if true, features whose value equals 0 are dropped.

    The returned array contains the kept features in increasing index
    order, followed by two extra nodes whose index is -1.  The first of
    those two (position -2) is the slot that problem.set_bias() and
    predict() overwrite with the bias node.  max_idx is the largest kept
    index, or 0 if no feature was kept.

    Raises TypeError if xi is not a dict, list or tuple.
    """
    if isinstance(xi, dict):
        index_range = xi.keys()
    elif isinstance(xi, (list, tuple)):
        # list(xi) so that tuples work as well: [0] + <tuple> is a TypeError.
        xi = [0] + list(xi)  # idx should start from 1
        index_range = range(1, len(xi))
    else:
        raise TypeError('xi should be a dictionary, list or tuple')

    if feature_max:
        assert(isinstance(feature_max, int))
        index_range = [j for j in index_range if j <= feature_max]
    if issparse:
        index_range = [j for j in index_range if xi[j] != 0]

    index_range = sorted(index_range)
    ret = (feature_node * (len(index_range) + 2))()
    # Two trailing nodes with index -1; position -2 is reserved for the bias.
    ret[-1].index = -1
    ret[-2].index = -1
    for idx, j in enumerate(index_range):
        ret[idx].index = j
        ret[idx].value = xi[j]
    max_idx = index_range[-1] if index_range else 0
    return ret, max_idx
86
-
87
class problem(Structure):
    """
    ctypes structure describing a training set (fields: l, n, y, x, bias).

    problem(y, x [, bias]) converts every instance of x with
    gen_feature_nodearray(), copies the labels y into a c_double array and
    finally applies ``bias`` through set_bias().

    Raises ValueError if y and x do not have the same length.
    """
    _names = ["l", "n", "y", "x", "bias"]
    _types = [c_int, c_int, POINTER(c_double), POINTER(POINTER(feature_node)), c_double]
    _fields_ = genFields(_names, _types)

    def __init__(self, y, x, bias = -1):
        if len(y) != len(x) :
            raise ValueError("len(y) != len(x)")
        self.l = l = len(y)
        # Start without a bias term; set_bias() below applies the requested one.
        self.bias = -1

        max_idx = 0
        # Python-side list of the per-instance node arrays that self.x points
        # at (presumably kept so the arrays stay referenced -- confirm).
        x_space = self.x_space = []
        for xi in x:
            tmp_xi, tmp_idx = gen_feature_nodearray(xi)
            x_space.append(tmp_xi)
            max_idx = max(max_idx, tmp_idx)
        self.n = max_idx

        self.y = (c_double * l)()
        for i, yi in enumerate(y):
            self.y[i] = yi

        self.x = (POINTER(feature_node) * l)()
        for i, xi in enumerate(self.x_space):
            self.x[i] = xi

        self.set_bias(bias)

    def set_bias(self, bias):
        """
        Change the bias term of every instance.

        A bias >= 0 is stored as an extra feature whose index is self.n
        (self.n is incremented when a bias is first enabled and decremented
        when it is disabled); a bias < 0 writes a node with index -1 instead.
        """
        if self.bias == bias:
            return
        # The node is always built from the *new* bias, so switching between
        # two biases of the same sign (e.g. 1 -> 2) works too; only the
        # feature count depends on the previous state.
        if bias >= 0:
            if self.bias < 0:
                self.n += 1
            node = feature_node(self.n, bias)
        else:
            if self.bias >= 0:
                self.n -= 1
            node = feature_node(-1, bias)

        for xi in self.x_space:
            xi[-2] = node
        self.bias = bias
127
-
128
-
129
class parameter(Structure):
    """
    ctypes structure holding the training parameters (see _names), plus a
    few Python-only attributes set by set_to_default_values(): bias,
    flag_cross_validation, flag_C_specified, flag_solver_specified,
    flag_find_C, nr_fold and print_func.

    parameter([options]) parses ``options`` (a string or a list of tokens)
    with parse_options().
    """
    _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p", "init_sol"]
    _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double, POINTER(c_double)]
    _fields_ = genFields(_names, _types)

    def __init__(self, options = None):
        if options is None:
            options = ''
        self.parse_options(options)

    def __str__(self):
        """List every structure field and Python-side attribute, one per line."""
        attrs = parameter._names + list(self.__dict__.keys())
        lines = [' %s: %s\n' % (attr, getattr(self, attr)) for attr in attrs]
        return ''.join(lines).strip()

    def set_to_default_values(self):
        """Reset every field and Python-side attribute to its default."""
        self.solver_type = L2R_L2LOSS_SVC_DUAL
        # inf means "not specified"; parse_options() replaces it with the
        # solver-specific default tolerance.
        self.eps = float('inf')
        self.C = 1
        self.p = 0.1
        self.nr_weight = 0
        self.weight_label = None
        self.weight = None
        self.init_sol = None
        self.bias = -1
        self.flag_cross_validation = False
        self.flag_C_specified = False
        self.flag_solver_specified = False
        self.flag_find_C = False
        self.nr_fold = 0
        self.print_func = cast(None, PRINT_STRING_FUN)

    def parse_options(self, options):
        """
        Reset to defaults, then apply ``options``.

        options: a str (split on whitespace) or a list of tokens.
        Recognised tokens: -s, -c, -p, -e, -B, -v, -w<label> (each followed
        by one value) and the flags -q and -C.

        Raises TypeError if options is neither a list nor a str, and
        ValueError for an unknown option, an option that is missing its
        value, a fold count below 2, or '-C' combined with a solver other
        than L2R_LR / L2R_L2LOSS_SVC.

        Side effect: installs self.print_func in the library through
        liblinear.set_print_string_function().
        """
        if isinstance(options, list):
            argv = options
        elif isinstance(options, str):
            argv = options.split()
        else:
            raise TypeError("arg 1 should be a list or a str.")
        # Also resets print_func to the NULL callback.
        self.set_to_default_values()
        weight_label = []
        weight = []

        i = 0
        while i < len(argv):
            opt = argv[i]
            if opt == "-q":
                self.print_func = PRINT_STRING_FUN(print_null)
            elif opt == "-C":
                self.flag_find_C = True
            else:
                # Every other recognised option is followed by one value.
                # A missing value is reported like any other malformed
                # option instead of leaking an IndexError.
                i += 1
                if i >= len(argv):
                    raise ValueError("Wrong options")
                value = argv[i]
                if opt == "-s":
                    self.solver_type = int(value)
                    self.flag_solver_specified = True
                elif opt == "-c":
                    self.C = float(value)
                    self.flag_C_specified = True
                elif opt == "-p":
                    self.p = float(value)
                elif opt == "-e":
                    self.eps = float(value)
                elif opt == "-B":
                    self.bias = float(value)
                elif opt == "-v":
                    self.flag_cross_validation = 1
                    self.nr_fold = int(value)
                    if self.nr_fold < 2:
                        raise ValueError("n-fold cross validation: n must >= 2")
                elif opt.startswith("-w"):
                    # "-w<label> <weight>": the label is the text after "-w".
                    self.nr_weight += 1
                    weight_label.append(int(opt[2:]))
                    weight.append(float(value))
                else:
                    raise ValueError("Wrong options")
            i += 1

        liblinear.set_print_string_function(self.print_func)
        self.weight_label = (c_int * self.nr_weight)(*weight_label)
        self.weight = (c_double * self.nr_weight)(*weight)

        # default solver for parameter selection is L2R_L2LOSS_SVC
        if self.flag_find_C:
            if not self.flag_cross_validation:
                self.nr_fold = 5
            if not self.flag_solver_specified:
                self.solver_type = L2R_L2LOSS_SVC
                self.flag_solver_specified = True
            elif self.solver_type not in [L2R_LR, L2R_L2LOSS_SVC]:
                raise ValueError("Warm-start parameter search only available for -s 0 and -s 2")

        # '-e' was not given: pick the default tolerance for the solver.
        if self.eps == float('inf'):
            if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]:
                self.eps = 0.01
            elif self.solver_type in [L2R_L2LOSS_SVR]:
                self.eps = 0.001
            elif self.solver_type in [L2R_L2LOSS_SVC_DUAL, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L2R_LR_DUAL]:
                self.eps = 0.1
            elif self.solver_type in [L1R_L2LOSS_SVC, L1R_LR]:
                self.eps = 0.01
            elif self.solver_type in [L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]:
                self.eps = 0.1
245
-
246
class model(Structure):
    """
    ctypes structure for a trained model (fields listed in _names).

    The ``__createfrom__`` attribute records who created the instance:
    'python' when constructed here, 'C' when toPyModel() wrapped a pointer
    returned by the library.
    """
    _names = ["param", "nr_class", "nr_feature", "w", "label", "bias"]
    _types = [parameter, c_int, c_int, POINTER(c_double), POINTER(c_int), c_double]
    _fields_ = genFields(_names, _types)

    def __init__(self):
        self.__createfrom__ = 'python'

    def __del__(self):
        # free memory created by C to avoid memory leak
        origin = getattr(self, '__createfrom__', None)
        if origin == 'C':
            liblinear.free_and_destroy_model(pointer(self))

    def get_nr_feature(self):
        """Return the library's get_nr_feature() result for this model."""
        return liblinear.get_nr_feature(self)

    def get_nr_class(self):
        """Return the library's get_nr_class() result for this model."""
        return liblinear.get_nr_class(self)

    def get_labels(self):
        """Return the class labels as a Python list of ints."""
        count = self.get_nr_class()
        buf = (c_int * count)()
        liblinear.get_labels(self, buf)
        return [buf[k] for k in range(count)]

    def get_decfun_coef(self, feat_idx, label_idx=0):
        """Return the library's get_decfun_coef() for the given feature and label index."""
        return liblinear.get_decfun_coef(self, feat_idx, label_idx)

    def get_decfun_bias(self, label_idx=0):
        """Return the library's get_decfun_bias() for the given label index."""
        return liblinear.get_decfun_bias(self, label_idx)

    def get_decfun(self, label_idx=0):
        """Return (w, b): coefficients for features 1..nr_feature and the bias."""
        coefs = []
        for feat_idx in range(1, self.nr_feature + 1):
            coefs.append(liblinear.get_decfun_coef(self, feat_idx, label_idx))
        intercept = liblinear.get_decfun_bias(self, label_idx)
        return (coefs, intercept)

    def is_probability_model(self):
        """True if the library's check_probability_model() returns 1."""
        flag = liblinear.check_probability_model(self)
        return flag == 1

    def is_regression_model(self):
        """True if the library's check_regression_model() returns 1."""
        flag = liblinear.check_regression_model(self)
        return flag == 1
287
-
288
def toPyModel(model_ptr):
    """
    toPyModel(model_ptr) -> model

    Return the model that a ctypes POINTER(model) refers to, tagged as
    created by C (``__createfrom__ = 'C'``).

    Raises ValueError if model_ptr is a null (falsy) pointer.
    """
    if not model_ptr:
        raise ValueError("Null pointer")
    py_model = model_ptr.contents
    py_model.__createfrom__ = 'C'
    return py_model
299
-
300
# Declare the return type and argument types of every library function used
# by this module.  Each entry is (function name, restype, argtypes); they are
# applied in order with fillprototype().
_PROTOTYPES = (
    ('train', POINTER(model), [POINTER(problem), POINTER(parameter)]),
    ('find_parameter_C', None, [POINTER(problem), POINTER(parameter), c_int, c_double, c_double, POINTER(c_double), POINTER(c_double)]),
    ('cross_validation', None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)]),

    ('predict_values', c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)]),
    ('predict', c_double, [POINTER(model), POINTER(feature_node)]),
    ('predict_probability', c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)]),

    ('save_model', c_int, [c_char_p, POINTER(model)]),
    ('load_model', POINTER(model), [c_char_p]),

    ('get_nr_feature', c_int, [POINTER(model)]),
    ('get_nr_class', c_int, [POINTER(model)]),
    ('get_labels', None, [POINTER(model), POINTER(c_int)]),
    ('get_decfun_coef', c_double, [POINTER(model), c_int, c_int]),
    ('get_decfun_bias', c_double, [POINTER(model), c_int]),

    ('free_model_content', None, [POINTER(model)]),
    ('free_and_destroy_model', None, [POINTER(POINTER(model))]),
    ('destroy_param', None, [POINTER(parameter)]),
    ('check_parameter', c_char_p, [POINTER(problem), POINTER(parameter)]),
    ('check_probability_model', c_int, [POINTER(model)]),
    ('check_regression_model', c_int, [POINTER(model)]),
    ('set_print_string_function', None, [CFUNCTYPE(None, c_char_p)]),
)
for _func_name, _restype, _argtypes in _PROTOTYPES:
    fillprototype(getattr(liblinear, _func_name), _restype, _argtypes)
# Do not leave the loop helpers behind in the module namespace.
del _PROTOTYPES, _func_name, _restype, _argtypes
@@ -1,270 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- import os, sys
4
- sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path
5
- from liblinear import *
6
- from liblinear import __all__ as liblinear_all
7
- from ctypes import c_double
8
-
9
# Public names: the helpers defined in this module followed by everything
# that the liblinear module exports.
__all__ = [
    'svm_read_problem',
    'load_model',
    'save_model',
    'evaluations',
    'train',
    'predict',
] + liblinear_all
11
-
12
-
13
def svm_read_problem(data_file_name):
    """
    svm_read_problem(data_file_name) -> [y, x]

    Read LIBSVM-format data from data_file_name and return labels y
    and data instances x.

    Each non-blank line is '<label> <index>:<value> ...'.  y is a list of
    floats; x is a list of dicts mapping int index -> float value (an
    empty dict for an instance with no features).  Blank lines are
    skipped.  The file is closed before returning.
    """
    prob_y = []
    prob_x = []
    with open(data_file_name) as data_file:
        for line in data_file:
            fields = line.split(None, 1)
            if not fields:
                # Blank line: nothing to parse.
                continue
            # In case an instance with all zero features
            if len(fields) == 1:
                fields.append('')
            label, features = fields
            xi = {}
            for e in features.split():
                ind, val = e.split(":")
                xi[int(ind)] = float(val)
            prob_y.append(float(label))
            prob_x.append(xi)
    return (prob_y, prob_x)
34
-
35
def load_model(model_file_name):
    """
    load_model(model_file_name) -> model

    Ask the library to load a LIBLINEAR model from model_file_name and
    return it wrapped by toPyModel().  If the library returns a null
    pointer, a message is printed and None is returned.
    """
    model_ptr = liblinear.load_model(model_file_name.encode())
    if model_ptr:
        return toPyModel(model_ptr)
    print("can't open model file %s" % model_file_name)
    return None
47
-
48
def save_model(model_file_name, model):
    """
    save_model(model_file_name, model) -> None

    Pass the encoded file name and ``model`` to the library's save_model().
    The library's return value is discarded.
    """
    encoded_name = model_file_name.encode()
    liblinear.save_model(encoded_name, model)
55
-
56
def evaluations(ty, pv):
    """
    evaluations(ty, pv) -> (ACC, MSE, SCC)

    Calculate accuracy, mean squared error and squared correlation coefficient
    using the true values (ty) and predicted values (pv).

    ACC is a percentage (0-100).  SCC is NaN when its denominator is zero
    (for example when either sequence is constant).  For empty input all
    three values are NaN.

    Raises ValueError if ty and pv differ in length.
    """
    if len(ty) != len(pv):
        raise ValueError("len(ty) must equal to len(pv)")
    count = len(ty)
    if count == 0:
        # No samples: every metric is undefined.
        nan = float('nan')
        return (nan, nan, nan)
    total_correct = total_error = 0
    sumv = sumy = sumvv = sumyy = sumvy = 0
    for v, y in zip(pv, ty):
        if y == v:
            total_correct += 1
        total_error += (v-y)*(v-y)
        sumv += v
        sumy += y
        sumvv += v*v
        sumyy += y*y
        sumvy += v*y
    ACC = 100.0*total_correct/count
    MSE = total_error/count
    covariance = count*sumvy - sumv*sumy
    try:
        SCC = (covariance*covariance)/((count*sumvv-sumv*sumv)*(count*sumyy-sumy*sumy))
    except ZeroDivisionError:
        SCC = float('nan')
    return (ACC, MSE, SCC)
84
-
85
def train(arg1, arg2=None, arg3=None):
    """
    train(y, x [, options]) -> model | ACC | MSE | (best_C, best_rate)
    train(prob [, options]) -> model | ACC | MSE | (best_C, best_rate)
    train(prob, param) -> model | ACC | MSE | (best_C, best_rate)

    Train a model from data (y, x) or a problem prob using
    'options' or a parameter param.
    If '-v' is specified in 'options' (i.e., cross validation)
    either accuracy (ACC) or mean-squared error (MSE) is returned.
    If '-C' is specified, the pair (best C, best cross validation rate)
    found by the library's parameter search is returned instead.

    Raises TypeError for unsupported argument types and ValueError if
    the library's check_parameter() reports a problem.

    options:
        -s type : set type of solver (default 1)
          for multi-class classification
             0 -- L2-regularized logistic regression (primal)
             1 -- L2-regularized L2-loss support vector classification (dual)
             2 -- L2-regularized L2-loss support vector classification (primal)
             3 -- L2-regularized L1-loss support vector classification (dual)
             4 -- support vector classification by Crammer and Singer
             5 -- L1-regularized L2-loss support vector classification
             6 -- L1-regularized logistic regression
             7 -- L2-regularized logistic regression (dual)
          for regression
            11 -- L2-regularized L2-loss support vector regression (primal)
            12 -- L2-regularized L2-loss support vector regression (dual)
            13 -- L2-regularized L1-loss support vector regression (dual)
        -c cost : set the parameter C (default 1)
        -p epsilon : set the epsilon in loss function of SVR (default 0.1)
        -e epsilon : set tolerance of termination criterion
            -s 0 and 2
                |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,
                where f is the primal function, (default 0.01)
            -s 11
                |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)
            -s 1, 3, 4, and 7
                Dual maximal violation <= eps; similar to libsvm (default 0.1)
            -s 5 and 6
                |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,
                where f is the primal function (default 0.01)
            -s 12 and 13
                |f'(alpha)|_1 <= eps |f'(alpha0)|,
                where f is the dual function (default 0.1)
        -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
        -wi weight: weights adjust the parameter C of different classes (see README for details)
        -v n: n-fold cross validation mode
        -C : find parameter C (only for -s 0 and 2)
        -q : quiet mode (no outputs)
    """
    prob, param = None, None
    if isinstance(arg1, (list, tuple)):
        # Explicit check instead of assert, so it survives python -O.
        if not isinstance(arg2, (list, tuple)):
            raise TypeError("Wrong types for the arguments")
        y, x, options = arg1, arg2, arg3
        prob = problem(y, x)
        param = parameter(options)
    elif isinstance(arg1, problem):
        prob = arg1
        if isinstance(arg2, parameter):
            param = arg2
        else:
            param = parameter(arg2)
    if prob is None or param is None:
        raise TypeError("Wrong types for the arguments")

    prob.set_bias(param.bias)
    liblinear.set_print_string_function(param.print_func)
    err_msg = liblinear.check_parameter(prob, param)
    if err_msg:
        # check_parameter is declared to return c_char_p, which is bytes on
        # Python 3; decode so the message does not read "b'...'".
        if not isinstance(err_msg, str):
            err_msg = err_msg.decode()
        raise ValueError('Error: %s' % err_msg)

    if param.flag_find_C:
        nr_fold = param.nr_fold
        best_C = c_double()
        best_rate = c_double()
        max_C = 1024
        # A negative start value is passed when '-c' was not given.
        if param.flag_C_specified:
            start_C = param.C
        else:
            start_C = -1.0
        liblinear.find_parameter_C(prob, param, nr_fold, start_C, max_C, best_C, best_rate)
        print("Best C = %lf CV accuracy = %g%%\n"% (best_C.value, 100.0*best_rate.value))
        return best_C.value,best_rate.value

    elif param.flag_cross_validation:
        l, nr_fold = prob.l, param.nr_fold
        target = (c_double * l)()
        liblinear.cross_validation(prob, param, nr_fold, target)
        ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
        if param.solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]:
            print("Cross Validation Mean squared error = %g" % MSE)
            print("Cross Validation Squared correlation coefficient = %g" % SCC)
            return MSE
        else:
            print("Cross Validation Accuracy = %g%%" % ACC)
            return ACC
    else:
        m = liblinear.train(prob, param)
        m = toPyModel(m)

        return m
184
-
185
def predict(y, x, m, options=""):
    """
    predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)

    Predict data (y, x) with the SVM model m.
    options:
        -b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only
        -q quiet mode (no outputs)

    If y is empty, a list of zeros as long as x is used as the true labels.

    The return tuple contains
    p_labels: a list of predicted labels
    p_acc: a tuple including accuracy (for classification), mean-squared
           error, and squared correlation coefficient (for regression).
    p_vals: a list of decision values or probability estimates (if '-b 1'
            is specified). If k is the number of classes, for decision values,
            each element includes results of predicting k binary-class
            SVMs. if k = 2 and solver is not MCSVM_CS, only one decision value
            is returned. For probabilities, each element contains k values
            indicating the probability that the testing instance is in each class.
            Note that the order of classes here is the same as 'model.label'
            field in the model structure.

    Raises ValueError for an unknown option or a '-b' without a value, and
    TypeError if '-b 1' is requested for a model that is not a probability
    model.
    """

    def info(s):
        print(s)

    predict_probability = 0
    argv = options.split()
    i = 0
    while i < len(argv):
        if argv[i] == '-b':
            i += 1
            if i >= len(argv):
                # '-b' given without its value
                raise ValueError("Wrong options")
            predict_probability = int(argv[i])
        elif argv[i] == '-q':
            info = print_null
        else:
            raise ValueError("Wrong options")
        i += 1

    solver_type = m.param.solver_type
    nr_class = m.get_nr_class()
    nr_feature = m.get_nr_feature()
    is_prob_model = m.is_probability_model()
    bias = m.bias
    if bias >= 0:
        biasterm = feature_node(nr_feature+1, bias)
    else:
        biasterm = feature_node(-1, bias)
    pred_labels = []
    pred_values = []

    if predict_probability:
        if not is_prob_model:
            raise TypeError('probability output is only supported for logistic regression')
        prob_estimates = (c_double * nr_class)()
        for xi in x:
            xi, _ = gen_feature_nodearray(xi, feature_max=nr_feature)
            xi[-2] = biasterm
            label = liblinear.predict_probability(m, xi, prob_estimates)
            values = prob_estimates[:nr_class]
            pred_labels.append(label)
            pred_values.append(values)
    else:
        # As documented above, a single decision value is produced only for
        # (at most) two classes with a solver other than MCSVM_CS; otherwise
        # there is one value per class.  NOTE(review): this assumes the
        # library's predict_values follows the documented rule -- confirm
        # against linear.cpp.
        if nr_class <= 2 and solver_type != MCSVM_CS:
            nr_classifier = 1
        else:
            nr_classifier = nr_class
        dec_values = (c_double * nr_classifier)()
        for xi in x:
            xi, _ = gen_feature_nodearray(xi, feature_max=nr_feature)
            xi[-2] = biasterm
            label = liblinear.predict_values(m, xi, dec_values)
            values = dec_values[:nr_classifier]
            pred_labels.append(label)
            pred_values.append(values)
    if len(y) == 0:
        y = [0] * len(x)
    ACC, MSE, SCC = evaluations(y, pred_labels)
    l = len(y)
    if m.is_regression_model():
        info("Mean squared error = %g (regression)" % MSE)
        info("Squared correlation coefficient = %g (regression)" % SCC)
    else:
        # Round rather than truncate: l*ACC/100 can land just below the true
        # count because of floating-point error.
        correct = int(round(l*ACC/100)) if l else 0
        info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, correct, l))

    return pred_labels, (ACC, MSE, SCC), pred_values