liblinear-ruby 1.0.1 → 1.0.2
This diff compares the contents of publicly available package versions as released to their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/blasp.h +8 -8
- data/ext/daxpy.c +3 -3
- data/ext/ddot.c +3 -3
- data/ext/dnrm2.c +7 -7
- data/ext/dscal.c +4 -4
- data/ext/liblinear_wrap.cxx +382 -382
- data/ext/linear.cpp +44 -55
- data/ext/linear.h +5 -1
- data/ext/tron.cpp +13 -5
- data/ext/tron.h +1 -1
- data/lib/liblinear.rb +2 -0
- data/lib/liblinear/version.rb +1 -1
- metadata +2 -41
- data/liblinear-2.1/COPYRIGHT +0 -31
- data/liblinear-2.1/Makefile +0 -37
- data/liblinear-2.1/Makefile.win +0 -24
- data/liblinear-2.1/README +0 -600
- data/liblinear-2.1/blas/Makefile +0 -22
- data/liblinear-2.1/blas/blas.h +0 -25
- data/liblinear-2.1/blas/blasp.h +0 -438
- data/liblinear-2.1/blas/daxpy.c +0 -57
- data/liblinear-2.1/blas/ddot.c +0 -58
- data/liblinear-2.1/blas/dnrm2.c +0 -70
- data/liblinear-2.1/blas/dscal.c +0 -52
- data/liblinear-2.1/heart_scale +0 -270
- data/liblinear-2.1/linear.cpp +0 -3053
- data/liblinear-2.1/linear.def +0 -22
- data/liblinear-2.1/linear.h +0 -79
- data/liblinear-2.1/matlab/Makefile +0 -49
- data/liblinear-2.1/matlab/README +0 -208
- data/liblinear-2.1/matlab/libsvmread.c +0 -212
- data/liblinear-2.1/matlab/libsvmwrite.c +0 -119
- data/liblinear-2.1/matlab/linear_model_matlab.c +0 -176
- data/liblinear-2.1/matlab/linear_model_matlab.h +0 -2
- data/liblinear-2.1/matlab/make.m +0 -22
- data/liblinear-2.1/matlab/predict.c +0 -341
- data/liblinear-2.1/matlab/train.c +0 -492
- data/liblinear-2.1/predict.c +0 -243
- data/liblinear-2.1/python/Makefile +0 -4
- data/liblinear-2.1/python/README +0 -380
- data/liblinear-2.1/python/liblinear.py +0 -323
- data/liblinear-2.1/python/liblinearutil.py +0 -270
- data/liblinear-2.1/train.c +0 -449
- data/liblinear-2.1/tron.cpp +0 -241
- data/liblinear-2.1/tron.h +0 -35
- data/liblinear-2.1/windows/liblinear.dll +0 -0
- data/liblinear-2.1/windows/libsvmread.mexw64 +0 -0
- data/liblinear-2.1/windows/libsvmwrite.mexw64 +0 -0
- data/liblinear-2.1/windows/predict.exe +0 -0
- data/liblinear-2.1/windows/predict.mexw64 +0 -0
- data/liblinear-2.1/windows/train.exe +0 -0
- data/liblinear-2.1/windows/train.mexw64 +0 -0
```diff
--- a/data/liblinear-2.1/python/liblinear.py
+++ /dev/null
@@ -1,323 +0,0 @@
-#!/usr/bin/env python
-
-from ctypes import *
-from ctypes.util import find_library
-from os import path
-import sys
-
-__all__ = ['liblinear', 'feature_node', 'gen_feature_nodearray', 'problem',
-           'parameter', 'model', 'toPyModel', 'L2R_LR', 'L2R_L2LOSS_SVC_DUAL',
-           'L2R_L2LOSS_SVC', 'L2R_L1LOSS_SVC_DUAL', 'MCSVM_CS',
-           'L1R_L2LOSS_SVC', 'L1R_LR', 'L2R_LR_DUAL', 'L2R_L2LOSS_SVR',
-           'L2R_L2LOSS_SVR_DUAL', 'L2R_L1LOSS_SVR_DUAL', 'print_null']
-
-try:
-    dirname = path.dirname(path.abspath(__file__))
-    if sys.platform == 'win32':
-        liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll'))
-    else:
-        liblinear = CDLL(path.join(dirname, '../liblinear.so.3'))
-except:
-    # For unix the prefix 'lib' is not considered.
-    if find_library('linear'):
-        liblinear = CDLL(find_library('linear'))
-    elif find_library('liblinear'):
-        liblinear = CDLL(find_library('liblinear'))
-    else:
-        raise Exception('LIBLINEAR library not found.')
-
-L2R_LR = 0
-L2R_L2LOSS_SVC_DUAL = 1
-L2R_L2LOSS_SVC = 2
-L2R_L1LOSS_SVC_DUAL = 3
-MCSVM_CS = 4
-L1R_L2LOSS_SVC = 5
-L1R_LR = 6
-L2R_LR_DUAL = 7
-L2R_L2LOSS_SVR = 11
-L2R_L2LOSS_SVR_DUAL = 12
-L2R_L1LOSS_SVR_DUAL = 13
-
-PRINT_STRING_FUN = CFUNCTYPE(None, c_char_p)
-def print_null(s):
-    return
-
-def genFields(names, types):
-    return list(zip(names, types))
-
-def fillprototype(f, restype, argtypes):
-    f.restype = restype
-    f.argtypes = argtypes
-
-class feature_node(Structure):
-    _names = ["index", "value"]
-    _types = [c_int, c_double]
-    _fields_ = genFields(_names, _types)
-
-    def __str__(self):
-        return '%d:%g' % (self.index, self.value)
-
-def gen_feature_nodearray(xi, feature_max=None, issparse=True):
-    if isinstance(xi, dict):
-        index_range = xi.keys()
-    elif isinstance(xi, (list, tuple)):
-        xi = [0] + xi  # idx should start from 1
-        index_range = range(1, len(xi))
-    else:
-        raise TypeError('xi should be a dictionary, list or tuple')
-
-    if feature_max:
-        assert(isinstance(feature_max, int))
-        index_range = filter(lambda j: j <= feature_max, index_range)
-    if issparse:
-        index_range = filter(lambda j: xi[j] != 0, index_range)
-
-    index_range = sorted(index_range)
-    ret = (feature_node * (len(index_range)+2))()
-    ret[-1].index = -1  # for bias term
-    ret[-2].index = -1
-    for idx, j in enumerate(index_range):
-        ret[idx].index = j
-        ret[idx].value = xi[j]
-    max_idx = 0
-    if index_range:
-        max_idx = index_range[-1]
-    return ret, max_idx
-
-class problem(Structure):
-    _names = ["l", "n", "y", "x", "bias"]
-    _types = [c_int, c_int, POINTER(c_double), POINTER(POINTER(feature_node)), c_double]
-    _fields_ = genFields(_names, _types)
-
-    def __init__(self, y, x, bias = -1):
-        if len(y) != len(x):
-            raise ValueError("len(y) != len(x)")
-        self.l = l = len(y)
-        self.bias = -1
-
-        max_idx = 0
-        x_space = self.x_space = []
-        for i, xi in enumerate(x):
-            tmp_xi, tmp_idx = gen_feature_nodearray(xi)
-            x_space += [tmp_xi]
-            max_idx = max(max_idx, tmp_idx)
-        self.n = max_idx
-
-        self.y = (c_double * l)()
-        for i, yi in enumerate(y): self.y[i] = y[i]
-
-        self.x = (POINTER(feature_node) * l)()
-        for i, xi in enumerate(self.x_space): self.x[i] = xi
-
-        self.set_bias(bias)
-
-    def set_bias(self, bias):
-        if self.bias == bias:
-            return
-        if bias >= 0 and self.bias < 0:
-            self.n += 1
-            node = feature_node(self.n, bias)
-        if bias < 0 and self.bias >= 0:
-            self.n -= 1
-            node = feature_node(-1, bias)
-
-        for xi in self.x_space:
-            xi[-2] = node
-        self.bias = bias
-
-
-class parameter(Structure):
-    _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p", "init_sol"]
-    _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double, POINTER(c_double)]
-    _fields_ = genFields(_names, _types)
-
-    def __init__(self, options = None):
-        if options == None:
-            options = ''
-        self.parse_options(options)
-
-    def __str__(self):
-        s = ''
-        attrs = parameter._names + list(self.__dict__.keys())
-        values = map(lambda attr: getattr(self, attr), attrs)
-        for attr, val in zip(attrs, values):
-            s += (' %s: %s\n' % (attr, val))
-        s = s.strip()
-
-        return s
-
-    def set_to_default_values(self):
-        self.solver_type = L2R_L2LOSS_SVC_DUAL
-        self.eps = float('inf')
-        self.C = 1
-        self.p = 0.1
-        self.nr_weight = 0
-        self.weight_label = None
-        self.weight = None
-        self.init_sol = None
-        self.bias = -1
-        self.flag_cross_validation = False
-        self.flag_C_specified = False
-        self.flag_solver_specified = False
-        self.flag_find_C = False
-        self.nr_fold = 0
-        self.print_func = cast(None, PRINT_STRING_FUN)
-
-    def parse_options(self, options):
-        if isinstance(options, list):
-            argv = options
-        elif isinstance(options, str):
-            argv = options.split()
-        else:
-            raise TypeError("arg 1 should be a list or a str.")
-        self.set_to_default_values()
-        self.print_func = cast(None, PRINT_STRING_FUN)
-        weight_label = []
-        weight = []
-
-        i = 0
-        while i < len(argv):
-            if argv[i] == "-s":
-                i = i + 1
-                self.solver_type = int(argv[i])
-                self.flag_solver_specified = True
-            elif argv[i] == "-c":
-                i = i + 1
-                self.C = float(argv[i])
-                self.flag_C_specified = True
-            elif argv[i] == "-p":
-                i = i + 1
-                self.p = float(argv[i])
-            elif argv[i] == "-e":
-                i = i + 1
-                self.eps = float(argv[i])
-            elif argv[i] == "-B":
-                i = i + 1
-                self.bias = float(argv[i])
-            elif argv[i] == "-v":
-                i = i + 1
-                self.flag_cross_validation = 1
-                self.nr_fold = int(argv[i])
-                if self.nr_fold < 2:
-                    raise ValueError("n-fold cross validation: n must >= 2")
-            elif argv[i].startswith("-w"):
-                i = i + 1
-                self.nr_weight += 1
-                weight_label += [int(argv[i-1][2:])]
-                weight += [float(argv[i])]
-            elif argv[i] == "-q":
-                self.print_func = PRINT_STRING_FUN(print_null)
-            elif argv[i] == "-C":
-                self.flag_find_C = True
-
-            else:
-                raise ValueError("Wrong options")
-            i += 1
-
-        liblinear.set_print_string_function(self.print_func)
-        self.weight_label = (c_int*self.nr_weight)()
-        self.weight = (c_double*self.nr_weight)()
-        for i in range(self.nr_weight):
-            self.weight[i] = weight[i]
-            self.weight_label[i] = weight_label[i]
-
-        # default solver for parameter selection is L2R_L2LOSS_SVC
-        if self.flag_find_C:
-            if not self.flag_cross_validation:
-                self.nr_fold = 5
-            if not self.flag_solver_specified:
-                self.solver_type = L2R_L2LOSS_SVC
-                self.flag_solver_specified = True
-            elif self.solver_type not in [L2R_LR, L2R_L2LOSS_SVC]:
-                raise ValueError("Warm-start parameter search only available for -s 0 and -s 2")
-
-        if self.eps == float('inf'):
-            if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]:
-                self.eps = 0.01
-            elif self.solver_type in [L2R_L2LOSS_SVR]:
-                self.eps = 0.001
-            elif self.solver_type in [L2R_L2LOSS_SVC_DUAL, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L2R_LR_DUAL]:
-                self.eps = 0.1
-            elif self.solver_type in [L1R_L2LOSS_SVC, L1R_LR]:
-                self.eps = 0.01
-            elif self.solver_type in [L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]:
-                self.eps = 0.1
-
-class model(Structure):
-    _names = ["param", "nr_class", "nr_feature", "w", "label", "bias"]
-    _types = [parameter, c_int, c_int, POINTER(c_double), POINTER(c_int), c_double]
-    _fields_ = genFields(_names, _types)
-
-    def __init__(self):
-        self.__createfrom__ = 'python'
-
-    def __del__(self):
-        # free memory created by C to avoid memory leak
-        if hasattr(self, '__createfrom__') and self.__createfrom__ == 'C':
-            liblinear.free_and_destroy_model(pointer(self))
-
-    def get_nr_feature(self):
-        return liblinear.get_nr_feature(self)
-
-    def get_nr_class(self):
-        return liblinear.get_nr_class(self)
-
-    def get_labels(self):
-        nr_class = self.get_nr_class()
-        labels = (c_int * nr_class)()
-        liblinear.get_labels(self, labels)
-        return labels[:nr_class]
-
-    def get_decfun_coef(self, feat_idx, label_idx=0):
-        return liblinear.get_decfun_coef(self, feat_idx, label_idx)
-
-    def get_decfun_bias(self, label_idx=0):
-        return liblinear.get_decfun_bias(self, label_idx)
-
-    def get_decfun(self, label_idx=0):
-        w = [liblinear.get_decfun_coef(self, feat_idx, label_idx) for feat_idx in range(1, self.nr_feature+1)]
-        b = liblinear.get_decfun_bias(self, label_idx)
-        return (w, b)
-
-    def is_probability_model(self):
-        return (liblinear.check_probability_model(self) == 1)
-
-    def is_regression_model(self):
-        return (liblinear.check_regression_model(self) == 1)
-
-def toPyModel(model_ptr):
-    """
-    toPyModel(model_ptr) -> model
-
-    Convert a ctypes POINTER(model) to a Python model
-    """
-    if bool(model_ptr) == False:
-        raise ValueError("Null pointer")
-    m = model_ptr.contents
-    m.__createfrom__ = 'C'
-    return m
-
-fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)])
-fillprototype(liblinear.find_parameter_C, None, [POINTER(problem), POINTER(parameter), c_int, c_double, c_double, POINTER(c_double), POINTER(c_double)])
-fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)])
-
-fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
-fillprototype(liblinear.predict, c_double, [POINTER(model), POINTER(feature_node)])
-fillprototype(liblinear.predict_probability, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
-
-fillprototype(liblinear.save_model, c_int, [c_char_p, POINTER(model)])
-fillprototype(liblinear.load_model, POINTER(model), [c_char_p])
-
-fillprototype(liblinear.get_nr_feature, c_int, [POINTER(model)])
-fillprototype(liblinear.get_nr_class, c_int, [POINTER(model)])
-fillprototype(liblinear.get_labels, None, [POINTER(model), POINTER(c_int)])
-fillprototype(liblinear.get_decfun_coef, c_double, [POINTER(model), c_int, c_int])
-fillprototype(liblinear.get_decfun_bias, c_double, [POINTER(model), c_int])
-
-fillprototype(liblinear.free_model_content, None, [POINTER(model)])
-fillprototype(liblinear.free_and_destroy_model, None, [POINTER(POINTER(model))])
-fillprototype(liblinear.destroy_param, None, [POINTER(parameter)])
-fillprototype(liblinear.check_parameter, c_char_p, [POINTER(problem), POINTER(parameter)])
-fillprototype(liblinear.check_probability_model, c_int, [POINTER(model)])
-fillprototype(liblinear.check_regression_model, c_int, [POINTER(model)])
-fillprototype(liblinear.set_print_string_function, None, [CFUNCTYPE(None, c_char_p)])
```
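For context, the deleted `liblinear.py` is upstream LIBLINEAR's ctypes wrapper; the gem bundled it inside the `data/liblinear-2.1/` tree but never loaded it, so dropping it only slims the package. Below is a minimal sketch (not part of the gem) of how that wrapper was typically driven directly, assuming the file above is importable and its `CDLL` lookup finds a compiled liblinear shared library:

```python
# Sketch: exercising the removed ctypes wrapper directly.
# Assumes liblinear.py (above) is on sys.path and its shared-library load succeeds.
from liblinear import (problem, parameter, liblinear, toPyModel,
                       gen_feature_nodearray)

y = [1, -1]                          # labels
x = [{1: 1.0, 3: 2.0}, {1: -1.0}]    # sparse instances as {index: value}

prob = problem(y, x)                 # fills the C 'problem' struct
param = parameter('-s 2 -c 1 -q')    # L2R_L2LOSS_SVC, C = 1, quiet
model = toPyModel(liblinear.train(prob, param))  # POINTER(model) -> model

# Predict one instance; the two trailing nodes are the bias slot and the
# index = -1 terminator that gen_feature_nodearray reserves.
nodes, _ = gen_feature_nodearray({1: 1.0, 3: 2.0})
print(liblinear.predict(model, nodes))
```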
```diff
--- a/data/liblinear-2.1/python/liblinearutil.py
+++ /dev/null
@@ -1,270 +0,0 @@
-#!/usr/bin/env python
-
-import os, sys
-sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path
-from liblinear import *
-from liblinear import __all__ as liblinear_all
-from ctypes import c_double
-
-__all__ = ['svm_read_problem', 'load_model', 'save_model', 'evaluations',
-           'train', 'predict'] + liblinear_all
-
-
-def svm_read_problem(data_file_name):
-    """
-    svm_read_problem(data_file_name) -> [y, x]
-
-    Read LIBSVM-format data from data_file_name and return labels y
-    and data instances x.
-    """
-    prob_y = []
-    prob_x = []
-    for line in open(data_file_name):
-        line = line.split(None, 1)
-        # In case an instance with all zero features
-        if len(line) == 1: line += ['']
-        label, features = line
-        xi = {}
-        for e in features.split():
-            ind, val = e.split(":")
-            xi[int(ind)] = float(val)
-        prob_y += [float(label)]
-        prob_x += [xi]
-    return (prob_y, prob_x)
-
-def load_model(model_file_name):
-    """
-    load_model(model_file_name) -> model
-
-    Load a LIBLINEAR model from model_file_name and return.
-    """
-    model = liblinear.load_model(model_file_name.encode())
-    if not model:
-        print("can't open model file %s" % model_file_name)
-        return None
-    model = toPyModel(model)
-    return model
-
-def save_model(model_file_name, model):
-    """
-    save_model(model_file_name, model) -> None
-
-    Save a LIBLINEAR model to the file model_file_name.
-    """
-    liblinear.save_model(model_file_name.encode(), model)
-
-def evaluations(ty, pv):
-    """
-    evaluations(ty, pv) -> (ACC, MSE, SCC)
-
-    Calculate accuracy, mean squared error and squared correlation coefficient
-    using the true values (ty) and predicted values (pv).
-    """
-    if len(ty) != len(pv):
-        raise ValueError("len(ty) must equal to len(pv)")
-    total_correct = total_error = 0
-    sumv = sumy = sumvv = sumyy = sumvy = 0
-    for v, y in zip(pv, ty):
-        if y == v:
-            total_correct += 1
-        total_error += (v-y)*(v-y)
-        sumv += v
-        sumy += y
-        sumvv += v*v
-        sumyy += y*y
-        sumvy += v*y
-    l = len(ty)
-    ACC = 100.0*total_correct/l
-    MSE = total_error/l
-    try:
-        SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
-    except:
-        SCC = float('nan')
-    return (ACC, MSE, SCC)
-
-def train(arg1, arg2=None, arg3=None):
-    """
-    train(y, x [, options]) -> model | ACC
-    train(prob [, options]) -> model | ACC
-    train(prob, param) -> model | ACC
-
-    Train a model from data (y, x) or a problem prob using
-    'options' or a parameter param.
-    If '-v' is specified in 'options' (i.e., cross validation)
-    either accuracy (ACC) or mean-squared error (MSE) is returned.
-
-    options:
-        -s type : set type of solver (default 1)
-          for multi-class classification
-            0 -- L2-regularized logistic regression (primal)
-            1 -- L2-regularized L2-loss support vector classification (dual)
-            2 -- L2-regularized L2-loss support vector classification (primal)
-            3 -- L2-regularized L1-loss support vector classification (dual)
-            4 -- support vector classification by Crammer and Singer
-            5 -- L1-regularized L2-loss support vector classification
-            6 -- L1-regularized logistic regression
-            7 -- L2-regularized logistic regression (dual)
-          for regression
-            11 -- L2-regularized L2-loss support vector regression (primal)
-            12 -- L2-regularized L2-loss support vector regression (dual)
-            13 -- L2-regularized L1-loss support vector regression (dual)
-        -c cost : set the parameter C (default 1)
-        -p epsilon : set the epsilon in loss function of SVR (default 0.1)
-        -e epsilon : set tolerance of termination criterion
-            -s 0 and 2
-                |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,
-                where f is the primal function, (default 0.01)
-            -s 11
-                |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)
-            -s 1, 3, 4, and 7
-                Dual maximal violation <= eps; similar to libsvm (default 0.1)
-            -s 5 and 6
-                |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,
-                where f is the primal function (default 0.01)
-            -s 12 and 13
-                |f'(alpha)|_1 <= eps |f'(alpha0)|,
-                where f is the dual function (default 0.1)
-        -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
-        -wi weight: weights adjust the parameter C of different classes (see README for details)
-        -v n: n-fold cross validation mode
-        -q : quiet mode (no outputs)
-    """
-    prob, param = None, None
-    if isinstance(arg1, (list, tuple)):
-        assert isinstance(arg2, (list, tuple))
-        y, x, options = arg1, arg2, arg3
-        prob = problem(y, x)
-        param = parameter(options)
-    elif isinstance(arg1, problem):
-        prob = arg1
-        if isinstance(arg2, parameter):
-            param = arg2
-        else:
-            param = parameter(arg2)
-    if prob == None or param == None:
-        raise TypeError("Wrong types for the arguments")
-
-    prob.set_bias(param.bias)
-    liblinear.set_print_string_function(param.print_func)
-    err_msg = liblinear.check_parameter(prob, param)
-    if err_msg:
-        raise ValueError('Error: %s' % err_msg)
-
-    if param.flag_find_C:
-        nr_fold = param.nr_fold
-        best_C = c_double()
-        best_rate = c_double()
-        max_C = 1024
-        if param.flag_C_specified:
-            start_C = param.C
-        else:
-            start_C = -1.0
-        liblinear.find_parameter_C(prob, param, nr_fold, start_C, max_C, best_C, best_rate)
-        print("Best C = %lf CV accuracy = %g%%\n" % (best_C.value, 100.0*best_rate.value))
-        return best_C.value, best_rate.value
-
-
-    elif param.flag_cross_validation:
-        l, nr_fold = prob.l, param.nr_fold
-        target = (c_double * l)()
-        liblinear.cross_validation(prob, param, nr_fold, target)
-        ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
-        if param.solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]:
-            print("Cross Validation Mean squared error = %g" % MSE)
-            print("Cross Validation Squared correlation coefficient = %g" % SCC)
-            return MSE
-        else:
-            print("Cross Validation Accuracy = %g%%" % ACC)
-            return ACC
-    else:
-        m = liblinear.train(prob, param)
-        m = toPyModel(m)
-
-        return m
-
-def predict(y, x, m, options=""):
-    """
-    predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)
-
-    Predict data (y, x) with the SVM model m.
-    options:
-        -b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only
-        -q quiet mode (no outputs)
-
-    The return tuple contains
-    p_labels: a list of predicted labels
-    p_acc: a tuple including accuracy (for classification), mean-squared
-        error, and squared correlation coefficient (for regression).
-    p_vals: a list of decision values or probability estimates (if '-b 1'
-        is specified). If k is the number of classes, for decision values,
-        each element includes results of predicting k binary-class
-        SVMs. if k = 2 and solver is not MCSVM_CS, only one decision value
-        is returned. For probabilities, each element contains k values
-        indicating the probability that the testing instance is in each class.
-        Note that the order of classes here is the same as 'model.label'
-        field in the model structure.
-    """
-
-    def info(s):
-        print(s)
-
-    predict_probability = 0
-    argv = options.split()
-    i = 0
-    while i < len(argv):
-        if argv[i] == '-b':
-            i += 1
-            predict_probability = int(argv[i])
-        elif argv[i] == '-q':
-            info = print_null
-        else:
-            raise ValueError("Wrong options")
-        i += 1
-
-    solver_type = m.param.solver_type
-    nr_class = m.get_nr_class()
-    nr_feature = m.get_nr_feature()
-    is_prob_model = m.is_probability_model()
-    bias = m.bias
-    if bias >= 0:
-        biasterm = feature_node(nr_feature+1, bias)
-    else:
-        biasterm = feature_node(-1, bias)
-    pred_labels = []
-    pred_values = []
-
-    if predict_probability:
-        if not is_prob_model:
-            raise TypeError('probability output is only supported for logistic regression')
-        prob_estimates = (c_double * nr_class)()
-        for xi in x:
-            xi, idx = gen_feature_nodearray(xi, feature_max=nr_feature)
-            xi[-2] = biasterm
-            label = liblinear.predict_probability(m, xi, prob_estimates)
-            values = prob_estimates[:nr_class]
-            pred_labels += [label]
-            pred_values += [values]
-    else:
-        if nr_class <= 2:
-            nr_classifier = 1
-        else:
-            nr_classifier = nr_class
-        dec_values = (c_double * nr_classifier)()
-        for xi in x:
-            xi, idx = gen_feature_nodearray(xi, feature_max=nr_feature)
-            xi[-2] = biasterm
-            label = liblinear.predict_values(m, xi, dec_values)
-            values = dec_values[:nr_classifier]
-            pred_labels += [label]
-            pred_values += [values]
-    if len(y) == 0:
-        y = [0] * len(x)
-    ACC, MSE, SCC = evaluations(y, pred_labels)
-    l = len(y)
-    if m.is_regression_model():
-        info("Mean squared error = %g (regression)" % MSE)
-        info("Squared correlation coefficient = %g (regression)" % SCC)
-    else:
-        info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))
-
-    return pred_labels, (ACC, MSE, SCC), pred_values
```
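The deleted `liblinearutil.py` layered file I/O, training, and evaluation helpers over the wrapper above. Here is a short sketch of the workflow it supported, mirroring upstream LIBLINEAR's README usage; it assumes both removed Python files and the also-removed `heart_scale` sample (270 LIBSVM-format lines, per the listing above) are available locally, and the 200/70 train/test split is arbitrary:

```python
# Sketch: the high-level helpers liblinearutil.py (above) provided.
from liblinearutil import svm_read_problem, train, predict, save_model, load_model

y, x = svm_read_problem('heart_scale')          # labels + sparse instances
model = train(y[:200], x[:200], '-s 0 -c 4')    # L2-regularized LR (primal), C = 4
save_model('heart_scale.model', model)

# Reload and evaluate on the held-out tail; p_acc is (ACC, MSE, SCC).
model = load_model('heart_scale.model')
p_labels, p_acc, p_vals = predict(y[200:], x[200:], model)
```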