libsvm-ruby-swig 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS +3 -0
- data/COPYING +24 -0
- data/History.txt +4 -0
- data/Manifest.txt +11 -0
- data/README.rdoc +60 -0
- data/Rakefile +38 -0
- data/ext/extconf.rb +7 -0
- data/ext/libsvm_wrap.cxx +4387 -0
- data/ext/svm.cpp +3072 -0
- data/ext/svm.h +76 -0
- data/lib/svm.rb +356 -0
- metadata +89 -0
data/ext/svm.h
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
#ifndef _LIBSVM_H
|
2
|
+
#define _LIBSVM_H
|
3
|
+
|
4
|
+
#define LIBSVM_VERSION 290
|
5
|
+
|
6
|
+
#ifdef __cplusplus
|
7
|
+
extern "C" {
|
8
|
+
#endif
|
9
|
+
|
10
|
+
extern int libsvm_version;
|
11
|
+
|
12
|
+
/*
 * One (index, value) entry of a sparse feature vector.
 *
 * The Ruby wrapper terminates every node array with an entry whose
 * index is -1.
 */
struct svm_node
{
	int index;	/* feature index; -1 marks the end of the vector */
	double value;	/* feature value stored at that index */
};
|
17
|
+
|
18
|
+
/*
 * A training set: l samples, each with a target value and a pointer to
 * its sparse feature vector.
 */
struct svm_problem
{
	int l;			/* number of training samples */
	double *y;		/* l target values */
	struct svm_node **x;	/* l pointers to svm_node arrays */
};
|
24
|
+
|
25
|
+
/* svm_type: values accepted by svm_parameter.svm_type */
enum {
	C_SVC = 0,
	NU_SVC,
	ONE_CLASS,
	EPSILON_SVR,
	NU_SVR
};

/* kernel_type: values accepted by svm_parameter.kernel_type */
enum {
	LINEAR = 0,
	POLY,
	RBF,
	SIGMOID,
	PRECOMPUTED
};
|
27
|
+
|
28
|
+
/*
 * Training and kernel configuration.
 *
 * The first group of fields describes the model itself; the second group
 * is only consulted while training.
 */
struct svm_parameter
{
	int svm_type;		/* one of the svm_type enumerators */
	int kernel_type;	/* one of the kernel_type enumerators */
	int degree;		/* for poly */
	double gamma;		/* for poly/rbf/sigmoid */
	double coef0;		/* for poly/sigmoid */

	/* these are for training only */
	double cache_size;	/* in MB */
	double eps;		/* stopping criteria */
	double C;		/* for C_SVC, EPSILON_SVR and NU_SVR */
	int nr_weight;		/* for C_SVC */
	int *weight_label;	/* for C_SVC */
	double *weight;		/* for C_SVC */
	double nu;		/* for NU_SVC, ONE_CLASS, and NU_SVR */
	double p;		/* for EPSILON_SVR */
	int shrinking;		/* use the shrinking heuristics */
	int probability;	/* do probability estimates */
};
|
48
|
+
|
49
|
+
/* Training. svm_cross_validation writes its per-sample results into
 * target; the Ruby wrapper sizes that buffer to the number of samples. */
struct svm_model *svm_train(const struct svm_problem *prob,
			    const struct svm_parameter *param);
void svm_cross_validation(const struct svm_problem *prob,
			  const struct svm_parameter *param,
			  int nr_fold, double *target);

/* Model persistence. */
int svm_save_model(const char *model_file_name,
		   const struct svm_model *model);
struct svm_model *svm_load_model(const char *model_file_name);

/* Model queries. svm_get_labels fills label; the Ruby wrapper passes a
 * buffer with svm_get_nr_class() entries. */
int svm_get_svm_type(const struct svm_model *model);
int svm_get_nr_class(const struct svm_model *model);
void svm_get_labels(const struct svm_model *model, int *label);
double svm_get_svr_probability(const struct svm_model *model);

/* Prediction. The Ruby wrapper passes nr_class*(nr_class-1)/2 slots for
 * dec_values and nr_class slots for prob_estimates. */
void svm_predict_values(const struct svm_model *model,
			const struct svm_node *x, double *dec_values);
double svm_predict(const struct svm_model *model, const struct svm_node *x);
double svm_predict_probability(const struct svm_model *model,
			       const struct svm_node *x,
			       double *prob_estimates);

/* Cleanup. */
void svm_destroy_model(struct svm_model *model);
void svm_destroy_param(struct svm_parameter *param);

/* Validation. The Ruby wrapper treats a non-NULL result of
 * svm_check_parameter as an error message. */
const char *svm_check_parameter(const struct svm_problem *prob,
				const struct svm_parameter *param);
int svm_check_probability_model(const struct svm_model *model);

/* Output hook: a function pointer taking a string (presumably used for
 * the library's log output; confirm against svm.cpp). */
extern void (*svm_print_string)(const char *);
|
71
|
+
|
72
|
+
#ifdef __cplusplus
|
73
|
+
}
|
74
|
+
#endif
|
75
|
+
|
76
|
+
#endif /* _LIBSVM_H */
|
data/lib/svm.rb
ADDED
@@ -0,0 +1,356 @@
|
|
1
|
+
require 'libsvm'
|
2
|
+
include Libsvm
|
3
|
+
|
4
|
+
# Copies every element of +seq+ into a newly allocated C int array.
#
# The array is created with new_int and filled slot by slot with
# int_setitem. Returns the array handle; it can later be released with
# delete_int.
def _int_array(seq)
  c_ints = new_int(seq.size)
  seq.each_with_index do |value, slot|
    int_setitem(c_ints, slot, value)
  end
  c_ints
end
|
14
|
+
|
15
|
+
# Copies every element of +seq+ into a newly allocated C double array.
#
# The array is created with new_double and filled slot by slot with
# double_setitem. Returns the array handle; it can later be released with
# delete_double.
def _double_array(seq)
  c_doubles = new_double(seq.size)
  seq.each_with_index do |value, slot|
    double_setitem(c_doubles, slot, value)
  end
  c_doubles
end
|
25
|
+
|
26
|
+
# Releases a C int array handle with delete_int.
#
# nil is ignored, as is any object that reports itself empty. The
# empty? check is only applied when the object actually responds to it:
# calling it unconditionally raises NoMethodError for handles that do not
# define empty? (NOTE(review): SWIG pointer wrappers are expected to be
# such objects -- confirm), which would prevent the array from ever being
# freed.
def _free_int_array(x)
  return if x.nil?
  return if x.respond_to?(:empty?) && x.empty?
  delete_int(x)
end
|
31
|
+
|
32
|
+
# Releases a C double array handle with delete_double.
#
# nil is ignored, as is any object that reports itself empty. The
# empty? check is only applied when the object actually responds to it:
# calling it unconditionally raises NoMethodError for handles that do not
# define empty? (NOTE(review): SWIG pointer wrappers are expected to be
# such objects -- confirm), which would prevent the array from ever being
# freed.
def _free_double_array(x)
  return if x.nil?
  return if x.respond_to?(:empty?) && x.empty?
  delete_double(x)
end
|
37
|
+
|
38
|
+
# Reads the first +n+ entries of the C int array +x+ (via int_getitem)
# and returns them as a Ruby Array. Yields an empty Array when n <= 0.
def _int_array_to_list(x, n)
  (0..n - 1).map { |pos| int_getitem(x, pos) }
end
|
43
|
+
|
44
|
+
# Reads the first +n+ entries of the C double array +x+ (via
# double_getitem) and returns them as a Ruby Array. Yields an empty Array
# when n <= 0.
def _double_array_to_list(x, n)
  (0..n - 1).map { |pos| double_getitem(x, pos) }
end
|
49
|
+
|
50
|
+
# Ruby-side holder for an Svm_parameter struct.
#
# Every reader/writer that is not defined here is forwarded to the wrapped
# struct, so <tt>param.gamma</tt> and <tt>param.C = 10</tt> work directly.
# +weight_label=+ and +weight=+ accept Ruby sequences and convert them to
# C arrays, releasing the array that was previously stored.
class Parameter
  attr_accessor :param

  # Builds a parameter set with the defaults below, then applies the
  # optional Hash of overrides in args[0] (key => value becomes a call
  # to "key=").
  def initialize(*args)
    @param = Svm_parameter.new
    @param.svm_type = C_SVC
    @param.kernel_type = RBF
    @param.degree = 3
    @param.gamma = 0 # 1/k
    @param.coef0 = 0
    @param.nu = 0.5
    @param.cache_size = 100
    @param.C = 1
    @param.eps = 1e-3
    @param.p = 0.1
    @param.shrinking = 1
    @param.nr_weight = 0
    @param.probability = 0

    overrides = args[0]
    overrides.each { |k, v| self.send("#{k}=", v) } unless overrides.nil?
  end

  # Forwards unknown readers and writers to the wrapped struct.
  #
  # Once destroy has run there is nothing to forward to, so the call is
  # handed to the default handler, which raises NoMethodError naming the
  # method that was actually requested.
  def method_missing(m, *args)
    return super if @param.nil?

    name = m.to_s
    if name == 'weight_label='
      @weight_label_len = args[0].size
      pargs = _int_array(args[0])
      # release the array being replaced
      _free_int_array(@param.weight_label)
    elsif name == 'weight='
      @weight_len = args[0].size
      pargs = _double_array(args[0])
      # release the array being replaced
      _free_double_array(@param.weight)
    else
      pargs = args[0]
    end

    if name.index('=')
      @param.send(name, pargs)
    else
      @param.send(name)
    end
  end

  # Keeps respond_to? consistent with method_missing.
  def respond_to_missing?(name, include_private = false)
    !@param.nil? && @param.respond_to?(name, include_private)
  end

  # Frees the weight arrays and drops the struct reference.
  # Safe to call more than once.
  def destroy
    return if @param.nil?
    _free_int_array(@param.weight_label)
    _free_double_array(@param.weight)
    #delete_svm_parameter(@param)
    @param = nil
  end
end
|
104
|
+
|
105
|
+
# Converts a Hash (index => value) or an Array (position => value) into a
# newly allocated svm_node array.
#
# Entries are written in ascending index order and followed by a
# terminator node with index -1. Zero values are kept on purpose (needed
# for the precomputed kernel; no better solution yet).
#
# Subclasses of Hash and Array are accepted. If filling the array fails
# (for example because a value cannot be converted), the allocation is
# released before the error is re-raised.
#
# Raises TypeError when +x+ is neither a Hash nor an Array.
# Returns the svm_node array handle; release it with
# svm_node_array_destroy.
def _convert_to_svm_node_array(x)
  if x.is_a?(Hash)
    indices = x.keys.sort
  elsif x.is_a?(Array)
    indices = (0...x.size).to_a
  else
    raise TypeError,"data must be a hash or an array"
  end

  data = svm_node_array(indices.size + 1)
  begin
    indices.each_with_index do |k, slot|
      svm_node_array_set(data, slot, k, x[k])
    end
    # terminator entry
    svm_node_array_set(data, indices.size, -1, 0)
  rescue
    svm_node_array_destroy(data)
    raise
  end
  data
end
|
134
|
+
|
135
|
+
# A training set converted to the C structures libsvm consumes.
#
# +prob+   the wrapped Svm_problem
# +size+   number of samples (taken from y.size)
# +maxlen+ largest feature index seen (Hash samples) or longest sample
#          (Array samples); used to derive the default gamma
class Problem
  attr_accessor :prob, :maxlen, :size

  # y: sequence of target values; x: sequence of samples, each a Hash
  # (index => value) or an Array. x is expected to have one sample per
  # entry of y.
  def initialize(y,x)
    @prob = Svm_problem.new
    @size = y.size

    @y_array = new_double(@size)
    (0...@size).each do |i|
      double_setitem(@y_array, i, y[i])
    end

    @x_matrix = svm_node_matrix(@size)
    @data = []
    @maxlen = 0
    (0...@size).each do |i|
      row = _convert_to_svm_node_array(x[i])
      @data << row
      svm_node_matrix_set(@x_matrix, i, row)
      if x[i].is_a?(Hash)
        @maxlen = [@maxlen, x[i].keys.max].max if x[i].size > 0
      else
        @maxlen = [@maxlen, x[i].size].max
      end
    end

    @prob.l = @size
    @prob.y = @y_array
    @prob.x = @x_matrix
  end

  def inspect
    return "Problem: size = #{size}"
  end

  # Releases the problem struct, the target array, every sample row and
  # the row matrix. The handles are cleared afterwards so that a second
  # call is a no-op instead of freeing the same memory twice. Rows are
  # taken from the internal list rather than counted via the writable
  # +size+ attribute.
  def destroy
    return if @prob.nil?
    delete_svm_problem(@prob)
    delete_double(@y_array)
    @data.each { |row| svm_node_array_destroy(row) }
    svm_node_matrix_destroy(@x_matrix)
    @prob = nil
    @y_array = nil
    @x_matrix = nil
    @data = []
  end
end
|
182
|
+
|
183
|
+
# A trained libsvm model, either loaded from a file or trained from a
# Problem and a Parameter.
class Model
  attr_accessor :model

  # Model.new(filename)     loads a saved model
  # Model.new(prob, param)  trains a new model
  #
  # Raises IOError when the model file cannot be loaded and ArgumentError
  # when svm_check_parameter rejects the parameters.
  def initialize(arg1,arg2=nil)
    if arg2.nil?
      # create model from file
      filename = arg1
      @model = svm_load_model(filename)
      # a nil handle must not reach the query functions below
      raise IOError, "unable to load model from #{filename}" if @model.nil?
    else
      # create model from problem and parameter
      prob,param = arg1,arg2
      # keep the problem referenced for the lifetime of the model
      # (presumably the trained model points into its data -- confirm)
      @prob = prob
      # default gamma is 1/k; skipped when no features were seen, since
      # 1.0/0 would silently become Infinity
      if param.gamma == 0 && prob.maxlen > 0
        param.gamma = 1.0/prob.maxlen
      end
      msg = svm_check_parameter(prob.prob,param.param)
      raise ::ArgumentError, msg if msg
      @model = svm_train(prob.prob,param.param)
    end

    # setup some classwide variables
    @nr_class = svm_get_nr_class(@model)
    @svm_type = svm_get_svm_type(@model)

    # create labels (classes)
    intarr = new_int(@nr_class)
    begin
      svm_get_labels(@model,intarr)
      @labels = _int_array_to_list(intarr, @nr_class)
    ensure
      delete_int(intarr)
    end

    # check if valid probability model; stored as a real boolean because
    # the integer 0 is truthy in Ruby
    flag = svm_check_probability_model(@model)
    @probability = !(flag.nil? || flag == false || flag == 0)
  end

  # Returns the value svm_predict produces for sample x (Hash or Array).
  def predict(x)
    data = _convert_to_svm_node_array(x)
    begin
      return svm_predict(@model,data)
    ensure
      svm_node_array_destroy(data)
    end
  end

  def get_nr_class
    return @nr_class
  end

  # Raises TypeError for SVR / one-class models.
  def get_labels
    if @svm_type == NU_SVR or @svm_type == EPSILON_SVR or @svm_type == ONE_CLASS
      raise TypeError, "Unable to get label from a SVR/ONE_CLASS model"
    end
    return @labels
  end

  # Returns the raw decision values as a flat Array with
  # nr_class*(nr_class-1)/2 entries.
  def predict_values_raw(x)
    n = (@nr_class*(@nr_class-1)/2).floor
    data = _convert_to_svm_node_array(x)
    dblarr = nil
    begin
      dblarr = new_double(n)
      svm_predict_values(@model, data, dblarr)
      return _double_array_to_list(dblarr, n)
    ensure
      delete_double(dblarr) unless dblarr.nil?
      svm_node_array_destroy(data)
    end
  end

  # Returns the decision value(s) for sample x.
  #
  # SVR / one-class models: a single number.
  # Classification models: a table d with d[a][b] == -d[b][a] holding the
  # pairwise decision value for labels a and b. When every label is a
  # valid index (0...number_of_labels) the table is an Array of Arrays, as
  # before. For any other label set (e.g. +1/-1) it is a Hash of Hashes
  # keyed by label, because such labels cannot be used as Array indices.
  def predict_values(x)
    v = predict_values_raw(x)
    if @svm_type == NU_SVR or @svm_type == EPSILON_SVR or @svm_type == ONE_CLASS
      return v[0]
    end

    # C_SVC or NU_SVC
    k = @labels.size
    index_like = @labels.all? { |l| l.is_a?(Integer) && l >= 0 && l < k }
    if index_like
      d = Array.new(k) { Array.new(k) }
    else
      d = {}
      @labels.each { |l| d[l] = {} }
    end

    count = 0
    (0...k).each do |i|
      (i + 1...k).each do |j|
        d[@labels[i]][@labels[j]] = v[count]
        d[@labels[j]][@labels[i]] = -v[count]
        count += 1
      end
    end
    return d
  end

  # Returns [predicted_label, { label => probability }].
  #
  # Raises TypeError for regression and one-class models, and for models
  # without probability information.
  def predict_probability(x)
    # c code will do nothing on wrong type, so we have to check ourself
    if @svm_type == NU_SVR or @svm_type == EPSILON_SVR
      raise TypeError, "call get_svr_probability or get_svr_pdf for probability output of regression"
    elsif @svm_type == ONE_CLASS
      raise TypeError, "probability not supported yet for one-class problem"
    end
    # only C_SVC, NU_SVC goes in
    unless @probability
      raise TypeError, "model does not support probability estimates"
    end

    # convert x into svm_node, alloc a double array to receive probabilities
    data = _convert_to_svm_node_array(x)
    dblarr = nil
    begin
      dblarr = new_double(@nr_class)
      pred = svm_predict_probability(@model, data, dblarr)
      pv = _double_array_to_list(dblarr, @nr_class)
    ensure
      delete_double(dblarr) unless dblarr.nil?
      svm_node_array_destroy(data)
    end

    by_label = {}
    @labels.each_with_index { |label, i| by_label[label] = pv[i] }
    return pred, by_label
  end

  # Returns the value of svm_get_svr_probability; raises TypeError when
  # that value is 0.
  def get_svr_probability
    # leave the Error checking to svm.cpp code
    ret = svm_get_svr_probability(@model)
    if ret == 0
      raise TypeError, "not a regression model or probability information not available"
    end
    return ret
  end

  # Returns a Proc computing exp(-|z|/sigma) / (2*sigma), where sigma is
  # the value from get_svr_probability.
  def get_svr_pdf
    # get_svr_probability will handle error checking
    sigma = get_svr_probability()
    # Math.exp: a bare exp is not defined in this scope
    return Proc.new{|z| Math.exp(-z.abs/sigma)/(2*sigma)}
  end

  def save(filename)
    svm_save_model(filename,@model)
  end

  # Releases the native model. Safe to call more than once.
  def destroy
    return if @model.nil?
    svm_destroy_model(@model)
    @model = nil
  end
end
|
321
|
+
|
322
|
+
|
323
|
+
# Runs svm_cross_validation with +fold+ folds and returns its per-sample
# results as a Ruby Array with prob.size entries.
#
# As in Model#initialize, a gamma of 0 is replaced by 1/maxlen (skipped
# when maxlen is 0, which would give Infinity) and the parameters are
# validated first with svm_check_parameter.
#
# Raises ArgumentError when svm_check_parameter reports a problem.
def cross_validation(prob, param, fold)
  if param.gamma == 0 && prob.maxlen > 0
    param.gamma = 1.0/prob.maxlen
  end
  msg = svm_check_parameter(prob.prob, param.param)
  raise ::ArgumentError, msg if msg

  dblarr = new_double(prob.size)
  begin
    svm_cross_validation(prob.prob, param.param, fold, dblarr)
    return _double_array_to_list(dblarr, prob.size)
  ensure
    # release the result buffer even when the C call raises
    delete_double(dblarr)
  end
end
|
333
|
+
|
334
|
+
# Reads a data file with one sample per line in the form
#
#   label index:value index:value ...
#
# and returns [labels, samples]: labels is an Array of Integers (the
# first field converted with to_i) and samples an Array of Hashes mapping
# Integer index => Float value.
#
# Blank lines are skipped, and the file is closed when reading finishes
# or fails. A summary line is printed to stdout.
def read_file filename
  labels = []
  samples = []

  File.open(filename) do |f|
    f.each_line do |line|
      elems = line.split
      next if elems.empty?

      sample = {}
      elems[1..-1].each do |e|
        index, value = e.split(":")
        sample[index.to_i] = value.to_f
      end
      labels << elems[0].to_i
      samples << sample
    end
  end

  puts "#{filename}: #{samples.size} samples loaded."
  return labels,samples
end
|
356
|
+
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: libsvm-ruby-swig
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 4
|
8
|
+
- 0
|
9
|
+
version: 0.4.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Tom Zeng
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-03-27 00:00:00 -04:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: hoe
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 1
|
29
|
+
- 8
|
30
|
+
- 3
|
31
|
+
version: 1.8.3
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
description: Ruby wrapper of LIBSVM using SWIG
|
35
|
+
email: tom.z.zeng@gmail.com
|
36
|
+
executables: []
|
37
|
+
|
38
|
+
extensions:
|
39
|
+
- ext/extconf.rb
|
40
|
+
extra_rdoc_files:
|
41
|
+
- History.txt
|
42
|
+
- Manifest.txt
|
43
|
+
- README.rdoc
|
44
|
+
files:
|
45
|
+
- History.txt
|
46
|
+
- COPYING
|
47
|
+
- AUTHORS
|
48
|
+
- Manifest.txt
|
49
|
+
- README.rdoc
|
50
|
+
- Rakefile
|
51
|
+
- lib/svm.rb
|
52
|
+
- ext/libsvm_wrap.cxx
|
53
|
+
- ext/svm.cpp
|
54
|
+
- ext/svm.h
|
55
|
+
- ext/extconf.rb
|
56
|
+
has_rdoc: true
|
57
|
+
homepage: http://www.tomzconsulting.com
|
58
|
+
licenses: []
|
59
|
+
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options:
|
62
|
+
- --main
|
63
|
+
- README.rdoc
|
64
|
+
require_paths:
|
65
|
+
- lib
|
66
|
+
- ext
|
67
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
segments:
|
79
|
+
- 0
|
80
|
+
version: "0"
|
81
|
+
requirements: []
|
82
|
+
|
83
|
+
rubyforge_project:
|
84
|
+
rubygems_version: 1.3.6
|
85
|
+
signing_key:
|
86
|
+
specification_version: 2
|
87
|
+
summary: Ruby wrapper of LIBSVM using SWIG
|
88
|
+
test_files: []
|
89
|
+
|