feedbackmine-libsvm-ruby 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/svm.rb ADDED
@@ -0,0 +1,337 @@
1
+ require 'svmc'
2
+ include Svmc
3
+
4
# Copies a Ruby sequence of integers into a freshly allocated native int
# array.
#
# seq:: any object responding to +size+ and +each_with_index+.
#
# Returns whatever +new_int+ returned. The caller owns that buffer and is
# responsible for releasing it with +delete_int+.
def _int_array(seq)
  array = new_int(seq.size)
  seq.each_with_index { |item, i| int_setitem(array, i, item) }
  array
end
14
+
15
# Copies a Ruby sequence of numbers into a freshly allocated native double
# array.
#
# seq:: any object responding to +size+ and +each_with_index+.
#
# Returns whatever +new_double+ returned. The caller owns that buffer and is
# responsible for releasing it with +delete_double+.
def _double_array(seq)
  array = new_double(seq.size)
  seq.each_with_index { |item, i| double_setitem(array, i, item) }
  array
end
25
+
26
# Releases a native int array previously obtained from +new_int+.
#
# Nothing happens when +x+ is nil, or when +x+ is a container that reports
# itself as empty. Any other value is handed to +delete_int+.
#
# The emptiness test is only applied to objects that actually respond to
# +empty?+; calling it unconditionally would raise NoMethodError for values
# that do not define it (presumably the case for wrapped native pointers --
# confirm against the generated svmc bindings).
def _free_int_array(x)
  return if x.nil?
  return if x.respond_to?(:empty?) && x.empty?

  delete_int(x)
end
31
+
32
# Releases a native double array previously obtained from +new_double+.
#
# Nothing happens when +x+ is nil, or when +x+ is a container that reports
# itself as empty. Any other value is handed to +delete_double+.
#
# The emptiness test is only applied to objects that actually respond to
# +empty?+; calling it unconditionally would raise NoMethodError for values
# that do not define it (presumably the case for wrapped native pointers --
# confirm against the generated svmc bindings).
def _free_double_array(x)
  return if x.nil?
  return if x.respond_to?(:empty?) && x.empty?

  delete_double(x)
end
37
+
38
# Reads the first +n+ entries of a native int array into a Ruby Array.
#
# x:: native int array, read through +int_getitem+.
# n:: number of entries to read; zero or a negative count yields [].
def _int_array_to_list(x, n)
  (0...n).map { |i| int_getitem(x, i) }
end
43
+
44
# Reads the first +n+ entries of a native double array into a Ruby Array.
#
# x:: native double array, read through +double_getitem+.
# n:: number of entries to read; zero or a negative count yields [].
def _double_array_to_list(x, n)
  (0...n).map { |i| double_getitem(x, i) }
end
49
+
50
# Ruby-side holder for a native +Svm_parameter+ struct.
#
# Attribute reads and writes that Parameter does not define itself (for
# example +gamma+, +C=+, +weight=+) are forwarded to the wrapped struct
# through +method_missing+. Assigning +weight_label+ or +weight+ converts the
# given Ruby sequence into a native array first and releases the array that
# was installed before.
class Parameter
  attr_accessor :param

  # Creates the native struct, fills in the defaults below and then applies
  # any overrides.
  #
  # args:: optional; when a first argument is given it is iterated as
  #        name/value pairs and each pair is applied via the "name=" setter,
  #        e.g. Parameter.new('C' => 10, 'kernel_type' => RBF).
  def initialize(*args)
    @param = Svm_parameter.new
    @param.svm_type = C_SVC
    @param.kernel_type = RBF
    @param.degree = 3
    @param.gamma = 0 # 1/k
    @param.coef0 = 0
    @param.nu = 0.5
    @param.cache_size = 100
    @param.C = 1
    @param.eps = 1e-3
    @param.p = 0.1
    @param.shrinking = 1
    @param.nr_weight = 0
    # weight_label and weight are intentionally left untouched here; they are
    # only populated when a caller assigns them.
    @param.probability = 0

    overrides = args[0]
    overrides.each { |key, value| send("#{key}=", value) } unless overrides.nil?
  end

  # Forwards unknown messages to the wrapped struct.
  #
  # A name containing "=" is treated as a setter and receives only the first
  # argument; anything else is sent without arguments. Once the object has
  # been destroyed there is nothing to forward to, so the default
  # NoMethodError is raised.
  def method_missing(m, *args)
    return super if @param.nil?

    name = m.to_s
    case name
    when 'weight_label='
      @weight_label_len = args[0].size
      value = _int_array(args[0])
      # release the array being replaced so it does not leak
      _free_int_array(@param.weight_label)
    when 'weight='
      @weight_len = args[0].size
      value = _double_array(args[0])
      _free_double_array(@param.weight)
    else
      value = args[0]
    end

    if name.index('=')
      @param.send(name, value)
    else
      @param.send(name)
    end
  end

  # Keeps respond_to? consistent with what method_missing can forward.
  def respond_to_missing?(m, include_private = false)
    (!@param.nil? && @param.respond_to?(m, include_private)) || super
  end

  # Releases the native weight arrays and drops the struct reference.
  # Safe to call more than once.
  def destroy
    return if @param.nil?

    _free_int_array(@param.weight_label)
    _free_double_array(@param.weight)
    #delete_svm_parameter(@param)
    @param = nil
  end
end
104
+
105
# Converts one sample into a native svm_node array.
#
# x:: a Hash mapping feature index => value, or an Array of values whose
#     positions are used as the feature indices.
#
# Every entry is kept, zeros included (the precomputed kernel needs them).
# Nodes are written in ascending index order, followed by a terminator node
# with index -1.
#
# Returns the array obtained from +svm_node_array+; the caller releases it
# with +svm_node_array_destroy+.
#
# Raises TypeError when +x+ is neither a Hash nor an Array.
def _convert_to_svm_node_array(x)
  indices =
    case x
    when Hash then x.keys
    when Array then (0...x.size).to_a
    else raise TypeError, "data must be a mapping or a sequence"
    end

  # Hash keys come back in insertion order; sort in place so the nodes are
  # emitted by ascending index (a bare `sort` would discard its result).
  indices.sort!

  data = svm_node_array(indices.size + 1)
  svm_node_array_set(data, indices.size, -1, 0)
  indices.each_with_index { |k, j| svm_node_array_set(data, j, k, x[k]) }
  data
end
134
+
135
# A training set converted into the native structures used for training.
#
# prob::   the native +Svm_problem+ struct.
# size::   number of samples (taken from +y+).
# maxlen:: for Hash samples the largest feature index seen, for Array
#          samples the longest sample length.
class Problem
  attr_accessor :prob, :maxlen, :size

  # y:: sequence of target values, one per sample.
  # x:: sequence of samples; each is a Hash (index => value) or an Array.
  #     Only the first y.size samples are read.
  def initialize(y, x)
    #assert y.size == x.size
    @prob = Svm_problem.new
    @size = y.size

    @y_array = new_double(@size)
    @size.times { |i| double_setitem(@y_array, i, y[i]) }

    @x_matrix = svm_node_matrix(@size)
    @data = []
    @maxlen = 0
    @size.times do |i|
      sample = x[i]
      nodes = _convert_to_svm_node_array(sample)
      # remember each row so destroy can release it later
      @data << nodes
      svm_node_matrix_set(@x_matrix, i, nodes)

      if sample.is_a?(Hash)
        @maxlen = [@maxlen, sample.keys.max].max unless sample.empty?
      else
        @maxlen = [@maxlen, sample.size].max
      end
    end

    @prob.l = @size
    @prob.y = @y_array
    @prob.x = @x_matrix
  end

  def inspect
    "svm_problem: size = #{size}"
  end

  # Releases every native allocation made by initialize. Calling it a second
  # time is a no-op, so the same buffers are never freed twice.
  def destroy
    return if @prob.nil?

    delete_svm_problem(@prob)
    delete_double(@y_array)
    @data.each { |nodes| svm_node_array_destroy(nodes) }
    svm_node_matrix_destroy(@x_matrix)

    @prob = nil
    @y_array = nil
    @x_matrix = nil
    @data = []
  end
end
182
+
183
# A trained or loaded SVM model.
#
# Model.new(filename)     loads a model file via +svm_load_model+.
# Model.new(prob, param)  trains from a Problem and a Parameter.
class Model
  attr_accessor :model,:objs

  # arg1:: model file name, or the Problem to train on.
  # arg2:: nil when loading from a file, otherwise the Parameter.
  #
  # When training, a gamma of 0 on +param+ is replaced by 1.0/prob.maxlen
  # (this mutates the caller's parameter object).
  #
  # Raises ArgumentError with the message from +svm_check_parameter+ when the
  # parameters are rejected, and IOError when loading yields no model.
  def initialize(arg1,arg2=nil)
    if arg2.nil?
      # create model from file
      @model = svm_load_model(arg1)
      # presumably nil means the file could not be read or parsed; fail here
      # instead of passing a null model on to the native calls below
      raise IOError, "unable to load model from #{arg1}" if @model.nil?
    else
      # create model from problem and parameter
      prob, param = arg1, arg2
      # hold on to the problem for as long as the model lives (presumably the
      # trained model still points into the problem's data -- see the libsvm
      # README)
      @prob = prob
      param.gamma = 1.0/prob.maxlen if param.gamma == 0
      msg = svm_check_parameter(prob.prob, param.param)
      raise ArgumentError, msg if msg
      @model = svm_train(prob.prob, param.param)
    end

    # cache values that are queried repeatedly
    @nr_class = svm_get_nr_class(@model)
    @svm_type = svm_get_svm_type(@model)

    # class labels
    intarr = new_int(@nr_class)
    begin
      svm_get_labels(@model, intarr)
      @labels = _int_array_to_list(intarr, @nr_class)
    ensure
      delete_int(intarr)
    end

    # whether the model carries probability information (see
    # _probability_model?)
    @probability = svm_check_probability_model(@model)

    @objs = []
    unless arg2.nil?
      @labels.each_index { |i| @objs << svm_get_obj(@model, i) }
    end
  end

  # Returns the value +svm_predict+ produces for sample +x+ (Hash or Array).
  def predict(x)
    data = _convert_to_svm_node_array(x)
    begin
      svm_predict(@model, data)
    ensure
      svm_node_array_destroy(data)
    end
  end

  def get_nr_class
    @nr_class
  end

  # Returns the class labels. Raises TypeError for SVR / one-class models.
  def get_labels
    if _regression_or_one_class?
      raise TypeError, "Unable to get label from a SVR/ONE_CLASS model"
    end
    @labels
  end

  # Returns the nr_class*(nr_class-1)/2 raw decision values for +x+ as a flat
  # Array, in the order +svm_predict_values+ writes them.
  def predict_values_raw(x)
    n = (@nr_class*(@nr_class-1)/2).floor
    data = _convert_to_svm_node_array(x)
    begin
      dblarr = new_double(n)
      begin
        svm_predict_values(@model, data, dblarr)
        _double_array_to_list(dblarr, n)
      ensure
        delete_double(dblarr)
      end
    ensure
      svm_node_array_destroy(data)
    end
  end

  # For SVR / one-class models returns the single decision value.
  #
  # For classifiers returns a table +d+ where d[a][b] is the decision value
  # of label a against label b and d[b][a] is its negation. When every label
  # is a valid index (0...number of labels) the table is an Array of Arrays;
  # otherwise (negative or sparse labels, which cannot index an Array
  # correctly) it is a Hash of Hashes keyed by label.
  def predict_values(x)
    v = predict_values_raw(x)
    return v[0] if _regression_or_one_class?

    d = _pairwise_table
    count = 0
    @labels.each_with_index do |a, i|
      @labels[(i + 1)..-1].each do |b|
        d[a][b] = v[count]
        d[b][a] = -v[count]
        count += 1
      end
    end
    d
  end

  # Returns [predicted_value, {label => probability}] for sample +x+.
  #
  # Raises TypeError for regression and one-class models, and when the model
  # has no probability information.
  def predict_probability(x)
    #c code will do nothing on wrong type, so we have to check ourself
    if @svm_type == NU_SVR || @svm_type == EPSILON_SVR
      raise TypeError, "call get_svr_probability or get_svr_pdf for probability output of regression"
    elsif @svm_type == ONE_CLASS
      raise TypeError, "probability not supported yet for one-class problem"
    end
    #only C_SVC,NU_SVC goes in
    unless _probability_model?
      raise TypeError, "model does not support probabiliy estimates"
    end

    data = _convert_to_svm_node_array(x)
    begin
      dblarr = new_double(@nr_class)
      begin
        pred = svm_predict_probability(@model, data, dblarr)
        pv = _double_array_to_list(dblarr, @nr_class)
      ensure
        delete_double(dblarr)
      end
    ensure
      svm_node_array_destroy(data)
    end

    probs = {}
    @labels.each_with_index { |label, i| probs[label] = pv[i] }
    [pred, probs]
  end

  # Returns the value of +svm_get_svr_probability+; raises TypeError when
  # that value is 0.
  def get_svr_probability
    #leave the Error checking to svm.cpp code
    ret = svm_get_svr_probability(@model)
    if ret == 0
      raise TypeError, "not a regression model or probability information not available"
    end
    ret
  end

  # Returns a Proc computing exp(-|z|/sigma)/(2*sigma), where sigma is the
  # value from get_svr_probability.
  def get_svr_pdf
    #get_svr_probability will handle error checking
    sigma = get_svr_probability
    Proc.new { |z| Math.exp(-z.abs/sigma)/(2*sigma) }
  end

  def save(filename)
    svm_save_model(filename,@model)
  end

  # Releases the native model. Safe to call more than once.
  def destroy
    return if @model.nil?

    svm_destroy_model(@model)
    @model = nil
  end

  private

  # True for model types that have no class labels.
  def _regression_or_one_class?
    @svm_type == NU_SVR || @svm_type == EPSILON_SVR || @svm_type == ONE_CLASS
  end

  # True when svm_check_probability_model reported support. The native call
  # presumably returns an integer flag, and 0 is truthy in Ruby, so it has to
  # be compared explicitly.
  def _probability_model?
    !(@probability.nil? || @probability == false || @probability == 0)
  end

  # Empty container for predict_values: square Array when labels can serve as
  # indices, Hash of Hashes otherwise.
  def _pairwise_table
    size = @labels.size
    if @labels.all? { |l| l.is_a?(Integer) && l >= 0 && l < size }
      Array.new(size) { Array.new(size) }
    else
      table = {}
      @labels.each { |l| table[l] = {} }
      table
    end
  end
end
326
+
327
+
328
# Runs +svm_cross_validation+ on a problem and returns the values it writes,
# one per sample, as a Ruby Array.
#
# prob::  a Problem (its +prob+, +maxlen+ and +size+ are read).
# param:: a Parameter; a gamma of 0 is replaced by 1.0/prob.maxlen, which
#         mutates the caller's object.
# fold::  number of folds, passed straight through.
def cross_validation(prob, param, fold)
  param.gamma = 1.0/prob.maxlen if param.gamma == 0

  dblarr = new_double(prob.size)
  begin
    svm_cross_validation(prob.prob, param.param, fold, dblarr)
    _double_array_to_list(dblarr, prob.size)
  ensure
    # release the native buffer even when the native call raises
    delete_double(dblarr)
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedbackmine-libsvm-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - FeedbackMine
@@ -39,7 +39,10 @@ files:
39
39
  - Manifest.txt
40
40
  - README.txt
41
41
  - Rakefile
42
- - ext/main.cpp
42
+ - lib/svm.rb
43
+ - ext/svmc_wrap.cxx
44
+ - ext/svm.cpp
45
+ - ext/svm.h
43
46
  - ext/extconf.rb
44
47
  has_rdoc: true
45
48
  homepage: http://www.tweetjobsearch.com
data/ext/main.cpp DELETED
@@ -1,775 +0,0 @@
1
- /* RubySVM 1.0 by Rudi Cilibrasi (cilibrar@ofb.net)
2
- * Released under the GPL
3
- * Mon May 12 11:20:48 CEST 2003,
4
- * based on libsvm-2.4
5
- */
6
-
7
- #define obstack_chunk_alloc xmalloc
8
- #define obstack_chunk_free free
9
-
10
- #define HAVE_DEFINE_ALLOC_FUNCTION 1
11
-
12
- #include "ruby.h"
13
- #include "node.h"
14
- #include <string.h>
15
- #include <obstack.h>
16
- #include <stdio.h>
17
- #include <malloc.h>
18
- #include <libsvm/svm.h>
19
- #include <stdlib.h>
20
-
21
- VALUE mSVM, cSVMProblem, cSVMParameter, cSVMModel;
22
- static VALUE cMarshal;
23
-
24
- static int getSVCount(struct svm_model *m);
25
-
26
- struct RSVM_Problem {
27
- struct svm_problem prob;
28
- struct obstack xs, ys;
29
- int k;
30
- };
31
-
32
- struct RSVM_Model {
33
- struct svm_model *m;
34
- };
35
-
36
- struct RSVM_Parameter {
37
- struct svm_parameter p;
38
- };
39
-
40
- VALUE svmpa_new(VALUE cl);
41
-
42
- /*
43
- * Converts a Ruby array of consecutive values into a list of
44
- * value-index svm_node's.
45
- */
46
- struct svm_node *rubyArrayToNodelist(VALUE xs)
47
- {
48
- //struct obstack xso;
49
- struct svm_node *n;
50
- int i;
51
- int len = RARRAY(xs)->len;
52
- n = (struct svm_node *) calloc(sizeof(struct svm_node), len+1);
53
- for (i = 0; i < len; ++i) {
54
- n[i].value = NUM2DBL(rb_ary_entry(xs, i));
55
- n[i].index = i;
56
- }
57
- n[i].value = 0;
58
- n[i].index = -1;
59
-
60
- return n;
61
- }
62
-
63
- /* value is hardcoded as 1, array is sorted features */
64
- struct svm_node *rubyArrayToNodelist2(VALUE xs)
65
- {
66
- //struct obstack xso;
67
- struct svm_node *n;
68
- int i;
69
- int len = RARRAY(xs)->len;
70
- n = (struct svm_node *) calloc(sizeof(struct svm_node), len+1);
71
- for (i = 0; i < len; ++i) {
72
- n[i].value = 1;
73
- n[i].index = NUM2DBL(rb_ary_entry(xs, i));
74
- }
75
- n[i].value = 0;
76
- n[i].index = -1;
77
-
78
- return n;
79
- }
80
-
81
-
82
- /*
83
- * Serializes an SVMParameter object
84
- */
85
- VALUE svmpa_svm_dump(VALUE self, VALUE limit)
86
- {
87
- struct RSVM_Parameter *rp;
88
- VALUE obj = rb_ary_new();
89
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
90
- rb_ary_push(obj, INT2NUM(rp->p.svm_type));
91
- rb_ary_push(obj, INT2NUM(rp->p.kernel_type));
92
- rb_ary_push(obj, rb_float_new(rp->p.degree));
93
- rb_ary_push(obj, rb_float_new(rp->p.gamma));
94
- rb_ary_push(obj, rb_float_new(rp->p.coef0));
95
- rb_ary_push(obj, rb_float_new(rp->p.cache_size));
96
- rb_ary_push(obj, rb_float_new(rp->p.eps));
97
- rb_ary_push(obj, rb_float_new(rp->p.C));
98
- rb_ary_push(obj, rb_float_new(rp->p.nu));
99
- rb_ary_push(obj, rb_float_new(rp->p.p));
100
- rb_ary_push(obj, INT2NUM(rp->p.shrinking));
101
- return rb_funcall(cMarshal, rb_intern("dump"), 1, obj);
102
- }
103
-
104
- /*
105
- * Deserializes an SVMParameter object
106
- */
107
-
108
- VALUE svmpa_svm_load(VALUE kl, VALUE obj)
109
- {
110
- struct RSVM_Parameter *rp;
111
- printf("In load!\n");
112
- VALUE self = svmpa_new(cSVMParameter);
113
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
114
- printf("RP is %p\n", rp);
115
- obj = rb_funcall(cMarshal, rb_intern("load"), 1, obj);
116
- rp->p.svm_type = NUM2INT(rb_ary_entry(obj, 0));
117
- printf("first!\n");
118
- rp->p.kernel_type = NUM2INT(rb_ary_entry(obj, 1));
119
- rp->p.degree = (int) NUM2DBL(rb_ary_entry(obj, 2));
120
- rp->p.gamma = NUM2DBL(rb_ary_entry(obj, 3));
121
- rp->p.coef0 = NUM2DBL(rb_ary_entry(obj, 4));
122
- rp->p.cache_size = NUM2DBL(rb_ary_entry(obj, 5));
123
- printf("midway!\n");
124
- rp->p.eps = NUM2DBL(rb_ary_entry(obj, 6));
125
- rp->p.C = NUM2DBL(rb_ary_entry(obj, 7));
126
- rp->p.nu = NUM2DBL(rb_ary_entry(obj, 8));
127
- rp->p.p = NUM2DBL(rb_ary_entry(obj, 9));
128
- rp->p.shrinking = NUM2INT(rb_ary_entry(obj, 10));
129
- printf("Never returned!\n");
130
- return self;
131
- }
132
-
133
- /*
134
- * Gets gamma value, the exponent used in the kernel function
135
- */
136
- VALUE svmpa_gamma(VALUE self) {
137
- struct RSVM_Parameter *rp;
138
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
139
- return rb_float_new(rp->p.gamma);
140
- }
141
-
142
- /*
143
- * Sets gamma value, the exponent used in the kernel function
144
- */
145
- VALUE svmpa_gammaeq(VALUE self, VALUE eq) {
146
- struct RSVM_Parameter *rp;
147
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
148
- rp->p.gamma = NUM2DBL(eq);
149
- return Qnil;
150
- }
151
- /*
152
- * Gets coef0, the constant added in the polynomial kernel
153
- */
154
- VALUE svmpa_coef0(VALUE self) {
155
- struct RSVM_Parameter *rp;
156
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
157
- return rb_float_new(rp->p.coef0);
158
- }
159
-
160
- /*
161
- * Sets coef0, the constant added in the polynomial kernel
162
- */
163
- VALUE svmpa_coef0eq(VALUE self, VALUE eq) {
164
- struct RSVM_Parameter *rp;
165
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
166
- rp->p.coef0 = NUM2DBL(eq);
167
- return Qnil;
168
- }
169
-
170
- /*
171
- * Gets probability
172
- */
173
- VALUE svmpa_probability(VALUE self) {
174
- struct RSVM_Parameter *rp;
175
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
176
- return rb_float_new(rp->p.probability);
177
- }
178
-
179
- /*
180
- * Sets probability
181
- */
182
- VALUE svmpa_probabilityeq(VALUE self, VALUE eq) {
183
- struct RSVM_Parameter *rp;
184
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
185
- rp->p.probability = NUM2DBL(eq);
186
- return Qnil;
187
- }
188
-
189
- /*
190
- * Gets cachesize, the number of megabytes of memory to use for the cache
191
- */
192
- VALUE svmpa_cache_size(VALUE self) {
193
- struct RSVM_Parameter *rp;
194
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
195
- return rb_float_new(rp->p.cache_size);
196
- }
197
-
198
- /*
199
- * Sets cachesize, the number of megabytes of memory to use for the cache
200
- */
201
- VALUE svmpa_cache_sizeeq(VALUE self, VALUE eq) {
202
- struct RSVM_Parameter *rp;
203
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
204
- rp->p.cache_size = NUM2DBL(eq);
205
- return Qnil;
206
- }
207
- /*
208
- * Gets eps, the tolerance of termination criterion
209
- */
210
- VALUE svmpa_eps(VALUE self) {
211
- struct RSVM_Parameter *rp;
212
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
213
- return rb_float_new(rp->p.eps);
214
- }
215
-
216
- /*
217
- * Sets eps, the tolerance of termination criterion
218
- */
219
- VALUE svmpa_epseq(VALUE self, VALUE eq) {
220
- struct RSVM_Parameter *rp;
221
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
222
- rp->p.eps = NUM2DBL(eq);
223
- return Qnil;
224
- }
225
- /*
226
- * Gets C, the cost parameter of C-SVC, epsilon-SVR, and nu-SVR
227
- */
228
- VALUE svmpa_C(VALUE self) {
229
- struct RSVM_Parameter *rp;
230
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
231
- return rb_float_new(rp->p.C);
232
- }
233
-
234
- /*
235
- * Sets C, the cost parameter of C-SVC, epsilon-SVR, and nu-SVR
236
- */
237
- VALUE svmpa_Ceq(VALUE self, VALUE eq) {
238
- struct RSVM_Parameter *rp;
239
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
240
- rp->p.C = NUM2DBL(eq);
241
- return Qnil;
242
- }
243
-
244
- /*
245
- * Gets nu, the SV-ratio parameter of nu-SVC, one-class SVM, and nu-SVR
246
- */
247
- VALUE svmpa_nu(VALUE self) {
248
- struct RSVM_Parameter *rp;
249
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
250
- return rb_float_new(rp->p.nu);
251
- }
252
-
253
- /*
254
- * Sets nu, the SV-ratio parameter of nu-SVC, one-class SVM, and nu-SVR
255
- */
256
- VALUE svmpa_nueq(VALUE self, VALUE eq) {
257
- struct RSVM_Parameter *rp;
258
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
259
- rp->p.nu = NUM2DBL(eq);
260
- return Qnil;
261
- }
262
-
263
- /*
264
- * Gets p, the zero-loss width zone in epsilon-insensitive SVR
265
- */
266
- VALUE svmpa_p(VALUE self) {
267
- struct RSVM_Parameter *rp;
268
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
269
- return rb_float_new(rp->p.p);
270
- }
271
-
272
- /*
273
- * Sets p, the zero-loss width zone in epsilon-insensitive SVR
274
- */
275
- VALUE svmpa_peq(VALUE self, VALUE eq) {
276
- struct RSVM_Parameter *rp;
277
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
278
- rp->p.p = NUM2DBL(eq);
279
- return Qnil;
280
- }
281
-
282
- /*
283
- * Gets degree, the degree of the kernel function
284
- */
285
- VALUE svmpa_degree(VALUE self) {
286
- struct RSVM_Parameter *rp;
287
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
288
- return rb_float_new(rp->p.degree);
289
- }
290
-
291
- /*
292
- * Sets degree, the degree of the kernel function
293
- */
294
- VALUE svmpa_degreeeq(VALUE self, VALUE eq) {
295
- struct RSVM_Parameter *rp;
296
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
297
- rp->p.degree = (int) NUM2DBL(eq);
298
- return Qnil;
299
- }
300
-
301
- /*
302
- * Gets kernel_type, which is one of:
303
- * * LINEAR
304
- * * POLY
305
- * * RBF
306
- * * SIGMOID
307
- */
308
- VALUE svmpa_kernel_type(VALUE self) {
309
- struct RSVM_Parameter *rp;
310
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
311
- return INT2FIX(rp->p.kernel_type);
312
- }
313
-
314
- /*
315
- * Sets kernel_type, which is one of:
316
- * * LINEAR
317
- * * POLY
318
- * * RBF
319
- * * SIGMOID
320
- */
321
- VALUE svmpa_kernel_typeeq(VALUE self, VALUE eq) {
322
- struct RSVM_Parameter *rp;
323
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
324
- rp->p.kernel_type = FIX2INT(eq);
325
- return Qnil;
326
- }
327
-
328
- /*
329
- * Gets svm_type, which is one of:
330
- * * C_SVC
331
- * * NU_SVC
332
- * * ONE_CLASS
333
- * * EPSILON_SVR
334
- * * NU_SVR
335
- */
336
- VALUE svmpa_svm_type(VALUE self) {
337
- struct RSVM_Parameter *rp;
338
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
339
- return INT2FIX(rp->p.svm_type);
340
- }
341
-
342
- /*
343
- * Sets svm_type, which is one of:
344
- * * C_SVC
345
- * * NU_SVC
346
- * * ONE_CLASS
347
- * * EPSILON_SVR
348
- * * NU_SVR
349
- */
350
- VALUE svmpa_svm_typeeq(VALUE self, VALUE eq) {
351
- struct RSVM_Parameter *rp;
352
- Data_Get_Struct(self, struct RSVM_Parameter, rp);
353
- rp->p.svm_type = FIX2INT(eq);
354
- return Qnil;
355
- }
356
-
357
- struct RSVM_Parameter *newParameter()
358
- {
359
- struct RSVM_Parameter *rp = (struct RSVM_Parameter *)
360
- calloc(sizeof(struct RSVM_Parameter), 1);
361
- rp->p.svm_type = C_SVC;
362
- rp->p.kernel_type = RBF;
363
- rp->p.degree = 3;
364
- rp->p.gamma = 0;
365
- rp->p.coef0 = 0;
366
- rp->p.nu = 0.5;
367
- rp->p.cache_size = 40;
368
- rp->p.C = 1;
369
- rp->p.eps = 1e-3;
370
- rp->p.p = 0.1;
371
- rp->p.shrinking = 1;
372
- rp->p.nr_weight = 0;
373
- return rp;
374
- }
375
-
376
- /*
377
- * Creates a new, empty SVMProblem object.
378
- */
379
- struct RSVM_Problem *newProblem()
380
- {
381
- struct RSVM_Problem *rprob = (struct RSVM_Problem *) calloc(sizeof(struct RSVM_Problem), 1);
382
- rprob->prob.l = 0;
383
- rprob->prob.x = NULL;
384
- rprob->prob.y = NULL;
385
- obstack_init(&rprob->xs);
386
- obstack_init(&rprob->ys);
387
- return rprob;
388
- }
389
-
390
- /*
391
- * Adds an example to an SVMProblem given a target value and an input vector.
392
- */
393
- void addExample(struct RSVM_Problem *rp, double y, struct svm_node *x)
394
- {
395
- obstack_grow(&rp->ys, &y, sizeof(double));
396
- obstack_grow(&rp->xs, &x, sizeof(struct svm_node *));
397
- }
398
-
399
- void syncProblem(struct RSVM_Problem *rp)
400
- {
401
- rp->prob.l = obstack_object_size(&rp->ys) / sizeof(double);
402
- rp->prob.y = (double *) obstack_base(&rp->ys);
403
- rp->prob.x = (struct svm_node **) obstack_base(&rp->xs);
404
- }
405
-
406
- /*
407
- * Frees an SVMModel
408
- */
409
- static void svmpm_free(void *ptr)
410
- {
411
- struct RSVM_Model *rp = (struct RSVM_Model *) ptr;
412
- svm_destroy_model(rp->m);
413
- free(rp);
414
- }
415
-
416
- /*
417
- * Frees an SVMParameter
418
- */
419
- static void svmpa_free(void *ptr)
420
- {
421
- struct RSVM_Parameter *rp = (struct RSVM_Parameter *) ptr;
422
- free(rp);
423
- }
424
-
425
- /*
426
- * Frees an SVMProblem
427
- */
428
- static void svmpr_free(void *ptr)
429
- {
430
- struct RSVM_Problem *rp = (struct RSVM_Problem *) ptr;
431
- int i;
432
- syncProblem(rp);
433
- for (i = 0; i < rp->prob.l; ++i)
434
- free(rp->prob.x[i]);
435
- obstack_free(&(rp->xs),NULL);
436
- obstack_free(&(rp->ys),NULL);
437
- free(rp);
438
- }
439
-
440
- /*
441
- * Creates a new SVMParameter object.
442
- * Uses the following default values:
443
- * * svm_type = C_SVC
444
- * * kernel_type = RBF
445
- * * degree = 3
446
- * * gamma = 1 / k (0 means this also)
447
- * * coef0 = 0
448
- * * nu = 0.5
449
- * * cache_size = 40
450
- * * C = 1
451
- * * eps = 1e-3
452
- * * p = 0.1
453
- * * shrinking = 1
454
- * * nr_weight = 0
455
- */
456
- VALUE svmpa_new(VALUE cl)
457
- {
458
- struct RSVM_Parameter *rp = newParameter();
459
- VALUE tdata = Data_Wrap_Struct(cl, 0, svmpa_free, rp);
460
- printf("In the new!!\n");
461
- rb_obj_call_init(tdata, 0, NULL);
462
- return tdata;
463
- }
464
-
465
- #ifdef HAVE_DEFINE_ALLOC_FUNCTION
466
- static VALUE svmpa_allocate(VALUE kl) {
467
- return svmpa_new(kl);
468
- }
469
- #endif
470
-
471
- /*
472
- * Creates a new, empty SVMProblem object.
473
- */
474
- VALUE svmpr_new(VALUE cl)
475
- {
476
- struct RSVM_Problem *rp = newProblem();
477
- VALUE tdata = Data_Wrap_Struct(cl, 0, svmpr_free, rp);
478
- rb_obj_call_init(tdata, 0, NULL);
479
- return tdata;
480
- }
481
-
482
- /*
483
- * Trains an SVM according to a given problem set and parameter specification
484
- */
485
- VALUE svmpm_new(VALUE cl, VALUE prob, VALUE par)
486
- {
487
- struct RSVM_Model *rp = (struct RSVM_Model *) calloc(sizeof(struct RSVM_Model), 1);
488
- struct RSVM_Problem *cpro;
489
- struct RSVM_Parameter *cpa;
490
- bool defgamma = false;
491
- VALUE tdata = Data_Wrap_Struct(cl, 0, svmpm_free, rp);
492
- Data_Get_Struct(prob, struct RSVM_Problem, cpro);
493
- Data_Get_Struct(par, struct RSVM_Parameter, cpa);
494
- syncProblem(cpro);
495
- if (cpa->p.gamma == 0)
496
- defgamma = true;
497
- if (defgamma)
498
- cpa->p.gamma = 1.0 / (double) cpro->k;
499
- cpa->p.probability = 1;
500
- rp->m = svm_train(&cpro->prob, &cpa->p);
501
- if (defgamma)
502
- cpa->p.gamma = 0;
503
- rb_obj_call_init(tdata, 0, NULL);
504
- return tdata;
505
- }
506
-
507
- static VALUE svmpm_predict_values(VALUE self, VALUE xs)
508
- {
509
- struct RSVM_Model *rp;
510
- double *pe;
511
- struct svm_node *x = rubyArrayToNodelist(xs);
512
- int i, nr_class, numvals;
513
- VALUE decvals;
514
- Data_Get_Struct(self, struct RSVM_Model, rp);
515
- nr_class = svm_get_nr_class(rp->m);
516
- decvals = rb_ary_new();
517
- numvals = (nr_class * (nr_class - 1))/2;
518
- pe = (double *) calloc(numvals, sizeof(double));
519
- svm_predict_values(rp->m, x, pe);
520
- for (i = 0; i < numvals; i += 1)
521
- rb_ary_push(decvals, rb_float_new(pe[i]));
522
- free(pe);
523
- return decvals;
524
- }
525
-
526
- static VALUE svmpm_predict_probability(VALUE self, VALUE xs)
527
- {
528
- double result;
529
- struct RSVM_Model *rp;
530
- double *pe;
531
- struct svm_node *x = rubyArrayToNodelist(xs);
532
- int i;
533
- VALUE probs, retval;
534
- retval = rb_ary_new();
535
- Data_Get_Struct(self, struct RSVM_Model, rp);
536
- probs = rb_ary_new();
537
- pe = (double *) calloc(svm_get_nr_class(rp->m), sizeof(double));
538
- result = svm_predict_probability(rp->m, x, pe);
539
- for (i = 0; i < svm_get_nr_class(rp->m); i += 1)
540
- rb_ary_push(probs, rb_float_new(pe[i]));
541
- free(pe);
542
- rb_ary_push(retval, rb_float_new(result));
543
- rb_ary_push(retval, probs);
544
- return retval;
545
- }
546
-
547
- /*
548
- * Predicts a value (regression or classification) based on an input vector
549
- */
550
- static VALUE svmpm_predict(VALUE self, VALUE xs)
551
- {
552
- double result;
553
- struct RSVM_Model *rp;
554
- Data_Get_Struct(self, struct RSVM_Model, rp);
555
- struct svm_node *x = rubyArrayToNodelist(xs);
556
- result = svm_predict(rp->m, x);
557
- free(x);
558
- return rb_float_new(result);
559
- }
560
-
561
- /*
562
- * Predicts a value (regression or classification) based on an input vector
563
- */
564
- static VALUE svmpm_predict2(VALUE self, VALUE xs)
565
- {
566
- double result;
567
- struct RSVM_Model *rp;
568
- Data_Get_Struct(self, struct RSVM_Model, rp);
569
- struct svm_node *x = rubyArrayToNodelist2(xs);
570
- result = svm_predict(rp->m, x);
571
- free(x);
572
- return rb_float_new(result);
573
- }
574
-
575
- static VALUE svmpm_save(VALUE self, VALUE filename)
576
- {
577
- int result;
578
- struct RSVM_Model *rp;
579
- Data_Get_Struct(self, struct RSVM_Model, rp);
580
- char *name = STR2CSTR(filename);
581
- result = svm_save_model(name, rp->m);
582
- return INT2FIX(result);
583
- }
584
-
585
- static VALUE svmpm_load(VALUE cl, VALUE filename)
586
- {
587
- struct RSVM_Model *rp = (struct RSVM_Model *) calloc(sizeof(struct RSVM_Model), 1);
588
- char *name = STR2CSTR(filename);
589
- struct svm_model * model = svm_load_model(name);
590
- rp->m = model;
591
- return Data_Wrap_Struct(cl, 0, svmpm_free, rp);
592
- }
593
-
594
- /*
595
- * Initializes an SVMModel
596
- */
597
- static VALUE svmpm_init(VALUE self)
598
- {
599
- return self;
600
- }
601
-
602
- /*
603
- * Initializes an SVMParameter
604
- */
605
- static VALUE svmpa_init(VALUE self)
606
- {
607
- return self;
608
- }
609
-
610
- /*
611
- * Initializes an SVMProblem
612
- */
613
- static VALUE svmpr_init(VALUE self)
614
- {
615
- return self;
616
- }
617
-
618
- /*
619
- * Returns the number of samples in an SVMProblem
620
- */
621
- static VALUE svmpr_size(VALUE self)
622
- {
623
- struct RSVM_Problem *rp;
624
- Data_Get_Struct(self, struct RSVM_Problem, rp);
625
- syncProblem(rp);
626
- return INT2FIX(rp->prob.l);
627
- }
628
-
629
- /*
630
- * Returns the number of Support Vectors in an SVMModel
631
- */
632
- static VALUE svmpm_svcount(VALUE self)
633
- {
634
- struct RSVM_Model *rp;
635
- Data_Get_Struct(self, struct RSVM_Model, rp);
636
- return INT2FIX(getSVCount(rp->m));
637
- }
638
-
639
- /*
640
- * Adds a training example to an SVMProblem
641
- */
642
- static VALUE svmpr_addex(VALUE self, VALUE y, VALUE xs)
643
- {
644
- struct RSVM_Problem *rp;
645
- struct svm_node *fini;
646
- double yd;
647
- Data_Get_Struct(self, struct RSVM_Problem, rp);
648
- yd = NUM2DBL(y);
649
- fini = rubyArrayToNodelist(xs);
650
- addExample(rp, yd, fini);
651
- if (rp->k == 0) rp->k = RARRAY(xs)->len;
652
- return Qnil;
653
- }
654
-
655
- static VALUE svmpr_addex2(VALUE self, VALUE y, VALUE xs)
656
- {
657
- struct RSVM_Problem *rp;
658
- struct svm_node *fini;
659
- double yd;
660
- Data_Get_Struct(self, struct RSVM_Problem, rp);
661
- yd = NUM2DBL(y);
662
- fini = rubyArrayToNodelist2(xs);
663
- addExample(rp, yd, fini);
664
- if (rp->k == 0) rp->k = RARRAY(xs)->len;
665
- return Qnil;
666
- }
667
-
668
- /* To be removed in next version */
669
- struct svm_model
670
- {
671
- svm_parameter param; // parameter
672
- int nr_class; // number of classes, = 2 in regression/one class svm
673
- int l; // total #SV
674
- svm_node **SV; // SVs (SV[l])
675
- double **sv_coef; // coefficients for SVs in decision functions (sv_coef[n-1][l])
676
- double *rho; // constants in decision functions (rho[n*(n-1)/2])
677
-
678
- // for classification only
679
-
680
- int *label; // label of each class (label[n])
681
- int *nSV; // number of SVs for each class (nSV[n])
682
- // nSV[0] + nSV[1] + ... + nSV[n-1] = l
683
- // XXX
684
- int free_sv; // 1 if svm_model is created by svm_load_model
685
- // 0 if svm_model is created by svm_train
686
- };
687
-
688
- static int getSVCount(struct svm_model *m)
689
- {
690
- return m->l;
691
- }
692
-
693
- extern "C" {
694
- void Init_SVM();
695
- };
696
-
697
- void Init_SVM()
698
- {
699
- #ifdef QUIETFUNC
700
- svm_set_verbosity(0);
701
- #endif
702
- mSVM = rb_define_module("SVM");
703
- cSVMProblem = rb_define_class_under(mSVM, "Problem", rb_cObject);
704
- cSVMParameter = rb_define_class_under(mSVM, "Parameter", rb_cObject);
705
- cSVMModel = rb_define_class_under(mSVM, "Model", rb_cObject);
706
-
707
-
708
- rb_define_singleton_method(cSVMProblem, "new", (VALUE (*) (...))svmpr_new, 0);
709
- rb_define_method(cSVMProblem, "initialize", (VALUE (*) (...))svmpr_init, 0);
710
- rb_define_method(cSVMProblem, "size", (VALUE (*) (...))svmpr_size, 0);
711
- rb_define_method(cSVMProblem, "addExample", (VALUE (*) (...))svmpr_addex, 2);
712
- rb_define_method(cSVMProblem, "addExample2", (VALUE (*) (...))svmpr_addex2, 2);
713
-
714
- rb_define_const(mSVM, "C_SVC", INT2FIX(C_SVC));
715
- rb_define_const(mSVM, "NU_SVC", INT2FIX(NU_SVC));
716
- rb_define_const(mSVM, "ONE_CLASS", INT2FIX(ONE_CLASS));
717
- rb_define_const(mSVM, "EPSILON_SVR", INT2FIX(EPSILON_SVR));
718
- rb_define_const(mSVM, "NU_SVR", INT2FIX(NU_SVR));
719
- rb_define_const(mSVM, "LINEAR", INT2FIX(LINEAR));
720
- rb_define_const(mSVM, "POLY", INT2FIX(POLY));
721
- rb_define_const(mSVM, "RBF", INT2FIX(RBF));
722
- rb_define_const(mSVM, "SIGMOID", INT2FIX(SIGMOID));
723
-
724
- rb_define_singleton_method(cSVMParameter, "new", (VALUE (*) (...))svmpa_new, 0);
725
- rb_define_method(cSVMParameter, "degree", (VALUE (*) (...))svmpa_degree, 0);
726
- rb_define_method(cSVMParameter, "degree=", (VALUE (*) (...))svmpa_degreeeq, 1);
727
- rb_define_method(cSVMParameter, "gamma", (VALUE (*) (...))svmpa_gamma, 0);
728
- rb_define_method(cSVMParameter, "gamma=", (VALUE (*) (...))svmpa_gammaeq, 1);
729
- rb_define_method(cSVMParameter, "coef0", (VALUE (*) (...))svmpa_coef0, 0);
730
- rb_define_method(cSVMParameter, "coef0=", (VALUE (*) (...))svmpa_coef0eq, 1);
731
- rb_define_method(cSVMParameter, "probability", (VALUE (*) (...))svmpa_probability, 0);
732
- rb_define_method(cSVMParameter, "probability=", (VALUE (*) (...))svmpa_probabilityeq, 1);
733
- rb_define_method(cSVMParameter, "cache_size", (VALUE (*) (...))svmpa_cache_size, 0);
734
- rb_define_method(cSVMParameter, "cache_size=", (VALUE (*) (...))svmpa_cache_sizeeq, 1);
735
- rb_define_method(cSVMParameter, "eps", (VALUE (*) (...))svmpa_eps, 0);
736
- rb_define_method(cSVMParameter, "eps=", (VALUE (*) (...))svmpa_epseq, 1);
737
- rb_define_method(cSVMParameter, "C", (VALUE (*) (...))svmpa_C, 0);
738
- rb_define_method(cSVMParameter, "C=", (VALUE (*) (...))svmpa_Ceq, 1);
739
- rb_define_method(cSVMParameter, "nu", (VALUE (*) (...))svmpa_nu, 0);
740
- rb_define_method(cSVMParameter, "nu=", (VALUE (*) (...))svmpa_nueq, 1);
741
- rb_define_method(cSVMParameter, "p", (VALUE (*) (...))svmpa_p, 0);
742
- rb_define_method(cSVMParameter, "p=", (VALUE (*) (...))svmpa_peq, 1);
743
- rb_define_method(cSVMParameter, "kernel_type", (VALUE (*) (...))svmpa_kernel_type, 0);
744
- rb_define_method(cSVMParameter, "kernel_type=", (VALUE (*) (...))svmpa_kernel_typeeq, 1);
745
- rb_define_method(cSVMParameter, "svm_type", (VALUE (*) (...))svmpa_svm_type, 0);
746
- rb_define_method(cSVMParameter, "svm_type=", (VALUE (*) (...))svmpa_svm_typeeq, 1);
747
- /*
748
- rb_define_method(cSVMParameter, "_dump_data", (VALUE (*) (...))svmpa_svm_dump_data, 0);
749
-
750
- rb_define_method(cSVMParameter, "_load_data", (VALUE (*) (...))svmpa_svm_load_data, 1);
751
- */
752
- rb_define_method(cSVMParameter, "_dump", (VALUE (*) (...))svmpa_svm_dump, 1);
753
-
754
- rb_define_singleton_method(cSVMParameter, "_load", (VALUE (*) (...))svmpa_svm_load, 1);
755
-
756
- #ifdef HAVE_DEFINE_ALLOC_FUNCTION
757
- rb_define_alloc_func(cSVMModel, svmpa_allocate);
758
- #endif
759
- /*rb_undef_alloc_func(cSVMModel); */
760
- /* rb_add_method(cSVMModel, ID_ALLOCATOR, NEW_CFUNC(svmpa_allocate, 0), NOEX_PRIVATE | NOEX_CFUNC); */
761
- /* rb_define_singleton_method(cSVMModel, "allocate", (VALUE (*) (...))svmpa_allocate, 1);
762
- rb_define_singleton_method(cSVMModel, "_alloc", (VALUE (*) (...))svmpa_allocate, 1);
763
- */
764
-
765
- rb_define_singleton_method(cSVMModel, "new", (VALUE (*) (...))svmpm_new, 2);
766
- rb_define_method(cSVMModel, "predict", (VALUE (*) (...))svmpm_predict, 1);
767
- rb_define_method(cSVMModel, "predict2", (VALUE (*) (...))svmpm_predict2, 1);
768
- rb_define_method(cSVMModel, "predict_probability", (VALUE (*) (...))svmpm_predict_probability, 1);
769
- rb_define_method(cSVMModel, "predict_values", (VALUE (*) (...))svmpm_predict_values, 1);
770
- rb_define_method(cSVMModel, "svcount", (VALUE (*) (...))svmpm_svcount, 0);
771
- rb_define_method(cSVMModel, "save", (VALUE (*) (...))svmpm_save, 1);
772
- rb_define_singleton_method(cSVMModel, "load", (VALUE (*) (...))svmpm_load, 1);
773
- cMarshal = rb_const_get(rb_cObject, rb_intern("Marshal"));
774
- }
775
-