svmredlight 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document CHANGED
@@ -1,5 +1,6 @@
1
1
  lib/**/*.rb
2
2
  bin/*
3
+ ext/*.c
3
4
  -
4
5
  features/**/*.feature
5
6
  LICENSE.txt
data/README.rdoc CHANGED
@@ -7,9 +7,15 @@ A partial interface to SVM-light [http://svmlight.joachims.org/] using it you ca
7
7
 
8
8
  As of now it's known to work with SVM 6.02.
9
9
 
10
+ === Installing svmlight as a library
11
+
10
12
  Make sure to build the libsvmlight.o version of svmlight by using
11
13
  "make libsvmlight_hideo".
12
14
 
15
+ Make sure the .h files in the svmlight distribution are in your include path, inside a subdirectory called svm_light,
16
+ and the object code for the library is in your library path (/usr/lib for instance).
17
+
18
+
13
19
 
14
20
  == Document
15
21
 
data/Rakefile CHANGED
@@ -14,7 +14,7 @@ require 'rake'
14
14
  require 'jeweler'
15
15
  Jeweler::Tasks.new do |gem|
16
16
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
- gem.version = '0.1.0'
17
+ gem.version = '0.1.1'
18
18
  gem.name = "svmredlight"
19
19
  gem.homepage = "http://github.com/camilo/svmredlight"
20
20
  gem.license = "MIT"
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
data/ext/svmredlight.c CHANGED
@@ -9,13 +9,10 @@ is_linear(MODEL *model){
9
9
  return model->kernel_parm.kernel_type == 0;
10
10
  }
11
11
 
12
- // Modules and Classes
13
12
  static VALUE rb_mSvmLight;
14
13
  static VALUE rb_cModel;
15
14
  static VALUE rb_cDocument;
16
15
 
17
- // GC functions
18
-
19
16
  /* Not using deep free anymore, let ruby call free on the documents otherwise we might end
20
17
  * up having double free problems, from svm_learn_main: Warning: The model contains
21
18
  * references to the original data 'docs'. If you want to free the original data, and
@@ -52,11 +49,12 @@ model_read_from_file(VALUE klass, VALUE filename){
52
49
  /* Helper function type checks a string meant to be used as a learn_parm, in case of error
53
50
  * returns 1 and sets the correct exception message in error, on success returns 0 and
54
51
  * copies the c string data of new_val to target*/
55
- int check_string_param(VALUE new_val,
56
- const char *default_val,
57
- char *target,
58
- const char *name,
59
- char *error){
52
+ int
53
+ check_string_param(VALUE new_val,
54
+ const char *default_val,
55
+ char *target,
56
+ const char *name,
57
+ char *error){
60
58
 
61
59
  if(TYPE(new_val) == T_STRING){
62
60
  strlcpy(target, StringValuePtr(new_val), 199);
@@ -73,7 +71,8 @@ int check_string_param(VALUE new_val,
73
71
  /* Helper function type checks a long meant to be used as a learn_parm or kernel_parm, in
74
72
  * case of error returns 1 and sets the correct exception message in error, on success
75
73
  * returns 0 and stores the converted long value of new_val in target*/
76
- int check_long_param(VALUE new_val,
74
+ int
75
+ check_long_param(VALUE new_val,
77
76
  long default_val,
78
77
  long *target,
79
78
  const char *name,
@@ -93,11 +92,12 @@ int check_long_param(VALUE new_val,
93
92
  /* Helper function type checks a double meant to be used as a learn_parm or kernel_parm, in
94
93
  * case of error returns 1 and sets the correct exception message in error, on success
95
94
  * returns 0 and stores the converted double value of new_val in target*/
96
- int check_double_param(VALUE new_val,
97
- double default_val,
98
- double *target,
99
- const char *name,
100
- char *error){
95
+ int
96
+ check_double_param(VALUE new_val,
97
+ double default_val,
98
+ double *target,
99
+ const char *name,
100
+ char *error){
101
101
  if(TYPE(new_val) == T_FLOAT || TYPE(new_val) == T_FIXNUM){
102
102
  *target = NUM2DBL(new_val);
103
103
  }else if(NIL_P(new_val) ){
@@ -113,11 +113,12 @@ int check_double_param(VALUE new_val,
113
113
  /* Helper function type checks an int meant to be used as a boolean learn_parm or
114
114
  * kernel_parm, in case of error returns 1 and sets the correct exception message in
115
115
  * error, on success returns 0 and stores the resulting long flag value in target*/
116
- int check_bool_param(VALUE new_val,
117
- long default_val,
118
- long *target,
119
- const char *name,
120
- char *error){
116
+ int
117
+ check_bool_param(VALUE new_val,
118
+ long default_val,
119
+ long *target,
120
+ const char *name,
121
+ char *error){
121
122
  if(TYPE(new_val) == T_TRUE){
122
123
  *target = 1L;
123
124
  }else if(TYPE(new_val) == T_FALSE){
@@ -134,208 +135,209 @@ int check_bool_param(VALUE new_val,
134
135
 
135
136
  /* Helper function in charge of setting up the learn parameters before they are passed to
136
137
  * the svm_learn_classification copies part of the logic in svm_learn_main.c */
137
- int setup_learn_params(LEARN_PARM *c_learn_param, VALUE r_hash, char *error_message){
138
+ int
139
+ setup_learn_params(LEARN_PARM *c_learn_param, VALUE r_hash, char *error_message){
138
140
  // Defaults taken from svm_learn_main
139
141
  VALUE inter_val, temp_ary, svm_type, svm_type_ruby_str;
140
142
  char *svm_type_str;
141
143
 
142
144
  inter_val = rb_hash_aref(r_hash, rb_str_new2("predfile"));
143
145
  if(1 == check_string_param(inter_val,
144
- "trans_predictions",
145
- &c_learn_param->predfile,
146
- "predfile",
147
- error_message)){
146
+ "trans_predictions",
147
+ (char *)&c_learn_param->predfile,
148
+ "predfile",
149
+ error_message)){
148
150
  return 1;
149
151
  }
150
152
 
151
153
  inter_val = rb_hash_aref(r_hash, rb_str_new2("alphafile"));
152
154
  if(1 == check_string_param(inter_val,
153
- "",
154
- &c_learn_param->alphafile,
155
- "alphafile",
156
- error_message)){
155
+ "",
156
+ (char*)&c_learn_param->alphafile,
157
+ "alphafile",
158
+ error_message)){
157
159
  return 1;
158
160
  }
159
161
 
160
162
  inter_val = rb_hash_aref(r_hash, rb_str_new2("biased_hyperplane"));
161
163
  if(1 == check_bool_param(inter_val,
162
- 1L,
163
- &(c_learn_param->biased_hyperplane),
164
- "biased_hyperplane",
165
- error_message)){
164
+ 1L,
165
+ &(c_learn_param->biased_hyperplane),
166
+ "biased_hyperplane",
167
+ error_message)){
166
168
  return 1;
167
169
  }
168
170
 
169
171
  inter_val = rb_hash_aref(r_hash, rb_str_new2("sharedslack"));
170
172
  if(1 == check_bool_param(inter_val,
171
- 0L,
172
- &(c_learn_param->sharedslack),
173
- "sharedslack",
174
- error_message)){
173
+ 0L,
174
+ &(c_learn_param->sharedslack),
175
+ "sharedslack",
176
+ error_message)){
175
177
  return 1;
176
178
  }
177
179
 
178
180
  inter_val = rb_hash_aref(r_hash, rb_str_new2("remove_inconsistent"));
179
181
  if(1 == check_bool_param(inter_val,
180
- 0L,
181
- &(c_learn_param->remove_inconsistent),
182
- "remove_inconsistent",
183
- error_message)){
182
+ 0L,
183
+ &(c_learn_param->remove_inconsistent),
184
+ "remove_inconsistent",
185
+ error_message)){
184
186
  return 1;
185
187
  }
186
188
 
187
189
  inter_val = rb_hash_aref(r_hash, rb_str_new2("skip_final_opt_check"));
188
190
  if(1 == check_bool_param(inter_val,
189
- 0L,
190
- &(c_learn_param->skip_final_opt_check),
191
- "skip_final_opt_check",
192
- error_message)){
191
+ 0L,
192
+ &(c_learn_param->skip_final_opt_check),
193
+ "skip_final_opt_check",
194
+ error_message)){
193
195
  return 1;
194
196
  }
195
197
 
196
198
  inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_newvarsinqp"));
197
199
  if(1 == check_bool_param(inter_val,
198
- 0L,
199
- &(c_learn_param->svm_newvarsinqp),
200
- "svm_newvarsinqp",
201
- error_message)){
200
+ 0L,
201
+ &(c_learn_param->svm_newvarsinqp),
202
+ "svm_newvarsinqp",
203
+ error_message)){
202
204
  return 1;
203
205
  }
204
206
 
205
207
  inter_val = rb_hash_aref(r_hash, rb_str_new2("compute_loo"));
206
208
  if(1 == check_bool_param(inter_val,
207
- 0L,
208
- &(c_learn_param->compute_loo),
209
- "compute_loo",
210
- error_message)){
209
+ 0L,
210
+ &(c_learn_param->compute_loo),
211
+ "compute_loo",
212
+ error_message)){
211
213
  return 1;
212
214
  }
213
215
 
214
216
 
215
217
  inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_maxqpsize"));
216
218
  if(1 == check_long_param(inter_val,
217
- 10L,
218
- &(c_learn_param->svm_maxqpsize),
219
- "svm_maxqpsize",
220
- error_message)){
219
+ 10L,
220
+ &(c_learn_param->svm_maxqpsize),
221
+ "svm_maxqpsize",
222
+ error_message)){
221
223
  return 1;
222
224
  }
223
225
 
224
226
  inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_iter_to_shrink"));
225
227
  if(1 == check_long_param(inter_val,
226
- -9999,
227
- &(c_learn_param->svm_iter_to_shrink),
228
- "svm_iter_to_shrink",
229
- error_message)){
228
+ -9999,
229
+ &(c_learn_param->svm_iter_to_shrink),
230
+ "svm_iter_to_shrink",
231
+ error_message)){
230
232
  return 1;
231
233
  }
232
234
 
233
235
  inter_val = rb_hash_aref(r_hash, rb_str_new2("maxiter"));
234
236
  if(1 == check_long_param(inter_val,
235
- 100000,
236
- &(c_learn_param->maxiter),
237
- "maxiter",
238
- error_message)){
237
+ 100000,
238
+ &(c_learn_param->maxiter),
239
+ "maxiter",
240
+ error_message)){
239
241
  return 1;
240
242
  }
241
243
 
242
244
  inter_val = rb_hash_aref(r_hash, rb_str_new2("kernel_cache_size"));
243
245
  if(1 == check_long_param(inter_val,
244
- 40L,
245
- &(c_learn_param->kernel_cache_size),
246
- "kernel_cache_size",
247
- error_message)){
246
+ 40L,
247
+ &(c_learn_param->kernel_cache_size),
248
+ "kernel_cache_size",
249
+ error_message)){
248
250
  return 1;
249
251
  }
250
252
 
251
253
  inter_val = rb_hash_aref(r_hash, rb_str_new2("xa_depth"));
252
254
  if(1 == check_long_param(inter_val,
253
- 0L,
254
- &(c_learn_param->xa_depth),
255
- "xa_depth",
256
- error_message)){
255
+ 0L,
256
+ &(c_learn_param->xa_depth),
257
+ "xa_depth",
258
+ error_message)){
257
259
  return 1;
258
260
  }
259
261
 
260
262
  inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_c"));
261
263
  if(1 == check_double_param(inter_val,
262
- 0.0,
263
- &(c_learn_param->svm_c),
264
- "svm_c",
265
- error_message)){
264
+ 0.0,
265
+ &(c_learn_param->svm_c),
266
+ "svm_c",
267
+ error_message)){
266
268
  return 1;
267
269
  }
268
270
 
269
271
  inter_val = rb_hash_aref(r_hash, rb_str_new2("eps"));
270
272
  if(1 == check_double_param(inter_val,
271
- 0.1,
272
- &(c_learn_param->eps),
273
- "eps",
274
- error_message)){
273
+ 0.1,
274
+ &(c_learn_param->eps),
275
+ "eps",
276
+ error_message)){
275
277
  return 1;
276
278
  }
277
279
 
278
280
  inter_val = rb_hash_aref(r_hash, rb_str_new2("transduction_posratio"));
279
281
  if(1 == check_double_param(inter_val,
280
- -1.0,
281
- &(c_learn_param->transduction_posratio),
282
- "transduction_posratio",
283
- error_message)){
282
+ -1.0,
283
+ &(c_learn_param->transduction_posratio),
284
+ "transduction_posratio",
285
+ error_message)){
284
286
  return 1;
285
287
  }
286
288
 
287
289
  inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_costratio"));
288
290
  if(1 == check_double_param(inter_val,
289
- 1.0,
290
- &(c_learn_param->svm_costratio),
291
- "svm_costratio",
292
- error_message)){
291
+ 1.0,
292
+ &(c_learn_param->svm_costratio),
293
+ "svm_costratio",
294
+ error_message)){
293
295
  return 1;
294
296
  }
295
297
 
296
298
  inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_costratio_unlab"));
297
299
  if(1 == check_double_param(inter_val,
298
- 1.0,
299
- &(c_learn_param->svm_costratio_unlab),
300
- "svm_costratio_unlab",
301
- error_message)){
300
+ 1.0,
301
+ &(c_learn_param->svm_costratio_unlab),
302
+ "svm_costratio_unlab",
303
+ error_message)){
302
304
  return 1;
303
305
  }
304
306
 
305
307
  inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_unlabbound"));
306
308
  if(1 == check_double_param(inter_val,
307
- 1.0000000000000001e-05,
308
- &(c_learn_param->svm_unlabbound),
309
- "svm_unlabbound",
310
- error_message)){
309
+ 1.0000000000000001e-05,
310
+ &(c_learn_param->svm_unlabbound),
311
+ "svm_unlabbound",
312
+ error_message)){
311
313
  return 1;
312
314
  }
313
315
 
314
316
  inter_val = rb_hash_aref(r_hash, rb_str_new2("epsilon_crit"));
315
317
  if(1 == check_double_param(inter_val,
316
- 0.001,
317
- &(c_learn_param->epsilon_crit),
318
- "epsilon_crit",
319
- error_message)){
318
+ 0.001,
319
+ &(c_learn_param->epsilon_crit),
320
+ "epsilon_crit",
321
+ error_message)){
320
322
  return 1;
321
323
  }
322
324
 
323
325
  inter_val = rb_hash_aref(r_hash, rb_str_new2("epsilon_a"));
324
326
  if(1 == check_double_param(inter_val,
325
- 1E-15,
326
- &(c_learn_param->epsilon_a),
327
- "epsilon_a",
328
- error_message)){
327
+ 1E-15,
328
+ &(c_learn_param->epsilon_a),
329
+ "epsilon_a",
330
+ error_message)){
329
331
  return 1;
330
332
  }
331
333
 
332
334
  c_learn_param->rho=1.0;
333
335
  inter_val = rb_hash_aref(r_hash, rb_str_new2("rho"));
334
336
  if(1 == check_double_param(inter_val,
335
- 1.0,
336
- &(c_learn_param->rho),
337
- "rho",
338
- error_message)){
337
+ 1.0,
338
+ &(c_learn_param->rho),
339
+ "rho",
340
+ error_message)){
339
341
  return 1;
340
342
  }
341
343
 
@@ -343,41 +345,42 @@ int setup_learn_params(LEARN_PARM *c_learn_param, VALUE r_hash, char *error_mess
343
345
  return 0;
344
346
  }
345
347
 
346
- int setup_kernel_params(KERNEL_PARM *c_kernel_param, VALUE r_hash, char *error_message){
348
+ int
349
+ setup_kernel_params(KERNEL_PARM *c_kernel_param, VALUE r_hash, char *error_message){
347
350
  VALUE inter_val;
348
351
  inter_val = rb_hash_aref(r_hash, rb_str_new2("poly_degree"));
349
352
  if(1 == check_long_param(inter_val,
350
- 3L,
351
- &(c_kernel_param->poly_degree),
352
- "poly_degree",
353
- error_message)){
353
+ 3L,
354
+ &(c_kernel_param->poly_degree),
355
+ "poly_degree",
356
+ error_message)){
354
357
  return 1;
355
358
  }
356
359
 
357
360
  inter_val = rb_hash_aref(r_hash, rb_str_new2("rbf_gamma"));
358
361
  if(1 == check_double_param(inter_val,
359
- 1.0,
360
- &(c_kernel_param->rbf_gamma),
361
- "rbf_gamma",
362
- error_message)){
362
+ 1.0,
363
+ &(c_kernel_param->rbf_gamma),
364
+ "rbf_gamma",
365
+ error_message)){
363
366
  return 1;
364
367
  }
365
368
 
366
369
  inter_val = rb_hash_aref(r_hash, rb_str_new2("coef_lin"));
367
370
  if(1 == check_double_param(inter_val,
368
- 1.0,
369
- &(c_kernel_param->coef_lin),
370
- "coef_lin",
371
- error_message)){
371
+ 1.0,
372
+ &(c_kernel_param->coef_lin),
373
+ "coef_lin",
374
+ error_message)){
372
375
  return 1;
373
376
  }
374
377
 
375
378
  inter_val = rb_hash_aref(r_hash, rb_str_new2("coef_const"));
376
379
  if(1 == check_double_param(inter_val,
377
- 1.0,
378
- &(c_kernel_param->coef_const),
379
- "coef_const",
380
- error_message)){
380
+ 1.0,
381
+ &(c_kernel_param->coef_const),
382
+ "coef_const",
383
+ error_message)){
381
384
  return 1;
382
385
  }
383
386
 
@@ -634,6 +637,18 @@ model_support_vectors_count(VALUE self){
634
637
  return INT2FIX(m->sv_num);
635
638
  }
636
639
 
640
+ static VALUE
641
+ model_write_to_file(VALUE self, VALUE pahtofile){
642
+ Check_Type(pahtofile, T_STRING);
643
+
644
+ MODEL *m;
645
+ Data_Get_Struct(self, MODEL, m);
646
+
647
+ write_model(StringValuePtr(pahtofile), m);
648
+
649
+ return Qnil;
650
+ }
651
+
637
652
  static VALUE
638
653
  model_total_words(VALUE self){
639
654
  MODEL *m;
@@ -745,8 +760,9 @@ Init_svmredlight(){
745
760
  rb_mSvmLight = rb_define_module("SVMLight");
746
761
  //Model
747
762
  rb_cModel = rb_define_class_under(rb_mSvmLight, "Model", rb_cObject);
748
- rb_define_singleton_method(rb_cModel, "read_from_file", model_read_from_file, 1);
763
+ rb_define_singleton_method(rb_cModel, "from_file", model_read_from_file, 1);
749
764
  rb_define_singleton_method(rb_cModel, "learn_classification", model_learn_classification, 5);
765
+ rb_define_method(rb_cModel, "to_file", model_write_to_file, 1);
750
766
  rb_define_method(rb_cModel, "support_vectors_count", model_support_vectors_count, 0);
751
767
  rb_define_method(rb_cModel, "total_words", model_total_words, 0);
752
768
  rb_define_method(rb_cModel, "classify", model_classify_example, 1);
data/lib/svmredlight.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  require File.dirname(__FILE__) + '/../ext/svmredlight'
2
- require 'svmredlight/model'
3
- require 'svmredlight/document'
2
+ require File.dirname(__FILE__) + '/svmredlight/model'
3
+ require File.dirname(__FILE__) + '/svmredlight/document'
4
4
 
@@ -1,9 +1,7 @@
1
1
  module SVMLight
2
- # A document is the Ruby representation of a DOC structure in SVMlight, it contains a
3
- # queryid, a slackid, a costfactor ( c ) and a vector with feature numbers and their
4
- # correspondent weights.
2
+ # A document is the Ruby representation of a DOC structure in SVMlight, it contains a queryid, a slackid, a costfactor
3
+ # ( c ) and a vector with feature numbers and their correspondent weights.
5
4
  class Document
6
-
7
5
  # @param [Hash] vector a hash where the keys are feature numbers and the values its weights
8
6
  # @param [Hash] opts the options coincide with SVMLight parameters to the create_example function, the default values for all the options are 0
9
7
  # @option [:docnum] Numeric docum
@@ -1,4 +1,6 @@
1
1
  module SVMLight
2
+
3
+ class MissingModelFile < StandardError; end
2
4
  # A model is the product of training a SVM, once created it can take documents as inputs
3
5
  # and act on them (for instance, by classifying them). Models can also be read from files
4
6
  # created by svm_learn.
@@ -16,7 +18,48 @@ module SVMLight
16
18
 
17
19
  learn_classification(documents_and_lables, learn_params, kernel_params, false, alphas)
18
20
  end
19
-
21
+
20
22
  private_class_method :learn_classification
23
+ private_class_method :from_file
24
+
25
+ # in self.read_from_file and #write_to_file
26
+ #
27
+ # This is an anti-pattern. Checking for existence of resources is normally something to be avoided. Trying to open
28
+ # the resource and then rescuing the exception/reading the error code is a much better practice, however SVMLight
29
+ # will call exit(1) if the file does not exist, and that in turn will kill the Ruby VM, so in this case, to
30
+ # minimize that possibility, I optimistically check for the file's existence and hope it is still there when it is
31
+ # actually time to open it.
32
+ #
33
+ # TODO: Come up with a proper replacement for those methods, probably simply reimplementing them in svmredlight.c
34
+ # and raising an exception when files cannot be opened.
35
+
36
+ # Will load an existent model from a file
37
+ # @param [String] pahtofile path to the model file
38
+ def self.read_from_file(pahtofile)
39
+ if File.exists?(pahtofile) && File.file?(pahtofile)
40
+ from_file(pahtofile)
41
+
42
+ else
43
+
44
+ raise MissingModelFile, "the #{pahtofile} does not exists or is not a file"
45
+ end
46
+ end
47
+
48
+ private :to_file
49
+
50
+ # Will create a file containing the model info; the model info can be turned back into a model by using
51
+ # Model.read_from_file
52
+ # @param [String] pahtofile
53
+ def write_to_file(pahtofile)
54
+ dir = File.dirname(pahtofile)
55
+
56
+ if File.directory?(dir) && File.writable?(dir)
57
+ to_file(pahtofile)
58
+
59
+ else
60
+ raise ModelWriteError, "impossible to write #{pahtofile}"
61
+
62
+ end
63
+ end
21
64
  end
22
65
  end
data/svmredlight.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{svmredlight}
8
- s.version = "0.1.0"
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Camilo Lopez"]
12
- s.date = %q{2011-09-11}
12
+ s.date = %q{2011-09-22}
13
13
  s.description = %q{Ruby interface to SVMLight}
14
14
  s.email = %q{camilo@camilolopez.com}
15
15
  s.extensions = ["ext/extconf.rb"]
data/test/helper.rb CHANGED
@@ -8,6 +8,7 @@ rescue Bundler::BundlerError => e
8
8
  $stderr.puts "Run `bundle install` to install missing gems"
9
9
  exit e.status_code
10
10
  end
11
+
11
12
  require 'test/unit'
12
13
  require 'shoulda'
13
14
  require './lib/svmredlight'
@@ -3,53 +3,59 @@ include SVMLight
3
3
 
4
4
  class TestDocument < Test::Unit::TestCase
5
5
 
6
- def test_create
7
- d = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
8
- assert_kind_of Document, d
9
- end
10
-
11
- def test_create_should_accept_integer_as_feature_weight
12
- d = Document.create(0, 0.5, 1, 0, [[1, 0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
13
- assert_kind_of Document, d
14
- end
6
+ context "creating a new document" do
7
+
8
+ should "succed when using #create" do
9
+ d = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
10
+ assert_kind_of Document, d
11
+ end
12
+
13
+ should "accept integers as feature weights" do
14
+ d = Document.create(0, 0.5, 1, 0, [[1, 0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
15
+ assert_kind_of Document, d
16
+ end
17
+
18
+ should "create documents useing new as well" do
19
+ d = Document.new({1 => 566.0, 4 => 133.0}, {docnum: 10, slackid: 1, queryid: 2, costfactor: 0.5})
20
+
21
+ assert_equal 10, d.docnum
22
+ assert_equal 1, d.slackid
23
+ assert_equal 2, d.queryid
24
+ assert_equal 0.5, d.costfactor
25
+ end
26
+
27
+ should "raise argument error if any of the word numbers is less or equal to 0" do
28
+ assert_raise(ArgumentError){ Document.create(0, 0.5, 1, 0, [[0, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]]) }
29
+ assert_raise(ArgumentError){ Document.create(1, 0.5, 1, 0, [[-1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])}
30
+ end
31
+
32
+ should "raise type error when the fourth argument is not an array" do
33
+ assert_raise(TypeError) { Document.create(-1, 0, 1, 0, {}) }
34
+ end
35
+
36
+ should "raise type error when the fourth argument is empty" do
37
+ assert_raise(ArgumentError) { Document.create(-1, 0, 1, 0 [])}
38
+ end
15
39
 
16
- def test_create_using_new
17
- d = Document.new({1 => 566.0, 4 => 133.0}, {docnum: 10, slackid: 1, queryid: 2, costfactor: 0.5})
18
-
19
- assert_equal 10, d.docnum
20
- assert_equal 1, d.slackid
21
- assert_equal 2, d.queryid
22
- assert_equal 0.5, d.costfactor
23
40
  end
41
+
42
+ context 'a document' do
43
+ should "have accessible docnum, queryid, slackid, and, costfacor" do
44
+ d1 = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]])
45
+ d2 = Document.create(1, 0.6, 2, 1, [[1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])
24
46
 
25
- def test_should_be_able_to_access_properties
26
- d1 = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]])
27
- d2 = Document.create(1, 0.6, 2, 1, [[1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])
28
-
29
- assert_equal 0, d1.docnum
30
- assert_equal 1, d2.docnum
31
-
32
- assert_equal 1, d1.slackid
33
- assert_equal 2, d2.slackid
47
+ assert_equal 0, d1.docnum
48
+ assert_equal 1, d2.docnum
34
49
 
35
- assert_equal 0, d1.queryid
36
- assert_equal 1, d2.queryid
50
+ assert_equal 1, d1.slackid
51
+ assert_equal 2, d2.slackid
37
52
 
38
- assert_equal 0.5, d1.costfactor
39
- assert_equal 0.6, d2.costfactor
40
- end
41
-
42
- def test_all_word_numbers_should_be_greater_than_zero
43
- assert_raise(ArgumentError){ Document.create(0, 0.5, 1, 0, [[0, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]]) }
44
- assert_raise(ArgumentError){ Document.create(1, 0.5, 1, 0, [[-1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])}
45
- end
46
-
47
- def test_create_with_no_array
48
- assert_raise(TypeError) { Document.create(-1, 0, 1, 0, {}) }
49
- end
53
+ assert_equal 0, d1.queryid
54
+ assert_equal 1, d2.queryid
50
55
 
51
- def test_create_with_empty_array
52
- assert_raise(ArgumentError) { Document.create(-1, 0, 1, 0 [])}
56
+ assert_equal 0.5, d1.costfactor
57
+ assert_equal 0.6, d2.costfactor
58
+ end
53
59
  end
54
60
  end
55
61
 
data/test/test_model.rb CHANGED
@@ -3,112 +3,157 @@ include SVMLight
3
3
 
4
4
  class TestModel < Test::Unit::TestCase
5
5
 
6
- def setup
7
- @features ||= [
8
- [ [1,0.6], [11, 0.0], [34, 0.1] ],
9
- [ [5,0.4], [15, 0.0], [30, 0.1] ],
10
- [ [1,0.1], [13, 0.0], [31, 0.1] ],
11
- [ [7,0.7], [15, 0.0], [35, 0.1] ],
12
- [ [5,0.6], [19, 0.0], [44, 0.1] ],
13
- ]
14
- @docs_and_labels ||= @features.each_with_index.map{|f,i| [ Document.create(i + 1, 1, 0, 0, f), i%2 * -1]}
15
- end
6
+ context "reading a model from file" do
7
+
8
+ setup do
9
+ @file_name = 'test/assets/model'
10
+ end
11
+
12
+ should "read properly from a well formed file" do
13
+ assert m = Model.read_from_file(@file_name)
14
+ assert_equal 3877, m.support_vectors_count
15
+ assert_equal 39118, m.total_words
16
+ end
16
17
 
17
- def test_learn_classification_with_alpha
18
- m = Model.new(:classification, @docs_and_labels, {}, {}, [1, 0.0] * 50)
19
- assert_kind_of Model, m
18
+ should "classify successfully after reading the model from a file" do
19
+ m = Model.read_from_file(@file_name)
20
20
 
21
- @docs_and_labels.each_with_index do |item, i|
22
- assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
21
+ assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1 ,v.to_f]} ) )
22
+ assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
23
+ assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0, 0, 0, 0.8, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
24
+ assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0.5, 0, 0, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
25
+ end
26
+
27
+ should "raise file not found exception when file does not exists" do
28
+ assert_raises(MissingModelFile){ Model.read_from_file(@file_name + 'bleh') }
23
29
  end
24
30
  end
25
31
 
26
- def test_learn_classification
27
- m = Model.new(:classification, @docs_and_labels, {}, {}, nil)
28
- assert_kind_of Model, m
29
- assert_equal 44, m.total_words
30
- assert_equal 5, m.totdoc
32
+ context "writting a model to a file" do
33
+ setup do
34
+ @features ||= [
35
+ [ [1,0.6], [11, 0.0], [34, 0.1] ],
36
+ [ [5,0.4], [15, 0.0], [30, 0.1] ],
37
+ [ [1,0.1], [13, 0.0], [31, 0.1] ],
38
+ [ [7,0.7], [15, 0.0], [35, 0.1] ],
39
+ [ [5,0.6], [19, 0.0], [44, 0.1] ],
40
+ ]
41
+
42
+ @docs_and_labels ||= @features.each_with_index.map do |feature, index|
43
+ [ Document.create(index + 1, 1, 0, 0, feature), index%2 * -1]
44
+ end
45
+
46
+ @filepath = './test/assets/written_model'
47
+ @model = Model.new(:classification, @docs_and_labels, {}, {}, nil)
48
+ end
49
+
50
+ should "write a model from memmory to a file" do
51
+ @model.write_to_file(@filepath)
31
52
 
32
- @docs_and_labels.each_with_index do |item, i|
33
- assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
53
+ assert File.exists?(@filepath)
54
+ assert File.file?(@filepath)
55
+ # TODO: Implement actual model equality
56
+ assert_equal @model.support_vectors_count, Model.read_from_file(@filepath).support_vectors_count
34
57
  end
35
58
 
59
+ # Need to find a good way to test this without relying too much on the environment
60
+ should "raise ModelWriteError when it is impossible to write a model file"
61
+
62
+ teardown do
63
+ `rm #{@filepath} &> /dev/null`
64
+ end
36
65
  end
37
66
 
38
- def test_learn_classification_with_learn_params
39
-
40
- learn_params = {
67
+ context "when learning from new documents" do
68
+
69
+ setup do
70
+ @features ||= [
71
+ [ [1,0.6], [11, 0.0], [34, 0.1] ],
72
+ [ [5,0.4], [15, 0.0], [30, 0.1] ],
73
+ [ [1,0.1], [13, 0.0], [31, 0.1] ],
74
+ [ [7,0.7], [15, 0.0], [35, 0.1] ],
75
+ [ [5,0.6], [19, 0.0], [44, 0.1] ],
76
+ ]
77
+
78
+ @docs_and_labels ||= @features.each_with_index.map do |feature, index|
79
+ [ Document.create(index + 1, 1, 0, 0, feature), index%2 * -1]
80
+ end
81
+ end
82
+
83
+ should "learn classification with default arguments" do
84
+ m = Model.new(:classification, @docs_and_labels, {}, {}, nil)
85
+ assert_kind_of Model, m
86
+ assert_equal 44, m.total_words
87
+ assert_equal 5, m.totdoc
88
+
89
+ @docs_and_labels.each_with_index do |item, i|
90
+ assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
91
+ end
92
+ end
93
+
94
+ should "learn classification with alpha values" do
95
+ m = Model.new(:classification, @docs_and_labels, {}, {}, [1, 0.0] * 50)
96
+ assert_kind_of Model, m
97
+
98
+ @docs_and_labels.each_with_index do |item, i|
99
+ assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
100
+ end
101
+ end
102
+
103
+ should "raise argument error when one of the alphas is not numeric " do
104
+ assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, {}, {}, [1, {}] )}
105
+ end
106
+
107
+ should "learn classification and accept learn parameters" do
108
+ learn_params = {
41
109
  "predfile" => "custom_file",
42
110
  "alphafile" => "alpha",
43
111
  "biased_hyperplane" => false,
44
112
  "sharedslack" => false,
45
113
  "remove_inconsistent" => true
46
- }
114
+ }
47
115
 
48
- m = Model.new(:classification, @docs_and_labels, learn_params, {}, nil)
49
- assert_kind_of Model, m
116
+ m = Model.new(:classification, @docs_and_labels, learn_params, {}, nil)
117
+ assert_kind_of Model, m
50
118
 
51
- @docs_and_labels.each_with_index do |item, i|
52
- assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
119
+ @docs_and_labels.each_with_index do |item, i|
120
+ assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
121
+ end
53
122
  end
54
- end
55
123
 
56
- def test_learn_classification_with_invalid_learn_params
57
- learn_params = {"svm_c" => -1}
58
- assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
59
- learn_params = {"svm_iter_to_shrink" => -1}
60
- assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
61
- end
124
+ should "raise argument error when learn parameters are invalid" do
125
+ learn_params = {"svm_c" => -1}
126
+ assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
127
+ learn_params = {"svm_iter_to_shrink" => -1}
128
+ assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
129
+ end
130
+
131
+ should "learn calssification while accepting kernel paramters" do
62
132
 
63
- def test_learn_classification_with_kernel_params
64
-
65
- kernel_params = {
133
+ kernel_params = {
66
134
  "poly_degree" => 3,
67
135
  "rbf_gamma" => 0.5,
68
136
  "coef_lin" => 0.4,
69
137
  "coef_const" => 0.56
70
- }
138
+ }
71
139
 
72
- m = Model.new(:classification, @docs_and_labels, {}, kernel_params, nil)
73
- assert_kind_of Model, m
140
+ m = Model.new(:classification, @docs_and_labels, {}, kernel_params, nil)
141
+ assert_kind_of Model, m
74
142
 
75
- @docs_and_labels.each_with_index do |item, i|
76
- assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
77
- end
78
- end
79
-
80
- def test_learn_classification_with_learn_params_when_predfile_is_not_string
81
-
82
- learn_params = { "predfile" => {}}
83
-
84
- assert_raise(ArgumentError) do
85
- Model.new(:classification, @docs_and_labels, learn_params, {}, [1, 0.0, 1])
143
+ @docs_and_labels.each_with_index do |item, i|
144
+ assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
145
+ end
86
146
  end
87
147
 
88
- end
148
+ should "raise argument error when predfile is not string" do
89
149
 
90
- def test_learn_classification_fails_when_element_is_not_array
91
- @docs_and_labels << []
92
- assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, {}, {}, nil)}
93
- end
150
+ learn_params = { "predfile" => {}}
94
151
 
95
- def test_learn_classification_fails_when_element_is_arry_with_the_wrong_types
96
- assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, {}, {}, [1, {}] )}
97
- end
98
-
99
- def test_read
100
- assert m = Model.read_from_file('test/assets/model')
101
- assert_equal 3877, m.support_vectors_count
102
- assert_equal 39118, m.total_words
103
- end
152
+ assert_raise(ArgumentError) do
153
+ Model.new(:classification, @docs_and_labels, learn_params, {}, [1, 0.0, 1])
154
+ end
155
+ end
104
156
 
105
- def test_classify
106
- m = Model.read_from_file('test/assets/model')
107
- assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1 ,v.to_f]} ) )
108
- assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
109
- assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0, 0, 0, 0.8, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
110
- assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0.5, 0, 0, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
111
157
  end
112
-
113
158
  end
114
159
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: svmredlight
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.1.0
5
+ version: 0.1.1
6
6
  platform: ruby
7
7
  authors:
8
8
  - Camilo Lopez
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-09-11 00:00:00 -04:00
13
+ date: 2011-09-22 00:00:00 -04:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -113,7 +113,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
113
113
  requirements:
114
114
  - - ">="
115
115
  - !ruby/object:Gem::Version
116
- hash: 2455726499843414946
116
+ hash: 2966611142819785204
117
117
  segments:
118
118
  - 0
119
119
  version: "0"