svmredlight 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document CHANGED
@@ -1,5 +1,6 @@
1
1
  lib/**/*.rb
2
2
  bin/*
3
+ ext/*.c
3
4
  -
4
5
  features/**/*.feature
5
6
  LICENSE.txt
data/README.rdoc CHANGED
@@ -7,9 +7,15 @@ A partial interface to SVM-light [http://svmlight.joachims.org/] using it you ca
7
7
 
8
8
  As of now it's known to work with SVM 6.02.
9
9
 
10
+ === Installing svmlight as a library
11
+
10
12
  Make sure to build the libsvmlight.o version of svmlight by using
11
13
  "make libsvmlight_hideo".
12
14
 
15
+ Make sure the .h files in the svmlight distribution are in your include path, inside a subdirectory called svm_light,
16
+ and the object code for the library is in your library path (/usr/lib for instance).
17
+
18
+
13
19
 
14
20
  == Document
15
21
 
data/Rakefile CHANGED
@@ -14,7 +14,7 @@ require 'rake'
14
14
  require 'jeweler'
15
15
  Jeweler::Tasks.new do |gem|
16
16
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
- gem.version = '0.1.0'
17
+ gem.version = '0.1.1'
18
18
  gem.name = "svmredlight"
19
19
  gem.homepage = "http://github.com/camilo/svmredlight"
20
20
  gem.license = "MIT"
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
data/ext/svmredlight.c CHANGED
@@ -9,13 +9,10 @@ is_linear(MODEL *model){
9
9
  return model->kernel_parm.kernel_type == 0;
10
10
  }
11
11
 
12
- // Modules and Classes
13
12
  static VALUE rb_mSvmLight;
14
13
  static VALUE rb_cModel;
15
14
  static VALUE rb_cDocument;
16
15
 
17
- // GC functions
18
-
19
16
  /* Not using deep free anymore, let ruby call free on the documents otherwise we might end
20
17
  * up having double free problems, from svm_learn_main: Warning: The model contains
21
18
  * references to the original data 'docs'. If you want to free the original data, and
@@ -52,11 +49,12 @@ model_read_from_file(VALUE klass, VALUE filename){
52
49
  /* Helper function type checks a string meant to be used as a learn_parm, in case of error
53
50
  * returns 1 and sets the correct exception message in error, on success returns 0 and
54
51
  * copies the c string data of new_val to target*/
55
- int check_string_param(VALUE new_val,
56
- const char *default_val,
57
- char *target,
58
- const char *name,
59
- char *error){
52
+ int
53
+ check_string_param(VALUE new_val,
54
+ const char *default_val,
55
+ char *target,
56
+ const char *name,
57
+ char *error){
60
58
 
61
59
  if(TYPE(new_val) == T_STRING){
62
60
  strlcpy(target, StringValuePtr(new_val), 199);
@@ -73,7 +71,8 @@ int check_string_param(VALUE new_val,
73
71
  /* Helper function type checks a long meant to be used as a learn_parm or kernel_parm, in
74
72
  * case of error returns 1 and sets the correct exception message in error, on success
75
73
  * returns 0 and stores the converted long value of new_val in target*/
76
- int check_long_param(VALUE new_val,
74
+ int
75
+ check_long_param(VALUE new_val,
77
76
  long default_val,
78
77
  long *target,
79
78
  const char *name,
@@ -93,11 +92,12 @@ int check_long_param(VALUE new_val,
93
92
  /* Helper function type checks a double meant to be used as a learn_parm or kernel_parm, in
94
93
  * case of error returns 1 and sets the correct exception message in error, on success
95
94
  * returns 0 and stores the converted double value of new_val in target*/
96
- int check_double_param(VALUE new_val,
97
- double default_val,
98
- double *target,
99
- const char *name,
100
- char *error){
95
+ int
96
+ check_double_param(VALUE new_val,
97
+ double default_val,
98
+ double *target,
99
+ const char *name,
100
+ char *error){
101
101
  if(TYPE(new_val) == T_FLOAT || TYPE(new_val) == T_FIXNUM){
102
102
  *target = NUM2DBL(new_val);
103
103
  }else if(NIL_P(new_val) ){
@@ -113,11 +113,12 @@ int check_double_param(VALUE new_val,
113
113
  /* Helper function type checks an int meant to be used as a boolean learn_parm or
114
114
  * kernel_parm, in case of error returns 1 and sets the correct exception message in
115
115
  * error, on success returns 0 and stores the corresponding long flag in target*/
116
- int check_bool_param(VALUE new_val,
117
- long default_val,
118
- long *target,
119
- const char *name,
120
- char *error){
116
+ int
117
+ check_bool_param(VALUE new_val,
118
+ long default_val,
119
+ long *target,
120
+ const char *name,
121
+ char *error){
121
122
  if(TYPE(new_val) == T_TRUE){
122
123
  *target = 1L;
123
124
  }else if(TYPE(new_val) == T_FALSE){
@@ -134,208 +135,209 @@ int check_bool_param(VALUE new_val,
134
135
 
135
136
  /* Helper function in charge of setting up the learn parameters before they are passed to
136
137
  * the svm_learn_classification copies part of the logic in svm_learn_main.c */
137
- int setup_learn_params(LEARN_PARM *c_learn_param, VALUE r_hash, char *error_message){
138
+ int
139
+ setup_learn_params(LEARN_PARM *c_learn_param, VALUE r_hash, char *error_message){
138
140
  // Defaults taken from svm_learn_main
139
141
  VALUE inter_val, temp_ary, svm_type, svm_type_ruby_str;
140
142
  char *svm_type_str;
141
143
 
142
144
  inter_val = rb_hash_aref(r_hash, rb_str_new2("predfile"));
143
145
  if(1 == check_string_param(inter_val,
144
- "trans_predictions",
145
- &c_learn_param->predfile,
146
- "predfile",
147
- error_message)){
146
+ "trans_predictions",
147
+ (char *)&c_learn_param->predfile,
148
+ "predfile",
149
+ error_message)){
148
150
  return 1;
149
151
  }
150
152
 
151
153
  inter_val = rb_hash_aref(r_hash, rb_str_new2("alphafile"));
152
154
  if(1 == check_string_param(inter_val,
153
- "",
154
- &c_learn_param->alphafile,
155
- "alphafile",
156
- error_message)){
155
+ "",
156
+ (char*)&c_learn_param->alphafile,
157
+ "alphafile",
158
+ error_message)){
157
159
  return 1;
158
160
  }
159
161
 
160
162
  inter_val = rb_hash_aref(r_hash, rb_str_new2("biased_hyperplane"));
161
163
  if(1 == check_bool_param(inter_val,
162
- 1L,
163
- &(c_learn_param->biased_hyperplane),
164
- "biased_hyperplane",
165
- error_message)){
164
+ 1L,
165
+ &(c_learn_param->biased_hyperplane),
166
+ "biased_hyperplane",
167
+ error_message)){
166
168
  return 1;
167
169
  }
168
170
 
169
171
  inter_val = rb_hash_aref(r_hash, rb_str_new2("sharedslack"));
170
172
  if(1 == check_bool_param(inter_val,
171
- 0L,
172
- &(c_learn_param->sharedslack),
173
- "sharedslack",
174
- error_message)){
173
+ 0L,
174
+ &(c_learn_param->sharedslack),
175
+ "sharedslack",
176
+ error_message)){
175
177
  return 1;
176
178
  }
177
179
 
178
180
  inter_val = rb_hash_aref(r_hash, rb_str_new2("remove_inconsistent"));
179
181
  if(1 == check_bool_param(inter_val,
180
- 0L,
181
- &(c_learn_param->remove_inconsistent),
182
- "remove_inconsistent",
183
- error_message)){
182
+ 0L,
183
+ &(c_learn_param->remove_inconsistent),
184
+ "remove_inconsistent",
185
+ error_message)){
184
186
  return 1;
185
187
  }
186
188
 
187
189
  inter_val = rb_hash_aref(r_hash, rb_str_new2("skip_final_opt_check"));
188
190
  if(1 == check_bool_param(inter_val,
189
- 0L,
190
- &(c_learn_param->skip_final_opt_check),
191
- "skip_final_opt_check",
192
- error_message)){
191
+ 0L,
192
+ &(c_learn_param->skip_final_opt_check),
193
+ "skip_final_opt_check",
194
+ error_message)){
193
195
  return 1;
194
196
  }
195
197
 
196
198
  inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_newvarsinqp"));
197
199
  if(1 == check_bool_param(inter_val,
198
- 0L,
199
- &(c_learn_param->svm_newvarsinqp),
200
- "svm_newvarsinqp",
201
- error_message)){
200
+ 0L,
201
+ &(c_learn_param->svm_newvarsinqp),
202
+ "svm_newvarsinqp",
203
+ error_message)){
202
204
  return 1;
203
205
  }
204
206
 
205
207
  inter_val = rb_hash_aref(r_hash, rb_str_new2("compute_loo"));
206
208
  if(1 == check_bool_param(inter_val,
207
- 0L,
208
- &(c_learn_param->compute_loo),
209
- "compute_loo",
210
- error_message)){
209
+ 0L,
210
+ &(c_learn_param->compute_loo),
211
+ "compute_loo",
212
+ error_message)){
211
213
  return 1;
212
214
  }
213
215
 
214
216
 
215
217
  inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_maxqpsize"));
216
218
  if(1 == check_long_param(inter_val,
217
- 10L,
218
- &(c_learn_param->svm_maxqpsize),
219
- "svm_maxqpsize",
220
- error_message)){
219
+ 10L,
220
+ &(c_learn_param->svm_maxqpsize),
221
+ "svm_maxqpsize",
222
+ error_message)){
221
223
  return 1;
222
224
  }
223
225
 
224
226
  inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_iter_to_shrink"));
225
227
  if(1 == check_long_param(inter_val,
226
- -9999,
227
- &(c_learn_param->svm_iter_to_shrink),
228
- "svm_iter_to_shrink",
229
- error_message)){
228
+ -9999,
229
+ &(c_learn_param->svm_iter_to_shrink),
230
+ "svm_iter_to_shrink",
231
+ error_message)){
230
232
  return 1;
231
233
  }
232
234
 
233
235
  inter_val = rb_hash_aref(r_hash, rb_str_new2("maxiter"));
234
236
  if(1 == check_long_param(inter_val,
235
- 100000,
236
- &(c_learn_param->maxiter),
237
- "maxiter",
238
- error_message)){
237
+ 100000,
238
+ &(c_learn_param->maxiter),
239
+ "maxiter",
240
+ error_message)){
239
241
  return 1;
240
242
  }
241
243
 
242
244
  inter_val = rb_hash_aref(r_hash, rb_str_new2("kernel_cache_size"));
243
245
  if(1 == check_long_param(inter_val,
244
- 40L,
245
- &(c_learn_param->kernel_cache_size),
246
- "kernel_cache_size",
247
- error_message)){
246
+ 40L,
247
+ &(c_learn_param->kernel_cache_size),
248
+ "kernel_cache_size",
249
+ error_message)){
248
250
  return 1;
249
251
  }
250
252
 
251
253
  inter_val = rb_hash_aref(r_hash, rb_str_new2("xa_depth"));
252
254
  if(1 == check_long_param(inter_val,
253
- 0L,
254
- &(c_learn_param->xa_depth),
255
- "xa_depth",
256
- error_message)){
255
+ 0L,
256
+ &(c_learn_param->xa_depth),
257
+ "xa_depth",
258
+ error_message)){
257
259
  return 1;
258
260
  }
259
261
 
260
262
  inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_c"));
261
263
  if(1 == check_double_param(inter_val,
262
- 0.0,
263
- &(c_learn_param->svm_c),
264
- "svm_c",
265
- error_message)){
264
+ 0.0,
265
+ &(c_learn_param->svm_c),
266
+ "svm_c",
267
+ error_message)){
266
268
  return 1;
267
269
  }
268
270
 
269
271
  inter_val = rb_hash_aref(r_hash, rb_str_new2("eps"));
270
272
  if(1 == check_double_param(inter_val,
271
- 0.1,
272
- &(c_learn_param->eps),
273
- "eps",
274
- error_message)){
273
+ 0.1,
274
+ &(c_learn_param->eps),
275
+ "eps",
276
+ error_message)){
275
277
  return 1;
276
278
  }
277
279
 
278
280
  inter_val = rb_hash_aref(r_hash, rb_str_new2("transduction_posratio"));
279
281
  if(1 == check_double_param(inter_val,
280
- -1.0,
281
- &(c_learn_param->transduction_posratio),
282
- "transduction_posratio",
283
- error_message)){
282
+ -1.0,
283
+ &(c_learn_param->transduction_posratio),
284
+ "transduction_posratio",
285
+ error_message)){
284
286
  return 1;
285
287
  }
286
288
 
287
289
  inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_costratio"));
288
290
  if(1 == check_double_param(inter_val,
289
- 1.0,
290
- &(c_learn_param->svm_costratio),
291
- "svm_costratio",
292
- error_message)){
291
+ 1.0,
292
+ &(c_learn_param->svm_costratio),
293
+ "svm_costratio",
294
+ error_message)){
293
295
  return 1;
294
296
  }
295
297
 
296
298
  inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_costratio_unlab"));
297
299
  if(1 == check_double_param(inter_val,
298
- 1.0,
299
- &(c_learn_param->svm_costratio_unlab),
300
- "svm_costratio_unlab",
301
- error_message)){
300
+ 1.0,
301
+ &(c_learn_param->svm_costratio_unlab),
302
+ "svm_costratio_unlab",
303
+ error_message)){
302
304
  return 1;
303
305
  }
304
306
 
305
307
  inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_unlabbound"));
306
308
  if(1 == check_double_param(inter_val,
307
- 1.0000000000000001e-05,
308
- &(c_learn_param->svm_unlabbound),
309
- "svm_unlabbound",
310
- error_message)){
309
+ 1.0000000000000001e-05,
310
+ &(c_learn_param->svm_unlabbound),
311
+ "svm_unlabbound",
312
+ error_message)){
311
313
  return 1;
312
314
  }
313
315
 
314
316
  inter_val = rb_hash_aref(r_hash, rb_str_new2("epsilon_crit"));
315
317
  if(1 == check_double_param(inter_val,
316
- 0.001,
317
- &(c_learn_param->epsilon_crit),
318
- "epsilon_crit",
319
- error_message)){
318
+ 0.001,
319
+ &(c_learn_param->epsilon_crit),
320
+ "epsilon_crit",
321
+ error_message)){
320
322
  return 1;
321
323
  }
322
324
 
323
325
  inter_val = rb_hash_aref(r_hash, rb_str_new2("epsilon_a"));
324
326
  if(1 == check_double_param(inter_val,
325
- 1E-15,
326
- &(c_learn_param->epsilon_a),
327
- "epsilon_a",
328
- error_message)){
327
+ 1E-15,
328
+ &(c_learn_param->epsilon_a),
329
+ "epsilon_a",
330
+ error_message)){
329
331
  return 1;
330
332
  }
331
333
 
332
334
  c_learn_param->rho=1.0;
333
335
  inter_val = rb_hash_aref(r_hash, rb_str_new2("rho"));
334
336
  if(1 == check_double_param(inter_val,
335
- 1.0,
336
- &(c_learn_param->rho),
337
- "rho",
338
- error_message)){
337
+ 1.0,
338
+ &(c_learn_param->rho),
339
+ "rho",
340
+ error_message)){
339
341
  return 1;
340
342
  }
341
343
 
@@ -343,41 +345,42 @@ int setup_learn_params(LEARN_PARM *c_learn_param, VALUE r_hash, char *error_mess
343
345
  return 0;
344
346
  }
345
347
 
346
- int setup_kernel_params(KERNEL_PARM *c_kernel_param, VALUE r_hash, char *error_message){
348
+ int
349
+ setup_kernel_params(KERNEL_PARM *c_kernel_param, VALUE r_hash, char *error_message){
347
350
  VALUE inter_val;
348
351
  inter_val = rb_hash_aref(r_hash, rb_str_new2("poly_degree"));
349
352
  if(1 == check_long_param(inter_val,
350
- 3L,
351
- &(c_kernel_param->poly_degree),
352
- "poly_degree",
353
- error_message)){
353
+ 3L,
354
+ &(c_kernel_param->poly_degree),
355
+ "poly_degree",
356
+ error_message)){
354
357
  return 1;
355
358
  }
356
359
 
357
360
  inter_val = rb_hash_aref(r_hash, rb_str_new2("rbf_gamma"));
358
361
  if(1 == check_double_param(inter_val,
359
- 1.0,
360
- &(c_kernel_param->rbf_gamma),
361
- "rbf_gamma",
362
- error_message)){
362
+ 1.0,
363
+ &(c_kernel_param->rbf_gamma),
364
+ "rbf_gamma",
365
+ error_message)){
363
366
  return 1;
364
367
  }
365
368
 
366
369
  inter_val = rb_hash_aref(r_hash, rb_str_new2("coef_lin"));
367
370
  if(1 == check_double_param(inter_val,
368
- 1.0,
369
- &(c_kernel_param->coef_lin),
370
- "coef_lin",
371
- error_message)){
371
+ 1.0,
372
+ &(c_kernel_param->coef_lin),
373
+ "coef_lin",
374
+ error_message)){
372
375
  return 1;
373
376
  }
374
377
 
375
378
  inter_val = rb_hash_aref(r_hash, rb_str_new2("coef_const"));
376
379
  if(1 == check_double_param(inter_val,
377
- 1.0,
378
- &(c_kernel_param->coef_const),
379
- "coef_const",
380
- error_message)){
380
+ 1.0,
381
+ &(c_kernel_param->coef_const),
382
+ "coef_const",
383
+ error_message)){
381
384
  return 1;
382
385
  }
383
386
 
@@ -634,6 +637,18 @@ model_support_vectors_count(VALUE self){
634
637
  return INT2FIX(m->sv_num);
635
638
  }
636
639
 
640
+ static VALUE
641
+ model_write_to_file(VALUE self, VALUE pahtofile){
642
+ Check_Type(pahtofile, T_STRING);
643
+
644
+ MODEL *m;
645
+ Data_Get_Struct(self, MODEL, m);
646
+
647
+ write_model(StringValuePtr(pahtofile), m);
648
+
649
+ return Qnil;
650
+ }
651
+
637
652
  static VALUE
638
653
  model_total_words(VALUE self){
639
654
  MODEL *m;
@@ -745,8 +760,9 @@ Init_svmredlight(){
745
760
  rb_mSvmLight = rb_define_module("SVMLight");
746
761
  //Model
747
762
  rb_cModel = rb_define_class_under(rb_mSvmLight, "Model", rb_cObject);
748
- rb_define_singleton_method(rb_cModel, "read_from_file", model_read_from_file, 1);
763
+ rb_define_singleton_method(rb_cModel, "from_file", model_read_from_file, 1);
749
764
  rb_define_singleton_method(rb_cModel, "learn_classification", model_learn_classification, 5);
765
+ rb_define_method(rb_cModel, "to_file", model_write_to_file, 1);
750
766
  rb_define_method(rb_cModel, "support_vectors_count", model_support_vectors_count, 0);
751
767
  rb_define_method(rb_cModel, "total_words", model_total_words, 0);
752
768
  rb_define_method(rb_cModel, "classify", model_classify_example, 1);
data/lib/svmredlight.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  require File.dirname(__FILE__) + '/../ext/svmredlight'
2
- require 'svmredlight/model'
3
- require 'svmredlight/document'
2
+ require File.dirname(__FILE__) + '/svmredlight/model'
3
+ require File.dirname(__FILE__) + '/svmredlight/document'
4
4
 
@@ -1,9 +1,7 @@
1
1
  module SVMLight
2
- # A document is the Ruby representation of a DOC structure in SVMlight, it contains a
3
- # queryid, a slackid, a costfactor ( c ) and a vector with feature numbers and their
4
- # correspondent weights.
2
+ # A document is the Ruby representation of a DOC structure in SVMlight, it contains a queryid, a slackid, a costfactor
3
+ # ( c ) and a vector with feature numbers and their correspondent weights.
5
4
  class Document
6
-
7
5
  # @param [Hash] vector a hash where the keys are feature numbers and the values its weights
8
6
  # @param [Hash] opts the options coincide with SVMLight parameters to the create_example function, the default values for all the options are 0
9
7
  # @option [:docnum] Numeric docum
@@ -1,4 +1,6 @@
1
1
  module SVMLight
2
+
3
+ class MissingModelFile < StandardError; end
2
4
  # A model is the product of training a SVM, once created it can take documents as inputs
3
5
  # and act on them (by, for instance, classifying them). Models can also be read from files
4
6
  # created by svm_learn.
@@ -16,7 +18,48 @@ module SVMLight
16
18
 
17
19
  learn_classification(documents_and_lables, learn_params, kernel_params, false, alphas)
18
20
  end
19
-
21
+
20
22
  private_class_method :learn_classification
23
+ private_class_method :from_file
24
+
25
+ # in self.read_from_file and #write_to_file
26
+ #
27
+ # This is an anti-pattern. Checking for existence of resources is normally something to be avoided. Trying to open
28
+ # the resource and then rescuing the exception/reading the error code is a much better practice, however SVMLight
29
+ # will call exit(1) if the file does not exist, and that in turn will kill the ruby VM, so in this case to
30
+ # minimize that possibility I optimistically check for the file's existence and hope it is still there when it is
31
+ # actually time to open it.
32
+ #
33
+ # TODO: Come up with a proper replacement for those methods, probably simply reimplementing them in svmredlight.c
34
+ # and raising an exception when files cannot be opened.
35
+
36
+ # Will load an existing model from a file
37
+ # @param [String] pahtofile path to the model file
38
+ def self.read_from_file(pahtofile)
39
+ if File.exists?(pahtofile) && File.file?(pahtofile)
40
+ from_file(pahtofile)
41
+
42
+ else
43
+
44
+ raise MissingModelFile, "the #{pahtofile} does not exists or is not a file"
45
+ end
46
+ end
47
+
48
+ private :to_file
49
+
50
+ # Will create a file containing the model info; the model info can be turned back into a model by using
51
+ # Model.read_from_file
52
+ # @param [String] pahtofile
53
+ def write_to_file(pahtofile)
54
+ dir = File.dirname(pahtofile)
55
+
56
+ if File.directory?(dir) && File.writable?(dir)
57
+ to_file(pahtofile)
58
+
59
+ else
60
+ raise ModelWriteError, "impossible to write #{pahtofile}"
61
+
62
+ end
63
+ end
21
64
  end
22
65
  end
data/svmredlight.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{svmredlight}
8
- s.version = "0.1.0"
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Camilo Lopez"]
12
- s.date = %q{2011-09-11}
12
+ s.date = %q{2011-09-22}
13
13
  s.description = %q{Ruby interface to SVMLight}
14
14
  s.email = %q{camilo@camilolopez.com}
15
15
  s.extensions = ["ext/extconf.rb"]
data/test/helper.rb CHANGED
@@ -8,6 +8,7 @@ rescue Bundler::BundlerError => e
8
8
  $stderr.puts "Run `bundle install` to install missing gems"
9
9
  exit e.status_code
10
10
  end
11
+
11
12
  require 'test/unit'
12
13
  require 'shoulda'
13
14
  require './lib/svmredlight'
@@ -3,53 +3,59 @@ include SVMLight
3
3
 
4
4
  class TestDocument < Test::Unit::TestCase
5
5
 
6
- def test_create
7
- d = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
8
- assert_kind_of Document, d
9
- end
10
-
11
- def test_create_should_accept_integer_as_feature_weight
12
- d = Document.create(0, 0.5, 1, 0, [[1, 0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
13
- assert_kind_of Document, d
14
- end
6
+ context "creating a new document" do
7
+
8
+ should "succed when using #create" do
9
+ d = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
10
+ assert_kind_of Document, d
11
+ end
12
+
13
+ should "accept integers as feature weights" do
14
+ d = Document.create(0, 0.5, 1, 0, [[1, 0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
15
+ assert_kind_of Document, d
16
+ end
17
+
18
+ should "create documents useing new as well" do
19
+ d = Document.new({1 => 566.0, 4 => 133.0}, {docnum: 10, slackid: 1, queryid: 2, costfactor: 0.5})
20
+
21
+ assert_equal 10, d.docnum
22
+ assert_equal 1, d.slackid
23
+ assert_equal 2, d.queryid
24
+ assert_equal 0.5, d.costfactor
25
+ end
26
+
27
+ should "raise argument error if any of the word numbers is less or equal to 0" do
28
+ assert_raise(ArgumentError){ Document.create(0, 0.5, 1, 0, [[0, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]]) }
29
+ assert_raise(ArgumentError){ Document.create(1, 0.5, 1, 0, [[-1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])}
30
+ end
31
+
32
+ should "raise type error when the fourth argument is not an array" do
33
+ assert_raise(TypeError) { Document.create(-1, 0, 1, 0, {}) }
34
+ end
35
+
36
+ should "raise type error when the fourth argument is empty" do
37
+ assert_raise(ArgumentError) { Document.create(-1, 0, 1, 0 [])}
38
+ end
15
39
 
16
- def test_create_using_new
17
- d = Document.new({1 => 566.0, 4 => 133.0}, {docnum: 10, slackid: 1, queryid: 2, costfactor: 0.5})
18
-
19
- assert_equal 10, d.docnum
20
- assert_equal 1, d.slackid
21
- assert_equal 2, d.queryid
22
- assert_equal 0.5, d.costfactor
23
40
  end
41
+
42
+ context 'a document' do
43
+ should "have accessible docnum, queryid, slackid, and, costfacor" do
44
+ d1 = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]])
45
+ d2 = Document.create(1, 0.6, 2, 1, [[1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])
24
46
 
25
- def test_should_be_able_to_access_properties
26
- d1 = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]])
27
- d2 = Document.create(1, 0.6, 2, 1, [[1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])
28
-
29
- assert_equal 0, d1.docnum
30
- assert_equal 1, d2.docnum
31
-
32
- assert_equal 1, d1.slackid
33
- assert_equal 2, d2.slackid
47
+ assert_equal 0, d1.docnum
48
+ assert_equal 1, d2.docnum
34
49
 
35
- assert_equal 0, d1.queryid
36
- assert_equal 1, d2.queryid
50
+ assert_equal 1, d1.slackid
51
+ assert_equal 2, d2.slackid
37
52
 
38
- assert_equal 0.5, d1.costfactor
39
- assert_equal 0.6, d2.costfactor
40
- end
41
-
42
- def test_all_word_numbers_should_be_greater_than_zero
43
- assert_raise(ArgumentError){ Document.create(0, 0.5, 1, 0, [[0, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]]) }
44
- assert_raise(ArgumentError){ Document.create(1, 0.5, 1, 0, [[-1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])}
45
- end
46
-
47
- def test_create_with_no_array
48
- assert_raise(TypeError) { Document.create(-1, 0, 1, 0, {}) }
49
- end
53
+ assert_equal 0, d1.queryid
54
+ assert_equal 1, d2.queryid
50
55
 
51
- def test_create_with_empty_array
52
- assert_raise(ArgumentError) { Document.create(-1, 0, 1, 0 [])}
56
+ assert_equal 0.5, d1.costfactor
57
+ assert_equal 0.6, d2.costfactor
58
+ end
53
59
  end
54
60
  end
55
61
 
data/test/test_model.rb CHANGED
@@ -3,112 +3,157 @@ include SVMLight
3
3
 
4
4
  class TestModel < Test::Unit::TestCase
5
5
 
6
- def setup
7
- @features ||= [
8
- [ [1,0.6], [11, 0.0], [34, 0.1] ],
9
- [ [5,0.4], [15, 0.0], [30, 0.1] ],
10
- [ [1,0.1], [13, 0.0], [31, 0.1] ],
11
- [ [7,0.7], [15, 0.0], [35, 0.1] ],
12
- [ [5,0.6], [19, 0.0], [44, 0.1] ],
13
- ]
14
- @docs_and_labels ||= @features.each_with_index.map{|f,i| [ Document.create(i + 1, 1, 0, 0, f), i%2 * -1]}
15
- end
6
+ context "reading a model from file" do
7
+
8
+ setup do
9
+ @file_name = 'test/assets/model'
10
+ end
11
+
12
+ should "read properly from a well formed file" do
13
+ assert m = Model.read_from_file(@file_name)
14
+ assert_equal 3877, m.support_vectors_count
15
+ assert_equal 39118, m.total_words
16
+ end
16
17
 
17
- def test_learn_classification_with_alpha
18
- m = Model.new(:classification, @docs_and_labels, {}, {}, [1, 0.0] * 50)
19
- assert_kind_of Model, m
18
+ should "classify successfully after reading the model from a file" do
19
+ m = Model.read_from_file(@file_name)
20
20
 
21
- @docs_and_labels.each_with_index do |item, i|
22
- assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
21
+ assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1 ,v.to_f]} ) )
22
+ assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
23
+ assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0, 0, 0, 0.8, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
24
+ assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0.5, 0, 0, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
25
+ end
26
+
27
+ should "raise file not found exception when file does not exists" do
28
+ assert_raises(MissingModelFile){ Model.read_from_file(@file_name + 'bleh') }
23
29
  end
24
30
  end
25
31
 
26
- def test_learn_classification
27
- m = Model.new(:classification, @docs_and_labels, {}, {}, nil)
28
- assert_kind_of Model, m
29
- assert_equal 44, m.total_words
30
- assert_equal 5, m.totdoc
32
+ context "writting a model to a file" do
33
+ setup do
34
+ @features ||= [
35
+ [ [1,0.6], [11, 0.0], [34, 0.1] ],
36
+ [ [5,0.4], [15, 0.0], [30, 0.1] ],
37
+ [ [1,0.1], [13, 0.0], [31, 0.1] ],
38
+ [ [7,0.7], [15, 0.0], [35, 0.1] ],
39
+ [ [5,0.6], [19, 0.0], [44, 0.1] ],
40
+ ]
41
+
42
+ @docs_and_labels ||= @features.each_with_index.map do |feature, index|
43
+ [ Document.create(index + 1, 1, 0, 0, feature), index%2 * -1]
44
+ end
45
+
46
+ @filepath = './test/assets/written_model'
47
+ @model = Model.new(:classification, @docs_and_labels, {}, {}, nil)
48
+ end
49
+
50
+ should "write a model from memmory to a file" do
51
+ @model.write_to_file(@filepath)
31
52
 
32
- @docs_and_labels.each_with_index do |item, i|
33
- assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
53
+ assert File.exists?(@filepath)
54
+ assert File.file?(@filepath)
55
+ # TODO: Implement actual model equality
56
+ assert_equal @model.support_vectors_count, Model.read_from_file(@filepath).support_vectors_count
34
57
  end
35
58
 
59
+ # Need to find a good way to test this without relying too much on the environment
60
+ should "raise ModelWriteError when it is impossible to write a model file"
61
+
62
+ teardown do
63
+ `rm #{@filepath} &> /dev/null`
64
+ end
36
65
  end
37
66
 
38
- def test_learn_classification_with_learn_params
39
-
40
- learn_params = {
67
+ context "when learning from new documents" do
68
+
69
+ setup do
70
+ @features ||= [
71
+ [ [1,0.6], [11, 0.0], [34, 0.1] ],
72
+ [ [5,0.4], [15, 0.0], [30, 0.1] ],
73
+ [ [1,0.1], [13, 0.0], [31, 0.1] ],
74
+ [ [7,0.7], [15, 0.0], [35, 0.1] ],
75
+ [ [5,0.6], [19, 0.0], [44, 0.1] ],
76
+ ]
77
+
78
+ @docs_and_labels ||= @features.each_with_index.map do |feature, index|
79
+ [ Document.create(index + 1, 1, 0, 0, feature), index%2 * -1]
80
+ end
81
+ end
82
+
83
+ should "learn classification with default arguments" do
84
+ m = Model.new(:classification, @docs_and_labels, {}, {}, nil)
85
+ assert_kind_of Model, m
86
+ assert_equal 44, m.total_words
87
+ assert_equal 5, m.totdoc
88
+
89
+ @docs_and_labels.each_with_index do |item, i|
90
+ assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
91
+ end
92
+ end
93
+
94
+ should "learn classification with alpha values" do
95
+ m = Model.new(:classification, @docs_and_labels, {}, {}, [1, 0.0] * 50)
96
+ assert_kind_of Model, m
97
+
98
+ @docs_and_labels.each_with_index do |item, i|
99
+ assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
100
+ end
101
+ end
102
+
103
+ should "raise argument error when one of the alphas is not numeric " do
104
+ assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, {}, {}, [1, {}] )}
105
+ end
106
+
107
+ should "learn classification and accept learn parameters" do
108
+ learn_params = {
41
109
  "predfile" => "custom_file",
42
110
  "alphafile" => "alpha",
43
111
  "biased_hyperplane" => false,
44
112
  "sharedslack" => false,
45
113
  "remove_inconsistent" => true
46
- }
114
+ }
47
115
 
48
- m = Model.new(:classification, @docs_and_labels, learn_params, {}, nil)
49
- assert_kind_of Model, m
116
+ m = Model.new(:classification, @docs_and_labels, learn_params, {}, nil)
117
+ assert_kind_of Model, m
50
118
 
51
- @docs_and_labels.each_with_index do |item, i|
52
- assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
119
+ @docs_and_labels.each_with_index do |item, i|
120
+ assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
121
+ end
53
122
  end
54
- end
55
123
 
56
- def test_learn_classification_with_invalid_learn_params
57
- learn_params = {"svm_c" => -1}
58
- assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
59
- learn_params = {"svm_iter_to_shrink" => -1}
60
- assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
61
- end
124
+ should "raise argument error when learn parameters are invalid" do
125
+ learn_params = {"svm_c" => -1}
126
+ assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
127
+ learn_params = {"svm_iter_to_shrink" => -1}
128
+ assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
129
+ end
130
+
131
+ should "learn calssification while accepting kernel paramters" do
62
132
 
63
- def test_learn_classification_with_kernel_params
64
-
65
- kernel_params = {
133
+ kernel_params = {
66
134
  "poly_degree" => 3,
67
135
  "rbf_gamma" => 0.5,
68
136
  "coef_lin" => 0.4,
69
137
  "coef_const" => 0.56
70
- }
138
+ }
71
139
 
72
- m = Model.new(:classification, @docs_and_labels, {}, kernel_params, nil)
73
- assert_kind_of Model, m
140
+ m = Model.new(:classification, @docs_and_labels, {}, kernel_params, nil)
141
+ assert_kind_of Model, m
74
142
 
75
- @docs_and_labels.each_with_index do |item, i|
76
- assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
77
- end
78
- end
79
-
80
- def test_learn_classification_with_learn_params_when_predfile_is_not_string
81
-
82
- learn_params = { "predfile" => {}}
83
-
84
- assert_raise(ArgumentError) do
85
- Model.new(:classification, @docs_and_labels, learn_params, {}, [1, 0.0, 1])
143
+ @docs_and_labels.each_with_index do |item, i|
144
+ assert_kind_of Numeric, m.classify(item.first), "failed in item # #{i}"
145
+ end
86
146
  end
87
147
 
88
- end
148
+ should "raise argument error when predfile is not string" do
89
149
 
90
- def test_learn_classification_fails_when_element_is_not_array
91
- @docs_and_labels << []
92
- assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, {}, {}, nil)}
93
- end
150
+ learn_params = { "predfile" => {}}
94
151
 
95
- def test_learn_classification_fails_when_element_is_arry_with_the_wrong_types
96
- assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, {}, {}, [1, {}] )}
97
- end
98
-
99
- def test_read
100
- assert m = Model.read_from_file('test/assets/model')
101
- assert_equal 3877, m.support_vectors_count
102
- assert_equal 39118, m.total_words
103
- end
152
+ assert_raise(ArgumentError) do
153
+ Model.new(:classification, @docs_and_labels, learn_params, {}, [1, 0.0, 1])
154
+ end
155
+ end
104
156
 
105
- def test_classify
106
- m = Model.read_from_file('test/assets/model')
107
- assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1 ,v.to_f]} ) )
108
- assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
109
- assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0, 0, 0, 0.8, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
110
- assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0.5, 0, 0, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
111
157
  end
112
-
113
158
  end
114
159
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: svmredlight
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.1.0
5
+ version: 0.1.1
6
6
  platform: ruby
7
7
  authors:
8
8
  - Camilo Lopez
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-09-11 00:00:00 -04:00
13
+ date: 2011-09-22 00:00:00 -04:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -113,7 +113,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
113
113
  requirements:
114
114
  - - ">="
115
115
  - !ruby/object:Gem::Version
116
- hash: 2455726499843414946
116
+ hash: 2966611142819785204
117
117
  segments:
118
118
  - 0
119
119
  version: "0"