RubyGems - svmredlight - Versions diffs - 0.1.0 → 0.1.1 - Mend

svmredlight 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

data/.document CHANGED Viewed

@@ -1,5 +1,6 @@
 lib/**/*.rb
 bin/*
+ext/*.c
 -
 features/**/*.feature
 LICENSE.txt

data/README.rdoc CHANGED Viewed

@@ -7,9 +7,15 @@ A partial interface to SVM-light [http://svmlight.joachims.org/] using it you ca
 As of now it's known to work with SVM 6.02.
+=== Installing svmlight as a library
 Make sure to build the libsvmlight.o version of svmlight by using
 "make libsvmlight_hideo".
+Make sure the .h files in the svmlight distribution are in your include path, inside a subdirectory called svm_light,
+and the object code for the library is in your library path (/usr/lib for instance).
 == Document

data/Rakefile CHANGED Viewed

@@ -14,7 +14,7 @@ require 'rake'
 require 'jeweler'
 Jeweler::Tasks.new do |gem|
   # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
-  gem.version = '0.1.0'
+  gem.version = '0.1.1'
   gem.name = "svmredlight"
   gem.homepage = "http://github.com/camilo/svmredlight"
   gem.license = "MIT"

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.1.0
1	+ 0.1.1

data/ext/svmredlight.c CHANGED Viewed

@@ -9,13 +9,10 @@ is_linear(MODEL *model){
   return model->kernel_parm.kernel_type == 0;
 }
-// Modules and Classes
 static VALUE rb_mSvmLight;
 static VALUE rb_cModel;
 static VALUE rb_cDocument;
-// GC functions
 /* Not using deep free anymore, let ruby call free on the documents otherwise we might end
  * up having double free problems, from svm_learn_main: Warning: The model contains
  * references to the original data 'docs'.  If you want to free the original data, and
@@ -52,11 +49,12 @@ model_read_from_file(VALUE klass, VALUE filename){
 /* Helper function type checks a string meant to be used as a learn_parm, in case of error
  * returns 1 and sets the correct exception message in error, on success returns 0 and
  * copies the c string data of new_val to target*/
-int check_string_param(VALUE new_val,
-                             const char *default_val,
-                             char *target,
-                             const char *name,
-                             char *error){
+int
+check_string_param(VALUE new_val,
+                   const char *default_val,
+                   char *target,
+                   const char *name,
+                   char *error){
   if(TYPE(new_val) == T_STRING){
     strlcpy(target, StringValuePtr(new_val), 199);
@@ -73,7 +71,8 @@ int check_string_param(VALUE new_val,
 /* Helper function type checks a long meant to be used as a learn_parm or kernel_parm, in
  * case of error returns 1 and sets the correct exception message in error, on success
  * returns 0 and stores the long value of new_val in target*/
-int check_long_param(VALUE new_val,
+int
+check_long_param(VALUE new_val,
                            long default_val,
                            long *target,
                            const char *name,
@@ -93,11 +92,12 @@ int check_long_param(VALUE new_val,
 /* Helper function type checks a double meant to be used as a learn_parm or kernel_parm, in
  * case of error returns 1 and sets the correct exception message in error, on success
  * returns 0 and stores the double value of new_val in target*/
-int check_double_param(VALUE new_val,
-                             double default_val,
-                             double *target,
-                             const char *name,
-                             char *error){
+int
+check_double_param(VALUE new_val,
+                         double default_val,
+                         double *target,
+                         const char *name,
+                         char *error){
   if(TYPE(new_val) == T_FLOAT || TYPE(new_val) == T_FIXNUM){
     *target = NUM2DBL(new_val);
   }else if(NIL_P(new_val) ){
@@ -113,11 +113,12 @@ int check_double_param(VALUE new_val,
 /* Helper function type checks an int meant to be used as a boolean learn_parm or
  * kernel_parm, in case of error returns 1 and sets the correct exception message in
  * error, on success returns 0 and stores the resulting long flag in target*/
-int check_bool_param(VALUE new_val,
-                           long default_val,
-                           long *target,
-                           const char *name,
-                           char *error){
+int
+check_bool_param(VALUE new_val,
+                       long default_val,
+                       long *target,
+                       const char *name,
+                       char *error){
   if(TYPE(new_val) == T_TRUE){
     *target = 1L;
   }else if(TYPE(new_val) == T_FALSE){
@@ -134,208 +135,209 @@ int check_bool_param(VALUE new_val,
 /* Helper function in charge of setting up the learn parameters before they are passed to
  * the svm_learn_classification copies part of the logic in svm_learn_main.c */
-int setup_learn_params(LEARN_PARM *c_learn_param, VALUE r_hash, char *error_message){
+int
+setup_learn_params(LEARN_PARM *c_learn_param, VALUE r_hash, char *error_message){
   // Defaults taken from svm_learn_main
   VALUE inter_val, temp_ary, svm_type, svm_type_ruby_str;
   char *svm_type_str;
   inter_val = rb_hash_aref(r_hash, rb_str_new2("predfile"));
   if(1 == check_string_param(inter_val,
-                                   "trans_predictions",
-                                   &c_learn_param->predfile,
-                                   "predfile",
-                                   error_message)){
+                             "trans_predictions",
+                             (char *)&c_learn_param->predfile,
+                             "predfile",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("alphafile"));
   if(1 == check_string_param(inter_val,
-                                   "",
-                                   &c_learn_param->alphafile,
-                                   "alphafile",
-                                   error_message)){
+                            "",
+                            (char*)&c_learn_param->alphafile,
+                            "alphafile",
+                            error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("biased_hyperplane"));
   if(1 == check_bool_param(inter_val,
-                                 1L,
-                                 &(c_learn_param->biased_hyperplane),
-                                 "biased_hyperplane",
-                                 error_message)){
+                           1L,
+                           &(c_learn_param->biased_hyperplane),
+                           "biased_hyperplane",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("sharedslack"));
   if(1 == check_bool_param(inter_val,
-                                 0L,
-                                 &(c_learn_param->sharedslack),
-                                 "sharedslack",
-                                 error_message)){
+                           0L,
+                           &(c_learn_param->sharedslack),
+                           "sharedslack",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("remove_inconsistent"));
   if(1 == check_bool_param(inter_val,
-                                 0L,
-                                 &(c_learn_param->remove_inconsistent),
-                                 "remove_inconsistent",
-                                 error_message)){
+                           0L,
+                           &(c_learn_param->remove_inconsistent),
+                           "remove_inconsistent",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("skip_final_opt_check"));
   if(1 == check_bool_param(inter_val,
-                                 0L,
-                                 &(c_learn_param->skip_final_opt_check),
-                                 "skip_final_opt_check",
-                                 error_message)){
+                           0L,
+                           &(c_learn_param->skip_final_opt_check),
+                           "skip_final_opt_check",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_newvarsinqp"));
   if(1 == check_bool_param(inter_val,
-                                 0L,
-                                 &(c_learn_param->svm_newvarsinqp),
-                                 "svm_newvarsinqp",
-                                 error_message)){
+                           0L,
+                           &(c_learn_param->svm_newvarsinqp),
+                           "svm_newvarsinqp",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("compute_loo"));
   if(1 == check_bool_param(inter_val,
-                                 0L,
-                                 &(c_learn_param->compute_loo),
-                                 "compute_loo",
-                                 error_message)){
+                           0L,
+                           &(c_learn_param->compute_loo),
+                           "compute_loo",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_maxqpsize"));
   if(1 == check_long_param(inter_val,
-                                 10L,
-                                 &(c_learn_param->svm_maxqpsize),
-                                 "svm_maxqpsize",
-                                 error_message)){
+                           10L,
+                           &(c_learn_param->svm_maxqpsize),
+                           "svm_maxqpsize",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_iter_to_shrink"));
   if(1 == check_long_param(inter_val,
-                                 -9999,
-                                 &(c_learn_param->svm_iter_to_shrink),
-                                 "svm_iter_to_shrink",
-                                 error_message)){
+                           -9999,
+                           &(c_learn_param->svm_iter_to_shrink),
+                           "svm_iter_to_shrink",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("maxiter"));
   if(1 == check_long_param(inter_val,
-                                 100000,
-                                 &(c_learn_param->maxiter),
-                                 "maxiter",
-                                 error_message)){
+                           100000,
+                           &(c_learn_param->maxiter),
+                           "maxiter",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("kernel_cache_size"));
   if(1 == check_long_param(inter_val,
-                                 40L,
-                                 &(c_learn_param->kernel_cache_size),
-                                 "kernel_cache_size",
-                                 error_message)){
+                           40L,
+                           &(c_learn_param->kernel_cache_size),
+                           "kernel_cache_size",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("xa_depth"));
   if(1 == check_long_param(inter_val,
-                                 0L,
-                                 &(c_learn_param->xa_depth),
-                                 "xa_depth",
-                                 error_message)){
+                           0L,
+                           &(c_learn_param->xa_depth),
+                           "xa_depth",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_c"));
   if(1 == check_double_param(inter_val,
-                                 0.0,
-                                 &(c_learn_param->svm_c),
-                                 "svm_c",
-                                 error_message)){
+                             0.0,
+                             &(c_learn_param->svm_c),
+                             "svm_c",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("eps"));
   if(1 == check_double_param(inter_val,
-                                 0.1,
-                                 &(c_learn_param->eps),
-                                 "eps",
-                                 error_message)){
+                             0.1,
+                             &(c_learn_param->eps),
+                             "eps",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("transduction_posratio"));
   if(1 == check_double_param(inter_val,
-                                 -1.0,
-                                 &(c_learn_param->transduction_posratio),
-                                 "transduction_posratio",
-                                 error_message)){
+                             -1.0,
+                             &(c_learn_param->transduction_posratio),
+                             "transduction_posratio",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_costratio"));
   if(1 == check_double_param(inter_val,
-                                 1.0,
-                                 &(c_learn_param->svm_costratio),
-                                 "svm_costratio",
-                                 error_message)){
+                             1.0,
+                             &(c_learn_param->svm_costratio),
+                             "svm_costratio",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_costratio_unlab"));
   if(1 == check_double_param(inter_val,
-                                 1.0,
-                                 &(c_learn_param->svm_costratio_unlab),
-                                 "svm_costratio_unlab",
-                                 error_message)){
+                             1.0,
+                             &(c_learn_param->svm_costratio_unlab),
+                             "svm_costratio_unlab",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_unlabbound"));
   if(1 == check_double_param(inter_val,
-                                 1.0000000000000001e-05,
-                                 &(c_learn_param->svm_unlabbound),
-                                 "svm_unlabbound",
-                                 error_message)){
+                             1.0000000000000001e-05,
+                             &(c_learn_param->svm_unlabbound),
+                             "svm_unlabbound",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("epsilon_crit"));
   if(1 == check_double_param(inter_val,
-                                 0.001,
-                                 &(c_learn_param->epsilon_crit),
-                                 "epsilon_crit",
-                                 error_message)){
+                             0.001,
+                             &(c_learn_param->epsilon_crit),
+                             "epsilon_crit",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("epsilon_a"));
   if(1 == check_double_param(inter_val,
-                                 1E-15,
-                                 &(c_learn_param->epsilon_a),
-                                 "epsilon_a",
-                                 error_message)){
+                             1E-15,
+                             &(c_learn_param->epsilon_a),
+                             "epsilon_a",
+                             error_message)){
     return 1;
   }
   c_learn_param->rho=1.0;
   inter_val = rb_hash_aref(r_hash, rb_str_new2("rho"));
   if(1 == check_double_param(inter_val,
-                                 1.0,
-                                 &(c_learn_param->rho),
-                                 "rho",
-                                 error_message)){
+                             1.0,
+                             &(c_learn_param->rho),
+                             "rho",
+                             error_message)){
     return 1;
   }
@@ -343,41 +345,42 @@ int setup_learn_params(LEARN_PARM *c_learn_param, VALUE r_hash, char *error_mess
   return 0;
 }
-int setup_kernel_params(KERNEL_PARM *c_kernel_param, VALUE r_hash, char *error_message){
+int
+setup_kernel_params(KERNEL_PARM *c_kernel_param, VALUE r_hash, char *error_message){
   VALUE inter_val;
   inter_val = rb_hash_aref(r_hash, rb_str_new2("poly_degree"));
   if(1 == check_long_param(inter_val,
-                                 3L,
-                                 &(c_kernel_param->poly_degree),
-                                 "poly_degree",
-                                 error_message)){
+                           3L,
+                           &(c_kernel_param->poly_degree),
+                           "poly_degree",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("rbf_gamma"));
   if(1 == check_double_param(inter_val,
-                                 1.0,
-                                 &(c_kernel_param->rbf_gamma),
-                                 "rbf_gamma",
-                                 error_message)){
+                             1.0,
+                             &(c_kernel_param->rbf_gamma),
+                             "rbf_gamma",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("coef_lin"));
   if(1 == check_double_param(inter_val,
-                                 1.0,
-                                 &(c_kernel_param->coef_lin),
-                                 "coef_lin",
-                                 error_message)){
+                             1.0,
+                             &(c_kernel_param->coef_lin),
+                             "coef_lin",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("coef_const"));
   if(1 == check_double_param(inter_val,
-                                 1.0,
-                                 &(c_kernel_param->coef_const),
-                                 "coef_const",
-                                 error_message)){
+                             1.0,
+                             &(c_kernel_param->coef_const),
+                             "coef_const",
+                             error_message)){
     return 1;
   }
@@ -634,6 +637,18 @@ model_support_vectors_count(VALUE self){
   return INT2FIX(m->sv_num);
 }
+static VALUE
+model_write_to_file(VALUE self, VALUE pahtofile){
+  Check_Type(pahtofile, T_STRING);
+  MODEL *m;
+  Data_Get_Struct(self, MODEL, m);
+  write_model(StringValuePtr(pahtofile), m);
+  return Qnil;
+}
 static VALUE
 model_total_words(VALUE self){
   MODEL *m;
@@ -745,8 +760,9 @@ Init_svmredlight(){
   rb_mSvmLight = rb_define_module("SVMLight");
   //Model
   rb_cModel = rb_define_class_under(rb_mSvmLight, "Model", rb_cObject);
-  rb_define_singleton_method(rb_cModel, "read_from_file", model_read_from_file, 1);
+  rb_define_singleton_method(rb_cModel, "from_file", model_read_from_file, 1);
   rb_define_singleton_method(rb_cModel, "learn_classification", model_learn_classification, 5);
+  rb_define_method(rb_cModel, "to_file", model_write_to_file, 1);
   rb_define_method(rb_cModel, "support_vectors_count", model_support_vectors_count, 0);
   rb_define_method(rb_cModel, "total_words", model_total_words, 0);
   rb_define_method(rb_cModel, "classify", model_classify_example, 1);

data/lib/svmredlight.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 require File.dirname(__FILE__) + '/../ext/svmredlight'
-require  'svmredlight/model'
-require  'svmredlight/document'
+require File.dirname(__FILE__) + '/svmredlight/model'
+require File.dirname(__FILE__) + '/svmredlight/document'

data/lib/svmredlight/document.rb CHANGED Viewed

@@ -1,9 +1,7 @@
 module SVMLight
-  # A document is the Ruby representation of a DOC structure in SVMlight, it contains a
-  # queryid, a slackid, a costfactor ( c ) and a vector with feature numbers and their
-  # correspondent weights.
+  # A document is the Ruby representation of a DOC structure in SVMlight, it contains a queryid, a slackid, a costfactor
+  # ( c ) and a vector with feature numbers and their correspondent weights.
   class Document
     # @param [Hash] vector a hash where the keys are feature numbers and the values its weights
     # @param [Hash] opts the options coincide with SVMLight parameters to the create_example function, the default values for all the options are 0
     # @option [:docnum] Numeric docum

data/lib/svmredlight/model.rb CHANGED Viewed

@@ -1,4 +1,6 @@
 module SVMLight
+  class MissingModelFile < StandardError; end
   # A model is the product of training a SVM, once created it can take documents as inputs
   # and act on them (for instance, by classifying them). Models can also be read from files
   # created by svm_learn.
@@ -16,7 +18,48 @@ module SVMLight
       learn_classification(documents_and_lables, learn_params, kernel_params, false, alphas)
     end
     private_class_method :learn_classification
+    private_class_method :from_file
+    # in self.read_from_file and #write_to_file
+    #
+    # This is an anti-pattern. Checking for the existence of resources is normally something to be avoided. Trying to open
+    # the resource and then rescuing the exception/reading the error code is a much better practice; however, SVMLight
+    # will call exit(1) if the file does not exist, and that in turn will kill the Ruby VM. So in this case, to
+    # minimize that possibility, I optimistically check for the file's existence and hope it is still there when it is
+    # actually time to open it.
+    #
+    # TODO: Come up with a proper replacement for those methods, probably simply reimplementing them in svmredlight.c
+    # and raising an exception when files cannot be opened.
+    # Will load an existent model from a file
+    # @param [String] pahtofile path to the model file
+    def self.read_from_file(pahtofile)
+      if File.exists?(pahtofile) && File.file?(pahtofile)
+        from_file(pahtofile)
+      else
+        raise MissingModelFile, "the #{pahtofile} does not exists or is not a file"
+      end
+    end
+    private :to_file
+    # Will create a file containing the model info; the model info can be turned back into a model by using
+    # Model.read_from_file
+    # @param [String] pahtofile
+    def write_to_file(pahtofile)
+      dir = File.dirname(pahtofile)
+      if File.directory?(dir) && File.writable?(dir)
+        to_file(pahtofile)
+      else
+        raise ModelWriteError, "impossible to write #{pahtofile}"
+      end
+    end
   end
 end

data/svmredlight.gemspec CHANGED Viewed

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = %q{svmredlight}
-  s.version = "0.1.0"
+  s.version = "0.1.1"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Camilo Lopez"]
-  s.date = %q{2011-09-11}
+  s.date = %q{2011-09-22}
   s.description = %q{Ruby interface to SVMLight}
   s.email = %q{camilo@camilolopez.com}
   s.extensions = ["ext/extconf.rb"]

data/test/helper.rb CHANGED Viewed

@@ -8,6 +8,7 @@ rescue Bundler::BundlerError => e
   $stderr.puts "Run `bundle install` to install missing gems"
   exit e.status_code
 end
 require 'test/unit'
 require 'shoulda'
 require './lib/svmredlight'

data/test/test_document.rb CHANGED Viewed

@@ -3,53 +3,59 @@ include SVMLight
 class TestDocument < Test::Unit::TestCase
-  def test_create
-    d = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
-    assert_kind_of Document, d
-  end
-  def test_create_should_accept_integer_as_feature_weight
-    d = Document.create(0, 0.5, 1, 0, [[1, 0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
-    assert_kind_of Document, d
-  end
+  context "creating a new document" do
+    should "succed when using #create" do
+      d = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
+      assert_kind_of Document, d
+    end
+    should "accept integers as feature weights" do
+      d = Document.create(0, 0.5, 1, 0, [[1, 0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
+      assert_kind_of Document, d
+    end
+    should "create documents useing new as well" do
+      d = Document.new({1 => 566.0, 4 => 133.0}, {docnum: 10, slackid: 1, queryid: 2, costfactor: 0.5})
+      assert_equal 10, d.docnum
+      assert_equal 1, d.slackid
+      assert_equal 2, d.queryid
+      assert_equal 0.5, d.costfactor
+    end
+    should "raise argument error if any of the word numbers is less or equal to 0" do
+      assert_raise(ArgumentError){ Document.create(0, 0.5, 1, 0, [[0, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]]) }
+      assert_raise(ArgumentError){ Document.create(1, 0.5, 1, 0, [[-1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])}
+    end
+    should "raise type error when the fourth argument is not an array" do
+      assert_raise(TypeError) { Document.create(-1, 0, 1, 0, {})  }
+    end
+    should "raise type error when the fourth argument is empty" do
+      assert_raise(ArgumentError) { Document.create(-1, 0, 1, 0 [])}
+    end
-  def test_create_using_new
-    d = Document.new({1 => 566.0, 4 => 133.0}, {docnum: 10, slackid: 1, queryid: 2, costfactor: 0.5})
-    assert_equal 10, d.docnum
-    assert_equal 1, d.slackid
-    assert_equal 2, d.queryid
-    assert_equal 0.5, d.costfactor
   end
+  context 'a document' do
+    should "have accessible docnum, queryid, slackid, and, costfacor" do
+      d1 = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]])
+      d2 = Document.create(1, 0.6, 2, 1, [[1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])
-  def test_should_be_able_to_access_properties
-    d1 = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]])
-    d2 = Document.create(1, 0.6, 2, 1, [[1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])
-    assert_equal 0, d1.docnum
-    assert_equal 1, d2.docnum
-    assert_equal 1, d1.slackid
-    assert_equal 2, d2.slackid
+      assert_equal 0, d1.docnum
+      assert_equal 1, d2.docnum
-    assert_equal 0, d1.queryid
-    assert_equal 1, d2.queryid
+      assert_equal 1, d1.slackid
+      assert_equal 2, d2.slackid
-    assert_equal 0.5, d1.costfactor
-    assert_equal 0.6, d2.costfactor
-  end
-  def test_all_word_numbers_should_be_greater_than_zero
-    assert_raise(ArgumentError){ Document.create(0, 0.5, 1, 0, [[0, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]]) }
-    assert_raise(ArgumentError){ Document.create(1, 0.5, 1, 0, [[-1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])}
-  end
-  def test_create_with_no_array
-    assert_raise(TypeError) { Document.create(-1, 0, 1, 0, {})  }
-  end
+      assert_equal 0, d1.queryid
+      assert_equal 1, d2.queryid
-  def test_create_with_empty_array
-    assert_raise(ArgumentError) { Document.create(-1, 0, 1, 0 [])}
+      assert_equal 0.5, d1.costfactor
+      assert_equal 0.6, d2.costfactor
+    end
   end
 end

data/test/test_model.rb CHANGED Viewed

@@ -3,112 +3,157 @@ include SVMLight
 class TestModel < Test::Unit::TestCase
-  def setup
-    @features ||= [
-      [ [1,0.6], [11, 0.0], [34, 0.1] ],
-      [ [5,0.4], [15, 0.0], [30, 0.1] ],
-      [ [1,0.1], [13, 0.0], [31, 0.1] ],
-      [ [7,0.7], [15, 0.0], [35, 0.1] ],
-      [ [5,0.6], [19, 0.0], [44, 0.1] ],
-    ]
-    @docs_and_labels ||= @features.each_with_index.map{|f,i| [ Document.create(i + 1, 1, 0, 0,  f), i%2 * -1]}
-  end
+  context "reading a model from file" do
+    setup do
+      @file_name = 'test/assets/model'
+    end
+    should "read properly from a well formed file" do
+      assert m     = Model.read_from_file(@file_name)
+      assert_equal 3877,  m.support_vectors_count
+      assert_equal 39118, m.total_words
+    end
-  def test_learn_classification_with_alpha
-    m = Model.new(:classification, @docs_and_labels, {}, {}, [1, 0.0] * 50)
-    assert_kind_of Model, m
+    should "classify successfully after reading the model from a file" do
+      m = Model.read_from_file(@file_name)
-    @docs_and_labels.each_with_index do |item, i|
-      assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
+      assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1 ,v.to_f]} ) )
+      assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
+      assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0, 0, 0, 0.8, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
+      assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0.5, 0, 0, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
+    end
+    should "raise file not found exception when file does not exists" do
+      assert_raises(MissingModelFile){ Model.read_from_file(@file_name + 'bleh') }
     end
   end
-  def test_learn_classification
-    m = Model.new(:classification, @docs_and_labels, {}, {}, nil)
-    assert_kind_of Model, m
-    assert_equal 44, m.total_words
-    assert_equal 5, m.totdoc
+  context "writting a model to a file" do
+    setup do
+      @features ||= [
+        [ [1,0.6], [11, 0.0], [34, 0.1] ],
+        [ [5,0.4], [15, 0.0], [30, 0.1] ],
+        [ [1,0.1], [13, 0.0], [31, 0.1] ],
+        [ [7,0.7], [15, 0.0], [35, 0.1] ],
+        [ [5,0.6], [19, 0.0], [44, 0.1] ],
+      ]
+      @docs_and_labels ||= @features.each_with_index.map do |feature, index|
+        [ Document.create(index + 1, 1, 0, 0,  feature), index%2 * -1]
+      end
+      @filepath = './test/assets/written_model'
+      @model    = Model.new(:classification, @docs_and_labels, {}, {}, nil)
+    end
+    should "write a model from memmory to a file" do
+      @model.write_to_file(@filepath)
-    @docs_and_labels.each_with_index do |item, i|
-      assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
+      assert File.exists?(@filepath)
+      assert File.file?(@filepath)
+      # TODO: Implement actual model equality
+      assert_equal @model.support_vectors_count, Model.read_from_file(@filepath).support_vectors_count
     end
+    # Need to find a good way to test this without relying too much on the environment
+    should "raise ModelWriteError when it is impossible to write a model file"
+    teardown do
+      `rm #{@filepath} &> /dev/null`
+    end
   end
-  def test_learn_classification_with_learn_params
-    learn_params = {
+  context "when learning from new documents" do
+    setup do
+      @features ||= [
+        [ [1,0.6], [11, 0.0], [34, 0.1] ],
+        [ [5,0.4], [15, 0.0], [30, 0.1] ],
+        [ [1,0.1], [13, 0.0], [31, 0.1] ],
+        [ [7,0.7], [15, 0.0], [35, 0.1] ],
+        [ [5,0.6], [19, 0.0], [44, 0.1] ],
+      ]
+      @docs_and_labels ||= @features.each_with_index.map do |feature, index|
+        [ Document.create(index + 1, 1, 0, 0,  feature), index%2 * -1]
+      end
+    end
+    should "learn classification with default arguments" do
+      m = Model.new(:classification, @docs_and_labels, {}, {}, nil)
+      assert_kind_of Model, m
+      assert_equal 44, m.total_words
+      assert_equal 5, m.totdoc
+      @docs_and_labels.each_with_index do |item, i|
+        assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
+      end
+    end
+    should "learn classification with alpha values" do
+      m = Model.new(:classification, @docs_and_labels, {}, {}, [1, 0.0] * 50)
+      assert_kind_of Model, m
+      @docs_and_labels.each_with_index do |item, i|
+        assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
+      end
+    end
+    should "raise argument error when one of the alphas is not numeric " do
+      assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, {}, {}, [1, {}] )}
+    end
+    should "learn classification and accept learn parameters" do
+      learn_params = {
        "predfile"            => "custom_file",
        "alphafile"           => "alpha",
        "biased_hyperplane"   => false,
        "sharedslack"         => false,
        "remove_inconsistent" => true
-    }
+      }
-    m = Model.new(:classification, @docs_and_labels, learn_params, {}, nil)
-    assert_kind_of Model, m
+      m = Model.new(:classification, @docs_and_labels, learn_params, {}, nil)
+      assert_kind_of Model, m
-    @docs_and_labels.each_with_index do |item, i|
-      assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
+      @docs_and_labels.each_with_index do |item, i|
+        assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
+      end
     end
-  end
-  def test_learn_classification_with_invalid_learn_params
-    learn_params = {"svm_c" =>  -1}
-    assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
-    learn_params = {"svm_iter_to_shrink" =>  -1}
-    assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
-  end
+    should "raise argument error when learn parameters are invalid" do
+      learn_params = {"svm_c" =>  -1}
+      assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
+      learn_params = {"svm_iter_to_shrink" =>  -1}
+      assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
+    end
+    should "learn calssification while accepting kernel paramters" do
-  def test_learn_classification_with_kernel_params
-    kernel_params = {
+      kernel_params = {
       "poly_degree" => 3,
       "rbf_gamma"   => 0.5,
       "coef_lin"    => 0.4,
       "coef_const"  => 0.56
-    }
+      }
-    m = Model.new(:classification, @docs_and_labels, {}, kernel_params, nil)
-    assert_kind_of Model, m
+      m = Model.new(:classification, @docs_and_labels, {}, kernel_params, nil)
+      assert_kind_of Model, m
-    @docs_and_labels.each_with_index do |item, i|
-      assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
-    end
-  end
-  def test_learn_classification_with_learn_params_when_predfile_is_not_string
-    learn_params = { "predfile"  => {}}
-    assert_raise(ArgumentError) do
-      Model.new(:classification, @docs_and_labels, learn_params, {}, [1, 0.0, 1])
+      @docs_and_labels.each_with_index do |item, i|
+        assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
+      end
     end
-  end
+    should "raise argument error when predfile is not string" do
-  def test_learn_classification_fails_when_element_is_not_array
-    @docs_and_labels << []
-    assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, {}, {}, nil)}
-  end
+      learn_params = { "predfile"  => {}}
-  def test_learn_classification_fails_when_element_is_arry_with_the_wrong_types
-    assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, {}, {}, [1, {}] )}
-  end
-  def test_read
-    assert m     = Model.read_from_file('test/assets/model')
-    assert_equal 3877,  m.support_vectors_count
-    assert_equal 39118, m.total_words
-  end
+      assert_raise(ArgumentError) do
+        Model.new(:classification, @docs_and_labels, learn_params, {}, [1, 0.0, 1])
+      end
+    end
-  def test_classify
-    m = Model.read_from_file('test/assets/model')
-    assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1 ,v.to_f]} ) )
-    assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
-    assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0, 0, 0, 0.8, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
-    assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0.5, 0, 0, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
   end
 end

metadata CHANGED Viewed

@@ -2,7 +2,7 @@
 name: svmredlight
 version: !ruby/object:Gem::Version
   prerelease:
-  version: 0.1.0
+  version: 0.1.1
 platform: ruby
 authors:
 - Camilo Lopez
@@ -10,7 +10,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-09-11 00:00:00 -04:00
+date: 2011-09-22 00:00:00 -04:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -113,7 +113,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      hash: 2455726499843414946
+      hash: 2966611142819785204
       segments:
       - 0
       version: "0"