RubyGems - svmredlight - Versions diffs - 0.1.0 → 0.1.1 - Mend

svmredlight 0.1.0 → 0.1.1

Files changed (13) hide show

data/.document CHANGED Viewed

@@ -1,5 +1,6 @@
 lib/**/*.rb
 bin/*
+ext/*.c
 -
 features/**/*.feature
 LICENSE.txt

data/README.rdoc CHANGED Viewed

@@ -7,9 +7,15 @@ A partial interface to SVM-light [http://svmlight.joachims.org/] using it you ca
 As of now it's known to work with SVM 6.02.
+=== Installing svmlight as a library
 Make sure to build the libsvmlight.o version of svmlight by using
 "make libsvmlight_hideo".
+Make sure the .h files in the svmlight distribution are in your include path, inside a subdirectory called svm_light,
+and the object code for the library is in your library path (/usr/lib for instance).
 == Document

data/Rakefile CHANGED Viewed

@@ -14,7 +14,7 @@ require 'rake'
 require 'jeweler'
 Jeweler::Tasks.new do |gem|
   # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
-  gem.version = '0.1.0'
+  gem.version = '0.1.1'
   gem.name = "svmredlight"
   gem.homepage = "http://github.com/camilo/svmredlight"
   gem.license = "MIT"

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.1.0
1	+ 0.1.1

data/ext/svmredlight.c CHANGED Viewed

@@ -9,13 +9,10 @@ is_linear(MODEL *model){
   return model->kernel_parm.kernel_type == 0;
 }
-// Modules and Classes
 static VALUE rb_mSvmLight;
 static VALUE rb_cModel;
 static VALUE rb_cDocument;
-// GC functions
 /* Not using deep free anymore, let ruby call free on the documents otherwise we might end
  * up having double free problems, from svm_learn_main: Warning: The model contains
  * references to the original data 'docs'.  If you want to free the original data, and
@@ -52,11 +49,12 @@ model_read_from_file(VALUE klass, VALUE filename){
 /* Helper function type checks a string meant to be used as a learn_parm, in case of error
  * returns 1 and sets the correct exception message in error, on success returns 0 and
  * copies the c string data of new_val to target*/
-int check_string_param(VALUE new_val,
-                             const char *default_val,
-                             char *target,
-                             const char *name,
-                             char *error){
+int
+check_string_param(VALUE new_val,
+                   const char *default_val,
+                   char *target,
+                   const char *name,
+                   char *error){
   if(TYPE(new_val) == T_STRING){
     strlcpy(target, StringValuePtr(new_val), 199);
@@ -73,7 +71,8 @@ int check_string_param(VALUE new_val,
 /* Helper function type checks a long meant to be used as a learn_parm or kernel_parm, in
  * case of error returns 1 and sets the correct exception message in error, on success
  * returns 0 and stores the long value of new_val in target */
-int check_long_param(VALUE new_val,
+int
+check_long_param(VALUE new_val,
                            long default_val,
                            long *target,
                            const char *name,
@@ -93,11 +92,12 @@ int check_long_param(VALUE new_val,
 /* Helper function type checks a double meant to be used as a learn_parm or kernel_parm, in
  * case of error returns 1 and sets the correct exception message in error, on success
  * returns 0 and stores the double value of new_val in target */
-int check_double_param(VALUE new_val,
-                             double default_val,
-                             double *target,
-                             const char *name,
-                             char *error){
+int
+check_double_param(VALUE new_val,
+                         double default_val,
+                         double *target,
+                         const char *name,
+                         char *error){
   if(TYPE(new_val) == T_FLOAT || TYPE(new_val) == T_FIXNUM){
     *target = NUM2DBL(new_val);
   }else if(NIL_P(new_val) ){
@@ -113,11 +113,12 @@ int check_double_param(VALUE new_val,
 /* Helper function type checks a value meant to be used as a boolean learn_parm or
  * kernel_parm, in case of error returns 1 and sets the correct exception message in
  * error, on success returns 0 and stores the boolean value of new_val in target as a long */
-int check_bool_param(VALUE new_val,
-                           long default_val,
-                           long *target,
-                           const char *name,
-                           char *error){
+int
+check_bool_param(VALUE new_val,
+                       long default_val,
+                       long *target,
+                       const char *name,
+                       char *error){
   if(TYPE(new_val) == T_TRUE){
     *target = 1L;
   }else if(TYPE(new_val) == T_FALSE){
@@ -134,208 +135,209 @@ int check_bool_param(VALUE new_val,
 /* Helper function in charge of setting up the learn parameters before they are passed to
  * the svm_learn_classification copies part of the logic in svm_learn_main.c */
-int setup_learn_params(LEARN_PARM *c_learn_param, VALUE r_hash, char *error_message){
+int
+setup_learn_params(LEARN_PARM *c_learn_param, VALUE r_hash, char *error_message){
   // Defaults taken from svm_learn_main
   VALUE inter_val, temp_ary, svm_type, svm_type_ruby_str;
   char *svm_type_str;
   inter_val = rb_hash_aref(r_hash, rb_str_new2("predfile"));
   if(1 == check_string_param(inter_val,
-                                   "trans_predictions",
-                                   &c_learn_param->predfile,
-                                   "predfile",
-                                   error_message)){
+                             "trans_predictions",
+                             (char *)&c_learn_param->predfile,
+                             "predfile",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("alphafile"));
   if(1 == check_string_param(inter_val,
-                                   "",
-                                   &c_learn_param->alphafile,
-                                   "alphafile",
-                                   error_message)){
+                            "",
+                            (char*)&c_learn_param->alphafile,
+                            "alphafile",
+                            error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("biased_hyperplane"));
   if(1 == check_bool_param(inter_val,
-                                 1L,
-                                 &(c_learn_param->biased_hyperplane),
-                                 "biased_hyperplane",
-                                 error_message)){
+                           1L,
+                           &(c_learn_param->biased_hyperplane),
+                           "biased_hyperplane",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("sharedslack"));
   if(1 == check_bool_param(inter_val,
-                                 0L,
-                                 &(c_learn_param->sharedslack),
-                                 "sharedslack",
-                                 error_message)){
+                           0L,
+                           &(c_learn_param->sharedslack),
+                           "sharedslack",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("remove_inconsistent"));
   if(1 == check_bool_param(inter_val,
-                                 0L,
-                                 &(c_learn_param->remove_inconsistent),
-                                 "remove_inconsistent",
-                                 error_message)){
+                           0L,
+                           &(c_learn_param->remove_inconsistent),
+                           "remove_inconsistent",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("skip_final_opt_check"));
   if(1 == check_bool_param(inter_val,
-                                 0L,
-                                 &(c_learn_param->skip_final_opt_check),
-                                 "skip_final_opt_check",
-                                 error_message)){
+                           0L,
+                           &(c_learn_param->skip_final_opt_check),
+                           "skip_final_opt_check",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_newvarsinqp"));
   if(1 == check_bool_param(inter_val,
-                                 0L,
-                                 &(c_learn_param->svm_newvarsinqp),
-                                 "svm_newvarsinqp",
-                                 error_message)){
+                           0L,
+                           &(c_learn_param->svm_newvarsinqp),
+                           "svm_newvarsinqp",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("compute_loo"));
   if(1 == check_bool_param(inter_val,
-                                 0L,
-                                 &(c_learn_param->compute_loo),
-                                 "compute_loo",
-                                 error_message)){
+                           0L,
+                           &(c_learn_param->compute_loo),
+                           "compute_loo",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_maxqpsize"));
   if(1 == check_long_param(inter_val,
-                                 10L,
-                                 &(c_learn_param->svm_maxqpsize),
-                                 "svm_maxqpsize",
-                                 error_message)){
+                           10L,
+                           &(c_learn_param->svm_maxqpsize),
+                           "svm_maxqpsize",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_iter_to_shrink"));
   if(1 == check_long_param(inter_val,
-                                 -9999,
-                                 &(c_learn_param->svm_iter_to_shrink),
-                                 "svm_iter_to_shrink",
-                                 error_message)){
+                           -9999,
+                           &(c_learn_param->svm_iter_to_shrink),
+                           "svm_iter_to_shrink",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("maxiter"));
   if(1 == check_long_param(inter_val,
-                                 100000,
-                                 &(c_learn_param->maxiter),
-                                 "maxiter",
-                                 error_message)){
+                           100000,
+                           &(c_learn_param->maxiter),
+                           "maxiter",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("kernel_cache_size"));
   if(1 == check_long_param(inter_val,
-                                 40L,
-                                 &(c_learn_param->kernel_cache_size),
-                                 "kernel_cache_size",
-                                 error_message)){
+                           40L,
+                           &(c_learn_param->kernel_cache_size),
+                           "kernel_cache_size",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("xa_depth"));
   if(1 == check_long_param(inter_val,
-                                 0L,
-                                 &(c_learn_param->xa_depth),
-                                 "xa_depth",
-                                 error_message)){
+                           0L,
+                           &(c_learn_param->xa_depth),
+                           "xa_depth",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_c"));
   if(1 == check_double_param(inter_val,
-                                 0.0,
-                                 &(c_learn_param->svm_c),
-                                 "svm_c",
-                                 error_message)){
+                             0.0,
+                             &(c_learn_param->svm_c),
+                             "svm_c",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("eps"));
   if(1 == check_double_param(inter_val,
-                                 0.1,
-                                 &(c_learn_param->eps),
-                                 "eps",
-                                 error_message)){
+                             0.1,
+                             &(c_learn_param->eps),
+                             "eps",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("transduction_posratio"));
   if(1 == check_double_param(inter_val,
-                                 -1.0,
-                                 &(c_learn_param->transduction_posratio),
-                                 "transduction_posratio",
-                                 error_message)){
+                             -1.0,
+                             &(c_learn_param->transduction_posratio),
+                             "transduction_posratio",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_costratio"));
   if(1 == check_double_param(inter_val,
-                                 1.0,
-                                 &(c_learn_param->svm_costratio),
-                                 "svm_costratio",
-                                 error_message)){
+                             1.0,
+                             &(c_learn_param->svm_costratio),
+                             "svm_costratio",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_costratio_unlab"));
   if(1 == check_double_param(inter_val,
-                                 1.0,
-                                 &(c_learn_param->svm_costratio_unlab),
-                                 "svm_costratio_unlab",
-                                 error_message)){
+                             1.0,
+                             &(c_learn_param->svm_costratio_unlab),
+                             "svm_costratio_unlab",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("svm_unlabbound"));
   if(1 == check_double_param(inter_val,
-                                 1.0000000000000001e-05,
-                                 &(c_learn_param->svm_unlabbound),
-                                 "svm_unlabbound",
-                                 error_message)){
+                             1.0000000000000001e-05,
+                             &(c_learn_param->svm_unlabbound),
+                             "svm_unlabbound",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("epsilon_crit"));
   if(1 == check_double_param(inter_val,
-                                 0.001,
-                                 &(c_learn_param->epsilon_crit),
-                                 "epsilon_crit",
-                                 error_message)){
+                             0.001,
+                             &(c_learn_param->epsilon_crit),
+                             "epsilon_crit",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("epsilon_a"));
   if(1 == check_double_param(inter_val,
-                                 1E-15,
-                                 &(c_learn_param->epsilon_a),
-                                 "epsilon_a",
-                                 error_message)){
+                             1E-15,
+                             &(c_learn_param->epsilon_a),
+                             "epsilon_a",
+                             error_message)){
     return 1;
   }
   c_learn_param->rho=1.0;
   inter_val = rb_hash_aref(r_hash, rb_str_new2("rho"));
   if(1 == check_double_param(inter_val,
-                                 1.0,
-                                 &(c_learn_param->rho),
-                                 "rho",
-                                 error_message)){
+                             1.0,
+                             &(c_learn_param->rho),
+                             "rho",
+                             error_message)){
     return 1;
   }
@@ -343,41 +345,42 @@ int setup_learn_params(LEARN_PARM *c_learn_param, VALUE r_hash, char *error_mess
   return 0;
 }
-int setup_kernel_params(KERNEL_PARM *c_kernel_param, VALUE r_hash, char *error_message){
+int
+setup_kernel_params(KERNEL_PARM *c_kernel_param, VALUE r_hash, char *error_message){
   VALUE inter_val;
   inter_val = rb_hash_aref(r_hash, rb_str_new2("poly_degree"));
   if(1 == check_long_param(inter_val,
-                                 3L,
-                                 &(c_kernel_param->poly_degree),
-                                 "poly_degree",
-                                 error_message)){
+                           3L,
+                           &(c_kernel_param->poly_degree),
+                           "poly_degree",
+                           error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("rbf_gamma"));
   if(1 == check_double_param(inter_val,
-                                 1.0,
-                                 &(c_kernel_param->rbf_gamma),
-                                 "rbf_gamma",
-                                 error_message)){
+                             1.0,
+                             &(c_kernel_param->rbf_gamma),
+                             "rbf_gamma",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("coef_lin"));
   if(1 == check_double_param(inter_val,
-                                 1.0,
-                                 &(c_kernel_param->coef_lin),
-                                 "coef_lin",
-                                 error_message)){
+                             1.0,
+                             &(c_kernel_param->coef_lin),
+                             "coef_lin",
+                             error_message)){
     return 1;
   }
   inter_val = rb_hash_aref(r_hash, rb_str_new2("coef_const"));
   if(1 == check_double_param(inter_val,
-                                 1.0,
-                                 &(c_kernel_param->coef_const),
-                                 "coef_const",
-                                 error_message)){
+                             1.0,
+                             &(c_kernel_param->coef_const),
+                             "coef_const",
+                             error_message)){
     return 1;
   }
@@ -634,6 +637,18 @@ model_support_vectors_count(VALUE self){
   return INT2FIX(m->sv_num);
 }
+static VALUE
+model_write_to_file(VALUE self, VALUE pahtofile){
+  Check_Type(pahtofile, T_STRING);
+  MODEL *m;
+  Data_Get_Struct(self, MODEL, m);
+  write_model(StringValuePtr(pahtofile), m);
+  return Qnil;
+}
 static VALUE
 model_total_words(VALUE self){
   MODEL *m;
@@ -745,8 +760,9 @@ Init_svmredlight(){
   rb_mSvmLight = rb_define_module("SVMLight");
   //Model
   rb_cModel = rb_define_class_under(rb_mSvmLight, "Model", rb_cObject);
-  rb_define_singleton_method(rb_cModel, "read_from_file", model_read_from_file, 1);
+  rb_define_singleton_method(rb_cModel, "from_file", model_read_from_file, 1);
   rb_define_singleton_method(rb_cModel, "learn_classification", model_learn_classification, 5);
+  rb_define_method(rb_cModel, "to_file", model_write_to_file, 1);
   rb_define_method(rb_cModel, "support_vectors_count", model_support_vectors_count, 0);
   rb_define_method(rb_cModel, "total_words", model_total_words, 0);
   rb_define_method(rb_cModel, "classify", model_classify_example, 1);

data/lib/svmredlight.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 require File.dirname(__FILE__) + '/../ext/svmredlight'
-require  'svmredlight/model'
-require  'svmredlight/document'
+require File.dirname(__FILE__) + '/svmredlight/model'
+require File.dirname(__FILE__) + '/svmredlight/document'

data/lib/svmredlight/document.rb CHANGED Viewed

@@ -1,9 +1,7 @@
 module SVMLight
-  # A document is the Ruby representation of a DOC structure in SVMlight, it contains a
-  # queryid, a slackid, a costfactor ( c ) and a vector with feature numbers and their
-  # correspondent weights.
+  # A document is the Ruby representation of a DOC structure in SVMlight, it contains a queryid, a slackid, a costfactor
+  # ( c ) and a vector with feature numbers and their corresponding weights.
   class Document
     # @param [Hash] vector a hash where the keys are feature numbers and the values its weights
     # @param [Hash] opts the options coincide with SVMLight parameters to the create_example function, the default values for all the options are 0
     # @option [:docnum] Numeric docum

data/lib/svmredlight/model.rb CHANGED Viewed

@@ -1,4 +1,6 @@
 module SVMLight
+  class MissingModelFile < StandardError; end
   # A model is the product of training an SVM; once created it can take documents as inputs
   # and act on them (for instance, by classifying them). Models can also be read from files
   # created by svm_learn.
@@ -16,7 +18,48 @@ module SVMLight
       learn_classification(documents_and_lables, learn_params, kernel_params, false, alphas)
     end
     private_class_method :learn_classification
+    private_class_method :from_file
+    # About the existence checks in self.read_from_file and #write_to_file:
+    #
+    # This is an anti-pattern. Checking for the existence of a resource is normally something to be avoided; trying to
+    # open the resource and then rescuing the exception (or reading the error code) is a much better practice. However,
+    # SVMLight will call exit(1) if the file does not exist, and that in turn will kill the Ruby VM. To minimize that
+    # possibility I optimistically check for the file's existence and hope it is still there when it is actually time
+    # to open it.
+    #
+    # TODO: Come up with a proper replacement for those methods, probably by simply reimplementing them in
+    # svmredlight.c and raising an exception when files cannot be opened.
+    # Loads an existing model from a file
+    # @param [String] pahtofile path to the model file
+    def self.read_from_file(pahtofile)
+      if File.exists?(pahtofile) && File.file?(pahtofile)
+        from_file(pahtofile)
+      else
+        raise MissingModelFile, "the #{pahtofile} does not exists or is not a file"
+      end
+    end
+    private :to_file
+    # Creates a file containing the model info; the model info can be turned back into a model by using
+    # Model.read_from_file
+    # @param [String] pahtofile path of the model file to write
+    def write_to_file(pahtofile)
+      dir = File.dirname(pahtofile)
+      if File.directory?(dir) && File.writable?(dir)
+        to_file(pahtofile)
+      else
+        raise ModelWriteError, "impossible to write #{pahtofile}"
+      end
+    end
   end
 end

data/svmredlight.gemspec CHANGED Viewed

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = %q{svmredlight}
-  s.version = "0.1.0"
+  s.version = "0.1.1"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Camilo Lopez"]
-  s.date = %q{2011-09-11}
+  s.date = %q{2011-09-22}
   s.description = %q{Ruby interface to SVMLight}
   s.email = %q{camilo@camilolopez.com}
   s.extensions = ["ext/extconf.rb"]

data/test/helper.rb CHANGED Viewed

@@ -8,6 +8,7 @@ rescue Bundler::BundlerError => e
   $stderr.puts "Run `bundle install` to install missing gems"
   exit e.status_code
 end
 require 'test/unit'
 require 'shoulda'
 require './lib/svmredlight'

data/test/test_document.rb CHANGED Viewed

@@ -3,53 +3,59 @@ include SVMLight
 class TestDocument < Test::Unit::TestCase
-  def test_create
-    d = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
-    assert_kind_of Document, d
-  end
-  def test_create_should_accept_integer_as_feature_weight
-    d = Document.create(0, 0.5, 1, 0, [[1, 0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
-    assert_kind_of Document, d
-  end
+  context "creating a new document" do
+    should "succed when using #create" do
+      d = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
+      assert_kind_of Document, d
+    end
+    should "accept integers as feature weights" do
+      d = Document.create(0, 0.5, 1, 0, [[1, 0 ], [4, 0.0] , [10, 0.0] ,[ 11, 0.5 ]])
+      assert_kind_of Document, d
+    end
+    should "create documents useing new as well" do
+      d = Document.new({1 => 566.0, 4 => 133.0}, {docnum: 10, slackid: 1, queryid: 2, costfactor: 0.5})
+      assert_equal 10, d.docnum
+      assert_equal 1, d.slackid
+      assert_equal 2, d.queryid
+      assert_equal 0.5, d.costfactor
+    end
+    should "raise argument error if any of the word numbers is less or equal to 0" do
+      assert_raise(ArgumentError){ Document.create(0, 0.5, 1, 0, [[0, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]]) }
+      assert_raise(ArgumentError){ Document.create(1, 0.5, 1, 0, [[-1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])}
+    end
+    should "raise type error when the fourth argument is not an array" do
+      assert_raise(TypeError) { Document.create(-1, 0, 1, 0, {})  }
+    end
+    should "raise type error when the fourth argument is empty" do
+      assert_raise(ArgumentError) { Document.create(-1, 0, 1, 0 [])}
+    end
-  def test_create_using_new
-    d = Document.new({1 => 566.0, 4 => 133.0}, {docnum: 10, slackid: 1, queryid: 2, costfactor: 0.5})
-    assert_equal 10, d.docnum
-    assert_equal 1, d.slackid
-    assert_equal 2, d.queryid
-    assert_equal 0.5, d.costfactor
   end
+  context 'a document' do
+    should "have accessible docnum, queryid, slackid, and, costfacor" do
+      d1 = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]])
+      d2 = Document.create(1, 0.6, 2, 1, [[1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])
-  def test_should_be_able_to_access_properties
-    d1 = Document.create(0, 0.5, 1, 0, [[1, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]])
-    d2 = Document.create(1, 0.6, 2, 1, [[1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])
-    assert_equal 0, d1.docnum
-    assert_equal 1, d2.docnum
-    assert_equal 1, d1.slackid
-    assert_equal 2, d2.slackid
+      assert_equal 0, d1.docnum
+      assert_equal 1, d2.docnum
-    assert_equal 0, d1.queryid
-    assert_equal 1, d2.queryid
+      assert_equal 1, d1.slackid
+      assert_equal 2, d2.slackid
-    assert_equal 0.5, d1.costfactor
-    assert_equal 0.6, d2.costfactor
-  end
-  def test_all_word_numbers_should_be_greater_than_zero
-    assert_raise(ArgumentError){ Document.create(0, 0.5, 1, 0, [[0, 1.0 ], [10, 0.0 ], [20, 0.0], [21, 0.1 ]]) }
-    assert_raise(ArgumentError){ Document.create(1, 0.5, 1, 0, [[-1, 1.0 ], [30, 0.0 ], [40, 0.0], [41, 0.1 ]])}
-  end
-  def test_create_with_no_array
-    assert_raise(TypeError) { Document.create(-1, 0, 1, 0, {})  }
-  end
+      assert_equal 0, d1.queryid
+      assert_equal 1, d2.queryid
-  def test_create_with_empty_array
-    assert_raise(ArgumentError) { Document.create(-1, 0, 1, 0 [])}
+      assert_equal 0.5, d1.costfactor
+      assert_equal 0.6, d2.costfactor
+    end
   end
 end

data/test/test_model.rb CHANGED Viewed

@@ -3,112 +3,157 @@ include SVMLight
 class TestModel < Test::Unit::TestCase
-  def setup
-    @features ||= [
-      [ [1,0.6], [11, 0.0], [34, 0.1] ],
-      [ [5,0.4], [15, 0.0], [30, 0.1] ],
-      [ [1,0.1], [13, 0.0], [31, 0.1] ],
-      [ [7,0.7], [15, 0.0], [35, 0.1] ],
-      [ [5,0.6], [19, 0.0], [44, 0.1] ],
-    ]
-    @docs_and_labels ||= @features.each_with_index.map{|f,i| [ Document.create(i + 1, 1, 0, 0,  f), i%2 * -1]}
-  end
+  context "reading a model from file" do
+    setup do
+      @file_name = 'test/assets/model'
+    end
+    should "read properly from a well formed file" do
+      assert m     = Model.read_from_file(@file_name)
+      assert_equal 3877,  m.support_vectors_count
+      assert_equal 39118, m.total_words
+    end
-  def test_learn_classification_with_alpha
-    m = Model.new(:classification, @docs_and_labels, {}, {}, [1, 0.0] * 50)
-    assert_kind_of Model, m
+    should "classify successfully after reading the model from a file" do
+      m = Model.read_from_file(@file_name)
-    @docs_and_labels.each_with_index do |item, i|
-      assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
+      assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1 ,v.to_f]} ) )
+      assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
+      assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0, 0, 0, 0.8, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
+      assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0.5, 0, 0, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
+    end
+    should "raise file not found exception when file does not exists" do
+      assert_raises(MissingModelFile){ Model.read_from_file(@file_name + 'bleh') }
     end
   end
-  def test_learn_classification
-    m = Model.new(:classification, @docs_and_labels, {}, {}, nil)
-    assert_kind_of Model, m
-    assert_equal 44, m.total_words
-    assert_equal 5, m.totdoc
+  context "writting a model to a file" do
+    setup do
+      @features ||= [
+        [ [1,0.6], [11, 0.0], [34, 0.1] ],
+        [ [5,0.4], [15, 0.0], [30, 0.1] ],
+        [ [1,0.1], [13, 0.0], [31, 0.1] ],
+        [ [7,0.7], [15, 0.0], [35, 0.1] ],
+        [ [5,0.6], [19, 0.0], [44, 0.1] ],
+      ]
+      @docs_and_labels ||= @features.each_with_index.map do |feature, index|
+        [ Document.create(index + 1, 1, 0, 0,  feature), index%2 * -1]
+      end
+      @filepath = './test/assets/written_model'
+      @model    = Model.new(:classification, @docs_and_labels, {}, {}, nil)
+    end
+    should "write a model from memmory to a file" do
+      @model.write_to_file(@filepath)
-    @docs_and_labels.each_with_index do |item, i|
-      assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
+      assert File.exists?(@filepath)
+      assert File.file?(@filepath)
+      # TODO: Implement actual model equality
+      assert_equal @model.support_vectors_count, Model.read_from_file(@filepath).support_vectors_count
     end
+    # Need to find a good way to test this without relaying too much in the environment
+    should "raise ModelWriteError when it is impossible to write a model file"
+    teardown do
+      `rm #{@filepath} &> /dev/null`
+    end
   end
-  def test_learn_classification_with_learn_params
-    learn_params = {
+  context "when learning from new documents" do
+    setup do
+      @features ||= [
+        [ [1,0.6], [11, 0.0], [34, 0.1] ],
+        [ [5,0.4], [15, 0.0], [30, 0.1] ],
+        [ [1,0.1], [13, 0.0], [31, 0.1] ],
+        [ [7,0.7], [15, 0.0], [35, 0.1] ],
+        [ [5,0.6], [19, 0.0], [44, 0.1] ],
+      ]
+      @docs_and_labels ||= @features.each_with_index.map do |feature, index|
+        [ Document.create(index + 1, 1, 0, 0,  feature), index%2 * -1]
+      end
+    end
+    should "learn classification with default arguments" do
+      m = Model.new(:classification, @docs_and_labels, {}, {}, nil)
+      assert_kind_of Model, m
+      assert_equal 44, m.total_words
+      assert_equal 5, m.totdoc
+      @docs_and_labels.each_with_index do |item, i|
+        assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
+      end
+    end
+    should "learn classification with alpha values" do
+      m = Model.new(:classification, @docs_and_labels, {}, {}, [1, 0.0] * 50)
+      assert_kind_of Model, m
+      @docs_and_labels.each_with_index do |item, i|
+        assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
+      end
+    end
+    should "raise argument error when one of the alphas is not numeric " do
+      assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, {}, {}, [1, {}] )}
+    end
+    should "learn classification and accept learn parameters" do
+      learn_params = {
        "predfile"            => "custom_file",
        "alphafile"           => "alpha",
        "biased_hyperplane"   => false,
        "sharedslack"         => false,
        "remove_inconsistent" => true
-    }
+      }
-    m = Model.new(:classification, @docs_and_labels, learn_params, {}, nil)
-    assert_kind_of Model, m
+      m = Model.new(:classification, @docs_and_labels, learn_params, {}, nil)
+      assert_kind_of Model, m
-    @docs_and_labels.each_with_index do |item, i|
-      assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
+      @docs_and_labels.each_with_index do |item, i|
+        assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
+      end
     end
-  end
-  def test_learn_classification_with_invalid_learn_params
-    learn_params = {"svm_c" =>  -1}
-    assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
-    learn_params = {"svm_iter_to_shrink" =>  -1}
-    assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
-  end
+    should "raise argument error when learn parameters are invalid" do
+      learn_params = {"svm_c" =>  -1}
+      assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
+      learn_params = {"svm_iter_to_shrink" =>  -1}
+      assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, learn_params, {}, nil)}
+    end
+    should "learn calssification while accepting kernel paramters" do
-  def test_learn_classification_with_kernel_params
-    kernel_params = {
+      kernel_params = {
       "poly_degree" => 3,
       "rbf_gamma"   => 0.5,
       "coef_lin"    => 0.4,
       "coef_const"  => 0.56
-    }
+      }
-    m = Model.new(:classification, @docs_and_labels, {}, kernel_params, nil)
-    assert_kind_of Model, m
+      m = Model.new(:classification, @docs_and_labels, {}, kernel_params, nil)
+      assert_kind_of Model, m
-    @docs_and_labels.each_with_index do |item, i|
-      assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
-    end
-  end
-  def test_learn_classification_with_learn_params_when_predfile_is_not_string
-    learn_params = { "predfile"  => {}}
-    assert_raise(ArgumentError) do
-      Model.new(:classification, @docs_and_labels, learn_params, {}, [1, 0.0, 1])
+      @docs_and_labels.each_with_index do |item, i|
+        assert_kind_of  Numeric, m.classify(item.first), "failed in item # #{i}"
+      end
     end
-  end
+    should "raise argument error when predfile is not string" do
-  def test_learn_classification_fails_when_element_is_not_array
-    @docs_and_labels << []
-    assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, {}, {}, nil)}
-  end
+      learn_params = { "predfile"  => {}}
-  def test_learn_classification_fails_when_element_is_arry_with_the_wrong_types
-    assert_raises(ArgumentError){Model.new(:classification, @docs_and_labels, {}, {}, [1, {}] )}
-  end
-  def test_read
-    assert m     = Model.read_from_file('test/assets/model')
-    assert_equal 3877,  m.support_vectors_count
-    assert_equal 39118, m.total_words
-  end
+      assert_raise(ArgumentError) do
+        Model.new(:classification, @docs_and_labels, learn_params, {}, [1, 0.0, 1])
+      end
+    end
-  def test_classify
-    m = Model.read_from_file('test/assets/model')
-    assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1 ,v.to_f]} ) )
-    assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1.0, 0, 0, 0, 0.5 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
-    assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0, 0, 0, 0.8, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
-    assert_kind_of Numeric, m.classify( Document.create(-1, 1, 0, 0,[1, 0.5, 0, 0, 0, 0 , 0 ].each_with_index.map{|v, i| [i + 1,v.to_f]}) )
   end
 end

metadata CHANGED Viewed

@@ -2,7 +2,7 @@
 name: svmredlight
 version: !ruby/object:Gem::Version
   prerelease:
-  version: 0.1.0
+  version: 0.1.1
 platform: ruby
 authors:
 - Camilo Lopez
@@ -10,7 +10,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-09-11 00:00:00 -04:00
+date: 2011-09-22 00:00:00 -04:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -113,7 +113,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      hash: 2455726499843414946
+      hash: 2966611142819785204
       segments:
       - 0
       version: "0"