RubyGems - bio-band - Versions diffs - 0.1.2 → 0.1.3 - Mend

bio-band 0.1.2 → 0.1.3

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

data/VERSION +1 -1
data/bio-band.gemspec +8 -3
data/features/step_definitions/create_dataset.rb +16 -17
data/features/step_definitions/weka_clustering.rb +2 -2
data/features/step_definitions/weka_filters.rb +12 -9
data/features/step_definitions/weka_parsers.rb +13 -13
data/lib/bio-band/core/type/instances.rb +33 -14
data/lib/bio-band/weka.rb +3 -1
data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb +18 -0
data/lib/bio-band/weka/attribute_selection/evaluators.rb +21 -0
data/lib/bio-band/weka/attribute_selection/search.rb +26 -0
data/lib/bio-band/weka/classifiers/bayes/bayes.rb +74 -54
data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +43 -23
data/lib/bio-band/weka/classifiers/evaluation.rb +1 -1
data/lib/bio-band/weka/classifiers/functions/functions.rb +157 -2
data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +45 -25
data/lib/bio-band/weka/classifiers/lazy/lazy.rb +69 -4
data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +48 -28
data/lib/bio-band/weka/classifiers/mi/mi.rb +190 -0
data/lib/bio-band/weka/classifiers/mi/mi_utils.rb +65 -0
data/lib/bio-band/weka/classifiers/rules/rules.rb +190 -0
data/lib/bio-band/weka/classifiers/rules/rules_utils.rb +45 -25
data/lib/bio-band/weka/classifiers/trees/trees.rb +66 -0
data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +47 -27
data/lib/bio-band/weka/clusterers/clusterers.rb +34 -0
data/lib/bio-band/weka/clusterers/clusterers_utils.rb +2 -4
data/lib/bio-band/weka/db/db.rb +67 -67
data/lib/bio-band/weka/filters/supervised/attribute/attribute.rb +31 -1
data/lib/bio-band/weka/filters/supervised/supervised_utils.rb +33 -31
data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +12 -0
data/lib/bio-band/weka/filters/unsupervised/unsupervised_utils.rb +29 -29
metadata +8 -3
data/lib/bio-band/weka/classifiers/rules/rules.rb +0 -32

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.1.2
1	+ 0.1.3

data/bio-band.gemspec CHANGED Viewed

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = "bio-band"
-  s.version = "0.1.2"
+  s.version = "0.1.3"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["arrigonialberto86"]
-  s.date = "2013-07-30"
+  s.date = "2013-09-02"
   s.description = "Data mining and machine learning algorithms for JRuby "
   s.email = "arrigonialberto86@gmail.com"
   s.executables = ["bio-band"]
@@ -55,6 +55,9 @@ Gem::Specification.new do |s|
     "lib/bio-band/core/type/instances.rb",
     "lib/bio-band/core/type/utils.rb",
     "lib/bio-band/weka.rb",
+    "lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb",
+    "lib/bio-band/weka/attribute_selection/evaluators.rb",
+    "lib/bio-band/weka/attribute_selection/search.rb",
     "lib/bio-band/weka/classifiers/bayes/bayes.rb",
     "lib/bio-band/weka/classifiers/bayes/bayes_utils.rb",
     "lib/bio-band/weka/classifiers/evaluation.rb",
@@ -62,7 +65,9 @@ Gem::Specification.new do |s|
     "lib/bio-band/weka/classifiers/functions/functions_utils.rb",
     "lib/bio-band/weka/classifiers/lazy/lazy.rb",
     "lib/bio-band/weka/classifiers/lazy/lazy_utils.rb",
-    "lib/bio-band/weka/classifiers/rules/rules.rb ",
+    "lib/bio-band/weka/classifiers/mi/mi.rb",
+    "lib/bio-band/weka/classifiers/mi/mi_utils.rb",
+    "lib/bio-band/weka/classifiers/rules/rules.rb",
     "lib/bio-band/weka/classifiers/rules/rules_utils.rb",
     "lib/bio-band/weka/classifiers/trees/trees.rb",
     "lib/bio-band/weka/classifiers/trees/trees_utils.rb",

data/features/step_definitions/create_dataset.rb CHANGED Viewed

@@ -4,37 +4,36 @@ Given(/^a nominal attribute, named "(.*?)", with values "(.*?)"$/) do |arg1, arg
 end
 Given(/^one numeric attribute, named "(.*?)"$/) do |arg1|
-	$second_att = arg1.to_sym
+  $second_att = arg1.to_sym
 end
 Given(/^another numeric attribute, names "(.*?)"$/) do |arg1|
-	$third_att = arg1.to_sym
+  $third_att = arg1.to_sym
 end
 Given(/^two data rows: "(.*?)","(.*?)"$/) do |arg1, arg2|
-	@first_row = arg1.split(',')
-	@second_row = arg2.split(',')
-	@first_row[1] = @first_row[1].to_f
-	@first_row[2] = @first_row[2].to_f
-	@second_row[1] = @second_row[1].to_f
-	@second_row[2] = @second_row[2].to_f
+  $first_row = arg1.split(',')
+  $second_row = arg2.split(',')
+  $first_row[1] = $first_row[1].to_f
+  $first_row[2] = $first_row[2].to_f
+  $second_row[1] = $second_row[1].to_f
+  $second_row[2] = $second_row[2].to_f
 end
 Then(/^I want to build en empty dataset for my use$/) do
-	class Dataset < Core::Type::Instances::Base
-		nominal $first_att, $values
-  	numeric $second_att
-  	string $third_att
+  $my_instance = Core::Type::Instances::Base.new do
+    nominal $first_att, $values
+    numeric $second_att
+    string $third_att
   end
-  @my_instance = Dataset.new
-  @my_instance.summary
+  $my_instance.summary
 end
 Then(/^I want to populate the dataset by row$/) do
-	@my_instance.populate_by_row([@first_row,@second_row])
-	@my_instance.summary
+  $my_instance.populate_by_row([$first_row,$second_row])
+  $my_instance.summary
 end
 Then(/^I want to print my dataset as a bidimensional Ruby Array$/) do
-	puts @my_instance.to_a2d.inspect
+  puts $my_instance.to_a2d.inspect
 end

data/features/step_definitions/weka_clustering.rb CHANGED Viewed

@@ -30,5 +30,5 @@ Then(/^I want to report result statistics$/) do
 end
 Then(/^I want to use Weka clustering cross\-validation$/) do
-  puts @clustered.validate
-end
+  puts @clustered.evaluate
+end

data/features/step_definitions/weka_filters.rb CHANGED Viewed

@@ -1,29 +1,32 @@
 Given(/^the example ARFF file "(.*?)"$/) do |arff_file|
-	@arff = File.join('resources',arff_file)
+  @arff = File.join('resources',arff_file)
 end
 Given(/^the Weka Attribute Add filter$/) do
-	@filter = Weka::Filter::Unsupervised::Attribute::Add.new
+  @filter = Weka::Filter::Unsupervised::Attribute::Add.new
 end
 Then(/^I want to parse the file in order to create an Instances class object$/) do
-	@dataset_ARFF = Core::Parser::parse_ARFF(@arff)
-	@dataset_ARFF.summary
+  $dataset_ARFF = Core::Parser::parse_ARFF(@arff)
+  puts $dataset_ARFF.summary
 end
 Then(/^I want to print the available filter options and usage$/) do
-	@filter.filter_options
+  @filter.options_list
 end
 Then(/^I want to set the option String "(.*?)"$/) do |arg1|
-	@filter.set_filter_options(arg1)
-	@filter.set_data(@dataset_ARFF)
+  $arg1 =  arg1
+  @filter.set do
+    filter_options $arg1
+    data $dataset_ARFF
+  end
 end
 Then(/^I want to add an attribute \(a column\) to the dataset using the Weka filter Add$/) do
-	@new_inst = @filter.use
+  @new_inst = @filter.use
 end
 Then(/^I want to print a "(.*?)" for the modified dataset$/) do |arg1|
-	@new_inst.send arg1.to_sym
+  @new_inst.send arg1.to_sym
 end

data/features/step_definitions/weka_parsers.rb CHANGED Viewed

@@ -1,20 +1,20 @@
 Given /^the CSV file "(.*?)"$/ do |csv_file|
-	@csv = File.join('resources',csv_file)
-	@dataset_CSV = Core::Parser::parse_CSV(@csv)
+  @csv = File.join('resources',csv_file)
+  @dataset_CSV = Core::Parser::parse_CSV(@csv)
 end
 Then /^I want to print to stdout the summary for the CSV parsed Instances object$/ do
-	@dataset_CSV.summary
+  @dataset_CSV.summary
 end
 Given /^the ARFF file "(.*?)"$/ do |arff_file|
-	@arff = File.join('resources',arff_file)
-	puts @arff
-	@dataset_ARFF = Core::Parser::parse_ARFF(@arff)
+  @arff = File.join('resources',arff_file)
+  puts @arff
+  @dataset_ARFF = Core::Parser::parse_ARFF(@arff)
 end
 Then /^I want to print to stdout the summary for the ARFF parsed Instances object$/ do
-	@dataset_ARFF.summary
+  @dataset_ARFF.summary
 end
 Given(/^the database "(.*?)"$/) do |arg1|
@@ -22,7 +22,7 @@ Given(/^the database "(.*?)"$/) do |arg1|
 end
 Given(/^a table named "(.*?)"$/) do |arg1|
-	@target_table = arg1
+  @target_table = arg1
 end
 Then(/^I want to extract data from that table$/) do
@@ -34,11 +34,11 @@ Then(/^I want to print to stdout the summary for the parsed Instances object$/)
 end
 Then(/^I want to convert the data into a bidimensional Ruby Array$/) do
-	@dataset.to_a2d.should == [["sunny", 85.0, 85.0, "FALSE", "no"], ["sunny", 80.0, 90.0, "TRUE", "no"], ["overcast", 83.0, 86.0, "FALSE", "yes"],
-													 ["rainy", 70.0, 96.0, "FALSE", "yes"], ["rainy", 68.0, 80.0, "FALSE", "yes"], ["rainy", 65.0, 70.0, "TRUE", "no"],
-													 ["overcast", 64.0, 65.0, "TRUE", "yes"], ["sunny", 72.0, 95.0, "FALSE", "no"], ["sunny", 69.0, 70.0, "FALSE", "yes"],
-													 ["rainy", 75.0, 80.0, "FALSE", "yes"], ["sunny", 75.0, 70.0, "TRUE", "yes"], ["overcast", 72.0, 90.0, "TRUE", "yes"],
-													 ["overcast", 81.0, 75.0, "FALSE", "yes"], ["rainy", 71.0, 91.0, "TRUE", "no"]]
+  @dataset.to_a2d.should == [["sunny", 85.0, 85.0, "FALSE", "no"], ["sunny", 80.0, 90.0, "TRUE", "no"], ["overcast", 83.0, 86.0, "FALSE", "yes"],
+                           ["rainy", 70.0, 96.0, "FALSE", "yes"], ["rainy", 68.0, 80.0, "FALSE", "yes"], ["rainy", 65.0, 70.0, "TRUE", "no"],
+                           ["overcast", 64.0, 65.0, "TRUE", "yes"], ["sunny", 72.0, 95.0, "FALSE", "no"], ["sunny", 69.0, 70.0, "FALSE", "yes"],
+                           ["rainy", 75.0, 80.0, "FALSE", "yes"], ["sunny", 75.0, 70.0, "TRUE", "yes"], ["overcast", 72.0, 90.0, "TRUE", "yes"],
+                           ["overcast", 81.0, 75.0, "FALSE", "yes"], ["rainy", 71.0, 91.0, "TRUE", "no"]]
 end

data/lib/bio-band/core/type/instances.rb CHANGED Viewed

@@ -3,6 +3,8 @@ require 'ruport'
 require 'json'
 module Core
+  java_import "weka.core.SerializationHelper"
   module Type
     java_import "weka.core.Instances"
@@ -46,7 +48,7 @@ module Core
       end
       # Return the number of columns (Attribute objects) in the dataset
-      def n_columns
+      def n_col
         return numAttributes
       end
@@ -55,6 +57,14 @@ module Core
         puts "Rows number:\t#{numInstances}\nColumns number:\t #{numAttributes}"
       end
+      def each_row
+        enumerate_instances.each {|inst| yield(inst)}
+      end
+      def each_column
+        enumerate_attributes.each {|attribute| yield(attribute)}
+      end
       # Check if this instance's attributes are all Numeric
       def check_numeric_instance
         enumerateAttributes.each do |att|
@@ -160,7 +170,7 @@ module Core
       # (check function): should check that the array is bidimensional and that
       # the lengths are equal
       def check_array(data)
-        return true
+        return true # still to be done
       end
       # An entire dataset is inserted 'by row' into the current Instances object
@@ -236,9 +246,11 @@ module Core
         puts summary
-        count=0
-        enumerateInstances.each {|inst| count=count+1}
-        puts "\nNumber of rows: #{count}"
+        unless enumerate_instances.nil?
+          count=0
+          enumerateInstances.each {|inst| count=count+1}
+          puts "\nNumber of rows: #{count}"
+        end
       end
       # Merges two sets of Instances together. The resulting set will have all the
@@ -257,50 +269,51 @@ module Core
      #   return instances
      # end
-      @@positions = []
       # This method is used for attributes definition in uninitialized Instances-derived classes
-      def self.att(attr_type,name,*values)
+      def att(attr_type,name,*values)
         att = Core::Type.create_numeric_attr(name.to_java(:string)) if attr_type == :numeric
         att = Core::Type.create_nominal_attr(name.to_java(:string),values[0]) if attr_type == :nominal
         att = Core::Type.create_date_attr(name.to_java(:string),values[0]) if attr_type == :date
         att = att = Core::Type.create_string_attr(name.to_java(:string)) if attr_type == :string
-        @@positions << att
+        @positions << att
       end
       # This method is used for Nominal attributes definition in uninitialized Instances-derived classes
       # * *Args*    :
       #   - +name+ -> Attribute name, a String
       #   - +values+ -> An array of values for the nominal attribute
-      def self.nominal(name,values)
+      def nominal(name,values)
         att :nominal, name, values
       end
       # This method is used for Numeric attributes definition in uninitialized Instances-derived classes
       # * *Args*    :
       #   - +name+ -> Attribute name, a String
-      def self.numeric(name)
+      def numeric(name)
         att :numeric, name
       end
       # This method is used for Date attributes definition in uninitialized Instances-derived classes
       # * *Args*    :
       #   - +name+ -> Attribute name, a String
-      def self.date(name)
+      def date(name)
         att :date, name
       end
       # This method is used for String attributes definition in uninitialized Instances-derived classes
       # * *Args*    :
       #   - +name+ -> Attribute name, a String
-      def self.string(name)
+      def string(name)
         att :string, name
       end
       # Class used for the creation of a new dataset (Instances class)
       class Base < Instances
-        def initialize
+        def initialize(&block)
           attributes_vector = FastVector.new
-          @@positions.each {|value| attributes_vector.addElement(value)}
+          @positions = []
+          self.instance_eval(&block) if block
+          @positions.each {|value| attributes_vector.addElement(value)}
           super('Instances',attributes_vector,0)
         end
       end
@@ -325,7 +338,13 @@ module Core
       attributes.each {|value| attributes_vector.addElement(value)}
       return Instances.new(name,attributes_vector,0)
     end
+  end
+  # Helper class for serialization
+  # Works with classifiers, filters, clusterers...
+  class SerializationHelper
   end
 end

data/lib/bio-band/weka.rb CHANGED Viewed

@@ -9,4 +9,6 @@ require 'bio-band/weka/classifiers/functions/functions'
 require 'bio-band/weka/classifiers/trees/trees'
 require 'bio-band/weka/classifiers/lazy/lazy'
 require 'bio-band/weka/classifiers/rules/rules'
-require 'bio-band/weka/clusterers/clusterers'
+require 'bio-band/weka/clusterers/clusterers'
+require 'bio-band/weka/attribute_selection/evaluators'
+require 'bio-band/weka/attribute_selection/search'

data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb ADDED Viewed

@@ -0,0 +1,18 @@
+module Attribute_selection_Utils
+  java_import "weka.core.Utils"
+  #Instance methods list
+  def options_list
+    listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
+  end
+  def select_options(options_string)
+    options = Utils.splitOptions(options_string)
+    set_options(options)
+  end
+  def description
+    globalInfo
+ end
+end

data/lib/bio-band/weka/attribute_selection/evaluators.rb ADDED Viewed

@@ -0,0 +1,21 @@
+$:.unshift File.dirname(__FILE__)
+require 'attribute_selection_utils'
+module Weka
+  module Attribute_selection
+    module Evaluator
+      java_import 'weka.attributeSelection.CfsSubsetEval'
+      java_import 'weka.attributeSelection.ChiSquaredAttributeEval'
+      class CfsSubsetEval
+        include Attribute_selection_Utils
+        java_alias :use_options , :setOptions, [Java::Java.lang.String[]]
+      end
+      class ChiSquaredAttributeEval
+        include Attribute_selection_Utils
+      end
+    end
+  end
+end

data/lib/bio-band/weka/attribute_selection/search.rb ADDED Viewed

@@ -0,0 +1,26 @@
+$:.unshift File.dirname(__FILE__)
+require 'attribute_selection_utils'
+module Weka
+  module Attribute_selection
+    module Search
+      java_import 'weka.attributeSelection.GreedyStepwise'
+      java_import 'weka.attributeSelection.RankSearch'
+      java_import 'weka.attributeSelection.Ranker'
+      class GreedyStepwise
+        include Attribute_selection_Utils
+      end
+      class Ranker
+        include Attribute_selection_Utils
+      end
+      class RankSearch
+        include Attribute_selection_Utils
+      end
+    end
+  end
+end

data/lib/bio-band/weka/classifiers/bayes/bayes.rb CHANGED Viewed

@@ -2,64 +2,84 @@ $:.unshift File.dirname(__FILE__)
 require 'bayes_utils'
 module Weka
-	module Classifier
-		module Bayes
-			java_import "weka.classifiers.bayes.NaiveBayes"
-			java_import "weka.classifiers.bayes.BayesianLogisticRegression"
-			java_import "weka.classifiers.bayes.AODE"
-			java_import "weka.classifiers.bayes.ComplementNaiveBayes"
-			java_import "weka.classifiers.bayes.WAODE"
+  module Classifier
+    module Bayes
+      java_import "weka.classifiers.bayes.NaiveBayes"
+      java_import "weka.classifiers.bayes.BayesianLogisticRegression"
+      java_import "weka.classifiers.bayes.AODE"
+      java_import "weka.classifiers.bayes.ComplementNaiveBayes"
+      java_import "weka.classifiers.bayes.WAODE"
-			class NaiveBayes
-				include Bayes_utils
-				class Base < NaiveBayes
-					def initialize
-						super
-            init_classifier
-					end
-				end
-			end
+      class NaiveBayes
+        include Bayes_utils
+        class Base < NaiveBayes
+          def initialize(&block)
+            super
+            if block_given?
+              init_instance_classifier(&block)
+            else
+              init_classifier
+            end
+          end
+        end
+      end
-			class AODE
-				include Bayes_utils
-				class Base < AODE
-					def initialize
-						super
-            init_classifier
-					end
-				end
-			end
+      class AODE
+        include Bayes_utils
+        class Base < AODE
+          def initialize(&block)
+            super
+            if block_given?
+              init_instance_classifier(&block)
+            else
+              init_classifier
+            end
+          end
+        end
+      end
-			class BayesianLogisticRegression
-				include Bayes_utils
-				class Base < BayesianLogisticRegression
-					def initialize
-						super
-            init_classifier
-					end
-				end
-			end
+      class BayesianLogisticRegression
+        include Bayes_utils
+        class Base < BayesianLogisticRegression
+          def initialize(&block)
+            super
+            if block_given?
+              init_instance_classifier(&block)
+            else
+              init_classifier
+            end
+          end
+        end
+      end
-			class ComplementNaiveBayes
-				include Bayes_utils
-				class Base < ComplementNaiveBayes
-					def initialize
-						super
-            init_classifier
-					end
-				end
-			end
+      class ComplementNaiveBayes
+        include Bayes_utils
+        class Base < ComplementNaiveBayes
+          def initialize(&block)
+            super
+            if block_given?
+              init_instance_classifier(&block)
+            else
+              init_classifier
+            end
+          end
+        end
+      end
-			class WAODE
-				include Bayes_utils
-				class Base < WAODE
-					def initialize
-						super
-            init_classifier
-					end
-				end
-			end
+      class WAODE
+        include Bayes_utils
+        class Base < WAODE
+          def initialize(&block)
+            super
+            if block_given?
+              init_instance_classifier(&block)
+            else
+              init_classifier
+            end
+          end
+        end
+      end
-		end
-	end
+    end
+  end
 end