RubyGems - bio-band - Versions diffs - 0.1.3 → 0.1.4 - Mend

bio-band 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46)

data/Gemfile +1 -0
data/Gemfile.lock +5 -0
data/Jarfile +1 -1
data/Jarfile.lock +1 -1
data/README.rdoc +2 -0
data/Rakefile +2 -1
data/VERSION +1 -1
data/band_server/client.rb +35 -0
data/band_server/client_alt.rb +35 -0
data/band_server/first_dataset.csv +15 -0
data/band_server/second_dataset.csv +15 -0
data/band_server/simple_server.rb +95 -0
data/band_server/third_dataset.csv +15 -0
data/band_server/uploads/first_dataset.csv +15 -0
data/band_server/uploads/second_dataset.csv +15 -0
data/band_server/uploads/third_dataset.csv +15 -0
data/bio-band.gemspec +19 -3
data/features/step_definitions/weka_classifiers.rb +3 -2
data/features/weka_classifiers.feature +13 -13
data/lib/bio-band.rb +2 -0
data/lib/bio-band/apache/stat/inference.rb +25 -19
data/lib/bio-band/apache/stat/regression.rb +2 -2
data/lib/bio-band/core/parser/parser.rb +6 -6
data/lib/bio-band/core/type/instances.rb +15 -5
data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb +2 -0
data/lib/bio-band/weka/attribute_selection/evaluators.rb +2 -1
data/lib/bio-band/weka/attribute_selection/search.rb +1 -0
data/lib/bio-band/weka/classifiers/bayes/bayes.rb +1 -0
data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +16 -3
data/lib/bio-band/weka/classifiers/evaluation.rb +9 -9
data/lib/bio-band/weka/classifiers/functions/functions.rb +1 -0
data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +16 -3
data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +21 -3
data/lib/bio-band/weka/classifiers/mi/mi.rb +1 -0
data/lib/bio-band/weka/classifiers/mi/mi_utils.rb +18 -3
data/lib/bio-band/weka/classifiers/rules/rules_utils.rb +20 -4
data/lib/bio-band/weka/classifiers/trees/trees.rb +1 -0
data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +20 -3
data/lib/bio-band/weka/clusterers/clusterers.rb +37 -13
data/lib/bio-band/weka/clusterers/clusterers_utils.rb +60 -35
data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +9 -1
data/test/helper.rb +18 -0
data/test/test_apacheCorrelation.rb +22 -0
data/test/test_apacheInference.rb +46 -0
data/test/test_bio-band.rb +9 -0
metadata +33 -2

data/lib/bio-band/weka/classifiers/rules/rules_utils.rb CHANGED Viewed

@@ -20,14 +20,21 @@ module Rules_utils
     build_classifier(@dataset)
   end
+  #Set data for instance classifier
+  #ARGV
+  # data -> an Instances object
   def set_data(data)
     @dataset = data
   end
+  #Set a class index for the input dataset
   def set_class_index(class_index)
     @class_index = class_index
   end
+  #Set options for the selected classifier
+  #ARGS:
+  #options -> a String, i.e. "-K 3"
   def set_options(options)
     options_inst = Utils.splitOptions(options)
     setOptions(options_inst)
@@ -41,10 +48,19 @@ module Rules_utils
     puts globalInfo
   end
+  # perform crossvalidation on a trained classifier
+  #ARGV:
+  #fold -> 'int' value
   def cross_validate(fold)
-    eval = Weka::Classifier::Evaluation.new self.class.data
-    eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
-    eval.summary
+    if self.class.data
+      eval = Weka::Classifier::Evaluation.new self.class.data
+      eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
+      eval.summary
+    else
+      eval = Weka::Classifier::Evaluation.new @dataset
+      eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
+      eval.summary
+    end
   end
   #Class methods module

data/lib/bio-band/weka/classifiers/trees/trees.rb CHANGED Viewed

@@ -3,6 +3,7 @@ require 'trees_utils'
 module Weka
 	module Classifier
+    #This module contains classifiers from the 'weka.classifiers.trees' package
 		module Trees
       java_import 'weka.classifiers.trees.J48'
       java_import 'weka.classifiers.trees.FT'

data/lib/bio-band/weka/classifiers/trees/trees_utils.rb CHANGED Viewed

@@ -20,14 +20,21 @@ module Trees_utils
     build_classifier(@dataset)
   end
+  #Set input data for the selected classifier
+  #ARGV:
+  #data -> an Instances class object
   def set_data(data)
     @dataset = data
   end
+  #Set the class index for the input dataset
   def set_class_index(class_index)
     @class_index = class_index
   end
+  #Set options for the instance classifier
+  #ARGS:
+  #options -> A String object, i.e. "-K 3"
   def set_options(options)
     options_inst = Utils.splitOptions(options)
     setOptions(options_inst)
@@ -37,14 +44,24 @@ module Trees_utils
     listOptions.each {|key| puts "#{key.synopsis} #{key.description}"}
   end
+  #Return a short description for the selected classifier
   def description
     puts globalInfo
   end
+  # perform crossvalidation on a trained classifier
+  #ARGV:
+  #fold -> 'int' value
   def cross_validate(fold)
-    eval = Weka::Classifier::Evaluation.new self.class.data
-    eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
-    eval.summary
+    if self.class.data
+      eval = Weka::Classifier::Evaluation.new self.class.data
+      eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
+      eval.summary
+    else
+      eval = Weka::Classifier::Evaluation.new @dataset
+      eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
+      eval.summary
+    end
   end
   #Class methods module

data/lib/bio-band/weka/clusterers/clusterers.rb CHANGED Viewed

@@ -2,6 +2,7 @@ $:.unshift File.dirname(__FILE__)
 require 'clusterers_utils'
 module Weka
+  #This module contains the clusterers from the 'weka.clusterers' package
 	module Clusterer
     java_import 'weka.clusterers.SimpleKMeans'
     java_import 'weka.clusterers.FarthestFirst'
@@ -13,9 +14,13 @@ module Weka
     class Cobweb
       include Clusterer_utils
       class Base < Cobweb
-        def initialize
+        def initialize(&block)
           super
-          init_clusterer
+          if block_given?
+            init_instance_clusterer(&block)
+          else
+            init_clusterer
+          end
         end
       end
     end
@@ -23,9 +28,13 @@ module Weka
     class EM
       include Clusterer_utils
       class Base < EM
-        def initialize
+        def initialize(&block)
           super
-          init_clusterer
+          if block_given?
+            init_instance_clusterer(&block)
+          else
+            init_clusterer
+          end
         end
       end
     end
@@ -33,20 +42,27 @@ module Weka
     class HierarchicalClusterer
       include Clusterer_utils
       class Base < HierarchicalClusterer
-        def initialize
+        def initialize(&block)
           super
-          init_clusterer
+          if block_given?
+            init_instance_clusterer(&block)
+          else
+            init_clusterer
+          end
         end
       end
     end
     class SimpleKMeans
       include Clusterer_utils
       class Base < SimpleKMeans
-        def initialize
+        def initialize(&block)
           super
-          init_clusterer
+          if block_given?
+            init_instance_clusterer(&block)
+          else
+            init_clusterer
+          end
         end
       end
     end
@@ -54,9 +70,13 @@ module Weka
     class FarthestFirst
       include Clusterer_utils
       class Base < FarthestFirst
-        def initialize
+        def initialize(&block)
           super
-          init_clusterer
+          if block_given?
+            init_instance_clusterer(&block)
+          else
+            init_clusterer
+          end
         end
       end
     end
@@ -64,9 +84,13 @@ module Weka
     class XMeans
       include Clusterer_utils
       class Base < XMeans
-        def initialize
+        def initialize(&block)
           super
-          init_clusterer
+          if block_given?
+            init_instance_clusterer(&block)
+          else
+            init_clusterer
+          end
         end
       end
     end

data/lib/bio-band/weka/clusterers/clusterers_utils.rb CHANGED Viewed

@@ -1,61 +1,86 @@
 #This module is used by the classes from the Clusterer module
 #to inherit the following methods (instance and class methods)
 module Clusterer_utils
-	java_import "weka.core.Utils"
-	java_import "weka.clusterers.ClusterEvaluation"
+  java_import "weka.core.Utils"
+  java_import "weka.clusterers.ClusterEvaluation"
-	def init_clusterer
+  def init_clusterer
     set_options(self.class.options) if self.class.options
     buildClusterer(self.class.data)
-	end
+  end
+  def init_instance_clusterer(&block)
+    self.instance_eval(&block)
+    #@dataset.setClassIndex(@class_index)
+    buildClusterer(@dataset)
+  end
-	#Instance methods list
+  #Instance methods list
   def self.included(base)
     base.extend(ClassMethods)
   end
+  #set instance data for the clusterer
+  def set_data(data)
+    @dataset = data
+  end
+  #set options for the clusterer
   def set_options(options)
-  	options_inst = Utils.splitOptions(options)
-		setOptions(options_inst)
+    options_inst = Utils.splitOptions(options)
+    setOptions(options_inst)
   end
-	def list_options
-		listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
-	end
+  def list_options
+    listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
+  end
-	def description
+  #the description provided by the Weka Documentation
+  def description
     globalInfo
-	end
+  end
+  #list cluster centroids with coordinates
   def get_centroids
     getClusterCentroids
   end
+  #list cluster's capabilities with attributes (i.e Numeric, Nominal...)
   def list_capabilities
     get_capabilities.to_s
   end
-  # 'data' is an Instances class object
- 	def evaluate
- 		eval = ClusterEvaluation.new
- 		eval.setClusterer(self)
- 		eval.evaluateClusterer(self.class.data)
- 		eval.clusterResultsToString
- 	end
-	#Class methods module
-	module ClassMethods
-		def self.classifier_attr_accessor(*args)
-	    args.each do |arg|
-	      #Here's the getter
-	      self.class_eval("def #{arg};@#{arg};end")
-	      #Here's the setter
-	      self.class_eval("def set_#{arg}(val);@#{arg}=val;end")
-	  	end
-  	end
-		classifier_attr_accessor :options,:data
-	end
+  # Validate clusterer. If the evaluation needs to be performed on a different dataset this function accepts
+  # an optional parameter (an Instances class object)
+  def evaluate(*args)
+    eval = ClusterEvaluation.new
+    eval.setClusterer(self)
+    if not args[0]
+      if self.class.data
+        eval.evaluateClusterer(self.class.data)
+      else
+        eval.evaluateClusterer(@dataset)
+      end
+    else
+      eval.evaluateClusterer(args[0])
+    end
+    puts 'performing evaluation'
+    eval.clusterResultsToString
+  end
+  #Class methods module
+  module ClassMethods
+    def self.classifier_attr_accessor(*args)
+      args.each do |arg|
+        #Here's the getter
+        self.class_eval("def #{arg};@#{arg};end")
+        #Here's the setter
+        self.class_eval("def set_#{arg}(val);@#{arg}=val;end")
+      end
+    end
+    classifier_attr_accessor :options,:data
+  end
 end

data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb CHANGED Viewed

@@ -27,6 +27,10 @@ module Weka
         class AddCluster
           include Unsupervised_Util
+          alias_method :clusterer, :set_clusterer
+          def set_clusterer(index)
+            set_clusterer(index)
+          end
         end
   			class Center
@@ -52,8 +56,12 @@ module Weka
   			class PrincipalComponents
           include Unsupervised_Util
   			end
   			class Remove
+          alias_method :attribute_indices, :setAttributeIndices
+          def setAttributeIndices(index)
+            setAttributeIndices(index)
+          end
           include Unsupervised_Util
   			end

data/test/helper.rb ADDED Viewed

@@ -0,0 +1,18 @@
+require 'rubygems'
+require 'bundler'
+begin
+  Bundler.setup(:default, :development)
+rescue Bundler::BundlerError => e
+  $stderr.puts e.message
+  $stderr.puts "Run `bundle install` to install missing gems"
+  exit e.status_code
+end
+require 'test/unit'
+require 'shoulda'
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+require 'bio-band'
+class Test::Unit::TestCase
+end

data/test/test_apacheCorrelation.rb ADDED Viewed

@@ -0,0 +1,22 @@
+require 'helper'
+class TestApacheCorrelation < Test::Unit::TestCase
+  context "Apache correlation methods" do
+    should "calculate covariance having as input two Ruby arrays" do
+      result = Apache::Stat::Correlation.covariance([1,2,3,4],[6,5,2,0])
+      assert_equal -3.5, result
+    end
+    should "calculate Pearson correlation having as input two Ruby arrays" do
+      result = Apache::Stat::Correlation.pearson_correlation([1,2,3,4],[1,2,3,4])
+      assert_equal 1, result
+    end
+    should "calculate Spearman correlation having as input two Ruby arrays" do
+      result = Apache::Stat::Correlation.spearman_correlation([1,2,3,4],[4,3,2,1])
+      assert_equal -1, result
+    end
+  end
+end

data/test/test_apacheInference.rb ADDED Viewed

@@ -0,0 +1,46 @@
+require 'helper'
+class TestApacheInference < Test::Unit::TestCase
+  context "Apache inference module" do
+    should "Perform Wilcoxon signed rank test" do
+      val,p_val = Apache::Stat::Inference.wilcoxon_test([1,2,3,4],[6,5,2,0])
+      assert_equal 6,val
+      assert_equal 0.875,p_val
+    end
+    should "Computes the Chi-Square statistic comparing observed and expected frequency counts" do
+      val,p_val = Apache::Stat::Inference.chi_square([1,2,3,4,5],[1,2,3,4,5])
+      assert_equal 0,val
+      assert_equal 1,p_val
+      val,p_val = Apache::Stat::Inference.chi_square [[1,2,3,4,5],[1,2,3,4,5]]
+      assert_equal 0,val
+      assert_equal 1,p_val
+    end
+    should "Perform the Mann-Whitney U test on two input datasets" do
+      val,p_val = Apache::Stat::Inference.mann_whitney_u([1,2,3,4,5],[1,2,3,4,5])
+      assert_equal 12.5,val
+      assert_equal 1,p_val
+    end
+    should "Perform a homoscedastic T test on two input datasets" do
+      val,p_val = Apache::Stat::Inference.t_test([1,2,3,4,5],[10,11,12,13,14],homoscedastic=true)
+      assert_equal -9,val
+      assert_equal 1.853118429643006e-05,p_val
+    end
+    should "Perform a paired T test on two input datasets" do
+      val,p_val = Apache::Stat::Inference.t_test([1,2,3,4,5],[10,11,12,13,14])
+      assert_equal -9,val
+      assert_equal 1.853118429643006e-05,p_val
+    end
+    should "Calculate one-way ANOVA (analysis of variance) statistics on input data" do
+      val,p_val = Apache::Stat::Inference.one_way_anova [[1,2,3,4,5],[10,11,12,13,14]]
+      assert_equal 81,val
+      assert_equal 1.8531184296399772e-05,p_val
+    end
+  end
+end

data/test/test_bio-band.rb ADDED Viewed

@@ -0,0 +1,9 @@
+require 'helper'
+class TestBioBand < Test::Unit::TestCase
+#  should "probably rename this file and start testing for real" do
+#    flunk "hey buddy, you should probably rename this file and start testing for real"
+#  end
+end