bio-band 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +5 -0
  3. data/Jarfile +1 -1
  4. data/Jarfile.lock +1 -1
  5. data/README.rdoc +2 -0
  6. data/Rakefile +2 -1
  7. data/VERSION +1 -1
  8. data/band_server/client.rb +35 -0
  9. data/band_server/client_alt.rb +35 -0
  10. data/band_server/first_dataset.csv +15 -0
  11. data/band_server/second_dataset.csv +15 -0
  12. data/band_server/simple_server.rb +95 -0
  13. data/band_server/third_dataset.csv +15 -0
  14. data/band_server/uploads/first_dataset.csv +15 -0
  15. data/band_server/uploads/second_dataset.csv +15 -0
  16. data/band_server/uploads/third_dataset.csv +15 -0
  17. data/bio-band.gemspec +19 -3
  18. data/features/step_definitions/weka_classifiers.rb +3 -2
  19. data/features/weka_classifiers.feature +13 -13
  20. data/lib/bio-band.rb +2 -0
  21. data/lib/bio-band/apache/stat/inference.rb +25 -19
  22. data/lib/bio-band/apache/stat/regression.rb +2 -2
  23. data/lib/bio-band/core/parser/parser.rb +6 -6
  24. data/lib/bio-band/core/type/instances.rb +15 -5
  25. data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb +2 -0
  26. data/lib/bio-band/weka/attribute_selection/evaluators.rb +2 -1
  27. data/lib/bio-band/weka/attribute_selection/search.rb +1 -0
  28. data/lib/bio-band/weka/classifiers/bayes/bayes.rb +1 -0
  29. data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +16 -3
  30. data/lib/bio-band/weka/classifiers/evaluation.rb +9 -9
  31. data/lib/bio-band/weka/classifiers/functions/functions.rb +1 -0
  32. data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +16 -3
  33. data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +21 -3
  34. data/lib/bio-band/weka/classifiers/mi/mi.rb +1 -0
  35. data/lib/bio-band/weka/classifiers/mi/mi_utils.rb +18 -3
  36. data/lib/bio-band/weka/classifiers/rules/rules_utils.rb +20 -4
  37. data/lib/bio-band/weka/classifiers/trees/trees.rb +1 -0
  38. data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +20 -3
  39. data/lib/bio-band/weka/clusterers/clusterers.rb +37 -13
  40. data/lib/bio-band/weka/clusterers/clusterers_utils.rb +60 -35
  41. data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +9 -1
  42. data/test/helper.rb +18 -0
  43. data/test/test_apacheCorrelation.rb +22 -0
  44. data/test/test_apacheInference.rb +46 -0
  45. data/test/test_bio-band.rb +9 -0
  46. metadata +33 -2
@@ -20,14 +20,21 @@ module Rules_utils
20
20
  build_classifier(@dataset)
21
21
  end
22
22
 
23
+ #Set data for instance classifier
24
+ #ARGS:
25
+ # data -> an Instances object
23
26
  def set_data(data)
24
27
  @dataset = data
25
28
  end
26
-
29
+
30
+ #Set a class index for the input dataset
27
31
  def set_class_index(class_index)
28
32
  @class_index = class_index
29
33
  end
30
34
 
35
+ #Set options for the selected classifier
36
+ #ARGS:
37
+ #options -> a String, i.e. "-K 3"
31
38
  def set_options(options)
32
39
  options_inst = Utils.splitOptions(options)
33
40
  setOptions(options_inst)
@@ -41,10 +48,19 @@ module Rules_utils
41
48
  puts globalInfo
42
49
  end
43
50
 
51
+ # Perform cross-validation on a trained classifier
52
+ #ARGS:
53
+ #fold -> 'int' value
44
54
  def cross_validate(fold)
45
- eval = Weka::Classifier::Evaluation.new self.class.data
46
- eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
47
- eval.summary
55
+ if self.class.data
56
+ eval = Weka::Classifier::Evaluation.new self.class.data
57
+ eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
58
+ eval.summary
59
+ else
60
+ eval = Weka::Classifier::Evaluation.new @dataset
61
+ eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
62
+ eval.summary
63
+ end
48
64
  end
49
65
 
50
66
  #Class methods module
@@ -3,6 +3,7 @@ require 'trees_utils'
3
3
 
4
4
  module Weka
5
5
  module Classifier
6
+ #This module contains classifiers from the 'weka.classifiers.trees' package
6
7
  module Trees
7
8
  java_import 'weka.classifiers.trees.J48'
8
9
  java_import 'weka.classifiers.trees.FT'
@@ -20,14 +20,21 @@ module Trees_utils
20
20
  build_classifier(@dataset)
21
21
  end
22
22
 
23
+ #Set input data for the selected classifier
24
+ #ARGS:
25
+ #data -> an Instances class object
23
26
  def set_data(data)
24
27
  @dataset = data
25
28
  end
26
29
 
30
+ #Set the class index for the input dataset
27
31
  def set_class_index(class_index)
28
32
  @class_index = class_index
29
33
  end
30
34
 
35
+ #Set options for the instance classifier
36
+ #ARGS:
37
+ #options -> A String object, i.e. "-K 3"
31
38
  def set_options(options)
32
39
  options_inst = Utils.splitOptions(options)
33
40
  setOptions(options_inst)
@@ -37,14 +44,24 @@ module Trees_utils
37
44
  listOptions.each {|key| puts "#{key.synopsis} #{key.description}"}
38
45
  end
39
46
 
47
+ #Print a short description of the selected classifier
40
48
  def description
41
49
  puts globalInfo
42
50
  end
43
51
 
52
+ # Perform cross-validation on a trained classifier
53
+ #ARGS:
54
+ #fold -> 'int' value
44
55
  def cross_validate(fold)
45
- eval = Weka::Classifier::Evaluation.new self.class.data
46
- eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
47
- eval.summary
56
+ if self.class.data
57
+ eval = Weka::Classifier::Evaluation.new self.class.data
58
+ eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
59
+ eval.summary
60
+ else
61
+ eval = Weka::Classifier::Evaluation.new @dataset
62
+ eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
63
+ eval.summary
64
+ end
48
65
  end
49
66
 
50
67
  #Class methods module
@@ -2,6 +2,7 @@ $:.unshift File.dirname(__FILE__)
2
2
  require 'clusterers_utils'
3
3
 
4
4
  module Weka
5
+ #This module contains the clusterers from the 'weka.clusterers' package
5
6
  module Clusterer
6
7
  java_import 'weka.clusterers.SimpleKMeans'
7
8
  java_import 'weka.clusterers.FarthestFirst'
@@ -13,9 +14,13 @@ module Weka
13
14
  class Cobweb
14
15
  include Clusterer_utils
15
16
  class Base < Cobweb
16
- def initialize
17
+ def initialize(&block)
17
18
  super
18
- init_clusterer
19
+ if block_given?
20
+ init_instance_clusterer(&block)
21
+ else
22
+ init_clusterer
23
+ end
19
24
  end
20
25
  end
21
26
  end
@@ -23,9 +28,13 @@ module Weka
23
28
  class EM
24
29
  include Clusterer_utils
25
30
  class Base < EM
26
- def initialize
31
+ def initialize(&block)
27
32
  super
28
- init_clusterer
33
+ if block_given?
34
+ init_instance_clusterer(&block)
35
+ else
36
+ init_clusterer
37
+ end
29
38
  end
30
39
  end
31
40
  end
@@ -33,20 +42,27 @@ module Weka
33
42
  class HierarchicalClusterer
34
43
  include Clusterer_utils
35
44
  class Base < HierarchicalClusterer
36
- def initialize
45
+ def initialize(&block)
37
46
  super
38
- init_clusterer
47
+ if block_given?
48
+ init_instance_clusterer(&block)
49
+ else
50
+ init_clusterer
51
+ end
39
52
  end
40
53
  end
41
54
  end
42
55
 
43
-
44
56
  class SimpleKMeans
45
57
  include Clusterer_utils
46
58
  class Base < SimpleKMeans
47
- def initialize
59
+ def initialize(&block)
48
60
  super
49
- init_clusterer
61
+ if block_given?
62
+ init_instance_clusterer(&block)
63
+ else
64
+ init_clusterer
65
+ end
50
66
  end
51
67
  end
52
68
  end
@@ -54,9 +70,13 @@ module Weka
54
70
  class FarthestFirst
55
71
  include Clusterer_utils
56
72
  class Base < FarthestFirst
57
- def initialize
73
+ def initialize(&block)
58
74
  super
59
- init_clusterer
75
+ if block_given?
76
+ init_instance_clusterer(&block)
77
+ else
78
+ init_clusterer
79
+ end
60
80
  end
61
81
  end
62
82
  end
@@ -64,9 +84,13 @@ module Weka
64
84
  class XMeans
65
85
  include Clusterer_utils
66
86
  class Base < XMeans
67
- def initialize
87
+ def initialize(&block)
68
88
  super
69
- init_clusterer
89
+ if block_given?
90
+ init_instance_clusterer(&block)
91
+ else
92
+ init_clusterer
93
+ end
70
94
  end
71
95
  end
72
96
  end
@@ -1,61 +1,86 @@
1
1
  #This module is used by the classes from the Clusterer module
2
2
  #to inherit the following methods (instance and class methods)
3
3
  module Clusterer_utils
4
- java_import "weka.core.Utils"
5
- java_import "weka.clusterers.ClusterEvaluation"
4
+ java_import "weka.core.Utils"
5
+ java_import "weka.clusterers.ClusterEvaluation"
6
6
 
7
- def init_clusterer
7
+ def init_clusterer
8
8
  set_options(self.class.options) if self.class.options
9
9
  buildClusterer(self.class.data)
10
- end
10
+ end
11
+
12
+ def init_instance_clusterer(&block)
13
+ self.instance_eval(&block)
14
+ #@dataset.setClassIndex(@class_index)
15
+ buildClusterer(@dataset)
16
+ end
11
17
 
12
- #Instance methods list
18
+ #Instance methods list
13
19
  def self.included(base)
14
20
  base.extend(ClassMethods)
15
21
  end
16
22
 
23
+ #set instance data for the clusterer
24
+ def set_data(data)
25
+ @dataset = data
26
+ end
27
+
28
+ #set options for the clusterer
17
29
  def set_options(options)
18
- options_inst = Utils.splitOptions(options)
19
- setOptions(options_inst)
30
+ options_inst = Utils.splitOptions(options)
31
+ setOptions(options_inst)
20
32
  end
21
33
 
22
- def list_options
23
- listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
24
- end
34
+ def list_options
35
+ listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
36
+ end
25
37
 
26
- def description
38
+ #the description provided by the Weka Documentation
39
+ def description
27
40
  globalInfo
28
- end
41
+ end
29
42
 
43
+ #list cluster centroids with coordinates
30
44
  def get_centroids
31
45
  getClusterCentroids
32
46
  end
33
47
 
48
+ #list the clusterer's capabilities with attributes (e.g. Numeric, Nominal...)
34
49
  def list_capabilities
35
50
  get_capabilities.to_s
36
51
  end
37
52
 
38
- # 'data' is an Instances class object
39
- def evaluate
40
- eval = ClusterEvaluation.new
41
- eval.setClusterer(self)
42
- eval.evaluateClusterer(self.class.data)
43
- eval.clusterResultsToString
44
- end
45
-
46
- #Class methods module
47
- module ClassMethods
48
-
49
- def self.classifier_attr_accessor(*args)
50
- args.each do |arg|
51
- #Here's the getter
52
- self.class_eval("def #{arg};@#{arg};end")
53
- #Here's the setter
54
- self.class_eval("def set_#{arg}(val);@#{arg}=val;end")
55
- end
56
- end
57
-
58
- classifier_attr_accessor :options,:data
59
-
60
- end
53
+ # Validate clusterer. If the evaluation needs to be performed on a different dataset this function accepts
54
+ # an optional parameter (an Instances class object)
55
+ def evaluate(*args)
56
+ eval = ClusterEvaluation.new
57
+ eval.setClusterer(self)
58
+ if not args[0]
59
+ if self.class.data
60
+ eval.evaluateClusterer(self.class.data)
61
+ else
62
+ eval.evaluateClusterer(@dataset)
63
+ end
64
+ else
65
+ eval.evaluateClusterer(args[0])
66
+ end
67
+ puts 'performing evaluation'
68
+ eval.clusterResultsToString
69
+ end
70
+
71
+ #Class methods module
72
+ module ClassMethods
73
+
74
+ def self.classifier_attr_accessor(*args)
75
+ args.each do |arg|
76
+ #Here's the getter
77
+ self.class_eval("def #{arg};@#{arg};end")
78
+ #Here's the setter
79
+ self.class_eval("def set_#{arg}(val);@#{arg}=val;end")
80
+ end
81
+ end
82
+
83
+ classifier_attr_accessor :options,:data
84
+
85
+ end
61
86
  end
@@ -27,6 +27,10 @@ module Weka
27
27
 
28
28
  class AddCluster
29
29
  include Unsupervised_Util
30
+ alias_method :clusterer, :set_clusterer
31
+ def set_clusterer(index)
32
+ set_clusterer(index)
33
+ end
30
34
  end
31
35
 
32
36
  class Center
@@ -52,8 +56,12 @@ module Weka
52
56
  class PrincipalComponents
53
57
  include Unsupervised_Util
54
58
  end
55
-
59
+
56
60
  class Remove
61
+ alias_method :attribute_indices, :setAttributeIndices
62
+ def setAttributeIndices(index)
63
+ setAttributeIndices(index)
64
+ end
57
65
  include Unsupervised_Util
58
66
  end
59
67
 
data/test/helper.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'bio-band'
16
+
17
+ class Test::Unit::TestCase
18
+ end
@@ -0,0 +1,22 @@
1
+ require 'helper'
2
+
3
+ class TestApacheCorrelation < Test::Unit::TestCase
4
+
5
+ context "Apache correlation methods" do
6
+ should "calculate covariance having as input two Ruby arrays" do
7
+ result = Apache::Stat::Correlation.covariance([1,2,3,4],[6,5,2,0])
8
+ assert_equal -3.5, result
9
+ end
10
+
11
+ should "calculate Pearson correlation having as input two Ruby arrays" do
12
+ result = Apache::Stat::Correlation.pearson_correlation([1,2,3,4],[1,2,3,4])
13
+ assert_equal 1, result
14
+ end
15
+
16
+ should "calculate Spearman correlation having as input two Ruby arrays" do
17
+ result = Apache::Stat::Correlation.spearman_correlation([1,2,3,4],[4,3,2,1])
18
+ assert_equal -1, result
19
+ end
20
+
21
+ end
22
+ end
@@ -0,0 +1,46 @@
1
+ require 'helper'
2
+
3
+ class TestApacheInference < Test::Unit::TestCase
4
+
5
+ context "Apache inference module" do
6
+ should "Perform Wilcoxon signed rank test" do
7
+ val,p_val = Apache::Stat::Inference.wilcoxon_test([1,2,3,4],[6,5,2,0])
8
+ assert_equal 6,val
9
+ assert_equal 0.875,p_val
10
+ end
11
+
12
+ should "Computes the Chi-Square statistic comparing observed and expected frequency counts" do
13
+ val,p_val = Apache::Stat::Inference.chi_square([1,2,3,4,5],[1,2,3,4,5])
14
+ assert_equal 0,val
15
+ assert_equal 1,p_val
16
+ val,p_val = Apache::Stat::Inference.chi_square [[1,2,3,4,5],[1,2,3,4,5]]
17
+ assert_equal 0,val
18
+ assert_equal 1,p_val
19
+ end
20
+
21
+ should "Perform the Mann-Whitney U test on two input datasets" do
22
+ val,p_val = Apache::Stat::Inference.mann_whitney_u([1,2,3,4,5],[1,2,3,4,5])
23
+ assert_equal 12.5,val
24
+ assert_equal 1,p_val
25
+ end
26
+
27
+ should "Perform a homoscedastic T test on two input datasets" do
28
+ val,p_val = Apache::Stat::Inference.t_test([1,2,3,4,5],[10,11,12,13,14],homoscedastic=true)
29
+ assert_equal -9,val
30
+ assert_equal 1.853118429643006e-05,p_val
31
+ end
32
+
33
+ should "Perform a paired T test on two input datasets" do
34
+ val,p_val = Apache::Stat::Inference.t_test([1,2,3,4,5],[10,11,12,13,14])
35
+ assert_equal -9,val
36
+ assert_equal 1.853118429643006e-05,p_val
37
+ end
38
+
39
+ should "Calculate one-way ANOVA (analysis of variance) statistics on input data" do
40
+ val,p_val = Apache::Stat::Inference.one_way_anova [[1,2,3,4,5],[10,11,12,13,14]]
41
+ assert_equal 81,val
42
+ assert_equal 1.8531184296399772e-05,p_val
43
+ end
44
+
45
+ end
46
+ end
@@ -0,0 +1,9 @@
1
+ require 'helper'
2
+
3
+ class TestBioBand < Test::Unit::TestCase
4
+ # should "probably rename this file and start testing for real" do
5
+ # flunk "hey buddy, you should probably rename this file and start testing for real"
6
+ # end
7
+ end
8
+
9
+