bio-band 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +5 -0
  3. data/Jarfile +1 -1
  4. data/Jarfile.lock +1 -1
  5. data/README.rdoc +2 -0
  6. data/Rakefile +2 -1
  7. data/VERSION +1 -1
  8. data/band_server/client.rb +35 -0
  9. data/band_server/client_alt.rb +35 -0
  10. data/band_server/first_dataset.csv +15 -0
  11. data/band_server/second_dataset.csv +15 -0
  12. data/band_server/simple_server.rb +95 -0
  13. data/band_server/third_dataset.csv +15 -0
  14. data/band_server/uploads/first_dataset.csv +15 -0
  15. data/band_server/uploads/second_dataset.csv +15 -0
  16. data/band_server/uploads/third_dataset.csv +15 -0
  17. data/bio-band.gemspec +19 -3
  18. data/features/step_definitions/weka_classifiers.rb +3 -2
  19. data/features/weka_classifiers.feature +13 -13
  20. data/lib/bio-band.rb +2 -0
  21. data/lib/bio-band/apache/stat/inference.rb +25 -19
  22. data/lib/bio-band/apache/stat/regression.rb +2 -2
  23. data/lib/bio-band/core/parser/parser.rb +6 -6
  24. data/lib/bio-band/core/type/instances.rb +15 -5
  25. data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb +2 -0
  26. data/lib/bio-band/weka/attribute_selection/evaluators.rb +2 -1
  27. data/lib/bio-band/weka/attribute_selection/search.rb +1 -0
  28. data/lib/bio-band/weka/classifiers/bayes/bayes.rb +1 -0
  29. data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +16 -3
  30. data/lib/bio-band/weka/classifiers/evaluation.rb +9 -9
  31. data/lib/bio-band/weka/classifiers/functions/functions.rb +1 -0
  32. data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +16 -3
  33. data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +21 -3
  34. data/lib/bio-band/weka/classifiers/mi/mi.rb +1 -0
  35. data/lib/bio-band/weka/classifiers/mi/mi_utils.rb +18 -3
  36. data/lib/bio-band/weka/classifiers/rules/rules_utils.rb +20 -4
  37. data/lib/bio-band/weka/classifiers/trees/trees.rb +1 -0
  38. data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +20 -3
  39. data/lib/bio-band/weka/clusterers/clusterers.rb +37 -13
  40. data/lib/bio-band/weka/clusterers/clusterers_utils.rb +60 -35
  41. data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +9 -1
  42. data/test/helper.rb +18 -0
  43. data/test/test_apacheCorrelation.rb +22 -0
  44. data/test/test_apacheInference.rb +46 -0
  45. data/test/test_bio-band.rb +9 -0
  46. metadata +33 -2
@@ -20,14 +20,21 @@ module Rules_utils
20
20
  build_classifier(@dataset)
21
21
  end
22
22
 
23
+ #Set data for instance classifier
24
+ #ARGS:
25
+ # data -> an Instances object
23
26
  def set_data(data)
24
27
  @dataset = data
25
28
  end
26
-
29
+
30
+ #Set a class index for the input dataset
27
31
  def set_class_index(class_index)
28
32
  @class_index = class_index
29
33
  end
30
34
 
35
+ #Set options for the selected classifier
36
+ #ARGS:
37
+ #options -> a String, i.e. "-K 3"
31
38
  def set_options(options)
32
39
  options_inst = Utils.splitOptions(options)
33
40
  setOptions(options_inst)
@@ -41,10 +48,19 @@ module Rules_utils
41
48
  puts globalInfo
42
49
  end
43
50
 
51
+ # perform crossvalidation on a trained classifier
52
+ #ARGS:
53
+ #fold -> 'int' value
44
54
  def cross_validate(fold)
45
- eval = Weka::Classifier::Evaluation.new self.class.data
46
- eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
47
- eval.summary
55
+ if self.class.data
56
+ eval = Weka::Classifier::Evaluation.new self.class.data
57
+ eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
58
+ eval.summary
59
+ else
60
+ eval = Weka::Classifier::Evaluation.new @dataset
61
+ eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
62
+ eval.summary
63
+ end
48
64
  end
49
65
 
50
66
  #Class methods module
@@ -3,6 +3,7 @@ require 'trees_utils'
3
3
 
4
4
  module Weka
5
5
  module Classifier
6
+ #This module contains classifiers from the 'weka.classifiers.trees' package
6
7
  module Trees
7
8
  java_import 'weka.classifiers.trees.J48'
8
9
  java_import 'weka.classifiers.trees.FT'
@@ -20,14 +20,21 @@ module Trees_utils
20
20
  build_classifier(@dataset)
21
21
  end
22
22
 
23
+ #Set input data for the selected classifier
24
+ #ARGS:
25
+ #data -> an Instances class object
23
26
  def set_data(data)
24
27
  @dataset = data
25
28
  end
26
29
 
30
+ #Set the class index for the input dataset
27
31
  def set_class_index(class_index)
28
32
  @class_index = class_index
29
33
  end
30
34
 
35
+ #Set options for the instance classifier
36
+ #ARGS:
37
+ #options -> A String object, i.e. "-K 3"
31
38
  def set_options(options)
32
39
  options_inst = Utils.splitOptions(options)
33
40
  setOptions(options_inst)
@@ -37,14 +44,24 @@ module Trees_utils
37
44
  listOptions.each {|key| puts "#{key.synopsis} #{key.description}"}
38
45
  end
39
46
 
47
+ #Return a short description for the selected classifier
40
48
  def description
41
49
  puts globalInfo
42
50
  end
43
51
 
52
+ # perform crossvalidation on a trained classifier
53
+ #ARGS:
54
+ #fold -> 'int' value
44
55
  def cross_validate(fold)
45
- eval = Weka::Classifier::Evaluation.new self.class.data
46
- eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
47
- eval.summary
56
+ if self.class.data
57
+ eval = Weka::Classifier::Evaluation.new self.class.data
58
+ eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
59
+ eval.summary
60
+ else
61
+ eval = Weka::Classifier::Evaluation.new @dataset
62
+ eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
63
+ eval.summary
64
+ end
48
65
  end
49
66
 
50
67
  #Class methods module
@@ -2,6 +2,7 @@ $:.unshift File.dirname(__FILE__)
2
2
  require 'clusterers_utils'
3
3
 
4
4
  module Weka
5
+ #This module contains the clusterers from the 'weka.clusterers' package
5
6
  module Clusterer
6
7
  java_import 'weka.clusterers.SimpleKMeans'
7
8
  java_import 'weka.clusterers.FarthestFirst'
@@ -13,9 +14,13 @@ module Weka
13
14
  class Cobweb
14
15
  include Clusterer_utils
15
16
  class Base < Cobweb
16
- def initialize
17
+ def initialize(&block)
17
18
  super
18
- init_clusterer
19
+ if block_given?
20
+ init_instance_clusterer(&block)
21
+ else
22
+ init_clusterer
23
+ end
19
24
  end
20
25
  end
21
26
  end
@@ -23,9 +28,13 @@ module Weka
23
28
  class EM
24
29
  include Clusterer_utils
25
30
  class Base < EM
26
- def initialize
31
+ def initialize(&block)
27
32
  super
28
- init_clusterer
33
+ if block_given?
34
+ init_instance_clusterer(&block)
35
+ else
36
+ init_clusterer
37
+ end
29
38
  end
30
39
  end
31
40
  end
@@ -33,20 +42,27 @@ module Weka
33
42
  class HierarchicalClusterer
34
43
  include Clusterer_utils
35
44
  class Base < HierarchicalClusterer
36
- def initialize
45
+ def initialize(&block)
37
46
  super
38
- init_clusterer
47
+ if block_given?
48
+ init_instance_clusterer(&block)
49
+ else
50
+ init_clusterer
51
+ end
39
52
  end
40
53
  end
41
54
  end
42
55
 
43
-
44
56
  class SimpleKMeans
45
57
  include Clusterer_utils
46
58
  class Base < SimpleKMeans
47
- def initialize
59
+ def initialize(&block)
48
60
  super
49
- init_clusterer
61
+ if block_given?
62
+ init_instance_clusterer(&block)
63
+ else
64
+ init_clusterer
65
+ end
50
66
  end
51
67
  end
52
68
  end
@@ -54,9 +70,13 @@ module Weka
54
70
  class FarthestFirst
55
71
  include Clusterer_utils
56
72
  class Base < FarthestFirst
57
- def initialize
73
+ def initialize(&block)
58
74
  super
59
- init_clusterer
75
+ if block_given?
76
+ init_instance_clusterer(&block)
77
+ else
78
+ init_clusterer
79
+ end
60
80
  end
61
81
  end
62
82
  end
@@ -64,9 +84,13 @@ module Weka
64
84
  class XMeans
65
85
  include Clusterer_utils
66
86
  class Base < XMeans
67
- def initialize
87
+ def initialize(&block)
68
88
  super
69
- init_clusterer
89
+ if block_given?
90
+ init_instance_clusterer(&block)
91
+ else
92
+ init_clusterer
93
+ end
70
94
  end
71
95
  end
72
96
  end
@@ -1,61 +1,86 @@
1
1
  #This module is used by the classes from the Clusterer module
2
2
  #to inherit the following methods (instance and class methods)
3
3
  module Clusterer_utils
4
- java_import "weka.core.Utils"
5
- java_import "weka.clusterers.ClusterEvaluation"
4
+ java_import "weka.core.Utils"
5
+ java_import "weka.clusterers.ClusterEvaluation"
6
6
 
7
- def init_clusterer
7
+ def init_clusterer
8
8
  set_options(self.class.options) if self.class.options
9
9
  buildClusterer(self.class.data)
10
- end
10
+ end
11
+
12
+ def init_instance_clusterer(&block)
13
+ self.instance_eval(&block)
14
+ #@dataset.setClassIndex(@class_index)
15
+ buildClusterer(@dataset)
16
+ end
11
17
 
12
- #Instance methods list
18
+ #Instance methods list
13
19
  def self.included(base)
14
20
  base.extend(ClassMethods)
15
21
  end
16
22
 
23
+ #set instance data for the clusterer
24
+ def set_data(data)
25
+ @dataset = data
26
+ end
27
+
28
+ #set options for the clusterer
17
29
  def set_options(options)
18
- options_inst = Utils.splitOptions(options)
19
- setOptions(options_inst)
30
+ options_inst = Utils.splitOptions(options)
31
+ setOptions(options_inst)
20
32
  end
21
33
 
22
- def list_options
23
- listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
24
- end
34
+ def list_options
35
+ listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
36
+ end
25
37
 
26
- def description
38
+ #the description provided by the Weka Documentation
39
+ def description
27
40
  globalInfo
28
- end
41
+ end
29
42
 
43
+ #list cluster centroids with coordinates
30
44
  def get_centroids
31
45
  getClusterCentroids
32
46
  end
33
47
 
48
+ #list the clusterer's capabilities with attributes (e.g. Numeric, Nominal...)
34
49
  def list_capabilities
35
50
  get_capabilities.to_s
36
51
  end
37
52
 
38
- # 'data' is an Instances class object
39
- def evaluate
40
- eval = ClusterEvaluation.new
41
- eval.setClusterer(self)
42
- eval.evaluateClusterer(self.class.data)
43
- eval.clusterResultsToString
44
- end
45
-
46
- #Class methods module
47
- module ClassMethods
48
-
49
- def self.classifier_attr_accessor(*args)
50
- args.each do |arg|
51
- #Here's the getter
52
- self.class_eval("def #{arg};@#{arg};end")
53
- #Here's the setter
54
- self.class_eval("def set_#{arg}(val);@#{arg}=val;end")
55
- end
56
- end
57
-
58
- classifier_attr_accessor :options,:data
59
-
60
- end
53
+ # Validate clusterer. If the evaluation needs to be performed on a different dataset this function accepts
54
+ # an optional parameter (an Instances class object)
55
+ def evaluate(*args)
56
+ eval = ClusterEvaluation.new
57
+ eval.setClusterer(self)
58
+ if not args[0]
59
+ if self.class.data
60
+ eval.evaluateClusterer(self.class.data)
61
+ else
62
+ eval.evaluateClusterer(@dataset)
63
+ end
64
+ else
65
+ eval.evaluateClusterer(args[0])
66
+ end
67
+ puts 'performing evaluation'
68
+ eval.clusterResultsToString
69
+ end
70
+
71
+ #Class methods module
72
+ module ClassMethods
73
+
74
+ def self.classifier_attr_accessor(*args)
75
+ args.each do |arg|
76
+ #Here's the getter
77
+ self.class_eval("def #{arg};@#{arg};end")
78
+ #Here's the setter
79
+ self.class_eval("def set_#{arg}(val);@#{arg}=val;end")
80
+ end
81
+ end
82
+
83
+ classifier_attr_accessor :options,:data
84
+
85
+ end
61
86
  end
@@ -27,6 +27,10 @@ module Weka
27
27
 
28
28
  class AddCluster
29
29
  include Unsupervised_Util
30
+ alias_method :clusterer, :set_clusterer
31
+ def set_clusterer(index)
32
+ set_clusterer(index)
33
+ end
30
34
  end
31
35
 
32
36
  class Center
@@ -52,8 +56,12 @@ module Weka
52
56
  class PrincipalComponents
53
57
  include Unsupervised_Util
54
58
  end
55
-
59
+
56
60
  class Remove
61
+ alias_method :attribute_indices, :setAttributeIndices
62
+ def setAttributeIndices(index)
63
+ setAttributeIndices(index)
64
+ end
57
65
  include Unsupervised_Util
58
66
  end
59
67
 
data/test/helper.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'bio-band'
16
+
17
+ class Test::Unit::TestCase
18
+ end
@@ -0,0 +1,22 @@
1
+ require 'helper'
2
+
3
+ class TestApacheCorrelation < Test::Unit::TestCase
4
+
5
+ context "Apache correlation methods" do
6
+ should "calculate covariance having as input two Ruby arrays" do
7
+ result = Apache::Stat::Correlation.covariance([1,2,3,4],[6,5,2,0])
8
+ assert_equal -3.5, result
9
+ end
10
+
11
+ should "calculate Pearson correlation having as input two Ruby arrays" do
12
+ result = Apache::Stat::Correlation.pearson_correlation([1,2,3,4],[1,2,3,4])
13
+ assert_equal 1, result
14
+ end
15
+
16
+ should "calculate Spearman correlation having as input two Ruby arrays" do
17
+ result = Apache::Stat::Correlation.spearman_correlation([1,2,3,4],[4,3,2,1])
18
+ assert_equal -1, result
19
+ end
20
+
21
+ end
22
+ end
@@ -0,0 +1,46 @@
1
+ require 'helper'
2
+
3
+ class TestApacheInference < Test::Unit::TestCase
4
+
5
+ context "Apache inference module" do
6
+ should "Perform Wilcoxon signed rank test" do
7
+ val,p_val = Apache::Stat::Inference.wilcoxon_test([1,2,3,4],[6,5,2,0])
8
+ assert_equal 6,val
9
+ assert_equal 0.875,p_val
10
+ end
11
+
12
+ should "Computes the Chi-Square statistic comparing observed and expected frequency counts" do
13
+ val,p_val = Apache::Stat::Inference.chi_square([1,2,3,4,5],[1,2,3,4,5])
14
+ assert_equal 0,val
15
+ assert_equal 1,p_val
16
+ val,p_val = Apache::Stat::Inference.chi_square [[1,2,3,4,5],[1,2,3,4,5]]
17
+ assert_equal 0,val
18
+ assert_equal 1,p_val
19
+ end
20
+
21
+ should "Perform the Mann-Whitney U test on two input datasets" do
22
+ val,p_val = Apache::Stat::Inference.mann_whitney_u([1,2,3,4,5],[1,2,3,4,5])
23
+ assert_equal 12.5,val
24
+ assert_equal 1,p_val
25
+ end
26
+
27
+ should "Perform a homoscedastic T test on two input datasets" do
28
+ val,p_val = Apache::Stat::Inference.t_test([1,2,3,4,5],[10,11,12,13,14],homoscedastic=true)
29
+ assert_equal -9,val
30
+ assert_equal 1.853118429643006e-05,p_val
31
+ end
32
+
33
+ should "Perform a paired T test on two input datasets" do
34
+ val,p_val = Apache::Stat::Inference.t_test([1,2,3,4,5],[10,11,12,13,14])
35
+ assert_equal -9,val
36
+ assert_equal 1.853118429643006e-05,p_val
37
+ end
38
+
39
+ should "Calculate one-way ANOVA (analysis of variance) statistics on input data" do
40
+ val,p_val = Apache::Stat::Inference.one_way_anova [[1,2,3,4,5],[10,11,12,13,14]]
41
+ assert_equal 81,val
42
+ assert_equal 1.8531184296399772e-05,p_val
43
+ end
44
+
45
+ end
46
+ end
@@ -0,0 +1,9 @@
1
+ require 'helper'
2
+
3
+ class TestBioBand < Test::Unit::TestCase
4
+ # should "probably rename this file and start testing for real" do
5
+ # flunk "hey buddy, you should probably rename this file and start testing for real"
6
+ # end
7
+ end
8
+
9
+