bio-band 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/Gemfile.lock +5 -0
- data/Jarfile +1 -1
- data/Jarfile.lock +1 -1
- data/README.rdoc +2 -0
- data/Rakefile +2 -1
- data/VERSION +1 -1
- data/band_server/client.rb +35 -0
- data/band_server/client_alt.rb +35 -0
- data/band_server/first_dataset.csv +15 -0
- data/band_server/second_dataset.csv +15 -0
- data/band_server/simple_server.rb +95 -0
- data/band_server/third_dataset.csv +15 -0
- data/band_server/uploads/first_dataset.csv +15 -0
- data/band_server/uploads/second_dataset.csv +15 -0
- data/band_server/uploads/third_dataset.csv +15 -0
- data/bio-band.gemspec +19 -3
- data/features/step_definitions/weka_classifiers.rb +3 -2
- data/features/weka_classifiers.feature +13 -13
- data/lib/bio-band.rb +2 -0
- data/lib/bio-band/apache/stat/inference.rb +25 -19
- data/lib/bio-band/apache/stat/regression.rb +2 -2
- data/lib/bio-band/core/parser/parser.rb +6 -6
- data/lib/bio-band/core/type/instances.rb +15 -5
- data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb +2 -0
- data/lib/bio-band/weka/attribute_selection/evaluators.rb +2 -1
- data/lib/bio-band/weka/attribute_selection/search.rb +1 -0
- data/lib/bio-band/weka/classifiers/bayes/bayes.rb +1 -0
- data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +16 -3
- data/lib/bio-band/weka/classifiers/evaluation.rb +9 -9
- data/lib/bio-band/weka/classifiers/functions/functions.rb +1 -0
- data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +16 -3
- data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +21 -3
- data/lib/bio-band/weka/classifiers/mi/mi.rb +1 -0
- data/lib/bio-band/weka/classifiers/mi/mi_utils.rb +18 -3
- data/lib/bio-band/weka/classifiers/rules/rules_utils.rb +20 -4
- data/lib/bio-band/weka/classifiers/trees/trees.rb +1 -0
- data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +20 -3
- data/lib/bio-band/weka/clusterers/clusterers.rb +37 -13
- data/lib/bio-band/weka/clusterers/clusterers_utils.rb +60 -35
- data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +9 -1
- data/test/helper.rb +18 -0
- data/test/test_apacheCorrelation.rb +22 -0
- data/test/test_apacheInference.rb +46 -0
- data/test/test_bio-band.rb +9 -0
- metadata +33 -2
@@ -20,14 +20,21 @@ module Rules_utils
|
|
20
20
|
build_classifier(@dataset)
|
21
21
|
end
|
22
22
|
|
23
|
+
#Set data for instance classifier
|
24
|
+
#ARGS:
|
25
|
+
# data -> an Instances object
|
23
26
|
def set_data(data)
|
24
27
|
@dataset = data
|
25
28
|
end
|
26
|
-
|
29
|
+
|
30
|
+
#Set a class index for the input dataset
|
27
31
|
def set_class_index(class_index)
|
28
32
|
@class_index = class_index
|
29
33
|
end
|
30
34
|
|
35
|
+
#Set options for the selected classifier
|
36
|
+
#ARGS:
|
37
|
+
#options -> a String, e.g. "-K 3"
|
31
38
|
def set_options(options)
|
32
39
|
options_inst = Utils.splitOptions(options)
|
33
40
|
setOptions(options_inst)
|
@@ -41,10 +48,19 @@ module Rules_utils
|
|
41
48
|
puts globalInfo
|
42
49
|
end
|
43
50
|
|
51
|
+
# Perform cross-validation on a trained classifier
|
52
|
+
#ARGS:
|
53
|
+
#fold -> 'int' value
|
44
54
|
def cross_validate(fold)
|
45
|
-
|
46
|
-
|
47
|
-
|
55
|
+
if self.class.data
|
56
|
+
eval = Weka::Classifier::Evaluation.new self.class.data
|
57
|
+
eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
|
58
|
+
eval.summary
|
59
|
+
else
|
60
|
+
eval = Weka::Classifier::Evaluation.new @dataset
|
61
|
+
eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
|
62
|
+
eval.summary
|
63
|
+
end
|
48
64
|
end
|
49
65
|
|
50
66
|
#Class methods module
|
@@ -20,14 +20,21 @@ module Trees_utils
|
|
20
20
|
build_classifier(@dataset)
|
21
21
|
end
|
22
22
|
|
23
|
+
#Set input data for the selected classifier
|
24
|
+
#ARGS:
|
25
|
+
#data -> an Instances class object
|
23
26
|
def set_data(data)
|
24
27
|
@dataset = data
|
25
28
|
end
|
26
29
|
|
30
|
+
#Set the class index for the input dataset
|
27
31
|
def set_class_index(class_index)
|
28
32
|
@class_index = class_index
|
29
33
|
end
|
30
34
|
|
35
|
+
#Set options for the instance classifier
|
36
|
+
#ARGS:
|
37
|
+
#options -> a String object, e.g. "-K 3"
|
31
38
|
def set_options(options)
|
32
39
|
options_inst = Utils.splitOptions(options)
|
33
40
|
setOptions(options_inst)
|
@@ -37,14 +44,24 @@ module Trees_utils
|
|
37
44
|
listOptions.each {|key| puts "#{key.synopsis} #{key.description}"}
|
38
45
|
end
|
39
46
|
|
47
|
+
#Return a short description for the selected classifier
|
40
48
|
def description
|
41
49
|
puts globalInfo
|
42
50
|
end
|
43
51
|
|
52
|
+
# Perform cross-validation on a trained classifier
|
53
|
+
#ARGS:
|
54
|
+
#fold -> 'int' value
|
44
55
|
def cross_validate(fold)
|
45
|
-
|
46
|
-
|
47
|
-
|
56
|
+
if self.class.data
|
57
|
+
eval = Weka::Classifier::Evaluation.new self.class.data
|
58
|
+
eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
|
59
|
+
eval.summary
|
60
|
+
else
|
61
|
+
eval = Weka::Classifier::Evaluation.new @dataset
|
62
|
+
eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
|
63
|
+
eval.summary
|
64
|
+
end
|
48
65
|
end
|
49
66
|
|
50
67
|
#Class methods module
|
@@ -2,6 +2,7 @@ $:.unshift File.dirname(__FILE__)
|
|
2
2
|
require 'clusterers_utils'
|
3
3
|
|
4
4
|
module Weka
|
5
|
+
#This module contains the clusterers from the 'weka.clusterers' package
|
5
6
|
module Clusterer
|
6
7
|
java_import 'weka.clusterers.SimpleKMeans'
|
7
8
|
java_import 'weka.clusterers.FarthestFirst'
|
@@ -13,9 +14,13 @@ module Weka
|
|
13
14
|
class Cobweb
|
14
15
|
include Clusterer_utils
|
15
16
|
class Base < Cobweb
|
16
|
-
def initialize
|
17
|
+
def initialize(&block)
|
17
18
|
super
|
18
|
-
|
19
|
+
if block_given?
|
20
|
+
init_instance_clusterer(&block)
|
21
|
+
else
|
22
|
+
init_clusterer
|
23
|
+
end
|
19
24
|
end
|
20
25
|
end
|
21
26
|
end
|
@@ -23,9 +28,13 @@ module Weka
|
|
23
28
|
class EM
|
24
29
|
include Clusterer_utils
|
25
30
|
class Base < EM
|
26
|
-
def initialize
|
31
|
+
def initialize(&block)
|
27
32
|
super
|
28
|
-
|
33
|
+
if block_given?
|
34
|
+
init_instance_clusterer(&block)
|
35
|
+
else
|
36
|
+
init_clusterer
|
37
|
+
end
|
29
38
|
end
|
30
39
|
end
|
31
40
|
end
|
@@ -33,20 +42,27 @@ module Weka
|
|
33
42
|
class HierarchicalClusterer
|
34
43
|
include Clusterer_utils
|
35
44
|
class Base < HierarchicalClusterer
|
36
|
-
def initialize
|
45
|
+
def initialize(&block)
|
37
46
|
super
|
38
|
-
|
47
|
+
if block_given?
|
48
|
+
init_instance_clusterer(&block)
|
49
|
+
else
|
50
|
+
init_clusterer
|
51
|
+
end
|
39
52
|
end
|
40
53
|
end
|
41
54
|
end
|
42
55
|
|
43
|
-
|
44
56
|
class SimpleKMeans
|
45
57
|
include Clusterer_utils
|
46
58
|
class Base < SimpleKMeans
|
47
|
-
def initialize
|
59
|
+
def initialize(&block)
|
48
60
|
super
|
49
|
-
|
61
|
+
if block_given?
|
62
|
+
init_instance_clusterer(&block)
|
63
|
+
else
|
64
|
+
init_clusterer
|
65
|
+
end
|
50
66
|
end
|
51
67
|
end
|
52
68
|
end
|
@@ -54,9 +70,13 @@ module Weka
|
|
54
70
|
class FarthestFirst
|
55
71
|
include Clusterer_utils
|
56
72
|
class Base < FarthestFirst
|
57
|
-
def initialize
|
73
|
+
def initialize(&block)
|
58
74
|
super
|
59
|
-
|
75
|
+
if block_given?
|
76
|
+
init_instance_clusterer(&block)
|
77
|
+
else
|
78
|
+
init_clusterer
|
79
|
+
end
|
60
80
|
end
|
61
81
|
end
|
62
82
|
end
|
@@ -64,9 +84,13 @@ module Weka
|
|
64
84
|
class XMeans
|
65
85
|
include Clusterer_utils
|
66
86
|
class Base < XMeans
|
67
|
-
def initialize
|
87
|
+
def initialize(&block)
|
68
88
|
super
|
69
|
-
|
89
|
+
if block_given?
|
90
|
+
init_instance_clusterer(&block)
|
91
|
+
else
|
92
|
+
init_clusterer
|
93
|
+
end
|
70
94
|
end
|
71
95
|
end
|
72
96
|
end
|
@@ -1,61 +1,86 @@
|
|
1
1
|
#This module is used by the classes from the Clusterer module
|
2
2
|
#to inherit the following methods (instance and class methods)
|
3
3
|
module Clusterer_utils
|
4
|
-
|
5
|
-
|
4
|
+
java_import "weka.core.Utils"
|
5
|
+
java_import "weka.clusterers.ClusterEvaluation"
|
6
6
|
|
7
|
-
|
7
|
+
def init_clusterer
|
8
8
|
set_options(self.class.options) if self.class.options
|
9
9
|
buildClusterer(self.class.data)
|
10
|
-
|
10
|
+
end
|
11
|
+
|
12
|
+
def init_instance_clusterer(&block)
|
13
|
+
self.instance_eval(&block)
|
14
|
+
#@dataset.setClassIndex(@class_index)
|
15
|
+
buildClusterer(@dataset)
|
16
|
+
end
|
11
17
|
|
12
|
-
|
18
|
+
#Instance methods list
|
13
19
|
def self.included(base)
|
14
20
|
base.extend(ClassMethods)
|
15
21
|
end
|
16
22
|
|
23
|
+
#set instance data for the clusterer
|
24
|
+
def set_data(data)
|
25
|
+
@dataset = data
|
26
|
+
end
|
27
|
+
|
28
|
+
#set options for the clusterer
|
17
29
|
def set_options(options)
|
18
|
-
|
19
|
-
|
30
|
+
options_inst = Utils.splitOptions(options)
|
31
|
+
setOptions(options_inst)
|
20
32
|
end
|
21
33
|
|
22
|
-
|
23
|
-
|
24
|
-
|
34
|
+
def list_options
|
35
|
+
listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
|
36
|
+
end
|
25
37
|
|
26
|
-
|
38
|
+
#the description provided by the Weka Documentation
|
39
|
+
def description
|
27
40
|
globalInfo
|
28
|
-
|
41
|
+
end
|
29
42
|
|
43
|
+
#list cluster centroids with coordinates
|
30
44
|
def get_centroids
|
31
45
|
getClusterCentroids
|
32
46
|
end
|
33
47
|
|
48
|
+
#list the clusterer's capabilities with attributes (e.g. Numeric, Nominal...)
|
34
49
|
def list_capabilities
|
35
50
|
get_capabilities.to_s
|
36
51
|
end
|
37
52
|
|
38
|
-
#
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
53
|
+
# Validate clusterer. If the evaluation needs to be performed on a different dataset this function accepts
|
54
|
+
# an optional parameter (an Instances class object)
|
55
|
+
def evaluate(*args)
|
56
|
+
eval = ClusterEvaluation.new
|
57
|
+
eval.setClusterer(self)
|
58
|
+
if not args[0]
|
59
|
+
if self.class.data
|
60
|
+
eval.evaluateClusterer(self.class.data)
|
61
|
+
else
|
62
|
+
eval.evaluateClusterer(@dataset)
|
63
|
+
end
|
64
|
+
else
|
65
|
+
eval.evaluateClusterer(args[0])
|
66
|
+
end
|
67
|
+
puts 'performing evaluation'
|
68
|
+
eval.clusterResultsToString
|
69
|
+
end
|
70
|
+
|
71
|
+
#Class methods module
|
72
|
+
module ClassMethods
|
73
|
+
|
74
|
+
def self.classifier_attr_accessor(*args)
|
75
|
+
args.each do |arg|
|
76
|
+
#Here's the getter
|
77
|
+
self.class_eval("def #{arg};@#{arg};end")
|
78
|
+
#Here's the setter
|
79
|
+
self.class_eval("def set_#{arg}(val);@#{arg}=val;end")
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
classifier_attr_accessor :options,:data
|
84
|
+
|
85
|
+
end
|
61
86
|
end
|
@@ -27,6 +27,10 @@ module Weka
|
|
27
27
|
|
28
28
|
class AddCluster
|
29
29
|
include Unsupervised_Util
|
30
|
+
alias_method :clusterer, :set_clusterer
|
31
|
+
def set_clusterer(index)
|
32
|
+
set_clusterer(index)
|
33
|
+
end
|
30
34
|
end
|
31
35
|
|
32
36
|
class Center
|
@@ -52,8 +56,12 @@ module Weka
|
|
52
56
|
class PrincipalComponents
|
53
57
|
include Unsupervised_Util
|
54
58
|
end
|
55
|
-
|
59
|
+
|
56
60
|
class Remove
|
61
|
+
alias_method :attribute_indices, :setAttributeIndices
|
62
|
+
def setAttributeIndices(index)
|
63
|
+
setAttributeIndices(index)
|
64
|
+
end
|
57
65
|
include Unsupervised_Util
|
58
66
|
end
|
59
67
|
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'test/unit'
|
11
|
+
require 'shoulda'
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-band'
|
16
|
+
|
17
|
+
class Test::Unit::TestCase
|
18
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestApacheCorrelation < Test::Unit::TestCase
|
4
|
+
|
5
|
+
context "Apache correlation methods" do
|
6
|
+
should "calculate covariance having as input two Ruby arrays" do
|
7
|
+
result = Apache::Stat::Correlation.covariance([1,2,3,4],[6,5,2,0])
|
8
|
+
assert_equal -3.5, result
|
9
|
+
end
|
10
|
+
|
11
|
+
should "calculate Pearson correlation having as input two Ruby arrays" do
|
12
|
+
result = Apache::Stat::Correlation.pearson_correlation([1,2,3,4],[1,2,3,4])
|
13
|
+
assert_equal 1, result
|
14
|
+
end
|
15
|
+
|
16
|
+
should "calculate Spearman correlation having as input two Ruby arrays" do
|
17
|
+
result = Apache::Stat::Correlation.spearman_correlation([1,2,3,4],[4,3,2,1])
|
18
|
+
assert_equal -1, result
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestApacheInference < Test::Unit::TestCase
|
4
|
+
|
5
|
+
context "Apache inference module" do
|
6
|
+
should "Perform Wilcoxon signed rank test" do
|
7
|
+
val,p_val = Apache::Stat::Inference.wilcoxon_test([1,2,3,4],[6,5,2,0])
|
8
|
+
assert_equal 6,val
|
9
|
+
assert_equal 0.875,p_val
|
10
|
+
end
|
11
|
+
|
12
|
+
should "Computes the Chi-Square statistic comparing observed and expected frequency counts" do
|
13
|
+
val,p_val = Apache::Stat::Inference.chi_square([1,2,3,4,5],[1,2,3,4,5])
|
14
|
+
assert_equal 0,val
|
15
|
+
assert_equal 1,p_val
|
16
|
+
val,p_val = Apache::Stat::Inference.chi_square [[1,2,3,4,5],[1,2,3,4,5]]
|
17
|
+
assert_equal 0,val
|
18
|
+
assert_equal 1,p_val
|
19
|
+
end
|
20
|
+
|
21
|
+
should "Perform the Mann-Whitney U test on two input datasets" do
|
22
|
+
val,p_val = Apache::Stat::Inference.mann_whitney_u([1,2,3,4,5],[1,2,3,4,5])
|
23
|
+
assert_equal 12.5,val
|
24
|
+
assert_equal 1,p_val
|
25
|
+
end
|
26
|
+
|
27
|
+
should "Perform a homoscedastic T test on two input datasets" do
|
28
|
+
val,p_val = Apache::Stat::Inference.t_test([1,2,3,4,5],[10,11,12,13,14],homoscedastic=true)
|
29
|
+
assert_equal -9,val
|
30
|
+
assert_equal 1.853118429643006e-05,p_val
|
31
|
+
end
|
32
|
+
|
33
|
+
should "Perform a paired T test on two input datasets" do
|
34
|
+
val,p_val = Apache::Stat::Inference.t_test([1,2,3,4,5],[10,11,12,13,14])
|
35
|
+
assert_equal -9,val
|
36
|
+
assert_equal 1.853118429643006e-05,p_val
|
37
|
+
end
|
38
|
+
|
39
|
+
should "Calculate one-way ANOVA (analysis of variance) statistics on input data" do
|
40
|
+
val,p_val = Apache::Stat::Inference.one_way_anova [[1,2,3,4,5],[10,11,12,13,14]]
|
41
|
+
assert_equal 81,val
|
42
|
+
assert_equal 1.8531184296399772e-05,p_val
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
end
|