bio-band 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/Gemfile.lock +5 -0
- data/Jarfile +1 -1
- data/Jarfile.lock +1 -1
- data/README.rdoc +2 -0
- data/Rakefile +2 -1
- data/VERSION +1 -1
- data/band_server/client.rb +35 -0
- data/band_server/client_alt.rb +35 -0
- data/band_server/first_dataset.csv +15 -0
- data/band_server/second_dataset.csv +15 -0
- data/band_server/simple_server.rb +95 -0
- data/band_server/third_dataset.csv +15 -0
- data/band_server/uploads/first_dataset.csv +15 -0
- data/band_server/uploads/second_dataset.csv +15 -0
- data/band_server/uploads/third_dataset.csv +15 -0
- data/bio-band.gemspec +19 -3
- data/features/step_definitions/weka_classifiers.rb +3 -2
- data/features/weka_classifiers.feature +13 -13
- data/lib/bio-band.rb +2 -0
- data/lib/bio-band/apache/stat/inference.rb +25 -19
- data/lib/bio-band/apache/stat/regression.rb +2 -2
- data/lib/bio-band/core/parser/parser.rb +6 -6
- data/lib/bio-band/core/type/instances.rb +15 -5
- data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb +2 -0
- data/lib/bio-band/weka/attribute_selection/evaluators.rb +2 -1
- data/lib/bio-band/weka/attribute_selection/search.rb +1 -0
- data/lib/bio-band/weka/classifiers/bayes/bayes.rb +1 -0
- data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +16 -3
- data/lib/bio-band/weka/classifiers/evaluation.rb +9 -9
- data/lib/bio-band/weka/classifiers/functions/functions.rb +1 -0
- data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +16 -3
- data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +21 -3
- data/lib/bio-band/weka/classifiers/mi/mi.rb +1 -0
- data/lib/bio-band/weka/classifiers/mi/mi_utils.rb +18 -3
- data/lib/bio-band/weka/classifiers/rules/rules_utils.rb +20 -4
- data/lib/bio-band/weka/classifiers/trees/trees.rb +1 -0
- data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +20 -3
- data/lib/bio-band/weka/clusterers/clusterers.rb +37 -13
- data/lib/bio-band/weka/clusterers/clusterers_utils.rb +60 -35
- data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +9 -1
- data/test/helper.rb +18 -0
- data/test/test_apacheCorrelation.rb +22 -0
- data/test/test_apacheInference.rb +46 -0
- data/test/test_bio-band.rb +9 -0
- metadata +33 -2
@@ -20,14 +20,21 @@ module Rules_utils
|
|
20
20
|
build_classifier(@dataset)
|
21
21
|
end
|
22
22
|
|
23
|
+
#Set data for instance classifier
|
24
|
+
#ARGS:
|
25
|
+
# data -> an Instances object
|
23
26
|
def set_data(data)
|
24
27
|
@dataset = data
|
25
28
|
end
|
26
|
-
|
29
|
+
|
30
|
+
#Set a class index for the input dataset
|
27
31
|
def set_class_index(class_index)
|
28
32
|
@class_index = class_index
|
29
33
|
end
|
30
34
|
|
35
|
+
#Set options for the selected classifier
|
36
|
+
#ARGS:
|
37
|
+
#options -> a String, i.e. "-K 3"
|
31
38
|
def set_options(options)
|
32
39
|
options_inst = Utils.splitOptions(options)
|
33
40
|
setOptions(options_inst)
|
@@ -41,10 +48,19 @@ module Rules_utils
|
|
41
48
|
puts globalInfo
|
42
49
|
end
|
43
50
|
|
51
|
+
# Perform cross-validation on a trained classifier
|
52
|
+
#ARGS:
|
53
|
+
#fold -> 'int' value
|
44
54
|
def cross_validate(fold)
|
45
|
-
|
46
|
-
|
47
|
-
|
55
|
+
if self.class.data
|
56
|
+
eval = Weka::Classifier::Evaluation.new self.class.data
|
57
|
+
eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
|
58
|
+
eval.summary
|
59
|
+
else
|
60
|
+
eval = Weka::Classifier::Evaluation.new @dataset
|
61
|
+
eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
|
62
|
+
eval.summary
|
63
|
+
end
|
48
64
|
end
|
49
65
|
|
50
66
|
#Class methods module
|
@@ -20,14 +20,21 @@ module Trees_utils
|
|
20
20
|
build_classifier(@dataset)
|
21
21
|
end
|
22
22
|
|
23
|
+
#Set input data for the selected classifier
|
24
|
+
#ARGS:
|
25
|
+
#data -> an Instances class object
|
23
26
|
def set_data(data)
|
24
27
|
@dataset = data
|
25
28
|
end
|
26
29
|
|
30
|
+
#Set the class index for the input dataset
|
27
31
|
def set_class_index(class_index)
|
28
32
|
@class_index = class_index
|
29
33
|
end
|
30
34
|
|
35
|
+
#Set options for the instance classifier
|
36
|
+
#ARGS:
|
37
|
+
#options -> A String object, i.e. "-K 3"
|
31
38
|
def set_options(options)
|
32
39
|
options_inst = Utils.splitOptions(options)
|
33
40
|
setOptions(options_inst)
|
@@ -37,14 +44,24 @@ module Trees_utils
|
|
37
44
|
listOptions.each {|key| puts "#{key.synopsis} #{key.description}"}
|
38
45
|
end
|
39
46
|
|
47
|
+
#Print a short description of the selected classifier
|
40
48
|
def description
|
41
49
|
puts globalInfo
|
42
50
|
end
|
43
51
|
|
52
|
+
# Perform cross-validation on a trained classifier
|
53
|
+
#ARGS:
|
54
|
+
#fold -> 'int' value
|
44
55
|
def cross_validate(fold)
|
45
|
-
|
46
|
-
|
47
|
-
|
56
|
+
if self.class.data
|
57
|
+
eval = Weka::Classifier::Evaluation.new self.class.data
|
58
|
+
eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
|
59
|
+
eval.summary
|
60
|
+
else
|
61
|
+
eval = Weka::Classifier::Evaluation.new @dataset
|
62
|
+
eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
|
63
|
+
eval.summary
|
64
|
+
end
|
48
65
|
end
|
49
66
|
|
50
67
|
#Class methods module
|
@@ -2,6 +2,7 @@ $:.unshift File.dirname(__FILE__)
|
|
2
2
|
require 'clusterers_utils'
|
3
3
|
|
4
4
|
module Weka
|
5
|
+
#This module contains the clusterers from the 'weka.clusterers' package
|
5
6
|
module Clusterer
|
6
7
|
java_import 'weka.clusterers.SimpleKMeans'
|
7
8
|
java_import 'weka.clusterers.FarthestFirst'
|
@@ -13,9 +14,13 @@ module Weka
|
|
13
14
|
class Cobweb
|
14
15
|
include Clusterer_utils
|
15
16
|
class Base < Cobweb
|
16
|
-
def initialize
|
17
|
+
def initialize(&block)
|
17
18
|
super
|
18
|
-
|
19
|
+
if block_given?
|
20
|
+
init_instance_clusterer(&block)
|
21
|
+
else
|
22
|
+
init_clusterer
|
23
|
+
end
|
19
24
|
end
|
20
25
|
end
|
21
26
|
end
|
@@ -23,9 +28,13 @@ module Weka
|
|
23
28
|
class EM
|
24
29
|
include Clusterer_utils
|
25
30
|
class Base < EM
|
26
|
-
def initialize
|
31
|
+
def initialize(&block)
|
27
32
|
super
|
28
|
-
|
33
|
+
if block_given?
|
34
|
+
init_instance_clusterer(&block)
|
35
|
+
else
|
36
|
+
init_clusterer
|
37
|
+
end
|
29
38
|
end
|
30
39
|
end
|
31
40
|
end
|
@@ -33,20 +42,27 @@ module Weka
|
|
33
42
|
class HierarchicalClusterer
|
34
43
|
include Clusterer_utils
|
35
44
|
class Base < HierarchicalClusterer
|
36
|
-
def initialize
|
45
|
+
def initialize(&block)
|
37
46
|
super
|
38
|
-
|
47
|
+
if block_given?
|
48
|
+
init_instance_clusterer(&block)
|
49
|
+
else
|
50
|
+
init_clusterer
|
51
|
+
end
|
39
52
|
end
|
40
53
|
end
|
41
54
|
end
|
42
55
|
|
43
|
-
|
44
56
|
class SimpleKMeans
|
45
57
|
include Clusterer_utils
|
46
58
|
class Base < SimpleKMeans
|
47
|
-
def initialize
|
59
|
+
def initialize(&block)
|
48
60
|
super
|
49
|
-
|
61
|
+
if block_given?
|
62
|
+
init_instance_clusterer(&block)
|
63
|
+
else
|
64
|
+
init_clusterer
|
65
|
+
end
|
50
66
|
end
|
51
67
|
end
|
52
68
|
end
|
@@ -54,9 +70,13 @@ module Weka
|
|
54
70
|
class FarthestFirst
|
55
71
|
include Clusterer_utils
|
56
72
|
class Base < FarthestFirst
|
57
|
-
def initialize
|
73
|
+
def initialize(&block)
|
58
74
|
super
|
59
|
-
|
75
|
+
if block_given?
|
76
|
+
init_instance_clusterer(&block)
|
77
|
+
else
|
78
|
+
init_clusterer
|
79
|
+
end
|
60
80
|
end
|
61
81
|
end
|
62
82
|
end
|
@@ -64,9 +84,13 @@ module Weka
|
|
64
84
|
class XMeans
|
65
85
|
include Clusterer_utils
|
66
86
|
class Base < XMeans
|
67
|
-
def initialize
|
87
|
+
def initialize(&block)
|
68
88
|
super
|
69
|
-
|
89
|
+
if block_given?
|
90
|
+
init_instance_clusterer(&block)
|
91
|
+
else
|
92
|
+
init_clusterer
|
93
|
+
end
|
70
94
|
end
|
71
95
|
end
|
72
96
|
end
|
@@ -1,61 +1,86 @@
|
|
1
1
|
#This module is used by the classes from the Clusterer module
|
2
2
|
#to inherit the following methods (instance and class methods)
|
3
3
|
module Clusterer_utils
|
4
|
-
|
5
|
-
|
4
|
+
java_import "weka.core.Utils"
|
5
|
+
java_import "weka.clusterers.ClusterEvaluation"
|
6
6
|
|
7
|
-
|
7
|
+
def init_clusterer
|
8
8
|
set_options(self.class.options) if self.class.options
|
9
9
|
buildClusterer(self.class.data)
|
10
|
-
|
10
|
+
end
|
11
|
+
|
12
|
+
def init_instance_clusterer(&block)
|
13
|
+
self.instance_eval(&block)
|
14
|
+
#@dataset.setClassIndex(@class_index)
|
15
|
+
buildClusterer(@dataset)
|
16
|
+
end
|
11
17
|
|
12
|
-
|
18
|
+
#Instance methods list
|
13
19
|
def self.included(base)
|
14
20
|
base.extend(ClassMethods)
|
15
21
|
end
|
16
22
|
|
23
|
+
#set instance data for the clusterer
|
24
|
+
def set_data(data)
|
25
|
+
@dataset = data
|
26
|
+
end
|
27
|
+
|
28
|
+
#set options for the clusterer
|
17
29
|
def set_options(options)
|
18
|
-
|
19
|
-
|
30
|
+
options_inst = Utils.splitOptions(options)
|
31
|
+
setOptions(options_inst)
|
20
32
|
end
|
21
33
|
|
22
|
-
|
23
|
-
|
24
|
-
|
34
|
+
def list_options
|
35
|
+
listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
|
36
|
+
end
|
25
37
|
|
26
|
-
|
38
|
+
#the description provided by the Weka Documentation
|
39
|
+
def description
|
27
40
|
globalInfo
|
28
|
-
|
41
|
+
end
|
29
42
|
|
43
|
+
#list cluster centroids with coordinates
|
30
44
|
def get_centroids
|
31
45
|
getClusterCentroids
|
32
46
|
end
|
33
47
|
|
48
|
+
#list the clusterer's capabilities with attributes (e.g. Numeric, Nominal...)
|
34
49
|
def list_capabilities
|
35
50
|
get_capabilities.to_s
|
36
51
|
end
|
37
52
|
|
38
|
-
#
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
53
|
+
# Evaluate the clusterer. If the evaluation needs to be performed on a different dataset, this method accepts
|
54
|
+
# an optional parameter (an Instances class object)
|
55
|
+
def evaluate(*args)
|
56
|
+
eval = ClusterEvaluation.new
|
57
|
+
eval.setClusterer(self)
|
58
|
+
if not args[0]
|
59
|
+
if self.class.data
|
60
|
+
eval.evaluateClusterer(self.class.data)
|
61
|
+
else
|
62
|
+
eval.evaluateClusterer(@dataset)
|
63
|
+
end
|
64
|
+
else
|
65
|
+
eval.evaluateClusterer(args[0])
|
66
|
+
end
|
67
|
+
puts 'performing evaluation'
|
68
|
+
eval.clusterResultsToString
|
69
|
+
end
|
70
|
+
|
71
|
+
#Class methods module
|
72
|
+
module ClassMethods
|
73
|
+
|
74
|
+
def self.classifier_attr_accessor(*args)
|
75
|
+
args.each do |arg|
|
76
|
+
#Here's the getter
|
77
|
+
self.class_eval("def #{arg};@#{arg};end")
|
78
|
+
#Here's the setter
|
79
|
+
self.class_eval("def set_#{arg}(val);@#{arg}=val;end")
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
classifier_attr_accessor :options,:data
|
84
|
+
|
85
|
+
end
|
61
86
|
end
|
@@ -27,6 +27,10 @@ module Weka
|
|
27
27
|
|
28
28
|
class AddCluster
|
29
29
|
include Unsupervised_Util
|
30
|
+
alias_method :clusterer, :set_clusterer
|
31
|
+
def set_clusterer(index)
|
32
|
+
set_clusterer(index)
|
33
|
+
end
|
30
34
|
end
|
31
35
|
|
32
36
|
class Center
|
@@ -52,8 +56,12 @@ module Weka
|
|
52
56
|
class PrincipalComponents
|
53
57
|
include Unsupervised_Util
|
54
58
|
end
|
55
|
-
|
59
|
+
|
56
60
|
class Remove
|
61
|
+
alias_method :attribute_indices, :setAttributeIndices
|
62
|
+
def setAttributeIndices(index)
|
63
|
+
setAttributeIndices(index)
|
64
|
+
end
|
57
65
|
include Unsupervised_Util
|
58
66
|
end
|
59
67
|
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'test/unit'
|
11
|
+
require 'shoulda'
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-band'
|
16
|
+
|
17
|
+
class Test::Unit::TestCase
|
18
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestApacheCorrelation < Test::Unit::TestCase
|
4
|
+
|
5
|
+
context "Apache correlation methods" do
|
6
|
+
should "calculate covariance having as input two Ruby arrays" do
|
7
|
+
result = Apache::Stat::Correlation.covariance([1,2,3,4],[6,5,2,0])
|
8
|
+
assert_equal -3.5, result
|
9
|
+
end
|
10
|
+
|
11
|
+
should "calculate Pearson correlation having as input two Ruby arrays" do
|
12
|
+
result = Apache::Stat::Correlation.pearson_correlation([1,2,3,4],[1,2,3,4])
|
13
|
+
assert_equal 1, result
|
14
|
+
end
|
15
|
+
|
16
|
+
should "calculate Spearman correlation having as input two Ruby arrays" do
|
17
|
+
result = Apache::Stat::Correlation.spearman_correlation([1,2,3,4],[4,3,2,1])
|
18
|
+
assert_equal -1, result
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestApacheInference < Test::Unit::TestCase
|
4
|
+
|
5
|
+
context "Apache inference module" do
|
6
|
+
should "Perform Wilcoxon signed rank test" do
|
7
|
+
val,p_val = Apache::Stat::Inference.wilcoxon_test([1,2,3,4],[6,5,2,0])
|
8
|
+
assert_equal 6,val
|
9
|
+
assert_equal 0.875,p_val
|
10
|
+
end
|
11
|
+
|
12
|
+
should "Computes the Chi-Square statistic comparing observed and expected frequency counts" do
|
13
|
+
val,p_val = Apache::Stat::Inference.chi_square([1,2,3,4,5],[1,2,3,4,5])
|
14
|
+
assert_equal 0,val
|
15
|
+
assert_equal 1,p_val
|
16
|
+
val,p_val = Apache::Stat::Inference.chi_square [[1,2,3,4,5],[1,2,3,4,5]]
|
17
|
+
assert_equal 0,val
|
18
|
+
assert_equal 1,p_val
|
19
|
+
end
|
20
|
+
|
21
|
+
should "Perform the Mann-Whitney U test on two input datasets" do
|
22
|
+
val,p_val = Apache::Stat::Inference.mann_whitney_u([1,2,3,4,5],[1,2,3,4,5])
|
23
|
+
assert_equal 12.5,val
|
24
|
+
assert_equal 1,p_val
|
25
|
+
end
|
26
|
+
|
27
|
+
should "Perform a homoscedastic T test on two input datasets" do
|
28
|
+
val,p_val = Apache::Stat::Inference.t_test([1,2,3,4,5],[10,11,12,13,14],homoscedastic=true)
|
29
|
+
assert_equal -9,val
|
30
|
+
assert_equal 1.853118429643006e-05,p_val
|
31
|
+
end
|
32
|
+
|
33
|
+
should "Perform a paired T test on two input datasets" do
|
34
|
+
val,p_val = Apache::Stat::Inference.t_test([1,2,3,4,5],[10,11,12,13,14])
|
35
|
+
assert_equal -9,val
|
36
|
+
assert_equal 1.853118429643006e-05,p_val
|
37
|
+
end
|
38
|
+
|
39
|
+
should "Calculate one-way ANOVA (analysis of variance) statistics on input data" do
|
40
|
+
val,p_val = Apache::Stat::Inference.one_way_anova [[1,2,3,4,5],[10,11,12,13,14]]
|
41
|
+
assert_equal 81,val
|
42
|
+
assert_equal 1.8531184296399772e-05,p_val
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
end
|