bio-band 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/Gemfile.lock +5 -0
- data/Jarfile +1 -1
- data/Jarfile.lock +1 -1
- data/README.rdoc +2 -0
- data/Rakefile +2 -1
- data/VERSION +1 -1
- data/band_server/client.rb +35 -0
- data/band_server/client_alt.rb +35 -0
- data/band_server/first_dataset.csv +15 -0
- data/band_server/second_dataset.csv +15 -0
- data/band_server/simple_server.rb +95 -0
- data/band_server/third_dataset.csv +15 -0
- data/band_server/uploads/first_dataset.csv +15 -0
- data/band_server/uploads/second_dataset.csv +15 -0
- data/band_server/uploads/third_dataset.csv +15 -0
- data/bio-band.gemspec +19 -3
- data/features/step_definitions/weka_classifiers.rb +3 -2
- data/features/weka_classifiers.feature +13 -13
- data/lib/bio-band.rb +2 -0
- data/lib/bio-band/apache/stat/inference.rb +25 -19
- data/lib/bio-band/apache/stat/regression.rb +2 -2
- data/lib/bio-band/core/parser/parser.rb +6 -6
- data/lib/bio-band/core/type/instances.rb +15 -5
- data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb +2 -0
- data/lib/bio-band/weka/attribute_selection/evaluators.rb +2 -1
- data/lib/bio-band/weka/attribute_selection/search.rb +1 -0
- data/lib/bio-band/weka/classifiers/bayes/bayes.rb +1 -0
- data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +16 -3
- data/lib/bio-band/weka/classifiers/evaluation.rb +9 -9
- data/lib/bio-band/weka/classifiers/functions/functions.rb +1 -0
- data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +16 -3
- data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +21 -3
- data/lib/bio-band/weka/classifiers/mi/mi.rb +1 -0
- data/lib/bio-band/weka/classifiers/mi/mi_utils.rb +18 -3
- data/lib/bio-band/weka/classifiers/rules/rules_utils.rb +20 -4
- data/lib/bio-band/weka/classifiers/trees/trees.rb +1 -0
- data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +20 -3
- data/lib/bio-band/weka/clusterers/clusterers.rb +37 -13
- data/lib/bio-band/weka/clusterers/clusterers_utils.rb +60 -35
- data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +9 -1
- data/test/helper.rb +18 -0
- data/test/test_apacheCorrelation.rb +22 -0
- data/test/test_apacheInference.rb +46 -0
- data/test/test_bio-band.rb +9 -0
- metadata +33 -2
data/lib/bio-band.rb
CHANGED
@@ -2,28 +2,28 @@ require 'java'
|
|
2
2
|
|
3
3
|
module Apache
|
4
4
|
module Stat
|
5
|
-
|
5
|
+
module Inference
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
7
|
+
java_import 'org.apache.commons.math3.stat.inference.ChiSquareTest'
|
8
|
+
java_import 'org.apache.commons.math3.stat.inference.MannWhitneyUTest'
|
9
|
+
java_import 'org.apache.commons.math3.stat.inference.OneWayAnova'
|
10
|
+
java_import 'org.apache.commons.math3.stat.inference.TTest'
|
11
|
+
java_import 'org.apache.commons.math3.stat.inference.WilcoxonSignedRankTest'
|
12
|
+
java_import 'org.apache.commons.math3.stat.StatUtils'
|
13
13
|
java_import 'java.util.ArrayList'
|
14
|
-
|
14
|
+
|
15
15
|
# An implementation of the Wilcoxon signed-rank test
|
16
16
|
# * *Args* :
|
17
17
|
# - +Array1+ -> must be a RubyArray.
|
18
18
|
# - +Array2+ -> must be a RubyArray.
|
19
|
-
|
20
|
-
|
19
|
+
def self.wilcoxon_test(array_1,array_2)
|
20
|
+
obj = WilcoxonSignedRankTest.new
|
21
21
|
first = Core::Utils::double_to_a(array_1)
|
22
22
|
second = Core::Utils::double_to_a(array_2)
|
23
|
-
|
23
|
+
val = obj.wilcoxonSignedRank first, second
|
24
24
|
p_val = obj.wilcoxonSignedRankTest first, second, true.to_java(:boolean)
|
25
|
-
|
26
|
-
|
25
|
+
return val,p_val
|
26
|
+
end
|
27
27
|
|
28
28
|
# Utility class called by 'chi_square' method in this same package
|
29
29
|
class Chi_square
|
@@ -61,6 +61,7 @@ module Apache
|
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
64
|
+
# Compare two datasets stored in Ruby Arrays
|
64
65
|
def self.chi_square_dataset_compare(observed1,observed2)
|
65
66
|
obj = ChiSquareTest.new
|
66
67
|
val = obj.chiSquareDataSetsComparison(observed1.to_java(:long),observed2.to_java(:long))
|
@@ -68,12 +69,17 @@ module Apache
|
|
68
69
|
return val,p_value
|
69
70
|
end
|
70
71
|
|
71
|
-
|
72
|
+
# An implementation of the Mann-Whitney U test
|
73
|
+
# (also called Wilcoxon rank-sum test)
|
74
|
+
# * *Args* :
|
75
|
+
# - +Array1+ -> must be a RubyArray.
|
76
|
+
# - +Array2+ -> must be a RubyArray.
|
77
|
+
def self.mann_whitney_u(array1,array2)
|
72
78
|
obj = MannWhitneyUTest.new
|
73
|
-
first =
|
74
|
-
second =
|
75
|
-
value = mannWhitneyU first,second
|
76
|
-
p_value = mannWhitneyUTest first,second
|
79
|
+
first = array1.to_java :double
|
80
|
+
second = array2.to_java :double
|
81
|
+
value = obj.mannWhitneyU first,second
|
82
|
+
p_value = obj.mannWhitneyUTest first,second
|
77
83
|
return value,p_value
|
78
84
|
end
|
79
85
|
|
@@ -140,6 +146,6 @@ module Apache
|
|
140
146
|
p_value = obj.anovaPValue(collection)
|
141
147
|
return f_value,p_value
|
142
148
|
end
|
143
|
-
|
149
|
+
end
|
144
150
|
end
|
145
151
|
end
|
@@ -2,7 +2,7 @@ require 'java'
|
|
2
2
|
|
3
3
|
module Apache
|
4
4
|
module Stat
|
5
|
-
|
5
|
+
module Regression
|
6
6
|
java_import "org.apache.commons.math3.stat.regression.SimpleRegression"
|
7
7
|
|
8
8
|
# Create a simple regression model on the input data
|
@@ -17,6 +17,6 @@ module Apache
|
|
17
17
|
end
|
18
18
|
|
19
19
|
|
20
|
-
|
20
|
+
end
|
21
21
|
end
|
22
22
|
end
|
@@ -2,22 +2,22 @@ require 'java'
|
|
2
2
|
|
3
3
|
module Core
|
4
4
|
module Parser
|
5
|
-
|
6
|
-
|
5
|
+
# Parse an ARFF file and create an Instances object
|
6
|
+
def Parser.parse_ARFF(arff_file)
|
7
7
|
java_import 'java.io.FileReader'
|
8
8
|
file_in = FileReader.new arff_file
|
9
9
|
data_instance = Core::Type::Instances.new file_in
|
10
10
|
return data_instance
|
11
|
-
|
11
|
+
end
|
12
12
|
# Parse a CSV file and create an Instances object
|
13
|
-
|
14
|
-
|
13
|
+
def Parser.parse_CSV(csv_file)
|
14
|
+
java_import 'weka.core.converters.CSVLoader'
|
15
15
|
java_import 'java.io.File'
|
16
16
|
loader = CSVLoader.new
|
17
17
|
file = File.new csv_file
|
18
18
|
loader.setSource(file)
|
19
19
|
data_instance = loader.getDataSet
|
20
20
|
return data_instance
|
21
|
-
|
21
|
+
end
|
22
22
|
end
|
23
23
|
end
|
@@ -61,10 +61,18 @@ module Core
|
|
61
61
|
enumerate_instances.each {|inst| yield(inst)}
|
62
62
|
end
|
63
63
|
|
64
|
+
def each_row_with_index
|
65
|
+
enumerate_instances.each_with_index {|inst,id| yield(inst,id)}
|
66
|
+
end
|
67
|
+
|
64
68
|
def each_column
|
65
69
|
enumerate_attributes.each {|attribute| yield(attribute)}
|
66
70
|
end
|
67
71
|
|
72
|
+
def each_column_with_index
|
73
|
+
enumerate_attributes.each_with_index {|attribute,id| yield(attribute,id)}
|
74
|
+
end
|
75
|
+
|
68
76
|
# Check if this instance's attributes are all Numeric
|
69
77
|
def check_numeric_instance
|
70
78
|
enumerateAttributes.each do |att|
|
@@ -226,7 +234,7 @@ module Core
|
|
226
234
|
summary = Ruport::Data::Table::new
|
227
235
|
summary.add_column 'Attributes'
|
228
236
|
enumerateAttributes.each_with_index do |att,idx|
|
229
|
-
summary.add_column idx
|
237
|
+
summary.add_column idx
|
230
238
|
end
|
231
239
|
|
232
240
|
att_names = ['Names']
|
@@ -244,13 +252,15 @@ module Core
|
|
244
252
|
end
|
245
253
|
summary << att_types
|
246
254
|
|
247
|
-
|
255
|
+
display = []
|
256
|
+
display << summary
|
248
257
|
|
249
258
|
unless enumerate_instances.nil?
|
250
259
|
count=0
|
251
260
|
enumerateInstances.each {|inst| count=count+1}
|
252
|
-
|
253
|
-
end
|
261
|
+
display << "\nNumber of rows: #{count}"
|
262
|
+
end
|
263
|
+
display
|
254
264
|
end
|
255
265
|
|
256
266
|
# Merges two sets of Instances together. The resulting set will have all the
|
@@ -321,7 +331,7 @@ module Core
|
|
321
331
|
# Return a json String for the current Instances object
|
322
332
|
# The output is modeled on the 'datatable' Google charts APIs
|
323
333
|
# More details at: 'https://developers.google.com/chart/interactive/docs/reference#DataTable'
|
324
|
-
def
|
334
|
+
def to_json_format
|
325
335
|
dataset_hash = Hash.new
|
326
336
|
dataset_hash[:cols] = enumerateAttributes.collect {|attribute| attribute.name}
|
327
337
|
dataset_hash[:rows] = enumerateInstances.collect {|instance| instance.toString}
|
@@ -6,11 +6,13 @@ module Attribute_selection_Utils
|
|
6
6
|
listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
|
7
7
|
end
|
8
8
|
|
9
|
+
#Set options for an evaluator or a search algorithm
|
9
10
|
def select_options(options_string)
|
10
11
|
options = Utils.splitOptions(options_string)
|
11
12
|
set_options(options)
|
12
13
|
end
|
13
14
|
|
15
|
+
#Return a short description for the selected evaluator object or search algorithm
|
14
16
|
def description
|
15
17
|
globalInfo
|
16
18
|
end
|
@@ -4,12 +4,13 @@ require 'attribute_selection_utils'
|
|
4
4
|
module Weka
|
5
5
|
module Attribute_selection
|
6
6
|
module Evaluator
|
7
|
+
#This module contains evaluators from the 'weka.attributeSelection' packages
|
7
8
|
java_import 'weka.attributeSelection.CfsSubsetEval'
|
8
9
|
java_import 'weka.attributeSelection.ChiSquaredAttributeEval'
|
9
10
|
|
10
11
|
class CfsSubsetEval
|
11
12
|
include Attribute_selection_Utils
|
12
|
-
|
13
|
+
# java_alias :use_options , :setOptions, [Java::Java.lang.String[]]
|
13
14
|
end
|
14
15
|
|
15
16
|
class ChiSquaredAttributeEval
|
@@ -16,10 +16,12 @@ module Bayes_utils
|
|
16
16
|
build_classifier(@dataset)
|
17
17
|
end
|
18
18
|
|
19
|
+
# set data for instance classifier
|
19
20
|
def set_data(data)
|
20
21
|
@dataset = data
|
21
22
|
end
|
22
23
|
|
24
|
+
# set class index for the input dataset
|
23
25
|
def set_class_index(class_index)
|
24
26
|
@class_index = class_index
|
25
27
|
end
|
@@ -29,6 +31,7 @@ module Bayes_utils
|
|
29
31
|
base.extend(ClassMethods)
|
30
32
|
end
|
31
33
|
|
34
|
+
# set classifier options
|
32
35
|
def set_options(options)
|
33
36
|
options_inst = Utils.splitOptions(options)
|
34
37
|
setOptions(options_inst)
|
@@ -38,14 +41,24 @@ module Bayes_utils
|
|
38
41
|
listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
|
39
42
|
end
|
40
43
|
|
44
|
+
# return the description reported in the Weka Java doc
|
41
45
|
def description
|
42
46
|
globalInfo
|
43
47
|
end
|
44
48
|
|
49
|
+
# perform crossvalidation on a trained classifier
|
50
|
+
#ARGV:
|
51
|
+
#fold -> 'int' value
|
45
52
|
def cross_validate(fold)
|
46
|
-
|
47
|
-
|
48
|
-
|
53
|
+
if self.class.data
|
54
|
+
eval = Weka::Classifier::Evaluation.new self.class.data
|
55
|
+
eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
|
56
|
+
eval.summary
|
57
|
+
else
|
58
|
+
eval = Weka::Classifier::Evaluation.new @dataset
|
59
|
+
eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
|
60
|
+
eval.summary
|
61
|
+
end
|
49
62
|
end
|
50
63
|
|
51
64
|
#Class methods module
|
@@ -1,13 +1,13 @@
|
|
1
1
|
module Weka
|
2
|
-
|
3
|
-
|
4
|
-
java_import 'java.util.Random'
|
2
|
+
module Classifier
|
3
|
+
java_import 'weka.classifiers.Evaluation'
|
5
4
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
5
|
+
# Weka Evaluation class to be used with classifiers and clusterers
|
6
|
+
class Evaluation
|
7
|
+
def summary
|
8
|
+
toSummaryString
|
9
|
+
end
|
10
|
+
end
|
11
11
|
|
12
|
-
|
12
|
+
end
|
13
13
|
end
|
@@ -3,6 +3,7 @@ require 'functions_utils'
|
|
3
3
|
|
4
4
|
module Weka
|
5
5
|
module Classifier
|
6
|
+
#This module stores the classifiers from the 'weka.classifiers.functions' package
|
6
7
|
module Functions
|
7
8
|
java_import 'weka.classifiers.functions.LinearRegression'
|
8
9
|
java_import 'weka.classifiers.functions.PLSClassifier'
|
@@ -20,10 +20,12 @@ module Functions_utils
|
|
20
20
|
build_classifier(@dataset)
|
21
21
|
end
|
22
22
|
|
23
|
+
#Set instance data for the instance classifier
|
23
24
|
def set_data(data)
|
24
25
|
@dataset = data
|
25
26
|
end
|
26
27
|
|
28
|
+
#Set a class index for the input dataset
|
27
29
|
def set_class_index(class_index)
|
28
30
|
@class_index = class_index
|
29
31
|
end
|
@@ -33,18 +35,29 @@ module Functions_utils
|
|
33
35
|
setOptions(options_inst)
|
34
36
|
end
|
35
37
|
|
38
|
+
#List available options
|
36
39
|
def list_options
|
37
40
|
listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
|
38
41
|
end
|
39
42
|
|
43
|
+
#Return a description from the Weka Javadoc for the selected classifier
|
40
44
|
def description
|
41
45
|
puts globalInfo
|
42
46
|
end
|
43
47
|
|
48
|
+
# perform crossvalidation on a trained classifier
|
49
|
+
#ARGV:
|
50
|
+
#fold -> 'int' value
|
44
51
|
def cross_validate(fold)
|
45
|
-
|
46
|
-
|
47
|
-
|
52
|
+
if self.class.data
|
53
|
+
eval = Weka::Classifier::Evaluation.new self.class.data
|
54
|
+
eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
|
55
|
+
eval.summary
|
56
|
+
else
|
57
|
+
eval = Weka::Classifier::Evaluation.new @dataset
|
58
|
+
eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
|
59
|
+
eval.summary
|
60
|
+
end
|
48
61
|
end
|
49
62
|
|
50
63
|
#Class methods module
|
@@ -20,31 +20,49 @@ module Lazy_utils
|
|
20
20
|
build_classifier(@dataset)
|
21
21
|
end
|
22
22
|
|
23
|
+
#Set data for instance classifier
|
24
|
+
#ARGV
|
25
|
+
# data -> an Instances object
|
23
26
|
def set_data(data)
|
24
27
|
@dataset = data
|
25
28
|
end
|
26
29
|
|
30
|
+
#Set a class index for the input dataset
|
27
31
|
def set_class_index(class_index)
|
28
32
|
@class_index = class_index
|
29
33
|
end
|
30
34
|
|
35
|
+
#Set options for the selected classifier
|
36
|
+
#ARGS:
|
37
|
+
#options -> a String, i.e. "-K"
|
31
38
|
def set_options(options)
|
32
39
|
options_inst = Utils.splitOptions(options)
|
33
40
|
setOptions(options_inst)
|
34
41
|
end
|
35
42
|
|
43
|
+
#List available options
|
36
44
|
def list_options
|
37
45
|
listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
|
38
46
|
end
|
39
47
|
|
48
|
+
#Return a description from the Weka JavaDoc for the selected classifier
|
40
49
|
def description
|
41
50
|
puts globalInfo
|
42
51
|
end
|
43
52
|
|
53
|
+
# perform crossvalidation on a trained classifier
|
54
|
+
#ARGV:
|
55
|
+
#fold -> 'int' value
|
44
56
|
def cross_validate(fold)
|
45
|
-
|
46
|
-
|
47
|
-
|
57
|
+
if self.class.data
|
58
|
+
eval = Weka::Classifier::Evaluation.new self.class.data
|
59
|
+
eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
|
60
|
+
eval.summary
|
61
|
+
else
|
62
|
+
eval = Weka::Classifier::Evaluation.new @dataset
|
63
|
+
eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
|
64
|
+
eval.summary
|
65
|
+
end
|
48
66
|
end
|
49
67
|
|
50
68
|
#Class methods module
|
@@ -4,6 +4,7 @@ require 'mi_utils'
|
|
4
4
|
module Weka
|
5
5
|
module Classifier
|
6
6
|
module Mi
|
7
|
+
#This module contains classifiers from the 'weka.classifiers.mi' package
|
7
8
|
java_import 'weka.classifiers.mi.CitationKNN'
|
8
9
|
java_import 'weka.classifiers.mi.MDD'
|
9
10
|
java_import 'weka.classifiers.mi.MIBoost'
|
@@ -20,10 +20,14 @@ module Mi_utils
|
|
20
20
|
build_classifier(@dataset)
|
21
21
|
end
|
22
22
|
|
23
|
+
#Set input data for the selected classifier
|
24
|
+
#ARGV:
|
25
|
+
#data -> an Instances class object
|
23
26
|
def set_data(data)
|
24
27
|
@dataset = data
|
25
28
|
end
|
26
29
|
|
30
|
+
#Set class index for the input dataset
|
27
31
|
def set_class_index(class_index)
|
28
32
|
@class_index = class_index
|
29
33
|
end
|
@@ -33,18 +37,29 @@ module Mi_utils
|
|
33
37
|
setOptions(options_inst)
|
34
38
|
end
|
35
39
|
|
40
|
+
#List options for the selected classifier
|
36
41
|
def list_options
|
37
42
|
listOptions.each {|key| puts "#{key.synopsis} #{key.description}"}
|
38
43
|
end
|
39
44
|
|
45
|
+
#Return a short description for the current classifier
|
40
46
|
def description
|
41
47
|
puts globalInfo
|
42
48
|
end
|
43
49
|
|
50
|
+
# perform crossvalidation on a trained classifier
|
51
|
+
#ARGV:
|
52
|
+
#fold -> 'int' value
|
44
53
|
def cross_validate(fold)
|
45
|
-
|
46
|
-
|
47
|
-
|
54
|
+
if self.class.data
|
55
|
+
eval = Weka::Classifier::Evaluation.new self.class.data
|
56
|
+
eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
|
57
|
+
eval.summary
|
58
|
+
else
|
59
|
+
eval = Weka::Classifier::Evaluation.new @dataset
|
60
|
+
eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
|
61
|
+
eval.summary
|
62
|
+
end
|
48
63
|
end
|
49
64
|
|
50
65
|
#Class methods module
|