bio-band 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/Gemfile.lock +5 -0
- data/Jarfile +1 -1
- data/Jarfile.lock +1 -1
- data/README.rdoc +2 -0
- data/Rakefile +2 -1
- data/VERSION +1 -1
- data/band_server/client.rb +35 -0
- data/band_server/client_alt.rb +35 -0
- data/band_server/first_dataset.csv +15 -0
- data/band_server/second_dataset.csv +15 -0
- data/band_server/simple_server.rb +95 -0
- data/band_server/third_dataset.csv +15 -0
- data/band_server/uploads/first_dataset.csv +15 -0
- data/band_server/uploads/second_dataset.csv +15 -0
- data/band_server/uploads/third_dataset.csv +15 -0
- data/bio-band.gemspec +19 -3
- data/features/step_definitions/weka_classifiers.rb +3 -2
- data/features/weka_classifiers.feature +13 -13
- data/lib/bio-band.rb +2 -0
- data/lib/bio-band/apache/stat/inference.rb +25 -19
- data/lib/bio-band/apache/stat/regression.rb +2 -2
- data/lib/bio-band/core/parser/parser.rb +6 -6
- data/lib/bio-band/core/type/instances.rb +15 -5
- data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb +2 -0
- data/lib/bio-band/weka/attribute_selection/evaluators.rb +2 -1
- data/lib/bio-band/weka/attribute_selection/search.rb +1 -0
- data/lib/bio-band/weka/classifiers/bayes/bayes.rb +1 -0
- data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +16 -3
- data/lib/bio-band/weka/classifiers/evaluation.rb +9 -9
- data/lib/bio-band/weka/classifiers/functions/functions.rb +1 -0
- data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +16 -3
- data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +21 -3
- data/lib/bio-band/weka/classifiers/mi/mi.rb +1 -0
- data/lib/bio-band/weka/classifiers/mi/mi_utils.rb +18 -3
- data/lib/bio-band/weka/classifiers/rules/rules_utils.rb +20 -4
- data/lib/bio-band/weka/classifiers/trees/trees.rb +1 -0
- data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +20 -3
- data/lib/bio-band/weka/clusterers/clusterers.rb +37 -13
- data/lib/bio-band/weka/clusterers/clusterers_utils.rb +60 -35
- data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +9 -1
- data/test/helper.rb +18 -0
- data/test/test_apacheCorrelation.rb +22 -0
- data/test/test_apacheInference.rb +46 -0
- data/test/test_bio-band.rb +9 -0
- metadata +33 -2
data/lib/bio-band.rb
CHANGED
@@ -2,28 +2,28 @@ require 'java'
|
|
2
2
|
|
3
3
|
module Apache
|
4
4
|
module Stat
|
5
|
-
|
5
|
+
module Inference
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
7
|
+
java_import 'org.apache.commons.math3.stat.inference.ChiSquareTest'
|
8
|
+
java_import 'org.apache.commons.math3.stat.inference.MannWhitneyUTest'
|
9
|
+
java_import 'org.apache.commons.math3.stat.inference.OneWayAnova'
|
10
|
+
java_import 'org.apache.commons.math3.stat.inference.TTest'
|
11
|
+
java_import 'org.apache.commons.math3.stat.inference.WilcoxonSignedRankTest'
|
12
|
+
java_import 'org.apache.commons.math3.stat.StatUtils'
|
13
13
|
java_import 'java.util.ArrayList'
|
14
|
-
|
14
|
+
|
15
15
|
# An implementation of the Wilcoxon signed-rank test
|
16
16
|
# * *Args* :
|
17
17
|
# - +Array1+ -> must be a RubyArray.
|
18
18
|
# - +Array2+ -> must be a RubyArray.
|
19
|
-
|
20
|
-
|
19
|
+
def self.wilcoxon_test(array_1,array_2)
|
20
|
+
obj = WilcoxonSignedRankTest.new
|
21
21
|
first = Core::Utils::double_to_a(array_1)
|
22
22
|
second = Core::Utils::double_to_a(array_2)
|
23
|
-
|
23
|
+
val = obj.wilcoxonSignedRank first, second
|
24
24
|
p_val = obj.wilcoxonSignedRankTest first, second, true.to_java(:boolean)
|
25
|
-
|
26
|
-
|
25
|
+
return val,p_val
|
26
|
+
end
|
27
27
|
|
28
28
|
# Utility class called by 'chi_square' method in this same package
|
29
29
|
class Chi_square
|
@@ -61,6 +61,7 @@ module Apache
|
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
64
|
+
# Compare two datasets stored in Ruby Arrays
|
64
65
|
def self.chi_square_dataset_compare(observed1,observed2)
|
65
66
|
obj = ChiSquareTest.new
|
66
67
|
val = obj.chiSquareDataSetsComparison(observed1.to_java(:long),observed2.to_java(:long))
|
@@ -68,12 +69,17 @@ module Apache
|
|
68
69
|
return val,p_value
|
69
70
|
end
|
70
71
|
|
71
|
-
|
72
|
+
# An implementation of the Mann-Whitney U test
|
73
|
+
# (also called Wilcoxon rank-sum test)
|
74
|
+
# * *Args* :
|
75
|
+
# - +Array1+ -> must be a RubyArray.
|
76
|
+
# - +Array2+ -> must be a RubyArray.
|
77
|
+
def self.mann_whitney_u(array1,array2)
|
72
78
|
obj = MannWhitneyUTest.new
|
73
|
-
first =
|
74
|
-
second =
|
75
|
-
value = mannWhitneyU first,second
|
76
|
-
p_value = mannWhitneyUTest first,second
|
79
|
+
first = array1.to_java :double
|
80
|
+
second = array2.to_java :double
|
81
|
+
value = obj.mannWhitneyU first,second
|
82
|
+
p_value = obj.mannWhitneyUTest first,second
|
77
83
|
return value,p_value
|
78
84
|
end
|
79
85
|
|
@@ -140,6 +146,6 @@ module Apache
|
|
140
146
|
p_value = obj.anovaPValue(collection)
|
141
147
|
return f_value,p_value
|
142
148
|
end
|
143
|
-
|
149
|
+
end
|
144
150
|
end
|
145
151
|
end
|
@@ -2,7 +2,7 @@ require 'java'
|
|
2
2
|
|
3
3
|
module Apache
|
4
4
|
module Stat
|
5
|
-
|
5
|
+
module Regression
|
6
6
|
java_import "org.apache.commons.math3.stat.regression.SimpleRegression"
|
7
7
|
|
8
8
|
# Create a simple regression model on the input data
|
@@ -17,6 +17,6 @@ module Apache
|
|
17
17
|
end
|
18
18
|
|
19
19
|
|
20
|
-
|
20
|
+
end
|
21
21
|
end
|
22
22
|
end
|
@@ -2,22 +2,22 @@ require 'java'
|
|
2
2
|
|
3
3
|
module Core
|
4
4
|
module Parser
|
5
|
-
|
6
|
-
|
5
|
+
# Parse an ARFF file and create an Instances object
|
6
|
+
def Parser.parse_ARFF(arff_file)
|
7
7
|
java_import 'java.io.FileReader'
|
8
8
|
file_in = FileReader.new arff_file
|
9
9
|
data_instance = Core::Type::Instances.new file_in
|
10
10
|
return data_instance
|
11
|
-
|
11
|
+
end
|
12
12
|
# Parse a CSV file and create an Instances object
|
13
|
-
|
14
|
-
|
13
|
+
def Parser.parse_CSV(csv_file)
|
14
|
+
java_import 'weka.core.converters.CSVLoader'
|
15
15
|
java_import 'java.io.File'
|
16
16
|
loader = CSVLoader.new
|
17
17
|
file = File.new csv_file
|
18
18
|
loader.setSource(file)
|
19
19
|
data_instance = loader.getDataSet
|
20
20
|
return data_instance
|
21
|
-
|
21
|
+
end
|
22
22
|
end
|
23
23
|
end
|
@@ -61,10 +61,18 @@ module Core
|
|
61
61
|
enumerate_instances.each {|inst| yield(inst)}
|
62
62
|
end
|
63
63
|
|
64
|
+
def each_row_with_index
|
65
|
+
enumerate_instances.each_with_index {|inst,id| yield(inst,id)}
|
66
|
+
end
|
67
|
+
|
64
68
|
def each_column
|
65
69
|
enumerate_attributes.each {|attribute| yield(attribute)}
|
66
70
|
end
|
67
71
|
|
72
|
+
def each_column_with_index
|
73
|
+
enumerate_attributes.each_with_index {|attribute,id| yield(attribute,id)}
|
74
|
+
end
|
75
|
+
|
68
76
|
# Check if this instance's attributes are all Numeric
|
69
77
|
def check_numeric_instance
|
70
78
|
enumerateAttributes.each do |att|
|
@@ -226,7 +234,7 @@ module Core
|
|
226
234
|
summary = Ruport::Data::Table::new
|
227
235
|
summary.add_column 'Attributes'
|
228
236
|
enumerateAttributes.each_with_index do |att,idx|
|
229
|
-
summary.add_column idx
|
237
|
+
summary.add_column idx
|
230
238
|
end
|
231
239
|
|
232
240
|
att_names = ['Names']
|
@@ -244,13 +252,15 @@ module Core
|
|
244
252
|
end
|
245
253
|
summary << att_types
|
246
254
|
|
247
|
-
|
255
|
+
display = []
|
256
|
+
display << summary
|
248
257
|
|
249
258
|
unless enumerate_instances.nil?
|
250
259
|
count=0
|
251
260
|
enumerateInstances.each {|inst| count=count+1}
|
252
|
-
|
253
|
-
end
|
261
|
+
display << "\nNumber of rows: #{count}"
|
262
|
+
end
|
263
|
+
display
|
254
264
|
end
|
255
265
|
|
256
266
|
# Merges two sets of Instances together. The resulting set will have all the
|
@@ -321,7 +331,7 @@ module Core
|
|
321
331
|
# Return a json String for the current Instances object
|
322
332
|
# The output is modeled on the 'datatable' Google charts APIs
|
323
333
|
# More details at: 'https://developers.google.com/chart/interactive/docs/reference#DataTable'
|
324
|
-
def
|
334
|
+
def to_json_format
|
325
335
|
dataset_hash = Hash.new
|
326
336
|
dataset_hash[:cols] = enumerateAttributes.collect {|attribute| attribute.name}
|
327
337
|
dataset_hash[:rows] = enumerateInstances.collect {|instance| instance.toString}
|
@@ -6,11 +6,13 @@ module Attribute_selection_Utils
|
|
6
6
|
listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
|
7
7
|
end
|
8
8
|
|
9
|
+
#Set options for an evaluator or a search algorithm
|
9
10
|
def select_options(options_string)
|
10
11
|
options = Utils.splitOptions(options_string)
|
11
12
|
set_options(options)
|
12
13
|
end
|
13
14
|
|
15
|
+
#Return a short description for the selected evaluator object or search algorithm
|
14
16
|
def description
|
15
17
|
globalInfo
|
16
18
|
end
|
@@ -4,12 +4,13 @@ require 'attribute_selection_utils'
|
|
4
4
|
module Weka
|
5
5
|
module Attribute_selection
|
6
6
|
module Evaluator
|
7
|
+
#This module contains evaluators from the 'weka.attributeSelection' packages
|
7
8
|
java_import 'weka.attributeSelection.CfsSubsetEval'
|
8
9
|
java_import 'weka.attributeSelection.ChiSquaredAttributeEval'
|
9
10
|
|
10
11
|
class CfsSubsetEval
|
11
12
|
include Attribute_selection_Utils
|
12
|
-
|
13
|
+
# java_alias :use_options , :setOptions, [Java::Java.lang.String[]]
|
13
14
|
end
|
14
15
|
|
15
16
|
class ChiSquaredAttributeEval
|
@@ -16,10 +16,12 @@ module Bayes_utils
|
|
16
16
|
build_classifier(@dataset)
|
17
17
|
end
|
18
18
|
|
19
|
+
# set data for instance classifier
|
19
20
|
def set_data(data)
|
20
21
|
@dataset = data
|
21
22
|
end
|
22
23
|
|
24
|
+
# set class index for the input dataset
|
23
25
|
def set_class_index(class_index)
|
24
26
|
@class_index = class_index
|
25
27
|
end
|
@@ -29,6 +31,7 @@ module Bayes_utils
|
|
29
31
|
base.extend(ClassMethods)
|
30
32
|
end
|
31
33
|
|
34
|
+
# set classifier options
|
32
35
|
def set_options(options)
|
33
36
|
options_inst = Utils.splitOptions(options)
|
34
37
|
setOptions(options_inst)
|
@@ -38,14 +41,24 @@ module Bayes_utils
|
|
38
41
|
listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
|
39
42
|
end
|
40
43
|
|
44
|
+
# return the description reported in the Weka Java doc
|
41
45
|
def description
|
42
46
|
globalInfo
|
43
47
|
end
|
44
48
|
|
49
|
+
# perform crossvalidation on a trained classifier
|
50
|
+
#ARGV:
|
51
|
+
#fold -> 'int' value
|
45
52
|
def cross_validate(fold)
|
46
|
-
|
47
|
-
|
48
|
-
|
53
|
+
if self.class.data
|
54
|
+
eval = Weka::Classifier::Evaluation.new self.class.data
|
55
|
+
eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
|
56
|
+
eval.summary
|
57
|
+
else
|
58
|
+
eval = Weka::Classifier::Evaluation.new @dataset
|
59
|
+
eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
|
60
|
+
eval.summary
|
61
|
+
end
|
49
62
|
end
|
50
63
|
|
51
64
|
#Class methods module
|
@@ -1,13 +1,13 @@
|
|
1
1
|
module Weka
|
2
|
-
|
3
|
-
|
4
|
-
java_import 'java.util.Random'
|
2
|
+
module Classifier
|
3
|
+
java_import 'weka.classifiers.Evaluation'
|
5
4
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
5
|
+
# Weka Evaluation class to be used with classifiers and clusterers
|
6
|
+
class Evaluation
|
7
|
+
def summary
|
8
|
+
toSummaryString
|
9
|
+
end
|
10
|
+
end
|
11
11
|
|
12
|
-
|
12
|
+
end
|
13
13
|
end
|
@@ -3,6 +3,7 @@ require 'functions_utils'
|
|
3
3
|
|
4
4
|
module Weka
|
5
5
|
module Classifier
|
6
|
+
#This module stores the classifiers from the 'weka.classifiers.functions' package
|
6
7
|
module Functions
|
7
8
|
java_import 'weka.classifiers.functions.LinearRegression'
|
8
9
|
java_import 'weka.classifiers.functions.PLSClassifier'
|
@@ -20,10 +20,12 @@ module Functions_utils
|
|
20
20
|
build_classifier(@dataset)
|
21
21
|
end
|
22
22
|
|
23
|
+
#Set instance data for the instance classifier
|
23
24
|
def set_data(data)
|
24
25
|
@dataset = data
|
25
26
|
end
|
26
27
|
|
28
|
+
#Set a class index for the input dataset
|
27
29
|
def set_class_index(class_index)
|
28
30
|
@class_index = class_index
|
29
31
|
end
|
@@ -33,18 +35,29 @@ module Functions_utils
|
|
33
35
|
setOptions(options_inst)
|
34
36
|
end
|
35
37
|
|
38
|
+
#List available options
|
36
39
|
def list_options
|
37
40
|
listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
|
38
41
|
end
|
39
42
|
|
43
|
+
#Return a description from the Weka Javadoc for the selected classifier
|
40
44
|
def description
|
41
45
|
puts globalInfo
|
42
46
|
end
|
43
47
|
|
48
|
+
# perform crossvalidation on a trained classifier
|
49
|
+
#ARGV:
|
50
|
+
#fold -> 'int' value
|
44
51
|
def cross_validate(fold)
|
45
|
-
|
46
|
-
|
47
|
-
|
52
|
+
if self.class.data
|
53
|
+
eval = Weka::Classifier::Evaluation.new self.class.data
|
54
|
+
eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
|
55
|
+
eval.summary
|
56
|
+
else
|
57
|
+
eval = Weka::Classifier::Evaluation.new @dataset
|
58
|
+
eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
|
59
|
+
eval.summary
|
60
|
+
end
|
48
61
|
end
|
49
62
|
|
50
63
|
#Class methods module
|
@@ -20,31 +20,49 @@ module Lazy_utils
|
|
20
20
|
build_classifier(@dataset)
|
21
21
|
end
|
22
22
|
|
23
|
+
#Set data for instance classifier
|
24
|
+
#ARGV
|
25
|
+
# data -> an Instances object
|
23
26
|
def set_data(data)
|
24
27
|
@dataset = data
|
25
28
|
end
|
26
29
|
|
30
|
+
#Set a class index for the input dataset
|
27
31
|
def set_class_index(class_index)
|
28
32
|
@class_index = class_index
|
29
33
|
end
|
30
34
|
|
35
|
+
#Set options for the selected classifier
|
36
|
+
#ARGS:
|
37
|
+
#options -> a String, i.e. "-K"
|
31
38
|
def set_options(options)
|
32
39
|
options_inst = Utils.splitOptions(options)
|
33
40
|
setOptions(options_inst)
|
34
41
|
end
|
35
42
|
|
43
|
+
#List available options
|
36
44
|
def list_options
|
37
45
|
listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
|
38
46
|
end
|
39
47
|
|
48
|
+
#Return a description from the Weka JavaDoc for the selected classifier
|
40
49
|
def description
|
41
50
|
puts globalInfo
|
42
51
|
end
|
43
52
|
|
53
|
+
# perform crossvalidation on a trained classifier
|
54
|
+
#ARGV:
|
55
|
+
#fold -> 'int' value
|
44
56
|
def cross_validate(fold)
|
45
|
-
|
46
|
-
|
47
|
-
|
57
|
+
if self.class.data
|
58
|
+
eval = Weka::Classifier::Evaluation.new self.class.data
|
59
|
+
eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
|
60
|
+
eval.summary
|
61
|
+
else
|
62
|
+
eval = Weka::Classifier::Evaluation.new @dataset
|
63
|
+
eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
|
64
|
+
eval.summary
|
65
|
+
end
|
48
66
|
end
|
49
67
|
|
50
68
|
#Class methods module
|
@@ -4,6 +4,7 @@ require 'mi_utils'
|
|
4
4
|
module Weka
|
5
5
|
module Classifier
|
6
6
|
module Mi
|
7
|
+
#This module contains classifiers from the 'weka.classifiers.mi' package
|
7
8
|
java_import 'weka.classifiers.mi.CitationKNN'
|
8
9
|
java_import 'weka.classifiers.mi.MDD'
|
9
10
|
java_import 'weka.classifiers.mi.MIBoost'
|
@@ -20,10 +20,14 @@ module Mi_utils
|
|
20
20
|
build_classifier(@dataset)
|
21
21
|
end
|
22
22
|
|
23
|
+
#Set input data for the selected classifier
|
24
|
+
#ARGV:
|
25
|
+
#data -> an Instances class object
|
23
26
|
def set_data(data)
|
24
27
|
@dataset = data
|
25
28
|
end
|
26
29
|
|
30
|
+
#Set class index for the input dataset
|
27
31
|
def set_class_index(class_index)
|
28
32
|
@class_index = class_index
|
29
33
|
end
|
@@ -33,18 +37,29 @@ module Mi_utils
|
|
33
37
|
setOptions(options_inst)
|
34
38
|
end
|
35
39
|
|
40
|
+
#List options for the selected classifier
|
36
41
|
def list_options
|
37
42
|
listOptions.each {|key| puts "#{key.synopsis} #{key.description}"}
|
38
43
|
end
|
39
44
|
|
45
|
+
#Return a short description for the current classifier
|
40
46
|
def description
|
41
47
|
puts globalInfo
|
42
48
|
end
|
43
49
|
|
50
|
+
# perform crossvalidation on a trained classifier
|
51
|
+
#ARGV:
|
52
|
+
#fold -> 'int' value
|
44
53
|
def cross_validate(fold)
|
45
|
-
|
46
|
-
|
47
|
-
|
54
|
+
if self.class.data
|
55
|
+
eval = Weka::Classifier::Evaluation.new self.class.data
|
56
|
+
eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
|
57
|
+
eval.summary
|
58
|
+
else
|
59
|
+
eval = Weka::Classifier::Evaluation.new @dataset
|
60
|
+
eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
|
61
|
+
eval.summary
|
62
|
+
end
|
48
63
|
end
|
49
64
|
|
50
65
|
#Class methods module
|