bio-band 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +20 -0
- data/Gemfile.lock +79 -0
- data/Jarfile +9 -0
- data/Jarfile.lock +10 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +54 -0
- data/Rakefile +54 -0
- data/VERSION +1 -0
- data/bin/bio-band +83 -0
- data/bio-band.gemspec +129 -0
- data/ext/mkrf_conf.rb +74 -0
- data/features/create_dataset.feature +12 -0
- data/features/step_definitions/create_dataset.rb +40 -0
- data/features/step_definitions/weka_classifiers.rb +42 -0
- data/features/step_definitions/weka_clustering.rb +30 -0
- data/features/step_definitions/weka_filters.rb +29 -0
- data/features/step_definitions/weka_parsers.rb +45 -0
- data/features/support/env.rb +3 -0
- data/features/weka_classifiers.feature +16 -0
- data/features/weka_clustering.feature +14 -0
- data/features/weka_filters.feature +12 -0
- data/features/weka_parsers.feature +18 -0
- data/features/weka_pipeline.feature +13 -0
- data/lib/bio-band.rb +10 -0
- data/lib/bio-band/apache.rb +1 -0
- data/lib/bio-band/apache/stat/inference.rb +145 -0
- data/lib/bio-band/core.rb +6 -0
- data/lib/bio-band/core/parser/parser.rb +23 -0
- data/lib/bio-band/core/type/apache_matrices.rb +35 -0
- data/lib/bio-band/core/type/attribute.rb +53 -0
- data/lib/bio-band/core/type/instance.rb +10 -0
- data/lib/bio-band/core/type/instances.rb +332 -0
- data/lib/bio-band/core/type/utils.rb +31 -0
- data/lib/bio-band/weka.rb +11 -0
- data/lib/bio-band/weka/classifiers/bayes/bayes.rb +75 -0
- data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +42 -0
- data/lib/bio-band/weka/classifiers/evaluation.rb +12 -0
- data/lib/bio-band/weka/classifiers/functions/functions.rb +23 -0
- data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +39 -0
- data/lib/bio-band/weka/classifiers/lazy/lazy.rb +23 -0
- data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +39 -0
- data/lib/bio-band/weka/classifiers/trees/trees.rb +48 -0
- data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +42 -0
- data/lib/bio-band/weka/clusterers/clusterers.rb +32 -0
- data/lib/bio-band/weka/clusterers/clusterers_utils.rb +49 -0
- data/lib/bio-band/weka/db/DatabaseUtils_mysql +280 -0
- data/lib/bio-band/weka/db/DatabaseUtils_postgresql +594 -0
- data/lib/bio-band/weka/db/db.rb +74 -0
- data/lib/bio-band/weka/filters/supervised/attribute/attribute.rb +25 -0
- data/lib/bio-band/weka/filters/supervised/instance/instance.rb +17 -0
- data/lib/bio-band/weka/filters/supervised/supervised_utils.rb +32 -0
- data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +70 -0
- data/lib/bio-band/weka/filters/unsupervised/instance/instance.rb +48 -0
- data/lib/bio-band/weka/filters/unsupervised/unsupervised_utils.rb +33 -0
- data/resources/weather.csv +15 -0
- data/resources/weather.numeric.arff +23 -0
- data/spec/bio-band_spec.rb +7 -0
- data/spec/spec_helper.rb +12 -0
- metadata +302 -0
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Core
|
4
|
+
module Parser
|
5
|
+
# Parse an ARFF file and create an Instances object
|
6
|
+
# Reads an ARFF file and builds an Instances dataset from its content.
# * *Args*    :
#   - +arff_file+ -> a String, path of the ARFF file to read
# * *Returns* :
#   - a Core::Type::Instances object built from the reader
def Parser.parse_ARFF(arff_file)
  java_import 'java.io.FileReader'
  reader = FileReader.new(arff_file)
  begin
    Core::Type::Instances.new(reader)
  ensure
    # The reader was previously left open; release the file handle
    # whether or not the dataset could be built.
    reader.close
  end
end
|
12
|
+
# Parse a CSV file and create an Instances object
|
13
|
+
# Loads a CSV file through a CSVLoader.
# * *Args*    :
#   - +csv_file+ -> a String, path of the CSV file to read
# * *Returns* :
#   - whatever the loader's getDataSet call yields
def Parser.parse_CSV(csv_file)
  java_import 'weka.core.converters.CSVLoader'
  java_import 'java.io.File'
  csv_loader = CSVLoader.new
  # File is the java.io.File imported just above
  csv_loader.setSource(File.new(csv_file))
  csv_loader.getDataSet
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Core
|
4
|
+
module Type
|
5
|
+
|
6
|
+
java_import 'org.apache.commons.math3.linear.BlockRealMatrix'
|
7
|
+
java_import 'org.apache.commons.math3.linear.Array2DRowRealMatrix'
|
8
|
+
|
9
|
+
#Define constants that give the Java matrix classes Ruby-style names
|
10
|
+
Apache_matrix = Array2DRowRealMatrix
|
11
|
+
Apache_matrix_block = BlockRealMatrix
|
12
|
+
|
13
|
+
#* *Description* :
|
14
|
+
#Linear algebra support in commons-math provides operations on real matrices (both dense
|
15
|
+
#and sparse matrices are supported) and vectors. It features basic operations (addition, subtraction ...)
|
16
|
+
#and decomposition algorithms that can be used to solve linear systems either in an exact sense or
|
17
|
+
#in a least-squares sense.
|
18
|
+
#The 'Apache_matrix' class represents a matrix with real numbers as entries.
|
19
|
+
#The following basic matrix operations are supported:
|
20
|
+
#- Matrix addition, subtraction, multiplication
|
21
|
+
#- Scalar addition and multiplication
|
22
|
+
#- Transpose
|
23
|
+
#- Norm and trace
|
24
|
+
#- Operation on a vector
|
25
|
+
# Reopens the class bound to the Apache_matrix constant; no Ruby-side
# methods are added here.
class Apache_matrix; end
|
28
|
+
|
29
|
+
# Apache matrix implementation suited to dimensions above 50 or 100
|
30
|
+
# Reopens the class bound to the Apache_matrix_block constant; no Ruby-side
# methods are added here.
class Apache_matrix_block; end
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Core
|
4
|
+
module Type
|
5
|
+
|
6
|
+
java_import "weka.core.Attribute"
|
7
|
+
java_import "weka.core.FastVector"
|
8
|
+
|
9
|
+
# Reopens the imported Attribute class; no Ruby-side methods are added here.
class Attribute; end
|
12
|
+
|
13
|
+
# Return a Numeric Attribute class object
|
14
|
+
# * *Args* :
|
15
|
+
# - +name_of_attr+ -> a String, the name of the attribute
|
16
|
+
# Builds an Attribute from a name only.
# * *Returns* :
#   - the new Attribute object
def self.create_numeric_attr(name_of_attr)
  Attribute.new(name_of_attr)
end
|
20
|
+
|
21
|
+
# Return a Date Attribute class object
|
22
|
+
# * *Args* :
|
23
|
+
# - +name_of_attr+ -> a String, the name of the attribute
|
24
|
+
# - +format+ -> The format of the attribute
|
25
|
+
# Builds an Attribute from a name and a date format.
# * *Returns* :
#   - the new Attribute object
def self.create_date_attr(name_of_attr,format)
  Attribute.new(name_of_attr, format)
end
|
29
|
+
|
30
|
+
# Return a Nominal Attribute class object
|
31
|
+
# * *Args* :
|
32
|
+
# - +name_of_attr+ -> a String, the name of the attribute
|
33
|
+
# - +values_list+ -> An array, the list of nominal values
|
34
|
+
# Builds an Attribute from a name and a FastVector holding the given labels.
# * *Returns* :
#   - the new Attribute object
def self.create_nominal_attr(name_of_attr,values_list)
  # Copy the Ruby values, in order, into the FastVector handed to Attribute
  labels = values_list.each_with_object(FastVector.new) do |label, vector|
    vector.addElement(label)
  end
  Attribute.new(name_of_attr, labels)
end
|
40
|
+
|
41
|
+
# Return a String Attribute class object
|
42
|
+
# * *Args* :
|
43
|
+
# - +name_of_attr+ -> a String, the name of the attribute
|
44
|
+
# Builds an Attribute through the explicit (String, FastVector) Java
# constructor, passing nil as the vector argument.
# * *Returns* :
#   - the constructed object converted with to_java
def self.create_string_attr(name_of_attr)
  # Select the constructor by signature so the nil argument is unambiguous
  string_ctor = Attribute.java_class.constructor(Java::java.lang.String, Java::weka.core.FastVector)
  string_ctor.new_instance(name_of_attr, nil).to_java
end
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
|
@@ -0,0 +1,332 @@
|
|
1
|
+
require 'java'
|
2
|
+
require 'ruport'
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
module Core
|
6
|
+
module Type
|
7
|
+
|
8
|
+
java_import "weka.core.Instances"
|
9
|
+
java_import 'java.io.File'
|
10
|
+
java_import 'weka.core.converters.CSVSaver'
|
11
|
+
java_import 'weka.core.converters.ArffSaver'
|
12
|
+
java_import "weka.core.FastVector"
|
13
|
+
java_import "weka.core.Instance"
|
14
|
+
|
15
|
+
#
|
16
|
+
# * *Description* :
|
17
|
+
# This is the main class from the Weka package for data handling. It is essentially a matrix: each row
|
18
|
+
# is an instance of the 'Instance' class, while each column is an instance of the 'Attribute' class
|
19
|
+
# The class 'Instances' is here extended to add custom functionalities
|
20
|
+
class Instances
|
21
|
+
|
22
|
+
# Convert an Instances object to a bidimensional Ruby array
|
23
|
+
# where each row corresponds to an Instance object
|
24
|
+
# Converts the dataset into a bidimensional Ruby array, one inner array
# per Instance (row).
# Numeric attributes contribute their numeric value; every other attribute
# contributes its string value.
# * *Returns* :
#   - an Array of rows, covering the attributes yielded by enumerateAttributes
def to_a2d
  columns = enumerateAttributes.map do |att|
    # enumerateAttributes leaves out the class attribute when one is set, so
    # the position inside the enumeration is not a reliable column index;
    # ask the attribute for its own index instead.
    position = att.index
    if att.isNumeric
      enumerateInstances.map { |inst| inst.value(position) }
    else
      enumerateInstances.map { |inst| inst.string_value(position) }
    end
  end
  # Columns were collected attribute by attribute; flip them into rows
  columns.transpose
end
|
42
|
+
|
43
|
+
# Return the number of rows (Instance objects) in the dataset
|
44
|
+
# Row count of the dataset, as reported by numInstances.
def n_rows
  numInstances
end
|
47
|
+
|
48
|
+
# Return the number of columns (Attribute objects) in the dataset
|
49
|
+
# Column count of the dataset, as reported by numAttributes.
def n_columns
  numAttributes
end
|
52
|
+
|
53
|
+
# Return the dimensions of the dataset (for the current Instances class object)
|
54
|
+
# Prints the number of rows and columns of the dataset to STDOUT.
def dim
  report = "Rows number:\t#{numInstances}\nColumns number:\t #{numAttributes}"
  puts report
end
|
57
|
+
|
58
|
+
# Check if this instance's attributes are all Numeric
|
59
|
+
# Walks the attributes from enumerateAttributes and fails on the first one
# that is not numeric.
# * *Raises* :
#   - +ArgumentError+ -> when a non-numeric attribute is met
def check_numeric_instance
  enumerateAttributes.each do |candidate|
    next if candidate.isNumeric
    raise ArgumentError, "Sorry, attribute '#{candidate.name}' is not numeric!"
  end
end
|
66
|
+
|
67
|
+
# Convert the present Instances object to an Apache matrix if every Instances attribute
|
68
|
+
# is Numeric
|
69
|
+
# Converts the dataset to a Core::Type::Apache_matrix.
# * *Returns* :
#   - a new Core::Type::Apache_matrix built from the dataset values
# * *Raises* :
#   - +ArgumentError+ -> from check_numeric_instance, when an attribute is not numeric
def to_Apache_matrix
  check_numeric_instance
  # to_a2d is the bidimensional converter defined by this class; the
  # previous call to to_a did not refer to any method defined here.
  java_double_array = Core::Utils::bidimensional_to_double(to_a2d)
  Core::Type::Apache_matrix.new(java_double_array)
end
|
75
|
+
|
76
|
+
# Convert the present Instances object to an Apache matrix (block) if every Instances attribute
|
77
|
+
# is Numeric
|
78
|
+
# Converts the dataset to a Core::Type::Apache_matrix_block.
# * *Returns* :
#   - a new Core::Type::Apache_matrix_block built from the dataset values
# * *Raises* :
#   - +ArgumentError+ -> from check_numeric_instance, when an attribute is not numeric
def to_Apache_matrix_block
  check_numeric_instance
  # to_a2d is the bidimensional converter defined by this class; the
  # previous call to to_a did not refer to any method defined here.
  java_double_array = Core::Utils::bidimensional_to_double(to_a2d)
  Core::Type::Apache_matrix_block.new(java_double_array)
end
|
84
|
+
|
85
|
+
# Return data for a single attribute (a column from the Instances object)
|
86
|
+
# * *Args* :
|
87
|
+
# - +att+ -> a String, the name of the attribute
|
88
|
+
# Collects the values of one attribute (column) across all instances.
# Numeric attributes yield numeric values; other attributes yield their
# string values.
# * *Returns* :
#   - an Array with one entry per instance
def return_attr_data(att)
  column = attribute(att)
  if column.isNumeric
    enumerateInstances.map { |row| row.value(column) }
  else
    position = column.index
    enumerateInstances.map { |row| row.string_value(position) }
  end
end
|
102
|
+
|
103
|
+
# Return the mean value of a single attribute (a column from the Instances object)
|
104
|
+
# * *Args* :
|
105
|
+
# - +attribute_name+ -> a String, the name of the attribute
|
106
|
+
# Arithmetic mean of one attribute over all instances.
# * *Returns* :
#   - the sum of the attribute values divided by numInstances (as a Float)
def mean(attribute_name)
  total = 0
  enumerateInstances.each do |row|
    total += row.value(attribute(attribute_name))
  end
  # Multiply by 1.0 to force floating point division
  total / (numInstances * 1.0)
end
|
112
|
+
|
113
|
+
# Return the variance of a single attribute (a column from the Instances object)
|
114
|
+
# * *Args* :
|
115
|
+
# - +attribute_name+ -> a String, the name of the attribute
|
116
|
+
# Variance of one attribute, looked up by name among the attributes yielded
# by enumerateAttributes.
# * *Returns* :
#   - the result of the Java variance(int) method for the attribute's index
# * *Raises* :
#   - +ArgumentError+ -> when no attribute has the given name
def variance(attribute_name)
  # each_with_idx is not a Ruby method; locate the attribute with find
  target = enumerateAttributes.find { |att| att.name == attribute_name }
  raise ArgumentError, "Sorry, attribute '#{attribute_name}' was not found!" unless target

  # A plain variance(index) call would re-enter this Ruby method, so the
  # Java overload taking an int is invoked explicitly.
  java_send(:variance, [Java::int], target.index)
end
|
121
|
+
|
122
|
+
# Write the content of the current Instances object to a .csv file
|
123
|
+
# * *Args* :
|
124
|
+
# - +out_file+ -> a String, the name of the output file
|
125
|
+
# Saves the dataset to a file through a CSVSaver.
# The String path is wrapped in a File (java.io.File is imported in this
# module) before being handed to the saver.
def to_CSV(out_file)
  csv_writer = CSVSaver.new
  csv_writer.setInstances(self)
  csv_writer.setFile(File.new(out_file))
  csv_writer.writeBatch
end
|
132
|
+
|
133
|
+
# Write the content of the current Instances object to a .arff file
|
134
|
+
# * *Args* :
|
135
|
+
# - +out_file+ -> a String, the name of the output file
|
136
|
+
# Saves the dataset to a file through an ArffSaver.
# The String path is wrapped in a File (java.io.File is imported in this
# module) before being handed to the saver.
def to_ARFF(out_file)
  arff_writer = ArffSaver.new
  arff_writer.setInstances(self)
  arff_writer.setFile(File.new(out_file))
  arff_writer.writeBatch
end
|
143
|
+
|
144
|
+
# Translates one raw value into the form stored for the attribute found at
# +position+.
# * *Args*    :
#   - +attribute_value+ -> the raw value for the cell
#   - +position+ -> index of the attribute the value belongs to
# * *Returns* :
#   - the value itself for a numeric attribute, the result of indexOfValue
#     for a nominal one, the result of parseDate for a date one; for any
#     other type a message is printed and nil is returned
def insert_attribute(attribute_value,position)
  target = attribute(position)
  if target.isNumeric
    attribute_value
  elsif target.isNominal
    target.indexOfValue(attribute_value)
  elsif target.isDate
    # The Java method is parseDate; the former ParseDate spelling does not
    # name a method on the attribute.
    target.parseDate(attribute_value)
  else
    puts 'Attribute type is unknown!'
  end
end
private :insert_attribute
|
159
|
+
|
160
|
+
# (check function): should check that the array is bidimensional and that
|
161
|
+
# the lengths are equal
|
162
|
+
# Placeholder validation: no check is performed yet and every input is
# reported as valid.
def check_array(data)
  true
end
|
165
|
+
|
166
|
+
# An entire dataset is inserted 'by row' into the current Instances object
|
167
|
+
# i.e. one Instance object is inserted at a time
|
168
|
+
# * *Args* :
|
169
|
+
# - +data+ -> a bidimensional array
|
170
|
+
# Adds every row of +data+ to the dataset through add_instance, unless
# check_array rejects the input.
def populate_by_row(data)
  return if check_array(data) == false

  data.each { |row| add_instance(row) }
end
|
177
|
+
|
178
|
+
# An Instance instance object (one row) is inserted into the current Instances object
|
179
|
+
# * *Args* :
|
180
|
+
# - +instance+ -> an array of values of the correct data type (:nominal,:numeric,etc...)
|
181
|
+
# Appends one row to the dataset.
# Each raw value is translated by insert_attribute according to its
# position, the results are converted to a Java double array, and an
# Instance created with 1.0 as its first argument is added.
def add_instance(instance)
  encoded = instance.each_with_index.map do |raw_value, position|
    insert_attribute(raw_value, position)
  end
  self.add(Instance.new(1.0, encoded.to_java(:double)))
end
|
190
|
+
|
191
|
+
# An Attribute instance object is inserted into the current Instances object
|
192
|
+
# * *Args* :
|
193
|
+
# - +attribute_name+ -> A name for the new attribute
|
194
|
+
# * *WARNING* :
|
195
|
+
# This method only creates an empty attribute field
|
196
|
+
# Inserts a new Attribute, built from the name alone, after the current
# last attribute (at position numAttributes).
def add_numeric_attribute(attribute_name)
  new_attribute = Attribute.new(attribute_name)
  insertAttributeAt(new_attribute, numAttributes)
end
|
199
|
+
|
200
|
+
# An Attribute instance object is inserted into the current Instances object
|
201
|
+
# * *Args* :
|
202
|
+
# - +attribute+ -> A name for the new attribute
|
203
|
+
# - +list_values+ -> RubyArray with nominal values
|
204
|
+
# * *WARNING* :
|
205
|
+
# This method only creates an empty attribute field
|
206
|
+
# Inserts a new Attribute, built from a name and a FastVector of the given
# values, after the current last attribute (at position numAttributes).
def add_nominal_attribute(attribute,list_values)
  labels = FastVector.new
  list_values.each { |label| labels.addElement(label) }
  insertAttributeAt(Attribute.new(attribute, labels), numAttributes)
end
|
213
|
+
|
214
|
+
#Print to STDOUT the list of the Instances's attributes (with the corresponding types)
|
215
|
+
# Prints a Ruport table to STDOUT with one column per attribute from
# enumerateAttributes (a 'Names' row and a 'Types' row), followed by the
# number of rows in the dataset.
def summary
  report = Ruport::Data::Table.new
  report.add_column 'Attributes'
  # One numbered column (1-based) per attribute
  enumerateAttributes.each_with_index { |_att, position| report.add_column(position + 1) }

  name_row = ['Names']
  enumerateAttributes.each { |att| name_row << "'#{att.name}'" }
  report << name_row

  type_row = ['Types']
  enumerateAttributes.each do |att|
    type_row << "Numeric" if att.isNumeric
    type_row << "Nominal" if att.isNominal
    type_row << "Date" if att.isDate
    type_row << "String" if att.isString
  end
  report << type_row

  puts report

  # Rows are counted by walking the instance enumeration
  row_total = 0
  enumerateInstances.each { |_row| row_total += 1 }
  puts "\nNumber of rows: #{row_total}"
end
|
243
|
+
|
244
|
+
# Merges two sets of Instances together. The resulting set will have all the
|
245
|
+
# attributes of the first set plus all the attributes of the second set. The
|
246
|
+
# number of instances in both sets must be the same.
|
247
|
+
# * *Args* :
|
248
|
+
# - +instances+ -> An Instances class object
|
249
|
+
# Delegates to Instances.mergeInstances with this dataset first and
# +instances+ second.
# * *Returns* :
#   - the result of Instances.mergeInstances
def merge_with(instances)
  Instances.mergeInstances(self, instances)
end
|
252
|
+
|
253
|
+
# This method creates an Instances object (see Cucumber documentation for further details)
|
254
|
+
# def self.create
|
255
|
+
# name = 'Instances'
|
256
|
+
# instances = Core::Type.create_instances(name,@@positions)
|
257
|
+
# return instances
|
258
|
+
# end
|
259
|
+
|
260
|
+
@@positions = []
|
261
|
+
# This method is used for attributes definition in uninitialized Instances-derived classes
|
262
|
+
# Builds an attribute of the requested type and appends it to @@positions.
# * *Args*    :
#   - +attr_type+ -> one of :numeric, :nominal, :date, :string
#   - +name+ -> attribute name, converted with to_java(:string)
#   - +values+ -> optional; the first extra argument is forwarded as the
#     values list (:nominal) or as the format (:date)
# * *Raises* :
#   - +ArgumentError+ -> when +attr_type+ is not one of the four known
#     types (previously nil was appended to @@positions in that case)
def self.att(attr_type,name,*values)
  java_name = name.to_java(:string)
  att = case attr_type
        when :numeric then Core::Type.create_numeric_attr(java_name)
        when :nominal then Core::Type.create_nominal_attr(java_name, values[0])
        when :date    then Core::Type.create_date_attr(java_name, values[0])
        when :string  then Core::Type.create_string_attr(java_name)
        else raise ArgumentError, "Unknown attribute type: #{attr_type.inspect}"
        end
  @@positions << att
end
|
269
|
+
|
270
|
+
# This method is used for Nominal attributes definition in uninitialized Instances-derived classes
|
271
|
+
# * *Args* :
|
272
|
+
# - +name+ -> Attribute name, a String
|
273
|
+
# - +values+ -> An array of values for the nominal attribute
|
274
|
+
# Shorthand for att with the :nominal type.
def self.nominal(name,values)
  att(:nominal, name, values)
end
|
277
|
+
|
278
|
+
# This method is used for Numeric attributes definition in uninitialized Instances-derived classes
|
279
|
+
# * *Args* :
|
280
|
+
# - +name+ -> Attribute name, a String
|
281
|
+
# Shorthand for att with the :numeric type.
def self.numeric(name)
  att(:numeric, name)
end
|
284
|
+
|
285
|
+
# This method is used for Date attributes definition in uninitialized Instances-derived classes
|
286
|
+
# * *Args* :
|
287
|
+
# - +name+ -> Attribute name, a String
|
288
|
+
# Shorthand for att with the :date type.
# * *Args*    :
#   - +name+ -> Attribute name, a String
#   - +format+ -> optional date format forwarded to att; defaults to nil,
#     which is what att received before this parameter existed
def self.date(name, format = nil)
  att(:date, name, format)
end
|
291
|
+
|
292
|
+
# This method is used for String attributes definition in uninitialized Instances-derived classes
|
293
|
+
# * *Args* :
|
294
|
+
# - +name+ -> Attribute name, a String
|
295
|
+
# Shorthand for att with the :string type.
def self.string(name)
  att(:string, name)
end
|
298
|
+
|
299
|
+
# Class used for the creation of a new dataset (Instances class)
|
300
|
+
# Instances subclass whose constructor builds the dataset from the
# attributes accumulated in @@positions.
class Base < Instances
  # Copies @@positions into a FastVector and calls the parent constructor
  # with the name 'Instances', that vector and 0 as the third argument.
  def initialize
    attributes_vector = FastVector.new
    @@positions.each do |definition|
      attributes_vector.addElement(definition)
    end
    super('Instances', attributes_vector, 0)
  end
end
|
307
|
+
|
308
|
+
# Return a json String for the current Instances object
|
309
|
+
# The output is modeled on the 'datatable' Google charts APIs
|
310
|
+
# More details at: 'https://developers.google.com/chart/interactive/docs/reference#DataTable'
|
311
|
+
# Serialises the dataset to a pretty-printed JSON String with two keys:
# "cols" (attribute names from enumerateAttributes) and "rows" (the
# toString form of every instance).
# * *Args*    :
#   - +_args+ -> ignored; accepted because the JSON generator calls
#     to_json with a state argument when this object is nested inside
#     another structure, which the former zero-argument signature rejected
# * *Returns* :
#   - a String produced by JSON.pretty_generate
def to_json(*_args)
  dataset_hash = {
    cols: enumerateAttributes.map { |attribute| attribute.name },
    rows: enumerateInstances.map { |instance| instance.toString }
  }
  JSON.pretty_generate(dataset_hash)
end
|
317
|
+
end #Instances class
|
318
|
+
|
319
|
+
# Create an Instances object
|
320
|
+
# * *Args* :
|
321
|
+
# - +name+ -> A name for the Instances object
|
322
|
+
# - +attributes+ -> An array containing Attribute objects
|
323
|
+
# Builds an Instances object from a name and a list of attributes.
# The attributes are copied, in order, into a FastVector; 0 is passed as
# the third constructor argument.
# * *Returns* :
#   - the new Instances object
def Type.create_instances(name,attributes)
  attributes_vector = attributes.each_with_object(FastVector.new) do |att, vector|
    vector.addElement(att)
  end
  Instances.new(name, attributes_vector, 0)
end
|
328
|
+
end
|
329
|
+
end
|
330
|
+
|
331
|
+
|
332
|
+
|