bio-band 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +20 -0
- data/Gemfile.lock +79 -0
- data/Jarfile +9 -0
- data/Jarfile.lock +10 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +54 -0
- data/Rakefile +54 -0
- data/VERSION +1 -0
- data/bin/bio-band +83 -0
- data/bio-band.gemspec +129 -0
- data/ext/mkrf_conf.rb +74 -0
- data/features/create_dataset.feature +12 -0
- data/features/step_definitions/create_dataset.rb +40 -0
- data/features/step_definitions/weka_classifiers.rb +42 -0
- data/features/step_definitions/weka_clustering.rb +30 -0
- data/features/step_definitions/weka_filters.rb +29 -0
- data/features/step_definitions/weka_parsers.rb +45 -0
- data/features/support/env.rb +3 -0
- data/features/weka_classifiers.feature +16 -0
- data/features/weka_clustering.feature +14 -0
- data/features/weka_filters.feature +12 -0
- data/features/weka_parsers.feature +18 -0
- data/features/weka_pipeline.feature +13 -0
- data/lib/bio-band.rb +10 -0
- data/lib/bio-band/apache.rb +1 -0
- data/lib/bio-band/apache/stat/inference.rb +145 -0
- data/lib/bio-band/core.rb +6 -0
- data/lib/bio-band/core/parser/parser.rb +23 -0
- data/lib/bio-band/core/type/apache_matrices.rb +35 -0
- data/lib/bio-band/core/type/attribute.rb +53 -0
- data/lib/bio-band/core/type/instance.rb +10 -0
- data/lib/bio-band/core/type/instances.rb +332 -0
- data/lib/bio-band/core/type/utils.rb +31 -0
- data/lib/bio-band/weka.rb +11 -0
- data/lib/bio-band/weka/classifiers/bayes/bayes.rb +75 -0
- data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +42 -0
- data/lib/bio-band/weka/classifiers/evaluation.rb +12 -0
- data/lib/bio-band/weka/classifiers/functions/functions.rb +23 -0
- data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +39 -0
- data/lib/bio-band/weka/classifiers/lazy/lazy.rb +23 -0
- data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +39 -0
- data/lib/bio-band/weka/classifiers/trees/trees.rb +48 -0
- data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +42 -0
- data/lib/bio-band/weka/clusterers/clusterers.rb +32 -0
- data/lib/bio-band/weka/clusterers/clusterers_utils.rb +49 -0
- data/lib/bio-band/weka/db/DatabaseUtils_mysql +280 -0
- data/lib/bio-band/weka/db/DatabaseUtils_postgresql +594 -0
- data/lib/bio-band/weka/db/db.rb +74 -0
- data/lib/bio-band/weka/filters/supervised/attribute/attribute.rb +25 -0
- data/lib/bio-band/weka/filters/supervised/instance/instance.rb +17 -0
- data/lib/bio-band/weka/filters/supervised/supervised_utils.rb +32 -0
- data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +70 -0
- data/lib/bio-band/weka/filters/unsupervised/instance/instance.rb +48 -0
- data/lib/bio-band/weka/filters/unsupervised/unsupervised_utils.rb +33 -0
- data/resources/weather.csv +15 -0
- data/resources/weather.numeric.arff +23 -0
- data/spec/bio-band_spec.rb +7 -0
- data/spec/spec_helper.rb +12 -0
- metadata +302 -0
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Core
  module Parser
    # Parse an ARFF file and create an Instances object
    # * *Args* :
    #   - +arff_file+ -> a String, the path of the ARFF file
    # * *Returns* :
    #   - the Core::Type::Instances object built from the reader
    def Parser.parse_ARFF(arff_file)
      java_import 'java.io.FileReader'
      file_in = FileReader.new arff_file
      begin
        Core::Type::Instances.new file_in
      ensure
        # The reader is opened here, so it is released here as well,
        # even when building the Instances object raises.
        file_in.close
      end
    end

    # Parse a CSV file and create an Instances object
    # * *Args* :
    #   - +csv_file+ -> a String, the path of the CSV file
    # * *Returns* :
    #   - the dataset returned by the CSVLoader
    def Parser.parse_CSV(csv_file)
      java_import 'weka.core.converters.CSVLoader'
      # After this import, File resolves to java.io.File inside this module
      java_import 'java.io.File'
      loader = CSVLoader.new
      loader.setSource(File.new(csv_file))
      loader.getDataSet
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Core
  module Type
    java_import 'org.apache.commons.math3.linear.BlockRealMatrix'
    java_import 'org.apache.commons.math3.linear.Array2DRowRealMatrix'

    # Ruby-like names for the imported Java matrix classes
    Apache_matrix = Array2DRowRealMatrix
    Apache_matrix_block = BlockRealMatrix

    # * *Description* :
    # Linear algebra support in commons-math provides operations on real
    # matrices (both dense and sparse matrices are supported) and vectors.
    # It features basic operations (addition, subtraction ...) and
    # decomposition algorithms that can be used to solve linear systems
    # either in the exact sense or in the least squares sense.
    # The 'Apache_matrix' class represents a matrix with real numbers as entries.
    # The following basic matrix operations are supported:
    # - Matrix addition, subtraction, multiplication
    # - Scalar addition and multiplication
    # - Transpose
    # - Norm and trace
    # - Operation on a vector
    #
    # The class is only re-opened here; no Ruby methods are added to it.
    class Apache_matrix
    end

    # Apache matrix implementation suited to dimensions above 50 or 100
    # (re-opened here without adding any Ruby method)
    class Apache_matrix_block
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Core
  module Type
    java_import "weka.core.Attribute"
    java_import "weka.core.FastVector"

    # The imported Weka Attribute class, re-opened without additions
    class Attribute
    end

    # Build a Numeric Attribute object
    # * *Args* :
    #   - +name_of_attr+ -> a String, the name of the attribute
    def self.create_numeric_attr(name_of_attr)
      Attribute.new(name_of_attr)
    end

    # Build a Date Attribute object
    # * *Args* :
    #   - +name_of_attr+ -> a String, the name of the attribute
    #   - +format+ -> the format of the attribute
    def self.create_date_attr(name_of_attr,format)
      Attribute.new(name_of_attr, format)
    end

    # Build a Nominal Attribute object
    # * *Args* :
    #   - +name_of_attr+ -> a String, the name of the attribute
    #   - +values_list+ -> an Array, the list of nominal values
    def self.create_nominal_attr(name_of_attr,values_list)
      # Copy the Ruby values, in order, into the FastVector handed to Attribute
      labels = values_list.each_with_object(FastVector.new) do |label, vector|
        vector.addElement(label)
      end
      Attribute.new(name_of_attr, labels)
    end

    # Build a String Attribute object
    # * *Args* :
    #   - +name_of_attr+ -> a String, the name of the attribute
    def self.create_string_attr(name_of_attr)
      # The (String, FastVector) constructor is picked explicitly through
      # reflection and invoked with a nil vector.
      string_ctor = Attribute.java_class.constructor(Java::java.lang.String, Java::weka.core.FastVector)
      string_ctor.new_instance(name_of_attr, nil).to_java
    end
  end

end
|
52
|
+
|
53
|
+
|
@@ -0,0 +1,332 @@
|
|
1
|
+
require 'java'
|
2
|
+
require 'ruport'
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
module Core
|
6
|
+
module Type
|
7
|
+
|
8
|
+
java_import "weka.core.Instances"
|
9
|
+
java_import 'java.io.File'
|
10
|
+
java_import 'weka.core.converters.CSVSaver'
|
11
|
+
java_import 'weka.core.converters.ArffSaver'
|
12
|
+
java_import "weka.core.FastVector"
|
13
|
+
java_import "weka.core.Instance"
|
14
|
+
|
15
|
+
#
|
16
|
+
# * *Description* :
|
17
|
+
# This is the main class from the Weka package for data handling. It is essentially a matrix: each row
|
18
|
+
# is an instance of the 'Instance' class, while each column is an instance of the 'Attribute' class
|
19
|
+
# The class 'Instances' is here extended to add custom functionalities
|
20
|
+
class Instances
|
21
|
+
|
22
|
+
# Convert an Instances object to a bidimensional Ruby array
|
23
|
+
# where each row corresponds to an Instance object
|
24
|
+
def to_a2d
  # Collects one Ruby array per attribute (column) and transposes the result,
  # so that every row of the returned array corresponds to one Instance.
  # Numeric attributes contribute their numeric value, all the other
  # attributes their string value.
  columns = enumerateAttributes.map do |att|
    # Address the column through the attribute's own index instead of its
    # position in the enumeration: the two differ when the enumeration skips
    # an attribute (Weka documents that the class attribute, if set, is skipped).
    col = att.index
    if att.isNumeric
      enumerateInstances.map { |inst| inst.value(col) }
    else
      enumerateInstances.map { |inst| inst.string_value(col) }
    end
  end
  columns.transpose
end
|
42
|
+
|
43
|
+
# Return the number of rows (Instance objects) in the dataset
|
44
|
+
def n_rows
  # Row count as reported by numInstances
  numInstances
end
|
47
|
+
|
48
|
+
# Return the number of columns (Attribute objects) in the dataset
|
49
|
+
def n_columns
  # Column count as reported by numAttributes
  numAttributes
end
|
52
|
+
|
53
|
+
# Return the dimensions of the dataset (for the current Instances class object)
|
54
|
+
def dim
  # Prints (does not return) the number of rows and columns to STDOUT
  rows = numInstances
  columns = numAttributes
  puts "Rows number:\t#{rows}\nColumns number:\t #{columns}"
end
|
57
|
+
|
58
|
+
# Check if this instance's attributes are all Numeric
|
59
|
+
def check_numeric_instance
  # Walks the attributes and raises ArgumentError on the first one
  # that is not numeric; completes silently otherwise.
  enumerateAttributes.each do |att|
    next if att.isNumeric
    raise ArgumentError, "Sorry, attribute '#{att.name}' is not numeric!"
  end
end
|
66
|
+
|
67
|
+
# Convert the present Instances object to an Apache matrix if every Instances attribute
|
68
|
+
# is Numeric
|
69
|
+
def to_Apache_matrix
  # Returns a Core::Type::Apache_matrix holding the data of this object.
  # Raises ArgumentError (from check_numeric_instance) when an attribute
  # is not numeric.
  check_numeric_instance
  # to_a2d is the bidimensional (row by row) conversion defined by this
  # class; its result is what gets turned into a Java double array.
  # NOTE(review): Core::Utils.bidimensional_to_double is defined outside this
  # file and is presumed to expect an array of row arrays -- confirm.
  java_double_array = Core::Utils::bidimensional_to_double(to_a2d)
  Core::Type::Apache_matrix.new(java_double_array)
end
|
75
|
+
|
76
|
+
# Convert the present Instances object to an Apache matrix (block) if every Instances attribute
|
77
|
+
# is Numeric
|
78
|
+
def to_Apache_matrix_block
  # Returns a Core::Type::Apache_matrix_block holding the data of this object.
  # Raises ArgumentError (from check_numeric_instance) when an attribute
  # is not numeric.
  check_numeric_instance
  # to_a2d is the bidimensional (row by row) conversion defined by this
  # class; its result is what gets turned into a Java double array.
  # NOTE(review): Core::Utils.bidimensional_to_double is defined outside this
  # file and is presumed to expect an array of row arrays -- confirm.
  java_double_array = Core::Utils::bidimensional_to_double(to_a2d)
  Core::Type::Apache_matrix_block.new(java_double_array)
end
|
84
|
+
|
85
|
+
# Return data for a single attribute (a column from the Instances object)
|
86
|
+
# * *Args* :
|
87
|
+
# - +att+ -> a String, the name of the attribute
|
88
|
+
def return_attr_data(att)
  # Returns an Array with the values of the attribute named +att+, one entry
  # per instance: numeric values for numeric attributes, string values
  # otherwise. Raises ArgumentError when no attribute has that name.
  column = attribute(att) # looked up once instead of once per instance
  raise ArgumentError, "Sorry, attribute '#{att}' does not exist!" if column.nil?

  if column.isNumeric
    enumerateInstances.map { |inst| inst.value(column) }
  else
    col_index = column.index
    enumerateInstances.map { |inst| inst.string_value(col_index) }
  end
end
|
102
|
+
|
103
|
+
# Return the mean value of a single attribute (a column from the Instances object)
|
104
|
+
# * *Args* :
|
105
|
+
# - +attribute_name+ -> a String, the name of the attribute
|
106
|
+
def mean(attribute_name)
  # Returns the arithmetic mean (a Float) of the values of the attribute
  # named +attribute_name+ over all the instances.
  # Raises ArgumentError when no attribute has that name.
  column = attribute(attribute_name) # looked up once, outside the fold
  raise ArgumentError, "Sorry, attribute '#{attribute_name}' does not exist!" if column.nil?

  total = enumerateInstances.inject(0) { |acc, inst| acc + inst.value(column) }
  # Float division; an empty dataset therefore yields NaN rather than raising
  total / numInstances.to_f
end
|
112
|
+
|
113
|
+
# Return the variance of a single attribute (a column from the Instances object)
|
114
|
+
# * *Args* :
|
115
|
+
# - +attribute_name+ -> a String, the name of the attribute
|
116
|
+
def variance(attribute_name)
  # Returns the variance of a single attribute.
  # +attribute_name+ is the attribute's name; an Integer is accepted as well
  # and taken as the attribute index.
  # Raises ArgumentError when no attribute has the given name.
  if attribute_name.is_a?(Integer)
    index = attribute_name
  else
    column = attribute(attribute_name)
    raise ArgumentError, "Sorry, attribute '#{attribute_name}' does not exist!" if column.nil?
    index = column.index
  end
  # This Ruby method has the same name as the Java variance(int) method, so a
  # plain variance(index) call would re-enter this method; java_send
  # dispatches straight to the Java overload.
  java_send(:variance, [Java::int], index)
end
|
121
|
+
|
122
|
+
# Write the content of the current Instances object to a .csv file
|
123
|
+
# * *Args* :
|
124
|
+
# - +out_file+ -> a String, the name of the output file
|
125
|
+
def to_CSV(out_file)
  # Hands this dataset to a CSVSaver aimed at +out_file+ and writes it out
  # in one batch. File is the java.io.File imported at the top of the module.
  destination = File.new(out_file)
  csv_saver = CSVSaver.new
  csv_saver.setInstances(self)
  csv_saver.setFile(destination)
  csv_saver.writeBatch
end
|
132
|
+
|
133
|
+
# Write the content of the current Instances object to a .arff file
|
134
|
+
# * *Args* :
|
135
|
+
# - +out_file+ -> a String, the name of the output file
|
136
|
+
def to_ARFF(out_file)
  # Hands this dataset to an ArffSaver aimed at +out_file+ and writes it out
  # in one batch. File is the java.io.File imported at the top of the module.
  destination = File.new(out_file)
  arff_saver = ArffSaver.new
  arff_saver.setInstances(self)
  arff_saver.setFile(destination)
  arff_saver.writeBatch
end
|
143
|
+
|
144
|
+
def insert_attribute(attribute_value,position)
  # Translates one raw Ruby value into the number stored for it in an
  # Instance row, according to the type of the attribute at +position+:
  # - date    -> parsed value for Strings, other values are passed through
  # - numeric -> the value itself
  # - nominal -> index of the label (ArgumentError for an undeclared label)
  # - string  -> index returned when the string is added to the attribute
  # Raises ArgumentError for any other attribute type.
  target = attribute(position)
  if target.isDate
    # Dates are tested before numerics.
    # NOTE(review): Weka's Attribute#isNumeric appears to answer true for
    # date attributes too, which would hide this branch -- confirm.
    attribute_value.is_a?(String) ? target.parseDate(attribute_value) : attribute_value
  elsif target.isNumeric
    attribute_value
  elsif target.isNominal
    label_index = target.indexOfValue(attribute_value)
    if label_index < 0
      raise ArgumentError, "Value '#{attribute_value}' is not declared for attribute '#{target.name}'"
    end
    label_index
  elsif target.isString
    target.addStringValue(attribute_value)
  else
    raise ArgumentError, 'Attribute type is unknown!'
  end
end
private :insert_attribute
|
159
|
+
|
160
|
+
# (check function): should check that the array is bidimensional and that
|
161
|
+
# the lengths are equal
|
162
|
+
def check_array(data)
  # Returns true when +data+ is bidimensional (every element is itself an
  # enumerable row) and all the rows have the same length; false otherwise.
  # An empty +data+ is accepted, as there is nothing inconsistent in it.
  return false unless data.respond_to?(:each)

  row_sizes = []
  data.each do |row|
    return false unless row.respond_to?(:each) && row.respond_to?(:size)
    row_sizes << row.size
  end
  row_sizes.uniq.size <= 1
end
|
165
|
+
|
166
|
+
# An entire dataset is inserted 'by row' into the current Instances object
|
167
|
+
# i.e. one Instance object is inserted at a time
|
168
|
+
# * *Args* :
|
169
|
+
# - +data+ -> a bidimensional array
|
170
|
+
def populate_by_row(data)
  # Inserts every row of +data+ through add_instance.
  # Raises ArgumentError when check_array rejects +data+, instead of
  # returning without inserting anything.
  if check_array(data) == false
    raise ArgumentError, 'data must be a bidimensional array whose rows have equal length'
  end
  data.each { |row| add_instance(row) }
end
|
177
|
+
|
178
|
+
# An Instance instance object (one row) is inserted into the current Instances object
|
179
|
+
# * *Args* :
|
180
|
+
# - +instance+ -> an array of values of the correct data type (:nominal,:numeric,etc...)
|
181
|
+
def add_instance(instance)
  # Encodes each raw value through insert_attribute (by position), wraps the
  # resulting doubles in an Instance of weight 1.0 and appends it to self.
  encoded = instance.each_with_index.map { |raw_value, pos| insert_attribute(raw_value, pos) }
  new_row = Instance.new(1.0, encoded.to_java(:double))
  add(new_row)
end
|
190
|
+
|
191
|
+
# An Attribute instance object is inserted into the current Instances object
|
192
|
+
# * *Args* :
|
193
|
+
# - +attribute_name+ -> A name for the new attribute
|
194
|
+
# * *WARNING* :
|
195
|
+
# This method only creates an empty attribute field
|
196
|
+
def add_numeric_attribute(attribute_name)
  # Appends a new Attribute named +attribute_name+ after the last column
  new_attribute = Attribute.new(attribute_name)
  insertAttributeAt(new_attribute, numAttributes)
end
|
199
|
+
|
200
|
+
# An Attribute instance object is inserted into the current Instances object
|
201
|
+
# * *Args* :
|
202
|
+
# - +attribute+ -> A name for the new attribute
|
203
|
+
# - +list_values+ -> a Ruby Array with the nominal values
|
204
|
+
# * *WARNING* :
|
205
|
+
# This method only creates an empty attribute field
|
206
|
+
def add_nominal_attribute(attribute,list_values)
  # Copies +list_values+ into a FastVector, then appends a new Attribute
  # named +attribute+ (built with those values) after the last column.
  labels = list_values.each_with_object(FastVector.new) do |label, vector|
    vector.addElement(label)
  end
  insertAttributeAt(Attribute.new(attribute, labels), numAttributes)
end
|
213
|
+
|
214
|
+
#Print to STDOUT the list of the Instances's attributes (with the corresponding types)
|
215
|
+
def summary
  # Prints to STDOUT a Ruport table with one column per enumerated attribute
  # (a 'Names' row and a 'Types' row), followed by the number of rows.
  attrs = enumerateAttributes.to_a

  table = Ruport::Data::Table.new
  table.add_column 'Attributes'
  attrs.each_index { |idx| table.add_column idx + 1 }

  table << (['Names'] + attrs.map { |att| "'#{att.name}'" })

  # Exactly one label per attribute keeps the 'Types' row aligned with the
  # columns. Dates are tested first.
  # NOTE(review): Weka's Attribute#isNumeric appears to answer true for date
  # attributes too, which is why the order matters -- confirm.
  type_labels = attrs.map do |att|
    if att.isDate then 'Date'
    elsif att.isNumeric then 'Numeric'
    elsif att.isNominal then 'Nominal'
    elsif att.isString then 'String'
    else 'Unknown'
    end
  end
  table << (['Types'] + type_labels)

  puts table
  puts "\nNumber of rows: #{numInstances}"
end
|
243
|
+
|
244
|
+
# Merges two sets of Instances together. The resulting set will have all the
|
245
|
+
# attributes of the first set plus all the attributes of the second set. The
|
246
|
+
# number of instances in both sets must be the same.
|
247
|
+
# * *Args* :
|
248
|
+
# - +instances+ -> An Instances class object
|
249
|
+
def merge_with(instances)
  # Delegates to the static Instances.mergeInstances with self as first set
  Instances.mergeInstances(self, instances)
end
|
252
|
+
|
253
|
+
# This method creates an Instances object (see Cucumber documentation for further details)
|
254
|
+
# def self.create
|
255
|
+
# name = 'Instances'
|
256
|
+
# instances = Core::Type.create_instances(name,@@positions)
|
257
|
+
# return instances
|
258
|
+
# end
|
259
|
+
|
260
|
+
@@positions = []
|
261
|
+
# This method is used for attributes definition in uninitialized Instances-derived classes
|
262
|
+
def self.att(attr_type,name,*values)
  # Builds the attribute described by +attr_type+ (:numeric, :nominal, :date
  # or :string) and appends it to the shared @@positions list.
  # values[0] carries the nominal values (:nominal) or the format (:date).
  # Raises ArgumentError for any other +attr_type+, so that nil is never
  # stored among the attribute definitions.
  java_name = name.to_java(:string)
  new_attribute =
    case attr_type
    when :numeric then Core::Type.create_numeric_attr(java_name)
    when :nominal then Core::Type.create_nominal_attr(java_name, values[0])
    when :date    then Core::Type.create_date_attr(java_name, values[0])
    when :string  then Core::Type.create_string_attr(java_name)
    else
      raise ArgumentError, "Unknown attribute type: #{attr_type.inspect}"
    end
  @@positions << new_attribute
end
|
269
|
+
|
270
|
+
# This method is used for Nominal attributes definition in uninitialized Instances-derived classes
|
271
|
+
# * *Args* :
|
272
|
+
# - +name+ -> Attribute name, a String
|
273
|
+
# - +values+ -> An array of values for the nominal attribute
|
274
|
+
def self.nominal(name,values)
  # Shortcut for att with the :nominal type
  att(:nominal, name, values)
end
|
277
|
+
|
278
|
+
# This method is used for Numeric attributes definition in uninitialized Instances-derived classes
|
279
|
+
# * *Args* :
|
280
|
+
# - +name+ -> Attribute name, a String
|
281
|
+
def self.numeric(name)
  # Shortcut for att with the :numeric type
  att(:numeric, name)
end
|
284
|
+
|
285
|
+
# This method is used for Date attributes definition in uninitialized Instances-derived classes
|
286
|
+
# * *Args* :
|
287
|
+
# - +name+ -> Attribute name, a String
|
288
|
+
def self.date(name, format = "yyyy-MM-dd'T'HH:mm:ss")
  # Shortcut for att with the :date type.
  # +format+ is optional and is forwarded to att, so the date attribute is
  # never created with a nil format.
  # NOTE(review): the default is meant to match Weka's ISO-8601 default date
  # format -- confirm against the Weka version in use.
  att(:date, name, format)
end
|
291
|
+
|
292
|
+
# This method is used for String attributes definition in uninitialized Instances-derived classes
|
293
|
+
# * *Args* :
|
294
|
+
# - +name+ -> Attribute name, a String
|
295
|
+
def self.string(name)
  # Shortcut for att with the :string type
  att(:string, name)
end
|
298
|
+
|
299
|
+
# Class used for the creation of a new dataset (Instances class)
|
300
|
+
class Base < Instances
  # Builds an empty dataset named 'Instances' whose attributes are the ones
  # accumulated in @@positions.
  def initialize
    declared = FastVector.new
    @@positions.each { |definition| declared.addElement(definition) }
    super('Instances', declared, 0)
  end
end
|
307
|
+
|
308
|
+
# Return a json String for the current Instances object
|
309
|
+
# The output is modeled on the 'datatable' Google charts APIs
|
310
|
+
# More details at: 'https://developers.google.com/chart/interactive/docs/reference#DataTable'
|
311
|
+
def to_json(*_args)
  # Returns a pretty-printed JSON String with two keys:
  # - cols -> the attribute names
  # - rows -> the toString representation of every instance
  # Extra arguments are accepted and ignored, so that the method can also be
  # called with arguments (as JSON generators do for nested objects).
  dataset_hash = {
    :cols => enumerateAttributes.map { |attribute| attribute.name },
    :rows => enumerateInstances.map { |instance| instance.toString }
  }
  JSON.pretty_generate(dataset_hash)
end
|
317
|
+
end #Instances class
|
318
|
+
|
319
|
+
# Create an Instances object
|
320
|
+
# * *Args* :
|
321
|
+
# - +name+ -> A name for the Instances object
|
322
|
+
# - +attributes+ -> An array containing Attribute objects
|
323
|
+
def Type.create_instances(name,attributes)
  # Copies +attributes+ into a FastVector and builds an Instances object
  # called +name+ from it, with the third constructor argument set to 0.
  attributes_vector = attributes.each_with_object(FastVector.new) do |attribute, vector|
    vector.addElement(attribute)
  end
  Instances.new(name, attributes_vector, 0)
end
|
328
|
+
end
|
329
|
+
end
|
330
|
+
|
331
|
+
|
332
|
+
|