bio-band 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/Gemfile +20 -0
  2. data/Gemfile.lock +79 -0
  3. data/Jarfile +9 -0
  4. data/Jarfile.lock +10 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +54 -0
  7. data/Rakefile +54 -0
  8. data/VERSION +1 -0
  9. data/bin/bio-band +83 -0
  10. data/bio-band.gemspec +129 -0
  11. data/ext/mkrf_conf.rb +74 -0
  12. data/features/create_dataset.feature +12 -0
  13. data/features/step_definitions/create_dataset.rb +40 -0
  14. data/features/step_definitions/weka_classifiers.rb +42 -0
  15. data/features/step_definitions/weka_clustering.rb +30 -0
  16. data/features/step_definitions/weka_filters.rb +29 -0
  17. data/features/step_definitions/weka_parsers.rb +45 -0
  18. data/features/support/env.rb +3 -0
  19. data/features/weka_classifiers.feature +16 -0
  20. data/features/weka_clustering.feature +14 -0
  21. data/features/weka_filters.feature +12 -0
  22. data/features/weka_parsers.feature +18 -0
  23. data/features/weka_pipeline.feature +13 -0
  24. data/lib/bio-band.rb +10 -0
  25. data/lib/bio-band/apache.rb +1 -0
  26. data/lib/bio-band/apache/stat/inference.rb +145 -0
  27. data/lib/bio-band/core.rb +6 -0
  28. data/lib/bio-band/core/parser/parser.rb +23 -0
  29. data/lib/bio-band/core/type/apache_matrices.rb +35 -0
  30. data/lib/bio-band/core/type/attribute.rb +53 -0
  31. data/lib/bio-band/core/type/instance.rb +10 -0
  32. data/lib/bio-band/core/type/instances.rb +332 -0
  33. data/lib/bio-band/core/type/utils.rb +31 -0
  34. data/lib/bio-band/weka.rb +11 -0
  35. data/lib/bio-band/weka/classifiers/bayes/bayes.rb +75 -0
  36. data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +42 -0
  37. data/lib/bio-band/weka/classifiers/evaluation.rb +12 -0
  38. data/lib/bio-band/weka/classifiers/functions/functions.rb +23 -0
  39. data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +39 -0
  40. data/lib/bio-band/weka/classifiers/lazy/lazy.rb +23 -0
  41. data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +39 -0
  42. data/lib/bio-band/weka/classifiers/trees/trees.rb +48 -0
  43. data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +42 -0
  44. data/lib/bio-band/weka/clusterers/clusterers.rb +32 -0
  45. data/lib/bio-band/weka/clusterers/clusterers_utils.rb +49 -0
  46. data/lib/bio-band/weka/db/DatabaseUtils_mysql +280 -0
  47. data/lib/bio-band/weka/db/DatabaseUtils_postgresql +594 -0
  48. data/lib/bio-band/weka/db/db.rb +74 -0
  49. data/lib/bio-band/weka/filters/supervised/attribute/attribute.rb +25 -0
  50. data/lib/bio-band/weka/filters/supervised/instance/instance.rb +17 -0
  51. data/lib/bio-band/weka/filters/supervised/supervised_utils.rb +32 -0
  52. data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +70 -0
  53. data/lib/bio-band/weka/filters/unsupervised/instance/instance.rb +48 -0
  54. data/lib/bio-band/weka/filters/unsupervised/unsupervised_utils.rb +33 -0
  55. data/resources/weather.csv +15 -0
  56. data/resources/weather.numeric.arff +23 -0
  57. data/spec/bio-band_spec.rb +7 -0
  58. data/spec/spec_helper.rb +12 -0
  59. metadata +302 -0
@@ -0,0 +1,6 @@
1
+ require 'bio-band/core/type/instances'
2
+ require 'bio-band/core/type/instance'
3
+ require 'bio-band/core/parser/parser'
4
+ require 'bio-band/core/type/utils'
5
+ require 'bio-band/core/type/attribute'
6
+ require 'bio-band/core/type/apache_matrices'
@@ -0,0 +1,23 @@
1
+ require 'java'
2
+
3
module Core
  module Parser
    # Parse an ARFF file and create an Instances object
    # * *Args* :
    #   - +arff_file+ -> a String, the path of the ARFF file to read
    # * *Returns* :
    #   - a Core::Type::Instances object built from the reader
    def Parser.parse_ARFF(arff_file)
      java_import 'java.io.FileReader'
      file_in = FileReader.new arff_file
      begin
        Core::Type::Instances.new file_in
      ensure
        # The reader is opened here, so it is released here as well,
        # whether or not the Instances constructor succeeded.
        file_in.close
      end
    end

    # Parse a CSV file and create an Instances object
    # * *Args* :
    #   - +csv_file+ -> a String, the path of the CSV file to read
    # * *Returns* :
    #   - the dataset produced by the CSVLoader
    def Parser.parse_CSV(csv_file)
      java_import 'weka.core.converters.CSVLoader'
      java_import 'java.io.File'
      loader = CSVLoader.new
      loader.setSource(File.new(csv_file))
      loader.getDataSet
    end
  end
end
@@ -0,0 +1,35 @@
1
+ require 'java'
2
+
3
module Core
  module Type

    java_import 'org.apache.commons.math3.linear.BlockRealMatrix'
    java_import 'org.apache.commons.math3.linear.Array2DRowRealMatrix'

    # Define constants so that Ruby-like names can be used instead of the Java ones
    Apache_matrix = Array2DRowRealMatrix
    Apache_matrix_block = BlockRealMatrix

    #* *Description* :
    #Linear algebra support in commons-math provides operations on real matrices (both dense
    #and sparse matrices are supported) and vectors. It features basic operations (addition, subtraction ...)
    #and decomposition algorithms that can be used to solve linear systems either in the exact sense or
    #in the least squares sense.
    #The 'Apache_matrix' class represents a matrix with real numbers as entries.
    #The following basic matrix operations are supported:
    #- Matrix addition, subtraction, multiplication
    #- Scalar addition and multiplication
    #- Transpose
    #- Norm and trace
    #- Operation on a vector
    #
    #The class is only re-opened here (it is the imported Array2DRowRealMatrix); no methods are added.
    class Apache_matrix

    end

    # Apache matrix implementation suited to dimensions above 50 or 100.
    # Re-opened here (it is the imported BlockRealMatrix); no methods are added.
    class Apache_matrix_block

    end

  end
end
@@ -0,0 +1,53 @@
1
+ require 'java'
2
+
3
module Core
  module Type

    java_import "weka.core.Attribute"
    java_import "weka.core.FastVector"

    # Re-opened weka.core.Attribute class; no methods are added here
    class Attribute
    end

    # Build a Numeric Attribute object
    # * *Args* :
    #   - +name_of_attr+ -> a String, the name of the attribute
    def self.create_numeric_attr(name_of_attr)
      Attribute.new(name_of_attr)
    end

    # Build a Date Attribute object
    # * *Args* :
    #   - +name_of_attr+ -> a String, the name of the attribute
    #   - +format+ -> the date format of the attribute
    def self.create_date_attr(name_of_attr,format)
      Attribute.new(name_of_attr, format)
    end

    # Build a Nominal Attribute object
    # * *Args* :
    #   - +name_of_attr+ -> a String, the name of the attribute
    #   - +values_list+ -> an Array, the list of nominal values
    def self.create_nominal_attr(name_of_attr,values_list)
      allowed_values = FastVector.new
      values_list.each { |label| allowed_values.addElement(label) }
      Attribute.new(name_of_attr, allowed_values)
    end

    # Build a String Attribute object
    # * *Args* :
    #   - +name_of_attr+ -> a String, the name of the attribute
    def self.create_string_attr(name_of_attr)
      # The (String, FastVector) constructor is looked up explicitly and then
      # invoked with nil in place of the vector.
      signature = [Java::java.lang.String, Java::weka.core.FastVector]
      Attribute.java_class.constructor(*signature).new_instance(name_of_attr, nil).to_java
    end
  end

end
52
+
53
+
@@ -0,0 +1,10 @@
1
module Core
  module Type

    java_import "weka.core.FastVector"
    java_import "weka.core.Instance"

    # Re-opened weka.core.Instance class (one row of a dataset).
    # No methods are added here; the class is only made available as Core::Type::Instance.
    class Instance
    end
  end
end
@@ -0,0 +1,332 @@
1
+ require 'java'
2
+ require 'ruport'
3
+ require 'json'
4
+
5
+ module Core
6
+ module Type
7
+
8
+ java_import "weka.core.Instances"
9
+ java_import 'java.io.File'
10
+ java_import 'weka.core.converters.CSVSaver'
11
+ java_import 'weka.core.converters.ArffSaver'
12
+ java_import "weka.core.FastVector"
13
+ java_import "weka.core.Instance"
14
+
15
+ #
16
+ # * *Description* :
17
+ # This is the main class from the Weka package for data handling. It is essentially a matrix: each row
18
+ # is an instance of the 'Instance' class, while each column is an instance of the 'Attribute' class
19
+ # The class 'Instances' is here extended to add custom functionalities
20
+ class Instances
21
+
22
# Convert an Instances object to a bidimensional Ruby array
# where each row corresponds to an Instance object
# * *Returns* :
#   - an Array of rows; numeric attributes yield their numeric value,
#     every other attribute yields its string value
# * *Note* :
#   Values are read through each attribute's own index rather than through the
#   position in the enumeration: per the Weka Javadoc, enumerateAttributes skips
#   the class attribute when a class index is set, so the two can differ.
def to_a2d
  columns = enumerateAttributes.map do |att|
    if att.isNumeric
      enumerateInstances.map { |inst| inst.value(att) }
    else
      position = att.index
      enumerateInstances.map { |inst| inst.string_value(position) }
    end
  end
  # Columns were collected attribute by attribute; transpose to get one row per instance
  columns.transpose
end
42
+
43
# Number of rows (Instance objects) held by the dataset
def n_rows
  numInstances
end
47
+
48
# Number of columns (Attribute objects) held by the dataset
def n_columns
  numAttributes
end
52
+
53
# Print to STDOUT the dimensions of the dataset (for the current Instances class object):
# the number of rows followed by the number of columns
def dim
  row_count = numInstances
  column_count = numAttributes
  puts "Rows number:\t#{row_count}\nColumns number:\t #{column_count}"
end
57
+
58
# Verify that every enumerated attribute of this dataset is Numeric
# * *Raises* :
#   - +ArgumentError+ -> at the first attribute that is not numeric
def check_numeric_instance
  enumerateAttributes.each do |candidate|
    next if candidate.isNumeric
    raise ArgumentError, "Sorry, attribute '#{candidate.name}' is not numeric!"
  end
end
66
+
67
# Build an Apache matrix from the present Instances object.
# All attributes must be Numeric, otherwise check_numeric_instance raises.
# NOTE(review): the rows are obtained with +to_a+, not with +to_a2d+ - confirm
# that this yields the bidimensional array the conversion helper expects.
def to_Apache_matrix
  check_numeric_instance
  rows = to_a
  Core::Type::Apache_matrix.new(Core::Utils.bidimensional_to_double(rows))
end
75
+
76
# Build an Apache matrix (block implementation) from the present Instances object.
# All attributes must be Numeric, otherwise check_numeric_instance raises.
# NOTE(review): the rows are obtained with +to_a+, not with +to_a2d+ - confirm
# that this yields the bidimensional array the conversion helper expects.
def to_Apache_matrix_block
  check_numeric_instance
  rows = to_a
  Core::Type::Apache_matrix_block.new(Core::Utils.bidimensional_to_double(rows))
end
84
+
85
# Return data for a single attribute (a column from the Instances object)
# * *Args* :
#   - +att+ -> a String, the name of the attribute
# * *Returns* :
#   - an Array with one entry per instance: the numeric value for a numeric
#     attribute, the string value otherwise
# * *Raises* :
#   - +ArgumentError+ -> if no attribute with that name is found
def return_attr_data(att)
  # Look the attribute up once instead of once per row
  target = attribute(att)
  raise ArgumentError, "Sorry, attribute '#{att}' does not exist!" if target.nil?

  if target.isNumeric
    enumerateInstances.map { |inst| inst.value(target) }
  else
    position = target.index
    enumerateInstances.map { |inst| inst.string_value(position) }
  end
end
102
+
103
# Return the mean value of a single attribute (a column from the Instances object)
# * *Args* :
#   - +attribute_name+ -> a String, the name of the attribute
# * *Returns* :
#   - a Float, the sum of the attribute values divided by the number of instances
#     (NaN when the dataset holds no instances)
# * *Raises* :
#   - +ArgumentError+ -> if no attribute with that name is found
def mean(attribute_name)
  # Look the attribute up once instead of once per row
  target = attribute(attribute_name)
  raise ArgumentError, "Sorry, attribute '#{attribute_name}' does not exist!" if target.nil?

  total = enumerateInstances.inject(0) { |acc, inst| acc + inst.value(target) }
  total / (numInstances * 1.0)
end
112
+
113
# Return the variance of a single attribute (a column from the Instances object)
# * *Args* :
#   - +attribute_name+ -> a String, the name of the attribute
# * *Returns* :
#   - the value computed by Weka's own Instances#variance(int)
# * *Raises* :
#   - +ArgumentError+ -> if no attribute with that name is found
def variance(attribute_name)
  target = attribute(attribute_name)
  raise ArgumentError, "Sorry, attribute '#{attribute_name}' does not exist!" if target.nil?

  # This Ruby method has the same name as the Java one, so a plain
  # variance(index) call would re-enter it; java_send addresses the
  # Java variance(int) overload explicitly.
  java_send(:variance, [Java::int], target.index)
end
121
+
122
# Save the current Instances object as a .csv file through a CSVSaver
# * *Args* :
#   - +out_file+ -> a String, the name of the output file
def to_CSV(out_file)
  destination = File.new(out_file)
  writer = CSVSaver.new
  writer.setInstances(self)
  writer.setFile(destination)
  writer.writeBatch
end
132
+
133
# Save the current Instances object as a .arff file through an ArffSaver
# * *Args* :
#   - +out_file+ -> a String, the name of the output file
def to_ARFF(out_file)
  destination = File.new(out_file)
  writer = ArffSaver.new
  writer.setInstances(self)
  writer.setFile(destination)
  writer.writeBatch
end
143
+
144
# Translate one cell into the value stored for the attribute at +position+
# * *Args* :
#   - +attribute_value+ -> the cell content (a number, a label, a date String...)
#   - +position+ -> the index of the attribute the cell belongs to
# * *Returns* :
#   - Date attribute: the parsed date for a String, any other value unchanged
#   - Numeric attribute: the value itself
#   - Nominal attribute: the index of the label
#   - String attribute: the index returned by addStringValue
#   - any other type: a message is printed and nil is returned
# * *Raises* :
#   - +ArgumentError+ -> if a nominal label is not among the attribute's values
def insert_attribute(attribute_value,position)
  target = attribute(position)
  if target.isDate
    # Tested before isNumeric: per the Weka Javadoc a date attribute also
    # reports itself as numeric, which would make this branch unreachable.
    attribute_value.is_a?(String) ? target.parseDate(attribute_value) : attribute_value
  elsif target.isNumeric
    attribute_value
  elsif target.isNominal
    idx = target.indexOfValue(attribute_value)
    if idx < 0
      raise ArgumentError, "Sorry, '#{attribute_value}' is not a valid value for attribute '#{target.name}'!"
    end
    idx
  elsif target.isString
    target.addStringValue(attribute_value)
  else
    puts 'Attribute type is unknown!'
  end
end
private :insert_attribute
159
+
160
# Check that +data+ is bidimensional and that all of its rows have the same length
# * *Args* :
#   - +data+ -> the candidate dataset
# * *Returns* :
#   - true when +data+ is a collection of row collections of equal length
#     (an empty collection is accepted), false otherwise
def check_array(data)
  return false unless data.respond_to?(:map)

  lengths = data.map do |row|
    row.respond_to?(:size) && row.respond_to?(:each_with_index) ? row.size : nil
  end
  !lengths.include?(nil) && lengths.uniq.size <= 1
end

# An entire dataset is inserted 'by row' into the current Instances object
# i.e. one Instance object is inserted at the time
# * *Args* :
#   - +data+ -> a bidimensional array
# * *Raises* :
#   - +ArgumentError+ -> if +data+ does not pass check_array; nothing is inserted in that case
def populate_by_row(data)
  unless check_array(data)
    raise ArgumentError, 'Sorry, data must be a bidimensional array with rows of equal length!'
  end
  data.each { |row| add_instance(row) }
end
177
+
178
# Append one row (an Instance object) to the current Instances object
# * *Args* :
#   - +instance+ -> an array of values of the correct data type (:nominal,:numeric,etc...)
# Each value is first translated by insert_attribute; the resulting values are
# converted to a Java double array and wrapped in an Instance of weight 1.0.
def add_instance(instance)
  encoded = []
  instance.each_with_index { |cell, column| encoded << insert_attribute(cell, column) }
  add(Instance.new(1.0, encoded.to_java(:double)))
end
190
+
191
# Append a new numeric Attribute after the existing attributes of the current Instances object
# * *Args* :
#   - +attribute_name+ -> A name for the new attribute
# * *WARNING* :
#   This method only creates an empty attribute field
def add_numeric_attribute(attribute_name)
  new_attribute = Attribute.new(attribute_name)
  insertAttributeAt(new_attribute, numAttributes)
end
199
+
200
# Append a new nominal Attribute after the existing attributes of the current Instances object
# * *Args* :
#   - +attribute+ -> A name for the new attribute
#   - +list_values+ -> RubyArray with nominal values
# * *WARNING* :
#   This method only creates an empty attribute field
def add_nominal_attribute(attribute,list_values)
  labels = FastVector.new
  list_values.each { |label| labels.addElement(label) }
  insertAttributeAt(Attribute.new(attribute, labels), numAttributes)
end
213
+
214
# Print to STDOUT the list of the Instances's attributes (with the corresponding types),
# followed by the number of rows.
# The table has one column per enumerated attribute and two rows: 'Names' and 'Types'.
def summary
  table = Ruport::Data::Table::new
  table.add_column 'Attributes'

  att_names = ['Names']
  att_types = ['Types']
  enumerateAttributes.each_with_index do |att, idx|
    table.add_column idx + 1
    att_names << "'#{att.name}'"
    # Exactly one label per attribute keeps the 'Types' row aligned with the columns.
    # isDate is tested first: per the Weka Javadoc a date attribute is also numeric.
    att_types << if att.isDate
                   'Date'
                 elsif att.isNumeric
                   'Numeric'
                 elsif att.isNominal
                   'Nominal'
                 elsif att.isString
                   'String'
                 else
                   'Unknown'
                 end
  end
  table << att_names
  table << att_types

  puts table

  puts "\nNumber of rows: #{numInstances}"
end
243
+
244
# Combine this dataset with another one through Instances.mergeInstances.
# The resulting set will have all the attributes of the first set plus all the
# attributes of the second set. The number of instances in both sets must be the same.
# * *Args* :
#   - +instances+ -> An Instances class object
def merge_with(instances)
  Instances.mergeInstances(self, instances)
end
252
+
253
+ # This method creates an Instances object (see Cucumber documentation for further details)
254
+ # def self.create
255
+ # name = 'Instances'
256
+ # instances = Core::Type.create_instances(name,@@positions)
257
+ # return instances
258
+ # end
259
+
260
# Attribute declarations are stored per class (in a class-level instance variable)
# instead of in one class variable shared by every dataset class, so that two
# sibling Base subclasses no longer see each other's attributes.

# Attributes declared directly on the receiving class (created on first use)
def self.own_positions
  @positions ||= []
end
private_class_method :own_positions

# Attributes declared for this class: those of its ancestors first, then its own
# * *Returns* :
#   - a new Array of Attribute objects, in declaration order
def self.positions
  inherited = superclass.respond_to?(:positions) ? superclass.positions : []
  inherited + own_positions
end

# This method is used for attributes definition in uninitialized Instances-derived classes
# * *Args* :
#   - +attr_type+ -> one of :numeric, :nominal, :date, :string
#   - +name+ -> Attribute name, a String
#   - +values+ -> optional; the first extra argument is the list of nominal values
#     (for :nominal) or the date format (for :date)
# * *Raises* :
#   - +ArgumentError+ -> if +attr_type+ is not one of the supported types
def self.att(attr_type,name,*values)
  java_name = name.to_java(:string)
  att = case attr_type
        when :numeric then Core::Type.create_numeric_attr(java_name)
        when :nominal then Core::Type.create_nominal_attr(java_name, values[0])
        when :date    then Core::Type.create_date_attr(java_name, values[0])
        when :string  then Core::Type.create_string_attr(java_name)
        else raise ArgumentError, "Sorry, attribute type '#{attr_type}' is unknown!"
        end
  own_positions << att
end

# This method is used for Nominal attributes definition in uninitialized Instances-derived classes
# * *Args* :
#   - +name+ -> Attribute name, a String
#   - +values+ -> An array of values for the nominal attribute
def self.nominal(name,values)
  att :nominal, name, values
end

# This method is used for Numeric attributes definition in uninitialized Instances-derived classes
# * *Args* :
#   - +name+ -> Attribute name, a String
def self.numeric(name)
  att :numeric, name
end

# This method is used for Date attributes definition in uninitialized Instances-derived classes
# * *Args* :
#   - +name+ -> Attribute name, a String
#   - +format+ -> optional date format String; the default is the ISO-8601 pattern
#     that the Weka Javadoc documents as Attribute's default date format
def self.date(name, format = "yyyy-MM-dd'T'HH:mm:ss")
  att :date, name, format
end

# This method is used for String attributes definition in uninitialized Instances-derived classes
# * *Args* :
#   - +name+ -> Attribute name, a String
def self.string(name)
  att :string, name
end

# Class used for the creation of a new dataset (Instances class).
# A new object is an empty dataset named 'Instances' whose attributes are the
# ones declared for the class through att/nominal/numeric/date/string.
class Base < Instances
  def initialize
    attributes_vector = FastVector.new
    self.class.positions.each { |value| attributes_vector.addElement(value) }
    super('Instances',attributes_vector,0)
  end
end
307
+
308
# Return a json String for the current Instances object
# The output is modeled on the 'datatable' Google charts APIs
# More details at: 'https://developers.google.com/chart/interactive/docs/reference#DataTable'
# * *Args* :
#   - any arguments are accepted and ignored, so that the method can also be
#     called by the JSON generator (which passes its state to +to_json+)
# * *Returns* :
#   - a pretty-printed JSON String with two keys: 'cols' (the attribute names)
#     and 'rows' (the toString form of every instance)
def to_json(*_args)
  dataset_hash = {
    :cols => enumerateAttributes.map { |attribute| attribute.name },
    :rows => enumerateInstances.map { |instance| instance.toString }
  }
  JSON.pretty_generate(dataset_hash)
end
317
+ end #Instances class
318
+
319
# Build a new, empty Instances object from a list of attributes
# * *Args* :
#   - +name+ -> A name for the Instances object
#   - +attributes+ -> An array containing Attribute objects
def Type.create_instances(name,attributes)
  vector = FastVector.new
  attributes.each { |att| vector.addElement(att) }
  Instances.new(name, vector, 0)
end
328
+ end
329
+ end
330
+
331
+
332
+