bio-band 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/Gemfile +20 -0
  2. data/Gemfile.lock +79 -0
  3. data/Jarfile +9 -0
  4. data/Jarfile.lock +10 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +54 -0
  7. data/Rakefile +54 -0
  8. data/VERSION +1 -0
  9. data/bin/bio-band +83 -0
  10. data/bio-band.gemspec +129 -0
  11. data/ext/mkrf_conf.rb +74 -0
  12. data/features/create_dataset.feature +12 -0
  13. data/features/step_definitions/create_dataset.rb +40 -0
  14. data/features/step_definitions/weka_classifiers.rb +42 -0
  15. data/features/step_definitions/weka_clustering.rb +30 -0
  16. data/features/step_definitions/weka_filters.rb +29 -0
  17. data/features/step_definitions/weka_parsers.rb +45 -0
  18. data/features/support/env.rb +3 -0
  19. data/features/weka_classifiers.feature +16 -0
  20. data/features/weka_clustering.feature +14 -0
  21. data/features/weka_filters.feature +12 -0
  22. data/features/weka_parsers.feature +18 -0
  23. data/features/weka_pipeline.feature +13 -0
  24. data/lib/bio-band.rb +10 -0
  25. data/lib/bio-band/apache.rb +1 -0
  26. data/lib/bio-band/apache/stat/inference.rb +145 -0
  27. data/lib/bio-band/core.rb +6 -0
  28. data/lib/bio-band/core/parser/parser.rb +23 -0
  29. data/lib/bio-band/core/type/apache_matrices.rb +35 -0
  30. data/lib/bio-band/core/type/attribute.rb +53 -0
  31. data/lib/bio-band/core/type/instance.rb +10 -0
  32. data/lib/bio-band/core/type/instances.rb +332 -0
  33. data/lib/bio-band/core/type/utils.rb +31 -0
  34. data/lib/bio-band/weka.rb +11 -0
  35. data/lib/bio-band/weka/classifiers/bayes/bayes.rb +75 -0
  36. data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +42 -0
  37. data/lib/bio-band/weka/classifiers/evaluation.rb +12 -0
  38. data/lib/bio-band/weka/classifiers/functions/functions.rb +23 -0
  39. data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +39 -0
  40. data/lib/bio-band/weka/classifiers/lazy/lazy.rb +23 -0
  41. data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +39 -0
  42. data/lib/bio-band/weka/classifiers/trees/trees.rb +48 -0
  43. data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +42 -0
  44. data/lib/bio-band/weka/clusterers/clusterers.rb +32 -0
  45. data/lib/bio-band/weka/clusterers/clusterers_utils.rb +49 -0
  46. data/lib/bio-band/weka/db/DatabaseUtils_mysql +280 -0
  47. data/lib/bio-band/weka/db/DatabaseUtils_postgresql +594 -0
  48. data/lib/bio-band/weka/db/db.rb +74 -0
  49. data/lib/bio-band/weka/filters/supervised/attribute/attribute.rb +25 -0
  50. data/lib/bio-band/weka/filters/supervised/instance/instance.rb +17 -0
  51. data/lib/bio-band/weka/filters/supervised/supervised_utils.rb +32 -0
  52. data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +70 -0
  53. data/lib/bio-band/weka/filters/unsupervised/instance/instance.rb +48 -0
  54. data/lib/bio-band/weka/filters/unsupervised/unsupervised_utils.rb +33 -0
  55. data/resources/weather.csv +15 -0
  56. data/resources/weather.numeric.arff +23 -0
  57. data/spec/bio-band_spec.rb +7 -0
  58. data/spec/spec_helper.rb +12 -0
  59. metadata +302 -0
@@ -0,0 +1,6 @@
1
+ require 'bio-band/core/type/instances'
2
+ require 'bio-band/core/type/instance'
3
+ require 'bio-band/core/parser/parser'
4
+ require 'bio-band/core/type/utils'
5
+ require 'bio-band/core/type/attribute'
6
+ require 'bio-band/core/type/apache_matrices'
@@ -0,0 +1,23 @@
1
+ require 'java'
2
+
3
+ module Core
4
+ module Parser
5
# Parse an ARFF file and create an Instances object
# * *Args* :
#   - +arff_file+ -> a String, the path of the ARFF file to read
# * *Returns* :
#   - a Core::Type::Instances object built from the file content
def Parser.parse_ARFF(arff_file)
  java_import 'java.io.FileReader'
  reader = FileReader.new arff_file
  begin
    Core::Type::Instances.new reader
  ensure
    # The reader is only needed while the dataset is being built;
    # close it so the file handle is not leaked.
    reader.close
  end
end
12
# Parse a CSV file and return the dataset produced by Weka's CSVLoader
# * *Args* :
#   - +csv_file+ -> a String, the path of the CSV file to read
def Parser.parse_CSV(csv_file)
  java_import 'weka.core.converters.CSVLoader'
  java_import 'java.io.File'
  csv_loader = CSVLoader.new
  csv_loader.setSource(File.new(csv_file))
  csv_loader.getDataSet
end
22
+ end
23
+ end
@@ -0,0 +1,35 @@
1
+ require 'java'
2
+
3
module Core
  module Type

    java_import 'org.apache.commons.math3.linear.BlockRealMatrix'
    java_import 'org.apache.commons.math3.linear.Array2DRowRealMatrix'

    # Ruby-flavoured aliases for the imported Java matrix classes
    Apache_matrix = Array2DRowRealMatrix
    Apache_matrix_block = BlockRealMatrix

    # * *Description* :
    # Linear algebra support in commons-math provides operations on real matrices
    # (both dense and sparse matrices are supported) and vectors. It features basic
    # operations (addition, subtraction ...) and decomposition algorithms that can be
    # used to solve linear systems either in the exact sense or in the least-squares sense.
    # The 'Apache_matrix' class represents a matrix with real numbers as entries.
    # The following basic matrix operations are supported:
    # - Matrix addition, subtraction, multiplication
    # - Scalar addition and multiplication
    # - Transpose
    # - Norm and trace
    # - Operation on a vector
    # The class is reopened here without adding any method.
    class Apache_matrix; end

    # Apache matrix implementation suited to dimensions above 50 or 100.
    # The class is reopened here without adding any method.
    class Apache_matrix_block; end

  end
end
@@ -0,0 +1,53 @@
1
+ require 'java'
2
+
3
+ module Core
4
+ module Type
5
+
6
+ java_import "weka.core.Attribute"
7
+ java_import "weka.core.FastVector"
8
+
9
# Reopens the imported weka.core.Attribute class under Core::Type;
# no methods are added here.
class Attribute; end
12
+
13
# Build and return a numeric Attribute object
# * *Args* :
#   - +name_of_attr+ -> a String, the name of the attribute
def self.create_numeric_attr(name_of_attr)
  Attribute.new(name_of_attr)
end
20
+
21
# Build and return a date Attribute object
# * *Args* :
#   - +name_of_attr+ -> a String, the name of the attribute
#   - +format+ -> the date format passed as second constructor argument
def self.create_date_attr(name_of_attr, format)
  Attribute.new(name_of_attr, format)
end
29
+
30
# Build and return a nominal Attribute object
# * *Args* :
#   - +name_of_attr+ -> a String, the name of the attribute
#   - +values_list+ -> an Array, the allowed nominal values (copied into a FastVector)
def self.create_nominal_attr(name_of_attr, values_list)
  allowed_values = FastVector.new
  values_list.each do |entry|
    allowed_values.addElement(entry)
  end
  Attribute.new(name_of_attr, allowed_values)
end
40
+
41
# Build and return a String Attribute object
# * *Args* :
#   - +name_of_attr+ -> a String, the name of the attribute
def self.create_string_attr(name_of_attr)
  # The (String, FastVector) constructor is selected explicitly through
  # reflection and invoked with nil in place of the FastVector.
  signature = [Java::java.lang.String, Java::weka.core.FastVector]
  Attribute.java_class.constructor(*signature).new_instance(name_of_attr, nil).to_java
end
49
+ end
50
+
51
+ end
52
+
53
+
@@ -0,0 +1,10 @@
1
module Core
  module Type

    java_import "weka.core.FastVector"
    java_import "weka.core.Instance"

    # Reopens the imported weka.core.Instance class under Core::Type;
    # no methods are added here.
    class Instance; end
  end
end
@@ -0,0 +1,332 @@
1
+ require 'java'
2
+ require 'ruport'
3
+ require 'json'
4
+
5
+ module Core
6
+ module Type
7
+
8
+ java_import "weka.core.Instances"
9
+ java_import 'java.io.File'
10
+ java_import 'weka.core.converters.CSVSaver'
11
+ java_import 'weka.core.converters.ArffSaver'
12
+ java_import "weka.core.FastVector"
13
+ java_import "weka.core.Instance"
14
+
15
+ #
16
+ # * *Description* :
17
+ # This is the main class from the Weka package for data handling. It is essentially a matrix: each row
18
+ # is an instance of the 'Instance' class, while each column is an instance of the 'Attribute' class
19
+ # The class 'Instances' is here extended to add custom functionalities
20
+ class Instances
21
+
22
# Convert an Instances object to a bidimensional Ruby array
# where each row corresponds to an Instance object.
# Numeric attributes contribute their numeric value, every other
# attribute contributes its string value.
# * *Returns* :
#   - an Array of rows (one Array per instance)
def to_a2d
  columns = enumerateAttributes.map do |att|
    # Use the attribute's own index in the dataset: the enumeration position
    # can differ from it, because enumerateAttributes skips the class
    # attribute when one is set.
    position = att.index
    if att.isNumeric
      enumerateInstances.map { |row| row.value(position) }
    else
      enumerateInstances.map { |row| row.string_value(position) }
    end
  end
  # Data was collected column by column; flip it to row-major order.
  columns.transpose
end
42
+
43
# Number of rows (Instance objects) held by the dataset
def n_rows
  numInstances
end
47
+
48
# Number of columns (Attribute objects) held by the dataset
def n_columns
  numAttributes
end
52
+
53
# Print the dimensions of the dataset (row and column counts) to STDOUT
# for the current Instances object
def dim
  rows = numInstances
  columns = numAttributes
  puts "Rows number:\t#{rows}\nColumns number:\t #{columns}"
end
57
+
58
# Verify that every enumerated attribute of this dataset is Numeric
# * *Raises* :
#   - +ArgumentError+ -> on the first attribute that is not numeric
def check_numeric_instance
  enumerateAttributes.each do |candidate|
    next if candidate.isNumeric
    raise ArgumentError, "Sorry, attribute '#{candidate.name}' is not numeric!"
  end
end
66
+
67
# Convert the present Instances object to an Apache matrix if every Instances attribute
# is Numeric
# * *Raises* :
#   - +ArgumentError+ -> (from check_numeric_instance) if an attribute is not numeric
def to_Apache_matrix
  check_numeric_instance
  # to_a2d is the bidimensional (row-major) converter defined by this class;
  # the helper below expects such a bidimensional Ruby array.
  java_double_array = Core::Utils::bidimensional_to_double(to_a2d)
  Core::Type::Apache_matrix.new(java_double_array)
end
75
+
76
# Convert the present Instances object to an Apache matrix (block) if every Instances attribute
# is Numeric
# * *Raises* :
#   - +ArgumentError+ -> (from check_numeric_instance) if an attribute is not numeric
def to_Apache_matrix_block
  check_numeric_instance
  # to_a2d is the bidimensional (row-major) converter defined by this class;
  # the helper below expects such a bidimensional Ruby array.
  java_double_array = Core::Utils::bidimensional_to_double(to_a2d)
  Core::Type::Apache_matrix_block.new(java_double_array)
end
84
+
85
# Collect the values of one attribute (a column of the Instances object)
# * *Args* :
#   - +att+ -> a String, the name of the attribute
# * *Returns* :
#   - an Array with numeric values for a numeric attribute, string values otherwise
def return_attr_data(att)
  column = attribute(att)
  if column.isNumeric
    enumerateInstances.map { |row| row.value(attribute(att)) }
  else
    position = column.index
    enumerateInstances.map { |row| row.string_value(position) }
  end
end
102
+
103
# Arithmetic mean of a single attribute (a column of the Instances object)
# * *Args* :
#   - +attribute_name+ -> a String, the name of the attribute
def mean(attribute_name)
  total = 0
  enumerateInstances.each do |row|
    total += row.value(attribute(attribute_name))
  end
  # Multiplying by 1.0 forces a floating point division
  total / (numInstances * 1.0)
end
112
+
113
# Return the variance of a single attribute (a column from the Instances object)
# * *Args* :
#   - +attribute_name+ -> a String, the name of the attribute
# * *Returns* :
#   - the variance computed by Weka, or nil when no attribute has that name
def variance(attribute_name)
  target = attribute(attribute_name)
  return nil if target.nil?
  # This Ruby method has the same name as the Java variance(int) method,
  # so a plain `variance(index)` call would re-enter this method.
  # java_send dispatches explicitly to the Java overload.
  java_send(:variance, [Java::int], target.index)
end
121
+
122
# Save the current Instances object as a .csv file through Weka's CSVSaver
# * *Args* :
#   - +out_file+ -> a String, the name of the output file
def to_CSV(out_file)
  destination = File.new(out_file)
  csv_saver = CSVSaver.new
  csv_saver.setInstances(self)
  csv_saver.setFile(destination)
  csv_saver.writeBatch
end
132
+
133
# Save the current Instances object as a .arff file through Weka's ArffSaver
# * *Args* :
#   - +out_file+ -> a String, the name of the output file
def to_ARFF(out_file)
  destination = File.new(out_file)
  arff_saver = ArffSaver.new
  arff_saver.setInstances(self)
  arff_saver.setFile(destination)
  arff_saver.writeBatch
end
143
+
144
# Translate one raw cell value into the value stored for the attribute at
# +position+ (add_instance later converts the collected values to doubles).
# * *Args* :
#   - +attribute_value+ -> the raw value supplied by the caller
#   - +position+ -> the index of the attribute the value belongs to
# * *Returns* :
#   - the value itself for numeric attributes, the label index for nominal
#     attributes, the parsed date for date attributes given as a String,
#     the index returned by addStringValue for string attributes
def insert_attribute(attribute_value, position)
  target = attribute(position)
  if target.isDate
    # Dates are tested first because Weka also reports date attributes as
    # numeric. Non-String values are passed through untouched, as before.
    attribute_value.is_a?(String) ? target.parseDate(attribute_value) : attribute_value
  elsif target.isNumeric
    attribute_value
  elsif target.isNominal
    target.indexOfValue(attribute_value)
  elsif target.isString
    # String attributes store an index into their own value table
    target.addStringValue(attribute_value)
  else
    puts 'Attribute type is unknown!'
  end
end
158
+ private :insert_attribute
159
+
160
# (check function): checks that the array is bidimensional and that
# all the rows have the same length
# * *Args* :
#   - +data+ -> the object to validate
# * *Returns* :
#   - true when +data+ is an Array whose elements are all Arrays of equal
#     length (an empty Array is accepted), false otherwise
def check_array(data)
  return false unless data.is_a?(Array)
  return false unless data.all? { |row| row.is_a?(Array) }
  # At most one distinct row length means the rows form a rectangle
  data.map { |row| row.length }.uniq.size <= 1
end
165
+
166
# Insert a whole dataset 'by row' into the current Instances object,
# i.e. one Instance object at a time.
# Nothing is inserted when check_array rejects the data.
# * *Args* :
#   - +data+ -> a bidimensional array
def populate_by_row(data)
  return nil if check_array(data) == false
  data.each { |row| add_instance(row) }
end
177
+
178
# Append one row (an Instance object) to the current Instances object
# * *Args* :
#   - +instance+ -> an array of values of the correct data type (:nominal,:numeric,etc...)
def add_instance(instance)
  # Each cell is first translated by insert_attribute, then the whole row
  # is converted to a Java double array with weight 1.0
  encoded = instance.each_with_index.map { |cell, column| insert_attribute(cell, column) }
  self.add(Instance.new(1.0, encoded.to_java(:double)))
end
190
+
191
# Append a numeric Attribute as the last column of the current Instances object
# * *Args* :
#   - +attribute_name+ -> A name for the new attribute
# * *WARNING* :
#   This method only creates an empty attribute field
def add_numeric_attribute(attribute_name)
  new_column = Attribute.new(attribute_name)
  insertAttributeAt(new_column, numAttributes)
end
199
+
200
# Append a nominal Attribute as the last column of the current Instances object
# * *Args* :
#   - +attribute+ -> A name for the new attribute
#   - +list_values+ -> RubyArray with nominal values
# * *WARNING* :
#   This method only creates an empty attribute field
def add_nominal_attribute(attribute, list_values)
  labels = list_values.inject(FastVector.new) do |vector, label|
    vector.addElement(label)
    vector
  end
  insertAttributeAt(Attribute.new(attribute, labels), numAttributes)
end
213
+
214
# Print to STDOUT the list of the Instances's attributes (with the corresponding types)
# followed by the number of rows.
# The table has one 'Attributes' column plus one numbered column per
# enumerated attribute; the 'Names' and 'Types' rows fill those columns.
def summary
  attributes = enumerateAttributes.to_a

  table = Ruport::Data::Table.new
  table.add_column 'Attributes'
  attributes.each_index { |idx| table.add_column idx + 1 }

  table << (['Names'] + attributes.map { |att| "'#{att.name}'" })

  # Exactly one label per attribute keeps the row aligned with the columns.
  # Date is tested before Numeric because Weka reports dates as numeric too.
  type_labels = attributes.map do |att|
    if att.isDate then "Date"
    elsif att.isNumeric then "Numeric"
    elsif att.isNominal then "Nominal"
    elsif att.isString then "String"
    else "Unknown"
    end
  end
  table << (['Types'] + type_labels)

  puts table
  puts "\nNumber of rows: #{numInstances}"
end
243
+
244
# Merge this dataset with another one. The result holds all the attributes
# of the current set followed by all the attributes of the other set.
# Both sets must contain the same number of instances.
# * *Args* :
#   - +instances+ -> An Instances class object
def merge_with(instances)
  Instances.mergeInstances(self, instances)
end
252
+
253
+ # This method creates an Instances object (see Cucumber documentation for further details)
254
+ # def self.create
255
+ # name = 'Instances'
256
+ # instances = Core::Type.create_instances(name,@@positions)
257
+ # return instances
258
+ # end
259
+
260
+ @@positions = []
261
# This method is used for attributes definition in uninitialized Instances-derived classes.
# The created Attribute is appended to the @@positions class variable, which
# is shared by every class that uses this method.
# * *Args* :
#   - +attr_type+ -> one of :numeric, :nominal, :date, :string
#   - +name+ -> Attribute name, a String
#   - +values+ -> optional; the first extra argument is the list of nominal
#     values (for :nominal) or the date format (for :date)
# * *Raises* :
#   - +ArgumentError+ -> when +attr_type+ is not one of the supported types
def self.att(attr_type, name, *values)
  java_name = name.to_java(:string)
  new_attribute =
    case attr_type
    when :numeric then Core::Type.create_numeric_attr(java_name)
    when :nominal then Core::Type.create_nominal_attr(java_name, values[0])
    when :date    then Core::Type.create_date_attr(java_name, values[0])
    when :string  then Core::Type.create_string_attr(java_name)
    else
      # Storing nil here would only surface later as an obscure failure
      raise ArgumentError, "Unknown attribute type '#{attr_type}'"
    end
  @@positions << new_attribute
end
269
+
270
# Declare a Nominal attribute in an uninitialized Instances-derived class
# * *Args* :
#   - +name+ -> Attribute name, a String
#   - +values+ -> An array of values for the nominal attribute
def self.nominal(name, values)
  att(:nominal, name, values)
end
277
+
278
# Declare a Numeric attribute in an uninitialized Instances-derived class
# * *Args* :
#   - +name+ -> Attribute name, a String
def self.numeric(name)
  att(:numeric, name)
end
284
+
285
# This method is used for Date attributes definition in uninitialized Instances-derived classes
# * *Args* :
#   - +name+ -> Attribute name, a String
#   - +format+ -> optional date format String; defaults to the ISO-8601
#     pattern that Weka uses as its default date format
def self.date(name, format = "yyyy-MM-dd'T'HH:mm:ss")
  # An explicit format is always forwarded: without it a nil would reach the
  # overloaded Java Attribute constructor and the overload choice is ambiguous.
  att :date, name, format
end
291
+
292
# Declare a String attribute in an uninitialized Instances-derived class
# * *Args* :
#   - +name+ -> Attribute name, a String
def self.string(name)
  att(:string, name)
end
298
+
299
# Class used for the creation of a new dataset (Instances class).
# The attributes collected in @@positions become the columns of an
# empty dataset named 'Instances'.
class Base < Instances
  def initialize
    declared = @@positions.inject(FastVector.new) do |vector, entry|
      vector.addElement(entry)
      vector
    end
    super('Instances', declared, 0)
  end
end
307
+
308
# Return a json String for the current Instances object
# The output is modeled on the 'datatable' Google charts APIs
# More details at: 'https://developers.google.com/chart/interactive/docs/reference#DataTable'
# * *Args* :
#   - any arguments are accepted and ignored, so the method can also be
#     invoked by the JSON generator, which passes a state object to to_json
# * *Returns* :
#   - a pretty-printed JSON String with the attribute names under 'cols'
#     and the textual form of each instance under 'rows'
def to_json(*_args)
  dataset_hash = {
    :cols => enumerateAttributes.map { |att| att.name },
    :rows => enumerateInstances.map { |row| row.toString }
  }
  JSON.pretty_generate(dataset_hash)
end
317
+ end #Instances class
318
+
319
# Build an empty Instances object from a list of attributes
# * *Args* :
#   - +name+ -> A name for the Instances object
#   - +attributes+ -> An array containing Attribute objects
def Type.create_instances(name, attributes)
  columns = attributes.inject(FastVector.new) do |vector, entry|
    vector.addElement(entry)
    vector
  end
  Instances.new(name, columns, 0)
end
328
+ end
329
+ end
330
+
331
+
332
+