bio-band 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. data/VERSION +1 -1
  2. data/bio-band.gemspec +8 -3
  3. data/features/step_definitions/create_dataset.rb +16 -17
  4. data/features/step_definitions/weka_clustering.rb +2 -2
  5. data/features/step_definitions/weka_filters.rb +12 -9
  6. data/features/step_definitions/weka_parsers.rb +13 -13
  7. data/lib/bio-band/core/type/instances.rb +33 -14
  8. data/lib/bio-band/weka.rb +3 -1
  9. data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb +18 -0
  10. data/lib/bio-band/weka/attribute_selection/evaluators.rb +21 -0
  11. data/lib/bio-band/weka/attribute_selection/search.rb +26 -0
  12. data/lib/bio-band/weka/classifiers/bayes/bayes.rb +74 -54
  13. data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +43 -23
  14. data/lib/bio-band/weka/classifiers/evaluation.rb +1 -1
  15. data/lib/bio-band/weka/classifiers/functions/functions.rb +157 -2
  16. data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +45 -25
  17. data/lib/bio-band/weka/classifiers/lazy/lazy.rb +69 -4
  18. data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +48 -28
  19. data/lib/bio-band/weka/classifiers/mi/mi.rb +190 -0
  20. data/lib/bio-band/weka/classifiers/mi/mi_utils.rb +65 -0
  21. data/lib/bio-band/weka/classifiers/rules/rules.rb +190 -0
  22. data/lib/bio-band/weka/classifiers/rules/rules_utils.rb +45 -25
  23. data/lib/bio-band/weka/classifiers/trees/trees.rb +66 -0
  24. data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +47 -27
  25. data/lib/bio-band/weka/clusterers/clusterers.rb +34 -0
  26. data/lib/bio-band/weka/clusterers/clusterers_utils.rb +2 -4
  27. data/lib/bio-band/weka/db/db.rb +67 -67
  28. data/lib/bio-band/weka/filters/supervised/attribute/attribute.rb +31 -1
  29. data/lib/bio-band/weka/filters/supervised/supervised_utils.rb +33 -31
  30. data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +12 -0
  31. data/lib/bio-band/weka/filters/unsupervised/unsupervised_utils.rb +29 -29
  32. metadata +8 -3
  33. data/lib/bio-band/weka/classifiers/rules/rules.rb +0 -32
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.2
1
+ 0.1.3
data/bio-band.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-band"
8
- s.version = "0.1.2"
8
+ s.version = "0.1.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["arrigonialberto86"]
12
- s.date = "2013-07-30"
12
+ s.date = "2013-09-02"
13
13
  s.description = "Data mining and machine learning algorithms for JRuby "
14
14
  s.email = "arrigonialberto86@gmail.com"
15
15
  s.executables = ["bio-band"]
@@ -55,6 +55,9 @@ Gem::Specification.new do |s|
55
55
  "lib/bio-band/core/type/instances.rb",
56
56
  "lib/bio-band/core/type/utils.rb",
57
57
  "lib/bio-band/weka.rb",
58
+ "lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb",
59
+ "lib/bio-band/weka/attribute_selection/evaluators.rb",
60
+ "lib/bio-band/weka/attribute_selection/search.rb",
58
61
  "lib/bio-band/weka/classifiers/bayes/bayes.rb",
59
62
  "lib/bio-band/weka/classifiers/bayes/bayes_utils.rb",
60
63
  "lib/bio-band/weka/classifiers/evaluation.rb",
@@ -62,7 +65,9 @@ Gem::Specification.new do |s|
62
65
  "lib/bio-band/weka/classifiers/functions/functions_utils.rb",
63
66
  "lib/bio-band/weka/classifiers/lazy/lazy.rb",
64
67
  "lib/bio-band/weka/classifiers/lazy/lazy_utils.rb",
65
- "lib/bio-band/weka/classifiers/rules/rules.rb ",
68
+ "lib/bio-band/weka/classifiers/mi/mi.rb",
69
+ "lib/bio-band/weka/classifiers/mi/mi_utils.rb",
70
+ "lib/bio-band/weka/classifiers/rules/rules.rb",
66
71
  "lib/bio-band/weka/classifiers/rules/rules_utils.rb",
67
72
  "lib/bio-band/weka/classifiers/trees/trees.rb",
68
73
  "lib/bio-band/weka/classifiers/trees/trees_utils.rb",
@@ -4,37 +4,36 @@ Given(/^a nominal attribute, named "(.*?)", with values "(.*?)"$/) do |arg1, arg
4
4
  end
5
5
 
6
6
  Given(/^one numeric attribute, named "(.*?)"$/) do |arg1|
7
- $second_att = arg1.to_sym
7
+ $second_att = arg1.to_sym
8
8
  end
9
9
 
10
10
  Given(/^another numeric attribute, names "(.*?)"$/) do |arg1|
11
- $third_att = arg1.to_sym
11
+ $third_att = arg1.to_sym
12
12
  end
13
13
 
14
14
  Given(/^two data rows: "(.*?)","(.*?)"$/) do |arg1, arg2|
15
- @first_row = arg1.split(',')
16
- @second_row = arg2.split(',')
17
- @first_row[1] = @first_row[1].to_f
18
- @first_row[2] = @first_row[2].to_f
19
- @second_row[1] = @second_row[1].to_f
20
- @second_row[2] = @second_row[2].to_f
15
+ $first_row = arg1.split(',')
16
+ $second_row = arg2.split(',')
17
+ $first_row[1] = $first_row[1].to_f
18
+ $first_row[2] = $first_row[2].to_f
19
+ $second_row[1] = $second_row[1].to_f
20
+ $second_row[2] = $second_row[2].to_f
21
21
  end
22
22
 
23
23
  Then(/^I want to build en empty dataset for my use$/) do
24
- class Dataset < Core::Type::Instances::Base
25
- nominal $first_att, $values
26
- numeric $second_att
27
- string $third_att
24
+ $my_instance = Core::Type::Instances::Base.new do
25
+ nominal $first_att, $values
26
+ numeric $second_att
27
+ string $third_att
28
28
  end
29
- @my_instance = Dataset.new
30
- @my_instance.summary
29
+ $my_instance.summary
31
30
  end
32
31
 
33
32
  Then(/^I want to populate the dataset by row$/) do
34
- @my_instance.populate_by_row([@first_row,@second_row])
35
- @my_instance.summary
33
+ $my_instance.populate_by_row([$first_row,$second_row])
34
+ $my_instance.summary
36
35
  end
37
36
 
38
37
  Then(/^I want to print my dataset as a bidimensional Ruby Array$/) do
39
- puts @my_instance.to_a2d.inspect
38
+ puts $my_instance.to_a2d.inspect
40
39
  end
@@ -30,5 +30,5 @@ Then(/^I want to report result statistics$/) do
30
30
  end
31
31
 
32
32
  Then(/^I want to use Weka clustering cross\-validation$/) do
33
- puts @clustered.validate
34
- end
33
+ puts @clustered.evaluate
34
+ end
@@ -1,29 +1,32 @@
1
1
  Given(/^the example ARFF file "(.*?)"$/) do |arff_file|
2
- @arff = File.join('resources',arff_file)
2
+ @arff = File.join('resources',arff_file)
3
3
  end
4
4
 
5
5
  Given(/^the Weka Attribute Add filter$/) do
6
- @filter = Weka::Filter::Unsupervised::Attribute::Add.new
6
+ @filter = Weka::Filter::Unsupervised::Attribute::Add.new
7
7
  end
8
8
 
9
9
  Then(/^I want to parse the file in order to create an Instances class object$/) do
10
- @dataset_ARFF = Core::Parser::parse_ARFF(@arff)
11
- @dataset_ARFF.summary
10
+ $dataset_ARFF = Core::Parser::parse_ARFF(@arff)
11
+ puts $dataset_ARFF.summary
12
12
  end
13
13
 
14
14
  Then(/^I want to print the available filter options and usage$/) do
15
- @filter.filter_options
15
+ @filter.options_list
16
16
  end
17
17
 
18
18
  Then(/^I want to set the option String "(.*?)"$/) do |arg1|
19
- @filter.set_filter_options(arg1)
20
- @filter.set_data(@dataset_ARFF)
19
+ $arg1 = arg1
20
+ @filter.set do
21
+ filter_options $arg1
22
+ data $dataset_ARFF
23
+ end
21
24
  end
22
25
 
23
26
  Then(/^I want to add an attribute \(a column\) to the dataset using the Weka filter Add$/) do
24
- @new_inst = @filter.use
27
+ @new_inst = @filter.use
25
28
  end
26
29
 
27
30
  Then(/^I want to print a "(.*?)" for the modified dataset$/) do |arg1|
28
- @new_inst.send arg1.to_sym
31
+ @new_inst.send arg1.to_sym
29
32
  end
@@ -1,20 +1,20 @@
1
1
  Given /^the CSV file "(.*?)"$/ do |csv_file|
2
- @csv = File.join('resources',csv_file)
3
- @dataset_CSV = Core::Parser::parse_CSV(@csv)
2
+ @csv = File.join('resources',csv_file)
3
+ @dataset_CSV = Core::Parser::parse_CSV(@csv)
4
4
  end
5
5
 
6
6
  Then /^I want to print to stdout the summary for the CSV parsed Instances object$/ do
7
- @dataset_CSV.summary
7
+ @dataset_CSV.summary
8
8
  end
9
9
 
10
10
  Given /^the ARFF file "(.*?)"$/ do |arff_file|
11
- @arff = File.join('resources',arff_file)
12
- puts @arff
13
- @dataset_ARFF = Core::Parser::parse_ARFF(@arff)
11
+ @arff = File.join('resources',arff_file)
12
+ puts @arff
13
+ @dataset_ARFF = Core::Parser::parse_ARFF(@arff)
14
14
  end
15
15
 
16
16
  Then /^I want to print to stdout the summary for the ARFF parsed Instances object$/ do
17
- @dataset_ARFF.summary
17
+ @dataset_ARFF.summary
18
18
  end
19
19
 
20
20
  Given(/^the database "(.*?)"$/) do |arg1|
@@ -22,7 +22,7 @@ Given(/^the database "(.*?)"$/) do |arg1|
22
22
  end
23
23
 
24
24
  Given(/^a table named "(.*?)"$/) do |arg1|
25
- @target_table = arg1
25
+ @target_table = arg1
26
26
  end
27
27
 
28
28
  Then(/^I want to extract data from that table$/) do
@@ -34,11 +34,11 @@ Then(/^I want to print to stdout the summary for the parsed Instances object$/)
34
34
  end
35
35
 
36
36
  Then(/^I want to convert the data into a bidimensional Ruby Array$/) do
37
- @dataset.to_a2d.should == [["sunny", 85.0, 85.0, "FALSE", "no"], ["sunny", 80.0, 90.0, "TRUE", "no"], ["overcast", 83.0, 86.0, "FALSE", "yes"],
38
- ["rainy", 70.0, 96.0, "FALSE", "yes"], ["rainy", 68.0, 80.0, "FALSE", "yes"], ["rainy", 65.0, 70.0, "TRUE", "no"],
39
- ["overcast", 64.0, 65.0, "TRUE", "yes"], ["sunny", 72.0, 95.0, "FALSE", "no"], ["sunny", 69.0, 70.0, "FALSE", "yes"],
40
- ["rainy", 75.0, 80.0, "FALSE", "yes"], ["sunny", 75.0, 70.0, "TRUE", "yes"], ["overcast", 72.0, 90.0, "TRUE", "yes"],
41
- ["overcast", 81.0, 75.0, "FALSE", "yes"], ["rainy", 71.0, 91.0, "TRUE", "no"]]
37
+ @dataset.to_a2d.should == [["sunny", 85.0, 85.0, "FALSE", "no"], ["sunny", 80.0, 90.0, "TRUE", "no"], ["overcast", 83.0, 86.0, "FALSE", "yes"],
38
+ ["rainy", 70.0, 96.0, "FALSE", "yes"], ["rainy", 68.0, 80.0, "FALSE", "yes"], ["rainy", 65.0, 70.0, "TRUE", "no"],
39
+ ["overcast", 64.0, 65.0, "TRUE", "yes"], ["sunny", 72.0, 95.0, "FALSE", "no"], ["sunny", 69.0, 70.0, "FALSE", "yes"],
40
+ ["rainy", 75.0, 80.0, "FALSE", "yes"], ["sunny", 75.0, 70.0, "TRUE", "yes"], ["overcast", 72.0, 90.0, "TRUE", "yes"],
41
+ ["overcast", 81.0, 75.0, "FALSE", "yes"], ["rainy", 71.0, 91.0, "TRUE", "no"]]
42
42
  end
43
43
 
44
44
 
@@ -3,6 +3,8 @@ require 'ruport'
3
3
  require 'json'
4
4
 
5
5
  module Core
6
+
7
+ java_import "weka.core.SerializationHelper"
6
8
  module Type
7
9
 
8
10
  java_import "weka.core.Instances"
@@ -46,7 +48,7 @@ module Core
46
48
  end
47
49
 
48
50
  # Return the number of columns (Attribute objects) in the dataset
49
- def n_columns
51
+ def n_col
50
52
  return numAttributes
51
53
  end
52
54
 
@@ -55,6 +57,14 @@ module Core
55
57
  puts "Rows number:\t#{numInstances}\nColumns number:\t #{numAttributes}"
56
58
  end
57
59
 
60
+ def each_row
61
+ enumerate_instances.each {|inst| yield(inst)}
62
+ end
63
+
64
+ def each_column
65
+ enumerate_attributes.each {|attribute| yield(attribute)}
66
+ end
67
+
58
68
  # Check if this instance's attributes are all Numeric
59
69
  def check_numeric_instance
60
70
  enumerateAttributes.each do |att|
@@ -160,7 +170,7 @@ module Core
160
170
  # (check function): should check that the array is bidimensional and that
161
171
  # the lengths are equal
162
172
  def check_array(data)
163
- return true
173
+ return true # still to be done
164
174
  end
165
175
 
166
176
  # An entire dataset is inserted 'by row' into the current Instances object
@@ -236,9 +246,11 @@ module Core
236
246
 
237
247
  puts summary
238
248
 
239
- count=0
240
- enumerateInstances.each {|inst| count=count+1}
241
- puts "\nNumber of rows: #{count}"
249
+ unless enumerate_instances.nil?
250
+ count=0
251
+ enumerateInstances.each {|inst| count=count+1}
252
+ puts "\nNumber of rows: #{count}"
253
+ end
242
254
  end
243
255
 
244
256
  # Merges two sets of Instances together. The resulting set will have all the
@@ -257,50 +269,51 @@ module Core
257
269
  # return instances
258
270
  # end
259
271
 
260
- @@positions = []
261
272
  # This method is used for attributes definition in uninitialized Instances-derived classes
262
- def self.att(attr_type,name,*values)
273
+ def att(attr_type,name,*values)
263
274
  att = Core::Type.create_numeric_attr(name.to_java(:string)) if attr_type == :numeric
264
275
  att = Core::Type.create_nominal_attr(name.to_java(:string),values[0]) if attr_type == :nominal
265
276
  att = Core::Type.create_date_attr(name.to_java(:string),values[0]) if attr_type == :date
266
277
  att = att = Core::Type.create_string_attr(name.to_java(:string)) if attr_type == :string
267
- @@positions << att
278
+ @positions << att
268
279
  end
269
280
 
270
281
  # This method is used for Nominal attributes definition in uninitialized Instances-derived classes
271
282
  # * *Args* :
272
283
  # - +name+ -> Attribute name, a String
273
284
  # - +values+ -> An array of values for the nominal attribute
274
- def self.nominal(name,values)
285
+ def nominal(name,values)
275
286
  att :nominal, name, values
276
287
  end
277
288
 
278
289
  # This method is used for Numeric attributes definition in uninitialized Instances-derived classes
279
290
  # * *Args* :
280
291
  # - +name+ -> Attribute name, a String
281
- def self.numeric(name)
292
+ def numeric(name)
282
293
  att :numeric, name
283
294
  end
284
295
 
285
296
  # This method is used for Date attributes definition in uninitialized Instances-derived classes
286
297
  # * *Args* :
287
298
  # - +name+ -> Attribute name, a String
288
- def self.date(name)
299
+ def date(name)
289
300
  att :date, name
290
301
  end
291
302
 
292
303
  # This method is used for String attributes definition in uninitialized Instances-derived classes
293
304
  # * *Args* :
294
305
  # - +name+ -> Attribute name, a String
295
- def self.string(name)
306
+ def string(name)
296
307
  att :string, name
297
308
  end
298
309
 
299
310
  # Class used for the creation of a new dataset (Instances class)
300
311
  class Base < Instances
301
- def initialize
312
+ def initialize(&block)
302
313
  attributes_vector = FastVector.new
303
- @@positions.each {|value| attributes_vector.addElement(value)}
314
+ @positions = []
315
+ self.instance_eval(&block) if block
316
+ @positions.each {|value| attributes_vector.addElement(value)}
304
317
  super('Instances',attributes_vector,0)
305
318
  end
306
319
  end
@@ -325,7 +338,13 @@ module Core
325
338
  attributes.each {|value| attributes_vector.addElement(value)}
326
339
  return Instances.new(name,attributes_vector,0)
327
340
  end
341
+
342
+ end
343
+ # Helper class for serialization
344
+ # Works with classifiers, filters, clusterers...
345
+ class SerializationHelper
328
346
  end
347
+
329
348
  end
330
349
 
331
350
 
data/lib/bio-band/weka.rb CHANGED
@@ -9,4 +9,6 @@ require 'bio-band/weka/classifiers/functions/functions'
9
9
  require 'bio-band/weka/classifiers/trees/trees'
10
10
  require 'bio-band/weka/classifiers/lazy/lazy'
11
11
  require 'bio-band/weka/classifiers/rules/rules'
12
- require 'bio-band/weka/clusterers/clusterers'
12
+ require 'bio-band/weka/clusterers/clusterers'
13
+ require 'bio-band/weka/attribute_selection/evaluators'
14
+ require 'bio-band/weka/attribute_selection/search'
@@ -0,0 +1,18 @@
1
+ module Attribute_selection_Utils
2
+ java_import "weka.core.Utils"
3
+
4
+ #Instance methods list
5
+ def options_list
6
+ listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
7
+ end
8
+
9
+ def select_options(options_string)
10
+ options = Utils.splitOptions(options_string)
11
+ set_options(options)
12
+ end
13
+
14
+ def description
15
+ globalInfo
16
+ end
17
+
18
+ end
@@ -0,0 +1,21 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ require 'attribute_selection_utils'
3
+
4
+ module Weka
5
+ module Attribute_selection
6
+ module Evaluator
7
+ java_import 'weka.attributeSelection.CfsSubsetEval'
8
+ java_import 'weka.attributeSelection.ChiSquaredAttributeEval'
9
+
10
+ class CfsSubsetEval
11
+ include Attribute_selection_Utils
12
+ java_alias :use_options , :setOptions, [Java::Java.lang.String[]]
13
+ end
14
+
15
+ class ChiSquaredAttributeEval
16
+ include Attribute_selection_Utils
17
+ end
18
+
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,26 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ require 'attribute_selection_utils'
3
+
4
+ module Weka
5
+ module Attribute_selection
6
+ module Search
7
+
8
+ java_import 'weka.attributeSelection.GreedyStepwise'
9
+ java_import 'weka.attributeSelection.RankSearch'
10
+ java_import 'weka.attributeSelection.Ranker'
11
+
12
+ class GreedyStepwise
13
+ include Attribute_selection_Utils
14
+ end
15
+
16
+ class Ranker
17
+ include Attribute_selection_Utils
18
+ end
19
+
20
+ class RankSearch
21
+ include Attribute_selection_Utils
22
+ end
23
+
24
+ end
25
+ end
26
+ end
@@ -2,64 +2,84 @@ $:.unshift File.dirname(__FILE__)
2
2
  require 'bayes_utils'
3
3
 
4
4
  module Weka
5
- module Classifier
6
- module Bayes
7
- java_import "weka.classifiers.bayes.NaiveBayes"
8
- java_import "weka.classifiers.bayes.BayesianLogisticRegression"
9
- java_import "weka.classifiers.bayes.AODE"
10
- java_import "weka.classifiers.bayes.ComplementNaiveBayes"
11
- java_import "weka.classifiers.bayes.WAODE"
5
+ module Classifier
6
+ module Bayes
7
+ java_import "weka.classifiers.bayes.NaiveBayes"
8
+ java_import "weka.classifiers.bayes.BayesianLogisticRegression"
9
+ java_import "weka.classifiers.bayes.AODE"
10
+ java_import "weka.classifiers.bayes.ComplementNaiveBayes"
11
+ java_import "weka.classifiers.bayes.WAODE"
12
12
 
13
- class NaiveBayes
14
- include Bayes_utils
15
- class Base < NaiveBayes
16
- def initialize
17
- super
18
- init_classifier
19
- end
20
- end
21
- end
13
+ class NaiveBayes
14
+ include Bayes_utils
15
+ class Base < NaiveBayes
16
+ def initialize(&block)
17
+ super
18
+ if block_given?
19
+ init_instance_classifier(&block)
20
+ else
21
+ init_classifier
22
+ end
23
+ end
24
+ end
25
+ end
22
26
 
23
- class AODE
24
- include Bayes_utils
25
- class Base < AODE
26
- def initialize
27
- super
28
- init_classifier
29
- end
30
- end
31
- end
27
+ class AODE
28
+ include Bayes_utils
29
+ class Base < AODE
30
+ def initialize(&block)
31
+ super
32
+ if block_given?
33
+ init_instance_classifier(&block)
34
+ else
35
+ init_classifier
36
+ end
37
+ end
38
+ end
39
+ end
32
40
 
33
- class BayesianLogisticRegression
34
- include Bayes_utils
35
- class Base < BayesianLogisticRegression
36
- def initialize
37
- super
38
- init_classifier
39
- end
40
- end
41
- end
41
+ class BayesianLogisticRegression
42
+ include Bayes_utils
43
+ class Base < BayesianLogisticRegression
44
+ def initialize(&block)
45
+ super
46
+ if block_given?
47
+ init_instance_classifier(&block)
48
+ else
49
+ init_classifier
50
+ end
51
+ end
52
+ end
53
+ end
42
54
 
43
- class ComplementNaiveBayes
44
- include Bayes_utils
45
- class Base < ComplementNaiveBayes
46
- def initialize
47
- super
48
- init_classifier
49
- end
50
- end
51
- end
55
+ class ComplementNaiveBayes
56
+ include Bayes_utils
57
+ class Base < ComplementNaiveBayes
58
+ def initialize(&block)
59
+ super
60
+ if block_given?
61
+ init_instance_classifier(&block)
62
+ else
63
+ init_classifier
64
+ end
65
+ end
66
+ end
67
+ end
52
68
 
53
- class WAODE
54
- include Bayes_utils
55
- class Base < WAODE
56
- def initialize
57
- super
58
- init_classifier
59
- end
60
- end
61
- end
69
+ class WAODE
70
+ include Bayes_utils
71
+ class Base < WAODE
72
+ def initialize(&block)
73
+ super
74
+ if block_given?
75
+ init_instance_classifier(&block)
76
+ else
77
+ init_classifier
78
+ end
79
+ end
80
+ end
81
+ end
62
82
 
63
- end
64
- end
83
+ end
84
+ end
65
85
  end