bio-band 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33)
  1. data/VERSION +1 -1
  2. data/bio-band.gemspec +8 -3
  3. data/features/step_definitions/create_dataset.rb +16 -17
  4. data/features/step_definitions/weka_clustering.rb +2 -2
  5. data/features/step_definitions/weka_filters.rb +12 -9
  6. data/features/step_definitions/weka_parsers.rb +13 -13
  7. data/lib/bio-band/core/type/instances.rb +33 -14
  8. data/lib/bio-band/weka.rb +3 -1
  9. data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb +18 -0
  10. data/lib/bio-band/weka/attribute_selection/evaluators.rb +21 -0
  11. data/lib/bio-band/weka/attribute_selection/search.rb +26 -0
  12. data/lib/bio-band/weka/classifiers/bayes/bayes.rb +74 -54
  13. data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +43 -23
  14. data/lib/bio-band/weka/classifiers/evaluation.rb +1 -1
  15. data/lib/bio-band/weka/classifiers/functions/functions.rb +157 -2
  16. data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +45 -25
  17. data/lib/bio-band/weka/classifiers/lazy/lazy.rb +69 -4
  18. data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +48 -28
  19. data/lib/bio-band/weka/classifiers/mi/mi.rb +190 -0
  20. data/lib/bio-band/weka/classifiers/mi/mi_utils.rb +65 -0
  21. data/lib/bio-band/weka/classifiers/rules/rules.rb +190 -0
  22. data/lib/bio-band/weka/classifiers/rules/rules_utils.rb +45 -25
  23. data/lib/bio-band/weka/classifiers/trees/trees.rb +66 -0
  24. data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +47 -27
  25. data/lib/bio-band/weka/clusterers/clusterers.rb +34 -0
  26. data/lib/bio-band/weka/clusterers/clusterers_utils.rb +2 -4
  27. data/lib/bio-band/weka/db/db.rb +67 -67
  28. data/lib/bio-band/weka/filters/supervised/attribute/attribute.rb +31 -1
  29. data/lib/bio-band/weka/filters/supervised/supervised_utils.rb +33 -31
  30. data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +12 -0
  31. data/lib/bio-band/weka/filters/unsupervised/unsupervised_utils.rb +29 -29
  32. metadata +8 -3
  33. data/lib/bio-band/weka/classifiers/rules/rules.rb +0 -32
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.2
1
+ 0.1.3
data/bio-band.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-band"
8
- s.version = "0.1.2"
8
+ s.version = "0.1.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["arrigonialberto86"]
12
- s.date = "2013-07-30"
12
+ s.date = "2013-09-02"
13
13
  s.description = "Data mining and machine learning algorithms for JRuby "
14
14
  s.email = "arrigonialberto86@gmail.com"
15
15
  s.executables = ["bio-band"]
@@ -55,6 +55,9 @@ Gem::Specification.new do |s|
55
55
  "lib/bio-band/core/type/instances.rb",
56
56
  "lib/bio-band/core/type/utils.rb",
57
57
  "lib/bio-band/weka.rb",
58
+ "lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb",
59
+ "lib/bio-band/weka/attribute_selection/evaluators.rb",
60
+ "lib/bio-band/weka/attribute_selection/search.rb",
58
61
  "lib/bio-band/weka/classifiers/bayes/bayes.rb",
59
62
  "lib/bio-band/weka/classifiers/bayes/bayes_utils.rb",
60
63
  "lib/bio-band/weka/classifiers/evaluation.rb",
@@ -62,7 +65,9 @@ Gem::Specification.new do |s|
62
65
  "lib/bio-band/weka/classifiers/functions/functions_utils.rb",
63
66
  "lib/bio-band/weka/classifiers/lazy/lazy.rb",
64
67
  "lib/bio-band/weka/classifiers/lazy/lazy_utils.rb",
65
- "lib/bio-band/weka/classifiers/rules/rules.rb ",
68
+ "lib/bio-band/weka/classifiers/mi/mi.rb",
69
+ "lib/bio-band/weka/classifiers/mi/mi_utils.rb",
70
+ "lib/bio-band/weka/classifiers/rules/rules.rb",
66
71
  "lib/bio-band/weka/classifiers/rules/rules_utils.rb",
67
72
  "lib/bio-band/weka/classifiers/trees/trees.rb",
68
73
  "lib/bio-band/weka/classifiers/trees/trees_utils.rb",
data/features/step_definitions/create_dataset.rb CHANGED
@@ -4,37 +4,36 @@ Given(/^a nominal attribute, named "(.*?)", with values "(.*?)"$/) do |arg1, arg
4
4
  end
5
5
 
6
6
  Given(/^one numeric attribute, named "(.*?)"$/) do |arg1|
7
- $second_att = arg1.to_sym
7
+ $second_att = arg1.to_sym
8
8
  end
9
9
 
10
10
  Given(/^another numeric attribute, names "(.*?)"$/) do |arg1|
11
- $third_att = arg1.to_sym
11
+ $third_att = arg1.to_sym
12
12
  end
13
13
 
14
14
  Given(/^two data rows: "(.*?)","(.*?)"$/) do |arg1, arg2|
15
- @first_row = arg1.split(',')
16
- @second_row = arg2.split(',')
17
- @first_row[1] = @first_row[1].to_f
18
- @first_row[2] = @first_row[2].to_f
19
- @second_row[1] = @second_row[1].to_f
20
- @second_row[2] = @second_row[2].to_f
15
+ $first_row = arg1.split(',')
16
+ $second_row = arg2.split(',')
17
+ $first_row[1] = $first_row[1].to_f
18
+ $first_row[2] = $first_row[2].to_f
19
+ $second_row[1] = $second_row[1].to_f
20
+ $second_row[2] = $second_row[2].to_f
21
21
  end
22
22
 
23
23
  Then(/^I want to build en empty dataset for my use$/) do
24
- class Dataset < Core::Type::Instances::Base
25
- nominal $first_att, $values
26
- numeric $second_att
27
- string $third_att
24
+ $my_instance = Core::Type::Instances::Base.new do
25
+ nominal $first_att, $values
26
+ numeric $second_att
27
+ string $third_att
28
28
  end
29
- @my_instance = Dataset.new
30
- @my_instance.summary
29
+ $my_instance.summary
31
30
  end
32
31
 
33
32
  Then(/^I want to populate the dataset by row$/) do
34
- @my_instance.populate_by_row([@first_row,@second_row])
35
- @my_instance.summary
33
+ $my_instance.populate_by_row([$first_row,$second_row])
34
+ $my_instance.summary
36
35
  end
37
36
 
38
37
  Then(/^I want to print my dataset as a bidimensional Ruby Array$/) do
39
- puts @my_instance.to_a2d.inspect
38
+ puts $my_instance.to_a2d.inspect
40
39
  end
data/features/step_definitions/weka_clustering.rb CHANGED
@@ -30,5 +30,5 @@ Then(/^I want to report result statistics$/) do
30
30
  end
31
31
 
32
32
  Then(/^I want to use Weka clustering cross\-validation$/) do
33
- puts @clustered.validate
34
- end
33
+ puts @clustered.evaluate
34
+ end
data/features/step_definitions/weka_filters.rb CHANGED
@@ -1,29 +1,32 @@
1
1
  Given(/^the example ARFF file "(.*?)"$/) do |arff_file|
2
- @arff = File.join('resources',arff_file)
2
+ @arff = File.join('resources',arff_file)
3
3
  end
4
4
 
5
5
  Given(/^the Weka Attribute Add filter$/) do
6
- @filter = Weka::Filter::Unsupervised::Attribute::Add.new
6
+ @filter = Weka::Filter::Unsupervised::Attribute::Add.new
7
7
  end
8
8
 
9
9
  Then(/^I want to parse the file in order to create an Instances class object$/) do
10
- @dataset_ARFF = Core::Parser::parse_ARFF(@arff)
11
- @dataset_ARFF.summary
10
+ $dataset_ARFF = Core::Parser::parse_ARFF(@arff)
11
+ puts $dataset_ARFF.summary
12
12
  end
13
13
 
14
14
  Then(/^I want to print the available filter options and usage$/) do
15
- @filter.filter_options
15
+ @filter.options_list
16
16
  end
17
17
 
18
18
  Then(/^I want to set the option String "(.*?)"$/) do |arg1|
19
- @filter.set_filter_options(arg1)
20
- @filter.set_data(@dataset_ARFF)
19
+ $arg1 = arg1
20
+ @filter.set do
21
+ filter_options $arg1
22
+ data $dataset_ARFF
23
+ end
21
24
  end
22
25
 
23
26
  Then(/^I want to add an attribute \(a column\) to the dataset using the Weka filter Add$/) do
24
- @new_inst = @filter.use
27
+ @new_inst = @filter.use
25
28
  end
26
29
 
27
30
  Then(/^I want to print a "(.*?)" for the modified dataset$/) do |arg1|
28
- @new_inst.send arg1.to_sym
31
+ @new_inst.send arg1.to_sym
29
32
  end
data/features/step_definitions/weka_parsers.rb CHANGED
@@ -1,20 +1,20 @@
1
1
  Given /^the CSV file "(.*?)"$/ do |csv_file|
2
- @csv = File.join('resources',csv_file)
3
- @dataset_CSV = Core::Parser::parse_CSV(@csv)
2
+ @csv = File.join('resources',csv_file)
3
+ @dataset_CSV = Core::Parser::parse_CSV(@csv)
4
4
  end
5
5
 
6
6
  Then /^I want to print to stdout the summary for the CSV parsed Instances object$/ do
7
- @dataset_CSV.summary
7
+ @dataset_CSV.summary
8
8
  end
9
9
 
10
10
  Given /^the ARFF file "(.*?)"$/ do |arff_file|
11
- @arff = File.join('resources',arff_file)
12
- puts @arff
13
- @dataset_ARFF = Core::Parser::parse_ARFF(@arff)
11
+ @arff = File.join('resources',arff_file)
12
+ puts @arff
13
+ @dataset_ARFF = Core::Parser::parse_ARFF(@arff)
14
14
  end
15
15
 
16
16
  Then /^I want to print to stdout the summary for the ARFF parsed Instances object$/ do
17
- @dataset_ARFF.summary
17
+ @dataset_ARFF.summary
18
18
  end
19
19
 
20
20
  Given(/^the database "(.*?)"$/) do |arg1|
@@ -22,7 +22,7 @@ Given(/^the database "(.*?)"$/) do |arg1|
22
22
  end
23
23
 
24
24
  Given(/^a table named "(.*?)"$/) do |arg1|
25
- @target_table = arg1
25
+ @target_table = arg1
26
26
  end
27
27
 
28
28
  Then(/^I want to extract data from that table$/) do
@@ -34,11 +34,11 @@ Then(/^I want to print to stdout the summary for the parsed Instances object$/)
34
34
  end
35
35
 
36
36
  Then(/^I want to convert the data into a bidimensional Ruby Array$/) do
37
- @dataset.to_a2d.should == [["sunny", 85.0, 85.0, "FALSE", "no"], ["sunny", 80.0, 90.0, "TRUE", "no"], ["overcast", 83.0, 86.0, "FALSE", "yes"],
38
- ["rainy", 70.0, 96.0, "FALSE", "yes"], ["rainy", 68.0, 80.0, "FALSE", "yes"], ["rainy", 65.0, 70.0, "TRUE", "no"],
39
- ["overcast", 64.0, 65.0, "TRUE", "yes"], ["sunny", 72.0, 95.0, "FALSE", "no"], ["sunny", 69.0, 70.0, "FALSE", "yes"],
40
- ["rainy", 75.0, 80.0, "FALSE", "yes"], ["sunny", 75.0, 70.0, "TRUE", "yes"], ["overcast", 72.0, 90.0, "TRUE", "yes"],
41
- ["overcast", 81.0, 75.0, "FALSE", "yes"], ["rainy", 71.0, 91.0, "TRUE", "no"]]
37
+ @dataset.to_a2d.should == [["sunny", 85.0, 85.0, "FALSE", "no"], ["sunny", 80.0, 90.0, "TRUE", "no"], ["overcast", 83.0, 86.0, "FALSE", "yes"],
38
+ ["rainy", 70.0, 96.0, "FALSE", "yes"], ["rainy", 68.0, 80.0, "FALSE", "yes"], ["rainy", 65.0, 70.0, "TRUE", "no"],
39
+ ["overcast", 64.0, 65.0, "TRUE", "yes"], ["sunny", 72.0, 95.0, "FALSE", "no"], ["sunny", 69.0, 70.0, "FALSE", "yes"],
40
+ ["rainy", 75.0, 80.0, "FALSE", "yes"], ["sunny", 75.0, 70.0, "TRUE", "yes"], ["overcast", 72.0, 90.0, "TRUE", "yes"],
41
+ ["overcast", 81.0, 75.0, "FALSE", "yes"], ["rainy", 71.0, 91.0, "TRUE", "no"]]
42
42
  end
43
43
 
44
44
 
data/lib/bio-band/core/type/instances.rb CHANGED
@@ -3,6 +3,8 @@ require 'ruport'
3
3
  require 'json'
4
4
 
5
5
  module Core
6
+
7
+ java_import "weka.core.SerializationHelper"
6
8
  module Type
7
9
 
8
10
  java_import "weka.core.Instances"
@@ -46,7 +48,7 @@ module Core
46
48
  end
47
49
 
48
50
  # Return the number of columns (Attribute objects) in the dataset
49
- def n_columns
51
+ def n_col
50
52
  return numAttributes
51
53
  end
52
54
 
@@ -55,6 +57,14 @@ module Core
55
57
  puts "Rows number:\t#{numInstances}\nColumns number:\t #{numAttributes}"
56
58
  end
57
59
 
60
+ def each_row
61
+ enumerate_instances.each {|inst| yield(inst)}
62
+ end
63
+
64
+ def each_column
65
+ enumerate_attributes.each {|attribute| yield(attribute)}
66
+ end
67
+
58
68
  # Check if this instance's attributes are all Numeric
59
69
  def check_numeric_instance
60
70
  enumerateAttributes.each do |att|
@@ -160,7 +170,7 @@ module Core
160
170
  # (check function): should check that the array is bidimensional and that
161
171
  # the lengths are equal
162
172
  def check_array(data)
163
- return true
173
+ return true # still to be done
164
174
  end
165
175
 
166
176
  # An entire dataset is inserted 'by row' into the current Instances object
@@ -236,9 +246,11 @@ module Core
236
246
 
237
247
  puts summary
238
248
 
239
- count=0
240
- enumerateInstances.each {|inst| count=count+1}
241
- puts "\nNumber of rows: #{count}"
249
+ unless enumerate_instances.nil?
250
+ count=0
251
+ enumerateInstances.each {|inst| count=count+1}
252
+ puts "\nNumber of rows: #{count}"
253
+ end
242
254
  end
243
255
 
244
256
  # Merges two sets of Instances together. The resulting set will have all the
@@ -257,50 +269,51 @@ module Core
257
269
  # return instances
258
270
  # end
259
271
 
260
- @@positions = []
261
272
  # This method is used for attributes definition in uninitialized Instances-derived classes
262
- def self.att(attr_type,name,*values)
273
+ def att(attr_type,name,*values)
263
274
  att = Core::Type.create_numeric_attr(name.to_java(:string)) if attr_type == :numeric
264
275
  att = Core::Type.create_nominal_attr(name.to_java(:string),values[0]) if attr_type == :nominal
265
276
  att = Core::Type.create_date_attr(name.to_java(:string),values[0]) if attr_type == :date
266
277
  att = att = Core::Type.create_string_attr(name.to_java(:string)) if attr_type == :string
267
- @@positions << att
278
+ @positions << att
268
279
  end
269
280
 
270
281
  # This method is used for Nominal attributes definition in uninitialized Instances-derived classes
271
282
  # * *Args* :
272
283
  # - +name+ -> Attribute name, a String
273
284
  # - +values+ -> An array of values for the nominal attribute
274
- def self.nominal(name,values)
285
+ def nominal(name,values)
275
286
  att :nominal, name, values
276
287
  end
277
288
 
278
289
  # This method is used for Numeric attributes definition in uninitialized Instances-derived classes
279
290
  # * *Args* :
280
291
  # - +name+ -> Attribute name, a String
281
- def self.numeric(name)
292
+ def numeric(name)
282
293
  att :numeric, name
283
294
  end
284
295
 
285
296
  # This method is used for Date attributes definition in uninitialized Instances-derived classes
286
297
  # * *Args* :
287
298
  # - +name+ -> Attribute name, a String
288
- def self.date(name)
299
+ def date(name)
289
300
  att :date, name
290
301
  end
291
302
 
292
303
  # This method is used for String attributes definition in uninitialized Instances-derived classes
293
304
  # * *Args* :
294
305
  # - +name+ -> Attribute name, a String
295
- def self.string(name)
306
+ def string(name)
296
307
  att :string, name
297
308
  end
298
309
 
299
310
  # Class used for the creation of a new dataset (Instances class)
300
311
  class Base < Instances
301
- def initialize
312
+ def initialize(&block)
302
313
  attributes_vector = FastVector.new
303
- @@positions.each {|value| attributes_vector.addElement(value)}
314
+ @positions = []
315
+ self.instance_eval(&block) if block
316
+ @positions.each {|value| attributes_vector.addElement(value)}
304
317
  super('Instances',attributes_vector,0)
305
318
  end
306
319
  end
@@ -325,7 +338,13 @@ module Core
325
338
  attributes.each {|value| attributes_vector.addElement(value)}
326
339
  return Instances.new(name,attributes_vector,0)
327
340
  end
341
+
342
+ end
343
+ # Helper class for serialization
344
+ # Works with classifiers, filters, clusterers...
345
+ class SerializationHelper
328
346
  end
347
+
329
348
  end
330
349
 
331
350
 
data/lib/bio-band/weka.rb CHANGED
@@ -9,4 +9,6 @@ require 'bio-band/weka/classifiers/functions/functions'
9
9
  require 'bio-band/weka/classifiers/trees/trees'
10
10
  require 'bio-band/weka/classifiers/lazy/lazy'
11
11
  require 'bio-band/weka/classifiers/rules/rules'
12
- require 'bio-band/weka/clusterers/clusterers'
12
+ require 'bio-band/weka/clusterers/clusterers'
13
+ require 'bio-band/weka/attribute_selection/evaluators'
14
+ require 'bio-band/weka/attribute_selection/search'
data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb ADDED
@@ -0,0 +1,18 @@
1
+ module Attribute_selection_Utils
2
+ java_import "weka.core.Utils"
3
+
4
+ #Instance methods list
5
+ def options_list
6
+ listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
7
+ end
8
+
9
+ def select_options(options_string)
10
+ options = Utils.splitOptions(options_string)
11
+ set_options(options)
12
+ end
13
+
14
+ def description
15
+ globalInfo
16
+ end
17
+
18
+ end
data/lib/bio-band/weka/attribute_selection/evaluators.rb ADDED
@@ -0,0 +1,21 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ require 'attribute_selection_utils'
3
+
4
+ module Weka
5
+ module Attribute_selection
6
+ module Evaluator
7
+ java_import 'weka.attributeSelection.CfsSubsetEval'
8
+ java_import 'weka.attributeSelection.ChiSquaredAttributeEval'
9
+
10
+ class CfsSubsetEval
11
+ include Attribute_selection_Utils
12
+ java_alias :use_options , :setOptions, [Java::Java.lang.String[]]
13
+ end
14
+
15
+ class ChiSquaredAttributeEval
16
+ include Attribute_selection_Utils
17
+ end
18
+
19
+ end
20
+ end
21
+ end
data/lib/bio-band/weka/attribute_selection/search.rb ADDED
@@ -0,0 +1,26 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ require 'attribute_selection_utils'
3
+
4
+ module Weka
5
+ module Attribute_selection
6
+ module Search
7
+
8
+ java_import 'weka.attributeSelection.GreedyStepwise'
9
+ java_import 'weka.attributeSelection.RankSearch'
10
+ java_import 'weka.attributeSelection.Ranker'
11
+
12
+ class GreedyStepwise
13
+ include Attribute_selection_Utils
14
+ end
15
+
16
+ class Ranker
17
+ include Attribute_selection_Utils
18
+ end
19
+
20
+ class RankSearch
21
+ include Attribute_selection_Utils
22
+ end
23
+
24
+ end
25
+ end
26
+ end
data/lib/bio-band/weka/classifiers/bayes/bayes.rb CHANGED
@@ -2,64 +2,84 @@ $:.unshift File.dirname(__FILE__)
2
2
  require 'bayes_utils'
3
3
 
4
4
  module Weka
5
- module Classifier
6
- module Bayes
7
- java_import "weka.classifiers.bayes.NaiveBayes"
8
- java_import "weka.classifiers.bayes.BayesianLogisticRegression"
9
- java_import "weka.classifiers.bayes.AODE"
10
- java_import "weka.classifiers.bayes.ComplementNaiveBayes"
11
- java_import "weka.classifiers.bayes.WAODE"
5
+ module Classifier
6
+ module Bayes
7
+ java_import "weka.classifiers.bayes.NaiveBayes"
8
+ java_import "weka.classifiers.bayes.BayesianLogisticRegression"
9
+ java_import "weka.classifiers.bayes.AODE"
10
+ java_import "weka.classifiers.bayes.ComplementNaiveBayes"
11
+ java_import "weka.classifiers.bayes.WAODE"
12
12
 
13
- class NaiveBayes
14
- include Bayes_utils
15
- class Base < NaiveBayes
16
- def initialize
17
- super
18
- init_classifier
19
- end
20
- end
21
- end
13
+ class NaiveBayes
14
+ include Bayes_utils
15
+ class Base < NaiveBayes
16
+ def initialize(&block)
17
+ super
18
+ if block_given?
19
+ init_instance_classifier(&block)
20
+ else
21
+ init_classifier
22
+ end
23
+ end
24
+ end
25
+ end
22
26
 
23
- class AODE
24
- include Bayes_utils
25
- class Base < AODE
26
- def initialize
27
- super
28
- init_classifier
29
- end
30
- end
31
- end
27
+ class AODE
28
+ include Bayes_utils
29
+ class Base < AODE
30
+ def initialize(&block)
31
+ super
32
+ if block_given?
33
+ init_instance_classifier(&block)
34
+ else
35
+ init_classifier
36
+ end
37
+ end
38
+ end
39
+ end
32
40
 
33
- class BayesianLogisticRegression
34
- include Bayes_utils
35
- class Base < BayesianLogisticRegression
36
- def initialize
37
- super
38
- init_classifier
39
- end
40
- end
41
- end
41
+ class BayesianLogisticRegression
42
+ include Bayes_utils
43
+ class Base < BayesianLogisticRegression
44
+ def initialize(&block)
45
+ super
46
+ if block_given?
47
+ init_instance_classifier(&block)
48
+ else
49
+ init_classifier
50
+ end
51
+ end
52
+ end
53
+ end
42
54
 
43
- class ComplementNaiveBayes
44
- include Bayes_utils
45
- class Base < ComplementNaiveBayes
46
- def initialize
47
- super
48
- init_classifier
49
- end
50
- end
51
- end
55
+ class ComplementNaiveBayes
56
+ include Bayes_utils
57
+ class Base < ComplementNaiveBayes
58
+ def initialize(&block)
59
+ super
60
+ if block_given?
61
+ init_instance_classifier(&block)
62
+ else
63
+ init_classifier
64
+ end
65
+ end
66
+ end
67
+ end
52
68
 
53
- class WAODE
54
- include Bayes_utils
55
- class Base < WAODE
56
- def initialize
57
- super
58
- init_classifier
59
- end
60
- end
61
- end
69
+ class WAODE
70
+ include Bayes_utils
71
+ class Base < WAODE
72
+ def initialize(&block)
73
+ super
74
+ if block_given?
75
+ init_instance_classifier(&block)
76
+ else
77
+ init_classifier
78
+ end
79
+ end
80
+ end
81
+ end
62
82
 
63
- end
64
- end
83
+ end
84
+ end
65
85
  end