bio-band 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +5 -0
  3. data/Jarfile +1 -1
  4. data/Jarfile.lock +1 -1
  5. data/README.rdoc +2 -0
  6. data/Rakefile +2 -1
  7. data/VERSION +1 -1
  8. data/band_server/client.rb +35 -0
  9. data/band_server/client_alt.rb +35 -0
  10. data/band_server/first_dataset.csv +15 -0
  11. data/band_server/second_dataset.csv +15 -0
  12. data/band_server/simple_server.rb +95 -0
  13. data/band_server/third_dataset.csv +15 -0
  14. data/band_server/uploads/first_dataset.csv +15 -0
  15. data/band_server/uploads/second_dataset.csv +15 -0
  16. data/band_server/uploads/third_dataset.csv +15 -0
  17. data/bio-band.gemspec +19 -3
  18. data/features/step_definitions/weka_classifiers.rb +3 -2
  19. data/features/weka_classifiers.feature +13 -13
  20. data/lib/bio-band.rb +2 -0
  21. data/lib/bio-band/apache/stat/inference.rb +25 -19
  22. data/lib/bio-band/apache/stat/regression.rb +2 -2
  23. data/lib/bio-band/core/parser/parser.rb +6 -6
  24. data/lib/bio-band/core/type/instances.rb +15 -5
  25. data/lib/bio-band/weka/attribute_selection/attribute_selection_utils.rb +2 -0
  26. data/lib/bio-band/weka/attribute_selection/evaluators.rb +2 -1
  27. data/lib/bio-band/weka/attribute_selection/search.rb +1 -0
  28. data/lib/bio-band/weka/classifiers/bayes/bayes.rb +1 -0
  29. data/lib/bio-band/weka/classifiers/bayes/bayes_utils.rb +16 -3
  30. data/lib/bio-band/weka/classifiers/evaluation.rb +9 -9
  31. data/lib/bio-band/weka/classifiers/functions/functions.rb +1 -0
  32. data/lib/bio-band/weka/classifiers/functions/functions_utils.rb +16 -3
  33. data/lib/bio-band/weka/classifiers/lazy/lazy_utils.rb +21 -3
  34. data/lib/bio-band/weka/classifiers/mi/mi.rb +1 -0
  35. data/lib/bio-band/weka/classifiers/mi/mi_utils.rb +18 -3
  36. data/lib/bio-band/weka/classifiers/rules/rules_utils.rb +20 -4
  37. data/lib/bio-band/weka/classifiers/trees/trees.rb +1 -0
  38. data/lib/bio-band/weka/classifiers/trees/trees_utils.rb +20 -3
  39. data/lib/bio-band/weka/clusterers/clusterers.rb +37 -13
  40. data/lib/bio-band/weka/clusterers/clusterers_utils.rb +60 -35
  41. data/lib/bio-band/weka/filters/unsupervised/attribute/attribute.rb +9 -1
  42. data/test/helper.rb +18 -0
  43. data/test/test_apacheCorrelation.rb +22 -0
  44. data/test/test_apacheInference.rb +46 -0
  45. data/test/test_bio-band.rb +9 -0
  46. metadata +33 -2
data/lib/bio-band.rb CHANGED
@@ -7,4 +7,6 @@ require "java"
7
7
  require "bio-band/core"
8
8
  require "bio-band/weka"
9
9
  require "bio-band/apache"
10
+ Random.ancestors[1].instance_eval {remove_const :Random} if defined?(Random)
11
+ java_import 'java.util.Random'
10
12
 
@@ -2,28 +2,28 @@ require 'java'
2
2
 
3
3
  module Apache
4
4
  module Stat
5
- module Inference
5
+ module Inference
6
6
 
7
- java_import 'org.apache.commons.math3.stat.inference.ChiSquareTest'
8
- java_import 'org.apache.commons.math3.stat.inference.MannWhitneyUTest'
9
- java_import 'org.apache.commons.math3.stat.inference.OneWayAnova'
10
- java_import 'org.apache.commons.math3.stat.inference.TTest'
11
- java_import 'org.apache.commons.math3.stat.inference.WilcoxonSignedRankTest'
12
- java_import 'org.apache.commons.math3.stat.StatUtils'
7
+ java_import 'org.apache.commons.math3.stat.inference.ChiSquareTest'
8
+ java_import 'org.apache.commons.math3.stat.inference.MannWhitneyUTest'
9
+ java_import 'org.apache.commons.math3.stat.inference.OneWayAnova'
10
+ java_import 'org.apache.commons.math3.stat.inference.TTest'
11
+ java_import 'org.apache.commons.math3.stat.inference.WilcoxonSignedRankTest'
12
+ java_import 'org.apache.commons.math3.stat.StatUtils'
13
13
  java_import 'java.util.ArrayList'
14
-
14
+
15
15
  # An implementation of the Wilcoxon signed-rank test
16
16
  # * *Args* :
17
17
  # - +Array1+ -> must be a RubyArray.
18
18
  # - +Array2+ -> must be a RubyArray.
19
- def self.wilcoxon_test(array_1,array_2)
20
- obj = WilcoxonSignedRankTest.new
19
+ def self.wilcoxon_test(array_1,array_2)
20
+ obj = WilcoxonSignedRankTest.new
21
21
  first = Core::Utils::double_to_a(array_1)
22
22
  second = Core::Utils::double_to_a(array_2)
23
- val = obj.wilcoxonSignedRank first, second
23
+ val = obj.wilcoxonSignedRank first, second
24
24
  p_val = obj.wilcoxonSignedRankTest first, second, true.to_java(:boolean)
25
- return val,p_val
26
- end
25
+ return val,p_val
26
+ end
27
27
 
28
28
  # Utility class called by 'chi_square' method in this same package
29
29
  class Chi_square
@@ -61,6 +61,7 @@ module Apache
61
61
  end
62
62
  end
63
63
 
64
+ # Compare two datasets stored in Ruby Arrays
64
65
  def self.chi_square_dataset_compare(observed1,observed2)
65
66
  obj = ChiSquareTest.new
66
67
  val = obj.chiSquareDataSetsComparison(observed1.to_java(:long),observed2.to_java(:long))
@@ -68,12 +69,17 @@ module Apache
68
69
  return val,p_value
69
70
  end
70
71
 
71
- def mann_whitney_u(array1,array2)
72
+ # An implementation of the Mann-Whitney U test
73
+ # (also called Wilcoxon rank-sum test)
74
+ # * *Args* :
75
+ # - +Array1+ -> must be a RubyArray.
76
+ # - +Array2+ -> must be a RubyArray.
77
+ def self.mann_whitney_u(array1,array2)
72
78
  obj = MannWhitneyUTest.new
73
- first = array_1.to_java :double
74
- second = array_2.to_java :double
75
- value = mannWhitneyU first,second
76
- p_value = mannWhitneyUTest first,second
79
+ first = array1.to_java :double
80
+ second = array2.to_java :double
81
+ value = obj.mannWhitneyU first,second
82
+ p_value = obj.mannWhitneyUTest first,second
77
83
  return value,p_value
78
84
  end
79
85
 
@@ -140,6 +146,6 @@ module Apache
140
146
  p_value = obj.anovaPValue(collection)
141
147
  return f_value,p_value
142
148
  end
143
- end
149
+ end
144
150
  end
145
151
  end
@@ -2,7 +2,7 @@ require 'java'
2
2
 
3
3
  module Apache
4
4
  module Stat
5
- module Regression
5
+ module Regression
6
6
  java_import "org.apache.commons.math3.stat.regression.SimpleRegression"
7
7
 
8
8
  # Create a simple regression model on the input data
@@ -17,6 +17,6 @@ module Apache
17
17
  end
18
18
 
19
19
 
20
- end
20
+ end
21
21
  end
22
22
  end
@@ -2,22 +2,22 @@ require 'java'
2
2
 
3
3
  module Core
4
4
  module Parser
5
- # Parse an ARFF file and create an Instances object
6
- def Parser.parse_ARFF(arff_file)
5
+ # Parse an ARFF file and create an Instances object
6
+ def Parser.parse_ARFF(arff_file)
7
7
  java_import 'java.io.FileReader'
8
8
  file_in = FileReader.new arff_file
9
9
  data_instance = Core::Type::Instances.new file_in
10
10
  return data_instance
11
- end
11
+ end
12
12
  # Parse a CSV file and create an Instances object
13
- def Parser.parse_CSV(csv_file)
14
- java_import 'weka.core.converters.CSVLoader'
13
+ def Parser.parse_CSV(csv_file)
14
+ java_import 'weka.core.converters.CSVLoader'
15
15
  java_import 'java.io.File'
16
16
  loader = CSVLoader.new
17
17
  file = File.new csv_file
18
18
  loader.setSource(file)
19
19
  data_instance = loader.getDataSet
20
20
  return data_instance
21
- end
21
+ end
22
22
  end
23
23
  end
@@ -61,10 +61,18 @@ module Core
61
61
  enumerate_instances.each {|inst| yield(inst)}
62
62
  end
63
63
 
64
+ def each_row_with_index
65
+ enumerate_instances.each_with_index {|inst,id| yield(inst,id)}
66
+ end
67
+
64
68
  def each_column
65
69
  enumerate_attributes.each {|attribute| yield(attribute)}
66
70
  end
67
71
 
72
+ def each_column_with_index
73
+ enumerate_attributes.each_with_index {|attribute,id| yield(attribute,id)}
74
+ end
75
+
68
76
  # Check if this instance's attributes are all Numeric
69
77
  def check_numeric_instance
70
78
  enumerateAttributes.each do |att|
@@ -226,7 +234,7 @@ module Core
226
234
  summary = Ruport::Data::Table::new
227
235
  summary.add_column 'Attributes'
228
236
  enumerateAttributes.each_with_index do |att,idx|
229
- summary.add_column idx+1
237
+ summary.add_column idx
230
238
  end
231
239
 
232
240
  att_names = ['Names']
@@ -244,13 +252,15 @@ module Core
244
252
  end
245
253
  summary << att_types
246
254
 
247
- puts summary
255
+ display = []
256
+ display << summary
248
257
 
249
258
  unless enumerate_instances.nil?
250
259
  count=0
251
260
  enumerateInstances.each {|inst| count=count+1}
252
- puts "\nNumber of rows: #{count}"
253
- end
261
+ display << "\nNumber of rows: #{count}"
262
+ end
263
+ display
254
264
  end
255
265
 
256
266
  # Merges two sets of Instances together. The resulting set will have all the
@@ -321,7 +331,7 @@ module Core
321
331
  # Return a json String for the current Instances object
322
332
  # The output is modeled on the 'datatable' Google charts APIs
323
333
  # More details at: 'https://developers.google.com/chart/interactive/docs/reference#DataTable'
324
- def to_json
334
+ def to_json_format
325
335
  dataset_hash = Hash.new
326
336
  dataset_hash[:cols] = enumerateAttributes.collect {|attribute| attribute.name}
327
337
  dataset_hash[:rows] = enumerateInstances.collect {|instance| instance.toString}
@@ -6,11 +6,13 @@ module Attribute_selection_Utils
6
6
  listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
7
7
  end
8
8
 
9
+ #Set options for an evaluator or a search algorithm
9
10
  def select_options(options_string)
10
11
  options = Utils.splitOptions(options_string)
11
12
  set_options(options)
12
13
  end
13
14
 
15
+ #Return a short description for the selected evaluator object or search algorithm
14
16
  def description
15
17
  globalInfo
16
18
  end
@@ -4,12 +4,13 @@ require 'attribute_selection_utils'
4
4
  module Weka
5
5
  module Attribute_selection
6
6
  module Evaluator
7
+ #This module contains evaluators from the 'weka.attributeSelection' packages
7
8
  java_import 'weka.attributeSelection.CfsSubsetEval'
8
9
  java_import 'weka.attributeSelection.ChiSquaredAttributeEval'
9
10
 
10
11
  class CfsSubsetEval
11
12
  include Attribute_selection_Utils
12
- java_alias :use_options , :setOptions, [Java::Java.lang.String[]]
13
+ # java_alias :use_options , :setOptions, [Java::Java.lang.String[]]
13
14
  end
14
15
 
15
16
  class ChiSquaredAttributeEval
@@ -3,6 +3,7 @@ require 'attribute_selection_utils'
3
3
 
4
4
  module Weka
5
5
  module Attribute_selection
6
+ #This module contains search algorithms from the 'weka.attributeSelection' packages
6
7
  module Search
7
8
 
8
9
  java_import 'weka.attributeSelection.GreedyStepwise'
@@ -3,6 +3,7 @@ require 'bayes_utils'
3
3
 
4
4
  module Weka
5
5
  module Classifier
6
+ #This module stores the classifiers from the 'weka.classifiers.bayes' package
6
7
  module Bayes
7
8
  java_import "weka.classifiers.bayes.NaiveBayes"
8
9
  java_import "weka.classifiers.bayes.BayesianLogisticRegression"
@@ -16,10 +16,12 @@ module Bayes_utils
16
16
  build_classifier(@dataset)
17
17
  end
18
18
 
19
+ # set data for instance classifier
19
20
  def set_data(data)
20
21
  @dataset = data
21
22
  end
22
23
 
24
+ # set class index for the input dataset
23
25
  def set_class_index(class_index)
24
26
  @class_index = class_index
25
27
  end
@@ -29,6 +31,7 @@ module Bayes_utils
29
31
  base.extend(ClassMethods)
30
32
  end
31
33
 
34
+ # set classifier options
32
35
  def set_options(options)
33
36
  options_inst = Utils.splitOptions(options)
34
37
  setOptions(options_inst)
@@ -38,14 +41,24 @@ module Bayes_utils
38
41
  listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
39
42
  end
40
43
 
44
+ # return the description reported in the Weka Java doc
41
45
  def description
42
46
  globalInfo
43
47
  end
44
48
 
49
+ # perform crossvalidation on a trained classifier
50
+ #ARGV:
51
+ #fold -> 'int' value
45
52
  def cross_validate(fold)
46
- eval = Weka::Classifier::Evaluation.new self.class.data
47
- eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
48
- eval.summary
53
+ if self.class.data
54
+ eval = Weka::Classifier::Evaluation.new self.class.data
55
+ eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
56
+ eval.summary
57
+ else
58
+ eval = Weka::Classifier::Evaluation.new @dataset
59
+ eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
60
+ eval.summary
61
+ end
49
62
  end
50
63
 
51
64
  #Class methods module
@@ -1,13 +1,13 @@
1
1
  module Weka
2
- module Classifier
3
- java_import 'weka.classifiers.Evaluation'
4
- java_import 'java.util.Random'
2
+ module Classifier
3
+ java_import 'weka.classifiers.Evaluation'
5
4
 
6
- class Evaluation
7
- def summary
8
- toSummaryString
9
- end
10
- end
5
+ # Weka Evaluation class to be used with classifiers and clusterers
6
+ class Evaluation
7
+ def summary
8
+ toSummaryString
9
+ end
10
+ end
11
11
 
12
- end
12
+ end
13
13
  end
@@ -3,6 +3,7 @@ require 'functions_utils'
3
3
 
4
4
  module Weka
5
5
  module Classifier
6
+ #This module stores the classifiers from the 'weka.classifiers.functions' package
6
7
  module Functions
7
8
  java_import 'weka.classifiers.functions.LinearRegression'
8
9
  java_import 'weka.classifiers.functions.PLSClassifier'
@@ -20,10 +20,12 @@ module Functions_utils
20
20
  build_classifier(@dataset)
21
21
  end
22
22
 
23
+ #Set instance data for the instance classifier
23
24
  def set_data(data)
24
25
  @dataset = data
25
26
  end
26
27
 
28
+ #Set a class index for the input dataset
27
29
  def set_class_index(class_index)
28
30
  @class_index = class_index
29
31
  end
@@ -33,18 +35,29 @@ module Functions_utils
33
35
  setOptions(options_inst)
34
36
  end
35
37
 
38
+ #List available options
36
39
  def list_options
37
40
  listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
38
41
  end
39
42
 
43
+ #Return a description from the Weka Javadoc for the selected classifier
40
44
  def description
41
45
  puts globalInfo
42
46
  end
43
47
 
48
+ # perform crossvalidation on a trained classifier
49
+ #ARGV:
50
+ #fold -> 'int' value
44
51
  def cross_validate(fold)
45
- eval = Weka::Classifier::Evaluation.new self.class.data
46
- eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
47
- eval.summary
52
+ if self.class.data
53
+ eval = Weka::Classifier::Evaluation.new self.class.data
54
+ eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
55
+ eval.summary
56
+ else
57
+ eval = Weka::Classifier::Evaluation.new @dataset
58
+ eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
59
+ eval.summary
60
+ end
48
61
  end
49
62
 
50
63
  #Class methods module
@@ -20,31 +20,49 @@ module Lazy_utils
20
20
  build_classifier(@dataset)
21
21
  end
22
22
 
23
+ #Set data for instance classifier
24
+ #ARGV
25
+ # data -> an Instances object
23
26
  def set_data(data)
24
27
  @dataset = data
25
28
  end
26
29
 
30
+ #Set a class index for the input dataset
27
31
  def set_class_index(class_index)
28
32
  @class_index = class_index
29
33
  end
30
34
 
35
+ #Set options for the selected classifier
36
+ #ARGS:
37
+ #options -> a String, i.e. "-K"
31
38
  def set_options(options)
32
39
  options_inst = Utils.splitOptions(options)
33
40
  setOptions(options_inst)
34
41
  end
35
42
 
43
+ #List available options
36
44
  def list_options
37
45
  listOptions.map {|key| "#{key.synopsis} #{key.description}"}.join("\n")
38
46
  end
39
47
 
48
+ #Return a description from the Weka JavaDoc for the selected classifier
40
49
  def description
41
50
  puts globalInfo
42
51
  end
43
52
 
53
+ # perform crossvalidation on a trained classifier
54
+ #ARGV:
55
+ #fold -> 'int' value
44
56
  def cross_validate(fold)
45
- eval = Weka::Classifier::Evaluation.new self.class.data
46
- eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
47
- eval.summary
57
+ if self.class.data
58
+ eval = Weka::Classifier::Evaluation.new self.class.data
59
+ eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
60
+ eval.summary
61
+ else
62
+ eval = Weka::Classifier::Evaluation.new @dataset
63
+ eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
64
+ eval.summary
65
+ end
48
66
  end
49
67
 
50
68
  #Class methods module
@@ -4,6 +4,7 @@ require 'mi_utils'
4
4
  module Weka
5
5
  module Classifier
6
6
  module Mi
7
+ #This module contains classifiers from the 'weka.classifiers.mi' package
7
8
  java_import 'weka.classifiers.mi.CitationKNN'
8
9
  java_import 'weka.classifiers.mi.MDD'
9
10
  java_import 'weka.classifiers.mi.MIBoost'
@@ -20,10 +20,14 @@ module Mi_utils
20
20
  build_classifier(@dataset)
21
21
  end
22
22
 
23
+ #Set input data for the selected classifier
24
+ #ARGV:
25
+ #data -> an Instances class object
23
26
  def set_data(data)
24
27
  @dataset = data
25
28
  end
26
29
 
30
+ #Set class index for the input dataset
27
31
  def set_class_index(class_index)
28
32
  @class_index = class_index
29
33
  end
@@ -33,18 +37,29 @@ module Mi_utils
33
37
  setOptions(options_inst)
34
38
  end
35
39
 
40
+ #List options for the selected classifier
36
41
  def list_options
37
42
  listOptions.each {|key| puts "#{key.synopsis} #{key.description}"}
38
43
  end
39
44
 
45
+ #Return a short description for the current classifier
40
46
  def description
41
47
  puts globalInfo
42
48
  end
43
49
 
50
+ # perform crossvalidation on a trained classifier
51
+ #ARGV:
52
+ #fold -> 'int' value
44
53
  def cross_validate(fold)
45
- eval = Weka::Classifier::Evaluation.new self.class.data
46
- eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
47
- eval.summary
54
+ if self.class.data
55
+ eval = Weka::Classifier::Evaluation.new self.class.data
56
+ eval.crossValidateModel(self.class.ancestors[2].new, self.class.data, fold.to_java(:int), Random.new(1))
57
+ eval.summary
58
+ else
59
+ eval = Weka::Classifier::Evaluation.new @dataset
60
+ eval.crossValidateModel(self.class.ancestors[1].new, @dataset, fold.to_java(:int), Random.new(1))
61
+ eval.summary
62
+ end
48
63
  end
49
64
 
50
65
  #Class methods module