ruby-band 0.1.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. data/.travis.yml +3 -0
  2. data/Gemfile +30 -0
  3. data/Gemfile.lock +119 -0
  4. data/Jarfile +9 -0
  5. data/Jarfile.lock +10 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +321 -0
  8. data/README.rdoc +70 -0
  9. data/Rakefile +66 -0
  10. data/VERSION +1 -0
  11. data/band_server/client.rb +35 -0
  12. data/band_server/client_alt.rb +35 -0
  13. data/band_server/first_dataset.csv +15 -0
  14. data/band_server/second_dataset.csv +15 -0
  15. data/band_server/simple_server.rb +90 -0
  16. data/band_server/third_dataset.csv +15 -0
  17. data/band_server/uploads/first_dataset.csv +15 -0
  18. data/band_server/uploads/second_dataset.csv +15 -0
  19. data/band_server/uploads/third_dataset.csv +15 -0
  20. data/bin/ruby-band +83 -0
  21. data/ext/mkrf_conf.rb +74 -0
  22. data/features/create_dataset.feature +12 -0
  23. data/features/step_definitions/create_dataset.rb +39 -0
  24. data/features/step_definitions/weka_classifiers.rb +43 -0
  25. data/features/step_definitions/weka_clustering.rb +34 -0
  26. data/features/step_definitions/weka_filters.rb +32 -0
  27. data/features/step_definitions/weka_parsers.rb +46 -0
  28. data/features/step_definitions/weka_pipeline.rb +41 -0
  29. data/features/support/env.rb +3 -0
  30. data/features/weka_classifiers.feature +16 -0
  31. data/features/weka_clustering.feature +15 -0
  32. data/features/weka_filters.feature +12 -0
  33. data/features/weka_parsers.feature +18 -0
  34. data/features/weka_pipeline.feature +14 -0
  35. data/lib/ruby-band.rb +12 -0
  36. data/lib/ruby-band/apache.rb +2 -0
  37. data/lib/ruby-band/apache/stat/correlation.rb +42 -0
  38. data/lib/ruby-band/apache/stat/inference.rb +151 -0
  39. data/lib/ruby-band/apache/stat/regression.rb +22 -0
  40. data/lib/ruby-band/core.rb +6 -0
  41. data/lib/ruby-band/core/parser/parser.rb +27 -0
  42. data/lib/ruby-band/core/type/apache_matrices.rb +35 -0
  43. data/lib/ruby-band/core/type/attribute.rb +53 -0
  44. data/lib/ruby-band/core/type/instance.rb +10 -0
  45. data/lib/ruby-band/core/type/instances.rb +361 -0
  46. data/lib/ruby-band/core/type/utils.rb +31 -0
  47. data/lib/ruby-band/weka.rb +14 -0
  48. data/lib/ruby-band/weka/attribute_selection/attribute_selection_utils.rb +20 -0
  49. data/lib/ruby-band/weka/attribute_selection/evaluators.rb +58 -0
  50. data/lib/ruby-band/weka/attribute_selection/search.rb +52 -0
  51. data/lib/ruby-band/weka/classifiers/bayes/bayes.rb +86 -0
  52. data/lib/ruby-band/weka/classifiers/bayes/bayes_utils.rb +82 -0
  53. data/lib/ruby-band/weka/classifiers/evaluation.rb +13 -0
  54. data/lib/ruby-band/weka/classifiers/functions/functions.rb +177 -0
  55. data/lib/ruby-band/weka/classifiers/functions/functions_utils.rb +78 -0
  56. data/lib/ruby-band/weka/classifiers/lazy/lazy.rb +86 -0
  57. data/lib/ruby-band/weka/classifiers/lazy/lazy_utils.rb +83 -0
  58. data/lib/ruby-band/weka/classifiers/mi/mi.rb +191 -0
  59. data/lib/ruby-band/weka/classifiers/mi/mi_utils.rb +80 -0
  60. data/lib/ruby-band/weka/classifiers/rules/rules.rb +190 -0
  61. data/lib/ruby-band/weka/classifiers/rules/rules_utils.rb +81 -0
  62. data/lib/ruby-band/weka/classifiers/trees/trees.rb +110 -0
  63. data/lib/ruby-band/weka/classifiers/trees/trees_utils.rb +85 -0
  64. data/lib/ruby-band/weka/clusterers/clusterers.rb +99 -0
  65. data/lib/ruby-band/weka/clusterers/clusterers_utils.rb +86 -0
  66. data/lib/ruby-band/weka/db/DatabaseUtils_mysql +280 -0
  67. data/lib/ruby-band/weka/db/DatabaseUtils_postgresql +594 -0
  68. data/lib/ruby-band/weka/db/db.rb +74 -0
  69. data/lib/ruby-band/weka/filters/supervised/attribute/attribute.rb +55 -0
  70. data/lib/ruby-band/weka/filters/supervised/instance/instance.rb +17 -0
  71. data/lib/ruby-band/weka/filters/supervised/supervised_utils.rb +38 -0
  72. data/lib/ruby-band/weka/filters/unsupervised/attribute/attribute.rb +90 -0
  73. data/lib/ruby-band/weka/filters/unsupervised/instance/instance.rb +48 -0
  74. data/lib/ruby-band/weka/filters/unsupervised/unsupervised_utils.rb +38 -0
  75. data/resources/ReutersGrain-test.arff +611 -0
  76. data/resources/ReutersGrain-train.arff +1561 -0
  77. data/resources/weather.csv +15 -0
  78. data/resources/weather.numeric.arff +23 -0
  79. data/ruby-band.gemspec +178 -0
  80. data/spec/ruby-band_spec.rb +7 -0
  81. data/spec/spec_helper.rb +12 -0
  82. data/test/helper.rb +18 -0
  83. data/test/test_apacheCorrelation.rb +22 -0
  84. data/test/test_apacheInference.rb +46 -0
  85. data/test/test_ruby-band.rb +9 -0
  86. metadata +426 -0
@@ -0,0 +1,151 @@
1
+ require 'java'
2
+
3
+ module Apache
4
+ module Stat
5
+ module Inference
6
+
7
+ java_import 'org.apache.commons.math3.stat.inference.ChiSquareTest'
8
+ java_import 'org.apache.commons.math3.stat.inference.MannWhitneyUTest'
9
+ java_import 'org.apache.commons.math3.stat.inference.OneWayAnova'
10
+ java_import 'org.apache.commons.math3.stat.inference.TTest'
11
+ java_import 'org.apache.commons.math3.stat.inference.WilcoxonSignedRankTest'
12
+ java_import 'org.apache.commons.math3.stat.StatUtils'
13
+ java_import 'java.util.ArrayList'
14
+
15
+ # An implementation of the Wilcoxon signed-rank test
16
+ # * *Args* :
17
+ # - +Array1+ -> must be a RubyArray.
18
+ # - +Array2+ -> must be a RubyArray.
19
+ def self.wilcoxon_test(array_1,array_2)
20
+ obj = WilcoxonSignedRankTest.new
21
+ first = Core::Utils::double_to_a(array_1)
22
+ second = Core::Utils::double_to_a(array_2)
23
+ val = obj.wilcoxonSignedRank first, second
24
+ p_val = obj.wilcoxonSignedRankTest first, second, true.to_java(:boolean)
25
+ return val,p_val
26
+ end
27
+
28
+ # Utility class called by 'chi_square' method in this same package
29
+ class Chi_square
30
+ def self.chi_square_2d(array_2d)
31
+ obj = ChiSquareTest.new
32
+ val = obj.chi_square(array_2d.to_java(Java::long[]))
33
+ p_value = obj.chi_square_test(array_2d.to_java(Java::long[]))
34
+ return val,p_value
35
+ end
36
+
37
+ def self.chi_square_two_arrays(expected,observed)
38
+ obj = ChiSquareTest.new
39
+ val = obj.chi_square(expected.to_java(:double),observed.to_java(:long))
40
+ p_value = obj.chi_square_test(expected.to_java(:double),observed.to_java(:long))
41
+ return val,p_value
42
+ end
43
+ end
44
+
45
+ # 1) Computes the Chi-Square statistic associated with a chi-square test of independence
46
+ # based on the input counts array, viewed as a two-way table.
47
+ # * *Args* :
48
+ # - +Array+ -> must be a bidimensional RubyArray.
49
+ # 2) Computes the Chi-Square statistic comparing observed and expected frequency counts.
50
+ # * *Args* :
51
+ # - +Array1+ -> must be a RubyArray of expected counts.
52
+ # - +Array2+ -> must be a RubyArray of observed counts.
53
+ def self.chi_square(*args)
54
+ if args.length == 2
55
+ Chi_square.chi_square_two_arrays(*args)
56
+ elsif args.length == 1
57
+ raise ArgumentError,"RubyArray must be bidimensional" unless args[0].is_2d?
58
+ Chi_square.chi_square_2d(*args)
59
+ else
60
+ raise ArgumentError, 'Function *args should be two RubyArrays or a bidimensional RubyArray'
61
+ end
62
+ end
63
+
64
+ # Compare two datasets stored in Ruby Arrays
65
+ def self.chi_square_dataset_compare(observed1,observed2)
66
+ obj = ChiSquareTest.new
67
+ val = obj.chiSquareDataSetsComparison(observed1.to_java(:long),observed2.to_java(:long))
68
+ p_value = obj.chiSquareTestDataSetsComparison(observed1.to_java(:long),observed2.to_java(:long))
69
+ return val,p_value
70
+ end
71
+
72
+ # An implementation of the Mann-Whitney U test
73
+ # (also called Wilcoxon rank-sum test)
74
+ # * *Args* :
75
+ # - +Array1+ -> must be a RubyArray.
76
+ # - +Array2+ -> must be a RubyArray.
77
+ def self.mann_whitney_u(array1,array2)
78
+ obj = MannWhitneyUTest.new
79
+ first = array1.to_java :double
80
+ second = array2.to_java :double
81
+ value = obj.mannWhitneyU first,second
82
+ p_value = obj.mannWhitneyUTest first,second
83
+ return value,p_value
84
+ end
85
+
86
+ #Utility class called by 't_test' method in this same package
87
+ class T_test
88
+
89
+ def self.homoscedastic(array_1,array_2)
90
+ obj = TTest.new
91
+ first = array_1.to_java :double
92
+ second = array_2.to_java :double
93
+ value = obj.homoscedasticT(first,second)
94
+ p_value = obj.homoscedasticTTest(first,second)
95
+ return value, p_value
96
+ end
97
+
98
+ def self.paired(array_1,array_2)
99
+ obj = TTest.new
100
+ first = array_1.to_java :double
101
+ second =array_2.to_java :double
102
+ value = obj.pairedT(first,second)
103
+ p_value = obj.pairedTTest(first,second)
104
+ return value,p_value
105
+ end
106
+
107
+ def self.t(array_1,array_2)
108
+ obj = TTest.new
109
+ first = array_1.to_java :double
110
+ second =array_2.to_java :double
111
+ value = obj.t(first,second)
112
+ p_value =obj.tTest(first,second)
113
+ return value,p_value
114
+ end
115
+ end
116
+
117
+ # An implementation for Student's t-tests
118
+ # * *Args* :
119
+ # - +sample_1+ -> an array of numeric values representing a sample
120
+ # - +sample_2+ -> an array of numeric values representing a sample
121
+ # - +homoscedastic+ -> set to true for equal variance assumption
122
+ # - +paired+ -> set to true if you want to perform a 'paired' t test
123
+ def self.t_test(sample_1,sample_2,homoscedastic=false,paired=false)
124
+ if homoscedastic == true
125
+ T_test.homoscedastic(sample_1,sample_2)
126
+ elsif paired == true
127
+ T_test.paired(sample_1,sample_2)
128
+ else
129
+ T_test.t(sample_1,sample_2)
130
+ end
131
+ end
132
+
133
+ # Implements one-way ANOVA (analysis of variance) statistics.
134
+ # Tests for differences between two or more categories of univariate data (for example,
135
+ # the body mass index of accountants, lawyers, doctors and computer programmers). When
136
+ # two categories are given, this is equivalent to the TTest.
137
+ # * *Args* :
138
+ # - +bidimensional_array+ -> a 2d RubyArray
139
+ def self.one_way_anova(bidimensional_array)
140
+ collection = ArrayList.new
141
+ bidimensional_array.each do |array|
142
+ collection.add(array.to_java :double)
143
+ end
144
+ obj = OneWayAnova.new
145
+ f_value = obj.anovaFValue(collection)
146
+ p_value = obj.anovaPValue(collection)
147
+ return f_value,p_value
148
+ end
149
+ end
150
+ end
151
+ end
@@ -0,0 +1,22 @@
1
+ require 'java'
2
+
3
+ module Apache
4
+ module Stat
5
+ module Regression
6
+ java_import "org.apache.commons.math3.stat.regression.SimpleRegression"
7
+
8
+ # Create a simple regression model on the input data
9
+ # * *Args* :
10
+ # - +vector+ -> must be a multidimensional array
11
+ def self.simple_regression(vector)
12
+ data = Core::Utils.bidimensional_to_double vector
13
+ obj = SimpleRegression.new
14
+ obj.addData(data)
15
+ return obj
16
+ # add Jruby methods for regression analysis
17
+ end
18
+
19
+
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,6 @@
1
+ require 'ruby-band/core/type/instances'
2
+ require 'ruby-band/core/type/instance'
3
+ require 'ruby-band/core/parser/parser'
4
+ require 'ruby-band/core/type/utils'
5
+ require 'ruby-band/core/type/attribute'
6
+ require 'ruby-band/core/type/apache_matrices'
@@ -0,0 +1,27 @@
1
+ require 'java'
2
+ java_import 'weka.core.converters.CSVLoader'
3
+ java_import 'weka.core.converters.ArffLoader'
4
+
5
+ module Core
6
+ module Parser
7
+ # Parse an ARFF file and create an Instances object
8
+ def Parser.parse_ARFF(arff_file)
9
+ java_import 'java.io.File'
10
+ loader = ArffLoader.new
11
+ file = File.new arff_file
12
+ loader.setSource(file)
13
+ data_instance = loader.getDataSet
14
+ return data_instance
15
+ end
16
+
17
+ # Parse a CSV file and create an Instances object
18
+ def Parser.parse_CSV(csv_file)
19
+ java_import 'java.io.File'
20
+ loader = CSVLoader.new
21
+ file = File.new csv_file
22
+ loader.setSource(file)
23
+ data_instance = loader.getDataSet
24
+ return data_instance
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,35 @@
1
+ require 'java'
2
+
3
+ module Core
4
+ module Type
5
+
6
+ java_import 'org.apache.commons.math3.linear.BlockRealMatrix'
7
+ java_import 'org.apache.commons.math3.linear.Array2DRowRealMatrix'
8
+
9
+ #Define variables to use ruby-like names instead of Java's
10
+ Apache_matrix = Array2DRowRealMatrix
11
+ Apache_matrix_block = BlockRealMatrix
12
+
13
+ #* *Description* :
14
+ #Linear algebra support in commons-math provides operations on real matrices (both dense
15
+ #and sparse matrices are supported) and vectors. It features basic operations (addition, subtraction ...)
16
+ #and decomposition algorithms that can be used to solve linear systems either in exact sense and
17
+ #in least squares sense.
18
+ #The 'Apache_matrix' class represents a matrix with real numbers as entries.
19
+ #The following basic matrix operations are supported:
20
+ #- Matrix addition, subtraction, multiplication
21
+ #- Scalar addition and multiplication
22
+ #- Transpose
23
+ #- Norm and trace
24
+ #- Operation on a vector
25
+ class Apache_matrix
26
+
27
+ end
28
+
29
+ # Apache matrix implementation suited to dimensions above 50 or 100
30
+ class Apache_matrix_block
31
+
32
+ end
33
+
34
+ end
35
+ end
@@ -0,0 +1,53 @@
1
+ require 'java'
2
+
3
+ module Core
4
+ module Type
5
+
6
+ java_import "weka.core.Attribute"
7
+ java_import "weka.core.FastVector"
8
+
9
+ class Attribute
10
+
11
+ end
12
+
13
+ # Return a Numeric Attribute class object
14
+ # * *Args* :
15
+ # - +name_of_attr+ -> a String, the name of the attribute
16
+ def self.create_numeric_attr(name_of_attr)
17
+ numeric = Attribute.new name_of_attr
18
+ return numeric
19
+ end
20
+
21
+ # Return a Date Attribute class object
22
+ # * *Args* :
23
+ # - +name_of_attr+ -> a String, the name of the attribute
24
+ # - +format+ -> The format of the attribute
25
+ def self.create_date_attr(name_of_attr,format)
26
+ date = Attribute.new(name_of_attr,format)
27
+ return date
28
+ end
29
+
30
+ # Return a Nominal Attribute class object
31
+ # * *Args* :
32
+ # - +name_of_attr+ -> a String, the name of the attribute
33
+ # - +values_list+ -> An array, the list of nominal values
34
+ def self.create_nominal_attr(name_of_attr,values_list)
35
+ labels = FastVector.new
36
+ values_list.each {|value| labels.addElement(value)}
37
+ nominal = Attribute.new(name_of_attr,labels)
38
+ return nominal
39
+ end
40
+
41
+ # Return a String Attribute class object
42
+ # * *Args* :
43
+ # - +name_of_attr+ -> a String, the name of the attribute
44
+ def self.create_string_attr(name_of_attr)
45
+ construct = Attribute.java_class.constructor(Java::java.lang.String,Java::weka.core.FastVector)
46
+ string = construct.new_instance(name_of_attr,nil).to_java
47
+ return string
48
+ end
49
+ end
50
+
51
+ end
52
+
53
+
@@ -0,0 +1,10 @@
1
+ module Core
2
+ module Type
3
+
4
+ java_import "weka.core.FastVector"
5
+ java_import "weka.core.Instance"
6
+
7
+ class Instance
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,361 @@
1
+ require 'java'
2
+ require 'ruport'
3
+ require 'json'
4
+
5
+ module Core
6
+
7
+ java_import "weka.core.SerializationHelper"
8
+ module Type
9
+
10
+ java_import "weka.core.Instances"
11
+ java_import 'java.io.File'
12
+ java_import 'weka.core.converters.CSVSaver'
13
+ java_import 'weka.core.converters.ArffSaver'
14
+ java_import "weka.core.FastVector"
15
+ java_import "weka.core.Instance"
16
+
17
+ #
18
+ # * *Description* :
19
+ # This is the main class from the Weka package for data handling. It is essentially a matrix: each row
20
+ # is an instance of the 'Instance' class, while each column is an instance of the 'Attribute' class
21
+ # The class 'Instances' is here extended to add custom functionalities
22
+ class Instances
23
+
24
+ # Convert an Instances object to a bidimensional Ruby array
25
+ # where each row corresponds to an Instance object
26
+ def to_a2d
27
+ matrix = Array.new
28
+ att = Array.new
29
+ self.enumerateAttributes.each_with_index do |a,idx|
30
+ if a.isNumeric
31
+ enumerate_instances.each {|s| att << s.value(s.attribute(idx))}
32
+ matrix << att
33
+ att = Array.new
34
+ else
35
+ enumerateInstances.each do |inst|
36
+ att << inst.string_value(idx)
37
+ end
38
+ matrix << att
39
+ att = Array.new
40
+ end
41
+ end
42
+ return matrix.transpose
43
+ end
44
+
45
+ # Return the number of rows (Instance objects) in the dataset
46
+ def n_rows
47
+ return numInstances
48
+ end
49
+
50
+ # Return the number of columns (Attribute objects) in the dataset
51
+ def n_col
52
+ return numAttributes
53
+ end
54
+
55
+ # Return the dimensions of the dataset (for the current Instances class object)
56
+ def dim
57
+ puts "Rows number:\t#{numInstances}\nColumns number:\t #{numAttributes}"
58
+ end
59
+
60
+ def each_row
61
+ enumerate_instances.each {|inst| yield(inst)}
62
+ end
63
+
64
+ def each_row_with_index
65
+ enumerate_instances.each_with_index {|inst,id| yield(inst,id)}
66
+ end
67
+
68
+ def each_column
69
+ enumerate_attributes.each {|attribute| yield(attribute)}
70
+ end
71
+
72
+ def each_column_with_index
73
+ enumerate_attributes.each_with_index {|attribute,id| yield(attribute,id)}
74
+ end
75
+
76
+ # Check if this instance's attributes are all Numeric
77
+ def check_numeric_instance
78
+ enumerateAttributes.each do |att|
79
+ unless att.isNumeric
80
+ raise ArgumentError, "Sorry, attribute '#{att.name}' is not numeric!"
81
+ end
82
+ end
83
+ end
84
+
85
+ # Convert the present Instances object to an Apache matrix if every Instances attribute
86
+ # is Numeric
87
+ def to_Apache_matrix
88
+ check_numeric_instance
89
+ ruby_array = to_a
90
+ java_double_array = Core::Utils::bidimensional_to_double(ruby_array)
91
+ return Core::Type::Apache_matrix.new(java_double_array)
92
+ end
93
+
94
+ # Convert the present Instances object to an Apache matrix (block) if every Instances attribute
95
+ # is Numeric
96
+ def to_Apache_matrix_block
97
+ check_numeric_instance
98
+ ruby_array = to_a
99
+ java_double_array = Core::Utils::bidimensional_to_double(ruby_array)
100
+ return Core::Type::Apache_matrix_block.new(java_double_array)
101
+ end
102
+
103
+ # Return data for a single attribute (a column from the Instances object)
104
+ # * *Args* :
105
+ # - +att+ -> a String, the name of the attribute
106
+ def return_attr_data(att)
107
+ attr_values = Array.new
108
+ if attribute(att).isNumeric
109
+ enumerateInstances.each do |i|
110
+ attr_values << i.value(attribute(att))
111
+ end
112
+ else
113
+ attr_index = attribute(att).index
114
+ enumerateInstances.each do |inst|
115
+ attr_values << inst.string_value(attr_index)
116
+ end
117
+ end
118
+ return attr_values
119
+ end
120
+
121
+ # Return the mean value of a single attribute (a column from the Instances object)
122
+ # * *Args* :
123
+ # - +attribute_name+ -> a String, the name of the attribute
124
+ def mean(attribute_name)
125
+ sum = enumerateInstances.inject(0) do |s,x|
126
+ s+=x.value(attribute(attribute_name))
127
+ end
128
+ return sum/(numInstances*1.0)
129
+ end
130
+
131
+ # Return the variance of a single attribute (a column from the Instances object)
132
+ # * *Args* :
133
+ # - +attribute_name+ -> a String, the name of the attribute
134
def variance(attribute_name)
  # Look the column up by name, the same way return_attr_data and mean do.
  target = attribute(attribute_name)
  raise ArgumentError, "Sorry, attribute '#{attribute_name}' does not exist!" if target.nil?

  # This Ruby method carries the same name as Weka's Instances#variance(int),
  # so a plain `variance(index)` call would re-enter this method instead of
  # reaching the Java implementation. java_send selects the Java overload
  # explicitly by signature.
  java_send(:variance, [Java::int], target.index)
end
139
+
140
+ # Write the content of the current Instances object to a .csv file
141
+ # * *Args* :
142
+ # - +out_file+ -> a String, the name of the output file
143
+ def to_CSV(out_file)
144
+ saver = CSVSaver.new
145
+ saver.setInstances(self)
146
+ out_file = File.new out_file
147
+ saver.setFile(out_file);
148
+ saver.writeBatch();
149
+ end
150
+
151
+ # Write the content of the current Instances object to a .arff file
152
+ # * *Args* :
153
+ # - +out_file+ -> a String, the name of the output file
154
+ def to_ARFF(out_file)
155
+ saver = ArffSaver.new
156
+ saver.setInstances(self)
157
+ out_file = File.new out_file
158
+ saver.setFile(out_file);
159
+ saver.writeBatch();
160
+ end
161
+
162
def insert_attribute(attribute_value,position)
  # Translate one raw cell value into the value stored for the column at
  # +position+. Returns the value to store, or nil (after printing a notice)
  # when the column type is not recognised.
  column = attribute(position)
  if column.isDate
    # Weka's Attribute#isNumeric is also true for date attributes, so the date
    # check has to run first or this branch can never be reached.
    # Values that are already numeric are passed through untouched.
    return attribute_value if attribute_value.is_a?(Numeric)
    column.parseDate(attribute_value)
  elsif column.isNumeric
    attribute_value
  elsif column.isNominal
    column.indexOfValue(attribute_value)
  elsif column.isString
    # String attributes keep their values in a table; the stored cell value is
    # the index returned when the text is registered.
    column.addStringValue(attribute_value)
  else
    puts 'Attribute type is unknown!'
  end
end
176
+ private :insert_attribute
177
+
178
+ # (check function): should check that the array is bidimensional and that
179
+ # the lengths are equal
180
+ def check_array(data)
181
+ return true # still to be done
182
+ end
183
+
184
+ # An entire dataset is inserted 'by row' into the current Instances object
185
+ # i.e. one Instance object is inserted at a time
186
+ # * *Args* :
187
+ # - +data+ -> a bidimensional array
188
+ def populate_by_row(data)
189
+ unless check_array(data) == false
190
+ data.each do |row|
191
+ add_instance(row)
192
+ end
193
+ end
194
+ end
195
+
196
+ # An Instance instance object (one row) is inserted into the current Instances object
197
+ # * *Args* :
198
+ # - +instance+ -> an array of values of the correct data type (:nominal,:numeric,etc...)
199
+ def add_instance(instance)
200
+ data_ref=Array.new
201
+ instance.each_with_index do |attribute,idx|
202
+ data_ref << insert_attribute(attribute,idx)
203
+ end
204
+ double_array = data_ref.to_java :double
205
+ single_row = Instance.new(1.0, double_array)
206
+ self.add(single_row)
207
+ end
208
+
209
+ # An Attribute instance object is inserted into the current Instances object
210
+ # * *Args* :
211
+ # - +attribute_name+ -> A name for the new attribute
212
+ # * *WARNING* :
213
+ # This method only creates an empty attribute field
214
+ def add_numeric_attribute(attribute_name)
215
+ insertAttributeAt(Attribute.new(attribute_name), self.numAttributes)
216
+ end
217
+
218
+ # An Attribute instance object is inserted into the current Instances object
219
+ # * *Args* :
220
+ # - +attribute_name+ -> A name for the new attribute
221
+ # - +values+ -> RubyArray with nominal values
222
+ # * *WARNING* :
223
+ # This method only creates an empty attribute field
224
+ def add_nominal_attribute(attribute,list_values)
225
+ values = FastVector.new
226
+ list_values.each do |val|
227
+ values.addElement(val)
228
+ end
229
+ insertAttributeAt(Attribute.new(attribute, values), self.numAttributes)
230
+ end
231
+
232
+ #Print to STDOUT the list of the Instances's attributes (with the corresponding types)
233
def summary
  # Builds a Ruport table with one column per attribute and two rows
  # (attribute names, attribute types). Returns an Array holding the table,
  # followed by a "Number of rows" line when an instance enumeration exists.
  table = Ruport::Data::Table::new
  table.add_column 'Attributes'
  enumerateAttributes.each_with_index { |_att, idx| table.add_column idx }

  table << (['Names'] + enumerateAttributes.map { |att| "'#{att.name}'" })

  # Exactly one label per attribute keeps the row aligned with the columns.
  # Date is tested before Numeric because Weka reports date attributes as
  # numeric as well, which would otherwise produce two labels for one column.
  type_labels = enumerateAttributes.map do |att|
    if att.isDate then 'Date'
    elsif att.isNumeric then 'Numeric'
    elsif att.isNominal then 'Nominal'
    elsif att.isString then 'String'
    else 'Unknown'
    end
  end
  table << (['Types'] + type_labels)

  display = [table]
  display << "\nNumber of rows: #{numInstances}" unless enumerate_instances.nil?
  display
end
265
+
266
+ # Merges two sets of Instances together. The resulting set will have all the
267
+ # attributes of the first set plus all the attributes of the second set. The
268
+ # number of instances in both sets must be the same.
269
+ # * *Args* :
270
+ # - +instances+ -> An Instances class object
271
+ def merge_with(instances)
272
+ return Instances.mergeInstances(self,instances)
273
+ end
274
+
275
+ # This method creates an Instances object (see Cucumber documentation for further details)
276
+ # def self.create
277
+ # name = 'Instances'
278
+ # instances = Core::Type.create_instances(name,@@positions)
279
+ # return instances
280
+ # end
281
+
282
+ # This method is used for attributes definition in uninitialized Instances-derived classes
283
def att(attr_type,name,*values)
  # Builds an attribute of the requested kind (:numeric, :nominal, :date or
  # :string) and appends it to @positions. For :nominal the first extra
  # argument is the list of labels; for :date it is the date format.
  # Raises ArgumentError for any other kind instead of queuing a nil entry.
  java_name = name.to_java(:string)
  new_attribute =
    case attr_type
    when :numeric then Core::Type.create_numeric_attr(java_name)
    when :nominal then Core::Type.create_nominal_attr(java_name, values[0])
    when :date    then Core::Type.create_date_attr(java_name, values[0])
    when :string  then Core::Type.create_string_attr(java_name)
    else raise ArgumentError, "Unknown attribute type: #{attr_type.inspect}"
    end
  @positions << new_attribute
end
290
+
291
+ # This method is used for Nominal attributes definition in uninitialized Instances-derived classes
292
+ # * *Args* :
293
+ # - +name+ -> Attribute name, a String
294
+ # - +values+ -> An array of values for the nominal attribute
295
+ def nominal(name,values)
296
+ att :nominal, name, values
297
+ end
298
+
299
+ # This method is used for Numeric attributes definition in uninitialized Instances-derived classes
300
+ # * *Args* :
301
+ # - +name+ -> Attribute name, a String
302
+ def numeric(name)
303
+ att :numeric, name
304
+ end
305
+
306
+ # This method is used for Date attributes definition in uninitialized Instances-derived classes
307
+ # * *Args* :
308
+ # - +name+ -> Attribute name, a String
309
def date(name, format = "yyyy-MM-dd'T'HH:mm:ss")
  # +format+ is the date pattern handed on to the attribute definition.
  # It is optional so existing `date 'name'` calls keep working; previously no
  # format was forwarded at all, so the attribute was built with a nil format.
  # NOTE(review): the default is meant to mirror Weka's own ISO-8601 default
  # pattern - confirm against the Weka version bundled with the gem.
  att :date, name, format
end
312
+
313
+ # This method is used for String attributes definition in uninitialized Instances-derived classes
314
+ # * *Args* :
315
+ # - +name+ -> Attribute name, a String
316
+ def string(name)
317
+ att :string, name
318
+ end
319
+
320
+ # Class used for the creation of a new dataset (Instances class)
321
+ class Base < Instances
322
+ def initialize(&block)
323
+ attributes_vector = FastVector.new
324
+ @positions = []
325
+ self.instance_eval(&block) if block
326
+ @positions.each {|value| attributes_vector.addElement(value)}
327
+ super('Instances',attributes_vector,0)
328
+ end
329
+ end
330
+
331
+ # Return a json String for the current Instances object
332
+ # The output is modeled on the 'datatable' Google charts APIs
333
+ # More details at: 'https://developers.google.com/chart/interactive/docs/reference#DataTable'
334
+ def to_json_format
335
+ dataset_hash = Hash.new
336
+ dataset_hash[:cols] = enumerateAttributes.collect {|attribute| attribute.name}
337
+ dataset_hash[:rows] = enumerateInstances.collect {|instance| instance.toString}
338
+ return JSON.pretty_generate(dataset_hash)
339
+ end
340
+ end #Instances class
341
+
342
+ # Create an Instances object
343
+ # * *Args* :
344
+ # - +name+ -> A name for the Instances object
345
+ # - +attributes+ -> An array containing Attribute objects
346
+ def Type.create_instances(name,attributes)
347
+ attributes_vector = FastVector.new
348
+ attributes.each {|value| attributes_vector.addElement(value)}
349
+ return Instances.new(name,attributes_vector,0)
350
+ end
351
+
352
+ end
353
+ # Helper class for serialization
354
+ # Works with classifiers, filters, clusterers...
355
+ class SerializationHelper
356
+ end
357
+
358
+ end
359
+
360
+
361
+