ruby-band 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. data/.travis.yml +3 -0
  2. data/Gemfile +30 -0
  3. data/Gemfile.lock +119 -0
  4. data/Jarfile +9 -0
  5. data/Jarfile.lock +10 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +321 -0
  8. data/README.rdoc +70 -0
  9. data/Rakefile +66 -0
  10. data/VERSION +1 -0
  11. data/band_server/client.rb +35 -0
  12. data/band_server/client_alt.rb +35 -0
  13. data/band_server/first_dataset.csv +15 -0
  14. data/band_server/second_dataset.csv +15 -0
  15. data/band_server/simple_server.rb +90 -0
  16. data/band_server/third_dataset.csv +15 -0
  17. data/band_server/uploads/first_dataset.csv +15 -0
  18. data/band_server/uploads/second_dataset.csv +15 -0
  19. data/band_server/uploads/third_dataset.csv +15 -0
  20. data/bin/ruby-band +83 -0
  21. data/ext/mkrf_conf.rb +74 -0
  22. data/features/create_dataset.feature +12 -0
  23. data/features/step_definitions/create_dataset.rb +39 -0
  24. data/features/step_definitions/weka_classifiers.rb +43 -0
  25. data/features/step_definitions/weka_clustering.rb +34 -0
  26. data/features/step_definitions/weka_filters.rb +32 -0
  27. data/features/step_definitions/weka_parsers.rb +46 -0
  28. data/features/step_definitions/weka_pipeline.rb +41 -0
  29. data/features/support/env.rb +3 -0
  30. data/features/weka_classifiers.feature +16 -0
  31. data/features/weka_clustering.feature +15 -0
  32. data/features/weka_filters.feature +12 -0
  33. data/features/weka_parsers.feature +18 -0
  34. data/features/weka_pipeline.feature +14 -0
  35. data/lib/ruby-band.rb +12 -0
  36. data/lib/ruby-band/apache.rb +2 -0
  37. data/lib/ruby-band/apache/stat/correlation.rb +42 -0
  38. data/lib/ruby-band/apache/stat/inference.rb +151 -0
  39. data/lib/ruby-band/apache/stat/regression.rb +22 -0
  40. data/lib/ruby-band/core.rb +6 -0
  41. data/lib/ruby-band/core/parser/parser.rb +27 -0
  42. data/lib/ruby-band/core/type/apache_matrices.rb +35 -0
  43. data/lib/ruby-band/core/type/attribute.rb +53 -0
  44. data/lib/ruby-band/core/type/instance.rb +10 -0
  45. data/lib/ruby-band/core/type/instances.rb +361 -0
  46. data/lib/ruby-band/core/type/utils.rb +31 -0
  47. data/lib/ruby-band/weka.rb +14 -0
  48. data/lib/ruby-band/weka/attribute_selection/attribute_selection_utils.rb +20 -0
  49. data/lib/ruby-band/weka/attribute_selection/evaluators.rb +58 -0
  50. data/lib/ruby-band/weka/attribute_selection/search.rb +52 -0
  51. data/lib/ruby-band/weka/classifiers/bayes/bayes.rb +86 -0
  52. data/lib/ruby-band/weka/classifiers/bayes/bayes_utils.rb +82 -0
  53. data/lib/ruby-band/weka/classifiers/evaluation.rb +13 -0
  54. data/lib/ruby-band/weka/classifiers/functions/functions.rb +177 -0
  55. data/lib/ruby-band/weka/classifiers/functions/functions_utils.rb +78 -0
  56. data/lib/ruby-band/weka/classifiers/lazy/lazy.rb +86 -0
  57. data/lib/ruby-band/weka/classifiers/lazy/lazy_utils.rb +83 -0
  58. data/lib/ruby-band/weka/classifiers/mi/mi.rb +191 -0
  59. data/lib/ruby-band/weka/classifiers/mi/mi_utils.rb +80 -0
  60. data/lib/ruby-band/weka/classifiers/rules/rules.rb +190 -0
  61. data/lib/ruby-band/weka/classifiers/rules/rules_utils.rb +81 -0
  62. data/lib/ruby-band/weka/classifiers/trees/trees.rb +110 -0
  63. data/lib/ruby-band/weka/classifiers/trees/trees_utils.rb +85 -0
  64. data/lib/ruby-band/weka/clusterers/clusterers.rb +99 -0
  65. data/lib/ruby-band/weka/clusterers/clusterers_utils.rb +86 -0
  66. data/lib/ruby-band/weka/db/DatabaseUtils_mysql +280 -0
  67. data/lib/ruby-band/weka/db/DatabaseUtils_postgresql +594 -0
  68. data/lib/ruby-band/weka/db/db.rb +74 -0
  69. data/lib/ruby-band/weka/filters/supervised/attribute/attribute.rb +55 -0
  70. data/lib/ruby-band/weka/filters/supervised/instance/instance.rb +17 -0
  71. data/lib/ruby-band/weka/filters/supervised/supervised_utils.rb +38 -0
  72. data/lib/ruby-band/weka/filters/unsupervised/attribute/attribute.rb +90 -0
  73. data/lib/ruby-band/weka/filters/unsupervised/instance/instance.rb +48 -0
  74. data/lib/ruby-band/weka/filters/unsupervised/unsupervised_utils.rb +38 -0
  75. data/resources/ReutersGrain-test.arff +611 -0
  76. data/resources/ReutersGrain-train.arff +1561 -0
  77. data/resources/weather.csv +15 -0
  78. data/resources/weather.numeric.arff +23 -0
  79. data/ruby-band.gemspec +178 -0
  80. data/spec/ruby-band_spec.rb +7 -0
  81. data/spec/spec_helper.rb +12 -0
  82. data/test/helper.rb +18 -0
  83. data/test/test_apacheCorrelation.rb +22 -0
  84. data/test/test_apacheInference.rb +46 -0
  85. data/test/test_ruby-band.rb +9 -0
  86. metadata +426 -0
@@ -0,0 +1,151 @@
1
+ require 'java'
2
+
3
+ module Apache
4
+ module Stat
5
+ module Inference
6
+
7
+ java_import 'org.apache.commons.math3.stat.inference.ChiSquareTest'
8
+ java_import 'org.apache.commons.math3.stat.inference.MannWhitneyUTest'
9
+ java_import 'org.apache.commons.math3.stat.inference.OneWayAnova'
10
+ java_import 'org.apache.commons.math3.stat.inference.TTest'
11
+ java_import 'org.apache.commons.math3.stat.inference.WilcoxonSignedRankTest'
12
+ java_import 'org.apache.commons.math3.stat.StatUtils'
13
+ java_import 'java.util.ArrayList'
14
+
15
# An implementation of the Wilcoxon signed-rank test
# * *Args*    :
#   - +array_1+ -> must be a RubyArray (first sample).
#   - +array_2+ -> must be a RubyArray (second, paired sample).
#   - +exact+ -> optional; true requests the exact p-value, false the
#     approximated one. When omitted, the exact p-value is requested only
#     for samples of at most 30 pairs, because Commons Math refuses the
#     exact computation for larger samples.
# * *Returns* :
#   - the pair [statistic, p_value]
def self.wilcoxon_test(array_1, array_2, exact = nil)
  # Previously the exact flag was hard-coded to true, which made every
  # call with more than 30 pairs fail inside Commons Math.
  exact = array_1.length <= 30 if exact.nil?
  tester = WilcoxonSignedRankTest.new
  first = Core::Utils::double_to_a(array_1)
  second = Core::Utils::double_to_a(array_2)
  statistic = tester.wilcoxonSignedRank(first, second)
  p_value = tester.wilcoxonSignedRankTest(first, second, (exact ? true : false).to_java(:boolean))
  return statistic, p_value
end
27
+
28
# Utility class called by 'chi_square' method in this same package.
# Both methods return the pair [statistic, p_value] obtained from a
# ChiSquareTest object.
class Chi_square
  # Runs the test on a bidimensional RubyArray, converted to a Java long[][]
  def self.chi_square_2d(array_2d)
    tester = ChiSquareTest.new
    statistic = tester.chi_square(array_2d.to_java(Java::long[]))
    p_value = tester.chi_square_test(array_2d.to_java(Java::long[]))
    [statistic, p_value]
  end

  # Runs the test on expected frequencies (converted to double[]) and
  # observed counts (converted to long[])
  def self.chi_square_two_arrays(expected, observed)
    tester = ChiSquareTest.new
    statistic = tester.chi_square(expected.to_java(:double), observed.to_java(:long))
    p_value = tester.chi_square_test(expected.to_java(:double), observed.to_java(:long))
    [statistic, p_value]
  end
end
44
+
45
# Chi-square tests; the behaviour depends on the number of arguments.
# 1) One argument: Chi-Square statistic associated with a chi-square test
#    of independence based on the input counts array, viewed as a two-way table.
# * *Args*    :
#   - +Array+ -> must be a bidimensional RubyArray.
# 2) Two arguments: Chi-Square statistic comparing observed and expected
#    frequency counts.
# * *Args*    :
#   - +Array1+ -> must be a RubyArray with the expected frequencies.
#   - +Array2+ -> must be a RubyArray with the observed counts.
# * *Returns* :
#   - whatever the Chi_square helper returns (the pair [statistic, p_value])
# * *Raises* :
#   - +ArgumentError+ -> for a single non-bidimensional array or a wrong number of arguments
def self.chi_square(*args)
  case args.length
  when 2
    Chi_square.chi_square_two_arrays(*args)
  when 1
    raise ArgumentError,"RubyArray must be bidimensional" unless args[0].is_2d?
    Chi_square.chi_square_2d(*args)
  else
    raise ArgumentError, 'Function *args should be two RubyArrays or a bidimensional RubyArray'
  end
end
63
+
64
# Compare two datasets stored in Ruby Arrays.
# Both arrays are converted to Java long arrays; the result is the pair
# [statistic, p_value] given by ChiSquareTest's data-sets comparison methods.
def self.chi_square_dataset_compare(observed1, observed2)
  tester = ChiSquareTest.new
  statistic = tester.chiSquareDataSetsComparison(observed1.to_java(:long), observed2.to_java(:long))
  p_value = tester.chiSquareTestDataSetsComparison(observed1.to_java(:long), observed2.to_java(:long))
  [statistic, p_value]
end
71
+
72
# An implementation of the Mann-Whitney U test
# (also called Wilcoxon rank-sum test)
# * *Args*    :
#   - +Array1+ -> must be a RubyArray.
#   - +Array2+ -> must be a RubyArray.
# * *Returns* :
#   - the pair [U statistic, p_value]
def self.mann_whitney_u(array1, array2)
  tester = MannWhitneyUTest.new
  xs = array1.to_java(:double)
  ys = array2.to_java(:double)
  [tester.mannWhitneyU(xs, ys), tester.mannWhitneyUTest(xs, ys)]
end
85
+
86
#Utility class called by 't_test' method in this same package.
#Every method converts both samples to Java double arrays and returns
#the pair [statistic, p_value] produced by a TTest object.
class T_test

  # Delegates to TTest#homoscedasticT and TTest#homoscedasticTTest
  def self.homoscedastic(array_1, array_2)
    tester = TTest.new
    xs = array_1.to_java(:double)
    ys = array_2.to_java(:double)
    [tester.homoscedasticT(xs, ys), tester.homoscedasticTTest(xs, ys)]
  end

  # Delegates to TTest#pairedT and TTest#pairedTTest
  def self.paired(array_1, array_2)
    tester = TTest.new
    xs = array_1.to_java(:double)
    ys = array_2.to_java(:double)
    [tester.pairedT(xs, ys), tester.pairedTTest(xs, ys)]
  end

  # Delegates to TTest#t and TTest#tTest
  def self.t(array_1, array_2)
    tester = TTest.new
    xs = array_1.to_java(:double)
    ys = array_2.to_java(:double)
    [tester.t(xs, ys), tester.tTest(xs, ys)]
  end
end
116
+
117
# An implementation for Student's t-tests
# * *Args*    :
#   - +sample_1+ -> an array of numeric values representing a sample
#   - +sample_2+ -> an array of numeric values representing a sample
#   - +homoscedastic+ -> set to true for equal variance assumption
#   - +paired+ -> set to true if you want to perform a 'paired' t test
# Only the literal value true switches a flag on; when both flags are
# true the homoscedastic test is the one that runs.
def self.t_test(sample_1, sample_2, homoscedastic = false, paired = false)
  return T_test.homoscedastic(sample_1, sample_2) if homoscedastic == true
  return T_test.paired(sample_1, sample_2) if paired == true

  T_test.t(sample_1, sample_2)
end
132
+
133
# Implements one-way ANOVA (analysis of variance) statistics.
# Tests for differences between two or more categories of univariate data (for example,
# the body mass index of accountants, lawyers, doctors and computer programmers). When
# two categories are given, this is equivalent to the TTest.
# * *Args*    :
#   - +bidimensional_array+ -> a 2d RubyArray, one inner array per category
# * *Returns* :
#   - the pair [f_value, p_value]
def self.one_way_anova(bidimensional_array)
  # Each category becomes a Java double[] stored in a java.util.ArrayList
  categories = ArrayList.new
  bidimensional_array.each { |sample| categories.add(sample.to_java(:double)) }
  anova = OneWayAnova.new
  [anova.anovaFValue(categories), anova.anovaPValue(categories)]
end
149
+ end
150
+ end
151
+ end
@@ -0,0 +1,22 @@
1
+ require 'java'
2
+
3
+ module Apache
4
+ module Stat
5
+ module Regression
6
+ java_import "org.apache.commons.math3.stat.regression.SimpleRegression"
7
+
8
# Create a simple regression model on the input data
# * *Args*    :
#   - +vector+ -> must be a multidimensional array
# * *Returns* :
#   - the SimpleRegression object, already fed with the converted data
def self.simple_regression(vector)
  # add Jruby methods for regression analysis
  observations = Core::Utils.bidimensional_to_double vector
  SimpleRegression.new.tap { |model| model.addData(observations) }
end
18
+
19
+
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,6 @@
1
+ require 'ruby-band/core/type/instances'
2
+ require 'ruby-band/core/type/instance'
3
+ require 'ruby-band/core/parser/parser'
4
+ require 'ruby-band/core/type/utils'
5
+ require 'ruby-band/core/type/attribute'
6
+ require 'ruby-band/core/type/apache_matrices'
@@ -0,0 +1,27 @@
1
+ require 'java'
2
+ java_import 'weka.core.converters.CSVLoader'
3
+ java_import 'weka.core.converters.ArffLoader'
4
+
5
+ module Core
6
+ module Parser
7
# Parse an ARFF file and create an Instances object
# * *Args*    :
#   - +arff_file+ -> path of the ARFF file to load
def self.parse_ARFF(arff_file)
  java_import 'java.io.File'
  reader = ArffLoader.new
  reader.setSource(File.new(arff_file))
  reader.getDataSet
end
16
+
17
# Parse a CSV file and create an Instances object
# * *Args*    :
#   - +csv_file+ -> path of the CSV file to load
def self.parse_CSV(csv_file)
  java_import 'java.io.File'
  reader = CSVLoader.new
  reader.setSource(File.new(csv_file))
  reader.getDataSet
end
26
+ end
27
+ end
@@ -0,0 +1,35 @@
1
+ require 'java'
2
+
3
+ module Core
4
+ module Type
5
+
6
+ java_import 'org.apache.commons.math3.linear.BlockRealMatrix'
7
+ java_import 'org.apache.commons.math3.linear.Array2DRowRealMatrix'
8
+
9
+ #Define variables to use ruby-like names instead of Java's
10
+ Apache_matrix = Array2DRowRealMatrix
11
+ Apache_matrix_block = BlockRealMatrix
12
+
13
+ #* *Description* :
14
+ #Linear algebra support in commons-math provides operations on real matrices (both dense
15
+ #and sparse matrices are supported) and vectors. It features basic operations (addition, subtraction ...)
16
+ #and decomposition algorithms that can be used to solve linear systems either in exact sense and
17
+ #in least squares sense.
18
+ #The 'Apache_matrix' class represents a matrix with real numbers as entries.
19
+ #The following basic matrix operations are supported:
20
+ #- Matrix addition, subtraction, multiplication
21
+ #- Scalar addition and multiplication
22
+ #- Transpose
23
+ #- Norm and trace
24
+ #- Operation on a vector
25
+ class Apache_matrix
26
+
27
+ end
28
+
29
+ # Apache matrix implementation suited to dimensions above 50 or 100
30
+ class Apache_matrix_block
31
+
32
+ end
33
+
34
+ end
35
+ end
@@ -0,0 +1,53 @@
1
+ require 'java'
2
+
3
+ module Core
4
+ module Type
5
+
6
+ java_import "weka.core.Attribute"
7
+ java_import "weka.core.FastVector"
8
+
9
+ class Attribute
10
+
11
+ end
12
+
13
# Return a Numeric Attribute class object
# * *Args*    :
#   - +name_of_attr+ -> a String, the name of the attribute
def self.create_numeric_attr(name_of_attr)
  Attribute.new(name_of_attr)
end
20
+
21
# Return a Date Attribute class object
# * *Args*    :
#   - +name_of_attr+ -> a String, the name of the attribute
#   - +format+ -> The format of the attribute
def self.create_date_attr(name_of_attr, format)
  Attribute.new(name_of_attr, format)
end
29
+
30
# Return a Nominal Attribute class object
# * *Args*    :
#   - +name_of_attr+ -> a String, the name of the attribute
#   - +values_list+ -> An array, the list of nominal values
def self.create_nominal_attr(name_of_attr, values_list)
  # Weka expects the allowed labels packed in a FastVector
  allowed = FastVector.new
  values_list.each do |label|
    allowed.addElement(label)
  end
  Attribute.new(name_of_attr, allowed)
end
40
+
41
# Return a String Attribute class object
# * *Args*    :
#   - +name_of_attr+ -> a String, the name of the attribute
def self.create_string_attr(name_of_attr)
  # The (String, FastVector) constructor is looked up explicitly and then
  # invoked with nil in place of the FastVector.
  signature = [Java::java.lang.String, Java::weka.core.FastVector]
  builder = Attribute.java_class.constructor(*signature)
  builder.new_instance(name_of_attr, nil).to_java
end
49
+ end
50
+
51
+ end
52
+
53
+
@@ -0,0 +1,10 @@
1
+ module Core
2
+ module Type
3
+
4
+ java_import "weka.core.FastVector"
5
+ java_import "weka.core.Instance"
6
+
7
+ class Instance
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,361 @@
1
+ require 'java'
2
+ require 'ruport'
3
+ require 'json'
4
+
5
+ module Core
6
+
7
+ java_import "weka.core.SerializationHelper"
8
+ module Type
9
+
10
+ java_import "weka.core.Instances"
11
+ java_import 'java.io.File'
12
+ java_import 'weka.core.converters.CSVSaver'
13
+ java_import 'weka.core.converters.ArffSaver'
14
+ java_import "weka.core.FastVector"
15
+ java_import "weka.core.Instance"
16
+
17
+ #
18
+ # * *Description* :
19
+ # This is the main class from the Weka package for data handling. It is essentially a matrix: each row
20
+ # is an instance of the 'Instance' class, while each column is an instance of the 'Attribute' class
21
+ # The class 'Instances' is here extended to add custom functionalities
22
+ class Instances
23
+
24
# Convert an Instances object to a bidimensional Ruby array
# where each row corresponds to an Instance object.
# Numeric attributes contribute their numeric value, every other attribute
# its string value. Only the attributes yielded by enumerateAttributes are
# exported (NOTE(review): Weka documents that this enumeration skips the
# class attribute when one is set -- confirm this is the intended output).
def to_a2d
  columns = enumerateAttributes.map do |column|
    # Read through the attribute's own index: the enumeration counter used
    # before drifts away from the real position as soon as the enumeration
    # skips an attribute.
    position = column.index
    if column.isNumeric
      enumerateInstances.map { |row| row.value(position) }
    else
      enumerateInstances.map { |row| row.string_value(position) }
    end
  end
  columns.transpose
end
44
+
45
# Number of rows (Instance objects) held by the dataset
def n_rows
  numInstances
end
49
+
50
# Number of columns (Attribute objects) held by the dataset
def n_col
  numAttributes
end
54
+
55
# Print the dimensions of the dataset (number of rows and of columns of the
# current Instances class object) to standard output; nothing useful is returned
def dim
  $stdout.puts "Rows number:\t#{numInstances}\nColumns number:\t #{numAttributes}"
end
59
+
60
# Yield every row (Instance object) of the dataset to the given block
def each_row
  enumerate_instances.each do |row|
    yield row
  end
end
63
+
64
# Yield every row (Instance object) together with its position to the given block
def each_row_with_index
  enumerate_instances.each_with_index do |row, position|
    yield row, position
  end
end
67
+
68
# Yield every column (Attribute object) of the dataset to the given block
def each_column
  enumerate_attributes.each do |column|
    yield column
  end
end
71
+
72
# Yield every column (Attribute object) together with its position to the given block
def each_column_with_index
  enumerate_attributes.each_with_index do |column, position|
    yield column, position
  end
end
75
+
76
# Check if this instance's attributes are all Numeric
# * *Raises* :
#   - +ArgumentError+ -> at the first enumerated attribute that is not numeric
def check_numeric_instance
  enumerateAttributes.each do |att|
    next if att.isNumeric

    raise ArgumentError, "Sorry, attribute '#{att.name}' is not numeric!"
  end
end
84
+
85
# Convert the present Instances object to an Apache matrix if every Instances attribute
# is Numeric
# * *Raises* :
#   - +ArgumentError+ -> (from check_numeric_instance) when an attribute is not numeric
def to_Apache_matrix
  check_numeric_instance
  # to_a2d is this class's bidimensional converter; the plain to_a used
  # before does not build the rows-of-values array expected by
  # bidimensional_to_double.
  ruby_array = to_a2d
  java_double_array = Core::Utils::bidimensional_to_double(ruby_array)
  Core::Type::Apache_matrix.new(java_double_array)
end
93
+
94
# Convert the present Instances object to an Apache matrix (block) if every Instances attribute
# is Numeric
# * *Raises* :
#   - +ArgumentError+ -> (from check_numeric_instance) when an attribute is not numeric
def to_Apache_matrix_block
  check_numeric_instance
  # to_a2d is this class's bidimensional converter; the plain to_a used
  # before does not build the rows-of-values array expected by
  # bidimensional_to_double.
  ruby_array = to_a2d
  java_double_array = Core::Utils::bidimensional_to_double(ruby_array)
  Core::Type::Apache_matrix_block.new(java_double_array)
end
102
+
103
# Return data for a single attribute (a column from the Instances object)
# * *Args*    :
#   - +att+ -> a String, the name of the attribute
# * *Returns* :
#   - an Array with one entry per row: the numeric value for a numeric
#     attribute, the string value otherwise
# * *Raises* :
#   - +ArgumentError+ -> when no attribute is found for +att+
def return_attr_data(att)
  # Look the attribute up once instead of once per row
  column = attribute(att)
  raise ArgumentError, "Sorry, attribute '#{att}' does not exist!" if column.nil?

  if column.isNumeric
    enumerateInstances.map { |row| row.value(column) }
  else
    position = column.index
    enumerateInstances.map { |row| row.string_value(position) }
  end
end
120
+
121
# Return the mean value of a single attribute (a column from the Instances object)
# * *Args*    :
#   - +attribute_name+ -> a String, the name of the attribute
# * *Returns* :
#   - the sum of the attribute values divided by the number of rows, as a Float
# * *Raises* :
#   - +ArgumentError+ -> when no attribute is found for +attribute_name+
def mean(attribute_name)
  # Resolve the attribute once instead of once per row
  column = attribute(attribute_name)
  raise ArgumentError, "Sorry, attribute '#{attribute_name}' does not exist!" if column.nil?

  total = enumerateInstances.inject(0) { |sum, row| sum + row.value(column) }
  total / (numInstances * 1.0)
end
130
+
131
# Return the variance of a single attribute (a column from the Instances object)
# * *Args*    :
#   - +attribute_name+ -> a String, the name of the attribute
# * *Returns* :
#   - the value computed by the Java-side variance(int) method
# * *Raises* :
#   - +ArgumentError+ -> when no attribute is found for +attribute_name+
def variance(attribute_name)
  column = attribute(attribute_name)
  raise ArgumentError, "Sorry, attribute '#{attribute_name}' does not exist!" if column.nil?

  # This Ruby method shadows the Java overloads of the same name, so a plain
  # variance(index) call would re-enter this method; java_send reaches the
  # Java variance(int) implementation directly.
  java_send(:variance, [Java::int], column.index)
end
139
+
140
# Write the content of the current Instances object to a .csv file
# * *Args*    :
#   - +out_file+ -> a String, the name of the output file
def to_CSV(out_file)
  writer = CSVSaver.new
  writer.setInstances(self)
  writer.setFile(File.new(out_file))
  writer.writeBatch
end
150
+
151
# Write the content of the current Instances object to a .arff file
# * *Args*    :
#   - +out_file+ -> a String, the name of the output file
def to_ARFF(out_file)
  writer = ArffSaver.new
  writer.setInstances(self)
  writer.setFile(File.new(out_file))
  writer.writeBatch
end
161
+
162
# Translate one raw cell value into the number stored for it in an Instance row.
# * *Args*    :
#   - +attribute_value+ -> the raw value for the cell
#   - +position+ -> index of the attribute (column) the value belongs to
# * *Returns* :
#   - the parsed date for a date attribute, the label index for a nominal
#     attribute, the index returned by addStringValue for a string
#     attribute, the value itself for a numeric attribute; nil (after
#     printing a message) for any other attribute type
def insert_attribute(attribute_value, position)
  column = attribute(position)
  # Dates are tested first: Weka's isNumeric also answers true for date
  # attributes, which made the date branch unreachable.
  if column.isDate
    column.parseDate(attribute_value)
  elsif column.isNumeric
    attribute_value
  elsif column.isNominal
    # NOTE(review): indexOfValue's result is stored as-is, also when the
    # label is not among the declared values -- confirm whether that should raise.
    column.indexOfValue(attribute_value)
  elsif column.isString
    column.addStringValue(attribute_value)
  else
    puts 'Attribute type is unknown!'
  end
end
176
+ private :insert_attribute
177
+
178
# (check function): checks that the array is bidimensional and that
# the lengths are equal
# * *Args*    :
#   - +data+ -> the candidate dataset, one inner array per row
# * *Returns* :
#   - true when +data+ is a collection whose elements are all row-like
#     (they answer to each_with_index and length) and share one length;
#     false otherwise. An empty collection is accepted.
def check_array(data)
  return false unless data.respond_to?(:all?)

  width = nil
  data.all? do |row|
    next false unless row.respond_to?(:each_with_index) && row.respond_to?(:length)

    width ||= row.length
    row.length == width
  end
end
183
+
184
# An entire dataset is inserted 'by row' into the current Instances object
# i.e. one Instance object is inserted at the time
# * *Args*    :
#   - +data+ -> a bidimensional array
# Nothing is inserted when check_array answers false for +data+.
def populate_by_row(data)
  return if check_array(data) == false

  data.each { |row| add_instance(row) }
end
195
+
196
# An Instance instance object (one row) is inserted into the current Instances object
# * *Args*    :
#   - +instance+ -> an array of values of the correct data type (:nominal,:numeric,etc...)
def add_instance(instance)
  # Every cell is first translated by insert_attribute for its column
  encoded = []
  instance.each_with_index { |cell, position| encoded << insert_attribute(cell, position) }
  # The new row is built with weight 1.0 from the values as a Java double array
  add(Instance.new(1.0, encoded.to_java(:double)))
end
208
+
209
# An Attribute instance object is inserted into the current Instances object
# * *Args*    :
#   - +attribute_name+ -> A name for the new attribute
# * *WARNING* :
# This method only creates an empty attribute field
def add_numeric_attribute(attribute_name)
  new_column = Attribute.new(attribute_name)
  # Inserting at numAttributes puts the new column after the existing ones
  insertAttributeAt(new_column, numAttributes)
end
217
+
218
# An Attribute instance object is inserted into the current Instances object
# * *Args*    :
#   - +attribute+ -> A name for the new attribute
#   - +list_values+ -> RubyArray with nominal values
# * *WARNING* :
# This method only creates an empty attribute field
def add_nominal_attribute(attribute, list_values)
  labels = FastVector.new
  list_values.each { |label| labels.addElement(label) }
  new_column = Attribute.new(attribute, labels)
  # Inserting at numAttributes puts the new column after the existing ones
  insertAttributeAt(new_column, numAttributes)
end
231
+
232
#Build a summary of the Instances's attributes (with the corresponding types).
#Nothing is printed here.
# * *Returns* :
#   - an Array holding a Ruport table (one 'Names' row and one 'Types' row,
#     one column per enumerated attribute) followed by a String with the
#     number of rows
def summary
  report = Ruport::Data::Table::new
  report.add_column 'Attributes'
  names = ['Names']
  types = ['Types']
  enumerateAttributes.each_with_index do |att, idx|
    report.add_column idx
    names << "'#{att.name}'"
    # Exactly one label per attribute keeps the 'Types' row aligned with the
    # columns. Date is tested before Numeric because Weka's isNumeric also
    # answers true for date attributes (they used to get two labels).
    types << if att.isDate
               "Date"
             elsif att.isNumeric
               "Numeric"
             elsif att.isNominal
               "Nominal"
             elsif att.isString
               "String"
             else
               "Unknown"
             end
  end
  report << names
  report << types

  [report, "\nNumber of rows: #{numInstances}"]
end
265
+
266
# Merges two sets of Instances together. The resulting set will have all the
# attributes of the first set plus all the attributes of the second set. The
# number of instances in both sets must be the same.
# * *Args*    :
#   - +instances+ -> An Instances class object
# * *Returns* :
#   - the result of Instances.mergeInstances applied to this object and +instances+
def merge_with(instances)
  Instances.mergeInstances(self, instances)
end
274
+
275
+ # This method creates an Instances object (see Cucumber documentation for further details)
276
+ # def self.create
277
+ # name = 'Instances'
278
+ # instances = Core::Type.create_instances(name,@@positions)
279
+ # return instances
280
+ # end
281
+
282
# This method is used for attributes definition in uninitialized Instances-derived classes
# * *Args*    :
#   - +attr_type+ -> one of :numeric, :nominal, :date, :string
#   - +name+ -> Attribute name, a String
#   - +values+ -> optional; the first extra argument is the list of values
#     for a nominal attribute or the format for a date attribute
# * *Returns* :
#   - the @positions collection, after the new attribute has been appended
# * *Raises* :
#   - +ArgumentError+ -> for an unknown +attr_type+ (a nil entry used to be
#     appended silently)
def att(attr_type, name, *values)
  column =
    case attr_type
    when :numeric then Core::Type.create_numeric_attr(name.to_java(:string))
    when :nominal then Core::Type.create_nominal_attr(name.to_java(:string), values[0])
    when :date then Core::Type.create_date_attr(name.to_java(:string), values[0])
    when :string then Core::Type.create_string_attr(name.to_java(:string))
    else raise ArgumentError, "Unknown attribute type '#{attr_type}'"
    end
  @positions << column
end
290
+
291
# This method is used for Nominal attributes definition in uninitialized Instances-derived classes
# * *Args*    :
#   - +name+ -> Attribute name, a String
#   - +values+ -> An array of values for the nominal attribute
def nominal(name, values)
  att(:nominal, name, values)
end
298
+
299
# This method is used for Numeric attributes definition in uninitialized Instances-derived classes
# * *Args*    :
#   - +name+ -> Attribute name, a String
def numeric(name)
  att(:numeric, name)
end
305
+
306
# This method is used for Date attributes definition in uninitialized Instances-derived classes
# * *Args*    :
#   - +name+ -> Attribute name, a String
#   - +format+ -> optional date pattern, forwarded to att as the attribute's
#     format. Without it att used to receive no format at all and handed
#     nil to the attribute constructor. The default is the ISO-8601 pattern
#     (NOTE(review): chosen to match the pattern Weka applies when no
#     format is given -- confirm against the Weka version in use).
def date(name, format = "yyyy-MM-dd'T'HH:mm:ss")
  att :date, name, format
end
312
+
313
# This method is used for String attributes definition in uninitialized Instances-derived classes
# * *Args*    :
#   - +name+ -> Attribute name, a String
def string(name)
  att(:string, name)
end
319
+
320
# Class used for the creation of a new dataset (Instances class).
# The optional block is evaluated in the context of the new object, so the
# attribute definition methods can be called in it; every attribute
# collected in @positions is then handed to the parent constructor.
class Base < Instances
  def initialize(&block)
    columns = FastVector.new
    @positions = []
    instance_eval(&block) if block
    @positions.each do |column|
      columns.addElement(column)
    end
    # The dataset is always named 'Instances' and starts with capacity 0
    super('Instances', columns, 0)
  end
end
330
+
331
# Return a json String for the current Instances object
# The output is modeled on the 'datatable' Google charts APIs
# More details at: 'https://developers.google.com/chart/interactive/docs/reference#DataTable'
# The hash has two keys: cols (the attribute names) and rows (the string
# form of every instance).
def to_json_format
  column_names = enumerateAttributes.map { |column| column.name }
  row_texts = enumerateInstances.map { |row| row.toString }
  JSON.pretty_generate({ :cols => column_names, :rows => row_texts })
end
340
+ end #Instances class
341
+
342
# Create an Instances object
# * *Args*    :
#   - +name+ -> A name for the Instances object
#   - +attributes+ -> An array containing Attribute objects
# * *Returns* :
#   - a new Instances object built from the attributes, with capacity 0
def self.create_instances(name, attributes)
  columns = FastVector.new
  attributes.each do |column|
    columns.addElement(column)
  end
  Instances.new(name, columns, 0)
end
351
+
352
+ end
353
+ # Helper class for serialization
354
+ # Works with classifiers, filters, clusterers...
355
+ class SerializationHelper
356
+ end
357
+
358
+ end
359
+
360
+
361
+