ruby-band 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +3 -0
- data/Gemfile +30 -0
- data/Gemfile.lock +119 -0
- data/Jarfile +9 -0
- data/Jarfile.lock +10 -0
- data/LICENSE.txt +22 -0
- data/README.md +321 -0
- data/README.rdoc +70 -0
- data/Rakefile +66 -0
- data/VERSION +1 -0
- data/band_server/client.rb +35 -0
- data/band_server/client_alt.rb +35 -0
- data/band_server/first_dataset.csv +15 -0
- data/band_server/second_dataset.csv +15 -0
- data/band_server/simple_server.rb +90 -0
- data/band_server/third_dataset.csv +15 -0
- data/band_server/uploads/first_dataset.csv +15 -0
- data/band_server/uploads/second_dataset.csv +15 -0
- data/band_server/uploads/third_dataset.csv +15 -0
- data/bin/ruby-band +83 -0
- data/ext/mkrf_conf.rb +74 -0
- data/features/create_dataset.feature +12 -0
- data/features/step_definitions/create_dataset.rb +39 -0
- data/features/step_definitions/weka_classifiers.rb +43 -0
- data/features/step_definitions/weka_clustering.rb +34 -0
- data/features/step_definitions/weka_filters.rb +32 -0
- data/features/step_definitions/weka_parsers.rb +46 -0
- data/features/step_definitions/weka_pipeline.rb +41 -0
- data/features/support/env.rb +3 -0
- data/features/weka_classifiers.feature +16 -0
- data/features/weka_clustering.feature +15 -0
- data/features/weka_filters.feature +12 -0
- data/features/weka_parsers.feature +18 -0
- data/features/weka_pipeline.feature +14 -0
- data/lib/ruby-band.rb +12 -0
- data/lib/ruby-band/apache.rb +2 -0
- data/lib/ruby-band/apache/stat/correlation.rb +42 -0
- data/lib/ruby-band/apache/stat/inference.rb +151 -0
- data/lib/ruby-band/apache/stat/regression.rb +22 -0
- data/lib/ruby-band/core.rb +6 -0
- data/lib/ruby-band/core/parser/parser.rb +27 -0
- data/lib/ruby-band/core/type/apache_matrices.rb +35 -0
- data/lib/ruby-band/core/type/attribute.rb +53 -0
- data/lib/ruby-band/core/type/instance.rb +10 -0
- data/lib/ruby-band/core/type/instances.rb +361 -0
- data/lib/ruby-band/core/type/utils.rb +31 -0
- data/lib/ruby-band/weka.rb +14 -0
- data/lib/ruby-band/weka/attribute_selection/attribute_selection_utils.rb +20 -0
- data/lib/ruby-band/weka/attribute_selection/evaluators.rb +58 -0
- data/lib/ruby-band/weka/attribute_selection/search.rb +52 -0
- data/lib/ruby-band/weka/classifiers/bayes/bayes.rb +86 -0
- data/lib/ruby-band/weka/classifiers/bayes/bayes_utils.rb +82 -0
- data/lib/ruby-band/weka/classifiers/evaluation.rb +13 -0
- data/lib/ruby-band/weka/classifiers/functions/functions.rb +177 -0
- data/lib/ruby-band/weka/classifiers/functions/functions_utils.rb +78 -0
- data/lib/ruby-band/weka/classifiers/lazy/lazy.rb +86 -0
- data/lib/ruby-band/weka/classifiers/lazy/lazy_utils.rb +83 -0
- data/lib/ruby-band/weka/classifiers/mi/mi.rb +191 -0
- data/lib/ruby-band/weka/classifiers/mi/mi_utils.rb +80 -0
- data/lib/ruby-band/weka/classifiers/rules/rules.rb +190 -0
- data/lib/ruby-band/weka/classifiers/rules/rules_utils.rb +81 -0
- data/lib/ruby-band/weka/classifiers/trees/trees.rb +110 -0
- data/lib/ruby-band/weka/classifiers/trees/trees_utils.rb +85 -0
- data/lib/ruby-band/weka/clusterers/clusterers.rb +99 -0
- data/lib/ruby-band/weka/clusterers/clusterers_utils.rb +86 -0
- data/lib/ruby-band/weka/db/DatabaseUtils_mysql +280 -0
- data/lib/ruby-band/weka/db/DatabaseUtils_postgresql +594 -0
- data/lib/ruby-band/weka/db/db.rb +74 -0
- data/lib/ruby-band/weka/filters/supervised/attribute/attribute.rb +55 -0
- data/lib/ruby-band/weka/filters/supervised/instance/instance.rb +17 -0
- data/lib/ruby-band/weka/filters/supervised/supervised_utils.rb +38 -0
- data/lib/ruby-band/weka/filters/unsupervised/attribute/attribute.rb +90 -0
- data/lib/ruby-band/weka/filters/unsupervised/instance/instance.rb +48 -0
- data/lib/ruby-band/weka/filters/unsupervised/unsupervised_utils.rb +38 -0
- data/resources/ReutersGrain-test.arff +611 -0
- data/resources/ReutersGrain-train.arff +1561 -0
- data/resources/weather.csv +15 -0
- data/resources/weather.numeric.arff +23 -0
- data/ruby-band.gemspec +178 -0
- data/spec/ruby-band_spec.rb +7 -0
- data/spec/spec_helper.rb +12 -0
- data/test/helper.rb +18 -0
- data/test/test_apacheCorrelation.rb +22 -0
- data/test/test_apacheInference.rb +46 -0
- data/test/test_ruby-band.rb +9 -0
- metadata +426 -0
@@ -0,0 +1,151 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Apache
  module Stat
    # Ruby-friendly wrappers around the hypothesis tests shipped with
    # Apache Commons Math (package org.apache.commons.math3.stat.inference).
    # Every public function returns a two-element Array:
    # [test statistic, p-value].
    module Inference

      java_import 'org.apache.commons.math3.stat.inference.ChiSquareTest'
      java_import 'org.apache.commons.math3.stat.inference.MannWhitneyUTest'
      java_import 'org.apache.commons.math3.stat.inference.OneWayAnova'
      java_import 'org.apache.commons.math3.stat.inference.TTest'
      java_import 'org.apache.commons.math3.stat.inference.WilcoxonSignedRankTest'
      java_import 'org.apache.commons.math3.stat.StatUtils'
      java_import 'java.util.ArrayList'

      # An implementation of the Wilcoxon signed-rank test (paired samples)
      # * *Args*    :
      #   - +array_1+ -> must be a RubyArray.
      #   - +array_2+ -> must be a RubyArray.
      # * *Returns* :
      #   - [statistic, p_value]. The p-value is requested with the Java
      #     +exactPValue+ flag set to true.
      def self.wilcoxon_test(array_1, array_2)
        test      = WilcoxonSignedRankTest.new
        first     = Core::Utils::double_to_a(array_1)
        second    = Core::Utils::double_to_a(array_2)
        statistic = test.wilcoxonSignedRank(first, second)
        p_value   = test.wilcoxonSignedRankTest(first, second, true.to_java(:boolean))
        [statistic, p_value]
      end

      # Utility class called by 'chi_square' method in this same package
      class Chi_square
        # Chi-square test of independence on a two-way table of counts.
        # * *Args*    :
        #   - +array_2d+ -> a bidimensional RubyArray of integer counts
        # * *Returns* :
        #   - [statistic, p_value]
        def self.chi_square_2d(array_2d)
          test   = ChiSquareTest.new
          # Convert once; both Java calls receive the same long[][] table.
          counts = array_2d.to_java(Java::long[])
          [test.chi_square(counts), test.chi_square_test(counts)]
        end

        # Chi-square goodness-of-fit test of observed against expected counts.
        # * *Args*    :
        #   - +expected+ -> RubyArray of expected frequencies (converted to double[])
        #   - +observed+ -> RubyArray of observed counts (converted to long[])
        # * *Returns* :
        #   - [statistic, p_value]
        def self.chi_square_two_arrays(expected, observed)
          test            = ChiSquareTest.new
          expected_counts = expected.to_java(:double)
          observed_counts = observed.to_java(:long)
          [test.chi_square(expected_counts, observed_counts),
           test.chi_square_test(expected_counts, observed_counts)]
        end
      end

      # Chi-square test; the behaviour depends on the number of arguments.
      #
      # 1) One argument: computes the Chi-Square statistic associated with a
      #    chi-square test of independence based on the input counts array,
      #    viewed as a two-way table.
      # * *Args*    :
      #   - +Array+ -> must be a bidimensional RubyArray.
      #
      # 2) Two arguments: computes the Chi-Square statistic comparing
      #    observed and expected frequency counts.
      # * *Args*    :
      #   - +Array1+ -> must be a RubyArray (expected frequencies).
      #   - +Array2+ -> must be a RubyArray (observed counts).
      #
      # * *Returns* :
      #   - [statistic, p_value]
      # * *Raises*  :
      #   - +ArgumentError+ -> wrong number of arguments, or a single
      #     argument that does not answer true to +is_2d?+
      def self.chi_square(*args)
        case args.length
        when 2
          Chi_square.chi_square_two_arrays(*args)
        when 1
          raise ArgumentError, "RubyArray must be bidimensional" unless args[0].is_2d?
          Chi_square.chi_square_2d(*args)
        else
          raise ArgumentError, 'Function *args should be two RubyArrays or a bidimensional RubyArray'
        end
      end

      # Compare two datasets of observed counts stored in Ruby Arrays
      # * *Args*    :
      #   - +observed1+ -> RubyArray of counts for the first dataset
      #   - +observed2+ -> RubyArray of counts for the second dataset
      # * *Returns* :
      #   - [statistic, p_value]
      def self.chi_square_dataset_compare(observed1, observed2)
        test   = ChiSquareTest.new
        first  = observed1.to_java(:long)
        second = observed2.to_java(:long)
        [test.chiSquareDataSetsComparison(first, second),
         test.chiSquareTestDataSetsComparison(first, second)]
      end

      # An implementation of the Mann-Whitney U test
      # (also called Wilcoxon rank-sum test)
      # * *Args*    :
      #   - +array1+ -> must be a RubyArray.
      #   - +array2+ -> must be a RubyArray.
      # * *Returns* :
      #   - [u_statistic, p_value]
      def self.mann_whitney_u(array1, array2)
        test   = MannWhitneyUTest.new
        first  = array1.to_java(:double)
        second = array2.to_java(:double)
        [test.mannWhitneyU(first, second), test.mannWhitneyUTest(first, second)]
      end

      # Utility class called by 't_test' method in this same package
      class T_test

        # Two-sample t-test under the equal-variance assumption.
        # Returns [t_statistic, p_value].
        def self.homoscedastic(array_1, array_2)
          test   = TTest.new
          first  = array_1.to_java(:double)
          second = array_2.to_java(:double)
          [test.homoscedasticT(first, second), test.homoscedasticTTest(first, second)]
        end

        # Paired t-test. Returns [t_statistic, p_value].
        def self.paired(array_1, array_2)
          test   = TTest.new
          first  = array_1.to_java(:double)
          second = array_2.to_java(:double)
          [test.pairedT(first, second), test.pairedTTest(first, second)]
        end

        # Two-sample t-test without the equal-variance assumption.
        # Returns [t_statistic, p_value].
        def self.t(array_1, array_2)
          test   = TTest.new
          first  = array_1.to_java(:double)
          second = array_2.to_java(:double)
          [test.t(first, second), test.tTest(first, second)]
        end
      end

      # An implementation for Student's t-tests
      # * *Args*    :
      #   - +sample_1+ -> an array of numeric values representing a sample
      #   - +sample_2+ -> an array of numeric values representing a sample
      #   - +homoscedastic+ -> set to true for equal variance assumption
      #   - +paired+ -> set to true if you want to perform a 'paired' t test
      # * *Returns* :
      #   - [t_statistic, p_value]
      # * *Note*    :
      #   +homoscedastic+ is checked first: when both flags are set the
      #   homoscedastic test is run and +paired+ is ignored.
      def self.t_test(sample_1, sample_2, homoscedastic=false, paired=false)
        if homoscedastic
          T_test.homoscedastic(sample_1, sample_2)
        elsif paired
          T_test.paired(sample_1, sample_2)
        else
          T_test.t(sample_1, sample_2)
        end
      end

      # Implements one-way ANOVA (analysis of variance) statistics.
      # Tests for differences between two or more categories of univariate data (for example,
      # the body mass index of accountants, lawyers, doctors and computer programmers). When
      # two categories are given, this is equivalent to the TTest.
      # * *Args*    :
      #   - +bidimensional_array+ -> a 2d RubyArray, one inner array per category
      # * *Returns* :
      #   - [f_value, p_value]
      def self.one_way_anova(bidimensional_array)
        # OneWayAnova expects a java.util.Collection of double[] samples.
        categories = ArrayList.new
        bidimensional_array.each { |sample| categories.add(sample.to_java(:double)) }
        anova = OneWayAnova.new
        [anova.anovaFValue(categories), anova.anovaPValue(categories)]
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Apache
  module Stat
    # Wrappers around the regression classes of Apache Commons Math.
    module Regression
      java_import "org.apache.commons.math3.stat.regression.SimpleRegression"

      # Build a SimpleRegression model and feed it the input data
      # * *Args*    :
      #   - +vector+ -> must be a multidimensional array; it is converted
      #     with Core::Utils.bidimensional_to_double before being added
      # * *Returns* :
      #   - the populated SimpleRegression object
      # TODO: add JRuby methods for regression analysis
      def self.simple_regression(vector)
        points = Core::Utils.bidimensional_to_double(vector)
        SimpleRegression.new.tap { |model| model.addData(points) }
      end

    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'java'
|
2
|
+
java_import 'weka.core.converters.CSVLoader'
|
3
|
+
java_import 'weka.core.converters.ArffLoader'
|
4
|
+
|
5
|
+
module Core
  # File loaders that turn a dataset file into a Weka Instances object.
  module Parser
    # Parse an ARFF file and create an Instances object
    # * *Args*    :
    #   - +arff_file+ -> path of the ARFF file to read
    # * *Returns* :
    #   - the dataset returned by the loader's +getDataSet+
    def self.parse_ARFF(arff_file)
      java_import 'java.io.File'
      reader = ArffLoader.new
      reader.setSource(File.new(arff_file))
      reader.getDataSet
    end

    # Parse a CSV file and create an Instances object
    # * *Args*    :
    #   - +csv_file+ -> path of the CSV file to read
    # * *Returns* :
    #   - the dataset returned by the loader's +getDataSet+
    def self.parse_CSV(csv_file)
      java_import 'java.io.File'
      reader = CSVLoader.new
      reader.setSource(File.new(csv_file))
      reader.getDataSet
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Core
  module Type

    java_import 'org.apache.commons.math3.linear.BlockRealMatrix'
    java_import 'org.apache.commons.math3.linear.Array2DRowRealMatrix'

    # Ruby-like aliases for the Java matrix classes
    Apache_matrix = Array2DRowRealMatrix
    Apache_matrix_block = BlockRealMatrix

    # * *Description*    :
    # Linear algebra support in commons-math provides operations on real matrices (both dense
    # and sparse matrices are supported) and vectors. It features basic operations (addition,
    # subtraction ...) and decomposition algorithms that can be used to solve linear systems
    # either in the exact sense or in the least squares sense.
    # The 'Apache_matrix' class represents a matrix with real numbers as entries.
    # The following basic matrix operations are supported:
    # - Matrix addition, subtraction, multiplication
    # - Scalar addition and multiplication
    # - Transpose
    # - Norm and trace
    # - Operation on a vector
    #
    # The class is reopened here (with an empty body) as the hook for Ruby-side extensions.
    class Apache_matrix; end

    # Apache matrix implementation suited to dimensions above 50 or 100
    class Apache_matrix_block; end

  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Core
  module Type

    java_import "weka.core.Attribute"
    java_import "weka.core.FastVector"

    # The imported Weka class is reopened (empty body) as a hook for extensions.
    class Attribute; end

    # Build a Numeric Attribute object
    # * *Args*    :
    #   - +name_of_attr+ -> a String, the name of the attribute
    def self.create_numeric_attr(name_of_attr)
      Attribute.new(name_of_attr)
    end

    # Build a Date Attribute object
    # * *Args*    :
    #   - +name_of_attr+ -> a String, the name of the attribute
    #   - +format+ -> the date format of the attribute
    def self.create_date_attr(name_of_attr, format)
      Attribute.new(name_of_attr, format)
    end

    # Build a Nominal Attribute object
    # * *Args*    :
    #   - +name_of_attr+ -> a String, the name of the attribute
    #   - +values_list+ -> an array, the list of nominal values
    def self.create_nominal_attr(name_of_attr, values_list)
      allowed = FastVector.new.tap do |vector|
        values_list.each { |label| vector.addElement(label) }
      end
      Attribute.new(name_of_attr, allowed)
    end

    # Build a String Attribute object
    # * *Args*    :
    #   - +name_of_attr+ -> a String, the name of the attribute
    def self.create_string_attr(name_of_attr)
      # The (String, FastVector) constructor is looked up explicitly and
      # invoked with a nil vector.
      string_ctor = Attribute.java_class.constructor(Java::java.lang.String, Java::weka.core.FastVector)
      string_ctor.new_instance(name_of_attr, nil).to_java
    end
  end

end
|
52
|
+
|
53
|
+
|
@@ -0,0 +1,361 @@
|
|
1
|
+
require 'java'
|
2
|
+
require 'ruport'
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
module Core

  java_import "weka.core.SerializationHelper"
  module Type

    java_import "weka.core.Instances"
    java_import 'java.io.File'
    java_import 'weka.core.converters.CSVSaver'
    java_import 'weka.core.converters.ArffSaver'
    java_import "weka.core.FastVector"
    java_import "weka.core.Instance"
    # Attribute is used by the add_*_attribute methods below; importing it
    # here keeps this file usable without relying on another file's import.
    java_import "weka.core.Attribute"

    #
    # * *Description*    :
    # This is the main class from the Weka package for data handling. It is essentially a matrix: each row
    # is an instance of the 'Instance' class, while each column is an instance of the 'Attribute' class
    # The class 'Instances' is here extended to add custom functionalities
    class Instances

      # Convert an Instances object to a bidimensional Ruby array
      # where each row corresponds to an Instance object.
      # Numeric attributes yield their numeric value, every other attribute
      # its string value.
      def to_a2d
        # Values are read through the Attribute object itself rather than
        # through its position in the enumeration, so the column stays
        # correct even if the enumeration omits an attribute.
        columns = enumerateAttributes.map do |column|
          if column.isNumeric
            enumerateInstances.map { |row| row.value(column) }
          else
            enumerateInstances.map { |row| row.string_value(column) }
          end
        end
        columns.transpose
      end

      # Return the number of rows (Instance objects) in the dataset
      def n_rows
        numInstances
      end

      # Return the number of columns (Attribute objects) in the dataset
      def n_col
        numAttributes
      end

      # Print to STDOUT the dimensions of the dataset (for the current Instances class object)
      def dim
        puts "Rows number:\t#{numInstances}\nColumns number:\t #{numAttributes}"
      end

      # Yield every row (Instance object); returns an Enumerator without a block
      def each_row
        return to_enum(:each_row) unless block_given?
        enumerate_instances.each { |inst| yield(inst) }
      end

      # Yield every row together with its position; returns an Enumerator without a block
      def each_row_with_index
        return to_enum(:each_row_with_index) unless block_given?
        enumerate_instances.each_with_index { |inst, id| yield(inst, id) }
      end

      # Yield every column (Attribute object); returns an Enumerator without a block
      def each_column
        return to_enum(:each_column) unless block_given?
        enumerate_attributes.each { |attribute| yield(attribute) }
      end

      # Yield every column together with its position; returns an Enumerator without a block
      def each_column_with_index
        return to_enum(:each_column_with_index) unless block_given?
        enumerate_attributes.each_with_index { |attribute, id| yield(attribute, id) }
      end

      # Check if this instance's attributes are all Numeric
      # * *Raises*  :
      #   - +ArgumentError+ -> on the first non-numeric attribute found
      def check_numeric_instance
        enumerateAttributes.each do |att|
          unless att.isNumeric
            raise ArgumentError, "Sorry, attribute '#{att.name}' is not numeric!"
          end
        end
      end

      # Convert the present Instances object to an Apache matrix if every Instances attribute
      # is Numeric
      # * *Raises*  :
      #   - +ArgumentError+ -> if any attribute is not numeric
      def to_Apache_matrix
        check_numeric_instance
        # to_a2d (not to_a) produces the row-wise bidimensional array that
        # the double[][] conversion is given.
        java_double_array = Core::Utils::bidimensional_to_double(to_a2d)
        Core::Type::Apache_matrix.new(java_double_array)
      end

      # Convert the present Instances object to an Apache matrix (block) if every Instances attribute
      # is Numeric
      # * *Raises*  :
      #   - +ArgumentError+ -> if any attribute is not numeric
      def to_Apache_matrix_block
        check_numeric_instance
        java_double_array = Core::Utils::bidimensional_to_double(to_a2d)
        Core::Type::Apache_matrix_block.new(java_double_array)
      end

      # Return data for a single attribute (a column from the Instances object)
      # * *Args*    :
      #   - +att+ -> a String, the name of the attribute
      # * *Raises*  :
      #   - +ArgumentError+ -> if no attribute with that name exists
      def return_attr_data(att)
        column = fetch_attribute(att)
        if column.isNumeric
          enumerateInstances.map { |row| row.value(column) }
        else
          position = column.index
          enumerateInstances.map { |row| row.string_value(position) }
        end
      end

      # Return the mean value of a single attribute (a column from the Instances object)
      # * *Args*    :
      #   - +attribute_name+ -> a String, the name of the attribute
      # * *Raises*  :
      #   - +ArgumentError+ -> if no attribute with that name exists
      def mean(attribute_name)
        column = fetch_attribute(attribute_name)
        total = enumerateInstances.inject(0) { |sum, row| sum + row.value(column) }
        total / (numInstances * 1.0)
      end

      # Return the variance of a single attribute (a column from the Instances object)
      # * *Args*    :
      #   - +attribute_name+ -> a String, the name of the attribute
      # * *Raises*  :
      #   - +ArgumentError+ -> if no attribute with that name exists
      def variance(attribute_name)
        column = fetch_attribute(attribute_name)
        # This Ruby method shadows the Java overloads of the same name, so the
        # Java variance(int) has to be reached explicitly through java_send.
        java_send(:variance, [Java::int], column.index)
      end

      # Write the content of the current Instances object to a .csv file
      # * *Args*    :
      #   - +out_file+ -> a String, the name of the output file
      def to_CSV(out_file)
        write_with(CSVSaver.new, out_file)
      end

      # Write the content of the current Instances object to a .arff file
      # * *Args*    :
      #   - +out_file+ -> a String, the name of the output file
      def to_ARFF(out_file)
        write_with(ArffSaver.new, out_file)
      end

      # Translate one Ruby value into the double stored by Weka for the
      # attribute at +position+:
      # - numeric -> the value itself
      # - nominal -> the index of the label among the declared values
      # - date    -> the result of the attribute's parseDate
      # - string  -> the index returned by the attribute's addStringValue
      # * *Raises*  :
      #   - +ArgumentError+ -> unknown nominal label or unsupported attribute type
      def insert_attribute(attribute_value, position)
        column = attribute(position)
        if column.isNumeric
          attribute_value
        elsif column.isNominal
          label_index = column.indexOfValue(attribute_value)
          if label_index < 0
            raise ArgumentError, "'#{attribute_value}' is not a declared value of attribute '#{column.name}'"
          end
          label_index
        elsif column.isDate
          column.parseDate(attribute_value)
        elsif column.isString
          column.addStringValue(attribute_value)
        else
          raise ArgumentError, 'Attribute type is unknown!'
        end
      end
      private :insert_attribute

      # (check function): should check that the array is bidimensional and that
      # the lengths are equal
      # NOTE: placeholder, currently accepts any input.
      def check_array(data)
        return true # still to be done
      end

      # An entire dataset is inserted 'by row' into the current Instances object
      # i.e. one Instance object is inserted at the time
      # * *Args*    :
      #   - +data+ -> a bidimensional array
      def populate_by_row(data)
        return if check_array(data) == false
        data.each { |row| add_instance(row) }
      end

      # An Instance instance object (one row) is inserted into the current Instances object
      # * *Args*    :
      #   - +instance+ -> an array of values of the correct data type (:nominal,:numeric,etc...)
      # * *Raises*  :
      #   - +ArgumentError+ -> if a value cannot be encoded for its attribute
      def add_instance(instance)
        encoded = instance.each_with_index.map do |value, idx|
          insert_attribute(value, idx)
        end
        # 1.0 is the weight given to the new row.
        add(Instance.new(1.0, encoded.to_java(:double)))
      end

      # An Attribute instance object is inserted into the current Instances object
      # * *Args*    :
      #   - +attribute_name+ -> A name for the new attribute
      # * *WARNING*    :
      #   This method only creates an empty attribute field
      def add_numeric_attribute(attribute_name)
        insertAttributeAt(Attribute.new(attribute_name), numAttributes)
      end

      # An Attribute instance object is inserted into the current Instances object
      # * *Args*    :
      #   - +attribute+ -> A name for the new attribute
      #   - +list_values+ -> RubyArray with nominal values
      # * *WARNING*    :
      #   This method only creates an empty attribute field
      def add_nominal_attribute(attribute, list_values)
        values = FastVector.new
        list_values.each { |val| values.addElement(val) }
        insertAttributeAt(Attribute.new(attribute, values), numAttributes)
      end

      # Build a summary of the Instances's attributes (with the corresponding types)
      # * *Returns* :
      #   - an Array holding a Ruport table (attribute names and types)
      #     followed by a String with the number of rows
      def summary
        table = Ruport::Data::Table::new
        table.add_column 'Attributes'
        enumerateAttributes.each_with_index { |_att, idx| table.add_column idx }

        att_names = ['Names']
        enumerateAttributes.each { |att| att_names << "'#{att.name}'" }
        table << att_names

        att_types = ['Types']
        enumerateAttributes.each do |att|
          att_types << "Numeric" if att.isNumeric
          att_types << "Nominal" if att.isNominal
          att_types << "Date" if att.isDate
          att_types << "String" if att.isString
        end
        table << att_types

        [table, "\nNumber of rows: #{numInstances}"]
      end

      # Merges two sets of Instances together. The resulting set will have all the
      # attributes of the first set plus all the attributes of the second set. The
      # number of instances in both sets must be the same.
      # * *Args*    :
      #   - +instances+ -> An Instances class object
      def merge_with(instances)
        Instances.mergeInstances(self, instances)
      end

      # This method creates an Instances object (see Cucumber documentation for further details)
      # def self.create
      #   name = 'Instances'
      #   instances = Core::Type.create_instances(name,@@positions)
      #   return instances
      # end

      # This method is used for attributes definition in uninitialized Instances-derived classes
      # * *Args*    :
      #   - +attr_type+ -> one of :numeric, :nominal, :date, :string
      #   - +name+ -> Attribute name, a String
      #   - +values+ -> for :nominal the array of labels, for :date the date format
      # * *Raises*  :
      #   - +ArgumentError+ -> if +attr_type+ is not one of the four types above
      def att(attr_type, name, *values)
        java_name = name.to_java(:string)
        definition =
          case attr_type
          when :numeric then Core::Type.create_numeric_attr(java_name)
          when :nominal then Core::Type.create_nominal_attr(java_name, values[0])
          when :date    then Core::Type.create_date_attr(java_name, values[0])
          when :string  then Core::Type.create_string_attr(java_name)
          else raise ArgumentError, "Unknown attribute type: #{attr_type.inspect}"
          end
        @positions << definition
      end

      # This method is used for Nominal attributes definition in uninitialized Instances-derived classes
      # * *Args*    :
      #   - +name+ -> Attribute name, a String
      #   - +values+ -> An array of values for the nominal attribute
      def nominal(name, values)
        att :nominal, name, values
      end

      # This method is used for Numeric attributes definition in uninitialized Instances-derived classes
      # * *Args*    :
      #   - +name+ -> Attribute name, a String
      def numeric(name)
        att :numeric, name
      end

      # This method is used for Date attributes definition in uninitialized Instances-derived classes
      # * *Args*    :
      #   - +name+ -> Attribute name, a String
      #   - +format+ -> date pattern (SimpleDateFormat syntax). An explicit String is always
      #     passed on so that the (name, format) constructor is selected
      #     unambiguously; the default is the ISO-8601 pattern
      #     (presumably the same one Weka falls back to - confirm against the Weka Javadoc)
      def date(name, format = "yyyy-MM-dd'T'HH:mm:ss")
        att :date, name, format
      end

      # This method is used for String attributes definition in uninitialized Instances-derived classes
      # * *Args*    :
      #   - +name+ -> Attribute name, a String
      def string(name)
        att :string, name
      end

      # Class used for the creation of a new dataset (Instances class)
      # The block is evaluated in the context of the new object, so the
      # numeric / nominal / date / string helpers can be called directly in it.
      class Base < Instances
        def initialize(&block)
          attributes_vector = FastVector.new
          @positions = []
          self.instance_eval(&block) if block
          @positions.each { |value| attributes_vector.addElement(value) }
          super('Instances', attributes_vector, 0)
        end
      end

      # Return a json String for the current Instances object
      # The output is modeled on the 'datatable' Google charts APIs
      # More details at: 'https://developers.google.com/chart/interactive/docs/reference#DataTable'
      def to_json_format
        dataset_hash = {
          :cols => enumerateAttributes.map { |attribute| attribute.name },
          :rows => enumerateInstances.map { |instance| instance.toString }
        }
        JSON.pretty_generate(dataset_hash)
      end

      # Look up an attribute by name, failing loudly when it does not exist.
      def fetch_attribute(name)
        found = attribute(name)
        raise ArgumentError, "Unknown attribute: #{name.inspect}" if found.nil?
        found
      end
      private :fetch_attribute

      # Hand the dataset to +saver+ and write it to the file named +out_file+.
      def write_with(saver, out_file)
        saver.setInstances(self)
        saver.setFile(File.new(out_file))
        saver.writeBatch
      end
      private :write_with
    end #Instances class

    # Create an Instances object
    # * *Args*    :
    #   - +name+ -> A name for the Instances object
    #   - +attributes+ -> An array containing Attribute objects
    def Type.create_instances(name, attributes)
      attributes_vector = FastVector.new
      attributes.each { |value| attributes_vector.addElement(value) }
      Instances.new(name, attributes_vector, 0)
    end

  end
  # Helper class for serialization
  # Works with classifiers, filters, clusterers...
  class SerializationHelper
  end

end
|
359
|
+
|
360
|
+
|
361
|
+
|