ruby-band 0.1.11
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +3 -0
- data/Gemfile +30 -0
- data/Gemfile.lock +119 -0
- data/Jarfile +9 -0
- data/Jarfile.lock +10 -0
- data/LICENSE.txt +22 -0
- data/README.md +321 -0
- data/README.rdoc +70 -0
- data/Rakefile +66 -0
- data/VERSION +1 -0
- data/band_server/client.rb +35 -0
- data/band_server/client_alt.rb +35 -0
- data/band_server/first_dataset.csv +15 -0
- data/band_server/second_dataset.csv +15 -0
- data/band_server/simple_server.rb +90 -0
- data/band_server/third_dataset.csv +15 -0
- data/band_server/uploads/first_dataset.csv +15 -0
- data/band_server/uploads/second_dataset.csv +15 -0
- data/band_server/uploads/third_dataset.csv +15 -0
- data/bin/ruby-band +83 -0
- data/ext/mkrf_conf.rb +74 -0
- data/features/create_dataset.feature +12 -0
- data/features/step_definitions/create_dataset.rb +39 -0
- data/features/step_definitions/weka_classifiers.rb +43 -0
- data/features/step_definitions/weka_clustering.rb +34 -0
- data/features/step_definitions/weka_filters.rb +32 -0
- data/features/step_definitions/weka_parsers.rb +46 -0
- data/features/step_definitions/weka_pipeline.rb +41 -0
- data/features/support/env.rb +3 -0
- data/features/weka_classifiers.feature +16 -0
- data/features/weka_clustering.feature +15 -0
- data/features/weka_filters.feature +12 -0
- data/features/weka_parsers.feature +18 -0
- data/features/weka_pipeline.feature +14 -0
- data/lib/ruby-band.rb +12 -0
- data/lib/ruby-band/apache.rb +2 -0
- data/lib/ruby-band/apache/stat/correlation.rb +42 -0
- data/lib/ruby-band/apache/stat/inference.rb +151 -0
- data/lib/ruby-band/apache/stat/regression.rb +22 -0
- data/lib/ruby-band/core.rb +6 -0
- data/lib/ruby-band/core/parser/parser.rb +27 -0
- data/lib/ruby-band/core/type/apache_matrices.rb +35 -0
- data/lib/ruby-band/core/type/attribute.rb +53 -0
- data/lib/ruby-band/core/type/instance.rb +10 -0
- data/lib/ruby-band/core/type/instances.rb +361 -0
- data/lib/ruby-band/core/type/utils.rb +31 -0
- data/lib/ruby-band/weka.rb +14 -0
- data/lib/ruby-band/weka/attribute_selection/attribute_selection_utils.rb +20 -0
- data/lib/ruby-band/weka/attribute_selection/evaluators.rb +58 -0
- data/lib/ruby-band/weka/attribute_selection/search.rb +52 -0
- data/lib/ruby-band/weka/classifiers/bayes/bayes.rb +86 -0
- data/lib/ruby-band/weka/classifiers/bayes/bayes_utils.rb +82 -0
- data/lib/ruby-band/weka/classifiers/evaluation.rb +13 -0
- data/lib/ruby-band/weka/classifiers/functions/functions.rb +177 -0
- data/lib/ruby-band/weka/classifiers/functions/functions_utils.rb +78 -0
- data/lib/ruby-band/weka/classifiers/lazy/lazy.rb +86 -0
- data/lib/ruby-band/weka/classifiers/lazy/lazy_utils.rb +83 -0
- data/lib/ruby-band/weka/classifiers/mi/mi.rb +191 -0
- data/lib/ruby-band/weka/classifiers/mi/mi_utils.rb +80 -0
- data/lib/ruby-band/weka/classifiers/rules/rules.rb +190 -0
- data/lib/ruby-band/weka/classifiers/rules/rules_utils.rb +81 -0
- data/lib/ruby-band/weka/classifiers/trees/trees.rb +110 -0
- data/lib/ruby-band/weka/classifiers/trees/trees_utils.rb +85 -0
- data/lib/ruby-band/weka/clusterers/clusterers.rb +99 -0
- data/lib/ruby-band/weka/clusterers/clusterers_utils.rb +86 -0
- data/lib/ruby-band/weka/db/DatabaseUtils_mysql +280 -0
- data/lib/ruby-band/weka/db/DatabaseUtils_postgresql +594 -0
- data/lib/ruby-band/weka/db/db.rb +74 -0
- data/lib/ruby-band/weka/filters/supervised/attribute/attribute.rb +55 -0
- data/lib/ruby-band/weka/filters/supervised/instance/instance.rb +17 -0
- data/lib/ruby-band/weka/filters/supervised/supervised_utils.rb +38 -0
- data/lib/ruby-band/weka/filters/unsupervised/attribute/attribute.rb +90 -0
- data/lib/ruby-band/weka/filters/unsupervised/instance/instance.rb +48 -0
- data/lib/ruby-band/weka/filters/unsupervised/unsupervised_utils.rb +38 -0
- data/resources/ReutersGrain-test.arff +611 -0
- data/resources/ReutersGrain-train.arff +1561 -0
- data/resources/weather.csv +15 -0
- data/resources/weather.numeric.arff +23 -0
- data/ruby-band.gemspec +178 -0
- data/spec/ruby-band_spec.rb +7 -0
- data/spec/spec_helper.rb +12 -0
- data/test/helper.rb +18 -0
- data/test/test_apacheCorrelation.rb +22 -0
- data/test/test_apacheInference.rb +46 -0
- data/test/test_ruby-band.rb +9 -0
- metadata +426 -0
@@ -0,0 +1,151 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Apache
|
4
|
+
module Stat
|
5
|
+
module Inference
|
6
|
+
|
7
|
+
java_import 'org.apache.commons.math3.stat.inference.ChiSquareTest'
|
8
|
+
java_import 'org.apache.commons.math3.stat.inference.MannWhitneyUTest'
|
9
|
+
java_import 'org.apache.commons.math3.stat.inference.OneWayAnova'
|
10
|
+
java_import 'org.apache.commons.math3.stat.inference.TTest'
|
11
|
+
java_import 'org.apache.commons.math3.stat.inference.WilcoxonSignedRankTest'
|
12
|
+
java_import 'org.apache.commons.math3.stat.StatUtils'
|
13
|
+
java_import 'java.util.ArrayList'
|
14
|
+
|
15
|
+
# An implementation of the Wilcoxon signed-rank test
|
16
|
+
# * *Args* :
|
17
|
+
# - +Array1+ -> must be a RubyArray.
|
18
|
+
# - +Array2+ -> must be a RubyArray.
|
19
|
+
def self.wilcoxon_test(array_1,array_2)
  # Returns the pair [statistic, p_value] for the two paired samples.
  tester = WilcoxonSignedRankTest.new
  first = Core::Utils::double_to_a(array_1)
  second = Core::Utils::double_to_a(array_2)
  # commons-math only computes an exact p-value for samples of at most 30
  # values and raises for larger ones; fall back to the normal
  # approximation there instead of failing.
  exact = array_1.length <= 30
  statistic = tester.wilcoxonSignedRank(first, second)
  p_value = tester.wilcoxonSignedRankTest(first, second, exact.to_java(:boolean))
  [statistic, p_value]
end
|
27
|
+
|
28
|
+
# Utility class called by 'chi_square' method in this same package
|
29
|
+
class Chi_square
  # Chi-square statistic and p-value for a bidimensional table of counts.
  # Returns [statistic, p_value].
  def self.chi_square_2d(array_2d)
    tester = ChiSquareTest.new
    counts = array_2d.to_java(Java::long[])
    [tester.chi_square(counts), tester.chi_square_test(counts)]
  end

  # Chi-square statistic and p-value comparing expected frequencies
  # (converted to doubles) with observed counts (converted to longs).
  # Returns [statistic, p_value].
  def self.chi_square_two_arrays(expected,observed)
    tester = ChiSquareTest.new
    expected_freq = expected.to_java(:double)
    observed_counts = observed.to_java(:long)
    [
      tester.chi_square(expected_freq, observed_counts),
      tester.chi_square_test(expected_freq, observed_counts)
    ]
  end
end
|
44
|
+
|
45
|
+
# 1) Computes the Chi-Square statistic comparing observed and expected frequency counts.
|
46
|
+
# * *Args* :
|
47
|
+
# - +Array+ -> must be a bidimensional RubyArray.
|
48
|
+
# 2) Computes the Chi-Square statistic associated with a chi-square test of independence
|
49
|
+
# based on the input counts array, viewed as a two-way table.
|
50
|
+
# * *Args* :
|
51
|
+
# - +Array1+ -> must be a RubyArray.
|
52
|
+
# - +Array2+ -> must be a RubyArray.
|
53
|
+
def self.chi_square(*args)
  # Dispatch on the number of arguments: two arrays, or one 2d table.
  case args.length
  when 2
    Chi_square.chi_square_two_arrays(*args)
  when 1
    table = args.first
    raise ArgumentError,"RubyArray must be bidimensional" unless table.is_2d?
    Chi_square.chi_square_2d(table)
  else
    raise ArgumentError, 'Function *args should be two RubyArrays or a bidimensional RubyArray'
  end
end
|
63
|
+
|
64
|
+
# Compare two datasets stored in Ruby Arrays
|
65
|
+
def self.chi_square_dataset_compare(observed1,observed2)
  # Both samples are converted to Java long arrays once and reused for the
  # statistic and the p-value. Returns [statistic, p_value].
  tester = ChiSquareTest.new
  first_counts = observed1.to_java(:long)
  second_counts = observed2.to_java(:long)
  [
    tester.chiSquareDataSetsComparison(first_counts, second_counts),
    tester.chiSquareTestDataSetsComparison(first_counts, second_counts)
  ]
end
|
71
|
+
|
72
|
+
# An implementation of the Mann-Whitney U test
|
73
|
+
# (also called Wilcoxon rank-sum test)
|
74
|
+
# * *Args* :
|
75
|
+
# - +Array1+ -> must be a RubyArray.
|
76
|
+
# - +Array2+ -> must be a RubyArray.
|
77
|
+
def self.mann_whitney_u(array1,array2)
  # Returns [u_statistic, p_value] for the two samples.
  sample_a = array1.to_java(:double)
  sample_b = array2.to_java(:double)
  tester = MannWhitneyUTest.new
  [tester.mannWhitneyU(sample_a, sample_b), tester.mannWhitneyUTest(sample_a, sample_b)]
end
|
85
|
+
|
86
|
+
#Utility class called by 't_test' method in this same package
|
87
|
+
class T_test

  # Two-sample t-test through TTest#homoscedasticT / #homoscedasticTTest.
  # Returns [t_statistic, p_value].
  def self.homoscedastic(array_1,array_2)
    tester = TTest.new
    x = array_1.to_java(:double)
    y = array_2.to_java(:double)
    [tester.homoscedasticT(x, y), tester.homoscedasticTTest(x, y)]
  end

  # Paired t-test through TTest#pairedT / #pairedTTest.
  # Returns [t_statistic, p_value].
  def self.paired(array_1,array_2)
    tester = TTest.new
    x = array_1.to_java(:double)
    y = array_2.to_java(:double)
    [tester.pairedT(x, y), tester.pairedTTest(x, y)]
  end

  # Two-sample t-test through TTest#t / #tTest.
  # Returns [t_statistic, p_value].
  def self.t(array_1,array_2)
    tester = TTest.new
    x = array_1.to_java(:double)
    y = array_2.to_java(:double)
    [tester.t(x, y), tester.tTest(x, y)]
  end
end
|
116
|
+
|
117
|
+
# An implementation for Student's t-tests
|
118
|
+
# * *Args* :
|
119
|
+
# - +sample_1+ -> an array of numeric values representing a sample
|
120
|
+
# - +sample_2+ -> an array of numeric values representing a sample
|
121
|
+
# - +homoscedastic+ -> set to true for equal variance assumption
|
122
|
+
# - +paired+ -> set to true if you want to perform a 'paired' t test
|
123
|
+
def self.t_test(sample_1,sample_2,homoscedastic=false,paired=false)
  # Only the literal value true selects a variant; homoscedastic takes
  # precedence over paired when both are set.
  return T_test.homoscedastic(sample_1, sample_2) if homoscedastic == true
  return T_test.paired(sample_1, sample_2) if paired == true

  T_test.t(sample_1, sample_2)
end
|
132
|
+
|
133
|
+
# Implements one-way ANOVA (analysis of variance) statistics.
|
134
|
+
# Tests for differences between two or more categories of univariate data (for example,
|
135
|
+
# the body mass index of accountants, lawyers, doctors and computer programmers). When
|
136
|
+
# two categories are given, this is equivalent to the TTest.
|
137
|
+
# * *Args* :
|
138
|
+
# - +bidimensional_array+ -> a 2d RubyArray
|
139
|
+
def self.one_way_anova(bidimensional_array)
  # Each inner array becomes a Java double[] entry of the collection
  # handed to OneWayAnova. Returns [f_value, p_value].
  groups = ArrayList.new
  bidimensional_array.each { |sample| groups.add(sample.to_java(:double)) }
  anova = OneWayAnova.new
  [anova.anovaFValue(groups), anova.anovaPValue(groups)]
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Apache
|
4
|
+
module Stat
|
5
|
+
module Regression
|
6
|
+
java_import "org.apache.commons.math3.stat.regression.SimpleRegression"
|
7
|
+
|
8
|
+
# Create a simple regression model on the input data
|
9
|
+
# * *Args* :
|
10
|
+
# - +vector+ -> must be a multidimensional array
|
11
|
+
def self.simple_regression(vector)
  # Returns the SimpleRegression object after feeding it the converted data.
  # TODO: add JRuby methods for regression analysis
  observations = Core::Utils.bidimensional_to_double vector
  model = SimpleRegression.new
  model.addData(observations)
  model
end
|
18
|
+
|
19
|
+
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'java'
|
2
|
+
java_import 'weka.core.converters.CSVLoader'
|
3
|
+
java_import 'weka.core.converters.ArffLoader'
|
4
|
+
|
5
|
+
module Core
|
6
|
+
module Parser
|
7
|
+
# Parse an ARFF file and create an Instances object
|
8
|
+
def Parser.parse_ARFF(arff_file)
  # File below is the java.io.File imported on the next line.
  java_import 'java.io.File'
  loader = ArffLoader.new
  loader.setSource(File.new(arff_file))
  loader.getDataSet
end
|
16
|
+
|
17
|
+
# Parse a CSV file and create an Instances object
|
18
|
+
def Parser.parse_CSV(csv_file)
  # File below is the java.io.File imported on the next line.
  java_import 'java.io.File'
  loader = CSVLoader.new
  loader.setSource(File.new(csv_file))
  loader.getDataSet
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Core
|
4
|
+
module Type
|
5
|
+
|
6
|
+
java_import 'org.apache.commons.math3.linear.BlockRealMatrix'
|
7
|
+
java_import 'org.apache.commons.math3.linear.Array2DRowRealMatrix'
|
8
|
+
|
9
|
+
#Define variables to use ruby-like names instead of Java's
|
10
|
+
Apache_matrix = Array2DRowRealMatrix
|
11
|
+
Apache_matrix_block = BlockRealMatrix
|
12
|
+
|
13
|
+
#* *Description* :
|
14
|
+
#Linear algebra support in commons-math provides operations on real matrices (both dense
|
15
|
+
#and sparse matrices are supported) and vectors. It features basic operations (addition, subtraction ...)
|
16
|
+
#and decomposition algorithms that can be used to solve linear systems either in exact sense and
|
17
|
+
#in least squares sense.
|
18
|
+
#The 'Apache_matrix' class represents a matrix with real numbers as entries.
|
19
|
+
#The following basic matrix operations are supported:
|
20
|
+
#- Matrix addition, subtraction, multiplication
|
21
|
+
#- Scalar addition and multiplication
|
22
|
+
#- Transpose
|
23
|
+
#- Norm and trace
|
24
|
+
#- Operation on a vector
|
25
|
+
# Re-opens the class bound to the Apache_matrix constant; no Ruby methods are added yet.
class Apache_matrix; end
|
28
|
+
|
29
|
+
# Apache matrix implementation suited to dimensions above 50 or 100
|
30
|
+
# Re-opens the class bound to the Apache_matrix_block constant; no Ruby methods are added yet.
class Apache_matrix_block; end
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
module Core
|
4
|
+
module Type
|
5
|
+
|
6
|
+
java_import "weka.core.Attribute"
|
7
|
+
java_import "weka.core.FastVector"
|
8
|
+
|
9
|
+
# Re-opens the imported Attribute class; no Ruby methods are added yet.
class Attribute; end
|
12
|
+
|
13
|
+
# Return a Numeric Attribute class object
|
14
|
+
# * *Args* :
|
15
|
+
# - +name_of_attr+ -> a String, the name of the attribute
|
16
|
+
def self.create_numeric_attr(name_of_attr)
  # The single-argument Attribute constructor is used for numeric attributes.
  Attribute.new(name_of_attr)
end
|
20
|
+
|
21
|
+
# Return a Date Attribute class object
|
22
|
+
# * *Args* :
|
23
|
+
# - +name_of_attr+ -> a String, the name of the attribute
|
24
|
+
# - +format+ -> The format of the attribute
|
25
|
+
def self.create_date_attr(name_of_attr,format)
  # Attribute is built from the name and the given date format.
  Attribute.new(name_of_attr, format)
end
|
29
|
+
|
30
|
+
# Return a Nominal Attribute class object
|
31
|
+
# * *Args* :
|
32
|
+
# - +name_of_attr+ -> a String, the name of the attribute
|
33
|
+
# - +values_list+ -> An array, the list of nominal values
|
34
|
+
def self.create_nominal_attr(name_of_attr,values_list)
  # Copy the Ruby values into a FastVector, which the Attribute constructor takes.
  labels = FastVector.new
  values_list.each do |label|
    labels.addElement(label)
  end
  Attribute.new(name_of_attr, labels)
end
|
40
|
+
|
41
|
+
# Return a String Attribute class object
|
42
|
+
# * *Args* :
|
43
|
+
# - +name_of_attr+ -> a String, the name of the attribute
|
44
|
+
def self.create_string_attr(name_of_attr)
  # The (String, FastVector) constructor is looked up explicitly and called
  # with nil in place of the vector.
  # NOTE(review): presumably done to avoid overload ambiguity on nil - confirm.
  ctor = Attribute.java_class.constructor(Java::java.lang.String,Java::weka.core.FastVector)
  ctor.new_instance(name_of_attr, nil).to_java
end
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
|
@@ -0,0 +1,361 @@
|
|
1
|
+
require 'java'
|
2
|
+
require 'ruport'
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
module Core
|
6
|
+
|
7
|
+
java_import "weka.core.SerializationHelper"
|
8
|
+
module Type
|
9
|
+
|
10
|
+
java_import "weka.core.Instances"
|
11
|
+
java_import 'java.io.File'
|
12
|
+
java_import 'weka.core.converters.CSVSaver'
|
13
|
+
java_import 'weka.core.converters.ArffSaver'
|
14
|
+
java_import "weka.core.FastVector"
|
15
|
+
java_import "weka.core.Instance"
|
16
|
+
|
17
|
+
#
|
18
|
+
# * *Description* :
|
19
|
+
# This is the main class from the Weka package for data handling. It is essentially a matrix: each row
|
20
|
+
# is an instance of the 'Instance' class, while each column is an instance of the 'Attribute' class
|
21
|
+
# The class 'Instances' is here extended to add custom functionalities
|
22
|
+
class Instances
|
23
|
+
|
24
|
+
# Convert an Instances object to a bidimensional Ruby array
|
25
|
+
# where each row corresponds to an Instance object
|
26
|
+
def to_a2d
  # Build one Ruby array per enumerated attribute (column), then transpose
  # so that each row of the result corresponds to one Instance.
  # Values are read through each attribute's own index rather than the
  # enumeration counter: enumerateAttributes leaves out the class attribute
  # when one is set, so the counter can drift from the real column position.
  columns = enumerateAttributes.map do |column|
    if column.isNumeric
      enumerateInstances.map { |row| row.value(column) }
    else
      position = column.index
      enumerateInstances.map { |row| row.string_value(position) }
    end
  end
  columns.transpose
end
|
44
|
+
|
45
|
+
# Return the number of rows (Instance objects) in the dataset
|
46
|
+
def n_rows
  # Ruby-style alias for numInstances.
  numInstances
end
|
49
|
+
|
50
|
+
# Return the number of columns (Attribute objects) in the dataset
|
51
|
+
def n_col
  # Ruby-style alias for numAttributes.
  numAttributes
end
|
54
|
+
|
55
|
+
# Return the dimensions of the dataset (for the current Instances class object)
|
56
|
+
def dim
  # Prints the row and column counts to STDOUT; returns whatever puts returns.
  rows = numInstances
  columns = numAttributes
  puts "Rows number:\t#{rows}\nColumns number:\t #{columns}"
end
|
59
|
+
|
60
|
+
def each_row
  # Yields every Instance of the dataset to the given block.
  enumerate_instances.each do |row|
    yield(row)
  end
end
|
63
|
+
|
64
|
+
def each_row_with_index
  # Yields every Instance together with its position in the enumeration.
  enumerate_instances.each_with_index do |row, position|
    yield(row, position)
  end
end
|
67
|
+
|
68
|
+
def each_column
  # Yields every enumerated Attribute to the given block.
  enumerate_attributes.each do |column|
    yield(column)
  end
end
|
71
|
+
|
72
|
+
def each_column_with_index
  # Yields every enumerated Attribute together with its position in the enumeration.
  enumerate_attributes.each_with_index do |column, position|
    yield(column, position)
  end
end
|
75
|
+
|
76
|
+
# Check if this instance's attributes are all Numeric
|
77
|
+
def check_numeric_instance
  # Raises ArgumentError on the first enumerated attribute that is not numeric.
  enumerateAttributes.each do |column|
    raise ArgumentError, "Sorry, attribute '#{column.name}' is not numeric!" unless column.isNumeric
  end
end
|
84
|
+
|
85
|
+
# Convert the present Instances object to an Apache matrix if every Instances attribute
|
86
|
+
# is Numeric
|
87
|
+
def to_Apache_matrix
  # Raises ArgumentError (via check_numeric_instance) unless every attribute is numeric.
  check_numeric_instance
  # NOTE(review): to_a is not defined in the class as shown here; to_a2d may
  # have been intended - confirm before changing.
  rows = to_a
  Core::Type::Apache_matrix.new(Core::Utils::bidimensional_to_double(rows))
end
|
93
|
+
|
94
|
+
# Convert the present Instances object to an Apache matrix (block) if every Instances attribute
|
95
|
+
# is Numeric
|
96
|
+
def to_Apache_matrix_block
  # Raises ArgumentError (via check_numeric_instance) unless every attribute is numeric.
  check_numeric_instance
  # NOTE(review): to_a is not defined in the class as shown here; to_a2d may
  # have been intended - confirm before changing.
  rows = to_a
  Core::Type::Apache_matrix_block.new(Core::Utils::bidimensional_to_double(rows))
end
|
102
|
+
|
103
|
+
# Return data for a single attribute (a column from the Instances object)
|
104
|
+
# * *Args* :
|
105
|
+
# - +att+ -> a String, the name of the attribute
|
106
|
+
def return_attr_data(att)
  # Numeric attributes yield their numeric values; any other attribute
  # yields the string value found at the attribute's index.
  if attribute(att).isNumeric
    enumerateInstances.map { |row| row.value(attribute(att)) }
  else
    position = attribute(att).index
    enumerateInstances.map { |row| row.string_value(position) }
  end
end
|
120
|
+
|
121
|
+
# Return the mean value of a single attribute (a column from the Instances object)
|
122
|
+
# * *Args* :
|
123
|
+
# - +attribute_name+ -> a String, the name of the attribute
|
124
|
+
def mean(attribute_name)
  # Sum the attribute's value over every instance, then divide by the
  # instance count as a float.
  total = 0
  enumerateInstances.each do |row|
    total += row.value(attribute(attribute_name))
  end
  total / (numInstances * 1.0)
end
|
130
|
+
|
131
|
+
# Return the variance of a single attribute (a column from the Instances object)
|
132
|
+
# * *Args* :
|
133
|
+
# - +attribute_name+ -> a String, the name of the attribute
|
134
|
+
def variance(attribute_name)
  # * *Returns* : the value computed by the Java variance(int) method.
  # * *Raises*  : ArgumentError when no attribute has the given name.
  # An Integer argument is treated as an attribute index and passed through.
  position =
    if attribute_name.is_a?(Integer)
      attribute_name
    else
      found = attribute(attribute_name)
      raise ArgumentError, "Sorry, attribute '#{attribute_name}' does not exist!" if found.nil?
      found.index
    end
  # This Ruby method shadows the Java overloads of the same name, so the
  # Java variance(int) has to be reached explicitly through java_send.
  java_send(:variance, [Java::int], position)
end
|
139
|
+
|
140
|
+
# Write the content of the current Instances object to a .csv file
|
141
|
+
# * *Args* :
|
142
|
+
# - +out_file+ -> a String, the name of the output file
|
143
|
+
def to_CSV(out_file)
  # File here is the java.io.File imported into the enclosing module.
  writer = CSVSaver.new
  writer.setInstances(self)
  writer.setFile(File.new(out_file))
  writer.writeBatch
end
|
150
|
+
|
151
|
+
# Write the content of the current Instances object to a .arff file
|
152
|
+
# * *Args* :
|
153
|
+
# - +out_file+ -> a String, the name of the output file
|
154
|
+
def to_ARFF(out_file)
  # File here is the java.io.File imported into the enclosing module.
  writer = ArffSaver.new
  writer.setInstances(self)
  writer.setFile(File.new(out_file))
  writer.writeBatch
end
|
161
|
+
|
162
|
+
# Translate one Ruby value into the number stored for the attribute at +position+.
# * *Args* :
#   - +attribute_value+ -> the value to encode
#   - +position+ -> index of the target attribute
# * *Returns* :
#   - numeric attributes: the value itself
#   - nominal attributes: the index of the value among the attribute's labels
#   - date attributes: the parsed date for String input, otherwise the value itself
#   - any other type: nil, after printing a warning
def insert_attribute(attribute_value,position)
  column = attribute(position)
  if column.isDate
    # Checked before isNumeric because Weka reports date attributes as
    # numeric too, which would make this branch unreachable.
    attribute_value.is_a?(String) ? column.parseDate(attribute_value) : attribute_value
  elsif column.isNumeric
    attribute_value
  elsif column.isNominal
    column.indexOfValue(attribute_value)
  else
    puts 'Attribute type is unknown!'
  end
end

private :insert_attribute
|
177
|
+
|
178
|
+
# (check function): should check that the array is bidimensional and that
|
179
|
+
# the lengths are equal
|
180
|
+
def check_array(data)
  # Returns true when +data+ is a collection whose rows are all array-like
  # and of equal length, false otherwise. An empty collection is accepted.
  # populate_by_row does not insert anything when this returns false.
  return false unless data.respond_to?(:map)

  widths = data.map do |row|
    row.length if row.respond_to?(:each_with_index) && row.respond_to?(:length)
  end
  !widths.include?(nil) && widths.uniq.length <= 1
end
|
183
|
+
|
184
|
+
# An entire dataset is inserted 'by row' into the current Instances object
|
185
|
+
# i.e. one Instance object is inserted at a time
|
186
|
+
# * *Args* :
|
187
|
+
# - +data+ -> a bidimensional array
|
188
|
+
def populate_by_row(data)
  # * *Raises* : ArgumentError when check_array rejects +data+. Previously
  #   the whole dataset was dropped silently in that case.
  if check_array(data) == false
    raise ArgumentError, 'Data must be a bidimensional array with rows of equal length'
  end

  data.each { |row| add_instance(row) }
end
|
195
|
+
|
196
|
+
# An Instance instance object (one row) is inserted into the current Instances object
|
197
|
+
# * *Args* :
|
198
|
+
# - +instance+ -> an array of values of the correct data type (:nominal,:numeric,etc...)
|
199
|
+
def add_instance(instance)
  # Encode every value through insert_attribute (by position), then wrap the
  # resulting Java double array in an Instance of weight 1.0 and append it.
  encoded = []
  instance.each_with_index { |value, position| encoded << insert_attribute(value, position) }
  add(Instance.new(1.0, encoded.to_java(:double)))
end
|
208
|
+
|
209
|
+
# An Attribute instance object is inserted into the current Instances object
|
210
|
+
# * *Args* :
|
211
|
+
# - +attribute_name+ -> A name for the new attribute
|
212
|
+
# * *WARNING* :
|
213
|
+
# This method only creates an empty attribute field
|
214
|
+
def add_numeric_attribute(attribute_name)
  # The new attribute is inserted at position numAttributes.
  new_column = Attribute.new(attribute_name)
  insertAttributeAt(new_column, numAttributes)
end
|
217
|
+
|
218
|
+
# An Attribute instance object is inserted into the current Instances object
|
219
|
+
# * *Args* :
|
220
|
+
# - +attribute_name+ -> A name for the new attribute
|
221
|
+
# - +values+ -> RubyArray with nominal values
|
222
|
+
# * *WARNING* :
|
223
|
+
# This method only creates an empty attribute field
|
224
|
+
def add_nominal_attribute(attribute,list_values)
  # Copy the labels into a FastVector, then insert the nominal attribute at
  # position numAttributes.
  labels = FastVector.new
  list_values.each { |label| labels.addElement(label) }
  insertAttributeAt(Attribute.new(attribute, labels), numAttributes)
end
|
231
|
+
|
232
|
+
#Print to STDOUT the list of the Instances's attributes (with the corresponding types)
|
233
|
+
def summary
  # Builds (does not print) a two-element array: a Ruport table listing the
  # enumerated attributes with their names and types, followed by a
  # "Number of rows" line.
  columns = enumerateAttributes.to_a

  table = Ruport::Data::Table::new
  table.add_column 'Attributes'
  columns.each_index { |position| table.add_column position }

  table << (['Names'] + columns.map { |column| "'#{column.name}'" })

  # Exactly one label per attribute keeps the Types row aligned with the
  # columns. Date is tested first because Weka reports date attributes as
  # numeric as well.
  type_labels = columns.map do |column|
    if column.isDate
      "Date"
    elsif column.isNumeric
      "Numeric"
    elsif column.isNominal
      "Nominal"
    elsif column.isString
      "String"
    else
      "Unknown"
    end
  end
  table << (['Types'] + type_labels)

  [table, "\nNumber of rows: #{numInstances}"]
end
|
265
|
+
|
266
|
+
# Merges two sets of Instances together. The resulting set will have all the
|
267
|
+
# attributes of the first set plus all the attributes of the second set. The
|
268
|
+
# number of instances in both sets must be the same.
|
269
|
+
# * *Args* :
|
270
|
+
# - +instances+ -> An Instances class object
|
271
|
+
def merge_with(instances)
  # Delegates to Instances.mergeInstances with self as the first set.
  Instances.mergeInstances(self, instances)
end
|
274
|
+
|
275
|
+
# This method creates an Instances object (see Cucumber documentation for further details)
|
276
|
+
# def self.create
|
277
|
+
# name = 'Instances'
|
278
|
+
# instances = Core::Type.create_instances(name,@@positions)
|
279
|
+
# return instances
|
280
|
+
# end
|
281
|
+
|
282
|
+
# This method is used for attributes definition in uninitialized Instances-derived classes
|
283
|
+
def att(attr_type,name,*values)
  # Builds the attribute matching +attr_type+ and appends it to @positions.
  # * *Args* :
  #   - +attr_type+ -> one of :numeric, :nominal, :date, :string
  #   - +name+ -> the attribute name
  #   - +values+ -> first extra argument: the labels (:nominal) or the format (:date)
  # * *Raises* : ArgumentError for any other +attr_type+ (previously nil was
  #   appended to @positions).
  java_name = name.to_java(:string)
  definition =
    case attr_type
    when :numeric then Core::Type.create_numeric_attr(java_name)
    when :nominal then Core::Type.create_nominal_attr(java_name, values[0])
    when :date    then Core::Type.create_date_attr(java_name, values[0])
    when :string  then Core::Type.create_string_attr(java_name)
    else raise ArgumentError, "Unknown attribute type: #{attr_type.inspect}"
    end
  @positions << definition
end
|
290
|
+
|
291
|
+
# This method is used for Nominal attributes definition in uninitialized Instances-derived classes
|
292
|
+
# * *Args* :
|
293
|
+
# - +name+ -> Attribute name, a String
|
294
|
+
# - +values+ -> An array of values for the nominal attribute
|
295
|
+
def nominal(name,values)
  # Shorthand for att with the :nominal type.
  att(:nominal, name, values)
end
|
298
|
+
|
299
|
+
# This method is used for Numeric attributes definition in uninitialized Instances-derived classes
|
300
|
+
# * *Args* :
|
301
|
+
# - +name+ -> Attribute name, a String
|
302
|
+
def numeric(name)
  # Shorthand for att with the :numeric type.
  att(:numeric, name)
end
|
305
|
+
|
306
|
+
# This method is used for Date attributes definition in uninitialized Instances-derived classes
|
307
|
+
# * *Args* :
|
308
|
+
# - +name+ -> Attribute name, a String
|
309
|
+
def date(name, format = "yyyy-MM-dd'T'HH:mm:ss")
  # +format+ is optional; the default spells out the ISO-8601 pattern so
  # that nil never reaches the overloaded Java Attribute constructor.
  att :date, name, format
end
|
312
|
+
|
313
|
+
# This method is used for String attributes definition in uninitialized Instances-derived classes
|
314
|
+
# * *Args* :
|
315
|
+
# - +name+ -> Attribute name, a String
|
316
|
+
def string(name)
  # Shorthand for att with the :string type.
  att(:string, name)
end
|
319
|
+
|
320
|
+
# Class used for the creation of a new dataset (Instances class)
|
321
|
+
class Base < Instances
  # The optional block is instance_eval'ed so that calls made inside it
  # (numeric, nominal, date, string) fill @positions; the collected
  # attributes are then handed to the superclass constructor together with
  # the name 'Instances' and the value 0.
  def initialize(&block)
    @positions = []
    instance_eval(&block) if block
    attributes_vector = FastVector.new
    @positions.each do |definition|
      attributes_vector.addElement(definition)
    end
    super('Instances', attributes_vector, 0)
  end
end
|
330
|
+
|
331
|
+
# Return a json String for the current Instances object
|
332
|
+
# The output is modeled on the 'datatable' Google charts APIs
|
333
|
+
# More details at: 'https://developers.google.com/chart/interactive/docs/reference#DataTable'
|
334
|
+
def to_json_format
  # :cols holds the attribute names, :rows the toString form of each instance.
  names = enumerateAttributes.map { |column| column.name }
  lines = enumerateInstances.map { |row| row.toString }
  JSON.pretty_generate(:cols => names, :rows => lines)
end
|
340
|
+
end #Instances class
|
341
|
+
|
342
|
+
# Create an Instances object
|
343
|
+
# * *Args* :
|
344
|
+
# - +name+ -> A name for the Instances object
|
345
|
+
# - +attributes+ -> An array containing Attribute objects
|
346
|
+
def Type.create_instances(name,attributes)
  # Copy the attributes into a FastVector and build an Instances object
  # with it, passing 0 as the third constructor argument.
  vector = FastVector.new
  attributes.each do |definition|
    vector.addElement(definition)
  end
  Instances.new(name, vector, 0)
end
|
351
|
+
|
352
|
+
end
|
353
|
+
# Helper class for serialization
|
354
|
+
# Works with classifiers, filters, clusterers...
|
355
|
+
# Re-opens the imported SerializationHelper class; no Ruby methods are added yet.
class SerializationHelper; end
|
357
|
+
|
358
|
+
end
|
359
|
+
|
360
|
+
|
361
|
+
|