ruby-band 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +3 -0
- data/Gemfile +30 -0
- data/Gemfile.lock +119 -0
- data/Jarfile +9 -0
- data/Jarfile.lock +10 -0
- data/LICENSE.txt +22 -0
- data/README.md +321 -0
- data/README.rdoc +70 -0
- data/Rakefile +66 -0
- data/VERSION +1 -0
- data/band_server/client.rb +35 -0
- data/band_server/client_alt.rb +35 -0
- data/band_server/first_dataset.csv +15 -0
- data/band_server/second_dataset.csv +15 -0
- data/band_server/simple_server.rb +90 -0
- data/band_server/third_dataset.csv +15 -0
- data/band_server/uploads/first_dataset.csv +15 -0
- data/band_server/uploads/second_dataset.csv +15 -0
- data/band_server/uploads/third_dataset.csv +15 -0
- data/bin/ruby-band +83 -0
- data/ext/mkrf_conf.rb +74 -0
- data/features/create_dataset.feature +12 -0
- data/features/step_definitions/create_dataset.rb +39 -0
- data/features/step_definitions/weka_classifiers.rb +43 -0
- data/features/step_definitions/weka_clustering.rb +34 -0
- data/features/step_definitions/weka_filters.rb +32 -0
- data/features/step_definitions/weka_parsers.rb +46 -0
- data/features/step_definitions/weka_pipeline.rb +41 -0
- data/features/support/env.rb +3 -0
- data/features/weka_classifiers.feature +16 -0
- data/features/weka_clustering.feature +15 -0
- data/features/weka_filters.feature +12 -0
- data/features/weka_parsers.feature +18 -0
- data/features/weka_pipeline.feature +14 -0
- data/lib/ruby-band.rb +12 -0
- data/lib/ruby-band/apache.rb +2 -0
- data/lib/ruby-band/apache/stat/correlation.rb +42 -0
- data/lib/ruby-band/apache/stat/inference.rb +151 -0
- data/lib/ruby-band/apache/stat/regression.rb +22 -0
- data/lib/ruby-band/core.rb +6 -0
- data/lib/ruby-band/core/parser/parser.rb +27 -0
- data/lib/ruby-band/core/type/apache_matrices.rb +35 -0
- data/lib/ruby-band/core/type/attribute.rb +53 -0
- data/lib/ruby-band/core/type/instance.rb +10 -0
- data/lib/ruby-band/core/type/instances.rb +361 -0
- data/lib/ruby-band/core/type/utils.rb +31 -0
- data/lib/ruby-band/weka.rb +14 -0
- data/lib/ruby-band/weka/attribute_selection/attribute_selection_utils.rb +20 -0
- data/lib/ruby-band/weka/attribute_selection/evaluators.rb +58 -0
- data/lib/ruby-band/weka/attribute_selection/search.rb +52 -0
- data/lib/ruby-band/weka/classifiers/bayes/bayes.rb +86 -0
- data/lib/ruby-band/weka/classifiers/bayes/bayes_utils.rb +82 -0
- data/lib/ruby-band/weka/classifiers/evaluation.rb +13 -0
- data/lib/ruby-band/weka/classifiers/functions/functions.rb +177 -0
- data/lib/ruby-band/weka/classifiers/functions/functions_utils.rb +78 -0
- data/lib/ruby-band/weka/classifiers/lazy/lazy.rb +86 -0
- data/lib/ruby-band/weka/classifiers/lazy/lazy_utils.rb +83 -0
- data/lib/ruby-band/weka/classifiers/mi/mi.rb +191 -0
- data/lib/ruby-band/weka/classifiers/mi/mi_utils.rb +80 -0
- data/lib/ruby-band/weka/classifiers/rules/rules.rb +190 -0
- data/lib/ruby-band/weka/classifiers/rules/rules_utils.rb +81 -0
- data/lib/ruby-band/weka/classifiers/trees/trees.rb +110 -0
- data/lib/ruby-band/weka/classifiers/trees/trees_utils.rb +85 -0
- data/lib/ruby-band/weka/clusterers/clusterers.rb +99 -0
- data/lib/ruby-band/weka/clusterers/clusterers_utils.rb +86 -0
- data/lib/ruby-band/weka/db/DatabaseUtils_mysql +280 -0
- data/lib/ruby-band/weka/db/DatabaseUtils_postgresql +594 -0
- data/lib/ruby-band/weka/db/db.rb +74 -0
- data/lib/ruby-band/weka/filters/supervised/attribute/attribute.rb +55 -0
- data/lib/ruby-band/weka/filters/supervised/instance/instance.rb +17 -0
- data/lib/ruby-band/weka/filters/supervised/supervised_utils.rb +38 -0
- data/lib/ruby-band/weka/filters/unsupervised/attribute/attribute.rb +90 -0
- data/lib/ruby-band/weka/filters/unsupervised/instance/instance.rb +48 -0
- data/lib/ruby-band/weka/filters/unsupervised/unsupervised_utils.rb +38 -0
- data/resources/ReutersGrain-test.arff +611 -0
- data/resources/ReutersGrain-train.arff +1561 -0
- data/resources/weather.csv +15 -0
- data/resources/weather.numeric.arff +23 -0
- data/ruby-band.gemspec +178 -0
- data/spec/ruby-band_spec.rb +7 -0
- data/spec/spec_helper.rb +12 -0
- data/test/helper.rb +18 -0
- data/test/test_apacheCorrelation.rb +22 -0
- data/test/test_apacheInference.rb +46 -0
- data/test/test_ruby-band.rb +9 -0
- metadata +426 -0
@@ -0,0 +1,99 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__)
|
2
|
+
require 'clusterers_utils'
|
3
|
+
|
4
|
+
module Weka
  # Wrappers around the clusterers shipped in the 'weka.clusterers' Java package.
  # Each imported Java class is reopened to mix in Clusterer_utils, and gets a
  # nested Base subclass that builds the clusterer as soon as it is constructed.
  module Clusterer
    java_import 'weka.clusterers.SimpleKMeans'
    java_import 'weka.clusterers.FarthestFirst'
    java_import 'weka.clusterers.EM'
    java_import 'weka.clusterers.XMeans'
    java_import 'weka.clusterers.HierarchicalClusterer'
    java_import 'weka.clusterers.Cobweb'

    # weka.clusterers.Cobweb extended with the Clusterer_utils helpers.
    class Cobweb
      include Clusterer_utils

      # Ready-to-use Cobweb clusterer.
      class Base < Cobweb
        # With a block: the block is evaluated on the new instance and the
        # clusterer is built from the instance-level dataset.
        # Without a block: the class-level options/data are used instead.
        def initialize(&block)
          super
          block ? init_instance_clusterer(&block) : init_clusterer
        end
      end
    end

    # weka.clusterers.EM extended with the Clusterer_utils helpers.
    class EM
      include Clusterer_utils

      # Ready-to-use EM clusterer.
      class Base < EM
        # With a block: the block is evaluated on the new instance and the
        # clusterer is built from the instance-level dataset.
        # Without a block: the class-level options/data are used instead.
        def initialize(&block)
          super
          block ? init_instance_clusterer(&block) : init_clusterer
        end
      end
    end

    # weka.clusterers.HierarchicalClusterer extended with the Clusterer_utils helpers.
    class HierarchicalClusterer
      include Clusterer_utils

      # Ready-to-use hierarchical clusterer.
      class Base < HierarchicalClusterer
        # With a block: the block is evaluated on the new instance and the
        # clusterer is built from the instance-level dataset.
        # Without a block: the class-level options/data are used instead.
        def initialize(&block)
          super
          block ? init_instance_clusterer(&block) : init_clusterer
        end
      end
    end

    # weka.clusterers.SimpleKMeans extended with the Clusterer_utils helpers.
    class SimpleKMeans
      include Clusterer_utils

      # Ready-to-use SimpleKMeans clusterer.
      class Base < SimpleKMeans
        # With a block: the block is evaluated on the new instance and the
        # clusterer is built from the instance-level dataset.
        # Without a block: the class-level options/data are used instead.
        def initialize(&block)
          super
          block ? init_instance_clusterer(&block) : init_clusterer
        end
      end
    end

    # weka.clusterers.FarthestFirst extended with the Clusterer_utils helpers.
    class FarthestFirst
      include Clusterer_utils

      # Ready-to-use FarthestFirst clusterer.
      class Base < FarthestFirst
        # With a block: the block is evaluated on the new instance and the
        # clusterer is built from the instance-level dataset.
        # Without a block: the class-level options/data are used instead.
        def initialize(&block)
          super
          block ? init_instance_clusterer(&block) : init_clusterer
        end
      end
    end

    # weka.clusterers.XMeans extended with the Clusterer_utils helpers.
    class XMeans
      include Clusterer_utils

      # Ready-to-use XMeans clusterer.
      class Base < XMeans
        # With a block: the block is evaluated on the new instance and the
        # clusterer is built from the instance-level dataset.
        # Without a block: the class-level options/data are used instead.
        def initialize(&block)
          super
          block ? init_instance_clusterer(&block) : init_clusterer
        end
      end
    end

  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#This module is used by the classes from the Clusterer module
|
2
|
+
#to inherit the following methods (instance and class methods)
|
3
|
+
module Clusterer_utils
  # Mixin for the Weka clusterer wrappers. Instance methods are defined
  # directly in this module; class-level accessors live in ClassMethods and
  # are attached to the including class by the `included` hook.
  java_import "weka.core.Utils"
  java_import "weka.clusterers.ClusterEvaluation"

  # Hook invoked when the module is included: extends the including class
  # with ClassMethods so it gains the class-level options/data accessors.
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Build the clusterer from the class-level configuration: apply the class
  # options (when any are set) and train on the class dataset.
  def init_clusterer
    set_options(self.class.options) if self.class.options
    buildClusterer(self.class.data)
  end

  # Build the clusterer from instance-level configuration: the block is
  # evaluated in the context of this instance (so it can call set_data /
  # set_options), then the clusterer is trained on @dataset.
  def init_instance_clusterer(&block)
    instance_eval(&block)
    #@dataset.setClassIndex(@class_index)
    buildClusterer(@dataset)
  end

  #set instance data for the clusterer
  def set_data(data)
    @dataset = data
  end

  #set options for the clusterer
  # `options` is a single option string; it is split with Weka's
  # Utils.splitOptions before being handed to setOptions.
  def set_options(options)
    options_inst = Utils.splitOptions(options)
    setOptions(options_inst)
  end

  # Return one line per option reported by listOptions, formatted as
  # "<synopsis> <description>".
  def list_options
    listOptions.map { |key| "#{key.synopsis} #{key.description}" }.join("\n")
  end

  #the description provided by the Weka Documentation
  def description
    globalInfo
  end

  #list cluster centroids with coordinates
  # NOTE(review): relies on the underlying clusterer providing
  # getClusterCentroids; confirm it exists for every class that includes
  # this module.
  def get_centroids
    getClusterCentroids
  end

  #list cluster's capabilities with attributes (i.e Numeric, Nominal...)
  def list_capabilities
    get_capabilities.to_s
  end

  # Validate clusterer. If the evaluation needs to be performed on a different dataset this function accepts
  # an optional parameter (an Instances class object)
  #
  # Dataset precedence when no argument is given: the instance-level dataset
  # (set through set_data) is used first, and the class-level dataset is only
  # a fallback. This way a clusterer built from a block is evaluated on the
  # data it was configured with, even if class-level data has also been set.
  #
  # Returns the string produced by ClusterEvaluation#clusterResultsToString.
  def evaluate(*args)
    dataset = args[0] || @dataset || self.class.data
    puts 'performing evaluation'
    evaluation = ClusterEvaluation.new
    evaluation.setClusterer(self)
    evaluation.evaluateClusterer(dataset)
    evaluation.clusterResultsToString
  end

  #Class methods module
  module ClassMethods

    # Define, for every given name, a reader `name` and a writer `set_name`
    # backed by the instance variable @name of the receiver (here: the class
    # that was extended with ClassMethods).
    def self.classifier_attr_accessor(*args)
      args.each do |arg|
        #Here's the getter
        define_method(arg) { instance_variable_get("@#{arg}") }
        #Here's the setter
        define_method("set_#{arg}") { |val| instance_variable_set("@#{arg}", val) }
      end
    end

    classifier_attr_accessor :options, :data

  end
end
|
@@ -0,0 +1,280 @@
|
|
1
|
+
# Database settings for MySQL 3.23.x, 4.x
|
2
|
+
#
|
3
|
+
# General information on database access can be found here:
|
4
|
+
# http://weka.wikispaces.com/Databases
|
5
|
+
#
|
6
|
+
# url: http://www.mysql.com/
|
7
|
+
# jdbc: http://www.mysql.com/products/connector/j/
|
8
|
+
# author: Fracpete (fracpete at waikato dot ac dot nz)
|
9
|
+
# version: $Revision: 5836 $
|
10
|
+
|
11
|
+
# JDBC driver (comma-separated list)
|
12
|
+
jdbcDriver=org.gjt.mm.mysql.Driver
|
13
|
+
|
14
|
+
# database URL
|
15
|
+
jdbcURL=jdbc:mysql://server_name:3306/database_name
|
16
|
+
|
17
|
+
# specific data types
|
18
|
+
# string, getString() = 0; --> nominal
|
19
|
+
# boolean, getBoolean() = 1; --> nominal
|
20
|
+
# double, getDouble() = 2; --> numeric
|
21
|
+
# byte, getByte() = 3; --> numeric
|
22
|
+
# short, getShort() = 4; --> numeric
|
23
|
+
# int, getInt() = 5; --> numeric
|
24
|
+
# long, getLong() = 6; --> numeric
|
25
|
+
# float, getFloat() = 7; --> numeric
|
26
|
+
# date, getDate() = 8; --> date
|
27
|
+
# text, getString() = 9; --> string
|
28
|
+
# time, getTime() = 10; --> date
|
29
|
+
|
30
|
+
# other options
|
31
|
+
CREATE_DOUBLE=DOUBLE
|
32
|
+
CREATE_STRING=TEXT
|
33
|
+
CREATE_INT=INT
|
34
|
+
CREATE_DATE=DATETIME
|
35
|
+
DateFormat=yyyy-MM-dd HH:mm:ss
|
36
|
+
checkUpperCaseNames=false
|
37
|
+
checkLowerCaseNames=false
|
38
|
+
checkForTable=true
|
39
|
+
|
40
|
+
# All the reserved keywords for this database
|
41
|
+
# Based on the keywords listed at the following URL (2009-04-13):
|
42
|
+
# http://dev.mysql.com/doc/mysqld-version-reference/en/mysqld-version-reference-reservedwords-5-0.html
|
43
|
+
Keywords=\
|
44
|
+
ADD,\
|
45
|
+
ALL,\
|
46
|
+
ALTER,\
|
47
|
+
ANALYZE,\
|
48
|
+
AND,\
|
49
|
+
AS,\
|
50
|
+
ASC,\
|
51
|
+
ASENSITIVE,\
|
52
|
+
BEFORE,\
|
53
|
+
BETWEEN,\
|
54
|
+
BIGINT,\
|
55
|
+
BINARY,\
|
56
|
+
BLOB,\
|
57
|
+
BOTH,\
|
58
|
+
BY,\
|
59
|
+
CALL,\
|
60
|
+
CASCADE,\
|
61
|
+
CASE,\
|
62
|
+
CHANGE,\
|
63
|
+
CHAR,\
|
64
|
+
CHARACTER,\
|
65
|
+
CHECK,\
|
66
|
+
COLLATE,\
|
67
|
+
COLUMN,\
|
68
|
+
COLUMNS,\
|
69
|
+
CONDITION,\
|
70
|
+
CONNECTION,\
|
71
|
+
CONSTRAINT,\
|
72
|
+
CONTINUE,\
|
73
|
+
CONVERT,\
|
74
|
+
CREATE,\
|
75
|
+
CROSS,\
|
76
|
+
CURRENT_DATE,\
|
77
|
+
CURRENT_TIME,\
|
78
|
+
CURRENT_TIMESTAMP,\
|
79
|
+
CURRENT_USER,\
|
80
|
+
CURSOR,\
|
81
|
+
DATABASE,\
|
82
|
+
DATABASES,\
|
83
|
+
DAY_HOUR,\
|
84
|
+
DAY_MICROSECOND,\
|
85
|
+
DAY_MINUTE,\
|
86
|
+
DAY_SECOND,\
|
87
|
+
DEC,\
|
88
|
+
DECIMAL,\
|
89
|
+
DECLARE,\
|
90
|
+
DEFAULT,\
|
91
|
+
DELAYED,\
|
92
|
+
DELETE,\
|
93
|
+
DESC,\
|
94
|
+
DESCRIBE,\
|
95
|
+
DETERMINISTIC,\
|
96
|
+
DISTINCT,\
|
97
|
+
DISTINCTROW,\
|
98
|
+
DIV,\
|
99
|
+
DOUBLE,\
|
100
|
+
DROP,\
|
101
|
+
DUAL,\
|
102
|
+
EACH,\
|
103
|
+
ELSE,\
|
104
|
+
ELSEIF,\
|
105
|
+
ENCLOSED,\
|
106
|
+
ESCAPED,\
|
107
|
+
EXISTS,\
|
108
|
+
EXIT,\
|
109
|
+
EXPLAIN,\
|
110
|
+
FALSE,\
|
111
|
+
FETCH,\
|
112
|
+
FIELDS,\
|
113
|
+
FLOAT,\
|
114
|
+
FLOAT4,\
|
115
|
+
FLOAT8,\
|
116
|
+
FOR,\
|
117
|
+
FORCE,\
|
118
|
+
FOREIGN,\
|
119
|
+
FROM,\
|
120
|
+
FULLTEXT,\
|
121
|
+
GOTO,\
|
122
|
+
GRANT,\
|
123
|
+
GROUP,\
|
124
|
+
HAVING,\
|
125
|
+
HIGH_PRIORITY,\
|
126
|
+
HOUR_MICROSECOND,\
|
127
|
+
HOUR_MINUTE,\
|
128
|
+
HOUR_SECOND,\
|
129
|
+
IF,\
|
130
|
+
IGNORE,\
|
131
|
+
IN,\
|
132
|
+
INDEX,\
|
133
|
+
INFILE,\
|
134
|
+
INNER,\
|
135
|
+
INOUT,\
|
136
|
+
INSENSITIVE,\
|
137
|
+
INSERT,\
|
138
|
+
INT,\
|
139
|
+
INT1,\
|
140
|
+
INT2,\
|
141
|
+
INT3,\
|
142
|
+
INT4,\
|
143
|
+
INT8,\
|
144
|
+
INTEGER,\
|
145
|
+
INTERVAL,\
|
146
|
+
INTO,\
|
147
|
+
IS,\
|
148
|
+
ITERATE,\
|
149
|
+
JOIN,\
|
150
|
+
KEY,\
|
151
|
+
KEYS,\
|
152
|
+
KILL,\
|
153
|
+
LABEL,\
|
154
|
+
LEADING,\
|
155
|
+
LEAVE,\
|
156
|
+
LEFT,\
|
157
|
+
LIKE,\
|
158
|
+
LIMIT,\
|
159
|
+
LINES,\
|
160
|
+
LOAD,\
|
161
|
+
LOCALTIME,\
|
162
|
+
LOCALTIMESTAMP,\
|
163
|
+
LOCK,\
|
164
|
+
LONG,\
|
165
|
+
LONGBLOB,\
|
166
|
+
LONGTEXT,\
|
167
|
+
LOOP,\
|
168
|
+
LOW_PRIORITY,\
|
169
|
+
MATCH,\
|
170
|
+
MEDIUMBLOB,\
|
171
|
+
MEDIUMINT,\
|
172
|
+
MEDIUMTEXT,\
|
173
|
+
MIDDLEINT,\
|
174
|
+
MINUTE_MICROSECOND,\
|
175
|
+
MINUTE_SECOND,\
|
176
|
+
MOD,\
|
177
|
+
MODIFIES,\
|
178
|
+
NATURAL,\
|
179
|
+
NOT,\
|
180
|
+
NO_WRITE_TO_BINLOG,\
|
181
|
+
NULL,\
|
182
|
+
NUMERIC,\
|
183
|
+
ON,\
|
184
|
+
OPTIMIZE,\
|
185
|
+
OPTION,\
|
186
|
+
OPTIONALLY,\
|
187
|
+
OR,\
|
188
|
+
ORDER,\
|
189
|
+
OUT,\
|
190
|
+
OUTER,\
|
191
|
+
OUTFILE,\
|
192
|
+
PRECISION,\
|
193
|
+
PRIMARY,\
|
194
|
+
PRIVILEGES,\
|
195
|
+
PROCEDURE,\
|
196
|
+
PURGE,\
|
197
|
+
READ,\
|
198
|
+
READS,\
|
199
|
+
REAL,\
|
200
|
+
REFERENCES,\
|
201
|
+
REGEXP,\
|
202
|
+
RELEASE,\
|
203
|
+
RENAME,\
|
204
|
+
REPEAT,\
|
205
|
+
REPLACE,\
|
206
|
+
REQUIRE,\
|
207
|
+
RESTRICT,\
|
208
|
+
RETURN,\
|
209
|
+
REVOKE,\
|
210
|
+
RIGHT,\
|
211
|
+
RLIKE,\
|
212
|
+
SCHEMA,\
|
213
|
+
SCHEMAS,\
|
214
|
+
SECOND_MICROSECOND,\
|
215
|
+
SELECT,\
|
216
|
+
SENSITIVE,\
|
217
|
+
SEPARATOR,\
|
218
|
+
SET,\
|
219
|
+
SHOW,\
|
220
|
+
SMALLINT,\
|
221
|
+
SONAME,\
|
222
|
+
SPATIAL,\
|
223
|
+
SPECIFIC,\
|
224
|
+
SQL,\
|
225
|
+
SQLEXCEPTION,\
|
226
|
+
SQLSTATE,\
|
227
|
+
SQLWARNING,\
|
228
|
+
SQL_BIG_RESULT,\
|
229
|
+
SQL_CALC_FOUND_ROWS,\
|
230
|
+
SQL_SMALL_RESULT,\
|
231
|
+
SSL,\
|
232
|
+
STARTING,\
|
233
|
+
STRAIGHT_JOIN,\
|
234
|
+
TABLE,\
|
235
|
+
TABLES,\
|
236
|
+
TERMINATED,\
|
237
|
+
THEN,\
|
238
|
+
TINYBLOB,\
|
239
|
+
TINYINT,\
|
240
|
+
TINYTEXT,\
|
241
|
+
TO,\
|
242
|
+
TRAILING,\
|
243
|
+
TRIGGER,\
|
244
|
+
TRUE,\
|
245
|
+
UNDO,\
|
246
|
+
UNION,\
|
247
|
+
UNIQUE,\
|
248
|
+
UNLOCK,\
|
249
|
+
UNSIGNED,\
|
250
|
+
UPDATE,\
|
251
|
+
UPGRADE,\
|
252
|
+
USAGE,\
|
253
|
+
USE,\
|
254
|
+
USING,\
|
255
|
+
UTC_DATE,\
|
256
|
+
UTC_TIME,\
|
257
|
+
UTC_TIMESTAMP,\
|
258
|
+
VALUES,\
|
259
|
+
VARBINARY,\
|
260
|
+
VARCHAR,\
|
261
|
+
VARCHARACTER,\
|
262
|
+
VARYING,\
|
263
|
+
WHEN,\
|
264
|
+
WHERE,\
|
265
|
+
WHILE,\
|
266
|
+
WITH,\
|
267
|
+
WRITE,\
|
268
|
+
XOR,\
|
269
|
+
YEAR_MONTH,\
|
270
|
+
ZEROFILL
|
271
|
+
|
272
|
+
# The character to append to attribute names to avoid exceptions due to
|
273
|
+
# clashes between keywords and attribute names
|
274
|
+
KeywordsMaskChar=_
|
275
|
+
|
276
|
+
#flags for loading and saving instances using DatabaseLoader/Saver
|
277
|
+
nominalToStringLimit=50
|
278
|
+
idColumn=auto_generated_id
|
279
|
+
|
280
|
+
|