ruby-band 0.1.11
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +3 -0
- data/Gemfile +30 -0
- data/Gemfile.lock +119 -0
- data/Jarfile +9 -0
- data/Jarfile.lock +10 -0
- data/LICENSE.txt +22 -0
- data/README.md +321 -0
- data/README.rdoc +70 -0
- data/Rakefile +66 -0
- data/VERSION +1 -0
- data/band_server/client.rb +35 -0
- data/band_server/client_alt.rb +35 -0
- data/band_server/first_dataset.csv +15 -0
- data/band_server/second_dataset.csv +15 -0
- data/band_server/simple_server.rb +90 -0
- data/band_server/third_dataset.csv +15 -0
- data/band_server/uploads/first_dataset.csv +15 -0
- data/band_server/uploads/second_dataset.csv +15 -0
- data/band_server/uploads/third_dataset.csv +15 -0
- data/bin/ruby-band +83 -0
- data/ext/mkrf_conf.rb +74 -0
- data/features/create_dataset.feature +12 -0
- data/features/step_definitions/create_dataset.rb +39 -0
- data/features/step_definitions/weka_classifiers.rb +43 -0
- data/features/step_definitions/weka_clustering.rb +34 -0
- data/features/step_definitions/weka_filters.rb +32 -0
- data/features/step_definitions/weka_parsers.rb +46 -0
- data/features/step_definitions/weka_pipeline.rb +41 -0
- data/features/support/env.rb +3 -0
- data/features/weka_classifiers.feature +16 -0
- data/features/weka_clustering.feature +15 -0
- data/features/weka_filters.feature +12 -0
- data/features/weka_parsers.feature +18 -0
- data/features/weka_pipeline.feature +14 -0
- data/lib/ruby-band.rb +12 -0
- data/lib/ruby-band/apache.rb +2 -0
- data/lib/ruby-band/apache/stat/correlation.rb +42 -0
- data/lib/ruby-band/apache/stat/inference.rb +151 -0
- data/lib/ruby-band/apache/stat/regression.rb +22 -0
- data/lib/ruby-band/core.rb +6 -0
- data/lib/ruby-band/core/parser/parser.rb +27 -0
- data/lib/ruby-band/core/type/apache_matrices.rb +35 -0
- data/lib/ruby-band/core/type/attribute.rb +53 -0
- data/lib/ruby-band/core/type/instance.rb +10 -0
- data/lib/ruby-band/core/type/instances.rb +361 -0
- data/lib/ruby-band/core/type/utils.rb +31 -0
- data/lib/ruby-band/weka.rb +14 -0
- data/lib/ruby-band/weka/attribute_selection/attribute_selection_utils.rb +20 -0
- data/lib/ruby-band/weka/attribute_selection/evaluators.rb +58 -0
- data/lib/ruby-band/weka/attribute_selection/search.rb +52 -0
- data/lib/ruby-band/weka/classifiers/bayes/bayes.rb +86 -0
- data/lib/ruby-band/weka/classifiers/bayes/bayes_utils.rb +82 -0
- data/lib/ruby-band/weka/classifiers/evaluation.rb +13 -0
- data/lib/ruby-band/weka/classifiers/functions/functions.rb +177 -0
- data/lib/ruby-band/weka/classifiers/functions/functions_utils.rb +78 -0
- data/lib/ruby-band/weka/classifiers/lazy/lazy.rb +86 -0
- data/lib/ruby-band/weka/classifiers/lazy/lazy_utils.rb +83 -0
- data/lib/ruby-band/weka/classifiers/mi/mi.rb +191 -0
- data/lib/ruby-band/weka/classifiers/mi/mi_utils.rb +80 -0
- data/lib/ruby-band/weka/classifiers/rules/rules.rb +190 -0
- data/lib/ruby-band/weka/classifiers/rules/rules_utils.rb +81 -0
- data/lib/ruby-band/weka/classifiers/trees/trees.rb +110 -0
- data/lib/ruby-band/weka/classifiers/trees/trees_utils.rb +85 -0
- data/lib/ruby-band/weka/clusterers/clusterers.rb +99 -0
- data/lib/ruby-band/weka/clusterers/clusterers_utils.rb +86 -0
- data/lib/ruby-band/weka/db/DatabaseUtils_mysql +280 -0
- data/lib/ruby-band/weka/db/DatabaseUtils_postgresql +594 -0
- data/lib/ruby-band/weka/db/db.rb +74 -0
- data/lib/ruby-band/weka/filters/supervised/attribute/attribute.rb +55 -0
- data/lib/ruby-band/weka/filters/supervised/instance/instance.rb +17 -0
- data/lib/ruby-band/weka/filters/supervised/supervised_utils.rb +38 -0
- data/lib/ruby-band/weka/filters/unsupervised/attribute/attribute.rb +90 -0
- data/lib/ruby-band/weka/filters/unsupervised/instance/instance.rb +48 -0
- data/lib/ruby-band/weka/filters/unsupervised/unsupervised_utils.rb +38 -0
- data/resources/ReutersGrain-test.arff +611 -0
- data/resources/ReutersGrain-train.arff +1561 -0
- data/resources/weather.csv +15 -0
- data/resources/weather.numeric.arff +23 -0
- data/ruby-band.gemspec +178 -0
- data/spec/ruby-band_spec.rb +7 -0
- data/spec/spec_helper.rb +12 -0
- data/test/helper.rb +18 -0
- data/test/test_apacheCorrelation.rb +22 -0
- data/test/test_apacheInference.rb +46 -0
- data/test/test_ruby-band.rb +9 -0
- metadata +426 -0
@@ -0,0 +1,99 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__)
|
2
|
+
require 'clusterers_utils'
|
3
|
+
|
4
|
+
module Weka
|
5
|
+
#This module contains the clusterers from the 'weka.clusterers' package
|
6
|
+
module Clusterer
|
7
|
+
java_import 'weka.clusterers.SimpleKMeans'
|
8
|
+
java_import 'weka.clusterers.FarthestFirst'
|
9
|
+
java_import 'weka.clusterers.EM'
|
10
|
+
java_import 'weka.clusterers.XMeans'
|
11
|
+
java_import 'weka.clusterers.HierarchicalClusterer'
|
12
|
+
java_import 'weka.clusterers.Cobweb'
|
13
|
+
|
14
|
+
# Mixes the Clusterer_utils helpers into Cobweb
# (the class brought in via java_import 'weka.clusterers.Cobweb').
class Cobweb
  include Clusterer_utils

  # Subclass whose constructor configures and builds the clusterer.
  class Base < Cobweb
    # With a block, the block is handed to init_instance_clusterer;
    # without one, init_clusterer is used instead.
    def initialize(&block)
      super
      return init_clusterer unless block_given?

      init_instance_clusterer(&block)
    end
  end
end
|
27
|
+
|
28
|
+
# Mixes the Clusterer_utils helpers into EM
# (the class brought in via java_import 'weka.clusterers.EM').
class EM
  include Clusterer_utils

  # Subclass whose constructor configures and builds the clusterer.
  class Base < EM
    # With a block, the block is handed to init_instance_clusterer;
    # without one, init_clusterer is used instead.
    def initialize(&block)
      super
      return init_clusterer unless block_given?

      init_instance_clusterer(&block)
    end
  end
end
|
41
|
+
|
42
|
+
# Mixes the Clusterer_utils helpers into HierarchicalClusterer
# (the class brought in via java_import 'weka.clusterers.HierarchicalClusterer').
class HierarchicalClusterer
  include Clusterer_utils

  # Subclass whose constructor configures and builds the clusterer.
  class Base < HierarchicalClusterer
    # With a block, the block is handed to init_instance_clusterer;
    # without one, init_clusterer is used instead.
    def initialize(&block)
      super
      return init_clusterer unless block_given?

      init_instance_clusterer(&block)
    end
  end
end
|
55
|
+
|
56
|
+
# Mixes the Clusterer_utils helpers into SimpleKMeans
# (the class brought in via java_import 'weka.clusterers.SimpleKMeans').
class SimpleKMeans
  include Clusterer_utils

  # Subclass whose constructor configures and builds the clusterer.
  class Base < SimpleKMeans
    # With a block, the block is handed to init_instance_clusterer;
    # without one, init_clusterer is used instead.
    def initialize(&block)
      super
      return init_clusterer unless block_given?

      init_instance_clusterer(&block)
    end
  end
end
|
69
|
+
|
70
|
+
# Mixes the Clusterer_utils helpers into FarthestFirst
# (the class brought in via java_import 'weka.clusterers.FarthestFirst').
class FarthestFirst
  include Clusterer_utils

  # Subclass whose constructor configures and builds the clusterer.
  class Base < FarthestFirst
    # With a block, the block is handed to init_instance_clusterer;
    # without one, init_clusterer is used instead.
    def initialize(&block)
      super
      return init_clusterer unless block_given?

      init_instance_clusterer(&block)
    end
  end
end
|
83
|
+
|
84
|
+
# Mixes the Clusterer_utils helpers into XMeans
# (the class brought in via java_import 'weka.clusterers.XMeans').
class XMeans
  include Clusterer_utils

  # Subclass whose constructor configures and builds the clusterer.
  class Base < XMeans
    # With a block, the block is handed to init_instance_clusterer;
    # without one, init_clusterer is used instead.
    def initialize(&block)
      super
      return init_clusterer unless block_given?

      init_instance_clusterer(&block)
    end
  end
end
|
97
|
+
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#This module is used by the classes from the Clusterer module
|
2
|
+
#to inherit the following methods (instance and class methods)
|
3
|
+
module Clusterer_utils
|
4
|
+
java_import "weka.core.Utils"
|
5
|
+
java_import "weka.clusterers.ClusterEvaluation"
|
6
|
+
|
7
|
+
# Configures and builds the clusterer from class-level settings:
# applies the class's option string when one is set, then calls
# buildClusterer with the class's data.
def init_clusterer
  klass = self.class
  class_options = klass.options
  set_options(class_options) if class_options
  buildClusterer(klass.data)
end
|
11
|
+
|
12
|
+
# Configures this instance from a block, then builds the clusterer.
# The block is evaluated with self set to this object (instance_eval), so it
# can call the instance methods directly; whatever it stored in @dataset is
# then passed to buildClusterer.
def init_instance_clusterer(&block)
  instance_eval(&block)
  # Disabled in the original and kept that way:
  #@dataset.setClassIndex(@class_index)
  buildClusterer(@dataset)
end
|
17
|
+
|
18
|
+
#Instance methods list
|
19
|
+
# Hook invoked by Ruby when this module is included into `base`;
# makes the methods of ClassMethods available on `base` itself.
def self.included(base)
  base.extend ClassMethods
end
|
22
|
+
|
23
|
+
#set instance data for the clusterer
|
24
|
+
# Stores `data` in @dataset, the dataset used when the clusterer is built
# from a block and as a fallback during evaluation.
# Returns the value that was assigned.
def set_data(data)
  @dataset = data
end
|
27
|
+
|
28
|
+
#set options for the clusterer
|
29
|
+
# Splits the option string with Utils.splitOptions and passes the result
# to setOptions.
def set_options(options)
  setOptions(Utils.splitOptions(options))
end
|
33
|
+
|
34
|
+
# Returns one line per entry of listOptions, each formatted as
# "<synopsis> <description>", joined with newlines.
def list_options
  lines = listOptions.map do |option|
    "#{option.synopsis} #{option.description}"
  end
  lines.join("\n")
end
|
37
|
+
|
38
|
+
#the description provided by the Weka Documentation
|
39
|
+
# Returns whatever globalInfo returns for this clusterer.
def description
  globalInfo()
end
|
42
|
+
|
43
|
+
#list cluster centroids with coordinates
|
44
|
+
# Returns the result of getClusterCentroids for this clusterer.
def get_centroids
  getClusterCentroids()
end
|
47
|
+
|
48
|
+
#list the clusterer's capabilities with respect to attribute types (e.g. Numeric, Nominal...)
|
49
|
+
# Returns the string form (to_s) of this clusterer's capabilities object.
def list_capabilities
  capabilities = get_capabilities
  capabilities.to_s
end
|
52
|
+
|
53
|
+
# Validate clusterer. If the evaluation needs to be performed on a different dataset this function accepts
|
54
|
+
# an optional parameter (an Instances class object)
|
55
|
+
# Evaluates this clusterer and returns the textual evaluation report.
#
# The dataset is chosen in this order: the first argument when given,
# otherwise the class-level data (self.class.data), otherwise the
# instance's @dataset. Also prints 'performing evaluation' to stdout.
#
# @param args [Array] optional; args[0] may be a dataset to evaluate on
# @return the value of ClusterEvaluation#clusterResultsToString
def evaluate(*args)
  # Named `evaluation` rather than `eval` so Kernel#eval is not shadowed.
  evaluation = ClusterEvaluation.new
  evaluation.setClusterer(self)
  evaluation.evaluateClusterer(args[0] || self.class.data || @dataset)
  puts 'performing evaluation'
  evaluation.clusterResultsToString
end
|
70
|
+
|
71
|
+
#Class methods module
|
72
|
+
# Class-level methods; Clusterer_utils extends each including class with
# this module, giving it `options`/`set_options` and `data`/`set_data`.
module ClassMethods
  # Defines, for every given name, a reader `name` and a writer
  # `set_name(val)` backed by the receiver's instance variable @name.
  # Uses define_method instead of evaluating generated source strings.
  def self.classifier_attr_accessor(*args)
    args.each do |arg|
      ivar = "@#{arg}"
      # Here's the getter
      define_method(arg) { instance_variable_get(ivar) }
      # Here's the setter
      define_method("set_#{arg}") { |val| instance_variable_set(ivar, val) }
    end
  end

  classifier_attr_accessor :options, :data
end
|
86
|
+
end
|
@@ -0,0 +1,280 @@
|
|
1
|
+
# Database settings for MySQL 3.23.x, 4.x
|
2
|
+
#
|
3
|
+
# General information on database access can be found here:
|
4
|
+
# http://weka.wikispaces.com/Databases
|
5
|
+
#
|
6
|
+
# url: http://www.mysql.com/
|
7
|
+
# jdbc: http://www.mysql.com/products/connector/j/
|
8
|
+
# author: Fracpete (fracpete at waikato dot ac dot nz)
|
9
|
+
# version: $Revision: 5836 $
|
10
|
+
|
11
|
+
# JDBC driver (comma-separated list)
|
12
|
+
jdbcDriver=org.gjt.mm.mysql.Driver
|
13
|
+
|
14
|
+
# database URL
|
15
|
+
jdbcURL=jdbc:mysql://server_name:3306/database_name
|
16
|
+
|
17
|
+
# specific data types
|
18
|
+
# string, getString() = 0; --> nominal
|
19
|
+
# boolean, getBoolean() = 1; --> nominal
|
20
|
+
# double, getDouble() = 2; --> numeric
|
21
|
+
# byte, getByte() = 3; --> numeric
|
22
|
+
# short, getShort() = 4; --> numeric
|
23
|
+
# int, getInt() = 5; --> numeric
|
24
|
+
# long, getLong() = 6; --> numeric
|
25
|
+
# float, getFloat() = 7; --> numeric
|
26
|
+
# date, getDate() = 8; --> date
|
27
|
+
# text, getString() = 9; --> string
|
28
|
+
# time, getTime() = 10; --> date
|
29
|
+
|
30
|
+
# other options
|
31
|
+
CREATE_DOUBLE=DOUBLE
|
32
|
+
CREATE_STRING=TEXT
|
33
|
+
CREATE_INT=INT
|
34
|
+
CREATE_DATE=DATETIME
|
35
|
+
DateFormat=yyyy-MM-dd HH:mm:ss
|
36
|
+
checkUpperCaseNames=false
|
37
|
+
checkLowerCaseNames=false
|
38
|
+
checkForTable=true
|
39
|
+
|
40
|
+
# All the reserved keywords for this database
|
41
|
+
# Based on the keywords listed at the following URL (2009-04-13):
|
42
|
+
# http://dev.mysql.com/doc/mysqld-version-reference/en/mysqld-version-reference-reservedwords-5-0.html
|
43
|
+
Keywords=\
|
44
|
+
ADD,\
|
45
|
+
ALL,\
|
46
|
+
ALTER,\
|
47
|
+
ANALYZE,\
|
48
|
+
AND,\
|
49
|
+
AS,\
|
50
|
+
ASC,\
|
51
|
+
ASENSITIVE,\
|
52
|
+
BEFORE,\
|
53
|
+
BETWEEN,\
|
54
|
+
BIGINT,\
|
55
|
+
BINARY,\
|
56
|
+
BLOB,\
|
57
|
+
BOTH,\
|
58
|
+
BY,\
|
59
|
+
CALL,\
|
60
|
+
CASCADE,\
|
61
|
+
CASE,\
|
62
|
+
CHANGE,\
|
63
|
+
CHAR,\
|
64
|
+
CHARACTER,\
|
65
|
+
CHECK,\
|
66
|
+
COLLATE,\
|
67
|
+
COLUMN,\
|
68
|
+
COLUMNS,\
|
69
|
+
CONDITION,\
|
70
|
+
CONNECTION,\
|
71
|
+
CONSTRAINT,\
|
72
|
+
CONTINUE,\
|
73
|
+
CONVERT,\
|
74
|
+
CREATE,\
|
75
|
+
CROSS,\
|
76
|
+
CURRENT_DATE,\
|
77
|
+
CURRENT_TIME,\
|
78
|
+
CURRENT_TIMESTAMP,\
|
79
|
+
CURRENT_USER,\
|
80
|
+
CURSOR,\
|
81
|
+
DATABASE,\
|
82
|
+
DATABASES,\
|
83
|
+
DAY_HOUR,\
|
84
|
+
DAY_MICROSECOND,\
|
85
|
+
DAY_MINUTE,\
|
86
|
+
DAY_SECOND,\
|
87
|
+
DEC,\
|
88
|
+
DECIMAL,\
|
89
|
+
DECLARE,\
|
90
|
+
DEFAULT,\
|
91
|
+
DELAYED,\
|
92
|
+
DELETE,\
|
93
|
+
DESC,\
|
94
|
+
DESCRIBE,\
|
95
|
+
DETERMINISTIC,\
|
96
|
+
DISTINCT,\
|
97
|
+
DISTINCTROW,\
|
98
|
+
DIV,\
|
99
|
+
DOUBLE,\
|
100
|
+
DROP,\
|
101
|
+
DUAL,\
|
102
|
+
EACH,\
|
103
|
+
ELSE,\
|
104
|
+
ELSEIF,\
|
105
|
+
ENCLOSED,\
|
106
|
+
ESCAPED,\
|
107
|
+
EXISTS,\
|
108
|
+
EXIT,\
|
109
|
+
EXPLAIN,\
|
110
|
+
FALSE,\
|
111
|
+
FETCH,\
|
112
|
+
FIELDS,\
|
113
|
+
FLOAT,\
|
114
|
+
FLOAT4,\
|
115
|
+
FLOAT8,\
|
116
|
+
FOR,\
|
117
|
+
FORCE,\
|
118
|
+
FOREIGN,\
|
119
|
+
FROM,\
|
120
|
+
FULLTEXT,\
|
121
|
+
GOTO,\
|
122
|
+
GRANT,\
|
123
|
+
GROUP,\
|
124
|
+
HAVING,\
|
125
|
+
HIGH_PRIORITY,\
|
126
|
+
HOUR_MICROSECOND,\
|
127
|
+
HOUR_MINUTE,\
|
128
|
+
HOUR_SECOND,\
|
129
|
+
IF,\
|
130
|
+
IGNORE,\
|
131
|
+
IN,\
|
132
|
+
INDEX,\
|
133
|
+
INFILE,\
|
134
|
+
INNER,\
|
135
|
+
INOUT,\
|
136
|
+
INSENSITIVE,\
|
137
|
+
INSERT,\
|
138
|
+
INT,\
|
139
|
+
INT1,\
|
140
|
+
INT2,\
|
141
|
+
INT3,\
|
142
|
+
INT4,\
|
143
|
+
INT8,\
|
144
|
+
INTEGER,\
|
145
|
+
INTERVAL,\
|
146
|
+
INTO,\
|
147
|
+
IS,\
|
148
|
+
ITERATE,\
|
149
|
+
JOIN,\
|
150
|
+
KEY,\
|
151
|
+
KEYS,\
|
152
|
+
KILL,\
|
153
|
+
LABEL,\
|
154
|
+
LEADING,\
|
155
|
+
LEAVE,\
|
156
|
+
LEFT,\
|
157
|
+
LIKE,\
|
158
|
+
LIMIT,\
|
159
|
+
LINES,\
|
160
|
+
LOAD,\
|
161
|
+
LOCALTIME,\
|
162
|
+
LOCALTIMESTAMP,\
|
163
|
+
LOCK,\
|
164
|
+
LONG,\
|
165
|
+
LONGBLOB,\
|
166
|
+
LONGTEXT,\
|
167
|
+
LOOP,\
|
168
|
+
LOW_PRIORITY,\
|
169
|
+
MATCH,\
|
170
|
+
MEDIUMBLOB,\
|
171
|
+
MEDIUMINT,\
|
172
|
+
MEDIUMTEXT,\
|
173
|
+
MIDDLEINT,\
|
174
|
+
MINUTE_MICROSECOND,\
|
175
|
+
MINUTE_SECOND,\
|
176
|
+
MOD,\
|
177
|
+
MODIFIES,\
|
178
|
+
NATURAL,\
|
179
|
+
NOT,\
|
180
|
+
NO_WRITE_TO_BINLOG,\
|
181
|
+
NULL,\
|
182
|
+
NUMERIC,\
|
183
|
+
ON,\
|
184
|
+
OPTIMIZE,\
|
185
|
+
OPTION,\
|
186
|
+
OPTIONALLY,\
|
187
|
+
OR,\
|
188
|
+
ORDER,\
|
189
|
+
OUT,\
|
190
|
+
OUTER,\
|
191
|
+
OUTFILE,\
|
192
|
+
PRECISION,\
|
193
|
+
PRIMARY,\
|
194
|
+
PRIVILEGES,\
|
195
|
+
PROCEDURE,\
|
196
|
+
PURGE,\
|
197
|
+
READ,\
|
198
|
+
READS,\
|
199
|
+
REAL,\
|
200
|
+
REFERENCES,\
|
201
|
+
REGEXP,\
|
202
|
+
RELEASE,\
|
203
|
+
RENAME,\
|
204
|
+
REPEAT,\
|
205
|
+
REPLACE,\
|
206
|
+
REQUIRE,\
|
207
|
+
RESTRICT,\
|
208
|
+
RETURN,\
|
209
|
+
REVOKE,\
|
210
|
+
RIGHT,\
|
211
|
+
RLIKE,\
|
212
|
+
SCHEMA,\
|
213
|
+
SCHEMAS,\
|
214
|
+
SECOND_MICROSECOND,\
|
215
|
+
SELECT,\
|
216
|
+
SENSITIVE,\
|
217
|
+
SEPARATOR,\
|
218
|
+
SET,\
|
219
|
+
SHOW,\
|
220
|
+
SMALLINT,\
|
221
|
+
SONAME,\
|
222
|
+
SPATIAL,\
|
223
|
+
SPECIFIC,\
|
224
|
+
SQL,\
|
225
|
+
SQLEXCEPTION,\
|
226
|
+
SQLSTATE,\
|
227
|
+
SQLWARNING,\
|
228
|
+
SQL_BIG_RESULT,\
|
229
|
+
SQL_CALC_FOUND_ROWS,\
|
230
|
+
SQL_SMALL_RESULT,\
|
231
|
+
SSL,\
|
232
|
+
STARTING,\
|
233
|
+
STRAIGHT_JOIN,\
|
234
|
+
TABLE,\
|
235
|
+
TABLES,\
|
236
|
+
TERMINATED,\
|
237
|
+
THEN,\
|
238
|
+
TINYBLOB,\
|
239
|
+
TINYINT,\
|
240
|
+
TINYTEXT,\
|
241
|
+
TO,\
|
242
|
+
TRAILING,\
|
243
|
+
TRIGGER,\
|
244
|
+
TRUE,\
|
245
|
+
UNDO,\
|
246
|
+
UNION,\
|
247
|
+
UNIQUE,\
|
248
|
+
UNLOCK,\
|
249
|
+
UNSIGNED,\
|
250
|
+
UPDATE,\
|
251
|
+
UPGRADE,\
|
252
|
+
USAGE,\
|
253
|
+
USE,\
|
254
|
+
USING,\
|
255
|
+
UTC_DATE,\
|
256
|
+
UTC_TIME,\
|
257
|
+
UTC_TIMESTAMP,\
|
258
|
+
VALUES,\
|
259
|
+
VARBINARY,\
|
260
|
+
VARCHAR,\
|
261
|
+
VARCHARACTER,\
|
262
|
+
VARYING,\
|
263
|
+
WHEN,\
|
264
|
+
WHERE,\
|
265
|
+
WHILE,\
|
266
|
+
WITH,\
|
267
|
+
WRITE,\
|
268
|
+
XOR,\
|
269
|
+
YEAR_MONTH,\
|
270
|
+
ZEROFILL
|
271
|
+
|
272
|
+
# The character to append to attribute names to avoid exceptions due to
|
273
|
+
# clashes between keywords and attribute names
|
274
|
+
KeywordsMaskChar=_
|
275
|
+
|
276
|
+
#flags for loading and saving instances using DatabaseLoader/Saver
|
277
|
+
nominalToStringLimit=50
|
278
|
+
idColumn=auto_generated_id
|
279
|
+
|
280
|
+
|