ruby-band 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. data/.travis.yml +3 -0
  2. data/Gemfile +30 -0
  3. data/Gemfile.lock +119 -0
  4. data/Jarfile +9 -0
  5. data/Jarfile.lock +10 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +321 -0
  8. data/README.rdoc +70 -0
  9. data/Rakefile +66 -0
  10. data/VERSION +1 -0
  11. data/band_server/client.rb +35 -0
  12. data/band_server/client_alt.rb +35 -0
  13. data/band_server/first_dataset.csv +15 -0
  14. data/band_server/second_dataset.csv +15 -0
  15. data/band_server/simple_server.rb +90 -0
  16. data/band_server/third_dataset.csv +15 -0
  17. data/band_server/uploads/first_dataset.csv +15 -0
  18. data/band_server/uploads/second_dataset.csv +15 -0
  19. data/band_server/uploads/third_dataset.csv +15 -0
  20. data/bin/ruby-band +83 -0
  21. data/ext/mkrf_conf.rb +74 -0
  22. data/features/create_dataset.feature +12 -0
  23. data/features/step_definitions/create_dataset.rb +39 -0
  24. data/features/step_definitions/weka_classifiers.rb +43 -0
  25. data/features/step_definitions/weka_clustering.rb +34 -0
  26. data/features/step_definitions/weka_filters.rb +32 -0
  27. data/features/step_definitions/weka_parsers.rb +46 -0
  28. data/features/step_definitions/weka_pipeline.rb +41 -0
  29. data/features/support/env.rb +3 -0
  30. data/features/weka_classifiers.feature +16 -0
  31. data/features/weka_clustering.feature +15 -0
  32. data/features/weka_filters.feature +12 -0
  33. data/features/weka_parsers.feature +18 -0
  34. data/features/weka_pipeline.feature +14 -0
  35. data/lib/ruby-band.rb +12 -0
  36. data/lib/ruby-band/apache.rb +2 -0
  37. data/lib/ruby-band/apache/stat/correlation.rb +42 -0
  38. data/lib/ruby-band/apache/stat/inference.rb +151 -0
  39. data/lib/ruby-band/apache/stat/regression.rb +22 -0
  40. data/lib/ruby-band/core.rb +6 -0
  41. data/lib/ruby-band/core/parser/parser.rb +27 -0
  42. data/lib/ruby-band/core/type/apache_matrices.rb +35 -0
  43. data/lib/ruby-band/core/type/attribute.rb +53 -0
  44. data/lib/ruby-band/core/type/instance.rb +10 -0
  45. data/lib/ruby-band/core/type/instances.rb +361 -0
  46. data/lib/ruby-band/core/type/utils.rb +31 -0
  47. data/lib/ruby-band/weka.rb +14 -0
  48. data/lib/ruby-band/weka/attribute_selection/attribute_selection_utils.rb +20 -0
  49. data/lib/ruby-band/weka/attribute_selection/evaluators.rb +58 -0
  50. data/lib/ruby-band/weka/attribute_selection/search.rb +52 -0
  51. data/lib/ruby-band/weka/classifiers/bayes/bayes.rb +86 -0
  52. data/lib/ruby-band/weka/classifiers/bayes/bayes_utils.rb +82 -0
  53. data/lib/ruby-band/weka/classifiers/evaluation.rb +13 -0
  54. data/lib/ruby-band/weka/classifiers/functions/functions.rb +177 -0
  55. data/lib/ruby-band/weka/classifiers/functions/functions_utils.rb +78 -0
  56. data/lib/ruby-band/weka/classifiers/lazy/lazy.rb +86 -0
  57. data/lib/ruby-band/weka/classifiers/lazy/lazy_utils.rb +83 -0
  58. data/lib/ruby-band/weka/classifiers/mi/mi.rb +191 -0
  59. data/lib/ruby-band/weka/classifiers/mi/mi_utils.rb +80 -0
  60. data/lib/ruby-band/weka/classifiers/rules/rules.rb +190 -0
  61. data/lib/ruby-band/weka/classifiers/rules/rules_utils.rb +81 -0
  62. data/lib/ruby-band/weka/classifiers/trees/trees.rb +110 -0
  63. data/lib/ruby-band/weka/classifiers/trees/trees_utils.rb +85 -0
  64. data/lib/ruby-band/weka/clusterers/clusterers.rb +99 -0
  65. data/lib/ruby-band/weka/clusterers/clusterers_utils.rb +86 -0
  66. data/lib/ruby-band/weka/db/DatabaseUtils_mysql +280 -0
  67. data/lib/ruby-band/weka/db/DatabaseUtils_postgresql +594 -0
  68. data/lib/ruby-band/weka/db/db.rb +74 -0
  69. data/lib/ruby-band/weka/filters/supervised/attribute/attribute.rb +55 -0
  70. data/lib/ruby-band/weka/filters/supervised/instance/instance.rb +17 -0
  71. data/lib/ruby-band/weka/filters/supervised/supervised_utils.rb +38 -0
  72. data/lib/ruby-band/weka/filters/unsupervised/attribute/attribute.rb +90 -0
  73. data/lib/ruby-band/weka/filters/unsupervised/instance/instance.rb +48 -0
  74. data/lib/ruby-band/weka/filters/unsupervised/unsupervised_utils.rb +38 -0
  75. data/resources/ReutersGrain-test.arff +611 -0
  76. data/resources/ReutersGrain-train.arff +1561 -0
  77. data/resources/weather.csv +15 -0
  78. data/resources/weather.numeric.arff +23 -0
  79. data/ruby-band.gemspec +178 -0
  80. data/spec/ruby-band_spec.rb +7 -0
  81. data/spec/spec_helper.rb +12 -0
  82. data/test/helper.rb +18 -0
  83. data/test/test_apacheCorrelation.rb +22 -0
  84. data/test/test_apacheInference.rb +46 -0
  85. data/test/test_ruby-band.rb +9 -0
  86. metadata +426 -0
@@ -0,0 +1,99 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ require 'clusterers_utils'
3
+
4
+ module Weka
5
+ #This module contains the clusterers from the 'weka.clusterers' package
6
+ module Clusterer
7
+ java_import 'weka.clusterers.SimpleKMeans'
8
+ java_import 'weka.clusterers.FarthestFirst'
9
+ java_import 'weka.clusterers.EM'
10
+ java_import 'weka.clusterers.XMeans'
11
+ java_import 'weka.clusterers.HierarchicalClusterer'
12
+ java_import 'weka.clusterers.Cobweb'
13
+
14
class Cobweb
  include Clusterer_utils

  # Cobweb clusterer that is built as part of construction.
  class Base < Cobweb
    # Runs the parent constructor, then builds the clusterer: a given block is
    # handed to init_instance_clusterer, otherwise init_clusterer is used.
    def initialize(&block)
      super
      block_given? ? init_instance_clusterer(&block) : init_clusterer
    end
  end
end
27
+
28
class EM
  include Clusterer_utils

  # EM clusterer that is built as part of construction.
  class Base < EM
    # Runs the parent constructor, then builds the clusterer: a given block is
    # handed to init_instance_clusterer, otherwise init_clusterer is used.
    def initialize(&block)
      super
      block_given? ? init_instance_clusterer(&block) : init_clusterer
    end
  end
end
41
+
42
class HierarchicalClusterer
  include Clusterer_utils

  # HierarchicalClusterer that is built as part of construction.
  class Base < HierarchicalClusterer
    # Runs the parent constructor, then builds the clusterer: a given block is
    # handed to init_instance_clusterer, otherwise init_clusterer is used.
    def initialize(&block)
      super
      block_given? ? init_instance_clusterer(&block) : init_clusterer
    end
  end
end
55
+
56
class SimpleKMeans
  include Clusterer_utils

  # SimpleKMeans clusterer that is built as part of construction.
  class Base < SimpleKMeans
    # Runs the parent constructor, then builds the clusterer: a given block is
    # handed to init_instance_clusterer, otherwise init_clusterer is used.
    def initialize(&block)
      super
      block_given? ? init_instance_clusterer(&block) : init_clusterer
    end
  end
end
69
+
70
class FarthestFirst
  include Clusterer_utils

  # FarthestFirst clusterer that is built as part of construction.
  class Base < FarthestFirst
    # Runs the parent constructor, then builds the clusterer: a given block is
    # handed to init_instance_clusterer, otherwise init_clusterer is used.
    def initialize(&block)
      super
      block_given? ? init_instance_clusterer(&block) : init_clusterer
    end
  end
end
83
+
84
class XMeans
  include Clusterer_utils

  # XMeans clusterer that is built as part of construction.
  class Base < XMeans
    # Runs the parent constructor, then builds the clusterer: a given block is
    # handed to init_instance_clusterer, otherwise init_clusterer is used.
    def initialize(&block)
      super
      block_given? ? init_instance_clusterer(&block) : init_clusterer
    end
  end
end
97
+
98
+ end
99
+ end
@@ -0,0 +1,86 @@
1
+ #This module is mixed into the classes of the Clusterer module
2
+ #to give them the following methods (instance and class methods)
3
+ module Clusterer_utils
4
+ java_import "weka.core.Utils"
5
+ java_import "weka.clusterers.ClusterEvaluation"
6
+
7
# Builds the clusterer from the class-level settings: the class-level option
# string is applied through set_options when one is set, then buildClusterer
# is called with the class-level dataset.
def init_clusterer
  klass = self.class
  preset = klass.options
  set_options(preset) if preset
  buildClusterer(klass.data)
end
11
+
12
# Configures this clusterer through a block and then builds it.
#
# The block is run with instance_eval, so calls made inside it (for example
# set_data or set_options) are sent to this clusterer. Afterwards
# buildClusterer is called with the dataset stored by set_data. When the block
# stored no dataset, the class-level dataset is used instead of passing nil.
def init_instance_clusterer(&block)
  instance_eval(&block)
  #@dataset.setClassIndex(@class_index)
  buildClusterer(@dataset || self.class.data)
end
17
+
18
# Hook called when this module is included in a class: the including class is
# extended with ClassMethods, so it also gains the class-level accessors.
def self.included(base)
  base.extend ClassMethods
end
22
+
23
# Stores the dataset for this particular clusterer instance (kept in @dataset)
# and returns it.
def set_data(data)
  @dataset = data
end
27
+
28
# Applies an option string to the clusterer: the string is split with
# Utils.splitOptions and the resulting tokens are passed to setOptions.
def set_options(options)
  setOptions(Utils.splitOptions(options))
end
33
+
34
# Returns a String with one line per entry of listOptions; each line is the
# entry's synopsis followed by a space and its description.
def list_options
  lines = listOptions.map { |opt| "#{opt.synopsis} #{opt.description}" }
  lines.join("\n")
end
37
+
38
# Returns the result of globalInfo, i.e. the description provided by the Weka
# documentation for this clusterer.
def description
  globalInfo
end
42
+
43
# Returns the cluster centroids with their coordinates, as reported by
# getClusterCentroids.
# NOTE(review): presumably only works for clusterers that expose
# getClusterCentroids - confirm which of the imported clusterers do.
def get_centroids
  getClusterCentroids
end
47
+
48
# Returns the clusterer's capabilities as a String (get_capabilities converted
# with to_s); per the original note this lists the supported attribute kinds
# (e.g. Numeric, Nominal...).
def list_capabilities
  get_capabilities.to_s
end
52
+
53
# Validates the clusterer with a ClusterEvaluation and returns the report
# produced by clusterResultsToString.
#
# If the evaluation needs to be performed on a different dataset, pass it as
# the optional first argument (an Instances class object). Without an
# argument the dataset stored on this instance by set_data is used, and the
# class-level dataset only when the instance has none. The instance dataset
# takes precedence because it is the one init_instance_clusterer builds the
# clusterer from.
def evaluate(*args)
  # Named `evaluation` rather than `eval` so Kernel#eval is not shadowed.
  evaluation = ClusterEvaluation.new
  evaluation.setClusterer(self)
  evaluation.evaluateClusterer(args[0] || @dataset || self.class.data)
  puts 'performing evaluation'
  evaluation.clusterResultsToString
end
70
+
71
# Class methods module: extended onto every class that includes the
# surrounding module, giving it class-level `options` and `data` settings.
module ClassMethods
  # Defines, for each given name, a getter `name` and a setter `set_name(val)`
  # backed by the instance variable `@name` of the extended class.
  # Methods are created with define_method rather than by evaluating
  # interpolated source strings, so a name can never be executed as code.
  def self.classifier_attr_accessor(*args)
    args.each do |arg|
      ivar = "@#{arg}"
      # Here's the getter
      define_method(arg) { instance_variable_get(ivar) }
      # Here's the setter
      define_method("set_#{arg}") { |val| instance_variable_set(ivar, val) }
    end
  end

  classifier_attr_accessor :options, :data
end
86
+ end
@@ -0,0 +1,280 @@
1
+ # Database settings for MySQL 3.23.x, 4.x
2
+ #
3
+ # General information on database access can be found here:
4
+ # http://weka.wikispaces.com/Databases
5
+ #
6
+ # url: http://www.mysql.com/
7
+ # jdbc: http://www.mysql.com/products/connector/j/
8
+ # author: Fracpete (fracpete at waikato dot ac dot nz)
9
+ # version: $Revision: 5836 $
10
+
11
+ # JDBC driver (comma-separated list)
12
+ jdbcDriver=org.gjt.mm.mysql.Driver
13
+
14
+ # database URL
15
+ jdbcURL=jdbc:mysql://server_name:3306/database_name
16
+
17
+ # specific data types
18
+ # string, getString() = 0; --> nominal
19
+ # boolean, getBoolean() = 1; --> nominal
20
+ # double, getDouble() = 2; --> numeric
21
+ # byte, getByte() = 3; --> numeric
22
+ # short, getShort() = 4; --> numeric
23
+ # int, getInteger() = 5; --> numeric
24
+ # long, getLong() = 6; --> numeric
25
+ # float, getFloat() = 7; --> numeric
26
+ # date, getDate() = 8; --> date
27
+ # text, getString() = 9; --> string
28
+ # time, getTime() = 10; --> date
29
+
30
+ # other options
31
+ CREATE_DOUBLE=DOUBLE
32
+ CREATE_STRING=TEXT
33
+ CREATE_INT=INT
34
+ CREATE_DATE=DATETIME
35
+ DateFormat=yyyy-MM-dd HH:mm:ss
36
+ checkUpperCaseNames=false
37
+ checkLowerCaseNames=false
38
+ checkForTable=true
39
+
40
+ # All the reserved keywords for this database
41
+ # Based on the keywords listed at the following URL (2009-04-13):
42
+ # http://dev.mysql.com/doc/mysqld-version-reference/en/mysqld-version-reference-reservedwords-5-0.html
43
+ Keywords=\
44
+ ADD,\
45
+ ALL,\
46
+ ALTER,\
47
+ ANALYZE,\
48
+ AND,\
49
+ AS,\
50
+ ASC,\
51
+ ASENSITIVE,\
52
+ BEFORE,\
53
+ BETWEEN,\
54
+ BIGINT,\
55
+ BINARY,\
56
+ BLOB,\
57
+ BOTH,\
58
+ BY,\
59
+ CALL,\
60
+ CASCADE,\
61
+ CASE,\
62
+ CHANGE,\
63
+ CHAR,\
64
+ CHARACTER,\
65
+ CHECK,\
66
+ COLLATE,\
67
+ COLUMN,\
68
+ COLUMNS,\
69
+ CONDITION,\
70
+ CONNECTION,\
71
+ CONSTRAINT,\
72
+ CONTINUE,\
73
+ CONVERT,\
74
+ CREATE,\
75
+ CROSS,\
76
+ CURRENT_DATE,\
77
+ CURRENT_TIME,\
78
+ CURRENT_TIMESTAMP,\
79
+ CURRENT_USER,\
80
+ CURSOR,\
81
+ DATABASE,\
82
+ DATABASES,\
83
+ DAY_HOUR,\
84
+ DAY_MICROSECOND,\
85
+ DAY_MINUTE,\
86
+ DAY_SECOND,\
87
+ DEC,\
88
+ DECIMAL,\
89
+ DECLARE,\
90
+ DEFAULT,\
91
+ DELAYED,\
92
+ DELETE,\
93
+ DESC,\
94
+ DESCRIBE,\
95
+ DETERMINISTIC,\
96
+ DISTINCT,\
97
+ DISTINCTROW,\
98
+ DIV,\
99
+ DOUBLE,\
100
+ DROP,\
101
+ DUAL,\
102
+ EACH,\
103
+ ELSE,\
104
+ ELSEIF,\
105
+ ENCLOSED,\
106
+ ESCAPED,\
107
+ EXISTS,\
108
+ EXIT,\
109
+ EXPLAIN,\
110
+ FALSE,\
111
+ FETCH,\
112
+ FIELDS,\
113
+ FLOAT,\
114
+ FLOAT4,\
115
+ FLOAT8,\
116
+ FOR,\
117
+ FORCE,\
118
+ FOREIGN,\
119
+ FROM,\
120
+ FULLTEXT,\
121
+ GOTO,\
122
+ GRANT,\
123
+ GROUP,\
124
+ HAVING,\
125
+ HIGH_PRIORITY,\
126
+ HOUR_MICROSECOND,\
127
+ HOUR_MINUTE,\
128
+ HOUR_SECOND,\
129
+ IF,\
130
+ IGNORE,\
131
+ IN,\
132
+ INDEX,\
133
+ INFILE,\
134
+ INNER,\
135
+ INOUT,\
136
+ INSENSITIVE,\
137
+ INSERT,\
138
+ INT,\
139
+ INT1,\
140
+ INT2,\
141
+ INT3,\
142
+ INT4,\
143
+ INT8,\
144
+ INTEGER,\
145
+ INTERVAL,\
146
+ INTO,\
147
+ IS,\
148
+ ITERATE,\
149
+ JOIN,\
150
+ KEY,\
151
+ KEYS,\
152
+ KILL,\
153
+ LABEL,\
154
+ LEADING,\
155
+ LEAVE,\
156
+ LEFT,\
157
+ LIKE,\
158
+ LIMIT,\
159
+ LINES,\
160
+ LOAD,\
161
+ LOCALTIME,\
162
+ LOCALTIMESTAMP,\
163
+ LOCK,\
164
+ LONG,\
165
+ LONGBLOB,\
166
+ LONGTEXT,\
167
+ LOOP,\
168
+ LOW_PRIORITY,\
169
+ MATCH,\
170
+ MEDIUMBLOB,\
171
+ MEDIUMINT,\
172
+ MEDIUMTEXT,\
173
+ MIDDLEINT,\
174
+ MINUTE_MICROSECOND,\
175
+ MINUTE_SECOND,\
176
+ MOD,\
177
+ MODIFIES,\
178
+ NATURAL,\
179
+ NOT,\
180
+ NO_WRITE_TO_BINLOG,\
181
+ NULL,\
182
+ NUMERIC,\
183
+ ON,\
184
+ OPTIMIZE,\
185
+ OPTION,\
186
+ OPTIONALLY,\
187
+ OR,\
188
+ ORDER,\
189
+ OUT,\
190
+ OUTER,\
191
+ OUTFILE,\
192
+ PRECISION,\
193
+ PRIMARY,\
194
+ PRIVILEGES,\
195
+ PROCEDURE,\
196
+ PURGE,\
197
+ READ,\
198
+ READS,\
199
+ REAL,\
200
+ REFERENCES,\
201
+ REGEXP,\
202
+ RELEASE,\
203
+ RENAME,\
204
+ REPEAT,\
205
+ REPLACE,\
206
+ REQUIRE,\
207
+ RESTRICT,\
208
+ RETURN,\
209
+ REVOKE,\
210
+ RIGHT,\
211
+ RLIKE,\
212
+ SCHEMA,\
213
+ SCHEMAS,\
214
+ SECOND_MICROSECOND,\
215
+ SELECT,\
216
+ SENSITIVE,\
217
+ SEPARATOR,\
218
+ SET,\
219
+ SHOW,\
220
+ SMALLINT,\
221
+ SONAME,\
222
+ SPATIAL,\
223
+ SPECIFIC,\
224
+ SQL,\
225
+ SQLEXCEPTION,\
226
+ SQLSTATE,\
227
+ SQLWARNING,\
228
+ SQL_BIG_RESULT,\
229
+ SQL_CALC_FOUND_ROWS,\
230
+ SQL_SMALL_RESULT,\
231
+ SSL,\
232
+ STARTING,\
233
+ STRAIGHT_JOIN,\
234
+ TABLE,\
235
+ TABLES,\
236
+ TERMINATED,\
237
+ THEN,\
238
+ TINYBLOB,\
239
+ TINYINT,\
240
+ TINYTEXT,\
241
+ TO,\
242
+ TRAILING,\
243
+ TRIGGER,\
244
+ TRUE,\
245
+ UNDO,\
246
+ UNION,\
247
+ UNIQUE,\
248
+ UNLOCK,\
249
+ UNSIGNED,\
250
+ UPDATE,\
251
+ UPGRADE,\
252
+ USAGE,\
253
+ USE,\
254
+ USING,\
255
+ UTC_DATE,\
256
+ UTC_TIME,\
257
+ UTC_TIMESTAMP,\
258
+ VALUES,\
259
+ VARBINARY,\
260
+ VARCHAR,\
261
+ VARCHARACTER,\
262
+ VARYING,\
263
+ WHEN,\
264
+ WHERE,\
265
+ WHILE,\
266
+ WITH,\
267
+ WRITE,\
268
+ XOR,\
269
+ YEAR_MONTH,\
270
+ ZEROFILL
271
+
272
+ # The character to append to attribute names to avoid exceptions due to
273
+ # clashes between keywords and attribute names
274
+ KeywordsMaskChar=_
275
+
276
+ #flags for loading and saving instances using DatabaseLoader/Saver
277
+ nominalToStringLimit=50
278
+ idColumn=auto_generated_id
279
+
280
+