ruby-band 0.1.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. data/.travis.yml +3 -0
  2. data/Gemfile +30 -0
  3. data/Gemfile.lock +119 -0
  4. data/Jarfile +9 -0
  5. data/Jarfile.lock +10 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +321 -0
  8. data/README.rdoc +70 -0
  9. data/Rakefile +66 -0
  10. data/VERSION +1 -0
  11. data/band_server/client.rb +35 -0
  12. data/band_server/client_alt.rb +35 -0
  13. data/band_server/first_dataset.csv +15 -0
  14. data/band_server/second_dataset.csv +15 -0
  15. data/band_server/simple_server.rb +90 -0
  16. data/band_server/third_dataset.csv +15 -0
  17. data/band_server/uploads/first_dataset.csv +15 -0
  18. data/band_server/uploads/second_dataset.csv +15 -0
  19. data/band_server/uploads/third_dataset.csv +15 -0
  20. data/bin/ruby-band +83 -0
  21. data/ext/mkrf_conf.rb +74 -0
  22. data/features/create_dataset.feature +12 -0
  23. data/features/step_definitions/create_dataset.rb +39 -0
  24. data/features/step_definitions/weka_classifiers.rb +43 -0
  25. data/features/step_definitions/weka_clustering.rb +34 -0
  26. data/features/step_definitions/weka_filters.rb +32 -0
  27. data/features/step_definitions/weka_parsers.rb +46 -0
  28. data/features/step_definitions/weka_pipeline.rb +41 -0
  29. data/features/support/env.rb +3 -0
  30. data/features/weka_classifiers.feature +16 -0
  31. data/features/weka_clustering.feature +15 -0
  32. data/features/weka_filters.feature +12 -0
  33. data/features/weka_parsers.feature +18 -0
  34. data/features/weka_pipeline.feature +14 -0
  35. data/lib/ruby-band.rb +12 -0
  36. data/lib/ruby-band/apache.rb +2 -0
  37. data/lib/ruby-band/apache/stat/correlation.rb +42 -0
  38. data/lib/ruby-band/apache/stat/inference.rb +151 -0
  39. data/lib/ruby-band/apache/stat/regression.rb +22 -0
  40. data/lib/ruby-band/core.rb +6 -0
  41. data/lib/ruby-band/core/parser/parser.rb +27 -0
  42. data/lib/ruby-band/core/type/apache_matrices.rb +35 -0
  43. data/lib/ruby-band/core/type/attribute.rb +53 -0
  44. data/lib/ruby-band/core/type/instance.rb +10 -0
  45. data/lib/ruby-band/core/type/instances.rb +361 -0
  46. data/lib/ruby-band/core/type/utils.rb +31 -0
  47. data/lib/ruby-band/weka.rb +14 -0
  48. data/lib/ruby-band/weka/attribute_selection/attribute_selection_utils.rb +20 -0
  49. data/lib/ruby-band/weka/attribute_selection/evaluators.rb +58 -0
  50. data/lib/ruby-band/weka/attribute_selection/search.rb +52 -0
  51. data/lib/ruby-band/weka/classifiers/bayes/bayes.rb +86 -0
  52. data/lib/ruby-band/weka/classifiers/bayes/bayes_utils.rb +82 -0
  53. data/lib/ruby-band/weka/classifiers/evaluation.rb +13 -0
  54. data/lib/ruby-band/weka/classifiers/functions/functions.rb +177 -0
  55. data/lib/ruby-band/weka/classifiers/functions/functions_utils.rb +78 -0
  56. data/lib/ruby-band/weka/classifiers/lazy/lazy.rb +86 -0
  57. data/lib/ruby-band/weka/classifiers/lazy/lazy_utils.rb +83 -0
  58. data/lib/ruby-band/weka/classifiers/mi/mi.rb +191 -0
  59. data/lib/ruby-band/weka/classifiers/mi/mi_utils.rb +80 -0
  60. data/lib/ruby-band/weka/classifiers/rules/rules.rb +190 -0
  61. data/lib/ruby-band/weka/classifiers/rules/rules_utils.rb +81 -0
  62. data/lib/ruby-band/weka/classifiers/trees/trees.rb +110 -0
  63. data/lib/ruby-band/weka/classifiers/trees/trees_utils.rb +85 -0
  64. data/lib/ruby-band/weka/clusterers/clusterers.rb +99 -0
  65. data/lib/ruby-band/weka/clusterers/clusterers_utils.rb +86 -0
  66. data/lib/ruby-band/weka/db/DatabaseUtils_mysql +280 -0
  67. data/lib/ruby-band/weka/db/DatabaseUtils_postgresql +594 -0
  68. data/lib/ruby-band/weka/db/db.rb +74 -0
  69. data/lib/ruby-band/weka/filters/supervised/attribute/attribute.rb +55 -0
  70. data/lib/ruby-band/weka/filters/supervised/instance/instance.rb +17 -0
  71. data/lib/ruby-band/weka/filters/supervised/supervised_utils.rb +38 -0
  72. data/lib/ruby-band/weka/filters/unsupervised/attribute/attribute.rb +90 -0
  73. data/lib/ruby-band/weka/filters/unsupervised/instance/instance.rb +48 -0
  74. data/lib/ruby-band/weka/filters/unsupervised/unsupervised_utils.rb +38 -0
  75. data/resources/ReutersGrain-test.arff +611 -0
  76. data/resources/ReutersGrain-train.arff +1561 -0
  77. data/resources/weather.csv +15 -0
  78. data/resources/weather.numeric.arff +23 -0
  79. data/ruby-band.gemspec +178 -0
  80. data/spec/ruby-band_spec.rb +7 -0
  81. data/spec/spec_helper.rb +12 -0
  82. data/test/helper.rb +18 -0
  83. data/test/test_apacheCorrelation.rb +22 -0
  84. data/test/test_apacheInference.rb +46 -0
  85. data/test/test_ruby-band.rb +9 -0
  86. metadata +426 -0
@@ -0,0 +1,99 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ require 'clusterers_utils'
3
+
module Weka
  # Wrappers around the clusterers of the 'weka.clusterers' package.
  #
  # Each imported clusterer class is reopened to mix in Clusterer_utils and
  # is given a nested Base subclass. Instantiating a Base class builds the
  # clusterer immediately: from the given block when there is one, otherwise
  # through init_clusterer.
  module Clusterer
    java_import 'weka.clusterers.SimpleKMeans'
    java_import 'weka.clusterers.FarthestFirst'
    java_import 'weka.clusterers.EM'
    java_import 'weka.clusterers.XMeans'
    java_import 'weka.clusterers.HierarchicalClusterer'
    java_import 'weka.clusterers.Cobweb'

    class Cobweb
      include Clusterer_utils

      # Cobweb clusterer that is built as part of its construction.
      class Base < Cobweb
        # A block, when given, is handed to init_instance_clusterer;
        # without a block init_clusterer is used instead.
        def initialize(&block)
          super
          block_given? ? init_instance_clusterer(&block) : init_clusterer
        end
      end
    end

    class EM
      include Clusterer_utils

      # EM clusterer that is built as part of its construction.
      class Base < EM
        # A block, when given, is handed to init_instance_clusterer;
        # without a block init_clusterer is used instead.
        def initialize(&block)
          super
          block_given? ? init_instance_clusterer(&block) : init_clusterer
        end
      end
    end

    class HierarchicalClusterer
      include Clusterer_utils

      # HierarchicalClusterer that is built as part of its construction.
      class Base < HierarchicalClusterer
        # A block, when given, is handed to init_instance_clusterer;
        # without a block init_clusterer is used instead.
        def initialize(&block)
          super
          block_given? ? init_instance_clusterer(&block) : init_clusterer
        end
      end
    end

    class SimpleKMeans
      include Clusterer_utils

      # SimpleKMeans clusterer that is built as part of its construction.
      class Base < SimpleKMeans
        # A block, when given, is handed to init_instance_clusterer;
        # without a block init_clusterer is used instead.
        def initialize(&block)
          super
          block_given? ? init_instance_clusterer(&block) : init_clusterer
        end
      end
    end

    class FarthestFirst
      include Clusterer_utils

      # FarthestFirst clusterer that is built as part of its construction.
      class Base < FarthestFirst
        # A block, when given, is handed to init_instance_clusterer;
        # without a block init_clusterer is used instead.
        def initialize(&block)
          super
          block_given? ? init_instance_clusterer(&block) : init_clusterer
        end
      end
    end

    class XMeans
      include Clusterer_utils

      # XMeans clusterer that is built as part of its construction.
      class Base < XMeans
        # A block, when given, is handed to init_instance_clusterer;
        # without a block init_clusterer is used instead.
        def initialize(&block)
          super
          block_given? ? init_instance_clusterer(&block) : init_clusterer
        end
      end
    end
  end
end
@@ -0,0 +1,86 @@
# Mixin used by the classes of the Clusterer module.
#
# Including it gives a clusterer the instance methods defined below and,
# through the +included+ hook, extends the including class with the
# class-level accessors declared in ClassMethods (+options+, +set_options+,
# +data+, +set_data+).
module Clusterer_utils
  java_import "weka.core.Utils"
  java_import "weka.clusterers.ClusterEvaluation"

  # Hook run when the module is included: adds the class-level accessors
  # from ClassMethods to the including class.
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Builds the clusterer from the class-level settings: the class options
  # are applied only when some are set, then buildClusterer is called with
  # the class-level data.
  def init_clusterer
    class_options = self.class.options
    set_options(class_options) if class_options
    buildClusterer(self.class.data)
  end

  # Evaluates the block in the context of this clusterer (so it can call
  # set_data, set_options, ...) and then builds the clusterer on the
  # instance dataset. No class index is set on the dataset here.
  def init_instance_clusterer(&block)
    instance_eval(&block)
    buildClusterer(@dataset)
  end

  # Sets the instance data for the clusterer.
  def set_data(data)
    @dataset = data
  end

  # Sets the options for the clusterer. +options+ is a single option string;
  # it is split with Utils.splitOptions before being passed to setOptions.
  def set_options(options)
    setOptions(Utils.splitOptions(options))
  end

  # Returns one line per entry of listOptions, made of the entry's synopsis
  # followed by its description.
  def list_options
    listOptions.map { |option| "#{option.synopsis} #{option.description}" }.join("\n")
  end

  # The description provided by the Weka documentation (globalInfo).
  def description
    globalInfo
  end

  # Lists the cluster centroids with their coordinates.
  # NOTE(review): delegates to getClusterCentroids, so this presumably only
  # works for clusterers that expose that method -- confirm per clusterer.
  def get_centroids
    getClusterCentroids
  end

  # Lists the clusterer's capabilities (i.e. Numeric, Nominal...) as a string.
  def list_capabilities
    get_capabilities.to_s
  end

  # Validates the clusterer and returns the evaluation results as a string.
  #
  # The dataset used is, in order of precedence: the optional first argument
  # (an Instances object), the class-level data, the instance dataset.
  def evaluate(*args)
    # Named +evaluation+ rather than +eval+ to avoid shadowing Kernel#eval.
    evaluation = ClusterEvaluation.new
    evaluation.setClusterer(self)
    dataset = args[0] || self.class.data || @dataset
    evaluation.evaluateClusterer(dataset)
    puts 'performing evaluation'
    evaluation.clusterResultsToString
  end

  # Class-level methods added to every class that includes Clusterer_utils.
  module ClassMethods
    # For each given name defines a reader (+name+) and a writer
    # (+set_name+) backed by the instance variable of the same name.
    def self.classifier_attr_accessor(*args)
      args.each do |arg|
        ivar = "@#{arg}"
        # Here's the getter
        define_method(arg) { instance_variable_get(ivar) }
        # Here's the setter
        define_method("set_#{arg}") { |val| instance_variable_set(ivar, val) }
      end
    end

    classifier_attr_accessor :options, :data
  end
end
@@ -0,0 +1,280 @@
1
+ # Database settings for MySQL 3.23.x, 4.x
2
+ #
3
+ # General information on database access can be found here:
4
+ # http://weka.wikispaces.com/Databases
5
+ #
6
+ # url: http://www.mysql.com/
7
+ # jdbc: http://www.mysql.com/products/connector/j/
8
+ # author: Fracpete (fracpete at waikato dot ac dot nz)
9
+ # version: $Revision: 5836 $
10
+
11
+ # JDBC driver (comma-separated list)
12
+ jdbcDriver=org.gjt.mm.mysql.Driver
13
+
14
+ # database URL
15
+ jdbcURL=jdbc:mysql://server_name:3306/database_name
16
+
17
+ # specific data types
18
+ # string, getString() = 0; --> nominal
19
+ # boolean, getBoolean() = 1; --> nominal
20
+ # double, getDouble() = 2; --> numeric
21
+ # byte, getByte() = 3; --> numeric
22
+ # short, getShort() = 4; --> numeric
23
+ # int, getInt() = 5; --> numeric
24
+ # long, getLong() = 6; --> numeric
25
+ # float, getFloat() = 7; --> numeric
26
+ # date, getDate() = 8; --> date
27
+ # text, getString() = 9; --> string
28
+ # time, getTime() = 10; --> date
29
+
30
+ # other options
31
+ CREATE_DOUBLE=DOUBLE
32
+ CREATE_STRING=TEXT
33
+ CREATE_INT=INT
34
+ CREATE_DATE=DATETIME
35
+ DateFormat=yyyy-MM-dd HH:mm:ss
36
+ checkUpperCaseNames=false
37
+ checkLowerCaseNames=false
38
+ checkForTable=true
39
+
40
+ # All the reserved keywords for this database
41
+ # Based on the keywords listed at the following URL (2009-04-13):
42
+ # http://dev.mysql.com/doc/mysqld-version-reference/en/mysqld-version-reference-reservedwords-5-0.html
43
+ Keywords=\
44
+ ADD,\
45
+ ALL,\
46
+ ALTER,\
47
+ ANALYZE,\
48
+ AND,\
49
+ AS,\
50
+ ASC,\
51
+ ASENSITIVE,\
52
+ BEFORE,\
53
+ BETWEEN,\
54
+ BIGINT,\
55
+ BINARY,\
56
+ BLOB,\
57
+ BOTH,\
58
+ BY,\
59
+ CALL,\
60
+ CASCADE,\
61
+ CASE,\
62
+ CHANGE,\
63
+ CHAR,\
64
+ CHARACTER,\
65
+ CHECK,\
66
+ COLLATE,\
67
+ COLUMN,\
68
+ COLUMNS,\
69
+ CONDITION,\
70
+ CONNECTION,\
71
+ CONSTRAINT,\
72
+ CONTINUE,\
73
+ CONVERT,\
74
+ CREATE,\
75
+ CROSS,\
76
+ CURRENT_DATE,\
77
+ CURRENT_TIME,\
78
+ CURRENT_TIMESTAMP,\
79
+ CURRENT_USER,\
80
+ CURSOR,\
81
+ DATABASE,\
82
+ DATABASES,\
83
+ DAY_HOUR,\
84
+ DAY_MICROSECOND,\
85
+ DAY_MINUTE,\
86
+ DAY_SECOND,\
87
+ DEC,\
88
+ DECIMAL,\
89
+ DECLARE,\
90
+ DEFAULT,\
91
+ DELAYED,\
92
+ DELETE,\
93
+ DESC,\
94
+ DESCRIBE,\
95
+ DETERMINISTIC,\
96
+ DISTINCT,\
97
+ DISTINCTROW,\
98
+ DIV,\
99
+ DOUBLE,\
100
+ DROP,\
101
+ DUAL,\
102
+ EACH,\
103
+ ELSE,\
104
+ ELSEIF,\
105
+ ENCLOSED,\
106
+ ESCAPED,\
107
+ EXISTS,\
108
+ EXIT,\
109
+ EXPLAIN,\
110
+ FALSE,\
111
+ FETCH,\
112
+ FIELDS,\
113
+ FLOAT,\
114
+ FLOAT4,\
115
+ FLOAT8,\
116
+ FOR,\
117
+ FORCE,\
118
+ FOREIGN,\
119
+ FROM,\
120
+ FULLTEXT,\
121
+ GOTO,\
122
+ GRANT,\
123
+ GROUP,\
124
+ HAVING,\
125
+ HIGH_PRIORITY,\
126
+ HOUR_MICROSECOND,\
127
+ HOUR_MINUTE,\
128
+ HOUR_SECOND,\
129
+ IF,\
130
+ IGNORE,\
131
+ IN,\
132
+ INDEX,\
133
+ INFILE,\
134
+ INNER,\
135
+ INOUT,\
136
+ INSENSITIVE,\
137
+ INSERT,\
138
+ INT,\
139
+ INT1,\
140
+ INT2,\
141
+ INT3,\
142
+ INT4,\
143
+ INT8,\
144
+ INTEGER,\
145
+ INTERVAL,\
146
+ INTO,\
147
+ IS,\
148
+ ITERATE,\
149
+ JOIN,\
150
+ KEY,\
151
+ KEYS,\
152
+ KILL,\
153
+ LABEL,\
154
+ LEADING,\
155
+ LEAVE,\
156
+ LEFT,\
157
+ LIKE,\
158
+ LIMIT,\
159
+ LINES,\
160
+ LOAD,\
161
+ LOCALTIME,\
162
+ LOCALTIMESTAMP,\
163
+ LOCK,\
164
+ LONG,\
165
+ LONGBLOB,\
166
+ LONGTEXT,\
167
+ LOOP,\
168
+ LOW_PRIORITY,\
169
+ MATCH,\
170
+ MEDIUMBLOB,\
171
+ MEDIUMINT,\
172
+ MEDIUMTEXT,\
173
+ MIDDLEINT,\
174
+ MINUTE_MICROSECOND,\
175
+ MINUTE_SECOND,\
176
+ MOD,\
177
+ MODIFIES,\
178
+ NATURAL,\
179
+ NOT,\
180
+ NO_WRITE_TO_BINLOG,\
181
+ NULL,\
182
+ NUMERIC,\
183
+ ON,\
184
+ OPTIMIZE,\
185
+ OPTION,\
186
+ OPTIONALLY,\
187
+ OR,\
188
+ ORDER,\
189
+ OUT,\
190
+ OUTER,\
191
+ OUTFILE,\
192
+ PRECISION,\
193
+ PRIMARY,\
194
+ PRIVILEGES,\
195
+ PROCEDURE,\
196
+ PURGE,\
197
+ READ,\
198
+ READS,\
199
+ REAL,\
200
+ REFERENCES,\
201
+ REGEXP,\
202
+ RELEASE,\
203
+ RENAME,\
204
+ REPEAT,\
205
+ REPLACE,\
206
+ REQUIRE,\
207
+ RESTRICT,\
208
+ RETURN,\
209
+ REVOKE,\
210
+ RIGHT,\
211
+ RLIKE,\
212
+ SCHEMA,\
213
+ SCHEMAS,\
214
+ SECOND_MICROSECOND,\
215
+ SELECT,\
216
+ SENSITIVE,\
217
+ SEPARATOR,\
218
+ SET,\
219
+ SHOW,\
220
+ SMALLINT,\
221
+ SONAME,\
222
+ SPATIAL,\
223
+ SPECIFIC,\
224
+ SQL,\
225
+ SQLEXCEPTION,\
226
+ SQLSTATE,\
227
+ SQLWARNING,\
228
+ SQL_BIG_RESULT,\
229
+ SQL_CALC_FOUND_ROWS,\
230
+ SQL_SMALL_RESULT,\
231
+ SSL,\
232
+ STARTING,\
233
+ STRAIGHT_JOIN,\
234
+ TABLE,\
235
+ TABLES,\
236
+ TERMINATED,\
237
+ THEN,\
238
+ TINYBLOB,\
239
+ TINYINT,\
240
+ TINYTEXT,\
241
+ TO,\
242
+ TRAILING,\
243
+ TRIGGER,\
244
+ TRUE,\
245
+ UNDO,\
246
+ UNION,\
247
+ UNIQUE,\
248
+ UNLOCK,\
249
+ UNSIGNED,\
250
+ UPDATE,\
251
+ UPGRADE,\
252
+ USAGE,\
253
+ USE,\
254
+ USING,\
255
+ UTC_DATE,\
256
+ UTC_TIME,\
257
+ UTC_TIMESTAMP,\
258
+ VALUES,\
259
+ VARBINARY,\
260
+ VARCHAR,\
261
+ VARCHARACTER,\
262
+ VARYING,\
263
+ WHEN,\
264
+ WHERE,\
265
+ WHILE,\
266
+ WITH,\
267
+ WRITE,\
268
+ XOR,\
269
+ YEAR_MONTH,\
270
+ ZEROFILL
271
+
272
+ # The character to append to attribute names to avoid exceptions due to
273
+ # clashes between keywords and attribute names
274
+ KeywordsMaskChar=_
275
+
276
+ #flags for loading and saving instances using DatabaseLoader/Saver
277
+ nominalToStringLimit=50
278
+ idColumn=auto_generated_id
279
+
280
+