wwood-rarff 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Rakefile +1 -1
- data/lib/rarff.rb +31 -8
- data/test/test_rarff.rb +64 -2
- metadata +3 -2
data/History.txt
CHANGED
data/Rakefile
CHANGED
data/lib/rarff.rb
CHANGED
@@ -63,7 +63,8 @@ module Rarff
|
|
63
63
|
################################################################################
|
64
64
|
|
65
65
|
class Attribute
|
66
|
-
attr_accessor :name
|
66
|
+
attr_accessor :name
|
67
|
+
attr_reader :type
|
67
68
|
|
68
69
|
def initialize(name='', type='')
|
69
70
|
@name = name
|
@@ -88,7 +89,8 @@ module Rarff
|
|
88
89
|
@type_is_nominal = true
|
89
90
|
# Example format: "{nom1,nom2, nom3, nom4,nom5 } "
|
90
91
|
# Split on '{' ',' or '}'
|
91
|
-
@type = @type.gsub(/^\s*\{\s*/, '').gsub(/\s*\}\s*$/, '').split(/\s*\,\s*/)
|
92
|
+
# @type = @type.gsub(/^\s*\{\s*/, '').gsub(/\s*\}\s*$/, '').split(/\s*\,\s*/)
|
93
|
+
@type = @type.split(/\s*\,\s*/)
|
92
94
|
end
|
93
95
|
end
|
94
96
|
|
@@ -104,7 +106,7 @@ module Rarff
|
|
104
106
|
|
105
107
|
def to_arff
|
106
108
|
if @type_is_nominal == true
|
107
|
-
ATTRIBUTE_MARKER + " #{@name}
|
109
|
+
ATTRIBUTE_MARKER + " #{@name} #{@type.join(',').gsub(' ','_')}"
|
108
110
|
else
|
109
111
|
ATTRIBUTE_MARKER + " #{@name} #{@type}"
|
110
112
|
end
|
@@ -120,7 +122,8 @@ module Rarff
|
|
120
122
|
|
121
123
|
|
122
124
|
class Relation
|
123
|
-
attr_accessor :name, :attributes
|
125
|
+
attr_accessor :name, :attributes
|
126
|
+
attr_reader :instances
|
124
127
|
|
125
128
|
|
126
129
|
def initialize(name='')
|
@@ -197,10 +200,10 @@ module Rarff
|
|
197
200
|
@attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_NUMERIC)
|
198
201
|
elsif col.kind_of?(String)
|
199
202
|
@attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_STRING)
|
200
|
-
elsif col
|
203
|
+
elsif col == false or col == true #exactly equal to a boolean
|
201
204
|
@attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_BOOLEAN)
|
202
205
|
else
|
203
|
-
raise Exception, "Could not parse attribute: #{col.inspect}"
|
206
|
+
raise Exception, "Could not parse attribute to ARFF data type: #{col.inspect}"
|
204
207
|
end
|
205
208
|
}
|
206
209
|
}
|
@@ -212,6 +215,26 @@ module Rarff
|
|
212
215
|
end
|
213
216
|
end
|
214
217
|
|
218
|
+
# Make all String type attributes into nominal attributes, because
|
219
|
+
# they are more useful in WEKA because more techniques handle them than
|
220
|
+
# strings
|
221
|
+
def set_string_attributes_to_nominal
|
222
|
+
nominals = {}
|
223
|
+
# Frustratingly, we have to traverse this 2D array with the
|
224
|
+
# wrong dimension first. Oh well.
|
225
|
+
@instances.each_with_index do |row, row_index|
|
226
|
+
row.each_with_index do |string, col_index|
|
227
|
+
next unless @attributes[col_index].type == ATTRIBUTE_STRING
|
228
|
+
|
229
|
+
nominals[col_index] ||= {}
|
230
|
+
nominals[col_index][string] ||= true
|
231
|
+
end
|
232
|
+
end
|
233
|
+
|
234
|
+
nominals.each do |index, strings|
|
235
|
+
@attributes[index].type = "{#{strings.keys.join(',')}}"
|
236
|
+
end
|
237
|
+
end
|
215
238
|
|
216
239
|
def expand_sparse(str)
|
217
240
|
arr = Array.new(@attributes.size, 0)
|
@@ -247,7 +270,7 @@ module Rarff
|
|
247
270
|
|
248
271
|
# Do the final output
|
249
272
|
if sparse
|
250
|
-
if col.nil? or
|
273
|
+
if col.nil? or
|
251
274
|
(@attributes[i].type =~ /^#{ATTRIBUTE_NUMERIC}$/i and col == 0)
|
252
275
|
nil
|
253
276
|
else
|
@@ -267,7 +290,7 @@ module Rarff
|
|
267
290
|
else
|
268
291
|
mapped.join(", ")
|
269
292
|
end
|
270
|
-
}.join("\n")
|
293
|
+
}.join("\n")
|
271
294
|
end
|
272
295
|
|
273
296
|
|
data/test/test_rarff.rb
CHANGED
@@ -101,7 +101,6 @@ class TestArffLib < Test::Unit::TestCase
|
|
101
101
|
# assert_equal(0, rel.instances[3][12])
|
102
102
|
# # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
|
103
103
|
# end
|
104
|
-
#
|
105
104
|
def test_output_missing
|
106
105
|
arff_file_str = <<-END_OF_ARFF_FILE
|
107
106
|
@RELATION MyCoolRelation
|
@@ -155,12 +154,75 @@ class TestArffLib < Test::Unit::TestCase
|
|
155
154
|
rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
|
156
155
|
|
157
156
|
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
158
|
-
assert_equal(arff_file_str, rel.to_arff, "missing data output failure")
|
157
|
+
assert_equal(arff_file_str, rel.to_arff, "missing data from first line output failure")
|
159
158
|
end
|
160
159
|
|
161
160
|
def test_boolean
|
162
161
|
arff_file_str = <<-END_OF_ARFF_FILE
|
163
162
|
@RELATION MyCoolRelation
|
163
|
+
@ATTRIBUTE Attr0 {false,true}
|
164
|
+
@DATA
|
165
|
+
true
|
166
|
+
END_OF_ARFF_FILE
|
167
|
+
|
168
|
+
arff_file_str.gsub!(/\n$/, '')
|
169
|
+
|
170
|
+
instances = [ [true]]
|
171
|
+
|
172
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
173
|
+
rel.instances = instances
|
174
|
+
|
175
|
+
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
176
|
+
assert_equal(arff_file_str, rel.to_arff, "missing data from first line output failure")
|
177
|
+
end
|
178
|
+
|
179
|
+
def test_boolean_multipl
|
180
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
181
|
+
@RELATION MyCoolRelation
|
182
|
+
@ATTRIBUTE Attr0 {false,true}
|
183
|
+
@ATTRIBUTE Attr1 {false,true}
|
184
|
+
@ATTRIBUTE Attr2 {false,true}
|
185
|
+
@DATA
|
186
|
+
true, false, true
|
187
|
+
true, true, true
|
188
|
+
END_OF_ARFF_FILE
|
189
|
+
|
190
|
+
arff_file_str.gsub!(/\n$/, '')
|
191
|
+
|
192
|
+
instances = [ [true,false,true],[true,true,true]]
|
193
|
+
|
194
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
195
|
+
rel.instances = instances
|
196
|
+
|
197
|
+
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
198
|
+
assert_equal(arff_file_str, rel.to_arff, "missing data from first line output failure")
|
199
|
+
end
|
200
|
+
|
201
|
+
def test_strings_as_nominal
|
202
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
203
|
+
@RELATION MyCoolRelation
|
204
|
+
@ATTRIBUTE Attr0 {two,one}
|
205
|
+
@ATTRIBUTE Attr1 {three,four}
|
206
|
+
@DATA
|
207
|
+
one, three
|
208
|
+
two, four
|
209
|
+
END_OF_ARFF_FILE
|
210
|
+
|
211
|
+
arff_file_str.gsub!(/\n$/, '')
|
212
|
+
|
213
|
+
instances = [ ['one','three'],['two','four']]
|
214
|
+
|
215
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
216
|
+
rel.instances = instances
|
217
|
+
rel.set_string_attributes_to_nominal
|
218
|
+
|
219
|
+
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
220
|
+
assert_equal(arff_file_str, rel.to_arff, "test_strings_as_nominal")
|
221
|
+
end
|
222
|
+
|
223
|
+
def test_boolean_2
|
224
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
225
|
+
@RELATION MyCoolRelation
|
164
226
|
@ATTRIBUTE Attr0 NUMERIC
|
165
227
|
@ATTRIBUTE subject STRING
|
166
228
|
@ATTRIBUTE Attr2 {false,true}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wwood-rarff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Payne, Ben J Woodcroft
|
@@ -9,11 +9,12 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-02-18 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: hoe
|
17
|
+
type: :development
|
17
18
|
version_requirement:
|
18
19
|
version_requirements: !ruby/object:Gem::Requirement
|
19
20
|
requirements:
|