wwood-rarff 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Rakefile +1 -1
- data/lib/rarff.rb +31 -8
- data/test/test_rarff.rb +64 -2
- metadata +3 -2
data/History.txt
CHANGED
data/Rakefile
CHANGED
data/lib/rarff.rb
CHANGED
@@ -63,7 +63,8 @@ module Rarff
|
|
63
63
|
################################################################################
|
64
64
|
|
65
65
|
class Attribute
|
66
|
-
attr_accessor :name
|
66
|
+
attr_accessor :name
|
67
|
+
attr_reader :type
|
67
68
|
|
68
69
|
def initialize(name='', type='')
|
69
70
|
@name = name
|
@@ -88,7 +89,8 @@ module Rarff
|
|
88
89
|
@type_is_nominal = true
|
89
90
|
# Example format: "{nom1,nom2, nom3, nom4,nom5 } "
|
90
91
|
# Split on '{' ',' or '}'
|
91
|
-
@type = @type.gsub(/^\s*\{\s*/, '').gsub(/\s*\}\s*$/, '').split(/\s*\,\s*/)
|
92
|
+
# @type = @type.gsub(/^\s*\{\s*/, '').gsub(/\s*\}\s*$/, '').split(/\s*\,\s*/)
|
93
|
+
@type = @type.split(/\s*\,\s*/)
|
92
94
|
end
|
93
95
|
end
|
94
96
|
|
@@ -104,7 +106,7 @@ module Rarff
|
|
104
106
|
|
105
107
|
def to_arff
|
106
108
|
if @type_is_nominal == true
|
107
|
-
ATTRIBUTE_MARKER + " #{@name}
|
109
|
+
ATTRIBUTE_MARKER + " #{@name} #{@type.join(',').gsub(' ','_')}"
|
108
110
|
else
|
109
111
|
ATTRIBUTE_MARKER + " #{@name} #{@type}"
|
110
112
|
end
|
@@ -120,7 +122,8 @@ module Rarff
|
|
120
122
|
|
121
123
|
|
122
124
|
class Relation
|
123
|
-
attr_accessor :name, :attributes
|
125
|
+
attr_accessor :name, :attributes
|
126
|
+
attr_reader :instances
|
124
127
|
|
125
128
|
|
126
129
|
def initialize(name='')
|
@@ -197,10 +200,10 @@ module Rarff
|
|
197
200
|
@attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_NUMERIC)
|
198
201
|
elsif col.kind_of?(String)
|
199
202
|
@attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_STRING)
|
200
|
-
elsif col
|
203
|
+
elsif col == false or col == true #exactly equal to a boolean
|
201
204
|
@attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_BOOLEAN)
|
202
205
|
else
|
203
|
-
raise Exception, "Could not parse attribute: #{col.inspect}"
|
206
|
+
raise Exception, "Could not parse attribute to ARFF data type: #{col.inspect}"
|
204
207
|
end
|
205
208
|
}
|
206
209
|
}
|
@@ -212,6 +215,26 @@ module Rarff
|
|
212
215
|
end
|
213
216
|
end
|
214
217
|
|
218
|
+
# Make all String type attributes into nominal attributes, because
|
219
|
+
# they are more useful in WEKA because more techniques handle them than
|
220
|
+
# strings
|
221
|
+
# Rewrites the type of every attribute currently typed ATTRIBUTE_STRING to a
# nominal specification "{label1,label2,...}" built from the distinct values
# found in that column of @instances.
#
# nil cells are skipped: they are not labels, and joining them into the
# specification would produce an empty entry such as "{one,,two}".
# A STRING column that contains only nil cells is therefore left unchanged.
#
# Returns the hash mapping each converted column index to its label set.
def set_string_attributes_to_nominal
  nominals = {}

  # @instances is stored row by row, so the per-column label sets have to be
  # accumulated while walking the rows.
  @instances.each do |row|
    row.each_with_index do |value, col_index|
      next unless @attributes[col_index].type == ATTRIBUTE_STRING
      next if value.nil?

      # A hash is used as a set so that repeated values are recorded once.
      nominals[col_index] ||= {}
      nominals[col_index][value] = true
    end
  end

  nominals.each do |index, labels|
    @attributes[index].type = "{#{labels.keys.join(',')}}"
  end
end
|
215
238
|
|
216
239
|
def expand_sparse(str)
|
217
240
|
arr = Array.new(@attributes.size, 0)
|
@@ -247,7 +270,7 @@ module Rarff
|
|
247
270
|
|
248
271
|
# Do the final output
|
249
272
|
if sparse
|
250
|
-
if col.nil? or
|
273
|
+
if col.nil? or
|
251
274
|
(@attributes[i].type =~ /^#{ATTRIBUTE_NUMERIC}$/i and col == 0)
|
252
275
|
nil
|
253
276
|
else
|
@@ -267,7 +290,7 @@ module Rarff
|
|
267
290
|
else
|
268
291
|
mapped.join(", ")
|
269
292
|
end
|
270
|
-
}.join("\n")
|
293
|
+
}.join("\n")
|
271
294
|
end
|
272
295
|
|
273
296
|
|
data/test/test_rarff.rb
CHANGED
@@ -101,7 +101,6 @@ class TestArffLib < Test::Unit::TestCase
|
|
101
101
|
# assert_equal(0, rel.instances[3][12])
|
102
102
|
# # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
|
103
103
|
# end
|
104
|
-
#
|
105
104
|
def test_output_missing
|
106
105
|
arff_file_str = <<-END_OF_ARFF_FILE
|
107
106
|
@RELATION MyCoolRelation
|
@@ -155,12 +154,75 @@ class TestArffLib < Test::Unit::TestCase
|
|
155
154
|
rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
|
156
155
|
|
157
156
|
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
158
|
-
assert_equal(arff_file_str, rel.to_arff, "missing data output failure")
|
157
|
+
assert_equal(arff_file_str, rel.to_arff, "missing data from first line output failure")
|
159
158
|
end
|
160
159
|
|
161
160
|
# Builds a relation from a single row holding one boolean and expects the
# ARFF output to declare the column as the nominal set {false,true}.
def test_boolean
  arff_file_str = <<-END_OF_ARFF_FILE
@RELATION MyCoolRelation
@ATTRIBUTE Attr0 {false,true}
@DATA
true
  END_OF_ARFF_FILE

  # The heredoc ends with a newline that the expected output does not have.
  arff_file_str.gsub!(/\n$/, '')

  instances = [ [true]]

  rel = Rarff::Relation.new('MyCoolRelation')
  rel.instances = instances

  # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
  # The failure message names this test's subject instead of the text
  # copied from the missing-data test.
  assert_equal(arff_file_str, rel.to_arff, "boolean attribute output failure")
end
|
178
|
+
|
179
|
+
# Builds a relation from two rows of three booleans each and expects every
# column to be declared as the nominal set {false,true} in the ARFF output.
def test_boolean_multipl
  arff_file_str = <<-END_OF_ARFF_FILE
@RELATION MyCoolRelation
@ATTRIBUTE Attr0 {false,true}
@ATTRIBUTE Attr1 {false,true}
@ATTRIBUTE Attr2 {false,true}
@DATA
true, false, true
true, true, true
  END_OF_ARFF_FILE

  # The heredoc ends with a newline that the expected output does not have.
  arff_file_str.gsub!(/\n$/, '')

  instances = [ [true,false,true],[true,true,true]]

  rel = Rarff::Relation.new('MyCoolRelation')
  rel.instances = instances

  # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
  # The failure message names this test's subject instead of the text
  # copied from the missing-data test.
  assert_equal(arff_file_str, rel.to_arff, "multiple boolean attributes output failure")
end
|
200
|
+
|
201
|
+
# Loads two rows of strings, converts the string attributes to nominal ones
# and compares the serialised relation with the expected ARFF text.
#
# NOTE(review): the expected label order for Attr0 is {two,one}, which is not
# the order the values appear in the data; presumably it reflects the hash
# iteration order of the Ruby version this was written for -- confirm.
def test_strings_as_nominal
  relation = Rarff::Relation.new('MyCoolRelation')
  relation.instances = [ ['one','three'],['two','four']]
  relation.set_string_attributes_to_nominal

  expected = <<-END_OF_ARFF_FILE
@RELATION MyCoolRelation
@ATTRIBUTE Attr0 {two,one}
@ATTRIBUTE Attr1 {three,four}
@DATA
one, three
two, four
  END_OF_ARFF_FILE
  # Drop the newline the heredoc leaves after the last data row.
  expected.gsub!(/\n$/, '')

  assert_equal(expected, relation.to_arff, "test_strings_as_nominal")
end
|
222
|
+
|
223
|
+
def test_boolean_2
|
224
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
225
|
+
@RELATION MyCoolRelation
|
164
226
|
@ATTRIBUTE Attr0 NUMERIC
|
165
227
|
@ATTRIBUTE subject STRING
|
166
228
|
@ATTRIBUTE Attr2 {false,true}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wwood-rarff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Payne, Ben J Woodcroft
|
@@ -9,11 +9,12 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-02-18 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: hoe
|
17
|
+
type: :development
|
17
18
|
version_requirement:
|
18
19
|
version_requirements: !ruby/object:Gem::Requirement
|
19
20
|
requirements:
|