wwood-rarff 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +9 -0
- data/Manifest.txt +8 -0
- data/README.txt +90 -0
- data/Rakefile +24 -0
- data/lib/rarff.rb +304 -0
- data/test/test_arff.arff +27 -0
- data/test/test_rarff.rb +163 -0
- data/test/test_sparse_arff.arff +24 -0
- metadata +73 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.txt
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
= rarff
|
|
2
|
+
|
|
3
|
+
http://adenserparlance.blogspot.com/2007/01/rarff-simple-arff-library-in-ruby.html
|
|
4
|
+
|
|
5
|
+
== DESCRIPTION:
|
|
6
|
+
|
|
7
|
+
Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify data sets for data mining and machine learning.
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
== FEATURES/PROBLEMS:
|
|
11
|
+
|
|
12
|
+
=== FEATURES
|
|
13
|
+
* Missing values - nil entries are written as '?' when creating ARFF files
|
|
14
|
+
|
|
15
|
+
=== PROBLEMS
|
|
16
|
+
* Spaces or quotes in nominal types are not handled
|
|
17
|
+
* Commas in quoted attributes or in nominal types are not handled
|
|
18
|
+
* Add error checking/validation
|
|
19
|
+
* Creation of sparse ARFF files
|
|
20
|
+
* Dates - do some work to create, translate, and interpret date format strings.
|
|
21
|
+
|
|
22
|
+
== SYNOPSIS:
|
|
23
|
+
|
|
24
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
|
25
|
+
@RELATION MyCoolRelation
|
|
26
|
+
@ATTRIBUTE Attr0 NUMERIC
|
|
27
|
+
@ATTRIBUTE subject STRING
|
|
28
|
+
@ATTRIBUTE Attr2 NUMERIC
|
|
29
|
+
@ATTRIBUTE Attr3 STRING
|
|
30
|
+
@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
|
|
31
|
+
@DATA
|
|
32
|
+
1.4, 'foo bar', 5, baz, "1900-08-08 12:12:12"
|
|
33
|
+
20.9, ruby, 46, rocks, "2005-10-23 12:12:12"
|
|
34
|
+
0, ruby, 46, rocks, "2001-02-19 12:12:12"
|
|
35
|
+
68.1, stuff, 728, 'is cool', "1974-02-10 12:12:12"
|
|
36
|
+
END_OF_ARFF_FILE
|
|
37
|
+
|
|
38
|
+
arff_file_str.gsub!(/\n$/, '')
|
|
39
|
+
|
|
40
|
+
instances = [ [1.4, 'foo bar', 5, 'baz', "1900-08-08 12:12:12"],
|
|
41
|
+
[20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
|
|
42
|
+
[0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
|
|
43
|
+
[68.1, 'stuff', 728, 'is cool', "1974-02-10 12:12:12"]]
|
|
44
|
+
|
|
45
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
|
46
|
+
rel.instances = instances
|
|
47
|
+
rel.attributes[1].name = 'subject'
|
|
48
|
+
rel.attributes[4].name = 'birthday'
|
|
49
|
+
rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
|
|
50
|
+
|
|
51
|
+
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
|
52
|
+
assert_equal(arff_file_str, rel.to_arff, "Arff creation test failed.")
|
|
53
|
+
|
|
54
|
+
== REQUIREMENTS:
|
|
55
|
+
|
|
56
|
+
== INSTALL:
|
|
57
|
+
|
|
58
|
+
* sudo gem install wwood-rarff
|
|
59
|
+
|
|
60
|
+
== LICENSE:
|
|
61
|
+
|
|
62
|
+
Copyright (c) 2008 Andy Payne
|
|
63
|
+
All rights reserved.
|
|
64
|
+
|
|
65
|
+
Redistribution and use in source and binary forms, with or without
|
|
66
|
+
modification, are permitted provided that the following conditions are met:
|
|
67
|
+
|
|
68
|
+
* Redistributions of source code must retain the above copyright notice,
|
|
69
|
+
this list of conditions and the following disclaimer.
|
|
70
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
|
71
|
+
this list of conditions and the following disclaimer in the
|
|
72
|
+
documentation and/or other materials provided with the distribution.
|
|
73
|
+
* Neither the name of the COPYRIGHT OWNER nor the names of its contributors
|
|
74
|
+
may be used to endorse or promote products derived from this software
|
|
75
|
+
without specific prior written permission.
|
|
76
|
+
|
|
77
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
78
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
79
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
80
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
|
81
|
+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
82
|
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
83
|
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
|
84
|
+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
85
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
86
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
|
data/Rakefile
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require 'rubygems'
require 'hoe'
#require './lib/rarff.rb'

# Gem packaging definition, driven by Hoe. Everything below only configures
# the specification object that Hoe yields to the block.
gem_name = 'rarff'
gem_version = '0.2.1'

hoe = Hoe.new(gem_name, gem_version) do |spec|

  # Who to blame and where to find the project.
  spec.author = "Andy Payne, Ben J Woodcroft"
  spec.email = "apayne .at. gmail.com, b.woodcroft@pgrad.unimelb.edu.au"
  spec.url = "http://adenserparlance.blogspot.com/2007/01/rarff-simple-arff-library-in-ruby.html"

  # Long and short descriptions shown by the gem tooling.
  spec.description = 'Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify
data sets for data mining and machine learning.'
  spec.summary = 'Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files'

  # Files that are fed to rdoc.
  spec.rdoc_pattern = /(^lib\/.*\.rb$|^examples\/.*\.rb$|^README|^History|^License)/

  # Extra gemspec fields passed through verbatim.
  rdoc_flags = ["--exclude", "test/*", "--main", "README.txt", "--inline-source"]
  spec.spec_extras = {
    :require_paths => ['lib','test'],
    :has_rdoc => true,
    :extra_rdoc_files => ["README.txt"],
    :rdoc_options => rdoc_flags
  }
end
|
data/lib/rarff.rb
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
# = rarff
|
|
2
|
+
|
|
3
|
+
# This is the top-level include file for rarff. See the README file for
|
|
4
|
+
# details.
|
|
5
|
+
|
|
6
|
+
################################################################################
|
|
7
|
+
|
|
8
|
+
# Custom scan that returns a boolean indicating whether the regex matched.
|
|
9
|
+
# TODO: Is there a way to avoid doing this?
|
|
10
|
+
class String
  # Runs String#scan with +re+ and reports whether anything matched.
  #
  # Each match (the matched text, or the array of captures when +re+ has
  # groups) is passed to the block when one is given.
  #
  # Returns true if +re+ matched at least once, false otherwise.
  def my_scan(re)
    matched = false
    scan(re) do |captures|
      yield captures if block_given?
      matched = true
    end
    matched
  end
end
|
|
20
|
+
|
|
21
|
+
################################################################################
|
|
22
|
+
|
|
23
|
+
module Enumerable
  # Like +map+, but the block also receives the zero-based position of the
  # element being mapped. Returns whatever +map+ returns for the block's
  # results.
  def map_with_index
    position = -1
    map do |element|
      position += 1
      # The yield must stay last so that its value becomes the mapped value.
      yield element, position
    end
  end
end
|
|
36
|
+
|
|
37
|
+
################################################################################
|
|
38
|
+
|
|
39
|
+
module Rarff

  # Line prefixes that introduce the sections of an ARFF file.
  COMMENT_MARKER = '%'
  RELATION_MARKER = '@RELATION'
  ATTRIBUTE_MARKER = '@ATTRIBUTE'
  DATA_MARKER = '@DATA'

  # Delimiters of a sparse data row, plus their regex-escaped forms.
  SPARSE_ARFF_BEGIN = '{'
  ESC_SPARSE_ARFF_BEGIN = '\\' + SPARSE_ARFF_BEGIN
  SPARSE_ARFF_END = '}'
  ESC_SPARSE_ARFF_END = '\\' + SPARSE_ARFF_END

  # Attribute type keywords.
  ATTRIBUTE_NUMERIC = 'NUMERIC'
  ATTRIBUTE_REAL = 'REAL'
  ATTRIBUTE_INTEGER = 'INTEGER'
  ATTRIBUTE_STRING = 'STRING'
  ATTRIBUTE_DATE = 'DATE'

  # Token written in place of a missing (nil) value.
  MISSING = '?'

  ################################################################################

  # One @ATTRIBUTE declaration: a name plus a type. The type is either a
  # String (e.g. 'NUMERIC', 'DATE "yyyy-MM-dd"') or, for nominal attributes,
  # an Array holding the allowed values.
  class Attribute
    attr_accessor :name
    attr_reader :type

    # name:: attribute name
    # type:: type string; a "{a,b,c}" string is converted to the Array
    #        ['a', 'b', 'c'] and the attribute is marked nominal.
    def initialize(name='', type='')
      @name = name
      @type_is_nominal = false
      @type = type

      check_nominal
    end

    # Replaces the type and re-evaluates whether the attribute is nominal.
    def type=(type)
      @type = type
      check_nominal
    end

    # Convert string representation of nominal type to array, if necessary,
    # and keep the nominal flag in step with the current type. An Array type
    # is taken as an already-split nominal value list; any other type clears
    # the flag so a stale flag cannot make to_arff call join on a String.
    def check_nominal
      if @type.kind_of?(Array)
        @type_is_nominal = true
      elsif @type.kind_of?(String) && @type =~ /^\s*\{.*(\,.*)+\}\s*$/
        @type_is_nominal = true
        # Example format: "{nom1,nom2, nom3, nom4,nom5 } "
        # Drop the surrounding braces, then split on ','
        @type = @type.gsub(/^\s*\{\s*/, '').gsub(/\s*\}\s*$/, '').split(/\s*\,\s*/)
      else
        @type_is_nominal = false
      end
    end

    # Appends +str+ to the nominal values. An attribute that is not nominal
    # yet becomes nominal with +str+ as its only value. Returns the value list.
    def add_nominal_value(str)
      unless @type_is_nominal
        @type = []
        # Without this flag every later call would start a fresh list.
        @type_is_nominal = true
      end

      @type << str
    end

    # Returns the "@ATTRIBUTE name type" declaration line. Nominal value
    # lists are written inside braces, as the ARFF format requires.
    def to_arff
      type_str = @type_is_nominal ? "{#{@type.join(',')}}" : @type.to_s
      "#{ATTRIBUTE_MARKER} #{@name} #{type_str}"
    end

    def to_s
      to_arff
    end

  end

  ################################################################################

  # A named data set: an Array of Attribute objects plus an Array of
  # instances (rows), each row being an Array with one value per attribute
  # and nil standing for a missing value.
  class Relation
    attr_accessor :name, :attributes
    attr_reader :instances

    def initialize(name='')
      @name = name
      @attributes = []
      @instances = []
    end

    # Parses ARFF text into this relation: sets the name, appends declared
    # attributes and appends one instance per data row (dense or sparse).
    # Field values are kept as Strings. Attributes declared in the header are
    # never overwritten; columns without a declaration get an inferred one.
    # Returns self.
    #
    # TODO: Doesn't handle commas in quoted attributes.
    def parse(str)
      in_data_section = false
      rows_added = false
      widest_row = 0

      str.split("\n").each do |raw_line|
        line = raw_line.strip   # also drops a trailing "\r"
        next if line.empty?
        next if line =~ /^#{Regexp.escape(COMMENT_MARKER)}/

        if line =~ /^#{RELATION_MARKER}\s*(.*)$/i
          @name = Regexp.last_match(1).strip
          next
        end

        if line =~ /^#{ATTRIBUTE_MARKER}\s*([^\s]*)\s+(.*)$/i
          @attributes.push(Attribute.new(Regexp.last_match(1), Regexp.last_match(2).strip))
          next
        end

        if line =~ /^#{DATA_MARKER}/i
          in_data_section = true
          next
        end

        next unless in_data_section   ## Below is data section handling

        row = if line =~ /^#{ESC_SPARSE_ARFF_BEGIN}(.*)#{ESC_SPARSE_ARFF_END}$/
          # Sparse ARFF: pad to the declared width, or to the widest row seen
          # so far when the header declared fewer attributes.
          expand_sparse(Regexp.last_match(1), [@attributes.size, widest_row].max)
        else
          line.split(/,\s*/).map do |field|
            # Remove outer single quotes on strings, if any ('foo bar' --> foo bar)
            field.gsub(/^\s*\'(.*)\'\s*$/, "\\1")
          end
        end

        @instances << row
        widest_row = row.size if row.size > widest_row
        rows_added = true
      end

      # Infer the missing attribute definitions once, not once per row.
      create_attributes(true) if rows_added
      self
    end

    # Assign instances to the internal array
    # parse: choose to parse strings into numerics
    def instances=(instances, parse=false)
      @instances = instances
      create_attributes(parse)
    end

    # Builds attribute definitions from the instance data. For each column
    # the first non-nil value found (scanning rows top to bottom) decides the
    # type; columns of the first row that stay undefined default to NUMERIC.
    #
    # attr_parse:: when false, values are taken as they are (Numeric ->
    #              NUMERIC, String -> STRING, anything else raises) and
    #              existing definitions are replaced. When true, values are
    #              parsed text: number-like text gives NUMERIC, anything else
    #              STRING, and existing definitions are kept.
    #
    # Raises Exception when there is no data to inspect. NOTE: Exception
    # (rather than a StandardError) is kept for backward compatibility, so a
    # bare `rescue` does not catch it.
    def create_attributes(attr_parse=false)
      if @instances.nil? || @instances.empty? || @instances[0].empty?
        raise Exception, "Not enough data to create ARFF attributes"
      end

      # Keep track of whether an attribute has been defined or not.
      # The only reason an attribute would not be defined in the first
      # row is if it has nil's in it.
      attributes_defined = {}
      @instances.each do |row|
        row.each_with_index do |col, j|
          next if attributes_defined[j] || col.nil?

          attributes_defined[j] = true # whatever happens, we are going to define it

          if attr_parse
            # to_s: sparse rows hold the Integer 0 as filler, which has no =~
            inferred = col.to_s =~ /\A\-?\d+\.?\d*\z/ ? ATTRIBUTE_NUMERIC : ATTRIBUTE_STRING
            @attributes[j] ||= Attribute.new("Attr#{j}", inferred)
            next # this column is finished
          end

          # No parsing - just take it how it is
          if col.kind_of?(Numeric)
            @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_NUMERIC)
          elsif col.kind_of?(String)
            @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_STRING)
          else
            raise Exception, "Could not parse attribute: #{col.inspect}"
          end
        end
      end

      # Make sure all attributes have a definition, because otherwise
      # needless errors are thrown
      @instances[0].each_index do |i|
        @attributes[i] ||= Attribute.new("Attr#{i}", ATTRIBUTE_NUMERIC)
      end
    end

    # Expands the body of a sparse row ("3 7, 10 34", braces optional) into
    # a dense Array in which unmentioned positions hold 0.
    #
    # str::   "index value" pairs separated by commas
    # width:: minimum length of the result (defaults to the attribute count)
    def expand_sparse(str, width=@attributes.size)
      arr = Array.new(width, 0)
      str.gsub(/^\s*\{(.*)\}\s*$/, "\\1").split(/\s*\,\s*/).each do |pair|
        # Limit of 2 keeps values that contain spaces in one piece.
        index, value = pair.strip.split(/\s+/, 2)
        next if index.nil?
        arr[index.to_i] = value
      end
      arr
    end

    # Renders the relation as ARFF text (no trailing newline).
    #
    # sparse:: when true, data rows are written in sparse form: NUMERIC
    #          zeros are omitted and every other value is written as
    #          "index value"; missing values are written explicitly as
    #          "index ?" because an omitted sparse value means 0.
    def to_arff(sparse=false)
      header = "#{RELATION_MARKER} #{@name}\n" +
               @attributes.map { |attr| attr.to_arff }.join("\n") +
               "\n" +
               DATA_MARKER + "\n"

      header + @instances.map { |inst| instance_to_arff(inst, sparse) }.join("\n")
    end

    def to_s
      to_arff
    end

    private

    # Type of attribute +index+ when it is a String, nil for nominal
    # (Array) types and for columns without an attribute.
    def string_type_of(index)
      attr = @attributes[index]
      type = attr && attr.type
      type.kind_of?(String) ? type : nil
    end

    # Quotes +col+ for output: STRING values containing whitespace get single
    # quotes, DATE values get double quotes, everything else is unchanged.
    # TODO: Doesn't handle cases in which strings already contain
    # quotes or are already quoted.
    def quote_for_output(col, type)
      return col if type.nil?

      if type =~ /^#{ATTRIBUTE_STRING}$/i
        col.to_s =~ /\s+/ ? "'#{col}'" : col
      elsif type =~ /^#{ATTRIBUTE_DATE}/i   ## Hack comparison. Ugh.
        "\"#{col}\""
      else
        col
      end
    end

    # Renders one instance as a dense or sparse data row.
    def instance_to_arff(inst, sparse)
      cells = []
      inst.each_with_index do |col, i|
        type = string_type_of(i)

        if col.nil?
          cells << (sparse ? "#{i} #{MISSING}" : MISSING)
        elsif sparse
          # A NUMERIC zero is what an omitted sparse entry stands for.
          next if type && type =~ /^#{ATTRIBUTE_NUMERIC}$/i && col == 0
          cells << "#{i} #{quote_for_output(col, type)}"
        else
          cells << quote_for_output(col, type)
        end
      end

      row = cells.join(', ')
      sparse ? "#{SPARSE_ARFF_BEGIN}#{row}#{SPARSE_ARFF_END}" : row
    end

  end

end # module Rarff
|
|
275
|
+
|
|
276
|
+
################################################################################
|
|
277
|
+
|
|
278
|
+
if $0 == __FILE__
  # Command-line use: parse the ARFF file named by the first argument and
  # print it back out as ARFF. Exits without output when no file is given.
  exit unless ARGV[0]

  rel = Rarff::Relation.new
  # File.read closes the file itself; File.open(...).read left it open.
  rel.parse(File.read(ARGV[0]))

  puts '=' * 80
  puts '=' * 80
  puts "ARFF:"
  puts rel
end
|
|
301
|
+
|
|
302
|
+
################################################################################
|
|
303
|
+
|
|
304
|
+
|
data/test/test_arff.arff
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
% 1. Title: Iris Plants Database
|
|
2
|
+
%
|
|
3
|
+
% 2. Sources:
|
|
4
|
+
% (a) Creator: R.A. Fisher
|
|
5
|
+
% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
|
|
6
|
+
% (c) Date: July, 1988
|
|
7
|
+
%
|
|
8
|
+
@RELATION iris
|
|
9
|
+
|
|
10
|
+
@ATTRIBUTE sepallength NUMERIC
|
|
11
|
+
@ATTRIBUTE sepalwidth NUMERIC
|
|
12
|
+
@ATTRIBUTE petallength NUMERIC
|
|
13
|
+
@ATTRIBUTE petalwidth NUMERIC
|
|
14
|
+
@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
|
|
15
|
+
|
|
16
|
+
@DATA
|
|
17
|
+
5.1,3.5,1.4,0.2,Iris-setosa
|
|
18
|
+
4.9,3.0,1.4,0.2,Iris-setosa
|
|
19
|
+
4.7,3.2,1.3,0.2,Iris-setosa
|
|
20
|
+
4.6,3.1,1.5,0.2,Iris-setosa
|
|
21
|
+
5.0,3.6,1.4,0.2,Iris-setosa
|
|
22
|
+
5.4,3.9,1.7,0.4,Iris-setosa
|
|
23
|
+
4.6,3.4,1.4,0.3,Iris-setosa
|
|
24
|
+
5.0,3.4,1.5,0.2,Iris-setosa
|
|
25
|
+
4.4,2.9,1.4,0.2,Iris-setosa
|
|
26
|
+
4.9,3.1,1.5,0.1,Iris-setosa
|
|
27
|
+
|
data/test/test_rarff.rb
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# See the README file for more information.
|
|
2
|
+
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
|
3
|
+
require 'test/unit'
|
|
4
|
+
require 'rarff'
|
|
5
|
+
|
|
6
|
+
class TestArffLib < Test::Unit::TestCase

  # Dense ARFF output for a fully populated relation.
  def test_arff_creation
    expected = arff_text('STRING',
                         %q{1.4, 'foo bar', 5, baz, "1900-08-08 12:12:12"},
                         %q{20.9, ruby, 46, rocks, "2005-10-23 12:12:12"},
                         %q{0, ruby, 46, rocks, "2001-02-19 12:12:12"},
                         %q{68.1, stuff, 728, 'is cool', "1974-02-10 12:12:12"})

    rows = []
    rows << [1.4, 'foo bar', 5, 'baz', "1900-08-08 12:12:12"]
    rows << [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"]
    rows << [0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"]
    rows << [68.1, 'stuff', 728, 'is cool', "1974-02-10 12:12:12"]

    # puts "rel.to_arff:\n(\n#{build_relation(rows).to_arff}\n)\n"
    assert_equal(expected, build_relation(rows).to_arff, "Arff creation test failed.")
  end

  # The three tests below are disabled in this release; they are kept as a
  # record of the intended sparse-output and parsing behaviour.
  #
  #  # Test creation of a sparse arff file string.
  #  def test_sparse_arff_creation
  #    Expected text: the same five-attribute header as test_arff_creation
  #    (Attr3 STRING), followed by these data rows:
  #      {0 1.4, 1 'foo bar', 3 baz, 4 "1900-08-08 12:12:12"}
  #      {0 20.9, 1 ruby, 2 46, 3 rocks, 4 "2005-10-23 12:12:12"}
  #      {1 ruby, 2 46, 3 rocks, 4 "2001-02-19 12:12:12"}
  #      {0 68.1, 1 stuff, 3 'is cool', 4 "1974-02-10 12:12:12"}
  #
  #    instances = [ [1.4, 'foo bar', 0, 'baz', "1900-08-08 12:12:12"],
  #                  [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
  #                  [0.0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
  #                  [68.1, 'stuff', 0, 'is cool', "1974-02-10 12:12:12"]]
  #
  #    rel = Rarff::Relation.new('MyCoolRelation')
  #    rel.instances = instances
  #    rel.attributes[1].name = 'subject'
  #    rel.attributes[4].name = 'birthday'
  #    rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
  #
  #    # puts "rel.to_arff(true):\n(\n#{rel.to_arff(true)}\n)\n"
  #    assert_equal( arff_file_str, rel.to_arff(true), "test_sparse_arff_creation.")
  #  end
  #
  #  # Test parsing of an arff file.
  #  def test_arff_parse
  #    in_file = './test_arff.arff'
  #    rel = Rarff::Relation.new
  #    rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
  #
  #    assert_equal(rel.instances[2][1], 3.2)
  #    assert_equal(rel.instances[7][4], 'Iris-setosa')
  #  end
  #
  #  # Test parsing of sparse ARFF format
  #  def test_sparse_arff_parse
  #    in_file = './test_sparse_arff.arff'
  #    rel = Rarff::Relation.new
  #    rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
  #
  #    assert_equal(13, rel.instances[0].size)
  #    assert_equal(0, rel.instances[0][1])
  #    assert_equal(7, rel.instances[0][3])
  #    assert_equal(2.4, rel.instances[1][1])
  #    assert_equal(0, rel.instances[1][2])
  #    assert_equal(19, rel.instances[1][12])
  #    assert_equal(6, rel.instances[2][6])
  #    assert_equal(0, rel.instances[3][12])
  #    # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
  #  end

  # nil values are written as '?'.
  def test_output_missing
    expected = arff_text('STRING',
                         %q{?, 'foo bar', 5, baz, ?},
                         %q{20.9, ruby, 46, ?, "2005-10-23 12:12:12"})

    rows = [[nil, 'foo bar', 5, 'baz', nil],
            [20.9, 'ruby', 46, nil, "2005-10-23 12:12:12"]]

    # puts "rel.to_arff:\n(\n#{build_relation(rows).to_arff}\n)\n"
    assert_equal(expected, build_relation(rows).to_arff, "missing data output failure")
  end

  # A first row made only of nil values: column types come from later rows,
  # and the column that is nil in every row (Attr3) ends up NUMERIC.
  def test_output_missing_undefined_first_row
    expected = arff_text('NUMERIC',
                         %q{?, ?, ?, ?, ?},
                         %q{20.9, ruby, 46, ?, "2005-10-23 12:12:12"})

    rows = [[nil, nil, nil, nil, nil],
            [20.9, 'ruby', 46, nil, "2005-10-23 12:12:12"]]

    # puts "rel.to_arff:\n(\n#{build_relation(rows).to_arff}\n)\n"
    assert_equal(expected, build_relation(rows).to_arff, "missing data output failure")
  end

  private

  # Expected ARFF text: the header shared by all tests (only the type of
  # Attr3 varies) followed by the given data rows, without trailing newline.
  def arff_text(attr3_type, *data_rows)
    header = ['@RELATION MyCoolRelation',
              '@ATTRIBUTE Attr0 NUMERIC',
              '@ATTRIBUTE subject STRING',
              '@ATTRIBUTE Attr2 NUMERIC',
              "@ATTRIBUTE Attr3 #{attr3_type}",
              '@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"',
              '@DATA']
    (header + data_rows).join("\n")
  end

  # Relation named 'MyCoolRelation' built from +rows+, with columns 1 and 4
  # renamed and column 4 turned into a DATE attribute.
  def build_relation(rows)
    relation = Rarff::Relation.new('MyCoolRelation')
    relation.instances = rows
    relation.attributes[1].name = 'subject'
    relation.attributes[4].name = 'birthday'
    relation.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
    relation
  end
end
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
% Sample sparse ARFF file
|
|
2
|
+
@RELATION sparseness
|
|
3
|
+
|
|
4
|
+
@ATTRIBUTE attr1 NUMERIC
|
|
5
|
+
@ATTRIBUTE attr2 NUMERIC
|
|
6
|
+
@ATTRIBUTE attr3 NUMERIC
|
|
7
|
+
@ATTRIBUTE attr4 NUMERIC
|
|
8
|
+
@ATTRIBUTE attr5 NUMERIC
|
|
9
|
+
@ATTRIBUTE attr6 NUMERIC
|
|
10
|
+
@ATTRIBUTE attr7 NUMERIC
|
|
11
|
+
@ATTRIBUTE attr8 NUMERIC
|
|
12
|
+
@ATTRIBUTE attr9 NUMERIC
|
|
13
|
+
@ATTRIBUTE attr10 NUMERIC
|
|
14
|
+
@ATTRIBUTE attr11 NUMERIC
|
|
15
|
+
@ATTRIBUTE attr12 NUMERIC
|
|
16
|
+
@ATTRIBUTE attr13 NUMERIC
|
|
17
|
+
|
|
18
|
+
@DATA
|
|
19
|
+
{3 7, 10 34}
|
|
20
|
+
{1 2.4, 4 62, 12 19}
|
|
21
|
+
{0 0, 1 1, 2 2, 3 3, 4 4, 5 5, 6 6, 7 7, 8 8, 9 9, 10 10, 11 11, 12 12}
|
|
22
|
+
{9 42}
|
|
23
|
+
{2 54.3, 3 92, 11 10.2}
|
|
24
|
+
|
metadata
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: wwood-rarff
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.2.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Andy Payne, Ben J Woodcroft
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
|
|
12
|
+
date: 2008-11-25 00:00:00 -08:00
|
|
13
|
+
default_executable:
|
|
14
|
+
dependencies:
|
|
15
|
+
- !ruby/object:Gem::Dependency
|
|
16
|
+
name: hoe
|
|
17
|
+
version_requirement:
|
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
19
|
+
requirements:
|
|
20
|
+
- - ">="
|
|
21
|
+
- !ruby/object:Gem::Version
|
|
22
|
+
version: 1.8.2
|
|
23
|
+
version:
|
|
24
|
+
description: Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify data sets for data mining and machine learning.
|
|
25
|
+
email: apayne .at. gmail.com, b.woodcroft@pgrad.unimelb.edu.au
|
|
26
|
+
executables: []
|
|
27
|
+
|
|
28
|
+
extensions: []
|
|
29
|
+
|
|
30
|
+
extra_rdoc_files:
|
|
31
|
+
- README.txt
|
|
32
|
+
files:
|
|
33
|
+
- History.txt
|
|
34
|
+
- Manifest.txt
|
|
35
|
+
- README.txt
|
|
36
|
+
- Rakefile
|
|
37
|
+
- lib/rarff.rb
|
|
38
|
+
- test/test_arff.arff
|
|
39
|
+
- test/test_sparse_arff.arff
|
|
40
|
+
- test/test_rarff.rb
|
|
41
|
+
has_rdoc: true
|
|
42
|
+
homepage: http://adenserparlance.blogspot.com/2007/01/rarff-simple-arff-library-in-ruby.html
|
|
43
|
+
post_install_message:
|
|
44
|
+
rdoc_options:
|
|
45
|
+
- --exclude
|
|
46
|
+
- test/*
|
|
47
|
+
- --main
|
|
48
|
+
- README.txt
|
|
49
|
+
- --inline-source
|
|
50
|
+
require_paths:
|
|
51
|
+
- lib
|
|
52
|
+
- test
|
|
53
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
54
|
+
requirements:
|
|
55
|
+
- - ">="
|
|
56
|
+
- !ruby/object:Gem::Version
|
|
57
|
+
version: "0"
|
|
58
|
+
version:
|
|
59
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
60
|
+
requirements:
|
|
61
|
+
- - ">="
|
|
62
|
+
- !ruby/object:Gem::Version
|
|
63
|
+
version: "0"
|
|
64
|
+
version:
|
|
65
|
+
requirements: []
|
|
66
|
+
|
|
67
|
+
rubyforge_project: rarff
|
|
68
|
+
rubygems_version: 1.2.0
|
|
69
|
+
signing_key:
|
|
70
|
+
specification_version: 2
|
|
71
|
+
summary: Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files
|
|
72
|
+
test_files:
|
|
73
|
+
- test/test_rarff.rb
|