wwood-rarff 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +9 -0
- data/Manifest.txt +8 -0
- data/README.txt +90 -0
- data/Rakefile +24 -0
- data/lib/rarff.rb +304 -0
- data/test/test_arff.arff +27 -0
- data/test/test_rarff.rb +163 -0
- data/test/test_sparse_arff.arff +24 -0
- metadata +73 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.txt
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
= rarff
|
2
|
+
|
3
|
+
http://adenserparlance.blogspot.com/2007/01/rarff-simple-arff-library-in-ruby.html
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify data sets for data mining and machine learning.
|
8
|
+
|
9
|
+
|
10
|
+
== FEATURES/PROBLEMS:
|
11
|
+
|
12
|
+
=== FEATURES
|
13
|
+
* Missing values: nil entries are written as '?' when creating ARFF files
|
14
|
+
|
15
|
+
=== PROBLEMS
|
16
|
+
* Spaces or quotes in nominal types are not handled
|
17
|
+
* Commas in quoted attributes or in nominal types are not handled
|
18
|
+
* Error checking/validation still needs to be added
|
19
|
+
* Creation of sparse ARFF files
|
20
|
+
* Dates: work is still needed to create, translate, and interpret date format strings.
|
21
|
+
|
22
|
+
== SYNOPSIS:
|
23
|
+
|
24
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
25
|
+
@RELATION MyCoolRelation
|
26
|
+
@ATTRIBUTE Attr0 NUMERIC
|
27
|
+
@ATTRIBUTE subject STRING
|
28
|
+
@ATTRIBUTE Attr2 NUMERIC
|
29
|
+
@ATTRIBUTE Attr3 STRING
|
30
|
+
@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
|
31
|
+
@DATA
|
32
|
+
1.4, 'foo bar', 5, baz, "1900-08-08 12:12:12"
|
33
|
+
20.9, ruby, 46, rocks, "2005-10-23 12:12:12"
|
34
|
+
0, ruby, 46, rocks, "2001-02-19 12:12:12"
|
35
|
+
68.1, stuff, 728, 'is cool', "1974-02-10 12:12:12"
|
36
|
+
END_OF_ARFF_FILE
|
37
|
+
|
38
|
+
arff_file_str.gsub!(/\n$/, '')
|
39
|
+
|
40
|
+
instances = [ [1.4, 'foo bar', 5, 'baz', "1900-08-08 12:12:12"],
|
41
|
+
[20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
|
42
|
+
[0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
|
43
|
+
[68.1, 'stuff', 728, 'is cool', "1974-02-10 12:12:12"]]
|
44
|
+
|
45
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
46
|
+
rel.instances = instances
|
47
|
+
rel.attributes[1].name = 'subject'
|
48
|
+
rel.attributes[4].name = 'birthday'
|
49
|
+
rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
|
50
|
+
|
51
|
+
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
52
|
+
assert_equal(arff_file_str, rel.to_arff, "Arff creation test failed.")
|
53
|
+
|
54
|
+
== REQUIREMENTS:
|
55
|
+
|
56
|
+
== INSTALL:
|
57
|
+
|
58
|
+
* sudo gem install wwood-rarff
|
59
|
+
|
60
|
+
== LICENSE:
|
61
|
+
|
62
|
+
Copyright (c) 2008 Andy Payne
|
63
|
+
All rights reserved.
|
64
|
+
|
65
|
+
Redistribution and use in source and binary forms, with or without
|
66
|
+
modification, are permitted provided that the following conditions are met:
|
67
|
+
|
68
|
+
* Redistributions of source code must retain the above copyright notice,
|
69
|
+
this list of conditions and the following disclaimer.
|
70
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
71
|
+
this list of conditions and the following disclaimer in the
|
72
|
+
documentation and/or other materials provided with the distribution.
|
73
|
+
* Neither the name of the COPYRIGHT OWNER nor the names of its contributors
|
74
|
+
may be used to endorse or promote products derived from this software
|
75
|
+
without specific prior written permission.
|
76
|
+
|
77
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
78
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
79
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
80
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
81
|
+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
82
|
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
83
|
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
84
|
+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
85
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
86
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# Rakefile for the rarff gem: declares the gem's name, version and metadata
# through a Hoe specification block.
require 'rubygems'
require 'hoe'
#require './lib/rarff.rb'

gem_name = 'rarff'
gem_version = '0.2.1'

# Options handed to rdoc when the documentation is generated.
rdoc_arguments = ["--exclude", "test/*", "--main", "README.txt", "--inline-source"]

hoe = Hoe.new(gem_name, gem_version) do |spec|

  # Who maintains the gem and where it lives.
  spec.author = "Andy Payne, Ben J Woodcroft"
  spec.email = "apayne .at. gmail.com, b.woodcroft@pgrad.unimelb.edu.au"
  spec.url = "http://adenserparlance.blogspot.com/2007/01/rarff-simple-arff-library-in-ruby.html"

  # Long and short descriptions (the long one deliberately spans two lines).
  spec.description = 'Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify
data sets for data mining and machine learning.'
  spec.summary = 'Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files'

  # Files that take part in the generated documentation.
  spec.rdoc_pattern = /(^lib\/.*\.rb$|^examples\/.*\.rb$|^README|^History|^License)/

  # Extra fields merged into the generated gem specification.
  spec.spec_extras = {
    :require_paths => ['lib','test'],
    :has_rdoc => true,
    :extra_rdoc_files => ["README.txt"],
    :rdoc_options => rdoc_arguments
  }
end
|
data/lib/rarff.rb
ADDED
@@ -0,0 +1,304 @@
|
|
1
|
+
# = rarff
|
2
|
+
|
3
|
+
# This is the top-level include file for rarff. See the README file for
|
4
|
+
# details.
|
5
|
+
|
6
|
+
################################################################################
|
7
|
+
|
8
|
+
# Custom scan that returns a boolean indicating whether the regex matched.
|
9
|
+
# TODO: Is there a way to avoid doing this?
|
10
|
+
# Reopens String to add a variant of #scan that reports whether it matched.
class String
  # Scans the receiver with +re+ exactly like String#scan does when given a
  # block. Every match is handed to the caller's block (when one is supplied).
  #
  # Returns true if the pattern matched at least once, false otherwise.
  def my_scan(re)
    matched = false
    scan(re) do |captures|
      yield captures if block_given?
      matched = true
    end
    matched
  end
end
|
20
|
+
|
21
|
+
################################################################################
|
22
|
+
|
23
|
+
module Enumerable
  # Maps over the collection while also passing the zero-based position of
  # each element to the block.
  #
  # Yields: item, index
  # Returns an Array with the block's result for every element. When called
  # without a block an Enumerator is returned instead of failing with a
  # LocalJumpError part-way through the iteration.
  def map_with_index
    return to_enum(:map_with_index) unless block_given?

    # The counter starts below zero so it can be bumped inline and the yield
    # stays the last expression evaluated inside the map block.
    index = -1
    map { |item| yield item, (index += 1) }
  end
end
|
36
|
+
|
37
|
+
################################################################################
|
38
|
+
|
39
|
+
module Rarff
|
40
|
+
|
41
|
+
# Line prefixes that introduce comments and the sections of an ARFF file.
# All constants are frozen so shared literals cannot be mutated by accident.
COMMENT_MARKER = '%'.freeze
RELATION_MARKER = '@RELATION'.freeze
ATTRIBUTE_MARKER = '@ATTRIBUTE'.freeze
DATA_MARKER = '@DATA'.freeze

# Delimiters that wrap a sparse data row. The ESC_ variants carry a leading
# backslash so they can be interpolated into a regular expression.
SPARSE_ARFF_BEGIN = '{'.freeze
ESC_SPARSE_ARFF_BEGIN = ('\\' + SPARSE_ARFF_BEGIN).freeze
SPARSE_ARFF_END = '}'.freeze
ESC_SPARSE_ARFF_END = ('\\' + SPARSE_ARFF_END).freeze

# Type keywords used in attribute declarations.
ATTRIBUTE_NUMERIC = 'NUMERIC'.freeze
ATTRIBUTE_REAL = 'REAL'.freeze
ATTRIBUTE_INTEGER = 'INTEGER'.freeze
ATTRIBUTE_STRING = 'STRING'.freeze
ATTRIBUTE_DATE = 'DATE'.freeze

# Placeholder written in the data section for a missing value.
MISSING = '?'.freeze
|
58
|
+
|
59
|
+
################################################################################
|
60
|
+
|
61
|
+
# One ARFF attribute (column): a name plus a type.
#
# The type is either a String such as 'NUMERIC' or 'DATE "yyyy-MM-dd"', or,
# for nominal attributes, an Array holding the permitted values. A String of
# the form "{a, b, c}" is converted into such an Array on assignment.
class Attribute
  attr_accessor :name
  attr_reader :type

  # name - attribute name as written after @ATTRIBUTE
  # type - type String, "{...}" nominal specification, or Array of values
  def initialize(name='', type='')
    @name = name
    self.type = type
  end


  # Replaces the type. The nominal flag is recomputed from scratch so that
  # switching a nominal attribute back to a plain type keeps to_arff working.
  def type=(type)
    @type = type
    @type_is_nominal = false
    check_nominal
  end


  # Convert string representation of nominal type to array, if necessary.
  # An Array is accepted as an already-expanded nominal type. Only Strings
  # are pattern-matched, because Array does not respond to =~ on current
  # Ruby versions.
  # TODO: This might falsely trigger on wacky date formats.
  def check_nominal
    if @type.is_a?(Array)
      @type_is_nominal = true
    elsif @type.is_a?(String) && (braced = /\A\s*\{(.*)\}\s*\z/.match(@type))
      @type_is_nominal = true
      # Example format: "{nom1,nom2, nom3, nom4,nom5 } "
      @type = braced[1].strip.split(/\s*,\s*/)
    end
  end


  # Appends one permitted value to a nominal attribute. A non-nominal
  # attribute is turned into a nominal one whose only value is +str+.
  # Returns the Array of nominal values.
  def add_nominal_value(str)
    unless @type_is_nominal
      @type = []
      # Remember the switch; otherwise the next call would start over with an
      # empty list and to_arff would not treat the type as nominal.
      @type_is_nominal = true
    end

    @type << str
  end


  # Renders the "@ATTRIBUTE name type" declaration line. Nominal values are
  # written comma-separated inside braces.
  def to_arff
    rendered_type = @type_is_nominal ? "{#{@type.join(',')}}" : @type.to_s
    "#{ATTRIBUTE_MARKER} #{@name} #{rendered_type}"
  end


  def to_s
    to_arff
  end

end
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
# An ARFF relation: a name, a list of Attribute declarations and the data
# rows ("instances"). It can be filled from Ruby arrays (instances=) or from
# ARFF text (parse), and written back out with to_arff.
class Relation
  attr_accessor :name, :attributes
  attr_reader :instances

  # Text forms accepted as numbers when reading ARFF data.
  INTEGER_PATTERN = /\A[-+]?\d+\z/
  NUMERIC_PATTERN = /\A[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\z/


  def initialize(name='')
    @name = name
    @attributes = []
    @instances = []
  end


  # Reads ARFF text into this relation.
  #
  # Blank lines and comment lines are skipped. @RELATION sets the name (as a
  # String), each @ATTRIBUTE appends a declaration and @DATA switches to the
  # data section. Every following line becomes one instance: sparse rows are
  # expanded, outer quotes are removed, '?' becomes nil, and values in
  # columns whose attribute type is NUMERIC/REAL/INTEGER are converted to
  # Integer or Float. Declared attributes are never replaced; columns without
  # a declaration get an inferred one.
  #
  # Returns self.
  # TODO: Doesn't handle commas in quoted attributes.
  def parse(str)
    in_data_section = false
    parsed_rows = []

    str.each_line do |raw_line|
      line = raw_line.strip
      next if line.empty? || line.start_with?(COMMENT_MARKER)

      if (found = /\A#{RELATION_MARKER}\s*(.*)\z/i.match(line))
        @name = found[1]
      elsif (found = /\A#{ATTRIBUTE_MARKER}\s*(\S*)\s+(.*)\z/i.match(line))
        @attributes.push(Attribute.new(found[1], found[2]))
      elsif line =~ /\A#{DATA_MARKER}/i
        in_data_section = true
      elsif in_data_section
        row = parse_data_line(line)
        parsed_rows << row
        @instances << row
      end
    end

    unless parsed_rows.empty?
      create_attributes(true)
      # Conversion happens after attribute creation so inferred NUMERIC
      # columns are converted just like declared ones.
      parsed_rows.each { |row| coerce_row!(row) }
    end
    self
  end


  # Assign instances to the internal array
  # parse: choose to parse strings into numerics
  def instances=(instances, parse=false)
    @instances = instances
    create_attributes(parse)
  end


  # Builds Attribute objects named "Attr<column>" from the stored instances.
  # The type of a column is taken from its first non-nil value, so a column
  # that is nil in the first row is still typed from a later row.
  #
  # attr_parse - false: values are Ruby objects; Numeric gives NUMERIC,
  #              String gives STRING, and existing attributes are replaced.
  #              true: values come from ARFF text; existing attributes are
  #              kept and only missing ones are inferred.
  #
  # Raises ArgumentError when there is no data or a value has an unsupported
  # class.
  def create_attributes(attr_parse=false)
    if @instances.nil? || @instances.empty? || @instances[0].empty?
      raise ArgumentError, "Not enough data to create ARFF attributes"
    end

    decided = {}
    @instances.each do |row|
      row.each_with_index do |col, j|
        next if decided[j] || col.nil?

        decided[j] = true # whatever happens, this column is settled now
        if attr_parse
          @attributes[j] ||= Attribute.new("Attr#{j}", inferred_text_type(col))
        else
          @attributes[j] = Attribute.new("Attr#{j}", type_for_value(col))
        end
      end
    end

    # Make sure all attributes have a definition, because otherwise
    # needless errors are thrown
    @instances[0].each_index do |j|
      @attributes[j] ||= Attribute.new("Attr#{j}", ATTRIBUTE_NUMERIC)
    end
  end


  # Expands one sparse row ("{index value, index value}", braces optional)
  # into a full Array with one slot per declared attribute. Slots that are
  # not mentioned hold 0. Values are returned as Strings with outer quotes
  # removed.
  def expand_sparse(str)
    row = Array.new(@attributes.size, 0)
    inner = str.sub(/\A\s*\{(.*)\}\s*\z/, '\1')
    inner.split(/\s*,\s*/).each do |pair|
      # Limit the split to two parts so a quoted value keeps its spaces.
      index, value = pair.strip.split(/\s+/, 2)
      next if index.nil?

      row[index.to_i] = value.nil? ? nil : unquote(value)
    end
    row
  end


  # Renders the relation as ARFF text.
  #
  # sparse - when true each row is written as "{index value, ...}" and
  #          numeric zeros are left out. Missing values are written
  #          explicitly as "index ?", because an omitted sparse value is
  #          read back as 0.
  def to_arff(sparse=false)
    header = "#{RELATION_MARKER} #{@name}\n" +
             @attributes.map { |attr| attr.to_arff }.join("\n") +
             "\n#{DATA_MARKER}\n"

    header + @instances.map { |inst| format_instance(inst, sparse) }.join("\n")
  end


  def to_s
    to_arff
  end


  private

  # Turns one stripped data line into an Array of Strings and nils.
  def parse_data_line(line)
    fields = if line.start_with?(SPARSE_ARFF_BEGIN) && line.end_with?(SPARSE_ARFF_END)
               expand_sparse(line)
             else
               line.split(/\s*,\s*/).map { |field| unquote(field) }
             end
    fields.map { |value| value == MISSING ? nil : value }
  end

  # Removes one pair of matching outer single or double quotes, if present
  # ('foo bar' --> foo bar).
  def unquote(field)
    text = field.strip
    quoted = /\A'(.*)'\z/.match(text) || /\A"(.*)"\z/.match(text)
    quoted ? quoted[1] : text
  end

  # Attribute type for a value read from ARFF text.
  def inferred_text_type(value)
    return ATTRIBUTE_NUMERIC if value.is_a?(Numeric)

    value.to_s =~ NUMERIC_PATTERN ? ATTRIBUTE_NUMERIC : ATTRIBUTE_STRING
  end

  # Attribute type for a Ruby value supplied through instances=.
  def type_for_value(value)
    case value
    when Numeric then ATTRIBUTE_NUMERIC
    when String then ATTRIBUTE_STRING
    else raise ArgumentError, "Could not parse attribute: #{value.inspect}"
    end
  end

  # Type of column +index+ as a stripped String; '' when the attribute is
  # missing or nominal (its type is then an Array).
  def attribute_type_name(index)
    attribute = @attributes[index]
    type = attribute && attribute.type
    type.is_a?(String) ? type.strip : ''
  end

  def numeric_type?(type_name)
    !(type_name =~ /\A(?:#{ATTRIBUTE_NUMERIC}|#{ATTRIBUTE_REAL}|#{ATTRIBUTE_INTEGER})\z/i).nil?
  end

  # Converts the String values of numeric columns in +row+ in place.
  def coerce_row!(row)
    row.each_with_index do |value, j|
      next unless value.is_a?(String) && numeric_type?(attribute_type_name(j))

      row[j] = coerce_numeric(value)
    end
  end

  # Integer for whole numbers, Float for other numeric text, and the
  # unchanged String when the text is not numeric.
  def coerce_numeric(text)
    if text =~ INTEGER_PATTERN
      text.to_i
    elsif text =~ NUMERIC_PATTERN
      text.to_f
    else
      text
    end
  end

  def already_quoted?(text)
    text.length >= 2 && ((text.start_with?("'") && text.end_with?("'")) ||
                         (text.start_with?('"') && text.end_with?('"')))
  end

  # Quotes STRING values that contain whitespace (single quotes) and DATE
  # values (double quotes); everything else is returned untouched.
  # TODO: Quote characters inside a value are not escaped.
  def quote_value(value, type_name)
    if type_name =~ /\A#{ATTRIBUTE_STRING}\z/i
      text = value.to_s
      text =~ /\s/ && !already_quoted?(text) ? "'#{text}'" : value
    elsif type_name =~ /\A#{ATTRIBUTE_DATE}/i # DATE is followed by a format
      text = value.to_s
      already_quoted?(text) ? text : "\"#{text}\""
    else
      value
    end
  end

  # Renders one data row, in dense or sparse notation.
  def format_instance(inst, sparse)
    cells = inst.each_with_index.map do |col, i|
      type_name = attribute_type_name(i)
      text = col.nil? ? MISSING : quote_value(col, type_name)

      if sparse
        zero = !col.nil? && numeric_type?(type_name) && col == 0
        zero ? nil : "#{i} #{text}"
      else
        text
      end
    end

    line = cells.compact.join(', ')
    sparse ? "{#{line}}" : line
  end

end
|
272
|
+
|
273
|
+
|
274
|
+
end # module Rarff
|
275
|
+
|
276
|
+
################################################################################
|
277
|
+
|
278
|
+
# Command-line entry point: when this file is run directly with the path of
# an ARFF file as its first argument, the file is parsed and written back to
# standard output as ARFF. Without an argument the script exits silently.
if $0 == __FILE__

  in_file = ARGV[0]
  exit unless in_file

  rel = Rarff::Relation.new
  # File.read closes the file again; File.open(...).read left it open.
  rel.parse(File.read(in_file))

  puts '='*80
  puts '='*80
  puts "ARFF:"
  puts rel

end
|
301
|
+
|
302
|
+
################################################################################
|
303
|
+
|
304
|
+
|
data/test/test_arff.arff
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
% 1. Title: Iris Plants Database
|
2
|
+
%
|
3
|
+
% 2. Sources:
|
4
|
+
% (a) Creator: R.A. Fisher
|
5
|
+
% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
|
6
|
+
% (c) Date: July, 1988
|
7
|
+
%
|
8
|
+
@RELATION iris
|
9
|
+
|
10
|
+
@ATTRIBUTE sepallength NUMERIC
|
11
|
+
@ATTRIBUTE sepalwidth NUMERIC
|
12
|
+
@ATTRIBUTE petallength NUMERIC
|
13
|
+
@ATTRIBUTE petalwidth NUMERIC
|
14
|
+
@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
|
15
|
+
|
16
|
+
@DATA
|
17
|
+
5.1,3.5,1.4,0.2,Iris-setosa
|
18
|
+
4.9,3.0,1.4,0.2,Iris-setosa
|
19
|
+
4.7,3.2,1.3,0.2,Iris-setosa
|
20
|
+
4.6,3.1,1.5,0.2,Iris-setosa
|
21
|
+
5.0,3.6,1.4,0.2,Iris-setosa
|
22
|
+
5.4,3.9,1.7,0.4,Iris-setosa
|
23
|
+
4.6,3.4,1.4,0.3,Iris-setosa
|
24
|
+
5.0,3.4,1.5,0.2,Iris-setosa
|
25
|
+
4.4,2.9,1.4,0.2,Iris-setosa
|
26
|
+
4.9,3.1,1.5,0.1,Iris-setosa
|
27
|
+
|
data/test/test_rarff.rb
ADDED
@@ -0,0 +1,163 @@
|
|
1
|
+
# See the README file for more information.
|
2
|
+
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
3
|
+
require 'test/unit'
|
4
|
+
require 'rarff'
|
5
|
+
|
6
|
+
class TestArffLib < Test::Unit::TestCase
|
7
|
+
|
8
|
+
# Test creation of an arff file string.
|
9
|
+
# Builds a relation from Ruby arrays, renames two attributes, retypes the
# last one as a date and checks the complete ARFF text that is produced.
def test_arff_creation
  expected_arff = [
    '@RELATION MyCoolRelation',
    '@ATTRIBUTE Attr0 NUMERIC',
    '@ATTRIBUTE subject STRING',
    '@ATTRIBUTE Attr2 NUMERIC',
    '@ATTRIBUTE Attr3 STRING',
    '@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"',
    '@DATA',
    %q{1.4, 'foo bar', 5, baz, "1900-08-08 12:12:12"},
    %q{20.9, ruby, 46, rocks, "2005-10-23 12:12:12"},
    %q{0, ruby, 46, rocks, "2001-02-19 12:12:12"},
    %q{68.1, stuff, 728, 'is cool', "1974-02-10 12:12:12"}
  ].join("\n")

  rows = [
    [1.4, 'foo bar', 5, 'baz', "1900-08-08 12:12:12"],
    [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
    [0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
    [68.1, 'stuff', 728, 'is cool', "1974-02-10 12:12:12"]
  ]

  relation = Rarff::Relation.new('MyCoolRelation')
  relation.instances = rows
  relation.attributes[1].name = 'subject'

  birthday = relation.attributes[4]
  birthday.name = 'birthday'
  birthday.type = 'DATE "yyyy-MM-dd HH:mm:ss"'

  assert_equal(expected_arff, relation.to_arff, "Arff creation test failed.")
end
|
41
|
+
#
|
42
|
+
# # Test creation of a sparse arff file string.
|
43
|
+
# def test_sparse_arff_creation
|
44
|
+
#
|
45
|
+
# arff_file_str = <<-END_OF_ARFF_FILE
|
46
|
+
#@RELATION MyCoolRelation
|
47
|
+
#@ATTRIBUTE Attr0 NUMERIC
|
48
|
+
#@ATTRIBUTE subject STRING
|
49
|
+
#@ATTRIBUTE Attr2 NUMERIC
|
50
|
+
#@ATTRIBUTE Attr3 STRING
|
51
|
+
#@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
|
52
|
+
#@DATA
|
53
|
+
#{0 1.4, 1 'foo bar', 3 baz, 4 "1900-08-08 12:12:12"}
|
54
|
+
#{0 20.9, 1 ruby, 2 46, 3 rocks, 4 "2005-10-23 12:12:12"}
|
55
|
+
#{1 ruby, 2 46, 3 rocks, 4 "2001-02-19 12:12:12"}
|
56
|
+
#{0 68.1, 1 stuff, 3 'is cool', 4 "1974-02-10 12:12:12"}
|
57
|
+
# END_OF_ARFF_FILE
|
58
|
+
#
|
59
|
+
# arff_file_str.gsub!(/\n$/, '')
|
60
|
+
#
|
61
|
+
# instances = [ [1.4, 'foo bar', 0, 'baz', "1900-08-08 12:12:12"],
|
62
|
+
# [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
|
63
|
+
# [0.0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
|
64
|
+
# [68.1, 'stuff', 0, 'is cool', "1974-02-10 12:12:12"]]
|
65
|
+
#
|
66
|
+
# rel = Rarff::Relation.new('MyCoolRelation')
|
67
|
+
# rel.instances = instances
|
68
|
+
# rel.attributes[1].name = 'subject'
|
69
|
+
# rel.attributes[4].name = 'birthday'
|
70
|
+
# rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
|
71
|
+
#
|
72
|
+
# # puts "rel.to_arff(true):\n(\n#{rel.to_arff(true)}\n)\n"
|
73
|
+
# assert_equal( arff_file_str, rel.to_arff(true), "test_sparse_arff_creation.")
|
74
|
+
# end
|
75
|
+
#
|
76
|
+
#
|
77
|
+
# # Test parsing of an arff file.
|
78
|
+
# def test_arff_parse
|
79
|
+
# in_file = './test_arff.arff'
|
80
|
+
# rel = Rarff::Relation.new
|
81
|
+
# rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
|
82
|
+
#
|
83
|
+
# assert_equal(rel.instances[2][1], 3.2)
|
84
|
+
# assert_equal(rel.instances[7][4], 'Iris-setosa')
|
85
|
+
# end
|
86
|
+
#
|
87
|
+
#
|
88
|
+
# # Test parsing of sparse ARFF format
|
89
|
+
# def test_sparse_arff_parse
|
90
|
+
# in_file = './test_sparse_arff.arff'
|
91
|
+
# rel = Rarff::Relation.new
|
92
|
+
# rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
|
93
|
+
#
|
94
|
+
# assert_equal(13, rel.instances[0].size)
|
95
|
+
# assert_equal(0, rel.instances[0][1])
|
96
|
+
# assert_equal(7, rel.instances[0][3])
|
97
|
+
# assert_equal(2.4, rel.instances[1][1])
|
98
|
+
# assert_equal(0, rel.instances[1][2])
|
99
|
+
# assert_equal(19, rel.instances[1][12])
|
100
|
+
# assert_equal(6, rel.instances[2][6])
|
101
|
+
# assert_equal(0, rel.instances[3][12])
|
102
|
+
# # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
|
103
|
+
# end
|
104
|
+
#
|
105
|
+
# nil values in the instances must be written as '?' in the data section.
def test_output_missing
  expected_arff = [
    '@RELATION MyCoolRelation',
    '@ATTRIBUTE Attr0 NUMERIC',
    '@ATTRIBUTE subject STRING',
    '@ATTRIBUTE Attr2 NUMERIC',
    '@ATTRIBUTE Attr3 STRING',
    '@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"',
    '@DATA',
    %q{?, 'foo bar', 5, baz, ?},
    %q{20.9, ruby, 46, ?, "2005-10-23 12:12:12"}
  ].join("\n")

  rows = [
    [nil, 'foo bar', 5, 'baz', nil],
    [20.9, 'ruby', 46, nil, "2005-10-23 12:12:12"]
  ]

  relation = Rarff::Relation.new('MyCoolRelation')
  relation.instances = rows
  relation.attributes[1].name = 'subject'

  birthday = relation.attributes[4]
  birthday.name = 'birthday'
  birthday.type = 'DATE "yyyy-MM-dd HH:mm:ss"'

  assert_equal(expected_arff, relation.to_arff, "missing data output failure")
end
|
132
|
+
|
133
|
+
# A first row made only of nils must not prevent attribute creation: types
# come from later rows, and a column that is nil everywhere (Attr3) is
# expected to be declared NUMERIC.
def test_output_missing_undefined_first_row
  expected_arff = [
    '@RELATION MyCoolRelation',
    '@ATTRIBUTE Attr0 NUMERIC',
    '@ATTRIBUTE subject STRING',
    '@ATTRIBUTE Attr2 NUMERIC',
    '@ATTRIBUTE Attr3 NUMERIC',
    '@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"',
    '@DATA',
    '?, ?, ?, ?, ?',
    %q{20.9, ruby, 46, ?, "2005-10-23 12:12:12"}
  ].join("\n")

  rows = [
    [nil, nil, nil, nil, nil],
    [20.9, 'ruby', 46, nil, "2005-10-23 12:12:12"]
  ]

  relation = Rarff::Relation.new('MyCoolRelation')
  relation.instances = rows
  relation.attributes[1].name = 'subject'

  birthday = relation.attributes[4]
  birthday.name = 'birthday'
  birthday.type = 'DATE "yyyy-MM-dd HH:mm:ss"'

  assert_equal(expected_arff, relation.to_arff, "missing data output failure")
end
|
160
|
+
end
|
161
|
+
|
162
|
+
|
163
|
+
|
@@ -0,0 +1,24 @@
|
|
1
|
+
% Sample sparse ARFF file
|
2
|
+
@RELATION sparseness
|
3
|
+
|
4
|
+
@ATTRIBUTE attr1 NUMERIC
|
5
|
+
@ATTRIBUTE attr2 NUMERIC
|
6
|
+
@ATTRIBUTE attr3 NUMERIC
|
7
|
+
@ATTRIBUTE attr4 NUMERIC
|
8
|
+
@ATTRIBUTE attr5 NUMERIC
|
9
|
+
@ATTRIBUTE attr6 NUMERIC
|
10
|
+
@ATTRIBUTE attr7 NUMERIC
|
11
|
+
@ATTRIBUTE attr8 NUMERIC
|
12
|
+
@ATTRIBUTE attr9 NUMERIC
|
13
|
+
@ATTRIBUTE attr10 NUMERIC
|
14
|
+
@ATTRIBUTE attr11 NUMERIC
|
15
|
+
@ATTRIBUTE attr12 NUMERIC
|
16
|
+
@ATTRIBUTE attr13 NUMERIC
|
17
|
+
|
18
|
+
@DATA
|
19
|
+
{3 7, 10 34}
|
20
|
+
{1 2.4, 4 62, 12 19}
|
21
|
+
{0 0, 1 1, 2 2, 3 3, 4 4, 5 5, 6 6, 7 7, 8 8, 9 9, 10 10, 11 11, 12 12}
|
22
|
+
{9 42}
|
23
|
+
{2 54.3, 3 92, 11 10.2}
|
24
|
+
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wwood-rarff
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andy Payne, Ben J Woodcroft
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-11-25 00:00:00 -08:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hoe
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.8.2
|
23
|
+
version:
|
24
|
+
description: Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify data sets for data mining and machine learning.
|
25
|
+
email: apayne .at. gmail.com, b.woodcroft@pgrad.unimelb.edu.au
|
26
|
+
executables: []
|
27
|
+
|
28
|
+
extensions: []
|
29
|
+
|
30
|
+
extra_rdoc_files:
|
31
|
+
- README.txt
|
32
|
+
files:
|
33
|
+
- History.txt
|
34
|
+
- Manifest.txt
|
35
|
+
- README.txt
|
36
|
+
- Rakefile
|
37
|
+
- lib/rarff.rb
|
38
|
+
- test/test_arff.arff
|
39
|
+
- test/test_sparse_arff.arff
|
40
|
+
- test/test_rarff.rb
|
41
|
+
has_rdoc: true
|
42
|
+
homepage: http://adenserparlance.blogspot.com/2007/01/rarff-simple-arff-library-in-ruby.html
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options:
|
45
|
+
- --exclude
|
46
|
+
- test/*
|
47
|
+
- --main
|
48
|
+
- README.txt
|
49
|
+
- --inline-source
|
50
|
+
require_paths:
|
51
|
+
- lib
|
52
|
+
- test
|
53
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: "0"
|
58
|
+
version:
|
59
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: "0"
|
64
|
+
version:
|
65
|
+
requirements: []
|
66
|
+
|
67
|
+
rubyforge_project: rarff
|
68
|
+
rubygems_version: 1.2.0
|
69
|
+
signing_key:
|
70
|
+
specification_version: 2
|
71
|
+
summary: Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files
|
72
|
+
test_files:
|
73
|
+
- test/test_rarff.rb
|