wwood-rarff 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt ADDED
@@ -0,0 +1,9 @@
1
+ == Changes
2
+
3
+ === 0.2.1 (unofficial)
4
+
5
+ * Handles missing data in output, encoded internally as nil values
6
+
7
+ === 0.2.0 ?
8
+
9
+ * Sparse ARFF files (thanks to Tom Adams)
data/Manifest.txt ADDED
@@ -0,0 +1,8 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ lib/rarff.rb
6
+ test/test_arff.arff
7
+ test/test_sparse_arff.arff
8
+ test/test_rarff.rb
data/README.txt ADDED
@@ -0,0 +1,90 @@
1
+ = rarff
2
+
3
+ http://adenserparlance.blogspot.com/2007/01/rarff-simple-arff-library-in-ruby.html
4
+
5
+ == DESCRIPTION:
6
+
7
+ Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify data sets for data mining and machine learning.
8
+
9
+
10
+ == FEATURES/PROBLEMS:
11
+
12
+ === FEATURES
13
+ * Missing values ('?') are handled when creating ARFF files
14
+
15
+ === PROBLEMS
16
+ * Spaces or quotes in nominal types
17
+ * Commas in quoted attributes or in nominal types
18
+ * Add error checking/validation
19
+ * Creation of sparse ARFF files
20
+ * Dates - do some work to create, translate, and interpret date format strings.
21
+
22
+ == SYNOPSIS:
23
+
24
+ arff_file_str = <<-END_OF_ARFF_FILE
25
+ @RELATION MyCoolRelation
26
+ @ATTRIBUTE Attr0 NUMERIC
27
+ @ATTRIBUTE subject STRING
28
+ @ATTRIBUTE Attr2 NUMERIC
29
+ @ATTRIBUTE Attr3 STRING
30
+ @ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
31
+ @DATA
32
+ 1.4, 'foo bar', 5, baz, "1900-08-08 12:12:12"
33
+ 20.9, ruby, 46, rocks, "2005-10-23 12:12:12"
34
+ 0, ruby, 46, rocks, "2001-02-19 12:12:12"
35
+ 68.1, stuff, 728, 'is cool', "1974-02-10 12:12:12"
36
+ END_OF_ARFF_FILE
37
+
38
+ arff_file_str.gsub!(/\n$/, '')
39
+
40
+ instances = [ [1.4, 'foo bar', 5, 'baz', "1900-08-08 12:12:12"],
41
+ [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
42
+ [0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
43
+ [68.1, 'stuff', 728, 'is cool', "1974-02-10 12:12:12"]]
44
+
45
+ rel = Rarff::Relation.new('MyCoolRelation')
46
+ rel.instances = instances
47
+ rel.attributes[1].name = 'subject'
48
+ rel.attributes[4].name = 'birthday'
49
+ rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
50
+
51
+ # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
52
+ assert_equal(arff_file_str, rel.to_arff, "Arff creation test failed.")
53
+
54
+ == REQUIREMENTS:
55
+
56
+ == INSTALL:
57
+
58
+ * sudo gem install wwood-rarff
59
+
60
+ == LICENSE:
61
+
62
+ Copyright (c) 2008 Andy Payne
63
+ All rights reserved.
64
+
65
+ Redistribution and use in source and binary forms, with or without
66
+ modification, are permitted provided that the following conditions are met:
67
+
68
+ * Redistributions of source code must retain the above copyright notice,
69
+ this list of conditions and the following disclaimer.
70
+ * Redistributions in binary form must reproduce the above copyright notice,
71
+ this list of conditions and the following disclaimer in the
72
+ documentation and/or other materials provided with the distribution.
73
+ * Neither the name of the COPYRIGHT OWNER nor the names of its contributors
74
+ may be used to endorse or promote products derived from this software
75
+ without specific prior written permission.
76
+
77
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
78
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
79
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
80
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
81
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
82
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
83
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
84
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
85
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
86
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
87
+
88
+
89
+
90
+
data/Rakefile ADDED
@@ -0,0 +1,24 @@
1
+ require 'rubygems'
2
+ require 'hoe'
3
+ #require './lib/rarff.rb'
4
+
5
gem_name = 'rarff'

# Gem definition, declared through Hoe. The value returned by Hoe.new is not
# used anywhere in this file, so it is no longer bound to a local.
Hoe.new(gem_name, '0.2.1') do |p|

  p.author = "Andy Payne, Ben J Woodcroft"
  p.email = "apayne .at. gmail.com, b.woodcroft@pgrad.unimelb.edu.au"
  p.url = "http://adenserparlance.blogspot.com/2007/01/rarff-simple-arff-library-in-ruby.html"

  # Built from two literals so the description contains no embedded line
  # break (the text matches the DESCRIPTION paragraph of README.txt).
  p.description = 'Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify ' +
                  'data sets for data mining and machine learning.'
  p.summary = 'Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files'

  p.rdoc_pattern = /(^lib\/.*\.rb$|^examples\/.*\.rb$|^README|^History|^License)/

  p.spec_extras = {
    :require_paths => ['lib','test'],
    :has_rdoc => true,
    :extra_rdoc_files => ["README.txt"],
    :rdoc_options => ["--exclude", "test/*", "--main", "README.txt", "--inline-source"]
  }
end
data/lib/rarff.rb ADDED
@@ -0,0 +1,304 @@
1
+ # = rarff
2
+
3
+ # This is the top-level include file for rarff. See the README file for
4
+ # details.
5
+
6
+ ################################################################################
7
+
8
# Custom scan that returns a boolean indicating whether the pattern matched.
class String
  # Scans the receiver for +re+ (anything String#scan accepts).
  #
  # With a block, every match is yielded exactly as String#scan would yield
  # it. Without a block only the yes/no answer is needed, so the search stops
  # at the first match instead of walking the whole string.
  #
  # Returns true if +re+ matched at least once, false otherwise.
  def my_scan(re)
    # String#index accepts the same String/Regexp patterns as String#scan.
    return !index(re).nil? unless block_given?

    hit = false
    scan(re) do |arr|
      yield arr
      hit = true
    end
    hit
  end
end
20
+
21
+ ################################################################################
22
+
23
module Enumerable
  # Like +map+, but the block also receives the zero-based position of each
  # item: the block is called as (item, index) and its results are collected
  # into an Array.
  #
  # Without a block an Enumerator is returned (previously this raised
  # LocalJumpError), which matches how the built-in iterators behave.
  def map_with_index
    return to_enum(:map_with_index) unless block_given?

    each_with_index.map { |item, i| yield item, i }
  end
end
36
+
37
+ ################################################################################
38
+
39
# Rarff: reading and writing Attribute-Relation File Format (ARFF) data.
#
# Rarff::Attribute models a single @ATTRIBUTE declaration. Rarff::Relation
# models a whole relation (name, attributes and instance rows) and converts
# to and from ARFF text, in the dense layout and in the sparse
# "{index value, ...}" layout.
module Rarff

  COMMENT_MARKER = '%'
  RELATION_MARKER = '@RELATION'
  ATTRIBUTE_MARKER = '@ATTRIBUTE'
  DATA_MARKER = '@DATA'

  SPARSE_ARFF_BEGIN = '{'
  ESC_SPARSE_ARFF_BEGIN = '\\' + SPARSE_ARFF_BEGIN
  SPARSE_ARFF_END = '}'
  ESC_SPARSE_ARFF_END = '\\' + SPARSE_ARFF_END

  ATTRIBUTE_NUMERIC = 'NUMERIC'
  ATTRIBUTE_REAL = 'REAL'
  ATTRIBUTE_INTEGER = 'INTEGER'
  ATTRIBUTE_STRING = 'STRING'
  ATTRIBUTE_DATE = 'DATE'

  # Marker written for a missing (nil) value.
  MISSING = '?'

  ################################################################################

  # One ARFF attribute: a name plus a type. The type is either a String
  # (e.g. 'NUMERIC', 'STRING', 'DATE "yyyy-MM-dd"') or, for nominal
  # attributes, an Array of the allowed values.
  class Attribute
    attr_accessor :name, :type

    # name:: attribute name
    # type:: type string; a "{a,b,c}" string (or an Array) makes the
    #        attribute nominal
    def initialize(name='', type='')
      @name = name

      @type_is_nominal = false
      @type = type

      check_nominal
    end


    # Replaces the type and re-evaluates whether the attribute is nominal.
    def type=(type)
      @type = type
      check_nominal
    end


    # Convert string representation of nominal type to array, if necessary,
    # and keep the nominal flag in step with the current type. The flag is
    # also cleared again when a plain type replaces a nominal one, so that
    # to_arff never tries to join a String.
    def check_nominal
      if @type.is_a?(Array)
        @type_is_nominal = true
      elsif @type.is_a?(String) && @type =~ /\A\s*\{.*\}\s*\z/
        @type_is_nominal = true
        # Example format: "{nom1,nom2, nom3, nom4,nom5 } "
        # Strip the braces, then split on ','
        @type = @type.gsub(/^\s*\{\s*/, '').gsub(/\s*\}\s*$/, '').split(/\s*\,\s*/)
      else
        @type_is_nominal = false
      end
    end


    # Appends +str+ to the list of nominal values. A non-nominal attribute is
    # turned into a nominal one with an initially empty value list; the flag
    # is recorded so later calls extend the list instead of resetting it.
    def add_nominal_value(str)
      unless @type_is_nominal
        @type = []
        @type_is_nominal = true
      end

      @type << str
    end


    # Returns the "@ATTRIBUTE name type" declaration line. Nominal value
    # lists are wrapped in braces, as the ARFF format requires.
    def to_arff
      if @type_is_nominal
        "#{ATTRIBUTE_MARKER} #{@name} {#{@type.join(',')}}"
      else
        "#{ATTRIBUTE_MARKER} #{@name} #{@type}"
      end
    end


    def to_s
      to_arff
    end

  end


  # A relation: name, attribute declarations and instance rows. Each row is
  # an Array with one entry per attribute; nil encodes a missing value.
  class Relation
    attr_accessor :name, :attributes, :instances

    # Text that counts as a number when attribute types are inferred from
    # parsed (string) data.
    NUMERIC_PATTERN = /\A\-?\d+\.?\d*\z/


    def initialize(name='')
      @name = name
      @attributes = Array.new
      @instances = Array.new
    end


    # Parses ARFF text into this relation (name, attributes and instances).
    # Values are kept as the strings found in the file; cells a sparse row
    # leaves out are filled with the Integer 0. Returns self.
    #
    # TODO: Doesn't handle commas in quoted attributes.
    def parse(str)
      in_data_section = false
      parsed_rows = false

      str.split("\n").each do |raw|
        # strip also removes the "\r" left behind by CRLF line endings
        line = raw.strip
        next if line.empty?
        next if line.start_with?(COMMENT_MARKER)

        if (m = line.match(/\A#{RELATION_MARKER}\s*(.*)\z/i))
          # Store the captured text itself, not the array of captures.
          @name = m[1].strip
          next
        end

        if (m = line.match(/\A#{ATTRIBUTE_MARKER}\s*([^\s]*)\s+(.*)\z/i))
          @attributes.push(Attribute.new(m[1], m[2].strip))
          next
        end

        if line =~ /\A#{DATA_MARKER}/i
          in_data_section = true
          next
        end

        next unless in_data_section ## Below is data section handling

        if (m = line.match(/\A#{ESC_SPARSE_ARFF_BEGIN}(.*)#{ESC_SPARSE_ARFF_END}\z/))
          # Sparse ARFF
          @instances << expand_sparse(m[1])
        else
          @instances << line.split(/,\s*/).map { |field|
            # Remove outer single quotes on strings, if any ('foo bar' --> foo bar)
            field.strip.gsub(/\A'(.*)'\z/, "\\1")
          }
        end
        parsed_rows = true
      end

      # Infer attributes once, after all rows are known, and only for columns
      # the file did not declare.
      create_attributes(true) if parsed_rows
      self
    end


    # Assign instances to the internal array
    # parse: choose to parse strings into numerics
    def instances=(instances, parse=false)
      @instances = instances
      create_attributes(parse)
    end


    # Derives attribute definitions from the instance data. The first
    # non-missing value found in a column decides that column's type; a
    # column with no usable value at all defaults to NUMERIC.
    #
    # attr_parse:: false - values are Ruby objects; attributes are rebuilt
    #              from them (Numeric -> NUMERIC, String -> STRING).
    #              true  - values are strings read from a file; attributes
    #              already declared are kept untouched and only missing ones
    #              are inferred (numeric-looking text -> NUMERIC, anything
    #              else -> STRING).
    #
    # Raises ArgumentError when there are no instances or the first row is
    # empty, or when a value is neither Numeric nor String (attr_parse false).
    def create_attributes(attr_parse=false)
      if @instances.nil? || @instances.empty? || @instances[0].empty?
        raise ArgumentError, "Not enough data to create ARFF attributes"
      end

      # Keep track of whether an attribute has been defined or not.
      # The only reason an attribute would not be defined in the first
      # row is if it has nil's in it.
      attributes_defined = {}
      @instances.each do |row|
        row.each_with_index do |col, j|
          next if attributes_defined[j] || col.nil?
          # A '?' read from a file says nothing about the column type either.
          next if attr_parse && col == MISSING

          attributes_defined[j] = true #whatever happens, we are going to define it
          if attr_parse
            @attributes[j] ||= Attribute.new("Attr#{j}", parsed_type(col))
          else
            # No parsing - just take it how it is
            @attributes[j] = Attribute.new("Attr#{j}", literal_type(col))
          end
        end
      end

      # Make sure all attributes have a definition, because otherwise
      # needless errors are thrown
      @instances[0].each_index do |i|
        @attributes[i] ||= Attribute.new("Attr#{i}", ATTRIBUTE_NUMERIC)
      end
    end


    # Expands the body of a sparse row ("3 7, 10 34", braces optional) into a
    # full row with one entry per declared attribute. Positions that are not
    # listed are 0. A value may contain spaces when quoted ('foo bar'); outer
    # single quotes are removed, as for dense rows.
    def expand_sparse(str)
      arr = Array.new(@attributes.size, 0)
      str.gsub(/^\s*\{(.*)\}\s*$/, "\\1").split(/\s*\,\s*/).each do |pr|
        # Limit 2: everything after the index belongs to the value.
        index, value = pr.strip.split(/\s+/, 2)
        next if index.nil?
        arr[index.to_i] = value && value.gsub(/\A'(.*)'\z/, "\\1")
      end
      arr
    end


    # Serializes the relation as ARFF text (no trailing newline).
    #
    # sparse:: when true, rows are written as "{index value, ...}" and zeros
    #          of NUMERIC attributes are left out. Missing (nil) values are
    #          written explicitly as "index ?", because a value omitted from
    #          a sparse row reads back as 0, not as missing.
    def to_arff(sparse=false)
      RELATION_MARKER + " #{@name}\n" +
        @attributes.map { |attr| attr.to_arff }.join("\n") +
        "\n" +
        DATA_MARKER + "\n" +
        @instances.map { |inst| sparse ? sparse_row(inst) : dense_row(inst) }.join("\n")
    end


    def to_s
      to_arff
    end


    private

    # Attribute type for a value that came from parsed text.
    def parsed_type(col)
      if col.kind_of?(Numeric) || col.to_s =~ NUMERIC_PATTERN
        ATTRIBUTE_NUMERIC
      else
        ATTRIBUTE_STRING
      end
    end

    # Attribute type for a value supplied as a Ruby object.
    def literal_type(col)
      if col.kind_of?(Numeric)
        ATTRIBUTE_NUMERIC
      elsif col.kind_of?(String)
        ATTRIBUTE_STRING
      else
        raise ArgumentError, "Could not parse attribute: #{col.inspect}"
      end
    end

    # True when +text+ is already wrapped in a matching pair of quotes.
    def quoted?(text)
      !(text =~ /\A(['"]).*\1\z/m).nil?
    end

    # Quotes a non-nil value for output: STRING values containing whitespace
    # get single quotes, DATE values get double quotes. Values that are
    # already quoted, and values of any other attribute type, pass through.
    def format_value(col, attr)
      # Nominal types are Arrays, so only String types are pattern-matched.
      type = attr.nil? ? nil : attr.type
      return col unless type.is_a?(String)

      text = col.to_s
      if type =~ /^#{ATTRIBUTE_STRING}$/i
        (text =~ /\s+/ && !quoted?(text)) ? "'#{text}'" : col
      elsif type =~ /^#{ATTRIBUTE_DATE}/i ## Hack comparison. Ugh.
        quoted?(text) ? col : "\"#{text}\""
      else
        col
      end
    end

    # True when a sparse row may leave the value out: a zero in a NUMERIC column.
    def sparse_zero?(col, attr)
      return false if attr.nil?
      type = attr.type
      type.is_a?(String) && !(type =~ /^#{ATTRIBUTE_NUMERIC}$/i).nil? && col == 0
    end

    # One dense data line: every column, nil written as MISSING.
    def dense_row(inst)
      inst.each_with_index.map { |col, i|
        col.nil? ? MISSING : format_value(col, @attributes[i])
      }.join(', ')
    end

    # One sparse data line: "{index value, ...}".
    def sparse_row(inst)
      pairs = []
      inst.each_with_index do |col, i|
        if col.nil?
          pairs << "#{i} #{MISSING}"
        elsif !sparse_zero?(col, @attributes[i])
          pairs << "#{i} #{format_value(col, @attributes[i])}"
        end
      end
      SPARSE_ARFF_BEGIN + pairs.join(', ') + SPARSE_ARFF_END
    end

  end


end # module Rarff
275
+
276
+ ################################################################################
277
+
278
# Command-line entry point: when this file is run directly with an ARFF file
# path as its first argument, parse that file and print it back as ARFF.
if $0 == __FILE__

  in_file = ARGV[0]
  # Without an input file there is nothing to do; exit quietly, as before.
  exit unless in_file

  rel = Rarff::Relation.new
  # File.read closes the file itself; the earlier File.open(...).read left
  # the handle open until garbage collection.
  rel.parse(File.read(in_file))

  puts '='*80
  puts '='*80
  puts "ARFF:"
  puts rel

end
301
+
302
+ ################################################################################
303
+
304
+
@@ -0,0 +1,27 @@
1
+ % 1. Title: Iris Plants Database
2
+ %
3
+ % 2. Sources:
4
+ % (a) Creator: R.A. Fisher
5
+ % (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
6
+ % (c) Date: July, 1988
7
+ %
8
+ @RELATION iris
9
+
10
+ @ATTRIBUTE sepallength NUMERIC
11
+ @ATTRIBUTE sepalwidth NUMERIC
12
+ @ATTRIBUTE petallength NUMERIC
13
+ @ATTRIBUTE petalwidth NUMERIC
14
+ @ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
15
+
16
+ @DATA
17
+ 5.1,3.5,1.4,0.2,Iris-setosa
18
+ 4.9,3.0,1.4,0.2,Iris-setosa
19
+ 4.7,3.2,1.3,0.2,Iris-setosa
20
+ 4.6,3.1,1.5,0.2,Iris-setosa
21
+ 5.0,3.6,1.4,0.2,Iris-setosa
22
+ 5.4,3.9,1.7,0.4,Iris-setosa
23
+ 4.6,3.4,1.4,0.3,Iris-setosa
24
+ 5.0,3.4,1.5,0.2,Iris-setosa
25
+ 4.4,2.9,1.4,0.2,Iris-setosa
26
+ 4.9,3.1,1.5,0.1,Iris-setosa
27
+
@@ -0,0 +1,163 @@
1
+ # See the README file for more information.
2
+ $:.unshift File.join(File.dirname(__FILE__),'..','lib')
3
+ require 'test/unit'
4
+ require 'rarff'
5
+
6
# Unit tests for ARFF text generation with Rarff::Relation#to_arff.
class TestArffLib < Test::Unit::TestCase

  # Test creation of an arff file string.
  def test_arff_creation
    expected = <<-END_OF_ARFF_FILE
@RELATION MyCoolRelation
@ATTRIBUTE Attr0 NUMERIC
@ATTRIBUTE subject STRING
@ATTRIBUTE Attr2 NUMERIC
@ATTRIBUTE Attr3 STRING
@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
@DATA
1.4, 'foo bar', 5, baz, "1900-08-08 12:12:12"
20.9, ruby, 46, rocks, "2005-10-23 12:12:12"
0, ruby, 46, rocks, "2001-02-19 12:12:12"
68.1, stuff, 728, 'is cool', "1974-02-10 12:12:12"
    END_OF_ARFF_FILE

    rows = [ [1.4, 'foo bar', 5, 'baz', "1900-08-08 12:12:12"],
             [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
             [0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
             [68.1, 'stuff', 728, 'is cool', "1974-02-10 12:12:12"]]

    check_generated_arff(expected, rows, "Arff creation test failed.")
  end

  # NOTE(review): the three tests below are disabled in this release; the
  # reason is not recorded in this file.
  #
  #  # Test creation of a sparse arff file string.
  #  def test_sparse_arff_creation
  #
  #    arff_file_str = <<-END_OF_ARFF_FILE
#@RELATION MyCoolRelation
#@ATTRIBUTE Attr0 NUMERIC
#@ATTRIBUTE subject STRING
#@ATTRIBUTE Attr2 NUMERIC
#@ATTRIBUTE Attr3 STRING
#@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
#@DATA
#{0 1.4, 1 'foo bar', 3 baz, 4 "1900-08-08 12:12:12"}
#{0 20.9, 1 ruby, 2 46, 3 rocks, 4 "2005-10-23 12:12:12"}
#{1 ruby, 2 46, 3 rocks, 4 "2001-02-19 12:12:12"}
#{0 68.1, 1 stuff, 3 'is cool', 4 "1974-02-10 12:12:12"}
  #    END_OF_ARFF_FILE
  #
  #    arff_file_str.gsub!(/\n$/, '')
  #
  #    instances = [ [1.4, 'foo bar', 0, 'baz', "1900-08-08 12:12:12"],
  #      [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
  #      [0.0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
  #      [68.1, 'stuff', 0, 'is cool', "1974-02-10 12:12:12"]]
  #
  #    rel = Rarff::Relation.new('MyCoolRelation')
  #    rel.instances = instances
  #    rel.attributes[1].name = 'subject'
  #    rel.attributes[4].name = 'birthday'
  #    rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
  #
  #    # puts "rel.to_arff(true):\n(\n#{rel.to_arff(true)}\n)\n"
  #    assert_equal( arff_file_str, rel.to_arff(true), "test_sparse_arff_creation.")
  #  end
  #
  #
  #  # Test parsing of an arff file.
  #  def test_arff_parse
  #    in_file = './test_arff.arff'
  #    rel = Rarff::Relation.new
  #    rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
  #
  #    assert_equal(rel.instances[2][1], 3.2)
  #    assert_equal(rel.instances[7][4], 'Iris-setosa')
  #  end
  #
  #
  #  # Test parsing of sparse ARFF format
  #  def test_sparse_arff_parse
  #    in_file = './test_sparse_arff.arff'
  #    rel = Rarff::Relation.new
  #    rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
  #
  #    assert_equal(13, rel.instances[0].size)
  #    assert_equal(0, rel.instances[0][1])
  #    assert_equal(7, rel.instances[0][3])
  #    assert_equal(2.4, rel.instances[1][1])
  #    assert_equal(0, rel.instances[1][2])
  #    assert_equal(19, rel.instances[1][12])
  #    assert_equal(6, rel.instances[2][6])
  #    assert_equal(0, rel.instances[3][12])
  #    # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
  #  end
  #

  # nil values must be written as '?'.
  def test_output_missing
    expected = <<-END_OF_ARFF_FILE
@RELATION MyCoolRelation
@ATTRIBUTE Attr0 NUMERIC
@ATTRIBUTE subject STRING
@ATTRIBUTE Attr2 NUMERIC
@ATTRIBUTE Attr3 STRING
@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
@DATA
?, 'foo bar', 5, baz, ?
20.9, ruby, 46, ?, "2005-10-23 12:12:12"
    END_OF_ARFF_FILE

    rows = [ [nil, 'foo bar', 5, 'baz', nil],
             [20.9, 'ruby', 46, nil, "2005-10-23 12:12:12"]]

    check_generated_arff(expected, rows, "missing data output failure")
  end

  # A first row made only of nils: attribute types have to come from later
  # rows, and a column that is nil in every row is expected as NUMERIC.
  def test_output_missing_undefined_first_row
    expected = <<-END_OF_ARFF_FILE
@RELATION MyCoolRelation
@ATTRIBUTE Attr0 NUMERIC
@ATTRIBUTE subject STRING
@ATTRIBUTE Attr2 NUMERIC
@ATTRIBUTE Attr3 NUMERIC
@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
@DATA
?, ?, ?, ?, ?
20.9, ruby, 46, ?, "2005-10-23 12:12:12"
    END_OF_ARFF_FILE

    rows = [ [nil, nil, nil, nil, nil],
             [20.9, 'ruby', 46, nil, "2005-10-23 12:12:12"]]

    check_generated_arff(expected, rows, "missing data output failure")
  end

  private

  # Shared body of the generation tests: builds 'MyCoolRelation' from
  # +instances+, renames attributes 1 and 4, turns attribute 4 into a DATE,
  # and compares to_arff with +arff_file_str+ (the heredoc's final newline is
  # removed first).
  def check_generated_arff(arff_file_str, instances, message)
    arff_file_str.gsub!(/\n$/, '')

    rel = Rarff::Relation.new('MyCoolRelation')
    rel.instances = instances
    rel.attributes[1].name = 'subject'
    rel.attributes[4].name = 'birthday'
    rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'

    # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
    assert_equal(arff_file_str, rel.to_arff, message)
  end
end
161
+
162
+
163
+
@@ -0,0 +1,24 @@
1
+ % Sample sparse ARFF file
2
+ @RELATION sparseness
3
+
4
+ @ATTRIBUTE attr1 NUMERIC
5
+ @ATTRIBUTE attr2 NUMERIC
6
+ @ATTRIBUTE attr3 NUMERIC
7
+ @ATTRIBUTE attr4 NUMERIC
8
+ @ATTRIBUTE attr5 NUMERIC
9
+ @ATTRIBUTE attr6 NUMERIC
10
+ @ATTRIBUTE attr7 NUMERIC
11
+ @ATTRIBUTE attr8 NUMERIC
12
+ @ATTRIBUTE attr9 NUMERIC
13
+ @ATTRIBUTE attr10 NUMERIC
14
+ @ATTRIBUTE attr11 NUMERIC
15
+ @ATTRIBUTE attr12 NUMERIC
16
+ @ATTRIBUTE attr13 NUMERIC
17
+
18
+ @DATA
19
+ {3 7, 10 34}
20
+ {1 2.4, 4 62, 12 19}
21
+ {0 0, 1 1, 2 2, 3 3, 4 4, 5 5, 6 6, 7 7, 8 8, 9 9, 10 10, 11 11, 12 12}
22
+ {9 42}
23
+ {2 54.3, 3 92, 11 10.2}
24
+
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wwood-rarff
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.1
5
+ platform: ruby
6
+ authors:
7
+ - Andy Payne, Ben J Woodcroft
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-11-25 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hoe
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.8.2
23
+ version:
24
+ description: Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify data sets for data mining and machine learning.
25
+ email: apayne .at. gmail.com, b.woodcroft@pgrad.unimelb.edu.au
26
+ executables: []
27
+
28
+ extensions: []
29
+
30
+ extra_rdoc_files:
31
+ - README.txt
32
+ files:
33
+ - History.txt
34
+ - Manifest.txt
35
+ - README.txt
36
+ - Rakefile
37
+ - lib/rarff.rb
38
+ - test/test_arff.arff
39
+ - test/test_sparse_arff.arff
40
+ - test/test_rarff.rb
41
+ has_rdoc: true
42
+ homepage: http://adenserparlance.blogspot.com/2007/01/rarff-simple-arff-library-in-ruby.html
43
+ post_install_message:
44
+ rdoc_options:
45
+ - --exclude
46
+ - test/*
47
+ - --main
48
+ - README.txt
49
+ - --inline-source
50
+ require_paths:
51
+ - lib
52
+ - test
53
+ required_ruby_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: "0"
58
+ version:
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: "0"
64
+ version:
65
+ requirements: []
66
+
67
+ rubyforge_project: rarff
68
+ rubygems_version: 1.2.0
69
+ signing_key:
70
+ specification_version: 2
71
+ summary: Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files
72
+ test_files:
73
+ - test/test_rarff.rb