wwood-rarff 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +5 -0
- data/Rakefile +1 -1
- data/lib/rarff.rb +9 -2
- data/test/test_rarff.rb +87 -64
- metadata +5 -4
- data/test/test_sparse_arff.arff +0 -24
data/History.txt
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
== Changes
|
|
2
2
|
|
|
3
|
+
=== 0.2.2 (unofficial)
|
|
4
|
+
|
|
5
|
+
* Handles boolean inputs, which are modelled as nominals
|
|
6
|
+
* Handles spaces in nominals, which are replaced by underscores. Probably should be quoting these, but is good enough for me right now
|
|
7
|
+
|
|
3
8
|
=== 0.2.1 (unofficial)
|
|
4
9
|
|
|
5
10
|
* Handles missing data in output, encoded internally as nil values
|
data/Rakefile
CHANGED
data/lib/rarff.rb
CHANGED
|
@@ -53,6 +53,10 @@ module Rarff
|
|
|
53
53
|
ATTRIBUTE_INTEGER = 'INTEGER'
|
|
54
54
|
ATTRIBUTE_STRING = 'STRING'
|
|
55
55
|
ATTRIBUTE_DATE = 'DATE'
|
|
56
|
+
# Model Boolean as a Nominal Attribute.
|
|
57
|
+
# Use {false, true} not {true, false} because then in visualisations in Weka
|
|
58
|
+
# true is to the right, which makes more intuitive sense
|
|
59
|
+
ATTRIBUTE_BOOLEAN = '{false, true}'
|
|
56
60
|
|
|
57
61
|
MISSING = '?'
|
|
58
62
|
|
|
@@ -100,7 +104,7 @@ module Rarff
|
|
|
100
104
|
|
|
101
105
|
def to_arff
|
|
102
106
|
if @type_is_nominal == true
|
|
103
|
-
ATTRIBUTE_MARKER + " #{@name} #{@type.join(',')}"
|
|
107
|
+
ATTRIBUTE_MARKER + " #{@name} {#{@type.join(',').gsub(' ','_')}}"
|
|
104
108
|
else
|
|
105
109
|
ATTRIBUTE_MARKER + " #{@name} #{@type}"
|
|
106
110
|
end
|
|
@@ -193,6 +197,8 @@ module Rarff
|
|
|
193
197
|
@attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_NUMERIC)
|
|
194
198
|
elsif col.kind_of?(String)
|
|
195
199
|
@attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_STRING)
|
|
200
|
+
elsif col.kind_of?(TrueClass) or col.kind_of?(FalseClass) # How come there is no generic BooleanClass?
|
|
201
|
+
@attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_BOOLEAN)
|
|
196
202
|
else
|
|
197
203
|
raise Exception, "Could not parse attribute: #{col.inspect}"
|
|
198
204
|
end
|
|
@@ -219,7 +225,8 @@ module Rarff
|
|
|
219
225
|
|
|
220
226
|
def to_arff(sparse=false)
|
|
221
227
|
RELATION_MARKER + " #{@name}\n" +
|
|
222
|
-
@attributes.map{ |attr| attr.to_arff }.join("\n") +
|
|
228
|
+
# @attributes.map{ |attr| attr.to_arff }.join("\n") +
|
|
229
|
+
@attributes.join("\n") +
|
|
223
230
|
"\n" +
|
|
224
231
|
DATA_MARKER + "\n" +
|
|
225
232
|
|
data/test/test_rarff.rb
CHANGED
|
@@ -38,70 +38,70 @@ class TestArffLib < Test::Unit::TestCase
|
|
|
38
38
|
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
|
39
39
|
assert_equal(arff_file_str, rel.to_arff, "Arff creation test failed.")
|
|
40
40
|
end
|
|
41
|
-
#
|
|
42
|
-
# # Test creation of a sparse arff file string.
|
|
43
|
-
# def test_sparse_arff_creation
|
|
44
|
-
#
|
|
45
|
-
# arff_file_str = <<-END_OF_ARFF_FILE
|
|
46
|
-
#@RELATION MyCoolRelation
|
|
47
|
-
#@ATTRIBUTE Attr0 NUMERIC
|
|
48
|
-
#@ATTRIBUTE subject STRING
|
|
49
|
-
#@ATTRIBUTE Attr2 NUMERIC
|
|
50
|
-
#@ATTRIBUTE Attr3 STRING
|
|
51
|
-
#@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
|
|
52
|
-
#@DATA
|
|
53
|
-
#{0 1.4, 1 'foo bar', 3 baz, 4 "1900-08-08 12:12:12"}
|
|
54
|
-
#{0 20.9, 1 ruby, 2 46, 3 rocks, 4 "2005-10-23 12:12:12"}
|
|
55
|
-
#{1 ruby, 2 46, 3 rocks, 4 "2001-02-19 12:12:12"}
|
|
56
|
-
#{0 68.1, 1 stuff, 3 'is cool', 4 "1974-02-10 12:12:12"}
|
|
57
|
-
# END_OF_ARFF_FILE
|
|
58
|
-
#
|
|
59
|
-
# arff_file_str.gsub!(/\n$/, '')
|
|
60
|
-
#
|
|
61
|
-
# instances = [ [1.4, 'foo bar', 0, 'baz', "1900-08-08 12:12:12"],
|
|
62
|
-
# [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
|
|
63
|
-
# [0.0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
|
|
64
|
-
# [68.1, 'stuff', 0, 'is cool', "1974-02-10 12:12:12"]]
|
|
65
|
-
#
|
|
66
|
-
# rel = Rarff::Relation.new('MyCoolRelation')
|
|
67
|
-
# rel.instances = instances
|
|
68
|
-
# rel.attributes[1].name = 'subject'
|
|
69
|
-
# rel.attributes[4].name = 'birthday'
|
|
70
|
-
# rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
|
|
71
|
-
#
|
|
72
|
-
# # puts "rel.to_arff(true):\n(\n#{rel.to_arff(true)}\n)\n"
|
|
73
|
-
# assert_equal( arff_file_str, rel.to_arff(true), "test_sparse_arff_creation.")
|
|
74
|
-
# end
|
|
75
|
-
#
|
|
76
|
-
#
|
|
77
|
-
# # Test parsing of an arff file.
|
|
78
|
-
# def test_arff_parse
|
|
79
|
-
# in_file = './test_arff.arff'
|
|
80
|
-
# rel = Rarff::Relation.new
|
|
81
|
-
# rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
|
|
82
|
-
#
|
|
83
|
-
# assert_equal(rel.instances[2][1], 3.2)
|
|
84
|
-
# assert_equal(rel.instances[7][4], 'Iris-setosa')
|
|
85
|
-
# end
|
|
86
|
-
#
|
|
87
|
-
#
|
|
88
|
-
# # Test parsing of sparse ARFF format
|
|
89
|
-
# def test_sparse_arff_parse
|
|
90
|
-
# in_file = './test_sparse_arff.arff'
|
|
91
|
-
# rel = Rarff::Relation.new
|
|
92
|
-
# rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
|
|
93
|
-
#
|
|
94
|
-
# assert_equal(13, rel.instances[0].size)
|
|
95
|
-
# assert_equal(0, rel.instances[0][1])
|
|
96
|
-
# assert_equal(7, rel.instances[0][3])
|
|
97
|
-
# assert_equal(2.4, rel.instances[1][1])
|
|
98
|
-
# assert_equal(0, rel.instances[1][2])
|
|
99
|
-
# assert_equal(19, rel.instances[1][12])
|
|
100
|
-
# assert_equal(6, rel.instances[2][6])
|
|
101
|
-
# assert_equal(0, rel.instances[3][12])
|
|
102
|
-
# # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
|
|
103
|
-
# end
|
|
104
|
-
#
|
|
41
|
+
#
|
|
42
|
+
# # Test creation of a sparse arff file string.
|
|
43
|
+
# def test_sparse_arff_creation
|
|
44
|
+
#
|
|
45
|
+
# arff_file_str = <<-END_OF_ARFF_FILE
|
|
46
|
+
#@RELATION MyCoolRelation
|
|
47
|
+
#@ATTRIBUTE Attr0 NUMERIC
|
|
48
|
+
#@ATTRIBUTE subject STRING
|
|
49
|
+
#@ATTRIBUTE Attr2 NUMERIC
|
|
50
|
+
#@ATTRIBUTE Attr3 STRING
|
|
51
|
+
#@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
|
|
52
|
+
#@DATA
|
|
53
|
+
#{0 1.4, 1 'foo bar', 3 baz, 4 "1900-08-08 12:12:12"}
|
|
54
|
+
#{0 20.9, 1 ruby, 2 46, 3 rocks, 4 "2005-10-23 12:12:12"}
|
|
55
|
+
#{1 ruby, 2 46, 3 rocks, 4 "2001-02-19 12:12:12"}
|
|
56
|
+
#{0 68.1, 1 stuff, 3 'is cool', 4 "1974-02-10 12:12:12"}
|
|
57
|
+
# END_OF_ARFF_FILE
|
|
58
|
+
#
|
|
59
|
+
# arff_file_str.gsub!(/\n$/, '')
|
|
60
|
+
#
|
|
61
|
+
# instances = [ [1.4, 'foo bar', 0, 'baz', "1900-08-08 12:12:12"],
|
|
62
|
+
# [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
|
|
63
|
+
# [0.0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
|
|
64
|
+
# [68.1, 'stuff', 0, 'is cool', "1974-02-10 12:12:12"]]
|
|
65
|
+
#
|
|
66
|
+
# rel = Rarff::Relation.new('MyCoolRelation')
|
|
67
|
+
# rel.instances = instances
|
|
68
|
+
# rel.attributes[1].name = 'subject'
|
|
69
|
+
# rel.attributes[4].name = 'birthday'
|
|
70
|
+
# rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
|
|
71
|
+
#
|
|
72
|
+
# # puts "rel.to_arff(true):\n(\n#{rel.to_arff(true)}\n)\n"
|
|
73
|
+
# assert_equal( arff_file_str, rel.to_arff(true), "test_sparse_arff_creation.")
|
|
74
|
+
# end
|
|
75
|
+
#
|
|
76
|
+
#
|
|
77
|
+
# # Test parsing of an arff file.
|
|
78
|
+
# def test_arff_parse
|
|
79
|
+
# in_file = './test_arff.arff'
|
|
80
|
+
# rel = Rarff::Relation.new
|
|
81
|
+
# rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
|
|
82
|
+
#
|
|
83
|
+
# assert_equal(rel.instances[2][1], 3.2)
|
|
84
|
+
# assert_equal(rel.instances[7][4], 'Iris-setosa')
|
|
85
|
+
# end
|
|
86
|
+
#
|
|
87
|
+
#
|
|
88
|
+
# # Test parsing of sparse ARFF format
|
|
89
|
+
# def test_sparse_arff_parse
|
|
90
|
+
# in_file = './test_sparse_arff.arff'
|
|
91
|
+
# rel = Rarff::Relation.new
|
|
92
|
+
# rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
|
|
93
|
+
#
|
|
94
|
+
# assert_equal(13, rel.instances[0].size)
|
|
95
|
+
# assert_equal(0, rel.instances[0][1])
|
|
96
|
+
# assert_equal(7, rel.instances[0][3])
|
|
97
|
+
# assert_equal(2.4, rel.instances[1][1])
|
|
98
|
+
# assert_equal(0, rel.instances[1][2])
|
|
99
|
+
# assert_equal(19, rel.instances[1][12])
|
|
100
|
+
# assert_equal(6, rel.instances[2][6])
|
|
101
|
+
# assert_equal(0, rel.instances[3][12])
|
|
102
|
+
# # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
|
|
103
|
+
# end
|
|
104
|
+
#
|
|
105
105
|
def test_output_missing
|
|
106
106
|
arff_file_str = <<-END_OF_ARFF_FILE
|
|
107
107
|
@RELATION MyCoolRelation
|
|
@@ -157,6 +157,29 @@ class TestArffLib < Test::Unit::TestCase
|
|
|
157
157
|
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
|
158
158
|
assert_equal(arff_file_str, rel.to_arff, "missing data output failure")
|
|
159
159
|
end
|
|
160
|
+
|
|
161
|
+
def test_boolean
|
|
162
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
|
163
|
+
@RELATION MyCoolRelation
|
|
164
|
+
@ATTRIBUTE Attr0 NUMERIC
|
|
165
|
+
@ATTRIBUTE subject STRING
|
|
166
|
+
@ATTRIBUTE Attr2 {false,true}
|
|
167
|
+
@DATA
|
|
168
|
+
?, ?, ?
|
|
169
|
+
20.9, ruby, true
|
|
170
|
+
END_OF_ARFF_FILE
|
|
171
|
+
|
|
172
|
+
arff_file_str.gsub!(/\n$/, '')
|
|
173
|
+
|
|
174
|
+
instances = [ [nil, nil, nil],
|
|
175
|
+
[20.9, 'ruby', true]]
|
|
176
|
+
|
|
177
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
|
178
|
+
rel.instances = instances
|
|
179
|
+
rel.attributes[1].name = 'subject'
|
|
180
|
+
|
|
181
|
+
assert_equal(arff_file_str, rel.to_arff, "missing data output failure")
|
|
182
|
+
end
|
|
160
183
|
end
|
|
161
184
|
|
|
162
185
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: wwood-rarff
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.1
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andy Payne, Ben J Woodcroft
|
|
@@ -9,7 +9,7 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date:
|
|
12
|
+
date: 2009-01-30 00:00:00 -08:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
@@ -19,7 +19,7 @@ dependencies:
|
|
|
19
19
|
requirements:
|
|
20
20
|
- - ">="
|
|
21
21
|
- !ruby/object:Gem::Version
|
|
22
|
-
version: 1.8.
|
|
22
|
+
version: 1.8.3
|
|
23
23
|
version:
|
|
24
24
|
description: Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify data sets for data mining and machine learning.
|
|
25
25
|
email: apayne .at. gmail.com, b.woodcroft@pgrad.unimelb.edu.au
|
|
@@ -36,7 +36,8 @@ files:
|
|
|
36
36
|
- Rakefile
|
|
37
37
|
- lib/rarff.rb
|
|
38
38
|
- test/test_arff.arff
|
|
39
|
-
- test/test_sparse_arff.arff
|
|
39
|
+
- "test/test_sparse_arff.arff "
|
|
40
|
+
- test/ts_rarff.rb
|
|
40
41
|
- test/test_rarff.rb
|
|
41
42
|
has_rdoc: true
|
|
42
43
|
homepage: http://adenserparlance.blogspot.com/2007/01/rarff-simple-arff-library-in-ruby.html
|
data/test/test_sparse_arff.arff
DELETED
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
% Sample sparse ARFF file
|
|
2
|
-
@RELATION sparseness
|
|
3
|
-
|
|
4
|
-
@ATTRIBUTE attr1 NUMERIC
|
|
5
|
-
@ATTRIBUTE attr2 NUMERIC
|
|
6
|
-
@ATTRIBUTE attr3 NUMERIC
|
|
7
|
-
@ATTRIBUTE attr4 NUMERIC
|
|
8
|
-
@ATTRIBUTE attr5 NUMERIC
|
|
9
|
-
@ATTRIBUTE attr6 NUMERIC
|
|
10
|
-
@ATTRIBUTE attr7 NUMERIC
|
|
11
|
-
@ATTRIBUTE attr8 NUMERIC
|
|
12
|
-
@ATTRIBUTE attr9 NUMERIC
|
|
13
|
-
@ATTRIBUTE attr10 NUMERIC
|
|
14
|
-
@ATTRIBUTE attr11 NUMERIC
|
|
15
|
-
@ATTRIBUTE attr12 NUMERIC
|
|
16
|
-
@ATTRIBUTE attr13 NUMERIC
|
|
17
|
-
|
|
18
|
-
@DATA
|
|
19
|
-
{3 7, 10 34}
|
|
20
|
-
{1 2.4, 4 62, 12 19}
|
|
21
|
-
{0 0, 1 1, 2 2, 3 3, 4 4, 5 5, 6 6, 7 7, 8 8, 9 9, 10 10, 11 11, 12 12}
|
|
22
|
-
{9 42}
|
|
23
|
-
{2 54.3, 3 92, 11 10.2}
|
|
24
|
-
|