wwood-rarff 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/Rakefile +1 -1
- data/lib/rarff.rb +9 -2
- data/test/test_rarff.rb +87 -64
- metadata +5 -4
- data/test/test_sparse_arff.arff +0 -24
data/History.txt
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
== Changes
|
2
2
|
|
3
|
+
=== 0.2.2 (unofficial)
|
4
|
+
|
5
|
+
* Handles boolean inputs, which are modelled as nominals
|
6
|
+
* Handles spaces in nominals, which are replaced by underscores. Probably should be quoting these, but is good enough for me right now
|
7
|
+
|
3
8
|
=== 0.2.1 (unofficial)
|
4
9
|
|
5
10
|
* Handles missing data in output, encoded internally as nil values
|
data/Rakefile
CHANGED
data/lib/rarff.rb
CHANGED
@@ -53,6 +53,10 @@ module Rarff
|
|
53
53
|
ATTRIBUTE_INTEGER = 'INTEGER'
|
54
54
|
ATTRIBUTE_STRING = 'STRING'
|
55
55
|
ATTRIBUTE_DATE = 'DATE'
|
56
|
+
# Model Boolean as a Nominal Attribute.
|
57
|
+
# Use {false, true} not {true, false} because then in visualisations in Weka
|
58
|
+
# true is to the right, which makes more intuitive sense
|
59
|
+
ATTRIBUTE_BOOLEAN = '{false, true}'
|
56
60
|
|
57
61
|
MISSING = '?'
|
58
62
|
|
@@ -100,7 +104,7 @@ module Rarff
|
|
100
104
|
|
101
105
|
def to_arff
|
102
106
|
if @type_is_nominal == true
|
103
|
-
ATTRIBUTE_MARKER + " #{@name} #{@type.join(',')}"
|
107
|
+
ATTRIBUTE_MARKER + " #{@name} {#{@type.join(',').gsub(' ','_')}}"
|
104
108
|
else
|
105
109
|
ATTRIBUTE_MARKER + " #{@name} #{@type}"
|
106
110
|
end
|
@@ -193,6 +197,8 @@ module Rarff
|
|
193
197
|
@attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_NUMERIC)
|
194
198
|
elsif col.kind_of?(String)
|
195
199
|
@attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_STRING)
|
200
|
+
elsif col.kind_of?(TrueClass) or col.kind_of?(FalseClass) # How come there is no generic BooleanClass?
|
201
|
+
@attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_BOOLEAN)
|
196
202
|
else
|
197
203
|
raise Exception, "Could not parse attribute: #{col.inspect}"
|
198
204
|
end
|
@@ -219,7 +225,8 @@ module Rarff
|
|
219
225
|
|
220
226
|
def to_arff(sparse=false)
|
221
227
|
RELATION_MARKER + " #{@name}\n" +
|
222
|
-
@attributes.map{ |attr| attr.to_arff }.join("\n") +
|
228
|
+
# @attributes.map{ |attr| attr.to_arff }.join("\n") +
|
229
|
+
@attributes.join("\n") +
|
223
230
|
"\n" +
|
224
231
|
DATA_MARKER + "\n" +
|
225
232
|
|
data/test/test_rarff.rb
CHANGED
@@ -38,70 +38,70 @@ class TestArffLib < Test::Unit::TestCase
|
|
38
38
|
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
39
39
|
assert_equal(arff_file_str, rel.to_arff, "Arff creation test failed.")
|
40
40
|
end
|
41
|
-
#
|
42
|
-
# # Test creation of a sparse arff file string.
|
43
|
-
# def test_sparse_arff_creation
|
44
|
-
#
|
45
|
-
# arff_file_str = <<-END_OF_ARFF_FILE
|
46
|
-
#@RELATION MyCoolRelation
|
47
|
-
#@ATTRIBUTE Attr0 NUMERIC
|
48
|
-
#@ATTRIBUTE subject STRING
|
49
|
-
#@ATTRIBUTE Attr2 NUMERIC
|
50
|
-
#@ATTRIBUTE Attr3 STRING
|
51
|
-
#@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
|
52
|
-
#@DATA
|
53
|
-
#{0 1.4, 1 'foo bar', 3 baz, 4 "1900-08-08 12:12:12"}
|
54
|
-
#{0 20.9, 1 ruby, 2 46, 3 rocks, 4 "2005-10-23 12:12:12"}
|
55
|
-
#{1 ruby, 2 46, 3 rocks, 4 "2001-02-19 12:12:12"}
|
56
|
-
#{0 68.1, 1 stuff, 3 'is cool', 4 "1974-02-10 12:12:12"}
|
57
|
-
# END_OF_ARFF_FILE
|
58
|
-
#
|
59
|
-
# arff_file_str.gsub!(/\n$/, '')
|
60
|
-
#
|
61
|
-
# instances = [ [1.4, 'foo bar', 0, 'baz', "1900-08-08 12:12:12"],
|
62
|
-
# [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
|
63
|
-
# [0.0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
|
64
|
-
# [68.1, 'stuff', 0, 'is cool', "1974-02-10 12:12:12"]]
|
65
|
-
#
|
66
|
-
# rel = Rarff::Relation.new('MyCoolRelation')
|
67
|
-
# rel.instances = instances
|
68
|
-
# rel.attributes[1].name = 'subject'
|
69
|
-
# rel.attributes[4].name = 'birthday'
|
70
|
-
# rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
|
71
|
-
#
|
72
|
-
# # puts "rel.to_arff(true):\n(\n#{rel.to_arff(true)}\n)\n"
|
73
|
-
# assert_equal( arff_file_str, rel.to_arff(true), "test_sparse_arff_creation.")
|
74
|
-
# end
|
75
|
-
#
|
76
|
-
#
|
77
|
-
# # Test parsing of an arff file.
|
78
|
-
# def test_arff_parse
|
79
|
-
# in_file = './test_arff.arff'
|
80
|
-
# rel = Rarff::Relation.new
|
81
|
-
# rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
|
82
|
-
#
|
83
|
-
# assert_equal(rel.instances[2][1], 3.2)
|
84
|
-
# assert_equal(rel.instances[7][4], 'Iris-setosa')
|
85
|
-
# end
|
86
|
-
#
|
87
|
-
#
|
88
|
-
# # Test parsing of sparse ARFF format
|
89
|
-
# def test_sparse_arff_parse
|
90
|
-
# in_file = './test_sparse_arff.arff'
|
91
|
-
# rel = Rarff::Relation.new
|
92
|
-
# rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
|
93
|
-
#
|
94
|
-
# assert_equal(13, rel.instances[0].size)
|
95
|
-
# assert_equal(0, rel.instances[0][1])
|
96
|
-
# assert_equal(7, rel.instances[0][3])
|
97
|
-
# assert_equal(2.4, rel.instances[1][1])
|
98
|
-
# assert_equal(0, rel.instances[1][2])
|
99
|
-
# assert_equal(19, rel.instances[1][12])
|
100
|
-
# assert_equal(6, rel.instances[2][6])
|
101
|
-
# assert_equal(0, rel.instances[3][12])
|
102
|
-
# # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
|
103
|
-
# end
|
104
|
-
#
|
41
|
+
#
|
42
|
+
# # Test creation of a sparse arff file string.
|
43
|
+
# def test_sparse_arff_creation
|
44
|
+
#
|
45
|
+
# arff_file_str = <<-END_OF_ARFF_FILE
|
46
|
+
#@RELATION MyCoolRelation
|
47
|
+
#@ATTRIBUTE Attr0 NUMERIC
|
48
|
+
#@ATTRIBUTE subject STRING
|
49
|
+
#@ATTRIBUTE Attr2 NUMERIC
|
50
|
+
#@ATTRIBUTE Attr3 STRING
|
51
|
+
#@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
|
52
|
+
#@DATA
|
53
|
+
#{0 1.4, 1 'foo bar', 3 baz, 4 "1900-08-08 12:12:12"}
|
54
|
+
#{0 20.9, 1 ruby, 2 46, 3 rocks, 4 "2005-10-23 12:12:12"}
|
55
|
+
#{1 ruby, 2 46, 3 rocks, 4 "2001-02-19 12:12:12"}
|
56
|
+
#{0 68.1, 1 stuff, 3 'is cool', 4 "1974-02-10 12:12:12"}
|
57
|
+
# END_OF_ARFF_FILE
|
58
|
+
#
|
59
|
+
# arff_file_str.gsub!(/\n$/, '')
|
60
|
+
#
|
61
|
+
# instances = [ [1.4, 'foo bar', 0, 'baz', "1900-08-08 12:12:12"],
|
62
|
+
# [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
|
63
|
+
# [0.0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
|
64
|
+
# [68.1, 'stuff', 0, 'is cool', "1974-02-10 12:12:12"]]
|
65
|
+
#
|
66
|
+
# rel = Rarff::Relation.new('MyCoolRelation')
|
67
|
+
# rel.instances = instances
|
68
|
+
# rel.attributes[1].name = 'subject'
|
69
|
+
# rel.attributes[4].name = 'birthday'
|
70
|
+
# rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
|
71
|
+
#
|
72
|
+
# # puts "rel.to_arff(true):\n(\n#{rel.to_arff(true)}\n)\n"
|
73
|
+
# assert_equal( arff_file_str, rel.to_arff(true), "test_sparse_arff_creation.")
|
74
|
+
# end
|
75
|
+
#
|
76
|
+
#
|
77
|
+
# # Test parsing of an arff file.
|
78
|
+
# def test_arff_parse
|
79
|
+
# in_file = './test_arff.arff'
|
80
|
+
# rel = Rarff::Relation.new
|
81
|
+
# rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
|
82
|
+
#
|
83
|
+
# assert_equal(rel.instances[2][1], 3.2)
|
84
|
+
# assert_equal(rel.instances[7][4], 'Iris-setosa')
|
85
|
+
# end
|
86
|
+
#
|
87
|
+
#
|
88
|
+
# # Test parsing of sparse ARFF format
|
89
|
+
# def test_sparse_arff_parse
|
90
|
+
# in_file = './test_sparse_arff.arff'
|
91
|
+
# rel = Rarff::Relation.new
|
92
|
+
# rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
|
93
|
+
#
|
94
|
+
# assert_equal(13, rel.instances[0].size)
|
95
|
+
# assert_equal(0, rel.instances[0][1])
|
96
|
+
# assert_equal(7, rel.instances[0][3])
|
97
|
+
# assert_equal(2.4, rel.instances[1][1])
|
98
|
+
# assert_equal(0, rel.instances[1][2])
|
99
|
+
# assert_equal(19, rel.instances[1][12])
|
100
|
+
# assert_equal(6, rel.instances[2][6])
|
101
|
+
# assert_equal(0, rel.instances[3][12])
|
102
|
+
# # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
|
103
|
+
# end
|
104
|
+
#
|
105
105
|
def test_output_missing
|
106
106
|
arff_file_str = <<-END_OF_ARFF_FILE
|
107
107
|
@RELATION MyCoolRelation
|
@@ -157,6 +157,29 @@ class TestArffLib < Test::Unit::TestCase
|
|
157
157
|
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
158
158
|
assert_equal(arff_file_str, rel.to_arff, "missing data output failure")
|
159
159
|
end
|
160
|
+
|
161
|
+
def test_boolean
|
162
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
163
|
+
@RELATION MyCoolRelation
|
164
|
+
@ATTRIBUTE Attr0 NUMERIC
|
165
|
+
@ATTRIBUTE subject STRING
|
166
|
+
@ATTRIBUTE Attr2 {false,true}
|
167
|
+
@DATA
|
168
|
+
?, ?, ?
|
169
|
+
20.9, ruby, true
|
170
|
+
END_OF_ARFF_FILE
|
171
|
+
|
172
|
+
arff_file_str.gsub!(/\n$/, '')
|
173
|
+
|
174
|
+
instances = [ [nil, nil, nil],
|
175
|
+
[20.9, 'ruby', true]]
|
176
|
+
|
177
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
178
|
+
rel.instances = instances
|
179
|
+
rel.attributes[1].name = 'subject'
|
180
|
+
|
181
|
+
assert_equal(arff_file_str, rel.to_arff, "missing data output failure")
|
182
|
+
end
|
160
183
|
end
|
161
184
|
|
162
185
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wwood-rarff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Payne, Ben J Woodcroft
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-01-30 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
requirements:
|
20
20
|
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: 1.8.
|
22
|
+
version: 1.8.3
|
23
23
|
version:
|
24
24
|
description: Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify data sets for data mining and machine learning.
|
25
25
|
email: apayne .at. gmail.com, b.woodcroft@pgrad.unimelb.edu.au
|
@@ -36,7 +36,8 @@ files:
|
|
36
36
|
- Rakefile
|
37
37
|
- lib/rarff.rb
|
38
38
|
- test/test_arff.arff
|
39
|
-
- test/test_sparse_arff.arff
|
39
|
+
- "test/test_sparse_arff.arff "
|
40
|
+
- test/ts_rarff.rb
|
40
41
|
- test/test_rarff.rb
|
41
42
|
has_rdoc: true
|
42
43
|
homepage: http://adenserparlance.blogspot.com/2007/01/rarff-simple-arff-library-in-ruby.html
|
data/test/test_sparse_arff.arff
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
% Sample sparse ARFF file
|
2
|
-
@RELATION sparseness
|
3
|
-
|
4
|
-
@ATTRIBUTE attr1 NUMERIC
|
5
|
-
@ATTRIBUTE attr2 NUMERIC
|
6
|
-
@ATTRIBUTE attr3 NUMERIC
|
7
|
-
@ATTRIBUTE attr4 NUMERIC
|
8
|
-
@ATTRIBUTE attr5 NUMERIC
|
9
|
-
@ATTRIBUTE attr6 NUMERIC
|
10
|
-
@ATTRIBUTE attr7 NUMERIC
|
11
|
-
@ATTRIBUTE attr8 NUMERIC
|
12
|
-
@ATTRIBUTE attr9 NUMERIC
|
13
|
-
@ATTRIBUTE attr10 NUMERIC
|
14
|
-
@ATTRIBUTE attr11 NUMERIC
|
15
|
-
@ATTRIBUTE attr12 NUMERIC
|
16
|
-
@ATTRIBUTE attr13 NUMERIC
|
17
|
-
|
18
|
-
@DATA
|
19
|
-
{3 7, 10 34}
|
20
|
-
{1 2.4, 4 62, 12 19}
|
21
|
-
{0 0, 1 1, 2 2, 3 3, 4 4, 5 5, 6 6, 7 7, 8 8, 9 9, 10 10, 11 11, 12 12}
|
22
|
-
{9 42}
|
23
|
-
{2 54.3, 3 92, 11 10.2}
|
24
|
-
|