wwood-rarff 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,5 +1,10 @@
1
1
  == Changes
2
2
 
3
+ === 0.2.2 (unofficial)
4
+
5
+ * Handles boolean inputs, which are modelled as nominals
6
+ * Handles spaces in nominals, which are replaced by underscores. These should probably be quoted instead, but this is good enough for now
7
+
3
8
  === 0.2.1 (unofficial)
4
9
 
5
10
  * Handles missing data in output, encoded internally as nil values
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require 'hoe'
3
3
  #require './lib/rarff.rb'
4
4
 
5
5
  gem_name = 'rarff'
6
- hoe = Hoe.new(gem_name,'0.2.1') do |p|
6
+ hoe = Hoe.new(gem_name,'0.2.2') do |p|
7
7
 
8
8
  p.author = "Andy Payne, Ben J Woodcroft"
9
9
  p.email = "apayne .at. gmail.com, b.woodcroft@pgrad.unimelb.edu.au"
data/lib/rarff.rb CHANGED
@@ -53,6 +53,10 @@ module Rarff
53
53
  ATTRIBUTE_INTEGER = 'INTEGER'
54
54
  ATTRIBUTE_STRING = 'STRING'
55
55
  ATTRIBUTE_DATE = 'DATE'
56
+ # Model Boolean as a Nominal Attribute.
57
+ # Use {false, true} not {true, false} because then in visualisations in Weka
58
+ # true is to the right, which makes more intuitive sense
59
+ ATTRIBUTE_BOOLEAN = '{false, true}'
56
60
 
57
61
  MISSING = '?'
58
62
 
@@ -100,7 +104,7 @@ module Rarff
100
104
 
101
105
  def to_arff
102
106
  if @type_is_nominal == true
103
- ATTRIBUTE_MARKER + " #{@name} #{@type.join(',')}"
107
+ ATTRIBUTE_MARKER + " #{@name} {#{@type.join(',').gsub(' ','_')}}"
104
108
  else
105
109
  ATTRIBUTE_MARKER + " #{@name} #{@type}"
106
110
  end
@@ -193,6 +197,8 @@ module Rarff
193
197
  @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_NUMERIC)
194
198
  elsif col.kind_of?(String)
195
199
  @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_STRING)
200
+ elsif col.kind_of?(TrueClass) or col.kind_of?(FalseClass) # How come there is no generic BooleanClass?
201
+ @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_BOOLEAN)
196
202
  else
197
203
  raise Exception, "Could not parse attribute: #{col.inspect}"
198
204
  end
@@ -219,7 +225,8 @@ module Rarff
219
225
 
220
226
  def to_arff(sparse=false)
221
227
  RELATION_MARKER + " #{@name}\n" +
222
- @attributes.map{ |attr| attr.to_arff }.join("\n") +
228
+ # @attributes.map{ |attr| attr.to_arff }.join("\n") +
229
+ @attributes.join("\n") +
223
230
  "\n" +
224
231
  DATA_MARKER + "\n" +
225
232
 
data/test/test_rarff.rb CHANGED
@@ -38,70 +38,70 @@ class TestArffLib < Test::Unit::TestCase
38
38
  # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
39
39
  assert_equal(arff_file_str, rel.to_arff, "Arff creation test failed.")
40
40
  end
41
- #
42
- # # Test creation of a sparse arff file string.
43
- # def test_sparse_arff_creation
44
- #
45
- # arff_file_str = <<-END_OF_ARFF_FILE
46
- #@RELATION MyCoolRelation
47
- #@ATTRIBUTE Attr0 NUMERIC
48
- #@ATTRIBUTE subject STRING
49
- #@ATTRIBUTE Attr2 NUMERIC
50
- #@ATTRIBUTE Attr3 STRING
51
- #@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
52
- #@DATA
53
- #{0 1.4, 1 'foo bar', 3 baz, 4 "1900-08-08 12:12:12"}
54
- #{0 20.9, 1 ruby, 2 46, 3 rocks, 4 "2005-10-23 12:12:12"}
55
- #{1 ruby, 2 46, 3 rocks, 4 "2001-02-19 12:12:12"}
56
- #{0 68.1, 1 stuff, 3 'is cool', 4 "1974-02-10 12:12:12"}
57
- # END_OF_ARFF_FILE
58
- #
59
- # arff_file_str.gsub!(/\n$/, '')
60
- #
61
- # instances = [ [1.4, 'foo bar', 0, 'baz', "1900-08-08 12:12:12"],
62
- # [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
63
- # [0.0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
64
- # [68.1, 'stuff', 0, 'is cool', "1974-02-10 12:12:12"]]
65
- #
66
- # rel = Rarff::Relation.new('MyCoolRelation')
67
- # rel.instances = instances
68
- # rel.attributes[1].name = 'subject'
69
- # rel.attributes[4].name = 'birthday'
70
- # rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
71
- #
72
- # # puts "rel.to_arff(true):\n(\n#{rel.to_arff(true)}\n)\n"
73
- # assert_equal( arff_file_str, rel.to_arff(true), "test_sparse_arff_creation.")
74
- # end
75
- #
76
- #
77
- # # Test parsing of an arff file.
78
- # def test_arff_parse
79
- # in_file = './test_arff.arff'
80
- # rel = Rarff::Relation.new
81
- # rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
82
- #
83
- # assert_equal(rel.instances[2][1], 3.2)
84
- # assert_equal(rel.instances[7][4], 'Iris-setosa')
85
- # end
86
- #
87
- #
88
- # # Test parsing of sparse ARFF format
89
- # def test_sparse_arff_parse
90
- # in_file = './test_sparse_arff.arff'
91
- # rel = Rarff::Relation.new
92
- # rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
93
- #
94
- # assert_equal(13, rel.instances[0].size)
95
- # assert_equal(0, rel.instances[0][1])
96
- # assert_equal(7, rel.instances[0][3])
97
- # assert_equal(2.4, rel.instances[1][1])
98
- # assert_equal(0, rel.instances[1][2])
99
- # assert_equal(19, rel.instances[1][12])
100
- # assert_equal(6, rel.instances[2][6])
101
- # assert_equal(0, rel.instances[3][12])
102
- # # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
103
- # end
104
- #
41
+ #
42
+ # # Test creation of a sparse arff file string.
43
+ # def test_sparse_arff_creation
44
+ #
45
+ # arff_file_str = <<-END_OF_ARFF_FILE
46
+ #@RELATION MyCoolRelation
47
+ #@ATTRIBUTE Attr0 NUMERIC
48
+ #@ATTRIBUTE subject STRING
49
+ #@ATTRIBUTE Attr2 NUMERIC
50
+ #@ATTRIBUTE Attr3 STRING
51
+ #@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
52
+ #@DATA
53
+ #{0 1.4, 1 'foo bar', 3 baz, 4 "1900-08-08 12:12:12"}
54
+ #{0 20.9, 1 ruby, 2 46, 3 rocks, 4 "2005-10-23 12:12:12"}
55
+ #{1 ruby, 2 46, 3 rocks, 4 "2001-02-19 12:12:12"}
56
+ #{0 68.1, 1 stuff, 3 'is cool', 4 "1974-02-10 12:12:12"}
57
+ # END_OF_ARFF_FILE
58
+ #
59
+ # arff_file_str.gsub!(/\n$/, '')
60
+ #
61
+ # instances = [ [1.4, 'foo bar', 0, 'baz', "1900-08-08 12:12:12"],
62
+ # [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
63
+ # [0.0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
64
+ # [68.1, 'stuff', 0, 'is cool', "1974-02-10 12:12:12"]]
65
+ #
66
+ # rel = Rarff::Relation.new('MyCoolRelation')
67
+ # rel.instances = instances
68
+ # rel.attributes[1].name = 'subject'
69
+ # rel.attributes[4].name = 'birthday'
70
+ # rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
71
+ #
72
+ # # puts "rel.to_arff(true):\n(\n#{rel.to_arff(true)}\n)\n"
73
+ # assert_equal( arff_file_str, rel.to_arff(true), "test_sparse_arff_creation.")
74
+ # end
75
+ #
76
+ #
77
+ # # Test parsing of an arff file.
78
+ # def test_arff_parse
79
+ # in_file = './test_arff.arff'
80
+ # rel = Rarff::Relation.new
81
+ # rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
82
+ #
83
+ # assert_equal(rel.instances[2][1], 3.2)
84
+ # assert_equal(rel.instances[7][4], 'Iris-setosa')
85
+ # end
86
+ #
87
+ #
88
+ # # Test parsing of sparse ARFF format
89
+ # def test_sparse_arff_parse
90
+ # in_file = './test_sparse_arff.arff'
91
+ # rel = Rarff::Relation.new
92
+ # rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
93
+ #
94
+ # assert_equal(13, rel.instances[0].size)
95
+ # assert_equal(0, rel.instances[0][1])
96
+ # assert_equal(7, rel.instances[0][3])
97
+ # assert_equal(2.4, rel.instances[1][1])
98
+ # assert_equal(0, rel.instances[1][2])
99
+ # assert_equal(19, rel.instances[1][12])
100
+ # assert_equal(6, rel.instances[2][6])
101
+ # assert_equal(0, rel.instances[3][12])
102
+ # # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
103
+ # end
104
+ #
105
105
  def test_output_missing
106
106
  arff_file_str = <<-END_OF_ARFF_FILE
107
107
  @RELATION MyCoolRelation
@@ -157,6 +157,29 @@ class TestArffLib < Test::Unit::TestCase
157
157
  # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
158
158
  assert_equal(arff_file_str, rel.to_arff, "missing data output failure")
159
159
  end
160
+
161
+ def test_boolean
162
+ arff_file_str = <<-END_OF_ARFF_FILE
163
+ @RELATION MyCoolRelation
164
+ @ATTRIBUTE Attr0 NUMERIC
165
+ @ATTRIBUTE subject STRING
166
+ @ATTRIBUTE Attr2 {false,true}
167
+ @DATA
168
+ ?, ?, ?
169
+ 20.9, ruby, true
170
+ END_OF_ARFF_FILE
171
+
172
+ arff_file_str.gsub!(/\n$/, '')
173
+
174
+ instances = [ [nil, nil, nil],
175
+ [20.9, 'ruby', true]]
176
+
177
+ rel = Rarff::Relation.new('MyCoolRelation')
178
+ rel.instances = instances
179
+ rel.attributes[1].name = 'subject'
180
+
181
+ assert_equal(arff_file_str, rel.to_arff, "missing data output failure")
182
+ end
160
183
  end
161
184
 
162
185
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wwood-rarff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Payne, Ben J Woodcroft
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-11-25 00:00:00 -08:00
12
+ date: 2009-01-30 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -19,7 +19,7 @@ dependencies:
19
19
  requirements:
20
20
  - - ">="
21
21
  - !ruby/object:Gem::Version
22
- version: 1.8.2
22
+ version: 1.8.3
23
23
  version:
24
24
  description: Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify data sets for data mining and machine learning.
25
25
  email: apayne .at. gmail.com, b.woodcroft@pgrad.unimelb.edu.au
@@ -36,7 +36,8 @@ files:
36
36
  - Rakefile
37
37
  - lib/rarff.rb
38
38
  - test/test_arff.arff
39
- - test/test_sparse_arff.arff
39
+ - "test/test_sparse_arff.arff "
40
+ - test/ts_rarff.rb
40
41
  - test/test_rarff.rb
41
42
  has_rdoc: true
42
43
  homepage: http://adenserparlance.blogspot.com/2007/01/rarff-simple-arff-library-in-ruby.html
@@ -1,24 +0,0 @@
1
- % Sample sparse ARFF file
2
- @RELATION sparseness
3
-
4
- @ATTRIBUTE attr1 NUMERIC
5
- @ATTRIBUTE attr2 NUMERIC
6
- @ATTRIBUTE attr3 NUMERIC
7
- @ATTRIBUTE attr4 NUMERIC
8
- @ATTRIBUTE attr5 NUMERIC
9
- @ATTRIBUTE attr6 NUMERIC
10
- @ATTRIBUTE attr7 NUMERIC
11
- @ATTRIBUTE attr8 NUMERIC
12
- @ATTRIBUTE attr9 NUMERIC
13
- @ATTRIBUTE attr10 NUMERIC
14
- @ATTRIBUTE attr11 NUMERIC
15
- @ATTRIBUTE attr12 NUMERIC
16
- @ATTRIBUTE attr13 NUMERIC
17
-
18
- @DATA
19
- {3 7, 10 34}
20
- {1 2.4, 4 62, 12 19}
21
- {0 0, 1 1, 2 2, 3 3, 4 4, 5 5, 6 6, 7 7, 8 8, 9 9, 10 10, 11 11, 12 12}
22
- {9 42}
23
- {2 54.3, 3 92, 11 10.2}
24
-