wwood-rarff 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,5 +1,10 @@
1
1
  == Changes
2
2
 
3
+ === 0.2.2 (unofficial)
4
+
5
+ * Handles boolean inputs, which are modelled as nominals
6
+ * Handles spaces in nominals, which are replaced by underscores. These values should probably be quoted instead, but this is good enough for now
7
+
3
8
  === 0.2.1 (unofficial)
4
9
 
5
10
  * Handles missing data in output, encoded internally as nil values
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require 'hoe'
3
3
  #require './lib/rarff.rb'
4
4
 
5
5
  gem_name = 'rarff'
6
- hoe = Hoe.new(gem_name,'0.2.1') do |p|
6
+ hoe = Hoe.new(gem_name,'0.2.2') do |p|
7
7
 
8
8
  p.author = "Andy Payne, Ben J Woodcroft"
9
9
  p.email = "apayne .at. gmail.com, b.woodcroft@pgrad.unimelb.edu.au"
data/lib/rarff.rb CHANGED
@@ -53,6 +53,10 @@ module Rarff
53
53
  ATTRIBUTE_INTEGER = 'INTEGER'
54
54
  ATTRIBUTE_STRING = 'STRING'
55
55
  ATTRIBUTE_DATE = 'DATE'
56
+ # Model Boolean as a Nominal Attribute.
57
+ # Use {false, true} rather than {true, false} so that in Weka visualisations
58
+ # true appears on the right, which is more intuitive
59
+ ATTRIBUTE_BOOLEAN = '{false, true}'
56
60
 
57
61
  MISSING = '?'
58
62
 
@@ -100,7 +104,7 @@ module Rarff
100
104
 
101
105
  def to_arff
102
106
  if @type_is_nominal == true
103
- ATTRIBUTE_MARKER + " #{@name} #{@type.join(',')}"
107
+ ATTRIBUTE_MARKER + " #{@name} {#{@type.join(',').gsub(' ','_')}}"
104
108
  else
105
109
  ATTRIBUTE_MARKER + " #{@name} #{@type}"
106
110
  end
@@ -193,6 +197,8 @@ module Rarff
193
197
  @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_NUMERIC)
194
198
  elsif col.kind_of?(String)
195
199
  @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_STRING)
200
+ elsif col.kind_of?(TrueClass) or col.kind_of?(FalseClass) # How come there is no generic BooleanClass?
201
+ @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_BOOLEAN)
196
202
  else
197
203
  raise Exception, "Could not parse attribute: #{col.inspect}"
198
204
  end
@@ -219,7 +225,8 @@ module Rarff
219
225
 
220
226
  def to_arff(sparse=false)
221
227
  RELATION_MARKER + " #{@name}\n" +
222
- @attributes.map{ |attr| attr.to_arff }.join("\n") +
228
+ # @attributes.map{ |attr| attr.to_arff }.join("\n") +
229
+ @attributes.join("\n") +
223
230
  "\n" +
224
231
  DATA_MARKER + "\n" +
225
232
 
data/test/test_rarff.rb CHANGED
@@ -38,70 +38,70 @@ class TestArffLib < Test::Unit::TestCase
38
38
  # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
39
39
  assert_equal(arff_file_str, rel.to_arff, "Arff creation test failed.")
40
40
  end
41
- #
42
- # # Test creation of a sparse arff file string.
43
- # def test_sparse_arff_creation
44
- #
45
- # arff_file_str = <<-END_OF_ARFF_FILE
46
- #@RELATION MyCoolRelation
47
- #@ATTRIBUTE Attr0 NUMERIC
48
- #@ATTRIBUTE subject STRING
49
- #@ATTRIBUTE Attr2 NUMERIC
50
- #@ATTRIBUTE Attr3 STRING
51
- #@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
52
- #@DATA
53
- #{0 1.4, 1 'foo bar', 3 baz, 4 "1900-08-08 12:12:12"}
54
- #{0 20.9, 1 ruby, 2 46, 3 rocks, 4 "2005-10-23 12:12:12"}
55
- #{1 ruby, 2 46, 3 rocks, 4 "2001-02-19 12:12:12"}
56
- #{0 68.1, 1 stuff, 3 'is cool', 4 "1974-02-10 12:12:12"}
57
- # END_OF_ARFF_FILE
58
- #
59
- # arff_file_str.gsub!(/\n$/, '')
60
- #
61
- # instances = [ [1.4, 'foo bar', 0, 'baz', "1900-08-08 12:12:12"],
62
- # [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
63
- # [0.0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
64
- # [68.1, 'stuff', 0, 'is cool', "1974-02-10 12:12:12"]]
65
- #
66
- # rel = Rarff::Relation.new('MyCoolRelation')
67
- # rel.instances = instances
68
- # rel.attributes[1].name = 'subject'
69
- # rel.attributes[4].name = 'birthday'
70
- # rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
71
- #
72
- # # puts "rel.to_arff(true):\n(\n#{rel.to_arff(true)}\n)\n"
73
- # assert_equal( arff_file_str, rel.to_arff(true), "test_sparse_arff_creation.")
74
- # end
75
- #
76
- #
77
- # # Test parsing of an arff file.
78
- # def test_arff_parse
79
- # in_file = './test_arff.arff'
80
- # rel = Rarff::Relation.new
81
- # rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
82
- #
83
- # assert_equal(rel.instances[2][1], 3.2)
84
- # assert_equal(rel.instances[7][4], 'Iris-setosa')
85
- # end
86
- #
87
- #
88
- # # Test parsing of sparse ARFF format
89
- # def test_sparse_arff_parse
90
- # in_file = './test_sparse_arff.arff'
91
- # rel = Rarff::Relation.new
92
- # rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
93
- #
94
- # assert_equal(13, rel.instances[0].size)
95
- # assert_equal(0, rel.instances[0][1])
96
- # assert_equal(7, rel.instances[0][3])
97
- # assert_equal(2.4, rel.instances[1][1])
98
- # assert_equal(0, rel.instances[1][2])
99
- # assert_equal(19, rel.instances[1][12])
100
- # assert_equal(6, rel.instances[2][6])
101
- # assert_equal(0, rel.instances[3][12])
102
- # # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
103
- # end
104
- #
41
+ #
42
+ # # Test creation of a sparse arff file string.
43
+ # def test_sparse_arff_creation
44
+ #
45
+ # arff_file_str = <<-END_OF_ARFF_FILE
46
+ #@RELATION MyCoolRelation
47
+ #@ATTRIBUTE Attr0 NUMERIC
48
+ #@ATTRIBUTE subject STRING
49
+ #@ATTRIBUTE Attr2 NUMERIC
50
+ #@ATTRIBUTE Attr3 STRING
51
+ #@ATTRIBUTE birthday DATE "yyyy-MM-dd HH:mm:ss"
52
+ #@DATA
53
+ #{0 1.4, 1 'foo bar', 3 baz, 4 "1900-08-08 12:12:12"}
54
+ #{0 20.9, 1 ruby, 2 46, 3 rocks, 4 "2005-10-23 12:12:12"}
55
+ #{1 ruby, 2 46, 3 rocks, 4 "2001-02-19 12:12:12"}
56
+ #{0 68.1, 1 stuff, 3 'is cool', 4 "1974-02-10 12:12:12"}
57
+ # END_OF_ARFF_FILE
58
+ #
59
+ # arff_file_str.gsub!(/\n$/, '')
60
+ #
61
+ # instances = [ [1.4, 'foo bar', 0, 'baz', "1900-08-08 12:12:12"],
62
+ # [20.9, 'ruby', 46, 'rocks', "2005-10-23 12:12:12"],
63
+ # [0.0, 'ruby', 46, 'rocks', "2001-02-19 12:12:12"],
64
+ # [68.1, 'stuff', 0, 'is cool', "1974-02-10 12:12:12"]]
65
+ #
66
+ # rel = Rarff::Relation.new('MyCoolRelation')
67
+ # rel.instances = instances
68
+ # rel.attributes[1].name = 'subject'
69
+ # rel.attributes[4].name = 'birthday'
70
+ # rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
71
+ #
72
+ # # puts "rel.to_arff(true):\n(\n#{rel.to_arff(true)}\n)\n"
73
+ # assert_equal( arff_file_str, rel.to_arff(true), "test_sparse_arff_creation.")
74
+ # end
75
+ #
76
+ #
77
+ # # Test parsing of an arff file.
78
+ # def test_arff_parse
79
+ # in_file = './test_arff.arff'
80
+ # rel = Rarff::Relation.new
81
+ # rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
82
+ #
83
+ # assert_equal(rel.instances[2][1], 3.2)
84
+ # assert_equal(rel.instances[7][4], 'Iris-setosa')
85
+ # end
86
+ #
87
+ #
88
+ # # Test parsing of sparse ARFF format
89
+ # def test_sparse_arff_parse
90
+ # in_file = './test_sparse_arff.arff'
91
+ # rel = Rarff::Relation.new
92
+ # rel.parse(File.open(File.join(File.dirname(__FILE__),in_file)).read)
93
+ #
94
+ # assert_equal(13, rel.instances[0].size)
95
+ # assert_equal(0, rel.instances[0][1])
96
+ # assert_equal(7, rel.instances[0][3])
97
+ # assert_equal(2.4, rel.instances[1][1])
98
+ # assert_equal(0, rel.instances[1][2])
99
+ # assert_equal(19, rel.instances[1][12])
100
+ # assert_equal(6, rel.instances[2][6])
101
+ # assert_equal(0, rel.instances[3][12])
102
+ # # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
103
+ # end
104
+ #
105
105
  def test_output_missing
106
106
  arff_file_str = <<-END_OF_ARFF_FILE
107
107
  @RELATION MyCoolRelation
@@ -157,6 +157,29 @@ class TestArffLib < Test::Unit::TestCase
157
157
  # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
158
158
  assert_equal(arff_file_str, rel.to_arff, "missing data output failure")
159
159
  end
160
+
161
+ def test_boolean
162
+ arff_file_str = <<-END_OF_ARFF_FILE
163
+ @RELATION MyCoolRelation
164
+ @ATTRIBUTE Attr0 NUMERIC
165
+ @ATTRIBUTE subject STRING
166
+ @ATTRIBUTE Attr2 {false,true}
167
+ @DATA
168
+ ?, ?, ?
169
+ 20.9, ruby, true
170
+ END_OF_ARFF_FILE
171
+
172
+ arff_file_str.gsub!(/\n$/, '')
173
+
174
+ instances = [ [nil, nil, nil],
175
+ [20.9, 'ruby', true]]
176
+
177
+ rel = Rarff::Relation.new('MyCoolRelation')
178
+ rel.instances = instances
179
+ rel.attributes[1].name = 'subject'
180
+
181
+ assert_equal(arff_file_str, rel.to_arff, "missing data output failure")
182
+ end
160
183
  end
161
184
 
162
185
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wwood-rarff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Payne, Ben J Woodcroft
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-11-25 00:00:00 -08:00
12
+ date: 2009-01-30 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -19,7 +19,7 @@ dependencies:
19
19
  requirements:
20
20
  - - ">="
21
21
  - !ruby/object:Gem::Version
22
- version: 1.8.2
22
+ version: 1.8.3
23
23
  version:
24
24
  description: Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify data sets for data mining and machine learning.
25
25
  email: apayne .at. gmail.com, b.woodcroft@pgrad.unimelb.edu.au
@@ -36,7 +36,8 @@ files:
36
36
  - Rakefile
37
37
  - lib/rarff.rb
38
38
  - test/test_arff.arff
39
- - test/test_sparse_arff.arff
39
+ - "test/test_sparse_arff.arff "
40
+ - test/ts_rarff.rb
40
41
  - test/test_rarff.rb
41
42
  has_rdoc: true
42
43
  homepage: http://adenserparlance.blogspot.com/2007/01/rarff-simple-arff-library-in-ruby.html
@@ -1,24 +0,0 @@
1
- % Sample sparse ARFF file
2
- @RELATION sparseness
3
-
4
- @ATTRIBUTE attr1 NUMERIC
5
- @ATTRIBUTE attr2 NUMERIC
6
- @ATTRIBUTE attr3 NUMERIC
7
- @ATTRIBUTE attr4 NUMERIC
8
- @ATTRIBUTE attr5 NUMERIC
9
- @ATTRIBUTE attr6 NUMERIC
10
- @ATTRIBUTE attr7 NUMERIC
11
- @ATTRIBUTE attr8 NUMERIC
12
- @ATTRIBUTE attr9 NUMERIC
13
- @ATTRIBUTE attr10 NUMERIC
14
- @ATTRIBUTE attr11 NUMERIC
15
- @ATTRIBUTE attr12 NUMERIC
16
- @ATTRIBUTE attr13 NUMERIC
17
-
18
- @DATA
19
- {3 7, 10 34}
20
- {1 2.4, 4 62, 12 19}
21
- {0 0, 1 1, 2 2, 3 3, 4 4, 5 5, 6 6, 7 7, 8 8, 9 9, 10 10, 11 11, 12 12}
22
- {9 42}
23
- {2 54.3, 3 92, 11 10.2}
24
-