wwood-rarff 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5)
  1. data/History.txt +4 -0
  2. data/Rakefile +1 -1
  3. data/lib/rarff.rb +31 -8
  4. data/test/test_rarff.rb +64 -2
  5. metadata +3 -2
data/History.txt CHANGED
@@ -1,5 +1,9 @@
1
1
  == Changes
2
2
 
3
+ === 0.2.3
4
+
5
+ * Added set_string_attributes_to_nominal for easy conversion of string attributes to nominal ones
6
+
3
7
  === 0.2.2 (unofficial)
4
8
 
5
9
  * Handles boolean inputs, which are modelled as nominals
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require 'hoe'
3
3
  #require './lib/rarff.rb'
4
4
 
5
5
  gem_name = 'rarff'
6
- hoe = Hoe.new(gem_name,'0.2.2') do |p|
6
+ hoe = Hoe.new(gem_name,'0.2.3') do |p|
7
7
 
8
8
  p.author = "Andy Payne, Ben J Woodcroft"
9
9
  p.email = "apayne .at. gmail.com, b.woodcroft@pgrad.unimelb.edu.au"
data/lib/rarff.rb CHANGED
@@ -63,7 +63,8 @@ module Rarff
63
63
  ################################################################################
64
64
 
65
65
  class Attribute
66
- attr_accessor :name, :type
66
+ attr_accessor :name
67
+ attr_reader :type
67
68
 
68
69
  def initialize(name='', type='')
69
70
  @name = name
@@ -88,7 +89,8 @@ module Rarff
88
89
  @type_is_nominal = true
89
90
  # Example format: "{nom1,nom2, nom3, nom4,nom5 } "
90
91
  # Split on '{' ',' or '}'
91
- @type = @type.gsub(/^\s*\{\s*/, '').gsub(/\s*\}\s*$/, '').split(/\s*\,\s*/)
92
+ # @type = @type.gsub(/^\s*\{\s*/, '').gsub(/\s*\}\s*$/, '').split(/\s*\,\s*/)
93
+ @type = @type.split(/\s*\,\s*/)
92
94
  end
93
95
  end
94
96
 
@@ -104,7 +106,7 @@ module Rarff
104
106
 
105
107
  def to_arff
106
108
  if @type_is_nominal == true
107
- ATTRIBUTE_MARKER + " #{@name} {#{@type.join(',').gsub(' ','_')}}"
109
+ ATTRIBUTE_MARKER + " #{@name} #{@type.join(',').gsub(' ','_')}"
108
110
  else
109
111
  ATTRIBUTE_MARKER + " #{@name} #{@type}"
110
112
  end
@@ -120,7 +122,8 @@ module Rarff
120
122
 
121
123
 
122
124
  class Relation
123
- attr_accessor :name, :attributes, :instances
125
+ attr_accessor :name, :attributes
126
+ attr_reader :instances
124
127
 
125
128
 
126
129
  def initialize(name='')
@@ -197,10 +200,10 @@ module Rarff
197
200
  @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_NUMERIC)
198
201
  elsif col.kind_of?(String)
199
202
  @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_STRING)
200
- elsif col.kind_of?(TrueClass) or col.kind_of?(FalseClass) # How come there is no generic BooleanClass?
203
+ elsif col == false or col == true #exactly equal to a boolean
201
204
  @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_BOOLEAN)
202
205
  else
203
- raise Exception, "Could not parse attribute: #{col.inspect}"
206
+ raise Exception, "Could not parse attribute to ARFF data type: #{col.inspect}"
204
207
  end
205
208
  }
206
209
  }
@@ -212,6 +215,26 @@ module Rarff
212
215
  end
213
216
  end
214
217
 
218
+ # Convert every String-type attribute into a nominal attribute. Nominal
219
+ # attributes are more useful in WEKA because more techniques can handle
220
+ # them than can handle strings.
221
+ def set_string_attributes_to_nominal
222
+ nominals = {}
223
+ # Frustratingly, we have to traverse this 2D array with the
224
+ # wrong dimension first. Oh well.
225
+ @instances.each_with_index do |row, row_index|
226
+ row.each_with_index do |string, col_index|
227
+ next unless @attributes[col_index].type == ATTRIBUTE_STRING
228
+
229
+ nominals[col_index] ||= {}
230
+ nominals[col_index][string] ||= true
231
+ end
232
+ end
233
+
234
+ nominals.each do |index, strings|
235
+ @attributes[index].type = "{#{strings.keys.join(',')}}"
236
+ end
237
+ end
215
238
 
216
239
  def expand_sparse(str)
217
240
  arr = Array.new(@attributes.size, 0)
@@ -247,7 +270,7 @@ module Rarff
247
270
 
248
271
  # Do the final output
249
272
  if sparse
250
- if col.nil? or
273
+ if col.nil? or
251
274
  (@attributes[i].type =~ /^#{ATTRIBUTE_NUMERIC}$/i and col == 0)
252
275
  nil
253
276
  else
@@ -267,7 +290,7 @@ module Rarff
267
290
  else
268
291
  mapped.join(", ")
269
292
  end
270
- }.join("\n").gsub(/^/, sparse ? '{' : '').gsub(/$/, sparse ? '}' : '')
293
+ }.join("\n")
271
294
  end
272
295
 
273
296
 
data/test/test_rarff.rb CHANGED
@@ -101,7 +101,6 @@ class TestArffLib < Test::Unit::TestCase
101
101
  # assert_equal(0, rel.instances[3][12])
102
102
  # # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
103
103
  # end
104
- #
105
104
  def test_output_missing
106
105
  arff_file_str = <<-END_OF_ARFF_FILE
107
106
  @RELATION MyCoolRelation
@@ -155,12 +154,75 @@ class TestArffLib < Test::Unit::TestCase
155
154
  rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
156
155
 
157
156
  # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
158
- assert_equal(arff_file_str, rel.to_arff, "missing data output failure")
157
+ assert_equal(arff_file_str, rel.to_arff, "missing data from first line output failure")
159
158
  end
160
159
 
161
160
  def test_boolean
162
161
  arff_file_str = <<-END_OF_ARFF_FILE
163
162
  @RELATION MyCoolRelation
163
+ @ATTRIBUTE Attr0 {false,true}
164
+ @DATA
165
+ true
166
+ END_OF_ARFF_FILE
167
+
168
+ arff_file_str.gsub!(/\n$/, '')
169
+
170
+ instances = [ [true]]
171
+
172
+ rel = Rarff::Relation.new('MyCoolRelation')
173
+ rel.instances = instances
174
+
175
+ # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
176
+ assert_equal(arff_file_str, rel.to_arff, "missing data from first line output failure")
177
+ end
178
+
179
+ def test_boolean_multipl
180
+ arff_file_str = <<-END_OF_ARFF_FILE
181
+ @RELATION MyCoolRelation
182
+ @ATTRIBUTE Attr0 {false,true}
183
+ @ATTRIBUTE Attr1 {false,true}
184
+ @ATTRIBUTE Attr2 {false,true}
185
+ @DATA
186
+ true, false, true
187
+ true, true, true
188
+ END_OF_ARFF_FILE
189
+
190
+ arff_file_str.gsub!(/\n$/, '')
191
+
192
+ instances = [ [true,false,true],[true,true,true]]
193
+
194
+ rel = Rarff::Relation.new('MyCoolRelation')
195
+ rel.instances = instances
196
+
197
+ # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
198
+ assert_equal(arff_file_str, rel.to_arff, "missing data from first line output failure")
199
+ end
200
+
201
+ def test_strings_as_nominal
202
+ arff_file_str = <<-END_OF_ARFF_FILE
203
+ @RELATION MyCoolRelation
204
+ @ATTRIBUTE Attr0 {two,one}
205
+ @ATTRIBUTE Attr1 {three,four}
206
+ @DATA
207
+ one, three
208
+ two, four
209
+ END_OF_ARFF_FILE
210
+
211
+ arff_file_str.gsub!(/\n$/, '')
212
+
213
+ instances = [ ['one','three'],['two','four']]
214
+
215
+ rel = Rarff::Relation.new('MyCoolRelation')
216
+ rel.instances = instances
217
+ rel.set_string_attributes_to_nominal
218
+
219
+ # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
220
+ assert_equal(arff_file_str, rel.to_arff, "test_strings_as_nominal")
221
+ end
222
+
223
+ def test_boolean_2
224
+ arff_file_str = <<-END_OF_ARFF_FILE
225
+ @RELATION MyCoolRelation
164
226
  @ATTRIBUTE Attr0 NUMERIC
165
227
  @ATTRIBUTE subject STRING
166
228
  @ATTRIBUTE Attr2 {false,true}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wwood-rarff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Payne, Ben J Woodcroft
@@ -9,11 +9,12 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-01-30 00:00:00 -08:00
12
+ date: 2009-02-18 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: hoe
17
+ type: :development
17
18
  version_requirement:
18
19
  version_requirements: !ruby/object:Gem::Requirement
19
20
  requirements: