wwood-rarff 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/History.txt +4 -0
  2. data/Rakefile +1 -1
  3. data/lib/rarff.rb +31 -8
  4. data/test/test_rarff.rb +64 -2
  5. metadata +3 -2
data/History.txt CHANGED
@@ -1,5 +1,9 @@
1
1
  == Changes
2
2
 
3
+ === 0.2.3
4
+
5
+ * Added set_string_attributes_to_nominal for easy conversion of string attributes to nominal ones
6
+
3
7
  === 0.2.2 (unofficial)
4
8
 
5
9
  * Handles boolean inputs, which are modelled as nominals
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require 'hoe'
3
3
  #require './lib/rarff.rb'
4
4
 
5
5
  gem_name = 'rarff'
6
- hoe = Hoe.new(gem_name,'0.2.2') do |p|
6
+ hoe = Hoe.new(gem_name,'0.2.3') do |p|
7
7
 
8
8
  p.author = "Andy Payne, Ben J Woodcroft"
9
9
  p.email = "apayne .at. gmail.com, b.woodcroft@pgrad.unimelb.edu.au"
data/lib/rarff.rb CHANGED
@@ -63,7 +63,8 @@ module Rarff
63
63
  ################################################################################
64
64
 
65
65
  class Attribute
66
- attr_accessor :name, :type
66
+ attr_accessor :name
67
+ attr_reader :type
67
68
 
68
69
  def initialize(name='', type='')
69
70
  @name = name
@@ -88,7 +89,8 @@ module Rarff
88
89
  @type_is_nominal = true
89
90
  # Example format: "{nom1,nom2, nom3, nom4,nom5 } "
90
91
  # Split on '{' ',' or '}'
91
- @type = @type.gsub(/^\s*\{\s*/, '').gsub(/\s*\}\s*$/, '').split(/\s*\,\s*/)
92
+ # @type = @type.gsub(/^\s*\{\s*/, '').gsub(/\s*\}\s*$/, '').split(/\s*\,\s*/)
93
+ @type = @type.split(/\s*\,\s*/)
92
94
  end
93
95
  end
94
96
 
@@ -104,7 +106,7 @@ module Rarff
104
106
 
105
107
  def to_arff
106
108
  if @type_is_nominal == true
107
- ATTRIBUTE_MARKER + " #{@name} {#{@type.join(',').gsub(' ','_')}}"
109
+ ATTRIBUTE_MARKER + " #{@name} #{@type.join(',').gsub(' ','_')}"
108
110
  else
109
111
  ATTRIBUTE_MARKER + " #{@name} #{@type}"
110
112
  end
@@ -120,7 +122,8 @@ module Rarff
120
122
 
121
123
 
122
124
  class Relation
123
- attr_accessor :name, :attributes, :instances
125
+ attr_accessor :name, :attributes
126
+ attr_reader :instances
124
127
 
125
128
 
126
129
  def initialize(name='')
@@ -197,10 +200,10 @@ module Rarff
197
200
  @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_NUMERIC)
198
201
  elsif col.kind_of?(String)
199
202
  @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_STRING)
200
- elsif col.kind_of?(TrueClass) or col.kind_of?(FalseClass) # How come there is no generic BooleanClass?
203
+ elsif col == false or col == true #exactly equal to a boolean
201
204
  @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_BOOLEAN)
202
205
  else
203
- raise Exception, "Could not parse attribute: #{col.inspect}"
206
+ raise Exception, "Could not parse attribute to ARFF data type: #{col.inspect}"
204
207
  end
205
208
  }
206
209
  }
@@ -212,6 +215,26 @@ module Rarff
212
215
  end
213
216
  end
214
217
 
218
+ # Convert all String type attributes into nominal attributes. Nominal
219
+ # attributes are more useful in WEKA because more of its techniques
220
+ # handle them than handle strings.
221
+ def set_string_attributes_to_nominal
222
+ nominals = {}
223
+ # Frustratingly, the instances are stored row by row, so we have to walk
224
+ # the rows first even though we are collecting values per column. Oh well.
225
+ @instances.each_with_index do |row, row_index|
226
+ row.each_with_index do |string, col_index|
227
+ next unless @attributes[col_index].type == ATTRIBUTE_STRING
228
+
229
+ nominals[col_index] ||= {}
230
+ nominals[col_index][string] ||= true
231
+ end
232
+ end
233
+
234
+ nominals.each do |index, strings|
235
+ @attributes[index].type = "{#{strings.keys.join(',')}}"
236
+ end
237
+ end
215
238
 
216
239
  def expand_sparse(str)
217
240
  arr = Array.new(@attributes.size, 0)
@@ -247,7 +270,7 @@ module Rarff
247
270
 
248
271
  # Do the final output
249
272
  if sparse
250
- if col.nil? or
273
+ if col.nil? or
251
274
  (@attributes[i].type =~ /^#{ATTRIBUTE_NUMERIC}$/i and col == 0)
252
275
  nil
253
276
  else
@@ -267,7 +290,7 @@ module Rarff
267
290
  else
268
291
  mapped.join(", ")
269
292
  end
270
- }.join("\n").gsub(/^/, sparse ? '{' : '').gsub(/$/, sparse ? '}' : '')
293
+ }.join("\n")
271
294
  end
272
295
 
273
296
 
data/test/test_rarff.rb CHANGED
@@ -101,7 +101,6 @@ class TestArffLib < Test::Unit::TestCase
101
101
  # assert_equal(0, rel.instances[3][12])
102
102
  # # puts "\n\nARFF: (\n#{rel.to_arff}\n)"
103
103
  # end
104
- #
105
104
  def test_output_missing
106
105
  arff_file_str = <<-END_OF_ARFF_FILE
107
106
  @RELATION MyCoolRelation
@@ -155,12 +154,75 @@ class TestArffLib < Test::Unit::TestCase
155
154
  rel.attributes[4].type = 'DATE "yyyy-MM-dd HH:mm:ss"'
156
155
 
157
156
  # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
158
- assert_equal(arff_file_str, rel.to_arff, "missing data output failure")
157
+ assert_equal(arff_file_str, rel.to_arff, "missing data from first line output failure")
159
158
  end
160
159
 
161
160
  def test_boolean
162
161
  arff_file_str = <<-END_OF_ARFF_FILE
163
162
  @RELATION MyCoolRelation
163
+ @ATTRIBUTE Attr0 {false,true}
164
+ @DATA
165
+ true
166
+ END_OF_ARFF_FILE
167
+
168
+ arff_file_str.gsub!(/\n$/, '')
169
+
170
+ instances = [ [true]]
171
+
172
+ rel = Rarff::Relation.new('MyCoolRelation')
173
+ rel.instances = instances
174
+
175
+ # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
176
+ assert_equal(arff_file_str, rel.to_arff, "missing data from first line output failure")
177
+ end
178
+
179
+ def test_boolean_multipl
180
+ arff_file_str = <<-END_OF_ARFF_FILE
181
+ @RELATION MyCoolRelation
182
+ @ATTRIBUTE Attr0 {false,true}
183
+ @ATTRIBUTE Attr1 {false,true}
184
+ @ATTRIBUTE Attr2 {false,true}
185
+ @DATA
186
+ true, false, true
187
+ true, true, true
188
+ END_OF_ARFF_FILE
189
+
190
+ arff_file_str.gsub!(/\n$/, '')
191
+
192
+ instances = [ [true,false,true],[true,true,true]]
193
+
194
+ rel = Rarff::Relation.new('MyCoolRelation')
195
+ rel.instances = instances
196
+
197
+ # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
198
+ assert_equal(arff_file_str, rel.to_arff, "missing data from first line output failure")
199
+ end
200
+
201
+ def test_strings_as_nominal
202
+ arff_file_str = <<-END_OF_ARFF_FILE
203
+ @RELATION MyCoolRelation
204
+ @ATTRIBUTE Attr0 {two,one}
205
+ @ATTRIBUTE Attr1 {three,four}
206
+ @DATA
207
+ one, three
208
+ two, four
209
+ END_OF_ARFF_FILE
210
+
211
+ arff_file_str.gsub!(/\n$/, '')
212
+
213
+ instances = [ ['one','three'],['two','four']]
214
+
215
+ rel = Rarff::Relation.new('MyCoolRelation')
216
+ rel.instances = instances
217
+ rel.set_string_attributes_to_nominal
218
+
219
+ # puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
220
+ assert_equal(arff_file_str, rel.to_arff, "test_strings_as_nominal")
221
+ end
222
+
223
+ def test_boolean_2
224
+ arff_file_str = <<-END_OF_ARFF_FILE
225
+ @RELATION MyCoolRelation
164
226
  @ATTRIBUTE Attr0 NUMERIC
165
227
  @ATTRIBUTE subject STRING
166
228
  @ATTRIBUTE Attr2 {false,true}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wwood-rarff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Payne, Ben J Woodcroft
@@ -9,11 +9,12 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-01-30 00:00:00 -08:00
12
+ date: 2009-02-18 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: hoe
17
+ type: :development
17
18
  version_requirement:
18
19
  version_requirements: !ruby/object:Gem::Requirement
19
20
  requirements: