wwood-rarff 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +0 -3
- data/Rakefile +51 -22
- data/VERSION +1 -0
- data/lib/rarff.rb +7 -2
- data/rarff.gemspec +53 -0
- data/test/test_rarff.rb +93 -1
- data/test/test_sparse_arff.arff +24 -0
- metadata +20 -19
data/Manifest.txt
CHANGED
data/Rakefile
CHANGED
|
@@ -1,24 +1,53 @@
|
|
|
1
1
|
require 'rubygems'
|
|
2
|
-
require '
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
2
|
+
require 'rake'
|
|
3
|
+
|
|
4
|
+
begin
|
|
5
|
+
require 'jeweler'
|
|
6
|
+
Jeweler::Tasks.new do |gem|
|
|
7
|
+
gem.name = "wwood-rarff"
|
|
8
|
+
gem.summary = %Q{Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files}
|
|
9
|
+
gem.description = %Q{Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify
|
|
10
|
+
data sets for data mining and machine learning.}
|
|
11
|
+
gem.email = "donttrustben near gmail.com"
|
|
12
|
+
gem.homepage = "http://github.com/wwood/rarff"
|
|
13
|
+
gem.authors = ["Ben J Woodcroft","Andy Payne"]
|
|
14
|
+
gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
|
|
15
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
|
16
|
+
end
|
|
17
|
+
rescue LoadError
|
|
18
|
+
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
require 'rake/testtask'
|
|
22
|
+
Rake::TestTask.new(:test) do |test|
|
|
23
|
+
test.libs << 'lib' << 'test'
|
|
24
|
+
test.pattern = 'test/**/test_*.rb'
|
|
25
|
+
test.verbose = true
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
begin
|
|
29
|
+
require 'rcov/rcovtask'
|
|
30
|
+
Rcov::RcovTask.new do |test|
|
|
31
|
+
test.libs << 'test'
|
|
32
|
+
test.pattern = 'test/**/test_*.rb'
|
|
33
|
+
test.verbose = true
|
|
34
|
+
end
|
|
35
|
+
rescue LoadError
|
|
36
|
+
task :rcov do
|
|
37
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
task :test => :check_dependencies
|
|
42
|
+
|
|
43
|
+
task :default => :test
|
|
44
|
+
|
|
45
|
+
require 'rake/rdoctask'
|
|
46
|
+
Rake::RDocTask.new do |rdoc|
|
|
47
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
|
48
|
+
|
|
49
|
+
rdoc.rdoc_dir = 'rdoc'
|
|
50
|
+
rdoc.title = "blah #{version}"
|
|
51
|
+
rdoc.rdoc_files.include('README*')
|
|
52
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
|
24
53
|
end
|
data/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.2.4
|
data/lib/rarff.rb
CHANGED
|
@@ -217,14 +217,19 @@ module Rarff
|
|
|
217
217
|
|
|
218
218
|
# Make all String type attributes into nominal attributes, because
|
|
219
219
|
# they are more useful in WEKA because more techniques handle them than
|
|
220
|
-
# strings
|
|
221
|
-
|
|
220
|
+
# strings.
|
|
221
|
+
#
|
|
222
|
+
# column_indices is an optional argument specifying the columns that
|
|
223
|
+
# are to be set to nominal (0-based indexes). If nil (the default), then
|
|
224
|
+
# all columns are included
|
|
225
|
+
def set_string_attributes_to_nominal(column_indices = nil)
|
|
222
226
|
nominals = {}
|
|
223
227
|
# Frustratingly, we have to traverse this 2D array with the
|
|
224
228
|
# wrong dimension first. Oh well.
|
|
225
229
|
@instances.each_with_index do |row, row_index|
|
|
226
230
|
row.each_with_index do |string, col_index|
|
|
227
231
|
next unless @attributes[col_index].type == ATTRIBUTE_STRING
|
|
232
|
+
next unless column_indices.nil? or column_indices.include?(col_index)
|
|
228
233
|
|
|
229
234
|
nominals[col_index] ||= {}
|
|
230
235
|
nominals[col_index][string] ||= true
|
data/rarff.gemspec
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Generated by jeweler
|
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
|
4
|
+
# -*- encoding: utf-8 -*-
|
|
5
|
+
|
|
6
|
+
Gem::Specification.new do |s|
|
|
7
|
+
s.name = %q{rarff}
|
|
8
|
+
s.version = "0.2.4"
|
|
9
|
+
|
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
|
+
s.authors = ["Ben J Woodcroft", "Andy Payne"]
|
|
12
|
+
s.date = %q{2009-11-19}
|
|
13
|
+
s.description = %q{Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify
|
|
14
|
+
data sets for data mining and machine learning.}
|
|
15
|
+
s.email = %q{donttrustben near gmail.com}
|
|
16
|
+
s.extra_rdoc_files = [
|
|
17
|
+
"README.txt"
|
|
18
|
+
]
|
|
19
|
+
s.files = [
|
|
20
|
+
"History.txt",
|
|
21
|
+
"Manifest.txt",
|
|
22
|
+
"README.txt",
|
|
23
|
+
"Rakefile",
|
|
24
|
+
"VERSION",
|
|
25
|
+
"lib/rarff.rb",
|
|
26
|
+
"rarff.gemspec",
|
|
27
|
+
"test/test_arff.arff",
|
|
28
|
+
"test/test_rarff.rb",
|
|
29
|
+
"test/test_sparse_arff.arff"
|
|
30
|
+
]
|
|
31
|
+
s.homepage = %q{http://github.com/wwood/rarff}
|
|
32
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
|
33
|
+
s.require_paths = ["lib"]
|
|
34
|
+
s.rubygems_version = %q{1.3.5}
|
|
35
|
+
s.summary = %q{Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files}
|
|
36
|
+
s.test_files = [
|
|
37
|
+
"test/test_rarff.rb"
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
if s.respond_to? :specification_version then
|
|
41
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
|
42
|
+
s.specification_version = 3
|
|
43
|
+
|
|
44
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
|
45
|
+
s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
|
46
|
+
else
|
|
47
|
+
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
|
48
|
+
end
|
|
49
|
+
else
|
|
50
|
+
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
data/test/test_rarff.rb
CHANGED
|
@@ -38,7 +38,7 @@ class TestArffLib < Test::Unit::TestCase
|
|
|
38
38
|
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
|
39
39
|
assert_equal(arff_file_str, rel.to_arff, "Arff creation test failed.")
|
|
40
40
|
end
|
|
41
|
-
|
|
41
|
+
|
|
42
42
|
# # Test creation of a sparse arff file string.
|
|
43
43
|
# def test_sparse_arff_creation
|
|
44
44
|
#
|
|
@@ -219,6 +219,72 @@ two, four
|
|
|
219
219
|
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
|
220
220
|
assert_equal(arff_file_str, rel.to_arff, "test_strings_as_nominal")
|
|
221
221
|
end
|
|
222
|
+
|
|
223
|
+
def test_set_strings_nominal2
|
|
224
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
|
225
|
+
@RELATION MyCoolRelation
|
|
226
|
+
@ATTRIBUTE Attr0 NUMERIC
|
|
227
|
+
@ATTRIBUTE Attr1 {three,four}
|
|
228
|
+
@DATA
|
|
229
|
+
1, three
|
|
230
|
+
2, four
|
|
231
|
+
END_OF_ARFF_FILE
|
|
232
|
+
|
|
233
|
+
arff_file_str.gsub!(/\n$/, '')
|
|
234
|
+
|
|
235
|
+
instances = [ [1,'three'],[2,'four']]
|
|
236
|
+
|
|
237
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
|
238
|
+
rel.instances = instances
|
|
239
|
+
rel.set_string_attributes_to_nominal
|
|
240
|
+
|
|
241
|
+
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
|
242
|
+
assert_equal(arff_file_str, rel.to_arff, "test_strings_as_nominal")
|
|
243
|
+
end
|
|
244
|
+
|
|
245
|
+
def test_strings_nominal_with_arguments1
|
|
246
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
|
247
|
+
@RELATION MyCoolRelation
|
|
248
|
+
@ATTRIBUTE Attr0 NUMERIC
|
|
249
|
+
@ATTRIBUTE Attr1 STRING
|
|
250
|
+
@DATA
|
|
251
|
+
1, three
|
|
252
|
+
2, four
|
|
253
|
+
END_OF_ARFF_FILE
|
|
254
|
+
|
|
255
|
+
arff_file_str.gsub!(/\n$/, '')
|
|
256
|
+
|
|
257
|
+
instances = [ [1,'three'],[2,'four']]
|
|
258
|
+
|
|
259
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
|
260
|
+
rel.instances = instances
|
|
261
|
+
rel.set_string_attributes_to_nominal([0])
|
|
262
|
+
|
|
263
|
+
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
|
264
|
+
assert_equal(arff_file_str, rel.to_arff, "test_strings_as_nominal")
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
def test_strings_nominal_with_arguments2
|
|
268
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
|
269
|
+
@RELATION MyCoolRelation
|
|
270
|
+
@ATTRIBUTE Attr0 NUMERIC
|
|
271
|
+
@ATTRIBUTE Attr1 {three,four}
|
|
272
|
+
@DATA
|
|
273
|
+
1, three
|
|
274
|
+
2, four
|
|
275
|
+
END_OF_ARFF_FILE
|
|
276
|
+
|
|
277
|
+
arff_file_str.gsub!(/\n$/, '')
|
|
278
|
+
|
|
279
|
+
instances = [ [1,'three'],[2,'four']]
|
|
280
|
+
|
|
281
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
|
282
|
+
rel.instances = instances
|
|
283
|
+
rel.set_string_attributes_to_nominal([0,1])
|
|
284
|
+
|
|
285
|
+
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
|
286
|
+
assert_equal(arff_file_str, rel.to_arff, "test_strings_as_nominal")
|
|
287
|
+
end
|
|
222
288
|
|
|
223
289
|
def test_boolean_2
|
|
224
290
|
arff_file_str = <<-END_OF_ARFF_FILE
|
|
@@ -242,6 +308,32 @@ two, four
|
|
|
242
308
|
|
|
243
309
|
assert_equal(arff_file_str, rel.to_arff, "missing data output failure")
|
|
244
310
|
end
|
|
311
|
+
|
|
312
|
+
def test_commas_in_attribute_name
|
|
313
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
|
314
|
+
@RELATION MyCoolRelation
|
|
315
|
+
@ATTRIBUTE subject {ruby_yeh,ruby}
|
|
316
|
+
@ATTRIBUTE Attr1 {duh}
|
|
317
|
+
@DATA
|
|
318
|
+
ruby__yeh, duh
|
|
319
|
+
ruby, duh
|
|
320
|
+
END_OF_ARFF_FILE
|
|
321
|
+
|
|
322
|
+
arff_file_str.gsub!(/\n$/, '')
|
|
323
|
+
|
|
324
|
+
instances = [
|
|
325
|
+
['ruby, yeh','duh'],
|
|
326
|
+
['ruby','duh']
|
|
327
|
+
]
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
|
331
|
+
rel.instances = instances
|
|
332
|
+
rel.attributes[0].name = 'subject'
|
|
333
|
+
rel.set_string_attributes_to_nominal
|
|
334
|
+
|
|
335
|
+
assert_equal(arff_file_str, rel.to_arff, "comma in string attribute failure")
|
|
336
|
+
end
|
|
245
337
|
end
|
|
246
338
|
|
|
247
339
|
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
% Sample sparse ARFF file
|
|
2
|
+
@RELATION sparseness
|
|
3
|
+
|
|
4
|
+
@ATTRIBUTE attr1 NUMERIC
|
|
5
|
+
@ATTRIBUTE attr2 NUMERIC
|
|
6
|
+
@ATTRIBUTE attr3 NUMERIC
|
|
7
|
+
@ATTRIBUTE attr4 NUMERIC
|
|
8
|
+
@ATTRIBUTE attr5 NUMERIC
|
|
9
|
+
@ATTRIBUTE attr6 NUMERIC
|
|
10
|
+
@ATTRIBUTE attr7 NUMERIC
|
|
11
|
+
@ATTRIBUTE attr8 NUMERIC
|
|
12
|
+
@ATTRIBUTE attr9 NUMERIC
|
|
13
|
+
@ATTRIBUTE attr10 NUMERIC
|
|
14
|
+
@ATTRIBUTE attr11 NUMERIC
|
|
15
|
+
@ATTRIBUTE attr12 NUMERIC
|
|
16
|
+
@ATTRIBUTE attr13 NUMERIC
|
|
17
|
+
|
|
18
|
+
@DATA
|
|
19
|
+
{3 7, 10 34}
|
|
20
|
+
{1 2.4, 4 62, 12 19}
|
|
21
|
+
{0 0, 1 1, 2 2, 3 3, 4 4, 5 5, 6 6, 7 7, 8 8, 9 9, 10 10, 11 11, 12 12}
|
|
22
|
+
{9 42}
|
|
23
|
+
{2 54.3, 3 92, 11 10.2}
|
|
24
|
+
|
metadata
CHANGED
|
@@ -1,29 +1,32 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: wwood-rarff
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
|
-
-
|
|
7
|
+
- Ben J Woodcroft
|
|
8
|
+
- Andy Payne
|
|
8
9
|
autorequire:
|
|
9
10
|
bindir: bin
|
|
10
11
|
cert_chain: []
|
|
11
12
|
|
|
12
|
-
date: 2009-
|
|
13
|
+
date: 2009-11-19 00:00:00 +11:00
|
|
13
14
|
default_executable:
|
|
14
15
|
dependencies:
|
|
15
16
|
- !ruby/object:Gem::Dependency
|
|
16
|
-
name:
|
|
17
|
+
name: thoughtbot-shoulda
|
|
17
18
|
type: :development
|
|
18
19
|
version_requirement:
|
|
19
20
|
version_requirements: !ruby/object:Gem::Requirement
|
|
20
21
|
requirements:
|
|
21
22
|
- - ">="
|
|
22
23
|
- !ruby/object:Gem::Version
|
|
23
|
-
version:
|
|
24
|
+
version: "0"
|
|
24
25
|
version:
|
|
25
|
-
description:
|
|
26
|
-
|
|
26
|
+
description: |-
|
|
27
|
+
Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify
|
|
28
|
+
data sets for data mining and machine learning.
|
|
29
|
+
email: donttrustben near gmail.com
|
|
27
30
|
executables: []
|
|
28
31
|
|
|
29
32
|
extensions: []
|
|
@@ -35,23 +38,21 @@ files:
|
|
|
35
38
|
- Manifest.txt
|
|
36
39
|
- README.txt
|
|
37
40
|
- Rakefile
|
|
41
|
+
- VERSION
|
|
38
42
|
- lib/rarff.rb
|
|
43
|
+
- rarff.gemspec
|
|
39
44
|
- test/test_arff.arff
|
|
40
|
-
- "test/test_sparse_arff.arff "
|
|
41
|
-
- test/ts_rarff.rb
|
|
42
45
|
- test/test_rarff.rb
|
|
46
|
+
- test/test_sparse_arff.arff
|
|
43
47
|
has_rdoc: true
|
|
44
|
-
homepage: http://
|
|
48
|
+
homepage: http://github.com/wwood/rarff
|
|
49
|
+
licenses: []
|
|
50
|
+
|
|
45
51
|
post_install_message:
|
|
46
52
|
rdoc_options:
|
|
47
|
-
- --
|
|
48
|
-
- test/*
|
|
49
|
-
- --main
|
|
50
|
-
- README.txt
|
|
51
|
-
- --inline-source
|
|
53
|
+
- --charset=UTF-8
|
|
52
54
|
require_paths:
|
|
53
55
|
- lib
|
|
54
|
-
- test
|
|
55
56
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
56
57
|
requirements:
|
|
57
58
|
- - ">="
|
|
@@ -66,10 +67,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
66
67
|
version:
|
|
67
68
|
requirements: []
|
|
68
69
|
|
|
69
|
-
rubyforge_project:
|
|
70
|
-
rubygems_version: 1.
|
|
70
|
+
rubyforge_project:
|
|
71
|
+
rubygems_version: 1.3.5
|
|
71
72
|
signing_key:
|
|
72
|
-
specification_version:
|
|
73
|
+
specification_version: 3
|
|
73
74
|
summary: Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files
|
|
74
75
|
test_files:
|
|
75
76
|
- test/test_rarff.rb
|