wwood-rarff 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +0 -3
- data/Rakefile +51 -22
- data/VERSION +1 -0
- data/lib/rarff.rb +7 -2
- data/rarff.gemspec +53 -0
- data/test/test_rarff.rb +93 -1
- data/test/test_sparse_arff.arff +24 -0
- metadata +20 -19
data/Manifest.txt
CHANGED
data/Rakefile
CHANGED
@@ -1,24 +1,53 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
require '
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "wwood-rarff"
|
8
|
+
gem.summary = %Q{Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files}
|
9
|
+
gem.description = %Q{Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify
|
10
|
+
data sets for data mining and machine learning.}
|
11
|
+
gem.email = "donttrustben near gmail.com"
|
12
|
+
gem.homepage = "http://github.com/wwood/rarff"
|
13
|
+
gem.authors = ["Ben J Woodcroft","Andy Payne"]
|
14
|
+
gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
|
15
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
16
|
+
end
|
17
|
+
rescue LoadError
|
18
|
+
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
19
|
+
end
|
20
|
+
|
21
|
+
require 'rake/testtask'
|
22
|
+
Rake::TestTask.new(:test) do |test|
|
23
|
+
test.libs << 'lib' << 'test'
|
24
|
+
test.pattern = 'test/**/test_*.rb'
|
25
|
+
test.verbose = true
|
26
|
+
end
|
27
|
+
|
28
|
+
begin
|
29
|
+
require 'rcov/rcovtask'
|
30
|
+
Rcov::RcovTask.new do |test|
|
31
|
+
test.libs << 'test'
|
32
|
+
test.pattern = 'test/**/test_*.rb'
|
33
|
+
test.verbose = true
|
34
|
+
end
|
35
|
+
rescue LoadError
|
36
|
+
task :rcov do
|
37
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
task :test => :check_dependencies
|
42
|
+
|
43
|
+
task :default => :test
|
44
|
+
|
45
|
+
require 'rake/rdoctask'
|
46
|
+
Rake::RDocTask.new do |rdoc|
|
47
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
48
|
+
|
49
|
+
rdoc.rdoc_dir = 'rdoc'
|
50
|
+
rdoc.title = "blah #{version}"
|
51
|
+
rdoc.rdoc_files.include('README*')
|
52
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
24
53
|
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.2.4
|
data/lib/rarff.rb
CHANGED
@@ -217,14 +217,19 @@ module Rarff
|
|
217
217
|
|
218
218
|
# Make all String type attributes into nominal attributes, because
|
219
219
|
# they are more useful in WEKA because more techniques handle them than
|
220
|
-
# strings
|
221
|
-
|
220
|
+
# strings.
|
221
|
+
#
|
222
|
+
# column_indices is an optional argument specifying the columns that
|
223
|
+
# are to be set to nominal (0-based indexes). If nil (the default), then
|
224
|
+
# all columns are included
|
225
|
+
def set_string_attributes_to_nominal(column_indices = nil)
|
222
226
|
nominals = {}
|
223
227
|
# Frustratingly, we have to traverse this 2D array with the
|
224
228
|
# wrong dimension first. Oh well.
|
225
229
|
@instances.each_with_index do |row, row_index|
|
226
230
|
row.each_with_index do |string, col_index|
|
227
231
|
next unless @attributes[col_index].type == ATTRIBUTE_STRING
|
232
|
+
next unless column_indices.nil? or column_indices.include?(col_index)
|
228
233
|
|
229
234
|
nominals[col_index] ||= {}
|
230
235
|
nominals[col_index][string] ||= true
|
data/rarff.gemspec
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{rarff}
|
8
|
+
s.version = "0.2.4"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Ben J Woodcroft", "Andy Payne"]
|
12
|
+
s.date = %q{2009-11-19}
|
13
|
+
s.description = %q{Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify
|
14
|
+
data sets for data mining and machine learning.}
|
15
|
+
s.email = %q{donttrustben near gmail.com}
|
16
|
+
s.extra_rdoc_files = [
|
17
|
+
"README.txt"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
"History.txt",
|
21
|
+
"Manifest.txt",
|
22
|
+
"README.txt",
|
23
|
+
"Rakefile",
|
24
|
+
"VERSION",
|
25
|
+
"lib/rarff.rb",
|
26
|
+
"rarff.gemspec",
|
27
|
+
"test/test_arff.arff",
|
28
|
+
"test/test_rarff.rb",
|
29
|
+
"test/test_sparse_arff.arff"
|
30
|
+
]
|
31
|
+
s.homepage = %q{http://github.com/wwood/rarff}
|
32
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
33
|
+
s.require_paths = ["lib"]
|
34
|
+
s.rubygems_version = %q{1.3.5}
|
35
|
+
s.summary = %q{Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files}
|
36
|
+
s.test_files = [
|
37
|
+
"test/test_rarff.rb"
|
38
|
+
]
|
39
|
+
|
40
|
+
if s.respond_to? :specification_version then
|
41
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
42
|
+
s.specification_version = 3
|
43
|
+
|
44
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
45
|
+
s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
46
|
+
else
|
47
|
+
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
48
|
+
end
|
49
|
+
else
|
50
|
+
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
data/test/test_rarff.rb
CHANGED
@@ -38,7 +38,7 @@ class TestArffLib < Test::Unit::TestCase
|
|
38
38
|
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
39
39
|
assert_equal(arff_file_str, rel.to_arff, "Arff creation test failed.")
|
40
40
|
end
|
41
|
-
|
41
|
+
|
42
42
|
# # Test creation of a sparse arff file string.
|
43
43
|
# def test_sparse_arff_creation
|
44
44
|
#
|
@@ -219,6 +219,72 @@ two, four
|
|
219
219
|
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
220
220
|
assert_equal(arff_file_str, rel.to_arff, "test_strings_as_nominal")
|
221
221
|
end
|
222
|
+
|
223
|
+
def test_set_strings_nominal2
|
224
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
225
|
+
@RELATION MyCoolRelation
|
226
|
+
@ATTRIBUTE Attr0 NUMERIC
|
227
|
+
@ATTRIBUTE Attr1 {three,four}
|
228
|
+
@DATA
|
229
|
+
1, three
|
230
|
+
2, four
|
231
|
+
END_OF_ARFF_FILE
|
232
|
+
|
233
|
+
arff_file_str.gsub!(/\n$/, '')
|
234
|
+
|
235
|
+
instances = [ [1,'three'],[2,'four']]
|
236
|
+
|
237
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
238
|
+
rel.instances = instances
|
239
|
+
rel.set_string_attributes_to_nominal
|
240
|
+
|
241
|
+
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
242
|
+
assert_equal(arff_file_str, rel.to_arff, "test_strings_as_nominal")
|
243
|
+
end
|
244
|
+
|
245
|
+
def test_strings_nominal_with_arguments1
|
246
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
247
|
+
@RELATION MyCoolRelation
|
248
|
+
@ATTRIBUTE Attr0 NUMERIC
|
249
|
+
@ATTRIBUTE Attr1 STRING
|
250
|
+
@DATA
|
251
|
+
1, three
|
252
|
+
2, four
|
253
|
+
END_OF_ARFF_FILE
|
254
|
+
|
255
|
+
arff_file_str.gsub!(/\n$/, '')
|
256
|
+
|
257
|
+
instances = [ [1,'three'],[2,'four']]
|
258
|
+
|
259
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
260
|
+
rel.instances = instances
|
261
|
+
rel.set_string_attributes_to_nominal([0])
|
262
|
+
|
263
|
+
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
264
|
+
assert_equal(arff_file_str, rel.to_arff, "test_strings_as_nominal")
|
265
|
+
end
|
266
|
+
|
267
|
+
def test_strings_nominal_with_arguments2
|
268
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
269
|
+
@RELATION MyCoolRelation
|
270
|
+
@ATTRIBUTE Attr0 NUMERIC
|
271
|
+
@ATTRIBUTE Attr1 {three,four}
|
272
|
+
@DATA
|
273
|
+
1, three
|
274
|
+
2, four
|
275
|
+
END_OF_ARFF_FILE
|
276
|
+
|
277
|
+
arff_file_str.gsub!(/\n$/, '')
|
278
|
+
|
279
|
+
instances = [ [1,'three'],[2,'four']]
|
280
|
+
|
281
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
282
|
+
rel.instances = instances
|
283
|
+
rel.set_string_attributes_to_nominal([0,1])
|
284
|
+
|
285
|
+
# puts "rel.to_arff:\n(\n#{rel.to_arff}\n)\n"
|
286
|
+
assert_equal(arff_file_str, rel.to_arff, "test_strings_as_nominal")
|
287
|
+
end
|
222
288
|
|
223
289
|
def test_boolean_2
|
224
290
|
arff_file_str = <<-END_OF_ARFF_FILE
|
@@ -242,6 +308,32 @@ two, four
|
|
242
308
|
|
243
309
|
assert_equal(arff_file_str, rel.to_arff, "missing data output failure")
|
244
310
|
end
|
311
|
+
|
312
|
+
def test_commas_in_attribute_name
|
313
|
+
arff_file_str = <<-END_OF_ARFF_FILE
|
314
|
+
@RELATION MyCoolRelation
|
315
|
+
@ATTRIBUTE subject {ruby_yeh,ruby}
|
316
|
+
@ATTRIBUTE Attr1 {duh}
|
317
|
+
@DATA
|
318
|
+
ruby__yeh, duh
|
319
|
+
ruby, duh
|
320
|
+
END_OF_ARFF_FILE
|
321
|
+
|
322
|
+
arff_file_str.gsub!(/\n$/, '')
|
323
|
+
|
324
|
+
instances = [
|
325
|
+
['ruby, yeh','duh'],
|
326
|
+
['ruby','duh']
|
327
|
+
]
|
328
|
+
|
329
|
+
|
330
|
+
rel = Rarff::Relation.new('MyCoolRelation')
|
331
|
+
rel.instances = instances
|
332
|
+
rel.attributes[0].name = 'subject'
|
333
|
+
rel.set_string_attributes_to_nominal
|
334
|
+
|
335
|
+
assert_equal(arff_file_str, rel.to_arff, "comma in string attribute failure")
|
336
|
+
end
|
245
337
|
end
|
246
338
|
|
247
339
|
|
@@ -0,0 +1,24 @@
|
|
1
|
+
% Sample sparse ARFF file
|
2
|
+
@RELATION sparseness
|
3
|
+
|
4
|
+
@ATTRIBUTE attr1 NUMERIC
|
5
|
+
@ATTRIBUTE attr2 NUMERIC
|
6
|
+
@ATTRIBUTE attr3 NUMERIC
|
7
|
+
@ATTRIBUTE attr4 NUMERIC
|
8
|
+
@ATTRIBUTE attr5 NUMERIC
|
9
|
+
@ATTRIBUTE attr6 NUMERIC
|
10
|
+
@ATTRIBUTE attr7 NUMERIC
|
11
|
+
@ATTRIBUTE attr8 NUMERIC
|
12
|
+
@ATTRIBUTE attr9 NUMERIC
|
13
|
+
@ATTRIBUTE attr10 NUMERIC
|
14
|
+
@ATTRIBUTE attr11 NUMERIC
|
15
|
+
@ATTRIBUTE attr12 NUMERIC
|
16
|
+
@ATTRIBUTE attr13 NUMERIC
|
17
|
+
|
18
|
+
@DATA
|
19
|
+
{3 7, 10 34}
|
20
|
+
{1 2.4, 4 62, 12 19}
|
21
|
+
{0 0, 1 1, 2 2, 3 3, 4 4, 5 5, 6 6, 7 7, 8 8, 9 9, 10 10, 11 11, 12 12}
|
22
|
+
{9 42}
|
23
|
+
{2 54.3, 3 92, 11 10.2}
|
24
|
+
|
metadata
CHANGED
@@ -1,29 +1,32 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wwood-rarff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
-
|
7
|
+
- Ben J Woodcroft
|
8
|
+
- Andy Payne
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
12
|
|
12
|
-
date: 2009-
|
13
|
+
date: 2009-11-19 00:00:00 +11:00
|
13
14
|
default_executable:
|
14
15
|
dependencies:
|
15
16
|
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
17
|
+
name: thoughtbot-shoulda
|
17
18
|
type: :development
|
18
19
|
version_requirement:
|
19
20
|
version_requirements: !ruby/object:Gem::Requirement
|
20
21
|
requirements:
|
21
22
|
- - ">="
|
22
23
|
- !ruby/object:Gem::Version
|
23
|
-
version:
|
24
|
+
version: "0"
|
24
25
|
version:
|
25
|
-
description:
|
26
|
-
|
26
|
+
description: |-
|
27
|
+
Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files. ARFF files are used to specify
|
28
|
+
data sets for data mining and machine learning.
|
29
|
+
email: donttrustben near gmail.com
|
27
30
|
executables: []
|
28
31
|
|
29
32
|
extensions: []
|
@@ -35,23 +38,21 @@ files:
|
|
35
38
|
- Manifest.txt
|
36
39
|
- README.txt
|
37
40
|
- Rakefile
|
41
|
+
- VERSION
|
38
42
|
- lib/rarff.rb
|
43
|
+
- rarff.gemspec
|
39
44
|
- test/test_arff.arff
|
40
|
-
- "test/test_sparse_arff.arff "
|
41
|
-
- test/ts_rarff.rb
|
42
45
|
- test/test_rarff.rb
|
46
|
+
- test/test_sparse_arff.arff
|
43
47
|
has_rdoc: true
|
44
|
-
homepage: http://
|
48
|
+
homepage: http://github.com/wwood/rarff
|
49
|
+
licenses: []
|
50
|
+
|
45
51
|
post_install_message:
|
46
52
|
rdoc_options:
|
47
|
-
- --
|
48
|
-
- test/*
|
49
|
-
- --main
|
50
|
-
- README.txt
|
51
|
-
- --inline-source
|
53
|
+
- --charset=UTF-8
|
52
54
|
require_paths:
|
53
55
|
- lib
|
54
|
-
- test
|
55
56
|
required_ruby_version: !ruby/object:Gem::Requirement
|
56
57
|
requirements:
|
57
58
|
- - ">="
|
@@ -66,10 +67,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
66
67
|
version:
|
67
68
|
requirements: []
|
68
69
|
|
69
|
-
rubyforge_project:
|
70
|
-
rubygems_version: 1.
|
70
|
+
rubyforge_project:
|
71
|
+
rubygems_version: 1.3.5
|
71
72
|
signing_key:
|
72
|
-
specification_version:
|
73
|
+
specification_version: 3
|
73
74
|
summary: Rarff is a Ruby library for dealing with Attribute-Relation File Format (ARFF) files
|
74
75
|
test_files:
|
75
76
|
- test/test_rarff.rb
|