davidrichards-data_frame 0.0.17 → 0.0.18
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION.yml +1 -1
- data/lib/data_frame.rb +1 -0
- data/lib/data_frame/arff.rb +45 -0
- data/lib/data_frame/data_frame.rb +18 -0
- data/spec/data_frame/arff_spec.rb +47 -0
- data/spec/data_frame/data_frame_spec.rb +16 -0
- data/spec/fixtures/basic.csv +3 -0
- metadata +6 -2
data/VERSION.yml
CHANGED
data/lib/data_frame.rb
CHANGED
@@ -0,0 +1,45 @@
|
|
1
|
+
# Turns a data frame into ARFF-formatted content.
#
# Intended to be mixed into a host object. The methods below call +labels+,
# +items+, +name+ and +render_column+ on +self+, so the host must provide
# them.
module ARFF

  # Used in arff, but generally useful.
  #
  # Renders each row of +items+ as one comma-joined line ending in a newline.
  # When +include_header+ is true (the default) the labels are written first
  # as a header line. Cells are converted with +to_s+ only; no quoting or
  # escaping is applied.
  #
  # Returns the CSV text as a String.
  def to_csv(include_header=true)
    lines = self.items.map { |row| row.map { |cell| cell.to_s }.join(',') + "\n" }
    lines.unshift(self.labels.map { |label| label.to_s }.join(',') + "\n") if include_header
    lines.join
  end

  # Returns the ARFF header followed by the rows rendered without a CSV
  # header line.
  def to_arff
    arff_header + to_csv(false)
  end

  protected

    # Maps every label to the categories of its rendered column.
    #
    # Uses a Dictionary when that constant is defined, otherwise a Hash.
    def arff_attributes
      container = defined?(Dictionary) ? Dictionary.new : Hash.new

      self.labels.inject(container) do |list, e|
        list[e] = self.render_column(e).categories
        # inject feeds the block's value into the next step, so hand back the
        # container itself rather than the value that was just assigned.
        list
      end
    end

    # Builds one "@attribute <label> {a,b,c}" line per label, each ending in
    # a newline, and returns them concatenated in label order.
    def arff_formatted_attributes
      self.labels.map do |label|
        categories = self.render_column(label).categories.map { |x| x.to_s }.join(',')
        "@attribute #{label} {#{categories}}\n"
      end.join
    end

    # The relation name: the frame's name passed through +to_underscore_sym+
    # when a name is set, otherwise a fixed placeholder.
    def arff_relation
      self.name ? self.name.to_underscore_sym.to_s : 'unamed_relation'
    end

    # The text that precedes the data rows: the relation line, a blank line,
    # the attribute lines, a blank line and the "@data" marker.
    def arff_header
      "@relation #{arff_relation}\n\n#{arff_formatted_attributes}\n@data\n"
    end

    alias :arff_items :to_csv
end
|
@@ -18,13 +18,25 @@ class DataFrame
|
|
18
18
|
# This returns bar where 'foo' was found and 'foo' everywhere else.
|
19
19
|
def from_csv(obj, opts={})
  # Parse first; give up when either the labels or the rows are missing.
  labels, table = infer_csv_contents(obj, opts)
  return nil unless labels && table

  frame = new(*labels)
  frame.import(table)
  # The name is whatever infer_name_from_contents can derive from obj
  # (nil when it cannot derive one).
  frame.name = infer_name_from_contents(obj, opts)
  frame
end
|
26
28
|
|
27
29
|
protected
|
30
|
+
|
31
|
+
# Only works for named sources, urls and files.
#
# Takes the last path component of +obj+, drops its final extension and
# calls +titleize+ on the remainder. +opts+ is accepted but not used here.
#
# Returns the derived name, or nil when nothing usable is left or +obj+
# cannot be treated as a path.
def infer_name_from_contents(obj, opts={})
  parts = File.split(obj).last.split('.')
  # Strip only a real extension. A component without a dot is kept whole so
  # it does not collapse into an empty (but truthy) name.
  parts = parts[0..-2] if parts.size > 1
  stem = parts.join('.')
  stem.empty? ? nil : stem.titleize
rescue
  # Deliberately best-effort: any failure simply means "no name".
  nil
end
|
39
|
+
|
28
40
|
def infer_csv_contents(obj, opts={})
|
29
41
|
contents = File.read(obj) if File.exist?(obj)
|
30
42
|
begin
|
@@ -45,6 +57,9 @@ class DataFrame
|
|
45
57
|
# Baseline CSV options: the :converters key set to :all.
def default_csv_opts
  { :converters => :all }
end
|
46
58
|
end
|
47
59
|
|
60
|
+
# Include the methods from arff.rb
|
61
|
+
include ARFF
|
62
|
+
|
48
63
|
# Loads a batch of rows. Expects an array of arrays, else you don't
|
49
64
|
# know what you have.
|
50
65
|
def import(rows)
|
@@ -64,6 +79,9 @@ class DataFrame
|
|
64
79
|
# The items stored in the frame
|
65
80
|
attr_reader :items
|
66
81
|
|
82
|
+
# An optional name, useful for arff files
|
83
|
+
attr_accessor :name
|
84
|
+
|
67
85
|
def initialize(*labels)
|
68
86
|
@labels = labels.map {|e| e.to_underscore_sym }
|
69
87
|
@items = TransposableArray.new
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
2
|
+
|
3
|
+
describe "ARFF" do
  # Every example starts from a frame freshly loaded from the shared fixture.
  before do
    fixture = File.join(File.dirname(__FILE__), '..', 'fixtures', 'basic.csv')
    @df = DataFrame.from_csv(File.expand_path(fixture))
  end

  it "should allow a data frame to be expressed as an arff-formatted file" do
    expected = basic_arff
    @df.to_arff.should eql(expected)
  end

  it "should add a to_csv method" do
    # Header line plus both fixture rows, each terminated by a newline.
    expected = [
      'x,y,month,day,ffmc,dmc,dc,isi,temp,rh,wind,rain,area',
      '7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0',
      '7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0',
      ''
    ].join("\n")
    @df.to_csv.should eql(expected)
  end

  it "should allow a non-header export for to_csv" do
    # Same rows as above, without the header line.
    expected = [
      '7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0',
      '7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0',
      ''
    ].join("\n")
    @df.to_csv(false).should eql(expected)
  end
end
|
25
|
+
|
26
|
+
# The ARFF text the specs expect for the basic.csv fixture: relation line,
# blank line, one attribute line per column, blank line, "@data", then the
# two data rows. Every line, including the last, ends in a newline.
def basic_arff
  [
    '@relation basic',
    '',
    '@attribute x {7}',
    '@attribute y {4,5}',
    '@attribute month {mar,oct}',
    '@attribute day {fri,tue}',
    '@attribute ffmc {86.2,90.6}',
    '@attribute dmc {26.2,35.4}',
    '@attribute dc {94.3,669.1}',
    '@attribute isi {5.1,6.7}',
    '@attribute temp {8.2,18}',
    '@attribute rh {33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51}',
    '@attribute wind {0.9,6.7}',
    '@attribute rain {0}',
    '@attribute area {0}',
    '',
    '@data',
    '7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0',
    '7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0',
    ''
  ].join("\n")
end
|
@@ -12,6 +12,10 @@ describe DataFrame do
|
|
12
12
|
df.labels.should eql(@labels)
|
13
13
|
end
|
14
14
|
|
15
|
+
it "should have an optional name" do
|
16
|
+
@df.name = :some_name
|
17
|
+
@df.name.should eql(:some_name)
|
18
|
+
end
|
15
19
|
it "should initialize with an empty items list" do
|
16
20
|
@df.items.should be_is_a(TransposableArray)
|
17
21
|
@df.items.should be_empty
|
@@ -91,6 +95,18 @@ describe DataFrame do
|
|
91
95
|
@df.x.should eql([7,7])
|
92
96
|
@df.area.should eql([0,0])
|
93
97
|
end
|
98
|
+
|
99
|
+
it "should infer a name when importing from a file" do
  filename = "/tmp/data_frame_spec.csv"
  # Header line plus two rows, each terminated by a newline.
  contents = [
    'X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area',
    '7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0',
    '7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0',
    ''
  ].join("\n")

  begin
    File.open(filename, 'w') {|f| f.write contents}
    @df = DataFrame.from_csv(filename)
    @df.name.should eql('Data Frame Spec')
  ensure
    # Remove the scratch file even when the expectation above fails, and do
    # it without shelling out.
    File.delete(filename) if File.exist?(filename)
  end
end
|
94
110
|
end
|
95
111
|
|
96
112
|
it "should be able to remove a column" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: davidrichards-data_frame
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.17
|
4
|
+
version: 0.0.18
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Richards
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-09-13 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -54,6 +54,7 @@ files:
|
|
54
54
|
- README.rdoc
|
55
55
|
- VERSION.yml
|
56
56
|
- lib/data_frame
|
57
|
+
- lib/data_frame/arff.rb
|
57
58
|
- lib/data_frame/callback_array.rb
|
58
59
|
- lib/data_frame/data_frame.rb
|
59
60
|
- lib/data_frame/model.rb
|
@@ -65,12 +66,15 @@ files:
|
|
65
66
|
- lib/ext/string.rb
|
66
67
|
- lib/ext/symbol.rb
|
67
68
|
- spec/data_frame
|
69
|
+
- spec/data_frame/arff_spec.rb
|
68
70
|
- spec/data_frame/callback_array_spec.rb
|
69
71
|
- spec/data_frame/data_frame_spec.rb
|
70
72
|
- spec/data_frame/model_spec.rb
|
71
73
|
- spec/data_frame/parameter_capture_spec.rb
|
72
74
|
- spec/data_frame/transposable_array_spec.rb
|
73
75
|
- spec/data_frame_spec.rb
|
76
|
+
- spec/fixtures
|
77
|
+
- spec/fixtures/basic.csv
|
74
78
|
- spec/spec_helper.rb
|
75
79
|
has_rdoc: true
|
76
80
|
homepage: http://github.com/davidrichards/data_frame
|