davidrichards-data_frame 0.0.17 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +1 -1
- data/lib/data_frame.rb +1 -0
- data/lib/data_frame/arff.rb +45 -0
- data/lib/data_frame/data_frame.rb +18 -0
- data/spec/data_frame/arff_spec.rb +47 -0
- data/spec/data_frame/data_frame_spec.rb +16 -0
- data/spec/fixtures/basic.csv +3 -0
- metadata +6 -2
data/VERSION.yml
CHANGED
data/lib/data_frame.rb
CHANGED
@@ -0,0 +1,45 @@
|
|
1
|
+
# Turns a data frame into ARFF-formatted content.
|
2
|
+
module ARFF

  # Renders the frame as comma-separated text, one line per row, each line
  # terminated by a newline. Used in arff, but generally useful.
  #
  # include_header - when true (the default) the first line lists the labels.
  #
  # Cells are written with to_s and are not quoted or escaped.
  def to_csv(include_header=true)
    lines = items.map { |row| row.map { |cell| cell.to_s }.join(',') }
    lines.unshift(labels.map { |label| label.to_s }.join(',')) if include_header
    lines.map { |line| line + "\n" }.join
  end

  # Renders the frame as ARFF text: the @relation/@attribute header followed
  # by the rows as header-less CSV.
  def to_arff
    arff_header + to_csv(false)
  end

  protected

    # Maps every label to the categories of its rendered column.
    # Uses a Dictionary when that class is loaded, otherwise a plain Hash.
    def arff_attributes
      container = defined?(Dictionary) ? Dictionary.new : Hash.new

      labels.inject(container) do |list, label|
        list[label] = render_column(label).categories
        # Hand the container back to inject; the value of the assignment
        # above is the categories list, not the container.
        list
      end
    end

    # One "@attribute <label> {a,b,c}" line per label, in label order.
    def arff_formatted_attributes
      labels.map do |label|
        categories = render_column(label).categories.map { |x| x.to_s }.join(',')
        "@attribute #{label} {#{categories}}\n"
      end.join
    end

    # The relation name: the frame's name in underscored form, or a
    # placeholder when the frame has no name.
    def arff_relation
      name ? name.to_underscore_sym.to_s : 'unamed_relation'
    end

    # The ARFF preamble: relation line, blank line, attribute lines,
    # blank line, then the @data marker.
    def arff_header
      "@relation #{arff_relation}\n\n#{arff_formatted_attributes}\n@data\n"
    end

    alias :arff_items :to_csv
end
|
@@ -18,13 +18,25 @@ class DataFrame
|
|
18
18
|
# This returns bar where 'foo' was found and 'foo' everywhere else.
|
19
19
|
def from_csv(obj, opts={})
  # Parse first, then try to derive a name from the source itself.
  labels, table = infer_csv_contents(obj, opts)
  inferred_name = infer_name_from_contents(obj, opts)

  # Nothing usable was parsed: signal that with nil instead of a frame.
  return nil unless labels && table

  frame = new(*labels)
  frame.import(table)
  frame.name = inferred_name
  frame
end
|
26
28
|
|
27
29
|
protected
|
30
|
+
|
31
|
+
# Only works for named sources: URLs and files
|
32
|
+
# Derives a display name from the last path segment of obj, with its final
# extension removed and the remainder titleized
# (e.g. "/tmp/data_frame_spec.csv" gives "Data Frame Spec").
#
# obj  - a file path or URL string.
# opts - accepted for symmetry with the other infer_* helpers; not used here.
#
# Returns nil when no name can be derived: obj is multi-line text (raw CSV
# content rather than a location), the base name is empty, or obj cannot be
# treated as a path at all.
def infer_name_from_contents(obj, opts={})
  # Raw CSV text carries no file name; do not invent one from its contents.
  return nil if obj.respond_to?(:include?) && obj.include?("\n")

  # '.*' strips only the last extension and leaves extension-less names intact.
  base = File.basename(obj, '.*')
  base.empty? ? nil : base.titleize
rescue StandardError
  # Best effort only: an unusable source simply yields no name.
  nil
end
|
39
|
+
|
28
40
|
def infer_csv_contents(obj, opts={})
|
29
41
|
contents = File.read(obj) if File.exist?(obj)
|
30
42
|
begin
|
@@ -45,6 +57,9 @@ class DataFrame
|
|
45
57
|
# Default CSV options: turn on every converter (:converters => :all).
def default_csv_opts
  {:converters => :all}
end
|
46
58
|
end
|
47
59
|
|
60
|
+
# Include the methods from arff.rb
|
61
|
+
include ARFF
|
62
|
+
|
48
63
|
# Loads a batch of rows. Expects an array of arrays, else you don't
|
49
64
|
# know what you have.
|
50
65
|
def import(rows)
|
@@ -64,6 +79,9 @@ class DataFrame
|
|
64
79
|
# The items stored in the frame
|
65
80
|
attr_reader :items
|
66
81
|
|
82
|
+
# An optional name, useful for arff files
|
83
|
+
attr_accessor :name
|
84
|
+
|
67
85
|
def initialize(*labels)
|
68
86
|
@labels = labels.map {|e| e.to_underscore_sym }
|
69
87
|
@items = TransposableArray.new
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
2
|
+
|
3
|
+
describe "ARFF" do
  # Every example starts from the two-row fixture in spec/fixtures/basic.csv.
  before do
    fixtures_dir = File.join(File.dirname(__FILE__), '..', 'fixtures')
    @df = DataFrame.from_csv(File.expand_path(File.join(fixtures_dir, 'basic.csv')))
  end

  it "should allow a data frame to be expressed as an arff-formatted file" do
    @df.to_arff.should eql(basic_arff)
  end

  it "should add a to_csv method" do
    expected = "x,y,month,day,ffmc,dmc,dc,isi,temp,rh,wind,rain,area\n" +
               "7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0\n" +
               "7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0\n"
    @df.to_csv.should eql(expected)
  end

  it "should allow a non-header export for to_csv" do
    expected = "7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0\n" +
               "7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0\n"
    @df.to_csv(false).should eql(expected)
  end
end
|
25
|
+
|
26
|
+
# The ARFF text expected for spec/fixtures/basic.csv: relation line,
# one @attribute line per column, then the two data rows.
def basic_arff
  [
    '@relation basic',
    '',
    '@attribute x {7}',
    '@attribute y {4,5}',
    '@attribute month {mar,oct}',
    '@attribute day {fri,tue}',
    '@attribute ffmc {86.2,90.6}',
    '@attribute dmc {26.2,35.4}',
    '@attribute dc {94.3,669.1}',
    '@attribute isi {5.1,6.7}',
    '@attribute temp {8.2,18}',
    '@attribute rh {33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51}',
    '@attribute wind {0.9,6.7}',
    '@attribute rain {0}',
    '@attribute area {0}',
    '',
    '@data',
    '7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0',
    '7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0',
    ''  # the trailing empty entry yields the final newline
  ].join("\n")
end
|
@@ -12,6 +12,10 @@ describe DataFrame do
|
|
12
12
|
df.labels.should eql(@labels)
|
13
13
|
end
|
14
14
|
|
15
|
+
it "should have an optional name" do
  # Whatever is assigned to name must be read back unchanged.
  assigned = :some_name
  @df.name = assigned
  @df.name.should eql(assigned)
end
|
15
19
|
it "should initialize with an empty items list" do
|
16
20
|
@df.items.should be_is_a(TransposableArray)
|
17
21
|
@df.items.should be_empty
|
@@ -91,6 +95,18 @@ describe DataFrame do
|
|
91
95
|
@df.x.should eql([7,7])
|
92
96
|
@df.area.should eql([0,0])
|
93
97
|
end
|
98
|
+
|
99
|
+
it "should infer a name when importing from a file" do
  filename = "/tmp/data_frame_spec.csv"
  contents = %{X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
}
  begin
    File.open(filename, 'w') {|f| f.write contents}
    @df = DataFrame.from_csv(filename)
    @df.name.should eql('Data Frame Spec')
  ensure
    # Remove the scratch file without shelling out, and do it even when the
    # expectation above fails so no stale file is left behind.
    File.delete(filename) if File.exist?(filename)
  end
end
|
94
110
|
end
|
95
111
|
|
96
112
|
it "should be able to remove a column" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: davidrichards-data_frame
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.17
|
4
|
+
version: 0.0.18
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Richards
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-09-13 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -54,6 +54,7 @@ files:
|
|
54
54
|
- README.rdoc
|
55
55
|
- VERSION.yml
|
56
56
|
- lib/data_frame
|
57
|
+
- lib/data_frame/arff.rb
|
57
58
|
- lib/data_frame/callback_array.rb
|
58
59
|
- lib/data_frame/data_frame.rb
|
59
60
|
- lib/data_frame/model.rb
|
@@ -65,12 +66,15 @@ files:
|
|
65
66
|
- lib/ext/string.rb
|
66
67
|
- lib/ext/symbol.rb
|
67
68
|
- spec/data_frame
|
69
|
+
- spec/data_frame/arff_spec.rb
|
68
70
|
- spec/data_frame/callback_array_spec.rb
|
69
71
|
- spec/data_frame/data_frame_spec.rb
|
70
72
|
- spec/data_frame/model_spec.rb
|
71
73
|
- spec/data_frame/parameter_capture_spec.rb
|
72
74
|
- spec/data_frame/transposable_array_spec.rb
|
73
75
|
- spec/data_frame_spec.rb
|
76
|
+
- spec/fixtures
|
77
|
+
- spec/fixtures/basic.csv
|
74
78
|
- spec/spec_helper.rb
|
75
79
|
has_rdoc: true
|
76
80
|
homepage: http://github.com/davidrichards/data_frame
|