data_frame 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +122 -0
- data/VERSION.yml +4 -0
- data/bin/plain_frame +22 -0
- data/lib/data_frame.rb +26 -0
- data/lib/data_frame/arff.rb +52 -0
- data/lib/data_frame/callback_array.rb +152 -0
- data/lib/data_frame/core/column_management.rb +147 -0
- data/lib/data_frame/core/filter.rb +48 -0
- data/lib/data_frame/core/import.rb +113 -0
- data/lib/data_frame/core/pre_process.rb +69 -0
- data/lib/data_frame/core/saving.rb +29 -0
- data/lib/data_frame/core/training.rb +46 -0
- data/lib/data_frame/data_frame.rb +115 -0
- data/lib/data_frame/id3.rb +28 -0
- data/lib/data_frame/kmeans.rb +10 -0
- data/lib/data_frame/labels_from_uci.rb +48 -0
- data/lib/data_frame/mlp.rb +18 -0
- data/lib/data_frame/model.rb +22 -0
- data/lib/data_frame/parameter_capture.rb +50 -0
- data/lib/data_frame/sbn.rb +18 -0
- data/lib/data_frame/transposable_array.rb +23 -0
- data/lib/ext/array.rb +11 -0
- data/lib/ext/open_struct.rb +5 -0
- data/lib/ext/string.rb +5 -0
- data/lib/ext/symbol.rb +5 -0
- data/spec/data_frame/arff_spec.rb +48 -0
- data/spec/data_frame/callback_array_spec.rb +148 -0
- data/spec/data_frame/core/column_management_spec.rb +128 -0
- data/spec/data_frame/core/filter_spec.rb +88 -0
- data/spec/data_frame/core/import_spec.rb +41 -0
- data/spec/data_frame/core/pre_process_spec.rb +103 -0
- data/spec/data_frame/core/saving_spec.rb +61 -0
- data/spec/data_frame/core/training_spec.rb +72 -0
- data/spec/data_frame/data_frame_spec.rb +141 -0
- data/spec/data_frame/id3_spec.rb +22 -0
- data/spec/data_frame/model_spec.rb +36 -0
- data/spec/data_frame/parameter_capture_spec.rb +32 -0
- data/spec/data_frame/transposable_array_spec.rb +138 -0
- data/spec/data_frame_spec.rb +29 -0
- data/spec/ext/array_spec.rb +13 -0
- data/spec/fixtures/basic.csv +3 -0
- data/spec/fixtures/discrete_testing.csv +4 -0
- data/spec/fixtures/discrete_training.csv +21 -0
- data/spec/spec_helper.rb +8 -0
- metadata +128 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../../spec_helper")
|
2
|
+
|
3
|
+
# Row-filtering specs. The examples exercise the destructive
# DataFrame#filter! with each row representation named in the example
# titles, and the non-destructive DataFrame#filter, which is expected to
# leave the receiver's items unchanged.
describe "Filter" do
  # Two rows are loaded; every predicate below matches only [5, 6, 7, 8].
  before do
    @labels = [:these, :are, :the, :labels]
    @df = DataFrame.new(*@labels)
    @df.add([1, 2, 3, 4])
    @df.add([5, 6, 7, 8])
  end

  it "should be able to filter a data frame with a block using an OpenStruct for each row" do
    @df.filter!(:open_struct) { |record| record.these == 5 }
    @df.items.should eql([[5, 6, 7, 8]])
  end

  it "should be able to filter a data frame with a block using a Hash for each row" do
    @df.filter!(:hash) { |record| record[:these] == 5 }
    @df.items.should eql([[5, 6, 7, 8]])
  end

  # Class handed to filter! in place of a symbol; presumably each row's
  # values are passed to it positionally (confirm against filter.rb).
  S4 = Struct.new(:one, :two, :three, :four)
  it "should be able to filter a data frame with a block using another class that uses the row as input" do
    @df.filter!(S4) { |record| record.one == 5 }
    @df.items.should eql([[5, 6, 7, 8]])
  end

  it "should be able to filter a data frame with a block using an array for each row" do
    @df.filter! { |values| values.first == 5 }
    @df.items.should eql([[5, 6, 7, 8]])
  end

  it "should be able to do fancy things with the row as the filter" do
    @df.filter! { |values| values.sum > 10 }
    @df.items.should eql([[5, 6, 7, 8]])
  end

  it "should be able to generate a new data frame with filter" do
    new_df = @df.filter(:open_struct) { |record| record.these == 5 }
    new_df.items.should eql([[5, 6, 7, 8]])
    # The receiver must still hold both original rows.
    @df.items.should eql([[1, 2, 3, 4], [5, 6, 7, 8]])
  end

end
|
44
|
+
|
45
|
+
# Specs for DataFrame#filter_by_category and its destructive counterpart
# filter_by_category!, filtering on a single value, a Range and an Array.
#
# Dates are built with Date.new(year, month, day) rather than by parsing
# "mm/dd/yyyy" strings: Date.parse does not read slash-separated dates as
# month/day/year on Ruby 1.9 and later, so strings such as "07/15/2009"
# are rejected and "07/1/2009" yields a different date than intended.
context "filter_by_category" do

  # One row per day of July 2009; every third day (1, 4, 7, ...) is :fair,
  # all other days are :good.
  before do
    @df = DataFrame.new(:weather, :date)

    (1..31).each do |day|
      weather = (day % 3 == 1) ? :fair : :good
      @df.add [weather, Date.new(2009, 7, day)]
    end

    @d1 = Date.new(2009, 7, 15)
    @d2 = Date.new(2009, 7, 31)

  end

  it "should be able to filter by category" do
    filtered = @df.filter_by_category(:weather => :good)
    filtered.weather.uniq.should eql([:good])
    # The source frame keeps the rows that were filtered out.
    @df.weather.uniq.should be_include(:fair)
  end

  it "should be able to manage ranges for filter values" do
    filtered = @df.filter_by_category(:date => (@d1..@d2))
    filtered.date.should_not be_include(Date.new(2009, 7, 1))
    filtered.date.should_not be_include(Date.new(2009, 7, 14))
    # Both range endpoints are expected to be kept.
    filtered.date.should be_include(Date.new(2009, 7, 15))
    filtered.date.should be_include(Date.new(2009, 7, 31))
    @df.date.should be_include(Date.new(2009, 7, 1))
  end

  it "should be able to take an array of values to filter with" do
    filtered = @df.filter_by_category(:date => [@d1, @d2])
    filtered.date.should_not be_include(Date.new(2009, 7, 1))
    filtered.date.should be_include(Date.new(2009, 7, 15))
    filtered.date.should be_include(Date.new(2009, 7, 31))
  end

  it "should have a destructive version" do
    @df.filter_by_category!(:date => [@d1, @d2])
    @df.date.should_not be_include(Date.new(2009, 7, 1))
    @df.date.should be_include(Date.new(2009, 7, 15))
    @df.date.should be_include(Date.new(2009, 7, 31))
  end

end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../../spec_helper")
|
2
|
+
|
3
|
+
# Specs for getting rows into a DataFrame: add_item for a single row and
# import for several rows, one row, or a CSV string.
describe "Import" do

  # Empty four-column frame shared by the examples below.
  before do
    @labels = [:these, :are, :the, :labels]
    @df = DataFrame.new(*@labels)
  end

  it "should be able to add an item" do
    row = [1, 2, 3, 4]
    @df.add_item(row)
    @df.items.should eql([row])
  end

  it "should be able to import more than one row at a time" do
    @df.import([[2, 2, 2, 2], [3, 3, 3, 3], [4, 4, 4, 4]])
    # Row labels make each imported row readable by name.
    @df.row_labels = [:twos, :threes, :fours]
    @df.twos.should eql([2, 2, 2, 2])
    @df.threes.should eql([3, 3, 3, 3])
    @df.fours.should eql([4, 4, 4, 4])
  end

  it "should be able to import only one row" do
    @df.import([2, 2, 2, 2])
    @df.these.should eql([2])
  end

  it "should be able to import a reference to csv" do
    # Two header-less CSV records; the literal is kept flush left so the
    # string carries no leading whitespace.
    contents = %{7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
}

    @labels = [:x, :y, :month, :day, :ffmc, :dmc, :dc, :isi, :temp, :rh, :wind, :rain, :area]
    @df = DataFrame.new(@labels)
    @df.import(contents)
    # The expectations require the numeric fields to come back as integers.
    @df.x.should eql([7, 7])
    @df.area.should eql([0, 0])
  end

end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../../spec_helper")
|
2
|
+
|
3
|
+
# Specs for the pre-processing helpers: j_binary_ize! (one boolean column
# per category), numericize! and categorize!.
describe "PreProcess" do
  it "should be able to j_binary_ize! a column, taking its categories and creating a column for each" do
    df = DataFrame.new(:observations)
    df.add([:many])
    df.add([:fine])
    df.add([:things])
    df.add([:are])
    df.add([:available])
    df.j_binary_ize!(:observations)
    # One generated column per distinct value, named observations_<value>.
    df.observations_many.should eql([true, false, false, false, false])
    df.observations_fine.should eql([false, true, false, false, false])
    df.observations_things.should eql([false, false, true, false, false])
    df.observations_are.should eql([false, false, false, true, false])
    df.observations_available.should eql([false, false, false, false, true])
    # The source column is expected to survive unchanged.
    df.observations.should eql([:many, :fine, :things, :are, :available])
  end

  it "should be able to j_binary_ize! a more normal column" do
    df = DataFrame.new(:observations)
    df.import([1, 2, 3, 4, 5, 4, 3, 2, 1].map { |value| Array(value) })
    df.observations.add_category(:small) { |value| value <= 3 }
    df.observations.add_category(:large) { |value| value >= 3 }
    df.j_binary_ize!(:observations)
    # Without :allow_overlap the value 3 is expected to count as :small only.
    df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
    df.observations_large.should eql([false, false, false, true, true, true, false, false, false])
  end

  it "should be able to j_binary_ize with non-adjacent sets (sets that allow a value to have more than one category)" do
    df = DataFrame.new(:observations)
    df.import([1, 2, 3, 4, 5, 4, 3, 2, 1].map { |value| Array(value) })
    df.observations.add_category(:small) { |value| value <= 3 }
    df.observations.add_category(:large) { |value| value >= 3 }
    df.j_binary_ize!(:observations, :allow_overlap => true)
    # With overlap allowed the value 3 is expected in both categories.
    df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
    df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
  end

  it "should be able to hold multiple ideas of a columns categories by resetting the category and re-running j_binary_ize" do
    df = DataFrame.new(:observations)
    df.import([1, 2, 3, 4, 5, 4, 3, 2, 1].map { |value| Array(value) })
    df.observations.add_category(:small) { |value| value <= 3 }
    df.observations.add_category(:large) { |value| value >= 3 }
    df.j_binary_ize!(:observations, :allow_overlap => true)
    # Swap in a second category scheme and binarize again; the columns from
    # the first pass are expected to remain.
    df.observations.set_categories(:odd => lambda { |value| value.odd? }, :even => lambda { |value| value.even? })
    df.j_binary_ize!(:observations)
    df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
    df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
    df.observations.should eql([1, 2, 3, 4, 5, 4, 3, 2, 1])
    df.observations_even.should eql([false, true, false, true, false, true, false, true, false])
    df.observations_odd.should eql([true, false, true, false, true, false, true, false, true])
  end

  context "numericize!" do

    # Non-overlapping :small / :large categories over a nine-value column.
    before do
      @df = DataFrame.new(:observations)
      @df.import([1, 2, 3, 4, 5, 4, 3, 2, 1].map { |value| Array(value) })
      @df.observations.add_category(:small) { |value| value <= 3 }
      @df.observations.add_category(:large) { |value| value > 3 }
    end

    it "should be able to numericize nominal data" do
      @df.numericize!(:observations)
      # Each value becomes a [small, large] pair of 0/1 flags.
      @df.numerical_observations.should eql([[1, 0], [1, 0], [1, 0], [0, 1], [0, 1], [0, 1], [1, 0], [1, 0], [1, 0]])
    end

  end

  context "categorize!" do
    # Same nine values, with the categories named by the integers 0 and 1.
    before do
      @df = DataFrame.new(:observations)
      @df.import([1, 2, 3, 4, 5, 4, 3, 2, 1].map { |value| Array(value) })
      @df.observations.add_category(0) { |value| value <= 3 }
      @df.observations.add_category(1) { |value| value > 3 }
    end

    it "should be able to replace a column with its category values" do
      @df.categorize!(:observations)
      @df.observations.should eql([0, 0, 0, 1, 1, 1, 0, 0, 0])
    end

    it "should be able to replace more than one column at a time" do
      # duplicate! yields a second column that the spec reads as observations1.
      @df.duplicate!(:observations)
      @df.observations.add_category(0) { |value| value <= 3 }
      @df.observations.add_category(1) { |value| value > 3 }
      @df.observations1.add_category(:small) { |value| value <= 3 }
      @df.observations1.add_category(:large) { |value| value > 3 }
      @df.categorize!(:observations, :observations1)
      @df.observations.should eql([0, 0, 0, 1, 1, 1, 0, 0, 0])
      @df.observations1.should eql([:small, :small, :small, :large, :large, :large, :small, :small, :small])
    end

    it "should be able to categorize a column that doesn't have a range_hash setup" do
      # Fresh frame with no categories defined on the column.
      @df = DataFrame.new(:observations)
      @df.import([1, 2, 3, 4, 5, 4, 3, 2, 1].map { |value| Array(value) })
      @df.observations.range_hash.should be_nil
      lambda { @df.categorize!(:observations) }.should_not raise_error
      @df.observations.should eql([1, 2, 3, 4, 5, 4, 3, 2, 1])
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../../spec_helper")
|
2
|
+
|
3
|
+
# Specs for DataFrame#save: each example writes the frame to @filename and
# compares the file contents with the matching to_csv output.
describe "Saving" do
  # Single-column frame with :small / :large categories, plus the path
  # every example writes to.
  before do
    @df = DataFrame.new(:observations)
    @df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
    @df.observations.add_category(:small) {|e| e <= 3}
    @df.observations.add_category(:large) {|e| e > 3}
    @filename = "/tmp/numericized_observations"
  end

  # Remove the output file in-process rather than shelling out to
  # `rm -rf` with an interpolated, unquoted path; this avoids spawning a
  # subprocess per example and does not depend on a Unix shell.
  after do
    File.delete(@filename) if File.exist?(@filename)
  end

  it "should be able to save the data frame" do
    @df.numericize!(:observations)
    @df.save(@filename)
    File.read(@filename).should eql(@df.to_csv)
  end

  it "should be able to save the data frame without the header" do
    @df.save(@filename, :include_header => false)
    File.read(@filename).should eql(@df.to_csv(false))
  end

  it "should be able to save off a subset" do
    @df = DataFrame.new(:observations, :junk)
    @df.import( [1,2,3,4,5,4,3,2,1].map{ |e| [e,e] } )
    @df.save(@filename, :subset => :observations)
    File.read(@filename).should eql(@df.subset_from_columns(:observations).to_csv)
  end

  it "should be able to filter the rows" do
    @df = DataFrame.new(:observations, :junk)
    @df.import( [1,2,3,4,5,4,3,2,1].map{ |e| [e,e] } )
    # First save leaves a file behind that the filtered save must replace.
    @df.save(@filename, :subset => :observations)
    @df.observations.add_category(:small) {|e| e <= 3}
    @df.observations.add_category(:large) {|e| e > 3}
    @df.save(@filename, :filter_by_category => {:observations => :small})
    File.read(@filename).should eql(@df.filter_by_category(:observations => :small).to_csv)
  end

  it "should have a shortcut for subset, only" do
    @df = DataFrame.new(:observations, :junk)
    @df.import( [1,2,3,4,5,4,3,2,1].map{ |e| [e,e] } )
    @df.save(@filename, :only => :observations)
    File.read(@filename).should eql(@df.subset_from_columns(:observations).to_csv)
  end

  it "should have a shortcut for filter_by_category, filter" do
    @df = DataFrame.new(:observations, :junk)
    @df.import( [1,2,3,4,5,4,3,2,1].map{ |e| [e,e] } )
    # First save leaves a file behind that the filtered save must replace.
    @df.save(@filename, :subset => :observations)
    @df.observations.add_category(:small) {|e| e <= 3}
    @df.observations.add_category(:large) {|e| e > 3}
    @df.save(@filename, :filter => {:observations => :small})
    File.read(@filename).should eql(@df.filter_by_category(:observations => :small).to_csv)
  end

end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../../spec_helper")
|
2
|
+
|
3
|
+
# Specs for DataFrame#training_set and DataFrame#test_set: sizing by :n or
# :proportion, clamping to the item count, memoization, and :reset.
describe "Training" do
  # Single-column frame fed the integers 0...100; the size expectations
  # below treat it as holding 100 items.
  before do
    @df = DataFrame.new(:one)
    @df.import((0...100).to_a)
  end

  it "should be able to create a proportional training set from a data frame" do
    @df.training_set(:n => 3)
    @df.training_set.size.should eql(3)
    # Every sampled element must come from the frame's own items.
    @df.training_set.all? { |sample| @df.items.should be_include(sample) }
  end

  it "should use the same training set unless reset is passed to it" do
    @df.training_set(:n => 5)
    @df.training_set.should eql(@df.training_set)
    previous = @df.training_set
    @df.training_set(:reset => true, :n => 5)
    @df.training_set.should_not eql(previous)
  end

  it "should be able to create a proportional training set" do
    @df.training_set(:proportion => 0.6)
    @df.training_set.size.should eql(60)
    @df.training_set(:proportion => 0.42, :reset => true)
    @df.training_set.size.should eql(42)
    @df.training_set(:proportion => 0, :reset => true)
    @df.training_set.size.should eql(0)
    @df.training_set(:proportion => 1, :reset => true)
    @df.training_set.size.should eql(100)
  end

  it "should not have a set size exceeding the items size" do
    # Oversized requests are expected to be capped at the item count.
    @df.training_set(:proportion => 2)
    @df.training_set.size.should eql(100)
    @df.training_set(:n => 200, :reset => true)
    @df.training_set.size.should eql(100)
  end

  it "should not have any items when the proportion is calculated below 0" do
    # Negative requests are expected to produce an empty set.
    @df.training_set(:proportion => -2)
    @df.training_set.size.should eql(0)
    @df.training_set(:n => -2, :reset => true)
    @df.training_set.size.should eql(0)
  end

  it "should have a default proportion of 80%" do
    @df.training_set.size.should eql(80)
  end

  it "should offer the test_set, all items except those in the training set" do
    @df.test_set.should eql(@df.items.exclusive_not(@df.training_set))
  end

  it "should reset the test set when the training set is reset" do
    @df.training_set(:n => 2)
    # Asked twice on purpose: the second read must give the same size.
    @df.test_set.size.should eql(98)
    @df.test_set.size.should eql(98)
    @df.training_set(:n => 1, :reset => true)
    @df.test_set.size.should eql(99)
  end

  it "should not reset the training set when the test set is reset" do
    snapshot = @df.training_set.dup
    @df.training_set.should eql(snapshot)
    @df.test_set
    @df.test_set(:reset => true)
    @df.training_set.should eql(snapshot)
  end

end
|
@@ -0,0 +1,141 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
2
|
+
|
3
|
+
# Top-level DataFrame specs: construction, naming, column and row access,
# CSV import, and the hash-style views of the data.
describe DataFrame do

  # Empty four-column frame shared by the examples below.
  before do
    @labels = [:these, :are, :the, :labels]
    @df = DataFrame.new(*@labels)
  end

  it "should initialize with labels" do
    df = DataFrame.new(*@labels)
    df.labels.should eql(@labels)
  end

  it "should have an optional name" do
    @df.name = :some_name
    @df.name.should eql(:some_name)
  end
  it "should initialize with an empty items list" do
    @df.items.should be_is_a(TransposableArray)
    @df.items.should be_empty
  end

  it "should use just_enumerable_stats" do
    # std and cor are not core Array methods; per the example title they
    # are expected to come from just_enumerable_stats.
    [1,2,3].std.should eql(1)
    lambda{[1,2,3].cor([2,3,5])}.should_not raise_error
  end

  context "column and row operations" do
    before do
      @df.add_item([1,2,3,4])
      @df.add_item([5,6,7,8])
      @df.add_item([9,10,11,12])
    end

    it "should have a method for every label, the column in the data frame" do
      @df.these.should eql([1,5,9])
    end

    it "should make columns easily computable" do
      @df.these.std.should eql([1,5,9].std)
    end

    it "should defer unknown methods to the items in the data frame" do
      @df[0].should eql([1,2,3,4])
      @df << [13,14,15,16]
      @df.last.should eql([13,14,15,16])
      @df.map { |e| e.sum }.should eql([10,26,42,58])
    end

    it "should allow optional row labels" do
      @df.row_labels.should eql([])
    end

    it "should have a setter for row labels" do
      @df.row_labels = [:other, :things, :here]
      @df.row_labels.should eql([:other, :things, :here])
    end

    it "should be able to access rows by their labels" do
      @df.row_labels = [:other, :things, :here]
      @df.here.should eql([9,10,11,12])
    end

    it "should make rows easily computable" do
      @df.row_labels = [:other, :things, :here]
      @df.here.sum.should eql(42)
    end
  end

  it "should be able to initialize from an array" do
    # The literal is kept flush left so the string carries no leading
    # whitespace.
    contents = %{7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
}

    @labels = [:x, :y, :month, :day, :ffmc, :dmc, :dc, :isi, :temp, :rh, :wind, :rain, :area]
    @df = DataFrame.new(@labels)
    @df.import(contents)
    @df.labels.should eql(@labels)
  end

  context "csv" do
    it "should compute easily from csv" do
      contents = %{X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
}
      labels = [:x, :y, :month, :day, :ffmc, :dmc, :dc, :isi, :temp, :rh, :wind, :rain, :area]

      @df = DataFrame.from_csv(contents)
      # The header row is expected to become lower-cased symbol labels.
      @df.labels.should eql(labels)
      @df.x.should eql([7,7])
      @df.area.should eql([0,0])
    end

    it "should infer a name when importing from a file" do
      filename = "/tmp/data_frame_spec.csv"
      contents = %{X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
}
      File.open(filename, 'w') {|f| f.write contents}
      begin
        @df = DataFrame.from_csv(filename)
        @df.name.should eql('Data Frame Spec')
      ensure
        # Clean up even when the expectation fails, and do it in-process
        # instead of shelling out to `rm -rf`.
        File.delete(filename) if File.exist?(filename)
      end
    end
  end

  it "should offer a hash-like structure of columns" do
    @df.add [1,2,3,4]
    @df.add [5, 6, 7, 8]
    @df.columns[:these].should eql([1, 5])
    @df.columns[:are].should eql([2, 6])
    @df.columns[:the].should eql([3, 7])
    @df.columns[:labels].should eql([4, 8])
  end

  it "should alias items with rows" do
    @df.add [1,2,3,4]
    @df.add [5, 6, 7, 8]
    @df.rows.should eql(@df.items)
  end

  it "should be able to export a hash" do
    @df.add [1,2,3,4]
    @df.add [5, 6, 7, 8]
    hash = @df.to_hash
    values = [[1,5],[2,6],[3,7],[4,8]]
    hash.keys.size.should eql(@labels.size)
    # Key and value order is not asserted, only membership.
    hash.keys.all? {|e| @labels.should be_include(e)}
    hash.values.size.should eql(@labels.size)
    hash.values.all? {|e| values.should be_include(e)}
  end

  it "should use variables like labels" do
    @df.labels.should eql(@labels)
    @df.variables.should eql(@labels)
  end


end
|