davidrichards-data_frame 0.0.18 → 0.0.19
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +16 -0
- data/VERSION.yml +1 -1
- data/bin/plain_frame +22 -0
- data/lib/data_frame.rb +2 -1
- data/lib/data_frame/arff.rb +43 -36
- data/lib/data_frame/core/column_management.rb +102 -0
- data/lib/data_frame/core/filter.rb +48 -0
- data/lib/data_frame/core/import.rb +112 -0
- data/lib/data_frame/core/pre_process.rb +61 -0
- data/lib/data_frame/core/saving.rb +29 -0
- data/lib/data_frame/core/training.rb +36 -0
- data/lib/data_frame/data_frame.rb +37 -241
- data/lib/data_frame/id3.rb +28 -0
- data/lib/data_frame/kmeans.rb +10 -0
- data/lib/data_frame/labels_from_uci.rb +48 -0
- data/lib/data_frame/mlp.rb +18 -0
- data/lib/data_frame/sbn.rb +18 -0
- data/lib/data_frame/transposable_array.rb +1 -1
- data/lib/ext/array.rb +11 -0
- data/spec/data_frame/arff_spec.rb +1 -0
- data/spec/data_frame/core/column_management_spec.rb +97 -0
- data/spec/data_frame/core/filter_spec.rb +88 -0
- data/spec/data_frame/core/import_spec.rb +41 -0
- data/spec/data_frame/core/pre_process_spec.rb +71 -0
- data/spec/data_frame/core/saving_spec.rb +61 -0
- data/spec/data_frame/core/training_spec.rb +51 -0
- data/spec/data_frame/data_frame_spec.rb +10 -226
- data/spec/data_frame/id3_spec.rb +22 -0
- data/spec/ext/array_spec.rb +13 -0
- data/spec/fixtures/discrete_testing.csv +4 -0
- data/spec/fixtures/discrete_training.csv +21 -0
- metadata +33 -6
@@ -0,0 +1,18 @@
|
|
1
|
+
module DF #:nodoc:
  # Turns Data Frame into a feeder for Red Davis' MLP classifier.
  # Will install it if you don't have it.
  #
  # The module body runs when this file is loaded: it activates the
  # 'reddavis-mlp' gem and requires 'mlp'. It defines no methods of its own.
  module MLP
    begin
      gem 'reddavis-mlp'
      require 'mlp'
    rescue LoadError
      # A missing gem is reported as Gem::LoadError / LoadError. Those descend
      # from ScriptError, not StandardError, so a bare `rescue` would never
      # reach this install-and-retry fallback.
      `sudo gem install reddavis-mlp`
      # NOTE(review): a gem installed by the subprocess may not be visible to
      # this process until RubyGems re-reads its specs -- confirm the retry
      # below succeeds on a clean machine.
      gem 'reddavis-mlp'
      require 'mlp'
    end
  end
end

# Reopen DataFrame so every instance carries DF::MLP in its ancestry.
class DataFrame
  include DF::MLP
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module DF #:nodoc:
  # Turns Data Frame into a feeder for Carl Youngblood's Simple Bayesian classifier.
  # Will install it if you don't have it.
  #
  # The module body runs when this file is loaded: it activates the 'sbn' gem
  # and requires 'sbn'. It defines no methods of its own.
  module SBN
    begin
      gem 'sbn'
      require 'sbn'
    rescue LoadError
      # A missing gem is reported as Gem::LoadError / LoadError. Those descend
      # from ScriptError, not StandardError, so a bare `rescue` would never
      # reach this install-and-retry fallback.
      `sudo gem install sbn`
      # NOTE(review): a gem installed by the subprocess may not be visible to
      # this process until RubyGems re-reads its specs -- confirm the retry
      # below succeeds on a clean machine.
      gem 'sbn'
      require 'sbn'
    end
  end
end

# Reopen DataFrame so every instance carries DF::SBN in its ancestry.
class DataFrame
  include DF::SBN
end
|
data/lib/ext/array.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
class Array
  # Reports how deeply the array is nested, following only the first element
  # at each level:
  #   [1,2,3].dimensions                                      # => 1
  #   [[1,2,3], [1,2,3]].dimensions                           # => 2
  #   [[[1,2,3], [1,2,3]], [[1,2,3], [1,2,3]]].dimensions     # => 3
  #
  # An empty array counts as one dimension, because its (nil) first element
  # is not an Array.
  #
  # n is the depth already counted; it is added to the result and defaults
  # to 0 for a top-level call.
  def dimensions(n=0)
    depth = n
    layer = self
    # Descend through first elements until something that is not an Array
    # turns up, counting one level per array visited (including self).
    while layer.is_a?(Array)
      depth += 1
      layer = layer.first
    end
    depth
  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../../spec_helper")
|
2
|
+
|
3
|
+
# Specs for the column-level operations of DataFrame: append!, move_to_last!,
# rename!, drop!, replace! and subset_from_columns.
describe "Column Management" do
  # Every example starts from a two-row frame with four labelled columns.
  before do
    @labels = [:these, :are, :the, :labels]
    @df = DataFrame.new(*@labels)
    @df.add [1,2,3,4]
    @df.add [5, 6, 7, 8]
  end

  context "append!" do
    it "should be able to append an array of values to the data frame" do
      @df.append!(:new_column, [5,5])
      @df.new_column.should eql([5,5])
    end

    it "should be able to append a default value to the data frame" do
      @df.append!(:new_column, :value)
      @df.new_column.should eql([:value, :value])
    end

    it "should use nil as the default value" do
      @df.append!(:new_column)
      @df.new_column.should eql([nil, nil])
    end
  end

  context "move_to_last!" do
    it "should be able to move a column to the end of the data frame, useful for dependency models" do
      @df.labels.should eql(@labels)
      @df.move_to_last!(:these)
      @df.labels.should eql([:are, :the, :labels, :these])
      @df.these.should eql([1,5])
    end
  end

  context "rename!" do
    it "should be able to rename a column" do
      @df.rename!(:new_name, :these)
      @df.labels.should eql([:new_name, :are, :the, :labels])
    end
  end

  context "drop!" do
    it "should be able to remove a column" do
      @df = DataFrame.new :twos, :threes, :fours
      @df.import([[2,3,4], [2,3,4], [2,3,4], [2,3,4]])
      @df.drop!(:twos)
      # `each`, not `all?`: the expectation must run for every row, and
      # `all?` would stop at the first falsy block result.
      @df.items.each {|i| i.should eql([3,4])}
      @df.labels.should eql([:threes, :fours])
    end

    it "should be able to remove more than one column at a time" do
      @df = DataFrame.new :twos, :threes, :fours
      @df.import([[2,3,4], [2,3,4], [2,3,4], [2,3,4]])
      @df.drop!(:twos, :fours)
      @df.items.each {|i| i.should eql([3])}
      @df.labels.should eql([:threes])
    end
  end

  context "replace!" do
    before do
      @doubler = lambda{|e| e * 2}
    end

    it "should only replace columns that actually exist" do
      lambda{@df.replace!(:not_a_column, &@doubler)}.should raise_error(
        ArgumentError, /Must provide the name of an existing column./)
      lambda{@df.replace!(:these, &@doubler)}.should_not raise_error
    end

    it "should be able to replace a column with a block" do
      @df.replace!(:these) {|e| e * 2}
      @df.these.should eql([2,10])
    end

    it "should be able to replace a column with an array" do
      @a = [5,9]
      @df.replace!(:these, @a)
      @df.these.should eql(@a)
    end
  end

  context "subset_from_columns" do
    it "should be able to create a subset of columns" do
      new_data_frame = @df.subset_from_columns(:these, :labels)
      new_data_frame.should_not eql(@df)
      new_data_frame.labels.should eql([:these, :labels])
      new_data_frame.items.should eql([[1,4],[5,8]])
      new_data_frame.these.should eql([1,5])
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../../spec_helper")
|
2
|
+
|
3
|
+
# Specs for row filtering: filter/filter! with a block, and
# filter_by_category/filter_by_category! with value, range and array matchers.
describe "Filter" do
  # Every example starts from a two-row frame with four labelled columns.
  before do
    @labels = [:these, :are, :the, :labels]
    @df = DataFrame.new(*@labels)
    @df.add [1,2,3,4]
    @df.add [5, 6, 7, 8]
  end

  it "should be able to filter a data frame with a block using an OpenStruct for each row" do
    @df.filter!(:open_struct) {|row| row.these == 5}
    @df.items.should eql([[5, 6, 7, 8]])
  end

  it "should be able to filter a data frame with a block using a Hash for each row" do
    @df.filter!(:hash) {|row| row[:these] == 5}
    @df.items.should eql([[5, 6, 7, 8]])
  end

  # Four-field struct used as the per-row wrapper class in the next example.
  S4 = Struct.new(:one, :two, :three, :four)
  it "should be able to filter a data frame with a block using another class that uses the row as input" do
    @df.filter!(S4) {|row| row.one == 5}
    @df.items.should eql([[5, 6, 7, 8]])
  end

  it "should be able to filter a data frame with a block using an array for each row" do
    @df.filter! {|row| row.first == 5}
    @df.items.should eql([[5, 6, 7, 8]])
  end

  it "should be able to do fancy things with the row as the filter" do
    @df.filter! {|row| row.sum > 10}
    @df.items.should eql([[5, 6, 7, 8]])
  end

  it "should be able to generate a new data frame with filter" do
    new_df = @df.filter(:open_struct) {|row| row.these == 5}
    new_df.items.should eql([[5, 6, 7, 8]])
    @df.items.should eql([[1, 2, 3, 4], [5, 6, 7, 8]])
  end

  # Nested inside the Filter group so these examples do not rely on a
  # top-level `context` method being available. The inner `before` replaces
  # the @df built by the outer one.
  context "filter_by_category" do
    before do
      @df = DataFrame.new(:weather, :date)

      # One row per day of July 2009. Dates are built from explicit
      # year/month/day parts because Date.parse("07/15/2009") is read
      # day-first on Ruby 1.9+ and fails on the resulting month 15.
      (1..31).each do |i|
        @df.add [(i % 3 == 1) ? :fair : :good, Date.new(2009, 7, i)]
      end

      @d1 = Date.new(2009, 7, 15)
      @d2 = Date.new(2009, 7, 31)
    end

    it "should be able to filter by category" do
      filtered = @df.filter_by_category(:weather => :good)
      filtered.weather.uniq.should eql([:good])
      @df.weather.uniq.should be_include(:fair)
    end

    it "should be able to manage ranges for filter values" do
      filtered = @df.filter_by_category(:date => (@d1..@d2))
      filtered.date.should_not be_include(Date.new(2009, 7, 1))
      filtered.date.should_not be_include(Date.new(2009, 7, 14))
      filtered.date.should be_include(Date.new(2009, 7, 15))
      filtered.date.should be_include(Date.new(2009, 7, 31))
      @df.date.should be_include(Date.new(2009, 7, 1))
    end

    it "should be able to take an array of values to filter with" do
      filtered = @df.filter_by_category(:date => [@d1, @d2])
      filtered.date.should_not be_include(Date.new(2009, 7, 1))
      filtered.date.should be_include(Date.new(2009, 7, 15))
      filtered.date.should be_include(Date.new(2009, 7, 31))
    end

    it "should have a destructive version" do
      @df.filter_by_category!(:date => [@d1, @d2])
      @df.date.should_not be_include(Date.new(2009, 7, 1))
      @df.date.should be_include(Date.new(2009, 7, 15))
      @df.date.should be_include(Date.new(2009, 7, 31))
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../../spec_helper")
|
2
|
+
|
3
|
+
# Specs for getting rows into a DataFrame: add_item for a single row, and
# import for nested arrays, a flat array, or CSV text.
describe "Import" do

  # Each example starts from an empty four-column frame.
  before do
    @labels = [:these, :are, :the, :labels]
    @df = DataFrame.new(*@labels)
  end

  it "should be able to add an item" do
    row = [1,2,3,4]
    @df.add_item(row)
    @df.items.should eql([row])
  end

  it "should be able to import more than one row at a time" do
    rows = [[2,2,2,2],[3,3,3,3],[4,4,4,4]]
    @df.import(rows)
    # Once the rows are labelled, each one is readable by its label.
    @df.row_labels = [:twos, :threes, :fours]
    @df.twos.should eql([2,2,2,2])
    @df.threes.should eql([3,3,3,3])
    @df.fours.should eql([4,4,4,4])
  end

  it "should be able to import only one row" do
    single_row = [2,2,2,2]
    @df.import(single_row)
    @df.these.should eql([2])
  end

  it "should be able to import a reference to csv" do
    # Two CSV records of thirteen fields each, passed to import as one string.
    csv_text = %{7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
}

    @labels = [:x, :y, :month, :day, :ffmc, :dmc, :dc, :isi, :temp, :rh, :wind, :rain, :area]
    @df = DataFrame.new(@labels)
    @df.import(csv_text)
    @df.x.should eql([7,7])
    @df.area.should eql([0,0])
  end

end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../../spec_helper")
|
2
|
+
|
3
|
+
# Specs for the pre-processing helpers j_binary_ize! and numericize!.
describe "PreProcess" do
  # Builds the fixture shared by most examples: a single :observations column
  # holding 1,2,3,4,5,4,3,2,1 with the :small category (values <= 3) already
  # registered. Each example adds its own :large category afterwards.
  def frame_with_small_category
    frame = DataFrame.new(:observations)
    frame.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
    frame.observations.add_category(:small) {|e| e <= 3}
    frame
  end

  it "should be able to j_binary_ize! a column, taking its categories and creating a column for each" do
    df = DataFrame.new(:observations)
    [:many, :fine, :things, :are, :available].each {|word| df.add [word]}
    df.j_binary_ize!(:observations)
    df.observations_many.should eql([true, false, false, false, false])
    df.observations_fine.should eql([false, true, false, false, false])
    df.observations_things.should eql([false, false, true, false, false])
    df.observations_are.should eql([false, false, false, true, false])
    df.observations_available.should eql([false, false, false, false, true])
    df.observations.should eql([:many, :fine, :things, :are, :available])
  end

  it "should be able to j_binary_ize! a more normal column" do
    df = frame_with_small_category
    df.observations.add_category(:large) {|e| e >= 3}
    df.j_binary_ize!(:observations)
    df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
    df.observations_large.should eql([false, false, false, true, true, true, false, false, false])
  end

  it "should be able to j_binary_ize with non-adjacent sets (sets that allow a value to have more than one category)" do
    df = frame_with_small_category
    df.observations.add_category(:large) {|e| e >= 3}
    df.j_binary_ize!(:observations, :allow_overlap => true)
    df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
    df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
  end

  it "should be able to hold multiple ideas of a columns categories by resetting the category and re-running j_binary_ize" do
    df = frame_with_small_category
    df.observations.add_category(:large) {|e| e >= 3}
    df.j_binary_ize!(:observations, :allow_overlap => true)
    # Swap in a second category scheme and run again; the columns from the
    # first run are expected to still be there.
    df.observations.set_categories(:odd => lambda{|e| e.odd?}, :even => lambda{|e| e.even?})
    df.j_binary_ize!(:observations)
    df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
    df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
    df.observations.should eql([1,2,3,4,5,4,3,2,1])
    df.observations_even.should eql([false, true, false, true, false, true, false, true, false])
    df.observations_odd.should eql([true, false, true, false, true, false, true, false, true])
  end

  context "numericize!" do

    # Here :large is strictly greater than 3, so the two categories do not overlap.
    before do
      @df = frame_with_small_category
      @df.observations.add_category(:large) {|e| e > 3}
    end

    it "should be able to numericize nominal data" do
      @df.numericize!(:observations)
      @df.numerical_observations.should eql([[1,0],[1,0],[1,0],[0,1],[0,1],[0,1],[1,0],[1,0],[1,0]])
    end

  end

end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../../spec_helper")
|
2
|
+
|
3
|
+
# Specs for DataFrame#save and its :include_header, :subset/:only and
# :filter_by_category/:filter options. Each example compares the written file
# with the matching to_csv output.
describe "Saving" do
  before do
    @df = DataFrame.new(:observations)
    @df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
    @df.observations.add_category(:small) {|e| e <= 3}
    @df.observations.add_category(:large) {|e| e > 3}
    @filename = "/tmp/numericized_observations"
  end

  # Remove the output file with File.delete instead of shelling out to
  # `rm -rf`, so no shell is spawned and no path is interpolated into a
  # command line.
  # NOTE(review): assumes save writes a regular file, not a directory.
  after do
    File.delete(@filename) if File.exist?(@filename)
  end

  it "should be able to save the data frame" do
    @df.numericize!(:observations)
    @df.save(@filename)
    File.read(@filename).should eql(@df.to_csv)
  end

  it "should be able to save the data frame without the header" do
    @df.save(@filename, :include_header => false)
    File.read(@filename).should eql(@df.to_csv(false))
  end

  it "should be able to save off a subset" do
    @df = DataFrame.new(:observations, :junk)
    @df.import( [1,2,3,4,5,4,3,2,1].map{ |e| [e,e] } )
    @df.save(@filename, :subset => :observations)
    File.read(@filename).should eql(@df.subset_from_columns(:observations).to_csv)
  end

  it "should be able to filter the rows" do
    @df = DataFrame.new(:observations, :junk)
    @df.import( [1,2,3,4,5,4,3,2,1].map{ |e| [e,e] } )
    @df.save(@filename, :subset => :observations)
    @df.observations.add_category(:small) {|e| e <= 3}
    @df.observations.add_category(:large) {|e| e > 3}
    @df.save(@filename, :filter_by_category => {:observations => :small})
    File.read(@filename).should eql(@df.filter_by_category(:observations => :small).to_csv)
  end

  it "should have a shortcut for subset, only" do
    @df = DataFrame.new(:observations, :junk)
    @df.import( [1,2,3,4,5,4,3,2,1].map{ |e| [e,e] } )
    @df.save(@filename, :only => :observations)
    File.read(@filename).should eql(@df.subset_from_columns(:observations).to_csv)
  end

  it "should have a shortcut for filter_by_category, filter" do
    @df = DataFrame.new(:observations, :junk)
    @df.import( [1,2,3,4,5,4,3,2,1].map{ |e| [e,e] } )
    @df.save(@filename, :subset => :observations)
    @df.observations.add_category(:small) {|e| e <= 3}
    @df.observations.add_category(:large) {|e| e > 3}
    @df.save(@filename, :filter => {:observations => :small})
    File.read(@filename).should eql(@df.filter_by_category(:observations => :small).to_csv)
  end

end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../../spec_helper")
|
2
|
+
|
3
|
+
# Specs for DataFrame#training_set: sizing by :n or :proportion, caching until
# :reset is passed, clamping to the number of items, and the default size.
describe "Training" do
  # The size expectations below (60, 42, 80, 100) rely on the frame holding
  # 100 items after this import.
  before do
    @df = DataFrame.new(:one)
    @df.import((0...100).to_a)
  end

  it "should be able to create a proportional training set from a data frame" do
    @df.training_set(:n => 3)
    @df.training_set.size.should eql(3)
    # `each`, not `all?`: the expectation must run for every member, and
    # `all?` would stop at the first falsy block result.
    @df.training_set.each {|e| @df.items.should be_include(e)}
  end

  it "should use the same training set unless reset is passed to it" do
    @df.training_set(:n => 5)
    @df.training_set.should eql(@df.training_set)
    old = @df.training_set
    @df.training_set(:reset => true, :n => 5)
    @df.training_set.should_not eql(old)
  end

  it "should be able to create a proportional training set" do
    @df.training_set(:proportion => 0.6)
    @df.training_set.size.should eql(60)
    @df.training_set(:proportion => 0.42, :reset => true)
    @df.training_set.size.should eql(42)
    @df.training_set(:proportion => 0, :reset => true)
    @df.training_set.size.should eql(0)
    @df.training_set(:proportion => 1, :reset => true)
    @df.training_set.size.should eql(100)
  end

  it "should not have a set size exceeding the items size" do
    @df.training_set(:proportion => 2)
    @df.training_set.size.should eql(100)
    @df.training_set(:n => 200, :reset => true)
    @df.training_set.size.should eql(100)
  end

  it "should not have any items when the proportion is calculated below 0" do
    @df.training_set(:proportion => -2)
    @df.training_set.size.should eql(0)
    @df.training_set(:n => -2, :reset => true)
    @df.training_set.size.should eql(0)
  end

  it "should have a default proportion of 80%" do
    @df.training_set.size.should eql(80)
  end
end
|