data_frame 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +122 -0
- data/VERSION.yml +4 -0
- data/bin/plain_frame +22 -0
- data/lib/data_frame.rb +26 -0
- data/lib/data_frame/arff.rb +52 -0
- data/lib/data_frame/callback_array.rb +152 -0
- data/lib/data_frame/core/column_management.rb +147 -0
- data/lib/data_frame/core/filter.rb +48 -0
- data/lib/data_frame/core/import.rb +113 -0
- data/lib/data_frame/core/pre_process.rb +69 -0
- data/lib/data_frame/core/saving.rb +29 -0
- data/lib/data_frame/core/training.rb +46 -0
- data/lib/data_frame/data_frame.rb +115 -0
- data/lib/data_frame/id3.rb +28 -0
- data/lib/data_frame/kmeans.rb +10 -0
- data/lib/data_frame/labels_from_uci.rb +48 -0
- data/lib/data_frame/mlp.rb +18 -0
- data/lib/data_frame/model.rb +22 -0
- data/lib/data_frame/parameter_capture.rb +50 -0
- data/lib/data_frame/sbn.rb +18 -0
- data/lib/data_frame/transposable_array.rb +23 -0
- data/lib/ext/array.rb +11 -0
- data/lib/ext/open_struct.rb +5 -0
- data/lib/ext/string.rb +5 -0
- data/lib/ext/symbol.rb +5 -0
- data/spec/data_frame/arff_spec.rb +48 -0
- data/spec/data_frame/callback_array_spec.rb +148 -0
- data/spec/data_frame/core/column_management_spec.rb +128 -0
- data/spec/data_frame/core/filter_spec.rb +88 -0
- data/spec/data_frame/core/import_spec.rb +41 -0
- data/spec/data_frame/core/pre_process_spec.rb +103 -0
- data/spec/data_frame/core/saving_spec.rb +61 -0
- data/spec/data_frame/core/training_spec.rb +72 -0
- data/spec/data_frame/data_frame_spec.rb +141 -0
- data/spec/data_frame/id3_spec.rb +22 -0
- data/spec/data_frame/model_spec.rb +36 -0
- data/spec/data_frame/parameter_capture_spec.rb +32 -0
- data/spec/data_frame/transposable_array_spec.rb +138 -0
- data/spec/data_frame_spec.rb +29 -0
- data/spec/ext/array_spec.rb +13 -0
- data/spec/fixtures/basic.csv +3 -0
- data/spec/fixtures/discrete_testing.csv +4 -0
- data/spec/fixtures/discrete_training.csv +21 -0
- data/spec/spec_helper.rb +8 -0
- metadata +128 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
2
|
+
require 'data_frame/id3'
|
3
|
+
|
4
|
+
describe "DecisionTree" do
|
5
|
+
|
6
|
+
before do
|
7
|
+
@filename = File.expand_path(File.join(File.dirname(__FILE__), "../fixtures/discrete_training.csv"))
|
8
|
+
@df = DataFrame.from_csv(@filename)
|
9
|
+
@test_data = File.read(@filename)
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should require the decisiontree gem" do
|
13
|
+
defined?(DecisionTree::ID3Tree).should eql('constant')
|
14
|
+
end
|
15
|
+
|
16
|
+
it "should be able to create a decision tree from a data frame" do
|
17
|
+
# Come back to this.
|
18
|
+
# @df.create_id3(:purchase)
|
19
|
+
# @df.id3.train
|
20
|
+
# @df.id3.predict(["36 - 55", "masters", "high", "single", 1]).should eql(1)
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
2
|
+
|
3
|
+
describe DataFrame, "model" do
|
4
|
+
before do
|
5
|
+
@csv = %{a,b,c
|
6
|
+
1,2,3
|
7
|
+
2,2,2
|
8
|
+
4,5,6}
|
9
|
+
@df = DataFrame.from_csv(@csv)
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should be able to define a model with a block" do
|
13
|
+
@df.model(:b2) do |m|
|
14
|
+
m.b 2
|
15
|
+
end
|
16
|
+
|
17
|
+
@df.models.table.keys.should eql([:b2])
|
18
|
+
@df.models.b2.size.should eql(2)
|
19
|
+
@df.models.b2.b.should eql([2,2])
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should be able to define a model with a range of values" do
|
23
|
+
@df.model(:a12) do |m|
|
24
|
+
m.a(1..2)
|
25
|
+
end
|
26
|
+
@df.models.a12.a.should eql([1,2])
|
27
|
+
end
|
28
|
+
|
29
|
+
it "should be able to define a model with a set of values" do
|
30
|
+
@df.model(:a14) do |m|
|
31
|
+
m.a [1,4]
|
32
|
+
end
|
33
|
+
@df.models.a14.a.should eql([1,4])
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
2
|
+
|
3
|
+
describe ParameterCapture do
|
4
|
+
|
5
|
+
it "should take a block for a column list" do
|
6
|
+
pc = ParameterCapture.new do |p|
|
7
|
+
p.a 1
|
8
|
+
p.b 2
|
9
|
+
end
|
10
|
+
pc.parameters.table.should == {:a => 1, :b => 2}
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should be able to capture an array as a parameter, meaning an or-condition" do
|
14
|
+
pc = ParameterCapture.new do |p|
|
15
|
+
p.or_condition [1,2]
|
16
|
+
end
|
17
|
+
pc.parameters.or_condition.should eql([1,2])
|
18
|
+
end
|
19
|
+
|
20
|
+
it "should be able to capture a range as a parameter, meaning a continuous-or-condition" do
|
21
|
+
pc = ParameterCapture.new do |p|
|
22
|
+
p.a(1..2)
|
23
|
+
end
|
24
|
+
pc.parameters.a.should eql((1..2))
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should play setter/getter schizophrenia" do
|
28
|
+
pc = ParameterCapture.new {}
|
29
|
+
pc.show 1
|
30
|
+
pc.show.should eql(1)
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
2
|
+
|
3
|
+
describe TransposableArray do
|
4
|
+
before do
|
5
|
+
@ta = TransposableArray.new [[1,2,3],[4,5,6],[7,8,9]]
|
6
|
+
@t = [[1,4,7],[2,5,8],[3,6,9]]
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should be able to transpose itself" do
|
10
|
+
@ta.transpose.should eql(@t)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should cache the transpose" do
|
14
|
+
@ta.cache.should be_nil
|
15
|
+
@ta.transpose
|
16
|
+
@ta.cache.should eql(@t)
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should clear the cache on taint" do
|
20
|
+
@count = nil
|
21
|
+
@ta.transpose
|
22
|
+
@ta.taint
|
23
|
+
@ta.cache.should be_nil
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should clear the cache on []=" do
|
27
|
+
@ta.transpose
|
28
|
+
@ta[0] = 1
|
29
|
+
@ta.cache.should be_nil
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should clear the cache on <<" do
|
33
|
+
@ta.transpose
|
34
|
+
@ta << 1
|
35
|
+
@ta.cache.should be_nil
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should clear the cache on delete" do
|
39
|
+
@ta.transpose
|
40
|
+
@ta.delete(0)
|
41
|
+
@ta.cache.should be_nil
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should clear the cache on push" do
|
45
|
+
@ta.transpose
|
46
|
+
@ta.push(1)
|
47
|
+
@ta.cache.should be_nil
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should clear the cache on pop" do
|
51
|
+
@ta.transpose
|
52
|
+
@ta.pop
|
53
|
+
@ta.cache.should be_nil
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should clear the cache on shift" do
|
57
|
+
@ta.transpose
|
58
|
+
@ta.shift
|
59
|
+
@ta.cache.should be_nil
|
60
|
+
end
|
61
|
+
|
62
|
+
it "should clear the cache on unshift" do
|
63
|
+
@ta.transpose
|
64
|
+
@ta.unshift(1)
|
65
|
+
@ta.cache.should be_nil
|
66
|
+
end
|
67
|
+
|
68
|
+
it "should clear the cache on map!" do
|
69
|
+
@ta.transpose
|
70
|
+
@ta.map!{ |e| e }
|
71
|
+
@ta.cache.should be_nil
|
72
|
+
end
|
73
|
+
|
74
|
+
it "should clear the cache on sort!" do
|
75
|
+
@ta.transpose
|
76
|
+
@ta.sort!
|
77
|
+
@ta.cache.should be_nil
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should clear the cache on reverse!" do
|
81
|
+
@ta.transpose
|
82
|
+
@ta.reverse!
|
83
|
+
@ta.cache.should be_nil
|
84
|
+
end
|
85
|
+
|
86
|
+
it "should clear the cache on collect!" do
|
87
|
+
@ta.transpose
|
88
|
+
@ta.collect! {|e| e}
|
89
|
+
@ta.cache.should be_nil
|
90
|
+
end
|
91
|
+
|
92
|
+
it "should clear the cache on compact!" do
|
93
|
+
@ta.transpose
|
94
|
+
@ta.compact!
|
95
|
+
@ta.cache.should be_nil
|
96
|
+
end
|
97
|
+
|
98
|
+
it "should clear the cache on reject!" do
|
99
|
+
@ta.transpose
|
100
|
+
@ta.reject! {|e| e}
|
101
|
+
@ta.cache.should be_nil
|
102
|
+
end
|
103
|
+
|
104
|
+
it "should clear the cache on slice!" do
|
105
|
+
@ta.transpose
|
106
|
+
@ta.slice!(1,2)
|
107
|
+
@ta.cache.should be_nil
|
108
|
+
end
|
109
|
+
|
110
|
+
it "should clear the cache on flatten!" do
|
111
|
+
@ta.transpose
|
112
|
+
@ta.flatten!
|
113
|
+
@ta.cache.should be_nil
|
114
|
+
end
|
115
|
+
|
116
|
+
it "should clear the cache on uniq!" do
|
117
|
+
@ta.transpose
|
118
|
+
@ta.uniq!
|
119
|
+
@ta.cache.should be_nil
|
120
|
+
end
|
121
|
+
|
122
|
+
it "should clear the cache on clear" do
|
123
|
+
@ta.transpose
|
124
|
+
@ta.clear
|
125
|
+
@ta.cache.should be_nil
|
126
|
+
end
|
127
|
+
|
128
|
+
it "should not adjust the array in other methods" do
|
129
|
+
@ta.transpose
|
130
|
+
@ta.at(0)
|
131
|
+
@ta.sort
|
132
|
+
@ta.uniq
|
133
|
+
@ta.find{|e| e}
|
134
|
+
@ta.cache.should eql(@t)
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/spec_helper")
|
2
|
+
|
3
|
+
describe DataFrame, "gem dependencies" do
|
4
|
+
|
5
|
+
it "should use RubyGems" do
|
6
|
+
defined?(Gem).should eql('constant')
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should use ActiveSupport" do
|
10
|
+
defined?(ActiveSupport).should eql('constant')
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should use JustEnumerableStats" do
|
14
|
+
[1]._jes_average.should eql(1)
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should use OpenURI" do
|
18
|
+
defined?(OpenURI).should eql('constant')
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should use FasterCSV" do
|
22
|
+
defined?(FasterCSV).should eql('constant')
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should use OpenStruct" do
|
26
|
+
defined?(OpenStruct).should eql('constant')
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
|
2
|
+
|
3
|
+
describe Array do
|
4
|
+
it "should be able to determine its dimensions" do
|
5
|
+
[1,2,3].dimensions.should eql(1)
|
6
|
+
[[1,2,3], [1,2,3]].dimensions.should eql(2)
|
7
|
+
[[[1,2,3], [1,2,3]], [[1,2,3], [1,2,3], [[1,2,3], [1,2,3]]]].dimensions.should eql(3)
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should depend on the first element to determine dimensions" do
|
11
|
+
[1, [1,2]].dimensions.should eql(1)
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
Age,Education,Income,Marital Status,Purchase
|
2
|
+
36 - 55,masters,high,single,will buy
|
3
|
+
18 - 35,high school,low,single,won't buy
|
4
|
+
36 - 55,masters,low,single,will buy
|
5
|
+
18 - 35,bachelors,high,single,won't buy
|
6
|
+
< 18,high school,low,single,will buy
|
7
|
+
18 - 35,bachelors,high,married,won't buy
|
8
|
+
36 - 55,bachelors,low,married,won't buy
|
9
|
+
> 55,bachelors,high,single,will buy
|
10
|
+
36 - 55,masters,low,married,won't buy
|
11
|
+
> 55,masters,low,married,will buy
|
12
|
+
36 - 55,masters,high,single,will buy
|
13
|
+
> 55,masters,high,single,will buy
|
14
|
+
< 18,high school,high,single,won't buy
|
15
|
+
36 - 55,masters,low,single,will buy
|
16
|
+
36 - 55,high school,low,single,will buy
|
17
|
+
< 18,high school,low,married,will buy
|
18
|
+
18 - 35,bachelors,high,married,won't buy
|
19
|
+
> 55,high school,high,married,will buy
|
20
|
+
> 55,bachelors,low,single,will buy
|
21
|
+
36 - 55,high school,high,married,won't buy
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: data_frame
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.8
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Richards
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-10-15 00:00:00 -06:00
|
13
|
+
default_executable: plain_frame
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: activesupport
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: davidrichards-just_enumerable_stats
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: fastercsv
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0"
|
44
|
+
version:
|
45
|
+
description: Data Frames with memoized transpose
|
46
|
+
email: davidlamontrichards@gmail.com
|
47
|
+
executables:
|
48
|
+
- plain_frame
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files: []
|
52
|
+
|
53
|
+
files:
|
54
|
+
- README.rdoc
|
55
|
+
- VERSION.yml
|
56
|
+
- bin/plain_frame
|
57
|
+
- lib/data_frame/arff.rb
|
58
|
+
- lib/data_frame/callback_array.rb
|
59
|
+
- lib/data_frame/core/column_management.rb
|
60
|
+
- lib/data_frame/core/filter.rb
|
61
|
+
- lib/data_frame/core/import.rb
|
62
|
+
- lib/data_frame/core/pre_process.rb
|
63
|
+
- lib/data_frame/core/saving.rb
|
64
|
+
- lib/data_frame/core/training.rb
|
65
|
+
- lib/data_frame/data_frame.rb
|
66
|
+
- lib/data_frame/id3.rb
|
67
|
+
- lib/data_frame/kmeans.rb
|
68
|
+
- lib/data_frame/labels_from_uci.rb
|
69
|
+
- lib/data_frame/mlp.rb
|
70
|
+
- lib/data_frame/model.rb
|
71
|
+
- lib/data_frame/parameter_capture.rb
|
72
|
+
- lib/data_frame/sbn.rb
|
73
|
+
- lib/data_frame/transposable_array.rb
|
74
|
+
- lib/data_frame.rb
|
75
|
+
- lib/ext/array.rb
|
76
|
+
- lib/ext/open_struct.rb
|
77
|
+
- lib/ext/string.rb
|
78
|
+
- lib/ext/symbol.rb
|
79
|
+
- spec/data_frame/arff_spec.rb
|
80
|
+
- spec/data_frame/callback_array_spec.rb
|
81
|
+
- spec/data_frame/core/column_management_spec.rb
|
82
|
+
- spec/data_frame/core/filter_spec.rb
|
83
|
+
- spec/data_frame/core/import_spec.rb
|
84
|
+
- spec/data_frame/core/pre_process_spec.rb
|
85
|
+
- spec/data_frame/core/saving_spec.rb
|
86
|
+
- spec/data_frame/core/training_spec.rb
|
87
|
+
- spec/data_frame/data_frame_spec.rb
|
88
|
+
- spec/data_frame/id3_spec.rb
|
89
|
+
- spec/data_frame/model_spec.rb
|
90
|
+
- spec/data_frame/parameter_capture_spec.rb
|
91
|
+
- spec/data_frame/transposable_array_spec.rb
|
92
|
+
- spec/data_frame_spec.rb
|
93
|
+
- spec/ext/array_spec.rb
|
94
|
+
- spec/fixtures/basic.csv
|
95
|
+
- spec/fixtures/discrete_testing.csv
|
96
|
+
- spec/fixtures/discrete_training.csv
|
97
|
+
- spec/spec_helper.rb
|
98
|
+
has_rdoc: true
|
99
|
+
homepage: http://github.com/davidrichards/data_frame
|
100
|
+
licenses: []
|
101
|
+
|
102
|
+
post_install_message:
|
103
|
+
rdoc_options:
|
104
|
+
- --inline-source
|
105
|
+
- --charset=UTF-8
|
106
|
+
require_paths:
|
107
|
+
- lib
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
109
|
+
requirements:
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: "0"
|
113
|
+
version:
|
114
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
115
|
+
requirements:
|
116
|
+
- - ">="
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: "0"
|
119
|
+
version:
|
120
|
+
requirements: []
|
121
|
+
|
122
|
+
rubyforge_project:
|
123
|
+
rubygems_version: 1.3.5
|
124
|
+
signing_key:
|
125
|
+
specification_version: 3
|
126
|
+
summary: Data Frames with memoized transpose
|
127
|
+
test_files: []
|
128
|
+
|