data_frame 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +122 -0
- data/VERSION.yml +4 -0
- data/bin/plain_frame +22 -0
- data/lib/data_frame.rb +26 -0
- data/lib/data_frame/arff.rb +52 -0
- data/lib/data_frame/callback_array.rb +152 -0
- data/lib/data_frame/core/column_management.rb +147 -0
- data/lib/data_frame/core/filter.rb +48 -0
- data/lib/data_frame/core/import.rb +113 -0
- data/lib/data_frame/core/pre_process.rb +69 -0
- data/lib/data_frame/core/saving.rb +29 -0
- data/lib/data_frame/core/training.rb +46 -0
- data/lib/data_frame/data_frame.rb +115 -0
- data/lib/data_frame/id3.rb +28 -0
- data/lib/data_frame/kmeans.rb +10 -0
- data/lib/data_frame/labels_from_uci.rb +48 -0
- data/lib/data_frame/mlp.rb +18 -0
- data/lib/data_frame/model.rb +22 -0
- data/lib/data_frame/parameter_capture.rb +50 -0
- data/lib/data_frame/sbn.rb +18 -0
- data/lib/data_frame/transposable_array.rb +23 -0
- data/lib/ext/array.rb +11 -0
- data/lib/ext/open_struct.rb +5 -0
- data/lib/ext/string.rb +5 -0
- data/lib/ext/symbol.rb +5 -0
- data/spec/data_frame/arff_spec.rb +48 -0
- data/spec/data_frame/callback_array_spec.rb +148 -0
- data/spec/data_frame/core/column_management_spec.rb +128 -0
- data/spec/data_frame/core/filter_spec.rb +88 -0
- data/spec/data_frame/core/import_spec.rb +41 -0
- data/spec/data_frame/core/pre_process_spec.rb +103 -0
- data/spec/data_frame/core/saving_spec.rb +61 -0
- data/spec/data_frame/core/training_spec.rb +72 -0
- data/spec/data_frame/data_frame_spec.rb +141 -0
- data/spec/data_frame/id3_spec.rb +22 -0
- data/spec/data_frame/model_spec.rb +36 -0
- data/spec/data_frame/parameter_capture_spec.rb +32 -0
- data/spec/data_frame/transposable_array_spec.rb +138 -0
- data/spec/data_frame_spec.rb +29 -0
- data/spec/ext/array_spec.rb +13 -0
- data/spec/fixtures/basic.csv +3 -0
- data/spec/fixtures/discrete_testing.csv +4 -0
- data/spec/fixtures/discrete_training.csv +21 -0
- data/spec/spec_helper.rb +8 -0
- metadata +128 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")
require 'data_frame/id3'

# Specs for the ID3 decision-tree integration loaded from data_frame/id3.
describe "DecisionTree" do

  before do
    # Build a data frame from the discrete training fixture before each example.
    @filename = File.expand_path(File.join(File.dirname(__FILE__), "../fixtures/discrete_training.csv"))
    @df = DataFrame.from_csv(@filename)
  end

  it "should require the decisiontree gem" do
    defined?(DecisionTree::ID3Tree).should eql('constant')
  end

  # Declared without a block so RSpec reports it as pending rather than
  # letting an empty example pass vacuously. Body to restore once the
  # training API is revisited:
  #
  #   @df.create_id3(:purchase)
  #   @df.id3.train
  #   @df.id3.predict(["36 - 55", "masters", "high", "single", 1]).should eql(1)
  it "should be able to create a decision tree from a data frame"
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")

# Exercises DataFrame#model: a named model is declared through a block whose
# calls name a column and the value(s) expected in that column.
describe DataFrame, "model" do
  before do
    # Header row plus three data rows for columns a, b and c.
    @csv = ["a,b,c", "1,2,3", "2,2,2", "4,5,6"].join("\n")
    @df = DataFrame.from_csv(@csv)
  end

  it "should be able to define a model with a block" do
    @df.model(:b2) { |conditions| conditions.b(2) }

    @df.models.table.keys.should eql([:b2])
    @df.models.b2.size.should eql(2)
    @df.models.b2.b.should eql([2,2])
  end

  it "should be able to define a model with a range of values" do
    @df.model(:a12) { |conditions| conditions.a(1..2) }

    @df.models.a12.a.should eql([1,2])
  end

  it "should be able to define a model with a set of values" do
    @df.model(:a14) { |conditions| conditions.a([1,4]) }

    @df.models.a14.a.should eql([1,4])
  end

end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")

# The examples below expect ParameterCapture to store the argument of each
# method call under that method's name, and to return the stored value when
# the same method is called without an argument.
describe ParameterCapture do

  it "should take a block for a column list" do
    capture = ParameterCapture.new do |params|
      params.a(1)
      params.b(2)
    end
    capture.parameters.table.should == {:a => 1, :b => 2}
  end

  it "should be able to capture an array as a parameter, meaning an or-condition" do
    capture = ParameterCapture.new { |params| params.or_condition([1,2]) }
    capture.parameters.or_condition.should eql([1,2])
  end

  it "should be able to capture a range as a parameter, meaning a continuous-or-condition" do
    capture = ParameterCapture.new { |params| params.a(1..2) }
    capture.parameters.a.should eql((1..2))
  end

  it "should play setter/getter schizophrenia" do
    capture = ParameterCapture.new {}
    # With an argument the call stores the value; without one it reads it back.
    capture.show(1)
    capture.show.should eql(1)
  end
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")

# These examples expect TransposableArray to keep its transpose in #cache,
# to drop that cache after any mutating call, and to keep it after
# read-only calls.
describe TransposableArray do
  before do
    @ta = TransposableArray.new [[1,2,3],[4,5,6],[7,8,9]]
    @t = [[1,4,7],[2,5,8],[3,6,9]]
  end

  it "should be able to transpose itself" do
    @ta.transpose.should eql(@t)
  end

  it "should cache the transpose" do
    @ta.cache.should be_nil
    @ta.transpose
    @ta.cache.should eql(@t)
  end

  # Each mutating call that must invalidate the cached transpose, paired with
  # a lambda performing it on the array under test. An array of pairs (rather
  # than a hash) keeps the generated examples in this exact order.
  mutations = [
    ["taint",    lambda { |ta| ta.taint }],
    ["[]=",      lambda { |ta| ta[0] = 1 }],
    ["<<",       lambda { |ta| ta << 1 }],
    ["delete",   lambda { |ta| ta.delete(0) }],
    ["push",     lambda { |ta| ta.push(1) }],
    ["pop",      lambda { |ta| ta.pop }],
    ["shift",    lambda { |ta| ta.shift }],
    ["unshift",  lambda { |ta| ta.unshift(1) }],
    ["map!",     lambda { |ta| ta.map! { |e| e } }],
    ["sort!",    lambda { |ta| ta.sort! }],
    ["reverse!", lambda { |ta| ta.reverse! }],
    ["collect!", lambda { |ta| ta.collect! { |e| e } }],
    ["compact!", lambda { |ta| ta.compact! }],
    ["reject!",  lambda { |ta| ta.reject! { |e| e } }],
    ["slice!",   lambda { |ta| ta.slice!(1, 2) }],
    ["flatten!", lambda { |ta| ta.flatten! }],
    ["uniq!",    lambda { |ta| ta.uniq! }],
    ["clear",    lambda { |ta| ta.clear }]
  ]

  mutations.each do |name, mutate|
    it "should clear the cache on #{name}" do
      @ta.transpose        # populate the cache first
      mutate.call(@ta)
      @ta.cache.should be_nil
    end
  end

  it "should not adjust the array in other methods" do
    @ta.transpose
    # Read-only calls: the cached transpose must still be present afterwards.
    @ta.at(0)
    @ta.sort
    @ta.uniq
    @ta.find { |e| e }
    @ta.cache.should eql(@t)
  end
end
|
137
|
+
|
138
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/spec_helper")

# Smoke checks that, once spec_helper has been required, the constant (or
# method) belonging to each named dependency is visible.
describe DataFrame, "gem dependencies" do

  it("should use RubyGems") { defined?(Gem).should eql('constant') }

  it("should use ActiveSupport") { defined?(ActiveSupport).should eql('constant') }

  # Probed through a method call on Array rather than through a constant.
  it("should use JustEnumerableStats") { [1]._jes_average.should eql(1) }

  it("should use OpenURI") { defined?(OpenURI).should eql('constant') }

  it("should use FasterCSV") { defined?(FasterCSV).should eql('constant') }

  it("should use OpenStruct") { defined?(OpenStruct).should eql('constant') }

end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "/../spec_helper")

# Covers the Array#dimensions extension.
describe Array do
  it "should be able to determine its dimensions" do
    flat   = [1,2,3]
    grid   = [[1,2,3], [1,2,3]]
    nested = [[[1,2,3], [1,2,3]], [[1,2,3], [1,2,3], [[1,2,3], [1,2,3]]]]

    flat.dimensions.should eql(1)
    grid.dimensions.should eql(2)
    nested.dimensions.should eql(3)
  end

  it "should depend on the first element to determine dimensions" do
    # The first element is a scalar, so the nested second element is ignored.
    mixed = [1, [1,2]]
    mixed.dimensions.should eql(1)
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
Age,Education,Income,Marital Status,Purchase
|
2
|
+
36 - 55,masters,high,single,will buy
|
3
|
+
18 - 35,high school,low,single,won't buy
|
4
|
+
36 - 55,masters,low,single,will buy
|
5
|
+
18 - 35,bachelors,high,single,won't buy
|
6
|
+
< 18,high school,low,single,will buy
|
7
|
+
18 - 35,bachelors,high,married,won't buy
|
8
|
+
36 - 55,bachelors,low,married,won't buy
|
9
|
+
> 55,bachelors,high,single,will buy
|
10
|
+
36 - 55,masters,low,married,won't buy
|
11
|
+
> 55,masters,low,married,will buy
|
12
|
+
36 - 55,masters,high,single,will buy
|
13
|
+
> 55,masters,high,single,will buy
|
14
|
+
< 18,high school,high,single,won't buy
|
15
|
+
36 - 55,masters,low,single,will buy
|
16
|
+
36 - 55,high school,low,single,will buy
|
17
|
+
< 18,high school,low,married,will buy
|
18
|
+
18 - 35,bachelors,high,married,won't buy
|
19
|
+
> 55,high school,high,married,will buy
|
20
|
+
> 55,bachelors,low,single,will buy
|
21
|
+
36 - 55,high school,high,married,won't buy
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: data_frame
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.8
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Richards
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-10-15 00:00:00 -06:00
|
13
|
+
default_executable: plain_frame
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: activesupport
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: davidrichards-just_enumerable_stats
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: fastercsv
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0"
|
44
|
+
version:
|
45
|
+
description: Data Frames with memoized transpose
|
46
|
+
email: davidlamontrichards@gmail.com
|
47
|
+
executables:
|
48
|
+
- plain_frame
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files: []
|
52
|
+
|
53
|
+
files:
|
54
|
+
- README.rdoc
|
55
|
+
- VERSION.yml
|
56
|
+
- bin/plain_frame
|
57
|
+
- lib/data_frame/arff.rb
|
58
|
+
- lib/data_frame/callback_array.rb
|
59
|
+
- lib/data_frame/core/column_management.rb
|
60
|
+
- lib/data_frame/core/filter.rb
|
61
|
+
- lib/data_frame/core/import.rb
|
62
|
+
- lib/data_frame/core/pre_process.rb
|
63
|
+
- lib/data_frame/core/saving.rb
|
64
|
+
- lib/data_frame/core/training.rb
|
65
|
+
- lib/data_frame/data_frame.rb
|
66
|
+
- lib/data_frame/id3.rb
|
67
|
+
- lib/data_frame/kmeans.rb
|
68
|
+
- lib/data_frame/labels_from_uci.rb
|
69
|
+
- lib/data_frame/mlp.rb
|
70
|
+
- lib/data_frame/model.rb
|
71
|
+
- lib/data_frame/parameter_capture.rb
|
72
|
+
- lib/data_frame/sbn.rb
|
73
|
+
- lib/data_frame/transposable_array.rb
|
74
|
+
- lib/data_frame.rb
|
75
|
+
- lib/ext/array.rb
|
76
|
+
- lib/ext/open_struct.rb
|
77
|
+
- lib/ext/string.rb
|
78
|
+
- lib/ext/symbol.rb
|
79
|
+
- spec/data_frame/arff_spec.rb
|
80
|
+
- spec/data_frame/callback_array_spec.rb
|
81
|
+
- spec/data_frame/core/column_management_spec.rb
|
82
|
+
- spec/data_frame/core/filter_spec.rb
|
83
|
+
- spec/data_frame/core/import_spec.rb
|
84
|
+
- spec/data_frame/core/pre_process_spec.rb
|
85
|
+
- spec/data_frame/core/saving_spec.rb
|
86
|
+
- spec/data_frame/core/training_spec.rb
|
87
|
+
- spec/data_frame/data_frame_spec.rb
|
88
|
+
- spec/data_frame/id3_spec.rb
|
89
|
+
- spec/data_frame/model_spec.rb
|
90
|
+
- spec/data_frame/parameter_capture_spec.rb
|
91
|
+
- spec/data_frame/transposable_array_spec.rb
|
92
|
+
- spec/data_frame_spec.rb
|
93
|
+
- spec/ext/array_spec.rb
|
94
|
+
- spec/fixtures/basic.csv
|
95
|
+
- spec/fixtures/discrete_testing.csv
|
96
|
+
- spec/fixtures/discrete_training.csv
|
97
|
+
- spec/spec_helper.rb
|
98
|
+
has_rdoc: true
|
99
|
+
homepage: http://github.com/davidrichards/data_frame
|
100
|
+
licenses: []
|
101
|
+
|
102
|
+
post_install_message:
|
103
|
+
rdoc_options:
|
104
|
+
- --inline-source
|
105
|
+
- --charset=UTF-8
|
106
|
+
require_paths:
|
107
|
+
- lib
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
109
|
+
requirements:
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: "0"
|
113
|
+
version:
|
114
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
115
|
+
requirements:
|
116
|
+
- - ">="
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: "0"
|
119
|
+
version:
|
120
|
+
requirements: []
|
121
|
+
|
122
|
+
rubyforge_project:
|
123
|
+
rubygems_version: 1.3.5
|
124
|
+
signing_key:
|
125
|
+
specification_version: 3
|
126
|
+
summary: Data Frames with memoized transpose
|
127
|
+
test_files: []
|
128
|
+
|