data_frame 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. data/README.rdoc +122 -0
  2. data/VERSION.yml +4 -0
  3. data/bin/plain_frame +22 -0
  4. data/lib/data_frame.rb +26 -0
  5. data/lib/data_frame/arff.rb +52 -0
  6. data/lib/data_frame/callback_array.rb +152 -0
  7. data/lib/data_frame/core/column_management.rb +147 -0
  8. data/lib/data_frame/core/filter.rb +48 -0
  9. data/lib/data_frame/core/import.rb +113 -0
  10. data/lib/data_frame/core/pre_process.rb +69 -0
  11. data/lib/data_frame/core/saving.rb +29 -0
  12. data/lib/data_frame/core/training.rb +46 -0
  13. data/lib/data_frame/data_frame.rb +115 -0
  14. data/lib/data_frame/id3.rb +28 -0
  15. data/lib/data_frame/kmeans.rb +10 -0
  16. data/lib/data_frame/labels_from_uci.rb +48 -0
  17. data/lib/data_frame/mlp.rb +18 -0
  18. data/lib/data_frame/model.rb +22 -0
  19. data/lib/data_frame/parameter_capture.rb +50 -0
  20. data/lib/data_frame/sbn.rb +18 -0
  21. data/lib/data_frame/transposable_array.rb +23 -0
  22. data/lib/ext/array.rb +11 -0
  23. data/lib/ext/open_struct.rb +5 -0
  24. data/lib/ext/string.rb +5 -0
  25. data/lib/ext/symbol.rb +5 -0
  26. data/spec/data_frame/arff_spec.rb +48 -0
  27. data/spec/data_frame/callback_array_spec.rb +148 -0
  28. data/spec/data_frame/core/column_management_spec.rb +128 -0
  29. data/spec/data_frame/core/filter_spec.rb +88 -0
  30. data/spec/data_frame/core/import_spec.rb +41 -0
  31. data/spec/data_frame/core/pre_process_spec.rb +103 -0
  32. data/spec/data_frame/core/saving_spec.rb +61 -0
  33. data/spec/data_frame/core/training_spec.rb +72 -0
  34. data/spec/data_frame/data_frame_spec.rb +141 -0
  35. data/spec/data_frame/id3_spec.rb +22 -0
  36. data/spec/data_frame/model_spec.rb +36 -0
  37. data/spec/data_frame/parameter_capture_spec.rb +32 -0
  38. data/spec/data_frame/transposable_array_spec.rb +138 -0
  39. data/spec/data_frame_spec.rb +29 -0
  40. data/spec/ext/array_spec.rb +13 -0
  41. data/spec/fixtures/basic.csv +3 -0
  42. data/spec/fixtures/discrete_testing.csv +4 -0
  43. data/spec/fixtures/discrete_training.csv +21 -0
  44. data/spec/spec_helper.rb +8 -0
  45. metadata +128 -0
@@ -0,0 +1,22 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+ require 'data_frame/id3'
3
+
4
+ describe "DecisionTree" do
5
+
6
+ before do
7
+ @filename = File.expand_path(File.join(File.dirname(__FILE__), "../fixtures/discrete_training.csv"))
8
+ @df = DataFrame.from_csv(@filename)
9
+ @test_data = File.read(@filename)
10
+ end
11
+
12
+ it "should require the decisiontree gem" do
13
+ defined?(DecisionTree::ID3Tree).should eql('constant')
14
+ end
15
+
16
+ it "should be able to create a decision tree from a data frame" do
17
+ # Come back to this.
18
+ # @df.create_id3(:purchase)
19
+ # @df.id3.train
20
+ # @df.id3.predict(["36 - 55", "masters", "high", "single", 1]).should eql(1)
21
+ end
22
+ end
@@ -0,0 +1,36 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+
3
+ describe DataFrame, "model" do # specs for defining named models (row subsets) on a data frame
4
+ before do # builds a three-row frame with columns a, b, c from an inline CSV string
5
+ @csv = %{a,b,c
6
+ 1,2,3
7
+ 2,2,2
8
+ 4,5,6}
9
+ @df = DataFrame.from_csv(@csv)
10
+ end
11
+
12
+ it "should be able to define a model with a block" do
13
+ @df.model(:b2) do |m|
14
+ m.b 2 # condition on column b with the single value 2
15
+ end
16
+
17
+ @df.models.table.keys.should eql([:b2]) # NOTE(review): models looks OpenStruct-like (exposes #table) -- confirm in lib
18
+ @df.models.b2.size.should eql(2) # two fixture rows have b == 2
19
+ @df.models.b2.b.should eql([2,2])
20
+ end
21
+
22
+ it "should be able to define a model with a range of values" do
23
+ @df.model(:a12) do |m|
24
+ m.a(1..2) # condition on column a given as a Range
25
+ end
26
+ @df.models.a12.a.should eql([1,2]) # the fixture row with a == 4 is expected to be excluded
27
+ end
28
+
29
+ it "should be able to define a model with a set of values" do
30
+ @df.model(:a14) do |m|
31
+ m.a [1,4] # condition on column a given as an Array of accepted values
32
+ end
33
+ @df.models.a14.a.should eql([1,4]) # the fixture row with a == 2 is expected to be excluded
34
+ end
35
+
36
+ end
@@ -0,0 +1,32 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+
3
+ describe ParameterCapture do # specs for capturing name/value parameters from a block
4
+
5
+ it "should take a block for a column list" do
6
+ pc = ParameterCapture.new do |p|
7
+ p.a 1 # each call on the yielded object is expected to record name => argument
8
+ p.b 2
9
+ end
10
+ pc.parameters.table.should == {:a => 1, :b => 2} # NOTE(review): parameters looks OpenStruct-like (exposes #table) -- confirm in lib
11
+ end
12
+
13
+ it "should be able to capture an array as a parameter, meaning an or-condition" do
14
+ pc = ParameterCapture.new do |p|
15
+ p.or_condition [1,2]
16
+ end
17
+ pc.parameters.or_condition.should eql([1,2]) # the Array is expected to be stored unchanged
18
+ end
19
+
20
+ it "should be able to capture a range as a parameter, meaning a continuous-or-condition" do
21
+ pc = ParameterCapture.new do |p|
22
+ p.a(1..2)
23
+ end
24
+ pc.parameters.a.should eql((1..2)) # the Range is expected to be stored unchanged, not expanded
25
+ end
26
+
27
+ it "should play setter/getter schizophrenia" do
28
+ pc = ParameterCapture.new {}
29
+ pc.show 1 # called with an argument: expected to store 1 under the name show
30
+ pc.show.should eql(1) # called without an argument: expected to read the stored value back
31
+ end
32
+ end
@@ -0,0 +1,138 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+
3
+ describe TransposableArray do # specs for the memoized transpose and its invalidation on mutation
4
+ before do
5
+ @ta = TransposableArray.new [[1,2,3],[4,5,6],[7,8,9]]
6
+ @t = [[1,4,7],[2,5,8],[3,6,9]] # expected transpose of the 3x3 fixture above
7
+ end
8
+
9
+ it "should be able to transpose itself" do
10
+ @ta.transpose.should eql(@t)
11
+ end
12
+
13
+ it "should cache the transpose" do
14
+ @ta.cache.should be_nil # nothing is cached before the first transpose call
15
+ @ta.transpose
16
+ @ta.cache.should eql(@t)
17
+ end
18
+
19
+ it "should clear the cache on taint" do
20
+ @count = nil # NOTE(review): @count is never read in this spec -- looks like a leftover
21
+ @ta.transpose
22
+ @ta.taint # NOTE(review): Object#taint was deprecated in Ruby 2.7 and removed in 3.2 -- confirm TransposableArray defines its own
23
+ @ta.cache.should be_nil
24
+ end
25
+
26
+ it "should clear the cache on []=" do
27
+ @ta.transpose
28
+ @ta[0] = 1
29
+ @ta.cache.should be_nil
30
+ end
31
+
32
+ it "should clear the cache on <<" do
33
+ @ta.transpose
34
+ @ta << 1
35
+ @ta.cache.should be_nil
36
+ end
37
+
38
+ it "should clear the cache on delete" do
39
+ @ta.transpose
40
+ @ta.delete(0) # 0 is not an element of the fixture, so nothing is removed; the cache is still expected to clear
41
+ @ta.cache.should be_nil
42
+ end
43
+
44
+ it "should clear the cache on push" do
45
+ @ta.transpose
46
+ @ta.push(1)
47
+ @ta.cache.should be_nil
48
+ end
49
+
50
+ it "should clear the cache on pop" do
51
+ @ta.transpose
52
+ @ta.pop
53
+ @ta.cache.should be_nil
54
+ end
55
+
56
+ it "should clear the cache on shift" do
57
+ @ta.transpose
58
+ @ta.shift
59
+ @ta.cache.should be_nil
60
+ end
61
+
62
+ it "should clear the cache on unshift" do
63
+ @ta.transpose
64
+ @ta.unshift(1)
65
+ @ta.cache.should be_nil
66
+ end
67
+
68
+ it "should clear the cache on map!" do
69
+ @ta.transpose
70
+ @ta.map!{ |e| e } # identity block: contents are unchanged, only the call itself matters here
71
+ @ta.cache.should be_nil
72
+ end
73
+
74
+ it "should clear the cache on sort!" do
75
+ @ta.transpose
76
+ @ta.sort!
77
+ @ta.cache.should be_nil
78
+ end
79
+
80
+ it "should clear the cache on reverse!" do
81
+ @ta.transpose
82
+ @ta.reverse!
83
+ @ta.cache.should be_nil
84
+ end
85
+
86
+ it "should clear the cache on collect!" do
87
+ @ta.transpose
88
+ @ta.collect! {|e| e} # identity block, as in the map! example
89
+ @ta.cache.should be_nil
90
+ end
91
+
92
+ it "should clear the cache on compact!" do
93
+ @ta.transpose
94
+ @ta.compact! # fixture holds no nils, so nothing is removed; the cache is still expected to clear
95
+ @ta.cache.should be_nil
96
+ end
97
+
98
+ it "should clear the cache on reject!" do
99
+ @ta.transpose
100
+ @ta.reject! {|e| e} # every row is truthy, so this rejects all rows
101
+ @ta.cache.should be_nil
102
+ end
103
+
104
+ it "should clear the cache on slice!" do
105
+ @ta.transpose
106
+ @ta.slice!(1,2)
107
+ @ta.cache.should be_nil
108
+ end
109
+
110
+ it "should clear the cache on flatten!" do
111
+ @ta.transpose
112
+ @ta.flatten!
113
+ @ta.cache.should be_nil
114
+ end
115
+
116
+ it "should clear the cache on uniq!" do
117
+ @ta.transpose
118
+ @ta.uniq! # fixture rows are already unique, so nothing is removed; the cache is still expected to clear
119
+ @ta.cache.should be_nil
120
+ end
121
+
122
+ it "should clear the cache on clear" do
123
+ @ta.transpose
124
+ @ta.clear
125
+ @ta.cache.should be_nil
126
+ end
127
+
128
+ it "should not adjust the array in other methods" do
129
+ @ta.transpose
130
+ @ta.at(0) # the four calls here are non-mutating reads
131
+ @ta.sort
132
+ @ta.uniq
133
+ @ta.find{|e| e}
134
+ @ta.cache.should eql(@t) # cache must survive the read-only calls above
135
+ end
136
+ end
137
+
138
+
@@ -0,0 +1,29 @@
1
+ require File.join(File.dirname(__FILE__), "/spec_helper")
2
+
3
+ describe DataFrame, "gem dependencies" do # smoke tests that each library is available once spec_helper has loaded the gem
4
+
5
+ it "should use RubyGems" do
6
+ defined?(Gem).should eql('constant') # defined? yields the string "constant" when the constant is loaded
7
+ end
8
+
9
+ it "should use ActiveSupport" do
10
+ defined?(ActiveSupport).should eql('constant')
11
+ end
12
+
13
+ it "should use JustEnumerableStats" do
14
+ [1]._jes_average.should eql(1) # NOTE(review): _jes_average presumably comes from just_enumerable_stats -- confirm
15
+ end
16
+
17
+ it "should use OpenURI" do
18
+ defined?(OpenURI).should eql('constant')
19
+ end
20
+
21
+ it "should use FasterCSV" do
22
+ defined?(FasterCSV).should eql('constant')
23
+ end
24
+
25
+ it "should use OpenStruct" do
26
+ defined?(OpenStruct).should eql('constant')
27
+ end
28
+
29
+ end
@@ -0,0 +1,13 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+
3
+ describe Array do # specs for the Array#dimensions extension
4
+ it "should be able to determine its dimensions" do
5
+ [1,2,3].dimensions.should eql(1)
6
+ [[1,2,3], [1,2,3]].dimensions.should eql(2)
7
+ [[[1,2,3], [1,2,3]], [[1,2,3], [1,2,3], [[1,2,3], [1,2,3]]]].dimensions.should eql(3) # second element nests one level deeper, yet 3 is expected
8
+ end
9
+
10
+ it "should depend on the first element to determine dimensions" do
11
+ [1, [1,2]].dimensions.should eql(1) # first element is a scalar, so the nested second element is expected to be ignored
12
+ end
13
+ end
@@ -0,0 +1,3 @@
1
+ X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
2
+ 7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
3
+ 7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
@@ -0,0 +1,4 @@
1
+ 36 - 55,masters,high,single,will buy
2
+ 18 - 35,high school,low,single,won't buy
3
+ 18 - 35,masters,high,single,won't buy
4
+ 36 - 55,high school,low,single,will buy
@@ -0,0 +1,21 @@
1
+ Age,Education,Income,Marital Status,Purchase
2
+ 36 - 55,masters,high,single,will buy
3
+ 18 - 35,high school,low,single,won't buy
4
+ 36 - 55,masters,low,single,will buy
5
+ 18 - 35,bachelors,high,single,won't buy
6
+ < 18,high school,low,single,will buy
7
+ 18 - 35,bachelors,high,married,won't buy
8
+ 36 - 55,bachelors,low,married,won't buy
9
+ > 55,bachelors,high,single,will buy
10
+ 36 - 55,masters,low,married,won't buy
11
+ > 55,masters,low,married,will buy
12
+ 36 - 55,masters,high,single,will buy
13
+ > 55,masters,high,single,will buy
14
+ < 18,high school,high,single,won't buy
15
+ 36 - 55,masters,low,single,will buy
16
+ 36 - 55,high school,low,single,will buy
17
+ < 18,high school,low,married,will buy
18
+ 18 - 35,bachelors,high,married,won't buy
19
+ > 55,high school,high,married,will buy
20
+ > 55,bachelors,low,single,will buy
21
+ 36 - 55,high school,high,married,won't buy
@@ -0,0 +1,8 @@
1
+ $: << File.join(File.dirname(__FILE__), "/../lib") # append ../lib (relative to this file) to the load path
2
+ require 'rubygems'
3
+ require 'spec' # RSpec 1.x entry point; provides Spec::Runner used below
4
+ require 'data_frame'
5
+
6
+ Spec::Runner.configure do |config| # block is empty: no custom runner configuration is set
7
+
8
+ end
metadata ADDED
@@ -0,0 +1,128 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: data_frame
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.8
5
+ platform: ruby
6
+ authors:
7
+ - David Richards
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-15 00:00:00 -06:00
13
+ default_executable: plain_frame
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: activesupport
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: davidrichards-just_enumerable_stats
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: fastercsv
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ description: Data Frames with memoized transpose
46
+ email: davidlamontrichards@gmail.com
47
+ executables:
48
+ - plain_frame
49
+ extensions: []
50
+
51
+ extra_rdoc_files: []
52
+
53
+ files:
54
+ - README.rdoc
55
+ - VERSION.yml
56
+ - bin/plain_frame
57
+ - lib/data_frame/arff.rb
58
+ - lib/data_frame/callback_array.rb
59
+ - lib/data_frame/core/column_management.rb
60
+ - lib/data_frame/core/filter.rb
61
+ - lib/data_frame/core/import.rb
62
+ - lib/data_frame/core/pre_process.rb
63
+ - lib/data_frame/core/saving.rb
64
+ - lib/data_frame/core/training.rb
65
+ - lib/data_frame/data_frame.rb
66
+ - lib/data_frame/id3.rb
67
+ - lib/data_frame/kmeans.rb
68
+ - lib/data_frame/labels_from_uci.rb
69
+ - lib/data_frame/mlp.rb
70
+ - lib/data_frame/model.rb
71
+ - lib/data_frame/parameter_capture.rb
72
+ - lib/data_frame/sbn.rb
73
+ - lib/data_frame/transposable_array.rb
74
+ - lib/data_frame.rb
75
+ - lib/ext/array.rb
76
+ - lib/ext/open_struct.rb
77
+ - lib/ext/string.rb
78
+ - lib/ext/symbol.rb
79
+ - spec/data_frame/arff_spec.rb
80
+ - spec/data_frame/callback_array_spec.rb
81
+ - spec/data_frame/core/column_management_spec.rb
82
+ - spec/data_frame/core/filter_spec.rb
83
+ - spec/data_frame/core/import_spec.rb
84
+ - spec/data_frame/core/pre_process_spec.rb
85
+ - spec/data_frame/core/saving_spec.rb
86
+ - spec/data_frame/core/training_spec.rb
87
+ - spec/data_frame/data_frame_spec.rb
88
+ - spec/data_frame/id3_spec.rb
89
+ - spec/data_frame/model_spec.rb
90
+ - spec/data_frame/parameter_capture_spec.rb
91
+ - spec/data_frame/transposable_array_spec.rb
92
+ - spec/data_frame_spec.rb
93
+ - spec/ext/array_spec.rb
94
+ - spec/fixtures/basic.csv
95
+ - spec/fixtures/discrete_testing.csv
96
+ - spec/fixtures/discrete_training.csv
97
+ - spec/spec_helper.rb
98
+ has_rdoc: true
99
+ homepage: http://github.com/davidrichards/data_frame
100
+ licenses: []
101
+
102
+ post_install_message:
103
+ rdoc_options:
104
+ - --inline-source
105
+ - --charset=UTF-8
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: "0"
113
+ version:
114
+ required_rubygems_version: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ version: "0"
119
+ version:
120
+ requirements: []
121
+
122
+ rubyforge_project:
123
+ rubygems_version: 1.3.5
124
+ signing_key:
125
+ specification_version: 3
126
+ summary: Data Frames with memoized transpose
127
+ test_files: []
128
+