data_frame 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45)
  1. data/README.rdoc +122 -0
  2. data/VERSION.yml +4 -0
  3. data/bin/plain_frame +22 -0
  4. data/lib/data_frame.rb +26 -0
  5. data/lib/data_frame/arff.rb +52 -0
  6. data/lib/data_frame/callback_array.rb +152 -0
  7. data/lib/data_frame/core/column_management.rb +147 -0
  8. data/lib/data_frame/core/filter.rb +48 -0
  9. data/lib/data_frame/core/import.rb +113 -0
  10. data/lib/data_frame/core/pre_process.rb +69 -0
  11. data/lib/data_frame/core/saving.rb +29 -0
  12. data/lib/data_frame/core/training.rb +46 -0
  13. data/lib/data_frame/data_frame.rb +115 -0
  14. data/lib/data_frame/id3.rb +28 -0
  15. data/lib/data_frame/kmeans.rb +10 -0
  16. data/lib/data_frame/labels_from_uci.rb +48 -0
  17. data/lib/data_frame/mlp.rb +18 -0
  18. data/lib/data_frame/model.rb +22 -0
  19. data/lib/data_frame/parameter_capture.rb +50 -0
  20. data/lib/data_frame/sbn.rb +18 -0
  21. data/lib/data_frame/transposable_array.rb +23 -0
  22. data/lib/ext/array.rb +11 -0
  23. data/lib/ext/open_struct.rb +5 -0
  24. data/lib/ext/string.rb +5 -0
  25. data/lib/ext/symbol.rb +5 -0
  26. data/spec/data_frame/arff_spec.rb +48 -0
  27. data/spec/data_frame/callback_array_spec.rb +148 -0
  28. data/spec/data_frame/core/column_management_spec.rb +128 -0
  29. data/spec/data_frame/core/filter_spec.rb +88 -0
  30. data/spec/data_frame/core/import_spec.rb +41 -0
  31. data/spec/data_frame/core/pre_process_spec.rb +103 -0
  32. data/spec/data_frame/core/saving_spec.rb +61 -0
  33. data/spec/data_frame/core/training_spec.rb +72 -0
  34. data/spec/data_frame/data_frame_spec.rb +141 -0
  35. data/spec/data_frame/id3_spec.rb +22 -0
  36. data/spec/data_frame/model_spec.rb +36 -0
  37. data/spec/data_frame/parameter_capture_spec.rb +32 -0
  38. data/spec/data_frame/transposable_array_spec.rb +138 -0
  39. data/spec/data_frame_spec.rb +29 -0
  40. data/spec/ext/array_spec.rb +13 -0
  41. data/spec/fixtures/basic.csv +3 -0
  42. data/spec/fixtures/discrete_testing.csv +4 -0
  43. data/spec/fixtures/discrete_training.csv +21 -0
  44. data/spec/spec_helper.rb +8 -0
  45. metadata +128 -0
@@ -0,0 +1,22 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+ require 'data_frame/id3'
3
+
4
+ describe "DecisionTree" do
5
+
6
+ before do
7
+ @filename = File.expand_path(File.join(File.dirname(__FILE__), "../fixtures/discrete_training.csv"))
8
+ @df = DataFrame.from_csv(@filename)
9
+ @test_data = File.read(@filename)
10
+ end
11
+
12
+ it "should require the decisiontree gem" do
13
+ defined?(DecisionTree::ID3Tree).should eql('constant')
14
+ end
15
+
16
+ it "should be able to create a decision tree from a data frame" do
17
+ # TODO: come back to this. The ID3 expectations below are commented out, so this example currently asserts nothing.
18
+ # @df.create_id3(:purchase)
19
+ # @df.id3.train
20
+ # @df.id3.predict(["36 - 55", "masters", "high", "single", 1]).should eql(1)
21
+ end
22
+ end
@@ -0,0 +1,36 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+
3
+ describe DataFrame, "model" do
4
+ before do
5
+ @csv = %{a,b,c
6
+ 1,2,3
7
+ 2,2,2
8
+ 4,5,6}
9
+ @df = DataFrame.from_csv(@csv)
10
+ end
11
+
12
+ it "should be able to define a model with a block" do
13
+ @df.model(:b2) do |m|
14
+ m.b 2
15
+ end
16
+
17
+ @df.models.table.keys.should eql([:b2])
18
+ @df.models.b2.size.should eql(2)
19
+ @df.models.b2.b.should eql([2,2])
20
+ end
21
+
22
+ it "should be able to define a model with a range of values" do
23
+ @df.model(:a12) do |m|
24
+ m.a(1..2)
25
+ end
26
+ @df.models.a12.a.should eql([1,2])
27
+ end
28
+
29
+ it "should be able to define a model with a set of values" do
30
+ @df.model(:a14) do |m|
31
+ m.a [1,4]
32
+ end
33
+ @df.models.a14.a.should eql([1,4])
34
+ end
35
+
36
+ end
@@ -0,0 +1,32 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+
3
+ describe ParameterCapture do
4
+
5
+ it "should take a block for a column list" do
6
+ pc = ParameterCapture.new do |p|
7
+ p.a 1
8
+ p.b 2
9
+ end
10
+ pc.parameters.table.should == {:a => 1, :b => 2}
11
+ end
12
+
13
+ it "should be able to capture an array as a parameter, meaning an or-condition" do
14
+ pc = ParameterCapture.new do |p|
15
+ p.or_condition [1,2]
16
+ end
17
+ pc.parameters.or_condition.should eql([1,2])
18
+ end
19
+
20
+ it "should be able to capture a range as a parameter, meaning a continuous-or-condition" do
21
+ pc = ParameterCapture.new do |p|
22
+ p.a(1..2)
23
+ end
24
+ pc.parameters.a.should eql((1..2))
25
+ end
26
+
27
+ it "should play setter/getter schizophrenia" do
28
+ pc = ParameterCapture.new {}
29
+ pc.show 1
30
+ pc.show.should eql(1)
31
+ end
32
+ end
@@ -0,0 +1,138 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+
3
+ describe TransposableArray do
4
+ before do
5
+ @ta = TransposableArray.new [[1,2,3],[4,5,6],[7,8,9]]
6
+ @t = [[1,4,7],[2,5,8],[3,6,9]]
7
+ end
8
+
9
+ it "should be able to transpose itself" do
10
+ @ta.transpose.should eql(@t)
11
+ end
12
+
13
+ it "should cache the transpose" do
14
+ @ta.cache.should be_nil
15
+ @ta.transpose
16
+ @ta.cache.should eql(@t)
17
+ end
18
+
19
+ it "should clear the cache on taint" do
20
+ @count = nil
21
+ @ta.transpose
22
+ @ta.taint
23
+ @ta.cache.should be_nil
24
+ end
25
+
26
+ it "should clear the cache on []=" do
27
+ @ta.transpose
28
+ @ta[0] = 1
29
+ @ta.cache.should be_nil
30
+ end
31
+
32
+ it "should clear the cache on <<" do
33
+ @ta.transpose
34
+ @ta << 1
35
+ @ta.cache.should be_nil
36
+ end
37
+
38
+ it "should clear the cache on delete" do
39
+ @ta.transpose
40
+ @ta.delete(0)
41
+ @ta.cache.should be_nil
42
+ end
43
+
44
+ it "should clear the cache on push" do
45
+ @ta.transpose
46
+ @ta.push(1)
47
+ @ta.cache.should be_nil
48
+ end
49
+
50
+ it "should clear the cache on pop" do
51
+ @ta.transpose
52
+ @ta.pop
53
+ @ta.cache.should be_nil
54
+ end
55
+
56
+ it "should clear the cache on shift" do
57
+ @ta.transpose
58
+ @ta.shift
59
+ @ta.cache.should be_nil
60
+ end
61
+
62
+ it "should clear the cache on unshift" do
63
+ @ta.transpose
64
+ @ta.unshift(1)
65
+ @ta.cache.should be_nil
66
+ end
67
+
68
+ it "should clear the cache on map!" do
69
+ @ta.transpose
70
+ @ta.map!{ |e| e }
71
+ @ta.cache.should be_nil
72
+ end
73
+
74
+ it "should clear the cache on sort!" do
75
+ @ta.transpose
76
+ @ta.sort!
77
+ @ta.cache.should be_nil
78
+ end
79
+
80
+ it "should clear the cache on reverse!" do
81
+ @ta.transpose
82
+ @ta.reverse!
83
+ @ta.cache.should be_nil
84
+ end
85
+
86
+ it "should clear the cache on collect!" do
87
+ @ta.transpose
88
+ @ta.collect! {|e| e}
89
+ @ta.cache.should be_nil
90
+ end
91
+
92
+ it "should clear the cache on compact!" do
93
+ @ta.transpose
94
+ @ta.compact!
95
+ @ta.cache.should be_nil
96
+ end
97
+
98
+ it "should clear the cache on reject!" do
99
+ @ta.transpose
100
+ @ta.reject! {|e| e}
101
+ @ta.cache.should be_nil
102
+ end
103
+
104
+ it "should clear the cache on slice!" do
105
+ @ta.transpose
106
+ @ta.slice!(1,2)
107
+ @ta.cache.should be_nil
108
+ end
109
+
110
+ it "should clear the cache on flatten!" do
111
+ @ta.transpose
112
+ @ta.flatten!
113
+ @ta.cache.should be_nil
114
+ end
115
+
116
+ it "should clear the cache on uniq!" do
117
+ @ta.transpose
118
+ @ta.uniq!
119
+ @ta.cache.should be_nil
120
+ end
121
+
122
+ it "should clear the cache on clear" do
123
+ @ta.transpose
124
+ @ta.clear
125
+ @ta.cache.should be_nil
126
+ end
127
+
128
+ it "should not adjust the array in other methods" do
129
+ @ta.transpose
130
+ @ta.at(0)
131
+ @ta.sort
132
+ @ta.uniq
133
+ @ta.find{|e| e}
134
+ @ta.cache.should eql(@t)
135
+ end
136
+ end
137
+
138
+
@@ -0,0 +1,29 @@
1
+ require File.join(File.dirname(__FILE__), "/spec_helper")
2
+
3
+ describe DataFrame, "gem dependencies" do
4
+
5
+ it "should use RubyGems" do
6
+ defined?(Gem).should eql('constant')
7
+ end
8
+
9
+ it "should use ActiveSupport" do
10
+ defined?(ActiveSupport).should eql('constant')
11
+ end
12
+
13
+ it "should use JustEnumerableStats" do
14
+ [1]._jes_average.should eql(1)
15
+ end
16
+
17
+ it "should use OpenURI" do
18
+ defined?(OpenURI).should eql('constant')
19
+ end
20
+
21
+ it "should use FasterCSV" do
22
+ defined?(FasterCSV).should eql('constant')
23
+ end
24
+
25
+ it "should use OpenStruct" do
26
+ defined?(OpenStruct).should eql('constant')
27
+ end
28
+
29
+ end
@@ -0,0 +1,13 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+
3
+ describe Array do
4
+ it "should be able to determine its dimensions" do
5
+ [1,2,3].dimensions.should eql(1)
6
+ [[1,2,3], [1,2,3]].dimensions.should eql(2)
7
+ [[[1,2,3], [1,2,3]], [[1,2,3], [1,2,3], [[1,2,3], [1,2,3]]]].dimensions.should eql(3)
8
+ end
9
+
10
+ it "should depend on the first element to determine dimensions" do
11
+ [1, [1,2]].dimensions.should eql(1)
12
+ end
13
+ end
@@ -0,0 +1,3 @@
1
+ X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
2
+ 7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
3
+ 7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
@@ -0,0 +1,4 @@
1
+ 36 - 55,masters,high,single,will buy
2
+ 18 - 35,high school,low,single,won't buy
3
+ 18 - 35,masters,high,single,won't buy
4
+ 36 - 55,high school,low,single,will buy
@@ -0,0 +1,21 @@
1
+ Age,Education,Income,Marital Status,Purchase
2
+ 36 - 55,masters,high,single,will buy
3
+ 18 - 35,high school,low,single,won't buy
4
+ 36 - 55,masters,low,single,will buy
5
+ 18 - 35,bachelors,high,single,won't buy
6
+ < 18,high school,low,single,will buy
7
+ 18 - 35,bachelors,high,married,won't buy
8
+ 36 - 55,bachelors,low,married,won't buy
9
+ > 55,bachelors,high,single,will buy
10
+ 36 - 55,masters,low,married,won't buy
11
+ > 55,masters,low,married,will buy
12
+ 36 - 55,masters,high,single,will buy
13
+ > 55,masters,high,single,will buy
14
+ < 18,high school,high,single,won't buy
15
+ 36 - 55,masters,low,single,will buy
16
+ 36 - 55,high school,low,single,will buy
17
+ < 18,high school,low,married,will buy
18
+ 18 - 35,bachelors,high,married,won't buy
19
+ > 55,high school,high,married,will buy
20
+ > 55,bachelors,low,single,will buy
21
+ 36 - 55,high school,high,married,won't buy
@@ -0,0 +1,8 @@
1
+ $: << File.join(File.dirname(__FILE__), "/../lib")
2
+ require 'rubygems'
3
+ require 'spec'
4
+ require 'data_frame'
5
+
6
+ Spec::Runner.configure do |config|
7
+
8
+ end
metadata ADDED
@@ -0,0 +1,128 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: data_frame
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.8
5
+ platform: ruby
6
+ authors:
7
+ - David Richards
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-15 00:00:00 -06:00
13
+ default_executable: plain_frame
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: activesupport
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: davidrichards-just_enumerable_stats
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: fastercsv
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ description: Data Frames with memoized transpose
46
+ email: davidlamontrichards@gmail.com
47
+ executables:
48
+ - plain_frame
49
+ extensions: []
50
+
51
+ extra_rdoc_files: []
52
+
53
+ files:
54
+ - README.rdoc
55
+ - VERSION.yml
56
+ - bin/plain_frame
57
+ - lib/data_frame/arff.rb
58
+ - lib/data_frame/callback_array.rb
59
+ - lib/data_frame/core/column_management.rb
60
+ - lib/data_frame/core/filter.rb
61
+ - lib/data_frame/core/import.rb
62
+ - lib/data_frame/core/pre_process.rb
63
+ - lib/data_frame/core/saving.rb
64
+ - lib/data_frame/core/training.rb
65
+ - lib/data_frame/data_frame.rb
66
+ - lib/data_frame/id3.rb
67
+ - lib/data_frame/kmeans.rb
68
+ - lib/data_frame/labels_from_uci.rb
69
+ - lib/data_frame/mlp.rb
70
+ - lib/data_frame/model.rb
71
+ - lib/data_frame/parameter_capture.rb
72
+ - lib/data_frame/sbn.rb
73
+ - lib/data_frame/transposable_array.rb
74
+ - lib/data_frame.rb
75
+ - lib/ext/array.rb
76
+ - lib/ext/open_struct.rb
77
+ - lib/ext/string.rb
78
+ - lib/ext/symbol.rb
79
+ - spec/data_frame/arff_spec.rb
80
+ - spec/data_frame/callback_array_spec.rb
81
+ - spec/data_frame/core/column_management_spec.rb
82
+ - spec/data_frame/core/filter_spec.rb
83
+ - spec/data_frame/core/import_spec.rb
84
+ - spec/data_frame/core/pre_process_spec.rb
85
+ - spec/data_frame/core/saving_spec.rb
86
+ - spec/data_frame/core/training_spec.rb
87
+ - spec/data_frame/data_frame_spec.rb
88
+ - spec/data_frame/id3_spec.rb
89
+ - spec/data_frame/model_spec.rb
90
+ - spec/data_frame/parameter_capture_spec.rb
91
+ - spec/data_frame/transposable_array_spec.rb
92
+ - spec/data_frame_spec.rb
93
+ - spec/ext/array_spec.rb
94
+ - spec/fixtures/basic.csv
95
+ - spec/fixtures/discrete_testing.csv
96
+ - spec/fixtures/discrete_training.csv
97
+ - spec/spec_helper.rb
98
+ has_rdoc: true
99
+ homepage: http://github.com/davidrichards/data_frame
100
+ licenses: []
101
+
102
+ post_install_message:
103
+ rdoc_options:
104
+ - --inline-source
105
+ - --charset=UTF-8
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: "0"
113
+ version:
114
+ required_rubygems_version: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ version: "0"
119
+ version:
120
+ requirements: []
121
+
122
+ rubyforge_project:
123
+ rubygems_version: 1.3.5
124
+ signing_key:
125
+ specification_version: 3
126
+ summary: Data Frames with memoized transpose
127
+ test_files: []
128
+