davidrichards-data_frame 0.0.18 → 0.0.19

Sign up to get free protection for your applications and to get access to all the features.
@@ -21,12 +21,6 @@ describe DataFrame do
21
21
  @df.items.should be_empty
22
22
  end
23
23
 
24
- it "should be able to add an item" do
25
- item = [1,2,3,4]
26
- @df.add_item(item)
27
- @df.items.should eql([item])
28
- end
29
-
30
24
  it "should use just_enumerable_stats" do
31
25
  [1,2,3].std.should eql(1)
32
26
  lambda{[1,2,3].cor([2,3,5])}.should_not raise_error
@@ -74,12 +68,15 @@ describe DataFrame do
74
68
  end
75
69
  end
76
70
 
77
- it "should be able to import more than one row at a time" do
78
- @df.import([[2,2,2,2],[3,3,3,3],[4,4,4,4]])
79
- @df.row_labels = [:twos, :threes, :fours]
80
- @df.twos.should eql([2,2,2,2])
81
- @df.threes.should eql([3,3,3,3])
82
- @df.fours.should eql([4,4,4,4])
71
+ it "should be able to initialize from an array" do
72
+ contents = %{7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
73
+ 7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
74
+ }
75
+
76
+ @labels = [:x, :y, :month, :day, :ffmc, :dmc, :dc, :isi, :temp, :rh, :wind, :rain, :area]
77
+ @df = DataFrame.new(@labels)
78
+ @df.import(contents)
79
+ @df.labels.should eql(@labels)
83
80
  end
84
81
 
85
82
  context "csv" do
@@ -89,7 +86,7 @@ describe DataFrame do
89
86
  7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
90
87
  }
91
88
  labels = [:x, :y, :month, :day, :ffmc, :dmc, :dc, :isi, :temp, :rh, :wind, :rain, :area]
92
-
89
+
93
90
  @df = DataFrame.from_csv(contents)
94
91
  @df.labels.should eql(labels)
95
92
  @df.x.should eql([7,7])
@@ -109,22 +106,6 @@ describe DataFrame do
109
106
  end
110
107
  end
111
108
 
112
- it "should be able to remove a column" do
113
- @df = DataFrame.new :twos, :threes, :fours
114
- @df.import([[2,3,4], [2,3,4], [2,3,4], [2,3,4]])
115
- @df.drop!(:twos)
116
- @df.items.all? {|i| i.should eql([3,4])}
117
- @df.labels.should eql([:threes, :fours])
118
- end
119
-
120
- it "should be able to remove more than one column at a time" do
121
- @df = DataFrame.new :twos, :threes, :fours
122
- @df.import([[2,3,4], [2,3,4], [2,3,4], [2,3,4]])
123
- @df.drop!(:twos, :fours)
124
- @df.items.all? {|i| i.should eql([3])}
125
- @df.labels.should eql([:threes])
126
- end
127
-
128
109
  it "should offer a hash-like structure of columns" do
129
110
  @df.add [1,2,3,4]
130
111
  @df.add [5, 6, 7, 8]
@@ -156,202 +137,5 @@ describe DataFrame do
156
137
  @df.variables.should eql(@labels)
157
138
  end
158
139
 
159
- context "replace!" do
160
- before do
161
- @df.add [1,2,3,4]
162
- @df.add [5, 6, 7, 8]
163
- @doubler = lambda{|e| e * 2}
164
- end
165
-
166
- it "should only replace columns that actually exist" do
167
- lambda{@df.replace!(:not_a_column, &@doubler)}.should raise_error(
168
- ArgumentError, /Must provide the name of an existing column./)
169
- lambda{@df.replace!(:these, &@doubler)}.should_not raise_error
170
- end
171
-
172
- it "should be able to replace a column with a block" do
173
- @df.replace!(:these) {|e| e * 2}
174
- @df.these.should eql([2,10])
175
- end
176
-
177
- it "should be able to replace a column with an array" do
178
- @a = [5,9]
179
- @df.replace!(:these, @a)
180
- @df.these.should eql(@a)
181
- end
182
- end
183
-
184
- context "filter!" do
185
- before do
186
- @df.add [1,2,3,4]
187
- @df.add [5, 6, 7, 8]
188
- end
189
-
190
- it "should be able to filter a data frame with a block using an OpenStruct for each row" do
191
- @df.filter!(:open_struct) {|row| row.these == 5}
192
- @df.items.should eql([[5, 6, 7, 8]])
193
- end
194
-
195
- it "should be able to filter a data frame with a block using a Hash for each row" do
196
- @df.filter!(:hash) {|row| row[:these] == 5}
197
- @df.items.should eql([[5, 6, 7, 8]])
198
- end
199
-
200
- S4 = Struct.new(:one, :two, :three, :four)
201
- it "should be able to filter a data frame with a block using another class that uses the row as input" do
202
- @df.filter!(S4) {|row| row.one == 5}
203
- @df.items.should eql([[5, 6, 7, 8]])
204
- end
205
-
206
- it "should be able to filter a data frame with a block using an array for each row" do
207
- @df.filter! {|row| row.first == 5}
208
- @df.items.should eql([[5, 6, 7, 8]])
209
- end
210
-
211
- it "should be able to do fancy things with the row as the filter" do
212
- @df.filter! {|row| row.sum > 10}
213
- @df.items.should eql([[5, 6, 7, 8]])
214
- end
215
-
216
- it "should be able to generate a new data frame with filter" do
217
- new_df = @df.filter(:open_struct) {|row| row.these == 5}
218
- new_df.items.should eql([[5, 6, 7, 8]])
219
- @df.items.should eql([[1, 2, 3, 4], [5, 6, 7, 8]])
220
- end
221
-
222
- end
223
-
224
- context "filter_by_category" do
225
-
226
- before do
227
- @df = DataFrame.new(:weather, :date)
228
-
229
- (1..31).each do |i|
230
- @df.add [(i % 3 == 1) ? :fair : :good, Date.parse("07/#{i}/2009")]
231
- end
232
-
233
- @d1 = Date.parse("07/15/2009")
234
- @d2 = Date.parse("07/31/2009")
235
-
236
- end
237
-
238
- it "should be able to filter by category" do
239
- filtered = @df.filter_by_category(:weather => :good)
240
- filtered.weather.uniq.should eql([:good])
241
- @df.weather.uniq.should be_include(:fair)
242
- end
243
-
244
- it "should be able to manage ranges for filter values" do
245
- filtered = @df.filter_by_category(:date => (@d1..@d2))
246
- filtered.date.should_not be_include(Date.parse("07/01/2009"))
247
- filtered.date.should_not be_include(Date.parse("07/14/2009"))
248
- filtered.date.should be_include(Date.parse("07/15/2009"))
249
- filtered.date.should be_include(Date.parse("07/31/2009"))
250
- @df.date.should be_include(Date.parse("07/01/2009"))
251
- end
252
-
253
- it "should be able to take an array of values to filter with" do
254
- filtered = @df.filter_by_category(:date => [@d1, @d2])
255
- filtered.date.should_not be_include(Date.parse("07/01/2009"))
256
- filtered.date.should be_include(Date.parse("07/15/2009"))
257
- filtered.date.should be_include(Date.parse("07/31/2009"))
258
- end
259
-
260
- it "should have a destructive version" do
261
- @df.filter_by_category!(:date => [@d1, @d2])
262
- @df.date.should_not be_include(Date.parse("07/01/2009"))
263
- @df.date.should be_include(Date.parse("07/15/2009"))
264
- @df.date.should be_include(Date.parse("07/31/2009"))
265
- end
266
-
267
- end
268
-
269
- context "subset_from_columns" do
270
- before do
271
- @df.add [1,2,3,4]
272
- @df.add [5, 6, 7, 8]
273
- end
274
-
275
- it "should be able to create a subset of columns" do
276
- new_data_frame = @df.subset_from_columns(:these, :labels)
277
- new_data_frame.should_not eql(@df)
278
- new_data_frame.labels.should eql([:these, :labels])
279
- new_data_frame.items.should eql([[1,4],[5,8]])
280
- new_data_frame.these.should eql([1,5])
281
- end
282
- end
283
-
284
- it "should be able to j_binary_ize! a column, taking its categories and creating a column for each" do
285
- df = DataFrame.new(:observations)
286
- df.add [:many]
287
- df.add [:fine]
288
- df.add [:things]
289
- df.add [:are]
290
- df.add [:available]
291
- df.j_binary_ize!(:observations)
292
- df.observations_many.should eql([true, false, false, false, false])
293
- df.observations_fine.should eql([false, true, false, false, false])
294
- df.observations_things.should eql([false, false, true, false, false])
295
- df.observations_are.should eql([false, false, false, true, false])
296
- df.observations_available.should eql([false, false, false, false, true])
297
- df.observations.should eql([:many, :fine, :things, :are, :available])
298
- end
299
-
300
- it "should be able to j_binary_ize! a more normal column" do
301
- df = DataFrame.new(:observations)
302
- df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
303
- df.observations.add_category(:small) {|e| e <= 3}
304
- df.observations.add_category(:large) {|e| e >= 3}
305
- df.j_binary_ize!(:observations)
306
- df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
307
- df.observations_large.should eql([false, false, false, true, true, true, false, false, false])
308
- end
309
140
 
310
- it "should be able to j_binary_ize with non-adjacent sets (sets that allow a value to have more than one category)" do
311
- df = DataFrame.new(:observations)
312
- df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
313
- df.observations.add_category(:small) {|e| e <= 3}
314
- df.observations.add_category(:large) {|e| e >= 3}
315
- df.j_binary_ize!(:observations, :allow_overlap => true)
316
- df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
317
- df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
318
- end
319
-
320
- it "should be able to hold multiple ideas of a columns categories by resetting the category and re-running j_binary_ize" do
321
- df = DataFrame.new(:observations)
322
- df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
323
- df.observations.add_category(:small) {|e| e <= 3}
324
- df.observations.add_category(:large) {|e| e >= 3}
325
- df.j_binary_ize!(:observations, :allow_overlap => true)
326
- df.observations.set_categories(:odd => lambda{|e| e.odd?}, :even => lambda{|e| e.even?})
327
- df.j_binary_ize!(:observations)
328
- df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
329
- df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
330
- df.observations.should eql([1,2,3,4,5,4,3,2,1])
331
- df.observations_even.should eql([false, true, false, true, false, true, false, true, false])
332
- df.observations_odd.should eql([true, false, true, false, true, false, true, false, true])
333
- end
334
-
335
- context "append!" do
336
-
337
- before do
338
- @df.add [1,2,3,4]
339
- @df.add [5, 6, 7, 8]
340
- end
341
-
342
- it "should be able to append an array of values to the data frame" do
343
- @df.append!(:new_column, [5,5])
344
- @df.new_column.should eql([5,5])
345
- end
346
-
347
- it "should be able to append a default value to the data frame" do
348
- @df.append!(:new_column, :value)
349
- @df.new_column.should eql([:value, :value])
350
- end
351
-
352
- it "should use nil as the default value" do
353
- @df.append!(:new_column)
354
- @df.new_column.should eql([nil, nil])
355
- end
356
- end
357
141
  end
@@ -0,0 +1,22 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+ require 'data_frame/id3'
3
+
4
+ describe "DecisionTree" do
5
+
6
+ before do
7
+ @filename = File.expand_path(File.join(File.dirname(__FILE__), "../fixtures/discrete_training.csv"))
8
+ @df = DataFrame.from_csv(@filename)
9
+ @test_data = File.read(@filename)
10
+ end
11
+
12
+ it "should require the decisiontree gem" do
13
+ defined?(DecisionTree::ID3Tree).should eql('constant')
14
+ end
15
+
16
+ it "should be able to create a decision tree from a data frame" do
17
+ # Come back to this.
18
+ # @df.create_id3(:purchase)
19
+ # @df.id3.train
20
+ # @df.id3.predict(["36 - 55", "masters", "high", "single", 1]).should eql(1)
21
+ end
22
+ end
@@ -0,0 +1,13 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+
3
+ describe Array do
4
+ it "should be able to determine its dimensions" do
5
+ [1,2,3].dimensions.should eql(1)
6
+ [[1,2,3], [1,2,3]].dimensions.should eql(2)
7
+ [[[1,2,3], [1,2,3]], [[1,2,3], [1,2,3], [[1,2,3], [1,2,3]]]].dimensions.should eql(3)
8
+ end
9
+
10
+ it "should depend on the first element to determine dimensions" do
11
+ [1, [1,2]].dimensions.should eql(1)
12
+ end
13
+ end
@@ -0,0 +1,4 @@
1
+ 36 - 55,masters,high,single,will buy
2
+ 18 - 35,high school,low,single,won't buy
3
+ 18 - 35,masters,high,single,won't buy
4
+ 36 - 55,high school,low,single,will buy
@@ -0,0 +1,21 @@
1
+ Age,Education,Income,Marital Status,Purchase
2
+ 36 - 55,masters,high,single,will buy
3
+ 18 - 35,high school,low,single,won't buy
4
+ 36 - 55,masters,low,single,will buy
5
+ 18 - 35,bachelors,high,single,won't buy
6
+ < 18,high school,low,single,will buy
7
+ 18 - 35,bachelors,high,married,won't buy
8
+ 36 - 55,bachelors,low,married,won't buy
9
+ > 55,bachelors,high,single,will buy
10
+ 36 - 55,masters,low,married,won't buy
11
+ > 55,masters,low,married,will buy
12
+ 36 - 55,masters,high,single,will buy
13
+ > 55,masters,high,single,will buy
14
+ < 18,high school,high,single,won't buy
15
+ 36 - 55,masters,low,single,will buy
16
+ 36 - 55,high school,low,single,will buy
17
+ < 18,high school,low,married,will buy
18
+ 18 - 35,bachelors,high,married,won't buy
19
+ > 55,high school,high,married,will buy
20
+ > 55,bachelors,low,single,will buy
21
+ 36 - 55,high school,high,married,won't buy
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: davidrichards-data_frame
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.18
4
+ version: 0.0.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Richards
@@ -9,8 +9,8 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-09-13 00:00:00 -07:00
13
- default_executable:
12
+ date: 2009-09-24 00:00:00 -07:00
13
+ default_executable: plain_frame
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: activesupport
@@ -44,8 +44,8 @@ dependencies:
44
44
  version:
45
45
  description: Data Frames with memoized transpose
46
46
  email: davidlamontrichards@gmail.com
47
- executables: []
48
-
47
+ executables:
48
+ - plain_frame
49
49
  extensions: []
50
50
 
51
51
  extra_rdoc_files: []
@@ -53,31 +53,58 @@ extra_rdoc_files: []
53
53
  files:
54
54
  - README.rdoc
55
55
  - VERSION.yml
56
+ - bin/plain_frame
56
57
  - lib/data_frame
57
58
  - lib/data_frame/arff.rb
58
59
  - lib/data_frame/callback_array.rb
60
+ - lib/data_frame/core
61
+ - lib/data_frame/core/column_management.rb
62
+ - lib/data_frame/core/filter.rb
63
+ - lib/data_frame/core/import.rb
64
+ - lib/data_frame/core/pre_process.rb
65
+ - lib/data_frame/core/saving.rb
66
+ - lib/data_frame/core/training.rb
59
67
  - lib/data_frame/data_frame.rb
68
+ - lib/data_frame/id3.rb
69
+ - lib/data_frame/kmeans.rb
70
+ - lib/data_frame/labels_from_uci.rb
71
+ - lib/data_frame/mlp.rb
60
72
  - lib/data_frame/model.rb
61
73
  - lib/data_frame/parameter_capture.rb
74
+ - lib/data_frame/sbn.rb
62
75
  - lib/data_frame/transposable_array.rb
63
76
  - lib/data_frame.rb
64
77
  - lib/ext
78
+ - lib/ext/array.rb
65
79
  - lib/ext/open_struct.rb
66
80
  - lib/ext/string.rb
67
81
  - lib/ext/symbol.rb
68
82
  - spec/data_frame
69
83
  - spec/data_frame/arff_spec.rb
70
84
  - spec/data_frame/callback_array_spec.rb
85
+ - spec/data_frame/core
86
+ - spec/data_frame/core/column_management_spec.rb
87
+ - spec/data_frame/core/filter_spec.rb
88
+ - spec/data_frame/core/import_spec.rb
89
+ - spec/data_frame/core/pre_process_spec.rb
90
+ - spec/data_frame/core/saving_spec.rb
91
+ - spec/data_frame/core/training_spec.rb
71
92
  - spec/data_frame/data_frame_spec.rb
93
+ - spec/data_frame/id3_spec.rb
72
94
  - spec/data_frame/model_spec.rb
73
95
  - spec/data_frame/parameter_capture_spec.rb
74
96
  - spec/data_frame/transposable_array_spec.rb
75
97
  - spec/data_frame_spec.rb
98
+ - spec/ext
99
+ - spec/ext/array_spec.rb
76
100
  - spec/fixtures
77
101
  - spec/fixtures/basic.csv
102
+ - spec/fixtures/discrete_testing.csv
103
+ - spec/fixtures/discrete_training.csv
78
104
  - spec/spec_helper.rb
79
105
  has_rdoc: true
80
106
  homepage: http://github.com/davidrichards/data_frame
107
+ licenses:
81
108
  post_install_message:
82
109
  rdoc_options:
83
110
  - --inline-source
@@ -99,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
99
126
  requirements: []
100
127
 
101
128
  rubyforge_project:
102
- rubygems_version: 1.2.0
129
+ rubygems_version: 1.3.5
103
130
  signing_key:
104
131
  specification_version: 2
105
132
  summary: Data Frames with memoized transpose