davidrichards-data_frame 0.0.18 → 0.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,12 +21,6 @@ describe DataFrame do
21
21
  @df.items.should be_empty
22
22
  end
23
23
 
24
- it "should be able to add an item" do
25
- item = [1,2,3,4]
26
- @df.add_item(item)
27
- @df.items.should eql([item])
28
- end
29
-
30
24
  it "should use just_enumerable_stats" do
31
25
  [1,2,3].std.should eql(1)
32
26
  lambda{[1,2,3].cor([2,3,5])}.should_not raise_error
@@ -74,12 +68,15 @@ describe DataFrame do
74
68
  end
75
69
  end
76
70
 
77
- it "should be able to import more than one row at a time" do
78
- @df.import([[2,2,2,2],[3,3,3,3],[4,4,4,4]])
79
- @df.row_labels = [:twos, :threes, :fours]
80
- @df.twos.should eql([2,2,2,2])
81
- @df.threes.should eql([3,3,3,3])
82
- @df.fours.should eql([4,4,4,4])
71
+ it "should be able to initialize from an array" do
72
+ contents = %{7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
73
+ 7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
74
+ }
75
+
76
+ @labels = [:x, :y, :month, :day, :ffmc, :dmc, :dc, :isi, :temp, :rh, :wind, :rain, :area]
77
+ @df = DataFrame.new(@labels)
78
+ @df.import(contents)
79
+ @df.labels.should eql(@labels)
83
80
  end
84
81
 
85
82
  context "csv" do
@@ -89,7 +86,7 @@ describe DataFrame do
89
86
  7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
90
87
  }
91
88
  labels = [:x, :y, :month, :day, :ffmc, :dmc, :dc, :isi, :temp, :rh, :wind, :rain, :area]
92
-
89
+
93
90
  @df = DataFrame.from_csv(contents)
94
91
  @df.labels.should eql(labels)
95
92
  @df.x.should eql([7,7])
@@ -109,22 +106,6 @@ describe DataFrame do
109
106
  end
110
107
  end
111
108
 
112
- it "should be able to remove a column" do
113
- @df = DataFrame.new :twos, :threes, :fours
114
- @df.import([[2,3,4], [2,3,4], [2,3,4], [2,3,4]])
115
- @df.drop!(:twos)
116
- @df.items.all? {|i| i.should eql([3,4])}
117
- @df.labels.should eql([:threes, :fours])
118
- end
119
-
120
- it "should be able to remove more than one column at a time" do
121
- @df = DataFrame.new :twos, :threes, :fours
122
- @df.import([[2,3,4], [2,3,4], [2,3,4], [2,3,4]])
123
- @df.drop!(:twos, :fours)
124
- @df.items.all? {|i| i.should eql([3])}
125
- @df.labels.should eql([:threes])
126
- end
127
-
128
109
  it "should offer a hash-like structure of columns" do
129
110
  @df.add [1,2,3,4]
130
111
  @df.add [5, 6, 7, 8]
@@ -156,202 +137,5 @@ describe DataFrame do
156
137
  @df.variables.should eql(@labels)
157
138
  end
158
139
 
159
- context "replace!" do
160
- before do
161
- @df.add [1,2,3,4]
162
- @df.add [5, 6, 7, 8]
163
- @doubler = lambda{|e| e * 2}
164
- end
165
-
166
- it "should only replace columns that actually exist" do
167
- lambda{@df.replace!(:not_a_column, &@doubler)}.should raise_error(
168
- ArgumentError, /Must provide the name of an existing column./)
169
- lambda{@df.replace!(:these, &@doubler)}.should_not raise_error
170
- end
171
-
172
- it "should be able to replace a column with a block" do
173
- @df.replace!(:these) {|e| e * 2}
174
- @df.these.should eql([2,10])
175
- end
176
-
177
- it "should be able to replace a column with an array" do
178
- @a = [5,9]
179
- @df.replace!(:these, @a)
180
- @df.these.should eql(@a)
181
- end
182
- end
183
-
184
- context "filter!" do
185
- before do
186
- @df.add [1,2,3,4]
187
- @df.add [5, 6, 7, 8]
188
- end
189
-
190
- it "should be able to filter a data frame with a block using an OpenStruct for each row" do
191
- @df.filter!(:open_struct) {|row| row.these == 5}
192
- @df.items.should eql([[5, 6, 7, 8]])
193
- end
194
-
195
- it "should be able to filter a data frame with a block using a Hash for each row" do
196
- @df.filter!(:hash) {|row| row[:these] == 5}
197
- @df.items.should eql([[5, 6, 7, 8]])
198
- end
199
-
200
- S4 = Struct.new(:one, :two, :three, :four)
201
- it "should be able to filter a data frame with a block using another class that uses the row as input" do
202
- @df.filter!(S4) {|row| row.one == 5}
203
- @df.items.should eql([[5, 6, 7, 8]])
204
- end
205
-
206
- it "should be able to filter a data frame with a block using an array for each row" do
207
- @df.filter! {|row| row.first == 5}
208
- @df.items.should eql([[5, 6, 7, 8]])
209
- end
210
-
211
- it "should be able to do fancy things with the row as the filter" do
212
- @df.filter! {|row| row.sum > 10}
213
- @df.items.should eql([[5, 6, 7, 8]])
214
- end
215
-
216
- it "should be able to generate a new data frame with filter" do
217
- new_df = @df.filter(:open_struct) {|row| row.these == 5}
218
- new_df.items.should eql([[5, 6, 7, 8]])
219
- @df.items.should eql([[1, 2, 3, 4], [5, 6, 7, 8]])
220
- end
221
-
222
- end
223
-
224
- context "filter_by_category" do
225
-
226
- before do
227
- @df = DataFrame.new(:weather, :date)
228
-
229
- (1..31).each do |i|
230
- @df.add [(i % 3 == 1) ? :fair : :good, Date.parse("07/#{i}/2009")]
231
- end
232
-
233
- @d1 = Date.parse("07/15/2009")
234
- @d2 = Date.parse("07/31/2009")
235
-
236
- end
237
-
238
- it "should be able to filter by category" do
239
- filtered = @df.filter_by_category(:weather => :good)
240
- filtered.weather.uniq.should eql([:good])
241
- @df.weather.uniq.should be_include(:fair)
242
- end
243
-
244
- it "should be able to manage ranges for filter values" do
245
- filtered = @df.filter_by_category(:date => (@d1..@d2))
246
- filtered.date.should_not be_include(Date.parse("07/01/2009"))
247
- filtered.date.should_not be_include(Date.parse("07/14/2009"))
248
- filtered.date.should be_include(Date.parse("07/15/2009"))
249
- filtered.date.should be_include(Date.parse("07/31/2009"))
250
- @df.date.should be_include(Date.parse("07/01/2009"))
251
- end
252
-
253
- it "should be able to take an array of values to filter with" do
254
- filtered = @df.filter_by_category(:date => [@d1, @d2])
255
- filtered.date.should_not be_include(Date.parse("07/01/2009"))
256
- filtered.date.should be_include(Date.parse("07/15/2009"))
257
- filtered.date.should be_include(Date.parse("07/31/2009"))
258
- end
259
-
260
- it "should have a destructive version" do
261
- @df.filter_by_category!(:date => [@d1, @d2])
262
- @df.date.should_not be_include(Date.parse("07/01/2009"))
263
- @df.date.should be_include(Date.parse("07/15/2009"))
264
- @df.date.should be_include(Date.parse("07/31/2009"))
265
- end
266
-
267
- end
268
-
269
- context "subset_from_columns" do
270
- before do
271
- @df.add [1,2,3,4]
272
- @df.add [5, 6, 7, 8]
273
- end
274
-
275
- it "should be able to create a subset of columns" do
276
- new_data_frame = @df.subset_from_columns(:these, :labels)
277
- new_data_frame.should_not eql(@df)
278
- new_data_frame.labels.should eql([:these, :labels])
279
- new_data_frame.items.should eql([[1,4],[5,8]])
280
- new_data_frame.these.should eql([1,5])
281
- end
282
- end
283
-
284
- it "should be able to j_binary_ize! a column, taking its categories and creating a column for each" do
285
- df = DataFrame.new(:observations)
286
- df.add [:many]
287
- df.add [:fine]
288
- df.add [:things]
289
- df.add [:are]
290
- df.add [:available]
291
- df.j_binary_ize!(:observations)
292
- df.observations_many.should eql([true, false, false, false, false])
293
- df.observations_fine.should eql([false, true, false, false, false])
294
- df.observations_things.should eql([false, false, true, false, false])
295
- df.observations_are.should eql([false, false, false, true, false])
296
- df.observations_available.should eql([false, false, false, false, true])
297
- df.observations.should eql([:many, :fine, :things, :are, :available])
298
- end
299
-
300
- it "should be able to j_binary_ize! a more normal column" do
301
- df = DataFrame.new(:observations)
302
- df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
303
- df.observations.add_category(:small) {|e| e <= 3}
304
- df.observations.add_category(:large) {|e| e >= 3}
305
- df.j_binary_ize!(:observations)
306
- df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
307
- df.observations_large.should eql([false, false, false, true, true, true, false, false, false])
308
- end
309
140
 
310
- it "should be able to j_binary_ize with non-adjacent sets (sets that allow a value to have more than one category)" do
311
- df = DataFrame.new(:observations)
312
- df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
313
- df.observations.add_category(:small) {|e| e <= 3}
314
- df.observations.add_category(:large) {|e| e >= 3}
315
- df.j_binary_ize!(:observations, :allow_overlap => true)
316
- df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
317
- df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
318
- end
319
-
320
- it "should be able to hold multiple ideas of a columns categories by resetting the category and re-running j_binary_ize" do
321
- df = DataFrame.new(:observations)
322
- df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
323
- df.observations.add_category(:small) {|e| e <= 3}
324
- df.observations.add_category(:large) {|e| e >= 3}
325
- df.j_binary_ize!(:observations, :allow_overlap => true)
326
- df.observations.set_categories(:odd => lambda{|e| e.odd?}, :even => lambda{|e| e.even?})
327
- df.j_binary_ize!(:observations)
328
- df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
329
- df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
330
- df.observations.should eql([1,2,3,4,5,4,3,2,1])
331
- df.observations_even.should eql([false, true, false, true, false, true, false, true, false])
332
- df.observations_odd.should eql([true, false, true, false, true, false, true, false, true])
333
- end
334
-
335
- context "append!" do
336
-
337
- before do
338
- @df.add [1,2,3,4]
339
- @df.add [5, 6, 7, 8]
340
- end
341
-
342
- it "should be able to append an array of values to the data frame" do
343
- @df.append!(:new_column, [5,5])
344
- @df.new_column.should eql([5,5])
345
- end
346
-
347
- it "should be able to append a default value to the data frame" do
348
- @df.append!(:new_column, :value)
349
- @df.new_column.should eql([:value, :value])
350
- end
351
-
352
- it "should use nil as the default value" do
353
- @df.append!(:new_column)
354
- @df.new_column.should eql([nil, nil])
355
- end
356
- end
357
141
  end
@@ -0,0 +1,22 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+ require 'data_frame/id3'
3
+
4
+ describe "DecisionTree" do
5
+
6
+ before do
7
+ @filename = File.expand_path(File.join(File.dirname(__FILE__), "../fixtures/discrete_training.csv"))
8
+ @df = DataFrame.from_csv(@filename)
9
+ @test_data = File.read(@filename)
10
+ end
11
+
12
+ it "should require the decisiontree gem" do
13
+ defined?(DecisionTree::ID3Tree).should eql('constant')
14
+ end
15
+
16
+ it "should be able to create a decision tree from a data frame" do
17
+ # Come back to this.
18
+ # @df.create_id3(:purchase)
19
+ # @df.id3.train
20
+ # @df.id3.predict(["36 - 55", "masters", "high", "single", 1]).should eql(1)
21
+ end
22
+ end
@@ -0,0 +1,13 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+
3
+ describe Array do
4
+ it "should be able to determine its dimensions" do
5
+ [1,2,3].dimensions.should eql(1)
6
+ [[1,2,3], [1,2,3]].dimensions.should eql(2)
7
+ [[[1,2,3], [1,2,3]], [[1,2,3], [1,2,3], [[1,2,3], [1,2,3]]]].dimensions.should eql(3)
8
+ end
9
+
10
+ it "should depend on the first element to determine dimensions" do
11
+ [1, [1,2]].dimensions.should eql(1)
12
+ end
13
+ end
@@ -0,0 +1,4 @@
1
+ 36 - 55,masters,high,single,will buy
2
+ 18 - 35,high school,low,single,won't buy
3
+ 18 - 35,masters,high,single,won't buy
4
+ 36 - 55,high school,low,single,will buy
@@ -0,0 +1,21 @@
1
+ Age,Education,Income,Marital Status,Purchase
2
+ 36 - 55,masters,high,single,will buy
3
+ 18 - 35,high school,low,single,won't buy
4
+ 36 - 55,masters,low,single,will buy
5
+ 18 - 35,bachelors,high,single,won't buy
6
+ < 18,high school,low,single,will buy
7
+ 18 - 35,bachelors,high,married,won't buy
8
+ 36 - 55,bachelors,low,married,won't buy
9
+ > 55,bachelors,high,single,will buy
10
+ 36 - 55,masters,low,married,won't buy
11
+ > 55,masters,low,married,will buy
12
+ 36 - 55,masters,high,single,will buy
13
+ > 55,masters,high,single,will buy
14
+ < 18,high school,high,single,won't buy
15
+ 36 - 55,masters,low,single,will buy
16
+ 36 - 55,high school,low,single,will buy
17
+ < 18,high school,low,married,will buy
18
+ 18 - 35,bachelors,high,married,won't buy
19
+ > 55,high school,high,married,will buy
20
+ > 55,bachelors,low,single,will buy
21
+ 36 - 55,high school,high,married,won't buy
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: davidrichards-data_frame
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.18
4
+ version: 0.0.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Richards
@@ -9,8 +9,8 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-09-13 00:00:00 -07:00
13
- default_executable:
12
+ date: 2009-09-24 00:00:00 -07:00
13
+ default_executable: plain_frame
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: activesupport
@@ -44,8 +44,8 @@ dependencies:
44
44
  version:
45
45
  description: Data Frames with memoized transpose
46
46
  email: davidlamontrichards@gmail.com
47
- executables: []
48
-
47
+ executables:
48
+ - plain_frame
49
49
  extensions: []
50
50
 
51
51
  extra_rdoc_files: []
@@ -53,31 +53,58 @@ extra_rdoc_files: []
53
53
  files:
54
54
  - README.rdoc
55
55
  - VERSION.yml
56
+ - bin/plain_frame
56
57
  - lib/data_frame
57
58
  - lib/data_frame/arff.rb
58
59
  - lib/data_frame/callback_array.rb
60
+ - lib/data_frame/core
61
+ - lib/data_frame/core/column_management.rb
62
+ - lib/data_frame/core/filter.rb
63
+ - lib/data_frame/core/import.rb
64
+ - lib/data_frame/core/pre_process.rb
65
+ - lib/data_frame/core/saving.rb
66
+ - lib/data_frame/core/training.rb
59
67
  - lib/data_frame/data_frame.rb
68
+ - lib/data_frame/id3.rb
69
+ - lib/data_frame/kmeans.rb
70
+ - lib/data_frame/labels_from_uci.rb
71
+ - lib/data_frame/mlp.rb
60
72
  - lib/data_frame/model.rb
61
73
  - lib/data_frame/parameter_capture.rb
74
+ - lib/data_frame/sbn.rb
62
75
  - lib/data_frame/transposable_array.rb
63
76
  - lib/data_frame.rb
64
77
  - lib/ext
78
+ - lib/ext/array.rb
65
79
  - lib/ext/open_struct.rb
66
80
  - lib/ext/string.rb
67
81
  - lib/ext/symbol.rb
68
82
  - spec/data_frame
69
83
  - spec/data_frame/arff_spec.rb
70
84
  - spec/data_frame/callback_array_spec.rb
85
+ - spec/data_frame/core
86
+ - spec/data_frame/core/column_management_spec.rb
87
+ - spec/data_frame/core/filter_spec.rb
88
+ - spec/data_frame/core/import_spec.rb
89
+ - spec/data_frame/core/pre_process_spec.rb
90
+ - spec/data_frame/core/saving_spec.rb
91
+ - spec/data_frame/core/training_spec.rb
71
92
  - spec/data_frame/data_frame_spec.rb
93
+ - spec/data_frame/id3_spec.rb
72
94
  - spec/data_frame/model_spec.rb
73
95
  - spec/data_frame/parameter_capture_spec.rb
74
96
  - spec/data_frame/transposable_array_spec.rb
75
97
  - spec/data_frame_spec.rb
98
+ - spec/ext
99
+ - spec/ext/array_spec.rb
76
100
  - spec/fixtures
77
101
  - spec/fixtures/basic.csv
102
+ - spec/fixtures/discrete_testing.csv
103
+ - spec/fixtures/discrete_training.csv
78
104
  - spec/spec_helper.rb
79
105
  has_rdoc: true
80
106
  homepage: http://github.com/davidrichards/data_frame
107
+ licenses:
81
108
  post_install_message:
82
109
  rdoc_options:
83
110
  - --inline-source
@@ -99,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
99
126
  requirements: []
100
127
 
101
128
  rubyforge_project:
102
- rubygems_version: 1.2.0
129
+ rubygems_version: 1.3.5
103
130
  signing_key:
104
131
  specification_version: 2
105
132
  summary: Data Frames with memoized transpose