davidrichards-data_frame 0.0.13 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
1
  ---
2
2
  :major: 0
3
3
  :minor: 0
4
- :patch: 13
4
+ :patch: 14
@@ -212,6 +212,11 @@ class DataFrame
212
212
  self
213
213
  end
214
214
 
215
# Non-destructive counterpart of filter!.
#
# Clones this data frame, calls filter! on the clone with the same +as+
# argument and block, and returns whatever that call returns.
#
# as    - passed straight through to filter!; defaults to Array.
# block - the predicate handed to filter!.
def filter(as=Array, &block)
  clone.filter!(as, &block)
end
219
+
215
220
  def infer_class(obj)
216
221
  obj = obj.to_s.classify.constantize if obj.is_a?(Symbol)
217
222
  obj = obj.classify.constantize if obj.is_a?(String)
@@ -235,7 +240,7 @@ class DataFrame
235
240
  elsif as == Array
236
241
  row
237
242
  else
238
- as.new(row)
243
+ as.new(*row)
239
244
  end
240
245
  end
241
246
  protected :cast_row
@@ -253,5 +258,53 @@ class DataFrame
253
258
  end
254
259
  new_data_frame
255
260
  end
261
+
262
# Expands categorical columns into one boolean column per category.
#
# For each named column, the column is read with render_column and, for
# every entry of its +categories+, a new column named after that category
# is appended. A row's value in the new column is the result of comparing
# the row's original value with the category (true on a match).
#
# columns - one or more column names; each is normalized with
#           to_underscore_sym before lookup.
#
# Columns are added through append!, which raises ArgumentError when a
# column of that name already exists.
#
# Returns the +columns+ array (the value of Array#each).
def j_binary_ize!(*columns)
  columns.each do |col|
    values = render_column(col.to_underscore_sym)
    values.categories.each do |category|
      # The comparison already yields the boolean we want to store.
      append!(category, values.map { |e| e == category })
    end
  end
end
272
+
273
# Adds a uniquely named column to the table.
#
# column_name - label for the new column. It is normalized with
#               to_underscore_sym, and the uniqueness check is made
#               against the normalized label, because that is the form
#               stored in +labels+.
# value       - when an Array, row i receives value[i] (rows past the end
#               of the array receive nil); any other object is appended
#               to every row. Defaults to nil.
#
# Raises ArgumentError if the normalized label is already present.
# Returns the result of items.taint.
def append!(column_name, value=nil)
  label = column_name.to_underscore_sym
  raise ArgumentError, "Can't have duplicate column names" if labels.include?(label)
  labels << label

  if value.is_a?(Array)
    items.each_with_index { |item, i| item << value[i] }
  else
    items.each { |item| item << value }
  end

  # The rows were changed in place, so the items container was never told
  # about the modification; taint it explicitly.
  items.taint
end
289
+
290
# Returns a filtered duplicate of this data frame, leaving the receiver
# to be filtered only through the duplicate's filter! calls.
#
# hash - maps column names to the accepted value(s) for that column. Each
#        key is normalized with to_underscore_sym; keys that are not in
#        +labels+ are ignored. A value may be a single object, an Array
#        or a Range; a single object is treated as a one-element list.
#
# Every recognized key applies one filter!(:hash) pass to the duplicate,
# keeping the rows whose value for that column is included in the
# accepted values.
def filter_by_category(hash)
  filtered = dup
  hash.each do |key, value|
    column = key.to_underscore_sym
    next unless labels.include?(column)

    accepted = (value.is_a?(Array) || value.is_a?(Range)) ? value : [value]
    filtered.filter!(:hash) { |row| accepted.include?(row[column]) }
  end
  filtered
end
300
+
301
# Destructive form of filter_by_category: filters this data frame itself.
#
# hash - maps column names to the accepted value(s) for that column. Keys
#        are normalized with to_underscore_sym and skipped when they are
#        not in +labels+. A value that is neither an Array nor a Range is
#        wrapped in a one-element Array.
#
# For each recognized key, filter!(:hash) is run on the receiver, keeping
# rows whose value for that column is included in the accepted values.
#
# Returns the hash that was passed in (the value of Hash#each).
def filter_by_category!(hash)
  hash.each do |key, value|
    column = key.to_underscore_sym
    if labels.include?(column)
      is_collection = value.is_a?(Array) || value.is_a?(Range)
      accepted = is_collection ? value : [value]
      filter!(:hash) { |row| accepted.include?(row[column]) }
    end
  end
end
256
309
 
257
310
  end
@@ -6,6 +6,7 @@ class TransposableArray < CallbackArray
6
6
 
7
7
  orig_transpose = instance_method(:transpose)
8
8
  define_method(:transpose) {
9
+ self.untaint
9
10
  @transpose ||= orig_transpose.bind(self).call
10
11
  }
11
12
 
@@ -171,10 +171,83 @@ describe DataFrame do
171
171
  @df.add [5, 6, 7, 8]
172
172
  end
173
173
 
174
- it "should be able to filter a data frame with a block" do
174
+ it "should be able to filter a data frame with a block using an OpenStruct for each row" do
175
175
  @df.filter!(:open_struct) {|row| row.these == 5}
176
176
  @df.items.should eql([[5, 6, 7, 8]])
177
177
  end
178
+
179
it "should be able to filter a data frame with a block using a Hash for each row" do
  @df.filter!(:hash) {|row| row[:these] == 5}
  @df.items.should eql([[5, 6, 7, 8]])
end

# Kept in a local instead of a constant: a constant assigned inside an
# example-group block is defined globally and leaks into every other spec.
# The example below reaches the local through its closure.
row_struct = Struct.new(:one, :two, :three, :four)
it "should be able to filter a data frame with a block using another class that uses the row as input" do
  @df.filter!(row_struct) {|row| row.one == 5}
  @df.items.should eql([[5, 6, 7, 8]])
end

it "should be able to filter a data frame with a block using an array for each row" do
  @df.filter! {|row| row.first == 5}
  @df.items.should eql([[5, 6, 7, 8]])
end

it "should be able to do fancy things with the row as the filter" do
  @df.filter! {|row| row.sum > 10}
  @df.items.should eql([[5, 6, 7, 8]])
end

# The non-destructive form must leave the original rows in place.
it "should be able to generate a new data frame with filter" do
  new_df = @df.filter(:open_struct) {|row| row.these == 5}
  new_df.items.should eql([[5, 6, 7, 8]])
  @df.items.should eql([[1, 2, 3, 4], [5, 6, 7, 8]])
end
205
+
206
+ end
207
+
208
context "filter_by_category" do

  # Builds a frame with one row per day of July 2009. Dates are constructed
  # from explicit year/month/day parts: Date.parse only reads "07/15/2009"
  # as month/day on Ruby 1.8, while later Rubies treat the string as
  # day/month and reject it as an invalid date.
  before do
    @df = DataFrame.new(:weather, :date)

    (1..31).each do |i|
      @df.add [(i % 3 == 1) ? :fair : :good, Date.new(2009, 7, i)]
    end

    @d1 = Date.new(2009, 7, 15)
    @d2 = Date.new(2009, 7, 31)
  end

  it "should be able to filter by category" do
    filtered = @df.filter_by_category(:weather => :good)
    filtered.weather.uniq.should eql([:good])
    @df.weather.uniq.should be_include(:fair)
  end

  it "should be able to manage ranges for filter values" do
    filtered = @df.filter_by_category(:date => (@d1..@d2))
    filtered.date.should_not be_include(Date.new(2009, 7, 1))
    filtered.date.should_not be_include(Date.new(2009, 7, 14))
    filtered.date.should be_include(Date.new(2009, 7, 15))
    filtered.date.should be_include(Date.new(2009, 7, 31))
    @df.date.should be_include(Date.new(2009, 7, 1))
  end

  it "should be able to take an array of values to filter with" do
    filtered = @df.filter_by_category(:date => [@d1, @d2])
    filtered.date.should_not be_include(Date.new(2009, 7, 1))
    filtered.date.should be_include(Date.new(2009, 7, 15))
    filtered.date.should be_include(Date.new(2009, 7, 31))
  end

  it "should have a destructive version" do
    @df.filter_by_category!(:date => [@d1, @d2])
    @df.date.should_not be_include(Date.new(2009, 7, 1))
    @df.date.should be_include(Date.new(2009, 7, 15))
    @df.date.should be_include(Date.new(2009, 7, 31))
  end

end
179
252
 
180
253
  context "subset_from_columns" do
@@ -191,4 +264,43 @@ describe DataFrame do
191
264
  new_data_frame.these.should eql([1,5])
192
265
  end
193
266
  end
267
+
268
it "should be able to j_binary_ize! a column, taking its categories and creating a column for each" do
  observations = [:many, :fine, :things, :are, :available]

  df = DataFrame.new(:observations)
  observations.each { |observation| df.add [observation] }
  df.j_binary_ize!(:observations)

  # Each generated column must be true only on the row that holds its
  # category, so column k is expected to be true at position k alone.
  observations.each_with_index do |observation, position|
    expected = (0...observations.size).map { |i| i == position }
    df.send(observation).should eql(expected)
  end

  # The source column itself is left as it was.
  df.observations.should eql([:many, :fine, :things, :are, :available])
end
283
+
284
context "append!" do

  # Two rows, so every appended column is expected to hold two values.
  before do
    [[1, 2, 3, 4], [5, 6, 7, 8]].each { |row| @df.add row }
  end

  it "should be able to append an array of values to the data frame" do
    per_row_values = [5,5]
    @df.append!(:new_column, per_row_values)
    @df.new_column.should eql([5,5])
  end

  it "should be able to append a default value to the data frame" do
    @df.append!(:new_column, :value)
    @df.new_column.should eql([:value, :value])
  end

  it "should use nil as the default value" do
    @df.append!(:new_column)
    @df.new_column.should eql([nil, nil])
  end
end
194
306
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: davidrichards-data_frame
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Richards
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-16 00:00:00 -07:00
12
+ date: 2009-08-17 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency