davidrichards-data_frame 0.0.13 → 0.0.14

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,4 @@
1
1
  ---
2
2
  :major: 0
3
3
  :minor: 0
4
- :patch: 13
4
+ :patch: 14
@@ -212,6 +212,11 @@ class DataFrame
212
212
  self
213
213
  end
214
214
 
215
+ def filter(as=Array, &block)
216
+ new_data_frame = self.clone
217
+ new_data_frame.filter!(as, &block)
218
+ end
219
+
215
220
  def infer_class(obj)
216
221
  obj = obj.to_s.classify.constantize if obj.is_a?(Symbol)
217
222
  obj = obj.classify.constantize if obj.is_a?(String)
@@ -235,7 +240,7 @@ class DataFrame
235
240
  elsif as == Array
236
241
  row
237
242
  else
238
- as.new(row)
243
+ as.new(*row)
239
244
  end
240
245
  end
241
246
  protected :cast_row
@@ -253,5 +258,53 @@ class DataFrame
253
258
  end
254
259
  new_data_frame
255
260
  end
261
+
262
+ # A weird name. This creates a column for every category in a column
263
+ # and marks each row by its value
264
+ def j_binary_ize!(*columns)
265
+ columns.each do |col|
266
+ values = render_column(col.to_underscore_sym)
267
+ values.categories.each do |category|
268
+ self.append!(category, values.map{|e| e == category ? true : false})
269
+ end
270
+ end
271
+ end
272
+
273
+ # Adds a unique column to the table
274
+ def append!(column_name, value=nil)
275
+ raise ArgumentError, "Can't have duplicate column names" if self.labels.include?(column_name)
276
+ self.labels << column_name.to_underscore_sym
277
+ if value.is_a?(Array)
278
+ self.items.each_with_index do |item, i|
279
+ item << value[i]
280
+ end
281
+ else
282
+ self.items.each do |item|
283
+ item << value
284
+ end
285
+ end
286
+ # Because we are tainting the sub arrays, the TaintableArray doesn't know it's been changed.
287
+ self.items.taint
288
+ end
289
+
290
+ def filter_by_category(hash)
291
+ new_data_frame = self.dup
292
+ hash.each do |key, value|
293
+ key = key.to_underscore_sym
294
+ next unless self.labels.include?(key)
295
+ value = [value] unless value.is_a?(Array) or value.is_a?(Range)
296
+ new_data_frame.filter!(:hash) {|row| value.include?(row[key])}
297
+ end
298
+ new_data_frame
299
+ end
300
+
301
+ def filter_by_category!(hash)
302
+ hash.each do |key, value|
303
+ key = key.to_underscore_sym
304
+ next unless self.labels.include?(key)
305
+ value = [value] unless value.is_a?(Array) or value.is_a?(Range)
306
+ self.filter!(:hash) {|row| value.include?(row[key])}
307
+ end
308
+ end
256
309
 
257
310
  end
@@ -6,6 +6,7 @@ class TransposableArray < CallbackArray
6
6
 
7
7
  orig_transpose = instance_method(:transpose)
8
8
  define_method(:transpose) {
9
+ self.untaint
9
10
  @transpose ||= orig_transpose.bind(self).call
10
11
  }
11
12
 
@@ -171,10 +171,83 @@ describe DataFrame do
171
171
  @df.add [5, 6, 7, 8]
172
172
  end
173
173
 
174
- it "should be able to filter a data frame with a block" do
174
+ it "should be able to filter a data frame with a block using an OpenStruct for each row" do
175
175
  @df.filter!(:open_struct) {|row| row.these == 5}
176
176
  @df.items.should eql([[5, 6, 7, 8]])
177
177
  end
178
+
179
+ it "should be able to filter a data frame with a block using a Hash for each row" do
180
+ @df.filter!(:hash) {|row| row[:these] == 5}
181
+ @df.items.should eql([[5, 6, 7, 8]])
182
+ end
183
+
184
+ S4 = Struct.new(:one, :two, :three, :four)
185
+ it "should be able to filter a data frame with a block using another class that uses the row as input" do
186
+ @df.filter!(S4) {|row| row.one == 5}
187
+ @df.items.should eql([[5, 6, 7, 8]])
188
+ end
189
+
190
+ it "should be able to filter a data frame with a block using an array for each row" do
191
+ @df.filter! {|row| row.first == 5}
192
+ @df.items.should eql([[5, 6, 7, 8]])
193
+ end
194
+
195
+ it "should be able to do fancy things with the row as the filter" do
196
+ @df.filter! {|row| row.sum > 10}
197
+ @df.items.should eql([[5, 6, 7, 8]])
198
+ end
199
+
200
+ it "should be able to generate a new data frame with filter" do
201
+ new_df = @df.filter(:open_struct) {|row| row.these == 5}
202
+ new_df.items.should eql([[5, 6, 7, 8]])
203
+ @df.items.should eql([[1, 2, 3, 4], [5, 6, 7, 8]])
204
+ end
205
+
206
+ end
207
+
208
+ context "filter_by_category" do
209
+
210
+ before do
211
+ @df = DataFrame.new(:weather, :date)
212
+
213
+ (1..31).each do |i|
214
+ @df.add [(i % 3 == 1) ? :fair : :good, Date.parse("07/#{i}/2009")]
215
+ end
216
+
217
+ @d1 = Date.parse("07/15/2009")
218
+ @d2 = Date.parse("07/31/2009")
219
+
220
+ end
221
+
222
+ it "should be able to filter by category" do
223
+ filtered = @df.filter_by_category(:weather => :good)
224
+ filtered.weather.uniq.should eql([:good])
225
+ @df.weather.uniq.should be_include(:fair)
226
+ end
227
+
228
+ it "should be able to manage ranges for filter values" do
229
+ filtered = @df.filter_by_category(:date => (@d1..@d2))
230
+ filtered.date.should_not be_include(Date.parse("07/01/2009"))
231
+ filtered.date.should_not be_include(Date.parse("07/14/2009"))
232
+ filtered.date.should be_include(Date.parse("07/15/2009"))
233
+ filtered.date.should be_include(Date.parse("07/31/2009"))
234
+ @df.date.should be_include(Date.parse("07/01/2009"))
235
+ end
236
+
237
+ it "should be able to take an array of values to filter with" do
238
+ filtered = @df.filter_by_category(:date => [@d1, @d2])
239
+ filtered.date.should_not be_include(Date.parse("07/01/2009"))
240
+ filtered.date.should be_include(Date.parse("07/15/2009"))
241
+ filtered.date.should be_include(Date.parse("07/31/2009"))
242
+ end
243
+
244
+ it "should have a destructive version" do
245
+ @df.filter_by_category!(:date => [@d1, @d2])
246
+ @df.date.should_not be_include(Date.parse("07/01/2009"))
247
+ @df.date.should be_include(Date.parse("07/15/2009"))
248
+ @df.date.should be_include(Date.parse("07/31/2009"))
249
+ end
250
+
178
251
  end
179
252
 
180
253
  context "subset_from_columns" do
@@ -191,4 +264,43 @@ describe DataFrame do
191
264
  new_data_frame.these.should eql([1,5])
192
265
  end
193
266
  end
267
+
268
+ it "should be able to j_binary_ize! a column, taking its categories and creating a column for each" do
269
+ df = DataFrame.new(:observations)
270
+ df.add [:many]
271
+ df.add [:fine]
272
+ df.add [:things]
273
+ df.add [:are]
274
+ df.add [:available]
275
+ df.j_binary_ize!(:observations)
276
+ df.many.should eql([true, false, false, false, false])
277
+ df.fine.should eql([false, true, false, false, false])
278
+ df.things.should eql([false, false, true, false, false])
279
+ df.are.should eql([false, false, false, true, false])
280
+ df.available.should eql([false, false, false, false, true])
281
+ df.observations.should eql([:many, :fine, :things, :are, :available])
282
+ end
283
+
284
+ context "append!" do
285
+
286
+ before do
287
+ @df.add [1,2,3,4]
288
+ @df.add [5, 6, 7, 8]
289
+ end
290
+
291
+ it "should be able to append an array of values to the data frame" do
292
+ @df.append!(:new_column, [5,5])
293
+ @df.new_column.should eql([5,5])
294
+ end
295
+
296
+ it "should be able to append a default value to the data frame" do
297
+ @df.append!(:new_column, :value)
298
+ @df.new_column.should eql([:value, :value])
299
+ end
300
+
301
+ it "should use nil as the default value" do
302
+ @df.append!(:new_column)
303
+ @df.new_column.should eql([nil, nil])
304
+ end
305
+ end
194
306
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: davidrichards-data_frame
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Richards
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-16 00:00:00 -07:00
12
+ date: 2009-08-17 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency