davidrichards-data_frame 0.0.13 → 0.0.14
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION.yml +1 -1
- data/lib/data_frame.rb +54 -1
- data/lib/data_frame/transposable_array.rb +1 -0
- data/spec/data_frame_spec.rb +113 -1
- metadata +2 -2
data/VERSION.yml
CHANGED
data/lib/data_frame.rb
CHANGED
@@ -212,6 +212,11 @@ class DataFrame
|
|
212
212
|
self
|
213
213
|
end
|
214
214
|
|
215
|
+
def filter(as=Array, &block)
|
216
|
+
new_data_frame = self.clone
|
217
|
+
new_data_frame.filter!(as, &block)
|
218
|
+
end
|
219
|
+
|
215
220
|
def infer_class(obj)
|
216
221
|
obj = obj.to_s.classify.constantize if obj.is_a?(Symbol)
|
217
222
|
obj = obj.classify.constantize if obj.is_a?(String)
|
@@ -235,7 +240,7 @@ class DataFrame
|
|
235
240
|
elsif as == Array
|
236
241
|
row
|
237
242
|
else
|
238
|
-
as.new(row)
|
243
|
+
as.new(*row)
|
239
244
|
end
|
240
245
|
end
|
241
246
|
protected :cast_row
|
@@ -253,5 +258,53 @@ class DataFrame
|
|
253
258
|
end
|
254
259
|
new_data_frame
|
255
260
|
end
|
261
|
+
|
262
|
+
# A weird name. This creates a column for every category in a column
|
263
|
+
# and marks each row true if its value equals that category, false otherwise
|
264
|
+
def j_binary_ize!(*columns)
|
265
|
+
columns.each do |col|
|
266
|
+
values = render_column(col.to_underscore_sym)
|
267
|
+
values.categories.each do |category|
|
268
|
+
self.append!(category, values.map{|e| e == category ? true : false})
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
# Adds a unique column to the table
|
274
|
+
def append!(column_name, value=nil)
|
275
|
+
raise ArgumentError, "Can't have duplicate column names" if self.labels.include?(column_name)
|
276
|
+
self.labels << column_name.to_underscore_sym
|
277
|
+
if value.is_a?(Array)
|
278
|
+
self.items.each_with_index do |item, i|
|
279
|
+
item << value[i]
|
280
|
+
end
|
281
|
+
else
|
282
|
+
self.items.each do |item|
|
283
|
+
item << value
|
284
|
+
end
|
285
|
+
end
|
286
|
+
# Because we are modifying the sub-arrays in place, the TaintableArray doesn't know it's been changed, so we taint it explicitly.
|
287
|
+
self.items.taint
|
288
|
+
end
|
289
|
+
|
290
|
+
def filter_by_category(hash)
|
291
|
+
new_data_frame = self.dup
|
292
|
+
hash.each do |key, value|
|
293
|
+
key = key.to_underscore_sym
|
294
|
+
next unless self.labels.include?(key)
|
295
|
+
value = [value] unless value.is_a?(Array) or value.is_a?(Range)
|
296
|
+
new_data_frame.filter!(:hash) {|row| value.include?(row[key])}
|
297
|
+
end
|
298
|
+
new_data_frame
|
299
|
+
end
|
300
|
+
|
301
|
+
def filter_by_category!(hash)
|
302
|
+
hash.each do |key, value|
|
303
|
+
key = key.to_underscore_sym
|
304
|
+
next unless self.labels.include?(key)
|
305
|
+
value = [value] unless value.is_a?(Array) or value.is_a?(Range)
|
306
|
+
self.filter!(:hash) {|row| value.include?(row[key])}
|
307
|
+
end
|
308
|
+
end
|
256
309
|
|
257
310
|
end
|
data/spec/data_frame_spec.rb
CHANGED
@@ -171,10 +171,83 @@ describe DataFrame do
|
|
171
171
|
@df.add [5, 6, 7, 8]
|
172
172
|
end
|
173
173
|
|
174
|
-
it "should be able to filter a data frame with a block" do
|
174
|
+
it "should be able to filter a data frame with a block using an OpenStruct for each row" do
|
175
175
|
@df.filter!(:open_struct) {|row| row.these == 5}
|
176
176
|
@df.items.should eql([[5, 6, 7, 8]])
|
177
177
|
end
|
178
|
+
|
179
|
+
it "should be able to filter a data frame with a block using a Hash for each row" do
|
180
|
+
@df.filter!(:hash) {|row| row[:these] == 5}
|
181
|
+
@df.items.should eql([[5, 6, 7, 8]])
|
182
|
+
end
|
183
|
+
|
184
|
+
S4 = Struct.new(:one, :two, :three, :four)
|
185
|
+
it "should be able to filter a data frame with a block using another class that uses the row as input" do
|
186
|
+
@df.filter!(S4) {|row| row.one == 5}
|
187
|
+
@df.items.should eql([[5, 6, 7, 8]])
|
188
|
+
end
|
189
|
+
|
190
|
+
it "should be able to filter a data frame with a block using an array for each row" do
|
191
|
+
@df.filter! {|row| row.first == 5}
|
192
|
+
@df.items.should eql([[5, 6, 7, 8]])
|
193
|
+
end
|
194
|
+
|
195
|
+
it "should be able to do fancy things with the row as the filter" do
|
196
|
+
@df.filter! {|row| row.sum > 10}
|
197
|
+
@df.items.should eql([[5, 6, 7, 8]])
|
198
|
+
end
|
199
|
+
|
200
|
+
it "should be able to generate a new data frame with filter" do
|
201
|
+
new_df = @df.filter(:open_struct) {|row| row.these == 5}
|
202
|
+
new_df.items.should eql([[5, 6, 7, 8]])
|
203
|
+
@df.items.should eql([[1, 2, 3, 4], [5, 6, 7, 8]])
|
204
|
+
end
|
205
|
+
|
206
|
+
end
|
207
|
+
|
208
|
+
context "filter_by_category" do
|
209
|
+
|
210
|
+
before do
|
211
|
+
@df = DataFrame.new(:weather, :date)
|
212
|
+
|
213
|
+
(1..31).each do |i|
|
214
|
+
@df.add [(i % 3 == 1) ? :fair : :good, Date.parse("07/#{i}/2009")]
|
215
|
+
end
|
216
|
+
|
217
|
+
@d1 = Date.parse("07/15/2009")
|
218
|
+
@d2 = Date.parse("07/31/2009")
|
219
|
+
|
220
|
+
end
|
221
|
+
|
222
|
+
it "should be able to filter by category" do
|
223
|
+
filtered = @df.filter_by_category(:weather => :good)
|
224
|
+
filtered.weather.uniq.should eql([:good])
|
225
|
+
@df.weather.uniq.should be_include(:fair)
|
226
|
+
end
|
227
|
+
|
228
|
+
it "should be able to manage ranges for filter values" do
|
229
|
+
filtered = @df.filter_by_category(:date => (@d1..@d2))
|
230
|
+
filtered.date.should_not be_include(Date.parse("07/01/2009"))
|
231
|
+
filtered.date.should_not be_include(Date.parse("07/14/2009"))
|
232
|
+
filtered.date.should be_include(Date.parse("07/15/2009"))
|
233
|
+
filtered.date.should be_include(Date.parse("07/31/2009"))
|
234
|
+
@df.date.should be_include(Date.parse("07/01/2009"))
|
235
|
+
end
|
236
|
+
|
237
|
+
it "should be able to take an array of values to filter with" do
|
238
|
+
filtered = @df.filter_by_category(:date => [@d1, @d2])
|
239
|
+
filtered.date.should_not be_include(Date.parse("07/01/2009"))
|
240
|
+
filtered.date.should be_include(Date.parse("07/15/2009"))
|
241
|
+
filtered.date.should be_include(Date.parse("07/31/2009"))
|
242
|
+
end
|
243
|
+
|
244
|
+
it "should have a destructive version" do
|
245
|
+
@df.filter_by_category!(:date => [@d1, @d2])
|
246
|
+
@df.date.should_not be_include(Date.parse("07/01/2009"))
|
247
|
+
@df.date.should be_include(Date.parse("07/15/2009"))
|
248
|
+
@df.date.should be_include(Date.parse("07/31/2009"))
|
249
|
+
end
|
250
|
+
|
178
251
|
end
|
179
252
|
|
180
253
|
context "subset_from_columns" do
|
@@ -191,4 +264,43 @@ describe DataFrame do
|
|
191
264
|
new_data_frame.these.should eql([1,5])
|
192
265
|
end
|
193
266
|
end
|
267
|
+
|
268
|
+
it "should be able to j_binary_ize! a column, taking its categories and creating a column for each" do
|
269
|
+
df = DataFrame.new(:observations)
|
270
|
+
df.add [:many]
|
271
|
+
df.add [:fine]
|
272
|
+
df.add [:things]
|
273
|
+
df.add [:are]
|
274
|
+
df.add [:available]
|
275
|
+
df.j_binary_ize!(:observations)
|
276
|
+
df.many.should eql([true, false, false, false, false])
|
277
|
+
df.fine.should eql([false, true, false, false, false])
|
278
|
+
df.things.should eql([false, false, true, false, false])
|
279
|
+
df.are.should eql([false, false, false, true, false])
|
280
|
+
df.available.should eql([false, false, false, false, true])
|
281
|
+
df.observations.should eql([:many, :fine, :things, :are, :available])
|
282
|
+
end
|
283
|
+
|
284
|
+
context "append!" do
|
285
|
+
|
286
|
+
before do
|
287
|
+
@df.add [1,2,3,4]
|
288
|
+
@df.add [5, 6, 7, 8]
|
289
|
+
end
|
290
|
+
|
291
|
+
it "should be able to append an array of values to the data frame" do
|
292
|
+
@df.append!(:new_column, [5,5])
|
293
|
+
@df.new_column.should eql([5,5])
|
294
|
+
end
|
295
|
+
|
296
|
+
it "should be able to append a default value to the data frame" do
|
297
|
+
@df.append!(:new_column, :value)
|
298
|
+
@df.new_column.should eql([:value, :value])
|
299
|
+
end
|
300
|
+
|
301
|
+
it "should use nil as the default value" do
|
302
|
+
@df.append!(:new_column)
|
303
|
+
@df.new_column.should eql([nil, nil])
|
304
|
+
end
|
305
|
+
end
|
194
306
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: davidrichards-data_frame
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.13
|
4
|
+
version: 0.0.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Richards
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-08-
|
12
|
+
date: 2009-08-17 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|