davidrichards-data_frame 0.0.13 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +1 -1
- data/lib/data_frame.rb +54 -1
- data/lib/data_frame/transposable_array.rb +1 -0
- data/spec/data_frame_spec.rb +113 -1
- metadata +2 -2
data/VERSION.yml
CHANGED
data/lib/data_frame.rb
CHANGED
@@ -212,6 +212,11 @@ class DataFrame
|
|
212
212
|
self
|
213
213
|
end
|
214
214
|
|
215
|
+
def filter(as=Array, &block)
|
216
|
+
new_data_frame = self.clone
|
217
|
+
new_data_frame.filter!(as, &block)
|
218
|
+
end
|
219
|
+
|
215
220
|
def infer_class(obj)
|
216
221
|
obj = obj.to_s.classify.constantize if obj.is_a?(Symbol)
|
217
222
|
obj = obj.classify.constantize if obj.is_a?(String)
|
@@ -235,7 +240,7 @@ class DataFrame
|
|
235
240
|
elsif as == Array
|
236
241
|
row
|
237
242
|
else
|
238
|
-
as.new(row)
|
243
|
+
as.new(*row)
|
239
244
|
end
|
240
245
|
end
|
241
246
|
protected :cast_row
|
@@ -253,5 +258,53 @@ class DataFrame
|
|
253
258
|
end
|
254
259
|
new_data_frame
|
255
260
|
end
|
261
|
+
|
262
|
+
# A weird name. This creates a column for every category in a column
|
263
|
+
# and marks each row by its value
|
264
|
+
def j_binary_ize!(*columns)
|
265
|
+
columns.each do |col|
|
266
|
+
values = render_column(col.to_underscore_sym)
|
267
|
+
values.categories.each do |category|
|
268
|
+
self.append!(category, values.map{|e| e == category ? true : false})
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
# Adds a unique column to the table
|
274
|
+
def append!(column_name, value=nil)
|
275
|
+
raise ArgumentError, "Can't have duplicate column names" if self.labels.include?(column_name)
|
276
|
+
self.labels << column_name.to_underscore_sym
|
277
|
+
if value.is_a?(Array)
|
278
|
+
self.items.each_with_index do |item, i|
|
279
|
+
item << value[i]
|
280
|
+
end
|
281
|
+
else
|
282
|
+
self.items.each do |item|
|
283
|
+
item << value
|
284
|
+
end
|
285
|
+
end
|
286
|
+
# Because we are tainting the sub arrays, the TaintableArray doesn't know it's been changed.
|
287
|
+
self.items.taint
|
288
|
+
end
|
289
|
+
|
290
|
+
def filter_by_category(hash)
|
291
|
+
new_data_frame = self.dup
|
292
|
+
hash.each do |key, value|
|
293
|
+
key = key.to_underscore_sym
|
294
|
+
next unless self.labels.include?(key)
|
295
|
+
value = [value] unless value.is_a?(Array) or value.is_a?(Range)
|
296
|
+
new_data_frame.filter!(:hash) {|row| value.include?(row[key])}
|
297
|
+
end
|
298
|
+
new_data_frame
|
299
|
+
end
|
300
|
+
|
301
|
+
def filter_by_category!(hash)
|
302
|
+
hash.each do |key, value|
|
303
|
+
key = key.to_underscore_sym
|
304
|
+
next unless self.labels.include?(key)
|
305
|
+
value = [value] unless value.is_a?(Array) or value.is_a?(Range)
|
306
|
+
self.filter!(:hash) {|row| value.include?(row[key])}
|
307
|
+
end
|
308
|
+
end
|
256
309
|
|
257
310
|
end
|
data/spec/data_frame_spec.rb
CHANGED
@@ -171,10 +171,83 @@ describe DataFrame do
|
|
171
171
|
@df.add [5, 6, 7, 8]
|
172
172
|
end
|
173
173
|
|
174
|
-
it "should be able to filter a data frame with a block" do
|
174
|
+
it "should be able to filter a data frame with a block using an OpenStruct for each row" do
|
175
175
|
@df.filter!(:open_struct) {|row| row.these == 5}
|
176
176
|
@df.items.should eql([[5, 6, 7, 8]])
|
177
177
|
end
|
178
|
+
|
179
|
+
it "should be able to filter a data frame with a block using a Hash for each row" do
|
180
|
+
@df.filter!(:hash) {|row| row[:these] == 5}
|
181
|
+
@df.items.should eql([[5, 6, 7, 8]])
|
182
|
+
end
|
183
|
+
|
184
|
+
S4 = Struct.new(:one, :two, :three, :four)
|
185
|
+
it "should be able to filter a data frame with a block using another class that uses the row as input" do
|
186
|
+
@df.filter!(S4) {|row| row.one == 5}
|
187
|
+
@df.items.should eql([[5, 6, 7, 8]])
|
188
|
+
end
|
189
|
+
|
190
|
+
it "should be able to filter a data frame with a block using an array for each row" do
|
191
|
+
@df.filter! {|row| row.first == 5}
|
192
|
+
@df.items.should eql([[5, 6, 7, 8]])
|
193
|
+
end
|
194
|
+
|
195
|
+
it "should be able to do fancy things with the row as the filter" do
|
196
|
+
@df.filter! {|row| row.sum > 10}
|
197
|
+
@df.items.should eql([[5, 6, 7, 8]])
|
198
|
+
end
|
199
|
+
|
200
|
+
it "should be able to generate a new data frame with filter" do
|
201
|
+
new_df = @df.filter(:open_struct) {|row| row.these == 5}
|
202
|
+
new_df.items.should eql([[5, 6, 7, 8]])
|
203
|
+
@df.items.should eql([[1, 2, 3, 4], [5, 6, 7, 8]])
|
204
|
+
end
|
205
|
+
|
206
|
+
end
|
207
|
+
|
208
|
+
context "filter_by_category" do
|
209
|
+
|
210
|
+
before do
|
211
|
+
@df = DataFrame.new(:weather, :date)
|
212
|
+
|
213
|
+
(1..31).each do |i|
|
214
|
+
@df.add [(i % 3 == 1) ? :fair : :good, Date.parse("07/#{i}/2009")]
|
215
|
+
end
|
216
|
+
|
217
|
+
@d1 = Date.parse("07/15/2009")
|
218
|
+
@d2 = Date.parse("07/31/2009")
|
219
|
+
|
220
|
+
end
|
221
|
+
|
222
|
+
it "should be able to filter by category" do
|
223
|
+
filtered = @df.filter_by_category(:weather => :good)
|
224
|
+
filtered.weather.uniq.should eql([:good])
|
225
|
+
@df.weather.uniq.should be_include(:fair)
|
226
|
+
end
|
227
|
+
|
228
|
+
it "should be able to manage ranges for filter values" do
|
229
|
+
filtered = @df.filter_by_category(:date => (@d1..@d2))
|
230
|
+
filtered.date.should_not be_include(Date.parse("07/01/2009"))
|
231
|
+
filtered.date.should_not be_include(Date.parse("07/14/2009"))
|
232
|
+
filtered.date.should be_include(Date.parse("07/15/2009"))
|
233
|
+
filtered.date.should be_include(Date.parse("07/31/2009"))
|
234
|
+
@df.date.should be_include(Date.parse("07/01/2009"))
|
235
|
+
end
|
236
|
+
|
237
|
+
it "should be able to take an array of values to filter with" do
|
238
|
+
filtered = @df.filter_by_category(:date => [@d1, @d2])
|
239
|
+
filtered.date.should_not be_include(Date.parse("07/01/2009"))
|
240
|
+
filtered.date.should be_include(Date.parse("07/15/2009"))
|
241
|
+
filtered.date.should be_include(Date.parse("07/31/2009"))
|
242
|
+
end
|
243
|
+
|
244
|
+
it "should have a destructive version" do
|
245
|
+
@df.filter_by_category!(:date => [@d1, @d2])
|
246
|
+
@df.date.should_not be_include(Date.parse("07/01/2009"))
|
247
|
+
@df.date.should be_include(Date.parse("07/15/2009"))
|
248
|
+
@df.date.should be_include(Date.parse("07/31/2009"))
|
249
|
+
end
|
250
|
+
|
178
251
|
end
|
179
252
|
|
180
253
|
context "subset_from_columns" do
|
@@ -191,4 +264,43 @@ describe DataFrame do
|
|
191
264
|
new_data_frame.these.should eql([1,5])
|
192
265
|
end
|
193
266
|
end
|
267
|
+
|
268
|
+
it "should be able to j_binary_ize! a column, taking its categories and creating a column for each" do
|
269
|
+
df = DataFrame.new(:observations)
|
270
|
+
df.add [:many]
|
271
|
+
df.add [:fine]
|
272
|
+
df.add [:things]
|
273
|
+
df.add [:are]
|
274
|
+
df.add [:available]
|
275
|
+
df.j_binary_ize!(:observations)
|
276
|
+
df.many.should eql([true, false, false, false, false])
|
277
|
+
df.fine.should eql([false, true, false, false, false])
|
278
|
+
df.things.should eql([false, false, true, false, false])
|
279
|
+
df.are.should eql([false, false, false, true, false])
|
280
|
+
df.available.should eql([false, false, false, false, true])
|
281
|
+
df.observations.should eql([:many, :fine, :things, :are, :available])
|
282
|
+
end
|
283
|
+
|
284
|
+
context "append!" do
|
285
|
+
|
286
|
+
before do
|
287
|
+
@df.add [1,2,3,4]
|
288
|
+
@df.add [5, 6, 7, 8]
|
289
|
+
end
|
290
|
+
|
291
|
+
it "should be able to append an array of values to the data frame" do
|
292
|
+
@df.append!(:new_column, [5,5])
|
293
|
+
@df.new_column.should eql([5,5])
|
294
|
+
end
|
295
|
+
|
296
|
+
it "should be able to append a default value to the data frame" do
|
297
|
+
@df.append!(:new_column, :value)
|
298
|
+
@df.new_column.should eql([:value, :value])
|
299
|
+
end
|
300
|
+
|
301
|
+
it "should use nil as the default value" do
|
302
|
+
@df.append!(:new_column)
|
303
|
+
@df.new_column.should eql([nil, nil])
|
304
|
+
end
|
305
|
+
end
|
194
306
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: davidrichards-data_frame
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.13
|
4
|
+
version: 0.0.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Richards
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-08-
|
12
|
+
date: 2009-08-17 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|