davidrichards-data_frame 0.0.14 → 0.0.15
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION.yml +1 -1
- data/lib/data_frame.rb +14 -1
- data/spec/data_frame_spec.rb +42 -7
- metadata +2 -2
data/VERSION.yml
CHANGED
data/lib/data_frame.rb
CHANGED
@@ -262,10 +262,23 @@ class DataFrame
|
|
262
262
|
# A weird name. This creates a column for every category in a column
|
263
263
|
# and marks each row by its value
|
264
264
|
def j_binary_ize!(*columns)
|
265
|
+
# Allows to mix a hash with the columns.
|
266
|
+
options = columns.find_all {|e| e.is_a?(Hash)}.inject({}) {|h, e| h.merge!(e)}
|
267
|
+
columns.delete_if {|e| e.is_a?(Hash)}
|
268
|
+
|
269
|
+
# Generates new columns
|
265
270
|
columns.each do |col|
|
266
271
|
values = render_column(col.to_underscore_sym)
|
267
272
|
values.categories.each do |category|
|
268
|
-
|
273
|
+
full_name = (col.to_s + "_" + category.to_s).to_sym
|
274
|
+
if options[:allow_overlap]
|
275
|
+
category_map = values.inject([]) do |list, e|
|
276
|
+
list << values.all_categories(e)
|
277
|
+
end
|
278
|
+
self.append!(full_name, category_map.map{|e| e.include?(category)})
|
279
|
+
else
|
280
|
+
self.append!(full_name, values.category_map.map{|e| e == category})
|
281
|
+
end
|
269
282
|
end
|
270
283
|
end
|
271
284
|
end
|
data/spec/data_frame_spec.rb
CHANGED
@@ -24,7 +24,7 @@ describe DataFrame do
|
|
24
24
|
end
|
25
25
|
|
26
26
|
it "should use just_enumerable_stats" do
|
27
|
-
[1,2,3].std.should eql(1
|
27
|
+
[1,2,3].std.should eql(1)
|
28
28
|
lambda{[1,2,3].cor([2,3,5])}.should_not raise_error
|
29
29
|
end
|
30
30
|
|
@@ -66,7 +66,7 @@ describe DataFrame do
|
|
66
66
|
|
67
67
|
it "should make rows easily computable" do
|
68
68
|
@df.row_labels = [:other, :things, :here]
|
69
|
-
@df.here.
|
69
|
+
@df.here.sum.should eql(42)
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|
@@ -273,14 +273,49 @@ describe DataFrame do
|
|
273
273
|
df.add [:are]
|
274
274
|
df.add [:available]
|
275
275
|
df.j_binary_ize!(:observations)
|
276
|
-
df.
|
277
|
-
df.
|
278
|
-
df.
|
279
|
-
df.
|
280
|
-
df.
|
276
|
+
df.observations_many.should eql([true, false, false, false, false])
|
277
|
+
df.observations_fine.should eql([false, true, false, false, false])
|
278
|
+
df.observations_things.should eql([false, false, true, false, false])
|
279
|
+
df.observations_are.should eql([false, false, false, true, false])
|
280
|
+
df.observations_available.should eql([false, false, false, false, true])
|
281
281
|
df.observations.should eql([:many, :fine, :things, :are, :available])
|
282
282
|
end
|
283
283
|
|
284
|
+
it "should be able to j_binary_ize! a more normal column" do
|
285
|
+
df = DataFrame.new(:observations)
|
286
|
+
df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
|
287
|
+
df.observations.add_category(:small) {|e| e <= 3}
|
288
|
+
df.observations.add_category(:large) {|e| e >= 3}
|
289
|
+
df.j_binary_ize!(:observations)
|
290
|
+
df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
|
291
|
+
df.observations_large.should eql([false, false, false, true, true, true, false, false, false])
|
292
|
+
end
|
293
|
+
|
294
|
+
it "should be able to j_binary_ize with non-adjacent sets (sets that allow a value to have more than one category)" do
|
295
|
+
df = DataFrame.new(:observations)
|
296
|
+
df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
|
297
|
+
df.observations.add_category(:small) {|e| e <= 3}
|
298
|
+
df.observations.add_category(:large) {|e| e >= 3}
|
299
|
+
df.j_binary_ize!(:observations, :allow_overlap => true)
|
300
|
+
df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
|
301
|
+
df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
|
302
|
+
end
|
303
|
+
|
304
|
+
it "should be able to hold multiple ideas of a columns categories by resetting the category and re-running j_binary_ize" do
|
305
|
+
df = DataFrame.new(:observations)
|
306
|
+
df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
|
307
|
+
df.observations.add_category(:small) {|e| e <= 3}
|
308
|
+
df.observations.add_category(:large) {|e| e >= 3}
|
309
|
+
df.j_binary_ize!(:observations, :allow_overlap => true)
|
310
|
+
df.observations.set_categories(:odd => lambda{|e| e.odd?}, :even => lambda{|e| e.even?})
|
311
|
+
df.j_binary_ize!(:observations)
|
312
|
+
df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
|
313
|
+
df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
|
314
|
+
df.observations.should eql([1,2,3,4,5,4,3,2,1])
|
315
|
+
df.observations_even.should eql([false, true, false, true, false, true, false, true, false])
|
316
|
+
df.observations_odd.should eql([true, false, true, false, true, false, true, false, true])
|
317
|
+
end
|
318
|
+
|
284
319
|
context "append!" do
|
285
320
|
|
286
321
|
before do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: davidrichards-data_frame
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.14
|
4
|
+
version: 0.0.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Richards
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-08-
|
12
|
+
date: 2009-08-24 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|