davidrichards-data_frame 0.0.14 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +1 -1
- data/lib/data_frame.rb +14 -1
- data/spec/data_frame_spec.rb +42 -7
- metadata +2 -2
data/VERSION.yml
CHANGED
data/lib/data_frame.rb
CHANGED
@@ -262,10 +262,23 @@ class DataFrame
|
|
262
262
|
# A weird name. Expands categorical columns into one boolean column per
# category.
#
# For every requested column, and for every category that column reports
# via +categories+, a new column named "<column>_<category>" is appended.
# Each entry of the new column says whether the corresponding row belongs
# to that category.
#
# Hash arguments may be mixed in with the column names. They are merged
# (later hashes win) and read as options:
#
#   :allow_overlap - when truthy, a row is marked true for every category
#                    that +all_categories+ lists for its value, so one row
#                    can be true in several generated columns. Otherwise a
#                    row is marked true only where its +category_map+ entry
#                    equals the category.
#
# Examples:
#
#   df.j_binary_ize!(:observations)
#   df.j_binary_ize!(:observations, :allow_overlap => true)
#
# Returns the array of column names that were processed (options removed).
def j_binary_ize!(*columns)
  # Allows an options hash to be mixed in with the columns: split the two
  # kinds of argument apart in a single pass.
  option_hashes, names = columns.partition { |e| e.is_a?(Hash) }
  options = option_hashes.inject({}) { |merged, e| merged.merge!(e) }

  # Generates new columns
  names.each do |col|
    values = render_column(col.to_underscore_sym)

    # Per-row category lists for the overlap mode. They do not depend on
    # the category being processed, so they are built at most once per
    # column (and not at all unless :allow_overlap is used).
    # NOTE(review): assumes all_categories(e) returns a collection that
    # responds to include? -- confirm against just_enumerable_stats.
    memberships = nil

    values.categories.each do |category|
      full_name = "#{col}_#{category}".to_sym
      flags =
        if options[:allow_overlap]
          memberships ||= values.map { |e| values.all_categories(e) }
          memberships.map { |found| found.include?(category) }
        else
          values.category_map.map { |e| e == category }
        end
      append!(full_name, flags)
    end
  end
end
|
data/spec/data_frame_spec.rb
CHANGED
@@ -24,7 +24,7 @@ describe DataFrame do
|
|
24
24
|
end
|
25
25
|
|
26
26
|
it "should use just_enumerable_stats" do
|
27
|
-
[1,2,3].std.should eql(1
|
27
|
+
[1,2,3].std.should eql(1)
|
28
28
|
lambda{[1,2,3].cor([2,3,5])}.should_not raise_error
|
29
29
|
end
|
30
30
|
|
@@ -66,7 +66,7 @@ describe DataFrame do
|
|
66
66
|
|
67
67
|
it "should make rows easily computable" do
|
68
68
|
@df.row_labels = [:other, :things, :here]
|
69
|
-
@df.here.
|
69
|
+
@df.here.sum.should eql(42)
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|
@@ -273,14 +273,49 @@ describe DataFrame do
|
|
273
273
|
df.add [:are]
|
274
274
|
df.add [:available]
|
275
275
|
df.j_binary_ize!(:observations)
|
276
|
-
df.
|
277
|
-
df.
|
278
|
-
df.
|
279
|
-
df.
|
280
|
-
df.
|
276
|
+
df.observations_many.should eql([true, false, false, false, false])
|
277
|
+
df.observations_fine.should eql([false, true, false, false, false])
|
278
|
+
df.observations_things.should eql([false, false, true, false, false])
|
279
|
+
df.observations_are.should eql([false, false, false, true, false])
|
280
|
+
df.observations_available.should eql([false, false, false, false, true])
|
281
281
|
df.observations.should eql([:many, :fine, :things, :are, :available])
|
282
282
|
end
|
283
283
|
|
284
|
+
it "should be able to j_binary_ize! a more normal column" do
|
285
|
+
df = DataFrame.new(:observations)
|
286
|
+
df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
|
287
|
+
df.observations.add_category(:small) {|e| e <= 3}
|
288
|
+
df.observations.add_category(:large) {|e| e >= 3}
|
289
|
+
df.j_binary_ize!(:observations)
|
290
|
+
df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
|
291
|
+
df.observations_large.should eql([false, false, false, true, true, true, false, false, false])
|
292
|
+
end
|
293
|
+
|
294
|
+
it "should be able to j_binary_ize with non-adjacent sets (sets that allow a value to have more than one category)" do
|
295
|
+
df = DataFrame.new(:observations)
|
296
|
+
df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
|
297
|
+
df.observations.add_category(:small) {|e| e <= 3}
|
298
|
+
df.observations.add_category(:large) {|e| e >= 3}
|
299
|
+
df.j_binary_ize!(:observations, :allow_overlap => true)
|
300
|
+
df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
|
301
|
+
df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
|
302
|
+
end
|
303
|
+
|
304
|
+
it "should be able to hold multiple ideas of a columns categories by resetting the category and re-running j_binary_ize" do
|
305
|
+
df = DataFrame.new(:observations)
|
306
|
+
df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
|
307
|
+
df.observations.add_category(:small) {|e| e <= 3}
|
308
|
+
df.observations.add_category(:large) {|e| e >= 3}
|
309
|
+
df.j_binary_ize!(:observations, :allow_overlap => true)
|
310
|
+
df.observations.set_categories(:odd => lambda{|e| e.odd?}, :even => lambda{|e| e.even?})
|
311
|
+
df.j_binary_ize!(:observations)
|
312
|
+
df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
|
313
|
+
df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
|
314
|
+
df.observations.should eql([1,2,3,4,5,4,3,2,1])
|
315
|
+
df.observations_even.should eql([false, true, false, true, false, true, false, true, false])
|
316
|
+
df.observations_odd.should eql([true, false, true, false, true, false, true, false, true])
|
317
|
+
end
|
318
|
+
|
284
319
|
context "append!" do
|
285
320
|
|
286
321
|
before do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: davidrichards-data_frame
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Richards
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-08-
|
12
|
+
date: 2009-08-24 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|