davidrichards-data_frame 0.0.14 → 0.0.15

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,4 @@
1
1
  ---
2
2
  :major: 0
3
3
  :minor: 0
4
- :patch: 14
4
+ :patch: 15
@@ -262,10 +262,23 @@ class DataFrame
262
262
  # A weird name. This creates a column for every category in a column
263
263
  # and marks each row by its value
264
264
  def j_binary_ize!(*columns)
265
+ # Allows to mix a hash with the columns.
266
+ options = columns.find_all {|e| e.is_a?(Hash)}.inject({}) {|h, e| h.merge!(e)}
267
+ columns.delete_if {|e| e.is_a?(Hash)}
268
+
269
+ # Generates new columns
265
270
  columns.each do |col|
266
271
  values = render_column(col.to_underscore_sym)
267
272
  values.categories.each do |category|
268
- self.append!(category, values.map{|e| e == category ? true : false})
273
+ full_name = (col.to_s + "_" + category.to_s).to_sym
274
+ if options[:allow_overlap]
275
+ category_map = values.inject([]) do |list, e|
276
+ list << values.all_categories(e)
277
+ end
278
+ self.append!(full_name, category_map.map{|e| e.include?(category)})
279
+ else
280
+ self.append!(full_name, values.category_map.map{|e| e == category})
281
+ end
269
282
  end
270
283
  end
271
284
  end
@@ -24,7 +24,7 @@ describe DataFrame do
24
24
  end
25
25
 
26
26
  it "should use just_enumerable_stats" do
27
- [1,2,3].std.should eql(1.0)
27
+ [1,2,3].std.should eql(1)
28
28
  lambda{[1,2,3].cor([2,3,5])}.should_not raise_error
29
29
  end
30
30
 
@@ -66,7 +66,7 @@ describe DataFrame do
66
66
 
67
67
  it "should make rows easily computable" do
68
68
  @df.row_labels = [:other, :things, :here]
69
- @df.here.std.should be_close(1.414, 0.001)
69
+ @df.here.sum.should eql(42)
70
70
  end
71
71
  end
72
72
 
@@ -273,14 +273,49 @@ describe DataFrame do
273
273
  df.add [:are]
274
274
  df.add [:available]
275
275
  df.j_binary_ize!(:observations)
276
- df.many.should eql([true, false, false, false, false])
277
- df.fine.should eql([false, true, false, false, false])
278
- df.things.should eql([false, false, true, false, false])
279
- df.are.should eql([false, false, false, true, false])
280
- df.available.should eql([false, false, false, false, true])
276
+ df.observations_many.should eql([true, false, false, false, false])
277
+ df.observations_fine.should eql([false, true, false, false, false])
278
+ df.observations_things.should eql([false, false, true, false, false])
279
+ df.observations_are.should eql([false, false, false, true, false])
280
+ df.observations_available.should eql([false, false, false, false, true])
281
281
  df.observations.should eql([:many, :fine, :things, :are, :available])
282
282
  end
283
283
 
284
+ it "should be able to j_binary_ize! a more normal column" do
285
+ df = DataFrame.new(:observations)
286
+ df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
287
+ df.observations.add_category(:small) {|e| e <= 3}
288
+ df.observations.add_category(:large) {|e| e >= 3}
289
+ df.j_binary_ize!(:observations)
290
+ df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
291
+ df.observations_large.should eql([false, false, false, true, true, true, false, false, false])
292
+ end
293
+
294
+ it "should be able to j_binary_ize with non-adjacent sets (sets that allow a value to have more than one category)" do
295
+ df = DataFrame.new(:observations)
296
+ df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
297
+ df.observations.add_category(:small) {|e| e <= 3}
298
+ df.observations.add_category(:large) {|e| e >= 3}
299
+ df.j_binary_ize!(:observations, :allow_overlap => true)
300
+ df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
301
+ df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
302
+ end
303
+
304
+ it "should be able to hold multiple ideas of a columns categories by resetting the category and re-running j_binary_ize" do
305
+ df = DataFrame.new(:observations)
306
+ df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
307
+ df.observations.add_category(:small) {|e| e <= 3}
308
+ df.observations.add_category(:large) {|e| e >= 3}
309
+ df.j_binary_ize!(:observations, :allow_overlap => true)
310
+ df.observations.set_categories(:odd => lambda{|e| e.odd?}, :even => lambda{|e| e.even?})
311
+ df.j_binary_ize!(:observations)
312
+ df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
313
+ df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
314
+ df.observations.should eql([1,2,3,4,5,4,3,2,1])
315
+ df.observations_even.should eql([false, true, false, true, false, true, false, true, false])
316
+ df.observations_odd.should eql([true, false, true, false, true, false, true, false, true])
317
+ end
318
+
284
319
  context "append!" do
285
320
 
286
321
  before do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: davidrichards-data_frame
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.0.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Richards
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-17 00:00:00 -07:00
12
+ date: 2009-08-24 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency