davidrichards-data_frame 0.0.14 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
1
  ---
2
2
  :major: 0
3
3
  :minor: 0
4
- :patch: 14
4
+ :patch: 15
@@ -262,10 +262,23 @@ class DataFrame
262
262
  # A weird name. This creates a column for every category in a column
263
263
  # and marks each row true or false by whether its value belongs to that category
264
264
  def j_binary_ize!(*columns)
265
+ # Allows an options hash to be passed in alongside the column names.
266
+ options = columns.find_all {|e| e.is_a?(Hash)}.inject({}) {|h, e| h.merge!(e)}
267
+ columns.delete_if {|e| e.is_a?(Hash)}
268
+
269
+ # Generates new columns
265
270
  columns.each do |col|
266
271
  values = render_column(col.to_underscore_sym)
267
272
  values.categories.each do |category|
268
- self.append!(category, values.map{|e| e == category ? true : false})
273
+ full_name = (col.to_s + "_" + category.to_s).to_sym
274
+ if options[:allow_overlap]
275
+ category_map = values.inject([]) do |list, e|
276
+ list << values.all_categories(e)
277
+ end
278
+ self.append!(full_name, category_map.map{|e| e.include?(category)})
279
+ else
280
+ self.append!(full_name, values.category_map.map{|e| e == category})
281
+ end
269
282
  end
270
283
  end
271
284
  end
@@ -24,7 +24,7 @@ describe DataFrame do
24
24
  end
25
25
 
26
26
  it "should use just_enumerable_stats" do
27
- [1,2,3].std.should eql(1.0)
27
+ [1,2,3].std.should eql(1)
28
28
  lambda{[1,2,3].cor([2,3,5])}.should_not raise_error
29
29
  end
30
30
 
@@ -66,7 +66,7 @@ describe DataFrame do
66
66
 
67
67
  it "should make rows easily computable" do
68
68
  @df.row_labels = [:other, :things, :here]
69
- @df.here.std.should be_close(1.414, 0.001)
69
+ @df.here.sum.should eql(42)
70
70
  end
71
71
  end
72
72
 
@@ -273,14 +273,49 @@ describe DataFrame do
273
273
  df.add [:are]
274
274
  df.add [:available]
275
275
  df.j_binary_ize!(:observations)
276
- df.many.should eql([true, false, false, false, false])
277
- df.fine.should eql([false, true, false, false, false])
278
- df.things.should eql([false, false, true, false, false])
279
- df.are.should eql([false, false, false, true, false])
280
- df.available.should eql([false, false, false, false, true])
276
+ df.observations_many.should eql([true, false, false, false, false])
277
+ df.observations_fine.should eql([false, true, false, false, false])
278
+ df.observations_things.should eql([false, false, true, false, false])
279
+ df.observations_are.should eql([false, false, false, true, false])
280
+ df.observations_available.should eql([false, false, false, false, true])
281
281
  df.observations.should eql([:many, :fine, :things, :are, :available])
282
282
  end
283
283
 
284
+ it "should be able to j_binary_ize! a more normal column" do
285
+ df = DataFrame.new(:observations)
286
+ df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
287
+ df.observations.add_category(:small) {|e| e <= 3}
288
+ df.observations.add_category(:large) {|e| e >= 3}
289
+ df.j_binary_ize!(:observations)
290
+ df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
291
+ df.observations_large.should eql([false, false, false, true, true, true, false, false, false])
292
+ end
293
+
294
+ it "should be able to j_binary_ize with non-adjacent sets (sets that allow a value to have more than one category)" do
295
+ df = DataFrame.new(:observations)
296
+ df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
297
+ df.observations.add_category(:small) {|e| e <= 3}
298
+ df.observations.add_category(:large) {|e| e >= 3}
299
+ df.j_binary_ize!(:observations, :allow_overlap => true)
300
+ df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
301
+ df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
302
+ end
303
+
304
+ it "should be able to hold multiple ideas of a columns categories by resetting the category and re-running j_binary_ize" do
305
+ df = DataFrame.new(:observations)
306
+ df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
307
+ df.observations.add_category(:small) {|e| e <= 3}
308
+ df.observations.add_category(:large) {|e| e >= 3}
309
+ df.j_binary_ize!(:observations, :allow_overlap => true)
310
+ df.observations.set_categories(:odd => lambda{|e| e.odd?}, :even => lambda{|e| e.even?})
311
+ df.j_binary_ize!(:observations)
312
+ df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
313
+ df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
314
+ df.observations.should eql([1,2,3,4,5,4,3,2,1])
315
+ df.observations_even.should eql([false, true, false, true, false, true, false, true, false])
316
+ df.observations_odd.should eql([true, false, true, false, true, false, true, false, true])
317
+ end
318
+
284
319
  context "append!" do
285
320
 
286
321
  before do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: davidrichards-data_frame
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.0.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Richards
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-17 00:00:00 -07:00
12
+ date: 2009-08-24 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency