RubyGems - davidrichards-data_frame - Versions diffs - 0.0.14 → 0.0.15 - Mend

davidrichards-data_frame 0.0.14 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

data/VERSION.yml CHANGED

@@ -1,4 +1,4 @@
 ---
 :major: 0
 :minor: 0
-:patch: 14
+:patch: 15

data/lib/data_frame.rb CHANGED

@@ -262,10 +262,23 @@ class DataFrame
   # A weird name.  This creates a column for every category in a column
   # and marks each row by its value
   def j_binary_ize!(*columns)
+    # Allows to mix a hash with the columns.
+    options = columns.find_all {|e| e.is_a?(Hash)}.inject({}) {|h, e| h.merge!(e)}
+    columns.delete_if {|e| e.is_a?(Hash)}
+    # Generates new columns
     columns.each do |col|
       values = render_column(col.to_underscore_sym)
       values.categories.each do |category|
-        self.append!(category, values.map{|e| e == category ? true : false})
+        full_name = (col.to_s + "_" + category.to_s).to_sym
+        if options[:allow_overlap]
+          category_map = values.inject([]) do |list, e|
+            list << values.all_categories(e)
+          end
+          self.append!(full_name, category_map.map{|e| e.include?(category)})
+        else
+          self.append!(full_name, values.category_map.map{|e| e == category})
+        end
       end
     end
   end

data/spec/data_frame_spec.rb CHANGED

@@ -24,7 +24,7 @@ describe DataFrame do
   end
   it "should use just_enumerable_stats" do
-    [1,2,3].std.should eql(1.0)
+    [1,2,3].std.should eql(1)
     lambda{[1,2,3].cor([2,3,5])}.should_not raise_error
   end
@@ -66,7 +66,7 @@ describe DataFrame do
     it "should make rows easily computable" do
       @df.row_labels = [:other, :things, :here]
-      @df.here.std.should be_close(1.414, 0.001)
+      @df.here.sum.should eql(42)
     end
   end
@@ -273,14 +273,49 @@ describe DataFrame do
     df.add [:are]
     df.add [:available]
     df.j_binary_ize!(:observations)
-    df.many.should eql([true, false, false, false, false])
-    df.fine.should eql([false, true, false, false, false])
-    df.things.should eql([false, false, true, false, false])
-    df.are.should eql([false, false, false, true, false])
-    df.available.should eql([false, false, false, false, true])
+    df.observations_many.should eql([true, false, false, false, false])
+    df.observations_fine.should eql([false, true, false, false, false])
+    df.observations_things.should eql([false, false, true, false, false])
+    df.observations_are.should eql([false, false, false, true, false])
+    df.observations_available.should eql([false, false, false, false, true])
     df.observations.should eql([:many, :fine, :things, :are, :available])
   end
+  it "should be able to j_binary_ize! a more normal column" do
+    df = DataFrame.new(:observations)
+    df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
+    df.observations.add_category(:small) {|e| e <= 3}
+    df.observations.add_category(:large) {|e| e >= 3}
+    df.j_binary_ize!(:observations)
+    df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
+    df.observations_large.should eql([false, false, false, true, true, true, false, false, false])
+  end
+  it "should be able to j_binary_ize with non-adjacent sets (sets that allow a value to have more than one category)" do
+    df = DataFrame.new(:observations)
+    df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
+    df.observations.add_category(:small) {|e| e <= 3}
+    df.observations.add_category(:large) {|e| e >= 3}
+    df.j_binary_ize!(:observations, :allow_overlap => true)
+    df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
+    df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
+  end
+  it "should be able to hold multiple ideas of a columns categories by resetting the category and re-running j_binary_ize" do
+    df = DataFrame.new(:observations)
+    df.import([1,2,3,4,5,4,3,2,1].map{|e| Array(e)})
+    df.observations.add_category(:small) {|e| e <= 3}
+    df.observations.add_category(:large) {|e| e >= 3}
+    df.j_binary_ize!(:observations, :allow_overlap => true)
+    df.observations.set_categories(:odd => lambda{|e| e.odd?}, :even => lambda{|e| e.even?})
+    df.j_binary_ize!(:observations)
+    df.observations_small.should eql([true, true, true, false, false, false, true, true, true])
+    df.observations_large.should eql([false, false, true, true, true, true, true, false, false])
+    df.observations.should eql([1,2,3,4,5,4,3,2,1])
+    df.observations_even.should eql([false, true, false, true, false, true, false, true, false])
+    df.observations_odd.should eql([true, false, true, false, true, false, true, false, true])
+  end
   context "append!" do
     before do

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: davidrichards-data_frame
 version: !ruby/object:Gem::Version
-  version: 0.0.14
+  version: 0.0.15
 platform: ruby
 authors:
 - David Richards
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-08-17 00:00:00 -07:00
+date: 2009-08-24 00:00:00 -07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency