RubyGems - goldmine - Versions diffs - 1.1.4 → 1.2.0 - Mend

goldmine 1.1.4 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: c686bfb600cadea453033f457a751ce8538b00af
-  data.tar.gz: 1216c9ddd41197c6f25c1017039b0d2428a20105
+  metadata.gz: eac055065f84b7550241a15c980bf74e0fc892a6
+  data.tar.gz: 02d1831f66469c098299f819d0e3698959bb725f
 SHA512:
-  metadata.gz: 3baf5482a83183114999ed3d705ff8c97b6bc5b0cb9a3ae02e64f0eb9af378ea8fc4a8e39e5003f6da9eb8fae275407bb541f6eb51880e886205a72843f7532e
-  data.tar.gz: acf22665e1acf226ef31274021da2ceb406ef3a268555abb7f597fccc3b08ba7290f81ef9789b684073455b4f9c316a0d889208375c240da57e34600c0a978fd
+  metadata.gz: c4a8519848f4776ac73febc4850919c5449f302471a482673c8e7e47427237759e52444759544596ceb778642ebf0ce2bd866364868cee04fe7a38dbfae443cb
+  data.tar.gz: e1243ab291afcafa019b6b287f98a20b5054f080fc31764322cffe07b604c59d0b23893da822c27401c8727c3b9f64fabe65d9e1acb167f57a5ed82b814abfec

data/Gemfile.lock CHANGED

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    goldmine (1.1.4)
+    goldmine (1.2.0)
 GEM
   remote: https://rubygems.org/

data/README.md CHANGED

@@ -21,6 +21,7 @@ Think of __Goldmine__ as `Enumerable#group_by` on steroids.
 ---
 The [demo project](http://hopsoft.github.io/goldmine/) demonstrates some of Goldmine's uses.
+`TODO: update the demo project`
 ---
@@ -109,7 +110,7 @@ list.pivot { |record| record[:favorite_colors] }
 }
 ```
-# Stacked pivots
+## Stacked pivots
 ```ruby
 list = [
@@ -142,31 +143,72 @@ end
 }
 ```
-# Returning pivots in tabular format
+## Rollups
-This feature is useful when you need to do things like export to CSV or build user interfaces.
+Sometimes it's useful to roll pivots into a summary.
 ```ruby
-# using the stacked pivot example above
-mined.to_a
+list = [1,2,3,4,5,6,7,8,9]
+list = Goldmine::ArrayMiner.new(list)
+pivoted = list.pivot(:less_than_5) { |i| i < 5 }.pivot(:even) { |i| i % 2 == 0 }
+pivoted.rollup { |values| values.size }
+# result:
+{
+  { :less_than_5 => true, :even => false} => 2,
+  { :less_than_5 => true, :even => true} => 2,
+  { :less_than_5 => false, :even => false} => 3,
+  { :less_than_5 => false, :even => true} => 2
+}
+```
+## Tabular data
+Tabular data provides a more streamlined summary view of a pivot.
+```ruby
+list = [1,2,3,4,5,6,7,8,9]
+list = Goldmine::ArrayMiner.new(list)
+pivoted = list.pivot(:less_than_5) { |i| i < 5 }.pivot(:even) { |i| i % 2 == 0 }
+pivoted.to_tabular
 # result:
 [
-  ["Name has an 'e'", ">= 21 years old", "Percent of Total", "Count"],
-  [false, true, 0.4, 2],
-  [true, true, 0.4, 2],
-  [true, false, 0.2, 1]
+  ["less_than_5", "even", "percent", "count"],
+  [true, false, 0.22, 2],
+  [true, true, 0.22, 2],
+  [false, false, 0.33, 3],
+  [false, true, 0.22, 2]
 ]
 ```
-The first entry is the header row.
-Subsequent entries are data rows.
-The last value in each data row indicates the number of matches.
+## CSV table
-Need to sort the rows? Just pass a `sort_by` block.
+CSV tables are a formalized version of tabular data.
+They simplify the complexity of working with tabular data.
 ```ruby
-# sort on "total" i.e. 3rd value in the row
-mined.to_a do |row|
-  row[2]
+list = [1,2,3,4,5,6,7,8,9]
+list = Goldmine::ArrayMiner.new(list)
+pivoted = list.pivot(:less_than_5) { |i| i < 5 }.pivot(:even) { |i| i % 2 == 0 }
+csv = pivoted.to_csv
+csv.headers # => ["less_than_5", "even", "percent", "count"]
+csv.each do |row|
+  puts row["less_than_5"]
+  puts row["even"]
 end
+csv.to_csv
+# result:
+"less_than_5,even,percent,count\ntrue,false,0.22,2\ntrue,true,0.22,2\nfalse,false,0.33,3\nfalse,true,0.22,2\n"
 ```
+## Summary
+Goldmine allows you to combine the power of pivots, rollups, tabular data,
+& csv to construct deep insights into your data with minimal effort.
+One of our common use cases is to query a database using ActiveRecord,
+pivot the results, convert to csv, sort, pivot again,
+then rollup the results to create data visualizations in the form of charts & graphs.

data/lib/goldmine.rb CHANGED

@@ -3,9 +3,11 @@ require "array_miner"
 require "hash_miner"
 module Goldmine
-  def self.miner(object)
-    return ArrayMiner.new(object) if object.is_a?(Array)
-    return HashMiner.new(object) if object.is_a?(Hash)
-    nil
+  class << self
+    def miner(object)
+      return ArrayMiner.new(object) if object.is_a?(Array)
+      return HashMiner.new(object) if object.is_a?(Hash)
+      nil
+    end
   end
 end

data/lib/goldmine/array_miner.rb CHANGED

@@ -4,8 +4,9 @@ module Goldmine
   class ArrayMiner < SimpleDelegator
     attr_reader :source_data
-    def initialize(array=[])
-      super @source_data = array
+    def initialize(array=[], source_data: [])
+      @source_data = source_data
+      super array
     end
     # Pivots the Array into a Hash of mined data.
@@ -47,7 +48,7 @@ module Goldmine
     # @yield [Object] Yields once for each item in the Array
     # @return [Hash] The pivoted Hash of data.
     def pivot(name=nil, &block)
-      reduce(HashMiner.new(source_data: source_data)) do |memo, item|
+      reduce(HashMiner.new(source_data: self)) do |memo, item|
         value = yield(item)
         if value.is_a?(Array)

data/lib/goldmine/hash_miner.rb CHANGED

@@ -1,11 +1,12 @@
 require "delegate"
+require "csv"
 module Goldmine
   class HashMiner < SimpleDelegator
     attr_reader :source_data
-    def initialize(hash={}, source_data: nil)
-      @source_data = source_data || hash
+    def initialize(hash={}, source_data: [])
+      @source_data = source_data
       super hash
     end
@@ -28,8 +29,8 @@ module Goldmine
     #
     # @note This method should not be called directly. Call Array#pivot instead.
     #
-    # @param [String] name The named of the pivot.
-    # @yield [Object] Yields once for each item in the Array
+    # @param name [String] The named of the pivot.
+    # @yield [Object] Yields once for each item in the Array.
     # @return [Hash] The pivoted Hash of data.
     def pivot(name=nil, &block)
       return self unless goldmine
@@ -51,10 +52,51 @@ module Goldmine
       end
     end
+    # Returns a new "rolled up" Hash based on the return value of the yield.
+    #
+    # @yield [Object] Yields once for each pivoted group.
+    # @return [Hash] The rollup Hash of data.
+    def rollup
+      each_with_object({}) do |pair, memo|
+        memo[pair.first] = yield(pair.last)
+      end
+    end
+    # Returns a tabular representation of the pivot.
+    # Useful for building CSVs & data visualizations.
+    #
+    # @param percent_column_name [String] The name of the percent column (percent of total)
+    # @param count_column_name [String] The name of the count column (number of objects)
+    # @return [Array] The tabular representation of the data.
+    def to_tabular(percent_column_name: "percent", count_column_name: "count")
+      [].tap do |rows|
+        rows << tabular_header_from_key(first.first) + [percent_column_name, count_column_name]
+        rolled = rollup { |row| row.size }
+        rolled.each do |key, value|
+          tabular_row_from_key(key).tap do |row|
+            rows << row + [calculate_percentage(value, source_data.size), value]
+          end
+        end
+      end
+    end
+    # Returns an in memory CSV table representation of the pivot.
+    # Useful for working with data & building data visualizations.
+    #
+    # @param percent_column_name [String] The name of the percent column (percent of total)
+    # @param count_column_name [String] The name of the count column (number of objects)
+    # @return [CSV::Table] The CSV representation of the data.
+    def to_csv(percent_column_name: "percent", count_column_name: "count")
+      tabular = to_tabular(percent_column_name: percent_column_name, count_column_name: count_column_name)
+      header = tabular.shift
+      rows = tabular.map { |row| CSV::Row.new(header, row) }
+      CSV::Table.new rows
+    end
     # Assigns a key/value pair to the Hash.
-    # @param [String] name The name of a pivot (can be null).
-    # @param [Object] key The key to use.
-    # @param [Object] value The value to assign
+    # @param name [String] The name of a pivot (can be null).
+    # @param key [Object] The key to use.
+    # @param value [Object] The value to assign
     # @return [Object] The result of the assignment.
     def assign_mined(name, key, value)
       goldmine_key = goldmine_key(name, key)
@@ -63,35 +105,31 @@ module Goldmine
     end
     # Creates a key for a pivot-name/key combo.
-    # @param [String] name The name of a pivot (can be null).
-    # @param [Object] key The key to use.
+    # @param name [String] The name of a pivot (can be null).
+    # @param key [Object] The key to use.
     # @return [Object] The constructed key.
     def goldmine_key(name, key)
       goldmine_key = { name => key } if name
       goldmine_key ||= key
     end
-    # Returns the pivot keys.
-    # @return [Array]
-    def pivoted_keys
-      first.first.keys
+    private
+    def calculate_percentage(count, total)
+      return 0.0 unless total > 0
+      sprintf("%.2f", count / total.to_f).to_f
     end
-    # Returns pivoted data as a tabular Array that can be used to build CSVs or user interfaces.
-    # @return [Array] Tabular pivot data
-    # @yield [Array] sort_by block for sorting the Array
-    def to_a(&block)
-      rows = map do |pair|
-        [].tap do |row|
-          row.concat pair.first.values
-          row << sprintf("%.2f", (pair.last.size / source_data.size.to_f)).to_f
-          row << pair.last.size
-        end
-      end
-      rows = rows.sort_by(&block) if block_given?
-      header = [pivoted_keys.map(&:to_s), "Percent of Total", "Count"].flatten
-      rows.insert 0, header
-      rows
+    def tabular_header_from_key(key)
+      return key.keys.map(&:to_s) if key.is_a?(Hash)
+      key = [key] unless key.is_a?(Array)
+      (0..key.size-1).map { |i| "column#{i}" }
+    end
+    def tabular_row_from_key(key)
+      return key.dup if key.is_a?(Array)
+      return [key] unless key.is_a?(Hash)
+      key.values.dup
     end
   end

data/lib/goldmine/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Goldmine
-  VERSION = "1.1.4"
+  VERSION = "1.2.0"
 end

data/test/test_goldmine.rb CHANGED

@@ -30,6 +30,44 @@ class TestGoldmine < PryTest::Test
     assert data == expected
   end
+  test "simple pivot rollup" do
+    list = [1,2,3,4,5,6,7,8,9]
+    list = Goldmine::ArrayMiner.new(list)
+    data = list.pivot { |i| i < 5 }
+    rolled = data.rollup { |row| row.size }
+    expected = {
+      true => 4,
+      false => 5
+    }
+    assert rolled == expected
+  end
+  test "simple pivot to_tabular" do
+    list = [1,2,3,4,5,6,7,8,9]
+    list = Goldmine::ArrayMiner.new(list)
+    data = list.pivot { |i| i < 5 }
+    expected = [
+      ["column0", "percent", "count"],
+      [true, 0.44, 4],
+      [false, 0.56, 5]
+    ]
+    assert data.to_tabular == expected
+  end
+  test "simple pivot to_csv" do
+    list = [1,2,3,4,5,6,7,8,9]
+    list = Goldmine::ArrayMiner.new(list)
+    data = list.pivot { |i| i < 5 }
+    csv = data.to_csv
+    assert csv.headers == ["column0", "percent", "count"]
+    assert csv.to_a == [["column0", "percent", "count"], [true, 0.44, 4], [false, 0.56, 5]]
+  end
   test "named pivot" do
     list = [1,2,3,4,5,6,7,8,9]
     list = Goldmine::ArrayMiner.new(list)
@@ -43,56 +81,32 @@ class TestGoldmine < PryTest::Test
     assert data == expected
   end
-  test "pivoted_keys" do
+  test "named pivot rollup" do
     list = [1,2,3,4,5,6,7,8,9]
     list = Goldmine::ArrayMiner.new(list)
     data = list.pivot("less than 5") { |i| i < 5 }
-    expected = ["less than 5"]
-    assert data.pivoted_keys == expected
-  end
-  test "to_a tabular data" do
-    list = [
-      { :name => "Sally",   :age => 21 },
-      { :name => "John",    :age => 28 },
-      { :name => "Stephen", :age => 37 },
-      { :name => "Emily",   :age => 32 },
-      { :name => "Joe",     :age => 18 }
-    ]
-    list = Goldmine::ArrayMiner.new(list)
-    mined = list.pivot("Name has an 'e'") do |record|
-      !!record[:name].match(/e/i)
-    end
-    mined = mined.pivot(">= 21 years old") do |record|
-      record[:age] >= 21
-    end
+    rolled = data.rollup { |row| row.size }
-    expected = [["Name has an 'e'", ">= 21 years old", "Percent of Total", "Count"], [true, false, 0.2, 1], [false, true, 0.4, 2], [true, true, 0.4, 2]]
-    # block is sort_by
-    tabular_data = mined.to_a do |row|
-      [row[2], row[0] ? 1 : 0, row[1] ? 1 : 0]
-    end
+    expected = {
+      { "less than 5" => true }  => 4,
+      { "less than 5" => false } => 5
+    }
-    assert tabular_data == expected
+    assert rolled == expected
   end
-  test "source_data" do
-    list = [
-      { :name => "Sally",   :age => 21 },
-      { :name => "John",    :age => 28 },
-      { :name => "Stephen", :age => 37 },
-      { :name => "Emily",   :age => 32 },
-      { :name => "Joe",     :age => 18 }
-    ]
+  test "named pivot to_tabular" do
+    list = [1,2,3,4,5,6,7,8,9]
     list = Goldmine::ArrayMiner.new(list)
-    mined = list.pivot("Name has an 'e'") do |record|
-      !!record[:name].match(/e/i)
-    end
-    mined = mined.pivot(">= 21 years old") do |record|
-      record[:age] >= 21
-    end
-    assert mined.source_data == list
+    data = list.pivot("less than 5") { |i| i < 5 }
+    expected = [
+      ["less than 5", "percent", "count"],
+      [true, 0.44, 4],
+      [false, 0.56, 5]
+    ]
+    assert data.to_tabular == expected
   end
   test "pivot of list values" do
@@ -164,6 +178,38 @@ class TestGoldmine < PryTest::Test
     assert data == expected
   end
+  test "chained pivots rollup" do
+    list = [1,2,3,4,5,6,7,8,9]
+    list = Goldmine::ArrayMiner.new(list)
+    data = list.pivot { |i| i < 5 }.pivot { |i| i % 2 == 0 }
+    rolled = data.rollup { |row| row.size }
+    expected = {
+      [true, false]  => 2,
+      [true, true]   => 2,
+      [false, false] => 3,
+      [false, true]  => 2
+    }
+    assert rolled == expected
+  end
+  test "chained pivots to_tabular" do
+    list = [1,2,3,4,5,6,7,8,9]
+    list = Goldmine::ArrayMiner.new(list)
+    data = list.pivot { |i| i < 5 }.pivot { |i| i % 2 == 0 }
+    expected = [
+      ["column0", "column1", "percent", "count"],
+      [true, false, 0.22, 2],
+      [true, true, 0.22, 2],
+      [false, false, 0.33, 3],
+      [false, true, 0.22, 2]
+    ]
+    assert data.to_tabular == expected
+  end
   test "deep chained pivots" do
     list = [1,2,3,4,5,6,7,8,9]
     list = Goldmine::ArrayMiner.new(list)
@@ -207,7 +253,6 @@ class TestGoldmine < PryTest::Test
     }
     assert data == expected
-    assert data.source_data == list
   end
   test "named chained pivots" do
@@ -225,4 +270,53 @@ class TestGoldmine < PryTest::Test
     assert data == expected
   end
+  test "named chained pivots rollup" do
+    list = [1,2,3,4,5,6,7,8,9]
+    list = Goldmine::ArrayMiner.new(list)
+    data = list.pivot("less than 5") { |i| i < 5 }.pivot("divisible by 2") { |i| i % 2 == 0 }
+    rolled = data.rollup { |row| row.size }
+    expected = {
+      { "less than 5" => true, "divisible by 2" => false }  => 2,
+      { "less than 5" => true, "divisible by 2" => true }   => 2,
+      { "less than 5" => false, "divisible by 2" => false } => 3,
+      { "less than 5" => false, "divisible by 2" => true }  => 2
+    }
+    assert rolled == expected
+  end
+  test "named chained pivots to tabular" do
+    list = [1,2,3,4,5,6,7,8,9]
+    list = Goldmine::ArrayMiner.new(list)
+    data = list.pivot("less than 5") { |i| i < 5 }.pivot("divisible by 2") { |i| i % 2 == 0 }
+    expected = [
+      ["less than 5", "divisible by 2", "percent", "count"],
+      [true, false, 0.22, 2],
+      [true, true, 0.22, 2],
+      [false, false, 0.33, 3],
+      [false, true, 0.22, 2]
+    ]
+    assert data.to_tabular == expected
+  end
+  test "named chained pivots to csv" do
+    list = [1,2,3,4,5,6,7,8,9]
+    list = Goldmine::ArrayMiner.new(list)
+    data = list.pivot("less than 5") { |i| i < 5 }.pivot("divisible by 2") { |i| i % 2 == 0 }
+    csv = data.to_csv
+    assert csv.to_a == data.to_tabular
+    expected = ["less than 5", "divisible by 2", "percent", "count"]
+    assert csv.headers == expected
+    row = csv.first
+    assert row["less than 5"] == true
+    assert row["divisible by 2"] == false
+    assert row["percent"] == 0.22
+    assert row ["count"] == 2
+  end
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: goldmine
 version: !ruby/object:Gem::Version
-  version: 1.1.4
+  version: 1.2.0
 platform: ruby
 authors:
 - Nathan Hopkins
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-05-28 00:00:00.000000000 Z
+date: 2015-06-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake