RubyGems - data_hut - Versions diffs - 0.0.6 → 0.0.7 - Mend

data_hut 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

data/CHANGELOG.md +11 -0
data/README.md +28 -15
data/lib/data_hut/data_warehouse.rb +45 -0
data/lib/data_hut/version.rb +1 -1
data/samples/league_of_legends.rb +19 -10
data/test/spec/basic_test.rb +49 -3
metadata +1 -1

data/CHANGELOG.md CHANGED

@@ -1,5 +1,16 @@
 # Changelog
+## 0.0.7
+* added capability to store and fetch arbitrary metadata from the DataHut.
+  This is useful in the case motivated by the samples/league_of_legends.rb:
+    stat name is known at initial extract time, however
+    subsequent transform runs may or may not have any transient variables for stat names... hence the metadata needs to be stored
+    somewhere for future transform processing.
+    note: stat name is not of the same cardinality as the data records themselves, so it is truly metadata that governs how the records
+    are understood.
 ## 0.0.6
 * externalized the Sequel database logger so that it can be set by DataHut clients.  See DataHut::DataWarehouse#logger=

data/README.md CHANGED

@@ -90,6 +90,7 @@ Read more about the [Sequel gem](http://sequel.rubyforge.org/rdoc/files/README_r
 Taking a popular game like League of Legends and hand-rolling some simple analysis of the champions...
+    require 'data_hut'
     require 'nokogiri'
     require 'open-uri'
     require 'pry'
@@ -121,12 +122,14 @@ Taking a popular game like League of Legends and hand-rolling some simple analys
         r.name = champion_page.css('div.page_header_text').text
         st = champion_page.css('table.stats_table')
-        names = st.css('td.stats_name').collect{|e| e.text.strip}
+        names = st.css('td.stats_name').collect{|e| e.text.strip.downcase.gsub(/ /,'_')}
         values = st.css('td.stats_value').collect{|e| e.text.strip}
         modifiers = st.css('td.stats_modifier').collect{|e| e.text.strip}
+        dh.store_meta(:stats, names)
         (0..names.count-1).collect do |i|
-          stat = (names[i].downcase.gsub(/ /,'_') << "=").to_sym
+          stat = (names[i] + "=").to_sym
           r.send(stat, values[i].to_f)
           stat_per_level = (names[i].downcase.gsub(/ /,'_') << "_per_level=").to_sym
           per_level_value = modifiers[i].match(/\+([\d\.]+)/)[1].to_f rescue 0
@@ -145,18 +148,25 @@ Taking a popular game like League of Legends and hand-rolling some simple analys
       puts "done."
     end
+    # connect again in case extract was skipped because the core data already exists:
     dh = DataHut.connect("lolstats")
+    # instead of writing out each stat line manually, we can use some metaprogramming along with some metadata to automate this.
+    def total_stat(r,stat)
+      total_stat = ("total_" + stat + "=").to_sym
+      stat_per_level = r.send((stat + "_per_level").to_sym)
+      base = r.send(stat.to_sym)
+      total = base + (stat_per_level * 18.0)
+      r.send(total_stat, total)
+    end
+    # we need to fetch metadata that was written during extract (potentially in a previous process run)
+    stats = dh.fetch_meta(:stats)
     puts "first transform"
     dh.transform do |r|
-      r.total_damage = r.damage + (r.damage_per_level * 18.0)
-      r.total_health = r.health + (r.health_per_level * 18.0)
-      r.total_mana = r.mana + (r.mana_per_level * 18.0)
-      r.total_move_speed = r.move_speed + (r.move_speed_per_level * 18.0)
-      r.total_armor = r.armor + (r.armor_per_level * 18.0)
-      r.total_spell_block = r.spell_block + (r.spell_block_per_level * 18.0)
-      r.total_health_regen = r.health_regen + (r.health_regen_per_level * 18.0)
-      r.total_mana_regen = r.mana_regen + (r.mana_regen_per_level * 18.0)
+      stats.each do |stat|
+        total_stat(r,stat)
+      end
       print '.'
     end
@@ -184,16 +194,17 @@ Now that we have some data, lets play...
 * who has the most base damage?
-        [14] pry(main)> ds.order(Sequel.desc(:damage)).limit(5).collect{|c| {c.name => c.damage}}
+        [1] pry(main)> ds.order(Sequel.desc(:damage)).limit(5).collect{|c| {c.name => c.damage}}
         => [{"Taric"=>58.0},
          {"Maokai"=>58.0},
          {"Warwick"=>56.76},
          {"Singed"=>56.65},
          {"Poppy"=>56.3}]
 * but wait a minute... what about at level 18?  Fortunately, we've transformed our data to add some extra fields for this...
-        [3] pry(main)> ds.order(Sequel.desc(:total_damage)).limit(5).collect{|c| {c.name => c.total_damage}}
+        [2] pry(main)> ds.order(Sequel.desc(:total_damage)).limit(5).collect{|c| {c.name => c.total_damage}}
         => [{"Skarner"=>129.70000000000002},
          {"Cho'Gath"=>129.70000000000002},
          {"Kassadin"=>122.5},
@@ -203,7 +214,7 @@ Now that we have some data, lets play...
 * how about using some of the indexes we defined above... like the 'nuke_index' (notice that the assumptions on what make a good
 nuke are subjective, but that's the fun of it; we can model our assumptions and see how the data changes in response.)
-        [5] pry(main)> ds.order(Sequel.desc(:nuke_index)).limit(5).collect{|c| {c.name => [c.total_damage, c.total_move_speed, c.total_mana, c.ability_power]}}
+        [3] pry(main)> ds.order(Sequel.desc(:nuke_index)).limit(5).collect{|c| {c.name => [c.total_damage, c.total_move_speed, c.total_mana, c.ability_power]}}
         => [{"Karthus"=>[100.7, 335.0, 1368.0, 10]},
          {"Morgana"=>[114.58, 335.0, 1320.0, 9]},
          {"Ryze"=>[106.0, 335.0, 1240.0, 10]},
@@ -214,14 +225,16 @@ I must have hit close to the mark, because personally I hate each of these champ
 * and (now I risk becoming addicted to datahut myself), here's some further guesses with an easy_nuke index:
-        [2] pry(main)> ds.order(Sequel.desc(:easy_nuke_index)).limit(5).collect{|c| c.name}
+        [4] pry(main)> ds.order(Sequel.desc(:easy_nuke_index)).limit(5).collect{|c| c.name}
         => ["Sona", "Ryze", "Nasus", "Soraka", "Heimerdinger"]
 * makes sense, but is still fascinating... what about my crack at a support_index?
-        [3] pry(main)> ds.order(Sequel.desc(:support_index)).limit(5).collect{|c| c.name}
+        [5] pry(main)> ds.order(Sequel.desc(:support_index)).limit(5).collect{|c| c.name}
         => ["Sion", "Diana", "Nunu", "Nautilus", "Amumu"]
 You get the idea now!  *Extract* your data from anywhere, *transform* it however you like and *analyze* it for insights!
 Have fun!

data/lib/data_hut/data_warehouse.rb CHANGED

@@ -161,6 +161,42 @@ module DataHut
       @db.logger = logger
     end
+    # stores metadata
+    #
+    # @param key [Symbol] to lookup the metadata by
+    # @param value [Object] ruby object to store
+    def store_meta(key, value)
+      key = key.to_s if key.instance_of?(Symbol)
+      begin
+        value = Sequel::SQL::Blob.new(Marshal.dump(value))
+        if (@db[:data_warehouse_meta].where(key: key).count > 0)
+          @db[:data_warehouse_meta].where(key: key).update(value: value)
+        else
+          @db[:data_warehouse_meta].insert(key: key, value: value)
+        end
+      rescue Exception => e
+        raise(ArgumentError, "DataHut: unable to store metadata value #{value.inspect}.", caller)
+      end
+    end
+    # retrieves previously stored metadata by key
+    #
+    # @param key [Symbol] to lookup the metadata by
+    # @return [Object] ruby object that was fetched
+    def fetch_meta(key)
+      key = key.to_s if key.instance_of?(Symbol)
+      begin
+        r = @db[:data_warehouse_meta].where(key: key).first
+        value = r[:value] unless r.nil?
+        value = Marshal.load(value) unless value.nil?
+      rescue Exception => e
+        raise(ArgumentError, "DataHut: unable to fetch metadata key #{key}.", caller)
+      end
+      value
+    end
     private
     def initialize(name)
@@ -173,6 +209,15 @@ module DataHut
           column :dw_processed, TrueClass, :null => false, :default => false
         end
       end
+      unless @db.table_exists?(:data_warehouse_meta)
+        @db.create_table(:data_warehouse_meta) do
+          primary_key :dw_id
+          String :key
+          index :key
+          blob :value
+        end
+      end
     end
     def store(r)

data/lib/data_hut/version.rb CHANGED

@@ -1,3 +1,3 @@
 module DataHut
-  VERSION = "0.0.6"
+  VERSION = "0.0.7"
 end

data/samples/league_of_legends.rb CHANGED

@@ -36,12 +36,14 @@ unless File.exists?("lolstats.db")
     r.name = champion_page.css('div.page_header_text').text
     st = champion_page.css('table.stats_table')
-    names = st.css('td.stats_name').collect{|e| e.text.strip}
+    names = st.css('td.stats_name').collect{|e| e.text.strip.downcase.gsub(/ /,'_')}
     values = st.css('td.stats_value').collect{|e| e.text.strip}
     modifiers = st.css('td.stats_modifier').collect{|e| e.text.strip}
+    dh.store_meta(:stats, names)
     (0..names.count-1).collect do |i|
-      stat = (names[i].downcase.gsub(/ /,'_') << "=").to_sym
+      stat = (names[i] + "=").to_sym
       r.send(stat, values[i].to_f)
       stat_per_level = (names[i].downcase.gsub(/ /,'_') << "_per_level=").to_sym
       per_level_value = modifiers[i].match(/\+([\d\.]+)/)[1].to_f rescue 0
@@ -60,18 +62,25 @@ unless File.exists?("lolstats.db")
   puts "done."
 end
+# connect again in case extract was skipped because the core data already exists:
 dh = DataHut.connect("lolstats")
+# instead of writing out each stat line manually, we can use some metaprogramming along with some metadata to automate this.
+def total_stat(r,stat)
+  total_stat = ("total_" + stat + "=").to_sym
+  stat_per_level = r.send((stat + "_per_level").to_sym)
+  base = r.send(stat.to_sym)
+  total = base + (stat_per_level * 18.0)
+  r.send(total_stat, total)
+end
+# we need to fetch metadata that was written during extract (potentially in a previous process run)
+stats = dh.fetch_meta(:stats)
 puts "first transform"
 dh.transform do |r|
-  r.total_damage = r.damage + (r.damage_per_level * 18.0)
-  r.total_health = r.health + (r.health_per_level * 18.0)
-  r.total_mana = r.mana + (r.mana_per_level * 18.0)
-  r.total_move_speed = r.move_speed + (r.move_speed_per_level * 18.0)
-  r.total_armor = r.armor + (r.armor_per_level * 18.0)
-  r.total_spell_block = r.spell_block + (r.spell_block_per_level * 18.0)
-  r.total_health_regen = r.health_regen + (r.health_regen_per_level * 18.0)
-  r.total_mana_regen = r.mana_regen + (r.mana_regen_per_level * 18.0)
+  stats.each do |stat|
+    total_stat(r,stat)
+  end
   print '.'
 end

data/test/spec/basic_test.rb CHANGED

@@ -1,5 +1,16 @@
 require_relative File.join(*%w[.. test_helper])
+class Foo
+  attr_accessor :bar
+  def initialize
+    @time = DateTime.now
+  end
+  def what
+    puts "say what?"
+  end
+end
 describe DataHut do
   def teardown
@@ -139,9 +150,6 @@ describe DataHut do
   describe "nice usage" do
-    class Foo
-    end
     it "should provide logging services to see or debug underlying Sequel" do
       dh = DataHut.connect("foo")
@@ -174,5 +182,43 @@ describe DataHut do
   end
+  describe "support adding and retrieving possibly useful metadata" do
+    it "should store and retrieve metadata" do
+      dh = DataHut.connect("foo")
+      val1 = "wizard"
+      val2 = ["larry", "steve", "barney"]
+      val3 = {one: "for the money", two: "for the show"}
+      val4 = Foo.new
+      dh.store_meta(:harry, val1)
+      dh.store_meta(:users, val2)
+      dh.store_meta(:my_little_hash, val3)
+      dh.store_meta(:an_object, val4)
+      assert_equal val1, dh.fetch_meta(:harry)
+      assert_equal val2, dh.fetch_meta(:users)
+      assert_equal val3, dh.fetch_meta(:my_little_hash)
+      assert_raises(MiniTest::Assertion) do
+        assert_equal val4, dh.fetch_meta(:an_object)
+      end
+      assert_equal nil, dh.fetch_meta(:not_there)
+      val5 = "muggle"
+      dh.store_meta(:harry, val5)
+      assert_equal val5, dh.fetch_meta(:harry)
+      val6 = nil
+      dh.store_meta(:harry, val6)
+      assert_equal val6, dh.fetch_meta(:harry)
+    end
+  end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: data_hut
 version: !ruby/object:Gem::Version
-  version: 0.0.6
+  version: 0.0.7
   prerelease:
 platform: ruby
 authors: