data_hut 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,16 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.0.7
4
+
5
+ * added capability to store and fetch arbitrary metadata from the DataHut.
6
+
7
+ This is useful in the case motivated by samples/league_of_legends.rb:
8
+ the stat names are known at initial extract time; however,
9
+ subsequent transform runs may not have the transient variables that held the stat names, so this metadata needs to be stored
10
+ somewhere for future transform processing.
11
+ Note: the stat names do not have the same cardinality as the data records themselves, so they are truly metadata that governs how the records
12
+ are understood.
13
+
3
14
  ## 0.0.6
4
15
 
5
16
  * externalized the Sequel database logger so that it can be set by DataHut clients. See DataHut::DataWarehouse#logger=
data/README.md CHANGED
@@ -90,6 +90,7 @@ Read more about the [Sequel gem](http://sequel.rubyforge.org/rdoc/files/README_r
90
90
 
91
91
  Taking a popular game like League of Legends and hand-rolling some simple analysis of the champions...
92
92
 
93
+ require 'data_hut'
93
94
  require 'nokogiri'
94
95
  require 'open-uri'
95
96
  require 'pry'
@@ -121,12 +122,14 @@ Taking a popular game like League of Legends and hand-rolling some simple analys
121
122
  r.name = champion_page.css('div.page_header_text').text
122
123
 
123
124
  st = champion_page.css('table.stats_table')
124
- names = st.css('td.stats_name').collect{|e| e.text.strip}
125
+ names = st.css('td.stats_name').collect{|e| e.text.strip.downcase.gsub(/ /,'_')}
125
126
  values = st.css('td.stats_value').collect{|e| e.text.strip}
126
127
  modifiers = st.css('td.stats_modifier').collect{|e| e.text.strip}
127
128
 
129
+ dh.store_meta(:stats, names)
130
+
128
131
  (0..names.count-1).collect do |i|
129
- stat = (names[i].downcase.gsub(/ /,'_') << "=").to_sym
132
+ stat = (names[i] + "=").to_sym
130
133
  r.send(stat, values[i].to_f)
131
134
  stat_per_level = (names[i].downcase.gsub(/ /,'_') << "_per_level=").to_sym
132
135
  per_level_value = modifiers[i].match(/\+([\d\.]+)/)[1].to_f rescue 0
@@ -145,18 +148,25 @@ Taking a popular game like League of Legends and hand-rolling some simple analys
145
148
  puts "done."
146
149
  end
147
150
 
151
+ # connect again in case extract was skipped because the core data already exists:
148
152
  dh = DataHut.connect("lolstats")
149
153
 
154
+ # instead of writing out each stat line manually, we use metaprogramming plus stored metadata: total_stat takes a stat name (a String) and sets r.total_<stat> = r.<stat> + (r.<stat>_per_level * 18.0).
155
+ def total_stat(r,stat)
156
+ total_stat = ("total_" + stat + "=").to_sym
157
+ stat_per_level = r.send((stat + "_per_level").to_sym)
158
+ base = r.send(stat.to_sym)
159
+ total = base + (stat_per_level * 18.0)
160
+ r.send(total_stat, total)
161
+ end
162
+ # we need to fetch metadata that was written during extract (potentially in a previous process run)
163
+ stats = dh.fetch_meta(:stats)
164
+
150
165
  puts "first transform"
151
166
  dh.transform do |r|
152
- r.total_damage = r.damage + (r.damage_per_level * 18.0)
153
- r.total_health = r.health + (r.health_per_level * 18.0)
154
- r.total_mana = r.mana + (r.mana_per_level * 18.0)
155
- r.total_move_speed = r.move_speed + (r.move_speed_per_level * 18.0)
156
- r.total_armor = r.armor + (r.armor_per_level * 18.0)
157
- r.total_spell_block = r.spell_block + (r.spell_block_per_level * 18.0)
158
- r.total_health_regen = r.health_regen + (r.health_regen_per_level * 18.0)
159
- r.total_mana_regen = r.mana_regen + (r.mana_regen_per_level * 18.0)
167
+ stats.each do |stat|
168
+ total_stat(r,stat)
169
+ end
160
170
  print '.'
161
171
  end
162
172
 
@@ -184,16 +194,17 @@ Now that we have some data, lets play...
184
194
 
185
195
  * who has the most base damage?
186
196
 
187
- [14] pry(main)> ds.order(Sequel.desc(:damage)).limit(5).collect{|c| {c.name => c.damage}}
197
+ [1] pry(main)> ds.order(Sequel.desc(:damage)).limit(5).collect{|c| {c.name => c.damage}}
188
198
  => [{"Taric"=>58.0},
189
199
  {"Maokai"=>58.0},
190
200
  {"Warwick"=>56.76},
191
201
  {"Singed"=>56.65},
192
202
  {"Poppy"=>56.3}]
193
203
 
204
+
194
205
  * but wait a minute... what about at level 18? Fortunately, we've transformed our data to add some extra fields for this...
195
206
 
196
- [3] pry(main)> ds.order(Sequel.desc(:total_damage)).limit(5).collect{|c| {c.name => c.total_damage}}
207
+ [2] pry(main)> ds.order(Sequel.desc(:total_damage)).limit(5).collect{|c| {c.name => c.total_damage}}
197
208
  => [{"Skarner"=>129.70000000000002},
198
209
  {"Cho'Gath"=>129.70000000000002},
199
210
  {"Kassadin"=>122.5},
@@ -203,7 +214,7 @@ Now that we have some data, lets play...
203
214
  * how about using some of the indexes we defined above... like the 'nuke_index' (notice that the assumptions on what makes a good
204
215
  nuke are subjective, but that's the fun of it; we can model our assumptions and see how the data changes in response.)
205
216
 
206
- [5] pry(main)> ds.order(Sequel.desc(:nuke_index)).limit(5).collect{|c| {c.name => [c.total_damage, c.total_move_speed, c.total_mana, c.ability_power]}}
217
+ [3] pry(main)> ds.order(Sequel.desc(:nuke_index)).limit(5).collect{|c| {c.name => [c.total_damage, c.total_move_speed, c.total_mana, c.ability_power]}}
207
218
  => [{"Karthus"=>[100.7, 335.0, 1368.0, 10]},
208
219
  {"Morgana"=>[114.58, 335.0, 1320.0, 9]},
209
220
  {"Ryze"=>[106.0, 335.0, 1240.0, 10]},
@@ -214,14 +225,16 @@ I must have hit close to the mark, because personally I hate each of these champ
214
225
 
215
226
  * and (now I risk becoming addicted to datahut myself), here are some further guesses with an easy_nuke index:
216
227
 
217
- [2] pry(main)> ds.order(Sequel.desc(:easy_nuke_index)).limit(5).collect{|c| c.name}
228
+ [4] pry(main)> ds.order(Sequel.desc(:easy_nuke_index)).limit(5).collect{|c| c.name}
218
229
  => ["Sona", "Ryze", "Nasus", "Soraka", "Heimerdinger"]
219
230
 
220
231
  * makes sense, but is still fascinating... what about my crack at a support_index?
221
232
 
222
- [3] pry(main)> ds.order(Sequel.desc(:support_index)).limit(5).collect{|c| c.name}
233
+ [5] pry(main)> ds.order(Sequel.desc(:support_index)).limit(5).collect{|c| c.name}
223
234
  => ["Sion", "Diana", "Nunu", "Nautilus", "Amumu"]
224
235
 
236
+
237
+
225
238
  You get the idea now! *Extract* your data from anywhere, *transform* it however you like and *analyze* it for insights!
226
239
 
227
240
  Have fun!
@@ -161,6 +161,42 @@ module DataHut
161
161
  @db.logger = logger
162
162
  end
163
163
 
164
+
165
+
166
+ # stores metadata under the given key, replacing any value already stored for that key.
167
+ # the value is serialized with Marshal.dump and saved as a blob; any failure is re-raised as ArgumentError.
168
+ # @param key [Symbol, String] key to look the metadata up by (a Symbol is converted to a String)
169
+ # @param value [Object] ruby object to store (must be serializable with Marshal.dump)
170
+ def store_meta(key, value)
171
+ key = key.to_s if key.instance_of?(Symbol)
172
+ begin
173
+ value = Sequel::SQL::Blob.new(Marshal.dump(value))
174
+ if (@db[:data_warehouse_meta].where(key: key).count > 0)
175
+ @db[:data_warehouse_meta].where(key: key).update(value: value)
176
+ else
177
+ @db[:data_warehouse_meta].insert(key: key, value: value)
178
+ end
179
+ rescue Exception => e
180
+ raise(ArgumentError, "DataHut: unable to store metadata value #{value.inspect}.", caller)
181
+ end
182
+ end
183
+
184
+ # retrieves previously stored metadata by key, deserializing the stored blob with Marshal.load.
185
+ # any failure during lookup or deserialization is re-raised as ArgumentError.
186
+ # @param key [Symbol, String] key to look the metadata up by (a Symbol is converted to a String)
187
+ # @return [Object, nil] ruby object that was fetched, or nil when nothing is stored under the key
188
+ def fetch_meta(key)
189
+ key = key.to_s if key.instance_of?(Symbol)
190
+ begin
191
+ r = @db[:data_warehouse_meta].where(key: key).first
192
+ value = r[:value] unless r.nil?
193
+ value = Marshal.load(value) unless value.nil?
194
+ rescue Exception => e
195
+ raise(ArgumentError, "DataHut: unable to fetch metadata key #{key}.", caller)
196
+ end
197
+ value
198
+ end
199
+
164
200
  private
165
201
 
166
202
  def initialize(name)
@@ -173,6 +209,15 @@ module DataHut
173
209
  column :dw_processed, TrueClass, :null => false, :default => false
174
210
  end
175
211
  end
212
+
213
+ unless @db.table_exists?(:data_warehouse_meta)
214
+ @db.create_table(:data_warehouse_meta) do
215
+ primary_key :dw_id
216
+ String :key
217
+ index :key
218
+ blob :value
219
+ end
220
+ end
176
221
  end
177
222
 
178
223
  def store(r)
@@ -1,3 +1,3 @@
1
1
  module DataHut
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.7"
3
3
  end
@@ -36,12 +36,14 @@ unless File.exists?("lolstats.db")
36
36
  r.name = champion_page.css('div.page_header_text').text
37
37
 
38
38
  st = champion_page.css('table.stats_table')
39
- names = st.css('td.stats_name').collect{|e| e.text.strip}
39
+ names = st.css('td.stats_name').collect{|e| e.text.strip.downcase.gsub(/ /,'_')}
40
40
  values = st.css('td.stats_value').collect{|e| e.text.strip}
41
41
  modifiers = st.css('td.stats_modifier').collect{|e| e.text.strip}
42
42
 
43
+ dh.store_meta(:stats, names)
44
+
43
45
  (0..names.count-1).collect do |i|
44
- stat = (names[i].downcase.gsub(/ /,'_') << "=").to_sym
46
+ stat = (names[i] + "=").to_sym
45
47
  r.send(stat, values[i].to_f)
46
48
  stat_per_level = (names[i].downcase.gsub(/ /,'_') << "_per_level=").to_sym
47
49
  per_level_value = modifiers[i].match(/\+([\d\.]+)/)[1].to_f rescue 0
@@ -60,18 +62,25 @@ unless File.exists?("lolstats.db")
60
62
  puts "done."
61
63
  end
62
64
 
65
+ # connect again in case extract was skipped because the core data already exists:
63
66
  dh = DataHut.connect("lolstats")
64
67
 
68
+ # instead of writing out each stat line manually, we use metaprogramming plus stored metadata: total_stat takes a stat name (a String) and sets r.total_<stat> = r.<stat> + (r.<stat>_per_level * 18.0).
69
+ def total_stat(r,stat)
70
+ total_stat = ("total_" + stat + "=").to_sym
71
+ stat_per_level = r.send((stat + "_per_level").to_sym)
72
+ base = r.send(stat.to_sym)
73
+ total = base + (stat_per_level * 18.0)
74
+ r.send(total_stat, total)
75
+ end
76
+ # we need to fetch metadata that was written during extract (potentially in a previous process run)
77
+ stats = dh.fetch_meta(:stats)
78
+
65
79
  puts "first transform"
66
80
  dh.transform do |r|
67
- r.total_damage = r.damage + (r.damage_per_level * 18.0)
68
- r.total_health = r.health + (r.health_per_level * 18.0)
69
- r.total_mana = r.mana + (r.mana_per_level * 18.0)
70
- r.total_move_speed = r.move_speed + (r.move_speed_per_level * 18.0)
71
- r.total_armor = r.armor + (r.armor_per_level * 18.0)
72
- r.total_spell_block = r.spell_block + (r.spell_block_per_level * 18.0)
73
- r.total_health_regen = r.health_regen + (r.health_regen_per_level * 18.0)
74
- r.total_mana_regen = r.mana_regen + (r.mana_regen_per_level * 18.0)
81
+ stats.each do |stat|
82
+ total_stat(r,stat)
83
+ end
75
84
  print '.'
76
85
  end
77
86
 
@@ -1,5 +1,16 @@
1
1
  require_relative File.join(*%w[.. test_helper])
2
2
 
3
+ class Foo
4
+ attr_accessor :bar
5
+
6
+ def initialize
7
+ @time = DateTime.now
8
+ end
9
+
10
+ def what
11
+ puts "say what?"
12
+ end
13
+ end
3
14
 
4
15
  describe DataHut do
5
16
  def teardown
@@ -139,9 +150,6 @@ describe DataHut do
139
150
 
140
151
  describe "nice usage" do
141
152
 
142
- class Foo
143
- end
144
-
145
153
  it "should provide logging services to see or debug underlying Sequel" do
146
154
  dh = DataHut.connect("foo")
147
155
 
@@ -174,5 +182,43 @@ describe DataHut do
174
182
 
175
183
  end
176
184
 
185
+
186
+ describe "support adding and retrieving possibly useful metadata" do
187
+
188
+ it "should store and retrieve metadata" do
189
+ dh = DataHut.connect("foo")
190
+
191
+ val1 = "wizard"
192
+ val2 = ["larry", "steve", "barney"]
193
+ val3 = {one: "for the money", two: "for the show"}
194
+ val4 = Foo.new
195
+
196
+ dh.store_meta(:harry, val1)
197
+ dh.store_meta(:users, val2)
198
+ dh.store_meta(:my_little_hash, val3)
199
+ dh.store_meta(:an_object, val4)
200
+
201
+ assert_equal val1, dh.fetch_meta(:harry)
202
+ assert_equal val2, dh.fetch_meta(:users)
203
+ assert_equal val3, dh.fetch_meta(:my_little_hash)
204
+
205
+ assert_raises(MiniTest::Assertion) do
206
+ assert_equal val4, dh.fetch_meta(:an_object)
207
+ end
208
+
209
+ assert_equal nil, dh.fetch_meta(:not_there)
210
+
211
+ val5 = "muggle"
212
+ dh.store_meta(:harry, val5)
213
+ assert_equal val5, dh.fetch_meta(:harry)
214
+
215
+ val6 = nil
216
+ dh.store_meta(:harry, val6)
217
+ assert_equal val6, dh.fetch_meta(:harry)
218
+
219
+ end
220
+
221
+ end
222
+
177
223
  end
178
224
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_hut
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: