data_hut 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,16 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.0.7
4
+
5
+ * added capability to store and fetch arbitrary metadata from the DataHut.
6
+
7
+ This is useful in the case motivated by the samples/league_of_legends.rb:
8
+ the stat names are known at initial extract time; however,
9
+ subsequent transform runs may or may not have any transient variables for stat names... hence the metadata needs to be stored
10
+ somewhere for future transform processing.
11
+ Note: the stat names do not have the same cardinality as the data records themselves, so they are truly metadata that governs how the records
12
+ are understood.
13
+
3
14
  ## 0.0.6
4
15
 
5
16
  * externalized the Sequel database logger so that it can be set by DataHut clients. See DataHut::DataWarehouse#logger=
data/README.md CHANGED
@@ -90,6 +90,7 @@ Read more about the [Sequel gem](http://sequel.rubyforge.org/rdoc/files/README_r
90
90
 
91
91
  Taking a popular game like League of Legends and hand-rolling some simple analysis of the champions...
92
92
 
93
+ require 'data_hut'
93
94
  require 'nokogiri'
94
95
  require 'open-uri'
95
96
  require 'pry'
@@ -121,12 +122,14 @@ Taking a popular game like League of Legends and hand-rolling some simple analys
121
122
  r.name = champion_page.css('div.page_header_text').text
122
123
 
123
124
  st = champion_page.css('table.stats_table')
124
- names = st.css('td.stats_name').collect{|e| e.text.strip}
125
+ names = st.css('td.stats_name').collect{|e| e.text.strip.downcase.gsub(/ /,'_')}
125
126
  values = st.css('td.stats_value').collect{|e| e.text.strip}
126
127
  modifiers = st.css('td.stats_modifier').collect{|e| e.text.strip}
127
128
 
129
+ dh.store_meta(:stats, names)
130
+
128
131
  (0..names.count-1).collect do |i|
129
- stat = (names[i].downcase.gsub(/ /,'_') << "=").to_sym
132
+ stat = (names[i] + "=").to_sym
130
133
  r.send(stat, values[i].to_f)
131
134
  stat_per_level = (names[i].downcase.gsub(/ /,'_') << "_per_level=").to_sym
132
135
  per_level_value = modifiers[i].match(/\+([\d\.]+)/)[1].to_f rescue 0
@@ -145,18 +148,25 @@ Taking a popular game like League of Legends and hand-rolling some simple analys
145
148
  puts "done."
146
149
  end
147
150
 
151
+ # connect again in case extract was skipped because the core data already exists:
148
152
  dh = DataHut.connect("lolstats")
149
153
 
154
+ # instead of writing out each stat line manually, we can use some metaprogramming along with some metadata to automate this.
155
+ def total_stat(r,stat)
156
+ total_stat = ("total_" + stat + "=").to_sym
157
+ stat_per_level = r.send((stat + "_per_level").to_sym)
158
+ base = r.send(stat.to_sym)
159
+ total = base + (stat_per_level * 18.0)
160
+ r.send(total_stat, total)
161
+ end
162
+ # we need to fetch metadata that was written during extract (potentially in a previous process run)
163
+ stats = dh.fetch_meta(:stats)
164
+
150
165
  puts "first transform"
151
166
  dh.transform do |r|
152
- r.total_damage = r.damage + (r.damage_per_level * 18.0)
153
- r.total_health = r.health + (r.health_per_level * 18.0)
154
- r.total_mana = r.mana + (r.mana_per_level * 18.0)
155
- r.total_move_speed = r.move_speed + (r.move_speed_per_level * 18.0)
156
- r.total_armor = r.armor + (r.armor_per_level * 18.0)
157
- r.total_spell_block = r.spell_block + (r.spell_block_per_level * 18.0)
158
- r.total_health_regen = r.health_regen + (r.health_regen_per_level * 18.0)
159
- r.total_mana_regen = r.mana_regen + (r.mana_regen_per_level * 18.0)
167
+ stats.each do |stat|
168
+ total_stat(r,stat)
169
+ end
160
170
  print '.'
161
171
  end
162
172
 
@@ -184,16 +194,17 @@ Now that we have some data, lets play...
184
194
 
185
195
  * who has the most base damage?
186
196
 
187
- [14] pry(main)> ds.order(Sequel.desc(:damage)).limit(5).collect{|c| {c.name => c.damage}}
197
+ [1] pry(main)> ds.order(Sequel.desc(:damage)).limit(5).collect{|c| {c.name => c.damage}}
188
198
  => [{"Taric"=>58.0},
189
199
  {"Maokai"=>58.0},
190
200
  {"Warwick"=>56.76},
191
201
  {"Singed"=>56.65},
192
202
  {"Poppy"=>56.3}]
193
203
 
204
+
194
205
  * but wait a minute... what about at level 18? Fortunately, we've transformed our data to add some extra fields for this...
195
206
 
196
- [3] pry(main)> ds.order(Sequel.desc(:total_damage)).limit(5).collect{|c| {c.name => c.total_damage}}
207
+ [2] pry(main)> ds.order(Sequel.desc(:total_damage)).limit(5).collect{|c| {c.name => c.total_damage}}
197
208
  => [{"Skarner"=>129.70000000000002},
198
209
  {"Cho'Gath"=>129.70000000000002},
199
210
  {"Kassadin"=>122.5},
@@ -203,7 +214,7 @@ Now that we have some data, lets play...
203
214
  * how about using some of the indexes we defined above... like the 'nuke_index' (notice that the assumptions about what makes a good
204
215
  nuke are subjective, but that's the fun of it; we can model our assumptions and see how the data changes in response.)
205
216
 
206
- [5] pry(main)> ds.order(Sequel.desc(:nuke_index)).limit(5).collect{|c| {c.name => [c.total_damage, c.total_move_speed, c.total_mana, c.ability_power]}}
217
+ [3] pry(main)> ds.order(Sequel.desc(:nuke_index)).limit(5).collect{|c| {c.name => [c.total_damage, c.total_move_speed, c.total_mana, c.ability_power]}}
207
218
  => [{"Karthus"=>[100.7, 335.0, 1368.0, 10]},
208
219
  {"Morgana"=>[114.58, 335.0, 1320.0, 9]},
209
220
  {"Ryze"=>[106.0, 335.0, 1240.0, 10]},
@@ -214,14 +225,16 @@ I must have hit close to the mark, because personally I hate each of these champ
214
225
 
215
226
  * and (now I risk becoming addicted to datahut myself), here are some further guesses with an easy_nuke index:
216
227
 
217
- [2] pry(main)> ds.order(Sequel.desc(:easy_nuke_index)).limit(5).collect{|c| c.name}
228
+ [4] pry(main)> ds.order(Sequel.desc(:easy_nuke_index)).limit(5).collect{|c| c.name}
218
229
  => ["Sona", "Ryze", "Nasus", "Soraka", "Heimerdinger"]
219
230
 
220
231
  * makes sense, but is still fascinating... what about my crack at a support_index?
221
232
 
222
- [3] pry(main)> ds.order(Sequel.desc(:support_index)).limit(5).collect{|c| c.name}
233
+ [5] pry(main)> ds.order(Sequel.desc(:support_index)).limit(5).collect{|c| c.name}
223
234
  => ["Sion", "Diana", "Nunu", "Nautilus", "Amumu"]
224
235
 
236
+
237
+
225
238
  You get the idea now! *Extract* your data from anywhere, *transform* it however you like and *analyze* it for insights!
226
239
 
227
240
  Have fun!
@@ -161,6 +161,42 @@ module DataHut
161
161
  @db.logger = logger
162
162
  end
163
163
 
164
+
165
+
166
+ # stores metadata
167
+ #
168
+ # @param key [Symbol] to lookup the metadata by
169
+ # @param value [Object] ruby object to store
170
+ def store_meta(key, value)
171
+ key = key.to_s if key.instance_of?(Symbol)
172
+ begin
173
+ value = Sequel::SQL::Blob.new(Marshal.dump(value))
174
+ if (@db[:data_warehouse_meta].where(key: key).count > 0)
175
+ @db[:data_warehouse_meta].where(key: key).update(value: value)
176
+ else
177
+ @db[:data_warehouse_meta].insert(key: key, value: value)
178
+ end
179
+ rescue Exception => e
180
+ raise(ArgumentError, "DataHut: unable to store metadata value #{value.inspect}.", caller)
181
+ end
182
+ end
183
+
184
+ # retrieves previously stored metadata by key
185
+ #
186
+ # @param key [Symbol] to lookup the metadata by
187
+ # @return [Object] ruby object that was fetched
188
+ def fetch_meta(key)
189
+ key = key.to_s if key.instance_of?(Symbol)
190
+ begin
191
+ r = @db[:data_warehouse_meta].where(key: key).first
192
+ value = r[:value] unless r.nil?
193
+ value = Marshal.load(value) unless value.nil?
194
+ rescue Exception => e
195
+ raise(ArgumentError, "DataHut: unable to fetch metadata key #{key}.", caller)
196
+ end
197
+ value
198
+ end
199
+
164
200
  private
165
201
 
166
202
  def initialize(name)
@@ -173,6 +209,15 @@ module DataHut
173
209
  column :dw_processed, TrueClass, :null => false, :default => false
174
210
  end
175
211
  end
212
+
213
+ unless @db.table_exists?(:data_warehouse_meta)
214
+ @db.create_table(:data_warehouse_meta) do
215
+ primary_key :dw_id
216
+ String :key
217
+ index :key
218
+ blob :value
219
+ end
220
+ end
176
221
  end
177
222
 
178
223
  def store(r)
@@ -1,3 +1,3 @@
1
1
  module DataHut
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.7"
3
3
  end
@@ -36,12 +36,14 @@ unless File.exists?("lolstats.db")
36
36
  r.name = champion_page.css('div.page_header_text').text
37
37
 
38
38
  st = champion_page.css('table.stats_table')
39
- names = st.css('td.stats_name').collect{|e| e.text.strip}
39
+ names = st.css('td.stats_name').collect{|e| e.text.strip.downcase.gsub(/ /,'_')}
40
40
  values = st.css('td.stats_value').collect{|e| e.text.strip}
41
41
  modifiers = st.css('td.stats_modifier').collect{|e| e.text.strip}
42
42
 
43
+ dh.store_meta(:stats, names)
44
+
43
45
  (0..names.count-1).collect do |i|
44
- stat = (names[i].downcase.gsub(/ /,'_') << "=").to_sym
46
+ stat = (names[i] + "=").to_sym
45
47
  r.send(stat, values[i].to_f)
46
48
  stat_per_level = (names[i].downcase.gsub(/ /,'_') << "_per_level=").to_sym
47
49
  per_level_value = modifiers[i].match(/\+([\d\.]+)/)[1].to_f rescue 0
@@ -60,18 +62,25 @@ unless File.exists?("lolstats.db")
60
62
  puts "done."
61
63
  end
62
64
 
65
+ # connect again in case extract was skipped because the core data already exists:
63
66
  dh = DataHut.connect("lolstats")
64
67
 
68
+ # instead of writing out each stat line manually, we can use some metaprogramming along with some metadata to automate this.
69
+ def total_stat(r,stat)
70
+ total_stat = ("total_" + stat + "=").to_sym
71
+ stat_per_level = r.send((stat + "_per_level").to_sym)
72
+ base = r.send(stat.to_sym)
73
+ total = base + (stat_per_level * 18.0)
74
+ r.send(total_stat, total)
75
+ end
76
+ # we need to fetch metadata that was written during extract (potentially in a previous process run)
77
+ stats = dh.fetch_meta(:stats)
78
+
65
79
  puts "first transform"
66
80
  dh.transform do |r|
67
- r.total_damage = r.damage + (r.damage_per_level * 18.0)
68
- r.total_health = r.health + (r.health_per_level * 18.0)
69
- r.total_mana = r.mana + (r.mana_per_level * 18.0)
70
- r.total_move_speed = r.move_speed + (r.move_speed_per_level * 18.0)
71
- r.total_armor = r.armor + (r.armor_per_level * 18.0)
72
- r.total_spell_block = r.spell_block + (r.spell_block_per_level * 18.0)
73
- r.total_health_regen = r.health_regen + (r.health_regen_per_level * 18.0)
74
- r.total_mana_regen = r.mana_regen + (r.mana_regen_per_level * 18.0)
81
+ stats.each do |stat|
82
+ total_stat(r,stat)
83
+ end
75
84
  print '.'
76
85
  end
77
86
 
@@ -1,5 +1,16 @@
1
1
  require_relative File.join(*%w[.. test_helper])
2
2
 
3
+ class Foo
4
+ attr_accessor :bar
5
+
6
+ def initialize
7
+ @time = DateTime.now
8
+ end
9
+
10
+ def what
11
+ puts "say what?"
12
+ end
13
+ end
3
14
 
4
15
  describe DataHut do
5
16
  def teardown
@@ -139,9 +150,6 @@ describe DataHut do
139
150
 
140
151
  describe "nice usage" do
141
152
 
142
- class Foo
143
- end
144
-
145
153
  it "should provide logging services to see or debug underlying Sequel" do
146
154
  dh = DataHut.connect("foo")
147
155
 
@@ -174,5 +182,43 @@ describe DataHut do
174
182
 
175
183
  end
176
184
 
185
+
186
+ describe "support adding and retrieving possibly useful metadata" do
187
+
188
+ it "should store and retrieve metadata" do
189
+ dh = DataHut.connect("foo")
190
+
191
+ val1 = "wizard"
192
+ val2 = ["larry", "steve", "barney"]
193
+ val3 = {one: "for the money", two: "for the show"}
194
+ val4 = Foo.new
195
+
196
+ dh.store_meta(:harry, val1)
197
+ dh.store_meta(:users, val2)
198
+ dh.store_meta(:my_little_hash, val3)
199
+ dh.store_meta(:an_object, val4)
200
+
201
+ assert_equal val1, dh.fetch_meta(:harry)
202
+ assert_equal val2, dh.fetch_meta(:users)
203
+ assert_equal val3, dh.fetch_meta(:my_little_hash)
204
+
205
+ assert_raises(MiniTest::Assertion) do
206
+ assert_equal val4, dh.fetch_meta(:an_object)
207
+ end
208
+
209
+ assert_equal nil, dh.fetch_meta(:not_there)
210
+
211
+ val5 = "muggle"
212
+ dh.store_meta(:harry, val5)
213
+ assert_equal val5, dh.fetch_meta(:harry)
214
+
215
+ val6 = nil
216
+ dh.store_meta(:harry, val6)
217
+ assert_equal val6, dh.fetch_meta(:harry)
218
+
219
+ end
220
+
221
+ end
222
+
177
223
  end
178
224
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_hut
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: