data_hut 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +11 -0
- data/README.md +28 -15
- data/lib/data_hut/data_warehouse.rb +45 -0
- data/lib/data_hut/version.rb +1 -1
- data/samples/league_of_legends.rb +19 -10
- data/test/spec/basic_test.rb +49 -3
- metadata +1 -1
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,16 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 0.0.7
|
4
|
+
|
5
|
+
* added capability to store and fetch arbitrary metadata from the DataHut.
|
6
|
+
|
7
|
+
This is useful in the case motivated by the samples/league_of_legends.rb:
|
8
|
+
the stat names are known at initial extract time; however,
|
9
|
+
subsequent transform runs may no longer have the transient variables that held the stat names, so the metadata needs to be stored
|
10
|
+
somewhere for future transform processing.
|
11
|
+
note: the stat names do not share the cardinality of the data records themselves, so they are truly metadata that governs how the records
|
12
|
+
are understood.
|
13
|
+
|
3
14
|
## 0.0.6
|
4
15
|
|
5
16
|
* externalized the Sequel database logger so that it can be set by DataHut clients. See DataHut::DataWarehouse#logger=
|
data/README.md
CHANGED
@@ -90,6 +90,7 @@ Read more about the [Sequel gem](http://sequel.rubyforge.org/rdoc/files/README_r
|
|
90
90
|
|
91
91
|
Taking a popular game like League of Legends and hand-rolling some simple analysis of the champions...
|
92
92
|
|
93
|
+
require 'data_hut'
|
93
94
|
require 'nokogiri'
|
94
95
|
require 'open-uri'
|
95
96
|
require 'pry'
|
@@ -121,12 +122,14 @@ Taking a popular game like League of Legends and hand-rolling some simple analys
|
|
121
122
|
r.name = champion_page.css('div.page_header_text').text
|
122
123
|
|
123
124
|
st = champion_page.css('table.stats_table')
|
124
|
-
names = st.css('td.stats_name').collect{|e| e.text.strip}
|
125
|
+
names = st.css('td.stats_name').collect{|e| e.text.strip.downcase.gsub(/ /,'_')}
|
125
126
|
values = st.css('td.stats_value').collect{|e| e.text.strip}
|
126
127
|
modifiers = st.css('td.stats_modifier').collect{|e| e.text.strip}
|
127
128
|
|
129
|
+
dh.store_meta(:stats, names)
|
130
|
+
|
128
131
|
(0..names.count-1).collect do |i|
|
129
|
-
stat = (names[i]
|
132
|
+
stat = (names[i] + "=").to_sym
|
130
133
|
r.send(stat, values[i].to_f)
|
131
134
|
stat_per_level = (names[i].downcase.gsub(/ /,'_') << "_per_level=").to_sym
|
132
135
|
per_level_value = modifiers[i].match(/\+([\d\.]+)/)[1].to_f rescue 0
|
@@ -145,18 +148,25 @@ Taking a popular game like League of Legends and hand-rolling some simple analys
|
|
145
148
|
puts "done."
|
146
149
|
end
|
147
150
|
|
151
|
+
# connect again in case extract was skipped because the core data already exists:
|
148
152
|
dh = DataHut.connect("lolstats")
|
149
153
|
|
154
|
+
# instead of writing out each stat line manually, we can use some metaprogramming along with some metadata to automate this.
|
155
|
+
def total_stat(r,stat)
|
156
|
+
total_stat = ("total_" + stat + "=").to_sym
|
157
|
+
stat_per_level = r.send((stat + "_per_level").to_sym)
|
158
|
+
base = r.send(stat.to_sym)
|
159
|
+
total = base + (stat_per_level * 18.0)
|
160
|
+
r.send(total_stat, total)
|
161
|
+
end
|
162
|
+
# we need to fetch metadata that was written during extract (potentially in a previous process run)
|
163
|
+
stats = dh.fetch_meta(:stats)
|
164
|
+
|
150
165
|
puts "first transform"
|
151
166
|
dh.transform do |r|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
r.total_move_speed = r.move_speed + (r.move_speed_per_level * 18.0)
|
156
|
-
r.total_armor = r.armor + (r.armor_per_level * 18.0)
|
157
|
-
r.total_spell_block = r.spell_block + (r.spell_block_per_level * 18.0)
|
158
|
-
r.total_health_regen = r.health_regen + (r.health_regen_per_level * 18.0)
|
159
|
-
r.total_mana_regen = r.mana_regen + (r.mana_regen_per_level * 18.0)
|
167
|
+
stats.each do |stat|
|
168
|
+
total_stat(r,stat)
|
169
|
+
end
|
160
170
|
print '.'
|
161
171
|
end
|
162
172
|
|
@@ -184,16 +194,17 @@ Now that we have some data, lets play...
|
|
184
194
|
|
185
195
|
* who has the most base damage?
|
186
196
|
|
187
|
-
[
|
197
|
+
[1] pry(main)> ds.order(Sequel.desc(:damage)).limit(5).collect{|c| {c.name => c.damage}}
|
188
198
|
=> [{"Taric"=>58.0},
|
189
199
|
{"Maokai"=>58.0},
|
190
200
|
{"Warwick"=>56.76},
|
191
201
|
{"Singed"=>56.65},
|
192
202
|
{"Poppy"=>56.3}]
|
193
203
|
|
204
|
+
|
194
205
|
* but wait a minute... what about at level 18? Fortunately, we've transformed our data to add some extra fields for this...
|
195
206
|
|
196
|
-
[
|
207
|
+
[2] pry(main)> ds.order(Sequel.desc(:total_damage)).limit(5).collect{|c| {c.name => c.total_damage}}
|
197
208
|
=> [{"Skarner"=>129.70000000000002},
|
198
209
|
{"Cho'Gath"=>129.70000000000002},
|
199
210
|
{"Kassadin"=>122.5},
|
@@ -203,7 +214,7 @@ Now that we have some data, lets play...
|
|
203
214
|
* how about using some of the indexes we defined above... like the 'nuke_index' (notice that the assumptions on what makes a good
|
204
215
|
nuke are subjective, but that's the fun of it; we can model our assumptions and see how the data changes in response.)
|
205
216
|
|
206
|
-
[
|
217
|
+
[3] pry(main)> ds.order(Sequel.desc(:nuke_index)).limit(5).collect{|c| {c.name => [c.total_damage, c.total_move_speed, c.total_mana, c.ability_power]}}
|
207
218
|
=> [{"Karthus"=>[100.7, 335.0, 1368.0, 10]},
|
208
219
|
{"Morgana"=>[114.58, 335.0, 1320.0, 9]},
|
209
220
|
{"Ryze"=>[106.0, 335.0, 1240.0, 10]},
|
@@ -214,14 +225,16 @@ I must have hit close to the mark, because personally I hate each of these champ
|
|
214
225
|
|
215
226
|
* and (now I risk becoming addicted to datahut myself), here are some further guesses with an easy_nuke index:
|
216
227
|
|
217
|
-
[
|
228
|
+
[4] pry(main)> ds.order(Sequel.desc(:easy_nuke_index)).limit(5).collect{|c| c.name}
|
218
229
|
=> ["Sona", "Ryze", "Nasus", "Soraka", "Heimerdinger"]
|
219
230
|
|
220
231
|
* makes sense, but is still fascinating... what about my crack at a support_index?
|
221
232
|
|
222
|
-
[
|
233
|
+
[5] pry(main)> ds.order(Sequel.desc(:support_index)).limit(5).collect{|c| c.name}
|
223
234
|
=> ["Sion", "Diana", "Nunu", "Nautilus", "Amumu"]
|
224
235
|
|
236
|
+
|
237
|
+
|
225
238
|
You get the idea now! *Extract* your data from anywhere, *transform* it however you like and *analyze* it for insights!
|
226
239
|
|
227
240
|
Have fun!
|
data/lib/data_hut/data_warehouse.rb
CHANGED
@@ -161,6 +161,42 @@ module DataHut
|
|
161
161
|
@db.logger = logger
|
162
162
|
end
|
163
163
|
|
164
|
+
|
165
|
+
|
166
|
+
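# stores metadata under the given key (the value is serialized with Marshal); storing again under an existing key replaces the previous value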
|
167
|
+
#
|
168
|
+
# @param key [Symbol] to lookup the metadata by
|
169
|
+
# @param value [Object] ruby object to store
|
170
|
+
def store_meta(key, value)
|
171
|
+
key = key.to_s if key.instance_of?(Symbol)
|
172
|
+
begin
|
173
|
+
value = Sequel::SQL::Blob.new(Marshal.dump(value))
|
174
|
+
if (@db[:data_warehouse_meta].where(key: key).count > 0)
|
175
|
+
@db[:data_warehouse_meta].where(key: key).update(value: value)
|
176
|
+
else
|
177
|
+
@db[:data_warehouse_meta].insert(key: key, value: value)
|
178
|
+
end
|
179
|
+
rescue Exception => e
|
180
|
+
raise(ArgumentError, "DataHut: unable to store metadata value #{value.inspect}.", caller)
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
# retrieves previously stored metadata by key
|
185
|
+
#
|
186
|
+
# @param key [Symbol] to lookup the metadata by
|
187
|
+
# @return [Object, nil] ruby object that was fetched, or nil if nothing is stored under the key
|
188
|
+
def fetch_meta(key)
|
189
|
+
key = key.to_s if key.instance_of?(Symbol)
|
190
|
+
begin
|
191
|
+
r = @db[:data_warehouse_meta].where(key: key).first
|
192
|
+
value = r[:value] unless r.nil?
|
193
|
+
value = Marshal.load(value) unless value.nil?
|
194
|
+
rescue Exception => e
|
195
|
+
raise(ArgumentError, "DataHut: unable to fetch metadata key #{key}.", caller)
|
196
|
+
end
|
197
|
+
value
|
198
|
+
end
|
199
|
+
|
164
200
|
private
|
165
201
|
|
166
202
|
def initialize(name)
|
@@ -173,6 +209,15 @@ module DataHut
|
|
173
209
|
column :dw_processed, TrueClass, :null => false, :default => false
|
174
210
|
end
|
175
211
|
end
|
212
|
+
|
213
|
+
unless @db.table_exists?(:data_warehouse_meta)
|
214
|
+
@db.create_table(:data_warehouse_meta) do
|
215
|
+
primary_key :dw_id
|
216
|
+
String :key
|
217
|
+
index :key
|
218
|
+
blob :value
|
219
|
+
end
|
220
|
+
end
|
176
221
|
end
|
177
222
|
|
178
223
|
def store(r)
|
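data/lib/data_hut/version.rb
CHANGED
(hunk not captured in this extract: +1 -1, the version bump from 0.0.6 to 0.0.7)
data/samples/league_of_legends.rb
CHANGED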
@@ -36,12 +36,14 @@ unless File.exists?("lolstats.db")
|
|
36
36
|
r.name = champion_page.css('div.page_header_text').text
|
37
37
|
|
38
38
|
st = champion_page.css('table.stats_table')
|
39
|
-
names = st.css('td.stats_name').collect{|e| e.text.strip}
|
39
|
+
names = st.css('td.stats_name').collect{|e| e.text.strip.downcase.gsub(/ /,'_')}
|
40
40
|
values = st.css('td.stats_value').collect{|e| e.text.strip}
|
41
41
|
modifiers = st.css('td.stats_modifier').collect{|e| e.text.strip}
|
42
42
|
|
43
|
+
dh.store_meta(:stats, names)
|
44
|
+
|
43
45
|
(0..names.count-1).collect do |i|
|
44
|
-
stat = (names[i]
|
46
|
+
stat = (names[i] + "=").to_sym
|
45
47
|
r.send(stat, values[i].to_f)
|
46
48
|
stat_per_level = (names[i].downcase.gsub(/ /,'_') << "_per_level=").to_sym
|
47
49
|
per_level_value = modifiers[i].match(/\+([\d\.]+)/)[1].to_f rescue 0
|
@@ -60,18 +62,25 @@ unless File.exists?("lolstats.db")
|
|
60
62
|
puts "done."
|
61
63
|
end
|
62
64
|
|
65
|
+
# connect again in case extract was skipped because the core data already exists:
|
63
66
|
dh = DataHut.connect("lolstats")
|
64
67
|
|
68
|
+
# instead of writing out each stat line manually, we can use some metaprogramming along with some metadata to automate this.
|
69
|
+
def total_stat(r,stat)
|
70
|
+
total_stat = ("total_" + stat + "=").to_sym
|
71
|
+
stat_per_level = r.send((stat + "_per_level").to_sym)
|
72
|
+
base = r.send(stat.to_sym)
|
73
|
+
total = base + (stat_per_level * 18.0)
|
74
|
+
r.send(total_stat, total)
|
75
|
+
end
|
76
|
+
# we need to fetch metadata that was written during extract (potentially in a previous process run)
|
77
|
+
stats = dh.fetch_meta(:stats)
|
78
|
+
|
65
79
|
puts "first transform"
|
66
80
|
dh.transform do |r|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
r.total_move_speed = r.move_speed + (r.move_speed_per_level * 18.0)
|
71
|
-
r.total_armor = r.armor + (r.armor_per_level * 18.0)
|
72
|
-
r.total_spell_block = r.spell_block + (r.spell_block_per_level * 18.0)
|
73
|
-
r.total_health_regen = r.health_regen + (r.health_regen_per_level * 18.0)
|
74
|
-
r.total_mana_regen = r.mana_regen + (r.mana_regen_per_level * 18.0)
|
81
|
+
stats.each do |stat|
|
82
|
+
total_stat(r,stat)
|
83
|
+
end
|
75
84
|
print '.'
|
76
85
|
end
|
77
86
|
|
data/test/spec/basic_test.rb
CHANGED
@@ -1,5 +1,16 @@
|
|
1
1
|
require_relative File.join(*%w[.. test_helper])
|
2
2
|
|
3
|
+
class Foo
|
4
|
+
attr_accessor :bar
|
5
|
+
|
6
|
+
def initialize
|
7
|
+
@time = DateTime.now
|
8
|
+
end
|
9
|
+
|
10
|
+
def what
|
11
|
+
puts "say what?"
|
12
|
+
end
|
13
|
+
end
|
3
14
|
|
4
15
|
describe DataHut do
|
5
16
|
def teardown
|
@@ -139,9 +150,6 @@ describe DataHut do
|
|
139
150
|
|
140
151
|
describe "nice usage" do
|
141
152
|
|
142
|
-
class Foo
|
143
|
-
end
|
144
|
-
|
145
153
|
it "should provide logging services to see or debug underlying Sequel" do
|
146
154
|
dh = DataHut.connect("foo")
|
147
155
|
|
@@ -174,5 +182,43 @@ describe DataHut do
|
|
174
182
|
|
175
183
|
end
|
176
184
|
|
185
|
+
|
186
|
+
describe "support adding and retrieving possibly useful metadata" do
|
187
|
+
|
188
|
+
it "should store and retrieve metadata" do
|
189
|
+
dh = DataHut.connect("foo")
|
190
|
+
|
191
|
+
val1 = "wizard"
|
192
|
+
val2 = ["larry", "steve", "barney"]
|
193
|
+
val3 = {one: "for the money", two: "for the show"}
|
194
|
+
val4 = Foo.new
|
195
|
+
|
196
|
+
dh.store_meta(:harry, val1)
|
197
|
+
dh.store_meta(:users, val2)
|
198
|
+
dh.store_meta(:my_little_hash, val3)
|
199
|
+
dh.store_meta(:an_object, val4)
|
200
|
+
|
201
|
+
assert_equal val1, dh.fetch_meta(:harry)
|
202
|
+
assert_equal val2, dh.fetch_meta(:users)
|
203
|
+
assert_equal val3, dh.fetch_meta(:my_little_hash)
|
204
|
+
|
205
|
+
assert_raises(MiniTest::Assertion) do
|
206
|
+
assert_equal val4, dh.fetch_meta(:an_object)
|
207
|
+
end
|
208
|
+
|
209
|
+
assert_equal nil, dh.fetch_meta(:not_there)
|
210
|
+
|
211
|
+
val5 = "muggle"
|
212
|
+
dh.store_meta(:harry, val5)
|
213
|
+
assert_equal val5, dh.fetch_meta(:harry)
|
214
|
+
|
215
|
+
val6 = nil
|
216
|
+
dh.store_meta(:harry, val6)
|
217
|
+
assert_equal val6, dh.fetch_meta(:harry)
|
218
|
+
|
219
|
+
end
|
220
|
+
|
221
|
+
end
|
222
|
+
|
177
223
|
end
|
178
224
|
|