data_hut 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +11 -0
- data/README.md +28 -15
- data/lib/data_hut/data_warehouse.rb +45 -0
- data/lib/data_hut/version.rb +1 -1
- data/samples/league_of_legends.rb +19 -10
- data/test/spec/basic_test.rb +49 -3
- metadata +1 -1
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,16 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 0.0.7
|
4
|
+
|
5
|
+
* added capability to store and fetch arbitrary metadata from the DataHut.
|
6
|
+
|
7
|
+
This is useful in the case motivated by the samples/league_of_legends.rb:
|
8
|
+
stat name is known at initial extract time, however
|
9
|
+
subsequent transform runs may or may not have any transient variables for stat names... hence the metadata needs to be stored
|
10
|
+
somewhere for future transform processing.
|
11
|
+
note: stat name is not of the same cardinality as the data records themselves, so it is truly metadata that governs how the records
|
12
|
+
are understood.
|
13
|
+
|
3
14
|
## 0.0.6
|
4
15
|
|
5
16
|
* externalized the Sequel database logger so that it can be set by DataHut clients. See DataHut::DataWarehouse#logger=
|
data/README.md
CHANGED
@@ -90,6 +90,7 @@ Read more about the [Sequel gem](http://sequel.rubyforge.org/rdoc/files/README_r
|
|
90
90
|
|
91
91
|
Taking a popular game like League of Legends and hand-rolling some simple analysis of the champions...
|
92
92
|
|
93
|
+
require 'data_hut'
|
93
94
|
require 'nokogiri'
|
94
95
|
require 'open-uri'
|
95
96
|
require 'pry'
|
@@ -121,12 +122,14 @@ Taking a popular game like League of Legends and hand-rolling some simple analys
|
|
121
122
|
r.name = champion_page.css('div.page_header_text').text
|
122
123
|
|
123
124
|
st = champion_page.css('table.stats_table')
|
124
|
-
names = st.css('td.stats_name').collect{|e| e.text.strip}
|
125
|
+
names = st.css('td.stats_name').collect{|e| e.text.strip.downcase.gsub(/ /,'_')}
|
125
126
|
values = st.css('td.stats_value').collect{|e| e.text.strip}
|
126
127
|
modifiers = st.css('td.stats_modifier').collect{|e| e.text.strip}
|
127
128
|
|
129
|
+
dh.store_meta(:stats, names)
|
130
|
+
|
128
131
|
(0..names.count-1).collect do |i|
|
129
|
-
stat = (names[i]
|
132
|
+
stat = (names[i] + "=").to_sym
|
130
133
|
r.send(stat, values[i].to_f)
|
131
134
|
stat_per_level = (names[i].downcase.gsub(/ /,'_') << "_per_level=").to_sym
|
132
135
|
per_level_value = modifiers[i].match(/\+([\d\.]+)/)[1].to_f rescue 0
|
@@ -145,18 +148,25 @@ Taking a popular game like League of Legends and hand-rolling some simple analys
|
|
145
148
|
puts "done."
|
146
149
|
end
|
147
150
|
|
151
|
+
# connect again in case extract was skipped because the core data already exists:
|
148
152
|
dh = DataHut.connect("lolstats")
|
149
153
|
|
154
|
+
# instead of writing out each stat line manually, we can use some metaprogramming along with some metadata to automate this.
|
155
|
+
# Computes the level-18 total for one stat and writes it back onto the record.
#
# Reads `<stat>_per_level` and `<stat>` from the record, then assigns
# `total_<stat> = base + (per_level * 18.0)`.
#
# @param r [Object] record responding to `<stat>`, `<stat>_per_level` and `total_<stat>=`
# @param stat [String, Symbol] stat name, e.g. "armor"
def total_stat(r, stat)
  per_level = r.send(:"#{stat}_per_level")
  base = r.send(stat.to_sym)
  # interpolation (rather than String#+) lets stat be a Symbol as well as a String
  r.send(:"total_#{stat}=", base + (per_level * 18.0))
end
|
162
|
+
# we need to fetch metadata that was written during extract (potentially in a previous process run)
|
163
|
+
stats = dh.fetch_meta(:stats)
|
164
|
+
|
150
165
|
puts "first transform"
|
151
166
|
dh.transform do |r|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
r.total_move_speed = r.move_speed + (r.move_speed_per_level * 18.0)
|
156
|
-
r.total_armor = r.armor + (r.armor_per_level * 18.0)
|
157
|
-
r.total_spell_block = r.spell_block + (r.spell_block_per_level * 18.0)
|
158
|
-
r.total_health_regen = r.health_regen + (r.health_regen_per_level * 18.0)
|
159
|
-
r.total_mana_regen = r.mana_regen + (r.mana_regen_per_level * 18.0)
|
167
|
+
stats.each do |stat|
|
168
|
+
total_stat(r,stat)
|
169
|
+
end
|
160
170
|
print '.'
|
161
171
|
end
|
162
172
|
|
@@ -184,16 +194,17 @@ Now that we have some data, lets play...
|
|
184
194
|
|
185
195
|
* who has the most base damage?
|
186
196
|
|
187
|
-
[
|
197
|
+
[1] pry(main)> ds.order(Sequel.desc(:damage)).limit(5).collect{|c| {c.name => c.damage}}
|
188
198
|
=> [{"Taric"=>58.0},
|
189
199
|
{"Maokai"=>58.0},
|
190
200
|
{"Warwick"=>56.76},
|
191
201
|
{"Singed"=>56.65},
|
192
202
|
{"Poppy"=>56.3}]
|
193
203
|
|
204
|
+
|
194
205
|
* but wait a minute... what about at level 18? Fortunately, we've transformed our data to add some extra fields for this...
|
195
206
|
|
196
|
-
[
|
207
|
+
[2] pry(main)> ds.order(Sequel.desc(:total_damage)).limit(5).collect{|c| {c.name => c.total_damage}}
|
197
208
|
=> [{"Skarner"=>129.70000000000002},
|
198
209
|
{"Cho'Gath"=>129.70000000000002},
|
199
210
|
{"Kassadin"=>122.5},
|
@@ -203,7 +214,7 @@ Now that we have some data, lets play...
|
|
203
214
|
* how about using some of the indexes we defined above... like the 'nuke_index' (notice that the assumptions on what makes a good
|
204
215
|
nuke are subjective, but that's the fun of it; we can model our assumptions and see how the data changes in response.)
|
205
216
|
|
206
|
-
[
|
217
|
+
[3] pry(main)> ds.order(Sequel.desc(:nuke_index)).limit(5).collect{|c| {c.name => [c.total_damage, c.total_move_speed, c.total_mana, c.ability_power]}}
|
207
218
|
=> [{"Karthus"=>[100.7, 335.0, 1368.0, 10]},
|
208
219
|
{"Morgana"=>[114.58, 335.0, 1320.0, 9]},
|
209
220
|
{"Ryze"=>[106.0, 335.0, 1240.0, 10]},
|
@@ -214,14 +225,16 @@ I must have hit close to the mark, because personally I hate each of these champ
|
|
214
225
|
|
215
226
|
* and (now I risk becoming addicted to datahut myself), here are some further guesses with an easy_nuke index:
|
216
227
|
|
217
|
-
[
|
228
|
+
[4] pry(main)> ds.order(Sequel.desc(:easy_nuke_index)).limit(5).collect{|c| c.name}
|
218
229
|
=> ["Sona", "Ryze", "Nasus", "Soraka", "Heimerdinger"]
|
219
230
|
|
220
231
|
* makes sense, but is still fascinating... what about my crack at a support_index?
|
221
232
|
|
222
|
-
[
|
233
|
+
[5] pry(main)> ds.order(Sequel.desc(:support_index)).limit(5).collect{|c| c.name}
|
223
234
|
=> ["Sion", "Diana", "Nunu", "Nautilus", "Amumu"]
|
224
235
|
|
236
|
+
|
237
|
+
|
225
238
|
You get the idea now! *Extract* your data from anywhere, *transform* it however you like and *analyze* it for insights!
|
226
239
|
|
227
240
|
Have fun!
|
@@ -161,6 +161,42 @@ module DataHut
|
|
161
161
|
@db.logger = logger
|
162
162
|
end
|
163
163
|
|
164
|
+
|
165
|
+
|
166
|
+
# Stores an arbitrary ruby object as metadata, replacing any value that was
# previously stored under the same key.
#
# The value is serialized with Marshal.dump and written as a blob to the
# data_warehouse_meta table.
#
# @param key [Symbol, String] to lookup the metadata by (a Symbol is stored as its String form)
# @param value [Object] ruby object to store; must be serializable by Marshal.dump
# @raise [ArgumentError] if the value cannot be serialized or written
def store_meta(key, value)
  key = key.to_s if key.instance_of?(Symbol)
  begin
    # keep the caller's value untouched so the error message below reports it,
    # not the marshalled blob
    blob = Sequel::SQL::Blob.new(Marshal.dump(value))
    existing = @db[:data_warehouse_meta].where(key: key)
    if existing.count > 0
      existing.update(value: blob)
    else
      @db[:data_warehouse_meta].insert(key: key, value: blob)
    end
  rescue StandardError
    # StandardError only: signals, SystemExit and the like must propagate unchanged.
    # The underlying error stays reachable through the raised exception's #cause.
    raise(ArgumentError, "DataHut: unable to store metadata value #{value.inspect}.", caller)
  end
end
|
183
|
+
|
184
|
+
# Retrieves previously stored metadata by key.
#
# Looks the key up in the data_warehouse_meta table and deserializes the
# stored blob with Marshal.load.
#
# @param key [Symbol, String] to lookup the metadata by (a Symbol is looked up by its String form)
# @return [Object, nil] ruby object that was fetched, or nil when nothing is stored under the key
# @raise [ArgumentError] if the lookup or deserialization fails
def fetch_meta(key)
  key = key.to_s if key.instance_of?(Symbol)
  begin
    row = @db[:data_warehouse_meta].where(key: key).first
    blob = row && row[:value]
    # NOTE(review): Marshal.load can instantiate arbitrary objects; this is only
    # safe while the underlying database file is trusted.
    blob.nil? ? nil : Marshal.load(blob)
  rescue StandardError
    # StandardError only: signals, SystemExit and the like must propagate unchanged.
    raise(ArgumentError, "DataHut: unable to fetch metadata key #{key}.", caller)
  end
end
|
199
|
+
|
164
200
|
private
|
165
201
|
|
166
202
|
def initialize(name)
|
@@ -173,6 +209,15 @@ module DataHut
|
|
173
209
|
column :dw_processed, TrueClass, :null => false, :default => false
|
174
210
|
end
|
175
211
|
end
|
212
|
+
|
213
|
+
unless @db.table_exists?(:data_warehouse_meta)
|
214
|
+
@db.create_table(:data_warehouse_meta) do
|
215
|
+
primary_key :dw_id
|
216
|
+
String :key
|
217
|
+
index :key
|
218
|
+
blob :value
|
219
|
+
end
|
220
|
+
end
|
176
221
|
end
|
177
222
|
|
178
223
|
def store(r)
|
data/lib/data_hut/version.rb
CHANGED
@@ -36,12 +36,14 @@ unless File.exists?("lolstats.db")
|
|
36
36
|
r.name = champion_page.css('div.page_header_text').text
|
37
37
|
|
38
38
|
st = champion_page.css('table.stats_table')
|
39
|
-
names = st.css('td.stats_name').collect{|e| e.text.strip}
|
39
|
+
names = st.css('td.stats_name').collect{|e| e.text.strip.downcase.gsub(/ /,'_')}
|
40
40
|
values = st.css('td.stats_value').collect{|e| e.text.strip}
|
41
41
|
modifiers = st.css('td.stats_modifier').collect{|e| e.text.strip}
|
42
42
|
|
43
|
+
dh.store_meta(:stats, names)
|
44
|
+
|
43
45
|
(0..names.count-1).collect do |i|
|
44
|
-
stat = (names[i]
|
46
|
+
stat = (names[i] + "=").to_sym
|
45
47
|
r.send(stat, values[i].to_f)
|
46
48
|
stat_per_level = (names[i].downcase.gsub(/ /,'_') << "_per_level=").to_sym
|
47
49
|
per_level_value = modifiers[i].match(/\+([\d\.]+)/)[1].to_f rescue 0
|
@@ -60,18 +62,25 @@ unless File.exists?("lolstats.db")
|
|
60
62
|
puts "done."
|
61
63
|
end
|
62
64
|
|
65
|
+
# connect again in case extract was skipped because the core data already exists:
|
63
66
|
dh = DataHut.connect("lolstats")
|
64
67
|
|
68
|
+
# instead of writing out each stat line manually, we can use some metaprogramming along with some metadata to automate this.
|
69
|
+
# Computes the level-18 total for one stat and writes it back onto the record.
#
# Reads `<stat>_per_level` and `<stat>` from the record, then assigns
# `total_<stat> = base + (per_level * 18.0)`.
#
# @param r [Object] record responding to `<stat>`, `<stat>_per_level` and `total_<stat>=`
# @param stat [String, Symbol] stat name, e.g. "armor"
def total_stat(r, stat)
  per_level = r.send(:"#{stat}_per_level")
  base = r.send(stat.to_sym)
  # interpolation (rather than String#+) lets stat be a Symbol as well as a String
  r.send(:"total_#{stat}=", base + (per_level * 18.0))
end
|
76
|
+
# we need to fetch metadata that was written during extract (potentially in a previous process run)
|
77
|
+
stats = dh.fetch_meta(:stats)
|
78
|
+
|
65
79
|
puts "first transform"
|
66
80
|
dh.transform do |r|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
r.total_move_speed = r.move_speed + (r.move_speed_per_level * 18.0)
|
71
|
-
r.total_armor = r.armor + (r.armor_per_level * 18.0)
|
72
|
-
r.total_spell_block = r.spell_block + (r.spell_block_per_level * 18.0)
|
73
|
-
r.total_health_regen = r.health_regen + (r.health_regen_per_level * 18.0)
|
74
|
-
r.total_mana_regen = r.mana_regen + (r.mana_regen_per_level * 18.0)
|
81
|
+
stats.each do |stat|
|
82
|
+
total_stat(r,stat)
|
83
|
+
end
|
75
84
|
print '.'
|
76
85
|
end
|
77
86
|
|
data/test/spec/basic_test.rb
CHANGED
@@ -1,5 +1,16 @@
|
|
1
1
|
require_relative File.join(*%w[.. test_helper])
|
2
2
|
|
3
|
+
# Minimal fixture class for the specs: an object that carries state but defines
# no custom equality, so two separately built instances never compare equal.
class Foo
  attr_accessor :bar

  def initialize
    # Time is core ruby (no `require 'date'` needed, unlike DateTime)
    @time = Time.now
  end

  # prints a fixed message to stdout
  def what
    puts "say what?"
  end
end
|
3
14
|
|
4
15
|
describe DataHut do
|
5
16
|
def teardown
|
@@ -139,9 +150,6 @@ describe DataHut do
|
|
139
150
|
|
140
151
|
describe "nice usage" do
|
141
152
|
|
142
|
-
class Foo
|
143
|
-
end
|
144
|
-
|
145
153
|
it "should provide logging services to see or debug underlying Sequel" do
|
146
154
|
dh = DataHut.connect("foo")
|
147
155
|
|
@@ -174,5 +182,43 @@ describe DataHut do
|
|
174
182
|
|
175
183
|
end
|
176
184
|
|
185
|
+
|
186
|
+
# Covers DataHut#store_meta / #fetch_meta: round-trips of plain ruby values,
# a missing key, overwriting an existing key, and storing nil.
describe "support adding and retrieving possibly useful metadata" do

  it "should store and retrieve metadata" do
    dh = DataHut.connect("foo")

    val1 = "wizard"
    val2 = ["larry", "steve", "barney"]
    val3 = {one: "for the money", two: "for the show"}
    val4 = Foo.new

    dh.store_meta(:harry, val1)
    dh.store_meta(:users, val2)
    dh.store_meta(:my_little_hash, val3)
    dh.store_meta(:an_object, val4)

    assert_equal val1, dh.fetch_meta(:harry)
    assert_equal val2, dh.fetch_meta(:users)
    assert_equal val3, dh.fetch_meta(:my_little_hash)

    # Foo defines no ==, so the fetched copy is a distinct object and must not compare equal
    refute_equal val4, dh.fetch_meta(:an_object)

    # a key that was never stored
    assert_nil dh.fetch_meta(:not_there)

    # storing under an existing key replaces the previous value
    val5 = "muggle"
    dh.store_meta(:harry, val5)
    assert_equal val5, dh.fetch_meta(:harry)

    # nil is a storable value too
    val6 = nil
    dh.store_meta(:harry, val6)
    assert_nil dh.fetch_meta(:harry)
  end

end
|
222
|
+
|
177
223
|
end
|
178
224
|
|