greenmonster 0.5.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/LICENSE.txt +21 -0
- data/README.markdown +11 -28
- data/Rakefile +3 -8
- data/doc/Gemfile.html +6 -24
- data/doc/Greenmonster/Spider.html +47 -258
- data/doc/Greenmonster.html +25 -86
- data/doc/Rakefile.html +10 -25
- data/doc/created.rid +12 -18
- data/doc/index.html +6 -24
- data/doc/js/search_index.js +1 -1
- data/doc/table_of_contents.html +12 -129
- data/greenmonster.gemspec +16 -13
- data/lib/greenmonster/day_spider.rb +61 -0
- data/lib/greenmonster/file_downloader.rb +38 -0
- data/lib/greenmonster/game_spider.rb +100 -0
- data/lib/greenmonster/innings_downloader.rb +41 -0
- data/lib/greenmonster/spider.rb +1 -0
- data/lib/greenmonster/version.rb +2 -2
- data/lib/greenmonster.rb +16 -82
- data/spec/cassettes/aaa/year_2015/month_05/day_09.yml +153 -0
- data/spec/cassettes/gid_2007_10_01_sdnmlb_colmlb_1/innings.yml +135 -0
- data/spec/cassettes/gid_2015_04_18_balmlb_bos_mlb_1/players_xml.yml +292 -0
- data/spec/cassettes/gid_2015_04_18_balmlb_bos_mlb_2/players_xml.yml +46 -0
- data/spec/cassettes/mlb/year_2015/month_04/day_18.yml +170 -0
- data/spec/day_spider_spec.rb +80 -0
- data/spec/file_downloader_spec.rb +51 -0
- data/spec/fixtures/aaa_players.xml +70 -0
- data/spec/fixtures/boxscore.xml +1147 -0
- data/spec/fixtures/inning_all.xml +1 -0
- data/spec/fixtures/inning_hit.xml +1 -0
- data/spec/fixtures/linescore.xml +131 -0
- data/spec/fixtures/players.xml +63 -0
- data/spec/game_spider_spec.rb +116 -0
- data/spec/greenmonster_spec.rb +22 -34
- data/spec/innings_downloader_spec.rb +44 -0
- data/spec/spec_helper.rb +13 -21
- data/spec/support/path_helpers.rb +21 -0
- data/spec/support/vcr.rb +11 -0
- metadata +91 -67
- data/doc/Athlete.html +0 -254
- data/doc/Greenmonster/Generators/InstallGenerator.html +0 -253
- data/doc/Greenmonster/Generators.html +0 -147
- data/doc/Greenmonster/Parser.html +0 -263
- data/doc/Greenmonster/Player.html +0 -236
- data/doc/GreenmonsterPlayerTest.html +0 -199
- data/doc/GreenmonsterSpiderTest.html +0 -668
- data/doc/GreenmonsterTest.html +0 -262
- data/doc/GreenmonsterTraversalTest.html +0 -376
- data/doc/InstallGreenmonster.html +0 -260
- data/doc/TestCreatePlayersFromGamedayXMLGame.html +0 -266
- data/doc/TestParsePlayersFromGamedayXMLFiles.html +0 -307
- data/spec/games/tst/year_2012/month_03/day_27/gid_2012_03_27_aaamlb_aabmlb_1/blank.txt +0 -0
- data/spec/games/tst/year_2012/month_03/day_27/gid_2012_03_27_aaamlb_aabmlb_1_bak/blank.txt +0 -0
- data/spec/games/tst/year_2012/month_03/day_27/not_2012_03_27_aaamlb_aabmlb_1/blank.txt +0 -0
- data/spec/greenmonster/spider_spec.rb +0 -85
@@ -47,7 +47,6 @@
|
|
47
47
|
<nav id="file-list-section" class="section">
|
48
48
|
<h3 class="section-header">Defined In</h3>
|
49
49
|
<ul>
|
50
|
-
<li>lib/greenmonster/greenmonster.rb
|
51
50
|
<li>lib/greenmonster/spider.rb
|
52
51
|
</ul>
|
53
52
|
</nav>
|
@@ -83,17 +82,11 @@
|
|
83
82
|
|
84
83
|
<ul class="link-list">
|
85
84
|
|
86
|
-
<li><a href="#method-
|
85
|
+
<li><a href="#method-i-pull_day">#pull_day</a>
|
87
86
|
|
88
|
-
<li><a href="#method-
|
87
|
+
<li><a href="#method-i-pull_days">#pull_days</a>
|
89
88
|
|
90
|
-
<li><a href="#method-
|
91
|
-
|
92
|
-
<li><a href="#method-c-pull_day">::pull_day</a>
|
93
|
-
|
94
|
-
<li><a href="#method-c-pull_days">::pull_days</a>
|
95
|
-
|
96
|
-
<li><a href="#method-c-pull_game">::pull_game</a>
|
89
|
+
<li><a href="#method-i-pull_game">#pull_game</a>
|
97
90
|
|
98
91
|
</ul>
|
99
92
|
</nav>
|
@@ -110,6 +103,12 @@
|
|
110
103
|
|
111
104
|
<li class="file"><a href="../Rakefile.html">Rakefile</a>
|
112
105
|
|
106
|
+
<li class="file"><a href="../spec/games/tst/year_2012/month_03/day_27/gid_2012_03_27_aaamlb_aabmlb_1/blank_txt.html">blank</a>
|
107
|
+
|
108
|
+
<li class="file"><a href="../spec/games/tst/year_2012/month_03/day_27/gid_2012_03_27_aaamlb_aabmlb_1_bak/blank_txt.html">blank</a>
|
109
|
+
|
110
|
+
<li class="file"><a href="../spec/games/tst/year_2012/month_03/day_27/not_2012_03_27_aaamlb_aabmlb_1/blank_txt.html">blank</a>
|
111
|
+
|
113
112
|
</ul>
|
114
113
|
</nav>
|
115
114
|
|
@@ -120,32 +119,8 @@
|
|
120
119
|
|
121
120
|
<li><a href="../Greenmonster.html">Greenmonster</a>
|
122
121
|
|
123
|
-
<li><a href="../Greenmonster/Generators.html">Greenmonster::Generators</a>
|
124
|
-
|
125
|
-
<li><a href="../Greenmonster/Generators/InstallGenerator.html">Greenmonster::Generators::InstallGenerator</a>
|
126
|
-
|
127
|
-
<li><a href="../Greenmonster/Parser.html">Greenmonster::Parser</a>
|
128
|
-
|
129
|
-
<li><a href="../Greenmonster/Player.html">Greenmonster::Player</a>
|
130
|
-
|
131
122
|
<li><a href="../Greenmonster/Spider.html">Greenmonster::Spider</a>
|
132
123
|
|
133
|
-
<li><a href="../Athlete.html">Athlete</a>
|
134
|
-
|
135
|
-
<li><a href="../GreenmonsterPlayerTest.html">GreenmonsterPlayerTest</a>
|
136
|
-
|
137
|
-
<li><a href="../GreenmonsterSpiderTest.html">GreenmonsterSpiderTest</a>
|
138
|
-
|
139
|
-
<li><a href="../GreenmonsterTest.html">GreenmonsterTest</a>
|
140
|
-
|
141
|
-
<li><a href="../GreenmonsterTraversalTest.html">GreenmonsterTraversalTest</a>
|
142
|
-
|
143
|
-
<li><a href="../InstallGreenmonster.html">InstallGreenmonster</a>
|
144
|
-
|
145
|
-
<li><a href="../TestCreatePlayersFromGamedayXMLGame.html">TestCreatePlayersFromGamedayXMLGame</a>
|
146
|
-
|
147
|
-
<li><a href="../TestParsePlayersFromGamedayXMLFiles.html">TestParsePlayersFromGamedayXMLFiles</a>
|
148
|
-
|
149
124
|
</ul>
|
150
125
|
</nav>
|
151
126
|
|
@@ -175,158 +150,24 @@
|
|
175
150
|
|
176
151
|
<!-- Methods -->
|
177
152
|
|
178
|
-
<section id="public-
|
179
|
-
<h3 class="section-header">Public
|
180
|
-
|
181
|
-
|
182
|
-
<div id="method-c-copyGameDayXML" class="method-detail ">
|
183
|
-
|
184
|
-
<div class="method-heading">
|
185
|
-
<span class="method-name">copyGameDayXML</span><span
|
186
|
-
class="method-args">(file_name,location,paths)</span>
|
187
|
-
<span class="method-click-advice">click to toggle source</span>
|
188
|
-
</div>
|
189
|
-
|
190
|
-
|
191
|
-
<div class="method-description">
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
<div class="method-source-code" id="copyGameDayXML-source">
|
198
|
-
<pre><span class="ruby-comment"># File lib/greenmonster/greenmonster.rb, line 8</span>
|
199
|
-
<span class="ruby-keyword">def</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">copyGameDayXML</span> (<span class="ruby-identifier">file_name</span>,<span class="ruby-identifier">location</span>,<span class="ruby-identifier">paths</span>)
|
200
|
-
<span class="ruby-identifier">open</span>(<span class="ruby-identifier">paths</span>[<span class="ruby-value">:localGameFolder</span>] <span class="ruby-operator">+</span> <span class="ruby-node">"#{file_name =~ /inning/ ? 'inning/' : ''}"</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">file_name</span>, <span class="ruby-string">'w'</span>) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">file</span><span class="ruby-operator">|</span>
|
201
|
-
<span class="ruby-identifier">file</span>.<span class="ruby-identifier">write</span>(<span class="ruby-keyword">self</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">paths</span>[<span class="ruby-value">:mlbGameFolder</span>] <span class="ruby-operator">+</span> <span class="ruby-node">"#{file_name =~ /inning/ ? 'inning/' : ''}"</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">file_name</span>).<span class="ruby-identifier">body</span>)
|
202
|
-
<span class="ruby-keyword">end</span>
|
203
|
-
<span class="ruby-keyword">end</span></pre>
|
204
|
-
</div><!-- copyGameDayXML-source -->
|
205
|
-
|
206
|
-
</div>
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
</div><!-- copyGameDayXML-method -->
|
153
|
+
<section id="public-instance-5Buntitled-5D-method-details" class="method-section section">
|
154
|
+
<h3 class="section-header">Public Instance Methods</h3>
|
212
155
|
|
213
156
|
|
214
|
-
<div id="method-
|
215
|
-
|
216
|
-
<div class="method-heading">
|
217
|
-
<span class="method-name">format_date_as_folder</span><span
|
218
|
-
class="method-args">(date)</span>
|
219
|
-
<span class="method-click-advice">click to toggle source</span>
|
220
|
-
</div>
|
221
|
-
|
222
|
-
|
223
|
-
<div class="method-description">
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
<div class="method-source-code" id="format_date_as_folder-source">
|
230
|
-
<pre><span class="ruby-comment"># File lib/greenmonster/greenmonster.rb, line 14</span>
|
231
|
-
<span class="ruby-keyword">def</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">format_date_as_folder</span>(<span class="ruby-identifier">date</span>)
|
232
|
-
<span class="ruby-identifier">date</span>.<span class="ruby-identifier">strftime</span>(<span class="ruby-string">"year_%Y/month_%m/day_%d"</span>)
|
233
|
-
<span class="ruby-keyword">end</span></pre>
|
234
|
-
</div><!-- format_date_as_folder-source -->
|
235
|
-
|
236
|
-
</div>
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
</div><!-- format_date_as_folder-method -->
|
242
|
-
|
243
|
-
|
244
|
-
<div id="method-c-pull" class="method-detail ">
|
245
|
-
|
246
|
-
<div class="method-heading">
|
247
|
-
<span class="method-name">pull</span><span
|
248
|
-
class="method-args">(date = Date.today,args = {})</span>
|
249
|
-
<span class="method-click-advice">click to toggle source</span>
|
250
|
-
</div>
|
251
|
-
|
252
|
-
|
253
|
-
<div class="method-description">
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
<div class="method-source-code" id="pull-source">
|
260
|
-
<pre><span class="ruby-comment"># File lib/greenmonster/greenmonster.rb, line 18</span>
|
261
|
-
<span class="ruby-keyword">def</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">pull</span>(<span class="ruby-identifier">date</span> = <span class="ruby-constant">Date</span>.<span class="ruby-identifier">today</span>,<span class="ruby-identifier">args</span> = {})
|
262
|
-
<span class="ruby-identifier">game_day_url_for_date</span> = <span class="ruby-node">"http://gd2.mlb.com/components/game/#{args[:league] || 'mlb'}/#{format_date_as_folder(date)}"</span>
|
263
|
-
<span class="ruby-identifier">puts</span> <span class="ruby-node">"Finding games in #{game_day_url_for_date}"</span>
|
264
|
-
|
265
|
-
<span class="ruby-comment"># Iterate through every hyperlink on the page.</span>
|
266
|
-
<span class="ruby-comment"># These links represent the individual game folders</span>
|
267
|
-
<span class="ruby-comment"># for each date.</span>
|
268
|
-
(<span class="ruby-constant">Nokogiri</span><span class="ruby-operator">::</span><span class="ruby-constant">XML</span>(<span class="ruby-keyword">self</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">game_day_url_for_date</span>))<span class="ruby-operator">/</span><span class="ruby-string">"a"</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">e</span><span class="ruby-operator">|</span>
|
269
|
-
|
270
|
-
<span class="ruby-comment"># See if the link is to a game, otherwise ignore it</span>
|
271
|
-
<span class="ruby-keyword">if</span> <span class="ruby-identifier">e</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>[<span class="ruby-value">0</span>,<span class="ruby-value">3</span>] <span class="ruby-operator">==</span> <span class="ruby-string">"gid"</span>
|
272
|
-
<span class="ruby-identifier">puts</span> <span class="ruby-identifier">e</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-string">'/'</span>,<span class="ruby-string">''</span>)
|
273
|
-
|
274
|
-
<span class="ruby-identifier">paths</span> = {
|
275
|
-
<span class="ruby-value">:localGameFolder</span> =<span class="ruby-operator">></span> <span class="ruby-node">"#{MlbGame::GAMES_LOCATION}/#{args[:league] || 'mlb'}/#{format_date_as_folder(date)}/#{e.attribute('href').value}"</span>,
|
276
|
-
<span class="ruby-value">:mlbGameFolder</span> =<span class="ruby-operator">></span> <span class="ruby-node">"#{game_day_url_for_date}/#{e.attribute('href').value}"</span>
|
277
|
-
}
|
278
|
-
|
279
|
-
<span class="ruby-constant">FileUtils</span>.<span class="ruby-identifier">mkdir_p</span> <span class="ruby-identifier">paths</span>[<span class="ruby-value">:localGameFolder</span>] <span class="ruby-operator">+</span> <span class="ruby-string">'inning'</span>
|
280
|
-
|
281
|
-
<span class="ruby-keyword">begin</span>
|
282
|
-
<span class="ruby-identifier">copyGameDayXML</span>(<span class="ruby-string">'linescore.xml'</span>,<span class="ruby-string">'base'</span>,<span class="ruby-identifier">paths</span>)
|
283
|
-
|
284
|
-
<span class="ruby-keyword">if</span> <span class="ruby-identifier">date</span>.<span class="ruby-identifier">year</span> <span class="ruby-operator">></span> <span class="ruby-value">2007</span>
|
285
|
-
<span class="ruby-identifier">copyGameDayXML</span>(<span class="ruby-string">'inning_all.xml'</span>,<span class="ruby-string">'inning'</span>,<span class="ruby-identifier">paths</span>)
|
286
|
-
<span class="ruby-identifier">copyGameDayXML</span>(<span class="ruby-string">'inning_hit.xml'</span>,<span class="ruby-string">'inning'</span>,<span class="ruby-identifier">paths</span>)
|
287
|
-
<span class="ruby-keyword">else</span>
|
288
|
-
<span class="ruby-comment"># Iterate through the inning files, but skip inning </span>
|
289
|
-
<span class="ruby-comment"># files numbered 0 (some bad spring training data)</span>
|
290
|
-
(<span class="ruby-constant">Nokogiri</span><span class="ruby-operator">::</span><span class="ruby-constant">XML</span>(<span class="ruby-keyword">self</span>.<span class="ruby-identifier">get</span>(<span class="ruby-node">"#{paths[:mlbGameFolder]}/inning/"</span>).<span class="ruby-identifier">body</span>).<span class="ruby-identifier">search</span>(<span class="ruby-string">'a'</span>)).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">ic</span><span class="ruby-operator">|</span>
|
291
|
-
<span class="ruby-identifier">copyGameDayXML</span>(<span class="ruby-identifier">ic</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>),<span class="ruby-string">'inning'</span>,<span class="ruby-identifier">paths</span>) <span class="ruby-keyword">if</span> <span class="ruby-identifier">ic</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>[<span class="ruby-value">-3</span>,<span class="ruby-value">3</span>] <span class="ruby-operator">==</span> <span class="ruby-string">"xml"</span> <span class="ruby-keyword">unless</span> <span class="ruby-identifier">ic</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>[<span class="ruby-value">-6</span>,<span class="ruby-value">6</span>] <span class="ruby-operator">==</span> <span class="ruby-string">"_0.xml"</span>
|
292
|
-
<span class="ruby-keyword">end</span>
|
293
|
-
<span class="ruby-keyword">end</span>
|
294
|
-
|
295
|
-
<span class="ruby-comment"># Copy base data files </span>
|
296
|
-
<span class="ruby-comment"># (if inning data wasn't there, this gets skipped)</span>
|
297
|
-
[<span class="ruby-string">'boxscore.xml'</span>,<span class="ruby-string">'eventLog.xml'</span>,<span class="ruby-string">'players.xml'</span>].<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">file</span><span class="ruby-operator">|</span>
|
298
|
-
<span class="ruby-identifier">copyGameDayXML</span>(<span class="ruby-identifier">file</span>,<span class="ruby-string">'base'</span>,<span class="ruby-identifier">paths</span>)
|
299
|
-
<span class="ruby-keyword">end</span>
|
300
|
-
<span class="ruby-keyword">rescue</span> <span class="ruby-constant">OpenURI</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTPError</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">bang</span>
|
301
|
-
<span class="ruby-keyword">end</span>
|
302
|
-
<span class="ruby-keyword">end</span>
|
303
|
-
<span class="ruby-keyword">end</span>
|
304
|
-
<span class="ruby-keyword">end</span></pre>
|
305
|
-
</div><!-- pull-source -->
|
306
|
-
|
307
|
-
</div>
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
</div><!-- pull-method -->
|
313
|
-
|
314
|
-
|
315
|
-
<div id="method-c-pull_day" class="method-detail ">
|
157
|
+
<div id="method-i-pull_day" class="method-detail ">
|
316
158
|
|
317
159
|
<div class="method-heading">
|
318
160
|
<span class="method-name">pull_day</span><span
|
319
|
-
class="method-args">(
|
161
|
+
class="method-args">(date, sport_code)</span>
|
320
162
|
<span class="method-click-advice">click to toggle source</span>
|
321
163
|
</div>
|
322
164
|
|
323
165
|
|
324
166
|
<div class="method-description">
|
325
167
|
|
326
|
-
<p>Pull Gameday XML files for a given date. Default options for
|
327
|
-
|
328
|
-
|
329
|
-
date.</p>
|
168
|
+
<p>Pull Gameday XML files for a given date. Default options for the spider are
|
169
|
+
to pull games with sport_code of ‘mlb’ (games played by MLB games rather
|
170
|
+
than MiLB teams or foreign teams) and to pull games on the current date.</p>
|
330
171
|
|
331
172
|
<p>Example:</p>
|
332
173
|
|
@@ -341,33 +182,11 @@ date.</p>
|
|
341
182
|
|
342
183
|
|
343
184
|
<div class="method-source-code" id="pull_day-source">
|
344
|
-
<pre><span class="ruby-comment"># File lib/greenmonster/spider.rb, line
|
345
|
-
<span class="ruby-keyword">def</span> <span class="ruby-
|
346
|
-
<span class="ruby-identifier">
|
347
|
-
<span class="ruby-
|
348
|
-
<span class="ruby-value">:sport_code</span> =<span class="ruby-operator">></span> <span class="ruby-string">'mlb'</span>,
|
349
|
-
}.<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">args</span>)
|
350
|
-
|
351
|
-
<span class="ruby-comment"># If we want all sport codes, set up the array.</span>
|
352
|
-
<span class="ruby-keyword">if</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:all_sport_codes</span>]
|
353
|
-
<span class="ruby-identifier">args</span>[<span class="ruby-value">:sport_codes</span>] = <span class="ruby-node">%w(aaa aax afa afx asx bbc fps hsb ind int jml nae naf nas nat naw oly rok win)</span>
|
354
|
-
<span class="ruby-keyword">else</span>
|
355
|
-
<span class="ruby-identifier">args</span>[<span class="ruby-value">:sport_codes</span>] = [<span class="ruby-identifier">args</span>[<span class="ruby-value">:sport_code</span>] <span class="ruby-operator">||</span> <span class="ruby-string">'mlb'</span>].<span class="ruby-identifier">flatten</span>
|
356
|
-
<span class="ruby-keyword">end</span>
|
357
|
-
|
358
|
-
<span class="ruby-comment"># Iterate through every hyperlink on the page.</span>
|
359
|
-
<span class="ruby-comment"># These links represent the individual game folders</span>
|
360
|
-
<span class="ruby-comment"># for each date. Reject any links that aren't to game</span>
|
361
|
-
<span class="ruby-comment"># folders or that are to what look like backup game</span>
|
362
|
-
<span class="ruby-comment"># folders.</span>
|
363
|
-
<span class="ruby-identifier">args</span>[<span class="ruby-value">:sport_codes</span>].<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sport_code</span><span class="ruby-operator">|</span>
|
364
|
-
<span class="ruby-identifier">args</span>[<span class="ruby-value">:sport_code</span>] = <span class="ruby-identifier">sport_code</span>
|
365
|
-
(<span class="ruby-constant">Nokogiri</span><span class="ruby-operator">::</span><span class="ruby-constant">XML</span>(<span class="ruby-keyword">self</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">gameday_league_and_date_url</span>(<span class="ruby-identifier">args</span>)))<span class="ruby-operator">/</span><span class="ruby-string">"a"</span>).<span class="ruby-identifier">reject</span>{<span class="ruby-operator">|</span><span class="ruby-identifier">l</span><span class="ruby-operator">|</span> <span class="ruby-identifier">l</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>[<span class="ruby-value">0</span>,<span class="ruby-value">4</span>] <span class="ruby-operator">!=</span> <span class="ruby-string">"gid_"</span> <span class="ruby-keyword">or</span> <span class="ruby-identifier">l</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>[<span class="ruby-value">-5</span>,<span class="ruby-value">4</span>] <span class="ruby-operator">==</span> <span class="ruby-string">"_bak"</span>}.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">e</span><span class="ruby-operator">|</span>
|
366
|
-
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">pull_game</span>(<span class="ruby-identifier">e</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-string">'/'</span>,<span class="ruby-string">''</span>),<span class="ruby-identifier">args</span>)
|
367
|
-
<span class="ruby-keyword">end</span>
|
185
|
+
<pre><span class="ruby-comment"># File lib/greenmonster/spider.rb, line 37</span>
|
186
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">pull_day</span>(<span class="ruby-identifier">date</span>, <span class="ruby-identifier">sport_code</span>)
|
187
|
+
<span class="ruby-identifier">game_links_on_gameday_date_page</span>(<span class="ruby-identifier">date</span>, <span class="ruby-identifier">sport_code</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">game_id</span><span class="ruby-operator">|</span>
|
188
|
+
<span class="ruby-identifier">pull_game</span>(<span class="ruby-identifier">game_id</span>, <span class="ruby-identifier">date</span>)
|
368
189
|
<span class="ruby-keyword">end</span>
|
369
|
-
|
370
|
-
<span class="ruby-keyword">return</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:sport_code</span>]
|
371
190
|
<span class="ruby-keyword">end</span></pre>
|
372
191
|
</div><!-- pull_day-source -->
|
373
192
|
|
@@ -379,25 +198,37 @@ date.</p>
|
|
379
198
|
</div><!-- pull_day-method -->
|
380
199
|
|
381
200
|
|
382
|
-
<div id="method-
|
201
|
+
<div id="method-i-pull_days" class="method-detail ">
|
383
202
|
|
384
203
|
<div class="method-heading">
|
385
204
|
<span class="method-name">pull_days</span><span
|
386
|
-
class="method-args">(range
|
205
|
+
class="method-args">(range, sport_code)</span>
|
387
206
|
<span class="method-click-advice">click to toggle source</span>
|
388
207
|
</div>
|
389
208
|
|
390
209
|
|
391
210
|
<div class="method-description">
|
392
211
|
|
393
|
-
|
212
|
+
<p>Pull Gameday XML files for a range of dates. The args hash passes arguments
|
213
|
+
like games_folder location on to Spider.pull.</p>
|
214
|
+
|
215
|
+
<p>Example:</p>
|
216
|
+
|
217
|
+
<pre class="ruby"><span class="ruby-comment"># Pull all games in MLB in July 2011</span>
|
218
|
+
<span class="ruby-operator">>></span> <span class="ruby-constant">Gameday</span><span class="ruby-operator">::</span><span class="ruby-constant">Spider</span>.<span class="ruby-identifier">pull_days</span>(<span class="ruby-constant">Date</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">2011</span>,<span class="ruby-value">7</span>,<span class="ruby-value">1</span>)<span class="ruby-operator">..</span><span class="ruby-constant">Date</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">2011</span>,<span class="ruby-value">7</span>,<span class="ruby-value">31</span>), {:<span class="ruby-identifier">games_folder</span> =<span class="ruby-operator">></span> <span class="ruby-string">'/Users/geoff/games'</span>})
|
219
|
+
</pre>
|
220
|
+
|
221
|
+
<p>Arguments:</p>
|
222
|
+
|
223
|
+
<pre>range: (Range)
|
224
|
+
args: (Hash)</pre>
|
394
225
|
|
395
226
|
|
396
227
|
|
397
228
|
<div class="method-source-code" id="pull_days-source">
|
398
|
-
<pre><span class="ruby-comment"># File lib/greenmonster/
|
399
|
-
<span class="ruby-keyword">def</span> <span class="ruby-
|
400
|
-
<span class="ruby-identifier">range</span>.<span class="ruby-identifier">each</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">
|
229
|
+
<pre><span class="ruby-comment"># File lib/greenmonster/spider.rb, line 55</span>
|
230
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">pull_days</span>(<span class="ruby-identifier">range</span>, <span class="ruby-identifier">sport_code</span>)
|
231
|
+
<span class="ruby-identifier">range</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">date</span><span class="ruby-operator">|</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">pull_day</span>(<span class="ruby-identifier">date</span>, <span class="ruby-identifier">sport_code</span>) }
|
401
232
|
<span class="ruby-keyword">end</span></pre>
|
402
233
|
</div><!-- pull_days-source -->
|
403
234
|
|
@@ -409,18 +240,18 @@ date.</p>
|
|
409
240
|
</div><!-- pull_days-method -->
|
410
241
|
|
411
242
|
|
412
|
-
<div id="method-
|
243
|
+
<div id="method-i-pull_game" class="method-detail ">
|
413
244
|
|
414
245
|
<div class="method-heading">
|
415
246
|
<span class="method-name">pull_game</span><span
|
416
|
-
class="method-args">(game_id,
|
247
|
+
class="method-args">(game_id, date)</span>
|
417
248
|
<span class="method-click-advice">click to toggle source</span>
|
418
249
|
</div>
|
419
250
|
|
420
251
|
|
421
252
|
<div class="method-description">
|
422
253
|
|
423
|
-
<p>Pull Gameday XML files for a given game, specified by the game
|
254
|
+
<p>Pull Gameday XML files for a given game, specified by the game ID. If date
|
424
255
|
and sport code are not specified as options, these values are guessed from
|
425
256
|
the game ID string using the home team’s sport code and the date from the
|
426
257
|
scheduled date values in the game ID.</p>
|
@@ -434,54 +265,12 @@ scheduled date values in the game ID.</p>
|
|
434
265
|
|
435
266
|
<div class="method-source-code" id="pull_game-source">
|
436
267
|
<pre><span class="ruby-comment"># File lib/greenmonster/spider.rb, line 15</span>
|
437
|
-
<span class="ruby-keyword">def</span> <span class="ruby-
|
438
|
-
<span class="ruby-identifier">
|
439
|
-
|
440
|
-
|
441
|
-
<span class="ruby-
|
442
|
-
<span class="ruby-value">:games_folder</span> =<span class="ruby-operator">></span> <span class="ruby-constant">Greenmonster</span>.<span class="ruby-identifier">games_folder</span>
|
443
|
-
}.<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">args</span>)
|
444
|
-
<span class="ruby-identifier">raise</span> <span class="ruby-string">"Games folder location required."</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:games_folder</span>].<span class="ruby-identifier">nil?</span>
|
445
|
-
|
446
|
-
<span class="ruby-identifier">args</span>[<span class="ruby-value">:games_folder</span>] = <span class="ruby-constant">Pathname</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">args</span>[<span class="ruby-value">:games_folder</span>])
|
447
|
-
|
448
|
-
<span class="ruby-identifier">puts</span> <span class="ruby-identifier">game_id</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:print_games</span>]
|
449
|
-
|
450
|
-
<span class="ruby-identifier">paths</span> = {
|
451
|
-
<span class="ruby-value">:localGameFolder</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:games_folder</span>] <span class="ruby-operator">+</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:sport_code</span>] <span class="ruby-operator">+</span> <span class="ruby-identifier">format_date_as_folder</span>(<span class="ruby-identifier">args</span>[<span class="ruby-value">:date</span>]) <span class="ruby-operator">+</span> <span class="ruby-identifier">game_id</span>,
|
452
|
-
<span class="ruby-value">:mlbGameFolder</span> =<span class="ruby-operator">></span> <span class="ruby-node">"#{gameday_league_and_date_url(args)}/#{game_id}/"</span>
|
453
|
-
}
|
454
|
-
|
455
|
-
<span class="ruby-constant">FileUtils</span>.<span class="ruby-identifier">mkdir_p</span> <span class="ruby-identifier">paths</span>[<span class="ruby-value">:localGameFolder</span>] <span class="ruby-operator">+</span> <span class="ruby-string">'inning'</span>
|
456
|
-
|
457
|
-
<span class="ruby-keyword">begin</span>
|
458
|
-
<span class="ruby-comment"># Always copy linescore first. If we can't get this</span>
|
459
|
-
<span class="ruby-comment"># data, all other game data is useless.</span>
|
460
|
-
<span class="ruby-identifier">copy_gameday_xml</span>(<span class="ruby-string">'linescore.xml'</span>,<span class="ruby-identifier">paths</span>)
|
461
|
-
|
462
|
-
<span class="ruby-keyword">if</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:date</span>].<span class="ruby-identifier">year</span> <span class="ruby-operator">></span> <span class="ruby-value">2007</span>
|
463
|
-
<span class="ruby-identifier">copy_gameday_xml</span>(<span class="ruby-string">'inning_all.xml'</span>,<span class="ruby-identifier">paths</span>)
|
464
|
-
<span class="ruby-identifier">copy_gameday_xml</span>(<span class="ruby-string">'inning_hit.xml'</span>,<span class="ruby-identifier">paths</span>)
|
465
|
-
<span class="ruby-keyword">else</span>
|
466
|
-
<span class="ruby-comment"># Iterate through the inning files, but skip inning </span>
|
467
|
-
<span class="ruby-comment"># files numbered 0 (some bad spring training data).</span>
|
468
|
-
<span class="ruby-comment"># Necessary for games prior to 2008 because there is</span>
|
469
|
-
<span class="ruby-comment"># no inning_all.xml file in older games.</span>
|
470
|
-
(<span class="ruby-constant">Nokogiri</span><span class="ruby-operator">::</span><span class="ruby-constant">XML</span>(<span class="ruby-keyword">self</span>.<span class="ruby-identifier">get</span>(<span class="ruby-node">"#{paths[:mlbGameFolder]}/inning/"</span>).<span class="ruby-identifier">body</span>).<span class="ruby-identifier">search</span>(<span class="ruby-string">'a'</span>)).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">ic</span><span class="ruby-operator">|</span>
|
471
|
-
<span class="ruby-identifier">copy_gameday_xml</span>(<span class="ruby-identifier">ic</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>,<span class="ruby-identifier">paths</span>) <span class="ruby-keyword">if</span> <span class="ruby-identifier">ic</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>[<span class="ruby-value">-3</span>,<span class="ruby-value">3</span>] <span class="ruby-operator">==</span> <span class="ruby-string">"xml"</span> <span class="ruby-keyword">unless</span> <span class="ruby-identifier">ic</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>[<span class="ruby-value">-6</span>,<span class="ruby-value">6</span>] <span class="ruby-operator">==</span> <span class="ruby-string">"_0.xml"</span> <span class="ruby-keyword">or</span> <span class="ruby-identifier">ic</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-string">'Score'</span>)
|
472
|
-
<span class="ruby-keyword">end</span>
|
473
|
-
<span class="ruby-keyword">end</span>
|
474
|
-
|
475
|
-
<span class="ruby-comment"># Copy base data files </span>
|
476
|
-
<span class="ruby-comment"># (if inning data wasn't there, this gets skipped)</span>
|
477
|
-
[<span class="ruby-string">'boxscore.xml'</span>,<span class="ruby-string">'eventLog.xml'</span>,<span class="ruby-string">'players.xml'</span>].<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">file</span><span class="ruby-operator">|</span>
|
478
|
-
<span class="ruby-identifier">copy_gameday_xml</span>(<span class="ruby-identifier">file</span>,<span class="ruby-identifier">paths</span>)
|
479
|
-
<span class="ruby-keyword">end</span>
|
480
|
-
<span class="ruby-keyword">rescue</span> <span class="ruby-constant">StandardError</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">bang</span>
|
481
|
-
<span class="ruby-identifier">puts</span> <span class="ruby-node">"Unable to download some data for #{game_id}"</span>
|
268
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">pull_game</span>(<span class="ruby-identifier">game_id</span>, <span class="ruby-identifier">date</span>)
|
269
|
+
<span class="ruby-identifier">make_folders_for_game</span>(<span class="ruby-identifier">game_id</span>, <span class="ruby-identifier">date</span>)
|
270
|
+
|
271
|
+
<span class="ruby-node">%w(boxscore.xml game_events.xml inning_all.xml linescore.xml players.xml)</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">file_name</span><span class="ruby-operator">|</span>
|
272
|
+
<span class="ruby-identifier">copy_gameday_xml</span>(<span class="ruby-identifier">game_id</span>, <span class="ruby-identifier">date</span>, <span class="ruby-identifier">file_name</span>)
|
482
273
|
<span class="ruby-keyword">end</span>
|
483
|
-
|
484
|
-
<span class="ruby-keyword">return</span> <span class="ruby-identifier">game_id</span>
|
485
274
|
<span class="ruby-keyword">end</span></pre>
|
486
275
|
</div><!-- pull_game-source -->
|
487
276
|
|
@@ -493,7 +282,7 @@ scheduled date values in the game ID.</p>
|
|
493
282
|
</div><!-- pull_game-method -->
|
494
283
|
|
495
284
|
|
496
|
-
</section><!-- public-
|
285
|
+
</section><!-- public-instance-method-details -->
|
497
286
|
|
498
287
|
</section><!-- 5Buntitled-5D -->
|
499
288
|
|