greenmonster 0.5.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/LICENSE.txt +21 -0
- data/README.markdown +11 -28
- data/Rakefile +3 -8
- data/doc/Gemfile.html +6 -24
- data/doc/Greenmonster/Spider.html +47 -258
- data/doc/Greenmonster.html +25 -86
- data/doc/Rakefile.html +10 -25
- data/doc/created.rid +12 -18
- data/doc/index.html +6 -24
- data/doc/js/search_index.js +1 -1
- data/doc/table_of_contents.html +12 -129
- data/greenmonster.gemspec +16 -13
- data/lib/greenmonster/day_spider.rb +61 -0
- data/lib/greenmonster/file_downloader.rb +38 -0
- data/lib/greenmonster/game_spider.rb +100 -0
- data/lib/greenmonster/innings_downloader.rb +41 -0
- data/lib/greenmonster/spider.rb +1 -0
- data/lib/greenmonster/version.rb +2 -2
- data/lib/greenmonster.rb +16 -82
- data/spec/cassettes/aaa/year_2015/month_05/day_09.yml +153 -0
- data/spec/cassettes/gid_2007_10_01_sdnmlb_colmlb_1/innings.yml +135 -0
- data/spec/cassettes/gid_2015_04_18_balmlb_bos_mlb_1/players_xml.yml +292 -0
- data/spec/cassettes/gid_2015_04_18_balmlb_bos_mlb_2/players_xml.yml +46 -0
- data/spec/cassettes/mlb/year_2015/month_04/day_18.yml +170 -0
- data/spec/day_spider_spec.rb +80 -0
- data/spec/file_downloader_spec.rb +51 -0
- data/spec/fixtures/aaa_players.xml +70 -0
- data/spec/fixtures/boxscore.xml +1147 -0
- data/spec/fixtures/inning_all.xml +1 -0
- data/spec/fixtures/inning_hit.xml +1 -0
- data/spec/fixtures/linescore.xml +131 -0
- data/spec/fixtures/players.xml +63 -0
- data/spec/game_spider_spec.rb +116 -0
- data/spec/greenmonster_spec.rb +22 -34
- data/spec/innings_downloader_spec.rb +44 -0
- data/spec/spec_helper.rb +13 -21
- data/spec/support/path_helpers.rb +21 -0
- data/spec/support/vcr.rb +11 -0
- metadata +91 -67
- data/doc/Athlete.html +0 -254
- data/doc/Greenmonster/Generators/InstallGenerator.html +0 -253
- data/doc/Greenmonster/Generators.html +0 -147
- data/doc/Greenmonster/Parser.html +0 -263
- data/doc/Greenmonster/Player.html +0 -236
- data/doc/GreenmonsterPlayerTest.html +0 -199
- data/doc/GreenmonsterSpiderTest.html +0 -668
- data/doc/GreenmonsterTest.html +0 -262
- data/doc/GreenmonsterTraversalTest.html +0 -376
- data/doc/InstallGreenmonster.html +0 -260
- data/doc/TestCreatePlayersFromGamedayXMLGame.html +0 -266
- data/doc/TestParsePlayersFromGamedayXMLFiles.html +0 -307
- data/spec/games/tst/year_2012/month_03/day_27/gid_2012_03_27_aaamlb_aabmlb_1/blank.txt +0 -0
- data/spec/games/tst/year_2012/month_03/day_27/gid_2012_03_27_aaamlb_aabmlb_1_bak/blank.txt +0 -0
- data/spec/games/tst/year_2012/month_03/day_27/not_2012_03_27_aaamlb_aabmlb_1/blank.txt +0 -0
- data/spec/greenmonster/spider_spec.rb +0 -85
@@ -47,7 +47,6 @@
|
|
47
47
|
<nav id="file-list-section" class="section">
|
48
48
|
<h3 class="section-header">Defined In</h3>
|
49
49
|
<ul>
|
50
|
-
<li>lib/greenmonster/greenmonster.rb
|
51
50
|
<li>lib/greenmonster/spider.rb
|
52
51
|
</ul>
|
53
52
|
</nav>
|
@@ -83,17 +82,11 @@
|
|
83
82
|
|
84
83
|
<ul class="link-list">
|
85
84
|
|
86
|
-
<li><a href="#method-
|
85
|
+
<li><a href="#method-i-pull_day">#pull_day</a>
|
87
86
|
|
88
|
-
<li><a href="#method-
|
87
|
+
<li><a href="#method-i-pull_days">#pull_days</a>
|
89
88
|
|
90
|
-
<li><a href="#method-
|
91
|
-
|
92
|
-
<li><a href="#method-c-pull_day">::pull_day</a>
|
93
|
-
|
94
|
-
<li><a href="#method-c-pull_days">::pull_days</a>
|
95
|
-
|
96
|
-
<li><a href="#method-c-pull_game">::pull_game</a>
|
89
|
+
<li><a href="#method-i-pull_game">#pull_game</a>
|
97
90
|
|
98
91
|
</ul>
|
99
92
|
</nav>
|
@@ -110,6 +103,12 @@
|
|
110
103
|
|
111
104
|
<li class="file"><a href="../Rakefile.html">Rakefile</a>
|
112
105
|
|
106
|
+
<li class="file"><a href="../spec/games/tst/year_2012/month_03/day_27/gid_2012_03_27_aaamlb_aabmlb_1/blank_txt.html">blank</a>
|
107
|
+
|
108
|
+
<li class="file"><a href="../spec/games/tst/year_2012/month_03/day_27/gid_2012_03_27_aaamlb_aabmlb_1_bak/blank_txt.html">blank</a>
|
109
|
+
|
110
|
+
<li class="file"><a href="../spec/games/tst/year_2012/month_03/day_27/not_2012_03_27_aaamlb_aabmlb_1/blank_txt.html">blank</a>
|
111
|
+
|
113
112
|
</ul>
|
114
113
|
</nav>
|
115
114
|
|
@@ -120,32 +119,8 @@
|
|
120
119
|
|
121
120
|
<li><a href="../Greenmonster.html">Greenmonster</a>
|
122
121
|
|
123
|
-
<li><a href="../Greenmonster/Generators.html">Greenmonster::Generators</a>
|
124
|
-
|
125
|
-
<li><a href="../Greenmonster/Generators/InstallGenerator.html">Greenmonster::Generators::InstallGenerator</a>
|
126
|
-
|
127
|
-
<li><a href="../Greenmonster/Parser.html">Greenmonster::Parser</a>
|
128
|
-
|
129
|
-
<li><a href="../Greenmonster/Player.html">Greenmonster::Player</a>
|
130
|
-
|
131
122
|
<li><a href="../Greenmonster/Spider.html">Greenmonster::Spider</a>
|
132
123
|
|
133
|
-
<li><a href="../Athlete.html">Athlete</a>
|
134
|
-
|
135
|
-
<li><a href="../GreenmonsterPlayerTest.html">GreenmonsterPlayerTest</a>
|
136
|
-
|
137
|
-
<li><a href="../GreenmonsterSpiderTest.html">GreenmonsterSpiderTest</a>
|
138
|
-
|
139
|
-
<li><a href="../GreenmonsterTest.html">GreenmonsterTest</a>
|
140
|
-
|
141
|
-
<li><a href="../GreenmonsterTraversalTest.html">GreenmonsterTraversalTest</a>
|
142
|
-
|
143
|
-
<li><a href="../InstallGreenmonster.html">InstallGreenmonster</a>
|
144
|
-
|
145
|
-
<li><a href="../TestCreatePlayersFromGamedayXMLGame.html">TestCreatePlayersFromGamedayXMLGame</a>
|
146
|
-
|
147
|
-
<li><a href="../TestParsePlayersFromGamedayXMLFiles.html">TestParsePlayersFromGamedayXMLFiles</a>
|
148
|
-
|
149
124
|
</ul>
|
150
125
|
</nav>
|
151
126
|
|
@@ -175,158 +150,24 @@
|
|
175
150
|
|
176
151
|
<!-- Methods -->
|
177
152
|
|
178
|
-
<section id="public-
|
179
|
-
<h3 class="section-header">Public
|
180
|
-
|
181
|
-
|
182
|
-
<div id="method-c-copyGameDayXML" class="method-detail ">
|
183
|
-
|
184
|
-
<div class="method-heading">
|
185
|
-
<span class="method-name">copyGameDayXML</span><span
|
186
|
-
class="method-args">(file_name,location,paths)</span>
|
187
|
-
<span class="method-click-advice">click to toggle source</span>
|
188
|
-
</div>
|
189
|
-
|
190
|
-
|
191
|
-
<div class="method-description">
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
<div class="method-source-code" id="copyGameDayXML-source">
|
198
|
-
<pre><span class="ruby-comment"># File lib/greenmonster/greenmonster.rb, line 8</span>
|
199
|
-
<span class="ruby-keyword">def</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">copyGameDayXML</span> (<span class="ruby-identifier">file_name</span>,<span class="ruby-identifier">location</span>,<span class="ruby-identifier">paths</span>)
|
200
|
-
<span class="ruby-identifier">open</span>(<span class="ruby-identifier">paths</span>[<span class="ruby-value">:localGameFolder</span>] <span class="ruby-operator">+</span> <span class="ruby-node">"#{file_name =~ /inning/ ? 'inning/' : ''}"</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">file_name</span>, <span class="ruby-string">'w'</span>) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">file</span><span class="ruby-operator">|</span>
|
201
|
-
<span class="ruby-identifier">file</span>.<span class="ruby-identifier">write</span>(<span class="ruby-keyword">self</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">paths</span>[<span class="ruby-value">:mlbGameFolder</span>] <span class="ruby-operator">+</span> <span class="ruby-node">"#{file_name =~ /inning/ ? 'inning/' : ''}"</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">file_name</span>).<span class="ruby-identifier">body</span>)
|
202
|
-
<span class="ruby-keyword">end</span>
|
203
|
-
<span class="ruby-keyword">end</span></pre>
|
204
|
-
</div><!-- copyGameDayXML-source -->
|
205
|
-
|
206
|
-
</div>
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
</div><!-- copyGameDayXML-method -->
|
153
|
+
<section id="public-instance-5Buntitled-5D-method-details" class="method-section section">
|
154
|
+
<h3 class="section-header">Public Instance Methods</h3>
|
212
155
|
|
213
156
|
|
214
|
-
<div id="method-
|
215
|
-
|
216
|
-
<div class="method-heading">
|
217
|
-
<span class="method-name">format_date_as_folder</span><span
|
218
|
-
class="method-args">(date)</span>
|
219
|
-
<span class="method-click-advice">click to toggle source</span>
|
220
|
-
</div>
|
221
|
-
|
222
|
-
|
223
|
-
<div class="method-description">
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
<div class="method-source-code" id="format_date_as_folder-source">
|
230
|
-
<pre><span class="ruby-comment"># File lib/greenmonster/greenmonster.rb, line 14</span>
|
231
|
-
<span class="ruby-keyword">def</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">format_date_as_folder</span>(<span class="ruby-identifier">date</span>)
|
232
|
-
<span class="ruby-identifier">date</span>.<span class="ruby-identifier">strftime</span>(<span class="ruby-string">"year_%Y/month_%m/day_%d"</span>)
|
233
|
-
<span class="ruby-keyword">end</span></pre>
|
234
|
-
</div><!-- format_date_as_folder-source -->
|
235
|
-
|
236
|
-
</div>
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
</div><!-- format_date_as_folder-method -->
|
242
|
-
|
243
|
-
|
244
|
-
<div id="method-c-pull" class="method-detail ">
|
245
|
-
|
246
|
-
<div class="method-heading">
|
247
|
-
<span class="method-name">pull</span><span
|
248
|
-
class="method-args">(date = Date.today,args = {})</span>
|
249
|
-
<span class="method-click-advice">click to toggle source</span>
|
250
|
-
</div>
|
251
|
-
|
252
|
-
|
253
|
-
<div class="method-description">
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
<div class="method-source-code" id="pull-source">
|
260
|
-
<pre><span class="ruby-comment"># File lib/greenmonster/greenmonster.rb, line 18</span>
|
261
|
-
<span class="ruby-keyword">def</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">pull</span>(<span class="ruby-identifier">date</span> = <span class="ruby-constant">Date</span>.<span class="ruby-identifier">today</span>,<span class="ruby-identifier">args</span> = {})
|
262
|
-
<span class="ruby-identifier">game_day_url_for_date</span> = <span class="ruby-node">"http://gd2.mlb.com/components/game/#{args[:league] || 'mlb'}/#{format_date_as_folder(date)}"</span>
|
263
|
-
<span class="ruby-identifier">puts</span> <span class="ruby-node">"Finding games in #{game_day_url_for_date}"</span>
|
264
|
-
|
265
|
-
<span class="ruby-comment"># Iterate through every hyperlink on the page.</span>
|
266
|
-
<span class="ruby-comment"># These links represent the individual game folders</span>
|
267
|
-
<span class="ruby-comment"># for each date.</span>
|
268
|
-
(<span class="ruby-constant">Nokogiri</span><span class="ruby-operator">::</span><span class="ruby-constant">XML</span>(<span class="ruby-keyword">self</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">game_day_url_for_date</span>))<span class="ruby-operator">/</span><span class="ruby-string">"a"</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">e</span><span class="ruby-operator">|</span>
|
269
|
-
|
270
|
-
<span class="ruby-comment"># See if the link is to a game, otherwise ignore it</span>
|
271
|
-
<span class="ruby-keyword">if</span> <span class="ruby-identifier">e</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>[<span class="ruby-value">0</span>,<span class="ruby-value">3</span>] <span class="ruby-operator">==</span> <span class="ruby-string">"gid"</span>
|
272
|
-
<span class="ruby-identifier">puts</span> <span class="ruby-identifier">e</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-string">'/'</span>,<span class="ruby-string">''</span>)
|
273
|
-
|
274
|
-
<span class="ruby-identifier">paths</span> = {
|
275
|
-
<span class="ruby-value">:localGameFolder</span> =<span class="ruby-operator">></span> <span class="ruby-node">"#{MlbGame::GAMES_LOCATION}/#{args[:league] || 'mlb'}/#{format_date_as_folder(date)}/#{e.attribute('href').value}"</span>,
|
276
|
-
<span class="ruby-value">:mlbGameFolder</span> =<span class="ruby-operator">></span> <span class="ruby-node">"#{game_day_url_for_date}/#{e.attribute('href').value}"</span>
|
277
|
-
}
|
278
|
-
|
279
|
-
<span class="ruby-constant">FileUtils</span>.<span class="ruby-identifier">mkdir_p</span> <span class="ruby-identifier">paths</span>[<span class="ruby-value">:localGameFolder</span>] <span class="ruby-operator">+</span> <span class="ruby-string">'inning'</span>
|
280
|
-
|
281
|
-
<span class="ruby-keyword">begin</span>
|
282
|
-
<span class="ruby-identifier">copyGameDayXML</span>(<span class="ruby-string">'linescore.xml'</span>,<span class="ruby-string">'base'</span>,<span class="ruby-identifier">paths</span>)
|
283
|
-
|
284
|
-
<span class="ruby-keyword">if</span> <span class="ruby-identifier">date</span>.<span class="ruby-identifier">year</span> <span class="ruby-operator">></span> <span class="ruby-value">2007</span>
|
285
|
-
<span class="ruby-identifier">copyGameDayXML</span>(<span class="ruby-string">'inning_all.xml'</span>,<span class="ruby-string">'inning'</span>,<span class="ruby-identifier">paths</span>)
|
286
|
-
<span class="ruby-identifier">copyGameDayXML</span>(<span class="ruby-string">'inning_hit.xml'</span>,<span class="ruby-string">'inning'</span>,<span class="ruby-identifier">paths</span>)
|
287
|
-
<span class="ruby-keyword">else</span>
|
288
|
-
<span class="ruby-comment"># Iterate through the inning files, but skip inning </span>
|
289
|
-
<span class="ruby-comment"># files numbered 0 (some bad spring training data)</span>
|
290
|
-
(<span class="ruby-constant">Nokogiri</span><span class="ruby-operator">::</span><span class="ruby-constant">XML</span>(<span class="ruby-keyword">self</span>.<span class="ruby-identifier">get</span>(<span class="ruby-node">"#{paths[:mlbGameFolder]}/inning/"</span>).<span class="ruby-identifier">body</span>).<span class="ruby-identifier">search</span>(<span class="ruby-string">'a'</span>)).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">ic</span><span class="ruby-operator">|</span>
|
291
|
-
<span class="ruby-identifier">copyGameDayXML</span>(<span class="ruby-identifier">ic</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>),<span class="ruby-string">'inning'</span>,<span class="ruby-identifier">paths</span>) <span class="ruby-keyword">if</span> <span class="ruby-identifier">ic</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>[<span class="ruby-value">-3</span>,<span class="ruby-value">3</span>] <span class="ruby-operator">==</span> <span class="ruby-string">"xml"</span> <span class="ruby-keyword">unless</span> <span class="ruby-identifier">ic</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>[<span class="ruby-value">-6</span>,<span class="ruby-value">6</span>] <span class="ruby-operator">==</span> <span class="ruby-string">"_0.xml"</span>
|
292
|
-
<span class="ruby-keyword">end</span>
|
293
|
-
<span class="ruby-keyword">end</span>
|
294
|
-
|
295
|
-
<span class="ruby-comment"># Copy base data files </span>
|
296
|
-
<span class="ruby-comment"># (if inning data wasn't there, this gets skipped)</span>
|
297
|
-
[<span class="ruby-string">'boxscore.xml'</span>,<span class="ruby-string">'eventLog.xml'</span>,<span class="ruby-string">'players.xml'</span>].<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">file</span><span class="ruby-operator">|</span>
|
298
|
-
<span class="ruby-identifier">copyGameDayXML</span>(<span class="ruby-identifier">file</span>,<span class="ruby-string">'base'</span>,<span class="ruby-identifier">paths</span>)
|
299
|
-
<span class="ruby-keyword">end</span>
|
300
|
-
<span class="ruby-keyword">rescue</span> <span class="ruby-constant">OpenURI</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTPError</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">bang</span>
|
301
|
-
<span class="ruby-keyword">end</span>
|
302
|
-
<span class="ruby-keyword">end</span>
|
303
|
-
<span class="ruby-keyword">end</span>
|
304
|
-
<span class="ruby-keyword">end</span></pre>
|
305
|
-
</div><!-- pull-source -->
|
306
|
-
|
307
|
-
</div>
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
</div><!-- pull-method -->
|
313
|
-
|
314
|
-
|
315
|
-
<div id="method-c-pull_day" class="method-detail ">
|
157
|
+
<div id="method-i-pull_day" class="method-detail ">
|
316
158
|
|
317
159
|
<div class="method-heading">
|
318
160
|
<span class="method-name">pull_day</span><span
|
319
|
-
class="method-args">(
|
161
|
+
class="method-args">(date, sport_code)</span>
|
320
162
|
<span class="method-click-advice">click to toggle source</span>
|
321
163
|
</div>
|
322
164
|
|
323
165
|
|
324
166
|
<div class="method-description">
|
325
167
|
|
326
|
-
<p>Pull Gameday XML files for a given date. Default options for
|
327
|
-
|
328
|
-
|
329
|
-
date.</p>
|
168
|
+
<p>Pull Gameday XML files for a given date. Default options for the spider are
|
169
|
+
to pull games with sport_code of ‘mlb’ (games played by MLB teams rather
|
170
|
+
than MiLB teams or foreign teams) and to pull games on the current date.</p>
|
330
171
|
|
331
172
|
<p>Example:</p>
|
332
173
|
|
@@ -341,33 +182,11 @@ date.</p>
|
|
341
182
|
|
342
183
|
|
343
184
|
<div class="method-source-code" id="pull_day-source">
|
344
|
-
<pre><span class="ruby-comment"># File lib/greenmonster/spider.rb, line
|
345
|
-
<span class="ruby-keyword">def</span> <span class="ruby-
|
346
|
-
<span class="ruby-identifier">
|
347
|
-
<span class="ruby-
|
348
|
-
<span class="ruby-value">:sport_code</span> =<span class="ruby-operator">></span> <span class="ruby-string">'mlb'</span>,
|
349
|
-
}.<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">args</span>)
|
350
|
-
|
351
|
-
<span class="ruby-comment"># If we want all sport codes, set up the array.</span>
|
352
|
-
<span class="ruby-keyword">if</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:all_sport_codes</span>]
|
353
|
-
<span class="ruby-identifier">args</span>[<span class="ruby-value">:sport_codes</span>] = <span class="ruby-node">%w(aaa aax afa afx asx bbc fps hsb ind int jml nae naf nas nat naw oly rok win)</span>
|
354
|
-
<span class="ruby-keyword">else</span>
|
355
|
-
<span class="ruby-identifier">args</span>[<span class="ruby-value">:sport_codes</span>] = [<span class="ruby-identifier">args</span>[<span class="ruby-value">:sport_code</span>] <span class="ruby-operator">||</span> <span class="ruby-string">'mlb'</span>].<span class="ruby-identifier">flatten</span>
|
356
|
-
<span class="ruby-keyword">end</span>
|
357
|
-
|
358
|
-
<span class="ruby-comment"># Iterate through every hyperlink on the page.</span>
|
359
|
-
<span class="ruby-comment"># These links represent the individual game folders</span>
|
360
|
-
<span class="ruby-comment"># for each date. Reject any links that aren't to game</span>
|
361
|
-
<span class="ruby-comment"># folders or that are to what look like backup game</span>
|
362
|
-
<span class="ruby-comment"># folders.</span>
|
363
|
-
<span class="ruby-identifier">args</span>[<span class="ruby-value">:sport_codes</span>].<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sport_code</span><span class="ruby-operator">|</span>
|
364
|
-
<span class="ruby-identifier">args</span>[<span class="ruby-value">:sport_code</span>] = <span class="ruby-identifier">sport_code</span>
|
365
|
-
(<span class="ruby-constant">Nokogiri</span><span class="ruby-operator">::</span><span class="ruby-constant">XML</span>(<span class="ruby-keyword">self</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">gameday_league_and_date_url</span>(<span class="ruby-identifier">args</span>)))<span class="ruby-operator">/</span><span class="ruby-string">"a"</span>).<span class="ruby-identifier">reject</span>{<span class="ruby-operator">|</span><span class="ruby-identifier">l</span><span class="ruby-operator">|</span> <span class="ruby-identifier">l</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>[<span class="ruby-value">0</span>,<span class="ruby-value">4</span>] <span class="ruby-operator">!=</span> <span class="ruby-string">"gid_"</span> <span class="ruby-keyword">or</span> <span class="ruby-identifier">l</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>[<span class="ruby-value">-5</span>,<span class="ruby-value">4</span>] <span class="ruby-operator">==</span> <span class="ruby-string">"_bak"</span>}.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">e</span><span class="ruby-operator">|</span>
|
366
|
-
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">pull_game</span>(<span class="ruby-identifier">e</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-string">'/'</span>,<span class="ruby-string">''</span>),<span class="ruby-identifier">args</span>)
|
367
|
-
<span class="ruby-keyword">end</span>
|
185
|
+
<pre><span class="ruby-comment"># File lib/greenmonster/spider.rb, line 37</span>
|
186
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">pull_day</span>(<span class="ruby-identifier">date</span>, <span class="ruby-identifier">sport_code</span>)
|
187
|
+
<span class="ruby-identifier">game_links_on_gameday_date_page</span>(<span class="ruby-identifier">date</span>, <span class="ruby-identifier">sport_code</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">game_id</span><span class="ruby-operator">|</span>
|
188
|
+
<span class="ruby-identifier">pull_game</span>(<span class="ruby-identifier">game_id</span>, <span class="ruby-identifier">date</span>)
|
368
189
|
<span class="ruby-keyword">end</span>
|
369
|
-
|
370
|
-
<span class="ruby-keyword">return</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:sport_code</span>]
|
371
190
|
<span class="ruby-keyword">end</span></pre>
|
372
191
|
</div><!-- pull_day-source -->
|
373
192
|
|
@@ -379,25 +198,37 @@ date.</p>
|
|
379
198
|
</div><!-- pull_day-method -->
|
380
199
|
|
381
200
|
|
382
|
-
<div id="method-
|
201
|
+
<div id="method-i-pull_days" class="method-detail ">
|
383
202
|
|
384
203
|
<div class="method-heading">
|
385
204
|
<span class="method-name">pull_days</span><span
|
386
|
-
class="method-args">(range
|
205
|
+
class="method-args">(range, sport_code)</span>
|
387
206
|
<span class="method-click-advice">click to toggle source</span>
|
388
207
|
</div>
|
389
208
|
|
390
209
|
|
391
210
|
<div class="method-description">
|
392
211
|
|
393
|
-
|
212
|
+
<p>Pull Gameday XML files for a range of dates. The args hash passes arguments
|
213
|
+
like games_folder location on to Spider.pull.</p>
|
214
|
+
|
215
|
+
<p>Example:</p>
|
216
|
+
|
217
|
+
<pre class="ruby"><span class="ruby-comment"># Pull all games in MLB in July 2011</span>
|
218
|
+
<span class="ruby-operator">>></span> <span class="ruby-constant">Gameday</span><span class="ruby-operator">::</span><span class="ruby-constant">Spider</span>.<span class="ruby-identifier">pull_days</span>(<span class="ruby-constant">Date</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">2011</span>,<span class="ruby-value">7</span>,<span class="ruby-value">1</span>)<span class="ruby-operator">..</span><span class="ruby-constant">Date</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">2011</span>,<span class="ruby-value">7</span>,<span class="ruby-value">31</span>), {:<span class="ruby-identifier">games_folder</span> =<span class="ruby-operator">></span> <span class="ruby-string">'/Users/geoff/games'</span>})
|
219
|
+
</pre>
|
220
|
+
|
221
|
+
<p>Arguments:</p>
|
222
|
+
|
223
|
+
<pre>range: (Range)
|
224
|
+
args: (Hash)</pre>
|
394
225
|
|
395
226
|
|
396
227
|
|
397
228
|
<div class="method-source-code" id="pull_days-source">
|
398
|
-
<pre><span class="ruby-comment"># File lib/greenmonster/
|
399
|
-
<span class="ruby-keyword">def</span> <span class="ruby-
|
400
|
-
<span class="ruby-identifier">range</span>.<span class="ruby-identifier">each</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">
|
229
|
+
<pre><span class="ruby-comment"># File lib/greenmonster/spider.rb, line 55</span>
|
230
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">pull_days</span>(<span class="ruby-identifier">range</span>, <span class="ruby-identifier">sport_code</span>)
|
231
|
+
<span class="ruby-identifier">range</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">date</span><span class="ruby-operator">|</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">pull_day</span>(<span class="ruby-identifier">date</span>, <span class="ruby-identifier">sport_code</span>) }
|
401
232
|
<span class="ruby-keyword">end</span></pre>
|
402
233
|
</div><!-- pull_days-source -->
|
403
234
|
|
@@ -409,18 +240,18 @@ date.</p>
|
|
409
240
|
</div><!-- pull_days-method -->
|
410
241
|
|
411
242
|
|
412
|
-
<div id="method-
|
243
|
+
<div id="method-i-pull_game" class="method-detail ">
|
413
244
|
|
414
245
|
<div class="method-heading">
|
415
246
|
<span class="method-name">pull_game</span><span
|
416
|
-
class="method-args">(game_id,
|
247
|
+
class="method-args">(game_id, date)</span>
|
417
248
|
<span class="method-click-advice">click to toggle source</span>
|
418
249
|
</div>
|
419
250
|
|
420
251
|
|
421
252
|
<div class="method-description">
|
422
253
|
|
423
|
-
<p>Pull Gameday XML files for a given game, specified by the game
|
254
|
+
<p>Pull Gameday XML files for a given game, specified by the game ID. If date
|
424
255
|
and sport code are not specified as options, these values are guessed from
|
425
256
|
the game ID string using the home team’s sport code and the date from the
|
426
257
|
scheduled date values in the game ID.</p>
|
@@ -434,54 +265,12 @@ scheduled date values in the game ID.</p>
|
|
434
265
|
|
435
266
|
<div class="method-source-code" id="pull_game-source">
|
436
267
|
<pre><span class="ruby-comment"># File lib/greenmonster/spider.rb, line 15</span>
|
437
|
-
<span class="ruby-keyword">def</span> <span class="ruby-
|
438
|
-
<span class="ruby-identifier">
|
439
|
-
|
440
|
-
|
441
|
-
<span class="ruby-
|
442
|
-
<span class="ruby-value">:games_folder</span> =<span class="ruby-operator">></span> <span class="ruby-constant">Greenmonster</span>.<span class="ruby-identifier">games_folder</span>
|
443
|
-
}.<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">args</span>)
|
444
|
-
<span class="ruby-identifier">raise</span> <span class="ruby-string">"Games folder location required."</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:games_folder</span>].<span class="ruby-identifier">nil?</span>
|
445
|
-
|
446
|
-
<span class="ruby-identifier">args</span>[<span class="ruby-value">:games_folder</span>] = <span class="ruby-constant">Pathname</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">args</span>[<span class="ruby-value">:games_folder</span>])
|
447
|
-
|
448
|
-
<span class="ruby-identifier">puts</span> <span class="ruby-identifier">game_id</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:print_games</span>]
|
449
|
-
|
450
|
-
<span class="ruby-identifier">paths</span> = {
|
451
|
-
<span class="ruby-value">:localGameFolder</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:games_folder</span>] <span class="ruby-operator">+</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:sport_code</span>] <span class="ruby-operator">+</span> <span class="ruby-identifier">format_date_as_folder</span>(<span class="ruby-identifier">args</span>[<span class="ruby-value">:date</span>]) <span class="ruby-operator">+</span> <span class="ruby-identifier">game_id</span>,
|
452
|
-
<span class="ruby-value">:mlbGameFolder</span> =<span class="ruby-operator">></span> <span class="ruby-node">"#{gameday_league_and_date_url(args)}/#{game_id}/"</span>
|
453
|
-
}
|
454
|
-
|
455
|
-
<span class="ruby-constant">FileUtils</span>.<span class="ruby-identifier">mkdir_p</span> <span class="ruby-identifier">paths</span>[<span class="ruby-value">:localGameFolder</span>] <span class="ruby-operator">+</span> <span class="ruby-string">'inning'</span>
|
456
|
-
|
457
|
-
<span class="ruby-keyword">begin</span>
|
458
|
-
<span class="ruby-comment"># Always copy linescore first. If we can't get this</span>
|
459
|
-
<span class="ruby-comment"># data, all other game data is useless.</span>
|
460
|
-
<span class="ruby-identifier">copy_gameday_xml</span>(<span class="ruby-string">'linescore.xml'</span>,<span class="ruby-identifier">paths</span>)
|
461
|
-
|
462
|
-
<span class="ruby-keyword">if</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:date</span>].<span class="ruby-identifier">year</span> <span class="ruby-operator">></span> <span class="ruby-value">2007</span>
|
463
|
-
<span class="ruby-identifier">copy_gameday_xml</span>(<span class="ruby-string">'inning_all.xml'</span>,<span class="ruby-identifier">paths</span>)
|
464
|
-
<span class="ruby-identifier">copy_gameday_xml</span>(<span class="ruby-string">'inning_hit.xml'</span>,<span class="ruby-identifier">paths</span>)
|
465
|
-
<span class="ruby-keyword">else</span>
|
466
|
-
<span class="ruby-comment"># Iterate through the inning files, but skip inning </span>
|
467
|
-
<span class="ruby-comment"># files numbered 0 (some bad spring training data).</span>
|
468
|
-
<span class="ruby-comment"># Necessary for games prior to 2008 because there is</span>
|
469
|
-
<span class="ruby-comment"># no inning_all.xml file in older games.</span>
|
470
|
-
(<span class="ruby-constant">Nokogiri</span><span class="ruby-operator">::</span><span class="ruby-constant">XML</span>(<span class="ruby-keyword">self</span>.<span class="ruby-identifier">get</span>(<span class="ruby-node">"#{paths[:mlbGameFolder]}/inning/"</span>).<span class="ruby-identifier">body</span>).<span class="ruby-identifier">search</span>(<span class="ruby-string">'a'</span>)).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">ic</span><span class="ruby-operator">|</span>
|
471
|
-
<span class="ruby-identifier">copy_gameday_xml</span>(<span class="ruby-identifier">ic</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>,<span class="ruby-identifier">paths</span>) <span class="ruby-keyword">if</span> <span class="ruby-identifier">ic</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>[<span class="ruby-value">-3</span>,<span class="ruby-value">3</span>] <span class="ruby-operator">==</span> <span class="ruby-string">"xml"</span> <span class="ruby-keyword">unless</span> <span class="ruby-identifier">ic</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>[<span class="ruby-value">-6</span>,<span class="ruby-value">6</span>] <span class="ruby-operator">==</span> <span class="ruby-string">"_0.xml"</span> <span class="ruby-keyword">or</span> <span class="ruby-identifier">ic</span>.<span class="ruby-identifier">attribute</span>(<span class="ruby-string">'href'</span>).<span class="ruby-identifier">value</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-string">'Score'</span>)
|
472
|
-
<span class="ruby-keyword">end</span>
|
473
|
-
<span class="ruby-keyword">end</span>
|
474
|
-
|
475
|
-
<span class="ruby-comment"># Copy base data files </span>
|
476
|
-
<span class="ruby-comment"># (if inning data wasn't there, this gets skipped)</span>
|
477
|
-
[<span class="ruby-string">'boxscore.xml'</span>,<span class="ruby-string">'eventLog.xml'</span>,<span class="ruby-string">'players.xml'</span>].<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">file</span><span class="ruby-operator">|</span>
|
478
|
-
<span class="ruby-identifier">copy_gameday_xml</span>(<span class="ruby-identifier">file</span>,<span class="ruby-identifier">paths</span>)
|
479
|
-
<span class="ruby-keyword">end</span>
|
480
|
-
<span class="ruby-keyword">rescue</span> <span class="ruby-constant">StandardError</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">bang</span>
|
481
|
-
<span class="ruby-identifier">puts</span> <span class="ruby-node">"Unable to download some data for #{game_id}"</span>
|
268
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">pull_game</span>(<span class="ruby-identifier">game_id</span>, <span class="ruby-identifier">date</span>)
|
269
|
+
<span class="ruby-identifier">make_folders_for_game</span>(<span class="ruby-identifier">game_id</span>, <span class="ruby-identifier">date</span>)
|
270
|
+
|
271
|
+
<span class="ruby-node">%w(boxscore.xml game_events.xml inning_all.xml linescore.xml players.xml)</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">file_name</span><span class="ruby-operator">|</span>
|
272
|
+
<span class="ruby-identifier">copy_gameday_xml</span>(<span class="ruby-identifier">game_id</span>, <span class="ruby-identifier">date</span>, <span class="ruby-identifier">file_name</span>)
|
482
273
|
<span class="ruby-keyword">end</span>
|
483
|
-
|
484
|
-
<span class="ruby-keyword">return</span> <span class="ruby-identifier">game_id</span>
|
485
274
|
<span class="ruby-keyword">end</span></pre>
|
486
275
|
</div><!-- pull_game-source -->
|
487
276
|
|
@@ -493,7 +282,7 @@ scheduled date values in the game ID.</p>
|
|
493
282
|
</div><!-- pull_game-method -->
|
494
283
|
|
495
284
|
|
496
|
-
</section><!-- public-
|
285
|
+
</section><!-- public-instance-method-details -->
|
497
286
|
|
498
287
|
</section><!-- 5Buntitled-5D -->
|
499
288
|
|