yahoo_sports19 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+
2
+ source "https://rubygems.org"
3
+
4
+ gem "scrapi", "~> 2.0"
5
+ gem "tzinfo", "~> 0.3.15"
6
+ gem "htmlentities"
7
+
8
+ group :development, :test do
9
+ gem "minitest"
10
+ gem "turn"
11
+ gem "yard"
12
+ gem "jeweler", "~> 1.8.3"
13
+ end
@@ -0,0 +1,37 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ ansi (1.4.3)
5
+ ffi (1.1.5)
6
+ git (1.2.5)
7
+ htmlentities (4.3.1)
8
+ jeweler (1.8.4)
9
+ bundler (~> 1.0)
10
+ git (>= 1.2.5)
11
+ rake
12
+ rdoc
13
+ json (1.7.5)
14
+ minitest (4.0.0)
15
+ rake (0.9.2.2)
16
+ rdoc (3.12)
17
+ json (~> 1.4)
18
+ scrapi (2.0.0)
19
+ tidy_ffi (>= 0.1.2)
20
+ tidy_ffi (0.1.4)
21
+ ffi (>= 0.3.5)
22
+ turn (0.9.6)
23
+ ansi
24
+ tzinfo (0.3.33)
25
+ yard (0.8.2.1)
26
+
27
+ PLATFORMS
28
+ ruby
29
+
30
+ DEPENDENCIES
31
+ htmlentities
32
+ jeweler (~> 1.8.3)
33
+ minitest
34
+ scrapi (~> 2.0)
35
+ turn
36
+ tzinfo (~> 0.3.15)
37
+ yard
@@ -0,0 +1,4 @@
1
+ === 0.0.1 2009-11-18
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
@@ -0,0 +1,17 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ lib/yahoo_sports.rb
6
+ lib/yahoo_sports/base.rb
7
+ lib/yahoo_sports/mlb.rb
8
+ lib/yahoo_sports/nba.rb
9
+ lib/yahoo_sports/nfl.rb
10
+ lib/yahoo_sports/nhl.rb
11
+ test/test_helper.rb
12
+ test/yahoo_sports/base_test.rb
13
+ test/yahoo_sports/mlb_test.rb
14
+ test/yahoo_sports/nba_test.rb
15
+ test/yahoo_sports/nfl_test.rb
16
+ test/yahoo_sports/nhl_test.rb
17
+ test/yahoo_sports_test.rb
@@ -0,0 +1,62 @@
1
+ = yahoo_sports
2
+
3
+ * http://github.com/chetan/yahoo_sports
4
+
5
+ == DESCRIPTION:
6
+
7
+ Ruby library for parsing stats from Yahoo! Sports pages. Currently supports
8
+ MLB, NBA, NFL and NHL stats and info.
9
+
10
+ == DOCUMENTATION:
11
+
12
+ Documentation is available online at rdoc.info[http://rdoc.info/projects/chetan/yahoo_sports]
13
+
14
+ == FEATURES:
15
+
16
+ * Pull the previous day's and the current day's games for each sport
17
+ * Pull specific team information (full name, standing, game schedule)
18
+
19
+ == SYNOPSIS:
20
+
21
+ require "yahoo_sports"
22
+ team = YahooSports::NHL.get_team_stats("nyr")
23
+ team.name # => "New York Rangers"
24
+ team.standing # => "11-9-1"
25
+ team.position # => "4th Atlantic"
26
+ team.last5[0].team # => "Edmonton Oilers (7-7-1)"
27
+ team.last5[0].status # => "W 4 - 2"
28
+
29
+ == REQUIREMENTS:
30
+
31
+ * tzinfo
32
+ * scrapi
33
+
34
+ == INSTALL:
35
+
36
+ sudo gem install yahoo_sports
37
+
38
+
39
+ == LICENSE:
40
+
41
+ (The MIT License)
42
+
43
+ Copyright (c) 2011 Pixelcop Research, Inc.
44
+
45
+ Permission is hereby granted, free of charge, to any person obtaining
46
+ a copy of this software and associated documentation files (the
47
+ 'Software'), to deal in the Software without restriction, including
48
+ without limitation the rights to use, copy, modify, merge, publish,
49
+ distribute, sublicense, and/or sell copies of the Software, and to
50
+ permit persons to whom the Software is furnished to do so, subject to
51
+ the following conditions:
52
+
53
+ The above copyright notice and this permission notice shall be
54
+ included in all copies or substantial portions of the Software.
55
+
56
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
57
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
58
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
59
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
60
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
61
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
62
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,37 @@
1
# encoding: utf-8

# Activate the bundled gems before anything else is required. When the
# bundle cannot be resolved, print the reason and exit with Bundler's own
# status code.
require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'rake'
require 'jeweler'

# Gem metadata handed to Jeweler's task definitions.
Jeweler::Tasks.new do |spec|
  spec.name = "yahoo_sports19"
  spec.summary = "Ruby library for parsing stats from Yahoo! Sports pages"
  spec.description = "Ruby library for parsing stats from Yahoo! Sports pages. Currently supports MLB, NBA, NFL and NHL stats and info."
  spec.email = "chetan@pixelcop.net"
  spec.homepage = "http://github.com/chetan/yahoo_sports"
  spec.authors = ["Chetan Sarva"]
end
Jeweler::RubygemsDotOrgTasks.new

require "rake/testtask"

# `rake test` picks up every Ruby file below ./test, with lib/ and test/
# added to the load path.
desc "Run unit tests"
Rake::TestTask.new("test") do |test_task|
  test_task.libs << 'lib' << 'test'
  test_task.pattern = "./test/**/*.rb"
  test_task.verbose = false
  test_task.warning = false
end

# Running plain `rake` runs the test task.
task :default => :test

# `rake docs` is defined through YARD's rake task.
require "yard"
YARD::Rake::YardocTask.new("docs")
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.1
@@ -0,0 +1,12 @@
1
# Put this directory on the load path (once) so the autoloads below resolve.
# The absolute path is used on purpose: the constants are loaded lazily, and a
# relative load-path entry would stop resolving if the process changed its
# working directory before the first reference.
lib_dir = File.expand_path(File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

# Namespace for the Yahoo! Sports scrapers. Each constant is loaded from its
# own file the first time it is referenced.
module YahooSports

  autoload :Base, "yahoo_sports/base"
  autoload :MLB,  "yahoo_sports/mlb"
  autoload :NBA,  "yahoo_sports/nba"
  autoload :NFL,  "yahoo_sports/nfl"
  autoload :NHL,  "yahoo_sports/nhl"

end
@@ -0,0 +1,429 @@
1
+
2
+ require 'rubygems'
3
+ require 'tzinfo'
4
+
5
+ require 'net/http'
6
+ require 'scrapi'
7
+ require 'ostruct'
8
+ require 'htmlentities'
9
+
10
+ module YahooSports
11
+
12
# Fetches the given URL and returns the body of the response.
#
# The URL is percent-escaped before it is parsed, so characters that are not
# legal in a URI (for example a space in a team name) do not make URI.parse
# raise. The body of whatever response comes back is returned as-is.
#
# @param [String] url URL to fetch
# @return [String] contents of response body
def self.fetchurl(url)
  # URI.escape no longer exists on Ruby 3.0+; the default parser object
  # provides the escaping method on old and new Rubies alike.
  escaped = URI::DEFAULT_PARSER.escape(url)
  Net::HTTP.get_response(URI.parse(escaped)).body
end
20
+
21
# Strip HTML tags from the given string. Also performs some common entity
# substitutions before handing the text to HTMLEntities for a final decode.
#
# Entity codes replaced explicitly:
# * &nbsp;   (becomes a plain space)
# * &amp;
# * &quot;
# * &lt;
# * &gt;
# * &hellip; (becomes three dots; the misspelt &ellip; is accepted as well)
# * &apos;
#
# Because the explicit pass runs before HTMLEntities, doubly-escaped input
# (e.g. "&amp;lt;") is decoded more than once. Line-break tags that only
# become visible after that pass (escaped "<br />") are removed too.
#
# @param [String] html text to be filtered
# @return [String] original string with HTML tags filtered out and entities replaced
def self.strip_tags(html)

  # /m lets "." cross newlines, so a tag broken over several lines is removed
  # as well.
  text = html.gsub(/<.+?>/m, '')

  text = text.gsub(/&nbsp;/, ' ').
              gsub(/&amp;/, '&').
              gsub(/&quot;/, '"').
              gsub(/&lt;/, '<').
              gsub(/&gt;/, '>').
              gsub(/&h?ellip;/, '...').
              gsub(/&apos;/, "'").
              gsub(/<br *\/>/m, '')

  HTMLEntities.new.decode(text).strip

end
50
+
51
+ class Base
52
+
53
# Get the scoreboard games for the given sport. Includes recently completed,
# live and upcoming games.
#
# Source: http://sports.yahoo.com/<sport>
#
# Game struct has the following keys:
#   game.date     # date of game; includes time if preview
#   game.team1    # visiting team
#   game.team2    # home team
#   game.score1   # team1's score, if live or final
#   game.score2   # team2's score, if live or final
#   game.state    # live, final or preview
#   game.tv       # TV station showing the game, if preview and available
#
# Example:
#   #<OpenStruct state="final", score1="34", date=Thu Nov 26 00:00:00 -0500 2009, score2="12", team1="Green Bay", team2="Detroit">
#
# Both arguments are matched case-insensitively. The caller's strings are
# never modified, so frozen strings may be passed.
#
# @param [String] sport sport to list, can be one of ["mlb", "nba", "nfl", "nhl"]
# @param [String] state Optionally filter for the given state ("live", "final", or "preview")
# @return [Array<OpenStruct>] list of games
# @raise [RuntimeError] if sport or state is not one of the accepted values,
#   or if nothing could be fetched
def self.get_homepage_games(sport, state = "")

  # Lowercased copies: downcase! would alter (or, for frozen strings, fail on)
  # the caller's objects.
  sport = sport.downcase
  # \A...\z anchor the whole string; ^...$ would accept "nba\n<anything>",
  # which then ends up in the URL below.
  if sport !~ /\A(nba|nhl|nfl|mlb)\z/
    raise sprintf("Invalid param for 'sport' = '%s'", sport)
  end

  state = state.to_s.downcase
  if !state.empty? && state !~ /\A(live|final|preview)\z/
    raise sprintf("Invalid param for 'state' = '%s'", state)
  end

  html = YahooSports.fetchurl("http://sports.yahoo.com/#{sport}/proxy/html/scorethin")
  if not html then
    raise 'Error fetching url'
  end

  # One scoreboard entry: element id (date source), class (state source),
  # team names, scores, status link text and the extra "link-box" text.
  sports_game = Scraper.define do

    array :teams
    array :scores

    process "li.odd, li.even, li.live", :date_src => "@id"
    process "li.odd, li.even, li.live", :class_src => "@class"
    process "li.link-box", :extra_src => :text

    process "td.team>a", :teams => :text
    process "td.score", :scores => :text

    process "li.status>a", :status => :text

    result :date_src, :teams, :scores, :status, :class_src, :extra_src

  end

  sports = Scraper.define do

    array :games

    process "ul.game-list>li", :games => sports_game

    result :games

  end

  games_temp = sports.scrape(html)
  games = []
  return games if games_temp.nil?

  games_temp.each { |g|

    gm = OpenStruct.new
    gm.team1 = g.teams[0].strip if g.teams[0]
    gm.team2 = g.teams[1].strip if g.teams[1]
    gm.score1 = g.scores[0].strip if g.scores[0]
    gm.score2 = g.scores[1].strip if g.scores[1]

    # The state is whatever follows the first space in the class attribute,
    # or the whole attribute when it holds a single class name.
    if g.class_src.include? ' ' then
      gm.state = g.class_src[ g.class_src.index(' ')+1, g.class_src.length ].strip
    else
      gm.state = g.class_src.strip
    end

    gm.tv = $1 if g.extra_src =~ /TV: (.*)/

    # For previews the status text is appended to the date before parsing.
    status = g.status.strip if g.status
    time_str = (gm.state == "preview" ? " #{status}" : "")

    # MLB ids are combined with the current year; the other sports use the
    # first eight characters of the id directly.
    if sport == 'mlb' then
      gm.date = Time.parse(Time.new.strftime('%Y') + g.date_src[2,4] + time_str)
    else
      gm.date = Time.parse(g.date_src[0,8] + time_str)
    end

    next if !state.empty? && state != gm.state
    games << gm

  }

  return games

end
157
+
158
# Retrieves team information for the team in the given sport
#
# Source: http://sports.yahoo.com/<sport>/teams/<team>
#
# Team struct has the following keys:
#   team.name       # full team name
#   team.standing   # current standing
#   team.position   # position in the conference
#   team.last5      # previous games results
#   team.next5      # upcoming scheduled games
#   team.live       # struct describing in-progress game, if available
#
#
# Games in the last5 and next5 lists have the following keys:
#   game.date     # date of game
#   game.team     # full team name
#   game.status   # score for completed games (e.g. "L 20 - 23") or "preview"
#   game.away     # boolean value indicating an away game
#
# Both arguments are matched case-insensitively. The caller's strings are
# never modified, so frozen strings may be passed.
#
# @param [String] sport sport to list, can be one of ["mlb", "nba", "nfl", "nhl"]
# @param [String] str 3-letter team code or partial team name
# @return [OpenStruct] team info
# @raise [RuntimeError] if sport is not an accepted value or no team page is found
def self.get_team_stats(sport, str)

  # Lowercased copies: downcase! would alter (or, for frozen strings, fail on)
  # the caller's objects.
  sport = sport.downcase
  # \A...\z anchor the whole string; ^...$ would accept "nhl\n<anything>".
  if sport !~ /\A(nba|nhl|nfl|mlb)\z/
    raise sprintf("Invalid param for 'sport' = '%s'", sport)
  end

  str = str.downcase
  # find_team_page yields [team code, page html], or nil when nothing matched;
  # only the html is needed here.
  (_team, html) = find_team_page(sport, str)
  if html.nil? then
    raise sprintf("Can't find team '%s'", str)
  end

  info = get_team_info(html)
  last5, next5 = get_scores_and_schedule(html)
  live_game = get_live_game(info.name, html)

  return OpenStruct.new({:name => info.name,
                         :standing => info.standing,
                         :position => info.position,
                         :last5 => last5,
                         :next5 => next5,
                         :live => live_game})
end
204
+
205
+
206
+ private
207
+
208
# Scrapes the team header of a team page.
#
# Commas are removed from the standing text. Any field whose element is not
# present on the page is left nil.
#
# @param [String] html team page markup
# @return [OpenStruct] struct with name, standing and position; empty when
#   the scraper returns nothing
def self.get_team_info(html)

  info_scraper = Scraper.define do

    process "div#team-header div.info h1", :name => :text
    process "div#team-header div.info div.stats li.score", :standing => :text
    process "div#team-header div.info div.stats li.position", :position => :text

    result :name, :standing, :position
  end

  scraped = info_scraper.scrape(html)

  info = OpenStruct.new
  return info if scraped.nil?

  info.name = scraped.name
  # standing is nil when the page has no score element; gsub! on it would raise.
  info.standing = scraped.standing && scraped.standing.gsub(/,/, '')
  info.position = scraped.position
  return info
end
230
+
231
# Scrapes the schedule table of a team page and splits it into completed and
# upcoming games.
#
# Each row's text is split on newlines; the three lines after the first are
# read as date, opponent (with record) and status. A status starting with
# "W" or "L" marks a completed game, anything else is treated as a preview
# whose status text is parsed as part of the date.
#
# A row whose date line is "Bye" produces no game of its own. Instead a
# struct with +bye+ set to true is inserted once, just before the next game
# that is added.
#
# @param [String] html team page markup
# @return [Array(Array<OpenStruct>, Array<OpenStruct>)] completed games and
#   upcoming games, in page order
def self.get_scores_and_schedule(html)

  last5 = []
  next5 = []

  games_scraper = Scraper.define do

    array :games
    array :teams

    process "div#team-schedule-list div.bd table tbody tr", :games => :text
    process "div#team-schedule-list div.bd table tbody tr td.title span", :teams => :text

    result :games, :teams

  end

  games_temp = games_scraper.scrape(html)

  return [last5, next5] if games_temp.nil?

  bye = false         # bye week support for nfl
  bye_added = false   # help us put it in the right place (hopefully)

  games_temp.games.each_with_index { |row, i|

    info = row.split("\n").slice(1, 3)
    next if info.nil? # blank row: nothing to read

    if info[0] == "Bye"
      # team is in a bye week
      bye = true
      next
    end

    # A bye row has no entry in the team list, so rows after it are shifted
    # by one.
    team = games_temp.teams[bye ? i - 1 : i].to_s.strip

    record = info[1].to_s[/\([\d-]+\)/]
    status = info[2]

    preview = (status !~ /^(W|L)/)
    date_str = (preview ? "#{info[0]} #{status}" : info[0])

    gm = OpenStruct.new
    gm.date = Time.parse(date_str)
    gm.team = "#{team} #{record}".strip
    gm.status = (preview ? "preview" : status)
    gm.away = (info[1] =~ / at / ? true : false)

    bucket = (preview ? next5 : last5)
    if bye && !bye_added
      bucket << OpenStruct.new(:bye => true)
      bye_added = true
    end
    bucket << gm

  }

  return [last5, next5]
end
304
+
305
# Scrapes the in-progress game box from a team page, if there is one.
#
# The page is only scraped when it contains the text "In Progress Game".
# For each team row the name and the runs, hits and errors columns are read.
#
# Returned struct has the following keys:
#   game.inning      # text of the status cell
#   game.home        # true if +team+ contains the second listed team's name
#                    # (only its last word when the name has several words)
#   game.away_team   # first listed team  (name, runs, hits, errors)
#   game.home_team   # second listed team (name, runs, hits, errors)
#
# @param [String] team full name of the team the page belongs to
# @param [String] html team page markup
# @return [OpenStruct, nil] live game info; nil when the page shows no game
#   in progress or fewer than two team rows could be scraped
def self.get_live_game(team, html)

  return nil if html !~ /In Progress Game/

  team_scraper = Scraper.define do

    process_first "td:nth-child(2)", :name => :text
    process_first "td:nth-child(4)", :runs => :text
    process_first "td:nth-child(5)", :hits => :text
    process_first "td:nth-child(6)", :errors => :text

    result :name, :runs, :hits, :errors

  end

  live_scraper = Scraper.define do
    array :teams
    process_first "td.yspscores", :inning => :text
    process "tr.ysptblclbg5", :teams => team_scraper
    result :inning, :teams
  end

  scraped = live_scraper.scrape(html)
  return nil if scraped.nil?

  game = struct_to_ostruct(scraped)
  # Both team rows are needed below; give up quietly if the box is incomplete.
  return nil if game.teams.nil? || game.teams.size < 2

  game.inning = game.inning.strip if game.inning

  # they are at home if team 1 (2nd team) is them
  home_name = game.teams[1].name.to_s
  words = home_name.split
  nickname = (words.size > 1 ? words[-1] : home_name)
  game.home = (!nickname.empty? && team.to_s.include?(nickname))

  # helpers
  game.away_team = game.teams[0]
  game.home_team = game.teams[1]
  game.delete_field('teams')

  return game

end
353
+
354
# Locates and fetches the page for a team.
#
# First tries http://sports.yahoo.com/<sport>/teams/<str> directly. If that
# comes back as the generic page, the sport's team list is fetched and the
# first row whose text contains +str+ (case-insensitively) is followed.
#
# Fetch errors are printed and turn into a nil result. The caller's strings
# are never modified.
#
# @param [String] sport sport the team plays in
# @param [String] str 3-letter team code or partial team name
# @return [Array(String, String), nil] team code and page html, or nil when
#   the team cannot be found or a fetch fails
def self.find_team_page(sport, str)

  # Lowercased copies: downcase! would alter (or, for frozen strings, fail on)
  # the caller's objects.
  sport = sport.downcase
  str = str.downcase

  begin
    html = YahooSports.fetchurl("http://sports.yahoo.com/#{sport}/teams/" + str)
  rescue => ex
    puts ex
    return
  end

  if html !~ %r{<title><MapleRegion id ="page_title_generic"/></title>} then
    # got the right page
    return [str, html]
  end

  # look for it
  begin
    html = YahooSports.fetchurl("http://sports.yahoo.com/#{sport}/teams")
  rescue => ex
    puts ex
    return
  end

  team_scraper = Scraper.define do
    array :teams, :links
    process "table.yspcontent tr.ysprow1", :teams => :text
    process "table.yspcontent tr.ysprow1 a", :links => "@href"
    process "table.yspcontent tr.ysprow2", :teams => :text
    process "table.yspcontent tr.ysprow2 a", :links => "@href"
    result :teams, :links
  end

  ret = team_scraper.scrape(html)
  return nil if ret.nil?

  ret.teams.each_index { |i|
    name = YahooSports.strip_tags(ret.teams[i]).strip
    # str is already lowercase, so this also covers an exact match.
    next unless name.downcase.include? str

    link = ret.links[i]
    next if link.nil? # row without a link: nothing to follow
    link = link.strip.gsub(%r{/$}, "") # strip trailing slash for nfl

    # found a matching team
    begin
      html = YahooSports.fetchurl("http://sports.yahoo.com#{link}")
    rescue => ex
      puts ex
      return
    end

    # The team code is the last part of the link path, not of the team name.
    code = link[%r{\A/[a-z]+/teams/(.+)\z}, 1]
    return [code, html]
  }

  return nil

end
413
+
414
# Converts a Struct into an OpenStruct with the same members.
#
# Member values that are Structs are converted recursively, and so are Struct
# elements of Array members (one level of array). All other values are
# carried over unchanged. The input struct and its arrays are not modified.
#
# @param [Struct] struct struct to convert
# @return [OpenStruct] struct with the same keys and converted values
def self.struct_to_ostruct(struct)
  hash = {}
  struct.each_pair { |key, val|
    if val.kind_of? Struct then
      val = struct_to_ostruct(val)
    elsif val.kind_of? Array then
      # Test the class rather than the string form: a plain string that
      # happens to contain "struct" must be left alone.
      val = val.map { |v| v.kind_of?(Struct) ? struct_to_ostruct(v) : v }
    end
    hash[key] = val
  }
  return OpenStruct.new(hash)
end
426
+
427
+ end
428
+
429
+ end