yahoo_sports19 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +13 -0
- data/Gemfile.lock +37 -0
- data/History.txt +4 -0
- data/Manifest.txt +17 -0
- data/README.rdoc +62 -0
- data/Rakefile +37 -0
- data/VERSION +1 -0
- data/lib/yahoo_sports.rb +12 -0
- data/lib/yahoo_sports/base.rb +429 -0
- data/lib/yahoo_sports/mlb.rb +54 -0
- data/lib/yahoo_sports/nba.rb +54 -0
- data/lib/yahoo_sports/nfl.rb +54 -0
- data/lib/yahoo_sports/nhl.rb +54 -0
- data/test/test_helper.rb +98 -0
- data/test/test_yahoo_sports.rb +14 -0
- data/test/yahoo_sports/test_base.rb +17 -0
- data/test/yahoo_sports/test_mlb.rb +32 -0
- data/test/yahoo_sports/test_nba.rb +32 -0
- data/test/yahoo_sports/test_nfl.rb +32 -0
- data/test/yahoo_sports/test_nhl.rb +32 -0
- data/yahoo_sports19.gemspec +76 -0
- metadata +182 -0
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
ansi (1.4.3)
|
5
|
+
ffi (1.1.5)
|
6
|
+
git (1.2.5)
|
7
|
+
htmlentities (4.3.1)
|
8
|
+
jeweler (1.8.4)
|
9
|
+
bundler (~> 1.0)
|
10
|
+
git (>= 1.2.5)
|
11
|
+
rake
|
12
|
+
rdoc
|
13
|
+
json (1.7.5)
|
14
|
+
minitest (4.0.0)
|
15
|
+
rake (0.9.2.2)
|
16
|
+
rdoc (3.12)
|
17
|
+
json (~> 1.4)
|
18
|
+
scrapi (2.0.0)
|
19
|
+
tidy_ffi (>= 0.1.2)
|
20
|
+
tidy_ffi (0.1.4)
|
21
|
+
ffi (>= 0.3.5)
|
22
|
+
turn (0.9.6)
|
23
|
+
ansi
|
24
|
+
tzinfo (0.3.33)
|
25
|
+
yard (0.8.2.1)
|
26
|
+
|
27
|
+
PLATFORMS
|
28
|
+
ruby
|
29
|
+
|
30
|
+
DEPENDENCIES
|
31
|
+
htmlentities
|
32
|
+
jeweler (~> 1.8.3)
|
33
|
+
minitest
|
34
|
+
scrapi (~> 2.0)
|
35
|
+
turn
|
36
|
+
tzinfo (~> 0.3.15)
|
37
|
+
yard
|
data/History.txt
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
README.rdoc
|
4
|
+
Rakefile
|
5
|
+
lib/yahoo_sports.rb
|
6
|
+
lib/yahoo_sports/base.rb
|
7
|
+
lib/yahoo_sports/mlb.rb
|
8
|
+
lib/yahoo_sports/nba.rb
|
9
|
+
lib/yahoo_sports/nfl.rb
|
10
|
+
lib/yahoo_sports/nhl.rb
|
11
|
+
test/test_helper.rb
|
12
|
+
test/yahoo_sports/base_test.rb
|
13
|
+
test/yahoo_sports/mlb_test.rb
|
14
|
+
test/yahoo_sports/nba_test.rb
|
15
|
+
test/yahoo_sports/nfl_test.rb
|
16
|
+
test/yahoo_sports/nhl_test.rb
|
17
|
+
test/yahoo_sports_test.rb
|
data/README.rdoc
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
= yahoo_sports
|
2
|
+
|
3
|
+
* http://github.com/chetan/yahoo_sports
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
Ruby library for parsing stats from Yahoo! Sports pages. Currently supports
|
8
|
+
MLB, NBA, NFL and NHL stats and info.
|
9
|
+
|
10
|
+
== DOCUMENTATION:
|
11
|
+
|
12
|
+
Documentation is available online at rdoc.info[http://rdoc.info/projects/chetan/yahoo_sports]
|
13
|
+
|
14
|
+
== FEATURES:
|
15
|
+
|
16
|
+
* Pull the previous day's and current day's games for each sport
|
17
|
+
* Pull specific team information (full name, standing, game schedule)
|
18
|
+
|
19
|
+
== SYNOPSIS:
|
20
|
+
|
21
|
+
require "yahoo_sports"
|
22
|
+
team = YahooSports::NHL.get_team_stats("nyr")
|
23
|
+
team.name # => "New York Rangers"
|
24
|
+
team.standing # => "11-9-1"
|
25
|
+
team.position # => "4th Atlantic"
|
26
|
+
team.last5[0].team # => "Edmonton Oilers (7-7-1)"
|
27
|
+
team.last5[0].status # => "W 4 - 2"
|
28
|
+
|
29
|
+
== REQUIREMENTS:
|
30
|
+
|
31
|
+
* tzinfo
|
32
|
+
* scrapi
|
33
|
+
|
34
|
+
== INSTALL:
|
35
|
+
|
36
|
+
sudo gem install yahoo_sports
|
37
|
+
|
38
|
+
|
39
|
+
== LICENSE:
|
40
|
+
|
41
|
+
(The MIT License)
|
42
|
+
|
43
|
+
Copyright (c) 2011 Pixelcop Research, Inc.
|
44
|
+
|
45
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
46
|
+
a copy of this software and associated documentation files (the
|
47
|
+
'Software'), to deal in the Software without restriction, including
|
48
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
49
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
50
|
+
permit persons to whom the Software is furnished to do so, subject to
|
51
|
+
the following conditions:
|
52
|
+
|
53
|
+
The above copyright notice and this permission notice shall be
|
54
|
+
included in all copies or substantial portions of the Software.
|
55
|
+
|
56
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
57
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
58
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
59
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
60
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
61
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
62
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# encoding: utf-8

# Rake tasks for the yahoo_sports19 gem: packaging (via Jeweler), unit tests
# and YARD documentation.

require 'rubygems'
require 'bundler'

# Set up the bundled gems first; if that fails, print the reason plus a hint
# and exit with Bundler's own status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => error
  $stderr.puts error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit error.status_code
end

require 'rake'
require 'jeweler'

# Gem metadata handed to Jeweler's task definitions.
Jeweler::Tasks.new do |spec|
  spec.name        = "yahoo_sports19"
  spec.summary     = "Ruby library for parsing stats from Yahoo! Sports pages"
  spec.description = "Ruby library for parsing stats from Yahoo! Sports pages. Currently supports MLB, NBA, NFL and NHL stats and info."
  spec.email       = "chetan@pixelcop.net"
  spec.homepage    = "http://github.com/chetan/yahoo_sports"
  spec.authors     = ["Chetan Sarva"]
end
Jeweler::RubygemsDotOrgTasks.new

require "rake/testtask"

desc "Run unit tests"
Rake::TestTask.new("test") do |test_task|
  test_task.libs.push('lib', 'test')
  test_task.pattern = "./test/**/*.rb"
  test_task.verbose = false
  test_task.warning = false
end

# Running plain `rake` runs the test task.
task :default => :test

require "yard"
YARD::Rake::YardocTask.new("docs")
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.2.1
|
data/lib/yahoo_sports.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Put this file's directory at the front of the load path, unless it is
# already listed there either as given or in expanded (absolute) form, so the
# autoload paths below can be resolved.
lib_dir = File.dirname(__FILE__)
already_listed = [lib_dir, File.expand_path(lib_dir)].any? { |dir| $LOAD_PATH.include?(dir) }
$LOAD_PATH.unshift(lib_dir) unless already_listed

# Top-level namespace. Each constant is registered for autoloading from its
# own file under yahoo_sports/.
module YahooSports

  {
    :Base => "yahoo_sports/base",
    :MLB  => "yahoo_sports/mlb",
    :NBA  => "yahoo_sports/nba",
    :NFL  => "yahoo_sports/nfl",
    :NHL  => "yahoo_sports/nhl"
  }.each { |const_name, path| autoload const_name, path }

end
|
@@ -0,0 +1,429 @@
|
|
1
|
+
|
2
|
+
require 'rubygems'
|
3
|
+
require 'tzinfo'
|
4
|
+
|
5
|
+
require 'net/http'
|
6
|
+
require 'scrapi'
|
7
|
+
require 'ostruct'
|
8
|
+
require 'htmlentities'
|
9
|
+
|
10
|
+
module YahooSports
|
11
|
+
|
12
|
+
# Fetches the given URL and returns the body.
#
# The URL is escaped before it is parsed, so callers may pass strings that
# contain spaces or other characters that are not valid in a URI.
#
# @param [String] url URL to fetch
# @return [String] contents of response body
def self.fetchurl(url)
  # URI.escape is obsolete and was removed in Ruby 3.0; the default parser's
  # #escape is used for the escaping instead.
  escaped = URI::DEFAULT_PARSER.escape(url)
  Net::HTTP.get_response(URI.parse(escaped)).body
end
|
20
|
+
|
21
|
+
# Strip HTML tags from the given string. Also performs some common entity
# substitutions before the text is handed to HTMLEntities for decoding.
#
# List of entity codes replaced explicitly:
# * &nbsp;  (replaced with a plain space)
# * &amp;
# * &quot;
# * &lt;
# * &gt;
# * &ellip; (replaced with "...")
# * &apos; / &#39;
#
# @param [String] html text to be filtered
# @return [String] original string with HTML tags filtered out and entities replaced
def self.strip_tags(html)

  # /m lets "." match newlines so a tag that spans several lines is removed
  # too. Every tag (including <br />) is dropped by this one substitution.
  without_tags = html.gsub(/<.+?>/m, '')

  HTMLEntities.new.decode(
    without_tags.
      gsub(/&nbsp;/, ' ').
      gsub(/&amp;/, '&').
      gsub(/&quot;/, '"').
      gsub(/&lt;/, '<').
      gsub(/&gt;/, '>').
      gsub(/&ellip;/, '...').
      gsub(/&(?:apos|#39);/, "'")
  ).strip

end
|
50
|
+
|
51
|
+
class Base
|
52
|
+
|
53
|
+
# Get the scoreboard games for the given sport. Includes recently completed,
# live and upcoming games.
#
# Source: http://sports.yahoo.com/<sport>
#
# Game struct has the following keys:
#   game.date    # date of game; includes time if preview
#   game.team1   # visiting team
#   game.team2   # home team
#   game.score1  # team1's score, if live or final
#   game.score2  # team2's score, if live or final
#   game.state   # live, final or preview
#   game.tv      # TV station showing the game, if preview and available
#
# Example:
#   #<OpenStruct state="final", score1="34", date=Thu Nov 26 00:00:00 -0500 2009, score2="12", team1="Green Bay", team2="Detroit">
#
# Neither argument is modified; both are matched case-insensitively.
#
# @param [String] sport sport to list, can be one of ["mlb", "nba", "nfl", "nhl"]
# @param [String] state Optionally filter for the given state ("live", "final", or "preview")
# @return [Array<OpenStruct>] list of games
# @raise [RuntimeError] if sport or state is not one of the accepted values,
#   or if nothing could be fetched
def self.get_homepage_games(sport, state = "")

  # Lowercase copies: downcase! would modify the caller's string and fails
  # outright on frozen strings.
  sport = sport.to_s.downcase
  # \A and \z anchor the whole string; ^ and $ only anchor a single line,
  # which would let "nba\n..." through into the URL below.
  if sport !~ /\A(nba|nhl|nfl|mlb)\z/ then
    raise sprintf("Invalid param for 'sport' = '%s'", sport)
  end

  state = state.to_s.downcase
  if not state.empty? and state !~ /\A(live|final|preview)\z/ then
    raise sprintf("Invalid param for 'state' = '%s'", state)
  end

  html = YahooSports.fetchurl("http://sports.yahoo.com/#{sport}/proxy/html/scorethin")
  if not html then
    raise 'Error fetching url'
  end

  # Scraper for a single game entry
  sports_game = Scraper.define do

    array :teams
    array :scores

    process "li.odd, li.even, li.live", :date_src => "@id"
    process "li.odd, li.even, li.live", :class_src => "@class"
    process "li.link-box", :extra_src => :text

    process "td.team>a", :teams => :text
    process "td.score", :scores => :text

    process "li.status>a", :status => :text

    result :date_src, :teams, :scores, :status, :class_src, :extra_src

  end

  # Scraper for the whole game list
  sports = Scraper.define do

    array :games

    process "ul.game-list>li", :games => sports_game

    result :games

  end

  games_temp = sports.scrape(html)
  games = []
  return games if games_temp.nil?

  games_temp.each { |g|

    # An entry without an id attribute carries no date information to parse
    next if g.date_src.nil?

    teams  = g.teams  || []
    scores = g.scores || []

    gm = OpenStruct.new
    gm.team1 = teams[0].strip if teams[0]
    gm.team2 = teams[1].strip if teams[1]
    gm.score1 = scores[0].strip if scores[0]
    gm.score2 = scores[1].strip if scores[1]

    # The state is whatever follows the first space in the class attribute,
    # or the whole attribute when there is no space.
    class_src = g.class_src.to_s
    space = class_src.index(' ')
    gm.state = (space ? class_src[space + 1, class_src.length] : class_src).strip

    gm.tv = $1 if g.extra_src =~ /TV: (.*)/

    # For previews the status text is appended to the date before parsing
    status = g.status.strip if g.status
    time_str = (gm.state == "preview" ? " #{status}" : "")

    if sport == 'mlb' then
      # mlb ids: the current year is prepended to 4 characters taken at offset 2
      gm.date = Time.parse(Time.new.strftime('%Y') + g.date_src[2,4] + time_str)
    else
      # other sports: the first 8 characters of the id are parsed as the date
      gm.date = Time.parse(g.date_src[0,8] + time_str)
    end

    next if not state.empty? and state != gm.state
    games << gm

  }

  return games

end
|
157
|
+
|
158
|
+
# Retrieves team information for the team in the given sport
#
# Source: http://sports.yahoo.com/<sport>/teams/<team>
#
# Team struct has the following keys:
#   team.name       # full team name
#   team.standing   # current standing
#   team.position   # position in the conference
#   team.last5      # previous games results
#   team.next5      # upcoming scheduled games
#   team.live       # struct describing in-progress game, if available
#
# Games in the last5 and next5 lists have the following keys:
#   game.date       # date of game
#   game.team       # full team name
#   game.status     # score for completed games (e.g. "L 20 - 23") or "preview"
#   game.away       # boolean value indicating an away game
#
# Neither argument is modified; both are matched case-insensitively.
#
# @param [String] sport sport to list, can be one of ["mlb", "nba", "nfl", "nhl"]
# @param [String] str 3-letter team code or partial team name
# @return [OpenStruct] team info
# @raise [RuntimeError] if sport is not an accepted value or no page is found for the team
def self.get_team_stats(sport, str)

  # Lowercase copies: downcase! would modify the caller's string and fails
  # outright on frozen strings.
  sport = sport.to_s.downcase
  # \A and \z anchor the whole string; ^ and $ only anchor a single line.
  if sport !~ /\A(nba|nhl|nfl|mlb)\z/ then
    raise sprintf("Invalid param for 'sport' = '%s'", sport)
  end

  str = str.to_s.downcase
  # find_team_page yields [team code, html], or nil when nothing was found;
  # only the page body is needed here.
  (_team, html) = find_team_page(sport, str)
  if html.nil? then
    raise sprintf("Can't find team '%s'", str)
  end

  info = get_team_info(html)
  last5, next5 = get_scores_and_schedule(html)
  live_game = get_live_game(info.name, html)

  return OpenStruct.new({:name => info.name,
                         :standing => info.standing,
                         :position => info.position,
                         :last5 => last5,
                         :next5 => next5,
                         :live => live_game})
end
|
204
|
+
|
205
|
+
|
206
|
+
private
|
207
|
+
|
208
|
+
# Scrapes the team header of a team page for the team's name, standing and
# position.
#
# @param [String] html team page markup
# @return [OpenStruct] struct with name, standing and position; an empty
#   struct when the scraper returns nothing. Individual fields are nil when
#   the corresponding element was not found.
def self.get_team_info(html)

  info_scraper = Scraper.define do

    process "div#team-header div.info h1", :name => :text
    process "div#team-header div.info div.stats li.score", :standing => :text
    process "div#team-header div.info div.stats li.position", :position => :text

    result :name, :standing, :position
  end

  info_temp = info_scraper.scrape(html)

  info = OpenStruct.new
  return info if info_temp.nil?

  info.name = info_temp.name
  # The standing element may be missing; only strip commas when there is
  # text. gsub (not gsub!) leaves the scraped value itself untouched.
  info.standing = info_temp.standing && info_temp.standing.gsub(/,/, '')
  info.position = info_temp.position
  return info
end
|
230
|
+
|
231
|
+
# Scrapes the schedule table of a team page and splits its rows into
# completed games and upcoming games.
#
# Each game is an OpenStruct with date, team, status and away. A bye week is
# represented by an OpenStruct whose only field is bye = true; it is inserted
# once, in front of the first game that follows the bye row.
#
# @param [String] html team page markup
# @return [Array(Array<OpenStruct>, Array<OpenStruct>)] [last5, next5];
#   both lists are empty when no schedule rows were scraped
def self.get_scores_and_schedule(html)

  last5 = []
  next5 = []

  games_scraper = Scraper.define do

    array :games
    array :teams

    process "div#team-schedule-list div.bd table tbody tr", :games => :text
    process "div#team-schedule-list div.bd table tbody tr td.title span", :teams => :text

    result :games, :teams

  end

  games_temp = games_scraper.scrape(html)

  return [last5, next5] if games_temp.nil?

  # Either list may be nil when nothing matched
  rows  = games_temp.games || []
  teams = games_temp.teams || []

  bye = false # bye week support for nfl
  bye_added = false # help us put it in the right place (hopefully)

  rows.each_index { |i|

    # Lines 2-4 of the row text are used as date, opponent and status
    info = rows[i].to_s.split("\n").slice(1, 3)
    next if info.nil? # row text has no usable lines

    if info[0] == "Bye"
      # team is in a bye week
      bye = true
      next
    end

    # After a bye row the team list is read one position earlier
    # (presumably the bye row contributes no team entry -- verify against a live page)
    t = (bye ? i - 1 : i)
    team = teams[t].to_s.strip

    gm = OpenStruct.new

    record = info[1].to_s[/(\([\d-]+\))/, 1]
    status = info[2]

    # Anything that does not start with W or L is treated as an upcoming
    # game; its status text is then parsed as part of the date.
    preview = (status !~ /^(W|L)/)
    date_str = (preview ? "#{info[0]} #{status}" : info[0])
    gm.date = Time.parse(date_str)

    gm.team = "#{team} #{record}".strip
    gm.status = (preview ? "preview" : status)

    gm.away = (info[1] =~ / at / ? true : false)

    # Upcoming games go to next5, completed ones to last5; a pending bye
    # marker is placed in the same list right before this game.
    bucket = (preview ? next5 : last5)
    if bye and not bye_added then
      gmb = OpenStruct.new
      gmb.bye = true
      bucket << gmb
      bye_added = true
    end
    bucket << gm

  }

  return [last5, next5]
end
|
304
|
+
|
305
|
+
# Scrapes the in-progress game shown on a team page.
#
# The returned struct has inning, home (true when the second listed team is
# the given team), away_team and home_team. Each team is a struct with name,
# runs, hits and errors.
#
# @param [String] team full name of the team the page belongs to
# @param [String] html team page markup
# @return [OpenStruct, nil] live game info, or nil when the page does not
#   contain "In Progress Game" or fewer than two team rows could be scraped
def self.get_live_game(team, html)

  return nil if html !~ /In Progress Game/

  team_scraper = Scraper.define do

    process_first "td:nth-child(2)", :name => :text
    process_first "td:nth-child(4)", :runs => :text
    process_first "td:nth-child(5)", :hits => :text
    process_first "td:nth-child(6)", :errors => :text

    result :name, :runs, :hits, :errors

  end

  live_scraper = Scraper.define do
    array :teams
    process_first "td.yspscores", :inning => :text
    process "tr.ysptblclbg5", :teams => team_scraper
    result :inning, :teams
  end

  scraped = live_scraper.scrape(html)
  return nil if scraped.nil?

  game = struct_to_ostruct(scraped)

  # Both team rows are needed below; without them there is nothing to report
  teams = game.teams || []
  return nil if teams.size < 2 || teams[1].name.nil?

  game.inning = game.inning.strip if game.inning

  # they are at home if team 1 (2nd team) is them; a multi-word name is
  # compared by its last word only
  home_name = teams[1].name
  words = home_name.split
  t = (words.size > 1 ? words[-1] : home_name)

  game.home = team.to_s.include?(t)

  # helpers
  game.away_team = teams[0]
  game.home_team = teams[1]
  game.delete_field('teams')

  return game

end
|
353
|
+
|
354
|
+
# Locates the team page for the given team code or partial team name.
#
# The string is first tried directly as a team code. If that yields the
# generic page, the sport's team list is fetched and searched for a team
# whose name contains the string; the first match wins.
#
# Fetch errors are printed and result in nil. Neither argument is modified.
#
# @param [String] sport sport the team plays in
# @param [String] str team code or partial team name
# @return [Array(String, String), nil] [team code, page html], or nil when
#   the team could not be found or a page could not be fetched
def self.find_team_page(sport, str)

  # Lowercase copies: downcase! would modify the caller's string and fails
  # outright on frozen strings.
  sport = sport.downcase
  str = str.downcase

  begin
    html = YahooSports.fetchurl("http://sports.yahoo.com/#{sport}/teams/" + str)
  rescue => ex
    puts ex
    return
  end

  if html !~ %r{<title><MapleRegion id ="page_title_generic"/></title>} then
    # got the right page
    return [str, html]
  end

  # look for it
  begin
    html = YahooSports.fetchurl("http://sports.yahoo.com/#{sport}/teams")
  rescue => ex
    puts ex
    return
  end

  team_scraper = Scraper.define do
    array :teams, :links
    process "table.yspcontent tr.ysprow1", :teams => :text
    process "table.yspcontent tr.ysprow1 a", :links => "@href"
    process "table.yspcontent tr.ysprow2", :teams => :text
    process "table.yspcontent tr.ysprow2 a", :links => "@href"
    result :teams, :links
  end

  ret = team_scraper.scrape(html)
  return nil if ret.nil?

  # Either list may be nil when nothing matched
  names = ret.teams || []
  links = ret.links || []

  names.each_index { |i|
    next if links[i].nil? # no link to follow for this row

    l = links[i].strip.gsub(%r{/$}, "") # strip trailing slash for nfl
    t = YahooSports.strip_tags(names[i]).strip

    if t == str or t.downcase.include? str then
      # found a matching team
      begin
        html = YahooSports.fetchurl("http://sports.yahoo.com#{l}")
      rescue => ex
        puts ex
        return
      end
      # The team code is the last part of the link path, so it is taken from
      # the link (the team name never matches this pattern).
      return [l[%r{^/[a-z]+/teams/(.+)$}, 1], html]
    end
  }

  return nil

end
|
413
|
+
|
414
|
+
# Recursively converts a Struct into an OpenStruct.
#
# Struct members and Struct elements of array members are converted as
# well; every other value is carried over as is. The given struct and its
# arrays are not modified.
#
# @param [Struct] struct struct to convert
# @return [OpenStruct] struct with the same keys and converted values
def self.struct_to_ostruct(struct)
  hash = {}
  struct.each_pair { |key, val|
    hash[key] =
      if val.kind_of? Struct then
        struct_to_ostruct(val)
      elsif val.kind_of? Array then
        # Check the element's class rather than its text, so a string that
        # merely contains "struct" is left alone; map (not map!) builds a new
        # array instead of rewriting the source struct's array.
        val.map { |v| v.kind_of?(Struct) ? struct_to_ostruct(v) : v }
      else
        val
      end
  }
  return OpenStruct.new(hash)
end
|
426
|
+
|
427
|
+
end
|
428
|
+
|
429
|
+
end
|