espn_scraper 1.3.1 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4e71f14160cc29d944f5860b4eaa3f983d353890
4
- data.tar.gz: 75353026bf2aa879d8151a4c1455314e69af8c78
3
+ metadata.gz: 92cd55c7052685e30d2dbc52d24a058cd7e7ef46
4
+ data.tar.gz: d4751ef0577a07dd87e2721af60dfd5041129656
5
5
  SHA512:
6
- metadata.gz: 5f06578dd6ca1eeefa13910a35a03e84a4781499de5d478c924707db90a476780fb25a9276a6ede4d3b66e1135c31dd1b0c105ad3bd37cc213cd15ae050720f6
7
- data.tar.gz: d2b1eda63a460fc734f7280ae171ce0e9260612d1513bc6507dda595d8d484d038a7dc3df213c5066fcfb51151799156cd81b51a07a75bd204cbec1d99c267e8
6
+ metadata.gz: ac9fcbc028381b407c01ecde9ea3a47d75658c6ca582bdb6024c03e7333e9bd0ec41e5d27a48664ce331a48ea6d541e150918eb13c74b8d9d7d2f201c3d4ef25
7
+ data.tar.gz: bac030d2c801f9749a85be6d58986062dc79f94d27d7110c1cd006f1bc23d43f64c7320881329780dca7c9f674815e81bc1b9f54762ab459f12b089ac39083f9
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # ESPN Scraper
2
2
 
3
- ESPN Scraper is a simple gem for scraping teams and scores from `ESPN`'s website. Please note that `ESPN` is not involved with this gem or me in any way. I chose `ESPN` because it is a leader in sports statistics and has a robust website.
3
+ ESPN Scraper is a simple gem for scraping teams and scores from `ESPN`'s website. Please note that `ESPN` is not involved with this gem or me in any way. I chose `ESPN` because it is a leader in sports statistics and has a robust website.
4
4
 
5
5
  ```ruby
6
6
  ESPN.responding?
@@ -55,11 +55,11 @@ You can get the teams in each league by acronym. It returns a hash of each divis
55
55
  ```ruby
56
56
  ESPN.get_teams_in('nba')
57
57
  # => {
58
- # "atlantic"=> [
59
- # { :name => "Boston Celtics", :data_name => "bos" },
60
- # { :name => "Brooklyn Nets", :data_name => "bkn" },
61
- # { :name => "New York Knicks", :data_name => "ny" },
62
- # { :name => "Philadelphia 76ers", :data_name => "phi" },
58
+ # "atlantic"=> [
59
+ # { :name => "Boston Celtics", :data_name => "bos" },
60
+ # { :name => "Brooklyn Nets", :data_name => "bkn" },
61
+ # { :name => "New York Knicks", :data_name => "ny" },
62
+ # { :name => "Philadelphia 76ers", :data_name => "phi" },
63
63
  # { :name => "Toronto Raptors", :data_name => "tor" }
64
64
  # ]
65
65
  # "pacific" => ...
@@ -121,7 +121,7 @@ require 'espn_scraper'
121
121
 
122
122
  ## Contributing
123
123
 
124
- Please report back if something breaks on you!
124
+ Please report back if something breaks on you!
125
125
 
126
126
  Also, please let me know if any of the data names become outdated. For instance, a bunch of NFL data names were recently changed. You can make fixes temporarily with the following:
127
127
 
@@ -129,7 +129,19 @@ Also please let me know if any of the data names get outdated. For instance a bu
129
129
  ESPN::DATA_NAME_FIXES['nfl']['gnb'] = 'gb'
130
130
  ```
131
131
 
132
- Future plans:
132
+ Running tests:
133
+ ```
134
+ rake test
135
+ ```
136
+
137
+ Re-building the gem and installing locally:
138
+ ```
139
+ gem build espn_scraper.gemspec
140
+ gem uninstall espn_scraper
141
+ gem install espn_scraper-x.x.x.gem
142
+ ```
143
+
144
+ Future plans:
133
145
  - Get start and end dates of a season
134
146
 
135
147
  ### Thank You
data/Rakefile CHANGED
@@ -1,4 +1,3 @@
1
- require "bundler/gem_tasks"
2
1
  require 'rake/testtask'
3
2
 
4
3
  Rake::TestTask.new do |t|
@@ -3,11 +3,11 @@ require 'nokogiri'
3
3
 
4
4
  module ESPN
5
5
  class << self
6
-
6
+
7
7
  def responding?
8
- HTTParty.get('http://espn.go.com/').code == 200
8
+ HTTParty.get('http://www.espn.com').code == 200
9
9
  end
10
-
10
+
11
11
  def down?
12
12
  !responding?
13
13
  end
@@ -15,9 +15,8 @@ module ESPN
15
15
  # Ex: ESPN.url('scores')
16
16
  # ESPN.url('teams', 'nba')
17
17
  def url(*path)
18
- subdomain = (path.first == 'scores') ? path.shift : nil
19
- domain = [subdomain, 'espn', 'go', 'com'].compact.join('.')
20
- ['http:/', domain, *path].join('/')
18
+ path.shift if path.first == 'scores'
19
+ ['http://www.espn.com', *path].join('/')
21
20
  end
22
21
 
23
22
  # Returns Nokogiri HTML document
@@ -29,22 +28,22 @@ module ESPN
29
28
  Nokogiri::HTML(response.body)
30
29
  else
31
30
  raise ArgumentError, error_message(url, path)
32
- end
31
+ end
33
32
  end
34
-
33
+
35
34
  def dasherize(str)
36
35
  str.strip.downcase.gsub(/\s+/, '-')
37
36
  end
38
-
39
-
37
+
38
+
40
39
  private
41
-
42
-
43
-
40
+
41
+
42
+
44
43
  def error_message(url, path)
45
44
  "The url #{url} from the path #{path} did not return a valid page."
46
45
  end
47
-
46
+
48
47
  end
49
48
  end
50
49
 
@@ -4,14 +4,14 @@ require 'json'
4
4
 
5
5
  module ESPN
6
6
  SEASONS = {
7
- preseason: 1,
8
- regular_season: 2,
9
- postseason: 3
7
+ preseason: 1,
8
+ regular_season: 2,
9
+ postseason: 3
10
10
  }
11
11
 
12
- mlb_ignores = %w(
13
- florida-state u-of-south-florida georgetown fla.-southern northeastern boston-college
14
- miami-florida florida-intl canada hanshin yomiuri sacramento springfield corpus-christi
12
+ mlb_ignores = %w(
13
+ florida-state u-of-south-florida georgetown fla.-southern northeastern boston-college
14
+ miami-florida florida-intl canada hanshin yomiuri sacramento springfield corpus-christi
15
15
  round-rock carolina manatee-cc mexico cincinnati-(f) atlanta-(f) frisco toledo norfolk
16
16
  fort-myers tampa-bay-(f) nl-all-stars al-all-stars
17
17
  )
@@ -23,7 +23,7 @@ module ESPN
23
23
  team-chara team-alfredsson
24
24
  )
25
25
 
26
- ncf_ignores = %w( paul-quinn san-diego-christian ferris-st notre-dame-college chaminade
26
+ ncf_ignores = %w( paul-quinn san-diego-christian ferris-st notre-dame-college chaminade
27
27
  w-new-mexico n-new-mexico tx-a&m-commerce nw-oklahoma-st )
28
28
 
29
29
  IGNORED_TEAMS = (mlb_ignores + nhl_ignores + nba_ignores + ncf_ignores).inject({}) do |h, team|
@@ -31,29 +31,29 @@ module ESPN
31
31
  end
32
32
 
33
33
  DATA_NAME_EXCEPTIONS = {
34
- 'nets' => 'bkn',
35
- 'supersonics' => 'okc',
36
- 'hornets' => 'no',
34
+ 'nets' => 'bkn',
35
+ 'supersonics' => 'okc',
36
+ 'hornets' => 'no',
37
37
 
38
- 'marlins' => 'mia'
38
+ 'marlins' => 'mia'
39
39
  }.merge(IGNORED_TEAMS)
40
40
 
41
41
  DATA_NAME_FIXES = {
42
- 'nfl' => {
43
- 'nwe' => 'ne',
44
- 'kan' => 'kc',
45
- 'was' => 'wsh',
46
- 'nor' => 'no',
47
- 'gnb' => 'gb',
48
- 'sfo' => 'sf',
49
- 'tam' => 'tb',
50
- 'sdg' => 'sd'
51
- },
52
- 'mlb' => {},
53
- 'nba' => {},
54
- 'nhl' => {},
55
- 'ncf' => {},
56
- 'ncb' => {}
42
+ 'nfl' => {
43
+ 'nwe' => 'ne',
44
+ 'kan' => 'kc',
45
+ 'was' => 'wsh',
46
+ 'nor' => 'no',
47
+ 'gnb' => 'gb',
48
+ 'sfo' => 'sf',
49
+ 'tam' => 'tb',
50
+ 'sdg' => 'sd'
51
+ },
52
+ 'mlb' => {},
53
+ 'nba' => {},
54
+ 'nhl' => {},
55
+ 'ncf' => {},
56
+ 'ncb' => {}
57
57
  }
58
58
 
59
59
  # Example output:
@@ -68,13 +68,6 @@ module ESPN
68
68
 
69
69
  class << self
70
70
 
71
- def get_superbowl_score
72
- markup = Scores.markup_from_superbowl
73
- scores = Scores.home_away_parse(markup, false)
74
- add_league_and_fixes(scores, 'nfl')
75
- scores
76
- end
77
-
78
71
  def get_nfl_scores(year, week)
79
72
  markup = Scores.markup_from_year_and_week('nfl', year, week)
80
73
  scores = Scores.home_away_parse(markup)
@@ -84,7 +77,7 @@ module ESPN
84
77
 
85
78
  def get_mlb_scores(date)
86
79
  markup = Scores.markup_from_date('mlb', date)
87
- scores = Scores.home_away_parse(markup)
80
+ scores = Scores.home_away_parse(markup, date)
88
81
  scores.each { |report| report[:league] = 'mlb' }
89
82
  scores
90
83
  end
@@ -104,7 +97,7 @@ module ESPN
104
97
  end
105
98
 
106
99
  def get_ncf_scores(year, week)
107
- markup = Scores.markup_from_year_and_week('college-football', year, week)
100
+ markup = Scores.markup_from_year_and_week('college-football', year, week, 80)
108
101
  scores = Scores.ncf_parse(markup)
109
102
  scores.each { |report| report[:league] = 'college-football' }
110
103
  scores
@@ -112,32 +105,15 @@ module ESPN
112
105
 
113
106
  alias_method :get_college_football_scores, :get_ncf_scores
114
107
 
115
- def get_ncb_scores(date, conference_id=nil, final_only=true)
116
- if conference_id
117
- markup = Scores.markup_from_date_and_conference('ncb', date, conference_id)
118
- else
119
- markup = Scores.markup_from_date('ncb', date)
120
- end
121
-
122
- scores = Scores.home_away_parse(markup, final_only)
123
-
124
- scores.each do |report|
125
- report[:league] ||= 'mens-college-basketball'
126
- report[:game_date] ||= date
127
- end
128
-
108
+ def get_ncb_scores(date, conference_id)
109
+ markup = Scores.markup_from_date_and_conference('ncb', date, conference_id)
110
+ scores = Scores.home_away_parse(markup, date)
111
+ scores.each { |report| report.merge! league: 'mens-college-basketball', game_date: date }
129
112
  scores
130
113
  end
131
114
 
132
115
  alias_method :get_college_basketball_scores, :get_ncb_scores
133
116
 
134
- def get_ncb_abbreviations(date)
135
- markup = Scores.markup_from_date('ncb', date)
136
-
137
- teams = Scores.team_abbreviation_parse(markup)
138
- teams
139
- end
140
-
141
117
  def add_league_and_fixes(scores, league)
142
118
  scores.each do |report|
143
119
  report[:league] = league
@@ -156,12 +132,12 @@ module ESPN
156
132
 
157
133
  # Get Markup
158
134
 
159
- def markup_from_superbowl
160
- ESPN.get 'scores', 'nfl', "scoreboard/_/group/80/year/#{Time.now.year - 1}/seasontype/3/week/5"
161
- end
162
-
163
- def markup_from_year_and_week(league, year, week)
164
- ESPN.get 'scores', league, "scoreboard/_/group/80/year/#{year}/seasontype/2/week/#{week}"
135
+ def markup_from_year_and_week(league, year, week, group=nil)
136
+ if group
137
+ ESPN.get 'scores', league, "scoreboard/_/group/#{group}/year/#{year}/seasontype/2/week/#{week}"
138
+ else
139
+ ESPN.get 'scores', league, "scoreboard/_/year/#{year}/seasontype/2/week/#{week}"
140
+ end
165
141
  end
166
142
 
167
143
  def markup_from_date(league, date)
@@ -176,7 +152,7 @@ module ESPN
176
152
 
177
153
  # parsing strategies
178
154
 
179
- def home_away_parse(doc, final=true)
155
+ def home_away_parse(doc, date=nil)
180
156
  scores = []
181
157
  games = []
182
158
  espn_regex = /window\.espn\.scoreboardData \t= (\{.*?\});/
@@ -190,11 +166,16 @@ module ESPN
190
166
  games.each do |game|
191
167
  # Game must be regular or postseason
192
168
  next unless game['season']['type'] == SEASONS[:regular_season] || game['season']['type'] == SEASONS[:postseason]
169
+
170
+ # Game must not be suspended if it was supposed to start on the query date.
171
+ # This prevents fetching scores for suspended games which are not yet completed.
172
+ game_start = DateTime.parse(game['date']).to_time.utc + Time.zone_offset('EDT')
173
+ next if date && game['competitions'][0]['wasSuspended'] && game_start.to_date == date
174
+
193
175
  score = {}
194
176
  competition = game['competitions'].first
195
-
196
177
  # Score must be final
197
- if !final || competition['status']['type']['detail'] =~ /^Final/
178
+ if competition['status']['type']['detail'] =~ /^Final/
198
179
  competition['competitors'].each do |competitor|
199
180
  if competitor['homeAway'] == 'home'
200
181
  score[:home_team] = competitor['team']['abbreviation'].downcase
@@ -205,34 +186,12 @@ module ESPN
205
186
  end
206
187
  end
207
188
  score[:game_date] = DateTime.parse(game['date'])
208
- score[:status] = competition['status']['type']['detail']
209
189
  scores << score
210
190
  end
211
191
  end
212
192
  scores
213
193
  end
214
194
 
215
- def team_abbreviation_parse(doc)
216
- games = []
217
- teams = {}
218
- espn_regex = /window\.espn\.scoreboardData \t= (\{.*?\});/
219
- doc.xpath("//script").each do |script_section|
220
- if script_section.content =~ espn_regex
221
- espn_data = JSON.parse(espn_regex.match(script_section.content)[1])
222
- games = espn_data['events']
223
- break
224
- end
225
- end
226
- games.each do |game|
227
- competition = game['competitions'].first
228
- competition['competitors'].each do |competitor|
229
- teams[competitor['team']['displayName']] = competitor['team']['abbreviation'].downcase
230
- end
231
- end
232
-
233
- teams
234
- end
235
-
236
195
  def ncf_parse(doc)
237
196
  scores = []
238
197
  games = []
@@ -256,7 +215,7 @@ module ESPN
256
215
  if competitor['homeAway'] == 'home'
257
216
  score[:home_team] = competitor['team']['id'].downcase
258
217
  score[:home_score] = competitor['score'].to_i
259
- else
218
+ else
260
219
  score[:away_team] = competitor['team']['id'].downcase
261
220
  score[:away_score] = competitor['score'].to_i
262
221
  end
@@ -308,4 +267,4 @@ module ESPN
308
267
 
309
268
  end
310
269
  end
311
- end
270
+ end
@@ -1,3 +1,3 @@
1
1
  module ESPN
2
- VERSION = '1.3.1'
2
+ VERSION = '1.5.0'
3
3
  end
@@ -1,34 +1,34 @@
1
1
  require 'test_helper'
2
2
 
3
3
  class BoilerplateTest < EspnTest
4
-
4
+
5
5
  test 'espn is up' do
6
6
  assert ESPN.responding?
7
7
  assert !ESPN.down?
8
8
  end
9
-
9
+
10
10
  test 'paths are working' do
11
- assert_equal 'http://scores.espn.go.com', ESPN.url('scores')
12
- assert_equal 'http://espn.go.com/nba/teams', ESPN.url('nba', 'teams')
11
+ assert_equal 'http://www.espn.com', ESPN.url('scores')
12
+ assert_equal 'http://www.espn.com/nba/teams', ESPN.url('nba', 'teams')
13
13
  end
14
-
14
+
15
15
  test 'error message works' do
16
16
  assert_raises(ArgumentError) do
17
17
  ESPN.get('bad-api-keyword')
18
18
  end
19
19
  end
20
-
20
+
21
21
  test 'get pages is working' do
22
22
  assert ESPN.get('scores')
23
23
  end
24
-
24
+
25
25
  test 'dasherize strings' do
26
26
  assert_equal 'string-is-dashed', ESPN.send(:dasherize, 'String is dashed')
27
27
  end
28
-
28
+
29
29
  test 'leagues' do
30
30
  leagues = 'nfl mlb nba nhl ncf ncb'.split
31
31
  assert_equal leagues, ESPN.leagues
32
32
  end
33
33
 
34
- end
34
+ end
@@ -1,7 +1,7 @@
1
1
  require 'test_helper'
2
2
 
3
3
  class MlbTest < EspnTest
4
-
4
+
5
5
  test 'mlb august 13th 2012 yankees beat rangers' do
6
6
  starts_at = DateTime.parse('2012-08-13T23:00:00+00:00')
7
7
  expected = {
@@ -29,12 +29,18 @@ class MlbTest < EspnTest
29
29
  scores = ESPN.get_mlb_scores(starts_at.to_date)
30
30
  assert scores.include?(expected), 'A known MLB final score cannot be found'
31
31
  end
32
-
32
+
33
+ test 'mlb sept 5th 2014 suspended game pittsburgh pirates at chicago cubs' do
34
+ scores = ESPN.get_mlb_scores(Date.parse('2014-09-05'))
35
+ home_teams = scores.each { |s| s['home_team'] }
36
+ assert !home_teams.include?('chc'), 'A known suspended game was unexpectedly found'
37
+ end
38
+
33
39
  test 'random mlb days' do
34
40
  random_days.each do |day|
35
41
  scores = ESPN.get_mlb_scores(day)
36
42
  assert all_names_present?(scores), "Error on #{day} for mlb"
37
43
  end
38
44
  end
39
-
40
- end
45
+
46
+ end
@@ -1,7 +1,7 @@
1
1
  require 'test_helper'
2
2
 
3
3
  class TeamsTest < EspnTest
4
-
4
+
5
5
  test 'scrape nfl teams' do
6
6
  divisions = ESPN.get_teams_in('nfl')
7
7
  assert_equal 8, divisions.count
@@ -13,7 +13,7 @@ class TeamsTest < EspnTest
13
13
  assert_equal 32, teams.map{ |h| h[:data_name] }.uniq.count
14
14
  assert divisions['nfc-west'].include?({ name: 'Seattle Seahawks', data_name: 'sea' })
15
15
  end
16
-
16
+
17
17
  test 'scrape mlb teams' do
18
18
  divisions = ESPN.get_teams_in('mlb')
19
19
  assert_equal 6, divisions.count
@@ -25,7 +25,7 @@ class TeamsTest < EspnTest
25
25
  assert_equal 30, teams.map{ |h| h[:data_name] }.uniq.count
26
26
  assert divisions['al-west'].include?({ name: 'Seattle Mariners', data_name: 'sea' })
27
27
  end
28
-
28
+
29
29
  test 'scrape nba teams' do
30
30
  divisions = ESPN.get_teams_in('nba')
31
31
  assert_equal 6, divisions.count
@@ -37,18 +37,18 @@ class TeamsTest < EspnTest
37
37
  assert_equal 30, teams.map{ |h| h[:data_name] }.uniq.count
38
38
  assert divisions['atlantic'].include?({ name: 'Toronto Raptors', data_name: 'tor' })
39
39
  end
40
-
40
+
41
41
  test 'scrape nhl teams' do
42
42
  divisions = ESPN.get_teams_in('nhl')
43
43
  assert_equal 4, divisions.count
44
44
  assert_equal 7, divisions['central'].count
45
45
  assert_equal 8, divisions['atlantic'].count
46
46
  teams = divisions.values.flatten
47
- assert_equal 30, teams.map{ |h| h[:name] }.uniq.count
48
- assert_equal 30, teams.map{ |h| h[:data_name] }.uniq.count
47
+ assert_equal 31, teams.map{ |h| h[:name] }.uniq.count
48
+ assert_equal 31, teams.map{ |h| h[:data_name] }.uniq.count
49
49
  assert divisions['atlantic'].include?({ name: 'Montreal Canadiens', data_name: 'mtl' })
50
50
  end
51
-
51
+
52
52
  test 'scrape ncaa football teams' do
53
53
  divisions = ESPN.get_teams_in('college-football')
54
54
  assert_equal 25, divisions.count
@@ -57,23 +57,23 @@ class TeamsTest < EspnTest
57
57
  assert divisions['conference-usa'].include?({ name: 'Rice Owls', data_name: '242' })
58
58
  assert divisions['meac'].include?({ name: 'Bethune-Cookman Wildcats', data_name: '2065' })
59
59
  assert divisions['northeast'].include?({ name: 'St Francis Red Flash', data_name: '2598' })
60
- assert divisions['swac'].include?({ name: 'Alabama A&M Bulldogs', data_name: '2010' })
60
+ assert divisions['swac'].include?({ name: 'Alabama State Hornets', data_name: '2011' })
61
61
  end
62
-
62
+
63
63
  test 'scrape ncaa basketball teams' do
64
64
  divisions = ESPN.get_teams_in('mens-college-basketball')
65
65
  assert_equal 32, divisions.count
66
66
  assert_equal 15, divisions['acc'].count
67
67
  assert_equal 10, divisions['patriot-league'].count
68
-
69
- assert divisions['southland'].include?({ name: 'Texas A&M-CC Islanders', data_name: '357' })
68
+
69
+ assert divisions['southland'].include?({ name: 'Incarnate Word Cardinals', data_name: '2916' })
70
70
  assert divisions['atlantic-10'].include?({ name: "Saint Joe's Saint Joseph's Hawks", data_name: '2603' })
71
71
  end
72
72
 
73
73
  test 'scrape ncaa basketball conferences' do
74
74
  conferences = ESPN.get_conferences_in_ncb
75
- assert_equal 32, conferences.count
75
+ assert_equal 33, conferences.count
76
76
  assert conferences.include?({ name: 'Mountain West', data_name: '44' })
77
77
  end
78
-
79
- end
78
+
79
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: espn_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - aj0strow
@@ -84,7 +84,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
84
84
  version: '0'
85
85
  requirements: []
86
86
  rubyforge_project:
87
- rubygems_version: 2.5.2
87
+ rubygems_version: 2.6.11
88
88
  signing_key:
89
89
  specification_version: 4
90
90
  summary: ESPN Scraper