TopDownHockey-Scraper 6.0.1__tar.gz → 6.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of TopDownHockey-Scraper might be problematic. Click here for more details.
- {topdownhockey_scraper-6.0.1/src/TopDownHockey_Scraper.egg-info → topdownhockey_scraper-6.0.2}/PKG-INFO +1 -1
- {topdownhockey_scraper-6.0.1 → topdownhockey_scraper-6.0.2}/setup.cfg +1 -1
- {topdownhockey_scraper-6.0.1 → topdownhockey_scraper-6.0.2}/setup.py +1 -1
- {topdownhockey_scraper-6.0.1 → topdownhockey_scraper-6.0.2}/src/TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py +37 -29
- {topdownhockey_scraper-6.0.1 → topdownhockey_scraper-6.0.2/src/TopDownHockey_Scraper.egg-info}/PKG-INFO +1 -1
- {topdownhockey_scraper-6.0.1 → topdownhockey_scraper-6.0.2}/LICENSE +0 -0
- {topdownhockey_scraper-6.0.1 → topdownhockey_scraper-6.0.2}/README.md +0 -0
- {topdownhockey_scraper-6.0.1 → topdownhockey_scraper-6.0.2}/pyproject.toml +0 -0
- {topdownhockey_scraper-6.0.1 → topdownhockey_scraper-6.0.2}/src/TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py +0 -0
- {topdownhockey_scraper-6.0.1 → topdownhockey_scraper-6.0.2}/src/TopDownHockey_Scraper.egg-info/SOURCES.txt +0 -0
- {topdownhockey_scraper-6.0.1 → topdownhockey_scraper-6.0.2}/src/TopDownHockey_Scraper.egg-info/dependency_links.txt +0 -0
- {topdownhockey_scraper-6.0.1 → topdownhockey_scraper-6.0.2}/src/TopDownHockey_Scraper.egg-info/requires.txt +0 -0
- {topdownhockey_scraper-6.0.1 → topdownhockey_scraper-6.0.2}/src/TopDownHockey_Scraper.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[metadata]
|
|
2
2
|
name = TopDownHockey_Scraper
|
|
3
|
-
version = 6.0.1
|
|
3
|
+
version = 6.0.2
|
|
4
4
|
author = Patrick Bacon
|
|
5
5
|
author_email = patrick.s.bacon@gmail.com
|
|
6
6
|
description = A package built for scraping hockey data from EliteProspects, the NHL's HTML/API reports, and ESPN's XML reports.
|
|
@@ -9,7 +9,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
|
|
|
9
9
|
|
|
10
10
|
setup(
|
|
11
11
|
name="TopDownHockey_Scraper", # Replace with your own username
|
|
12
|
-
version="6.0.1",
|
|
12
|
+
version="6.0.2",
|
|
13
13
|
author="Patrick Bacon",
|
|
14
14
|
author_email="patrick.s.bacon@gmail.com",
|
|
15
15
|
description="The TopDownHockey Scraper",
|
|
@@ -2255,6 +2255,7 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
|
|
|
2255
2255
|
this_date = (game_date)
|
|
2256
2256
|
url = 'http://www.espn.com/nhl/scoreboard?date=' + this_date.replace("-", "")
|
|
2257
2257
|
page = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout = 500)
|
|
2258
|
+
print('Request to ESPN IDs successful.')
|
|
2258
2259
|
soup = BeautifulSoup(page.content, parser = 'lxml')
|
|
2259
2260
|
soup_found = soup.find_all('a', {'class':['AnchorLink truncate',
|
|
2260
2261
|
'AnchorLink Button Button--sm Button--anchorLink Button--alt mb4 w-100',
|
|
@@ -2926,7 +2927,10 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
|
|
|
2926
2927
|
espn_home_team = 'SJS'
|
|
2927
2928
|
if away_team == 'S.J':
|
|
2928
2929
|
espn_away_team = 'SJS'
|
|
2930
|
+
print('Scraping ESPN IDs')
|
|
2929
2931
|
espn_id = scrape_espn_ids_single_game(str(game_date.date()), espn_home_team, espn_away_team).espn_id.iloc[0]
|
|
2932
|
+
print('Scraping ESPN Events')
|
|
2933
|
+
print('Here is the ESPN ID:', espn_id)
|
|
2930
2934
|
event_coords = scrape_espn_events(int(espn_id))
|
|
2931
2935
|
event_coords['coordinate_source'] = 'espn'
|
|
2932
2936
|
events = single.merge(event_coords, on = ['event_player_1', 'game_seconds', 'period', 'version', 'event'], how = 'left').drop(columns = ['espn_id'])
|
|
@@ -3196,35 +3200,39 @@ def full_scrape(game_id_list, live = True, shift = False):
|
|
|
3196
3200
|
df = full_scrape_1by1(game_id_list, live, shift_to_espn = shift)
|
|
3197
3201
|
|
|
3198
3202
|
# Fixing the Pettersson issue for event player. Just going downstream for this.
|
|
3199
|
-
|
|
3200
|
-
|
|
3201
|
-
|
|
3202
|
-
|
|
3203
|
-
|
|
3204
|
-
|
|
3205
|
-
|
|
3206
|
-
|
|
3207
|
-
|
|
3208
|
-
|
|
3209
|
-
|
|
3210
|
-
|
|
3211
|
-
|
|
3212
|
-
|
|
3213
|
-
|
|
3214
|
-
|
|
3215
|
-
|
|
3216
|
-
|
|
3217
|
-
|
|
3218
|
-
|
|
3219
|
-
|
|
3220
|
-
|
|
3221
|
-
|
|
3222
|
-
|
|
3223
|
-
|
|
3224
|
-
|
|
3225
|
-
|
|
3226
|
-
|
|
3227
|
-
|
|
3203
|
+
try:
|
|
3204
|
+
df = df.assign(
|
|
3205
|
+
event_player_1 = np.where(
|
|
3206
|
+
(df.event_player_1 == 'ELIAS PETTERSSON') &
|
|
3207
|
+
(df.event_description.str.contains('#', na=False)) &
|
|
3208
|
+
(df.event_description.str.contains(' PETTERSSON', na=False)) &
|
|
3209
|
+
(df.event_description.str.extract(r'#(\d+) PETTERSSON', expand=False) == '25'),
|
|
3210
|
+
'ELIAS PETTERSSON(D)', df.event_player_1),
|
|
3211
|
+
event_player_2 = np.where(
|
|
3212
|
+
(df.event_player_2 == 'ELIAS PETTERSSON') &
|
|
3213
|
+
(
|
|
3214
|
+
# Goal and Petey got A1
|
|
3215
|
+
((df.event_type == 'GOAL') &
|
|
3216
|
+
(df.event_description.str.contains(': #', na=False)) &
|
|
3217
|
+
(df.event_description.str.contains(' PETTERSSON', na=False)) &
|
|
3218
|
+
(df.event_description.str.extract(r': #(\d+) PETTERSSON', expand=False) == '25')) |
|
|
3219
|
+
# Not a goal, Petey was EP2
|
|
3220
|
+
((df.event_type != 'GOAL') &
|
|
3221
|
+
(df.event_description.str.contains('VAN #', na=False)) &
|
|
3222
|
+
(df.event_description.str.contains(' PETTERSSON', na=False)) &
|
|
3223
|
+
(df.event_description.str.extract(r'VAN #(\d+) PETTERSSON', expand=False) == '25'))
|
|
3224
|
+
),
|
|
3225
|
+
'ELIAS PETTERSSON(D)', df.event_player_2),
|
|
3226
|
+
event_player_3 = np.where(
|
|
3227
|
+
(df.event_player_3=='ELIAS PETTERSSON') &
|
|
3228
|
+
(df.event_description.str.contains('#', na=False)) &
|
|
3229
|
+
(df.event_description.str.contains(' PETTERSSON', na=False)) &
|
|
3230
|
+
(df.event_description.str.extract(r'#(\d+) PETTERSSON(?:\s|$)', expand=False) == '25'),
|
|
3231
|
+
'ELIAS PETTERSSON(D)', df.event_player_3)
|
|
3232
|
+
)
|
|
3233
|
+
except Exception as e:
|
|
3234
|
+
print(e)
|
|
3235
|
+
continue
|
|
3228
3236
|
|
|
3229
3237
|
# Don't even need this, we've had this problem with Stutzle for years, just let it be.
|
|
3230
3238
|
# df.event_description = df.event_description.str.replace('FEHÃ\x89RVÃ\x81RY', 'FEHERVARY').str.replace('BLÃMEL', 'BLAMEL')
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|