TopDownHockey-Scraper 4.3__tar.gz → 5.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of TopDownHockey-Scraper might be problematic.
- {topdownhockey_scraper-4.3/src/TopDownHockey_Scraper.egg-info → topdownhockey_scraper-5.0.0}/PKG-INFO +1 -1
- {topdownhockey_scraper-4.3 → topdownhockey_scraper-5.0.0}/setup.cfg +1 -1
- {topdownhockey_scraper-4.3 → topdownhockey_scraper-5.0.0}/setup.py +1 -1
- {topdownhockey_scraper-4.3 → topdownhockey_scraper-5.0.0}/src/TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py +14 -4
- {topdownhockey_scraper-4.3 → topdownhockey_scraper-5.0.0/src/TopDownHockey_Scraper.egg-info}/PKG-INFO +1 -1
- {topdownhockey_scraper-4.3 → topdownhockey_scraper-5.0.0}/LICENSE +0 -0
- {topdownhockey_scraper-4.3 → topdownhockey_scraper-5.0.0}/README.md +0 -0
- {topdownhockey_scraper-4.3 → topdownhockey_scraper-5.0.0}/pyproject.toml +0 -0
- {topdownhockey_scraper-4.3 → topdownhockey_scraper-5.0.0}/src/TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py +0 -0
- {topdownhockey_scraper-4.3 → topdownhockey_scraper-5.0.0}/src/TopDownHockey_Scraper.egg-info/SOURCES.txt +0 -0
- {topdownhockey_scraper-4.3 → topdownhockey_scraper-5.0.0}/src/TopDownHockey_Scraper.egg-info/dependency_links.txt +0 -0
- {topdownhockey_scraper-4.3 → topdownhockey_scraper-5.0.0}/src/TopDownHockey_Scraper.egg-info/requires.txt +0 -0
- {topdownhockey_scraper-4.3 → topdownhockey_scraper-5.0.0}/src/TopDownHockey_Scraper.egg-info/top_level.txt +0 -0
setup.cfg

@@ -1,6 +1,6 @@
 [metadata]
 name = TopDownHockey_Scraper
-version = 4.3
+version = 5.0.0
 author = Patrick Bacon
 author_email = patrick.s.bacon@gmail.com
 description = A package built for scraping hockey data from EliteProspects, the NHL's HTML/API reports, and ESPN's XML reports.
setup.py

@@ -9,7 +9,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setup(
     name="TopDownHockey_Scraper", # Replace with your own username
-    version="4.3",
+    version="5.0.0",
     author="Patrick Bacon",
     author_email="patrick.s.bacon@gmail.com",
     description="The TopDownHockey Scraper",
src/TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py

@@ -508,6 +508,8 @@ def scrape_html_roster(season, game_id):
 
     roster_df['Name'] = roster_df['Name'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.upper()
 
+    roster_df['Name'] = np.where(roster_df['Name']== "JURAJ SLAFKOVSKA" , "JURAJ SLAFKOVSKY", roster_df['Name']) # Need to do this after normalization, only then he becomes Slafkovska?
+
     return roster_df
 
 def scrape_html_shifts(season, game_id):
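This rename recurs in scrape_html_shifts, scrape_api_events, and scrape_espn_events below, always immediately after the accent-stripping step. As a standalone illustration of why it must come after that step (plain pandas/numpy, not code from the package): NFKD normalization splits an accented letter into its base letter plus a combining mark, and the ASCII encode/decode round-trip then discards the mark.

import numpy as np
import pandas as pd

names = pd.Series(["Juraj Slafkovský"])

# NFKD turns "ý" into "y" + a combining acute accent; encoding to ASCII with
# errors='ignore' drops the combining mark, leaving plain "y".
normalized = (names.str.normalize('NFKD')
                   .str.encode('ascii', errors='ignore')
                   .str.decode('utf-8')
                   .str.upper())
print(normalized[0])  # JURAJ SLAFKOVSKY

# A remap keyed on a post-normalization spelling (as in the diff) therefore
# only matches if it runs after the pipeline above.
fixed = np.where(normalized == "JURAJ SLAFKOVSKA", "JURAJ SLAFKOVSKY", normalized)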
@@ -1229,6 +1231,8 @@ def scrape_html_shifts(season, game_id):
                                         all_shifts['name']))))))))))))))))))))))))))))))))))
 
     all_shifts['name'] = all_shifts['name'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.upper()
+
+    all_shifts['name'] = np.where(all_shifts['name']== "JURAJ SLAFKOVSKA" , "JURAJ SLAFKOVSKY", all_shifts['name']) # Need to do this after normalization, only then he becomes Slafkovska?
 
 
     all_shifts = all_shifts.assign(end_time = np.where(pd.to_datetime(all_shifts.start_time).dt.time > pd.to_datetime(all_shifts.end_time).dt.time, '20:00', all_shifts.end_time),
@@ -1555,6 +1559,8 @@ def scrape_api_events(game_id, drop_description = True, shift_to_espn = False):
 
     api_events['ep1_name'] = api_events['ep1_name'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.upper()
 
+    api_events['ep1_name'] = np.where(api_events['ep1_name'] == "JURAJ SLAFKOVSKA" , "JURAJ SLAFKOVSKY", api_events['ep1_name']) # Need to do this after normalization, only then he becomes Slafkovska?
+
     api_events = api_events.assign(ep1_name = np.where(api_events.ep1_name=='ALEX BARRÃ-BOULET', 'ALEX BARRE_BOULET', api_events.ep1_name))
 
     if drop_description == True:
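A possible consolidation, purely hypothetical and not what this release does: the one-off np.where remaps could be collected into a single mapping applied once per column with Series.replace, which leaves unmatched names untouched.

import pandas as pd

# Hypothetical mapping; keys are post-normalization spellings from the diff.
NAME_FIXUPS = {
    "JURAJ SLAFKOVSKA": "JURAJ SLAFKOVSKY",
    "ALEX BARRÃ-BOULET": "ALEX BARRE_BOULET",
}

ep1 = pd.Series(["JURAJ SLAFKOVSKA", "SIDNEY CROSBY"])
ep1 = ep1.replace(NAME_FIXUPS)  # ["JURAJ SLAFKOVSKY", "SIDNEY CROSBY"]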
@@ -1967,6 +1973,8 @@ def scrape_espn_events(espn_game_id, drop_description = True):
     espn_events['event_player_1'] = espn_events['event_player_1'].str.strip()
 
     espn_events['event_player_1'] = espn_events['event_player_1'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.upper()
+
+    espn_events['event_player_1'] = np.where(espn_events['event_player_1'] == "JURAJ SLAFKOVSKA" , "JURAJ SLAFKOVSKY", espn_events['event_player_1'])
 
     #espn_events = espn_events.assign(event_player_1 = np.where(
     #espn_events.event_player_1=='ALEX BURROWS', 'ALEXANDRE BURROWS', espn_events.event_player_1))
@@ -2003,15 +2011,17 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
     soup_found = soup.find_all('a', {'class':['AnchorLink truncate',
         'AnchorLink Button Button--sm Button--anchorLink Button--alt mb4 w-100',
         'AnchorLink Button Button--sm Button--anchorLink Button--alt mb4 w-100 mr2'], 'href':[re.compile("/nhl/team/_/name/"), re.compile("game/_")]})
+    game_sections = soup.find_all('section', class_ = 'Scoreboard bg-clr-white flex flex-auto justify-between')
+
     at = []
     ht = []
     gids = []
     fax = pd.DataFrame()
     #print(str(i))
-    for i in range
-    away =
-    home =
-    espnid =
+    for i in range(0, len(game_sections)):
+        away = game_sections[i].find_all('div', class_='ScoreCell__TeamName ScoreCell__TeamName--shortDisplayName db')[0].contents[0].upper()
+        home = game_sections[i].find_all('div', class_='ScoreCell__TeamName ScoreCell__TeamName--shortDisplayName db')[1].contents[0].upper()
+        espnid = game_sections[0]['id']
         at.append(away)
         ht.append(home)
         gids.append(espnid)
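The rewritten loop reads games from ESPN's Scoreboard sections rather than from the anchor links collected in soup_found. Below is a minimal standalone sketch of that pattern, assuming BeautifulSoup and requests; the URL and date are illustrative, and the class names (taken from the diff) break whenever ESPN reworks its markup. Unlike the released loop, which reads game_sections[0]['id'] on every pass, the sketch takes the id from the section being iterated.

import requests
from bs4 import BeautifulSoup

# Illustrative scoreboard page; ESPN may require browser-like headers.
url = "https://www.espn.com/nhl/scoreboard/_/date/20230110"
soup = BeautifulSoup(requests.get(url).text, "html.parser")

# bs4 matches class_ against the complete multi-valued class string.
game_sections = soup.find_all('section', class_='Scoreboard bg-clr-white flex flex-auto justify-between')
for section in game_sections:
    teams = section.find_all('div', class_='ScoreCell__TeamName ScoreCell__TeamName--shortDisplayName db')
    away = teams[0].contents[0].upper()   # away team is listed first
    home = teams[1].contents[0].upper()
    espn_game_id = section['id']          # each Scoreboard section carries its game id
    print(espn_game_id, away, "at", home)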