TopDownHockey-Scraper 4.3.tar.gz → 5.0.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of TopDownHockey-Scraper might be problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: TopDownHockey_Scraper
-Version: 4.3
+Version: 5.0.0
 Summary: The TopDownHockey Scraper
 Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
 Author: Patrick Bacon
@@ -1,6 +1,6 @@
 [metadata]
 name = TopDownHockey_Scraper
-version = 4.3
+version = 5.0.0
 author = Patrick Bacon
 author_email = patrick.s.bacon@gmail.com
 description = A package built for scraping hockey data from EliteProspects, the NHL's HTML/API reports, and ESPN's XML reports.
@@ -9,7 +9,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setup(
     name="TopDownHockey_Scraper", # Replace with your own username
-    version="4.3",
+    version="5.0.0",
     author="Patrick Bacon",
     author_email="patrick.s.bacon@gmail.com",
     description="The TopDownHockey Scraper",
@@ -508,6 +508,8 @@ def scrape_html_roster(season, game_id):
 
     roster_df['Name'] = roster_df['Name'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.upper()
 
+    roster_df['Name'] = np.where(roster_df['Name'] == "JURAJ SLAFKOVSKA", "JURAJ SLAFKOVSKY", roster_df['Name']) # Must run after normalization; only then does the accented name collapse to "SLAFKOVSKA"
+
     return roster_df
 
 def scrape_html_shifts(season, game_id):
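
Why the Slafkovsky patch has to run after the normalization step: the NFKD/ASCII round-trip is what produces the "SLAFKOVSKA" spelling in the first place. Below is a minimal sketch of that chain; the accented input spelling is an assumption about what the upstream report serves, and `names`/`normalized` are illustrative variables, not names from the package.

    import numpy as np
    import pandas as pd

    # Hypothetical input: the accented spelling is an assumption about the
    # upstream report, chosen so the chain reproduces "SLAFKOVSKA".
    names = pd.Series(["Juraj Slafkovská", "Alex Barré-Boulet"])

    normalized = (names.str.normalize('NFKD')                 # split 'á' into 'a' + combining accent
                       .str.encode('ascii', errors='ignore')  # drop the non-ASCII combining accent
                       .str.decode('utf-8')
                       .str.upper())
    print(normalized.tolist())  # ['JURAJ SLAFKOVSKA', 'ALEX BARRE-BOULET']

    # The patch matches the post-normalization spelling, so it must come last:
    patched = np.where(normalized == "JURAJ SLAFKOVSKA", "JURAJ SLAFKOVSKY", normalized)
    print(patched.tolist())     # ['JURAJ SLAFKOVSKY', 'ALEX BARRE-BOULET']

The same patch-after-normalization pattern recurs in the shift, API-event, and ESPN-event scrapers below.
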
@@ -1229,6 +1231,8 @@ def scrape_html_shifts(season, game_id):
         all_shifts['name']))))))))))))))))))))))))))))))))))
 
     all_shifts['name'] = all_shifts['name'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.upper()
+
+    all_shifts['name'] = np.where(all_shifts['name'] == "JURAJ SLAFKOVSKA", "JURAJ SLAFKOVSKY", all_shifts['name']) # Must run after normalization; only then does the accented name collapse to "SLAFKOVSKA"
 
 
     all_shifts = all_shifts.assign(end_time = np.where(pd.to_datetime(all_shifts.start_time).dt.time > pd.to_datetime(all_shifts.end_time).dt.time, '20:00', all_shifts.end_time),
@@ -1555,6 +1559,8 @@ def scrape_api_events(game_id, drop_description = True, shift_to_espn = False):
 
     api_events['ep1_name'] = api_events['ep1_name'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.upper()
 
+    api_events['ep1_name'] = np.where(api_events['ep1_name'] == "JURAJ SLAFKOVSKA", "JURAJ SLAFKOVSKY", api_events['ep1_name']) # Must run after normalization; only then does the accented name collapse to "SLAFKOVSKA"
+
     api_events = api_events.assign(ep1_name = np.where(api_events.ep1_name=='ALEX BARRÉ-BOULET', 'ALEX BARRE_BOULET', api_events.ep1_name))
 
     if drop_description == True:
@@ -1967,6 +1973,8 @@ def scrape_espn_events(espn_game_id, drop_description = True):
     espn_events['event_player_1'] = espn_events['event_player_1'].str.strip()
 
     espn_events['event_player_1'] = espn_events['event_player_1'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.upper()
+
+    espn_events['event_player_1'] = np.where(espn_events['event_player_1'] == "JURAJ SLAFKOVSKA", "JURAJ SLAFKOVSKY", espn_events['event_player_1'])
 
     #espn_events = espn_events.assign(event_player_1 = np.where(
     #espn_events.event_player_1=='ALEX BURROWS', 'ALEXANDRE BURROWS', espn_events.event_player_1))
@@ -2003,15 +2011,17 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
     soup_found = soup.find_all('a', {'class':['AnchorLink truncate',
                                               'AnchorLink Button Button--sm Button--anchorLink Button--alt mb4 w-100',
                                               'AnchorLink Button Button--sm Button--anchorLink Button--alt mb4 w-100 mr2'], 'href':[re.compile("/nhl/team/_/name/"), re.compile("game/_")]})
+    game_sections = soup.find_all('section', class_='Scoreboard bg-clr-white flex flex-auto justify-between')
+
     at = []
     ht = []
     gids = []
     fax = pd.DataFrame()
     #print(str(i))
-    for i in range (0, ((len(soup_found)))):
-        away = soup_found[i]['href'].rsplit('/')[-1].split('-')[0].upper()
-        home = soup_found[i]['href'].rsplit('/')[-1].split('-')[-1].upper()
-        espnid = soup_found[i]['href'].rsplit('/')[-2]
+    for i in range(0, len(game_sections)):
+        away = game_sections[i].find_all('div', class_='ScoreCell__TeamName ScoreCell__TeamName--shortDisplayName db')[0].contents[0].upper()
+        home = game_sections[i].find_all('div', class_='ScoreCell__TeamName ScoreCell__TeamName--shortDisplayName db')[1].contents[0].upper()
+        espnid = game_sections[i]['id'] # index by i (not 0) so each game keeps its own id
         at.append(away)
         ht.append(home)
         gids.append(espnid)
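
This hunk changes how games are located: instead of parsing team abbreviations and the game id out of AnchorLink hrefs, the loop walks ESPN's Scoreboard <section> elements, which carry the ESPN game id in their id attribute and the two team names in ScoreCell__TeamName divs. A minimal sketch of that traversal against a hand-written fragment; the class names come from the diff, while the markup shape, the id value, and the team names are invented for illustration.

    from bs4 import BeautifulSoup

    # Hand-written stand-in for ESPN's scoreboard page; only the class names
    # are taken from the diff, the id and team names are made up.
    html = """
    <section id="401559999" class="Scoreboard bg-clr-white flex flex-auto justify-between">
      <div class="ScoreCell__TeamName ScoreCell__TeamName--shortDisplayName db">Canadiens</div>
      <div class="ScoreCell__TeamName ScoreCell__TeamName--shortDisplayName db">Maple Leafs</div>
    </section>
    """

    soup = BeautifulSoup(html, 'html.parser')
    for section in soup.find_all('section', class_='Scoreboard bg-clr-white flex flex-auto justify-between'):
        teams = section.find_all('div', class_='ScoreCell__TeamName ScoreCell__TeamName--shortDisplayName db')
        away = teams[0].contents[0].upper()   # first ScoreCell is the away team
        home = teams[1].contents[0].upper()   # second is the home team
        print(away, home, section['id'])      # CANADIENS MAPLE LEAFS 401559999

Reading the id from each section as it is visited is what keeps one game id per row when several games share a date.
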
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: TopDownHockey_Scraper
-Version: 4.3
+Version: 5.0.0
 Summary: The TopDownHockey Scraper
 Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
 Author: Patrick Bacon