TopDownHockey-Scraper 6.0.7__py3-none-any.whl → 6.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of TopDownHockey-Scraper might be problematic; see the advisory details on the package registry page for more information.

@@ -1036,7 +1036,7 @@ def scrape_html_shifts(season, game_id, live = True):
1036
1036
 
1037
1037
  # Additional logic to handle period 1 scrape when we don't have goalie shifts yet.
1038
1038
 
1039
- if len(home_shifts[(home_shifts.name.isin(goalie_names))]) == 0 or len(away_shifts[(away_shifts.name.isin(goalie_names))]):
1039
+ if len(home_shifts[(home_shifts.name.isin(goalie_names))]) == 0 or len(away_shifts[(away_shifts.name.isin(goalie_names))]) == 0:
1040
1040
 
1041
1041
  pbp_html_url = f'https://www.nhl.com/scores/htmlreports/{season}/GS0{game_id}.HTM'
1042
1042
  pbp_soup = BeautifulSoup(requests.get(pbp_html_url).content)
@@ -1047,26 +1047,27 @@ def scrape_html_shifts(season, game_id, live = True):
1047
1047
 
1048
1048
  away_teams = pd.read_html(str(goalie_table))[0][:2]
1049
1049
  away_team = away_teams[0].iloc[0]
1050
-
1051
- away_goalies = pd.read_html(str(goalie_table))[0][2:4]
1050
+
1051
+ away_goalies = pd.read_html(str(goalie_table))[0][2:4][
1052
+ ~pd.isna(pd.read_html(str(goalie_table))[0][2:4])[0]
1053
+ ]
1052
1054
  away_goalies = away_goalies[~pd.isna(away_goalies[6])]
1053
-
1055
+
1054
1056
  away_goalies = away_goalies.assign(team = away_team).rename(columns = {0:'number', 2:'name', 6:'TOI'}).loc[:, ['number', 'name', 'TOI', 'team']]
1055
1057
 
1056
- home_teams = pd.read_html(str(goalie_table))[0][6:7]
1058
+ away_goalies = away_goalies[away_goalies.TOI!='TOT']
1059
+
1060
+ home_teams = pd.read_html(str(goalie_table))[0][6:8][
1061
+ ~pd.isna(pd.read_html(str(goalie_table))[0][6:8])[0]
1062
+ ]
1057
1063
  home_team = home_teams[0].iloc[0]
1058
-
1064
+
1059
1065
  home_goalies = pd.read_html(str(goalie_table))[0][8:10]
1060
1066
  home_goalies = home_goalies[~pd.isna(home_goalies[6])]
1061
-
1062
- home_goalies = home_goalies.assign(team = home_team).rename(columns = {0:'number', 2:'name', 6:'TOI'}).loc[:, ['number', 'name', 'TOI', 'team']]
1063
1067
 
1064
- # home_goalies = pd.read_html(str(goalie_table))[0][8:9]
1068
+ home_goalies = home_goalies.assign(team = home_team).rename(columns = {0:'number', 2:'name', 6:'TOI'}).loc[:, ['number', 'name', 'TOI', 'team']]
1065
1069
 
1066
- # Temporary to test. Will fix later.
1067
-
1068
- # home_goalies = home_goalies.assign(TOI = '11:26')
1069
- # away_goalies = away_goalies.assign(TOI = '11:26')
1070
+ home_goalies = home_goalies[home_goalies.TOI!='TOT']
1070
1071
 
1071
1072
  if len(home_shifts[(home_shifts.name.isin(goalie_names))]) == 0:
1072
1073
 
@@ -1410,7 +1411,8 @@ def scrape_html_events(season, game_id):
1410
1411
  #global stripped_html
1411
1412
  #global eventdf
1412
1413
  stripped_html = hs_strip_html(tds)
1413
- length = int(len(stripped_html)/8)
1414
+ length = (len(stripped_html) // 8) * 8
1415
+ stripped_html = stripped_html[:length]
1414
1416
  eventdf = pd.DataFrame(np.array(stripped_html).reshape(length, 8)).rename(
1415
1417
  columns = {0:'index', 1:'period', 2:'strength', 3:'time', 4:'event', 5:'description', 6:'away_skaters', 7:'home_skaters'})
1416
1418
  split = eventdf.time.str.split(':')
@@ -2693,11 +2695,13 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
2693
2695
 
2694
2696
  except IndexError as e:
2695
2697
  print(str(game_id) + ' has an issue with the HTML Report. Here is the error: ' + str(e))
2698
+ print(traceback.format_exc())
2696
2699
  i = i + 1
2697
2700
  continue
2698
2701
 
2699
2702
  except ValueError as e:
2700
2703
  print(str(game_id) + ' has an issue with the HTML Report. Here is the error: ' + str(e))
2704
+ print(traceback.format_exc())
2701
2705
  i = i + 1
2702
2706
  continue
2703
2707
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: TopDownHockey_Scraper
3
- Version: 6.0.7
3
+ Version: 6.0.9
4
4
  Summary: The TopDownHockey Scraper
5
5
  Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
6
6
  Author: Patrick Bacon
@@ -0,0 +1,7 @@
1
+ TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=j-7gTk-cp_0LyZihNxm67xH9KdA3Fx4xrFKKu3-9-rU,42245
2
+ TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=rg_7RJo1eWL9dWlRweCat2v21fWX1Z45olcZN859BN4,163992
3
+ topdownhockey_scraper-6.0.9.dist-info/licenses/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
4
+ topdownhockey_scraper-6.0.9.dist-info/METADATA,sha256=nM1FoBq-lslyopV84S10eukQQ4pPMSyFGA2S_5xfa8g,5670
5
+ topdownhockey_scraper-6.0.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ topdownhockey_scraper-6.0.9.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
7
+ topdownhockey_scraper-6.0.9.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=j-7gTk-cp_0LyZihNxm67xH9KdA3Fx4xrFKKu3-9-rU,42245
2
- TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=0YWnDOGFexyaUSjux-4pVp1NLOlnqk7CfCsQNQT6isI,163865
3
- topdownhockey_scraper-6.0.7.dist-info/licenses/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
4
- topdownhockey_scraper-6.0.7.dist-info/METADATA,sha256=nEp4X2tq_dylw6TAIzzmxL5L052rWQ7fTGj60AaQD7o,5670
5
- topdownhockey_scraper-6.0.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
- topdownhockey_scraper-6.0.7.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
7
- topdownhockey_scraper-6.0.7.dist-info/RECORD,,