TopDownHockey-Scraper 6.0.7__py3-none-any.whl → 6.0.10__py3-none-any.whl
This diff shows the changes between two publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of TopDownHockey-Scraper might be problematic. Click here for more details.
- TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py +19 -15
- {topdownhockey_scraper-6.0.7.dist-info → topdownhockey_scraper-6.0.10.dist-info}/METADATA +1 -1
- topdownhockey_scraper-6.0.10.dist-info/RECORD +7 -0
- topdownhockey_scraper-6.0.7.dist-info/RECORD +0 -7
- {topdownhockey_scraper-6.0.7.dist-info → topdownhockey_scraper-6.0.10.dist-info}/WHEEL +0 -0
- {topdownhockey_scraper-6.0.7.dist-info → topdownhockey_scraper-6.0.10.dist-info}/licenses/LICENSE +0 -0
- {topdownhockey_scraper-6.0.7.dist-info → topdownhockey_scraper-6.0.10.dist-info}/top_level.txt +0 -0
|
@@ -1036,7 +1036,7 @@ def scrape_html_shifts(season, game_id, live = True):
|
|
|
1036
1036
|
|
|
1037
1037
|
# Additional logic to handle period 1 scrape when we don't have goalie shifts yet.
|
|
1038
1038
|
|
|
1039
|
-
if len(home_shifts[(home_shifts.name.isin(goalie_names))]) == 0 or len(away_shifts[(away_shifts.name.isin(goalie_names))]):
|
|
1039
|
+
if len(home_shifts[(home_shifts.name.isin(goalie_names))]) == 0 or len(away_shifts[(away_shifts.name.isin(goalie_names))]) == 0:
|
|
1040
1040
|
|
|
1041
1041
|
pbp_html_url = f'https://www.nhl.com/scores/htmlreports/{season}/GS0{game_id}.HTM'
|
|
1042
1042
|
pbp_soup = BeautifulSoup(requests.get(pbp_html_url).content)
|
|
@@ -1047,26 +1047,27 @@ def scrape_html_shifts(season, game_id, live = True):
|
|
|
1047
1047
|
|
|
1048
1048
|
away_teams = pd.read_html(str(goalie_table))[0][:2]
|
|
1049
1049
|
away_team = away_teams[0].iloc[0]
|
|
1050
|
-
|
|
1051
|
-
away_goalies = pd.read_html(str(goalie_table))[0][2:4]
|
|
1050
|
+
|
|
1051
|
+
away_goalies = pd.read_html(str(goalie_table))[0][2:4][
|
|
1052
|
+
~pd.isna(pd.read_html(str(goalie_table))[0][2:4])[0]
|
|
1053
|
+
]
|
|
1052
1054
|
away_goalies = away_goalies[~pd.isna(away_goalies[6])]
|
|
1053
|
-
|
|
1055
|
+
|
|
1054
1056
|
away_goalies = away_goalies.assign(team = away_team).rename(columns = {0:'number', 2:'name', 6:'TOI'}).loc[:, ['number', 'name', 'TOI', 'team']]
|
|
1055
1057
|
|
|
1056
|
-
|
|
1058
|
+
away_goalies = away_goalies[away_goalies.TOI!='TOT']
|
|
1059
|
+
|
|
1060
|
+
home_teams = pd.read_html(str(goalie_table))[0][6:8][
|
|
1061
|
+
~pd.isna(pd.read_html(str(goalie_table))[0][6:8])[0]
|
|
1062
|
+
]
|
|
1057
1063
|
home_team = home_teams[0].iloc[0]
|
|
1058
|
-
|
|
1064
|
+
|
|
1059
1065
|
home_goalies = pd.read_html(str(goalie_table))[0][8:10]
|
|
1060
1066
|
home_goalies = home_goalies[~pd.isna(home_goalies[6])]
|
|
1061
|
-
|
|
1062
|
-
home_goalies = home_goalies.assign(team = home_team).rename(columns = {0:'number', 2:'name', 6:'TOI'}).loc[:, ['number', 'name', 'TOI', 'team']]
|
|
1063
1067
|
|
|
1064
|
-
|
|
1068
|
+
home_goalies = home_goalies.assign(team = home_team).rename(columns = {0:'number', 2:'name', 6:'TOI'}).loc[:, ['number', 'name', 'TOI', 'team']]
|
|
1065
1069
|
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
# home_goalies = home_goalies.assign(TOI = '11:26')
|
|
1069
|
-
# away_goalies = away_goalies.assign(TOI = '11:26')
|
|
1070
|
+
home_goalies = home_goalies[home_goalies.TOI!='TOT']
|
|
1070
1071
|
|
|
1071
1072
|
if len(home_shifts[(home_shifts.name.isin(goalie_names))]) == 0:
|
|
1072
1073
|
|
|
@@ -1410,8 +1411,9 @@ def scrape_html_events(season, game_id):
|
|
|
1410
1411
|
#global stripped_html
|
|
1411
1412
|
#global eventdf
|
|
1412
1413
|
stripped_html = hs_strip_html(tds)
|
|
1413
|
-
length =
|
|
1414
|
-
|
|
1414
|
+
length = (len(stripped_html) // 8) * 8
|
|
1415
|
+
stripped_html = stripped_html[:length]
|
|
1416
|
+
eventdf = pd.DataFrame(np.array(stripped_html).reshape(int(length/8), 8)).rename(
|
|
1415
1417
|
columns = {0:'index', 1:'period', 2:'strength', 3:'time', 4:'event', 5:'description', 6:'away_skaters', 7:'home_skaters'})
|
|
1416
1418
|
split = eventdf.time.str.split(':')
|
|
1417
1419
|
game_date = soup.find_all('td', {'align':'center', 'style':'font-size: 10px;font-weight:bold'})[2].get_text()
|
|
@@ -2693,11 +2695,13 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
|
|
|
2693
2695
|
|
|
2694
2696
|
except IndexError as e:
|
|
2695
2697
|
print(str(game_id) + ' has an issue with the HTML Report. Here is the error: ' + str(e))
|
|
2698
|
+
print(traceback.format_exc())
|
|
2696
2699
|
i = i + 1
|
|
2697
2700
|
continue
|
|
2698
2701
|
|
|
2699
2702
|
except ValueError as e:
|
|
2700
2703
|
print(str(game_id) + ' has an issue with the HTML Report. Here is the error: ' + str(e))
|
|
2704
|
+
print(traceback.format_exc())
|
|
2701
2705
|
i = i + 1
|
|
2702
2706
|
continue
|
|
2703
2707
|
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=j-7gTk-cp_0LyZihNxm67xH9KdA3Fx4xrFKKu3-9-rU,42245
|
|
2
|
+
TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=m6osH-VY491_4HUo5-A-JU41NHLPP9F48KJmwbfmvvI,163999
|
|
3
|
+
topdownhockey_scraper-6.0.10.dist-info/licenses/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
|
|
4
|
+
topdownhockey_scraper-6.0.10.dist-info/METADATA,sha256=X1NaTOIWuTiRPjzHMqLv0Lw2uGLTSNuBWZfFUwRcyv8,5671
|
|
5
|
+
topdownhockey_scraper-6.0.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
6
|
+
topdownhockey_scraper-6.0.10.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
|
|
7
|
+
topdownhockey_scraper-6.0.10.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=j-7gTk-cp_0LyZihNxm67xH9KdA3Fx4xrFKKu3-9-rU,42245
|
|
2
|
-
TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=0YWnDOGFexyaUSjux-4pVp1NLOlnqk7CfCsQNQT6isI,163865
|
|
3
|
-
topdownhockey_scraper-6.0.7.dist-info/licenses/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
|
|
4
|
-
topdownhockey_scraper-6.0.7.dist-info/METADATA,sha256=nEp4X2tq_dylw6TAIzzmxL5L052rWQ7fTGj60AaQD7o,5670
|
|
5
|
-
topdownhockey_scraper-6.0.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
6
|
-
topdownhockey_scraper-6.0.7.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
|
|
7
|
-
topdownhockey_scraper-6.0.7.dist-info/RECORD,,
|
|
File without changes
|
{topdownhockey_scraper-6.0.7.dist-info → topdownhockey_scraper-6.0.10.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{topdownhockey_scraper-6.0.7.dist-info → topdownhockey_scraper-6.0.10.dist-info}/top_level.txt
RENAMED
|
File without changes
|