TopDownHockey-Scraper 6.0.6__py3-none-any.whl → 6.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of TopDownHockey-Scraper might be problematic. Click here for more details.
- TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py +9 -1
- {topdownhockey_scraper-6.0.6.dist-info → topdownhockey_scraper-6.0.7.dist-info}/METADATA +1 -1
- topdownhockey_scraper-6.0.7.dist-info/RECORD +7 -0
- topdownhockey_scraper-6.0.6.dist-info/RECORD +0 -7
- {topdownhockey_scraper-6.0.6.dist-info → topdownhockey_scraper-6.0.7.dist-info}/WHEEL +0 -0
- {topdownhockey_scraper-6.0.6.dist-info → topdownhockey_scraper-6.0.7.dist-info}/licenses/LICENSE +0 -0
- {topdownhockey_scraper-6.0.6.dist-info → topdownhockey_scraper-6.0.7.dist-info}/top_level.txt +0 -0
|
@@ -18,6 +18,7 @@ import xml.etree.ElementTree as ET
|
|
|
18
18
|
import xmltodict
|
|
19
19
|
from xml.parsers.expat import ExpatError
|
|
20
20
|
from requests.exceptions import ChunkedEncodingError
|
|
21
|
+
import traceback
|
|
21
22
|
|
|
22
23
|
print('Successfully did local install plus update')
|
|
23
24
|
|
|
@@ -1060,7 +1061,7 @@ def scrape_html_shifts(season, game_id, live = True):
|
|
|
1060
1061
|
|
|
1061
1062
|
home_goalies = home_goalies.assign(team = home_team).rename(columns = {0:'number', 2:'name', 6:'TOI'}).loc[:, ['number', 'name', 'TOI', 'team']]
|
|
1062
1063
|
|
|
1063
|
-
home_goalies = pd.read_html(str(goalie_table))[0][8:9]
|
|
1064
|
+
# home_goalies = pd.read_html(str(goalie_table))[0][8:9]
|
|
1064
1065
|
|
|
1065
1066
|
# Temporary to test. Will fix later.
|
|
1066
1067
|
|
|
@@ -2462,7 +2463,9 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
|
|
|
2462
2463
|
if len(event_coords[(event_coords.event.isin(ewc)) & (pd.isna(event_coords.coords_x))]) > 0:
|
|
2463
2464
|
raise ExpatError('Bad takes, dude!')
|
|
2464
2465
|
event_coords['game_id'] = int(game_id)
|
|
2466
|
+
print('Attempting to merge events again')
|
|
2465
2467
|
events = single.merge(event_coords, on = ['event_player_1', 'game_seconds', 'version', 'period', 'game_id', 'event'], how = 'left')
|
|
2468
|
+
print('Merged events again, we have this many rows:', len(events))
|
|
2466
2469
|
try:
|
|
2467
2470
|
events = fix_missing(single, event_coords, events)
|
|
2468
2471
|
except IndexError as e:
|
|
@@ -2521,8 +2524,11 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
|
|
|
2521
2524
|
print('Scraping ESPN Events')
|
|
2522
2525
|
print('Here is the ESPN ID:', espn_id)
|
|
2523
2526
|
event_coords = scrape_espn_events(int(espn_id))
|
|
2527
|
+
print('Scraped ESPN Events, we have this many rows:', len(event_coords))
|
|
2524
2528
|
event_coords['coordinate_source'] = 'espn'
|
|
2529
|
+
print('Attempting to merge events')
|
|
2525
2530
|
events = single.merge(event_coords, on = ['event_player_1', 'game_seconds', 'period', 'version', 'event'], how = 'left').drop(columns = ['espn_id'])
|
|
2531
|
+
print('Merged events, we have this many rows:', len(events))
|
|
2526
2532
|
try:
|
|
2527
2533
|
events = fix_missing(single, event_coords, events)
|
|
2528
2534
|
except IndexError as e:
|
|
@@ -2681,6 +2687,7 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
|
|
|
2681
2687
|
|
|
2682
2688
|
except AttributeError as e:
|
|
2683
2689
|
print(str(game_id) + ' does not have an HTML report. Here is the error: ' + str(e))
|
|
2690
|
+
print(traceback.format_exc())
|
|
2684
2691
|
i = i + 1
|
|
2685
2692
|
continue
|
|
2686
2693
|
|
|
@@ -2787,6 +2794,7 @@ def full_scrape(game_id_list, live = True, shift = False):
|
|
|
2787
2794
|
hidden_patrick = 0
|
|
2788
2795
|
|
|
2789
2796
|
df = full_scrape_1by1(game_id_list, live, shift_to_espn = shift)
|
|
2797
|
+
print('Full scrape complete, we have this many rows:', len(df))
|
|
2790
2798
|
|
|
2791
2799
|
# Fixing the Pettersson issue for event player. Just going downstream for this.
|
|
2792
2800
|
try:
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=j-7gTk-cp_0LyZihNxm67xH9KdA3Fx4xrFKKu3-9-rU,42245
|
|
2
|
+
TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=0YWnDOGFexyaUSjux-4pVp1NLOlnqk7CfCsQNQT6isI,163865
|
|
3
|
+
topdownhockey_scraper-6.0.7.dist-info/licenses/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
|
|
4
|
+
topdownhockey_scraper-6.0.7.dist-info/METADATA,sha256=nEp4X2tq_dylw6TAIzzmxL5L052rWQ7fTGj60AaQD7o,5670
|
|
5
|
+
topdownhockey_scraper-6.0.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
6
|
+
topdownhockey_scraper-6.0.7.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
|
|
7
|
+
topdownhockey_scraper-6.0.7.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=j-7gTk-cp_0LyZihNxm67xH9KdA3Fx4xrFKKu3-9-rU,42245
|
|
2
|
-
TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=WjxE2YormwL9AEH1uLLu3QwlrRGJp71Dhm-E76QF-1w,163353
|
|
3
|
-
topdownhockey_scraper-6.0.6.dist-info/licenses/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
|
|
4
|
-
topdownhockey_scraper-6.0.6.dist-info/METADATA,sha256=SqmRjZmvdxf5K8edOZiQhYWf1wUHF6qaK-6P0bRf4wg,5670
|
|
5
|
-
topdownhockey_scraper-6.0.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
6
|
-
topdownhockey_scraper-6.0.6.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
|
|
7
|
-
topdownhockey_scraper-6.0.6.dist-info/RECORD,,
|
|
{topdownhockey_scraper-6.0.6.dist-info → topdownhockey_scraper-6.0.7.dist-info}/WHEEL
RENAMED
|
File without changes
|
{topdownhockey_scraper-6.0.6.dist-info → topdownhockey_scraper-6.0.7.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{topdownhockey_scraper-6.0.6.dist-info → topdownhockey_scraper-6.0.7.dist-info}/top_level.txt
RENAMED
|
File without changes
|