TopDownHockey-Scraper 6.0.5__py3-none-any.whl → 6.0.7__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries, as those versions appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of TopDownHockey-Scraper might be problematic. Click here for more details.
- TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py +9 -10
- {topdownhockey_scraper-6.0.5.dist-info → topdownhockey_scraper-6.0.7.dist-info}/METADATA +1 -1
- topdownhockey_scraper-6.0.7.dist-info/RECORD +7 -0
- topdownhockey_scraper-6.0.5.dist-info/RECORD +0 -7
- {topdownhockey_scraper-6.0.5.dist-info → topdownhockey_scraper-6.0.7.dist-info}/WHEEL +0 -0
- {topdownhockey_scraper-6.0.5.dist-info → topdownhockey_scraper-6.0.7.dist-info}/licenses/LICENSE +0 -0
- {topdownhockey_scraper-6.0.5.dist-info → topdownhockey_scraper-6.0.7.dist-info}/top_level.txt +0 -0
|
@@ -18,6 +18,7 @@ import xml.etree.ElementTree as ET
|
|
|
18
18
|
import xmltodict
|
|
19
19
|
from xml.parsers.expat import ExpatError
|
|
20
20
|
from requests.exceptions import ChunkedEncodingError
|
|
21
|
+
import traceback
|
|
21
22
|
|
|
22
23
|
print('Successfully did local install plus update')
|
|
23
24
|
|
|
@@ -834,8 +835,6 @@ def scrape_html_shifts(season, game_id, live = True):
|
|
|
834
835
|
|
|
835
836
|
home_shifts = alldf
|
|
836
837
|
|
|
837
|
-
home_shifts.to_csv('/Users/patrickbacon/compact_topdownhockey/home_shifts.csv', index = False)
|
|
838
|
-
|
|
839
838
|
if live == True:
|
|
840
839
|
|
|
841
840
|
home_shifts = home_shifts.assign(shift_number = home_shifts.shift_number.astype(int))
|
|
@@ -956,8 +955,6 @@ def scrape_html_shifts(season, game_id, live = True):
|
|
|
956
955
|
|
|
957
956
|
away_shifts = alldf
|
|
958
957
|
|
|
959
|
-
away_shifts.to_csv('/Users/patrickbacon/compact_topdownhockey/away_shifts.csv', index = False)
|
|
960
|
-
|
|
961
958
|
if live == True:
|
|
962
959
|
|
|
963
960
|
away_shifts = away_shifts.assign(shift_number = away_shifts.shift_number.astype(int))
|
|
@@ -1064,7 +1061,7 @@ def scrape_html_shifts(season, game_id, live = True):
|
|
|
1064
1061
|
|
|
1065
1062
|
home_goalies = home_goalies.assign(team = home_team).rename(columns = {0:'number', 2:'name', 6:'TOI'}).loc[:, ['number', 'name', 'TOI', 'team']]
|
|
1066
1063
|
|
|
1067
|
-
home_goalies = pd.read_html(str(goalie_table))[0][8:9]
|
|
1064
|
+
# home_goalies = pd.read_html(str(goalie_table))[0][8:9]
|
|
1068
1065
|
|
|
1069
1066
|
# Temporary to test. Will fix later.
|
|
1070
1067
|
|
|
@@ -1108,8 +1105,6 @@ def scrape_html_shifts(season, game_id, live = True):
|
|
|
1108
1105
|
global all_shifts
|
|
1109
1106
|
|
|
1110
1107
|
all_shifts = pd.concat([home_shifts, away_shifts])
|
|
1111
|
-
|
|
1112
|
-
#all_shifts.to_csv('/Users/patrickbacon/compact_topdownhockey/all_shifts.csv', index = False)
|
|
1113
1108
|
|
|
1114
1109
|
all_shifts = all_shifts.assign(start_time = all_shifts.shift_start.str.split('/').str[0])
|
|
1115
1110
|
|
|
@@ -1359,7 +1354,6 @@ def scrape_html_shifts(season, game_id, live = True):
|
|
|
1359
1354
|
global changes_on
|
|
1360
1355
|
global changes_off
|
|
1361
1356
|
myshifts = all_shifts
|
|
1362
|
-
#myshifts.to_csv('/Users/patrickbacon/compact_topdownhockey/tmp.csv', index = False)
|
|
1363
1357
|
#print('Printing my shifts')
|
|
1364
1358
|
|
|
1365
1359
|
#print(myshifts)
|
|
@@ -2080,8 +2074,6 @@ def merge_and_prepare(events, shifts):
|
|
|
2080
2074
|
np.where(merged.event.isin(['PGSTR', 'PGEND', 'PSTR', 'PEND', 'ANTHEM']), -1, 1))).sort_values(
|
|
2081
2075
|
by = ['game_seconds', 'period', 'event_index'])
|
|
2082
2076
|
|
|
2083
|
-
merged.to_csv('/Users/patrickbacon/compact_topdownhockey/first_merged.csv', index = False)
|
|
2084
|
-
|
|
2085
2077
|
merged = merged.assign(change_before_event = np.where(
|
|
2086
2078
|
(
|
|
2087
2079
|
(merged.away_on_ice!='') & (merged.event.shift()=='CHANGE') & (merged.away_on_ice!=merged.away_on_ice.shift()) |
|
|
@@ -2471,7 +2463,9 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
|
|
|
2471
2463
|
if len(event_coords[(event_coords.event.isin(ewc)) & (pd.isna(event_coords.coords_x))]) > 0:
|
|
2472
2464
|
raise ExpatError('Bad takes, dude!')
|
|
2473
2465
|
event_coords['game_id'] = int(game_id)
|
|
2466
|
+
print('Attempting to merge events again')
|
|
2474
2467
|
events = single.merge(event_coords, on = ['event_player_1', 'game_seconds', 'version', 'period', 'game_id', 'event'], how = 'left')
|
|
2468
|
+
print('Merged events again, we have this many rows:', len(events))
|
|
2475
2469
|
try:
|
|
2476
2470
|
events = fix_missing(single, event_coords, events)
|
|
2477
2471
|
except IndexError as e:
|
|
@@ -2530,8 +2524,11 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
|
|
|
2530
2524
|
print('Scraping ESPN Events')
|
|
2531
2525
|
print('Here is the ESPN ID:', espn_id)
|
|
2532
2526
|
event_coords = scrape_espn_events(int(espn_id))
|
|
2527
|
+
print('Scraped ESPN Events, we have this many rows:', len(event_coords))
|
|
2533
2528
|
event_coords['coordinate_source'] = 'espn'
|
|
2529
|
+
print('Attempting to merge events')
|
|
2534
2530
|
events = single.merge(event_coords, on = ['event_player_1', 'game_seconds', 'period', 'version', 'event'], how = 'left').drop(columns = ['espn_id'])
|
|
2531
|
+
print('Merged events, we have this many rows:', len(events))
|
|
2535
2532
|
try:
|
|
2536
2533
|
events = fix_missing(single, event_coords, events)
|
|
2537
2534
|
except IndexError as e:
|
|
@@ -2690,6 +2687,7 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
|
|
|
2690
2687
|
|
|
2691
2688
|
except AttributeError as e:
|
|
2692
2689
|
print(str(game_id) + ' does not have an HTML report. Here is the error: ' + str(e))
|
|
2690
|
+
print(traceback.format_exc())
|
|
2693
2691
|
i = i + 1
|
|
2694
2692
|
continue
|
|
2695
2693
|
|
|
@@ -2796,6 +2794,7 @@ def full_scrape(game_id_list, live = True, shift = False):
|
|
|
2796
2794
|
hidden_patrick = 0
|
|
2797
2795
|
|
|
2798
2796
|
df = full_scrape_1by1(game_id_list, live, shift_to_espn = shift)
|
|
2797
|
+
print('Full scrape complete, we have this many rows:', len(df))
|
|
2799
2798
|
|
|
2800
2799
|
# Fixing the Pettersson issue for event player. Just going downstream for this.
|
|
2801
2800
|
try:
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=j-7gTk-cp_0LyZihNxm67xH9KdA3Fx4xrFKKu3-9-rU,42245
|
|
2
|
+
TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=0YWnDOGFexyaUSjux-4pVp1NLOlnqk7CfCsQNQT6isI,163865
|
|
3
|
+
topdownhockey_scraper-6.0.7.dist-info/licenses/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
|
|
4
|
+
topdownhockey_scraper-6.0.7.dist-info/METADATA,sha256=nEp4X2tq_dylw6TAIzzmxL5L052rWQ7fTGj60AaQD7o,5670
|
|
5
|
+
topdownhockey_scraper-6.0.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
6
|
+
topdownhockey_scraper-6.0.7.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
|
|
7
|
+
topdownhockey_scraper-6.0.7.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=j-7gTk-cp_0LyZihNxm67xH9KdA3Fx4xrFKKu3-9-rU,42245
|
|
2
|
-
TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=kRZPB-pfRCDn6K2UK4ZHYlo09bDHxF7B34w8VE59GoI,163837
|
|
3
|
-
topdownhockey_scraper-6.0.5.dist-info/licenses/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
|
|
4
|
-
topdownhockey_scraper-6.0.5.dist-info/METADATA,sha256=yvjnUIQ66Z80Oi02-mWvV2GdFMvvBk2O-lgoGOB4kx0,5670
|
|
5
|
-
topdownhockey_scraper-6.0.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
6
|
-
topdownhockey_scraper-6.0.5.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
|
|
7
|
-
topdownhockey_scraper-6.0.5.dist-info/RECORD,,
|
|
File without changes
|
{topdownhockey_scraper-6.0.5.dist-info → topdownhockey_scraper-6.0.7.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{topdownhockey_scraper-6.0.5.dist-info → topdownhockey_scraper-6.0.7.dist-info}/top_level.txt
RENAMED
|
File without changes
|