TopDownHockey-Scraper 6.0.5-py3-none-any.whl → 6.0.7-py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.

Potentially problematic release: this version of TopDownHockey-Scraper has been flagged for review.

TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py

@@ -18,6 +18,7 @@ import xml.etree.ElementTree as ET
 import xmltodict
 from xml.parsers.expat import ExpatError
 from requests.exceptions import ChunkedEncodingError
+import traceback
 
 print('Successfully did local install plus update')
 
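The new import supports a logging change further down in this diff, where traceback.format_exc() is printed inside an except AttributeError handler. A minimal sketch of that pattern, not the scraper's actual code (the scrape_one function and the game ID here are made up):

    import traceback

    def scrape_one(game_id):
        # Stand-in for the scraper's per-game HTML parsing.
        raise AttributeError("'NoneType' object has no attribute 'find_all'")

    try:
        scrape_one(2023020001)  # hypothetical game ID
    except AttributeError as e:
        # str(e) carries only the message; format_exc() adds the full stack
        # trace, showing exactly where the HTML parsing fell over.
        print(str(2023020001) + ' does not have an HTML report. Here is the error: ' + str(e))
        print(traceback.format_exc())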
@@ -834,8 +835,6 @@ def scrape_html_shifts(season, game_id, live = True):
 
 home_shifts = alldf
 
-home_shifts.to_csv('/Users/patrickbacon/compact_topdownhockey/home_shifts.csv', index = False)
-
 if live == True:
 
 home_shifts = home_shifts.assign(shift_number = home_shifts.shift_number.astype(int))
@@ -956,8 +955,6 @@ def scrape_html_shifts(season, game_id, live = True):
 
 away_shifts = alldf
 
-away_shifts.to_csv('/Users/patrickbacon/compact_topdownhockey/away_shifts.csv', index = False)
-
 if live == True:
 
 away_shifts = away_shifts.assign(shift_number = away_shifts.shift_number.astype(int))
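Both removed lines (and the similar removals in the hunks below) wrote intermediate DataFrames to an absolute path under the author's home directory, which would raise an OSError on any other machine. If such debug dumps were ever wanted again, a more portable pattern is to gate them behind an environment variable; a hypothetical sketch (the TDH_DEBUG_DIR variable and maybe_dump helper are invented here, not part of the package):

    import os
    import pandas as pd

    def maybe_dump(df: pd.DataFrame, name: str) -> None:
        """Write a debug CSV only when TDH_DEBUG_DIR is set (hypothetical helper)."""
        debug_dir = os.environ.get('TDH_DEBUG_DIR')
        if debug_dir:
            df.to_csv(os.path.join(debug_dir, name + '.csv'), index=False)

    # At the points this diff removes:
    # maybe_dump(home_shifts, 'home_shifts')
    # maybe_dump(away_shifts, 'away_shifts')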
@@ -1064,7 +1061,7 @@ def scrape_html_shifts(season, game_id, live = True):
 
 home_goalies = home_goalies.assign(team = home_team).rename(columns = {0:'number', 2:'name', 6:'TOI'}).loc[:, ['number', 'name', 'TOI', 'team']]
 
-home_goalies = pd.read_html(str(goalie_table))[0][8:9]
+# home_goalies = pd.read_html(str(goalie_table))[0][8:9]
 
 # Temporary to test. Will fix later.
 
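For context on the line being commented out: pd.read_html parses every table in the given HTML and returns a list of DataFrames, so [0][8:9] takes the first table and slices out row 8 as a one-row frame. Overwriting the already-built home_goalies with that raw slice discarded the renamed columns, which is presumably why the line is now disabled (the adjacent "Temporary to test" comment suggests as much). A minimal sketch of the slicing behaviour with a toy table:

    from io import StringIO
    import pandas as pd

    html = '<table>' + ''.join(
        f'<tr><td>{i}</td><td>row {i}</td></tr>' for i in range(10)
    ) + '</table>'

    # read_html needs an HTML parser installed (lxml, or bs4 + html5lib).
    tables = pd.read_html(StringIO(html))  # one DataFrame per <table> found
    print(len(tables))       # 1
    print(tables[0][8:9])    # a one-row DataFrame holding row index 8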
@@ -1108,8 +1105,6 @@ def scrape_html_shifts(season, game_id, live = True):
 global all_shifts
 
 all_shifts = pd.concat([home_shifts, away_shifts])
-
-#all_shifts.to_csv('/Users/patrickbacon/compact_topdownhockey/all_shifts.csv', index = False)
 
 all_shifts = all_shifts.assign(start_time = all_shifts.shift_start.str.split('/').str[0])
 
@@ -1359,7 +1354,6 @@ def scrape_html_shifts(season, game_id, live = True):
 global changes_on
 global changes_off
 myshifts = all_shifts
-#myshifts.to_csv('/Users/patrickbacon/compact_topdownhockey/tmp.csv', index = False)
 #print('Printing my shifts')
 
 #print(myshifts)
@@ -2080,8 +2074,6 @@ def merge_and_prepare(events, shifts):
 np.where(merged.event.isin(['PGSTR', 'PGEND', 'PSTR', 'PEND', 'ANTHEM']), -1, 1))).sort_values(
 by = ['game_seconds', 'period', 'event_index'])
 
-merged.to_csv('/Users/patrickbacon/compact_topdownhockey/first_merged.csv', index = False)
-
 merged = merged.assign(change_before_event = np.where(
 (
 (merged.away_on_ice!='') & (merged.event.shift()=='CHANGE') & (merged.away_on_ice!=merged.away_on_ice.shift()) |
@@ -2471,7 +2463,9 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
 if len(event_coords[(event_coords.event.isin(ewc)) & (pd.isna(event_coords.coords_x))]) > 0:
 raise ExpatError('Bad takes, dude!')
 event_coords['game_id'] = int(game_id)
+print('Attempting to merge events again')
 events = single.merge(event_coords, on = ['event_player_1', 'game_seconds', 'version', 'period', 'game_id', 'event'], how = 'left')
+print('Merged events again, we have this many rows:', len(events))
 try:
 events = fix_missing(single, event_coords, events)
 except IndexError as e:
@@ -2530,8 +2524,11 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
 print('Scraping ESPN Events')
 print('Here is the ESPN ID:', espn_id)
 event_coords = scrape_espn_events(int(espn_id))
+print('Scraped ESPN Events, we have this many rows:', len(event_coords))
 event_coords['coordinate_source'] = 'espn'
+print('Attempting to merge events')
 events = single.merge(event_coords, on = ['event_player_1', 'game_seconds', 'period', 'version', 'event'], how = 'left').drop(columns = ['espn_id'])
+print('Merged events, we have this many rows:', len(events))
 try:
 events = fix_missing(single, event_coords, events)
 except IndexError as e:
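The added prints bracket each pandas left merge because a left merge can silently change the row count: if the right-hand frame has duplicate key combinations, each matching event row is multiplied. Logging len(events) before and after makes that blow-up visible mid-scrape. A minimal illustration with toy frames (not the scraper's data):

    import pandas as pd

    events = pd.DataFrame({'event_player_1': ['A', 'B'],
                           'game_seconds': [10, 20],
                           'event': ['SHOT', 'HIT']})
    coords = pd.DataFrame({'event_player_1': ['A', 'A', 'B'],
                           'game_seconds': [10, 10, 20],
                           'event': ['SHOT', 'SHOT', 'HIT'],
                           'coords_x': [5, 5, -30]})

    merged = events.merge(coords, on=['event_player_1', 'game_seconds', 'event'],
                          how='left')
    print(len(events), '->', len(merged))  # 2 -> 3: the duplicate key doubled row A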
@@ -2690,6 +2687,7 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
 
 except AttributeError as e:
 print(str(game_id) + ' does not have an HTML report. Here is the error: ' + str(e))
+print(traceback.format_exc())
 i = i + 1
 continue
 
@@ -2796,6 +2794,7 @@ def full_scrape(game_id_list, live = True, shift = False):
 hidden_patrick = 0
 
 df = full_scrape_1by1(game_id_list, live, shift_to_espn = shift)
+print('Full scrape complete, we have this many rows:', len(df))
 
 # Fixing the Pettersson issue for event player. Just going downstream for this.
 try:
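For reference, full_scrape is the wrapper whose output the new print summarizes. A typical call might look like the sketch below; the game ID is a placeholder (NHL IDs follow a season/game-type/game-number pattern, e.g. 2023020001), and note that running this actually scrapes remote NHL/ESPN endpoints:

    from TopDownHockey_Scraper import TopDownHockey_NHL_Scraper as tdh

    df = tdh.full_scrape([2023020001], live = False, shift = False)
    print('Full scrape complete, we have this many rows:', len(df))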
topdownhockey_scraper-6.0.7.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: TopDownHockey_Scraper
-Version: 6.0.5
+Version: 6.0.7
 Summary: The TopDownHockey Scraper
 Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
 Author: Patrick Bacon
topdownhockey_scraper-6.0.7.dist-info/RECORD (added)

@@ -0,0 +1,7 @@
+TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=j-7gTk-cp_0LyZihNxm67xH9KdA3Fx4xrFKKu3-9-rU,42245
+TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=0YWnDOGFexyaUSjux-4pVp1NLOlnqk7CfCsQNQT6isI,163865
+topdownhockey_scraper-6.0.7.dist-info/licenses/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
+topdownhockey_scraper-6.0.7.dist-info/METADATA,sha256=nEp4X2tq_dylw6TAIzzmxL5L052rWQ7fTGj60AaQD7o,5670
+topdownhockey_scraper-6.0.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+topdownhockey_scraper-6.0.7.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
+topdownhockey_scraper-6.0.7.dist-info/RECORD,,
topdownhockey_scraper-6.0.5.dist-info/RECORD (removed)

@@ -1,7 +0,0 @@
-TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=j-7gTk-cp_0LyZihNxm67xH9KdA3Fx4xrFKKu3-9-rU,42245
-TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=kRZPB-pfRCDn6K2UK4ZHYlo09bDHxF7B34w8VE59GoI,163837
-topdownhockey_scraper-6.0.5.dist-info/licenses/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
-topdownhockey_scraper-6.0.5.dist-info/METADATA,sha256=yvjnUIQ66Z80Oi02-mWvV2GdFMvvBk2O-lgoGOB4kx0,5670
-topdownhockey_scraper-6.0.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-topdownhockey_scraper-6.0.5.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
-topdownhockey_scraper-6.0.5.dist-info/RECORD,,
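The RECORD comparison confirms that only TopDownHockey_NHL_Scraper.py changed between 6.0.5 and 6.0.7 (163837 → 163865 bytes); the EliteProspects scraper, LICENSE, WHEEL, and top_level.txt hashes are identical. Each RECORD entry has the form path,sha256=digest,size, where the digest is the urlsafe-base64 SHA-256 of the file with trailing '=' padding stripped, per the wheel/RECORD specs. A quick sketch for checking a file against its RECORD line:

    import base64
    import hashlib

    def record_digest(path: str) -> str:
        """Compute the sha256= value a wheel RECORD uses for this file."""
        with open(path, 'rb') as f:
            digest = hashlib.sha256(f.read()).digest()
        return base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')

    # e.g. after unpacking the 6.0.7 wheel:
    # assert record_digest('TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py') \
    #        == '0YWnDOGFexyaUSjux-4pVp1NLOlnqk7CfCsQNQT6isI'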