TopDownHockey-Scraper 6.0.6__tar.gz → 6.0.7__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of TopDownHockey-Scraper might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: TopDownHockey_Scraper
3
- Version: 6.0.6
3
+ Version: 6.0.7
4
4
  Summary: The TopDownHockey Scraper
5
5
  Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
6
6
  Author: Patrick Bacon
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = TopDownHockey_Scraper
3
- version = 6.0.6
3
+ version = 6.0.7
4
4
  author = Patrick Bacon
5
5
  author_email = patrick.s.bacon@gmail.com
6
6
  description = A package built for scraping hockey data from EliteProspects, the NHL's HTML/API reports, and ESPN's XML reports.
@@ -9,7 +9,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
9
9
 
10
10
  setup(
11
11
  name="TopDownHockey_Scraper", # Replace with your own username
12
- version="6.0.6",
12
+ version="6.0.7",
13
13
  author="Patrick Bacon",
14
14
  author_email="patrick.s.bacon@gmail.com",
15
15
  description="The TopDownHockey Scraper",
@@ -18,6 +18,7 @@ import xml.etree.ElementTree as ET
18
18
  import xmltodict
19
19
  from xml.parsers.expat import ExpatError
20
20
  from requests.exceptions import ChunkedEncodingError
21
+ import traceback
21
22
 
22
23
  print('Successfully did local install plus update')
23
24
 
@@ -1060,7 +1061,7 @@ def scrape_html_shifts(season, game_id, live = True):
1060
1061
 
1061
1062
  home_goalies = home_goalies.assign(team = home_team).rename(columns = {0:'number', 2:'name', 6:'TOI'}).loc[:, ['number', 'name', 'TOI', 'team']]
1062
1063
 
1063
- home_goalies = pd.read_html(str(goalie_table))[0][8:9]
1064
+ # home_goalies = pd.read_html(str(goalie_table))[0][8:9]
1064
1065
 
1065
1066
  # Temporary to test. Will fix later.
1066
1067
 
@@ -2462,7 +2463,9 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
2462
2463
  if len(event_coords[(event_coords.event.isin(ewc)) & (pd.isna(event_coords.coords_x))]) > 0:
2463
2464
  raise ExpatError('Bad takes, dude!')
2464
2465
  event_coords['game_id'] = int(game_id)
2466
+ print('Attempting to merge events again')
2465
2467
  events = single.merge(event_coords, on = ['event_player_1', 'game_seconds', 'version', 'period', 'game_id', 'event'], how = 'left')
2468
+ print('Merged events again, we have this many rows:', len(events))
2466
2469
  try:
2467
2470
  events = fix_missing(single, event_coords, events)
2468
2471
  except IndexError as e:
@@ -2521,8 +2524,11 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
2521
2524
  print('Scraping ESPN Events')
2522
2525
  print('Here is the ESPN ID:', espn_id)
2523
2526
  event_coords = scrape_espn_events(int(espn_id))
2527
+ print('Scraped ESPN Events, we have this many rows:', len(event_coords))
2524
2528
  event_coords['coordinate_source'] = 'espn'
2529
+ print('Attempting to merge events')
2525
2530
  events = single.merge(event_coords, on = ['event_player_1', 'game_seconds', 'period', 'version', 'event'], how = 'left').drop(columns = ['espn_id'])
2531
+ print('Merged events, we have this many rows:', len(events))
2526
2532
  try:
2527
2533
  events = fix_missing(single, event_coords, events)
2528
2534
  except IndexError as e:
@@ -2681,6 +2687,7 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
2681
2687
 
2682
2688
  except AttributeError as e:
2683
2689
  print(str(game_id) + ' does not have an HTML report. Here is the error: ' + str(e))
2690
+ print(traceback.format_exc())
2684
2691
  i = i + 1
2685
2692
  continue
2686
2693
 
@@ -2787,6 +2794,7 @@ def full_scrape(game_id_list, live = True, shift = False):
2787
2794
  hidden_patrick = 0
2788
2795
 
2789
2796
  df = full_scrape_1by1(game_id_list, live, shift_to_espn = shift)
2797
+ print('Full scrape complete, we have this many rows:', len(df))
2790
2798
 
2791
2799
  # Fixing the Pettersson issue for event player. Just going downstream for this.
2792
2800
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: TopDownHockey_Scraper
3
- Version: 6.0.6
3
+ Version: 6.0.7
4
4
  Summary: The TopDownHockey Scraper
5
5
  Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
6
6
  Author: Patrick Bacon