TopDownHockey-Scraper 6.0.3.tar.gz → 6.0.6.tar.gz

This diff shows the content of publicly released package versions as they appear in their public registry. It is provided for informational purposes only and reflects the changes between the two versions.

Potentially problematic release.


This version of TopDownHockey-Scraper might be problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: TopDownHockey_Scraper
-Version: 6.0.3
+Version: 6.0.6
 Summary: The TopDownHockey Scraper
 Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
 Author: Patrick Bacon
@@ -1,6 +1,6 @@
 [metadata]
 name = TopDownHockey_Scraper
-version = 6.0.3
+version = 6.0.6
 author = Patrick Bacon
 author_email = patrick.s.bacon@gmail.com
 description = A package built for scraping hockey data from EliteProspects, the NHL's HTML/API reports, and ESPN's XML reports.
@@ -9,7 +9,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setup(
     name="TopDownHockey_Scraper", # Replace with your own username
-    version="6.0.3",
+    version="6.0.6",
     author="Patrick Bacon",
     author_email="patrick.s.bacon@gmail.com",
     description="The TopDownHockey Scraper",
@@ -834,8 +834,6 @@ def scrape_html_shifts(season, game_id, live = True):
 
     home_shifts = alldf
 
-    home_shifts.to_csv('/Users/patrickbacon/compact_topdownhockey/home_shifts.csv', index = False)
-
     if live == True:
 
         home_shifts = home_shifts.assign(shift_number = home_shifts.shift_number.astype(int))
@@ -956,8 +954,6 @@ def scrape_html_shifts(season, game_id, live = True):
 
     away_shifts = alldf
 
-    away_shifts.to_csv('/Users/patrickbacon/compact_topdownhockey/away_shifts.csv', index = False)
-
     if live == True:
 
         away_shifts = away_shifts.assign(shift_number = away_shifts.shift_number.astype(int))
@@ -1108,8 +1104,6 @@ def scrape_html_shifts(season, game_id, live = True):
     global all_shifts
 
     all_shifts = pd.concat([home_shifts, away_shifts])
-
-    #all_shifts.to_csv('/Users/patrickbacon/compact_topdownhockey/all_shifts.csv', index = False)
 
     all_shifts = all_shifts.assign(start_time = all_shifts.shift_start.str.split('/').str[0])
 
@@ -1359,7 +1353,6 @@ def scrape_html_shifts(season, game_id, live = True):
     global changes_on
     global changes_off
     myshifts = all_shifts
-    #myshifts.to_csv('/Users/patrickbacon/compact_topdownhockey/tmp.csv', index = False)
     #print('Printing my shifts')
 
     #print(myshifts)
@@ -2080,8 +2073,6 @@ def merge_and_prepare(events, shifts):
         np.where(merged.event.isin(['PGSTR', 'PGEND', 'PSTR', 'PEND', 'ANTHEM']), -1, 1))).sort_values(
         by = ['game_seconds', 'period', 'event_index'])
 
-    merged.to_csv('/Users/patrickbacon/compact_topdownhockey/first_merged.csv', index = False)
-
     merged = merged.assign(change_before_event = np.where(
         (
             (merged.away_on_ice!='') & (merged.event.shift()=='CHANGE') & (merged.away_on_ice!=merged.away_on_ice.shift()) |
@@ -2465,7 +2456,7 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
         # If all goes well with the HTML scrape:
 
         try:
-            event_coords = scrape_api_events(game_id, shift_to_espn = shift_to_espn)
+            event_coords = scrape_api_events(game_id, shift_to_espn = True)
             api_coords = event_coords
             api_coords['coordinate_source'] = 'api'
             if len(event_coords[(event_coords.event.isin(ewc)) & (pd.isna(event_coords.coords_x))]) > 0:
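The change in this hunk pins shift_to_espn = True at the call site, so the caller's shift_to_espn argument no longer reaches scrape_api_events. The missing-coordinate check that gates the ESPN fallback is visible in the last context line; a minimal sketch of that check, assuming event_coords is a pandas DataFrame with event and coords_x columns and ewc is the collection of coordinate-bearing event types (those names appear in the diff context, everything else here is illustrative):

    import pandas as pd

    def needs_espn_fallback(event_coords: pd.DataFrame, ewc) -> bool:
        # True when any event that should carry coordinates is missing
        # coords_x -- the same condition the context line above uses to
        # decide whether to fall back to ESPN's event feed.
        missing = event_coords[
            event_coords.event.isin(ewc) & pd.isna(event_coords.coords_x)
        ]
        return len(missing) > 0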
@@ -2528,7 +2519,7 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
             print('Scraping ESPN IDs')
             espn_id = scrape_espn_ids_single_game(str(game_date.date()), espn_home_team, espn_away_team).espn_id.iloc[0]
             print('Scraping ESPN Events')
-            print('Here is the ESPN ID': espn_id)
+            print('Here is the ESPN ID:', espn_id)
             event_coords = scrape_espn_events(int(espn_id))
             event_coords['coordinate_source'] = 'espn'
             events = single.merge(event_coords, on = ['event_player_1', 'game_seconds', 'period', 'version', 'event'], how = 'left').drop(columns = ['espn_id'])
@@ -2830,7 +2821,6 @@ def full_scrape(game_id_list, live = True, shift = False):
             )
         except Exception as e:
             print(e)
-            continue
 
         # Don't even need this, we've had this problem with Stutzle for years, just let it be.
         # df.event_description = df.event_description.str.replace('FEHÃ\x89RVÃ\x81RY', 'FEHERVARY').str.replace('BLÜMEL', 'BLAMEL')
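For orientation, the two entry points touched above can be exercised as follows. This is a usage sketch only: the signatures full_scrape_1by1(game_id_list, live = False, shift_to_espn = True) and full_scrape(game_id_list, live = True, shift = False) are taken from the hunk headers, while the import path and the example game ID are assumptions.

    # Assumed import path; the game ID is an illustrative placeholder in the
    # NHL's season + game-type + game-number format.
    from TopDownHockey_Scraper import TopDownHockey_NHL_Scraper as tdh

    games = [2023020001]

    # Scrape play-by-play one game at a time; per the 2465/2456 hunk above,
    # 6.0.6 forces the ESPN coordinate fallback on inside the function
    # regardless of the shift_to_espn argument.
    pbp = tdh.full_scrape_1by1(games, live = False, shift_to_espn = True)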
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: TopDownHockey_Scraper
-Version: 6.0.3
+Version: 6.0.6
 Summary: The TopDownHockey Scraper
 Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
 Author: Patrick Bacon