TopDownHockey-Scraper 6.0.0.tar.gz → 6.0.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: TopDownHockey_Scraper
-Version: 6.0.0
+Version: 6.0.2
 Summary: The TopDownHockey Scraper
 Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
 Author: Patrick Bacon
@@ -17,8 +17,6 @@ Requires-Dist: numpy
 Requires-Dist: pandas
 Requires-Dist: bs4
 Requires-Dist: datetime
-Requires-Dist: seaborn
-Requires-Dist: matplotlib
 Requires-Dist: xmltodict
 Requires-Dist: lxml
 Requires-Dist: natsort
@@ -1,6 +1,6 @@
 [metadata]
 name = TopDownHockey_Scraper
-version = 5.0.2
+version = 6.0.2
 author = Patrick Bacon
 author_email = patrick.s.bacon@gmail.com
 description = A package built for scraping hockey data from EliteProspects, the NHL's HTML/API reports, and ESPN's XML reports.
@@ -25,8 +25,6 @@ install_requires =
     pandas
     datetime
     requests
-    seasborn
-    matplotlib
     xml
     xmltodict
     requests
@@ -9,7 +9,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setup(
     name="TopDownHockey_Scraper", # Replace with your own username
-    version="6.0.0",
+    version="6.0.2",
     author="Patrick Bacon",
     author_email="patrick.s.bacon@gmail.com",
     description="The TopDownHockey Scraper",
@@ -33,8 +33,6 @@ setup(
     'pandas',
     'bs4',
     'datetime',
-    'seaborn',
-    'matplotlib',
     'xmltodict',
     'lxml',
     'natsort'
@@ -2255,6 +2255,7 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
     this_date = (game_date)
     url = 'http://www.espn.com/nhl/scoreboard?date=' + this_date.replace("-", "")
     page = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout = 500)
+    print('Request to ESPN IDs successful.')
     soup = BeautifulSoup(page.content, parser = 'lxml')
     soup_found = soup.find_all('a', {'class':['AnchorLink truncate',
                                               'AnchorLink Button Button--sm Button--anchorLink Button--alt mb4 w-100',
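The fetch this hunk instruments is a plain requests-plus-BeautifulSoup scrape of the ESPN scoreboard. A self-contained sketch of the same pattern, runnable outside the package (the date is illustrative; note that requests measures timeout in seconds, so the package's timeout = 500 allows up to 500 seconds per request):

import requests
from bs4 import BeautifulSoup

game_date = '2024-03-09'  # illustrative date; the package passes 'YYYY-MM-DD' strings
url = 'http://www.espn.com/nhl/scoreboard?date=' + game_date.replace('-', '')
# requests' timeout is in seconds, so 500 is a very generous ceiling
page = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=500)
soup = BeautifulSoup(page.content, 'lxml')  # parser passed positionally
anchors = soup.find_all('a', {'class': 'AnchorLink truncate'})  # one of the classes the package matches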
@@ -2926,7 +2927,10 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
         espn_home_team = 'SJS'
     if away_team == 'S.J':
         espn_away_team = 'SJS'
+    print('Scraping ESPN IDs')
     espn_id = scrape_espn_ids_single_game(str(game_date.date()), espn_home_team, espn_away_team).espn_id.iloc[0]
+    print('Scraping ESPN Events')
+    print('Here is the ESPN ID:', espn_id)
     event_coords = scrape_espn_events(int(espn_id))
     event_coords['coordinate_source'] = 'espn'
     events = single.merge(event_coords, on = ['event_player_1', 'game_seconds', 'period', 'version', 'event'], how = 'left').drop(columns = ['espn_id'])
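For orientation, the two functions instrumented above feed the coordinate merge on the last line of the hunk. A hypothetical standalone run of that pipeline, assuming the import path suggested by the package name and using illustrative inputs:

# Assumed import path; adjust if the package exposes the module differently
from TopDownHockey_Scraper import TopDownHockey_NHL_Scraper as tdh

ids = tdh.scrape_espn_ids_single_game('2024-03-09', 'SJS', 'VAN')  # frame with an espn_id column
espn_id = int(ids.espn_id.iloc[0])
event_coords = tdh.scrape_espn_events(espn_id)  # per-event ESPN coordinates for the merge above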
@@ -3196,35 +3200,39 @@ def full_scrape(game_id_list, live = True, shift = False):
     df = full_scrape_1by1(game_id_list, live, shift_to_espn = shift)
 
     # Fixing the Pettersson issue for event player. Just going downstream for this.
-    df = df.assign(
-        event_player_1 = np.where(
-            (df.event_player_1 == 'ELIAS PETTERSSON') &
-            (df.event_description.str.contains('#', na=False)) &
-            (df.event_description.str.contains(' PETTERSSON', na=False)) &
-            (df.event_description.str.extract(r'#(\d+) PETTERSSON', expand=False) == '25'),
-            'ELIAS PETTERSSON(D)', df.event_player_1),
-        event_player_2 = np.where(
-            (df.event_player_2 == 'ELIAS PETTERSSON') &
-            (
-                # Goal and Petey got A1
-                ((df.event_type == 'GOAL') &
-                 (df.event_description.str.contains(': #', na=False)) &
-                 (df.event_description.str.contains(' PETTERSSON', na=False)) &
-                 (df.event_description.str.extract(r': #(\d+) PETTERSSON', expand=False) == '25')) |
-                # Not a goal, Petey was EP2
-                ((df.event_type != 'GOAL') &
-                 (df.event_description.str.contains('VAN #', na=False)) &
-                 (df.event_description.str.contains(' PETTERSSON', na=False)) &
-                 (df.event_description.str.extract(r'VAN #(\d+) PETTERSSON', expand=False) == '25'))
-            ),
-            'ELIAS PETTERSSON(D)', df.event_player_2),
-        event_player_3 = np.where(
-            (df.event_player_3=='ELIAS PETTERSSON') &
-            (df.event_description.str.contains('#', na=False)) &
-            (df.event_description.str.contains(' PETTERSSON', na=False)) &
-            (df.event_description.str.extract(r'#(\d+) PETTERSSON(?:\s|$)', expand=False) == '25'),
-            'ELIAS PETTERSSON(D)', df.event_player_3)
-    )
+    try:
+        df = df.assign(
+            event_player_1 = np.where(
+                (df.event_player_1 == 'ELIAS PETTERSSON') &
+                (df.event_description.str.contains('#', na=False)) &
+                (df.event_description.str.contains(' PETTERSSON', na=False)) &
+                (df.event_description.str.extract(r'#(\d+) PETTERSSON', expand=False) == '25'),
+                'ELIAS PETTERSSON(D)', df.event_player_1),
+            event_player_2 = np.where(
+                (df.event_player_2 == 'ELIAS PETTERSSON') &
+                (
+                    # Goal and Petey got A1
+                    ((df.event_type == 'GOAL') &
+                     (df.event_description.str.contains(': #', na=False)) &
+                     (df.event_description.str.contains(' PETTERSSON', na=False)) &
+                     (df.event_description.str.extract(r': #(\d+) PETTERSSON', expand=False) == '25')) |
+                    # Not a goal, Petey was EP2
+                    ((df.event_type != 'GOAL') &
+                     (df.event_description.str.contains('VAN #', na=False)) &
+                     (df.event_description.str.contains(' PETTERSSON', na=False)) &
+                     (df.event_description.str.extract(r'VAN #(\d+) PETTERSSON', expand=False) == '25'))
+                ),
+                'ELIAS PETTERSSON(D)', df.event_player_2),
+            event_player_3 = np.where(
+                (df.event_player_3=='ELIAS PETTERSSON') &
+                (df.event_description.str.contains('#', na=False)) &
+                (df.event_description.str.contains(' PETTERSSON', na=False)) &
+                (df.event_description.str.extract(r'#(\d+) PETTERSSON(?:\s|$)', expand=False) == '25'),
+                'ELIAS PETTERSSON(D)', df.event_player_3)
+        )
+    except Exception as e:
+        print(e)
+        continue
 
     # Don't even need this, we've had this problem with Stutzle for years, just let it be.
     # df.event_description = df.event_description.str.replace('FEHÃ\x89RVÃ\x81RY', 'FEHERVARY').str.replace('BLÜMEL', 'BLAMEL')
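The hunk above only wraps the existing logic in a try/except; the underlying disambiguation keys off the jersey number embedded in the play-by-play description, since Vancouver dressed two players named Elias Pettersson and, per this code, #25 is the defenseman. A minimal sketch of the same np.where / str.extract pattern on toy data (descriptions are illustrative):

import numpy as np
import pandas as pd

# Toy rows; in the package, df comes from full_scrape_1by1
df = pd.DataFrame({
    'event_player_1': ['ELIAS PETTERSSON', 'ELIAS PETTERSSON'],
    'event_description': ['VAN #25 PETTERSSON BLOCKED SHOT', 'VAN #40 PETTERSSON SHOT ON GOAL'],
})

# Relabel only when the description pins jersey #25 (the defenseman)
df['event_player_1'] = np.where(
    (df.event_player_1 == 'ELIAS PETTERSSON') &
    (df.event_description.str.extract(r'#(\d+) PETTERSSON', expand=False) == '25'),
    'ELIAS PETTERSSON(D)', df.event_player_1,
)
print(df.event_player_1.tolist())  # ['ELIAS PETTERSSON(D)', 'ELIAS PETTERSSON']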
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: TopDownHockey_Scraper
-Version: 6.0.0
+Version: 6.0.2
 Summary: The TopDownHockey Scraper
 Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
 Author: Patrick Bacon
@@ -17,8 +17,6 @@ Requires-Dist: numpy
 Requires-Dist: pandas
 Requires-Dist: bs4
 Requires-Dist: datetime
-Requires-Dist: seaborn
-Requires-Dist: matplotlib
 Requires-Dist: xmltodict
 Requires-Dist: lxml
 Requires-Dist: natsort
@@ -2,8 +2,6 @@ numpy
 pandas
 bs4
 datetime
-seaborn
-matplotlib
 xmltodict
 lxml
 natsort