TopDownHockey-Scraper: 3.2.8-py3-none-any.whl → 4.0.1-py3-none-any.whl

This diff shows the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those published versions.

Potentially problematic release.

TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py

@@ -252,7 +252,7 @@ def get_info(link):
     """
     A function that is built strictly for the back end and should not be run by the user.
     """
-
+
     page = requests.get(link, timeout = 500)
     soup = BeautifulSoup(page.content, "html.parser")
 
@@ -265,99 +265,80 @@ def get_info(link):
         soup = BeautifulSoup(page.content, "html.parser")
         time.sleep(60)
 
-    if soup.find("title") != None:
-        player = soup.find("title").string.replace(" - Elite Prospects" ,"")
+    lis = soup.find_all('li')
 
-    else: player = "-"
-
-    if soup.find("div", {"class":"order-11 ep-list__item ep-list__item--in-card-body ep-list__item--is-compact"})!=None:
-        rights = soup.find("div", {"class":"order-11 ep-list__item ep-list__item--in-card-body ep-list__item--is-compact"}
-                ).find("div", {"class":"col-xs-12 col-18 text-right p-0"}).find("span").string.split("\n")[1].split("/")[0].strip()
-        status = soup.find("div", {"class":"order-11 ep-list__item ep-list__item--in-card-body ep-list__item--is-compact"}
-                ).find("div", {"class":"col-xs-12 col-18 text-right p-0"}).find("span").string.split("\n")[1].split("/")[1].strip()
-    else:
-        rights = "-"
-        status = "-"
-
-    if (soup.find("div", {"class":"col-xs-12 col-17 text-right p-0 ep-text-color--black"}))!= None:
-        if 'dob' in (soup.find("div", {"class":"col-xs-12 col-17 text-right p-0 ep-text-color--black"})).find("a")['href']:
-            dob = soup.find("div", {"class":"col-xs-12 col-17 text-right p-0 ep-text-color--black"}).find("a")['href'].split("dob=", 1)[1].split("&sort", 1)[0]
-        else:
-            dob = "-"
+    relevant_lis = [li for li in lis if li.find('span') is not None]
 
+    # player
+
+    if soup.find("title") != None:
+        player = soup.find("title").string.replace(' - Stats, Contract, Salary & More', '')
     else:
-        dob = "-"
-
-    if soup.find("div", {"class":"order-6 order-sm-3 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}) != None:
-        if "cm" in soup.find("div", {"class":"order-6 order-sm-3 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-                ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0 ep-text-color--black"}).string:
-            height = soup.find("div", {"class":"order-6 order-sm-3 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-                ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0 ep-text-color--black"}).string.split(" / ")[1].split("cm")[0].strip()
-        else:
-            height = "-"
-
+        player = '-'
+
+    # status
+
+    if [li for li in relevant_lis if li.find('span').text=='NHL Rights'] != []:
+        rights = [li for li in relevant_lis if li.find('span').text=='NHL Rights'][0].find('a').text.split(' /')[0]
+    else:
+        rights = '-'
+
+    # rights
+
+    if [li for li in relevant_lis if li.find('span').text=='NHL Rights'] != []:
+        status = [li for li in relevant_lis if li.find('span').text=='NHL Rights'][0].find('a').text.split(' / ')[1]
+    else:
+        status = '-'
+
+    # dob
+
+    if [li.find('a')['href'].split('dob=', 1)[1].split('&sort', 1)[0] for li in relevant_lis if li.span.text == 'Date of Birth'] != []:
+        dob = [li.find('a')['href'].split('dob=', 1)[1].split('&sort', 1)[0] for li in relevant_lis if li.span.text == 'Date of Birth'][0]
+    else:
+        dob = '-'
+
+    # height
+
+    if [li for li in relevant_lis if li.find('span').text=='Height'] != []:
+        height = [li for li in relevant_lis if li.find('span').text=='Height'][0].text.split('Height')[1].split(' cm')[0]
     else:
-        height = "-"
-
-    if soup.find("div", {"class":"order-7 order-sm-5 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}) != None:
-        if soup.find("div", {"class":"order-7 order-sm-5 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-                ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0 ep-text-color--black"}).string.split("\n")[1].split("lbs")[0].strip() == '- / -':
-            weight = "-"
-        else:
-            weight = soup.find("div", {"class":"order-7 order-sm-5 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-                ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0 ep-text-color--black"}).string.split("\n")[1].split("lbs")[0].strip()
-
-    else: weight = "-"
-
-    if soup.find("div", {"class":"order-2 order-sm-4 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-            ) != None:
-        if soup.find("div", {"class":"order-2 order-sm-4 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-                ).find(
-            "div", {"class":"col-xs-12 col-17 text-right p-0 ep-text-color--black"}).find("a") != None:
-
-            birthplace = soup.find("div", {"class":"order-2 order-sm-4 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-                ).find(
-            "div", {"class":"col-xs-12 col-17 text-right p-0 ep-text-color--black"}).find("a").string.replace("\n", "").strip()
-
-        else:
-            birthplace = "-"
+        height = '-'
+
+    # weight
+
+    if [li for li in relevant_lis if li.find('span').text=='Weight'] != []:
+        weight = [li for li in relevant_lis if li.find('span').text=='Weight'][0].text.split('Weight')[1].split(' cm')[0]
     else:
-        birthplace = "-"
-
-    if soup.find("div", {"class":"order-3 order-sm-6 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}) != None:
-        if soup.find("div", {"class":"order-3 order-sm-6 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-                ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0 ep-text-color--black"}).find("a") != None:
-            nation = soup.find("div", {"class":"order-3 order-sm-6 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-                ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0 ep-text-color--black"}).find("a").string.replace("\n", "").strip()
-        else: nation = "-"
-
+        weight = '-'
+
+    # birthplace
+
+    if [li for li in relevant_lis if li.find('span').text=='Place of Birth'] != []:
+        birthplace = [li for li in relevant_lis if li.find('span').text=='Place of Birth'][0].text.split('Birth')[1]
     else:
-        nation = "-"
-
-    if soup.find("div", {"class":"order-8 order-sm-7 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}) !=None:
-        shoots = soup.find("div", {"class":"order-8 order-sm-7 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-                ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0 ep-text-color--black"}).string.replace("\n", "").strip()
-
+        birthplace = '-'
+
+    # nation
+
+    if [li for li in relevant_lis if li.find('span').text=='Nation'] != []:
+        nation = [li for li in relevant_lis if li.find('span').text=='Nation'][0].text.split('Nation')[1]
     else:
-        shoots = "-"
-
-    if soup.find("div", {"class":"order-12 ep-list__item ep-list__item--in-card-body ep-list__item--is-compact"}) != None:
-        draft = soup.find("div", {"class":"order-12 ep-list__item ep-list__item--in-card-body ep-list__item--is-compact"}
-                ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0"}).find("a").string.replace("\n", "").strip()
-    else:
-        draft = "-"
-
-    #height = np.where(height=="- / -", "-", height)
-
-    #print(player + " scraped!")
+        nation = '-'
+
+    # shoots
+
+    if [li for li in relevant_lis if li.find('span').text=='Shoots'] != []:
+        shoots = [li for li in relevant_lis if li.find('span').text=='Shoots'][0].text.split('Shoots')[1]
+    else:
+        shoots = '-'
+
+    # draft
+
+    if [li for li in relevant_lis if li.find('span').text=='Drafted'] != []:
+        draft = [li for li in relevant_lis if li.find('span').text=='Drafted'][0].text.split('Drafted')[1]
+    else:
+        draft = '-'
+
     return(player, rights, status, dob, height, weight, birthplace, nation, shoots, draft, link)
 
 def get_player_information(dataframe):
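Note on the hunk above: 4.0.1 abandons the brittle CSS-class div selectors in favor of matching each profile field by the text of its `<span>` label inside an `<li>`, and the expected page-title suffix changes from ' - Elite Prospects' to ' - Stats, Contract, Salary & More'. Two oddities survive verbatim in the released code: the `# status` and `# rights` comments are swapped relative to the assignments beneath them, and the weight branch reuses the height branch's `' cm'` delimiter. The sketch below distills the lookup pattern into a hypothetical helper (`field_from_lis` is not part of the package) and assumes markup of the form `<li><span>Label</span> value</li>`:

```python
from bs4 import BeautifulSoup

def field_from_lis(soup, label):
    """Return the value after the first <li> whose <span> equals `label`, or '-'."""
    for li in soup.find_all('li'):
        span = li.find('span')
        if span is not None and span.text == label:
            # li.text is e.g. 'Height 185 cm'; splitting on the label
            # leaves the value portion.
            return li.text.split(label)[1].strip()
    return '-'

html = "<ul><li><span>Height</span> 185 cm</li><li><span>Nation</span> Canada</li></ul>"
soup = BeautifulSoup(html, "html.parser")
print(field_from_lis(soup, "Height"))  # '185 cm'
print(field_from_lis(soup, "Shoots"))  # '-'
```

Factoring the repeated list comprehensions into a single helper like this would also avoid scanning `relevant_lis` twice per field.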
@@ -382,17 +363,17 @@ def get_player_information(dataframe):
     for i in range(0, len(list(set(dataframe.link)))):
         try:
             myresult = get_info(((list(set(dataframe.link))[i])))
-            myplayer._append(myresult[0])
-            myrights._append(myresult[1])
-            mystatus._append(myresult[2])
-            mydob._append(myresult[3])
-            myheight._append(myresult[4])
-            myweight._append(myresult[5])
-            mybirthplace._append(myresult[6])
-            mynation._append(myresult[7])
-            myshot._append(myresult[8])
-            mydraft._append(myresult[9])
-            mylink._append(myresult[10])
+            myplayer.append(myresult[0])
+            myrights.append(myresult[1])
+            mystatus.append(myresult[2])
+            mydob.append(myresult[3])
+            myheight.append(myresult[4])
+            myweight.append(myresult[5])
+            mybirthplace.append(myresult[6])
+            mynation.append(myresult[7])
+            myshot.append(myresult[8])
+            mydraft.append(myresult[9])
+            mylink.append(myresult[10])
             print(myresult[0] + " scraped! That's " + str(i + 1) + " down! Only " + str(len(list(set(dataframe.link))) - (i + 1)) + " left to go!")
         except KeyboardInterrupt:
             print("You interrupted this one manually. The output here will be every player you've scraped so far. Good bye!")
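The `_append` → `append` change above fixes the collection step: `myplayer`, `myrights`, and the rest are plain Python lists, and `_append` exists only on pandas objects (as the private successor of the removed `DataFrame.append`), so the 3.2.8 code raised an error before any row was collected. A minimal reproduction:

```python
# Plain lists have .append but no ._append, so the 3.2.8 calls
# raised AttributeError on the first scraped player.
myplayer = []
myplayer.append("Connor McDavid")       # works
try:
    myplayer._append("Leon Draisaitl")  # what 3.2.8 did
except AttributeError as err:
    print(err)  # 'list' object has no attribute '_append'
```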
TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py

@@ -1912,6 +1912,8 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
         game_date = pd.to_datetime(this_date))
 
     gamedays = gamedays._append(fax)
+
+    gamedays = gamedays[gamedays.espn_id!='gameId']
 
     gamedays = gamedays.assign(
         home_team = np.where(gamedays.home_team=='ST LOUIS BLUES', 'ST. LOUIS BLUES', gamedays.home_team),
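The new filter above drops rows whose `espn_id` is the literal string `'gameId'`, which suggests a header row from the scraped ESPN schedule table was occasionally appended as if it were a game. A toy illustration (column values invented for the example):

```python
import pandas as pd

# A header row scraped as data carries the literal column
# name 'gameId' in the espn_id field; the filter removes it.
gamedays = pd.DataFrame({"espn_id": ["gameId", "401559123"],
                         "home_team": ["HOME", "BRUINS"]})
gamedays = gamedays[gamedays.espn_id != "gameId"]
print(gamedays)  # only the real game row survives
```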
@@ -1977,8 +1979,10 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
         np.where(gamedays.away_team=='GOLDEN', 'VGK',
         np.where(gamedays.away_team=='KNIGHTS', 'VGK',
         np.where(gamedays.away_team=='CAPITALS', 'WSH',
-        np.where(gamedays.away_team=='JETS', 'WPG', 'mistake'
-        ))))))))))))))))))))))))))))))))))))
+        np.where(gamedays.away_team=='JETS', 'WPG',
+        np.where(gamedays.away_team=='CLUB', 'UTA',
+        np.where(gamedays.away_team=='HOCKEY', 'UTA', 'mistake'
+        ))))))))))))))))))))))))))))))))))))))
     )
 
     gamedays = gamedays.assign(
@@ -2017,8 +2021,10 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
         np.where(gamedays.home_team=='GOLDEN', 'VGK',
         np.where(gamedays.home_team=='KNIGHTS', 'VGK',
         np.where(gamedays.home_team=='CAPITALS', 'WSH',
-        np.where(gamedays.home_team=='JETS', 'WPG', 'mistake'
-        ))))))))))))))))))))))))))))))))))))
+        np.where(gamedays.home_team=='JETS', 'WPG',
+        np.where(gamedays.home_team=='CLUB', 'UTA',
+        np.where(gamedays.home_team=='HOCKEY', 'UTA', 'mistake'
+        ))))))))))))))))))))))))))))))))))))))
     )
 
     gamedays = gamedays[(gamedays.game_date==this_date) & (gamedays.home_team==home_team) & (gamedays.away_team==away_team)]
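Both hunks above extend the word-by-word nickname lookup so the 2024-25 Utah franchise (whose name tokens include 'CLUB' and 'HOCKEY') maps to 'UTA', growing each `np.where` chain to 38 closing parentheses. A flat dict with `Series.map` expresses the same table more maintainably; a sketch with an illustrative subset of the mapping:

```python
import pandas as pd

# Illustrative subset of the nickname -> abbreviation table encoded in the
# nested np.where chain; unknown names fall through to 'mistake' as before.
ABBREV = {"JETS": "WPG", "CAPITALS": "WSH", "GOLDEN": "VGK",
          "KNIGHTS": "VGK", "CLUB": "UTA", "HOCKEY": "UTA"}
away = pd.Series(["JETS", "HOCKEY", "MYSTERY"])
print(away.map(ABBREV).fillna("mistake").tolist())  # ['WPG', 'UTA', 'mistake']
```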
@@ -2434,7 +2440,7 @@ def fix_missing(single, event_coords, events):
 
     return(events)
 
-def full_scrape_1by1(game_id_list, shift_to_espn = False):
+def full_scrape_1by1(game_id_list, shift_to_espn = True):
 
     global single
     global event_coords
@@ -2446,12 +2452,13 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
 
 
     i = 0
-    while i in range(0, len(game_id_list)):
+    while i in range(0, len(game_id_list)) and len(game_id_list)>0:
 
         # First thing to try: Scraping HTML events
 
         try:
             first_time = time.time()
+            print(game_id_list[i])
             game_id = game_id_list[i]
             print('Attempting scrape for: ' + str(game_id))
             season = str(int(str(game_id)[:4])) + str(int(str(game_id)[:4]) + 1)
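In the hunk above, the added `and len(game_id_list)>0` guard is redundant: for an empty list, `i in range(0, 0)` is already `False` on the first test, so the loop body never runs either way. The new `print(game_id_list[i])` also duplicates the 'Attempting scrape for:' line two statements later. A quick check of the guard:

```python
# For an empty list the membership test alone already stops the loop,
# so the extra length check never changes the outcome.
game_id_list = []
i = 0
print(i in range(0, len(game_id_list)))                            # False
print(i in range(0, len(game_id_list)) and len(game_id_list) > 0)  # False
```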
@@ -2678,6 +2685,11 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
             print(str(game_id) + ' has an issue with the HTML Report. Here is the error: ' + str(e))
             i = i + 1
             continue
+
+        except KeyError as k:
+            print(str(game_id) + 'gave some kind of Key Error. Here is the error: ' + str(e))
+            i = i + 1
+            continue
 
         except KeyboardInterrupt:
             print('You manually interrupted the scrape. You will get to keep every game you have already completed scraping after just a bit of post-processing. Good bye.')
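The new `except KeyError as k:` handler above binds the exception as `k` but formats `str(e)`; since Python 3 unbinds an `except ... as` name once its block exits, `e` is undefined on this path, so hitting this handler would likely raise a `NameError` of its own (the message is also missing a space before 'gave'). A sketch of the presumably intended behavior (game id invented for the example):

```python
# Presumed intent: report the KeyError that was actually caught.
game_id = 2023020001  # illustrative game id, not from the diff
try:
    {}["missing_key"]
except KeyError as k:
    print(str(game_id) + ' gave some kind of Key Error. Here is the error: ' + str(k))
```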
TopDownHockey_Scraper-4.0.1.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: TopDownHockey_Scraper
-Version: 3.2.8
+Version: 4.0.1
 Summary: The TopDownHockey Scraper
 Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
 Author: Patrick Bacon
TopDownHockey_Scraper-4.0.1.dist-info/RECORD (added)

@@ -0,0 +1,7 @@
+TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=j-7gTk-cp_0LyZihNxm67xH9KdA3Fx4xrFKKu3-9-rU,42245
+TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=KYIDRpxvZJdLA5AsMGqmxpnCh87LbZJLEIU42i4ULDI,153759
+TopDownHockey_Scraper-4.0.1.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
+TopDownHockey_Scraper-4.0.1.dist-info/METADATA,sha256=jWKaLwhy1rOIth7qe8rcqlqLGBjas2c-sLg_2ljvE2Y,5462
+TopDownHockey_Scraper-4.0.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+TopDownHockey_Scraper-4.0.1.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
+TopDownHockey_Scraper-4.0.1.dist-info/RECORD,,
TopDownHockey_Scraper-4.0.1.dist-info/WHEEL

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.43.0)
+Generator: setuptools (75.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
TopDownHockey_Scraper-3.2.8.dist-info/RECORD (removed)

@@ -1,7 +0,0 @@
-TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=KyQjVTUKLDlnwJb9Sdm6jUaCh6ZxJoq2kEXBHFb1PcM,45374
-TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=vumUGPWrtHOTWltSwKZCJNgKzum9UKr_xh7xX0E9_Fo,153213
-TopDownHockey_Scraper-3.2.8.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
-TopDownHockey_Scraper-3.2.8.dist-info/METADATA,sha256=ngmt5EJasFMsJyNmTR7iOSK_2VBk_7bY0l3eefOf1zk,5462
-TopDownHockey_Scraper-3.2.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-TopDownHockey_Scraper-3.2.8.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
-TopDownHockey_Scraper-3.2.8.dist-info/RECORD,,