TopDownHockey_Scraper-3.2.8-py3-none-any.whl → TopDownHockey_Scraper-4.0.0-py3-none-any.whl

This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

This version of TopDownHockey-Scraper was flagged as a potentially problematic release.

TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py

@@ -252,7 +252,7 @@ def get_info(link):
     """
     A function that is built strictly for the back end and should not be run by the user.
     """
-
+
     page = requests.get(link, timeout = 500)
     soup = BeautifulSoup(page.content, "html.parser")
 
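For reference, both versions fetch and parse each player page the same way before the extraction logic diverges. A minimal sketch of that step, with a placeholder URL rather than a real Elite Prospects link:

import requests
from bs4 import BeautifulSoup

link = "https://www.eliteprospects.com/player/0/example"  # hypothetical URL, not from the package
page = requests.get(link, timeout=500)                    # generous timeout, as in the package
soup = BeautifulSoup(page.content, "html.parser")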
@@ -265,99 +265,80 @@ def get_info(link):
         soup = BeautifulSoup(page.content, "html.parser")
         time.sleep(60)
 
-    if soup.find("title") != None:
-        player = soup.find("title").string.replace(" - Elite Prospects" ,"")
+    lis = soup.find_all('li')
 
-    else: player = "-"
-
-    if soup.find("div", {"class":"order-11 ep-list__item ep-list__item--in-card-body ep-list__item--is-compact"})!=None:
-        rights = soup.find("div", {"class":"order-11 ep-list__item ep-list__item--in-card-body ep-list__item--is-compact"}
-            ).find("div", {"class":"col-xs-12 col-18 text-right p-0"}).find("span").string.split("\n")[1].split("/")[0].strip()
-        status = soup.find("div", {"class":"order-11 ep-list__item ep-list__item--in-card-body ep-list__item--is-compact"}
-            ).find("div", {"class":"col-xs-12 col-18 text-right p-0"}).find("span").string.split("\n")[1].split("/")[1].strip()
-    else:
-        rights = "-"
-        status = "-"
-
-    if (soup.find("div", {"class":"col-xs-12 col-17 text-right p-0 ep-text-color--black"}))!= None:
-        if 'dob' in (soup.find("div", {"class":"col-xs-12 col-17 text-right p-0 ep-text-color--black"})).find("a")['href']:
-            dob = soup.find("div", {"class":"col-xs-12 col-17 text-right p-0 ep-text-color--black"}).find("a")['href'].split("dob=", 1)[1].split("&sort", 1)[0]
-        else:
-            dob = "-"
+    relevant_lis = [li for li in lis if li.find('span') is not None]
 
+    # player
+
+    if soup.find("title") != None:
+        player = soup.find("title").string.replace(' - Stats, Contract, Salary & More', '')
     else:
-        dob = "-"
-
-    if soup.find("div", {"class":"order-6 order-sm-3 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}) != None:
-        if "cm" in soup.find("div", {"class":"order-6 order-sm-3 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-            ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0 ep-text-color--black"}).string:
-            height = soup.find("div", {"class":"order-6 order-sm-3 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-            ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0 ep-text-color--black"}).string.split(" / ")[1].split("cm")[0].strip()
-        else:
-            height = "-"
-
+        player = '-'
+
+    # status
+
+    if [li for li in relevant_lis if li.find('span').text=='NHL Rights'] != []:
+        rights = [li for li in relevant_lis if li.find('span').text=='NHL Rights'][0].find('a').text.split(' /')[0]
+    else:
+        rights = '-'
+
+    # rights
+
+    if [li for li in relevant_lis if li.find('span').text=='NHL Rights'] != []:
+        status = [li for li in relevant_lis if li.find('span').text=='NHL Rights'][0].find('a').text.split(' / ')[1]
+    else:
+        status = '-'
+
+    # dob
+
+    if [li.find('a')['href'].split('dob=', 1)[1].split('&sort', 1)[0] for li in relevant_lis if li.span.text == 'Date of Birth'] != []:
+        dob = [li.find('a')['href'].split('dob=', 1)[1].split('&sort', 1)[0] for li in relevant_lis if li.span.text == 'Date of Birth'][0]
+    else:
+        dob = '-'
+
+    # height
+
+    if [li for li in relevant_lis if li.find('span').text=='Height'] != []:
+        height = [li for li in relevant_lis if li.find('span').text=='Height'][0].text.split('Height')[1].split(' cm')[0]
     else:
-        height = "-"
-
-    if soup.find("div", {"class":"order-7 order-sm-5 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}) != None:
-        if soup.find("div", {"class":"order-7 order-sm-5 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-            ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0 ep-text-color--black"}).string.split("\n")[1].split("lbs")[0].strip() == '- / -':
-            weight = "-"
-        else:
-            weight = soup.find("div", {"class":"order-7 order-sm-5 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-            ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0 ep-text-color--black"}).string.split("\n")[1].split("lbs")[0].strip()
-
-    else: weight = "-"
-
-    if soup.find("div", {"class":"order-2 order-sm-4 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-        ) != None:
-        if soup.find("div", {"class":"order-2 order-sm-4 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-            ).find(
-            "div", {"class":"col-xs-12 col-17 text-right p-0 ep-text-color--black"}).find("a") != None:
-
-            birthplace = soup.find("div", {"class":"order-2 order-sm-4 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-            ).find(
-            "div", {"class":"col-xs-12 col-17 text-right p-0 ep-text-color--black"}).find("a").string.replace("\n", "").strip()
-
-        else:
-            birthplace = "-"
+        height = '-'
+
+    # weight
+
+    if [li for li in relevant_lis if li.find('span').text=='Weight'] != []:
+        weight = [li for li in relevant_lis if li.find('span').text=='Weight'][0].text.split('Weight')[1].split(' cm')[0]
     else:
-        birthplace = "-"
-
-    if soup.find("div", {"class":"order-3 order-sm-6 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}) != None:
-        if soup.find("div", {"class":"order-3 order-sm-6 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-            ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0 ep-text-color--black"}).find("a") != None:
-            nation = soup.find("div", {"class":"order-3 order-sm-6 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-            ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0 ep-text-color--black"}).find("a").string.replace("\n", "").strip()
-        else: nation = "-"
-
+        weight = '-'
+
+    # birthplace
+
+    if [li for li in relevant_lis if li.find('span').text=='Place of Birth'] != []:
+        birthplace = [li for li in relevant_lis if li.find('span').text=='Place of Birth'][0].text.split('Birth')[1]
     else:
-        nation = "-"
-
-    if soup.find("div", {"class":"order-8 order-sm-7 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}) !=None:
-        shoots = soup.find("div", {"class":"order-8 order-sm-7 ep-list__item ep-list__item--col-2 ep-list__item--in-card-body ep-list__item--is-compact"}
-            ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0 ep-text-color--black"}).string.replace("\n", "").strip()
-
+        birthplace = '-'
+
+    # nation
+
+    if [li for li in relevant_lis if li.find('span').text=='Nation'] != []:
+        nation = [li for li in relevant_lis if li.find('span').text=='Nation'][0].text.split('Nation')[1]
     else:
-        shoots = "-"
-
-    if soup.find("div", {"class":"order-12 ep-list__item ep-list__item--in-card-body ep-list__item--is-compact"}) != None:
-        draft = soup.find("div", {"class":"order-12 ep-list__item ep-list__item--in-card-body ep-list__item--is-compact"}
-            ).find(
-            "div", {"class":"col-xs-12 col-18 text-right p-0"}).find("a").string.replace("\n", "").strip()
-    else:
-        draft = "-"
-
-    #height = np.where(height=="- / -", "-", height)
-
-    #print(player + " scraped!")
+        nation = '-'
+
+    # shoots
+
+    if [li for li in relevant_lis if li.find('span').text=='Shoots'] != []:
+        shoots = [li for li in relevant_lis if li.find('span').text=='Shoots'][0].text.split('Shoots')[1]
+    else:
+        shoots = '-'
+
+    # draft
+
+    if [li for li in relevant_lis if li.find('span').text=='Drafted'] != []:
+        draft = [li for li in relevant_lis if li.find('span').text=='Drafted'][0].text.split('Drafted')[1]
+    else:
+        draft = '-'
+
     return(player, rights, status, dob, height, weight, birthplace, nation, shoots, draft, link)
 
 def get_player_information(dataframe):
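The 4.0.0 rewrite above drops the hard-coded Bootstrap class selectors in favor of matching each `<li>` by its `<span>` label, which survives cosmetic markup changes. A minimal sketch of that lookup pattern against stand-in HTML (the markup and the `field` helper are illustrative, not the package's code):

from bs4 import BeautifulSoup

# Stand-in markup: each <li> pairs a <span> label with a value, as on the redesigned page.
html = """
<ul>
  <li><span>Nation</span> Canada</li>
  <li><span>Shoots</span> L</li>
</ul>
"""
soup = BeautifulSoup(html, "html.parser")
relevant_lis = [li for li in soup.find_all('li') if li.find('span') is not None]

def field(label):
    # Same shape as the new get_info() logic: filter by label, strip the label from the text.
    matches = [li for li in relevant_lis if li.find('span').text == label]
    return matches[0].text.split(label)[1].strip() if matches else '-'

print(field('Nation'))  # Canada
print(field('Shoots'))  # L

Matching on visible labels rather than layout classes is what lets the same extraction logic tolerate a page redesign.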
@@ -382,17 +363,17 @@ def get_player_information(dataframe):
     for i in range(0, len(list(set(dataframe.link)))):
         try:
             myresult = get_info(((list(set(dataframe.link))[i])))
-            myplayer._append(myresult[0])
-            myrights._append(myresult[1])
-            mystatus._append(myresult[2])
-            mydob._append(myresult[3])
-            myheight._append(myresult[4])
-            myweight._append(myresult[5])
-            mybirthplace._append(myresult[6])
-            mynation._append(myresult[7])
-            myshot._append(myresult[8])
-            mydraft._append(myresult[9])
-            mylink._append(myresult[10])
+            myplayer.append(myresult[0])
+            myrights.append(myresult[1])
+            mystatus.append(myresult[2])
+            mydob.append(myresult[3])
+            myheight.append(myresult[4])
+            myweight.append(myresult[5])
+            mybirthplace.append(myresult[6])
+            mynation.append(myresult[7])
+            myshot.append(myresult[8])
+            mydraft.append(myresult[9])
+            mylink.append(myresult[10])
             print(myresult[0] + " scraped! That's " + str(i + 1) + " down! Only " + str(len(list(set(dataframe.link))) - (i + 1)) + " left to go!")
         except KeyboardInterrupt:
             print("You interrupted this one manually. The output here will be every player you've scraped so far. Good bye!")
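The accumulators in get_player_information are plain Python lists, and list objects have no `_append` method (that is a pandas.DataFrame method), so calling it raises AttributeError; 4.0.0 restores the built-in `list.append`. A small sketch of the corrected pattern (names and data are illustrative):

myplayer, myrights = [], []

myresult = ('Example Player', 'EDM')  # stand-in for get_info(...) output
myplayer.append(myresult[0])          # list.append mutates in place and returns None
myrights.append(myresult[1])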
TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py

@@ -1912,6 +1912,8 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
         game_date = pd.to_datetime(this_date))
 
     gamedays = gamedays._append(fax)
+
+    gamedays = gamedays[gamedays.espn_id!='gameId']
 
     gamedays = gamedays.assign(
         home_team = np.where(gamedays.home_team=='ST LOUIS BLUES', 'ST. LOUIS BLUES', gamedays.home_team),
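The new filter drops table-header rows that the ESPN schedule scrape re-ingests as data. A sketch of the effect on stand-in data:

import pandas as pd

# Stand-in for the scraped schedule: a stray header row re-enters the frame as data.
gamedays = pd.DataFrame({'espn_id': ['401559001', 'gameId', '401559002']})
gamedays = gamedays[gamedays.espn_id != 'gameId']  # keep only real game IDs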
@@ -1977,8 +1979,9 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
         np.where(gamedays.away_team=='GOLDEN', 'VGK',
         np.where(gamedays.away_team=='KNIGHTS', 'VGK',
         np.where(gamedays.away_team=='CAPITALS', 'WSH',
-        np.where(gamedays.away_team=='JETS', 'WPG', 'mistake'
-        ))))))))))))))))))))))))))))))))))))
+        np.where(gamedays.away_team=='JETS', 'WPG',
+        np.where(gamedays.away_team=='CLUB', 'UTA', 'mistake'
+        )))))))))))))))))))))))))))))))))))))
     )
 
     gamedays = gamedays.assign(
@@ -2017,8 +2020,9 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
         np.where(gamedays.home_team=='GOLDEN', 'VGK',
         np.where(gamedays.home_team=='KNIGHTS', 'VGK',
         np.where(gamedays.home_team=='CAPITALS', 'WSH',
-        np.where(gamedays.home_team=='JETS', 'WPG', 'mistake'
-        ))))))))))))))))))))))))))))))))))))
+        np.where(gamedays.home_team=='JETS', 'WPG',
+        np.where(gamedays.home_team=='CLUB', 'UTA', 'mistake'
+        )))))))))))))))))))))))))))))))))))))
     )
 
     gamedays = gamedays[(gamedays.game_date==this_date) & (gamedays.home_team==home_team) & (gamedays.away_team==away_team)]
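Both nested np.where chains translate scraped team-name fragments to abbreviations, and 4.0.0 extends each with Utah ('CLUB' → 'UTA'). A dictionary-based Series.map expresses the same lookup table more flatly; a sketch of that alternative, with an abridged illustrative mapping rather than the package's full table:

import pandas as pd

TEAM_ABBREVS = {'CAPITALS': 'WSH', 'JETS': 'WPG', 'CLUB': 'UTA'}  # abridged, illustrative

teams = pd.Series(['CAPITALS', 'CLUB', 'UNKNOWN'])
abbrevs = teams.map(TEAM_ABBREVS).fillna('mistake')  # unmapped names flagged, as in the original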
@@ -2434,7 +2438,7 @@ def fix_missing(single, event_coords, events):
 
     return(events)
 
-def full_scrape_1by1(game_id_list, shift_to_espn = False):
+def full_scrape_1by1(game_id_list, shift_to_espn = True):
 
     global single
     global event_coords
@@ -2446,12 +2450,13 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
 
     i = 0
 
-    while i in range(0, len(game_id_list)):
+    while i in range(0, len(game_id_list)) and len(game_id_list)>0:
 
         # First thing to try: Scraping HTML events
 
         try:
            first_time = time.time()
+           print(game_id_list[i])
            game_id = game_id_list[i]
            print('Attempting scrape for: ' + str(game_id))
            season = str(int(str(game_id)[:4])) + str(int(str(game_id)[:4]) + 1)
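For a non-negative integer i, the condition `i in range(0, len(game_id_list))` is equivalent to `i < len(game_id_list)`, and it is already false for an empty list, so the added length guard is a belt-and-braces check. A sketch of the same control flow in conventional form (the IDs are stand-ins):

game_id_list = [2023020001, 2023020002]  # hypothetical NHL game IDs
i = 0
while i < len(game_id_list):  # same effect as `i in range(0, len(game_id_list))` for i >= 0
    game_id = game_id_list[i]
    print('Attempting scrape for: ' + str(game_id))
    i += 1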
@@ -2678,6 +2683,11 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
             print(str(game_id) + ' has an issue with the HTML Report. Here is the error: ' + str(e))
             i = i + 1
             continue
+
+        except KeyError as k:
+            print(str(game_id) + 'gave some kind of Key Error. Here is the error: ' + str(e))
+            i = i + 1
+            continue
 
         except KeyboardInterrupt:
             print('You manually interrupted the scrape. You will get to keep every game you have already completed scraping after just a bit of post-processing. Good bye.')
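Note that the new handler binds the exception as `k` but formats `str(e)`; inside an `except ... as name:` block, only the bound name is guaranteed to refer to the caught exception. A minimal sketch of the intended pattern (the failing lookup is a stand-in):

game_id = 2023020001  # hypothetical game ID
try:
    coords = {}['coordinates']  # stand-in for the lookup that raises KeyError
except KeyError as k:
    # Format the bound name, k, so the message actually shows the caught error.
    print(str(game_id) + ' gave some kind of Key Error. Here is the error: ' + str(k))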
dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: TopDownHockey_Scraper
-Version: 3.2.8
+Version: 4.0.0
 Summary: The TopDownHockey Scraper
 Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
 Author: Patrick Bacon
dist-info/RECORD (4.0.0)

@@ -0,0 +1,7 @@
+TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=j-7gTk-cp_0LyZihNxm67xH9KdA3Fx4xrFKKu3-9-rU,42245
+TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=jPYnjZMTN6tvQgAvQo9mmFSVQmc4-fEra1jLeuFRkpA,153624
+TopDownHockey_Scraper-4.0.0.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
+TopDownHockey_Scraper-4.0.0.dist-info/METADATA,sha256=5CD1aQY7EMR8wuQcRmItqnY9Uk9Ews8P0Sx_b3sJFUI,5462
+TopDownHockey_Scraper-4.0.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+TopDownHockey_Scraper-4.0.0.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
+TopDownHockey_Scraper-4.0.0.dist-info/RECORD,,
dist-info/WHEEL

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.43.0)
+Generator: setuptools (75.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
dist-info/RECORD (3.2.8)

@@ -1,7 +0,0 @@
-TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=KyQjVTUKLDlnwJb9Sdm6jUaCh6ZxJoq2kEXBHFb1PcM,45374
-TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=vumUGPWrtHOTWltSwKZCJNgKzum9UKr_xh7xX0E9_Fo,153213
-TopDownHockey_Scraper-3.2.8.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
-TopDownHockey_Scraper-3.2.8.dist-info/METADATA,sha256=ngmt5EJasFMsJyNmTR7iOSK_2VBk_7bY0l3eefOf1zk,5462
-TopDownHockey_Scraper-3.2.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-TopDownHockey_Scraper-3.2.8.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
-TopDownHockey_Scraper-3.2.8.dist-info/RECORD,,