TopDownHockey-Scraper 6.1.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,820 @@
+ """
+ A package for scraping Elite Prospects.
+
+ This package is built for personal use. If you are interested in professional use, look into the EliteProspects API.
+ """
+
+ import numpy as np
+ import pandas as pd
+ from bs4 import BeautifulSoup
+ import requests
+ import time
+ import warnings
+ from requests import ConnectionError, ReadTimeout, ConnectTimeout, HTTPError, Timeout
+
+ warnings.filterwarnings("ignore")
+
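+ # A small internal formatting helper, added here as a readability sketch: it
+ # reproduces the "a", "a and b", "a, b, and c" strings that the progress
+ # messages below report. It is not part of the scraping interface.
+ def _english_join(items):
+     """
+     Format a string, or an iterable of strings, as an English list:
+     "a", "a and b", or "a, b, and c".
+     """
+     if isinstance(items, str):
+         return items
+     items = sorted(set(items))
+     if len(items) == 1:
+         return str(items[0])
+     if len(items) == 2:
+         return items[0] + " and " + items[1]
+     return ", ".join(items[:-1]) + ", and " + items[-1]
+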
+ def tableDataText(table):
+     """
+     A function that is built strictly for the back end and should not be run by the user.
+     Parses an HTML table into a data frame. Function built by Marcus Sjölin.
+     """
+     rows = []
+     trs = table.find_all('tr')
+
+     header_row = [th.get_text(strip=True) for th in trs[0].find_all('th')]
+     if header_row:  # if there is a header row, include it first
+         rows.append(header_row)
+         trs = trs[1:]
+     for tr in trs:  # one list of cell strings per table row
+         rows.append([td.get_text(strip=True) for td in tr.find_all('td')])
+
+     df_rows = pd.DataFrame(rows[1:], columns=rows[0])
+
+     return df_rows
+
+ def getskaters(league, year):
+     """
+     A function that is built strictly for the back end and should not be run by the user.
+     Scrapes one season of skater data for one league.
+     """
+
+     url = 'https://www.eliteprospects.com/league/' + league + '/stats/' + year + '?page='
+
+     print("Beginning scrape of " + league + " skater data from " + year + ".")
+
+     # Build a list with one data frame per page of players.
+     players = []
+
+     page = requests.get(url + str(1), timeout = 500)
+     first_page_string = str(page)
+
+     while first_page_string == '<Response [403]>':
+         print("Just got a 403 Error before entering the page. Time to sleep, then re-obtain the link.")
+         time.sleep(100)
+         page = requests.get(url + str(1), timeout = 500)
+         first_page_string = str(page)
+         print("Changed the string before entering the page. Let's try again.")
+
+     if first_page_string == '<Response [404]>':
+         print("ERROR: " + first_page_string + " on league: " + league + " in year: " + year + ". Data doesn't exist for this league in this year.")
+
+     else:
+
+         for i in range(1, 99):
+             page = requests.get(url + str(i), timeout = 500)
+             page_string = str(page)
+
+             while page_string == '<Response [403]>':
+                 print("Just got a 403 Error within the page. Time to sleep, then re-obtain the link.")
+                 time.sleep(100)
+                 page = requests.get(url + str(i), timeout = 500)
+                 page_string = str(page)
+                 print("Changed the string within the page. Let's try again.")
+
+             soup = BeautifulSoup(page.content, "html.parser")
+
+             # Get the players table.
+             player_table = soup.find("table", {"class": "table table-striped table-sortable player-stats highlight-stats season"})
+
+             try:
+                 df_players = tableDataText(player_table)
+
+             except AttributeError:
+                 print("BREAK: TABLE NONE ERROR: " + page_string + " On League: " + league + " In Year: " + year)
+                 break
+
+             if len(df_players) > 0:
+
+                 if df_players['#'].count() > 0:
+                     # Remove empty rows.
+                     df_players = df_players[df_players['#'] != ''].reset_index(drop=True)
+
+                     # Extract the href links in the table.
+                     href_row = []
+                     for link in player_table.find_all('a'):
+                         href_row.append(link.attrs['href'])
+
+                     # Create a data frame of the links and keep only links to players.
+                     df_links = pd.DataFrame(href_row)
+                     df_links.rename(columns={df_links.columns[0]: "link"}, inplace=True)
+                     df_links = df_links[df_links['link'].str.contains("/player/")].reset_index(drop=True)
+
+                     # Add the links to the players.
+                     df_players['link'] = df_links['link']
+
+                     players.append(df_players)
+
+                     # Optionally wait before moving on to the next page.
+                     # time.sleep(1)
+
+                 else:
+                     # This page has no player rows, so the previous page was the final one.
+                     break
+
+         if len(players) != 0:
+             df_players = pd.concat(players).reset_index()
+
+             df_players.columns = map(str.lower, df_players.columns)
+
+             # Clean up the dataset.
+             df_players['season'] = year
+             df_players['league'] = league
+
+             df_players = df_players.drop(['index', '#'], axis=1).reset_index(drop=True)
+
+             df_players['playername'] = df_players['player'].str.replace(r"\(.*\)", "", regex=True)
+             df_players['position'] = df_players['player'].str.extract(r'.*\((.*)\).*')
+             df_players['position'] = np.where(pd.isna(df_players['position']), "F", df_players['position'])
+
+             df_players['fw_def'] = df_players['position'].str.contains('LW|RW|C|F')
+             df_players.loc[df_players['position'].str.contains('LW|RW|C'), 'fw_def'] = 'FW'
+             df_players.loc[df_players['position'].str.contains('D'), 'fw_def'] = 'DEF'
+
+             # Adjust columns; transform data.
+             team = df_players['team'].str.split("“", n=1, expand=True)
+             df_players['team'] = team[0]
+
+             # Drop the helper column.
+             df_players = df_players.drop(columns=['fw_def'])
+             print("Successfully scraped all " + league + " skater data from " + year + ".")
+
+             return df_players
+
+         else:
+             print("LENGTH 0 ERROR: " + first_page_string + " On League: " + league + " In Year: " + year)
+
+ def getgoalies(league, year):
+     """
+     A function that is built strictly for the back end and should not be run by the user.
+     Scrapes one season of goalie data for one league.
+     """
+
+     url = 'https://www.eliteprospects.com/league/' + league + '/stats/' + year + '?page-goalie='
+
+     print("Beginning scrape of " + league + " goalie data from " + year + ".")
+
+     # Build a list with one data frame per page of goalies.
+     players = []
+
+     page = requests.get(url + str(1) + "#goalies", timeout = 500)
+     first_page_string = str(page)
+
+     while first_page_string == '<Response [403]>':
+         print("Just got a 403 Error before entering the page. This means EliteProspects has temporarily blocked your IP address.")
+         print("We're going to sleep for 100 seconds, then try again.")
+         time.sleep(100)
+         page = requests.get(url + str(1) + "#goalies", timeout = 500)
+         first_page_string = str(page)
+         print("Okay, let's try this again.")
+
+     if first_page_string == '<Response [404]>':
+         print("ERROR: " + first_page_string + " on league: " + league + " in year: " + year + ". Data doesn't exist for this league and season.")
+
+     else:
+
+         for i in range(1, 99):
+             page = requests.get(url + str(i), timeout = 500)
+             page_string = str(page)
+
+             while page_string == '<Response [403]>':
+                 print("Just got a 403 Error within the page. Time to sleep, then re-obtain the link.")
+                 time.sleep(100)
+                 page = requests.get(url + str(i), timeout = 500)
+                 page_string = str(page)
+                 print("Changed the string within the page. Let's try again.")
+
+             soup = BeautifulSoup(page.content, "html.parser")
+
+             # Get the goalies table.
+             player_table = soup.find("table", {"class": "table table-striped table-sortable goalie-stats highlight-stats season"})
+
+             try:
+                 df_players = tableDataText(player_table)
+             except AttributeError:
+                 print("BREAK: TABLE NONE ERROR: " + page_string + " On League: " + league + " In Year: " + year)
+                 break
+
+             if len(df_players) > 0:
+
+                 if df_players['#'].count() > 0:
+                     # Remove empty rows.
+                     df_players = df_players[df_players['#'] != ''].reset_index(drop=True)
+
+                     # Extract the href links in the table.
+                     href_row = []
+                     for link in player_table.find_all('a'):
+                         href_row.append(link.attrs['href'])
+
+                     # Create a data frame of the links and keep only links to players.
+                     df_links = pd.DataFrame(href_row)
+                     df_links.rename(columns={df_links.columns[0]: "link"}, inplace=True)
+                     df_links = df_links[df_links['link'].str.contains("/player/")].reset_index(drop=True)
+
+                     # Add the links to the players.
+                     df_players['link'] = df_links['link']
+
+                     players.append(df_players)
+
+                     # Optionally wait before moving on to the next page.
+                     # time.sleep(1)
+
+                 else:
+                     # This page has no goalie rows, so the previous page was the final one.
+                     break
+
+         if len(players) != 0:
+             df_players = pd.concat(players).reset_index()
+
+             df_players.columns = map(str.lower, df_players.columns)
+
+             # Clean up the dataset.
+             df_players['season'] = year
+             df_players['league'] = league
+
+             df_players = df_players.drop(['index', '#'], axis=1).reset_index(drop=True)
+
+             print("Successfully scraped all " + league + " goalie data from " + year + ".")
+
+             # Drop goalies who did not play.
+             df_players = df_players.loc[((df_players.gp != 0) & (~pd.isna(df_players.gp)) & (df_players.gp != "0") & (df_players.gaa != "-"))]
+
+             return df_players
+
+         else:
+             print("LENGTH 0 ERROR: " + first_page_string + " On League: " + league + " In Year: " + year)
+
+ def get_info(link):
+     """
+     A function that is built strictly for the back end and should not be run by the user.
+     Scrapes biographical information from a single player page.
+     """
+
+     page = requests.get(link, timeout = 500)
+     soup = BeautifulSoup(page.content, "html.parser")
+
+     page_string = str(page)
+
+     while ((page_string == '<Response [403]>') or ("evil" in str(soup.p))):
+         print("403 Error. Re-obtaining string and re-trying.")
+         page = requests.get(link, timeout = 500)
+         page_string = str(page)
+         soup = BeautifulSoup(page.content, "html.parser")
+         time.sleep(60)
+
+     lis = soup.find_all('li')
+
+     relevant_lis = [li for li in lis if li.find('span') is not None]
+
+     def labelled(label):
+         # All list items whose <span> label matches, e.g. 'Height' or 'Nation'.
+         return [li for li in relevant_lis if li.find('span').text == label]
+
+     # player
+
+     if soup.find("title") is not None:
+         player = soup.find("title").string.replace(' - Stats, Contract, Salary & More', '')
+     else:
+         player = '-'
+
+     # rights
+
+     if labelled('NHL Rights') != []:
+         rights = labelled('NHL Rights')[0].find('a').text.split(' /')[0]
+     else:
+         rights = '-'
+
+     # status
+
+     if labelled('NHL Rights') != []:
+         status = labelled('NHL Rights')[0].find('a').text.split(' / ')[1]
+     else:
+         status = '-'
+
+     # dob
+
+     dob_matches = [li.find('a')['href'].split('dob=', 1)[1].split('&sort', 1)[0] for li in relevant_lis if li.span.text == 'Date of Birth']
+     if dob_matches != []:
+         dob = dob_matches[0]
+     else:
+         dob = '-'
+
+     # height
+
+     if labelled('Height') != []:
+         height = labelled('Height')[0].text.split('Height')[1].split(' cm')[0]
+     else:
+         height = '-'
+
+     # weight (trim the ' kg' suffix)
+
+     if labelled('Weight') != []:
+         weight = labelled('Weight')[0].text.split('Weight')[1].split(' kg')[0]
+     else:
+         weight = '-'
+
+     # birthplace
+
+     if labelled('Place of Birth') != []:
+         birthplace = labelled('Place of Birth')[0].text.split('Birth')[1]
+     else:
+         birthplace = '-'
+
+     # nation
+
+     if labelled('Nation') != []:
+         nation = labelled('Nation')[0].text.split('Nation')[1]
+     else:
+         nation = '-'
+
+     # shoots
+
+     if labelled('Shoots') != []:
+         shoots = labelled('Shoots')[0].text.split('Shoots')[1]
+     else:
+         shoots = '-'
+
+     # draft
+
+     if labelled('Drafted') != []:
+         draft = labelled('Drafted')[0].text.split('Drafted')[1]
+     else:
+         draft = '-'
+
+     return (player, rights, status, dob, height, weight, birthplace, nation, shoots, draft, link)
+
+ def get_player_information(dataframe):
+     '''
+     Takes a data frame from the get_skaters or get_goalies function, obtains biographical information for every player in that data frame, and returns the result as a new data frame.
+     '''
+
+     myplayer = []
+     myrights = []
+     mystatus = []
+     mydob = []
+     myheight = []
+     myweight = []
+     mybirthplace = []
+     mynation = []
+     myshot = []
+     mydraft = []
+     mylink = []
+
+     links = list(set(dataframe.link))
+
+     print("Beginning scrape for " + str(len(links)) + " players.")
+
+     for i in range(0, len(links)):
+         try:
+             myresult = get_info(links[i])
+             myplayer.append(myresult[0])
+             myrights.append(myresult[1])
+             mystatus.append(myresult[2])
+             mydob.append(myresult[3])
+             myheight.append(myresult[4])
+             myweight.append(myresult[5])
+             mybirthplace.append(myresult[6])
+             mynation.append(myresult[7])
+             myshot.append(myresult[8])
+             mydraft.append(myresult[9])
+             mylink.append(myresult[10])
+             print(myresult[0] + " scraped! That's " + str(i + 1) + " down! Only " + str(len(links) - (i + 1)) + " left to go!")
+         except KeyboardInterrupt:
+             print("You interrupted this one manually. The output here will be every player you've scraped so far. Goodbye!")
+             break
+         except (ConnectionError,
+                 HTTPError,
+                 ReadTimeout,
+                 ConnectTimeout,
+                 ValueError) as errormessage:
+             print("You've been disconnected. Here's the error message:")
+             print(errormessage)
+             print("Luckily, everything you've scraped up to this point will still be safe.")
+             break
+
+     resultdf = pd.DataFrame({"player": myplayer, "rights": myrights, "status": mystatus, "dob": mydob, "height": myheight, "weight": myweight, "birthplace": mybirthplace, "nation": mynation, "shoots": myshot, "draft": mydraft, "link": mylink})
+
+     print("Your scrape is complete! You've obtained player information for " + str(len(resultdf)) + " players!")
+
+     return resultdf
+
+ def get_league_skater_boxcars(league, seasons):
+     """
+     A function that is built strictly for the back end and should not be run by the user.
+     """
+
+     scraped_season_list = _english_join(seasons)
+
+     global hidden_patrick
+     hidden_patrick = 0
+     global error
+     error = 0
+
+     output = pd.DataFrame()
+
+     if type(seasons) == str:
+         single = getskaters(league, seasons)
+         output = output._append(single)
+         print("Scraping " + league + " data is complete. You scraped skater data from " + seasons + ".")
+         return output
+
+     elif ((type(seasons) == tuple) or (type(seasons) == list)):
+
+         for i in range(0, len(seasons)):
+             try:
+                 single = getskaters(league, seasons[i])
+                 output = output._append(single)
+             except KeyboardInterrupt as e:
+                 hidden_patrick = 4
+                 error = e
+                 return output
+             except (ConnectionError,
+                     HTTPError,
+                     ReadTimeout,
+                     ConnectTimeout,
+                     ValueError) as e:
+                 hidden_patrick = 5
+                 error = e
+                 return output
+
+         print("Scraping " + league + " data is complete. You scraped skater data from " + scraped_season_list + ".")
+         return output
+
+ def get_league_goalie_boxcars(league, seasons):
+     """
+     A function that is built strictly for the back end and should not be run by the user.
+     """
+
+     scraped_season_list = _english_join(seasons)
+
+     global hidden_patrick
+     hidden_patrick = 0
+     global error
+     error = 0
+
+     output = pd.DataFrame()
+
+     if type(seasons) == str:
+         single = getgoalies(league, seasons)
+         output = output._append(single)
+         print("Scraping " + league + " data is complete. You scraped goalie data from " + seasons + ".")
+         return output
+
+     elif ((type(seasons) == tuple) or (type(seasons) == list)):
+
+         for i in range(0, len(seasons)):
+             try:
+                 single = getgoalies(league, seasons[i])
+                 output = output._append(single)
+             except KeyboardInterrupt as e:
+                 hidden_patrick = 4
+                 error = e
+                 return output
+             except (ConnectionError,
+                     HTTPError,
+                     ReadTimeout,
+                     ConnectTimeout) as e:
+                 hidden_patrick = 5
+                 error = e
+                 return output
+
+         print("Scraping " + league + " data is complete. You scraped goalie data from " + scraped_season_list + ".")
+         return output
+
+ def get_goalies(leagues, seasons):
+     '''
+     Obtains goalie data for at least one season and at least one league. Returns a dataframe.
+     '''
+
+     season_string = _english_join(seasons)
+     league_string = _english_join(leagues)
+
+     leaguesall = pd.DataFrame()
+
+     if ((type(leagues) == str) and (type(seasons) == str)):
+         print("Your scrape request is goalie data from the following league:")
+         print(league_string)
+         print("In the following season:")
+         print(season_string)
+         leaguesall = get_league_goalie_boxcars(leagues, seasons)
+         print("Completed scraping goalie data from the following league:")
+         print(str(leagues))
+         print("Over the following season:")
+         print(str(seasons))
+         return leaguesall.reset_index().drop(columns = 'index')
+
+     elif ((type(leagues) == str) and ((type(seasons) == tuple) or (type(seasons) == list))):
+         print("Your scrape request is goalie data from the following league:")
+         print(league_string)
+         print("In the following seasons:")
+         print(season_string)
+         leaguesall = get_league_goalie_boxcars(leagues, seasons)
+
+         if hidden_patrick == 4:
+             print("You interrupted this one manually. The output here will be every player you've scraped so far. Goodbye!")
+             return leaguesall.reset_index().drop(columns = 'index')
+         if hidden_patrick == 5:
+             print("You were disconnected! The output here will be every player you've scraped so far. Here's your error message:")
+             print(error)
+             return leaguesall.reset_index().drop(columns = 'index')
+
+         print("Completed scraping goalie data from the following league:")
+         print(str(leagues))
+         print("Over the following seasons:")
+         print(season_string)
+         return leaguesall.reset_index().drop(columns = 'index')
+
+     elif ((type(seasons) == str) and ((type(leagues) == tuple) or (type(leagues) == list))):
+         print("Your scrape request is goalie data from the following leagues:")
+         print(league_string)
+         print("In the following season:")
+         print(season_string)
+
+         for i in range(0, len(leagues)):
+             try:
+                 targetleague = get_league_goalie_boxcars(leagues[i], seasons)
+                 leaguesall = leaguesall._append(targetleague)
+                 if hidden_patrick == 4:
+                     raise KeyboardInterrupt
+                 if hidden_patrick == 5:
+                     raise ConnectionError
+             except KeyboardInterrupt:
+                 print("You interrupted this one manually. The output here will be every player you've scraped so far. Goodbye!")
+                 break
+             except ConnectionError:
+                 print("You were disconnected! Let's sleep and try again.")
+                 print(error)
+                 time.sleep(100)
+                 continue
+
+         scraped_league_list = _english_join(leaguesall.league)
+
+         print("Completed scraping goalie data from the following leagues:")
+         print(scraped_league_list)
+         print("Over the following season:")
+         print(seasons)
+         return leaguesall.reset_index().drop(columns = 'index')
+
+     elif (((type(seasons) == tuple) or (type(seasons) == list)) and ((type(leagues) == tuple) or (type(leagues) == list))):
+         print("Your scrape request is goalie data from the following leagues:")
+         print(league_string)
+         print("In the following seasons:")
+         print(season_string)
+
+         for i in range(0, len(leagues)):
+             try:
+                 targetleague = get_league_goalie_boxcars(leagues[i], seasons)
+                 leaguesall = leaguesall._append(targetleague)
+                 if hidden_patrick == 4:
+                     raise KeyboardInterrupt
+                 if hidden_patrick == 5:
+                     raise ConnectionError
+             except KeyboardInterrupt:
+                 print("You interrupted this one manually. The output here will be every player you've scraped so far. Goodbye!")
+                 break
+             except ConnectionError:
+                 print("You were disconnected! Let's sleep and try again.")
+                 print(error)
+                 time.sleep(100)
+                 continue
+
+         scraped_league_list = _english_join(leaguesall.league)
+
+         print("Completed scraping goalie data from the following leagues:")
+         print(scraped_league_list)
+         print("Over the following seasons:")
+         print(season_string)
+         return leaguesall.reset_index().drop(columns = 'index')
+
+     else:
+         print("There was an issue with the request you made. Please enter a single league and season as a string, or multiple leagues or seasons as either a list or tuple.")
+
+
+ def get_skaters(leagues, seasons):
+     '''
+     Obtains skater data for at least one season and at least one league. Returns a dataframe.
+     '''
+
+     season_string = _english_join(seasons)
+     league_string = _english_join(leagues)
+
+     leaguesall = pd.DataFrame()
+
+     if ((type(leagues) == str) and (type(seasons) == str)):
+         print("Your scrape request is skater data from the following league:")
+         print(league_string)
+         print("In the following season:")
+         print(season_string)
+         leaguesall = get_league_skater_boxcars(leagues, seasons)
+         print("Completed scraping skater data from the following league:")
+         print(str(leagues))
+         print("Over the following season:")
+         print(str(seasons))
+         return leaguesall.reset_index().drop(columns = 'index')
+
+     elif ((type(leagues) == str) and ((type(seasons) == tuple) or (type(seasons) == list))):
+         print("Your scrape request is skater data from the following league:")
+         print(league_string)
+         print("In the following seasons:")
+         print(season_string)
+         leaguesall = get_league_skater_boxcars(leagues, seasons)
+
+         if hidden_patrick == 4:
+             print("You interrupted this one manually. The output here will be every player you've scraped so far. Goodbye!")
+             return leaguesall.reset_index().drop(columns = 'index')
+         if hidden_patrick == 5:
+             print("You were disconnected! The output here will be every player you've scraped so far. Here's your error message:")
+             print(error)
+             return leaguesall.reset_index().drop(columns = 'index')
+
+         print("Completed scraping skater data from the following league:")
+         print(str(leagues))
+         print("Over the following seasons:")
+         print(season_string)
+         return leaguesall.reset_index().drop(columns = 'index')
+
+     elif ((type(seasons) == str) and ((type(leagues) == tuple) or (type(leagues) == list))):
+         print("Your scrape request is skater data from the following leagues:")
+         print(league_string)
+         print("In the following season:")
+         print(season_string)
+
+         for i in range(0, len(leagues)):
+             try:
+                 targetleague = get_league_skater_boxcars(leagues[i], seasons)
+                 leaguesall = leaguesall._append(targetleague)
+                 if hidden_patrick == 4:
+                     raise KeyboardInterrupt
+                 if hidden_patrick == 5:
+                     raise ConnectionError
+             except KeyboardInterrupt:
+                 print("You interrupted this one manually. The output here will be every player you've scraped so far. Goodbye!")
+                 break
+             except ConnectionError:
+                 print("You were disconnected! Let's sleep and try again.")
+                 print(error)
+                 time.sleep(100)
+                 continue
+
+         scraped_league_list = _english_join(leaguesall.league)
+
+         print("Completed scraping skater data from the following leagues:")
+         print(scraped_league_list)
+         print("Over the following season:")
+         print(seasons)
+         return leaguesall.reset_index().drop(columns = 'index')
+
+     elif (((type(seasons) == tuple) or (type(seasons) == list)) and ((type(leagues) == tuple) or (type(leagues) == list))):
+         print("Your scrape request is skater data from the following leagues:")
+         print(league_string)
+         print("In the following seasons:")
+         print(season_string)
+
+         for i in range(0, len(leagues)):
+             try:
+                 targetleague = get_league_skater_boxcars(leagues[i], seasons)
+                 leaguesall = leaguesall._append(targetleague)
+                 if hidden_patrick == 4:
+                     raise KeyboardInterrupt
+                 if hidden_patrick == 5:
+                     raise ConnectionError
+             except KeyboardInterrupt:
+                 print("You interrupted this one manually. The output here will be every player you've scraped so far. Goodbye!")
+                 break
+             except ConnectionError:
+                 print("You were disconnected! Let's sleep and try again.")
+                 print(error)
+                 time.sleep(100)
+                 continue
+
+         scraped_league_list = _english_join(leaguesall.league)
+
+         print("Completed scraping skater data from the following leagues:")
+         print(scraped_league_list)
+         print("Over the following seasons:")
+         print(season_string)
+         return leaguesall.reset_index().drop(columns = 'index')
+
+     else:
+         print("There was an issue with the request you made. Please enter a single league and season as a string, or multiple leagues or seasons as either a list or tuple.")
+
+ def add_player_information(dataframe):
+     '''
+     Takes a data frame from the get_skaters or get_goalies function, obtains biographical information for every player in that data frame, and returns a data frame that joins the new information to the data you've already scraped.
+     '''
+     with_player_info = get_player_information(dataframe)
+     doubledup = dataframe.merge(with_player_info.drop(columns = ['player']), on = 'link', how = 'inner')
+     return doubledup
+
+ ### EXAMPLE ONE: GET ALL SKATERS FROM THE MHL IN 2020-2021 ###
+
+ # mhl2021 = get_skaters("mhl", "2020-2021")
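+
+ ### EXAMPLE TWO: GET ALL GOALIES FROM THE MHL IN 2020-2021 ###
+
+ # A sketch of the equivalent goalie call; it follows the same pattern as
+ # EXAMPLE ONE, and the variable name is illustrative, not part of the package.
+ # mhl2021_goalies = get_goalies("mhl", "2020-2021")
+
+ ### EXAMPLE THREE: ADD BIOGRAPHICAL INFORMATION TO A SCRAPED DATA FRAME ###
+
+ # A sketch that assumes the data frame from EXAMPLE ONE; this visits one page
+ # per player, so it is much slower than the league-level scrape.
+ # mhl2021_info = add_player_information(mhl2021)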
+
+ print("Welcome to the TopDownHockey EliteProspects Scraper, built by Patrick Bacon.")
+ print("This scraper is built strictly for personal use. For commercial or professional use, please look into the EliteProspects API.")
+ print("If you enjoy the scraper and would like to support my work, feel free to follow me on Twitter @TopDownHockey. Have fun!")
+