TopDownHockey-Scraper 2.2.1-py3-none-any.whl → 2.2.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of TopDownHockey-Scraper might be problematic.
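The substantive change in this release is a mechanical rename: every .append(...) call in both scraper modules becomes ._append(...). For pandas DataFrames this targets DataFrame._append, the private method pandas kept after removing the public DataFrame.append in pandas 2.0. A minimal sketch of what the rename relies on, assuming pandas >= 2.0 (the df and extra frames below are illustrative, not the package's data):

    import pandas as pd

    df = pd.DataFrame({"a": [1]})
    extra = pd.DataFrame({"a": [2]})

    # Removed in pandas 2.0; raises AttributeError there:
    # df = df.append(extra, ignore_index=True)

    # What this release switches to; works today, but it is private API
    # and may disappear in a future pandas release:
    df = df._append(extra, ignore_index=True)

    # The documented replacement:
    df = pd.concat([df, extra], ignore_index=True)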

--- TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py
+++ TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py
@@ -28,10 +28,10 @@ def tableDataText(table):
 
  headerow = [td.get_text(strip=True) for td in trs[0].find_all('th')] # header row
  if headerow: # if there is a header row include first
- rows.append(headerow)
+ rows._append(headerow)
  trs = trs[1:]
  for tr in trs: # for every table row
- rows.append([td.get_text(strip=True) for td in tr.find_all('td')]) # data row
+ rows._append([td.get_text(strip=True) for td in tr.find_all('td')]) # data row
 
  df_rows = pd.DataFrame(rows[1:], columns=rows[0])
 
@@ -97,7 +97,7 @@ def getskaters(league, year):
  # Extract href links in table
  href_row = []
  for link in player_table.find_all('a'):
- href_row.append(link.attrs['href'])
+ href_row._append(link.attrs['href'])
 
  # Create data frame, rename and only keep links to players
  df_links = pd.DataFrame(href_row)
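Note that rows in the tableDataText hunk and href_row here are plain Python lists, not DataFrames (each is built up and only then handed to pd.DataFrame), and list has no _append method. The blanket rename therefore breaks these call sites at runtime:

    rows = []
    rows.append(["col1", "col2"])    # correct: list.append
    rows._append(["col1", "col2"])   # AttributeError: 'list' object has no attribute '_append'

The same concern applies to the other list receivers renamed further down: the my* lists in get_player_information, the standings lists in scrape_standings, the coordinate lists in scrape_espn_events, and col_list in full_scrape_1by1.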
@@ -107,7 +107,7 @@ def getskaters(league, year):
  # Add links to players
  df_players['link']=df_links['link']
 
- players.append(df_players)
+ players._append(df_players)
 
  # Wait 3 seconds before going to next
  #time.sleep(1)
@@ -209,7 +209,7 @@ def getgoalies(league, year):
  # Extract href links in table
  href_row = []
  for link in player_table.find_all('a'):
- href_row.append(link.attrs['href'])
+ href_row._append(link.attrs['href'])
 
  # Create data frame, rename and only keep links to players
  df_links = pd.DataFrame(href_row)
@@ -219,7 +219,7 @@ def getgoalies(league, year):
  # Add links to players
  df_players['link']=df_links['link']
 
- players.append(df_players)
+ players._append(df_players)
 
  # Wait 3 seconds before going to next
  # time.sleep(1)
@@ -382,17 +382,17 @@ def get_player_information(dataframe):
  for i in range(0, len(list(set(dataframe.link)))):
  try:
  myresult = get_info(((list(set(dataframe.link))[i])))
- myplayer.append(myresult[0])
- myrights.append(myresult[1])
- mystatus.append(myresult[2])
- mydob.append(myresult[3])
- myheight.append(myresult[4])
- myweight.append(myresult[5])
- mybirthplace.append(myresult[6])
- mynation.append(myresult[7])
- myshot.append(myresult[8])
- mydraft.append(myresult[9])
- mylink.append(myresult[10])
+ myplayer._append(myresult[0])
+ myrights._append(myresult[1])
+ mystatus._append(myresult[2])
+ mydob._append(myresult[3])
+ myheight._append(myresult[4])
+ myweight._append(myresult[5])
+ mybirthplace._append(myresult[6])
+ mynation._append(myresult[7])
+ myshot._append(myresult[8])
+ mydraft._append(myresult[9])
+ mylink._append(myresult[10])
  print(myresult[0] + " scraped! That's " + str(i + 1) + " down! Only " + str(len(list(set(dataframe.link))) - (i + 1)) + " left to go!")
  except KeyboardInterrupt:
  print("You interrupted this one manually. The output here will be every player you've scraped so far. Good bye!")
@@ -447,7 +447,7 @@ def get_league_skater_boxcars(league, seasons):
 
  if type(seasons) == str:
  single = getskaters(league, seasons)
- output = output.append(single)
+ output = output._append(single)
  print("Scraping " + league + " data is complete. You scraped skater data from " + seasons + ".")
  return(output)
 
@@ -456,7 +456,7 @@ def get_league_skater_boxcars(league, seasons):
  for i in range(0, len(seasons)):
  try:
  single = getskaters(league, seasons[i])
- output = output.append(single)
+ output = output._append(single)
  except KeyboardInterrupt as e:
  hidden_patrick = 4
  error = e
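The get_league_*_boxcars hunks grow a DataFrame inside a loop, so output._append(single) does run on pandas 2.x, but it leans on private API and copies the whole frame on every iteration. A hedged alternative sketch, not the package's code, assuming seasons is a list of season strings as elsewhere in this module:

    import pandas as pd

    frames = []
    for season in seasons:
        frames.append(getskaters(league, season))   # getskaters as defined above
    output = pd.concat(frames, ignore_index=True)   # one concatenation at the end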
@@ -495,7 +495,7 @@ def get_league_goalie_boxcars(league, seasons):
 
  if type(seasons) == str:
  single = getgoalies(league, seasons)
- output = output.append(single)
+ output = output._append(single)
  print("Scraping " + league + " data is complete. You scraped goalie data from " + seasons + ".")
  return(output)
 
@@ -504,7 +504,7 @@ def get_league_goalie_boxcars(league, seasons):
  for i in range(0, len(seasons)):
  try:
  single = getgoalies(league, seasons[i])
- output = output.append(single)
+ output = output._append(single)
  except KeyboardInterrupt as e:
  hidden_patrick = 4
  error = e
@@ -597,7 +597,7 @@ def get_goalies(leagues, seasons):
  for i in range (0, len(leagues)):
  try:
  targetleague = get_league_goalie_boxcars(leagues[i], seasons)
- leaguesall = leaguesall.append(targetleague)
+ leaguesall = leaguesall._append(targetleague)
  if hidden_patrick == 4:
  raise KeyboardInterrupt
  if hidden_patrick == 5:
@@ -633,7 +633,7 @@ def get_goalies(leagues, seasons):
  for i in range (0, len(leagues)):
  try:
  targetleague = get_league_goalie_boxcars(leagues[i], seasons)
- leaguesall = leaguesall.append(targetleague)
+ leaguesall = leaguesall._append(targetleague)
  if hidden_patrick == 4:
  raise KeyboardInterrupt
  if hidden_patrick == 5:
@@ -749,7 +749,7 @@ def get_skaters(leagues, seasons):
  for i in range (0, len(leagues)):
  try:
  targetleague = get_league_skater_boxcars(leagues[i], seasons)
- leaguesall = leaguesall.append(targetleague)
+ leaguesall = leaguesall._append(targetleague)
  if hidden_patrick == 4:
  raise KeyboardInterrupt
  if hidden_patrick == 5:
@@ -785,7 +785,7 @@ def get_skaters(leagues, seasons):
  for i in range (0, len(leagues)):
  try:
  targetleague = get_league_skater_boxcars(leagues[i], seasons)
- leaguesall = leaguesall.append(targetleague)
+ leaguesall = leaguesall._append(targetleague)
  if hidden_patrick == 4:
  raise KeyboardInterrupt
  if hidden_patrick == 5:
--- TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py
+++ TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py
@@ -48,20 +48,20 @@ def scrape_standings(season):
  div = (record_df['division'].iloc[i]['name'])
  conf = (record_df['conference'].iloc[i]['name'])
  for x in range(0, len((record_df['teamRecords'].iloc[i]))):
- divisions.append(div)
- conferences.append(conf)
- team.append(record_df['teamRecords'].iloc[i][x]['team']['name'])
- wins.append(record_df['teamRecords'].iloc[i][x]['leagueRecord']['wins'])
- losses.append(record_df['teamRecords'].iloc[i][x]['leagueRecord']['losses'])
- otl.append(record_df['teamRecords'].iloc[i][x]['leagueRecord']['ot'])
- gf.append(record_df['teamRecords'].iloc[i][x]['goalsScored'])
- ga.append(record_df['teamRecords'].iloc[i][x]['goalsAgainst'])
+ divisions._append(div)
+ conferences._append(conf)
+ team._append(record_df['teamRecords'].iloc[i][x]['team']['name'])
+ wins._append(record_df['teamRecords'].iloc[i][x]['leagueRecord']['wins'])
+ losses._append(record_df['teamRecords'].iloc[i][x]['leagueRecord']['losses'])
+ otl._append(record_df['teamRecords'].iloc[i][x]['leagueRecord']['ot'])
+ gf._append(record_df['teamRecords'].iloc[i][x]['goalsScored'])
+ ga._append(record_df['teamRecords'].iloc[i][x]['goalsAgainst'])
  if season>20092010:
- row.append(record_df['teamRecords'].iloc[i][x]['row'])
- gp.append(record_df['teamRecords'].iloc[i][x]['gamesPlayed'])
- pts.append(record_df['teamRecords'].iloc[i][x]['points'])
+ row._append(record_df['teamRecords'].iloc[i][x]['row'])
+ gp._append(record_df['teamRecords'].iloc[i][x]['gamesPlayed'])
+ pts._append(record_df['teamRecords'].iloc[i][x]['points'])
  if season>20192020:
- rw.append(record_df['teamRecords'].iloc[i][x]['regulationWins'])
+ rw._append(record_df['teamRecords'].iloc[i][x]['regulationWins'])
 
  if season < 20092010:
  stand = pd.DataFrame().assign(Team = team, Division = divisions, Conference = conferences,
@@ -97,7 +97,7 @@ def scrape_schedule(start_date, end_date):
 
  for i in range (0, len(date_df)):
  datedf = pd.DataFrame(date_df.games.iloc[i])
- gamedf = gamedf.append(datedf)
+ gamedf = gamedf._append(datedf)
  global team_df
  team_df = pd.DataFrame(gamedf['teams'].values.tolist(), index = gamedf.index)
  away_df = pd.DataFrame(team_df['away'].values.tolist(), index = team_df.index)
@@ -160,7 +160,7 @@ def hs_strip_html(td):
  elif i % 3 == 1:
  if name != '':
  position = bar[i].get_text()
- players.append([name, number, position])
+ players._append([name, number, position])
 
  td[y] = players
  else:
@@ -429,9 +429,10 @@ def scrape_html_roster(season, game_id):
  roster_df['Name'] = np.where(roster_df['Name']=='COLIN', 'COLIN WHITE CAN', roster_df['Name'])
 
  roster_df['Name'] = (np.where(roster_df['Name']== "JANIS MOSER" , "J.J. MOSER",
- (np.where(roster_df['Name']== "NICK PAUL" , "NICHOLAS PAUL",
+ (np.where(roster_df['Name']== "NICHOLAS PAUL" , "NICK PAUL",
  (np.where(roster_df['Name']== "JACOB MIDDLETON" , "JAKE MIDDLETON",
- roster_df['Name']))))))
+ (np.where(roster_df['Name']== "TOMMY NOVAK" , "THOMAS NOVAK",
+ roster_df['Name']))))))))
 
  roster_df['Name'] = roster_df['Name'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.upper()
 
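Beyond the rename, this hunk reverses the direction of one mapping (2.2.1 turned "NICK PAUL" into "NICHOLAS PAUL"; 2.2.3 turns "NICHOLAS PAUL" into "NICK PAUL") and adds "TOMMY NOVAK" → "THOMAS NOVAK". A sketch of the same renames with a mapping dict instead of nested np.where calls, offered as a suggestion rather than the package's code, since it is easier to extend as more players need fixing:

    name_fixes = {
        "JANIS MOSER": "J.J. MOSER",
        "NICHOLAS PAUL": "NICK PAUL",
        "JACOB MIDDLETON": "JAKE MIDDLETON",
        "TOMMY NOVAK": "THOMAS NOVAK",
    }
    # Series.replace with a dict swaps exact matches, same as the np.where chain:
    roster_df["Name"] = roster_df["Name"].replace(name_fixes)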
@@ -874,7 +875,7 @@ def scrape_html_shifts(season, game_id):
  number = players[key]['number'],
  team = thisteam,
  venue = "home")
- alldf = alldf.append(df)
+ alldf = alldf._append(df)
 
  home_shifts = alldf
 
@@ -911,7 +912,7 @@ def scrape_html_shifts(season, game_id):
  number = players[key]['number'],
  team = thisteam,
  venue = "away")
- alldf = alldf.append(df)
+ alldf = alldf._append(df)
 
  away_shifts = alldf
 
@@ -1638,8 +1639,8 @@ def scrape_espn_events(espn_game_id, drop_description = True):
  play_id_list = []
 
  for i in range(0, len(playdict)):
- play_list.append(playdict[i]['#text'])
- play_id_list.append(playdict[i]['@id'])
+ play_list._append(playdict[i]['#text'])
+ play_id_list._append(playdict[i]['@id'])
 
  x_coordinates = []
  y_coordinates = []
@@ -1650,18 +1651,18 @@ def scrape_espn_events(espn_game_id, drop_description = True):
 
  for i in range (0, len(play_list)):
  split = play_list[i].split('~')
- x_coordinates.append(split[0])
- y_coordinates.append(split[1])
- game_mins.append(play_list[i].split(':')[0].split('~')[-1])
- game_secs.append(play_list[i].split(':')[1].split('~')[0].split('-')[0])
- event_desc.append(" ".join(re.findall("[a-z-'.A-Z]+|\dst|\drd|\d2nd|\d minutes|\d minutes", play_list[i])))
+ x_coordinates._append(split[0])
+ y_coordinates._append(split[1])
+ game_mins._append(play_list[i].split(':')[0].split('~')[-1])
+ game_secs._append(play_list[i].split(':')[1].split('~')[0].split('-')[0])
+ event_desc._append(" ".join(re.findall("[a-z-'.A-Z]+|\dst|\drd|\d2nd|\d minutes|\d minutes", play_list[i])))
  if (len(re.split(r'(:\d+)~', play_list[i])))>1:
- game_pd.append((re.split(r'(:\d+)~', play_list[i])[2][0]))
+ game_pd._append((re.split(r'(:\d+)~', play_list[i])[2][0]))
  else:
- game_pd.append(re.split('-\d~|-\d:\d-\d~', play_list[i])[1][0])
+ game_pd._append(re.split('-\d~|-\d:\d-\d~', play_list[i])[1][0])
 
 
- #event_desc.append(" ".join(re.findall("[a-zA-Z]+", play_list[i])))
+ #event_desc._append(" ".join(re.findall("[a-zA-Z]+", play_list[i])))
  # Below is the code to get information that includes period number and penalty minutes. It is timely and unncessary.
 
 
@@ -1943,9 +1944,9 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
  away = soup_found[0 + (i * 3)]['href'].rsplit('/')[-1].replace('-', ' ').upper()
  home = soup_found[1 + (i * 3)]['href'].rsplit('/')[-1].replace('-', ' ').upper()
  espnid = soup_found[2 + (i * 3)]['href'].split('gameId/', 1)[1]
- at.append(away)
- ht.append(home)
- gids.append(espnid)
+ at._append(away)
+ ht._append(home)
+ gids._append(espnid)
 
  fax = fax.assign(
  away_team = at,
@@ -1953,7 +1954,7 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
  espn_id = gids,
  game_date = pd.to_datetime(this_date))
 
- gamedays = gamedays.append(fax)
+ gamedays = gamedays._append(fax)
 
  gamedays = gamedays.assign(
  home_team = np.where(gamedays.home_team=='ST LOUIS BLUES', 'ST. LOUIS BLUES', gamedays.home_team),
@@ -2421,7 +2422,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
  try:
  shifts = scrape_html_shifts(season, small_id)
  finalized = merge_and_prepare(events, shifts)
- full = full.append(finalized)
+ full = full._append(finalized)
  second_time = time.time()
  except IndexError as e:
  print('There was no shift data for this game. Error: ' + str(e))
@@ -2433,7 +2434,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
  ).drop(
  columns = ['original_time', 'other_team', 'strength', 'event_player_str', 'version', 'hometeamfull', 'awayteamfull']
  ).assign(game_warning = 'NO SHIFT DATA.')
- full = full.append(fixed_events)
+ full = full._append(fixed_events)
  print('Successfully scraped ' + str(game_id) + '. Coordinates sourced from the API.')
  print("This game took " + str(round(second_time - first_time, 2)) + " seconds.")
  i = i + 1
@@ -2463,7 +2464,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
  try:
  shifts = scrape_html_shifts(season, small_id)
  finalized = merge_and_prepare(events, shifts)
- full = full.append(finalized)
+ full = full._append(finalized)
  second_time = time.time()
  except IndexError as e:
  print('There was no shift data for this game. Error: ' + str(e))
@@ -2476,7 +2477,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
  columns = ['original_time', 'other_team', 'strength', 'event_player_str', 'version', 'hometeamfull', 'awayteamfull']
  ).assign(game_warning = 'NO SHIFT DATA', season = season)
  fixed_events['coordinate_source'] = 'espn'
- full = full.append(fixed_events)
+ full = full._append(fixed_events)
  second_time = time.time()
  # Fix this so it doesn't say sourced from ESPN if no coords.
  if single.equals(events):
@@ -2530,7 +2531,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
  duped_coords = duped_coords.assign(coords_x = np.where(pd.isna(duped_coords.coords_x_x), duped_coords.coords_x_y, duped_coords.coords_x_x),
  coords_y = np.where(pd.isna(duped_coords.coords_y_x), duped_coords.coords_y_y, duped_coords.coords_y_x))
  col_list = list(api_coords.columns)
- col_list.append('source')
+ col_list._append('source')
  duped_coords = duped_coords.loc[:, col_list]
  duped_coords = duped_coords[duped_coords.event.isin(['SHOT', 'HIT', 'BLOCK', 'MISS', 'GIVE', 'TAKE', 'GOAL', 'PENL', 'FAC'])]
  duped_coords = duped_coords[~duped_coords.duplicated()]
@@ -2556,7 +2557,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
  try:
  shifts = scrape_html_shifts(season, small_id)
  finalized = merge_and_prepare(events, shifts)
- full = full.append(finalized)
+ full = full._append(finalized)
  second_time = time.time()
  except IndexError as e:
  print('There was no shift data for this game. Error: ' + str(e))
@@ -2568,7 +2569,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
  ).drop(
  columns = ['original_time', 'other_team', 'strength', 'event_player_str', 'version', 'hometeamfull', 'awayteamfull']
  ).assign(game_warning = 'NO SHIFT DATA', season = season)
- full = full.append(fixed_events)
+ full = full._append(fixed_events)
  second_time = time.time()
  # Fix this so it doesn't say sourced from ESPN if no coords.
  print('Successfully scraped ' + str(game_id) + '. Coordinates sourced from ESPN.')
@@ -2720,7 +2721,7 @@ def full_scrape(game_id_list, shift = False):
  print('You missed the following games: ' + str(missing))
  print('Let us try scraping each of them one more time.')
  retry = full_scrape_1by1(missing)
- df = df.append(retry)
+ df = df._append(retry)
  return df
  else:
  return df
--- TopDownHockey_Scraper-2.2.1.dist-info/METADATA
+++ TopDownHockey_Scraper-2.2.3.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: TopDownHockey-Scraper
- Version: 2.2.1
+ Version: 2.2.3
  Summary: The TopDownHockey Scraper
  Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
  Author: Patrick Bacon
--- /dev/null
+++ TopDownHockey_Scraper-2.2.3.dist-info/RECORD
@@ -0,0 +1,7 @@
+ TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=-EPVHQc06W8OcpVoTQvpUH40sjLj9Nwsv1-y3ANrOiQ,45380
+ TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=dJKZxPZphekkCxPXR6VffHU2m4IEIPnGYisYLQLCrIE,153173
+ TopDownHockey_Scraper-2.2.3.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
+ TopDownHockey_Scraper-2.2.3.dist-info/METADATA,sha256=dTFyTYSW3FpGdnzjawuQmPO3RaT3rG0xI7MxKq7C1Bw,6284
+ TopDownHockey_Scraper-2.2.3.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+ TopDownHockey_Scraper-2.2.3.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
+ TopDownHockey_Scraper-2.2.3.dist-info/RECORD,,
--- TopDownHockey_Scraper-2.2.1.dist-info/RECORD
+++ /dev/null
@@ -1,7 +0,0 @@
- TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=wpIpT8IcZjs9W-Iy6EQuhORh8cy5xr1RQGDLZ6CaaTE,45355
- TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=1yGDKcc9SyDLKp4qd5KMXzwSO-AbmeRTtei09WJvAjQ,153063
- TopDownHockey_Scraper-2.2.1.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
- TopDownHockey_Scraper-2.2.1.dist-info/METADATA,sha256=Io4CVqUV1szHnt0w1ZcAt_GG_PKtNGNW-SLeD6dpxio,6284
- TopDownHockey_Scraper-2.2.1.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
- TopDownHockey_Scraper-2.2.1.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
- TopDownHockey_Scraper-2.2.1.dist-info/RECORD,,
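Each RECORD line has the form path,sha256=<digest>,size, where the digest is an unpadded URL-safe base64 encoding of the file's SHA-256 hash, per the wheel spec. A short sketch for recomputing an entry from an unpacked wheel (the path below is one of the files listed above):

    import base64, hashlib, pathlib

    def record_digest(path: pathlib.Path) -> str:
        # SHA-256 of the file bytes, URL-safe base64, padding stripped
        raw = hashlib.sha256(path.read_bytes()).digest()
        return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

    p = pathlib.Path("TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py")
    print(f"{p},sha256={record_digest(p)},{p.stat().st_size}")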