TopDownHockey-Scraper 2.2.1-py3-none-any.whl → 2.2.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of TopDownHockey-Scraper might be problematic.
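The substantive change in this release is a mechanical rename: every .append(...) call in both scraper modules becomes ._append(...). For pandas DataFrames this targets DataFrame._append, the private method pandas kept after removing the public DataFrame.append in pandas 2.0. A minimal sketch of what the rename relies on, assuming pandas >= 2.0 (the df and extra frames below are illustrative, not the package's data):

    import pandas as pd

    df = pd.DataFrame({"a": [1]})
    extra = pd.DataFrame({"a": [2]})

    # Removed in pandas 2.0; raises AttributeError there:
    # df = df.append(extra, ignore_index=True)

    # What this release switches to; works today, but it is private API
    # and may disappear in a future pandas release:
    df = df._append(extra, ignore_index=True)

    # The documented replacement:
    df = pd.concat([df, extra], ignore_index=True)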

--- TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py
+++ TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py
@@ -28,10 +28,10 @@ def tableDataText(table):
 
  headerow = [td.get_text(strip=True) for td in trs[0].find_all('th')] # header row
  if headerow: # if there is a header row include first
- rows.append(headerow)
+ rows._append(headerow)
  trs = trs[1:]
  for tr in trs: # for every table row
- rows.append([td.get_text(strip=True) for td in tr.find_all('td')]) # data row
+ rows._append([td.get_text(strip=True) for td in tr.find_all('td')]) # data row
 
  df_rows = pd.DataFrame(rows[1:], columns=rows[0])
 
@@ -97,7 +97,7 @@ def getskaters(league, year):
  # Extract href links in table
  href_row = []
  for link in player_table.find_all('a'):
- href_row.append(link.attrs['href'])
+ href_row._append(link.attrs['href'])
 
  # Create data frame, rename and only keep links to players
  df_links = pd.DataFrame(href_row)
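Note that rows in the tableDataText hunk and href_row here are plain Python lists, not DataFrames (each is built up and only then handed to pd.DataFrame), and list has no _append method. The blanket rename therefore breaks these call sites at runtime:

    rows = []
    rows.append(["col1", "col2"])    # correct: list.append
    rows._append(["col1", "col2"])   # AttributeError: 'list' object has no attribute '_append'

The same concern applies to the other list receivers renamed further down: the my* lists in get_player_information, the standings lists in scrape_standings, the coordinate lists in scrape_espn_events, and col_list in full_scrape_1by1.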
@@ -107,7 +107,7 @@ def getskaters(league, year):
  # Add links to players
  df_players['link']=df_links['link']
 
- players.append(df_players)
+ players._append(df_players)
 
  # Wait 3 seconds before going to next
  #time.sleep(1)
@@ -209,7 +209,7 @@ def getgoalies(league, year):
  # Extract href links in table
  href_row = []
  for link in player_table.find_all('a'):
- href_row.append(link.attrs['href'])
+ href_row._append(link.attrs['href'])
 
  # Create data frame, rename and only keep links to players
  df_links = pd.DataFrame(href_row)
@@ -219,7 +219,7 @@ def getgoalies(league, year):
  # Add links to players
  df_players['link']=df_links['link']
 
- players.append(df_players)
+ players._append(df_players)
 
  # Wait 3 seconds before going to next
  # time.sleep(1)
@@ -382,17 +382,17 @@ def get_player_information(dataframe):
  for i in range(0, len(list(set(dataframe.link)))):
  try:
  myresult = get_info(((list(set(dataframe.link))[i])))
- myplayer.append(myresult[0])
- myrights.append(myresult[1])
- mystatus.append(myresult[2])
- mydob.append(myresult[3])
- myheight.append(myresult[4])
- myweight.append(myresult[5])
- mybirthplace.append(myresult[6])
- mynation.append(myresult[7])
- myshot.append(myresult[8])
- mydraft.append(myresult[9])
- mylink.append(myresult[10])
+ myplayer._append(myresult[0])
+ myrights._append(myresult[1])
+ mystatus._append(myresult[2])
+ mydob._append(myresult[3])
+ myheight._append(myresult[4])
+ myweight._append(myresult[5])
+ mybirthplace._append(myresult[6])
+ mynation._append(myresult[7])
+ myshot._append(myresult[8])
+ mydraft._append(myresult[9])
+ mylink._append(myresult[10])
  print(myresult[0] + " scraped! That's " + str(i + 1) + " down! Only " + str(len(list(set(dataframe.link))) - (i + 1)) + " left to go!")
  except KeyboardInterrupt:
  print("You interrupted this one manually. The output here will be every player you've scraped so far. Good bye!")
@@ -447,7 +447,7 @@ def get_league_skater_boxcars(league, seasons):
 
  if type(seasons) == str:
  single = getskaters(league, seasons)
- output = output.append(single)
+ output = output._append(single)
  print("Scraping " + league + " data is complete. You scraped skater data from " + seasons + ".")
  return(output)
 
@@ -456,7 +456,7 @@ def get_league_skater_boxcars(league, seasons):
  for i in range(0, len(seasons)):
  try:
  single = getskaters(league, seasons[i])
- output = output.append(single)
+ output = output._append(single)
  except KeyboardInterrupt as e:
  hidden_patrick = 4
  error = e
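The get_league_*_boxcars hunks grow a DataFrame inside a loop, so output._append(single) does run on pandas 2.x, but it leans on private API and copies the whole frame on every iteration. A hedged alternative sketch, not the package's code, assuming seasons is a list of season strings as elsewhere in this module:

    import pandas as pd

    frames = []
    for season in seasons:
        frames.append(getskaters(league, season))   # getskaters as defined above
    output = pd.concat(frames, ignore_index=True)   # one concatenation at the end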
@@ -495,7 +495,7 @@ def get_league_goalie_boxcars(league, seasons):
 
  if type(seasons) == str:
  single = getgoalies(league, seasons)
- output = output.append(single)
+ output = output._append(single)
  print("Scraping " + league + " data is complete. You scraped goalie data from " + seasons + ".")
  return(output)
 
@@ -504,7 +504,7 @@ def get_league_goalie_boxcars(league, seasons):
  for i in range(0, len(seasons)):
  try:
  single = getgoalies(league, seasons[i])
- output = output.append(single)
+ output = output._append(single)
  except KeyboardInterrupt as e:
  hidden_patrick = 4
  error = e
@@ -597,7 +597,7 @@ def get_goalies(leagues, seasons):
  for i in range (0, len(leagues)):
  try:
  targetleague = get_league_goalie_boxcars(leagues[i], seasons)
- leaguesall = leaguesall.append(targetleague)
+ leaguesall = leaguesall._append(targetleague)
  if hidden_patrick == 4:
  raise KeyboardInterrupt
  if hidden_patrick == 5:
@@ -633,7 +633,7 @@ def get_goalies(leagues, seasons):
  for i in range (0, len(leagues)):
  try:
  targetleague = get_league_goalie_boxcars(leagues[i], seasons)
- leaguesall = leaguesall.append(targetleague)
+ leaguesall = leaguesall._append(targetleague)
  if hidden_patrick == 4:
  raise KeyboardInterrupt
  if hidden_patrick == 5:
@@ -749,7 +749,7 @@ def get_skaters(leagues, seasons):
  for i in range (0, len(leagues)):
  try:
  targetleague = get_league_skater_boxcars(leagues[i], seasons)
- leaguesall = leaguesall.append(targetleague)
+ leaguesall = leaguesall._append(targetleague)
  if hidden_patrick == 4:
  raise KeyboardInterrupt
  if hidden_patrick == 5:
@@ -785,7 +785,7 @@ def get_skaters(leagues, seasons):
  for i in range (0, len(leagues)):
  try:
  targetleague = get_league_skater_boxcars(leagues[i], seasons)
- leaguesall = leaguesall.append(targetleague)
+ leaguesall = leaguesall._append(targetleague)
  if hidden_patrick == 4:
  raise KeyboardInterrupt
  if hidden_patrick == 5:
--- TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py
+++ TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py
@@ -48,20 +48,20 @@ def scrape_standings(season):
  div = (record_df['division'].iloc[i]['name'])
  conf = (record_df['conference'].iloc[i]['name'])
  for x in range(0, len((record_df['teamRecords'].iloc[i]))):
- divisions.append(div)
- conferences.append(conf)
- team.append(record_df['teamRecords'].iloc[i][x]['team']['name'])
- wins.append(record_df['teamRecords'].iloc[i][x]['leagueRecord']['wins'])
- losses.append(record_df['teamRecords'].iloc[i][x]['leagueRecord']['losses'])
- otl.append(record_df['teamRecords'].iloc[i][x]['leagueRecord']['ot'])
- gf.append(record_df['teamRecords'].iloc[i][x]['goalsScored'])
- ga.append(record_df['teamRecords'].iloc[i][x]['goalsAgainst'])
+ divisions._append(div)
+ conferences._append(conf)
+ team._append(record_df['teamRecords'].iloc[i][x]['team']['name'])
+ wins._append(record_df['teamRecords'].iloc[i][x]['leagueRecord']['wins'])
+ losses._append(record_df['teamRecords'].iloc[i][x]['leagueRecord']['losses'])
+ otl._append(record_df['teamRecords'].iloc[i][x]['leagueRecord']['ot'])
+ gf._append(record_df['teamRecords'].iloc[i][x]['goalsScored'])
+ ga._append(record_df['teamRecords'].iloc[i][x]['goalsAgainst'])
  if season>20092010:
- row.append(record_df['teamRecords'].iloc[i][x]['row'])
- gp.append(record_df['teamRecords'].iloc[i][x]['gamesPlayed'])
- pts.append(record_df['teamRecords'].iloc[i][x]['points'])
+ row._append(record_df['teamRecords'].iloc[i][x]['row'])
+ gp._append(record_df['teamRecords'].iloc[i][x]['gamesPlayed'])
+ pts._append(record_df['teamRecords'].iloc[i][x]['points'])
  if season>20192020:
- rw.append(record_df['teamRecords'].iloc[i][x]['regulationWins'])
+ rw._append(record_df['teamRecords'].iloc[i][x]['regulationWins'])
 
  if season < 20092010:
  stand = pd.DataFrame().assign(Team = team, Division = divisions, Conference = conferences,
@@ -97,7 +97,7 @@ def scrape_schedule(start_date, end_date):
 
  for i in range (0, len(date_df)):
  datedf = pd.DataFrame(date_df.games.iloc[i])
- gamedf = gamedf.append(datedf)
+ gamedf = gamedf._append(datedf)
  global team_df
  team_df = pd.DataFrame(gamedf['teams'].values.tolist(), index = gamedf.index)
  away_df = pd.DataFrame(team_df['away'].values.tolist(), index = team_df.index)
@@ -160,7 +160,7 @@ def hs_strip_html(td):
  elif i % 3 == 1:
  if name != '':
  position = bar[i].get_text()
- players.append([name, number, position])
+ players._append([name, number, position])
 
  td[y] = players
  else:
@@ -429,9 +429,10 @@ def scrape_html_roster(season, game_id):
  roster_df['Name'] = np.where(roster_df['Name']=='COLIN', 'COLIN WHITE CAN', roster_df['Name'])
 
  roster_df['Name'] = (np.where(roster_df['Name']== "JANIS MOSER" , "J.J. MOSER",
- (np.where(roster_df['Name']== "NICK PAUL" , "NICHOLAS PAUL",
+ (np.where(roster_df['Name']== "NICHOLAS PAUL" , "NICK PAUL",
  (np.where(roster_df['Name']== "JACOB MIDDLETON" , "JAKE MIDDLETON",
- roster_df['Name']))))))
+ (np.where(roster_df['Name']== "TOMMY NOVAK" , "THOMAS NOVAK",
+ roster_df['Name']))))))))
 
  roster_df['Name'] = roster_df['Name'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.upper()
 
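Beyond the rename, this hunk reverses the direction of one mapping (2.2.1 turned "NICK PAUL" into "NICHOLAS PAUL"; 2.2.3 turns "NICHOLAS PAUL" into "NICK PAUL") and adds "TOMMY NOVAK" → "THOMAS NOVAK". A sketch of the same renames with a mapping dict instead of nested np.where calls, offered as a suggestion rather than the package's code, since it is easier to extend as more players need fixing:

    name_fixes = {
        "JANIS MOSER": "J.J. MOSER",
        "NICHOLAS PAUL": "NICK PAUL",
        "JACOB MIDDLETON": "JAKE MIDDLETON",
        "TOMMY NOVAK": "THOMAS NOVAK",
    }
    # Series.replace with a dict swaps exact matches, same as the np.where chain:
    roster_df["Name"] = roster_df["Name"].replace(name_fixes)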
@@ -874,7 +875,7 @@ def scrape_html_shifts(season, game_id):
  number = players[key]['number'],
  team = thisteam,
  venue = "home")
- alldf = alldf.append(df)
+ alldf = alldf._append(df)
 
  home_shifts = alldf
 
@@ -911,7 +912,7 @@ def scrape_html_shifts(season, game_id):
  number = players[key]['number'],
  team = thisteam,
  venue = "away")
- alldf = alldf.append(df)
+ alldf = alldf._append(df)
 
  away_shifts = alldf
 
@@ -1638,8 +1639,8 @@ def scrape_espn_events(espn_game_id, drop_description = True):
  play_id_list = []
 
  for i in range(0, len(playdict)):
- play_list.append(playdict[i]['#text'])
- play_id_list.append(playdict[i]['@id'])
+ play_list._append(playdict[i]['#text'])
+ play_id_list._append(playdict[i]['@id'])
 
  x_coordinates = []
  y_coordinates = []
@@ -1650,18 +1651,18 @@ def scrape_espn_events(espn_game_id, drop_description = True):
 
  for i in range (0, len(play_list)):
  split = play_list[i].split('~')
- x_coordinates.append(split[0])
- y_coordinates.append(split[1])
- game_mins.append(play_list[i].split(':')[0].split('~')[-1])
- game_secs.append(play_list[i].split(':')[1].split('~')[0].split('-')[0])
- event_desc.append(" ".join(re.findall("[a-z-'.A-Z]+|\dst|\drd|\d2nd|\d minutes|\d minutes", play_list[i])))
+ x_coordinates._append(split[0])
+ y_coordinates._append(split[1])
+ game_mins._append(play_list[i].split(':')[0].split('~')[-1])
+ game_secs._append(play_list[i].split(':')[1].split('~')[0].split('-')[0])
+ event_desc._append(" ".join(re.findall("[a-z-'.A-Z]+|\dst|\drd|\d2nd|\d minutes|\d minutes", play_list[i])))
  if (len(re.split(r'(:\d+)~', play_list[i])))>1:
- game_pd.append((re.split(r'(:\d+)~', play_list[i])[2][0]))
+ game_pd._append((re.split(r'(:\d+)~', play_list[i])[2][0]))
  else:
- game_pd.append(re.split('-\d~|-\d:\d-\d~', play_list[i])[1][0])
+ game_pd._append(re.split('-\d~|-\d:\d-\d~', play_list[i])[1][0])
 
 
- #event_desc.append(" ".join(re.findall("[a-zA-Z]+", play_list[i])))
+ #event_desc._append(" ".join(re.findall("[a-zA-Z]+", play_list[i])))
  # Below is the code to get information that includes period number and penalty minutes. It is timely and unncessary.
 
 
@@ -1943,9 +1944,9 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
  away = soup_found[0 + (i * 3)]['href'].rsplit('/')[-1].replace('-', ' ').upper()
  home = soup_found[1 + (i * 3)]['href'].rsplit('/')[-1].replace('-', ' ').upper()
  espnid = soup_found[2 + (i * 3)]['href'].split('gameId/', 1)[1]
- at.append(away)
- ht.append(home)
- gids.append(espnid)
+ at._append(away)
+ ht._append(home)
+ gids._append(espnid)
 
  fax = fax.assign(
  away_team = at,
@@ -1953,7 +1954,7 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
  espn_id = gids,
  game_date = pd.to_datetime(this_date))
 
- gamedays = gamedays.append(fax)
+ gamedays = gamedays._append(fax)
 
  gamedays = gamedays.assign(
  home_team = np.where(gamedays.home_team=='ST LOUIS BLUES', 'ST. LOUIS BLUES', gamedays.home_team),
@@ -2421,7 +2422,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
  try:
  shifts = scrape_html_shifts(season, small_id)
  finalized = merge_and_prepare(events, shifts)
- full = full.append(finalized)
+ full = full._append(finalized)
  second_time = time.time()
  except IndexError as e:
  print('There was no shift data for this game. Error: ' + str(e))
@@ -2433,7 +2434,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
  ).drop(
  columns = ['original_time', 'other_team', 'strength', 'event_player_str', 'version', 'hometeamfull', 'awayteamfull']
  ).assign(game_warning = 'NO SHIFT DATA.')
- full = full.append(fixed_events)
+ full = full._append(fixed_events)
  print('Successfully scraped ' + str(game_id) + '. Coordinates sourced from the API.')
  print("This game took " + str(round(second_time - first_time, 2)) + " seconds.")
  i = i + 1
@@ -2463,7 +2464,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
  try:
  shifts = scrape_html_shifts(season, small_id)
  finalized = merge_and_prepare(events, shifts)
- full = full.append(finalized)
+ full = full._append(finalized)
  second_time = time.time()
  except IndexError as e:
  print('There was no shift data for this game. Error: ' + str(e))
@@ -2476,7 +2477,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
  columns = ['original_time', 'other_team', 'strength', 'event_player_str', 'version', 'hometeamfull', 'awayteamfull']
  ).assign(game_warning = 'NO SHIFT DATA', season = season)
  fixed_events['coordinate_source'] = 'espn'
- full = full.append(fixed_events)
+ full = full._append(fixed_events)
  second_time = time.time()
  # Fix this so it doesn't say sourced from ESPN if no coords.
  if single.equals(events):
@@ -2530,7 +2531,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
  duped_coords = duped_coords.assign(coords_x = np.where(pd.isna(duped_coords.coords_x_x), duped_coords.coords_x_y, duped_coords.coords_x_x),
  coords_y = np.where(pd.isna(duped_coords.coords_y_x), duped_coords.coords_y_y, duped_coords.coords_y_x))
  col_list = list(api_coords.columns)
- col_list.append('source')
+ col_list._append('source')
  duped_coords = duped_coords.loc[:, col_list]
  duped_coords = duped_coords[duped_coords.event.isin(['SHOT', 'HIT', 'BLOCK', 'MISS', 'GIVE', 'TAKE', 'GOAL', 'PENL', 'FAC'])]
  duped_coords = duped_coords[~duped_coords.duplicated()]
@@ -2556,7 +2557,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
  try:
  shifts = scrape_html_shifts(season, small_id)
  finalized = merge_and_prepare(events, shifts)
- full = full.append(finalized)
+ full = full._append(finalized)
  second_time = time.time()
  except IndexError as e:
  print('There was no shift data for this game. Error: ' + str(e))
@@ -2568,7 +2569,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = False):
  ).drop(
  columns = ['original_time', 'other_team', 'strength', 'event_player_str', 'version', 'hometeamfull', 'awayteamfull']
  ).assign(game_warning = 'NO SHIFT DATA', season = season)
- full = full.append(fixed_events)
+ full = full._append(fixed_events)
  second_time = time.time()
  # Fix this so it doesn't say sourced from ESPN if no coords.
  print('Successfully scraped ' + str(game_id) + '. Coordinates sourced from ESPN.')
@@ -2720,7 +2721,7 @@ def full_scrape(game_id_list, shift = False):
  print('You missed the following games: ' + str(missing))
  print('Let us try scraping each of them one more time.')
  retry = full_scrape_1by1(missing)
- df = df.append(retry)
+ df = df._append(retry)
  return df
  else:
  return df
--- TopDownHockey_Scraper-2.2.1.dist-info/METADATA
+++ TopDownHockey_Scraper-2.2.3.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: TopDownHockey-Scraper
- Version: 2.2.1
+ Version: 2.2.3
  Summary: The TopDownHockey Scraper
  Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
  Author: Patrick Bacon
--- /dev/null
+++ TopDownHockey_Scraper-2.2.3.dist-info/RECORD
@@ -0,0 +1,7 @@
+ TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=-EPVHQc06W8OcpVoTQvpUH40sjLj9Nwsv1-y3ANrOiQ,45380
+ TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=dJKZxPZphekkCxPXR6VffHU2m4IEIPnGYisYLQLCrIE,153173
+ TopDownHockey_Scraper-2.2.3.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
+ TopDownHockey_Scraper-2.2.3.dist-info/METADATA,sha256=dTFyTYSW3FpGdnzjawuQmPO3RaT3rG0xI7MxKq7C1Bw,6284
+ TopDownHockey_Scraper-2.2.3.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+ TopDownHockey_Scraper-2.2.3.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
+ TopDownHockey_Scraper-2.2.3.dist-info/RECORD,,
--- TopDownHockey_Scraper-2.2.1.dist-info/RECORD
+++ /dev/null
@@ -1,7 +0,0 @@
- TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py,sha256=wpIpT8IcZjs9W-Iy6EQuhORh8cy5xr1RQGDLZ6CaaTE,45355
- TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py,sha256=1yGDKcc9SyDLKp4qd5KMXzwSO-AbmeRTtei09WJvAjQ,153063
- TopDownHockey_Scraper-2.2.1.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
- TopDownHockey_Scraper-2.2.1.dist-info/METADATA,sha256=Io4CVqUV1szHnt0w1ZcAt_GG_PKtNGNW-SLeD6dpxio,6284
- TopDownHockey_Scraper-2.2.1.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
- TopDownHockey_Scraper-2.2.1.dist-info/top_level.txt,sha256=PBd96GLGFq97ZDLd7_4ZCx8_ZFr_wdWKs5SIpGl5xCs,22
- TopDownHockey_Scraper-2.2.1.dist-info/RECORD,,
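Each RECORD line has the form path,sha256=<digest>,size, where the digest is an unpadded URL-safe base64 encoding of the file's SHA-256 hash, per the wheel spec. A short sketch for recomputing an entry from an unpacked wheel (the path below is one of the files listed above):

    import base64, hashlib, pathlib

    def record_digest(path: pathlib.Path) -> str:
        # SHA-256 of the file bytes, URL-safe base64, padding stripped
        raw = hashlib.sha256(path.read_bytes()).digest()
        return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

    p = pathlib.Path("TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py")
    print(f"{p},sha256={record_digest(p)},{p.stat().st_size}")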