TopDownHockey-Scraper 3.2.1__tar.gz → 3.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of TopDownHockey-Scraper has been flagged as potentially problematic; consult the registry's advisory report for details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: TopDownHockey_Scraper
3
- Version: 3.2.1
3
+ Version: 3.2.3
4
4
  Summary: The TopDownHockey Scraper
5
5
  Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
6
6
  Author: Patrick Bacon
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = TopDownHockey_Scraper
3
- version = 3.2.1
3
+ version = 3.2.3
4
4
  author = Patrick Bacon
5
5
  author_email = patrick.s.bacon@gmail.com
6
6
  description = A package built for scraping hockey data from EliteProspects, the NHL's HTML/API reports, and ESPN's XML reports.
@@ -9,7 +9,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
9
9
 
10
10
  setup(
11
11
  name="TopDownHockey_Scraper", # Replace with your own username
12
- version="3.2.1",
12
+ version="3.2.3",
13
13
  author="Patrick Bacon",
14
14
  author_email="patrick.s.bacon@gmail.com",
15
15
  description="The TopDownHockey Scraper",
@@ -1687,7 +1687,9 @@ def scrape_espn_events(espn_game_id, drop_description = True):
1687
1687
 
1688
1688
  clock_df = clock_df[~pd.isna(clock_df.clock)]
1689
1689
 
1690
- coords_df = pd.DataFrame(json.loads(str(soup).split('plays":')[1].split(',"st":1')[0].split(',"st":2')[0]))
1690
+ # Needed to add .split(',"st":3')[0] for playoffs
1691
+
1692
+ coords_df = pd.DataFrame(json.loads(str(soup).split('plays":')[1].split(',"st":1')[0].split(',"st":2')[0].split(',"st":3')[0]))
1691
1693
 
1692
1694
  clock_df = clock_df.assign(
1693
1695
  clock = clock_df.clock.apply(lambda x: x['displayValue'])
@@ -1870,6 +1872,7 @@ def scrape_espn_events(espn_game_id, drop_description = True):
1870
1872
  return espn_events
1871
1873
 
1872
1874
  def scrape_espn_ids_single_game(game_date, home_team, away_team):
1875
+
1873
1876
  gamedays = pd.DataFrame()
1874
1877
 
1875
1878
  if home_team == 'ATLANTA THRASHERS':
@@ -1894,33 +1897,29 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
1894
1897
  gids = []
1895
1898
  fax = pd.DataFrame()
1896
1899
  #print(str(i))
1897
- for i in range (0, (int(len(soup_found)/3))):
1898
- away = soup_found[0 + (i * 3)]['href'].rsplit('/')[-2].upper()
1899
- home = soup_found[1 + (i * 3)]['href'].rsplit('/')[-2].upper()
1900
- espnid = soup_found[2 + (i * 3)]['href'].split('gameId/', 1)[1]
1900
+ for i in range (0, ((len(soup_found)))):
1901
+ away = soup_found[i]['href'].rsplit('/')[-1].split('-')[0].upper()
1902
+ home = soup_found[i]['href'].rsplit('/')[-1].split('-')[1].upper()
1903
+ espnid = soup_found[i]['href'].rsplit('/')[-2]
1901
1904
  at.append(away)
1902
1905
  ht.append(home)
1903
1906
  gids.append(espnid)
1904
-
1907
+
1905
1908
  fax = fax.assign(
1906
1909
  away_team = at,
1907
1910
  home_team = ht,
1908
1911
  espn_id = gids,
1909
1912
  game_date = pd.to_datetime(this_date))
1910
-
1913
+
1911
1914
  gamedays = gamedays._append(fax)
1912
-
1915
+
1913
1916
  gamedays = gamedays.assign(
1914
1917
  home_team = np.where(gamedays.home_team=='ST LOUIS BLUES', 'ST. LOUIS BLUES', gamedays.home_team),
1915
1918
  away_team = np.where(gamedays.away_team=='ST LOUIS BLUES', 'ST. LOUIS BLUES', gamedays.away_team),
1916
1919
  espn_id = gamedays.espn_id.str.split('/').str[0].astype(int)
1917
-
1920
+
1918
1921
  )
1919
- #gamedays = gamedays.assign(
1920
- # home_team = np.where(gamedays.home_team=='WINNIPEG JETS', 'ATLANTA THRASHERS', gamedays.home_team),
1921
- # away_team = np.where(gamedays.away_team=='WINNIPEG JETS', 'ATLANTA THRASHERS', gamedays.away_team),
1922
- # espn_id = gamedays.espn_id.astype(int))
1923
-
1922
+
1924
1923
  gamedays = gamedays.assign(
1925
1924
  home_team = np.where(gamedays.home_team=='TB', 'TBL',
1926
1925
  np.where(gamedays.home_team=='T.B', 'TBL',
@@ -1942,7 +1941,9 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
1942
1941
  gamedays.away_team)))))))),
1943
1942
  espn_id = gamedays.espn_id.astype(int))
1944
1943
 
1945
- gamedays = gamedays[(gamedays.game_date==this_date) & (gamedays.home_team==home_team) & (gamedays.away_team==away_team)]
1944
+ # Might need to fix later; don't have right home/away teams right now
1945
+
1946
+ #gamedays = gamedays[(gamedays.game_date==this_date) & (gamedays.home_team==home_team) & (gamedays.away_team==away_team)]
1946
1947
 
1947
1948
  return(gamedays)
1948
1949
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: TopDownHockey_Scraper
3
- Version: 3.2.1
3
+ Version: 3.2.3
4
4
  Summary: The TopDownHockey Scraper
5
5
  Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
6
6
  Author: Patrick Bacon