TopDownHockey-Scraper 3.2.1__tar.gz → 3.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of TopDownHockey-Scraper might be problematic. Click here for more details.
- {topdownhockey_scraper-3.2.1/src/TopDownHockey_Scraper.egg-info → topdownhockey_scraper-3.2.3}/PKG-INFO +1 -1
- {topdownhockey_scraper-3.2.1 → topdownhockey_scraper-3.2.3}/setup.cfg +1 -1
- {topdownhockey_scraper-3.2.1 → topdownhockey_scraper-3.2.3}/setup.py +1 -1
- {topdownhockey_scraper-3.2.1 → topdownhockey_scraper-3.2.3}/src/TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py +16 -15
- {topdownhockey_scraper-3.2.1 → topdownhockey_scraper-3.2.3/src/TopDownHockey_Scraper.egg-info}/PKG-INFO +1 -1
- {topdownhockey_scraper-3.2.1 → topdownhockey_scraper-3.2.3}/LICENSE +0 -0
- {topdownhockey_scraper-3.2.1 → topdownhockey_scraper-3.2.3}/README.md +0 -0
- {topdownhockey_scraper-3.2.1 → topdownhockey_scraper-3.2.3}/pyproject.toml +0 -0
- {topdownhockey_scraper-3.2.1 → topdownhockey_scraper-3.2.3}/src/TopDownHockey_Scraper/TopDownHockey_EliteProspects_Scraper.py +0 -0
- {topdownhockey_scraper-3.2.1 → topdownhockey_scraper-3.2.3}/src/TopDownHockey_Scraper.egg-info/SOURCES.txt +0 -0
- {topdownhockey_scraper-3.2.1 → topdownhockey_scraper-3.2.3}/src/TopDownHockey_Scraper.egg-info/dependency_links.txt +0 -0
- {topdownhockey_scraper-3.2.1 → topdownhockey_scraper-3.2.3}/src/TopDownHockey_Scraper.egg-info/requires.txt +0 -0
- {topdownhockey_scraper-3.2.1 → topdownhockey_scraper-3.2.3}/src/TopDownHockey_Scraper.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[metadata]
|
|
2
2
|
name = TopDownHockey_Scraper
|
|
3
|
-
version = 3.2.1
|
|
3
|
+
version = 3.2.3
|
|
4
4
|
author = Patrick Bacon
|
|
5
5
|
author_email = patrick.s.bacon@gmail.com
|
|
6
6
|
description = A package built for scraping hockey data from EliteProspects, the NHL's HTML/API reports, and ESPN's XML reports.
|
|
@@ -9,7 +9,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
|
|
|
9
9
|
|
|
10
10
|
setup(
|
|
11
11
|
name="TopDownHockey_Scraper", # Replace with your own username
|
|
12
|
-
version="3.2.1",
|
|
12
|
+
version="3.2.3",
|
|
13
13
|
author="Patrick Bacon",
|
|
14
14
|
author_email="patrick.s.bacon@gmail.com",
|
|
15
15
|
description="The TopDownHockey Scraper",
|
|
@@ -1687,7 +1687,9 @@ def scrape_espn_events(espn_game_id, drop_description = True):
|
|
|
1687
1687
|
|
|
1688
1688
|
clock_df = clock_df[~pd.isna(clock_df.clock)]
|
|
1689
1689
|
|
|
1690
|
-
|
|
1690
|
+
# Needed to add .split(',"st":3')[0] for playoffs
|
|
1691
|
+
|
|
1692
|
+
coords_df = pd.DataFrame(json.loads(str(soup).split('plays":')[1].split(',"st":1')[0].split(',"st":2')[0].split(',"st":3')[0]))
|
|
1691
1693
|
|
|
1692
1694
|
clock_df = clock_df.assign(
|
|
1693
1695
|
clock = clock_df.clock.apply(lambda x: x['displayValue'])
|
|
@@ -1870,6 +1872,7 @@ def scrape_espn_events(espn_game_id, drop_description = True):
|
|
|
1870
1872
|
return espn_events
|
|
1871
1873
|
|
|
1872
1874
|
def scrape_espn_ids_single_game(game_date, home_team, away_team):
|
|
1875
|
+
|
|
1873
1876
|
gamedays = pd.DataFrame()
|
|
1874
1877
|
|
|
1875
1878
|
if home_team == 'ATLANTA THRASHERS':
|
|
@@ -1894,33 +1897,29 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
|
|
|
1894
1897
|
gids = []
|
|
1895
1898
|
fax = pd.DataFrame()
|
|
1896
1899
|
#print(str(i))
|
|
1897
|
-
for i in range (0, (
|
|
1898
|
-
away = soup_found[
|
|
1899
|
-
home = soup_found[
|
|
1900
|
-
espnid = soup_found[
|
|
1900
|
+
for i in range (0, ((len(soup_found)))):
|
|
1901
|
+
away = soup_found[i]['href'].rsplit('/')[-1].split('-')[0].upper()
|
|
1902
|
+
home = soup_found[i]['href'].rsplit('/')[-1].split('-')[1].upper()
|
|
1903
|
+
espnid = soup_found[i]['href'].rsplit('/')[-2]
|
|
1901
1904
|
at.append(away)
|
|
1902
1905
|
ht.append(home)
|
|
1903
1906
|
gids.append(espnid)
|
|
1904
|
-
|
|
1907
|
+
|
|
1905
1908
|
fax = fax.assign(
|
|
1906
1909
|
away_team = at,
|
|
1907
1910
|
home_team = ht,
|
|
1908
1911
|
espn_id = gids,
|
|
1909
1912
|
game_date = pd.to_datetime(this_date))
|
|
1910
|
-
|
|
1913
|
+
|
|
1911
1914
|
gamedays = gamedays._append(fax)
|
|
1912
|
-
|
|
1915
|
+
|
|
1913
1916
|
gamedays = gamedays.assign(
|
|
1914
1917
|
home_team = np.where(gamedays.home_team=='ST LOUIS BLUES', 'ST. LOUIS BLUES', gamedays.home_team),
|
|
1915
1918
|
away_team = np.where(gamedays.away_team=='ST LOUIS BLUES', 'ST. LOUIS BLUES', gamedays.away_team),
|
|
1916
1919
|
espn_id = gamedays.espn_id.str.split('/').str[0].astype(int)
|
|
1917
|
-
|
|
1920
|
+
|
|
1918
1921
|
)
|
|
1919
|
-
|
|
1920
|
-
# home_team = np.where(gamedays.home_team=='WINNIPEG JETS', 'ATLANTA THRASHERS', gamedays.home_team),
|
|
1921
|
-
# away_team = np.where(gamedays.away_team=='WINNIPEG JETS', 'ATLANTA THRASHERS', gamedays.away_team),
|
|
1922
|
-
# espn_id = gamedays.espn_id.astype(int))
|
|
1923
|
-
|
|
1922
|
+
|
|
1924
1923
|
gamedays = gamedays.assign(
|
|
1925
1924
|
home_team = np.where(gamedays.home_team=='TB', 'TBL',
|
|
1926
1925
|
np.where(gamedays.home_team=='T.B', 'TBL',
|
|
@@ -1942,7 +1941,9 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
|
|
|
1942
1941
|
gamedays.away_team)))))))),
|
|
1943
1942
|
espn_id = gamedays.espn_id.astype(int))
|
|
1944
1943
|
|
|
1945
|
-
|
|
1944
|
+
# Might need to fix later; don't have right home/away teams right now
|
|
1945
|
+
|
|
1946
|
+
#gamedays = gamedays[(gamedays.game_date==this_date) & (gamedays.home_team==home_team) & (gamedays.away_team==away_team)]
|
|
1946
1947
|
|
|
1947
1948
|
return(gamedays)
|
|
1948
1949
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|