TopDownHockey-Scraper 6.0.0.tar.gz → 6.0.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: TopDownHockey_Scraper
-Version: 6.0.0
+Version: 6.0.2
 Summary: The TopDownHockey Scraper
 Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
 Author: Patrick Bacon
@@ -17,8 +17,6 @@ Requires-Dist: numpy
 Requires-Dist: pandas
 Requires-Dist: bs4
 Requires-Dist: datetime
-Requires-Dist: seaborn
-Requires-Dist: matplotlib
 Requires-Dist: xmltodict
 Requires-Dist: lxml
 Requires-Dist: natsort
@@ -1,6 +1,6 @@
 [metadata]
 name = TopDownHockey_Scraper
-version = 5.0.2
+version = 6.0.2
 author = Patrick Bacon
 author_email = patrick.s.bacon@gmail.com
 description = A package built for scraping hockey data from EliteProspects, the NHL's HTML/API reports, and ESPN's XML reports.
@@ -25,8 +25,6 @@ install_requires =
     pandas
     datetime
     requests
-    seasborn
-    matplotlib
     xml
     xmltodict
     requests
@@ -9,7 +9,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setup(
     name="TopDownHockey_Scraper", # Replace with your own username
-    version="6.0.0",
+    version="6.0.2",
     author="Patrick Bacon",
     author_email="patrick.s.bacon@gmail.com",
     description="The TopDownHockey Scraper",
@@ -33,8 +33,6 @@ setup(
     'pandas',
     'bs4',
     'datetime',
-    'seaborn',
-    'matplotlib',
     'xmltodict',
     'lxml',
     'natsort'
@@ -2255,6 +2255,7 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
     this_date = (game_date)
     url = 'http://www.espn.com/nhl/scoreboard?date=' + this_date.replace("-", "")
     page = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout = 500)
+    print('Request to ESPN IDs successful.')
     soup = BeautifulSoup(page.content, parser = 'lxml')
     soup_found = soup.find_all('a', {'class':['AnchorLink truncate',
                                               'AnchorLink Button Button--sm Button--anchorLink Button--alt mb4 w-100',
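The fetch this hunk instruments is a plain requests-plus-BeautifulSoup scrape of the ESPN scoreboard. A self-contained sketch of the same pattern, runnable outside the package (the date is illustrative; note that requests measures timeout in seconds, so the package's timeout = 500 allows up to 500 seconds per request):

import requests
from bs4 import BeautifulSoup

game_date = '2024-03-09'  # illustrative date; the package passes 'YYYY-MM-DD' strings
url = 'http://www.espn.com/nhl/scoreboard?date=' + game_date.replace('-', '')
# requests' timeout is in seconds, so 500 is a very generous ceiling
page = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=500)
soup = BeautifulSoup(page.content, 'lxml')  # parser passed positionally
anchors = soup.find_all('a', {'class': 'AnchorLink truncate'})  # one of the classes the package matches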
@@ -2926,7 +2927,10 @@ def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
         espn_home_team = 'SJS'
     if away_team == 'S.J':
         espn_away_team = 'SJS'
+    print('Scraping ESPN IDs')
     espn_id = scrape_espn_ids_single_game(str(game_date.date()), espn_home_team, espn_away_team).espn_id.iloc[0]
+    print('Scraping ESPN Events')
+    print('Here is the ESPN ID:', espn_id)
     event_coords = scrape_espn_events(int(espn_id))
     event_coords['coordinate_source'] = 'espn'
     events = single.merge(event_coords, on = ['event_player_1', 'game_seconds', 'period', 'version', 'event'], how = 'left').drop(columns = ['espn_id'])
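For orientation, the two functions instrumented above feed the coordinate merge on the last line of the hunk. A hypothetical standalone run of that pipeline, assuming the import path suggested by the package name and using illustrative inputs:

# Assumed import path; adjust if the package exposes the module differently
from TopDownHockey_Scraper import TopDownHockey_NHL_Scraper as tdh

ids = tdh.scrape_espn_ids_single_game('2024-03-09', 'SJS', 'VAN')  # frame with an espn_id column
espn_id = int(ids.espn_id.iloc[0])
event_coords = tdh.scrape_espn_events(espn_id)  # per-event ESPN coordinates for the merge above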
@@ -3196,35 +3200,39 @@ def full_scrape(game_id_list, live = True, shift = False):
     df = full_scrape_1by1(game_id_list, live, shift_to_espn = shift)
 
     # Fixing the Pettersson issue for event player. Just going downstream for this.
-    df = df.assign(
-        event_player_1 = np.where(
-            (df.event_player_1 == 'ELIAS PETTERSSON') &
-            (df.event_description.str.contains('#', na=False)) &
-            (df.event_description.str.contains(' PETTERSSON', na=False)) &
-            (df.event_description.str.extract(r'#(\d+) PETTERSSON', expand=False) == '25'),
-            'ELIAS PETTERSSON(D)', df.event_player_1),
-        event_player_2 = np.where(
-            (df.event_player_2 == 'ELIAS PETTERSSON') &
-            (
-                # Goal and Petey got A1
-                ((df.event_type == 'GOAL') &
-                 (df.event_description.str.contains(': #', na=False)) &
-                 (df.event_description.str.contains(' PETTERSSON', na=False)) &
-                 (df.event_description.str.extract(r': #(\d+) PETTERSSON', expand=False) == '25')) |
-                # Not a goal, Petey was EP2
-                ((df.event_type != 'GOAL') &
-                 (df.event_description.str.contains('VAN #', na=False)) &
-                 (df.event_description.str.contains(' PETTERSSON', na=False)) &
-                 (df.event_description.str.extract(r'VAN #(\d+) PETTERSSON', expand=False) == '25'))
-            ),
-            'ELIAS PETTERSSON(D)', df.event_player_2),
-        event_player_3 = np.where(
-            (df.event_player_3=='ELIAS PETTERSSON') &
-            (df.event_description.str.contains('#', na=False)) &
-            (df.event_description.str.contains(' PETTERSSON', na=False)) &
-            (df.event_description.str.extract(r'#(\d+) PETTERSSON(?:\s|$)', expand=False) == '25'),
-            'ELIAS PETTERSSON(D)', df.event_player_3)
-    )
+    try:
+        df = df.assign(
+            event_player_1 = np.where(
+                (df.event_player_1 == 'ELIAS PETTERSSON') &
+                (df.event_description.str.contains('#', na=False)) &
+                (df.event_description.str.contains(' PETTERSSON', na=False)) &
+                (df.event_description.str.extract(r'#(\d+) PETTERSSON', expand=False) == '25'),
+                'ELIAS PETTERSSON(D)', df.event_player_1),
+            event_player_2 = np.where(
+                (df.event_player_2 == 'ELIAS PETTERSSON') &
+                (
+                    # Goal and Petey got A1
+                    ((df.event_type == 'GOAL') &
+                     (df.event_description.str.contains(': #', na=False)) &
+                     (df.event_description.str.contains(' PETTERSSON', na=False)) &
+                     (df.event_description.str.extract(r': #(\d+) PETTERSSON', expand=False) == '25')) |
+                    # Not a goal, Petey was EP2
+                    ((df.event_type != 'GOAL') &
+                     (df.event_description.str.contains('VAN #', na=False)) &
+                     (df.event_description.str.contains(' PETTERSSON', na=False)) &
+                     (df.event_description.str.extract(r'VAN #(\d+) PETTERSSON', expand=False) == '25'))
+                ),
+                'ELIAS PETTERSSON(D)', df.event_player_2),
+            event_player_3 = np.where(
+                (df.event_player_3=='ELIAS PETTERSSON') &
+                (df.event_description.str.contains('#', na=False)) &
+                (df.event_description.str.contains(' PETTERSSON', na=False)) &
+                (df.event_description.str.extract(r'#(\d+) PETTERSSON(?:\s|$)', expand=False) == '25'),
+                'ELIAS PETTERSSON(D)', df.event_player_3)
+        )
+    except Exception as e:
+        print(e)
+        continue
 
     # Don't even need this, we've had this problem with Stutzle for years, just let it be.
     # df.event_description = df.event_description.str.replace('FEHÃ\x89RVÃ\x81RY', 'FEHERVARY').str.replace('BLÜMEL', 'BLAMEL')
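The hunk above only wraps the existing logic in a try/except; the underlying disambiguation keys off the jersey number embedded in the play-by-play description, since Vancouver dressed two players named Elias Pettersson and, per this code, #25 is the defenseman. A minimal sketch of the same np.where / str.extract pattern on toy data (descriptions are illustrative):

import numpy as np
import pandas as pd

# Toy rows; in the package, df comes from full_scrape_1by1
df = pd.DataFrame({
    'event_player_1': ['ELIAS PETTERSSON', 'ELIAS PETTERSSON'],
    'event_description': ['VAN #25 PETTERSSON BLOCKED SHOT', 'VAN #40 PETTERSSON SHOT ON GOAL'],
})

# Relabel only when the description pins jersey #25 (the defenseman)
df['event_player_1'] = np.where(
    (df.event_player_1 == 'ELIAS PETTERSSON') &
    (df.event_description.str.extract(r'#(\d+) PETTERSSON', expand=False) == '25'),
    'ELIAS PETTERSSON(D)', df.event_player_1,
)
print(df.event_player_1.tolist())  # ['ELIAS PETTERSSON(D)', 'ELIAS PETTERSSON']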
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: TopDownHockey_Scraper
-Version: 6.0.0
+Version: 6.0.2
 Summary: The TopDownHockey Scraper
 Home-page: https://github.com/TopDownHockey/TopDownHockey_Scraper
 Author: Patrick Bacon
@@ -17,8 +17,6 @@ Requires-Dist: numpy
 Requires-Dist: pandas
 Requires-Dist: bs4
 Requires-Dist: datetime
-Requires-Dist: seaborn
-Requires-Dist: matplotlib
 Requires-Dist: xmltodict
 Requires-Dist: lxml
 Requires-Dist: natsort
@@ -2,8 +2,6 @@ numpy
 pandas
 bs4
 datetime
-seaborn
-matplotlib
 xmltodict
 lxml
 natsort