TopDownHockey-Scraper 5.0.1__py3-none-any.whl → 6.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of TopDownHockey-Scraper might be problematic. Click here for more details.
- TopDownHockey_Scraper/TopDownHockey_NHL_Scraper.py +820 -485
- {topdownhockey_scraper-5.0.1.dist-info → topdownhockey_scraper-6.0.2.dist-info}/METADATA +1 -3
- topdownhockey_scraper-6.0.2.dist-info/RECORD +7 -0
- {topdownhockey_scraper-5.0.1.dist-info → topdownhockey_scraper-6.0.2.dist-info}/WHEEL +1 -1
- topdownhockey_scraper-5.0.1.dist-info/RECORD +0 -7
- {topdownhockey_scraper-5.0.1.dist-info → topdownhockey_scraper-6.0.2.dist-info}/licenses/LICENSE +0 -0
- {topdownhockey_scraper-5.0.1.dist-info → topdownhockey_scraper-6.0.2.dist-info}/top_level.txt +0 -0
|
@@ -19,6 +19,45 @@ import xmltodict
|
|
|
19
19
|
from xml.parsers.expat import ExpatError
|
|
20
20
|
from requests.exceptions import ChunkedEncodingError
|
|
21
21
|
|
|
22
|
+
print('Successfully did local install plus update')
|
|
23
|
+
|
|
24
|
+
team_names = ['ANAHEIM DUCKS',
|
|
25
|
+
'ARIZONA COYOTES',
|
|
26
|
+
'ATLANTA THRASHERS',
|
|
27
|
+
'BOSTON BRUINS',
|
|
28
|
+
'BUFFALO SABRES',
|
|
29
|
+
'CALGARY FLAMES',
|
|
30
|
+
'CHICAGO BLACKHAWKS',
|
|
31
|
+
'COLORADO AVALANCHE',
|
|
32
|
+
'COLUMBUS BLUE JACKETS',
|
|
33
|
+
'DALLAS STARS',
|
|
34
|
+
'DETROIT RED WINGS',
|
|
35
|
+
'EDMONTON OILERS',
|
|
36
|
+
'FLORIDA PANTHERS',
|
|
37
|
+
'LOS ANGELES KINGS',
|
|
38
|
+
'MINNESOTA WILD',
|
|
39
|
+
'MONTRÉAL CANADIENS',
|
|
40
|
+
'MONTREAL CANADIENS',
|
|
41
|
+
'NASHVILLE PREDATORS',
|
|
42
|
+
'NEW JERSEY DEVILS',
|
|
43
|
+
'NEW YORK ISLANDERS',
|
|
44
|
+
'NEW YORK RANGERS',
|
|
45
|
+
'OTTAWA SENATORS',
|
|
46
|
+
'PHILADELPHIA FLYERS',
|
|
47
|
+
'PITTSBURGH PENGUINS',
|
|
48
|
+
'PHOENIX COYOTES',
|
|
49
|
+
'CAROLINA HURRICANES',
|
|
50
|
+
'SAN JOSE SHARKS',
|
|
51
|
+
'ST. LOUIS BLUES',
|
|
52
|
+
'TAMPA BAY LIGHTNING',
|
|
53
|
+
'TORONTO MAPLE LEAFS',
|
|
54
|
+
'UTAH MAMMOTH',
|
|
55
|
+
'VANCOUVER CANUCKS',
|
|
56
|
+
'VEGAS GOLDEN KNIGHTS',
|
|
57
|
+
'WASHINGTON CAPITALS',
|
|
58
|
+
'WINNIPEG JETS',
|
|
59
|
+
'SEATTLE KRAKEN']
|
|
60
|
+
|
|
22
61
|
# ewc stands for "Events we care about."
|
|
23
62
|
|
|
24
63
|
ewc = ['SHOT', 'HIT', 'BLOCK', 'MISS', 'GIVE', 'TAKE', 'GOAL']
|
|
@@ -231,16 +270,17 @@ def group_if_not_none(result):
|
|
|
231
270
|
return(result)
|
|
232
271
|
|
|
233
272
|
def scrape_html_roster(season, game_id):
|
|
273
|
+
|
|
234
274
|
url = 'http://www.nhl.com/scores/htmlreports/' + season + '/RO0' + game_id + '.HTM'
|
|
235
275
|
page = requests.get(url)
|
|
236
276
|
soup = BeautifulSoup(page.content.decode('ISO-8859-1'), 'lxml', multi_valued_attributes = None)
|
|
237
|
-
|
|
277
|
+
|
|
238
278
|
teamsoup = soup.find_all('td', {'align':'center', 'class':['teamHeading + border', 'teamHeading + border '], 'width':'50%'})
|
|
239
279
|
away_team = teamsoup[0].get_text()
|
|
240
280
|
home_team = teamsoup[1].get_text()
|
|
241
|
-
|
|
281
|
+
|
|
242
282
|
home_player_soup = (soup.find_all('table', {'align':'center', 'border':'0', 'cellpadding':'0',
|
|
243
|
-
'cellspacing':'0', 'width':'100%'
|
|
283
|
+
'cellspacing':'0', 'width':'100%'}))[2].find_all('td')
|
|
244
284
|
|
|
245
285
|
length = int(len(home_player_soup)/3)
|
|
246
286
|
|
|
@@ -251,7 +291,7 @@ def scrape_html_roster(season, game_id):
|
|
|
251
291
|
home_player_df = home_player_df.drop(0).assign(team = 'home', team_name = home_team)
|
|
252
292
|
|
|
253
293
|
away_player_soup = (soup.find_all('table', {'align':'center', 'border':'0', 'cellpadding':'0',
|
|
254
|
-
'cellspacing':'0', 'width':'100%',
|
|
294
|
+
'cellspacing':'0', 'width':'100%', }))[1].find_all('td')
|
|
255
295
|
|
|
256
296
|
length = int(len(away_player_soup)/3)
|
|
257
297
|
|
|
@@ -260,15 +300,15 @@ def scrape_html_roster(season, game_id):
|
|
|
260
300
|
away_player_df.columns = away_player_df.iloc[0]
|
|
261
301
|
|
|
262
302
|
away_player_df = away_player_df.drop(0).assign(team = 'away', team_name = away_team)
|
|
263
|
-
|
|
303
|
+
|
|
264
304
|
#global home_scratch_soup
|
|
265
|
-
|
|
305
|
+
|
|
266
306
|
if len(soup.find_all('table', {'align':'center', 'border':'0', 'cellpadding':'0',
|
|
267
|
-
'cellspacing':'0', 'width':'100%',
|
|
307
|
+
'cellspacing':'0', 'width':'100%', }))>3:
|
|
268
308
|
|
|
269
309
|
home_scratch_soup = (soup.find_all('table', {'align':'center', 'border':'0', 'cellpadding':'0',
|
|
270
|
-
'cellspacing':'0', 'width':'100%',
|
|
271
|
-
|
|
310
|
+
'cellspacing':'0', 'width':'100%', }))[4].find_all('td')
|
|
311
|
+
|
|
272
312
|
if len(home_scratch_soup)>1:
|
|
273
313
|
|
|
274
314
|
length = int(len(home_scratch_soup)/3)
|
|
@@ -284,10 +324,10 @@ def scrape_html_roster(season, game_id):
|
|
|
284
324
|
home_scratch_df = pd.DataFrame()
|
|
285
325
|
|
|
286
326
|
if len(soup.find_all('table', {'align':'center', 'border':'0', 'cellpadding':'0',
|
|
287
|
-
'cellspacing':'0', 'width':'100%',
|
|
288
|
-
|
|
327
|
+
'cellspacing':'0', 'width':'100%', }))>2:
|
|
328
|
+
|
|
289
329
|
away_scratch_soup = (soup.find_all('table', {'align':'center', 'border':'0', 'cellpadding':'0',
|
|
290
|
-
'cellspacing':'0', 'width':'100%',
|
|
330
|
+
'cellspacing':'0', 'width':'100%', }))[3].find_all('td')
|
|
291
331
|
|
|
292
332
|
if len(away_scratch_soup)>1:
|
|
293
333
|
|
|
@@ -306,31 +346,34 @@ def scrape_html_roster(season, game_id):
|
|
|
306
346
|
player_df = pd.concat([home_player_df, away_player_df]).assign(status = 'player')
|
|
307
347
|
scratch_df = pd.concat([home_scratch_df, away_scratch_df]).assign(status = 'scratch')
|
|
308
348
|
roster_df = pd.concat([player_df, scratch_df])
|
|
309
|
-
|
|
349
|
+
|
|
310
350
|
roster_df = roster_df.assign(team = np.where(roster_df.team=='CANADIENS MONTREAL', 'MONTREAL CANADIENS', roster_df.team))
|
|
311
|
-
|
|
351
|
+
|
|
352
|
+
roster_df = roster_df.assign(team = np.where(roster_df.team=='MONTRÉAL CANADIENS', 'MONTREAL CANADIENS', roster_df.team))
|
|
353
|
+
|
|
312
354
|
# FIX NAMES
|
|
313
355
|
|
|
314
356
|
roster_df = roster_df.rename(columns = {'Nom/Name':'Name'})
|
|
315
|
-
|
|
316
|
-
roster_df.Name = roster_df.Name.
|
|
317
|
-
|
|
357
|
+
|
|
358
|
+
roster_df.Name = roster_df.Name.apply(lambda x: re.sub(r' \(A\)$', '', x).strip())
|
|
359
|
+
roster_df.Name = roster_df.Name.apply(lambda x: re.sub(r' \(C\)$', '', x).strip())
|
|
360
|
+
|
|
318
361
|
# Max Pacioretty doesn't exist in ESPN in 2009-2010, sadly.
|
|
319
|
-
|
|
362
|
+
|
|
320
363
|
roster_df['Name'] = np.where(roster_df['Name'].str.contains('ALEXANDRE '),
|
|
321
364
|
roster_df.Name.str.replace('ALEXANDRE ', 'ALEX '),
|
|
322
365
|
roster_df['Name'])
|
|
323
|
-
|
|
366
|
+
|
|
324
367
|
roster_df['Name'] = np.where(roster_df['Name'].str.contains('ALEXANDER '),
|
|
325
368
|
roster_df.Name.str.replace('ALEXANDER ', 'ALEX '),
|
|
326
369
|
roster_df['Name'])
|
|
327
|
-
|
|
370
|
+
|
|
328
371
|
roster_df['Name'] = np.where(roster_df['Name'].str.contains('CHRISTOPHER '),
|
|
329
372
|
roster_df.Name.str.replace('CHRISTOPHER ', 'CHRIS '),
|
|
330
373
|
roster_df['Name'])
|
|
331
|
-
|
|
374
|
+
|
|
332
375
|
# List of names and fixed from Evolving Hockey Scraper.
|
|
333
|
-
|
|
376
|
+
|
|
334
377
|
roster_df = roster_df.assign(Name =
|
|
335
378
|
(np.where(roster_df['Name']== "ANDREI KASTSITSYN" , "ANDREI KOSTITSYN",
|
|
336
379
|
(np.where(roster_df['Name']== "AJ GREER" , "A.J. GREER",
|
|
@@ -434,7 +477,7 @@ def scrape_html_roster(season, game_id):
|
|
|
434
477
|
roster_df['Name']))))))))))))))))))))))))))))))))))))))))))))))))))))))
|
|
435
478
|
)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))
|
|
436
479
|
))))))))))
|
|
437
|
-
|
|
480
|
+
|
|
438
481
|
roster_df['Name'] = (np.where(roster_df['Name']== "RODNEY PELLEY" , "ROD PELLEY",
|
|
439
482
|
(np.where(roster_df['Name']== "SIARHEI KASTSITSYN" , "SERGEI KOSTITSYN",
|
|
440
483
|
(np.where(roster_df['Name']== "SIMEON VARLAMOV" , "SEMYON VARLAMOV",
|
|
@@ -475,7 +518,7 @@ def scrape_html_roster(season, game_id):
|
|
|
475
518
|
(np.where(roster_df['Name']== "MATTIAS JANMARK-NYLEN" , "MATTIAS JANMARK",
|
|
476
519
|
(np.where(roster_df['Name']== "JOSH DUNNE" , "JOSHUA DUNNE",roster_df['Name'])))))))))))))))))))))))))))))))))))))))))))
|
|
477
520
|
)))))))))))))))))))))))))))))))))))
|
|
478
|
-
|
|
521
|
+
|
|
479
522
|
roster_df['Name'] = np.where((roster_df['Name']=="SEBASTIAN AHO") & (roster_df['Pos']=='D'), 'SEBASTIAN AHO SWE', roster_df['Name'])
|
|
480
523
|
roster_df['Name'] = np.where((roster_df['Name']=="ELIAS PETTERSSON") & (roster_df['Pos']=='D'), 'ELIAS PETTERSSON(D)', roster_df['Name'])
|
|
481
524
|
roster_df['Name'] = np.where((roster_df['Name']=="COLIN WHITE") & (roster_df['Pos']=='D'), 'COLIN WHITE CAN', roster_df['Name'])
|
|
@@ -504,454 +547,374 @@ def scrape_html_roster(season, game_id):
|
|
|
504
547
|
(np.where(roster_df['Name']== "EMIL LILLEBERG" , "EMIL MARTINSEN LILLEBERG",
|
|
505
548
|
(np.where(roster_df['Name']== "CAMERON ATKINSON" , "CAM ATKINSON",
|
|
506
549
|
(np.where(roster_df['Name']== "JURAJ SLAFKOVSKA" , "JURAJ SLAFKOVSKY",
|
|
507
|
-
roster_df['Name']
|
|
550
|
+
(np.where(roster_df['Name']== "MARTIN FEHARVARY" , "MARTIN FEHERVARY",
|
|
551
|
+
roster_df['Name']))))))))))))))))))))))))))))))))))))
|
|
508
552
|
|
|
509
553
|
roster_df['Name'] = roster_df['Name'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.upper()
|
|
510
554
|
|
|
511
555
|
roster_df['Name'] = np.where(roster_df['Name']== "JURAJ SLAFKOVSKA" , "JURAJ SLAFKOVSKY", roster_df['Name']) # Need to do this after normalization, only then he becomes Slafkovska?
|
|
556
|
+
roster_df['Name'] = np.where(roster_df['Name']== "JOHN (JACK) ROSLOVIC" , "JACK ROSLOVIC", roster_df['Name'])
|
|
557
|
+
roster_df['Name'] = np.where(roster_df['Name']== "ANTHONY-JOHN (AJ) GREER" , "A.J. GREER", roster_df['Name'])
|
|
558
|
+
|
|
559
|
+
roster_df['Name'] = np.where(roster_df['Name']== "MARTIN FEHARVARY" , "MARTIN FEHERVARY", roster_df['Name'])
|
|
560
|
+
|
|
561
|
+
roster_df['Name'] = np.where(roster_df['Name']== "MATAJ BLAMEL" , "MATAJ BLAMEL", roster_df['Name'])
|
|
562
|
+
|
|
563
|
+
roster_df['Name'] = roster_df['Name'].str.replace(' ', ' ')
|
|
512
564
|
|
|
513
565
|
return roster_df
|
|
514
566
|
|
|
515
|
-
def scrape_html_shifts(season, game_id):
|
|
516
|
-
|
|
567
|
+
def scrape_html_shifts(season, game_id, live = True):
|
|
568
|
+
|
|
569
|
+
goalie_names = ['AARON DELL',
|
|
570
|
+
'ADAM HUSKA',
|
|
571
|
+
'ADAM WERNER',
|
|
572
|
+
'ADAM WILCOX',
|
|
573
|
+
'ADIN HILL',
|
|
574
|
+
'AKIRA SCHMID',
|
|
575
|
+
'AL MONTOYA',
|
|
576
|
+
'ALEKSEI KOLOSOV',
|
|
577
|
+
'ALES STEZKA',
|
|
578
|
+
'ALEX AULD',
|
|
579
|
+
'ALEX LYON',
|
|
580
|
+
'ALEX NEDELJKOVIC',
|
|
581
|
+
'ALEX PECHURSKI',
|
|
582
|
+
'ALEX SALAK',
|
|
583
|
+
'ALEX STALOCK',
|
|
584
|
+
'ALEXANDAR GEORGIEV',
|
|
585
|
+
'ALEXEI MELNICHUK',
|
|
586
|
+
'ALLEN YORK',
|
|
587
|
+
'ANDERS LINDBACK',
|
|
588
|
+
'ANDERS NILSSON',
|
|
589
|
+
'ANDREI VASILEVSKIY',
|
|
590
|
+
'ANDREW HAMMOND',
|
|
591
|
+
'ANDREW RAYCROFT',
|
|
592
|
+
'ANDREY MAKAROV',
|
|
593
|
+
'ANTERO NIITTYMAKI',
|
|
594
|
+
'ANTHONY STOLARZ',
|
|
595
|
+
'ANTOINE BIBEAU',
|
|
596
|
+
'ANTON FORSBERG',
|
|
597
|
+
'ANTON KHUDOBIN',
|
|
598
|
+
'ANTTI NIEMI',
|
|
599
|
+
'ANTTI RAANTA',
|
|
600
|
+
'ARTURS SILOVS',
|
|
601
|
+
'ARTYOM ZAGIDULIN',
|
|
602
|
+
'ARVID SODERBLOM',
|
|
603
|
+
'BEN BISHOP',
|
|
604
|
+
'BEN SCRIVENS',
|
|
605
|
+
'BRAD THIESSEN',
|
|
606
|
+
'BRADEN HOLTBY',
|
|
607
|
+
'BRANDON HALVERSON',
|
|
608
|
+
'BRENT JOHNSON',
|
|
609
|
+
'BRENT KRAHN',
|
|
610
|
+
'BRIAN BOUCHER',
|
|
611
|
+
'BRIAN ELLIOTT',
|
|
612
|
+
'BRIAN FOSTER',
|
|
613
|
+
'CAL PETERSEN',
|
|
614
|
+
'CALVIN HEETER',
|
|
615
|
+
'CALVIN PETERSEN',
|
|
616
|
+
'CALVIN PICKARD',
|
|
617
|
+
'CAM TALBOT',
|
|
618
|
+
'CAM WARD',
|
|
619
|
+
'CAREY PRICE',
|
|
620
|
+
'CARTER HART',
|
|
621
|
+
'CARTER HUTTON',
|
|
622
|
+
'CASEY DESMITH',
|
|
623
|
+
'CAYDEN PRIMEAU',
|
|
624
|
+
'CEDRICK DESJARDINS',
|
|
625
|
+
'CHAD JOHNSON',
|
|
626
|
+
'CHARLIE LINDGREN',
|
|
627
|
+
'CHRIS BECKFORD-TSEU',
|
|
628
|
+
'CHRIS DRIEDGER',
|
|
629
|
+
'CHRIS GIBSON',
|
|
630
|
+
'CHRIS HOLT',
|
|
631
|
+
'CHRIS MASON',
|
|
632
|
+
'CHRIS OSGOOD',
|
|
633
|
+
'COLLIN DELIA',
|
|
634
|
+
'CONNOR HELLEBUYCK',
|
|
635
|
+
'CONNOR INGRAM',
|
|
636
|
+
'CONNOR KNAPP',
|
|
637
|
+
'COREY CRAWFORD',
|
|
638
|
+
'CORY SCHNEIDER',
|
|
639
|
+
'CRAIG ANDERSON',
|
|
640
|
+
'CRISTOBAL HUET',
|
|
641
|
+
'CRISTOPHER NILSTORP',
|
|
642
|
+
'CURTIS JOSEPH',
|
|
643
|
+
'CURTIS MCELHINNEY',
|
|
644
|
+
'CURTIS SANFORD',
|
|
645
|
+
'DAN CLOUTIER',
|
|
646
|
+
'DAN ELLIS',
|
|
647
|
+
'DAN VLADAR',
|
|
648
|
+
'DANIEL LACOSTA',
|
|
649
|
+
'DANIEL TAYLOR',
|
|
650
|
+
'DANIIL TARASOV',
|
|
651
|
+
'DANY SABOURIN',
|
|
652
|
+
'DARCY KUEMPER',
|
|
653
|
+
'DAVID AEBISCHER',
|
|
654
|
+
'DAVID AYRES',
|
|
655
|
+
'DAVID LENEVEU',
|
|
656
|
+
'DAVID RITTICH',
|
|
657
|
+
'DENNIS HILDEBY',
|
|
658
|
+
'DEVAN DUBNYK',
|
|
659
|
+
'DEVIN COOLEY',
|
|
660
|
+
'DEVON LEVI',
|
|
661
|
+
'DIMITRI PATZOLD',
|
|
662
|
+
'DOMINIK HASEK',
|
|
663
|
+
'DREW COMMESSO',
|
|
664
|
+
'DREW MACINTYRE',
|
|
665
|
+
'DUSTIN TOKARSKI',
|
|
666
|
+
'DUSTIN WOLF',
|
|
667
|
+
'DWAYNE ROLOSON',
|
|
668
|
+
'DYLAN FERGUSON',
|
|
669
|
+
'DYLAN WELLS',
|
|
670
|
+
'EDDIE LACK',
|
|
671
|
+
'EDWARD PASQUALE',
|
|
672
|
+
'EETU MAKINIEMI',
|
|
673
|
+
'ELVIS MERZLIKINS',
|
|
674
|
+
'ERIC COMRIE',
|
|
675
|
+
'ERIK ERSBERG',
|
|
676
|
+
'ERIK KALLGREN',
|
|
677
|
+
'ERIK PORTILLO',
|
|
678
|
+
'EVGENI NABOKOV',
|
|
679
|
+
'FELIX SANDSTROM',
|
|
680
|
+
'FILIP GUSTAVSSON',
|
|
681
|
+
'FREDERIK ANDERSEN',
|
|
682
|
+
'FREDRIK NORRENA',
|
|
683
|
+
'GARRET SPARKS',
|
|
684
|
+
'GEORGI ROMANOV',
|
|
685
|
+
'GILLES SENN',
|
|
686
|
+
'HANNU TOIVONEN',
|
|
687
|
+
'HARRI SATERI',
|
|
688
|
+
'HENRIK KARLSSON',
|
|
689
|
+
'HENRIK LUNDQVIST',
|
|
690
|
+
'HUGO ALNEFELT',
|
|
691
|
+
'HUNTER MISKA',
|
|
692
|
+
'HUNTER SHEPARD',
|
|
693
|
+
'IGOR SHESTERKIN',
|
|
694
|
+
'IIRO TARKKI',
|
|
695
|
+
'ILYA BRYZGALOV',
|
|
696
|
+
'ILYA SAMSONOV',
|
|
697
|
+
'ILYA SOROKIN',
|
|
698
|
+
'IVAN FEDOTOV',
|
|
699
|
+
'IVAN PROSVETOV',
|
|
700
|
+
'J-F BERUBE',
|
|
701
|
+
'JACK CAMPBELL',
|
|
702
|
+
'JACK LAFONTAINE',
|
|
703
|
+
'JACOB MARKSTROM',
|
|
704
|
+
'JAKE ALLEN',
|
|
705
|
+
'JAKE OETTINGER',
|
|
706
|
+
'JAKUB DOBES',
|
|
707
|
+
'JAKUB SKAREK',
|
|
708
|
+
'JAMES REIMER',
|
|
709
|
+
'JARED COREAU',
|
|
710
|
+
'JAROSLAV HALAK',
|
|
711
|
+
'JASON KASDORF',
|
|
712
|
+
'JASON LABARBERA',
|
|
713
|
+
'JAXSON STAUBER',
|
|
714
|
+
'JEAN-SEBASTIEN AUBIN',
|
|
715
|
+
'JEAN-SEBASTIEN GIGUERE',
|
|
716
|
+
'JEFF DESLAURIERS',
|
|
717
|
+
'JEFF FRAZEE',
|
|
718
|
+
'JEFF GLASS',
|
|
719
|
+
'JEFF ZATKOFF',
|
|
720
|
+
'JEREMY DUCHESNE',
|
|
721
|
+
'JEREMY SMITH',
|
|
722
|
+
'JEREMY SWAYMAN',
|
|
723
|
+
'JESPER WALLSTEDT',
|
|
724
|
+
'JET GREAVES',
|
|
725
|
+
'JHONAS ENROTH',
|
|
726
|
+
'JIMMY HOWARD',
|
|
727
|
+
'JIRI PATERA',
|
|
728
|
+
'JOACIM ERIKSSON',
|
|
729
|
+
'JOCELYN THIBAULT',
|
|
730
|
+
'JOEL BLOMQVIST',
|
|
731
|
+
'JOEL HOFER',
|
|
732
|
+
'JOEY DACCORD',
|
|
733
|
+
'JOEY MACDONALD',
|
|
734
|
+
'JOHAN BACKLUND',
|
|
735
|
+
'JOHAN HEDBERG',
|
|
736
|
+
'JOHAN HOLMQVIST',
|
|
737
|
+
'JOHN CURRY',
|
|
738
|
+
'JOHN GIBSON',
|
|
739
|
+
'JOHN GRAHAME',
|
|
740
|
+
'JON GILLIES',
|
|
741
|
+
'JONAS GUSTAVSSON',
|
|
742
|
+
'JONAS HILLER',
|
|
743
|
+
'JONAS JOHANSSON',
|
|
744
|
+
'JONATHAN BERNIER',
|
|
745
|
+
'JONATHAN QUICK',
|
|
746
|
+
'JONI ORTIO',
|
|
747
|
+
'JOONAS KORPISALO',
|
|
748
|
+
'JORDAN BINNINGTON',
|
|
749
|
+
'JOSE THEODORE',
|
|
750
|
+
'JOSEF KORENAR',
|
|
751
|
+
'JOSEPH WOLL',
|
|
752
|
+
'JOSH HARDING',
|
|
753
|
+
'JOSH TORDJMAN',
|
|
754
|
+
'JUSSI RYNNAS',
|
|
755
|
+
'JUSTIN PETERS',
|
|
756
|
+
'JUSTIN POGGE',
|
|
757
|
+
'JUSTUS ANNUNEN',
|
|
758
|
+
'JUUSE SAROS',
|
|
759
|
+
'KAAPO KAHKONEN',
|
|
760
|
+
'KADEN FULCHER',
|
|
761
|
+
'KAREL VEJMELKA',
|
|
762
|
+
'KARI LEHTONEN',
|
|
763
|
+
'KARRI RAMO',
|
|
764
|
+
'KASIMIR KASKISUO',
|
|
765
|
+
'KEITH KINKAID',
|
|
766
|
+
'KEN APPLEBY',
|
|
767
|
+
'KENNETH APPLEBY',
|
|
768
|
+
'KENT SIMPSON',
|
|
769
|
+
'KEVIN BOYLE',
|
|
770
|
+
'KEVIN LANKINEN',
|
|
771
|
+
'KEVIN MANDOLESE',
|
|
772
|
+
'KEVIN POULIN',
|
|
773
|
+
'KEVIN WEEKES',
|
|
774
|
+
'KRISTERS GUDLEVSKIS',
|
|
775
|
+
'LANDON BOW',
|
|
776
|
+
'LAURENT BROSSOIT',
|
|
777
|
+
'LEEVI MERILAINEN',
|
|
778
|
+
'LELAND IRVING',
|
|
779
|
+
'LINUS ULLMARK',
|
|
780
|
+
'LOGAN THOMPSON',
|
|
781
|
+
'LOUIS DOMINGUE',
|
|
782
|
+
'LUKAS DOSTAL',
|
|
783
|
+
'MACKENZIE BLACKWOOD',
|
|
784
|
+
'MACKENZIE SKAPSKI',
|
|
785
|
+
'MADS SOGAARD',
|
|
786
|
+
'MAGNUS CHRONA',
|
|
787
|
+
'MAGNUS HELLBERG',
|
|
788
|
+
'MALCOLM SUBBAN',
|
|
789
|
+
'MANNY FERNANDEZ',
|
|
790
|
+
'MANNY LEGACE',
|
|
791
|
+
'MARC DENIS',
|
|
792
|
+
'MARC-ANDRE FLEURY',
|
|
793
|
+
'MARCUS HOGBERG',
|
|
794
|
+
'MAREK LANGHAMER',
|
|
795
|
+
'MAREK MAZANEC',
|
|
796
|
+
'MAREK SCHWARZ',
|
|
797
|
+
'MARK DEKANICH',
|
|
798
|
+
'MARK VISENTIN',
|
|
799
|
+
'MARTIN BIRON',
|
|
800
|
+
'MARTIN BRODEUR',
|
|
801
|
+
'MARTIN GERBER',
|
|
802
|
+
'MARTIN JONES',
|
|
803
|
+
'MARTY TURCO',
|
|
804
|
+
'MATHIEU GARON',
|
|
805
|
+
'MATISS KIVLENIEKS',
|
|
806
|
+
'MATT CLIMIE',
|
|
807
|
+
'MATT HACKETT',
|
|
808
|
+
'MATT KEETLEY',
|
|
809
|
+
'MATT MURRAY',
|
|
810
|
+
'MATT TOMKINS',
|
|
811
|
+
'MATT VILLALTA',
|
|
812
|
+
'MATT ZABA',
|
|
813
|
+
"MATTHEW O'CONNOR",
|
|
814
|
+
'MAXIME LAGACE',
|
|
815
|
+
'MICHAEL DIPIETRO',
|
|
816
|
+
'MICHAEL HOUSER',
|
|
817
|
+
'MICHAEL HUTCHINSON',
|
|
818
|
+
'MICHAEL LEIGHTON',
|
|
819
|
+
'MICHAEL MCNIVEN',
|
|
820
|
+
'MICHAL NEUVIRTH',
|
|
821
|
+
'MIIKKA KIPRUSOFF',
|
|
822
|
+
'MIKAEL TELLQVIST',
|
|
823
|
+
'MIKE BRODEUR',
|
|
824
|
+
'MIKE CONDON',
|
|
825
|
+
'MIKE MCKENNA',
|
|
826
|
+
'MIKE MURPHY',
|
|
827
|
+
'MIKE SMITH',
|
|
828
|
+
'MIKKO KOSKINEN',
|
|
829
|
+
'NATHAN LAWSON',
|
|
830
|
+
'NATHAN LIEUWEN',
|
|
831
|
+
'NICO DAWS',
|
|
832
|
+
'NIKITA TOLOPILO',
|
|
833
|
+
'NIKKE KOKKO',
|
|
834
|
+
'NIKLAS BACKSTROM',
|
|
835
|
+
'NIKLAS SVEDBERG',
|
|
836
|
+
'NIKLAS TREUTLE',
|
|
837
|
+
'NIKOLAI KHABIBULIN',
|
|
838
|
+
'OLIE KOLZIG',
|
|
839
|
+
'OLIVIER RODRIGUE',
|
|
840
|
+
'OLLE ERIKSSON EK',
|
|
841
|
+
'ONDREJ PAVELEC',
|
|
842
|
+
'OSCAR DANSK',
|
|
843
|
+
'PASCAL LECLAIRE',
|
|
844
|
+
'PATRICK LALIME',
|
|
845
|
+
'PAVEL FRANCOUZ',
|
|
846
|
+
'PEKKA RINNE',
|
|
847
|
+
'PETER BUDAJ',
|
|
848
|
+
'PETER MANNINO',
|
|
849
|
+
'PETR MRAZEK',
|
|
850
|
+
'PHEONIX COPLEY',
|
|
851
|
+
'PHILIPP GRUBAUER',
|
|
852
|
+
'PYOTR KOCHETKOV',
|
|
853
|
+
'RAY EMERY',
|
|
854
|
+
'RETO BERRA',
|
|
855
|
+
'RICHARD BACHMAN',
|
|
856
|
+
'RICK DIPIETRO',
|
|
857
|
+
'RIKU HELENIUS',
|
|
858
|
+
'ROB ZEPP',
|
|
859
|
+
'ROBERTO LUONGO',
|
|
860
|
+
'ROBIN LEHNER',
|
|
861
|
+
'ROMAN WILL',
|
|
862
|
+
'RYAN MILLER',
|
|
863
|
+
'SAM MONTEMBEAULT',
|
|
864
|
+
'SAMUEL MONTEMBEAULT',
|
|
865
|
+
'SAMI AITTOKALLIO',
|
|
866
|
+
'SAMUEL ERSSON',
|
|
867
|
+
'SCOTT CLEMMENSEN',
|
|
868
|
+
'SCOTT DARLING',
|
|
869
|
+
'SCOTT FOSTER',
|
|
870
|
+
'SCOTT WEDGEWOOD',
|
|
871
|
+
'SEBASTIAN COSSA',
|
|
872
|
+
'SEBASTIEN CARON',
|
|
873
|
+
'SEMYON VARLAMOV',
|
|
874
|
+
'SERGEI BOBROVSKY',
|
|
875
|
+
'SPENCER KNIGHT',
|
|
876
|
+
'SPENCER MARTIN',
|
|
877
|
+
'STEVE MASON',
|
|
878
|
+
'STEVE VALIQUETTE',
|
|
879
|
+
'STUART SKINNER',
|
|
880
|
+
'THATCHER DEMKO',
|
|
881
|
+
'THOMAS GREISS',
|
|
882
|
+
'THOMAS HODGES',
|
|
883
|
+
'TIM THOMAS',
|
|
884
|
+
'TIMO PIELMEIER',
|
|
885
|
+
'TOBIAS STEPHAN',
|
|
886
|
+
'TOM MCCOLLUM',
|
|
887
|
+
'TOMAS VOKOUN',
|
|
888
|
+
'TRENT MINER',
|
|
889
|
+
'TRISTAN JARRY',
|
|
890
|
+
'TRISTAN LENNOX',
|
|
891
|
+
'TROY GROSENICK',
|
|
892
|
+
'TUUKKA RASK',
|
|
893
|
+
'TY CONKLIN',
|
|
894
|
+
'TYLER BUNZ',
|
|
895
|
+
'TYLER WEIMAN',
|
|
896
|
+
'UKKO-PEKKA LUUKKONEN',
|
|
897
|
+
'VEINI VEHVILAINEN',
|
|
898
|
+
'VESA TOSKALA',
|
|
899
|
+
'VICTOR OSTMAN',
|
|
900
|
+
'VIKTOR FASTH',
|
|
901
|
+
'VILLE HUSSO',
|
|
902
|
+
'VITEK VANECEK',
|
|
903
|
+
'WADE DUBIELEWICZ',
|
|
904
|
+
'YANIV PERETS',
|
|
905
|
+
'YANN DANIS',
|
|
906
|
+
'YAROSLAV ASKAROV',
|
|
907
|
+
'ZACH FUCALE',
|
|
908
|
+
'ZACH SAWCHENKO',
|
|
909
|
+
'ZANE MCINTYRE']
|
|
910
|
+
|
|
517
911
|
url = 'http://www.nhl.com/scores/htmlreports/' + season + '/TH0' + game_id + '.HTM'
|
|
518
912
|
page = (requests.get(url))
|
|
519
|
-
|
|
520
|
-
found =
|
|
913
|
+
home_soup = BeautifulSoup(page.content)
|
|
914
|
+
found = home_soup.find_all('td', {'class':['playerHeading + border', 'lborder + bborder']})
|
|
521
915
|
if len(found)==0:
|
|
522
916
|
raise IndexError('This game has no shift data.')
|
|
523
|
-
thisteam =
|
|
524
|
-
|
|
525
|
-
goalie_names = ['AARON DELL',
|
|
526
|
-
'AARON SOROCHAN',
|
|
527
|
-
'ADAM HUSKA',
|
|
528
|
-
'ADAM WERNER',
|
|
529
|
-
'ADAM WILCOX',
|
|
530
|
-
'ADIN HILL',
|
|
531
|
-
'AKIRA SCHMID',
|
|
532
|
-
'AL MONTOYA',
|
|
533
|
-
'ALEKSEI KOLOSOV',
|
|
534
|
-
'ALEX AULD',
|
|
535
|
-
"ALEX D'ORIO",
|
|
536
|
-
'ALEX LYON',
|
|
537
|
-
'ALEX NEDELJKOVIC',
|
|
538
|
-
'ALEX PECHURSKI',
|
|
539
|
-
'ALEX SALAK',
|
|
540
|
-
'ALEX STALOCK',
|
|
541
|
-
'ALEXANDAR GEORGIEV',
|
|
542
|
-
'ALEXEI MELNICHUK',
|
|
543
|
-
'ALLEN YORK',
|
|
544
|
-
'ANDERS LINDBACK',
|
|
545
|
-
'ANDERS NILSSON',
|
|
546
|
-
'ANDREI VASILEVSKIY',
|
|
547
|
-
'ANDREW HAMMOND',
|
|
548
|
-
'ANDREW RAYCROFT',
|
|
549
|
-
'ANDREY MAKAROV',
|
|
550
|
-
'ANGUS REDMOND',
|
|
551
|
-
'ANTERO NIITTYMAKI',
|
|
552
|
-
'ANTHONY STOLARZ',
|
|
553
|
-
'ANTOINE BIBEAU',
|
|
554
|
-
'ANTON FORSBERG',
|
|
555
|
-
'ANTON KHUDOBIN',
|
|
556
|
-
'ANTTI NIEMI',
|
|
557
|
-
'ANTTI RAANTA',
|
|
558
|
-
'ARTURS SILOVS',
|
|
559
|
-
'ARTYOM ZAGIDULIN',
|
|
560
|
-
'ARVID SODERBLOM',
|
|
561
|
-
'BEN BISHOP',
|
|
562
|
-
'BEN SCRIVENS',
|
|
563
|
-
'BEN WEXLER',
|
|
564
|
-
'BRAD THIESSEN',
|
|
565
|
-
'BRADEN HOLTBY',
|
|
566
|
-
'BRANDON HALVERSON',
|
|
567
|
-
'BRENT JOHNSON',
|
|
568
|
-
'BRENT KRAHN',
|
|
569
|
-
'BRETT LEONHARDT',
|
|
570
|
-
'BRIAN BOUCHER',
|
|
571
|
-
'BRIAN ELLIOTT',
|
|
572
|
-
'BRIAN FOSTER',
|
|
573
|
-
'BRYAN PITTON',
|
|
574
|
-
'CAL PETERSEN',
|
|
575
|
-
'CALVIN HEETER',
|
|
576
|
-
'CALVIN PETERSEN',
|
|
577
|
-
'CALVIN PICKARD',
|
|
578
|
-
'CAM TALBOT',
|
|
579
|
-
'CAM WARD',
|
|
580
|
-
'CAMERON JOHNSON',
|
|
581
|
-
'CAREY PRICE',
|
|
582
|
-
'CARTER HART',
|
|
583
|
-
'CARTER HUTTON',
|
|
584
|
-
'CASEY DESMITH',
|
|
585
|
-
'CAYDEN PRIMEAU',
|
|
586
|
-
'CEDRICK DESJARDINS',
|
|
587
|
-
'CHAD JOHNSON',
|
|
588
|
-
'CHARLIE LINDGREN',
|
|
589
|
-
'CHET PICKARD',
|
|
590
|
-
'CHRIS BECKFORD-TSEU',
|
|
591
|
-
'CHRIS DRIEDGER',
|
|
592
|
-
'CHRIS GIBSON',
|
|
593
|
-
'CHRIS HOLT',
|
|
594
|
-
'CHRIS MASON',
|
|
595
|
-
'CHRIS OSGOOD',
|
|
596
|
-
'COLE KEHLER',
|
|
597
|
-
'COLLIN DELIA',
|
|
598
|
-
'CONNOR HELLEBUYCK',
|
|
599
|
-
'CONNOR INGRAM',
|
|
600
|
-
'CONNOR KNAPP',
|
|
601
|
-
'COREY CRAWFORD',
|
|
602
|
-
'CORY SCHNEIDER',
|
|
603
|
-
'CRAIG ANDERSON',
|
|
604
|
-
'CRISTOBAL HUET',
|
|
605
|
-
'CRISTOPHER NILSTORP',
|
|
606
|
-
'CURTIS JOSEPH',
|
|
607
|
-
'CURTIS MCELHINNEY',
|
|
608
|
-
'CURTIS SANFORD',
|
|
609
|
-
'DAN CLOUTIER',
|
|
610
|
-
'DAN ELLIS',
|
|
611
|
-
'DAN TURPLE',
|
|
612
|
-
'DAN VLADAR',
|
|
613
|
-
'DANIEL ALTSHULLER',
|
|
614
|
-
'DANIEL LACOSTA',
|
|
615
|
-
'DANIEL LARSSON',
|
|
616
|
-
'DANIEL MANZATO',
|
|
617
|
-
'DANIEL TAYLOR',
|
|
618
|
-
'DANIIL TARASOV',
|
|
619
|
-
'DANY SABOURIN',
|
|
620
|
-
'DARCY KUEMPER',
|
|
621
|
-
'DAREN MACHESNEY',
|
|
622
|
-
'DAVID AEBISCHER',
|
|
623
|
-
'DAVID AYRES',
|
|
624
|
-
'DAVID LENEVEU',
|
|
625
|
-
'DAVID RITTICH',
|
|
626
|
-
'DAVID SHANTZ',
|
|
627
|
-
'DENNIS ENDRAS',
|
|
628
|
-
'DENNIS HILDEBY',
|
|
629
|
-
'DERECK BARIBEAU',
|
|
630
|
-
'DEVAN DUBNYK',
|
|
631
|
-
'DEVIN COOLEY',
|
|
632
|
-
'DEVON LEVI',
|
|
633
|
-
'DIMITRI PATZOLD',
|
|
634
|
-
'DOMINIK HASEK',
|
|
635
|
-
'DREW COMMESSO',
|
|
636
|
-
'DREW MACINTYRE',
|
|
637
|
-
'DUSTIN BUTLER',
|
|
638
|
-
'DUSTIN TOKARSKI',
|
|
639
|
-
'DUSTIN WOLF',
|
|
640
|
-
'DUSTYN ZENNER',
|
|
641
|
-
'DWAYNE ROLOSON',
|
|
642
|
-
'DYLAN FERGUSON',
|
|
643
|
-
'DYLAN WELLS',
|
|
644
|
-
'EAMON MCADAM',
|
|
645
|
-
'EDDIE LACK',
|
|
646
|
-
'EDWARD PASQUALE',
|
|
647
|
-
'EETU MAKINIEMI',
|
|
648
|
-
'ELVIS MERZLIKINS',
|
|
649
|
-
'EMIL LARMI',
|
|
650
|
-
'ERIC COMRIE',
|
|
651
|
-
'ERIC HARTZELL',
|
|
652
|
-
'ERIC SEMBORSKI',
|
|
653
|
-
'ERIK ERSBERG',
|
|
654
|
-
'ERIK KALLGREN',
|
|
655
|
-
'ERIK PORTILLO',
|
|
656
|
-
'EVAN CORMIER',
|
|
657
|
-
'EVAN FITZPATRICK',
|
|
658
|
-
'EVGENI NABOKOV',
|
|
659
|
-
'FELIX SANDSTROM',
|
|
660
|
-
'FILIP GUSTAVSSON',
|
|
661
|
-
'FRED BRATHWAITE',
|
|
662
|
-
'FREDERIC CASSIVI',
|
|
663
|
-
'FREDERIK ANDERSEN',
|
|
664
|
-
'FREDRIK NORRENA',
|
|
665
|
-
'GARRET SPARKS',
|
|
666
|
-
'GAVIN MCHALE',
|
|
667
|
-
'GEORGI ROMANOV',
|
|
668
|
-
'GERALD COLEMAN',
|
|
669
|
-
'GILLES SENN',
|
|
670
|
-
'HANNU TOIVONEN',
|
|
671
|
-
'HARRI SATERI',
|
|
672
|
-
'HENRIK KARLSSON',
|
|
673
|
-
'HENRIK LUNDQVIST',
|
|
674
|
-
'HUGO ALNEFELT',
|
|
675
|
-
'HUNTER MISKA',
|
|
676
|
-
'HUNTER SHEPARD',
|
|
677
|
-
'IGOR BOBKOV',
|
|
678
|
-
'IGOR SHESTERKIN',
|
|
679
|
-
'IIRO TARKKI',
|
|
680
|
-
'ILYA BRYZGALOV',
|
|
681
|
-
'ILYA SAMSONOV',
|
|
682
|
-
'ILYA SOROKIN',
|
|
683
|
-
'IVAN FEDOTOV',
|
|
684
|
-
'IVAN PROSVETOV',
|
|
685
|
-
'J-F BERUBE',
|
|
686
|
-
'J.F. BERUBE',
|
|
687
|
-
'JACK CAMPBELL',
|
|
688
|
-
'JACK LAFONTAINE',
|
|
689
|
-
'JACOB MARKSTROM',
|
|
690
|
-
'JAKE ALLEN',
|
|
691
|
-
'JAKE OETTINGER',
|
|
692
|
-
'JAMES REIMER',
|
|
693
|
-
'JARED COREAU',
|
|
694
|
-
'JAROSLAV HALAK',
|
|
695
|
-
'JASON BACASHIHUA',
|
|
696
|
-
'JASON KASDORF',
|
|
697
|
-
'JASON LABARBERA',
|
|
698
|
-
'JASON MISSIAEN',
|
|
699
|
-
'JAXSON STAUBER',
|
|
700
|
-
'JEAN-PHILIPPE LEVASSEUR',
|
|
701
|
-
'JEAN-SEBASTIEN AUBIN',
|
|
702
|
-
'JEAN-SEBASTIEN GIGUERE',
|
|
703
|
-
'JEFF DESLAURIERS',
|
|
704
|
-
'JEFF FRAZEE',
|
|
705
|
-
'JEFF GLASS',
|
|
706
|
-
'JEFF TYNI',
|
|
707
|
-
'JEFF ZATKOFF',
|
|
708
|
-
'JEREMY DUCHESNE',
|
|
709
|
-
'JEREMY SMITH',
|
|
710
|
-
'JEREMY SWAYMAN',
|
|
711
|
-
'JESPER WALLSTEDT',
|
|
712
|
-
'JET GREAVES',
|
|
713
|
-
'JHONAS ENROTH',
|
|
714
|
-
'JIMMY HOWARD',
|
|
715
|
-
'JIRI PATERA',
|
|
716
|
-
'JOACIM ERIKSSON',
|
|
717
|
-
'JOCELYN THIBAULT',
|
|
718
|
-
'JOE CANNATA',
|
|
719
|
-
'JOE FALLON',
|
|
720
|
-
'JOEL BLOMQVIST',
|
|
721
|
-
'JOEL HOFER',
|
|
722
|
-
'JOEL MARTIN',
|
|
723
|
-
'JOEY DACCORD',
|
|
724
|
-
'JOEY MACDONALD',
|
|
725
|
-
'JOHAN BACKLUND',
|
|
726
|
-
'JOHAN GUSTAFSSON',
|
|
727
|
-
'JOHAN HEDBERG',
|
|
728
|
-
'JOHAN HOLMQVIST',
|
|
729
|
-
'JOHN CURRY',
|
|
730
|
-
'JOHN GIBSON',
|
|
731
|
-
'JOHN GRAHAME',
|
|
732
|
-
'JOHN MUSE',
|
|
733
|
-
'JON GILLIES',
|
|
734
|
-
'JON-PAUL ANDERSON',
|
|
735
|
-
'JONAS GUSTAVSSON',
|
|
736
|
-
'JONAS HILLER',
|
|
737
|
-
'JONAS JOHANSSON',
|
|
738
|
-
'JONATHAN BERNIER',
|
|
739
|
-
'JONATHAN BOUTIN',
|
|
740
|
-
'JONATHAN QUICK',
|
|
741
|
-
'JONI ORTIO',
|
|
742
|
-
'JOONAS KORPISALO',
|
|
743
|
-
'JORDAN BINNINGTON',
|
|
744
|
-
'JORDAN PEARCE',
|
|
745
|
-
'JORDAN SIGALET',
|
|
746
|
-
'JORDAN WHITE',
|
|
747
|
-
'JORGE ALVES',
|
|
748
|
-
'JOSE THEODORE',
|
|
749
|
-
'JOSEF KORENAR',
|
|
750
|
-
'JOSEPH WOLL',
|
|
751
|
-
'JOSH HARDING',
|
|
752
|
-
'JOSH TORDJMAN',
|
|
753
|
-
'JUSSI RYNNAS',
|
|
754
|
-
'JUSTIN KOWALKOSKI',
|
|
755
|
-
'JUSTIN PETERS',
|
|
756
|
-
'JUSTIN POGGE',
|
|
757
|
-
'JUSTUS ANNUNEN',
|
|
758
|
-
'JUUSE SAROS',
|
|
759
|
-
'JUUSO RIKSMAN',
|
|
760
|
-
'KAAPO KAHKONEN',
|
|
761
|
-
'KADEN FULCHER',
|
|
762
|
-
'KAREL VEJMELKA',
|
|
763
|
-
'KARI LEHTONEN',
|
|
764
|
-
'KARRI RAMO',
|
|
765
|
-
'KASIMIR KASKISUO',
|
|
766
|
-
'KEITH KINKAID',
|
|
767
|
-
'KEN APPLEBY',
|
|
768
|
-
'KENNETH APPLEBY',
|
|
769
|
-
'KENT SIMPSON',
|
|
770
|
-
'KEVIN BOYLE',
|
|
771
|
-
'KEVIN LANKINEN',
|
|
772
|
-
'KEVIN MANDOLESE',
|
|
773
|
-
'KEVIN NASTIUK',
|
|
774
|
-
'KEVIN POULIN',
|
|
775
|
-
'KEVIN WEEKES',
|
|
776
|
-
'KRISTERS GUDLEVSKIS',
|
|
777
|
-
'KURTIS MUCHA',
|
|
778
|
-
'LANDON BOW',
|
|
779
|
-
'LARS JOHANSSON',
|
|
780
|
-
'LAURENT BROSSOIT',
|
|
781
|
-
'LEEVI MERILAINEN',
|
|
782
|
-
'LELAND IRVING',
|
|
783
|
-
'LINUS ULLMARK',
|
|
784
|
-
'LOGAN THOMPSON',
|
|
785
|
-
'LOUIS DOMINGUE',
|
|
786
|
-
'LUKAS DOSTAL',
|
|
787
|
-
'MACKENZIE BLACKWOOD',
|
|
788
|
-
'MACKENZIE SKAPSKI',
|
|
789
|
-
'MADS SOGAARD',
|
|
790
|
-
'MAGNUS CHRONA',
|
|
791
|
-
'MAGNUS HELLBERG',
|
|
792
|
-
'MALCOLM SUBBAN',
|
|
793
|
-
'MANNY FERNANDEZ',
|
|
794
|
-
'MANNY LEGACE',
|
|
795
|
-
'MARC CHEVERIE',
|
|
796
|
-
'MARC DENIS',
|
|
797
|
-
'MARC-ANDRE FLEURY',
|
|
798
|
-
'MARCUS HOGBERG',
|
|
799
|
-
'MAREK LANGHAMER',
|
|
800
|
-
'MAREK MAZANEC',
|
|
801
|
-
'MAREK SCHWARZ',
|
|
802
|
-
'MARK DEKANICH',
|
|
803
|
-
'MARK VISENTIN',
|
|
804
|
-
'MARTIN BIRON',
|
|
805
|
-
'MARTIN BRODEUR',
|
|
806
|
-
'MARTIN GERBER',
|
|
807
|
-
'MARTIN JONES',
|
|
808
|
-
'MARTY TURCO',
|
|
809
|
-
'MAT ROBSON',
|
|
810
|
-
'MATHIEU CORBEIL',
|
|
811
|
-
'MATHIEU GARON',
|
|
812
|
-
'MATISS KIVLENIEKS',
|
|
813
|
-
'MATT CLIMIE',
|
|
814
|
-
'MATT DALTON',
|
|
815
|
-
'MATT HACKETT',
|
|
816
|
-
'MATT KEETLEY',
|
|
817
|
-
'MATT MURRAY',
|
|
818
|
-
'MATT TOMKINS',
|
|
819
|
-
'MATT VILLALTA',
|
|
820
|
-
'MATT ZABA',
|
|
821
|
-
'MATTHEW HEWITT',
|
|
822
|
-
"MATTHEW O'CONNOR",
|
|
823
|
-
'MAXIME LAGACE',
|
|
824
|
-
'MICHAEL DIPIETRO',
|
|
825
|
-
'MICHAEL GARTEIG',
|
|
826
|
-
'MICHAEL HOUSER',
|
|
827
|
-
'MICHAEL HUTCHINSON',
|
|
828
|
-
'MICHAEL LEE',
|
|
829
|
-
'MICHAEL LEIGHTON',
|
|
830
|
-
'MICHAEL MCNIVEN',
|
|
831
|
-
'MICHAEL MOLE',
|
|
832
|
-
'MICHAEL MORRISON',
|
|
833
|
-
'MICHAEL WALL',
|
|
834
|
-
'MICHAL NEUVIRTH',
|
|
835
|
-
'MIIKA WIIKMAN',
|
|
836
|
-
'MIIKKA KIPRUSOFF',
|
|
837
|
-
'MIKAEL TELLQVIST',
|
|
838
|
-
'MIKE BRODEUR',
|
|
839
|
-
'MIKE CONDON',
|
|
840
|
-
'MIKE MCKENNA',
|
|
841
|
-
'MIKE MURPHY',
|
|
842
|
-
'MIKE SMITH',
|
|
843
|
-
'MIKKO KOSKINEN',
|
|
844
|
-
'MIROSLAV SVOBODA',
|
|
845
|
-
'NATHAN DEOBALD',
|
|
846
|
-
'NATHAN LAWSON',
|
|
847
|
-
'NATHAN LIEUWEN',
|
|
848
|
-
'NATHAN SCHOENFELD',
|
|
849
|
-
'NICK ELLIS',
|
|
850
|
-
'NICO DAWS',
|
|
851
|
-
'NIKKE KOKKO',
|
|
852
|
-
'NIKLAS BACKSTROM',
|
|
853
|
-
'NIKLAS LUNDSTROM',
|
|
854
|
-
'NIKLAS SVEDBERG',
|
|
855
|
-
'NIKLAS TREUTLE',
|
|
856
|
-
'NIKOLAI KHABIBULIN',
|
|
857
|
-
'NOLAN SCHAEFER',
|
|
858
|
-
'OLIE KOLZIG',
|
|
859
|
-
'OLLE ERIKSSON EK',
|
|
860
|
-
'ONDREJ PAVELEC',
|
|
861
|
-
'OSCAR DANSK',
|
|
862
|
-
'PASCAL LECLAIRE',
|
|
863
|
-
'PAT CONACHER',
|
|
864
|
-
'PATRICK KILLEEN',
|
|
865
|
-
'PATRICK LALIME',
|
|
866
|
-
'PAUL DEUTSCH',
|
|
867
|
-
'PAVEL FRANCOUZ',
|
|
868
|
-
'PEKKA RINNE',
|
|
869
|
-
'PETER BUDAJ',
|
|
870
|
-
'PETER MANNINO',
|
|
871
|
-
'PETR MRAZEK',
|
|
872
|
-
'PHEONIX COPLEY',
|
|
873
|
-
'PHILIPP GRUBAUER',
|
|
874
|
-
'PHILIPPE DESROSIERS',
|
|
875
|
-
'PYOTR KOCHETKOV',
|
|
876
|
-
'RAY EMERY',
|
|
877
|
-
'RETO BERRA',
|
|
878
|
-
'RICHARD BACHMAN',
|
|
879
|
-
'RICK DIPIETRO',
|
|
880
|
-
'RIKU HELENIUS',
|
|
881
|
-
'ROB LAURIE',
|
|
882
|
-
'ROB ZEPP',
|
|
883
|
-
'ROBB TALLAS',
|
|
884
|
-
'ROBBIE TALLAS',
|
|
885
|
-
'ROBERT MAYER',
|
|
886
|
-
'ROBERTO LUONGO',
|
|
887
|
-
'ROBIN LEHNER',
|
|
888
|
-
'ROMAN WILL',
|
|
889
|
-
'RYAN LOWE',
|
|
890
|
-
'RYAN MILLER',
|
|
891
|
-
'RYAN MUNCE',
|
|
892
|
-
'RYAN VINZ',
|
|
893
|
-
'SAM BRITTAIN',
|
|
894
|
-
'SAM MONTEMBEAULT',
|
|
895
|
-
'SAMI AITTOKALLIO',
|
|
896
|
-
'SAMUEL ERSSON',
|
|
897
|
-
'SAMUEL MONTEMBEAULT',
|
|
898
|
-
'SAM MONTEMBEAULT',
|
|
899
|
-
'SCOTT CLEMMENSEN',
|
|
900
|
-
'SCOTT DARLING',
|
|
901
|
-
'SCOTT FOSTER',
|
|
902
|
-
'SCOTT MUNROE',
|
|
903
|
-
'SCOTT STAJCER',
|
|
904
|
-
'SCOTT WEDGEWOOD',
|
|
905
|
-
'SEBASTIEN CARON',
|
|
906
|
-
'SEMYON VARLAMOV',
|
|
907
|
-
'SERGEI BOBROVSKY',
|
|
908
|
-
'SHAWN HUNWICK',
|
|
909
|
-
'SPENCER KNIGHT',
|
|
910
|
-
'SPENCER MARTIN',
|
|
911
|
-
'STEFANOS LEKKAS',
|
|
912
|
-
'STEVE MASON',
|
|
913
|
-
'STEVE MICHALEK',
|
|
914
|
-
'STEVE VALIQUETTE',
|
|
915
|
-
'STUART SKINNER',
|
|
916
|
-
'THATCHER DEMKO',
|
|
917
|
-
'THOMAS FENTON',
|
|
918
|
-
'THOMAS GREISS',
|
|
919
|
-
'THOMAS HODGES',
|
|
920
|
-
'TIM THOMAS',
|
|
921
|
-
'TIMO PIELMEIER',
|
|
922
|
-
'TIMOTHY JR. THOMAS',
|
|
923
|
-
'TOBIAS STEPHAN',
|
|
924
|
-
'TODD FORD',
|
|
925
|
-
'TOM MCCOLLUM',
|
|
926
|
-
'TOMAS POPPERLE',
|
|
927
|
-
'TOMAS VOKOUN',
|
|
928
|
-
'TORRIE JUNG',
|
|
929
|
-
'TRENT MINER',
|
|
930
|
-
'TRISTAN JARRY',
|
|
931
|
-
'TROY GROSENICK',
|
|
932
|
-
'TUUKKA RASK',
|
|
933
|
-
'TY CONKLIN',
|
|
934
|
-
'TYLER BUNZ',
|
|
935
|
-
'TYLER PLANTE',
|
|
936
|
-
'TYLER STEWART',
|
|
937
|
-
'TYLER WEIMAN',
|
|
938
|
-
'TYSON SEXSMITH',
|
|
939
|
-
'UKKO-PEKKA LUUKKONEN',
|
|
940
|
-
'VEINI VEHVILAINEN',
|
|
941
|
-
'VESA TOSKALA',
|
|
942
|
-
'VIKTOR FASTH',
|
|
943
|
-
'VILLE HUSSO',
|
|
944
|
-
'VITEK VANECEK',
|
|
945
|
-
'WADE DUBIELEWICZ',
|
|
946
|
-
'YANIV PERETS',
|
|
947
|
-
'YANN DANIS',
|
|
948
|
-
'YAROSLAV ASKAROV',
|
|
949
|
-
'ZACH FUCALE',
|
|
950
|
-
'ZACH SAWCHENKO',
|
|
951
|
-
'ZACH SIKICH',
|
|
952
|
-
'ZACHARY FUCALE',
|
|
953
|
-
'ZANE KALEMBA',
|
|
954
|
-
'ZANE MCINTYRE']
|
|
917
|
+
thisteam = home_soup.find('td', {'align':'center', 'class':'teamHeading + border'}).get_text()
|
|
955
918
|
|
|
956
919
|
players = dict()
|
|
957
920
|
|
|
@@ -985,12 +948,95 @@ def scrape_html_shifts(season, game_id):
|
|
|
985
948
|
alldf = alldf._append(df)
|
|
986
949
|
|
|
987
950
|
home_shifts = alldf
|
|
951
|
+
|
|
952
|
+
# Debug dump disabled: the target is a user-specific absolute path, and
# DataFrame.to_csv raises OSError when that directory does not exist.
#home_shifts.to_csv('/Users/patrickbacon/compact_topdownhockey/home_shifts.csv', index = False)
|
|
953
|
+
|
|
954
|
+
if live == True:
|
|
955
|
+
|
|
956
|
+
home_shifts = home_shifts.assign(shift_number = home_shifts.shift_number.astype(int))
|
|
957
|
+
home_shifts = home_shifts.assign(number = home_shifts.number.astype(int))
|
|
958
|
+
|
|
959
|
+
found = home_soup.find_all('td', {'class':['playerHeading + border', 'bborder + lborder +']})
|
|
960
|
+
if len(found)==0:
|
|
961
|
+
raise IndexError('This game has no shift data.')
|
|
962
|
+
thisteam = home_soup.find('td', {'align':'center', 'class':'teamHeading + border'}).get_text()
|
|
963
|
+
|
|
964
|
+
players = dict()
|
|
965
|
+
|
|
966
|
+
for i in range(len(found)):
|
|
967
|
+
line = found[i].get_text()
|
|
968
|
+
if line == '25 PETTERSSON, ELIAS':
|
|
969
|
+
line = '25 PETTERSSON(D), ELIAS'
|
|
970
|
+
if ', ' in line:
|
|
971
|
+
name = line.split(',')
|
|
972
|
+
number = name[0].split(' ')[0].strip()
|
|
973
|
+
last_name = name[0].split(' ')[1].strip()
|
|
974
|
+
first_name = name[1].strip()
|
|
975
|
+
full_name = first_name + " " + last_name
|
|
976
|
+
players[full_name] = dict()
|
|
977
|
+
players[full_name]['number'] = number
|
|
978
|
+
players[full_name]['name'] = full_name
|
|
979
|
+
players[full_name]['shifts'] = []
|
|
980
|
+
else:
|
|
981
|
+
players[full_name]['shifts'].extend([line])
|
|
982
|
+
|
|
983
|
+
alldf = pd.DataFrame()
|
|
984
|
+
|
|
985
|
+
for key in players.keys():
|
|
986
|
+
length = length = int(len(players[key]['shifts'])/6)
|
|
987
|
+
df = df = pd.DataFrame(np.array((players[key]['shifts'])).reshape(length, 6)).rename(
|
|
988
|
+
columns = {0:'period', 1:'shifts', 2:'avg', 3:'TOI', 4:'EV Total', 5:'PP Total'})
|
|
989
|
+
df = df.assign(name = players[key]['name'],
|
|
990
|
+
number = players[key]['number'],
|
|
991
|
+
team = thisteam,
|
|
992
|
+
venue = "home")
|
|
993
|
+
alldf = alldf._append(df)
|
|
994
|
+
|
|
995
|
+
home_extra_shifts = alldf
|
|
996
|
+
|
|
997
|
+
shifts_needing_to_be_added = home_extra_shifts[home_extra_shifts.shifts=='0']
|
|
998
|
+
|
|
999
|
+
def subtract_from_twenty_minutes(time_string):
    """Return 20:00 minus the clock time given in ``time_string``.

    Args:
        time_string: Clock time written as ``"M:SS"`` or ``"MM:SS"``.

    Returns:
        The difference formatted as ``"M:SS"`` (minutes are not zero-padded,
        seconds always use two digits). When the input is past 20:00 the
        result carries a leading ``-``.

    Raises:
        ValueError: If ``time_string`` does not split on ``:`` into exactly
            two integer fields.
    """
    minutes, seconds = map(int, time_string.split(':'))
    difference_seconds = 20 * 60 - (minutes * 60 + seconds)

    # Split the magnitude rather than the signed value: floor division and
    # modulo on a negative number would render 20:30 as "-1:30" instead of
    # "-0:30".
    sign = '-' if difference_seconds < 0 else ''
    result_minutes, result_seconds = divmod(abs(difference_seconds), 60)

    return f"{sign}{result_minutes}:{result_seconds:02d}"
|
|
1016
|
+
|
|
1017
|
+
shifts_needing_to_be_added = shifts_needing_to_be_added.assign(shift_start = '0:00 / ' + shifts_needing_to_be_added.TOI,
|
|
1018
|
+
shift_end = shifts_needing_to_be_added.TOI + ' / ' + shifts_needing_to_be_added.TOI.apply(lambda x: subtract_from_twenty_minutes(x)),
|
|
1019
|
+
duration = shifts_needing_to_be_added.TOI)
|
|
1020
|
+
|
|
1021
|
+
shifts_needing_to_be_added = shifts_needing_to_be_added.merge(
|
|
1022
|
+
home_shifts.assign(shift_number = home_shifts.shift_number.astype(int)).groupby('name')['shift_number'].max().reset_index().rename(columns = {'shift_number':'prior_max_shift'})
|
|
1023
|
+
)
|
|
1024
|
+
|
|
1025
|
+
shifts_needing_to_be_added = shifts_needing_to_be_added.assign(shift_number = shifts_needing_to_be_added.prior_max_shift + 1)
|
|
1026
|
+
|
|
1027
|
+
shifts_needing_to_be_added = shifts_needing_to_be_added.loc[:, ['shift_number', 'period', 'shift_start', 'shift_end', 'duration', 'name', 'number', 'team', 'venue']]
|
|
1028
|
+
|
|
1029
|
+
shifts_needing_to_be_added['number'] = shifts_needing_to_be_added['number'].astype(int)
|
|
1030
|
+
|
|
1031
|
+
home_shifts = pd.concat([home_shifts, shifts_needing_to_be_added]).sort_values(by = ['number', 'period', 'shift_number'])
|
|
988
1032
|
|
|
989
1033
|
url = 'http://www.nhl.com/scores/htmlreports/' + season + '/TV0' + game_id + '.HTM'
|
|
990
1034
|
page = (requests.get(url))
|
|
991
|
-
|
|
992
|
-
found =
|
|
993
|
-
|
|
1035
|
+
away_soup = BeautifulSoup(page.content)
|
|
1036
|
+
found = away_soup.find_all('td', {'class':['playerHeading + border', 'lborder + bborder']})
|
|
1037
|
+
if len(found)==0:
|
|
1038
|
+
raise IndexError('This game has no shift data.')
|
|
1039
|
+
thisteam = away_soup.find('td', {'align':'center', 'class':'teamHeading + border'}).get_text()
|
|
994
1040
|
|
|
995
1041
|
players = dict()
|
|
996
1042
|
|
|
@@ -1022,12 +1068,163 @@ def scrape_html_shifts(season, game_id):
|
|
|
1022
1068
|
team = thisteam,
|
|
1023
1069
|
venue = "away")
|
|
1024
1070
|
alldf = alldf._append(df)
|
|
1025
|
-
|
|
1071
|
+
|
|
1026
1072
|
away_shifts = alldf
|
|
1027
|
-
|
|
1073
|
+
|
|
1074
|
+
# Debug dump disabled: the target is a user-specific absolute path, and
# DataFrame.to_csv raises OSError when that directory does not exist.
#away_shifts.to_csv('/Users/patrickbacon/compact_topdownhockey/away_shifts.csv', index = False)
|
|
1075
|
+
|
|
1076
|
+
if live == True:
|
|
1077
|
+
|
|
1078
|
+
away_shifts = away_shifts.assign(shift_number = away_shifts.shift_number.astype(int))
|
|
1079
|
+
away_shifts = away_shifts.assign(number = away_shifts.number.astype(int))
|
|
1080
|
+
|
|
1081
|
+
found = away_soup.find_all('td', {'class':['playerHeading + border', 'bborder + lborder +']})
|
|
1082
|
+
if len(found)==0:
|
|
1083
|
+
raise IndexError('This game has no shift data.')
|
|
1084
|
+
thisteam = away_soup.find('td', {'align':'center', 'class':'teamHeading + border'}).get_text()
|
|
1085
|
+
|
|
1086
|
+
players = dict()
|
|
1087
|
+
|
|
1088
|
+
for i in range(len(found)):
|
|
1089
|
+
line = found[i].get_text()
|
|
1090
|
+
if line == '25 PETTERSSON, ELIAS':
|
|
1091
|
+
line = '25 PETTERSSON(D), ELIAS'
|
|
1092
|
+
if ', ' in line:
|
|
1093
|
+
name = line.split(',')
|
|
1094
|
+
number = name[0].split(' ')[0].strip()
|
|
1095
|
+
last_name = name[0].split(' ')[1].strip()
|
|
1096
|
+
first_name = name[1].strip()
|
|
1097
|
+
full_name = first_name + " " + last_name
|
|
1098
|
+
players[full_name] = dict()
|
|
1099
|
+
players[full_name]['number'] = number
|
|
1100
|
+
players[full_name]['name'] = full_name
|
|
1101
|
+
players[full_name]['shifts'] = []
|
|
1102
|
+
else:
|
|
1103
|
+
players[full_name]['shifts'].extend([line])
|
|
1104
|
+
|
|
1105
|
+
alldf = pd.DataFrame()
|
|
1106
|
+
|
|
1107
|
+
for key in players.keys():
|
|
1108
|
+
length = length = int(len(players[key]['shifts'])/6)
|
|
1109
|
+
df = df = pd.DataFrame(np.array((players[key]['shifts'])).reshape(length, 6)).rename(
|
|
1110
|
+
columns = {0:'period', 1:'shifts', 2:'avg', 3:'TOI', 4:'EV Total', 5:'PP Total'})
|
|
1111
|
+
df = df.assign(name = players[key]['name'],
|
|
1112
|
+
number = players[key]['number'],
|
|
1113
|
+
team = thisteam,
|
|
1114
|
+
venue = "away")
|
|
1115
|
+
alldf = alldf._append(df)
|
|
1116
|
+
|
|
1117
|
+
away_extra_shifts = alldf
|
|
1118
|
+
|
|
1119
|
+
shifts_needing_to_be_added = away_extra_shifts[away_extra_shifts.shifts=='0']
|
|
1120
|
+
|
|
1121
|
+
def subtract_from_twenty_minutes(time_string):
    """Return 20:00 minus the clock time given in ``time_string``.

    Args:
        time_string: Clock time written as ``"M:SS"`` or ``"MM:SS"``.

    Returns:
        The difference formatted as ``"M:SS"`` (minutes are not zero-padded,
        seconds always use two digits). When the input is past 20:00 the
        result carries a leading ``-``.

    Raises:
        ValueError: If ``time_string`` does not split on ``:`` into exactly
            two integer fields.
    """
    minutes, seconds = map(int, time_string.split(':'))
    difference_seconds = 20 * 60 - (minutes * 60 + seconds)

    # Split the magnitude rather than the signed value: floor division and
    # modulo on a negative number would render 20:30 as "-1:30" instead of
    # "-0:30".
    sign = '-' if difference_seconds < 0 else ''
    result_minutes, result_seconds = divmod(abs(difference_seconds), 60)

    return f"{sign}{result_minutes}:{result_seconds:02d}"
|
|
1138
|
+
|
|
1139
|
+
shifts_needing_to_be_added = shifts_needing_to_be_added.assign(shift_start = '0:00 / ' + shifts_needing_to_be_added.TOI.astype(str),
|
|
1140
|
+
shift_end = shifts_needing_to_be_added.TOI.astype(str) + ' / ' + shifts_needing_to_be_added.TOI.apply(lambda x: subtract_from_twenty_minutes(x)),
|
|
1141
|
+
duration = shifts_needing_to_be_added.TOI.astype(str))
|
|
1142
|
+
|
|
1143
|
+
shifts_needing_to_be_added = shifts_needing_to_be_added.merge(
|
|
1144
|
+
away_shifts.assign(shift_number = away_shifts.shift_number.astype(int)).groupby('name')['shift_number'].max().reset_index().rename(columns = {'shift_number':'prior_max_shift'})
|
|
1145
|
+
)
|
|
1146
|
+
|
|
1147
|
+
shifts_needing_to_be_added = shifts_needing_to_be_added.assign(shift_number = shifts_needing_to_be_added.prior_max_shift + 1)
|
|
1148
|
+
|
|
1149
|
+
shifts_needing_to_be_added = shifts_needing_to_be_added.loc[:, ['shift_number', 'period', 'shift_start', 'shift_end', 'duration', 'name', 'number', 'team', 'venue']]
|
|
1150
|
+
|
|
1151
|
+
shifts_needing_to_be_added['number'] = shifts_needing_to_be_added['number'].astype(int)
|
|
1152
|
+
|
|
1153
|
+
away_shifts = pd.concat([away_shifts, shifts_needing_to_be_added]).sort_values(by = ['number', 'period', 'shift_number'])
|
|
1154
|
+
|
|
1155
|
+
# Additional logic to handle period 1 scrape when we don't have goalie shifts yet.
|
|
1156
|
+
|
|
1157
|
+
if len(home_shifts[(home_shifts.name.isin(goalie_names))]) == 0 or len(away_shifts[(away_shifts.name.isin(goalie_names))]):
|
|
1158
|
+
|
|
1159
|
+
pbp_html_url = f'https://www.nhl.com/scores/htmlreports/{season}/GS0{game_id}.HTM'
|
|
1160
|
+
pbp_soup = BeautifulSoup(requests.get(pbp_html_url).content)
|
|
1161
|
+
goalie_header = pbp_soup.find('td', text='GOALTENDER SUMMARY')
|
|
1162
|
+
|
|
1163
|
+
# Navigate to the table containing goalie data
|
|
1164
|
+
goalie_table = goalie_header.find_next('table')
|
|
1165
|
+
|
|
1166
|
+
away_teams = pd.read_html(str(goalie_table))[0][:2]
|
|
1167
|
+
away_team = away_teams[0].iloc[0]
|
|
1168
|
+
|
|
1169
|
+
away_goalies = pd.read_html(str(goalie_table))[0][2:4]
|
|
1170
|
+
away_goalies = away_goalies[~pd.isna(away_goalies[6])]
|
|
1171
|
+
|
|
1172
|
+
away_goalies = away_goalies.assign(team = away_team).rename(columns = {0:'number', 2:'name', 6:'TOI'}).loc[:, ['number', 'name', 'TOI', 'team']]
|
|
1173
|
+
|
|
1174
|
+
home_teams = pd.read_html(str(goalie_table))[0][6:7]
|
|
1175
|
+
home_team = home_teams[0].iloc[0]
|
|
1176
|
+
|
|
1177
|
+
home_goalies = pd.read_html(str(goalie_table))[0][8:10]
|
|
1178
|
+
home_goalies = home_goalies[~pd.isna(home_goalies[6])]
|
|
1179
|
+
|
|
1180
|
+
home_goalies = home_goalies.assign(team = home_team).rename(columns = {0:'number', 2:'name', 6:'TOI'}).loc[:, ['number', 'name', 'TOI', 'team']]
|
|
1181
|
+
|
|
1182
|
+
home_goalies = pd.read_html(str(goalie_table))[0][8:9]
|
|
1183
|
+
|
|
1184
|
+
# Temporary to test. Will fix later.
|
|
1185
|
+
|
|
1186
|
+
# home_goalies = home_goalies.assign(TOI = '11:26')
|
|
1187
|
+
# away_goalies = away_goalies.assign(TOI = '11:26')
|
|
1188
|
+
|
|
1189
|
+
if len(home_shifts[(home_shifts.name.isin(goalie_names))]) == 0:
|
|
1190
|
+
|
|
1191
|
+
home_goalie_shift = home_goalies.assign(shift_number = 1,
|
|
1192
|
+
period = 1,
|
|
1193
|
+
name = home_goalies.name.str.split(', ').str[1] + ' ' + home_goalies.name.str.split(', ').str[0],
|
|
1194
|
+
shift_start = '0:00 / 20:00',
|
|
1195
|
+
shift_end = home_goalies.TOI + ' / ' + home_goalies.TOI.apply(lambda x: subtract_from_twenty_minutes(x)),
|
|
1196
|
+
duration = home_goalies.TOI,
|
|
1197
|
+
venue = 'home').loc[
|
|
1198
|
+
:, ['shift_number', 'period', 'shift_start', 'shift_end', 'duration', 'name', 'number', 'team', 'venue']]
|
|
1199
|
+
|
|
1200
|
+
home_goalie_shift = home_goalie_shift.assign(period = home_goalie_shift.period.astype(int),
|
|
1201
|
+
shift_number = home_goalie_shift.shift_number.astype(int),
|
|
1202
|
+
number = home_goalie_shift.number.astype(int))
|
|
1203
|
+
|
|
1204
|
+
home_shifts = pd.concat([home_shifts, home_goalie_shift]).sort_values(by = ['number', 'period', 'shift_number'])
|
|
1205
|
+
|
|
1206
|
+
if len(away_shifts[(away_shifts.name.isin(goalie_names))]) == 0:
|
|
1207
|
+
|
|
1208
|
+
away_goalie_shift = away_goalies.assign(shift_number = 1,
|
|
1209
|
+
period = 1,
|
|
1210
|
+
name = away_goalies.name.str.split(', ').str[1] + ' ' + away_goalies.name.str.split(', ').str[0],
|
|
1211
|
+
shift_start = '0:00 / 20:00',
|
|
1212
|
+
shift_end = away_goalies.TOI + ' / ' + away_goalies.TOI.apply(lambda x: subtract_from_twenty_minutes(x)),
|
|
1213
|
+
duration = away_goalies.TOI,
|
|
1214
|
+
venue = 'away').loc[
|
|
1215
|
+
:, ['shift_number', 'period', 'shift_start', 'shift_end', 'duration', 'name', 'number', 'team', 'venue']]
|
|
1216
|
+
|
|
1217
|
+
away_goalie_shift = away_goalie_shift.assign(period = away_goalie_shift.period.astype(int),
|
|
1218
|
+
shift_number = away_goalie_shift.shift_number.astype(int),
|
|
1219
|
+
number = away_goalie_shift.number.astype(int))
|
|
1220
|
+
|
|
1221
|
+
away_shifts = pd.concat([away_shifts, away_goalie_shift]).sort_values(by = ['number', 'period', 'shift_number'])
|
|
1222
|
+
|
|
1028
1223
|
global all_shifts
|
|
1029
1224
|
|
|
1030
1225
|
all_shifts = pd.concat([home_shifts, away_shifts])
|
|
1226
|
+
|
|
1227
|
+
#all_shifts.to_csv('/Users/patrickbacon/compact_topdownhockey/all_shifts.csv', index = False)
|
|
1031
1228
|
|
|
1032
1229
|
all_shifts = all_shifts.assign(start_time = all_shifts.shift_start.str.split('/').str[0])
|
|
1033
1230
|
|
|
@@ -1232,8 +1429,20 @@ def scrape_html_shifts(season, game_id):
|
|
|
1232
1429
|
|
|
1233
1430
|
all_shifts['name'] = all_shifts['name'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.upper()
|
|
1234
1431
|
|
|
1432
|
+
# Apply regex to remove (A) and (C) designations at end of names
|
|
1433
|
+
all_shifts['name'] = all_shifts['name'].apply(lambda x: re.sub(r' \(A\)$', '', x).strip())
|
|
1434
|
+
all_shifts['name'] = all_shifts['name'].apply(lambda x: re.sub(r' \(C\)$', '', x).strip())
|
|
1435
|
+
|
|
1436
|
+
# Apply specific name corrections
|
|
1235
1437
|
all_shifts['name'] = np.where(all_shifts['name']== "JURAJ SLAFKOVSKA" , "JURAJ SLAFKOVSKY", all_shifts['name']) # Need to do this after normalization, only then he becomes Slafkovska?
|
|
1438
|
+
all_shifts['name'] = np.where(all_shifts['name']== "JOHN (JACK) ROSLOVIC" , "JACK ROSLOVIC", all_shifts['name'])
|
|
1439
|
+
all_shifts['name'] = np.where(all_shifts['name']== "ANTHONY-JOHN (AJ) GREER" , "A.J. GREER", all_shifts['name'])
|
|
1440
|
+
|
|
1441
|
+
all_shifts['name'] = np.where(all_shifts['name']== 'MARTIN FEHARVARY' , 'MARTIN FEHERVARY', all_shifts['name'])
|
|
1442
|
+
|
|
1443
|
+
all_shifts['name'] = np.where(all_shifts['name']== 'MATAJ BLAMEL' , 'MATAJ BLAMEL', all_shifts['name'])
|
|
1236
1444
|
|
|
1445
|
+
all_shifts['name'] = all_shifts['name'].str.replace(' ', ' ')
|
|
1237
1446
|
|
|
1238
1447
|
all_shifts = all_shifts.assign(end_time = np.where(pd.to_datetime(all_shifts.start_time).dt.time > pd.to_datetime(all_shifts.end_time).dt.time, '20:00', all_shifts.end_time),
|
|
1239
1448
|
goalie = np.where(all_shifts.name.isin(goalie_names), 1, 0))
|
|
@@ -1265,10 +1474,16 @@ def scrape_html_shifts(season, game_id):
|
|
|
1265
1474
|
global changes_on
|
|
1266
1475
|
global changes_off
|
|
1267
1476
|
myshifts = all_shifts
|
|
1477
|
+
#myshifts.to_csv('/Users/patrickbacon/compact_topdownhockey/tmp.csv', index = False)
|
|
1478
|
+
#print('Printing my shifts')
|
|
1479
|
+
|
|
1480
|
+
#print(myshifts)
|
|
1268
1481
|
|
|
1269
1482
|
myshifts.start_time = myshifts.start_time.str.strip()
|
|
1270
1483
|
myshifts.end_time = myshifts.end_time.str.strip()
|
|
1271
1484
|
|
|
1485
|
+
myshifts['number'] = myshifts.number.astype(str)
|
|
1486
|
+
|
|
1272
1487
|
changes_on = myshifts.groupby(['team', 'period', 'start_time']).agg(
|
|
1273
1488
|
on = ('name', ', '.join),
|
|
1274
1489
|
on_numbers = ('number', ', '.join),
|
|
@@ -1293,6 +1508,8 @@ def scrape_html_shifts(season, game_id):
|
|
|
1293
1508
|
3900))
|
|
1294
1509
|
|
|
1295
1510
|
full_changes = full_changes.assign(team = np.where(full_changes.team=='CANADIENS MONTREAL', 'MONTREAL CANADIENS', full_changes.team))
|
|
1511
|
+
|
|
1512
|
+
full_changes = full_changes.assign(team = np.where(full_changes.team=='MONTRÉAL CANADIENS', 'MONTREAL CANADIENS', full_changes.team))
|
|
1296
1513
|
|
|
1297
1514
|
return full_changes.reset_index(drop = True)#.drop(columns = ['time', 'period_seconds'])
|
|
1298
1515
|
|
|
@@ -1559,7 +1776,20 @@ def scrape_api_events(game_id, drop_description = True, shift_to_espn = False):
|
|
|
1559
1776
|
|
|
1560
1777
|
api_events['ep1_name'] = api_events['ep1_name'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.upper()
|
|
1561
1778
|
|
|
1779
|
+
# Apply regex to remove (A) and (C) designations at end of names
|
|
1780
|
+
api_events['ep1_name'] = api_events['ep1_name'].apply(lambda x: re.sub(r' \(A\)$', '', x).strip())
|
|
1781
|
+
api_events['ep1_name'] = api_events['ep1_name'].apply(lambda x: re.sub(r' \(C\)$', '', x).strip())
|
|
1782
|
+
|
|
1783
|
+
# Apply specific name corrections
|
|
1562
1784
|
api_events['ep1_name'] = np.where(api_events['ep1_name'] == "JURAJ SLAFKOVSKA" , "JURAJ SLAFKOVSKY", api_events['ep1_name']) # Need to do this after normalization, only then he becomes Slafkovska?
|
|
1785
|
+
api_events['ep1_name'] = np.where(api_events['ep1_name'] == "JOHN (JACK) ROSLOVIC" , "JACK ROSLOVIC", api_events['ep1_name'])
|
|
1786
|
+
api_events['ep1_name'] = np.where(api_events['ep1_name'] == "ANTHONY-JOHN (AJ) GREER" , "A.J. GREER", api_events['ep1_name'])
|
|
1787
|
+
|
|
1788
|
+
api_events['ep1_name'] = np.where(api_events['ep1_name'] == 'MARTIN FEHARVARY' , 'MARTIN FEHERVARY', api_events['ep1_name'])
|
|
1789
|
+
|
|
1790
|
+
api_events['ep1_name'] = np.where(api_events['ep1_name'] == 'MATAJ BLAMEL' , 'MATAJ BLAMEL', api_events['ep1_name'])
|
|
1791
|
+
|
|
1792
|
+
api_events['ep1_name'] = api_events['ep1_name'].str.replace(' ', ' ')
|
|
1563
1793
|
|
|
1564
1794
|
api_events = api_events.assign(ep1_name = np.where(api_events.ep1_name=='ALEX BARRÃ-BOULET', 'ALEX BARRE_BOULET', api_events.ep1_name))
|
|
1565
1795
|
|
|
@@ -1679,8 +1909,9 @@ def scrape_html_events(season, game_id):
|
|
|
1679
1909
|
game.home_team_abbreviated.iloc[0],
|
|
1680
1910
|
game.away_team_abbreviated.iloc[0]))
|
|
1681
1911
|
|
|
1682
|
-
roster = roster.assign(teamnum = roster.team_abbreviated + roster['#']
|
|
1683
|
-
|
|
1912
|
+
roster = roster.assign(teamnum = roster.team_abbreviated + roster['#'])
|
|
1913
|
+
roster['Name'] = roster.Name.apply(lambda x: re.sub(r' \(A\)$', '', x).strip())
|
|
1914
|
+
roster['Name'] = roster.Name.apply(lambda x: re.sub(r' \(C\)$', '', x).strip())
|
|
1684
1915
|
|
|
1685
1916
|
event_player_1s = roster.loc[:, ['teamnum', 'Name']].rename(columns = {'teamnum':'event_player_1', 'Name':'ep1_name'})
|
|
1686
1917
|
event_player_2s = roster.loc[:, ['teamnum', 'Name']].rename(columns = {'teamnum':'event_player_2', 'Name':'ep2_name'})
|
|
@@ -1743,6 +1974,9 @@ def scrape_html_events(season, game_id):
|
|
|
1743
1974
|
|
|
1744
1975
|
game = game.assign(home_team = np.where(game.home_team=='CANADIENS MONTREAL', 'MONTREAL CANADIENS', game.home_team),
|
|
1745
1976
|
away_team = np.where(game.away_team=='CANADIENS MONTREAL', 'MONTREAL CANADIENS', game.away_team))
|
|
1977
|
+
|
|
1978
|
+
game = game.assign(home_team = np.where(game.home_team=='MONTRÉAL CANADIENS', 'MONTREAL CANADIENS', game.home_team),
|
|
1979
|
+
away_team = np.where(game.away_team=='MONTRÉAL CANADIENS', 'MONTREAL CANADIENS', game.away_team))
|
|
1746
1980
|
|
|
1747
1981
|
if int(game_id[0])!=3:
|
|
1748
1982
|
game = game[game.game_seconds<4000]
|
|
@@ -1940,7 +2174,8 @@ def scrape_espn_events(espn_game_id, drop_description = True):
|
|
|
1940
2174
|
(np.where(espn_events['event_player_1']== "EMIL LILLEBERG" , "EMIL MARTINSEN LILLEBERG",
|
|
1941
2175
|
(np.where(espn_events['event_player_1']== "CAMERON ATKINSON" , "CAM ATKINSON",
|
|
1942
2176
|
(np.where(espn_events['event_player_1']== "JURAJ SLAFKOVSKA" , "JURAJ SLAFKOVSKY",
|
|
1943
|
-
espn_events['event_player_1']
|
|
2177
|
+
(np.where(espn_events['event_player_1']== "MARTIN FEHARVARY" , "MARTIN FEHERVARY",
|
|
2178
|
+
espn_events['event_player_1']))))))))))))))))))))))))))))))))))))
|
|
1944
2179
|
|
|
1945
2180
|
|
|
1946
2181
|
espn_events = espn_events.assign(version =
|
|
@@ -1974,8 +2209,21 @@ def scrape_espn_events(espn_game_id, drop_description = True):
|
|
|
1974
2209
|
|
|
1975
2210
|
espn_events['event_player_1'] = espn_events['event_player_1'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8').str.upper()
|
|
1976
2211
|
|
|
1977
|
-
|
|
2212
|
+
# Apply regex to remove (A) and (C) designations at end of names
|
|
2213
|
+
espn_events['event_player_1'] = espn_events['event_player_1'].apply(lambda x: re.sub(r' \(A\)$', '', x).strip())
|
|
2214
|
+
espn_events['event_player_1'] = espn_events['event_player_1'].apply(lambda x: re.sub(r' \(C\)$', '', x).strip())
|
|
2215
|
+
|
|
2216
|
+
# Apply specific name corrections
|
|
2217
|
+
espn_events['event_player_1'] = np.where(espn_events['event_player_1'] == "JURAJ SLAFKOVSKA" , "JURAJ SLAFKOVSKY", espn_events['event_player_1'])
|
|
2218
|
+
espn_events['event_player_1'] = np.where(espn_events['event_player_1'] == "JOHN (JACK) ROSLOVIC" , "JACK ROSLOVIC", espn_events['event_player_1'])
|
|
2219
|
+
espn_events['event_player_1'] = np.where(espn_events['event_player_1'] == "ANTHONY-JOHN (AJ) GREER" , "A.J. GREER", espn_events['event_player_1'])
|
|
2220
|
+
|
|
2221
|
+
espn_events['event_player_1'] = np.where(espn_events['event_player_1'] == 'MARTIN FEHARVARY' , 'MARTIN FEHERVARY', espn_events['event_player_1'])
|
|
2222
|
+
|
|
2223
|
+
espn_events['event_player_1'] = np.where(espn_events['event_player_1'] == 'MATAJ BLAMEL' , 'MATAJ BLAMEL', espn_events['event_player_1'])
|
|
1978
2224
|
|
|
2225
|
+
espn_events['event_player_1'] = espn_events['event_player_1'].str.replace(' ', ' ')
|
|
2226
|
+
|
|
1979
2227
|
#espn_events = espn_events.assign(event_player_1 = np.where(
|
|
1980
2228
|
#espn_events.event_player_1=='ALEX BURROWS', 'ALEXANDRE BURROWS', espn_events.event_player_1))
|
|
1981
2229
|
|
|
@@ -2007,6 +2255,7 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
|
|
|
2007
2255
|
this_date = (game_date)
|
|
2008
2256
|
url = 'http://www.espn.com/nhl/scoreboard?date=' + this_date.replace("-", "")
|
|
2009
2257
|
page = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout = 500)
|
|
2258
|
+
print('Request to ESPN IDs successful.')
|
|
2010
2259
|
soup = BeautifulSoup(page.content, parser = 'lxml')
|
|
2011
2260
|
soup_found = soup.find_all('a', {'class':['AnchorLink truncate',
|
|
2012
2261
|
'AnchorLink Button Button--sm Button--anchorLink Button--alt mb4 w-100',
|
|
@@ -2075,9 +2324,11 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
|
|
|
2075
2324
|
np.where(gamedays.away_team=='AVALANCHE', 'COL',
|
|
2076
2325
|
np.where(gamedays.away_team=='BLUE', 'CBJ',
|
|
2077
2326
|
np.where(gamedays.away_team=='JACKETS', 'CBJ',
|
|
2327
|
+
np.where(gamedays.away_team=='BLUE JACKETS', 'CBJ',
|
|
2078
2328
|
np.where(gamedays.away_team=='STARS', 'DAL',
|
|
2079
2329
|
np.where(gamedays.away_team=='RED', 'DET',
|
|
2080
2330
|
np.where(gamedays.away_team=='WINGS', 'DET',
|
|
2331
|
+
np.where(gamedays.away_team=='RED WINGS', 'DET',
|
|
2081
2332
|
np.where(gamedays.away_team=='OILERS', 'EDM',
|
|
2082
2333
|
np.where(gamedays.away_team=='PANTHERS', 'FLA',
|
|
2083
2334
|
np.where(gamedays.away_team=='KINGS', 'LAK',
|
|
@@ -2096,15 +2347,17 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
|
|
|
2096
2347
|
np.where(gamedays.away_team=='LIGHTNING', 'TBL',
|
|
2097
2348
|
np.where(gamedays.away_team=='LEAFS', 'TOR',
|
|
2098
2349
|
np.where(gamedays.away_team=='MAPLE', 'TOR',
|
|
2350
|
+
np.where(gamedays.away_team=='MAPLE LEAFS', 'TOR',
|
|
2099
2351
|
np.where(gamedays.away_team=='CANUCKS', 'VAN',
|
|
2100
2352
|
np.where(gamedays.away_team=='GOLDEN', 'VGK',
|
|
2101
2353
|
np.where(gamedays.away_team=='KNIGHTS', 'VGK',
|
|
2354
|
+
np.where(gamedays.away_team=='GOLDEN KNIGHTS', 'VGK',
|
|
2102
2355
|
np.where(gamedays.away_team=='CAPITALS', 'WSH',
|
|
2103
2356
|
np.where(gamedays.away_team=='JETS', 'WPG',
|
|
2104
2357
|
np.where(gamedays.away_team=='CLUB', 'UTA',
|
|
2358
|
+
np.where(gamedays.away_team=='MAMMOTH', 'UTA',
|
|
2105
2359
|
np.where(gamedays.away_team=='HOCKEY', 'UTA', 'mistake'
|
|
2106
|
-
))))))))))))))))))))))))))))))))))))))
|
|
2107
|
-
)
|
|
2360
|
+
))))))))))))))))))))))))))))))))))))))))))))
|
|
2108
2361
|
|
|
2109
2362
|
gamedays = gamedays.assign(
|
|
2110
2363
|
home_team = np.where(gamedays.home_team=='DUCKS', 'ANA',
|
|
@@ -2117,9 +2370,11 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
|
|
|
2117
2370
|
np.where(gamedays.home_team=='AVALANCHE', 'COL',
|
|
2118
2371
|
np.where(gamedays.home_team=='BLUE', 'CBJ',
|
|
2119
2372
|
np.where(gamedays.home_team=='JACKETS', 'CBJ',
|
|
2373
|
+
np.where(gamedays.home_team=='BLUE JACKETS', 'CBJ',
|
|
2120
2374
|
np.where(gamedays.home_team=='STARS', 'DAL',
|
|
2121
2375
|
np.where(gamedays.home_team=='RED', 'DET',
|
|
2122
2376
|
np.where(gamedays.home_team=='WINGS', 'DET',
|
|
2377
|
+
np.where(gamedays.home_team=='RED WINGS', 'DET',
|
|
2123
2378
|
np.where(gamedays.home_team=='OILERS', 'EDM',
|
|
2124
2379
|
np.where(gamedays.home_team=='PANTHERS', 'FLA',
|
|
2125
2380
|
np.where(gamedays.home_team=='KINGS', 'LAK',
|
|
@@ -2138,15 +2393,17 @@ def scrape_espn_ids_single_game(game_date, home_team, away_team):
|
|
|
2138
2393
|
np.where(gamedays.home_team=='LIGHTNING', 'TBL',
|
|
2139
2394
|
np.where(gamedays.home_team=='MAPLE', 'TOR',
|
|
2140
2395
|
np.where(gamedays.home_team=='LEAFS', 'TOR',
|
|
2396
|
+
np.where(gamedays.home_team=='MAPLE LEAFS', 'TOR',
|
|
2141
2397
|
np.where(gamedays.home_team=='CANUCKS', 'VAN',
|
|
2142
2398
|
np.where(gamedays.home_team=='GOLDEN', 'VGK',
|
|
2143
2399
|
np.where(gamedays.home_team=='KNIGHTS', 'VGK',
|
|
2400
|
+
np.where(gamedays.home_team=='GOLDEN KNIGHTS', 'VGK',
|
|
2144
2401
|
np.where(gamedays.home_team=='CAPITALS', 'WSH',
|
|
2145
2402
|
np.where(gamedays.home_team=='JETS', 'WPG',
|
|
2146
2403
|
np.where(gamedays.home_team=='CLUB', 'UTA',
|
|
2404
|
+
np.where(gamedays.home_team=='MAMMOTH', 'UTA',
|
|
2147
2405
|
np.where(gamedays.home_team=='HOCKEY', 'UTA', 'mistake'
|
|
2148
|
-
))))))))))))))))))))))))))))))))))))))
|
|
2149
|
-
)
|
|
2406
|
+
))))))))))))))))))))))))))))))))))))))))))))
|
|
2150
2407
|
|
|
2151
2408
|
gamedays = gamedays[(gamedays.game_date==this_date) & (gamedays.home_team==home_team) & (gamedays.away_team==away_team)]
|
|
2152
2409
|
|
|
@@ -2159,9 +2416,23 @@ def merge_and_prepare(events, shifts):
|
|
|
2159
2416
|
game_id = int(events.game_id.iloc[0])
|
|
2160
2417
|
|
|
2161
2418
|
merged = pd.concat([events, shifts])
|
|
2419
|
+
|
|
2420
|
+
home_team = merged[~(pd.isna(merged.home_team))].home_team.iloc[0]
|
|
2421
|
+
#print(home_team)
|
|
2422
|
+
away_team = merged[~(pd.isna(merged.away_team))].away_team.iloc[0]
|
|
2423
|
+
#print(away_team)
|
|
2424
|
+
|
|
2425
|
+
if 'CANADIENS' in home_team:
|
|
2426
|
+
home_team = 'MONTREAL CANADIENS'
|
|
2427
|
+
|
|
2428
|
+
if 'CANADIENS' in away_team:
|
|
2429
|
+
away_team = 'MONTREAL CANADIENS'
|
|
2430
|
+
|
|
2431
|
+
#print(home_team)
|
|
2432
|
+
#print(away_team)
|
|
2162
2433
|
|
|
2163
|
-
merged = merged.assign(home_team =
|
|
2164
|
-
away_team =
|
|
2434
|
+
merged = merged.assign(home_team = home_team,
|
|
2435
|
+
away_team = away_team,
|
|
2165
2436
|
home_team_abbreviated = merged[~(pd.isna(merged.home_team_abbreviated))].home_team_abbreviated.iloc[0],
|
|
2166
2437
|
away_team_abbreviated = merged[~(pd.isna(merged.away_team_abbreviated))].away_team_abbreviated.iloc[0])
|
|
2167
2438
|
|
|
@@ -2211,6 +2482,8 @@ def merge_and_prepare(events, shifts):
|
|
|
2211
2482
|
np.where(merged.event.isin(['PGSTR', 'PGEND', 'PSTR', 'PEND', 'ANTHEM']), -1, 1))).sort_values(
|
|
2212
2483
|
by = ['game_seconds', 'period', 'event_index'])
|
|
2213
2484
|
|
|
2485
|
+
# Debug dump disabled: the target is a user-specific absolute path, and
# DataFrame.to_csv raises OSError when that directory does not exist.
#merged.to_csv('/Users/patrickbacon/compact_topdownhockey/first_merged.csv', index = False)
|
|
2486
|
+
|
|
2214
2487
|
merged = merged.assign(change_before_event = np.where(
|
|
2215
2488
|
(
|
|
2216
2489
|
(merged.away_on_ice!='') & (merged.event.shift()=='CHANGE') & (merged.away_on_ice!=merged.away_on_ice.shift()) |
|
|
@@ -2242,8 +2515,9 @@ def merge_and_prepare(events, shifts):
|
|
|
2242
2515
|
merged.home_team_abbreviated.iloc[0],
|
|
2243
2516
|
merged.away_team_abbreviated.iloc[0]))
|
|
2244
2517
|
|
|
2245
|
-
roster = roster.assign(teamnum = roster.team_abbreviated + roster['#']
|
|
2246
|
-
|
|
2518
|
+
roster = roster.assign(teamnum = roster.team_abbreviated + roster['#'])
|
|
2519
|
+
roster['Name'] = roster.Name.apply(lambda x: re.sub(r' \(A\)$', '', x).strip())
|
|
2520
|
+
roster['Name'] = roster.Name.apply(lambda x: re.sub(r' \(C\)$', '', x).strip())
|
|
2247
2521
|
|
|
2248
2522
|
roster = roster.assign(Name = np.where((roster.Name=='SEBASTIAN AHO') &( roster.team_name == 'NEW YORK ISLANDERS'), 'SEBASTIAN AHO (SWE)', roster.Name))
|
|
2249
2523
|
roster = roster.assign(Name = np.where((roster.Name=='ELIAS PETTERSSON') &( roster.Pos == 'D'), 'ELIAS PETTERSSON(D)', roster.Name))
|
|
@@ -2562,7 +2836,7 @@ def fix_missing(single, event_coords, events):
|
|
|
2562
2836
|
|
|
2563
2837
|
return(events)
|
|
2564
2838
|
|
|
2565
|
-
def full_scrape_1by1(game_id_list, shift_to_espn = True):
|
|
2839
|
+
def full_scrape_1by1(game_id_list, live = False, shift_to_espn = True):
|
|
2566
2840
|
|
|
2567
2841
|
global single
|
|
2568
2842
|
global event_coords
|
|
@@ -2585,7 +2859,9 @@ def full_scrape_1by1(game_id_list, shift_to_espn = True):
|
|
|
2585
2859
|
print('Attempting scrape for: ' + str(game_id))
|
|
2586
2860
|
season = str(int(str(game_id)[:4])) + str(int(str(game_id)[:4]) + 1)
|
|
2587
2861
|
small_id = str(game_id)[5:]
|
|
2862
|
+
print('Scraping HTML events')
|
|
2588
2863
|
single = scrape_html_events(season, small_id)
|
|
2864
|
+
print('Scraped HTML events')
|
|
2589
2865
|
single['game_id'] = int(game_id)
|
|
2590
2866
|
|
|
2591
2867
|
# If all goes well with the HTML scrape:
|
|
@@ -2604,7 +2880,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = True):
|
|
|
2604
2880
|
print('Issue when fixing problematic events. Here it is: ' + str(e))
|
|
2605
2881
|
continue
|
|
2606
2882
|
try:
|
|
2607
|
-
shifts = scrape_html_shifts(season, small_id)
|
|
2883
|
+
shifts = scrape_html_shifts(season, small_id, live)
|
|
2608
2884
|
finalized = merge_and_prepare(events, shifts)
|
|
2609
2885
|
full = full._append(finalized)
|
|
2610
2886
|
second_time = time.time()
|
|
@@ -2632,8 +2908,29 @@ def full_scrape_1by1(game_id_list, shift_to_espn = True):
|
|
|
2632
2908
|
home_team = single['home_team_abbreviated'].iloc[0]
|
|
2633
2909
|
away_team = single['away_team_abbreviated'].iloc[0]
|
|
2634
2910
|
game_date = single['game_date'].iloc[0]
|
|
2911
|
+
espn_home_team = home_team
|
|
2912
|
+
espn_away_team = away_team
|
|
2635
2913
|
try:
|
|
2636
|
-
|
|
2914
|
+
if home_team == 'T.B':
|
|
2915
|
+
espn_home_team = 'TBL'
|
|
2916
|
+
if away_team == 'T.B':
|
|
2917
|
+
espn_away_team = 'TBL'
|
|
2918
|
+
if home_team == 'L.A':
|
|
2919
|
+
espn_home_team = 'LAK'
|
|
2920
|
+
if away_team == 'L.A':
|
|
2921
|
+
espn_away_team = 'LAK'
|
|
2922
|
+
if home_team == 'N.J':
|
|
2923
|
+
espn_home_team = 'NJD'
|
|
2924
|
+
if away_team == 'N.J':
|
|
2925
|
+
espn_away_team = 'NJD'
|
|
2926
|
+
if home_team == 'S.J':
|
|
2927
|
+
espn_home_team = 'SJS'
|
|
2928
|
+
if away_team == 'S.J':
|
|
2929
|
+
espn_away_team = 'SJS'
|
|
2930
|
+
print('Scraping ESPN IDs')
|
|
2931
|
+
espn_id = scrape_espn_ids_single_game(str(game_date.date()), espn_home_team, espn_away_team).espn_id.iloc[0]
|
|
2932
|
+
print('Scraping ESPN Events')
|
|
2933
|
+
# Trace the ESPN game id resolved above before its events are requested.
print('Here is the ESPN ID:', espn_id)
|
|
2637
2934
|
event_coords = scrape_espn_events(int(espn_id))
|
|
2638
2935
|
event_coords['coordinate_source'] = 'espn'
|
|
2639
2936
|
events = single.merge(event_coords, on = ['event_player_1', 'game_seconds', 'period', 'version', 'event'], how = 'left').drop(columns = ['espn_id'])
|
|
@@ -2646,7 +2943,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = True):
|
|
|
2646
2943
|
print('This game does not have ESPN or API coordinates. You will get it anyway, though.')
|
|
2647
2944
|
events = single
|
|
2648
2945
|
try:
|
|
2649
|
-
shifts = scrape_html_shifts(season, small_id)
|
|
2946
|
+
shifts = scrape_html_shifts(season, small_id, live)
|
|
2650
2947
|
finalized = merge_and_prepare(events, shifts)
|
|
2651
2948
|
full = full._append(finalized)
|
|
2652
2949
|
second_time = time.time()
|
|
@@ -2739,7 +3036,7 @@ def full_scrape_1by1(game_id_list, shift_to_espn = True):
|
|
|
2739
3036
|
events = single
|
|
2740
3037
|
events['coordinate_source'] = 'none'
|
|
2741
3038
|
try:
|
|
2742
|
-
shifts = scrape_html_shifts(season, small_id)
|
|
3039
|
+
shifts = scrape_html_shifts(season, small_id, live)
|
|
2743
3040
|
finalized = merge_and_prepare(events, shifts)
|
|
2744
3041
|
full = full._append(finalized)
|
|
2745
3042
|
second_time = time.time()
|
|
@@ -2895,12 +3192,50 @@ def full_scrape_1by1(game_id_list, shift_to_espn = True):
|
|
|
2895
3192
|
|
|
2896
3193
|
return full
|
|
2897
3194
|
|
|
2898
|
-
def full_scrape(game_id_list, shift = False):
|
|
3195
|
+
def full_scrape(game_id_list, live = True, shift = False):
|
|
2899
3196
|
|
|
2900
3197
|
global hidden_patrick
|
|
2901
3198
|
hidden_patrick = 0
|
|
2902
3199
|
|
|
2903
|
-
df = full_scrape_1by1(game_id_list, shift_to_espn = shift)
|
|
3200
|
+
df = full_scrape_1by1(game_id_list, live, shift_to_espn = shift)
|
|
3201
|
+
|
|
3202
|
+
# Fixing the Pettersson issue for event player. Just going downstream for this.
|
|
3203
|
+
# Relabel event players recorded as 'ELIAS PETTERSSON' to 'ELIAS PETTERSSON(D)'
# when the event description tags PETTERSSON with jersey number 25.
# NOTE(review): presumably two players share this name and #25 is the
# defenseman -- confirm against the roster handling elsewhere in the file.
try:
    df = df.assign(
        # Player 1: any '#<number> PETTERSSON' tag in the description counts.
        event_player_1 = np.where(
            (df.event_player_1 == 'ELIAS PETTERSSON') &
            (df.event_description.str.contains('#', na=False)) &
            (df.event_description.str.contains(' PETTERSSON', na=False)) &
            (df.event_description.str.extract(r'#(\d+) PETTERSSON', expand=False) == '25'),
            'ELIAS PETTERSSON(D)', df.event_player_1),
        # Player 2: the tag to inspect depends on whether the event is a goal.
        event_player_2 = np.where(
            (df.event_player_2 == 'ELIAS PETTERSSON') &
            (
                # Goal rows: look for the number right after ': ' (original
                # author's note: "Goal and Petey got A1").
                ((df.event_type == 'GOAL') &
                 (df.event_description.str.contains(': #', na=False)) &
                 (df.event_description.str.contains(' PETTERSSON', na=False)) &
                 (df.event_description.str.extract(r': #(\d+) PETTERSSON', expand=False) == '25')) |
                # Non-goal rows: look for the number right after 'VAN '
                # (original author's note: "Not a goal, Petey was EP2").
                ((df.event_type != 'GOAL') &
                 (df.event_description.str.contains('VAN #', na=False)) &
                 (df.event_description.str.contains(' PETTERSSON', na=False)) &
                 (df.event_description.str.extract(r'VAN #(\d+) PETTERSSON', expand=False) == '25'))
            ),
            'ELIAS PETTERSSON(D)', df.event_player_2),
        # Player 3: the name must be followed by whitespace or end of string.
        event_player_3 = np.where(
            (df.event_player_3 == 'ELIAS PETTERSSON') &
            (df.event_description.str.contains('#', na=False)) &
            (df.event_description.str.contains(' PETTERSSON', na=False)) &
            (df.event_description.str.extract(r'#(\d+) PETTERSSON(?:\s|$)', expand=False) == '25'),
            'ELIAS PETTERSSON(D)', df.event_player_3)
    )
except Exception as e:
    # Best-effort fix-up: if assign() raises, df is never rebound and keeps its
    # previous value. Log and fall through. The original ended this handler
    # with `continue`, but no loop encloses this statement, so that was a
    # compile-time SyntaxError.
    print(e)
|
|
3236
|
+
|
|
3237
|
+
# Don't even need this, we've had this problem with Stutzle for years, just let it be.
|
|
3238
|
+
# df.event_description = df.event_description.str.replace('FEHÃ\x89RVÃ\x81RY', 'FEHERVARY').str.replace('BLÃMEL', 'BLAMEL')
|
|
2904
3239
|
|
|
2905
3240
|
if (hidden_patrick==0) and (len(df)>0):
|
|
2906
3241
|
|