TopDownHockey-Scraper 6.1.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,302 @@
1
+ import pandas as pd
2
+ import re
3
+ import unicodedata
4
+
5
# Primary lookup table: observed spelling of a player name -> canonical spelling.
# Keys and values are upper-case. Some keys are mis-encoded (mojibake) or
# "?"-substituted variants of accented names, kept exactly as they were seen.
NAME_CORRECTIONS = {
    "ANDREI KASTSITSYN": "ANDREI KOSTITSYN",
    "AJ GREER": "A.J. GREER",
    "ANDREW GREENE": "ANDY GREENE",
    "ANDREW WOZNIEWSKI": "ANDY WOZNIEWSKI",
    "ANTHONY DEANGELO": "TONY DEANGELO",
    "BATES (JON) BATTAGLIA": "BATES BATTAGLIA",
    "BRADLEY MILLS": "BRAD MILLS",
    "CAMERON BARKER": "CAM BARKER",
    "COLIN (JOHN) WHITE": "COLIN WHITE",
    "CRISTOVAL NIEVES": "BOO NIEVES",
    "CHRIS VANDE VELDE": "CHRIS VANDEVELDE",
    "DANNY BRIERE": "DANIEL BRIERE",
    "DANIEL GIRARDI": "DAN GIRARDI",
    "DANNY O'REGAN": "DANIEL O'REGAN",
    "DANIEL CARCILLO": "DAN CARCILLO",
    "DAVID JOHNNY ODUYA": "JOHNNY ODUYA",
    "DAVID BOLLAND": "DAVE BOLLAND",
    "DENIS JR. GAUTHIER": "DENIS GAUTHIER",
    "DWAYNE KING": "DJ KING",
    "EDWARD PURCELL": "TEDDY PURCELL",
    "EMMANUEL FERNANDEZ": "MANNY FERNANDEZ",
    "EMMANUEL LEGACE": "MANNY LEGACE",
    "EVGENII DADONOV": "EVGENY DADONOV",
    "FREDDY MODIN": "FREDRIK MODIN",
    "FREDERICK MEYER IV": "FREDDY MEYER",
    "HARRISON ZOLNIERCZYK": "HARRY ZOLNIERCZYK",
    "ILJA BRYZGALOV": "ILYA BRYZGALOV",
    "JACOB DOWELL": "JAKE DOWELL",
    "JAMES HOWARD": "JIMMY HOWARD",
    "JAMES VANDERMEER": "JIM VANDERMEER",
    "JAMES WYMAN": "JT WYMAN",
    "JOHN HILLEN III": "JACK HILLEN",
    "JOHN ODUYA": "JOHNNY ODUYA",
    "JOHN PEVERLEY": "RICH PEVERLEY",
    "JONATHAN SIM": "JON SIM",
    "JONATHON KALINSKI": "JON KALINSKI",
    "JONATHAN AUDY-MARCHESSAULT": "JONATHAN MARCHESSAULT",
    "JOSEPH CRABB": "JOEY CRABB",
    "JOSEPH CORVO": "JOE CORVO",
    "JOSHUA BAILEY": "JOSH BAILEY",
    "JOSHUA HENNESSY": "JOSH HENNESSY",
    "JOSHUA MORRISSEY": "JOSH MORRISSEY",
    "JEAN-FRANCOIS JACQUES": "J-F JACQUES",
    "JT COMPHER": "J.T. COMPHER",
    "KRISTOPHER LETANG": "KRIS LETANG",
    "KRYSTOFER BARCH": "KRYS BARCH",
    "KRYSTOFER KOLANOS": "KRYS KOLANOS",
    "MARC POULIOT": "MARC-ANTOINE POULIOT",
    "MARTIN ST LOUIS": "MARTIN ST. LOUIS",
    "MARTIN ST PIERRE": "MARTIN ST. PIERRE",
    "MARTY HAVLAT": "MARTIN HAVLAT",
    "MATTHEW CARLE": "MATT CARLE",
    "MATHEW DUMBA": "MATT DUMBA",
    "MATTHEW BENNING": "MATT BENNING",
    "MATTHEW IRWIN": "MATT IRWIN",
    "MATTHEW NIETO": "MATT NIETO",
    "MATTHEW STAJAN": "MATT STAJAN",
    "MAXIM MAYOROV": "MAKSIM MAYOROV",
    "MAXIME TALBOT": "MAX TALBOT",
    "MAXWELL REINHART": "MAX REINHART",
    "MICHAEL BLUNDEN": "MIKE BLUNDEN",
    "MICHAEL CAMMALLERI": "MIKE CAMMALLERI",
    "MICHAEL FERLAND": "MICHEAL FERLAND",
    "MICHAEL GRIER": "MIKE GRIER",
    "MICHAEL KNUBLE": "MIKE KNUBLE",
    "MICHAEL KOMISAREK": "MIKE KOMISAREK",
    "MICHAEL MATHESON": "MIKE MATHESON",
    "MICHAEL MODANO": "MIKE MODANO",
    "MICHAEL RUPP": "MIKE RUPP",
    "MICHAEL SANTORELLI": "MIKE SANTORELLI",
    "MICHAEL SILLINGER": "MIKE SILLINGER",
    "MITCHELL MARNER": "MITCH MARNER",
    "NATHAN GUENIN": "NATE GUENIN",
    "NICHOLAS BOYNTON": "NICK BOYNTON",
    "NICHOLAS DRAZENOVIC": "NICK DRAZENOVIC",
    "NICKLAS BERGFORS": "NICLAS BERGFORS",
    "NICKLAS GROSSMAN": "NICKLAS GROSSMANN",
    "NICOLAS PETAN": "NIC PETAN",
    "NIKLAS KRONVALL": "NIKLAS KRONWALL",
    "NIKOLAI ANTROPOV": "NIK ANTROPOV",
    "NIKOLAI KULEMIN": "NIKOLAY KULEMIN",
    "NIKOLAI ZHERDEV": "NIKOLAY ZHERDEV",
    "OLIVIER MAGNAN-GRENIER": "OLIVIER MAGNAN",
    "PAT MAROON": "PATRICK MAROON",
    "PHILIP VARONE": "PHIL VARONE",
    "QUINTIN HUGHES": "QUINN HUGHES",
    "RAYMOND MACIAS": "RAY MACIAS",
    "RJ UMBERGER": "R.J. UMBERGER",
    "ROBERT BLAKE": "ROB BLAKE",
    "ROBERT EARL": "ROBBIE EARL",
    "ROBERT HOLIK": "BOBBY HOLIK",
    "ROBERT SCUDERI": "ROB SCUDERI",
    "RODNEY PELLEY": "ROD PELLEY",
    "SIARHEI KASTSITSYN": "SERGEI KOSTITSYN",
    "SIMEON VARLAMOV": "SEMYON VARLAMOV",
    "STAFFAN KRONVALL": "STAFFAN KRONWALL",
    "STEVEN REINPRECHT": "STEVE REINPRECHT",
    "TJ GALIARDI": "T.J. GALIARDI",
    "TJ HENSICK": "T.J. HENSICK",
    "TOBY ENSTROM": "TOBIAS ENSTROM",
    "TOMMY SESTITO": "TOM SESTITO",
    "VACLAV PROSPAL": "VINNY PROSPAL",
    "VINCENT HINOSTROZA": "VINNIE HINOSTROZA",
    "WILLIAM THOMAS": "BILL THOMAS",
    "ZACHARY ASTON-REESE": "ZACH ASTON-REESE",
    "ZACHARY SANFORD": "ZACH SANFORD",
    "ZACHERY STORTINI": "ZACK STORTINI",
    "MATTHEW MURRAY": "MATT MURRAY",
    "J-SEBASTIEN AUBIN": "JEAN-SEBASTIEN AUBIN",
    "JEFF DROUIN-DESLAURIERS": "JEFF DESLAURIERS",
    "NICHOLAS BAPTISTE": "NICK BAPTISTE",
    "OLAF KOLZIG": "OLIE KOLZIG",
    "STEPHEN VALIQUETTE": "STEVE VALIQUETTE",
    "THOMAS MCCOLLUM": "TOM MCCOLLUM",
    "TIMOTHY JR. THOMAS": "TIM THOMAS",
    "TIM GETTINGER": "TIMOTHY GETTINGER",
    "NICHOLAS SHORE": "NICK SHORE",
    "T.J. TYNAN": "TJ TYNAN",
    "ALEXIS LAFRENIÈRE": "ALEXIS LAFRENIERE",
    "ALEXIS LAFRENI?RE": "ALEXIS LAFRENIERE",
    "ALEXIS LAFRENIÃRE": "ALEXIS LAFRENIERE",
    "ALEXIS LAFRENIARE": "ALEXIS LAFRENIERE",
    "TIM STÜTZLE": "TIM STUTZLE",
    "TIM ST?TZLE": "TIM STUTZLE",
    "TIM STÃTZLE": "TIM STUTZLE",
    "TIM STATZLE": "TIM STUTZLE",
    "JANI HAKANPÃ\x84Ã\x84": "JANI HAKANPAA",
    "EGOR SHARANGOVICH": "YEGOR SHARANGOVICH",
    "CALLAN FOOTE": "CAL FOOTE",
    "MATTIAS JANMARK-NYLEN": "MATTIAS JANMARK",
    "JOSH DUNNE": "JOSHUA DUNNE",
    "JANIS MOSER": "J.J. MOSER",
    "NICHOLAS PAUL": "NICK PAUL",
    "JACOB MIDDLETON": "JAKE MIDDLETON",
    "TOMMY NOVAK": "THOMAS NOVAK",
    "JOSHUA NORRIS": "JOSH NORRIS",
    "P.O JOSEPH": "PIERRE-OLIVIER JOSEPH",
    "MIKEY EYSSIMONT": "MICHAEL EYSSIMONT",
    "MATAJ BLAMEL": "MATAJ BLAMEL",
    "MATEJ BLAMEL": "MATAJ BLAMEL",
    "VITTORIO MANCINI": "VICTOR MANCINI",
    "JOSHUA MAHURA": "JOSH MAHURA",
    "JOSEPH VELENO": "JOE VELENO",
    "ZACK BOLDUC": "ZACHARY BOLDUC",
    "JOSHUA BROWN": "JOSH BROWN",
    "JAKE LUCCHINI": "JACOB LUCCHINI",
    "EMIL LILLEBERG": "EMIL MARTINSEN LILLEBERG",
    "CAMERON ATKINSON": "CAM ATKINSON",
    "JURAJ SLAFKOVSKA": "JURAJ SLAFKOVSKY",
    "MARTIN FEHARVARY": "MARTIN FEHERVARY",
    "JOHN (JACK) ROSLOVIC": "JACK ROSLOVIC",
    "ANTHONY-JOHN (AJ) GREER": "A.J. GREER",
    "ALEX BARRÃ-BOULET": "ALEX BARRE-BOULET",
    "COLIN": "COLIN WHITE CAN",
    "CAMERON TALBOT": "CAM TALBOT",
    "DANIEL VLADAR": "DAN VLADAR",
    "LUCAS GLENDENING": "LUKE GLENDENING",
    "FREDDY GAUDREAU": "FREDERICK GAUDREAU",
    "SAMUEL BLAIS": "SAMMY BLAIS",
    "ISAC LUNDESTRAM": "ISAC LUNDESTROM",
    "NATHAN LEGARE": "NATHAN LAGARA",
    "NATHAN LEGARA": "NATHAN LAGARA",
    "NATHAN LAGARE": "NATHAN LAGARA",
    "SAMUEL MONTEMBEAULT": "SAM MONTEMBEAULT",
    "SAMUEL MONTEMBAULT": "SAM MONTEMBEAULT",
    "MATTHEW GRZELCYK": "MATT GRZELCYK",
    "MATEJ BLUMEL": "MATAJ BLAMEL",
}
174
+
175
+
176
# Players for whom several observed spellings collapse onto one canonical
# name (the original note says these are used for .isin() checks).
# Written as canonical -> aliases and inverted into the alias -> canonical
# mapping; alias order within and across groups is preserved.
NAME_CORRECTIONS_MULTI = {
    alias: canonical
    for canonical, aliases in (
        (
            "B.J. CROMBEEN",
            ("BJ CROMBEEN", "B.J CROMBEEN", "BRANDON CROMBEEN", "B J CROMBEEN"),
        ),
        ("DANIEL CLEARY", ("DAN CLEARY", "DANNY CLEARY")),
        ("MICHAEL BOURNIVAL", ("MICHAËL BOURNIVAL", "MICHAÃ\x8bL BOURNIVAL")),
        ("J-P DUMONT", ("J P DUMONT", "JEAN-PIERRE DUMONT")),
        ("P.J. AXELSSON", ("P. J. AXELSSON", "PER JOHAN AXELSSON")),
        ("P.K. SUBBAN", ("PK SUBBAN", "P.K SUBBAN")),
        (
            "P.A. PARENTEAU",
            (
                "PIERRE PARENTEAU",
                "PIERRE-ALEX PARENTEAU",
                "PIERRE-ALEXANDRE PARENTEAU",
                "PA PARENTEAU",
                "P.A PARENTEAU",
                "P-A PARENTEAU",
            ),
        ),
        ("T.J. OSHIE", ("TJ OSHIE", "T.J OSHIE")),
        ("J-F BERUBE", ("J.F. BERUBE", "JEAN-FRANCOIS BERUBE")),
        ("GUSTAV LINDSTROM", ("GUSTAV LINDSTRAM",)),
        ("JESSE YLONEN", ("JESSE YLANEN",)),
    )
    for alias in aliases
}
205
+
206
# Specific name corrections (matching ESPN function).
# Maps an observed upper-case spelling to the canonical upper-case spelling.
ESPNNAME_CORRECTIONS = {
    'J T COMPHER': 'J.T. COMPHER',
    'J T MILLER': 'J.T. MILLER',
    'T J OSHIE': 'T.J. OSHIE',
    'ALEXIS LAFRENI RE': 'ALEXIS LAFRENIERE',
    'T.J. BRODIE': 'TJ BRODIE',
    'MATTHEW IRWIN': 'MATT IRWIN',
    'STEVE KAMPFER': 'STEVEN KAMPFER',
    'JEFFREY TRUCHON-VIEL': 'JEFFREY VIEL',
    'ZACHARY JONES': 'ZAC JONES',
    'MATHEW DUMBA': 'MATT DUMBA',
    'JOSHUA MORRISSEY': 'JOSH MORRISSEY',
    'P K SUBBAN': 'P.K. SUBBAN',
    'EGOR SHARANGOVICH': 'YEGOR SHARANGOVICH',
    'MAXIME COMTOIS': 'MAX COMTOIS',
    'NICHOLAS CAAMANO': 'NICK CAAMANO',
    'DANIEL CARCILLO': 'DAN CARCILLO',
    'ALEXANDER OVECHKIN': 'ALEX OVECHKIN',
    'MICHAEL CAMMALLERI': 'MIKE CAMMALLERI',
    'DAVE STECKEL': 'DAVID STECKEL',
    'JIM DOWD': 'JAMES DOWD',
    'MAXIME TALBOT': 'MAX TALBOT',
    'MIKE ZIGOMANIS': 'MICHAEL ZIGOMANIS',
    # NOTE: a former 'VINNY PROSPAL' -> 'VACLAV PROSPAL' entry was removed
    # here. Together with 'VACLAV PROSPAL' -> 'VINNY PROSPAL' below it formed
    # a two-way cycle, so a single dict lookup swapped the two spellings
    # instead of converging on one. 'VINNY PROSPAL' is the canonical form.
    'MIKE YORK': 'MICHAEL YORK',
    'JACOB DOWELL': 'JAKE DOWELL',
    'MICHAEL RUPP': 'MIKE RUPP',
    'ALEXEI KOVALEV': 'ALEX KOVALEV',
    'SLAVA KOZLOV': 'VYACHESLAV KOZLOV',
    'JEFF HAMILTON': 'JEFFREY HAMILTON',
    'JOHNNY POHL': 'JOHN POHL',
    'DANIEL GIRARDI': 'DAN GIRARDI',
    'NIKOLAI ZHERDEV': 'NIKOLAY ZHERDEV',
    'J.P. DUMONT': 'J-P DUMONT',
    'DWAYNE KING': 'DJ KING',
    'JOHN ODUYA': 'JOHNNY ODUYA',
    'ROBERT SCUDERI': 'ROB SCUDERI',
    'DOUG MURRAY': 'DOUGLAS MURRAY',
    'VACLAV PROSPAL': 'VINNY PROSPAL',
    'RICH PEVERLY': 'RICH PEVERLEY',
    'JANIS MOSER': 'J.J. MOSER',
    'NICHOLAS PAUL': 'NICK PAUL',
    'JACOB MIDDLETON': 'JAKE MIDDLETON',
    'TOMMY NOVAK': 'THOMAS NOVAK',
    'JOHHNY BEECHER': 'JOHN BEECHER',
    'ALEXANDER BARKOV': 'ALEKSANDER BARKOV',
    'JOSHUA NORRIS': 'JOSH NORRIS',
    'P.O JOSEPH': 'PIERRE-OLIVIER JOSEPH',
    'MIKEY EYSSIMONT': 'MICHAEL EYSSIMONT',
    'MATAJ BLAMEL': 'MATAJ BLAMEL',
    'VITTORIO MANCINI': 'VICTOR MANCINI',
    'JOSHUA MAHURA': 'JOSH MAHURA',
    'JOSEPH VELENO': 'JOE VELENO',
    'JOSHUA BROWN': 'JOSH BROWN',
    'JAKE LUCCHINI': 'JACOB LUCCHINI',
    'EMIL LILLEBERG': 'EMIL MARTINSEN LILLEBERG',
    'CAMERON ATKINSON': 'CAM ATKINSON',
    'JURAJ SLAFKOVSKA': 'JURAJ SLAFKOVSKY',
    'MARTIN FEHARVARY': 'MARTIN FEHERVARY',
    'JOHN (JACK) ROSLOVIC': 'JACK ROSLOVIC',
    'ANTHONY-JOHN (AJ) GREER': 'A.J. GREER',
}
269
+
270
# Fold the multi-alias table and the ESPN-specific table into the primary
# table, mutating NAME_CORRECTIONS in place. ESPN entries are merged last, so
# they take precedence whenever the same key appears in more than one table.
NAME_CORRECTIONS.update({**NAME_CORRECTIONS_MULTI, **ESPNNAME_CORRECTIONS})
274
+
275
def normalize_player_name(name):
    """Normalize a player name to an upper-case ASCII canonical spelling.

    Intended to mirror the name handling of ``scrape_espn_events`` (defined
    outside this block).

    Processing order:

    1. Missing values (per ``pd.isna``) and the empty string are returned
       unchanged.
    2. The value is converted with ``str()``, stripped, and a trailing
       `` (A)`` and then a trailing `` (C)`` designation is removed.
    3. Accents are stripped (NFKD decomposition, non-ASCII dropped), the name
       is upper-cased and runs of spaces are collapsed to one.
    4. The exact result is looked up in ``NAME_CORRECTIONS``. A hit wins
       outright, so player-specific entries whose key contains a long first
       name (e.g. ``'ALEXANDER BARKOV'``) are honoured.
    5. Otherwise ``ALEXANDRE``/``ALEXANDER`` are shortened to ``ALEX`` and
       ``CHRISTOPHER`` to ``CHRIS``, and the shortened name is looked up.

    Args:
        name: Raw player name, or a missing value.

    Returns:
        The normalized name as a stripped ``str``, or ``name`` itself when it
        is missing or empty.
    """
    if pd.isna(name) or name == '':
        return name

    cleaned = str(name).strip()

    # Drop a trailing alternate-captain / captain designation, (A) before (C).
    for designation in (r' \(A\)$', r' \(C\)$'):
        cleaned = re.sub(designation, '', cleaned).strip()

    # Decompose accented characters and discard whatever is not plain ASCII.
    cleaned = (
        unicodedata.normalize('NFKD', cleaned)
        .encode('ascii', errors='ignore')
        .decode('ascii')
    )
    cleaned = cleaned.upper()

    # Collapse repeated spaces BEFORE any table lookup so keys can match.
    cleaned = re.sub(r' +', ' ', cleaned)

    # Player-specific corrections take priority over the generic first-name
    # shortening below; doing the shortening first would rewrite the name and
    # make any table key containing the long form unreachable.
    if cleaned in NAME_CORRECTIONS:
        return NAME_CORRECTIONS[cleaned].strip()

    # Generic first-name shortenings, then a second lookup on the result.
    cleaned = cleaned.replace('ALEXANDRE ', 'ALEX ')
    cleaned = cleaned.replace('ALEXANDER ', 'ALEX ')
    cleaned = cleaned.replace('CHRISTOPHER ', 'CHRIS ')

    return NAME_CORRECTIONS.get(cleaned, cleaned).strip()