hockey-blast-common-lib 0.1.63__py3-none-any.whl → 0.1.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. hockey_blast_common_lib/aggregate_all_stats.py +7 -4
  2. hockey_blast_common_lib/aggregate_goalie_stats.py +301 -107
  3. hockey_blast_common_lib/aggregate_h2h_stats.py +64 -33
  4. hockey_blast_common_lib/aggregate_human_stats.py +565 -280
  5. hockey_blast_common_lib/aggregate_referee_stats.py +286 -135
  6. hockey_blast_common_lib/aggregate_s2s_stats.py +85 -25
  7. hockey_blast_common_lib/aggregate_scorekeeper_stats.py +228 -113
  8. hockey_blast_common_lib/aggregate_skater_stats.py +561 -238
  9. hockey_blast_common_lib/assign_skater_skill.py +21 -11
  10. hockey_blast_common_lib/db_connection.py +59 -8
  11. hockey_blast_common_lib/embedding_utils.py +309 -0
  12. hockey_blast_common_lib/h2h_models.py +150 -56
  13. hockey_blast_common_lib/models.py +305 -150
  14. hockey_blast_common_lib/options.py +30 -15
  15. hockey_blast_common_lib/progress_utils.py +21 -13
  16. hockey_blast_common_lib/skills_in_divisions.py +170 -33
  17. hockey_blast_common_lib/skills_propagation.py +164 -70
  18. hockey_blast_common_lib/stats_models.py +489 -245
  19. hockey_blast_common_lib/stats_utils.py +6 -3
  20. hockey_blast_common_lib/utils.py +89 -25
  21. hockey_blast_common_lib/wsgi.py +7 -5
  22. {hockey_blast_common_lib-0.1.63.dist-info → hockey_blast_common_lib-0.1.64.dist-info}/METADATA +1 -1
  23. hockey_blast_common_lib-0.1.64.dist-info/RECORD +29 -0
  24. hockey_blast_common_lib-0.1.63.dist-info/RECORD +0 -28
  25. {hockey_blast_common_lib-0.1.63.dist-info → hockey_blast_common_lib-0.1.64.dist-info}/WHEEL +0 -0
  26. {hockey_blast_common_lib-0.1.63.dist-info → hockey_blast_common_lib-0.1.64.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,23 @@
1
- import sys
2
1
  import os
2
+ import sys
3
3
  from collections import defaultdict
4
+
4
5
  import numpy as np
5
6
 
6
7
  # Add the project root directory to the Python path
7
8
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
9
 
9
- from hockey_blast_common_lib.models import Level, Division
10
- from hockey_blast_common_lib.stats_models import LevelsGraphEdge, LevelStatsSkater, SkillValuePPGRatio
10
+ from sqlalchemy.exc import IntegrityError
11
+
11
12
  from hockey_blast_common_lib.db_connection import create_session
13
+ from hockey_blast_common_lib.models import Division, Level
12
14
  from hockey_blast_common_lib.progress_utils import create_progress_tracker
13
- from sqlalchemy import func
14
- from sqlalchemy.exc import IntegrityError
15
+ from hockey_blast_common_lib.stats_models import (
16
+ LevelsGraphEdge,
17
+ LevelStatsSkater,
18
+ SkillValuePPGRatio,
19
+ )
15
20
 
16
- import numpy as np
17
21
 
18
22
  class Config:
19
23
  MIN_GAMES_PLAYED_FOR_EDGE = 10
@@ -42,6 +46,7 @@ class Config:
42
46
  modified_z_score = 0.6745 * diff / med_abs_deviation
43
47
  return data[modified_z_score < m]
44
48
 
49
+
45
50
  def reset_skill_values_in_divisions():
46
51
  session = create_session("boss")
47
52
 
@@ -52,14 +57,18 @@ def reset_skill_values_in_divisions():
52
57
  # Look up the Skill table using the level from Division
53
58
  div_level = division.level
54
59
  # Query to find the matching Skill
55
- level = session.query(Level).filter(Level.org_id == division.org_id, Level.level_name == div_level).one_or_none()
60
+ level = (
61
+ session.query(Level)
62
+ .filter(Level.org_id == division.org_id, Level.level_name == div_level)
63
+ .one_or_none()
64
+ )
56
65
 
57
66
  if not level:
58
67
  # If no match found, check each alternative name individually
59
68
  skills = session.query(Level).filter(Level.org_id == division.org_id).all()
60
69
  for s in skills:
61
70
  if s.level_alternative_name: # Check if not None
62
- alternative_names = s.level_alternative_name.split(',')
71
+ alternative_names = s.level_alternative_name.split(",")
63
72
  if div_level in alternative_names:
64
73
  level = s
65
74
  break
@@ -74,11 +83,14 @@ def reset_skill_values_in_divisions():
74
83
  level.skill_value = -1
75
84
  else:
76
85
  # Check if level already exists with this org_id/level_name combination
77
- existing_level = session.query(Level).filter(
78
- Level.org_id == division.org_id,
79
- Level.level_name == division.level
80
- ).first()
81
-
86
+ existing_level = (
87
+ session.query(Level)
88
+ .filter(
89
+ Level.org_id == division.org_id, Level.level_name == division.level
90
+ )
91
+ .first()
92
+ )
93
+
82
94
  if existing_level:
83
95
  # Use existing level
84
96
  division.level_id = existing_level.id
@@ -89,9 +101,9 @@ def reset_skill_values_in_divisions():
89
101
  org_id=division.org_id,
90
102
  skill_value=-1,
91
103
  level_name=division.level,
92
- level_alternative_name='',
104
+ level_alternative_name="",
93
105
  is_seed=False,
94
- skill_propagation_sequence=-1
106
+ skill_propagation_sequence=-1,
95
107
  )
96
108
  session.add(new_level)
97
109
  try:
@@ -101,20 +113,31 @@ def reset_skill_values_in_divisions():
101
113
  except IntegrityError:
102
114
  session.rollback()
103
115
  # Another process created this level, query for it
104
- existing_level = session.query(Level).filter(
105
- Level.org_id == division.org_id,
106
- Level.level_name == division.level
107
- ).first()
116
+ existing_level = (
117
+ session.query(Level)
118
+ .filter(
119
+ Level.org_id == division.org_id,
120
+ Level.level_name == division.level,
121
+ )
122
+ .first()
123
+ )
108
124
  if existing_level:
109
125
  division.level_id = existing_level.id
110
- print(f"Race condition resolved - using existing Level for Division {division.level}")
126
+ print(
127
+ f"Race condition resolved - using existing Level for Division {division.level}"
128
+ )
111
129
  else:
112
- raise RuntimeError(f"Unable to create or find level: {division.level} for org_id: {division.org_id}")
130
+ raise RuntimeError(
131
+ f"Unable to create or find level: {division.level} for org_id: {division.org_id}"
132
+ )
113
133
 
114
134
  # Commit the changes to the Division
115
135
  session.commit()
116
136
 
117
- print("Level values and propagation sequences have been populated into the Division table.")
137
+ print(
138
+ "Level values and propagation sequences have been populated into the Division table."
139
+ )
140
+
118
141
 
119
142
  def build_levels_graph_edges():
120
143
  # Creates unique edges from levelA to levelB (there is no reverse edge levelB to levelA)
@@ -131,10 +154,13 @@ def build_levels_graph_edges():
131
154
  level_human_stats = defaultdict(lambda: defaultdict(dict))
132
155
 
133
156
  for stat in level_stats:
134
- if stat.games_played >= Config.MIN_GAMES_PLAYED_FOR_EDGE and stat.points_per_game >= Config.MIN_PPG_FOR_EDGE:
157
+ if (
158
+ stat.games_played >= Config.MIN_GAMES_PLAYED_FOR_EDGE
159
+ and stat.points_per_game >= Config.MIN_PPG_FOR_EDGE
160
+ ):
135
161
  level_human_stats[stat.level_id][stat.human_id] = {
136
- 'games_played': stat.games_played,
137
- 'points_per_game': stat.points_per_game
162
+ "games_played": stat.games_played,
163
+ "points_per_game": stat.points_per_game,
138
164
  }
139
165
 
140
166
  # Dictionary to store edges
@@ -142,8 +168,11 @@ def build_levels_graph_edges():
142
168
 
143
169
  # Build edges - batch load all levels first for performance
144
170
  all_level_ids = list(level_human_stats.keys())
145
- levels_dict = {level.id: level for level in session.query(Level).filter(Level.id.in_(all_level_ids)).all()}
146
-
171
+ levels_dict = {
172
+ level.id: level
173
+ for level in session.query(Level).filter(Level.id.in_(all_level_ids)).all()
174
+ }
175
+
147
176
  total_levels = len(level_human_stats)
148
177
  progress = create_progress_tracker(total_levels, "Building level graph edges")
149
178
  processed_levels = 0
@@ -167,10 +196,10 @@ def build_levels_graph_edges():
167
196
  # if from_level.id == 223 and to_level.id == 219: #216
168
197
  # print(f"Debug: From Level ID: {from_level.id}, To Level ID: {to_level.id}")
169
198
  for human_id in common_humans:
170
- from_ppg = from_humans[human_id]['points_per_game']
171
- to_ppg = to_humans[human_id]['points_per_game']
172
- from_games = from_humans[human_id]['games_played']
173
- to_games = to_humans[human_id]['games_played']
199
+ from_ppg = from_humans[human_id]["points_per_game"]
200
+ to_ppg = to_humans[human_id]["points_per_game"]
201
+ from_games = from_humans[human_id]["games_played"]
202
+ to_games = to_humans[human_id]["games_played"]
174
203
  min_games = min(from_games, to_games)
175
204
  n_games += min_games
176
205
 
@@ -199,7 +228,7 @@ def build_levels_graph_edges():
199
228
  to_level_id=to_level_id,
200
229
  n_connections=n_connections,
201
230
  ppg_ratio=avg_ppg_ratio,
202
- n_games=n_games # Store the number of games
231
+ n_games=n_games, # Store the number of games
203
232
  )
204
233
  edges[(from_level_id, to_level_id)] = edge
205
234
 
@@ -213,9 +242,10 @@ def build_levels_graph_edges():
213
242
 
214
243
  print("\nLevels graph edges have been populated into the database.")
215
244
 
245
+
216
246
  def propagate_skill_levels(propagation_sequence):
217
- min_skill_value = float('inf')
218
- max_skill_value = float('-inf')
247
+ min_skill_value = float("inf")
248
+ max_skill_value = float("-inf")
219
249
 
220
250
  session = create_session("boss")
221
251
 
@@ -225,7 +255,9 @@ def propagate_skill_levels(propagation_sequence):
225
255
  session.commit()
226
256
 
227
257
  # Build and save the correlation data
228
- levels = session.query(Level).filter(Level.skill_propagation_sequence == 0).all()
258
+ levels = (
259
+ session.query(Level).filter(Level.skill_propagation_sequence == 0).all()
260
+ )
229
261
  level_ids = {level.id for level in levels}
230
262
  correlation_data = defaultdict(list)
231
263
 
@@ -233,10 +265,14 @@ def propagate_skill_levels(propagation_sequence):
233
265
  if level.skill_value == -1:
234
266
  continue
235
267
 
236
- edges = session.query(LevelsGraphEdge).filter(
237
- (LevelsGraphEdge.from_level_id == level.id) |
238
- (LevelsGraphEdge.to_level_id == level.id)
239
- ).all()
268
+ edges = (
269
+ session.query(LevelsGraphEdge)
270
+ .filter(
271
+ (LevelsGraphEdge.from_level_id == level.id)
272
+ | (LevelsGraphEdge.to_level_id == level.id)
273
+ )
274
+ .all()
275
+ )
240
276
 
241
277
  for edge in edges:
242
278
  if edge.n_connections < Config.MIN_CONNECTIONS_FOR_CORRELATION:
@@ -252,7 +288,9 @@ def propagate_skill_levels(propagation_sequence):
252
288
  if target_level_id not in level_ids:
253
289
  continue
254
290
 
255
- target_level = session.query(Level).filter_by(id=target_level_id).first()
291
+ target_level = (
292
+ session.query(Level).filter_by(id=target_level_id).first()
293
+ )
256
294
  if target_level:
257
295
  skill_value_from = level.skill_value
258
296
  skill_value_to = target_level.skill_value
@@ -261,10 +299,12 @@ def propagate_skill_levels(propagation_sequence):
261
299
  if skill_value_from == skill_value_to:
262
300
  continue
263
301
 
264
-
265
302
  # Since we go over all levels in the sequence 0, we will see each edge twice
266
303
  # This condition eliminates duplicates
267
- if abs(skill_value_from - skill_value_to) > Config.MAX_SKILL_DIFF_IN_EDGE:
304
+ if (
305
+ abs(skill_value_from - skill_value_to)
306
+ > Config.MAX_SKILL_DIFF_IN_EDGE
307
+ ):
268
308
  continue
269
309
 
270
310
  # Debug prints
@@ -272,7 +312,10 @@ def propagate_skill_levels(propagation_sequence):
272
312
 
273
313
  # Ensure INCREASING SKILL VALUES for the correlation data!
274
314
  if skill_value_from > skill_value_to:
275
- skill_value_from, skill_value_to = skill_value_to, skill_value_from
315
+ skill_value_from, skill_value_to = (
316
+ skill_value_to,
317
+ skill_value_from,
318
+ )
276
319
  ppg_ratio_edge = 1 / ppg_ratio_edge
277
320
 
278
321
  correlation_data[(skill_value_from, skill_value_to)].append(
@@ -282,16 +325,33 @@ def propagate_skill_levels(propagation_sequence):
282
325
  # Save correlation data to the database
283
326
  for (skill_value_from, skill_value_to), ppg_ratios in correlation_data.items():
284
327
  ppg_ratios = [(ppg_ratio, n_games) for ppg_ratio, n_games in ppg_ratios]
285
- ppg_ratios_array = np.array(ppg_ratios, dtype=[('ppg_ratio', float), ('n_games', int)])
286
- ppg_ratios_filtered = Config.discard_outliers(ppg_ratios_array['ppg_ratio'])
328
+ ppg_ratios_array = np.array(
329
+ ppg_ratios, dtype=[("ppg_ratio", float), ("n_games", int)]
330
+ )
331
+ ppg_ratios_filtered = Config.discard_outliers(ppg_ratios_array["ppg_ratio"])
287
332
  if len(ppg_ratios_filtered) > 0:
288
- avg_ppg_ratio = float(sum(ppg_ratio * n_games for ppg_ratio, n_games in ppg_ratios if ppg_ratio in ppg_ratios_filtered) / sum(n_games for ppg_ratio, n_games in ppg_ratios if ppg_ratio in ppg_ratios_filtered))
289
- total_n_games = sum(n_games for ppg_ratio, n_games in ppg_ratios if ppg_ratio in ppg_ratios_filtered)
333
+ avg_ppg_ratio = float(
334
+ sum(
335
+ ppg_ratio * n_games
336
+ for ppg_ratio, n_games in ppg_ratios
337
+ if ppg_ratio in ppg_ratios_filtered
338
+ )
339
+ / sum(
340
+ n_games
341
+ for ppg_ratio, n_games in ppg_ratios
342
+ if ppg_ratio in ppg_ratios_filtered
343
+ )
344
+ )
345
+ total_n_games = sum(
346
+ n_games
347
+ for ppg_ratio, n_games in ppg_ratios
348
+ if ppg_ratio in ppg_ratios_filtered
349
+ )
290
350
  correlation = SkillValuePPGRatio(
291
351
  from_skill_value=skill_value_from,
292
352
  to_skill_value=skill_value_to,
293
353
  ppg_ratio=avg_ppg_ratio,
294
- n_games=total_n_games # Store the sum of games
354
+ n_games=total_n_games, # Store the sum of games
295
355
  )
296
356
  session.add(correlation)
297
357
  session.commit()
@@ -300,14 +360,22 @@ def propagate_skill_levels(propagation_sequence):
300
360
  max_skill_value = max(max_skill_value, skill_value_from, skill_value_to)
301
361
 
302
362
  # Propagate skill levels
303
- levels = session.query(Level).filter(Level.skill_propagation_sequence == propagation_sequence).all()
363
+ levels = (
364
+ session.query(Level)
365
+ .filter(Level.skill_propagation_sequence == propagation_sequence)
366
+ .all()
367
+ )
304
368
  suggested_skill_values = defaultdict(list)
305
369
 
306
370
  for level in levels:
307
- edges = session.query(LevelsGraphEdge).filter(
308
- (LevelsGraphEdge.from_level_id == level.id) |
309
- (LevelsGraphEdge.to_level_id == level.id)
310
- ).all()
371
+ edges = (
372
+ session.query(LevelsGraphEdge)
373
+ .filter(
374
+ (LevelsGraphEdge.from_level_id == level.id)
375
+ | (LevelsGraphEdge.to_level_id == level.id)
376
+ )
377
+ .all()
378
+ )
311
379
 
312
380
  for edge in edges:
313
381
  if edge.n_connections < Config.MIN_CONNECTIONS_FOR_PROPAGATION:
@@ -322,10 +390,14 @@ def propagate_skill_levels(propagation_sequence):
322
390
 
323
391
  target_level = session.query(Level).filter_by(id=target_level_id).first()
324
392
  if target_level and target_level.skill_propagation_sequence == -1:
325
- correlations = session.query(SkillValuePPGRatio).filter(
326
- (SkillValuePPGRatio.from_skill_value <= level.skill_value) &
327
- (SkillValuePPGRatio.to_skill_value >= level.skill_value)
328
- ).all()
393
+ correlations = (
394
+ session.query(SkillValuePPGRatio)
395
+ .filter(
396
+ (SkillValuePPGRatio.from_skill_value <= level.skill_value)
397
+ & (SkillValuePPGRatio.to_skill_value >= level.skill_value)
398
+ )
399
+ .all()
400
+ )
329
401
 
330
402
  if correlations:
331
403
  weighted_skill_values = []
@@ -334,17 +406,24 @@ def propagate_skill_levels(propagation_sequence):
334
406
  # Let's avoid extrapolating from the end of the edge and away from the edge!
335
407
 
336
408
  # Check left side of the edge
337
- if (level.skill_value == correlation.from_skill_value and level.skill_value > min_skill_value):
409
+ if (
410
+ level.skill_value == correlation.from_skill_value
411
+ and level.skill_value > min_skill_value
412
+ ):
338
413
  if ppg_ratio_edge < 1:
339
414
  continue
340
415
  # Check right side of the edge
341
- if (level.skill_value == correlation.to_skill_value and level.skill_value < max_skill_value):
416
+ if (
417
+ level.skill_value == correlation.to_skill_value
418
+ and level.skill_value < max_skill_value
419
+ ):
342
420
  if ppg_ratio_edge > 1:
343
421
  continue
344
422
 
345
-
346
423
  # First confirm which way are we going here
347
- if (ppg_ratio_edge < 1 and correlation.ppg_ratio > 1) or (ppg_ratio_edge > 1 and correlation.ppg_ratio < 1):
424
+ if (ppg_ratio_edge < 1 and correlation.ppg_ratio > 1) or (
425
+ ppg_ratio_edge > 1 and correlation.ppg_ratio < 1
426
+ ):
348
427
  # Reverse the correlation
349
428
  from_skill_value = correlation.to_skill_value
350
429
  to_skill_value = correlation.from_skill_value
@@ -360,12 +439,16 @@ def propagate_skill_levels(propagation_sequence):
360
439
  ppg_ratio_range = 1 / ppg_ratio_range
361
440
  else:
362
441
  ppg_ratio_for_extrapolation = ppg_ratio_edge
363
-
442
+
364
443
  # Interpolate or extrapolate skill value
365
444
  skill_value_range = to_skill_value - from_skill_value
366
- skill_value_diff = (ppg_ratio_for_extrapolation / ppg_ratio_range) * skill_value_range
445
+ skill_value_diff = (
446
+ ppg_ratio_for_extrapolation / ppg_ratio_range
447
+ ) * skill_value_range
367
448
  new_skill_value = level.skill_value + skill_value_diff
368
- weighted_skill_values.append((new_skill_value, correlation.n_games))
449
+ weighted_skill_values.append(
450
+ (new_skill_value, correlation.n_games)
451
+ )
369
452
  # if target_level.id == 229:
370
453
  # print(f"Debug: From Level ID: {level.id}, To Level ID: {target_level.id}")
371
454
  # print(f"Debug: From Skill Value: {level.skill_value} PPG Ratio: {ppg_ratio_for_extrapolation}, PPG Ratio Range: {ppg_ratio_range}")
@@ -374,8 +457,16 @@ def propagate_skill_levels(propagation_sequence):
374
457
 
375
458
  # Calculate weighted average of new skill values
376
459
  total_n_games = sum(n_games for _, n_games in weighted_skill_values)
377
- weighted_avg_skill_value = sum(skill_value * n_games for skill_value, n_games in weighted_skill_values) / total_n_games
378
- suggested_skill_values[target_level_id].append(weighted_avg_skill_value)
460
+ weighted_avg_skill_value = (
461
+ sum(
462
+ skill_value * n_games
463
+ for skill_value, n_games in weighted_skill_values
464
+ )
465
+ / total_n_games
466
+ )
467
+ suggested_skill_values[target_level_id].append(
468
+ weighted_avg_skill_value
469
+ )
379
470
 
380
471
  # Update skill values for target levels
381
472
  session.flush() # Ensure all previous changes are flushed before updates
@@ -387,10 +478,12 @@ def propagate_skill_levels(propagation_sequence):
387
478
  if avg_skill_value < min_skill_value:
388
479
  avg_skill_value = min_skill_value - 0.01
389
480
  try:
390
- session.query(Level).filter_by(id=target_level_id).update({
391
- 'skill_value': avg_skill_value,
392
- 'skill_propagation_sequence': propagation_sequence + 1
393
- })
481
+ session.query(Level).filter_by(id=target_level_id).update(
482
+ {
483
+ "skill_value": avg_skill_value,
484
+ "skill_propagation_sequence": propagation_sequence + 1,
485
+ }
486
+ )
394
487
  session.flush() # Flush each update individually
395
488
  except Exception as e:
396
489
  print(f"Error updating level {target_level_id}: {e}")
@@ -400,6 +493,7 @@ def propagate_skill_levels(propagation_sequence):
400
493
 
401
494
  print(f"Skill levels have been propagated for sequence {propagation_sequence}.")
402
495
 
496
+
403
497
  if __name__ == "__main__":
404
498
  reset_skill_values_in_divisions()
405
499
  build_levels_graph_edges()