hockey-blast-common-lib 0.1.63-py3-none-any.whl → 0.1.65-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. hockey_blast_common_lib/aggregate_all_stats.py +7 -4
  2. hockey_blast_common_lib/aggregate_goalie_stats.py +301 -107
  3. hockey_blast_common_lib/aggregate_h2h_stats.py +64 -33
  4. hockey_blast_common_lib/aggregate_human_stats.py +565 -280
  5. hockey_blast_common_lib/aggregate_referee_stats.py +286 -135
  6. hockey_blast_common_lib/aggregate_s2s_stats.py +85 -25
  7. hockey_blast_common_lib/aggregate_scorekeeper_stats.py +228 -113
  8. hockey_blast_common_lib/aggregate_skater_stats.py +561 -238
  9. hockey_blast_common_lib/assign_skater_skill.py +21 -11
  10. hockey_blast_common_lib/db_connection.py +59 -8
  11. hockey_blast_common_lib/embedding_utils.py +309 -0
  12. hockey_blast_common_lib/h2h_models.py +150 -56
  13. hockey_blast_common_lib/hockey_blast_sample_backup.sql.gz +0 -0
  14. hockey_blast_common_lib/models.py +305 -150
  15. hockey_blast_common_lib/options.py +30 -15
  16. hockey_blast_common_lib/progress_utils.py +21 -13
  17. hockey_blast_common_lib/skills_in_divisions.py +170 -33
  18. hockey_blast_common_lib/skills_propagation.py +164 -70
  19. hockey_blast_common_lib/stats_models.py +489 -245
  20. hockey_blast_common_lib/stats_utils.py +6 -3
  21. hockey_blast_common_lib/utils.py +91 -25
  22. hockey_blast_common_lib/wsgi.py +7 -5
  23. {hockey_blast_common_lib-0.1.63.dist-info → hockey_blast_common_lib-0.1.65.dist-info}/METADATA +1 -1
  24. hockey_blast_common_lib-0.1.65.dist-info/RECORD +29 -0
  25. hockey_blast_common_lib-0.1.63.dist-info/RECORD +0 -28
  26. {hockey_blast_common_lib-0.1.63.dist-info → hockey_blast_common_lib-0.1.65.dist-info}/WHEEL +0 -0
  27. {hockey_blast_common_lib-0.1.63.dist-info → hockey_blast_common_lib-0.1.65.dist-info}/top_level.txt +0 -0
hockey_blast_common_lib/aggregate_s2s_stats.py

@@ -1,40 +1,65 @@
-import sys, os
+import os
+import sys
 
 from datetime import datetime
 
 # Add the package directory to the Python path
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from hockey_blast_common_lib.models import Game, Goal, Penalty, GameRoster
-from hockey_blast_common_lib.h2h_models import SkaterToSkaterStats, SkaterToSkaterStatsMeta
-from hockey_blast_common_lib.db_connection import create_session
-from sqlalchemy.sql import func
 from sqlalchemy import types
+from sqlalchemy.sql import func
+
+from hockey_blast_common_lib.db_connection import create_session
+from hockey_blast_common_lib.h2h_models import (
+    SkaterToSkaterStats,
+    SkaterToSkaterStatsMeta,
+)
+from hockey_blast_common_lib.models import Game, GameRoster, Goal, Penalty
 
 # Optional: Limit processing to a specific human_id
 LIMIT_HUMAN_ID = None
 
+
 def aggregate_s2s_stats():
     session = create_session("boss")
-    meta = session.query(SkaterToSkaterStatsMeta).order_by(SkaterToSkaterStatsMeta.id.desc()).first()
+    meta = (
+        session.query(SkaterToSkaterStatsMeta)
+        .order_by(SkaterToSkaterStatsMeta.id.desc())
+        .first()
+    )
     s2s_stats_dict = {}  # (skater1_id, skater2_id) -> SkaterToSkaterStats instance
 
-    if meta is None or meta.last_run_timestamp is None or meta.last_processed_game_id is None:
+    if (
+        meta is None
+        or meta.last_run_timestamp is None
+        or meta.last_processed_game_id is None
+    ):
         # Full run: delete all existing stats and process all games
         session.query(SkaterToSkaterStats).delete()
         session.commit()
         games_query = session.query(Game).order_by(Game.date, Game.time, Game.id)
-        print("No previous run found, deleted all existing Skater-to-Skater stats, processing all games...")
+        print(
+            "No previous run found, deleted all existing Skater-to-Skater stats, processing all games..."
+        )
     else:
         # Incremental: only process games after last processed
         for stat in session.query(SkaterToSkaterStats).all():
             s2s_stats_dict[(stat.skater1_id, stat.skater2_id)] = stat
-        last_game = session.query(Game).filter(Game.id == meta.last_processed_game_id).first()
+        last_game = (
+            session.query(Game).filter(Game.id == meta.last_processed_game_id).first()
+        )
         if last_game:
             last_dt = datetime.combine(last_game.date, last_game.time)
-            games_query = session.query(Game).filter(
-                func.cast(func.concat(Game.date, ' ', Game.time), types.TIMESTAMP()) > last_dt
-            ).order_by(Game.date, Game.time, Game.id)
-            print(f"Resuming from game after id {meta.last_processed_game_id} ({last_dt})...")
+            games_query = (
+                session.query(Game)
+                .filter(
+                    func.cast(func.concat(Game.date, " ", Game.time), types.TIMESTAMP())
+                    > last_dt
+                )
+                .order_by(Game.date, Game.time, Game.id)
+            )
+            print(
+                f"Resuming from game after id {meta.last_processed_game_id} ({last_dt})..."
+            )
         else:
             games_query = session.query(Game).order_by(Game.date, Game.time, Game.id)
             print("Previous game id not found, processing all games...")
@@ -46,16 +71,40 @@ def aggregate_s2s_stats():
 
     for game in games_query:
         # Separate skaters into home and away rosters (exclude goalies)
-        home_skaters = [entry.human_id for entry in session.query(GameRoster).filter(GameRoster.game_id == game.id, GameRoster.team_id == game.home_team_id, ~GameRoster.role.ilike('g')).all()]
-        away_skaters = [entry.human_id for entry in session.query(GameRoster).filter(GameRoster.game_id == game.id, GameRoster.team_id == game.visitor_team_id, ~GameRoster.role.ilike('g')).all()]
-
-        if LIMIT_HUMAN_ID is not None and LIMIT_HUMAN_ID not in home_skaters + away_skaters:
+        home_skaters = [
+            entry.human_id
+            for entry in session.query(GameRoster)
+            .filter(
+                GameRoster.game_id == game.id,
+                GameRoster.team_id == game.home_team_id,
+                ~GameRoster.role.ilike("g"),
+            )
+            .all()
+        ]
+        away_skaters = [
+            entry.human_id
+            for entry in session.query(GameRoster)
+            .filter(
+                GameRoster.game_id == game.id,
+                GameRoster.team_id == game.visitor_team_id,
+                ~GameRoster.role.ilike("g"),
+            )
+            .all()
+        ]
+
+        if (
+            LIMIT_HUMAN_ID is not None
+            and LIMIT_HUMAN_ID not in home_skaters + away_skaters
+        ):
             continue
 
         # Create pairs of skaters from different rosters
         for h_skater in home_skaters:
             for a_skater in away_skaters:
-                if LIMIT_HUMAN_ID is not None and LIMIT_HUMAN_ID not in [h_skater, a_skater]:
+                if LIMIT_HUMAN_ID is not None and LIMIT_HUMAN_ID not in [
+                    h_skater,
+                    a_skater,
+                ]:
                     continue
 
                 s1, s2 = sorted([h_skater, a_skater])
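The sorted() call above is what makes the pair key canonical: whichever side each skater is on, the key is always (lower id, higher id), so both orientations of a matchup update the same SkaterToSkaterStats row. A tiny illustration of the invariant:

    def pair_key(a, b):
        # Canonical pair key: both orientations map to the same tuple.
        s1, s2 = sorted([a, b])
        return (s1, s2)

    assert pair_key(7, 42) == pair_key(42, 7) == (7, 42)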
@@ -74,7 +123,7 @@ def aggregate_s2s_stats():
                         skater1_assists_against_skater2=0,
                         skater2_assists_against_skater1=0,
                         skater1_penalties_against_skater2=0,
-                        skater2_penalties_against_skater1=0
+                        skater2_penalties_against_skater1=0,
                     )
                     s2s_stats_dict[key] = s2s
 
@@ -100,7 +149,9 @@ def aggregate_s2s_stats():
                         s2s.skater2_assists_against_skater1 += 1
 
                 # Penalties
-                penalties_stats = session.query(Penalty).filter(Penalty.game_id == game.id).all()
+                penalties_stats = (
+                    session.query(Penalty).filter(Penalty.game_id == game.id).all()
+                )
                 for penalty in penalties_stats:
                     if penalty.penalized_player_id == s1:
                         s2s.skater1_penalties_against_skater2 += 1
@@ -110,7 +161,10 @@ def aggregate_s2s_stats():
         latest_game_id = game.id
         processed += 1
         if processed % 10 == 0 or processed == total_games:
-            print(f"\rProcessed {processed}/{total_games} games ({(processed/total_games)*100:.2f}%)", end="")
+            print(
+                f"\rProcessed {processed}/{total_games} games ({(processed/total_games)*100:.2f}%)",
+                end="",
+            )
             sys.stdout.flush()
 
     # Commit all stats at once
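The progress line above works because "\r" returns the cursor to the start of the line, so each print overwrites the previous one, and the explicit flush forces the redraw since no newline is emitted. A standalone sketch of the same technique:

    import sys
    import time

    total_games = 50
    for processed in range(1, total_games + 1):
        if processed % 10 == 0 or processed == total_games:
            pct = (processed / total_games) * 100
            print(f"\rProcessed {processed}/{total_games} games ({pct:.2f}%)", end="")
            sys.stdout.flush()  # nothing auto-flushes without a newline
        time.sleep(0.01)  # stand-in for real per-game work
    print()  # move off the progress line so later output starts fresh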
@@ -121,13 +175,13 @@ def aggregate_s2s_stats():
 
     # Save/update meta
     meta = SkaterToSkaterStatsMeta(
-        last_run_timestamp=datetime.utcnow(),
-        last_processed_game_id=latest_game_id
+        last_run_timestamp=datetime.utcnow(), last_processed_game_id=latest_game_id
     )
     session.add(meta)
     session.commit()
     print("Skater-to-Skater aggregation complete.")
 
+
 # --- Helper functions for win/loss/tie ---
 def _is_win(game, skater_id, team_id):
     if team_id == game.home_team_id:
@@ -136,8 +190,14 @@ def _is_win(game, skater_id, team_id):
         return (game.visitor_final_score or 0) > (game.home_final_score or 0)
     return False
 
+
 def _is_tie(game):
-    return (game.home_final_score is not None and game.visitor_final_score is not None and game.home_final_score == game.visitor_final_score)
+    return (
+        game.home_final_score is not None
+        and game.visitor_final_score is not None
+        and game.home_final_score == game.visitor_final_score
+    )
+
 
 if __name__ == "__main__":
-    aggregate_s2s_stats()
+    aggregate_s2s_stats()
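A subtlety in the helpers above: _is_win coerces a missing score to 0 via `or 0`, while the reformatted _is_tie requires both scores to be present, so a game with no recorded scores is neither a win nor a tie. A condensed restatement of that contract:

    def is_tie(home_score, visitor_score):
        # Both scores must exist; an unscored game is not a tie.
        return (
            home_score is not None
            and visitor_score is not None
            and home_score == visitor_score
        )

    assert not is_tie(None, None)  # unscored game: not a tie
    assert (None or 0) == 0        # but win checks compare it as 0
    assert is_tie(3, 3)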
hockey_blast_common_lib/aggregate_scorekeeper_stats.py

@@ -1,21 +1,30 @@
-import sys, os
+import os
+import sys
 
 # Add the package directory to the Python path
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from datetime import datetime, timedelta
+
 import sqlalchemy
+from sqlalchemy.sql import func
 
-from hockey_blast_common_lib.models import Game, ScorekeeperSaveQuality
-from hockey_blast_common_lib.stats_models import OrgStatsScorekeeper, OrgStatsWeeklyScorekeeper, OrgStatsDailyScorekeeper
 from hockey_blast_common_lib.db_connection import create_session
-from sqlalchemy.sql import func, case
-from hockey_blast_common_lib.options import parse_args, MIN_GAMES_FOR_ORG_STATS, MIN_GAMES_FOR_DIVISION_STATS
-from hockey_blast_common_lib.utils import get_org_id_from_alias, get_non_human_ids
-from hockey_blast_common_lib.utils import assign_ranks
-from hockey_blast_common_lib.utils import get_start_datetime
-from hockey_blast_common_lib.stats_utils import ALL_ORGS_ID
+from hockey_blast_common_lib.models import Game, ScorekeeperSaveQuality
+from hockey_blast_common_lib.options import (
+    MIN_GAMES_FOR_ORG_STATS,
+)
 from hockey_blast_common_lib.progress_utils import create_progress_tracker
+from hockey_blast_common_lib.stats_models import (
+    OrgStatsDailyScorekeeper,
+    OrgStatsScorekeeper,
+    OrgStatsWeeklyScorekeeper,
+)
+from hockey_blast_common_lib.stats_utils import ALL_ORGS_ID
+from hockey_blast_common_lib.utils import (
+    assign_ranks,
+    get_non_human_ids,
+    get_start_datetime,
+)
 
 # Import status constants for game filtering
 FINAL_STATUS = "Final"
@@ -23,7 +32,10 @@ FINAL_SO_STATUS = "Final(SO)"
 FORFEIT_STATUS = "FORFEIT"
 NOEVENTS_STATUS = "NOEVENTS"
 
-def calculate_quality_score(avg_max_saves_5sec, avg_max_saves_20sec, peak_max_saves_5sec, peak_max_saves_20sec):
+
+def calculate_quality_score(
+    avg_max_saves_5sec, avg_max_saves_20sec, peak_max_saves_5sec, peak_max_saves_20sec
+):
     """
     Calculate a quality score based on excessive clicking patterns.
     Lower scores are better (less problematic clicking).
@@ -43,35 +55,40 @@ def calculate_quality_score(avg_max_saves_5sec, avg_max_saves_20sec, peak_max_sa
         return 0.0
 
     # Weight factors (can be tuned based on analysis)
-    avg_5sec_weight = 2.0    # Average clicking in 5sec windows
-    avg_20sec_weight = 1.0   # Average clicking in 20sec windows
-    peak_5sec_weight = 5.0   # Peak 5sec incidents are heavily penalized
+    avg_5sec_weight = 2.0  # Average clicking in 5sec windows
+    avg_20sec_weight = 1.0  # Average clicking in 20sec windows
+    peak_5sec_weight = 5.0  # Peak 5sec incidents are heavily penalized
     peak_20sec_weight = 3.0  # Peak 20sec incidents are moderately penalized
 
     score = (
-        (avg_max_saves_5sec * avg_5sec_weight) +
-        (avg_max_saves_20sec * avg_20sec_weight) +
-        (peak_max_saves_5sec * peak_5sec_weight) +
-        (peak_max_saves_20sec * peak_20sec_weight)
+        (avg_max_saves_5sec * avg_5sec_weight)
+        + (avg_max_saves_20sec * avg_20sec_weight)
+        + (peak_max_saves_5sec * peak_5sec_weight)
+        + (peak_max_saves_20sec * peak_20sec_weight)
     )
 
     return round(score, 2)
 
-def aggregate_scorekeeper_stats(session, aggregation_type, aggregation_id, aggregation_window=None):
+
+def aggregate_scorekeeper_stats(
+    session, aggregation_type, aggregation_id, aggregation_window=None
+):
     # Only process scorekeeper stats for ALL_ORGS_ID - skip individual organizations
     # This prevents redundant processing when upstream logic calls with all organization IDs
-    if aggregation_type == 'org' and aggregation_id != ALL_ORGS_ID:
+    if aggregation_type == "org" and aggregation_id != ALL_ORGS_ID:
         return  # Do nothing for individual organization IDs
 
     human_ids_to_filter = get_non_human_ids(session)
 
-    if aggregation_type == 'org':
+    if aggregation_type == "org":
         aggregation_name = "All Orgs"
         filter_condition = sqlalchemy.true()  # No filter for organization
-        print(f"Aggregating scorekeeper stats for {aggregation_name} with window {aggregation_window}...")
-        if aggregation_window == 'Daily':
+        print(
+            f"Aggregating scorekeeper stats for {aggregation_name} with window {aggregation_window}..."
+        )
+        if aggregation_window == "Daily":
            StatsModel = OrgStatsDailyScorekeeper
-        elif aggregation_window == 'Weekly':
+        elif aggregation_window == "Weekly":
             StatsModel = OrgStatsWeeklyScorekeeper
         else:
             StatsModel = OrgStatsScorekeeper
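For a concrete feel of the weighting: with the factors above, a scorekeeper averaging 3 saves per 5-second window and 6 per 20-second window, with peaks of 8 and 15, scores 2.0*3 + 1.0*6 + 5.0*8 + 3.0*15 = 97.0. A condensed check (the function's early-return guard for empty inputs is omitted here):

    def quality_score(avg_5sec, avg_20sec, peak_5sec, peak_20sec):
        # Same weights as above: peaks are penalized harder than averages.
        return round(
            (avg_5sec * 2.0) + (avg_20sec * 1.0) + (peak_5sec * 5.0) + (peak_20sec * 3.0),
            2,
        )

    assert quality_score(3, 6, 8, 15) == 97.0  # lower is better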
@@ -80,42 +97,74 @@ def aggregate_scorekeeper_stats(session, aggregation_type, aggregation_id, aggre
         raise ValueError("Invalid aggregation type")
 
     # Delete existing items from the stats table
-    session.query(StatsModel).filter(StatsModel.aggregation_id == aggregation_id).delete()
+    session.query(StatsModel).filter(
+        StatsModel.aggregation_id == aggregation_id
+    ).delete()
     session.commit()
 
     # Apply aggregation window filter
     if aggregation_window:
-        last_game_datetime_str = session.query(func.max(func.concat(Game.date, ' ', Game.time))).filter(filter_condition, Game.status.like('Final%')).scalar()
+        last_game_datetime_str = (
+            session.query(func.max(func.concat(Game.date, " ", Game.time)))
+            .filter(filter_condition, Game.status.like("Final%"))
+            .scalar()
+        )
         start_datetime = get_start_datetime(last_game_datetime_str, aggregation_window)
         if start_datetime:
-            game_window_filter = func.cast(func.concat(Game.date, ' ', Game.time), sqlalchemy.types.TIMESTAMP).between(start_datetime, last_game_datetime_str)
+            game_window_filter = func.cast(
+                func.concat(Game.date, " ", Game.time), sqlalchemy.types.TIMESTAMP
+            ).between(start_datetime, last_game_datetime_str)
             filter_condition = filter_condition & game_window_filter
         else:
             return
 
-
     # Aggregate scorekeeper quality data for each human
     # games_participated: Count FINAL, FINAL_SO, FORFEIT, NOEVENTS
     # games_with_stats: Count only FINAL, FINAL_SO (for per-game averages)
     # Filter by game status upfront for performance
-    scorekeeper_quality_stats = session.query(
-        ScorekeeperSaveQuality.scorekeeper_id.label('human_id'),
-        func.count(ScorekeeperSaveQuality.game_id).label('games_recorded'),
-        func.count(ScorekeeperSaveQuality.game_id).label('games_participated'),  # Same as games_recorded after filtering
-        func.count(ScorekeeperSaveQuality.game_id).label('games_with_stats'),  # Same as games_recorded after filtering
-        func.sum(ScorekeeperSaveQuality.total_saves_recorded).label('total_saves_recorded'),
-        func.avg(ScorekeeperSaveQuality.total_saves_recorded).label('avg_saves_per_game'),
-        func.avg(ScorekeeperSaveQuality.max_saves_per_5sec).label('avg_max_saves_per_5sec'),
-        func.avg(ScorekeeperSaveQuality.max_saves_per_20sec).label('avg_max_saves_per_20sec'),
-        func.max(ScorekeeperSaveQuality.max_saves_per_5sec).label('peak_max_saves_per_5sec'),
-        func.max(ScorekeeperSaveQuality.max_saves_per_20sec).label('peak_max_saves_per_20sec'),
-        func.array_agg(ScorekeeperSaveQuality.game_id).label('game_ids')
-    ).join(Game, Game.id == ScorekeeperSaveQuality.game_id).filter(
-        Game.status.in_([FINAL_STATUS, FINAL_SO_STATUS, FORFEIT_STATUS, NOEVENTS_STATUS])
+    scorekeeper_quality_stats = (
+        session.query(
+            ScorekeeperSaveQuality.scorekeeper_id.label("human_id"),
+            func.count(ScorekeeperSaveQuality.game_id).label("games_recorded"),
+            func.count(ScorekeeperSaveQuality.game_id).label(
+                "games_participated"
+            ),  # Same as games_recorded after filtering
+            func.count(ScorekeeperSaveQuality.game_id).label(
+                "games_with_stats"
+            ),  # Same as games_recorded after filtering
+            func.sum(ScorekeeperSaveQuality.total_saves_recorded).label(
+                "total_saves_recorded"
+            ),
+            func.avg(ScorekeeperSaveQuality.total_saves_recorded).label(
+                "avg_saves_per_game"
+            ),
+            func.avg(ScorekeeperSaveQuality.max_saves_per_5sec).label(
+                "avg_max_saves_per_5sec"
+            ),
+            func.avg(ScorekeeperSaveQuality.max_saves_per_20sec).label(
+                "avg_max_saves_per_20sec"
+            ),
+            func.max(ScorekeeperSaveQuality.max_saves_per_5sec).label(
+                "peak_max_saves_per_5sec"
+            ),
+            func.max(ScorekeeperSaveQuality.max_saves_per_20sec).label(
+                "peak_max_saves_per_20sec"
+            ),
+            func.array_agg(ScorekeeperSaveQuality.game_id).label("game_ids"),
+        )
+        .join(Game, Game.id == ScorekeeperSaveQuality.game_id)
+        .filter(
+            Game.status.in_(
+                [FINAL_STATUS, FINAL_SO_STATUS, FORFEIT_STATUS, NOEVENTS_STATUS]
+            )
+        )
     )
 
-
-    scorekeeper_quality_stats = scorekeeper_quality_stats.filter(filter_condition).group_by(ScorekeeperSaveQuality.scorekeeper_id).all()
+    scorekeeper_quality_stats = (
+        scorekeeper_quality_stats.filter(filter_condition)
+        .group_by(ScorekeeperSaveQuality.scorekeeper_id)
+        .all()
+    )
 
     # Combine the results
     stats_dict = {}
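The reshaped query above still aggregates in a single GROUP BY pass per scorekeeper. Roughly the SQL it emits, as an approximation inferred from the labels and the join (the table names here are assumptions, and array_agg implies PostgreSQL):

    # Approximate SQL; abbreviated to the representative columns.
    EXPECTED_SQL = """
    SELECT ssq.scorekeeper_id            AS human_id,
           count(ssq.game_id)            AS games_recorded,
           sum(ssq.total_saves_recorded) AS total_saves_recorded,
           avg(ssq.max_saves_per_5sec)   AS avg_max_saves_per_5sec,
           max(ssq.max_saves_per_5sec)   AS peak_max_saves_per_5sec,
           array_agg(ssq.game_id)        AS game_ids
    FROM scorekeeper_save_quality AS ssq
    JOIN games ON games.id = ssq.game_id
    WHERE games.status IN ('Final', 'Final(SO)', 'FORFEIT', 'NOEVENTS')
    GROUP BY ssq.scorekeeper_id
    """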
@@ -129,55 +178,79 @@ def aggregate_scorekeeper_stats(session, aggregation_type, aggregation_id, aggre
             stat.avg_max_saves_per_5sec or 0.0,
             stat.avg_max_saves_per_20sec or 0.0,
             stat.peak_max_saves_per_5sec or 0,
-            stat.peak_max_saves_per_20sec or 0
+            stat.peak_max_saves_per_20sec or 0,
         )
 
         stats_dict[key] = {
-            'games_recorded': stat.games_recorded,  # DEPRECATED - for backward compatibility
-            'games_participated': stat.games_participated,  # Total games: FINAL, FINAL_SO, FORFEIT, NOEVENTS
-            'games_with_stats': stat.games_with_stats,  # Games with full stats: FINAL, FINAL_SO only
-            'sog_given': stat.total_saves_recorded,  # Legacy field name mapping
-            'sog_per_game': stat.avg_saves_per_game or 0.0,  # Legacy field name mapping
-            'total_saves_recorded': stat.total_saves_recorded,
-            'avg_saves_per_game': stat.avg_saves_per_game or 0.0,
-            'avg_max_saves_per_5sec': stat.avg_max_saves_per_5sec or 0.0,
-            'avg_max_saves_per_20sec': stat.avg_max_saves_per_20sec or 0.0,
-            'peak_max_saves_per_5sec': stat.peak_max_saves_per_5sec or 0,
-            'peak_max_saves_per_20sec': stat.peak_max_saves_per_20sec or 0,
-            'quality_score': quality_score,
-            'game_ids': stat.game_ids,
-            'first_game_id': None,
-            'last_game_id': None
+            "games_recorded": stat.games_recorded,  # DEPRECATED - for backward compatibility
+            "games_participated": stat.games_participated,  # Total games: FINAL, FINAL_SO, FORFEIT, NOEVENTS
+            "games_with_stats": stat.games_with_stats,  # Games with full stats: FINAL, FINAL_SO only
+            "sog_given": stat.total_saves_recorded,  # Legacy field name mapping
+            "sog_per_game": stat.avg_saves_per_game or 0.0,  # Legacy field name mapping
+            "total_saves_recorded": stat.total_saves_recorded,
+            "avg_saves_per_game": stat.avg_saves_per_game or 0.0,
+            "avg_max_saves_per_5sec": stat.avg_max_saves_per_5sec or 0.0,
+            "avg_max_saves_per_20sec": stat.avg_max_saves_per_20sec or 0.0,
+            "peak_max_saves_per_5sec": stat.peak_max_saves_per_5sec or 0,
+            "peak_max_saves_per_20sec": stat.peak_max_saves_per_20sec or 0,
+            "quality_score": quality_score,
+            "game_ids": stat.game_ids,
+            "first_game_id": None,
+            "last_game_id": None,
         }
 
     # Filter out entries with games_recorded less than min_games
-    stats_dict = {key: value for key, value in stats_dict.items() if value['games_recorded'] >= min_games}
+    stats_dict = {
+        key: value
+        for key, value in stats_dict.items()
+        if value["games_recorded"] >= min_games
+    }
 
     # Populate first_game_id and last_game_id
     for key, stat in stats_dict.items():
-        all_game_ids = stat['game_ids']
+        all_game_ids = stat["game_ids"]
         if all_game_ids:
-            first_game = session.query(Game).filter(Game.id.in_(all_game_ids)).order_by(Game.date, Game.time).first()
-            last_game = session.query(Game).filter(Game.id.in_(all_game_ids)).order_by(Game.date.desc(), Game.time.desc()).first()
-            stat['first_game_id'] = first_game.id if first_game else None
-            stat['last_game_id'] = last_game.id if last_game else None
+            first_game = (
+                session.query(Game)
+                .filter(Game.id.in_(all_game_ids))
+                .order_by(Game.date, Game.time)
+                .first()
+            )
+            last_game = (
+                session.query(Game)
+                .filter(Game.id.in_(all_game_ids))
+                .order_by(Game.date.desc(), Game.time.desc())
+                .first()
+            )
+            stat["first_game_id"] = first_game.id if first_game else None
+            stat["last_game_id"] = last_game.id if last_game else None
 
     # Calculate total_in_rank
     total_in_rank = len(stats_dict)
 
     # Assign ranks - note: for quality metrics, lower values are better (reverse_rank=True for avg and peak clicking)
-    assign_ranks(stats_dict, 'games_recorded')
-    assign_ranks(stats_dict, 'games_participated')  # Rank by total participation
-    assign_ranks(stats_dict, 'games_with_stats')  # Rank by games with full stats
-    assign_ranks(stats_dict, 'sog_given')  # Legacy field
-    assign_ranks(stats_dict, 'sog_per_game')  # Legacy field
-    assign_ranks(stats_dict, 'total_saves_recorded')
-    assign_ranks(stats_dict, 'avg_saves_per_game')
-    assign_ranks(stats_dict, 'avg_max_saves_per_5sec', reverse_rank=True)  # Lower is better (less clicking)
-    assign_ranks(stats_dict, 'avg_max_saves_per_20sec', reverse_rank=True)  # Lower is better
-    assign_ranks(stats_dict, 'peak_max_saves_per_5sec', reverse_rank=True)  # Lower is better
-    assign_ranks(stats_dict, 'peak_max_saves_per_20sec', reverse_rank=True)  # Lower is better
-    assign_ranks(stats_dict, 'quality_score', reverse_rank=True)  # Lower is better (less problematic)
+    assign_ranks(stats_dict, "games_recorded")
+    assign_ranks(stats_dict, "games_participated")  # Rank by total participation
+    assign_ranks(stats_dict, "games_with_stats")  # Rank by games with full stats
+    assign_ranks(stats_dict, "sog_given")  # Legacy field
+    assign_ranks(stats_dict, "sog_per_game")  # Legacy field
+    assign_ranks(stats_dict, "total_saves_recorded")
+    assign_ranks(stats_dict, "avg_saves_per_game")
+    assign_ranks(
+        stats_dict, "avg_max_saves_per_5sec", reverse_rank=True
+    )  # Lower is better (less clicking)
+    assign_ranks(
+        stats_dict, "avg_max_saves_per_20sec", reverse_rank=True
+    )  # Lower is better
+    assign_ranks(
+        stats_dict, "peak_max_saves_per_5sec", reverse_rank=True
+    )  # Lower is better
+    assign_ranks(
+        stats_dict, "peak_max_saves_per_20sec", reverse_rank=True
+    )  # Lower is better
+    assign_ranks(
+        stats_dict, "quality_score", reverse_rank=True
+    )  # Lower is better (less problematic)
 
     # Insert aggregated stats into the appropriate table with progress output
     batch_size = 1000
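assign_ranks lives in hockey_blast_common_lib.utils and its body is not part of this diff; from the call sites, reverse_rank=True evidently means "smaller value gets rank 1". A hypothetical stand-in with that contract:

    def assign_ranks(stats_dict, field, reverse_rank=False):
        # Hypothetical stand-in: rank 1 is the largest value,
        # or the smallest when reverse_rank=True (lower is better).
        ordered = sorted(
            stats_dict.items(),
            key=lambda kv: kv[1][field],
            reverse=not reverse_rank,
        )
        for rank, (_, stat) in enumerate(ordered, start=1):
            stat[f"{field}_rank"] = rank

    stats = {"a": {"quality_score": 1.5}, "b": {"quality_score": 9.0}}
    assign_ranks(stats, "quality_score", reverse_rank=True)
    assert stats["a"]["quality_score_rank"] == 1  # lowest score ranks first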
@@ -186,33 +259,39 @@ def aggregate_scorekeeper_stats(session, aggregation_type, aggregation_id, aggre
         scorekeeper_stat = StatsModel(
             aggregation_id=aggregation_id,
             human_id=human_id,
-            games_recorded=stat['games_recorded'],  # DEPRECATED - for backward compatibility
-            games_participated=stat['games_participated'],  # Total games: FINAL, FINAL_SO, FORFEIT, NOEVENTS
-            games_participated_rank=stat['games_participated_rank'],
-            games_with_stats=stat['games_with_stats'],  # Games with full stats: FINAL, FINAL_SO only
-            games_with_stats_rank=stat['games_with_stats_rank'],
-            sog_given=stat['sog_given'],  # Legacy field mapping
-            sog_per_game=stat['sog_per_game'],  # Legacy field mapping
-            total_saves_recorded=stat['total_saves_recorded'],
-            total_saves_recorded_rank=stat['total_saves_recorded_rank'],
-            avg_saves_per_game=stat['avg_saves_per_game'],
-            avg_saves_per_game_rank=stat['avg_saves_per_game_rank'],
-            avg_max_saves_per_5sec=stat['avg_max_saves_per_5sec'],
-            avg_max_saves_per_5sec_rank=stat['avg_max_saves_per_5sec_rank'],
-            avg_max_saves_per_20sec=stat['avg_max_saves_per_20sec'],
-            avg_max_saves_per_20sec_rank=stat['avg_max_saves_per_20sec_rank'],
-            peak_max_saves_per_5sec=stat['peak_max_saves_per_5sec'],
-            peak_max_saves_per_5sec_rank=stat['peak_max_saves_per_5sec_rank'],
-            peak_max_saves_per_20sec=stat['peak_max_saves_per_20sec'],
-            peak_max_saves_per_20sec_rank=stat['peak_max_saves_per_20sec_rank'],
-            quality_score=stat['quality_score'],
-            quality_score_rank=stat['quality_score_rank'],
-            games_recorded_rank=stat['games_recorded_rank'],
-            sog_given_rank=stat['sog_given_rank'],  # Legacy field
-            sog_per_game_rank=stat['sog_per_game_rank'],  # Legacy field
+            games_recorded=stat[
+                "games_recorded"
+            ],  # DEPRECATED - for backward compatibility
+            games_participated=stat[
+                "games_participated"
+            ],  # Total games: FINAL, FINAL_SO, FORFEIT, NOEVENTS
+            games_participated_rank=stat["games_participated_rank"],
+            games_with_stats=stat[
+                "games_with_stats"
+            ],  # Games with full stats: FINAL, FINAL_SO only
+            games_with_stats_rank=stat["games_with_stats_rank"],
+            sog_given=stat["sog_given"],  # Legacy field mapping
+            sog_per_game=stat["sog_per_game"],  # Legacy field mapping
+            total_saves_recorded=stat["total_saves_recorded"],
+            total_saves_recorded_rank=stat["total_saves_recorded_rank"],
+            avg_saves_per_game=stat["avg_saves_per_game"],
+            avg_saves_per_game_rank=stat["avg_saves_per_game_rank"],
+            avg_max_saves_per_5sec=stat["avg_max_saves_per_5sec"],
+            avg_max_saves_per_5sec_rank=stat["avg_max_saves_per_5sec_rank"],
+            avg_max_saves_per_20sec=stat["avg_max_saves_per_20sec"],
+            avg_max_saves_per_20sec_rank=stat["avg_max_saves_per_20sec_rank"],
+            peak_max_saves_per_5sec=stat["peak_max_saves_per_5sec"],
+            peak_max_saves_per_5sec_rank=stat["peak_max_saves_per_5sec_rank"],
+            peak_max_saves_per_20sec=stat["peak_max_saves_per_20sec"],
+            peak_max_saves_per_20sec_rank=stat["peak_max_saves_per_20sec_rank"],
+            quality_score=stat["quality_score"],
+            quality_score_rank=stat["quality_score_rank"],
+            games_recorded_rank=stat["games_recorded_rank"],
+            sog_given_rank=stat["sog_given_rank"],  # Legacy field
+            sog_per_game_rank=stat["sog_per_game_rank"],  # Legacy field
             total_in_rank=total_in_rank,
-            first_game_id=stat['first_game_id'],
-            last_game_id=stat['last_game_id']
+            first_game_id=stat["first_game_id"],
+            last_game_id=stat["last_game_id"],
         )
         session.add(scorekeeper_stat)
         # Commit in batches
@@ -220,6 +299,7 @@ def aggregate_scorekeeper_stats(session, aggregation_type, aggregation_id, aggre
             session.commit()
     session.commit()
 
+
 def run_aggregate_scorekeeper_stats():
     session = create_session("boss")
     human_id_to_debug = None
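The commit cadence above (one commit per batch_size adds inside the loop, plus a final commit) bounds transaction size while still flushing the last partial batch. The shape of the loop, with a fake session standing in for SQLAlchemy's:

    class FakeSession:
        # Stand-in for a SQLAlchemy session, just to show the cadence.
        def __init__(self):
            self.pending = 0
            self.commits = 0

        def add(self, obj):
            self.pending += 1

        def commit(self):
            self.pending = 0
            self.commits += 1

    session, batch_size = FakeSession(), 1000
    for i, row in enumerate(range(2500), start=1):  # 2500 fake rows
        session.add(row)
        if i % batch_size == 0:
            session.commit()  # commit each full batch
    session.commit()  # flush the final partial batch (500 rows here)
    assert session.commits == 3 and session.pending == 0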
@@ -227,6 +307,7 @@ def run_aggregate_scorekeeper_stats():
     # Get all org_id present in the Organization table (following goalie stats pattern)
     # Individual org calls will be skipped by early exit, only ALL_ORGS_ID will process
     from hockey_blast_common_lib.models import Organization
+
     org_ids = session.query(Organization.id).all()
     org_ids = [org_id[0] for org_id in org_ids]
 
@@ -235,18 +316,52 @@ def run_aggregate_scorekeeper_stats():
 
     for org_id in org_ids:
         if human_id_to_debug is None:
-            org_name = "All Organizations" if org_id == ALL_ORGS_ID else session.query(Organization.organization_name).filter(Organization.id == org_id).scalar() or f"org_id {org_id}"
-            org_progress = create_progress_tracker(3, f"Processing scorekeeper stats for {org_name}")
-            aggregate_scorekeeper_stats(session, aggregation_type='org', aggregation_id=org_id)
+            org_name = (
+                "All Organizations"
+                if org_id == ALL_ORGS_ID
+                else session.query(Organization.organization_name)
+                .filter(Organization.id == org_id)
+                .scalar()
+                or f"org_id {org_id}"
+            )
+            org_progress = create_progress_tracker(
+                3, f"Processing scorekeeper stats for {org_name}"
+            )
+            aggregate_scorekeeper_stats(
+                session, aggregation_type="org", aggregation_id=org_id
+            )
             org_progress.update(1)
-            aggregate_scorekeeper_stats(session, aggregation_type='org', aggregation_id=org_id, aggregation_window='Weekly')
+            aggregate_scorekeeper_stats(
+                session,
+                aggregation_type="org",
+                aggregation_id=org_id,
+                aggregation_window="Weekly",
+            )
             org_progress.update(2)
-            aggregate_scorekeeper_stats(session, aggregation_type='org', aggregation_id=org_id, aggregation_window='Daily')
+            aggregate_scorekeeper_stats(
+                session,
+                aggregation_type="org",
+                aggregation_id=org_id,
+                aggregation_window="Daily",
+            )
             org_progress.update(3)
         else:
-            aggregate_scorekeeper_stats(session, aggregation_type='org', aggregation_id=org_id)
-            aggregate_scorekeeper_stats(session, aggregation_type='org', aggregation_id=org_id, aggregation_window='Weekly')
-            aggregate_scorekeeper_stats(session, aggregation_type='org', aggregation_id=org_id, aggregation_window='Daily')
+            aggregate_scorekeeper_stats(
+                session, aggregation_type="org", aggregation_id=org_id
+            )
+            aggregate_scorekeeper_stats(
+                session,
+                aggregation_type="org",
+                aggregation_id=org_id,
+                aggregation_window="Weekly",
+            )
+            aggregate_scorekeeper_stats(
+                session,
+                aggregation_type="org",
+                aggregation_id=org_id,
+                aggregation_window="Daily",
+            )
+
 
 if __name__ == "__main__":
-    run_aggregate_scorekeeper_stats()
+    run_aggregate_scorekeeper_stats()
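Taken together with the early return near the top of aggregate_scorekeeper_stats, this loop is effectively one real unit of work (ALL_ORGS_ID) times three windows: all-time, Weekly, and Daily; every other org id returns immediately. A control-flow sketch (the -1 here is illustrative, the real ALL_ORGS_ID comes from stats_utils):

    ALL_ORGS_ID = -1  # illustrative value; the real constant lives in stats_utils

    def aggregate(org_id, window=None):
        if org_id != ALL_ORGS_ID:
            return "skipped"  # mirrors the early return in the real function
        return f"aggregated window={window}"

    results = [
        aggregate(org_id, window)
        for org_id in (1, 2, ALL_ORGS_ID)
        for window in (None, "Weekly", "Daily")
    ]
    assert results.count("skipped") == 6  # only ALL_ORGS_ID does real work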