hockey-blast-common-lib 0.1.66__py3-none-any.whl → 0.1.68__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,361 @@
1
+ import os
2
+ import sys
3
+
4
+ # Add the package directory to the Python path
5
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
6
+
7
+ from datetime import datetime, timedelta
8
+
9
+ from sqlalchemy import and_, case, func
10
+ from sqlalchemy.exc import IntegrityError
11
+
12
+ from hockey_blast_common_lib.db_connection import create_session
13
+ from hockey_blast_common_lib.models import Division, Game, GameRoster, Goal, Human, Penalty
14
+ from hockey_blast_common_lib.progress_utils import create_progress_tracker
15
+ from hockey_blast_common_lib.stats_models import GameStatsSkater
16
+ from hockey_blast_common_lib.utils import get_non_human_ids
17
+
18
# Game.status values that mark a game as completed; only games with one of
# these statuses are eligible for aggregation (see game_filter below).
FINAL_STATUS = "Final"
FINAL_SO_STATUS = "Final(SO)"
21
+
22
+
23
def aggregate_game_stats_skater(session, mode="full", human_id=None):
    """Aggregate per-game skater statistics.

    Args:
        session: Database session
        mode: "full" to regenerate all records, "append" to process new games only
        human_id: Optional human_id to process only one player (for testing/debugging)

    The function stores individual game performance for each skater with non-zero stats.
    Only games where the player recorded at least one goal, assist, or penalty minute are saved.
    This sparse storage is optimized for RAG system queries.

    Uses Incognito Human sentinel record (game_id=-1) to track last processed timestamp
    for append mode with 1-day overlap to catch data corrections.
    """
    # Function-scope import keeps the fix self-contained: cast/DateTime are needed
    # to build a genuine SQL type cast (see _as_timestamp below).
    from sqlalchemy import DateTime, cast as sql_cast

    def _as_timestamp(date_col, time_col):
        """Return CAST(concat(date_col, ' ', time_col) AS timestamp-like).

        BUGFIX: the previous code used func.cast(..., func.TIMESTAMP), which
        SQLAlchemy renders as a generic two-argument cast(x, TIMESTAMP()) function
        call rather than a SQL CAST expression, and fails at execution time.
        """
        return sql_cast(func.concat(date_col, " ", time_col), DateTime)

    # Get Incognito Human for sentinel tracking (first_name="Incognito", middle_name="", last_name="Human")
    incognito_human = session.query(Human).filter_by(
        first_name="Incognito", middle_name="", last_name="Human"
    ).first()
    if not incognito_human:
        raise RuntimeError("Incognito Human not found in database - required for sentinel tracking")
    incognito_human_id = incognito_human.id

    non_human_ids = get_non_human_ids(session)

    # Add human_id to filter if specified
    if human_id:
        human = session.query(Human).filter_by(id=human_id).first()
        if not human:
            print(f"ERROR: Human ID {human_id} not found in database")
            return
        print(f"Limiting to human_id={human_id}: {human.first_name} {human.last_name}\n")

    print(f"\n{'='*80}")
    print(f"Aggregating per-game skater statistics (mode: {mode})")
    print(f"{'='*80}\n")

    # Determine game filtering based on mode
    if mode == "append":
        # Query sentinel record for last processed timestamp
        sentinel = (
            session.query(GameStatsSkater)
            .filter(
                GameStatsSkater.human_id == incognito_human_id,
                GameStatsSkater.game_id == -1,
            )
            .first()
        )

        if sentinel:
            last_processed = datetime.combine(sentinel.game_date, sentinel.game_time)
            # Subtract 1 day for overlap to catch data corrections
            start_datetime = last_processed - timedelta(days=1)
            print(f"Append mode: Processing games after {start_datetime}")
            print(f"(1-day overlap from last processed: {last_processed})\n")

            # Delete records for games in the overlap window (they will be re-inserted)
            delete_count = (
                session.query(GameStatsSkater)
                .filter(
                    GameStatsSkater.human_id != incognito_human_id,
                    _as_timestamp(GameStatsSkater.game_date, GameStatsSkater.game_time)
                    >= start_datetime,
                )
                .delete(synchronize_session=False)
            )
            session.commit()
            print(f"Deleted {delete_count} existing records in overlap window\n")
        else:
            # No sentinel found, treat as full mode
            print("No sentinel record found - treating as full mode\n")
            mode = "full"
            start_datetime = None
    else:
        start_datetime = None

    if mode == "full":
        # Delete all existing records except sentinel
        delete_count = (
            session.query(GameStatsSkater)
            .filter(GameStatsSkater.human_id != incognito_human_id)
            .delete(synchronize_session=False)
        )
        session.commit()
        print(f"Full mode: Deleted {delete_count} existing records\n")

    # Build game filter for eligible games (completed games only; optionally
    # restricted to the append window)
    game_filter = Game.status.in_([FINAL_STATUS, FINAL_SO_STATUS])
    if mode == "append" and start_datetime:
        game_filter = and_(
            game_filter,
            _as_timestamp(Game.date, Game.time) >= start_datetime,
        )

    # Count total games to process for progress tracking
    total_games = session.query(Game).filter(game_filter).count()
    print(f"Processing {total_games} games...\n")

    if total_games == 0:
        print("No games to process.\n")
        return

    # Query game roster entries for skaters (exclude goalies)
    # Join with games to get metadata, filter by game status and date window
    roster_query = (
        session.query(
            GameRoster.game_id,
            GameRoster.human_id,
            GameRoster.team_id,
            Game.org_id,
            Division.level_id,
            Game.date.label("game_date"),
            Game.time.label("game_time"),
        )
        .join(Game, GameRoster.game_id == Game.id)
        .join(Division, Game.division_id == Division.id)
        .filter(
            ~GameRoster.role.ilike("g"),  # Exclude goalies
            GameRoster.human_id.notin_(non_human_ids),  # Filter placeholder humans
            game_filter,
        )
    )

    # Add human_id filter if specified
    if human_id:
        roster_query = roster_query.filter(GameRoster.human_id == human_id)

    roster_entries = roster_query.all()

    # Build dict of roster entries by (game_id, human_id) for fast lookup
    roster_dict = {}
    for entry in roster_entries:
        key = (entry.game_id, entry.human_id)
        roster_dict[key] = {
            "team_id": entry.team_id,
            "org_id": entry.org_id,
            "level_id": entry.level_id,
            "game_date": entry.game_date,
            "game_time": entry.game_time,
            "goals": 0,
            "assists": 0,
            "points": 0,
            "penalty_minutes": 0,
        }

    print(f"Found {len(roster_dict)} skater roster entries\n")

    # Query goals and count by scorer and assisters
    print("Aggregating goals and assists...")
    goals = (
        session.query(Goal)
        .join(Game, Goal.game_id == Game.id)
        .filter(game_filter)
        .all()
    )

    for goal in goals:
        # Count goal for scorer
        key = (goal.game_id, goal.goal_scorer_id)
        if key in roster_dict:
            roster_dict[key]["goals"] += 1
            roster_dict[key]["points"] += 1

        # Count assists
        if goal.assist_1_id:
            key = (goal.game_id, goal.assist_1_id)
            if key in roster_dict:
                roster_dict[key]["assists"] += 1
                roster_dict[key]["points"] += 1

        if goal.assist_2_id:
            key = (goal.game_id, goal.assist_2_id)
            if key in roster_dict:
                roster_dict[key]["assists"] += 1
                roster_dict[key]["points"] += 1

    print(f"Processed {len(goals)} goals\n")

    # Query penalties and aggregate by penalized player
    print("Aggregating penalties...")
    penalties = (
        session.query(Penalty)
        .join(Game, Penalty.game_id == Game.id)
        .filter(game_filter)
        .all()
    )

    for penalty in penalties:
        key = (penalty.game_id, penalty.penalized_player_id)
        if key in roster_dict:
            # Convert penalty minutes: "GM" (game misconduct) = 10, else parse integer
            if penalty.penalty_minutes and penalty.penalty_minutes.upper() == "GM":
                roster_dict[key]["penalty_minutes"] += 10
            else:
                try:
                    minutes = int(penalty.penalty_minutes) if penalty.penalty_minutes else 0
                    roster_dict[key]["penalty_minutes"] += minutes
                except (ValueError, TypeError):
                    # Log unconvertible values but don't crash
                    print(f"Warning: Could not convert penalty_minutes '{penalty.penalty_minutes}' for penalty {penalty.id}")

    print(f"Processed {len(penalties)} penalties\n")

    # Filter to only non-zero stats (CRITICAL for RAG efficiency)
    print("Filtering to non-zero records...")
    nonzero_dict = {
        key: stats
        for key, stats in roster_dict.items()
        if stats["goals"] > 0 or stats["assists"] > 0 or stats["penalty_minutes"] > 0
    }

    print(f"Filtered: {len(nonzero_dict)} non-zero records (from {len(roster_dict)} total)\n")

    # Insert records in batches with progress tracking
    batch_size = 1000
    total_records = len(nonzero_dict)

    if total_records == 0:
        print("No non-zero records to insert.\n")
    else:
        progress = create_progress_tracker(total_records, "Inserting per-game skater stats")

        records_to_insert = []
        for i, (key, stats) in enumerate(nonzero_dict.items(), 1):
            # BUGFIX: the original unpacked into `game_id, human_id`, which
            # clobbered the `human_id` parameter and made the sentinel guard
            # below (`if not human_id:`) test the last inserted player's id,
            # wrongly skipping sentinel maintenance after any insert.
            rec_game_id, rec_human_id = key

            record = GameStatsSkater(
                game_id=rec_game_id,
                human_id=rec_human_id,
                team_id=stats["team_id"],
                org_id=stats["org_id"],
                level_id=stats["level_id"],
                game_date=stats["game_date"],
                game_time=stats["game_time"],
                goals=stats["goals"],
                assists=stats["assists"],
                points=stats["points"],
                penalty_minutes=stats["penalty_minutes"],
                created_at=datetime.utcnow(),
            )

            records_to_insert.append(record)

            # Commit in batches
            if i % batch_size == 0 or i == total_records:
                session.bulk_save_objects(records_to_insert)
                session.commit()
                records_to_insert = []
                progress.update(i)

        print("\nInsert complete.\n")

    # Update or create sentinel record with max game timestamp (skip if filtering by human_id)
    if not human_id:
        max_game = (
            session.query(
                Game.date.label("game_date"),
                Game.time.label("game_time"),
            )
            .filter(game_filter)
            .order_by(Game.date.desc(), Game.time.desc())
            .first()
        )

        if max_game:
            # Try to update existing sentinel
            sentinel = (
                session.query(GameStatsSkater)
                .filter(
                    GameStatsSkater.human_id == incognito_human_id,
                    GameStatsSkater.game_id == -1,
                )
                .first()
            )

            if sentinel:
                sentinel.game_date = max_game.game_date
                sentinel.game_time = max_game.game_time
                print(f"Updated sentinel record: {max_game.game_date} {max_game.game_time}")
            else:
                # Create new sentinel
                sentinel = GameStatsSkater(
                    game_id=-1,
                    human_id=incognito_human_id,
                    team_id=-1,  # Dummy value
                    org_id=-1,  # Dummy value
                    level_id=-1,  # Dummy value
                    game_date=max_game.game_date,
                    game_time=max_game.game_time,
                    goals=0,
                    assists=0,
                    points=0,
                    penalty_minutes=0,
                    created_at=datetime.utcnow(),
                )
                session.add(sentinel)
                print(f"Created sentinel record: {max_game.game_date} {max_game.game_time}")

            session.commit()
    else:
        print("Skipping sentinel record creation (human_id filter active)")

    print(f"\n{'='*80}")
    print("Per-game skater statistics aggregation complete")
    print(f"{'='*80}\n")
334
+
335
+
336
def run_aggregate_game_stats_skater():
    """Main entry point for skater per-game aggregation.

    Parses --mode and --human-id from the command line, opens a DB session,
    runs the aggregation, and always closes the session afterwards (the
    original leaked the session on both the success and error paths).
    """
    import argparse

    parser = argparse.ArgumentParser(description="Aggregate per-game skater statistics")
    parser.add_argument(
        "--mode",
        choices=["full", "append"],
        default="full",
        help="Aggregation mode: 'full' to regenerate all, 'append' to add new games only",
    )
    parser.add_argument(
        "--human-id",
        type=int,
        default=None,
        help="Optional: Limit processing to specific human_id (for testing)",
    )

    args = parser.parse_args()

    session = create_session("boss")
    try:
        aggregate_game_stats_skater(session, mode=args.mode, human_id=args.human_id)
    finally:
        # Release the DB connection back to the pool even if aggregation raises.
        session.close()
358
+
359
+
360
# Script entry point: run the CLI aggregation only when executed directly,
# not when imported as a library module.
if __name__ == "__main__":
    run_aggregate_game_stats_skater()
@@ -117,6 +117,9 @@ def aggregate_goalie_stats(
117
117
  debug_human_id=None,
118
118
  aggregation_window=None,
119
119
  ):
120
+ # Capture start time for aggregation tracking
121
+ aggregation_start_time = datetime.utcnow()
122
+
120
123
  human_ids_to_filter = get_non_human_ids(session)
121
124
 
122
125
  # Get the name of the aggregation, for debug purposes
@@ -364,6 +367,7 @@ def aggregate_goalie_stats(
364
367
  total_in_rank=total_in_rank,
365
368
  first_game_id=stat["first_game_id"],
366
369
  last_game_id=stat["last_game_id"],
370
+ aggregation_started_at=aggregation_start_time,
367
371
  )
368
372
  session.add(goalie_stat)
369
373
  # Commit in batches
@@ -371,6 +375,13 @@ def aggregate_goalie_stats(
371
375
  session.commit()
372
376
  session.commit()
373
377
 
378
+ # Update all records with completion timestamp
379
+ aggregation_end_time = datetime.utcnow()
380
+ session.query(StatsModel).filter(
381
+ StatsModel.aggregation_id == aggregation_id
382
+ ).update({StatsModel.aggregation_completed_at: aggregation_end_time})
383
+ session.commit()
384
+
374
385
 
375
386
  def run_aggregate_goalie_stats():
376
387
  session = create_session("boss")
@@ -44,6 +44,9 @@ def aggregate_human_stats(
44
44
  human_id_filter=None,
45
45
  aggregation_window=None,
46
46
  ):
47
+ # Capture start time for aggregation tracking
48
+ aggregation_start_time = datetime.utcnow()
49
+
47
50
  human_ids_to_filter = get_non_human_ids(session)
48
51
 
49
52
  if aggregation_type == "org":
@@ -517,6 +520,7 @@ def aggregate_human_stats(
517
520
  last_game_id_referee=stat["last_game_id_referee"],
518
521
  first_game_id_scorekeeper=stat["first_game_id_scorekeeper"],
519
522
  last_game_id_scorekeeper=stat["last_game_id_scorekeeper"],
523
+ aggregation_started_at=aggregation_start_time,
520
524
  )
521
525
  session.add(human_stat)
522
526
  # Commit in batches
@@ -607,10 +611,18 @@ def aggregate_human_stats(
607
611
  last_game_id_referee=overall_stats["last_game_id_referee"],
608
612
  first_game_id_scorekeeper=overall_stats["first_game_id_scorekeeper"],
609
613
  last_game_id_scorekeeper=overall_stats["last_game_id_scorekeeper"],
614
+ aggregation_started_at=aggregation_start_time,
610
615
  )
611
616
  session.add(overall_human_stat)
612
617
  session.commit()
613
618
 
619
+ # Update all records with completion timestamp
620
+ aggregation_end_time = datetime.utcnow()
621
+ session.query(StatsModel).filter(
622
+ StatsModel.aggregation_id == aggregation_id
623
+ ).update({StatsModel.aggregation_completed_at: aggregation_end_time})
624
+ session.commit()
625
+
614
626
 
615
627
  def run_aggregate_human_stats():
616
628
  session = create_session("boss")
@@ -5,6 +5,7 @@ import sys
5
5
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
6
6
 
7
7
 
8
+ from datetime import datetime
8
9
 
9
10
  import sqlalchemy
10
11
  from sqlalchemy.sql import case, func
@@ -102,6 +103,9 @@ def insert_percentile_markers_referee(
102
103
  def aggregate_referee_stats(
103
104
  session, aggregation_type, aggregation_id, aggregation_window=None
104
105
  ):
106
+ # Capture start time for aggregation tracking
107
+ aggregation_start_time = datetime.utcnow()
108
+
105
109
  human_ids_to_filter = get_non_human_ids(session)
106
110
 
107
111
  if aggregation_type == "org":
@@ -375,6 +379,7 @@ def aggregate_referee_stats(
375
379
  total_in_rank=total_in_rank,
376
380
  first_game_id=stat["first_game_id"],
377
381
  last_game_id=stat["last_game_id"],
382
+ aggregation_started_at=aggregation_start_time,
378
383
  )
379
384
  session.add(referee_stat)
380
385
  # Commit in batches
@@ -382,6 +387,13 @@ def aggregate_referee_stats(
382
387
  session.commit()
383
388
  session.commit()
384
389
 
390
+ # Update all records with completion timestamp
391
+ aggregation_end_time = datetime.utcnow()
392
+ session.query(StatsModel).filter(
393
+ StatsModel.aggregation_id == aggregation_id
394
+ ).update({StatsModel.aggregation_completed_at: aggregation_end_time})
395
+ session.commit()
396
+
385
397
 
386
398
  def run_aggregate_referee_stats():
387
399
  session = create_session("boss")
@@ -5,6 +5,8 @@ import sys
5
5
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
6
6
 
7
7
 
8
+ from datetime import datetime
9
+
8
10
  import sqlalchemy
9
11
  from sqlalchemy.sql import func
10
12
 
@@ -151,6 +153,9 @@ def aggregate_scorekeeper_stats(
151
153
  if aggregation_type == "org" and aggregation_id != ALL_ORGS_ID:
152
154
  return # Do nothing for individual organization IDs
153
155
 
156
+ # Capture start time for aggregation tracking
157
+ aggregation_start_time = datetime.utcnow()
158
+
154
159
  human_ids_to_filter = get_non_human_ids(session)
155
160
 
156
161
  if aggregation_type == "org":
@@ -370,6 +375,7 @@ def aggregate_scorekeeper_stats(
370
375
  total_in_rank=total_in_rank,
371
376
  first_game_id=stat["first_game_id"],
372
377
  last_game_id=stat["last_game_id"],
378
+ aggregation_started_at=aggregation_start_time,
373
379
  )
374
380
  session.add(scorekeeper_stat)
375
381
  # Commit in batches
@@ -377,6 +383,13 @@ def aggregate_scorekeeper_stats(
377
383
  session.commit()
378
384
  session.commit()
379
385
 
386
+ # Update all records with completion timestamp
387
+ aggregation_end_time = datetime.utcnow()
388
+ session.query(StatsModel).filter(
389
+ StatsModel.aggregation_id == aggregation_id
390
+ ).update({StatsModel.aggregation_completed_at: aggregation_end_time})
391
+ session.commit()
392
+
380
393
 
381
394
  def run_aggregate_scorekeeper_stats():
382
395
  session = create_session("boss")