hockey-blast-common-lib 0.1.62__py3-none-any.whl → 0.1.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. hockey_blast_common_lib/aggregate_all_stats.py +7 -4
  2. hockey_blast_common_lib/aggregate_goalie_stats.py +303 -113
  3. hockey_blast_common_lib/aggregate_h2h_stats.py +64 -33
  4. hockey_blast_common_lib/aggregate_human_stats.py +566 -281
  5. hockey_blast_common_lib/aggregate_referee_stats.py +287 -145
  6. hockey_blast_common_lib/aggregate_s2s_stats.py +85 -25
  7. hockey_blast_common_lib/aggregate_scorekeeper_stats.py +231 -119
  8. hockey_blast_common_lib/aggregate_skater_stats.py +595 -240
  9. hockey_blast_common_lib/assign_skater_skill.py +21 -11
  10. hockey_blast_common_lib/db_connection.py +59 -8
  11. hockey_blast_common_lib/embedding_utils.py +309 -0
  12. hockey_blast_common_lib/h2h_models.py +150 -56
  13. hockey_blast_common_lib/hockey_blast_sample_backup.sql.gz +0 -0
  14. hockey_blast_common_lib/models.py +305 -149
  15. hockey_blast_common_lib/options.py +30 -15
  16. hockey_blast_common_lib/progress_utils.py +21 -13
  17. hockey_blast_common_lib/skills_in_divisions.py +170 -33
  18. hockey_blast_common_lib/skills_propagation.py +164 -70
  19. hockey_blast_common_lib/stats_models.py +489 -245
  20. hockey_blast_common_lib/stats_utils.py +6 -3
  21. hockey_blast_common_lib/utils.py +89 -25
  22. hockey_blast_common_lib/wsgi.py +7 -5
  23. {hockey_blast_common_lib-0.1.62.dist-info → hockey_blast_common_lib-0.1.64.dist-info}/METADATA +1 -1
  24. hockey_blast_common_lib-0.1.64.dist-info/RECORD +29 -0
  25. hockey_blast_common_lib-0.1.62.dist-info/RECORD +0 -28
  26. {hockey_blast_common_lib-0.1.62.dist-info → hockey_blast_common_lib-0.1.64.dist-info}/WHEEL +0 -0
  27. {hockey_blast_common_lib-0.1.62.dist-info → hockey_blast_common_lib-0.1.64.dist-info}/top_level.txt +0 -0
@@ -1,45 +1,50 @@
1
- import sys, os
1
+ import os
2
+ import sys
2
3
 
3
4
  # Add the package directory to the Python path
4
5
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
5
6
 
6
- from hockey_blast_common_lib.models import Human, Level
7
- from hockey_blast_common_lib.stats_models import LevelStatsSkater
8
7
  from hockey_blast_common_lib.db_connection import create_session
8
+ from hockey_blast_common_lib.models import Human, Level
9
9
  from hockey_blast_common_lib.progress_utils import create_progress_tracker
10
+ from hockey_blast_common_lib.stats_models import LevelStatsSkater
11
+
10
12
 
11
13
  def calculate_skater_skill_value(session, level_stats):
12
- min_skill_value = float('inf') # Start with infinity since we want the minimum
14
+ min_skill_value = float("inf") # Start with infinity since we want the minimum
13
15
 
14
16
  for stat in level_stats:
15
17
  level = session.query(Level).filter(Level.id == stat.level_id).first()
16
18
  if not level or level.skill_value < 0:
17
19
  continue
18
20
  level_skill_value = level.skill_value
19
-
21
+
20
22
  # Fix critical bug: Invert rank ratios so better players (lower ranks) get higher skill factors (and therefore lower, i.e. better, skill values)
21
23
  # Rank 1 (best) should get factor close to 1.0, worst rank should get factor close to 0.0
22
24
  if stat.total_in_rank > 1:
23
- ppg_skill_factor = 1 - (stat.points_per_game_rank - 1) / (stat.total_in_rank - 1)
25
+ ppg_skill_factor = 1 - (stat.points_per_game_rank - 1) / (
26
+ stat.total_in_rank - 1
27
+ )
24
28
  else:
25
29
  ppg_skill_factor = 1.0 # Only one player in level
26
-
30
+
27
31
  # Apply skill adjustment: range from 1.1 to 1.3 of level base skill
28
32
  # Since lower skill_value is better: Best player gets 1.1x (closer to better levels), worst gets 1.3x
29
33
  skill_adjustment = 1.3 - 0.2 * ppg_skill_factor
30
34
  skill_value = level_skill_value * skill_adjustment
31
-
35
+
32
36
  # Take the minimum skill value across all levels the player has played in (lower is better)
33
37
  min_skill_value = min(min_skill_value, skill_value)
34
38
 
35
- return min_skill_value if min_skill_value != float('inf') else 0
39
+ return min_skill_value if min_skill_value != float("inf") else 0
40
+
36
41
 
37
42
  def assign_skater_skill_values():
38
43
  session = create_session("boss")
39
44
 
40
45
  humans = session.query(Human).all()
41
46
  total_humans = len(humans)
42
-
47
+
43
48
  # Create progress tracker
44
49
  progress = create_progress_tracker(total_humans, "Assigning skater skill values")
45
50
 
@@ -47,7 +52,11 @@ def assign_skater_skill_values():
47
52
  updates_count = 0
48
53
 
49
54
  for i, human in enumerate(humans):
50
- level_stats = session.query(LevelStatsSkater).filter(LevelStatsSkater.human_id == human.id).all()
55
+ level_stats = (
56
+ session.query(LevelStatsSkater)
57
+ .filter(LevelStatsSkater.human_id == human.id)
58
+ .all()
59
+ )
51
60
  if level_stats:
52
61
  skater_skill_value = calculate_skater_skill_value(session, level_stats)
53
62
  human.skater_skill_value = skater_skill_value
@@ -65,5 +74,6 @@ def assign_skater_skill_values():
65
74
 
66
75
  print("Skater skill values have been assigned to all humans.")
67
76
 
77
+
68
78
  if __name__ == "__main__":
69
79
  assign_skater_skill_values()
@@ -1,7 +1,8 @@
1
1
  import os
2
+
3
+ from dotenv import load_dotenv
2
4
  from sqlalchemy import create_engine
3
5
  from sqlalchemy.orm import sessionmaker
4
- from dotenv import load_dotenv
5
6
 
6
7
  # Load environment variables from .env file in the root directory of THE PROJECT (not this library)
7
8
  load_dotenv()
@@ -13,36 +14,86 @@ DB_PARAMS = {
13
14
  "user": os.getenv("DB_USER", "frontend_user"),
14
15
  "password": os.getenv("DB_PASSWORD", "hockey-blast"),
15
16
  "host": os.getenv("DB_HOST", "localhost"),
16
- "port": int(os.getenv("DB_PORT", 5432))
17
+ "port": int(os.getenv("DB_PORT", 5432)),
17
18
  },
18
-
19
19
  "frontend-sample-db": {
20
20
  "dbname": os.getenv("DB_NAME_SAMPLE", "hockey_blast_sample"),
21
21
  "user": os.getenv("DB_USER", "frontend_user"),
22
22
  "password": os.getenv("DB_PASSWORD", "hockey-blast"),
23
23
  "host": os.getenv("DB_HOST", "localhost"),
24
- "port": int(os.getenv("DB_PORT", 5432))
24
+ "port": int(os.getenv("DB_PORT", 5432)),
25
25
  },
26
-
27
- # The section below is to handle recovery of sample DB where boss user is present, to avoid warnings and errors
28
- # TODO: Maybe figure out a way to do backup without it and make frontend_user own the sample?
26
+ # MCP server uses read-only frontend_user (same as frontend)
27
+ "mcp": {
28
+ "dbname": os.getenv("DB_NAME", "hockey_blast"),
29
+ "user": os.getenv("DB_USER", "frontend_user"),
30
+ "password": os.getenv("DB_PASSWORD", "hockey-blast"),
31
+ "host": os.getenv("DB_HOST", "localhost"),
32
+ "port": int(os.getenv("DB_PORT", 5432)),
33
+ },
34
+ # The section below is to handle recovery of sample DB where boss user is present, to avoid warnings and errors
35
+ # TODO: Maybe figure out a way to do backup without it and make frontend_user own the sample?
29
36
  "boss": {
30
37
  "dbname": os.getenv("DB_NAME", "hockey_blast"),
31
38
  "user": os.getenv("DB_USER_BOSS", "boss"),
32
39
  "password": os.getenv("DB_PASSWORD_BOSS", "boss"),
33
40
  "host": os.getenv("DB_HOST", "localhost"),
34
- "port": int(os.getenv("DB_PORT", 5432))
41
+ "port": int(os.getenv("DB_PORT", 5432)),
35
42
  },
36
43
  }
37
44
 
45
+
38
46
  def get_db_params(config_name):
39
47
  if config_name not in DB_PARAMS:
40
48
  raise ValueError(f"Invalid organization: {config_name}")
41
49
  return DB_PARAMS[config_name]
42
50
 
51
+
43
52
  def create_session(config_name):
53
+ """
54
+ Create a database session using the specified configuration.
55
+
56
+ Args:
57
+ config_name: One of "frontend", "frontend-sample-db", "mcp", "boss"
58
+
59
+ Returns:
60
+ SQLAlchemy session object
61
+ """
44
62
  db_params = get_db_params(config_name)
45
63
  db_url = f"postgresql://{db_params['user']}:{db_params['password']}@{db_params['host']}:{db_params['port']}/{db_params['dbname']}"
46
64
  engine = create_engine(db_url)
47
65
  Session = sessionmaker(bind=engine)
48
66
  return Session()
67
+
68
+
69
+ # Convenience functions for standardized session creation
70
+ def create_session_frontend():
71
+ """
72
+ Create read-only session for frontend web application.
73
+ Uses frontend_user with limited permissions.
74
+ """
75
+ return create_session("frontend")
76
+
77
+
78
+ def create_session_mcp():
79
+ """
80
+ Create read-only session for MCP server.
81
+ Uses frontend_user with limited permissions (same as frontend).
82
+ """
83
+ return create_session("mcp")
84
+
85
+
86
+ def create_session_frontend_sampledb():
87
+ """
88
+ Create read-only session for frontend sample database.
89
+ Uses frontend_user with limited permissions.
90
+ """
91
+ return create_session("frontend-sample-db")
92
+
93
+
94
+ def create_session_boss():
95
+ """
96
+ Create full-access session for pipeline operations.
97
+ WARNING: Has write permissions. Use only in pipeline scripts.
98
+ """
99
+ return create_session("boss")
@@ -0,0 +1,309 @@
1
+ """
2
+ Embedding utilities for vector search using AWS Bedrock Titan.
3
+
4
+ This module provides functions to generate embeddings for text using AWS Bedrock's
5
+ Titan embedding model and store them in the database for semantic search.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ from typing import List, Optional
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # AWS Bedrock configuration
15
+ BEDROCK_REGION = "us-west-2"
16
+ BEDROCK_EMBEDDING_MODEL = "amazon.titan-embed-text-v2:0"
17
+ EMBEDDING_DIMENSION = 1024 # Titan v2 produces 1024-dimensional embeddings
18
+
19
+
20
+ def generate_embedding(text: str) -> Optional[List[float]]:
21
+ """
22
+ Generate embedding vector for text using AWS Bedrock Titan.
23
+
24
+ Args:
25
+ text: Text to embed (e.g., "Pavel Kletskov" or "Good Guys")
26
+
27
+ Returns:
28
+ List of floats representing the embedding vector (1024 dimensions)
29
+ None if embedding generation fails
30
+ """
31
+ if not text or not text.strip():
32
+ logger.warning("Empty text provided to generate_embedding")
33
+ return None
34
+
35
+ try:
36
+ import boto3
37
+
38
+ bedrock = boto3.client(
39
+ service_name="bedrock-runtime",
40
+ region_name=BEDROCK_REGION
41
+ )
42
+
43
+ # Titan embedding API request format
44
+ request_body = {
45
+ "inputText": text.strip()
46
+ }
47
+
48
+ response = bedrock.invoke_model(
49
+ modelId=BEDROCK_EMBEDDING_MODEL,
50
+ body=json.dumps(request_body)
51
+ )
52
+
53
+ response_body = json.loads(response["body"].read())
54
+ embedding = response_body.get("embedding")
55
+
56
+ if not embedding:
57
+ logger.error(f"No embedding returned for text: {text[:50]}")
58
+ return None
59
+
60
+ if len(embedding) != EMBEDDING_DIMENSION:
61
+ logger.error(
62
+ f"Unexpected embedding dimension: {len(embedding)} (expected {EMBEDDING_DIMENSION})"
63
+ )
64
+ return None
65
+
66
+ return embedding
67
+
68
+ except Exception as e:
69
+ logger.error(f"Failed to generate embedding for '{text[:50]}': {e}")
70
+ return None
71
+
72
+
73
+ def update_human_embedding(session, human_id: int, full_name: str) -> bool:
74
+ """
75
+ Generate and store embedding for a human (player/referee/scorekeeper).
76
+
77
+ Args:
78
+ session: SQLAlchemy session
79
+ human_id: Human ID in database
80
+ full_name: Full name like "Pavel Kletskov"
81
+
82
+ Returns:
83
+ True if embedding was successfully stored, False otherwise
84
+ """
85
+ from datetime import datetime
86
+ from hockey_blast_common_lib.models import HumanEmbedding
87
+
88
+ if not full_name or not full_name.strip():
89
+ logger.warning(f"Empty full_name for human_id {human_id}")
90
+ return False
91
+
92
+ # Generate embedding
93
+ embedding = generate_embedding(full_name.strip())
94
+ if not embedding:
95
+ return False
96
+
97
+ try:
98
+ # Serialize the Python list as a pgvector text literal ("[x1,x2,...]") for the ::vector cast
99
+ embedding_str = "[" + ",".join(str(x) for x in embedding) + "]"
100
+
101
+ # Check if embedding already exists using ORM
102
+ existing = session.query(HumanEmbedding).filter_by(human_id=human_id).first()
103
+
104
+ if existing:
105
+ # Update existing record
106
+ # Note: Need raw SQL for vector type casting with ::vector
107
+ # The embedding is interpolated with an f-string (not a bound parameter) since it needs the ::vector cast
108
+ from sqlalchemy import text
109
+ sql = text(f"""
110
+ UPDATE human_embeddings
111
+ SET full_name = :full_name,
112
+ embedding = '{embedding_str}'::vector,
113
+ updated_at = :updated_at
114
+ WHERE human_id = :human_id
115
+ """)
116
+ session.execute(
117
+ sql,
118
+ {
119
+ "human_id": human_id,
120
+ "full_name": full_name.strip(),
121
+ "updated_at": datetime.utcnow()
122
+ }
123
+ )
124
+ else:
125
+ # Insert new record
126
+ # Note: Need raw SQL for vector type casting with ::vector
127
+ # The embedding is interpolated with an f-string (not a bound parameter) since it needs the ::vector cast
128
+ from sqlalchemy import text
129
+ sql = text(f"""
130
+ INSERT INTO human_embeddings (human_id, full_name, embedding, updated_at)
131
+ VALUES (:human_id, :full_name, '{embedding_str}'::vector, :updated_at)
132
+ """)
133
+ session.execute(
134
+ sql,
135
+ {
136
+ "human_id": human_id,
137
+ "full_name": full_name.strip(),
138
+ "updated_at": datetime.utcnow()
139
+ }
140
+ )
141
+
142
+ logger.info(f"Updated embedding for human_id={human_id}, name='{full_name}'")
143
+ return True
144
+
145
+ except Exception as e:
146
+ logger.error(f"Failed to store embedding for human_id={human_id}: {e}")
147
+ return False
148
+
149
+
150
+ def update_team_embedding(session, team_id: int, team_name: str) -> bool:
151
+ """
152
+ Generate and store embedding for a team.
153
+
154
+ Args:
155
+ session: SQLAlchemy session
156
+ team_id: Team ID in database
157
+ team_name: Team name like "Good Guys"
158
+
159
+ Returns:
160
+ True if embedding was successfully stored, False otherwise
161
+ """
162
+ from datetime import datetime
163
+ from hockey_blast_common_lib.models import TeamEmbedding
164
+
165
+ if not team_name or not team_name.strip():
166
+ logger.warning(f"Empty team_name for team_id {team_id}")
167
+ return False
168
+
169
+ # Generate embedding
170
+ embedding = generate_embedding(team_name.strip())
171
+ if not embedding:
172
+ return False
173
+
174
+ try:
175
+ # Serialize the Python list as a pgvector text literal ("[x1,x2,...]") for the ::vector cast
176
+ embedding_str = "[" + ",".join(str(x) for x in embedding) + "]"
177
+
178
+ # Check if embedding already exists using ORM
179
+ existing = session.query(TeamEmbedding).filter_by(team_id=team_id).first()
180
+
181
+ if existing:
182
+ # Update existing record
183
+ # Note: Need raw SQL for vector type casting with ::vector
184
+ # The embedding is interpolated with an f-string (not a bound parameter) since it needs the ::vector cast
185
+ from sqlalchemy import text
186
+ sql = text(f"""
187
+ UPDATE team_embeddings
188
+ SET team_name = :team_name,
189
+ embedding = '{embedding_str}'::vector,
190
+ updated_at = :updated_at
191
+ WHERE team_id = :team_id
192
+ """)
193
+ session.execute(
194
+ sql,
195
+ {
196
+ "team_id": team_id,
197
+ "team_name": team_name.strip(),
198
+ "updated_at": datetime.utcnow()
199
+ }
200
+ )
201
+ else:
202
+ # Insert new record
203
+ # Note: Need raw SQL for vector type casting with ::vector
204
+ # The embedding is interpolated with an f-string (not a bound parameter) since it needs the ::vector cast
205
+ from sqlalchemy import text
206
+ sql = text(f"""
207
+ INSERT INTO team_embeddings (team_id, team_name, embedding, updated_at)
208
+ VALUES (:team_id, :team_name, '{embedding_str}'::vector, :updated_at)
209
+ """)
210
+ session.execute(
211
+ sql,
212
+ {
213
+ "team_id": team_id,
214
+ "team_name": team_name.strip(),
215
+ "updated_at": datetime.utcnow()
216
+ }
217
+ )
218
+
219
+ logger.info(f"Updated embedding for team_id={team_id}, name='{team_name}'")
220
+ return True
221
+
222
+ except Exception as e:
223
+ logger.error(f"Failed to store embedding for team_id={team_id}: {e}")
224
+ return False
225
+
226
+
227
+ def search_embeddings_semantic(
228
+ session,
229
+ query: str,
230
+ entity_type: str = "all",
231
+ limit: int = 10
232
+ ) -> List[dict]:
233
+ """
234
+ Semantic search across human and team embeddings.
235
+
236
+ Args:
237
+ session: SQLAlchemy session
238
+ query: Search query (e.g., "good guy", "pavel")
239
+ entity_type: "human", "team", or "all"
240
+ limit: Maximum number of results
241
+
242
+ Returns:
243
+ List of dicts with keys: type, id, name, similarity
244
+ """
245
+ from sqlalchemy import text
246
+
247
+ # Generate query embedding
248
+ query_embedding = generate_embedding(query)
249
+ if not query_embedding:
250
+ logger.error(f"Failed to generate embedding for query: {query}")
251
+ return []
252
+
253
+ embedding_str = "[" + ",".join(str(x) for x in query_embedding) + "]"
254
+ results = []
255
+
256
+ try:
257
+ # Search humans
258
+ # Note: Using raw SQL for pgvector distance operator (<=>)
259
+ # The embedding is interpolated with an f-string (not a bound parameter) since it needs the ::vector cast
260
+ if entity_type in ("human", "all"):
261
+ sql = text(f"""
262
+ SELECT
263
+ human_id,
264
+ full_name,
265
+ 1 - (embedding <=> '{embedding_str}'::vector) as similarity
266
+ FROM human_embeddings
267
+ ORDER BY embedding <=> '{embedding_str}'::vector
268
+ LIMIT :limit
269
+ """)
270
+ human_results = session.execute(sql, {"limit": limit}).fetchall()
271
+
272
+ for row in human_results:
273
+ results.append({
274
+ "type": "human",
275
+ "id": row[0],
276
+ "name": row[1],
277
+ "similarity": float(row[2])
278
+ })
279
+
280
+ # Search teams
281
+ # Note: Using raw SQL for pgvector distance operator (<=>)
282
+ # The embedding is interpolated with an f-string (not a bound parameter) since it needs the ::vector cast
283
+ if entity_type in ("team", "all"):
284
+ sql = text(f"""
285
+ SELECT
286
+ team_id,
287
+ team_name,
288
+ 1 - (embedding <=> '{embedding_str}'::vector) as similarity
289
+ FROM team_embeddings
290
+ ORDER BY embedding <=> '{embedding_str}'::vector
291
+ LIMIT :limit
292
+ """)
293
+ team_results = session.execute(sql, {"limit": limit}).fetchall()
294
+
295
+ for row in team_results:
296
+ results.append({
297
+ "type": "team",
298
+ "id": row[0],
299
+ "name": row[1],
300
+ "similarity": float(row[2])
301
+ })
302
+
303
+ # Sort by similarity descending and limit
304
+ results.sort(key=lambda x: x["similarity"], reverse=True)
305
+ return results[:limit]
306
+
307
+ except Exception as e:
308
+ logger.error(f"Semantic search failed for query '{query}': {e}")
309
+ return []