hockey-blast-common-lib 0.1.63__py3-none-any.whl → 0.1.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hockey_blast_common_lib/aggregate_all_stats.py +7 -4
- hockey_blast_common_lib/aggregate_goalie_stats.py +301 -107
- hockey_blast_common_lib/aggregate_h2h_stats.py +64 -33
- hockey_blast_common_lib/aggregate_human_stats.py +565 -280
- hockey_blast_common_lib/aggregate_referee_stats.py +286 -135
- hockey_blast_common_lib/aggregate_s2s_stats.py +85 -25
- hockey_blast_common_lib/aggregate_scorekeeper_stats.py +228 -113
- hockey_blast_common_lib/aggregate_skater_stats.py +561 -238
- hockey_blast_common_lib/assign_skater_skill.py +21 -11
- hockey_blast_common_lib/db_connection.py +59 -8
- hockey_blast_common_lib/embedding_utils.py +309 -0
- hockey_blast_common_lib/h2h_models.py +150 -56
- hockey_blast_common_lib/models.py +305 -150
- hockey_blast_common_lib/options.py +30 -15
- hockey_blast_common_lib/progress_utils.py +21 -13
- hockey_blast_common_lib/skills_in_divisions.py +170 -33
- hockey_blast_common_lib/skills_propagation.py +164 -70
- hockey_blast_common_lib/stats_models.py +489 -245
- hockey_blast_common_lib/stats_utils.py +6 -3
- hockey_blast_common_lib/utils.py +89 -25
- hockey_blast_common_lib/wsgi.py +7 -5
- {hockey_blast_common_lib-0.1.63.dist-info → hockey_blast_common_lib-0.1.64.dist-info}/METADATA +1 -1
- hockey_blast_common_lib-0.1.64.dist-info/RECORD +29 -0
- hockey_blast_common_lib-0.1.63.dist-info/RECORD +0 -28
- {hockey_blast_common_lib-0.1.63.dist-info → hockey_blast_common_lib-0.1.64.dist-info}/WHEEL +0 -0
- {hockey_blast_common_lib-0.1.63.dist-info → hockey_blast_common_lib-0.1.64.dist-info}/top_level.txt +0 -0
|
@@ -1,45 +1,50 @@
|
|
|
1
|
-
import
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
2
3
|
|
|
3
4
|
# Add the package directory to the Python path
|
|
4
5
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
5
6
|
|
|
6
|
-
from hockey_blast_common_lib.models import Human, Level
|
|
7
|
-
from hockey_blast_common_lib.stats_models import LevelStatsSkater
|
|
8
7
|
from hockey_blast_common_lib.db_connection import create_session
|
|
8
|
+
from hockey_blast_common_lib.models import Human, Level
|
|
9
9
|
from hockey_blast_common_lib.progress_utils import create_progress_tracker
|
|
10
|
+
from hockey_blast_common_lib.stats_models import LevelStatsSkater
|
|
11
|
+
|
|
10
12
|
|
|
11
13
|
def calculate_skater_skill_value(session, level_stats):
    """
    Derive a skater's skill value from their per-level statistics.

    For every stat row, the base ``skill_value`` of the row's level is scaled
    by a multiplier derived from the skater's points-per-game rank in that
    level. The smallest scaled value across all rows is returned (lower is
    better).

    Args:
        session: SQLAlchemy session used to look up ``Level`` rows.
        level_stats: Iterable of stat rows exposing ``level_id``,
            ``points_per_game_rank`` and ``total_in_rank``.

    Returns:
        The minimum adjusted skill value, or 0 when no row refers to a level
        with a usable (non-negative) skill value.
    """
    min_skill_value = float("inf")  # Start with infinity since we want the minimum

    for stat in level_stats:
        level = session.query(Level).filter(Level.id == stat.level_id).first()
        # Skip unknown levels and levels without a usable skill value.
        # The explicit None check avoids a TypeError from ``None < 0``
        # (assumes skill_value can be unset - TODO confirm against the model).
        if not level or level.skill_value is None or level.skill_value < 0:
            continue
        level_skill_value = level.skill_value

        # Invert the rank ratio so better players (lower ranks) get a higher factor:
        # rank 1 (best) -> 1.0, worst rank -> 0.0
        if stat.total_in_rank > 1:
            ppg_skill_factor = 1 - (stat.points_per_game_rank - 1) / (
                stat.total_in_rank - 1
            )
        else:
            ppg_skill_factor = 1.0  # Only one player in level

        # Since lower skill_value is better, the best player gets the smallest
        # multiplier: 1.1x at factor 1.0, up to 1.3x at factor 0.0.
        # NOTE(review): the previous comment described a 0.8x-1.2x range, which
        # this formula does not produce - confirm which one is intended.
        skill_adjustment = 1.3 - 0.2 * ppg_skill_factor
        skill_value = level_skill_value * skill_adjustment

        # Take the minimum skill value across all levels the player has played in (lower is better)
        min_skill_value = min(min_skill_value, skill_value)

    return min_skill_value if min_skill_value != float("inf") else 0
|
|
40
|
+
|
|
36
41
|
|
|
37
42
|
def assign_skater_skill_values():
|
|
38
43
|
session = create_session("boss")
|
|
39
44
|
|
|
40
45
|
humans = session.query(Human).all()
|
|
41
46
|
total_humans = len(humans)
|
|
42
|
-
|
|
47
|
+
|
|
43
48
|
# Create progress tracker
|
|
44
49
|
progress = create_progress_tracker(total_humans, "Assigning skater skill values")
|
|
45
50
|
|
|
@@ -47,7 +52,11 @@ def assign_skater_skill_values():
|
|
|
47
52
|
updates_count = 0
|
|
48
53
|
|
|
49
54
|
for i, human in enumerate(humans):
|
|
50
|
-
level_stats =
|
|
55
|
+
level_stats = (
|
|
56
|
+
session.query(LevelStatsSkater)
|
|
57
|
+
.filter(LevelStatsSkater.human_id == human.id)
|
|
58
|
+
.all()
|
|
59
|
+
)
|
|
51
60
|
if level_stats:
|
|
52
61
|
skater_skill_value = calculate_skater_skill_value(session, level_stats)
|
|
53
62
|
human.skater_skill_value = skater_skill_value
|
|
@@ -65,5 +74,6 @@ def assign_skater_skill_values():
|
|
|
65
74
|
|
|
66
75
|
print("Skater skill values have been assigned to all humans.")
|
|
67
76
|
|
|
77
|
+
|
|
68
78
|
if __name__ == "__main__":
|
|
69
79
|
assign_skater_skill_values()
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import os
|
|
2
|
+
|
|
3
|
+
from dotenv import load_dotenv
|
|
2
4
|
from sqlalchemy import create_engine
|
|
3
5
|
from sqlalchemy.orm import sessionmaker
|
|
4
|
-
from dotenv import load_dotenv
|
|
5
6
|
|
|
6
7
|
# Load environment variables from .env file in the root directory of THE PROJECT (not this library)
|
|
7
8
|
load_dotenv()
|
|
@@ -13,36 +14,86 @@ DB_PARAMS = {
|
|
|
13
14
|
"user": os.getenv("DB_USER", "frontend_user"),
|
|
14
15
|
"password": os.getenv("DB_PASSWORD", "hockey-blast"),
|
|
15
16
|
"host": os.getenv("DB_HOST", "localhost"),
|
|
16
|
-
"port": int(os.getenv("DB_PORT", 5432))
|
|
17
|
+
"port": int(os.getenv("DB_PORT", 5432)),
|
|
17
18
|
},
|
|
18
|
-
|
|
19
19
|
"frontend-sample-db": {
|
|
20
20
|
"dbname": os.getenv("DB_NAME_SAMPLE", "hockey_blast_sample"),
|
|
21
21
|
"user": os.getenv("DB_USER", "frontend_user"),
|
|
22
22
|
"password": os.getenv("DB_PASSWORD", "hockey-blast"),
|
|
23
23
|
"host": os.getenv("DB_HOST", "localhost"),
|
|
24
|
-
"port": int(os.getenv("DB_PORT", 5432))
|
|
24
|
+
"port": int(os.getenv("DB_PORT", 5432)),
|
|
25
25
|
},
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
26
|
+
# MCP server uses read-only frontend_user (same as frontend)
|
|
27
|
+
"mcp": {
|
|
28
|
+
"dbname": os.getenv("DB_NAME", "hockey_blast"),
|
|
29
|
+
"user": os.getenv("DB_USER", "frontend_user"),
|
|
30
|
+
"password": os.getenv("DB_PASSWORD", "hockey-blast"),
|
|
31
|
+
"host": os.getenv("DB_HOST", "localhost"),
|
|
32
|
+
"port": int(os.getenv("DB_PORT", 5432)),
|
|
33
|
+
},
|
|
34
|
+
# The section below is to handle recovery of sample DB where boss user is present, to avoid warnings and errors
|
|
35
|
+
# TODO: Maybe figure out a way to do backup without it and make frontend_user own the sample?
|
|
29
36
|
"boss": {
|
|
30
37
|
"dbname": os.getenv("DB_NAME", "hockey_blast"),
|
|
31
38
|
"user": os.getenv("DB_USER_BOSS", "boss"),
|
|
32
39
|
"password": os.getenv("DB_PASSWORD_BOSS", "boss"),
|
|
33
40
|
"host": os.getenv("DB_HOST", "localhost"),
|
|
34
|
-
"port": int(os.getenv("DB_PORT", 5432))
|
|
41
|
+
"port": int(os.getenv("DB_PORT", 5432)),
|
|
35
42
|
},
|
|
36
43
|
}
|
|
37
44
|
|
|
45
|
+
|
|
38
46
|
def get_db_params(config_name):
    """
    Look up the connection parameters registered under ``config_name``.

    Args:
        config_name: Key into the module-level ``DB_PARAMS`` mapping.

    Returns:
        The parameter dict stored in ``DB_PARAMS`` for that key.

    Raises:
        ValueError: If ``config_name`` is not a key of ``DB_PARAMS``.
    """
    if config_name in DB_PARAMS:
        return DB_PARAMS[config_name]
    raise ValueError(f"Invalid organization: {config_name}")
|
|
42
50
|
|
|
51
|
+
|
|
43
52
|
# Engines keyed by (process id, connection URL). An Engine owns a connection
# pool, so building one per session would open a fresh pool on every call.
# The process id is part of the key so a forked child never reuses pooled
# connections inherited from its parent.
_ENGINE_CACHE = {}


def create_session(config_name):
    """
    Create a database session using the specified configuration.

    The underlying engine is created once per process and connection URL and
    reused by later calls; every call still returns a new session object.

    Args:
        config_name: One of "frontend", "frontend-sample-db", "mcp", "boss"

    Returns:
        SQLAlchemy session object

    Raises:
        ValueError: If ``config_name`` is not a known configuration
            (raised by ``get_db_params``).
    """
    from urllib.parse import quote_plus

    db_params = get_db_params(config_name)

    # Escape credentials so characters such as '@', ':' or '/' in a user name
    # or password cannot corrupt the URL structure.
    user = quote_plus(str(db_params["user"]))
    password = quote_plus(str(db_params["password"]))
    db_url = (
        f"postgresql://{user}:{password}"
        f"@{db_params['host']}:{db_params['port']}/{db_params['dbname']}"
    )

    cache_key = (os.getpid(), db_url)
    engine = _ENGINE_CACHE.get(cache_key)
    if engine is None:
        engine = create_engine(db_url)
        _ENGINE_CACHE[cache_key] = engine

    Session = sessionmaker(bind=engine)
    return Session()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# Convenience functions for standardized session creation
|
|
70
|
+
def create_session_frontend():
    """
    Open a session for the frontend web application.

    Delegates to ``create_session`` with the "frontend" configuration, which
    is meant for read-only access through frontend_user (limited permissions).
    """
    config_name = "frontend"
    return create_session(config_name)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def create_session_mcp():
    """
    Open a session for the MCP server.

    Delegates to ``create_session`` with the "mcp" configuration, which is
    meant for read-only access through frontend_user (same as the frontend).
    """
    config_name = "mcp"
    return create_session(config_name)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def create_session_frontend_sampledb():
    """
    Open a session for the frontend sample database.

    Delegates to ``create_session`` with the "frontend-sample-db"
    configuration, which is meant for read-only access through frontend_user
    (limited permissions).
    """
    config_name = "frontend-sample-db"
    return create_session(config_name)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def create_session_boss():
    """
    Open a full-access session for pipeline operations.

    Delegates to ``create_session`` with the "boss" configuration.

    WARNING: This configuration has write permissions. Use it only in
    pipeline scripts.
    """
    config_name = "boss"
    return create_session(config_name)
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Embedding utilities for vector search using AWS Bedrock Titan.
|
|
3
|
+
|
|
4
|
+
This module provides functions to generate embeddings for text using AWS Bedrock's
|
|
5
|
+
Titan embedding model and store them in the database for semantic search.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
from typing import List, Optional
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
# AWS Bedrock configuration
|
|
15
|
+
BEDROCK_REGION = "us-west-2"
|
|
16
|
+
BEDROCK_EMBEDDING_MODEL = "amazon.titan-embed-text-v2:0"
|
|
17
|
+
EMBEDDING_DIMENSION = 1024 # Titan v2 produces 1024-dimensional embeddings
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Bedrock runtime client shared by all calls; created lazily on first use so
# a batch of embeddings does not rebuild the client for every text.
_bedrock_client = None


def _get_bedrock_client():
    """Return the shared Bedrock runtime client, creating it on first use."""
    global _bedrock_client
    if _bedrock_client is None:
        # Function-level import, as in the original code, so boto3 is only
        # needed when an embedding is actually requested.
        import boto3

        _bedrock_client = boto3.client(
            service_name="bedrock-runtime",
            region_name=BEDROCK_REGION,
        )
    return _bedrock_client


def generate_embedding(text: str) -> Optional[List[float]]:
    """
    Generate embedding vector for text using AWS Bedrock Titan.

    Args:
        text: Text to embed (e.g., "Pavel Kletskov" or "Good Guys").
            Leading and trailing whitespace is stripped before sending.

    Returns:
        List of floats representing the embedding vector
        (``EMBEDDING_DIMENSION`` entries), or None when the text is empty,
        the Bedrock call fails, the response carries no embedding, or the
        embedding has an unexpected length.
    """
    if not text or not text.strip():
        logger.warning("Empty text provided to generate_embedding")
        return None

    try:
        bedrock = _get_bedrock_client()

        # Titan embedding API request format
        request_body = {"inputText": text.strip()}

        response = bedrock.invoke_model(
            modelId=BEDROCK_EMBEDDING_MODEL,
            body=json.dumps(request_body),
        )

        response_body = json.loads(response["body"].read())
        embedding = response_body.get("embedding")

        if not embedding:
            logger.error(f"No embedding returned for text: {text[:50]}")
            return None

        # Reject vectors whose size does not match the configured dimension.
        if len(embedding) != EMBEDDING_DIMENSION:
            logger.error(
                f"Unexpected embedding dimension: {len(embedding)} (expected {EMBEDDING_DIMENSION})"
            )
            return None

        return embedding

    except Exception as e:
        # Best-effort API: any failure (client creation, network, parsing) is
        # logged and reported to the caller as None.
        logger.error(f"Failed to generate embedding for '{text[:50]}': {e}")
        return None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def update_human_embedding(session, human_id: int, full_name: str) -> bool:
    """
    Generate and store embedding for a human (player/referee/scorekeeper).

    Updates the existing ``human_embeddings`` row for ``human_id`` when there
    is one, otherwise inserts a new row. The statement is executed on the
    given session; this function does not commit.

    Args:
        session: SQLAlchemy session
        human_id: Human ID in database
        full_name: Full name like "Pavel Kletskov"

    Returns:
        True if embedding was successfully stored, False otherwise
        (empty name, embedding generation failure, or database error).
    """
    from datetime import datetime, timezone

    from sqlalchemy import text

    from hockey_blast_common_lib.models import HumanEmbedding

    if not full_name or not full_name.strip():
        logger.warning(f"Empty full_name for human_id {human_id}")
        return False

    clean_name = full_name.strip()

    # Generate embedding
    embedding = generate_embedding(clean_name)
    if not embedding:
        return False

    try:
        # Serialize the list in the textual form accepted by the vector type
        embedding_str = "[" + ",".join(str(x) for x in embedding) + "]"

        # Check if embedding already exists using ORM
        existing = session.query(HumanEmbedding).filter_by(human_id=human_id).first()

        # CAST(:embedding AS vector) is used instead of the ``::vector``
        # shorthand: a ``:name`` placeholder directly followed by ``::`` is not
        # picked up as a bind parameter by text(), which is what previously
        # forced the value to be formatted into the SQL string.
        if existing:
            sql = text("""
                UPDATE human_embeddings
                SET full_name = :full_name,
                    embedding = CAST(:embedding AS vector),
                    updated_at = :updated_at
                WHERE human_id = :human_id
            """)
        else:
            sql = text("""
                INSERT INTO human_embeddings (human_id, full_name, embedding, updated_at)
                VALUES (:human_id, :full_name, CAST(:embedding AS vector), :updated_at)
            """)

        session.execute(
            sql,
            {
                "human_id": human_id,
                "full_name": clean_name,
                "embedding": embedding_str,
                # Naive UTC timestamp, same value datetime.utcnow() produced,
                # without relying on the deprecated call.
                "updated_at": datetime.now(timezone.utc).replace(tzinfo=None),
            },
        )

        logger.info(f"Updated embedding for human_id={human_id}, name='{full_name}'")
        return True

    except Exception as e:
        logger.error(f"Failed to store embedding for human_id={human_id}: {e}")
        return False
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def update_team_embedding(session, team_id: int, team_name: str) -> bool:
    """
    Generate and store embedding for a team.

    Updates the existing ``team_embeddings`` row for ``team_id`` when there is
    one, otherwise inserts a new row. The statement is executed on the given
    session; this function does not commit.

    Args:
        session: SQLAlchemy session
        team_id: Team ID in database
        team_name: Team name like "Good Guys"

    Returns:
        True if embedding was successfully stored, False otherwise
        (empty name, embedding generation failure, or database error).
    """
    from datetime import datetime, timezone

    from sqlalchemy import text

    from hockey_blast_common_lib.models import TeamEmbedding

    if not team_name or not team_name.strip():
        logger.warning(f"Empty team_name for team_id {team_id}")
        return False

    clean_name = team_name.strip()

    # Generate embedding
    embedding = generate_embedding(clean_name)
    if not embedding:
        return False

    try:
        # Serialize the list in the textual form accepted by the vector type
        embedding_str = "[" + ",".join(str(x) for x in embedding) + "]"

        # Check if embedding already exists using ORM
        existing = session.query(TeamEmbedding).filter_by(team_id=team_id).first()

        # CAST(:embedding AS vector) is used instead of the ``::vector``
        # shorthand: a ``:name`` placeholder directly followed by ``::`` is not
        # picked up as a bind parameter by text(), which is what previously
        # forced the value to be formatted into the SQL string.
        if existing:
            sql = text("""
                UPDATE team_embeddings
                SET team_name = :team_name,
                    embedding = CAST(:embedding AS vector),
                    updated_at = :updated_at
                WHERE team_id = :team_id
            """)
        else:
            sql = text("""
                INSERT INTO team_embeddings (team_id, team_name, embedding, updated_at)
                VALUES (:team_id, :team_name, CAST(:embedding AS vector), :updated_at)
            """)

        session.execute(
            sql,
            {
                "team_id": team_id,
                "team_name": clean_name,
                "embedding": embedding_str,
                # Naive UTC timestamp, same value datetime.utcnow() produced,
                # without relying on the deprecated call.
                "updated_at": datetime.now(timezone.utc).replace(tzinfo=None),
            },
        )

        logger.info(f"Updated embedding for team_id={team_id}, name='{team_name}'")
        return True

    except Exception as e:
        logger.error(f"Failed to store embedding for team_id={team_id}: {e}")
        return False
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def search_embeddings_semantic(
    session,
    query: str,
    entity_type: str = "all",
    limit: int = 10,
) -> List[dict]:
    """
    Semantic search across human and team embeddings.

    The query text is embedded, each selected table is searched with the
    pgvector ``<=>`` distance operator, and the combined hits are ordered by
    similarity (computed as ``1 - distance``), highest first.

    Args:
        session: SQLAlchemy session
        query: Search query (e.g., "good guy", "pavel")
        entity_type: "human", "team", or "all"; any other value selects no
            table and yields an empty list
        limit: Maximum number of results (applied per table and again to the
            combined list)

    Returns:
        List of dicts with keys: type, id, name, similarity. Empty when the
        query embedding cannot be generated or the database query fails.
    """
    from sqlalchemy import text

    # Generate query embedding
    query_embedding = generate_embedding(query)
    if not query_embedding:
        logger.error(f"Failed to generate embedding for query: {query}")
        return []

    embedding_str = "[" + ",".join(str(x) for x in query_embedding) + "]"

    # (result type, table, id column, name column). These identifiers are
    # fixed constants, never caller input, so formatting them into the SQL
    # text is safe; the embedding and limit are passed as bound parameters.
    search_targets = (
        ("human", "human_embeddings", "human_id", "full_name"),
        ("team", "team_embeddings", "team_id", "team_name"),
    )

    results = []

    try:
        for result_type, table, id_column, name_column in search_targets:
            if entity_type not in (result_type, "all"):
                continue

            # Raw SQL is needed for the pgvector distance operator (<=>).
            # CAST(... AS vector) replaces the ``::vector`` shorthand so the
            # embedding can be a bound parameter.
            sql = text(f"""
                SELECT
                    {id_column},
                    {name_column},
                    1 - (embedding <=> CAST(:embedding AS vector)) AS similarity
                FROM {table}
                ORDER BY embedding <=> CAST(:embedding AS vector)
                LIMIT :limit
            """)
            rows = session.execute(
                sql, {"embedding": embedding_str, "limit": limit}
            ).fetchall()

            results.extend(
                {
                    "type": result_type,
                    "id": row[0],
                    "name": row[1],
                    "similarity": float(row[2]),
                }
                for row in rows
            )

        # Sort by similarity descending and limit
        results.sort(key=lambda x: x["similarity"], reverse=True)
        return results[:limit]

    except Exception as e:
        logger.error(f"Semantic search failed for query '{query}': {e}")
        return []
|