orchestrator-core 4.5.3__py3-none-any.whl → 4.6.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +1 -1
- orchestrator/agentic_app.py +1 -21
- orchestrator/api/api_v1/api.py +5 -0
- orchestrator/api/api_v1/endpoints/agent.py +50 -0
- orchestrator/api/api_v1/endpoints/search.py +120 -201
- orchestrator/cli/database.py +3 -0
- orchestrator/cli/generate.py +11 -4
- orchestrator/cli/generator/generator/migration.py +7 -3
- orchestrator/cli/scheduler.py +15 -22
- orchestrator/cli/search/resize_embedding.py +28 -22
- orchestrator/cli/search/speedtest.py +4 -6
- orchestrator/db/__init__.py +6 -0
- orchestrator/db/models.py +75 -0
- orchestrator/migrations/helpers.py +46 -38
- orchestrator/schedules/scheduler.py +32 -15
- orchestrator/schedules/validate_products.py +1 -1
- orchestrator/schemas/search.py +8 -85
- orchestrator/search/agent/__init__.py +2 -2
- orchestrator/search/agent/agent.py +25 -29
- orchestrator/search/agent/json_patch.py +51 -0
- orchestrator/search/agent/prompts.py +35 -9
- orchestrator/search/agent/state.py +28 -2
- orchestrator/search/agent/tools.py +192 -53
- orchestrator/search/core/exceptions.py +6 -0
- orchestrator/search/core/types.py +1 -0
- orchestrator/search/export.py +199 -0
- orchestrator/search/indexing/indexer.py +13 -4
- orchestrator/search/indexing/registry.py +14 -1
- orchestrator/search/llm_migration.py +55 -0
- orchestrator/search/retrieval/__init__.py +3 -2
- orchestrator/search/retrieval/builder.py +5 -1
- orchestrator/search/retrieval/engine.py +66 -23
- orchestrator/search/retrieval/pagination.py +46 -56
- orchestrator/search/retrieval/query_state.py +61 -0
- orchestrator/search/retrieval/retrievers/base.py +26 -40
- orchestrator/search/retrieval/retrievers/fuzzy.py +10 -9
- orchestrator/search/retrieval/retrievers/hybrid.py +11 -8
- orchestrator/search/retrieval/retrievers/semantic.py +9 -8
- orchestrator/search/retrieval/retrievers/structured.py +6 -6
- orchestrator/search/schemas/parameters.py +17 -13
- orchestrator/search/schemas/results.py +4 -1
- orchestrator/settings.py +1 -0
- orchestrator/utils/auth.py +3 -2
- {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/METADATA +3 -3
- {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/RECORD +47 -43
- {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/licenses/LICENSE +0 -0
orchestrator/search/export.py
@@ -0,0 +1,199 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from uuid import UUID
+
+from sqlalchemy import select
+from sqlalchemy.orm import selectinload
+
+from orchestrator.db import (
+    ProcessTable,
+    ProductTable,
+    SubscriptionTable,
+    WorkflowTable,
+    db,
+)
+from orchestrator.search.core.types import EntityType
+
+
+def fetch_subscription_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch subscription data for export.
+
+    Args:
+        entity_ids: List of subscription IDs as strings
+
+    Returns:
+        List of flattened subscription dictionaries with fields:
+        subscription_id, description, status, insync, start_date, end_date,
+        note, product_name, tag, product_type, customer_id
+    """
+    stmt = (
+        select(
+            SubscriptionTable.subscription_id,
+            SubscriptionTable.description,
+            SubscriptionTable.status,
+            SubscriptionTable.insync,
+            SubscriptionTable.start_date,
+            SubscriptionTable.end_date,
+            SubscriptionTable.note,
+            SubscriptionTable.customer_id,
+            ProductTable.name.label("product_name"),
+            ProductTable.tag,
+            ProductTable.product_type,
+        )
+        .join(ProductTable, SubscriptionTable.product_id == ProductTable.product_id)
+        .filter(SubscriptionTable.subscription_id.in_([UUID(sid) for sid in entity_ids]))
+    )
+
+    rows = db.session.execute(stmt).all()
+
+    return [
+        {
+            "subscription_id": str(row.subscription_id),
+            "description": row.description,
+            "status": row.status,
+            "insync": row.insync,
+            "start_date": row.start_date.isoformat() if row.start_date else None,
+            "end_date": row.end_date.isoformat() if row.end_date else None,
+            "note": row.note,
+            "product_name": row.product_name,
+            "tag": row.tag,
+            "product_type": row.product_type,
+            "customer_id": row.customer_id,
+        }
+        for row in rows
+    ]
+
+
+def fetch_workflow_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch workflow data for export.
+
+    Args:
+        entity_ids: List of workflow names as strings
+
+    Returns:
+        List of flattened workflow dictionaries with fields:
+        name, description, created_at, product_names (comma-separated),
+        product_ids (comma-separated), product_types (comma-separated)
+    """
+    stmt = (
+        select(WorkflowTable).options(selectinload(WorkflowTable.products)).filter(WorkflowTable.name.in_(entity_ids))
+    )
+    workflows = db.session.scalars(stmt).all()
+
+    return [
+        {
+            "name": w.name,
+            "description": w.description,
+            "created_at": w.created_at.isoformat() if w.created_at else None,
+            "product_names": ", ".join(p.name for p in w.products),
+            "product_ids": ", ".join(str(p.product_id) for p in w.products),
+            "product_types": ", ".join(p.product_type for p in w.products),
+        }
+        for w in workflows
+    ]
+
+
+def fetch_product_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch product data for export.
+
+    Args:
+        entity_ids: List of product IDs as strings
+
+    Returns:
+        List of flattened product dictionaries with fields:
+        product_id, name, product_type, tag, description, status, created_at
+    """
+    stmt = (
+        select(ProductTable)
+        .options(
+            selectinload(ProductTable.workflows),
+            selectinload(ProductTable.fixed_inputs),
+            selectinload(ProductTable.product_blocks),
+        )
+        .filter(ProductTable.product_id.in_([UUID(pid) for pid in entity_ids]))
+    )
+    products = db.session.scalars(stmt).all()
+
+    return [
+        {
+            "product_id": str(p.product_id),
+            "name": p.name,
+            "product_type": p.product_type,
+            "tag": p.tag,
+            "description": p.description,
+            "status": p.status,
+            "created_at": p.created_at.isoformat() if p.created_at else None,
+        }
+        for p in products
+    ]
+
+
+def fetch_process_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch process data for export.
+
+    Args:
+        entity_ids: List of process IDs as strings
+
+    Returns:
+        List of flattened process dictionaries with fields:
+        process_id, workflow_name, workflow_id, last_status, is_task,
+        created_by, started_at, last_modified_at, last_step
+    """
+    stmt = (
+        select(ProcessTable)
+        .options(selectinload(ProcessTable.workflow))
+        .filter(ProcessTable.process_id.in_([UUID(pid) for pid in entity_ids]))
+    )
+    processes = db.session.scalars(stmt).all()
+
+    return [
+        {
+            "process_id": str(p.process_id),
+            "workflow_name": p.workflow.name if p.workflow else None,
+            "workflow_id": str(p.workflow_id),
+            "last_status": p.last_status,
+            "is_task": p.is_task,
+            "created_by": p.created_by,
+            "started_at": p.started_at.isoformat() if p.started_at else None,
+            "last_modified_at": p.last_modified_at.isoformat() if p.last_modified_at else None,
+            "last_step": p.last_step,
+        }
+        for p in processes
+    ]
+
+
+def fetch_export_data(entity_type: EntityType, entity_ids: list[str]) -> list[dict]:
+    """Fetch export data for any entity type.
+
+    Args:
+        entity_type: The type of entities to fetch
+        entity_ids: List of entity IDs/names as strings
+
+    Returns:
+        List of flattened entity dictionaries ready for CSV export
+
+    Raises:
+        ValueError: If entity_type is not supported
+    """
+    match entity_type:
+        case EntityType.SUBSCRIPTION:
+            return fetch_subscription_export_data(entity_ids)
+        case EntityType.WORKFLOW:
+            return fetch_workflow_export_data(entity_ids)
+        case EntityType.PRODUCT:
+            return fetch_product_export_data(entity_ids)
+        case EntityType.PROCESS:
+            return fetch_process_export_data(entity_ids)
+        case _:
+            raise ValueError(f"Unsupported entity type: {entity_type}")
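
The new export module flattens each supported entity type into plain dictionaries, with fetch_export_data dispatching on EntityType. A minimal usage sketch; it assumes an initialised application database session, and the subscription ID below is a made-up placeholder:

    from orchestrator.search.core.types import EntityType
    from orchestrator.search.export import fetch_export_data

    # Placeholder UUID string: subscriptions, products and processes are looked
    # up by UUID string, workflows by name.
    rows = fetch_export_data(EntityType.SUBSCRIPTION, ["0b8c4d8e-0000-0000-0000-000000000000"])
    for row in rows:
        print(row["subscription_id"], row["product_name"], row["status"])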
orchestrator/search/indexing/indexer.py
@@ -96,6 +96,7 @@ class Indexer:
         self.chunk_size = chunk_size
         self.embedding_model = llm_settings.EMBEDDING_MODEL
         self.logger = logger.bind(entity_kind=config.entity_kind.value)
+        self._entity_titles: dict[str, str] = {}

     def run(self, entities: Iterable[DatabaseEntity]) -> int:
         """Orchestrates the entire indexing process."""
@@ -138,6 +139,8 @@ class Indexer:
         if not entity_chunk:
             return 0, 0

+        self._entity_titles.clear()
+
         fields_to_upsert, paths_to_delete, identical_count = self._determine_changes(entity_chunk, session)

         if paths_to_delete and session is not None:
@@ -174,12 +177,15 @@ class Indexer:
                 entity, pk_name=self.config.pk_name, root_name=self.config.root_name
             )

+            entity_title = self.config.get_title_from_fields(current_fields)
+            self._entity_titles[entity_id] = entity_title
+
             entity_hashes = existing_hashes.get(entity_id, {})
             current_paths = set()

             for field in current_fields:
                 current_paths.add(field.path)
-                current_hash = self._compute_content_hash(field.path, field.value, field.value_type)
+                current_hash = self._compute_content_hash(field.path, field.value, field.value_type, entity_title)
                 if field.path not in entity_hashes or entity_hashes[field.path] != current_hash:
                     fields_to_upsert.append((entity_id, field))
                 else:
@@ -301,21 +307,23 @@ class Indexer:
         return f"{field.path}: {str(field.value)}"

     @staticmethod
-    def _compute_content_hash(path: str, value: Any, value_type: Any) -> str:
+    def _compute_content_hash(path: str, value: Any, value_type: Any, entity_title: str = "") -> str:
         v = "" if value is None else str(value)
-        content = f"{path}:{v}:{value_type}"
+        content = f"{path}:{v}:{value_type}:{entity_title}"
         return hashlib.sha256(content.encode("utf-8")).hexdigest()

     def _make_indexable_record(
         self, field: ExtractedField, entity_id: str, embedding: list[float] | None
     ) -> IndexableRecord:
+        entity_title = self._entity_titles[entity_id]
         return IndexableRecord(
             entity_id=entity_id,
             entity_type=self.config.entity_kind.value,
+            entity_title=entity_title,
             path=Ltree(field.path),
             value=field.value,
             value_type=field.value_type,
-            content_hash=self._compute_content_hash(field.path, field.value, field.value_type),
+            content_hash=self._compute_content_hash(field.path, field.value, field.value_type, entity_title),
             embedding=embedding if embedding else None,
         )

@@ -326,6 +334,7 @@ class Indexer:
         return stmt.on_conflict_do_update(
             index_elements=[AiSearchIndex.entity_id, AiSearchIndex.path],
             set_={
+                AiSearchIndex.entity_title: stmt.excluded.entity_title,
                 AiSearchIndex.value: stmt.excluded.value,
                 AiSearchIndex.value_type: stmt.excluded.value_type,
                 AiSearchIndex.content_hash: stmt.excluded.content_hash,
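
Folding the entity title into the per-field content hash means a title change (for example, an updated subscription description) changes the hash of every field of that entity, so all of its rows are re-upserted instead of being skipped as identical. A standalone sketch of that behaviour, mirroring _compute_content_hash above; the field path, value and value_type strings are illustrative only:

    import hashlib

    def compute_content_hash(path: str, value, value_type, entity_title: str = "") -> str:
        v = "" if value is None else str(value)
        content = f"{path}:{v}:{value_type}:{entity_title}"
        return hashlib.sha256(content.encode("utf-8")).hexdigest()

    before = compute_content_hash("subscription.status", "active", "string")
    after = compute_content_hash("subscription.status", "active", "string", "Core link Amsterdam")
    assert before != after  # same field, new title -> field is scheduled for re-upsert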
orchestrator/search/indexing/registry.py
@@ -25,7 +25,7 @@ from orchestrator.db import (
     WorkflowTable,
 )
 from orchestrator.db.database import BaseModel
-from orchestrator.search.core.types import EntityType
+from orchestrator.search.core.types import EntityType, ExtractedField

 from .traverse import (
     BaseTraverser,
@@ -48,6 +48,7 @@ class EntityConfig(Generic[ModelT]):
     traverser: "type[BaseTraverser]"
     pk_name: str
     root_name: str
+    title_paths: list[str]  # List of field paths to check for title (with fallback)

     def get_all_query(self, entity_id: str | None = None) -> Query | Select:
         query = self.table.query
@@ -56,6 +57,14 @@ class EntityConfig(Generic[ModelT]):
             query = query.filter(pk_column == UUID(entity_id))
         return query

+    def get_title_from_fields(self, fields: list[ExtractedField]) -> str:
+        """Extract title from fields using configured paths."""
+        for title_path in self.title_paths:
+            for field in fields:
+                if field.path == title_path and field.value:
+                    return str(field.value)
+        return "UNKNOWN"
+

 @dataclass(frozen=True)
 class WorkflowConfig(EntityConfig[WorkflowTable]):
@@ -76,6 +85,7 @@ ENTITY_CONFIG_REGISTRY: dict[EntityType, EntityConfig] = {
         traverser=SubscriptionTraverser,
         pk_name="subscription_id",
         root_name="subscription",
+        title_paths=["subscription.description"],
     ),
     EntityType.PRODUCT: EntityConfig(
         entity_kind=EntityType.PRODUCT,
@@ -83,6 +93,7 @@ ENTITY_CONFIG_REGISTRY: dict[EntityType, EntityConfig] = {
         traverser=ProductTraverser,
         pk_name="product_id",
         root_name="product",
+        title_paths=["product.description", "product.name"],
     ),
     EntityType.PROCESS: EntityConfig(
         entity_kind=EntityType.PROCESS,
@@ -90,6 +101,7 @@ ENTITY_CONFIG_REGISTRY: dict[EntityType, EntityConfig] = {
         traverser=ProcessTraverser,
         pk_name="process_id",
         root_name="process",
+        title_paths=["process.workflow_name"],
     ),
     EntityType.WORKFLOW: WorkflowConfig(
         entity_kind=EntityType.WORKFLOW,
@@ -97,5 +109,6 @@ ENTITY_CONFIG_REGISTRY: dict[EntityType, EntityConfig] = {
         traverser=WorkflowTraverser,
         pk_name="workflow_id",
         root_name="workflow",
+        title_paths=["workflow.description", "workflow.name"],
     ),
 }
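
get_title_from_fields tries the configured title_paths in order and returns the first non-empty value, falling back to "UNKNOWN"; for products, the description wins over the name when both are present. A small sketch of that resolution order using a stand-in for ExtractedField (the real type lives in orchestrator.search.core.types):

    from dataclasses import dataclass

    @dataclass
    class FakeField:  # stand-in for ExtractedField; only path and value matter here
        path: str
        value: str | None

    def resolve_title(title_paths: list[str], fields: list[FakeField]) -> str:
        for title_path in title_paths:
            for field in fields:
                if field.path == title_path and field.value:
                    return str(field.value)
        return "UNKNOWN"

    fields = [FakeField("product.description", None), FakeField("product.name", "core-link")]
    print(resolve_title(["product.description", "product.name"], fields))  # -> "core-link"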
orchestrator/search/llm_migration.py
@@ -37,6 +37,7 @@ def run_migration(connection: Connection) -> None:
     if llm_settings.LLM_FORCE_EXTENTION_MIGRATION or res.rowcount == 0:
         # Create PostgreSQL extensions
         logger.info("Attempting to run the extention creation;")
+        connection.execute(text('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";'))
         connection.execute(text("CREATE EXTENSION IF NOT EXISTS ltree;"))
         connection.execute(text("CREATE EXTENSION IF NOT EXISTS unaccent;"))
         connection.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;"))
@@ -64,6 +65,7 @@ def run_migration(connection: Connection) -> None:
             CREATE TABLE IF NOT EXISTS {TABLE} (
                 entity_type TEXT NOT NULL,
                 entity_id UUID NOT NULL,
+                entity_title TEXT,
                 path LTREE NOT NULL,
                 value TEXT NOT NULL,
                 embedding VECTOR({TARGET_DIM}),
@@ -78,6 +80,23 @@ def run_migration(connection: Connection) -> None:
     # Drop default
     connection.execute(text(f"ALTER TABLE {TABLE} ALTER COLUMN value_type DROP DEFAULT;"))

+    # Add entity_title column if it doesn't exist (for existing installations)
+    connection.execute(
+        text(
+            f"""
+            DO $$
+            BEGIN
+                IF NOT EXISTS (
+                    SELECT 1 FROM information_schema.columns
+                    WHERE table_name = '{TABLE}' AND column_name = 'entity_title'
+                ) THEN
+                    ALTER TABLE {TABLE} ADD COLUMN entity_title TEXT;
+                END IF;
+            END $$;
+            """
+        )
+    )
+
     # Create indexes with IF NOT EXISTS
     connection.execute(text(f"CREATE INDEX IF NOT EXISTS ix_ai_search_index_entity_id ON {TABLE} (entity_id);"))
     connection.execute(
@@ -96,6 +115,42 @@ def run_migration(connection: Connection) -> None:
         )
     )

+    # Create agent_runs table
+    connection.execute(
+        text(
+            """
+            CREATE TABLE IF NOT EXISTS agent_runs (
+                run_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+                agent_type VARCHAR(50) NOT NULL,
+                created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL
+            );
+            """
+        )
+    )
+    connection.execute(text("CREATE INDEX IF NOT EXISTS ix_agent_runs_created_at ON agent_runs (created_at);"))
+
+    # Create search_queries table
+    connection.execute(
+        text(
+            f"""
+            CREATE TABLE IF NOT EXISTS search_queries (
+                query_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+                run_id UUID,
+                query_number INTEGER NOT NULL,
+                parameters JSONB NOT NULL,
+                query_embedding VECTOR({TARGET_DIM}),
+                executed_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL,
+                CONSTRAINT fk_search_queries_run_id FOREIGN KEY (run_id) REFERENCES agent_runs(run_id) ON DELETE CASCADE
+            );
+            """
+        )
+    )
+    connection.execute(text("CREATE INDEX IF NOT EXISTS ix_search_queries_run_id ON search_queries (run_id);"))
+    connection.execute(
+        text("CREATE INDEX IF NOT EXISTS ix_search_queries_executed_at ON search_queries (executed_at);")
+    )
+    connection.execute(text("CREATE INDEX IF NOT EXISTS ix_search_queries_query_id ON search_queries (query_id);"))
+
     connection.commit()
     logger.info("LLM migration completed successfully")

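
All of the DDL above is guarded (CREATE ... IF NOT EXISTS plus a DO $$ block for the entity_title column), so run_migration can be re-run safely against an existing installation. A hedged invocation sketch; the connection string is a placeholder and only run_migration's signature is taken from the diff:

    from sqlalchemy import create_engine

    from orchestrator.search.llm_migration import run_migration

    engine = create_engine("postgresql://user:pass@localhost/orchestrator-core-db")  # placeholder DSN
    with engine.connect() as connection:
        run_migration(connection)  # idempotent: guarded DDL, commits internally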
orchestrator/search/retrieval/__init__.py
@@ -11,6 +11,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from .engine import execute_search
+from .engine import execute_search, execute_search_for_export
+from .query_state import SearchQueryState

-__all__ = ["execute_search"]
+__all__ = ["execute_search", "execute_search_for_export", "SearchQueryState"]
orchestrator/search/retrieval/builder.py
@@ -43,7 +43,11 @@ def build_candidate_query(params: BaseSearchParameters) -> Select:
         Select: The SQLAlchemy `Select` object representing the query.
     """

-    stmt =
+    stmt = (
+        select(AiSearchIndex.entity_id, AiSearchIndex.entity_title)
+        .where(AiSearchIndex.entity_type == params.entity_type.value)
+        .distinct()
+    )

     if params.filters is not None:
         entity_id_col = AiSearchIndex.entity_id
orchestrator/search/retrieval/engine.py
@@ -17,13 +17,15 @@ import structlog
 from sqlalchemy.engine.row import RowMapping
 from sqlalchemy.orm import Session

+from orchestrator.search.core.embedding import QueryEmbedder
 from orchestrator.search.core.types import FilterOp, SearchMetadata
 from orchestrator.search.filters import FilterTree, LtreeFilter
 from orchestrator.search.schemas.parameters import BaseSearchParameters
 from orchestrator.search.schemas.results import MatchingField, SearchResponse, SearchResult

 from .builder import build_candidate_query
-from .pagination import
+from .pagination import PageCursor
+from .query_state import SearchQueryState
 from .retrievers import Retriever
 from .utils import generate_highlight_indices

@@ -74,9 +76,15 @@ def _format_response(
         # Structured search (filter-only)
         matching_field = _extract_matching_field_from_filters(search_params.filters)

+        entity_title = row.get("entity_title", "")
+        if not isinstance(entity_title, str):
+            entity_title = str(entity_title) if entity_title is not None else ""
+
         results.append(
             SearchResult(
                 entity_id=str(row.entity_id),
+                entity_type=search_params.entity_type,
+                entity_title=entity_title,
                 score=row.score,
                 perfect_match=row.get("perfect_match", 0),
                 matching_field=matching_field,
@@ -110,45 +118,80 @@ def _extract_matching_field_from_filters(filters: FilterTree) -> MatchingField |
         return MatchingField(text=text, path=pf.path, highlight_indices=[(0, len(text))])


-async def
+async def _execute_search_internal(
     search_params: BaseSearchParameters,
     db_session: Session,
-
+    limit: int,
+    cursor: PageCursor | None = None,
+    query_embedding: list[float] | None = None,
 ) -> SearchResponse:
-    """
-
-    Builds a candidate entity query based on the given search parameters,
-    applies the appropriate ranking strategy, and executes the final ranked
-    query to retrieve results.
+    """Internal function to execute search with specified parameters.

     Args:
-        search_params
-        db_session
-
-
+        search_params: The search parameters specifying vector, fuzzy, or filter criteria.
+        db_session: The active SQLAlchemy session for executing the query.
+        limit: Maximum number of results to return.
+        cursor: Optional pagination cursor.
+        query_embedding: Optional pre-computed query embedding to use instead of generating a new one.

     Returns:
-        SearchResponse
-        and optional highlight metadata.
-
-    Notes:
-        If no vector query, filters, or fuzzy term are provided, a warning is logged
-        and an empty result set is returned.
+        SearchResponse with results and embedding (for internal use).
     """
-
     if not search_params.vector_query and not search_params.filters and not search_params.fuzzy_term:
         logger.warning("No search criteria provided (vector_query, fuzzy_term, or filters).")
         return SearchResponse(results=[], metadata=SearchMetadata.empty())

     candidate_query = build_candidate_query(search_params)

-
-
+    if search_params.vector_query and not query_embedding:
+
+        query_embedding = await QueryEmbedder.generate_for_text_async(search_params.vector_query)
+
+    retriever = await Retriever.route(search_params, cursor, query_embedding)
     logger.debug("Using retriever", retriever_type=retriever.__class__.__name__)

     final_stmt = retriever.apply(candidate_query)
-    final_stmt = final_stmt.limit(
+    final_stmt = final_stmt.limit(limit)
     logger.debug(final_stmt)
     result = db_session.execute(final_stmt).mappings().all()

-
+    response = _format_response(result, search_params, retriever.metadata)
+    # Store embedding in response for agent to save to DB
+    response.query_embedding = query_embedding
+    return response
+
+
+async def execute_search(
+    search_params: BaseSearchParameters,
+    db_session: Session,
+    cursor: PageCursor | None = None,
+    query_embedding: list[float] | None = None,
+) -> SearchResponse:
+    """Execute a search and return ranked results."""
+    return await _execute_search_internal(search_params, db_session, search_params.limit, cursor, query_embedding)
+
+
+async def execute_search_for_export(
+    query_state: SearchQueryState,
+    db_session: Session,
+) -> list[dict]:
+    """Execute a search for export and fetch flattened entity data.
+
+    Args:
+        query_state: Query state containing parameters and query_embedding.
+        db_session: The active SQLAlchemy session for executing the query.
+
+    Returns:
+        List of flattened entity records suitable for export.
+    """
+    from orchestrator.search.export import fetch_export_data
+
+    search_response = await _execute_search_internal(
+        search_params=query_state.parameters,
+        db_session=db_session,
+        limit=query_state.parameters.export_limit,
+        query_embedding=query_state.query_embedding,
+    )
+
+    entity_ids = [res.entity_id for res in search_response.results]
+    return fetch_export_data(query_state.parameters.entity_type, entity_ids)
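
execute_search_for_export ties the new pieces together: it replays a stored query through _execute_search_internal, reusing the persisted query_embedding instead of calling the embedder again, raises the limit to export_limit, and hands the matching entity IDs to fetch_export_data. A rough call sketch, assuming an async context, an open session, and a SearchQueryState populated elsewhere (for example by the search agent):

    from orchestrator.db import db
    from orchestrator.search.retrieval import execute_search_for_export

    async def export_saved_query(query_state) -> list[dict]:
        # query_state is assumed to carry .parameters (including export_limit)
        # and .query_embedding from an earlier agent search.
        return await execute_search_for_export(query_state, db.session)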