orchestrator-core 4.5.2-py3-none-any.whl → 4.6.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +2 -2
- orchestrator/agentic_app.py +3 -23
- orchestrator/api/api_v1/api.py +5 -0
- orchestrator/api/api_v1/endpoints/agent.py +49 -0
- orchestrator/api/api_v1/endpoints/search.py +120 -201
- orchestrator/app.py +1 -1
- orchestrator/cli/database.py +3 -0
- orchestrator/cli/generate.py +11 -4
- orchestrator/cli/generator/generator/migration.py +7 -3
- orchestrator/cli/main.py +1 -1
- orchestrator/cli/scheduler.py +15 -22
- orchestrator/cli/search/resize_embedding.py +28 -22
- orchestrator/cli/search/speedtest.py +4 -6
- orchestrator/db/__init__.py +6 -0
- orchestrator/db/models.py +75 -0
- orchestrator/llm_settings.py +18 -1
- orchestrator/migrations/helpers.py +47 -39
- orchestrator/schedules/scheduler.py +32 -15
- orchestrator/schedules/validate_products.py +1 -1
- orchestrator/schemas/search.py +8 -85
- orchestrator/search/agent/__init__.py +2 -2
- orchestrator/search/agent/agent.py +26 -30
- orchestrator/search/agent/json_patch.py +51 -0
- orchestrator/search/agent/prompts.py +35 -9
- orchestrator/search/agent/state.py +28 -2
- orchestrator/search/agent/tools.py +192 -53
- orchestrator/search/core/embedding.py +2 -2
- orchestrator/search/core/exceptions.py +6 -0
- orchestrator/search/core/types.py +1 -0
- orchestrator/search/export.py +199 -0
- orchestrator/search/indexing/indexer.py +13 -4
- orchestrator/search/indexing/registry.py +14 -1
- orchestrator/search/llm_migration.py +55 -0
- orchestrator/search/retrieval/__init__.py +3 -2
- orchestrator/search/retrieval/builder.py +5 -1
- orchestrator/search/retrieval/engine.py +66 -23
- orchestrator/search/retrieval/pagination.py +46 -56
- orchestrator/search/retrieval/query_state.py +61 -0
- orchestrator/search/retrieval/retrievers/base.py +26 -40
- orchestrator/search/retrieval/retrievers/fuzzy.py +10 -9
- orchestrator/search/retrieval/retrievers/hybrid.py +11 -8
- orchestrator/search/retrieval/retrievers/semantic.py +9 -8
- orchestrator/search/retrieval/retrievers/structured.py +6 -6
- orchestrator/search/schemas/parameters.py +17 -13
- orchestrator/search/schemas/results.py +4 -1
- orchestrator/settings.py +1 -0
- orchestrator/utils/auth.py +3 -2
- orchestrator/workflow.py +23 -6
- orchestrator/workflows/tasks/validate_product_type.py +3 -3
- {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/METADATA +17 -12
- {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/RECORD +53 -49
- {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/licenses/LICENSE +0 -0

orchestrator/search/agent/tools.py:

@@ -11,11 +11,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-from typing import Any
+import json
+from typing import Any
 
 import structlog
-from ag_ui.core import EventType, StateSnapshotEvent
+from ag_ui.core import EventType, StateDeltaEvent, StateSnapshotEvent
 from pydantic_ai import RunContext
 from pydantic_ai.ag_ui import StateDeps
 from pydantic_ai.exceptions import ModelRetry
@@ -25,34 +25,22 @@ from pydantic_ai.toolsets import FunctionToolset
 from orchestrator.api.api_v1.endpoints.search import (
     get_definitions,
     list_paths,
-    search_processes,
-    search_products,
-    search_subscriptions,
-    search_workflows,
 )
-from orchestrator.
+from orchestrator.db import AgentRunTable, SearchQueryTable, db
+from orchestrator.search.agent.json_patch import JSONPatchOp
+from orchestrator.search.agent.state import ExportData, SearchResultsData, SearchState
 from orchestrator.search.core.types import ActionType, EntityType, FilterOp
+from orchestrator.search.export import fetch_export_data
 from orchestrator.search.filters import FilterTree
+from orchestrator.search.retrieval.engine import execute_search
 from orchestrator.search.retrieval.exceptions import FilterValidationError, PathNotFoundError
+from orchestrator.search.retrieval.query_state import SearchQueryState
 from orchestrator.search.retrieval.validation import validate_filter_tree
-from orchestrator.search.schemas.parameters import
-
-from .state import SearchState
+from orchestrator.search.schemas.parameters import BaseSearchParameters
+from orchestrator.settings import app_settings
 
 logger = structlog.get_logger(__name__)
 
-
-P = TypeVar("P", bound=BaseSearchParameters)
-
-SearchFn = Callable[[P], Awaitable[SearchResultsSchema[Any]]]
-
-SEARCH_FN_MAP: dict[EntityType, SearchFn] = {
-    EntityType.SUBSCRIPTION: search_subscriptions,
-    EntityType.WORKFLOW: search_workflows,
-    EntityType.PRODUCT: search_products,
-    EntityType.PROCESS: search_processes,
-}
-
 search_toolset: FunctionToolset[StateDeps[SearchState]] = FunctionToolset(max_retries=1)
 
 
@@ -65,32 +53,50 @@ def last_user_message(ctx: RunContext[StateDeps[SearchState]]) -> str | None:
     return None
 
 
+def _set_parameters(
+    ctx: RunContext[StateDeps[SearchState]],
+    entity_type: EntityType,
+    action: str | ActionType,
+    query: str,
+    filters: Any | None,
+) -> None:
+    """Internal helper to set parameters."""
+    ctx.deps.state.parameters = {
+        "action": action,
+        "entity_type": entity_type,
+        "filters": filters,
+        "query": query,
+    }
+
+
 @search_toolset.tool
-async def
+async def start_new_search(
     ctx: RunContext[StateDeps[SearchState]],
     entity_type: EntityType,
     action: str | ActionType = ActionType.SELECT,
 ) -> StateSnapshotEvent:
-    """
+    """Starts a completely new search, clearing all previous state.
 
-    This MUST be the first tool called
-    Warning:
+    This MUST be the first tool called when the user asks for a NEW search.
+    Warning: This will erase any existing filters, results, and search state.
     """
-
-    is_new_search = params.get("entity_type") != entity_type.value
-    final_query = (last_user_message(ctx) or "") if is_new_search else params.get("query", "")
+    final_query = last_user_message(ctx) or ""
 
     logger.debug(
-        "
+        "Starting new search",
         entity_type=entity_type.value,
         action=action,
-        is_new_search=is_new_search,
         query=final_query,
     )
 
-
-    ctx.deps.state.
-
+    # Clear all state
+    ctx.deps.state.results_data = None
+    ctx.deps.state.export_data = None
+
+    # Set fresh parameters with no filters
+    _set_parameters(ctx, entity_type, action, final_query, None)
+
+    logger.debug("New search started", parameters=ctx.deps.state.parameters)
 
     return StateSnapshotEvent(
         type=EventType.STATE_SNAPSHOT,
@@ -102,7 +108,7 @@ async def set_search_parameters(
 async def set_filter_tree(
     ctx: RunContext[StateDeps[SearchState]],
     filters: FilterTree | None,
-) ->
+) -> StateDeltaEvent:
     """Replace current filters atomically with a full FilterTree, or clear with None.
 
     Requirements:
@@ -111,7 +117,7 @@ async def set_filter_tree(
     - See the FilterTree schema examples for the exact shape.
     """
     if ctx.deps.state.parameters is None:
-        raise ModelRetry("Search parameters are not initialized. Call
+        raise ModelRetry("Search parameters are not initialized. Call start_new_search first.")
 
     entity_type = EntityType(ctx.deps.state.parameters["entity_type"])
 
@@ -136,28 +142,33 @@ async def set_filter_tree(
         raise ModelRetry(f"Filter validation failed: {str(e)}. Please check your filter structure and try again.")
 
     filter_data = None if filters is None else filters.model_dump(mode="json", by_alias=True)
+    filters_existed = "filters" in ctx.deps.state.parameters
     ctx.deps.state.parameters["filters"] = filter_data
-    return
+    return StateDeltaEvent(
+        type=EventType.STATE_DELTA,
+        delta=[
+            JSONPatchOp.upsert(
+                path="/parameters/filters",
+                value=filter_data,
+                existed=filters_existed,
+            )
+        ],
+    )
 
 
 @search_toolset.tool
-async def
+async def run_search(
     ctx: RunContext[StateDeps[SearchState]],
     limit: int = 10,
-) ->
-    """Execute the search with the current parameters."""
+) -> StateDeltaEvent:
+    """Execute the search with the current parameters and save to database."""
     if not ctx.deps.state.parameters:
         raise ValueError("No search parameters set")
 
-
-    param_class = PARAMETER_REGISTRY.get(entity_type)
-    if not param_class:
-        raise ValueError(f"Unknown entity type: {entity_type}")
-
-    params = param_class(**ctx.deps.state.parameters)
+    params = BaseSearchParameters.create(**ctx.deps.state.parameters)
     logger.debug(
         "Executing database search",
-        search_entity_type=entity_type.value,
+        search_entity_type=params.entity_type.value,
         limit=limit,
         has_filters=params.filters is not None,
         query=params.query,
@@ -169,17 +180,61 @@ async def execute_search(
 
     params.limit = limit
 
-
-
+    changes: list[JSONPatchOp] = []
+
+    if not ctx.deps.state.run_id:
+        agent_run = AgentRunTable(agent_type="search")
+
+        db.session.add(agent_run)
+        db.session.commit()
+        db.session.expire_all()  # Release connection to prevent stacking while agent runs
+
+        ctx.deps.state.run_id = agent_run.run_id
+        logger.debug("Created new agent run", run_id=str(agent_run.run_id))
+        changes.append(JSONPatchOp(op="add", path="/run_id", value=str(ctx.deps.state.run_id)))
+
+    # Get query with embedding and save to DB
+    search_response = await execute_search(params, db.session)
+    query_embedding = search_response.query_embedding
+    query_state = SearchQueryState(parameters=params, query_embedding=query_embedding)
+    query_number = db.session.query(SearchQueryTable).filter_by(run_id=ctx.deps.state.run_id).count() + 1
+    search_query = SearchQueryTable.from_state(
+        state=query_state,
+        run_id=ctx.deps.state.run_id,
+        query_number=query_number,
+    )
+    db.session.add(search_query)
+    db.session.commit()
+    db.session.expire_all()
+
+    query_id_existed = ctx.deps.state.query_id is not None
+    ctx.deps.state.query_id = search_query.query_id
+    logger.debug("Saved search query", query_id=str(search_query.query_id), query_number=query_number)
+    changes.append(JSONPatchOp.upsert(path="/query_id", value=str(ctx.deps.state.query_id), existed=query_id_existed))
 
     logger.debug(
         "Search completed",
-        total_results=len(
+        total_results=len(search_response.results),
     )
 
-
+    # Store results data for both frontend display and agent context
+    results_url = f"{app_settings.BASE_URL}/api/search/queries/{ctx.deps.state.query_id}"
+
+    results_data_existed = ctx.deps.state.results_data is not None
+    ctx.deps.state.results_data = SearchResultsData(
+        query_id=str(ctx.deps.state.query_id),
+        results_url=results_url,
+        total_count=len(search_response.results),
+        message=f"Found {len(search_response.results)} results.",
+        results=search_response.results,  # Include actual results in state
+    )
+    changes.append(
+        JSONPatchOp.upsert(
+            path="/results_data", value=ctx.deps.state.results_data.model_dump(), existed=results_data_existed
+        )
+    )
 
-    return
+    return StateDeltaEvent(type=EventType.STATE_DELTA, delta=changes)
 
 
 @search_toolset.tool
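
The `StateDeltaEvent`s returned by `set_filter_tree` and `run_search` carry RFC 6902 JSON Patch operations built by the new `orchestrator/search/agent/json_patch.py` module (+51 lines, not shown in this excerpt). Based purely on how `JSONPatchOp` is called above, a minimal sketch of such a helper could look like the following; the actual module may differ.

```python
# Hypothetical sketch of JSONPatchOp, inferred only from its usage in tools.py above.
from typing import Any

from pydantic import BaseModel


class JSONPatchOp(BaseModel):
    """A single RFC 6902 JSON Patch operation applied to the shared agent state."""

    op: str          # "add", "replace", "remove", ...
    path: str        # JSON Pointer into the state, e.g. "/parameters/filters"
    value: Any = None

    @classmethod
    def upsert(cls, path: str, value: Any, existed: bool) -> "JSONPatchOp":
        # RFC 6902 only allows "replace" on paths that already exist,
        # so fall back to "add" for new paths.
        return cls(op="replace" if existed else "add", path=path, value=value)
```

A helper along these lines would explain why the tools record `filters_existed`, `query_id_existed`, and `results_data_existed` before overwriting the corresponding state fields.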
@@ -256,3 +311,87 @@ async def get_valid_operators() -> dict[str, list[FilterOp]]:
         if hasattr(type_def, "operators"):
             operator_map[key] = type_def.operators
     return operator_map
+
+
+@search_toolset.tool
+async def fetch_entity_details(
+    ctx: RunContext[StateDeps[SearchState]],
+    limit: int = 10,
+) -> str:
+    """Fetch detailed entity information to answer user questions.
+
+    Use this tool when you need detailed information about entities from the search results
+    to answer the user's question. This provides the same detailed data that would be
+    included in an export (e.g., subscription status, product details, workflow info, etc.).
+
+    Args:
+        ctx: Runtime context for agent (injected).
+        limit: Maximum number of entities to fetch details for (default 10).
+
+    Returns:
+        JSON string containing detailed entity information.
+
+    Raises:
+        ValueError: If no search results are available.
+    """
+    if not ctx.deps.state.results_data or not ctx.deps.state.results_data.results:
+        raise ValueError("No search results available. Run a search first before fetching entity details.")
+
+    if not ctx.deps.state.parameters:
+        raise ValueError("No search parameters found.")
+
+    entity_type = EntityType(ctx.deps.state.parameters["entity_type"])
+
+    entity_ids = [r.entity_id for r in ctx.deps.state.results_data.results[:limit]]
+
+    logger.debug(
+        "Fetching detailed entity data",
+        entity_type=entity_type.value,
+        entity_count=len(entity_ids),
+    )
+
+    detailed_data = fetch_export_data(entity_type, entity_ids)
+
+    return json.dumps(detailed_data, indent=2)
+
+
+@search_toolset.tool
+async def prepare_export(
+    ctx: RunContext[StateDeps[SearchState]],
+) -> StateSnapshotEvent:
+    """Prepares export URL using the last executed search query."""
+    if not ctx.deps.state.query_id or not ctx.deps.state.run_id:
+        raise ValueError("No search has been executed yet. Run a search first before exporting.")
+
+    if not ctx.deps.state.parameters:
+        raise ValueError("No search parameters found. Run a search first before exporting.")
+
+    # Validate that export is only available for SELECT actions
+    action = ctx.deps.state.parameters.get("action", ActionType.SELECT)
+    if action != ActionType.SELECT:
+        raise ValueError(
+            f"Export is only available for SELECT actions. Current action is '{action}'. "
+            "Please run a SELECT search first."
+        )
+
+    logger.debug(
+        "Prepared query for export",
+        query_id=str(ctx.deps.state.query_id),
+    )
+
+    download_url = f"{app_settings.BASE_URL}/api/search/queries/{ctx.deps.state.query_id}/export"
+
+    ctx.deps.state.export_data = ExportData(
+        query_id=str(ctx.deps.state.query_id),
+        download_url=download_url,
+        message="Export ready for download.",
+    )
+
+    logger.debug("Export data set in state", export_data=ctx.deps.state.export_data.model_dump())
+
+    # Should use StateDelta here? Use snapshot to workaround state persistence issue
+    # TODO: Fix root cause; state is empty on frontend when it should have data from run_search
+    return StateSnapshotEvent(
+        type=EventType.STATE_SNAPSHOT,
+        snapshot=ctx.deps.state.model_dump(),
+    )
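
`ExportData`, `SearchResultsData`, and the extended `SearchState` come from `orchestrator/search/agent/state.py` (+28 -2), which is not part of this excerpt. Judging only from the fields the tools read and write, the state models could be shaped roughly like this (a sketch, not the actual definitions):

```python
# Hypothetical sketch of the agent state models, inferred from run_search,
# fetch_entity_details and prepare_export above.
from typing import Any
from uuid import UUID

from pydantic import BaseModel


class SearchResultsData(BaseModel):
    query_id: str
    results_url: str
    total_count: int
    message: str
    results: list[Any] = []


class ExportData(BaseModel):
    query_id: str
    download_url: str
    message: str


class SearchState(BaseModel):
    parameters: dict[str, Any] | None = None
    run_id: UUID | None = None
    query_id: UUID | None = None
    results_data: SearchResultsData | None = None
    export_data: ExportData | None = None
```

Note that `prepare_export` deliberately returns a full `StateSnapshotEvent` rather than a delta; the inline TODO marks this as a workaround for a state-persistence issue on the frontend.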
orchestrator/search/core/embedding.py:

@@ -42,7 +42,7 @@ class EmbeddingIndexer:
                 max_retries=llm_settings.LLM_MAX_RETRIES,
             )
             data = sorted(resp.data, key=lambda e: e["index"])
-            return [row["embedding"] for row in data]
+            return [row["embedding"][: llm_settings.EMBEDDING_DIMENSION] for row in data]
         except (llm_exc.APIError, llm_exc.APIConnectionError, llm_exc.RateLimitError, llm_exc.Timeout) as e:
             logger.error("Embedding request failed", error=str(e))
             return [[] for _ in texts]
@@ -67,7 +67,7 @@ class QueryEmbedder:
                 timeout=5.0,
                 max_retries=0,  # No retries, prioritize speed.
             )
-            return resp.data[0]["embedding"]
+            return resp.data[0]["embedding"][: llm_settings.EMBEDDING_DIMENSION]
         except Exception as e:
             logger.error("Async embedding generation failed", error=str(e))
             return []
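
Both embedding paths now truncate the returned vector to `llm_settings.EMBEDDING_DIMENSION`, so the stored dimension can be smaller than what the model returns; this lines up with the `resize_embedding` CLI changes listed above. A small illustration of the slicing (the dimension and values below are made up):

```python
# Illustration of the truncation applied above; the dimension and vector
# values are invented for the example.
EMBEDDING_DIMENSION = 4

raw_embedding = [0.12, -0.03, 0.56, 0.44, 0.09, -0.21]  # model returned 6 dims
stored = raw_embedding[:EMBEDDING_DIMENSION]

assert stored == [0.12, -0.03, 0.56, 0.44]
# Slicing never pads: a model that returns fewer dimensions than
# EMBEDDING_DIMENSION is stored as-is.
```

Truncating rather than re-embedding only preserves quality for models whose leading dimensions carry most of the signal (Matryoshka-style embeddings); whether the shortened vectors are re-normalized downstream is not visible in this hunk.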
orchestrator/search/export.py (new file):

@@ -0,0 +1,199 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from uuid import UUID
+
+from sqlalchemy import select
+from sqlalchemy.orm import selectinload
+
+from orchestrator.db import (
+    ProcessTable,
+    ProductTable,
+    SubscriptionTable,
+    WorkflowTable,
+    db,
+)
+from orchestrator.search.core.types import EntityType
+
+
+def fetch_subscription_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch subscription data for export.
+
+    Args:
+        entity_ids: List of subscription IDs as strings
+
+    Returns:
+        List of flattened subscription dictionaries with fields:
+        subscription_id, description, status, insync, start_date, end_date,
+        note, product_name, tag, product_type, customer_id
+    """
+    stmt = (
+        select(
+            SubscriptionTable.subscription_id,
+            SubscriptionTable.description,
+            SubscriptionTable.status,
+            SubscriptionTable.insync,
+            SubscriptionTable.start_date,
+            SubscriptionTable.end_date,
+            SubscriptionTable.note,
+            SubscriptionTable.customer_id,
+            ProductTable.name.label("product_name"),
+            ProductTable.tag,
+            ProductTable.product_type,
+        )
+        .join(ProductTable, SubscriptionTable.product_id == ProductTable.product_id)
+        .filter(SubscriptionTable.subscription_id.in_([UUID(sid) for sid in entity_ids]))
+    )
+
+    rows = db.session.execute(stmt).all()
+
+    return [
+        {
+            "subscription_id": str(row.subscription_id),
+            "description": row.description,
+            "status": row.status,
+            "insync": row.insync,
+            "start_date": row.start_date.isoformat() if row.start_date else None,
+            "end_date": row.end_date.isoformat() if row.end_date else None,
+            "note": row.note,
+            "product_name": row.product_name,
+            "tag": row.tag,
+            "product_type": row.product_type,
+            "customer_id": row.customer_id,
+        }
+        for row in rows
+    ]
+
+
+def fetch_workflow_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch workflow data for export.
+
+    Args:
+        entity_ids: List of workflow names as strings
+
+    Returns:
+        List of flattened workflow dictionaries with fields:
+        name, description, created_at, product_names (comma-separated),
+        product_ids (comma-separated), product_types (comma-separated)
+    """
+    stmt = (
+        select(WorkflowTable).options(selectinload(WorkflowTable.products)).filter(WorkflowTable.name.in_(entity_ids))
+    )
+    workflows = db.session.scalars(stmt).all()
+
+    return [
+        {
+            "name": w.name,
+            "description": w.description,
+            "created_at": w.created_at.isoformat() if w.created_at else None,
+            "product_names": ", ".join(p.name for p in w.products),
+            "product_ids": ", ".join(str(p.product_id) for p in w.products),
+            "product_types": ", ".join(p.product_type for p in w.products),
+        }
+        for w in workflows
+    ]
+
+
+def fetch_product_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch product data for export.
+
+    Args:
+        entity_ids: List of product IDs as strings
+
+    Returns:
+        List of flattened product dictionaries with fields:
+        product_id, name, product_type, tag, description, status, created_at
+    """
+    stmt = (
+        select(ProductTable)
+        .options(
+            selectinload(ProductTable.workflows),
+            selectinload(ProductTable.fixed_inputs),
+            selectinload(ProductTable.product_blocks),
+        )
+        .filter(ProductTable.product_id.in_([UUID(pid) for pid in entity_ids]))
+    )
+    products = db.session.scalars(stmt).all()
+
+    return [
+        {
+            "product_id": str(p.product_id),
+            "name": p.name,
+            "product_type": p.product_type,
+            "tag": p.tag,
+            "description": p.description,
+            "status": p.status,
+            "created_at": p.created_at.isoformat() if p.created_at else None,
+        }
+        for p in products
+    ]
+
+
+def fetch_process_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch process data for export.
+
+    Args:
+        entity_ids: List of process IDs as strings
+
+    Returns:
+        List of flattened process dictionaries with fields:
+        process_id, workflow_name, workflow_id, last_status, is_task,
+        created_by, started_at, last_modified_at, last_step
+    """
+    stmt = (
+        select(ProcessTable)
+        .options(selectinload(ProcessTable.workflow))
+        .filter(ProcessTable.process_id.in_([UUID(pid) for pid in entity_ids]))
+    )
+    processes = db.session.scalars(stmt).all()
+
+    return [
+        {
+            "process_id": str(p.process_id),
+            "workflow_name": p.workflow.name if p.workflow else None,
+            "workflow_id": str(p.workflow_id),
+            "last_status": p.last_status,
+            "is_task": p.is_task,
+            "created_by": p.created_by,
+            "started_at": p.started_at.isoformat() if p.started_at else None,
+            "last_modified_at": p.last_modified_at.isoformat() if p.last_modified_at else None,
+            "last_step": p.last_step,
+        }
+        for p in processes
+    ]
+
+
+def fetch_export_data(entity_type: EntityType, entity_ids: list[str]) -> list[dict]:
+    """Fetch export data for any entity type.
+
+    Args:
+        entity_type: The type of entities to fetch
+        entity_ids: List of entity IDs/names as strings
+
+    Returns:
+        List of flattened entity dictionaries ready for CSV export
+
+    Raises:
+        ValueError: If entity_type is not supported
+    """
+    match entity_type:
+        case EntityType.SUBSCRIPTION:
+            return fetch_subscription_export_data(entity_ids)
+        case EntityType.WORKFLOW:
+            return fetch_workflow_export_data(entity_ids)
+        case EntityType.PRODUCT:
+            return fetch_product_export_data(entity_ids)
+        case EntityType.PROCESS:
+            return fetch_process_export_data(entity_ids)
+        case _:
+            raise ValueError(f"Unsupported entity type: {entity_type}")
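
The export helpers return flat dictionaries described as "ready for CSV export"; note that workflows are looked up by name rather than UUID, matching how workflow results are identified. The actual `/export` endpoint lives in `orchestrator/api/api_v1/endpoints/search.py` and is not shown in this excerpt; purely as an illustration, a caller could serialize the result like this:

```python
# Illustrative CSV serialization of fetch_export_data output; this is NOT the
# actual /export endpoint implementation, which is not part of this excerpt.
import csv
import io

from orchestrator.search.core.types import EntityType
from orchestrator.search.export import fetch_export_data


def export_as_csv(entity_type: EntityType, entity_ids: list[str]) -> str:
    rows = fetch_export_data(entity_type, entity_ids)
    if not rows:
        return ""

    buffer = io.StringIO()
    writer = csv.DictWriter(buffer, fieldnames=list(rows[0].keys()))
    writer.writeheader()
    writer.writerows(rows)
    return buffer.getvalue()
```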
orchestrator/search/indexing/indexer.py:

@@ -96,6 +96,7 @@ class Indexer:
         self.chunk_size = chunk_size
         self.embedding_model = llm_settings.EMBEDDING_MODEL
         self.logger = logger.bind(entity_kind=config.entity_kind.value)
+        self._entity_titles: dict[str, str] = {}
 
     def run(self, entities: Iterable[DatabaseEntity]) -> int:
         """Orchestrates the entire indexing process."""
@@ -138,6 +139,8 @@ class Indexer:
         if not entity_chunk:
             return 0, 0
 
+        self._entity_titles.clear()
+
         fields_to_upsert, paths_to_delete, identical_count = self._determine_changes(entity_chunk, session)
 
         if paths_to_delete and session is not None:
@@ -174,12 +177,15 @@ class Indexer:
                 entity, pk_name=self.config.pk_name, root_name=self.config.root_name
             )
 
+            entity_title = self.config.get_title_from_fields(current_fields)
+            self._entity_titles[entity_id] = entity_title
+
             entity_hashes = existing_hashes.get(entity_id, {})
             current_paths = set()
 
             for field in current_fields:
                 current_paths.add(field.path)
-                current_hash = self._compute_content_hash(field.path, field.value, field.value_type)
+                current_hash = self._compute_content_hash(field.path, field.value, field.value_type, entity_title)
                 if field.path not in entity_hashes or entity_hashes[field.path] != current_hash:
                     fields_to_upsert.append((entity_id, field))
                 else:
@@ -301,21 +307,23 @@ class Indexer:
         return f"{field.path}: {str(field.value)}"
 
     @staticmethod
-    def _compute_content_hash(path: str, value: Any, value_type: Any) -> str:
+    def _compute_content_hash(path: str, value: Any, value_type: Any, entity_title: str = "") -> str:
         v = "" if value is None else str(value)
-        content = f"{path}:{v}:{value_type}"
+        content = f"{path}:{v}:{value_type}:{entity_title}"
        return hashlib.sha256(content.encode("utf-8")).hexdigest()
 
     def _make_indexable_record(
         self, field: ExtractedField, entity_id: str, embedding: list[float] | None
     ) -> IndexableRecord:
+        entity_title = self._entity_titles[entity_id]
         return IndexableRecord(
             entity_id=entity_id,
             entity_type=self.config.entity_kind.value,
+            entity_title=entity_title,
             path=Ltree(field.path),
             value=field.value,
             value_type=field.value_type,
-            content_hash=self._compute_content_hash(field.path, field.value, field.value_type),
+            content_hash=self._compute_content_hash(field.path, field.value, field.value_type, entity_title),
             embedding=embedding if embedding else None,
         )
 
@@ -326,6 +334,7 @@ class Indexer:
         return stmt.on_conflict_do_update(
             index_elements=[AiSearchIndex.entity_id, AiSearchIndex.path],
             set_={
+                AiSearchIndex.entity_title: stmt.excluded.entity_title,
                 AiSearchIndex.value: stmt.excluded.value,
                 AiSearchIndex.value_type: stmt.excluded.value_type,
                 AiSearchIndex.content_hash: stmt.excluded.content_hash,
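
With the entity title folded into the content hash, a change to an entity's title invalidates the hashes of all of its indexed fields, so they are re-upserted (and typically re-embedded) on the next indexing run even when the field values themselves are unchanged. A quick illustration using the same hashing scheme as `_compute_content_hash` above:

```python
# Mirrors the _compute_content_hash logic shown in the diff above.
import hashlib


def compute_content_hash(path: str, value: object, value_type: object, entity_title: str = "") -> str:
    v = "" if value is None else str(value)
    content = f"{path}:{v}:{value_type}:{entity_title}"
    return hashlib.sha256(content.encode("utf-8")).hexdigest()


# The path, value and titles below are made-up example data.
before = compute_content_hash("subscription.status", "active", "string", "Old subscription title")
after = compute_content_hash("subscription.status", "active", "string", "New subscription title")
assert before != after  # same field, different title -> different hash
```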