orchestrator-core 4.5.3-py3-none-any.whl → 4.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. orchestrator/__init__.py +2 -2
  2. orchestrator/agentic_app.py +3 -23
  3. orchestrator/api/api_v1/api.py +5 -0
  4. orchestrator/api/api_v1/endpoints/agent.py +49 -0
  5. orchestrator/api/api_v1/endpoints/search.py +120 -201
  6. orchestrator/app.py +1 -1
  7. orchestrator/cli/database.py +3 -0
  8. orchestrator/cli/generate.py +11 -4
  9. orchestrator/cli/generator/generator/migration.py +7 -3
  10. orchestrator/cli/main.py +1 -1
  11. orchestrator/cli/scheduler.py +15 -22
  12. orchestrator/cli/search/resize_embedding.py +28 -22
  13. orchestrator/cli/search/speedtest.py +4 -6
  14. orchestrator/db/__init__.py +6 -0
  15. orchestrator/db/models.py +75 -0
  16. orchestrator/llm_settings.py +18 -1
  17. orchestrator/migrations/helpers.py +47 -39
  18. orchestrator/schedules/scheduler.py +32 -15
  19. orchestrator/schedules/validate_products.py +1 -1
  20. orchestrator/schemas/search.py +8 -85
  21. orchestrator/search/agent/__init__.py +2 -2
  22. orchestrator/search/agent/agent.py +26 -30
  23. orchestrator/search/agent/json_patch.py +51 -0
  24. orchestrator/search/agent/prompts.py +35 -9
  25. orchestrator/search/agent/state.py +28 -2
  26. orchestrator/search/agent/tools.py +192 -53
  27. orchestrator/search/core/embedding.py +2 -2
  28. orchestrator/search/core/exceptions.py +6 -0
  29. orchestrator/search/core/types.py +1 -0
  30. orchestrator/search/export.py +199 -0
  31. orchestrator/search/indexing/indexer.py +13 -4
  32. orchestrator/search/indexing/registry.py +14 -1
  33. orchestrator/search/llm_migration.py +55 -0
  34. orchestrator/search/retrieval/__init__.py +3 -2
  35. orchestrator/search/retrieval/builder.py +5 -1
  36. orchestrator/search/retrieval/engine.py +66 -23
  37. orchestrator/search/retrieval/pagination.py +46 -56
  38. orchestrator/search/retrieval/query_state.py +61 -0
  39. orchestrator/search/retrieval/retrievers/base.py +26 -40
  40. orchestrator/search/retrieval/retrievers/fuzzy.py +10 -9
  41. orchestrator/search/retrieval/retrievers/hybrid.py +11 -8
  42. orchestrator/search/retrieval/retrievers/semantic.py +9 -8
  43. orchestrator/search/retrieval/retrievers/structured.py +6 -6
  44. orchestrator/search/schemas/parameters.py +17 -13
  45. orchestrator/search/schemas/results.py +4 -1
  46. orchestrator/settings.py +1 -0
  47. orchestrator/utils/auth.py +3 -2
  48. orchestrator/workflow.py +23 -6
  49. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0.dist-info}/METADATA +16 -11
  50. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0.dist-info}/RECORD +52 -48
  51. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0.dist-info}/WHEEL +0 -0
  52. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -11,11 +11,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from collections.abc import Awaitable, Callable
-from typing import Any, TypeVar
+import json
+from typing import Any
 
 import structlog
-from ag_ui.core import EventType, StateSnapshotEvent
+from ag_ui.core import EventType, StateDeltaEvent, StateSnapshotEvent
 from pydantic_ai import RunContext
 from pydantic_ai.ag_ui import StateDeps
 from pydantic_ai.exceptions import ModelRetry
@@ -25,34 +25,22 @@ from pydantic_ai.toolsets import FunctionToolset
 from orchestrator.api.api_v1.endpoints.search import (
     get_definitions,
     list_paths,
-    search_processes,
-    search_products,
-    search_subscriptions,
-    search_workflows,
 )
-from orchestrator.schemas.search import SearchResultsSchema
+from orchestrator.db import AgentRunTable, SearchQueryTable, db
+from orchestrator.search.agent.json_patch import JSONPatchOp
+from orchestrator.search.agent.state import ExportData, SearchResultsData, SearchState
 from orchestrator.search.core.types import ActionType, EntityType, FilterOp
+from orchestrator.search.export import fetch_export_data
 from orchestrator.search.filters import FilterTree
+from orchestrator.search.retrieval.engine import execute_search
 from orchestrator.search.retrieval.exceptions import FilterValidationError, PathNotFoundError
+from orchestrator.search.retrieval.query_state import SearchQueryState
 from orchestrator.search.retrieval.validation import validate_filter_tree
-from orchestrator.search.schemas.parameters import PARAMETER_REGISTRY, BaseSearchParameters
-
-from .state import SearchState
+from orchestrator.search.schemas.parameters import BaseSearchParameters
+from orchestrator.settings import app_settings
 
 logger = structlog.get_logger(__name__)
 
-
-P = TypeVar("P", bound=BaseSearchParameters)
-
-SearchFn = Callable[[P], Awaitable[SearchResultsSchema[Any]]]
-
-SEARCH_FN_MAP: dict[EntityType, SearchFn] = {
-    EntityType.SUBSCRIPTION: search_subscriptions,
-    EntityType.WORKFLOW: search_workflows,
-    EntityType.PRODUCT: search_products,
-    EntityType.PROCESS: search_processes,
-}
-
 search_toolset: FunctionToolset[StateDeps[SearchState]] = FunctionToolset(max_retries=1)
 
 
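Note: the SEARCH_FN_MAP indirection removed above is replaced by the
BaseSearchParameters.create(...) factory call used in run_search later in this
diff. parameters.py is not expanded here (+17 -13), so the sketch below is only
one plausible shape for such a factory; the registry and subclass names are
assumptions, not code from 4.6.0.

    from enum import Enum
    from typing import Any

    from pydantic import BaseModel


    class EntityType(str, Enum):  # stand-in for orchestrator.search.core.types.EntityType
        SUBSCRIPTION = "SUBSCRIPTION"
        WORKFLOW = "WORKFLOW"


    class BaseSearchParameters(BaseModel):
        entity_type: EntityType
        query: str = ""
        filters: Any | None = None
        limit: int = 10

        @classmethod
        def create(cls, entity_type: EntityType | str, **kwargs: Any) -> "BaseSearchParameters":
            # Dispatch to the subclass registered for entity_type; fall back to the base.
            registry: dict[EntityType, type[BaseSearchParameters]] = {
                EntityType.SUBSCRIPTION: SubscriptionSearchParameters,
            }
            entity = EntityType(entity_type)
            return registry.get(entity, cls)(entity_type=entity, **kwargs)


    class SubscriptionSearchParameters(BaseSearchParameters):
        """Hypothetical subclass for subscription-specific parameters."""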
@@ -65,32 +53,50 @@ def last_user_message(ctx: RunContext[StateDeps[SearchState]]) -> str | None:
     return None
 
 
+def _set_parameters(
+    ctx: RunContext[StateDeps[SearchState]],
+    entity_type: EntityType,
+    action: str | ActionType,
+    query: str,
+    filters: Any | None,
+) -> None:
+    """Internal helper to set parameters."""
+    ctx.deps.state.parameters = {
+        "action": action,
+        "entity_type": entity_type,
+        "filters": filters,
+        "query": query,
+    }
+
+
 @search_toolset.tool
-async def set_search_parameters(
+async def start_new_search(
     ctx: RunContext[StateDeps[SearchState]],
     entity_type: EntityType,
     action: str | ActionType = ActionType.SELECT,
 ) -> StateSnapshotEvent:
-    """Sets the initial search context, like the entity type and the user's query.
+    """Starts a completely new search, clearing all previous state.
 
-    This MUST be the first tool called to start any new search.
-    Warning: Calling this tool will erase any existing filters and search results from the state.
+    This MUST be the first tool called when the user asks for a NEW search.
+    Warning: This will erase any existing filters, results, and search state.
     """
-    params = ctx.deps.state.parameters or {}
-    is_new_search = params.get("entity_type") != entity_type.value
-    final_query = (last_user_message(ctx) or "") if is_new_search else params.get("query", "")
+    final_query = last_user_message(ctx) or ""
 
     logger.debug(
-        "Setting search parameters",
+        "Starting new search",
         entity_type=entity_type.value,
         action=action,
-        is_new_search=is_new_search,
         query=final_query,
     )
 
-    ctx.deps.state.parameters = {"action": action, "entity_type": entity_type, "filters": None, "query": final_query}
-    ctx.deps.state.results = []
-    logger.debug("Search parameters set", parameters=ctx.deps.state.parameters)
+    # Clear all state
+    ctx.deps.state.results_data = None
+    ctx.deps.state.export_data = None
+
+    # Set fresh parameters with no filters
+    _set_parameters(ctx, entity_type, action, final_query, None)
+
+    logger.debug("New search started", parameters=ctx.deps.state.parameters)
 
     return StateSnapshotEvent(
         type=EventType.STATE_SNAPSHOT,
@@ -102,7 +108,7 @@ async def set_search_parameters(
 async def set_filter_tree(
     ctx: RunContext[StateDeps[SearchState]],
     filters: FilterTree | None,
-) -> StateSnapshotEvent:
+) -> StateDeltaEvent:
     """Replace current filters atomically with a full FilterTree, or clear with None.
 
     Requirements:
@@ -111,7 +117,7 @@ async def set_filter_tree(
     - See the FilterTree schema examples for the exact shape.
     """
     if ctx.deps.state.parameters is None:
-        raise ModelRetry("Search parameters are not initialized. Call set_search_parameters first.")
+        raise ModelRetry("Search parameters are not initialized. Call start_new_search first.")
 
     entity_type = EntityType(ctx.deps.state.parameters["entity_type"])
 
@@ -136,28 +142,33 @@
         raise ModelRetry(f"Filter validation failed: {str(e)}. Please check your filter structure and try again.")
 
     filter_data = None if filters is None else filters.model_dump(mode="json", by_alias=True)
+    filters_existed = "filters" in ctx.deps.state.parameters
     ctx.deps.state.parameters["filters"] = filter_data
-    return StateSnapshotEvent(type=EventType.STATE_SNAPSHOT, snapshot=ctx.deps.state.model_dump())
+    return StateDeltaEvent(
+        type=EventType.STATE_DELTA,
+        delta=[
+            JSONPatchOp.upsert(
+                path="/parameters/filters",
+                value=filter_data,
+                existed=filters_existed,
+            )
+        ],
+    )
 
 
 @search_toolset.tool
-async def execute_search(
+async def run_search(
     ctx: RunContext[StateDeps[SearchState]],
     limit: int = 10,
-) -> StateSnapshotEvent:
-    """Execute the search with the current parameters."""
+) -> StateDeltaEvent:
+    """Execute the search with the current parameters and save to database."""
     if not ctx.deps.state.parameters:
         raise ValueError("No search parameters set")
 
-    entity_type = EntityType(ctx.deps.state.parameters["entity_type"])
-    param_class = PARAMETER_REGISTRY.get(entity_type)
-    if not param_class:
-        raise ValueError(f"Unknown entity type: {entity_type}")
-
-    params = param_class(**ctx.deps.state.parameters)
+    params = BaseSearchParameters.create(**ctx.deps.state.parameters)
     logger.debug(
         "Executing database search",
-        search_entity_type=entity_type.value,
+        search_entity_type=params.entity_type.value,
         limit=limit,
         has_filters=params.filters is not None,
         query=params.query,
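Note: JSONPatchOp comes from the new orchestrator/search/agent/json_patch.py
(+51 lines, not expanded in this diff). Its exact definition is not shown, so
the sketch below is an inference from how the tools use it: an RFC 6902
operation model whose upsert() picks "replace" when the target path already
exists and "add" when it does not.

    from typing import Any

    from pydantic import BaseModel


    class JSONPatchOp(BaseModel):
        """RFC 6902 patch operation (shape inferred from usage above)."""

        op: str
        path: str
        value: Any = None

        @classmethod
        def upsert(cls, path: str, value: Any, existed: bool) -> "JSONPatchOp":
            # RFC 6902: "replace" requires an existing target; "add" creates one.
            return cls(op="replace" if existed else "add", path=path, value=value)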
@@ -169,17 +180,61 @@ async def execute_search(
 
     params.limit = limit
 
-    fn = SEARCH_FN_MAP[entity_type]
-    search_results = await fn(params)
+    changes: list[JSONPatchOp] = []
+
+    if not ctx.deps.state.run_id:
+        agent_run = AgentRunTable(agent_type="search")
+
+        db.session.add(agent_run)
+        db.session.commit()
+        db.session.expire_all()  # Release connection to prevent stacking while agent runs
+
+        ctx.deps.state.run_id = agent_run.run_id
+        logger.debug("Created new agent run", run_id=str(agent_run.run_id))
+        changes.append(JSONPatchOp(op="add", path="/run_id", value=str(ctx.deps.state.run_id)))
+
+    # Get query with embedding and save to DB
+    search_response = await execute_search(params, db.session)
+    query_embedding = search_response.query_embedding
+    query_state = SearchQueryState(parameters=params, query_embedding=query_embedding)
+    query_number = db.session.query(SearchQueryTable).filter_by(run_id=ctx.deps.state.run_id).count() + 1
+    search_query = SearchQueryTable.from_state(
+        state=query_state,
+        run_id=ctx.deps.state.run_id,
+        query_number=query_number,
+    )
+    db.session.add(search_query)
+    db.session.commit()
+    db.session.expire_all()
+
+    query_id_existed = ctx.deps.state.query_id is not None
+    ctx.deps.state.query_id = search_query.query_id
+    logger.debug("Saved search query", query_id=str(search_query.query_id), query_number=query_number)
+    changes.append(JSONPatchOp.upsert(path="/query_id", value=str(ctx.deps.state.query_id), existed=query_id_existed))
 
     logger.debug(
         "Search completed",
-        total_results=len(search_results.data) if search_results.data else 0,
+        total_results=len(search_response.results),
     )
 
-    ctx.deps.state.results = search_results.data
+    # Store results data for both frontend display and agent context
+    results_url = f"{app_settings.BASE_URL}/api/search/queries/{ctx.deps.state.query_id}"
+
+    results_data_existed = ctx.deps.state.results_data is not None
+    ctx.deps.state.results_data = SearchResultsData(
+        query_id=str(ctx.deps.state.query_id),
+        results_url=results_url,
+        total_count=len(search_response.results),
+        message=f"Found {len(search_response.results)} results.",
+        results=search_response.results,  # Include actual results in state
+    )
+    changes.append(
+        JSONPatchOp.upsert(
+            path="/results_data", value=ctx.deps.state.results_data.model_dump(), existed=results_data_existed
+        )
+    )
 
-    return StateSnapshotEvent(type=EventType.STATE_SNAPSHOT, snapshot=ctx.deps.state.model_dump())
+    return StateDeltaEvent(type=EventType.STATE_DELTA, delta=changes)
 
 
 @search_toolset.tool
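Note: run_search now streams incremental StateDeltaEvent payloads instead of
full snapshots. A subscriber can apply these deltas to its last snapshot with
any RFC 6902 implementation; a minimal sketch using the third-party jsonpatch
package (not part of orchestrator-core), with illustrative values:

    import jsonpatch

    # State previously received from a STATE_SNAPSHOT event.
    state = {"parameters": {"entity_type": "SUBSCRIPTION", "query": "", "filters": None}}

    # Delta received from run_search's StateDeltaEvent.
    delta = [
        {"op": "add", "path": "/run_id", "value": "..."},
        {"op": "add", "path": "/query_id", "value": "..."},
    ]

    state = jsonpatch.apply_patch(state, delta)
    assert "run_id" in state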
@@ -256,3 +311,87 @@ async def get_valid_operators() -> dict[str, list[FilterOp]]:
         if hasattr(type_def, "operators"):
             operator_map[key] = type_def.operators
     return operator_map
+
+
+@search_toolset.tool
+async def fetch_entity_details(
+    ctx: RunContext[StateDeps[SearchState]],
+    limit: int = 10,
+) -> str:
+    """Fetch detailed entity information to answer user questions.
+
+    Use this tool when you need detailed information about entities from the search results
+    to answer the user's question. This provides the same detailed data that would be
+    included in an export (e.g., subscription status, product details, workflow info, etc.).
+
+    Args:
+        ctx: Runtime context for agent (injected).
+        limit: Maximum number of entities to fetch details for (default 10).
+
+    Returns:
+        JSON string containing detailed entity information.
+
+    Raises:
+        ValueError: If no search results are available.
+    """
+    if not ctx.deps.state.results_data or not ctx.deps.state.results_data.results:
+        raise ValueError("No search results available. Run a search first before fetching entity details.")
+
+    if not ctx.deps.state.parameters:
+        raise ValueError("No search parameters found.")
+
+    entity_type = EntityType(ctx.deps.state.parameters["entity_type"])
+
+    entity_ids = [r.entity_id for r in ctx.deps.state.results_data.results[:limit]]
+
+    logger.debug(
+        "Fetching detailed entity data",
+        entity_type=entity_type.value,
+        entity_count=len(entity_ids),
+    )
+
+    detailed_data = fetch_export_data(entity_type, entity_ids)
+
+    return json.dumps(detailed_data, indent=2)
+
+
+@search_toolset.tool
+async def prepare_export(
+    ctx: RunContext[StateDeps[SearchState]],
+) -> StateSnapshotEvent:
+    """Prepares export URL using the last executed search query."""
+    if not ctx.deps.state.query_id or not ctx.deps.state.run_id:
+        raise ValueError("No search has been executed yet. Run a search first before exporting.")
+
+    if not ctx.deps.state.parameters:
+        raise ValueError("No search parameters found. Run a search first before exporting.")
+
+    # Validate that export is only available for SELECT actions
+    action = ctx.deps.state.parameters.get("action", ActionType.SELECT)
+    if action != ActionType.SELECT:
+        raise ValueError(
+            f"Export is only available for SELECT actions. Current action is '{action}'. "
+            "Please run a SELECT search first."
+        )
+
+    logger.debug(
+        "Prepared query for export",
+        query_id=str(ctx.deps.state.query_id),
+    )
+
+    download_url = f"{app_settings.BASE_URL}/api/search/queries/{ctx.deps.state.query_id}/export"
+
+    ctx.deps.state.export_data = ExportData(
+        query_id=str(ctx.deps.state.query_id),
+        download_url=download_url,
+        message="Export ready for download.",
+    )
+
+    logger.debug("Export data set in state", export_data=ctx.deps.state.export_data.model_dump())
+
+    # Should use StateDelta here? Use snapshot to workaround state persistence issue
+    # TODO: Fix root cause; state is empty on frontend when it should have data from run_search
+    return StateSnapshotEvent(
+        type=EventType.STATE_SNAPSHOT,
+        snapshot=ctx.deps.state.model_dump(),
+    )
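Note: fetch_entity_details serializes the same flattened rows that the export
path produces (see orchestrator/search/export.py later in this diff, which
describes them as "ready for CSV export"). A minimal sketch of that CSV step,
assuming the rows are uniform dicts as returned by fetch_export_data:

    import csv
    import io


    def rows_to_csv(rows: list[dict]) -> str:
        """Render flattened export rows as CSV text."""
        if not rows:
            return ""
        buf = io.StringIO()
        writer = csv.DictWriter(buf, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)
        return buf.getvalue()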
@@ -42,7 +42,7 @@ class EmbeddingIndexer:
                 max_retries=llm_settings.LLM_MAX_RETRIES,
             )
             data = sorted(resp.data, key=lambda e: e["index"])
-            return [row["embedding"] for row in data]
+            return [row["embedding"][: llm_settings.EMBEDDING_DIMENSION] for row in data]
         except (llm_exc.APIError, llm_exc.APIConnectionError, llm_exc.RateLimitError, llm_exc.Timeout) as e:
             logger.error("Embedding request failed", error=str(e))
             return [[] for _ in texts]
@@ -67,7 +67,7 @@ class QueryEmbedder:
                 timeout=5.0,
                 max_retries=0,  # No retries, prioritize speed.
             )
-            return resp.data[0]["embedding"]
+            return resp.data[0]["embedding"][: llm_settings.EMBEDDING_DIMENSION]
         except Exception as e:
             logger.error("Async embedding generation failed", error=str(e))
             return []
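Note: both embedding paths now slice vectors to llm_settings.EMBEDDING_DIMENSION.
Plain truncation like this only preserves semantics for models trained to
support it (e.g. Matryoshka-style embeddings such as OpenAI's text-embedding-3
family); that suitability is an assumption here, not stated in the diff. Cosine
distance is unaffected by the changed vector norm, but dot-product or L2
comparisons would want renormalization, roughly:

    import math


    def truncate(embedding: list[float], dim: int, renormalize: bool = False) -> list[float]:
        vec = embedding[:dim]
        if renormalize:
            norm = math.sqrt(sum(x * x for x in vec)) or 1.0
            vec = [x / norm for x in vec]
        return vec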
@@ -34,3 +34,9 @@ class InvalidCursorError(SearchUtilsError):
     """Raised when cursor cannot be decoded."""
 
     pass
+
+
+class QueryStateNotFoundError(SearchUtilsError):
+    """Raised when a query state cannot be found in the database."""
+
+    pass
@@ -289,6 +289,7 @@ class ExtractedField(NamedTuple):
 class IndexableRecord(TypedDict):
     entity_id: str
     entity_type: str
+    entity_title: str
    path: Ltree
     value: Any
     value_type: Any
@@ -0,0 +1,199 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from uuid import UUID
+
+from sqlalchemy import select
+from sqlalchemy.orm import selectinload
+
+from orchestrator.db import (
+    ProcessTable,
+    ProductTable,
+    SubscriptionTable,
+    WorkflowTable,
+    db,
+)
+from orchestrator.search.core.types import EntityType
+
+
+def fetch_subscription_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch subscription data for export.
+
+    Args:
+        entity_ids: List of subscription IDs as strings
+
+    Returns:
+        List of flattened subscription dictionaries with fields:
+        subscription_id, description, status, insync, start_date, end_date,
+        note, product_name, tag, product_type, customer_id
+    """
+    stmt = (
+        select(
+            SubscriptionTable.subscription_id,
+            SubscriptionTable.description,
+            SubscriptionTable.status,
+            SubscriptionTable.insync,
+            SubscriptionTable.start_date,
+            SubscriptionTable.end_date,
+            SubscriptionTable.note,
+            SubscriptionTable.customer_id,
+            ProductTable.name.label("product_name"),
+            ProductTable.tag,
+            ProductTable.product_type,
+        )
+        .join(ProductTable, SubscriptionTable.product_id == ProductTable.product_id)
+        .filter(SubscriptionTable.subscription_id.in_([UUID(sid) for sid in entity_ids]))
+    )
+
+    rows = db.session.execute(stmt).all()
+
+    return [
+        {
+            "subscription_id": str(row.subscription_id),
+            "description": row.description,
+            "status": row.status,
+            "insync": row.insync,
+            "start_date": row.start_date.isoformat() if row.start_date else None,
+            "end_date": row.end_date.isoformat() if row.end_date else None,
+            "note": row.note,
+            "product_name": row.product_name,
+            "tag": row.tag,
+            "product_type": row.product_type,
+            "customer_id": row.customer_id,
+        }
+        for row in rows
+    ]
+
+
+def fetch_workflow_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch workflow data for export.
+
+    Args:
+        entity_ids: List of workflow names as strings
+
+    Returns:
+        List of flattened workflow dictionaries with fields:
+        name, description, created_at, product_names (comma-separated),
+        product_ids (comma-separated), product_types (comma-separated)
+    """
+    stmt = (
+        select(WorkflowTable).options(selectinload(WorkflowTable.products)).filter(WorkflowTable.name.in_(entity_ids))
+    )
+    workflows = db.session.scalars(stmt).all()
+
+    return [
+        {
+            "name": w.name,
+            "description": w.description,
+            "created_at": w.created_at.isoformat() if w.created_at else None,
+            "product_names": ", ".join(p.name for p in w.products),
+            "product_ids": ", ".join(str(p.product_id) for p in w.products),
+            "product_types": ", ".join(p.product_type for p in w.products),
+        }
+        for w in workflows
+    ]
+
+
+def fetch_product_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch product data for export.
+
+    Args:
+        entity_ids: List of product IDs as strings
+
+    Returns:
+        List of flattened product dictionaries with fields:
+        product_id, name, product_type, tag, description, status, created_at
+    """
+    stmt = (
+        select(ProductTable)
+        .options(
+            selectinload(ProductTable.workflows),
+            selectinload(ProductTable.fixed_inputs),
+            selectinload(ProductTable.product_blocks),
+        )
+        .filter(ProductTable.product_id.in_([UUID(pid) for pid in entity_ids]))
+    )
+    products = db.session.scalars(stmt).all()
+
+    return [
+        {
+            "product_id": str(p.product_id),
+            "name": p.name,
+            "product_type": p.product_type,
+            "tag": p.tag,
+            "description": p.description,
+            "status": p.status,
+            "created_at": p.created_at.isoformat() if p.created_at else None,
+        }
+        for p in products
+    ]
+
+
+def fetch_process_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch process data for export.
+
+    Args:
+        entity_ids: List of process IDs as strings
+
+    Returns:
+        List of flattened process dictionaries with fields:
+        process_id, workflow_name, workflow_id, last_status, is_task,
+        created_by, started_at, last_modified_at, last_step
+    """
+    stmt = (
+        select(ProcessTable)
+        .options(selectinload(ProcessTable.workflow))
+        .filter(ProcessTable.process_id.in_([UUID(pid) for pid in entity_ids]))
+    )
+    processes = db.session.scalars(stmt).all()
+
+    return [
+        {
+            "process_id": str(p.process_id),
+            "workflow_name": p.workflow.name if p.workflow else None,
+            "workflow_id": str(p.workflow_id),
+            "last_status": p.last_status,
+            "is_task": p.is_task,
+            "created_by": p.created_by,
+            "started_at": p.started_at.isoformat() if p.started_at else None,
+            "last_modified_at": p.last_modified_at.isoformat() if p.last_modified_at else None,
+            "last_step": p.last_step,
+        }
+        for p in processes
+    ]
+
+
+def fetch_export_data(entity_type: EntityType, entity_ids: list[str]) -> list[dict]:
+    """Fetch export data for any entity type.
+
+    Args:
+        entity_type: The type of entities to fetch
+        entity_ids: List of entity IDs/names as strings
+
+    Returns:
+        List of flattened entity dictionaries ready for CSV export
+
+    Raises:
+        ValueError: If entity_type is not supported
+    """
+    match entity_type:
+        case EntityType.SUBSCRIPTION:
+            return fetch_subscription_export_data(entity_ids)
+        case EntityType.WORKFLOW:
+            return fetch_workflow_export_data(entity_ids)
+        case EntityType.PRODUCT:
+            return fetch_product_export_data(entity_ids)
+        case EntityType.PROCESS:
+            return fetch_process_export_data(entity_ids)
+        case _:
+            raise ValueError(f"Unsupported entity type: {entity_type}")
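Note: a minimal usage sketch for the new export helpers, assuming an
initialized application context so db.session is usable; the IDs are
placeholders:

    from orchestrator.search.core.types import EntityType
    from orchestrator.search.export import fetch_export_data

    rows = fetch_export_data(EntityType.SUBSCRIPTION, ["<subscription-uuid>"])
    for row in rows:
        print(row["subscription_id"], row["status"], row["product_name"])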
@@ -96,6 +96,7 @@ class Indexer:
         self.chunk_size = chunk_size
         self.embedding_model = llm_settings.EMBEDDING_MODEL
         self.logger = logger.bind(entity_kind=config.entity_kind.value)
+        self._entity_titles: dict[str, str] = {}
 
     def run(self, entities: Iterable[DatabaseEntity]) -> int:
         """Orchestrates the entire indexing process."""
@@ -138,6 +139,8 @@
         if not entity_chunk:
             return 0, 0
 
+        self._entity_titles.clear()
+
         fields_to_upsert, paths_to_delete, identical_count = self._determine_changes(entity_chunk, session)
 
         if paths_to_delete and session is not None:
@@ -174,12 +177,15 @@
                 entity, pk_name=self.config.pk_name, root_name=self.config.root_name
             )
 
+            entity_title = self.config.get_title_from_fields(current_fields)
+            self._entity_titles[entity_id] = entity_title
+
             entity_hashes = existing_hashes.get(entity_id, {})
             current_paths = set()
 
             for field in current_fields:
                 current_paths.add(field.path)
-                current_hash = self._compute_content_hash(field.path, field.value, field.value_type)
+                current_hash = self._compute_content_hash(field.path, field.value, field.value_type, entity_title)
                 if field.path not in entity_hashes or entity_hashes[field.path] != current_hash:
                     fields_to_upsert.append((entity_id, field))
                 else:
@@ -301,21 +307,23 @@
         return f"{field.path}: {str(field.value)}"
 
     @staticmethod
-    def _compute_content_hash(path: str, value: Any, value_type: Any) -> str:
+    def _compute_content_hash(path: str, value: Any, value_type: Any, entity_title: str = "") -> str:
         v = "" if value is None else str(value)
-        content = f"{path}:{v}:{value_type}"
+        content = f"{path}:{v}:{value_type}:{entity_title}"
         return hashlib.sha256(content.encode("utf-8")).hexdigest()
 
     def _make_indexable_record(
         self, field: ExtractedField, entity_id: str, embedding: list[float] | None
     ) -> IndexableRecord:
+        entity_title = self._entity_titles[entity_id]
         return IndexableRecord(
             entity_id=entity_id,
             entity_type=self.config.entity_kind.value,
+            entity_title=entity_title,
             path=Ltree(field.path),
             value=field.value,
             value_type=field.value_type,
-            content_hash=self._compute_content_hash(field.path, field.value, field.value_type),
+            content_hash=self._compute_content_hash(field.path, field.value, field.value_type, entity_title),
             embedding=embedding if embedding else None,
         )
 
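Note: folding entity_title into the content hash means a title-only change now
alters every row's hash, so _determine_changes re-upserts rows that 4.5.3 would
have skipped as identical. A self-contained illustration mirroring the hash
above:

    import hashlib


    def content_hash(path: str, value: str, value_type: str, entity_title: str = "") -> str:
        return hashlib.sha256(f"{path}:{value}:{value_type}:{entity_title}".encode("utf-8")).hexdigest()


    assert content_hash("a.b", "x", "str", "Old title") != content_hash("a.b", "x", "str", "New title")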
@@ -326,6 +334,7 @@
         return stmt.on_conflict_do_update(
             index_elements=[AiSearchIndex.entity_id, AiSearchIndex.path],
             set_={
+                AiSearchIndex.entity_title: stmt.excluded.entity_title,
                 AiSearchIndex.value: stmt.excluded.value,
                 AiSearchIndex.value_type: stmt.excluded.value_type,
                 AiSearchIndex.content_hash: stmt.excluded.content_hash,