orchestrator-core 4.5.3__py3-none-any.whl → 4.6.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. orchestrator/__init__.py +1 -1
  2. orchestrator/agentic_app.py +1 -21
  3. orchestrator/api/api_v1/api.py +5 -0
  4. orchestrator/api/api_v1/endpoints/agent.py +50 -0
  5. orchestrator/api/api_v1/endpoints/search.py +120 -201
  6. orchestrator/cli/database.py +3 -0
  7. orchestrator/cli/generate.py +11 -4
  8. orchestrator/cli/generator/generator/migration.py +7 -3
  9. orchestrator/cli/scheduler.py +15 -22
  10. orchestrator/cli/search/resize_embedding.py +28 -22
  11. orchestrator/cli/search/speedtest.py +4 -6
  12. orchestrator/db/__init__.py +6 -0
  13. orchestrator/db/models.py +75 -0
  14. orchestrator/migrations/helpers.py +46 -38
  15. orchestrator/schedules/scheduler.py +32 -15
  16. orchestrator/schedules/validate_products.py +1 -1
  17. orchestrator/schemas/search.py +8 -85
  18. orchestrator/search/agent/__init__.py +2 -2
  19. orchestrator/search/agent/agent.py +25 -29
  20. orchestrator/search/agent/json_patch.py +51 -0
  21. orchestrator/search/agent/prompts.py +35 -9
  22. orchestrator/search/agent/state.py +28 -2
  23. orchestrator/search/agent/tools.py +192 -53
  24. orchestrator/search/core/exceptions.py +6 -0
  25. orchestrator/search/core/types.py +1 -0
  26. orchestrator/search/export.py +199 -0
  27. orchestrator/search/indexing/indexer.py +13 -4
  28. orchestrator/search/indexing/registry.py +14 -1
  29. orchestrator/search/llm_migration.py +55 -0
  30. orchestrator/search/retrieval/__init__.py +3 -2
  31. orchestrator/search/retrieval/builder.py +5 -1
  32. orchestrator/search/retrieval/engine.py +66 -23
  33. orchestrator/search/retrieval/pagination.py +46 -56
  34. orchestrator/search/retrieval/query_state.py +61 -0
  35. orchestrator/search/retrieval/retrievers/base.py +26 -40
  36. orchestrator/search/retrieval/retrievers/fuzzy.py +10 -9
  37. orchestrator/search/retrieval/retrievers/hybrid.py +11 -8
  38. orchestrator/search/retrieval/retrievers/semantic.py +9 -8
  39. orchestrator/search/retrieval/retrievers/structured.py +6 -6
  40. orchestrator/search/schemas/parameters.py +17 -13
  41. orchestrator/search/schemas/results.py +4 -1
  42. orchestrator/settings.py +1 -0
  43. orchestrator/utils/auth.py +3 -2
  44. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/METADATA +3 -3
  45. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/RECORD +47 -43
  46. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/WHEEL +0 -0
  47. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/licenses/LICENSE +0 -0
orchestrator/search/export.py
@@ -0,0 +1,199 @@
+ # Copyright 2019-2025 SURF, GÉANT.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #    http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from uuid import UUID
+
+ from sqlalchemy import select
+ from sqlalchemy.orm import selectinload
+
+ from orchestrator.db import (
+     ProcessTable,
+     ProductTable,
+     SubscriptionTable,
+     WorkflowTable,
+     db,
+ )
+ from orchestrator.search.core.types import EntityType
+
+
+ def fetch_subscription_export_data(entity_ids: list[str]) -> list[dict]:
+     """Fetch subscription data for export.
+
+     Args:
+         entity_ids: List of subscription IDs as strings
+
+     Returns:
+         List of flattened subscription dictionaries with fields:
+         subscription_id, description, status, insync, start_date, end_date,
+         note, product_name, tag, product_type, customer_id
+     """
+     stmt = (
+         select(
+             SubscriptionTable.subscription_id,
+             SubscriptionTable.description,
+             SubscriptionTable.status,
+             SubscriptionTable.insync,
+             SubscriptionTable.start_date,
+             SubscriptionTable.end_date,
+             SubscriptionTable.note,
+             SubscriptionTable.customer_id,
+             ProductTable.name.label("product_name"),
+             ProductTable.tag,
+             ProductTable.product_type,
+         )
+         .join(ProductTable, SubscriptionTable.product_id == ProductTable.product_id)
+         .filter(SubscriptionTable.subscription_id.in_([UUID(sid) for sid in entity_ids]))
+     )
+
+     rows = db.session.execute(stmt).all()
+
+     return [
+         {
+             "subscription_id": str(row.subscription_id),
+             "description": row.description,
+             "status": row.status,
+             "insync": row.insync,
+             "start_date": row.start_date.isoformat() if row.start_date else None,
+             "end_date": row.end_date.isoformat() if row.end_date else None,
+             "note": row.note,
+             "product_name": row.product_name,
+             "tag": row.tag,
+             "product_type": row.product_type,
+             "customer_id": row.customer_id,
+         }
+         for row in rows
+     ]
+
+
+ def fetch_workflow_export_data(entity_ids: list[str]) -> list[dict]:
+     """Fetch workflow data for export.
+
+     Args:
+         entity_ids: List of workflow names as strings
+
+     Returns:
+         List of flattened workflow dictionaries with fields:
+         name, description, created_at, product_names (comma-separated),
+         product_ids (comma-separated), product_types (comma-separated)
+     """
+     stmt = (
+         select(WorkflowTable).options(selectinload(WorkflowTable.products)).filter(WorkflowTable.name.in_(entity_ids))
+     )
+     workflows = db.session.scalars(stmt).all()
+
+     return [
+         {
+             "name": w.name,
+             "description": w.description,
+             "created_at": w.created_at.isoformat() if w.created_at else None,
+             "product_names": ", ".join(p.name for p in w.products),
+             "product_ids": ", ".join(str(p.product_id) for p in w.products),
+             "product_types": ", ".join(p.product_type for p in w.products),
+         }
+         for w in workflows
+     ]
+
+
+ def fetch_product_export_data(entity_ids: list[str]) -> list[dict]:
+     """Fetch product data for export.
+
+     Args:
+         entity_ids: List of product IDs as strings
+
+     Returns:
+         List of flattened product dictionaries with fields:
+         product_id, name, product_type, tag, description, status, created_at
+     """
+     stmt = (
+         select(ProductTable)
+         .options(
+             selectinload(ProductTable.workflows),
+             selectinload(ProductTable.fixed_inputs),
+             selectinload(ProductTable.product_blocks),
+         )
+         .filter(ProductTable.product_id.in_([UUID(pid) for pid in entity_ids]))
+     )
+     products = db.session.scalars(stmt).all()
+
+     return [
+         {
+             "product_id": str(p.product_id),
+             "name": p.name,
+             "product_type": p.product_type,
+             "tag": p.tag,
+             "description": p.description,
+             "status": p.status,
+             "created_at": p.created_at.isoformat() if p.created_at else None,
+         }
+         for p in products
+     ]
+
+
+ def fetch_process_export_data(entity_ids: list[str]) -> list[dict]:
+     """Fetch process data for export.
+
+     Args:
+         entity_ids: List of process IDs as strings
+
+     Returns:
+         List of flattened process dictionaries with fields:
+         process_id, workflow_name, workflow_id, last_status, is_task,
+         created_by, started_at, last_modified_at, last_step
+     """
+     stmt = (
+         select(ProcessTable)
+         .options(selectinload(ProcessTable.workflow))
+         .filter(ProcessTable.process_id.in_([UUID(pid) for pid in entity_ids]))
+     )
+     processes = db.session.scalars(stmt).all()
+
+     return [
+         {
+             "process_id": str(p.process_id),
+             "workflow_name": p.workflow.name if p.workflow else None,
+             "workflow_id": str(p.workflow_id),
+             "last_status": p.last_status,
+             "is_task": p.is_task,
+             "created_by": p.created_by,
+             "started_at": p.started_at.isoformat() if p.started_at else None,
+             "last_modified_at": p.last_modified_at.isoformat() if p.last_modified_at else None,
+             "last_step": p.last_step,
+         }
+         for p in processes
+     ]
+
+
+ def fetch_export_data(entity_type: EntityType, entity_ids: list[str]) -> list[dict]:
+     """Fetch export data for any entity type.
+
+     Args:
+         entity_type: The type of entities to fetch
+         entity_ids: List of entity IDs/names as strings
+
+     Returns:
+         List of flattened entity dictionaries ready for CSV export
+
+     Raises:
+         ValueError: If entity_type is not supported
+     """
+     match entity_type:
+         case EntityType.SUBSCRIPTION:
+             return fetch_subscription_export_data(entity_ids)
+         case EntityType.WORKFLOW:
+             return fetch_workflow_export_data(entity_ids)
+         case EntityType.PRODUCT:
+             return fetch_product_export_data(entity_ids)
+         case EntityType.PROCESS:
+             return fetch_process_export_data(entity_ids)
+         case _:
+             raise ValueError(f"Unsupported entity type: {entity_type}")
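
The new export module keys everything on EntityType, so a caller only needs the entity type and a list of IDs (or workflow names) as strings. A minimal usage sketch, assuming the application has already initialised the orchestrator database so that db.session is usable; the subscription ID shown is hypothetical:

import csv

from orchestrator.search.core.types import EntityType
from orchestrator.search.export import fetch_export_data

# Hypothetical subscription ID; fetch_export_data expects IDs as strings.
rows = fetch_export_data(EntityType.SUBSCRIPTION, ["6f4b3c2a-0c5e-4b7e-9a15-000000000001"])

if rows:
    with open("subscriptions.csv", "w", newline="") as fh:
        writer = csv.DictWriter(fh, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)
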
orchestrator/search/indexing/indexer.py
@@ -96,6 +96,7 @@ class Indexer:
          self.chunk_size = chunk_size
          self.embedding_model = llm_settings.EMBEDDING_MODEL
          self.logger = logger.bind(entity_kind=config.entity_kind.value)
+         self._entity_titles: dict[str, str] = {}

      def run(self, entities: Iterable[DatabaseEntity]) -> int:
          """Orchestrates the entire indexing process."""
@@ -138,6 +139,8 @@ class Indexer:
          if not entity_chunk:
              return 0, 0

+         self._entity_titles.clear()
+
          fields_to_upsert, paths_to_delete, identical_count = self._determine_changes(entity_chunk, session)

          if paths_to_delete and session is not None:
@@ -174,12 +177,15 @@ class Indexer:
                  entity, pk_name=self.config.pk_name, root_name=self.config.root_name
              )

+             entity_title = self.config.get_title_from_fields(current_fields)
+             self._entity_titles[entity_id] = entity_title
+
              entity_hashes = existing_hashes.get(entity_id, {})
              current_paths = set()

              for field in current_fields:
                  current_paths.add(field.path)
-                 current_hash = self._compute_content_hash(field.path, field.value, field.value_type)
+                 current_hash = self._compute_content_hash(field.path, field.value, field.value_type, entity_title)
                  if field.path not in entity_hashes or entity_hashes[field.path] != current_hash:
                      fields_to_upsert.append((entity_id, field))
                  else:
@@ -301,21 +307,23 @@ class Indexer:
          return f"{field.path}: {str(field.value)}"

      @staticmethod
-     def _compute_content_hash(path: str, value: Any, value_type: Any) -> str:
+     def _compute_content_hash(path: str, value: Any, value_type: Any, entity_title: str = "") -> str:
          v = "" if value is None else str(value)
-         content = f"{path}:{v}:{value_type}"
+         content = f"{path}:{v}:{value_type}:{entity_title}"
          return hashlib.sha256(content.encode("utf-8")).hexdigest()

      def _make_indexable_record(
          self, field: ExtractedField, entity_id: str, embedding: list[float] | None
      ) -> IndexableRecord:
+         entity_title = self._entity_titles[entity_id]
          return IndexableRecord(
              entity_id=entity_id,
              entity_type=self.config.entity_kind.value,
+             entity_title=entity_title,
              path=Ltree(field.path),
              value=field.value,
              value_type=field.value_type,
-             content_hash=self._compute_content_hash(field.path, field.value, field.value_type),
+             content_hash=self._compute_content_hash(field.path, field.value, field.value_type, entity_title),
              embedding=embedding if embedding else None,
          )

@@ -326,6 +334,7 @@ class Indexer:
          return stmt.on_conflict_do_update(
              index_elements=[AiSearchIndex.entity_id, AiSearchIndex.path],
              set_={
+                 AiSearchIndex.entity_title: stmt.excluded.entity_title,
                  AiSearchIndex.value: stmt.excluded.value,
                  AiSearchIndex.value_type: stmt.excluded.value_type,
                  AiSearchIndex.content_hash: stmt.excluded.content_hash,
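
The indexer changes above fold the entity title into each field's content hash, so renaming an entity changes every hash and forces a re-upsert of all its indexed fields. A standalone sketch of that effect, re-implementing the hash outside the class purely for illustration:

import hashlib
from typing import Any

def compute_content_hash(path: str, value: Any, value_type: Any, entity_title: str = "") -> str:
    # Mirrors Indexer._compute_content_hash from the diff above.
    v = "" if value is None else str(value)
    content = f"{path}:{v}:{value_type}:{entity_title}"
    return hashlib.sha256(content.encode("utf-8")).hexdigest()

old = compute_content_hash("subscription.description", "Core link", "str", "Core link")
new = compute_content_hash("subscription.description", "Core link", "str", "Core link (renamed)")
assert old != new  # a title change alone now invalidates the stored hash
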
orchestrator/search/indexing/registry.py
@@ -25,7 +25,7 @@ from orchestrator.db import (
      WorkflowTable,
  )
  from orchestrator.db.database import BaseModel
- from orchestrator.search.core.types import EntityType
+ from orchestrator.search.core.types import EntityType, ExtractedField

  from .traverse import (
      BaseTraverser,
@@ -48,6 +48,7 @@ class EntityConfig(Generic[ModelT]):
      traverser: "type[BaseTraverser]"
      pk_name: str
      root_name: str
+     title_paths: list[str]  # List of field paths to check for title (with fallback)

      def get_all_query(self, entity_id: str | None = None) -> Query | Select:
          query = self.table.query
@@ -56,6 +57,14 @@ class EntityConfig(Generic[ModelT]):
              query = query.filter(pk_column == UUID(entity_id))
          return query

+     def get_title_from_fields(self, fields: list[ExtractedField]) -> str:
+         """Extract title from fields using configured paths."""
+         for title_path in self.title_paths:
+             for field in fields:
+                 if field.path == title_path and field.value:
+                     return str(field.value)
+         return "UNKNOWN"
+

  @dataclass(frozen=True)
  class WorkflowConfig(EntityConfig[WorkflowTable]):
@@ -76,6 +85,7 @@ ENTITY_CONFIG_REGISTRY: dict[EntityType, EntityConfig] = {
          traverser=SubscriptionTraverser,
          pk_name="subscription_id",
          root_name="subscription",
+         title_paths=["subscription.description"],
      ),
      EntityType.PRODUCT: EntityConfig(
          entity_kind=EntityType.PRODUCT,
@@ -83,6 +93,7 @@ ENTITY_CONFIG_REGISTRY: dict[EntityType, EntityConfig] = {
          traverser=ProductTraverser,
          pk_name="product_id",
          root_name="product",
+         title_paths=["product.description", "product.name"],
      ),
      EntityType.PROCESS: EntityConfig(
          entity_kind=EntityType.PROCESS,
@@ -90,6 +101,7 @@ ENTITY_CONFIG_REGISTRY: dict[EntityType, EntityConfig] = {
          traverser=ProcessTraverser,
          pk_name="process_id",
          root_name="process",
+         title_paths=["process.workflow_name"],
      ),
      EntityType.WORKFLOW: WorkflowConfig(
          entity_kind=EntityType.WORKFLOW,
@@ -97,5 +109,6 @@ ENTITY_CONFIG_REGISTRY: dict[EntityType, EntityConfig] = {
          traverser=WorkflowTraverser,
          pk_name="workflow_id",
          root_name="workflow",
+         title_paths=["workflow.description", "workflow.name"],
      ),
  }
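
title_paths is an ordered fallback list: the first configured path whose field carries a truthy value becomes the entity title, otherwise it defaults to "UNKNOWN". A self-contained sketch of the same lookup, with FakeField standing in for ExtractedField so the example does not depend on its real constructor:

from dataclasses import dataclass

@dataclass
class FakeField:
    # Stand-in for ExtractedField (orchestrator.search.core.types); local to this sketch.
    path: str
    value: object

def title_from_fields(title_paths: list[str], fields: list[FakeField]) -> str:
    # Same fallback logic as EntityConfig.get_title_from_fields above.
    for title_path in title_paths:
        for field in fields:
            if field.path == title_path and field.value:
                return str(field.value)
    return "UNKNOWN"

fields = [FakeField("product.description", ""), FakeField("product.name", "SN8 LightPath")]
# The PRODUCT config lists ["product.description", "product.name"]; the empty
# description is skipped and the name is used instead.
assert title_from_fields(["product.description", "product.name"], fields) == "SN8 LightPath"
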
orchestrator/search/llm_migration.py
@@ -37,6 +37,7 @@ def run_migration(connection: Connection) -> None:
      if llm_settings.LLM_FORCE_EXTENTION_MIGRATION or res.rowcount == 0:
          # Create PostgreSQL extensions
          logger.info("Attempting to run the extention creation;")
+         connection.execute(text('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";'))
          connection.execute(text("CREATE EXTENSION IF NOT EXISTS ltree;"))
          connection.execute(text("CREATE EXTENSION IF NOT EXISTS unaccent;"))
          connection.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;"))
@@ -64,6 +65,7 @@ def run_migration(connection: Connection) -> None:
              CREATE TABLE IF NOT EXISTS {TABLE} (
                  entity_type TEXT NOT NULL,
                  entity_id UUID NOT NULL,
+                 entity_title TEXT,
                  path LTREE NOT NULL,
                  value TEXT NOT NULL,
                  embedding VECTOR({TARGET_DIM}),
@@ -78,6 +80,23 @@ def run_migration(connection: Connection) -> None:
      # Drop default
      connection.execute(text(f"ALTER TABLE {TABLE} ALTER COLUMN value_type DROP DEFAULT;"))

+     # Add entity_title column if it doesn't exist (for existing installations)
+     connection.execute(
+         text(
+             f"""
+             DO $$
+             BEGIN
+                 IF NOT EXISTS (
+                     SELECT 1 FROM information_schema.columns
+                     WHERE table_name = '{TABLE}' AND column_name = 'entity_title'
+                 ) THEN
+                     ALTER TABLE {TABLE} ADD COLUMN entity_title TEXT;
+                 END IF;
+             END $$;
+             """
+         )
+     )
+
      # Create indexes with IF NOT EXISTS
      connection.execute(text(f"CREATE INDEX IF NOT EXISTS ix_ai_search_index_entity_id ON {TABLE} (entity_id);"))
      connection.execute(
@@ -96,6 +115,42 @@ def run_migration(connection: Connection) -> None:
          )
      )

+     # Create agent_runs table
+     connection.execute(
+         text(
+             """
+             CREATE TABLE IF NOT EXISTS agent_runs (
+                 run_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+                 agent_type VARCHAR(50) NOT NULL,
+                 created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL
+             );
+             """
+         )
+     )
+     connection.execute(text("CREATE INDEX IF NOT EXISTS ix_agent_runs_created_at ON agent_runs (created_at);"))
+
+     # Create search_queries table
+     connection.execute(
+         text(
+             f"""
+             CREATE TABLE IF NOT EXISTS search_queries (
+                 query_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+                 run_id UUID,
+                 query_number INTEGER NOT NULL,
+                 parameters JSONB NOT NULL,
+                 query_embedding VECTOR({TARGET_DIM}),
+                 executed_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL,
+                 CONSTRAINT fk_search_queries_run_id FOREIGN KEY (run_id) REFERENCES agent_runs(run_id) ON DELETE CASCADE
+             );
+             """
+         )
+     )
+     connection.execute(text("CREATE INDEX IF NOT EXISTS ix_search_queries_run_id ON search_queries (run_id);"))
+     connection.execute(
+         text("CREATE INDEX IF NOT EXISTS ix_search_queries_executed_at ON search_queries (executed_at);")
+     )
+     connection.execute(text("CREATE INDEX IF NOT EXISTS ix_search_queries_query_id ON search_queries (query_id);"))
+
      connection.commit()
      logger.info("LLM migration completed successfully")

orchestrator/search/retrieval/__init__.py
@@ -11,6 +11,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- from .engine import execute_search
+ from .engine import execute_search, execute_search_for_export
+ from .query_state import SearchQueryState

- __all__ = ["execute_search"]
+ __all__ = ["execute_search", "execute_search_for_export", "SearchQueryState"]
orchestrator/search/retrieval/builder.py
@@ -43,7 +43,11 @@ def build_candidate_query(params: BaseSearchParameters) -> Select:
          Select: The SQLAlchemy `Select` object representing the query.
      """

-     stmt = select(AiSearchIndex.entity_id).where(AiSearchIndex.entity_type == params.entity_type.value).distinct()
+     stmt = (
+         select(AiSearchIndex.entity_id, AiSearchIndex.entity_title)
+         .where(AiSearchIndex.entity_type == params.entity_type.value)
+         .distinct()
+     )

      if params.filters is not None:
          entity_id_col = AiSearchIndex.entity_id
orchestrator/search/retrieval/engine.py
@@ -17,13 +17,15 @@ import structlog
  from sqlalchemy.engine.row import RowMapping
  from sqlalchemy.orm import Session

+ from orchestrator.search.core.embedding import QueryEmbedder
  from orchestrator.search.core.types import FilterOp, SearchMetadata
  from orchestrator.search.filters import FilterTree, LtreeFilter
  from orchestrator.search.schemas.parameters import BaseSearchParameters
  from orchestrator.search.schemas.results import MatchingField, SearchResponse, SearchResult

  from .builder import build_candidate_query
- from .pagination import PaginationParams
+ from .pagination import PageCursor
+ from .query_state import SearchQueryState
  from .retrievers import Retriever
  from .utils import generate_highlight_indices

@@ -74,9 +76,15 @@ def _format_response(
          # Structured search (filter-only)
          matching_field = _extract_matching_field_from_filters(search_params.filters)

+         entity_title = row.get("entity_title", "")
+         if not isinstance(entity_title, str):
+             entity_title = str(entity_title) if entity_title is not None else ""
+
          results.append(
              SearchResult(
                  entity_id=str(row.entity_id),
+                 entity_type=search_params.entity_type,
+                 entity_title=entity_title,
                  score=row.score,
                  perfect_match=row.get("perfect_match", 0),
                  matching_field=matching_field,
@@ -110,45 +118,80 @@ def _extract_matching_field_from_filters(filters: FilterTree) -> MatchingField |
      return MatchingField(text=text, path=pf.path, highlight_indices=[(0, len(text))])


- async def execute_search(
+ async def _execute_search_internal(
      search_params: BaseSearchParameters,
      db_session: Session,
-     pagination_params: PaginationParams | None = None,
+     limit: int,
+     cursor: PageCursor | None = None,
+     query_embedding: list[float] | None = None,
  ) -> SearchResponse:
-     """Execute a hybrid search and return ranked results.
-
-     Builds a candidate entity query based on the given search parameters,
-     applies the appropriate ranking strategy, and executes the final ranked
-     query to retrieve results.
+     """Internal function to execute search with specified parameters.

      Args:
-         search_params (BaseSearchParameters): The search parameters specifying vector, fuzzy, or filter criteria.
-         db_session (Session): The active SQLAlchemy session for executing the query.
-         pagination_params (PaginationParams): Parameters controlling pagination of the search results.
-         limit (int, optional): The maximum number of search results to return, by default 5.
+         search_params: The search parameters specifying vector, fuzzy, or filter criteria.
+         db_session: The active SQLAlchemy session for executing the query.
+         limit: Maximum number of results to return.
+         cursor: Optional pagination cursor.
+         query_embedding: Optional pre-computed query embedding to use instead of generating a new one.

      Returns:
-         SearchResponse: A list of `SearchResult` objects containing entity IDs, scores,
-         and optional highlight metadata.
-
-     Notes:
-         If no vector query, filters, or fuzzy term are provided, a warning is logged
-         and an empty result set is returned.
+         SearchResponse with results and embedding (for internal use).
      """
-
      if not search_params.vector_query and not search_params.filters and not search_params.fuzzy_term:
          logger.warning("No search criteria provided (vector_query, fuzzy_term, or filters).")
          return SearchResponse(results=[], metadata=SearchMetadata.empty())

      candidate_query = build_candidate_query(search_params)

-     pagination_params = pagination_params or PaginationParams()
-     retriever = await Retriever.from_params(search_params, pagination_params)
+     if search_params.vector_query and not query_embedding:
+
+         query_embedding = await QueryEmbedder.generate_for_text_async(search_params.vector_query)
+
+     retriever = await Retriever.route(search_params, cursor, query_embedding)
      logger.debug("Using retriever", retriever_type=retriever.__class__.__name__)

      final_stmt = retriever.apply(candidate_query)
-     final_stmt = final_stmt.limit(search_params.limit)
+     final_stmt = final_stmt.limit(limit)
      logger.debug(final_stmt)
      result = db_session.execute(final_stmt).mappings().all()

-     return _format_response(result, search_params, retriever.metadata)
+     response = _format_response(result, search_params, retriever.metadata)
+     # Store embedding in response for agent to save to DB
+     response.query_embedding = query_embedding
+     return response
+
+
+ async def execute_search(
+     search_params: BaseSearchParameters,
+     db_session: Session,
+     cursor: PageCursor | None = None,
+     query_embedding: list[float] | None = None,
+ ) -> SearchResponse:
+     """Execute a search and return ranked results."""
+     return await _execute_search_internal(search_params, db_session, search_params.limit, cursor, query_embedding)
+
+
+ async def execute_search_for_export(
+     query_state: SearchQueryState,
+     db_session: Session,
+ ) -> list[dict]:
+     """Execute a search for export and fetch flattened entity data.
+
+     Args:
+         query_state: Query state containing parameters and query_embedding.
+         db_session: The active SQLAlchemy session for executing the query.
+
+     Returns:
+         List of flattened entity records suitable for export.
+     """
+     from orchestrator.search.export import fetch_export_data
+
+     search_response = await _execute_search_internal(
+         search_params=query_state.parameters,
+         db_session=db_session,
+         limit=query_state.parameters.export_limit,
+         query_embedding=query_state.query_embedding,
+     )
+
+     entity_ids = [res.entity_id for res in search_response.results]
+     return fetch_export_data(query_state.parameters.entity_type, entity_ids)
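
Taken together, execute_search_for_export re-runs a stored query (re-using its saved embedding when present) and hands the resulting entity IDs to fetch_export_data for flattening. A hypothetical driver is sketched below; the SearchQueryState constructor arguments and the parameter fields (entity_type, vector_query) are inferred from their usage in this diff rather than from a documented API, and a concrete parameter subclass may be required instead of BaseSearchParameters:

import asyncio

from orchestrator.db import db
from orchestrator.search.core.types import EntityType
from orchestrator.search.retrieval import SearchQueryState, execute_search_for_export
from orchestrator.search.schemas.parameters import BaseSearchParameters

async def export_subscriptions() -> list[dict]:
    # Field names here are inferred from this diff, not from documented API.
    params = BaseSearchParameters(entity_type=EntityType.SUBSCRIPTION, vector_query="core links")
    state = SearchQueryState(parameters=params, query_embedding=None)
    return await execute_search_for_export(state, db.session)

rows = asyncio.run(export_subscriptions())
print(f"{len(rows)} rows ready for CSV export")
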