orchestrator-core 4.5.3__py3-none-any.whl → 4.6.0rc2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. orchestrator/__init__.py +1 -1
  2. orchestrator/agentic_app.py +1 -21
  3. orchestrator/api/api_v1/api.py +5 -0
  4. orchestrator/api/api_v1/endpoints/agent.py +50 -0
  5. orchestrator/api/api_v1/endpoints/search.py +120 -201
  6. orchestrator/cli/database.py +3 -0
  7. orchestrator/cli/generate.py +11 -4
  8. orchestrator/cli/generator/generator/migration.py +7 -3
  9. orchestrator/cli/scheduler.py +15 -22
  10. orchestrator/cli/search/resize_embedding.py +28 -22
  11. orchestrator/cli/search/speedtest.py +4 -6
  12. orchestrator/db/__init__.py +6 -0
  13. orchestrator/db/models.py +75 -0
  14. orchestrator/migrations/helpers.py +46 -38
  15. orchestrator/schedules/scheduler.py +32 -15
  16. orchestrator/schedules/validate_products.py +1 -1
  17. orchestrator/schemas/search.py +8 -85
  18. orchestrator/search/agent/__init__.py +2 -2
  19. orchestrator/search/agent/agent.py +25 -29
  20. orchestrator/search/agent/json_patch.py +51 -0
  21. orchestrator/search/agent/prompts.py +35 -9
  22. orchestrator/search/agent/state.py +28 -2
  23. orchestrator/search/agent/tools.py +192 -53
  24. orchestrator/search/core/exceptions.py +6 -0
  25. orchestrator/search/core/types.py +1 -0
  26. orchestrator/search/export.py +199 -0
  27. orchestrator/search/indexing/indexer.py +13 -4
  28. orchestrator/search/indexing/registry.py +14 -1
  29. orchestrator/search/llm_migration.py +55 -0
  30. orchestrator/search/retrieval/__init__.py +3 -2
  31. orchestrator/search/retrieval/builder.py +5 -1
  32. orchestrator/search/retrieval/engine.py +66 -23
  33. orchestrator/search/retrieval/pagination.py +46 -56
  34. orchestrator/search/retrieval/query_state.py +61 -0
  35. orchestrator/search/retrieval/retrievers/base.py +26 -40
  36. orchestrator/search/retrieval/retrievers/fuzzy.py +10 -9
  37. orchestrator/search/retrieval/retrievers/hybrid.py +11 -8
  38. orchestrator/search/retrieval/retrievers/semantic.py +9 -8
  39. orchestrator/search/retrieval/retrievers/structured.py +6 -6
  40. orchestrator/search/schemas/parameters.py +17 -13
  41. orchestrator/search/schemas/results.py +4 -1
  42. orchestrator/settings.py +1 -0
  43. orchestrator/utils/auth.py +3 -2
  44. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/METADATA +3 -3
  45. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/RECORD +47 -43
  46. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/WHEEL +0 -0
  47. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/licenses/LICENSE +0 -0
@@ -11,42 +11,21 @@
11
11
  # See the License for the specific language governing permissions and
12
12
  # limitations under the License.
13
13
 
14
- import array
15
14
  import base64
16
- from dataclasses import dataclass
15
+ from uuid import UUID
17
16
 
18
17
  from pydantic import BaseModel
19
18
 
19
+ from orchestrator.db import SearchQueryTable, db
20
20
  from orchestrator.search.core.exceptions import InvalidCursorError
21
- from orchestrator.search.schemas.parameters import BaseSearchParameters
22
- from orchestrator.search.schemas.results import SearchResult
23
-
24
-
25
- @dataclass
26
- class PaginationParams:
27
- """Parameters for pagination in search queries."""
28
-
29
- page_after_score: float | None = None
30
- page_after_id: str | None = None
31
- q_vec_override: list[float] | None = None
32
-
33
-
34
- def floats_to_b64(v: list[float]) -> str:
35
- a = array.array("f", v)
36
- return base64.urlsafe_b64encode(a.tobytes()).decode("ascii")
37
-
38
-
39
- def b64_to_floats(s: str) -> list[float]:
40
- raw = base64.urlsafe_b64decode(s.encode("ascii"))
41
- a = array.array("f")
42
- a.frombytes(raw)
43
- return list(a)
21
+ from orchestrator.search.schemas.parameters import SearchParameters
22
+ from orchestrator.search.schemas.results import SearchResponse
44
23
 
45
24
 
46
25
  class PageCursor(BaseModel):
47
26
  score: float
48
27
  id: str
49
- q_vec_b64: str
28
+ query_id: UUID
50
29
 
51
30
  def encode(self) -> str:
52
31
  """Encode the cursor data into a URL-safe Base64 string."""
@@ -63,34 +42,45 @@ class PageCursor(BaseModel):
63
42
  raise InvalidCursorError("Invalid pagination cursor") from e
64
43
 
65
44
 
66
- async def process_pagination_cursor(cursor: str | None, search_params: BaseSearchParameters) -> PaginationParams:
67
- """Process pagination cursor and return pagination parameters."""
68
- if cursor:
69
- c = PageCursor.decode(cursor)
70
- return PaginationParams(
71
- page_after_score=c.score,
72
- page_after_id=c.id,
73
- q_vec_override=b64_to_floats(c.q_vec_b64),
74
- )
75
- if search_params.vector_query:
76
- from orchestrator.search.core.embedding import QueryEmbedder
77
-
78
- q_vec_override = await QueryEmbedder.generate_for_text_async(search_params.vector_query)
79
- return PaginationParams(q_vec_override=q_vec_override)
80
- return PaginationParams()
81
-
82
-
83
- def create_next_page_cursor(
84
- search_results: list[SearchResult], pagination_params: PaginationParams, limit: int
45
+ def encode_next_page_cursor(
46
+ search_response: SearchResponse,
47
+ cursor: PageCursor | None,
48
+ search_params: SearchParameters,
85
49
  ) -> str | None:
86
- """Create next page cursor if there are more results."""
87
- has_next_page = len(search_results) == limit and limit > 0
88
- if has_next_page:
89
- last_item = search_results[-1]
90
- cursor_data = PageCursor(
91
- score=float(last_item.score),
92
- id=last_item.entity_id,
93
- q_vec_b64=floats_to_b64(pagination_params.q_vec_override or []),
94
- )
95
- return cursor_data.encode()
96
- return None
50
+ """Create next page cursor if there are more results.
51
+
52
+ On first page, saves the query to database and includes query_id in cursor
53
+ for subsequent pages to ensure consistent parameters across pagination.
54
+
55
+ Args:
56
+ search_response: SearchResponse containing results and query_embedding
57
+ cursor: Current page cursor (None for first page, PageCursor for subsequent pages)
58
+ search_params: Search parameters to save for pagination consistency
59
+
60
+ Returns:
61
+ Encoded cursor for next page, or None if no more results
62
+ """
63
+ from orchestrator.search.retrieval.query_state import SearchQueryState
64
+
65
+ has_next_page = len(search_response.results) == search_params.limit and search_params.limit > 0
66
+ if not has_next_page:
67
+ return None
68
+
69
+ # If this is the first page, save query state to database
70
+ if cursor is None:
71
+ query_state = SearchQueryState(parameters=search_params, query_embedding=search_response.query_embedding)
72
+ search_query = SearchQueryTable.from_state(state=query_state)
73
+
74
+ db.session.add(search_query)
75
+ db.session.commit()
76
+ query_id = search_query.query_id
77
+ else:
78
+ query_id = cursor.query_id
79
+
80
+ last_item = search_response.results[-1]
81
+ cursor_data = PageCursor(
82
+ score=float(last_item.score),
83
+ id=last_item.entity_id,
84
+ query_id=query_id,
85
+ )
86
+ return cursor_data.encode()
@@ -0,0 +1,61 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from uuid import UUID
15
+
16
+ from pydantic import BaseModel, ConfigDict, Field
17
+
18
+ from orchestrator.db import SearchQueryTable, db
19
+ from orchestrator.search.core.exceptions import QueryStateNotFoundError
20
+ from orchestrator.search.schemas.parameters import SearchParameters
21
+
22
+
23
+ class SearchQueryState(BaseModel):
24
+ """State of a search query including parameters and embedding.
25
+
26
+ This model provides a complete snapshot of what was searched and how.
27
+ Used for both agent and regular API searches.
28
+ """
29
+
30
+ parameters: SearchParameters = Field(discriminator="entity_type")
31
+ query_embedding: list[float] | None = Field(default=None, description="The embedding vector for semantic search")
32
+
33
+ model_config = ConfigDict(from_attributes=True)
34
+
35
+ @classmethod
36
+ def load_from_id(cls, query_id: UUID | str) -> "SearchQueryState":
37
+ """Load query state from database by query_id.
38
+
39
+ Args:
40
+ query_id: UUID or string UUID of the saved query
41
+
42
+ Returns:
43
+ SearchQueryState loaded from database
44
+
45
+ Raises:
46
+ ValueError: If query_id format is invalid
47
+ QueryStateNotFoundError: If query not found in database
48
+ """
49
+ if isinstance(query_id, UUID):
50
+ query_uuid = query_id
51
+ else:
52
+ try:
53
+ query_uuid = UUID(query_id)
54
+ except (ValueError, TypeError) as e:
55
+ raise ValueError(f"Invalid query_id format: {query_id}") from e
56
+
57
+ search_query = db.session.query(SearchQueryTable).filter_by(query_id=query_uuid).first()
58
+ if not search_query:
59
+ raise QueryStateNotFoundError(f"Query {query_uuid} not found in database")
60
+
61
+ return cls.model_validate(search_query)
@@ -20,7 +20,7 @@ from sqlalchemy import BindParameter, Numeric, Select, literal
20
20
  from orchestrator.search.core.types import FieldType, SearchMetadata
21
21
  from orchestrator.search.schemas.parameters import BaseSearchParameters
22
22
 
23
- from ..pagination import PaginationParams
23
+ from ..pagination import PageCursor
24
24
 
25
25
  logger = structlog.get_logger(__name__)
26
26
 
@@ -41,62 +41,48 @@ class Retriever(ABC):
41
41
  ]
42
42
 
43
43
  @classmethod
44
- async def from_params(
44
+ async def route(
45
45
  cls,
46
46
  params: BaseSearchParameters,
47
- pagination_params: PaginationParams,
47
+ cursor: PageCursor | None,
48
+ query_embedding: list[float] | None = None,
48
49
  ) -> "Retriever":
49
- """Create the appropriate retriever instance from search parameters.
50
+ """Route to the appropriate retriever instance based on search parameters.
51
+
52
+ Selects the retriever type based on available search criteria:
53
+ - Hybrid: both embedding and fuzzy term available
54
+ - Semantic: only embedding available
55
+ - Fuzzy: only text term available (or fallback when embedding generation fails)
56
+ - Structured: only filters available
50
57
 
51
58
  Args:
52
- params (BaseSearchParameters): Search parameters including vector queries, fuzzy terms, and filters.
53
- pagination_params (PaginationParams): Pagination parameters for cursor-based paging.
59
+ params: Search parameters including vector queries, fuzzy terms, and filters
60
+ cursor: Pagination cursor for cursor-based paging
61
+ query_embedding: Query embedding for semantic search, or None if not available
54
62
 
55
63
  Returns:
56
- Retriever: A concrete retriever instance (semantic, fuzzy, hybrid, or structured).
64
+ A concrete retriever instance based on available search criteria
57
65
  """
58
-
59
66
  from .fuzzy import FuzzyRetriever
60
67
  from .hybrid import RrfHybridRetriever
61
68
  from .semantic import SemanticRetriever
62
69
  from .structured import StructuredRetriever
63
70
 
64
71
  fuzzy_term = params.fuzzy_term
65
- q_vec = await cls._get_query_vector(params.vector_query, pagination_params.q_vec_override)
66
-
67
- # If semantic search was attempted but failed, fall back to fuzzy with the full query
68
- fallback_fuzzy_term = fuzzy_term
69
- if q_vec is None and params.vector_query is not None and params.query is not None:
70
- fallback_fuzzy_term = params.query
71
-
72
- if q_vec is not None and fallback_fuzzy_term is not None:
73
- return RrfHybridRetriever(q_vec, fallback_fuzzy_term, pagination_params)
74
- if q_vec is not None:
75
- return SemanticRetriever(q_vec, pagination_params)
76
- if fallback_fuzzy_term is not None:
77
- return FuzzyRetriever(fallback_fuzzy_term, pagination_params)
78
-
79
- return StructuredRetriever(pagination_params)
80
-
81
- @classmethod
82
- async def _get_query_vector(
83
- cls, vector_query: str | None, q_vec_override: list[float] | None
84
- ) -> list[float] | None:
85
- """Get query vector either from override or by generating from text."""
86
- if q_vec_override:
87
- return q_vec_override
88
-
89
- if not vector_query:
90
- return None
91
72
 
92
- from orchestrator.search.core.embedding import QueryEmbedder
73
+ # If vector_query exists but embedding generation failed, fall back to fuzzy search with full query
74
+ if query_embedding is None and params.vector_query is not None and params.query is not None:
75
+ fuzzy_term = params.query
93
76
 
94
- q_vec = await QueryEmbedder.generate_for_text_async(vector_query)
95
- if not q_vec:
96
- logger.warning("Embedding generation failed; using non-semantic retriever")
97
- return None
77
+ # Select retriever based on available search criteria
78
+ if query_embedding is not None and fuzzy_term is not None:
79
+ return RrfHybridRetriever(query_embedding, fuzzy_term, cursor)
80
+ if query_embedding is not None:
81
+ return SemanticRetriever(query_embedding, cursor)
82
+ if fuzzy_term is not None:
83
+ return FuzzyRetriever(fuzzy_term, cursor)
98
84
 
99
- return q_vec
85
+ return StructuredRetriever(cursor)
100
86
 
101
87
  @abstractmethod
102
88
  def apply(self, candidate_query: Select) -> Select:
@@ -17,17 +17,16 @@ from sqlalchemy.sql.expression import ColumnElement
17
17
  from orchestrator.db.models import AiSearchIndex
18
18
  from orchestrator.search.core.types import SearchMetadata
19
19
 
20
- from ..pagination import PaginationParams
20
+ from ..pagination import PageCursor
21
21
  from .base import Retriever
22
22
 
23
23
 
24
24
  class FuzzyRetriever(Retriever):
25
25
  """Ranks results based on the max of fuzzy text similarity scores."""
26
26
 
27
- def __init__(self, fuzzy_term: str, pagination_params: PaginationParams) -> None:
27
+ def __init__(self, fuzzy_term: str, cursor: PageCursor | None) -> None:
28
28
  self.fuzzy_term = fuzzy_term
29
- self.page_after_score = pagination_params.page_after_score
30
- self.page_after_id = pagination_params.page_after_id
29
+ self.cursor = cursor
31
30
 
32
31
  def apply(self, candidate_query: Select) -> Select:
33
32
  cand = candidate_query.subquery()
@@ -42,6 +41,7 @@ class FuzzyRetriever(Retriever):
42
41
  combined_query = (
43
42
  select(
44
43
  AiSearchIndex.entity_id,
44
+ AiSearchIndex.entity_title,
45
45
  score,
46
46
  func.first_value(AiSearchIndex.value)
47
47
  .over(partition_by=AiSearchIndex.entity_id, order_by=[similarity_expr.desc(), AiSearchIndex.path.asc()])
@@ -58,12 +58,13 @@ class FuzzyRetriever(Retriever):
58
58
  literal(self.fuzzy_term).op("<%")(AiSearchIndex.value),
59
59
  )
60
60
  )
61
- .distinct(AiSearchIndex.entity_id)
61
+ .distinct(AiSearchIndex.entity_id, AiSearchIndex.entity_title)
62
62
  )
63
63
  final_query = combined_query.subquery("ranked_fuzzy")
64
64
 
65
65
  stmt = select(
66
66
  final_query.c.entity_id,
67
+ final_query.c.entity_title,
67
68
  final_query.c.score,
68
69
  final_query.c.highlight_text,
69
70
  final_query.c.highlight_path,
@@ -81,13 +82,13 @@ class FuzzyRetriever(Retriever):
81
82
  self, stmt: Select, score_column: ColumnElement, entity_id_column: ColumnElement
82
83
  ) -> Select:
83
84
  """Apply standard score + entity_id pagination."""
84
- if self.page_after_score is not None and self.page_after_id is not None:
85
+ if self.cursor is not None:
85
86
  stmt = stmt.where(
86
87
  or_(
87
- score_column < self.page_after_score,
88
+ score_column < self.cursor.score,
88
89
  and_(
89
- score_column == self.page_after_score,
90
- entity_id_column > self.page_after_id,
90
+ score_column == self.cursor.score,
91
+ entity_id_column > self.cursor.id,
91
92
  ),
92
93
  )
93
94
  )
@@ -20,7 +20,7 @@ from sqlalchemy.types import TypeEngine
20
20
  from orchestrator.db.models import AiSearchIndex
21
21
  from orchestrator.search.core.types import SearchMetadata
22
22
 
23
- from ..pagination import PaginationParams
23
+ from ..pagination import PageCursor
24
24
  from .base import Retriever
25
25
 
26
26
 
@@ -127,14 +127,13 @@ class RrfHybridRetriever(Retriever):
127
127
  self,
128
128
  q_vec: list[float],
129
129
  fuzzy_term: str,
130
- pagination_params: PaginationParams,
130
+ cursor: PageCursor | None,
131
131
  k: int = 60,
132
132
  field_candidates_limit: int = 100,
133
133
  ) -> None:
134
134
  self.q_vec = q_vec
135
135
  self.fuzzy_term = fuzzy_term
136
- self.page_after_score = pagination_params.page_after_score
137
- self.page_after_id = pagination_params.page_after_id
136
+ self.cursor = cursor
138
137
  self.k = k
139
138
  self.field_candidates_limit = field_candidates_limit
140
139
 
@@ -154,6 +153,7 @@ class RrfHybridRetriever(Retriever):
154
153
  field_candidates = (
155
154
  select(
156
155
  AiSearchIndex.entity_id,
156
+ AiSearchIndex.entity_title,
157
157
  AiSearchIndex.path,
158
158
  AiSearchIndex.value,
159
159
  sem_val,
@@ -178,9 +178,10 @@ class RrfHybridRetriever(Retriever):
178
178
  entity_scores = (
179
179
  select(
180
180
  field_candidates.c.entity_id,
181
+ field_candidates.c.entity_title,
181
182
  func.avg(field_candidates.c.semantic_distance).label("avg_semantic_distance"),
182
183
  func.avg(field_candidates.c.fuzzy_score).label("avg_fuzzy_score"),
183
- ).group_by(field_candidates.c.entity_id)
184
+ ).group_by(field_candidates.c.entity_id, field_candidates.c.entity_title)
184
185
  ).cte("entity_scores")
185
186
 
186
187
  entity_highlights = (
@@ -204,6 +205,7 @@ class RrfHybridRetriever(Retriever):
204
205
  ranked = (
205
206
  select(
206
207
  entity_scores.c.entity_id,
208
+ entity_scores.c.entity_title,
207
209
  entity_scores.c.avg_semantic_distance,
208
210
  entity_scores.c.avg_fuzzy_score,
209
211
  entity_highlights.c.highlight_text,
@@ -242,6 +244,7 @@ class RrfHybridRetriever(Retriever):
242
244
 
243
245
  stmt = select(
244
246
  ranked.c.entity_id,
247
+ ranked.c.entity_title,
245
248
  score,
246
249
  ranked.c.highlight_text,
247
250
  ranked.c.highlight_path,
@@ -262,12 +265,12 @@ class RrfHybridRetriever(Retriever):
262
265
  entity_id_column: ColumnElement,
263
266
  ) -> Select:
264
267
  """Keyset paginate by fused score + id."""
265
- if self.page_after_score is not None and self.page_after_id is not None:
266
- score_param = self._quantize_score_for_pagination(self.page_after_score)
268
+ if self.cursor is not None:
269
+ score_param = self._quantize_score_for_pagination(self.cursor.score)
267
270
  stmt = stmt.where(
268
271
  or_(
269
272
  score_column < score_param,
270
- and_(score_column == score_param, entity_id_column > self.page_after_id),
273
+ and_(score_column == score_param, entity_id_column > self.cursor.id),
271
274
  )
272
275
  )
273
276
  return stmt
@@ -17,17 +17,16 @@ from sqlalchemy.sql.expression import ColumnElement
17
17
  from orchestrator.db.models import AiSearchIndex
18
18
  from orchestrator.search.core.types import SearchMetadata
19
19
 
20
- from ..pagination import PaginationParams
20
+ from ..pagination import PageCursor
21
21
  from .base import Retriever
22
22
 
23
23
 
24
24
  class SemanticRetriever(Retriever):
25
25
  """Ranks results based on the minimum semantic vector distance."""
26
26
 
27
- def __init__(self, vector_query: list[float], pagination_params: PaginationParams) -> None:
27
+ def __init__(self, vector_query: list[float], cursor: PageCursor | None) -> None:
28
28
  self.vector_query = vector_query
29
- self.page_after_score = pagination_params.page_after_score
30
- self.page_after_id = pagination_params.page_after_id
29
+ self.cursor = cursor
31
30
 
32
31
  def apply(self, candidate_query: Select) -> Select:
33
32
  cand = candidate_query.subquery()
@@ -49,6 +48,7 @@ class SemanticRetriever(Retriever):
49
48
  combined_query = (
50
49
  select(
51
50
  AiSearchIndex.entity_id,
51
+ AiSearchIndex.entity_title,
52
52
  score,
53
53
  func.first_value(AiSearchIndex.value)
54
54
  .over(partition_by=AiSearchIndex.entity_id, order_by=[dist.asc(), AiSearchIndex.path.asc()])
@@ -60,12 +60,13 @@ class SemanticRetriever(Retriever):
60
60
  .select_from(AiSearchIndex)
61
61
  .join(cand, cand.c.entity_id == AiSearchIndex.entity_id)
62
62
  .where(AiSearchIndex.embedding.isnot(None))
63
- .distinct(AiSearchIndex.entity_id)
63
+ .distinct(AiSearchIndex.entity_id, AiSearchIndex.entity_title)
64
64
  )
65
65
  final_query = combined_query.subquery("ranked_semantic")
66
66
 
67
67
  stmt = select(
68
68
  final_query.c.entity_id,
69
+ final_query.c.entity_title,
69
70
  final_query.c.score,
70
71
  final_query.c.highlight_text,
71
72
  final_query.c.highlight_path,
@@ -83,12 +84,12 @@ class SemanticRetriever(Retriever):
83
84
  self, stmt: Select, score_column: ColumnElement, entity_id_column: ColumnElement
84
85
  ) -> Select:
85
86
  """Apply semantic score pagination with precise Decimal handling."""
86
- if self.page_after_score is not None and self.page_after_id is not None:
87
- score_param = self._quantize_score_for_pagination(self.page_after_score)
87
+ if self.cursor is not None:
88
+ score_param = self._quantize_score_for_pagination(self.cursor.score)
88
89
  stmt = stmt.where(
89
90
  or_(
90
91
  score_column < score_param,
91
- and_(score_column == score_param, entity_id_column > self.page_after_id),
92
+ and_(score_column == score_param, entity_id_column > self.cursor.id),
92
93
  )
93
94
  )
94
95
  return stmt
@@ -15,22 +15,22 @@ from sqlalchemy import Select, literal, select
15
15
 
16
16
  from orchestrator.search.core.types import SearchMetadata
17
17
 
18
- from ..pagination import PaginationParams
18
+ from ..pagination import PageCursor
19
19
  from .base import Retriever
20
20
 
21
21
 
22
22
  class StructuredRetriever(Retriever):
23
23
  """Applies a dummy score for purely structured searches with no text query."""
24
24
 
25
- def __init__(self, pagination_params: PaginationParams) -> None:
26
- self.page_after_id = pagination_params.page_after_id
25
+ def __init__(self, cursor: PageCursor | None) -> None:
26
+ self.cursor = cursor
27
27
 
28
28
  def apply(self, candidate_query: Select) -> Select:
29
29
  cand = candidate_query.subquery()
30
- stmt = select(cand.c.entity_id, literal(1.0).label("score")).select_from(cand)
30
+ stmt = select(cand.c.entity_id, cand.c.entity_title, literal(1.0).label("score")).select_from(cand)
31
31
 
32
- if self.page_after_id:
33
- stmt = stmt.where(cand.c.entity_id > self.page_after_id)
32
+ if self.cursor is not None:
33
+ stmt = stmt.where(cand.c.entity_id > self.cursor.id)
34
34
 
35
35
  return stmt.order_by(cand.c.entity_id.asc())
36
36
 
@@ -12,9 +12,9 @@
12
12
  # limitations under the License.
13
13
 
14
14
  import uuid
15
- from typing import Any, Literal
15
+ from typing import Any, ClassVar, Literal
16
16
 
17
- from pydantic import BaseModel, ConfigDict, Field
17
+ from pydantic import BaseModel, ConfigDict, Field, TypeAdapter
18
18
 
19
19
  from orchestrator.search.core.types import ActionType, EntityType
20
20
  from orchestrator.search.filters import FilterTree
@@ -23,6 +23,9 @@ from orchestrator.search.filters import FilterTree
23
23
  class BaseSearchParameters(BaseModel):
24
24
  """Base model with common search parameters."""
25
25
 
26
+ DEFAULT_EXPORT_LIMIT: ClassVar[int] = 1000
27
+ MAX_EXPORT_LIMIT: ClassVar[int] = 10000
28
+
26
29
  action: ActionType = Field(default=ActionType.SELECT, description="The action to perform.")
27
30
  entity_type: EntityType
28
31
 
@@ -33,14 +36,18 @@ class BaseSearchParameters(BaseModel):
33
36
  )
34
37
 
35
38
  limit: int = Field(default=10, ge=1, le=30, description="Maximum number of search results to return.")
39
+ export_limit: int = Field(
40
+ default=DEFAULT_EXPORT_LIMIT, ge=1, le=MAX_EXPORT_LIMIT, description="Maximum number of results to export."
41
+ )
36
42
  model_config = ConfigDict(extra="forbid")
37
43
 
38
44
  @classmethod
39
- def create(cls, entity_type: EntityType, **kwargs: Any) -> "BaseSearchParameters":
40
- try:
41
- return PARAMETER_REGISTRY[entity_type](entity_type=entity_type, **kwargs)
42
- except KeyError:
43
- raise ValueError(f"No search parameter class found for entity type: {entity_type.value}")
45
+ def create(cls, **kwargs: Any) -> "SearchParameters":
46
+ """Create the correct search parameter subclass instance based on entity_type."""
47
+ from orchestrator.search.schemas.parameters import SearchParameters
48
+
49
+ adapter: TypeAdapter = TypeAdapter(SearchParameters)
50
+ return adapter.validate_python(kwargs)
44
51
 
45
52
  @property
46
53
  def vector_query(self) -> str | None:
@@ -121,9 +128,6 @@ class ProcessSearchParameters(BaseSearchParameters):
121
128
  )
122
129
 
123
130
 
124
- PARAMETER_REGISTRY: dict[EntityType, type[BaseSearchParameters]] = {
125
- EntityType.SUBSCRIPTION: SubscriptionSearchParameters,
126
- EntityType.PRODUCT: ProductSearchParameters,
127
- EntityType.WORKFLOW: WorkflowSearchParameters,
128
- EntityType.PROCESS: ProcessSearchParameters,
129
- }
131
+ SearchParameters = (
132
+ SubscriptionSearchParameters | ProductSearchParameters | WorkflowSearchParameters | ProcessSearchParameters
133
+ )
@@ -15,7 +15,7 @@ from typing import Literal
15
15
 
16
16
  from pydantic import BaseModel, ConfigDict
17
17
 
18
- from orchestrator.search.core.types import FilterOp, SearchMetadata, UIType
18
+ from orchestrator.search.core.types import EntityType, FilterOp, SearchMetadata, UIType
19
19
 
20
20
 
21
21
  class MatchingField(BaseModel):
@@ -30,6 +30,8 @@ class SearchResult(BaseModel):
30
30
  """Represents a single search result item."""
31
31
 
32
32
  entity_id: str
33
+ entity_type: EntityType
34
+ entity_title: str
33
35
  score: float
34
36
  perfect_match: int = 0
35
37
  matching_field: MatchingField | None = None
@@ -40,6 +42,7 @@ class SearchResponse(BaseModel):
40
42
 
41
43
  results: list[SearchResult]
42
44
  metadata: SearchMetadata
45
+ query_embedding: list[float] | None = None
43
46
 
44
47
 
45
48
  class ValueSchema(BaseModel):
orchestrator/settings.py CHANGED
@@ -57,6 +57,7 @@ class AppSettings(BaseSettings):
57
57
  EXECUTOR: str = ExecutorType.THREADPOOL
58
58
  WORKFLOWS_SWAGGER_HOST: str = "localhost"
59
59
  WORKFLOWS_GUI_URI: str = "http://localhost:3000"
60
+ BASE_URL: str = "http://localhost:8080" # Base URL for the API (used for generating export URLs)
60
61
  DATABASE_URI: PostgresDsn = "postgresql://nwa:nwa@localhost/orchestrator-core" # type: ignore
61
62
  MAX_WORKERS: int = 5
62
63
  MAIL_SERVER: str = "localhost"
@@ -1,9 +1,10 @@
1
1
  from collections.abc import Callable
2
- from typing import TypeAlias
2
+ from typing import TypeAlias, TypeVar
3
3
 
4
4
  from oauth2_lib.fastapi import OIDCUserModel
5
5
 
6
6
  # This file is broken out separately to avoid circular imports.
7
7
 
8
8
  # Can instead use "type Authorizer = ..." in later Python versions.
9
- Authorizer: TypeAlias = Callable[[OIDCUserModel | None], bool]
9
+ T = TypeVar("T", bound=OIDCUserModel)
10
+ Authorizer: TypeAlias = Callable[[T | None], bool]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: orchestrator-core
3
- Version: 4.5.3
3
+ Version: 4.6.0rc2
4
4
  Summary: This is the orchestrator workflow engine.
5
5
  Author-email: SURF <automation-beheer@surf.nl>
6
6
  Requires-Python: >=3.11,<3.14
@@ -42,7 +42,7 @@ Requires-Dist: itsdangerous>=2.2.0
42
42
  Requires-Dist: jinja2==3.1.6
43
43
  Requires-Dist: more-itertools~=10.7.0
44
44
  Requires-Dist: nwa-stdlib~=1.9.2
45
- Requires-Dist: oauth2-lib>=2.4.1
45
+ Requires-Dist: oauth2-lib==2.4.2
46
46
  Requires-Dist: orjson==3.10.18
47
47
  Requires-Dist: pgvector>=0.4.1
48
48
  Requires-Dist: prometheus-client==0.22.1
@@ -63,7 +63,7 @@ Requires-Dist: structlog>=25.4.0
63
63
  Requires-Dist: tabulate==0.9.0
64
64
  Requires-Dist: typer==0.15.4
65
65
  Requires-Dist: uvicorn[standard]~=0.34.0
66
- Requires-Dist: pydantic-ai-slim ==0.7.0 ; extra == "agent"
66
+ Requires-Dist: pydantic-ai-slim >=1.3.0 ; extra == "agent"
67
67
  Requires-Dist: ag-ui-protocol>=0.1.8 ; extra == "agent"
68
68
  Requires-Dist: litellm>=1.75.7 ; extra == "agent"
69
69
  Requires-Dist: celery~=5.5.1 ; extra == "celery"