orchestrator-core 4.6.1__py3-none-any.whl → 4.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +1 -1
- orchestrator/api/api_v1/endpoints/processes.py +4 -1
- orchestrator/api/api_v1/endpoints/search.py +44 -34
- orchestrator/{search/retrieval/utils.py → cli/search/display.py} +4 -29
- orchestrator/cli/search/search_explore.py +22 -24
- orchestrator/cli/search/speedtest.py +11 -9
- orchestrator/db/models.py +6 -6
- orchestrator/graphql/resolvers/helpers.py +15 -0
- orchestrator/graphql/resolvers/process.py +5 -3
- orchestrator/graphql/resolvers/product.py +3 -2
- orchestrator/graphql/resolvers/product_block.py +3 -2
- orchestrator/graphql/resolvers/resource_type.py +3 -2
- orchestrator/graphql/resolvers/scheduled_tasks.py +3 -1
- orchestrator/graphql/resolvers/settings.py +2 -0
- orchestrator/graphql/resolvers/subscription.py +5 -3
- orchestrator/graphql/resolvers/version.py +2 -0
- orchestrator/graphql/resolvers/workflow.py +3 -2
- orchestrator/graphql/schemas/process.py +3 -3
- orchestrator/log_config.py +2 -0
- orchestrator/schemas/search.py +1 -1
- orchestrator/schemas/search_requests.py +59 -0
- orchestrator/search/agent/handlers.py +129 -0
- orchestrator/search/agent/prompts.py +54 -33
- orchestrator/search/agent/state.py +9 -24
- orchestrator/search/agent/tools.py +223 -144
- orchestrator/search/agent/validation.py +80 -0
- orchestrator/search/{schemas → aggregations}/__init__.py +20 -0
- orchestrator/search/aggregations/base.py +201 -0
- orchestrator/search/core/types.py +3 -2
- orchestrator/search/filters/__init__.py +4 -0
- orchestrator/search/filters/definitions.py +22 -1
- orchestrator/search/filters/numeric_filter.py +3 -3
- orchestrator/search/llm_migration.py +2 -1
- orchestrator/search/query/__init__.py +90 -0
- orchestrator/search/query/builder.py +285 -0
- orchestrator/search/query/engine.py +162 -0
- orchestrator/search/{retrieval → query}/exceptions.py +38 -7
- orchestrator/search/query/mixins.py +95 -0
- orchestrator/search/query/queries.py +129 -0
- orchestrator/search/query/results.py +252 -0
- orchestrator/search/{retrieval/query_state.py → query/state.py} +31 -11
- orchestrator/search/{retrieval → query}/validation.py +58 -1
- orchestrator/search/retrieval/__init__.py +0 -5
- orchestrator/search/retrieval/pagination.py +7 -8
- orchestrator/search/retrieval/retrievers/base.py +9 -9
- orchestrator/workflows/translations/en-GB.json +1 -0
- {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/METADATA +16 -15
- {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/RECORD +51 -45
- orchestrator/search/retrieval/builder.py +0 -127
- orchestrator/search/retrieval/engine.py +0 -197
- orchestrator/search/schemas/parameters.py +0 -133
- orchestrator/search/schemas/results.py +0 -80
- /orchestrator/search/{export.py → query/export.py} +0 -0
- {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
"""Query execution result models and formatting functions."""
|
|
15
|
+
|
|
16
|
+
from collections.abc import Sequence
|
|
17
|
+
from typing import Literal
|
|
18
|
+
|
|
19
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
20
|
+
from sqlalchemy.engine.row import RowMapping
|
|
21
|
+
|
|
22
|
+
from orchestrator.search.core.types import EntityType, FilterOp, SearchMetadata
|
|
23
|
+
from orchestrator.search.filters import FilterTree
|
|
24
|
+
|
|
25
|
+
from .queries import AggregateQuery, CountQuery, ExportQuery, SelectQuery
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class VisualizationType(BaseModel):
    """Visualization type for aggregation results.

    Choose based on the query context:
    - 'pie': For categorical distributions (e.g., subscriptions by status, products by type)
    - 'line': For time-series data (e.g., subscriptions created per month, trends over time)
    - 'table': For detailed data or multiple grouping dimensions (default)
    """

    # Only the three render types listed above are accepted; "table" is used when none is given.
    type: Literal["pie", "line", "table"] = Field(
        default="table",
        description="Visualization render type: 'pie' for categorical distributions, 'line' for time-series, 'table' for detailed data",
    )
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class MatchingField(BaseModel):
    """Contains the field that contributed most to the (fuzzy) search result."""

    # The matched value, rendered as text.
    text: str
    # Path of the field that produced the match.
    path: str
    # (start, end) offsets into `text`; the end offset is exclusive (regex match end). None when nothing is highlighted.
    highlight_indices: list[tuple[int, int]] | None = None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class SearchResult(BaseModel):
    """Represents a single search result item."""

    # format_search_response fills this with str(row.entity_id).
    entity_id: str
    entity_type: EntityType
    # Empty string when the row has no title.
    entity_title: str
    score: float
    # Taken from the row's "perfect_match" column when present, otherwise 0.
    perfect_match: int = 0
    # None when neither a highlight nor a filter-derived match is available.
    matching_field: MatchingField | None = None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class SearchResponse(BaseModel):
    """Response containing search results and metadata."""

    results: list[SearchResult]
    metadata: SearchMetadata
    # Embedding of the query text, when one was generated; encode_next_page_cursor saves it
    # with the query state on the first page.
    query_embedding: list[float] | None = None
    # encode_next_page_cursor returns no cursor when this is False.
    has_more: bool = False
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class AggregationResult(BaseModel):
    """Represents a single aggregation result row."""

    # format_aggregation_response stringifies group values and maps NULL to "".
    group_values: dict[str, str] = Field(default_factory=dict)  # group_by field -> value
    # format_aggregation_response maps NULL aggregation values to 0.
    aggregations: dict[str, float | int] = Field(default_factory=dict)  # alias -> computed value

    # Unknown keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class AggregationResponse(BaseModel):
    """Response containing aggregation results."""

    results: list[AggregationResult]
    # format_aggregation_response sets this to len(results).
    total_groups: int
    metadata: SearchMetadata
    # Defaults to a fresh VisualizationType, i.e. the "table" render type.
    visualization_type: VisualizationType = Field(default_factory=VisualizationType)

    # Unknown keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class ExportData(BaseModel):
    """Export metadata for download."""

    # Fixed marker value unless the caller overrides it.
    action: str = "export"
    # NOTE(review): presumably the id of the saved query to export — confirm against the export handler.
    query_id: str
    download_url: str
    message: str
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def format_aggregation_response(
    result_rows: Sequence[RowMapping],
    group_column_names: list[str],
    query: CountQuery | AggregateQuery,
) -> AggregationResponse:
    """Convert raw aggregation rows into an `AggregationResponse`.

    Every column of a row is either a grouping column (its name appears in
    `group_column_names`) or an aggregation value. Grouping values are
    stringified, with NULL becoming an empty string; aggregation values are
    passed through, with NULL becoming 0.

    Args:
        result_rows: Raw database result rows
        group_column_names: Names of the columns that are grouping columns
        query: Query plan; only `group_by` is read, for the metadata description

    Returns:
        AggregationResponse holding one AggregationResult per row, the row count
        as `total_groups`, and "aggregation" search metadata
    """
    formatted = []
    for record in result_rows:
        groups = {}
        computed = {}

        for column, cell in record.items():
            if column not in group_column_names:
                # Anything that is not a grouping column is an aggregation result.
                computed[column] = 0 if cell is None else cell
                continue
            groups[column] = "" if cell is None else str(cell)

        formatted.append(AggregationResult(group_values=groups, aggregations=computed))

    return AggregationResponse(
        results=formatted,
        total_groups=len(formatted),
        metadata=SearchMetadata(
            search_type="aggregation",
            description=f"Aggregation query with {len(query.group_by or [])} grouping dimension(s)",
        ),
    )
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def generate_highlight_indices(text: str, term: str) -> list[tuple[int, int]]:
    """Locate every occurrence in `text` of each whitespace-separated word of `term`.

    Each word is searched case-insensitively twice: once anchored on word
    boundaries and once as a plain substring. The spans of both passes are
    combined, de-duplicated and returned in ascending order.

    Args:
        text: The text to search in.
        term: The search term; it is split on whitespace into individual words.

    Returns:
        Sorted, unique (start, end) offsets into `text` (end exclusive), or an
        empty list when either argument is empty.
    """
    import re

    if not (text and term):
        return []

    spans = set()
    for token in term.split():
        literal = re.escape(token)
        # Both passes are kept: for words containing non-word characters the
        # boundary-anchored scan can land on spans the plain scan skips over.
        for pattern in (rf"\b{literal}\b", literal):
            spans.update(hit.span() for hit in re.finditer(pattern, text, re.IGNORECASE))

    return sorted(spans)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def format_search_response(
    db_rows: Sequence[RowMapping], query: "SelectQuery | ExportQuery", metadata: SearchMetadata
) -> SearchResponse:
    """Format database query results into a `SearchResponse`.

    Converts raw SQLAlchemy `RowMapping` objects into `SearchResult` instances,
    including highlight metadata if present in the database results.

    The matching field of each result is determined as follows:
    - when the query has text and the row carries both highlight columns, the
      highlight text/path of that row is used and highlight offsets are computed
      from the query text;
    - when the query has no text but has filters and the search type is
      "structured", the matching field is derived from the filters;
    - otherwise it is None.

    Args:
        db_rows: The rows returned from the executed SQLAlchemy query.
        query: SelectQuery or ExportQuery with search criteria.
        metadata: Metadata about the search execution.

    Returns:
        SearchResponse: A list of `SearchResult` objects containing entity IDs, scores,
            and optional highlight information. Empty results when `db_rows` is empty.
    """
    from orchestrator.search.retrieval.retrievers import Retriever

    if not db_rows:
        return SearchResponse(results=[], metadata=metadata)

    user_query = query.query_text

    # Structured search (filter-only): the matching field depends only on the query
    # and the metadata, never on the row, so resolve it once instead of walking the
    # filter tree for every row. All rows then share this one MatchingField instance.
    structured_match = None
    if not user_query and query.filters and metadata.search_type == "structured":
        structured_match = _extract_matching_field_from_filters(query.filters)

    results = []
    for row in db_rows:
        # Stays None whenever a text query is present but the row has no highlight.
        matching_field = structured_match

        if (
            user_query
            and (text := row.get(Retriever.HIGHLIGHT_TEXT_LABEL)) is not None
            and (path := row.get(Retriever.HIGHLIGHT_PATH_LABEL)) is not None
        ):
            if not isinstance(text, str):
                text = str(text)
            if not isinstance(path, str):
                path = str(path)

            # An empty list of offsets is reported as None.
            highlight_indices = generate_highlight_indices(text, user_query) or None
            matching_field = MatchingField(text=text, path=path, highlight_indices=highlight_indices)

        entity_title = row.get("entity_title", "")
        if not isinstance(entity_title, str):
            entity_title = str(entity_title) if entity_title is not None else ""

        results.append(
            SearchResult(
                entity_id=str(row.entity_id),
                entity_type=query.entity_type,
                entity_title=entity_title,
                score=row.score,
                perfect_match=row.get("perfect_match", 0),
                matching_field=matching_field,
            )
        )
    return SearchResponse(results=results, metadata=metadata)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _extract_matching_field_from_filters(filters: "FilterTree") -> MatchingField | None:
    """Derive a matching field from the filter tree of a structured search.

    Only trees with exactly one leaf yield a matching field. The whole text of
    the resulting field is marked as highlighted.

    Args:
        filters: The filter tree of the query.

    Returns:
        A MatchingField describing the single path filter, or None when the tree
        does not have exactly one leaf or the filter tests for a missing component.
    """
    from orchestrator.search.filters import LtreeFilter

    leaves = filters.get_all_leaves()
    if len(leaves) != 1:
        return None

    leaf = leaves[0]

    if not isinstance(leaf.condition, LtreeFilter):
        # Everything that's not Ltree: show the filter value under the filter's own path.
        raw_value = getattr(leaf.condition, "value", "")
        text = str(raw_value) if raw_value is not None else ""
        return MatchingField(text=text, path=leaf.path, highlight_indices=[(0, len(text))])

    operator = leaf.condition.op
    # Prefer the original component/pattern (validator may set path="*" and move the value)
    display = str(getattr(leaf.condition, "value", "") or leaf.path)

    # There can be no match for absence.
    if operator == FilterOp.NOT_HAS_COMPONENT:
        return None

    return MatchingField(text=display, path=display, highlight_indices=[(0, len(display))])
|
|
@@ -11,39 +11,47 @@
|
|
|
11
11
|
# See the License for the specific language governing permissions and
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
|
|
14
|
+
from typing import Generic, TypeVar, cast
|
|
14
15
|
from uuid import UUID
|
|
15
16
|
|
|
17
|
+
import structlog
|
|
16
18
|
from pydantic import BaseModel, ConfigDict, Field
|
|
17
19
|
|
|
18
20
|
from orchestrator.db import SearchQueryTable, db
|
|
19
21
|
from orchestrator.search.core.exceptions import QueryStateNotFoundError
|
|
20
|
-
from orchestrator.search.
|
|
22
|
+
from orchestrator.search.query.queries import BaseQuery, Query
|
|
21
23
|
|
|
24
|
+
logger = structlog.get_logger(__name__)
|
|
22
25
|
|
|
23
|
-
|
|
24
|
-
"""State of a search query including parameters and embedding.
|
|
26
|
+
T = TypeVar("T", bound=Query)
|
|
25
27
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
+
|
|
29
|
+
class QueryState(BaseModel, Generic[T]):
|
|
30
|
+
"""State of a query including parameters and embedding.
|
|
31
|
+
|
|
32
|
+
Thin wrapper around SearchQueryTable that stores query as JSONB blob.
|
|
33
|
+
Generic over query type for type-safe loading.
|
|
34
|
+
Used for both agent and regular API queries.
|
|
28
35
|
"""
|
|
29
36
|
|
|
30
|
-
|
|
37
|
+
query: T
|
|
31
38
|
query_embedding: list[float] | None = Field(default=None, description="The embedding vector for semantic search")
|
|
32
39
|
|
|
33
40
|
model_config = ConfigDict(from_attributes=True)
|
|
34
41
|
|
|
35
42
|
@classmethod
|
|
36
|
-
def load_from_id(cls, query_id: UUID | str) -> "
|
|
37
|
-
"""Load query state from database by query_id.
|
|
43
|
+
def load_from_id(cls, query_id: UUID | str, expected_type: type[T]) -> "QueryState[T]":
|
|
44
|
+
"""Load query state from database by query_id with type validation.
|
|
38
45
|
|
|
39
46
|
Args:
|
|
40
47
|
query_id: UUID or string UUID of the saved query
|
|
48
|
+
expected_type: Expected query type class (SelectQuery, ExportQuery, etc.)
|
|
41
49
|
|
|
42
50
|
Returns:
|
|
43
|
-
|
|
51
|
+
QueryState with validated query type
|
|
44
52
|
|
|
45
53
|
Raises:
|
|
46
|
-
ValueError: If query_id format is invalid
|
|
54
|
+
ValueError: If query_id format is invalid or query type doesn't match expected
|
|
47
55
|
QueryStateNotFoundError: If query not found in database
|
|
48
56
|
"""
|
|
49
57
|
if isinstance(query_id, UUID):
|
|
@@ -58,4 +66,16 @@ class SearchQueryState(BaseModel):
|
|
|
58
66
|
if not search_query:
|
|
59
67
|
raise QueryStateNotFoundError(f"Query {query_uuid} not found in database")
|
|
60
68
|
|
|
61
|
-
|
|
69
|
+
# Clamp limit to valid range to handle legacy queries outside the current limits
|
|
70
|
+
if "limit" in search_query.parameters and search_query.parameters["limit"] > BaseQuery.MAX_LIMIT:
|
|
71
|
+
logger.warning(
|
|
72
|
+
"Loaded query limit exceeds maximum, clamping to MAX_LIMIT",
|
|
73
|
+
query_id=query_uuid,
|
|
74
|
+
original_limit=search_query.parameters["limit"],
|
|
75
|
+
clamped_to=BaseQuery.MAX_LIMIT,
|
|
76
|
+
)
|
|
77
|
+
search_query.parameters["limit"] = BaseQuery.MAX_LIMIT
|
|
78
|
+
|
|
79
|
+
query = cast(T, expected_type.from_dict(search_query.parameters))
|
|
80
|
+
|
|
81
|
+
return cls(query=query, query_embedding=search_query.query_embedding)
|
|
@@ -18,12 +18,15 @@ from sqlalchemy_utils import Ltree
|
|
|
18
18
|
from orchestrator.db import db
|
|
19
19
|
from orchestrator.db.database import WrappedSession
|
|
20
20
|
from orchestrator.db.models import AiSearchIndex
|
|
21
|
+
from orchestrator.search.aggregations import AggregationType
|
|
21
22
|
from orchestrator.search.core.types import EntityType, FieldType
|
|
22
23
|
from orchestrator.search.filters import FilterCondition, FilterTree, LtreeFilter, PathFilter
|
|
23
24
|
from orchestrator.search.filters.definitions import operators_for
|
|
24
|
-
from orchestrator.search.
|
|
25
|
+
from orchestrator.search.query.exceptions import (
|
|
25
26
|
EmptyFilterPathError,
|
|
27
|
+
IncompatibleAggregationTypeError,
|
|
26
28
|
IncompatibleFilterTypeError,
|
|
29
|
+
IncompatibleTemporalGroupingTypeError,
|
|
27
30
|
InvalidEntityPrefixError,
|
|
28
31
|
InvalidLtreePatternError,
|
|
29
32
|
PathNotFoundError,
|
|
@@ -150,3 +153,57 @@ async def validate_filter_tree(filters: FilterTree | None, entity_type: EntityTy
|
|
|
150
153
|
return
|
|
151
154
|
for leaf in filters.get_all_leaves():
|
|
152
155
|
await complete_filter_validation(leaf, entity_type)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def validate_aggregation_field(agg_type: AggregationType, field_path: str) -> None:
    """Check that an aggregation field exists and suits the aggregation type.

    SUM and AVG require an integer or float field; MIN and MAX additionally
    accept datetime fields. Other aggregation types only get the existence check.

    Note: Only for FieldAggregations (SUM, AVG, MIN, MAX). COUNT does not require field validation.

    Args:
        agg_type: The aggregation type enum
        field_path: The field path to validate

    Raises:
        PathNotFoundError: If the field doesn't exist in the database.
        IncompatibleAggregationTypeError: If the field type is incompatible with the aggregation type.
    """
    # The path lookup doubles as the existence check: None means unknown field.
    field_type_str = validate_filter_path(field_path)
    if field_type_str is None:
        raise PathNotFoundError(field_path)

    numeric_types = [FieldType.INTEGER.value, FieldType.FLOAT.value]
    if agg_type in (AggregationType.SUM, AggregationType.AVG):
        allowed_types = numeric_types
    elif agg_type in (AggregationType.MIN, AggregationType.MAX):
        allowed_types = [*numeric_types, FieldType.DATETIME.value]
    else:
        # No type restriction for the remaining aggregation types.
        return

    if field_type_str not in allowed_types:
        raise IncompatibleAggregationTypeError(agg_type.value, field_type_str, field_path, allowed_types)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def validate_temporal_grouping_field(field_path: str) -> None:
    """Check that a field exists and holds datetime values, as temporal grouping requires.

    Args:
        field_path: The field path to validate

    Raises:
        PathNotFoundError: If the field doesn't exist in the database
        IncompatibleTemporalGroupingTypeError: If the field is not a datetime type
    """
    # The path lookup doubles as the existence check: None means unknown field.
    detected_type = validate_filter_path(field_path)
    if detected_type is None:
        raise PathNotFoundError(field_path)

    if detected_type == FieldType.DATETIME.value:
        return

    raise IncompatibleTemporalGroupingTypeError(field_path, detected_type)
|
|
@@ -10,8 +10,3 @@
|
|
|
10
10
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
11
|
# See the License for the specific language governing permissions and
|
|
12
12
|
# limitations under the License.
|
|
13
|
-
|
|
14
|
-
from .engine import execute_search, execute_search_for_export
|
|
15
|
-
from .query_state import SearchQueryState
|
|
16
|
-
|
|
17
|
-
__all__ = ["execute_search", "execute_search_for_export", "SearchQueryState"]
|
|
@@ -18,8 +18,8 @@ from pydantic import BaseModel
|
|
|
18
18
|
|
|
19
19
|
from orchestrator.db import SearchQueryTable, db
|
|
20
20
|
from orchestrator.search.core.exceptions import InvalidCursorError
|
|
21
|
-
from orchestrator.search.
|
|
22
|
-
from orchestrator.search.
|
|
21
|
+
from orchestrator.search.query.queries import SelectQuery
|
|
22
|
+
from orchestrator.search.query.results import SearchResponse
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class PageCursor(BaseModel):
|
|
@@ -45,7 +45,7 @@ class PageCursor(BaseModel):
|
|
|
45
45
|
def encode_next_page_cursor(
|
|
46
46
|
search_response: SearchResponse,
|
|
47
47
|
cursor: PageCursor | None,
|
|
48
|
-
|
|
48
|
+
query: SelectQuery,
|
|
49
49
|
) -> str | None:
|
|
50
50
|
"""Create next page cursor if there are more results.
|
|
51
51
|
|
|
@@ -55,20 +55,19 @@ def encode_next_page_cursor(
|
|
|
55
55
|
Args:
|
|
56
56
|
search_response: SearchResponse containing results and query_embedding
|
|
57
57
|
cursor: Current page cursor (None for first page, PageCursor for subsequent pages)
|
|
58
|
-
|
|
58
|
+
query: SelectQuery for search operation to save for pagination consistency
|
|
59
59
|
|
|
60
60
|
Returns:
|
|
61
61
|
Encoded cursor for next page, or None if no more results
|
|
62
62
|
"""
|
|
63
|
-
from orchestrator.search.
|
|
63
|
+
from orchestrator.search.query.state import QueryState
|
|
64
64
|
|
|
65
|
-
|
|
66
|
-
if not has_next_page:
|
|
65
|
+
if not search_response.has_more:
|
|
67
66
|
return None
|
|
68
67
|
|
|
69
68
|
# If this is the first page, save query state to database
|
|
70
69
|
if cursor is None:
|
|
71
|
-
query_state =
|
|
70
|
+
query_state = QueryState(query=query, query_embedding=search_response.query_embedding)
|
|
72
71
|
search_query = SearchQueryTable.from_state(state=query_state)
|
|
73
72
|
|
|
74
73
|
db.session.add(search_query)
|
|
@@ -18,7 +18,7 @@ import structlog
|
|
|
18
18
|
from sqlalchemy import BindParameter, Numeric, Select, literal
|
|
19
19
|
|
|
20
20
|
from orchestrator.search.core.types import FieldType, SearchMetadata
|
|
21
|
-
from orchestrator.search.
|
|
21
|
+
from orchestrator.search.query.queries import ExportQuery, SelectQuery
|
|
22
22
|
|
|
23
23
|
from ..pagination import PageCursor
|
|
24
24
|
|
|
@@ -41,13 +41,13 @@ class Retriever(ABC):
|
|
|
41
41
|
]
|
|
42
42
|
|
|
43
43
|
@classmethod
|
|
44
|
-
|
|
44
|
+
def route(
|
|
45
45
|
cls,
|
|
46
|
-
|
|
46
|
+
query: "SelectQuery | ExportQuery",
|
|
47
47
|
cursor: PageCursor | None,
|
|
48
48
|
query_embedding: list[float] | None = None,
|
|
49
49
|
) -> "Retriever":
|
|
50
|
-
"""Route to the appropriate retriever instance based on
|
|
50
|
+
"""Route to the appropriate retriever instance based on query plan.
|
|
51
51
|
|
|
52
52
|
Selects the retriever type based on available search criteria:
|
|
53
53
|
- Hybrid: both embedding and fuzzy term available
|
|
@@ -56,7 +56,7 @@ class Retriever(ABC):
|
|
|
56
56
|
- Structured: only filters available
|
|
57
57
|
|
|
58
58
|
Args:
|
|
59
|
-
|
|
59
|
+
query: SelectQuery or ExportQuery with search criteria
|
|
60
60
|
cursor: Pagination cursor for cursor-based paging
|
|
61
61
|
query_embedding: Query embedding for semantic search, or None if not available
|
|
62
62
|
|
|
@@ -68,11 +68,11 @@ class Retriever(ABC):
|
|
|
68
68
|
from .semantic import SemanticRetriever
|
|
69
69
|
from .structured import StructuredRetriever
|
|
70
70
|
|
|
71
|
-
fuzzy_term =
|
|
71
|
+
fuzzy_term = query.fuzzy_term
|
|
72
72
|
|
|
73
|
-
# If vector_query exists but embedding generation failed, fall back to fuzzy search with full query
|
|
74
|
-
if query_embedding is None and
|
|
75
|
-
fuzzy_term =
|
|
73
|
+
# If vector_query exists but embedding generation failed, fall back to fuzzy search with full query text
|
|
74
|
+
if query_embedding is None and query.vector_query is not None and query.query_text is not None:
|
|
75
|
+
fuzzy_term = query.query_text
|
|
76
76
|
|
|
77
77
|
# Select retriever based on available search criteria
|
|
78
78
|
if query_embedding is not None and fuzzy_term is not None:
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: orchestrator-core
|
|
3
|
-
Version: 4.6.
|
|
3
|
+
Version: 4.6.3
|
|
4
4
|
Summary: This is the orchestrator workflow engine.
|
|
5
5
|
Author-email: SURF <automation-beheer@surf.nl>
|
|
6
|
-
Requires-Python: >=3.11,<3.
|
|
6
|
+
Requires-Python: >=3.11,<3.15
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
License-Expression: Apache-2.0
|
|
9
9
|
Classifier: Development Status :: 5 - Production/Stable
|
|
@@ -20,6 +20,7 @@ Classifier: Programming Language :: Python :: 3
|
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.11
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
23
24
|
Classifier: Programming Language :: Python
|
|
24
25
|
Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
|
|
25
26
|
Classifier: Topic :: Internet :: WWW/HTTP
|
|
@@ -30,40 +31,40 @@ Classifier: Topic :: Software Development :: Libraries
|
|
|
30
31
|
Classifier: Topic :: Software Development
|
|
31
32
|
Classifier: Typing :: Typed
|
|
32
33
|
License-File: LICENSE
|
|
33
|
-
Requires-Dist: alembic==1.
|
|
34
|
+
Requires-Dist: alembic==1.17.2
|
|
34
35
|
Requires-Dist: anyio>=3.7.0
|
|
35
36
|
Requires-Dist: apscheduler>=3.11.0
|
|
36
37
|
Requires-Dist: click==8.*
|
|
37
38
|
Requires-Dist: deepmerge==2.0
|
|
38
39
|
Requires-Dist: deprecated>=1.2.18
|
|
39
|
-
Requires-Dist: fastapi~=0.
|
|
40
|
+
Requires-Dist: fastapi~=0.121.1
|
|
40
41
|
Requires-Dist: fastapi-etag==0.4.0
|
|
41
42
|
Requires-Dist: itsdangerous>=2.2.0
|
|
42
43
|
Requires-Dist: jinja2==3.1.6
|
|
43
|
-
Requires-Dist: more-itertools~=10.
|
|
44
|
+
Requires-Dist: more-itertools~=10.8.0
|
|
44
45
|
Requires-Dist: nwa-stdlib~=1.10.3
|
|
45
46
|
Requires-Dist: oauth2-lib>=2.5.0
|
|
46
|
-
Requires-Dist: orjson==3.
|
|
47
|
+
Requires-Dist: orjson==3.11.4
|
|
47
48
|
Requires-Dist: pgvector>=0.4.1
|
|
48
|
-
Requires-Dist: prometheus-client==0.
|
|
49
|
-
Requires-Dist: psycopg2-binary==2.9.
|
|
49
|
+
Requires-Dist: prometheus-client==0.23.1
|
|
50
|
+
Requires-Dist: psycopg2-binary==2.9.11
|
|
50
51
|
Requires-Dist: pydantic-forms>=1.4.0
|
|
51
|
-
Requires-Dist: pydantic-settings~=2.
|
|
52
|
-
Requires-Dist: pydantic[email]~=2.
|
|
53
|
-
Requires-Dist: python-dateutil==2.
|
|
54
|
-
Requires-Dist: python-rapidjson>=1.
|
|
52
|
+
Requires-Dist: pydantic-settings~=2.12.0
|
|
53
|
+
Requires-Dist: pydantic[email]~=2.12.4
|
|
54
|
+
Requires-Dist: python-dateutil==2.9.0.post0
|
|
55
|
+
Requires-Dist: python-rapidjson>=1.22,<1.23
|
|
55
56
|
Requires-Dist: pytz==2025.2
|
|
56
57
|
Requires-Dist: redis==5.1.1
|
|
57
58
|
Requires-Dist: semver==3.0.4
|
|
58
59
|
Requires-Dist: sentry-sdk[fastapi]>=2.29.1
|
|
59
|
-
Requires-Dist: sqlalchemy==2.0.
|
|
60
|
+
Requires-Dist: sqlalchemy==2.0.44
|
|
60
61
|
Requires-Dist: sqlalchemy-utils==0.41.2
|
|
61
|
-
Requires-Dist: strawberry-graphql>=0.281.0
|
|
62
|
+
Requires-Dist: strawberry-graphql>=0.281.0,<0.285.0
|
|
62
63
|
Requires-Dist: structlog>=25.4.0
|
|
63
64
|
Requires-Dist: tabulate==0.9.0
|
|
64
65
|
Requires-Dist: typer==0.15.4
|
|
65
66
|
Requires-Dist: uvicorn[standard]~=0.34.0
|
|
66
|
-
Requires-Dist: pydantic-ai-slim >=1.
|
|
67
|
+
Requires-Dist: pydantic-ai-slim >=1.9.0 ; extra == "agent"
|
|
67
68
|
Requires-Dist: ag-ui-protocol>=0.1.8 ; extra == "agent"
|
|
68
69
|
Requires-Dist: litellm>=1.75.7 ; extra == "agent"
|
|
69
70
|
Requires-Dist: celery~=5.5.1 ; extra == "celery"
|