orchestrator-core 4.6.1__py3-none-any.whl → 4.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. orchestrator/__init__.py +1 -1
  2. orchestrator/api/api_v1/endpoints/processes.py +4 -1
  3. orchestrator/api/api_v1/endpoints/search.py +44 -34
  4. orchestrator/{search/retrieval/utils.py → cli/search/display.py} +4 -29
  5. orchestrator/cli/search/search_explore.py +22 -24
  6. orchestrator/cli/search/speedtest.py +11 -9
  7. orchestrator/db/models.py +6 -6
  8. orchestrator/graphql/resolvers/helpers.py +15 -0
  9. orchestrator/graphql/resolvers/process.py +5 -3
  10. orchestrator/graphql/resolvers/product.py +3 -2
  11. orchestrator/graphql/resolvers/product_block.py +3 -2
  12. orchestrator/graphql/resolvers/resource_type.py +3 -2
  13. orchestrator/graphql/resolvers/scheduled_tasks.py +3 -1
  14. orchestrator/graphql/resolvers/settings.py +2 -0
  15. orchestrator/graphql/resolvers/subscription.py +5 -3
  16. orchestrator/graphql/resolvers/version.py +2 -0
  17. orchestrator/graphql/resolvers/workflow.py +3 -2
  18. orchestrator/graphql/schemas/process.py +3 -3
  19. orchestrator/log_config.py +2 -0
  20. orchestrator/schemas/search.py +1 -1
  21. orchestrator/schemas/search_requests.py +59 -0
  22. orchestrator/search/agent/handlers.py +129 -0
  23. orchestrator/search/agent/prompts.py +54 -33
  24. orchestrator/search/agent/state.py +9 -24
  25. orchestrator/search/agent/tools.py +223 -144
  26. orchestrator/search/agent/validation.py +80 -0
  27. orchestrator/search/{schemas → aggregations}/__init__.py +20 -0
  28. orchestrator/search/aggregations/base.py +201 -0
  29. orchestrator/search/core/types.py +3 -2
  30. orchestrator/search/filters/__init__.py +4 -0
  31. orchestrator/search/filters/definitions.py +22 -1
  32. orchestrator/search/filters/numeric_filter.py +3 -3
  33. orchestrator/search/llm_migration.py +2 -1
  34. orchestrator/search/query/__init__.py +90 -0
  35. orchestrator/search/query/builder.py +285 -0
  36. orchestrator/search/query/engine.py +162 -0
  37. orchestrator/search/{retrieval → query}/exceptions.py +38 -7
  38. orchestrator/search/query/mixins.py +95 -0
  39. orchestrator/search/query/queries.py +129 -0
  40. orchestrator/search/query/results.py +252 -0
  41. orchestrator/search/{retrieval/query_state.py → query/state.py} +31 -11
  42. orchestrator/search/{retrieval → query}/validation.py +58 -1
  43. orchestrator/search/retrieval/__init__.py +0 -5
  44. orchestrator/search/retrieval/pagination.py +7 -8
  45. orchestrator/search/retrieval/retrievers/base.py +9 -9
  46. orchestrator/workflows/translations/en-GB.json +1 -0
  47. {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/METADATA +16 -15
  48. {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/RECORD +51 -45
  49. orchestrator/search/retrieval/builder.py +0 -127
  50. orchestrator/search/retrieval/engine.py +0 -197
  51. orchestrator/search/schemas/parameters.py +0 -133
  52. orchestrator/search/schemas/results.py +0 -80
  53. /orchestrator/search/{export.py → query/export.py} +0 -0
  54. {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/WHEEL +0 -0
  55. {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,162 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ import structlog
15
+ from sqlalchemy.orm import Session
16
+
17
+ from orchestrator.search.core.embedding import QueryEmbedder
18
+ from orchestrator.search.core.types import SearchMetadata
19
+ from orchestrator.search.query.results import (
20
+ AggregationResponse,
21
+ SearchResponse,
22
+ format_aggregation_response,
23
+ format_search_response,
24
+ )
25
+ from orchestrator.search.retrieval.pagination import PageCursor
26
+ from orchestrator.search.retrieval.retrievers import Retriever
27
+
28
+ from .builder import build_aggregation_query, build_candidate_query, build_simple_count_query
29
+ from .export import fetch_export_data
30
+ from .queries import AggregateQuery, CountQuery, ExportQuery, SelectQuery
31
+
32
# Module-level structlog logger, named after this module.
logger = structlog.get_logger(__name__)
33
+
34
+
35
async def _execute_search(
    query: SelectQuery | ExportQuery,
    db_session: Session,
    limit: int,
    cursor: PageCursor | None = None,
    query_embedding: list[float] | None = None,
) -> SearchResponse:
    """Run a SELECT or EXPORT query and package the rows as a SearchResponse.

    Shared implementation used by the public search and export entry points.

    Args:
        query: Query carrying the vector text, fuzzy term and/or filters.
        db_session: SQLAlchemy session on which the final statement is executed.
        limit: Row cap applied to the final statement.
        cursor: Optional pagination cursor handed to the retriever router.
        query_embedding: Optional embedding to reuse. When it is falsy and the
            query has vector text, a fresh embedding is generated.

    Returns:
        SearchResponse with the formatted results; the embedding that was used
        (possibly None) is attached as ``query_embedding``.
    """
    # Nothing to search on: short-circuit with an empty response.
    if not (query.vector_query or query.filters or query.fuzzy_term):
        logger.warning("No search criteria provided (vector_query, fuzzy_term, or filters).")
        return SearchResponse(results=[], metadata=SearchMetadata.empty())

    base_stmt = build_candidate_query(query)

    embedding = query_embedding
    if query.vector_query and not embedding:
        embedding = await QueryEmbedder.generate_for_text_async(query.vector_query)

    retriever = Retriever.route(query, cursor, embedding)
    logger.debug("Using retriever", retriever_type=retriever.__class__.__name__)

    limited_stmt = retriever.apply(base_stmt).limit(limit)
    logger.debug(limited_stmt)
    rows = db_session.execute(limited_stmt).mappings().all()

    search_response = format_search_response(rows, query, retriever.metadata)
    # Expose the embedding on the response so the caller can persist it.
    search_response.query_embedding = embedding
    return search_response
75
+
76
+
77
async def execute_search(
    query: SelectQuery,
    db_session: Session,
    cursor: PageCursor | None = None,
    query_embedding: list[float] | None = None,
) -> SearchResponse:
    """Execute a SELECT search query and report whether more results exist.

    Args:
        query: SelectQuery with search criteria and the page size (``limit``).
        db_session: Database session.
        cursor: Optional pagination cursor.
        query_embedding: Optional pre-computed embedding.

    Returns:
        SearchResponse trimmed to ``query.limit`` results, with ``has_more``
        set when an additional row beyond the page was found.
    """
    page_size = query.limit
    paginate = page_size > 0

    # Request one row beyond the page so a following page can be detected.
    fetch_limit = page_size + 1 if paginate else page_size
    response = await _execute_search(query, db_session, fetch_limit, cursor, query_embedding)

    # Evaluate before trimming: the extra row is the "next page" signal.
    has_more = paginate and len(response.results) > page_size

    response.results = response.results[:page_size]
    response.has_more = has_more
    return response
107
+
108
+
109
async def execute_export(
    query: ExportQuery,
    db_session: Session,
    query_embedding: list[float] | None = None,
) -> list[dict]:
    """Run an export query and return flattened entity records.

    The search is executed first; the entity ids of its results are then
    passed to ``fetch_export_data`` together with the query's entity type.

    Args:
        query: ExportQuery with search criteria and the export ``limit``.
        db_session: Database session.
        query_embedding: Optional pre-computed embedding.

    Returns:
        Whatever ``fetch_export_data`` returns for the matched entity ids.
    """
    response = await _execute_search(
        query=query,
        db_session=db_session,
        limit=query.limit,
        query_embedding=query_embedding,
    )
    matched_ids = [hit.entity_id for hit in response.results]
    return fetch_export_data(query.entity_type, matched_ids)
133
+
134
+
135
async def execute_aggregation(
    query: CountQuery | AggregateQuery,
    db_session: Session,
) -> AggregationResponse:
    """Execute a COUNT or AGGREGATE query and format the resulting rows.

    Args:
        query: CountQuery or AggregateQuery.
        db_session: Database session.

    Returns:
        AggregationResponse built by ``format_aggregation_response``.
    """
    candidate_query = build_candidate_query(query)

    # A CountQuery without any grouping takes the simple count path; every
    # other case goes through the grouped aggregation builder.
    is_plain_count = isinstance(query, CountQuery) and not (query.group_by or query.temporal_group_by)

    group_column_names: list[str]
    if is_plain_count:
        group_column_names = []
        agg_query = build_simple_count_query(candidate_query)
    else:
        agg_query, group_column_names = build_aggregation_query(query, candidate_query)

    logger.debug("Executing aggregation query", sql=str(agg_query))

    rows = db_session.execute(agg_query).mappings().all()
    return format_aggregation_response(rows, group_column_names, query)
@@ -14,13 +14,13 @@
14
14
  from orchestrator.search.core.types import FilterOp
15
15
 
16
16
 
17
- class FilterValidationError(Exception):
18
- """Base exception for filter validation errors."""
17
+ class QueryValidationError(Exception):
18
+ """Base exception for all query validation errors."""
19
19
 
20
20
  pass
21
21
 
22
22
 
23
- class InvalidLtreePatternError(FilterValidationError):
23
+ class InvalidLtreePatternError(QueryValidationError):
24
24
  """Raised when an ltree pattern has invalid ltree query syntax."""
25
25
 
26
26
  def __init__(self, pattern: str) -> None:
@@ -28,7 +28,7 @@ class InvalidLtreePatternError(FilterValidationError):
28
28
  super().__init__(message)
29
29
 
30
30
 
31
- class EmptyFilterPathError(FilterValidationError):
31
+ class EmptyFilterPathError(QueryValidationError):
32
32
  """Raised when a filter path is empty or contains only whitespace."""
33
33
 
34
34
  def __init__(self) -> None:
@@ -38,7 +38,7 @@ class EmptyFilterPathError(FilterValidationError):
38
38
  super().__init__(message)
39
39
 
40
40
 
41
- class PathNotFoundError(FilterValidationError):
41
+ class PathNotFoundError(QueryValidationError):
42
42
  """Raised when a filter path doesn't exist in the database schema.
43
43
 
44
44
  Examples:
@@ -53,7 +53,7 @@ class PathNotFoundError(FilterValidationError):
53
53
  super().__init__(message)
54
54
 
55
55
 
56
- class IncompatibleFilterTypeError(FilterValidationError):
56
+ class IncompatibleFilterTypeError(QueryValidationError):
57
57
  """Raised when a filter operator is incompatible with the field's data type.
58
58
 
59
59
  Examples:
@@ -75,7 +75,7 @@ class IncompatibleFilterTypeError(FilterValidationError):
75
75
  super().__init__(message)
76
76
 
77
77
 
78
- class InvalidEntityPrefixError(FilterValidationError):
78
+ class InvalidEntityPrefixError(QueryValidationError):
79
79
  """Raised when a filter path doesn't have the correct entity type prefix.
80
80
 
81
81
  Examples:
@@ -88,3 +88,34 @@ class InvalidEntityPrefixError(FilterValidationError):
88
88
  def __init__(self, path: str, expected_prefix: str, entity_type: str) -> None:
89
89
  message = f"Filter path '{path}' must start with '{expected_prefix}' for {entity_type} searches, or use '*' for wildcard paths."
90
90
  super().__init__(message)
91
+
92
+
93
class IncompatibleAggregationTypeError(QueryValidationError):
    """Signal that an aggregation function cannot be applied to a field's data type.

    Examples:
        Using SUM on a string field:

        >>> print(IncompatibleAggregationTypeError('sum', 'string', 'subscription.name', ['integer', 'float']))
        Aggregation 'sum' requires numeric fields (integer, float), but 'subscription.name' has type 'string'.
    """

    def __init__(self, agg_type: str, field_type: str, path: str, expected_types: list[str]) -> None:
        # The accepted types are rendered as a comma-separated list in the message.
        expected_types_str = ", ".join(expected_types)
        super().__init__(
            f"Aggregation '{agg_type}' requires numeric fields ({expected_types_str}), but '{path}' has type '{field_type}'."
        )
107
+
108
+
109
class IncompatibleTemporalGroupingTypeError(QueryValidationError):
    """Signal that temporal grouping was requested on a field that is not a datetime.

    Examples:
        Using temporal grouping on a string field:

        >>> print(IncompatibleTemporalGroupingTypeError('subscription.name', 'string'))
        Temporal grouping requires a datetime field, but 'subscription.name' has type 'string'.
    """

    def __init__(self, path: str, field_type: str) -> None:
        super().__init__(
            f"Temporal grouping requires a datetime field, but '{path}' has type '{field_type}'."
        )
@@ -0,0 +1,95 @@
1
+ import uuid
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+ from orchestrator.search.aggregations import Aggregation, TemporalGrouping
6
+
7
# Public names exported by this module: the three query mixins defined below.
__all__ = [
    "SearchMixin",
    "GroupingMixin",
    "AggregationMixin",
]
12
+
13
+
14
class SearchMixin(BaseModel):
    """Mixin that adds free-text search input to a query model.

    Holds the raw query text and derives from it the inputs for vector
    search and fuzzy search.
    """

    query_text: str | None = Field(default=None, description="Text query for semantic/fuzzy search")

    @property
    def vector_query(self) -> str | None:
        """Return the text to vectorize, or None when vector search does not apply.

        None is returned when ``query_text`` is empty or parses as a UUID
        (UUIDs are not vectorized); otherwise the text is returned unchanged.
        """
        text = self.query_text
        if not text:
            return None
        try:
            uuid.UUID(text)
        except ValueError:
            # Not a UUID: the text is usable for vector search.
            return text
        return None

    @property
    def fuzzy_term(self) -> str | None:
        """Return the term for fuzzy search, or None when fuzzy search does not apply.

        Only single-word queries qualify, to avoid the trigram operator
        filtering out too many results.
        """
        text = self.query_text
        if text and len(text.split()) == 1:
            return text
        return None
49
+
50
+
51
class GroupingMixin(BaseModel):
    """Mixin that adds grouping options to a query model.

    Used by COUNT and AGGREGATE queries for grouping results.
    """

    group_by: list[str] | None = Field(default=None, description="Field paths to group by")
    temporal_group_by: list[TemporalGrouping] | None = Field(
        default=None,
        description="Temporal grouping specifications (group by month, year, etc.)",
    )

    def get_pivot_fields(self) -> list[str]:
        """Return the fields the EAV pivot needs for grouping.

        Plain ``group_by`` paths come first, followed by the pivot fields of
        each temporal grouping. Duplicates are dropped, keeping the first
        occurrence so insertion order is preserved.
        """
        # A dict is used as an ordered set: keys keep first-insertion order.
        ordered: dict[str, None] = dict.fromkeys(self.group_by or [])
        for grouping in self.temporal_group_by or []:
            ordered.update(dict.fromkeys(grouping.get_pivot_fields()))
        return list(ordered)
77
+
78
+
79
class AggregationMixin(BaseModel):
    """Mixin that adds the list of aggregations to compute to a query model.

    Used by AGGREGATE queries to define what statistics to compute.
    """

    aggregations: list[Aggregation] = Field(description="Aggregations to compute (SUM, AVG, MIN, MAX, COUNT)")

    def get_aggregation_pivot_fields(self) -> list[str]:
        """Return the fields the EAV pivot needs for the aggregations.

        Fields are collected in aggregation order; duplicates are dropped,
        keeping the first occurrence.
        """
        collected = [name for agg in self.aggregations for name in agg.get_pivot_fields()]
        return list(dict.fromkeys(collected))
@@ -0,0 +1,129 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from typing import Annotated, Any, ClassVar, Literal, Self, Union
15
+
16
+ from pydantic import BaseModel, ConfigDict, Discriminator, Field
17
+
18
+ from orchestrator.search.core.types import ActionType, EntityType
19
+ from orchestrator.search.filters import FilterTree
20
+
21
+ from .mixins import (
22
+ AggregationMixin,
23
+ GroupingMixin,
24
+ SearchMixin,
25
+ )
26
+
27
+
28
class BaseQuery(BaseModel):
    """Base model for all query types.

    Contains shared constants, properties, and utilities.
    """

    # Bounds and defaults referenced by the ``limit`` fields of the
    # SelectQuery and ExportQuery subclasses.
    MIN_LIMIT: ClassVar[int] = 1
    DEFAULT_LIMIT: ClassVar[int] = 10
    MAX_LIMIT: ClassVar[int] = 30
    DEFAULT_EXPORT_LIMIT: ClassVar[int] = 1000
    MAX_EXPORT_LIMIT: ClassVar[int] = 10000

    # Declared here without a value; each concrete query class assigns it.
    _action: ClassVar[ActionType]

    entity_type: EntityType
    filters: FilterTree | None = Field(default=None, description="Structured filters to apply")

    # Unknown fields are rejected during validation.
    model_config = ConfigDict(extra="forbid")

    @property
    def action(self) -> ActionType:
        """Return the action type assigned to this query class via ``_action``."""
        return self._action

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Self:
        """Build query from a dictionary.

        Args:
            data: Dictionary with query parameters

        Returns:
            Instance of the class this method is called on, produced by
            ``model_validate``.
        """
        return cls.model_validate(data)
62
+
63
+
64
class SelectQuery(BaseQuery, SearchMixin):
    """Query for SELECT operations.

    Composes BaseQuery with SearchMixin for text search, with strict result limits.
    """

    # Literal tag; the ``Query`` union at the end of this module discriminates on it.
    query_type: Literal["select"] = "select"
    _action: ClassVar[ActionType] = ActionType.SELECT

    # Page size, bounded by BaseQuery.MIN_LIMIT and BaseQuery.MAX_LIMIT.
    limit: int = Field(
        default=BaseQuery.DEFAULT_LIMIT,
        ge=BaseQuery.MIN_LIMIT,
        le=BaseQuery.MAX_LIMIT,
        description="Maximum number of search results to return",
    )
79
+
80
+
81
class ExportQuery(BaseQuery, SearchMixin):
    """Query for EXPORT operations.

    Similar to SelectQuery but with higher limits for bulk exports.
    """

    # Literal tag; the ``Query`` union at the end of this module discriminates on it.
    query_type: Literal["export"] = "export"
    # NOTE(review): exports report ActionType.SELECT rather than a dedicated
    # export action - presumably intentional; confirm.
    _action: ClassVar[ActionType] = ActionType.SELECT

    # Export size, bounded by BaseQuery.MIN_LIMIT and BaseQuery.MAX_EXPORT_LIMIT.
    limit: int = Field(
        default=BaseQuery.DEFAULT_EXPORT_LIMIT,
        ge=BaseQuery.MIN_LIMIT,
        le=BaseQuery.MAX_EXPORT_LIMIT,
        description="Maximum number of results to export",
    )
96
+
97
+
98
class CountQuery(BaseQuery, GroupingMixin):
    """Query for COUNT operations with optional grouping."""

    # Literal tag; the ``Query`` union at the end of this module discriminates on it.
    query_type: Literal["count"] = "count"
    _action: ClassVar[ActionType] = ActionType.COUNT
103
+
104
+
105
class AggregateQuery(BaseQuery, GroupingMixin, AggregationMixin):
    """Query for AGGREGATE operations.

    Composes BaseQuery with GroupingMixin and AggregationMixin
    to provide both grouping and aggregation capabilities.
    """

    query_type: Literal["aggregate"] = "aggregate"
    _action: ClassVar[ActionType] = ActionType.AGGREGATE

    def get_pivot_fields(self) -> list[str]:
        """Return every field the EAV pivot needs: grouping fields, then aggregation fields.

        Duplicates are dropped, keeping the first occurrence.
        """
        # super() resolves to the grouping implementation inherited from GroupingMixin.
        combined = [*super().get_pivot_fields(), *self.get_aggregation_pivot_fields()]
        return list(dict.fromkeys(combined))
124
+
125
+
126
# Discriminated union of the four concrete query models; the "query_type"
# literal declared on each model is the discriminator field.
Query = Annotated[
    Union[SelectQuery, ExportQuery, CountQuery, AggregateQuery],
    Discriminator("query_type"),
]