orchestrator-core 4.6.1__py3-none-any.whl → 4.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. orchestrator/__init__.py +1 -1
  2. orchestrator/api/api_v1/endpoints/processes.py +4 -1
  3. orchestrator/api/api_v1/endpoints/search.py +44 -34
  4. orchestrator/{search/retrieval/utils.py → cli/search/display.py} +4 -29
  5. orchestrator/cli/search/search_explore.py +22 -24
  6. orchestrator/cli/search/speedtest.py +11 -9
  7. orchestrator/db/models.py +6 -6
  8. orchestrator/graphql/resolvers/helpers.py +15 -0
  9. orchestrator/graphql/resolvers/process.py +5 -3
  10. orchestrator/graphql/resolvers/product.py +3 -2
  11. orchestrator/graphql/resolvers/product_block.py +3 -2
  12. orchestrator/graphql/resolvers/resource_type.py +3 -2
  13. orchestrator/graphql/resolvers/scheduled_tasks.py +3 -1
  14. orchestrator/graphql/resolvers/settings.py +2 -0
  15. orchestrator/graphql/resolvers/subscription.py +5 -3
  16. orchestrator/graphql/resolvers/version.py +2 -0
  17. orchestrator/graphql/resolvers/workflow.py +3 -2
  18. orchestrator/graphql/schemas/process.py +3 -3
  19. orchestrator/log_config.py +2 -0
  20. orchestrator/schemas/search.py +1 -1
  21. orchestrator/schemas/search_requests.py +59 -0
  22. orchestrator/search/agent/handlers.py +129 -0
  23. orchestrator/search/agent/prompts.py +54 -33
  24. orchestrator/search/agent/state.py +9 -24
  25. orchestrator/search/agent/tools.py +223 -144
  26. orchestrator/search/agent/validation.py +80 -0
  27. orchestrator/search/{schemas → aggregations}/__init__.py +20 -0
  28. orchestrator/search/aggregations/base.py +201 -0
  29. orchestrator/search/core/types.py +3 -2
  30. orchestrator/search/filters/__init__.py +4 -0
  31. orchestrator/search/filters/definitions.py +22 -1
  32. orchestrator/search/filters/numeric_filter.py +3 -3
  33. orchestrator/search/llm_migration.py +2 -1
  34. orchestrator/search/query/__init__.py +90 -0
  35. orchestrator/search/query/builder.py +285 -0
  36. orchestrator/search/query/engine.py +162 -0
  37. orchestrator/search/{retrieval → query}/exceptions.py +38 -7
  38. orchestrator/search/query/mixins.py +95 -0
  39. orchestrator/search/query/queries.py +129 -0
  40. orchestrator/search/query/results.py +252 -0
  41. orchestrator/search/{retrieval/query_state.py → query/state.py} +31 -11
  42. orchestrator/search/{retrieval → query}/validation.py +58 -1
  43. orchestrator/search/retrieval/__init__.py +0 -5
  44. orchestrator/search/retrieval/pagination.py +7 -8
  45. orchestrator/search/retrieval/retrievers/base.py +9 -9
  46. orchestrator/workflows/translations/en-GB.json +1 -0
  47. {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/METADATA +16 -15
  48. {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/RECORD +51 -45
  49. orchestrator/search/retrieval/builder.py +0 -127
  50. orchestrator/search/retrieval/engine.py +0 -197
  51. orchestrator/search/schemas/parameters.py +0 -133
  52. orchestrator/search/schemas/results.py +0 -80
  53. /orchestrator/search/{export.py → query/export.py} +0 -0
  54. {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/WHEEL +0 -0
  55. {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/licenses/LICENSE +0 -0
@@ -9,7 +9,7 @@ from orchestrator.db.range.range import apply_range_to_statement
9
9
  from orchestrator.db.sorting import Sort
10
10
  from orchestrator.db.sorting.workflow import sort_workflows, workflow_sort_fields
11
11
  from orchestrator.graphql.pagination import Connection
12
- from orchestrator.graphql.resolvers.helpers import rows_from_statement
12
+ from orchestrator.graphql.resolvers.helpers import make_async, rows_from_statement
13
13
  from orchestrator.graphql.schemas.workflow import Workflow
14
14
  from orchestrator.graphql.types import GraphqlFilter, GraphqlSort, OrchestratorInfo
15
15
  from orchestrator.graphql.utils import create_resolver_error_handler, is_querying_page_data, to_graphql_result_page
@@ -19,7 +19,8 @@ from orchestrator.utils.search_query import create_sqlalchemy_select
19
19
  logger = structlog.get_logger(__name__)
20
20
 
21
21
 
22
- async def resolve_workflows(
22
+ @make_async
23
+ def resolve_workflows(
23
24
  info: OrchestratorInfo,
24
25
  filter_by: list[GraphqlFilter] | None = None,
25
26
  sort_by: list[GraphqlSort] | None = None,
@@ -86,11 +86,11 @@ class ProcessType:
86
86
  oidc_user = await info.context.get_current_user
87
87
  workflow = get_workflow(self.workflow_name)
88
88
  process = load_process(db.session.get(ProcessTable, self.process_id)) # type: ignore[arg-type]
89
- auth_resume, auth_retry = get_auth_callbacks(get_steps_to_evaluate_for_rbac(process), workflow) # type: ignore[arg-type]
89
+ auth_resume, auth_retry = get_auth_callbacks(get_steps_to_evaluate_for_rbac(process), workflow)
90
90
 
91
91
  return FormUserPermissionsType(
92
- retryAllowed=auth_retry and auth_retry(oidc_user), # type: ignore[arg-type]
93
- resumeAllowed=auth_resume and auth_resume(oidc_user), # type: ignore[arg-type]
92
+ retryAllowed=bool(auth_retry and auth_retry(oidc_user)),
93
+ resumeAllowed=bool(auth_resume and auth_resume(oidc_user)),
94
94
  )
95
95
 
96
96
  @authenticated_field(description="Returns list of subscriptions of the process") # type: ignore
@@ -41,9 +41,11 @@ LOGGER_OVERRIDES = dict(
41
41
  [
42
42
  logger_config("asyncio"),
43
43
  logger_config("httpcore"),
44
+ logger_config("openai", default_level="WARNING"),
44
45
  logger_config("orchestrator.graphql.autoregistration"),
45
46
  logger_config("sqlalchemy.engine", default_level="WARNING"),
46
47
  logger_config("uvicorn"),
47
48
  logger_config("LiteLLM", default_level="WARNING"),
49
+ logger_config("pydantic_ai", default_level="DEBUG"),
48
50
  ]
49
51
  )
@@ -16,7 +16,7 @@ from typing import Generic, TypeVar
16
16
  from pydantic import BaseModel, ConfigDict, Field
17
17
 
18
18
  from orchestrator.search.core.types import SearchMetadata
19
- from orchestrator.search.schemas.results import ComponentInfo, LeafInfo
19
+ from orchestrator.search.query.builder import ComponentInfo, LeafInfo
20
20
 
21
21
  T = TypeVar("T")
22
22
 
@@ -0,0 +1,59 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+
15
+ from pydantic import BaseModel, ConfigDict, Field
16
+
17
+ from orchestrator.search.core.types import EntityType
18
+ from orchestrator.search.filters import FilterTree
19
+ from orchestrator.search.query.queries import SelectQuery
20
+
21
+
22
+ class SearchRequest(BaseModel):
23
+ """API request model for search operations.
24
+
25
+ Only supports SELECT action, used by search endpoints.
26
+ """
27
+
28
+ filters: FilterTree | None = Field(
29
+ default=None,
30
+ description="Structured filters to apply to the search.",
31
+ )
32
+ query: str | None = Field(
33
+ default=None,
34
+ description="Text search query for semantic/fuzzy search.",
35
+ )
36
+ limit: int = Field(
37
+ default=SelectQuery.DEFAULT_LIMIT,
38
+ ge=SelectQuery.MIN_LIMIT,
39
+ le=SelectQuery.MAX_LIMIT,
40
+ description="Maximum number of search results to return.",
41
+ )
42
+
43
+ model_config = ConfigDict(extra="forbid")
44
+
45
+ def to_query(self, entity_type: EntityType) -> SelectQuery:
46
+ """Convert API request to SelectQuery domain model.
47
+
48
+ Args:
49
+ entity_type: The entity type to search (provided by endpoint)
50
+
51
+ Returns:
52
+ SelectQuery for search operation
53
+ """
54
+ return SelectQuery(
55
+ entity_type=entity_type,
56
+ filters=self.filters,
57
+ query_text=self.query,
58
+ limit=self.limit,
59
+ )
@@ -0,0 +1,129 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ """Handlers for search and aggregation execution with persistence."""
15
+
16
+ from uuid import UUID
17
+
18
+ import structlog
19
+
20
+ from orchestrator.db import AgentRunTable, SearchQueryTable
21
+ from orchestrator.db.database import WrappedSession
22
+ from orchestrator.search.query import engine
23
+ from orchestrator.search.query.queries import AggregateQuery, CountQuery, SelectQuery
24
+ from orchestrator.search.query.results import AggregationResponse, SearchResponse
25
+ from orchestrator.search.query.state import QueryState
26
+
27
+ logger = structlog.get_logger(__name__)
28
+
29
+
30
+ async def execute_search_with_persistence(
31
+ query: SelectQuery,
32
+ db_session: WrappedSession,
33
+ run_id: UUID | None,
34
+ ) -> tuple[SearchResponse, UUID, UUID]:
35
+ """Execute search, persist to DB, return response and IDs.
36
+
37
+ Args:
38
+ query: SelectQuery for search operation
39
+ db_session: Database session
40
+ run_id: Existing run ID or None to create new one
41
+
42
+ Returns:
43
+ Tuple of (search_response, run_id, query_id)
44
+ """
45
+ # Create agent run
46
+ if not run_id:
47
+ agent_run = AgentRunTable(agent_type="search")
48
+ db_session.add(agent_run)
49
+ db_session.commit()
50
+ db_session.expire_all()
51
+ run_id = agent_run.run_id
52
+ logger.debug("Created new agent run", run_id=str(run_id))
53
+
54
+ if run_id is None:
55
+ raise ValueError("run_id should not be None here")
56
+
57
+ # Execute search
58
+ search_response = await engine.execute_search(query, db_session)
59
+
60
+ # Save to database
61
+ query_embedding = search_response.query_embedding
62
+ query_state = QueryState(query=query, query_embedding=query_embedding)
63
+ query_number = db_session.query(SearchQueryTable).filter_by(run_id=run_id).count() + 1
64
+ search_query = SearchQueryTable.from_state(
65
+ state=query_state,
66
+ run_id=run_id,
67
+ query_number=query_number,
68
+ )
69
+ db_session.add(search_query)
70
+ db_session.commit()
71
+ db_session.expire_all()
72
+
73
+ logger.debug("Saved search query", query_id=str(search_query.query_id), query_number=query_number)
74
+
75
+ logger.debug(
76
+ "Search results",
77
+ results=[r.model_dump() for r in search_response.results],
78
+ total_count=len(search_response.results),
79
+ search_type=search_response.metadata.search_type,
80
+ )
81
+
82
+ return search_response, run_id, search_query.query_id
83
+
84
+
85
+ async def execute_aggregation_with_persistence(
86
+ query: CountQuery | AggregateQuery,
87
+ db_session: WrappedSession,
88
+ run_id: UUID | None,
89
+ ) -> tuple[AggregationResponse, UUID, UUID]:
90
+ """Execute aggregation, persist to DB, return response and IDs.
91
+
92
+ Args:
93
+ query: CountQuery or AggregateQuery for aggregation operations
94
+ db_session: Database session
95
+ run_id: Existing run ID or None to create new one
96
+
97
+ Returns:
98
+ Tuple of (aggregation_response, run_id, query_id)
99
+ """
100
+ # Create agent run if needed
101
+ if not run_id:
102
+ agent_run = AgentRunTable(agent_type="search")
103
+ db_session.add(agent_run)
104
+ db_session.commit()
105
+ db_session.expire_all()
106
+ run_id = agent_run.run_id
107
+ logger.debug("Created new agent run", run_id=str(run_id))
108
+
109
+ if run_id is None:
110
+ raise ValueError("run_id should not be None here")
111
+
112
+ # Execute aggregation
113
+ aggregation_response = await engine.execute_aggregation(query, db_session)
114
+
115
+ # Save to database
116
+ query_state = QueryState(query=query, query_embedding=None)
117
+ query_number = db_session.query(SearchQueryTable).filter_by(run_id=run_id).count() + 1
118
+ search_query = SearchQueryTable.from_state(
119
+ state=query_state,
120
+ run_id=run_id,
121
+ query_number=query_number,
122
+ )
123
+ db_session.add(search_query)
124
+ db_session.commit()
125
+ db_session.expire_all()
126
+
127
+ logger.debug("Saved aggregation query", query_id=str(search_query.query_id), query_number=query_number)
128
+
129
+ return aggregation_response, run_id, search_query.query_id
@@ -19,6 +19,7 @@ from pydantic_ai import RunContext
19
19
  from pydantic_ai.ag_ui import StateDeps
20
20
 
21
21
  from orchestrator.search.agent.state import SearchState
22
+ from orchestrator.search.core.types import ActionType
22
23
 
23
24
  logger = structlog.get_logger(__name__)
24
25
 
@@ -33,7 +34,6 @@ async def get_base_instructions() -> str:
33
34
 
34
35
  Your ultimate goal is to **find information** that answers the user's request.
35
36
 
36
- To do this, you will perform either a broad search or a filtered search.
37
37
  For **filtered searches**, your primary method is to **construct a valid `FilterTree` object**.
38
38
  To do this correctly, you must infer the exact structure, operators, and nesting rules from the Pydantic schema of the `set_filter_tree` tool itself.
39
39
 
@@ -48,15 +48,19 @@ async def get_base_instructions() -> str:
48
48
  ---
49
49
  ### 3. Execution Workflow
50
50
 
51
- Follow these steps in strict order:
51
+ Follow these steps:
52
52
 
53
- 1. **Set Context**: If the user is asking for a NEW search, call `start_new_search`.
54
- 2. **Analyze for Filters**: Based on the user's request, decide if specific filters are necessary.
55
- - **If filters ARE required**, follow these sub-steps:
56
- a. **Gather Intel**: Identify all needed field names, then call `discover_filter_paths` and `get_valid_operators` **once each** to get all required information.
57
- b. **Construct FilterTree**: Build the `FilterTree` object.
58
- c. **Set Filters**: Call `set_filter_tree`.
59
- 3. **Execute**: Call `run_search`. This is done for both filtered and non-filtered searches.
53
+ 1. **Set Context**: Call `start_new_search` with appropriate entity_type and action
54
+ 2. **Set Filters** (if needed): Discover paths, build FilterTree, call `set_filter_tree`
55
+ - IMPORTANT: Temporal constraints like "in 2025", "in January", "between X and Y" require filters on datetime fields
56
+ - Filters restrict WHICH records to include; grouping controls HOW to aggregate them
57
+ 3. **Set Grouping/Aggregations** (for COUNT/AGGREGATE):
58
+ - For temporal grouping (per month, per year, per day, etc.): Use `set_temporal_grouping`
59
+ - For regular grouping (by status, by name, etc.): Use `set_grouping`
60
+ - For aggregations: Use `set_aggregations`
61
+ 4. **Execute**:
62
+ - For SELECT action: Call `run_search()`
63
+ - For COUNT/AGGREGATE actions: Call `run_aggregation()`
60
64
 
61
65
  After search execution, follow the dynamic instructions based on the current state.
62
66
 
@@ -73,31 +77,46 @@ async def get_base_instructions() -> str:
73
77
  async def get_dynamic_instructions(ctx: RunContext[StateDeps[SearchState]]) -> str:
74
78
  """Dynamically provides 'next step' coaching based on the current state."""
75
79
  state = ctx.deps.state
76
- param_state_str = json.dumps(state.parameters, indent=2, default=str) if state.parameters else "Not set."
77
- results_count = state.results_data.total_count if state.results_data else 0
80
+ query_state_str = json.dumps(state.query.model_dump(), indent=2, default=str) if state.query else "Not set."
81
+ results_count = state.results_count or 0
82
+ action = state.action or ActionType.SELECT
78
83
 
79
- if state.export_data:
84
+ if not state.query:
80
85
  next_step_guidance = (
81
- "INSTRUCTION: Export has been prepared successfully. "
82
- "Simply confirm to the user that the export is ready for download. "
83
- "DO NOT include or mention the download URL - the UI will display it automatically."
84
- )
85
- elif not state.parameters or not state.parameters.get("entity_type"):
86
- next_step_guidance = (
87
- "INSTRUCTION: The search context is not set. Your next action is to call `start_new_search`."
86
+ f"INSTRUCTION: The search context is not set. Your next action is to call `start_new_search`. "
87
+ f"For counting or aggregation queries, set action='{ActionType.COUNT.value}' or action='{ActionType.AGGREGATE.value}'."
88
88
  )
89
89
  elif results_count > 0:
90
- next_step_guidance = dedent(
91
- f"""
92
- INSTRUCTION: Search completed successfully.
93
- Found {results_count} results containing only: entity_id, title, score.
94
-
95
- Choose your next action based on what the user requested:
96
- 1. **Broad/generic search** (e.g., 'show me subscriptions'): Confirm search completed and report count. Do nothing else.
97
- 2. **Question answerable with entity_id/title/score**: Answer directly using the current results.
98
- 3. **Question requiring other details**: Call `fetch_entity_details` first, then answer with the detailed data.
99
- 4. **Export request** (phrases like 'export', 'download', 'save as CSV'): Call `prepare_export` directly.
100
- """
90
+ if action in (ActionType.COUNT, ActionType.AGGREGATE):
91
+ # Aggregation completed
92
+ next_step_guidance = (
93
+ "INSTRUCTION: Aggregation completed successfully. "
94
+ "The results are already displayed in the UI. "
95
+ "Simply confirm completion to the user in a brief sentence. "
96
+ "DO NOT repeat, summarize, or restate the aggregation data."
97
+ )
98
+ else:
99
+ # Search completed
100
+ next_step_guidance = dedent(
101
+ f"""
102
+ INSTRUCTION: Search completed successfully.
103
+ Found {results_count} results containing only: entity_id, title, score.
104
+
105
+ Choose your next action based on what the user requested:
106
+ 1. **Broad/generic search** (e.g., 'show me subscriptions'): Confirm search completed and report count. Do not repeat the results.
107
+ 2. **Question answerable with entity_id/title/score**: Answer directly using the current results.
108
+ 3. **Question requiring other details**: Call `fetch_entity_details` first, then answer with the detailed data.
109
+ 4. **Export request** (phrases like 'export', 'download', 'save as CSV'): Call `prepare_export` directly. Simply confirm the export is ready. Do not repeat the results.
110
+ """
111
+ )
112
+ elif action in (ActionType.COUNT, ActionType.AGGREGATE):
113
+ # COUNT or AGGREGATE action but no results yet
114
+ next_step_guidance = (
115
+ "INSTRUCTION: Aggregation context is set. "
116
+ "For temporal queries (per month, per year, over time): call `set_temporal_grouping` with datetime field and period. "
117
+ "For regular grouping: call `set_grouping` with paths to group by. "
118
+ f"For {ActionType.AGGREGATE.value.upper()}: call `set_aggregations` with aggregation specs. "
119
+ "Then call `run_aggregation`."
101
120
  )
102
121
  else:
103
122
  next_step_guidance = (
@@ -106,17 +125,19 @@ async def get_dynamic_instructions(ctx: RunContext[StateDeps[SearchState]]) -> s
106
125
  "If no specific filters are needed, you can proceed directly to `run_search`."
107
126
  )
108
127
 
128
+ status_summary = f"Results: {results_count}" if results_count > 0 else "No results yet"
129
+
109
130
  return dedent(
110
131
  f"""
111
132
  ---
112
133
  ## CURRENT STATE
113
134
 
114
- **Current Search Parameters:**
135
+ **Current Query:**
115
136
  ```json
116
- {param_state_str}
137
+ {query_state_str}
117
138
  ```
118
139
 
119
- **Current Results Count:** {results_count}
140
+ **Status:** {status_summary}
120
141
 
121
142
  ---
122
143
  ## NEXT ACTION REQUIRED
@@ -11,37 +11,22 @@
11
11
  # See the License for the specific language governing permissions and
12
12
  # limitations under the License.
13
13
 
14
- from typing import Any
15
14
  from uuid import UUID
16
15
 
17
16
  from pydantic import BaseModel
18
17
 
19
- from orchestrator.search.schemas.results import SearchResult
18
+ from orchestrator.search.core.types import ActionType
19
+ from orchestrator.search.query.queries import Query
20
20
 
21
21
 
22
- class ExportData(BaseModel):
23
- """Export metadata for download."""
24
-
25
- action: str = "export"
26
- query_id: str
27
- download_url: str
28
- message: str
29
-
30
-
31
- class SearchResultsData(BaseModel):
32
- """Search results data for frontend display and agent context."""
33
-
34
- action: str = "view_results"
35
- query_id: str
36
- results_url: str
37
- total_count: int
38
- message: str
39
- results: list[SearchResult] = []
22
+ class SearchState(BaseModel):
23
+ """Agent state for search operations.
40
24
 
25
+ Tracks the current search context and execution status.
26
+ """
41
27
 
42
- class SearchState(BaseModel):
43
28
  run_id: UUID | None = None
44
29
  query_id: UUID | None = None
45
- parameters: dict[str, Any] | None = None
46
- results_data: SearchResultsData | None = None
47
- export_data: ExportData | None = None
30
+ action: ActionType | None = None
31
+ query: Query | None = None
32
+ results_count: int | None = None # Number of results from last executed search/aggregation