orchestrator-core 4.5.2__py3-none-any.whl → 4.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. orchestrator/__init__.py +2 -2
  2. orchestrator/agentic_app.py +3 -23
  3. orchestrator/api/api_v1/api.py +5 -0
  4. orchestrator/api/api_v1/endpoints/agent.py +49 -0
  5. orchestrator/api/api_v1/endpoints/search.py +120 -201
  6. orchestrator/app.py +1 -1
  7. orchestrator/cli/database.py +3 -0
  8. orchestrator/cli/generate.py +11 -4
  9. orchestrator/cli/generator/generator/migration.py +7 -3
  10. orchestrator/cli/main.py +1 -1
  11. orchestrator/cli/scheduler.py +15 -22
  12. orchestrator/cli/search/resize_embedding.py +28 -22
  13. orchestrator/cli/search/speedtest.py +4 -6
  14. orchestrator/db/__init__.py +6 -0
  15. orchestrator/db/models.py +75 -0
  16. orchestrator/llm_settings.py +18 -1
  17. orchestrator/migrations/helpers.py +47 -39
  18. orchestrator/schedules/scheduler.py +32 -15
  19. orchestrator/schedules/validate_products.py +1 -1
  20. orchestrator/schemas/search.py +8 -85
  21. orchestrator/search/agent/__init__.py +2 -2
  22. orchestrator/search/agent/agent.py +26 -30
  23. orchestrator/search/agent/json_patch.py +51 -0
  24. orchestrator/search/agent/prompts.py +35 -9
  25. orchestrator/search/agent/state.py +28 -2
  26. orchestrator/search/agent/tools.py +192 -53
  27. orchestrator/search/core/embedding.py +2 -2
  28. orchestrator/search/core/exceptions.py +6 -0
  29. orchestrator/search/core/types.py +1 -0
  30. orchestrator/search/export.py +199 -0
  31. orchestrator/search/indexing/indexer.py +13 -4
  32. orchestrator/search/indexing/registry.py +14 -1
  33. orchestrator/search/llm_migration.py +55 -0
  34. orchestrator/search/retrieval/__init__.py +3 -2
  35. orchestrator/search/retrieval/builder.py +5 -1
  36. orchestrator/search/retrieval/engine.py +66 -23
  37. orchestrator/search/retrieval/pagination.py +46 -56
  38. orchestrator/search/retrieval/query_state.py +61 -0
  39. orchestrator/search/retrieval/retrievers/base.py +26 -40
  40. orchestrator/search/retrieval/retrievers/fuzzy.py +10 -9
  41. orchestrator/search/retrieval/retrievers/hybrid.py +11 -8
  42. orchestrator/search/retrieval/retrievers/semantic.py +9 -8
  43. orchestrator/search/retrieval/retrievers/structured.py +6 -6
  44. orchestrator/search/schemas/parameters.py +17 -13
  45. orchestrator/search/schemas/results.py +4 -1
  46. orchestrator/settings.py +1 -0
  47. orchestrator/utils/auth.py +3 -2
  48. orchestrator/workflow.py +23 -6
  49. orchestrator/workflows/tasks/validate_product_type.py +3 -3
  50. {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/METADATA +17 -12
  51. {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/RECORD +53 -49
  52. {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/WHEEL +0 -0
  53. {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/licenses/LICENSE +0 -0

--- a/orchestrator/schemas/search.py
+++ b/orchestrator/schemas/search.py
@@ -11,14 +11,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from datetime import datetime
-from typing import Any, Generic, TypeVar
-from uuid import UUID
+from typing import Generic, TypeVar
 
 from pydantic import BaseModel, ConfigDict, Field
 
 from orchestrator.search.core.types import SearchMetadata
-from orchestrator.search.schemas.results import ComponentInfo, LeafInfo, MatchingField
+from orchestrator.search.schemas.results import ComponentInfo, LeafInfo
 
 T = TypeVar("T")
 
@@ -36,95 +34,20 @@ class ProductSchema(BaseModel):
     product_type: str
 
 
-class SubscriptionSearchResult(BaseModel):
-    score: float
-    perfect_match: int
-    matching_field: MatchingField | None = None
-    subscription: dict[str, Any]
-
-
 class SearchResultsSchema(BaseModel, Generic[T]):
     data: list[T] = Field(default_factory=list)
     page_info: PageInfoSchema = Field(default_factory=PageInfoSchema)
     search_metadata: SearchMetadata | None = None
 
 
-class WorkflowProductSchema(BaseModel):
-    """Product associated with a workflow."""
-
-    model_config = ConfigDict(from_attributes=True)
-
-    product_type: str
-    product_id: UUID
-    name: str
-
-
-class WorkflowSearchSchema(BaseModel):
-    """Schema for workflow search results."""
-
-    model_config = ConfigDict(from_attributes=True)
-
-    name: str
-    products: list[WorkflowProductSchema]
-    description: str | None = None
-    created_at: datetime | None = None
-
-
-class ProductSearchSchema(BaseModel):
-    """Schema for product search results."""
-
-    model_config = ConfigDict(from_attributes=True)
-
-    product_id: UUID
-    name: str
-    product_type: str
-    tag: str | None = None
-    description: str | None = None
-    status: str | None = None
-    created_at: datetime | None = None
-
-
-class ProcessSearchSchema(BaseModel):
-    """Schema for process search results."""
-
-    model_config = ConfigDict(from_attributes=True)
-
-    process_id: UUID
-    workflow_name: str
-    workflow_id: UUID
-    last_status: str
-    is_task: bool
-    created_by: str | None = None
-    started_at: datetime
-    last_modified_at: datetime
-    last_step: str | None = None
-    failed_reason: str | None = None
-    subscription_ids: list[UUID] | None = None
-
-
-class WorkflowSearchResult(BaseModel):
-    score: float
-    perfect_match: int
-    matching_field: MatchingField | None = None
-    workflow: WorkflowSearchSchema
-
-
-class ProductSearchResult(BaseModel):
-    score: float
-    perfect_match: int
-    matching_field: MatchingField | None = None
-    product: ProductSearchSchema
-
-
-class ProcessSearchResult(BaseModel):
-    score: float
-    perfect_match: int
-    matching_field: MatchingField | None = None
-    process: ProcessSearchSchema
-
-
 class PathsResponse(BaseModel):
     leaves: list[LeafInfo]
     components: list[ComponentInfo]
 
     model_config = ConfigDict(extra="forbid", use_enum_values=True)
+
+
+class ExportResponse(BaseModel):
+    page: list[dict]
+
+    model_config = ConfigDict(extra="forbid")
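
The per-entity result wrappers (SubscriptionSearchResult, WorkflowSearchResult, ProductSearchResult, ProcessSearchResult and their schemas) are gone; results now travel in the generic SearchResultsSchema[T], and ExportResponse is the new envelope for export pages. A minimal sketch of the surviving models in use, assuming the module path from the file list above; ResultItem and all example values are illustrative and not part of the package:

    from pydantic import BaseModel

    from orchestrator.schemas.search import ExportResponse, SearchResultsSchema


    class ResultItem(BaseModel):
        # Hypothetical item shape; callers parametrize SearchResultsSchema with
        # whatever per-entity model their endpoint actually returns.
        entity_id: str
        title: str
        score: float


    # One generic envelope now serves every entity type.
    results = SearchResultsSchema[ResultItem](
        data=[ResultItem(entity_id="abc-123", title="example subscription", score=0.92)],
    )

    # Export pages are returned as plain dicts, one list per page.
    export = ExportResponse(page=[{"entity_id": "abc-123", "title": "example subscription"}])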

--- a/orchestrator/search/agent/__init__.py
+++ b/orchestrator/search/agent/__init__.py
@@ -14,8 +14,8 @@
 # This module requires: pydantic-ai==0.7.0, ag-ui-protocol>=0.1.8
 
 
-from orchestrator.search.agent.agent import build_agent_router
+from orchestrator.search.agent.agent import build_agent_instance
 
 __all__ = [
-    "build_agent_router",
+    "build_agent_instance",
 ]

--- a/orchestrator/search/agent/agent.py
+++ b/orchestrator/search/agent/agent.py
@@ -14,13 +14,11 @@
 from typing import Any
 
 import structlog
-from fastapi import APIRouter, HTTPException, Request
-from pydantic_ai.ag_ui import StateDeps, handle_ag_ui_request
+from pydantic_ai.ag_ui import StateDeps
 from pydantic_ai.agent import Agent
-from pydantic_ai.models.openai import OpenAIModel
+from pydantic_ai.models.openai import OpenAIChatModel
 from pydantic_ai.settings import ModelSettings
 from pydantic_ai.toolsets import FunctionToolset
-from starlette.responses import Response
 
 from orchestrator.search.agent.prompts import get_base_instructions, get_dynamic_instructions
 from orchestrator.search.agent.state import SearchState
@@ -29,34 +27,32 @@ from orchestrator.search.agent.tools import search_toolset
 logger = structlog.get_logger(__name__)
 
 
-def build_agent_router(model: str | OpenAIModel, toolsets: list[FunctionToolset[Any]] | None = None) -> APIRouter:
-    router = APIRouter()
+def build_agent_instance(
+    model: str | OpenAIChatModel, agent_tools: list[FunctionToolset[Any]] | None = None
+) -> Agent[StateDeps[SearchState], str]:
+    """Build and configure the search agent instance.
 
-    try:
-        toolsets = toolsets + [search_toolset] if toolsets else [search_toolset]
+    Args:
+        model: The LLM model to use (string or OpenAIChatModel instance)
+        agent_tools: Optional list of additional toolsets to include
 
-        agent = Agent(
-            model=model,
-            deps_type=StateDeps[SearchState],
-            model_settings=ModelSettings(
-                parallel_tool_calls=False,
-            ),  # https://github.com/pydantic/pydantic-ai/issues/562
-            toolsets=toolsets,
-        )
-        agent.instructions(get_base_instructions)
-        agent.instructions(get_dynamic_instructions)
+    Returns:
+        Configured Agent instance with StateDeps[SearchState] dependencies
 
-        @router.post("/")
-        async def agent_endpoint(request: Request) -> Response:
-            return await handle_ag_ui_request(agent, request, deps=StateDeps(SearchState()))
+    Raises:
+        Exception: If agent initialization fails
+    """
+    toolsets = agent_tools + [search_toolset] if agent_tools else [search_toolset]
 
-        return router
-    except Exception as e:
-        logger.error("Agent init failed; serving disabled stub.", error=str(e))
-        error_msg = f"Agent disabled: {str(e)}"
+    agent = Agent(
+        model=model,
+        deps_type=StateDeps[SearchState],
+        model_settings=ModelSettings(
+            parallel_tool_calls=False,
+        ),  # https://github.com/pydantic/pydantic-ai/issues/562
+        toolsets=toolsets,
+    )
+    agent.instructions(get_base_instructions)
+    agent.instructions(get_dynamic_instructions)
 
-    @router.api_route("/{path:path}", methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"])
-    async def _disabled(path: str) -> None:
-        raise HTTPException(status_code=503, detail=error_msg)
-
-    return router
+    return agent
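
With the FastAPI wiring removed from this module, the caller now owns the HTTP layer that build_agent_router used to create internally (the new orchestrator/api/api_v1/endpoints/agent.py presumably does this). A hedged sketch of one way to mount the agent, reusing the handle_ag_ui_request helper that the old code imported; the route path and model name are assumptions:

    from fastapi import APIRouter, Request
    from pydantic_ai.ag_ui import StateDeps, handle_ag_ui_request
    from starlette.responses import Response

    from orchestrator.search.agent import build_agent_instance
    from orchestrator.search.agent.state import SearchState

    router = APIRouter()
    # "openai:gpt-4o" is only an example; any model accepted by pydantic-ai works here.
    agent = build_agent_instance("openai:gpt-4o")


    @router.post("/agent")
    async def agent_endpoint(request: Request) -> Response:
        # The same call the 4.5.2 router made internally, now made by the caller.
        return await handle_ag_ui_request(agent, request, deps=StateDeps(SearchState()))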

--- /dev/null
+++ b/orchestrator/search/agent/json_patch.py
@@ -0,0 +1,51 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field
+
+
+class JSONPatchOp(BaseModel):
+    """A JSON Patch operation (RFC 6902).
+
+    Docs reference: https://docs.ag-ui.com/concepts/state
+    """
+
+    op: Literal["add", "remove", "replace", "move", "copy", "test"] = Field(
+        description="The operation to perform: add, remove, replace, move, copy, or test"
+    )
+    path: str = Field(description="JSON Pointer (RFC 6901) to the target location")
+    value: Any | None = Field(
+        default=None,
+        description="The value to apply (for add, replace operations)",
+    )
+    from_: str | None = Field(
+        default=None,
+        alias="from",
+        description="Source path (for move, copy operations)",
+    )
+
+    @classmethod
+    def upsert(cls, path: str, value: Any, existed: bool) -> "JSONPatchOp":
+        """Create an add or replace operation depending on whether the path existed.
+
+        Args:
+            path: JSON Pointer path to the target location
+            value: The value to set
+            existed: True if the path already exists (use replace), False otherwise (use add)
+
+        Returns:
+            JSONPatchOp with 'replace' if existed is True, 'add' otherwise
+        """
+        return cls(op="replace" if existed else "add", path=path, value=value)
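
A short usage sketch of the new helper; the pointer path and value below are made up for illustration, and applying the resulting patch is left to the AG-UI state layer:

    from orchestrator.search.agent.json_patch import JSONPatchOp

    # Emits "add" for a key that is not in the state yet, "replace" otherwise.
    op = JSONPatchOp.upsert(path="/parameters/entity_type", value="SUBSCRIPTION", existed=False)
    assert op.op == "add"

    # Dumping by alias restores the RFC 6902 field name "from" for move/copy ops.
    payload = op.model_dump(by_alias=True, exclude_none=True)
    # -> {'op': 'add', 'path': '/parameters/entity_type', 'value': 'SUBSCRIPTION'}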

--- a/orchestrator/search/agent/prompts.py
+++ b/orchestrator/search/agent/prompts.py
@@ -50,14 +50,15 @@ async def get_base_instructions() -> str:
 
     Follow these steps in strict order:
 
-    1. **Set Context**: Always begin by calling `set_search_parameters`.
+    1. **Set Context**: If the user is asking for a NEW search, call `start_new_search`.
     2. **Analyze for Filters**: Based on the user's request, decide if specific filters are necessary.
        - **If filters ARE required**, follow these sub-steps:
          a. **Gather Intel**: Identify all needed field names, then call `discover_filter_paths` and `get_valid_operators` **once each** to get all required information.
          b. **Construct FilterTree**: Build the `FilterTree` object.
          c. **Set Filters**: Call `set_filter_tree`.
-    3. **Execute**: Call `execute_search`. This is done for both filtered and non-filtered searches.
-    4. **Report**: Answer the users' question directly and summarize when appropiate.
+    3. **Execute**: Call `run_search`. This is done for both filtered and non-filtered searches.
+
+    After search execution, follow the dynamic instructions based on the current state.
 
     ---
     ### 4. Critical Rules
@@ -73,28 +74,53 @@ async def get_dynamic_instructions(ctx: RunContext[StateDeps[SearchState]]) -> s
     """Dynamically provides 'next step' coaching based on the current state."""
     state = ctx.deps.state
     param_state_str = json.dumps(state.parameters, indent=2, default=str) if state.parameters else "Not set."
+    results_count = state.results_data.total_count if state.results_data else 0
 
-    next_step_guidance = ""
-    if not state.parameters or not state.parameters.get("entity_type"):
+    if state.export_data:
+        next_step_guidance = (
+            "INSTRUCTION: Export has been prepared successfully. "
+            "Simply confirm to the user that the export is ready for download. "
+            "DO NOT include or mention the download URL - the UI will display it automatically."
+        )
+    elif not state.parameters or not state.parameters.get("entity_type"):
         next_step_guidance = (
-            "INSTRUCTION: The search context is not set. Your next action is to call `set_search_parameters`."
+            "INSTRUCTION: The search context is not set. Your next action is to call `start_new_search`."
+        )
+    elif results_count > 0:
+        next_step_guidance = dedent(
+            f"""
+            INSTRUCTION: Search completed successfully.
+            Found {results_count} results containing only: entity_id, title, score.
+
+            Choose your next action based on what the user requested:
+            1. **Broad/generic search** (e.g., 'show me subscriptions'): Confirm search completed and report count. Do nothing else.
+            2. **Question answerable with entity_id/title/score**: Answer directly using the current results.
+            3. **Question requiring other details**: Call `fetch_entity_details` first, then answer with the detailed data.
+            4. **Export request** (phrases like 'export', 'download', 'save as CSV'): Call `prepare_export` directly.
+            """
         )
     else:
         next_step_guidance = (
            "INSTRUCTION: Context is set. Now, analyze the user's request. "
            "If specific filters ARE required, use the information-gathering tools to build a `FilterTree` and call `set_filter_tree`. "
-            "If no specific filters are needed, you can proceed directly to `execute_search`."
+            "If no specific filters are needed, you can proceed directly to `run_search`."
        )
+
    return dedent(
        f"""
        ---
-        ### Current State & Next Action
+        ## CURRENT STATE
 
        **Current Search Parameters:**
        ```json
        {param_state_str}
        ```
 
-        **{next_step_guidance}**
+        **Current Results Count:** {results_count}
+
+        ---
+        ## NEXT ACTION REQUIRED
+
+        {next_step_guidance}
        """
    )

--- a/orchestrator/search/agent/state.py
+++ b/orchestrator/search/agent/state.py
@@ -12,10 +12,36 @@
 # limitations under the License.
 
 from typing import Any
+from uuid import UUID
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
+
+from orchestrator.search.schemas.results import SearchResult
+
+
+class ExportData(BaseModel):
+    """Export metadata for download."""
+
+    action: str = "export"
+    query_id: str
+    download_url: str
+    message: str
+
+
+class SearchResultsData(BaseModel):
+    """Search results data for frontend display and agent context."""
+
+    action: str = "view_results"
+    query_id: str
+    results_url: str
+    total_count: int
+    message: str
+    results: list[SearchResult] = []
 
 
 class SearchState(BaseModel):
+    run_id: UUID | None = None
+    query_id: UUID | None = None
     parameters: dict[str, Any] | None = None
-    results: list[dict[str, Any]] = Field(default_factory=list)
+    results_data: SearchResultsData | None = None
+    export_data: ExportData | None = None
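
SearchState now tracks run and query identifiers plus structured result and export payloads instead of a raw results list. A hedged sketch of how the richer state might be populated; every identifier, URL, and message below is a placeholder:

    from uuid import uuid4

    from orchestrator.search.agent.state import ExportData, SearchResultsData, SearchState

    state = SearchState(
        run_id=uuid4(),
        query_id=uuid4(),
        parameters={"entity_type": "SUBSCRIPTION"},  # placeholder parameters
        results_data=SearchResultsData(
            query_id="q-1",                     # placeholder query id
            results_url="/search/queries/q-1",  # placeholder URL
            total_count=1,
            message="Found 1 result",
            results=[],                         # SearchResult items omitted here
        ),
    )

    # Export metadata is attached separately once an export has been prepared.
    state.export_data = ExportData(
        query_id="q-1",
        download_url="/search/exports/q-1",     # placeholder URL
        message="Export ready",
    )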