orchestrator-core 4.4.1__py3-none-any.whl → 4.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +17 -2
- orchestrator/agentic_app.py +103 -0
- orchestrator/api/api_v1/api.py +14 -2
- orchestrator/api/api_v1/endpoints/processes.py +2 -0
- orchestrator/api/api_v1/endpoints/search.py +296 -0
- orchestrator/app.py +32 -0
- orchestrator/cli/main.py +22 -1
- orchestrator/cli/search/__init__.py +32 -0
- orchestrator/cli/search/index_llm.py +73 -0
- orchestrator/cli/search/resize_embedding.py +135 -0
- orchestrator/cli/search/search_explore.py +208 -0
- orchestrator/cli/search/speedtest.py +151 -0
- orchestrator/db/models.py +37 -1
- orchestrator/devtools/populator.py +16 -0
- orchestrator/domain/base.py +2 -7
- orchestrator/domain/lifecycle.py +24 -7
- orchestrator/llm_settings.py +57 -0
- orchestrator/log_config.py +1 -0
- orchestrator/migrations/helpers.py +7 -1
- orchestrator/schemas/search.py +130 -0
- orchestrator/schemas/workflow.py +1 -0
- orchestrator/search/__init__.py +12 -0
- orchestrator/search/agent/__init__.py +21 -0
- orchestrator/search/agent/agent.py +62 -0
- orchestrator/search/agent/prompts.py +100 -0
- orchestrator/search/agent/state.py +21 -0
- orchestrator/search/agent/tools.py +258 -0
- orchestrator/search/core/__init__.py +12 -0
- orchestrator/search/core/embedding.py +73 -0
- orchestrator/search/core/exceptions.py +36 -0
- orchestrator/search/core/types.py +296 -0
- orchestrator/search/core/validators.py +40 -0
- orchestrator/search/docs/index.md +37 -0
- orchestrator/search/docs/running_local_text_embedding_inference.md +46 -0
- orchestrator/search/filters/__init__.py +40 -0
- orchestrator/search/filters/base.py +295 -0
- orchestrator/search/filters/date_filters.py +88 -0
- orchestrator/search/filters/definitions.py +107 -0
- orchestrator/search/filters/ltree_filters.py +56 -0
- orchestrator/search/filters/numeric_filter.py +73 -0
- orchestrator/search/indexing/__init__.py +16 -0
- orchestrator/search/indexing/indexer.py +334 -0
- orchestrator/search/indexing/registry.py +101 -0
- orchestrator/search/indexing/tasks.py +69 -0
- orchestrator/search/indexing/traverse.py +334 -0
- orchestrator/search/llm_migration.py +108 -0
- orchestrator/search/retrieval/__init__.py +16 -0
- orchestrator/search/retrieval/builder.py +123 -0
- orchestrator/search/retrieval/engine.py +154 -0
- orchestrator/search/retrieval/exceptions.py +90 -0
- orchestrator/search/retrieval/pagination.py +96 -0
- orchestrator/search/retrieval/retrievers/__init__.py +26 -0
- orchestrator/search/retrieval/retrievers/base.py +123 -0
- orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
- orchestrator/search/retrieval/retrievers/hybrid.py +277 -0
- orchestrator/search/retrieval/retrievers/semantic.py +94 -0
- orchestrator/search/retrieval/retrievers/structured.py +39 -0
- orchestrator/search/retrieval/utils.py +120 -0
- orchestrator/search/retrieval/validation.py +152 -0
- orchestrator/search/schemas/__init__.py +12 -0
- orchestrator/search/schemas/parameters.py +129 -0
- orchestrator/search/schemas/results.py +77 -0
- orchestrator/services/processes.py +2 -1
- orchestrator/services/settings_env_variables.py +2 -2
- orchestrator/settings.py +8 -1
- orchestrator/utils/state.py +6 -1
- orchestrator/workflows/steps.py +15 -1
- orchestrator/workflows/tasks/validate_products.py +1 -1
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/METADATA +15 -8
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/RECORD +72 -22
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
from pydantic import Field, field_validator
|
|
14
|
+
from pydantic_settings import BaseSettings
|
|
15
|
+
from structlog import get_logger
|
|
16
|
+
|
|
17
|
+
logger = get_logger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class LLMSettings(BaseSettings):
    """Settings for the optional LLM-backed search and agent features.

    All values can be provided via environment variables (pydantic-settings).
    Both features are off by default; enabling `SEARCH_ENABLED` or
    `AGENT_ENABLED` requires valid provider credentials below.
    """

    # Feature flags for LLM functionality
    SEARCH_ENABLED: bool = False  # Enable search/indexing with embeddings
    AGENT_ENABLED: bool = False  # Enable agentic functionality

    # Pydantic-ai Agent settings
    AGENT_MODEL: str = "gpt-4o-mini"  # See pydantic-ai docs for supported models.
    AGENT_MODEL_VERSION: str = "2025-01-01-preview"
    OPENAI_API_KEY: str = ""  # Change per provider (Azure, etc).
    # Embedding settings
    EMBEDDING_DIMENSION: int = 1536
    EMBEDDING_MODEL: str = "openai/text-embedding-3-small"  # See litellm docs for supported models.
    EMBEDDING_SAFE_MARGIN_PERCENT: float = Field(
        0.1, description="Safety margin as a percentage (e.g., 0.1 for 10%) for token budgeting.", ge=0, le=1
    )

    # The following settings are only needed for local models or system constraints.
    # By default, they are set conservative assuming a small model like All-MiniLM-L6-V2.
    OPENAI_BASE_URL: str | None = None
    EMBEDDING_FALLBACK_MAX_TOKENS: int | None = 512
    EMBEDDING_MAX_BATCH_SIZE: int | None = None

    # General LiteLLM settings
    LLM_MAX_RETRIES: int = 3
    LLM_TIMEOUT: int = 30

    # Toggle creation of extensions
    # NOTE(review): "EXTENTION" is a misspelling, but it is a public env-var
    # name — renaming it would break existing deployments, so it is kept.
    LLM_FORCE_EXTENTION_MIGRATION: bool = False

    @field_validator("EMBEDDING_MODEL")
    @classmethod
    def validate_embedding_model_format(cls, v: str) -> str:
        """Validate that embedding model is in 'vendor/model' format."""
        if "/" not in v:
            raise ValueError("EMBEDDING_MODEL must be in format 'vendor/model'")
        return v


# Module-level singleton used throughout the orchestrator.
llm_settings = LLMSettings()
|
orchestrator/log_config.py
CHANGED
|
@@ -155,7 +155,7 @@ def create_workflow(conn: sa.engine.Connection, workflow: dict) -> None:
|
|
|
155
155
|
conn: DB connection as available in migration main file.
|
|
156
156
|
workflow: Dict with data for a new workflow.
|
|
157
157
|
name: Name of the workflow.
|
|
158
|
-
target: Target of the workflow ("CREATE", "MODIFY", "TERMINATE", "SYSTEM")
|
|
158
|
+
target: Target of the workflow ("CREATE", "MODIFY", "RECONCILE", "TERMINATE", "SYSTEM")
|
|
159
159
|
description: Description of the workflow.
|
|
160
160
|
product_type: Product type to add the workflow to.
|
|
161
161
|
|
|
@@ -166,12 +166,16 @@ def create_workflow(conn: sa.engine.Connection, workflow: dict) -> None:
|
|
|
166
166
|
"is_task": False,
|
|
167
167
|
"description": "workflow description",
|
|
168
168
|
"product_type": "product_type",
|
|
169
|
+
"product_tag": "product_tag",
|
|
169
170
|
}
|
|
170
171
|
>>> create_workflow(conn, workflow)
|
|
171
172
|
"""
|
|
172
173
|
if not workflow.get("is_task", False):
|
|
173
174
|
workflow["is_task"] = False
|
|
174
175
|
|
|
176
|
+
if not workflow.get("product_tag"):
|
|
177
|
+
workflow["product_tag"] = None
|
|
178
|
+
|
|
175
179
|
if has_table_column(table_name="workflows", column_name="is_task", conn=conn):
|
|
176
180
|
query = """
|
|
177
181
|
WITH new_workflow AS (
|
|
@@ -186,6 +190,7 @@ def create_workflow(conn: sa.engine.Connection, workflow: dict) -> None:
|
|
|
186
190
|
FROM products AS p
|
|
187
191
|
CROSS JOIN new_workflow AS nw
|
|
188
192
|
WHERE p.product_type = :product_type
|
|
193
|
+
AND (:product_tag IS NULL OR p.tag = :product_tag)
|
|
189
194
|
ON CONFLICT DO NOTHING
|
|
190
195
|
"""
|
|
191
196
|
else:
|
|
@@ -203,6 +208,7 @@ def create_workflow(conn: sa.engine.Connection, workflow: dict) -> None:
|
|
|
203
208
|
FROM products AS p
|
|
204
209
|
CROSS JOIN new_workflow AS nw
|
|
205
210
|
WHERE p.product_type = :product_type
|
|
211
|
+
AND (:product_tag IS NULL OR p.tag = :product_tag)
|
|
206
212
|
ON CONFLICT DO NOTHING
|
|
207
213
|
"""
|
|
208
214
|
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
from typing import Any, Generic, TypeVar
|
|
16
|
+
from uuid import UUID
|
|
17
|
+
|
|
18
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
19
|
+
|
|
20
|
+
from orchestrator.search.core.types import SearchMetadata
|
|
21
|
+
from orchestrator.search.schemas.results import ComponentInfo, LeafInfo, MatchingField
|
|
22
|
+
|
|
23
|
+
T = TypeVar("T")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class PageInfoSchema(BaseModel):
    """Cursor-based pagination info returned alongside search results."""

    # True when a further page of results is available.
    has_next_page: bool = False
    # Opaque cursor to request the next page, if any.
    next_page_cursor: str | None = None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ProductSchema(BaseModel):
    """Minimal product representation (built from ORM attributes)."""

    model_config = ConfigDict(from_attributes=True)

    name: str
    tag: str
    product_type: str
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class SubscriptionSearchResult(BaseModel):
    """Single subscription hit with its relevance metadata."""

    # Relevance score assigned by the retriever.
    score: float
    # Non-zero when the query matched a field exactly — TODO confirm semantics in retrieval code.
    perfect_match: int
    matching_field: MatchingField | None = None
    # Serialized subscription payload; shape depends on the retrieval query.
    subscription: dict[str, Any]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class SearchResultsSchema(BaseModel, Generic[T]):
    """Generic paginated envelope for search results of any entity type."""

    data: list[T] = Field(default_factory=list)
    page_info: PageInfoSchema = Field(default_factory=PageInfoSchema)
    search_metadata: SearchMetadata | None = None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class WorkflowProductSchema(BaseModel):
    """Product associated with a workflow."""

    model_config = ConfigDict(from_attributes=True)

    product_type: str
    product_id: UUID
    name: str
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class WorkflowSearchSchema(BaseModel):
    """Schema for workflow search results."""

    model_config = ConfigDict(from_attributes=True)

    name: str
    # All products this workflow is attached to.
    products: list[WorkflowProductSchema]
    description: str | None = None
    created_at: datetime | None = None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class ProductSearchSchema(BaseModel):
    """Schema for product search results."""

    model_config = ConfigDict(from_attributes=True)

    product_id: UUID
    name: str
    product_type: str
    tag: str | None = None
    description: str | None = None
    status: str | None = None
    created_at: datetime | None = None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class ProcessSearchSchema(BaseModel):
    """Schema for process search results."""

    model_config = ConfigDict(from_attributes=True)

    process_id: UUID
    workflow_name: str
    workflow_id: UUID
    last_status: str
    # True for scheduled tasks, False for user-initiated workflows — TODO confirm against db model.
    is_task: bool
    created_by: str | None = None
    started_at: datetime
    last_modified_at: datetime
    last_step: str | None = None
    # Populated only for failed processes.
    failed_reason: str | None = None
    subscription_ids: list[UUID] | None = None
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class WorkflowSearchResult(BaseModel):
    """Single workflow hit with its relevance metadata."""

    score: float
    perfect_match: int
    matching_field: MatchingField | None = None
    workflow: WorkflowSearchSchema
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class ProductSearchResult(BaseModel):
    """Single product hit with its relevance metadata."""

    score: float
    perfect_match: int
    matching_field: MatchingField | None = None
    product: ProductSearchSchema
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class ProcessSearchResult(BaseModel):
    """Single process hit with its relevance metadata."""

    score: float
    perfect_match: int
    matching_field: MatchingField | None = None
    process: ProcessSearchSchema
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class PathsResponse(BaseModel):
    """Discoverable filter paths: terminal leaves and intermediate components."""

    leaves: list[LeafInfo]
    components: list[ComponentInfo]

    # extra="forbid" rejects unknown keys; use_enum_values serializes enums by value.
    model_config = ConfigDict(extra="forbid", use_enum_values=True)
|
orchestrator/schemas/workflow.py
CHANGED
|
@@ -60,6 +60,7 @@ class SubscriptionWorkflowListsSchema(OrchestratorBaseModel):
|
|
|
60
60
|
modify: list[WorkflowListItemSchema]
|
|
61
61
|
terminate: list[WorkflowListItemSchema]
|
|
62
62
|
system: list[WorkflowListItemSchema]
|
|
63
|
+
reconcile: list[WorkflowListItemSchema]
|
|
63
64
|
validate_: list[WorkflowListItemSchema] = Field(default_factory=list, alias="validate")
|
|
64
65
|
|
|
65
66
|
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
# This module requires: pydantic-ai==0.7.0, ag-ui-protocol>=0.1.8
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
from orchestrator.search.agent.agent import build_agent_router
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"build_agent_router",
|
|
21
|
+
]
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import structlog
|
|
17
|
+
from fastapi import APIRouter, HTTPException, Request
|
|
18
|
+
from pydantic_ai.ag_ui import StateDeps, handle_ag_ui_request
|
|
19
|
+
from pydantic_ai.agent import Agent
|
|
20
|
+
from pydantic_ai.models.openai import OpenAIModel
|
|
21
|
+
from pydantic_ai.settings import ModelSettings
|
|
22
|
+
from pydantic_ai.toolsets import FunctionToolset
|
|
23
|
+
from starlette.responses import Response
|
|
24
|
+
|
|
25
|
+
from orchestrator.search.agent.prompts import get_base_instructions, get_dynamic_instructions
|
|
26
|
+
from orchestrator.search.agent.state import SearchState
|
|
27
|
+
from orchestrator.search.agent.tools import search_toolset
|
|
28
|
+
|
|
29
|
+
logger = structlog.get_logger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def build_agent_router(model: str | OpenAIModel, toolsets: list[FunctionToolset[Any]] | None = None) -> APIRouter:
    """Build a FastAPI router that serves the search agent over the AG-UI protocol.

    Args:
        model: Model name or configured OpenAI model for the pydantic-ai Agent.
        toolsets: Optional extra toolsets; the built-in search toolset is always appended.

    Returns:
        A router with a POST "/" endpoint. If agent construction fails, a
        catch-all stub that answers 503 is served instead, so the app still boots.
    """
    router = APIRouter()

    try:
        combined_toolsets = [*(toolsets or []), search_toolset]

        agent = Agent(
            model=model,
            deps_type=StateDeps[SearchState],
            # Parallel tool calls are disabled deliberately:
            # https://github.com/pydantic/pydantic-ai/issues/562
            model_settings=ModelSettings(parallel_tool_calls=False),
            toolsets=combined_toolsets,
        )
        agent.instructions(get_base_instructions)
        agent.instructions(get_dynamic_instructions)

        @router.post("/")
        async def agent_endpoint(request: Request) -> Response:
            return await handle_ag_ui_request(agent, request, deps=StateDeps(SearchState()))

    except Exception as e:
        # Best-effort: keep the API up and report the failure on every route.
        logger.error("Agent init failed; serving disabled stub.", error=str(e))
        error_msg = f"Agent disabled: {str(e)}"

        @router.api_route("/{path:path}", methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"])
        async def _disabled(path: str) -> None:
            raise HTTPException(status_code=503, detail=error_msg)

    return router
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
from textwrap import dedent
|
|
16
|
+
|
|
17
|
+
import structlog
|
|
18
|
+
from pydantic_ai import RunContext
|
|
19
|
+
from pydantic_ai.ag_ui import StateDeps
|
|
20
|
+
|
|
21
|
+
from orchestrator.search.agent.state import SearchState
|
|
22
|
+
|
|
23
|
+
logger = structlog.get_logger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
async def get_base_instructions() -> str:
    """Return the static system prompt for the search agent.

    Fixes two defects in the prompt text: the typo "appropiate" and a
    reference to a non-existent tool name (`get_filter_operators`) — the
    tool advertised in section 2 is `get_valid_operators`.
    """
    return dedent(
        """
        You are an expert assistant designed to find relevant information by building and running database queries.

        ---
        ### 1. Your Goal and Method

        Your ultimate goal is to **find information** that answers the user's request.

        To do this, you will perform either a broad search or a filtered search.
        For **filtered searches**, your primary method is to **construct a valid `FilterTree` object**.
        To do this correctly, you must infer the exact structure, operators, and nesting rules from the Pydantic schema of the `set_filter_tree` tool itself.

        ---
        ### 2. Information-Gathering Tools

        **If you determine that a `FilterTree` is needed**, use these tools to gather information first:

        - **discover_filter_paths(field_names: list[str])**: Use this to discover all valid filter paths for a list of field names in a single call.
        - **get_valid_operators()**: Use this to get the JSON map of all valid operators for each field type.

        ---
        ### 3. Execution Workflow

        Follow these steps in strict order:

        1. **Set Context**: Always begin by calling `set_search_parameters`.
        2. **Analyze for Filters**: Based on the user's request, decide if specific filters are necessary.
           - **If filters ARE required**, follow these sub-steps:
             a. **Gather Intel**: Identify all needed field names, then call `discover_filter_paths` and `get_valid_operators` **once each** to get all required information.
             b. **Construct FilterTree**: Build the `FilterTree` object.
             c. **Set Filters**: Call `set_filter_tree`.
        3. **Execute**: Call `execute_search`. This is done for both filtered and non-filtered searches.
        4. **Report**: Answer the user's question directly and summarize when appropriate.

        ---
        ### 4. Critical Rules

        - **NEVER GUESS PATHS IN THE DATABASE**: You *must* verify every filter path by calling `discover_filter_paths` first. If a path does not exist, you may attempt to map the question onto existing paths that are valid and available from `discover_filter_paths`. If you cannot infer a match, inform the user and do not include it in the `FilterTree`.
        - **USE FULL PATHS**: Always use the full, unambiguous path returned by the discovery tool.
        - **MATCH OPERATORS**: Only use operators that are compatible with the field type as confirmed by `get_valid_operators`.
        """
    )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
async def get_dynamic_instructions(ctx: RunContext[StateDeps[SearchState]]) -> str:
    """Dynamically provides 'next step' coaching based on the current state."""
    state = ctx.deps.state

    if state.parameters:
        param_state_str = json.dumps(state.parameters, indent=2, default=str)
    else:
        param_state_str = "Not set."

    # Context counts as "set" only once an entity_type has been chosen.
    if state.parameters and state.parameters.get("entity_type"):
        next_step_guidance = (
            "INSTRUCTION: Context is set. Now, analyze the user's request. "
            "If specific filters ARE required, use the information-gathering tools to build a `FilterTree` and call `set_filter_tree`. "
            "If no specific filters are needed, you can proceed directly to `execute_search`."
        )
    else:
        next_step_guidance = (
            "INSTRUCTION: The search context is not set. Your next action is to call `set_search_parameters`."
        )

    return dedent(
        f"""
        ---
        ### Current State & Next Action

        **Current Search Parameters:**
        ```json
        {param_state_str}
        ```

        **{next_step_guidance}**
        """
    )
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel, Field
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SearchState(BaseModel):
    """Mutable AG-UI session state carried across agent tool calls."""

    # Accumulated search parameters; presumably written by the search tools — confirm in tools.py.
    parameters: dict[str, Any] | None = None
    # Result rows from the most recent search execution.
    results: list[dict[str, Any]] = Field(default_factory=list)
|