orchestrator-core 4.4.1__py3-none-any.whl → 4.5.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +26 -2
- orchestrator/agentic_app.py +84 -0
- orchestrator/api/api_v1/api.py +10 -0
- orchestrator/api/api_v1/endpoints/search.py +290 -0
- orchestrator/app.py +32 -0
- orchestrator/cli/index_llm.py +73 -0
- orchestrator/cli/main.py +22 -1
- orchestrator/cli/resize_embedding.py +135 -0
- orchestrator/cli/search_explore.py +208 -0
- orchestrator/cli/speedtest.py +151 -0
- orchestrator/db/models.py +37 -1
- orchestrator/devtools/populator.py +16 -0
- orchestrator/llm_settings.py +51 -0
- orchestrator/log_config.py +1 -0
- orchestrator/migrations/helpers.py +1 -1
- orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +95 -0
- orchestrator/schemas/search.py +130 -0
- orchestrator/schemas/workflow.py +1 -0
- orchestrator/search/__init__.py +12 -0
- orchestrator/search/agent/__init__.py +21 -0
- orchestrator/search/agent/agent.py +60 -0
- orchestrator/search/agent/prompts.py +100 -0
- orchestrator/search/agent/state.py +21 -0
- orchestrator/search/agent/tools.py +258 -0
- orchestrator/search/core/__init__.py +12 -0
- orchestrator/search/core/embedding.py +73 -0
- orchestrator/search/core/exceptions.py +36 -0
- orchestrator/search/core/types.py +296 -0
- orchestrator/search/core/validators.py +40 -0
- orchestrator/search/docs/index.md +37 -0
- orchestrator/search/docs/running_local_text_embedding_inference.md +45 -0
- orchestrator/search/filters/__init__.py +40 -0
- orchestrator/search/filters/base.py +280 -0
- orchestrator/search/filters/date_filters.py +88 -0
- orchestrator/search/filters/definitions.py +107 -0
- orchestrator/search/filters/ltree_filters.py +56 -0
- orchestrator/search/filters/numeric_filter.py +73 -0
- orchestrator/search/indexing/__init__.py +16 -0
- orchestrator/search/indexing/indexer.py +336 -0
- orchestrator/search/indexing/registry.py +101 -0
- orchestrator/search/indexing/tasks.py +66 -0
- orchestrator/search/indexing/traverse.py +334 -0
- orchestrator/search/retrieval/__init__.py +16 -0
- orchestrator/search/retrieval/builder.py +123 -0
- orchestrator/search/retrieval/engine.py +158 -0
- orchestrator/search/retrieval/exceptions.py +90 -0
- orchestrator/search/retrieval/pagination.py +96 -0
- orchestrator/search/retrieval/retrievers/__init__.py +26 -0
- orchestrator/search/retrieval/retrievers/base.py +122 -0
- orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
- orchestrator/search/retrieval/retrievers/hybrid.py +188 -0
- orchestrator/search/retrieval/retrievers/semantic.py +94 -0
- orchestrator/search/retrieval/retrievers/structured.py +39 -0
- orchestrator/search/retrieval/utils.py +120 -0
- orchestrator/search/retrieval/validation.py +152 -0
- orchestrator/search/schemas/__init__.py +12 -0
- orchestrator/search/schemas/parameters.py +129 -0
- orchestrator/search/schemas/results.py +77 -0
- orchestrator/services/settings_env_variables.py +2 -2
- orchestrator/settings.py +1 -1
- orchestrator/workflows/tasks/validate_products.py +1 -1
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a3.dist-info}/METADATA +9 -4
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a3.dist-info}/RECORD +65 -16
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a3.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Search index model for llm integration.
|
|
2
|
+
|
|
3
|
+
Revision ID: 52b37b5b2714
|
|
4
|
+
Revises: 850dccac3b02
|
|
5
|
+
Create Date: 2025-08-12 22:34:26.694750
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import sqlalchemy as sa
|
|
10
|
+
from alembic import op
|
|
11
|
+
from pgvector.sqlalchemy import Vector
|
|
12
|
+
from sqlalchemy.dialects import postgresql
|
|
13
|
+
from sqlalchemy_utils import LtreeType
|
|
14
|
+
|
|
15
|
+
from orchestrator.search.core.types import FieldType
|
|
16
|
+
|
|
17
|
+
# revision identifiers, used by Alembic.
revision = "52b37b5b2714"
down_revision = "850dccac3b02"
branch_labels = None
depends_on = None

# Name of the flattened search-index table managed by this migration.
TABLE = "ai_search_index"
# Index names kept as module constants so upgrade() and downgrade() stay in sync.
IDX_EMBED_HNSW = "ix_flat_embed_hnsw"
IDX_PATH_GIST = "ix_flat_path_gist"
IDX_PATH_BTREE = "ix_flat_path_btree"
IDX_VALUE_TRGM = "ix_flat_value_trgm"
IDX_CONTENT_HASH = "idx_ai_search_index_content_hash"

# Dimensionality of the pgvector embedding column.
TARGET_DIM = 1536
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def upgrade() -> None:
    """Create the ai_search_index table, its value_type enum and all supporting indexes."""
    # Create PostgreSQL extensions:
    # ltree for hierarchical paths, unaccent/pg_trgm for text search, vector for pgvector embeddings.
    op.execute("CREATE EXTENSION IF NOT EXISTS ltree;")
    op.execute("CREATE EXTENSION IF NOT EXISTS unaccent;")
    op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;")
    op.execute("CREATE EXTENSION IF NOT EXISTS vector;")

    # Create the ai_search_index table: one row per flattened (entity_id, path) field.
    # embedding is nullable so rows can be inserted first and embedded later.
    op.create_table(
        TABLE,
        sa.Column("entity_type", sa.Text, nullable=False),
        sa.Column("entity_id", postgresql.UUID, nullable=False),
        sa.Column("path", LtreeType, nullable=False),
        sa.Column("value", sa.Text, nullable=False),
        sa.Column("embedding", Vector(TARGET_DIM), nullable=True),
        sa.Column("content_hash", sa.String(64), nullable=False),
        sa.PrimaryKeyConstraint("entity_id", "path", name="pk_ai_search_index"),
    )

    # Create the enum type first, then add the NOT NULL column with a temporary
    # server_default so the column can be added; the default is removed right after.
    field_type_enum = sa.Enum(*[ft.value for ft in FieldType], name="field_type")
    field_type_enum.create(op.get_bind(), checkfirst=True)
    op.add_column(
        TABLE,
        sa.Column("value_type", field_type_enum, nullable=False, server_default=FieldType.STRING.value),
    )
    op.alter_column(TABLE, "value_type", server_default=None)

    op.create_index(op.f("ix_ai_search_index_entity_id"), TABLE, ["entity_id"], unique=False)
    op.create_index(IDX_CONTENT_HASH, TABLE, ["content_hash"])

    # GiST index supports ltree ancestor/descendant operators; the btree index
    # below covers equality and ordering on the same column.
    op.create_index(
        IDX_PATH_GIST,
        TABLE,
        ["path"],
        postgresql_using="GIST",
        postgresql_ops={"path": "gist_ltree_ops"},
    )
    op.create_index(IDX_PATH_BTREE, TABLE, ["path"])
    # Trigram GIN index for fuzzy / substring matching on the value column.
    op.create_index(IDX_VALUE_TRGM, TABLE, ["value"], postgresql_using="GIN", postgresql_ops={"value": "gin_trgm_ops"})

    # HNSW approximate nearest-neighbour index over embeddings using L2 distance.
    op.create_index(
        IDX_EMBED_HNSW,
        TABLE,
        ["embedding"],
        postgresql_using="HNSW",
        postgresql_with={"m": 16, "ef_construction": 64},
        postgresql_ops={"embedding": "vector_l2_ops"},
    )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def downgrade() -> None:
    """Remove the ai_search_index table together with its indexes and enum type."""
    # Drop the named indexes in reverse order of creation; if_exists guards
    # against partially applied upgrades.
    for index_name in (IDX_EMBED_HNSW, IDX_VALUE_TRGM, IDX_PATH_BTREE, IDX_PATH_GIST, IDX_CONTENT_HASH):
        op.drop_index(index_name, table_name=TABLE, if_exists=True)
    op.drop_index(op.f("ix_ai_search_index_entity_id"), table_name=TABLE, if_exists=True)

    # Drop the table first, then the enum type that its value_type column referenced.
    op.drop_table(TABLE, if_exists=True)
    sa.Enum(name="field_type").drop(op.get_bind(), checkfirst=True)
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
from typing import Any, Generic, TypeVar
|
|
16
|
+
from uuid import UUID
|
|
17
|
+
|
|
18
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
19
|
+
|
|
20
|
+
from orchestrator.search.core.types import SearchMetadata
|
|
21
|
+
from orchestrator.search.schemas.results import ComponentInfo, LeafInfo, MatchingField
|
|
22
|
+
|
|
23
|
+
T = TypeVar("T")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class PageInfoSchema(BaseModel):
    """Cursor-based pagination info returned alongside a page of search results."""

    has_next_page: bool = False  # True when more results exist beyond this page
    next_page_cursor: str | None = None  # opaque cursor for the next page; None on the last page
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ProductSchema(BaseModel):
    """Minimal product representation embedded in search results."""

    model_config = ConfigDict(from_attributes=True)  # allow construction from ORM objects

    name: str
    tag: str
    product_type: str
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class SubscriptionSearchResult(BaseModel):
    """A single subscription hit with its relevance scoring."""

    score: float  # retrieval relevance score
    perfect_match: int  # presumably non-zero for exact matches — TODO confirm against the retrieval engine
    matching_field: MatchingField | None = None  # the field that produced the match, if known
    subscription: dict[str, Any]  # serialized subscription payload
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class SearchResultsSchema(BaseModel, Generic[T]):
    """Generic page of search results whose items are of type ``T``."""

    data: list[T] = Field(default_factory=list)  # the result items for this page
    page_info: PageInfoSchema = Field(default_factory=PageInfoSchema)  # pagination cursors
    search_metadata: SearchMetadata | None = None  # optional metadata about how the search ran
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class WorkflowProductSchema(BaseModel):
    """Product associated with a workflow."""

    model_config = ConfigDict(from_attributes=True)  # allow construction from ORM objects

    product_type: str
    product_id: UUID
    name: str
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class WorkflowSearchSchema(BaseModel):
    """Schema for workflow search results."""

    model_config = ConfigDict(from_attributes=True)  # allow construction from ORM objects

    name: str
    products: list[WorkflowProductSchema]  # products this workflow applies to
    description: str | None = None
    created_at: datetime | None = None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class ProductSearchSchema(BaseModel):
    """Schema for product search results."""

    model_config = ConfigDict(from_attributes=True)  # allow construction from ORM objects

    product_id: UUID
    name: str
    product_type: str
    tag: str | None = None
    description: str | None = None
    status: str | None = None
    created_at: datetime | None = None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class ProcessSearchSchema(BaseModel):
    """Schema for process search results."""

    model_config = ConfigDict(from_attributes=True)  # allow construction from ORM objects

    process_id: UUID
    workflow_name: str
    workflow_id: UUID
    last_status: str  # most recent process status
    is_task: bool  # True for (system) tasks as opposed to user workflows
    created_by: str | None = None
    started_at: datetime
    last_modified_at: datetime
    last_step: str | None = None  # name of the last executed step, if any
    failed_reason: str | None = None  # failure message when the process failed
    subscription_ids: list[UUID] | None = None  # subscriptions touched by this process
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class WorkflowSearchResult(BaseModel):
    """A single workflow hit with its relevance scoring."""

    score: float  # retrieval relevance score
    perfect_match: int  # presumably non-zero for exact matches — TODO confirm against the retrieval engine
    matching_field: MatchingField | None = None  # the field that produced the match, if known
    workflow: WorkflowSearchSchema
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class ProductSearchResult(BaseModel):
    """A single product hit with its relevance scoring."""

    score: float  # retrieval relevance score
    perfect_match: int  # presumably non-zero for exact matches — TODO confirm against the retrieval engine
    matching_field: MatchingField | None = None  # the field that produced the match, if known
    product: ProductSearchSchema
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class ProcessSearchResult(BaseModel):
    """A single process hit with its relevance scoring."""

    score: float  # retrieval relevance score
    perfect_match: int  # presumably non-zero for exact matches — TODO confirm against the retrieval engine
    matching_field: MatchingField | None = None  # the field that produced the match, if known
    process: ProcessSearchSchema
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class PathsResponse(BaseModel):
    """Valid filter paths: leaf fields plus the intermediate path components."""

    leaves: list[LeafInfo]  # terminal filterable fields
    components: list[ComponentInfo]  # intermediate path segments

    model_config = ConfigDict(extra="forbid", use_enum_values=True)  # strict payload, enums serialized as values
|
orchestrator/schemas/workflow.py
CHANGED
|
@@ -60,6 +60,7 @@ class SubscriptionWorkflowListsSchema(OrchestratorBaseModel):
|
|
|
60
60
|
modify: list[WorkflowListItemSchema]
|
|
61
61
|
terminate: list[WorkflowListItemSchema]
|
|
62
62
|
system: list[WorkflowListItemSchema]
|
|
63
|
+
reconcile: list[WorkflowListItemSchema]
|
|
63
64
|
validate_: list[WorkflowListItemSchema] = Field(default_factory=list, alias="validate")
|
|
64
65
|
|
|
65
66
|
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
# This module requires: pydantic-ai==0.7.0, ag-ui-protocol>=0.1.8
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
from orchestrator.search.agent.agent import build_agent_app
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"build_agent_app",
|
|
21
|
+
]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import structlog
|
|
17
|
+
from fastapi import FastAPI, HTTPException
|
|
18
|
+
from pydantic_ai.ag_ui import StateDeps
|
|
19
|
+
from pydantic_ai.agent import Agent
|
|
20
|
+
from pydantic_ai.models.openai import OpenAIModel
|
|
21
|
+
from pydantic_ai.settings import ModelSettings
|
|
22
|
+
from pydantic_ai.toolsets import FunctionToolset
|
|
23
|
+
from starlette.types import ASGIApp
|
|
24
|
+
|
|
25
|
+
from orchestrator.search.agent.prompts import get_base_instructions, get_dynamic_instructions
|
|
26
|
+
from orchestrator.search.agent.state import SearchState
|
|
27
|
+
from orchestrator.search.agent.tools import search_toolset
|
|
28
|
+
|
|
29
|
+
logger = structlog.get_logger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _disabled_agent_app(reason: str) -> FastAPI:
    """Build a stub app that rejects every request with 503 and the given *reason*."""
    stub = FastAPI(title="Agent disabled")

    # Catch-all route: any path, any common HTTP method.
    @stub.api_route("/{path:path}", methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"])
    async def _disabled(path: str) -> None:
        raise HTTPException(status_code=503, detail=f"Agent disabled: {reason}")

    return stub
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def build_agent_app(model: str | OpenAIModel, toolsets: list[FunctionToolset[Any]] | None = None) -> ASGIApp:
    """Build the AG-UI search-agent ASGI app.

    Any failure during agent construction is caught and a stub app that
    answers 503 is returned instead, so the host application still boots.
    """
    try:
        # Caller-supplied toolsets (copied, never mutated) plus the built-in search toolset.
        combined_toolsets: list[FunctionToolset[Any]] = list(toolsets) if toolsets else []
        combined_toolsets.append(search_toolset)

        agent = Agent(
            model=model,
            deps_type=StateDeps[SearchState],
            # Parallel tool calls disabled: https://github.com/pydantic/pydantic-ai/issues/562
            model_settings=ModelSettings(parallel_tool_calls=False),
            toolsets=combined_toolsets,
        )
        agent.instructions(get_base_instructions)
        agent.instructions(get_dynamic_instructions)

        return agent.to_ag_ui(deps=StateDeps(SearchState()))
    except Exception as e:  # deliberately broad: any init error degrades to the stub app
        logger.error("Agent init failed; serving disabled stub.", error=str(e))
        return _disabled_agent_app(str(e))
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
from textwrap import dedent
|
|
16
|
+
|
|
17
|
+
import structlog
|
|
18
|
+
from pydantic_ai import RunContext
|
|
19
|
+
from pydantic_ai.ag_ui import StateDeps
|
|
20
|
+
|
|
21
|
+
from orchestrator.search.agent.state import SearchState
|
|
22
|
+
|
|
23
|
+
logger = structlog.get_logger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
async def get_base_instructions() -> str:
    """Return the static system prompt: the agent's goal, tools, workflow and rules.

    Fixes versus the previous revision: the "MATCH OPERATORS" rule referred to a
    nonexistent tool ``get_filter_operators`` (the tool documented in section 2
    and used in the workflow is ``get_valid_operators``), and two typos
    ("the users' question", "appropiate") are corrected.
    """
    return dedent(
        """
        You are an expert assistant designed to find relevant information by building and running database queries.

        ---
        ### 1. Your Goal and Method

        Your ultimate goal is to **find information** that answers the user's request.

        To do this, you will perform either a broad search or a filtered search.
        For **filtered searches**, your primary method is to **construct a valid `FilterTree` object**.
        To do this correctly, you must infer the exact structure, operators, and nesting rules from the Pydantic schema of the `set_filter_tree` tool itself.

        ---
        ### 2. Information-Gathering Tools

        **If you determine that a `FilterTree` is needed**, use these tools to gather information first:

        - **discover_filter_paths(field_names: list[str])**: Use this to discover all valid filter paths for a list of field names in a single call.
        - **get_valid_operators()**: Use this to get the JSON map of all valid operators for each field type.

        ---
        ### 3. Execution Workflow

        Follow these steps in strict order:

        1. **Set Context**: Always begin by calling `set_search_parameters`.
        2. **Analyze for Filters**: Based on the user's request, decide if specific filters are necessary.
            - **If filters ARE required**, follow these sub-steps:
                a. **Gather Intel**: Identify all needed field names, then call `discover_filter_paths` and `get_valid_operators` **once each** to get all required information.
                b. **Construct FilterTree**: Build the `FilterTree` object.
                c. **Set Filters**: Call `set_filter_tree`.
        3. **Execute**: Call `execute_search`. This is done for both filtered and non-filtered searches.
        4. **Report**: Answer the user's question directly and summarize when appropriate.

        ---
        ### 4. Critical Rules

        - **NEVER GUESS PATHS**: You *must* verify every filter path by calling `discover_filter_paths` first. If a path does not exist, you must inform the user and not include it in the `FilterTree`.
        - **USE FULL PATHS**: Always use the full, unambiguous path returned by the discovery tool.
        - **MATCH OPERATORS**: Only use operators that are compatible with the field type as confirmed by `get_valid_operators`.
        """
    )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
async def get_dynamic_instructions(ctx: RunContext[StateDeps[SearchState]]) -> str:
    """Dynamically provides 'next step' coaching based on the current state."""
    state = ctx.deps.state

    # Pretty-print the current parameters for the prompt, or note they are unset.
    if state.parameters:
        param_state_str = json.dumps(state.parameters, indent=2, default=str)
    else:
        param_state_str = "Not set."

    if state.parameters and state.parameters.get("entity_type"):
        next_step_guidance = (
            "INSTRUCTION: Context is set. Now, analyze the user's request. "
            "If specific filters ARE required, use the information-gathering tools to build a `FilterTree` and call `set_filter_tree`. "
            "If no specific filters are needed, you can proceed directly to `execute_search`."
        )
    else:
        next_step_guidance = (
            "INSTRUCTION: The search context is not set. Your next action is to call `set_search_parameters`."
        )

    return dedent(
        f"""
        ---
        ### Current State & Next Action

        **Current Search Parameters:**
        ```json
        {param_state_str}
        ```

        **{next_step_guidance}**
        """
    )
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel, Field
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SearchState(BaseModel):
    """Shared agent session state: the search parameters built so far and the results."""

    parameters: dict[str, Any] | None = None  # current search parameters; None until set — presumably by `set_search_parameters`
    results: list[dict[str, Any]] = Field(default_factory=list)  # accumulated search results
|