orchestrator-core 4.4.1-py3-none-any.whl → 4.5.0a2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. orchestrator/__init__.py +26 -2
  2. orchestrator/agentic_app.py +84 -0
  3. orchestrator/api/api_v1/api.py +10 -0
  4. orchestrator/api/api_v1/endpoints/search.py +277 -0
  5. orchestrator/app.py +32 -0
  6. orchestrator/cli/index_llm.py +73 -0
  7. orchestrator/cli/main.py +22 -1
  8. orchestrator/cli/resize_embedding.py +135 -0
  9. orchestrator/cli/search_explore.py +208 -0
  10. orchestrator/cli/speedtest.py +151 -0
  11. orchestrator/db/models.py +37 -1
  12. orchestrator/llm_settings.py +51 -0
  13. orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +95 -0
  14. orchestrator/schemas/search.py +117 -0
  15. orchestrator/search/__init__.py +12 -0
  16. orchestrator/search/agent/__init__.py +8 -0
  17. orchestrator/search/agent/agent.py +47 -0
  18. orchestrator/search/agent/prompts.py +87 -0
  19. orchestrator/search/agent/state.py +8 -0
  20. orchestrator/search/agent/tools.py +236 -0
  21. orchestrator/search/core/__init__.py +0 -0
  22. orchestrator/search/core/embedding.py +64 -0
  23. orchestrator/search/core/exceptions.py +22 -0
  24. orchestrator/search/core/types.py +281 -0
  25. orchestrator/search/core/validators.py +27 -0
  26. orchestrator/search/docs/index.md +37 -0
  27. orchestrator/search/docs/running_local_text_embedding_inference.md +45 -0
  28. orchestrator/search/filters/__init__.py +27 -0
  29. orchestrator/search/filters/base.py +275 -0
  30. orchestrator/search/filters/date_filters.py +75 -0
  31. orchestrator/search/filters/definitions.py +93 -0
  32. orchestrator/search/filters/ltree_filters.py +43 -0
  33. orchestrator/search/filters/numeric_filter.py +60 -0
  34. orchestrator/search/indexing/__init__.py +3 -0
  35. orchestrator/search/indexing/indexer.py +323 -0
  36. orchestrator/search/indexing/registry.py +88 -0
  37. orchestrator/search/indexing/tasks.py +53 -0
  38. orchestrator/search/indexing/traverse.py +322 -0
  39. orchestrator/search/retrieval/__init__.py +3 -0
  40. orchestrator/search/retrieval/builder.py +113 -0
  41. orchestrator/search/retrieval/engine.py +152 -0
  42. orchestrator/search/retrieval/pagination.py +83 -0
  43. orchestrator/search/retrieval/retriever.py +447 -0
  44. orchestrator/search/retrieval/utils.py +106 -0
  45. orchestrator/search/retrieval/validation.py +174 -0
  46. orchestrator/search/schemas/__init__.py +0 -0
  47. orchestrator/search/schemas/parameters.py +116 -0
  48. orchestrator/search/schemas/results.py +64 -0
  49. orchestrator/services/settings_env_variables.py +2 -2
  50. orchestrator/settings.py +1 -1
  51. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a2.dist-info}/METADATA +8 -3
  52. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a2.dist-info}/RECORD +54 -11
  53. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a2.dist-info}/WHEEL +0 -0
  54. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,95 @@
+ """Search index model for llm integration.
+
+ Revision ID: 52b37b5b2714
+ Revises: 850dccac3b02
+ Create Date: 2025-08-12 22:34:26.694750
+
+ """
+
+ import sqlalchemy as sa
+ from alembic import op
+ from pgvector.sqlalchemy import Vector
+ from sqlalchemy.dialects import postgresql
+ from sqlalchemy_utils import LtreeType
+
+ from orchestrator.search.core.types import FieldType
+
+ # revision identifiers, used by Alembic.
+ revision = "52b37b5b2714"
+ down_revision = "850dccac3b02"
+ branch_labels = None
+ depends_on = None
+
+ TABLE = "ai_search_index"
+ IDX_EMBED_HNSW = "ix_flat_embed_hnsw"
+ IDX_PATH_GIST = "ix_flat_path_gist"
+ IDX_PATH_BTREE = "ix_flat_path_btree"
+ IDX_VALUE_TRGM = "ix_flat_value_trgm"
+ IDX_CONTENT_HASH = "idx_ai_search_index_content_hash"
+
+ TARGET_DIM = 1536
+
+
+ def upgrade() -> None:
+     # Create PostgreSQL extensions
+     op.execute("CREATE EXTENSION IF NOT EXISTS ltree;")
+     op.execute("CREATE EXTENSION IF NOT EXISTS unaccent;")
+     op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;")
+     op.execute("CREATE EXTENSION IF NOT EXISTS vector;")
+
+     # Create the ai_search_index table
+     op.create_table(
+         TABLE,
+         sa.Column("entity_type", sa.Text, nullable=False),
+         sa.Column("entity_id", postgresql.UUID, nullable=False),
+         sa.Column("path", LtreeType, nullable=False),
+         sa.Column("value", sa.Text, nullable=False),
+         sa.Column("embedding", Vector(TARGET_DIM), nullable=True),
+         sa.Column("content_hash", sa.String(64), nullable=False),
+         sa.PrimaryKeyConstraint("entity_id", "path", name="pk_ai_search_index"),
+     )
+
+     field_type_enum = sa.Enum(*[ft.value for ft in FieldType], name="field_type")
+     field_type_enum.create(op.get_bind(), checkfirst=True)
+     op.add_column(
+         TABLE,
+         sa.Column("value_type", field_type_enum, nullable=False, server_default=FieldType.STRING.value),
+     )
+     op.alter_column(TABLE, "value_type", server_default=None)
+
+     op.create_index(op.f("ix_ai_search_index_entity_id"), TABLE, ["entity_id"], unique=False)
+     op.create_index(IDX_CONTENT_HASH, TABLE, ["content_hash"])
+
+     op.create_index(
+         IDX_PATH_GIST,
+         TABLE,
+         ["path"],
+         postgresql_using="GIST",
+         postgresql_ops={"path": "gist_ltree_ops"},
+     )
+     op.create_index(IDX_PATH_BTREE, TABLE, ["path"])
+     op.create_index(IDX_VALUE_TRGM, TABLE, ["value"], postgresql_using="GIN", postgresql_ops={"value": "gin_trgm_ops"})
+
+     op.create_index(
+         IDX_EMBED_HNSW,
+         TABLE,
+         ["embedding"],
+         postgresql_using="HNSW",
+         postgresql_with={"m": 16, "ef_construction": 64},
+         postgresql_ops={"embedding": "vector_l2_ops"},
+     )
+
+
+ def downgrade() -> None:
+     # Drop all indexes
+     op.drop_index(IDX_EMBED_HNSW, table_name=TABLE, if_exists=True)
+     op.drop_index(IDX_VALUE_TRGM, table_name=TABLE, if_exists=True)
+     op.drop_index(IDX_PATH_BTREE, table_name=TABLE, if_exists=True)
+     op.drop_index(IDX_PATH_GIST, table_name=TABLE, if_exists=True)
+     op.drop_index(IDX_CONTENT_HASH, table_name=TABLE, if_exists=True)
+     op.drop_index(op.f("ix_ai_search_index_entity_id"), table_name=TABLE, if_exists=True)
+
+     # Drop table and enum
+     op.drop_table(TABLE, if_exists=True)
+     field_type_enum = sa.Enum(name="field_type")
+     field_type_enum.drop(op.get_bind(), checkfirst=True)
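A minimal sketch (not part of the diff) of how the HNSW index above can be queried once rows are embedded; the DSN and query vector are hypothetical placeholders. pgvector's `<->` operator computes L2 distance, which matches the `vector_l2_ops` opclass used in the migration.

# Assumes a reachable PostgreSQL database with this migration applied.
from sqlalchemy import create_engine, text

engine = create_engine("postgresql://localhost/orchestrator")  # hypothetical DSN

query_vec = "[" + ",".join(["0.0"] * 1536) + "]"  # dummy 1536-dim vector, matches TARGET_DIM

with engine.connect() as conn:
    # <-> is pgvector's L2 distance operator; the HNSW index built with
    # vector_l2_ops serves this ORDER BY as an approximate nearest-neighbor scan.
    rows = conn.execute(
        text(
            "SELECT entity_id, path, value, embedding <-> CAST(:q AS vector) AS distance "
            "FROM ai_search_index ORDER BY embedding <-> CAST(:q AS vector) LIMIT 10"
        ),
        {"q": query_vec},
    ).fetchall()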
@@ -0,0 +1,117 @@
+ from datetime import datetime
+ from typing import Any, Generic, TypeVar
+ from uuid import UUID
+
+ from pydantic import BaseModel, ConfigDict, Field
+
+ from orchestrator.search.core.types import SearchMetadata
+ from orchestrator.search.schemas.results import ComponentInfo, LeafInfo, MatchingField
+
+ T = TypeVar("T")
+
+
+ class PageInfoSchema(BaseModel):
+     has_next_page: bool = False
+     next_page_cursor: str | None = None
+
+
+ class ProductSchema(BaseModel):
+     model_config = ConfigDict(from_attributes=True)
+
+     name: str
+     tag: str
+     product_type: str
+
+
+ class SubscriptionSearchResult(BaseModel):
+     score: float
+     perfect_match: int
+     matching_field: MatchingField | None = None
+     subscription: dict[str, Any]
+
+
+ class SearchResultsSchema(BaseModel, Generic[T]):
+     data: list[T] = Field(default_factory=list)
+     page_info: PageInfoSchema = Field(default_factory=PageInfoSchema)
+     search_metadata: SearchMetadata | None = None
+
+
+ class WorkflowProductSchema(BaseModel):
+     """Product associated with a workflow."""
+
+     model_config = ConfigDict(from_attributes=True)
+
+     product_type: str
+     product_id: UUID
+     name: str
+
+
+ class WorkflowSearchSchema(BaseModel):
+     """Schema for workflow search results."""
+
+     model_config = ConfigDict(from_attributes=True)
+
+     name: str
+     products: list[WorkflowProductSchema]
+     description: str | None = None
+     created_at: datetime | None = None
+
+
+ class ProductSearchSchema(BaseModel):
+     """Schema for product search results."""
+
+     model_config = ConfigDict(from_attributes=True)
+
+     product_id: UUID
+     name: str
+     product_type: str
+     tag: str | None = None
+     description: str | None = None
+     status: str | None = None
+     created_at: datetime | None = None
+
+
+ class ProcessSearchSchema(BaseModel):
+     """Schema for process search results."""
+
+     model_config = ConfigDict(from_attributes=True)
+
+     process_id: UUID
+     workflow_name: str
+     workflow_id: UUID
+     last_status: str
+     is_task: bool
+     created_by: str | None = None
+     started_at: datetime
+     last_modified_at: datetime
+     last_step: str | None = None
+     failed_reason: str | None = None
+     subscription_ids: list[UUID] | None = None
+
+
+ class WorkflowSearchResult(BaseModel):
+     score: float
+     perfect_match: int
+     matching_field: MatchingField | None = None
+     workflow: WorkflowSearchSchema
+
+
+ class ProductSearchResult(BaseModel):
+     score: float
+     perfect_match: int
+     matching_field: MatchingField | None = None
+     product: ProductSearchSchema
+
+
+ class ProcessSearchResult(BaseModel):
+     score: float
+     perfect_match: int
+     matching_field: MatchingField | None = None
+     process: ProcessSearchSchema
+
+
+ class PathsResponse(BaseModel):
+     leaves: list[LeafInfo]
+     components: list[ComponentInfo]
+
+     model_config = ConfigDict(extra="forbid", use_enum_values=True)
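As a quick illustration (not from the diff), the generic SearchResultsSchema[T] wraps typed hits plus cursor-style pagination; all field values below are made up.

from datetime import datetime, timezone
from uuid import uuid4

from orchestrator.schemas.search import (
    PageInfoSchema,
    ProcessSearchResult,
    ProcessSearchSchema,
    SearchResultsSchema,
)

# Illustrative values only.
hit = ProcessSearchResult(
    score=0.93,
    perfect_match=0,
    process=ProcessSearchSchema(
        process_id=uuid4(),
        workflow_name="modify_note",
        workflow_id=uuid4(),
        last_status="completed",
        is_task=False,
        started_at=datetime.now(timezone.utc),
        last_modified_at=datetime.now(timezone.utc),
    ),
)
page = SearchResultsSchema[ProcessSearchResult](
    data=[hit],
    page_info=PageInfoSchema(has_next_page=True, next_page_cursor="opaque-cursor"),
)
print(page.model_dump_json(indent=2))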
@@ -0,0 +1,12 @@
+ # Copyright 2019-2025 SURF.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #    http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
@@ -0,0 +1,8 @@
+ # This module requires: pydantic-ai==0.7.0, ag-ui-protocol>=0.1.8
+
+
+ from orchestrator.search.agent.agent import build_agent_app
+
+ __all__ = [
+     "build_agent_app",
+ ]
@@ -0,0 +1,47 @@
+ from typing import Any
+
+ import structlog
+ from fastapi import FastAPI, HTTPException
+ from pydantic_ai.ag_ui import StateDeps
+ from pydantic_ai.agent import Agent
+ from pydantic_ai.models.openai import OpenAIModel
+ from pydantic_ai.settings import ModelSettings
+ from pydantic_ai.toolsets import FunctionToolset
+ from starlette.types import ASGIApp
+
+ from orchestrator.search.agent.prompts import get_base_instructions, get_dynamic_instructions
+ from orchestrator.search.agent.state import SearchState
+ from orchestrator.search.agent.tools import search_toolset
+
+ logger = structlog.get_logger(__name__)
+
+
+ def _disabled_agent_app(reason: str) -> FastAPI:
+     app = FastAPI(title="Agent disabled")
+
+     @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"])
+     async def _disabled(path: str) -> None:
+         raise HTTPException(status_code=503, detail=f"Agent disabled: {reason}")
+
+     return app
+
+
+ def build_agent_app(model: str | OpenAIModel, toolsets: list[FunctionToolset[Any]] | None = None) -> ASGIApp:
+     try:
+         toolsets = toolsets + [search_toolset] if toolsets else [search_toolset]
+
+         agent = Agent(
+             model=model,
+             deps_type=StateDeps[SearchState],
+             model_settings=ModelSettings(
+                 parallel_tool_calls=False,
+             ),  # https://github.com/pydantic/pydantic-ai/issues/562
+             toolsets=toolsets,
+         )
+         agent.instructions(get_base_instructions)
+         agent.instructions(get_dynamic_instructions)
+
+         return agent.to_ag_ui(deps=StateDeps(SearchState()))
+     except Exception as e:
+         logger.error("Agent init failed; serving disabled stub.", error=str(e))
+         return _disabled_agent_app(str(e))
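A short sketch (an assumption, not shown in this diff) of wiring the returned ASGI app into a host application; the mount path and model identifier are illustrative.

from fastapi import FastAPI

from orchestrator.search.agent import build_agent_app

app = FastAPI()
# build_agent_app returns either the AG-UI agent app or, on init failure,
# a stub that answers every route with 503, so mounting is always safe.
app.mount("/agent", build_agent_app(model="openai:gpt-4o"))  # hypothetical path/model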
@@ -0,0 +1,87 @@
+ import json
+ from textwrap import dedent
+
+ import structlog
+ from pydantic_ai import RunContext
+ from pydantic_ai.ag_ui import StateDeps
+
+ from orchestrator.search.agent.state import SearchState
+
+ logger = structlog.get_logger(__name__)
+
+
+ async def get_base_instructions() -> str:
+     return dedent(
+         """
+         You are an expert assistant designed to find relevant information by building and running database queries.
+
+         ---
+         ### 1. Your Goal and Method
+
+         Your ultimate goal is to **find information** that answers the user's request.
+
+         To do this, you will perform either a broad search or a filtered search.
+         For **filtered searches**, your primary method is to **construct a valid `FilterTree` object**.
+         To do this correctly, you must infer the exact structure, operators, and nesting rules from the Pydantic schema of the `set_filter_tree` tool itself.
+
+         ---
+         ### 2. Information-Gathering Tools
+
+         **If you determine that a `FilterTree` is needed**, use these tools to gather information first:
+
+         - **discover_filter_paths(field_names: list[str])**: Use this to discover all valid filter paths for a list of field names in a single call.
+         - **get_valid_operators()**: Use this to get the JSON map of all valid operators for each field type.
+
+         ---
+         ### 3. Execution Workflow
+
+         Follow these steps in strict order:
+
+         1. **Set Context**: Always begin by calling `set_search_parameters`.
+         2. **Analyze for Filters**: Based on the user's request, decide if specific filters are necessary.
+            - **If filters ARE required**, follow these sub-steps:
+              a. **Gather Intel**: Identify all needed field names, then call `discover_filter_paths` and `get_valid_operators` **once each** to get all required information.
+              b. **Construct FilterTree**: Build the `FilterTree` object.
+              c. **Set Filters**: Call `set_filter_tree`.
+         3. **Execute**: Call `execute_search`. This is done for both filtered and non-filtered searches.
+         4. **Report**: Answer the user's question directly and summarize when appropriate.
+
+         ---
+         ### 4. Critical Rules
+
+         - **NEVER GUESS PATHS**: You *must* verify every filter path by calling `discover_filter_paths` first. If a path does not exist, you must inform the user and not include it in the `FilterTree`.
+         - **USE FULL PATHS**: Always use the full, unambiguous path returned by the discovery tool.
+         - **MATCH OPERATORS**: Only use operators that are compatible with the field type as confirmed by `get_valid_operators`.
+         """
+     )
+
+
+ async def get_dynamic_instructions(ctx: RunContext[StateDeps[SearchState]]) -> str:
+     """Dynamically provides 'next step' coaching based on the current state."""
+     state = ctx.deps.state
+     param_state_str = json.dumps(state.parameters, indent=2, default=str) if state.parameters else "Not set."
+
+     if not state.parameters or not state.parameters.get("entity_type"):
+         next_step_guidance = (
+             "INSTRUCTION: The search context is not set. Your next action is to call `set_search_parameters`."
+         )
+     else:
+         next_step_guidance = (
+             "INSTRUCTION: Context is set. Now, analyze the user's request. "
+             "If specific filters ARE required, use the information-gathering tools to build a `FilterTree` and call `set_filter_tree`. "
+             "If no specific filters are needed, you can proceed directly to `execute_search`."
+         )
+     return dedent(
+         f"""
+         ---
+         ### Current State & Next Action
+
+         **Current Search Parameters:**
+         ```json
+         {param_state_str}
+         ```
+
+         **{next_step_guidance}**
+         """
+     )
@@ -0,0 +1,8 @@
+ from typing import Any
+
+ from pydantic import BaseModel, Field
+
+
+ class SearchState(BaseModel):
+     parameters: dict[str, Any] | None = None
+     results: list[dict[str, Any]] = Field(default_factory=list)
@@ -0,0 +1,236 @@
+ from collections.abc import Awaitable, Callable
+ from typing import Any, TypeVar
+
+ import structlog
+ from ag_ui.core import EventType, StateSnapshotEvent
+ from pydantic_ai import RunContext
+ from pydantic_ai.ag_ui import StateDeps
+ from pydantic_ai.exceptions import ModelRetry
+ from pydantic_ai.messages import ModelRequest, UserPromptPart
+ from pydantic_ai.toolsets import FunctionToolset
+
+ from orchestrator.api.api_v1.endpoints.search import (
+     get_definitions,
+     list_paths,
+     search_processes,
+     search_products,
+     search_subscriptions,
+     search_workflows,
+ )
+ from orchestrator.schemas.search import SearchResultsSchema
+ from orchestrator.search.core.types import ActionType, EntityType, FilterOp
+ from orchestrator.search.filters import FilterTree
+ from orchestrator.search.retrieval.validation import validate_filter_tree
+ from orchestrator.search.schemas.parameters import PARAMETER_REGISTRY, BaseSearchParameters
+
+ from .state import SearchState
+
+ logger = structlog.get_logger(__name__)
+
+
+ P = TypeVar("P", bound=BaseSearchParameters)
+
+ SearchFn = Callable[[P], Awaitable[SearchResultsSchema[Any]]]
+
+ SEARCH_FN_MAP: dict[EntityType, SearchFn] = {
+     EntityType.SUBSCRIPTION: search_subscriptions,
+     EntityType.WORKFLOW: search_workflows,
+     EntityType.PRODUCT: search_products,
+     EntityType.PROCESS: search_processes,
+ }
+
+ search_toolset: FunctionToolset[StateDeps[SearchState]] = FunctionToolset(max_retries=1)
+
+
+ def last_user_message(ctx: RunContext[StateDeps[SearchState]]) -> str | None:
+     for msg in reversed(ctx.messages):
+         if isinstance(msg, ModelRequest):
+             for part in msg.parts:
+                 if isinstance(part, UserPromptPart) and isinstance(part.content, str):
+                     return part.content
+     return None
+
+
+ @search_toolset.tool
+ async def set_search_parameters(
+     ctx: RunContext[StateDeps[SearchState]],
+     entity_type: EntityType,
+     action: str | ActionType = ActionType.SELECT,
+ ) -> StateSnapshotEvent:
+     """Sets the initial search context, like the entity type and the user's query.
+
+     This MUST be the first tool called to start any new search.
+     Warning: Calling this tool will erase any existing filters and search results from the state.
+     """
+     params = ctx.deps.state.parameters or {}
+     is_new_search = params.get("entity_type") != entity_type.value
+     final_query = (last_user_message(ctx) or "") if is_new_search else params.get("query", "")
+
+     logger.debug(
+         "Setting search parameters",
+         entity_type=entity_type.value,
+         action=action,
+         is_new_search=is_new_search,
+         query=final_query,
+     )
+
+     ctx.deps.state.parameters = {"action": action, "entity_type": entity_type, "filters": None, "query": final_query}
+     ctx.deps.state.results = []
+     logger.debug("Search parameters set", parameters=ctx.deps.state.parameters)
+
+     return StateSnapshotEvent(
+         type=EventType.STATE_SNAPSHOT,
+         snapshot=ctx.deps.state.model_dump(),
+     )
+
+
+ @search_toolset.tool(retries=2)
+ async def set_filter_tree(
+     ctx: RunContext[StateDeps[SearchState]],
+     filters: FilterTree | None,
+ ) -> StateSnapshotEvent:
+     """Replace current filters atomically with a full FilterTree, or clear with None.
+
+     Requirements:
+     - Root/group operators must be 'AND' or 'OR' (uppercase).
+     - Provide either PathFilters or nested groups under `children`.
+     - See the FilterTree schema examples for the exact shape.
+     """
+     if ctx.deps.state.parameters is None:
+         raise ModelRetry("Search parameters are not initialized. Call set_search_parameters first.")
+
+     entity_type = EntityType(ctx.deps.state.parameters["entity_type"])
+
+     logger.debug(
+         "Setting filter tree",
+         entity_type=entity_type.value,
+         has_filters=filters is not None,
+         filter_summary=f"{len(filters.get_all_leaves())} filters" if filters else "no filters",
+     )
+
+     try:
+         await validate_filter_tree(filters, entity_type)
+     except Exception as e:
+         # TODO: Define specific filter validation exceptions and catch them, instructing what should change.
+         raise ModelRetry(str(e))
+
+     filter_data = None if filters is None else filters.model_dump(mode="json", by_alias=True)
+     ctx.deps.state.parameters["filters"] = filter_data
+     return StateSnapshotEvent(type=EventType.STATE_SNAPSHOT, snapshot=ctx.deps.state.model_dump())
+
+
+ @search_toolset.tool
+ async def execute_search(
+     ctx: RunContext[StateDeps[SearchState]],
+     limit: int = 5,
+ ) -> StateSnapshotEvent:
+     """Execute the search with the current parameters."""
+     if not ctx.deps.state.parameters:
+         raise ValueError("No search parameters set")
+
+     entity_type = EntityType(ctx.deps.state.parameters["entity_type"])
+     param_class = PARAMETER_REGISTRY.get(entity_type)
+     if not param_class:
+         raise ValueError(f"Unknown entity type: {entity_type}")
+
+     params = param_class(**ctx.deps.state.parameters)
+     logger.debug(
+         "Executing database search",
+         search_entity_type=entity_type.value,
+         limit=limit,
+         has_filters=params.filters is not None,
+         query=params.query,
+         action=params.action,
+     )
+
+     if params.filters:
+         logger.debug("Search filters", filters=params.filters)
+
+     fn = SEARCH_FN_MAP[entity_type]
+     search_results = await fn(params)
+
+     logger.debug(
+         "Search completed",
+         total_results=len(search_results.data) if search_results.data else 0,
+         limited_to=limit,
+     )
+
+     ctx.deps.state.results = search_results.data[:limit]
+
+     return StateSnapshotEvent(type=EventType.STATE_SNAPSHOT, snapshot=ctx.deps.state.model_dump())
+
+
+ @search_toolset.tool
+ async def discover_filter_paths(
+     ctx: RunContext[StateDeps[SearchState]],
+     field_names: list[str],
+     entity_type: EntityType | None = None,
+ ) -> dict[str, dict[str, Any]]:
+     """Discovers available filter paths for a list of field names.
+
+     Returns a dictionary where each key is a field_name from the input list and
+     the value is its discovery result.
+     """
+     if not entity_type and ctx.deps.state.parameters:
+         entity_type = EntityType(ctx.deps.state.parameters.get("entity_type"))
+     if not entity_type:
+         entity_type = EntityType.SUBSCRIPTION
+
+     all_results = {}
+     for field_name in field_names:
+         paths_response = await list_paths(prefix="", q=field_name, entity_type=entity_type, limit=100)
+
+         matching_leaves = []
+         for leaf in paths_response.leaves:
+             if field_name.lower() in leaf.name.lower():
+                 matching_leaves.append(
+                     {
+                         "name": leaf.name,
+                         "value_kind": leaf.ui_types,
+                         "paths": leaf.paths,
+                     }
+                 )
+
+         matching_components = []
+         for comp in paths_response.components:
+             if field_name.lower() in comp.name.lower():
+                 matching_components.append(
+                     {
+                         "name": comp.name,
+                         "value_kind": comp.ui_types,
+                     }
+                 )
+
+         result_for_field: dict[str, Any]
+         if not matching_leaves and not matching_components:
+             result_for_field = {
+                 "status": "NOT_FOUND",
+                 "guidance": f"No filterable paths found containing '{field_name}'. Do not create a filter for this.",
+                 "leaves": [],
+                 "components": [],
+             }
+         else:
+             result_for_field = {
+                 "status": "OK",
+                 "guidance": f"Found {len(matching_leaves)} field(s) and {len(matching_components)} component(s) for '{field_name}'.",
+                 "leaves": matching_leaves,
+                 "components": matching_components,
+             }
+
+         all_results[field_name] = result_for_field
+     logger.debug("Returning found field name -> path mapping", all_results=all_results)
+     return all_results
+
+
+ @search_toolset.tool
+ async def get_valid_operators() -> dict[str, list[FilterOp]]:
+     """Gets the mapping of field types to their valid filter operators."""
+     definitions = await get_definitions()
+
+     operator_map = {}
+     for ui_type, type_def in definitions.items():
+         key = ui_type.value
+
+         if hasattr(type_def, "operators"):
+             operator_map[key] = type_def.operators
+     return operator_map
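Because build_agent_app appends search_toolset to any caller-provided toolsets, extra tools can be layered on without touching this module. A hedged sketch follows, with a made-up tool named count_results:

from pydantic_ai import RunContext
from pydantic_ai.ag_ui import StateDeps
from pydantic_ai.toolsets import FunctionToolset

from orchestrator.search.agent import build_agent_app
from orchestrator.search.agent.state import SearchState

extra_toolset: FunctionToolset[StateDeps[SearchState]] = FunctionToolset()


@extra_toolset.tool
async def count_results(ctx: RunContext[StateDeps[SearchState]]) -> int:
    """Hypothetical helper: report how many results execute_search stored in state."""
    return len(ctx.deps.state.results)


# search_toolset is added automatically inside build_agent_app.
agent_app = build_agent_app(model="openai:gpt-4o", toolsets=[extra_toolset])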
File without changes
@@ -0,0 +1,64 @@
+ import logging
+
+ import structlog
+ from litellm import aembedding as llm_aembedding
+ from litellm import embedding as llm_embedding
+ from litellm import exceptions as llm_exc
+
+ from orchestrator.llm_settings import llm_settings
+
+ logger = structlog.get_logger(__name__)
+
+ # LiteLLM logs a lot of noise, such as embedding vectors.
+ logging.getLogger("LiteLLM").setLevel(logging.WARNING)
+
+
+ class EmbeddingIndexer:
+
+     @classmethod
+     def get_embeddings_from_api_batch(cls, texts: list[str], dry_run: bool) -> list[list[float]]:
+         if not texts:
+             return []
+         if dry_run:
+             logger.debug("Dry Run: returning empty embeddings")
+             return [[] for _ in texts]
+
+         try:
+             resp = llm_embedding(
+                 model=llm_settings.EMBEDDING_MODEL,
+                 input=[t.lower() for t in texts],
+                 api_key=llm_settings.OPENAI_API_KEY,
+                 api_base=llm_settings.OPENAI_BASE_URL,
+                 timeout=llm_settings.LLM_TIMEOUT,
+                 max_retries=llm_settings.LLM_MAX_RETRIES,
+             )
+             data = sorted(resp.data, key=lambda e: e["index"])
+             return [row["embedding"] for row in data]
+         except (llm_exc.APIError, llm_exc.APIConnectionError, llm_exc.RateLimitError, llm_exc.Timeout) as e:
+             logger.error("Embedding request failed", error=str(e))
+             return [[] for _ in texts]
+         except Exception as e:
+             logger.error("Unexpected embedding error", error=str(e))
+             return [[] for _ in texts]
+
+
+ class QueryEmbedder:
+     """A stateless, async utility for embedding real-time user queries."""
+
+     @classmethod
+     async def generate_for_text_async(cls, text: str) -> list[float]:
+         if not text:
+             return []
+         try:
+             resp = await llm_aembedding(
+                 model=llm_settings.EMBEDDING_MODEL,
+                 input=[text.lower()],
+                 api_key=llm_settings.OPENAI_API_KEY,
+                 api_base=llm_settings.OPENAI_BASE_URL,
+                 timeout=5.0,
+                 max_retries=0,  # No retries; prioritize speed.
+             )
+             return resp.data[0]["embedding"]
+         except Exception as e:
+             logger.error("Async embedding generation failed", error=str(e))
+             return []
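A minimal usage sketch (not part of the diff); it assumes llm_settings points at a reachable embedding endpoint. Note the empty-list fallback: with max_retries=0, callers must treat [] as a soft failure.

import asyncio

from orchestrator.search.core.embedding import QueryEmbedder


async def main() -> None:
    vec = await QueryEmbedder.generate_for_text_async("fiber subscriptions in Amsterdam")
    if not vec:
        print("embedding unavailable, fall back to lexical search")  # soft-failure path
    else:
        print(f"got {len(vec)}-dim embedding")


asyncio.run(main())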