orchestrator-core 4.4.1__py3-none-any.whl → 4.5.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. orchestrator/__init__.py +26 -2
  2. orchestrator/agentic_app.py +84 -0
  3. orchestrator/api/api_v1/api.py +10 -0
  4. orchestrator/api/api_v1/endpoints/search.py +290 -0
  5. orchestrator/app.py +32 -0
  6. orchestrator/cli/index_llm.py +73 -0
  7. orchestrator/cli/main.py +22 -1
  8. orchestrator/cli/resize_embedding.py +135 -0
  9. orchestrator/cli/search_explore.py +208 -0
  10. orchestrator/cli/speedtest.py +151 -0
  11. orchestrator/db/models.py +37 -1
  12. orchestrator/devtools/populator.py +16 -0
  13. orchestrator/llm_settings.py +51 -0
  14. orchestrator/log_config.py +1 -0
  15. orchestrator/migrations/helpers.py +1 -1
  16. orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +95 -0
  17. orchestrator/schemas/search.py +130 -0
  18. orchestrator/schemas/workflow.py +1 -0
  19. orchestrator/search/__init__.py +12 -0
  20. orchestrator/search/agent/__init__.py +21 -0
  21. orchestrator/search/agent/agent.py +60 -0
  22. orchestrator/search/agent/prompts.py +100 -0
  23. orchestrator/search/agent/state.py +21 -0
  24. orchestrator/search/agent/tools.py +258 -0
  25. orchestrator/search/core/__init__.py +12 -0
  26. orchestrator/search/core/embedding.py +73 -0
  27. orchestrator/search/core/exceptions.py +36 -0
  28. orchestrator/search/core/types.py +296 -0
  29. orchestrator/search/core/validators.py +40 -0
  30. orchestrator/search/docs/index.md +37 -0
  31. orchestrator/search/docs/running_local_text_embedding_inference.md +45 -0
  32. orchestrator/search/filters/__init__.py +40 -0
  33. orchestrator/search/filters/base.py +280 -0
  34. orchestrator/search/filters/date_filters.py +88 -0
  35. orchestrator/search/filters/definitions.py +107 -0
  36. orchestrator/search/filters/ltree_filters.py +56 -0
  37. orchestrator/search/filters/numeric_filter.py +73 -0
  38. orchestrator/search/indexing/__init__.py +16 -0
  39. orchestrator/search/indexing/indexer.py +336 -0
  40. orchestrator/search/indexing/registry.py +101 -0
  41. orchestrator/search/indexing/tasks.py +66 -0
  42. orchestrator/search/indexing/traverse.py +334 -0
  43. orchestrator/search/retrieval/__init__.py +16 -0
  44. orchestrator/search/retrieval/builder.py +123 -0
  45. orchestrator/search/retrieval/engine.py +158 -0
  46. orchestrator/search/retrieval/exceptions.py +90 -0
  47. orchestrator/search/retrieval/pagination.py +96 -0
  48. orchestrator/search/retrieval/retrievers/__init__.py +26 -0
  49. orchestrator/search/retrieval/retrievers/base.py +122 -0
  50. orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
  51. orchestrator/search/retrieval/retrievers/hybrid.py +188 -0
  52. orchestrator/search/retrieval/retrievers/semantic.py +94 -0
  53. orchestrator/search/retrieval/retrievers/structured.py +39 -0
  54. orchestrator/search/retrieval/utils.py +120 -0
  55. orchestrator/search/retrieval/validation.py +152 -0
  56. orchestrator/search/schemas/__init__.py +12 -0
  57. orchestrator/search/schemas/parameters.py +129 -0
  58. orchestrator/search/schemas/results.py +77 -0
  59. orchestrator/services/settings_env_variables.py +2 -2
  60. orchestrator/settings.py +1 -1
  61. orchestrator/workflows/tasks/validate_products.py +1 -1
  62. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a3.dist-info}/METADATA +9 -4
  63. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a3.dist-info}/RECORD +65 -16
  64. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a3.dist-info}/WHEEL +0 -0
  65. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,95 @@
1
+ """Search index model for llm integration.
2
+
3
+ Revision ID: 52b37b5b2714
4
+ Revises: 850dccac3b02
5
+ Create Date: 2025-08-12 22:34:26.694750
6
+
7
+ """
8
+
9
+ import sqlalchemy as sa
10
+ from alembic import op
11
+ from pgvector.sqlalchemy import Vector
12
+ from sqlalchemy.dialects import postgresql
13
+ from sqlalchemy_utils import LtreeType
14
+
15
+ from orchestrator.search.core.types import FieldType
16
+
17
+ # revision identifiers, used by Alembic.
18
+ revision = "52b37b5b2714"
19
+ down_revision = "850dccac3b02"
20
+ branch_labels = None
21
+ depends_on = None
22
+
23
+ TABLE = "ai_search_index"
24
+ IDX_EMBED_HNSW = "ix_flat_embed_hnsw"
25
+ IDX_PATH_GIST = "ix_flat_path_gist"
26
+ IDX_PATH_BTREE = "ix_flat_path_btree"
27
+ IDX_VALUE_TRGM = "ix_flat_value_trgm"
28
+ IDX_CONTENT_HASH = "idx_ai_search_index_content_hash"
29
+
30
+ TARGET_DIM = 1536
31
+
32
+
33
+ def upgrade() -> None:
34
+ # Create PostgreSQL extensions
35
+ op.execute("CREATE EXTENSION IF NOT EXISTS ltree;")
36
+ op.execute("CREATE EXTENSION IF NOT EXISTS unaccent;")
37
+ op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;")
38
+ op.execute("CREATE EXTENSION IF NOT EXISTS vector;")
39
+
40
+ # Create the ai_search_index table
41
+ op.create_table(
42
+ TABLE,
43
+ sa.Column("entity_type", sa.Text, nullable=False),
44
+ sa.Column("entity_id", postgresql.UUID, nullable=False),
45
+ sa.Column("path", LtreeType, nullable=False),
46
+ sa.Column("value", sa.Text, nullable=False),
47
+ sa.Column("embedding", Vector(TARGET_DIM), nullable=True),
48
+ sa.Column("content_hash", sa.String(64), nullable=False),
49
+ sa.PrimaryKeyConstraint("entity_id", "path", name="pk_ai_search_index"),
50
+ )
51
+
52
+ field_type_enum = sa.Enum(*[ft.value for ft in FieldType], name="field_type")
53
+ field_type_enum.create(op.get_bind(), checkfirst=True)
54
+ op.add_column(
55
+ TABLE,
56
+ sa.Column("value_type", field_type_enum, nullable=False, server_default=FieldType.STRING.value),
57
+ )
58
+ op.alter_column(TABLE, "value_type", server_default=None)
59
+
60
+ op.create_index(op.f("ix_ai_search_index_entity_id"), TABLE, ["entity_id"], unique=False)
61
+ op.create_index(IDX_CONTENT_HASH, TABLE, ["content_hash"])
62
+
63
+ op.create_index(
64
+ IDX_PATH_GIST,
65
+ TABLE,
66
+ ["path"],
67
+ postgresql_using="GIST",
68
+ postgresql_ops={"path": "gist_ltree_ops"},
69
+ )
70
+ op.create_index(IDX_PATH_BTREE, TABLE, ["path"])
71
+ op.create_index(IDX_VALUE_TRGM, TABLE, ["value"], postgresql_using="GIN", postgresql_ops={"value": "gin_trgm_ops"})
72
+
73
+ op.create_index(
74
+ IDX_EMBED_HNSW,
75
+ TABLE,
76
+ ["embedding"],
77
+ postgresql_using="HNSW",
78
+ postgresql_with={"m": 16, "ef_construction": 64},
79
+ postgresql_ops={"embedding": "vector_l2_ops"},
80
+ )
81
+
82
+
83
+ def downgrade() -> None:
84
+ # Drop all indexes
85
+ op.drop_index(IDX_EMBED_HNSW, table_name=TABLE, if_exists=True)
86
+ op.drop_index(IDX_VALUE_TRGM, table_name=TABLE, if_exists=True)
87
+ op.drop_index(IDX_PATH_BTREE, table_name=TABLE, if_exists=True)
88
+ op.drop_index(IDX_PATH_GIST, table_name=TABLE, if_exists=True)
89
+ op.drop_index(IDX_CONTENT_HASH, table_name=TABLE, if_exists=True)
90
+ op.drop_index(op.f("ix_ai_search_index_entity_id"), table_name=TABLE, if_exists=True)
91
+
92
+ # Drop table and enum
93
+ op.drop_table(TABLE, if_exists=True)
94
+ field_type_enum = sa.Enum(name="field_type")
95
+ field_type_enum.drop(op.get_bind(), checkfirst=True)
@@ -0,0 +1,130 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from datetime import datetime
15
+ from typing import Any, Generic, TypeVar
16
+ from uuid import UUID
17
+
18
+ from pydantic import BaseModel, ConfigDict, Field
19
+
20
+ from orchestrator.search.core.types import SearchMetadata
21
+ from orchestrator.search.schemas.results import ComponentInfo, LeafInfo, MatchingField
22
+
23
+ T = TypeVar("T")
24
+
25
+
26
+ class PageInfoSchema(BaseModel):
27
+ has_next_page: bool = False
28
+ next_page_cursor: str | None = None
29
+
30
+
31
+ class ProductSchema(BaseModel):
32
+ model_config = ConfigDict(from_attributes=True)
33
+
34
+ name: str
35
+ tag: str
36
+ product_type: str
37
+
38
+
39
+ class SubscriptionSearchResult(BaseModel):
40
+ score: float
41
+ perfect_match: int
42
+ matching_field: MatchingField | None = None
43
+ subscription: dict[str, Any]
44
+
45
+
46
+ class SearchResultsSchema(BaseModel, Generic[T]):
47
+ data: list[T] = Field(default_factory=list)
48
+ page_info: PageInfoSchema = Field(default_factory=PageInfoSchema)
49
+ search_metadata: SearchMetadata | None = None
50
+
51
+
52
+ class WorkflowProductSchema(BaseModel):
53
+ """Product associated with a workflow."""
54
+
55
+ model_config = ConfigDict(from_attributes=True)
56
+
57
+ product_type: str
58
+ product_id: UUID
59
+ name: str
60
+
61
+
62
+ class WorkflowSearchSchema(BaseModel):
63
+ """Schema for workflow search results."""
64
+
65
+ model_config = ConfigDict(from_attributes=True)
66
+
67
+ name: str
68
+ products: list[WorkflowProductSchema]
69
+ description: str | None = None
70
+ created_at: datetime | None = None
71
+
72
+
73
+ class ProductSearchSchema(BaseModel):
74
+ """Schema for product search results."""
75
+
76
+ model_config = ConfigDict(from_attributes=True)
77
+
78
+ product_id: UUID
79
+ name: str
80
+ product_type: str
81
+ tag: str | None = None
82
+ description: str | None = None
83
+ status: str | None = None
84
+ created_at: datetime | None = None
85
+
86
+
87
+ class ProcessSearchSchema(BaseModel):
88
+ """Schema for process search results."""
89
+
90
+ model_config = ConfigDict(from_attributes=True)
91
+
92
+ process_id: UUID
93
+ workflow_name: str
94
+ workflow_id: UUID
95
+ last_status: str
96
+ is_task: bool
97
+ created_by: str | None = None
98
+ started_at: datetime
99
+ last_modified_at: datetime
100
+ last_step: str | None = None
101
+ failed_reason: str | None = None
102
+ subscription_ids: list[UUID] | None = None
103
+
104
+
105
+ class WorkflowSearchResult(BaseModel):
106
+ score: float
107
+ perfect_match: int
108
+ matching_field: MatchingField | None = None
109
+ workflow: WorkflowSearchSchema
110
+
111
+
112
+ class ProductSearchResult(BaseModel):
113
+ score: float
114
+ perfect_match: int
115
+ matching_field: MatchingField | None = None
116
+ product: ProductSearchSchema
117
+
118
+
119
+ class ProcessSearchResult(BaseModel):
120
+ score: float
121
+ perfect_match: int
122
+ matching_field: MatchingField | None = None
123
+ process: ProcessSearchSchema
124
+
125
+
126
+ class PathsResponse(BaseModel):
127
+ leaves: list[LeafInfo]
128
+ components: list[ComponentInfo]
129
+
130
+ model_config = ConfigDict(extra="forbid", use_enum_values=True)
@@ -60,6 +60,7 @@ class SubscriptionWorkflowListsSchema(OrchestratorBaseModel):
60
60
  modify: list[WorkflowListItemSchema]
61
61
  terminate: list[WorkflowListItemSchema]
62
62
  system: list[WorkflowListItemSchema]
63
+ reconcile: list[WorkflowListItemSchema]
63
64
  validate_: list[WorkflowListItemSchema] = Field(default_factory=list, alias="validate")
64
65
 
65
66
 
@@ -0,0 +1,12 @@
1
+ # Copyright 2019-2025 SURF.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
@@ -0,0 +1,21 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ # This module requires: pydantic-ai==0.7.0, ag-ui-protocol>=0.1.8
15
+
16
+
17
+ from orchestrator.search.agent.agent import build_agent_app
18
+
19
+ __all__ = [
20
+ "build_agent_app",
21
+ ]
@@ -0,0 +1,60 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from typing import Any
15
+
16
+ import structlog
17
+ from fastapi import FastAPI, HTTPException
18
+ from pydantic_ai.ag_ui import StateDeps
19
+ from pydantic_ai.agent import Agent
20
+ from pydantic_ai.models.openai import OpenAIModel
21
+ from pydantic_ai.settings import ModelSettings
22
+ from pydantic_ai.toolsets import FunctionToolset
23
+ from starlette.types import ASGIApp
24
+
25
+ from orchestrator.search.agent.prompts import get_base_instructions, get_dynamic_instructions
26
+ from orchestrator.search.agent.state import SearchState
27
+ from orchestrator.search.agent.tools import search_toolset
28
+
29
+ logger = structlog.get_logger(__name__)
30
+
31
+
32
+ def _disabled_agent_app(reason: str) -> FastAPI:
33
+ app = FastAPI(title="Agent disabled")
34
+
35
+ @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"])
36
+ async def _disabled(path: str) -> None:
37
+ raise HTTPException(status_code=503, detail=f"Agent disabled: {reason}")
38
+
39
+ return app
40
+
41
+
42
+ def build_agent_app(model: str | OpenAIModel, toolsets: list[FunctionToolset[Any]] | None = None) -> ASGIApp:
43
+ try:
44
+ toolsets = toolsets + [search_toolset] if toolsets else [search_toolset]
45
+
46
+ agent = Agent(
47
+ model=model,
48
+ deps_type=StateDeps[SearchState],
49
+ model_settings=ModelSettings(
50
+ parallel_tool_calls=False,
51
+ ), # https://github.com/pydantic/pydantic-ai/issues/562
52
+ toolsets=toolsets,
53
+ )
54
+ agent.instructions(get_base_instructions)
55
+ agent.instructions(get_dynamic_instructions)
56
+
57
+ return agent.to_ag_ui(deps=StateDeps(SearchState()))
58
+ except Exception as e:
59
+ logger.error("Agent init failed; serving disabled stub.", error=str(e))
60
+ return _disabled_agent_app(str(e))
@@ -0,0 +1,100 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ import json
15
+ from textwrap import dedent
16
+
17
+ import structlog
18
+ from pydantic_ai import RunContext
19
+ from pydantic_ai.ag_ui import StateDeps
20
+
21
+ from orchestrator.search.agent.state import SearchState
22
+
23
+ logger = structlog.get_logger(__name__)
24
+
25
+
26
+ async def get_base_instructions() -> str:
27
+ return dedent(
28
+ """
29
+ You are an expert assistant designed to find relevant information by building and running database queries.
30
+
31
+ ---
32
+ ### 1. Your Goal and Method
33
+
34
+ Your ultimate goal is to **find information** that answers the user's request.
35
+
36
+ To do this, you will perform either a broad search or a filtered search.
37
+ For **filtered searches**, your primary method is to **construct a valid `FilterTree` object**.
38
+ To do this correctly, you must infer the exact structure, operators, and nesting rules from the Pydantic schema of the `set_filter_tree` tool itself.
39
+
40
+ ---
41
+ ### 2. Information-Gathering Tools
42
+
43
+ **If you determine that a `FilterTree` is needed**, use these tools to gather information first:
44
+
45
+ - **discover_filter_paths(field_names: list[str])**: Use this to discover all valid filter paths for a list of field names in a single call.
46
+ - **get_valid_operators()**: Use this to get the JSON map of all valid operators for each field type.
47
+
48
+ ---
49
+ ### 3. Execution Workflow
50
+
51
+ Follow these steps in strict order:
52
+
53
+ 1. **Set Context**: Always begin by calling `set_search_parameters`.
54
+ 2. **Analyze for Filters**: Based on the user's request, decide if specific filters are necessary.
55
+ - **If filters ARE required**, follow these sub-steps:
56
+ a. **Gather Intel**: Identify all needed field names, then call `discover_filter_paths` and `get_valid_operators` **once each** to get all required information.
57
+ b. **Construct FilterTree**: Build the `FilterTree` object.
58
+ c. **Set Filters**: Call `set_filter_tree`.
59
+ 3. **Execute**: Call `execute_search`. This is done for both filtered and non-filtered searches.
60
+ 4. **Report**: Answer the user's question directly and summarize when appropriate.
61
+
62
+ ---
63
+ ### 4. Critical Rules
64
+
65
+ - **NEVER GUESS PATHS**: You *must* verify every filter path by calling `discover_filter_paths` first. If a path does not exist, you must inform the user and not include it in the `FilterTree`.
66
+ - **USE FULL PATHS**: Always use the full, unambiguous path returned by the discovery tool.
67
+ - **MATCH OPERATORS**: Only use operators that are compatible with the field type as confirmed by `get_valid_operators`.
68
+ """
69
+ )
70
+
71
+
72
+ async def get_dynamic_instructions(ctx: RunContext[StateDeps[SearchState]]) -> str:
73
+ """Dynamically provides 'next step' coaching based on the current state."""
74
+ state = ctx.deps.state
75
+ param_state_str = json.dumps(state.parameters, indent=2, default=str) if state.parameters else "Not set."
76
+
77
+ next_step_guidance = ""
78
+ if not state.parameters or not state.parameters.get("entity_type"):
79
+ next_step_guidance = (
80
+ "INSTRUCTION: The search context is not set. Your next action is to call `set_search_parameters`."
81
+ )
82
+ else:
83
+ next_step_guidance = (
84
+ "INSTRUCTION: Context is set. Now, analyze the user's request. "
85
+ "If specific filters ARE required, use the information-gathering tools to build a `FilterTree` and call `set_filter_tree`. "
86
+ "If no specific filters are needed, you can proceed directly to `execute_search`."
87
+ )
88
+ return dedent(
89
+ f"""
90
+ ---
91
+ ### Current State & Next Action
92
+
93
+ **Current Search Parameters:**
94
+ ```json
95
+ {param_state_str}
96
+ ```
97
+
98
+ **{next_step_guidance}**
99
+ """
100
+ )
@@ -0,0 +1,21 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from typing import Any
15
+
16
+ from pydantic import BaseModel, Field
17
+
18
+
19
+ class SearchState(BaseModel):
20
+ parameters: dict[str, Any] | None = None
21
+ results: list[dict[str, Any]] = Field(default_factory=list)