orchestrator-core 4.5.0a7__py3-none-any.whl → 4.5.0a8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +3 -12
- orchestrator/agentic_app.py +48 -29
- orchestrator/api/api_v1/api.py +1 -1
- orchestrator/api/api_v1/endpoints/search.py +13 -7
- orchestrator/cli/main.py +2 -2
- orchestrator/cli/search/__init__.py +32 -0
- orchestrator/llm_settings.py +6 -3
- orchestrator/search/agent/__init__.py +2 -2
- orchestrator/search/agent/agent.py +17 -15
- orchestrator/search/docs/running_local_text_embedding_inference.md +1 -0
- orchestrator/search/filters/base.py +37 -22
- orchestrator/search/indexing/indexer.py +1 -3
- orchestrator/search/indexing/tasks.py +4 -1
- orchestrator/search/llm_migration.py +102 -0
- orchestrator/search/retrieval/engine.py +1 -5
- orchestrator/search/retrieval/retrievers/base.py +2 -1
- orchestrator/search/retrieval/retrievers/hybrid.py +112 -23
- orchestrator/workflows/steps.py +1 -1
- {orchestrator_core-4.5.0a7.dist-info → orchestrator_core-4.5.0a8.dist-info}/METADATA +7 -5
- {orchestrator_core-4.5.0a7.dist-info → orchestrator_core-4.5.0a8.dist-info}/RECORD +26 -25
- orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +0 -95
- /orchestrator/cli/{index_llm.py → search/index_llm.py} +0 -0
- /orchestrator/cli/{resize_embedding.py → search/resize_embedding.py} +0 -0
- /orchestrator/cli/{search_explore.py → search/search_explore.py} +0 -0
- /orchestrator/cli/{speedtest.py → search/speedtest.py} +0 -0
- {orchestrator_core-4.5.0a7.dist-info → orchestrator_core-4.5.0a8.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.5.0a7.dist-info → orchestrator_core-4.5.0a8.dist-info}/licenses/LICENSE +0 -0
orchestrator/__init__.py
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
|
|
14
14
|
"""This is the orchestrator workflow engine."""
|
|
15
15
|
|
|
16
|
-
__version__ = "4.5.0a7"
|
|
16
|
+
__version__ = "4.5.0a8"
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
from structlog import get_logger
|
|
@@ -25,18 +25,9 @@ logger.info("Starting the orchestrator", version=__version__)
|
|
|
25
25
|
from orchestrator.llm_settings import llm_settings
|
|
26
26
|
from orchestrator.settings import app_settings
|
|
27
27
|
|
|
28
|
-
if llm_settings.
|
|
29
|
-
try:
|
|
30
|
-
from importlib import import_module
|
|
28
|
+
if llm_settings.SEARCH_ENABLED or llm_settings.AGENT_ENABLED:
|
|
31
29
|
|
|
32
|
-
|
|
33
|
-
from orchestrator.agentic_app import AgenticOrchestratorCore as OrchestratorCore
|
|
34
|
-
|
|
35
|
-
except ImportError:
|
|
36
|
-
logger.error(
|
|
37
|
-
"Unable to import 'pydantic_ai' module, please install the orchestrator with llm dependencies. `pip install orchestrator-core[llm]",
|
|
38
|
-
)
|
|
39
|
-
exit(1)
|
|
30
|
+
from orchestrator.agentic_app import LLMOrchestratorCore as OrchestratorCore
|
|
40
31
|
else:
|
|
41
32
|
from orchestrator.app import OrchestratorCore # type: ignore[assignment]
|
|
42
33
|
|
orchestrator/agentic_app.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
"""The main application module.
|
|
3
3
|
|
|
4
|
-
This module contains the main `AgenticOrchestratorCore` class for the `FastAPI` backend and
|
|
5
|
-
provides the ability to run the CLI.
|
|
4
|
+
This module contains the main `LLMOrchestratorCore` class for the `FastAPI` backend and
|
|
5
|
+
provides the ability to run the CLI with LLM features (search and/or agent).
|
|
6
6
|
"""
|
|
7
7
|
# Copyright 2019-2025 SURF
|
|
8
8
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -16,65 +16,84 @@ provides the ability to run the CLI.
|
|
|
16
16
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
17
17
|
# See the License for the specific language governing permissions and
|
|
18
18
|
# limitations under the License.
|
|
19
|
-
from typing import Any
|
|
19
|
+
from typing import TYPE_CHECKING, Any
|
|
20
20
|
|
|
21
21
|
import typer
|
|
22
|
-
from pydantic_ai.models.openai import OpenAIModel
|
|
23
|
-
from pydantic_ai.toolsets import FunctionToolset
|
|
24
22
|
from structlog import get_logger
|
|
25
23
|
|
|
26
24
|
from orchestrator.app import OrchestratorCore
|
|
27
25
|
from orchestrator.cli.main import app as cli_app
|
|
28
26
|
from orchestrator.llm_settings import LLMSettings, llm_settings
|
|
29
27
|
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from pydantic_ai.models.openai import OpenAIModel
|
|
30
|
+
from pydantic_ai.toolsets import FunctionToolset
|
|
31
|
+
|
|
30
32
|
logger = get_logger(__name__)
|
|
31
33
|
|
|
32
34
|
|
|
33
|
-
class AgenticOrchestratorCore(OrchestratorCore):
|
|
35
|
+
class LLMOrchestratorCore(OrchestratorCore):
|
|
34
36
|
def __init__(
|
|
35
37
|
self,
|
|
36
38
|
*args: Any,
|
|
37
|
-
llm_model: OpenAIModel | str = "gpt-4o-mini",
|
|
38
39
|
llm_settings: LLMSettings = llm_settings,
|
|
39
|
-
|
|
40
|
+
agent_model: "OpenAIModel | str | None" = None,
|
|
41
|
+
agent_tools: "list[FunctionToolset] | None" = None,
|
|
40
42
|
**kwargs: Any,
|
|
41
43
|
) -> None:
|
|
42
|
-
"""Initialize the `AgenticOrchestratorCore` class.
|
|
44
|
+
"""Initialize the `LLMOrchestratorCore` class.
|
|
43
45
|
|
|
44
|
-
This class
|
|
46
|
+
This class extends `OrchestratorCore` with LLM features (search and agent).
|
|
47
|
+
It runs the search migration and mounts the agent endpoint based on feature flags.
|
|
45
48
|
|
|
46
49
|
Args:
|
|
47
50
|
*args: All the normal arguments passed to the `OrchestratorCore` class.
|
|
48
|
-
llm_model: An OpenAI model class or string, not limited to OpenAI models (gpt-4o-mini etc)
|
|
49
51
|
llm_settings: A class of settings for the LLM
|
|
52
|
+
agent_model: Override the agent model (defaults to llm_settings.AGENT_MODEL)
|
|
50
53
|
agent_tools: A list of tools that can be used by the agent
|
|
51
54
|
**kwargs: Additional arguments passed to the `OrchestratorCore` class.
|
|
52
55
|
|
|
53
56
|
Returns:
|
|
54
57
|
None
|
|
55
58
|
"""
|
|
56
|
-
self.llm_model = llm_model
|
|
57
|
-
self.agent_tools = agent_tools
|
|
58
59
|
self.llm_settings = llm_settings
|
|
60
|
+
self.agent_model = agent_model or llm_settings.AGENT_MODEL
|
|
61
|
+
self.agent_tools = agent_tools
|
|
59
62
|
|
|
60
63
|
super().__init__(*args, **kwargs)
|
|
61
64
|
|
|
62
|
-
|
|
63
|
-
self.
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
65
|
+
# Run search migration if search or agent is enabled
|
|
66
|
+
if self.llm_settings.SEARCH_ENABLED or self.llm_settings.AGENT_ENABLED:
|
|
67
|
+
logger.info("Running search migration")
|
|
68
|
+
try:
|
|
69
|
+
from orchestrator.db import db
|
|
70
|
+
from orchestrator.search.llm_migration import run_migration
|
|
71
|
+
|
|
72
|
+
with db.engine.begin() as connection:
|
|
73
|
+
run_migration(connection)
|
|
74
|
+
except ImportError as e:
|
|
75
|
+
logger.error(
|
|
76
|
+
"Unable to run search migration. Please install search dependencies: "
|
|
77
|
+
"`pip install orchestrator-core[search]`",
|
|
78
|
+
error=str(e),
|
|
79
|
+
)
|
|
80
|
+
raise
|
|
81
|
+
|
|
82
|
+
# Mount agent endpoint if agent is enabled
|
|
83
|
+
if self.llm_settings.AGENT_ENABLED:
|
|
84
|
+
logger.info("Initializing agent features", model=self.agent_model)
|
|
85
|
+
try:
|
|
86
|
+
from orchestrator.search.agent import build_agent_router
|
|
87
|
+
|
|
88
|
+
agent_app = build_agent_router(self.agent_model, self.agent_tools)
|
|
89
|
+
self.mount("/agent", agent_app)
|
|
90
|
+
except ImportError as e:
|
|
91
|
+
logger.error(
|
|
92
|
+
"Unable to initialize agent features. Please install agent dependencies: "
|
|
93
|
+
"`pip install orchestrator-core[agent]`",
|
|
94
|
+
error=str(e),
|
|
95
|
+
)
|
|
96
|
+
raise
|
|
78
97
|
|
|
79
98
|
|
|
80
99
|
main_typer_app = typer.Typer()
|
orchestrator/api/api_v1/api.py
CHANGED
|
@@ -89,7 +89,7 @@ api_router.include_router(
|
|
|
89
89
|
ws.router, prefix="/ws", tags=["Core", "Events"]
|
|
90
90
|
) # Auth on the websocket is handled in the Websocket Manager
|
|
91
91
|
|
|
92
|
-
if llm_settings.
|
|
92
|
+
if llm_settings.SEARCH_ENABLED:
|
|
93
93
|
from orchestrator.api.api_v1.endpoints import search
|
|
94
94
|
|
|
95
95
|
api_router.include_router(
|
|
@@ -24,6 +24,7 @@ from orchestrator.db import (
|
|
|
24
24
|
db,
|
|
25
25
|
)
|
|
26
26
|
from orchestrator.domain.base import SubscriptionModel
|
|
27
|
+
from orchestrator.domain.context_cache import cache_subscription_models
|
|
27
28
|
from orchestrator.schemas.search import (
|
|
28
29
|
PageInfoSchema,
|
|
29
30
|
PathsResponse,
|
|
@@ -192,17 +193,22 @@ async def search_subscriptions(
|
|
|
192
193
|
page_info = PageInfoSchema(has_next_page=has_next_page, next_page_cursor=next_page_cursor)
|
|
193
194
|
|
|
194
195
|
search_info_map = {res.entity_id: res for res in search_response.results}
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
196
|
+
|
|
197
|
+
with cache_subscription_models():
|
|
198
|
+
subscriptions_data = {
|
|
199
|
+
sub_id: SubscriptionModel.from_subscription(sub_id).model_dump(exclude_unset=False)
|
|
200
|
+
for sub_id in search_info_map
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
results_data = [
|
|
204
|
+
SubscriptionSearchResult(
|
|
205
|
+
subscription=format_special_types(subscriptions_data[sub_id]),
|
|
201
206
|
score=search_info.score,
|
|
202
207
|
perfect_match=search_info.perfect_match,
|
|
203
208
|
matching_field=search_info.matching_field,
|
|
204
209
|
)
|
|
205
|
-
|
|
210
|
+
for sub_id, search_info in search_info_map.items()
|
|
211
|
+
]
|
|
206
212
|
|
|
207
213
|
return SearchResultsSchema(data=results_data, page_info=page_info, search_metadata=search_response.metadata)
|
|
208
214
|
|
orchestrator/cli/main.py
CHANGED
|
@@ -25,8 +25,8 @@ app.add_typer(scheduler.app, name="scheduler", help="Access all the scheduler fu
|
|
|
25
25
|
app.add_typer(database.app, name="db", help="Interact with the application database")
|
|
26
26
|
app.add_typer(generate.app, name="generate", help="Generate products, workflows and other artifacts")
|
|
27
27
|
|
|
28
|
-
if llm_settings.
|
|
29
|
-
from orchestrator.cli import index_llm, resize_embedding, search_explore, speedtest
|
|
28
|
+
if llm_settings.SEARCH_ENABLED:
|
|
29
|
+
from orchestrator.cli.search import index_llm, resize_embedding, search_explore, speedtest
|
|
30
30
|
|
|
31
31
|
app.add_typer(index_llm.app, name="index", help="(Re-)Index the search table.")
|
|
32
32
|
app.add_typer(search_explore.app, name="search", help="Try out different search types.")
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Copyright 2019-2020 SURF.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
import typer
|
|
15
|
+
|
|
16
|
+
from orchestrator.cli.search import index_llm, resize_embedding, search_explore, speedtest
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def register_commands(app: typer.Typer) -> None:
|
|
20
|
+
"""Register all LLM/search related commands to the main app."""
|
|
21
|
+
app.add_typer(index_llm.app, name="index", help="(Re-)Index the search table.")
|
|
22
|
+
app.add_typer(search_explore.app, name="search", help="Try out different search types.")
|
|
23
|
+
app.add_typer(
|
|
24
|
+
resize_embedding.app,
|
|
25
|
+
name="embedding",
|
|
26
|
+
help="Resize the vector dimension of the embedding column in the search table.",
|
|
27
|
+
)
|
|
28
|
+
app.add_typer(
|
|
29
|
+
speedtest.app,
|
|
30
|
+
name="speedtest",
|
|
31
|
+
help="Search performance testing and analysis.",
|
|
32
|
+
)
|
orchestrator/llm_settings.py
CHANGED
|
@@ -18,7 +18,10 @@ logger = get_logger(__name__)
|
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class LLMSettings(BaseSettings):
|
|
21
|
-
|
|
21
|
+
# Feature flags for LLM functionality
|
|
22
|
+
SEARCH_ENABLED: bool = False # Enable search/indexing with embeddings
|
|
23
|
+
AGENT_ENABLED: bool = False # Enable agentic functionality
|
|
24
|
+
|
|
22
25
|
# Pydantic-ai Agent settings
|
|
23
26
|
AGENT_MODEL: str = "gpt-4o-mini" # See pydantic-ai docs for supported models.
|
|
24
27
|
AGENT_MODEL_VERSION: str = "2025-01-01-preview"
|
|
@@ -30,11 +33,11 @@ class LLMSettings(BaseSettings):
|
|
|
30
33
|
0.1, description="Safety margin as a percentage (e.g., 0.1 for 10%) for token budgeting.", ge=0, le=1
|
|
31
34
|
)
|
|
32
35
|
|
|
33
|
-
# The following settings are only needed for local models.
|
|
36
|
+
# The following settings are only needed for local models or system constraints.
|
|
34
37
|
# By default, they are set conservative assuming a small model like All-MiniLM-L6-V2.
|
|
35
38
|
OPENAI_BASE_URL: str | None = None
|
|
36
39
|
EMBEDDING_FALLBACK_MAX_TOKENS: int | None = 512
|
|
37
|
-
EMBEDDING_MAX_BATCH_SIZE: int | None =
|
|
40
|
+
EMBEDDING_MAX_BATCH_SIZE: int | None = None
|
|
38
41
|
|
|
39
42
|
# General LiteLLM settings
|
|
40
43
|
LLM_MAX_RETRIES: int = 3
|
|
@@ -14,8 +14,8 @@
|
|
|
14
14
|
# This module requires: pydantic-ai==0.7.0, ag-ui-protocol>=0.1.8
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
from orchestrator.search.agent.agent import build_agent_app
|
|
17
|
+
from orchestrator.search.agent.agent import build_agent_router
|
|
18
18
|
|
|
19
19
|
__all__ = [
|
|
20
|
-
"build_agent_app",
|
|
20
|
+
"build_agent_router",
|
|
21
21
|
]
|
|
@@ -14,13 +14,13 @@
|
|
|
14
14
|
from typing import Any
|
|
15
15
|
|
|
16
16
|
import structlog
|
|
17
|
-
from fastapi import
|
|
18
|
-
from pydantic_ai.ag_ui import StateDeps
|
|
17
|
+
from fastapi import APIRouter, HTTPException, Request
|
|
18
|
+
from pydantic_ai.ag_ui import StateDeps, handle_ag_ui_request
|
|
19
19
|
from pydantic_ai.agent import Agent
|
|
20
20
|
from pydantic_ai.models.openai import OpenAIModel
|
|
21
21
|
from pydantic_ai.settings import ModelSettings
|
|
22
22
|
from pydantic_ai.toolsets import FunctionToolset
|
|
23
|
-
from starlette.
|
|
23
|
+
from starlette.responses import Response
|
|
24
24
|
|
|
25
25
|
from orchestrator.search.agent.prompts import get_base_instructions, get_dynamic_instructions
|
|
26
26
|
from orchestrator.search.agent.state import SearchState
|
|
@@ -29,17 +29,9 @@ from orchestrator.search.agent.tools import search_toolset
|
|
|
29
29
|
logger = structlog.get_logger(__name__)
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
def
|
|
33
|
-
|
|
32
|
+
def build_agent_router(model: str | OpenAIModel, toolsets: list[FunctionToolset[Any]] | None = None) -> APIRouter:
|
|
33
|
+
router = APIRouter()
|
|
34
34
|
|
|
35
|
-
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"])
|
|
36
|
-
async def _disabled(path: str) -> None:
|
|
37
|
-
raise HTTPException(status_code=503, detail=f"Agent disabled: {reason}")
|
|
38
|
-
|
|
39
|
-
return app
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def build_agent_app(model: str | OpenAIModel, toolsets: list[FunctionToolset[Any]] | None = None) -> ASGIApp:
|
|
43
35
|
try:
|
|
44
36
|
toolsets = toolsets + [search_toolset] if toolsets else [search_toolset]
|
|
45
37
|
|
|
@@ -54,7 +46,17 @@ def build_agent_app(model: str | OpenAIModel, toolsets: list[FunctionToolset[Any
|
|
|
54
46
|
agent.instructions(get_base_instructions)
|
|
55
47
|
agent.instructions(get_dynamic_instructions)
|
|
56
48
|
|
|
57
|
-
|
|
49
|
+
@router.post("/")
|
|
50
|
+
async def agent_endpoint(request: Request) -> Response:
|
|
51
|
+
return await handle_ag_ui_request(agent, request, deps=StateDeps(SearchState()))
|
|
52
|
+
|
|
53
|
+
return router
|
|
58
54
|
except Exception as e:
|
|
59
55
|
logger.error("Agent init failed; serving disabled stub.", error=str(e))
|
|
60
|
-
|
|
56
|
+
error_msg = f"Agent disabled: {str(e)}"
|
|
57
|
+
|
|
58
|
+
@router.api_route("/{path:path}", methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"])
|
|
59
|
+
async def _disabled(path: str) -> None:
|
|
60
|
+
raise HTTPException(status_code=503, detail=error_msg)
|
|
61
|
+
|
|
62
|
+
return router
|
|
@@ -18,6 +18,7 @@ Point your backend to the local endpoint and declare the new vector size:
|
|
|
18
18
|
```env
|
|
19
19
|
OPENAI_BASE_URL=http://localhost:8080/v1
|
|
20
20
|
EMBEDDING_DIMENSION=384
|
|
21
|
+
EMBEDDING_MAX_BATCH_SIZE=32 # Not required when using OpenAI embeddings
|
|
21
22
|
```
|
|
22
23
|
|
|
23
24
|
Depending on the model, you might want to change the `EMBEDDING_FALLBACK_MAX_TOKENS` and `EMBEDDING_MAX_BATCH_SIZE` settings, which are set conservatively and according to the requirements of the setup used in this example.
|
|
@@ -226,6 +226,38 @@ class FilterTree(BaseModel):
|
|
|
226
226
|
leaves.extend(child.get_all_leaves())
|
|
227
227
|
return leaves
|
|
228
228
|
|
|
229
|
+
@staticmethod
|
|
230
|
+
def _build_correlates(
|
|
231
|
+
alias: Any, entity_id_col: SQLAColumn, entity_type_value: str | None
|
|
232
|
+
) -> list[ColumnElement[bool]]:
|
|
233
|
+
"""Build the correlation predicates that link the subquery to the outer query."""
|
|
234
|
+
correlates = [alias.entity_id == entity_id_col]
|
|
235
|
+
if entity_type_value is not None:
|
|
236
|
+
correlates.append(alias.entity_type == entity_type_value)
|
|
237
|
+
return correlates
|
|
238
|
+
|
|
239
|
+
@staticmethod
|
|
240
|
+
def _handle_ltree_filter(pf: PathFilter, alias: Any, correlates: list[ColumnElement[bool]]) -> ColumnElement[bool]:
|
|
241
|
+
"""Handle path-only filters (has_component, not_has_component, ends_with)."""
|
|
242
|
+
# row-level predicate is always positive
|
|
243
|
+
positive = pf.condition.to_expression(alias.path, pf.path)
|
|
244
|
+
subq = select(1).select_from(alias).where(and_(*correlates, positive))
|
|
245
|
+
if pf.condition.op == FilterOp.NOT_HAS_COMPONENT:
|
|
246
|
+
return ~exists(subq) # NOT at the entity level
|
|
247
|
+
return exists(subq)
|
|
248
|
+
|
|
249
|
+
@staticmethod
|
|
250
|
+
def _handle_value_filter(pf: PathFilter, alias: Any, correlates: list[ColumnElement[bool]]) -> ColumnElement[bool]:
|
|
251
|
+
"""Handle value-based filters (equality, comparison, etc)."""
|
|
252
|
+
if "." not in pf.path:
|
|
253
|
+
path_pred = LtreeFilter(op=FilterOp.ENDS_WITH, value=pf.path).to_expression(alias.path, "")
|
|
254
|
+
else:
|
|
255
|
+
path_pred = alias.path == Ltree(pf.path)
|
|
256
|
+
|
|
257
|
+
value_pred = pf.to_expression(alias.value, alias.value_type)
|
|
258
|
+
subq = select(1).select_from(alias).where(and_(*correlates, path_pred, value_pred))
|
|
259
|
+
return exists(subq)
|
|
260
|
+
|
|
229
261
|
def to_expression(
|
|
230
262
|
self,
|
|
231
263
|
entity_id_col: SQLAColumn,
|
|
@@ -241,35 +273,18 @@ class FilterTree(BaseModel):
|
|
|
241
273
|
Returns:
|
|
242
274
|
ColumnElement[bool]: A SQLAlchemy expression suitable for use in a WHERE clause.
|
|
243
275
|
"""
|
|
276
|
+
from sqlalchemy.orm import aliased
|
|
244
277
|
|
|
245
278
|
alias_idx = count(1)
|
|
246
279
|
|
|
247
280
|
def leaf_exists(pf: PathFilter) -> ColumnElement[bool]:
|
|
248
|
-
|
|
249
|
-
|
|
281
|
+
"""Convert a PathFilter into an EXISTS subquery."""
|
|
250
282
|
alias = aliased(AiSearchIndex, name=f"flt_{next(alias_idx)}")
|
|
251
|
-
|
|
252
|
-
correlates = [alias.entity_id == entity_id_col]
|
|
253
|
-
if entity_type_value is not None:
|
|
254
|
-
correlates.append(alias.entity_type == entity_type_value)
|
|
283
|
+
correlates = self._build_correlates(alias, entity_id_col, entity_type_value)
|
|
255
284
|
|
|
256
285
|
if isinstance(pf.condition, LtreeFilter):
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
subq = select(1).select_from(alias).where(and_(*correlates, positive))
|
|
260
|
-
if pf.condition.op == FilterOp.NOT_HAS_COMPONENT:
|
|
261
|
-
return ~exists(subq) # NOT at the entity level
|
|
262
|
-
return exists(subq)
|
|
263
|
-
|
|
264
|
-
# value leaf: path predicate + typed value compare
|
|
265
|
-
if "." not in pf.path:
|
|
266
|
-
path_pred = LtreeFilter(op=FilterOp.ENDS_WITH, value=pf.path).to_expression(alias.path, "")
|
|
267
|
-
else:
|
|
268
|
-
path_pred = alias.path == Ltree(pf.path)
|
|
269
|
-
|
|
270
|
-
value_pred = pf.to_expression(alias.value, alias.value_type)
|
|
271
|
-
subq = select(1).select_from(alias).where(and_(*correlates, path_pred, value_pred))
|
|
272
|
-
return exists(subq)
|
|
286
|
+
return self._handle_ltree_filter(pf, alias, correlates)
|
|
287
|
+
return self._handle_value_filter(pf, alias, correlates)
|
|
273
288
|
|
|
274
289
|
def compile_node(node: FilterTree | PathFilter) -> ColumnElement[bool]:
|
|
275
290
|
if isinstance(node, FilterTree):
|
|
@@ -226,9 +226,7 @@ class Indexer:
|
|
|
226
226
|
safe_margin = int(max_ctx * llm_settings.EMBEDDING_SAFE_MARGIN_PERCENT)
|
|
227
227
|
token_budget = max(1, max_ctx - safe_margin)
|
|
228
228
|
|
|
229
|
-
max_batch_size =
|
|
230
|
-
if llm_settings.OPENAI_BASE_URL: # We are using a local model
|
|
231
|
-
max_batch_size = llm_settings.EMBEDDING_MAX_BATCH_SIZE
|
|
229
|
+
max_batch_size = llm_settings.EMBEDDING_MAX_BATCH_SIZE
|
|
232
230
|
|
|
233
231
|
for entity_id, field in fields_to_upsert:
|
|
234
232
|
if field.value_type.is_embeddable(field.value):
|
|
@@ -15,6 +15,7 @@ import structlog
|
|
|
15
15
|
from sqlalchemy.orm import Query
|
|
16
16
|
|
|
17
17
|
from orchestrator.db import db
|
|
18
|
+
from orchestrator.domain.context_cache import cache_subscription_models
|
|
18
19
|
from orchestrator.search.core.types import EntityType
|
|
19
20
|
from orchestrator.search.indexing.indexer import Indexer
|
|
20
21
|
from orchestrator.search.indexing.registry import ENTITY_CONFIG_REGISTRY
|
|
@@ -63,4 +64,6 @@ def run_indexing_for_entity(
|
|
|
63
64
|
entities = db.session.execute(stmt).scalars()
|
|
64
65
|
|
|
65
66
|
indexer = Indexer(config=config, dry_run=dry_run, force_index=force_index, chunk_size=chunk_size)
|
|
66
|
-
|
|
67
|
+
|
|
68
|
+
with cache_subscription_models():
|
|
69
|
+
indexer.run(entities)
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
"""Simple search migration function that runs when SEARCH_ENABLED = True."""
|
|
15
|
+
|
|
16
|
+
from sqlalchemy import text
|
|
17
|
+
from sqlalchemy.engine import Connection
|
|
18
|
+
from structlog import get_logger
|
|
19
|
+
|
|
20
|
+
from orchestrator.search.core.types import FieldType
|
|
21
|
+
|
|
22
|
+
logger = get_logger(__name__)
|
|
23
|
+
|
|
24
|
+
TABLE = "ai_search_index"
|
|
25
|
+
TARGET_DIM = 1536
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def run_migration(connection: Connection) -> None:
|
|
29
|
+
"""Run LLM migration with ON CONFLICT DO NOTHING pattern."""
|
|
30
|
+
logger.info("Running LLM migration")
|
|
31
|
+
|
|
32
|
+
try:
|
|
33
|
+
# Create PostgreSQL extensions
|
|
34
|
+
connection.execute(text("CREATE EXTENSION IF NOT EXISTS ltree;"))
|
|
35
|
+
connection.execute(text("CREATE EXTENSION IF NOT EXISTS unaccent;"))
|
|
36
|
+
connection.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;"))
|
|
37
|
+
connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))
|
|
38
|
+
|
|
39
|
+
# Create field_type enum
|
|
40
|
+
field_type_values = "', '".join([ft.value for ft in FieldType])
|
|
41
|
+
connection.execute(
|
|
42
|
+
text(
|
|
43
|
+
f"""
|
|
44
|
+
DO $$
|
|
45
|
+
BEGIN
|
|
46
|
+
IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'field_type') THEN
|
|
47
|
+
CREATE TYPE field_type AS ENUM ('{field_type_values}');
|
|
48
|
+
END IF;
|
|
49
|
+
END $$;
|
|
50
|
+
"""
|
|
51
|
+
)
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
# Create table with ON CONFLICT DO NOTHING pattern
|
|
55
|
+
connection.execute(
|
|
56
|
+
text(
|
|
57
|
+
f"""
|
|
58
|
+
CREATE TABLE IF NOT EXISTS {TABLE} (
|
|
59
|
+
entity_type TEXT NOT NULL,
|
|
60
|
+
entity_id UUID NOT NULL,
|
|
61
|
+
path LTREE NOT NULL,
|
|
62
|
+
value TEXT NOT NULL,
|
|
63
|
+
embedding VECTOR({TARGET_DIM}),
|
|
64
|
+
content_hash VARCHAR(64) NOT NULL,
|
|
65
|
+
value_type field_type NOT NULL DEFAULT '{FieldType.STRING.value}',
|
|
66
|
+
CONSTRAINT pk_ai_search_index PRIMARY KEY (entity_id, path)
|
|
67
|
+
);
|
|
68
|
+
"""
|
|
69
|
+
)
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
# Drop default
|
|
73
|
+
connection.execute(text(f"ALTER TABLE {TABLE} ALTER COLUMN value_type DROP DEFAULT;"))
|
|
74
|
+
|
|
75
|
+
# Create indexes with IF NOT EXISTS
|
|
76
|
+
connection.execute(text(f"CREATE INDEX IF NOT EXISTS ix_ai_search_index_entity_id ON {TABLE} (entity_id);"))
|
|
77
|
+
connection.execute(
|
|
78
|
+
text(f"CREATE INDEX IF NOT EXISTS idx_ai_search_index_content_hash ON {TABLE} (content_hash);")
|
|
79
|
+
)
|
|
80
|
+
connection.execute(
|
|
81
|
+
text(f"CREATE INDEX IF NOT EXISTS ix_flat_path_gist ON {TABLE} USING GIST (path gist_ltree_ops);")
|
|
82
|
+
)
|
|
83
|
+
connection.execute(text(f"CREATE INDEX IF NOT EXISTS ix_flat_path_btree ON {TABLE} (path);"))
|
|
84
|
+
connection.execute(
|
|
85
|
+
text(f"CREATE INDEX IF NOT EXISTS ix_flat_value_trgm ON {TABLE} USING GIN (value gin_trgm_ops);")
|
|
86
|
+
)
|
|
87
|
+
connection.execute(
|
|
88
|
+
text(
|
|
89
|
+
f"CREATE INDEX IF NOT EXISTS ix_flat_embed_hnsw ON {TABLE} USING HNSW (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);"
|
|
90
|
+
)
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
connection.commit()
|
|
94
|
+
logger.info("LLM migration completed successfully")
|
|
95
|
+
|
|
96
|
+
except Exception as e:
|
|
97
|
+
logger.error("LLM migration failed", error=str(e))
|
|
98
|
+
raise Exception(
|
|
99
|
+
f"LLM migration failed. This likely means the pgvector extension "
|
|
100
|
+
f"is not installed. Please install pgvector and ensure your PostgreSQL "
|
|
101
|
+
f"version supports it. Error: {e}"
|
|
102
|
+
) from e
|
|
@@ -86,11 +86,7 @@ def _format_response(
|
|
|
86
86
|
|
|
87
87
|
|
|
88
88
|
def _extract_matching_field_from_filters(filters: FilterTree) -> MatchingField | None:
|
|
89
|
-
"""Extract the first path filter to use as matching field for structured searches.
|
|
90
|
-
|
|
91
|
-
TODO: Should we allow a list of matched fields in the MatchingField model?
|
|
92
|
-
We need a different approach, probably a cross join in StructuredRetriever.
|
|
93
|
-
"""
|
|
89
|
+
"""Extract the first path filter to use as matching field for structured searches."""
|
|
94
90
|
leaves = filters.get_all_leaves()
|
|
95
91
|
if len(leaves) != 1:
|
|
96
92
|
return None
|
|
@@ -112,7 +112,8 @@ class Retriever(ABC):
|
|
|
112
112
|
|
|
113
113
|
def _quantize_score_for_pagination(self, score_value: float) -> BindParameter[Decimal]:
|
|
114
114
|
"""Convert score value to properly quantized Decimal parameter for pagination."""
|
|
115
|
-
|
|
115
|
+
quantizer = Decimal(1).scaleb(-self.SCORE_PRECISION)
|
|
116
|
+
pas_dec = Decimal(str(score_value)).quantize(quantizer)
|
|
116
117
|
return literal(pas_dec, type_=self.SCORE_NUMERIC_TYPE)
|
|
117
118
|
|
|
118
119
|
@property
|
|
@@ -11,8 +11,11 @@
|
|
|
11
11
|
# See the License for the specific language governing permissions and
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
|
|
14
|
+
from typing import TypedDict
|
|
15
|
+
|
|
14
16
|
from sqlalchemy import BindParameter, Select, and_, bindparam, case, cast, func, literal, or_, select
|
|
15
|
-
from sqlalchemy.sql.expression import ColumnElement
|
|
17
|
+
from sqlalchemy.sql.expression import ColumnElement, Label
|
|
18
|
+
from sqlalchemy.types import TypeEngine
|
|
16
19
|
|
|
17
20
|
from orchestrator.db.models import AiSearchIndex
|
|
18
21
|
from orchestrator.search.core.types import SearchMetadata
|
|
@@ -21,6 +24,102 @@ from ..pagination import PaginationParams
|
|
|
21
24
|
from .base import Retriever
|
|
22
25
|
|
|
23
26
|
|
|
27
|
+
class RrfScoreSqlComponents(TypedDict):
|
|
28
|
+
"""SQL expression components of the RRF hybrid score calculation."""
|
|
29
|
+
|
|
30
|
+
rrf_num: ColumnElement
|
|
31
|
+
perfect: Label
|
|
32
|
+
beta: ColumnElement
|
|
33
|
+
rrf_max: ColumnElement
|
|
34
|
+
fused_num: ColumnElement
|
|
35
|
+
normalized_score: ColumnElement
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def compute_rrf_hybrid_score_sql(
|
|
39
|
+
sem_rank_col: ColumnElement,
|
|
40
|
+
fuzzy_rank_col: ColumnElement,
|
|
41
|
+
avg_fuzzy_score_col: ColumnElement,
|
|
42
|
+
k: int,
|
|
43
|
+
perfect_threshold: float,
|
|
44
|
+
n_sources: int = 2,
|
|
45
|
+
margin_factor: float = 0.05,
|
|
46
|
+
score_numeric_type: TypeEngine | None = None,
|
|
47
|
+
) -> RrfScoreSqlComponents:
|
|
48
|
+
"""Compute RRF (Reciprocal Rank Fusion) hybrid score as SQL expressions for database execution.
|
|
49
|
+
|
|
50
|
+
This function implements the core scoring logic for hybrid search combining semantic
|
|
51
|
+
and fuzzy ranking. It computes:
|
|
52
|
+
1. Base RRF score from both ranks
|
|
53
|
+
2. Perfect match detection and boosting
|
|
54
|
+
3. Dynamic beta parameter based on k and n_sources
|
|
55
|
+
4. Normalized final score in [0, 1] range
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
sem_rank_col: SQLAlchemy column expression for semantic rank
|
|
59
|
+
fuzzy_rank_col: SQLAlchemy column expression for fuzzy rank
|
|
60
|
+
avg_fuzzy_score_col: SQLAlchemy column expression for average fuzzy score
|
|
61
|
+
k: RRF constant controlling rank influence (typically 60)
|
|
62
|
+
perfect_threshold: Threshold for perfect match boost (typically 0.9)
|
|
63
|
+
n_sources: Number of ranking sources being fused (default: 2 for semantic + fuzzy)
|
|
64
|
+
margin_factor: Margin above rrf_max as fraction (default: 0.05 = 5%)
|
|
65
|
+
score_numeric_type: SQLAlchemy numeric type for casting scores
|
|
66
|
+
|
|
67
|
+
Returns:
|
|
68
|
+
RrfScoreSqlComponents: Dictionary of SQL expressions for score components
|
|
69
|
+
- rrf_num: Raw RRF score (cast to numeric type if provided)
|
|
70
|
+
- perfect: Perfect match flag (1 if avg_fuzzy_score >= threshold, else 0)
|
|
71
|
+
- beta: Boost amount for perfect matches
|
|
72
|
+
- rrf_max: Maximum possible RRF score
|
|
73
|
+
- fused_num: RRF + perfect boost
|
|
74
|
+
- normalized_score: Final score normalized to [0, 1]
|
|
75
|
+
|
|
76
|
+
Note:
|
|
77
|
+
- Keep margin_factor small to avoid compressing perfects near 1 after normalization.
|
|
78
|
+
|
|
79
|
+
- The `beta` boost is calculated to be greater than the maximum possible standard
|
|
80
|
+
RRF score (`rrf_max`). This guarantees that any item flagged as a "perfect" match
|
|
81
|
+
will always rank above any non-perfect match.
|
|
82
|
+
|
|
83
|
+
- This function assumes that rank columns do not
|
|
84
|
+
contain `NULL` values. A `NULL` in any rank column will result in a `NULL` final score
|
|
85
|
+
for that item.
|
|
86
|
+
"""
|
|
87
|
+
# RRF (rank-based): sum of 1/(k + rank_i) for each ranking source
|
|
88
|
+
rrf_raw = (1.0 / (k + sem_rank_col)) + (1.0 / (k + fuzzy_rank_col))
|
|
89
|
+
rrf_num = cast(rrf_raw, score_numeric_type) if score_numeric_type else rrf_raw
|
|
90
|
+
|
|
91
|
+
# Perfect flag to boost near perfect fuzzy matches
|
|
92
|
+
perfect = case((avg_fuzzy_score_col >= perfect_threshold, 1), else_=0).label("perfect_match")
|
|
93
|
+
|
|
94
|
+
# Dynamic beta based on k and number of sources
|
|
95
|
+
# rrf_max = n_sources / (k + 1)
|
|
96
|
+
k_num = literal(float(k), type_=score_numeric_type) if score_numeric_type else literal(float(k))
|
|
97
|
+
n_sources_lit = (
|
|
98
|
+
literal(float(n_sources), type_=score_numeric_type) if score_numeric_type else literal(float(n_sources))
|
|
99
|
+
)
|
|
100
|
+
rrf_max = n_sources_lit / (k_num + literal(1.0, type_=score_numeric_type if score_numeric_type else None))
|
|
101
|
+
|
|
102
|
+
margin = rrf_max * literal(margin_factor, type_=score_numeric_type if score_numeric_type else None)
|
|
103
|
+
beta = rrf_max + margin
|
|
104
|
+
|
|
105
|
+
# Fused score: RRF + perfect match boost
|
|
106
|
+
perfect_casted = cast(perfect, score_numeric_type) if score_numeric_type else perfect
|
|
107
|
+
fused_num = rrf_num + beta * perfect_casted
|
|
108
|
+
|
|
109
|
+
# Normalize to [0,1] via the theoretical max (beta + rrf_max)
|
|
110
|
+
norm_den = beta + rrf_max
|
|
111
|
+
normalized_score = fused_num / norm_den
|
|
112
|
+
|
|
113
|
+
return RrfScoreSqlComponents(
|
|
114
|
+
rrf_num=rrf_num,
|
|
115
|
+
perfect=perfect,
|
|
116
|
+
beta=beta,
|
|
117
|
+
rrf_max=rrf_max,
|
|
118
|
+
fused_num=fused_num,
|
|
119
|
+
normalized_score=normalized_score,
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
|
|
24
123
|
class RrfHybridRetriever(Retriever):
|
|
25
124
|
"""Reciprocal Rank Fusion of semantic and fuzzy ranking with parent-child retrieval."""
|
|
26
125
|
|
|
@@ -122,30 +221,20 @@ class RrfHybridRetriever(Retriever):
|
|
|
122
221
|
)
|
|
123
222
|
).cte("ranked_results")
|
|
124
223
|
|
|
125
|
-
# RRF
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
k_num = literal(float(self.k), type_=self.SCORE_NUMERIC_TYPE)
|
|
135
|
-
n_sources = literal(2.0, type_=self.SCORE_NUMERIC_TYPE) # semantic + fuzzy
|
|
136
|
-
rrf_max = n_sources / (k_num + literal(1.0, type_=self.SCORE_NUMERIC_TYPE))
|
|
137
|
-
|
|
138
|
-
# Choose a small positive margin above rrf_max to ensure strict separation
|
|
139
|
-
# Keep it small to avoid compressing perfects near 1 after normalization
|
|
140
|
-
margin = rrf_max * literal(0.05, type_=self.SCORE_NUMERIC_TYPE) # 5% above bound
|
|
141
|
-
beta = rrf_max + margin
|
|
142
|
-
|
|
143
|
-
fused_num = rrf_num + beta * cast(perfect, self.SCORE_NUMERIC_TYPE)
|
|
224
|
+
# Compute RRF hybrid score
|
|
225
|
+
score_components = compute_rrf_hybrid_score_sql(
|
|
226
|
+
sem_rank_col=ranked.c.sem_rank,
|
|
227
|
+
fuzzy_rank_col=ranked.c.fuzzy_rank,
|
|
228
|
+
avg_fuzzy_score_col=ranked.c.avg_fuzzy_score,
|
|
229
|
+
k=self.k,
|
|
230
|
+
perfect_threshold=0.9,
|
|
231
|
+
score_numeric_type=self.SCORE_NUMERIC_TYPE,
|
|
232
|
+
)
|
|
144
233
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
normalized_score = fused_num / norm_den
|
|
234
|
+
perfect = score_components["perfect"]
|
|
235
|
+
normalized_score = score_components["normalized_score"]
|
|
148
236
|
|
|
237
|
+
# Round to configured precision
|
|
149
238
|
score = cast(
|
|
150
239
|
func.round(cast(normalized_score, self.SCORE_NUMERIC_TYPE), self.SCORE_PRECISION),
|
|
151
240
|
self.SCORE_NUMERIC_TYPE,
|
orchestrator/workflows/steps.py
CHANGED
|
@@ -156,7 +156,7 @@ def refresh_subscription_search_index(subscription: SubscriptionModel | None) ->
|
|
|
156
156
|
"""
|
|
157
157
|
try:
|
|
158
158
|
reset_search_index()
|
|
159
|
-
if llm_settings.
|
|
159
|
+
if llm_settings.SEARCH_ENABLED and subscription:
|
|
160
160
|
run_indexing_for_entity(EntityType.SUBSCRIPTION, str(subscription.subscription_id))
|
|
161
161
|
except Exception:
|
|
162
162
|
# Don't fail workflow in case of unexpected error
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: orchestrator-core
|
|
3
|
-
Version: 4.5.0a7
|
|
3
|
+
Version: 4.5.0a8
|
|
4
4
|
Summary: This is the orchestrator workflow engine.
|
|
5
5
|
Author-email: SURF <automation-beheer@surf.nl>
|
|
6
6
|
Requires-Python: >=3.11,<3.14
|
|
@@ -63,15 +63,17 @@ Requires-Dist: structlog>=25.4.0
|
|
|
63
63
|
Requires-Dist: tabulate==0.9.0
|
|
64
64
|
Requires-Dist: typer==0.15.4
|
|
65
65
|
Requires-Dist: uvicorn[standard]~=0.34.0
|
|
66
|
+
Requires-Dist: pydantic-ai-slim ==0.7.0 ; extra == "agent"
|
|
67
|
+
Requires-Dist: ag-ui-protocol>=0.1.8 ; extra == "agent"
|
|
68
|
+
Requires-Dist: litellm>=1.75.7 ; extra == "agent"
|
|
66
69
|
Requires-Dist: celery~=5.5.1 ; extra == "celery"
|
|
67
|
-
Requires-Dist:
|
|
68
|
-
Requires-Dist: ag-ui-protocol>=0.1.8 ; extra == "llm"
|
|
69
|
-
Requires-Dist: litellm>=1.75.7 ; extra == "llm"
|
|
70
|
+
Requires-Dist: litellm>=1.75.7 ; extra == "search"
|
|
70
71
|
Project-URL: Documentation, https://workfloworchestrator.org/orchestrator-core
|
|
71
72
|
Project-URL: Homepage, https://workfloworchestrator.org/orchestrator-core
|
|
72
73
|
Project-URL: Source, https://github.com/workfloworchestrator/orchestrator-core
|
|
74
|
+
Provides-Extra: agent
|
|
73
75
|
Provides-Extra: celery
|
|
74
|
-
Provides-Extra: llm
|
|
76
|
+
Provides-Extra: search
|
|
75
77
|
|
|
76
78
|
# Orchestrator-Core
|
|
77
79
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
orchestrator/__init__.py,sha256=
|
|
2
|
-
orchestrator/agentic_app.py,sha256=
|
|
1
|
+
orchestrator/__init__.py,sha256=bfe5yk8RiIvi9Pr2-Ym6xKS3wO9qHR93paLma0iVR9A,1449
|
|
2
|
+
orchestrator/agentic_app.py,sha256=op7osw7KJRww90iYuWBt_bB5qI-sAkpG0fyr0liubQw,3968
|
|
3
3
|
orchestrator/app.py,sha256=UPKQuDpg8MWNC6r3SRRbp6l9RBzwb00IMIaGRk-jbCU,13203
|
|
4
4
|
orchestrator/exception_handlers.py,sha256=UsW3dw8q0QQlNLcV359bIotah8DYjMsj2Ts1LfX4ClY,1268
|
|
5
|
-
orchestrator/llm_settings.py,sha256=
|
|
5
|
+
orchestrator/llm_settings.py,sha256=RjOY-FRVd648HYa6Im8ni4h76KlLFja36zlGD14sPeY,2231
|
|
6
6
|
orchestrator/log_config.py,sha256=1cPl_OXT4tEUyNxG8cwIWXrmadUm1E81vq0mdtrV-v4,1912
|
|
7
7
|
orchestrator/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
orchestrator/security.py,sha256=iXFxGxab54aav7oHEKLAVkTgrQMJGHy6IYLojEnD7gI,2422
|
|
@@ -16,14 +16,14 @@ orchestrator/api/error_handling.py,sha256=YrPCxSa-DSa9KwqIMlXI-KGBGnbGIW5ukOPiik
|
|
|
16
16
|
orchestrator/api/helpers.py,sha256=s0QRHYw8AvEmlkmRhuEzz9xixaZKUF3YuPzUVHkcoXk,6933
|
|
17
17
|
orchestrator/api/models.py,sha256=z9BDBx7uI4KBHWbD_LVrLsqNQ0_w-Mg9Qiy7PR_rZhk,5996
|
|
18
18
|
orchestrator/api/api_v1/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n8XzmLU,571
|
|
19
|
-
orchestrator/api/api_v1/api.py,sha256=
|
|
19
|
+
orchestrator/api/api_v1/api.py,sha256=bWsvWgLap7b6ltu1BvwZpW7X2dEE6cQ7-WY0HcY7Yoo,3279
|
|
20
20
|
orchestrator/api/api_v1/endpoints/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n8XzmLU,571
|
|
21
21
|
orchestrator/api/api_v1/endpoints/health.py,sha256=iaxs1XX1_250_gKNsspuULCV2GEMBjbtjsmfQTOvMAI,1284
|
|
22
22
|
orchestrator/api/api_v1/endpoints/processes.py,sha256=OVbt6FgFnJ4aHaYGIg0cPoim8mxDpdzJ4TGAyfB_kPw,16269
|
|
23
23
|
orchestrator/api/api_v1/endpoints/product_blocks.py,sha256=kZ6ywIOsS_S2qGq7RvZ4KzjvaS1LmwbGWR37AKRvWOw,2146
|
|
24
24
|
orchestrator/api/api_v1/endpoints/products.py,sha256=BfFtwu9dZXEQbtKxYj9icc73GKGvAGMR5ytyf41nQlQ,3081
|
|
25
25
|
orchestrator/api/api_v1/endpoints/resource_types.py,sha256=gGyuaDyOD0TAVoeFGaGmjDGnQ8eQQArOxKrrk4MaDzA,2145
|
|
26
|
-
orchestrator/api/api_v1/endpoints/search.py,sha256=
|
|
26
|
+
orchestrator/api/api_v1/endpoints/search.py,sha256=NooZcMXmlnD1NxdhFWlqF3jhmixF1DZYuUG8XtEVGjo,10885
|
|
27
27
|
orchestrator/api/api_v1/endpoints/settings.py,sha256=5s-k169podZjgGHUbVDmSQwpY_3Cs_Bbf2PPtZIkBcw,6184
|
|
28
28
|
orchestrator/api/api_v1/endpoints/subscription_customer_descriptions.py,sha256=1_6LtgQleoq3M6z_W-Qz__Bj3OFUweoPrUqHMwSH6AM,3288
|
|
29
29
|
orchestrator/api/api_v1/endpoints/subscriptions.py,sha256=7KaodccUiMkcVnrFnK2azp_V_-hGudcIyhov5WwVGQY,9810
|
|
@@ -34,16 +34,12 @@ orchestrator/api/api_v1/endpoints/ws.py,sha256=1l7E0ag_sZ6UMfQPHlmew7ENwxjm6fflB
|
|
|
34
34
|
orchestrator/cli/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n8XzmLU,571
|
|
35
35
|
orchestrator/cli/database.py,sha256=YkYAbCY2VPAa6mDW0PpNKG5wL4FuAQYD2CGl1_DQtEk,19595
|
|
36
36
|
orchestrator/cli/generate.py,sha256=SBaYfRijXPF9r3VxarPKTiDzDcB6GBMMQvecQIb_ZLQ,7377
|
|
37
|
-
orchestrator/cli/index_llm.py,sha256=RWPkFz5bxiznjpN1vMsSWeqcvYKB90DLL4pXQ92QJNI,2239
|
|
38
|
-
orchestrator/cli/main.py,sha256=U4eAG_iT3JhmeB6yZnogB6KTM6kFlDUo7zY4qBdIHv4,1648
|
|
37
|
+
orchestrator/cli/main.py,sha256=xGLc_cS2LoSIbK5qkMFE7GCnZoOi5kATgtmQDFNQU7E,1658
|
|
39
38
|
orchestrator/cli/migrate_domain_models.py,sha256=WRXy_1OnziQwpsCFZXvjB30nDJtjj0ikVXy8YNLque4,20928
|
|
40
39
|
orchestrator/cli/migrate_tasks.py,sha256=bju8XColjSZD0v3rS4kl-24dLr8En_H4-6enBmqd494,7255
|
|
41
40
|
orchestrator/cli/migrate_workflows.py,sha256=nxUpx0vgEIc_8aJrjAyrw3E9Dt8JmaamTts8oiQ4vHY,8923
|
|
42
41
|
orchestrator/cli/migration_helpers.py,sha256=C5tpkP5WEBr7G9S-1k1hgSI8ili6xd9Z5ygc9notaK0,4110
|
|
43
|
-
orchestrator/cli/resize_embedding.py,sha256=ds830T26ADOD9vZS7psRHJVF_u2xar2d4vvIH1AOlww,4216
|
|
44
42
|
orchestrator/cli/scheduler.py,sha256=2q6xT_XVOodY3e_qzIV98MWNvKvrbFpOJajWesj1fcs,1911
|
|
45
|
-
orchestrator/cli/search_explore.py,sha256=SDn1DMN8a4roSPodIHl-KrNAvdHo5jTDUvMUFLVh1P4,8602
|
|
46
|
-
orchestrator/cli/speedtest.py,sha256=QkQ_YhKh7TnNRX4lKjgrmF7DyufU9teLqw4CWkm52ko,4972
|
|
47
43
|
orchestrator/cli/domain_gen_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
44
|
orchestrator/cli/domain_gen_helpers/fixed_input_helpers.py,sha256=uzpwsaau81hHSxNMOS9-o7kF-9_78R0f_UE0AvWooZQ,6775
|
|
49
45
|
orchestrator/cli/domain_gen_helpers/helpers.py,sha256=tIPxn8ezED_xYZxH7ZAtQLwkDc6RNmLZVxWAoJ3a9lw,4203
|
|
@@ -106,6 +102,11 @@ orchestrator/cli/generator/templates/validate_product.j2,sha256=_gPNYS8dGOSpRm2E
|
|
|
106
102
|
orchestrator/cli/helpers/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n8XzmLU,571
|
|
107
103
|
orchestrator/cli/helpers/input_helpers.py,sha256=pv5GTMuIWLzBE_bKNhn1XD_gxoqB0s1ZN4cnKkIIu5I,1139
|
|
108
104
|
orchestrator/cli/helpers/print_helpers.py,sha256=b3ePg6HfBLKPYBBVr5XOA__JnFEMI5HBjbjov3CP8Po,859
|
|
105
|
+
orchestrator/cli/search/__init__.py,sha256=K15_iW9ogR7xtX7qHDal4H09tmwVGnOBZWyPBLWhuzc,1274
|
|
106
|
+
orchestrator/cli/search/index_llm.py,sha256=RWPkFz5bxiznjpN1vMsSWeqcvYKB90DLL4pXQ92QJNI,2239
|
|
107
|
+
orchestrator/cli/search/resize_embedding.py,sha256=ds830T26ADOD9vZS7psRHJVF_u2xar2d4vvIH1AOlww,4216
|
|
108
|
+
orchestrator/cli/search/search_explore.py,sha256=SDn1DMN8a4roSPodIHl-KrNAvdHo5jTDUvMUFLVh1P4,8602
|
|
109
|
+
orchestrator/cli/search/speedtest.py,sha256=QkQ_YhKh7TnNRX4lKjgrmF7DyufU9teLqw4CWkm52ko,4972
|
|
109
110
|
orchestrator/config/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n8XzmLU,571
|
|
110
111
|
orchestrator/config/assignee.py,sha256=9mFFe9hoi2NCkXFOKL2pU2aveBzcZhljSvqUnh55vrk,780
|
|
111
112
|
orchestrator/db/__init__.py,sha256=41_v-oeX5SMnwH2uIeBzscoST3FRXdpkEFDE5AoQR1E,3498
|
|
@@ -254,7 +255,6 @@ orchestrator/migrations/versions/schema/2025-05-08_161918133bec_add_is_task_to_w
|
|
|
254
255
|
orchestrator/migrations/versions/schema/2025-07-01_93fc5834c7e5_changed_timestamping_fields_in_process_steps.py,sha256=Oezd8b2qaI1Kyq-sZFVFmdzd4d9NjXrf6HtJGk11fy0,1914
|
|
255
256
|
orchestrator/migrations/versions/schema/2025-07-04_4b58e336d1bf_deprecating_workflow_target_in_.py,sha256=xnD6w-97R4ClS7rbmXQEXc36K3fdcXKhCy7ZZNy_FX4,742
|
|
256
257
|
orchestrator/migrations/versions/schema/2025-07-28_850dccac3b02_update_description_of_resume_workflows_.py,sha256=R6Qoga83DJ1IL0WYPu0u5u2ZvAmqGlDmUMv_KtJyOhQ,812
|
|
257
|
-
orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py,sha256=6lRbUd1hJBjG8KM4Ow_J4pk2qwlRVhTKczS7XmoW-q4,3304
|
|
258
258
|
orchestrator/schedules/__init__.py,sha256=Zy0fTOBMGIRFoh5iVFDLF9_PRAFaONYDThGK9EsysWo,981
|
|
259
259
|
orchestrator/schedules/resume_workflows.py,sha256=jRnVRWDy687pQu-gtk80ecwiLSdrvtL15tG3U2zWA6I,891
|
|
260
260
|
orchestrator/schedules/scheduler.py,sha256=nnuehZnBbtC90MsFP_Q6kqcD1ihsq08vr1ALJ6jHF_s,5833
|
|
@@ -276,8 +276,9 @@ orchestrator/schemas/subscription.py,sha256=-jXyHZIed9Xlia18ksSDyenblNN6Q2yM2FlG
|
|
|
276
276
|
orchestrator/schemas/subscription_descriptions.py,sha256=Ft_jw1U0bf9Z0U8O4OWfLlcl0mXCVT_qYVagBP3GbIQ,1262
|
|
277
277
|
orchestrator/schemas/workflow.py,sha256=StVoRGyNT2iIeq3z8BIlTPt0bcafzbeYxXRrCucR6LU,2146
|
|
278
278
|
orchestrator/search/__init__.py,sha256=2uhTQexKx-cdBP1retV3CYSNCs02s8WL3fhGvupRGZk,571
|
|
279
|
-
orchestrator/search/
|
|
280
|
-
orchestrator/search/agent/
|
|
279
|
+
orchestrator/search/llm_migration.py,sha256=UvFyzLGhlfPulA9T1pcpq8HnRd7Uu2ssKqW_N5NMeQk,3962
|
|
280
|
+
orchestrator/search/agent/__init__.py,sha256=_b7Q43peWSi2bb3-69CThAqt_sxgoaMbHeq6erLGR00,752
|
|
281
|
+
orchestrator/search/agent/agent.py,sha256=zhDyXwRf118vH96CmKRbo5O8GKl_mnLJTDNfWgvsKeE,2450
|
|
281
282
|
orchestrator/search/agent/prompts.py,sha256=-1VLYwPecC6xroKQTc9AE9MTtg_ffAUfHUi8ZATyUMg,4556
|
|
282
283
|
orchestrator/search/agent/state.py,sha256=1WHYol5UlYpq2QZz-BVsBFYrJZms5P18ohN2Ur8P2F4,783
|
|
283
284
|
orchestrator/search/agent/tools.py,sha256=4kvY0tG7i5-w8C-ZMuSabxb_sJmd_TpFl3F4xeGgzok,9513
|
|
@@ -287,29 +288,29 @@ orchestrator/search/core/exceptions.py,sha256=qp7ZdyDvN5b2HD5_oZXMgoLJgy79krpCls
|
|
|
287
288
|
orchestrator/search/core/types.py,sha256=Gaf77cKUqnE8vJNCpk-g3h2U5912axhIgZZnF_0_O48,8831
|
|
288
289
|
orchestrator/search/core/validators.py,sha256=zktY5A3RTBmfdARJoxoz9rnnyTZj7L30Kbmh9UTQz2o,1204
|
|
289
290
|
orchestrator/search/docs/index.md,sha256=zKzE2fbtHDfYTKaHg628wAsqCTOJ5yFUWV0ucFH3pAg,863
|
|
290
|
-
orchestrator/search/docs/running_local_text_embedding_inference.md,sha256=
|
|
291
|
+
orchestrator/search/docs/running_local_text_embedding_inference.md,sha256=OR0NVZMb8DqpgXYxlwDUrJwfRk0bYOk1-LkDMqsV6bU,1327
|
|
291
292
|
orchestrator/search/filters/__init__.py,sha256=Yutr21lv8RtZf5OKaBozlYufgmmV2QHuzAPPjvUamLE,1222
|
|
292
|
-
orchestrator/search/filters/base.py,sha256=
|
|
293
|
+
orchestrator/search/filters/base.py,sha256=lUr0eW0zi4oIMVUHuRD3GAQ9xEbHiFUl_EfAI6ABPVo,12456
|
|
293
294
|
orchestrator/search/filters/date_filters.py,sha256=0a6nbUTK647_Qf4XXZMLDvBLVjF5Qqy9eJ-9SrTGaGg,3040
|
|
294
295
|
orchestrator/search/filters/definitions.py,sha256=wl2HiXlTWXQN4JmuSq2SBuhTMvyIeonTtUZoCrJAK6M,4093
|
|
295
296
|
orchestrator/search/filters/ltree_filters.py,sha256=1OOmM5K90NsGBQmTqyoDlphdAOGd9r2rmz1rNItm8yk,2341
|
|
296
297
|
orchestrator/search/filters/numeric_filter.py,sha256=lcOAOpPNTwA0SW8QPiMOs1oKTYZLwGDQSrwFydXgMUU,2774
|
|
297
298
|
orchestrator/search/indexing/__init__.py,sha256=Or78bizNPiuNOgwLGJQ0mspCF1G_gSe5C9Ap7qi0MZk,662
|
|
298
|
-
orchestrator/search/indexing/indexer.py,sha256=
|
|
299
|
+
orchestrator/search/indexing/indexer.py,sha256=puYOL7IXyJi7A7huT1jQ_2G3YZimeivkQJF2BZR4apQ,14866
|
|
299
300
|
orchestrator/search/indexing/registry.py,sha256=zEOUmQDmZHJ4xzT63VSJzuuHWVTnuBSvhZg4l6lFTUU,3048
|
|
300
|
-
orchestrator/search/indexing/tasks.py,sha256=
|
|
301
|
+
orchestrator/search/indexing/tasks.py,sha256=vmS1nnprPF74yitS0xGpP1dhSDis2nekMYF0v_jduDE,2478
|
|
301
302
|
orchestrator/search/indexing/traverse.py,sha256=NKkKSri-if1d1vwzTQlDCF0hvBdB2IbWWuMdPrQ78Jg,14330
|
|
302
303
|
orchestrator/search/retrieval/__init__.py,sha256=JP5WGYhmjd2RKXEExorvU6koMBLsTLdlDGCR_r1t8ug,645
|
|
303
304
|
orchestrator/search/retrieval/builder.py,sha256=70cEvbsWI1dj-4H-LJq4o6Q71e3WERd-V6bzlZhGtHw,4607
|
|
304
|
-
orchestrator/search/retrieval/engine.py,sha256=
|
|
305
|
+
orchestrator/search/retrieval/engine.py,sha256=jHxKuULcsqkdTyh9NEzBCsOnBaZzlbvcGseJoJec1yw,6147
|
|
305
306
|
orchestrator/search/retrieval/exceptions.py,sha256=oHoLGLLxxmVcV-W36uK0V-Pn4vf_iw6hajpQbap3NqI,3588
|
|
306
307
|
orchestrator/search/retrieval/pagination.py,sha256=bRcXtWxxWvOhCQyhjwfJ7S6q_Dn3pYm8TCg7ofjVP44,3353
|
|
307
308
|
orchestrator/search/retrieval/utils.py,sha256=svhF9YfMClq2MVPArS3ir3pg5_e_bremquv_l6tTsOQ,4597
|
|
308
309
|
orchestrator/search/retrieval/validation.py,sha256=AjhttVJWlZDaT1_pUL_LaypQV11U21JpTCE4OwnpoqA,5849
|
|
309
310
|
orchestrator/search/retrieval/retrievers/__init__.py,sha256=1bGmbae0GYRM6e1vxf0ww79NaTSmfOMe9S0pPVmh3CM,897
|
|
310
|
-
orchestrator/search/retrieval/retrievers/base.py,sha256=
|
|
311
|
+
orchestrator/search/retrieval/retrievers/base.py,sha256=Sp8h992lw_7vigE4s2QB0gqtqMACEOA8nDnhuXXHtxA,4570
|
|
311
312
|
orchestrator/search/retrieval/retrievers/fuzzy.py,sha256=U_WNAaxSUVUlVrmFrYFt-s0ebw9ift1Z2zBHG8TSPLE,3839
|
|
312
|
-
orchestrator/search/retrieval/retrievers/hybrid.py,sha256=
|
|
313
|
+
orchestrator/search/retrieval/retrievers/hybrid.py,sha256=YriY3gF6E7pQUumqdSDSyFJvYQbZZ6vSsMUhM5JHGpg,11102
|
|
313
314
|
orchestrator/search/retrieval/retrievers/semantic.py,sha256=oWNJ9DuqM16BXYXUwmRmkfDmp_2vQH2PySNMk8TcvVk,3961
|
|
314
315
|
orchestrator/search/retrieval/retrievers/structured.py,sha256=OHsHEjjLg1QwtEytQNeyWcCBQd8rJxHVf59HxvA9_vc,1452
|
|
315
316
|
orchestrator/search/schemas/__init__.py,sha256=q5G0z3nKjIHKFs1PkEG3nvTUy3Wp4kCyBtCbqUITj3A,579
|
|
@@ -360,7 +361,7 @@ orchestrator/websocket/managers/memory_websocket_manager.py,sha256=lF5EEx1iFMCGE
|
|
|
360
361
|
orchestrator/workflows/__init__.py,sha256=NzIGGI-8SNAwCk2YqH6sHhEWbgAY457ntDwjO15N8v4,4131
|
|
361
362
|
orchestrator/workflows/modify_note.py,sha256=eXt5KQvrkOXf-3YEXCn2XbBLP9N-n1pUYRW2t8Odupo,2150
|
|
362
363
|
orchestrator/workflows/removed_workflow.py,sha256=V0Da5TEdfLdZZKD38ig-MTp3_IuE7VGqzHHzvPYQmLI,909
|
|
363
|
-
orchestrator/workflows/steps.py,sha256=
|
|
364
|
+
orchestrator/workflows/steps.py,sha256=teis7vHLOEAchMrzw_pvPPQ6pRFliKZRpe02vsv3AZY,6994
|
|
364
365
|
orchestrator/workflows/utils.py,sha256=VUCDoIl5XAKtIeAJpVpyW2pCIg3PoVWfwGn28BYlYhA,15424
|
|
365
366
|
orchestrator/workflows/tasks/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n8XzmLU,571
|
|
366
367
|
orchestrator/workflows/tasks/cleanup_tasks_log.py,sha256=BfWYbPXhnLAHUJ0mlODDnjZnQQAvKCZJDVTwbwOWI04,1624
|
|
@@ -368,7 +369,7 @@ orchestrator/workflows/tasks/resume_workflows.py,sha256=T3iobSJjVgiupe0rClD34kUZ
|
|
|
368
369
|
orchestrator/workflows/tasks/validate_product_type.py,sha256=paG-NAY1bdde3Adt8zItkcBKf5Pxw6f5ngGW6an6dYU,3192
|
|
369
370
|
orchestrator/workflows/tasks/validate_products.py,sha256=kXBGZTkobfYH8e_crhdErT-ypdouH0a3_WLImmbKXcE,8523
|
|
370
371
|
orchestrator/workflows/translations/en-GB.json,sha256=ST53HxkphFLTMjFHonykDBOZ7-P_KxksktZU3GbxLt0,846
|
|
371
|
-
orchestrator_core-4.5.0a7.dist-info/licenses/LICENSE,sha256=b-aA5OZQuuBATmLKo_mln8CQrDPPhg3ghLzjPjLn4Tg,11409
|
|
372
|
-
orchestrator_core-4.5.0a7.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
373
|
-
orchestrator_core-4.5.
|
|
374
|
-
orchestrator_core-4.5.0a7.dist-info/RECORD,,
|
|
372
|
+
orchestrator_core-4.5.0a8.dist-info/licenses/LICENSE,sha256=b-aA5OZQuuBATmLKo_mln8CQrDPPhg3ghLzjPjLn4Tg,11409
|
|
373
|
+
orchestrator_core-4.5.0a8.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
374
|
+
orchestrator_core-4.5.0a8.dist-info/METADATA,sha256=kPLUhDg7J-mr72Nwo-oB9hEFmhp_xihUJ7JFWHFq64A,6252
|
|
375
|
+
orchestrator_core-4.5.0a8.dist-info/RECORD,,
|
|
@@ -1,95 +0,0 @@
|
|
|
1
|
-
"""Search index model for llm integration.
|
|
2
|
-
|
|
3
|
-
Revision ID: 52b37b5b2714
|
|
4
|
-
Revises: 850dccac3b02
|
|
5
|
-
Create Date: 2025-08-12 22:34:26.694750
|
|
6
|
-
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
import sqlalchemy as sa
|
|
10
|
-
from alembic import op
|
|
11
|
-
from pgvector.sqlalchemy import Vector
|
|
12
|
-
from sqlalchemy.dialects import postgresql
|
|
13
|
-
from sqlalchemy_utils import LtreeType
|
|
14
|
-
|
|
15
|
-
from orchestrator.search.core.types import FieldType
|
|
16
|
-
|
|
17
|
-
# revision identifiers, used by Alembic.
|
|
18
|
-
revision = "52b37b5b2714"
|
|
19
|
-
down_revision = "850dccac3b02"
|
|
20
|
-
branch_labels = None
|
|
21
|
-
depends_on = None
|
|
22
|
-
|
|
23
|
-
TABLE = "ai_search_index"
|
|
24
|
-
IDX_EMBED_HNSW = "ix_flat_embed_hnsw"
|
|
25
|
-
IDX_PATH_GIST = "ix_flat_path_gist"
|
|
26
|
-
IDX_PATH_BTREE = "ix_flat_path_btree"
|
|
27
|
-
IDX_VALUE_TRGM = "ix_flat_value_trgm"
|
|
28
|
-
IDX_CONTENT_HASH = "idx_ai_search_index_content_hash"
|
|
29
|
-
|
|
30
|
-
TARGET_DIM = 1536
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def upgrade() -> None:
|
|
34
|
-
# Create PostgreSQL extensions
|
|
35
|
-
op.execute("CREATE EXTENSION IF NOT EXISTS ltree;")
|
|
36
|
-
op.execute("CREATE EXTENSION IF NOT EXISTS unaccent;")
|
|
37
|
-
op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;")
|
|
38
|
-
op.execute("CREATE EXTENSION IF NOT EXISTS vector;")
|
|
39
|
-
|
|
40
|
-
# Create the ai_search_index table
|
|
41
|
-
op.create_table(
|
|
42
|
-
TABLE,
|
|
43
|
-
sa.Column("entity_type", sa.Text, nullable=False),
|
|
44
|
-
sa.Column("entity_id", postgresql.UUID, nullable=False),
|
|
45
|
-
sa.Column("path", LtreeType, nullable=False),
|
|
46
|
-
sa.Column("value", sa.Text, nullable=False),
|
|
47
|
-
sa.Column("embedding", Vector(TARGET_DIM), nullable=True),
|
|
48
|
-
sa.Column("content_hash", sa.String(64), nullable=False),
|
|
49
|
-
sa.PrimaryKeyConstraint("entity_id", "path", name="pk_ai_search_index"),
|
|
50
|
-
)
|
|
51
|
-
|
|
52
|
-
field_type_enum = sa.Enum(*[ft.value for ft in FieldType], name="field_type")
|
|
53
|
-
field_type_enum.create(op.get_bind(), checkfirst=True)
|
|
54
|
-
op.add_column(
|
|
55
|
-
TABLE,
|
|
56
|
-
sa.Column("value_type", field_type_enum, nullable=False, server_default=FieldType.STRING.value),
|
|
57
|
-
)
|
|
58
|
-
op.alter_column(TABLE, "value_type", server_default=None)
|
|
59
|
-
|
|
60
|
-
op.create_index(op.f("ix_ai_search_index_entity_id"), TABLE, ["entity_id"], unique=False)
|
|
61
|
-
op.create_index(IDX_CONTENT_HASH, TABLE, ["content_hash"])
|
|
62
|
-
|
|
63
|
-
op.create_index(
|
|
64
|
-
IDX_PATH_GIST,
|
|
65
|
-
TABLE,
|
|
66
|
-
["path"],
|
|
67
|
-
postgresql_using="GIST",
|
|
68
|
-
postgresql_ops={"path": "gist_ltree_ops"},
|
|
69
|
-
)
|
|
70
|
-
op.create_index(IDX_PATH_BTREE, TABLE, ["path"])
|
|
71
|
-
op.create_index(IDX_VALUE_TRGM, TABLE, ["value"], postgresql_using="GIN", postgresql_ops={"value": "gin_trgm_ops"})
|
|
72
|
-
|
|
73
|
-
op.create_index(
|
|
74
|
-
IDX_EMBED_HNSW,
|
|
75
|
-
TABLE,
|
|
76
|
-
["embedding"],
|
|
77
|
-
postgresql_using="HNSW",
|
|
78
|
-
postgresql_with={"m": 16, "ef_construction": 64},
|
|
79
|
-
postgresql_ops={"embedding": "vector_l2_ops"},
|
|
80
|
-
)
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
def downgrade() -> None:
|
|
84
|
-
# Drop all indexes
|
|
85
|
-
op.drop_index(IDX_EMBED_HNSW, table_name=TABLE, if_exists=True)
|
|
86
|
-
op.drop_index(IDX_VALUE_TRGM, table_name=TABLE, if_exists=True)
|
|
87
|
-
op.drop_index(IDX_PATH_BTREE, table_name=TABLE, if_exists=True)
|
|
88
|
-
op.drop_index(IDX_PATH_GIST, table_name=TABLE, if_exists=True)
|
|
89
|
-
op.drop_index(IDX_CONTENT_HASH, table_name=TABLE, if_exists=True)
|
|
90
|
-
op.drop_index(op.f("ix_ai_search_index_entity_id"), table_name=TABLE, if_exists=True)
|
|
91
|
-
|
|
92
|
-
# Drop table and enum
|
|
93
|
-
op.drop_table(TABLE, if_exists=True)
|
|
94
|
-
field_type_enum = sa.Enum(name="field_type")
|
|
95
|
-
field_type_enum.drop(op.get_bind(), checkfirst=True)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{orchestrator_core-4.5.0a7.dist-info → orchestrator_core-4.5.0a8.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|