orchestrator-core 4.4.0rc2-py3-none-any.whl → 5.0.0a1-py3-none-any.whl
This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as published in their public registries.
- orchestrator/__init__.py +1 -1
- orchestrator/api/api_v1/api.py +7 -0
- orchestrator/api/api_v1/endpoints/agent.py +62 -0
- orchestrator/api/api_v1/endpoints/processes.py +6 -12
- orchestrator/api/api_v1/endpoints/search.py +197 -0
- orchestrator/api/api_v1/endpoints/subscriptions.py +0 -1
- orchestrator/app.py +4 -0
- orchestrator/cli/index_llm.py +73 -0
- orchestrator/cli/main.py +8 -1
- orchestrator/cli/resize_embedding.py +136 -0
- orchestrator/cli/scheduler.py +29 -40
- orchestrator/cli/search_explore.py +203 -0
- orchestrator/db/models.py +37 -1
- orchestrator/graphql/schema.py +0 -5
- orchestrator/graphql/schemas/process.py +2 -2
- orchestrator/graphql/utils/create_resolver_error_handler.py +1 -1
- orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +95 -0
- orchestrator/schedules/__init__.py +2 -1
- orchestrator/schedules/resume_workflows.py +2 -2
- orchestrator/schedules/scheduling.py +24 -64
- orchestrator/schedules/task_vacuum.py +2 -2
- orchestrator/schedules/validate_products.py +2 -8
- orchestrator/schedules/validate_subscriptions.py +2 -2
- orchestrator/schemas/search.py +101 -0
- orchestrator/search/__init__.py +0 -0
- orchestrator/search/agent/__init__.py +1 -0
- orchestrator/search/agent/prompts.py +62 -0
- orchestrator/search/agent/state.py +8 -0
- orchestrator/search/agent/tools.py +122 -0
- orchestrator/search/core/__init__.py +0 -0
- orchestrator/search/core/embedding.py +64 -0
- orchestrator/search/core/exceptions.py +16 -0
- orchestrator/search/core/types.py +162 -0
- orchestrator/search/core/validators.py +27 -0
- orchestrator/search/docs/index.md +37 -0
- orchestrator/search/docs/running_local_text_embedding_inference.md +45 -0
- orchestrator/search/filters/__init__.py +27 -0
- orchestrator/search/filters/base.py +236 -0
- orchestrator/search/filters/date_filters.py +75 -0
- orchestrator/search/filters/definitions.py +76 -0
- orchestrator/search/filters/ltree_filters.py +31 -0
- orchestrator/search/filters/numeric_filter.py +60 -0
- orchestrator/search/indexing/__init__.py +3 -0
- orchestrator/search/indexing/indexer.py +316 -0
- orchestrator/search/indexing/registry.py +88 -0
- orchestrator/search/indexing/tasks.py +53 -0
- orchestrator/search/indexing/traverse.py +209 -0
- orchestrator/search/retrieval/__init__.py +3 -0
- orchestrator/search/retrieval/builder.py +64 -0
- orchestrator/search/retrieval/engine.py +96 -0
- orchestrator/search/retrieval/ranker.py +202 -0
- orchestrator/search/retrieval/utils.py +88 -0
- orchestrator/search/retrieval/validation.py +174 -0
- orchestrator/search/schemas/__init__.py +0 -0
- orchestrator/search/schemas/parameters.py +114 -0
- orchestrator/search/schemas/results.py +47 -0
- orchestrator/services/processes.py +11 -16
- orchestrator/services/subscriptions.py +0 -4
- orchestrator/settings.py +29 -1
- orchestrator/targets.py +0 -1
- orchestrator/workflow.py +1 -8
- orchestrator/workflows/utils.py +1 -48
- {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/METADATA +6 -3
- {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/RECORD +66 -30
- orchestrator/graphql/resolvers/scheduled_tasks.py +0 -36
- orchestrator/graphql/schemas/scheduled_task.py +0 -8
- orchestrator/schedules/scheduler.py +0 -163
- {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/licenses/LICENSE +0 -0
orchestrator/__init__.py
CHANGED
orchestrator/api/api_v1/api.py
CHANGED
@@ -22,6 +22,7 @@ from orchestrator.api.api_v1.endpoints import (
     product_blocks,
     products,
     resource_types,
+    search,
     settings,
     subscription_customer_descriptions,
     subscriptions,
@@ -83,3 +84,9 @@ api_router.include_router(
     tags=["Core", "Translations"],
 )
 api_router.include_router(ws.router, prefix="/ws", tags=["Core", "Events"])
+
+api_router.include_router(
+    search.router,
+    prefix="/search",
+    tags=["Core", "Search"],
+)
orchestrator/api/api_v1/endpoints/agent.py
ADDED
@@ -0,0 +1,62 @@
+import structlog
+from fastapi import FastAPI, HTTPException
+from starlette.types import ASGIApp
+
+from orchestrator.settings import app_settings
+
+logger = structlog.get_logger(__name__)
+
+
+def _disabled_agent_app(reason: str) -> FastAPI:
+    app = FastAPI(title="Agent disabled")
+
+    @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"])
+    async def _disabled(path: str) -> None:
+        raise HTTPException(status_code=503, detail=f"Agent disabled: {reason}")
+
+    return app
+
+
+def build_agent_app() -> ASGIApp:
+    if not app_settings.AGENT_MODEL or not app_settings.OPENAI_API_KEY:
+        logger.warning("Agent route disabled: missing model or OPENAI_API_KEY")
+        return _disabled_agent_app("missing configuration")
+
+    try:
+        from pydantic_ai.ag_ui import StateDeps
+        from pydantic_ai.agent import Agent
+        from pydantic_ai.settings import ModelSettings
+
+        from orchestrator.search.agent.prompts import get_base_instructions, get_dynamic_instructions
+        from orchestrator.search.agent.state import SearchState
+        from orchestrator.search.agent.tools import search_toolset
+    except ImportError:
+        logger.error(
+            "\nRequired packages not installed:\n"
+            "WARNING: These packages are NOT compatible with the current "
+            "pydantic version in orchestrator-core.\n Upgrading pydantic to install "
+            "may cause incompatibilities or runtime errors.\n\n"
+            "  pydantic-ai==0.7.0\n"
+            "  ag-ui-protocol>=0.1.8\n\n"
+            "Install them locally to enable the agent:\n"
+            "  pip install 'pydantic-ai==0.7.0' 'ag-ui-protocol>=0.1.8'\n"
+        )
+        logger.warning("Agent route disabled: Missing required packages")
+        return _disabled_agent_app("Missing required packages")
+
+    try:
+        agent = Agent(
+            model=app_settings.AGENT_MODEL,
+            deps_type=StateDeps[SearchState],
+            model_settings=ModelSettings(
+                parallel_tool_calls=False
+            ),  # https://github.com/pydantic/pydantic-ai/issues/562
+            toolsets=[search_toolset],
+        )
+        agent.instructions(get_base_instructions)
+        agent.instructions(get_dynamic_instructions)
+
+        return agent.to_ag_ui(deps=StateDeps(SearchState()))
+    except Exception as e:
+        logger.error("Agent init failed; serving disabled stub.", error=str(e))
+        return _disabled_agent_app(str(e))
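Because `build_agent_app()` always returns an ASGI app, the `/agent` mount never fails at startup; misconfiguration degrades to a catch-all 503 stub. A minimal sketch of probing that behavior (the request path is arbitrary, since the stub matches every path):

```python
from fastapi.testclient import TestClient

from orchestrator.api.api_v1.endpoints.agent import build_agent_app

# With AGENT_MODEL or OPENAI_API_KEY unset, the stub app is returned.
client = TestClient(build_agent_app())

resp = client.post("/run")  # any path hits the catch-all route
assert resp.status_code == 503
print(resp.json()["detail"])  # e.g. "Agent disabled: missing configuration"
```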
orchestrator/api/api_v1/endpoints/processes.py
CHANGED
@@ -25,7 +25,7 @@ from fastapi.param_functions import Body, Depends, Header
 from fastapi.routing import APIRouter
 from fastapi.websockets import WebSocket
 from fastapi_etag.dependency import CacheHit
-from more_itertools import chunked,
+from more_itertools import chunked, last
 from sentry_sdk.tracing import trace
 from sqlalchemy import CompoundSelect, Select, select
 from sqlalchemy.orm import defer, joinedload
@@ -88,17 +88,11 @@ def check_global_lock() -> None:
     )


-def
-    """Extract
-
-    For a suspended process this includes all previously completed steps as well as the current step.
-    For a completed process this includes all steps.
-    """
-    if not (remaining_steps := pstat.log):
-        return pstat.workflow.steps
-
+def get_current_steps(pstat: ProcessStat) -> StepList:
+    """Extract past and current steps from the ProcessStat."""
+    remaining_steps = pstat.log
     past_steps = pstat.workflow.steps[: -len(remaining_steps)]
-    return StepList(past_steps
+    return StepList(past_steps + [pstat.log[0]])


 def get_auth_callbacks(steps: StepList, workflow: Workflow) -> tuple[Authorizer | None, Authorizer | None]:
@@ -206,7 +200,7 @@ def resume_process_endpoint(
         raise_status(HTTPStatus.CONFLICT, f"Resuming a {process.last_status.lower()} workflow is not possible")

     pstat = load_process(process)
-    auth_resume, auth_retry = get_auth_callbacks(
+    auth_resume, auth_retry = get_auth_callbacks(get_current_steps(pstat), pstat.workflow)
     if process.last_status == ProcessStatus.SUSPENDED:
         if auth_resume is not None and not auth_resume(user_model):
             raise_status(HTTPStatus.FORBIDDEN, "User is not authorized to resume step")
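The rewritten `get_current_steps` drops the early return for an empty log, so it now assumes `pstat.log` is non-empty (`pstat.log[0]` is accessed unconditionally). The slicing itself works as in this toy illustration (values invented for clarity):

```python
# Toy values: a workflow of five steps with three still in the log,
# the current step first.
steps = ["create", "validate", "provision", "check", "done"]
log = ["provision", "check", "done"]

past_steps = steps[: -len(log)]  # -> ["create", "validate"]
current = [log[0]]               # -> ["provision"]
print(past_steps + current)      # -> ['create', 'validate', 'provision']
```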
orchestrator/api/api_v1/endpoints/search.py
ADDED
@@ -0,0 +1,197 @@
+from typing import Any, TypeVar, cast
+
+from fastapi import APIRouter, HTTPException, Query, status
+from pydantic import BaseModel
+from sqlalchemy import case, select
+from sqlalchemy.orm import selectinload
+
+from orchestrator.db import (
+    ProcessTable,
+    ProductTable,
+    SubscriptionTable,
+    WorkflowTable,
+    db,
+)
+from orchestrator.schemas.search import (
+    ConnectionSchema,
+    PageInfoSchema,
+    PathsResponse,
+    ProcessSearchSchema,
+    ProductSearchSchema,
+    SubscriptionSearchResult,
+    WorkflowSearchSchema,
+)
+from orchestrator.schemas.subscription import SubscriptionDomainModelSchema
+from orchestrator.search.core.types import EntityType, FieldType, UIType
+from orchestrator.search.filters.definitions import generate_definitions
+from orchestrator.search.retrieval import execute_search
+from orchestrator.search.retrieval.builder import build_paths_query, create_path_autocomplete_lquery
+from orchestrator.search.retrieval.validation import is_lquery_syntactically_valid
+from orchestrator.search.schemas.parameters import (
+    BaseSearchParameters,
+    ProcessSearchParameters,
+    ProductSearchParameters,
+    SubscriptionSearchParameters,
+    WorkflowSearchParameters,
+)
+from orchestrator.search.schemas.results import PathInfo, TypeDefinition
+
+router = APIRouter()
+T = TypeVar("T", bound=BaseModel)
+
+
+async def _perform_search_and_fetch_simple(
+    search_params: BaseSearchParameters,
+    db_model: Any,
+    response_schema: type[BaseModel],
+    pk_column_name: str,
+    eager_loads: list[Any],
+) -> ConnectionSchema:
+    results = await execute_search(search_params=search_params, db_session=db.session, limit=20)
+
+    if not results:
+        data: dict[str, Any] = {"page_info": PageInfoSchema(), "page": []}
+        return ConnectionSchema(**cast(Any, data))
+
+    entity_ids = [res.entity_id for res in results]
+    pk_column = getattr(db_model, pk_column_name)
+    ordering_case = case({entity_id: i for i, entity_id in enumerate(entity_ids)}, value=pk_column)
+
+    stmt = select(db_model).options(*eager_loads).filter(pk_column.in_(entity_ids)).order_by(ordering_case)
+    entities = db.session.scalars(stmt).all()
+
+    page = [response_schema.model_validate(entity) for entity in entities]
+
+    data = {"page_info": PageInfoSchema(), "page": page}
+    return ConnectionSchema(**cast(Any, data))
+
+
+@router.post(
+    "/subscriptions",
+    response_model=ConnectionSchema[SubscriptionSearchResult],
+    response_model_by_alias=True,
+)
+async def search_subscriptions(
+    search_params: SubscriptionSearchParameters,
+) -> ConnectionSchema[SubscriptionSearchResult]:
+    search_results = await execute_search(search_params=search_params, db_session=db.session, limit=20)
+
+    if not search_results:
+        data = {"page_info": PageInfoSchema(), "page": []}
+        return ConnectionSchema(**cast(Any, data))
+
+    search_info_map = {res.entity_id: res for res in search_results}
+    entity_ids = list(search_info_map.keys())
+
+    pk_column = SubscriptionTable.subscription_id
+    ordering_case = case({entity_id: i for i, entity_id in enumerate(entity_ids)}, value=pk_column)
+
+    stmt = (
+        select(SubscriptionTable)
+        .options(
+            selectinload(SubscriptionTable.product),
+            selectinload(SubscriptionTable.customer_descriptions),
+        )
+        .filter(pk_column.in_(entity_ids))
+        .order_by(ordering_case)
+    )
+    subscriptions = db.session.scalars(stmt).all()
+
+    page = []
+    for sub in subscriptions:
+        search_data = search_info_map.get(str(sub.subscription_id))
+        if search_data:
+            subscription_model = SubscriptionDomainModelSchema.model_validate(sub)
+
+            result_item = SubscriptionSearchResult(
+                score=search_data.score,
+                highlight=search_data.highlight,
+                subscription=subscription_model.model_dump(),
+            )
+            page.append(result_item)
+
+    data = {"page_info": PageInfoSchema(), "page": page}
+    return ConnectionSchema(**cast(Any, data))
+
+
+@router.post("/workflows", response_model=ConnectionSchema[WorkflowSearchSchema], response_model_by_alias=True)
+async def search_workflows(search_params: WorkflowSearchParameters) -> ConnectionSchema[WorkflowSearchSchema]:
+    return await _perform_search_and_fetch_simple(
+        search_params=search_params,
+        db_model=WorkflowTable,
+        response_schema=WorkflowSearchSchema,
+        pk_column_name="workflow_id",
+        eager_loads=[selectinload(WorkflowTable.products)],
+    )
+
+
+@router.post("/products", response_model=ConnectionSchema[ProductSearchSchema], response_model_by_alias=True)
+async def search_products(search_params: ProductSearchParameters) -> ConnectionSchema[ProductSearchSchema]:
+    return await _perform_search_and_fetch_simple(
+        search_params=search_params,
+        db_model=ProductTable,
+        response_schema=ProductSearchSchema,
+        pk_column_name="product_id",
+        eager_loads=[
+            selectinload(ProductTable.workflows),
+            selectinload(ProductTable.fixed_inputs),
+            selectinload(ProductTable.product_blocks),
+        ],
+    )
+
+
+@router.post("/processes", response_model=ConnectionSchema[ProcessSearchSchema], response_model_by_alias=True)
+async def search_processes(search_params: ProcessSearchParameters) -> ConnectionSchema[ProcessSearchSchema]:
+    return await _perform_search_and_fetch_simple(
+        search_params=search_params,
+        db_model=ProcessTable,
+        response_schema=ProcessSearchSchema,
+        pk_column_name="process_id",
+        eager_loads=[
+            selectinload(ProcessTable.workflow),
+        ],
+    )
+
+
+@router.get(
+    "/paths",
+    response_model=PathsResponse,
+    response_model_exclude_none=True,
+)
+async def list_paths(
+    prefix: str = Query("", min_length=0),
+    q: str | None = Query(None, description="Query for path suggestions"),
+    entity_type: EntityType = Query(EntityType.SUBSCRIPTION),
+    limit: int = Query(10, ge=1, le=10),
+) -> PathsResponse:
+    if prefix:
+        lquery_pattern = create_path_autocomplete_lquery(prefix)
+
+        if not is_lquery_syntactically_valid(lquery_pattern, db.session):
+            raise HTTPException(
+                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                detail=f"Prefix '{prefix}' creates an invalid search pattern.",
+            )
+    stmt = build_paths_query(entity_type=entity_type, prefix=prefix, q=q)
+    stmt = stmt.limit(limit)
+    rows = db.session.execute(stmt).all()
+
+    paths = [
+        PathInfo(
+            path=str(path),
+            type=UIType.from_field_type(FieldType(value_type)),
+        )
+        for path, value_type in rows
+    ]
+
+    return PathsResponse(prefix=prefix, paths=paths)
+
+
+@router.get(
+    "/definitions",
+    response_model=dict[UIType, TypeDefinition],
+    response_model_exclude_none=True,
+)
+async def get_definitions() -> dict[UIType, TypeDefinition]:
+    """Provide a static definition of operators and schemas for each UI type."""
+    return generate_definitions()
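A hedged sketch of exercising the new endpoints; the base URL and the JSON field name are assumptions (the body is validated against `SubscriptionSearchParameters` from `orchestrator/search/schemas/parameters.py`):

```python
import httpx

# Assumed host/port and payload shape; adjust to your deployment.
resp = httpx.post(
    "http://localhost:8080/api/search/subscriptions",
    json={"query": "10G circuit amsterdam"},
)
resp.raise_for_status()
for item in resp.json()["page"]:
    print(item["score"], item["highlight"])
```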
orchestrator/app.py
CHANGED
@@ -41,6 +41,7 @@ from nwastdlib.logging import ClearStructlogContextASGIMiddleware, initialise_logging
 from oauth2_lib.fastapi import AuthManager, Authorization, GraphqlAuthorization, OIDCAuth
 from orchestrator import __version__
 from orchestrator.api.api_v1.api import api_router
+from orchestrator.api.api_v1.endpoints.agent import build_agent_app
 from orchestrator.api.error_handling import ProblemDetailException
 from orchestrator.cli.main import app as cli_app
 from orchestrator.db import db, init_database
@@ -150,6 +151,9 @@ class OrchestratorCore(FastAPI):
         metrics_app = make_asgi_app(registry=ORCHESTRATOR_METRICS_REGISTRY)
         self.mount("/api/metrics", metrics_app)

+        agent_app = build_agent_app()
+        self.mount("/agent", agent_app)
+
         @self.router.get("/", response_model=str, response_class=JSONResponse, include_in_schema=False)
         def _index() -> str:
             return "Orchestrator Core"
orchestrator/cli/index_llm.py
ADDED
@@ -0,0 +1,73 @@
+import typer
+
+from orchestrator.search.core.types import EntityType
+from orchestrator.search.indexing import run_indexing_for_entity
+
+app = typer.Typer(
+    name="index",
+    help="Index search indexes",
+)
+
+
+@app.command("subscriptions")
+def subscriptions_command(
+    subscription_id: str | None = typer.Option(None, help="UUID (default = all)"),
+    dry_run: bool = typer.Option(False, help="No DB writes"),
+    force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+) -> None:
+    """Index subscription_search_index."""
+    run_indexing_for_entity(
+        entity_kind=EntityType.SUBSCRIPTION,
+        entity_id=subscription_id,
+        dry_run=dry_run,
+        force_index=force_index,
+    )
+
+
+@app.command("products")
+def products_command(
+    product_id: str | None = typer.Option(None, help="UUID (default = all)"),
+    dry_run: bool = typer.Option(False, help="No DB writes"),
+    force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+) -> None:
+    """Index product_search_index."""
+    run_indexing_for_entity(
+        entity_kind=EntityType.PRODUCT,
+        entity_id=product_id,
+        dry_run=dry_run,
+        force_index=force_index,
+    )
+
+
+@app.command("processes")
+def processes_command(
+    process_id: str | None = typer.Option(None, help="UUID (default = all)"),
+    dry_run: bool = typer.Option(False, help="No DB writes"),
+    force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+) -> None:
+    """Index process_search_index."""
+    run_indexing_for_entity(
+        entity_kind=EntityType.PROCESS,
+        entity_id=process_id,
+        dry_run=dry_run,
+        force_index=force_index,
+    )
+
+
+@app.command("workflows")
+def workflows_command(
+    workflow_id: str | None = typer.Option(None, help="UUID (default = all)"),
+    dry_run: bool = typer.Option(False, help="No DB writes"),
+    force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+) -> None:
+    """Index workflow_search_index."""
+    run_indexing_for_entity(
+        entity_kind=EntityType.WORKFLOW,
+        entity_id=workflow_id,
+        dry_run=dry_run,
+        force_index=force_index,
+    )
+
+
+if __name__ == "__main__":
+    app()
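A hedged invocation sketch using typer's test runner (in a deployment these run under the main CLI via the `index` sub-app registered in `cli/main.py` below):

```python
from typer.testing import CliRunner

from orchestrator.cli.index_llm import app

runner = CliRunner()
# Walk all subscriptions, bypass the hash cache, but write nothing:
result = runner.invoke(app, ["subscriptions", "--dry-run", "--force-index"])
print(result.output)
```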
orchestrator/cli/main.py
CHANGED
@@ -13,12 +13,19 @@

 import typer

-from orchestrator.cli import database, generate, scheduler
+from orchestrator.cli import database, generate, index_llm, resize_embedding, scheduler, search_explore

 app = typer.Typer()
 app.add_typer(scheduler.app, name="scheduler", help="Access all the scheduler functions")
 app.add_typer(database.app, name="db", help="Interact with the application database")
 app.add_typer(generate.app, name="generate", help="Generate products, workflows and other artifacts")
+app.add_typer(index_llm.app, name="index", help="(Re-)Index the search table.")
+app.add_typer(search_explore.app, name="search", help="Try out different search types.")
+app.add_typer(
+    resize_embedding.app,
+    name="embedding",
+    help="Resize the vector dimension of the embedding column in the search table.",
+)


 if __name__ == "__main__":
orchestrator/cli/resize_embedding.py
ADDED
@@ -0,0 +1,136 @@
+import typer
+import structlog
+from sqlalchemy import text
+from sqlalchemy.exc import SQLAlchemyError
+from orchestrator.settings import app_settings
+from orchestrator.db import db
+from orchestrator.db.models import AiSearchIndex
+
+logger = structlog.get_logger(__name__)
+
+app = typer.Typer(
+    name="embedding",
+    help="Resize vector dimensions of the embeddings.",
+)
+
+
+def get_current_embedding_dimension() -> int | None:
+    """Get the current dimension of the embedding column from ai_search_index table.
+
+    Returns:
+        Current dimension size or None if no records exist or column doesn't exist
+    """
+    try:
+        query = text(
+            """
+            SELECT vector_dims(embedding) as dimension
+            FROM ai_search_index
+            WHERE embedding IS NOT NULL
+            LIMIT 1
+            """
+        )
+        result = db.session.execute(query).fetchone()
+        if result and result[0]:
+            return result[0]
+        return None
+
+    except SQLAlchemyError as e:
+        logger.error("Failed to get current embedding dimension", error=str(e))
+        return None
+
+
+def drop_all_embeddings() -> int:
+    """Drop all records from the ai_search_index table.
+
+    Returns:
+        Number of records deleted
+    """
+    try:
+        result = db.session.query(AiSearchIndex).delete()
+        db.session.commit()
+        logger.info(f"Deleted {result} records from ai_search_index")
+        return result
+
+    except SQLAlchemyError as e:
+        db.session.rollback()
+        logger.error("Failed to drop embeddings records", error=str(e))
+        raise
+
+
+def alter_embedding_column_dimension(new_dimension: int) -> None:
+    """Alter the embedding column to use the new dimension size.
+
+    Args:
+        new_dimension: New vector dimension size
+    """
+    try:
+        drop_query = text("ALTER TABLE ai_search_index DROP COLUMN IF EXISTS embedding")
+        db.session.execute(drop_query)
+
+        add_query = text(f"ALTER TABLE ai_search_index ADD COLUMN embedding vector({new_dimension})")
+        db.session.execute(add_query)
+
+        db.session.commit()
+        logger.info(f"Altered embedding column to dimension {new_dimension}")
+
+    except SQLAlchemyError as e:
+        db.session.rollback()
+        logger.error("Failed to alter embedding column dimension", error=str(e))
+        raise
+
+
+@app.command("resize")
+def resize_embeddings_command() -> None:
+    """Resize vector dimensions of the ai_search_index embedding column.
+
+    Compares the current embedding dimension in the database with the configured
+    dimension in app_settings. If they differ, drops all records and alters the
+    column to match the new dimension.
+    """
+    new_dimension = app_settings.EMBEDDING_DIMENSION
+
+    logger.info("Starting embedding dimension resize", new_dimension=new_dimension)
+
+    current_dimension = get_current_embedding_dimension()
+
+    if current_dimension is None:
+        logger.warning("Could not determine current dimension for embedding column")
+
+    if current_dimension == new_dimension:
+        logger.info(
+            "Embedding dimensions match, no resize needed",
+            current_dimension=current_dimension,
+            new_dimension=new_dimension,
+        )
+        return
+
+    logger.info("Dimension mismatch detected", current_dimension=current_dimension, new_dimension=new_dimension)
+
+    if not typer.confirm(
+        "This will DELETE ALL RECORDS from ai_search_index and alter the embedding column. Continue?"
+    ):
+        logger.info("Operation cancelled by user")
+        return
+
+    try:
+        # Drop all records first.
+        logger.info("Dropping all embedding records...")
+        deleted_count = drop_all_embeddings()
+
+        # Then alter column dimension.
+        logger.info(f"Altering embedding column to dimension {new_dimension}...")
+        alter_embedding_column_dimension(new_dimension)
+
+        logger.info(
+            "Embedding dimension resize completed successfully",
+            records_deleted=deleted_count,
+            new_dimension=new_dimension,
+        )
+
+    except Exception as e:
+        logger.error("Embedding dimension resize failed", error=str(e))
+        raise typer.Exit(1)
+
+
+if __name__ == "__main__":
+    app()
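The resize flow is destructive by design: it deletes every indexed row, then drops and recreates the `embedding` column at `app_settings.EMBEDDING_DIMENSION`, after which a full re-index is required. A hedged invocation sketch:

```python
from typer.testing import CliRunner

from orchestrator.cli.resize_embedding import app

runner = CliRunner()
# Answer "y" to the destructive-operation confirmation prompt.
result = runner.invoke(app, ["resize"], input="y\n")
print(result.output)
```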
orchestrator/cli/scheduler.py
CHANGED
@@ -13,11 +13,12 @@


 import logging
+from time import sleep

+import schedule
 import typer
-from apscheduler.schedulers.blocking import BlockingScheduler

-from orchestrator.schedules
+from orchestrator.schedules import ALL_SCHEDULERS

 log = logging.getLogger(__name__)

@@ -26,48 +27,36 @@ app: typer.Typer = typer.Typer()

 @app.command()
 def run() -> None:
-    """
-
-
-
-
-
-
-
-
-
-
-
+    """Loop eternally and run schedulers at configured times."""
+    for s in ALL_SCHEDULERS:
+        job = getattr(schedule.every(s.period), s.time_unit)
+        if s.at:
+            job = job.at(s.at)
+        job.do(s).tag(s.name)
+    log.info("Starting Schedule")
+    for j in schedule.jobs:
+        log.info("%s: %s", ", ".join(j.tags), j)
+    while True:
+        schedule.run_pending()
+        idle = schedule.idle_seconds()
+        if idle < 0:
+            log.info("Next job in queue is scheduled in the past, run it now.")
+        else:
+            log.info("Sleeping for %d seconds", idle)
+            sleep(idle)


 @app.command()
 def show_schedule() -> None:
-    """Show the currently configured schedule.
-
-
-
-    with get_paused_scheduler() as scheduler:
-        jobs = scheduler.get_jobs()
-
-        for job in jobs:
-            typer.echo(f"[{job.id}] Next run: {job.next_run_time} | Trigger: {job.trigger}")
+    """Show the currently configured schedule."""
+    for s in ALL_SCHEDULERS:
+        at_str = f"@ {s.at} " if s.at else ""
+        typer.echo(f"{s.name}: {s.__name__} {at_str}every {s.period} {s.time_unit}")


 @app.command()
-def force(
-    """Force the execution of (a) scheduler(s) based on a
-
-
-
-    if not job:
-        typer.echo(f"Job '{job_id}' not found.")
-        raise typer.Exit(code=1)
-
-    typer.echo(f"Running job [{job.id}] now...")
-    try:
-        job.func(*job.args or (), **job.kwargs or {})
-        typer.echo("Job executed successfully.")
-    except Exception as e:
-        typer.echo(f"Job execution failed: {e}")
-        raise typer.Exit(code=1)
+def force(keyword: str) -> None:
+    """Force the execution of (a) scheduler(s) based on a keyword."""
+    for s in ALL_SCHEDULERS:
+        if keyword in s.name or keyword in s.__name__:
+            s()