orchestrator-core 4.4.0rc2-py3-none-any.whl → 5.0.0a1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. orchestrator/__init__.py +1 -1
  2. orchestrator/api/api_v1/api.py +7 -0
  3. orchestrator/api/api_v1/endpoints/agent.py +62 -0
  4. orchestrator/api/api_v1/endpoints/processes.py +6 -12
  5. orchestrator/api/api_v1/endpoints/search.py +197 -0
  6. orchestrator/api/api_v1/endpoints/subscriptions.py +0 -1
  7. orchestrator/app.py +4 -0
  8. orchestrator/cli/index_llm.py +73 -0
  9. orchestrator/cli/main.py +8 -1
  10. orchestrator/cli/resize_embedding.py +136 -0
  11. orchestrator/cli/scheduler.py +29 -40
  12. orchestrator/cli/search_explore.py +203 -0
  13. orchestrator/db/models.py +37 -1
  14. orchestrator/graphql/schema.py +0 -5
  15. orchestrator/graphql/schemas/process.py +2 -2
  16. orchestrator/graphql/utils/create_resolver_error_handler.py +1 -1
  17. orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +95 -0
  18. orchestrator/schedules/__init__.py +2 -1
  19. orchestrator/schedules/resume_workflows.py +2 -2
  20. orchestrator/schedules/scheduling.py +24 -64
  21. orchestrator/schedules/task_vacuum.py +2 -2
  22. orchestrator/schedules/validate_products.py +2 -8
  23. orchestrator/schedules/validate_subscriptions.py +2 -2
  24. orchestrator/schemas/search.py +101 -0
  25. orchestrator/search/__init__.py +0 -0
  26. orchestrator/search/agent/__init__.py +1 -0
  27. orchestrator/search/agent/prompts.py +62 -0
  28. orchestrator/search/agent/state.py +8 -0
  29. orchestrator/search/agent/tools.py +122 -0
  30. orchestrator/search/core/__init__.py +0 -0
  31. orchestrator/search/core/embedding.py +64 -0
  32. orchestrator/search/core/exceptions.py +16 -0
  33. orchestrator/search/core/types.py +162 -0
  34. orchestrator/search/core/validators.py +27 -0
  35. orchestrator/search/docs/index.md +37 -0
  36. orchestrator/search/docs/running_local_text_embedding_inference.md +45 -0
  37. orchestrator/search/filters/__init__.py +27 -0
  38. orchestrator/search/filters/base.py +236 -0
  39. orchestrator/search/filters/date_filters.py +75 -0
  40. orchestrator/search/filters/definitions.py +76 -0
  41. orchestrator/search/filters/ltree_filters.py +31 -0
  42. orchestrator/search/filters/numeric_filter.py +60 -0
  43. orchestrator/search/indexing/__init__.py +3 -0
  44. orchestrator/search/indexing/indexer.py +316 -0
  45. orchestrator/search/indexing/registry.py +88 -0
  46. orchestrator/search/indexing/tasks.py +53 -0
  47. orchestrator/search/indexing/traverse.py +209 -0
  48. orchestrator/search/retrieval/__init__.py +3 -0
  49. orchestrator/search/retrieval/builder.py +64 -0
  50. orchestrator/search/retrieval/engine.py +96 -0
  51. orchestrator/search/retrieval/ranker.py +202 -0
  52. orchestrator/search/retrieval/utils.py +88 -0
  53. orchestrator/search/retrieval/validation.py +174 -0
  54. orchestrator/search/schemas/__init__.py +0 -0
  55. orchestrator/search/schemas/parameters.py +114 -0
  56. orchestrator/search/schemas/results.py +47 -0
  57. orchestrator/services/processes.py +11 -16
  58. orchestrator/services/subscriptions.py +0 -4
  59. orchestrator/settings.py +29 -1
  60. orchestrator/targets.py +0 -1
  61. orchestrator/workflow.py +1 -8
  62. orchestrator/workflows/utils.py +1 -48
  63. {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/METADATA +6 -3
  64. {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/RECORD +66 -30
  65. orchestrator/graphql/resolvers/scheduled_tasks.py +0 -36
  66. orchestrator/graphql/schemas/scheduled_task.py +0 -8
  67. orchestrator/schedules/scheduler.py +0 -163
  68. {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/WHEEL +0 -0
  69. {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/licenses/LICENSE +0 -0
orchestrator/__init__.py CHANGED
@@ -13,7 +13,7 @@
 
 """This is the orchestrator workflow engine."""
 
-__version__ = "4.4.0rc2"
+__version__ = "5.0.0a1"
 
 from orchestrator.app import OrchestratorCore
 from orchestrator.settings import app_settings
orchestrator/api/api_v1/api.py CHANGED
@@ -22,6 +22,7 @@ from orchestrator.api.api_v1.endpoints import (
     product_blocks,
     products,
     resource_types,
+    search,
     settings,
     subscription_customer_descriptions,
     subscriptions,
@@ -83,3 +84,9 @@ api_router.include_router(
     tags=["Core", "Translations"],
 )
 api_router.include_router(ws.router, prefix="/ws", tags=["Core", "Events"])
+
+api_router.include_router(
+    search.router,
+    prefix="/search",
+    tags=["Core", "Search"],
+)
orchestrator/api/api_v1/endpoints/agent.py ADDED
@@ -0,0 +1,62 @@
+import structlog
+from fastapi import FastAPI, HTTPException
+from starlette.types import ASGIApp
+
+from orchestrator.settings import app_settings
+
+logger = structlog.get_logger(__name__)
+
+
+def _disabled_agent_app(reason: str) -> FastAPI:
+    app = FastAPI(title="Agent disabled")
+
+    @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"])
+    async def _disabled(path: str) -> None:
+        raise HTTPException(status_code=503, detail=f"Agent disabled: {reason}")
+
+    return app
+
+
+def build_agent_app() -> ASGIApp:
+    if not app_settings.AGENT_MODEL or not app_settings.OPENAI_API_KEY:
+        logger.warning("Agent route disabled: missing model or OPENAI_API_KEY")
+        return _disabled_agent_app("missing configuration")
+
+    try:
+        from pydantic_ai.ag_ui import StateDeps
+        from pydantic_ai.agent import Agent
+        from pydantic_ai.settings import ModelSettings
+
+        from orchestrator.search.agent.prompts import get_base_instructions, get_dynamic_instructions
+        from orchestrator.search.agent.state import SearchState
+        from orchestrator.search.agent.tools import search_toolset
+    except ImportError:
+        logger.error(
+            "\nRequired packages not installed:\n"
+            "WARNING: These packages are NOT compatible with the current "
+            "pydantic version in orchestrator-core.\n Upgrading pydantic to install "
+            "may cause incompatibilities or runtime errors.\n\n"
+            " pydantic-ai==0.7.0\n"
+            " ag-ui-protocol>=0.1.8\n\n"
+            "Install them locally to enable the agent:\n"
+            " pip install 'pydantic-ai==0.7.0' 'ag-ui-protocol>=0.1.8'\n"
+        )
+        logger.warning("Agent route disabled: Missing required packages")
+        return _disabled_agent_app("Missing required packages")
+
+    try:
+        agent = Agent(
+            model=app_settings.AGENT_MODEL,
+            deps_type=StateDeps[SearchState],
+            model_settings=ModelSettings(
+                parallel_tool_calls=False
+            ),  # https://github.com/pydantic/pydantic-ai/issues/562
+            toolsets=[search_toolset],
+        )
+        agent.instructions(get_base_instructions)
+        agent.instructions(get_dynamic_instructions)
+
+        return agent.to_ag_ui(deps=StateDeps(SearchState()))
+    except Exception as e:
+        logger.error("Agent init failed; serving disabled stub.", error=str(e))
+        return _disabled_agent_app(str(e))
orchestrator/api/api_v1/endpoints/processes.py CHANGED
@@ -25,7 +25,7 @@ from fastapi.param_functions import Body, Depends, Header
 from fastapi.routing import APIRouter
 from fastapi.websockets import WebSocket
 from fastapi_etag.dependency import CacheHit
-from more_itertools import chunked, first, last
+from more_itertools import chunked, last
 from sentry_sdk.tracing import trace
 from sqlalchemy import CompoundSelect, Select, select
 from sqlalchemy.orm import defer, joinedload
@@ -88,17 +88,11 @@ def check_global_lock() -> None:
     )
 
 
-def get_steps_to_evaluate_for_rbac(pstat: ProcessStat) -> StepList:
-    """Extract all steps from the ProcessStat for a process that should be evaluated for a RBAC callback.
-
-    For a suspended process this includes all previously completed steps as well as the current step.
-    For a completed process this includes all steps.
-    """
-    if not (remaining_steps := pstat.log):
-        return pstat.workflow.steps
-
+def get_current_steps(pstat: ProcessStat) -> StepList:
+    """Extract past and current steps from the ProcessStat."""
+    remaining_steps = pstat.log
     past_steps = pstat.workflow.steps[: -len(remaining_steps)]
-    return StepList(past_steps >> first(remaining_steps))
+    return StepList(past_steps + [pstat.log[0]])
 
 
 def get_auth_callbacks(steps: StepList, workflow: Workflow) -> tuple[Authorizer | None, Authorizer | None]:
@@ -206,7 +200,7 @@ def resume_process_endpoint(
         raise_status(HTTPStatus.CONFLICT, f"Resuming a {process.last_status.lower()} workflow is not possible")
 
     pstat = load_process(process)
-    auth_resume, auth_retry = get_auth_callbacks(get_steps_to_evaluate_for_rbac(pstat), pstat.workflow)
+    auth_resume, auth_retry = get_auth_callbacks(get_current_steps(pstat), pstat.workflow)
     if process.last_status == ProcessStatus.SUSPENDED:
         if auth_resume is not None and not auth_resume(user_model):
             raise_status(HTTPStatus.FORBIDDEN, "User is not authorized to resume step")
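
Note on the renamed helper: get_current_steps derives "past plus current" via list slicing. A minimal sketch of the semantics, with hypothetical step names:

    # Illustrative only; step names are hypothetical.
    steps = ["create", "validate", "provision", "activate", "done"]  # workflow.steps
    log = ["activate", "done"]  # pstat.log: steps not yet completed

    past_steps = steps[: -len(log)]  # ["create", "validate", "provision"]
    current = past_steps + [log[0]]  # past steps plus the current step

    # Caveat: with an empty log, steps[:-0] == steps[:0] == [] and log[0] raises
    # IndexError, whereas the removed get_steps_to_evaluate_for_rbac returned all
    # workflow steps in that case.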
orchestrator/api/api_v1/endpoints/search.py ADDED
@@ -0,0 +1,197 @@
+from typing import Any, TypeVar, cast
+
+from fastapi import APIRouter, HTTPException, Query, status
+from pydantic import BaseModel
+from sqlalchemy import case, select
+from sqlalchemy.orm import selectinload
+
+from orchestrator.db import (
+    ProcessTable,
+    ProductTable,
+    SubscriptionTable,
+    WorkflowTable,
+    db,
+)
+from orchestrator.schemas.search import (
+    ConnectionSchema,
+    PageInfoSchema,
+    PathsResponse,
+    ProcessSearchSchema,
+    ProductSearchSchema,
+    SubscriptionSearchResult,
+    WorkflowSearchSchema,
+)
+from orchestrator.schemas.subscription import SubscriptionDomainModelSchema
+from orchestrator.search.core.types import EntityType, FieldType, UIType
+from orchestrator.search.filters.definitions import generate_definitions
+from orchestrator.search.retrieval import execute_search
+from orchestrator.search.retrieval.builder import build_paths_query, create_path_autocomplete_lquery
+from orchestrator.search.retrieval.validation import is_lquery_syntactically_valid
+from orchestrator.search.schemas.parameters import (
+    BaseSearchParameters,
+    ProcessSearchParameters,
+    ProductSearchParameters,
+    SubscriptionSearchParameters,
+    WorkflowSearchParameters,
+)
+from orchestrator.search.schemas.results import PathInfo, TypeDefinition
+
+router = APIRouter()
+T = TypeVar("T", bound=BaseModel)
+
+
+async def _perform_search_and_fetch_simple(
+    search_params: BaseSearchParameters,
+    db_model: Any,
+    response_schema: type[BaseModel],
+    pk_column_name: str,
+    eager_loads: list[Any],
+) -> ConnectionSchema:
+    results = await execute_search(search_params=search_params, db_session=db.session, limit=20)
+
+    if not results:
+        data: dict[str, Any] = {"page_info": PageInfoSchema(), "page": []}
+        return ConnectionSchema(**cast(Any, data))
+
+    entity_ids = [res.entity_id for res in results]
+    pk_column = getattr(db_model, pk_column_name)
+    ordering_case = case({entity_id: i for i, entity_id in enumerate(entity_ids)}, value=pk_column)
+
+    stmt = select(db_model).options(*eager_loads).filter(pk_column.in_(entity_ids)).order_by(ordering_case)
+    entities = db.session.scalars(stmt).all()
+
+    page = [response_schema.model_validate(entity) for entity in entities]
+
+    data = {"page_info": PageInfoSchema(), "page": page}
+    return ConnectionSchema(**cast(Any, data))
+
+
+@router.post(
+    "/subscriptions",
+    response_model=ConnectionSchema[SubscriptionSearchResult],
+    response_model_by_alias=True,
+)
+async def search_subscriptions(
+    search_params: SubscriptionSearchParameters,
+) -> ConnectionSchema[SubscriptionSearchResult]:
+    search_results = await execute_search(search_params=search_params, db_session=db.session, limit=20)
+
+    if not search_results:
+        data = {"page_info": PageInfoSchema(), "page": []}
+        return ConnectionSchema(**cast(Any, data))
+
+    search_info_map = {res.entity_id: res for res in search_results}
+    entity_ids = list(search_info_map.keys())
+
+    pk_column = SubscriptionTable.subscription_id
+    ordering_case = case({entity_id: i for i, entity_id in enumerate(entity_ids)}, value=pk_column)
+
+    stmt = (
+        select(SubscriptionTable)
+        .options(
+            selectinload(SubscriptionTable.product),
+            selectinload(SubscriptionTable.customer_descriptions),
+        )
+        .filter(pk_column.in_(entity_ids))
+        .order_by(ordering_case)
+    )
+    subscriptions = db.session.scalars(stmt).all()
+
+    page = []
+    for sub in subscriptions:
+        search_data = search_info_map.get(str(sub.subscription_id))
+        if search_data:
+            subscription_model = SubscriptionDomainModelSchema.model_validate(sub)
+
+            result_item = SubscriptionSearchResult(
+                score=search_data.score,
+                highlight=search_data.highlight,
+                subscription=subscription_model.model_dump(),
+            )
+            page.append(result_item)
+
+    data = {"page_info": PageInfoSchema(), "page": page}
+    return ConnectionSchema(**cast(Any, data))
+
+
+@router.post("/workflows", response_model=ConnectionSchema[WorkflowSearchSchema], response_model_by_alias=True)
+async def search_workflows(search_params: WorkflowSearchParameters) -> ConnectionSchema[WorkflowSearchSchema]:
+    return await _perform_search_and_fetch_simple(
+        search_params=search_params,
+        db_model=WorkflowTable,
+        response_schema=WorkflowSearchSchema,
+        pk_column_name="workflow_id",
+        eager_loads=[selectinload(WorkflowTable.products)],
+    )
+
+
+@router.post("/products", response_model=ConnectionSchema[ProductSearchSchema], response_model_by_alias=True)
+async def search_products(search_params: ProductSearchParameters) -> ConnectionSchema[ProductSearchSchema]:
+    return await _perform_search_and_fetch_simple(
+        search_params=search_params,
+        db_model=ProductTable,
+        response_schema=ProductSearchSchema,
+        pk_column_name="product_id",
+        eager_loads=[
+            selectinload(ProductTable.workflows),
+            selectinload(ProductTable.fixed_inputs),
+            selectinload(ProductTable.product_blocks),
+        ],
+    )
+
+
+@router.post("/processes", response_model=ConnectionSchema[ProcessSearchSchema], response_model_by_alias=True)
+async def search_processes(search_params: ProcessSearchParameters) -> ConnectionSchema[ProcessSearchSchema]:
+    return await _perform_search_and_fetch_simple(
+        search_params=search_params,
+        db_model=ProcessTable,
+        response_schema=ProcessSearchSchema,
+        pk_column_name="process_id",
+        eager_loads=[
+            selectinload(ProcessTable.workflow),
+        ],
+    )
+
+
+@router.get(
+    "/paths",
+    response_model=PathsResponse,
+    response_model_exclude_none=True,
+)
+async def list_paths(
+    prefix: str = Query("", min_length=0),
+    q: str | None = Query(None, description="Query for path suggestions"),
+    entity_type: EntityType = Query(EntityType.SUBSCRIPTION),
+    limit: int = Query(10, ge=1, le=10),
+) -> PathsResponse:
+    if prefix:
+        lquery_pattern = create_path_autocomplete_lquery(prefix)
+
+        if not is_lquery_syntactically_valid(lquery_pattern, db.session):
+            raise HTTPException(
+                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                detail=f"Prefix '{prefix}' creates an invalid search pattern.",
+            )
+    stmt = build_paths_query(entity_type=entity_type, prefix=prefix, q=q)
+    stmt = stmt.limit(limit)
+    rows = db.session.execute(stmt).all()
+
+    paths = [
+        PathInfo(
+            path=str(path),
+            type=UIType.from_field_type(FieldType(value_type)),
+        )
+        for path, value_type in rows
+    ]
+
+    return PathsResponse(prefix=prefix, paths=paths)
+
+
+@router.get(
+    "/definitions",
+    response_model=dict[UIType, TypeDefinition],
+    response_model_exclude_none=True,
+)
+async def get_definitions() -> dict[UIType, TypeDefinition]:
+    """Provide a static definition of operators and schemas for each UI type."""
+    return generate_definitions()
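
Both the subscription handler and _perform_search_and_fetch_simple keep the ranked order from execute_search when hydrating ORM rows: a CASE expression maps each primary key to its rank and serves as the ORDER BY. A self-contained sketch of that trick, with hypothetical IDs:

    from sqlalchemy import case, select

    from orchestrator.db import SubscriptionTable, db

    ranked_ids = ["id-3", "id-1", "id-2"]  # hypothetical, ordered by search score
    pk = SubscriptionTable.subscription_id
    ordering = case({eid: rank for rank, eid in enumerate(ranked_ids)}, value=pk)

    # IN (...) alone returns rows in arbitrary order; the CASE restores the ranking.
    stmt = select(SubscriptionTable).filter(pk.in_(ranked_ids)).order_by(ordering)
    rows = db.session.scalars(stmt).all()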
orchestrator/api/api_v1/endpoints/subscriptions.py CHANGED
@@ -12,7 +12,6 @@
 # limitations under the License.
 
 """Module that implements subscription related API endpoints."""
-
 from http import HTTPStatus
 from typing import Any
 from uuid import UUID
orchestrator/app.py CHANGED
@@ -41,6 +41,7 @@ from nwastdlib.logging import ClearStructlogContextASGIMiddleware, initialise_lo
 from oauth2_lib.fastapi import AuthManager, Authorization, GraphqlAuthorization, OIDCAuth
 from orchestrator import __version__
 from orchestrator.api.api_v1.api import api_router
+from orchestrator.api.api_v1.endpoints.agent import build_agent_app
 from orchestrator.api.error_handling import ProblemDetailException
 from orchestrator.cli.main import app as cli_app
 from orchestrator.db import db, init_database
@@ -150,6 +151,9 @@ class OrchestratorCore(FastAPI):
         metrics_app = make_asgi_app(registry=ORCHESTRATOR_METRICS_REGISTRY)
         self.mount("/api/metrics", metrics_app)
 
+        agent_app = build_agent_app()
+        self.mount("/agent", agent_app)
+
         @self.router.get("/", response_model=str, response_class=JSONResponse, include_in_schema=False)
         def _index() -> str:
             return "Orchestrator Core"
orchestrator/cli/index_llm.py ADDED
@@ -0,0 +1,73 @@
+import typer
+
+from orchestrator.search.core.types import EntityType
+from orchestrator.search.indexing import run_indexing_for_entity
+
+app = typer.Typer(
+    name="index",
+    help="Index search indexes",
+)
+
+
+@app.command("subscriptions")
+def subscriptions_command(
+    subscription_id: str | None = typer.Option(None, help="UUID (default = all)"),
+    dry_run: bool = typer.Option(False, help="No DB writes"),
+    force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+) -> None:
+    """Index subscription_search_index."""
+    run_indexing_for_entity(
+        entity_kind=EntityType.SUBSCRIPTION,
+        entity_id=subscription_id,
+        dry_run=dry_run,
+        force_index=force_index,
+    )
+
+
+@app.command("products")
+def products_command(
+    product_id: str | None = typer.Option(None, help="UUID (default = all)"),
+    dry_run: bool = typer.Option(False, help="No DB writes"),
+    force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+) -> None:
+    """Index product_search_index."""
+    run_indexing_for_entity(
+        entity_kind=EntityType.PRODUCT,
+        entity_id=product_id,
+        dry_run=dry_run,
+        force_index=force_index,
+    )
+
+
+@app.command("processes")
+def processes_command(
+    process_id: str | None = typer.Option(None, help="UUID (default = all)"),
+    dry_run: bool = typer.Option(False, help="No DB writes"),
+    force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+) -> None:
+    """Index process_search_index."""
+    run_indexing_for_entity(
+        entity_kind=EntityType.PROCESS,
+        entity_id=process_id,
+        dry_run=dry_run,
+        force_index=force_index,
+    )
+
+
+@app.command("workflows")
+def workflows_command(
+    workflow_id: str | None = typer.Option(None, help="UUID (default = all)"),
+    dry_run: bool = typer.Option(False, help="No DB writes"),
+    force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+) -> None:
+    """Index workflow_search_index."""
+    run_indexing_for_entity(
+        entity_kind=EntityType.WORKFLOW,
+        entity_id=workflow_id,
+        dry_run=dry_run,
+        force_index=force_index,
+    )
+
+
+if __name__ == "__main__":
+    app()
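
A hedged usage sketch for the new index commands, driven through Typer's test runner (a configured database is assumed):

    from typer.testing import CliRunner

    from orchestrator.cli.index_llm import app

    runner = CliRunner()
    # Dry-run a full subscription re-index: entities are traversed, nothing is written.
    result = runner.invoke(app, ["subscriptions", "--dry-run"])
    print(result.output)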
orchestrator/cli/main.py CHANGED
@@ -13,12 +13,19 @@
 
 import typer
 
-from orchestrator.cli import database, generate, scheduler
+from orchestrator.cli import database, generate, index_llm, resize_embedding, scheduler, search_explore
 
 app = typer.Typer()
 app.add_typer(scheduler.app, name="scheduler", help="Access all the scheduler functions")
 app.add_typer(database.app, name="db", help="Interact with the application database")
 app.add_typer(generate.app, name="generate", help="Generate products, workflows and other artifacts")
+app.add_typer(index_llm.app, name="index", help="(Re-)Index the search table.")
+app.add_typer(search_explore.app, name="search", help="Try out different search types.")
+app.add_typer(
+    resize_embedding.app,
+    name="embedding",
+    help="Resize the vector dimension of the embedding column in the search table.",
+)
 
 
 if __name__ == "__main__":
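
With these registrations, a project that wraps the core CLI in its own main.py (as the orchestrator-core docs do) gains roughly the following invocations; the search subcommands live in search_explore and are not shown in this diff:

    python main.py index subscriptions --force-index
    python main.py embedding resize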
orchestrator/cli/resize_embedding.py ADDED
@@ -0,0 +1,136 @@
+import typer
+import structlog
+from sqlalchemy import text
+from sqlalchemy.exc import SQLAlchemyError
+from orchestrator.settings import app_settings
+from orchestrator.db import db
+from orchestrator.db.models import AiSearchIndex
+
+logger = structlog.get_logger(__name__)
+
+app = typer.Typer(
+    name="embedding",
+    help="Resize vector dimensions of the embeddings.",
+)
+
+
+def get_current_embedding_dimension() -> int | None:
+    """Get the current dimension of the embedding column from ai_search_index table.
+
+    Returns:
+        Current dimension size or None if no records exist or column doesn't exist
+    """
+    try:
+        query = text(
+            """
+            SELECT vector_dims(embedding) as dimension
+            FROM ai_search_index
+            WHERE embedding IS NOT NULL
+            LIMIT 1
+            """
+        )
+        result = db.session.execute(query).fetchone()
+        if result and result[0]:
+            return result[0]
+        return None
+
+    except SQLAlchemyError as e:
+        logger.error("Failed to get current embedding dimension", error=str(e))
+        return None
+
+
+def drop_all_embeddings() -> int:
+    """Drop all records from the ai_search_index table.
+
+    Returns:
+        Number of records deleted
+    """
+    try:
+        result = db.session.query(AiSearchIndex).delete()
+        db.session.commit()
+        logger.info(f"Deleted {result} records from ai_search_index")
+        return result
+
+    except SQLAlchemyError as e:
+        db.session.rollback()
+        logger.error("Failed to drop embeddings records", error=str(e))
+        raise
+
+
+def alter_embedding_column_dimension(new_dimension: int) -> None:
+    """Alter the embedding column to use the new dimension size.
+
+    Args:
+        new_dimension: New vector dimension size
+    """
+    try:
+        drop_query = text("ALTER TABLE ai_search_index DROP COLUMN IF EXISTS embedding")
+        db.session.execute(drop_query)
+
+        add_query = text(f"ALTER TABLE ai_search_index ADD COLUMN embedding vector({new_dimension})")
+        db.session.execute(add_query)
+
+        db.session.commit()
+        logger.info(f"Altered embedding column to dimension {new_dimension}")
+
+    except SQLAlchemyError as e:
+        db.session.rollback()
+        logger.error("Failed to alter embedding column dimension", error=str(e))
+        raise
+
+
+@app.command("resize")
+def resize_embeddings_command() -> None:
+    """Resize vector dimensions of the ai_search_index embedding column.
+
+    Compares the current embedding dimension in the database with the configured
+    dimension in app_settings. If they differ, drops all records and alters the
+    column to match the new dimension.
+    """
+    new_dimension = app_settings.EMBEDDING_DIMENSION
+
+    logger.info("Starting embedding dimension resize", new_dimension=new_dimension)
+
+    current_dimension = get_current_embedding_dimension()
+
+    if current_dimension is None:
+        logger.warning("Could not determine current dimension for embedding column")
+
+    if current_dimension == new_dimension:
+        logger.info(
+            "Embedding dimensions match, no resize needed",
+            current_dimension=current_dimension,
+            new_dimension=new_dimension,
+        )
+        return
+
+    logger.info("Dimension mismatch detected", current_dimension=current_dimension, new_dimension=new_dimension)
+
+    if not typer.confirm(
+        "This will DELETE ALL RECORDS from ai_search_index and alter the embedding column. Continue?"
+    ):
+        logger.info("Operation cancelled by user")
+        return
+
+    try:
+        # Drop all records first.
+        logger.info("Dropping all embedding records...")
+        deleted_count = drop_all_embeddings()
+
+        # Then alter column dimension.
+        logger.info(f"Altering embedding column to dimension {new_dimension}...")
+        alter_embedding_column_dimension(new_dimension)
+
+        logger.info(
+            "Embedding dimension resize completed successfully",
+            records_deleted=deleted_count,
+            new_dimension=new_dimension,
+        )
+
+    except Exception as e:
+        logger.error("Embedding dimension resize failed", error=str(e))
+        raise typer.Exit(1)
+
+
+if __name__ == "__main__":
+    app()
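
After a resize the table is empty, so vector_dims() over rows can no longer confirm the change. One hedged way to verify the column type itself, assuming pgvector (where the column's typmod is the dimension); this query is a sketch, not part of the package:

    from sqlalchemy import text

    from orchestrator.db import db

    dim = db.session.execute(
        text(
            "SELECT atttypmod FROM pg_attribute "
            "WHERE attrelid = 'ai_search_index'::regclass AND attname = 'embedding'"
        )
    ).scalar()
    print(f"embedding column is now vector({dim})")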
orchestrator/cli/scheduler.py CHANGED
@@ -13,11 +13,12 @@
 
 
 import logging
+from time import sleep
 
+import schedule
 import typer
-from apscheduler.schedulers.blocking import BlockingScheduler
 
-from orchestrator.schedules.scheduler import get_paused_scheduler, jobstores, scheduler_dispose_db_connections
+from orchestrator.schedules import ALL_SCHEDULERS
 
 log = logging.getLogger(__name__)
 
@@ -26,48 +27,36 @@ app: typer.Typer = typer.Typer()
 
 @app.command()
 def run() -> None:
-    """Start scheduler and loop eternally to keep thread alive."""
-    # necessary to add the schedules to the DB since they are added to the BackgroundScheduler
-    with get_paused_scheduler() as scheduler:
-        scheduler.resume()
-        scheduler.pause()
-
-    blocking_scheduler = BlockingScheduler(jobstores=jobstores, jobstore_update_interval=5)
-
-    try:
-        blocking_scheduler.start()
-    finally:
-        blocking_scheduler.shutdown()
-        scheduler_dispose_db_connections()
+    """Loop eternally and run schedulers at configured times."""
+    for s in ALL_SCHEDULERS:
+        job = getattr(schedule.every(s.period), s.time_unit)
+        if s.at:
+            job = job.at(s.at)
+        job.do(s).tag(s.name)
+    log.info("Starting Schedule")
+    for j in schedule.jobs:
+        log.info("%s: %s", ", ".join(j.tags), j)
+    while True:
+        schedule.run_pending()
+        idle = schedule.idle_seconds()
+        if idle < 0:
+            log.info("Next job in queue is scheduled in the past, run it now.")
+        else:
+            log.info("Sleeping for %d seconds", idle)
+            sleep(idle)
 
 
 @app.command()
 def show_schedule() -> None:
-    """Show the currently configured schedule.
-
-    in cli underscore is replaced by a dash `show-schedule`
-    """
-    with get_paused_scheduler() as scheduler:
-        jobs = scheduler.get_jobs()
-
-    for job in jobs:
-        typer.echo(f"[{job.id}] Next run: {job.next_run_time} | Trigger: {job.trigger}")
+    """Show the currently configured schedule."""
+    for s in ALL_SCHEDULERS:
+        at_str = f"@ {s.at} " if s.at else ""
+        typer.echo(f"{s.name}: {s.__name__} {at_str}every {s.period} {s.time_unit}")
 
 
 @app.command()
-def force(job_id: str) -> None:
-    """Force the execution of (a) scheduler(s) based on a job_id."""
-    with get_paused_scheduler() as scheduler:
-        job = scheduler.get_job(job_id)
-
-        if not job:
-            typer.echo(f"Job '{job_id}' not found.")
-            raise typer.Exit(code=1)
-
-        typer.echo(f"Running job [{job.id}] now...")
-        try:
-            job.func(*job.args or (), **job.kwargs or {})
-            typer.echo("Job executed successfully.")
-        except Exception as e:
-            typer.echo(f"Job execution failed: {e}")
-            raise typer.Exit(code=1)
+def force(keyword: str) -> None:
+    """Force the execution of (a) scheduler(s) based on a keyword."""
+    for s in ALL_SCHEDULERS:
+        if keyword in s.name or keyword in s.__name__:
+            s()
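
The rewritten CLI replaces APScheduler with the schedule library's fluent API. In miniature, the pattern run() uses (function, period, and tag here are hypothetical):

    import schedule

    def vacuum() -> None:
        print("cleaning tasks")

    # every(N).<unit> picks the interval; .at() can pin a time of day.
    schedule.every(1).hours.do(vacuum).tag("task_vacuum")

    print(schedule.idle_seconds())  # seconds until the next due job
    schedule.run_pending()          # executes any jobs that are due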