orchestrator-core 3.2.2__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +1 -1
- orchestrator/api/api_v1/endpoints/settings.py +3 -13
- orchestrator/api/api_v1/endpoints/subscription_customer_descriptions.py +0 -2
- orchestrator/api/api_v1/endpoints/subscriptions.py +1 -0
- orchestrator/app.py +10 -1
- orchestrator/cli/generator/templates/new_product_migration.j2 +5 -1
- orchestrator/cli/migrate_tasks.py +5 -5
- orchestrator/cli/migrate_workflows.py +1 -2
- orchestrator/db/models.py +3 -1
- orchestrator/domain/base.py +4 -24
- orchestrator/domain/customer_description.py +0 -4
- orchestrator/graphql/mutations/customer_description.py +1 -1
- orchestrator/metrics/__init__.py +3 -0
- orchestrator/metrics/engine.py +49 -0
- orchestrator/metrics/init.py +14 -0
- orchestrator/metrics/processes.py +147 -0
- orchestrator/metrics/subscriptions.py +93 -0
- orchestrator/migrations/helpers.py +14 -6
- orchestrator/migrations/templates/alembic.ini.j2 +1 -2
- orchestrator/migrations/templates/env.py.j2 +4 -7
- orchestrator/migrations/versions/schema/2025-02-20_68d14db1b8da_make_workflow_description_mandatory.py +33 -0
- orchestrator/migrations/versions/schema/2025-05-08_161918133bec_add_is_task_to_workflow.py +28 -0
- orchestrator/schedules/validate_subscriptions.py +4 -4
- orchestrator/schemas/workflow.py +3 -1
- orchestrator/services/celery.py +13 -5
- orchestrator/services/processes.py +3 -3
- orchestrator/services/settings.py +10 -1
- orchestrator/services/subscriptions.py +10 -23
- orchestrator/services/workflows.py +8 -4
- orchestrator/settings.py +1 -4
- orchestrator/targets.py +1 -0
- orchestrator/utils/get_subscription_dict.py +0 -4
- orchestrator/utils/redis.py +1 -67
- orchestrator/workflows/modify_note.py +3 -11
- orchestrator/workflows/steps.py +2 -86
- orchestrator/workflows/tasks/validate_product_type.py +2 -2
- orchestrator/workflows/tasks/validate_products.py +3 -6
- orchestrator/workflows/utils.py +3 -23
- {orchestrator_core-3.2.2.dist-info → orchestrator_core-4.0.0.dist-info}/METADATA +9 -8
- {orchestrator_core-3.2.2.dist-info → orchestrator_core-4.0.0.dist-info}/RECORD +42 -35
- {orchestrator_core-3.2.2.dist-info → orchestrator_core-4.0.0.dist-info}/WHEEL +0 -0
- {orchestrator_core-3.2.2.dist-info → orchestrator_core-4.0.0.dist-info}/licenses/LICENSE +0 -0
orchestrator/api/api_v1/endpoints/settings.py
CHANGED

@@ -22,9 +22,10 @@ from sqlalchemy.exc import SQLAlchemyError
 from oauth2_lib.fastapi import OIDCUserModel
 from orchestrator.api.error_handling import raise_status
 from orchestrator.db import EngineSettingsTable
-from orchestrator.schemas import EngineSettingsBaseSchema, EngineSettingsSchema,
+from orchestrator.schemas import EngineSettingsBaseSchema, EngineSettingsSchema, WorkerStatus
 from orchestrator.security import authenticate
 from orchestrator.services import processes, settings
+from orchestrator.services.settings import generate_engine_global_status
 from orchestrator.settings import ExecutorType, app_settings
 from orchestrator.utils.json import json_dumps
 from orchestrator.utils.redis import delete_keys_matching_pattern

@@ -165,17 +166,6 @@ def generate_engine_status_response(
         Engine StatusEnum

     """
-
-    if engine_settings.global_lock and engine_settings.running_processes > 0:
-        result = EngineSettingsSchema.model_validate(engine_settings)
-        result.global_status = GlobalStatusEnum.PAUSING
-        return result
-
-    if engine_settings.global_lock and engine_settings.running_processes == 0:
-        result = EngineSettingsSchema.model_validate(engine_settings)
-        result.global_status = GlobalStatusEnum.PAUSED
-        return result
-
     result = EngineSettingsSchema.model_validate(engine_settings)
-    result.global_status =
+    result.global_status = generate_engine_global_status(engine_settings)
     return result
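The removed branches moved into `generate_engine_global_status` in `orchestrator/services/settings.py` (the `+10 -1` entry in the file list), so the endpoint and the metrics collectors below share one status computation. A minimal sketch of the logic that helper presumably centralizes, reconstructed from the removed lines rather than the literal 4.0.0 source:

```python
from orchestrator.schemas.engine_settings import GlobalStatusEnum


def generate_engine_global_status(engine_settings) -> GlobalStatusEnum:
    # Lock is set while processes are still draining: the engine is pausing.
    if engine_settings.global_lock and engine_settings.running_processes > 0:
        return GlobalStatusEnum.PAUSING
    # Lock is set and nothing is running: fully paused.
    if engine_settings.global_lock and engine_settings.running_processes == 0:
        return GlobalStatusEnum.PAUSED
    # No lock: the engine accepts new processes.
    return GlobalStatusEnum.RUNNING
```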
orchestrator/api/api_v1/endpoints/subscription_customer_descriptions.py
CHANGED

@@ -28,7 +28,6 @@ from orchestrator.domain.customer_description import (
 from orchestrator.schemas import SubscriptionDescriptionBaseSchema, SubscriptionDescriptionSchema
 from orchestrator.schemas.subscription_descriptions import UpdateSubscriptionDescriptionSchema
 from orchestrator.utils.errors import StaleDataError
-from orchestrator.utils.redis import delete_from_redis

 router = APIRouter()

@@ -55,7 +54,6 @@ def delete_subscription_customer_descriptions(_id: UUID) -> None:
     description = db.session.get(SubscriptionCustomerDescriptionTable, _id)
     if description:
         delete(SubscriptionCustomerDescriptionTable, _id)
-        delete_from_redis(description.subscription_id)


 @router.get("/{_id}", response_model=SubscriptionDescriptionSchema)
orchestrator/api/api_v1/endpoints/subscriptions.py
CHANGED

@@ -163,6 +163,7 @@ def subscriptions_search(
 @router.get(
     "/workflows/{subscription_id}",
     response_model=SubscriptionWorkflowListsSchema,
+    response_model_by_alias=True,
     response_model_exclude_none=True,
     deprecated=True,
     description="This endpoint is deprecated and will be removed in a future release. Please use the GraphQL query",
orchestrator/app.py
CHANGED

@@ -25,6 +25,7 @@ import structlog
 import typer
 from fastapi.applications import FastAPI
 from fastapi_etag.dependency import add_exception_handler
+from prometheus_client import make_asgi_app
 from sentry_sdk.integrations import Integration
 from sentry_sdk.integrations.asyncio import AsyncioIntegration
 from sentry_sdk.integrations.fastapi import FastApiIntegration

@@ -54,6 +55,7 @@ from orchestrator.graphql.schema import ContextGetterFactory
 from orchestrator.graphql.schemas.subscription import SubscriptionInterface
 from orchestrator.graphql.types import ScalarOverrideType, StrawberryModelType
 from orchestrator.log_config import LOGGER_OVERRIDES
+from orchestrator.metrics import ORCHESTRATOR_METRICS_REGISTRY, initialize_default_metrics
 from orchestrator.services.process_broadcast_thread import ProcessDataBroadcastThread
 from orchestrator.settings import AppSettings, ExecutorType, app_settings
 from orchestrator.version import GIT_COMMIT_HASH

@@ -143,9 +145,14 @@ class OrchestratorCore(FastAPI):
         self.add_exception_handler(ProblemDetailException, problem_detail_handler)  # type: ignore[arg-type]
         add_exception_handler(self)

+        if base_settings.ENABLE_PROMETHEUS_METRICS_ENDPOINT:
+            initialize_default_metrics()
+            metrics_app = make_asgi_app(registry=ORCHESTRATOR_METRICS_REGISTRY)
+            self.mount("/api/metrics", metrics_app)
+
         @self.router.get("/", response_model=str, response_class=JSONResponse, include_in_schema=False)
         def _index() -> str:
-            return "Orchestrator
+            return "Orchestrator Core"

     def add_sentry(
         self,

@@ -154,6 +161,7 @@ class OrchestratorCore(FastAPI):
         server_name: str,
         environment: str,
         release: str | None = GIT_COMMIT_HASH,
+        **sentry_kwargs: Any,
     ) -> None:
         logger.info("Adding Sentry middleware to app", app=self.title)
         if self.base_settings.EXECUTOR == ExecutorType.WORKER:

@@ -173,6 +181,7 @@ class OrchestratorCore(FastAPI):
             integrations=sentry_integrations,
             propagate_traces=True,
             profiles_sample_rate=trace_sample_rate,
+            **sentry_kwargs,
        )

     @staticmethod
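Together with the metrics package below, an application only has to flip one settings flag to expose Prometheus metrics. A minimal usage sketch, assuming the `ENABLE_PROMETHEUS_METRICS_ENDPOINT` field on `AppSettings` that the hunk above references:

```python
from orchestrator import OrchestratorCore
from orchestrator.settings import AppSettings

# With the flag enabled, OrchestratorCore registers the default collectors and
# mounts the Prometheus exposition app, so metrics are served at /api/metrics.
app = OrchestratorCore(base_settings=AppSettings(ENABLE_PROMETHEUS_METRICS_ENDPOINT=True))
```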
orchestrator/cli/generator/templates/new_product_migration.j2
CHANGED

@@ -63,24 +63,28 @@ new_workflows = [
     {
         "name": "create_{{ product.variable }}",
         "target": Target.CREATE,
+        "is_task": False,
         "description": "Create {{ product.name }}",
         "product_type": "{{ product.type }}",
     },
     {
         "name": "modify_{{ product.variable }}",
         "target": Target.MODIFY,
+        "is_task": False,
         "description": "Modify {{ product.name }}",
         "product_type": "{{ product.type }}",
     },
     {
         "name": "terminate_{{ product.variable }}",
         "target": Target.TERMINATE,
+        "is_task": False,
         "description": "Terminate {{ product.name }}",
         "product_type": "{{ product.type }}",
     },
     {
         "name": "validate_{{ product.variable }}",
-        "target": Target.
+        "target": Target.VALIDATE,
+        "is_task": True,
         "description": "Validate {{ product.name }}",
         "product_type": "{{ product.type }}",
     },
orchestrator/cli/migrate_tasks.py
CHANGED

@@ -150,22 +150,22 @@ def create_tasks_migration_wizard() -> tuple[list[dict], list[dict]]:
         - list of task items to add in the migration
         - list of task items to delete in the migration
     """
-    database_tasks = {
-        task.name: task for task in list(db.session.scalars(select(WorkflowTable))) if task.target == Target.SYSTEM
-    }
+    database_tasks = {task.name: task for task in list(db.session.scalars(select(WorkflowTable))) if task.is_task}
     registered_wf_instances = {
         task: cast(Workflow, get_workflow(task)) for task in orchestrator.workflows.ALL_WORKFLOWS.keys()
     }

+    is_task = [Target.SYSTEM, Target.VALIDATE]
+
     registered_tasks = dict(
         filter(
-            lambda task: task[1].target
+            lambda task: task[1].target in is_task and task[0] in database_tasks.keys(),
             registered_wf_instances.items(),
         )
     )
     available_tasks = dict(
         filter(
-            lambda task: task[1].target
+            lambda task: task[1].target in is_task and task[0] not in database_tasks.keys(),
             registered_wf_instances.items(),
         )
     )
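The new filters treat both `Target.SYSTEM` and `Target.VALIDATE` as task targets. The `+1` line in `orchestrator/targets.py` (see the file list) is presumably the new `VALIDATE` member; a hedged sketch of the enum after the change, with the member names inferred from their usage in this diff and the str-enum base an assumption:

```python
from enum import Enum


class Target(str, Enum):
    # Existing targets, as used throughout this diff.
    CREATE = "CREATE"
    MODIFY = "MODIFY"
    TERMINATE = "TERMINATE"
    SYSTEM = "SYSTEM"
    # Presumably the added line: validation workflows get their own target
    # instead of piggybacking on SYSTEM.
    VALIDATE = "VALIDATE"
```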
orchestrator/cli/migrate_workflows.py
CHANGED

@@ -11,7 +11,6 @@ import orchestrator.workflows
 from orchestrator.cli.helpers.input_helpers import _enumerate_menu_keys, _prompt_user_menu, get_user_input
 from orchestrator.cli.helpers.print_helpers import COLOR, noqa_print, print_fmt, str_fmt
 from orchestrator.db import ProductTable, WorkflowTable, db
-from orchestrator.targets import Target
 from orchestrator.workflows import LazyWorkflowInstance, get_workflow

 # Workflows are registered via migrations with product type. For every product with the given product_type, there will be an entry in products_workflows.

@@ -183,7 +182,7 @@ def create_workflows_migration_wizard() -> tuple[list[dict], list[dict]]:
     """
     database_workflows = list(db.session.scalars(select(WorkflowTable)))
     registered_workflows = orchestrator.workflows.ALL_WORKFLOWS
-    system_workflow_names = {wf.name for wf in database_workflows if wf.
+    system_workflow_names = {wf.name for wf in database_workflows if wf.is_task}
     registered_non_system_workflows = {k: v for k, v in registered_workflows.items() if k not in system_workflow_names}

     unregistered_workflows = [wf for wf in database_workflows if wf.name not in registered_workflows.keys()]
orchestrator/db/models.py
CHANGED

@@ -400,7 +400,7 @@ class WorkflowTable(BaseModel):
     workflow_id = mapped_column(UUIDType, server_default=text("uuid_generate_v4()"), primary_key=True)
     name = mapped_column(String(), nullable=False, unique=True)
     target = mapped_column(String(), nullable=False)
-    description = mapped_column(Text(), nullable=
+    description = mapped_column(Text(), nullable=False)
     created_at = mapped_column(UtcTimestamp, nullable=False, server_default=text("current_timestamp()"))
     deleted_at = mapped_column(UtcTimestamp, deferred=True)

@@ -412,6 +412,8 @@ class WorkflowTable(BaseModel):
     )
     processes = relationship("ProcessTable", cascade="all, delete-orphan", back_populates="workflow")

+    is_task = mapped_column(Boolean, nullable=False, server_default=text("false"))
+
     @staticmethod
     def select() -> Select:
         return (
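The new column lines up with the migration file `2025-05-08_161918133bec_add_is_task_to_workflow.py` listed above. A sketch of what that migration plausibly contains, derived from the column definition in the model; the revision id comes from the filename, while the body and `down_revision` are assumptions:

```python
import sqlalchemy as sa
from alembic import op

revision = "161918133bec"
down_revision = None  # actual value not shown in this diff


def upgrade() -> None:
    # Mirror of the model change: non-nullable boolean defaulting to false.
    op.add_column(
        "workflows",
        sa.Column("is_task", sa.Boolean(), server_default=sa.text("false"), nullable=False),
    )


def downgrade() -> None:
    op.drop_column("workflows", "is_task")
```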
orchestrator/domain/base.py
CHANGED

@@ -1294,27 +1294,13 @@ class SubscriptionModel(DomainModel):
     # Some common functions shared by from_other_product and from_subscription
     @classmethod
     def _get_subscription(cls: type[S], subscription_id: UUID | UUIDstr) -> SubscriptionTable | None:
-        from orchestrator.settings import app_settings

         if not isinstance(subscription_id, UUID | UUIDstr):
             raise TypeError(f"subscription_id is of type {type(subscription_id)} instead of UUID | UUIDstr")

-        if app_settings.ENABLE_SUBSCRIPTION_MODEL_OPTIMIZATIONS:
-            # TODO #900 remove toggle and make this path the default
-            loaders = [
-                joinedload(SubscriptionTable.product).selectinload(ProductTable.fixed_inputs),
-            ]
-
-        else:
-            loaders = [
-                selectinload(SubscriptionTable.instances)
-                .joinedload(SubscriptionInstanceTable.product_block)
-                .selectinload(ProductBlockTable.resource_types),
-                selectinload(SubscriptionTable.instances).selectinload(
-                    SubscriptionInstanceTable.in_use_by_block_relations
-                ),
-                selectinload(SubscriptionTable.instances).selectinload(SubscriptionInstanceTable.values),
-            ]
+        loaders = [
+            joinedload(SubscriptionTable.product).selectinload(ProductTable.fixed_inputs),
+        ]

         return db.session.get(SubscriptionTable, subscription_id, options=loaders)

@@ -1394,7 +1380,6 @@ class SubscriptionModel(DomainModel):
     def from_subscription(cls: type[S], subscription_id: UUID | UUIDstr) -> S:
         """Use a subscription_id to return required fields of an existing subscription."""
         from orchestrator.domain.context_cache import get_from_cache, store_in_cache
-        from orchestrator.settings import app_settings

         if cached_model := get_from_cache(subscription_id):
             return cast(S, cached_model)

@@ -1421,12 +1406,7 @@ class SubscriptionModel(DomainModel):

         fixed_inputs = {fi.name: fi.value for fi in subscription.product.fixed_inputs}

-        instances
-        if app_settings.ENABLE_SUBSCRIPTION_MODEL_OPTIMIZATIONS:
-            # TODO #900 remove toggle and make this path the default
-            instances = cls._load_root_instances(subscription_id)
-        else:
-            instances = cls._load_instances(subscription.instances, status, match_domain_attr=False)
+        instances = cls._load_root_instances(subscription_id)

         try:
             model = cls(
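Removing the toggle resolves TODO #900: `_load_root_instances` is now the only loading path, and the `ENABLE_SUBSCRIPTION_MODEL_OPTIMIZATIONS` setting presumably goes away as part of the `+1 -4` change in `orchestrator/settings.py`. Calling code is unaffected; a minimal usage sketch with a hypothetical model class:

```python
# MySubscriptionModel stands in for any concrete SubscriptionModel subclass.
subscription = MySubscriptionModel.from_subscription(subscription_id)
# Instances are now always loaded via _load_root_instances(), with no
# settings-dependent fallback to _load_instances().
```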
orchestrator/domain/customer_description.py
CHANGED

@@ -21,7 +21,6 @@ from sqlalchemy import select
 from orchestrator.api.models import delete
 from orchestrator.db import SubscriptionCustomerDescriptionTable, db
 from orchestrator.utils.errors import StaleDataError
-from orchestrator.utils.redis import delete_subscription_from_redis
 from orchestrator.utils.validate_data_version import validate_data_version
 from orchestrator.websocket import invalidate_subscription_cache

@@ -38,7 +37,6 @@ def get_customer_description_by_customer_subscription(
     return db.session.scalars(stmt).one_or_none()


-@delete_subscription_from_redis()
 async def create_subscription_customer_description(
     customer_id: str, subscription_id: UUID, description: str
 ) -> SubscriptionCustomerDescriptionTable:

@@ -53,7+51,6 @@ async def create_subscription_customer_description(
     return customer_description


-@delete_subscription_from_redis()
 async def update_subscription_customer_description(
     customer_description: SubscriptionCustomerDescriptionTable,
     description: str,

@@ -70,7+67,6 @@ async def update_subscription_customer_description(
     return customer_description


-@delete_subscription_from_redis()
 async def delete_subscription_customer_description_by_customer_subscription(
     customer_id: str, subscription_id: UUID
 ) -> SubscriptionCustomerDescriptionTable | None:
orchestrator/graphql/mutations/customer_description.py
CHANGED

@@ -60,7 +60,7 @@ async def resolve_remove_customer_description(
     )
     if not description:
         return NotFoundError(message="Customer description not found")
-    return CustomerDescription.from_pydantic(description)
+    return CustomerDescription.from_pydantic(description)  # type: ignore

 @strawberry.type(description="Customer subscription description mutations")
orchestrator/metrics/engine.py
ADDED

@@ -0,0 +1,49 @@
+from typing import Iterable
+
+from prometheus_client import Metric
+from prometheus_client.metrics_core import GaugeMetricFamily, StateSetMetricFamily
+from prometheus_client.registry import Collector
+
+from orchestrator.schemas.engine_settings import GlobalStatusEnum
+from orchestrator.services import settings
+
+
+def _get_engine_status() -> tuple[GlobalStatusEnum, int]:
+    """Query for getting the current status of the workflow engine.
+
+    This includes the engine status, and the amount of currently running processes.
+    """
+    engine_settings = settings.get_engine_settings()
+    engine_status = settings.generate_engine_global_status(engine_settings)
+
+    return engine_status, int(engine_settings.running_processes)
+
+
+class WorkflowEngineCollector(Collector):
+    """Initialize a Prometheus enum and a gauge.
+
+    The enum of the current workflow engine status takes three values:
+    - RUNNING
+    - PAUSING
+    - PAUSED
+
+    This metric also exports the amount of currently running processes.
+    """
+
+    def collect(self) -> Iterable[Metric]:
+        current_engine_status, running_process_count = _get_engine_status()
+
+        engine_status = StateSetMetricFamily(
+            "wfo_engine_status",
+            documentation="Current workflow engine status.",
+            value={status: status == current_engine_status for status in GlobalStatusEnum.values()},
+        )
+
+        engine_process_count = GaugeMetricFamily(
+            "wfo_active_process_count",
+            unit="count",
+            value=running_process_count,
+            documentation="Number of currently running processes in the workflow engine.",
+        )
+
+        return [engine_status, engine_process_count]
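A quick way to see what this collector emits without running the full app; a sketch using prometheus_client's public API (it needs an initialized database connection, since `collect()` queries the engine settings on every scrape):

```python
from prometheus_client import CollectorRegistry, generate_latest

from orchestrator.metrics.engine import WorkflowEngineCollector

registry = CollectorRegistry(auto_describe=True)
registry.register(WorkflowEngineCollector())

# Prints exposition-format samples, e.g.
# wfo_engine_status{wfo_engine_status="RUNNING"} 1.0
print(generate_latest(registry).decode())
```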
orchestrator/metrics/init.py
ADDED

@@ -0,0 +1,14 @@
+from prometheus_client import CollectorRegistry
+
+from orchestrator.metrics.engine import WorkflowEngineCollector
+from orchestrator.metrics.processes import ProcessCollector
+from orchestrator.metrics.subscriptions import SubscriptionCollector
+
+ORCHESTRATOR_METRICS_REGISTRY = CollectorRegistry(auto_describe=True)
+
+
+def initialize_default_metrics() -> None:
+    """Register default Prometheus collectors."""
+    ORCHESTRATOR_METRICS_REGISTRY.register(SubscriptionCollector())
+    ORCHESTRATOR_METRICS_REGISTRY.register(ProcessCollector())
+    ORCHESTRATOR_METRICS_REGISTRY.register(WorkflowEngineCollector())
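Since the registry is module-level, an application can add its own collectors next to the defaults before the endpoint is mounted; a sketch with a hypothetical custom collector:

```python
from orchestrator.metrics import ORCHESTRATOR_METRICS_REGISTRY, initialize_default_metrics

initialize_default_metrics()
# MyCustomCollector is a hypothetical prometheus_client Collector subclass.
ORCHESTRATOR_METRICS_REGISTRY.register(MyCustomCollector())
```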
orchestrator/metrics/processes.py
ADDED

@@ -0,0 +1,147 @@
+from typing import Iterable
+
+from prometheus_client.metrics_core import GaugeMetricFamily, Metric
+from prometheus_client.registry import Collector
+from pydantic import BaseModel
+from sqlalchemy import desc, func
+
+from orchestrator.db import ProcessTable, ProductTable, SubscriptionTable, WorkflowTable, db
+from orchestrator.db.models import ProcessSubscriptionTable
+from orchestrator.targets import Target
+from orchestrator.workflow import ProcessStatus
+from pydantic_forms.types import UUIDstr
+
+
+class ProcessTableQueryResult(BaseModel):
+    workflow_name: str
+    customer_id: UUIDstr
+    workflow_target: Target
+    last_status: ProcessStatus
+    created_by: str
+    is_task: bool
+    product_name: str
+    total_runtime: float
+    process_count: int
+
+
+def _get_processes() -> list[ProcessTableQueryResult]:
+    """Query for getting all processes.
+
+    Equivalent to the following SQL statement:
+    ```sql
+    SELECT
+        workflows."name" AS workflow_name
+        , subscriptions.customer_id
+        , workflows.target AS workflow_target
+        , processes.last_status
+        , processes.created_by
+        , processes.is_task
+        , products."name" AS product_name
+        , Coalesce(Sum(Extract(EPOCH FROM processes.last_modified_at - processes.started_at)), 0) AS total_runtime
+        , Count(workflows."name") AS process_count
+    FROM
+        processes
+        JOIN workflows
+            ON processes.workflow_id = workflows.workflow_id
+        JOIN processes_subscriptions
+            ON processes.pid = processes_subscriptions.pid
+        JOIN subscriptions
+            ON processes_subscriptions.subscription_id = subscriptions.subscription_id
+        JOIN products
+            ON subscriptions.product_id = products.product_id
+    GROUP BY
+        workflows."name"
+        , subscriptions.customer_id
+        , workflows.target
+        , processes.last_status
+        , processes.created_by
+        , processes.is_task
+        , products."name"
+    ;
+    ```
+    """
+    process_count = func.count(WorkflowTable.name).label("process_count")
+    total_process_time = func.coalesce(
+        func.sum(func.extract("epoch", (ProcessTable.last_modified_at - ProcessTable.started_at))), 0
+    ).label("total_runtime")
+    return (
+        db.session.query(
+            ProcessTable.last_status,
+            ProcessTable.created_by,
+            ProcessTable.is_task,
+            ProductTable.name.label("product_name"),
+            WorkflowTable.name.label("workflow_name"),
+            SubscriptionTable.customer_id,
+            WorkflowTable.target.label("workflow_target"),
+            process_count,
+            total_process_time,
+        )
+        .join(WorkflowTable, WorkflowTable.workflow_id == ProcessTable.workflow_id)
+        .join(ProcessSubscriptionTable, ProcessSubscriptionTable.process_id == ProcessTable.process_id)
+        .join(SubscriptionTable, SubscriptionTable.subscription_id == ProcessSubscriptionTable.subscription_id)
+        .join(ProductTable, ProductTable.product_id == SubscriptionTable.product_id)
+        .group_by(
+            ProcessTable.last_status,
+            ProcessTable.created_by,
+            ProcessTable.is_task,
+            ProductTable.name,
+            WorkflowTable.name,
+            SubscriptionTable.customer_id,
+            WorkflowTable.target,
+        )
+        .order_by(desc(process_count))
+    ).all()
+
+
+class ProcessCollector(Collector):
+    """Collector that contains two Prometheus gauges with process counts and total runtime.
+
+    These gauges contain the amount of processes, and the total runtime in seconds, per every combination of the labels
+    that are defined:
+    - Process last status
+    - Process created by
+    - Process is task
+    - Product name
+    - Workflow name
+    - Customer ID
+    - Workflow target
+    """
+
+    def collect(self) -> Iterable[Metric]:
+        label_names = [
+            "last_status",
+            "created_by",
+            "is_task",
+            "product_name",
+            "workflow_name",
+            "customer_id",
+            "workflow_target",
+        ]
+        process_counts = GaugeMetricFamily(
+            "wfo_process_count",
+            labels=label_names,
+            unit="count",
+            documentation="Number of processes per status, creator, task, product, workflow, customer, and target.",
+        )
+        process_seconds_total = GaugeMetricFamily(
+            "wfo_process_seconds_total",
+            labels=label_names,
+            unit="count",
+            documentation="Total time spent on processes in seconds.",
+        )
+
+        for row in _get_processes():
+            label_values = [
+                row.last_status,
+                str(row.created_by),
+                str(row.is_task),
+                row.product_name,
+                row.workflow_name,
+                row.customer_id,
+                row.workflow_target,
+            ]
+
+            process_counts.add_metric(label_values, row.process_count)
+            process_seconds_total.add_metric(label_values, row.total_runtime)
+
+        return [process_counts, process_seconds_total]
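Each scrape re-runs the aggregate query, so series cardinality grows with the number of distinct label combinations (`customer_id` in particular). A sketch for checking the output locally, under the same database assumption as the engine collector example above:

```python
from prometheus_client import CollectorRegistry, generate_latest

from orchestrator.metrics.processes import ProcessCollector

registry = CollectorRegistry()
registry.register(ProcessCollector())

# One sample per label combination, e.g.:
# wfo_process_count{created_by="SYSTEM",customer_id="...",is_task="True",...} 12.0
print(generate_latest(registry).decode())
```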
orchestrator/metrics/subscriptions.py
ADDED

@@ -0,0 +1,93 @@
+from typing import Iterable
+
+from prometheus_client import Metric
+from prometheus_client.metrics_core import GaugeMetricFamily
+from prometheus_client.registry import Collector
+from pydantic import BaseModel
+from sqlalchemy import desc, func
+
+from orchestrator.db import ProductTable, SubscriptionTable, db
+from orchestrator.types import SubscriptionLifecycle
+from pydantic_forms.types import UUIDstr
+
+
+class SubscriptionTableQueryResult(BaseModel):
+    lifecycle_state: SubscriptionLifecycle
+    customer_id: UUIDstr
+    insync: bool
+    product_name: str
+    subscription_count: int
+
+
+def _get_subscriptions() -> list[SubscriptionTableQueryResult]:
+    """Query for getting all subscriptions.
+
+    Equivalent to the following SQL statement:
+    ```sql
+    SELECT
+        subscriptions.status AS lifecycle_state
+        , subscriptions.customer_id
+        , subscriptions.insync
+        , products."name" AS product_name
+        , Count(subscriptions.subscription_id) AS subscription_count
+    FROM
+        subscriptions
+        JOIN products
+            ON subscriptions.product_id = products.product_id
+    GROUP BY subscriptions.status
+        , subscriptions.customer_id
+        , insync
+        , products."name"
+    ;
+    ```
+    """
+    subscription_count = func.count(SubscriptionTable.subscription_id).label("subscription_count")
+    return (
+        db.session.query(
+            SubscriptionTable.status.label("lifecycle_state"),
+            SubscriptionTable.customer_id,
+            SubscriptionTable.insync,
+            ProductTable.name.label("product_name"),
+            subscription_count,
+        )
+        .outerjoin(ProductTable, ProductTable.product_id == SubscriptionTable.product_id)
+        .group_by(
+            SubscriptionTable.status,
+            SubscriptionTable.customer_id,
+            SubscriptionTable.insync,
+            ProductTable.name,
+        )
+        .order_by(desc(subscription_count))
+    ).all()
+
+
+class SubscriptionCollector(Collector):
+    """Collector for Subscriptions stored in the subscription database.
+
+    This collector contains one gauge that contains the amount of subscriptions, per every combination of the labels
+    that are defined:
+    - Product name
+    - Subscription lifecycle
+    - Customer ID
+    - `insync` state
+    """
+
+    def collect(self) -> Iterable[Metric]:
+        subscriptions = GaugeMetricFamily(
+            name="wfo_subscriptions_count",
+            labels=[
+                "product_name",
+                "lifecycle_state",
+                "customer_id",
+                "insync",
+            ],
+            unit="count",
+            documentation="Number of subscriptions per product, lifecycle state, customer, and in sync state.",
+        )
+
+        for row in _get_subscriptions():
+            subscriptions.add_metric(
+                [row.product_name, row.lifecycle_state, row.customer_id, str(row.insync)], row.subscription_count
+            )
+
+        return [subscriptions]