orchestrator-core 4.2.0rc3__py3-none-any.whl → 4.3.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- orchestrator/__init__.py +1 -1
- orchestrator/api/api_v1/endpoints/processes.py +1 -10
- orchestrator/graphql/loaders/subscriptions.py +12 -1
- orchestrator/graphql/schemas/subscription.py +4 -0
- orchestrator/graphql/types.py +10 -1
- orchestrator/migrations/__init__.py +0 -0
- orchestrator/migrations/versions/schema/2025-07-28_850dccac3b02_update_description_of_resume_workflows_.py +35 -0
- orchestrator/services/executors/__init__.py +12 -0
- orchestrator/services/{celery.py → executors/celery.py} +39 -51
- orchestrator/services/executors/threadpool.py +128 -0
- orchestrator/services/input_state.py +6 -3
- orchestrator/services/processes.py +45 -53
- orchestrator/services/subscription_relations.py +21 -1
- orchestrator/services/tasks.py +17 -46
- orchestrator/workflows/tasks/resume_workflows.py +71 -15
- {orchestrator_core-4.2.0rc3.dist-info → orchestrator_core-4.3.0.dist-info}/METADATA +1 -1
- {orchestrator_core-4.2.0rc3.dist-info → orchestrator_core-4.3.0.dist-info}/RECORD +19 -15
- {orchestrator_core-4.2.0rc3.dist-info → orchestrator_core-4.3.0.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.2.0rc3.dist-info → orchestrator_core-4.3.0.dist-info}/licenses/LICENSE +0 -0
orchestrator/__init__.py
CHANGED

orchestrator/api/api_v1/endpoints/processes.py
CHANGED

```diff
@@ -48,6 +48,7 @@ from orchestrator.services.processes import (
     _async_resume_processes,
     _get_process,
     abort_process,
+    can_be_resumed,
     continue_awaiting_process,
     load_process,
     resume_process,
@@ -120,16 +121,6 @@ def get_auth_callbacks(steps: StepList, workflow: Workflow) -> tuple[Authorizer
     return auth_resume, auth_retry
 
 
-def can_be_resumed(status: ProcessStatus) -> bool:
-    return status in (
-        ProcessStatus.SUSPENDED,  # Can be resumed
-        ProcessStatus.WAITING,  # Can be retried
-        ProcessStatus.FAILED,  # Can be retried
-        ProcessStatus.API_UNAVAILABLE,  # subtype of FAILED
-        ProcessStatus.INCONSISTENT_DATA,  # subtype of FAILED
-    )
-
-
 def resolve_user_name(
     *,
     reporter: Reporter | None,
```
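The net effect of these two hunks, together with the services/processes.py section below, is that `can_be_resumed` moves out of the API layer into `orchestrator.services.processes`, backed there by a shared `RESUMABLE_STATUSES` tuple. A condensed behavioural sketch of the relocated helper; the one functional change is that `RESUMED` now also counts as resumable:

```python
# Behavioural sketch of the relocated helper (definitions appear in the
# services/processes.py section below).
from orchestrator.services.processes import can_be_resumed
from orchestrator.workflow import ProcessStatus

assert can_be_resumed(ProcessStatus.SUSPENDED)
assert can_be_resumed(ProcessStatus.RESUMED)  # new in 4.3.0: re-resume a stuck process
assert not can_be_resumed(ProcessStatus.RUNNING)
```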
orchestrator/graphql/loaders/subscriptions.py
CHANGED

```diff
@@ -1,3 +1,4 @@
+from datetime import datetime
 from typing import Literal
 from uuid import UUID
 
@@ -8,7 +9,11 @@ from strawberry.dataloader import DataLoader
 from orchestrator.db import (
     SubscriptionTable,
 )
-from orchestrator.services.subscription_relations import
+from orchestrator.services.subscription_relations import (
+    get_depends_on_subscriptions,
+    get_in_use_by_subscriptions,
+    get_last_validation_datetimes,
+)
 from orchestrator.types import SubscriptionLifecycle
 
 logger = structlog.get_logger(__name__)
@@ -38,4 +43,10 @@ async def depends_on_subs_loader(keys: list[tuple[UUID, tuple[str, ...]]]) -> li
     return await get_depends_on_subscriptions(subscription_ids, filter_statuses)
 
 
+async def last_validation_datetime_loader(keys: list[UUID]) -> list[datetime | None]:
+    """GraphQL dataloader to efficiently get the last validation datetime for multiple subscription_ids."""
+    return await get_last_validation_datetimes(keys)
+
+
 SubsLoaderType = DataLoader[tuple[UUID, tuple[str, ...]], list[SubscriptionTable]]
+LastValidationLoaderType = DataLoader[UUID, datetime | None]
```
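The new loader follows the usual strawberry `DataLoader` contract: every `.load()` call issued in the same tick is coalesced into one `load_fn` call, and results must line up with the key order. A minimal, self-contained sketch of that contract, with an in-memory dict standing in for the real database query:

```python
# Hedged sketch of the DataLoader batching contract; load_last_validations is
# a stand-in for the real get_last_validation_datetimes query.
import asyncio
from datetime import datetime
from uuid import UUID, uuid4

from strawberry.dataloader import DataLoader

FAKE_VALIDATIONS: dict[UUID, datetime] = {}


async def load_last_validations(keys: list[UUID]) -> list[datetime | None]:
    # One batched call per tick; one result per key, in key order.
    return [FAKE_VALIDATIONS.get(key) for key in keys]


async def main() -> None:
    loader: DataLoader[UUID, datetime | None] = DataLoader(load_fn=load_last_validations)
    known = uuid4()
    FAKE_VALIDATIONS[known] = datetime(2025, 7, 28)
    # Both .load() calls below are served by a single load_last_validations call.
    print(await asyncio.gather(loader.load(known), loader.load(uuid4())))


asyncio.run(main())
```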
orchestrator/graphql/schemas/subscription.py
CHANGED

```diff
@@ -175,6 +175,10 @@ class SubscriptionInterface:
         ]
         return await resolve_subscriptions(info, filter_by_with_related_subscriptions, sort_by, first, after)
 
+    @strawberry.field(description="Returns the date and time of the last validation workflow run for a subscription")  # type: ignore
+    async def last_validated_at(self, info: OrchestratorInfo) -> datetime | None:
+        return await info.context.core_last_validation_datetime_loader.load(self.subscription_id)
+
     @strawberry.field(description="Returns customer of a subscription")  # type: ignore
     def customer(self) -> CustomerType:
         return CustomerType(
```
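Assuming strawberry's default name conversion, the new field is exposed as `lastValidatedAt` on every subscription type implementing this interface. A hypothetical client-side check; the endpoint path and exact query shape are assumptions based on a default orchestrator-core deployment, so adjust both to your setup:

```python
# Hypothetical query of the new field via the stdlib; the URL and schema
# shape are assumptions, not confirmed by the diff.
import json
from urllib.request import Request, urlopen

query = """
query LastValidated {
  subscriptions(first: 3) {
    page { subscriptionId lastValidatedAt }
  }
}
"""
req = Request(
    "http://localhost:8080/api/graphql",
    data=json.dumps({"query": query}).encode(),
    headers={"Content-Type": "application/json"},
)
print(json.load(urlopen(req))["data"])
```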
orchestrator/graphql/types.py
CHANGED

```diff
@@ -30,7 +30,13 @@ from oauth2_lib.fastapi import AuthManager
 from oauth2_lib.strawberry import OauthContext
 from orchestrator.db.filters import Filter
 from orchestrator.db.sorting import Sort, SortOrder
-from orchestrator.graphql.loaders.subscriptions import
+from orchestrator.graphql.loaders.subscriptions import (
+    LastValidationLoaderType,
+    SubsLoaderType,
+    depends_on_subs_loader,
+    in_use_by_subs_loader,
+    last_validation_datetime_loader,
+)
 from orchestrator.services.process_broadcast_thread import ProcessDataBroadcastThread
 
 StrawberryPydanticModel = TypeVar("StrawberryPydanticModel", bound=StrawberryTypeFromPydantic)
@@ -60,6 +66,9 @@ class OrchestratorContext(OauthContext):
         self.graphql_models = graphql_models or {}
         self.core_in_use_by_subs_loader: SubsLoaderType = DataLoader(load_fn=in_use_by_subs_loader)
         self.core_depends_on_subs_loader: SubsLoaderType = DataLoader(load_fn=depends_on_subs_loader)
+        self.core_last_validation_datetime_loader: LastValidationLoaderType = DataLoader(
+            load_fn=last_validation_datetime_loader
+        )
         super().__init__(auth_manager)
```
orchestrator/migrations/__init__.py
File without changes

orchestrator/migrations/versions/schema/2025-07-28_850dccac3b02_update_description_of_resume_workflows_.py
ADDED

```python
"""Update description of resume workflows task.

Revision ID: 850dccac3b02
Revises: 93fc5834c7e5
Create Date: 2025-07-28 15:38:57.211087

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "850dccac3b02"
down_revision = "93fc5834c7e5"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.execute(
        """
        UPDATE workflows
        SET description = 'Resume all workflows that are stuck on tasks with the status ''waiting'', ''created'' or ''resumed'''
        WHERE name = 'task_resume_workflows';
        """
    )


def downgrade() -> None:
    op.execute(
        """
        UPDATE workflows
        SET description = 'Resume all workflows that are stuck on tasks with the status ''waiting'''
        WHERE name = 'task_resume_workflows';
        """
    )
```
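The doubled single quotes in both UPDATE statements are standard SQL escaping rather than a typo: inside a single-quoted SQL literal, `''` yields one literal quote. A one-line illustration in plain Python (not part of the migration):

```python
# Building the same SQL literal from the target description text.
description = "Resume all workflows that are stuck on tasks with the status 'waiting', 'created' or 'resumed'"
print("SET description = '" + description.replace("'", "''") + "'")
```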
orchestrator/services/executors/__init__.py
ADDED

```python
# Copyright 2019-2020 SURF.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
```
orchestrator/services/{celery.py → executors/celery.py}
CHANGED

```diff
@@ -18,19 +18,22 @@ from uuid import UUID
 import structlog
 from celery.result import AsyncResult
 from kombu.exceptions import ConnectionError, OperationalError
+from sqlalchemy import select
 
-from oauth2_lib.fastapi import OIDCUserModel
 from orchestrator import app_settings
 from orchestrator.api.error_handling import raise_status
 from orchestrator.db import ProcessTable, db
-from orchestrator.services.
-
+from orchestrator.services.processes import (
+    SYSTEM_USER,
+    can_be_resumed,
+    create_process,
+    delete_process,
+    set_process_status,
+)
 from orchestrator.services.workflows import get_workflow_by_name
-from orchestrator.
+from orchestrator.workflow import ProcessStat, ProcessStatus
 from pydantic_forms.types import State
 
-SYSTEM_USER = "SYSTEM"
-
 logger = structlog.get_logger(__name__)
 
 
@@ -42,29 +45,17 @@ def _block_when_testing(task_result: AsyncResult) -> None:
         raise RuntimeError("Celery worker has failed to resume process")
 
 
-def _celery_start_process(
-    workflow_key: str,
-    user_inputs: list[State] | None,
-    user: str = SYSTEM_USER,
-    user_model: OIDCUserModel | None = None,
-    **kwargs: Any,
-) -> UUID:
+def _celery_start_process(pstat: ProcessStat, user: str = SYSTEM_USER, **kwargs: Any) -> UUID:
     """Client side call of Celery."""
     from orchestrator.services.tasks import NEW_TASK, NEW_WORKFLOW, get_celery_task
 
-
-    if not workflow:
-        raise_status(HTTPStatus.NOT_FOUND, "Workflow does not exist")
-
-    wf_table = get_workflow_by_name(workflow.name)
-    if not wf_table:
+    if not (wf_table := get_workflow_by_name(pstat.workflow.name)):
         raise_status(HTTPStatus.NOT_FOUND, "Workflow in Database does not exist")
 
     task_name = NEW_TASK if wf_table.is_task else NEW_WORKFLOW
     trigger_task = get_celery_task(task_name)
-    pstat = create_process(workflow_key, user_inputs=user_inputs, user=user, user_model=user_model)
     try:
-        result = trigger_task.delay(pstat.process_id,
+        result = trigger_task.delay(pstat.process_id, user)
         _block_when_testing(result)
         return pstat.process_id
     except (ConnectionError, OperationalError) as e:
@@ -77,65 +68,62 @@ def _celery_start_process(
 def _celery_resume_process(
     process: ProcessTable,
     *,
-    user_inputs: list[State] | None = None,
     user: str | None = None,
     **kwargs: Any,
-) ->
+) -> bool:
     """Client side call of Celery."""
-    from orchestrator.services.processes import load_process
     from orchestrator.services.tasks import RESUME_TASK, RESUME_WORKFLOW, get_celery_task
 
-    pstat = load_process(process)
     last_process_status = process.last_status
-    workflow = pstat.workflow
-
-    wf_table = get_workflow_by_name(workflow.name)
-    if not workflow or not wf_table:
-        raise_status(HTTPStatus.NOT_FOUND, "Workflow does not exist")
 
-    task_name = RESUME_TASK if
+    task_name = RESUME_TASK if process.workflow.is_task else RESUME_WORKFLOW
     trigger_task = get_celery_task(task_name)
 
-
-
+    _celery_set_process_status_resumed(process.process_id)
+
     try:
-
-        result = trigger_task.delay(pstat.process_id, user)
+        result = trigger_task.delay(process.process_id, user)
         _block_when_testing(result)
 
-        return
+        return process.process_id
     except (ConnectionError, OperationalError) as e:
         logger.warning(
             "Connection error when submitting task to celery. Resetting process status back",
             current_status=process.last_status,
             last_status=last_process_status,
         )
-
+        set_process_status(process.process_id, last_process_status)
         raise e
 
 
-def
-process
-    db.session.add(process)
-    db.session.commit()
-
+def _celery_set_process_status_resumed(process_id: UUID) -> None:
+    """Set the process status to RESUMED to show its waiting to be picked up by a worker.
 
-
-
+    uses with_for_update to lock the subscription in a transaction, preventing other changes.
+    rolls back transation and raises an exception when it can't change to RESUMED to prevent it from being added to the queue.
 
     Args:
-
+        process_id: Process ID to fetch process from DB
     """
-
-
+    stmt = select(ProcessTable).where(ProcessTable.process_id == process_id).with_for_update()
+
+    result = db.session.execute(stmt)
+    locked_process = result.scalar_one_or_none()
+
+    if not locked_process:
+        raise ValueError(f"Process not found: {process_id}")
 
-
-
-
+    if can_be_resumed(locked_process.last_status):
+        locked_process.last_status = ProcessStatus.RESUMED
+        db.session.commit()
+    else:
+        db.session.rollback()
+        raise ValueError(f"Process has incorrect status to resume: {locked_process.last_status}")
 
 
 def _celery_validate(validation_workflow: str, json: list[State] | None) -> None:
-
+    pstat = create_process(validation_workflow, user_inputs=json)
+    _celery_start_process(pstat)
 
 
 CELERY_EXECUTION_CONTEXT: dict[str, Callable] = {
```
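`_celery_set_process_status_resumed` above and `_set_process_status_running` in the new threadpool module (next section) share one recipe: lock the row with SELECT ... FOR UPDATE, validate the current status, then either commit the transition or roll back and raise. A generic, self-contained sketch of that recipe; every name here is illustrative rather than orchestrator-core API:

```python
# Generic lock-check-transition sketch, assuming `model` is any SQLAlchemy
# mapped class with `id` and `status` attributes.
from collections.abc import Callable

from sqlalchemy import select
from sqlalchemy.orm import Session


def guarded_transition(session: Session, model, row_id, is_allowed: Callable, new_status) -> None:
    # FOR UPDATE blocks concurrent transactions on this row until we finish.
    row = session.execute(
        select(model).where(model.id == row_id).with_for_update()
    ).scalar_one_or_none()
    if row is None:
        session.rollback()
        raise ValueError(f"Row not found: {row_id}")
    if not is_allowed(row.status):
        session.rollback()
        raise ValueError(f"Illegal transition from: {row.status}")
    row.status = new_status
    session.commit()  # committing releases the row lock
```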
orchestrator/services/executors/threadpool.py
ADDED

```python
# Copyright 2019-2025 SURF, GÉANT, ESnet.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections.abc import Callable
from functools import partial
from uuid import UUID

import structlog
from sqlalchemy import select

from oauth2_lib.fastapi import OIDCUserModel
from orchestrator.db import ProcessTable, db
from orchestrator.services.input_state import InputType, retrieve_input_state
from orchestrator.services.processes import (
    RESUME_WORKFLOW_REMOVED_ERROR_MSG,
    START_WORKFLOW_REMOVED_ERROR_MSG,
    SYSTEM_USER,
    StateMerger,
    _run_process_async,
    create_process,
    load_process,
    safe_logstep,
)
from orchestrator.types import BroadcastFunc
from orchestrator.workflow import (
    ProcessStat,
    ProcessStatus,
    runwf,
)
from orchestrator.workflows.removed_workflow import removed_workflow
from pydantic_forms.types import State

logger = structlog.get_logger(__name__)


def _set_process_status_running(process_id: UUID) -> None:
    """Set the process status to RUNNING to prevent it from being picked up by mutliple workers.

    uses with_for_update to lock the subscription in a transaction, preventing other changes.
    rolls back transation and raises an exception when its already on status RUNNING to prevent worker from running an already running process

    Args:
        process_id: Process ID to fetch process from DB
    """
    stmt = select(ProcessTable).where(ProcessTable.process_id == process_id).with_for_update()

    result = db.session.execute(stmt)
    locked_process = result.scalar_one_or_none()

    if not locked_process:
        db.session.rollback()
        raise ValueError(f"Process not found: {process_id}")

    if locked_process.last_status is not ProcessStatus.RUNNING:
        locked_process.last_status = ProcessStatus.RUNNING
        db.session.commit()
    else:
        db.session.rollback()
        raise Exception("Process is already running")


def thread_start_process(
    pstat: ProcessStat,
    user: str = SYSTEM_USER,
    user_model: OIDCUserModel | None = None,
    broadcast_func: BroadcastFunc | None = None,
) -> UUID:
    if pstat.workflow == removed_workflow:
        raise ValueError(START_WORKFLOW_REMOVED_ERROR_MSG)

    # enforce an update to the process status to properly show the process
    _set_process_status_running(pstat.process_id)

    input_data = retrieve_input_state(pstat.process_id, "initial_state", False)
    pstat.update(state=pstat.state.map(lambda state: StateMerger.merge(state, input_data.input_state)))

    _safe_logstep_with_func = partial(safe_logstep, broadcast_func=broadcast_func)
    return _run_process_async(pstat.process_id, lambda: runwf(pstat, _safe_logstep_with_func))


def thread_resume_process(
    process: ProcessTable,
    *,
    user: str | None = None,
    user_model: OIDCUserModel | None = None,
    broadcast_func: BroadcastFunc | None = None,
) -> UUID:
    # ATTENTION!! When modifying this function make sure you make similar changes to `resume_workflow` in the test code
    pstat = load_process(process)
    if pstat.workflow == removed_workflow:
        raise ValueError(RESUME_WORKFLOW_REMOVED_ERROR_MSG)

    if user:
        pstat.update(current_user=user)

    # retrieve_input_str is for the edge case when workflow engine stops whilst there is an existing 'CREATED' process queue'ed.
    # It will have become a `RUNNING` process that gets resumed and this should fetch initial_state instead of user_input.
    retrieve_input_str: InputType = "user_input" if process.steps else "initial_state"
    input_data = retrieve_input_state(process.process_id, retrieve_input_str, False)
    pstat.update(state=pstat.state.map(lambda state: StateMerger.merge(state, input_data.input_state)))

    # enforce an update to the process status to properly show the process
    _set_process_status_running(process.process_id)

    _safe_logstep_prep = partial(safe_logstep, broadcast_func=broadcast_func)
    _run_process_async(pstat.process_id, lambda: runwf(pstat, _safe_logstep_prep))
    return pstat.process_id


def thread_validate_workflow(validation_workflow: str, json: list[State] | None) -> UUID:
    pstat = create_process(validation_workflow, user_inputs=json)
    return thread_start_process(pstat)


THREADPOOL_EXECUTION_CONTEXT: dict[str, Callable] = {
    "start": thread_start_process,
    "resume": thread_resume_process,
    "validate": thread_validate_workflow,
}
```
orchestrator/services/input_state.py
CHANGED

```diff
@@ -24,12 +24,13 @@ logger = structlog.get_logger(__name__)
 InputType = Literal["initial_state", "user_input"]
 
 
-def retrieve_input_state(process_id: UUID, input_type: InputType) -> InputStateTable:
+def retrieve_input_state(process_id: UUID, input_type: InputType, raise_exception: bool = True) -> InputStateTable:
     """Get user input.
 
     Args:
         process_id: Process ID
         input_type: The type of the input.
+        raise_exception: boolean to throw error when not finding data or not
 
     Returns:
         User input table
@@ -40,13 +41,15 @@ def retrieve_input_state(process_id: UUID, input_type: InputType) -> InputStateT
         select(InputStateTable)
         .filter(InputStateTable.process_id == process_id)
         .filter(InputStateTable.input_type == input_type)
-        .order_by(InputStateTable.input_time.
+        .order_by(InputStateTable.input_time.desc())
     ).first()
 
     if res:
         logger.debug("Retrieved input state", process_id=process_id, input_state=res, input_type=input_type)
         return res
-
+    if raise_exception:
+        raise ValueError(f"No input state for pid: {process_id}")
+    return InputStateTable(input_state={})
 
 
 def store_input_state(
```
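With the new flag, callers that can tolerate missing input (the new executors pass `False`) receive an empty `InputStateTable` instead of having to catch `ValueError`. A minimal usage sketch with names taken from the diff; the uuid is a placeholder:

```python
# Usage sketch of the new raise_exception flag.
from uuid import uuid4

from orchestrator.services.input_state import retrieve_input_state

input_data = retrieve_input_state(uuid4(), "initial_state", False)
print(input_data.input_state)  # {} when no input state was ever stored
```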
orchestrator/services/processes.py
CHANGED

```diff
@@ -51,7 +51,6 @@ from orchestrator.workflow import (
     Success,
     Workflow,
     abort_wf,
-    runwf,
 )
 from orchestrator.workflow import Process as WFProcess
 from orchestrator.workflows import get_workflow
@@ -68,13 +67,18 @@ SYSTEM_USER = "SYSTEM"
 
 _workflow_executor = None
 
+START_WORKFLOW_REMOVED_ERROR_MSG = "This workflow cannot be started because it has been removed"
+RESUME_WORKFLOW_REMOVED_ERROR_MSG = "This workflow cannot be resumed because it has been removed"
+
 
 def get_execution_context() -> dict[str, Callable]:
     if app_settings.EXECUTOR == ExecutorType.WORKER:
-        from orchestrator.services.celery import CELERY_EXECUTION_CONTEXT
+        from orchestrator.services.executors.celery import CELERY_EXECUTION_CONTEXT
 
         return CELERY_EXECUTION_CONTEXT
 
+    from orchestrator.services.executors.threadpool import THREADPOOL_EXECUTION_CONTEXT
+
     return THREADPOOL_EXECUTION_CONTEXT
 
 
@@ -449,7 +453,6 @@ def create_process(
     }
 
     try:
-
         state = post_form(workflow.initial_input_form, initial_state, user_inputs)
     except FormValidationError:
         logger.exception("Validation errors", user_inputs=user_inputs)
@@ -469,19 +472,6 @@ def create_process(
     return pstat
 
 
-def thread_start_process(
-    workflow_key: str,
-    user_inputs: list[State] | None = None,
-    user: str = SYSTEM_USER,
-    user_model: OIDCUserModel | None = None,
-    broadcast_func: BroadcastFunc | None = None,
-) -> UUID:
-    pstat = create_process(workflow_key, user_inputs=user_inputs, user=user, user_model=user_model)
-
-    _safe_logstep_with_func = partial(safe_logstep, broadcast_func=broadcast_func)
-    return _run_process_async(pstat.process_id, lambda: runwf(pstat, _safe_logstep_with_func))
-
-
 def start_process(
     workflow_key: str,
     user_inputs: list[State] | None = None,
@@ -502,57 +492,47 @@ def start_process(
         process id
 
     """
+    pstat = create_process(workflow_key, user_inputs=user_inputs, user=user)
+
     start_func = get_execution_context()["start"]
-    return start_func(
-        workflow_key, user_inputs=user_inputs, user=user, user_model=user_model, broadcast_func=broadcast_func
-    )
+    return start_func(pstat, user=user, user_model=user_model, broadcast_func=broadcast_func)
 
 
-def
+def restart_process(
     process: ProcessTable,
     *,
-    user_inputs: list[State] | None = None,
     user: str | None = None,
     broadcast_func: BroadcastFunc | None = None,
 ) -> UUID:
-
-
-    if user_inputs is None:
-        user_inputs = [{}]
-
-    pstat = load_process(process)
-
-    if pstat.workflow == removed_workflow:
-        raise ValueError("This workflow cannot be resumed")
+    """Restart a process that is stuck on status CREATED.
 
-
-
-
+    Args:
+        process: Process from database
+        user: user who resumed this process
+        broadcast_func: Optional function to broadcast process data
 
-
-
+    Returns:
+        process id
 
-
-
-    store_input_state(pstat.process_id, user_input, "user_input")
-    # enforce an update to the process status to properly show the process
-    process.last_status = ProcessStatus.RUNNING
-    db.session.add(process)
-    db.session.commit()
+    """
+    pstat = load_process(process)
 
-
-    return
+    start_func = get_execution_context()["start"]
+    return start_func(pstat, user=user, broadcast_func=broadcast_func)
 
 
-
-
+RESUMABLE_STATUSES = (
+    ProcessStatus.SUSPENDED,  # Can be resumed
+    ProcessStatus.WAITING,  # Can be retried
+    ProcessStatus.FAILED,  # Can be retried
+    ProcessStatus.API_UNAVAILABLE,  # subtype of FAILED
+    ProcessStatus.INCONSISTENT_DATA,  # subtype of FAILED
+    ProcessStatus.RESUMED,  # re-resume stuck process
+)
 
 
-
-
-    "resume": thread_resume_process,
-    "validate": thread_validate_workflow,
-}
+def can_be_resumed(status: ProcessStatus) -> bool:
+    return status in RESUMABLE_STATUSES
 
 
 def resume_process(
@@ -576,14 +556,19 @@ def resume_process(
     """
     pstat = load_process(process)
 
+    if pstat.workflow == removed_workflow:
+        raise ValueError(RESUME_WORKFLOW_REMOVED_ERROR_MSG)
+
     try:
-        post_form(pstat.log[0].form, pstat.state.unwrap(), user_inputs=user_inputs or [])
+        user_input = post_form(pstat.log[0].form, pstat.state.unwrap(), user_inputs=user_inputs or [{}])
     except FormValidationError:
         logger.exception("Validation errors", user_inputs=user_inputs)
         raise
 
+    store_input_state(pstat.process_id, user_input, "user_input")
+
     resume_func = get_execution_context()["resume"]
-    return resume_func(process,
+    return resume_func(process, user=user, broadcast_func=broadcast_func)
 
 
 def ensure_correct_callback_token(pstat: ProcessStat, *, token: str) -> None:
@@ -809,6 +794,12 @@ def _get_running_processes() -> list[ProcessTable]:
     return list(db.session.scalars(stmt))
 
 
+def set_process_status(process: ProcessTable, status: ProcessStatus) -> None:
+    process.last_status = status
+    db.session.add(process)
+    db.session.commit()
+
+
 def marshall_processes(engine_settings: EngineSettingsTable, new_global_lock: bool) -> EngineSettingsTable | None:
     """Manage processes depending on the engine status.
 
@@ -831,6 +822,7 @@ def marshall_processes(engine_settings: EngineSettingsTable, new_global_lock: bo
 
         # Resume all the running processes
         for process in _get_running_processes():
+            set_process_status(process, ProcessStatus.RESUMED)
             resume_process(process, user=SYSTEM_USER)
 
     elif not engine_settings.global_lock and new_global_lock:
```
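The refactor inverts the old flow: `start_process`, the new `restart_process`, and the validate helpers all build the `ProcessStat` via `create_process` first and only then hand it to whichever executor context is active. A condensed sketch of that dispatch, using names from the diff:

```python
# Condensed post-refactor dispatch sketch: the ProcessStat is created once,
# then routed to the celery or threadpool execution context.
from orchestrator.services.processes import SYSTEM_USER, create_process, get_execution_context


def start(workflow_key: str) -> None:
    pstat = create_process(workflow_key, user_inputs=[{}])
    start_func = get_execution_context()["start"]  # CELERY_ or THREADPOOL_EXECUTION_CONTEXT
    start_func(pstat, user=SYSTEM_USER)
```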
orchestrator/services/subscription_relations.py
CHANGED

```diff
@@ -1,15 +1,18 @@
+from datetime import datetime
 from itertools import chain
 from typing import Any, Awaitable, Callable, NamedTuple
 from uuid import UUID
 
 import structlog
 from more_itertools import flatten, unique_everseen
-from sqlalchemy import Row, select
+from sqlalchemy import Row, func, select
 from sqlalchemy import Text as SaText
 from sqlalchemy import cast as sa_cast
 from sqlalchemy.orm import aliased
 
 from orchestrator.db import (
+    ProcessSubscriptionTable,
+    ProcessTable,
     ResourceTypeTable,
     SubscriptionInstanceTable,
     SubscriptionInstanceValueTable,
@@ -267,3 +270,20 @@ async def get_recursive_relations(
         relation_fetcher=relation_fetcher,
     )
     return list(unique_everseen(relations + nested_relations, key=lambda s: s.subscription_id))
+
+
+async def get_last_validation_datetimes(subscription_ids: list[UUID]) -> list[datetime | None]:
+    stmt = (
+        select(ProcessSubscriptionTable.subscription_id, func.max(ProcessTable.last_modified_at))
+        .join(ProcessSubscriptionTable)
+        .group_by(ProcessSubscriptionTable.subscription_id)
+        .where(
+            (ProcessSubscriptionTable.workflow_target == "VALIDATE")
+            & ProcessSubscriptionTable.subscription_id.in_(subscription_ids)
+        )
+    )
+    results = db.session.execute(stmt).all()
+    last_validation_indexed_by_sub_id = {
+        str(subscription_id): last_validation for subscription_id, last_validation in results
+    }
+    return [last_validation_indexed_by_sub_id.get(str(subscription_id), None) for subscription_id in subscription_ids]
```
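Because the dataloader contract requires one result per key, in key order, the function indexes the grouped rows by subscription id and fills the gaps with `None`. A self-contained sketch of that final mapping step:

```python
# Order-preserving mapping sketch: subscriptions without a validation run
# come back as None.
from datetime import datetime
from uuid import UUID, uuid4

validated, never_validated = uuid4(), uuid4()
rows: list[tuple[UUID, datetime]] = [(validated, datetime(2025, 7, 28))]

by_id = {str(sub_id): last for sub_id, last in rows}
print([by_id.get(str(s)) for s in [validated, never_validated]])
# [datetime.datetime(2025, 7, 28, 0, 0), None]
```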
orchestrator/services/tasks.py
CHANGED
```diff
@@ -11,8 +11,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from functools import partial
-from http import HTTPStatus
-from typing import Any
 from uuid import UUID
 
 import structlog
@@ -21,21 +19,12 @@ from celery.app.control import Inspect
 from celery.utils.log import get_task_logger
 from kombu.serialization import registry
 
-from orchestrator.api.error_handling import raise_status
 from orchestrator.schemas.engine_settings import WorkerStatus
-from orchestrator.services.
-from orchestrator.services.processes import
-    _get_process,
-    _run_process_async,
-    ensure_correct_process_status,
-    safe_logstep,
-    thread_resume_process,
-)
+from orchestrator.services.executors.threadpool import thread_resume_process, thread_start_process
+from orchestrator.services.processes import _get_process, ensure_correct_process_status, load_process
 from orchestrator.types import BroadcastFunc
 from orchestrator.utils.json import json_dumps, json_loads
-from orchestrator.workflow import
-from orchestrator.workflows import get_workflow
-from pydantic_forms.types import State
+from orchestrator.workflow import ProcessStatus
 
 logger = get_task_logger(__name__)
 
@@ -78,24 +67,12 @@ def initialise_celery(celery: Celery) -> None:  # noqa: C901
 
     process_broadcast_fn: BroadcastFunc | None = getattr(celery, "process_broadcast_fn", None)
 
-    def start_process(process_id: UUID,
+    def start_process(process_id: UUID, user: str) -> UUID | None:
         try:
-
-
-            if not workflow:
-                raise_status(HTTPStatus.NOT_FOUND, "Workflow does not exist")
-
-            pstat = ProcessStat(
-                process_id,
-                workflow=workflow,
-                state=Success(state),
-                log=workflow.steps,
-                current_user=user,
-            )
-
+            process = _get_process(process_id)
+            pstat = load_process(process)
             ensure_correct_process_status(process_id, ProcessStatus.CREATED)
-
-            process_id = _run_process_async(pstat.process_id, lambda: runwf(pstat, safe_logstep_with_func))
+            thread_start_process(pstat, user=user, broadcast_func=process_broadcast_fn)
 
         except Exception as exc:
             local_logger.error("Worker failed to execute workflow", process_id=process_id, details=str(exc))
@@ -103,13 +80,11 @@ def initialise_celery(celery: Celery) -> None:  # noqa: C901
         else:
             return process_id
 
-    def resume_process(process_id: UUID,
+    def resume_process(process_id: UUID, user: str) -> UUID | None:
         try:
             process = _get_process(process_id)
             ensure_correct_process_status(process_id, ProcessStatus.RESUMED)
-
-                process, user_inputs=user_inputs, user=user, broadcast_func=process_broadcast_fn
-            )
+            thread_resume_process(process, user=user, broadcast_func=process_broadcast_fn)
         except Exception as exc:
             local_logger.error("Worker failed to resume workflow", process_id=process_id, details=str(exc))
             return None
@@ -119,28 +94,24 @@ def initialise_celery(celery: Celery) -> None:  # noqa: C901
     celery_task = partial(celery.task, log=local_logger, serializer="orchestrator-json")
 
     @celery_task(name=NEW_TASK)  # type: ignore
-    def new_task(process_id
-        local_logger.info("Start task", process_id=process_id
-
-        return start_process(process_id, workflow_key, state=state, user=user)
+    def new_task(process_id: UUID, user: str) -> UUID | None:
+        local_logger.info("Start task", process_id=process_id)
+        return start_process(process_id, user=user)
 
     @celery_task(name=NEW_WORKFLOW)  # type: ignore
-    def new_workflow(process_id
-        local_logger.info("Start workflow", process_id=process_id
-
-        return start_process(process_id, workflow_key, state=state, user=user)
+    def new_workflow(process_id: UUID, user: str) -> UUID | None:
+        local_logger.info("Start workflow", process_id=process_id)
+        return start_process(process_id, user=user)
 
     @celery_task(name=RESUME_TASK)  # type: ignore
     def resume_task(process_id: UUID, user: str) -> UUID | None:
         local_logger.info("Resume task", process_id=process_id)
-
-        return resume_process(process_id, user_inputs=state, user=user)
+        return resume_process(process_id, user=user)
 
     @celery_task(name=RESUME_WORKFLOW)  # type: ignore
     def resume_workflow(process_id: UUID, user: str) -> UUID | None:
         local_logger.info("Resume workflow", process_id=process_id)
-
-        return resume_process(process_id, user_inputs=state, user=user)
+        return resume_process(process_id, user=user)
 
 
 class CeleryJobWorkerStatus(WorkerStatus):
```
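The net effect for producers: workflow keys, state and user inputs no longer travel through the broker; every task is enqueued as just `(process_id, user)` and the worker reloads everything else from the database. A minimal enqueue sketch with names from the diff; the process must already exist (for example via `create_process`), so the uuid here is a placeholder:

```python
# Minimal enqueue sketch after this change.
from uuid import uuid4

from orchestrator.services.tasks import NEW_WORKFLOW, get_celery_task

trigger_task = get_celery_task(NEW_WORKFLOW)
result = trigger_task.delay(uuid4(), "SYSTEM")  # state is reloaded worker-side
```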
orchestrator/workflows/tasks/resume_workflows.py
CHANGED

```diff
@@ -10,7 +10,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+from uuid import UUID
 
 import structlog
 from sqlalchemy import select
@@ -24,36 +24,92 @@ from pydantic_forms.types import State, UUIDstr
 logger = structlog.get_logger(__name__)
 
 
+def get_process_ids_by_process_statuses(process_statuses: list[ProcessStatus], exclude_ids: list[UUID]) -> list:
+    return list(
+        db.session.scalars(
+            select(ProcessTable.process_id).filter(
+                ProcessTable.last_status.in_(process_statuses), ProcessTable.process_id.not_in(exclude_ids)
+            )
+        )
+    )
+
+
 @step("Find waiting workflows")
-def find_waiting_workflows() -> State:
-
-
-
-
-    return {
+def find_waiting_workflows(process_id: UUID) -> State:
+    created_process_ids = get_process_ids_by_process_statuses([ProcessStatus.CREATED], exclude_ids=[process_id])
+    resumed_process_ids = get_process_ids_by_process_statuses([ProcessStatus.RESUMED], exclude_ids=[process_id])
+    waiting_process_ids = get_process_ids_by_process_statuses([ProcessStatus.WAITING], exclude_ids=[process_id])
+
+    return {
+        "number_of_waiting_processes": len(waiting_process_ids),
+        "waiting_process_ids": waiting_process_ids,
+        "created_processes_stuck": len(created_process_ids),
+        "created_state_process_ids": created_process_ids,
+        "resumed_processes_stuck": len(resumed_process_ids),
+        "resumed_state_process_ids": resumed_process_ids,
+    }
 
 
 @step("Resume found workflows")
-def resume_found_workflows(
-
-
+def resume_found_workflows(
+    waiting_process_ids: list[UUIDstr],
+    resumed_state_process_ids: list[UUIDstr],
+) -> State:
+    resume_processes = waiting_process_ids + resumed_state_process_ids
+
+    resumed_process_ids: list = []
+    for process_id in resume_processes:
         try:
             process = db.session.get(ProcessTable, process_id)
             if not process:
                 continue
+
             # Workaround the commit disable function
             db.session.info["disabled"] = False
+
             processes.resume_process(process)
             resumed_process_ids.append(process_id)
-        except Exception:
-            logger.
+        except Exception as exc:
+            logger.warning("Could not resume process", process_id=process_id, error=str(exc))
+        finally:
+            # Make sure to turn it on again
+            db.session.info["disabled"] = True
+
+    return {
+        "number_of_resumed_process_ids": len(resumed_process_ids),
+        "resumed_process_ids": resumed_process_ids,
+    }
+
+
+@step("Restart found CREATED workflows")
+def restart_created_workflows(created_state_process_ids: list[UUIDstr]) -> State:
+    started_process_ids = []
+    for process_id in created_state_process_ids:
+        try:
+            process = db.session.get(ProcessTable, process_id)
+            if not process:
+                continue
+
+            # Workaround the commit disable function
+            db.session.info["disabled"] = False
+
+            processes.restart_process(process)
+            started_process_ids.append(process_id)
+        except Exception as exc:
+            logger.warning("Could not resume process", process_id=process_id, error=str(exc))
         finally:
             # Make sure to turn it on again
             db.session.info["disabled"] = True
 
-    return {
+    return {
+        "number_of_started_process_ids": len(started_process_ids),
+        "started_process_ids": started_process_ids,
+    }
 
 
-@workflow(
+@workflow(
+    "Resume all workflows that are stuck on tasks with the status 'waiting', 'created' or 'resumed'",
+    target=Target.SYSTEM,
+)
 def task_resume_workflows() -> StepList:
-    return init >> find_waiting_workflows >> resume_found_workflows >> done
+    return init >> find_waiting_workflows >> resume_found_workflows >> restart_created_workflows >> done
```
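The three steps hand data to each other through the workflow state: each `@step`'s parameters are filled by name from keys returned by earlier steps, which is why `restart_created_workflows` only declares `created_state_process_ids`. A self-contained sketch of that convention (not the engine's actual implementation):

```python
# Step-state convention sketch: parameters are resolved by name from state.
import inspect
from typing import Any

State = dict[str, Any]


def restart_created_workflows(created_state_process_ids: list[str]) -> State:
    return {"started_process_ids": created_state_process_ids}


state: State = {"process_id": "self", "created_state_process_ids": ["p1"]}
kwargs = {name: state[name] for name in inspect.signature(restart_created_workflows).parameters}
state |= restart_created_workflows(**kwargs)
print(state["started_process_ids"])  # ['p1']
```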
{orchestrator_core-4.2.0rc3.dist-info → orchestrator_core-4.3.0.dist-info}/METADATA
CHANGED

{orchestrator_core-4.2.0rc3.dist-info → orchestrator_core-4.3.0.dist-info}/RECORD
CHANGED

```diff
@@ -1,4 +1,4 @@
-orchestrator/__init__.py,sha256=
+orchestrator/__init__.py,sha256=UwDUO_wca3GXvCuLY4enk7UDeLXMeY7YNku2dHPoKCQ,1063
 orchestrator/app.py,sha256=7UrXKjBKNSEaSSXAd5ww_RdMFhFqE4yvfj8faS2MzAA,12089
 orchestrator/exception_handlers.py,sha256=UsW3dw8q0QQlNLcV359bIotah8DYjMsj2Ts1LfX4ClY,1268
 orchestrator/log_config.py,sha256=1tPRX5q65e57a6a_zEii_PFK8SzWT0mnA5w2sKg4hh8,1853
@@ -17,7 +17,7 @@ orchestrator/api/api_v1/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n
 orchestrator/api/api_v1/api.py,sha256=m4iDktsSpzxUDaudkdgXeZ83a6B4wfc3pczQsa-Pb-8,2866
 orchestrator/api/api_v1/endpoints/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n8XzmLU,571
 orchestrator/api/api_v1/endpoints/health.py,sha256=iaxs1XX1_250_gKNsspuULCV2GEMBjbtjsmfQTOvMAI,1284
-orchestrator/api/api_v1/endpoints/processes.py,sha256=
+orchestrator/api/api_v1/endpoints/processes.py,sha256=kWz_jL8_sTNwl44tU17VwkwZGjBIw1IIW5pYCCSHwgs,15891
 orchestrator/api/api_v1/endpoints/product_blocks.py,sha256=kZ6ywIOsS_S2qGq7RvZ4KzjvaS1LmwbGWR37AKRvWOw,2146
 orchestrator/api/api_v1/endpoints/products.py,sha256=BfFtwu9dZXEQbtKxYj9icc73GKGvAGMR5ytyf41nQlQ,3081
 orchestrator/api/api_v1/endpoints/resource_types.py,sha256=gGyuaDyOD0TAVoeFGaGmjDGnQ8eQQArOxKrrk4MaDzA,2145
@@ -159,12 +159,12 @@ orchestrator/graphql/__init__.py,sha256=avq8Yg3Jr_9pJqh7ClyIAOX7YSg1eM_AWmt5C3FR
 orchestrator/graphql/autoregistration.py,sha256=pF2jbMKG26MvYoMSa6ZpqpHjVks7_NvSRFymHTgmfjs,6342
 orchestrator/graphql/pagination.py,sha256=iqVDn3GPZpiQhEydfwkBJLURY-X8wwUphS8Lkeg0BOc,2413
 orchestrator/graphql/schema.py,sha256=gwZ3nAgKL0zlpc-aK58hSUAGPVD11Tb3aRSSK9hC39I,9204
-orchestrator/graphql/types.py,sha256=
+orchestrator/graphql/types.py,sha256=_kHKMusrRPuRtF4wm42NsBzoFZ4egbu3ibMmhd2D6Fs,5432
 orchestrator/graphql/extensions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 orchestrator/graphql/extensions/model_cache.py,sha256=1uhMRjBs9eK7zJ1Y6P6BopX06822w2Yh9jliwYvG6yQ,1085
 orchestrator/graphql/extensions/stats.py,sha256=pGhEBQg45XvqZhRobcrCSGwt5AGmR3gflsm1dYiIg5g,2018
 orchestrator/graphql/loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-orchestrator/graphql/loaders/subscriptions.py,sha256=
+orchestrator/graphql/loaders/subscriptions.py,sha256=0deS91hn95CX1KY4NAKgYSfVBqioSc-Q3sdOrTJTaDc,2244
 orchestrator/graphql/mutations/customer_description.py,sha256=zm_X1yvWl4qC97_rYUYSF-1q1gFrQX6fDrzQKhguDYs,3359
 orchestrator/graphql/mutations/start_process.py,sha256=8vLVvmBwL1ujbZJoI_8YE3VAgI-J2RTzgrTZJC8THZ4,1576
 orchestrator/graphql/resolvers/__init__.py,sha256=EEw9NO4LAryfrpkLlgsNQ9rytKd0usBDx95OURRV6sg,1031
@@ -190,7 +190,7 @@ orchestrator/graphql/schemas/product_block.py,sha256=Qk9cbA6vm7ZPrhdgPHatKRuy6Ty
 orchestrator/graphql/schemas/resource_type.py,sha256=s5d_FwQXL2-Sc-IDUxTJun5qFQ4zOP4-XcHF9ql-t1g,898
 orchestrator/graphql/schemas/settings.py,sha256=drhm5VcLmUbiYAk6WUSJcyJqjNM96E6GvpxVdPAobnA,999
 orchestrator/graphql/schemas/strawberry_pydantic_patch.py,sha256=CjNUhTKdYmLiaem-WY_mzw4HASIeaZitxGF8pPocqVw,1602
-orchestrator/graphql/schemas/subscription.py,sha256=
+orchestrator/graphql/schemas/subscription.py,sha256=hTA34C27kgLguH9V53173CxMKIWiQKh3vFzyJ2yBfE0,9918
 orchestrator/graphql/schemas/version.py,sha256=HSzVg_y4Sjd5_H5rRUtu3FJKOG_8ifhvBNt_qjOtC-E,92
 orchestrator/graphql/schemas/workflow.py,sha256=WLbegRNxOfvXg4kPYrO5KPBwtHmUofAr2pvZT2JsW1c,1761
 orchestrator/graphql/utils/__init__.py,sha256=1JvenzEVW1CBa1sGVI9I8IWnnoXIkb1hneDqph9EEZY,524
@@ -208,6 +208,7 @@ orchestrator/metrics/init.py,sha256=xBITvDjbNf-iabbBg0tAW8TPj6-wzr_MerOOqgDsoS4,
 orchestrator/metrics/processes.py,sha256=SyogN5NSuhYoRv2CSUE1So9e8Gkrwa71J6oGLOdODQU,5333
 orchestrator/metrics/subscriptions.py,sha256=vC1O8VmTq5oJxNrn5CU99Rf8cxzdyhc7tXbZBSAU-O8,3036
 orchestrator/migrations/README,sha256=heMzebYwlGhnE8_4CWJ4LS74WoEZjBy-S-mIJRxAEKI,39
+orchestrator/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 orchestrator/migrations/alembic.ini,sha256=kMoADqhGeubU8xanILNaqm4oixLy9m4ngYtdGpZcc7I,873
 orchestrator/migrations/env.py,sha256=M_cPoAL2axuuup5fvMy8I_WTPHEw0RbPEHkhZ3QEGoE,3740
 orchestrator/migrations/helpers.py,sha256=CAGGKhxpmhyKGfYcO-SUCPfMTOCZPfEpkJrcm2MYfcE,47979
@@ -243,6 +244,7 @@ orchestrator/migrations/versions/schema/2025-04-09_fc5c993a4b4a_add_cascade_cons
 orchestrator/migrations/versions/schema/2025-05-08_161918133bec_add_is_task_to_workflow.py,sha256=VLFDHFYRWn5ktUba0KuSPWyvjYJdfN1WypWmOPqIW18,721
 orchestrator/migrations/versions/schema/2025-07-01_93fc5834c7e5_changed_timestamping_fields_in_process_steps.py,sha256=Oezd8b2qaI1Kyq-sZFVFmdzd4d9NjXrf6HtJGk11fy0,1914
 orchestrator/migrations/versions/schema/2025-07-04_4b58e336d1bf_deprecating_workflow_target_in_.py,sha256=xnD6w-97R4ClS7rbmXQEXc36K3fdcXKhCy7ZZNy_FX4,742
+orchestrator/migrations/versions/schema/2025-07-28_850dccac3b02_update_description_of_resume_workflows_.py,sha256=R6Qoga83DJ1IL0WYPu0u5u2ZvAmqGlDmUMv_KtJyOhQ,812
 orchestrator/schedules/__init__.py,sha256=JnnaglfK1qYUBKI6Dd9taV-tCZIPlAdAkHtnkJDMXxY,1066
 orchestrator/schedules/resume_workflows.py,sha256=kSotzTAXjX7p9fpSYiGOpuxuTQfv54eRFAe0YSG0DHc,832
 orchestrator/schedules/scheduling.py,sha256=ehtwgpbvMOk1jhn-hHgVzg_9wLJkI6l3mRY3DcO9ZVY,1526
@@ -262,20 +264,22 @@ orchestrator/schemas/subscription.py,sha256=-jXyHZIed9Xlia18ksSDyenblNN6Q2yM2FlG
 orchestrator/schemas/subscription_descriptions.py,sha256=Ft_jw1U0bf9Z0U8O4OWfLlcl0mXCVT_qYVagBP3GbIQ,1262
 orchestrator/schemas/workflow.py,sha256=VqQ9XfV4fVd6MjY0LRRQzWBJHmlPsAamWfTwDx1cZkg,2102
 orchestrator/services/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n8XzmLU,571
-orchestrator/services/celery.py,sha256=PsIgRBJsmA3vKwAUaqPq9ynLwDsXHY2ggDWc-nQAwgM,5232
 orchestrator/services/fixed_inputs.py,sha256=kyz7s2HLzyDulvcq-ZqefTw1om86COvyvTjz0_5CmgI,876
-orchestrator/services/input_state.py,sha256=
+orchestrator/services/input_state.py,sha256=6BZOpb3cHpO18K-XG-3QUIV9pIM25_ufdODrp5CmXG4,2390
 orchestrator/services/process_broadcast_thread.py,sha256=D44YbjF8mRqGuznkRUV4SoRn1J0lfy_x1H508GnSVlU,4649
-orchestrator/services/processes.py,sha256=
+orchestrator/services/processes.py,sha256=JGM9vWbUjvEpy-IpTIgaYaqcTBKMI-CWTY8SJKBf3eI,30153
 orchestrator/services/products.py,sha256=BP4KyE8zO-8z7Trrs5T6zKBOw53S9BfBJnHWI3p6u5Y,1943
 orchestrator/services/resource_types.py,sha256=_QBy_JOW_X3aSTqH0CuLrq4zBJL0p7Q-UDJUcuK2_qc,884
 orchestrator/services/settings.py,sha256=HEWfFulgoEDwgfxGEO__QTr5fDiwNBEj1UhAeTAdbLQ,3159
 orchestrator/services/settings_env_variables.py,sha256=iPErQjqPQCxKs0sPhefB16d8SBBVUi6eiRnFBK5bgqA,2196
-orchestrator/services/subscription_relations.py,sha256=
+orchestrator/services/subscription_relations.py,sha256=aIdyzwyyy58OFhwjRPCPgnQTUTmChu6SeSQRIleQoDE,13138
 orchestrator/services/subscriptions.py,sha256=nr2HI89nC0lYjzTh2j-lEQ5cPQK43LNZv3gvP6jbepw,27189
-orchestrator/services/tasks.py,sha256=
+orchestrator/services/tasks.py,sha256=mR3Fj1VsudltpanJKI2PvrxersyhVQ1skp8H7r3XnYI,5288
 orchestrator/services/translations.py,sha256=GyP8soUFGej8AS8uulBsk10CCK6Kwfjv9AHMFm3ElQY,1713
 orchestrator/services/workflows.py,sha256=iEkt2OBuTwkDru4V6ZSKatnw0b96ZdPV-VQqeZ9EOgU,4015
+orchestrator/services/executors/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n8XzmLU,571
+orchestrator/services/executors/celery.py,sha256=j5xJo7sZAdTtc0GmmJzoYVfzuYKiqAdAe5QbtPv0bPI,4937
+orchestrator/services/executors/threadpool.py,sha256=SA0Lns17fP7qp5Y0bLZB7YzZ-sYKrmHQdYTeqs9dnV0,4931
 orchestrator/utils/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n8XzmLU,571
 orchestrator/utils/auth.py,sha256=IWn0amdquLobt1mRNwhgKT0ErCBjLGDtLdsDuaY8rlE,309
 orchestrator/utils/crypt.py,sha256=18eNamYWMllPkxyRtWIde3FDr3rSF74R5SAL6WsCj9Y,5584
@@ -308,11 +312,11 @@ orchestrator/workflows/steps.py,sha256=CZxfzkG5ANJYwuYTkQ4da2RpQqIjXCtey_Uy1ezRA
 orchestrator/workflows/utils.py,sha256=bhX9vm3oc9k6RSaESl34v4Nrh40G4Ys91INoTjZ0XVM,13966
 orchestrator/workflows/tasks/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n8XzmLU,571
 orchestrator/workflows/tasks/cleanup_tasks_log.py,sha256=BfWYbPXhnLAHUJ0mlODDnjZnQQAvKCZJDVTwbwOWI04,1624
-orchestrator/workflows/tasks/resume_workflows.py,sha256=
+orchestrator/workflows/tasks/resume_workflows.py,sha256=T3iobSJjVgiupe0rClD34kUZ7KF4pL5yK2AVeRLZog8,4313
 orchestrator/workflows/tasks/validate_product_type.py,sha256=paG-NAY1bdde3Adt8zItkcBKf5Pxw6f5ngGW6an6dYU,3192
 orchestrator/workflows/tasks/validate_products.py,sha256=GZJBoFF-WMphS7ghMs2-gqvV2iL1F0POhk0uSNt93n0,8510
 orchestrator/workflows/translations/en-GB.json,sha256=ST53HxkphFLTMjFHonykDBOZ7-P_KxksktZU3GbxLt0,846
-orchestrator_core-4.
-orchestrator_core-4.
-orchestrator_core-4.
-orchestrator_core-4.
+orchestrator_core-4.3.0.dist-info/licenses/LICENSE,sha256=b-aA5OZQuuBATmLKo_mln8CQrDPPhg3ghLzjPjLn4Tg,11409
+orchestrator_core-4.3.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+orchestrator_core-4.3.0.dist-info/METADATA,sha256=YaUYyTOn8PWLskXyX9gYdJzubqTZb8e41TdGcdGrUK8,5960
+orchestrator_core-4.3.0.dist-info/RECORD,,
```

{orchestrator_core-4.2.0rc3.dist-info → orchestrator_core-4.3.0.dist-info}/WHEEL
File without changes

{orchestrator_core-4.2.0rc3.dist-info → orchestrator_core-4.3.0.dist-info}/licenses/LICENSE
File without changes