dbos 0.21.0a5__tar.gz → 0.22.0a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dbos might be problematic. Click here for more details.
- {dbos-0.21.0a5 → dbos-0.22.0a1}/PKG-INFO +1 -1
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_dbos.py +4 -1
- dbos-0.22.0a1/dbos/_recovery.py +70 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_sys_db.py +43 -7
- {dbos-0.21.0a5 → dbos-0.22.0a1}/pyproject.toml +1 -1
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_dbos.py +3 -3
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_failures.py +1 -1
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_queue.py +108 -4
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_workflow_cmds.py +2 -2
- dbos-0.21.0a5/dbos/_recovery.py +0 -50
- {dbos-0.21.0a5 → dbos-0.22.0a1}/LICENSE +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/README.md +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/__init__.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_admin_server.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_app_db.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_classproperty.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_cloudutils/authentication.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_cloudutils/cloudutils.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_cloudutils/databases.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_context.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_core.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_croniter.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_db_wizard.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_dbos_config.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_error.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_fastapi.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_flask.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_kafka.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_kafka_message.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_logger.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_migrations/env.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_migrations/script.py.mako +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_migrations/versions/04ca4f231047_workflow_queues_executor_id.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_migrations/versions/50f3227f0b4b_fix_job_queue.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_migrations/versions/5c361fc04708_added_system_tables.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_migrations/versions/a3b18ad34abe_added_triggers.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_migrations/versions/d76646551a6b_job_queue_limiter.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_migrations/versions/d76646551a6c_workflow_queue.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_migrations/versions/eab0cc1d9a14_job_queue.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_outcome.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_queue.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_registrations.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_request.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_roles.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_scheduler.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_schemas/__init__.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_schemas/application_database.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_schemas/system_database.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_serialization.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_templates/dbos-db-starter/README.md +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_templates/dbos-db-starter/__package/__init__.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_templates/dbos-db-starter/__package/main.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_templates/dbos-db-starter/__package/schema.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_templates/dbos-db-starter/alembic.ini +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_templates/dbos-db-starter/dbos-config.yaml.dbos +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_templates/dbos-db-starter/migrations/env.py.dbos +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_templates/dbos-db-starter/migrations/script.py.mako +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_templates/dbos-db-starter/migrations/versions/2024_07_31_180642_init.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_templates/dbos-db-starter/start_postgres_docker.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_tracer.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_workflow_commands.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/cli/_github_init.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/cli/_template_init.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/cli/cli.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/dbos-config.schema.json +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/py.typed +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/__init__.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/atexit_no_ctor.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/atexit_no_launch.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/classdefs.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/conftest.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/more_classdefs.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/queuedworkflow.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_admin_server.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_async.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_classdecorators.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_concurrency.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_config.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_croniter.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_fastapi.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_fastapi_roles.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_flask.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_kafka.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_outcome.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_package.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_scheduler.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_schema_migration.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_singleton.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_spans.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/tests/test_sqlalchemy.py +0 -0
- {dbos-0.21.0a5 → dbos-0.22.0a1}/version/__init__.py +0 -0
|
@@ -56,7 +56,7 @@ from ._registrations import (
|
|
|
56
56
|
)
|
|
57
57
|
from ._roles import default_required_roles, required_roles
|
|
58
58
|
from ._scheduler import ScheduledWorkflow, scheduled
|
|
59
|
-
from ._sys_db import
|
|
59
|
+
from ._sys_db import reset_system_database
|
|
60
60
|
from ._tracer import dbos_tracer
|
|
61
61
|
|
|
62
62
|
if TYPE_CHECKING:
|
|
@@ -613,6 +613,7 @@ class DBOS:
|
|
|
613
613
|
workflow_id=workflow_id,
|
|
614
614
|
status=stat["status"],
|
|
615
615
|
name=stat["name"],
|
|
616
|
+
executor_id=stat["executor_id"],
|
|
616
617
|
recovery_attempts=stat["recovery_attempts"],
|
|
617
618
|
class_name=stat["class_name"],
|
|
618
619
|
config_name=stat["config_name"],
|
|
@@ -909,6 +910,7 @@ class WorkflowStatus:
|
|
|
909
910
|
workflow_id(str): The ID of the workflow execution
|
|
910
911
|
status(str): The status of the execution, from `WorkflowStatusString`
|
|
911
912
|
name(str): The workflow function name
|
|
913
|
+
executor_id(str): The ID of the executor running the workflow
|
|
912
914
|
class_name(str): For member functions, the name of the class containing the workflow function
|
|
913
915
|
config_name(str): For instance member functions, the name of the class instance for the execution
|
|
914
916
|
queue_name(str): For workflows that are or were queued, the queue name
|
|
@@ -922,6 +924,7 @@ class WorkflowStatus:
|
|
|
922
924
|
workflow_id: str
|
|
923
925
|
status: str
|
|
924
926
|
name: str
|
|
927
|
+
executor_id: Optional[str]
|
|
925
928
|
class_name: Optional[str]
|
|
926
929
|
config_name: Optional[str]
|
|
927
930
|
queue_name: Optional[str]
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import threading
|
|
3
|
+
import time
|
|
4
|
+
import traceback
|
|
5
|
+
from typing import TYPE_CHECKING, Any, List
|
|
6
|
+
|
|
7
|
+
from ._core import execute_workflow_by_id
|
|
8
|
+
from ._error import DBOSWorkflowFunctionNotFoundError
|
|
9
|
+
from ._sys_db import GetPendingWorkflowsOutput
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from ._dbos import DBOS, WorkflowHandle
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def startup_recovery_thread(
    dbos: "DBOS", pending_workflows: List["GetPendingWorkflowsOutput"]
) -> None:
    """Attempt to recover local pending workflows on startup using a background thread.

    Every entry of ``pending_workflows`` is handled once and then removed from
    the list (the list is mutated in place):

    * a workflow whose ``queue_name`` is set and is not
      ``"_dbos_internal_queue"`` has its queue assignment cleared through
      ``dbos._sys_db.clear_queue_assignment``;
    * any other workflow is re-executed with ``execute_workflow_by_id``.

    When ``DBOSWorkflowFunctionNotFoundError`` is raised, the remaining
    entries are retried after a one-second pause. The loop ends once the list
    is empty or the stop event registered in ``dbos.stop_events`` is set.

    Args:
        dbos: The DBOS instance providing the system database, the logger and
            the list of stop events.
        pending_workflows: Pending workflows to recover; emptied as entries
            are processed.

    Raises:
        Exception: Any error other than ``DBOSWorkflowFunctionNotFoundError``
            is logged with its traceback and re-raised.
    """
    stop_event = threading.Event()
    dbos.stop_events.append(stop_event)
    while not stop_event.is_set() and len(pending_workflows) > 0:
        try:
            # Iterate over a snapshot because handled entries are removed below.
            for pending_workflow in list(pending_workflows):
                if (
                    pending_workflow.queue_name
                    and pending_workflow.queue_name != "_dbos_internal_queue"
                ):
                    dbos._sys_db.clear_queue_assignment(pending_workflow.workflow_uuid)
                else:
                    execute_workflow_by_id(dbos, pending_workflow.workflow_uuid)
                # Remove the entry in BOTH cases. Leaving queued workflows in
                # the list would keep the outer loop spinning forever and
                # clear their queue assignment over and over again.
                pending_workflows.remove(pending_workflow)
        except DBOSWorkflowFunctionNotFoundError:
            # The workflow function is not available yet; retry shortly.
            time.sleep(1)
        except Exception:
            dbos.logger.error(
                f"Exception encountered when recovering workflows: {traceback.format_exc()}"
            )
            raise
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def recover_pending_workflows(
    dbos: "DBOS", executor_ids: List[str] = ["local"]
) -> List["WorkflowHandle[Any]"]:
    """Recover the pending workflows of the given executors and return their handles.

    For each executor ID, the pending workflows are read from the system
    database and handled as follows:

    * a workflow whose ``queue_name`` is set and is not
      ``"_dbos_internal_queue"`` has its queue assignment cleared and a handle
      obtained with ``dbos.retrieve_workflow``; failures are logged and the
      workflow is skipped;
    * any other workflow is re-executed with ``execute_workflow_by_id``.

    Recovery for the ``"local"`` executor is skipped when the ``DBOS__VMID``
    environment variable is set.

    Args:
        dbos: The DBOS instance providing the system database and the logger.
        executor_ids: IDs of the executors whose workflows are recovered. The
            default list is only read, never mutated.

    Returns:
        One handle per recovered workflow, in processing order.
    """
    workflow_handles: List["WorkflowHandle[Any]"] = []
    for executor_id in executor_ids:
        vm_id = os.environ.get("DBOS__VMID")
        if executor_id == "local" and vm_id:
            dbos.logger.debug(
                f"Skip local recovery because it's running in a VM: {vm_id}"
            )
            # Actually skip: previously the message was logged but the
            # recovery below still ran for the "local" executor.
            continue
        dbos.logger.debug(f"Recovering pending workflows for executor: {executor_id}")
        pending_workflows = dbos._sys_db.get_pending_workflows(executor_id)
        for pending_workflow in pending_workflows:
            if (
                pending_workflow.queue_name
                and pending_workflow.queue_name != "_dbos_internal_queue"
            ):
                try:
                    dbos._sys_db.clear_queue_assignment(pending_workflow.workflow_uuid)
                    workflow_handles.append(
                        dbos.retrieve_workflow(pending_workflow.workflow_uuid)
                    )
                except Exception as e:
                    # Best effort: one failing workflow must not abort the
                    # recovery of the remaining ones.
                    dbos.logger.error(e)
            else:
                workflow_handles.append(
                    execute_workflow_by_id(dbos, pending_workflow.workflow_uuid)
                )

    dbos.logger.info("Recovered pending workflows")
    return workflow_handles
|
|
@@ -140,6 +140,12 @@ class GetWorkflowsOutput:
|
|
|
140
140
|
self.workflow_uuids = workflow_uuids
|
|
141
141
|
|
|
142
142
|
|
|
143
|
+
class GetPendingWorkflowsOutput:
    """Identify one pending workflow and, if any, the queue it belongs to."""

    def __init__(self, *, workflow_uuid: str, queue_name: Optional[str] = None):
        """Store the workflow ID and its optional queue name (keyword-only)."""
        self.workflow_uuid: str = workflow_uuid
        self.queue_name: Optional[str] = queue_name

    def __repr__(self) -> str:
        # Readable representation so lists of pending workflows can be logged.
        return (
            f"{type(self).__name__}("
            f"workflow_uuid={self.workflow_uuid!r}, queue_name={self.queue_name!r})"
        )
|
|
147
|
+
|
|
148
|
+
|
|
143
149
|
class WorkflowInformation(TypedDict, total=False):
|
|
144
150
|
workflow_uuid: str
|
|
145
151
|
status: WorkflowStatuses # The status of the workflow.
|
|
@@ -221,7 +227,12 @@ class SystemDatabase:
|
|
|
221
227
|
self.engine.url.render_as_string(hide_password=False),
|
|
222
228
|
)
|
|
223
229
|
alembic_cfg.set_main_option("sqlalchemy.url", escaped_conn_string)
|
|
224
|
-
|
|
230
|
+
try:
|
|
231
|
+
command.upgrade(alembic_cfg, "head")
|
|
232
|
+
except Exception as e:
|
|
233
|
+
dbos_logger.warning(
|
|
234
|
+
f"Exception during system database construction. This is most likely because the system database was configured using a later version of DBOS: {e}"
|
|
235
|
+
)
|
|
225
236
|
|
|
226
237
|
self.notification_conn: Optional[psycopg.connection.Connection] = None
|
|
227
238
|
self.notifications_map: Dict[str, threading.Condition] = {}
|
|
@@ -465,6 +476,7 @@ class SystemDatabase:
|
|
|
465
476
|
SystemSchema.workflow_status.c.authenticated_roles,
|
|
466
477
|
SystemSchema.workflow_status.c.assumed_role,
|
|
467
478
|
SystemSchema.workflow_status.c.queue_name,
|
|
479
|
+
SystemSchema.workflow_status.c.executor_id,
|
|
468
480
|
).where(SystemSchema.workflow_status.c.workflow_uuid == workflow_uuid)
|
|
469
481
|
).fetchone()
|
|
470
482
|
if row is None:
|
|
@@ -479,7 +491,7 @@ class SystemDatabase:
|
|
|
479
491
|
"error": None,
|
|
480
492
|
"app_id": None,
|
|
481
493
|
"app_version": None,
|
|
482
|
-
"executor_id":
|
|
494
|
+
"executor_id": row[10],
|
|
483
495
|
"request": row[2],
|
|
484
496
|
"recovery_attempts": row[3],
|
|
485
497
|
"authenticated_user": row[6],
|
|
@@ -665,7 +677,7 @@ class SystemDatabase:
|
|
|
665
677
|
|
|
666
678
|
def get_workflows(self, input: GetWorkflowsInput) -> GetWorkflowsOutput:
|
|
667
679
|
query = sa.select(SystemSchema.workflow_status.c.workflow_uuid).order_by(
|
|
668
|
-
SystemSchema.workflow_status.c.created_at.
|
|
680
|
+
SystemSchema.workflow_status.c.created_at.asc()
|
|
669
681
|
)
|
|
670
682
|
if input.name:
|
|
671
683
|
query = query.where(SystemSchema.workflow_status.c.name == input.name)
|
|
@@ -711,7 +723,7 @@ class SystemDatabase:
|
|
|
711
723
|
SystemSchema.workflow_queue.c.workflow_uuid
|
|
712
724
|
== SystemSchema.workflow_status.c.workflow_uuid,
|
|
713
725
|
)
|
|
714
|
-
.order_by(SystemSchema.workflow_status.c.created_at.
|
|
726
|
+
.order_by(SystemSchema.workflow_status.c.created_at.asc())
|
|
715
727
|
)
|
|
716
728
|
|
|
717
729
|
if input.get("name"):
|
|
@@ -746,16 +758,27 @@ class SystemDatabase:
|
|
|
746
758
|
|
|
747
759
|
return GetWorkflowsOutput(workflow_uuids)
|
|
748
760
|
|
|
749
|
-
def get_pending_workflows(
|
|
761
|
+
def get_pending_workflows(
|
|
762
|
+
self, executor_id: str
|
|
763
|
+
) -> list[GetPendingWorkflowsOutput]:
|
|
750
764
|
with self.engine.begin() as c:
|
|
751
765
|
rows = c.execute(
|
|
752
|
-
sa.select(
|
|
766
|
+
sa.select(
|
|
767
|
+
SystemSchema.workflow_status.c.workflow_uuid,
|
|
768
|
+
SystemSchema.workflow_status.c.queue_name,
|
|
769
|
+
).where(
|
|
753
770
|
SystemSchema.workflow_status.c.status
|
|
754
771
|
== WorkflowStatusString.PENDING.value,
|
|
755
772
|
SystemSchema.workflow_status.c.executor_id == executor_id,
|
|
756
773
|
)
|
|
757
774
|
).fetchall()
|
|
758
|
-
return [
|
|
775
|
+
return [
|
|
776
|
+
GetPendingWorkflowsOutput(
|
|
777
|
+
workflow_uuid=row.workflow_uuid,
|
|
778
|
+
queue_name=row.queue_name,
|
|
779
|
+
)
|
|
780
|
+
for row in rows
|
|
781
|
+
]
|
|
759
782
|
|
|
760
783
|
def record_operation_result(
|
|
761
784
|
self, result: OperationResultInternal, conn: Optional[sa.Connection] = None
|
|
@@ -1375,6 +1398,19 @@ class SystemDatabase:
|
|
|
1375
1398
|
.values(completed_at_epoch_ms=int(time.time() * 1000))
|
|
1376
1399
|
)
|
|
1377
1400
|
|
|
1401
|
+
def clear_queue_assignment(self, workflow_id: str) -> None:
    """Release a workflow from its executor and mark it as enqueued.

    Inside a single ``engine.begin()`` block, two updates are issued for the
    given workflow ID: the ``workflow_queue`` row gets ``executor_id`` and
    ``started_at_epoch_ms`` set to NULL, and the ``workflow_status`` row gets
    ``executor_id`` set to NULL and its status set to ``ENQUEUED``.
    """
    queue_table = SystemSchema.workflow_queue
    status_table = SystemSchema.workflow_status

    reset_queue_entry = (
        sa.update(queue_table)
        .where(queue_table.c.workflow_uuid == workflow_id)
        .values(executor_id=None, started_at_epoch_ms=None)
    )
    mark_enqueued = (
        sa.update(status_table)
        .where(status_table.c.workflow_uuid == workflow_id)
        .values(executor_id=None, status=WorkflowStatusString.ENQUEUED.value)
    )

    with self.engine.begin() as c:
        # Queue entry first, then the status row, as two separate statements.
        for statement in (reset_queue_entry, mark_enqueued):
            c.execute(statement)
|
|
1413
|
+
|
|
1378
1414
|
|
|
1379
1415
|
def reset_system_database(config: ConfigFile) -> None:
|
|
1380
1416
|
sysdb_name = (
|
|
@@ -905,10 +905,10 @@ def test_send_recv_temp_wf(dbos: DBOS) -> None:
|
|
|
905
905
|
|
|
906
906
|
wfs = dbos._sys_db.get_workflows(gwi)
|
|
907
907
|
assert len(wfs.workflow_uuids) == 2
|
|
908
|
-
assert wfs.workflow_uuids[
|
|
909
|
-
assert wfs.workflow_uuids[
|
|
908
|
+
assert wfs.workflow_uuids[0] == dest_uuid
|
|
909
|
+
assert wfs.workflow_uuids[1] != dest_uuid
|
|
910
910
|
|
|
911
|
-
wfi = dbos._sys_db.get_workflow_info(wfs.workflow_uuids[
|
|
911
|
+
wfi = dbos._sys_db.get_workflow_info(wfs.workflow_uuids[1], False)
|
|
912
912
|
assert wfi
|
|
913
913
|
assert wfi["name"] == "<temp>.temp_send_workflow"
|
|
914
914
|
|
|
@@ -9,7 +9,7 @@ from psycopg.errors import SerializationFailure
|
|
|
9
9
|
from sqlalchemy.exc import InvalidRequestError, OperationalError
|
|
10
10
|
|
|
11
11
|
# Public API
|
|
12
|
-
from dbos import DBOS, GetWorkflowsInput,
|
|
12
|
+
from dbos import DBOS, GetWorkflowsInput, SetWorkflowID
|
|
13
13
|
from dbos._error import DBOSDeadLetterQueueError, DBOSException
|
|
14
14
|
from dbos._sys_db import WorkflowStatusString
|
|
15
15
|
|
|
@@ -615,11 +615,17 @@ def test_queue_recovery(dbos: DBOS) -> None:
|
|
|
615
615
|
original_handle = DBOS.start_workflow(test_workflow)
|
|
616
616
|
for e in step_events:
|
|
617
617
|
e.wait()
|
|
618
|
+
e.clear()
|
|
619
|
+
|
|
618
620
|
assert step_counter == 5
|
|
619
621
|
|
|
620
622
|
# Recover the workflow, then resume it.
|
|
621
623
|
recovery_handles = DBOS.recover_pending_workflows()
|
|
624
|
+
# Wait until the 2nd invocation of each workflow is dequeued and executed
|
|
625
|
+
for e in step_events:
|
|
626
|
+
e.wait()
|
|
622
627
|
event.set()
|
|
628
|
+
|
|
623
629
|
# There should be one handle for the workflow and another for each queued step.
|
|
624
630
|
assert len(recovery_handles) == queued_steps + 1
|
|
625
631
|
# Verify that both the recovered and original workflows complete correctly.
|
|
@@ -639,6 +645,84 @@ def test_queue_recovery(dbos: DBOS) -> None:
|
|
|
639
645
|
assert queue_entries_are_cleaned_up(dbos)
|
|
640
646
|
|
|
641
647
|
|
|
648
|
+
def test_queue_concurrency_under_recovery(dbos: DBOS) -> None:
|
|
649
|
+
event = threading.Event()
|
|
650
|
+
wf_events = [threading.Event() for _ in range(2)]
|
|
651
|
+
counter = 0
|
|
652
|
+
|
|
653
|
+
@DBOS.workflow()
|
|
654
|
+
def blocked_workflow(i: int) -> None:
|
|
655
|
+
wf_events[i].set()
|
|
656
|
+
nonlocal counter
|
|
657
|
+
counter += 1
|
|
658
|
+
event.wait()
|
|
659
|
+
|
|
660
|
+
@DBOS.workflow()
|
|
661
|
+
def noop() -> None:
|
|
662
|
+
pass
|
|
663
|
+
|
|
664
|
+
queue = Queue("test_queue", concurrency=2)
|
|
665
|
+
handle1 = queue.enqueue(blocked_workflow, 0)
|
|
666
|
+
handle2 = queue.enqueue(blocked_workflow, 1)
|
|
667
|
+
handle3 = queue.enqueue(noop)
|
|
668
|
+
|
|
669
|
+
# Wait for the first two workflows to be dequeued
|
|
670
|
+
for e in wf_events:
|
|
671
|
+
e.wait()
|
|
672
|
+
e.clear()
|
|
673
|
+
|
|
674
|
+
assert counter == 2
|
|
675
|
+
assert handle1.get_status().status == WorkflowStatusString.PENDING.value
|
|
676
|
+
assert handle2.get_status().status == WorkflowStatusString.PENDING.value
|
|
677
|
+
assert handle3.get_status().status == WorkflowStatusString.ENQUEUED.value
|
|
678
|
+
|
|
679
|
+
# Manually update the database to pretend the 3rd workflow is PENDING and comes from another executor
|
|
680
|
+
with dbos._sys_db.engine.begin() as c:
|
|
681
|
+
query = (
|
|
682
|
+
sa.update(SystemSchema.workflow_status)
|
|
683
|
+
.values(status=WorkflowStatusString.PENDING.value, executor_id="other")
|
|
684
|
+
.where(
|
|
685
|
+
SystemSchema.workflow_status.c.workflow_uuid
|
|
686
|
+
== handle3.get_workflow_id()
|
|
687
|
+
)
|
|
688
|
+
)
|
|
689
|
+
c.execute(query)
|
|
690
|
+
|
|
691
|
+
# Trigger workflow recovery. The first two workflows should still be blocked but the 3rd one enqueued
|
|
692
|
+
recovered_other_handles = DBOS.recover_pending_workflows(["other"])
|
|
693
|
+
assert handle1.get_status().status == WorkflowStatusString.PENDING.value
|
|
694
|
+
assert handle2.get_status().status == WorkflowStatusString.PENDING.value
|
|
695
|
+
assert len(recovered_other_handles) == 1
|
|
696
|
+
assert recovered_other_handles[0].get_workflow_id() == handle3.get_workflow_id()
|
|
697
|
+
assert handle3.get_status().status == WorkflowStatusString.ENQUEUED.value
|
|
698
|
+
|
|
699
|
+
# Trigger workflow recovery for "local". The first two workflows should be re-enqueued then dequeued again
|
|
700
|
+
recovered_local_handles = DBOS.recover_pending_workflows(["local"])
|
|
701
|
+
assert len(recovered_local_handles) == 2
|
|
702
|
+
for h in recovered_local_handles:
|
|
703
|
+
assert h.get_workflow_id() in [
|
|
704
|
+
handle1.get_workflow_id(),
|
|
705
|
+
handle2.get_workflow_id(),
|
|
706
|
+
]
|
|
707
|
+
for e in wf_events:
|
|
708
|
+
e.wait()
|
|
709
|
+
assert counter == 4
|
|
710
|
+
assert handle1.get_status().status == WorkflowStatusString.PENDING.value
|
|
711
|
+
assert handle2.get_status().status == WorkflowStatusString.PENDING.value
|
|
712
|
+
# Because tasks are re-enqueued in order, the 3rd task is head of line blocked
|
|
713
|
+
assert handle3.get_status().status == WorkflowStatusString.ENQUEUED.value
|
|
714
|
+
|
|
715
|
+
# Unblock the first two workflows
|
|
716
|
+
event.set()
|
|
717
|
+
|
|
718
|
+
# Verify all queue entries eventually get cleaned up.
|
|
719
|
+
assert handle1.get_result() == None
|
|
720
|
+
assert handle2.get_result() == None
|
|
721
|
+
assert handle3.get_result() == None
|
|
722
|
+
assert handle3.get_status().executor_id == "local"
|
|
723
|
+
assert queue_entries_are_cleaned_up(dbos)
|
|
724
|
+
|
|
725
|
+
|
|
642
726
|
def test_cancelling_queued_workflows(dbos: DBOS) -> None:
|
|
643
727
|
start_event = threading.Event()
|
|
644
728
|
blocking_event = threading.Event()
|
|
@@ -746,17 +830,28 @@ def test_dlq_enqueued_workflows(dbos: DBOS) -> None:
|
|
|
746
830
|
|
|
747
831
|
# Attempt to recover the blocked workflow the maximum number of times
|
|
748
832
|
for i in range(max_recovery_attempts):
|
|
833
|
+
start_event.clear()
|
|
749
834
|
DBOS.recover_pending_workflows()
|
|
835
|
+
start_event.wait()
|
|
750
836
|
assert recovery_count == i + 2
|
|
751
837
|
|
|
752
|
-
# Verify an additional recovery throws
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
838
|
+
# Verify an additional recovery puts the workflow in the DLQ status.
|
|
839
|
+
DBOS.recover_pending_workflows()
|
|
840
|
+
# we can't start_event.wait() here because the workflow will never execute
|
|
841
|
+
time.sleep(2)
|
|
756
842
|
assert (
|
|
757
843
|
blocked_handle.get_status().status
|
|
758
844
|
== WorkflowStatusString.RETRIES_EXCEEDED.value
|
|
759
845
|
)
|
|
846
|
+
with dbos._sys_db.engine.begin() as c:
|
|
847
|
+
query = sa.select(SystemSchema.workflow_status.c.recovery_attempts).where(
|
|
848
|
+
SystemSchema.workflow_status.c.workflow_uuid
|
|
849
|
+
== blocked_handle.get_workflow_id()
|
|
850
|
+
)
|
|
851
|
+
result = c.execute(query)
|
|
852
|
+
row = result.fetchone()
|
|
853
|
+
assert row is not None
|
|
854
|
+
assert row[0] == max_recovery_attempts + 2
|
|
760
855
|
|
|
761
856
|
# Verify the blocked workflow entering the DLQ lets the regular workflow run
|
|
762
857
|
assert regular_handle.get_result() == None
|
|
@@ -766,6 +861,15 @@ def test_dlq_enqueued_workflows(dbos: DBOS) -> None:
|
|
|
766
861
|
assert blocked_handle.get_result() == None
|
|
767
862
|
dbos._sys_db.wait_for_buffer_flush()
|
|
768
863
|
assert blocked_handle.get_status().status == WorkflowStatusString.SUCCESS.value
|
|
864
|
+
with dbos._sys_db.engine.begin() as c:
|
|
865
|
+
query = sa.select(SystemSchema.workflow_status.c.recovery_attempts).where(
|
|
866
|
+
SystemSchema.workflow_status.c.workflow_uuid
|
|
867
|
+
== blocked_handle.get_workflow_id()
|
|
868
|
+
)
|
|
869
|
+
result = c.execute(query)
|
|
870
|
+
row = result.fetchone()
|
|
871
|
+
assert row is not None
|
|
872
|
+
assert row[0] == max_recovery_attempts + 2
|
|
769
873
|
|
|
770
874
|
# Verify all queue entries eventually get cleaned up.
|
|
771
875
|
assert queue_entries_are_cleaned_up(dbos)
|
|
@@ -245,8 +245,8 @@ def test_queued_workflows(dbos: DBOS, config: ConfigFile) -> None:
|
|
|
245
245
|
assert workflow.status == WorkflowStatusString.PENDING.value
|
|
246
246
|
assert workflow.queue_name == queue.name
|
|
247
247
|
assert workflow.input is not None
|
|
248
|
-
# Verify
|
|
249
|
-
assert workflow.input["args"][0] ==
|
|
248
|
+
# Verify oldest queue entries appear first
|
|
249
|
+
assert workflow.input["args"][0] == i
|
|
250
250
|
assert workflow.output is None
|
|
251
251
|
assert workflow.error is None
|
|
252
252
|
assert "blocking_step" in workflow.workflowName
|
dbos-0.21.0a5/dbos/_recovery.py
DELETED
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import threading
|
|
3
|
-
import time
|
|
4
|
-
import traceback
|
|
5
|
-
from typing import TYPE_CHECKING, Any, List
|
|
6
|
-
|
|
7
|
-
from ._core import execute_workflow_by_id
|
|
8
|
-
from ._error import DBOSWorkflowFunctionNotFoundError
|
|
9
|
-
|
|
10
|
-
if TYPE_CHECKING:
|
|
11
|
-
from ._dbos import DBOS, WorkflowHandle
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def startup_recovery_thread(dbos: "DBOS", workflow_ids: List[str]) -> None:
|
|
15
|
-
"""Attempt to recover local pending workflows on startup using a background thread."""
|
|
16
|
-
stop_event = threading.Event()
|
|
17
|
-
dbos.stop_events.append(stop_event)
|
|
18
|
-
while not stop_event.is_set() and len(workflow_ids) > 0:
|
|
19
|
-
try:
|
|
20
|
-
for workflowID in list(workflow_ids):
|
|
21
|
-
execute_workflow_by_id(dbos, workflowID)
|
|
22
|
-
workflow_ids.remove(workflowID)
|
|
23
|
-
except DBOSWorkflowFunctionNotFoundError:
|
|
24
|
-
time.sleep(1)
|
|
25
|
-
except Exception as e:
|
|
26
|
-
dbos.logger.error(
|
|
27
|
-
f"Exception encountered when recovering workflows: {traceback.format_exc()}"
|
|
28
|
-
)
|
|
29
|
-
raise e
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def recover_pending_workflows(
|
|
33
|
-
dbos: "DBOS", executor_ids: List[str] = ["local"]
|
|
34
|
-
) -> List["WorkflowHandle[Any]"]:
|
|
35
|
-
workflow_handles: List["WorkflowHandle[Any]"] = []
|
|
36
|
-
for executor_id in executor_ids:
|
|
37
|
-
if executor_id == "local" and os.environ.get("DBOS__VMID"):
|
|
38
|
-
dbos.logger.debug(
|
|
39
|
-
f"Skip local recovery because it's running in a VM: {os.environ.get('DBOS__VMID')}"
|
|
40
|
-
)
|
|
41
|
-
dbos.logger.debug(f"Recovering pending workflows for executor: {executor_id}")
|
|
42
|
-
workflow_ids = dbos._sys_db.get_pending_workflows(executor_id)
|
|
43
|
-
dbos.logger.debug(f"Pending workflows: {workflow_ids}")
|
|
44
|
-
|
|
45
|
-
for workflowID in workflow_ids:
|
|
46
|
-
handle = execute_workflow_by_id(dbos, workflowID)
|
|
47
|
-
workflow_handles.append(handle)
|
|
48
|
-
|
|
49
|
-
dbos.logger.info("Recovered pending workflows")
|
|
50
|
-
return workflow_handles
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dbos-0.21.0a5 → dbos-0.22.0a1}/dbos/_migrations/versions/5c361fc04708_added_system_tables.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|