dbos 0.8.0a3__py3-none-any.whl → 0.8.0a7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbos/core.py +39 -7
- dbos/dbos.py +44 -36
- dbos/error.py +11 -0
- dbos/migrations/versions/d76646551a6b_job_queue_limiter.py +42 -0
- dbos/queue.py +21 -8
- dbos/registrations.py +3 -0
- dbos/schemas/system_database.py +8 -0
- dbos/system_database.py +145 -22
- {dbos-0.8.0a3.dist-info → dbos-0.8.0a7.dist-info}/METADATA +1 -1
- {dbos-0.8.0a3.dist-info → dbos-0.8.0a7.dist-info}/RECORD +13 -12
- {dbos-0.8.0a3.dist-info → dbos-0.8.0a7.dist-info}/WHEEL +0 -0
- {dbos-0.8.0a3.dist-info → dbos-0.8.0a7.dist-info}/entry_points.txt +0 -0
- {dbos-0.8.0a3.dist-info → dbos-0.8.0a7.dist-info}/licenses/LICENSE +0 -0
dbos/core.py
CHANGED

@@ -38,6 +38,7 @@ from dbos.error import (
     DBOSWorkflowFunctionNotFoundError,
 )
 from dbos.registrations import (
+    DEFAULT_MAX_RECOVERY_ATTEMPTS,
     get_config_name,
     get_dbos_class_name,
     get_dbos_func_name,
@@ -118,6 +119,7 @@ def _init_workflow(
     config_name: Optional[str],
     temp_wf_type: Optional[str],
     queue: Optional[str] = None,
+    max_recovery_attempts: int = DEFAULT_MAX_RECOVERY_ATTEMPTS,
 ) -> WorkflowStatusInternal:
     wfid = (
         ctx.workflow_id
@@ -157,7 +159,9 @@ def _init_workflow(
         # Synchronously record the status and inputs for workflows and single-step workflows
         # We also have to do this for single-step workflows because of the foreign key constraint on the operation outputs table
         # TODO: Make this transactional (and with the queue step below)
-        dbos._sys_db.update_workflow_status(status, False, ctx.in_recovery)
+        dbos._sys_db.update_workflow_status(
+            status, False, ctx.in_recovery, max_recovery_attempts=max_recovery_attempts
+        )
         dbos._sys_db.update_workflow_inputs(wfid, utils.serialize_args(inputs))
     else:
         # Buffer the inputs for single-transaction workflows, but don't buffer the status
@@ -181,7 +185,8 @@ def _execute_workflow(
         status["status"] = "SUCCESS"
         status["output"] = utils.serialize(output)
         if status["queue_name"] is not None:
-            dbos._sys_db.remove_from_queue(status["workflow_uuid"])
+            queue = dbos._registry.queue_info_map[status["queue_name"]]
+            dbos._sys_db.remove_from_queue(status["workflow_uuid"], queue)
         dbos._sys_db.buffer_workflow_status(status)
     except DBOSWorkflowConflictIDError:
         # Retrieve the workflow handle and wait for the result.
@@ -195,7 +200,8 @@ def _execute_workflow(
         status["status"] = "ERROR"
         status["error"] = utils.serialize_exception(error)
         if status["queue_name"] is not None:
-            dbos._sys_db.remove_from_queue(status["workflow_uuid"])
+            queue = dbos._registry.queue_info_map[status["queue_name"]]
+            dbos._sys_db.remove_from_queue(status["workflow_uuid"], queue)
         dbos._sys_db.update_workflow_status(status)
         raise
 
@@ -218,7 +224,7 @@ def _execute_workflow_wthread(
     with EnterDBOSWorkflow(attributes):
         try:
             return _execute_workflow(dbos, status, func, *args, **kwargs)
-        except Exception
+        except Exception:
             dbos.logger.error(
                 f"Exception encountered in asynchronous workflow: {traceback.format_exc()}"
             )
@@ -289,10 +295,15 @@ def _execute_workflow_id(dbos: "DBOS", workflow_id: str) -> "WorkflowHandle[Any]":
     )
 
 
-def _workflow_wrapper(dbosreg: "_DBOSRegistry", func: F) -> F:
+def _workflow_wrapper(
+    dbosreg: "_DBOSRegistry",
+    func: F,
+    max_recovery_attempts: int = DEFAULT_MAX_RECOVERY_ATTEMPTS,
+) -> F:
     func.__orig_func = func  # type: ignore
 
     fi = get_or_create_func_info(func)
+    fi.max_recovery_attempts = max_recovery_attempts
 
     @wraps(func)
     def wrapper(*args: Any, **kwargs: Any) -> Any:
@@ -325,17 +336,21 @@ def _workflow_wrapper(dbosreg: "_DBOSRegistry", func: F) -> F:
                 class_name=get_dbos_class_name(fi, func, args),
                 config_name=get_config_name(fi, func, args),
                 temp_wf_type=get_temp_workflow_type(func),
+                max_recovery_attempts=max_recovery_attempts,
             )
 
+            dbos.logger.debug(
+                f"Running workflow, id: {ctx.workflow_id}, name: {get_dbos_func_name(func)}"
+            )
             return _execute_workflow(dbos, status, func, *args, **kwargs)
 
     wrapped_func = cast(F, wrapper)
     return wrapped_func
 
 
-def _workflow(reg: "_DBOSRegistry") -> Callable[[F], F]:
+def _workflow(reg: "_DBOSRegistry", max_recovery_attempts: int) -> Callable[[F], F]:
     def _workflow_decorator(func: F) -> F:
-        wrapped_func = _workflow_wrapper(reg, func)
+        wrapped_func = _workflow_wrapper(reg, func, max_recovery_attempts)
         reg.register_wf_function(func.__qualname__, wrapped_func)
         return wrapped_func
 
@@ -401,6 +416,7 @@ def _start_workflow(
         config_name=get_config_name(fi, func, gin_args),
         temp_wf_type=get_temp_workflow_type(func),
         queue=queue_name,
+        max_recovery_attempts=fi.max_recovery_attempts,
     )
 
     if not execute_workflow:
@@ -477,6 +493,9 @@ def _transaction(
                         )
                     )
                     if recorded_output:
+                        dbos.logger.debug(
+                            f"Replaying transaction, id: {ctx.function_id}, name: {attributes['name']}"
+                        )
                         if recorded_output["error"]:
                             deserialized_error = (
                                 utils.deserialize_exception(
@@ -493,6 +512,11 @@ def _transaction(
                             raise Exception(
                                 "Output and error are both None"
                             )
+                    else:
+                        dbos.logger.debug(
+                            f"Running transaction, id: {ctx.function_id}, name: {attributes['name']}"
+                        )
+
                     output = func(*args, **kwargs)
                     txn_output["output"] = utils.serialize(output)
                     assert (
@@ -590,6 +614,9 @@ def _step(
                 ctx.workflow_id, ctx.function_id
             )
             if recorded_output:
+                dbos.logger.debug(
+                    f"Replaying step, id: {ctx.function_id}, name: {attributes['name']}"
+                )
                 if recorded_output["error"] is not None:
                     deserialized_error = utils.deserialize_exception(
                         recorded_output["error"]
@@ -599,6 +626,11 @@ def _step(
                     return utils.deserialize(recorded_output["output"])
                 else:
                     raise Exception("Output and error are both None")
+            else:
+                dbos.logger.debug(
+                    f"Running step, id: {ctx.function_id}, name: {attributes['name']}"
+                )
+
             output = None
             error = None
             local_max_attempts = max_attempts if retries_allowed else 1
dbos/dbos.py
CHANGED

@@ -5,6 +5,7 @@ import json
 import os
 import sys
 import threading
+import traceback
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass
 from logging import Logger
@@ -42,6 +43,7 @@ from dbos.decorators import classproperty
 from dbos.queue import Queue, queue_thread
 from dbos.recovery import _recover_pending_workflows, _startup_recovery_thread
 from dbos.registrations import (
+    DEFAULT_MAX_RECOVERY_ATTEMPTS,
     DBOSClassInfo,
     get_or_create_class_info,
     set_dbos_func_name,
@@ -338,43 +340,47 @@ class DBOS:
             _dbos_global_instance._launch()
 
     def _launch(self) -> None:
-        if self._launched:
-            dbos_logger.warning(f"DBOS was already launched")
-            return
-        self._launched = True
-        self._executor_field = ThreadPoolExecutor(max_workers=64)
-        self._sys_db_field = SystemDatabase(self.config)
-        self._app_db_field = ApplicationDatabase(self.config)
-        self._admin_server_field = AdminServer(dbos=self)
-
-        if not os.environ.get("DBOS__VMID"):
-            workflow_ids = self._sys_db.get_pending_workflows("local")
-            self._executor.submit(_startup_recovery_thread, self, workflow_ids)
-
-        # Listen to notifications
-        self._executor.submit(self._sys_db._notification_listener)
-
-        # Start flush workflow buffers thread
-        self._executor.submit(self._sys_db.flush_workflow_buffers)
-
-        # Start the queue thread
-        evt = threading.Event()
-        self.stop_events.append(evt)
-        self._executor.submit(queue_thread, evt, self)
-
-        # Grab any pollers that were deferred and start them
-        for evt, func, args, kwargs in self._registry.pollers:
+        try:
+            if self._launched:
+                dbos_logger.warning(f"DBOS was already launched")
+                return
+            self._launched = True
+            self._executor_field = ThreadPoolExecutor(max_workers=64)
+            self._sys_db_field = SystemDatabase(self.config)
+            self._app_db_field = ApplicationDatabase(self.config)
+            self._admin_server_field = AdminServer(dbos=self)
+
+            if not os.environ.get("DBOS__VMID"):
+                workflow_ids = self._sys_db.get_pending_workflows("local")
+                self._executor.submit(_startup_recovery_thread, self, workflow_ids)
+
+            # Listen to notifications
+            self._executor.submit(self._sys_db._notification_listener)
+
+            # Start flush workflow buffers thread
+            self._executor.submit(self._sys_db.flush_workflow_buffers)
+
+            # Start the queue thread
+            evt = threading.Event()
             self.stop_events.append(evt)
-            self._executor.submit(func, *args, **kwargs)
-        self._registry.pollers = []
+            self._executor.submit(queue_thread, evt, self)
 
-        dbos_logger.info("DBOS launched")
+            # Grab any pollers that were deferred and start them
+            for evt, func, args, kwargs in self._registry.pollers:
+                self.stop_events.append(evt)
+                self._executor.submit(func, *args, **kwargs)
+            self._registry.pollers = []
 
-        # Flush handlers and add OTLP to all loggers if enabled
-        # to enable their export in DBOS Cloud
-        for handler in dbos_logger.handlers:
-            handler.flush()
-        add_otlp_to_all_loggers()
+            dbos_logger.info("DBOS launched")
+
+            # Flush handlers and add OTLP to all loggers if enabled
+            # to enable their export in DBOS Cloud
+            for handler in dbos_logger.handlers:
+                handler.flush()
+            add_otlp_to_all_loggers()
+        except Exception:
+            dbos_logger.error(f"DBOS failed to launch: {traceback.format_exc()}")
+            raise
 
     def _destroy(self) -> None:
         self._initialized = False
@@ -401,9 +407,11 @@ class DBOS:
 
     # Decorators for DBOS functionality
     @classmethod
-    def workflow(cls) -> Callable[[F], F]:
+    def workflow(
+        cls, *, max_recovery_attempts: int = DEFAULT_MAX_RECOVERY_ATTEMPTS
+    ) -> Callable[[F], F]:
         """Decorate a function for use as a DBOS workflow."""
-        return _workflow(_get_or_create_dbos_registry())
+        return _workflow(_get_or_create_dbos_registry(), max_recovery_attempts)
 
     @classmethod
     def transaction(
dbos/error.py
CHANGED

@@ -32,6 +32,7 @@ class DBOSErrorCode(Enum):
     InitializationError = 3
     WorkflowFunctionNotFound = 4
     NonExistentWorkflowError = 5
+    DeadLetterQueueError = 6
     MaxStepRetriesExceeded = 7
     NotAuthorized = 8
 
@@ -86,6 +87,16 @@ class DBOSNonExistentWorkflowError(DBOSException):
         )
 
 
+class DBOSDeadLetterQueueError(DBOSException):
+    """Exception raised when a workflow database record does not exist for a given ID."""
+
+    def __init__(self, wf_id: str, max_retries: int):
+        super().__init__(
+            f"Workflow {wf_id} has been moved to the dead-letter queue after exceeding the maximum of ${max_retries} retries",
+            dbos_error_code=DBOSErrorCode.DeadLetterQueueError.value,
+        )
+
+
 class DBOSNotAuthorizedError(DBOSException):
     """Exception raised by DBOS role-based security when the user is not authorized to access a function."""
 
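
`DBOSDeadLetterQueueError` carries the new error code 6 and is raised by the system database once a workflow exceeds its recovery-attempt cap. A hedged handling sketch, assuming `DBOSException` keeps its `dbos_error_code` argument as an attribute, as the constructor call above suggests (the raise below only simulates the library's behavior):

from dbos.error import DBOSDeadLetterQueueError, DBOSErrorCode

try:
    # Simulate the system database dead-lettering a workflow after it
    # exceeded its recovery-attempt cap.
    raise DBOSDeadLetterQueueError("wf-123", 50)
except DBOSDeadLetterQueueError as e:
    # The message names the workflow ID and the retry cap; the numeric
    # code matches DBOSErrorCode.DeadLetterQueueError (6).
    assert e.dbos_error_code == DBOSErrorCode.DeadLetterQueueError.value
    print(e)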
dbos/migrations/versions/d76646551a6b_job_queue_limiter.py
ADDED

@@ -0,0 +1,42 @@
+"""job_queue_limiter
+
+Revision ID: d76646551a6b
+Revises: 50f3227f0b4b
+Create Date: 2024-09-25 14:48:10.218015
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "d76646551a6b"
+down_revision: Union[str, None] = "50f3227f0b4b"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "job_queue",
+        sa.Column(
+            "started_at_epoch_ms",
+            sa.BigInteger(),
+        ),
+        schema="dbos",
+    )
+    op.add_column(
+        "job_queue",
+        sa.Column(
+            "completed_at_epoch_ms",
+            sa.BigInteger(),
+        ),
+        schema="dbos",
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("job_queue", "started_at_epoch_ms", schema="dbos")
+    op.drop_column("job_queue", "completed_at_epoch_ms", schema="dbos")
dbos/queue.py
CHANGED

@@ -1,7 +1,6 @@
 import threading
-import time
 import traceback
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, Optional, TypedDict
 
 from dbos.core import P, R, _execute_workflow_id, _start_workflow
 
@@ -9,10 +8,25 @@ if TYPE_CHECKING:
     from dbos.dbos import DBOS, Workflow, WorkflowHandle
 
 
+# Limit the maximum number of functions from this queue
+# that can be started in a given period. If the limit is 5
+# and the period is 10, no more than 5 functions can be
+# started per 10 seconds.
+class Limiter(TypedDict):
+    limit: int
+    period: float
+
+
 class Queue:
-    def __init__(self, name: str, concurrency: Optional[int] = None) -> None:
+    def __init__(
+        self,
+        name: str,
+        concurrency: Optional[int] = None,
+        limiter: Optional[Limiter] = None,
+    ) -> None:
         self.name = name
         self.concurrency = concurrency
+        self.limiter = limiter
         from dbos.dbos import _get_or_create_dbos_registry
 
         registry = _get_or_create_dbos_registry()
@@ -29,12 +43,11 @@ class Queue:
 
 def queue_thread(stop_event: threading.Event, dbos: "DBOS") -> None:
     while not stop_event.is_set():
-        time.sleep(1)
-        for queue_name, queue in dbos._registry.queue_info_map.items():
+        if stop_event.wait(timeout=1):
+            return
+        for _, queue in dbos._registry.queue_info_map.items():
             try:
-                wf_ids = dbos._sys_db.start_queued_workflows(
-                    queue_name, queue.concurrency
-                )
+                wf_ids = dbos._sys_db.start_queued_workflows(queue)
                 for id in wf_ids:
                     _execute_workflow_id(dbos, id)
             except Exception:
dbos/registrations.py
CHANGED

@@ -3,6 +3,8 @@ from enum import Enum
 from types import FunctionType
 from typing import Any, Callable, List, Literal, Optional, Tuple, Type, cast
 
+DEFAULT_MAX_RECOVERY_ATTEMPTS = 50
+
 
 def get_dbos_func_name(f: Any) -> str:
     if hasattr(f, "dbos_function_name"):
@@ -47,6 +49,7 @@ class DBOSFuncInfo:
         self.class_info: Optional[DBOSClassInfo] = None
         self.func_type: DBOSFuncType = DBOSFuncType.Unknown
         self.required_roles: Optional[List[str]] = None
+        self.max_recovery_attempts = DEFAULT_MAX_RECOVERY_ATTEMPTS
 
 
 def get_or_create_class_info(cls: Type[Any]) -> DBOSClassInfo:
dbos/schemas/system_database.py
CHANGED

@@ -161,4 +161,12 @@ class SystemSchema:
             nullable=False,
             server_default=text("(EXTRACT(epoch FROM now()) * 1000::numeric)::bigint"),
         ),
+        Column(
+            "started_at_epoch_ms",
+            BigInteger(),
+        ),
+        Column(
+            "completed_at_epoch_ms",
+            BigInteger(),
+        ),
     )
dbos/system_database.py
CHANGED

@@ -4,6 +4,7 @@ import threading
 import time
 from enum import Enum
 from typing import (
+    TYPE_CHECKING,
     Any,
     Dict,
     List,
@@ -24,12 +25,20 @@ from alembic.config import Config
 from sqlalchemy.exc import DBAPIError
 
 import dbos.utils as utils
-from dbos.error import DBOSNonExistentWorkflowError, DBOSWorkflowConflictIDError
+from dbos.error import (
+    DBOSDeadLetterQueueError,
+    DBOSNonExistentWorkflowError,
+    DBOSWorkflowConflictIDError,
+)
+from dbos.registrations import DEFAULT_MAX_RECOVERY_ATTEMPTS
 
 from .dbos_config import ConfigFile
 from .logger import dbos_logger
 from .schemas.system_database import SystemSchema
 
+if TYPE_CHECKING:
+    from .queue import Queue
+
 
 class WorkflowStatusString(Enum):
     """Enumeration of values allowed for `WorkflowSatusInternal.status`."""
@@ -233,7 +242,9 @@ class SystemDatabase:
         status: WorkflowStatusInternal,
         replace: bool = True,
         in_recovery: bool = False,
+        *,
         conn: Optional[sa.Connection] = None,
+        max_recovery_attempts: int = DEFAULT_MAX_RECOVERY_ATTEMPTS,
     ) -> None:
         cmd = pg.insert(SystemSchema.workflow_status).values(
             workflow_uuid=status["workflow_uuid"],
@@ -271,12 +282,36 @@ class SystemDatabase:
             )
         else:
             cmd = cmd.on_conflict_do_nothing()
+        cmd = cmd.returning(SystemSchema.workflow_status.c.recovery_attempts)  # type: ignore
 
         if conn is not None:
-            conn.execute(cmd)
+            results = conn.execute(cmd)
         else:
             with self.engine.begin() as c:
-                c.execute(cmd)
+                results = c.execute(cmd)
+        if in_recovery:
+            row = results.fetchone()
+            if row is not None:
+                recovery_attempts: int = row[0]
+                if recovery_attempts > max_recovery_attempts:
+                    with self.engine.begin() as c:
+                        c.execute(
+                            sa.update(SystemSchema.workflow_status)
+                            .where(
+                                SystemSchema.workflow_status.c.workflow_uuid
+                                == status["workflow_uuid"]
+                            )
+                            .where(
+                                SystemSchema.workflow_status.c.status
+                                == WorkflowStatusString.PENDING.value
+                            )
+                            .values(
+                                status=WorkflowStatusString.RETRIES_EXCEEDED.value,
+                            )
+                        )
+                    raise DBOSDeadLetterQueueError(
+                        status["workflow_uuid"], max_recovery_attempts
+                    )
 
         # Record we have exported status for this single-transaction workflow
         if status["workflow_uuid"] in self._temp_txn_wf_ids:
@@ -617,7 +652,14 @@ class SystemDatabase:
                 workflow_uuid, function_id, conn=c
             )
             if recorded_output is not None:
+                dbos_logger.debug(
+                    f"Replaying send, id: {function_id}, destination_uuid: {destination_uuid}, topic: {topic}"
+                )
                 return  # Already sent before
+            else:
+                dbos_logger.debug(
+                    f"Running send, id: {function_id}, destination_uuid: {destination_uuid}, topic: {topic}"
+                )
 
             try:
                 c.execute(
@@ -653,10 +695,13 @@ class SystemDatabase:
         # First, check for previous executions.
        recorded_output = self.check_operation_execution(workflow_uuid, function_id)
         if recorded_output is not None:
+            dbos_logger.debug(f"Replaying recv, id: {function_id}, topic: {topic}")
             if recorded_output["output"] is not None:
                 return utils.deserialize(recorded_output["output"])
             else:
                 raise Exception("No output recorded in the last recv")
+        else:
+            dbos_logger.debug(f"Running recv, id: {function_id}, topic: {topic}")
 
         # Insert a condition to the notifications map, so the listener can notify it when a message is received.
         payload = f"{workflow_uuid}::{topic}"
@@ -799,9 +844,11 @@ class SystemDatabase:
         recorded_output = self.check_operation_execution(workflow_uuid, function_id)
         end_time: float
         if recorded_output is not None:
+            dbos_logger.debug(f"Replaying sleep, id: {function_id}, seconds: {seconds}")
             assert recorded_output["output"] is not None, "no recorded end time"
             end_time = utils.deserialize(recorded_output["output"])
         else:
+            dbos_logger.debug(f"Running sleep, id: {function_id}, seconds: {seconds}")
             end_time = time.time() + seconds
             try:
                 self.record_operation_result(
@@ -831,7 +878,10 @@ class SystemDatabase:
                 workflow_uuid, function_id, conn=c
             )
             if recorded_output is not None:
+                dbos_logger.debug(f"Replaying set_event, id: {function_id}, key: {key}")
                 return  # Already sent before
+            else:
+                dbos_logger.debug(f"Running set_event, id: {function_id}, key: {key}")
 
             c.execute(
                 pg.insert(SystemSchema.workflow_events)
@@ -872,10 +922,17 @@ class SystemDatabase:
             caller_ctx["workflow_uuid"], caller_ctx["function_id"]
         )
         if recorded_output is not None:
+            dbos_logger.debug(
+                f"Replaying get_event, id: {caller_ctx['function_id']}, key: {key}"
+            )
             if recorded_output["output"] is not None:
                 return utils.deserialize(recorded_output["output"])
             else:
                 raise Exception("No output recorded in the last get_event")
+        else:
+            dbos_logger.debug(
+                f"Running get_event, id: {caller_ctx['function_id']}, key: {key}"
+            )
 
         payload = f"{target_uuid}::{key}"
         condition = threading.Condition()
@@ -1030,22 +1087,60 @@ class SystemDatabase:
                 .on_conflict_do_nothing()
             )
 
-    def start_queued_workflows(
-        self, queue_name: str, concurrency: Optional[int]
-    ) -> List[str]:
+    def start_queued_workflows(self, queue: "Queue") -> List[str]:
+        start_time_ms = int(time.time() * 1000)
+        if queue.limiter is not None:
+            limiter_period_ms = int(queue.limiter["period"] * 1000)
         with self.engine.begin() as c:
-            query = sa.select(SystemSchema.job_queue.c.workflow_uuid).where(
-                SystemSchema.job_queue.c.queue_name == queue_name
+            # Execute with snapshot isolation to ensure multiple workers respect limits
+            c.execute(sa.text("SET TRANSACTION ISOLATION LEVEL REPEATABLE READ"))
+
+            # If there is a limiter, compute how many functions have started in its period.
+            if queue.limiter is not None:
+                query = (
+                    sa.select(sa.func.count())
+                    .select_from(SystemSchema.job_queue)
+                    .where(SystemSchema.job_queue.c.started_at_epoch_ms.isnot(None))
+                    .where(
+                        SystemSchema.job_queue.c.started_at_epoch_ms
+                        > start_time_ms - limiter_period_ms
+                    )
+                )
+                num_recent_queries = c.execute(query).fetchone()[0]  # type: ignore
+                if num_recent_queries >= queue.limiter["limit"]:
+                    return []
+
+            # Select not-yet-completed functions in the queue ordered by the
+            # time at which they were enqueued.
+            # If there is a concurrency limit N, select only the N most recent
+            # functions, else select all of them.
+            query = (
+                sa.select(
+                    SystemSchema.job_queue.c.workflow_uuid,
+                    SystemSchema.job_queue.c.started_at_epoch_ms,
+                )
+                .where(SystemSchema.job_queue.c.queue_name == queue.name)
+                .where(SystemSchema.job_queue.c.completed_at_epoch_ms == None)
+                .order_by(SystemSchema.job_queue.c.created_at_epoch_ms.asc())
             )
-            if concurrency is not None:
-                query = query.limit(concurrency)
-
-
+            if queue.concurrency is not None:
+                query = query.limit(queue.concurrency)
+
+            # From the functions retrieved, get the workflow IDs of the functions
+            # that have not yet been started so we can start them.
             rows = c.execute(query).fetchall()
-            dequeued_ids: List[str] = [row[0] for row in rows]
-            ret_ids = []
+            dequeued_ids: List[str] = [row[0] for row in rows if row[1] is None]
+            ret_ids: list[str] = []
             for id in dequeued_ids:
-                c.execute(
+
+                # If we have a limiter, stop starting functions when the number
+                # of functions started this period exceeds the limit.
+                if queue.limiter is not None:
+                    if len(ret_ids) + num_recent_queries >= queue.limiter["limit"]:
+                        break
+
+                # To start a function, first set its status to PENDING
+                c.execute(
                     SystemSchema.workflow_status.update()
                     .where(SystemSchema.workflow_status.c.workflow_uuid == id)
                     .where(
@@ -1054,14 +1149,42 @@ class SystemDatabase:
                     )
                     .values(status=WorkflowStatusString.PENDING.value)
                 )
-                ret_ids.append(id)
-
+
+                # Then give it a start time
+                c.execute(
+                    SystemSchema.job_queue.update()
+                    .where(SystemSchema.job_queue.c.workflow_uuid == id)
+                    .values(started_at_epoch_ms=start_time_ms)
+                )
+                ret_ids.append(id)
+
+            # If we have a limiter, garbage-collect all completed functions started
+            # before the period. If there's no limiter, there's no need--they were
+            # deleted on completion.
+            if queue.limiter is not None:
+                c.execute(
+                    sa.delete(SystemSchema.job_queue)
+                    .where(SystemSchema.job_queue.c.completed_at_epoch_ms != None)
+                    .where(
+                        SystemSchema.job_queue.c.started_at_epoch_ms
+                        < start_time_ms - limiter_period_ms
+                    )
+                )
+
+            # Return the IDs of all functions we started
             return ret_ids
 
-    def remove_from_queue(self, workflow_id: str) -> None:
+    def remove_from_queue(self, workflow_id: str, queue: "Queue") -> None:
         with self.engine.begin() as c:
-            c.execute(
-                sa.delete(SystemSchema.job_queue).where(
-                    SystemSchema.job_queue.c.workflow_uuid == workflow_id
+            if queue.limiter is None:
+                c.execute(
+                    sa.delete(SystemSchema.job_queue).where(
+                        SystemSchema.job_queue.c.workflow_uuid == workflow_id
+                    )
+                )
+            else:
+                c.execute(
+                    sa.update(SystemSchema.job_queue)
+                    .where(SystemSchema.job_queue.c.workflow_uuid == workflow_id)
+                    .values(completed_at_epoch_ms=int(time.time() * 1000))
                 )
-            )
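
`start_queued_workflows` now enforces a sliding-window rate limit: under REPEATABLE READ it counts functions whose `started_at_epoch_ms` falls within the last `period`, refuses to start more once that count reaches `limit`, and stamps each newly started function with the window's reference time. The same arithmetic in isolation (a standalone sketch, not the package's code):

import time


def remaining_starts(started_at_ms: list[int], limit: int, period_s: float) -> int:
    """How many more queued functions may start in the current window."""
    now_ms = int(time.time() * 1000)
    window_start_ms = now_ms - int(period_s * 1000)
    recently_started = sum(1 for t in started_at_ms if t > window_start_ms)
    return max(0, limit - recently_started)


# With limit=5 and period=10s: three starts just now leave room for two
# more; starts older than the window no longer count.
now = int(time.time() * 1000)
assert remaining_starts([now, now, now], limit=5, period_s=10.0) == 2
assert remaining_starts([now - 60_000], limit=5, period_s=10.0) == 5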
{dbos-0.8.0a3.dist-info → dbos-0.8.0a7.dist-info}/RECORD
CHANGED

@@ -1,18 +1,18 @@
-dbos-0.8.
-dbos-0.8.
-dbos-0.8.
-dbos-0.8.
+dbos-0.8.0a7.dist-info/METADATA,sha256=cqNzAf7TbC3n_DDGTPUj_xawggwMydLnIQ5056iFDUI,5010
+dbos-0.8.0a7.dist-info/WHEEL,sha256=Vza3XR51HW1KmFP0iIMUVYIvz0uQuKJpIXKYOBGQyFQ,90
+dbos-0.8.0a7.dist-info/entry_points.txt,sha256=z6GcVANQV7Uw_82H9Ob2axJX6V3imftyZsljdh-M1HU,54
+dbos-0.8.0a7.dist-info/licenses/LICENSE,sha256=VGZit_a5-kdw9WT6fY5jxAWVwGQzgLFyPWrcVVUhVNU,1067
 dbos/__init__.py,sha256=-h1QgWNL11CiLlHEKa2ycAJVJw5SXYZ4BGNNWBAiE9k,726
 dbos/admin_sever.py,sha256=Qg5T3YRrbPW05PR_99yAaxgo1ugQrAp_uTeTqSfjm_k,3397
 dbos/application_database.py,sha256=knFK8We8y6WrIpnFCKvFq5hvSuFQqUuJqOqDpSVMCPI,5521
 dbos/cli.py,sha256=z5dXbbnGWzSC3E1rfS8Lp1_OIImzcDKM7jP-iu_Q4aI,8602
 dbos/context.py,sha256=4MsxZdoh1WIsgoUsaxo0B6caGN6xq2WC60MzbBppzGk,17738
-dbos/core.py,sha256=
+dbos/core.py,sha256=ggsRC2XicvNI1qqruEFoqxoTU5oSSnhMZvDih3AG_3A,30879
 dbos/dbos-config.schema.json,sha256=azpfmoDZg7WfSy3kvIsk9iEiKB_-VZt03VEOoXJAkqE,5331
-dbos/dbos.py,sha256=
+dbos/dbos.py,sha256=LnqX7rFETpcyxT9YHs4Uc3uOB4EDQC-zis3UFQU4smc,29705
 dbos/dbos_config.py,sha256=NJVze2GkKgYUmcPP31Unb-QpsA0TzImEeQGJgVq6W6k,5352
 dbos/decorators.py,sha256=lbPefsLK6Cya4cb7TrOcLglOpGT3pc6qjZdsQKlfZLg,629
-dbos/error.py,sha256=
+dbos/error.py,sha256=UETk8CoZL-TO2Utn1-E7OSWelhShWmKM-fOlODMR9PE,3893
 dbos/fastapi.py,sha256=gx9hlpxYOiwbuhSlbY9bn5C-F_FsCbrJvkX9ZAvDG6U,3418
 dbos/flask.py,sha256=azr4geMEGuuTBCyxIZmgDmmP-6s_pTIF-lGyp9Q4IB8,2430
 dbos/kafka.py,sha256=LH3hbNapnkjLcuXNUtdGU0398JafWb-t0GwUl3LOzkc,3645
@@ -23,19 +23,20 @@ dbos/migrations/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl93dMj
 dbos/migrations/versions/50f3227f0b4b_fix_job_queue.py,sha256=ZtnsZFMuon-D0n8V5BR10jQEqJPUsYsOwt29FAoKG8g,868
 dbos/migrations/versions/5c361fc04708_added_system_tables.py,sha256=QMgFMb0aLgC25YicsvPSr6AHRCA6Zd66hyaRUhwKzrQ,6404
 dbos/migrations/versions/a3b18ad34abe_added_triggers.py,sha256=Rv0ZsZYZ_WdgGEULYsPfnp4YzaO5L198gDTgYY39AVA,2022
+dbos/migrations/versions/d76646551a6b_job_queue_limiter.py,sha256=M1upulBOLXm9ORJc2Q6PA0AwX0CA3zgqc-NhYS-eNPY,948
 dbos/migrations/versions/eab0cc1d9a14_job_queue.py,sha256=_9-FCW-zOpCQfblTS_yRLtFiUaWlC1tM4BoKBTDeH9k,1395
 dbos/py.typed,sha256=QfzXT1Ktfk3Rj84akygc7_42z0lRpCq0Ilh8OXI6Zas,44
-dbos/queue.py,sha256=
+dbos/queue.py,sha256=4MIWYDdl3DhYsnayy747xF1Jgdq2qvV4On3KJbJ4NDU,1764
 dbos/recovery.py,sha256=zqtO_ExGoIErLMVnbneU3VeHLVWvhV4jnfqssAVlQQk,2016
-dbos/registrations.py,sha256=
+dbos/registrations.py,sha256=mei6q6_3R5uei8i_Wo_TqGZs85s10shOekDX41sFYD0,6642
 dbos/request.py,sha256=-FIwtknayvRl6OjvqO4V2GySVzSdP1Ft3cc9ZBS-PLY,928
 dbos/roles.py,sha256=7Lh7uwUq1dpa6TXCOHre4mPTd5qmXzK_QPkvYR52DXg,2285
 dbos/scheduler/croniter.py,sha256=hbhgfsHBqclUS8VeLnJ9PSE9Z54z6mi4nnrr1aUXn0k,47561
 dbos/scheduler/scheduler.py,sha256=Sz4EIpAtur7so2YajTic64GrTpa4qPw8QxXn0M34v80,1360
 dbos/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dbos/schemas/application_database.py,sha256=q_Wr2XbiZNBYFkOtu7uKavo1T_cSOBblxKGHThYGGsY,962
-dbos/schemas/system_database.py,sha256
-dbos/system_database.py,sha256=
+dbos/schemas/system_database.py,sha256=-37sNXfx6cNGyzndj9mrWQLDH5iIBrwsT56ZiJ56Sj0,5116
+dbos/system_database.py,sha256=x-TpKHIS187KED6BxPozvMLt6Qjhgh3dYSKZkd6epM0,47764
 dbos/templates/hello/README.md,sha256=GhxhBj42wjTt1fWEtwNriHbJuKb66Vzu89G4pxNHw2g,930
 dbos/templates/hello/__package/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dbos/templates/hello/__package/main.py,sha256=eI0SS9Nwj-fldtiuSzIlIG6dC91GXXwdRsoHxv6S_WI,2719
@@ -49,4 +50,4 @@ dbos/templates/hello/start_postgres_docker.py,sha256=lQVLlYO5YkhGPEgPqwGc7Y8uDKs
 dbos/tracer.py,sha256=GaXDhdKKF_IQp5SAMipGXiDVwteRKjNbrXyYCH1mor0,2520
 dbos/utils.py,sha256=lwRymY-y7GprAS8pKmbICQvOJd5eGxKGTxCMFn0OwaQ,1739
 version/__init__.py,sha256=L4sNxecRuqdtSFdpUGX3TtBi9KL3k7YsZVIvv-fv9-A,1678
-dbos-0.8.
+dbos-0.8.0a7.dist-info/RECORD,,
{dbos-0.8.0a3.dist-info → dbos-0.8.0a7.dist-info}/WHEEL
File without changes

{dbos-0.8.0a3.dist-info → dbos-0.8.0a7.dist-info}/entry_points.txt
File without changes

{dbos-0.8.0a3.dist-info → dbos-0.8.0a7.dist-info}/licenses/LICENSE
File without changes