dbos 0.8.0a3__py3-none-any.whl → 0.8.0a10__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.

Potentially problematic release: this version of dbos has been flagged for closer review.

dbos/core.py CHANGED
@@ -38,6 +38,7 @@ from dbos.error import (
     DBOSWorkflowFunctionNotFoundError,
 )
 from dbos.registrations import (
+    DEFAULT_MAX_RECOVERY_ATTEMPTS,
     get_config_name,
     get_dbos_class_name,
     get_dbos_func_name,
@@ -118,6 +119,7 @@ def _init_workflow(
     config_name: Optional[str],
     temp_wf_type: Optional[str],
     queue: Optional[str] = None,
+    max_recovery_attempts: int = DEFAULT_MAX_RECOVERY_ATTEMPTS,
 ) -> WorkflowStatusInternal:
     wfid = (
         ctx.workflow_id
@@ -157,7 +159,9 @@ def _init_workflow(
         # Synchronously record the status and inputs for workflows and single-step workflows
         # We also have to do this for single-step workflows because of the foreign key constraint on the operation outputs table
         # TODO: Make this transactional (and with the queue step below)
-        dbos._sys_db.update_workflow_status(status, False, ctx.in_recovery)
+        dbos._sys_db.update_workflow_status(
+            status, False, ctx.in_recovery, max_recovery_attempts=max_recovery_attempts
+        )
         dbos._sys_db.update_workflow_inputs(wfid, utils.serialize_args(inputs))
     else:
         # Buffer the inputs for single-transaction workflows, but don't buffer the status
@@ -181,7 +185,8 @@ def _execute_workflow(
         status["status"] = "SUCCESS"
         status["output"] = utils.serialize(output)
         if status["queue_name"] is not None:
-            dbos._sys_db.remove_from_queue(status["workflow_uuid"])
+            queue = dbos._registry.queue_info_map[status["queue_name"]]
+            dbos._sys_db.remove_from_queue(status["workflow_uuid"], queue)
         dbos._sys_db.buffer_workflow_status(status)
     except DBOSWorkflowConflictIDError:
         # Retrieve the workflow handle and wait for the result.
@@ -195,7 +200,8 @@ def _execute_workflow(
         status["status"] = "ERROR"
         status["error"] = utils.serialize_exception(error)
         if status["queue_name"] is not None:
-            dbos._sys_db.remove_from_queue(status["workflow_uuid"])
+            queue = dbos._registry.queue_info_map[status["queue_name"]]
+            dbos._sys_db.remove_from_queue(status["workflow_uuid"], queue)
         dbos._sys_db.update_workflow_status(status)
         raise

@@ -218,7 +224,7 @@ def _execute_workflow_wthread(
     with EnterDBOSWorkflow(attributes):
         try:
             return _execute_workflow(dbos, status, func, *args, **kwargs)
-        except Exception as e:
+        except Exception:
             dbos.logger.error(
                 f"Exception encountered in asynchronous workflow: {traceback.format_exc()}"
             )
@@ -289,10 +295,15 @@ def _execute_workflow_id(dbos: "DBOS", workflow_id: str) -> "WorkflowHandle[Any]
     )


-def _workflow_wrapper(dbosreg: "_DBOSRegistry", func: F) -> F:
+def _workflow_wrapper(
+    dbosreg: "_DBOSRegistry",
+    func: F,
+    max_recovery_attempts: int = DEFAULT_MAX_RECOVERY_ATTEMPTS,
+) -> F:
     func.__orig_func = func  # type: ignore

     fi = get_or_create_func_info(func)
+    fi.max_recovery_attempts = max_recovery_attempts

     @wraps(func)
     def wrapper(*args: Any, **kwargs: Any) -> Any:
@@ -325,17 +336,21 @@ def _workflow_wrapper(dbosreg: "_DBOSRegistry", func: F) -> F:
             class_name=get_dbos_class_name(fi, func, args),
             config_name=get_config_name(fi, func, args),
             temp_wf_type=get_temp_workflow_type(func),
+            max_recovery_attempts=max_recovery_attempts,
         )

+        dbos.logger.debug(
+            f"Running workflow, id: {ctx.workflow_id}, name: {get_dbos_func_name(func)}"
+        )
         return _execute_workflow(dbos, status, func, *args, **kwargs)

     wrapped_func = cast(F, wrapper)
     return wrapped_func


-def _workflow(reg: "_DBOSRegistry") -> Callable[[F], F]:
+def _workflow(reg: "_DBOSRegistry", max_recovery_attempts: int) -> Callable[[F], F]:
     def _workflow_decorator(func: F) -> F:
-        wrapped_func = _workflow_wrapper(reg, func)
+        wrapped_func = _workflow_wrapper(reg, func, max_recovery_attempts)
         reg.register_wf_function(func.__qualname__, wrapped_func)
         return wrapped_func

@@ -401,6 +416,7 @@ def _start_workflow(
         config_name=get_config_name(fi, func, gin_args),
         temp_wf_type=get_temp_workflow_type(func),
         queue=queue_name,
+        max_recovery_attempts=fi.max_recovery_attempts,
     )

     if not execute_workflow:
@@ -477,6 +493,9 @@ def _transaction(
                     )
                 )
                 if recorded_output:
+                    dbos.logger.debug(
+                        f"Replaying transaction, id: {ctx.function_id}, name: {attributes['name']}"
+                    )
                     if recorded_output["error"]:
                         deserialized_error = (
                             utils.deserialize_exception(
@@ -493,6 +512,11 @@ def _transaction(
                         raise Exception(
                             "Output and error are both None"
                         )
+                else:
+                    dbos.logger.debug(
+                        f"Running transaction, id: {ctx.function_id}, name: {attributes['name']}"
+                    )
+
                 output = func(*args, **kwargs)
                 txn_output["output"] = utils.serialize(output)
                 assert (
@@ -590,6 +614,9 @@ def _step(
                 ctx.workflow_id, ctx.function_id
             )
             if recorded_output:
+                dbos.logger.debug(
+                    f"Replaying step, id: {ctx.function_id}, name: {attributes['name']}"
+                )
                 if recorded_output["error"] is not None:
                     deserialized_error = utils.deserialize_exception(
                         recorded_output["error"]
@@ -599,6 +626,11 @@ def _step(
                     return utils.deserialize(recorded_output["output"])
                 else:
                     raise Exception("Output and error are both None")
+            else:
+                dbos.logger.debug(
+                    f"Running step, id: {ctx.function_id}, name: {attributes['name']}"
+                )
+
             output = None
             error = None
             local_max_attempts = max_attempts if retries_allowed else 1
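These paired "Running"/"Replaying" debug lines make durable execution visible: a first execution logs Running for each workflow, transaction, and step, while a recovered execution logs Replaying wherever a recorded output is returned instead of re-running the function. A minimal sketch for surfacing them; the logger name "dbos" is an assumption based on dbos/logger.py's dbos_logger:

    import logging

    # Assumed logger name; dbos/logger.py defines dbos_logger.
    logging.basicConfig()  # ensure a handler exists so DEBUG records are printed
    logging.getLogger("dbos").setLevel(logging.DEBUG)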
dbos/dbos.py CHANGED
@@ -5,6 +5,7 @@ import json
 import os
 import sys
 import threading
+import traceback
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass
 from logging import Logger
@@ -42,6 +43,7 @@ from dbos.decorators import classproperty
 from dbos.queue import Queue, queue_thread
 from dbos.recovery import _recover_pending_workflows, _startup_recovery_thread
 from dbos.registrations import (
+    DEFAULT_MAX_RECOVERY_ATTEMPTS,
     DBOSClassInfo,
     get_or_create_class_info,
     set_dbos_func_name,
@@ -338,43 +340,47 @@ class DBOS:
         _dbos_global_instance._launch()

     def _launch(self) -> None:
-        if self._launched:
-            dbos_logger.warning(f"DBOS was already launched")
-            return
-        self._launched = True
-        self._executor_field = ThreadPoolExecutor(max_workers=64)
-        self._sys_db_field = SystemDatabase(self.config)
-        self._app_db_field = ApplicationDatabase(self.config)
-        self._admin_server_field = AdminServer(dbos=self)
-
-        if not os.environ.get("DBOS__VMID"):
-            workflow_ids = self._sys_db.get_pending_workflows("local")
-            self._executor.submit(_startup_recovery_thread, self, workflow_ids)
-
-        # Listen to notifications
-        self._executor.submit(self._sys_db._notification_listener)
-
-        # Start flush workflow buffers thread
-        self._executor.submit(self._sys_db.flush_workflow_buffers)
-
-        # Start the queue thread
-        evt = threading.Event()
-        self.stop_events.append(evt)
-        self._executor.submit(queue_thread, evt, self)
-
-        # Grab any pollers that were deferred and start them
-        for evt, func, args, kwargs in self._registry.pollers:
+        try:
+            if self._launched:
+                dbos_logger.warning(f"DBOS was already launched")
+                return
+            self._launched = True
+            self._executor_field = ThreadPoolExecutor(max_workers=64)
+            self._sys_db_field = SystemDatabase(self.config)
+            self._app_db_field = ApplicationDatabase(self.config)
+            self._admin_server_field = AdminServer(dbos=self)
+
+            if not os.environ.get("DBOS__VMID"):
+                workflow_ids = self._sys_db.get_pending_workflows("local")
+                self._executor.submit(_startup_recovery_thread, self, workflow_ids)
+
+            # Listen to notifications
+            self._executor.submit(self._sys_db._notification_listener)
+
+            # Start flush workflow buffers thread
+            self._executor.submit(self._sys_db.flush_workflow_buffers)
+
+            # Start the queue thread
+            evt = threading.Event()
             self.stop_events.append(evt)
-            self._executor.submit(func, *args, **kwargs)
-        self._registry.pollers = []
+            self._executor.submit(queue_thread, evt, self)

-        dbos_logger.info("DBOS launched")
+            # Grab any pollers that were deferred and start them
+            for evt, func, args, kwargs in self._registry.pollers:
+                self.stop_events.append(evt)
+                self._executor.submit(func, *args, **kwargs)
+            self._registry.pollers = []

-        # Flush handlers and add OTLP to all loggers if enabled
-        # to enable their export in DBOS Cloud
-        for handler in dbos_logger.handlers:
-            handler.flush()
-        add_otlp_to_all_loggers()
+            dbos_logger.info("DBOS launched")
+
+            # Flush handlers and add OTLP to all loggers if enabled
+            # to enable their export in DBOS Cloud
+            for handler in dbos_logger.handlers:
+                handler.flush()
+            add_otlp_to_all_loggers()
+        except Exception:
+            dbos_logger.error(f"DBOS failed to launch: {traceback.format_exc()}")
+            raise

     def _destroy(self) -> None:
         self._initialized = False
@@ -401,9 +407,11 @@ class DBOS:

     # Decorators for DBOS functionality
     @classmethod
-    def workflow(cls) -> Callable[[F], F]:
+    def workflow(
+        cls, *, max_recovery_attempts: int = DEFAULT_MAX_RECOVERY_ATTEMPTS
+    ) -> Callable[[F], F]:
         """Decorate a function for use as a DBOS workflow."""
-        return _workflow(_get_or_create_dbos_registry())
+        return _workflow(_get_or_create_dbos_registry(), max_recovery_attempts)

     @classmethod
     def transaction(
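This decorator change is the user-facing half of the new dead-letter-queue behavior: max_recovery_attempts flows from here through _workflow_wrapper and _init_workflow (core.py above) into update_workflow_status (system_database.py below). A minimal sketch of the new keyword; the workflow body is hypothetical, and the default remains DEFAULT_MAX_RECOVERY_ATTEMPTS (50):

    from dbos import DBOS

    # Hypothetical workflow: if recovery re-attempts it more than 3 times,
    # its status is set to RETRIES_EXCEEDED and DBOSDeadLetterQueueError is raised.
    @DBOS.workflow(max_recovery_attempts=3)
    def fragile_workflow() -> None:
        ...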
@@ -542,6 +550,7 @@ class DBOS:
             recovery_attempts=stat["recovery_attempts"],
             class_name=stat["class_name"],
             config_name=stat["config_name"],
+            queue_name=stat["queue_name"],
             authenticated_user=stat["authenticated_user"],
             assumed_role=stat["assumed_role"],
             authenticated_roles=(
@@ -748,6 +757,7 @@ class WorkflowStatus:
         name(str): The workflow function name
         class_name(str): For member functions, the name of the class containing the workflow function
         config_name(str): For instance member functions, the name of the class instance for the execution
+        queue_name(str): For workflows that are or were queued, the queue name
         authenticated_user(str): The user who invoked the workflow
         assumed_role(str): The access role used by the user to allow access to the workflow function
         authenticated_roles(List[str]): List of all access roles available to the authenticated user
@@ -760,6 +770,7 @@ class WorkflowStatus:
     name: str
     class_name: Optional[str]
    config_name: Optional[str]
+    queue_name: Optional[str]
     authenticated_user: Optional[str]
     assumed_role: Optional[str]
     authenticated_roles: Optional[List[str]]
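With queue_name now carried on WorkflowStatus, callers can tell whether a workflow ran (or is waiting) on a queue. A sketch, assuming the DBOS.get_workflow_status accessor that populates these fields:

    from dbos import DBOS

    status = DBOS.get_workflow_status("my-workflow-id")  # placeholder ID
    if status is not None and status.queue_name is not None:
        print(f"Workflow ran via queue {status.queue_name}")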
dbos/error.py CHANGED
@@ -32,6 +32,7 @@ class DBOSErrorCode(Enum):
     InitializationError = 3
     WorkflowFunctionNotFound = 4
     NonExistentWorkflowError = 5
+    DeadLetterQueueError = 6
     MaxStepRetriesExceeded = 7
     NotAuthorized = 8

@@ -86,6 +87,16 @@ class DBOSNonExistentWorkflowError(DBOSException):
         )


+class DBOSDeadLetterQueueError(DBOSException):
+    """Exception raised when a workflow exceeds its maximum recovery attempts and is moved to the dead-letter queue."""
+
+    def __init__(self, wf_id: str, max_retries: int):
+        super().__init__(
+            f"Workflow {wf_id} has been moved to the dead-letter queue after exceeding the maximum of {max_retries} retries",
+            dbos_error_code=DBOSErrorCode.DeadLetterQueueError.value,
+        )
+
+
 class DBOSNotAuthorizedError(DBOSException):
     """Exception raised by DBOS role-based security when the user is not authorized to access a function."""

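A sketch of handling the new error; the try body is illustrative, since the exception is raised out of update_workflow_status when a recovered workflow's recovery_attempts exceeds its cap:

    from dbos.error import DBOSDeadLetterQueueError

    try:
        ...  # a recovery path that re-initializes a crashed workflow
    except DBOSDeadLetterQueueError as e:
        # Carries DBOSErrorCode.DeadLetterQueueError (6) and names the workflow and retry cap.
        print(e)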
dbos/migrations/versions/50f3227f0b4b_fix_job_queue.py CHANGED
@@ -1,4 +1,5 @@
-"""fix_job_queue
+"""
+Fix job queue PK.

 Revision ID: 50f3227f0b4b
 Revises: eab0cc1d9a14
dbos/migrations/versions/d76646551a6b_job_queue_limiter.py ADDED
@@ -0,0 +1,43 @@
+"""
+Adjust workflow queue to add columns for rate limiter.
+
+Revision ID: d76646551a6b
+Revises: 50f3227f0b4b
+Create Date: 2024-09-25 14:48:10.218015
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "d76646551a6b"
+down_revision: Union[str, None] = "50f3227f0b4b"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "job_queue",
+        sa.Column(
+            "started_at_epoch_ms",
+            sa.BigInteger(),
+        ),
+        schema="dbos",
+    )
+    op.add_column(
+        "job_queue",
+        sa.Column(
+            "completed_at_epoch_ms",
+            sa.BigInteger(),
+        ),
+        schema="dbos",
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("job_queue", "started_at_epoch_ms", schema="dbos")
+    op.drop_column("job_queue", "completed_at_epoch_ms", schema="dbos")
dbos/migrations/versions/d76646551a6c_workflow_queue.py ADDED
@@ -0,0 +1,28 @@
+"""workflow_queue
+
+Revision ID: d76646551a6c
+Revises: d76646551a6b
+Create Date: 2024-09-27 12:00:00.0
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "d76646551a6c"
+down_revision: Union[str, None] = "d76646551a6b"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    op.rename_table("job_queue", "workflow_queue", schema="dbos")
+    op.execute("CREATE VIEW dbos.job_queue AS SELECT * FROM dbos.workflow_queue;")
+
+
+def downgrade() -> None:
+    op.execute("DROP VIEW dbos.job_queue;")
+    op.rename_table("workflow_queue", "job_queue", schema="dbos")
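Because the old name survives as a view, SQL written against dbos.job_queue keeps working after the rename. A quick sanity check, with a placeholder connection URL:

    import sqlalchemy as sa

    engine = sa.create_engine("postgresql://localhost/dbos_system")  # placeholder URL
    with engine.begin() as c:
        # The view mirrors the renamed table, so the two counts must match.
        old = c.execute(sa.text("SELECT count(*) FROM dbos.job_queue")).scalar()
        new = c.execute(sa.text("SELECT count(*) FROM dbos.workflow_queue")).scalar()
        assert old == new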
dbos/migrations/versions/eab0cc1d9a14_job_queue.py CHANGED
@@ -1,4 +1,5 @@
-"""job_queue
+"""
+Add workflow queue table.

 Revision ID: eab0cc1d9a14
 Revises: a3b18ad34abe
dbos/queue.py CHANGED
@@ -1,7 +1,6 @@
 import threading
-import time
 import traceback
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, Optional, TypedDict

 from dbos.core import P, R, _execute_workflow_id, _start_workflow
@@ -9,10 +8,35 @@ if TYPE_CHECKING:
     from dbos.dbos import DBOS, Workflow, WorkflowHandle


+class QueueRateLimit(TypedDict):
+    """
+    Limit the maximum number of workflows from this queue that can be started in a given period.
+
+    If the limit is 5 and the period is 10, no more than 5 functions can be
+    started per 10 seconds.
+    """
+
+    limit: int
+    period: float
+
+
 class Queue:
-    def __init__(self, name: str, concurrency: Optional[int] = None) -> None:
+    """
+    Workflow queue.
+
+    Workflow queues allow workflows to be started at a later time, based on concurrency and
+    rate limits.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        concurrency: Optional[int] = None,
+        limiter: Optional[QueueRateLimit] = None,
+    ) -> None:
         self.name = name
         self.concurrency = concurrency
+        self.limiter = limiter
         from dbos.dbos import _get_or_create_dbos_registry

         registry = _get_or_create_dbos_registry()
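A sketch of constructing a queue with the new limiter field; the enqueue call is an assumption based on this module's _start_workflow import and the package's queue API:

    from dbos import DBOS
    from dbos.queue import Queue

    # At most 50 workflow starts per 30-second window, 10 running at once.
    queue = Queue("ingest_queue", concurrency=10, limiter={"limit": 50, "period": 30.0})

    @DBOS.workflow()
    def process(item: str) -> None:
        ...

    handle = queue.enqueue(process, "example-item")  # assumed Queue.enqueue API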
@@ -29,12 +53,11 @@ class Queue:

 def queue_thread(stop_event: threading.Event, dbos: "DBOS") -> None:
     while not stop_event.is_set():
-        time.sleep(1)
-        for queue_name, queue in dbos._registry.queue_info_map.items():
+        if stop_event.wait(timeout=1):
+            return
+        for _, queue in dbos._registry.queue_info_map.items():
             try:
-                wf_ids = dbos._sys_db.start_queued_workflows(
-                    queue_name, queue.concurrency
-                )
+                wf_ids = dbos._sys_db.start_queued_workflows(queue)
                 for id in wf_ids:
                     _execute_workflow_id(dbos, id)
             except Exception:
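Replacing time.sleep(1) with stop_event.wait(timeout=1) lets the poller wake the moment shutdown is requested instead of finishing its sleep. The same pattern in isolation:

    import threading

    def do_one_pass() -> None:
        ...  # placeholder for one polling pass

    def poll_until_stopped(stop_event: threading.Event) -> None:
        while not stop_event.is_set():
            # wait() returns True as soon as the event is set, so shutdown
            # latency is bounded by the work itself, not a fixed sleep.
            if stop_event.wait(timeout=1):
                return
            do_one_pass()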
dbos/registrations.py CHANGED
@@ -3,6 +3,8 @@ from enum import Enum
 from types import FunctionType
 from typing import Any, Callable, List, Literal, Optional, Tuple, Type, cast

+DEFAULT_MAX_RECOVERY_ATTEMPTS = 50
+

 def get_dbos_func_name(f: Any) -> str:
     if hasattr(f, "dbos_function_name"):
@@ -47,6 +49,7 @@ class DBOSFuncInfo:
         self.class_info: Optional[DBOSClassInfo] = None
         self.func_type: DBOSFuncType = DBOSFuncType.Unknown
         self.required_roles: Optional[List[str]] = None
+        self.max_recovery_attempts = DEFAULT_MAX_RECOVERY_ATTEMPTS


 def get_or_create_class_info(cls: Type[Any]) -> DBOSClassInfo:
dbos/request.py CHANGED
@@ -13,6 +13,7 @@ class Address(NamedTuple):
 class Request:
     """
     Serializable HTTP Request object.
+
     Attributes:
         base_url(str): Base of URL requested, as in application code
         client(Optional[Address]): HTTP Client
dbos/scheduler/scheduler.py CHANGED
@@ -2,6 +2,7 @@ import threading
 from datetime import datetime, timezone
 from typing import TYPE_CHECKING, Callable

+from dbos.logger import dbos_logger
 from dbos.queue import Queue

 if TYPE_CHECKING:
@@ -18,7 +19,13 @@ scheduler_queue: Queue
 def scheduler_loop(
     func: ScheduledWorkflow, cron: str, stop_event: threading.Event
 ) -> None:
-    iter = croniter(cron, datetime.now(timezone.utc), second_at_beginning=True)
+    try:
+        iter = croniter(cron, datetime.now(timezone.utc), second_at_beginning=True)
+    except Exception:
+        dbos_logger.error(
+            f'Cannot run scheduled function {func.__name__}. Invalid crontab "{cron}"'
+        )
+        return
     while not stop_event.is_set():
         nextExecTime = iter.get_next(datetime)
         sleepTime = nextExecTime - datetime.now(timezone.utc)
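The guard exists because croniter raises on a malformed crontab (and second_at_beginning=True additionally accepts a seconds-first sixth field). For contrast, a valid schedule; the @DBOS.scheduled decorator and its (scheduled_time, actual_time) signature are assumptions based on the DBOS scheduling API:

    from datetime import datetime
    from dbos import DBOS

    @DBOS.scheduled("* * * * *")  # assumed decorator; standard five-field cron, fires every minute
    @DBOS.workflow()
    def every_minute(scheduled_time: datetime, actual_time: datetime) -> None:
        DBOS.logger.info(f"Scheduled {scheduled_time}, started {actual_time}")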
dbos/schemas/system_database.py CHANGED
@@ -142,8 +142,8 @@ class SystemSchema:
         Column("last_run_time", BigInteger, nullable=False),
     )

-    job_queue = Table(
-        "job_queue",
+    workflow_queue = Table(
+        "workflow_queue",
         metadata_obj,
         Column(
             "workflow_uuid",
@@ -161,4 +161,12 @@ class SystemSchema:
             nullable=False,
             server_default=text("(EXTRACT(epoch FROM now()) * 1000::numeric)::bigint"),
         ),
+        Column(
+            "started_at_epoch_ms",
+            BigInteger(),
+        ),
+        Column(
+            "completed_at_epoch_ms",
+            BigInteger(),
+        ),
     )
dbos/system_database.py CHANGED
@@ -4,6 +4,7 @@ import threading
 import time
 from enum import Enum
 from typing import (
+    TYPE_CHECKING,
     Any,
     Dict,
     List,
@@ -24,12 +25,20 @@ from alembic.config import Config
 from sqlalchemy.exc import DBAPIError

 import dbos.utils as utils
-from dbos.error import DBOSNonExistentWorkflowError, DBOSWorkflowConflictIDError
+from dbos.error import (
+    DBOSDeadLetterQueueError,
+    DBOSNonExistentWorkflowError,
+    DBOSWorkflowConflictIDError,
+)
+from dbos.registrations import DEFAULT_MAX_RECOVERY_ATTEMPTS

 from .dbos_config import ConfigFile
 from .logger import dbos_logger
 from .schemas.system_database import SystemSchema

+if TYPE_CHECKING:
+    from .queue import Queue
+

 class WorkflowStatusString(Enum):
     """Enumeration of values allowed for `WorkflowSatusInternal.status`."""
@@ -233,7 +242,9 @@ class SystemDatabase:
         status: WorkflowStatusInternal,
         replace: bool = True,
         in_recovery: bool = False,
+        *,
         conn: Optional[sa.Connection] = None,
+        max_recovery_attempts: int = DEFAULT_MAX_RECOVERY_ATTEMPTS,
     ) -> None:
         cmd = pg.insert(SystemSchema.workflow_status).values(
             workflow_uuid=status["workflow_uuid"],
@@ -271,12 +282,36 @@ class SystemDatabase:
             )
         else:
             cmd = cmd.on_conflict_do_nothing()
+        cmd = cmd.returning(SystemSchema.workflow_status.c.recovery_attempts)  # type: ignore

         if conn is not None:
-            conn.execute(cmd)
+            results = conn.execute(cmd)
         else:
             with self.engine.begin() as c:
-                c.execute(cmd)
+                results = c.execute(cmd)
+        if in_recovery:
+            row = results.fetchone()
+            if row is not None:
+                recovery_attempts: int = row[0]
+                if recovery_attempts > max_recovery_attempts:
+                    with self.engine.begin() as c:
+                        c.execute(
+                            sa.update(SystemSchema.workflow_status)
+                            .where(
+                                SystemSchema.workflow_status.c.workflow_uuid
+                                == status["workflow_uuid"]
+                            )
+                            .where(
+                                SystemSchema.workflow_status.c.status
+                                == WorkflowStatusString.PENDING.value
+                            )
+                            .values(
+                                status=WorkflowStatusString.RETRIES_EXCEEDED.value,
+                            )
+                        )
+                    raise DBOSDeadLetterQueueError(
+                        status["workflow_uuid"], max_recovery_attempts
+                    )

         # Record we have exported status for this single-transaction workflow
         if status["workflow_uuid"] in self._temp_txn_wf_ids:
@@ -617,7 +652,14 @@ class SystemDatabase:
                 workflow_uuid, function_id, conn=c
             )
             if recorded_output is not None:
+                dbos_logger.debug(
+                    f"Replaying send, id: {function_id}, destination_uuid: {destination_uuid}, topic: {topic}"
+                )
                 return  # Already sent before
+            else:
+                dbos_logger.debug(
+                    f"Running send, id: {function_id}, destination_uuid: {destination_uuid}, topic: {topic}"
+                )

             try:
                 c.execute(
@@ -653,10 +695,13 @@ class SystemDatabase:
         # First, check for previous executions.
         recorded_output = self.check_operation_execution(workflow_uuid, function_id)
         if recorded_output is not None:
+            dbos_logger.debug(f"Replaying recv, id: {function_id}, topic: {topic}")
             if recorded_output["output"] is not None:
                 return utils.deserialize(recorded_output["output"])
             else:
                 raise Exception("No output recorded in the last recv")
+        else:
+            dbos_logger.debug(f"Running recv, id: {function_id}, topic: {topic}")

         # Insert a condition to the notifications map, so the listener can notify it when a message is received.
         payload = f"{workflow_uuid}::{topic}"
@@ -799,9 +844,11 @@ class SystemDatabase:
         recorded_output = self.check_operation_execution(workflow_uuid, function_id)
         end_time: float
         if recorded_output is not None:
+            dbos_logger.debug(f"Replaying sleep, id: {function_id}, seconds: {seconds}")
             assert recorded_output["output"] is not None, "no recorded end time"
             end_time = utils.deserialize(recorded_output["output"])
         else:
+            dbos_logger.debug(f"Running sleep, id: {function_id}, seconds: {seconds}")
             end_time = time.time() + seconds
             try:
                 self.record_operation_result(
@@ -831,7 +878,10 @@ class SystemDatabase:
                 workflow_uuid, function_id, conn=c
             )
             if recorded_output is not None:
+                dbos_logger.debug(f"Replaying set_event, id: {function_id}, key: {key}")
                 return  # Already sent before
+            else:
+                dbos_logger.debug(f"Running set_event, id: {function_id}, key: {key}")

             c.execute(
                 pg.insert(SystemSchema.workflow_events)
@@ -872,10 +922,17 @@ class SystemDatabase:
             caller_ctx["workflow_uuid"], caller_ctx["function_id"]
         )
         if recorded_output is not None:
+            dbos_logger.debug(
+                f"Replaying get_event, id: {caller_ctx['function_id']}, key: {key}"
+            )
             if recorded_output["output"] is not None:
                 return utils.deserialize(recorded_output["output"])
             else:
                 raise Exception("No output recorded in the last get_event")
+        else:
+            dbos_logger.debug(
+                f"Running get_event, id: {caller_ctx['function_id']}, key: {key}"
+            )

         payload = f"{target_uuid}::{key}"
         condition = threading.Condition()
@@ -926,7 +983,7 @@ class SystemDatabase:
             return value

     def _flush_workflow_status_buffer(self) -> None:
-        """Export the workflow status buffer to the database, up to the batch size"""
+        """Export the workflow status buffer to the database, up to the batch size."""
         if len(self._workflow_status_buffer) == 0:
             return
@@ -1022,7 +1079,7 @@ class SystemDatabase:
     def enqueue(self, workflow_id: str, queue_name: str) -> None:
         with self.engine.begin() as c:
             c.execute(
-                pg.insert(SystemSchema.job_queue)
+                pg.insert(SystemSchema.workflow_queue)
                 .values(
                     workflow_uuid=workflow_id,
                     queue_name=queue_name,
@@ -1030,22 +1087,63 @@ class SystemDatabase:
                 .on_conflict_do_nothing()
             )

-    def start_queued_workflows(
-        self, queue_name: str, concurrency: Optional[int]
-    ) -> List[str]:
+    def start_queued_workflows(self, queue: "Queue") -> List[str]:
+        start_time_ms = int(time.time() * 1000)
+        if queue.limiter is not None:
+            limiter_period_ms = int(queue.limiter["period"] * 1000)
         with self.engine.begin() as c:
-            query = sa.select(SystemSchema.job_queue.c.workflow_uuid).where(
-                SystemSchema.job_queue.c.queue_name == queue_name
+            # Execute with snapshot isolation to ensure multiple workers respect limits
+            c.execute(sa.text("SET TRANSACTION ISOLATION LEVEL REPEATABLE READ"))
+
+            # If there is a limiter, compute how many functions have started in its period.
+            if queue.limiter is not None:
+                query = (
+                    sa.select(sa.func.count())
+                    .select_from(SystemSchema.workflow_queue)
+                    .where(SystemSchema.workflow_queue.c.queue_name == queue.name)
+                    .where(
+                        SystemSchema.workflow_queue.c.started_at_epoch_ms.isnot(None)
+                    )
+                    .where(
+                        SystemSchema.workflow_queue.c.started_at_epoch_ms
+                        > start_time_ms - limiter_period_ms
+                    )
+                )
+                num_recent_queries = c.execute(query).fetchone()[0]  # type: ignore
+                if num_recent_queries >= queue.limiter["limit"]:
+                    return []
+
+            # Select not-yet-completed functions in the queue ordered by the
+            # time at which they were enqueued.
+            # If there is a concurrency limit N, select only the N oldest
+            # functions, else select all of them.
+            query = (
+                sa.select(
+                    SystemSchema.workflow_queue.c.workflow_uuid,
+                    SystemSchema.workflow_queue.c.started_at_epoch_ms,
+                )
+                .where(SystemSchema.workflow_queue.c.queue_name == queue.name)
+                .where(SystemSchema.workflow_queue.c.completed_at_epoch_ms == None)
+                .order_by(SystemSchema.workflow_queue.c.created_at_epoch_ms.asc())
             )
-            if concurrency is not None:
-                query = query.order_by(
-                    SystemSchema.job_queue.c.created_at_epoch_ms.asc()
-                ).limit(concurrency)
+            if queue.concurrency is not None:
+                query = query.limit(queue.concurrency)
+
+            # From the functions retrieved, get the workflow IDs of the functions
+            # that have not yet been started so we can start them.
             rows = c.execute(query).fetchall()
-            dequeued_ids: List[str] = [row[0] for row in rows]
-            ret_ids = []
+            dequeued_ids: List[str] = [row[0] for row in rows if row[1] is None]
+            ret_ids: list[str] = []
             for id in dequeued_ids:
-                result = c.execute(
+
+                # If we have a limiter, stop starting functions when the number
+                # of functions started this period exceeds the limit.
+                if queue.limiter is not None:
+                    if len(ret_ids) + num_recent_queries >= queue.limiter["limit"]:
+                        break
+
+                # To start a function, first set its status to PENDING
+                c.execute(
                     SystemSchema.workflow_status.update()
                     .where(SystemSchema.workflow_status.c.workflow_uuid == id)
                     .where(
@@ -1054,14 +1152,43 @@ class SystemDatabase:
                     )
                     .values(status=WorkflowStatusString.PENDING.value)
                 )
-                if result.rowcount > 0:
-                    ret_ids.append(id)
+
+                # Then give it a start time
+                c.execute(
+                    SystemSchema.workflow_queue.update()
+                    .where(SystemSchema.workflow_queue.c.workflow_uuid == id)
+                    .values(started_at_epoch_ms=start_time_ms)
+                )
+                ret_ids.append(id)
+
+            # If we have a limiter, garbage-collect all completed functions started
+            # before the period. If there's no limiter, there's no need--they were
+            # deleted on completion.
+            if queue.limiter is not None:
+                c.execute(
+                    sa.delete(SystemSchema.workflow_queue)
+                    .where(SystemSchema.workflow_queue.c.completed_at_epoch_ms != None)
+                    .where(SystemSchema.workflow_queue.c.queue_name == queue.name)
+                    .where(
+                        SystemSchema.workflow_queue.c.started_at_epoch_ms
+                        < start_time_ms - limiter_period_ms
+                    )
+                )
+
+            # Return the IDs of all functions we started
             return ret_ids

-    def remove_from_queue(self, workflow_id: str) -> None:
+    def remove_from_queue(self, workflow_id: str, queue: "Queue") -> None:
         with self.engine.begin() as c:
-            c.execute(
-                sa.delete(SystemSchema.job_queue).where(
-                    SystemSchema.job_queue.c.workflow_uuid == workflow_id
+            if queue.limiter is None:
+                c.execute(
+                    sa.delete(SystemSchema.workflow_queue).where(
+                        SystemSchema.workflow_queue.c.workflow_uuid == workflow_id
+                    )
+                )
+            else:
+                c.execute(
+                    sa.update(SystemSchema.workflow_queue)
+                    .where(SystemSchema.workflow_queue.c.workflow_uuid == workflow_id)
+                    .values(completed_at_epoch_ms=int(time.time() * 1000))
                 )
-            )
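The limiter is a sliding window over started_at_epoch_ms: a workflow counts against the limit only if it started within the last period, and completed rows older than the window are garbage-collected. The same test in isolation:

    import time
    from typing import List

    def may_start_another(started_at_ms: List[int], limit: int, period_s: float) -> bool:
        # Count starts inside the trailing window; older starts have expired.
        window_start_ms = int(time.time() * 1000) - int(period_s * 1000)
        recent = sum(1 for t in started_at_ms if t > window_start_ms)
        return recent < limit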
dbos-0.8.0a3.dist-info/METADATA → dbos-0.8.0a10.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dbos
-Version: 0.8.0a3
+Version: 0.8.0a10
 Summary: Ultra-lightweight durable execution in Python
 Author-Email: "DBOS, Inc." <contact@dbos.dev>
 License: MIT
dbos-0.8.0a3.dist-info/RECORD → dbos-0.8.0a10.dist-info/RECORD RENAMED
@@ -1,18 +1,18 @@
-dbos-0.8.0a3.dist-info/METADATA,sha256=u3PDfNC0j5qeu05Uq1834FHAScD6ECq64z9_XOQqdiw,5010
-dbos-0.8.0a3.dist-info/WHEEL,sha256=Vza3XR51HW1KmFP0iIMUVYIvz0uQuKJpIXKYOBGQyFQ,90
-dbos-0.8.0a3.dist-info/entry_points.txt,sha256=z6GcVANQV7Uw_82H9Ob2axJX6V3imftyZsljdh-M1HU,54
-dbos-0.8.0a3.dist-info/licenses/LICENSE,sha256=VGZit_a5-kdw9WT6fY5jxAWVwGQzgLFyPWrcVVUhVNU,1067
+dbos-0.8.0a10.dist-info/METADATA,sha256=Ql94ETlUkbD-PYbjc6ZvVlXj42jnHlLZqp-we0XwkF8,5011
+dbos-0.8.0a10.dist-info/WHEEL,sha256=Vza3XR51HW1KmFP0iIMUVYIvz0uQuKJpIXKYOBGQyFQ,90
+dbos-0.8.0a10.dist-info/entry_points.txt,sha256=z6GcVANQV7Uw_82H9Ob2axJX6V3imftyZsljdh-M1HU,54
+dbos-0.8.0a10.dist-info/licenses/LICENSE,sha256=VGZit_a5-kdw9WT6fY5jxAWVwGQzgLFyPWrcVVUhVNU,1067
 dbos/__init__.py,sha256=-h1QgWNL11CiLlHEKa2ycAJVJw5SXYZ4BGNNWBAiE9k,726
 dbos/admin_sever.py,sha256=Qg5T3YRrbPW05PR_99yAaxgo1ugQrAp_uTeTqSfjm_k,3397
 dbos/application_database.py,sha256=knFK8We8y6WrIpnFCKvFq5hvSuFQqUuJqOqDpSVMCPI,5521
 dbos/cli.py,sha256=z5dXbbnGWzSC3E1rfS8Lp1_OIImzcDKM7jP-iu_Q4aI,8602
 dbos/context.py,sha256=4MsxZdoh1WIsgoUsaxo0B6caGN6xq2WC60MzbBppzGk,17738
-dbos/core.py,sha256=nV0w0wCkKm0VRVbUO0DgRnVqX5ue1Bn37UyAsdSXl48,29342
+dbos/core.py,sha256=ggsRC2XicvNI1qqruEFoqxoTU5oSSnhMZvDih3AG_3A,30879
 dbos/dbos-config.schema.json,sha256=azpfmoDZg7WfSy3kvIsk9iEiKB_-VZt03VEOoXJAkqE,5331
-dbos/dbos.py,sha256=RtDcvKe4sm1TlnCGU4cyex-UI7hMMlhgzmOl1NuRLo4,29294
+dbos/dbos.py,sha256=qQOHcgaT4rru1gZJy1nFrIvz8YzEhDfMJmN9eCskPZ4,29857
 dbos/dbos_config.py,sha256=NJVze2GkKgYUmcPP31Unb-QpsA0TzImEeQGJgVq6W6k,5352
 dbos/decorators.py,sha256=lbPefsLK6Cya4cb7TrOcLglOpGT3pc6qjZdsQKlfZLg,629
-dbos/error.py,sha256=c2y7d3Cbb-ZOO-M9txcCxuyPE1bdnwKrJnXgJXYb-pQ,3437
+dbos/error.py,sha256=UETk8CoZL-TO2Utn1-E7OSWelhShWmKM-fOlODMR9PE,3893
 dbos/fastapi.py,sha256=gx9hlpxYOiwbuhSlbY9bn5C-F_FsCbrJvkX9ZAvDG6U,3418
 dbos/flask.py,sha256=azr4geMEGuuTBCyxIZmgDmmP-6s_pTIF-lGyp9Q4IB8,2430
 dbos/kafka.py,sha256=LH3hbNapnkjLcuXNUtdGU0398JafWb-t0GwUl3LOzkc,3645
@@ -20,22 +20,24 @@ dbos/kafka_message.py,sha256=NYvOXNG3Qn7bghn1pv3fg4Pbs86ILZGcK4IB-MLUNu0,409
 dbos/logger.py,sha256=D-aFSZUCHBP34J1IZ5YNkTrJW-rDiH3py_v9jLU4Yrk,3565
 dbos/migrations/env.py,sha256=38SIGVbmn_VV2x2u1aHLcPOoWgZ84eCymf3g_NljmbU,1626
 dbos/migrations/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl93dMj8,635
-dbos/migrations/versions/50f3227f0b4b_fix_job_queue.py,sha256=ZtnsZFMuon-D0n8V5BR10jQEqJPUsYsOwt29FAoKG8g,868
+dbos/migrations/versions/50f3227f0b4b_fix_job_queue.py,sha256=ZBYrtTdxy64HxIAlOes89fVIk2P1gNaJack7wuC_epg,873
 dbos/migrations/versions/5c361fc04708_added_system_tables.py,sha256=QMgFMb0aLgC25YicsvPSr6AHRCA6Zd66hyaRUhwKzrQ,6404
 dbos/migrations/versions/a3b18ad34abe_added_triggers.py,sha256=Rv0ZsZYZ_WdgGEULYsPfnp4YzaO5L198gDTgYY39AVA,2022
-dbos/migrations/versions/eab0cc1d9a14_job_queue.py,sha256=_9-FCW-zOpCQfblTS_yRLtFiUaWlC1tM4BoKBTDeH9k,1395
+dbos/migrations/versions/d76646551a6b_job_queue_limiter.py,sha256=8PyFi8rd6CN-mUro43wGhsg5wcQWKZPRHD6jw8R5pVc,986
+dbos/migrations/versions/d76646551a6c_workflow_queue.py,sha256=G942nophZ2uC2vc4hGBC02Ptng1715roTjY3xiyzZU4,729
+dbos/migrations/versions/eab0cc1d9a14_job_queue.py,sha256=uvhFOtqbBreCePhAxZfIT0qCAI7BiZTou9wt6QnbY7c,1412
 dbos/py.typed,sha256=QfzXT1Ktfk3Rj84akygc7_42z0lRpCq0Ilh8OXI6Zas,44
-dbos/queue.py,sha256=ngY1MN3xD7jAvEXlrl_D16FUPpP_vpRgbyERLfPyU9Y,1437
+dbos/queue.py,sha256=DT5dFIDZGnC4GpgI1Tph8fh5VvClpokugv-2ow4qyiQ,1947
 dbos/recovery.py,sha256=zqtO_ExGoIErLMVnbneU3VeHLVWvhV4jnfqssAVlQQk,2016
-dbos/registrations.py,sha256=gMI-u05tv5bpvyddQGtoUgCsqARx51aOY7p0JXPafQo,6539
-dbos/request.py,sha256=-FIwtknayvRl6OjvqO4V2GySVzSdP1Ft3cc9ZBS-PLY,928
+dbos/registrations.py,sha256=mei6q6_3R5uei8i_Wo_TqGZs85s10shOekDX41sFYD0,6642
+dbos/request.py,sha256=cX1B3Atlh160phgS35gF1VEEV4pD126c9F3BDgBmxZU,929
 dbos/roles.py,sha256=7Lh7uwUq1dpa6TXCOHre4mPTd5qmXzK_QPkvYR52DXg,2285
 dbos/scheduler/croniter.py,sha256=hbhgfsHBqclUS8VeLnJ9PSE9Z54z6mi4nnrr1aUXn0k,47561
-dbos/scheduler/scheduler.py,sha256=Sz4EIpAtur7so2YajTic64GrTpa4qPw8QxXn0M34v80,1360
+dbos/scheduler/scheduler.py,sha256=KpcBid6qIbqLqLdrQQqEQnRBTvo_XwtVuvUba3Ed5Go,1560
 dbos/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dbos/schemas/application_database.py,sha256=q_Wr2XbiZNBYFkOtu7uKavo1T_cSOBblxKGHThYGGsY,962
-dbos/schemas/system_database.py,sha256=ed4c1UntsD-cqXD0ekM4jvcYYEViavDh_G6c0pVDe7k,4938
-dbos/system_database.py,sha256=Os1-qqnrirKm_K37zuMgk14mm4ziXNYDjZnM1o8A450,41864
+dbos/schemas/system_database.py,sha256=7iw7eHJzEvkatHMOaHORoSvtfisF73wW5j8hRt_Ph14,5126
+dbos/system_database.py,sha256=jS0JV3HW2nxKlCVAahTkraXpUpfy1pv2eirzQFpX6J4,48067
 dbos/templates/hello/README.md,sha256=GhxhBj42wjTt1fWEtwNriHbJuKb66Vzu89G4pxNHw2g,930
 dbos/templates/hello/__package/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dbos/templates/hello/__package/main.py,sha256=eI0SS9Nwj-fldtiuSzIlIG6dC91GXXwdRsoHxv6S_WI,2719
@@ -49,4 +51,4 @@ dbos/templates/hello/start_postgres_docker.py,sha256=lQVLlYO5YkhGPEgPqwGc7Y8uDKs
 dbos/tracer.py,sha256=GaXDhdKKF_IQp5SAMipGXiDVwteRKjNbrXyYCH1mor0,2520
 dbos/utils.py,sha256=lwRymY-y7GprAS8pKmbICQvOJd5eGxKGTxCMFn0OwaQ,1739
 version/__init__.py,sha256=L4sNxecRuqdtSFdpUGX3TtBi9KL3k7YsZVIvv-fv9-A,1678
-dbos-0.8.0a3.dist-info/RECORD,,
+dbos-0.8.0a10.dist-info/RECORD,,