dbos 2.1.0a2__py3-none-any.whl → 2.2.0__py3-none-any.whl

dbos/_queue.py CHANGED
@@ -43,6 +43,7 @@ class Queue:
         *,  # Disable positional arguments from here on
         worker_concurrency: Optional[int] = None,
         priority_enabled: bool = False,
+        partition_queue: bool = False,
     ) -> None:
         if (
             worker_concurrency is not None
@@ -57,6 +58,7 @@ class Queue:
         self.worker_concurrency = worker_concurrency
         self.limiter = limiter
         self.priority_enabled = priority_enabled
+        self.partition_queue = partition_queue
         from ._dbos import _get_or_create_dbos_registry
 
         registry = _get_or_create_dbos_registry()
@@ -78,6 +80,18 @@ class Queue:
             raise Exception(
                 f"Priority is not enabled for queue {self.name}. Setting priority will not have any effect."
             )
+        if self.partition_queue and (
+            context is None or context.queue_partition_key is None
+        ):
+            raise Exception(
+                f"A workflow cannot be enqueued on partitioned queue {self.name} without a partition key"
+            )
+        if context and context.queue_partition_key and not self.partition_queue:
+            raise Exception(
+                f"You can only use a partition key on a partition-enabled queue. Key {context.queue_partition_key} was used with non-partitioned queue {self.name}"
+            )
+        if context and context.queue_partition_key and context.deduplication_id:
+            raise Exception("Deduplication is not supported for partitioned queues")
 
         dbos = _get_dbos_instance()
         return start_workflow(dbos, func, self.name, False, *args, **kwargs)
@@ -105,10 +119,21 @@ def queue_thread(stop_event: threading.Event, dbos: "DBOS") -> None:
         queues = dict(dbos._registry.queue_info_map)
         for _, queue in queues.items():
             try:
-                wf_ids = dbos._sys_db.start_queued_workflows(
-                    queue, GlobalParams.executor_id, GlobalParams.app_version
-                )
-                for id in wf_ids:
+                if queue.partition_queue:
+                    dequeued_workflows = []
+                    queue_partition_keys = dbos._sys_db.get_queue_partitions(queue.name)
+                    for key in queue_partition_keys:
+                        dequeued_workflows += dbos._sys_db.start_queued_workflows(
+                            queue,
+                            GlobalParams.executor_id,
+                            GlobalParams.app_version,
+                            key,
+                        )
+                else:
+                    dequeued_workflows = dbos._sys_db.start_queued_workflows(
+                        queue, GlobalParams.executor_id, GlobalParams.app_version, None
+                    )
+                for id in dequeued_workflows:
                     execute_workflow_by_id(dbos, id)
             except OperationalError as e:
                 if isinstance(
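
The new checks in Queue.enqueue require a partition key whenever a queue is created with partition_queue=True, and reject partition keys on non-partitioned queues and combinations with deduplication IDs. The dequeue loop then pulls work per partition key. A minimal usage sketch follows; the diff only shows that the key arrives via context.queue_partition_key, so passing it through SetEnqueueOptions(queue_partition_key=...) is an assumption about the public API, not something confirmed by this diff.

from dbos import DBOS, Queue, SetEnqueueOptions

# A partition-enabled queue: the dequeue loop above fetches work
# partition by partition via get_queue_partitions (sketch based on this diff).
orders_queue = Queue("orders", worker_concurrency=1, partition_queue=True)

@DBOS.workflow()
def process_order(order_id: int) -> None:
    DBOS.logger.info(f"processing order {order_id}")

def enqueue_order(customer_id: str, order_id: int) -> None:
    # Assumption: SetEnqueueOptions exposes a queue_partition_key option
    # that populates the context.queue_partition_key checked in Queue.enqueue.
    with SetEnqueueOptions(queue_partition_key=customer_id):
        orders_queue.enqueue(process_order, order_id)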
dbos/_scheduler.py CHANGED
@@ -1,3 +1,4 @@
+import random
 import threading
 import traceback
 from datetime import datetime, timezone
@@ -15,28 +16,40 @@ from ._registrations import get_dbos_func_name
 
 ScheduledWorkflow = Callable[[datetime, datetime], None]
 
-scheduler_queue: Queue
-
 
 def scheduler_loop(
     func: ScheduledWorkflow, cron: str, stop_event: threading.Event
 ) -> None:
+    from dbos._dbos import _get_dbos_instance
+
+    dbos = _get_dbos_instance()
+    scheduler_queue = dbos._registry.get_internal_queue()
     try:
         iter = croniter(cron, datetime.now(timezone.utc), second_at_beginning=True)
-    except Exception as e:
+    except Exception:
         dbos_logger.error(
             f'Cannot run scheduled function {get_dbos_func_name(func)}. Invalid crontab "{cron}"'
         )
+        raise
     while not stop_event.is_set():
-        nextExecTime = iter.get_next(datetime)
-        sleepTime = nextExecTime - datetime.now(timezone.utc)
-        if stop_event.wait(timeout=sleepTime.total_seconds()):
+        next_exec_time = iter.get_next(datetime)
+        sleep_time = (next_exec_time - datetime.now(timezone.utc)).total_seconds()
+        sleep_time = max(0, sleep_time)
+        # To prevent a "thundering herd" problem in a distributed setting,
+        # apply jitter of up to 10% the sleep time, capped at 10 seconds
+        max_jitter = min(sleep_time / 10, 10)
+        jitter = random.uniform(0, max_jitter)
+        if stop_event.wait(timeout=sleep_time + jitter):
            return
         try:
-            with SetWorkflowID(
-                f"sched-{get_dbos_func_name(func)}-{nextExecTime.isoformat()}"
-            ):
-                scheduler_queue.enqueue(func, nextExecTime, datetime.now(timezone.utc))
+            workflowID = (
+                f"sched-{get_dbos_func_name(func)}-{next_exec_time.isoformat()}"
+            )
+            if not dbos._sys_db.get_workflow_status(workflowID):
+                with SetWorkflowID(workflowID):
+                    scheduler_queue.enqueue(
+                        func, next_exec_time, datetime.now(timezone.utc)
+                    )
         except Exception:
             dbos_logger.warning(
                 f"Exception encountered in scheduler thread: {traceback.format_exc()})"
@@ -49,13 +62,10 @@ def scheduled(
     def decorator(func: ScheduledWorkflow) -> ScheduledWorkflow:
         try:
             croniter(cron, datetime.now(timezone.utc), second_at_beginning=True)
-        except Exception as e:
+        except Exception:
             raise ValueError(
                 f'Invalid crontab "{cron}" for scheduled function function {get_dbos_func_name(func)}.'
             )
-
-        global scheduler_queue
-        scheduler_queue = dbosreg.get_internal_queue()
         stop_event = threading.Event()
         dbosreg.register_poller(stop_event, scheduler_loop, func, cron, stop_event)
         return func
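
The scheduler loop now resolves the internal queue at loop start instead of through a module-level global, adds jitter to each sleep, and checks get_workflow_status on the derived workflow ID before enqueueing, so concurrent executors do not enqueue the same scheduled firing twice. For reference, a scheduled workflow registered through this path looks roughly like this:

from datetime import datetime
from dbos import DBOS

# Runs at the top of every minute. The loop above derives the workflow ID
# "sched-<function-name>-<scheduled-time>", which makes each firing
# enqueue at most once across executors.
@DBOS.scheduled("* * * * *")
@DBOS.workflow()
def minutely_job(scheduled_time: datetime, actual_time: datetime) -> None:
    DBOS.logger.info(f"Scheduled for {scheduled_time}, started at {actual_time}")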
dbos/_schemas/system_database.py CHANGED
@@ -77,6 +77,7 @@ class SystemSchema:
         Column("deduplication_id", Text(), nullable=True),
         Column("inputs", Text()),
         Column("priority", Integer(), nullable=False, server_default=text("'0'::int")),
+        Column("queue_partition_key", Text()),
         Index("workflow_status_created_at_index", "created_at"),
         Index("workflow_status_executor_id_index", "executor_id"),
         Index("workflow_status_status_index", "status"),
dbos/_serialization.py CHANGED
@@ -1,6 +1,6 @@
 import base64
 import pickle
-import types
+from abc import ABC, abstractmethod
 from typing import Any, Dict, Optional, Tuple, TypedDict
 
 from ._logger import dbos_logger
@@ -11,47 +11,31 @@ class WorkflowInputs(TypedDict):
     kwargs: Dict[str, Any]
 
 
-def serialize(data: Any) -> str:
-    pickled_data: bytes = pickle.dumps(data)
-    encoded_data: str = base64.b64encode(pickled_data).decode("utf-8")
-    return encoded_data
+class Serializer(ABC):
 
+    @abstractmethod
+    def serialize(self, data: Any) -> str:
+        pass
 
-def serialize_args(data: WorkflowInputs) -> str:
-    """Serialize args to a base64-encoded string using pickle."""
-    pickled_data: bytes = pickle.dumps(data)
-    encoded_data: str = base64.b64encode(pickled_data).decode("utf-8")
-    return encoded_data
+    @abstractmethod
+    def deserialize(cls, serialized_data: str) -> Any:
+        pass
 
 
-def serialize_exception(data: Exception) -> str:
-    """Serialize an Exception object to a base64-encoded string using pickle."""
-    pickled_data: bytes = pickle.dumps(data)
-    encoded_data: str = base64.b64encode(pickled_data).decode("utf-8")
-    return encoded_data
+class DefaultSerializer(Serializer):
 
+    def serialize(self, data: Any) -> str:
+        pickled_data: bytes = pickle.dumps(data)
+        encoded_data: str = base64.b64encode(pickled_data).decode("utf-8")
+        return encoded_data
 
-def deserialize(serialized_data: str) -> Any:
-    """Deserialize a base64-encoded string back to a Python object using pickle."""
-    pickled_data: bytes = base64.b64decode(serialized_data)
-    return pickle.loads(pickled_data)
-
-
-def deserialize_args(serialized_data: str) -> WorkflowInputs:
-    """Deserialize a base64-encoded string back to a Python object list using pickle."""
-    pickled_data: bytes = base64.b64decode(serialized_data)
-    args: WorkflowInputs = pickle.loads(pickled_data)
-    return args
-
-
-def deserialize_exception(serialized_data: str) -> Exception:
-    """Deserialize a base64-encoded string back to a Python Exception using pickle."""
-    pickled_data: bytes = base64.b64decode(serialized_data)
-    exc: Exception = pickle.loads(pickled_data)
-    return exc
+    def deserialize(cls, serialized_data: str) -> Any:
+        pickled_data: bytes = base64.b64decode(serialized_data)
+        return pickle.loads(pickled_data)
 
 
 def safe_deserialize(
+    serializer: Serializer,
     workflow_id: str,
     *,
     serialized_input: Optional[str],
@@ -68,7 +52,9 @@ def safe_deserialize(
     input: Optional[WorkflowInputs]
     try:
         input = (
-            deserialize_args(serialized_input) if serialized_input is not None else None
+            serializer.deserialize(serialized_input)
+            if serialized_input is not None
+            else None
        )
    except Exception as e:
        dbos_logger.warning(
@@ -78,7 +64,9 @@ def safe_deserialize(
     output: Optional[Any]
     try:
         output = (
-            deserialize(serialized_output) if serialized_output is not None else None
+            serializer.deserialize(serialized_output)
+            if serialized_output is not None
+            else None
        )
    except Exception as e:
        dbos_logger.warning(
@@ -88,7 +76,7 @@ def safe_deserialize(
     exception: Optional[Exception]
     try:
         exception = (
-            deserialize_exception(serialized_exception)
+            serializer.deserialize(serialized_exception)
             if serialized_exception is not None
             else None
         )
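
The module-level pickle helpers are replaced by a Serializer ABC and a pickle-based DefaultSerializer, and safe_deserialize now receives the serializer as an argument. A custom serializer would subclass the ABC; a minimal sketch follows, assuming Serializer remains importable from dbos._serialization (how an alternate serializer gets wired into DBOS is not shown in this diff).

import json
from typing import Any

from dbos._serialization import Serializer

# Minimal sketch of a custom serializer conforming to the new ABC.
# Note: unlike pickle, JSON only round-trips plain data types, so this
# would not handle arbitrary workflow inputs, outputs, or exceptions.
class JSONSerializer(Serializer):
    def serialize(self, data: Any) -> str:
        return json.dumps(data)

    def deserialize(self, serialized_data: str) -> Any:
        return json.loads(serialized_data)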