dbos 1.15.0a9__py3-none-any.whl → 2.4.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dbos/_migration.py CHANGED
@@ -203,8 +203,39 @@ CREATE TABLE \"{schema}\".event_dispatch_kv (
     """


+def get_dbos_migration_two(schema: str) -> str:
+    return f"""
+    ALTER TABLE \"{schema}\".workflow_status ADD COLUMN queue_partition_key TEXT;
+    """
+
+
+def get_dbos_migration_three(schema: str) -> str:
+    return f"""
+    create index "idx_workflow_status_queue_status_started" on \"{schema}\"."workflow_status" ("queue_name", "status", "started_at_epoch_ms")
+    """
+
+
+def get_dbos_migration_four(schema: str) -> str:
+    return f"""
+    ALTER TABLE \"{schema}\".workflow_status ADD COLUMN forked_from TEXT;
+    CREATE INDEX "idx_workflow_status_forked_from" ON \"{schema}\"."workflow_status" ("forked_from")
+    """
+
+
+def get_dbos_migration_five(schema: str) -> str:
+    return f"""
+    ALTER TABLE \"{schema}\".operation_outputs ADD COLUMN started_at_epoch_ms BIGINT, ADD COLUMN completed_at_epoch_ms BIGINT;
+    """
+
+
 def get_dbos_migrations(schema: str) -> list[str]:
-    return [get_dbos_migration_one(schema)]
+    return [
+        get_dbos_migration_one(schema),
+        get_dbos_migration_two(schema),
+        get_dbos_migration_three(schema),
+        get_dbos_migration_four(schema),
+        get_dbos_migration_five(schema),
+    ]


 def get_sqlite_timestamp_expr() -> str:
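get_dbos_migrations now returns the Postgres migrations as an ordered list, so a caller applies them by position. The sketch below is a hypothetical runner under that assumption, not the package's actual migration machinery: the function name, the engine argument, and the applied-count bookkeeping are illustrative, and it assumes the database driver accepts multi-statement strings such as migration four.

    import sqlalchemy as sa

    def apply_dbos_migrations(engine: sa.engine.Engine, schema: str, applied: int = 0) -> None:
        # Hypothetical runner: apply every migration past the last applied index,
        # in order, each inside its own transaction. Assumes the driver tolerates
        # strings containing more than one statement (migration four has two).
        for migration in get_dbos_migrations(schema)[applied:]:
            with engine.begin() as conn:
                conn.execute(sa.text(migration))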
@@ -293,4 +324,30 @@ CREATE TABLE streams (
 );
 """

-sqlite_migrations = [sqlite_migration_one]
+sqlite_migration_two = """
+ALTER TABLE workflow_status ADD COLUMN queue_partition_key TEXT;
+"""
+
+sqlite_migration_three = """
+CREATE INDEX "idx_workflow_status_queue_status_started"
+ON "workflow_status" ("queue_name", "status", "started_at_epoch_ms")
+"""
+
+sqlite_migration_four = """
+ALTER TABLE workflow_status ADD COLUMN forked_from TEXT;
+CREATE INDEX "idx_workflow_status_forked_from" ON "workflow_status" ("forked_from")
+"""
+
+sqlite_migration_five = """
+ALTER TABLE operation_outputs ADD COLUMN started_at_epoch_ms BIGINT;
+ALTER TABLE operation_outputs ADD COLUMN completed_at_epoch_ms BIGINT;
+"""
+
+
+sqlite_migrations = [
+    sqlite_migration_one,
+    sqlite_migration_two,
+    sqlite_migration_three,
+    sqlite_migration_four,
+    sqlite_migration_five,
+]
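The SQLite variants mirror the Postgres migrations, with migration five split into two statements because SQLite allows only one ADD COLUMN per ALTER TABLE. Below is a minimal sketch of applying the list with the standard sqlite3 module (illustrative only, not the package's runner); executescript lets the multi-statement strings run as written.

    import sqlite3

    def apply_sqlite_migrations(db_path: str) -> None:
        # Hypothetical helper: executescript() accepts strings with several
        # statements, e.g. sqlite_migration_four and sqlite_migration_five.
        with sqlite3.connect(db_path) as conn:
            for migration in sqlite_migrations:
                conn.executescript(migration)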
dbos/_queue.py CHANGED
@@ -43,6 +43,7 @@ class Queue:
         *,  # Disable positional arguments from here on
         worker_concurrency: Optional[int] = None,
         priority_enabled: bool = False,
+        partition_queue: bool = False,
     ) -> None:
         if (
             worker_concurrency is not None
@@ -57,6 +58,7 @@ class Queue:
         self.worker_concurrency = worker_concurrency
         self.limiter = limiter
         self.priority_enabled = priority_enabled
+        self.partition_queue = partition_queue
         from ._dbos import _get_or_create_dbos_registry

         registry = _get_or_create_dbos_registry()
@@ -78,6 +80,18 @@ class Queue:
             raise Exception(
                 f"Priority is not enabled for queue {self.name}. Setting priority will not have any effect."
             )
+        if self.partition_queue and (
+            context is None or context.queue_partition_key is None
+        ):
+            raise Exception(
+                f"A workflow cannot be enqueued on partitioned queue {self.name} without a partition key"
+            )
+        if context and context.queue_partition_key and not self.partition_queue:
+            raise Exception(
+                f"You can only use a partition key on a partition-enabled queue. Key {context.queue_partition_key} was used with non-partitioned queue {self.name}"
+            )
+        if context and context.queue_partition_key and context.deduplication_id:
+            raise Exception("Deduplication is not supported for partitioned queues")

         dbos = _get_dbos_instance()
         return start_workflow(dbos, func, self.name, False, *args, **kwargs)
@@ -105,10 +119,21 @@ def queue_thread(stop_event: threading.Event, dbos: "DBOS") -> None:
         queues = dict(dbos._registry.queue_info_map)
         for _, queue in queues.items():
             try:
-                wf_ids = dbos._sys_db.start_queued_workflows(
-                    queue, GlobalParams.executor_id, GlobalParams.app_version
-                )
-                for id in wf_ids:
+                if queue.partition_queue:
+                    dequeued_workflows = []
+                    queue_partition_keys = dbos._sys_db.get_queue_partitions(queue.name)
+                    for key in queue_partition_keys:
+                        dequeued_workflows += dbos._sys_db.start_queued_workflows(
+                            queue,
+                            GlobalParams.executor_id,
+                            GlobalParams.app_version,
+                            key,
+                        )
+                else:
+                    dequeued_workflows = dbos._sys_db.start_queued_workflows(
+                        queue, GlobalParams.executor_id, GlobalParams.app_version, None
+                    )
+                for id in dequeued_workflows:
                     execute_workflow_by_id(dbos, id)
             except OperationalError as e:
                 if isinstance(
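Taken together, the queue changes add a partition_queue flag to the constructor, enforce three enqueue-time rules (a partition key is required on a partitioned queue, rejected on a non-partitioned one, and incompatible with deduplication_id), and make the polling thread dequeue each partition key separately. A minimal usage sketch under those assumptions follows; how the partition key is attached to the enqueue context is not shown in this hunk, so that part is left to comments.

    from dbos import DBOS, Queue

    # A queue that is polled one partition key at a time (new partition_queue flag).
    orders_queue = Queue("orders", worker_concurrency=1, partition_queue=True)

    @DBOS.workflow()
    def process_order(order_id: str) -> None:
        ...

    # Per the checks above, enqueueing process_order here without a
    # queue_partition_key in the enqueue context raises, as does combining a
    # partition key with a deduplication_id or using one on a non-partitioned queue.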
dbos/_scheduler.py CHANGED
@@ -1,7 +1,8 @@
+import random
 import threading
 import traceback
 from datetime import datetime, timezone
-from typing import TYPE_CHECKING, Callable
+from typing import TYPE_CHECKING, Any, Callable, Coroutine

 from ._logger import dbos_logger
 from ._queue import Queue
@@ -13,30 +14,45 @@ from ._context import SetWorkflowID
 from ._croniter import croniter  # type: ignore
 from ._registrations import get_dbos_func_name

-ScheduledWorkflow = Callable[[datetime, datetime], None]
-
-scheduler_queue: Queue
+ScheduledWorkflow = (
+    Callable[[datetime, datetime], None]
+    | Callable[[datetime, datetime], Coroutine[Any, Any, None]]
+)


 def scheduler_loop(
     func: ScheduledWorkflow, cron: str, stop_event: threading.Event
 ) -> None:
+    from dbos._dbos import _get_dbos_instance
+
+    dbos = _get_dbos_instance()
+    scheduler_queue = dbos._registry.get_internal_queue()
     try:
         iter = croniter(cron, datetime.now(timezone.utc), second_at_beginning=True)
-    except Exception as e:
+    except Exception:
         dbos_logger.error(
             f'Cannot run scheduled function {get_dbos_func_name(func)}. Invalid crontab "{cron}"'
         )
+        raise
     while not stop_event.is_set():
-        nextExecTime = iter.get_next(datetime)
-        sleepTime = nextExecTime - datetime.now(timezone.utc)
-        if stop_event.wait(timeout=sleepTime.total_seconds()):
+        next_exec_time = iter.get_next(datetime)
+        sleep_time = (next_exec_time - datetime.now(timezone.utc)).total_seconds()
+        sleep_time = max(0, sleep_time)
+        # To prevent a "thundering herd" problem in a distributed setting,
+        # apply jitter of up to 10% the sleep time, capped at 10 seconds
+        max_jitter = min(sleep_time / 10, 10)
+        jitter = random.uniform(0, max_jitter)
+        if stop_event.wait(timeout=sleep_time + jitter):
             return
         try:
-            with SetWorkflowID(
-                f"sched-{get_dbos_func_name(func)}-{nextExecTime.isoformat()}"
-            ):
-                scheduler_queue.enqueue(func, nextExecTime, datetime.now(timezone.utc))
+            workflowID = (
+                f"sched-{get_dbos_func_name(func)}-{next_exec_time.isoformat()}"
+            )
+            if not dbos._sys_db.get_workflow_status(workflowID):
+                with SetWorkflowID(workflowID):
+                    scheduler_queue.enqueue(
+                        func, next_exec_time, datetime.now(timezone.utc)
+                    )
         except Exception:
             dbos_logger.warning(
                 f"Exception encountered in scheduler thread: {traceback.format_exc()})"
@@ -49,13 +65,10 @@ def scheduled(
     def decorator(func: ScheduledWorkflow) -> ScheduledWorkflow:
         try:
             croniter(cron, datetime.now(timezone.utc), second_at_beginning=True)
-        except Exception as e:
+        except Exception:
             raise ValueError(
                 f'Invalid crontab "{cron}" for scheduled function function {get_dbos_func_name(func)}.'
             )
-
-        global scheduler_queue
-        scheduler_queue = dbosreg.get_internal_queue()
         stop_event = threading.Event()
         dbosreg.register_poller(stop_event, scheduler_loop, func, cron, stop_event)
         return func
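Two behaviors change in scheduler_loop: the wait before each run gains bounded random jitter, and the enqueue becomes idempotent by first checking whether a workflow with the deterministic sched-{function}-{timestamp} ID already exists. A small sketch isolating the jitter arithmetic, mirroring the calculation above; for a 60-second interval it yields a wait between 60 and 66 seconds.

    import random

    def jittered_timeout(seconds_until_next_run: float) -> float:
        # Mirror of the scheduler_loop calculation: clamp to non-negative, then
        # add up to 10% jitter, capped at 10 seconds, so processes sharing one
        # cron schedule do not all wake at the same instant.
        sleep_time = max(0.0, seconds_until_next_run)
        max_jitter = min(sleep_time / 10, 10)
        return sleep_time + random.uniform(0, max_jitter)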
@@ -77,6 +77,8 @@ class SystemSchema:
         Column("deduplication_id", Text(), nullable=True),
         Column("inputs", Text()),
         Column("priority", Integer(), nullable=False, server_default=text("'0'::int")),
+        Column("queue_partition_key", Text()),
+        Column("forked_from", Text()),
         Index("workflow_status_created_at_index", "created_at"),
         Index("workflow_status_executor_id_index", "executor_id"),
         Index("workflow_status_status_index", "status"),
@@ -103,6 +105,8 @@ class SystemSchema:
         Column("output", Text, nullable=True),
         Column("error", Text, nullable=True),
         Column("child_workflow_id", Text, nullable=True),
+        Column("started_at_epoch_ms", BigInteger, nullable=True),
+        Column("completed_at_epoch_ms", BigInteger, nullable=True),
         PrimaryKeyConstraint("workflow_uuid", "function_id"),
     )
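These schema additions back the features above: workflow_status gains queue_partition_key for partitioned dequeue and forked_from (by its name, the workflow a fork originated from), while operation_outputs gains per-step start and completion timestamps in epoch milliseconds. The composite index from migration three matches dequeue-style lookups; the query shape below is illustrative, with the schema name, status value, and limit as placeholder assumptions rather than values taken from the package.

    import sqlalchemy as sa

    # Illustrative query shape the new (queue_name, status, started_at_epoch_ms)
    # index is positioned to serve; "dbos", 'ENQUEUED', and the limit are placeholders.
    dequeue_candidates = sa.text(
        """
        SELECT workflow_uuid
        FROM "dbos".workflow_status
        WHERE queue_name = :queue_name AND status = 'ENQUEUED'
        ORDER BY started_at_epoch_ms
        LIMIT :max_tasks
        """
    )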
 
dbos/_serialization.py CHANGED
@@ -1,6 +1,6 @@
 import base64
 import pickle
-import types
+from abc import ABC, abstractmethod
 from typing import Any, Dict, Optional, Tuple, TypedDict

 from ._logger import dbos_logger
@@ -11,47 +11,35 @@ class WorkflowInputs(TypedDict):
     kwargs: Dict[str, Any]


-def serialize(data: Any) -> str:
-    pickled_data: bytes = pickle.dumps(data)
-    encoded_data: str = base64.b64encode(pickled_data).decode("utf-8")
-    return encoded_data
+class Serializer(ABC):

+    @abstractmethod
+    def serialize(self, data: Any) -> str:
+        pass

-def serialize_args(data: WorkflowInputs) -> str:
-    """Serialize args to a base64-encoded string using pickle."""
-    pickled_data: bytes = pickle.dumps(data)
-    encoded_data: str = base64.b64encode(pickled_data).decode("utf-8")
-    return encoded_data
+    @abstractmethod
+    def deserialize(cls, serialized_data: str) -> Any:
+        pass


-def serialize_exception(data: Exception) -> str:
-    """Serialize an Exception object to a base64-encoded string using pickle."""
-    pickled_data: bytes = pickle.dumps(data)
-    encoded_data: str = base64.b64encode(pickled_data).decode("utf-8")
-    return encoded_data
+class DefaultSerializer(Serializer):

+    def serialize(self, data: Any) -> str:
+        try:
+            pickled_data: bytes = pickle.dumps(data)
+            encoded_data: str = base64.b64encode(pickled_data).decode("utf-8")
+            return encoded_data
+        except Exception as e:
+            dbos_logger.error(f"Error serializing object: {data}", exc_info=e)
+            raise

-def deserialize(serialized_data: str) -> Any:
-    """Deserialize a base64-encoded string back to a Python object using pickle."""
-    pickled_data: bytes = base64.b64decode(serialized_data)
-    return pickle.loads(pickled_data)
-
-
-def deserialize_args(serialized_data: str) -> WorkflowInputs:
-    """Deserialize a base64-encoded string back to a Python object list using pickle."""
-    pickled_data: bytes = base64.b64decode(serialized_data)
-    args: WorkflowInputs = pickle.loads(pickled_data)
-    return args
-
-
-def deserialize_exception(serialized_data: str) -> Exception:
-    """Deserialize a base64-encoded string back to a Python Exception using pickle."""
-    pickled_data: bytes = base64.b64decode(serialized_data)
-    exc: Exception = pickle.loads(pickled_data)
-    return exc
+    def deserialize(cls, serialized_data: str) -> Any:
+        pickled_data: bytes = base64.b64decode(serialized_data)
+        return pickle.loads(pickled_data)


 def safe_deserialize(
+    serializer: Serializer,
     workflow_id: str,
     *,
     serialized_input: Optional[str],
@@ -68,7 +56,9 @@ def safe_deserialize(
     input: Optional[WorkflowInputs]
     try:
         input = (
-            deserialize_args(serialized_input) if serialized_input is not None else None
+            serializer.deserialize(serialized_input)
+            if serialized_input is not None
+            else None
         )
     except Exception as e:
         dbos_logger.warning(
@@ -78,7 +68,9 @@ def safe_deserialize(
     output: Optional[Any]
     try:
         output = (
-            deserialize(serialized_output) if serialized_output is not None else None
+            serializer.deserialize(serialized_output)
+            if serialized_output is not None
+            else None
         )
     except Exception as e:
         dbos_logger.warning(
@@ -88,7 +80,7 @@ def safe_deserialize(
     exception: Optional[Exception]
     try:
         exception = (
-            deserialize_exception(serialized_exception)
+            serializer.deserialize(serialized_exception)
             if serialized_exception is not None
             else None
         )
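The module-level pickle helpers are replaced by a Serializer abstract base class with a pickle-backed DefaultSerializer, and safe_deserialize now receives the serializer as its first argument. Below is a minimal sketch of an alternative implementation against the same interface; it is illustrative only, handles just JSON-compatible values, and this diff does not show how (or whether) a custom serializer is wired into DBOS configuration.

    import json
    from typing import Any

    from dbos._serialization import Serializer  # ABC introduced above

    class JSONSerializer(Serializer):
        # Illustrative implementation: human-readable, but unlike the pickle-based
        # DefaultSerializer it only supports JSON-serializable values.
        def serialize(self, data: Any) -> str:
            return json.dumps(data)

        def deserialize(self, serialized_data: str) -> Any:
            return json.loads(serialized_data)

safe_deserialize would then take an instance of such a class as its first (serializer) argument.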