dbos 2.1.0a2__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbos/__init__.py +2 -0
- dbos/_app_db.py +40 -45
- dbos/_client.py +11 -4
- dbos/_context.py +8 -0
- dbos/_core.py +38 -26
- dbos/_dbos.py +15 -0
- dbos/_dbos_config.py +4 -10
- dbos/_migration.py +12 -2
- dbos/_queue.py +29 -4
- dbos/_scheduler.py +24 -14
- dbos/_schemas/system_database.py +1 -0
- dbos/_serialization.py +24 -36
- dbos/_sys_db.py +124 -60
- dbos/cli/migration.py +3 -0
- {dbos-2.1.0a2.dist-info → dbos-2.2.0.dist-info}/METADATA +1 -1
- {dbos-2.1.0a2.dist-info → dbos-2.2.0.dist-info}/RECORD +19 -19
- {dbos-2.1.0a2.dist-info → dbos-2.2.0.dist-info}/WHEEL +0 -0
- {dbos-2.1.0a2.dist-info → dbos-2.2.0.dist-info}/entry_points.txt +0 -0
- {dbos-2.1.0a2.dist-info → dbos-2.2.0.dist-info}/licenses/LICENSE +0 -0
dbos/_queue.py
CHANGED
@@ -43,6 +43,7 @@ class Queue:
         *,  # Disable positional arguments from here on
         worker_concurrency: Optional[int] = None,
         priority_enabled: bool = False,
+        partition_queue: bool = False,
     ) -> None:
         if (
             worker_concurrency is not None
@@ -57,6 +58,7 @@ class Queue:
         self.worker_concurrency = worker_concurrency
         self.limiter = limiter
         self.priority_enabled = priority_enabled
+        self.partition_queue = partition_queue
         from ._dbos import _get_or_create_dbos_registry

         registry = _get_or_create_dbos_registry()
@@ -78,6 +80,18 @@ class Queue:
             raise Exception(
                 f"Priority is not enabled for queue {self.name}. Setting priority will not have any effect."
             )
+        if self.partition_queue and (
+            context is None or context.queue_partition_key is None
+        ):
+            raise Exception(
+                f"A workflow cannot be enqueued on partitioned queue {self.name} without a partition key"
+            )
+        if context and context.queue_partition_key and not self.partition_queue:
+            raise Exception(
+                f"You can only use a partition key on a partition-enabled queue. Key {context.queue_partition_key} was used with non-partitioned queue {self.name}"
+            )
+        if context and context.queue_partition_key and context.deduplication_id:
+            raise Exception("Deduplication is not supported for partitioned queues")

         dbos = _get_dbos_instance()
         return start_workflow(dbos, func, self.name, False, *args, **kwargs)
@@ -105,10 +119,21 @@ def queue_thread(stop_event: threading.Event, dbos: "DBOS") -> None:
         queues = dict(dbos._registry.queue_info_map)
         for _, queue in queues.items():
             try:
-
-
-
-
+                if queue.partition_queue:
+                    dequeued_workflows = []
+                    queue_partition_keys = dbos._sys_db.get_queue_partitions(queue.name)
+                    for key in queue_partition_keys:
+                        dequeued_workflows += dbos._sys_db.start_queued_workflows(
+                            queue,
+                            GlobalParams.executor_id,
+                            GlobalParams.app_version,
+                            key,
+                        )
+                else:
+                    dequeued_workflows = dbos._sys_db.start_queued_workflows(
+                        queue, GlobalParams.executor_id, GlobalParams.app_version, None
+                    )
                 for id in dequeued_workflows:
                     execute_workflow_by_id(dbos, id)
             except OperationalError as e:
                 if isinstance(
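The hunks above introduce partitioned queues: a Queue constructed with partition_queue=True refuses enqueues that lack a partition key, rejects deduplication IDs, and the dequeue loop polls each partition separately via get_queue_partitions. A minimal usage sketch follows, assuming the partition key is supplied through the SetEnqueueOptions context manager (the _context.py change in this release suggests a queue_partition_key option, but that parameter name is inferred, not shown in this section):

from dbos import DBOS, Queue, SetEnqueueOptions

# partition_queue=True is the new constructor flag added in this diff
tenant_queue = Queue("tenant-tasks", worker_concurrency=1, partition_queue=True)

@DBOS.workflow()
def process_order(order_id: int) -> None:
    DBOS.logger.info(f"processing order {order_id}")

def enqueue_for_tenant(tenant_id: str, order_id: int) -> None:
    # queue_partition_key is an assumed option name; per Queue.enqueue above,
    # enqueuing on a partitioned queue without a key (or with a deduplication
    # ID) raises an exception.
    with SetEnqueueOptions(queue_partition_key=tenant_id):
        tenant_queue.enqueue(process_order, order_id)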
dbos/_scheduler.py
CHANGED
@@ -1,3 +1,4 @@
+import random
 import threading
 import traceback
 from datetime import datetime, timezone
@@ -15,28 +16,40 @@ from ._registrations import get_dbos_func_name

 ScheduledWorkflow = Callable[[datetime, datetime], None]

-scheduler_queue: Queue
-

 def scheduler_loop(
     func: ScheduledWorkflow, cron: str, stop_event: threading.Event
 ) -> None:
+    from dbos._dbos import _get_dbos_instance
+
+    dbos = _get_dbos_instance()
+    scheduler_queue = dbos._registry.get_internal_queue()
     try:
         iter = croniter(cron, datetime.now(timezone.utc), second_at_beginning=True)
-    except Exception
+    except Exception:
         dbos_logger.error(
             f'Cannot run scheduled function {get_dbos_func_name(func)}. Invalid crontab "{cron}"'
         )
+        raise
     while not stop_event.is_set():
-
-
-
+        next_exec_time = iter.get_next(datetime)
+        sleep_time = (next_exec_time - datetime.now(timezone.utc)).total_seconds()
+        sleep_time = max(0, sleep_time)
+        # To prevent a "thundering herd" problem in a distributed setting,
+        # apply jitter of up to 10% the sleep time, capped at 10 seconds
+        max_jitter = min(sleep_time / 10, 10)
+        jitter = random.uniform(0, max_jitter)
+        if stop_event.wait(timeout=sleep_time + jitter):
             return
         try:
-
-                f"sched-{get_dbos_func_name(func)}-{
-            )
-
+            workflowID = (
+                f"sched-{get_dbos_func_name(func)}-{next_exec_time.isoformat()}"
+            )
+            if not dbos._sys_db.get_workflow_status(workflowID):
+                with SetWorkflowID(workflowID):
+                    scheduler_queue.enqueue(
+                        func, next_exec_time, datetime.now(timezone.utc)
+                    )
        except Exception:
            dbos_logger.warning(
                f"Exception encountered in scheduler thread: {traceback.format_exc()})"
@@ -49,13 +62,10 @@ def scheduled(
     def decorator(func: ScheduledWorkflow) -> ScheduledWorkflow:
         try:
             croniter(cron, datetime.now(timezone.utc), second_at_beginning=True)
-        except Exception
+        except Exception:
             raise ValueError(
                 f'Invalid crontab "{cron}" for scheduled function function {get_dbos_func_name(func)}.'
             )
-
-        global scheduler_queue
-        scheduler_queue = dbosreg.get_internal_queue()
         stop_event = threading.Event()
         dbosreg.register_poller(stop_event, scheduler_loop, func, cron, stop_event)
         return func
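The scheduler now resolves its internal queue inside scheduler_loop, adds up to 10% jitter (capped at 10 seconds) to each sleep to avoid a thundering herd across executors, and derives a deterministic workflow ID from the cron fire time so the same tick is enqueued only once. The user-facing decorator is unchanged; a minimal usage sketch of a scheduled workflow matching the ScheduledWorkflow signature above:

from datetime import datetime
from dbos import DBOS

# Runs once per minute; the two datetime arguments match the
# ScheduledWorkflow = Callable[[datetime, datetime], None] alias in this file.
@DBOS.scheduled("* * * * *")
@DBOS.workflow()
def minute_report(scheduled_time: datetime, actual_time: datetime) -> None:
    DBOS.logger.info(f"tick scheduled for {scheduled_time}, started at {actual_time}")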
dbos/_schemas/system_database.py
CHANGED
@@ -77,6 +77,7 @@ class SystemSchema:
         Column("deduplication_id", Text(), nullable=True),
         Column("inputs", Text()),
         Column("priority", Integer(), nullable=False, server_default=text("'0'::int")),
+        Column("queue_partition_key", Text()),
         Index("workflow_status_created_at_index", "created_at"),
         Index("workflow_status_executor_id_index", "executor_id"),
         Index("workflow_status_status_index", "status"),
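The new nullable queue_partition_key column on workflow_status is what get_queue_partitions (called from the queue_thread change above) reads to discover which partitions currently have enqueued work. A conceptual sketch of such a query, not the actual _sys_db implementation, assuming the table keeps its existing queue_name and status columns:

import sqlalchemy as sa
from dbos._schemas.system_database import SystemSchema

def list_queue_partitions(engine: sa.Engine, queue_name: str) -> list[str]:
    # Distinct non-null partition keys for workflows still waiting on this queue.
    ws = SystemSchema.workflow_status
    query = (
        sa.select(ws.c.queue_partition_key)
        .where(ws.c.queue_name == queue_name)
        .where(ws.c.status == "ENQUEUED")
        .where(ws.c.queue_partition_key.isnot(None))
        .distinct()
    )
    with engine.connect() as conn:
        return [row[0] for row in conn.execute(query)]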
dbos/_serialization.py
CHANGED
@@ -1,6 +1,6 @@
 import base64
 import pickle
-import
+from abc import ABC, abstractmethod
 from typing import Any, Dict, Optional, Tuple, TypedDict

 from ._logger import dbos_logger
@@ -11,47 +11,31 @@ class WorkflowInputs(TypedDict):
     kwargs: Dict[str, Any]


-
-    pickled_data: bytes = pickle.dumps(data)
-    encoded_data: str = base64.b64encode(pickled_data).decode("utf-8")
-    return encoded_data
+class Serializer(ABC):

+    @abstractmethod
+    def serialize(self, data: Any) -> str:
+        pass

-
-
-
-    encoded_data: str = base64.b64encode(pickled_data).decode("utf-8")
-    return encoded_data
+    @abstractmethod
+    def deserialize(cls, serialized_data: str) -> Any:
+        pass


-
-    """Serialize an Exception object to a base64-encoded string using pickle."""
-    pickled_data: bytes = pickle.dumps(data)
-    encoded_data: str = base64.b64encode(pickled_data).decode("utf-8")
-    return encoded_data
+class DefaultSerializer(Serializer):

+    def serialize(self, data: Any) -> str:
+        pickled_data: bytes = pickle.dumps(data)
+        encoded_data: str = base64.b64encode(pickled_data).decode("utf-8")
+        return encoded_data

-def deserialize(serialized_data: str) -> Any:
-
-
-    return pickle.loads(pickled_data)
-
-
-def deserialize_args(serialized_data: str) -> WorkflowInputs:
-    """Deserialize a base64-encoded string back to a Python object list using pickle."""
-    pickled_data: bytes = base64.b64decode(serialized_data)
-    args: WorkflowInputs = pickle.loads(pickled_data)
-    return args
-
-
-def deserialize_exception(serialized_data: str) -> Exception:
-    """Deserialize a base64-encoded string back to a Python Exception using pickle."""
-    pickled_data: bytes = base64.b64decode(serialized_data)
-    exc: Exception = pickle.loads(pickled_data)
-    return exc
+    def deserialize(cls, serialized_data: str) -> Any:
+        pickled_data: bytes = base64.b64decode(serialized_data)
+        return pickle.loads(pickled_data)


 def safe_deserialize(
+    serializer: Serializer,
     workflow_id: str,
     *,
     serialized_input: Optional[str],
@@ -68,7 +52,9 @@ def safe_deserialize(
     input: Optional[WorkflowInputs]
     try:
         input = (
-
+            serializer.deserialize(serialized_input)
+            if serialized_input is not None
+            else None
         )
     except Exception as e:
         dbos_logger.warning(
@@ -78,7 +64,9 @@ def safe_deserialize(
     output: Optional[Any]
     try:
         output = (
-            deserialize(serialized_output)
+            serializer.deserialize(serialized_output)
+            if serialized_output is not None
+            else None
         )
     except Exception as e:
         dbos_logger.warning(
@@ -88,7 +76,7 @@ def safe_deserialize(
     exception: Optional[Exception]
     try:
         exception = (
-
+            serializer.deserialize(serialized_exception)
             if serialized_exception is not None
             else None
         )