dbos 1.1.0a4__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbos/_admin_server.py +24 -4
- dbos/_app_db.py +0 -15
- dbos/_client.py +4 -3
- dbos/_core.py +11 -4
- dbos/_dbos.py +22 -4
- dbos/_dbos_config.py +21 -39
- dbos/_event_loop.py +10 -7
- dbos/_queue.py +6 -7
- dbos/_sys_db.py +179 -134
- dbos/_utils.py +33 -0
- dbos/_workflow_commands.py +1 -10
- dbos/cli/cli.py +2 -1
- {dbos-1.1.0a4.dist-info → dbos-1.2.0.dist-info}/METADATA +1 -1
- {dbos-1.1.0a4.dist-info → dbos-1.2.0.dist-info}/RECORD +17 -17
- {dbos-1.1.0a4.dist-info → dbos-1.2.0.dist-info}/WHEEL +0 -0
- {dbos-1.1.0a4.dist-info → dbos-1.2.0.dist-info}/entry_points.txt +0 -0
- {dbos-1.1.0a4.dist-info → dbos-1.2.0.dist-info}/licenses/LICENSE +0 -0
dbos/_admin_server.py
CHANGED
@@ -5,8 +5,9 @@ import re
 import threading
 from functools import partial
 from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
-from typing import TYPE_CHECKING, Any, List, TypedDict
+from typing import TYPE_CHECKING, Any, List, Optional, TypedDict
 
+from ._context import SetWorkflowID
 from ._error import DBOSException
 from ._logger import dbos_logger
 from ._recovery import recover_pending_workflows
@@ -141,7 +142,11 @@ class AdminRequestHandler(BaseHTTPRequestHandler):
             try:
                 data = json.loads(post_data.decode("utf-8"))
                 start_step: int = data.get("start_step", 1)
-
+                new_workflow_id: Optional[str] = data.get("new_workflow_id")
+                application_version: Optional[str] = data.get("application_version")
+                self._handle_fork(
+                    workflow_id, start_step, new_workflow_id, application_version
+                )
             except (json.JSONDecodeError, AttributeError) as e:
                 self.send_response(500)
                 self.send_header("Content-Type", "application/json")
@@ -191,9 +196,24 @@ class AdminRequestHandler(BaseHTTPRequestHandler):
         self.end_headers()
         self.wfile.write(response_body)
 
-    def _handle_fork(
+    def _handle_fork(
+        self,
+        workflow_id: str,
+        start_step: int,
+        new_workflow_id: Optional[str],
+        application_version: Optional[str],
+    ) -> None:
         try:
-
+            print(f"Forking workflow {workflow_id} from step {start_step}")
+            if new_workflow_id is not None:
+                with SetWorkflowID(new_workflow_id):
+                    handle = self.dbos.fork_workflow(
+                        workflow_id, start_step, application_version=application_version
+                    )
+            else:
+                handle = self.dbos.fork_workflow(
+                    workflow_id, start_step, application_version=application_version
+                )
             response_body = json.dumps(
                 {
                     "workflow_id": handle.workflow_id,
dbos/_app_db.py
CHANGED
@@ -216,21 +216,6 @@ class ApplicationDatabase:
             for row in rows
         ]
 
-    def get_max_function_id(self, workflow_uuid: str) -> Optional[int]:
-        with self.engine.begin() as conn:
-            max_function_id_row = conn.execute(
-                sa.select(
-                    sa.func.max(ApplicationSchema.transaction_outputs.c.function_id)
-                ).where(
-                    ApplicationSchema.transaction_outputs.c.workflow_uuid
-                    == workflow_uuid
-                )
-            ).fetchone()
-
-            max_function_id = max_function_id_row[0] if max_function_id_row else None
-
-            return max_function_id
-
     def clone_workflow_transactions(
         self, src_workflow_id: str, forked_workflow_id: str, start_step: int
     ) -> None:
dbos/_client.py
CHANGED
@@ -3,8 +3,6 @@ import sys
 import uuid
 from typing import Any, Generic, List, Optional, TypedDict, TypeVar
 
-from sqlalchemy import URL
-
 from dbos._app_db import ApplicationDatabase
 from dbos._context import MaxPriority, MinPriority
 
@@ -15,6 +13,7 @@ else:
 
 from dbos import _serialization
 from dbos._dbos import WorkflowHandle, WorkflowHandleAsync
+from dbos._dbos_config import is_valid_database_url
 from dbos._error import DBOSException, DBOSNonExistentWorkflowError
 from dbos._registrations import DEFAULT_MAX_RECOVERY_ATTEMPTS
 from dbos._serialization import WorkflowInputs
@@ -99,6 +98,7 @@ class WorkflowHandleClientAsyncPolling(Generic[R]):
 
 class DBOSClient:
     def __init__(self, database_url: str, *, system_database: Optional[str] = None):
+        assert is_valid_database_url(database_url)
         # We only create database connections but do not run migrations
         self._sys_db = SystemDatabase(
             database_url=database_url,
@@ -109,6 +109,7 @@ class DBOSClient:
             },
             sys_db_name=system_database,
         )
+        self._sys_db.check_connection()
         self._app_db = ApplicationDatabase(
             database_url=database_url,
             engine_kwargs={
@@ -231,7 +232,7 @@ class DBOSClient:
             "workflow_deadline_epoch_ms": None,
         }
         with self._sys_db.engine.begin() as conn:
-            self._sys_db.
+            self._sys_db._insert_workflow_status(
                 status, conn, max_recovery_attempts=None
            )
        self._sys_db.send(status["workflow_uuid"], 0, destination_id, message, topic)
dbos/_core.py
CHANGED
@@ -20,8 +20,10 @@ from typing import (
     cast,
 )
 
+import psycopg
+
 from dbos._outcome import Immediate, NoResult, Outcome, Pending
-from dbos._utils import GlobalParams
+from dbos._utils import GlobalParams, retriable_postgres_exception
 
 from ._app_db import ApplicationDatabase, TransactionResultInternal
 
@@ -602,7 +604,6 @@ async def start_workflow_async(
     *args: P.args,
     **kwargs: P.kwargs,
 ) -> "WorkflowHandleAsync[R]":
-
     # If the function has a class, add the class object as its first argument
     fself: Optional[object] = None
     if hasattr(func, "__self__"):
@@ -932,12 +933,18 @@ def decorate_transaction(
                         )
                         break
                     except DBAPIError as dbapi_error:
-
+                        driver_error = cast(
+                            Optional[psycopg.OperationalError], dbapi_error.orig
+                        )
+                        if retriable_postgres_exception(dbapi_error) or (
+                            driver_error is not None
+                            and driver_error.sqlstate == "40001"
+                        ):
                             # Retry on serialization failure
                             span = ctx.get_current_span()
                             if span:
                                 span.add_event(
-                                    "Transaction
+                                    "Transaction Failure",
                                     {"retry_wait_seconds": retry_wait_seconds},
                                 )
                             time.sleep(retry_wait_seconds)
dbos/_dbos.py
CHANGED
@@ -90,10 +90,8 @@ from ._context import (
 from ._dbos_config import (
     ConfigFile,
     DBOSConfig,
-    check_config_consistency,
     overwrite_config,
     process_config,
-    set_env_vars,
     translate_dbos_config_to_config_file,
 )
 from ._error import (
@@ -299,6 +297,7 @@ class DBOS:
 
         self._launched: bool = False
         self._debug_mode: bool = False
+        self._configured_threadpool: bool = False
         self._sys_db_field: Optional[SystemDatabase] = None
         self._app_db_field: Optional[ApplicationDatabase] = None
         self._registry: DBOSRegistry = _get_or_create_dbos_registry()
@@ -323,14 +322,12 @@ class DBOS:
             unvalidated_config = translate_dbos_config_to_config_file(config)
             if os.environ.get("DBOS__CLOUD") == "true":
                 unvalidated_config = overwrite_config(unvalidated_config)
-                check_config_consistency(name=unvalidated_config["name"])
 
         if unvalidated_config is not None:
             self._config: ConfigFile = process_config(data=unvalidated_config)
         else:
             raise ValueError("No valid configuration was loaded.")
 
-        set_env_vars(self._config)
         config_logger(self._config)
         dbos_tracer.config(self._config)
         dbos_logger.info(f"Initializing DBOS (v{GlobalParams.dbos_version})")
@@ -719,6 +716,7 @@ class DBOS:
         **kwargs: P.kwargs,
     ) -> WorkflowHandleAsync[R]:
         """Invoke a workflow function on the event loop, returning a handle to the ongoing execution."""
+        await cls._configure_asyncio_thread_pool()
         return await start_workflow_async(
             _get_dbos_instance(), func, None, True, *args, **kwargs
         )
@@ -736,6 +734,7 @@ class DBOS:
     async def get_workflow_status_async(
         cls, workflow_id: str
     ) -> Optional[WorkflowStatus]:
+        await cls._configure_asyncio_thread_pool()
         """Return the status of a workflow execution."""
         return await asyncio.to_thread(cls.get_workflow_status, workflow_id)
 
@@ -757,6 +756,7 @@ class DBOS:
     ) -> WorkflowHandleAsync[R]:
         """Return a `WorkflowHandle` for a workflow execution."""
         dbos = _get_dbos_instance()
+        await cls._configure_asyncio_thread_pool()
         if existing_workflow:
             stat = await dbos.get_workflow_status_async(workflow_id)
             if stat is None:
@@ -775,6 +775,7 @@ class DBOS:
         cls, destination_id: str, message: Any, topic: Optional[str] = None
     ) -> None:
         """Send a message to a workflow execution."""
+        await cls._configure_asyncio_thread_pool()
         await asyncio.to_thread(lambda: DBOS.send(destination_id, message, topic))
 
     @classmethod
@@ -797,6 +798,7 @@ class DBOS:
         This function is to be called from within a workflow.
         `recv_async` will return the message sent on `topic`, asyncronously waiting if necessary.
         """
+        await cls._configure_asyncio_thread_pool()
         return await asyncio.to_thread(lambda: DBOS.recv(topic, timeout_seconds))
 
     @classmethod
@@ -835,6 +837,7 @@ class DBOS:
         It is important to use `DBOS.sleep` or `DBOS.sleep_async` (as opposed to any other sleep) within workflows,
         as the DBOS sleep methods are durable and completed sleeps will be skipped during recovery.
         """
+        await cls._configure_asyncio_thread_pool()
         await asyncio.to_thread(lambda: DBOS.sleep(seconds))
 
     @classmethod
@@ -869,6 +872,7 @@ class DBOS:
             value(Any): A serializable value to associate with the key
 
         """
+        await cls._configure_asyncio_thread_pool()
         await asyncio.to_thread(lambda: DBOS.set_event(key, value))
 
     @classmethod
@@ -901,6 +905,7 @@ class DBOS:
             timeout_seconds(float): The amount of time to wait, in case `set_event` has not yet been called byt the workflow
 
         """
+        await cls._configure_asyncio_thread_pool()
         return await asyncio.to_thread(
             lambda: DBOS.get_event(workflow_id, key, timeout_seconds)
         )
@@ -929,6 +934,19 @@ class DBOS:
             fn, "DBOS.cancelWorkflow"
         )
 
+    @classmethod
+    async def _configure_asyncio_thread_pool(cls) -> None:
+        """
+        Configure the thread pool for asyncio.to_thread.
+
+        This function is called before the first call to asyncio.to_thread.
+        """
+        if _get_dbos_instance()._configured_threadpool:
+            return
+        loop = asyncio.get_running_loop()
+        loop.set_default_executor(_get_dbos_instance()._executor)
+        _get_dbos_instance()._configured_threadpool = True
+
     @classmethod
     def resume_workflow(cls, workflow_id: str) -> WorkflowHandle[Any]:
         """Resume a workflow by ID."""
dbos/_dbos_config.py
CHANGED
@@ -329,17 +329,9 @@ def process_config(
     if data.get("database_url") is not None and data["database_url"] != "":
         # Parse the db string and check required fields
         assert data["database_url"] is not None
+        assert is_valid_database_url(data["database_url"])
+
         url = make_url(data["database_url"])
-        required_fields = [
-            ("username", "Username must be specified in the connection URL"),
-            ("password", "Password must be specified in the connection URL"),
-            ("host", "Host must be specified in the connection URL"),
-            ("database", "Database name must be specified in the connection URL"),
-        ]
-        for field_name, error_message in required_fields:
-            field_value = getattr(url, field_name, None)
-            if not field_value:
-                raise DBOSInitializationError(error_message)
 
         if not data["database"].get("sys_db_name"):
             assert url.database is not None
@@ -385,6 +377,9 @@ def process_config(
     if not silent and logs["logLevel"] == "INFO" or logs["logLevel"] == "DEBUG":
         log_url = make_url(data["database_url"]).render_as_string(hide_password=True)
         print(f"[bold blue]Using database connection string: {log_url}[/bold blue]")
+        print(
+            f"[bold blue]Database engine parameters: {data['database']['db_engine_kwargs']}[/bold blue]"
+        )
 
     # Return data as ConfigFile type
     return data
@@ -407,6 +402,7 @@ def configure_db_engine_parameters(
         "pool_timeout": 30,
         "max_overflow": 0,
         "pool_size": 20,
+        "pool_pre_ping": True,
     }
     # If user-provided kwargs are present, use them instead
     user_kwargs = data.get("db_engine_kwargs")
@@ -431,6 +427,21 @@ def configure_db_engine_parameters(
     data["sys_db_engine_kwargs"] = system_engine_kwargs
 
 
+def is_valid_database_url(database_url: str) -> bool:
+    url = make_url(database_url)
+    required_fields = [
+        ("username", "Username must be specified in the connection URL"),
+        ("password", "Password must be specified in the connection URL"),
+        ("host", "Host must be specified in the connection URL"),
+        ("database", "Database name must be specified in the connection URL"),
+    ]
+    for field_name, error_message in required_fields:
+        field_value = getattr(url, field_name, None)
+        if not field_value:
+            raise DBOSInitializationError(error_message)
+    return True
+
+
 def _is_valid_app_name(name: str) -> bool:
     name_len = len(name)
     if name_len < 3 or name_len > 30:
@@ -444,12 +455,6 @@ def _app_name_to_db_name(app_name: str) -> str:
     return name if not name[0].isdigit() else f"_{name}"
 
 
-def set_env_vars(config: ConfigFile) -> None:
-    for env, value in config.get("env", {}).items():
-        if value is not None:
-            os.environ[env] = str(value)
-
-
 def overwrite_config(provided_config: ConfigFile) -> ConfigFile:
     # Load the DBOS configuration file and force the use of:
     # 1. The database url provided by DBOS_DATABASE_URL
@@ -529,26 +534,3 @@ def overwrite_config(provided_config: ConfigFile) -> ConfigFile:
     del provided_config["env"]
 
     return provided_config
-
-
-def check_config_consistency(
-    *,
-    name: str,
-    config_file_path: str = DBOS_CONFIG_PATH,
-) -> None:
-    # First load the config file and check whether it is present
-    try:
-        config = load_config(config_file_path, silent=True, run_process_config=False)
-    except FileNotFoundError:
-        dbos_logger.debug(
-            f"No configuration file {config_file_path} found. Skipping consistency check with provided config."
-        )
-        return
-    except Exception as e:
-        raise e
-
-    # Check the name
-    if name != config["name"]:
-        raise DBOSInitializationError(
-            f"Provided app name '{name}' does not match the app name '{config['name']}' in {config_file_path}."
-        )
dbos/_event_loop.py
CHANGED
@@ -1,5 +1,6 @@
 import asyncio
 import threading
+from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Coroutine, Optional, TypeVar
 
 
@@ -33,15 +34,17 @@ class BackgroundEventLoop:
 
     def _run_event_loop(self) -> None:
         self._loop = asyncio.new_event_loop()
-
+        with ThreadPoolExecutor(max_workers=64) as thread_pool:
+            self._loop.set_default_executor(thread_pool)
+            asyncio.set_event_loop(self._loop)
 
-
-
+            self._running = True
+            self._ready.set()  # Signal that the loop is ready
 
-
-
-
-
+            try:
+                self._loop.run_forever()
+            finally:
+                self._loop.close()
 
     async def _shutdown(self) -> None:
         if self._loop is None:
dbos/_queue.py
CHANGED
@@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Any, Callable, Coroutine, Optional, TypedDict
 from psycopg import errors
 from sqlalchemy.exc import OperationalError
 
+from dbos._logger import dbos_logger
 from dbos._utils import GlobalParams
 
 from ._core import P, R, execute_workflow_by_id, start_workflow, start_workflow_async
@@ -56,6 +57,8 @@ class Queue:
         from ._dbos import _get_or_create_dbos_registry
 
         registry = _get_or_create_dbos_registry()
+        if self.name in registry.queue_info_map:
+            dbos_logger.warning(f"Queue {name} has already been declared")
         registry.queue_info_map[self.name] = self
 
     def enqueue(
@@ -95,12 +98,8 @@ def queue_thread(stop_event: threading.Event, dbos: "DBOS") -> None:
             if not isinstance(
                 e.orig, (errors.SerializationFailure, errors.LockNotAvailable)
             ):
-                dbos.logger.warning(
-
-                )
-        except Exception:
+                dbos.logger.warning(f"Exception encountered in queue thread: {e}")
+        except Exception as e:
             if not stop_event.is_set():
                 # Only print the error if the thread is not stopping
-                dbos.logger.warning(
-                    f"Exception encountered in queue thread: {traceback.format_exc()}"
-                )
+                dbos.logger.warning(f"Exception encountered in queue thread: {e}")
dbos/_sys_db.py
CHANGED
@@ -1,7 +1,9 @@
 import datetime
+import functools
 import json
 import logging
 import os
+import random
 import re
 import threading
 import time
@@ -17,6 +19,7 @@ from typing import (
     Sequence,
     TypedDict,
     TypeVar,
+    cast,
 )
 
 import psycopg
@@ -27,7 +30,7 @@ from alembic.config import Config
 from sqlalchemy.exc import DBAPIError
 from sqlalchemy.sql import func
 
-from dbos._utils import INTERNAL_QUEUE_NAME
+from dbos._utils import INTERNAL_QUEUE_NAME, retriable_postgres_exception
 
 from . import _serialization
 from ._context import get_local_dbos_context
@@ -268,6 +271,51 @@ class ThreadSafeConditionDict:
             dbos_logger.warning(f"Key {key} not found in condition dictionary.")
 
 
+F = TypeVar("F", bound=Callable[..., Any])
+
+
+def db_retry(
+    initial_backoff: float = 1.0, max_backoff: float = 60.0
+) -> Callable[[F], F]:
+    """
+    If a workflow encounters a database connection issue while performing an operation,
+    block the workflow and retry the operation until it reconnects and succeeds.
+
+    In other words, if DBOS loses its database connection, everything pauses until the connection is recovered,
+    trading off availability for correctness.
+    """
+
+    def decorator(func: F) -> F:
+        @functools.wraps(func)
+        def wrapper(*args: Any, **kwargs: Any) -> Any:
+            retries: int = 0
+            backoff: float = initial_backoff
+            while True:
+                try:
+                    return func(*args, **kwargs)
+                except DBAPIError as e:
+
+                    # Determine if this is a retriable exception
+                    if not retriable_postgres_exception(e):
+                        raise
+
+                    retries += 1
+                    # Calculate backoff with jitter
+                    actual_backoff: float = backoff * (0.5 + random.random())
+                    dbos_logger.warning(
+                        f"Database connection failed: {str(e)}. "
+                        f"Retrying in {actual_backoff:.2f}s (attempt {retries})"
+                    )
+                    # Sleep with backoff
+                    time.sleep(actual_backoff)
+                    # Increase backoff for next attempt (exponential)
+                    backoff = min(backoff * 2, max_backoff)
+
+        return cast(F, wrapper)
+
+    return decorator
+
+
 class SystemDatabase:
 
     def __init__(
@@ -365,7 +413,7 @@ class SystemDatabase:
             self.notification_conn.close()
         self.engine.dispose()
 
-    def
+    def _insert_workflow_status(
         self,
         status: WorkflowStatusInternal,
         conn: sa.Connection,
@@ -377,6 +425,15 @@ class SystemDatabase:
         wf_status: WorkflowStatuses = status["status"]
         workflow_deadline_epoch_ms: Optional[int] = status["workflow_deadline_epoch_ms"]
 
+        # Values to update when a row already exists for this workflow
+        update_values: dict[str, Any] = {
+            "recovery_attempts": SystemSchema.workflow_status.c.recovery_attempts + 1,
+            "updated_at": func.extract("epoch", func.now()) * 1000,
+        }
+        # Don't update an existing executor ID when enqueueing a workflow.
+        if wf_status != WorkflowStatusString.ENQUEUED.value:
+            update_values["executor_id"] = status["executor_id"]
+
         cmd = (
             pg.insert(SystemSchema.workflow_status)
             .values(
@@ -402,13 +459,7 @@ class SystemDatabase:
             )
             .on_conflict_do_update(
                 index_elements=["workflow_uuid"],
-                set_=
-                    executor_id=status["executor_id"],
-                    recovery_attempts=(
-                        SystemSchema.workflow_status.c.recovery_attempts + 1
-                    ),
-                    updated_at=func.extract("epoch", func.now()) * 1000,
-                ),
+                set_=update_values,
             )
         )
 
@@ -474,53 +525,46 @@ class SystemDatabase:
 
         return wf_status, workflow_deadline_epoch_ms
 
+    @db_retry()
     def update_workflow_status(
         self,
         status: WorkflowStatusInternal,
-        *,
-        conn: Optional[sa.Connection] = None,
     ) -> None:
         if self._debug_mode:
             raise Exception("called update_workflow_status in debug mode")
         wf_status: WorkflowStatuses = status["status"]
-
-
-
-
-
-                status=status["status"],
-                name=status["name"],
-                class_name=status["class_name"],
-                config_name=status["config_name"],
-                output=status["output"],
-                error=status["error"],
-                executor_id=status["executor_id"],
-                application_version=status["app_version"],
-                application_id=status["app_id"],
-                authenticated_user=status["authenticated_user"],
-                authenticated_roles=status["authenticated_roles"],
-                assumed_role=status["assumed_role"],
-                queue_name=status["queue_name"],
-                recovery_attempts=(
-                    1 if wf_status != WorkflowStatusString.ENQUEUED.value else 0
-                ),
-            )
-            .on_conflict_do_update(
-                index_elements=["workflow_uuid"],
-                set_=dict(
+        with self.engine.begin() as c:
+            c.execute(
+                pg.insert(SystemSchema.workflow_status)
+                .values(
+                    workflow_uuid=status["workflow_uuid"],
                     status=status["status"],
+                    name=status["name"],
+                    class_name=status["class_name"],
+                    config_name=status["config_name"],
                     output=status["output"],
                     error=status["error"],
-
-
+                    executor_id=status["executor_id"],
+                    application_version=status["app_version"],
+                    application_id=status["app_id"],
+                    authenticated_user=status["authenticated_user"],
+                    authenticated_roles=status["authenticated_roles"],
+                    assumed_role=status["assumed_role"],
+                    queue_name=status["queue_name"],
+                    recovery_attempts=(
+                        1 if wf_status != WorkflowStatusString.ENQUEUED.value else 0
+                    ),
+                )
+                .on_conflict_do_update(
+                    index_elements=["workflow_uuid"],
+                    set_=dict(
+                        status=status["status"],
+                        output=status["output"],
+                        error=status["error"],
+                        updated_at=func.extract("epoch", func.now()) * 1000,
+                    ),
+                )
             )
-        )
-
-        if conn is not None:
-            conn.execute(cmd)
-        else:
-            with self.engine.begin() as c:
-                c.execute(cmd)
 
     def cancel_workflow(
         self,
@@ -601,18 +645,6 @@ class SystemDatabase:
             )
         )
 
-    def get_max_function_id(self, workflow_uuid: str) -> Optional[int]:
-        with self.engine.begin() as conn:
-            max_function_id_row = conn.execute(
-                sa.select(
-                    sa.func.max(SystemSchema.operation_outputs.c.function_id)
-                ).where(SystemSchema.operation_outputs.c.workflow_uuid == workflow_uuid)
-            ).fetchone()
-
-            max_function_id = max_function_id_row[0] if max_function_id_row else None
-
-            return max_function_id
-
     def fork_workflow(
         self,
         original_workflow_id: str,
@@ -698,6 +730,7 @@ class SystemDatabase:
         )
         return forked_workflow_id
 
+    @db_retry()
     def get_workflow_status(
         self, workflow_uuid: str
     ) -> Optional[WorkflowStatusInternal]:
@@ -747,6 +780,7 @@ class SystemDatabase:
         }
         return status
 
+    @db_retry()
    def await_workflow_result(self, workflow_id: str) -> Any:
        while True:
            with self.engine.begin() as c:
@@ -773,7 +807,7 @@ class SystemDatabase:
                 pass  # CB: I guess we're assuming the WF will show up eventually.
             time.sleep(1)
 
-    def
+    def _update_workflow_inputs(
         self, workflow_uuid: str, inputs: str, conn: sa.Connection
     ) -> None:
         if self._debug_mode:
@@ -803,6 +837,7 @@ class SystemDatabase:
 
         return
 
+    @db_retry()
     def get_workflow_inputs(
         self, workflow_uuid: str
     ) -> Optional[_serialization.WorkflowInputs]:
@@ -1096,8 +1131,8 @@ class SystemDatabase:
             for row in rows
         ]
 
-    def
-        self, result: OperationResultInternal, conn:
+    def _record_operation_result_txn(
+        self, result: OperationResultInternal, conn: sa.Connection
     ) -> None:
         if self._debug_mode:
             raise Exception("called record_operation_result in debug mode")
@@ -1112,16 +1147,18 @@ class SystemDatabase:
             error=error,
         )
         try:
-
-            conn.execute(sql)
-        else:
-            with self.engine.begin() as c:
-                c.execute(sql)
+            conn.execute(sql)
         except DBAPIError as dbapi_error:
             if dbapi_error.orig.sqlstate == "23505":  # type: ignore
                 raise DBOSWorkflowConflictIDError(result["workflow_uuid"])
             raise
 
+    @db_retry()
+    def record_operation_result(self, result: OperationResultInternal) -> None:
+        with self.engine.begin() as c:
+            self._record_operation_result_txn(result, c)
+
+    @db_retry()
     def record_get_result(
         self, result_workflow_id: str, output: Optional[str], error: Optional[str]
     ) -> None:
@@ -1147,6 +1184,7 @@ class SystemDatabase:
         with self.engine.begin() as c:
             c.execute(sql)
 
+    @db_retry()
     def record_child_workflow(
         self,
         parentUUID: str,
@@ -1171,13 +1209,12 @@ class SystemDatabase:
                 raise DBOSWorkflowConflictIDError(parentUUID)
             raise
 
-    def
+    def _check_operation_execution_txn(
         self,
         workflow_id: str,
         function_id: int,
         function_name: str,
-
-        conn: Optional[sa.Connection] = None,
+        conn: sa.Connection,
     ) -> Optional[RecordedResult]:
         # First query: Retrieve the workflow status
         workflow_status_sql = sa.select(
@@ -1195,13 +1232,8 @@ class SystemDatabase:
         )
 
         # Execute both queries
-
-
-            operation_output_rows = conn.execute(operation_output_sql).all()
-        else:
-            with self.engine.begin() as c:
-                workflow_status_rows = c.execute(workflow_status_sql).all()
-                operation_output_rows = c.execute(operation_output_sql).all()
+        workflow_status_rows = conn.execute(workflow_status_sql).all()
+        operation_output_rows = conn.execute(operation_output_sql).all()
 
         # Check if the workflow exists
         assert (
@@ -1243,6 +1275,16 @@ class SystemDatabase:
         }
         return result
 
+    @db_retry()
+    def check_operation_execution(
+        self, workflow_id: str, function_id: int, function_name: str
+    ) -> Optional[RecordedResult]:
+        with self.engine.begin() as c:
+            return self._check_operation_execution_txn(
+                workflow_id, function_id, function_name, c
+            )
+
+    @db_retry()
     def check_child_workflow(
         self, workflow_uuid: str, function_id: int
     ) -> Optional[str]:
@@ -1260,6 +1302,7 @@ class SystemDatabase:
             return None
         return str(row[0])
 
+    @db_retry()
     def send(
         self,
         workflow_uuid: str,
@@ -1271,7 +1314,7 @@ class SystemDatabase:
         function_name = "DBOS.send"
         topic = topic if topic is not None else _dbos_null_topic
         with self.engine.begin() as c:
-            recorded_output = self.
+            recorded_output = self._check_operation_execution_txn(
                 workflow_uuid, function_id, function_name, conn=c
             )
             if self._debug_mode and recorded_output is None:
@@ -1309,8 +1352,9 @@ class SystemDatabase:
                 "output": None,
                 "error": None,
             }
-            self.
+            self._record_operation_result_txn(output, conn=c)
 
+    @db_retry()
     def recv(
         self,
         workflow_uuid: str,
@@ -1403,7 +1447,7 @@ class SystemDatabase:
             message: Any = None
             if len(rows) > 0:
                 message = _serialization.deserialize(rows[0][0])
-            self.
+            self._record_operation_result_txn(
                 {
                     "workflow_uuid": workflow_uuid,
                     "function_id": function_id,
@@ -1467,13 +1511,14 @@ class SystemDatabase:
                         dbos_logger.error(f"Unknown channel: {channel}")
         except Exception as e:
             if self._run_background_processes:
-                dbos_logger.
+                dbos_logger.warning(f"Notification listener error: {e}")
                 time.sleep(1)
                 # Then the loop will try to reconnect and restart the listener
         finally:
             if self.notification_conn is not None:
                 self.notification_conn.close()
 
+    @db_retry()
     def sleep(
         self,
         workflow_uuid: str,
@@ -1513,6 +1558,7 @@ class SystemDatabase:
             time.sleep(duration)
         return duration
 
+    @db_retry()
     def set_event(
         self,
         workflow_uuid: str,
@@ -1522,7 +1568,7 @@ class SystemDatabase:
     ) -> None:
         function_name = "DBOS.setEvent"
         with self.engine.begin() as c:
-            recorded_output = self.
+            recorded_output = self._check_operation_execution_txn(
                 workflow_uuid, function_id, function_name, conn=c
             )
             if self._debug_mode and recorded_output is None:
@@ -1554,8 +1600,9 @@ class SystemDatabase:
                 "output": None,
                 "error": None,
             }
-            self.
+            self._record_operation_result_txn(output, conn=c)
 
+    @db_retry()
     def get_event(
         self,
         target_uuid: str,
@@ -1646,7 +1693,7 @@ class SystemDatabase:
             )
         return value
 
-    def
+    def _enqueue(
         self,
         workflow_id: str,
         queue_name: str,
@@ -1722,13 +1769,8 @@ class SystemDatabase:
                 if num_recent_queries >= queue.limiter["limit"]:
                     return []
 
-            #
-
-            # functions, else select all of them.
-
-            # First lets figure out how many tasks are eligible for dequeue.
-            # This means figuring out how many unstarted tasks are within the local and global concurrency limits
-            running_tasks_query = (
+            # Count how many workflows on this queue are currently PENDING both locally and globally.
+            pending_tasks_query = (
                 sa.select(
                     SystemSchema.workflow_status.c.executor_id,
                     sa.func.count().label("task_count"),
@@ -1742,41 +1784,37 @@ class SystemDatabase:
                )
                .where(SystemSchema.workflow_queue.c.queue_name == queue.name)
                .where(
-                    SystemSchema.
-
-                    )  # Task is started
-                )
-                .where(
-                    SystemSchema.workflow_queue.c.completed_at_epoch_ms.is_(
-                        None
-                    )  # Task is not completed.
+                    SystemSchema.workflow_status.c.status
+                    == WorkflowStatusString.PENDING.value
                 )
                .group_by(SystemSchema.workflow_status.c.executor_id)
            )
-
-
-
-                executor_id, 0
-            )  # Get count for current executor
+            pending_workflows = c.execute(pending_tasks_query).fetchall()
+            pending_workflows_dict = {row[0]: row[1] for row in pending_workflows}
+            local_pending_workflows = pending_workflows_dict.get(executor_id, 0)
 
+            # Compute max_tasks, the number of workflows that can be dequeued given local and global concurrency limits,
             max_tasks = float("inf")
            if queue.worker_concurrency is not None:
-
-
-
+                # Print a warning if the local concurrency limit is violated
+                if local_pending_workflows > queue.worker_concurrency:
+                    dbos_logger.warning(
+                        f"The number of local pending workflows ({local_pending_workflows}) on queue {queue.name} exceeds the local concurrency limit ({queue.worker_concurrency})"
+                    )
+                max_tasks = max(0, queue.worker_concurrency - local_pending_workflows)
+
            if queue.concurrency is not None:
-
-                #
-
-                if total_running_tasks > queue.concurrency:
+                global_pending_workflows = sum(pending_workflows_dict.values())
+                # Print a warning if the global concurrency limit is violated
+                if global_pending_workflows > queue.concurrency:
                     dbos_logger.warning(
-                        f"
+                        f"The total number of pending workflows ({global_pending_workflows}) on queue {queue.name} exceeds the global concurrency limit ({queue.concurrency})"
                     )
-                available_tasks = max(0, queue.concurrency -
+                available_tasks = max(0, queue.concurrency - global_pending_workflows)
                 max_tasks = min(max_tasks, available_tasks)
 
             # Retrieve the first max_tasks workflows in the queue.
-            # Only retrieve workflows of the
+            # Only retrieve workflows of the local version (or without version set)
             query = (
                 sa.select(
                     SystemSchema.workflow_queue.c.workflow_uuid,
@@ -1789,8 +1827,10 @@ class SystemDatabase:
                     )
                )
                .where(SystemSchema.workflow_queue.c.queue_name == queue.name)
-                .where(
-
+                .where(
+                    SystemSchema.workflow_status.c.status
+                    == WorkflowStatusString.ENQUEUED.value
+                )
                .where(
                    sa.or_(
                        SystemSchema.workflow_status.c.application_version
@@ -1819,20 +1859,16 @@ class SystemDatabase:
            ret_ids: list[str] = []
 
            for id in dequeued_ids:
-                # If we have a limiter, stop
-                # of
+                # If we have a limiter, stop dequeueing workflows when the number
+                # of workflows started this period exceeds the limit.
                if queue.limiter is not None:
                    if len(ret_ids) + num_recent_queries >= queue.limiter["limit"]:
                        break
 
-                # To start a
-
+                # To start a workflow, first set its status to PENDING and update its executor ID
+                c.execute(
                    SystemSchema.workflow_status.update()
                    .where(SystemSchema.workflow_status.c.workflow_uuid == id)
-                    .where(
-                        SystemSchema.workflow_status.c.status
-                        == WorkflowStatusString.ENQUEUED.value
-                    )
                    .values(
                        status=WorkflowStatusString.PENDING.value,
                        application_version=app_version,
@@ -1855,16 +1891,15 @@ class SystemDatabase:
                        ),
                    )
                )
-
-
-
-
-
-
-
-                ret_ids.append(id)
+                # Then give it a start time
+                c.execute(
+                    SystemSchema.workflow_queue.update()
+                    .where(SystemSchema.workflow_queue.c.workflow_uuid == id)
+                    .values(started_at_epoch_ms=start_time_ms)
+                )
+                ret_ids.append(id)
 
-            # If we have a limiter, garbage-collect all completed
+            # If we have a limiter, garbage-collect all completed workflows started
            # before the period. If there's no limiter, there's no need--they were
            # deleted on completion.
            if queue.limiter is not None:
@@ -1881,6 +1916,7 @@ class SystemDatabase:
            # Return the IDs of all functions we started
            return ret_ids
 
+    @db_retry()
     def remove_from_queue(self, workflow_id: str, queue: "Queue") -> None:
         if self._debug_mode:
             raise Exception("called remove_from_queue in debug mode")
@@ -1969,6 +2005,7 @@ class SystemDatabase:
         )
         return result
 
+    @db_retry()
     def init_workflow(
         self,
         status: WorkflowStatusInternal,
@@ -1981,17 +2018,17 @@ class SystemDatabase:
         Synchronously record the status and inputs for workflows in a single transaction
         """
         with self.engine.begin() as conn:
-            wf_status, workflow_deadline_epoch_ms = self.
+            wf_status, workflow_deadline_epoch_ms = self._insert_workflow_status(
                 status, conn, max_recovery_attempts=max_recovery_attempts
            )
            # TODO: Modify the inputs if they were changed by `update_workflow_inputs`
-            self.
+            self._update_workflow_inputs(status["workflow_uuid"], inputs, conn)
 
            if (
                status["queue_name"] is not None
                and wf_status == WorkflowStatusString.ENQUEUED.value
            ):
-                self.
+                self._enqueue(
                    status["workflow_uuid"],
                    status["queue_name"],
                    conn,
@@ -1999,6 +2036,14 @@ class SystemDatabase:
            )
        return wf_status, workflow_deadline_epoch_ms
 
+    def check_connection(self) -> None:
+        try:
+            with self.engine.begin() as conn:
+                conn.execute(sa.text("SELECT 1")).fetchall()
+        except Exception as e:
+            dbos_logger.error(f"Error connecting to the DBOS system database: {e}")
+            raise
+
 
 def reset_system_database(postgres_db_url: sa.URL, sysdb_name: str) -> None:
     try:
dbos/_utils.py
CHANGED
@@ -1,6 +1,9 @@
 import importlib.metadata
 import os
 
+import psycopg
+from sqlalchemy.exc import DBAPIError
+
 INTERNAL_QUEUE_NAME = "_dbos_internal_queue"
 
 request_id_header = "x-request-id"
@@ -15,3 +18,33 @@ class GlobalParams:
     except importlib.metadata.PackageNotFoundError:
         # If package is not installed or during development
         dbos_version = "unknown"
+
+
+def retriable_postgres_exception(e: DBAPIError) -> bool:
+    if e.connection_invalidated:
+        return True
+    if isinstance(e.orig, psycopg.OperationalError):
+        driver_error: psycopg.OperationalError = e.orig
+        pgcode = driver_error.sqlstate or ""
+        # Failure to establish connection
+        if "connection failed" in str(driver_error):
+            return True
+        # Error within database transaction
+        elif "server closed the connection unexpectedly" in str(driver_error):
+            return True
+        # Connection timeout
+        if isinstance(driver_error, psycopg.errors.ConnectionTimeout):
+            return True
+        # Insufficient resources
+        elif pgcode.startswith("53"):
+            return True
+        # Connection exception
+        elif pgcode.startswith("08"):
+            return True
+        # Operator intervention
+        elif pgcode.startswith("57"):
+            return True
+        else:
+            return False
+    else:
+        return False
dbos/_workflow_commands.py
CHANGED
@@ -103,16 +103,7 @@ def fork_workflow(
     *,
     application_version: Optional[str],
 ) -> str:
-
-        max_transactions = app_db.get_max_function_id(workflow_uuid) or 0
-        max_operations = sys_db.get_max_function_id(workflow_uuid) or 0
-        return max(max_transactions, max_operations)
-
-    max_function_id = get_max_function_id(workflow_id)
-    if max_function_id > 0 and start_step > max_function_id:
-        raise DBOSException(
-            f"Cannot fork workflow {workflow_id} from step {start_step}. The workflow has {max_function_id} steps."
-        )
+
     ctx = get_local_dbos_context()
     if ctx is not None and len(ctx.id_assigned_for_next_workflow) > 0:
         forked_workflow_id = ctx.id_assigned_for_next_workflow
dbos/cli/cli.py
CHANGED
@@ -18,7 +18,7 @@ from dbos._debug import debug_workflow, parse_start_command
 
 from .._app_db import ApplicationDatabase
 from .._client import DBOSClient
-from .._dbos_config import _is_valid_app_name, load_config
+from .._dbos_config import _is_valid_app_name, is_valid_database_url, load_config
 from .._docker_pg_helper import start_docker_pg, stop_docker_pg
 from .._schemas.system_database import SystemSchema
 from .._sys_db import SystemDatabase, reset_system_database
@@ -35,6 +35,7 @@ def _get_db_url(db_url: Optional[str]) -> str:
         raise ValueError(
             "Missing database URL: please set it using the --db-url flag or the DBOS_DATABASE_URL environment variable."
         )
+    assert is_valid_database_url(database_url)
    return database_url
 
 
{dbos-1.1.0a4.dist-info → dbos-1.2.0.dist-info}/RECORD
CHANGED
@@ -1,24 +1,24 @@
-dbos-1.
-dbos-1.
-dbos-1.
-dbos-1.
+dbos-1.2.0.dist-info/METADATA,sha256=I-2ce9trkyIdOk3YBcO0biggnP6qfW-qDMFrFqjTXiU,13265
+dbos-1.2.0.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
+dbos-1.2.0.dist-info/entry_points.txt,sha256=_QOQ3tVfEjtjBlr1jS4sHqHya9lI2aIEIWkz8dqYp14,58
+dbos-1.2.0.dist-info/licenses/LICENSE,sha256=VGZit_a5-kdw9WT6fY5jxAWVwGQzgLFyPWrcVVUhVNU,1067
 dbos/__init__.py,sha256=NssPCubaBxdiKarOWa-wViz1hdJSkmBGcpLX_gQ4NeA,891
 dbos/__main__.py,sha256=G7Exn-MhGrVJVDbgNlpzhfh8WMX_72t3_oJaFT9Lmt8,653
-dbos/_admin_server.py,sha256=
-dbos/_app_db.py,sha256=
+dbos/_admin_server.py,sha256=TWXi4drrzKFpKkUmEJpJkQBZxAtOalnhtYicEn2nDK0,10618
+dbos/_app_db.py,sha256=0PKqpxJ3EbIaak3Wl0lNl3hXvhBfz4EEHaCw1bUOvIM,9937
 dbos/_classproperty.py,sha256=f0X-_BySzn3yFDRKB2JpCbLYQ9tLwt1XftfshvY7CBs,626
-dbos/_client.py,sha256
+dbos/_client.py,sha256=BZ5mROMnHrWyMsMj8gYCfey79Zc4eZp1Srlrgel485o,14302
 dbos/_conductor/conductor.py,sha256=o0IaZjwnZ2TOyHeP2H4iSX6UnXLXQ4uODvWAKD9hHMs,21703
 dbos/_conductor/protocol.py,sha256=wgOFZxmS81bv0WCB9dAyg0s6QzldpzVKQDoSPeaX0Ws,6967
 dbos/_context.py,sha256=5ajoWAmToAfzzmMLylnJZoL4Ny9rBwZWuG05sXadMIA,24798
-dbos/_core.py,sha256=
+dbos/_core.py,sha256=m2i9lsHjNKTi8BQyiSOUBrAVH5OvMoBswNZPRpMVIC0,48662
 dbos/_croniter.py,sha256=XHAyUyibs_59sJQfSNWkP7rqQY6_XrlfuuCxk4jYqek,47559
-dbos/_dbos.py,sha256=
-dbos/_dbos_config.py,sha256=
+dbos/_dbos.py,sha256=tby_y__7jWQ7O2j2Ws9W_7QKq25IrV54cvWgiPuZngU,47216
+dbos/_dbos_config.py,sha256=JWVuPE_Ifyr-pYHFxclFalB_HZ8ETFCGNJzBHGpClXw,20347
 dbos/_debug.py,sha256=MNlQVZ6TscGCRQeEEL0VE8Uignvr6dPeDDDefS3xgIE,1823
 dbos/_docker_pg_helper.py,sha256=tLJXWqZ4S-ExcaPnxg_i6cVxL6ZxrYlZjaGsklY-s2I,6115
 dbos/_error.py,sha256=q0OQJZTbR8FFHV9hEpAGpz9oWBT5L509zUhmyff7FJw,8500
-dbos/_event_loop.py,sha256=
+dbos/_event_loop.py,sha256=ts2T1_imfQjdu6hPs7-WZHui4DtmsZ2HUsPgIJ1GXZg,2335
 dbos/_fastapi.py,sha256=m4SL3H9P-NBQ_ZrbFxAWMOqNyIi3HGEn2ODR7xAK038,3118
 dbos/_flask.py,sha256=Npnakt-a3W5OykONFRkDRnumaDhTQmA0NPdUCGRYKXE,1652
 dbos/_kafka.py,sha256=pz0xZ9F3X9Ky1k-VSbeF3tfPhP3UPr3lUUhUfE41__U,4198
@@ -38,7 +38,7 @@ dbos/_migrations/versions/d76646551a6c_workflow_queue.py,sha256=G942nophZ2uC2vc4
 dbos/_migrations/versions/eab0cc1d9a14_job_queue.py,sha256=uvhFOtqbBreCePhAxZfIT0qCAI7BiZTou9wt6QnbY7c,1412
 dbos/_migrations/versions/f4b9b32ba814_functionname_childid_op_outputs.py,sha256=m90Lc5YH0ZISSq1MyxND6oq3RZrZKrIqEsZtwJ1jWxA,1049
 dbos/_outcome.py,sha256=EXxBg4jXCVJsByDQ1VOCIedmbeq_03S6d-p1vqQrLFU,6810
-dbos/_queue.py,sha256=
+dbos/_queue.py,sha256=oDQcydDwYM68U5KQKN6iZiSC-4LXye6KFmSJ7ohG048,3558
 dbos/_recovery.py,sha256=jVMexjfCCNopzyn8gVQzJCmGJaP9G3C1EFaoCQ_Nh7g,2564
 dbos/_registrations.py,sha256=CZt1ElqDjCT7hz6iyT-1av76Yu-iuwu_c9lozO87wvM,7303
 dbos/_roles.py,sha256=iOsgmIAf1XVzxs3gYWdGRe1B880YfOw5fpU7Jwx8_A8,2271
@@ -47,7 +47,7 @@ dbos/_schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dbos/_schemas/application_database.py,sha256=SypAS9l9EsaBHFn9FR8jmnqt01M74d9AF1AMa4m2hhI,1040
 dbos/_schemas/system_database.py,sha256=3Z0L72bOgHnusK1hBaETWU9RfiLBP0QnS-fdu41i0yY,5835
 dbos/_serialization.py,sha256=bWuwhXSQcGmiazvhJHA5gwhrRWxtmFmcCFQSDJnqqkU,3666
-dbos/_sys_db.py,sha256=
+dbos/_sys_db.py,sha256=dNb2xeidel6-YEApxFCN0TTJZNpYr6Wc8LdFvX3pEb4,85730
 dbos/_templates/dbos-db-starter/README.md,sha256=GhxhBj42wjTt1fWEtwNriHbJuKb66Vzu89G4pxNHw2g,930
 dbos/_templates/dbos-db-starter/__package/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dbos/_templates/dbos-db-starter/__package/main.py.dbos,sha256=aQnBPSSQpkB8ERfhf7gB7P9tsU6OPKhZscfeh0yiaD8,2702
@@ -59,12 +59,12 @@ dbos/_templates/dbos-db-starter/migrations/script.py.mako,sha256=MEqL-2qATlST9TA
 dbos/_templates/dbos-db-starter/migrations/versions/2024_07_31_180642_init.py,sha256=MpS7LGaJS0CpvsjhfDkp9EJqvMvVCjRPfUp4c0aE2ys,941
 dbos/_templates/dbos-db-starter/start_postgres_docker.py,sha256=lQVLlYO5YkhGPEgPqwGc7Y8uDKse9HsWv5fynJEFJHM,1681
 dbos/_tracer.py,sha256=yN6GRDKu_1p-EqtQLNarMocPfga2ZuqpzStzzSPYhzo,2732
-dbos/_utils.py,sha256=
-dbos/_workflow_commands.py,sha256=
+dbos/_utils.py,sha256=uywq1QrjMwy17btjxW4bES49povlQwYwYbvKwMT6C2U,1575
+dbos/_workflow_commands.py,sha256=UCpHWvCEXjVZtf5FNanFvtJpgUJDSI1EFBqQP0x_2A0,3346
 dbos/cli/_github_init.py,sha256=Y_bDF9gfO2jB1id4FV5h1oIxEJRWyqVjhb7bNEa5nQ0,3224
 dbos/cli/_template_init.py,sha256=7JBcpMqP1r2mfCnvWatu33z8ctEGHJarlZYKgB83cXE,2972
-dbos/cli/cli.py,sha256=
+dbos/cli/cli.py,sha256=EemOMqNpzSU2BQhAxV_e59pBRITDLwt49HF6W3uWBZg,20775
 dbos/dbos-config.schema.json,sha256=CjaspeYmOkx6Ip_pcxtmfXJTn_YGdSx_0pcPBF7KZmo,6060
 dbos/py.typed,sha256=QfzXT1Ktfk3Rj84akygc7_42z0lRpCq0Ilh8OXI6Zas,44
 version/__init__.py,sha256=L4sNxecRuqdtSFdpUGX3TtBi9KL3k7YsZVIvv-fv9-A,1678
-dbos-1.
+dbos-1.2.0.dist-info/RECORD,,
{dbos-1.1.0a4.dist-info → dbos-1.2.0.dist-info}/WHEEL
File without changes
{dbos-1.1.0a4.dist-info → dbos-1.2.0.dist-info}/entry_points.txt
File without changes
{dbos-1.1.0a4.dist-info → dbos-1.2.0.dist-info}/licenses/LICENSE
File without changes