dbos 1.2.0a2__py3-none-any.whl → 1.2.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbos/_client.py +2 -1
- dbos/_core.py +11 -3
- dbos/_dbos.py +0 -2
- dbos/_dbos_config.py +1 -23
- dbos/_queue.py +6 -7
- dbos/_sys_db.py +169 -115
- dbos/_utils.py +33 -0
- {dbos-1.2.0a2.dist-info → dbos-1.2.0a5.dist-info}/METADATA +1 -1
- {dbos-1.2.0a2.dist-info → dbos-1.2.0a5.dist-info}/RECORD +12 -12
- {dbos-1.2.0a2.dist-info → dbos-1.2.0a5.dist-info}/WHEEL +0 -0
- {dbos-1.2.0a2.dist-info → dbos-1.2.0a5.dist-info}/entry_points.txt +0 -0
- {dbos-1.2.0a2.dist-info → dbos-1.2.0a5.dist-info}/licenses/LICENSE +0 -0
dbos/_client.py
CHANGED
@@ -109,6 +109,7 @@ class DBOSClient:
             },
             sys_db_name=system_database,
         )
+        self._sys_db.check_connection()
         self._app_db = ApplicationDatabase(
             database_url=database_url,
             engine_kwargs={
@@ -231,7 +232,7 @@ class DBOSClient:
             "workflow_deadline_epoch_ms": None,
         }
         with self._sys_db.engine.begin() as conn:
-            self._sys_db.insert_workflow_status(
+            self._sys_db._insert_workflow_status(
                 status, conn, max_recovery_attempts=None
             )
        self._sys_db.send(status["workflow_uuid"], 0, destination_id, message, topic)
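Note: the new check_connection() call makes DBOSClient fail fast when the system database is unreachable, rather than deferring the error to the first operation. A minimal usage sketch (the connection string is a placeholder, and the constructor signature is assumed from the diff context):

    from dbos import DBOSClient

    # As of 1.2.0a5, an unreachable system database raises here, at construction
    # time, because the client probes it with a "SELECT 1" round trip.
    client = DBOSClient("postgresql://postgres:postgres@localhost:5432/appdb")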
dbos/_core.py
CHANGED
@@ -20,8 +20,10 @@ from typing import (
     cast,
 )
 
+import psycopg
+
 from dbos._outcome import Immediate, NoResult, Outcome, Pending
-from dbos._utils import GlobalParams
+from dbos._utils import GlobalParams, retriable_postgres_exception
 
 from ._app_db import ApplicationDatabase, TransactionResultInternal
 
@@ -931,12 +933,18 @@ def decorate_transaction(
                        )
                        break
                    except DBAPIError as dbapi_error:
-                        if dbapi_error.orig.sqlstate == "40001":  # type: ignore
+                        driver_error = cast(
+                            Optional[psycopg.OperationalError], dbapi_error.orig
+                        )
+                        if retriable_postgres_exception(dbapi_error) or (
+                            driver_error is not None
+                            and driver_error.sqlstate == "40001"
+                        ):
                            # Retry on serialization failure
                            span = ctx.get_current_span()
                            if span:
                                span.add_event(
-                                    "Transaction Serialization Failure",
+                                    "Transaction Failure",
                                    {"retry_wait_seconds": retry_wait_seconds},
                                )
                            time.sleep(retry_wait_seconds)
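Note: transactions previously retried only on serialization failures (SQLSTATE 40001); they now also retry on retriable connection errors. A condensed sketch of the new predicate, pulled out into a hypothetical helper for readability:

    from typing import Optional, cast

    import psycopg
    from sqlalchemy.exc import DBAPIError

    from dbos._utils import retriable_postgres_exception

    def should_retry_transaction(dbapi_error: DBAPIError) -> bool:
        # Retry on connection-level failures or on serialization failures (40001).
        driver_error = cast(Optional[psycopg.OperationalError], dbapi_error.orig)
        return retriable_postgres_exception(dbapi_error) or (
            driver_error is not None and driver_error.sqlstate == "40001"
        )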
dbos/_dbos.py
CHANGED
@@ -90,7 +90,6 @@ from ._context import (
 from ._dbos_config import (
     ConfigFile,
     DBOSConfig,
-    check_config_consistency,
     overwrite_config,
     process_config,
     set_env_vars,
@@ -324,7 +323,6 @@ class DBOS:
         unvalidated_config = translate_dbos_config_to_config_file(config)
         if os.environ.get("DBOS__CLOUD") == "true":
             unvalidated_config = overwrite_config(unvalidated_config)
-            check_config_consistency(name=unvalidated_config["name"])
 
         if unvalidated_config is not None:
             self._config: ConfigFile = process_config(data=unvalidated_config)
dbos/_dbos_config.py
CHANGED
@@ -407,6 +407,7 @@ def configure_db_engine_parameters(
         "pool_timeout": 30,
         "max_overflow": 0,
         "pool_size": 20,
+        "pool_pre_ping": True,
     }
     # If user-provided kwargs are present, use them instead
     user_kwargs = data.get("db_engine_kwargs")
@@ -529,26 +530,3 @@ def overwrite_config(provided_config: ConfigFile) -> ConfigFile:
         del provided_config["env"]
 
     return provided_config
-
-
-def check_config_consistency(
-    *,
-    name: str,
-    config_file_path: str = DBOS_CONFIG_PATH,
-) -> None:
-    # First load the config file and check whether it is present
-    try:
-        config = load_config(config_file_path, silent=True, run_process_config=False)
-    except FileNotFoundError:
-        dbos_logger.debug(
-            f"No configuration file {config_file_path} found. Skipping consistency check with provided config."
-        )
-        return
-    except Exception as e:
-        raise e
-
-    # Check the name
-    if name != config["name"]:
-        raise DBOSInitializationError(
-            f"Provided app name '{name}' does not match the app name '{config['name']}' in {config_file_path}."
-        )
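Note: pool_pre_ping=True makes SQLAlchemy test each pooled connection on checkout and transparently replace stale ones, which complements the retry logic added elsewhere in this release. A sketch of the resulting default engine parameters (the URL is a placeholder):

    import sqlalchemy as sa

    engine = sa.create_engine(
        "postgresql+psycopg://postgres:postgres@localhost:5432/appdb",
        pool_timeout=30,
        max_overflow=0,
        pool_size=20,
        pool_pre_ping=True,  # ping on checkout; recycle dead connections
    )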
dbos/_queue.py
CHANGED
@@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Any, Callable, Coroutine, Optional, TypedDict
 from psycopg import errors
 from sqlalchemy.exc import OperationalError
 
+from dbos._logger import dbos_logger
 from dbos._utils import GlobalParams
 
 from ._core import P, R, execute_workflow_by_id, start_workflow, start_workflow_async
@@ -56,6 +57,8 @@ class Queue:
         from ._dbos import _get_or_create_dbos_registry
 
         registry = _get_or_create_dbos_registry()
+        if self.name in registry.queue_info_map:
+            dbos_logger.warning(f"Queue {name} has already been declared")
         registry.queue_info_map[self.name] = self
 
     def enqueue(
@@ -95,12 +98,8 @@ def queue_thread(stop_event: threading.Event, dbos: "DBOS") -> None:
            if not isinstance(
                e.orig, (errors.SerializationFailure, errors.LockNotAvailable)
            ):
-                dbos.logger.warning(
-                    f"Exception encountered in queue thread: {traceback.format_exc()}"
-                )
-        except Exception:
+                dbos.logger.warning(f"Exception encountered in queue thread: {e}")
+        except Exception as e:
            if not stop_event.is_set():
                # Only print the error if the thread is not stopping
-                dbos.logger.warning(
-                    f"Exception encountered in queue thread: {traceback.format_exc()}"
-                )
+                dbos.logger.warning(f"Exception encountered in queue thread: {e}")
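Note: declaring two queues with the same name previously replaced the first registration silently; it now logs a warning (the later declaration still wins). A sketch, assuming the public Queue constructor:

    from dbos import Queue

    reports = Queue("reports")
    duplicate = Queue("reports")  # logs: "Queue reports has already been declared"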
dbos/_sys_db.py
CHANGED
@@ -1,7 +1,9 @@
 import datetime
+import functools
 import json
 import logging
 import os
+import random
 import re
 import threading
 import time
@@ -17,6 +19,7 @@ from typing import (
     Sequence,
     TypedDict,
     TypeVar,
+    cast,
 )
 
 import psycopg
@@ -27,7 +30,7 @@ from alembic.config import Config
 from sqlalchemy.exc import DBAPIError
 from sqlalchemy.sql import func
 
-from dbos._utils import INTERNAL_QUEUE_NAME
+from dbos._utils import INTERNAL_QUEUE_NAME, retriable_postgres_exception
 
 from . import _serialization
 from ._context import get_local_dbos_context
@@ -268,6 +271,51 @@ class ThreadSafeConditionDict:
         dbos_logger.warning(f"Key {key} not found in condition dictionary.")
 
 
+F = TypeVar("F", bound=Callable[..., Any])
+
+
+def db_retry(
+    initial_backoff: float = 1.0, max_backoff: float = 60.0
+) -> Callable[[F], F]:
+    """
+    If a workflow encounters a database connection issue while performing an operation,
+    block the workflow and retry the operation until it reconnects and succeeds.
+
+    In other words, if DBOS loses its database connection, everything pauses until the connection is recovered,
+    trading off availability for correctness.
+    """
+
+    def decorator(func: F) -> F:
+        @functools.wraps(func)
+        def wrapper(*args: Any, **kwargs: Any) -> Any:
+            retries: int = 0
+            backoff: float = initial_backoff
+            while True:
+                try:
+                    return func(*args, **kwargs)
+                except DBAPIError as e:
+
+                    # Determine if this is a retriable exception
+                    if not retriable_postgres_exception(e):
+                        raise
+
+                    retries += 1
+                    # Calculate backoff with jitter
+                    actual_backoff: float = backoff * (0.5 + random.random())
+                    dbos_logger.warning(
+                        f"Database connection failed: {str(e)}. "
+                        f"Retrying in {actual_backoff:.2f}s (attempt {retries})"
+                    )
+                    # Sleep with backoff
+                    time.sleep(actual_backoff)
+                    # Increase backoff for next attempt (exponential)
+                    backoff = min(backoff * 2, max_backoff)
+
+        return cast(F, wrapper)
+
+    return decorator
+
+
 class SystemDatabase:
 
     def __init__(
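Note: db_retry is an internal decorator applied to many SystemDatabase methods below. With the defaults, attempt n sleeps roughly min(2**(n-1), 60) * (0.5 + random()) seconds: about 1s, 2s, 4s, and so on, capped at 60s, each scaled by a jitter factor in [0.5, 1.5). A usage sketch with a stand-in function:

    @db_retry(initial_backoff=1.0, max_backoff=60.0)
    def fetch_status() -> None:
        ...  # any operation that may raise a retriable DBAPIError

Non-retriable errors propagate immediately; retriable ones block the caller until the database recovers.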
@@ -365,7 +413,7 @@ class SystemDatabase:
         self.notification_conn.close()
         self.engine.dispose()
 
-    def insert_workflow_status(
+    def _insert_workflow_status(
         self,
         status: WorkflowStatusInternal,
         conn: sa.Connection,
@@ -474,53 +522,46 @@ class SystemDatabase:
 
         return wf_status, workflow_deadline_epoch_ms
 
+    @db_retry()
     def update_workflow_status(
         self,
         status: WorkflowStatusInternal,
-        *,
-        conn: Optional[sa.Connection] = None,
     ) -> None:
         if self._debug_mode:
             raise Exception("called update_workflow_status in debug mode")
         wf_status: WorkflowStatuses = status["status"]
-
-        cmd = (
-            pg.insert(SystemSchema.workflow_status)
-            .values(
-                workflow_uuid=status["workflow_uuid"],
-                status=status["status"],
-                name=status["name"],
-                class_name=status["class_name"],
-                config_name=status["config_name"],
-                output=status["output"],
-                error=status["error"],
-                executor_id=status["executor_id"],
-                application_version=status["app_version"],
-                application_id=status["app_id"],
-                authenticated_user=status["authenticated_user"],
-                authenticated_roles=status["authenticated_roles"],
-                assumed_role=status["assumed_role"],
-                queue_name=status["queue_name"],
-                recovery_attempts=(
-                    1 if wf_status != WorkflowStatusString.ENQUEUED.value else 0
-                ),
-            )
-            .on_conflict_do_update(
-                index_elements=["workflow_uuid"],
-                set_=dict(
-                    status=status["status"],
-                    output=status["output"],
-                    error=status["error"],
-                    updated_at=func.extract("epoch", func.now()) * 1000,
-                ),
-            )
-        )
-
-        if conn is not None:
-            conn.execute(cmd)
-        else:
-            with self.engine.begin() as c:
-                c.execute(cmd)
+        with self.engine.begin() as c:
+            c.execute(
+                pg.insert(SystemSchema.workflow_status)
+                .values(
+                    workflow_uuid=status["workflow_uuid"],
+                    status=status["status"],
+                    name=status["name"],
+                    class_name=status["class_name"],
+                    config_name=status["config_name"],
+                    output=status["output"],
+                    error=status["error"],
+                    executor_id=status["executor_id"],
+                    application_version=status["app_version"],
+                    application_id=status["app_id"],
+                    authenticated_user=status["authenticated_user"],
+                    authenticated_roles=status["authenticated_roles"],
+                    assumed_role=status["assumed_role"],
+                    queue_name=status["queue_name"],
+                    recovery_attempts=(
+                        1 if wf_status != WorkflowStatusString.ENQUEUED.value else 0
+                    ),
+                )
+                .on_conflict_do_update(
+                    index_elements=["workflow_uuid"],
+                    set_=dict(
+                        status=status["status"],
+                        output=status["output"],
+                        error=status["error"],
+                        updated_at=func.extract("epoch", func.now()) * 1000,
+                    ),
+                )
+            )
 
     def cancel_workflow(
         self,
@@ -686,6 +727,7 @@ class SystemDatabase:
         )
         return forked_workflow_id
 
+    @db_retry()
     def get_workflow_status(
         self, workflow_uuid: str
     ) -> Optional[WorkflowStatusInternal]:
@@ -735,6 +777,7 @@ class SystemDatabase:
         }
         return status
 
+    @db_retry()
     def await_workflow_result(self, workflow_id: str) -> Any:
         while True:
             with self.engine.begin() as c:
@@ -761,7 +804,7 @@ class SystemDatabase:
             pass  # CB: I guess we're assuming the WF will show up eventually.
             time.sleep(1)
 
-    def update_workflow_inputs(
+    def _update_workflow_inputs(
         self, workflow_uuid: str, inputs: str, conn: sa.Connection
     ) -> None:
         if self._debug_mode:
@@ -791,6 +834,7 @@ class SystemDatabase:
 
         return
 
+    @db_retry()
     def get_workflow_inputs(
         self, workflow_uuid: str
     ) -> Optional[_serialization.WorkflowInputs]:
@@ -1084,8 +1128,8 @@ class SystemDatabase:
             for row in rows
         ]
 
-    def record_operation_result(
-        self, result: OperationResultInternal, conn: Optional[sa.Connection] = None
+    def _record_operation_result_txn(
+        self, result: OperationResultInternal, conn: sa.Connection
     ) -> None:
         if self._debug_mode:
             raise Exception("called record_operation_result in debug mode")
@@ -1100,16 +1144,18 @@ class SystemDatabase:
             error=error,
         )
         try:
-            if conn is not None:
-                conn.execute(sql)
-            else:
-                with self.engine.begin() as c:
-                    c.execute(sql)
+            conn.execute(sql)
         except DBAPIError as dbapi_error:
             if dbapi_error.orig.sqlstate == "23505":  # type: ignore
                 raise DBOSWorkflowConflictIDError(result["workflow_uuid"])
             raise
 
+    @db_retry()
+    def record_operation_result(self, result: OperationResultInternal) -> None:
+        with self.engine.begin() as c:
+            self._record_operation_result_txn(result, c)
+
+    @db_retry()
     def record_get_result(
         self, result_workflow_id: str, output: Optional[str], error: Optional[str]
     ) -> None:
@@ -1135,6 +1181,7 @@ class SystemDatabase:
         with self.engine.begin() as c:
             c.execute(sql)
 
+    @db_retry()
     def record_child_workflow(
         self,
         parentUUID: str,
@@ -1159,13 +1206,12 @@ class SystemDatabase:
                 raise DBOSWorkflowConflictIDError(parentUUID)
             raise
 
-    def check_operation_execution(
+    def _check_operation_execution_txn(
         self,
         workflow_id: str,
         function_id: int,
         function_name: str,
-        *,
-        conn: Optional[sa.Connection] = None,
+        conn: sa.Connection,
     ) -> Optional[RecordedResult]:
         # First query: Retrieve the workflow status
         workflow_status_sql = sa.select(
@@ -1183,13 +1229,8 @@ class SystemDatabase:
         )
 
         # Execute both queries
-        if conn is not None:
-            workflow_status_rows = conn.execute(workflow_status_sql).all()
-            operation_output_rows = conn.execute(operation_output_sql).all()
-        else:
-            with self.engine.begin() as c:
-                workflow_status_rows = c.execute(workflow_status_sql).all()
-                operation_output_rows = c.execute(operation_output_sql).all()
+        workflow_status_rows = conn.execute(workflow_status_sql).all()
+        operation_output_rows = conn.execute(operation_output_sql).all()
 
         # Check if the workflow exists
         assert (
@@ -1231,6 +1272,16 @@ class SystemDatabase:
         }
         return result
 
+    @db_retry()
+    def check_operation_execution(
+        self, workflow_id: str, function_id: int, function_name: str
+    ) -> Optional[RecordedResult]:
+        with self.engine.begin() as c:
+            return self._check_operation_execution_txn(
+                workflow_id, function_id, function_name, c
+            )
+
+    @db_retry()
     def check_child_workflow(
         self, workflow_uuid: str, function_id: int
     ) -> Optional[str]:
@@ -1248,6 +1299,7 @@ class SystemDatabase:
             return None
         return str(row[0])
 
+    @db_retry()
     def send(
         self,
         workflow_uuid: str,
@@ -1259,7 +1311,7 @@ class SystemDatabase:
         function_name = "DBOS.send"
         topic = topic if topic is not None else _dbos_null_topic
         with self.engine.begin() as c:
-            recorded_output = self.check_operation_execution(
+            recorded_output = self._check_operation_execution_txn(
                 workflow_uuid, function_id, function_name, conn=c
             )
             if self._debug_mode and recorded_output is None:
@@ -1297,8 +1349,9 @@ class SystemDatabase:
                 "output": None,
                 "error": None,
             }
-            self.record_operation_result(output, conn=c)
+            self._record_operation_result_txn(output, conn=c)
 
+    @db_retry()
     def recv(
         self,
         workflow_uuid: str,
@@ -1391,7 +1444,7 @@ class SystemDatabase:
         message: Any = None
         if len(rows) > 0:
             message = _serialization.deserialize(rows[0][0])
-        self.record_operation_result(
+        self._record_operation_result_txn(
             {
                 "workflow_uuid": workflow_uuid,
                 "function_id": function_id,
@@ -1455,13 +1508,14 @@ class SystemDatabase:
                        dbos_logger.error(f"Unknown channel: {channel}")
        except Exception as e:
            if self._run_background_processes:
-                dbos_logger.error(f"Notification listener error: {e}")
+                dbos_logger.warning(f"Notification listener error: {e}")
                time.sleep(1)
                # Then the loop will try to reconnect and restart the listener
        finally:
            if self.notification_conn is not None:
                self.notification_conn.close()
 
+    @db_retry()
     def sleep(
         self,
         workflow_uuid: str,
@@ -1501,6 +1555,7 @@ class SystemDatabase:
             time.sleep(duration)
         return duration
 
+    @db_retry()
     def set_event(
         self,
         workflow_uuid: str,
@@ -1510,7 +1565,7 @@ class SystemDatabase:
     ) -> None:
         function_name = "DBOS.setEvent"
         with self.engine.begin() as c:
-            recorded_output = self.check_operation_execution(
+            recorded_output = self._check_operation_execution_txn(
                 workflow_uuid, function_id, function_name, conn=c
             )
             if self._debug_mode and recorded_output is None:
@@ -1542,8 +1597,9 @@ class SystemDatabase:
                 "output": None,
                 "error": None,
             }
-            self.record_operation_result(output, conn=c)
+            self._record_operation_result_txn(output, conn=c)
 
+    @db_retry()
     def get_event(
         self,
         target_uuid: str,
@@ -1634,7 +1690,7 @@ class SystemDatabase:
             )
         return value
 
-    def enqueue(
+    def _enqueue(
         self,
         workflow_id: str,
         queue_name: str,
@@ -1710,13 +1766,8 @@ class SystemDatabase:
            if num_recent_queries >= queue.limiter["limit"]:
                return []
 
-            #
-
-            # functions, else select all of them.
-
-            # First lets figure out how many tasks are eligible for dequeue.
-            # This means figuring out how many unstarted tasks are within the local and global concurrency limits
-            running_tasks_query = (
+            # Count how many workflows on this queue are currently PENDING both locally and globally.
+            pending_tasks_query = (
                sa.select(
                    SystemSchema.workflow_status.c.executor_id,
                    sa.func.count().label("task_count"),
@@ -1730,41 +1781,37 @@ class SystemDatabase:
                )
                .where(SystemSchema.workflow_queue.c.queue_name == queue.name)
                .where(
-                    SystemSchema.workflow_queue.c.started_at_epoch_ms.isnot(
-                        None
-                    )  # Task is started
-                )
-                .where(
-                    SystemSchema.workflow_queue.c.completed_at_epoch_ms.is_(
-                        None
-                    )  # Task is not completed.
+                    SystemSchema.workflow_status.c.status
+                    == WorkflowStatusString.PENDING.value
                )
                .group_by(SystemSchema.workflow_status.c.executor_id)
            )
-            running_tasks_result = c.execute(running_tasks_query).fetchall()
-            running_tasks_result_dict = {row[0]: row[1] for row in running_tasks_result}
-            local_running_task_count = running_tasks_result_dict.get(
-                executor_id, 0
-            )  # Get count for current executor
+            pending_workflows = c.execute(pending_tasks_query).fetchall()
+            pending_workflows_dict = {row[0]: row[1] for row in pending_workflows}
+            local_pending_workflows = pending_workflows_dict.get(executor_id, 0)
 
+            # Compute max_tasks, the number of workflows that can be dequeued given local and global concurrency limits,
            max_tasks = float("inf")
            if queue.worker_concurrency is not None:
-                max_tasks = max(
-                    0, queue.worker_concurrency - local_running_task_count
-                )
+                # Print a warning if the local concurrency limit is violated
+                if local_pending_workflows > queue.worker_concurrency:
+                    dbos_logger.warning(
+                        f"The number of local pending workflows ({local_pending_workflows}) on queue {queue.name} exceeds the local concurrency limit ({queue.worker_concurrency})"
+                    )
+                max_tasks = max(0, queue.worker_concurrency - local_pending_workflows)
+
            if queue.concurrency is not None:
-                total_running_tasks = sum(running_tasks_result_dict.values())
-                #
-
-                if total_running_tasks > queue.concurrency:
+                global_pending_workflows = sum(pending_workflows_dict.values())
+                # Print a warning if the global concurrency limit is violated
+                if global_pending_workflows > queue.concurrency:
                    dbos_logger.warning(
-                        f"
+                        f"The total number of pending workflows ({global_pending_workflows}) on queue {queue.name} exceeds the global concurrency limit ({queue.concurrency})"
                    )
-                available_tasks = max(0, queue.concurrency - total_running_tasks)
+                available_tasks = max(0, queue.concurrency - global_pending_workflows)
                max_tasks = min(max_tasks, available_tasks)
 
            # Retrieve the first max_tasks workflows in the queue.
-            # Only retrieve workflows of the
+            # Only retrieve workflows of the local version (or without version set)
            query = (
                sa.select(
                    SystemSchema.workflow_queue.c.workflow_uuid,
@@ -1777,8 +1824,10 @@ class SystemDatabase:
                    )
                )
                .where(SystemSchema.workflow_queue.c.queue_name == queue.name)
-                .where(
-
+                .where(
+                    SystemSchema.workflow_status.c.status
+                    == WorkflowStatusString.ENQUEUED.value
+                )
                .where(
                    sa.or_(
                        SystemSchema.workflow_status.c.application_version
@@ -1807,20 +1856,16 @@ class SystemDatabase:
            ret_ids: list[str] = []
 
            for id in dequeued_ids:
-                # If we have a limiter, stop
-                # of
+                # If we have a limiter, stop dequeueing workflows when the number
+                # of workflows started this period exceeds the limit.
                if queue.limiter is not None:
                    if len(ret_ids) + num_recent_queries >= queue.limiter["limit"]:
                        break
 
-                # To start a
-                res = c.execute(
+                # To start a workflow, first set its status to PENDING and update its executor ID
+                c.execute(
                    SystemSchema.workflow_status.update()
                    .where(SystemSchema.workflow_status.c.workflow_uuid == id)
-                    .where(
-                        SystemSchema.workflow_status.c.status
-                        == WorkflowStatusString.ENQUEUED.value
-                    )
                    .values(
                        status=WorkflowStatusString.PENDING.value,
                        application_version=app_version,
@@ -1843,16 +1888,15 @@ class SystemDatabase:
                        ),
                    )
                )
-                if res.rowcount > 0:
-                    # Then give it a start time
-                    c.execute(
-                        SystemSchema.workflow_queue.update()
-                        .where(SystemSchema.workflow_queue.c.workflow_uuid == id)
-                        .values(started_at_epoch_ms=start_time_ms)
-                    )
-                    ret_ids.append(id)
+                # Then give it a start time
+                c.execute(
+                    SystemSchema.workflow_queue.update()
+                    .where(SystemSchema.workflow_queue.c.workflow_uuid == id)
+                    .values(started_at_epoch_ms=start_time_ms)
+                )
+                ret_ids.append(id)
 
-            # If we have a limiter, garbage-collect all completed
+            # If we have a limiter, garbage-collect all completed workflows started
            # before the period. If there's no limiter, there's no need--they were
            # deleted on completion.
            if queue.limiter is not None:
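Note: dequeue eligibility is now derived from workflow_status.status (ENQUEUED vs. PENDING) rather than from the started_at/completed_at timestamps on workflow_queue. A worked sketch of the new concurrency arithmetic, using invented example numbers:

    # Suppose worker_concurrency=5, concurrency=8, and the per-executor PENDING
    # counts for this queue come back as below.
    pending_workflows_dict = {"local-executor": 3, "other-executor": 4}
    local_pending = pending_workflows_dict.get("local-executor", 0)  # 3
    max_tasks = max(0, 5 - local_pending)                            # 2 local slots
    global_pending = sum(pending_workflows_dict.values())            # 7
    available = max(0, 8 - global_pending)                           # 1 global slot
    max_tasks = min(max_tasks, available)                            # dequeue at most 1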
@@ -1869,6 +1913,7 @@ class SystemDatabase:
            # Return the IDs of all functions we started
            return ret_ids
 
+    @db_retry()
    def remove_from_queue(self, workflow_id: str, queue: "Queue") -> None:
        if self._debug_mode:
            raise Exception("called remove_from_queue in debug mode")
@@ -1957,6 +2002,7 @@ class SystemDatabase:
            )
            return result
 
+    @db_retry()
    def init_workflow(
        self,
        status: WorkflowStatusInternal,
@@ -1969,17 +2015,17 @@ class SystemDatabase:
        Synchronously record the status and inputs for workflows in a single transaction
        """
        with self.engine.begin() as conn:
-            wf_status, workflow_deadline_epoch_ms = self.insert_workflow_status(
+            wf_status, workflow_deadline_epoch_ms = self._insert_workflow_status(
                status, conn, max_recovery_attempts=max_recovery_attempts
            )
            # TODO: Modify the inputs if they were changed by `update_workflow_inputs`
-            self.update_workflow_inputs(status["workflow_uuid"], inputs, conn)
+            self._update_workflow_inputs(status["workflow_uuid"], inputs, conn)
 
            if (
                status["queue_name"] is not None
                and wf_status == WorkflowStatusString.ENQUEUED.value
            ):
-                self.enqueue(
+                self._enqueue(
                    status["workflow_uuid"],
                    status["queue_name"],
                    conn,
@@ -1987,6 +2033,14 @@ class SystemDatabase:
            )
        return wf_status, workflow_deadline_epoch_ms
 
+    def check_connection(self) -> None:
+        try:
+            with self.engine.begin() as conn:
+                conn.execute(sa.text("SELECT 1")).fetchall()
+        except Exception as e:
+            dbos_logger.error(f"Error connecting to the DBOS system database: {e}")
+            raise
+
 
 def reset_system_database(postgres_db_url: sa.URL, sysdb_name: str) -> None:
     try:
dbos/_utils.py
CHANGED
@@ -1,6 +1,9 @@
 import importlib.metadata
 import os
 
+import psycopg
+from sqlalchemy.exc import DBAPIError
+
 INTERNAL_QUEUE_NAME = "_dbos_internal_queue"
 
 request_id_header = "x-request-id"
@@ -15,3 +18,33 @@ class GlobalParams:
     except importlib.metadata.PackageNotFoundError:
         # If package is not installed or during development
         dbos_version = "unknown"
+
+
+def retriable_postgres_exception(e: DBAPIError) -> bool:
+    if e.connection_invalidated:
+        return True
+    if isinstance(e.orig, psycopg.OperationalError):
+        driver_error: psycopg.OperationalError = e.orig
+        pgcode = driver_error.sqlstate or ""
+        # Failure to establish connection
+        if "connection failed" in str(driver_error):
+            return True
+        # Error within database transaction
+        elif "server closed the connection unexpectedly" in str(driver_error):
+            return True
+        # Connection timeout
+        if isinstance(driver_error, psycopg.errors.ConnectionTimeout):
+            return True
+        # Insufficient resources
+        elif pgcode.startswith("53"):
+            return True
+        # Connection exception
+        elif pgcode.startswith("08"):
+            return True
+        # Operator intervention
+        elif pgcode.startswith("57"):
+            return True
+        else:
+            return False
+    else:
+        return False
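Note: retriable_postgres_exception classifies a DBAPIError as retriable when SQLAlchemy has invalidated the connection or the underlying psycopg error is connection-related: SQLSTATE classes 53 (insufficient resources), 08 (connection exception), 57 (operator intervention), connection timeouts, and two message patterns. A quick sketch of exercising it (the error is constructed by hand purely for illustration):

    import psycopg
    from sqlalchemy.exc import DBAPIError

    from dbos._utils import retriable_postgres_exception

    # AdminShutdown carries SQLSTATE 57P01, i.e. class 57 "operator intervention".
    wrapped = DBAPIError("SELECT 1", None, psycopg.errors.AdminShutdown())
    assert retriable_postgres_exception(wrapped)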
{dbos-1.2.0a2.dist-info → dbos-1.2.0a5.dist-info}/RECORD
CHANGED
@@ -1,20 +1,20 @@
-dbos-1.2.
-dbos-1.2.
-dbos-1.2.
-dbos-1.2.
+dbos-1.2.0a5.dist-info/METADATA,sha256=PeZezLDhF3k-2FR5-9qCP0_BVWuEVSvo41QucKSXD8o,13267
+dbos-1.2.0a5.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
+dbos-1.2.0a5.dist-info/entry_points.txt,sha256=_QOQ3tVfEjtjBlr1jS4sHqHya9lI2aIEIWkz8dqYp14,58
+dbos-1.2.0a5.dist-info/licenses/LICENSE,sha256=VGZit_a5-kdw9WT6fY5jxAWVwGQzgLFyPWrcVVUhVNU,1067
 dbos/__init__.py,sha256=NssPCubaBxdiKarOWa-wViz1hdJSkmBGcpLX_gQ4NeA,891
 dbos/__main__.py,sha256=G7Exn-MhGrVJVDbgNlpzhfh8WMX_72t3_oJaFT9Lmt8,653
 dbos/_admin_server.py,sha256=TWXi4drrzKFpKkUmEJpJkQBZxAtOalnhtYicEn2nDK0,10618
 dbos/_app_db.py,sha256=0PKqpxJ3EbIaak3Wl0lNl3hXvhBfz4EEHaCw1bUOvIM,9937
 dbos/_classproperty.py,sha256=f0X-_BySzn3yFDRKB2JpCbLYQ9tLwt1XftfshvY7CBs,626
-dbos/_client.py,sha256
+dbos/_client.py,sha256=mGDuQRcSdkyEHf1s0rJuqHQiWbqIBt85qijNJSYmBik,14227
 dbos/_conductor/conductor.py,sha256=o0IaZjwnZ2TOyHeP2H4iSX6UnXLXQ4uODvWAKD9hHMs,21703
 dbos/_conductor/protocol.py,sha256=wgOFZxmS81bv0WCB9dAyg0s6QzldpzVKQDoSPeaX0Ws,6967
 dbos/_context.py,sha256=5ajoWAmToAfzzmMLylnJZoL4Ny9rBwZWuG05sXadMIA,24798
-dbos/_core.py,sha256=
+dbos/_core.py,sha256=m2i9lsHjNKTi8BQyiSOUBrAVH5OvMoBswNZPRpMVIC0,48662
 dbos/_croniter.py,sha256=XHAyUyibs_59sJQfSNWkP7rqQY6_XrlfuuCxk4jYqek,47559
-dbos/_dbos.py,sha256=
-dbos/_dbos_config.py,sha256=
+dbos/_dbos.py,sha256=1EhH7r6v2vwW3Z74nK6_Zw8InE1jSXedEsztz0I4ggA,47269
+dbos/_dbos_config.py,sha256=JYtEbhjcCxLUhktMgqIEBz7i5nk1Ryg0vqSJHXqdGOo,20264
 dbos/_debug.py,sha256=MNlQVZ6TscGCRQeEEL0VE8Uignvr6dPeDDDefS3xgIE,1823
 dbos/_docker_pg_helper.py,sha256=tLJXWqZ4S-ExcaPnxg_i6cVxL6ZxrYlZjaGsklY-s2I,6115
 dbos/_error.py,sha256=q0OQJZTbR8FFHV9hEpAGpz9oWBT5L509zUhmyff7FJw,8500
@@ -38,7 +38,7 @@ dbos/_migrations/versions/d76646551a6c_workflow_queue.py,sha256=G942nophZ2uC2vc4
 dbos/_migrations/versions/eab0cc1d9a14_job_queue.py,sha256=uvhFOtqbBreCePhAxZfIT0qCAI7BiZTou9wt6QnbY7c,1412
 dbos/_migrations/versions/f4b9b32ba814_functionname_childid_op_outputs.py,sha256=m90Lc5YH0ZISSq1MyxND6oq3RZrZKrIqEsZtwJ1jWxA,1049
 dbos/_outcome.py,sha256=EXxBg4jXCVJsByDQ1VOCIedmbeq_03S6d-p1vqQrLFU,6810
-dbos/_queue.py,sha256=
+dbos/_queue.py,sha256=oDQcydDwYM68U5KQKN6iZiSC-4LXye6KFmSJ7ohG048,3558
 dbos/_recovery.py,sha256=jVMexjfCCNopzyn8gVQzJCmGJaP9G3C1EFaoCQ_Nh7g,2564
 dbos/_registrations.py,sha256=CZt1ElqDjCT7hz6iyT-1av76Yu-iuwu_c9lozO87wvM,7303
 dbos/_roles.py,sha256=iOsgmIAf1XVzxs3gYWdGRe1B880YfOw5fpU7Jwx8_A8,2271
@@ -47,7 +47,7 @@ dbos/_schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dbos/_schemas/application_database.py,sha256=SypAS9l9EsaBHFn9FR8jmnqt01M74d9AF1AMa4m2hhI,1040
 dbos/_schemas/system_database.py,sha256=3Z0L72bOgHnusK1hBaETWU9RfiLBP0QnS-fdu41i0yY,5835
 dbos/_serialization.py,sha256=bWuwhXSQcGmiazvhJHA5gwhrRWxtmFmcCFQSDJnqqkU,3666
-dbos/_sys_db.py,sha256=
+dbos/_sys_db.py,sha256=T02hZbe-4tpsK4hGVlatft06ybu86SJ4w6-anaf55KQ,85528
 dbos/_templates/dbos-db-starter/README.md,sha256=GhxhBj42wjTt1fWEtwNriHbJuKb66Vzu89G4pxNHw2g,930
 dbos/_templates/dbos-db-starter/__package/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dbos/_templates/dbos-db-starter/__package/main.py.dbos,sha256=aQnBPSSQpkB8ERfhf7gB7P9tsU6OPKhZscfeh0yiaD8,2702
@@ -59,7 +59,7 @@ dbos/_templates/dbos-db-starter/migrations/script.py.mako,sha256=MEqL-2qATlST9TA
 dbos/_templates/dbos-db-starter/migrations/versions/2024_07_31_180642_init.py,sha256=MpS7LGaJS0CpvsjhfDkp9EJqvMvVCjRPfUp4c0aE2ys,941
 dbos/_templates/dbos-db-starter/start_postgres_docker.py,sha256=lQVLlYO5YkhGPEgPqwGc7Y8uDKse9HsWv5fynJEFJHM,1681
 dbos/_tracer.py,sha256=yN6GRDKu_1p-EqtQLNarMocPfga2ZuqpzStzzSPYhzo,2732
-dbos/_utils.py,sha256=
+dbos/_utils.py,sha256=uywq1QrjMwy17btjxW4bES49povlQwYwYbvKwMT6C2U,1575
 dbos/_workflow_commands.py,sha256=UCpHWvCEXjVZtf5FNanFvtJpgUJDSI1EFBqQP0x_2A0,3346
 dbos/cli/_github_init.py,sha256=Y_bDF9gfO2jB1id4FV5h1oIxEJRWyqVjhb7bNEa5nQ0,3224
 dbos/cli/_template_init.py,sha256=7JBcpMqP1r2mfCnvWatu33z8ctEGHJarlZYKgB83cXE,2972
@@ -67,4 +67,4 @@ dbos/cli/cli.py,sha256=HinoCGrAUTiSeq7AAoCFfhdiE0uDw7vLMuDMN1_YTLI,20705
 dbos/dbos-config.schema.json,sha256=CjaspeYmOkx6Ip_pcxtmfXJTn_YGdSx_0pcPBF7KZmo,6060
 dbos/py.typed,sha256=QfzXT1Ktfk3Rj84akygc7_42z0lRpCq0Ilh8OXI6Zas,44
 version/__init__.py,sha256=L4sNxecRuqdtSFdpUGX3TtBi9KL3k7YsZVIvv-fv9-A,1678
-dbos-1.2.
+dbos-1.2.0a5.dist-info/RECORD,,
{dbos-1.2.0a2.dist-info → dbos-1.2.0a5.dist-info}/WHEEL
File without changes
{dbos-1.2.0a2.dist-info → dbos-1.2.0a5.dist-info}/entry_points.txt
File without changes
{dbos-1.2.0a2.dist-info → dbos-1.2.0a5.dist-info}/licenses/LICENSE
File without changes