dbos 0.26.0a25__py3-none-any.whl → 0.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dbos might be problematic.

dbos/_serialization.py CHANGED
@@ -1,8 +1,10 @@
 import types
-from typing import Any, Dict, Tuple, TypedDict
+from typing import Any, Dict, Optional, Tuple, TypedDict
 
 import jsonpickle  # type: ignore
 
+from ._logger import dbos_logger
+
 
 class WorkflowInputs(TypedDict):
     args: Tuple[Any, ...]
@@ -51,5 +53,54 @@ def deserialize_args(serialized_data: str) -> WorkflowInputs:
 
 def deserialize_exception(serialized_data: str) -> Exception:
     """Deserialize JSON string back to a Python Exception using jsonpickle."""
-    upo: Exception = jsonpickle.decode(serialized_data)
-    return upo
+    exc: Exception = jsonpickle.decode(serialized_data)
+    return exc
+
+
+def safe_deserialize(
+    workflow_id: str,
+    *,
+    serialized_input: Optional[str],
+    serialized_output: Optional[str],
+    serialized_exception: Optional[str],
+) -> tuple[Optional[WorkflowInputs], Optional[Any], Optional[Exception]]:
+    """
+    This function safely deserializes a workflow's recorded input and output/exception.
+    If any of them is not deserializable, it logs a warning and returns a string instead of throwing an exception.
+
+    This function is used in workflow introspection methods (get_workflows and get_queued_workflow)
+    to ensure errors related to nondeserializable objects are observable.
+    """
+    input: Optional[WorkflowInputs]
+    try:
+        input = (
+            deserialize_args(serialized_input) if serialized_input is not None else None
+        )
+    except Exception as e:
+        dbos_logger.warning(
+            f"Warning: input object could not be deserialized for workflow {workflow_id}, returning as string: {e}"
+        )
+        input = serialized_input  # type: ignore
+    output: Optional[Any]
+    try:
+        output = (
+            deserialize(serialized_output) if serialized_output is not None else None
+        )
+    except Exception as e:
+        dbos_logger.warning(
+            f"Warning: output object could not be deserialized for workflow {workflow_id}, returning as string: {e}"
+        )
+        output = serialized_output
+    exception: Optional[Exception]
+    try:
+        exception = (
+            deserialize_exception(serialized_exception)
+            if serialized_exception is not None
+            else None
+        )
+    except Exception as e:
+        dbos_logger.warning(
+            f"Warning: exception object could not be deserialized for workflow {workflow_id}, returning as string: {e}"
+        )
+        exception = serialized_exception  # type: ignore
+    return input, output, exception
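The new safe_deserialize helper degrades gracefully instead of raising when a stored payload cannot be decoded. A minimal sketch of calling it directly (the workflow ID and payloads below are made-up illustration values, not data from this release):

from dbos._serialization import safe_deserialize

# "wf-123" and the payloads are hypothetical values.
inputs, output, error = safe_deserialize(
    "wf-123",
    serialized_input=None,            # nothing recorded, passed through as None
    serialized_output=None,
    serialized_exception="not-json",  # undecodable: a warning is logged instead of raising
)
assert inputs is None and output is None
assert error == "not-json"  # degraded to the raw serialized string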
dbos/_sys_db.py CHANGED
@@ -37,6 +37,7 @@ from ._error import (
     DBOSConflictingWorkflowError,
     DBOSDeadLetterQueueError,
     DBOSNonExistentWorkflowError,
+    DBOSQueueDeduplicatedError,
     DBOSUnexpectedStepError,
     DBOSWorkflowCancelledError,
     DBOSWorkflowConflictIDError,
@@ -135,6 +136,10 @@ class WorkflowStatusInternal(TypedDict):
     workflow_deadline_epoch_ms: Optional[int]
 
 
+class EnqueueOptionsInternal(TypedDict):
+    deduplication_id: Optional[str]  # Unique ID for deduplication on a queue
+
+
 class RecordedResult(TypedDict):
     output: Optional[str]  # JSON (jsonpickle)
     error: Optional[str]  # JSON (jsonpickle)
@@ -248,6 +253,7 @@ class SystemDatabase:
                 sa.text("SELECT 1 FROM pg_database WHERE datname=:db_name"),
                 parameters={"db_name": sysdb_name},
             ).scalar():
+                dbos_logger.info(f"Creating system database {sysdb_name}")
                 conn.execute(sa.text(f"CREATE DATABASE {sysdb_name}"))
         engine.dispose()
 
@@ -267,12 +273,23 @@
         if pool_size is None:
             pool_size = 20
 
+        engine_kwargs = database.get("db_engine_kwargs")
+        if engine_kwargs is None:
+            engine_kwargs = {}
+
+        # Respect user-provided values. Otherwise, set defaults.
+        if "pool_size" not in engine_kwargs:
+            engine_kwargs["pool_size"] = pool_size
+        if "max_overflow" not in engine_kwargs:
+            engine_kwargs["max_overflow"] = 0
+        if "pool_timeout" not in engine_kwargs:
+            engine_kwargs["pool_timeout"] = 30
+        if "connect_args" not in engine_kwargs:
+            engine_kwargs["connect_args"] = {"connect_timeout": 10}
+
         self.engine = sa.create_engine(
             system_db_url,
-            pool_size=pool_size,
-            max_overflow=0,
-            pool_timeout=30,
-            connect_args={"connect_timeout": 10},
+            **engine_kwargs,
         )
 
         # Run a schema migration for the system database
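Pool settings for the system database engine are no longer hard-coded: anything supplied under db_engine_kwargs is passed straight to sqlalchemy.create_engine, and only missing keys fall back to the old defaults. A hedged sketch of the database mapping consumed above; only the db_engine_kwargs key is introduced by this diff, the other keys follow the existing ConfigFile["database"] shape and are placeholders:

# Hypothetical config fragment for illustration.
database_config = {
    "hostname": "localhost",
    "port": 5432,
    "username": "postgres",
    "password": "dbos",
    "app_db_name": "my_app",
    "db_engine_kwargs": {
        "pool_size": 5,      # overrides the default of 20
        "pool_timeout": 60,  # overrides the default of 30
        # max_overflow and connect_args keep their defaults (0 and {"connect_timeout": 10})
    },
}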
@@ -378,7 +395,6 @@
         cmd = cmd.returning(SystemSchema.workflow_status.c.recovery_attempts, SystemSchema.workflow_status.c.status, SystemSchema.workflow_status.c.workflow_deadline_epoch_ms, SystemSchema.workflow_status.c.name, SystemSchema.workflow_status.c.class_name, SystemSchema.workflow_status.c.config_name, SystemSchema.workflow_status.c.queue_name)  # type: ignore
 
         results = conn.execute(cmd)
-
         row = results.fetchone()
         if row is not None:
             # Check the started workflow matches the expected name, class_name, config_name, and queue_name
@@ -528,15 +544,17 @@
             # Execute with snapshot isolation in case of concurrent calls on the same workflow
             c.execute(sa.text("SET TRANSACTION ISOLATION LEVEL REPEATABLE READ"))
             # Check the status of the workflow. If it is complete, do nothing.
-            row = c.execute(
+            status_row = c.execute(
                 sa.select(
                     SystemSchema.workflow_status.c.status,
                 ).where(SystemSchema.workflow_status.c.workflow_uuid == workflow_id)
             ).fetchone()
+            if status_row is None:
+                return
+            status = status_row[0]
             if (
-                row is None
-                or row[0] == WorkflowStatusString.SUCCESS.value
-                or row[0] == WorkflowStatusString.ERROR.value
+                status == WorkflowStatusString.SUCCESS.value
+                or status == WorkflowStatusString.ERROR.value
             ):
                 return
             # Remove the workflow from the queues table so resume can safely be called on an ENQUEUED workflow
@@ -576,7 +594,12 @@
         return max_function_id
 
     def fork_workflow(
-        self, original_workflow_id: str, forked_workflow_id: str, start_step: int = 1
+        self,
+        original_workflow_id: str,
+        forked_workflow_id: str,
+        start_step: int,
+        *,
+        application_version: Optional[str],
     ) -> str:
 
         status = self.get_workflow_status(original_workflow_id)
@@ -596,7 +619,11 @@
             name=status["name"],
             class_name=status["class_name"],
            config_name=status["config_name"],
-            application_version=status["app_version"],
+            application_version=(
+                application_version
+                if application_version is not None
+                else status["app_version"]
+            ),
             application_id=status["app_id"],
             request=status["request"],
             authenticated_user=status["authenticated_user"],
@@ -874,13 +901,15 @@
             info.app_version = row[14]
             info.app_id = row[15]
 
-            inputs = _serialization.deserialize_args(row[16])
-            if inputs is not None:
-                info.input = inputs
-            if info.status == WorkflowStatusString.SUCCESS.value:
-                info.output = _serialization.deserialize(row[17])
-            elif info.status == WorkflowStatusString.ERROR.value:
-                info.error = _serialization.deserialize_exception(row[18])
+            inputs, output, exception = _serialization.safe_deserialize(
+                info.workflow_id,
+                serialized_input=row[16],
+                serialized_output=row[17],
+                serialized_exception=row[18],
+            )
+            info.input = inputs
+            info.output = output
+            info.error = exception
 
             infos.append(info)
         return infos
@@ -980,13 +1009,15 @@
             info.app_version = row[14]
             info.app_id = row[15]
 
-            inputs = _serialization.deserialize_args(row[16])
-            if inputs is not None:
-                info.input = inputs
-            if info.status == WorkflowStatusString.SUCCESS.value:
-                info.output = _serialization.deserialize(row[17])
-            elif info.status == WorkflowStatusString.ERROR.value:
-                info.error = _serialization.deserialize_exception(row[18])
+            inputs, output, exception = _serialization.safe_deserialize(
+                info.workflow_id,
+                serialized_input=row[16],
+                serialized_output=row[17],
+                serialized_exception=row[18],
+            )
+            info.input = inputs
+            info.output = output
+            info.error = exception
 
             infos.append(info)
 
@@ -1586,17 +1617,43 @@
         )
         return value
 
-    def enqueue(self, workflow_id: str, queue_name: str, conn: sa.Connection) -> None:
+    def enqueue(
+        self,
+        workflow_id: str,
+        queue_name: str,
+        conn: sa.Connection,
+        *,
+        enqueue_options: Optional[EnqueueOptionsInternal],
+    ) -> None:
         if self._debug_mode:
             raise Exception("called enqueue in debug mode")
-        conn.execute(
-            pg.insert(SystemSchema.workflow_queue)
-            .values(
-                workflow_uuid=workflow_id,
-                queue_name=queue_name,
+        try:
+            deduplication_id = (
+                enqueue_options["deduplication_id"]
+                if enqueue_options is not None
+                else None
             )
-            .on_conflict_do_nothing()
-        )
+            query = (
+                pg.insert(SystemSchema.workflow_queue)
+                .values(
+                    workflow_uuid=workflow_id,
+                    queue_name=queue_name,
+                    deduplication_id=deduplication_id,
+                )
+                .on_conflict_do_nothing(
+                    index_elements=SystemSchema.workflow_queue.primary_key.columns
+                )
+            )  # Ignore primary key constraint violation
+            conn.execute(query)
+        except DBAPIError as dbapi_error:
+            # Unique constraint violation for the deduplication ID
+            if dbapi_error.orig.sqlstate == "23505":  # type: ignore
+                assert (
+                    deduplication_id is not None
+                ), f"deduplication_id should not be None. Workflow ID: {workflow_id}, Queue name: {queue_name}."
+                raise DBOSQueueDeduplicatedError(
+                    workflow_id, queue_name, deduplication_id
+                )
 
     def start_queued_workflows(
         self, queue: "Queue", executor_id: str, app_version: str
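Queue deduplication works by inserting the deduplication ID alongside the queue row; a second enqueue with the same queue name and deduplication ID trips a unique-constraint violation (SQLSTATE 23505), surfaced as DBOSQueueDeduplicatedError. A rough sketch of exercising the internal method (sys_db is assumed to be a live SystemDatabase whose workflows "wf-a" and "wf-b" already have status rows; this is not the public Queue API):

from dbos._error import DBOSQueueDeduplicatedError
from dbos._sys_db import EnqueueOptionsInternal

options: EnqueueOptionsInternal = {"deduplication_id": "order-42"}

with sys_db.engine.begin() as conn:
    sys_db.enqueue("wf-a", "payments_queue", conn, enqueue_options=options)

try:
    with sys_db.engine.begin() as conn:
        # Same queue and deduplication ID while "wf-a" is still pending
        sys_db.enqueue("wf-b", "payments_queue", conn, enqueue_options=options)
except DBOSQueueDeduplicatedError:
    pass  # the duplicate enqueue is rejected instead of silently inserting a second row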
@@ -1868,6 +1925,7 @@ class SystemDatabase:
         inputs: str,
         *,
         max_recovery_attempts: Optional[int],
+        enqueue_options: Optional[EnqueueOptionsInternal],
     ) -> tuple[WorkflowStatuses, Optional[int]]:
         """
         Synchronously record the status and inputs for workflows in a single transaction
@@ -1883,24 +1941,16 @@
                 status["queue_name"] is not None
                 and wf_status == WorkflowStatusString.ENQUEUED.value
             ):
-                self.enqueue(status["workflow_uuid"], status["queue_name"], conn)
+                self.enqueue(
+                    status["workflow_uuid"],
+                    status["queue_name"],
+                    conn,
+                    enqueue_options=enqueue_options,
+                )
         return wf_status, workflow_deadline_epoch_ms
 
 
-def reset_system_database(config: ConfigFile) -> None:
-    sysdb_name = (
-        config["database"]["sys_db_name"]
-        if "sys_db_name" in config["database"] and config["database"]["sys_db_name"]
-        else config["database"]["app_db_name"] + SystemSchema.sysdb_suffix
-    )
-    postgres_db_url = sa.URL.create(
-        "postgresql+psycopg",
-        username=config["database"]["username"],
-        password=config["database"]["password"],
-        host=config["database"]["hostname"],
-        port=config["database"]["port"],
-        database="postgres",
-    )
+def reset_system_database(postgres_db_url: sa.URL, sysdb_name: str) -> None:
     try:
         # Connect to postgres default database
         engine = sa.create_engine(postgres_db_url)
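reset_system_database no longer derives connection details from a ConfigFile; callers now pass the admin URL and system database name explicitly. A hedged sketch of what a call site might look like, mirroring the URL construction the old version did internally; the credentials are placeholders and the system database name assumes the app_db_name plus SystemSchema.sysdb_suffix convention shown in the removed code:

import sqlalchemy as sa
from dbos._sys_db import reset_system_database

# Placeholder connection details; the old implementation built this URL from ConfigFile.
postgres_db_url = sa.URL.create(
    "postgresql+psycopg",
    username="postgres",
    password="dbos",
    host="localhost",
    port=5432,
    database="postgres",  # connect to the default database, not the system database itself
)
reset_system_database(postgres_db_url, "my_app_dbos_sys")  # assumed default sysdb name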
dbos/_tracer.py CHANGED
@@ -3,8 +3,10 @@ from typing import TYPE_CHECKING, Optional
 
 from opentelemetry import trace
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
+from opentelemetry.semconv.resource import ResourceAttributes
 from opentelemetry.trace import Span
 
 from dbos._utils import GlobalParams
@@ -23,7 +25,13 @@ class DBOSTracer:
 
     def config(self, config: ConfigFile) -> None:
         if not isinstance(trace.get_tracer_provider(), TracerProvider):
-            provider = TracerProvider()
+            resource = Resource(
+                attributes={
+                    ResourceAttributes.SERVICE_NAME: config["name"],
+                }
+            )
+
+            provider = TracerProvider(resource=resource)
             if os.environ.get("DBOS__CONSOLE_TRACES", None) is not None:
                 processor = BatchSpanProcessor(ConsoleSpanExporter())
                 provider.add_span_processor(processor)
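Attaching a Resource means every span exported by this provider carries the application's name from the DBOS config as the standard service.name attribute. A standalone illustration of the same OpenTelemetry pattern (not DBOS code; the application name is a placeholder):

from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
from opentelemetry.semconv.resource import ResourceAttributes

resource = Resource(attributes={ResourceAttributes.SERVICE_NAME: "my-dbos-app"})
provider = TracerProvider(resource=resource)
provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))

with provider.get_tracer(__name__).start_as_current_span("example-span"):
    pass  # the span printed on shutdown includes "service.name": "my-dbos-app" in its resource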
@@ -1,6 +1,7 @@
 import uuid
 from typing import List, Optional
 
+from dbos._context import get_local_dbos_context
 from dbos._error import DBOSException
 
 from ._app_db import ApplicationDatabase
@@ -103,6 +104,8 @@ def fork_workflow(
     app_db: ApplicationDatabase,
     workflow_id: str,
     start_step: int,
+    *,
+    application_version: Optional[str],
 ) -> str:
     def get_max_function_id(workflow_uuid: str) -> int:
         max_transactions = app_db.get_max_function_id(workflow_uuid) or 0
@@ -114,7 +117,17 @@
         raise DBOSException(
             f"Cannot fork workflow {workflow_id} from step {start_step}. The workflow has {max_function_id} steps."
         )
-    forked_workflow_id = str(uuid.uuid4())
+    ctx = get_local_dbos_context()
+    if ctx is not None and len(ctx.id_assigned_for_next_workflow) > 0:
+        forked_workflow_id = ctx.id_assigned_for_next_workflow
+        ctx.id_assigned_for_next_workflow = ""
+    else:
+        forked_workflow_id = str(uuid.uuid4())
     app_db.clone_workflow_transactions(workflow_id, forked_workflow_id, start_step)
-    sys_db.fork_workflow(workflow_id, forked_workflow_id, start_step)
+    sys_db.fork_workflow(
+        workflow_id,
+        forked_workflow_id,
+        start_step,
+        application_version=application_version,
+    )
     return forked_workflow_id
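Two behaviors change together here: a caller-assigned workflow ID from the ambient DBOS context takes precedence over a random UUID, and the fork can be pinned to an explicit application version. A rough sketch of driving the helper above (sys_db and app_db are assumed to be live SystemDatabase and ApplicationDatabase handles, and SetWorkflowID is assumed to be the mechanism that populates id_assigned_for_next_workflow):

from dbos import SetWorkflowID

# Fork an existing workflow from step 3 with a chosen ID and a pinned version.
# "forked-run-1", "original-workflow-id", and "v2" are placeholder values.
with SetWorkflowID("forked-run-1"):
    forked_id = fork_workflow(
        sys_db,
        app_db,
        "original-workflow-id",
        3,
        application_version="v2",
    )
assert forked_id == "forked-run-1"  # without the context, a random UUID is generated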