dbos-2.4.0a7-py3-none-any.whl → dbos-2.6.0a8-py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

dbos/_sys_db.py CHANGED
@@ -158,6 +158,16 @@ class WorkflowStatusInternal(TypedDict):
     forked_from: Optional[str]


+class MetricData(TypedDict):
+    """
+    Metrics data for workflows and steps within a time range.
+    """
+
+    metric_type: str  # Type of metric: "workflow" or "step"
+    metric_name: str  # Name of the workflow or step
+    value: int  # Number of times the operation ran in the time interval
+
+
 class EnqueueOptionsInternal(TypedDict):
     # Unique ID for deduplication on a queue
     deduplication_id: Optional[str]
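
Note: the new MetricData TypedDict is consumed by get_metrics, added at the end of this file. That method populates metric_type with "workflow_count" and "step_count", so the inline comment's "workflow" or "step" appears slightly out of date. As a rough sketch of how a caller might aggregate these records (the report helper below is hypothetical, not part of the package):

from collections import defaultdict
from typing import Dict, List

def report(metrics: List["MetricData"]) -> Dict[str, int]:
    # Hypothetical helper: sum executions per metric type.
    totals: Dict[str, int] = defaultdict(int)
    for m in metrics:
        totals[m["metric_type"]] += m["value"]
    return dict(totals)
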
@@ -170,16 +180,17 @@ class EnqueueOptionsInternal(TypedDict):


 class RecordedResult(TypedDict):
-    output: Optional[str]  # JSON (jsonpickle)
-    error: Optional[str]  # JSON (jsonpickle)
+    output: Optional[str]  # Serialized
+    error: Optional[str]  # Serialized
+    child_workflow_id: Optional[str]


 class OperationResultInternal(TypedDict):
     workflow_uuid: str
     function_id: int
     function_name: str
-    output: Optional[str]  # JSON (jsonpickle)
-    error: Optional[str]  # JSON (jsonpickle)
+    output: Optional[str]  # Serialized
+    error: Optional[str]  # Serialized
     started_at_epoch_ms: int


@@ -394,6 +405,26 @@ class SystemDatabase(ABC):
         import sqlalchemy.dialects.postgresql as pg
         import sqlalchemy.dialects.sqlite as sq

+        # Log system database connection information
+        if engine:
+            dbos_logger.info("Initializing DBOS system database with custom engine")
+        else:
+            printable_sys_db_url = sa.make_url(system_database_url).render_as_string(
+                hide_password=True
+            )
+            dbos_logger.info(
+                f"Initializing DBOS system database with URL: {printable_sys_db_url}"
+            )
+            if system_database_url.startswith("sqlite"):
+                dbos_logger.info(
+                    f"Using SQLite as a system database. The SQLite system database is for development and testing. PostgreSQL is recommended for production use."
+                )
+            else:
+                dbos_logger.info(
+                    f"DBOS system database engine parameters: {engine_kwargs}"
+                )
+
+        # Configure and initialize the system database
         self.dialect = sq if system_database_url.startswith("sqlite") else pg

         self.serializer = serializer
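
Note: the password-redacted URL logging above uses SQLAlchemy's standard URL API. A minimal standalone sketch of the same pattern (the URL itself is illustrative):

import sqlalchemy as sa

url = sa.make_url("postgresql://dbos:secret@localhost:5432/dbos_sys")
print(url.render_as_string(hide_password=True))
# prints: postgresql://dbos:***@localhost:5432/dbos_sys
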
@@ -700,11 +731,7 @@ class SystemDatabase(ABC):
                 name=status["name"],
                 class_name=status["class_name"],
                 config_name=status["config_name"],
-                application_version=(
-                    application_version
-                    if application_version is not None
-                    else status["app_version"]
-                ),
+                application_version=application_version,
                 application_id=status["app_id"],
                 authenticated_user=status["authenticated_user"],
                 authenticated_roles=status["authenticated_roles"],
@@ -716,34 +743,124 @@ class SystemDatabase(ABC):
             )

             if start_step > 1:
+                # Copy the original workflow's step checkpoints
+                c.execute(
+                    sa.insert(SystemSchema.operation_outputs).from_select(
+                        [
+                            "workflow_uuid",
+                            "function_id",
+                            "output",
+                            "error",
+                            "function_name",
+                            "child_workflow_id",
+                            "started_at_epoch_ms",
+                            "completed_at_epoch_ms",
+                        ],
+                        sa.select(
+                            sa.literal(forked_workflow_id).label("workflow_uuid"),
+                            SystemSchema.operation_outputs.c.function_id,
+                            SystemSchema.operation_outputs.c.output,
+                            SystemSchema.operation_outputs.c.error,
+                            SystemSchema.operation_outputs.c.function_name,
+                            SystemSchema.operation_outputs.c.child_workflow_id,
+                            SystemSchema.operation_outputs.c.started_at_epoch_ms,
+                            SystemSchema.operation_outputs.c.completed_at_epoch_ms,
+                        ).where(
+                            (
+                                SystemSchema.operation_outputs.c.workflow_uuid
+                                == original_workflow_id
+                            )
+                            & (
+                                SystemSchema.operation_outputs.c.function_id
+                                < start_step
+                            )
+                        ),
+                    )
+                )
+                # Copy the original workflow's events
+                c.execute(
+                    sa.insert(SystemSchema.workflow_events_history).from_select(
+                        [
+                            "workflow_uuid",
+                            "function_id",
+                            "key",
+                            "value",
+                        ],
+                        sa.select(
+                            sa.literal(forked_workflow_id).label("workflow_uuid"),
+                            SystemSchema.workflow_events_history.c.function_id,
+                            SystemSchema.workflow_events_history.c.key,
+                            SystemSchema.workflow_events_history.c.value,
+                        ).where(
+                            (
+                                SystemSchema.workflow_events_history.c.workflow_uuid
+                                == original_workflow_id
+                            )
+                            & (
+                                SystemSchema.workflow_events_history.c.function_id
+                                < start_step
+                            )
+                        ),
+                    )
+                )
+                # Copy only the latest version of each workflow event from the history table
+                # (the one with the maximum function_id for each key where function_id < start_step)
+                weh1 = SystemSchema.workflow_events_history.alias("weh1")
+                weh2 = SystemSchema.workflow_events_history.alias("weh2")

-                # Copy the original workflow's outputs into the forked workflow
-                insert_stmt = sa.insert(SystemSchema.operation_outputs).from_select(
-                    [
-                        "workflow_uuid",
-                        "function_id",
-                        "output",
-                        "error",
-                        "function_name",
-                        "child_workflow_id",
-                    ],
-                    sa.select(
-                        sa.literal(forked_workflow_id).label("workflow_uuid"),
-                        SystemSchema.operation_outputs.c.function_id,
-                        SystemSchema.operation_outputs.c.output,
-                        SystemSchema.operation_outputs.c.error,
-                        SystemSchema.operation_outputs.c.function_name,
-                        SystemSchema.operation_outputs.c.child_workflow_id,
-                    ).where(
-                        (
-                            SystemSchema.operation_outputs.c.workflow_uuid
-                            == original_workflow_id
-                        )
-                        & (SystemSchema.operation_outputs.c.function_id < start_step)
-                    ),
+                max_function_id_subquery = (
+                    sa.select(sa.func.max(weh2.c.function_id))
+                    .where(
+                        (weh2.c.workflow_uuid == original_workflow_id)
+                        & (weh2.c.key == weh1.c.key)
+                        & (weh2.c.function_id < start_step)
+                    )
+                    .scalar_subquery()
+                )
+
+                c.execute(
+                    sa.insert(SystemSchema.workflow_events).from_select(
+                        [
+                            "workflow_uuid",
+                            "key",
+                            "value",
+                        ],
+                        sa.select(
+                            sa.literal(forked_workflow_id).label("workflow_uuid"),
+                            weh1.c.key,
+                            weh1.c.value,
+                        ).where(
+                            (weh1.c.workflow_uuid == original_workflow_id)
+                            & (weh1.c.function_id == max_function_id_subquery)
+                        ),
+                    )
+                )
+                # Copy the original workflow's streams
+                c.execute(
+                    sa.insert(SystemSchema.streams).from_select(
+                        [
+                            "workflow_uuid",
+                            "function_id",
+                            "key",
+                            "value",
+                            "offset",
+                        ],
+                        sa.select(
+                            sa.literal(forked_workflow_id).label("workflow_uuid"),
+                            SystemSchema.streams.c.function_id,
+                            SystemSchema.streams.c.key,
+                            SystemSchema.streams.c.value,
+                            SystemSchema.streams.c.offset,
+                        ).where(
+                            (
+                                SystemSchema.streams.c.workflow_uuid
+                                == original_workflow_id
+                            )
+                            & (SystemSchema.streams.c.function_id < start_step)
+                        ),
+                    )
                 )

-                c.execute(insert_stmt)
         return forked_workflow_id

     @db_retry()
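
Note: the "latest version of each event" copy above is the classic greatest-per-group pattern, built with a correlated scalar subquery over two aliases of the same table. A self-contained sketch of the same idea (table and column names here are illustrative, not the DBOS schema):

import sqlalchemy as sa

md = sa.MetaData()
events = sa.Table(
    "events",
    md,
    sa.Column("wf_id", sa.String),
    sa.Column("function_id", sa.Integer),
    sa.Column("key", sa.String),
    sa.Column("value", sa.String),
)
e1, e2 = events.alias("e1"), events.alias("e2")
# For each key, select the row whose function_id equals that key's maximum.
latest = (
    sa.select(sa.func.max(e2.c.function_id))
    .where((e2.c.wf_id == e1.c.wf_id) & (e2.c.key == e1.c.key))
    .scalar_subquery()
)
query = sa.select(e1.c.key, e1.c.value).where(e1.c.function_id == latest)
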
@@ -834,7 +951,7 @@ class SystemDatabase(ABC):
         return workflow_id

     @db_retry()
-    def await_workflow_result(self, workflow_id: str) -> Any:
+    def await_workflow_result(self, workflow_id: str, polling_interval: float) -> Any:
         while True:
             with self.engine.begin() as c:
                 row = c.execute(
@@ -859,7 +976,7 @@ class SystemDatabase(ABC):
                         raise DBOSAwaitedWorkflowCancelledError(workflow_id)
             else:
                 pass  # CB: I guess we're assuming the WF will show up eventually.
-            time.sleep(1)
+            time.sleep(polling_interval)

     def get_workflows(
         self,
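
Note: await_workflow_result now takes its polling cadence from the caller instead of a hard-coded one-second sleep. The underlying loop shape, reduced to a generic sketch (poll_until is hypothetical, mirroring the loop above):

import time
from typing import Callable, Optional, TypeVar

T = TypeVar("T")

def poll_until(fetch: Callable[[], Optional[T]], interval: float) -> T:
    # Retry fetch() until it yields a value, sleeping `interval`
    # seconds between attempts.
    while True:
        result = fetch()
        if result is not None:
            return result
        time.sleep(interval)
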
@@ -902,11 +1019,12 @@ class SystemDatabase(ABC):

         if input.queues_only:
             query = sa.select(*load_columns).where(
-                sa.and_(
-                    SystemSchema.workflow_status.c.queue_name.isnot(None),
-                    SystemSchema.workflow_status.c.status.in_(["ENQUEUED", "PENDING"]),
-                )
+                SystemSchema.workflow_status.c.queue_name.isnot(None),
             )
+            if not input.status:
+                query = query.where(
+                    SystemSchema.workflow_status.c.status.in_(["ENQUEUED", "PENDING"])
+                )
         else:
             query = sa.select(*load_columns)
         if input.sort_desc:
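
Note: chained Select.where() calls are ANDed together in SQLAlchemy, so the rewrite above is equivalent to the old sa.and_ form when the caller supplies no status filter, and skips the ENQUEUED/PENDING restriction otherwise. A compact sketch of the same conditional narrowing (the table is illustrative):

import sqlalchemy as sa
from typing import List, Optional

def build_query(table: sa.Table, status: Optional[List[str]]) -> "sa.Select":
    # Start broad, then narrow only when the caller did not
    # request specific statuses.
    query = sa.select(table).where(table.c.queue_name.isnot(None))
    if not status:
        query = query.where(table.c.status.in_(["ENQUEUED", "PENDING"]))
    return query
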
@@ -1033,7 +1151,7 @@ class SystemDatabase(ABC):
             for row in rows
         ]

-    def get_workflow_steps(self, workflow_id: str) -> List[StepInfo]:
+    def list_workflow_steps(self, workflow_id: str) -> List[StepInfo]:
         with self.engine.begin() as c:
             rows = c.execute(
                 sa.select(
@@ -1044,7 +1162,9 @@ class SystemDatabase(ABC):
                     SystemSchema.operation_outputs.c.child_workflow_id,
                     SystemSchema.operation_outputs.c.started_at_epoch_ms,
                     SystemSchema.operation_outputs.c.completed_at_epoch_ms,
-                ).where(SystemSchema.operation_outputs.c.workflow_uuid == workflow_id)
+                )
+                .where(SystemSchema.operation_outputs.c.workflow_uuid == workflow_id)
+                .order_by(SystemSchema.operation_outputs.c.function_id)
             ).fetchall()
         steps = []
         for row in rows:
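
Note: with the added order_by, the renamed list_workflow_steps returns steps in execution (function_id) order, so callers can rely on position. A hypothetical sketch of a consumer that depends on this ordering (assuming StepInfo exposes function_id, function_name, and output):

from typing import List, Optional

def first_divergent_step(a: List["StepInfo"], b: List["StepInfo"]) -> Optional[int]:
    # Compare two step lists, both sorted by function_id, and return
    # the first function_id at which they differ.
    for x, y in zip(a, b):
        if (x.function_name, x.output) != (y.function_name, y.output):
            return x.function_id
    return None
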
@@ -1075,6 +1195,9 @@ class SystemDatabase(ABC):
         error = result["error"]
         output = result["output"]
         assert error is None or output is None, "Only one of error or output can be set"
+
+        # Check if the executor ID belongs to another process.
+        # Reset it to this process's executor ID if so.
         wf_executor_id_row = conn.execute(
             sa.select(
                 SystemSchema.workflow_status.c.executor_id,
@@ -1096,17 +1219,20 @@ class SystemDatabase(ABC):
                     == result["workflow_uuid"]
                 )
             )
-        sql = sa.insert(SystemSchema.operation_outputs).values(
-            workflow_uuid=result["workflow_uuid"],
-            function_id=result["function_id"],
-            function_name=result["function_name"],
-            started_at_epoch_ms=result["started_at_epoch_ms"],
-            completed_at_epoch_ms=int(time.time() * 1000),
-            output=output,
-            error=error,
-        )
+
+        # Record the outcome, throwing DBOSWorkflowConflictIDError if it is already present
         try:
-            conn.execute(sql)
+            conn.execute(
+                sa.insert(SystemSchema.operation_outputs).values(
+                    workflow_uuid=result["workflow_uuid"],
+                    function_id=result["function_id"],
+                    function_name=result["function_name"],
+                    started_at_epoch_ms=result["started_at_epoch_ms"],
+                    completed_at_epoch_ms=int(time.time() * 1000),
+                    output=output,
+                    error=error,
+                )
+            )
         except DBAPIError as dbapi_error:
             if self._is_unique_constraint_violation(dbapi_error):
                 raise DBOSWorkflowConflictIDError(result["workflow_uuid"])
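
Note: the record is written optimistically and the duplicate case is detected from the database error, relying on the uniqueness of (workflow_uuid, function_id) in operation_outputs. The package inspects a raw DBAPIError because it must support both the Postgres and SQLite dialects; with a single dialect the same idiom reduces to this sketch (table and helper are illustrative):

import sqlalchemy as sa
from sqlalchemy.exc import IntegrityError

def insert_once(conn: "sa.Connection", table: sa.Table, row: dict) -> bool:
    # Attempt the insert; a unique-constraint violation means a
    # checkpoint for this (workflow, step) already exists.
    try:
        conn.execute(sa.insert(table).values(**row))
        return True
    except IntegrityError:
        return False
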
@@ -1195,6 +1321,7 @@ class SystemDatabase(ABC):
                 SystemSchema.operation_outputs.c.output,
                 SystemSchema.operation_outputs.c.error,
                 SystemSchema.operation_outputs.c.function_name,
+                SystemSchema.operation_outputs.c.child_workflow_id,
             ).where(
                 (SystemSchema.operation_outputs.c.workflow_uuid == workflow_id)
                 & (SystemSchema.operation_outputs.c.function_id == function_id)
@@ -1223,10 +1350,11 @@ class SystemDatabase(ABC):
             return None

         # Extract operation output data
-        output, error, recorded_function_name = (
+        output, error, recorded_function_name, child_workflow_id = (
             operation_output_rows[0][0],
             operation_output_rows[0][1],
             operation_output_rows[0][2],
+            operation_output_rows[0][3],
         )

         # If the provided and recorded function name are different, throw an exception
@@ -1241,6 +1369,7 @@ class SystemDatabase(ABC):
         result: RecordedResult = {
             "output": output,
             "error": error,
+            "child_workflow_id": child_workflow_id,
         }
         return result

@@ -1253,31 +1382,6 @@ class SystemDatabase(ABC):
                 workflow_id, function_id, function_name, c
             )

-    @db_retry()
-    def check_child_workflow(
-        self, workflow_uuid: str, function_id: int
-    ) -> Optional[str]:
-        sql = sa.select(
-            SystemSchema.operation_outputs.c.child_workflow_id,
-            SystemSchema.operation_outputs.c.error,
-        ).where(
-            SystemSchema.operation_outputs.c.workflow_uuid == workflow_uuid,
-            SystemSchema.operation_outputs.c.function_id == function_id,
-        )
-
-        # If in a transaction, use the provided connection
-        row: Any
-        with self.engine.begin() as c:
-            row = c.execute(sql).fetchone()
-
-        if row is None:
-            return None
-        elif row[1]:
-            e: Exception = self.serializer.deserialize(row[1])
-            raise e
-        else:
-            return str(row[0])
-
     @db_retry()
     def send(
         self,
@@ -1530,6 +1634,19 @@ class SystemDatabase(ABC):
                     set_={"value": self.serializer.serialize(message)},
                 )
             )
+            c.execute(
+                self.dialect.insert(SystemSchema.workflow_events_history)
+                .values(
+                    workflow_uuid=workflow_uuid,
+                    function_id=function_id,
+                    key=key,
+                    value=self.serializer.serialize(message),
+                )
+                .on_conflict_do_update(
+                    index_elements=["workflow_uuid", "key", "function_id"],
+                    set_={"value": self.serializer.serialize(message)},
+                )
+            )
         output: OperationResultInternal = {
             "workflow_uuid": workflow_uuid,
             "function_id": function_id,
@@ -1543,6 +1660,7 @@ class SystemDatabase(ABC):
     def set_event_from_step(
         self,
         workflow_uuid: str,
+        function_id: int,
         key: str,
         message: Any,
     ) -> None:
@@ -1559,6 +1677,19 @@ class SystemDatabase(ABC):
                     set_={"value": self.serializer.serialize(message)},
                 )
             )
+            c.execute(
+                self.dialect.insert(SystemSchema.workflow_events_history)
+                .values(
+                    workflow_uuid=workflow_uuid,
+                    function_id=function_id,
+                    key=key,
+                    value=self.serializer.serialize(message),
+                )
+                .on_conflict_do_update(
+                    index_elements=["workflow_uuid", "key", "function_id"],
+                    set_={"value": self.serializer.serialize(message)},
+                )
+            )

     def get_all_events(self, workflow_id: str) -> Dict[str, Any]:
         """
@@ -1577,7 +1708,6 @@ class SystemDatabase(ABC):
                 SystemSchema.workflow_events.c.value,
             ).where(SystemSchema.workflow_events.c.workflow_uuid == workflow_id)
         ).fetchall()
-
         events: Dict[str, Any] = {}
         for row in rows:
             key = row[0]
@@ -1732,10 +1862,6 @@ class SystemDatabase(ABC):
                 sa.select(sa.func.count())
                 .select_from(SystemSchema.workflow_status)
                 .where(SystemSchema.workflow_status.c.queue_name == queue.name)
-                .where(
-                    SystemSchema.workflow_status.c.queue_partition_key
-                    == queue_partition_key
-                )
                 .where(
                     SystemSchema.workflow_status.c.status
                     != WorkflowStatusString.ENQUEUED.value
@@ -1745,6 +1871,11 @@ class SystemDatabase(ABC):
                     > start_time_ms - limiter_period_ms
                 )
             )
+            if queue_partition_key is not None:
+                query = query.where(
+                    SystemSchema.workflow_status.c.queue_partition_key
+                    == queue_partition_key
+                )
             num_recent_queries = c.execute(query).fetchone()[0]  # type: ignore
             if num_recent_queries >= queue.limiter["limit"]:
                 return []
@@ -1760,16 +1891,17 @@ class SystemDatabase(ABC):
                 )
                 .select_from(SystemSchema.workflow_status)
                 .where(SystemSchema.workflow_status.c.queue_name == queue.name)
-                .where(
-                    SystemSchema.workflow_status.c.queue_partition_key
-                    == queue_partition_key
-                )
                 .where(
                     SystemSchema.workflow_status.c.status
                     == WorkflowStatusString.PENDING.value
                 )
                 .group_by(SystemSchema.workflow_status.c.executor_id)
             )
+            if queue_partition_key is not None:
+                pending_tasks_query = pending_tasks_query.where(
+                    SystemSchema.workflow_status.c.queue_partition_key
+                    == queue_partition_key
+                )
             pending_workflows = c.execute(pending_tasks_query).fetchall()
             pending_workflows_dict = {row[0]: row[1] for row in pending_workflows}
             local_pending_workflows = pending_workflows_dict.get(executor_id, 0)
@@ -1805,10 +1937,6 @@ class SystemDatabase(ABC):
                 )
                 .select_from(SystemSchema.workflow_status)
                 .where(SystemSchema.workflow_status.c.queue_name == queue.name)
-                .where(
-                    SystemSchema.workflow_status.c.queue_partition_key
-                    == queue_partition_key
-                )
                 .where(
                     SystemSchema.workflow_status.c.status
                     == WorkflowStatusString.ENQUEUED.value
@@ -1825,6 +1953,11 @@ class SystemDatabase(ABC):
                 # to ensure all processes have a consistent view of the table.
                 .with_for_update(skip_locked=skip_locks, nowait=(not skip_locks))
             )
+            if queue_partition_key is not None:
+                query = query.where(
+                    SystemSchema.workflow_status.c.queue_partition_key
+                    == queue_partition_key
+                )
             if queue.priority_enabled:
                 query = query.order_by(
                     SystemSchema.workflow_status.c.priority.asc(),
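
Note: across all three queue queries above, the partition-key predicate is now attached only when a partition key is actually supplied, so queries for unpartitioned queues carry no partition clause at all. Conditionally chaining .where() is the idiomatic SQLAlchemy way to build such optional filters; a compact sketch:

import sqlalchemy as sa
from typing import Optional

def with_partition(
    query: "sa.Select", col: "sa.Column", partition_key: Optional[str]
) -> "sa.Select":
    # Attach the predicate only when a partition key is set; an
    # unconditional `col == None` would compile to `IS NULL` instead.
    if partition_key is not None:
        query = query.where(col == partition_key)
    return query
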
@@ -1968,7 +2101,9 @@ class SystemDatabase(ABC):
             dbos_logger.error(f"Error connecting to the DBOS system database: {e}")
             raise

-    def write_stream_from_step(self, workflow_uuid: str, key: str, value: Any) -> None:
+    def write_stream_from_step(
+        self, workflow_uuid: str, function_id: int, key: str, value: Any
+    ) -> None:
         """
         Write a key-value pair to the stream at the first unused offset.
         """
@@ -1998,6 +2133,7 @@ class SystemDatabase(ABC):
             c.execute(
                 sa.insert(SystemSchema.streams).values(
                     workflow_uuid=workflow_uuid,
+                    function_id=function_id,
                     key=key,
                     value=serialized_value,
                     offset=next_offset,
@@ -2054,6 +2190,7 @@ class SystemDatabase(ABC):
             c.execute(
                 sa.insert(SystemSchema.streams).values(
                     workflow_uuid=workflow_uuid,
+                    function_id=function_id,
                     key=key,
                     value=serialized_value,
                     offset=next_offset,
@@ -2151,3 +2288,76 @@ class SystemDatabase(ABC):
         return cutoff_epoch_timestamp_ms, [
             row[0] for row in pending_enqueued_result
         ]
+
+    def get_metrics(self, start_time: str, end_time: str) -> List[MetricData]:
+        """
+        Retrieve the number of workflows and steps that ran in a time range.
+
+        Args:
+            start_time: ISO 8601 formatted start time
+            end_time: ISO 8601 formatted end time
+        """
+        # Convert ISO 8601 times to epoch milliseconds
+        start_epoch_ms = int(
+            datetime.datetime.fromisoformat(start_time).timestamp() * 1000
+        )
+        end_epoch_ms = int(datetime.datetime.fromisoformat(end_time).timestamp() * 1000)
+
+        metrics: List[MetricData] = []
+
+        with self.engine.begin() as c:
+            # Query workflow metrics
+            workflow_query = (
+                sa.select(
+                    SystemSchema.workflow_status.c.name,
+                    func.count(SystemSchema.workflow_status.c.workflow_uuid).label(
+                        "count"
+                    ),
+                )
+                .where(
+                    sa.and_(
+                        SystemSchema.workflow_status.c.created_at >= start_epoch_ms,
+                        SystemSchema.workflow_status.c.created_at < end_epoch_ms,
+                    )
+                )
+                .group_by(SystemSchema.workflow_status.c.name)
+            )
+
+            workflow_results = c.execute(workflow_query).fetchall()
+            for row in workflow_results:
+                metrics.append(
+                    MetricData(
+                        metric_type="workflow_count",
+                        metric_name=row[0],
+                        value=row[1],
+                    )
+                )
+
+            # Query step metrics
+            step_query = (
+                sa.select(
+                    SystemSchema.operation_outputs.c.function_name,
+                    func.count().label("count"),
+                )
+                .where(
+                    sa.and_(
+                        SystemSchema.operation_outputs.c.started_at_epoch_ms
+                        >= start_epoch_ms,
+                        SystemSchema.operation_outputs.c.started_at_epoch_ms
+                        < end_epoch_ms,
+                    )
+                )
+                .group_by(SystemSchema.operation_outputs.c.function_name)
+            )
+
+            step_results = c.execute(step_query).fetchall()
+            for row in step_results:
+                metrics.append(
+                    MetricData(
+                        metric_type="step_count",
+                        metric_name=row[0],
+                        value=row[1],
+                    )
+                )
+
+        return metrics
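
Note: a sketch of calling the new get_metrics (sys_db stands in for any concrete SystemDatabase instance; the one-hour window is illustrative):

from datetime import datetime, timedelta, timezone

end = datetime.now(timezone.utc)
start = end - timedelta(hours=1)
# sys_db: a concrete SystemDatabase instance (illustrative)
for m in sys_db.get_metrics(start.isoformat(), end.isoformat()):
    print(f'{m["metric_type"]}: {m["metric_name"]} ran {m["value"]} time(s)')
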
dbos/_sys_db_postgres.py CHANGED
@@ -31,18 +31,24 @@ class PostgresSystemDatabase(SystemDatabase):
         sysdb_name = system_db_url.database
         # Unless we were provided an engine, if the system database does not already exist, create it
         if self.created_engine:
-            engine = sa.create_engine(
-                system_db_url.set(database="postgres"), **self._engine_kwargs
-            )
-            with engine.connect() as conn:
-                conn.execution_options(isolation_level="AUTOCOMMIT")
-                if not conn.execute(
-                    sa.text("SELECT 1 FROM pg_database WHERE datname=:db_name"),
-                    parameters={"db_name": sysdb_name},
-                ).scalar():
-                    dbos_logger.info(f"Creating system database {sysdb_name}")
-                    conn.execute(sa.text(f'CREATE DATABASE "{sysdb_name}"'))
-            engine.dispose()
+            try:
+                engine = sa.create_engine(
+                    system_db_url.set(database="postgres"), **self._engine_kwargs
+                )
+                with engine.connect() as conn:
+                    conn.execution_options(isolation_level="AUTOCOMMIT")
+                    if not conn.execute(
+                        sa.text("SELECT 1 FROM pg_database WHERE datname=:db_name"),
+                        parameters={"db_name": sysdb_name},
+                    ).scalar():
+                        dbos_logger.info(f"Creating system database {sysdb_name}")
+                        conn.execute(sa.text(f'CREATE DATABASE "{sysdb_name}"'))
+            except Exception:
+                dbos_logger.warning(
+                    f"Could not connect to postgres database to verify existence of {sysdb_name}. Continuing..."
+                )
+            finally:
+                engine.dispose()
         else:
             # If we were provided an engine, validate it can connect
             with self.engine.connect() as conn:
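
Note: CREATE DATABASE cannot run inside a transaction block in Postgres, which is why the bootstrap connection is switched to AUTOCOMMIT before the existence check. One caveat in the new error handling: if sa.create_engine itself raises, engine is never bound and the finally block's engine.dispose() would raise a NameError. A defensive variant of the same bootstrap, as a sketch (admin_url and sysdb_name are illustrative):

import sqlalchemy as sa

engine = None
try:
    engine = sa.create_engine(admin_url)
    with engine.connect() as conn:
        conn.execution_options(isolation_level="AUTOCOMMIT")
        exists = conn.execute(
            sa.text("SELECT 1 FROM pg_database WHERE datname=:db"),
            {"db": sysdb_name},
        ).scalar()
        if not exists:
            # CREATE DATABASE requires AUTOCOMMIT, hence the option above
            conn.execute(sa.text(f'CREATE DATABASE "{sysdb_name}"'))
except Exception:
    pass  # startup proceeds; later connections will surface real errors
finally:
    if engine is not None:
        engine.dispose()
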
dbos/_tracer.py CHANGED
@@ -8,6 +8,7 @@ if TYPE_CHECKING:
 from dbos._utils import GlobalParams

 from ._dbos_config import ConfigFile
+from ._logger import dbos_logger

 if TYPE_CHECKING:
     from ._context import TracedAttributes
@@ -46,7 +47,8 @@ class DBOSTracer:

         # Only set up OTLP provider and exporter if endpoints are provided
         if otlp_traces_endpoints is not None and len(otlp_traces_endpoints) > 0:
-            if not isinstance(tracer_provider, TracerProvider):
+            if isinstance(tracer_provider, trace.ProxyTracerProvider):
+                # Set a real TracerProvider if it was previously a ProxyTracerProvider
                 resource = Resource(
                     attributes={
                         SERVICE_NAME: config["name"],
@@ -61,7 +63,12 @@ class DBOSTracer:

             for e in otlp_traces_endpoints:
                 processor = BatchSpanProcessor(OTLPSpanExporter(endpoint=e))
-                tracer_provider.add_span_processor(processor)
+                tracer_provider.add_span_processor(processor)  # type: ignore
+
+            if isinstance(tracer_provider, trace.ProxyTracerProvider):
+                dbos_logger.warning(
+                    "OTLP is enabled but tracer provider not set, skipping trace exporter setup."
+                )

     def set_provider(self, provider: "Optional[TracerProvider]") -> None:
         self.provider = provider
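
Note: the surrounding context is not shown in these hunks, but opentelemetry's trace.get_tracer_provider() returns a ProxyTracerProvider until a real provider has been installed, and that is the state the new isinstance checks distinguish: per its comment, the first branch installs a real TracerProvider over the proxy, while the late warning fires if the provider is still a proxy when exporters would be attached. A minimal sketch of the same detection:

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

provider = trace.get_tracer_provider()
if isinstance(provider, trace.ProxyTracerProvider):
    # No real provider installed yet: create one and register it globally.
    provider = TracerProvider()
    trace.set_tracer_provider(provider)
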