dbos 2.4.0a7__py3-none-any.whl → 2.6.0a8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbos/__init__.py +2 -0
- dbos/_app_db.py +29 -87
- dbos/_client.py +12 -8
- dbos/_conductor/conductor.py +40 -5
- dbos/_conductor/protocol.py +23 -0
- dbos/_core.py +98 -30
- dbos/_dbos.py +15 -12
- dbos/_dbos_config.py +2 -19
- dbos/_fastapi.py +2 -1
- dbos/_logger.py +12 -6
- dbos/_migration.py +30 -0
- dbos/_queue.py +94 -37
- dbos/_schemas/system_database.py +20 -0
- dbos/_sys_db.py +302 -92
- dbos/_sys_db_postgres.py +18 -12
- dbos/_tracer.py +9 -2
- dbos/_workflow_commands.py +0 -15
- dbos/cli/cli.py +8 -18
- dbos/cli/migration.py +28 -1
- {dbos-2.4.0a7.dist-info → dbos-2.6.0a8.dist-info}/METADATA +1 -1
- {dbos-2.4.0a7.dist-info → dbos-2.6.0a8.dist-info}/RECORD +24 -24
- {dbos-2.4.0a7.dist-info → dbos-2.6.0a8.dist-info}/WHEEL +0 -0
- {dbos-2.4.0a7.dist-info → dbos-2.6.0a8.dist-info}/entry_points.txt +0 -0
- {dbos-2.4.0a7.dist-info → dbos-2.6.0a8.dist-info}/licenses/LICENSE +0 -0
dbos/_sys_db.py
CHANGED
@@ -158,6 +158,16 @@ class WorkflowStatusInternal(TypedDict):
     forked_from: Optional[str]
 
 
+class MetricData(TypedDict):
+    """
+    Metrics data for workflows and steps within a time range.
+    """
+
+    metric_type: str  # Type of metric: "workflow" or "step"
+    metric_name: str  # Name of the workflow or step
+    value: int  # Number of times the operation ran in the time interval
+
+
 class EnqueueOptionsInternal(TypedDict):
     # Unique ID for deduplication on a queue
     deduplication_id: Optional[str]
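`MetricData` is a `TypedDict`, so the records returned by `get_metrics` (added at the end of this file) are plain dicts at runtime. A minimal consumption sketch; the helper is hypothetical, and the "step_count" tag follows what `get_metrics` actually emits rather than the "step" wording in the field comment above:

    from typing import List, TypedDict

    class MetricData(TypedDict):
        metric_type: str
        metric_name: str
        value: int

    def total_step_runs(metrics: List[MetricData]) -> int:
        # Sum counts across step metrics, ignoring workflow metrics
        return sum(m["value"] for m in metrics if m["metric_type"] == "step_count")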
@@ -170,16 +180,17 @@ class EnqueueOptionsInternal(TypedDict):
 
 
 class RecordedResult(TypedDict):
-    output: Optional[str]  #
-    error: Optional[str]  #
+    output: Optional[str]  # Serialized
+    error: Optional[str]  # Serialized
+    child_workflow_id: Optional[str]
 
 
 class OperationResultInternal(TypedDict):
     workflow_uuid: str
     function_id: int
     function_name: str
-    output: Optional[str]  #
-    error: Optional[str]  #
+    output: Optional[str]  # Serialized
+    error: Optional[str]  # Serialized
     started_at_epoch_ms: int
 
 
@@ -394,6 +405,26 @@ class SystemDatabase(ABC):
         import sqlalchemy.dialects.postgresql as pg
         import sqlalchemy.dialects.sqlite as sq
 
+        # Log system database connection information
+        if engine:
+            dbos_logger.info("Initializing DBOS system database with custom engine")
+        else:
+            printable_sys_db_url = sa.make_url(system_database_url).render_as_string(
+                hide_password=True
+            )
+            dbos_logger.info(
+                f"Initializing DBOS system database with URL: {printable_sys_db_url}"
+            )
+            if system_database_url.startswith("sqlite"):
+                dbos_logger.info(
+                    f"Using SQLite as a system database. The SQLite system database is for development and testing. PostgreSQL is recommended for production use."
+                )
+            else:
+                dbos_logger.info(
+                    f"DBOS system database engine parameters: {engine_kwargs}"
+                )
+
+        # Configure and initialize the system database
         self.dialect = sq if system_database_url.startswith("sqlite") else pg
 
         self.serializer = serializer
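`render_as_string(hide_password=True)` is standard SQLAlchemy URL behavior, which is what makes the logged URL safe. A small illustration with a hypothetical URL:

    import sqlalchemy as sa

    url = sa.make_url("postgresql://dbos:s3cret@localhost:5432/dbos_sys")
    # The password is masked so the URL can be logged safely
    print(url.render_as_string(hide_password=True))
    # postgresql://dbos:***@localhost:5432/dbos_sys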
@@ -700,11 +731,7 @@ class SystemDatabase(ABC):
                 name=status["name"],
                 class_name=status["class_name"],
                 config_name=status["config_name"],
-                application_version=(
-                    application_version
-                    if application_version is not None
-                    else status["app_version"]
-                ),
+                application_version=application_version,
                 application_id=status["app_id"],
                 authenticated_user=status["authenticated_user"],
                 authenticated_roles=status["authenticated_roles"],
@@ -716,34 +743,124 @@ class SystemDatabase(ABC):
             )
 
             if start_step > 1:
+                # Copy the original workflow's step checkpoints
+                c.execute(
+                    sa.insert(SystemSchema.operation_outputs).from_select(
+                        [
+                            "workflow_uuid",
+                            "function_id",
+                            "output",
+                            "error",
+                            "function_name",
+                            "child_workflow_id",
+                            "started_at_epoch_ms",
+                            "completed_at_epoch_ms",
+                        ],
+                        sa.select(
+                            sa.literal(forked_workflow_id).label("workflow_uuid"),
+                            SystemSchema.operation_outputs.c.function_id,
+                            SystemSchema.operation_outputs.c.output,
+                            SystemSchema.operation_outputs.c.error,
+                            SystemSchema.operation_outputs.c.function_name,
+                            SystemSchema.operation_outputs.c.child_workflow_id,
+                            SystemSchema.operation_outputs.c.started_at_epoch_ms,
+                            SystemSchema.operation_outputs.c.completed_at_epoch_ms,
+                        ).where(
+                            (
+                                SystemSchema.operation_outputs.c.workflow_uuid
+                                == original_workflow_id
+                            )
+                            & (
+                                SystemSchema.operation_outputs.c.function_id
+                                < start_step
+                            )
+                        ),
+                    )
+                )
+                # Copy the original workflow's events
+                c.execute(
+                    sa.insert(SystemSchema.workflow_events_history).from_select(
+                        [
+                            "workflow_uuid",
+                            "function_id",
+                            "key",
+                            "value",
+                        ],
+                        sa.select(
+                            sa.literal(forked_workflow_id).label("workflow_uuid"),
+                            SystemSchema.workflow_events_history.c.function_id,
+                            SystemSchema.workflow_events_history.c.key,
+                            SystemSchema.workflow_events_history.c.value,
+                        ).where(
+                            (
+                                SystemSchema.workflow_events_history.c.workflow_uuid
+                                == original_workflow_id
+                            )
+                            & (
+                                SystemSchema.workflow_events_history.c.function_id
+                                < start_step
+                            )
+                        ),
+                    )
+                )
+                # Copy only the latest version of each workflow event from the history table
+                # (the one with the maximum function_id for each key where function_id < start_step)
+                weh1 = SystemSchema.workflow_events_history.alias("weh1")
+                weh2 = SystemSchema.workflow_events_history.alias("weh2")
 
-                )
+                max_function_id_subquery = (
+                    sa.select(sa.func.max(weh2.c.function_id))
+                    .where(
+                        (weh2.c.workflow_uuid == original_workflow_id)
+                        & (weh2.c.key == weh1.c.key)
+                        & (weh2.c.function_id < start_step)
+                    )
+                    .scalar_subquery()
+                )
+
+                c.execute(
+                    sa.insert(SystemSchema.workflow_events).from_select(
+                        [
+                            "workflow_uuid",
+                            "key",
+                            "value",
+                        ],
+                        sa.select(
+                            sa.literal(forked_workflow_id).label("workflow_uuid"),
+                            weh1.c.key,
+                            weh1.c.value,
+                        ).where(
+                            (weh1.c.workflow_uuid == original_workflow_id)
+                            & (weh1.c.function_id == max_function_id_subquery)
+                        ),
+                    )
+                )
+                # Copy the original workflow's streams
+                c.execute(
+                    sa.insert(SystemSchema.streams).from_select(
+                        [
+                            "workflow_uuid",
+                            "function_id",
+                            "key",
+                            "value",
+                            "offset",
+                        ],
+                        sa.select(
+                            sa.literal(forked_workflow_id).label("workflow_uuid"),
+                            SystemSchema.streams.c.function_id,
+                            SystemSchema.streams.c.key,
+                            SystemSchema.streams.c.value,
+                            SystemSchema.streams.c.offset,
+                        ).where(
+                            (
+                                SystemSchema.streams.c.workflow_uuid
+                                == original_workflow_id
+                            )
+                            & (SystemSchema.streams.c.function_id < start_step)
+                        ),
+                    )
+                )
             )
 
-            c.execute(insert_stmt)
             return forked_workflow_id
 
     @db_retry()
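The `max_function_id_subquery` in the hunk above is a correlated "greatest-per-group" query: for each event key it selects the row with the highest `function_id` below `start_step`. A self-contained sketch of the same pattern, with the table reduced to the relevant columns:

    import sqlalchemy as sa

    metadata = sa.MetaData()
    events = sa.Table(
        "workflow_events_history",
        metadata,
        sa.Column("workflow_uuid", sa.Text),
        sa.Column("function_id", sa.Integer),
        sa.Column("key", sa.Text),
        sa.Column("value", sa.Text),
    )
    e1 = events.alias("e1")
    e2 = events.alias("e2")
    # For each key in e1, find the highest function_id below start_step
    latest = (
        sa.select(sa.func.max(e2.c.function_id))
        .where(
            (e2.c.workflow_uuid == sa.bindparam("wf"))
            & (e2.c.key == e1.c.key)
            & (e2.c.function_id < sa.bindparam("start_step"))
        )
        .scalar_subquery()
    )
    # Keep only the row whose function_id equals that per-key maximum
    query = sa.select(e1.c.key, e1.c.value).where(
        (e1.c.workflow_uuid == sa.bindparam("wf")) & (e1.c.function_id == latest)
    )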
@@ -834,7 +951,7 @@ class SystemDatabase(ABC):
         return workflow_id
 
     @db_retry()
-    def await_workflow_result(self, workflow_id: str) -> Any:
+    def await_workflow_result(self, workflow_id: str, polling_interval: float) -> Any:
         while True:
             with self.engine.begin() as c:
                 row = c.execute(
@@ -859,7 +976,7 @@ class SystemDatabase(ABC):
                         raise DBOSAwaitedWorkflowCancelledError(workflow_id)
                     else:
                         pass  # CB: I guess we're assuming the WF will show up eventually.
-            time.sleep(
+            time.sleep(polling_interval)
 
     def get_workflows(
         self,
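The new `polling_interval` parameter threads through to the `time.sleep` call, making the wait loop's cadence configurable instead of hard-coded. Reduced to a sketch, with a hypothetical status lookup standing in for the database read:

    import time
    from typing import Any, Callable, Optional

    def await_result(
        fetch_result: Callable[[str], Optional[Any]],
        workflow_id: str,
        polling_interval: float,
    ) -> Any:
        # Poll until the workflow reaches a terminal state, sleeping a
        # configurable interval between reads.
        while True:
            result = fetch_result(workflow_id)
            if result is not None:
                return result
            time.sleep(polling_interval)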
@@ -902,11 +1019,12 @@ class SystemDatabase(ABC):
 
         if input.queues_only:
             query = sa.select(*load_columns).where(
-                SystemSchema.workflow_status.c.queue_name.isnot(None),
-                SystemSchema.workflow_status.c.status.in_(["ENQUEUED", "PENDING"]),
-                )
+                SystemSchema.workflow_status.c.queue_name.isnot(None),
             )
+            if not input.status:
+                query = query.where(
+                    SystemSchema.workflow_status.c.status.in_(["ENQUEUED", "PENDING"])
+                )
         else:
             query = sa.select(*load_columns)
         if input.sort_desc:
@@ -1033,7 +1151,7 @@
             for row in rows
         ]
 
-    def
+    def list_workflow_steps(self, workflow_id: str) -> List[StepInfo]:
         with self.engine.begin() as c:
             rows = c.execute(
                 sa.select(
@@ -1044,7 +1162,9 @@
                     SystemSchema.operation_outputs.c.child_workflow_id,
                     SystemSchema.operation_outputs.c.started_at_epoch_ms,
                     SystemSchema.operation_outputs.c.completed_at_epoch_ms,
-                )
+                )
+                .where(SystemSchema.operation_outputs.c.workflow_uuid == workflow_id)
+                .order_by(SystemSchema.operation_outputs.c.function_id)
             ).fetchall()
             steps = []
             for row in rows:
@@ -1075,6 +1195,9 @@
         error = result["error"]
         output = result["output"]
         assert error is None or output is None, "Only one of error or output can be set"
+
+        # Check if the executor ID belongs to another process.
+        # Reset it to this process's executor ID if so.
         wf_executor_id_row = conn.execute(
             sa.select(
                 SystemSchema.workflow_status.c.executor_id,
@@ -1096,17 +1219,20 @@
                     == result["workflow_uuid"]
                 )
             )
-                function_id=result["function_id"],
-                function_name=result["function_name"],
-                started_at_epoch_ms=result["started_at_epoch_ms"],
-                completed_at_epoch_ms=int(time.time() * 1000),
-                output=output,
-                error=error,
-            )
+
+        # Record the outcome, throwing DBOSWorkflowConflictIDError if it is already present
         try:
-            conn.execute(
+            conn.execute(
+                sa.insert(SystemSchema.operation_outputs).values(
+                    workflow_uuid=result["workflow_uuid"],
+                    function_id=result["function_id"],
+                    function_name=result["function_name"],
+                    started_at_epoch_ms=result["started_at_epoch_ms"],
+                    completed_at_epoch_ms=int(time.time() * 1000),
+                    output=output,
+                    error=error,
+                )
+            )
         except DBAPIError as dbapi_error:
             if self._is_unique_constraint_violation(dbapi_error):
                 raise DBOSWorkflowConflictIDError(result["workflow_uuid"])
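`_is_unique_constraint_violation` lets the insert double as a conflict check: a second checkpoint for the same workflow and `function_id` trips the table's uniqueness constraint and is surfaced as `DBOSWorkflowConflictIDError`. A hedged sketch of how such a check is commonly written (the dbos internals may differ):

    from sqlalchemy.exc import DBAPIError

    def is_unique_constraint_violation(err: DBAPIError) -> bool:
        # Postgres drivers expose SQLSTATE on the underlying exception;
        # 23505 is unique_violation. SQLite instead raises IntegrityError
        # with a "UNIQUE constraint failed" message.
        if getattr(err.orig, "pgcode", None) == "23505":
            return True
        return "UNIQUE constraint failed" in str(err.orig)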
@@ -1195,6 +1321,7 @@
             SystemSchema.operation_outputs.c.output,
             SystemSchema.operation_outputs.c.error,
             SystemSchema.operation_outputs.c.function_name,
+            SystemSchema.operation_outputs.c.child_workflow_id,
         ).where(
             (SystemSchema.operation_outputs.c.workflow_uuid == workflow_id)
             & (SystemSchema.operation_outputs.c.function_id == function_id)
@@ -1223,10 +1350,11 @@
             return None
 
         # Extract operation output data
-        output, error, recorded_function_name = (
+        output, error, recorded_function_name, child_workflow_id = (
             operation_output_rows[0][0],
             operation_output_rows[0][1],
             operation_output_rows[0][2],
+            operation_output_rows[0][3],
         )
 
         # If the provided and recorded function name are different, throw an exception
@@ -1241,6 +1369,7 @@
         result: RecordedResult = {
             "output": output,
             "error": error,
+            "child_workflow_id": child_workflow_id,
         }
         return result
 
@@ -1253,31 +1382,6 @@
                 workflow_id, function_id, function_name, c
             )
 
-    @db_retry()
-    def check_child_workflow(
-        self, workflow_uuid: str, function_id: int
-    ) -> Optional[str]:
-        sql = sa.select(
-            SystemSchema.operation_outputs.c.child_workflow_id,
-            SystemSchema.operation_outputs.c.error,
-        ).where(
-            SystemSchema.operation_outputs.c.workflow_uuid == workflow_uuid,
-            SystemSchema.operation_outputs.c.function_id == function_id,
-        )
-
-        # If in a transaction, use the provided connection
-        row: Any
-        with self.engine.begin() as c:
-            row = c.execute(sql).fetchone()
-
-        if row is None:
-            return None
-        elif row[1]:
-            e: Exception = self.serializer.deserialize(row[1])
-            raise e
-        else:
-            return str(row[0])
-
     @db_retry()
     def send(
         self,
@@ -1530,6 +1634,19 @@
                     set_={"value": self.serializer.serialize(message)},
                 )
             )
+            c.execute(
+                self.dialect.insert(SystemSchema.workflow_events_history)
+                .values(
+                    workflow_uuid=workflow_uuid,
+                    function_id=function_id,
+                    key=key,
+                    value=self.serializer.serialize(message),
+                )
+                .on_conflict_do_update(
+                    index_elements=["workflow_uuid", "key", "function_id"],
+                    set_={"value": self.serializer.serialize(message)},
+                )
+            )
         output: OperationResultInternal = {
             "workflow_uuid": workflow_uuid,
             "function_id": function_id,
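`self.dialect` is bound to either `sqlalchemy.dialects.postgresql` or `sqlalchemy.dialects.sqlite` (see the `__init__` hunk above), and both dialects' `insert` constructs support `on_conflict_do_update`, so the same upsert works on either backend. A runnable sketch of the pattern against an in-memory SQLite database:

    import sqlalchemy as sa
    import sqlalchemy.dialects.sqlite as sq

    metadata = sa.MetaData()
    history = sa.Table(
        "workflow_events_history",
        metadata,
        sa.Column("workflow_uuid", sa.Text, primary_key=True),
        sa.Column("function_id", sa.Integer, primary_key=True),
        sa.Column("key", sa.Text, primary_key=True),
        sa.Column("value", sa.Text),
    )

    stmt = (
        sq.insert(history)
        .values(workflow_uuid="wf-1", function_id=3, key="progress", value='"50"')
        .on_conflict_do_update(
            index_elements=["workflow_uuid", "key", "function_id"],
            set_={"value": '"50"'},
        )
    )

    engine = sa.create_engine("sqlite://")
    metadata.create_all(engine)
    with engine.begin() as c:
        c.execute(stmt)  # first execution inserts
        c.execute(stmt)  # replay updates instead of raising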
@@ -1543,6 +1660,7 @@
     def set_event_from_step(
         self,
         workflow_uuid: str,
+        function_id: int,
         key: str,
         message: Any,
     ) -> None:
@@ -1559,6 +1677,19 @@
                     set_={"value": self.serializer.serialize(message)},
                 )
             )
+            c.execute(
+                self.dialect.insert(SystemSchema.workflow_events_history)
+                .values(
+                    workflow_uuid=workflow_uuid,
+                    function_id=function_id,
+                    key=key,
+                    value=self.serializer.serialize(message),
+                )
+                .on_conflict_do_update(
+                    index_elements=["workflow_uuid", "key", "function_id"],
+                    set_={"value": self.serializer.serialize(message)},
+                )
+            )
 
     def get_all_events(self, workflow_id: str) -> Dict[str, Any]:
         """
@@ -1577,7 +1708,6 @@
                     SystemSchema.workflow_events.c.value,
                 ).where(SystemSchema.workflow_events.c.workflow_uuid == workflow_id)
             ).fetchall()
-
         events: Dict[str, Any] = {}
         for row in rows:
             key = row[0]
@@ -1732,10 +1862,6 @@
                     sa.select(sa.func.count())
                     .select_from(SystemSchema.workflow_status)
                     .where(SystemSchema.workflow_status.c.queue_name == queue.name)
-                    .where(
-                        SystemSchema.workflow_status.c.queue_partition_key
-                        == queue_partition_key
-                    )
                     .where(
                         SystemSchema.workflow_status.c.status
                         != WorkflowStatusString.ENQUEUED.value
@@ -1745,6 +1871,11 @@
                         > start_time_ms - limiter_period_ms
                     )
                 )
+                if queue_partition_key is not None:
+                    query = query.where(
+                        SystemSchema.workflow_status.c.queue_partition_key
+                        == queue_partition_key
+                    )
                 num_recent_queries = c.execute(query).fetchone()[0]  # type: ignore
                 if num_recent_queries >= queue.limiter["limit"]:
                     return []
@@ -1760,16 +1891,17 @@
                 )
                 .select_from(SystemSchema.workflow_status)
                 .where(SystemSchema.workflow_status.c.queue_name == queue.name)
-                .where(
-                    SystemSchema.workflow_status.c.queue_partition_key
-                    == queue_partition_key
-                )
                 .where(
                     SystemSchema.workflow_status.c.status
                     == WorkflowStatusString.PENDING.value
                 )
                 .group_by(SystemSchema.workflow_status.c.executor_id)
             )
+            if queue_partition_key is not None:
+                pending_tasks_query = pending_tasks_query.where(
+                    SystemSchema.workflow_status.c.queue_partition_key
+                    == queue_partition_key
+                )
             pending_workflows = c.execute(pending_tasks_query).fetchall()
             pending_workflows_dict = {row[0]: row[1] for row in pending_workflows}
             local_pending_workflows = pending_workflows_dict.get(executor_id, 0)
@@ -1805,10 +1937,6 @@
                 )
                 .select_from(SystemSchema.workflow_status)
                 .where(SystemSchema.workflow_status.c.queue_name == queue.name)
-                .where(
-                    SystemSchema.workflow_status.c.queue_partition_key
-                    == queue_partition_key
-                )
                 .where(
                     SystemSchema.workflow_status.c.status
                     == WorkflowStatusString.ENQUEUED.value
@@ -1825,6 +1953,11 @@
                 # to ensure all processes have a consistent view of the table.
                 .with_for_update(skip_locked=skip_locks, nowait=(not skip_locks))
             )
+            if queue_partition_key is not None:
+                query = query.where(
+                    SystemSchema.workflow_status.c.queue_partition_key
+                    == queue_partition_key
+                )
             if queue.priority_enabled:
                 query = query.order_by(
                     SystemSchema.workflow_status.c.priority.asc(),
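The partition-key hunks above all replace an unconditional `.where(queue_partition_key == queue_partition_key)` with a filter applied only when a partition key is set. Because each `.where()` call returns a new select, filters compose cleanly; a reduced sketch of the pattern with a hypothetical helper:

    from typing import Optional

    import sqlalchemy as sa

    def queued_workflows(
        table: sa.Table, queue_name: str, partition_key: Optional[str]
    ) -> sa.Select:
        query = sa.select(table.c.workflow_uuid).where(table.c.queue_name == queue_name)
        # Filter by partition only when one is set; in SQLAlchemy,
        # `col == None` would otherwise render as `IS NULL`.
        if partition_key is not None:
            query = query.where(table.c.queue_partition_key == partition_key)
        return query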
@@ -1968,7 +2101,9 @@
             dbos_logger.error(f"Error connecting to the DBOS system database: {e}")
             raise
 
-    def write_stream_from_step(
+    def write_stream_from_step(
+        self, workflow_uuid: str, function_id: int, key: str, value: Any
+    ) -> None:
         """
         Write a key-value pair to the stream at the first unused offset.
         """
@@ -1998,6 +2133,7 @@
                 c.execute(
                     sa.insert(SystemSchema.streams).values(
                         workflow_uuid=workflow_uuid,
+                        function_id=function_id,
                         key=key,
                         value=serialized_value,
                         offset=next_offset,
@@ -2054,6 +2190,7 @@
                 c.execute(
                     sa.insert(SystemSchema.streams).values(
                         workflow_uuid=workflow_uuid,
+                        function_id=function_id,
                         key=key,
                         value=serialized_value,
                         offset=next_offset,
@@ -2151,3 +2288,76 @@
         return cutoff_epoch_timestamp_ms, [
             row[0] for row in pending_enqueued_result
         ]
+
+    def get_metrics(self, start_time: str, end_time: str) -> List[MetricData]:
+        """
+        Retrieve the number of workflows and steps that ran in a time range.
+
+        Args:
+            start_time: ISO 8601 formatted start time
+            end_time: ISO 8601 formatted end time
+        """
+        # Convert ISO 8601 times to epoch milliseconds
+        start_epoch_ms = int(
+            datetime.datetime.fromisoformat(start_time).timestamp() * 1000
+        )
+        end_epoch_ms = int(datetime.datetime.fromisoformat(end_time).timestamp() * 1000)
+
+        metrics: List[MetricData] = []
+
+        with self.engine.begin() as c:
+            # Query workflow metrics
+            workflow_query = (
+                sa.select(
+                    SystemSchema.workflow_status.c.name,
+                    func.count(SystemSchema.workflow_status.c.workflow_uuid).label(
+                        "count"
+                    ),
+                )
+                .where(
+                    sa.and_(
+                        SystemSchema.workflow_status.c.created_at >= start_epoch_ms,
+                        SystemSchema.workflow_status.c.created_at < end_epoch_ms,
+                    )
+                )
+                .group_by(SystemSchema.workflow_status.c.name)
+            )
+
+            workflow_results = c.execute(workflow_query).fetchall()
+            for row in workflow_results:
+                metrics.append(
+                    MetricData(
+                        metric_type="workflow_count",
+                        metric_name=row[0],
+                        value=row[1],
+                    )
+                )
+
+            # Query step metrics
+            step_query = (
+                sa.select(
+                    SystemSchema.operation_outputs.c.function_name,
+                    func.count().label("count"),
+                )
+                .where(
+                    sa.and_(
+                        SystemSchema.operation_outputs.c.started_at_epoch_ms
+                        >= start_epoch_ms,
+                        SystemSchema.operation_outputs.c.started_at_epoch_ms
+                        < end_epoch_ms,
+                    )
+                )
+                .group_by(SystemSchema.operation_outputs.c.function_name)
+            )
+
+            step_results = c.execute(step_query).fetchall()
+            for row in step_results:
+                metrics.append(
+                    MetricData(
+                        metric_type="step_count",
+                        metric_name=row[0],
+                        value=row[1],
+                    )
+                )
+
+        return metrics
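A usage sketch for the new `get_metrics`, where `sys_db` stands in for any concrete `SystemDatabase` subclass:

    import datetime

    end = datetime.datetime.now(datetime.timezone.utc)
    start = end - datetime.timedelta(hours=1)

    # Both bounds are ISO 8601 strings, parsed internally with fromisoformat
    for m in sys_db.get_metrics(start.isoformat(), end.isoformat()):
        print(m["metric_type"], m["metric_name"], m["value"])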
dbos/_sys_db_postgres.py
CHANGED
@@ -31,18 +31,24 @@ class PostgresSystemDatabase(SystemDatabase):
         sysdb_name = system_db_url.database
         # Unless we were provided an engine, if the system database does not already exist, create it
         if self.created_engine:
+            try:
+                engine = sa.create_engine(
+                    system_db_url.set(database="postgres"), **self._engine_kwargs
+                )
+                with engine.connect() as conn:
+                    conn.execution_options(isolation_level="AUTOCOMMIT")
+                    if not conn.execute(
+                        sa.text("SELECT 1 FROM pg_database WHERE datname=:db_name"),
+                        parameters={"db_name": sysdb_name},
+                    ).scalar():
+                        dbos_logger.info(f"Creating system database {sysdb_name}")
+                        conn.execute(sa.text(f'CREATE DATABASE "{sysdb_name}"'))
+            except Exception:
+                dbos_logger.warning(
+                    f"Could not connect to postgres database to verify existence of {sysdb_name}. Continuing..."
+                )
+            finally:
+                engine.dispose()
         else:
             # If we were provided an engine, validate it can connect
             with self.engine.connect() as conn:
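The `AUTOCOMMIT` execution option is what makes the `CREATE DATABASE` legal: PostgreSQL refuses to run it inside a transaction block, and SQLAlchemy connections otherwise open one implicitly. A reduced sketch of the check-then-create pattern, with a hypothetical connection URL and database name:

    import sqlalchemy as sa

    admin_engine = sa.create_engine("postgresql://postgres@localhost/postgres")
    try:
        with admin_engine.connect() as conn:
            conn.execution_options(isolation_level="AUTOCOMMIT")
            exists = conn.execute(
                sa.text("SELECT 1 FROM pg_database WHERE datname = :name"),
                {"name": "dbos_sys"},
            ).scalar()
            if not exists:
                # Must run outside a transaction, hence AUTOCOMMIT
                conn.execute(sa.text('CREATE DATABASE "dbos_sys"'))
    finally:
        admin_engine.dispose()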
dbos/_tracer.py
CHANGED
@@ -8,6 +8,7 @@ if TYPE_CHECKING:
 from dbos._utils import GlobalParams
 
 from ._dbos_config import ConfigFile
+from ._logger import dbos_logger
 
 if TYPE_CHECKING:
     from ._context import TracedAttributes
@@ -46,7 +47,8 @@ class DBOSTracer:
 
         # Only set up OTLP provider and exporter if endpoints are provided
         if otlp_traces_endpoints is not None and len(otlp_traces_endpoints) > 0:
-            if
+            if isinstance(tracer_provider, trace.ProxyTracerProvider):
+                # Set a real TracerProvider if it was previously a ProxyTracerProvider
                 resource = Resource(
                     attributes={
                         SERVICE_NAME: config["name"],
@@ -61,7 +63,12 @@ class DBOSTracer:
 
             for e in otlp_traces_endpoints:
                 processor = BatchSpanProcessor(OTLPSpanExporter(endpoint=e))
-                tracer_provider.add_span_processor(processor)
+                tracer_provider.add_span_processor(processor)  # type: ignore
+
+            if isinstance(tracer_provider, trace.ProxyTracerProvider):
+                dbos_logger.warning(
+                    "OTLP is enabled but tracer provider not set, skipping trace exporter setup."
+                )
 
     def set_provider(self, provider: "Optional[TracerProvider]") -> None:
         self.provider = provider
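Background for the `ProxyTracerProvider` checks: in opentelemetry-python, `trace.get_tracer_provider()` returns a `ProxyTracerProvider` until a real provider is installed, and the proxy has no `add_span_processor`. A sketch of the check-then-replace pattern; the exporter package and endpoint are assumptions, since the diff does not show dbos's imports:

    from opentelemetry import trace
    from opentelemetry.sdk.resources import SERVICE_NAME, Resource
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor
    from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter

    provider = trace.get_tracer_provider()
    if isinstance(provider, trace.ProxyTracerProvider):
        # No real provider installed yet: set one so processors can attach
        provider = TracerProvider(resource=Resource(attributes={SERVICE_NAME: "my-app"}))
        trace.set_tracer_provider(provider)

    provider.add_span_processor(
        BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces"))
    )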