dbos 1.2.0a9__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbos/_client.py +15 -7
- dbos/_core.py +28 -22
- dbos/_dbos.py +11 -6
- dbos/_dbos_config.py +1 -1
- dbos/_event_loop.py +7 -10
- dbos/_logger.py +17 -11
- dbos/_migrations/versions/66478e1b95e5_consolidate_queues.py +71 -0
- dbos/_migrations/versions/d994145b47b6_consolidate_inputs.py +30 -0
- dbos/_queue.py +15 -2
- dbos/_schemas/system_database.py +10 -67
- dbos/_sys_db.py +157 -349
- {dbos-1.2.0a9.dist-info → dbos-1.3.0.dist-info}/METADATA +1 -1
- {dbos-1.2.0a9.dist-info → dbos-1.3.0.dist-info}/RECORD +16 -14
- {dbos-1.2.0a9.dist-info → dbos-1.3.0.dist-info}/WHEEL +0 -0
- {dbos-1.2.0a9.dist-info → dbos-1.3.0.dist-info}/entry_points.txt +0 -0
- {dbos-1.2.0a9.dist-info → dbos-1.3.0.dist-info}/licenses/LICENSE +0 -0
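The theme of this release is consolidation: the two new migrations fold the workflow_queue and workflow_inputs tables into workflow_status, and _sys_db.py is rewritten accordingly. A hedged sketch of what an inputs-consolidation migration of this shape plausibly does (illustrative only; the actual DDL ships in the wheel, and the schema name and column type here are assumptions):

# Hedged sketch of an inputs-consolidation migration. The real logic ships in
# d994145b47b6_consolidate_inputs.py inside the wheel; names and types here
# are assumptions for illustration.
import sqlalchemy as sa
from alembic import op

def upgrade() -> None:
    # Add the new column on workflow_status (assumed schema name: "dbos")
    op.add_column(
        "workflow_status",
        sa.Column("inputs", sa.Text(), nullable=True),
        schema="dbos",
    )
    # Backfill from the old workflow_inputs table
    op.execute(
        """
        UPDATE dbos.workflow_status AS ws
        SET inputs = wi.inputs
        FROM dbos.workflow_inputs AS wi
        WHERE wi.workflow_uuid = ws.workflow_uuid
        """
    )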
dbos/_sys_db.py
CHANGED
@@ -136,13 +136,19 @@ class WorkflowStatusInternal(TypedDict):
     # The deadline of a workflow, computed by adding its timeout to its start time.
     # Deadlines propagate to children. When the deadline is reached, the workflow is cancelled.
     workflow_deadline_epoch_ms: Optional[int]
+    # Unique ID for deduplication on a queue
+    deduplication_id: Optional[str]
+    # Priority of the workflow on the queue, starting from 1 ~ 2,147,483,647. Default 0 (highest priority).
+    priority: int
+    # Serialized workflow inputs
+    inputs: str


 class EnqueueOptionsInternal(TypedDict):
-
-
-
-
+    # Unique ID for deduplication on a queue
+    deduplication_id: Optional[str]
+    # Priority of the workflow on the queue, starting from 1 ~ 2,147,483,647. Default 0 (highest priority).
+    priority: Optional[int]


 class RecordedResult(TypedDict):
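Both TypedDicts now carry queue metadata (and, for statuses, the serialized inputs) directly. A minimal sketch of how a caller might populate the options dict (the deduplication ID value is hypothetical):

from typing import Optional, TypedDict

class EnqueueOptionsInternal(TypedDict):
    deduplication_id: Optional[str]
    priority: Optional[int]

# Only one active workflow per (queue, deduplication_id) pair is admitted;
# 0 means "default, highest priority" per the comment above.
options: EnqueueOptionsInternal = {
    "deduplication_id": "order-1234",  # hypothetical ID
    "priority": 10,
}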
@@ -456,6 +462,9 @@ class SystemDatabase:
                 ),
                 workflow_timeout_ms=status["workflow_timeout_ms"],
                 workflow_deadline_epoch_ms=status["workflow_deadline_epoch_ms"],
+                deduplication_id=status["deduplication_id"],
+                priority=status["priority"],
+                inputs=status["inputs"],
             )
             .on_conflict_do_update(
                 index_elements=["workflow_uuid"],
@@ -465,7 +474,20 @@

         cmd = cmd.returning(SystemSchema.workflow_status.c.recovery_attempts, SystemSchema.workflow_status.c.status, SystemSchema.workflow_status.c.workflow_deadline_epoch_ms, SystemSchema.workflow_status.c.name, SystemSchema.workflow_status.c.class_name, SystemSchema.workflow_status.c.config_name, SystemSchema.workflow_status.c.queue_name)  # type: ignore

-        results = conn.execute(cmd)
+        try:
+            results = conn.execute(cmd)
+        except DBAPIError as dbapi_error:
+            # Unique constraint violation for the deduplication ID
+            if dbapi_error.orig.sqlstate == "23505":  # type: ignore
+                assert status["deduplication_id"] is not None
+                assert status["queue_name"] is not None
+                raise DBOSQueueDeduplicatedError(
+                    status["workflow_uuid"],
+                    status["queue_name"],
+                    status["deduplication_id"],
+                )
+            else:
+                raise
         row = results.fetchone()
         if row is not None:
             # Check the started workflow matches the expected name, class_name, config_name, and queue_name
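Deduplication is now enforced by the database itself: the insert fails with SQLSTATE 23505 when another workflow already holds the (queue, deduplication ID) slot, and the error is translated into DBOSQueueDeduplicatedError. A minimal, self-contained sketch of the same pattern, assuming a Postgres DBAPI driver that exposes .sqlstate on the original exception (e.g., psycopg); the table, engine DSN, and error class here are hypothetical:

import sqlalchemy as sa
from sqlalchemy.exc import DBAPIError

engine = sa.create_engine("postgresql+psycopg://localhost/example")  # hypothetical DSN
metadata = sa.MetaData()
jobs = sa.Table(
    "jobs", metadata,
    sa.Column("id", sa.Text, primary_key=True),
    sa.Column("queue_name", sa.Text),
    sa.Column("deduplication_id", sa.Text),
    # One job per (queue, deduplication_id) pair, as in the new schema
    sa.UniqueConstraint("queue_name", "deduplication_id"),
)

class QueueDeduplicatedError(Exception):
    pass

def insert_job(conn: sa.Connection, job_id: str, queue: str, dedup: str) -> None:
    try:
        conn.execute(
            jobs.insert().values(id=job_id, queue_name=queue, deduplication_id=dedup)
        )
    except DBAPIError as e:
        # 23505 is Postgres's unique-constraint-violation SQLSTATE
        if getattr(e.orig, "sqlstate", None) == "23505":
            raise QueueDeduplicatedError(f"{job_id} deduplicated on {queue}") from e
        raise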
@@ -495,12 +517,6 @@
                 and max_recovery_attempts is not None
                 and recovery_attempts > max_recovery_attempts + 1
             ):
-                delete_cmd = sa.delete(SystemSchema.workflow_queue).where(
-                    SystemSchema.workflow_queue.c.workflow_uuid
-                    == status["workflow_uuid"]
-                )
-                conn.execute(delete_cmd)
-
                 dlq_cmd = (
                     sa.update(SystemSchema.workflow_status)
                     .where(
@@ -513,6 +529,8 @@
                     )
                     .values(
                         status=WorkflowStatusString.RETRIES_EXCEEDED.value,
+                        deduplication_id=None,
+                        started_at_epoch_ms=None,
                         queue_name=None,
                     )
                 )
@@ -526,44 +544,28 @@
         return wf_status, workflow_deadline_epoch_ms

     @db_retry()
-    def update_workflow_status(
+    def update_workflow_outcome(
         self,
-        status: WorkflowStatusInternal,
+        workflow_id: str,
+        status: WorkflowStatuses,
+        *,
+        output: Optional[str] = None,
+        error: Optional[str] = None,
     ) -> None:
         if self._debug_mode:
             raise Exception("called update_workflow_status in debug mode")
-        wf_status: WorkflowStatuses = status["status"]
         with self.engine.begin() as c:
             c.execute(
-                pg.insert(SystemSchema.workflow_status)
+                sa.update(SystemSchema.workflow_status)
                 .values(
-
-
-
-
-
-
-                    error=status["error"],
-                    executor_id=status["executor_id"],
-                    application_version=status["app_version"],
-                    application_id=status["app_id"],
-                    authenticated_user=status["authenticated_user"],
-                    authenticated_roles=status["authenticated_roles"],
-                    assumed_role=status["assumed_role"],
-                    queue_name=status["queue_name"],
-                    recovery_attempts=(
-                        1 if wf_status != WorkflowStatusString.ENQUEUED.value else 0
-                    ),
-                )
-                .on_conflict_do_update(
-                    index_elements=["workflow_uuid"],
-                    set_=dict(
-                        status=status["status"],
-                        output=status["output"],
-                        error=status["error"],
-                        updated_at=func.extract("epoch", func.now()) * 1000,
-                    ),
+                    status=status,
+                    output=output,
+                    error=error,
+                    # As the workflow is complete, remove its deduplication ID
+                    deduplication_id=None,
+                    updated_at=func.extract("epoch", func.now()) * 1000,
                 )
+                .where(SystemSchema.workflow_status.c.workflow_uuid == workflow_id)
             )

     def cancel_workflow(
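The full-status upsert becomes a targeted UPDATE: callers now pass the terminal status plus the serialized output or error, and the statement clears the deduplication ID as a side effect. A sketch of the new call shape (sys_db, workflow_id, and the serialized values are illustrative):

# Success path: record the output; the UPDATE also clears deduplication_id.
sys_db.update_workflow_outcome(
    workflow_id,
    WorkflowStatusString.SUCCESS.value,
    output=serialized_output,
)
# Failure path: record the serialized exception instead.
sys_db.update_workflow_outcome(
    workflow_id,
    WorkflowStatusString.ERROR.value,
    error=serialized_error,
)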
@@ -585,18 +587,15 @@
                 or row[0] == WorkflowStatusString.ERROR.value
             ):
                 return
-            #
-            c.execute(
-                sa.delete(SystemSchema.workflow_queue).where(
-                    SystemSchema.workflow_queue.c.workflow_uuid == workflow_id
-                )
-            )
-            # Set the workflow's status to CANCELLED
+            # Set the workflow's status to CANCELLED and remove it from any queue it is on
             c.execute(
                 sa.update(SystemSchema.workflow_status)
                 .where(SystemSchema.workflow_status.c.workflow_uuid == workflow_id)
                 .values(
                     status=WorkflowStatusString.CANCELLED.value,
+                    queue_name=None,
+                    deduplication_id=None,
+                    started_at_epoch_ms=None,
                 )
             )

@@ -620,19 +619,6 @@
                 or status == WorkflowStatusString.ERROR.value
             ):
                 return
-            # Remove the workflow from the queues table so resume can safely be called on an ENQUEUED workflow
-            c.execute(
-                sa.delete(SystemSchema.workflow_queue).where(
-                    SystemSchema.workflow_queue.c.workflow_uuid == workflow_id
-                )
-            )
-            # Enqueue the workflow on the internal queue
-            c.execute(
-                pg.insert(SystemSchema.workflow_queue).values(
-                    workflow_uuid=workflow_id,
-                    queue_name=INTERNAL_QUEUE_NAME,
-                )
-            )
             # Set the workflow's status to ENQUEUED and clear its recovery attempts and deadline.
             c.execute(
                 sa.update(SystemSchema.workflow_status)
@@ -642,6 +628,8 @@
                     queue_name=INTERNAL_QUEUE_NAME,
                     recovery_attempts=0,
                     workflow_deadline_epoch_ms=None,
+                    deduplication_id=None,
+                    started_at_epoch_ms=None,
                 )
             )

@@ -657,9 +645,6 @@
         status = self.get_workflow_status(original_workflow_id)
         if status is None:
             raise Exception(f"Workflow {original_workflow_id} not found")
-        inputs = self.get_workflow_inputs(original_workflow_id)
-        if inputs is None:
-            raise Exception(f"Workflow {original_workflow_id} not found")

         with self.engine.begin() as c:
             # Create an entry for the forked workflow with the same
@@ -681,13 +666,7 @@
                     authenticated_roles=status["authenticated_roles"],
                     assumed_role=status["assumed_role"],
                     queue_name=INTERNAL_QUEUE_NAME,
-                )
-            )
-            # Copy the original workflow's inputs into the forked workflow
-            c.execute(
-                pg.insert(SystemSchema.workflow_inputs).values(
-                    workflow_uuid=forked_workflow_id,
-                    inputs=_serialization.serialize_args(inputs),
+                    inputs=status["inputs"],
                 )
             )

@@ -720,14 +699,6 @@
             )

             c.execute(insert_stmt)
-
-            # Enqueue the forked workflow on the internal queue
-            c.execute(
-                pg.insert(SystemSchema.workflow_queue).values(
-                    workflow_uuid=forked_workflow_id,
-                    queue_name=INTERNAL_QUEUE_NAME,
-                )
-            )
         return forked_workflow_id

     @db_retry()
@@ -753,6 +724,9 @@
                     SystemSchema.workflow_status.c.application_id,
                     SystemSchema.workflow_status.c.workflow_deadline_epoch_ms,
                     SystemSchema.workflow_status.c.workflow_timeout_ms,
+                    SystemSchema.workflow_status.c.deduplication_id,
+                    SystemSchema.workflow_status.c.priority,
+                    SystemSchema.workflow_status.c.inputs,
                 ).where(SystemSchema.workflow_status.c.workflow_uuid == workflow_uuid)
             ).fetchone()
             if row is None:
@@ -777,6 +751,9 @@
                 "app_id": row[13],
                 "workflow_deadline_epoch_ms": row[14],
                 "workflow_timeout_ms": row[15],
+                "deduplication_id": row[16],
+                "priority": row[17],
+                "inputs": row[18],
             }
             return status

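Positional indexing into the result row now runs out to row[18], so any future column reordering must touch both the SELECT and this mapping. As a sketch of an alternative style (not what the package does), SQLAlchemy's RowMapping reads the same columns by name and tolerates reordering:

# Alternative sketch: name-based access via Row._mapping (SQLAlchemy 1.4+).
row = conn.execute(
    sa.select(SystemSchema.workflow_status).where(
        SystemSchema.workflow_status.c.workflow_uuid == workflow_uuid
    )
).one_or_none()
if row is not None:
    m = row._mapping
    deduplication_id = m["deduplication_id"]
    priority = m["priority"]
    inputs = m["inputs"]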
@@ -807,53 +784,6 @@
                 pass  # CB: I guess we're assuming the WF will show up eventually.
             time.sleep(1)

-    def _update_workflow_inputs(
-        self, workflow_uuid: str, inputs: str, conn: sa.Connection
-    ) -> None:
-        if self._debug_mode:
-            raise Exception("called update_workflow_inputs in debug mode")
-
-        cmd = (
-            pg.insert(SystemSchema.workflow_inputs)
-            .values(
-                workflow_uuid=workflow_uuid,
-                inputs=inputs,
-            )
-            .on_conflict_do_update(
-                index_elements=["workflow_uuid"],
-                set_=dict(workflow_uuid=SystemSchema.workflow_inputs.c.workflow_uuid),
-            )
-            .returning(SystemSchema.workflow_inputs.c.inputs)
-        )
-
-        row = conn.execute(cmd).fetchone()
-        if row is not None and row[0] != inputs:
-            # In a distributed environment, scheduled workflows are enqueued multiple times with slightly different timestamps
-            if not workflow_uuid.startswith("sched-"):
-                dbos_logger.warning(
-                    f"Workflow {workflow_uuid} has been called multiple times with different inputs"
-                )
-            # TODO: actually changing the input
-
-        return
-
-    @db_retry()
-    def get_workflow_inputs(
-        self, workflow_uuid: str
-    ) -> Optional[_serialization.WorkflowInputs]:
-        with self.engine.begin() as c:
-            row = c.execute(
-                sa.select(SystemSchema.workflow_inputs.c.inputs).where(
-                    SystemSchema.workflow_inputs.c.workflow_uuid == workflow_uuid
-                )
-            ).fetchone()
-            if row is None:
-                return None
-            inputs: _serialization.WorkflowInputs = _serialization.deserialize_args(
-                row[0]
-            )
-            return inputs
-
     def get_workflows(self, input: GetWorkflowsInput) -> List[WorkflowStatus]:
         """
         Retrieve a list of workflows result and inputs based on the input criteria. The result is a list of external-facing workflow status objects.
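With the workflow_inputs helpers gone, the serialized inputs ride along on the status row itself. A sketch of the new read path (caller code is illustrative; deserialize_args is the same helper the removed code used):

status = sys_db.get_workflow_status(workflow_id)
if status is not None:
    # "inputs" is the serialized-args string now stored on workflow_status
    inputs = _serialization.deserialize_args(status["inputs"])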
@@ -874,15 +804,11 @@
             SystemSchema.workflow_status.c.updated_at,
             SystemSchema.workflow_status.c.application_version,
             SystemSchema.workflow_status.c.application_id,
-            SystemSchema.workflow_inputs.c.inputs,
+            SystemSchema.workflow_status.c.inputs,
             SystemSchema.workflow_status.c.output,
             SystemSchema.workflow_status.c.error,
             SystemSchema.workflow_status.c.workflow_deadline_epoch_ms,
             SystemSchema.workflow_status.c.workflow_timeout_ms,
-        ).join(
-            SystemSchema.workflow_inputs,
-            SystemSchema.workflow_status.c.workflow_uuid
-            == SystemSchema.workflow_inputs.c.workflow_uuid,
         )
         if input.sort_desc:
             query = query.order_by(SystemSchema.workflow_status.c.created_at.desc())
@@ -988,20 +914,15 @@
             SystemSchema.workflow_status.c.updated_at,
             SystemSchema.workflow_status.c.application_version,
             SystemSchema.workflow_status.c.application_id,
-            SystemSchema.workflow_inputs.c.inputs,
+            SystemSchema.workflow_status.c.inputs,
             SystemSchema.workflow_status.c.output,
             SystemSchema.workflow_status.c.error,
             SystemSchema.workflow_status.c.workflow_deadline_epoch_ms,
             SystemSchema.workflow_status.c.workflow_timeout_ms,
-        ).select_from(
-            SystemSchema.workflow_queue.join(
-                SystemSchema.workflow_status,
-                SystemSchema.workflow_queue.c.workflow_uuid
-                == SystemSchema.workflow_status.c.workflow_uuid,
-            ).join(
-                SystemSchema.workflow_inputs,
-                SystemSchema.workflow_queue.c.workflow_uuid
-                == SystemSchema.workflow_inputs.c.workflow_uuid,
+        ).where(
+            sa.and_(
+                SystemSchema.workflow_status.c.queue_name.isnot(None),
+                SystemSchema.workflow_status.c.status.in_(["ENQUEUED", "PENDING"]),
             )
         )
         if input["sort_desc"]:
@@ -1014,7 +935,7 @@

         if input.get("queue_name"):
             query = query.where(
-                SystemSchema.workflow_queue.c.queue_name == input["queue_name"]
+                SystemSchema.workflow_status.c.queue_name == input["queue_name"]
             )

         if input.get("status"):
@@ -1385,35 +1306,35 @@
         payload = f"{workflow_uuid}::{topic}"
         condition = threading.Condition()
         # Must acquire first before adding to the map. Otherwise, the notification listener may notify it before the condition is acquired and waited.
-
-
-
-
-
-
-            raise DBOSWorkflowConflictIDError(workflow_uuid)
+        try:
+            condition.acquire()
+            success, _ = self.notifications_map.set(payload, condition)
+            if not success:
+                # This should not happen, but if it does, it means the workflow is executed concurrently.
+                raise DBOSWorkflowConflictIDError(workflow_uuid)

-
-
-
-
-
-
-
-
-
-
-
+            # Check if the key is already in the database. If not, wait for the notification.
+            init_recv: Sequence[Any]
+            with self.engine.begin() as c:
+                init_recv = c.execute(
+                    sa.select(
+                        SystemSchema.notifications.c.topic,
+                    ).where(
+                        SystemSchema.notifications.c.destination_uuid == workflow_uuid,
+                        SystemSchema.notifications.c.topic == topic,
+                    )
+                ).fetchall()

-
-
-
-
-
-
-
-
-
+            if len(init_recv) == 0:
+                # Wait for the notification
+                # Support OAOO sleep
+                actual_timeout = self.sleep(
+                    workflow_uuid, timeout_function_id, timeout_seconds, skip_sleep=True
+                )
+                condition.wait(timeout=actual_timeout)
+        finally:
+            condition.release()
+            self.notifications_map.pop(payload)

         # Transactionally consume and return the message if it's in the database, otherwise return null.
         with self.engine.begin() as c:
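The rewrite wraps the whole wait in try/finally so the condition is always released and the waiter always deregistered, even when the conflict error is raised. A minimal, self-contained sketch of that shape (names are illustrative, not the package's API):

import threading

waiters: dict[str, threading.Condition] = {}

def wait_for_notification(key: str, already_delivered: bool, timeout: float) -> None:
    condition = threading.Condition()
    condition.acquire()
    try:
        if key in waiters:
            # Two waiters on the same key means concurrent execution
            raise RuntimeError(f"concurrent waiter for {key}")
        waiters[key] = condition
        if not already_delivered:
            # Woken early if the notification listener calls notify()
            condition.wait(timeout=timeout)
    finally:
        condition.release()
        waiters.pop(key, None)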
@@ -1693,51 +1614,6 @@
             )
             return value

-    def _enqueue(
-        self,
-        workflow_id: str,
-        queue_name: str,
-        conn: sa.Connection,
-        *,
-        enqueue_options: Optional[EnqueueOptionsInternal],
-    ) -> None:
-        if self._debug_mode:
-            raise Exception("called enqueue in debug mode")
-        try:
-            deduplication_id = (
-                enqueue_options["deduplication_id"]
-                if enqueue_options is not None
-                else None
-            )
-            priority = (
-                enqueue_options["priority"] if enqueue_options is not None else None
-            )
-            # Default to 0 (highest priority) if not provided
-            if priority is None:
-                priority = 0
-            query = (
-                pg.insert(SystemSchema.workflow_queue)
-                .values(
-                    workflow_uuid=workflow_id,
-                    queue_name=queue_name,
-                    deduplication_id=deduplication_id,
-                    priority=priority,
-                )
-                .on_conflict_do_nothing(
-                    index_elements=SystemSchema.workflow_queue.primary_key.columns
-                )
-            )  # Ignore primary key constraint violation
-            conn.execute(query)
-        except DBAPIError as dbapi_error:
-            # Unique constraint violation for the deduplication ID
-            if dbapi_error.orig.sqlstate == "23505":  # type: ignore
-                assert (
-                    deduplication_id is not None
-                ), f"deduplication_id should not be None. Workflow ID: {workflow_id}, Queue name: {queue_name}."
-                raise DBOSQueueDeduplicatedError(
-                    workflow_id, queue_name, deduplication_id
-                )
-
     def start_queued_workflows(
         self, queue: "Queue", executor_id: str, app_version: str
     ) -> List[str]:
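With _enqueue deleted, enqueueing is just the workflow_status upsert with status ENQUEUED and a queue_name; the 23505 handler in _insert_workflow_status takes over dedup. For that handler to fire only when a deduplication ID was supplied, the underlying constraint has to be partial. A hedged sketch of that kind of constraint (index name and table shape are assumptions; the real DDL ships in 66478e1b95e5_consolidate_queues.py):

import sqlalchemy as sa

metadata = sa.MetaData()
workflow_status = sa.Table(
    "workflow_status", metadata,
    sa.Column("workflow_uuid", sa.Text, primary_key=True),
    sa.Column("queue_name", sa.Text),
    sa.Column("deduplication_id", sa.Text),
)
sa.Index(
    "uq_workflow_status_queue_name_dedup_id",  # hypothetical name
    workflow_status.c.queue_name,
    workflow_status.c.deduplication_id,
    unique=True,
    # Enforce uniqueness only when a deduplication ID is present
    postgresql_where=workflow_status.c.deduplication_id.isnot(None),
)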
@@ -1755,13 +1631,14 @@
         if queue.limiter is not None:
             query = (
                 sa.select(sa.func.count())
-                .select_from(SystemSchema.workflow_queue)
-                .where(SystemSchema.workflow_queue.c.queue_name == queue.name)
+                .select_from(SystemSchema.workflow_status)
+                .where(SystemSchema.workflow_status.c.queue_name == queue.name)
                 .where(
-                    SystemSchema.
+                    SystemSchema.workflow_status.c.status
+                    != WorkflowStatusString.ENQUEUED.value
                 )
                 .where(
-                    SystemSchema.workflow_queue.c.started_at_epoch_ms
+                    SystemSchema.workflow_status.c.started_at_epoch_ms
                     > start_time_ms - limiter_period_ms
                 )
             )
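The limiter count now comes straight off workflow_status: anything on the queue that has left ENQUEUED and started within the trailing window counts against the limit. The same arithmetic as a pure-function sketch (names are illustrative):

import time

def under_rate_limit(started_at_ms: list[int], limit: int, period_s: float) -> bool:
    # Count workflows started within the trailing window; dequeue only if below the cap.
    cutoff_ms = time.time() * 1000 - period_s * 1000
    recent = sum(1 for t in started_at_ms if t > cutoff_ms)
    return recent < limit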
@@ -1769,64 +1646,57 @@
         if num_recent_queries >= queue.limiter["limit"]:
             return []

-        # Count how many workflows on this queue are currently PENDING both locally and globally.
-        pending_tasks_query = (
-            sa.select(
-                SystemSchema.workflow_status.c.executor_id,
-                sa.func.count().label("task_count"),
-            )
-            .select_from(
-                SystemSchema.workflow_queue.join(
-                    SystemSchema.workflow_status,
-                    SystemSchema.workflow_queue.c.workflow_uuid
-                    == SystemSchema.workflow_status.c.workflow_uuid,
-                )
-            )
-            .where(SystemSchema.workflow_queue.c.queue_name == queue.name)
-            .where(
-                SystemSchema.workflow_status.c.status
-                == WorkflowStatusString.PENDING.value
-            )
-            .group_by(SystemSchema.workflow_status.c.executor_id)
-        )
-        pending_workflows = c.execute(pending_tasks_query).fetchall()
-        pending_workflows_dict = {row[0]: row[1] for row in pending_workflows}
-        local_pending_workflows = pending_workflows_dict.get(executor_id, 0)
-
         # Compute max_tasks, the number of workflows that can be dequeued given local and global concurrency limits,
         max_tasks = float("inf")
-        if queue.worker_concurrency is not None:
-            #
-
-
-
+        if queue.worker_concurrency is not None or queue.concurrency is not None:
+            # Count how many workflows on this queue are currently PENDING both locally and globally.
+            pending_tasks_query = (
+                sa.select(
+                    SystemSchema.workflow_status.c.executor_id,
+                    sa.func.count().label("task_count"),
+                )
+                .select_from(SystemSchema.workflow_status)
+                .where(SystemSchema.workflow_status.c.queue_name == queue.name)
+                .where(
+                    SystemSchema.workflow_status.c.status
+                    == WorkflowStatusString.PENDING.value
+                )
+                .group_by(SystemSchema.workflow_status.c.executor_id)
+            )
+            pending_workflows = c.execute(pending_tasks_query).fetchall()
+            pending_workflows_dict = {row[0]: row[1] for row in pending_workflows}
+            local_pending_workflows = pending_workflows_dict.get(executor_id, 0)
+
+            if queue.worker_concurrency is not None:
+                # Print a warning if the local concurrency limit is violated
+                if local_pending_workflows > queue.worker_concurrency:
+                    dbos_logger.warning(
+                        f"The number of local pending workflows ({local_pending_workflows}) on queue {queue.name} exceeds the local concurrency limit ({queue.worker_concurrency})"
+                    )
+                max_tasks = max(
+                    0, queue.worker_concurrency - local_pending_workflows
                 )
-            max_tasks = max(0, queue.worker_concurrency - local_pending_workflows)

-
-
-
-
-
-
+            if queue.concurrency is not None:
+                global_pending_workflows = sum(pending_workflows_dict.values())
+                # Print a warning if the global concurrency limit is violated
+                if global_pending_workflows > queue.concurrency:
+                    dbos_logger.warning(
+                        f"The total number of pending workflows ({global_pending_workflows}) on queue {queue.name} exceeds the global concurrency limit ({queue.concurrency})"
+                    )
+                available_tasks = max(
+                    0, queue.concurrency - global_pending_workflows
                 )
-
-                max_tasks = min(max_tasks, available_tasks)
+                max_tasks = min(max_tasks, available_tasks)

         # Retrieve the first max_tasks workflows in the queue.
         # Only retrieve workflows of the local version (or without version set)
         query = (
             sa.select(
-                SystemSchema.workflow_queue.c.workflow_uuid,
-            )
-            .select_from(
-                SystemSchema.workflow_queue.join(
-                    SystemSchema.workflow_status,
-                    SystemSchema.workflow_queue.c.workflow_uuid
-                    == SystemSchema.workflow_status.c.workflow_uuid,
-                )
+                SystemSchema.workflow_status.c.workflow_uuid,
             )
-            .where(SystemSchema.workflow_queue.c.queue_name == queue.name)
+            .select_from(SystemSchema.workflow_status)
+            .where(SystemSchema.workflow_status.c.queue_name == queue.name)
             .where(
                 SystemSchema.workflow_status.c.status
                 == WorkflowStatusString.ENQUEUED.value
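The PENDING counts are now taken only when some concurrency limit is actually configured, then folded into max_tasks. The same arithmetic as a pure function (a sketch; names are illustrative):

from typing import Optional

def compute_max_tasks(
    worker_concurrency: Optional[int],
    global_concurrency: Optional[int],
    pending_by_executor: dict[str, int],
    executor_id: str,
) -> float:
    max_tasks = float("inf")
    local_pending = pending_by_executor.get(executor_id, 0)
    if worker_concurrency is not None:
        # Local cap: slots left on this executor
        max_tasks = max(0, worker_concurrency - local_pending)
    if global_concurrency is not None:
        # Global cap: slots left across all executors
        global_pending = sum(pending_by_executor.values())
        max_tasks = min(max_tasks, max(0, global_concurrency - global_pending))
    return max_tasks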
@@ -1838,12 +1708,15 @@
                     SystemSchema.workflow_status.c.application_version.is_(None),
                 )
             )
-            .order_by(
-                SystemSchema.workflow_queue.c.priority.asc(),
-                SystemSchema.workflow_queue.c.created_at_epoch_ms.asc(),
-            )
             .with_for_update(nowait=True)  # Error out early
         )
+        if queue.priority_enabled:
+            query = query.order_by(
+                SystemSchema.workflow_status.c.priority.asc(),
+                SystemSchema.workflow_status.c.created_at.asc(),
+            )
+        else:
+            query = query.order_by(SystemSchema.workflow_status.c.created_at.asc())
         # Apply limit only if max_tasks is finite
         if max_tasks != float("inf"):
             query = query.limit(int(max_tasks))
@@ -1873,6 +1746,7 @@
                     status=WorkflowStatusString.PENDING.value,
                     application_version=app_version,
                     executor_id=executor_id,
+                    started_at_epoch_ms=start_time_ms,
                     # If a timeout is set, set the deadline on dequeue
                     workflow_deadline_epoch_ms=sa.case(
                         (
@@ -1892,82 +1766,31 @@
                 )
             )
             # Then give it a start time
-            c.execute(
-                SystemSchema.workflow_queue.update()
-                .where(SystemSchema.workflow_queue.c.workflow_uuid == id)
-                .values(started_at_epoch_ms=start_time_ms)
-            )
             ret_ids.append(id)

-        # If we have a limiter, garbage-collect all completed workflows started
-        # before the period. If there's no limiter, there's no need--they were
-        # deleted on completion.
-        if queue.limiter is not None:
-            c.execute(
-                sa.delete(SystemSchema.workflow_queue)
-                .where(SystemSchema.workflow_queue.c.completed_at_epoch_ms != None)
-                .where(SystemSchema.workflow_queue.c.queue_name == queue.name)
-                .where(
-                    SystemSchema.workflow_queue.c.started_at_epoch_ms
-                    < start_time_ms - limiter_period_ms
-                )
-            )
-
         # Return the IDs of all functions we started
         return ret_ids

-    @db_retry()
-    def remove_from_queue(self, workflow_id: str, queue: "Queue") -> None:
-        if self._debug_mode:
-            raise Exception("called remove_from_queue in debug mode")
-
-        with self.engine.begin() as c:
-            if queue.limiter is None:
-                c.execute(
-                    sa.delete(SystemSchema.workflow_queue).where(
-                        SystemSchema.workflow_queue.c.workflow_uuid == workflow_id
-                    )
-                )
-            else:
-                c.execute(
-                    sa.update(SystemSchema.workflow_queue)
-                    .where(SystemSchema.workflow_queue.c.workflow_uuid == workflow_id)
-                    .values(completed_at_epoch_ms=int(time.time() * 1000))
-                )
-
     def clear_queue_assignment(self, workflow_id: str) -> bool:
         if self._debug_mode:
             raise Exception("called clear_queue_assignment in debug mode")

-        with self.engine.connect() as conn:
-
-
-
-
-
-
-
-
-            .values(started_at_epoch_ms=None)
+        with self.engine.begin() as c:
+            # Reset the status of the task to "ENQUEUED"
+            res = c.execute(
+                sa.update(SystemSchema.workflow_status)
+                .where(SystemSchema.workflow_status.c.workflow_uuid == workflow_id)
+                .where(SystemSchema.workflow_status.c.queue_name.isnot(None))
+                .where(
+                    SystemSchema.workflow_status.c.status
+                    == WorkflowStatusString.PENDING.value
                 )
-
-
-            if res.rowcount == 0:
-                transaction.rollback()
-                return False
-
-            # Reset the status of the task to "ENQUEUED"
-            res = conn.execute(
-                sa.update(SystemSchema.workflow_status)
-                .where(SystemSchema.workflow_status.c.workflow_uuid == workflow_id)
-                .values(status=WorkflowStatusString.ENQUEUED.value)
+                .values(
+                    status=WorkflowStatusString.ENQUEUED.value, started_at_epoch_ms=None
                 )
-
-
-
-                f"UNREACHABLE: Workflow {workflow_id} is found in the workflow_queue table but not found in the workflow_status table"
-            )
-            return True
+            )
+            # If no rows were affected, the workflow is not anymore in the queue or was already completed
+            return res.rowcount > 0

 T = TypeVar("T")

@@ -2009,10 +1832,8 @@
     def init_workflow(
         self,
        status: WorkflowStatusInternal,
-        inputs: str,
         *,
         max_recovery_attempts: Optional[int],
-        enqueue_options: Optional[EnqueueOptionsInternal],
     ) -> tuple[WorkflowStatuses, Optional[int]]:
         """
         Synchronously record the status and inputs for workflows in a single transaction
@@ -2021,19 +1842,6 @@
         wf_status, workflow_deadline_epoch_ms = self._insert_workflow_status(
             status, conn, max_recovery_attempts=max_recovery_attempts
         )
-        # TODO: Modify the inputs if they were changed by `update_workflow_inputs`
-        self._update_workflow_inputs(status["workflow_uuid"], inputs, conn)
-
-        if (
-            status["queue_name"] is not None
-            and wf_status == WorkflowStatusString.ENQUEUED.value
-        ):
-            self._enqueue(
-                status["workflow_uuid"],
-                status["queue_name"],
-                conn,
-                enqueue_options=enqueue_options,
-            )
         return wf_status, workflow_deadline_epoch_ms

     def check_connection(self) -> None:
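init_workflow now reduces to the single _insert_workflow_status upsert, since inputs, queue name, deduplication ID, and priority all travel inside WorkflowStatusInternal. The new call shape, as a sketch (sys_db and the status variable are illustrative):

# One transaction records everything; enqueueing is implied by
# status["status"] == "ENQUEUED" plus a non-null status["queue_name"].
wf_status, deadline_ms = sys_db.init_workflow(
    status,  # WorkflowStatusInternal carrying inputs/deduplication_id/priority
    max_recovery_attempts=None,
)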
|