dbos 0.22.0a10__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dbos might be problematic.

dbos/_sys_db.py CHANGED
@@ -14,9 +14,7 @@ from typing import (
     Optional,
     Sequence,
     Set,
-    Tuple,
     TypedDict,
-    cast,
 )
 
 import psycopg
@@ -27,6 +25,8 @@ from alembic.config import Config
 from sqlalchemy.exc import DBAPIError
 from sqlalchemy.sql import func
 
+from dbos._utils import GlobalParams
+
 from . import _serialization
 from ._dbos_config import ConfigFile
 from ._error import (
@@ -66,17 +66,19 @@ class WorkflowStatusInternal(TypedDict):
     name: str
     class_name: Optional[str]
     config_name: Optional[str]
+    authenticated_user: Optional[str]
+    assumed_role: Optional[str]
+    authenticated_roles: Optional[str]  # JSON list of roles
     output: Optional[str]  # JSON (jsonpickle)
+    request: Optional[str]  # JSON (jsonpickle)
     error: Optional[str]  # JSON (jsonpickle)
+    created_at: Optional[int]  # Unix epoch timestamp in ms
+    updated_at: Optional[int]  # Unix epoch timestamp in ms
+    queue_name: Optional[str]
     executor_id: Optional[str]
     app_version: Optional[str]
     app_id: Optional[str]
-    request: Optional[str]  # JSON (jsonpickle)
     recovery_attempts: Optional[int]
-    authenticated_user: Optional[str]
-    assumed_role: Optional[str]
-    authenticated_roles: Optional[str]  # JSON list of roles.
-    queue_name: Optional[str]
 
 
 class RecordedResult(TypedDict):
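Besides reordering, this hunk adds timestamp and queue metadata to `WorkflowStatusInternal`. Per the inline comments, `created_at` and `updated_at` are Unix epoch timestamps in milliseconds, so consumers must scale them before converting. A minimal sketch (the record literal below is illustrative, not taken from the package):

```python
from datetime import datetime, timezone

# Illustrative record shaped like the 0.23.0 WorkflowStatusInternal fields.
status = {
    "workflow_uuid": "wf-123",
    "created_at": 1714752000000,  # Unix epoch timestamp in ms
    "updated_at": 1714752060000,  # Unix epoch timestamp in ms
}

# Millisecond epochs must be divided by 1000 for datetime.
created = datetime.fromtimestamp(status["created_at"] / 1000, tz=timezone.utc)
updated = datetime.fromtimestamp(status["updated_at"] / 1000, tz=timezone.utc)
print(created.isoformat(), updated.isoformat())
```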
@@ -102,19 +104,12 @@ class GetWorkflowsInput:
     Structure for argument to `get_workflows` function.
 
     This specifies the search criteria for workflow retrieval by `get_workflows`.
-
-    Attributes:
-        name(str): The name of the workflow function
-        authenticated_user(str): The name of the user who invoked the function
-        start_time(str): Beginning of search range for time of invocation, in ISO 8601 format
-        end_time(str): End of search range for time of invocation, in ISO 8601 format
-        status(str): Current status of the workflow invocation (see `WorkflowStatusString`)
-        application_version(str): Application version that invoked the workflow
-        limit(int): Limit on number of returned records
-
     """
 
     def __init__(self) -> None:
+        self.workflow_ids: Optional[List[str]] = (
+            None  # Search only in these workflow IDs
+        )
         self.name: Optional[str] = None  # The name of the workflow function
         self.authenticated_user: Optional[str] = None  # The user who ran the workflow.
         self.start_time: Optional[str] = None  # Timestamp in ISO 8601 format
@@ -126,15 +121,23 @@ class GetWorkflowsInput:
         self.limit: Optional[int] = (
             None  # Return up to this many workflows IDs. IDs are ordered by workflow creation time.
         )
+        self.offset: Optional[int] = (
+            None  # Offset into the matching records for pagination
+        )
+        self.sort_desc: bool = (
+            False  # If true, sort by created_at in DESC order. Default false (in ASC order).
+        )
 
 
 class GetQueuedWorkflowsInput(TypedDict):
-    queue_name: Optional[str]
-    status: Optional[str]
+    queue_name: Optional[str]  # Get workflows belonging to this queue
+    status: Optional[str]  # Get workflows with this status
     start_time: Optional[str]  # Timestamp in ISO 8601 format
     end_time: Optional[str]  # Timestamp in ISO 8601 format
     limit: Optional[int]  # Return up to this many workflows IDs.
+    offset: Optional[int]  # Offset into the matching records for pagination
     name: Optional[str]  # The name of the workflow function
+    sort_desc: Optional[bool]  # Sort by created_at in DESC or ASC order
 
 
 class GetWorkflowsOutput:
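Together, the new `workflow_ids`, `offset`, and `sort_desc` fields turn `GetWorkflowsInput` into a pagination-friendly filter. A hedged sketch of how a caller might page through results, assuming a constructed `SystemDatabase` instance `sys_db` and assuming `GetWorkflowsOutput` exposes the matched IDs as `workflow_uuids`:

```python
from dbos._sys_db import GetWorkflowsInput

query = GetWorkflowsInput()
query.status = "SUCCESS"   # a WorkflowStatusString value (assumed here)
query.sort_desc = True     # newest first (ORDER BY created_at DESC)
query.limit = 50           # page size
query.offset = 100         # skip the first two pages

page = sys_db.get_workflows(query)
for workflow_id in page.workflow_uuids:
    print(workflow_id)
```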
@@ -148,25 +151,6 @@ class GetPendingWorkflowsOutput:
         self.queue_name: Optional[str] = queue_name
 
 
-class WorkflowInformation(TypedDict, total=False):
-    workflow_uuid: str
-    status: WorkflowStatuses  # The status of the workflow.
-    name: str  # The name of the workflow function.
-    workflow_class_name: str  # The class name holding the workflow function.
-    workflow_config_name: (
-        str  # The name of the configuration, if the class needs configuration
-    )
-    authenticated_user: str  # The user who ran the workflow. Empty string if not set.
-    assumed_role: str
-    # The role used to run this workflow. Empty string if authorization is not required.
-    authenticated_roles: List[str]
-    # All roles the authenticated user has, if any.
-    input: Optional[_serialization.WorkflowInputs]
-    output: Optional[str]
-    error: Optional[str]
-    request: Optional[str]
-
-
 _dbos_null_topic = "__null__topic__"
 _buffer_flush_batch_size = 100
 _buffer_flush_interval_secs = 1.0
@@ -174,7 +158,7 @@ _buffer_flush_interval_secs = 1.0
 
 class SystemDatabase:
 
-    def __init__(self, config: ConfigFile):
+    def __init__(self, config: ConfigFile, *, debug_mode: bool = False):
         self.config = config
 
         sysdb_name = (
@@ -183,28 +167,27 @@ class SystemDatabase:
             else config["database"]["app_db_name"] + SystemSchema.sysdb_suffix
         )
 
-        # If the system database does not already exist, create it
-        postgres_db_url = sa.URL.create(
-            "postgresql+psycopg",
-            username=config["database"]["username"],
-            password=config["database"]["password"],
-            host=config["database"]["hostname"],
-            port=config["database"]["port"],
-            database="postgres",
-            # fills the "application_name" column in pg_stat_activity
-            query={
-                "application_name": f"dbos_transact_{os.environ.get('DBOS__VMID', 'local')}"
-            },
-        )
-        engine = sa.create_engine(postgres_db_url)
-        with engine.connect() as conn:
-            conn.execution_options(isolation_level="AUTOCOMMIT")
-            if not conn.execute(
-                sa.text("SELECT 1 FROM pg_database WHERE datname=:db_name"),
-                parameters={"db_name": sysdb_name},
-            ).scalar():
-                conn.execute(sa.text(f"CREATE DATABASE {sysdb_name}"))
-        engine.dispose()
+        if not debug_mode:
+            # If the system database does not already exist, create it
+            postgres_db_url = sa.URL.create(
+                "postgresql+psycopg",
+                username=config["database"]["username"],
+                password=config["database"]["password"],
+                host=config["database"]["hostname"],
+                port=config["database"]["port"],
+                database="postgres",
+                # fills the "application_name" column in pg_stat_activity
+                query={"application_name": f"dbos_transact_{GlobalParams.executor_id}"},
+            )
+            engine = sa.create_engine(postgres_db_url)
+            with engine.connect() as conn:
+                conn.execution_options(isolation_level="AUTOCOMMIT")
+                if not conn.execute(
+                    sa.text("SELECT 1 FROM pg_database WHERE datname=:db_name"),
+                    parameters={"db_name": sysdb_name},
+                ).scalar():
+                    conn.execute(sa.text(f"CREATE DATABASE {sysdb_name}"))
+            engine.dispose()
 
         system_db_url = sa.URL.create(
             "postgresql+psycopg",
@@ -214,9 +197,7 @@ class SystemDatabase:
             port=config["database"]["port"],
             database=sysdb_name,
             # fills the "application_name" column in pg_stat_activity
-            query={
-                "application_name": f"dbos_transact_{os.environ.get('DBOS__VMID', 'local')}"
-            },
+            query={"application_name": f"dbos_transact_{GlobalParams.executor_id}"},
         )
 
         # Create a connection pool for the system database
@@ -225,25 +206,41 @@ class SystemDatabase:
         )
 
         # Run a schema migration for the system database
-        migration_dir = os.path.join(
-            os.path.dirname(os.path.realpath(__file__)), "_migrations"
-        )
-        alembic_cfg = Config()
-        alembic_cfg.set_main_option("script_location", migration_dir)
-        logging.getLogger("alembic").setLevel(logging.WARNING)
-        # Alembic requires the % in URL-escaped parameters to itself be escaped to %%.
-        escaped_conn_string = re.sub(
-            r"%(?=[0-9A-Fa-f]{2})",
-            "%%",
-            self.engine.url.render_as_string(hide_password=False),
-        )
-        alembic_cfg.set_main_option("sqlalchemy.url", escaped_conn_string)
-        try:
-            command.upgrade(alembic_cfg, "head")
-        except Exception as e:
-            dbos_logger.warning(
-                f"Exception during system database construction. This is most likely because the system database was configured using a later version of DBOS: {e}"
+        if not debug_mode:
+            migration_dir = os.path.join(
+                os.path.dirname(os.path.realpath(__file__)), "_migrations"
+            )
+            alembic_cfg = Config()
+            alembic_cfg.set_main_option("script_location", migration_dir)
+            logging.getLogger("alembic").setLevel(logging.WARNING)
+            # Alembic requires the % in URL-escaped parameters to itself be escaped to %%.
+            escaped_conn_string = re.sub(
+                r"%(?=[0-9A-Fa-f]{2})",
+                "%%",
+                self.engine.url.render_as_string(hide_password=False),
             )
+            alembic_cfg.set_main_option("sqlalchemy.url", escaped_conn_string)
+            try:
+                command.upgrade(alembic_cfg, "head")
+            except Exception as e:
+                dbos_logger.warning(
+                    f"Exception during system database construction. This is most likely because the system database was configured using a later version of DBOS: {e}"
+                )
+            alembic_cfg = Config()
+            alembic_cfg.set_main_option("script_location", migration_dir)
+            # Alembic requires the % in URL-escaped parameters to itself be escaped to %%.
+            escaped_conn_string = re.sub(
+                r"%(?=[0-9A-Fa-f]{2})",
+                "%%",
+                self.engine.url.render_as_string(hide_password=False),
+            )
+            alembic_cfg.set_main_option("sqlalchemy.url", escaped_conn_string)
+            try:
+                command.upgrade(alembic_cfg, "head")
+            except Exception as e:
+                dbos_logger.warning(
+                    f"Exception during system database construction. This is most likely because the system database was configured using a later version of DBOS: {e}"
+                )
 
         self.notification_conn: Optional[psycopg.connection.Connection] = None
         self.notifications_map: Dict[str, threading.Condition] = {}
@@ -259,6 +256,7 @@ class SystemDatabase:
 
         # Now we can run background processes
         self._run_background_processes = True
+        self._debug_mode = debug_mode
 
     # Destroy the pool when finished
     def destroy(self) -> None:
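The constructor changes all serve the new `debug_mode` flag: when set, `SystemDatabase` neither creates the database nor runs Alembic migrations, and the stored `self._debug_mode` is checked by the write paths below, which raise instead of mutating state. A sketch of read-only use, with a hypothetical `ConfigFile`-shaped dict (the real one is loaded from the application's configuration):

```python
from dbos._sys_db import SystemDatabase

# Hypothetical config; field names follow the ConfigFile usage in this diff.
config = {
    "database": {
        "username": "postgres",
        "password": "dbos",
        "hostname": "localhost",
        "port": 5432,
        "app_db_name": "my_app",
    }
}

# debug_mode=True skips CREATE DATABASE and migrations; mutating methods
# such as enqueue or update_workflow_status raise if called.
sys_db = SystemDatabase(config, debug_mode=True)
try:
    sys_db.enqueue("wf-123", "example-queue")  # raises in debug mode
except Exception as e:
    print(e)
finally:
    sys_db.destroy()
```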
@@ -280,6 +278,8 @@ class SystemDatabase:
         *,
         max_recovery_attempts: int = DEFAULT_MAX_RECOVERY_ATTEMPTS,
     ) -> WorkflowStatuses:
+        if self._debug_mode:
+            raise Exception("called insert_workflow_status in debug mode")
         wf_status: WorkflowStatuses = status["status"]
 
         cmd = (
@@ -307,6 +307,7 @@ class SystemDatabase:
             .on_conflict_do_update(
                 index_elements=["workflow_uuid"],
                 set_=dict(
+                    executor_id=status["executor_id"],
                     recovery_attempts=(
                         SystemSchema.workflow_status.c.recovery_attempts + 1
                     ),
@@ -378,6 +379,8 @@ class SystemDatabase:
         *,
         conn: Optional[sa.Connection] = None,
     ) -> None:
+        if self._debug_mode:
+            raise Exception("called update_workflow_status in debug mode")
         wf_status: WorkflowStatuses = status["status"]
 
         cmd = (
@@ -427,6 +430,8 @@ class SystemDatabase:
         self,
         workflow_id: str,
     ) -> None:
+        if self._debug_mode:
+            raise Exception("called cancel_workflow in debug mode")
         with self.engine.begin() as c:
             # Remove the workflow from the queues table so it does not block the table
             c.execute(
@@ -447,6 +452,8 @@ class SystemDatabase:
         self,
         workflow_id: str,
     ) -> None:
+        if self._debug_mode:
+            raise Exception("called resume_workflow in debug mode")
         with self.engine.begin() as c:
             # Check the status of the workflow. If it is complete, do nothing.
             row = c.execute(
@@ -490,27 +497,33 @@ class SystemDatabase:
                     SystemSchema.workflow_status.c.assumed_role,
                     SystemSchema.workflow_status.c.queue_name,
                     SystemSchema.workflow_status.c.executor_id,
+                    SystemSchema.workflow_status.c.created_at,
+                    SystemSchema.workflow_status.c.updated_at,
+                    SystemSchema.workflow_status.c.application_version,
+                    SystemSchema.workflow_status.c.application_id,
                 ).where(SystemSchema.workflow_status.c.workflow_uuid == workflow_uuid)
             ).fetchone()
             if row is None:
                 return None
             status: WorkflowStatusInternal = {
                 "workflow_uuid": workflow_uuid,
-                "status": row[0],
-                "name": row[1],
-                "class_name": row[5],
-                "config_name": row[4],
                 "output": None,
                 "error": None,
-                "app_id": None,
-                "app_version": None,
-                "executor_id": row[10],
+                "status": row[0],
+                "name": row[1],
                 "request": row[2],
                 "recovery_attempts": row[3],
+                "config_name": row[4],
+                "class_name": row[5],
                 "authenticated_user": row[6],
                 "authenticated_roles": row[7],
                 "assumed_role": row[8],
                 "queue_name": row[9],
+                "executor_id": row[10],
+                "created_at": row[11],
+                "updated_at": row[12],
+                "app_version": row[13],
+                "app_id": row[14],
             }
             return status
 
@@ -539,47 +552,6 @@ class SystemDatabase:
             )
         return stat
 
-    def get_workflow_status_w_outputs(
-        self, workflow_uuid: str
-    ) -> Optional[WorkflowStatusInternal]:
-        with self.engine.begin() as c:
-            row = c.execute(
-                sa.select(
-                    SystemSchema.workflow_status.c.status,
-                    SystemSchema.workflow_status.c.name,
-                    SystemSchema.workflow_status.c.request,
-                    SystemSchema.workflow_status.c.output,
-                    SystemSchema.workflow_status.c.error,
-                    SystemSchema.workflow_status.c.config_name,
-                    SystemSchema.workflow_status.c.class_name,
-                    SystemSchema.workflow_status.c.authenticated_user,
-                    SystemSchema.workflow_status.c.authenticated_roles,
-                    SystemSchema.workflow_status.c.assumed_role,
-                    SystemSchema.workflow_status.c.queue_name,
-                ).where(SystemSchema.workflow_status.c.workflow_uuid == workflow_uuid)
-            ).fetchone()
-            if row is None:
-                return None
-            status: WorkflowStatusInternal = {
-                "workflow_uuid": workflow_uuid,
-                "status": row[0],
-                "name": row[1],
-                "config_name": row[5],
-                "class_name": row[6],
-                "output": row[3],
-                "error": row[4],
-                "app_id": None,
-                "app_version": None,
-                "executor_id": None,
-                "request": row[2],
-                "recovery_attempts": None,
-                "authenticated_user": row[7],
-                "authenticated_roles": row[8],
-                "assumed_role": row[9],
-                "queue_name": row[10],
-            }
-            return status
-
     def await_workflow_result_internal(self, workflow_uuid: str) -> dict[str, Any]:
         polling_interval_secs: float = 1.000
 
@@ -626,24 +598,12 @@ class SystemDatabase:
                 raise _serialization.deserialize_exception(stat["error"])
             return None
 
-    def get_workflow_info(
-        self, workflow_uuid: str, get_request: bool
-    ) -> Optional[WorkflowInformation]:
-        stat = self.get_workflow_status_w_outputs(workflow_uuid)
-        if stat is None:
-            return None
-        info = cast(WorkflowInformation, stat)
-        input = self.get_workflow_inputs(workflow_uuid)
-        if input is not None:
-            info["input"] = input
-        if not get_request:
-            info.pop("request", None)
-
-        return info
-
     def update_workflow_inputs(
         self, workflow_uuid: str, inputs: str, conn: Optional[sa.Connection] = None
     ) -> None:
+        if self._debug_mode:
+            raise Exception("called update_workflow_inputs in debug mode")
+
         cmd = (
             pg.insert(SystemSchema.workflow_inputs)
             .values(
@@ -689,9 +649,11 @@ class SystemDatabase:
         return inputs
 
     def get_workflows(self, input: GetWorkflowsInput) -> GetWorkflowsOutput:
-        query = sa.select(SystemSchema.workflow_status.c.workflow_uuid).order_by(
-            SystemSchema.workflow_status.c.created_at.asc()
-        )
+        query = sa.select(SystemSchema.workflow_status.c.workflow_uuid)
+        if input.sort_desc:
+            query = query.order_by(SystemSchema.workflow_status.c.created_at.desc())
+        else:
+            query = query.order_by(SystemSchema.workflow_status.c.created_at.asc())
         if input.name:
             query = query.where(SystemSchema.workflow_status.c.name == input.name)
         if input.authenticated_user:
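The sort direction is now applied with the same incremental pattern as the filters. Because SQLAlchemy `select()` statements are immutable, each `order_by`/`where`/`limit`/`offset` call returns a new statement, so conditional query building composes safely. A standalone sketch of the pattern against a stand-in table (not the real `SystemSchema`):

```python
import sqlalchemy as sa

metadata = sa.MetaData()
# Stand-in for SystemSchema.workflow_status.
workflow_status = sa.Table(
    "workflow_status",
    metadata,
    sa.Column("workflow_uuid", sa.Text, primary_key=True),
    sa.Column("name", sa.Text),
    sa.Column("created_at", sa.BigInteger),
)

def build_query(name, sort_desc, limit, offset):
    query = sa.select(workflow_status.c.workflow_uuid)
    # Each method call returns a new Select; nothing is mutated in place.
    if sort_desc:
        query = query.order_by(workflow_status.c.created_at.desc())
    else:
        query = query.order_by(workflow_status.c.created_at.asc())
    if name:
        query = query.where(workflow_status.c.name == name)
    if limit:
        query = query.limit(limit)
    if offset:
        query = query.offset(offset)
    return query

print(build_query("my_workflow", True, 50, 100))  # renders the SELECT
```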
@@ -716,28 +678,34 @@ class SystemDatabase:
                 SystemSchema.workflow_status.c.application_version
                 == input.application_version
             )
+        if input.workflow_ids:
+            query = query.where(
+                SystemSchema.workflow_status.c.workflow_uuid.in_(input.workflow_ids)
+            )
         if input.limit:
             query = query.limit(input.limit)
+        if input.offset:
+            query = query.offset(input.offset)
 
         with self.engine.begin() as c:
             rows = c.execute(query)
-            workflow_uuids = [row[0] for row in rows]
+            workflow_ids = [row[0] for row in rows]
 
-        return GetWorkflowsOutput(workflow_uuids)
+        return GetWorkflowsOutput(workflow_ids)
 
     def get_queued_workflows(
         self, input: GetQueuedWorkflowsInput
     ) -> GetWorkflowsOutput:
 
-        query = (
-            sa.select(SystemSchema.workflow_queue.c.workflow_uuid)
-            .join(
-                SystemSchema.workflow_status,
-                SystemSchema.workflow_queue.c.workflow_uuid
-                == SystemSchema.workflow_status.c.workflow_uuid,
-            )
-            .order_by(SystemSchema.workflow_status.c.created_at.asc())
+        query = sa.select(SystemSchema.workflow_queue.c.workflow_uuid).join(
+            SystemSchema.workflow_status,
+            SystemSchema.workflow_queue.c.workflow_uuid
+            == SystemSchema.workflow_status.c.workflow_uuid,
         )
+        if input["sort_desc"]:
+            query = query.order_by(SystemSchema.workflow_status.c.created_at.desc())
+        else:
+            query = query.order_by(SystemSchema.workflow_status.c.created_at.asc())
 
         if input.get("name"):
             query = query.where(SystemSchema.workflow_status.c.name == input["name"])
@@ -764,6 +732,8 @@ class SystemDatabase:
             )
         if input.get("limit"):
            query = query.limit(input["limit"])
+        if input.get("offset"):
+            query = query.offset(input["offset"])
 
         with self.engine.begin() as c:
             rows = c.execute(query)
@@ -798,6 +768,8 @@ class SystemDatabase:
     def record_operation_result(
         self, result: OperationResultInternal, conn: Optional[sa.Connection] = None
     ) -> None:
+        if self._debug_mode:
+            raise Exception("called record_operation_result in debug mode")
        error = result["error"]
        output = result["output"]
        assert error is None or output is None, "Only one of error or output can be set"
@@ -857,6 +829,11 @@ class SystemDatabase:
             recorded_output = self.check_operation_execution(
                 workflow_uuid, function_id, conn=c
             )
+            if self._debug_mode and recorded_output is None:
+                raise Exception(
+                    "called send in debug mode without a previous execution"
+                )
+
             if recorded_output is not None:
                 dbos_logger.debug(
                     f"Replaying send, id: {function_id}, destination_uuid: {destination_uuid}, topic: {topic}"
@@ -900,6 +877,8 @@ class SystemDatabase:
 
         # First, check for previous executions.
         recorded_output = self.check_operation_execution(workflow_uuid, function_id)
+        if self._debug_mode and recorded_output is None:
+            raise Exception("called recv in debug mode without a previous execution")
         if recorded_output is not None:
             dbos_logger.debug(f"Replaying recv, id: {function_id}, topic: {topic}")
             if recorded_output["output"] is not None:
@@ -1049,6 +1028,9 @@ class SystemDatabase:
     ) -> float:
         recorded_output = self.check_operation_execution(workflow_uuid, function_id)
         end_time: float
+        if self._debug_mode and recorded_output is None:
+            raise Exception("called sleep in debug mode without a previous execution")
+
         if recorded_output is not None:
             dbos_logger.debug(f"Replaying sleep, id: {function_id}, seconds: {seconds}")
             assert recorded_output["output"] is not None, "no recorded end time"
@@ -1083,6 +1065,10 @@ class SystemDatabase:
             recorded_output = self.check_operation_execution(
                 workflow_uuid, function_id, conn=c
             )
+            if self._debug_mode and recorded_output is None:
+                raise Exception(
+                    "called set_event in debug mode without a previous execution"
+                )
             if recorded_output is not None:
                 dbos_logger.debug(f"Replaying set_event, id: {function_id}, key: {key}")
                 return  # Already sent before
@@ -1127,6 +1113,10 @@ class SystemDatabase:
             recorded_output = self.check_operation_execution(
                 caller_ctx["workflow_uuid"], caller_ctx["function_id"]
             )
+            if self._debug_mode and recorded_output is None:
+                raise Exception(
+                    "called get_event in debug mode without a previous execution"
+                )
             if recorded_output is not None:
                 dbos_logger.debug(
                     f"Replaying get_event, id: {caller_ctx['function_id']}, key: {key}"
@@ -1189,6 +1179,9 @@ class SystemDatabase:
         return value
 
     def _flush_workflow_status_buffer(self) -> None:
+        if self._debug_mode:
+            raise Exception("called _flush_workflow_status_buffer in debug mode")
+
         """Export the workflow status buffer to the database, up to the batch size."""
         if len(self._workflow_status_buffer) == 0:
             return
@@ -1219,6 +1212,9 @@ class SystemDatabase:
                     break
 
     def _flush_workflow_inputs_buffer(self) -> None:
+        if self._debug_mode:
+            raise Exception("called _flush_workflow_inputs_buffer in debug mode")
+
         """Export the workflow inputs buffer to the database, up to the batch size."""
         if len(self._workflow_inputs_buffer) == 0:
             return
@@ -1283,6 +1279,8 @@ class SystemDatabase:
         )
 
     def enqueue(self, workflow_id: str, queue_name: str) -> None:
+        if self._debug_mode:
+            raise Exception("called enqueue in debug mode")
         with self.engine.begin() as c:
             c.execute(
                 pg.insert(SystemSchema.workflow_queue)
@@ -1294,6 +1292,9 @@ class SystemDatabase:
             )
 
     def start_queued_workflows(self, queue: "Queue", executor_id: str) -> List[str]:
+        if self._debug_mode:
+            return []
+
         start_time_ms = int(time.time() * 1000)
         if queue.limiter is not None:
             limiter_period_ms = int(queue.limiter["period"] * 1000)
@@ -1323,24 +1324,32 @@ class SystemDatabase:
             # If there is a global or local concurrency limit N, select only the N oldest enqueued
             # functions, else select all of them.
 
-            # First lets figure out how many tasks the worker can dequeue
+            # First lets figure out how many tasks are eligible for dequeue.
+            # This means figuring out how many unstarted tasks are within the local and global concurrency limits
             running_tasks_query = (
                 sa.select(
-                    SystemSchema.workflow_queue.c.executor_id,
+                    SystemSchema.workflow_status.c.executor_id,
                     sa.func.count().label("task_count"),
                 )
+                .select_from(
+                    SystemSchema.workflow_queue.join(
+                        SystemSchema.workflow_status,
+                        SystemSchema.workflow_queue.c.workflow_uuid
+                        == SystemSchema.workflow_status.c.workflow_uuid,
+                    )
+                )
                 .where(SystemSchema.workflow_queue.c.queue_name == queue.name)
                .where(
-                    SystemSchema.workflow_queue.c.executor_id.isnot(
+                    SystemSchema.workflow_queue.c.started_at_epoch_ms.isnot(
                        None
-                    )  # Task is dequeued
+                    )  # Task is started
                )
                .where(
                    SystemSchema.workflow_queue.c.completed_at_epoch_ms.is_(
                        None
-                    )  # Task is not completed
+                    )  # Task is not completed.
                )
-                .group_by(SystemSchema.workflow_queue.c.executor_id)
+                .group_by(SystemSchema.workflow_status.c.executor_id)
            )
            running_tasks_result = c.execute(running_tasks_query).fetchall()
            running_tasks_result_dict = {row[0]: row[1] for row in running_tasks_result}
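Downstream of this query (see the unchanged lines in the next hunks), the per-executor counts become a dequeue budget: the worker's own count is subtracted from its local `worker_concurrency` limit and the total across all executors from the global `concurrency` limit, and the smaller remainder wins. A small sketch of that arithmetic with made-up numbers:

```python
# Per-executor running-task counts, shaped like running_tasks_query's result.
running_tasks_result_dict = {"executor-a": 3, "executor-b": 5}
executor_id = "executor-a"          # this worker
worker_concurrency = 4              # assumed local queue limit
concurrency = 10                    # assumed global queue limit

running_tasks_for_this_worker = running_tasks_result_dict.get(executor_id, 0)

max_tasks = float("inf")
if worker_concurrency is not None:
    max_tasks = max(0, worker_concurrency - running_tasks_for_this_worker)  # 1
if concurrency is not None:
    total_running_tasks = sum(running_tasks_result_dict.values())  # 8
    available_tasks = max(0, concurrency - total_running_tasks)    # 2
    max_tasks = min(max_tasks, available_tasks)                    # 1
print(max_tasks)  # executor-a may start at most 1 more task this pass
```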
@@ -1350,12 +1359,6 @@ class SystemDatabase:
 
             max_tasks = float("inf")
             if queue.worker_concurrency is not None:
-                # Worker local concurrency limit should always be >= running_tasks_for_this_worker
-                # This should never happen but a check + warning doesn't hurt
-                if running_tasks_for_this_worker > queue.worker_concurrency:
-                    dbos_logger.warning(
-                        f"Number of tasks on this worker ({running_tasks_for_this_worker}) exceeds the worker concurrency limit ({queue.worker_concurrency})"
-                    )
                 max_tasks = max(
                     0, queue.worker_concurrency - running_tasks_for_this_worker
                 )
@@ -1370,16 +1373,14 @@ class SystemDatabase:
                 available_tasks = max(0, queue.concurrency - total_running_tasks)
                 max_tasks = min(max_tasks, available_tasks)
 
-            # Lookup tasks
+            # Lookup unstarted/uncompleted tasks (not running)
             query = (
                 sa.select(
                     SystemSchema.workflow_queue.c.workflow_uuid,
-                    SystemSchema.workflow_queue.c.started_at_epoch_ms,
-                    SystemSchema.workflow_queue.c.executor_id,
                 )
                 .where(SystemSchema.workflow_queue.c.queue_name == queue.name)
+                .where(SystemSchema.workflow_queue.c.started_at_epoch_ms == None)
                 .where(SystemSchema.workflow_queue.c.completed_at_epoch_ms == None)
-                .where(SystemSchema.workflow_queue.c.executor_id == None)
                 .order_by(SystemSchema.workflow_queue.c.created_at_epoch_ms.asc())
                 .with_for_update(nowait=True)  # Error out early
             )
@@ -1422,7 +1423,7 @@ class SystemDatabase:
                 c.execute(
                     SystemSchema.workflow_queue.update()
                     .where(SystemSchema.workflow_queue.c.workflow_uuid == id)
-                    .values(started_at_epoch_ms=start_time_ms, executor_id=executor_id)
+                    .values(started_at_epoch_ms=start_time_ms)
                 )
                 ret_ids.append(id)
 
@@ -1444,6 +1445,9 @@ class SystemDatabase:
         return ret_ids
 
     def remove_from_queue(self, workflow_id: str, queue: "Queue") -> None:
+        if self._debug_mode:
+            raise Exception("called remove_from_queue in debug mode")
+
         with self.engine.begin() as c:
             if queue.limiter is None:
                 c.execute(
@@ -1458,18 +1462,39 @@ class SystemDatabase:
                     .values(completed_at_epoch_ms=int(time.time() * 1000))
                 )
 
-    def clear_queue_assignment(self, workflow_id: str) -> None:
-        with self.engine.begin() as c:
-            c.execute(
-                sa.update(SystemSchema.workflow_queue)
-                .where(SystemSchema.workflow_queue.c.workflow_uuid == workflow_id)
-                .values(executor_id=None, started_at_epoch_ms=None)
-            )
-            c.execute(
-                sa.update(SystemSchema.workflow_status)
-                .where(SystemSchema.workflow_status.c.workflow_uuid == workflow_id)
-                .values(executor_id=None, status=WorkflowStatusString.ENQUEUED.value)
-            )
+    def clear_queue_assignment(self, workflow_id: str) -> bool:
+        if self._debug_mode:
+            raise Exception("called clear_queue_assignment in debug mode")
+
+        with self.engine.connect() as conn:
+            with conn.begin() as transaction:
+                # Reset the start time in the queue to mark it as not started
+                res = conn.execute(
+                    sa.update(SystemSchema.workflow_queue)
+                    .where(SystemSchema.workflow_queue.c.workflow_uuid == workflow_id)
+                    .where(
+                        SystemSchema.workflow_queue.c.completed_at_epoch_ms.is_(None)
+                    )
+                    .values(started_at_epoch_ms=None)
+                )
+
+                # If no rows were affected, the workflow is not anymore in the queue or was already completed
+                if res.rowcount == 0:
+                    transaction.rollback()
+                    return False
+
+                # Reset the status of the task to "ENQUEUED"
+                res = conn.execute(
+                    sa.update(SystemSchema.workflow_status)
+                    .where(SystemSchema.workflow_status.c.workflow_uuid == workflow_id)
+                    .values(status=WorkflowStatusString.ENQUEUED.value)
+                )
+                if res.rowcount == 0:
+                    # This should never happen
+                    raise Exception(
+                        f"UNREACHABLE: Workflow {workflow_id} is found in the workflow_queue table but not found in the workflow_status table"
+                    )
+                return True
 
 
 def reset_system_database(config: ConfigFile) -> None:
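Because `clear_queue_assignment` now returns `bool`, a caller can tell whether the workflow was actually handed back to the queue or had already completed or left it. A hedged sketch of caller-side use (the helper below is hypothetical, not part of the package):

```python
def requeue_abandoned_workflow(sys_db, workflow_id: str) -> None:
    # Returns True only if the row was still queued and uncompleted; the
    # start time is cleared and the status reset to ENQUEUED in one transaction.
    if sys_db.clear_queue_assignment(workflow_id):
        print(f"{workflow_id} returned to its queue for another executor")
    else:
        print(f"{workflow_id} already completed or no longer queued")
```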