PyPI - buildgrid - Versions diffs - 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

buildgrid 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

buildgrid/server/app/cli.py +4 -4
buildgrid/server/app/settings/parser.py +46 -17
buildgrid/server/app/settings/schema.yml +16 -2
buildgrid/server/bots/service.py +4 -10
buildgrid/server/cas/storage/index/index_abc.py +0 -7
buildgrid/server/cas/storage/index/sql.py +91 -215
buildgrid/server/cas/storage/redis_fmb_cache.py +220 -0
buildgrid/server/enums.py +5 -0
buildgrid/server/metrics_names.py +0 -2
buildgrid/server/scheduler/impl.py +298 -70
buildgrid/server/sql/alembic/versions/3737630fc9cf_remove_deleted_column_from_sql_cas_index.py +43 -0
buildgrid/server/sql/models.py +0 -2
buildgrid/server/sql/utils.py +3 -3
buildgrid/server/utils/bots.py +1 -1
buildgrid/server/version.py +1 -1
{buildgrid-0.3.5.dist-info → buildgrid-0.4.0.dist-info}/METADATA +2 -2
{buildgrid-0.3.5.dist-info → buildgrid-0.4.0.dist-info}/RECORD +21 -19
{buildgrid-0.3.5.dist-info → buildgrid-0.4.0.dist-info}/WHEEL +1 -1
{buildgrid-0.3.5.dist-info → buildgrid-0.4.0.dist-info}/entry_points.txt +0 -0
{buildgrid-0.3.5.dist-info → buildgrid-0.4.0.dist-info}/licenses/LICENSE +0 -0
{buildgrid-0.3.5.dist-info → buildgrid-0.4.0.dist-info}/top_level.txt +0 -0

buildgrid/server/scheduler/impl.py (changed)

@@ -22,7 +22,19 @@ from contextlib import ExitStack
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 from time import time
-from typing import Any, Callable, Generator, Iterable, NamedTuple, Required, Sequence, Tuple, TypedDict, TypeVar, cast
+from typing import (
+    Any,
+    Callable,
+    Generator,
+    Iterable,
+    NamedTuple,
+    Required,
+    Sequence,
+    Tuple,
+    TypedDict,
+    TypeVar,
+    cast,
+)
 from buildgrid_metering.client import SyncMeteringServiceClient
 from buildgrid_metering.models.dataclasses import ComputingUsage, Identity, Usage
@@ -30,7 +42,18 @@ from google.protobuf.any_pb2 import Any as ProtoAny
 from google.protobuf.internal.containers import RepeatedCompositeFieldContainer
 from google.protobuf.timestamp_pb2 import Timestamp
 from grpc import Channel
-from sqlalchemy import ColumnExpressionArgument, CursorResult, and_, delete, func, insert, or_, select, text, update
+from sqlalchemy import (
+    ColumnExpressionArgument,
+    CursorResult,
+    and_,
+    delete,
+    func,
+    insert,
+    or_,
+    select,
+    text,
+    update,
+)
 from sqlalchemy.dialects import postgresql
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import Session, joinedload
@@ -51,7 +74,9 @@ from buildgrid._protos.build.bazel.remote.execution.v2.remote_execution_pb2 impo
 from buildgrid._protos.build.buildbox.execution_stats_pb2 import ExecutionStatistics
 from buildgrid._protos.build.buildgrid.identity_pb2 import ClientIdentity
 from buildgrid._protos.build.buildgrid.introspection_pb2 import JobEvent
-from buildgrid._protos.build.buildgrid.quota_pb2 import InstanceQuota as InstanceQuotaProto
+from buildgrid._protos.build.buildgrid.quota_pb2 import (
+    InstanceQuota as InstanceQuotaProto,
+)
 from buildgrid._protos.build.buildgrid.scheduling_pb2 import SchedulingMetadata
 from buildgrid._protos.google.devtools.remoteworkers.v1test2.bots_pb2 import Lease
 from buildgrid._protos.google.longrunning import operations_pb2
@@ -62,7 +87,11 @@ from buildgrid.server.actioncache.caches.action_cache_abc import ActionCacheABC
 from buildgrid.server.cas.storage.storage_abc import StorageABC
 from buildgrid.server.client.asset import AssetClient
 from buildgrid.server.client.logstream import logstream_client
-from buildgrid.server.context import current_instance, instance_context, try_current_instance
+from buildgrid.server.context import (
+    current_instance,
+    instance_context,
+    try_current_instance,
+)
 from buildgrid.server.decorators import timed
 from buildgrid.server.enums import (
     BotStatus,
@@ -85,11 +114,22 @@ from buildgrid.server.exceptions import (
 )
 from buildgrid.server.logging import Tags, buildgrid_logger
 from buildgrid.server.metrics_names import METRIC
-from buildgrid.server.metrics_utils import publish_counter_metric, publish_timer_metric, timer
-from buildgrid.server.operations.filtering import DEFAULT_SORT_KEYS, OperationFilter, SortKey
+from buildgrid.server.metrics_utils import (
+    publish_counter_metric,
+    publish_timer_metric,
+    timer,
+)
+from buildgrid.server.operations.filtering import (
+    DEFAULT_SORT_KEYS,
+    OperationFilter,
+    SortKey,
+)
 from buildgrid.server.scheduler import events
 from buildgrid.server.scheduler.cohorts import CohortSet
-from buildgrid.server.settings import DEFAULT_MAX_EXECUTION_TIMEOUT, SQL_SCHEDULER_METRICS_PUBLISH_INTERVAL_SECONDS
+from buildgrid.server.settings import (
+    DEFAULT_MAX_EXECUTION_TIMEOUT,
+    SQL_SCHEDULER_METRICS_PUBLISH_INTERVAL_SECONDS,
+)
 from buildgrid.server.sql.models import Base as OrmBase
 from buildgrid.server.sql.models import (
     BotEntry,
@@ -180,9 +220,9 @@ class AgedJobHandlerOptions(NamedTuple):
             )
         return AgedJobHandlerOptions(
-            job_max_age=_dict_to_timedelta(job_max_age_cfg) if job_max_age_cfg else timedelta(days=30),
-            handling_period=_dict_to_timedelta(handling_period_cfg) if handling_period_cfg else timedelta(minutes=5),
-            max_handling_window=max_handling_window_cfg if max_handling_window_cfg else 10000,
+            job_max_age=(_dict_to_timedelta(job_max_age_cfg) if job_max_age_cfg else timedelta(days=30)),
+            handling_period=(_dict_to_timedelta(handling_period_cfg) if handling_period_cfg else timedelta(minutes=5)),
+            max_handling_window=(max_handling_window_cfg if max_handling_window_cfg else 10000),
         )
@@ -196,7 +236,15 @@ BotAssignmentFn = Callable[[Session, JobEntry], Tuple[BotEntry, str] | None]
 # See `_match_job_to_bot` for parameters
 MatchJobToBotFn = Callable[
-    [Session, JobEntry, float, BotAssignmentFn, str | None, ColumnExpressionArgument[bool] | None], None
+    [
+        Session,
+        JobEntry,
+        float,
+        BotAssignmentFn,
+        str | None,
+        ColumnExpressionArgument[bool] | None,
+    ],
+    None,
 ]
@@ -409,11 +457,17 @@ class Scheduler:
             except NotFoundError:
                 pass
             except Exception:
-                LOGGER.exception("Checking ActionCache for action failed.", tags=dict(digest=action_digest))
+                LOGGER.exception(
+                    "Checking ActionCache for action failed.",
+                    tags=dict(digest=action_digest),
+                )
         # Extend retention for action
         self._update_action_retention(
-            action, action_digest, self.queued_action_retention_hours, instance_name=current_instance()
+            action,
+            action_digest,
+            self.queued_action_retention_hours,
+            instance_name=current_instance(),
         )
         return self.create_operation_for_new_job(
@@ -456,7 +510,10 @@ class Scheduler:
             # Reschedule if priority is now greater, and we're still waiting on it to start.
             if priority < job.priority and job.stage == OperationStage.QUEUED.value:
-                LOGGER.info("Job assigned a new priority.", tags=dict(job_name=job.name, priority=priority))
+                LOGGER.info(
+                    "Job assigned a new priority.",
+                    tags=dict(job_name=job.name, priority=priority),
+                )
                 job.priority = priority
                 job.assigned = False
@@ -672,7 +729,12 @@ class Scheduler:
         if job:
             LOGGER.debug(
                 "Loaded job from db.",
-                tags=dict(job_name=job_name, job_stage=job.stage, result=job.result, instance_name=job.instance_name),
+                tags=dict(
+                    job_name=job_name,
+                    job_stage=job.stage,
+                    result=job.result,
+                    instance_name=job.instance_name,
+                ),
             )
         return job
@@ -983,7 +1045,10 @@ class Scheduler:
                 for platform_filter in platform_filters:
                     key, value = platform_filter.value.split(":", 1)
                     platform_clauses.append(
-                        and_(PlatformEntry.key == key, platform_filter.operator(PlatformEntry.value, value))
+                        and_(
+                            PlatformEntry.key == key,
+                            platform_filter.operator(PlatformEntry.value, value),
+                        )
                     )
                 job_name_subquery = (
@@ -1078,7 +1143,10 @@ class Scheduler:
                         JobEntry.property_label.label("property_label"),
                         func.count(JobEntry.name).label("job_count"),
                     )
-                    .where(JobEntry.stage < OperationStage.COMPLETED.value, JobEntry.instance_name == instance_name)
+                    .where(
+                        JobEntry.stage < OperationStage.COMPLETED.value,
+                        JobEntry.instance_name == instance_name,
+                    )
                     .group_by(JobEntry.stage, JobEntry.property_label),
                 ).all()
@@ -1112,7 +1180,13 @@ class Scheduler:
             )
         )
-    def _assign_job_to_bot(self, session: Session, job: JobEntry, bot: BotEntry, assignment_strategy: str = "") -> None:
+    def _assign_job_to_bot(
+        self,
+        session: Session,
+        job: JobEntry,
+        bot: BotEntry,
+        assignment_strategy: str = "",
+    ) -> None:
         """Assigns a job to a bot, updating both the job and bot entries in the database.
         `job` and `bot` ORM objects must be from `session`.
         """
@@ -1174,14 +1248,22 @@ class Scheduler:
                 ).scalar_one_or_none():
                     LOGGER.debug(
                         "Matched bot by sampling.",
-                        tags={"bot_name": bot.name, "attempt": attempt + 1, "bot_capacity": bot.capacity},
+                        tags={
+                            "bot_name": bot.name,
+                            "attempt": attempt + 1,
+                            "bot_capacity": bot.capacity,
+                        },
                     )
                     return bot
         LOGGER.debug("No bot matched by sampling after all attempts.")
         return None
     def match_bot_by_capacity(
-        self, session: Session, job: JobEntry, sampling: SamplingConfig | None = None, bot_cohort: str | None = None
+        self,
+        session: Session,
+        job: JobEntry,
+        sampling: SamplingConfig | None = None,
+        bot_cohort: str | None = None,
     ) -> Tuple[BotEntry, str] | None:
         """Select a bot for a job by capacity."""
         query = (
@@ -1311,7 +1393,8 @@ class Scheduler:
                 )
                 .order_by(JobEntry.priority.desc(), JobEntry.queued_timestamp.desc())
                 .limit(1)
-                .with_for_update(skip_locked=True, of=[BotEntry, JobEntry])  # type: ignore
+                # ignore typing as older version of SQLAlchemy .with_for_update() doesn't understand typing for tuples see https://github.com/sqlalchemy/sqlalchemy/issues/12730
+                .with_for_update(skip_locked=True, of=[BotEntry, JobEntry])  # type: ignore[list-item, unused-ignore]
                 .execution_options(populate_existing=True)
             )
             if bot_evicted_job := session.execute(eviction_query).one_or_none():
@@ -1332,7 +1415,11 @@ class Scheduler:
                 bot.capacity += 1  # Restore capacity from evicted job
                 session.add(
-                    JobHistoryEntry(event_type=JobHistoryEvent.EVICTED.value, job_name=evicted_job.name, payload=None)
+                    JobHistoryEntry(
+                        event_type=JobHistoryEvent.EVICTED.value,
+                        job_name=evicted_job.name,
+                        payload=None,
+                    )
                 )
                 assignment = (bot, JobAssignmentStrategy.PREEMPTION.value)
@@ -1380,7 +1467,8 @@ class Scheduler:
             # The caller didn't check the usage, we apply a minimum check here against max_quota
             instance_quota = session.execute(
                 select(InstanceQuota).where(
-                    InstanceQuota.bot_cohort == bot.cohort, InstanceQuota.instance_name == job.instance_name
+                    InstanceQuota.bot_cohort == bot.cohort,
+                    InstanceQuota.instance_name == job.instance_name,
                 )
             ).scalar_one_or_none()
             if instance_quota is not None and instance_quota.current_usage >= instance_quota.max_quota:
@@ -1439,7 +1527,9 @@ class Scheduler:
         bot_assignment_fn = bot_assignment_fn or self.match_bot_by_capacity
         def assign_with_guard(
-            session: Session, match_fn: MatchJobToBotFn, guard: ColumnExpressionArgument[bool]
+            session: Session,
+            match_fn: MatchJobToBotFn,
+            guard: ColumnExpressionArgument[bool],
         ) -> bool:
             instance_names_query = (
                 select(InstanceQuota.instance_name).where(InstanceQuota.bot_cohort == cohort).where(guard)
@@ -1458,7 +1548,14 @@ class Scheduler:
                 job = session.execute(job_statement).scalar_one_or_none()
                 if job is not None:
-                    match_fn(session, job, failure_backoff, bot_assignment_fn, assigner_name, guard)
+                    match_fn(
+                        session,
+                        job,
+                        failure_backoff,
+                        bot_assignment_fn,
+                        assigner_name,
+                        guard,
+                    )
                     return True
             return False
@@ -1487,12 +1584,16 @@ class Scheduler:
             # First, prioritize instances which are below their minimum quota
             updated = assign_with_guard(
-                session, match_with_preemption, InstanceQuota.current_usage < InstanceQuota.min_quota
+                session,
+                match_with_preemption,
+                InstanceQuota.current_usage < InstanceQuota.min_quota,
             )
             # Next, consider instances which are below their maximum quota
             if not updated:
                 updated = assign_with_guard(
-                    session, self._match_job_to_bot, InstanceQuota.current_usage < InstanceQuota.max_quota
+                    session,
+                    self._match_job_to_bot,
+                    InstanceQuota.current_usage < InstanceQuota.max_quota,
                 )
         return 1 if updated else 0
@@ -1635,7 +1736,9 @@ class Scheduler:
             .with_for_update(skip_locked=True)
         )
         return self._batch_timeout_jobs(
-            jobs_to_timeout_stmt, code_pb2.UNAVAILABLE, "Operation has been queued for too long"
+            jobs_to_timeout_stmt,
+            code_pb2.UNAVAILABLE,
+            "Operation has been queued for too long",
         )
     def prune_timer_loop(self, shutdown_requested: threading.Event) -> None:
@@ -1684,7 +1787,8 @@ class Scheduler:
         with self._sql.session() as session:
             options = {"synchronize_session": "fetch"}
             num_rows_deleted: int = cast(
-                CursorResult[Any], session.execute(delete_stmt, execution_options=options)
+                CursorResult[Any],
+                session.execute(delete_stmt, execution_options=options),
             ).rowcount
         if num_rows_deleted:
@@ -1855,7 +1959,11 @@ class Scheduler:
             }
             LOGGER.debug("Closing bot session.", tags=log_tags)
             for job in self._get_incomplete_jobs_for_bot(bot.bot_id, session, with_for_update=True):
-                lease_tags = {**log_tags, "db.lease_id": job.name, "db.lease_state": job.lease_state()}
+                lease_tags = {
+                    **log_tags,
+                    "db.lease_id": job.name,
+                    "db.lease_state": job.lease_state(),
+                }
                 LOGGER.debug("Reassigning job for bot session.", tags=lease_tags)
                 self._retry_job(session, job)
                 self._notify_job_updated(job.name, session)
@@ -1866,7 +1974,10 @@ class Scheduler:
         self._batch_update_instance_quota_usage(session, usage_diff)
     def session_expiry_timer_loop(self, shutdown_requested: threading.Event) -> None:
-        LOGGER.info("Starting BotSession reaper.", tags=dict(keepalive_timeout=self.bot_session_keepalive_timeout))
+        LOGGER.info(
+            "Starting BotSession reaper.",
+            tags=dict(keepalive_timeout=self.bot_session_keepalive_timeout),
+        )
         while not shutdown_requested.is_set():
             try:
                 while self.reap_expired_sessions():
@@ -1896,7 +2007,10 @@ class Scheduler:
                     LOGGER.warning(
                         "BotSession has expired.",
                         tags=dict(
-                            name=bot.name, bot_id=bot.bot_id, instance_name=bot.instance_name, deadline=bot.expiry_time
+                            name=bot.name,
+                            bot_id=bot.bot_id,
+                            instance_name=bot.instance_name,
+                            deadline=bot.expiry_time,
                         ),
                     )
                     bots_by_instance[bot.instance_name].append(bot)
@@ -1930,7 +2044,11 @@ class Scheduler:
     @timed(METRIC.SCHEDULER.ASSIGNMENT_DURATION)
     def _fetch_job_for_bot(
-        self, session: Session, bot: BotEntry, usage_diffs: InstanceQuotaUsageDiffs, log_tags: Tags
+        self,
+        session: Session,
+        bot: BotEntry,
+        usage_diffs: InstanceQuotaUsageDiffs,
+        log_tags: Tags,
     ) -> JobEntry | None:
         # Attempt to fetch a new job for a bot to work on.
         # This can help if there are usually more jobs available than bots.
@@ -1961,7 +2079,10 @@ class Scheduler:
             if next_job := session.execute(job_statement).scalar_one_or_none():
                 log_tags["db.next_job_name"] = next_job.name
                 self._assign_job_to_bot(
-                    session, next_job, bot, assignment_strategy=JobAssignmentStrategy.PROACTIVE.value
+                    session,
+                    next_job,
+                    bot,
+                    assignment_strategy=JobAssignmentStrategy.PROACTIVE.value,
                 )
                 start_timestamp = Timestamp()
                 start_timestamp.FromDatetime(next_job.queued_timestamp)
@@ -2010,7 +2131,6 @@ class Scheduler:
             "request.bot_id": bot_id,
             "request.bot_status": bot_status,
             "request.bot_name": bot_name,
-            "request.leases": {lease.id: lease.state for lease in bot_session_leases},
             "request.capacity": max_capacity,
         }
@@ -2112,7 +2232,7 @@ class Scheduler:
                 if db_bot_version == bot_version:
                     return active_leases, bot_version
-            bot_jobs_stmt = select(JobEntry).where(
+            bot_job_names_stmt = select(JobEntry.name).where(
                 JobEntry.worker_name == bot_id,
                 JobEntry.stage >= OperationStage.QUEUED.value,
                 JobEntry.stage < OperationStage.COMPLETED.value,
@@ -2120,20 +2240,25 @@ class Scheduler:
             # If this bot is instance-restricted, only look for jobs in the current instance.
             if instance_restricted_bot:
-                bot_jobs_stmt = bot_jobs_stmt.where(self._job_in_instance_pool())
+                bot_job_names_stmt = bot_job_names_stmt.where(self._job_in_instance_pool())
-            jobs = {job.name: job for job in session.execute(bot_jobs_stmt).scalars().all()}
-            db_lease_ids = set(jobs.keys())
-            log_tags["db.leases"] = {job.name: job.lease_state() for job in jobs.values()}
+            db_lease_ids = set(session.execute(bot_job_names_stmt).scalars().all())
             for lease in active_leases:
                 # Set specific tags in log lines for the lease currently being synchronized.
                 # This can help to identify a problematic lease in logs for a bot with multiple leases assigned.
-                lease_tags = {**log_tags, "request.lease_id": lease.id, "request.lease_state": lease.state}
+                lease_tags = {
+                    **log_tags,
+                    "request.lease_id": lease.id,
+                    "request.lease_state": lease.state,
+                }
                 # If the database has no lease, but the work is completed, we probably timed out the last call.
                 if lease.id not in db_lease_ids and lease.state == LeaseState.COMPLETED.value:
-                    LOGGER.debug("No lease in database, but session lease is completed. Skipping.", tags=lease_tags)
+                    LOGGER.debug(
+                        "No lease in database, but session lease is completed. Skipping.",
+                        tags=lease_tags,
+                    )
                     continue
                 # Remove this lease ID from db_lease_ids if present, now that we know we're handling it.
@@ -2143,7 +2268,10 @@ class Scheduler:
                 job = self._get_job(lease.id, session)
                 if not job or job.worker_name != bot_id:
-                    LOGGER.info("Lease is deleted or assigned to another bot. Skipping.", tags=lease_tags)
+                    LOGGER.info(
+                        "Lease is deleted or assigned to another bot. Skipping.",
+                        tags=lease_tags,
+                    )
                     continue
                 lease_tags["db.lease_id"] = job.name
@@ -2201,7 +2329,10 @@ class Scheduler:
                 if lease_state == LeaseState.PENDING.value:
                     # Need another iteration to flip the state to ACTIVE
                     # See also `_activate_bot_pending_leases`
-                    LOGGER.debug("Lease was assigned by an old scheduler during synchronization.", tags=log_tags)
+                    LOGGER.debug(
+                        "Lease was assigned by an old scheduler during synchronization.",
+                        tags=log_tags,
+                    )
                     continue
                 # Assign:
@@ -2263,13 +2394,20 @@ class Scheduler:
             raise InvalidArgumentError(f"Bot does not exist while reporting completed leases. {log_tags}")
         for lease in completed_leases:
-            lease_tags = {**log_tags, "request.lease_id": lease.id, "request.lease_state": lease.state}
+            lease_tags = {
+                **log_tags,
+                "request.lease_id": lease.id,
+                "request.lease_state": lease.state,
+            }
             job = self._get_job(lease.id, session, with_for_update=True)
             if not job or job.worker_name != bot.bot_id or job.stage != OperationStage.EXECUTING.value:
                 if job:
                     lease_tags["job.stage"] = job.stage
-                LOGGER.warning("Completed lease points to non-existent or invalid job. Skipping.", tags=lease_tags)
+                LOGGER.warning(
+                    "Completed lease points to non-existent or invalid job. Skipping.",
+                    tags=lease_tags,
+                )
                 continue
             completion_tags = {
@@ -2324,7 +2462,8 @@ class Scheduler:
         if job.n_tries >= self.max_job_attempts:
             status = status_pb2.Status(
-                code=code_pb2.ABORTED, message=f"Job was retried {job.n_tries} unsuccessfully. Aborting."
+                code=code_pb2.ABORTED,
+                message=f"Job was retried {job.n_tries} unsuccessfully. Aborting.",
             )
             self._complete_job(session, job, status=status)
             return
@@ -2486,14 +2625,22 @@ class Scheduler:
             try:
                 LOGGER.debug(
                     "Recording bot locality hint.",
-                    tags=dict(job_name=job.name, bot_name=bot_name, locality_hint=job.locality_hint),
+                    tags=dict(
+                        job_name=job.name,
+                        bot_name=bot_name,
+                        locality_hint=job.locality_hint,
+                    ),
                 )
                 self._record_bot_locality_hint(session, bot_name, job.locality_hint)
             except Exception:
                 # Don't fail job completion if locality hint recording fails
                 LOGGER.warning(
                     "Failed to record bot locality hint.",
-                    tags=dict(job_name=job.name, bot_name=bot_name, locality_hint=job.locality_hint),
+                    tags=dict(
+                        job_name=job.name,
+                        bot_name=bot_name,
+                        locality_hint=job.locality_hint,
+                    ),
                     exc_info=True,
                 )
@@ -2506,7 +2653,9 @@ class Scheduler:
         )
         if action_result.ByteSize() > 0:
             self._update_action_result_retention(
-                action_result, retention_hours=self.action_result_retention_hours, instance_name=job.instance_name
+                action_result,
+                retention_hours=self.action_result_retention_hours,
+                instance_name=job.instance_name,
             )
         worker_duration = None
@@ -2554,7 +2703,11 @@ class Scheduler:
             # bot count by status for each property label
             query_per_label = (
-                session.query(BotEntry.bot_status, PropertyLabelEntry.property_label, func.count(BotEntry.bot_status))
+                session.query(
+                    BotEntry.bot_status,
+                    PropertyLabelEntry.property_label,
+                    func.count(BotEntry.bot_status),
+                )
                 .join(BotEntry, BotEntry.name == PropertyLabelEntry.bot_name)
                 .group_by(BotEntry.bot_status, PropertyLabelEntry.property_label)
                 .filter(self._bot_in_instance_pool())
@@ -2571,7 +2724,11 @@ class Scheduler:
                 metrics["available_capacity_total"][BotStatus(status)] = cast(int, capacity)
             capacity_per_label_stmt = (
-                select(BotEntry.bot_status, PropertyLabelEntry.property_label, func.sum(BotEntry.capacity))
+                select(
+                    BotEntry.bot_status,
+                    PropertyLabelEntry.property_label,
+                    func.sum(BotEntry.capacity),
+                )
                 .join(BotEntry, BotEntry.name == PropertyLabelEntry.bot_name)
                 .group_by(BotEntry.bot_status, PropertyLabelEntry.property_label)
                 .where(self._bot_in_instance_pool())
@@ -2595,7 +2752,11 @@ class Scheduler:
         locate_bot_stmt = (
             select(BotEntry)
-            .where(BotEntry.name == bot_name, BotEntry.bot_id == bot_id, self._bot_in_instance_pool())
+            .where(
+                BotEntry.name == bot_name,
+                BotEntry.bot_id == bot_id,
+                self._bot_in_instance_pool(),
+            )
             .with_for_update()
         )
         with self._sql.session() as session:
@@ -2630,7 +2791,12 @@ class Scheduler:
                         stdout_stream_name=job.stdout_stream_write_name or "",
                         partial_execution_metadata=self.get_execute_action_metadata(job),
                     )
-                    metadata.append(("executeoperationmetadata-bin", job_metadata.SerializeToString()))
+                    metadata.append(
+                        (
+                            "executeoperationmetadata-bin",
+                            job_metadata.SerializeToString(),
+                        )
+                    )
         return metadata
@@ -2647,9 +2813,15 @@ class Scheduler:
         assign_timestamp(metadata.worker_start_timestamp, job.worker_start_timestamp)
         assign_timestamp(metadata.worker_completed_timestamp, job.worker_completed_timestamp)
         assign_timestamp(metadata.input_fetch_start_timestamp, job.input_fetch_start_timestamp)
-        assign_timestamp(metadata.input_fetch_completed_timestamp, job.input_fetch_completed_timestamp)
+        assign_timestamp(
+            metadata.input_fetch_completed_timestamp,
+            job.input_fetch_completed_timestamp,
+        )
         assign_timestamp(metadata.output_upload_start_timestamp, job.output_upload_start_timestamp)
-        assign_timestamp(metadata.output_upload_completed_timestamp, job.output_upload_completed_timestamp)
+        assign_timestamp(
+            metadata.output_upload_completed_timestamp,
+            job.output_upload_completed_timestamp,
+        )
         assign_timestamp(metadata.execution_start_timestamp, job.execution_start_timestamp)
         assign_timestamp(metadata.execution_completed_timestamp, job.execution_completed_timestamp)
@@ -2697,7 +2869,12 @@ class Scheduler:
     ) -> None:
         with self._sql_ro.session(expire_on_commit=False) as session:
             self._publish_execution_stats(
-                session, job_name, instance_name, execution_metadata, property_label, assigner_name
+                session,
+                job_name,
+                instance_name,
+                execution_metadata,
+                property_label,
+                assigner_name,
             )
     def _publish_execution_stats(
@@ -2720,17 +2897,46 @@ class Scheduler:
         upload_start = execution_metadata.output_upload_start_timestamp
         upload_completed = execution_metadata.output_upload_completed_timestamp
-        self._publish_job_duration(instance_name, queued, worker_completed, "Total", property_label, assigner_name)
+        self._publish_job_duration(
+            instance_name,
+            queued,
+            worker_completed,
+            "Total",
+            property_label,
+            assigner_name,
+        )
         # The Queued time is missing here as it's posted as soon as worker has accepted the job.
         self._publish_job_duration(
-            instance_name, worker_start, worker_completed, "Worker", property_label, assigner_name
+            instance_name,
+            worker_start,
+            worker_completed,
+            "Worker",
+            property_label,
+            assigner_name,
         )
-        self._publish_job_duration(instance_name, fetch_start, fetch_completed, "Fetch", property_label, assigner_name)
         self._publish_job_duration(
-            instance_name, execution_start, execution_completed, "Execution", property_label, assigner_name
+            instance_name,
+            fetch_start,
+            fetch_completed,
+            "Fetch",
+            property_label,
+            assigner_name,
         )
         self._publish_job_duration(
-            instance_name, upload_start, upload_completed, "Upload", property_label, assigner_name
+            instance_name,
+            execution_start,
+            execution_completed,
+            "Execution",
+            property_label,
+            assigner_name,
+        )
+        self._publish_job_duration(
+            instance_name,
+            upload_start,
+            upload_completed,
+            "Upload",
+            property_label,
+            assigner_name,
         )
         if self.metering_client is None or len(execution_metadata.auxiliary_metadata) == 0:
@@ -2767,10 +2973,18 @@ class Scheduler:
                 )
                 self.metering_client.put_usage(identity=client_id, operation_name=op.name, usage=usage)
         except Exception as exc:
-            LOGGER.exception("Cannot publish resource usage.", tags=dict(job_name=job_name), exc_info=exc)
+            LOGGER.exception(
+                "Cannot publish resource usage.",
+                tags=dict(job_name=job_name),
+                exc_info=exc,
+            )
     def _update_action_retention(
-        self, action: Action, action_digest: Digest, retention_hours: float | None, instance_name: str
+        self,
+        action: Action,
+        action_digest: Digest,
+        retention_hours: float | None,
+        instance_name: str,
     ) -> None:
         if not self.asset_client or not retention_hours:
             return
@@ -2791,14 +3005,18 @@ class Scheduler:
                 instance_name=instance_name,
             )
             LOGGER.debug(
-                "Extended the retention of action.", tags=dict(digest=action_digest, retention_hours=retention_hours)
+                "Extended the retention of action.",
+                tags=dict(digest=action_digest, retention_hours=retention_hours),
             )
         except Exception:
             LOGGER.exception("Failed to push action as an asset.", tags=dict(digest=action_digest))
             # Not a fatal path, don't reraise here
     def _update_action_result_retention(
-        self, action_result: ActionResult, retention_hours: float | None, instance_name: str
+        self,
+        action_result: ActionResult,
+        retention_hours: float | None,
+        instance_name: str,
     ) -> None:
         if not self.asset_client or not retention_hours:
             return
@@ -2842,11 +3060,16 @@ class Scheduler:
                 instance_name=instance_name,
             )
             LOGGER.debug(
-                "Extended the retention of action result.", tags=dict(digest=digest, retention_hours=retention_hours)
+                "Extended the retention of action result.",
+                tags=dict(digest=digest, retention_hours=retention_hours),
             )
         except Exception as e:
-            LOGGER.exception("Failed to push action_result as an asset.", tags=dict(digest=digest), exc_info=e)
+            LOGGER.exception(
+                "Failed to push action_result as an asset.",
+                tags=dict(digest=digest),
+                exc_info=e,
+            )
             # Not a fatal path, don't reraise here
     def _record_bot_locality_hint(self, session: Session, bot_name: str, locality_hint: str) -> None:
@@ -2885,7 +3108,8 @@ class Scheduler:
             # Delete all hints older than the K-th most recent
             session.execute(
                 delete(BotLocalityHintEntry).where(
-                    BotLocalityHintEntry.bot_name == bot_name, BotLocalityHintEntry.sequence_number < k_th_seq
+                    BotLocalityHintEntry.bot_name == bot_name,
+                    BotLocalityHintEntry.sequence_number < k_th_seq,
                 )
             )
@@ -2983,7 +3207,11 @@ class Scheduler:
             LOGGER.warning(
                 "Instance usage not updated.",
-                tags={"cohort": bot_cohort, "instance_name": instance_name, "delta": delta},
+                tags={
+                    "cohort": bot_cohort,
+                    "instance_name": instance_name,
+                    "delta": delta,
+                },
             )
             return False
         return True

buildgrid 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

buildgrid 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl