@cat-factory/orchestration 0.19.2 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,11 @@
1
- import { parseBlueprintService, parseSpecDoc, DEFAULT_COMPANION_MAX_ATTEMPTS, } from '@cat-factory/contracts';
1
+ import { parseBlueprintService, parseSpecDoc, DEFAULT_COMPANION_MAX_ATTEMPTS, isLocalRunner, } from '@cat-factory/contracts';
2
2
  import { blueprintPostOp, companionFor, companionTargets, isCompanionKind, registeredAgentStep, registeredPreOps, registeredPostOps, runRepoOps, specPostOp, TASK_ESTIMATOR_AGENT_KIND, } from '@cat-factory/agents';
3
3
  import { coerceTaskEstimate, summarizeEstimate } from '../estimation/estimate.logic.js';
4
4
  import { validatePipelineShape } from '../pipelines/pipelineShape.js';
5
5
  import { shouldRunGatedStep } from './stepGating.logic.js';
6
6
  import { reviewableArtifactOutput } from './artifact-review.logic.js';
7
7
  import { resolveIndividualVendors, } from './individualVendors.logic.js';
8
- import { assertFound, ConflictError, getErrorMessage, isModelUsable, NotFoundError, sameSubtasks, ValidationError, } from '@cat-factory/kernel';
8
+ import { assertFound, ConflictError, getErrorMessage, isModelUsable, NotFoundError, parseLocalModelId, resolveModelRef, sameSubtasks, subscriptionOptionFor, ValidationError, } from '@cat-factory/kernel';
9
9
  import { DEFAULT_MERGE_PRESET } from '@cat-factory/kernel';
10
10
  import { CONFLICTS_AGENT_KIND, MERGER_AGENT_KIND, REQUIREMENTS_REVIEW_AGENT_KIND, CLARITY_REVIEW_AGENT_KIND, BUG_INVESTIGATOR_AGENT_KIND, TRACKER_AGENT_KIND, ANALYSIS_AGENT_KIND, TESTER_AGENT_KIND, HUMAN_TEST_AGENT_KIND, BLUEPRINTS_AGENT_KIND, SPEC_WRITER_AGENT_KIND, } from './ci.logic.js';
11
11
  import { AgentContextBuilder } from './AgentContextBuilder.js';
@@ -360,6 +360,59 @@ export class ExecutionService {
360
360
  'before starting.', 'providers_unconfigured', { models: [...unconfigured] });
361
361
  }
362
362
  }
363
+ /**
364
+ * Refuse to START / RETRY a run when the workspace has reached its spend budget AND the
365
+ * pipeline has at least one budget-METERED step. A `0` (or exhausted) budget is a
366
+ * deliberate "no paid spend" setting, but it must surface as a clear, up-front error here
367
+ * rather than a silent mid-run pause. Steps that incur no metered cost — a connected
368
+ * subscription model, or a keyless local-runner model — are exempt, so a workspace that
369
+ * runs ONLY local/subscription models starts normally even at a `0` budget. Best-effort:
370
+ * with no capability resolver wired (tests/unconfigured) it is skipped and the mid-run
371
+ * gate still guards. Before any side effects, matching the other start guards.
372
+ */
373
+ async assertBudgetAllowsPipeline(workspaceId, block, pipeline, initiatedBy) {
374
+ if (!(await this.spend.isOverBudget(workspaceId)))
375
+ return;
376
+ if (!this.resolveProviderCapabilities)
377
+ return;
378
+ const caps = await this.resolveProviderCapabilities(workspaceId, initiatedBy);
379
+ const ids = [];
380
+ if (block.modelId) {
381
+ ids.push(block.modelId);
382
+ }
383
+ else if (this.resolveWorkspaceModelDefault) {
384
+ for (const kind of pipeline.agentKinds) {
385
+ ids.push(await this.resolveWorkspaceModelDefault(workspaceId, kind, block.modelPresetId));
386
+ }
387
+ }
388
+ else {
389
+ ids.push(undefined);
390
+ }
391
+ if (!ids.some((id) => this.modelIdIsMetered(id, caps)))
392
+ return;
393
+ const status = await this.spend.status(workspaceId);
394
+ throw new ConflictError(`This workspace has reached its spend budget (${status.costSpent.toFixed(2)}/` +
395
+ `${status.costLimit.toFixed(2)} ${status.currency}). New runs on metered models are ` +
396
+ 'paused until the budget is raised (Workspace settings → Budget) or the billing period ' +
397
+ 'resets. A task pinned to a local model or a connected subscription still runs.');
398
+ }
399
+ /**
400
+ * Whether a model id will incur metered monetary cost for THIS workspace. Non-metered:
401
+ * a subscription model whose vendor is connected ("subscriptions always win"), or a
402
+ * local-runner model (keyless, on the user's own endpoint). Everything else — including
403
+ * env-default routing (an absent id) and Cloudflare Workers AI — is treated as metered.
404
+ */
405
+ modelIdIsMetered(id, caps) {
406
+ const sub = subscriptionOptionFor(id);
407
+ if (sub && caps.subscriptionVendors.has(sub.vendor))
408
+ return false;
409
+ const ref = resolveModelRef(id, caps);
410
+ if (!ref)
411
+ return true;
412
+ if (ref.harness === 'claude-code' || ref.harness === 'codex')
413
+ return false;
414
+ return !isLocalRunner(ref.provider);
415
+ }
363
416
  /** Start a pipeline against a block, replacing any prior run on it. */
364
417
  async start(workspaceId, blockId, pipelineId,
365
418
  /**
@@ -397,6 +450,9 @@ export class ExecutionService {
397
450
  // Enforce the workspace's per-service running-task limit (off by default) — a clear,
398
451
  // actionable error before any side effects, so the human knows why the start was refused.
399
452
  await this.assertWithinTaskLimit(workspaceId, block);
453
+ // Refuse a metered run once the spend budget is reached (a clear error rather than a
454
+ // silent mid-run pause). A local/subscription-only pipeline is exempt and starts.
455
+ await this.assertBudgetAllowsPipeline(workspaceId, block, pipeline, initiatedBy);
400
456
  // Hard dependency gate: a task cannot start while any block it `dependsOn` is unfinished
401
457
  // (not yet `done`/merged). Enforced server-side so it holds for manual starts, recurring
402
458
  // fires, auto-start propagation and direct API calls alike — the frontend's runnable
@@ -617,11 +673,13 @@ export class ExecutionService {
617
673
  // Spend gate: don't incur monetary LLM cost once the budget is exhausted. Pause
618
674
  // the run (so the frontend can flag it) and stop here. A previously-paused run
619
675
  // that finds the budget has freed up resumes and proceeds. EXEMPTION: a step that
620
- // runs on a flat-rate subscription (quota) model — Claude Code / Codex on a pooled
621
- // token incurs no metered monetary cost and never contributes to the budget, so
622
- // it must not be held hostage by a budget other (metered) models exhausted.
623
- if (await this.spend.isOverBudget()) {
624
- if (!(await this.currentStepIsQuotaBased(workspaceId, instance, step))) {
676
+ // incurs no metered monetary cost — a flat-rate subscription (Claude Code / Codex)
677
+ // OR a local-runner model (keyless, on the user's own endpoint) never contributes
678
+ // to the budget, so it must not be held hostage by a budget other (metered) models
679
+ // exhausted. This is what lets a deliberately local-only / subscription-only workspace
680
+ // keep running at a `0` budget (see the spend-budget docs).
681
+ if (await this.spend.isOverBudget(workspaceId)) {
682
+ if (!(await this.currentStepIsNonMetered(workspaceId, instance, step))) {
625
683
  if (instance.status !== 'paused') {
626
684
  instance.status = 'paused';
627
685
  await this.executionRepository.upsert(workspaceId, instance);
@@ -797,24 +855,52 @@ export class ExecutionService {
797
855
  }
798
856
  }
799
857
  /**
800
- * Whether the current step will run on a flat-rate subscription (quota) model, so
801
- * the spend gate can let it proceed even when the monetary budget is exhausted.
802
- * Resolved through the executor (the authority on the "subscriptions always win"
803
- * routing) off a full step context. Best-effort and side-effect-free: an executor
804
- * without the capability, a missing block, or any resolution error all report false
805
- * (the step is treated as budget-metered, the prior behaviour). Only consulted on
806
- * the over-budget path, so the extra context build never touches the happy path.
858
+ * Whether the current step incurs NO metered monetary LLM cost, so the spend gate can
859
+ * let it proceed even when the budget is exhausted. Two non-metered cases:
860
+ * - a flat-rate SUBSCRIPTION (quota) model — Claude Code / Codex on a pooled token;
861
+ * resolved through the executor (the authority on "subscriptions always win").
862
+ * - a LOCAL-runner model (Ollama / LM Studio / …) keyless, runs on the user's own
863
+ * endpoint, so it costs the deployment nothing; detected off the resolved model id.
864
+ * This is what makes a `0` budget mean "no PAID spend" without bricking a workspace that
865
+ * deliberately runs only local models or subscriptions (see the spend-budget docs).
866
+ *
867
+ * Once the executor resolves the step's concrete model id, the metered/non-metered
868
+ * decision is delegated to the SAME {@link modelIdIsMetered} predicate the up-front
869
+ * {@link assertBudgetAllowsPipeline} gate uses, so the two gates can't classify a model
870
+ * differently (a divergence would let a run pass the start gate then immediately pause,
871
+ * or vice versa). The executor's `isQuotaBased` is still consulted first as the
872
+ * authoritative subscription-routing signal; the shared predicate covers local-runner +
873
+ * subscription-by-capability + Cloudflare classification identically to the start gate.
874
+ * Falls back to a bare local-id check when no capability resolver is wired.
875
+ *
876
+ * Best-effort and side-effect-free: an executor without the capability, a missing block,
877
+ * or any resolution error all report false (treated as budget-metered, the prior
878
+ * behaviour). Only consulted on the over-budget path, so it never touches the happy path.
807
879
  */
808
- async currentStepIsQuotaBased(workspaceId, instance, step) {
809
- if (!this.agentExecutor.isQuotaBased)
810
- return false;
880
+ async currentStepIsNonMetered(workspaceId, instance, step) {
811
881
  try {
812
882
  const block = await this.blockRepository.get(workspaceId, instance.blockId);
813
883
  if (!block)
814
884
  return false;
815
885
  const isFinalStep = instance.currentStep === instance.steps.length - 1;
816
886
  const context = await this.contextBuilder.buildContext(workspaceId, instance, step, isFinalStep, block);
817
- return await this.agentExecutor.isQuotaBased(context);
887
+ if (this.agentExecutor.isQuotaBased && (await this.agentExecutor.isQuotaBased(context))) {
888
+ return true;
889
+ }
890
+ if (this.agentExecutor.resolveModel) {
891
+ const modelId = await this.agentExecutor.resolveModel(context);
892
+ // Classify the resolved id through the shared predicate (same as the start gate)
893
+ // when capabilities are wired; else fall back to the bare local-runner check.
894
+ if (this.resolveProviderCapabilities) {
895
+ const caps = await this.resolveProviderCapabilities(workspaceId, instance.initiatedBy);
896
+ if (!this.modelIdIsMetered(modelId, caps))
897
+ return true;
898
+ }
899
+ else if (parseLocalModelId(modelId)) {
900
+ return true;
901
+ }
902
+ }
903
+ return false;
818
904
  }
819
905
  catch {
820
906
  return false;
@@ -2940,7 +3026,13 @@ export class ExecutionService {
2940
3026
  if (previous.status !== 'failed') {
2941
3027
  throw new ConflictError(`Only a failed run can be retried (run is '${previous.status}').`, 'run_not_retryable', { status: previous.status });
2942
3028
  }
2943
- await this.requireBlock(workspaceId, previous.blockId);
3029
+ const block = await this.requireBlock(workspaceId, previous.blockId);
3030
+ // Same up-front budget gate as start(): refuse a metered retry once the budget is
3031
+ // reached (local/subscription-only pipelines still retry). Before any side effects.
3032
+ const pipeline = await this.pipelineRepository.get(workspaceId, previous.pipelineId);
3033
+ if (pipeline) {
3034
+ await this.assertBudgetAllowsPipeline(workspaceId, block, pipeline, initiatedBy ?? previous.initiatedBy);
3035
+ }
2944
3036
  const { steps, currentStep } = planResumedSteps(previous);
2945
3037
  // Mint the activation before replacing the failed run, so a bad password aborts
2946
3038
  // the retry without losing the retryable terminal run.