@aitne/daemon 0.1.10 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/adapter-watchdog.d.ts +70 -0
- package/dist/adapters/adapter-watchdog.js +115 -0
- package/dist/adapters/discord.d.ts +17 -1
- package/dist/adapters/discord.js +33 -0
- package/dist/adapters/notification-manager.d.ts +27 -1
- package/dist/adapters/notification-manager.js +54 -39
- package/dist/adapters/slack-adapter.d.ts +26 -1
- package/dist/adapters/slack-adapter.js +41 -0
- package/dist/adapters/telegram-adapter.d.ts +18 -1
- package/dist/adapters/telegram-adapter.js +41 -2
- package/dist/adapters/types.d.ts +20 -0
- package/dist/adapters/whatsapp-adapter.d.ts +26 -7
- package/dist/adapters/whatsapp-adapter.js +74 -21
- package/dist/api/env-writer.js +8 -5
- package/dist/api/helpers/agent-errors-registry.d.ts +5 -5
- package/dist/api/helpers/agent-errors-registry.js +5 -5
- package/dist/api/routes/agent.js +33 -12
- package/dist/api/routes/agents/index.js +75 -16
- package/dist/api/routes/agents/views.d.ts +37 -2
- package/dist/api/routes/agents/views.js +64 -2
- package/dist/api/routes/background-task.d.ts +22 -0
- package/dist/api/routes/background-task.js +338 -0
- package/dist/api/routes/browser-history.js +9 -1
- package/dist/api/routes/context/permissions.js +3 -2
- package/dist/api/routes/context/snapshots.js +0 -3
- package/dist/api/routes/context/write.js +3 -17
- package/dist/api/routes/dashboard/config.js +48 -12
- package/dist/api/routes/dashboard/cost-approvals.js +66 -0
- package/dist/api/routes/dashboard/notifications.js +9 -9
- package/dist/api/routes/integrations/crud-patch.js +5 -1
- package/dist/api/routes/integrations-reconcile.js +2 -2
- package/dist/api/routes/notion.d.ts +1 -1
- package/dist/api/routes/observations.js +7 -7
- package/dist/api/routes/obsidian.d.ts +1 -1
- package/dist/api/routes/receipts.js +5 -1
- package/dist/api/routes/setup-migrate.js +1 -1
- package/dist/api/routes/setup.js +1 -1
- package/dist/api/routes/task-flows.d.ts +1 -1
- package/dist/api/routes/task-flows.js +1 -1
- package/dist/api/routes/tuning.d.ts +29 -0
- package/dist/api/routes/tuning.js +304 -0
- package/dist/api/server.d.ts +44 -16
- package/dist/api/server.js +9 -0
- package/dist/bootstrap/adapters.d.ts +19 -0
- package/dist/bootstrap/adapters.js +61 -0
- package/dist/bootstrap/api.d.ts +5 -3
- package/dist/bootstrap/api.js +45 -13
- package/dist/bootstrap/catchup.d.ts +1 -1
- package/dist/bootstrap/catchup.js +11 -11
- package/dist/bootstrap/event-pipeline.d.ts +11 -0
- package/dist/bootstrap/event-pipeline.js +245 -7
- package/dist/bootstrap/observers.js +9 -6
- package/dist/bootstrap/schedule-helpers.d.ts +104 -6
- package/dist/bootstrap/schedule-helpers.js +172 -19
- package/dist/config.js +26 -12
- package/dist/core/agent-core.d.ts +33 -1
- package/dist/core/agent-core.js +36 -1
- package/dist/core/agents/activity-scan-cadence.d.ts +103 -0
- package/dist/core/agents/activity-scan-cadence.js +127 -0
- package/dist/core/agents/agent-route-override.d.ts +53 -0
- package/dist/core/agents/agent-route-override.js +69 -0
- package/dist/core/agents/builtin-registry.d.ts +51 -14
- package/dist/core/agents/builtin-registry.js +92 -15
- package/dist/core/agents/config-gate-reconcile.d.ts +38 -0
- package/dist/core/agents/config-gate-reconcile.js +51 -0
- package/dist/core/agents/cron-substitute.d.ts +1 -1
- package/dist/core/agents/cron-substitute.js +1 -1
- package/dist/core/agents/custom-routine-migration.d.ts +60 -0
- package/dist/core/agents/custom-routine-migration.js +149 -0
- package/dist/core/agents/firing-blocked.d.ts +1 -1
- package/dist/core/agents/hourly-cadence.d.ts +102 -0
- package/dist/core/agents/hourly-cadence.js +126 -0
- package/dist/core/agents/loader-boot.js +23 -0
- package/dist/core/agents/loader.d.ts +19 -0
- package/dist/core/agents/loader.js +34 -2
- package/dist/core/agents/override-merge.d.ts +1 -1
- package/dist/core/agents/override-merge.js +9 -1
- package/dist/core/agents/recurrence-convert.d.ts +1 -1
- package/dist/core/agents/recurrence-convert.js +1 -1
- package/dist/core/agents/recurring-schedule-adapter.js +8 -0
- package/dist/core/alerts.js +6 -6
- package/dist/core/backends/auth-health-monitor.d.ts +2 -2
- package/dist/core/backends/auth-health-monitor.js +1 -1
- package/dist/core/backends/backend-router.d.ts +27 -1
- package/dist/core/backends/backend-router.js +165 -1
- package/dist/core/backends/claude-code-core.d.ts +71 -31
- package/dist/core/backends/claude-code-core.js +282 -54
- package/dist/core/backends/cli-quota-guards.d.ts +29 -1
- package/dist/core/backends/cli-quota-guards.js +40 -5
- package/dist/core/backends/codex-core.d.ts +6 -0
- package/dist/core/backends/codex-core.js +22 -6
- package/dist/core/backends/failure-spend.d.ts +58 -0
- package/dist/core/backends/failure-spend.js +137 -0
- package/dist/core/backends/gemini-cli-core.d.ts +6 -0
- package/dist/core/backends/gemini-cli-core.js +25 -6
- package/dist/core/backends/model-registry.d.ts +1 -1
- package/dist/core/backends/model-registry.js +4 -4
- package/dist/core/backends/opencode-core.d.ts +1 -1
- package/dist/core/backends/opencode-core.js +5 -5
- package/dist/core/backends/plan-presets.js +39 -15
- package/dist/core/bang-commands/commands-cost.js +3 -1
- package/dist/core/bang-commands/commands-report.js +4 -3
- package/dist/core/bang-commands/commands-research.js +4 -1
- package/dist/core/bang-commands/commands-revert-tuning.d.ts +18 -0
- package/dist/core/bang-commands/commands-revert-tuning.js +63 -0
- package/dist/core/bang-commands/commands-stop-start.js +3 -3
- package/dist/core/bang-commands/commands-task-control.d.ts +19 -0
- package/dist/core/bang-commands/commands-task-control.js +147 -0
- package/dist/core/bang-commands/commands-wiki.js +5 -5
- package/dist/core/bang-commands/index.d.ts +2 -0
- package/dist/core/bang-commands/index.js +12 -0
- package/dist/core/bang-commands/registry.d.ts +12 -0
- package/dist/core/browser-history/research-cluster-fanout.d.ts +28 -14
- package/dist/core/browser-history/research-cluster-fanout.js +39 -16
- package/dist/core/channel-timeline.d.ts +5 -1
- package/dist/core/channel-timeline.js +13 -0
- package/dist/core/context/index-reconciler.js +5 -2
- package/dist/core/context/policy-index-reconciler.d.ts +6 -4
- package/dist/core/context/policy-index-runner.js +25 -6
- package/dist/core/context-builder-calendar.js +10 -2
- package/dist/core/context-builder-conversation.d.ts +8 -1
- package/dist/core/context-builder-conversation.js +41 -7
- package/dist/core/context-builder-yesterday.js +4 -3
- package/dist/core/context-builder.d.ts +7 -2
- package/dist/core/context-builder.js +62 -20
- package/dist/core/context-file-serializer.d.ts +1 -1
- package/dist/core/context-file-serializer.js +1 -1
- package/dist/core/context-health.js +2 -2
- package/dist/core/context-paths.d.ts +1 -1
- package/dist/core/context-paths.js +1 -1
- package/dist/core/context-validation/prepare-write.js +1 -1
- package/dist/core/context-validation/routine-rulebook.d.ts +1 -1
- package/dist/core/context-vault-aliases.d.ts +0 -13
- package/dist/core/context-vault-aliases.js +37 -0
- package/dist/core/custom-routines.d.ts +99 -0
- package/dist/core/custom-routines.js +187 -0
- package/dist/core/daemon-api-cli.js +49 -0
- package/dist/core/day-boundary.d.ts +46 -0
- package/dist/core/day-boundary.js +40 -0
- package/dist/core/dispatcher-activity-scan.d.ts +221 -0
- package/dist/core/dispatcher-activity-scan.js +775 -0
- package/dist/core/dispatcher-error-handling.d.ts +6 -11
- package/dist/core/dispatcher-error-handling.js +38 -62
- package/dist/core/dispatcher-hourly-check.js +6 -1
- package/dist/core/dispatcher-message-handler.d.ts +10 -0
- package/dist/core/dispatcher-message-handler.js +17 -0
- package/dist/core/dispatcher-morning-routine.d.ts +6 -6
- package/dist/core/dispatcher-morning-routine.js +13 -13
- package/dist/core/dispatcher-result-processor.d.ts +33 -0
- package/dist/core/dispatcher-result-processor.js +167 -11
- package/dist/core/dispatcher-scheduled-background-task.d.ts +42 -0
- package/dist/core/dispatcher-scheduled-background-task.js +89 -0
- package/dist/core/dispatcher-scheduled-tasks.d.ts +63 -1
- package/dist/core/dispatcher-scheduled-tasks.js +213 -6
- package/dist/core/dispatcher-task-delivery.d.ts +105 -0
- package/dist/core/dispatcher-task-delivery.js +555 -0
- package/dist/core/dispatcher-types.d.ts +48 -9
- package/dist/core/dispatcher-types.js +3 -3
- package/dist/core/dispatcher.d.ts +112 -31
- package/dist/core/dispatcher.js +284 -59
- package/dist/core/dm-freshness-metrics.d.ts +1 -1
- package/dist/core/drift-effects.js +2 -2
- package/dist/core/feedback/consolidation-prep.js +17 -5
- package/dist/core/feedback/eviction-scorer.js +6 -2
- package/dist/core/feedback/lesson-format.js +9 -4
- package/dist/core/feedback/lesson-injection.d.ts +1 -1
- package/dist/core/feedback/lesson-injection.js +17 -2
- package/dist/core/feedback/lesson-store-overview.d.ts +8 -4
- package/dist/core/feedback/lesson-store-overview.js +8 -4
- package/dist/core/feedback/regeneralization-prep.js +29 -16
- package/dist/core/feedback/self-performance-prep.d.ts +186 -0
- package/dist/core/feedback/self-performance-prep.js +541 -0
- package/dist/core/feedback/tuning-actuator.d.ts +198 -0
- package/dist/core/feedback/tuning-actuator.js +432 -0
- package/dist/core/feedback/tuning-recommender.d.ts +247 -0
- package/dist/core/feedback/tuning-recommender.js +580 -0
- package/dist/core/feedback/tuning-revert-monitor.d.ts +90 -0
- package/dist/core/feedback/tuning-revert-monitor.js +213 -0
- package/dist/core/health-monitor.d.ts +6 -0
- package/dist/core/health-monitor.js +1 -1
- package/dist/core/injection-policy.d.ts +4 -4
- package/dist/core/injection-policy.js +4 -4
- package/dist/core/integration-main-backend.js +4 -0
- package/dist/core/management-md.d.ts +2 -2
- package/dist/core/management-md.js +51 -13
- package/dist/core/morning/orchestrator.d.ts +2 -2
- package/dist/core/morning/orchestrator.js +2 -2
- package/dist/core/notification-gate.d.ts +64 -0
- package/dist/core/notification-gate.js +51 -0
- package/dist/core/notification-rate-limit.d.ts +40 -0
- package/dist/core/notification-rate-limit.js +50 -0
- package/dist/core/policy-files.d.ts +1 -1
- package/dist/core/policy-files.js +2 -2
- package/dist/core/pre-pass-freshness.d.ts +4 -4
- package/dist/core/retention.d.ts +5 -0
- package/dist/core/retention.js +20 -4
- package/dist/core/review-context.d.ts +1 -1
- package/dist/core/review-context.js +10 -5
- package/dist/core/roadmap-write-lock.d.ts +2 -1
- package/dist/core/roadmap-write-lock.js +15 -10
- package/dist/core/routine-acquisition-plan.d.ts +47 -1
- package/dist/core/routine-acquisition-plan.js +78 -20
- package/dist/core/routine-fetch-window-retry.js +7 -4
- package/dist/core/routine-fetch-window-runner.d.ts +39 -3
- package/dist/core/routine-fetch-window-runner.js +264 -13
- package/dist/core/routine-windows.d.ts +2 -2
- package/dist/core/routine-windows.js +8 -5
- package/dist/core/scheduler.d.ts +175 -16
- package/dist/core/scheduler.js +559 -102
- package/dist/core/signal-detector.d.ts +12 -0
- package/dist/core/signal-detector.js +53 -9
- package/dist/core/skills-compiler-denied-tools.js +2 -2
- package/dist/core/skills-compiler-skill-index.d.ts +2 -2
- package/dist/core/skills-compiler-skill-index.js +2 -2
- package/dist/core/skills-compiler-variants.d.ts +1 -1
- package/dist/core/skills-compiler-variants.js +8 -0
- package/dist/core/skills-compiler.d.ts +29 -26
- package/dist/core/skills-compiler.js +117 -81
- package/dist/core/skills-manifest.d.ts +37 -0
- package/dist/core/skills-manifest.js +73 -2
- package/dist/core/sleep-inhibitor.d.ts +79 -0
- package/dist/core/sleep-inhibitor.js +132 -0
- package/dist/core/slim-system-prompt-loader.d.ts +77 -0
- package/dist/core/slim-system-prompt-loader.js +141 -0
- package/dist/core/spawn-gates.d.ts +126 -0
- package/dist/core/spawn-gates.js +180 -0
- package/dist/core/today-direct-writer.d.ts +2 -2
- package/dist/core/today-direct-writer.js +1 -1
- package/dist/core/today-write-lock.d.ts +4 -2
- package/dist/core/today-write-lock.js +30 -20
- package/dist/core/wake-detector.d.ts +55 -0
- package/dist/core/wake-detector.js +80 -0
- package/dist/core/wiki/compile-lock.d.ts +1 -1
- package/dist/core/wiki/compile-lock.js +1 -1
- package/dist/core/workdir.js +15 -6
- package/dist/db/activity-scan-signals.d.ts +77 -0
- package/dist/db/activity-scan-signals.js +378 -0
- package/dist/db/agents-store.d.ts +28 -0
- package/dist/db/agents-store.js +62 -0
- package/dist/db/background-task-clarifications-store.d.ts +81 -0
- package/dist/db/background-task-clarifications-store.js +152 -0
- package/dist/db/background-task-store.d.ts +207 -0
- package/dist/db/background-task-store.js +380 -0
- package/dist/db/browser-history-store.d.ts +39 -6
- package/dist/db/browser-history-store.js +51 -7
- package/dist/db/browser-task-clarifications-store.d.ts +12 -0
- package/dist/db/browser-task-clarifications-store.js +35 -5
- package/dist/db/browser-task-store.d.ts +3 -0
- package/dist/db/browser-task-store.js +29 -4
- package/dist/db/deferred-dm.d.ts +86 -0
- package/dist/db/deferred-dm.js +199 -0
- package/dist/db/migrations.js +330 -0
- package/dist/db/observations.d.ts +2 -2
- package/dist/db/observations.js +3 -3
- package/dist/db/schema.js +217 -16
- package/dist/db/voice-transcripts-store.d.ts +1 -1
- package/dist/index.js +86 -29
- package/dist/messaging/browser-task-mcp-notifier.d.ts +12 -70
- package/dist/messaging/browser-task-mcp-notifier.js +30 -151
- package/dist/messaging/browser-task-screenshot-attachment.d.ts +15 -0
- package/dist/messaging/browser-task-screenshot-attachment.js +63 -0
- package/dist/observers/delegated-sync-worker.d.ts +6 -6
- package/dist/observers/delegated-sync-worker.js +10 -10
- package/dist/observers/git-delegated-cron.d.ts +1 -1
- package/dist/observers/git-delegated-cron.js +2 -2
- package/dist/observers/github-poller-classifier.d.ts +3 -3
- package/dist/observers/github-poller-classifier.js +3 -3
- package/dist/observers/imminent-event-scheduler.d.ts +1 -1
- package/dist/observers/imminent-event-scheduler.js +1 -1
- package/dist/observers/mail-poller.d.ts +1 -0
- package/dist/observers/mail-poller.js +42 -3
- package/dist/observers/observation-summarizer/summarizer-client.d.ts +2 -2
- package/dist/observers/observation-summarizer/summarizer-client.js +2 -2
- package/dist/observers/observation-summarizer/worker.d.ts +2 -2
- package/dist/observers/observation-summarizer/worker.js +4 -4
- package/dist/observers/obsidian-watcher.d.ts +1 -1
- package/dist/observers/obsidian-watcher.js +1 -1
- package/dist/safety/agent-write-tracker.d.ts +4 -4
- package/dist/safety/agent-write-tracker.js +4 -4
- package/dist/safety/audit.d.ts +43 -5
- package/dist/safety/audit.js +86 -18
- package/dist/safety/risk-classifier.d.ts +6 -0
- package/dist/safety/risk-classifier.js +75 -11
- package/dist/scheduler/activity-scan-gate.d.ts +86 -0
- package/dist/scheduler/activity-scan-gate.js +132 -0
- package/dist/services/background-task/background-task-budget.d.ts +80 -0
- package/dist/services/background-task/background-task-budget.js +91 -0
- package/dist/services/background-task/background-task-driver.d.ts +105 -0
- package/dist/services/background-task/background-task-driver.js +416 -0
- package/dist/services/background-task/background-task-runner.d.ts +96 -0
- package/dist/services/background-task/background-task-runner.js +673 -0
- package/dist/services/background-task/background-task-tools.d.ts +84 -0
- package/dist/services/background-task/background-task-tools.js +247 -0
- package/dist/services/background-task/background-task-transition-events.d.ts +43 -0
- package/dist/services/background-task/background-task-transition-events.js +54 -0
- package/dist/services/browser-history/automation/egress-denylist.d.ts +1 -1
- package/dist/services/browser-history/automation/egress-denylist.js +16 -6
- package/dist/services/browser-history/managed-chromium/sandbox-launcher.js +0 -1
- package/dist/services/browser-task/browser-task-runner.js +53 -8
- package/dist/services/observations-batch.d.ts +1 -1
- package/dist/services/observations-batch.js +2 -2
- package/dist/settings/runtime-settings.d.ts +38 -11
- package/dist/settings/runtime-settings.js +203 -40
- package/dist/settings/settings-store.js +11 -3
- package/package.json +4 -4
|
@@ -16,7 +16,7 @@ import { buildDaemonApiCliEnv } from "../daemon-api-cli.js";
|
|
|
16
16
|
import { noteNativeSkillToolIfPresent, probeCliNativeSkillSubcommand, } from "./native-skill-discovery-probe.js";
|
|
17
17
|
import { createOutputCapturePath, CliPathCache, parseJsonLine, readFileIfExists, removeFileIfExists, runLineCommand, } from "./cli-utils.js";
|
|
18
18
|
import { probeApiKeyServerSide } from "./api-key-probe.js";
|
|
19
|
-
import { assertCostWithinMaxBudget, assertPromptCostWithinMaxBudget, classifyCliFailure, } from "./cli-quota-guards.js";
|
|
19
|
+
import { assertCostWithinMaxBudget, assertPromptCostWithinMaxBudget, classifyCliFailure, recoverCliFailureSpend, } from "./cli-quota-guards.js";
|
|
20
20
|
import { auditStreamObservation, extractCodexShellCall, } from "../../safety/subprocess-block-scanner.js";
|
|
21
21
|
import { extractSilentApiErrors, logSilentApiErrors, } from "./silent-api-error-detector.js";
|
|
22
22
|
import { findRegisteredModel, getModelsForBackend, latestLiteFor, } from "./model-registry.js";
|
|
@@ -43,7 +43,7 @@ const EMPTY_USAGE = {
|
|
|
43
43
|
* The delegated path already had this guard (see the `runDelegatedTool`
|
|
44
44
|
* wiring lower in this file); the reactive path needs it explicitly
|
|
45
45
|
* because a single hung subprocess can pin a session for the full
|
|
46
|
-
* executeTimeoutMinutes wall-clock, blocking morning-routine /
|
|
46
|
+
* executeTimeoutMinutes wall-clock, blocking morning-routine / activity-scan
|
|
47
47
|
* dispatch downstream.
|
|
48
48
|
*/
|
|
49
49
|
const REACTIVE_IDLE_TIMEOUT_MS = 5 * 60 * 1000;
|
|
@@ -669,12 +669,12 @@ export class CodexCore {
|
|
|
669
669
|
// we still throw a `timeout` failure here because that matches
|
|
670
670
|
// the dispatcher's retry semantics.
|
|
671
671
|
if (idleTimedOut) {
|
|
672
|
-
const err = new BackendDecisiveFailure(this.backendId, "timeout", new Error(`Codex reactive stream went idle for ${REACTIVE_IDLE_TIMEOUT_MS}ms (no events from CLI subprocess)`));
|
|
672
|
+
const err = new BackendDecisiveFailure(this.backendId, "timeout", new Error(`Codex reactive stream went idle for ${REACTIVE_IDLE_TIMEOUT_MS}ms (no events from CLI subprocess)`), this.recoverFailureSpend(usage, actualModelId, numTurns, startMs));
|
|
673
673
|
logger.error({ err, eventType: params.eventType, model: params.modelId, durationMs: Date.now() - startMs }, "Codex execute idle-timed-out");
|
|
674
674
|
throw err;
|
|
675
675
|
}
|
|
676
676
|
if (runResult.timedOut) {
|
|
677
|
-
const err = new BackendDecisiveFailure(this.backendId, "timeout", new Error(`Codex execution exceeded timeout of ${this.config.executeTimeoutMinutes} minutes`));
|
|
677
|
+
const err = new BackendDecisiveFailure(this.backendId, "timeout", new Error(`Codex execution exceeded timeout of ${this.config.executeTimeoutMinutes} minutes`), this.recoverFailureSpend(usage, actualModelId, numTurns, startMs));
|
|
678
678
|
logger.error({ err, eventType: params.eventType, model: params.modelId, durationMs: Date.now() - startMs }, "Codex execute timed out");
|
|
679
679
|
throw err;
|
|
680
680
|
}
|
|
@@ -723,7 +723,7 @@ export class CodexCore {
|
|
|
723
723
|
?? firstFailureLine(runResult.stdoutLines);
|
|
724
724
|
if (!sawCompletion || runResult.exitCode !== 0) {
|
|
725
725
|
const failureMsg = combinedFailure ?? "Codex execution did not complete successfully.";
|
|
726
|
-
const classified = this.classifyFailure(failureMsg);
|
|
726
|
+
const classified = this.classifyFailure(failureMsg, this.recoverFailureSpend(usage, actualModelId, numTurns, startMs));
|
|
727
727
|
logger.error({ err: classified, eventType: params.eventType, model: params.modelId, exitCode: runResult.exitCode, durationMs: Date.now() - startMs }, "Codex execute failed");
|
|
728
728
|
throw classified;
|
|
729
729
|
}
|
|
@@ -943,13 +943,29 @@ export class CodexCore {
|
|
|
943
943
|
// Gemini CLI core; the logic lives in `cli-quota-guards.ts` (single source
|
|
944
944
|
// of truth) and each backend passes its own regexes / label. See that
|
|
945
945
|
// module for the full ordering rationale.
|
|
946
|
-
classifyFailure(message) {
|
|
946
|
+
classifyFailure(message, spend) {
|
|
947
947
|
return classifyCliFailure({
|
|
948
948
|
backendId: this.backendId,
|
|
949
949
|
message,
|
|
950
950
|
// OpenAI surfaces quota exhaustion as "rate limit" / "usage limit" / "quota".
|
|
951
951
|
rateLimitPattern: /rate limit|usage limit|quota/i,
|
|
952
952
|
authPattern: /unauthorized|forbidden|api key|login/i,
|
|
953
|
+
...(spend !== undefined ? { spend } : {}),
|
|
954
|
+
});
|
|
955
|
+
}
|
|
956
|
+
/**
|
|
957
|
+
* PREPASS_COST_REDUCTION_PLAN.md N1 — spend recovered from the failed
|
|
958
|
+
* run's JSONL usage so terminal errors carry what the provider already
|
|
959
|
+
* billed. Null when the stream never reported usage.
|
|
960
|
+
*/
|
|
961
|
+
recoverFailureSpend(usage, modelId, numTurns, startMs) {
|
|
962
|
+
return recoverCliFailureSpend({
|
|
963
|
+
backendId: this.backendId,
|
|
964
|
+
priceFetcher: this.priceFetcher,
|
|
965
|
+
usage,
|
|
966
|
+
modelId,
|
|
967
|
+
numTurns,
|
|
968
|
+
durationMs: Date.now() - startMs,
|
|
953
969
|
});
|
|
954
970
|
}
|
|
955
971
|
assertWithinMaxBudget(costUsd, maxBudgetUsd, modelId, spend) {
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PREPASS_COST_REDUCTION_PLAN.md N1 — shared post-hoc failure-spend
|
|
3
|
+
* recording.
|
|
4
|
+
*
|
|
5
|
+
* A backend attempt that fails (or is budget-killed) after the provider
|
|
6
|
+
* has already billed must still land in `agent_actions`, or the cost
|
|
7
|
+
* dials under-report by the size of every failed turn. Two layers need
|
|
8
|
+
* the same write:
|
|
9
|
+
*
|
|
10
|
+
* - `DispatcherErrorRouter.handleError` — the throw path, where a
|
|
11
|
+
* `BackendRouterHandledError` is unwrapped into its per-backend
|
|
12
|
+
* failures (main + fallback can both have billed);
|
|
13
|
+
* - `BackendRouter.executeFallbackCore` — the fallback-SUCCESS path,
|
|
14
|
+
* where the dispatcher's error path never runs and this module is
|
|
15
|
+
* the only place the main attempt's spend can be recorded (and the
|
|
16
|
+
* raw-fallback-error rethrow, which bypasses the dispatcher's
|
|
17
|
+
* unwrap because the thrown error is not a router wrap).
|
|
18
|
+
*
|
|
19
|
+
* Tagging convention: quota errors keep the spend payload's own
|
|
20
|
+
* `costSource` (`sdk` for Codex/Gemini post-hoc asserts, `sdk_partial`
|
|
21
|
+
* for Claude budget aborts); non-quota decisive failures are tagged
|
|
22
|
+
* `cost_source='post_hoc_error'` so failure-spend rows are queryable
|
|
23
|
+
* as a class.
|
|
24
|
+
*/
|
|
25
|
+
import type Database from "better-sqlite3";
|
|
26
|
+
import type { BackendId } from "@aitne/shared";
|
|
27
|
+
import { type BackendQuotaSpend } from "../agent-core.js";
|
|
28
|
+
/**
|
|
29
|
+
* One recordable failure-spend: which backend billed it, the recovered
|
|
30
|
+
* payload, and the `cost_source` tag the audit row should carry.
|
|
31
|
+
*/
|
|
32
|
+
export interface FailureSpendInfo {
|
|
33
|
+
backendId: BackendId;
|
|
34
|
+
spend: BackendQuotaSpend;
|
|
35
|
+
costSource: string | null;
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Recover the recordable spend from one backend failure signal.
|
|
39
|
+
* Handles the nested `BackendDecisiveFailure(kind="quota",
|
|
40
|
+
* cause=BackendQuotaError)` wrap the router produces. Returns `null`
|
|
41
|
+
* when the failure carries no spend (nothing billed, or the
|
|
42
|
+
* SDK/CLI surfaced no usage before dying).
|
|
43
|
+
*/
|
|
44
|
+
export declare function extractFailureSpendInfo(failure: unknown): FailureSpendInfo | null;
|
|
45
|
+
/**
|
|
46
|
+
* Write a `result='failed'` agent_actions row carrying the actual spend
|
|
47
|
+
* for a turn the backend completed (or partially ran) before failing.
|
|
48
|
+
* One row per distinct billed backend attempt — a fallback-success run
|
|
49
|
+
* gets a `failed` row for the main attempt next to the ResultProcessor's
|
|
50
|
+
* `success` row for the fallback.
|
|
51
|
+
*
|
|
52
|
+
* Best-effort: a logging failure must not mask the original control
|
|
53
|
+
* flow — we catch and warn instead of rethrowing.
|
|
54
|
+
*/
|
|
55
|
+
export declare function recordFailureSpendRow(db: Database.Database, event: {
|
|
56
|
+
correlationId: string;
|
|
57
|
+
type: string;
|
|
58
|
+
}, spendInfo: FailureSpendInfo, errorMessage: string): void;
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PREPASS_COST_REDUCTION_PLAN.md N1 — shared post-hoc failure-spend
|
|
3
|
+
* recording.
|
|
4
|
+
*
|
|
5
|
+
* A backend attempt that fails (or is budget-killed) after the provider
|
|
6
|
+
* has already billed must still land in `agent_actions`, or the cost
|
|
7
|
+
* dials under-report by the size of every failed turn. Two layers need
|
|
8
|
+
* the same write:
|
|
9
|
+
*
|
|
10
|
+
* - `DispatcherErrorRouter.handleError` — the throw path, where a
|
|
11
|
+
* `BackendRouterHandledError` is unwrapped into its per-backend
|
|
12
|
+
* failures (main + fallback can both have billed);
|
|
13
|
+
* - `BackendRouter.executeFallbackCore` — the fallback-SUCCESS path,
|
|
14
|
+
* where the dispatcher's error path never runs and this module is
|
|
15
|
+
* the only place the main attempt's spend can be recorded (and the
|
|
16
|
+
* raw-fallback-error rethrow, which bypasses the dispatcher's
|
|
17
|
+
* unwrap because the thrown error is not a router wrap).
|
|
18
|
+
*
|
|
19
|
+
* Tagging convention: quota errors keep the spend payload's own
|
|
20
|
+
* `costSource` (`sdk` for Codex/Gemini post-hoc asserts, `sdk_partial`
|
|
21
|
+
* for Claude budget aborts); non-quota decisive failures are tagged
|
|
22
|
+
* `cost_source='post_hoc_error'` so failure-spend rows are queryable
|
|
23
|
+
* as a class.
|
|
24
|
+
*/
|
|
25
|
+
import { BackendDecisiveFailure, BackendQuotaError, } from "../agent-core.js";
|
|
26
|
+
import { createLogger } from "../../logging.js";
|
|
27
|
+
const logger = createLogger("failure-spend");
|
|
28
|
+
/**
 * Pull the recordable spend out of a single backend failure.
 *
 * Resolution order:
 *  1. A `BackendQuotaError` — either the failure itself, or the `cause` of
 *     a `BackendDecisiveFailure` whose `kind` is `"quota"`. If it carries
 *     a spend, that spend is returned tagged with its own `costSource`
 *     (`null` when the payload has none).
 *  2. Otherwise, a `BackendDecisiveFailure` that carries a spend directly
 *     is returned tagged `"post_hoc_error"`.
 *
 * @param failure Any thrown value; non-matching values are tolerated.
 * @returns `{ backendId, spend, costSource }`, or `null` when the failure
 *          carries no spend.
 */
export function extractFailureSpendInfo(failure) {
    // Step 1: find the quota error, bare or wrapped by the router.
    let quotaError = null;
    if (failure instanceof BackendQuotaError) {
        quotaError = failure;
    }
    else if (failure instanceof BackendDecisiveFailure
        && failure.kind === "quota"
        && failure.cause instanceof BackendQuotaError) {
        quotaError = failure.cause;
    }
    if (quotaError !== null && quotaError.spend) {
        const { backendId, spend } = quotaError;
        return { backendId, spend, costSource: spend.costSource ?? null };
    }
    // Step 2: a decisive failure carrying spend of its own. This also covers
    // a quota wrap whose inner error had no spend but whose wrapper does.
    if (!(failure instanceof BackendDecisiveFailure) || !failure.spend) {
        return null;
    }
    return {
        backendId: failure.backendId,
        spend: failure.spend,
        costSource: "post_hoc_error",
    };
}
|
|
59
|
+
/**
|
|
60
|
+
* Per-DB memo of whether `agent_actions` carries the migration-added
|
|
61
|
+
* cache-token columns. Pre-migration databases (the `AuditLogger`
|
|
62
|
+
* guards the same way) must not make the whole best-effort INSERT
|
|
63
|
+
* fail just because the optional columns are absent.
|
|
64
|
+
*/
|
|
65
|
+
const cacheColumnSupport = new WeakMap();
|
|
66
|
+
function hasCacheTokenColumns(db) {
|
|
67
|
+
const cached = cacheColumnSupport.get(db);
|
|
68
|
+
if (cached !== undefined)
|
|
69
|
+
return cached;
|
|
70
|
+
let supported = false;
|
|
71
|
+
try {
|
|
72
|
+
const columns = db
|
|
73
|
+
.prepare("PRAGMA table_info(agent_actions)")
|
|
74
|
+
.all();
|
|
75
|
+
const names = new Set(columns.map((c) => c.name));
|
|
76
|
+
supported = names.has("cache_creation_tokens") && names.has("cache_read_tokens");
|
|
77
|
+
}
|
|
78
|
+
catch {
|
|
79
|
+
supported = false;
|
|
80
|
+
}
|
|
81
|
+
cacheColumnSupport.set(db, supported);
|
|
82
|
+
return supported;
|
|
83
|
+
}
|
|
84
|
+
/**
 * Insert a `result='failed'` row into `agent_actions` carrying the spend
 * of a backend attempt that billed before it failed.
 *
 * The two cache-token columns are written only when `hasCacheTokenColumns`
 * reports them present; they are placed immediately before `duration_ms`.
 * The error text is truncated to 4096 characters.
 *
 * Best-effort: any error raised while building or running the INSERT is
 * logged as a warning and swallowed, so it cannot mask the caller's own
 * control flow.
 *
 * @param db           Database handle exposing `prepare(sql).run(...)`.
 * @param event        Source event; `correlationId` is stored as `event_id`
 *                     and `type` as `action_type`.
 * @param spendInfo    Backend id, spend payload and `cost_source` tag.
 * @param errorMessage Failure text to store in the `error` column.
 */
export function recordFailureSpendRow(db, event, spendInfo, errorMessage) {
    const { spend } = spendInfo;
    try {
        // Keep each column next to its value so the two can never drift apart.
        const row = [
            ["event_id", event.correlationId],
            ["action_type", event.type],
            ["model_used", spend.modelId],
            ["cost_usd", spend.costUsd],
            ["tokens_input", spend.usage.inputTokens],
            ["tokens_output", spend.usage.outputTokens],
            ["duration_ms", spend.durationMs],
            ["num_turns", spend.numTurns],
            ["result", "failed"],
            ["backend", spendInfo.backendId],
            ["cost_source", spendInfo.costSource],
            ["error", errorMessage.slice(0, 4096)],
            ["completed_at", new Date().toISOString()],
        ];
        if (hasCacheTokenColumns(db)) {
            // Slot the optional cache columns in right after `tokens_output`.
            const insertAt = row.findIndex(([column]) => column === "duration_ms");
            row.splice(insertAt, 0, ["cache_creation_tokens", spend.usage.cacheCreationInputTokens], ["cache_read_tokens", spend.usage.cacheReadInputTokens]);
        }
        const columnList = row.map(([column]) => column).join(", ");
        const placeholders = row.map(() => "?").join(", ");
        const boundValues = row.map(([, value]) => value);
        db.prepare(`INSERT INTO agent_actions (${columnList}) VALUES (${placeholders})`).run(...boundValues);
    }
    catch (err) {
        logger.warn({ err, eventType: event.type, backendId: spendInfo.backendId }, "Failed to record post-hoc failure spend in agent_actions");
    }
}
|
|
@@ -300,6 +300,12 @@ export declare class GeminiCliCore implements IAgentCore {
|
|
|
300
300
|
*/
|
|
301
301
|
private incrementRequestsCount;
|
|
302
302
|
private classifyFailure;
|
|
303
|
+
/**
|
|
304
|
+
* PREPASS_COST_REDUCTION_PLAN.md N1 — spend recovered from the failed
|
|
305
|
+
* run's JSONL stats so terminal errors carry what the provider already
|
|
306
|
+
* billed. Null when the stream never reported usage.
|
|
307
|
+
*/
|
|
308
|
+
private recoverFailureSpend;
|
|
303
309
|
private assertWithinMaxBudget;
|
|
304
310
|
private assertPromptWithinMaxBudget;
|
|
305
311
|
/**
|
|
@@ -19,7 +19,7 @@ import { ALWAYS_DISALLOWED_TOOLS } from "../../safety/always-disallowed.js";
|
|
|
19
19
|
import { CliPathCache, parseJsonLine, runLineCommand, } from "./cli-utils.js";
|
|
20
20
|
import { isPathInsideOrEqual, jsonStringPathForms, shellPathForms, } from "../path-compat.js";
|
|
21
21
|
import { probeApiKeyServerSide } from "./api-key-probe.js";
|
|
22
|
-
import { assertCostWithinMaxBudget, assertPromptCostWithinMaxBudget, classifyCliFailure, } from "./cli-quota-guards.js";
|
|
22
|
+
import { assertCostWithinMaxBudget, assertPromptCostWithinMaxBudget, classifyCliFailure, recoverCliFailureSpend, } from "./cli-quota-guards.js";
|
|
23
23
|
import { buildAgentDayBoundaryHint } from "./quota-reset-hints.js";
|
|
24
24
|
import { auditStreamObservation, extractGeminiToolUseTarget, } from "../../safety/subprocess-block-scanner.js";
|
|
25
25
|
import { extractSilentApiErrors, logSilentApiErrors, } from "./silent-api-error-detector.js";
|
|
@@ -783,7 +783,7 @@ export class GeminiCliCore {
|
|
|
783
783
|
// the wall-clock case, but the audit trail and operator alert
|
|
784
784
|
// call out the idle hang specifically.
|
|
785
785
|
if (maxTurnsExceeded) {
|
|
786
|
-
const err = new BackendDecisiveFailure(this.backendId, "max_turns", new Error(`Gemini execution exceeded max-turns cap of ${maxTurns} (observed ${toolCallCount} tool calls).`));
|
|
786
|
+
const err = new BackendDecisiveFailure(this.backendId, "max_turns", new Error(`Gemini execution exceeded max-turns cap of ${maxTurns} (observed ${toolCallCount} tool calls).`), this.recoverFailureSpend(stats, params.modelId, toolCallCount, startMs));
|
|
787
787
|
logger.error({
|
|
788
788
|
err,
|
|
789
789
|
eventType: params.eventType,
|
|
@@ -795,12 +795,12 @@ export class GeminiCliCore {
|
|
|
795
795
|
throw err;
|
|
796
796
|
}
|
|
797
797
|
if (idleTimedOut) {
|
|
798
|
-
const err = new BackendDecisiveFailure(this.backendId, "timeout", new Error(`Gemini reactive stream went idle for ${REACTIVE_IDLE_TIMEOUT_MS}ms (no events from CLI subprocess)`));
|
|
798
|
+
const err = new BackendDecisiveFailure(this.backendId, "timeout", new Error(`Gemini reactive stream went idle for ${REACTIVE_IDLE_TIMEOUT_MS}ms (no events from CLI subprocess)`), this.recoverFailureSpend(stats, params.modelId, toolCallCount, startMs));
|
|
799
799
|
logger.error({ err, eventType: params.eventType, model: params.modelId, durationMs: Date.now() - startMs }, "Gemini execute idle-timed-out");
|
|
800
800
|
throw err;
|
|
801
801
|
}
|
|
802
802
|
if (runResult.timedOut) {
|
|
803
|
-
const err = new BackendDecisiveFailure(this.backendId, "timeout", new Error(`Gemini execution exceeded timeout of ${this.config.executeTimeoutMinutes} minutes`));
|
|
803
|
+
const err = new BackendDecisiveFailure(this.backendId, "timeout", new Error(`Gemini execution exceeded timeout of ${this.config.executeTimeoutMinutes} minutes`), this.recoverFailureSpend(stats, params.modelId, toolCallCount, startMs));
|
|
804
804
|
logger.error({ err, eventType: params.eventType, model: params.modelId, durationMs: Date.now() - startMs }, "Gemini execute timed out");
|
|
805
805
|
throw err;
|
|
806
806
|
}
|
|
@@ -813,7 +813,7 @@ export class GeminiCliCore {
|
|
|
813
813
|
?? firstFailureLine(runResult.stdoutLines)
|
|
814
814
|
?? firstFailureLine(runResult.stderrLines)
|
|
815
815
|
?? "Gemini execution did not complete successfully.";
|
|
816
|
-
const classified = this.classifyFailure(failureText);
|
|
816
|
+
const classified = this.classifyFailure(failureText, this.recoverFailureSpend(stats, params.modelId, toolCallCount, startMs));
|
|
817
817
|
logger.error({ err: classified, eventType: params.eventType, model: params.modelId, exitCode: runResult.exitCode, durationMs: Date.now() - startMs }, "Gemini execute failed");
|
|
818
818
|
throw classified;
|
|
819
819
|
}
|
|
@@ -1703,7 +1703,7 @@ ${fetchClause}`;
|
|
|
1703
1703
|
// Codex core; the logic lives in `cli-quota-guards.ts` (single source of
|
|
1704
1704
|
// truth) and each backend passes its own regexes / label. Gemini adds a
|
|
1705
1705
|
// pre-auth policy-deny branch via `classifyGeminiPolicyDeny`.
|
|
1706
|
-
classifyFailure(message) {
|
|
1706
|
+
classifyFailure(message, spend) {
|
|
1707
1707
|
return classifyCliFailure({
|
|
1708
1708
|
backendId: this.backendId,
|
|
1709
1709
|
message,
|
|
@@ -1711,6 +1711,25 @@ ${fetchClause}`;
|
|
|
1711
1711
|
rateLimitPattern: /rate limit|quota|429/i,
|
|
1712
1712
|
authPattern: /authentication page|oauth|api key|login|required/i,
|
|
1713
1713
|
extraClassifier: classifyGeminiPolicyDeny,
|
|
1714
|
+
...(spend !== undefined ? { spend } : {}),
|
|
1715
|
+
});
|
|
1716
|
+
}
|
|
1717
|
+
/**
|
|
1718
|
+
* PREPASS_COST_REDUCTION_PLAN.md N1 — spend recovered from the failed
|
|
1719
|
+
* run's JSONL stats so terminal errors carry what the provider already
|
|
1720
|
+
* billed. Null when the stream never reported usage.
|
|
1721
|
+
*/
|
|
1722
|
+
recoverFailureSpend(stats, modelId, toolCallCount, startMs) {
|
|
1723
|
+
return recoverCliFailureSpend({
|
|
1724
|
+
backendId: this.backendId,
|
|
1725
|
+
priceFetcher: this.priceFetcher,
|
|
1726
|
+
usage: normalizeGeminiUsage(stats),
|
|
1727
|
+
modelId,
|
|
1728
|
+
// Matches the success branch / budget-assert formula
|
|
1729
|
+
// (`toolCallCount + 1`) so a failed and a successful run with the
|
|
1730
|
+
// same tool fan-out report the same turn count.
|
|
1731
|
+
numTurns: toolCallCount + 1,
|
|
1732
|
+
durationMs: Date.now() - startMs,
|
|
1714
1733
|
});
|
|
1715
1734
|
}
|
|
1716
1735
|
assertWithinMaxBudget(costUsd, maxBudgetUsd, modelId, spend) {
|
|
@@ -6,7 +6,7 @@ import type { BackendId, BackendModel, BackendModelTier, BackendUsage } from "@a
|
|
|
6
6
|
*
|
|
7
7
|
* Tier semantics (see `BackendModelTier` in shared/backend.ts):
|
|
8
8
|
* - `LITE` — delegated proxy + observer-fired short-shape tasks.
|
|
9
|
-
* - `MEDIUM` — main agent surfaces (DM, routines,
|
|
9
|
+
* - `MEDIUM` — main agent surfaces (DM, routines, activity scan, reviews).
|
|
10
10
|
* - `HIGH` — heavy reasoning (advisor, knowledge import, generative one-shots).
|
|
11
11
|
*
|
|
12
12
|
* Sources of truth for the alias → API ID mapping:
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
*
|
|
6
6
|
* Tier semantics (see `BackendModelTier` in shared/backend.ts):
|
|
7
7
|
* - `LITE` — delegated proxy + observer-fired short-shape tasks.
|
|
8
|
-
* - `MEDIUM` — main agent surfaces (DM, routines,
|
|
8
|
+
* - `MEDIUM` — main agent surfaces (DM, routines, activity scan, reviews).
|
|
9
9
|
* - `HIGH` — heavy reasoning (advisor, knowledge import, generative one-shots).
|
|
10
10
|
*
|
|
11
11
|
* Sources of truth for the alias → API ID mapping:
|
|
@@ -18,7 +18,7 @@ export const DEFAULT_CLAUDE_LITE_MODEL = "claude-haiku-4-5-20251001";
|
|
|
18
18
|
export const DEFAULT_CLAUDE_MEDIUM_MODEL = "claude-sonnet-4-6";
|
|
19
19
|
export const DEFAULT_CLAUDE_HIGH_MODEL = "claude-opus-4-8";
|
|
20
20
|
// Codex (gpt-5.4-mini is the lite-tier pick; gpt-5.4 is the medium-tier
|
|
21
|
-
// default — morning_routine /
|
|
21
|
+
// default — morning_routine / activity_scan / evening_review run on this.
|
|
22
22
|
// gpt-5.5 and gpt-5.5-chat-latest are the flagship reasoning models and
|
|
23
23
|
// stay registered at high tier as selectable opt-ins, but the seeded
|
|
24
24
|
// default for Codex's high tier is also gpt-5.4: gpt-5.5 carries Opus-
|
|
@@ -34,7 +34,7 @@ export const DEFAULT_CODEX_HIGH_MODEL = "gpt-5.4";
|
|
|
34
34
|
// medium-tier flash equivalent left. `gemini-3.1-pro-preview` is priced in
|
|
35
35
|
// the Sonnet band — $0.002/$0.012 per 1k in/out under 200k context, ~2/3
|
|
36
36
|
// the cost of Sonnet 4.6 — so it serves as the "main agent work" pick for
|
|
37
|
-
// every medium-tier surface (morning_routine, message.dm,
|
|
37
|
+
// every medium-tier surface (morning_routine, message.dm, activity_scan,
|
|
38
38
|
// evening / weekly / monthly review). High tier collapses onto the same
|
|
39
39
|
// model via `SEED_HIGH_TIER_OVERRIDE` below: there's no Opus-priced Google
|
|
40
40
|
// flagship worth defaulting to, and operators who genuinely want
|
|
@@ -274,7 +274,7 @@ const MODEL_REGISTRY = [
|
|
|
274
274
|
displayName: "GPT-5.4",
|
|
275
275
|
// Promoted from `high` to `medium` after gpt-5.5 shipped: gpt-5.4 is
|
|
276
276
|
// priced ~half of gpt-5.5 for the same context shapes, and the
|
|
277
|
-
// medium-tier routines (morning_routine /
|
|
277
|
+
// medium-tier routines (morning_routine / activity_scan /
|
|
278
278
|
// evening_review / message.dm) do not benefit from gpt-5.5's
|
|
279
279
|
// deeper reasoning enough to justify the cost. gpt-5.4 is ALSO the
|
|
280
280
|
// seeded default for codex's high tier via `SEED_HIGH_TIER_OVERRIDE`
|
|
@@ -289,7 +289,7 @@ export declare function auditOpencodeTools(tools: ReturnType<typeof extractToolU
|
|
|
289
289
|
sessionId: number | null;
|
|
290
290
|
}): void;
|
|
291
291
|
/**
|
|
292
|
-
* docs/design/appendices/opencode-backend.md Phase 4 — Stage 2
|
|
292
|
+
* docs/design/appendices/opencode-backend.md Phase 4 — Stage 2 activity-scan triage
|
|
293
293
|
* schema. Mirrors the `parseStage2Verdict` text contract
|
|
294
294
|
* (`dispatcher-types.ts`): the agent must return exactly
|
|
295
295
|
* `{ "action": "log_only" | "escalate", "reason": string }`. Opencode
|
|
@@ -609,7 +609,7 @@ export class OpencodeCore {
|
|
|
609
609
|
const renderedPrompt = isResume
|
|
610
610
|
? params.prompt
|
|
611
611
|
: buildExecutionPrompt(params.prompt, params.context, params.event, params.conversationHistory);
|
|
612
|
-
// docs/design/appendices/opencode-backend.md §4 / Phase 4 — `routine.
|
|
612
|
+
// docs/design/appendices/opencode-backend.md §4 / Phase 4 — `routine.activity_scan.triage`
|
|
613
613
|
// returns a strict JSON envelope (`{ "action": "log_only" |
|
|
614
614
|
// "escalate", … }`) parsed by `parseStage2Verdict`. opencode's
|
|
615
615
|
// `format: { type: "json_schema", … }` honours the schema with
|
|
@@ -767,7 +767,7 @@ export class OpencodeCore {
|
|
|
767
767
|
const durationMs = Date.now() - startMs;
|
|
768
768
|
const stopReason = assistantMessage?.finish ?? null;
|
|
769
769
|
// Best-effort cleanup of the on-disk opencode session row when we
|
|
770
|
-
// own the workdir — keeps disk usage bounded under
|
|
770
|
+
// own the workdir — keeps disk usage bounded under activity_scan.
|
|
771
771
|
// On resume we must never delete: the dispatcher's next turn
|
|
772
772
|
// needs the same session id to resolve to a live server-side
|
|
773
773
|
// history. (`ownsSessionDir` is already false when sessionDir is
|
|
@@ -1466,7 +1466,7 @@ function classifyStreamError(payload, backendId) {
|
|
|
1466
1466
|
return new BackendDecisiveFailure(backendId, "other_non_retryable", new Error(message));
|
|
1467
1467
|
}
|
|
1468
1468
|
/**
|
|
1469
|
-
* docs/design/appendices/opencode-backend.md Phase 4 — Stage 2
|
|
1469
|
+
* docs/design/appendices/opencode-backend.md Phase 4 — Stage 2 activity-scan triage
|
|
1470
1470
|
* schema. Mirrors the `parseStage2Verdict` text contract
|
|
1471
1471
|
* (`dispatcher-types.ts`): the agent must return exactly
|
|
1472
1472
|
* `{ "action": "log_only" | "escalate", "reason": string }`. Opencode
|
|
@@ -1493,11 +1493,11 @@ export const STAGE2_TRIAGE_JSON_SCHEMA = {
|
|
|
1493
1493
|
/**
|
|
1494
1494
|
* Returns the opencode `format` envelope to apply when a given process
|
|
1495
1495
|
* key has a strict structured-output contract; null otherwise. v1
|
|
1496
|
-
* covers `routine.
|
|
1496
|
+
* covers `routine.activity_scan.triage`; future strict-JSON process
|
|
1497
1497
|
* keys (e.g. delegated classifiers) extend this map.
|
|
1498
1498
|
*/
|
|
1499
1499
|
function formatForProcessKey(processKey) {
|
|
1500
|
-
if (processKey === "routine.
|
|
1500
|
+
if (processKey === "routine.activity_scan.triage") {
|
|
1501
1501
|
return {
|
|
1502
1502
|
type: "json_schema",
|
|
1503
1503
|
schema: STAGE2_TRIAGE_JSON_SCHEMA,
|
|
@@ -222,27 +222,44 @@ const ENVELOPE_OVERRIDES_BY_PROCESS_KEY = {
|
|
|
222
222
|
// cost-reduction-structural §B — Stage 2 lite-tier triage. Strict
|
|
223
223
|
// JSON-only output (~2K input / ~50 output) decides log_only vs
|
|
224
224
|
// escalate. 1 turn / $0.05 mirrors observation.summarize.
|
|
225
|
-
"routine.
|
|
225
|
+
"routine.activity_scan.triage": { maxTurns: 1, maxBudgetUsd: 0.05 },
|
|
226
226
|
// docs/design/appendices/routine-data-acquisition.md §6.2 / §6.9 pre-pass fetcher.
|
|
227
227
|
// The lite-tier nominal ($0.20) under-provisioned the morning fan-out
|
|
228
228
|
// (2 mail providers × N accounts + calendar + notion) and tripped
|
|
229
229
|
// BackendQuotaError(max_budget_usd) mid-fetch — widened to $0.50 so
|
|
230
230
|
// the cap still binds well before runaway but accommodates the real
|
|
231
|
-
// worst-case fan-out.
|
|
232
|
-
//
|
|
233
|
-
|
|
231
|
+
// worst-case fan-out. Keep in lock-step with the corresponding
|
|
232
|
+
// schema-seed row.
|
|
233
|
+
//
|
|
234
|
+
// maxTurns 20 → 10 (PREPASS_COST_REDUCTION_PLAN.md N4, 2026-06-10):
|
|
235
|
+
// measured over 502 fetch_window runs on a live install, num_turns
|
|
236
|
+
// P50=3 / P95=6 / P99=8 / max=11 (avg 3.08). The per-integration
|
|
237
|
+
// fan-out means each session handles ONE partial, so the original
|
|
238
|
+
// "~6 partials × 3 tool calls" sizing no longer applies. 10 bounds
|
|
239
|
+
// budget-cap wander (a stuck session now stops at half the previous
|
|
240
|
+
// exploration depth) while clearing P99 with 2 turns of headroom.
|
|
241
|
+
"routine.fetch_window": { maxTurns: 10, maxBudgetUsd: 0.5 },
|
|
234
242
|
// BROWSER_HISTORY_INTEGRATION_PLAN P3 — keep these in lock-step with
|
|
235
|
-
// the schema seed rows
|
|
236
|
-
//
|
|
237
|
-
//
|
|
238
|
-
//
|
|
239
|
-
//
|
|
240
|
-
//
|
|
241
|
-
//
|
|
242
|
-
//
|
|
243
|
-
//
|
|
244
|
-
|
|
245
|
-
|
|
243
|
+
// the schema seed rows (research_offer_dm has NO seed row — this
|
|
244
|
+
// entry is its only default, materialized on main-backend switch or
|
|
245
|
+
// Reset).
|
|
246
|
+
//
|
|
247
|
+
// cluster_update / offer_dm budgets are STOP-LOSSES sized to cover
|
|
248
|
+
// one cold-prompt-cache run, not per-run cost targets
|
|
249
|
+
// (RESEARCH_CLUSTER_COST_FIX_PLAN.md RC2/F3): the SDK budget check
|
|
250
|
+
// only fires between turns, and a cold run writes the full session
|
|
251
|
+
// prefix to prompt cache (~$0.13-0.30 observed on Haiku) before the
|
|
252
|
+
// check can abort — the original floor values ($0.05/$0.02) killed
|
|
253
|
+
// every cold run AFTER the money was spent and the journal was never
|
|
254
|
+
// written. With the F1 per-agent-day enqueue stamp, cluster_update
|
|
255
|
+
// runs at most once per cluster per day, so $0.50 bounds daily spend
|
|
256
|
+
// per cluster. Bumped for upgrading installs by migration 0012.
|
|
257
|
+
// research_dispatch carries the WebFetch fan-out; mirrors
|
|
258
|
+
// evening_review (50/$1.00). research_wiki_summary is tighter
|
|
259
|
+
// (30/$0.50) — it reads the cluster journal the agent already wrote
|
|
260
|
+
// and composes from it, with bounded external work.
|
|
261
|
+
"routine.research_cluster_update": { maxTurns: 5, maxBudgetUsd: 0.5 },
|
|
262
|
+
"routine.research_offer_dm": { maxTurns: 5, maxBudgetUsd: 0.15 },
|
|
246
263
|
"routine.research_dispatch": { maxTurns: 50, maxBudgetUsd: 1.0 },
|
|
247
264
|
"routine.research_wiki_summary": { maxTurns: 30, maxBudgetUsd: 0.5 },
|
|
248
265
|
// BROWSER_TASK_REDESIGN_PLAN.md §5 — open-ended browser sub-agent.
|
|
@@ -254,6 +271,13 @@ const ENVELOPE_OVERRIDES_BY_PROCESS_KEY = {
|
|
|
254
271
|
// cost without tripping BackendQuotaError. Lock-step with the
|
|
255
272
|
// schema-seed row.
|
|
256
273
|
"browser_task": { maxTurns: 30, maxBudgetUsd: 1.0 },
|
|
274
|
+
// BACKGROUND_TASK_RUNNER_DESIGN.md §6 — generic detached worker. The
|
|
275
|
+
// medium-tier nominal (50/$1.00) is too tight for long-running research
|
|
276
|
+
// / multi-repo audits, so the seed picks 40 turns / $2.00 (the
|
|
277
|
+
// medium-tier base in background-task-budget.ts). Kept in lock-step
|
|
278
|
+
// with the schema-seed row so a force=true backend-switch reset
|
|
279
|
+
// preserves this envelope instead of clobbering it to the tier default.
|
|
280
|
+
"background_task": { maxTurns: 40, maxBudgetUsd: 2.0 },
|
|
257
281
|
};
|
|
258
282
|
/**
|
|
259
283
|
* Resolve the seed `(model, envelope)` for a configurable process key on
|
|
@@ -88,7 +88,9 @@ function makeCostBackendCommand(backend) {
|
|
|
88
88
|
return {
|
|
89
89
|
name: `!cost ${backend}`,
|
|
90
90
|
title: `Cost for ${backend}`,
|
|
91
|
-
|
|
91
|
+
// Short on purpose — one of these renders per registered backend in
|
|
92
|
+
// `!help`, and the whole list must fit MOBILE_REPLY_BUDGET.
|
|
93
|
+
describe: `Agent spend on ${backend} (7 days).`,
|
|
92
94
|
details: [
|
|
93
95
|
`Reports trailing 7-day spend for ${backend}.`,
|
|
94
96
|
"Does not invoke an LLM.",
|
|
@@ -18,7 +18,7 @@ const REPORT_SQL = `
|
|
|
18
18
|
MIN(started_at) AS first_seen,
|
|
19
19
|
MAX(started_at) AS last_seen
|
|
20
20
|
FROM agent_actions
|
|
21
|
-
WHERE result
|
|
21
|
+
WHERE result IN ('failed', 'partial')
|
|
22
22
|
AND started_at >= datetime('now', '-7 days')
|
|
23
23
|
AND error IS NOT NULL
|
|
24
24
|
GROUP BY backend, action_type
|
|
@@ -27,7 +27,7 @@ const REPORT_SQL = `
|
|
|
27
27
|
ON latest.action_type = g.action_type
|
|
28
28
|
AND COALESCE(latest.backend, 'claude') = COALESCE(g.backend, 'claude')
|
|
29
29
|
AND latest.started_at = g.last_seen
|
|
30
|
-
AND latest.result
|
|
30
|
+
AND latest.result IN ('failed', 'partial')
|
|
31
31
|
AND latest.error IS NOT NULL
|
|
32
32
|
GROUP BY g.backend, g.action_type
|
|
33
33
|
ORDER BY g.n DESC, g.last_seen DESC
|
|
@@ -42,7 +42,7 @@ function queryReport(db) {
|
|
|
42
42
|
COUNT(DISTINCT COALESCE(backend, 'claude') || '|' || action_type) AS groups,
|
|
43
43
|
COUNT(*) AS total
|
|
44
44
|
FROM agent_actions
|
|
45
|
-
WHERE result
|
|
45
|
+
WHERE result IN ('failed', 'partial')
|
|
46
46
|
AND started_at >= datetime('now', '-7 days')
|
|
47
47
|
AND error IS NOT NULL`)
|
|
48
48
|
.get();
|
|
@@ -94,6 +94,7 @@ export const reportCommand = {
|
|
|
94
94
|
describe: "Agent errors over the past 7 days.",
|
|
95
95
|
details: [
|
|
96
96
|
"Summarizes recent failed agent actions by action type and backend.",
|
|
97
|
+
"Includes partial runs that failed a post-run outcome check.",
|
|
97
98
|
"Samples are redacted before sending to messaging surfaces.",
|
|
98
99
|
"Does not invoke an LLM.",
|
|
99
100
|
],
|
|
@@ -242,7 +242,10 @@ async function handleRename(ctx, slug, newName) {
|
|
|
242
242
|
export const researchCommand = {
|
|
243
243
|
prefix: "!research",
|
|
244
244
|
title: "Research clusters",
|
|
245
|
-
|
|
245
|
+
// Keep this one-liner short: `!help` renders every registered describe
|
|
246
|
+
// inside the 1500-char MOBILE_REPLY_BUDGET, and the subcommand catalogue
|
|
247
|
+
// already lives in `details` below (and in the `!research` reply itself).
|
|
248
|
+
describe: "manage browser-history research clusters",
|
|
246
249
|
details: [
|
|
247
250
|
"Subcommands:",
|
|
248
251
|
"- `!research` — list active clusters",
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `!revert tuning` — undo the most recent applied self-tuning config change
|
|
3
|
+
* (SELF_TUNING_REVIEW_CYCLE_DESIGN.md §3.4, Phase 3).
|
|
4
|
+
*
|
|
5
|
+
* The owner-side escape hatch for the Autonomous-plus-DM actuation posture
|
|
6
|
+
* (D1): every applied change DMs "Reply `!revert tuning` to undo", and this
|
|
7
|
+
* command is that reply. It restores the ledger's `prev` value through the
|
|
8
|
+
* same `applyConfigUpdates` chokepoint the actuator used, stamps
|
|
9
|
+
* `reverted_at` (which puts the key into the 28-day re-proposal cool-down),
|
|
10
|
+
* audits `self_tuning.reverted`, and records an explicit-correction
|
|
11
|
+
* feedback signal so the lesson loop learns from the owner's override.
|
|
12
|
+
*
|
|
13
|
+
* `runsWhilePaused: true` — a pure DB/config write with no LLM dispatch,
|
|
14
|
+
* and the owner may well have paused the agent *because* of a bad tuning
|
|
15
|
+
* change; the undo must not be locked behind `!start`.
|
|
16
|
+
*/
|
|
17
|
+
import { type BangPrefixCommand } from "./registry.js";
|
|
18
|
+
export declare const revertTuningCommand: BangPrefixCommand;
|