@aitne/daemon 0.1.10 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/adapter-watchdog.d.ts +70 -0
- package/dist/adapters/adapter-watchdog.js +115 -0
- package/dist/adapters/discord.d.ts +17 -1
- package/dist/adapters/discord.js +33 -0
- package/dist/adapters/notification-manager.d.ts +27 -1
- package/dist/adapters/notification-manager.js +54 -39
- package/dist/adapters/slack-adapter.d.ts +26 -1
- package/dist/adapters/slack-adapter.js +41 -0
- package/dist/adapters/telegram-adapter.d.ts +18 -1
- package/dist/adapters/telegram-adapter.js +41 -2
- package/dist/adapters/types.d.ts +20 -0
- package/dist/adapters/whatsapp-adapter.d.ts +26 -7
- package/dist/adapters/whatsapp-adapter.js +74 -21
- package/dist/api/env-writer.js +8 -5
- package/dist/api/helpers/agent-errors-registry.d.ts +5 -5
- package/dist/api/helpers/agent-errors-registry.js +5 -5
- package/dist/api/routes/agent.js +33 -12
- package/dist/api/routes/agents/index.js +75 -16
- package/dist/api/routes/agents/views.d.ts +37 -2
- package/dist/api/routes/agents/views.js +64 -2
- package/dist/api/routes/background-task.d.ts +22 -0
- package/dist/api/routes/background-task.js +338 -0
- package/dist/api/routes/browser-history.js +9 -1
- package/dist/api/routes/context/permissions.js +3 -2
- package/dist/api/routes/context/snapshots.js +0 -3
- package/dist/api/routes/context/write.js +3 -17
- package/dist/api/routes/dashboard/config.js +48 -12
- package/dist/api/routes/dashboard/cost-approvals.js +66 -0
- package/dist/api/routes/dashboard/notifications.js +9 -9
- package/dist/api/routes/integrations/crud-patch.js +5 -1
- package/dist/api/routes/integrations-reconcile.js +2 -2
- package/dist/api/routes/notion.d.ts +1 -1
- package/dist/api/routes/observations.js +7 -7
- package/dist/api/routes/obsidian.d.ts +1 -1
- package/dist/api/routes/receipts.js +5 -1
- package/dist/api/routes/setup-migrate.js +1 -1
- package/dist/api/routes/setup.js +1 -1
- package/dist/api/routes/task-flows.d.ts +1 -1
- package/dist/api/routes/task-flows.js +1 -1
- package/dist/api/routes/tuning.d.ts +29 -0
- package/dist/api/routes/tuning.js +304 -0
- package/dist/api/server.d.ts +44 -16
- package/dist/api/server.js +9 -0
- package/dist/bootstrap/adapters.d.ts +19 -0
- package/dist/bootstrap/adapters.js +61 -0
- package/dist/bootstrap/api.d.ts +5 -3
- package/dist/bootstrap/api.js +45 -13
- package/dist/bootstrap/catchup.d.ts +1 -1
- package/dist/bootstrap/catchup.js +11 -11
- package/dist/bootstrap/event-pipeline.d.ts +11 -0
- package/dist/bootstrap/event-pipeline.js +245 -7
- package/dist/bootstrap/observers.js +9 -6
- package/dist/bootstrap/schedule-helpers.d.ts +104 -6
- package/dist/bootstrap/schedule-helpers.js +172 -19
- package/dist/config.js +26 -12
- package/dist/core/agent-core.d.ts +33 -1
- package/dist/core/agent-core.js +36 -1
- package/dist/core/agents/activity-scan-cadence.d.ts +103 -0
- package/dist/core/agents/activity-scan-cadence.js +127 -0
- package/dist/core/agents/agent-route-override.d.ts +53 -0
- package/dist/core/agents/agent-route-override.js +69 -0
- package/dist/core/agents/builtin-registry.d.ts +51 -14
- package/dist/core/agents/builtin-registry.js +92 -15
- package/dist/core/agents/config-gate-reconcile.d.ts +38 -0
- package/dist/core/agents/config-gate-reconcile.js +51 -0
- package/dist/core/agents/cron-substitute.d.ts +1 -1
- package/dist/core/agents/cron-substitute.js +1 -1
- package/dist/core/agents/custom-routine-migration.d.ts +60 -0
- package/dist/core/agents/custom-routine-migration.js +149 -0
- package/dist/core/agents/firing-blocked.d.ts +1 -1
- package/dist/core/agents/hourly-cadence.d.ts +102 -0
- package/dist/core/agents/hourly-cadence.js +126 -0
- package/dist/core/agents/loader-boot.js +23 -0
- package/dist/core/agents/loader.d.ts +19 -0
- package/dist/core/agents/loader.js +34 -2
- package/dist/core/agents/override-merge.d.ts +1 -1
- package/dist/core/agents/override-merge.js +9 -1
- package/dist/core/agents/recurrence-convert.d.ts +1 -1
- package/dist/core/agents/recurrence-convert.js +1 -1
- package/dist/core/agents/recurring-schedule-adapter.js +8 -0
- package/dist/core/alerts.js +6 -6
- package/dist/core/backends/auth-health-monitor.d.ts +2 -2
- package/dist/core/backends/auth-health-monitor.js +1 -1
- package/dist/core/backends/backend-router.d.ts +27 -1
- package/dist/core/backends/backend-router.js +165 -1
- package/dist/core/backends/claude-code-core.d.ts +71 -31
- package/dist/core/backends/claude-code-core.js +282 -54
- package/dist/core/backends/cli-quota-guards.d.ts +29 -1
- package/dist/core/backends/cli-quota-guards.js +40 -5
- package/dist/core/backends/codex-core.d.ts +6 -0
- package/dist/core/backends/codex-core.js +22 -6
- package/dist/core/backends/failure-spend.d.ts +58 -0
- package/dist/core/backends/failure-spend.js +137 -0
- package/dist/core/backends/gemini-cli-core.d.ts +6 -0
- package/dist/core/backends/gemini-cli-core.js +25 -6
- package/dist/core/backends/model-registry.d.ts +1 -1
- package/dist/core/backends/model-registry.js +4 -4
- package/dist/core/backends/opencode-core.d.ts +1 -1
- package/dist/core/backends/opencode-core.js +5 -5
- package/dist/core/backends/plan-presets.js +39 -15
- package/dist/core/bang-commands/commands-cost.js +3 -1
- package/dist/core/bang-commands/commands-report.js +4 -3
- package/dist/core/bang-commands/commands-research.js +4 -1
- package/dist/core/bang-commands/commands-revert-tuning.d.ts +18 -0
- package/dist/core/bang-commands/commands-revert-tuning.js +63 -0
- package/dist/core/bang-commands/commands-stop-start.js +3 -3
- package/dist/core/bang-commands/commands-task-control.d.ts +19 -0
- package/dist/core/bang-commands/commands-task-control.js +147 -0
- package/dist/core/bang-commands/commands-wiki.js +5 -5
- package/dist/core/bang-commands/index.d.ts +2 -0
- package/dist/core/bang-commands/index.js +12 -0
- package/dist/core/bang-commands/registry.d.ts +12 -0
- package/dist/core/browser-history/research-cluster-fanout.d.ts +28 -14
- package/dist/core/browser-history/research-cluster-fanout.js +39 -16
- package/dist/core/channel-timeline.d.ts +5 -1
- package/dist/core/channel-timeline.js +13 -0
- package/dist/core/context/index-reconciler.js +5 -2
- package/dist/core/context/policy-index-reconciler.d.ts +6 -4
- package/dist/core/context/policy-index-runner.js +25 -6
- package/dist/core/context-builder-calendar.js +10 -2
- package/dist/core/context-builder-conversation.d.ts +8 -1
- package/dist/core/context-builder-conversation.js +41 -7
- package/dist/core/context-builder-yesterday.js +4 -3
- package/dist/core/context-builder.d.ts +7 -2
- package/dist/core/context-builder.js +62 -20
- package/dist/core/context-file-serializer.d.ts +1 -1
- package/dist/core/context-file-serializer.js +1 -1
- package/dist/core/context-health.js +2 -2
- package/dist/core/context-paths.d.ts +1 -1
- package/dist/core/context-paths.js +1 -1
- package/dist/core/context-validation/prepare-write.js +1 -1
- package/dist/core/context-validation/routine-rulebook.d.ts +1 -1
- package/dist/core/context-vault-aliases.d.ts +0 -13
- package/dist/core/context-vault-aliases.js +37 -0
- package/dist/core/custom-routines.d.ts +99 -0
- package/dist/core/custom-routines.js +187 -0
- package/dist/core/daemon-api-cli.js +49 -0
- package/dist/core/day-boundary.d.ts +46 -0
- package/dist/core/day-boundary.js +40 -0
- package/dist/core/dispatcher-activity-scan.d.ts +221 -0
- package/dist/core/dispatcher-activity-scan.js +775 -0
- package/dist/core/dispatcher-error-handling.d.ts +6 -11
- package/dist/core/dispatcher-error-handling.js +38 -62
- package/dist/core/dispatcher-hourly-check.js +6 -1
- package/dist/core/dispatcher-message-handler.d.ts +10 -0
- package/dist/core/dispatcher-message-handler.js +17 -0
- package/dist/core/dispatcher-morning-routine.d.ts +6 -6
- package/dist/core/dispatcher-morning-routine.js +13 -13
- package/dist/core/dispatcher-result-processor.d.ts +33 -0
- package/dist/core/dispatcher-result-processor.js +167 -11
- package/dist/core/dispatcher-scheduled-background-task.d.ts +42 -0
- package/dist/core/dispatcher-scheduled-background-task.js +89 -0
- package/dist/core/dispatcher-scheduled-tasks.d.ts +63 -1
- package/dist/core/dispatcher-scheduled-tasks.js +213 -6
- package/dist/core/dispatcher-task-delivery.d.ts +105 -0
- package/dist/core/dispatcher-task-delivery.js +555 -0
- package/dist/core/dispatcher-types.d.ts +48 -9
- package/dist/core/dispatcher-types.js +3 -3
- package/dist/core/dispatcher.d.ts +112 -31
- package/dist/core/dispatcher.js +284 -59
- package/dist/core/dm-freshness-metrics.d.ts +1 -1
- package/dist/core/drift-effects.js +2 -2
- package/dist/core/feedback/consolidation-prep.js +17 -5
- package/dist/core/feedback/eviction-scorer.js +6 -2
- package/dist/core/feedback/lesson-format.js +9 -4
- package/dist/core/feedback/lesson-injection.d.ts +1 -1
- package/dist/core/feedback/lesson-injection.js +17 -2
- package/dist/core/feedback/lesson-store-overview.d.ts +8 -4
- package/dist/core/feedback/lesson-store-overview.js +8 -4
- package/dist/core/feedback/regeneralization-prep.js +29 -16
- package/dist/core/feedback/self-performance-prep.d.ts +186 -0
- package/dist/core/feedback/self-performance-prep.js +541 -0
- package/dist/core/feedback/tuning-actuator.d.ts +198 -0
- package/dist/core/feedback/tuning-actuator.js +432 -0
- package/dist/core/feedback/tuning-recommender.d.ts +247 -0
- package/dist/core/feedback/tuning-recommender.js +580 -0
- package/dist/core/feedback/tuning-revert-monitor.d.ts +90 -0
- package/dist/core/feedback/tuning-revert-monitor.js +213 -0
- package/dist/core/health-monitor.d.ts +6 -0
- package/dist/core/health-monitor.js +1 -1
- package/dist/core/injection-policy.d.ts +4 -4
- package/dist/core/injection-policy.js +4 -4
- package/dist/core/integration-main-backend.js +4 -0
- package/dist/core/management-md.d.ts +2 -2
- package/dist/core/management-md.js +51 -13
- package/dist/core/morning/orchestrator.d.ts +2 -2
- package/dist/core/morning/orchestrator.js +2 -2
- package/dist/core/notification-gate.d.ts +64 -0
- package/dist/core/notification-gate.js +51 -0
- package/dist/core/notification-rate-limit.d.ts +40 -0
- package/dist/core/notification-rate-limit.js +50 -0
- package/dist/core/policy-files.d.ts +1 -1
- package/dist/core/policy-files.js +2 -2
- package/dist/core/pre-pass-freshness.d.ts +4 -4
- package/dist/core/retention.d.ts +5 -0
- package/dist/core/retention.js +20 -4
- package/dist/core/review-context.d.ts +1 -1
- package/dist/core/review-context.js +10 -5
- package/dist/core/roadmap-write-lock.d.ts +2 -1
- package/dist/core/roadmap-write-lock.js +15 -10
- package/dist/core/routine-acquisition-plan.d.ts +47 -1
- package/dist/core/routine-acquisition-plan.js +78 -20
- package/dist/core/routine-fetch-window-retry.js +7 -4
- package/dist/core/routine-fetch-window-runner.d.ts +39 -3
- package/dist/core/routine-fetch-window-runner.js +264 -13
- package/dist/core/routine-windows.d.ts +2 -2
- package/dist/core/routine-windows.js +8 -5
- package/dist/core/scheduler.d.ts +175 -16
- package/dist/core/scheduler.js +559 -102
- package/dist/core/signal-detector.d.ts +12 -0
- package/dist/core/signal-detector.js +53 -9
- package/dist/core/skills-compiler-denied-tools.js +2 -2
- package/dist/core/skills-compiler-skill-index.d.ts +2 -2
- package/dist/core/skills-compiler-skill-index.js +2 -2
- package/dist/core/skills-compiler-variants.d.ts +1 -1
- package/dist/core/skills-compiler-variants.js +8 -0
- package/dist/core/skills-compiler.d.ts +29 -26
- package/dist/core/skills-compiler.js +117 -81
- package/dist/core/skills-manifest.d.ts +37 -0
- package/dist/core/skills-manifest.js +73 -2
- package/dist/core/sleep-inhibitor.d.ts +79 -0
- package/dist/core/sleep-inhibitor.js +132 -0
- package/dist/core/slim-system-prompt-loader.d.ts +77 -0
- package/dist/core/slim-system-prompt-loader.js +141 -0
- package/dist/core/spawn-gates.d.ts +126 -0
- package/dist/core/spawn-gates.js +180 -0
- package/dist/core/today-direct-writer.d.ts +2 -2
- package/dist/core/today-direct-writer.js +1 -1
- package/dist/core/today-write-lock.d.ts +4 -2
- package/dist/core/today-write-lock.js +30 -20
- package/dist/core/wake-detector.d.ts +55 -0
- package/dist/core/wake-detector.js +80 -0
- package/dist/core/wiki/compile-lock.d.ts +1 -1
- package/dist/core/wiki/compile-lock.js +1 -1
- package/dist/core/workdir.js +15 -6
- package/dist/db/activity-scan-signals.d.ts +77 -0
- package/dist/db/activity-scan-signals.js +378 -0
- package/dist/db/agents-store.d.ts +28 -0
- package/dist/db/agents-store.js +62 -0
- package/dist/db/background-task-clarifications-store.d.ts +81 -0
- package/dist/db/background-task-clarifications-store.js +152 -0
- package/dist/db/background-task-store.d.ts +207 -0
- package/dist/db/background-task-store.js +380 -0
- package/dist/db/browser-history-store.d.ts +39 -6
- package/dist/db/browser-history-store.js +51 -7
- package/dist/db/browser-task-clarifications-store.d.ts +12 -0
- package/dist/db/browser-task-clarifications-store.js +35 -5
- package/dist/db/browser-task-store.d.ts +3 -0
- package/dist/db/browser-task-store.js +29 -4
- package/dist/db/deferred-dm.d.ts +86 -0
- package/dist/db/deferred-dm.js +199 -0
- package/dist/db/migrations.js +330 -0
- package/dist/db/observations.d.ts +2 -2
- package/dist/db/observations.js +3 -3
- package/dist/db/schema.js +217 -16
- package/dist/db/voice-transcripts-store.d.ts +1 -1
- package/dist/index.js +86 -29
- package/dist/messaging/browser-task-mcp-notifier.d.ts +12 -70
- package/dist/messaging/browser-task-mcp-notifier.js +30 -151
- package/dist/messaging/browser-task-screenshot-attachment.d.ts +15 -0
- package/dist/messaging/browser-task-screenshot-attachment.js +63 -0
- package/dist/observers/delegated-sync-worker.d.ts +6 -6
- package/dist/observers/delegated-sync-worker.js +10 -10
- package/dist/observers/git-delegated-cron.d.ts +1 -1
- package/dist/observers/git-delegated-cron.js +2 -2
- package/dist/observers/github-poller-classifier.d.ts +3 -3
- package/dist/observers/github-poller-classifier.js +3 -3
- package/dist/observers/imminent-event-scheduler.d.ts +1 -1
- package/dist/observers/imminent-event-scheduler.js +1 -1
- package/dist/observers/mail-poller.d.ts +1 -0
- package/dist/observers/mail-poller.js +42 -3
- package/dist/observers/observation-summarizer/summarizer-client.d.ts +2 -2
- package/dist/observers/observation-summarizer/summarizer-client.js +2 -2
- package/dist/observers/observation-summarizer/worker.d.ts +2 -2
- package/dist/observers/observation-summarizer/worker.js +4 -4
- package/dist/observers/obsidian-watcher.d.ts +1 -1
- package/dist/observers/obsidian-watcher.js +1 -1
- package/dist/safety/agent-write-tracker.d.ts +4 -4
- package/dist/safety/agent-write-tracker.js +4 -4
- package/dist/safety/audit.d.ts +43 -5
- package/dist/safety/audit.js +86 -18
- package/dist/safety/risk-classifier.d.ts +6 -0
- package/dist/safety/risk-classifier.js +75 -11
- package/dist/scheduler/activity-scan-gate.d.ts +86 -0
- package/dist/scheduler/activity-scan-gate.js +132 -0
- package/dist/services/background-task/background-task-budget.d.ts +80 -0
- package/dist/services/background-task/background-task-budget.js +91 -0
- package/dist/services/background-task/background-task-driver.d.ts +105 -0
- package/dist/services/background-task/background-task-driver.js +416 -0
- package/dist/services/background-task/background-task-runner.d.ts +96 -0
- package/dist/services/background-task/background-task-runner.js +673 -0
- package/dist/services/background-task/background-task-tools.d.ts +84 -0
- package/dist/services/background-task/background-task-tools.js +247 -0
- package/dist/services/background-task/background-task-transition-events.d.ts +43 -0
- package/dist/services/background-task/background-task-transition-events.js +54 -0
- package/dist/services/browser-history/automation/egress-denylist.d.ts +1 -1
- package/dist/services/browser-history/automation/egress-denylist.js +16 -6
- package/dist/services/browser-history/managed-chromium/sandbox-launcher.js +0 -1
- package/dist/services/browser-task/browser-task-runner.js +53 -8
- package/dist/services/observations-batch.d.ts +1 -1
- package/dist/services/observations-batch.js +2 -2
- package/dist/settings/runtime-settings.d.ts +38 -11
- package/dist/settings/runtime-settings.js +203 -40
- package/dist/settings/settings-store.js +11 -3
- package/package.json +4 -4
|
@@ -0,0 +1,580 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Self-Tuning Review Cycle — Recommend stage (SELF_TUNING_REVIEW_CYCLE_DESIGN.md
|
|
3
|
+
* §3.2, Phase 2).
|
|
4
|
+
*
|
|
5
|
+
* The daemon-side, deterministic Recommend step ($0 — LLM tokens buy judgment
|
|
6
|
+
* only, P1). On the weekly-review dispatch it consumes the same
|
|
7
|
+
* {@link SelfPerformanceData} the Measure stage (§3.1) already gathered —
|
|
8
|
+
* current + baseline window = the 14-day evidence span the v1 rules read —
|
|
9
|
+
* and emits at most {@link MAX_RECOMMENDATIONS_PER_CYCLE} bounded
|
|
10
|
+
* `TuningRecommendation`s, rendered as one `<tuning_recommendations>` block
|
|
11
|
+
* for the weekly session's Phase 3c verdict step (§3.3).
|
|
12
|
+
*
|
|
13
|
+
* Guards live in code, not prompt (§3.2): per-rule ladders with hard
|
|
14
|
+
* floors/caps, hysteresis (no re-proposal of a key changed < 14 days ago;
|
|
15
|
+
* 28 days after a revert), minimum sample sizes, and the max-3 ranking by
|
|
16
|
+
* estimated $ impact. Numeric *enforcement* stays where it already lives —
|
|
17
|
+
* `runtimeSettingsSchema` + `env-writer.ts:NUMERIC_RANGE` behind the
|
|
18
|
+
* `applyConfigUpdates` chokepoint (P4); the `bounds` field on a
|
|
19
|
+
* recommendation documents the rule's own ladder, it is not a third copy of
|
|
20
|
+
* the schema bounds.
|
|
21
|
+
*
|
|
22
|
+
* The shadow period (§7): recommendations are generated, persisted under
|
|
23
|
+
* {@link TUNING_PENDING_CYCLE_STATE_KEY}, and verdicted via
|
|
24
|
+
* `POST /api/tuning/verdicts`. While `selfTuningEnabled` is `false` (the
|
|
25
|
+
* shipped default) nothing is actuated; once flipped, the Phase 3 actuator
|
|
26
|
+
* (`tuning-actuator.ts`) applies `apply` verdicts through the config
|
|
27
|
+
* chokepoint. Verdict ids are single-use: each weekly cycle overwrites the
|
|
28
|
+
* pending blob, expiring the prior cycle's ids (§3.4).
|
|
29
|
+
*/
|
|
30
|
+
import { FETCH_WINDOW_ACTION_TYPE } from "./self-performance-prep.js";
|
|
31
|
+
/**
 * §3.4 — `runtime_state` key under which the current cycle's pending
 * recommendations and verdicts are stored. The separator is intentionally
 * `.` rather than the `SELF_TUNING_LEDGER_PREFIX` (`self_tuning:`)
 * namespace: the Measure stage's `gatherLedger` scans with
 * `LIKE 'self_tuning:%'` and must never mistake the pending blob for a
 * ledger entry.
 */
export const TUNING_PENDING_CYCLE_STATE_KEY = "self_tuning.pending_cycle";

/** §3.2 — upper bound on recommendations per weekly cycle (ranked by estimated $ impact). */
export const MAX_RECOMMENDATIONS_PER_CYCLE = 3;

/** §3.2 — a key changed fewer than this many days ago is not re-proposed. */
export const TUNING_HYSTERESIS_DAYS = 14;

/** §3.4 — longer cool-down after an auto-revert so apply→revert cannot flap. */
export const TUNING_REVERT_COOLDOWN_DAYS = 28;

// ── R1 (pre-pass freshness) ─────────────────────────────────────────────────

/** Name of the runtime setting R1 proposes changes to. */
export const R1_KNOB = "activityScanPrePassFreshnessMinutes";

/** R1 proposes a step up when the aggregate empty-run rate is strictly above this. */
export const R1_EMPTY_RATE_STEP_UP = 0.7;

/** R1 proposes a step down when the aggregate empty-run rate is strictly below this. */
export const R1_EMPTY_RATE_STEP_DOWN = 0.2;

/** Minimum combined (current + baseline) runs for an integration to count towards R1. */
export const R1_MIN_RUNS = 10;

/**
 * §3.2 — the freshness ladder. A step up picks the smallest notch above the
 * current value (cap 480); a step down picks the largest notch below it
 * (floor 120). The schema currently caps the knob at 240 — the 360/480
 * notches become appliable once Phase 3 widens `.max()` to 480 (§6). During
 * the Phase 2 shadow period they are recorded-and-judged only, so proposing
 * them is safe.
 */
export const R1_FRESHNESS_NOTCHES = [120, 240, 360, 480];

// ── R2 (notification throttle — lesson-mediated in v1) ─────────────────────

/** R2 fires for a notification type whose ignored/sent ratio is strictly above this. */
export const R2_IGNORED_RATE = 0.6;

/** Minimum combined sent count before R2 considers a notification type. */
export const R2_MIN_SENT = 5;

/**
 * The loop's own DM channel — apply notices ("Reply `!revert tuning` to
 * undo") and auto-revert notices are logged in `notification_log` under
 * this type. R2 must never propose demoting it: the per-change DM is the
 * D1/D6 safety invariant (daemon-sent, mandatory, deliberately not a
 * tunable notification surface). Without this exclusion, an owner who
 * leaves those DMs unreacted would have the loop spend one of its max-3
 * weekly slots recommending that its own safety channel go quiet.
 */
export const SELF_TUNING_NOTIFICATION_TYPE = "self_tuning";

// ── R3 (hourly-gate tightening) ─────────────────────────────────────────────

/** Name of the runtime setting R3 proposes changes to. */
export const R3_KNOB = "activityScanLowSignalPendingCeiling";

/** R3 fires only when the low-signal/low-novelty share of stage-3 escalations is strictly above this. */
export const R3_LOW_NOVELTY_SHARE = 0.5;

/** Minimum stage-3 escalations over 14d before the share is meaningful. */
export const R3_MIN_STAGE3 = 4;

/**
 * Conservative ladder for the silent-skip band. The schema permits values
 * up to 20, but the rule never proposes beyond 8 — a wider band is an
 * operator decision.
 */
export const R3_CEILING_NOTCHES = [2, 4, 8];

// ── R5 (lesson-store byte budget, §3.5) ─────────────────────────────────────

/** Name of the runtime setting R5 proposes changes to. */
export const R5_KNOB = "feedbackLessonMaxBytesGlobal";

// NOTE(review): the R5 rule body is outside this view; the two thresholds
// below are presumably its utilization and median-EV trigger conditions —
// confirm against the rule implementation.
export const R5_UTILIZATION_THRESHOLD = 0.9;
export const R5_MEDIAN_EV_CEILING = 1;

/** Floor for the R5 step-down; matches the per-agent default cap. */
export const R5_MIN_BYTES = 4096;

/** R5 proposes a 25% reduction, rounded down to a 1 KiB multiple. */
export const R5_STEP_FACTOR = 0.75;

/** Rounding granularity (bytes) for R5 proposals — 1 KiB. */
const R5_ROUNDING_BYTES = 1024;

/** Maximum length (UTF-16 code units) of a recommendation's evidence line. */
export const MAX_EVIDENCE_CHARS = 200;

/** Milliseconds in one day. */
const DAY_MS = 1000 * 60 * 60 * 24;

// Fixed numeric position per rule id. NOTE(review): the consumer is outside
// this view — presumably a deterministic ordering/tie-break; confirm.
const RULE_ORDER = { R1: 0, R2: 1, R3: 2, R4: 3, R5: 4 };
|
|
93
|
+
/**
 * Caps an evidence string at MAX_EVIDENCE_CHARS UTF-16 code units.
 *
 * Strings within the limit are returned untouched. Longer strings are cut
 * to one unit short of the limit and terminated with a single `…`, so the
 * result is exactly MAX_EVIDENCE_CHARS long.
 */
function truncateEvidence(value) {
    if (value.length > MAX_EVIDENCE_CHARS) {
        const head = value.slice(0, MAX_EVIDENCE_CHARS - 1);
        return `${head}…`;
    }
    return value;
}
|
|
98
|
+
/**
 * Formats `numerator / denominator` as a whole-number percentage label,
 * e.g. `pctLabel(1, 3)` → `"33%"`. The caller is responsible for passing a
 * non-zero denominator.
 */
function pctLabel(numerator, denominator) {
    const percent = Math.round((100 * numerator) / denominator);
    return `${percent}%`;
}
|
|
101
|
+
/** Rounds a number to four decimal places (nearest 1/10000). */
function round4(value) {
    const scaled = Math.round(value * 10000);
    return scaled / 10000;
}
|
|
104
|
+
/**
 * Tolerant timestamp parse for ledger `applied_at` / `reverted_at` values.
 *
 * Accepts ISO 8601 and the SQLite text shapes `YYYY-MM-DD HH:MM:SS` and
 * `YYYY-MM-DD HH:MM:SS.SSS`. SQLite-shaped values carry no zone designator
 * and are read as UTC: the space is swapped for `T` and a `Z` appended
 * before parsing. Without that normalisation a space-separated value would
 * go through the engine's implementation-defined fallback parser, which
 * typically interprets it as local time.
 *
 * @param raw Stored timestamp text; may be null/undefined/empty.
 * @returns Epoch milliseconds, or null when `raw` is absent or unparseable.
 *   The hysteresis check treats an unparseable value as "recently changed"
 *   (blocking), because an unverifiable timestamp must not silently unlock
 *   a re-proposal.
 */
function parseLedgerTimestamp(raw) {
    if (!raw)
        return null;
    // Optional `.SSS` covers SQLite's fractional-second (`%f`) output.
    const sqliteShaped = /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(?:\.\d{3})?$/.test(raw);
    const ms = Date.parse(sqliteShaped ? `${raw.replace(" ", "T")}Z` : raw);
    return Number.isNaN(ms) ? null : ms;
}
|
|
118
|
+
/**
 * §3.2 hysteresis guard. Reports whether `key` is off the table this cycle.
 *
 * For every ledger entry matching `key`:
 *  - a parseable `revertedAt` within {@link TUNING_REVERT_COOLDOWN_DAYS}
 *    blocks; an older revert clears that entry (the apply that preceded it
 *    is older still, so it cannot be inside the shorter apply window);
 *  - a `revertedAt` that is present but unparseable blocks — the revert may
 *    be recent and cannot be verified;
 *  - otherwise an unparseable/missing `appliedAt` blocks, as does an
 *    `appliedAt` within {@link TUNING_HYSTERESIS_DAYS}.
 *
 * @param ledger Iterable of ledger entries exposing `key`, `appliedAt`,
 *   `revertedAt`.
 * @param key The setting / recommendation key to check.
 * @param now Reference time (a `Date`).
 * @returns `true` when the key must not be proposed this cycle.
 */
export function isKeyInCooldown(ledger, key, now) {
    for (const entry of ledger) {
        if (entry.key !== key)
            continue;
        const revertedMs = parseLedgerTimestamp(entry.revertedAt);
        if (revertedMs !== null) {
            if (now.getTime() - revertedMs < TUNING_REVERT_COOLDOWN_DAYS * DAY_MS) {
                return true;
            }
            continue; // old revert — the apply that preceded it is older still
        }
        // parseLedgerTimestamp returns null for both "absent" and
        // "unparseable". A revert marker that exists but cannot be dated
        // must not fall through to the shorter apply window.
        if (entry.revertedAt)
            return true; // unverifiable revert → block
        const appliedMs = parseLedgerTimestamp(entry.appliedAt);
        if (appliedMs === null)
            return true; // unverifiable → block
        if (now.getTime() - appliedMs < TUNING_HYSTERESIS_DAYS * DAY_MS)
            return true;
    }
    return false;
}
|
|
143
|
+
// ── 14-day window combinators ───────────────────────────────────────────────
|
|
144
|
+
/**
 * Merges the per-integration fetch-window rows of the current and baseline
 * windows into one row per `integrationKey`, summing `runs` and `empty`.
 * Output order follows first appearance (current window first).
 */
function combineFetchWindow(current, baseline) {
    const totals = new Map();
    const rows = [...current.fetchWindow, ...baseline.fetchWindow];
    for (const row of rows) {
        let bucket = totals.get(row.integrationKey);
        if (bucket == null) {
            bucket = { integrationKey: row.integrationKey, runs: 0, empty: 0 };
            totals.set(row.integrationKey, bucket);
        }
        bucket.runs += row.runs;
        bucket.empty += row.empty;
    }
    return Array.from(totals.values());
}
|
|
160
|
+
/**
 * Adds the gate counters of the current and baseline windows field by
 * field, yielding the totals for the combined evidence span.
 */
function combineGate(current, baseline) {
    const fields = [
        "ticks",
        "stage0",
        "stage2",
        "stage3",
        "stage3LowSignal",
        "stage3LowSignalLowNovelty",
    ];
    const combined = {};
    for (const field of fields) {
        combined[field] = current.gate[field] + baseline.gate[field];
    }
    return combined;
}
|
|
171
|
+
/**
 * Merges the per-type notification rows of the current and baseline
 * windows into one row per `notificationType`, summing every outcome
 * counter. Output order follows first appearance (current window first).
 */
function combineNotifications(current, baseline) {
    const counters = ["sent", "replied", "acted", "corrected", "ignored", "pending"];
    const totals = new Map();
    for (const row of [...current.notifications, ...baseline.notifications]) {
        let bucket = totals.get(row.notificationType);
        if (bucket == null) {
            bucket = {
                notificationType: row.notificationType,
                sent: 0,
                replied: 0,
                acted: 0,
                corrected: 0,
                ignored: 0,
                pending: 0,
            };
            totals.set(row.notificationType, bucket);
        }
        for (const counter of counters) {
            bucket[counter] += row[counter];
        }
    }
    return Array.from(totals.values());
}
|
|
195
|
+
/**
 * Mean cost per run for one `actionType` across the current and baseline
 * windows. Only the first matching row of each window is used. Returns 0
 * when the action type was never seen or recorded zero runs.
 */
function avgCostPerRun(data, actionType) {
    const matches = [data.current, data.baseline]
        .map((span) => span.actions.find((action) => action.actionType === actionType))
        .filter((row) => Boolean(row));
    const totalRuns = matches.reduce((sum, row) => sum + row.runs, 0);
    const totalCost = matches.reduce((sum, row) => sum + row.costUsd, 0);
    if (totalRuns > 0) {
        return totalCost / totalRuns;
    }
    return 0;
}
|
|
208
|
+
// ── Ladder steppers (exported for direct unit coverage) ─────────────────────
|
|
209
|
+
/**
 * Returns the first notch strictly greater than `current`, or null when
 * none exists (i.e. `current` is at or above the cap). `notches` is
 * expected in ascending order, so the first hit is the smallest one.
 */
export function stepUpNotch(notches, current) {
    const next = notches.find((notch) => notch > current);
    return next === undefined ? null : next;
}
|
|
216
|
+
/**
 * Returns the last notch strictly less than `current`, or null when none
 * exists (i.e. `current` is at or below the floor). `notches` is expected
 * in ascending order, so scanning from the end finds the largest one.
 */
export function stepDownNotch(notches, current) {
    let index = notches.length;
    while (index-- > 0) {
        const notch = notches[index];
        if (notch < current) {
            return notch;
        }
    }
    return null;
}
|
|
224
|
+
// ── Rules ───────────────────────────────────────────────────────────────────
|
|
225
|
+
/** Builds a recommendation id of the form `<cycleId>:<rule>:<key>`. */
function makeId(cycleId, rule, key) {
    return "".concat(cycleId, ":", rule, ":", key);
}
|
|
228
|
+
/**
 * R1 — pre-pass freshness.
 *
 * The knob is global while the measurement is per-integration (§8 open
 * question). v1 therefore evaluates the **run-weighted aggregate**
 * empty-rate over the integrations that have at least {@link R1_MIN_RUNS}
 * combined runs, and names the single worst integration in the evidence
 * line.
 *
 * Returns null when no integration qualifies, when the aggregate rate sits
 * between the step-down and step-up thresholds, or when the ladder has no
 * further notch in the chosen direction.
 */
function ruleR1(cycleId, data, knobs) {
    const qualifying = combineFetchWindow(data.current, data.baseline).filter((row) => row.runs >= R1_MIN_RUNS);
    if (qualifying.length === 0) {
        return null;
    }
    let totalRuns = 0;
    let totalEmpty = 0;
    for (const row of qualifying) {
        totalRuns += row.runs;
        totalEmpty += row.empty;
    }
    const emptyRate = totalEmpty / totalRuns;
    const currentValue = knobs.activityScanPrePassFreshnessMinutes;

    let proposedValue = null;
    if (emptyRate > R1_EMPTY_RATE_STEP_UP) {
        proposedValue = stepUpNotch(R1_FRESHNESS_NOTCHES, currentValue);
    }
    else if (emptyRate < R1_EMPTY_RATE_STEP_DOWN) {
        proposedValue = stepDownNotch(R1_FRESHNESS_NOTCHES, currentValue);
    }
    if (proposedValue === null) {
        return null;
    }

    // Highest empty share first; ties broken by integration key so the
    // cited "worst" is deterministic.
    const emptyShare = (row) => row.empty / row.runs;
    const [worst] = [...qualifying].sort((a, b) => emptyShare(b) - emptyShare(a) ||
        a.integrationKey.localeCompare(b.integrationKey));

    const raising = proposedValue > currentValue;
    const direction = raising ? "raise" : "lower";

    // Heuristic: a step-up roughly halves the empty-run share it can reach;
    // a step-down buys responsiveness rather than $, so it ranks at 0.
    let estWeeklySavingUsd = 0;
    if (raising) {
        estWeeklySavingUsd = round4((totalEmpty / 2) * avgCostPerRun(data, FETCH_WINDOW_ACTION_TYPE) * 0.5);
    }

    const aggregateLabel = pctLabel(totalEmpty, totalRuns);
    const worstLabel = pctLabel(worst.empty, worst.runs);
    return {
        id: makeId(cycleId, "R1", R1_KNOB),
        rule: "R1",
        actuator: "config",
        key: R1_KNOB,
        currentValue: currentValue,
        proposedValue: proposedValue,
        bounds: {
            min: R1_FRESHNESS_NOTCHES[0],
            max: R1_FRESHNESS_NOTCHES[R1_FRESHNESS_NOTCHES.length - 1],
        },
        evidence: truncateEvidence(`fetch_window ${aggregateLabel} empty over ${totalRuns} runs/14d ` +
            `(worst: ${worst.integrationKey} ${worstLabel}) — ${direction} freshness`),
        estWeeklySavingUsd,
    };
}
|
|
277
|
+
/**
 * R2 — notification throttle, lesson-mediated in v1 (§3.2).
 *
 * No per-type digest/silent knob exists, so each recommendation uses the
 * `lesson` actuator: an apply verdict feeds task-flow guidance through the
 * existing lesson loop instead of a config write. One recommendation is
 * emitted per qualifying notification type (the max-3 ranking keeps the
 * rendered block bounded).
 *
 * A type qualifies when it is not the loop's own
 * {@link SELF_TUNING_NOTIFICATION_TYPE} channel, has not been sent fewer
 * than {@link R2_MIN_SENT} times over both windows, and its ignored/sent
 * ratio is not at or below {@link R2_IGNORED_RATE}.
 */
function ruleR2(cycleId, data) {
    const qualifies = (type) => {
        // The loop's own mandatory DM channel is never a demotion candidate.
        if (type.notificationType === SELF_TUNING_NOTIFICATION_TYPE) {
            return false;
        }
        const tooFew = type.sent < R2_MIN_SENT;
        const mostlyEngaged = type.ignored / type.sent <= R2_IGNORED_RATE;
        return !tooFew && !mostlyEngaged;
    };
    return combineNotifications(data.current, data.baseline)
        .filter(qualifies)
        .map((type) => {
            const recommendationKey = `notification:${type.notificationType}`;
            return {
                id: makeId(cycleId, "R2", recommendationKey),
                rule: "R2",
                actuator: "lesson",
                key: recommendationKey,
                currentValue: "send",
                proposedValue: "demote (batch into digests / silence unless user-actionable)",
                bounds: null,
                evidence: truncateEvidence(`${type.notificationType}: ${type.ignored}/${type.sent} ignored ` +
                    `(${pctLabel(type.ignored, type.sent)}) over 14d`),
                estWeeklySavingUsd: 0,
            };
        });
}
|
|
310
|
+
/**
 * R3 — hourly-gate tightening. Counts only the autonomous
 * `low_signal_default` fallback escalations (legitimate VIP-mail /
 * calendar-conflict escalations and forced ticks are excluded upstream by
 * the Measure stage, §3.1) and steps up the existing silent-skip band knob.
 * Zero new gate code by design (§3.2).
 *
 * @param cycleId Cycle id embedded in the recommendation id.
 * @param data Measure-stage data; `current` and `baseline` gate counters
 *   are combined before the thresholds are applied.
 * @param knobs Current knob values; reads
 *   `activityScanLowSignalPendingCeiling`.
 * @returns One recommendation, or `null` when there are too few stage-3
 *   samples, the low-novelty share is not above `R3_LOW_NOVELTY_SHARE`, or
 *   `stepUpNotch` yields no higher notch.
 */
function ruleR3(cycleId, data, knobs) {
    const gate = combineGate(data.current, data.baseline);
    // `stage3 <= 0` protects the share computation from a 0/0 division.
    if (gate.stage3 <= 0 || gate.stage3 < R3_MIN_STAGE3)
        return null;
    if (gate.stage3LowSignalLowNovelty / gate.stage3 <= R3_LOW_NOVELTY_SHARE) {
        return null;
    }
    const current = knobs.activityScanLowSignalPendingCeiling;
    const proposed = stepUpNotch(R3_CEILING_NOTCHES, current);
    if (proposed === null)
        return null;
    return {
        id: makeId(cycleId, "R3", R3_KNOB),
        rule: "R3",
        actuator: "config",
        key: R3_KNOB,
        currentValue: current,
        proposedValue: proposed,
        bounds: { min: 0, max: R3_CEILING_NOTCHES[R3_CEILING_NOTCHES.length - 1] },
        evidence: truncateEvidence(`${gate.stage3LowSignalLowNovelty}/${gate.stage3} stage3 escalations were ` +
            `low_signal_default with novelty<=1 over 14d`),
        // The counter spans 14 days, so halve it for a weekly figure.
        estWeeklySavingUsd: round4((gate.stage3LowSignalLowNovelty / 2) *
            avgCostPerRun(data, "routine.activity_scan")),
    };
}
|
|
342
|
+
/**
 * R4 — schedule hygiene. Propose-only in v1 (§3.2 / §8): no provenance
 * column distinguishes agent-created rows, so an apply verdict still means
 * "the owner flips `recurring_schedules.enabled` by hand". Built-in cron
 * routines never appear here — they have no `recurring_schedule_id` parent.
 *
 * @param cycleId Cycle id embedded in every recommendation id.
 * @param failingSchedules Rows with `id`, `taskType`, `description` and
 *   `lastFailures`; each one becomes a "disable" recommendation.
 * @returns One recommendation per input row, in input order.
 */
function ruleR4(cycleId, failingSchedules) {
    return failingSchedules.map((row) => {
        const key = `recurring_schedules:${row.id}`;
        // The description is optional; append it only when present.
        const descriptionSuffix = row.description ? `, ${row.description}` : "";
        return {
            id: makeId(cycleId, "R4", key),
            rule: "R4",
            actuator: "schedule",
            key,
            currentValue: "enabled",
            proposedValue: "disabled",
            bounds: null,
            evidence: truncateEvidence(`last ${row.lastFailures} runs failed (task_type=${row.taskType}` +
                descriptionSuffix +
                ")"),
            estWeeklySavingUsd: 0,
        };
    });
}
|
|
363
|
+
/**
 * R5 — lesson-store byte budget (§3.5). The eviction scorer's primary term
 * is `w_ev·log(ev+1)`, so cap pressure already evicts low-evidence entries
 * first; R5 only fires when the **global** store sits above 90% utilization
 * with median evidence ≤ 1 — i.e. the cap is keeping weak lessons alive.
 * Per-agent stores are measured (§3.5 `<lesson_stores>`) but not tuned in v1.
 *
 * NOTE(review): the store treated as "global" is the one whose `scope` is
 * `"agent"` — confirm that label against the Measure stage.
 *
 * @param cycleId Cycle id embedded in the recommendation id.
 * @param knobs Current knob values; reads `feedbackLessonMaxBytesGlobal`.
 * @param lessonStores Measured stores (`scope`, `bytes`, `capBytes`,
 *   `medianEv`, `entries`).
 * @returns A "shrink the cap" recommendation, or `null` when the store is
 *   missing, has no positive cap, is not over the utilization threshold,
 *   has no / too-high median evidence, or the stepped-down cap would not be
 *   smaller than the current one.
 */
function ruleR5(cycleId, knobs, lessonStores) {
    const globalStore = lessonStores.find((store) => store.scope === "agent");
    if (!globalStore || globalStore.capBytes <= 0)
        return null;
    if (globalStore.bytes / globalStore.capBytes <= R5_UTILIZATION_THRESHOLD) {
        return null;
    }
    if (globalStore.medianEv === null || globalStore.medianEv > R5_MEDIAN_EV_CEILING) {
        return null;
    }
    const current = knobs.feedbackLessonMaxBytesGlobal;
    // Scale by the step factor, round down to the rounding granule, and
    // never go below the floor.
    const stepped = Math.floor((current * R5_STEP_FACTOR) / R5_ROUNDING_BYTES) * R5_ROUNDING_BYTES;
    const proposed = Math.max(R5_MIN_BYTES, stepped);
    // Already at (or below) the floor: nothing left to shrink.
    if (proposed >= current)
        return null;
    return {
        id: makeId(cycleId, "R5", R5_KNOB),
        rule: "R5",
        actuator: "config",
        key: R5_KNOB,
        currentValue: current,
        proposedValue: proposed,
        bounds: { min: R5_MIN_BYTES, max: current },
        evidence: truncateEvidence(`agent lesson store at ${pctLabel(globalStore.bytes, globalStore.capBytes)} ` +
            `of cap with median ev=${globalStore.medianEv} (${globalStore.entries} entries)`),
        estWeeklySavingUsd: 0,
    };
}
|
|
397
|
+
/**
 * The rule table (§3.2). Pure: every input is passed in; the only
 * non-determinism allowed is the caller's `now`. Applies the code-side
 * guards — hysteresis against the ledger, per-rule minimum samples, and the
 * max-3 ranking by estimated weekly $ impact (ties: rule order, then key).
 *
 * @param input Object with `now`, `data` (including `data.ledger`),
 *   `knobs`, and the optional `lessonStores` / `failingSchedules` arrays
 *   (both default to empty).
 * @returns At most `MAX_RECOMMENDATIONS_PER_CYCLE` recommendations, with
 *   keys still in cool-down removed, ordered by descending
 *   `estWeeklySavingUsd`, then `RULE_ORDER`, then `key`.
 */
export function buildTuningRecommendations(input) {
    const cycleId = cycleIdForDate(input.now);
    const lessonStores = input.lessonStores ?? [];
    const failingSchedules = input.failingSchedules ?? [];
    const candidates = [];
    // Single-result rules return `null` when they do not fire; multi-result
    // rules return a (possibly empty) array.
    const r1 = ruleR1(cycleId, input.data, input.knobs);
    if (r1)
        candidates.push(r1);
    candidates.push(...ruleR2(cycleId, input.data));
    const r3 = ruleR3(cycleId, input.data, input.knobs);
    if (r3)
        candidates.push(r3);
    candidates.push(...ruleR4(cycleId, failingSchedules));
    const r5 = ruleR5(cycleId, input.knobs, lessonStores);
    if (r5)
        candidates.push(r5);
    return candidates
        .filter((rec) => !isKeyInCooldown(input.data.ledger, rec.key, input.now))
        .sort((a, b) => b.estWeeklySavingUsd - a.estWeeklySavingUsd ||
        RULE_ORDER[a.rule] - RULE_ORDER[b.rule] ||
        a.key.localeCompare(b.key))
        .slice(0, MAX_RECOMMENDATIONS_PER_CYCLE);
}
|
|
426
|
+
/**
 * Cycle id = the generating run's UTC date (`YYYY-MM-DD`).
 *
 * @param now A `Date`; its ISO-8601 UTC rendering is truncated to the date.
 * @returns The first ten characters of `now.toISOString()`.
 * @throws RangeError when `now` is an invalid date (raised by
 *   `toISOString`).
 */
export function cycleIdForDate(now) {
    return now.toISOString().slice(0, 10);
}
|
|
430
|
+
/**
 * Wrap a recommendation set as the persisted pending-cycle blob. Written to
 * `runtime_state` even when empty — overwriting is what expires the previous
 * cycle's single-use ids (§3.4).
 *
 * Same-day regeneration (a weekly-review re-run via `!run` / crash retry)
 * produces the SAME cycle id and — for any rule still firing on the same
 * key — the same recommendation ids. Those ids are not expired (§3.4 expiry
 * is the *next* weekly cycle), so verdicts already recorded against them
 * carry forward: without this, the regenerated blob's empty `verdicts` map
 * would silently reopen judged ids, and the re-run session's re-POST would
 * record fresh verdicts — double-posting the rejection `self_critique`
 * signals the route's per-id idempotency exists to prevent. Verdicts for
 * ids the regenerated set no longer contains are dropped (the evidence
 * that produced them is gone); a different-day cycle starts clean.
 *
 * @param recommendations Recommendations for this cycle (copied, not
 *   aliased, into the result).
 * @param generatedAtIso ISO timestamp of generation; its first ten
 *   characters become the cycle id.
 * @param previousCycle The previously persisted cycle, if any.
 * @returns `{ cycleId, generatedAt, recommendations, verdicts }`.
 */
export function createPendingTuningCycle(recommendations, generatedAtIso, previousCycle) {
    const cycleId = generatedAtIso.slice(0, 10);
    const isSameDayRegeneration = Boolean(previousCycle) && previousCycle.cycleId === cycleId;
    const liveIds = new Set(recommendations.map((rec) => rec.id));
    // Keep only verdicts whose recommendation id survived regeneration.
    const carried = isSameDayRegeneration
        ? Object.entries(previousCycle.verdicts ?? {}).filter(([id]) => liveIds.has(id))
        : [];
    return {
        cycleId,
        generatedAt: generatedAtIso,
        recommendations: [...recommendations],
        verdicts: Object.fromEntries(carried),
    };
}
|
|
463
|
+
// ── R4 gather (single DB read, same injected-DB pattern as the Measure stage) ─
|
|
464
|
+
/** Trailing settled-run window R4 inspects per recurring row. */
|
|
465
|
+
export const R4_FAILURE_STREAK = 3;
|
|
466
|
+
/**
|
|
467
|
+
* R4 input — enabled `recurring_schedules` rows whose last
|
|
468
|
+
* {@link R4_FAILURE_STREAK} *settled* materialized runs (`completed` /
|
|
469
|
+
* `failed` / `skipped`; pending and running rows are not evidence) all
|
|
470
|
+
* failed. SQL over `agent_schedule` grouped by `recurring_schedule_id` —
|
|
471
|
+
* no new columns (§3.2). A `skipped` run breaks the streak deliberately:
|
|
472
|
+
* the rule targets "fires and fails every time", not gate-skipped rows.
|
|
473
|
+
*/
|
|
474
|
+
export function gatherFailingRecurringSchedules(db) {
|
|
475
|
+
const recurring = db
|
|
476
|
+
.prepare(`SELECT id, task_type AS taskType, task_description AS description
|
|
477
|
+
FROM recurring_schedules
|
|
478
|
+
WHERE enabled = 1
|
|
479
|
+
ORDER BY id ASC`)
|
|
480
|
+
.all();
|
|
481
|
+
const lastRuns = db.prepare(`SELECT status FROM agent_schedule
|
|
482
|
+
WHERE recurring_schedule_id = ?
|
|
483
|
+
AND status IN ('completed', 'failed', 'skipped')
|
|
484
|
+
ORDER BY scheduled_for DESC, id DESC
|
|
485
|
+
LIMIT ${R4_FAILURE_STREAK}`);
|
|
486
|
+
const out = [];
|
|
487
|
+
for (const row of recurring) {
|
|
488
|
+
const settled = lastRuns.all(row.id);
|
|
489
|
+
if (settled.length < R4_FAILURE_STREAK)
|
|
490
|
+
continue;
|
|
491
|
+
if (!settled.every((run) => run.status === "failed"))
|
|
492
|
+
continue;
|
|
493
|
+
out.push({
|
|
494
|
+
id: row.id,
|
|
495
|
+
taskType: row.taskType,
|
|
496
|
+
description: row.description,
|
|
497
|
+
lastFailures: R4_FAILURE_STREAK,
|
|
498
|
+
});
|
|
499
|
+
}
|
|
500
|
+
return out;
|
|
501
|
+
}
|
|
502
|
+
// ── Renderer ────────────────────────────────────────────────────────────────
|
|
503
|
+
/**
 * Escape a string for use inside a double-quoted XML attribute value.
 *
 * `&` is replaced first so the ampersands introduced by the later
 * replacements are not escaped a second time.
 *
 * @param value Raw text.
 * @returns `value` with `&`, `<`, `>` and `"` replaced by `&amp;`, `&lt;`,
 *   `&gt;` and `&quot;`.
 */
function xmlEscape(value) {
    return value
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;");
}
|
|
510
|
+
/**
 * Render the `<tuning_recommendations>` block for the weekly session's
 * Phase 3c verdict step. Returns `null` when the cycle holds no
 * recommendations — the design requires zero bytes in that case (§3.2).
 * Output is bounded by construction: ≤ {@link MAX_RECOMMENDATIONS_PER_CYCLE}
 * rows with ≤ {@link MAX_EVIDENCE_CHARS}-char evidence strings.
 *
 * Carried-forward verdicts (a same-day re-run regenerates the same ids, and
 * `createPendingTuningCycle` preserves verdicts already recorded against
 * them) surface as a `verdict` attribute on the row, so the re-run session
 * skips already-judged rows instead of re-POSTing them — the route's per-id
 * idempotency would absorb the duplicates, but not the wasted judgment
 * tokens.
 *
 * @param cycle Pending cycle (`cycleId`, `recommendations`, optional
 *   `verdicts` map keyed by recommendation id).
 * @param opts Optional settings; `opts.mode` defaults to `"shadow"`.
 * @returns The newline-joined block, or `null` for an empty cycle.
 */
export function renderTuningRecommendationsBlock(cycle, opts = {}) {
    if (cycle.recommendations.length === 0)
        return null;
    const mode = opts.mode ?? "shadow";
    const out = [];
    // `mode` and the recorded verdict are escaped like every other
    // interpolated attribute so an unexpected value cannot break the markup.
    out.push(`<tuning_recommendations cycle="${xmlEscape(cycle.cycleId)}" ` +
        `count="${cycle.recommendations.length}" mode="${xmlEscape(String(mode))}" ` +
        `verdict_endpoint="POST /api/tuning/verdicts">`);
    for (const rec of cycle.recommendations) {
        const recorded = cycle.verdicts?.[rec.id];
        out.push(` <r id="${xmlEscape(rec.id)}" rule="${rec.rule}" ` +
            `actuator="${rec.actuator}" key="${xmlEscape(rec.key)}" ` +
            `current="${xmlEscape(String(rec.currentValue))}" ` +
            `proposed="${xmlEscape(String(rec.proposedValue))}"` +
            // Optional attributes are omitted entirely rather than emitted empty.
            (rec.bounds ? ` bounds="${rec.bounds.min}..${rec.bounds.max}"` : "") +
            (rec.estWeeklySavingUsd > 0
                ? ` est_usd_wk="${rec.estWeeklySavingUsd}"`
                : "") +
            (recorded ? ` verdict="${xmlEscape(String(recorded.verdict))}"` : "") +
            ` evidence="${xmlEscape(rec.evidence)}" />`);
    }
    out.push("</tuning_recommendations>");
    return out.join("\n");
}
|
|
548
|
+
/**
 * Record verdicts onto a pending cycle, idempotently per id (§3.4): a
 * retried POST with the same verdict is a `duplicate` no-op; a different
 * verdict for an already-verdicted id is a `conflict` — first verdict wins
 * (re-judging a recommendation mid-cycle is not a supported operation).
 * Callers must have validated every id against `cycle.recommendations`
 * first; an unknown id here is a programming error and throws.
 *
 * The input cycle is not mutated: a copy of its `verdicts` map is updated
 * and returned inside a new cycle object.
 *
 * @param cycle Pending cycle with `recommendations` and `verdicts`.
 * @param entries Verdict entries (`id`, `verdict`, `reason`), processed in
 *   order; a repeated id within one batch is judged against the earlier entry.
 * @param nowIso Timestamp stored as `recordedAt` on newly recorded verdicts.
 * @returns `{ cycle, results }` where `results` has one
 *   `{ id, status }` per entry (`recorded` | `duplicate` | `conflict`).
 * @throws Error when an entry id is not among the cycle's recommendations.
 */
export function applyVerdictsToCycle(cycle, entries, nowIso) {
    const known = new Set(cycle.recommendations.map((rec) => rec.id));
    const verdicts = { ...cycle.verdicts };
    const results = [];
    for (const entry of entries) {
        if (!known.has(entry.id)) {
            throw new Error(`Unknown recommendation id: ${entry.id}`);
        }
        const existing = verdicts[entry.id];
        if (existing) {
            // First verdict wins; report whether the retry agrees with it.
            results.push({
                id: entry.id,
                status: existing.verdict === entry.verdict ? "duplicate" : "conflict",
            });
            continue;
        }
        verdicts[entry.id] = {
            verdict: entry.verdict,
            reason: entry.reason,
            recordedAt: nowIso,
        };
        results.push({ id: entry.id, status: "recorded" });
    }
    return { cycle: { ...cycle, verdicts }, results };
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Self-Tuning Review Cycle — Verify stage / auto-revert monitor
|
|
3
|
+
* (SELF_TUNING_REVIEW_CYCLE_DESIGN.md §3.4, Phase 3).
|
|
4
|
+
*
|
|
5
|
+
* Piggybacks the existing hourly cron tick (scheduler.ts — same
|
|
6
|
+
* fire-and-forget slot as the auth probe; no new scheduled session, P2) and
|
|
7
|
+
* throttles itself to one pass per UTC day via
|
|
8
|
+
* {@link REVERT_MONITOR_STATE_KEY}. Seven days after an applied config
|
|
9
|
+
* change, it recomputes the rule's target metric over the verify window
|
|
10
|
+
* `[applied_at, applied_at + 7d)` and:
|
|
11
|
+
*
|
|
12
|
+
* - **regression past the rule's margin** → revert through the shared
|
|
13
|
+
* {@link revertAppliedTuningChange} (config restored via the
|
|
14
|
+
* `applyConfigUpdates` chokepoint, ledger stamped `reverted_at` — which
|
|
15
|
+
* triggers the 28-day re-proposal cool-down — audit
|
|
16
|
+
* `self_tuning.reverted`, `self_critique` signal so the failure becomes
|
|
17
|
+
* a lesson) and DM the owner;
|
|
18
|
+
* - **no regression** → stamp `verified_at` + audit
|
|
19
|
+
* `self_tuning.verified` so the entry is never re-examined.
|
|
20
|
+
*
|
|
21
|
+
* Per-rule margins (D3/D4 — named constants, deliberately not settings
|
|
22
|
+
* keys):
|
|
23
|
+
* - R1 reverts if daily novelty≥2 observation arrivals fall >30% below
|
|
24
|
+
* the pre-change baseline (stale pre-pass suppressing signal) OR the
|
|
25
|
+
* cautious-escalate tick share rises >10 pt.
|
|
26
|
+
* - R3 reverts if >10% of `stage0_silent` ticks in the window carried
|
|
27
|
+
* `maxNoveltyScore ≥ 2` in their audited snapshot — harm only the
|
|
28
|
+
* raised ceiling can introduce (today's gate never silences novelty≥2).
|
|
29
|
+
* - R5 reverts on the explicit-correction proxy: any negative explicit /
|
|
30
|
+
* self_critique signal citing a lesson within the window.
|
|
31
|
+
*
|
|
32
|
+
* The monitor runs regardless of `selfTuningEnabled`: entries only exist
|
|
33
|
+
* once actuation has run, and a safety rollback must keep working even if
|
|
34
|
+
* the owner turns the loop off afterwards. Only `config`-actuator entries
|
|
35
|
+
* are verified — lesson/schedule entries carry no machine state.
|
|
36
|
+
*/
|
|
37
|
+
import type Database from "better-sqlite3";
|
|
38
|
+
import { type LedgerScanEntry, type RevertDeps } from "./tuning-actuator.js";
|
|
39
|
+
/**
|
|
40
|
+
* Daily-throttle state key. Dot-separated namespace on purpose — the
|
|
41
|
+
* Measure stage's `gatherLedger` scans `self_tuning:%` and must never pick
|
|
42
|
+
* monitor state up as a phantom ledger entry (same rule as the pending
|
|
43
|
+
* cycle key).
|
|
44
|
+
*/
|
|
45
|
+
export declare const REVERT_MONITOR_STATE_KEY = "self_tuning.revert_monitor";
|
|
46
|
+
/** §3.4 — days between apply and the verify pass. */
|
|
47
|
+
export declare const TUNING_VERIFY_WINDOW_DAYS = 7;
|
|
48
|
+
/** D4 — R1 reverts when novelty≥2 arrivals fall >30% below baseline. */
|
|
49
|
+
export declare const R1_NOVELTY_ARRIVALS_MAX_DROP = 0.3;
|
|
50
|
+
/** D4 — R1 reverts when the cautious-escalate share rises >10 pt. */
|
|
51
|
+
export declare const R1_CAUTIOUS_ESCALATE_MAX_RISE = 0.1;
|
|
52
|
+
/** D3 — R3 reverts when >10% of silent ticks carried novelty≥2 snapshots. */
|
|
53
|
+
export declare const R3_SILENT_NOVELTY_GE2_MAX_SHARE = 0.1;
|
|
54
|
+
/**
 * Outcome of evaluating one applied ledger entry, discriminated by `action`:
 *
 * - `wait` — no decision yet (presumably the verify window has not elapsed;
 *   confirm against the implementation).
 * - `verify` — leave the change in place; `result` explains why.
 * - `revert` — roll the change back; `reason` explains the regression.
 */
export type VerifyDecision = {
    action: "wait";
} | {
    action: "verify";
    result: string;
} | {
    action: "revert";
    reason: string;
};
|
|
63
|
+
/**
 * Decide one applied entry's fate. Pure given the DB rows: every margin is
 * compared against telemetry that already exists (D3 — no recomputation of
 * live signals). An entry whose `applied_at` cannot be parsed, or whose
 * rule has no metric, settles as verified with an explanatory result — the
 * conservative direction is "leave the change in place", never "revert
 * without evidence".
 *
 * @param db Database handle the telemetry rows are read from.
 * @param entry The applied ledger entry under evaluation.
 * @param now Evaluation time.
 * @returns The decision for this entry.
 */
export declare function evaluateAppliedEntry(db: Database.Database, entry: LedgerScanEntry, now: Date): VerifyDecision;
|
|
72
|
+
/**
 * Dependencies of the revert monitor: everything the shared revert path
 * needs ({@link RevertDeps}) plus an optional owner-DM sender.
 */
export interface RevertMonitorDeps extends RevertDeps {
    /** Owner DM for an auto-revert. Failure-isolated; absence only logs. */
    sendDm?: (message: string) => Promise<void>;
}
|
|
76
|
+
/** Summary of one revert-monitor invocation. */
export interface RevertMonitorRun {
    /** False when the daily throttle short-circuited the pass. */
    ran: boolean;
    /** Entries reverted in this pass (presumably ledger keys/ids — confirm). */
    reverted: string[];
    /** Entries stamped verified in this pass (presumably ledger keys/ids — confirm). */
    verified: string[];
}
|
|
82
|
+
/**
 * §3.4 — the one-line owner DM for an auto-revert.
 *
 * @param entry The ledger entry that was reverted.
 * @param reason Why the monitor reverted it.
 * @returns The message text to send to the owner.
 */
export declare function buildAutoRevertDmMessage(entry: LedgerScanEntry, reason: string): string;
|
|
84
|
+
/**
 * The cron-tick entry point. Throttled to one pass per UTC day; the state
 * write happens before the scan so a mid-pass failure waits for tomorrow
 * instead of retrying every tick. Each entry is processed in isolation —
 * one broken entry never blocks the rest.
 *
 * @param deps Revert dependencies plus the optional owner-DM sender.
 * @param now Evaluation time; optional.
 * @returns Whether the pass ran and which entries were reverted / verified.
 */
export declare function runSelfTuningRevertMonitor(deps: RevertMonitorDeps, now?: Date): Promise<RevertMonitorRun>;
|