@itaila/archetype 0.3.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +475 -0
- package/dist/audit/audit-persona.d.ts +163 -0
- package/dist/audit/audit-persona.d.ts.map +1 -0
- package/dist/audit/audit-persona.js +415 -0
- package/dist/audit/audit-persona.js.map +1 -0
- package/dist/audit/brain-reflection.d.ts +33 -0
- package/dist/audit/brain-reflection.d.ts.map +1 -0
- package/dist/audit/brain-reflection.js +148 -0
- package/dist/audit/brain-reflection.js.map +1 -0
- package/dist/audit/conversation-audit.d.ts +12 -0
- package/dist/audit/conversation-audit.d.ts.map +1 -0
- package/dist/audit/conversation-audit.js +76 -0
- package/dist/audit/conversation-audit.js.map +1 -0
- package/dist/audit/prompt-audit.d.ts +10 -0
- package/dist/audit/prompt-audit.d.ts.map +1 -0
- package/dist/audit/prompt-audit.js +153 -0
- package/dist/audit/prompt-audit.js.map +1 -0
- package/dist/audit/prompt-dump.d.ts +137 -0
- package/dist/audit/prompt-dump.d.ts.map +1 -0
- package/dist/audit/prompt-dump.js +269 -0
- package/dist/audit/prompt-dump.js.map +1 -0
- package/dist/audit/trace-integrity.d.ts +33 -0
- package/dist/audit/trace-integrity.d.ts.map +1 -0
- package/dist/audit/trace-integrity.js +109 -0
- package/dist/audit/trace-integrity.js.map +1 -0
- package/dist/audit/types.d.ts +92 -0
- package/dist/audit/types.d.ts.map +1 -0
- package/dist/audit/types.js +2 -0
- package/dist/audit/types.js.map +1 -0
- package/dist/audit/version.d.ts +14 -0
- package/dist/audit/version.d.ts.map +1 -0
- package/dist/audit/version.js +65 -0
- package/dist/audit/version.js.map +1 -0
- package/dist/brain.d.ts +7 -0
- package/dist/brain.d.ts.map +1 -0
- package/dist/brain.js +83 -0
- package/dist/brain.js.map +1 -0
- package/dist/builder/actions.d.ts +60 -0
- package/dist/builder/actions.d.ts.map +1 -0
- package/dist/builder/actions.js +257 -0
- package/dist/builder/actions.js.map +1 -0
- package/dist/builder/browser.d.ts +140 -0
- package/dist/builder/browser.d.ts.map +1 -0
- package/dist/builder/browser.js +232 -0
- package/dist/builder/browser.js.map +1 -0
- package/dist/builder/executor.d.ts +228 -0
- package/dist/builder/executor.d.ts.map +1 -0
- package/dist/builder/executor.js +1548 -0
- package/dist/builder/executor.js.map +1 -0
- package/dist/builder/index.d.ts +24 -0
- package/dist/builder/index.d.ts.map +1 -0
- package/dist/builder/index.js +24 -0
- package/dist/builder/index.js.map +1 -0
- package/dist/builder/node-test-discovery.d.ts +13 -0
- package/dist/builder/node-test-discovery.d.ts.map +1 -0
- package/dist/builder/node-test-discovery.js +45 -0
- package/dist/builder/node-test-discovery.js.map +1 -0
- package/dist/builder/sandbox.d.ts +172 -0
- package/dist/builder/sandbox.d.ts.map +1 -0
- package/dist/builder/sandbox.js +294 -0
- package/dist/builder/sandbox.js.map +1 -0
- package/dist/builder/workspace-files.d.ts +63 -0
- package/dist/builder/workspace-files.d.ts.map +1 -0
- package/dist/builder/workspace-files.js +190 -0
- package/dist/builder/workspace-files.js.map +1 -0
- package/dist/core/actions.d.ts +55 -0
- package/dist/core/actions.d.ts.map +1 -0
- package/dist/core/actions.js +311 -0
- package/dist/core/actions.js.map +1 -0
- package/dist/core/attachment-notes.d.ts +7 -0
- package/dist/core/attachment-notes.d.ts.map +1 -0
- package/dist/core/attachment-notes.js +38 -0
- package/dist/core/attachment-notes.js.map +1 -0
- package/dist/core/context.d.ts +10 -0
- package/dist/core/context.d.ts.map +1 -0
- package/dist/core/context.js +108 -0
- package/dist/core/context.js.map +1 -0
- package/dist/core/crud-prompt.d.ts +16 -0
- package/dist/core/crud-prompt.d.ts.map +1 -0
- package/dist/core/crud-prompt.js +268 -0
- package/dist/core/crud-prompt.js.map +1 -0
- package/dist/core/crud-schema.d.ts +12 -0
- package/dist/core/crud-schema.d.ts.map +1 -0
- package/dist/core/crud-schema.js +42 -0
- package/dist/core/crud-schema.js.map +1 -0
- package/dist/core/effective-config.d.ts +13 -0
- package/dist/core/effective-config.d.ts.map +1 -0
- package/dist/core/effective-config.js +33 -0
- package/dist/core/effective-config.js.map +1 -0
- package/dist/core/entities.d.ts +82 -0
- package/dist/core/entities.d.ts.map +1 -0
- package/dist/core/entities.js +116 -0
- package/dist/core/entities.js.map +1 -0
- package/dist/core/entity-helpers.d.ts +47 -0
- package/dist/core/entity-helpers.d.ts.map +1 -0
- package/dist/core/entity-helpers.js +122 -0
- package/dist/core/entity-helpers.js.map +1 -0
- package/dist/core/entity-registry.d.ts +47 -0
- package/dist/core/entity-registry.d.ts.map +1 -0
- package/dist/core/entity-registry.js +54 -0
- package/dist/core/entity-registry.js.map +1 -0
- package/dist/core/eq.d.ts +13 -0
- package/dist/core/eq.d.ts.map +1 -0
- package/dist/core/eq.js +41 -0
- package/dist/core/eq.js.map +1 -0
- package/dist/core/focus-context.d.ts +19 -0
- package/dist/core/focus-context.d.ts.map +1 -0
- package/dist/core/focus-context.js +46 -0
- package/dist/core/focus-context.js.map +1 -0
- package/dist/core/focus-mode-actions.d.ts +23 -0
- package/dist/core/focus-mode-actions.d.ts.map +1 -0
- package/dist/core/focus-mode-actions.js +74 -0
- package/dist/core/focus-mode-actions.js.map +1 -0
- package/dist/core/greeting.d.ts +10 -0
- package/dist/core/greeting.d.ts.map +1 -0
- package/dist/core/greeting.js +41 -0
- package/dist/core/greeting.js.map +1 -0
- package/dist/core/identity.d.ts +13 -0
- package/dist/core/identity.d.ts.map +1 -0
- package/dist/core/identity.js +54 -0
- package/dist/core/identity.js.map +1 -0
- package/dist/core/knowledge.d.ts +10 -0
- package/dist/core/knowledge.d.ts.map +1 -0
- package/dist/core/knowledge.js +40 -0
- package/dist/core/knowledge.js.map +1 -0
- package/dist/core/memory-actions.d.ts +38 -0
- package/dist/core/memory-actions.d.ts.map +1 -0
- package/dist/core/memory-actions.js +181 -0
- package/dist/core/memory-actions.js.map +1 -0
- package/dist/core/memory.d.ts +35 -0
- package/dist/core/memory.d.ts.map +1 -0
- package/dist/core/memory.js +168 -0
- package/dist/core/memory.js.map +1 -0
- package/dist/core/peer-actions.d.ts +15 -0
- package/dist/core/peer-actions.d.ts.map +1 -0
- package/dist/core/peer-actions.js +33 -0
- package/dist/core/peer-actions.js.map +1 -0
- package/dist/core/prompt-builder.d.ts +46 -0
- package/dist/core/prompt-builder.d.ts.map +1 -0
- package/dist/core/prompt-builder.js +543 -0
- package/dist/core/prompt-builder.js.map +1 -0
- package/dist/core/prompt-mode.d.ts +3 -0
- package/dist/core/prompt-mode.d.ts.map +1 -0
- package/dist/core/prompt-mode.js +6 -0
- package/dist/core/prompt-mode.js.map +1 -0
- package/dist/core/prompted-turn.d.ts +6 -0
- package/dist/core/prompted-turn.d.ts.map +1 -0
- package/dist/core/prompted-turn.js +48 -0
- package/dist/core/prompted-turn.js.map +1 -0
- package/dist/core/request-builder.d.ts +14 -0
- package/dist/core/request-builder.d.ts.map +1 -0
- package/dist/core/request-builder.js +64 -0
- package/dist/core/request-builder.js.map +1 -0
- package/dist/core/session-routing.d.ts +23 -0
- package/dist/core/session-routing.d.ts.map +1 -0
- package/dist/core/session-routing.js +59 -0
- package/dist/core/session-routing.js.map +1 -0
- package/dist/core/voice.d.ts +6 -0
- package/dist/core/voice.d.ts.map +1 -0
- package/dist/core/voice.js +30 -0
- package/dist/core/voice.js.map +1 -0
- package/dist/engine/chat.d.ts +45 -0
- package/dist/engine/chat.d.ts.map +1 -0
- package/dist/engine/chat.js +308 -0
- package/dist/engine/chat.js.map +1 -0
- package/dist/engine/continuity.d.ts +107 -0
- package/dist/engine/continuity.d.ts.map +1 -0
- package/dist/engine/continuity.js +320 -0
- package/dist/engine/continuity.js.map +1 -0
- package/dist/engine/crud.d.ts +62 -0
- package/dist/engine/crud.d.ts.map +1 -0
- package/dist/engine/crud.js +260 -0
- package/dist/engine/crud.js.map +1 -0
- package/dist/engine/side-effects.d.ts +93 -0
- package/dist/engine/side-effects.d.ts.map +1 -0
- package/dist/engine/side-effects.js +271 -0
- package/dist/engine/side-effects.js.map +1 -0
- package/dist/engine/staging.d.ts +29 -0
- package/dist/engine/staging.d.ts.map +1 -0
- package/dist/engine/staging.js +159 -0
- package/dist/engine/staging.js.map +1 -0
- package/dist/engine/working-set.d.ts +18 -0
- package/dist/engine/working-set.d.ts.map +1 -0
- package/dist/engine/working-set.js +246 -0
- package/dist/engine/working-set.js.map +1 -0
- package/dist/evals/action-contracts.d.ts +40 -0
- package/dist/evals/action-contracts.d.ts.map +1 -0
- package/dist/evals/action-contracts.js +208 -0
- package/dist/evals/action-contracts.js.map +1 -0
- package/dist/evals/brain-bloat.d.ts +39 -0
- package/dist/evals/brain-bloat.d.ts.map +1 -0
- package/dist/evals/brain-bloat.js +167 -0
- package/dist/evals/brain-bloat.js.map +1 -0
- package/dist/evals/brain-prescriptions.d.ts +30 -0
- package/dist/evals/brain-prescriptions.d.ts.map +1 -0
- package/dist/evals/brain-prescriptions.js +148 -0
- package/dist/evals/brain-prescriptions.js.map +1 -0
- package/dist/evals/cross-layer-duplicates.d.ts +49 -0
- package/dist/evals/cross-layer-duplicates.d.ts.map +1 -0
- package/dist/evals/cross-layer-duplicates.js +289 -0
- package/dist/evals/cross-layer-duplicates.js.map +1 -0
- package/dist/evals/entity-visibility.d.ts +28 -0
- package/dist/evals/entity-visibility.d.ts.map +1 -0
- package/dist/evals/entity-visibility.js +216 -0
- package/dist/evals/entity-visibility.js.map +1 -0
- package/dist/evals/index.d.ts +19 -0
- package/dist/evals/index.d.ts.map +1 -0
- package/dist/evals/index.js +11 -0
- package/dist/evals/index.js.map +1 -0
- package/dist/evals/judge.d.ts +22 -0
- package/dist/evals/judge.d.ts.map +1 -0
- package/dist/evals/judge.js +337 -0
- package/dist/evals/judge.js.map +1 -0
- package/dist/evals/operational-contract.d.ts +40 -0
- package/dist/evals/operational-contract.d.ts.map +1 -0
- package/dist/evals/operational-contract.js +115 -0
- package/dist/evals/operational-contract.js.map +1 -0
- package/dist/evals/prompt-content.d.ts +14 -0
- package/dist/evals/prompt-content.d.ts.map +1 -0
- package/dist/evals/prompt-content.js +104 -0
- package/dist/evals/prompt-content.js.map +1 -0
- package/dist/evals/runtime.d.ts +4 -0
- package/dist/evals/runtime.d.ts.map +1 -0
- package/dist/evals/runtime.js +197 -0
- package/dist/evals/runtime.js.map +1 -0
- package/dist/evals/sample-projects.d.ts +143 -0
- package/dist/evals/sample-projects.d.ts.map +1 -0
- package/dist/evals/sample-projects.js +644 -0
- package/dist/evals/sample-projects.js.map +1 -0
- package/dist/evals/types.d.ts +88 -0
- package/dist/evals/types.d.ts.map +1 -0
- package/dist/evals/types.js +2 -0
- package/dist/evals/types.js.map +1 -0
- package/dist/foundation/index.d.ts +158 -0
- package/dist/foundation/index.d.ts.map +1 -0
- package/dist/foundation/index.js +256 -0
- package/dist/foundation/index.js.map +1 -0
- package/dist/index.d.ts +223 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +998 -0
- package/dist/index.js.map +1 -0
- package/dist/managed/autonomous-loop.d.ts +199 -0
- package/dist/managed/autonomous-loop.d.ts.map +1 -0
- package/dist/managed/autonomous-loop.js +451 -0
- package/dist/managed/autonomous-loop.js.map +1 -0
- package/dist/managed/conversation.d.ts +20 -0
- package/dist/managed/conversation.d.ts.map +1 -0
- package/dist/managed/conversation.js +40 -0
- package/dist/managed/conversation.js.map +1 -0
- package/dist/managed/knowledge.d.ts +7 -0
- package/dist/managed/knowledge.d.ts.map +1 -0
- package/dist/managed/knowledge.js +174 -0
- package/dist/managed/knowledge.js.map +1 -0
- package/dist/managed/memory-manager.d.ts +7 -0
- package/dist/managed/memory-manager.d.ts.map +1 -0
- package/dist/managed/memory-manager.js +18 -0
- package/dist/managed/memory-manager.js.map +1 -0
- package/dist/managed/memory-review.d.ts +45 -0
- package/dist/managed/memory-review.d.ts.map +1 -0
- package/dist/managed/memory-review.js +130 -0
- package/dist/managed/memory-review.js.map +1 -0
- package/dist/managed/storage.d.ts +2 -0
- package/dist/managed/storage.d.ts.map +1 -0
- package/dist/managed/storage.js +2 -0
- package/dist/managed/storage.js.map +1 -0
- package/dist/managed/work-history.d.ts +23 -0
- package/dist/managed/work-history.d.ts.map +1 -0
- package/dist/managed/work-history.js +31 -0
- package/dist/managed/work-history.js.map +1 -0
- package/dist/observability/index.d.ts +15 -0
- package/dist/observability/index.d.ts.map +1 -0
- package/dist/observability/index.js +15 -0
- package/dist/observability/index.js.map +1 -0
- package/dist/observability/render-run-markdown.d.ts +90 -0
- package/dist/observability/render-run-markdown.d.ts.map +1 -0
- package/dist/observability/render-run-markdown.js +231 -0
- package/dist/observability/render-run-markdown.js.map +1 -0
- package/dist/observability/turn-reporter.d.ts +20 -0
- package/dist/observability/turn-reporter.d.ts.map +1 -0
- package/dist/observability/turn-reporter.js +106 -0
- package/dist/observability/turn-reporter.js.map +1 -0
- package/dist/persona.d.ts +49 -0
- package/dist/persona.d.ts.map +1 -0
- package/dist/persona.js +287 -0
- package/dist/persona.js.map +1 -0
- package/dist/playbook/defaults.d.ts +25 -0
- package/dist/playbook/defaults.d.ts.map +1 -0
- package/dist/playbook/defaults.js +108 -0
- package/dist/playbook/defaults.js.map +1 -0
- package/dist/playbook/invariants.d.ts +244 -0
- package/dist/playbook/invariants.d.ts.map +1 -0
- package/dist/playbook/invariants.js +259 -0
- package/dist/playbook/invariants.js.map +1 -0
- package/dist/playbook/templates.d.ts +7 -0
- package/dist/playbook/templates.d.ts.map +1 -0
- package/dist/playbook/templates.js +437 -0
- package/dist/playbook/templates.js.map +1 -0
- package/dist/providers/gemini.d.ts +73 -0
- package/dist/providers/gemini.d.ts.map +1 -0
- package/dist/providers/gemini.js +536 -0
- package/dist/providers/gemini.js.map +1 -0
- package/dist/providers/types.d.ts +2 -0
- package/dist/providers/types.d.ts.map +1 -0
- package/dist/providers/types.js +2 -0
- package/dist/providers/types.js.map +1 -0
- package/dist/providers/zod-to-gemini.d.ts +8 -0
- package/dist/providers/zod-to-gemini.d.ts.map +1 -0
- package/dist/providers/zod-to-gemini.js +148 -0
- package/dist/providers/zod-to-gemini.js.map +1 -0
- package/dist/samples/pm-spec-agent.d.ts +22 -0
- package/dist/samples/pm-spec-agent.d.ts.map +1 -0
- package/dist/samples/pm-spec-agent.js +53 -0
- package/dist/samples/pm-spec-agent.js.map +1 -0
- package/dist/types.d.ts +920 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +68 -0
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Load-bearing invariants — the audit stack's counterweight.
|
|
3
|
+
*
|
|
4
|
+
* The other audits (auditBrainBloat, auditBrainPrescriptions,
|
|
5
|
+
* auditCrossLayerDuplicates, auditActionContracts) all push one way:
|
|
6
|
+
* PRUNE PRESCRIPTIVE CONTENT. That's the right pressure for 99% of what
|
|
7
|
+
* a developer writes. But a small number of prompt sentences look exactly
|
|
8
|
+
* like prescriptions — abstract, prose-y, not naming a specific mechanic —
|
|
9
|
+
* while actually defusing the default prior an LLM picks up from the
|
|
10
|
+
* surrounding context. When those sentences are pruned, the audits don't
|
|
11
|
+
* complain and the regression only shows up in a live scenario weeks later.
|
|
12
|
+
*
|
|
13
|
+
* This file gives each such sentence a NAME, a WHY, a CANARY scenario, and a VERIFY procedure.
|
|
14
|
+
* The companion test (tests/load-bearing-invariants.test.ts) asserts each
|
|
15
|
+
* invariant still appears in the assembled prompt of a foundation config.
|
|
16
|
+
* If a future refactor removes one, the test fails with the doc comment
|
|
17
|
+
* pointing at the failure mode the line was written to prevent.
|
|
18
|
+
*
|
|
19
|
+
* Adding a new invariant: add the named constant below, include a
|
|
20
|
+
* ==> WHY, ==> CANARY, and ==> VERIFY section in the doc comment, and
|
|
21
|
+
* add a `describe` block to the test.
|
|
22
|
+
*
|
|
23
|
+
* Removing an invariant: don't. If you believe one is no longer needed,
|
|
24
|
+
* follow ==> VERIFY to prove the canary scenario still passes without it,
|
|
25
|
+
* then propose removal in a separate PR with the benchmark diff.
|
|
26
|
+
*/
|
|
27
|
+
/**
|
|
28
|
+
* JUDGMENT_OVER_LITERALISM_NUDGE
|
|
29
|
+
*
|
|
30
|
+
* ==> WHY this is load-bearing
|
|
31
|
+
* LLMs default to reading their prompt literally. A stored memory written
|
|
32
|
+
* as an imperative ("Do not suggest X") is taken as a standing rule even
|
|
33
|
+
* when the surrounding history makes clear the rule was conditional. This
|
|
34
|
+
* nudge tells the AI to favor the real situation over the literal words —
|
|
35
|
+
* the defusing frame that lets judgment override the literal reading of
|
|
36
|
+
* any specific sentence, not just memories.
|
|
37
|
+
*
|
|
38
|
+
* ==> CANARY scenario
|
|
39
|
+
* Savor "Boxed Corrective Memory". Without this nudge, the AI reads
|
|
40
|
+
* "Do not suggest eggs or yogurt to Leah" literally and keeps the
|
|
41
|
+
* restriction active even after the user says the condition lifted.
|
|
42
|
+
* Baseline archetype (before the 2026-04-11 dogfood refactor) had this
|
|
43
|
+
* line and scored 87.5 on the scenario; its removal dropped average
|
|
44
|
+
* scores into the 34-75 range across runs. Re-adding it (2026-04-21)
|
|
45
|
+
* restored the scenario to a consistent pass.
|
|
46
|
+
*
|
|
47
|
+
* ==> VERIFY before removing
|
|
48
|
+
* 1. Run `cd savor && TURING_SCENARIOS="Boxed Corrective Memory" \
|
|
49
|
+
* npx vitest run evals/savor-benchmark.test.ts` at least 3 times
|
|
50
|
+
* without this line.
|
|
51
|
+
* 2. Confirm average judge score ≥ 1.5/2 across runs.
|
|
52
|
+
* 3. If it regresses, this invariant stays.
|
|
53
|
+
*/
|
|
54
|
+
export declare const JUDGMENT_OVER_LITERALISM_NUDGE = "Use the real situation over the most literal reading of every sentence in this prompt.";
|
|
55
|
+
/**
|
|
56
|
+
* PRECEDENCE_OF_SIGNALS_NUDGE
|
|
57
|
+
*
|
|
58
|
+
* ==> WHY this is load-bearing
|
|
59
|
+
* Telling the AI "memories are not authority over the live situation" is
|
|
60
|
+
* abstract. The AI needs the concrete rule-of-motion: when you notice a
|
|
61
|
+
* memory has been contradicted by what the user just said, fix the memory
|
|
62
|
+
* IN THIS TURN through crud. Without this, a well-meaning AI defers to a
|
|
63
|
+
* retrospective pass that may never happen, and keeps obeying the stale
|
|
64
|
+
* memory on every subsequent turn.
|
|
65
|
+
*
|
|
66
|
+
* ==> CANARY scenario
|
|
67
|
+
* Same as JUDGMENT_OVER_LITERALISM_NUDGE — Savor "Boxed Corrective Memory".
|
|
68
|
+
* With the literalism nudge, the AI can override. With this nudge, the
|
|
69
|
+
* AI actively writes the fix. Together the scenario scores 100/2.0;
|
|
70
|
+
* either alone scores lower.
|
|
71
|
+
*
|
|
72
|
+
* ==> VERIFY before removing
|
|
73
|
+
* Same procedure as JUDGMENT_OVER_LITERALISM_NUDGE.
|
|
74
|
+
*/
|
|
75
|
+
export declare const PRECEDENCE_OF_SIGNALS_NUDGE = "When the live conversation contradicts a memory, update or delete the stale memory through the crud action in this turn \u2014 don't wait for a retrospective pass.";
|
|
76
|
+
/**
|
|
77
|
+
* MEMORY_SELF_BOX_WARNING
|
|
78
|
+
*
|
|
79
|
+
* ==> WHY this is load-bearing
|
|
80
|
+
* Stored memories that lost their conditional context (a common output
|
|
81
|
+
* of memory extraction) read to the LLM as absolute rules. This warning
|
|
82
|
+
* reframes a stored memory as a working hypothesis, tells the AI to
|
|
83
|
+
* check whether the condition that made it true still holds, and tells
|
|
84
|
+
* the writer side to capture the why so future-you can tell a
|
|
85
|
+
* still-relevant insight from an expired correction.
|
|
86
|
+
*
|
|
87
|
+
* ==> CANARY scenario
|
|
88
|
+
* Savor "Boxed Corrective Memory" — same scenario. Third required nudge
|
|
89
|
+
* alongside literalism + precedence; together they produce 100/2.0.
|
|
90
|
+
*
|
|
91
|
+
* ==> VERIFY before removing
|
|
92
|
+
* Same procedure. Canary is Boxed Corrective Memory. A weaker version
|
|
93
|
+
* that only tells the AI "check the condition still holds" may be
|
|
94
|
+
* enough — but removing the whole nudge regressed the scenario on all
|
|
95
|
+
* three earlier tests.
|
|
96
|
+
*/
|
|
97
|
+
export declare const MEMORY_SELF_BOX_WARNING = "Every stored memory comes from a specific moment. When reading one, check whether the situation that made it true still holds. When writing one, capture the situation that produced it \u2014 a correction saved without its why calcifies into a rule divorced from reality.";
|
|
98
|
+
/**
|
|
99
|
+
* CONTEXTHINT_CAPTURES_THE_WHY
|
|
100
|
+
*
|
|
101
|
+
* ==> WHY this is load-bearing
|
|
102
|
+
* The `contextHint` field exists on Memory but its describe() framed it
|
|
103
|
+
* as "an optional interpretability hint." For corrections and
|
|
104
|
+
* situation-bound instructions, that framing is wrong — the field is
|
|
105
|
+
* how you avoid producing a rule divorced from its reason. This nudge
|
|
106
|
+
* makes contextHint non-optional for memories that are corrections.
|
|
107
|
+
*
|
|
108
|
+
* ==> CANARY scenario
|
|
109
|
+
* Any scenario that stores a correction. In Boxed Corrective Memory
|
|
110
|
+
* the fixture intentionally mis-extracts — storing "Do not suggest
|
|
111
|
+
* eggs or yogurt to Leah" with no contextHint — to simulate the
|
|
112
|
+
* failure mode this nudge guards against.
|
|
113
|
+
*
|
|
114
|
+
* ==> VERIFY before removing
|
|
115
|
+
* Sample memories written during situation-bound turns. Without this
|
|
116
|
+
* nudge, non-empty contextHint rate drops to single digits. Stays
|
|
117
|
+
* if that regression shows up.
|
|
118
|
+
*/
|
|
119
|
+
export declare const CONTEXTHINT_CAPTURES_THE_WHY = "contextHint: the situation that produced the memory. For any correction or instruction tied to a moment (illness, travel, a specific week), contextHint is not optional \u2014 without it, the memory becomes a rule divorced from its reason.";
|
|
120
|
+
/**
|
|
121
|
+
* MATCH_MESSAGE_TO_ACTIONS_NUDGE
|
|
122
|
+
*
|
|
123
|
+
* ==> WHY this is load-bearing
|
|
124
|
+
* LLMs naturally narrate actions in conversational prose ("I've logged
|
|
125
|
+
* your smoothie") even when they don't fire the corresponding structured
|
|
126
|
+
* action. This produces hallucinated-action failures: the judge sees the
|
|
127
|
+
* narration in message + no corresponding CRUD action in the same turn,
|
|
128
|
+
* scores action-fidelity 0, and the next turn inherits a message-vs-state
|
|
129
|
+
* contradiction the AI tries to reconcile by re-performing the action —
|
|
130
|
+
* creating duplicates.
|
|
131
|
+
*
|
|
132
|
+
* ==> CANARY scenario
|
|
133
|
+
* Savor "Memory Recall & Continuity" T2 and T3 (2026-04-21). T2 AI:
|
|
134
|
+
* "I've added a draft for the smoothie to your log" — no CRUD action
|
|
135
|
+
* fired. T3 AI: "I've also confirmed your smoothie from earlier" —
|
|
136
|
+
* also no CRUD. Judge penalizes both on action-fidelity.
|
|
137
|
+
*
|
|
138
|
+
* ==> VERIFY before removing
|
|
139
|
+
* Run Memory Recall & Continuity 3x. Count turns where the AI says
|
|
140
|
+
* "I've [verb]ed" in the message. Count turns where a crud action
|
|
141
|
+
* was emitted. They should match.
|
|
142
|
+
*/
|
|
143
|
+
export declare const MATCH_MESSAGE_TO_ACTIONS_NUDGE = "Match your words to your actions. If your message narrates a change (\"I've logged\u2026\", \"I've added\u2026\", \"I've updated\u2026\", \"I've saved\u2026\"), the corresponding action or crud call must also appear in this turn's output. Don't describe actions you aren't taking \u2014 silent narration is how past-tense promises become hallucinations future turns try to reconcile.";
|
|
144
|
+
/**
|
|
145
|
+
* ACTION_RESULTS_ARE_WORLD_STATE_NUDGE
|
|
146
|
+
*
|
|
147
|
+
* ==> WHY this is load-bearing
|
|
148
|
+
* In custom loops and multi-persona sessions, the current turn may be an
|
|
149
|
+
* app-initiated continuation after a tool/action result. The intended shape is
|
|
150
|
+
* a single chronological work stream: the persona's narration/inner voice,
|
|
151
|
+
* compact natural-language action narration, and the factual outcome. Raw
|
|
152
|
+
* action params must not be dumped back into the model-visible stream: they
|
|
153
|
+
* can bloat or contaminate continuity, preserve obsolete action APIs, and make
|
|
154
|
+
* the feed look like executable instructions rather than history. If a future
|
|
155
|
+
* refactor hides the narration, splits outcomes into a different surface, raw-
|
|
156
|
+
* dumps attempted actions, or fails to state that successful outcomes already
|
|
157
|
+
* changed the world, the AI reasonably reads a broken world picture and may
|
|
158
|
+
* repeat, undo, or distrust completed work. This is a continuity bug, not a
|
|
159
|
+
* capability issue.
|
|
160
|
+
*
|
|
161
|
+
* ==> CANARY scenario
|
|
162
|
+
* Foundry PM-builder Clockwork Courier smoke run after expert-peer handoff
|
|
163
|
+
* wording (2026-04-29). The PM repeatedly called readFile even though the
|
|
164
|
+
* full brief result was visible in history, because the outcome contract did
|
|
165
|
+
* not explicitly state that visible results are current world state.
|
|
166
|
+
*
|
|
167
|
+
* ==> VERIFY before removing
|
|
168
|
+
* Run a same-actor app-initiated turn after a readFile/listFiles action.
|
|
169
|
+
* The next action should use the visible result, not repeat the same read,
|
|
170
|
+
* unless it says it needs a fresh value or exact contents have decayed.
|
|
171
|
+
*/
|
|
172
|
+
export declare const ACTION_RESULTS_ARE_WORLD_STATE_NUDGE = "When an action result appears in your current prompt, that action already happened. Use the result as current world state; rerun the action only when you need a fresh value or exact contents are no longer carried. Inner narration and action outcomes are one work stream: what you said or intended, compact action narration, then what actually happened. Read them together in order; the outcome is the factual state. Action narration is not a raw action dump; raw parameters are omitted because they can bloat or contaminate continuity and may reference action APIs no longer available.";
|
|
173
|
+
/**
|
|
174
|
+
* EXPERT_AUTONOMY_NUDGE
|
|
175
|
+
*
|
|
176
|
+
* ==> WHY this is load-bearing
|
|
177
|
+
* Archetype's core stance is that each persona is a capable expert, not a
|
|
178
|
+
* mediocre worker to be managed by rules. Boxing can happen in two directions:
|
|
179
|
+
* a persona can prescribe another expert's method, or it can shrink its own
|
|
180
|
+
* judgment into a checklist/work item and stop owning the work. The fix is a
|
|
181
|
+
* clearer world picture and expert-owned judgment, not more behavioral rules.
|
|
182
|
+
*
|
|
183
|
+
* ==> CANARY scenario
|
|
184
|
+
* Foundry PM-builder Clockwork Courier comparison (2026-04-29). The PM read
|
|
185
|
+
* the source brief but produced a spec/handoff that preserved the checklist
|
|
186
|
+
* while weakening the "shifting city / route risk" product hook. The builder
|
|
187
|
+
* then wrote a focus work item around "project structure, core loop, map
|
|
188
|
+
* rendering, basic movement" and shipped a functional but shallow prototype.
|
|
189
|
+
*
|
|
190
|
+
* ==> VERIFY before removing
|
|
191
|
+
* 1. Run the PM-builder journey with prompt dumps enabled.
|
|
192
|
+
* 2. Inspect the PM return-to-session message and the builder's first focus
|
|
193
|
+
* work item. They should preserve intent, context, constraints, evidence,
|
|
194
|
+
* and success picture without prescribing another expert's implementation
|
|
195
|
+
* sequence unless that sequence came from source material. The builder's
|
|
196
|
+
* work item should anchor judgment, not replace it with a checklist.
|
|
197
|
+
* 3. Compare against a single-builder/subagent baseline on the same brief.
|
|
198
|
+
*/
|
|
199
|
+
export declare const EXPERT_AUTONOMY_NUDGE = "Treat yourself and every participant as an expert owner of their field. Share intent, context, constraints, evidence, and what great should feel like; let each expert own method, sequencing, tools, and implementation approach unless those details are real source facts or constraints. Short-lived work items anchor judgment; they do not replace it with a checklist.";
|
|
200
|
+
export declare const LOAD_BEARING_INVARIANTS: readonly [{
|
|
201
|
+
readonly id: "judgment-over-literalism";
|
|
202
|
+
readonly constant: "JUDGMENT_OVER_LITERALISM_NUDGE";
|
|
203
|
+
readonly text: "Use the real situation over the most literal reading of every sentence in this prompt.";
|
|
204
|
+
readonly keyConcepts: readonly ["real situation", "literal reading"];
|
|
205
|
+
readonly sourceSection: "CONVERSATION_REALITY";
|
|
206
|
+
}, {
|
|
207
|
+
readonly id: "precedence-of-signals";
|
|
208
|
+
readonly constant: "PRECEDENCE_OF_SIGNALS_NUDGE";
|
|
209
|
+
readonly text: "When the live conversation contradicts a memory, update or delete the stale memory through the crud action in this turn — don't wait for a retrospective pass.";
|
|
210
|
+
readonly keyConcepts: readonly ["contradicts a memory", "update or delete", "in this turn"];
|
|
211
|
+
readonly sourceSection: "CONVERSATION_REALITY";
|
|
212
|
+
}, {
|
|
213
|
+
readonly id: "match-message-to-actions";
|
|
214
|
+
readonly constant: "MATCH_MESSAGE_TO_ACTIONS_NUDGE";
|
|
215
|
+
readonly text: "Match your words to your actions. If your message narrates a change (\"I've logged…\", \"I've added…\", \"I've updated…\", \"I've saved…\"), the corresponding action or crud call must also appear in this turn's output. Don't describe actions you aren't taking — silent narration is how past-tense promises become hallucinations future turns try to reconcile.";
|
|
216
|
+
readonly keyConcepts: readonly ["match your words", "don't describe actions you aren't taking"];
|
|
217
|
+
readonly sourceSection: "OUTCOME_NOTES_INSTRUCTION";
|
|
218
|
+
}, {
|
|
219
|
+
readonly id: "action-results-are-world-state";
|
|
220
|
+
readonly constant: "ACTION_RESULTS_ARE_WORLD_STATE_NUDGE";
|
|
221
|
+
readonly text: "When an action result appears in your current prompt, that action already happened. Use the result as current world state; rerun the action only when you need a fresh value or exact contents are no longer carried. Inner narration and action outcomes are one work stream: what you said or intended, compact action narration, then what actually happened. Read them together in order; the outcome is the factual state. Action narration is not a raw action dump; raw parameters are omitted because they can bloat or contaminate continuity and may reference action APIs no longer available.";
|
|
222
|
+
readonly keyConcepts: readonly ["action already happened", "current world state", "one work stream", "raw action dump", "contaminate continuity"];
|
|
223
|
+
readonly sourceSection: "OUTCOME_NOTES_INSTRUCTION";
|
|
224
|
+
}, {
|
|
225
|
+
readonly id: "memory-self-box";
|
|
226
|
+
readonly constant: "MEMORY_SELF_BOX_WARNING";
|
|
227
|
+
readonly text: "Every stored memory comes from a specific moment. When reading one, check whether the situation that made it true still holds. When writing one, capture the situation that produced it — a correction saved without its why calcifies into a rule divorced from reality.";
|
|
228
|
+
readonly keyConcepts: readonly ["situation that made it true", "capture the situation", "calcifies"];
|
|
229
|
+
readonly sourceSection: "MEMORY_ENTITY_RULES";
|
|
230
|
+
}, {
|
|
231
|
+
readonly id: "contexthint-captures-why";
|
|
232
|
+
readonly constant: "CONTEXTHINT_CAPTURES_THE_WHY";
|
|
233
|
+
readonly text: "contextHint: the situation that produced the memory. For any correction or instruction tied to a moment (illness, travel, a specific week), contextHint is not optional — without it, the memory becomes a rule divorced from its reason.";
|
|
234
|
+
readonly keyConcepts: readonly ["contextHint", "situation that produced", "not optional"];
|
|
235
|
+
readonly sourceSection: "MEMORY_METADATA_GUIDANCE";
|
|
236
|
+
}, {
|
|
237
|
+
readonly id: "expert-autonomy";
|
|
238
|
+
readonly constant: "EXPERT_AUTONOMY_NUDGE";
|
|
239
|
+
readonly text: "Treat yourself and every participant as an expert owner of their field. Share intent, context, constraints, evidence, and what great should feel like; let each expert own method, sequencing, tools, and implementation approach unless those details are real source facts or constraints. Short-lived work items anchor judgment; they do not replace it with a checklist.";
|
|
240
|
+
readonly keyConcepts: readonly ["expert owner", "intent", "own method", "work items anchor judgment"];
|
|
241
|
+
readonly sourceSection: "EXPERT_AUTONOMY";
|
|
242
|
+
}];
|
|
243
|
+
/** Union of the element types of the LOAD_BEARING_INVARIANTS tuple (one member per registered invariant). */
export type LoadBearingInvariant = (typeof LOAD_BEARING_INVARIANTS)[number];
|
|
244
|
+
//# sourceMappingURL=invariants.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"invariants.d.ts","sourceRoot":"","sources":["../../src/playbook/invariants.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAGH;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,eAAO,MAAM,8BAA8B,2FAA2F,CAAA;AAGtI;;;;;;;;;;;;;;;;;;;GAmBG;AACH,eAAO,MAAM,2BAA2B,wKAAmK,CAAA;AAG3M;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,eAAO,MAAM,uBAAuB,mRAA8Q,CAAA;AAGlT;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,eAAO,MAAM,4BAA4B,mPAA8O,CAAA;AAGvR;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,eAAO,MAAM,8BAA8B,oYAAmW,CAAA;AAG9Y;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,eAAO,MAAM,oCAAoC,8kBAA8kB,CAAA;AAG/nB;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AACH,eAAO,MAAM,qBAAqB,kXAAkX,CAAA;AAIpZ,eAAO,MAAM,uBAAuB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAkD1B,CAAA;AAEV,MAAM,MAAM,oBAAoB,GAAG,OAAO,uBAAuB,CAAC,MAAM,CAAC,CAAA"}
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Load-bearing invariants — the audit stack's counterweight.
|
|
3
|
+
*
|
|
4
|
+
* The other audits (auditBrainBloat, auditBrainPrescriptions,
|
|
5
|
+
* auditCrossLayerDuplicates, auditActionContracts) all push one way:
|
|
6
|
+
* PRUNE PRESCRIPTIVE CONTENT. That's the right pressure for 99% of what
|
|
7
|
+
* a developer writes. But a small number of prompt sentences look exactly
|
|
8
|
+
* like prescriptions — abstract, prose-y, not naming a specific mechanic —
|
|
9
|
+
* while actually defusing the default prior an LLM picks up from the
|
|
10
|
+
* surrounding context. When those sentences are pruned, the audits don't
|
|
11
|
+
* complain and the regression only shows up in a live scenario weeks later.
|
|
12
|
+
*
|
|
13
|
+
* This file gives each such sentence a NAME, a WHY, and a CANARY scenario.
|
|
14
|
+
* The companion test (tests/load-bearing-invariants.test.ts) asserts each
|
|
15
|
+
* invariant still appears in the assembled prompt of a foundation config.
|
|
16
|
+
* If a future refactor removes one, the test fails with the doc comment
|
|
17
|
+
* pointing at the failure mode the line was written to prevent.
|
|
18
|
+
*
|
|
19
|
+
* Adding a new invariant: add the named constant below, include a
|
|
20
|
+
* ==> WHY, ==> CANARY, and ==> VERIFY section in the doc comment, and
|
|
21
|
+
* add an entry to LOAD_BEARING_INVARIANTS and a `describe` block to the test.
|
|
22
|
+
*
|
|
23
|
+
* Removing an invariant: don't. If you believe one is no longer needed,
|
|
24
|
+
* follow ==> VERIFY to prove the canary scenario still passes without it,
|
|
25
|
+
* then propose removal in a separate PR with the benchmark diff.
|
|
26
|
+
*/
|
|
27
|
+
// ─── Invariant 1: judgment over literalism ──────────────────────────────────
|
|
28
|
+
/**
|
|
29
|
+
* JUDGMENT_OVER_LITERALISM_NUDGE
|
|
30
|
+
*
|
|
31
|
+
* ==> WHY this is load-bearing
|
|
32
|
+
* LLMs default to reading their prompt literally. A stored memory written
|
|
33
|
+
* as an imperative ("Do not suggest X") is taken as a standing rule even
|
|
34
|
+
* when the surrounding history makes clear the rule was conditional. This
|
|
35
|
+
* nudge tells the AI to favor the real situation over the literal words —
|
|
36
|
+
* the defusing frame that lets judgment override the literal reading of
|
|
37
|
+
* any specific sentence, not just memories.
|
|
38
|
+
*
|
|
39
|
+
* ==> CANARY scenario
|
|
40
|
+
* Savor "Boxed Corrective Memory". Without this nudge, the AI reads
|
|
41
|
+
* "Do not suggest eggs or yogurt to Leah" literally and keeps the
|
|
42
|
+
* restriction active even after the user says the condition lifted.
|
|
43
|
+
* Baseline archetype (before the 2026-04-11 dogfood refactor) had this
|
|
44
|
+
* line and scored 87.5 on the scenario; its removal dropped average
|
|
45
|
+
* scores into the 34-75 range across runs. Re-adding it (2026-04-21)
|
|
46
|
+
* restored the scenario to a consistent pass.
|
|
47
|
+
*
|
|
48
|
+
* ==> VERIFY before removing
|
|
49
|
+
* 1. Run `cd savor && TURING_SCENARIOS="Boxed Corrective Memory" \
|
|
50
|
+
* npx vitest run evals/savor-benchmark.test.ts` at least 3 times
|
|
51
|
+
* without this line.
|
|
52
|
+
* 2. Confirm average judge score ≥ 1.5/2 across runs.
|
|
53
|
+
* 3. If it regresses, this invariant stays.
|
|
54
|
+
*/
|
|
55
|
+
export const JUDGMENT_OVER_LITERALISM_NUDGE =
    'Use the real situation over the most literal reading of every sentence in this prompt.';
|
|
56
|
+
// ─── Invariant 2: fix stale memory in the live turn ─────────────────────────
|
|
57
|
+
/**
|
|
58
|
+
* PRECEDENCE_OF_SIGNALS_NUDGE
|
|
59
|
+
*
|
|
60
|
+
* ==> WHY this is load-bearing
|
|
61
|
+
* Telling the AI "memories are not authority over the live situation" is
|
|
62
|
+
* abstract. The AI needs the concrete rule-of-motion: when you notice a
|
|
63
|
+
* memory has been contradicted by what the user just said, fix the memory
|
|
64
|
+
* IN THIS TURN through crud. Without this, a well-meaning AI defers to a
|
|
65
|
+
* retrospective pass that may never happen, and keeps obeying the stale
|
|
66
|
+
* memory on every subsequent turn.
|
|
67
|
+
*
|
|
68
|
+
* ==> CANARY scenario
|
|
69
|
+
* Same as JUDGMENT_OVER_LITERALISM_NUDGE — Savor "Boxed Corrective Memory".
|
|
70
|
+
* With the literalism nudge, the AI can override. With this nudge, the
|
|
71
|
+
* AI actively writes the fix. Together the scenario scores 100/2.0;
|
|
72
|
+
* either alone scores lower.
|
|
73
|
+
*
|
|
74
|
+
* ==> VERIFY before removing
|
|
75
|
+
* Same procedure as JUDGMENT_OVER_LITERALISM_NUDGE.
|
|
76
|
+
*/
|
|
77
|
+
export const PRECEDENCE_OF_SIGNALS_NUDGE =
    "When the live conversation contradicts a memory, update or delete the stale memory through the crud action in this turn — don't wait for a retrospective pass.";
|
|
78
|
+
// ─── Invariant 3: memory is a working hypothesis ────────────────────────────
|
|
79
|
+
/**
|
|
80
|
+
* MEMORY_SELF_BOX_WARNING
|
|
81
|
+
*
|
|
82
|
+
* ==> WHY this is load-bearing
|
|
83
|
+
* Stored memories that lost their conditional context (a common output
|
|
84
|
+
* of memory extraction) read to the LLM as absolute rules. This warning
|
|
85
|
+
* reframes a stored memory as a working hypothesis, tells the AI to
|
|
86
|
+
* check whether the condition that made it true still holds, and tells
|
|
87
|
+
* the writer side to capture the why so future-you can tell a
|
|
88
|
+
* still-relevant insight from an expired correction.
|
|
89
|
+
*
|
|
90
|
+
* ==> CANARY scenario
|
|
91
|
+
* Savor "Boxed Corrective Memory" — same scenario. Third required nudge
|
|
92
|
+
* alongside literalism + precedence; together they produce 100/2.0.
|
|
93
|
+
*
|
|
94
|
+
* ==> VERIFY before removing
|
|
95
|
+
* Same procedure. Canary is Boxed Corrective Memory. A weaker version
|
|
96
|
+
* that only tells the AI "check the condition still holds" may be
|
|
97
|
+
* enough — but removing the whole nudge regressed the scenario on all
|
|
98
|
+
* three earlier tests.
|
|
99
|
+
*/
|
|
100
|
+
export const MEMORY_SELF_BOX_WARNING =
    'Every stored memory comes from a specific moment. When reading one, check whether the situation that made it true still holds. When writing one, capture the situation that produced it — a correction saved without its why calcifies into a rule divorced from reality.';
|
|
101
|
+
// ─── Invariant 4: contextHint captures the why ──────────────────────────────
|
|
102
|
+
/**
|
|
103
|
+
* CONTEXTHINT_CAPTURES_THE_WHY
|
|
104
|
+
*
|
|
105
|
+
* ==> WHY this is load-bearing
|
|
106
|
+
* The `contextHint` field exists on Memory but its describe() framed it
|
|
107
|
+
* as "an optional interpretability hint." For corrections and
|
|
108
|
+
* situation-bound instructions, that framing is wrong — the field is
|
|
109
|
+
* how you avoid producing a rule divorced from its reason. This nudge
|
|
110
|
+
* makes contextHint non-optional for memories that are corrections.
|
|
111
|
+
*
|
|
112
|
+
* ==> CANARY scenario
|
|
113
|
+
* Any scenario that stores a correction. In Boxed Corrective Memory
|
|
114
|
+
* the fixture intentionally mis-extracts — storing "Do not suggest
|
|
115
|
+
* eggs or yogurt to Leah" with no contextHint — to simulate the
|
|
116
|
+
* failure mode this nudge guards against.
|
|
117
|
+
*
|
|
118
|
+
* ==> VERIFY before removing
|
|
119
|
+
* Sample memories written during situation-bound turns. Without this
|
|
120
|
+
* nudge, non-empty contextHint rate drops to single digits. The nudge stays
|
|
121
|
+
* if that regression shows up.
|
|
122
|
+
*/
|
|
123
|
+
export const CONTEXTHINT_CAPTURES_THE_WHY =
    'contextHint: the situation that produced the memory. For any correction or instruction tied to a moment (illness, travel, a specific week), contextHint is not optional — without it, the memory becomes a rule divorced from its reason.';
|
|
124
|
+
// ─── Invariant 5: say ↔ do alignment ────────────────────────────────────────
|
|
125
|
+
/**
|
|
126
|
+
* MATCH_MESSAGE_TO_ACTIONS_NUDGE
|
|
127
|
+
*
|
|
128
|
+
* ==> WHY this is load-bearing
|
|
129
|
+
* LLMs naturally narrate actions in conversational prose ("I've logged
|
|
130
|
+
* your smoothie") even when they don't fire the corresponding structured
|
|
131
|
+
* action. This produces hallucinated-action failures: the judge sees the
|
|
132
|
+
* narration in message + no corresponding CRUD action in the same turn,
|
|
133
|
+
* scores action-fidelity 0, and the next turn inherits a message-vs-state
|
|
134
|
+
* contradiction the AI tries to reconcile by re-performing the action —
|
|
135
|
+
* creating duplicates.
|
|
136
|
+
*
|
|
137
|
+
* ==> CANARY scenario
|
|
138
|
+
* Savor "Memory Recall & Continuity" T2 and T3 (2026-04-21). T2 AI:
|
|
139
|
+
* "I've added a draft for the smoothie to your log" — no CRUD action
|
|
140
|
+
* fired. T3 AI: "I've also confirmed your smoothie from earlier" —
|
|
141
|
+
* also no CRUD. Judge penalizes both on action-fidelity.
|
|
142
|
+
*
|
|
143
|
+
* ==> VERIFY before removing
|
|
144
|
+
* Run Memory Recall & Continuity 3x. Count turns where the AI says
|
|
145
|
+
* "I've [verb]ed" in the message. Count turns where a crud action
|
|
146
|
+
* was emitted. They should match.
|
|
147
|
+
*/
|
|
148
|
+
export const MATCH_MESSAGE_TO_ACTIONS_NUDGE =
    'Match your words to your actions. If your message narrates a change ("I\'ve logged…", "I\'ve added…", "I\'ve updated…", "I\'ve saved…"), the corresponding action or crud call must also appear in this turn\'s output. Don\'t describe actions you aren\'t taking — silent narration is how past-tense promises become hallucinations future turns try to reconcile.';
|
|
149
|
+
// ─── Invariant 6: action results are current world state ───────────────────
|
|
150
|
+
/**
|
|
151
|
+
* ACTION_RESULTS_ARE_WORLD_STATE_NUDGE
|
|
152
|
+
*
|
|
153
|
+
* ==> WHY this is load-bearing
|
|
154
|
+
* In custom loops and multi-persona sessions, the current turn may be an
|
|
155
|
+
* app-initiated continuation after a tool/action result. The intended shape is
|
|
156
|
+
* a single chronological work stream: the persona's narration/inner voice,
|
|
157
|
+
* compact natural-language action narration, and the factual outcome. Raw
|
|
158
|
+
* action params must not be dumped back into the model-visible stream: they
|
|
159
|
+
* can bloat or contaminate continuity, preserve obsolete action APIs, and make
|
|
160
|
+
* the feed look like executable instructions rather than history. If a future
|
|
161
|
+
* refactor hides the narration, splits outcomes into a different surface, raw-
|
|
162
|
+
* dumps attempted actions, or fails to state that successful outcomes already
|
|
163
|
+
* changed the world, the AI reasonably reads a broken world picture and may
|
|
164
|
+
* repeat, undo, or distrust completed work. This is a continuity bug, not a
|
|
165
|
+
* capability issue.
|
|
166
|
+
*
|
|
167
|
+
* ==> CANARY scenario
|
|
168
|
+
* Foundry PM-builder Clockwork Courier smoke run after expert-peer handoff
|
|
169
|
+
* wording (2026-04-29). The PM repeatedly called readFile even though the
|
|
170
|
+
* full brief result was visible in history, because the outcome contract did
|
|
171
|
+
* not explicitly state that visible results are current world state.
|
|
172
|
+
*
|
|
173
|
+
* ==> VERIFY before removing
|
|
174
|
+
* Run a same-actor app-initiated turn after a readFile/listFiles action.
|
|
175
|
+
* The next action should use the visible result, not repeat the same read,
|
|
176
|
+
* unless it says it needs a fresh value or exact contents have decayed.
|
|
177
|
+
*/
|
|
178
|
+
export const ACTION_RESULTS_ARE_WORLD_STATE_NUDGE =
    'When an action result appears in your current prompt, that action already happened. Use the result as current world state; rerun the action only when you need a fresh value or exact contents are no longer carried. Inner narration and action outcomes are one work stream: what you said or intended, compact action narration, then what actually happened. Read them together in order; the outcome is the factual state. Action narration is not a raw action dump; raw parameters are omitted because they can bloat or contaminate continuity and may reference action APIs no longer available.';
|
|
179
|
+
// ─── Invariant 7: expert autonomy prevents self/peer boxing ────────────────
|
|
180
|
+
/**
|
|
181
|
+
* EXPERT_AUTONOMY_NUDGE
|
|
182
|
+
*
|
|
183
|
+
* ==> WHY this is load-bearing
|
|
184
|
+
* Archetype's core stance is that each persona is a capable expert, not a
|
|
185
|
+
* mediocre worker to be managed by rules. Boxing can happen in two directions:
|
|
186
|
+
* a persona can prescribe another expert's method, or it can shrink its own
|
|
187
|
+
* judgment into a checklist/work item and stop owning the work. The fix is a
|
|
188
|
+
* clearer world picture and expert-owned judgment, not more behavioral rules.
|
|
189
|
+
*
|
|
190
|
+
* ==> CANARY scenario
|
|
191
|
+
* Foundry PM-builder Clockwork Courier comparison (2026-04-29). The PM read
|
|
192
|
+
* the source brief but produced a spec/handoff that preserved the checklist
|
|
193
|
+
* while weakening the "shifting city / route risk" product hook. The builder
|
|
194
|
+
* then wrote a focus work item around "project structure, core loop, map
|
|
195
|
+
* rendering, basic movement" and shipped a functional but shallow prototype.
|
|
196
|
+
*
|
|
197
|
+
* ==> VERIFY before removing
|
|
198
|
+
* 1. Run the PM-builder journey with prompt dumps enabled.
|
|
199
|
+
* 2. Inspect the PM return-to-session message and the builder's first focus
|
|
200
|
+
* work item. They should preserve intent, context, constraints, evidence,
|
|
201
|
+
* and success picture without prescribing another expert's implementation
|
|
202
|
+
* sequence unless that sequence came from source material. The builder's
|
|
203
|
+
* work item should anchor judgment, not replace it with a checklist.
|
|
204
|
+
* 3. Compare against a single-builder/subagent baseline on the same brief.
|
|
205
|
+
*/
|
|
206
|
+
export const EXPERT_AUTONOMY_NUDGE =
    'Treat yourself and every participant as an expert owner of their field. Share intent, context, constraints, evidence, and what great should feel like; let each expert own method, sequencing, tools, and implementation approach unless those details are real source facts or constraints. Short-lived work items anchor judgment; they do not replace it with a checklist.';
|
|
207
|
+
// ─── Invariant IDs (for test enumeration) ───────────────────────────────────
|
|
208
|
+
/**
 * Enumerable registry of the invariant sentences defined in this module.
 *
 * Each record carries:
 *   - id:            kebab-case identifier for the invariant
 *   - constant:      name of the exported constant that holds the sentence
 *   - text:          the sentence itself (the constant's value)
 *   - keyConcepts:   short phrases taken from the sentence — presumably what
 *                    the companion test searches for; confirm against the test
 *   - sourceSection: label of a prompt section — presumably where the sentence
 *                    is assembled; confirm against the prompt builder
 */
export const LOAD_BEARING_INVARIANTS = [
    {
        id: "judgment-over-literalism",
        constant: "JUDGMENT_OVER_LITERALISM_NUDGE",
        text: JUDGMENT_OVER_LITERALISM_NUDGE,
        keyConcepts: ["real situation", "literal reading"],
        sourceSection: "CONVERSATION_REALITY",
    },
    {
        id: "precedence-of-signals",
        constant: "PRECEDENCE_OF_SIGNALS_NUDGE",
        text: PRECEDENCE_OF_SIGNALS_NUDGE,
        keyConcepts: ["contradicts a memory", "update or delete", "in this turn"],
        sourceSection: "CONVERSATION_REALITY",
    },
    {
        id: "match-message-to-actions",
        constant: "MATCH_MESSAGE_TO_ACTIONS_NUDGE",
        text: MATCH_MESSAGE_TO_ACTIONS_NUDGE,
        keyConcepts: ["match your words", "don't describe actions you aren't taking"],
        sourceSection: "OUTCOME_NOTES_INSTRUCTION",
    },
    {
        id: "action-results-are-world-state",
        constant: "ACTION_RESULTS_ARE_WORLD_STATE_NUDGE",
        text: ACTION_RESULTS_ARE_WORLD_STATE_NUDGE,
        keyConcepts: [
            "action already happened",
            "current world state",
            "one work stream",
            "raw action dump",
            "contaminate continuity",
        ],
        sourceSection: "OUTCOME_NOTES_INSTRUCTION",
    },
    {
        id: "memory-self-box",
        constant: "MEMORY_SELF_BOX_WARNING",
        text: MEMORY_SELF_BOX_WARNING,
        keyConcepts: ["situation that made it true", "capture the situation", "calcifies"],
        sourceSection: "MEMORY_ENTITY_RULES",
    },
    {
        id: "contexthint-captures-why",
        constant: "CONTEXTHINT_CAPTURES_THE_WHY",
        text: CONTEXTHINT_CAPTURES_THE_WHY,
        keyConcepts: ["contextHint", "situation that produced", "not optional"],
        sourceSection: "MEMORY_METADATA_GUIDANCE",
    },
    {
        id: "expert-autonomy",
        constant: "EXPERT_AUTONOMY_NUDGE",
        text: EXPERT_AUTONOMY_NUDGE,
        keyConcepts: ["expert owner", "intent", "own method", "work items anchor judgment"],
        sourceSection: "EXPERT_AUTONOMY",
    },
];
|
|
259
|
+
//# sourceMappingURL=invariants.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"invariants.js","sourceRoot":"","sources":["../../src/playbook/invariants.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,+EAA+E;AAC/E;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,MAAM,CAAC,MAAM,8BAA8B,GAAG,wFAAwF,CAAA;AAEtI,+EAA+E;AAC/E;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,CAAC,MAAM,2BAA2B,GAAG,gKAAgK,CAAA;AAE3M,+EAA+E;AAC/E;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG,2QAA2Q,CAAA;AAElT,+EAA+E;AAC/E;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,CAAC,MAAM,4BAA4B,GAAG,2OAA2O,CAAA;AAEvR,+EAA+E;AAC/E;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,CAAC,MAAM,8BAA8B,GAAG,gWAAgW,CAAA;AAE9Y,8EAA8E;AAC9E;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,MAAM,CAAC,MAAM,oCAAoC,GAAG,2kBAA2kB,CAAA;AAE/nB,8EAA8E;AAC9E;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAG,+WAA+W,CAAA;AAEpZ,+EAA+E;AAE/E,MAAM,CAAC,MAAM,uBAAuB,GAAG;IACrC;QACE,EAAE,EAAE,0BAA0B;QAC9B,QAAQ,EAAE,gCAAgC;QAC1C,IAAI,EAAE,8BAA8B;QACpC,WAAW,EAAE,CAAC,gBAAgB,EAAE,iBAAiB,CAAC;QAClD,aAAa,EAAE,sBAAsB;KACtC;IACD;QACE,EAAE,EAAE,uBAAuB;QAC3B,QAAQ,EAAE,6BAA6B;QACvC,IAAI,EAAE,2BAA2B;QACjC,WAAW,EAAE,CAAC,sBAAsB,EAAE,kBAAkB,EAAE,cAAc,CAAC;QACzE,aAAa,EAAE,sBAAsB;KACtC;IACD;QACE,EAAE,EAAE,0BAA0B;QAC9B,QAAQ,EAAE,gCAAgC;QAC1C,IAAI,EAAE,8BAA8B;QACpC,WAAW,EAAE,CAAC,kBAAkB,EAAE,0CAA0C,CAAC;QAC7E,aAAa,EAAE,2BAA2B;KAC3C;IACD;QACE,EAAE,EAAE,gCAAgC;QACpC,QAAQ,EAAE,sCAAsC;QAChD,IAAI,EAAE,oCAAoC;QAC1C,WAAW,EAAE,CAAC,yBAAyB,EAAE,qBAAqB,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,wBAAwB,CAAC;QAC/H,aAAa,EAAE,2BAA2B;KAC3C;IACD;QACE,EAAE,EAAE,iBAAiB;QACrB,QAAQ,EAAE,yBAAyB;QACnC,IAAI,EAAE,uBAAuB;QAC7B,WAAW,EAAE,CAAC,6BAA6B,EAAE,uBAAuB,EAAE,WAAW,CAAC;QAClF,aAAa,EAAE,qBAAqB;KACrC;IACD;QACE,EAAE,EAAE,0BAA0B;QAC9B,QAAQ,EAAE,8BAA8B;QACxC,IAAI,EAAE,4BAA4B;QAClC,WAAW,EAAE,CAAC,aAAa,EAAE,yBAAyB,EAAE,cAAc,CAAC;QACvE,aAAa,EAAE,0BAA0B;KAC1C;IACD;QACE,EAAE,EAAE,iBAAiB;QACrB,QAAQ,EAAE,uBAAuB;QACjC,IAAI,EAAE,qBAAqB;QAC3B,WAAW,EAAE,CAAC,cAAc,EAAE,QAAQ,EAAE,YAAY,EAAE,4BAA4B,CAAC;QACnF,aAAa,EAAE,iBAAiB;KACjC;CACO,CAAA"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { PersonaConfig } from '../types.js';
|
|
2
|
+
/** Coach persona preset: a PersonaConfig without its 'provider' key (presumably supplied by the caller — confirm). */
export declare const COACH_TEMPLATE: Omit<PersonaConfig, 'provider'>;
|
|
3
|
+
/** Nutrition persona preset: a PersonaConfig without its 'provider' key (presumably supplied by the caller — confirm). */
export declare const NUTRITION_TEMPLATE: Omit<PersonaConfig, 'provider'>;
|
|
4
|
+
/** Fitness persona preset: a PersonaConfig without its 'provider' key (presumably supplied by the caller — confirm). */
export declare const FITNESS_TEMPLATE: Omit<PersonaConfig, 'provider'>;
|
|
5
|
+
/** Language-tutor persona preset: a PersonaConfig without its 'provider' key (presumably supplied by the caller — confirm). */
export declare const LANGUAGE_TUTOR_TEMPLATE: Omit<PersonaConfig, 'provider'>;
|
|
6
|
+
/** Chief-of-staff persona preset: a PersonaConfig without its 'provider' key (presumably supplied by the caller — confirm). */
export declare const CHIEF_OF_STAFF_TEMPLATE: Omit<PersonaConfig, 'provider'>;
|
|
7
|
+
//# sourceMappingURL=templates.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"templates.d.ts","sourceRoot":"","sources":["../../src/playbook/templates.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAYhD,eAAO,MAAM,cAAc,EAAE,IAAI,CAAC,aAAa,EAAE,UAAU,CAmH1D,CAAA;AAID,eAAO,MAAM,kBAAkB,EAAE,IAAI,CAAC,aAAa,EAAE,UAAU,CAqE9D,CAAA;AAID,eAAO,MAAM,gBAAgB,EAAE,IAAI,CAAC,aAAa,EAAE,UAAU,CAqF5D,CAAA;AAID,eAAO,MAAM,uBAAuB,EAAE,IAAI,CAAC,aAAa,EAAE,UAAU,CA2EnE,CAAA;AAID,eAAO,MAAM,uBAAuB,EAAE,IAAI,CAAC,aAAa,EAAE,UAAU,CAgFnE,CAAA"}
|