@itaila/archetype 0.3.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +475 -0
- package/dist/audit/audit-persona.d.ts +163 -0
- package/dist/audit/audit-persona.d.ts.map +1 -0
- package/dist/audit/audit-persona.js +415 -0
- package/dist/audit/audit-persona.js.map +1 -0
- package/dist/audit/brain-reflection.d.ts +33 -0
- package/dist/audit/brain-reflection.d.ts.map +1 -0
- package/dist/audit/brain-reflection.js +148 -0
- package/dist/audit/brain-reflection.js.map +1 -0
- package/dist/audit/conversation-audit.d.ts +12 -0
- package/dist/audit/conversation-audit.d.ts.map +1 -0
- package/dist/audit/conversation-audit.js +76 -0
- package/dist/audit/conversation-audit.js.map +1 -0
- package/dist/audit/prompt-audit.d.ts +10 -0
- package/dist/audit/prompt-audit.d.ts.map +1 -0
- package/dist/audit/prompt-audit.js +153 -0
- package/dist/audit/prompt-audit.js.map +1 -0
- package/dist/audit/prompt-dump.d.ts +137 -0
- package/dist/audit/prompt-dump.d.ts.map +1 -0
- package/dist/audit/prompt-dump.js +269 -0
- package/dist/audit/prompt-dump.js.map +1 -0
- package/dist/audit/trace-integrity.d.ts +33 -0
- package/dist/audit/trace-integrity.d.ts.map +1 -0
- package/dist/audit/trace-integrity.js +109 -0
- package/dist/audit/trace-integrity.js.map +1 -0
- package/dist/audit/types.d.ts +92 -0
- package/dist/audit/types.d.ts.map +1 -0
- package/dist/audit/types.js +2 -0
- package/dist/audit/types.js.map +1 -0
- package/dist/audit/version.d.ts +14 -0
- package/dist/audit/version.d.ts.map +1 -0
- package/dist/audit/version.js +65 -0
- package/dist/audit/version.js.map +1 -0
- package/dist/brain.d.ts +7 -0
- package/dist/brain.d.ts.map +1 -0
- package/dist/brain.js +83 -0
- package/dist/brain.js.map +1 -0
- package/dist/builder/actions.d.ts +60 -0
- package/dist/builder/actions.d.ts.map +1 -0
- package/dist/builder/actions.js +257 -0
- package/dist/builder/actions.js.map +1 -0
- package/dist/builder/browser.d.ts +140 -0
- package/dist/builder/browser.d.ts.map +1 -0
- package/dist/builder/browser.js +232 -0
- package/dist/builder/browser.js.map +1 -0
- package/dist/builder/executor.d.ts +228 -0
- package/dist/builder/executor.d.ts.map +1 -0
- package/dist/builder/executor.js +1548 -0
- package/dist/builder/executor.js.map +1 -0
- package/dist/builder/index.d.ts +24 -0
- package/dist/builder/index.d.ts.map +1 -0
- package/dist/builder/index.js +24 -0
- package/dist/builder/index.js.map +1 -0
- package/dist/builder/node-test-discovery.d.ts +13 -0
- package/dist/builder/node-test-discovery.d.ts.map +1 -0
- package/dist/builder/node-test-discovery.js +45 -0
- package/dist/builder/node-test-discovery.js.map +1 -0
- package/dist/builder/sandbox.d.ts +172 -0
- package/dist/builder/sandbox.d.ts.map +1 -0
- package/dist/builder/sandbox.js +294 -0
- package/dist/builder/sandbox.js.map +1 -0
- package/dist/builder/workspace-files.d.ts +63 -0
- package/dist/builder/workspace-files.d.ts.map +1 -0
- package/dist/builder/workspace-files.js +190 -0
- package/dist/builder/workspace-files.js.map +1 -0
- package/dist/core/actions.d.ts +55 -0
- package/dist/core/actions.d.ts.map +1 -0
- package/dist/core/actions.js +311 -0
- package/dist/core/actions.js.map +1 -0
- package/dist/core/attachment-notes.d.ts +7 -0
- package/dist/core/attachment-notes.d.ts.map +1 -0
- package/dist/core/attachment-notes.js +38 -0
- package/dist/core/attachment-notes.js.map +1 -0
- package/dist/core/context.d.ts +10 -0
- package/dist/core/context.d.ts.map +1 -0
- package/dist/core/context.js +108 -0
- package/dist/core/context.js.map +1 -0
- package/dist/core/crud-prompt.d.ts +16 -0
- package/dist/core/crud-prompt.d.ts.map +1 -0
- package/dist/core/crud-prompt.js +268 -0
- package/dist/core/crud-prompt.js.map +1 -0
- package/dist/core/crud-schema.d.ts +12 -0
- package/dist/core/crud-schema.d.ts.map +1 -0
- package/dist/core/crud-schema.js +42 -0
- package/dist/core/crud-schema.js.map +1 -0
- package/dist/core/effective-config.d.ts +13 -0
- package/dist/core/effective-config.d.ts.map +1 -0
- package/dist/core/effective-config.js +33 -0
- package/dist/core/effective-config.js.map +1 -0
- package/dist/core/entities.d.ts +82 -0
- package/dist/core/entities.d.ts.map +1 -0
- package/dist/core/entities.js +116 -0
- package/dist/core/entities.js.map +1 -0
- package/dist/core/entity-helpers.d.ts +47 -0
- package/dist/core/entity-helpers.d.ts.map +1 -0
- package/dist/core/entity-helpers.js +122 -0
- package/dist/core/entity-helpers.js.map +1 -0
- package/dist/core/entity-registry.d.ts +47 -0
- package/dist/core/entity-registry.d.ts.map +1 -0
- package/dist/core/entity-registry.js +54 -0
- package/dist/core/entity-registry.js.map +1 -0
- package/dist/core/eq.d.ts +13 -0
- package/dist/core/eq.d.ts.map +1 -0
- package/dist/core/eq.js +41 -0
- package/dist/core/eq.js.map +1 -0
- package/dist/core/focus-context.d.ts +19 -0
- package/dist/core/focus-context.d.ts.map +1 -0
- package/dist/core/focus-context.js +46 -0
- package/dist/core/focus-context.js.map +1 -0
- package/dist/core/focus-mode-actions.d.ts +23 -0
- package/dist/core/focus-mode-actions.d.ts.map +1 -0
- package/dist/core/focus-mode-actions.js +74 -0
- package/dist/core/focus-mode-actions.js.map +1 -0
- package/dist/core/greeting.d.ts +10 -0
- package/dist/core/greeting.d.ts.map +1 -0
- package/dist/core/greeting.js +41 -0
- package/dist/core/greeting.js.map +1 -0
- package/dist/core/identity.d.ts +13 -0
- package/dist/core/identity.d.ts.map +1 -0
- package/dist/core/identity.js +54 -0
- package/dist/core/identity.js.map +1 -0
- package/dist/core/knowledge.d.ts +10 -0
- package/dist/core/knowledge.d.ts.map +1 -0
- package/dist/core/knowledge.js +40 -0
- package/dist/core/knowledge.js.map +1 -0
- package/dist/core/memory-actions.d.ts +38 -0
- package/dist/core/memory-actions.d.ts.map +1 -0
- package/dist/core/memory-actions.js +181 -0
- package/dist/core/memory-actions.js.map +1 -0
- package/dist/core/memory.d.ts +35 -0
- package/dist/core/memory.d.ts.map +1 -0
- package/dist/core/memory.js +168 -0
- package/dist/core/memory.js.map +1 -0
- package/dist/core/peer-actions.d.ts +15 -0
- package/dist/core/peer-actions.d.ts.map +1 -0
- package/dist/core/peer-actions.js +33 -0
- package/dist/core/peer-actions.js.map +1 -0
- package/dist/core/prompt-builder.d.ts +46 -0
- package/dist/core/prompt-builder.d.ts.map +1 -0
- package/dist/core/prompt-builder.js +543 -0
- package/dist/core/prompt-builder.js.map +1 -0
- package/dist/core/prompt-mode.d.ts +3 -0
- package/dist/core/prompt-mode.d.ts.map +1 -0
- package/dist/core/prompt-mode.js +6 -0
- package/dist/core/prompt-mode.js.map +1 -0
- package/dist/core/prompted-turn.d.ts +6 -0
- package/dist/core/prompted-turn.d.ts.map +1 -0
- package/dist/core/prompted-turn.js +48 -0
- package/dist/core/prompted-turn.js.map +1 -0
- package/dist/core/request-builder.d.ts +14 -0
- package/dist/core/request-builder.d.ts.map +1 -0
- package/dist/core/request-builder.js +64 -0
- package/dist/core/request-builder.js.map +1 -0
- package/dist/core/session-routing.d.ts +23 -0
- package/dist/core/session-routing.d.ts.map +1 -0
- package/dist/core/session-routing.js +59 -0
- package/dist/core/session-routing.js.map +1 -0
- package/dist/core/voice.d.ts +6 -0
- package/dist/core/voice.d.ts.map +1 -0
- package/dist/core/voice.js +30 -0
- package/dist/core/voice.js.map +1 -0
- package/dist/engine/chat.d.ts +45 -0
- package/dist/engine/chat.d.ts.map +1 -0
- package/dist/engine/chat.js +308 -0
- package/dist/engine/chat.js.map +1 -0
- package/dist/engine/continuity.d.ts +107 -0
- package/dist/engine/continuity.d.ts.map +1 -0
- package/dist/engine/continuity.js +320 -0
- package/dist/engine/continuity.js.map +1 -0
- package/dist/engine/crud.d.ts +62 -0
- package/dist/engine/crud.d.ts.map +1 -0
- package/dist/engine/crud.js +260 -0
- package/dist/engine/crud.js.map +1 -0
- package/dist/engine/side-effects.d.ts +93 -0
- package/dist/engine/side-effects.d.ts.map +1 -0
- package/dist/engine/side-effects.js +271 -0
- package/dist/engine/side-effects.js.map +1 -0
- package/dist/engine/staging.d.ts +29 -0
- package/dist/engine/staging.d.ts.map +1 -0
- package/dist/engine/staging.js +159 -0
- package/dist/engine/staging.js.map +1 -0
- package/dist/engine/working-set.d.ts +18 -0
- package/dist/engine/working-set.d.ts.map +1 -0
- package/dist/engine/working-set.js +246 -0
- package/dist/engine/working-set.js.map +1 -0
- package/dist/evals/action-contracts.d.ts +40 -0
- package/dist/evals/action-contracts.d.ts.map +1 -0
- package/dist/evals/action-contracts.js +208 -0
- package/dist/evals/action-contracts.js.map +1 -0
- package/dist/evals/brain-bloat.d.ts +39 -0
- package/dist/evals/brain-bloat.d.ts.map +1 -0
- package/dist/evals/brain-bloat.js +167 -0
- package/dist/evals/brain-bloat.js.map +1 -0
- package/dist/evals/brain-prescriptions.d.ts +30 -0
- package/dist/evals/brain-prescriptions.d.ts.map +1 -0
- package/dist/evals/brain-prescriptions.js +148 -0
- package/dist/evals/brain-prescriptions.js.map +1 -0
- package/dist/evals/cross-layer-duplicates.d.ts +49 -0
- package/dist/evals/cross-layer-duplicates.d.ts.map +1 -0
- package/dist/evals/cross-layer-duplicates.js +289 -0
- package/dist/evals/cross-layer-duplicates.js.map +1 -0
- package/dist/evals/entity-visibility.d.ts +28 -0
- package/dist/evals/entity-visibility.d.ts.map +1 -0
- package/dist/evals/entity-visibility.js +216 -0
- package/dist/evals/entity-visibility.js.map +1 -0
- package/dist/evals/index.d.ts +19 -0
- package/dist/evals/index.d.ts.map +1 -0
- package/dist/evals/index.js +11 -0
- package/dist/evals/index.js.map +1 -0
- package/dist/evals/judge.d.ts +22 -0
- package/dist/evals/judge.d.ts.map +1 -0
- package/dist/evals/judge.js +337 -0
- package/dist/evals/judge.js.map +1 -0
- package/dist/evals/operational-contract.d.ts +40 -0
- package/dist/evals/operational-contract.d.ts.map +1 -0
- package/dist/evals/operational-contract.js +115 -0
- package/dist/evals/operational-contract.js.map +1 -0
- package/dist/evals/prompt-content.d.ts +14 -0
- package/dist/evals/prompt-content.d.ts.map +1 -0
- package/dist/evals/prompt-content.js +104 -0
- package/dist/evals/prompt-content.js.map +1 -0
- package/dist/evals/runtime.d.ts +4 -0
- package/dist/evals/runtime.d.ts.map +1 -0
- package/dist/evals/runtime.js +197 -0
- package/dist/evals/runtime.js.map +1 -0
- package/dist/evals/sample-projects.d.ts +143 -0
- package/dist/evals/sample-projects.d.ts.map +1 -0
- package/dist/evals/sample-projects.js +644 -0
- package/dist/evals/sample-projects.js.map +1 -0
- package/dist/evals/types.d.ts +88 -0
- package/dist/evals/types.d.ts.map +1 -0
- package/dist/evals/types.js +2 -0
- package/dist/evals/types.js.map +1 -0
- package/dist/foundation/index.d.ts +158 -0
- package/dist/foundation/index.d.ts.map +1 -0
- package/dist/foundation/index.js +256 -0
- package/dist/foundation/index.js.map +1 -0
- package/dist/index.d.ts +223 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +998 -0
- package/dist/index.js.map +1 -0
- package/dist/managed/autonomous-loop.d.ts +199 -0
- package/dist/managed/autonomous-loop.d.ts.map +1 -0
- package/dist/managed/autonomous-loop.js +451 -0
- package/dist/managed/autonomous-loop.js.map +1 -0
- package/dist/managed/conversation.d.ts +20 -0
- package/dist/managed/conversation.d.ts.map +1 -0
- package/dist/managed/conversation.js +40 -0
- package/dist/managed/conversation.js.map +1 -0
- package/dist/managed/knowledge.d.ts +7 -0
- package/dist/managed/knowledge.d.ts.map +1 -0
- package/dist/managed/knowledge.js +174 -0
- package/dist/managed/knowledge.js.map +1 -0
- package/dist/managed/memory-manager.d.ts +7 -0
- package/dist/managed/memory-manager.d.ts.map +1 -0
- package/dist/managed/memory-manager.js +18 -0
- package/dist/managed/memory-manager.js.map +1 -0
- package/dist/managed/memory-review.d.ts +45 -0
- package/dist/managed/memory-review.d.ts.map +1 -0
- package/dist/managed/memory-review.js +130 -0
- package/dist/managed/memory-review.js.map +1 -0
- package/dist/managed/storage.d.ts +2 -0
- package/dist/managed/storage.d.ts.map +1 -0
- package/dist/managed/storage.js +2 -0
- package/dist/managed/storage.js.map +1 -0
- package/dist/managed/work-history.d.ts +23 -0
- package/dist/managed/work-history.d.ts.map +1 -0
- package/dist/managed/work-history.js +31 -0
- package/dist/managed/work-history.js.map +1 -0
- package/dist/observability/index.d.ts +15 -0
- package/dist/observability/index.d.ts.map +1 -0
- package/dist/observability/index.js +15 -0
- package/dist/observability/index.js.map +1 -0
- package/dist/observability/render-run-markdown.d.ts +90 -0
- package/dist/observability/render-run-markdown.d.ts.map +1 -0
- package/dist/observability/render-run-markdown.js +231 -0
- package/dist/observability/render-run-markdown.js.map +1 -0
- package/dist/observability/turn-reporter.d.ts +20 -0
- package/dist/observability/turn-reporter.d.ts.map +1 -0
- package/dist/observability/turn-reporter.js +106 -0
- package/dist/observability/turn-reporter.js.map +1 -0
- package/dist/persona.d.ts +49 -0
- package/dist/persona.d.ts.map +1 -0
- package/dist/persona.js +287 -0
- package/dist/persona.js.map +1 -0
- package/dist/playbook/defaults.d.ts +25 -0
- package/dist/playbook/defaults.d.ts.map +1 -0
- package/dist/playbook/defaults.js +108 -0
- package/dist/playbook/defaults.js.map +1 -0
- package/dist/playbook/invariants.d.ts +244 -0
- package/dist/playbook/invariants.d.ts.map +1 -0
- package/dist/playbook/invariants.js +259 -0
- package/dist/playbook/invariants.js.map +1 -0
- package/dist/playbook/templates.d.ts +7 -0
- package/dist/playbook/templates.d.ts.map +1 -0
- package/dist/playbook/templates.js +437 -0
- package/dist/playbook/templates.js.map +1 -0
- package/dist/providers/gemini.d.ts +73 -0
- package/dist/providers/gemini.d.ts.map +1 -0
- package/dist/providers/gemini.js +536 -0
- package/dist/providers/gemini.js.map +1 -0
- package/dist/providers/types.d.ts +2 -0
- package/dist/providers/types.d.ts.map +1 -0
- package/dist/providers/types.js +2 -0
- package/dist/providers/types.js.map +1 -0
- package/dist/providers/zod-to-gemini.d.ts +8 -0
- package/dist/providers/zod-to-gemini.d.ts.map +1 -0
- package/dist/providers/zod-to-gemini.js +148 -0
- package/dist/providers/zod-to-gemini.js.map +1 -0
- package/dist/samples/pm-spec-agent.d.ts +22 -0
- package/dist/samples/pm-spec-agent.d.ts.map +1 -0
- package/dist/samples/pm-spec-agent.js +53 -0
- package/dist/samples/pm-spec-agent.js.map +1 -0
- package/dist/types.d.ts +920 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +68 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Conversation Audit — reviews actual AI behavior for keystone violations.
|
|
3
|
+
*
|
|
4
|
+
* The prompt audit checks what the AI was told.
|
|
5
|
+
* The conversation audit checks what the AI actually did.
|
|
6
|
+
*
|
|
7
|
+
* Output: explicit failure list with evidence. The developer decides
|
|
8
|
+
* whether to build regression tests from the failures.
|
|
9
|
+
*/
|
|
10
|
+
import { GoogleGenerativeAI, SchemaType } from '@google/generative-ai';
|
|
11
|
+
import { configVersion } from './version.js';
|
|
12
|
+
/**
 * System instruction handed to the judge model by `auditConversation`.
 *
 * The text is assembled from paragraphs separated by one blank line:
 * the judge's role, the task, the keystone principle, a description of
 * great behavior, the instruction to quote evidence, and the JSON request.
 */
const CONVERSATION_AUDIT_SYSTEM = [
    `You are the Archetype meta judge — an expert in AI persona behavioral quality.`,
    `You are reviewing a conversation between an AI persona and a user.`,
    [
        `THE KEYSTONE PRINCIPLE:`,
        `The AI persona is a domain expert. It has rich context — the user's data, history, memories, and the current situation. A great expert uses what it knows about THIS specific person to lead the conversation. The prompt paints the scenario; the expert decides how to show up.`,
    ].join('\n'),
    [
        `WHAT GREAT LOOKS LIKE:`,
        `A well-functioning persona sounds like a professional who has been working with this client for months. It leads with expertise, grounds its advice in the specific data it has, and sounds like itself — not like a textbook or a generic assistant.`,
    ].join('\n'),
    `You are an expert in evaluating AI persona behavior. Review this conversation and identify where the AI fails the keystone standard. For each failure, quote specific evidence from the transcript.`,
    `Return valid JSON.`,
].join('\n\n');
|
|
25
|
+
/**
 * Structured-output schema passed to Gemini as `responseSchema` by
 * `auditConversation`.
 *
 * The reply is an object with a `failures` array (one entry per violation,
 * each carrying the principle, the turn index, the issue and a quoted piece
 * of evidence) and a one-paragraph `summary`. All fields are required.
 */
const RESPONSE_SCHEMA = {
    type: SchemaType.OBJECT,
    properties: {
        failures: {
            type: SchemaType.ARRAY,
            items: {
                type: SchemaType.OBJECT,
                properties: {
                    principle: { type: SchemaType.STRING, description: 'Which keystone principle the behavior violates' },
                    // A turn index is a whole number. INTEGER (rather than NUMBER)
                    // keeps the judge from returning fractional values.
                    turn: { type: SchemaType.INTEGER, description: 'Which assistant turn (0-indexed) contains the violation' },
                    issue: { type: SchemaType.STRING, description: 'What went wrong, concretely' },
                    evidence: { type: SchemaType.STRING, description: 'Direct quote from the assistant response as evidence' },
                },
                required: ['principle', 'turn', 'issue', 'evidence'],
            },
        },
        summary: { type: SchemaType.STRING, description: 'One-paragraph summary of behavioral quality' },
    },
    required: ['failures', 'summary'],
};
|
|
45
|
+
/**
 * Sends a recorded conversation to a Gemini judge and returns the keystone
 * violations it reports.
 *
 * The judge prompt is made of three parts: a persona summary taken from
 * `config.identity` (plus `config.methodology` when set), the optional
 * `context` serialized as pretty-printed JSON, and the transcript with each
 * message rendered as `ROLE: content`.
 *
 * @param {object} input
 * @param {string} input.apiKey - key handed to the `GoogleGenerativeAI` client.
 * @param {object} input.config - persona config; `identity.name`,
 *   `identity.expertise` (joined with ", "), `identity.relationship`,
 *   `identity.northStar` and the optional `methodology` are read.
 * @param {Array<{role: string, content: string}>} input.history - messages to audit, in order.
 * @param {*} [input.context] - extra data shown to the judge; omitted from the prompt when falsy.
 * @returns {Promise<{configVersion: *, failures: Array, summary: string}>}
 *   `configVersion(config)` plus the judge's `failures` (default `[]`) and
 *   `summary` (default `''`).
 * @throws {SyntaxError} when the judge reply is not valid JSON; the original
 *   parse error is attached as `cause`.
 */
export async function auditConversation(input) {
    const { apiKey, config, history, context } = input;
    // NOTE(review): messages are not numbered here, although RESPONSE_SCHEMA asks
    // the judge for a 0-indexed assistant turn — confirm the judge counts reliably.
    const transcript = history
        .map((msg) => `${msg.role.toUpperCase()}: ${msg.content}`)
        .join('\n\n');
    const { identity } = config;
    const personaLines = [
        `PERSONA: ${identity.name}`,
        `EXPERTISE: ${identity.expertise.join(', ')}`,
        `RELATIONSHIP: ${identity.relationship}`,
        `NORTH STAR: ${identity.northStar}`,
    ];
    if (config.methodology) {
        personaLines.push(`METHODOLOGY:\n${config.methodology}`);
    }
    const personaContext = personaLines.filter(Boolean).join('\n');
    const contextStr = context
        ? `\nCONTEXT DATA AVAILABLE TO THE AI:\n${JSON.stringify(context, null, 2)}`
        : '';
    const genAI = new GoogleGenerativeAI(apiKey);
    const model = genAI.getGenerativeModel({
        model: 'gemini-3.5-flash',
        systemInstruction: CONVERSATION_AUDIT_SYSTEM,
        generationConfig: {
            temperature: 0.2,
            responseMimeType: 'application/json',
            responseSchema: RESPONSE_SCHEMA,
        },
    });
    const result = await model.generateContent(`${personaContext}${contextStr}\n\nCONVERSATION TO AUDIT:\n${transcript}`);
    const raw = result.response.text();
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (err) {
        // Keep the SyntaxError type callers would have seen, but say where it
        // came from and keep the original error reachable.
        const detail = err instanceof Error ? err.message : String(err);
        const wrapped = new SyntaxError(`[archetype:audit] conversation audit returned unparseable JSON: ${detail}`);
        wrapped.cause = err;
        throw wrapped;
    }
    return {
        configVersion: configVersion(config),
        // `parsed` can legitimately be null (the JSON literal `null`), hence `?.`.
        failures: parsed?.failures ?? [],
        summary: parsed?.summary ?? '',
    };
}
|
|
76
|
+
//# sourceMappingURL=conversation-audit.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"conversation-audit.js","sourceRoot":"","sources":["../../src/audit/conversation-audit.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,kBAAkB,EAAE,UAAU,EAAe,MAAM,uBAAuB,CAAA;AACnF,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAA;AAO5C,MAAM,yBAAyB,GAAG;;;;;;;;;;;;mBAYf,CAAA;AAEnB,MAAM,eAAe,GAAG;IACtB,IAAI,EAAE,UAAU,CAAC,MAAM;IACvB,UAAU,EAAE;QACV,QAAQ,EAAE;YACR,IAAI,EAAE,UAAU,CAAC,KAAK;YACtB,KAAK,EAAE;gBACL,IAAI,EAAE,UAAU,CAAC,MAAM;gBACvB,UAAU,EAAE;oBACV,SAAS,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,gDAAgD,EAAE;oBACrG,IAAI,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,yDAAyD,EAAE;oBACzG,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,6BAA6B,EAAE;oBAC9E,QAAQ,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,sDAAsD,EAAE;iBAC3G;gBACD,QAAQ,EAAE,CAAC,WAAW,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,CAAC;aACrD;SACF;QACD,OAAO,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,6CAA6C,EAAE;KACjG;IACD,QAAQ,EAAE,CAAC,UAAU,EAAE,SAAS,CAAC;CAClC,CAAA;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,KAA6B;IACnE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,KAAK,CAAA;IAElD,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CACnC,GAAG,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,GAAG,CAAC,OAAO,EAAE,CAC5C,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IAEd,MAAM,cAAc,GAAG;QACrB,YAAY,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE;QAClC,cAAc,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;QACpD,iBAAiB,MAAM,CAAC,QAAQ,CAAC,YAAY,EAAE;QAC/C,eAAe,MAAM,CAAC,QAAQ,CAAC,SAAS,EAAE;QAC1C,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,iBAAiB,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE;KAChE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAE5B,MAAM,UAAU,GAAG,OAAO;QACxB,CAAC,CAAC,wCAAwC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE;QAC5E,CAAC,CAAC,EAAE,CAAA;IAEN,MAAM,KAAK,GAAG,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAA;IAC5C,MAAM,KAAK,GAAG,KAAK,CAAC,kBAAkB,CAAC;QACrC,KAAK,EAAE,kBAAkB;QACzB,iBAAiB,EAAE,yBAAyB;QAC5C,gBAAgB,EAAE;YAChB,WAAW,EAAE,GAAG;YAChB,gBAAgB,EAAE,kBAAkB;YACpC,cAAc,EAAE,eAAoC;SACrD;KACF,CAAC,CAAA;IAEF,MAAM,MAAM,GAAG,M
AAM,KAAK,CAAC,eAAe,CACxC,GAAG,cAAc,GAAG,UAAU,+BAA+B,UAAU,EAAE,CAC1E,CAAA;IAED,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,CAG/C,CAAA;IAED,OAAO;QACL,aAAa,EAAE,aAAa,CAAC,MAAM,CAAC;QACpC,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,EAAE;QAC/B,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,EAAE;KAC9B,CAAA;AACH,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompt Audit — Archetype's meta judge for keystone principle violations.
|
|
3
|
+
*
|
|
4
|
+
* The persona is the domain expert. Archetype is the prompt-engineering expert.
|
|
5
|
+
* This judge reviews the assembled prompt for anti-patterns that constrain
|
|
6
|
+
* the AI's expert judgment instead of empowering it.
|
|
7
|
+
*/
|
|
8
|
+
import type { PromptAuditInput, PromptAuditResult } from './types.js';
|
|
9
|
+
/**
 * Assembles the system prompt for the given persona config and asks a Gemini
 * judge model to review it for keystone principle violations.
 *
 * @param input - the audit request; see `PromptAuditInput`.
 * @returns a promise resolving to the judge's findings; see `PromptAuditResult`.
 */
export declare function auditPrompt(input: PromptAuditInput): Promise<PromptAuditResult>;
|
|
10
|
+
//# sourceMappingURL=prompt-audit.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prompt-audit.d.ts","sourceRoot":"","sources":["../../src/audit/prompt-audit.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAMH,OAAO,KAAK,EAAE,gBAAgB,EAAE,iBAAiB,EAAsB,MAAM,YAAY,CAAA;AAuEzF,wBAAsB,WAAW,CAAC,KAAK,EAAE,gBAAgB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAiFrF"}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompt Audit — Archetype's meta judge for keystone principle violations.
|
|
3
|
+
*
|
|
4
|
+
* The persona is the domain expert. Archetype is the prompt-engineering expert.
|
|
5
|
+
* This judge reviews the assembled prompt for anti-patterns that constrain
|
|
6
|
+
* the AI's expert judgment instead of empowering it.
|
|
7
|
+
*/
|
|
8
|
+
import { GoogleGenerativeAI, SchemaType } from '@google/generative-ai';
|
|
9
|
+
import { buildSystemPrompt } from '../core/prompt-builder.js';
|
|
10
|
+
import { resolveActions } from '../core/effective-config.js';
|
|
11
|
+
import { configVersion } from './version.js';
|
|
12
|
+
/**
 * System instruction for the prompt-audit judge.
 *
 * Carries the keystone audit criteria, which the original authors derived
 * from PLAYBOOK_ESSENTIALS.md and the design principles in eq.ts,
 * identity.ts, and defaults.ts. The criteria judge the SHAPE of a prompt,
 * not its domain content.
 *
 * The text is assembled from paragraphs separated by one blank line; the
 * failure-mode lenses form a single paragraph with one bullet per line.
 */
const PROMPT_AUDIT_SYSTEM = [
    `You are the Archetype meta judge — an expert reviewer of AI persona configs written by other engineers (often with AI coding agents).`,
    `The most common failure you catch is the reviewer's instinct to assume the AI is "stupid" and box it with rules, instead of realizing the AI is as capable as the engineer and failed because the context, scenario, or contract was unclear. When you see a rule that looks like it's correcting a mediocre employee, the real fix is almost always in the adjacent layer (entity description, schema field, context label, EQ config) — not more brain prose.`,
    [
        `THE KEYSTONE PRINCIPLE:`,
        `The AI persona is a domain expert. The prompt's job is to paint the scenario — context, memories, history, timing, constraints — and explain the intent. Describe the standard. Then trust the expert. Hard rules are ONLY for mechanical correctness (JSON format, unit conversion, schema compliance, dedup logic). Everything else must be a thinking nudge.`,
    ].join('\n'),
    [
        `THE SMELL TEST:`,
        `If a line reads like correcting a mediocre employee instead of briefing a strong collaborator, it's a violation.`,
    ].join('\n'),
    [
        `WHAT GREAT LOOKS LIKE:`,
        `A well-shaped methodology describes the world the expert operates in and what a great practitioner notices. It reads like a briefing, not a rulebook.`,
    ].join('\n'),
    `FAILURE MODES TO WATCH FOR (use judgment; these are lenses, not a checklist):`,
    [
        `- prescriptive — rules that tell the AI what to do in specific situations instead of describing a standard.`,
        `- trigger-response-mapping — any "when X → do Y" pattern, at any level of abstraction. Even abstracted coaching wisdom like "respond to frustration by validating" is boxing.`,
        `- doing-experts-thinking — enumerating what to check, attend to, or reason about, instead of trusting the expert to know what matters.`,
        `- negative-identity — "you are NOT a calorie tracker" etc. Defines the persona by what it isn't.`,
        `- throttling — "limit yourself to 2 suggestions", "don't always ask a question" — arbitrary caps on the expert's judgment.`,
        `- announce-actions — instructions to narrate what the AI is doing or about to do.`,
        `- rule-density — so many rules stacked that the expert has no room to reason.`,
        `- duplicate-across-layers — prose in the brain that restates content already carried by entity descriptions, entity field describe() annotations, EQ flags, context labels, or memory config. The brain shouldn't carry what adjacent layers already say — duplicates compete for attention and crowd out taste.`,
        `- signal-dilution — the same semantic signal stated three or more times in different wordings across the assembled prompt (across SDK defaults, brain sections, EQ nudges, memory block intros, etc.). Different from duplicate-across-layers: that's brain restating adjacent-layer content; this is the SAME idea phrased multiple ways inside the same prompt. The AI sees a cloud of soft reminders instead of one canonical rule, and the cloud dilutes rather than reinforces. Name the concept being repeated, quote two or three of the offending phrasings, and suggest consolidating to the single strongest statement.`,
        `- self-documenting-overdocumented — an action or entity name that's already self-explanatory (like sendEmail, logMeal, createTask) paired with multi-line prose that restates what the name already says. Tell: deleting the description loses nothing a reader couldn't infer.`,
        `- ambiguous-action-contract — action or entity contracts where the name/schema/description fight each other, use generic words (data, handle, process), have vague types (any, stringified objects), or lack the semantic clues the AI needs to emit correct calls. When you see "AI emits malformed actions", the problem is usually here — not AI capability.`,
        `- not-visible-in-context — the ENTITIES block advertises update and delete for an entity, but the assembled prompt contains no surface that carries a record with an id for it (no CURRENT/OPEN/… block listing rows with ids under the entity name or its {name}Record(s) variants). The AI is asked to target records it can't see — it falls back to creating duplicates or writing memories instead. Fix by declaring a contextInput that surfaces records with ids, or by scoping the entity to createOnly.`,
        `- conflicting-instructions — two parts of the config contradict each other (e.g., identity says "respect informed choices", action protocol says "always push toward healthier").`,
        `- underspecified — instructions vague to the point of forcing guessing (e.g., "handle correction turns appropriately" — what's "appropriately"?). Different from terseness: good terse prose is unambiguous; underspecified prose leaves the AI to invent the meaning.`,
    ].join('\n'),
    `For each violation, quote the exact text, name the principle, explain concretely why it's a problem, and suggest a scenario-first rewrite OR point to the adjacent layer where the content should live (e.g., "move to profile entity description", "already carried by EQ qualitativeFirst flag").`,
    `Return valid JSON.`,
].join('\n\n');
|
|
51
|
+
/**
 * Structured-output schema passed to Gemini as `responseSchema` by
 * `auditPrompt`.
 *
 * The reply is an object with a `failures` array (one entry per violation,
 * each carrying the principle label, the offending text, the issue and a
 * suggested rewrite) and a one-paragraph `summary`. All fields are required.
 */
const RESPONSE_SCHEMA = {
    type: SchemaType.OBJECT,
    properties: {
        failures: {
            type: SchemaType.ARRAY,
            items: {
                type: SchemaType.OBJECT,
                properties: {
                    // The example labels mirror the lens names used in PROMPT_AUDIT_SYSTEM.
                    // The former "behavioral-rule" example named no lens in that list, so it
                    // is replaced by the lens that covers it.
                    principle: { type: SchemaType.STRING, description: 'Which keystone principle is violated (e.g., "prescriptive", "trigger-response-mapping", "doing-experts-thinking", "negative-identity", "throttling", "rule-density")' },
                    text: { type: SchemaType.STRING, description: 'The exact problematic text from the prompt' },
                    issue: { type: SchemaType.STRING, description: 'Why this is a problem, concretely' },
                    suggestion: { type: SchemaType.STRING, description: 'A scenario-first rewrite that preserves the intent' },
                },
                required: ['principle', 'text', 'issue', 'suggestion'],
            },
        },
        summary: { type: SchemaType.STRING, description: 'One-paragraph summary of the prompt health and key issues' },
    },
    required: ['failures', 'summary'],
};
|
|
71
|
+
/**
 * Models `auditPrompt` tries, in this order, after the primary model when
 * the caller supplies no `fallbackModels`.
 */
const DEFAULT_AUDIT_FALLBACKS = [
    'gemini-3.1-pro-preview',
    'gemini-3.1-flash-lite',
];

/**
 * Retries allowed per model after its first failed call, so each model is
 * called at most `AUDIT_MAX_RETRIES + 1` times.
 */
const AUDIT_MAX_RETRIES = 2;
|
|
73
|
+
/**
 * Tells whether an audit failure looks transient and is worth retrying.
 *
 * The decision is made from the error text alone: HTTP 429, any 5xx code,
 * or one of the words timeout / timed out / overloaded / unavailable /
 * resource exhausted, matched case-insensitively on word boundaries.
 *
 * @param {unknown} error - the thrown value; non-Error values are stringified.
 * @returns {boolean} true when the message matches a transient-failure marker.
 */
function isRetryableAuditError(error) {
    const message = error instanceof Error ? error.message : String(error);
    // `[ _]` accepts both the prose form "resource exhausted" and the status
    // code form RESOURCE_EXHAUSTED; a literal space alone never matches the
    // latter.
    return /\b(429|5\d\d|timeout|timed out|overloaded|unavailable|resource[ _]exhausted)\b/i.test(message);
}
|
|
77
|
+
/**
 * Assembles the system prompt for a persona config and asks a Gemini judge
 * to review it for keystone principle violations.
 *
 * Models are tried in order: `input.model` (default 'gemini-3.5-flash'),
 * then `input.fallbackModels` (default `DEFAULT_AUDIT_FALLBACKS`), with
 * duplicates removed. Each model gets up to `AUDIT_MAX_RETRIES` retries on
 * errors that `isRetryableAuditError` accepts, with a 500 ms delay that
 * doubles per attempt (capped at 4000 ms). Any other error aborts at once.
 *
 * @param {object} input
 * @param {string} input.apiKey - key handed to the `GoogleGenerativeAI` client.
 * @param {object} input.config - persona config to audit.
 * @param {object} [input.context] - context passed to the prompt builder (default `{}`).
 * @param {Array} [input.memories] - memories passed to the prompt builder (default `[]`).
 * @param {string} [input.model] - primary judge model.
 * @param {string[]} [input.fallbackModels] - models tried after the primary.
 * @param {*} [input.promptMode] - forwarded to the prompt builder when truthy.
 * @returns {Promise<{configVersion: *, failures: Array, summary: string}>}
 *   `configVersion(config)` plus the judge's `failures` (default `[]`) and
 *   `summary` (default `''`).
 * @throws {Error} the non-retryable error that aborted the run, or the last
 *   retryable error once every model has used up its attempts.
 * @throws {SyntaxError} when the judge reply is not valid JSON; the original
 *   parse error is attached as `cause`.
 */
export async function auditPrompt(input) {
    const { apiKey, config, context, memories } = input;
    const primary = input.model ?? 'gemini-3.5-flash';
    const fallbacks = input.fallbackModels ?? DEFAULT_AUDIT_FALLBACKS;
    // A Set iterates in first-insertion order, so this drops repeated model
    // names while keeping the primary first and the fallbacks in their order.
    const modelChain = [...new Set([primary, ...fallbacks])];
    // Resolve effective actions (merges memory/craft actions) to get the full prompt
    const effectiveActions = resolveActions(config);
    const effectiveConfig = effectiveActions !== config.actions
        ? { ...config, actions: effectiveActions }
        : config;
    // Assemble the full prompt that the AI would actually see. When the
    // consumer passes `promptMode`, honor it — otherwise the audit reports
    // findings against a prompt variant the runtime never sends (chat-mode
    // default), missing latent contradictions in focus / operational modes.
    const fullPrompt = buildSystemPrompt({
        config: effectiveConfig,
        input: {
            message: '(audit)',
            context: context ?? {},
            memories: memories ?? [],
            timezone: 'UTC',
            ...(input.promptMode ? { promptMode: input.promptMode } : {}),
        },
    });
    const genAI = new GoogleGenerativeAI(apiKey);
    const makeModel = (modelName) => genAI.getGenerativeModel({
        model: modelName,
        systemInstruction: PROMPT_AUDIT_SYSTEM,
        generationConfig: {
            temperature: 0.2,
            responseMimeType: 'application/json',
            responseSchema: RESPONSE_SCHEMA,
        },
    });
    const auditInput = `Review this assembled system prompt for keystone principle violations:\n\n---\n${fullPrompt}\n---`;
    // Try each model in chain with retries
    let lastError;
    let result;
    outer: for (let m = 0; m < modelChain.length; m++) {
        const current = modelChain[m];
        if (m > 0) {
            console.warn(`[archetype:audit] ${modelChain[m - 1]} exhausted — falling back to ${current}`);
        }
        const model = makeModel(current);
        for (let attempt = 0; attempt <= AUDIT_MAX_RETRIES; attempt++) {
            try {
                result = await model.generateContent(auditInput);
                // First success wins: leave both loops.
                break outer;
            }
            catch (err) {
                lastError = err instanceof Error ? err : new Error(String(err));
                if (!isRetryableAuditError(lastError))
                    throw lastError;
                // No delay after the final attempt — control falls through to the next model.
                if (attempt < AUDIT_MAX_RETRIES) {
                    const delayMs = Math.min(500 * Math.pow(2, attempt), 4000);
                    console.warn(`[archetype:audit] ${current} attempt ${attempt + 1} failed: ${lastError.message} — retrying in ${delayMs}ms`);
                    await new Promise((r) => setTimeout(r, delayMs));
                }
            }
        }
    }
    if (!result)
        throw lastError ?? new Error('[archetype:audit] all models exhausted');
    const raw = result.response.text();
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (err) {
        // Keep the SyntaxError type callers would have seen, but say where it
        // came from and keep the original error reachable.
        const detail = err instanceof Error ? err.message : String(err);
        const wrapped = new SyntaxError(`[archetype:audit] prompt audit returned unparseable JSON: ${detail}`);
        wrapped.cause = err;
        throw wrapped;
    }
    return {
        configVersion: configVersion(config),
        // `parsed` can legitimately be null (the JSON literal `null`), hence `?.`.
        failures: parsed?.failures ?? [],
        summary: parsed?.summary ?? '',
    };
}
|
|
153
|
+
//# sourceMappingURL=prompt-audit.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prompt-audit.js","sourceRoot":"","sources":["../../src/audit/prompt-audit.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,kBAAkB,EAAE,UAAU,EAAe,MAAM,uBAAuB,CAAA;AACnF,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAA;AAC7D,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAA;AAC5D,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAA;AAG5C;;;;;GAKG;AACH,MAAM,mBAAmB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;mBAgCT,CAAA;AAEnB,MAAM,eAAe,GAAG;IACtB,IAAI,EAAE,UAAU,CAAC,MAAM;IACvB,UAAU,EAAE;QACV,QAAQ,EAAE;YACR,IAAI,EAAE,UAAU,CAAC,KAAK;YACtB,KAAK,EAAE;gBACL,IAAI,EAAE,UAAU,CAAC,MAAM;gBACvB,UAAU,EAAE;oBACV,SAAS,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,6JAA6J,EAAE;oBAClN,IAAI,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,4CAA4C,EAAE;oBAC5F,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,mCAAmC,EAAE;oBACpF,UAAU,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,oDAAoD,EAAE;iBAC3G;gBACD,QAAQ,EAAE,CAAC,WAAW,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,CAAC;aACvD;SACF;QACD,OAAO,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,WAAW,EAAE,2DAA2D,EAAE;KAC/G;IACD,QAAQ,EAAE,CAAC,UAAU,EAAE,SAAS,CAAC;CAClC,CAAA;AAED,MAAM,uBAAuB,GAAG,CAAC,wBAAwB,EAAE,uBAAuB,CAAC,CAAA;AACnF,MAAM,iBAAiB,GAAG,CAAC,CAAA;AAE3B,SAAS,qBAAqB,CAAC,KAAc;IAC3C,MAAM,GAAG,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;IAClE,OAAO,8EAA8E,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AACjG,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,KAAuB;IACvD,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAA;IACnD,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,IAAI,kBAAkB,CAAA;IACjD,MAAM,SAAS,GAAG,KAAK,CAAC,cAAc,IAAI,uBAAuB,CAAA;IACjE,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAA;IAC9B,MAAM,UAAU,GAAG,CAAC,OAAO,EAAE,GAAG,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;QACtD,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,OAAO,KAAK,CAAA;QAC7B,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;QACX,OAAO,IAAI,CAAA;IACb,CAAC,CAAC,CAAA;IAEF,iFAAiF;IACjF,MAAM,gBAAgB,GAAG,cAAc,CAAC,MAAM,CAAC,CAAA;IAC/C,MAAM,eAAe,GAAG,gBAAgB,KAAK,MAAM,CAAC,OAAO;QACzD,CAAC,CAAC,EAAE,GAAG,MAAM,EAAE,OAAO,E
AAE,gBAAgB,EAAE;QAC1C,CAAC,CAAC,MAAM,CAAA;IAEV,oEAAoE;IACpE,uEAAuE;IACvE,uEAAuE;IACvE,wEAAwE;IACxE,MAAM,UAAU,GAAG,iBAAiB,CAAC;QACnC,MAAM,EAAE,eAAe;QACvB,KAAK,EAAE;YACL,OAAO,EAAE,SAAS;YAClB,OAAO,EAAE,OAAO,IAAI,EAAE;YACtB,QAAQ,EAAE,QAAQ,IAAI,EAAE;YACxB,QAAQ,EAAE,KAAK;YACf,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAC9D;KACF,CAAC,CAAA;IAEF,MAAM,KAAK,GAAG,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAA;IAC5C,MAAM,SAAS,GAAG,CAAC,SAAiB,EAAE,EAAE,CAAC,KAAK,CAAC,kBAAkB,CAAC;QAChE,KAAK,EAAE,SAAS;QAChB,iBAAiB,EAAE,mBAAmB;QACtC,gBAAgB,EAAE;YAChB,WAAW,EAAE,GAAG;YAChB,gBAAgB,EAAE,kBAAkB;YACpC,cAAc,EAAE,eAAoC;SACrD;KACF,CAAC,CAAA;IAEF,MAAM,UAAU,GAAG,kFAAkF,UAAU,OAAO,CAAA;IAEtH,uCAAuC;IACvC,IAAI,SAA4B,CAAA;IAChC,IAAI,MAAwD,CAAA;IAC5D,KAAK,EAAE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClD,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,CAAA;QAC7B,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YACV,OAAO,CAAC,IAAI,CAAC,qBAAqB,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,gCAAgC,OAAO,EAAE,CAAC,CAAA;QAC/F,CAAC;QACD,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,CAAA;QAChC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,iBAAiB,EAAE,OAAO,EAAE,EAAE,CAAC;YAC9D,IAAI,CAAC;gBACH,MAAM,GAAG,MAAM,KAAK,CAAC,eAAe,CAAC,UAAU,CAAC,CAAA;gBAChD,MAAM,KAAK,CAAA;YACb,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,SAAS,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAA;gBAC/D,IAAI,CAAC,qBAAqB,CAAC,SAAS,CAAC;oBAAE,MAAM,SAAS,CAAA;gBACtD,IAAI,OAAO,GAAG,iBAAiB,EAAE,CAAC;oBAChC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,EAAE,IAAI,CAAC,CAAA;oBAC1D,OAAO,CAAC,IAAI,CAAC,qBAAqB,OAAO,YAAY,OAAO,GAAG,CAAC,YAAY,SAAS,CAAC,OAAO,kBAAkB,OAAO,IAAI,CAAC,CAAA;oBAC3H,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAA;gBAClD,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IACD,IAAI,CAAC,MAAM;QAAE,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAA;IAEnF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,
CAG/C,CAAA;IAED,OAAO;QACL,aAAa,EAAE,aAAa,CAAC,MAAM,CAAC;QACpC,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,EAAE;QAC/B,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,EAAE;KAC9B,CAAA;AACH,CAAC"}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import type { PersonaConfig, ChatInput, PromptedTurnInput, ChatAttachment, PromptMode, PromptOrigin } from '../types.js';
|
|
2
|
+
export interface DumpedPrompt {
    /** Assembled system prompt: the exact text the LLM receives in its system role. */
    systemPrompt: string;
    /** Prior user/assistant messages, in order. */
    history: {
        role: 'user' | 'assistant';
        content: string;
    }[];
    /** Current user message; for prompted turns, the intent/guidelines instead. */
    message: string;
    /** Image attachments, if any. */
    attachments?: ChatAttachment[];
    /** Response schema handed to the LLM, where applicable. */
    responseSchema?: Record<string, unknown>;
    /** Mode that produced this prompt: 'chat', 'prompted-turn' or 'retrospect'. */
    mode: DumpMode;
    /** Prompt scaffold the model actually received. */
    promptMode: PromptMode;
    /** Origin of the current message: a person/user, or the app/runtime. */
    promptOrigin: PromptOrigin;
    /**
     * The whole packet as one string with clearly labeled section headers,
     * suitable for pasting into an AI reviewer, attaching to a bug report,
     * or diffing between runs.
     */
    artifact: string;
}
|
|
29
|
+
/** Assembly path a dumped prompt was produced by. */
export type DumpMode = 'chat' | 'prompted-turn' | 'retrospect';
|
|
30
|
+
export interface DumpPromptOptions {
    /**
     * Assembly path to use. Defaults to 'chat', which matches engine.chat().
     * Choose 'prompted-turn' for AI-initiated turns such as greetings or
     * proactive messages.
     */
    mode?: DumpMode;
}
|
|
37
|
+
/**
 * Build the exact prompt this persona would send for a single input and
 * return it in a form that is easy for a human or an AI to review.
 *
 * Intended for the debugging loop: reach for it when auditPersona findings
 * need more context, or when another AI should give a second opinion on
 * the actual packet.
 */
export declare function dumpPromptForReview(
    config: PersonaConfig,
    input: ChatInput | PromptedTurnInput,
    options?: DumpPromptOptions,
): DumpedPrompt;
|
|
46
|
+
export interface PromptTraceRecorderOptions {
    /** Directory that receives the per-turn trace JSON files; created when missing. */
    outDir: string;
    /**
     * Optional namespace/subgroup inside outDir, handy when several harnesses
     * run against one persona in parallel. When set, traces land under
     * `<outDir>/<traceGroup>/`.
     */
    traceGroup?: string;
    /**
     * Optional tag stored in every trace record, e.g. the transport/provider
     * name, so downstream review tooling can filter on it.
     */
    tag?: string;
    /**
     * Per-turn output format. Defaults to 'json' for programmatic analysis;
     * 'artifact' writes the human-readable single-string version instead, and
     * 'both' writes two files per turn (.json + .txt).
     */
    format?: 'json' | 'artifact' | 'both';
}
|
|
67
|
+
export interface PromptTraceRecorder {
    /**
     * Hook for `runAutonomousLoop`'s `hooks.onBeforeChat`: receives the
     * assembled request and writes one file per turn.
     */
    onBeforeChat: (info: {
        request: {
            systemPrompt: string;
            message: string;
            history: {
                role: 'user' | 'assistant';
                content: string;
            }[];
            responseSchema?: Record<string, unknown>;
            attachments?: ChatAttachment[];
            promptMode?: PromptMode;
        };
        turn: number;
        attempt: number;
    }) => void;
    /**
     * Records a standalone dump without involving autonomous-loop; useful
     * for hand-rolled benchmark harnesses.
     */
    record: (input: {
        turn: number;
        attempt?: number;
        phase?: 'initial' | 'followup';
        dumped: DumpedPrompt;
    }) => void;
    /**
     * Appends post-model/post-action evidence to the turn's existing JSON.
     * This is deliberately developer-facing audit data: raw response, parsed
     * actions, execution outcomes and resulting history are captured so that
     * a failed action can be debugged from the artifacts alone.
     */
    recordTurnResult: (input: PromptTraceResultInput) => void;
    /** Directory the traces are written to. */
    outDir: string;
}
|
|
107
|
+
/**
 * Payload accepted by `PromptTraceRecorder.recordTurnResult`: the evidence
 * gathered after the model responded and actions ran for one turn.
 */
export interface PromptTraceResultInput {
    turn: number;
    attempt?: number;
    phase?: 'initial' | 'followup';
    /** Raw model response. */
    rawResponse?: string | null;
    message?: string;
    trace?: unknown;
    /** Actions parsed from the response. */
    actions?: unknown[];
    /** Outcomes of executing the actions. */
    actionResults?: unknown[];
    /** Conversation history as it stands after the turn. */
    historyAfterTurn?: {
        role: string;
        content: string;
    }[];
    nextMessage?: string | null;
    notes?: string[];
}
|
|
123
|
+
/**
 * Create a PromptTraceRecorder configured by the given options.
 *
 * @param options - Output directory, optional trace group / tag, and format.
 * @returns A recorder exposing `onBeforeChat`, `record`, `recordTurnResult` and `outDir`.
 */
export declare function createPromptTraceRecorder(options: PromptTraceRecorderOptions): PromptTraceRecorder;
|
|
124
|
+
/**
 * Render the pieces of a prompt packet as a single string.
 *
 * NOTE(review): presumably this produces the sectioned text stored in
 * `DumpedPrompt.artifact` — confirm against the implementation.
 */
declare function formatAsArtifact(input: {
    systemPrompt: string;
    history: {
        role: 'user' | 'assistant';
        content: string;
    }[];
    message: string;
    mode: DumpMode;
    promptMode?: PromptMode;
    promptOrigin?: PromptOrigin;
    attachments?: ChatAttachment[];
}): string;
|
|
136
|
+
export { formatAsArtifact };
|
|
137
|
+
//# sourceMappingURL=prompt-dump.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prompt-dump.d.ts","sourceRoot":"","sources":["../../src/audit/prompt-dump.ts"],"names":[],"mappings":"AA0BA,OAAO,KAAK,EACV,aAAa,EACb,SAAS,EACT,iBAAiB,EAEjB,cAAc,EACd,UAAU,EACV,YAAY,EACb,MAAM,aAAa,CAAA;AAQpB,MAAM,WAAW,YAAY;IAC3B,kFAAkF;IAClF,YAAY,EAAE,MAAM,CAAA;IACpB,2CAA2C;IAC3C,OAAO,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IAC/D,8EAA8E;IAC9E,OAAO,EAAE,MAAM,CAAA;IACf,uCAAuC;IACvC,WAAW,CAAC,EAAE,cAAc,EAAE,CAAA;IAC9B,wDAAwD;IACxD,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IACxC,iFAAiF;IACjF,IAAI,EAAE,QAAQ,CAAA;IACd,yDAAyD;IACzD,UAAU,EAAE,UAAU,CAAA;IACtB,oFAAoF;IACpF,YAAY,EAAE,YAAY,CAAA;IAC1B;;;;OAIG;IACH,QAAQ,EAAE,MAAM,CAAA;CACjB;AAED,MAAM,MAAM,QAAQ,GAAG,MAAM,GAAG,eAAe,GAAG,YAAY,CAAA;AAE9D,MAAM,WAAW,iBAAiB;IAChC;;;OAGG;IACH,IAAI,CAAC,EAAE,QAAQ,CAAA;CAChB;AAID;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CACjC,MAAM,EAAE,aAAa,EACrB,KAAK,EAAE,SAAS,GAAG,iBAAiB,EACpC,OAAO,CAAC,EAAE,iBAAiB,GAC1B,YAAY,CA+Cd;AAID,MAAM,WAAW,0BAA0B;IACzC,wEAAwE;IACxE,MAAM,EAAE,MAAM,CAAA;IACd;;;;OAIG;IACH,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ;;;;OAIG;IACH,MAAM,CAAC,EAAE,MAAM,GAAG,UAAU,GAAG,MAAM,CAAA;CACtC;AAED,MAAM,WAAW,mBAAmB;IAClC;;;OAGG;IACH,YAAY,EAAE,CAAC,IAAI,EAAE;QACnB,OAAO,EAAE;YACP,YAAY,EAAE,MAAM,CAAA;YACpB,OAAO,EAAE,MAAM,CAAA;YACf,OAAO,EAAE,KAAK,CAAC;gBAAE,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;gBAAC,OAAO,EAAE,MAAM,CAAA;aAAE,CAAC,CAAA;YAC/D,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;YACxC,WAAW,CAAC,EAAE,cAAc,EAAE,CAAA;YAC9B,UAAU,CAAC,EAAE,UAAU,CAAA;SACxB,CAAA;QACD,IAAI,EAAE,MAAM,CAAA;QACZ,OAAO,EAAE,MAAM,CAAA;KAChB,KAAK,IAAI,CAAA;IACV;;;OAGG;IACH,MAAM,EAAE,CAAC,KAAK,EAAE;QACd,IAAI,EAAE,MAAM,CAAA;QACZ,OAAO,CAAC,EAAE,MAAM,CAAA;QAChB,KAAK,CAAC,EAAE,SAAS,GAAG,UAAU,CAAA;QAC9B,MAAM,EAAE,YAAY,CAAA;KACrB,KAAK,IAAI,CAAA;IACV;;;;;OAKG;IACH,gBAAgB,EAAE,CAAC,KAAK,EAAE,sBAAsB,KAAK,IAAI,CAAA;IACzD,gDAAgD;IAChD,MAAM,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,CAAC,EAAE,MAAM,C
AAA;IAChB,KAAK,CAAC,EAAE,SAAS,GAAG,UAAU,CAAA;IAC9B,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,KAAK,CAAC,EAAE,OAAO,CAAA;IACf,OAAO,CAAC,EAAE,OAAO,EAAE,CAAA;IACnB,aAAa,CAAC,EAAE,OAAO,EAAE,CAAA;IACzB,gBAAgB,CAAC,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IAC3D,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IAC3B,KAAK,CAAC,EAAE,MAAM,EAAE,CAAA;CACjB;AAID,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,0BAA0B,GAClC,mBAAmB,CAmHrB;AAyBD,iBAAS,gBAAgB,CAAC,KAAK,EAAE;IAC/B,YAAY,EAAE,MAAM,CAAA;IACpB,OAAO,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IAC/D,OAAO,EAAE,MAAM,CAAA;IACf,IAAI,EAAE,QAAQ,CAAA;IACd,UAAU,CAAC,EAAE,UAAU,CAAA;IACvB,YAAY,CAAC,EAAE,YAAY,CAAA;IAC3B,WAAW,CAAC,EAAE,cAAc,EAAE,CAAA;CAC/B,GAAG,MAAM,CAwDT;AAWD,OAAO,EAAE,gBAAgB,EAAE,CAAA"}
|