onion-check 2.0.2 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/generators/alerts.d.ts +13 -0
- package/dist/generators/alerts.d.ts.map +1 -0
- package/dist/generators/alerts.js +283 -0
- package/dist/generators/alerts.js.map +1 -0
- package/dist/generators/goal.js +1 -1
- package/dist/generators/index.js +3 -3
- package/dist/generators/index.js.map +1 -1
- package/dist/generators/layer-docs.d.ts.map +1 -1
- package/dist/generators/layer-docs.js +7 -2
- package/dist/generators/layer-docs.js.map +1 -1
- package/dist/generators/operational-docs.d.ts +5 -0
- package/dist/generators/operational-docs.d.ts.map +1 -0
- package/dist/generators/operational-docs.js +1169 -0
- package/dist/generators/operational-docs.js.map +1 -0
- package/dist/generators/roadmap.d.ts +4 -0
- package/dist/generators/roadmap.d.ts.map +1 -0
- package/dist/generators/roadmap.js +356 -0
- package/dist/generators/roadmap.js.map +1 -0
- package/dist/history/status.d.ts.map +1 -1
- package/dist/history/status.js +48 -0
- package/dist/history/status.js.map +1 -1
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -1
- package/dist/interactive/diagnostic.d.ts.map +1 -1
- package/dist/interactive/diagnostic.js +86 -11
- package/dist/interactive/diagnostic.js.map +1 -1
- package/dist/scaffold/generator.js +1 -1
- package/dist/scaffold/generator.js.map +1 -1
- package/dist/scaffold/templates.js +11 -11
- package/dist/scanner/reporter.js +1 -1
- package/dist/scanner/reporter.js.map +1 -1
- package/dist/scanner/scorer.js +3 -3
- package/dist/scanner/scorer.js.map +1 -1
- package/package.json +2 -2
|
@@ -0,0 +1,1169 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// Layer name map (for dependency descriptions)
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Display names for the eight layers, keyed by 1-based layer number.
// The list is ordered so that position + 1 is the layer number.
const LAYER_NAMES = Object.fromEntries(
    [
        'Model',
        'Context',
        'Skills',
        'Tools',
        'Orchestration',
        'Security',
        'Governance',
        'Evaluation',
    ].map((label, position) => [position + 1, label]),
);
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// Operational Document Generator
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
/**
 * Dispatches to the per-layer operational guide generator.
 *
 * @param {number} layerNumber - Layer to render; only the numbers 1 through 8 are recognised.
 * @param {object} result - Scan result, forwarded untouched to the layer generator.
 * @param {object} context - Project context, forwarded untouched.
 * @param {Array} history - Scan history, forwarded untouched.
 * @returns {string} The generated document, or '' when layerNumber is not a recognised layer.
 */
export function generateOperationalDoc(layerNumber, result, context, history) {
    // Builders are wrapped in thunks so only the selected generator is ever invoked.
    // Map keys are matched without type coercion, so the string '1' does not select layer 1.
    const builders = new Map([
        [1, () => generateModelOps(result, context, history)],
        [2, () => generateContextOps(result, context, history)],
        [3, () => generateSkillsOps(result, context, history)],
        [4, () => generateToolsOps(result, context, history)],
        [5, () => generateOrchestrationOps(result, context, history)],
        [6, () => generateSecurityOps(result, context, history)],
        [7, () => generateGovernanceOps(result, context, history)],
        [8, () => generateEvaluationOps(result, context, history)],
    ]);
    const build = builders.get(layerNumber);
    return build ? build() : '';
}
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
// Shared helpers
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
/**
 * Builds the title and the blockquote metadata lines that open an operational guide.
 *
 * @param {number} layerNumber - Layer number shown in the title.
 * @param {string} name - Layer name shown in the title.
 * @param {object} result - Scan result; only `contentScore` is read here.
 * @param {object} context - Project context; reads `typeLabel`, `stackLabel`, `scaleLabel`,
 *   `riskLabel` and the optional `industry`.
 * @returns {string} Newline-joined header text ending with a newline.
 */
function header(layerNumber, name, result, context) {
    // Date portion (YYYY-MM-DD) of the current UTC ISO timestamp.
    const [today] = new Date().toISOString().split('T');
    // NOTE(review): the "Project" line prints typeLabel, the same value as "Type" —
    // confirm whether a project name was intended.
    const out = [
        `# Layer ${layerNumber}: ${name} — Operational Guide`,
        '',
        `> Project: ${context.typeLabel}`,
        `> Type: ${context.typeLabel} | Stack: ${context.stackLabel}`,
        `> Scale: ${context.scaleLabel} | Risk: ${context.riskLabel}`,
    ];
    // The industry line is emitted only when an industry is set.
    if (context.industry) {
        out.push(`> Industry: ${context.industry}`);
    }
    out.push(`> Last updated: ${today}`, `> Score: ${result.contentScore}/100`, '');
    return out.join('\n');
}
|
|
47
|
+
/**
 * Renders the "Current State" section for a layer.
 *
 * @param {object} result - Scan result; reads `detected`, `contentScore`, `evidence`
 *   (when detected) and `layer.gapAnalysis` (when not detected).
 * @returns {string} Newline-joined markdown section ending with a newline.
 */
function currentState(result) {
    const heading = ['## Current State', ''];
    if (!result.detected) {
        // Undetected layers show the gap analysis text in place of evidence.
        return [
            ...heading,
            'This layer is **not detected**. No evidence of implementation found.',
            '',
            result.layer.gapAnalysis,
            '',
        ].join('\n');
    }
    const body = [
        `This layer is **active** with a quality score of ${result.contentScore}/100.`,
        '',
    ];
    if (result.evidence.length > 0) {
        const bullets = [...result.evidence].map((ev) => `- ${ev}`);
        body.push('**Evidence found:**', ...bullets, '');
    }
    return [...heading, ...body].join('\n');
}
|
|
71
|
+
/**
 * Renders the "Change Log" section: a table of the most recent changes to one layer,
 * derived by comparing each scan in `history` with the scan before it.
 *
 * A change is recorded when the layer becomes detected, stops being detected, or its
 * `contentScore` differs between two consecutive scans. Scans where the layer is missing
 * from either side of the comparison are skipped.
 *
 * @param {Array<{timestamp: string, layers: Array<{number: number, detected: boolean, contentScore: number}>}>} history
 *   Scan history; entries are compared pairwise as (history[i - 1], history[i]).
 * @param {number} layerNumber - Layer whose changes are listed.
 * @returns {string} Newline-joined markdown section ending with a newline.
 */
function changeLog(history, layerNumber) {
    const MAX_ENTRIES = 10;
    const lines = [
        '## Change Log',
        '',
    ];
    if (!history || history.length === 0) {
        lines.push('No scan history available yet. Run `npx onion-check --status` periodically to build a change log.');
        lines.push('');
        return lines.join('\n');
    }
    // Show the LAST 10 changes. Scanning from the newest pair backwards keeps the early
    // exit while guaranteeing the most recent changes are the ones retained (a forward
    // scan that stops at 10 would keep the oldest changes and drop the newest).
    const relevant = [];
    for (let i = history.length - 1; i >= 1 && relevant.length < MAX_ENTRIES; i--) {
        const prev = history[i - 1];
        const curr = history[i];
        const prevLayer = prev.layers.find((l) => l.number === layerNumber);
        const currLayer = curr.layers.find((l) => l.number === layerNumber);
        if (!prevLayer || !currLayer) {
            continue;
        }
        let change = null;
        if (!prevLayer.detected && currLayer.detected) {
            change = `Layer added (score: ${currLayer.contentScore}/100)`;
        }
        else if (prevLayer.detected && !currLayer.detected) {
            change = 'Layer lost';
        }
        else if (prevLayer.contentScore !== currLayer.contentScore) {
            const delta = currLayer.contentScore - prevLayer.contentScore;
            // Negative deltas already carry their own minus sign.
            const sign = delta > 0 ? '+' : '';
            change = `Score: ${prevLayer.contentScore} -> ${currLayer.contentScore} (${sign}${delta})`;
        }
        if (change !== null) {
            // The timestamp is only read for scans that actually produced a change.
            relevant.push({ date: curr.timestamp.split('T')[0], change });
        }
    }
    // Entries were collected newest-first; present them oldest-first.
    relevant.reverse();
    if (relevant.length === 0) {
        lines.push('No changes recorded for this layer across scan history.');
    }
    else {
        lines.push('| Date | Change |');
        lines.push('|------|--------|');
        for (const entry of relevant) {
            lines.push(`| ${entry.date} | ${entry.change} |`);
        }
    }
    lines.push('');
    return lines.join('\n');
}
|
|
115
|
+
/**
 * Returns the closing block appended to every generated guide: a horizontal rule
 * followed by the "living document" notice.
 *
 * @returns {string} Footer text ending with a newline.
 */
function footer() {
    const rule = '---';
    const notice = '*Living document — updated by `npx onion-check`. Do not delete.*';
    return rule + '\n' + notice + '\n';
}
|
|
122
|
+
// ---------------------------------------------------------------------------
|
|
123
|
+
// Layer 1: Model
|
|
124
|
+
// ---------------------------------------------------------------------------
|
|
125
|
+
/**
 * Renders the Layer 1 (Model) operational guide as markdown.
 *
 * Sections, in order: header, Purpose, Current State, Operating Rules, What's Approved,
 * How to Extend, Quality Standards, Dependencies, Growth Path, Change Log, footer.
 *
 * @param {object} result - Scan result for this layer; reads `detected`, `evidence`, `contentScore`.
 * @param {object} ctx - Project context; reads `type`, `typeLabel`, `stack`, `scale`, `risk`.
 * @param {Array} history - Scan history, passed through to the change log.
 * @returns {string} The complete document.
 */
function generateModelOps(result, ctx, history) {
    const doc = [header(1, 'Model', result, ctx)];

    // Purpose — wording is tailored to the project type.
    doc.push('## Purpose', '');
    const projectType = ctx.type;
    if (projectType === 'saas') {
        doc.push('The model layer is your product\'s AI engine. Every user-facing AI feature depends on this layer being properly configured, cost-efficient, and reliable. For a SaaS product, model reliability directly impacts user retention — if the AI feels slow or inconsistent, users churn.');
    }
    else if (projectType === 'agency') {
        doc.push('The model layer powers every piece of AI-generated client work. For agency operations, model consistency matters more than capability — clients notice when output quality fluctuates between deliverables. One model, configured well, beats three models configured loosely.');
    }
    else if (projectType === 'content') {
        doc.push('The model layer generates your content. For a content platform, model selection directly impacts voice consistency, creative range, and production speed. The wrong model produces technically correct content that reads like every other AI-generated piece on the internet.');
    }
    else if (projectType === 'support') {
        doc.push('The model layer handles customer interactions. For support systems, accuracy and tone are non-negotiable — a wrong answer costs trust, and a robotic tone costs goodwill. The model must be reliable under load and consistent across thousands of conversations.');
    }
    else {
        doc.push(`The model layer is the foundation of your AI system. For your ${ctx.typeLabel.toLowerCase()}, this is where compute meets capability. Everything above this layer — context, skills, tools — depends on the model being properly configured and accessible.`);
    }
    doc.push('');

    // Current State
    doc.push(currentState(result));

    // Operating Rules — rule 2 depends on the stack; later rules depend on scale and risk.
    doc.push('## Operating Rules', '', '1. All model API calls must go through a centralized client — no direct SDK imports scattered across the codebase.');
    if (ctx.stack === 'node') {
        doc.push('2. Use `@anthropic-ai/sdk` or `@ai-sdk/anthropic` for TypeScript. Pick one and standardize.');
    }
    else if (ctx.stack === 'python') {
        doc.push('2. Use the official `anthropic` Python package. Avoid raw HTTP calls.');
    }
    else {
        doc.push('2. Use official SDKs for your stack. Official SDKs handle retries, rate limits, and streaming correctly.');
    }
    doc.push(
        '3. API keys must be in `.env`, never in source code. The `.env.example` must document every required key.',
        '4. Model fallback: if the primary model is unavailable, degrade gracefully — never expose raw API errors to users.',
    );
    const atScale = ctx.scale === 'mid' || ctx.scale === 'enterprise';
    if (atScale) {
        doc.push(
            '5. Implement model routing: cheap models for simple tasks (classification, extraction), expensive models for complex reasoning.',
            '6. Log token usage per request for cost monitoring and optimization.',
        );
    }
    if (ctx.risk === 'high' || ctx.risk === 'critical') {
        // The rule number continues after whichever scale rules were emitted above.
        const ruleNumber = atScale ? '7' : '5';
        doc.push(`${ruleNumber}. Rate limiting must be enforced at the application level, not just relied upon from the API provider.`);
    }
    doc.push('');

    // What's Approved
    doc.push('## What\'s Approved', '');
    if (result.detected && result.evidence.length > 0) {
        doc.push('Based on scan detection:');
        for (const item of result.evidence) {
            doc.push(`- ${item}`);
        }
    }
    else {
        doc.push('No model configuration detected yet. After setup, this section will list approved models and SDKs.');
    }
    doc.push('');

    // How to Extend — only steps 1 and 3 vary by stack.
    let installStep = '1. Add the SDK to your dependency manager';
    let wrapperStep = '3. Create a client wrapper module';
    if (ctx.stack === 'node') {
        installStep = '1. Add the SDK to `package.json` (`npm install @anthropic-ai/sdk`)';
        wrapperStep = '3. Create a client wrapper in `lib/ai/` or `src/ai/`';
    }
    else if (ctx.stack === 'python') {
        installStep = '1. Add the SDK to `requirements.txt` or `pyproject.toml`';
        wrapperStep = '3. Create a client wrapper in `lib/ai/` or `src/ai/`';
    }
    doc.push(
        '## How to Extend',
        '',
        '### Adding a new model provider',
        installStep,
        '2. Add the API key to `.env` and `.env.example`',
        wrapperStep,
        '4. Update model routing config if using multi-model setup',
        '5. Run `npx onion-check --status` to verify Layer 1 score improved',
        '',
    );

    // Quality Standards
    doc.push('## Quality Standards', '');
    if (ctx.scale === 'solo') {
        doc.push(
            '- One model, properly configured, with API key in `.env`',
            '- `.env.example` documents all required keys',
            '- Graceful error handling on API failures',
        );
    }
    else {
        doc.push(
            '- Centralized model client with no direct SDK imports in business logic',
            '- `.env.example` documents all required keys with descriptions',
            '- Graceful fallback on model unavailability',
            '- Response latency monitoring',
            '- Token usage tracking for cost visibility',
        );
    }
    doc.push('');

    // Dependencies
    doc.push(
        '## Dependencies',
        '',
        '- **Requires:** Nothing — this is the foundation layer',
        '- **Affects:** Every layer above depends on model availability',
        '- **Alert:** If the model config changes, verify all skills and context still produce expected output',
        '',
    );

    // Growth Path — three tiers: not detected, score below 50, otherwise.
    let now;
    let thirtyDays;
    let ninetyDays;
    if (!result.detected) {
        now = 'Configure at least one model with API keys in `.env`. This is step zero.';
        thirtyDays = 'Centralized client wrapper, `.env.example` with all keys documented, basic error handling.';
        ninetyDays = ctx.scale === 'solo'
            ? 'Reliable model integration with monitoring and graceful degradation.'
            : 'Multi-model routing by task complexity, cost monitoring dashboard, automatic fallback on failures.';
    }
    else if (result.contentScore < 50) {
        now = 'Centralize model access through a single client wrapper. Remove any scattered direct SDK imports.';
        thirtyDays = 'Add `.env.example`, implement graceful error handling, add basic latency logging.';
        ninetyDays = 'Token usage tracking, cost alerts, and model routing if processing volume warrants it.';
    }
    else {
        now = 'Layer is solid. Review for any cost optimization opportunities.';
        thirtyDays = 'Add model performance benchmarks to catch regressions when providers update models.';
        ninetyDays = 'Full observability: latency percentiles, token costs per feature, quality metrics per model version.';
    }
    doc.push(
        '## Growth Path',
        '',
        '### Now (immediate)',
        now,
        '',
        '### 30 days',
        thirtyDays,
        '',
        '### 90 days',
        ninetyDays,
        '',
    );

    doc.push(changeLog(history, 1));
    doc.push(footer());
    return doc.join('\n');
}
|
|
277
|
+
// ---------------------------------------------------------------------------
|
|
278
|
+
// Layer 2: Context
|
|
279
|
+
// ---------------------------------------------------------------------------
|
|
280
|
+
/**
 * Renders the Layer 2 (Context) operational guide as markdown.
 *
 * Sections, in order: header, Purpose, Current State, Operating Rules, What's Approved,
 * How to Extend, Quality Standards, Dependencies, Growth Path, Change Log, footer.
 *
 * @param {object} result - Scan result for this layer; reads `detected`, `evidence`, `contentScore`.
 * @param {object} ctx - Project context; reads `type`, `typeLabel`, `risk`.
 * @param {Array} history - Scan history, passed through to the change log.
 * @returns {string} The complete document.
 */
function generateContextOps(result, ctx, history) {
    const doc = [header(2, 'Context', result, ctx)];

    // Purpose — wording is tailored to the project type.
    doc.push('## Purpose', '');
    const projectType = ctx.type;
    if (projectType === 'agency') {
        doc.push('The context layer encodes everything a senior account manager knows about each client — their preferences, communication style, pet peeves, and institutional knowledge. Without it, every AI-generated deliverable sounds like it was written by a stranger who read the brief but never met the client.');
    }
    else if (projectType === 'saas') {
        doc.push('The context layer is what makes your AI feel like part of your product instead of a generic chatbot bolted on. It encodes your product domain, user personas, quality standards, and the specific language your users expect. Generic AI output is the number one reason users churn from AI-powered features.');
    }
    else if (projectType === 'content') {
        doc.push('The context layer is your editorial DNA. It encodes voice, tone, audience expectations, and the specific standards that separate your content from everything else. Without it, every piece reads like it could have come from any AI tool — technically competent and completely forgettable.');
    }
    else if (projectType === 'support') {
        doc.push('The context layer gives your support AI the product knowledge and communication standards that a senior support agent carries. Without it, responses are technically accurate but feel robotic — they answer the question but miss the customer\'s actual frustration.');
    }
    else {
        doc.push(`The context layer turns your AI from a smart generalist into a specialist that understands your ${ctx.typeLabel.toLowerCase()}. It encodes business knowledge, domain expertise, and quality standards that make every output feel like it came from someone who actually knows your operation.`);
    }
    doc.push('');
    doc.push(currentState(result));

    // Operating Rules — rules 3 and 4 depend on the project type; rule 5 on risk.
    doc.push(
        '## Operating Rules',
        '',
        '1. A `CLAUDE.md` or equivalent system prompt must exist at the project root with business-specific instructions.',
        '2. Context documents must be reviewed and updated at least bi-weekly — stale context produces stale output.',
    );
    if (ctx.type === 'agency') {
        doc.push(
            '3. Each client must have their own context file or section — shared generic context produces generic work.',
            '4. Client preferences discovered during work must be documented in context within the same session.',
        );
    }
    else if (ctx.type === 'saas') {
        doc.push(
            '3. Product terminology and user personas must be documented — the AI must speak your product\'s language.',
            '4. Error messages and edge case handling must be specified in context, not left to model defaults.',
        );
    }
    else if (ctx.type === 'content') {
        doc.push(
            '3. Voice profile must be documented with specific examples, not just adjectives like "professional" or "friendly."',
            '4. Platform-specific variations (blog vs. social vs. email) must be encoded separately.',
        );
    }
    else {
        doc.push(
            '3. Domain-specific terminology must be documented — the AI must use your organization\'s language.',
            '4. Quality standards must be explicit with examples of good and bad output.',
        );
    }
    if (ctx.risk === 'high' || ctx.risk === 'critical') {
        doc.push('5. Context must never contain actual credentials, API keys, or PII — only references to where they\'re stored.');
    }
    doc.push('');

    // What's Approved
    doc.push('## What\'s Approved', '');
    if (result.detected && result.evidence.length > 0) {
        doc.push('Detected context sources:');
        for (const item of result.evidence) {
            doc.push(`- ${item}`);
        }
    }
    else {
        doc.push('No context layer detected. After setup, this section will list all context sources and their refresh schedule.');
    }
    doc.push('');

    // How to Extend — agencies get an extra per-client walkthrough.
    doc.push(
        '## How to Extend',
        '',
        '### Adding new business context',
        '1. Create or update `CLAUDE.md` at the project root',
        '2. Add domain knowledge to a `context/` or `knowledge/` directory',
        '3. Include specific examples — "write like this, not like that" is 10x more useful than adjectives',
        '4. Add reference documents that the AI should know about',
        '5. Run `npx onion-check --status` to verify Layer 2 score improved',
        '',
    );
    if (ctx.type === 'agency') {
        doc.push(
            '### Adding a new client\'s context',
            '1. Create `context/clients/{client-name}/` directory',
            '2. Add `preferences.md` with communication style, terminology, and known constraints',
            '3. Add `history.md` with project history and lessons learned',
            '4. Reference the client context directory in `CLAUDE.md`',
            '',
        );
    }

    // Quality Standards
    doc.push(
        '## Quality Standards',
        '',
        '- Context produces output that a domain expert would recognize as informed',
        '- New team members can read the context and understand the AI\'s operating environment',
        '- Context is specific enough to differentiate output from generic model responses',
        '- No stale information older than 30 days without explicit review',
        '',
    );

    // Dependencies
    doc.push(
        '## Dependencies',
        '',
        '- **Requires:** Layer 1 (Model) — context is useless without a model to consume it',
        '- **Affects:** Layer 3 (Skills) — skills operate within the context\'s domain boundaries',
        '- **Affects:** Layer 5 (Orchestration) — agents need context to make routing decisions',
        '- **Alert:** When context changes, verify that skills still produce expected output quality',
        '',
    );

    // Growth Path — three tiers: not detected, score below 50, otherwise.
    let now;
    let thirtyDays;
    let ninetyDays;
    if (!result.detected) {
        now = 'Create a `CLAUDE.md` at the project root. Start with: what this project does, who it serves, what "good output" looks like, and any terminology that differs from industry standard.';
        thirtyDays = 'Structured context directory with domain knowledge, quality examples, and reference documents.';
        ninetyDays = 'Living context system with regular review cycles, version-tracked changes, and measurable impact on output quality.';
    }
    else if (result.contentScore < 50) {
        now = 'Expand existing context with specific examples and domain terminology. Generic instructions like "be professional" don\'t count.';
        thirtyDays = 'Add structured reference documents covering common scenarios, edge cases, and quality benchmarks.';
        ninetyDays = 'Context review process integrated into sprint cycles. Every major product change triggers a context update.';
    }
    else {
        now = 'Context is strong. Verify it\'s being consumed effectively by downstream skills and agents.';
        thirtyDays = 'Add freshness tracking — flag any context document not reviewed in 30+ days.';
        ninetyDays = 'Automated context validation: test that context produces measurably different output than a generic prompt.';
    }
    doc.push(
        '## Growth Path',
        '',
        '### Now (immediate)',
        now,
        '',
        '### 30 days',
        thirtyDays,
        '',
        '### 90 days',
        ninetyDays,
        '',
    );

    doc.push(changeLog(history, 2));
    doc.push(footer());
    return doc.join('\n');
}
|
|
415
|
+
// ---------------------------------------------------------------------------
|
|
416
|
+
// Layer 3: Skills
|
|
417
|
+
// ---------------------------------------------------------------------------
|
|
418
|
+
/**
 * Renders the Layer 3 (Skills) operational guide as markdown.
 *
 * Sections, in order: header, Purpose, Current State, Operating Rules, What's Approved,
 * How to Extend, Quality Standards, Dependencies, Growth Path, Change Log, footer.
 *
 * @param {object} result - Scan result for this layer; reads `detected`, `evidence`, `contentScore`.
 * @param {object} ctx - Project context; reads `type`, `typeLabel`, `risk`.
 * @param {Array} history - Scan history, passed through to the change log.
 * @returns {string} The complete document.
 */
function generateSkillsOps(result, ctx, history) {
    const doc = [header(3, 'Skills', result, ctx)];

    // Purpose — wording is tailored to the project type.
    doc.push('## Purpose', '');
    const projectType = ctx.type;
    if (projectType === 'agency') {
        doc.push('Skills are the encoded expertise that let your AI produce client-ready work without hand-holding. For agency operations, each skill should represent a capability you\'d normally assign to a specialist — report generation in a client\'s format, content creation in their voice, communications that match their protocols. The goal is specialization, not generalization.');
    }
    else if (projectType === 'saas') {
        doc.push('Skills are the packaged workflows that power your product\'s AI features. Each skill should handle a complete user task — not just the happy path, but the edge cases that generate support tickets. The first version of a skill is always too generic. The production version handles the 20% of cases that cause 80% of the problems.');
    }
    else if (projectType === 'content') {
        doc.push('Skills encode your editorial workflows into repeatable, quality-controlled processes. SEO optimization, content formatting for different channels, headline generation, editorial review — each skill should produce output that meets your editorial standards without manual intervention.');
    }
    else {
        doc.push(`Skills turn your AI from a generalist with context into an actual specialist. For your ${ctx.typeLabel.toLowerCase()}, each skill should encode a specific capability that the AI can execute reliably, repeatedly, and with quality that meets your standards.`);
    }
    doc.push('');
    doc.push(currentState(result));

    // Operating Rules — an optional rule 4 (agency, otherwise high/critical risk) shifts
    // the number of the final versioning rule.
    doc.push(
        '## Operating Rules',
        '',
        '1. Each skill must have a clear, documented scope — what it does, what it doesn\'t do, and when to use it.',
        '2. Skills must handle edge cases explicitly, not fall back to generic model behavior.',
        '3. New skills must be tested against real scenarios before deployment, not just synthetic examples.',
    );
    if (ctx.type === 'agency') {
        doc.push('4. Client-specific skills must be isolated — one client\'s customizations must never leak into another\'s output.');
    }
    else if (ctx.risk === 'high' || ctx.risk === 'critical') {
        doc.push('4. Skills that modify data or trigger actions must include validation steps before execution.');
    }
    const versioningRuleNumber = ctx.type === 'agency' || ctx.risk === 'high' || ctx.risk === 'critical' ? '5' : '4';
    doc.push(`${versioningRuleNumber}. Skills should be versioned. When a skill changes, the old version should remain accessible until the new one is validated.`);
    doc.push('');

    // What's Approved
    doc.push('## What\'s Approved', '');
    if (result.detected && result.evidence.length > 0) {
        doc.push('Detected skill sources:');
        for (const item of result.evidence) {
            doc.push(`- ${item}`);
        }
        doc.push('', 'Browse [skillstack.me](https://skillstack.me) for production-ready skills to add to your stack.');
    }
    else {
        doc.push('No skills detected. Browse [skillstack.me](https://skillstack.me) for production-ready skills in your domain.');
    }
    doc.push('');

    // How to Extend
    doc.push(
        '## How to Extend',
        '',
        '### Adding a new skill',
        '1. Create a skill file in `.claude/skills/` or `skills/` directory',
        '2. Define the skill\'s scope: what triggers it, what it produces, what it doesn\'t handle',
        '3. Include at least 3 examples covering the happy path, an edge case, and a failure scenario',
        '4. Test against real project data before marking it production-ready',
        '5. Run `npx onion-check --status` to verify Layer 3 score improved',
        '',
    );

    // Quality Standards
    doc.push(
        '## Quality Standards',
        '',
        '- Skills produce output that meets the quality bar without manual intervention',
        '- Edge cases are handled explicitly, not defaulted to generic model behavior',
        '- Each skill has clear documentation of its scope and limitations',
        '- Skills are tested against real scenarios, not just synthetic examples',
        '',
    );

    // Dependencies
    doc.push(
        '## Dependencies',
        '',
        '- **Requires:** Layer 1 (Model), Layer 2 (Context) — skills operate within the context\'s domain',
        '- **Affects:** Layer 5 (Orchestration) — orchestration routes tasks to skills',
        '- **Affects:** Layer 8 (Evaluation) — skill output quality should be measured',
        '- **Alert:** When skills change, run evaluation benchmarks to verify quality hasn\'t drifted',
        '',
    );

    // Growth Path — three tiers: not detected, score below 50, otherwise.
    let now;
    let thirtyDays;
    let ninetyDays;
    if (!result.detected) {
        now = 'Add 3-5 skills covering your most common workflows. Start with the tasks you repeat most often.';
        thirtyDays = 'Refine skills based on real usage. Add edge case handling for the scenarios that tripped up the first version.';
        ninetyDays = 'Full skill library with coverage metrics. Every common workflow has a dedicated skill with documented quality benchmarks.';
    }
    else if (result.contentScore < 50) {
        now = 'Review existing skills for specificity. If a skill says "write content" without specifying format, voice, and quality criteria, it\'s too generic.';
        thirtyDays = 'Add edge case handling and failure scenarios to each skill. Test against real data.';
        ninetyDays = 'Skill quality metrics: track which skills produce output that needs manual revision vs. which go straight to production.';
    }
    else {
        now = 'Skills are solid. Look for gaps — which tasks still require manual work that could be encoded as skills?';
        thirtyDays = 'Add skill versioning and A/B testing to measure quality improvements.';
        ninetyDays = 'Automated skill quality monitoring with alerts when output quality drifts below baseline.';
    }
    doc.push(
        '## Growth Path',
        '',
        '### Now (immediate)',
        now,
        '',
        '### 30 days',
        thirtyDays,
        '',
        '### 90 days',
        ninetyDays,
        '',
    );

    doc.push(changeLog(history, 3));
    doc.push(footer());
    return doc.join('\n');
}
|
|
526
|
+
// ---------------------------------------------------------------------------
|
|
527
|
+
// Layer 4: Tools
|
|
528
|
+
// ---------------------------------------------------------------------------
|
|
529
|
+
/**
 * Renders the Layer 4 ("Tools") operational document as a single Markdown string.
 *
 * Sections are emitted in a fixed order: header, Purpose, current state,
 * Operating Rules, What's Approved, How to Extend, Quality Standards,
 * Dependencies, Growth Path, change log, footer.
 *
 * @param {object} result - Scan result for this layer; reads `detected`,
 *   `evidence` (iterated, `.length` checked) and `contentScore`.
 * @param {object} ctx - Project context; reads `risk`, `riskLabel`, `type`
 *   and `typeLabel`.
 * @param {*} history - Passed through untouched to `changeLog`.
 * @returns {string} The document, lines joined with `\n`.
 */
function generateToolsOps(result, ctx, history) {
    const lines = [header(4, 'Tools', result, ctx), '## Purpose', ''];
    // High and critical risk profiles share the stricter wording throughout.
    const elevatedRisk = ctx.risk === 'high' || ctx.risk === 'critical';

    // Purpose: the label lookups stay inside their branch so they are only
    // evaluated when that branch is actually taken.
    if (elevatedRisk) {
        lines.push(`Tools give the model hands — but for your setup (${ctx.riskLabel.toLowerCase()}), every tool connection is an attack surface. The value is enormous: going from "the AI wrote me a plan" to "the AI executed the plan." The risk is proportional. Every MCP server, every API integration, every database connection needs scoped permissions and audit logging.`);
    }
    else if (ctx.type === 'agency') {
        lines.push('Tools close the automation loop for agency work. The goal is end-to-end: the AI reads a brief, generates the deliverable, and posts it where it needs to go — without you copy-pasting between tools. Each tool connection eliminates a manual step that was eating your margin.');
    }
    else {
        lines.push(`Tools give the model the ability to act, not just advise. For your ${ctx.typeLabel.toLowerCase()}, each tool connection should solve a specific "I wish the AI could just do this" moment. If you find yourself copy-pasting AI output into another tool, that's a missing tool connection.`);
    }

    // Operating rules: two shared rules, then a risk-dependent tail.
    lines.push(
        '',
        currentState(result),
        '## Operating Rules',
        '',
        '1. Every tool connection must be documented in `.mcp.json` or equivalent config — no ad-hoc integrations.',
        '2. Each tool must have scoped permissions — minimum necessary access, never wildcard.'
    );
    if (elevatedRisk) {
        lines.push(
            '3. All tool actions must be logged for audit purposes — who triggered what, when, and with what data.',
            '4. Write access to production systems requires explicit approval workflow (see Layer 7: Governance).',
            '5. New tool connections require security review before activation (see Layer 6: Security).'
        );
    }
    else {
        lines.push(
            '3. Read-only access first, then incrementally add write access with guardrails.',
            '4. Log tool usage for debugging and optimization — you need to know which tools are actually used.'
        );
    }

    // What's approved: list scanner evidence when there is any.
    lines.push('', '## What\'s Approved', '');
    if (result.detected && result.evidence.length > 0) {
        lines.push('Detected tool configurations:');
        for (const item of result.evidence) {
            lines.push(`- ${item}`);
        }
    }
    else {
        lines.push('No tool connections detected. After setup, this section will list all configured MCP servers and integrations.');
    }

    // How to extend: the closing step number depends on how many steps preceded it.
    lines.push(
        '',
        '## How to Extend',
        '',
        '### Adding a new tool/MCP server',
        '1. Add the server config to `.mcp.json`',
        '2. Scope permissions to minimum necessary (read-only if possible)'
    );
    if (elevatedRisk) {
        lines.push(
            '3. Add the server to security scan checks',
            '4. Document what data the server can access in this file',
            '5. Get security review approval before enabling in production'
        );
    }
    else {
        lines.push(
            '3. Test the connection with a simple read operation before enabling writes',
            '4. Document what the tool does and why it\'s needed'
        );
    }
    const verifyStep = elevatedRisk ? '6' : '5';
    lines.push(
        `${verifyStep}. Run \`npx onion-check --status\` to verify Layer 4 score improved`,
        '',
        '## Quality Standards',
        '',
        '- Every tool connection has a documented purpose',
        '- Permissions are scoped to minimum necessary',
        '- Failed tool calls are handled gracefully — the AI doesn\'t crash, it reports the issue',
        '- Unused tool connections are removed — dead integrations are security liabilities',
        '',
        '## Dependencies',
        '',
        '- **Requires:** Layer 1 (Model) — tools need a model to invoke them',
        '- **Requires:** Layer 6 (Security) — every tool is an attack surface that must be secured',
        '- **Affects:** Layer 5 (Orchestration) — agents use tools to execute tasks',
        '- **Affects:** Layer 7 (Governance) — tool actions may require approval workflows',
        '- **Alert:** When adding a new tool, verify Layer 6 (Security) covers the new attack surface',
        '',
        '## Growth Path',
        ''
    );

    // Growth path: pick the (now, 30-day, 90-day) advice by maturity tier.
    const [nowAdvice, thirtyDayAdvice, ninetyDayAdvice] = !result.detected
        ? [
            'Set up `.mcp.json` with your first tool connection — filesystem access is the easiest starting point.',
            'Add connections to your primary systems: project management, communication, data sources.',
            'Full integration surface: every "copy-paste from AI" moment has been replaced with a direct tool connection.',
        ]
        : result.contentScore < 50
            ? [
                'Audit existing tool connections for permission scoping. Remove any wildcard access.',
                'Add usage logging and identify which tools are underutilized or misconfigured.',
                'Comprehensive tool suite with scoped permissions, usage monitoring, and documented purpose for each connection.',
            ]
            : [
                'Tool layer is solid. Look for automation gaps — which manual steps remain?',
                'Add tool usage analytics to identify optimization opportunities.',
                'Self-healing tool connections: automatic retry, fallback, and alerting on failures.',
            ];
    lines.push(
        '### Now (immediate)',
        nowAdvice,
        '',
        '### 30 days',
        thirtyDayAdvice,
        '',
        '### 90 days',
        ninetyDayAdvice,
        '',
        changeLog(history, 4),
        footer()
    );
    return lines.join('\n');
}
|
|
639
|
+
// ---------------------------------------------------------------------------
|
|
640
|
+
// Layer 5: Orchestration
|
|
641
|
+
// ---------------------------------------------------------------------------
|
|
642
|
+
/**
 * Renders the Layer 5 ("Orchestration") operational document as a single
 * Markdown string.
 *
 * The wording adapts to `ctx.scale`. A scale other than 'solo', 'small',
 * 'mid' or 'enterprise' produces an empty Purpose paragraph and is treated
 * as a non-solo team everywhere else.
 *
 * @param {object} result - Scan result for this layer; reads `detected`,
 *   `evidence` (iterated, `.length` checked) and `contentScore`.
 * @param {object} ctx - Project context; reads `scale` and `scaleLabel`.
 * @param {*} history - Passed through untouched to `changeLog`.
 * @returns {string} The document, lines joined with `\n`.
 */
function generateOrchestrationOps(result, ctx, history) {
    const lines = [header(5, 'Orchestration', result, ctx), '## Purpose', ''];
    const isSolo = ctx.scale === 'solo';
    const isLargeOrg = ctx.scale === 'mid' || ctx.scale === 'enterprise';

    // Purpose: one paragraph per known scale; unknown scales add nothing.
    if (isSolo) {
        lines.push('Orchestration coordinates multiple agents — but at your scale, it\'s optional. One well-configured agent with strong context and skills often outperforms three poorly-coordinated agents. If you do orchestrate, start minimal: one agent generates, another reviews. That\'s the minimum viable orchestration that actually improves quality.');
    }
    else if (ctx.scale === 'small') {
        lines.push('Orchestration is what turns "I have an AI tool" into "I have a system." For a small team, you want 2-5 specialized agents with clear boundaries and documented handoff protocols. The key mistake: agents with overlapping responsibilities. When two agents can handle the same task, they will, and they\'ll do it differently.');
    }
    else if (isLargeOrg) {
        lines.push(`At your scale (${ctx.scaleLabel.toLowerCase()}), orchestration is the nervous system. Without it, you have isolated islands of automation that duplicate work and produce inconsistent results. You need routing logic that assigns tasks by complexity and domain, workflow definitions for multi-step processes, and monitoring that shows you the full pipeline.`);
    }

    // Operating rules: a short list for solo builders, a fuller one for teams.
    lines.push('', currentState(result), '## Operating Rules', '');
    if (isSolo) {
        lines.push(
            '1. If using multiple agents, each must have a clear, non-overlapping role.',
            '2. Document agent roles in `AGENTS.md` — who does what, and what they don\'t do.',
            '3. Don\'t add agents for complexity\'s sake. Every agent must justify its existence with a specific capability gap.'
        );
    }
    else {
        lines.push(
            '1. Every agent must have a clear, documented role in `AGENTS.md` — no overlapping responsibilities.',
            '2. Handoff protocols between agents must be explicit — what data is passed, what format, what\'s expected back.',
            '3. Routing logic must be deterministic — the same input should always route to the same agent.',
            '4. Agent failures must be handled gracefully — if one agent fails, the system doesn\'t collapse.'
        );
        if (isLargeOrg) {
            lines.push('5. Agent performance must be monitored individually — which agents are bottlenecks, which are underutilized?');
        }
    }

    // What's approved: list scanner evidence, or explain its absence.
    lines.push('', '## What\'s Approved', '');
    if (result.detected && result.evidence.length > 0) {
        lines.push('Detected orchestration components:');
        for (const item of result.evidence) {
            lines.push(`- ${item}`);
        }
    }
    else {
        lines.push(isSolo
            ? 'No orchestration detected — and that\'s fine at your scale. Add it when a single agent can\'t handle the workload.'
            : 'No orchestration detected. For a team of your size, this becomes important as soon as you have 2+ agents.');
    }

    // The next four sections are static text.
    lines.push(
        '',
        '## How to Extend',
        '',
        '### Adding a new agent',
        '1. Define the agent\'s role, scope, and boundaries',
        '2. Create the agent configuration in `agents/` directory',
        '3. Document the agent in `AGENTS.md` with clear responsibility boundaries',
        '4. Define handoff protocols with existing agents',
        '5. Test the routing: does the right work reach the right agent?',
        '6. Run `npx onion-check --status` to verify Layer 5 score improved',
        '',
        '## Quality Standards',
        '',
        '- `AGENTS.md` exists and accurately describes the current agent topology',
        '- No two agents have overlapping responsibilities',
        '- Routing decisions are traceable — you can explain why a task went to agent X',
        '- The system degrades gracefully when an agent fails',
        '',
        '## Dependencies',
        '',
        '- **Requires:** Layer 1 (Model), Layer 2 (Context), Layer 3 (Skills) — agents need all three to function',
        '- **Affects:** Layer 6 (Security) — each agent is an additional attack surface',
        '- **Affects:** Layer 8 (Evaluation) — multi-agent output quality needs compound drift monitoring',
        '- **Alert:** Adding a new agent without evaluation means quality drift is invisible and compounds faster',
        '',
        '## Growth Path',
        ''
    );

    // Growth path: pick the (now, 30-day, 90-day) advice by maturity tier.
    let nowAdvice;
    let thirtyDayAdvice;
    let ninetyDayAdvice;
    if (!result.detected) {
        nowAdvice = isSolo
            ? 'Skip this layer unless you have a specific need for multi-agent coordination. Focus on Layers 1-4 first.'
            : 'Create `AGENTS.md` documenting your current agent setup, even if it\'s just one. This is the foundation.';
        thirtyDayAdvice = isSolo
            ? 'If needed, add a review agent to check the primary agent\'s output. That\'s your minimum viable orchestration.'
            : 'Formalize routing logic: which tasks go to which agent, based on what criteria.';
        ninetyDayAdvice = 'Monitored orchestration with performance tracking per agent and workflow.';
    }
    else if (result.contentScore < 50) {
        nowAdvice = 'Document existing agents in `AGENTS.md` with clear boundaries. Identify any overlapping responsibilities.';
        thirtyDayAdvice = 'Formalize handoff protocols and add routing tests.';
        ninetyDayAdvice = 'Per-agent performance monitoring and workflow optimization.';
    }
    else {
        nowAdvice = 'Orchestration is solid. Review for bottlenecks and underutilized agents.';
        thirtyDayAdvice = 'Add agent performance dashboards and routing analytics.';
        ninetyDayAdvice = 'Self-optimizing routing: the system adjusts task assignment based on agent performance history.';
    }
    lines.push(
        '### Now (immediate)',
        nowAdvice,
        '',
        '### 30 days',
        thirtyDayAdvice,
        '',
        '### 90 days',
        ninetyDayAdvice,
        '',
        changeLog(history, 5),
        footer()
    );
    return lines.join('\n');
}
|
|
766
|
+
// ---------------------------------------------------------------------------
|
|
767
|
+
// Layer 6: Security
|
|
768
|
+
// ---------------------------------------------------------------------------
|
|
769
|
+
/**
 * Renders the Layer 6 ("Security") operational document as a single Markdown
 * string.
 *
 * The Purpose paragraph is chosen by `ctx.risk` ('critical', 'high', 'medium'
 * or 'low'); any other value leaves that paragraph out. High and critical
 * risk also add stricter rules, setup steps and quality standards.
 *
 * @param {object} result - Scan result for this layer; reads `detected`,
 *   `evidence` (iterated, `.length` checked) and `contentScore`.
 * @param {object} ctx - Project context; reads `risk` and `type`.
 * @param {*} history - Passed through untouched to `changeLog`.
 * @returns {string} The document, lines joined with `\n`.
 */
function generateSecurityOps(result, ctx, history) {
    const lines = [header(6, 'Security', result, ctx), '## Purpose', ''];

    // Purpose: table lookup keyed by risk level; a miss adds no paragraph.
    const purposeByRisk = new Map([
        ['critical', 'Security is the non-negotiable foundation for your mission-critical system. Every document your agent processes is a potential prompt injection vector. Every tool action is unprotected until you explicitly protect it. This layer must be locked down before you add features, not after. A single security incident at your risk level can be existential.'],
        ['high', 'You\'re handling money or PII with AI agents. This layer is the single biggest liability in your setup if it\'s not locked down. Every input is a potential injection vector. Every tool connection is an attack surface. Stop adding features until this layer is in place — the cost of a breach at your risk level makes every other priority irrelevant.'],
        ['medium', 'Client-facing AI output carries reputational risk. Your security layer must prevent prompt injection that could produce harmful content, unauthorized data access, and any action that could embarrass your organization in front of clients. The bar isn\'t "never been breached" — it\'s "can withstand a determined attempt."'],
        ['low', 'Even internal-only systems need baseline security. Prompt injection can exfiltrate internal data, credentials, and business logic. The attack surface is smaller than client-facing systems, but the protection must still exist. "Internal only" is not a security strategy.'],
    ]);
    const purpose = purposeByRisk.get(ctx.risk);
    if (purpose !== undefined) {
        lines.push(purpose);
    }

    const elevatedRisk = ctx.risk === 'high' || ctx.risk === 'critical';
    const isAgency = ctx.type === 'agency';

    // Operating rules: three shared rules, then a tail chosen by risk, then by type.
    lines.push(
        '',
        currentState(result),
        '## Operating Rules',
        '',
        '1. Every MCP server connection must have scoped permissions — no wildcard access, no "admin" mode.',
        '2. `.env` files must be in `.gitignore` — verified by pre-commit hook, not just policy.',
        '3. All external input (user messages, uploaded files, API responses) must be treated as potentially hostile.'
    );
    if (elevatedRisk) {
        lines.push(
            '4. Sensitive data must never appear in logs, error messages, or AI context.',
            '5. All agent actions on production systems must be logged for audit trail.',
            '6. Security scan must pass before any deployment — CI/CD pipeline must enforce this.',
            '7. Third-party MCP servers must be reviewed for data handling practices before integration.'
        );
    }
    else if (isAgency) {
        lines.push(
            '4. Client data must never appear in logs or error messages.',
            '5. Each client\'s data must be isolated — one client\'s information must never leak to another.'
        );
    }
    else {
        lines.push(
            '4. Credentials must never be hardcoded or logged.',
            '5. Security scan should run on every commit via pre-commit hook.'
        );
    }

    // What's approved: list scanner evidence, or flag the missing layer.
    lines.push('', '## What\'s Approved', '');
    if (result.detected && result.evidence.length > 0) {
        lines.push('Detected security infrastructure:');
        for (const item of result.evidence) {
            lines.push(`- ${item}`);
        }
    }
    else {
        lines.push('**ACTION REQUIRED:** No security layer detected.');
        if (elevatedRisk) {
            lines.push('', 'This is a blocker for your risk profile. Do not add new features or tool connections until basic security is in place.');
        }
    }

    // How to extend, part 1: baseline setup; the closing step number tracks the list length.
    lines.push(
        '',
        '## How to Extend',
        '',
        '### Setting up baseline security',
        '1. Add `.env` to `.gitignore` (verify it\'s not already committed)',
        '2. Install pre-commit hooks: `npx onion-check --guard`',
        '3. Create a `security-scan.sh` that checks for hardcoded credentials, exposed API keys, and permissive tool configs',
        '4. Add security rules documentation to `security-rules.md`'
    );
    if (elevatedRisk) {
        lines.push(
            '5. Set up CI/CD security pipeline that blocks deployment on scan failure',
            '6. Configure audit logging for all agent actions on production systems'
        );
    }
    const verifyStep = elevatedRisk ? '7' : '5';
    lines.push(
        `${verifyStep}. Run \`npx onion-check --status\` to verify Layer 6 score improved`,
        '',
        // How to extend, part 2: onboarding a tool securely.
        '### Adding a new tool/MCP server securely',
        '1. Scope permissions to minimum necessary (read-only if possible)',
        '2. Add the server to security scan checks',
        '3. Document what data the server can access in this file'
    );
    if (isAgency) {
        lines.push('4. If the server accesses client data, add it to the client data audit list');
    }

    // Quality standards: four shared bullets plus three for elevated risk.
    lines.push(
        '',
        '## Quality Standards',
        '',
        '- Zero hardcoded credentials in the codebase',
        '- Pre-commit hooks are installed and enforced',
        '- Every tool connection has documented, scoped permissions',
        '- Security scan runs automatically (pre-commit or CI/CD)'
    );
    if (elevatedRisk) {
        lines.push(
            '- Audit trail exists for all production actions',
            '- Input validation covers prompt injection patterns',
            '- Incident response plan documented and tested'
        );
    }
    lines.push(
        '',
        '## Dependencies',
        '',
        '- **Requires:** Nothing — security should exist before anything else',
        '- **Affects:** Layer 4 (Tools) — every tool connection must be secured',
        '- **Affects:** Layer 5 (Orchestration) — each agent is an additional attack surface',
        '- **Affects:** Layer 7 (Governance) — security violations should trigger governance workflows',
        '- **Alert:** When any new tool or agent is added, this layer must be updated to cover the new surface',
        '',
        '## Growth Path',
        ''
    );

    // Growth path: pick the (now, 30-day, 90-day) advice by maturity tier.
    let nowAdvice;
    let thirtyDayAdvice;
    let ninetyDayAdvice;
    if (!result.detected) {
        nowAdvice = elevatedRisk
            ? '**STOP adding features.** Set up `.gitignore` for `.env`, install pre-commit hooks (`npx onion-check --guard`), create `security-scan.sh`. This is your top priority.'
            : 'Add `.env` to `.gitignore`, install pre-commit hooks (`npx onion-check --guard`), create basic security rules documentation.';
        thirtyDayAdvice = 'Security scan in CI/CD pipeline, permission auditing for all tool connections, input validation for external data.';
        ninetyDayAdvice = 'Comprehensive security layer: automated scanning, audit logging, incident response plan, regular penetration testing against prompt injection.';
    }
    else if (result.contentScore < 50) {
        nowAdvice = 'Audit existing security for gaps. Check: are all tool permissions scoped? Are credentials in `.env`? Are pre-commit hooks running?';
        thirtyDayAdvice = 'Add CI/CD security pipeline. Add input validation for all external data sources.';
        ninetyDayAdvice = 'Automated security monitoring, regular audits, and prompt injection testing.';
    }
    else {
        nowAdvice = 'Security is solid. Schedule a quarterly security review to verify nothing has drifted.';
        thirtyDayAdvice = 'Add automated prompt injection testing to CI/CD pipeline.';
        ninetyDayAdvice = 'Bug bounty or red team exercise to stress-test the security layer.';
    }
    lines.push(
        '### Now (immediate)',
        nowAdvice,
        '',
        '### 30 days',
        thirtyDayAdvice,
        '',
        '### 90 days',
        ninetyDayAdvice,
        '',
        changeLog(history, 6),
        footer()
    );
    return lines.join('\n');
}
|
|
909
|
+
// ---------------------------------------------------------------------------
|
|
910
|
+
// Layer 7: Governance
|
|
911
|
+
// ---------------------------------------------------------------------------
|
|
912
|
+
/**
 * Renders the Layer 7 ("Governance") operational document as a single
 * Markdown string.
 *
 * Two derived flags drive the wording:
 *  - elevated risk: `ctx.risk` is 'high' or 'critical';
 *  - lightweight: `ctx.scale` is 'solo' and the risk is not elevated.
 *
 * @param {object} result - Scan result for this layer; reads `detected`,
 *   `evidence` (iterated, `.length` checked) and `contentScore`.
 * @param {object} ctx - Project context; reads `risk`, `riskLabel`, `scale`
 *   and `scaleLabel`.
 * @param {*} history - Passed through untouched to `changeLog`.
 * @returns {string} The document, lines joined with `\n`.
 */
function generateGovernanceOps(result, ctx, history) {
    const lines = [header(7, 'Governance', result, ctx), '## Purpose', ''];
    const elevatedRisk = ctx.risk === 'high' || ctx.risk === 'critical';
    const isSolo = ctx.scale === 'solo';
    // Solo builders get the lightweight track only when risk is not elevated.
    const lightweight = isSolo && !elevatedRisk;

    // Purpose: the label lookups stay inside their branch so they are only
    // evaluated when that branch is actually taken.
    if (elevatedRisk) {
        lines.push(`Governance is accountability. When an AI agent makes a decision that costs money (${ctx.riskLabel.toLowerCase()}), someone needs to be responsible, and there needs to be a paper trail. Approval workflows, escalation paths, and compliance frameworks aren't bureaucracy — they're the difference between "we caught it" and "we're in a lawsuit."`);
    }
    else if (isSolo) {
        lines.push('For a solo builder, governance is lightweight — but it still matters. Even if you\'re the only one approving decisions, having a defined escalation path for high-stakes actions (sending money, deleting data, communicating externally) prevents the AI from doing something you\'d never approve while you\'re not looking.');
    }
    else {
        lines.push(`Governance defines who can approve what, when human oversight is required, and what happens when the AI encounters something outside its authority. For your ${ctx.scaleLabel.toLowerCase()} team, this is the difference between controlled autonomy and chaos.`);
    }

    // Operating rules.
    lines.push('', currentState(result), '## Operating Rules', '');
    if (lightweight) {
        lines.push(
            '1. Define a list of actions that always require your explicit approval (sending money, deleting data, external communications).',
            '2. The AI must never take an irreversible action without approval.',
            '3. Maintain a simple decision log — what the AI decided, when, and with what context.'
        );
    }
    else {
        lines.push(
            '1. High-stakes actions must require human approval before execution.',
            '2. Escalation paths must be defined — what happens when the AI encounters something outside its authority?',
            '3. Decision audit trail: every autonomous decision must be logged with rationale.',
            '4. Approval authority must be role-based — not everyone can approve the same actions.'
        );
        if (elevatedRisk) {
            lines.push(
                '5. Compliance framework must map to industry regulations (SOC 2, GDPR, PCI DSS, etc.).',
                '6. Regular governance audits to verify the framework is being followed, not just documented.'
            );
        }
    }

    // What's approved: list scanner evidence, or explain its absence.
    lines.push('', '## What\'s Approved', '');
    if (result.detected && result.evidence.length > 0) {
        lines.push('Detected governance infrastructure:');
        for (const item of result.evidence) {
            lines.push(`- ${item}`);
        }
    }
    else {
        lines.push(lightweight
            ? 'No governance layer detected. At your scale this is lower priority, but define at minimum which actions require your approval.'
            : 'No governance layer detected. Add approval workflows before scaling autonomous agent actions.');
    }

    // How to extend: the closing step number follows the length of the list above it.
    lines.push('', '## How to Extend', '', '### Setting up governance');
    let verifyStep;
    if (lightweight) {
        lines.push(
            '1. Create a `governance/approval-rules.md` listing actions that require your explicit approval',
            '2. Add a decision log template at `governance/decision-log.md`',
            '3. Define what "escalation" means — even for a solo builder, some decisions should wait for your review'
        );
        verifyStep = '4';
    }
    else {
        lines.push(
            '1. Create a `governance/` directory',
            '2. Define approval workflows in `governance/approval-rules.md`',
            '3. Set up escalation paths in `governance/escalation-policy.md`',
            '4. Create role-based authorization matrix'
        );
        if (elevatedRisk) {
            lines.push(
                '5. Map compliance requirements to specific governance controls',
                '6. Set up automated compliance monitoring'
            );
            verifyStep = '7';
        }
        else {
            verifyStep = '5';
        }
    }
    lines.push(
        `${verifyStep}. Run \`npx onion-check --status\` to verify Layer 7 score improved`,
        '',
        '## Quality Standards',
        '',
        '- Every high-stakes action has a defined approval workflow',
        '- Autonomous decisions are logged with rationale',
        '- Escalation paths are tested — not just documented, actually followed'
    );
    if (elevatedRisk) {
        lines.push(
            '- Compliance framework maps to specific regulatory requirements',
            '- Governance audits run quarterly at minimum'
        );
    }
    lines.push(
        '',
        '## Dependencies',
        '',
        '- **Requires:** Layer 6 (Security) — governance assumes security controls exist',
        '- **Affects:** Layer 4 (Tools) — tool actions may require approval before execution',
        '- **Affects:** Layer 5 (Orchestration) — agents must respect governance boundaries',
        '- **Alert:** When new autonomous capabilities are added, governance rules must be updated',
        '',
        '## Growth Path',
        ''
    );

    // Growth path: pick the (now, 30-day, 90-day) advice by maturity tier.
    let nowAdvice;
    let thirtyDayAdvice;
    let ninetyDayAdvice;
    if (!result.detected) {
        if (lightweight) {
            nowAdvice = 'Create a simple approval list: which actions need your "yes" before the AI proceeds? Start there.';
        }
        else if (elevatedRisk) {
            nowAdvice = 'This is urgent. Create `governance/` with approval workflows, escalation policies, and compliance mapping. Do this before adding more autonomous capabilities.';
        }
        else {
            nowAdvice = 'Create `governance/approval-rules.md` listing actions that require human approval.';
        }
        thirtyDayAdvice = 'Decision logging implemented. Escalation paths tested with real scenarios.';
        ninetyDayAdvice = elevatedRisk
            ? 'Full compliance framework with automated monitoring, regular audits, and incident response integration.'
            : 'Mature governance with decision logging, tested escalation paths, and regular review cycles.';
    }
    else if (result.contentScore < 50) {
        nowAdvice = 'Audit existing governance for gaps. Are all high-stakes actions covered? Is the escalation path actually followed?';
        thirtyDayAdvice = 'Add decision logging and compliance mapping.';
        ninetyDayAdvice = 'Automated governance monitoring with alerts on policy violations.';
    }
    else {
        nowAdvice = 'Governance is solid. Schedule quarterly reviews to keep it aligned with current capabilities.';
        thirtyDayAdvice = 'Automate governance reporting and compliance dashboards.';
        ninetyDayAdvice = 'Predictive governance: identify potential violations before they happen based on pattern analysis.';
    }
    lines.push(
        '### Now (immediate)',
        nowAdvice,
        '',
        '### 30 days',
        thirtyDayAdvice,
        '',
        '### 90 days',
        ninetyDayAdvice,
        '',
        changeLog(history, 7),
        footer()
    );
    return lines.join('\n');
}
|
|
1055
|
+
// ---------------------------------------------------------------------------
|
|
1056
|
+
// Layer 8: Evaluation
|
|
1057
|
+
// ---------------------------------------------------------------------------
|
|
1058
|
+
/**
 * Builds the operational document for Layer 8 (Evaluation) as one markdown string.
 *
 * Sections are emitted in a fixed order: header, Purpose, current state,
 * Operating Rules, What's Approved, How to Extend, Quality Standards,
 * Dependencies, Growth Path, change log, footer.
 *
 * @param {object} result - Scan result for this layer. Reads `detected`,
 *   `evidence` (iterated, `length` checked) and `contentScore`; it is also
 *   forwarded to the `header` and `currentState` helpers.
 * @param {object} ctx - Project context. Reads `type`, `scale` and, only when
 *   `type` is not 'agency' / 'saas' / 'content', `typeLabel`.
 * @param {*} history - Passed straight through to `changeLog(history, 8)`.
 * @returns {string} All collected lines joined with '\n'.
 */
function generateEvaluationOps(result, ctx, history) {
    const doc = [header(8, 'Evaluation', result, ctx), '## Purpose', ''];

    // Purpose paragraph: fixed copy for the known project types, generic copy otherwise.
    const purposeByType = new Map([
        ['agency', 'Evaluation catches quality drift before your clients do. For agency work, output quality is your reputation. Without evaluation, the AI keeps producing work that looks fine on the surface while slowly drifting from client expectations. By the time someone notices, you\'ve shipped weeks of subpar deliverables.'],
        ['saas', 'Evaluation is your product quality monitoring. For a SaaS product, AI output quality directly correlates with user satisfaction and retention. Without evaluation, quality erodes silently — your NPS drops, support tickets increase, and you don\'t know why because the AI is still "working."'],
        ['content', 'Evaluation is editorial quality control at scale. For a content platform, consistency and quality are the product. Without evaluation, the AI produces content that passes a cursory review but fails to meet the standards that your audience expects. Drift is invisible until your engagement metrics start sliding.'],
    ]);
    const knownPurpose = purposeByType.get(ctx.type);
    if (knownPurpose !== undefined) {
        doc.push(knownPurpose);
    }
    else {
        // typeLabel is only touched on this fallback path, so it is not read for known types.
        doc.push(`Evaluation catches quality drift before it compounds. For your ${ctx.typeLabel.toLowerCase()}, this is the difference between "the AI is working" and "the AI is working well." Without evaluation, you're flying blind — the system keeps running while output quality erodes invisibly.`);
    }

    doc.push(
        '',
        currentState(result),
        '## Operating Rules',
        '',
        '1. Define measurable quality benchmarks — not "output should be good" but specific, testable criteria.',
        '2. Run evaluations regularly, not just when something seems wrong. Drift is invisible until it\'s severe.',
        '3. Evaluation results must trigger action — a failing benchmark that nobody responds to is worse than no benchmark.'
    );

    // Rules 4 and 5 exist only for agency and SaaS projects.
    const typeRules = [];
    if (ctx.type === 'agency') {
        typeRules.push(
            '4. Each client must have client-specific quality benchmarks, not just global standards.',
            '5. Client feedback must feed back into evaluation criteria — their complaints define your quality gaps.'
        );
    }
    else if (ctx.type === 'saas') {
        typeRules.push(
            '4. User feedback signals (thumbs up/down, support tickets, feature usage drop) must feed into evaluation.',
            '5. A/B testing framework for skill and prompt changes — don\'t ship changes without measuring impact.'
        );
    }
    doc.push(...typeRules);

    if (ctx.scale !== 'solo') {
        // The team-visibility rule takes the next free number: 6 after type rules, else 4.
        const teamRuleNumber = typeRules.length > 0 ? '6' : '4';
        doc.push(`${teamRuleNumber}. Evaluation results must be visible to the whole team, not siloed in one person's dashboard.`);
    }

    doc.push('', '## What\'s Approved', '');
    const hasEvidence = result.detected && result.evidence.length > 0;
    if (!hasEvidence) {
        doc.push('No evaluation layer detected. Quality drift is currently invisible.');
    }
    else {
        doc.push('Detected evaluation infrastructure:');
        for (const item of result.evidence) {
            doc.push(`- ${item}`);
        }
    }

    doc.push(
        '',
        '## How to Extend',
        '',
        '### Setting up evaluation',
        '1. Create an `evaluation/` directory',
        '2. Define quality benchmarks in `evaluation/benchmarks.md` with specific, measurable criteria',
        '3. Create a quality rubric in `evaluation/quality-rubric.md` — what does "good" look like?',
        '4. Set up a feedback collection mechanism (even a simple log file)',
        '5. Schedule regular evaluation runs (weekly minimum, daily if high-volume)',
        '6. Run `npx onion-check --status` to verify Layer 8 score improved',
        '',
        '## Quality Standards',
        '',
        '- Benchmarks are specific and measurable, not subjective',
        '- Evaluation runs on a regular schedule, not just ad-hoc',
        '- Failing benchmarks trigger a defined response process',
        '- Feedback loops exist between evaluation results and upstream layers (context, skills)',
        '',
        '## Dependencies',
        '',
        '- **Requires:** Layer 3 (Skills) — evaluation measures skill output quality',
        '- **Affects:** Layer 2 (Context) — evaluation results should inform context updates',
        '- **Affects:** Layer 3 (Skills) — failing evaluations trigger skill improvements',
        '- **Alert:** When skills or context change, re-run evaluation benchmarks immediately',
        '',
        '## Growth Path',
        ''
    );

    // Growth plan: [now, 30 days, 90 days], chosen by detection state and content score.
    let growthPlan;
    if (!result.detected) {
        growthPlan = [
            'Create `evaluation/quality-rubric.md` defining what "good output" looks like for your top 3 use cases. Be specific — include examples.',
            'Automated evaluation pipeline: benchmark suite that runs weekly and alerts on quality drift.',
            'Full evaluation loop: benchmarks catch drift, results trigger skill/context updates, improvements are verified by re-evaluation.',
        ];
    }
    else if (result.contentScore < 50) {
        growthPlan = [
            'Review existing benchmarks for specificity. If they say "output should be high quality," they\'re not benchmarks — they\'re wishes.',
            'Add feedback loops: evaluation results must trigger specific actions on upstream layers.',
            'Automated drift detection with historical trend analysis.',
        ];
    }
    else {
        growthPlan = [
            'Evaluation is solid. Look for blind spots — which use cases aren\'t covered by current benchmarks?',
            'Add comparative evaluation: test new skill/context versions against baselines before deployment.',
            'Predictive quality monitoring: detect drift trends before they cross quality thresholds.',
        ];
    }
    const [stepNow, step30, step90] = growthPlan;
    doc.push(
        '### Now (immediate)',
        stepNow,
        '',
        '### 30 days',
        step30,
        '',
        '### 90 days',
        step90
    );

    doc.push('', changeLog(history, 8), footer());
    return doc.join('\n');
}
|
|
1169
|
+
//# sourceMappingURL=operational-docs.js.map
|