ninja-terminals 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +2 -17
- package/cli.js +23 -0
- package/lib/auth.js +195 -0
- package/lib/hypothesis-validator.js +346 -0
- package/lib/post-session.js +426 -0
- package/lib/pre-dispatch.js +265 -0
- package/lib/prompt-delivery.js +127 -0
- package/lib/settings-gen.js +82 -23
- package/package.json +8 -6
- package/public/app.js +282 -13
- package/public/index.html +45 -0
- package/public/style.css +300 -0
- package/server.js +358 -33
- package/ORCHESTRATOR-PROMPT.md +0 -295
- package/orchestrator/evolution-log.md +0 -33
- package/orchestrator/identity.md +0 -60
- package/orchestrator/metrics/.gitkeep +0 -0
- package/orchestrator/metrics/raw/.gitkeep +0 -0
- package/orchestrator/metrics/session-2026-03-23-setup.md +0 -54
- package/orchestrator/metrics/session-2026-03-24-appcast-build.md +0 -55
- package/orchestrator/playbooks.md +0 -71
- package/orchestrator/security-protocol.md +0 -69
- package/orchestrator/tool-registry.md +0 -96
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
const { analyzeSession, SUMMARIES_PATH } = require('./analyze-session');
|
|
6
|
+
const { rateTools } = require('./tool-rater');
|
|
7
|
+
const { validateHypotheses, summarizeResults } = require('./hypothesis-validator');
|
|
8
|
+
const { safeAppend, safeWrite } = require('./safe-file-writer');
|
|
9
|
+
const { logEvolution } = require('./evolution-writer');
|
|
10
|
+
|
|
11
|
+
// Paths — everything lives under the sibling `orchestrator/` directory.
const ORCHESTRATOR_ROOT = path.join(__dirname, '..', 'orchestrator');
const METRICS_ROOT = path.join(ORCHESTRATOR_ROOT, 'metrics');

// Directory holding the raw per-session NDJSON logs.
const RAW_DIR = path.join(METRICS_ROOT, 'raw');
// Ledger of raw files that have already been analyzed.
const PROCESSED_PATH = path.join(METRICS_ROOT, 'processed.json');
// Latest tool ratings and the snapshot taken just before they were recomputed.
const TOOL_RATINGS_PATH = path.join(METRICS_ROOT, 'tool-ratings.json');
const TOOL_RATINGS_PREV_PATH = path.join(METRICS_ROOT, 'tool-ratings-prev.json');
// Markdown playbook whose hypothesis status lines get rewritten.
const PLAYBOOKS_PATH = path.join(ORCHESTRATOR_ROOT, 'playbooks.md');
|
|
17
|
+
|
|
18
|
+
/**
 * Read the tool-ratings snapshot stored at TOOL_RATINGS_PREV_PATH.
 * @returns {object|null} The parsed snapshot, or null when the file is
 *   missing, unreadable, or not valid JSON.
 */
function loadPreviousRatings() {
  let snapshot = null;

  if (fs.existsSync(TOOL_RATINGS_PREV_PATH)) {
    try {
      const raw = fs.readFileSync(TOOL_RATINGS_PREV_PATH, 'utf8');
      snapshot = JSON.parse(raw);
    } catch {
      // A broken snapshot is treated the same as having no snapshot.
      snapshot = null;
    }
  }

  return snapshot;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Copy the current ratings file to the "previous" snapshot path so the next
 * recomputation can be compared against it. Does nothing when there is no
 * current ratings file; read/write failures are deliberately ignored.
 */
function savePreviousRatings() {
  if (!fs.existsSync(TOOL_RATINGS_PATH)) return;

  try {
    const currentText = fs.readFileSync(TOOL_RATINGS_PATH, 'utf8');
    fs.writeFileSync(TOOL_RATINGS_PREV_PATH, currentText, 'utf8');
  } catch {
    /* ignore */
  }
}
|
|
42
|
+
|
|
43
|
+
/**
 * Generate a human-readable learning summary comparing before/after.
 *
 * The plain-text form is a fixed-width box: every row, including ones whose
 * content is too long, is padded or truncated to the same width so the right
 * border always lines up.
 *
 * @param {object} currentRatings - Current tool ratings, keyed by tool name;
 *   each value is read for `rating` and `success_rate`
 * @param {object|null} previousRatings - Previous tool ratings (null if first run)
 * @param {object} hypothesisValidation - Results from hypothesis validation;
 *   the `promoted`, `rejected` and `continue` lists are read
 * @param {string[]} newGuidance - New guidance strings being injected
 * @returns {{ plainText: string, structured: object }} `structured` holds
 *   `toolChanges`, `hypothesisUpdates`, `guidance` and `generatedAt`
 */
function generateLearningSummary(currentRatings, previousRatings, hypothesisValidation, newGuidance) {
  const INNER_WIDTH = 62; // characters between the left and right border
  // 0.85 - 0.80 evaluates to 0.04999999999999993; the tolerance keeps an
  // exact five-point change on the "significant" side of the threshold.
  const SIGNIFICANT_DELTA = 0.05 - 1e-9;

  const border = (left, right) => `${left}${'═'.repeat(INNER_WIDTH)}${right}`;
  // Single place where rows are sized: pad short text, clip long text.
  const row = (text = '') => {
    const clipped = text.length > INNER_WIDTH ? `${text.slice(0, INNER_WIDTH - 1)}…` : text;
    return `║${clipped.padEnd(INNER_WIDTH)}║`;
  };
  const centered = (text) =>
    row(' '.repeat(Math.max(0, Math.floor((INNER_WIDTH - text.length) / 2))) + text);
  // A missing or non-numeric success rate counts as 0 instead of printing NaN.
  const rateOf = (stats) => (Number.isFinite(stats?.success_rate) ? stats.success_rate : 0);

  const toolChanges = [];
  const lines = [];

  lines.push(border('╔', '╗'));
  lines.push(centered('SESSION LEARNINGS'));
  lines.push(border('╠', '╣'));

  // Tool changes
  lines.push(row(' Tool Rating Changes:'));
  for (const [tool, stats] of Object.entries(currentRatings || {})) {
    const prev = previousRatings?.[tool];
    const currRating = stats?.rating;

    if (!prev) {
      // New tool
      const currRate = (rateOf(stats) * 100).toFixed(0);
      toolChanges.push({ tool, from: null, to: currRating, delta: 'new' });
      lines.push(row(`   ${tool}: ${currRating} (${currRate}% success) [NEW]`));
      continue;
    }

    const prevRating = prev.rating;
    const delta = rateOf(stats) - rateOf(prev);
    // toFixed() yields "-0" for tiny negative values; `|| 0` folds that into 0.
    const deltaPct = Number((delta * 100).toFixed(0)) || 0;
    const deltaStr = deltaPct >= 0 ? `+${deltaPct}%` : `${deltaPct}%`;

    if (prevRating !== currRating || Math.abs(delta) >= SIGNIFICANT_DELTA) {
      toolChanges.push({ tool, from: prevRating, to: currRating, delta: deltaStr });
      lines.push(row(`   ${tool}: ${prevRating}→${currRating} (${deltaStr})`));
    }
  }

  if (toolChanges.length === 0) {
    lines.push(row('   No significant changes'));
  }

  lines.push(row());

  // Hypothesis updates
  lines.push(row(' Hypothesis Updates:'));
  const promoted = hypothesisValidation?.promoted || [];
  const rejected = hypothesisValidation?.rejected || [];
  const continuing = hypothesisValidation?.continue || [];

  for (const h of promoted) {
    lines.push(row(`   ✓ PROMOTED: ${String(h).substring(0, 43)}`));
  }
  for (const h of rejected) {
    lines.push(row(`   ✗ REJECTED: ${String(h).substring(0, 43)}`));
  }
  if (continuing.length > 0) {
    lines.push(row(`   ⋯ ${continuing.length} hypothesis(es) still testing`));
  }
  if (promoted.length === 0 && rejected.length === 0 && continuing.length === 0) {
    lines.push(row('   No hypotheses to validate'));
  }

  lines.push(row());

  // New guidance
  lines.push(row(' Active Guidance (injected into dispatches):'));
  if (newGuidance && newGuidance.length > 0) {
    // Only the first five entries are listed; the rest are counted.
    for (const g of newGuidance.slice(0, 5)) {
      lines.push(row(`   • ${String(g).substring(0, 55)}`));
    }
    if (newGuidance.length > 5) {
      lines.push(row(`   ... and ${newGuidance.length - 5} more`));
    }
  } else {
    lines.push(row('   None yet'));
  }

  lines.push(border('╚', '╝'));

  return {
    plainText: lines.join('\n'),
    structured: {
      toolChanges,
      hypothesisUpdates: {
        promoted,
        rejected,
        continuing,
      },
      guidance: newGuidance || [],
      generatedAt: new Date().toISOString(),
    },
  };
}
|
|
144
|
+
|
|
145
|
+
/**
 * Read the ledger of NDJSON files that have already been analyzed.
 * @returns {Set<string>} Basenames listed under `files` in PROCESSED_PATH;
 *   an empty set when the ledger is missing, unreadable, or malformed.
 */
function loadProcessedFiles() {
  if (!fs.existsSync(PROCESSED_PATH)) {
    return new Set();
  }

  try {
    const { files } = JSON.parse(fs.readFileSync(PROCESSED_PATH, 'utf8'));
    return new Set(files || []);
  } catch {
    // Any read/parse/shape problem degrades to "nothing processed yet".
    return new Set();
  }
}
|
|
158
|
+
|
|
159
|
+
/**
 * Persist the ledger of analyzed NDJSON files to PROCESSED_PATH, creating the
 * containing directory if needed. The file stores the names plus a
 * `lastUpdated` ISO timestamp.
 * @param {Set<string>} processed - Basenames to record
 */
function saveProcessedFiles(processed) {
  fs.mkdirSync(path.dirname(PROCESSED_PATH), { recursive: true });

  const ledger = {
    files: Array.from(processed),
    lastUpdated: new Date().toISOString(),
  };
  fs.writeFileSync(PROCESSED_PATH, JSON.stringify(ledger, null, 2), 'utf8');
}
|
|
171
|
+
|
|
172
|
+
/**
 * List the `.ndjson` files in RAW_DIR that are not yet in the processed ledger.
 * @returns {string[]} Paths (RAW_DIR joined with the file name); empty when
 *   RAW_DIR does not exist.
 */
function findUnprocessedFiles() {
  if (!fs.existsSync(RAW_DIR)) return [];

  const alreadyDone = loadProcessedFiles();
  const pending = [];

  for (const name of fs.readdirSync(RAW_DIR)) {
    const isSessionLog = name.endsWith('.ndjson');
    if (isSessionLog && !alreadyDone.has(name)) {
      pending.push(path.join(RAW_DIR, name));
    }
  }

  return pending;
}
|
|
186
|
+
|
|
187
|
+
/**
 * Analyze every pending raw NDJSON file and append each resulting summary (as
 * one JSON line) to SUMMARIES_PATH. A file that fails is logged and left out
 * of the ledger, so it is picked up again on the next run.
 * @returns {Promise<{ processed: number, summaries: object[] }>} Count of
 *   files analyzed successfully and their summaries.
 */
async function processRawFiles() {
  const pendingPaths = findUnprocessedFiles();
  const ledger = loadProcessedFiles();
  const summaries = [];

  for (const filePath of pendingPaths) {
    try {
      const summary = await analyzeSession(filePath);
      safeAppend(SUMMARIES_PATH, JSON.stringify(summary));
      summaries.push(summary);

      const name = path.basename(filePath);
      ledger.add(name);
      console.log(`[post-session] Processed: ${name}`);
    } catch (err) {
      console.error(`[post-session] Failed to process ${filePath}:`, err.message);
    }
  }

  // The ledger is rewritten on every run, even when nothing new was processed.
  saveProcessedFiles(ledger);

  return { processed: summaries.length, summaries };
}
|
|
211
|
+
|
|
212
|
+
/**
 * Recompute tool ratings and write them to TOOL_RATINGS_PATH.
 * The existing ratings file is first copied to the "previous" snapshot and
 * read back, so callers receive both generations.
 * @returns {Promise<{ current: object, previous: object|null }>}
 */
async function computeAndSaveRatings() {
  // Snapshot before anything is overwritten, then read that snapshot back.
  savePreviousRatings();
  const previous = loadPreviousRatings();

  // rateTools() yields [tool, data] pairs; flatten them into a plain object
  // so the result can be serialized as JSON.
  const current = {};
  for (const [tool, data] of await rateTools()) {
    current[tool] = data;
  }

  const targetDir = path.dirname(TOOL_RATINGS_PATH);
  fs.mkdirSync(targetDir, { recursive: true });
  fs.writeFileSync(TOOL_RATINGS_PATH, JSON.stringify(current, null, 2), 'utf8');
  console.log(`[post-session] Tool ratings saved to ${TOOL_RATINGS_PATH}`);

  return { current, previous };
}
|
|
234
|
+
|
|
235
|
+
/**
 * Update playbooks.md status for a hypothesis.
 *
 * The playbook is split on `### ` headings. In every section whose text
 * contains the hypothesis name (case-insensitive), each `**Status:**` line
 * that currently reads "hypothesis" or "testing" is replaced with the new
 * status, today's date and up to 100 characters of evidence.
 *
 * @param {string} hypothesisName - Name to look for; an empty or blank name
 *   matches nothing
 * @param {string} newStatus - 'validated' or 'rejected'
 * @param {string} evidence - Supporting text; a missing value is written as
 *   an empty string
 * @returns {boolean} True if updated
 */
function updatePlaybookStatus(hypothesisName, newStatus, evidence) {
  const needle = String(hypothesisName ?? '').toLowerCase();
  // ''.includes-style matching would hit every section, so refuse blank names.
  if (needle.trim() === '') return false;
  if (!fs.existsSync(PLAYBOOKS_PATH)) return false;

  const content = fs.readFileSync(PLAYBOOKS_PATH, 'utf8');

  const date = new Date().toISOString().split('T')[0];
  const evidenceText = String(evidence ?? '').substring(0, 100);
  const newStatusLine = `**Status:** ${newStatus} (${date}) — ${evidenceText}`;

  // Only statuses that are still open (hypothesis/testing) are rewritten.
  const openStatus = /\*\*Status:\*\*\s*(?:hypothesis|testing)[^\n]*/gi;
  let updated = false;

  const rebuilt = content.split(/^### /m).map((section, i) => {
    if (i === 0) return section; // Header before first ###

    let next = section;
    if (section.toLowerCase().includes(needle)) {
      // A replacer function is used so `$&`, `$1`, `$'` etc. inside the
      // evidence are written literally rather than expanded by replace().
      next = section.replace(openStatus, () => {
        updated = true;
        return newStatusLine;
      });
    }

    return '### ' + next;
  });

  if (updated) {
    safeWrite(PLAYBOOKS_PATH, rebuilt.join(''));
    console.log(`[post-session] Updated playbook status: ${hypothesisName} -> ${newStatus}`);
  }

  return updated;
}
|
|
285
|
+
|
|
286
|
+
/**
 * Validate playbook hypotheses against the session summaries and apply the
 * outcome: a "promote" decision marks the playbook entry validated, a
 * "reject" decision marks it rejected. A result is only counted (and logged
 * to the evolution log) when the playbook file was actually changed.
 * @returns {object} { results: array, promoted: array, rejected: array }
 */
function runHypothesisValidation() {
  const results = validateHypotheses(PLAYBOOKS_PATH, SUMMARIES_PATH);
  const promoted = [];
  const rejected = [];

  // How each actionable decision is applied; any other decision is skipped.
  const outcomes = new Map([
    ['promote', {
      status: 'validated',
      bucket: promoted,
      label: 'Promoted',
      why: 'Metric improvement exceeded 10% threshold over 3+ sessions',
    }],
    ['reject', {
      status: 'rejected',
      bucket: rejected,
      label: 'Rejected',
      why: 'Metric worsened by >10% over 3+ sessions',
    }],
  ]);

  for (const result of results) {
    const outcome = outcomes.get(result.decision);
    if (!outcome) continue;

    const changed = updatePlaybookStatus(result.hypothesis, outcome.status, result.evidence);
    if (!changed) continue;

    outcome.bucket.push(result);
    logEvolution({
      file: 'orchestrator/playbooks.md',
      change: `${outcome.label} hypothesis: ${result.hypothesis}`,
      why: outcome.why,
      evidence: result.evidence,
      reversible: 'yes',
    });
  }

  return { results, promoted, rejected };
}
|
|
325
|
+
|
|
326
|
+
/**
 * Run the whole post-session pipeline and print a learning summary.
 *
 * In order:
 *   1. analyze pending raw NDJSON files,
 *   2. recompute and save tool ratings (keeping the previous generation),
 *   3. validate hypotheses and update the playbook,
 *   4. collect the tool guidance the pre-dispatch module would now produce,
 *   5. build the before/after learning summary and log it.
 *
 * @returns {Promise<object>} Pipeline results: file count, trimmed session
 *   summaries, current/previous ratings, hypothesis outcome, the learning
 *   summary, elapsed milliseconds and a completion timestamp.
 */
async function runPostSession() {
  console.log('[post-session] Starting post-session automation...');
  const startedAt = Date.now();

  // Step 1: Process raw NDJSON files
  const rawOutcome = await processRawFiles();
  const filesProcessed = rawOutcome.processed;
  const summaries = rawOutcome.summaries;
  console.log(`[post-session] Processed ${filesProcessed} raw NDJSON files`);

  // Step 2: Compute tool ratings (saves previous first)
  const ratingOutcome = await computeAndSaveRatings();
  const ratings = ratingOutcome.current;
  const previousRatings = ratingOutcome.previous;
  console.log(`[post-session] Computed ratings for ${Object.keys(ratings).length} tools`);

  // Step 3-4: Validate hypotheses and update playbooks
  const validation = runHypothesisValidation();
  const tally = summarizeResults(validation.results);
  console.log(`[post-session] Hypothesis validation: ${tally.promote} promoted, ${tally.reject} rejected, ${tally.continue} continue`);

  // Step 5: Generate guidance for next session (for summary)
  let newGuidance = [];
  try {
    // Required here rather than at the top of the file; any failure simply
    // leaves the guidance list empty.
    const { getPreDispatchContext } = require('./pre-dispatch');
    const ctx = await getPreDispatchContext();
    newGuidance = ctx.toolGuidance || [];
  } catch { /* pre-dispatch might not be loaded */ }

  // Step 6: Generate learning summary
  const namesOf = (entries) => entries.map((entry) => entry.hypothesis);
  const stillTesting = validation.results.filter((entry) => entry.decision === 'continue');
  const hypothesisValidation = {
    total: validation.results.length,
    promoted: namesOf(validation.promoted),
    rejected: namesOf(validation.rejected),
    continue: namesOf(stillTesting),
    details: validation.results,
  };

  const learningSummary = generateLearningSummary(ratings, previousRatings, hypothesisValidation, newGuidance);

  const duration = Date.now() - startedAt;
  console.log(`[post-session] Pipeline completed in ${duration}ms`);

  // Print summary to console
  console.log('\n' + learningSummary.plainText);

  // Only the identifying fields of each session summary are returned.
  const sessionDigests = summaries.map((s) => ({
    session_id: s.session_id,
    terminal: s.terminal,
    duration_min: s.duration_min,
  }));

  return {
    filesProcessed,
    summaries: sessionDigests,
    toolRatings: ratings,
    previousRatings,
    hypothesisValidation,
    learningSummary,
    duration_ms: duration,
    ts: new Date().toISOString(),
  };
}
|
|
393
|
+
|
|
394
|
+
// CLI mode
|
|
395
|
+
if (require.main === module) {
|
|
396
|
+
runPostSession()
|
|
397
|
+
.then(result => {
|
|
398
|
+
console.log('\n=== Post-Session Report ===');
|
|
399
|
+
console.log(`Files processed: ${result.filesProcessed}`);
|
|
400
|
+
console.log(`Tools rated: ${Object.keys(result.toolRatings).length}`);
|
|
401
|
+
console.log(`Hypotheses: ${result.hypothesisValidation.total} total`);
|
|
402
|
+
console.log(` Promoted: ${result.hypothesisValidation.promoted.join(', ') || 'none'}`);
|
|
403
|
+
console.log(` Rejected: ${result.hypothesisValidation.rejected.join(', ') || 'none'}`);
|
|
404
|
+
console.log(` Continue: ${result.hypothesisValidation.continue.length}`);
|
|
405
|
+
console.log(`Duration: ${result.duration_ms}ms`);
|
|
406
|
+
})
|
|
407
|
+
.catch(err => {
|
|
408
|
+
console.error('Post-session failed:', err.message);
|
|
409
|
+
process.exit(1);
|
|
410
|
+
});
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
module.exports = {
|
|
414
|
+
runPostSession,
|
|
415
|
+
processRawFiles,
|
|
416
|
+
computeAndSaveRatings,
|
|
417
|
+
runHypothesisValidation,
|
|
418
|
+
updatePlaybookStatus,
|
|
419
|
+
findUnprocessedFiles,
|
|
420
|
+
loadProcessedFiles,
|
|
421
|
+
saveProcessedFiles,
|
|
422
|
+
generateLearningSummary,
|
|
423
|
+
loadPreviousRatings,
|
|
424
|
+
TOOL_RATINGS_PATH,
|
|
425
|
+
RAW_DIR,
|
|
426
|
+
};
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
const { SUMMARIES_PATH } = require('./analyze-session');
|
|
6
|
+
const { rateTools } = require('./tool-rater');
|
|
7
|
+
const { parsePlaybooks } = require('./playbook-tracker');
|
|
8
|
+
|
|
9
|
+
// Both inputs live under the sibling `orchestrator/` directory.
const ORCHESTRATOR_ROOT = path.join(__dirname, '..', 'orchestrator');
const TOOL_RATINGS_PATH = path.join(ORCHESTRATOR_ROOT, 'metrics', 'tool-ratings.json');
const PLAYBOOKS_PATH = path.join(ORCHESTRATOR_ROOT, 'playbooks.md');

// Known tool alternatives for guidance generation.
// Key: the tool being rated. `preferred`: what to suggest instead.
// `useCase`: the situation the suggestion applies to.
const TOOL_ALTERNATIVES = {
  Edit: {
    preferred: 'Write',
    useCase: 'new files or full rewrites',
  },
  Bash: {
    preferred: 'Glob',
    useCase: 'directory scanning',
  },
  find: {
    preferred: 'Glob',
    useCase: 'file searches',
  },
  grep: {
    preferred: 'Grep',
    useCase: 'content searches',
  },
  cat: {
    preferred: 'Read',
    useCase: 'reading files',
  },
};
|
|
20
|
+
|
|
21
|
+
/**
 * Get tool ratings, preferring the JSON file at TOOL_RATINGS_PATH and falling
 * back to a fresh rateTools() computation when that file is missing or cannot
 * be read and parsed.
 * @returns {Promise<Map<string, object>>} Tool name -> rating stats
 */
async function loadToolRatings() {
  if (fs.existsSync(TOOL_RATINGS_PATH)) {
    try {
      const cached = JSON.parse(fs.readFileSync(TOOL_RATINGS_PATH, 'utf8'));
      return new Map(Object.entries(cached));
    } catch {
      // Unusable cache: compute from scratch below.
    }
  }

  return rateTools();
}
|
|
41
|
+
|
|
42
|
+
/**
 * Generate tool guidance strings from ratings.
 *
 * - Rating 'C' or composite below 0.50: advise against the tool, naming the
 *   preferred alternative when TOOL_ALTERNATIVES has an entry for it.
 * - Rating 'S' or 'A': recommend the tool.
 * - Anything else produces no guidance.
 *
 * @param {Map<string, object>} ratings - Tool name -> stats; `rating`,
 *   `composite` and `success_rate` are read from each entry
 * @returns {string[]} One sentence per tool that earned guidance, in the
 *   iteration order of `ratings`
 */
function generateToolGuidance(ratings) {
  const guidance = [];

  for (const [tool, stats] of ratings) {
    const { rating, composite, success_rate } = stats;

    if (rating === 'C' || composite < 0.50) {
      // Low-rated tool: suggest avoidance.
      // Own-property lookup only: a tool named e.g. "constructor" or
      // "toString" must not pick up an inherited Object.prototype member.
      const alt = Object.prototype.hasOwnProperty.call(TOOL_ALTERNATIVES, tool)
        ? TOOL_ALTERNATIVES[tool]
        : undefined;
      if (alt) {
        guidance.push(`Avoid ${tool} for ${alt.useCase}, prefer ${alt.preferred} (${tool} has ${rating} rating: ${composite})`);
      } else {
        guidance.push(`Use ${tool} cautiously — ${rating} rating (${composite}), success rate: ${(success_rate * 100).toFixed(0)}%`);
      }
    } else if (rating === 'S' || rating === 'A') {
      // High-rated tool: recommend preference
      guidance.push(`Prefer ${tool} — reliable (${rating} rating: ${composite})`);
    }
  }

  return guidance;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Pull short, actionable bullet points out of the playbook markdown.
 *
 * Three `## ` sections are read, in this order:
 *   - "Measured Insights" and "Known Anti-Patterns": every bullet of the form
 *     `- **Title** — text` (em dash, en dash or hyphen), returned as
 *     `Title — text`;
 *   - "Dispatch Best Practices": the first three `- **Title**...` bullets,
 *     returned without the leading `- `.
 *
 * @returns {string[]} Collected insights; empty when the playbook file does
 *   not exist.
 */
function extractPlaybookInsights() {
  if (!fs.existsSync(PLAYBOOKS_PATH)) return [];

  const content = fs.readFileSync(PLAYBOOKS_PATH, 'utf8');
  const insights = [];

  // Shared by the two sections that use the "**Title** — text" bullet shape.
  const collectTitledBullets = (sectionMatch) => {
    if (!sectionMatch) return;
    const bullets = sectionMatch[1].match(/^- \*\*(.+?)\*\*\s*[—–-]\s*(.+)$/gm) || [];
    bullets.forEach((bullet) => {
      const withoutMarker = bullet.replace(/^- \*\*/, '');
      const normalized = withoutMarker.replace(/\*\*\s*[—–-]\s*/, ' — ');
      insights.push(normalized.trim());
    });
  };

  // Extract from "## Measured Insights" section
  collectTitledBullets(
    content.match(/## Measured Insights[^\n]*\n([\s\S]*?)(?=\n## |\n---|\*\*Status:\*\*|$)/),
  );

  // Extract from "## Known Anti-Patterns" section
  collectTitledBullets(
    content.match(/## Known Anti-Patterns[^\n]*\n([\s\S]*?)(?=\n## |$)/),
  );

  // Also extract dispatch best practices (simpler format)
  const dispatchSection = content.match(/## Dispatch Best Practices\n([\s\S]*?)(?=\n## |$)/);
  if (dispatchSection) {
    const practiceBullets = dispatchSection[1].match(/^- \*\*(.+?)\*\*(.*)$/gm) || [];
    // Just top 3
    practiceBullets.slice(0, 3).forEach((bullet) => {
      insights.push(bullet.replace(/^- /, '').trim());
    });
  }

  return insights;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Summarize how each terminal performed over the last 20 entries of the
 * summaries file.
 *
 * Entries without a `terminal` value are skipped; lines that are not valid
 * JSON are ignored. When at least two terminals are present and the slowest
 * average duration is more than 1.3x the fastest (and the fastest is above
 * zero), a sentence comparing them is stored under the `_comparison` key.
 *
 * @returns {object} terminalId -> { avgDuration, successRate, topTools },
 *   plus an optional `_comparison` string; empty when the summaries file does
 *   not exist.
 */
function analyzeTerminalPerformance() {
  const hints = {};
  if (!fs.existsSync(SUMMARIES_PATH)) return hints;

  const parseOrNull = (line) => {
    try {
      return JSON.parse(line);
    } catch {
      return null;
    }
  };

  // Only look at recent sessions (last 20)
  const sessions = fs.readFileSync(SUMMARIES_PATH, 'utf8')
    .trim()
    .split('\n')
    .filter(Boolean)
    .slice(-20)
    .map(parseOrNull)
    .filter(Boolean);

  // Aggregate by terminal
  const byTerminal = {};
  for (const session of sessions) {
    const terminalId = session.terminal || 'unknown';
    if (terminalId === 'unknown') continue;

    if (!byTerminal[terminalId]) {
      byTerminal[terminalId] = { durations: [], totalSuccess: 0, totalInvocations: 0, tools: {} };
    }
    const bucket = byTerminal[terminalId];
    bucket.durations.push(session.duration_min || 0);

    // Aggregate tool stats
    if (!session.tools) continue;
    for (const [tool, stats] of Object.entries(session.tools)) {
      const calls = stats.invocations || 0;
      bucket.totalSuccess += stats.successes || 0;
      bucket.totalInvocations += calls;
      bucket.tools[tool] = (bucket.tools[tool] || 0) + calls;
    }
  }

  // Compute hints per terminal
  for (const [terminalId, bucket] of Object.entries(byTerminal)) {
    let avgDuration = 0;
    if (bucket.durations.length > 0) {
      const total = bucket.durations.reduce((sum, minutes) => sum + minutes, 0);
      avgDuration = total / bucket.durations.length;
    }

    let successRate = 0;
    if (bucket.totalInvocations > 0) {
      successRate = bucket.totalSuccess / bucket.totalInvocations;
    }

    // Top 3 tools by usage
    const ranked = Object.entries(bucket.tools).sort((a, b) => b[1] - a[1]);
    const topTools = ranked.slice(0, 3).map((entry) => entry[0]);

    hints[terminalId] = {
      avgDuration: +avgDuration.toFixed(1),
      successRate: +successRate.toFixed(3),
      topTools,
    };
  }

  // Add comparative hints
  const terminalIds = Object.keys(hints);
  if (terminalIds.length >= 2) {
    // Ascending by average duration: first is fastest, last is slowest.
    terminalIds.sort((a, b) => hints[a].avgDuration - hints[b].avgDuration);
    const fastest = terminalIds[0];
    const slowest = terminalIds[terminalIds.length - 1];
    const fastAvg = hints[fastest].avgDuration;
    const slowAvg = hints[slowest].avgDuration;

    if (fastAvg > 0 && slowAvg > fastAvg * 1.3) {
      hints._comparison = `T${fastest} was ${((slowAvg / fastAvg - 1) * 100).toFixed(0)}% faster than T${slowest} in recent sessions`;
    }
  }

  return hints;
}
|
|
202
|
+
|
|
203
|
+
/**
 * Assemble everything that gets injected into terminal prompts before a
 * dispatch: guidance derived from tool ratings, insights read from the
 * playbook, and per-terminal performance hints.
 * @returns {Promise<{ toolGuidance: string[], playbookInsights: string[], terminalHints: object }>}
 */
async function getPreDispatchContext() {
  const toolGuidance = generateToolGuidance(await loadToolRatings());

  return {
    toolGuidance,
    playbookInsights: extractPlaybookInsights(),
    terminalHints: analyzeTerminalPerformance(),
  };
}
|
|
219
|
+
|
|
220
|
+
/**
 * Render a pre-dispatch context as the text block that is injected into a
 * prompt. Layout: an opening marker, one bullet per tool-guidance entry, a
 * blank line followed by at most five playbook insights (only when there are
 * any), the terminal comparison sentence when present, and a closing marker.
 * @param {object} ctx - Output from getPreDispatchContext()
 * @returns {string} The lines joined with `\n`
 */
function formatContextForInjection(ctx) {
  const asBullet = (text) => `- ${text}`;
  const block = ['[SYSTEM GUIDANCE from prior sessions]'];

  if (ctx.toolGuidance.length > 0) {
    ctx.toolGuidance.forEach((entry) => block.push(asBullet(entry)));
  }

  if (ctx.playbookInsights.length > 0) {
    block.push('');
    // Limit to top 5
    ctx.playbookInsights.slice(0, 5).forEach((entry) => block.push(asBullet(entry)));
  }

  const comparison = ctx.terminalHints._comparison;
  if (comparison) {
    block.push(asBullet(comparison));
  }

  block.push('[END GUIDANCE]');
  return block.join('\n');
}
|
|
248
|
+
|
|
249
|
+
// CLI mode: node pre-dispatch.js
|
|
250
|
+
if (require.main === module) {
|
|
251
|
+
getPreDispatchContext()
|
|
252
|
+
.then(ctx => {
|
|
253
|
+
console.log('=== Pre-Dispatch Context ===\n');
|
|
254
|
+
console.log('Tool Guidance:');
|
|
255
|
+
ctx.toolGuidance.forEach(g => console.log(` - ${g}`));
|
|
256
|
+
console.log('\nPlaybook Insights:');
|
|
257
|
+
ctx.playbookInsights.forEach(i => console.log(` - ${i}`));
|
|
258
|
+
console.log('\nTerminal Hints:', JSON.stringify(ctx.terminalHints, null, 2));
|
|
259
|
+
console.log('\n=== Formatted for Injection ===\n');
|
|
260
|
+
console.log(formatContextForInjection(ctx));
|
|
261
|
+
})
|
|
262
|
+
.catch(err => { console.error('Failed:', err.message); process.exit(1); });
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
module.exports = { getPreDispatchContext, formatContextForInjection, loadToolRatings };
|