ninja-terminals 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CLAUDE.md CHANGED
@@ -54,25 +54,10 @@ These status lines are CRITICAL — the orchestrator parses them to know your st
54
54
  - The orchestrator relays between terminals
55
55
 
56
56
  ## MCP Tools
57
- You have access to 170+ MCP tools. Use them proactively:
58
- - **postforme**: Video rendering, social publishing, Meta ads, content management, brand profiles, asset management, insights/analytics
59
- - **studychat**: RAG knowledge base, DMs, C2C messaging, document upload/query
60
- - **chrome-devtools**: Browser automation — navigate, click, type, screenshot, forms, network monitoring
61
- - **gmail**: Search emails, read messages, download attachments
62
- - **netlify-billing / render-billing**: Deployment status, billing, service health
63
- - **builder-pro**: Code review, security scan, auto-fix, architecture validation
64
- - **gkchatty**: Knowledge base queries, uploads — DO NOT USE unless explicitly instructed
65
-
66
- ### PostForMe Publishing — Use the Right Tool
67
- | Content Type | Correct Tool | Wrong Tool (will fail) |
68
- |---|---|---|
69
- | Video → IG Reel | `publish_meta(contentId, platform: "instagram")` | — |
70
- | Video → FB | `publish_meta(contentId, platform: "facebook")` | — |
71
- | Video → Story | `publish_story(contentId, imageUrl/videoUrl)` | publish_meta |
72
- | Carousel (multi-image) | `publish_carousel(imageUrls: [...], caption)` | publish_meta |
57
+ Ninja Terminals works with any MCP tools you have configured. The orchestrator and workers will automatically detect and use your installed MCP servers.
73
58
 
74
59
  ### Tool Selection Priority
75
- 1. Check the tool list first — verify it accepts the parameters you need
60
+ 1. Check your available tool list first — verify it accepts the parameters you need
76
61
  2. Use the most direct tool available (MCP > browser automation > manual)
77
62
  3. If an MCP tool exists for the task, prefer it over browser-driving
78
63
  4. Use browser automation for websites without an MCP/API
package/cli.js CHANGED
@@ -33,13 +33,23 @@ OPTIONS
33
33
  --port <number> Port to listen on (default: 3300)
34
34
  --terminals <number> Number of terminals to spawn (default: 4)
35
35
  --cwd <path> Working directory for terminals (default: current dir)
36
+ --token <jwt> Auth token for Pro users / CI (skips browser login)
37
+ --offline Offline mode for Pro users (skips backend validation)
36
38
  --version, -v Print version and exit
37
39
  --help, -h Show this help message
38
40
 
41
+ AUTHENTICATION
42
+ Pro users can authenticate via:
43
+ 1. Browser login (default) - sign in at the web UI
44
+ 2. --token flag - pass JWT directly (useful for CI/scripts)
45
+ 3. --offline flag - skip validation (requires downloaded Pro package)
46
+
39
47
  EXAMPLES
40
48
  npx ninja-terminals
41
49
  npx ninja-terminals --port 3301 --terminals 2
42
50
  npx ninja-terminals --cwd /path/to/my-project
51
+ npx ninja-terminals --token eyJhbGciOiJIUzI1NiIs...
52
+ npx ninja-terminals --offline
43
53
  `);
44
54
  process.exit(0);
45
55
  }
@@ -52,6 +62,8 @@ if (hasFlag('--version') || hasFlag('-v')) {
52
62
  const port = parseInt(getArg('--port', '3300'), 10);
53
63
  const terminals = parseInt(getArg('--terminals', '4'), 10);
54
64
  const cwd = getArg('--cwd', process.cwd());
65
+ const token = getArg('--token', null);
66
+ const offline = hasFlag('--offline');
55
67
 
56
68
  if (isNaN(port) || port < 1 || port > 65535) {
57
69
  console.error(`Error: --port must be a number between 1 and 65535`);
@@ -65,6 +77,8 @@ if (isNaN(terminals) || terminals < 1 || terminals > 16) {
65
77
 
66
78
  // ── Startup banner ───────────────────────────────────────────
67
79
 
80
+ const authMode = offline ? 'offline' : (token ? 'token' : 'browser');
81
+
68
82
  console.log(`
69
83
  ╔═══════════════════════════════════════╗
70
84
  ║ NINJA TERMINALS v${pkg.version} ║
@@ -73,6 +87,7 @@ console.log(`
73
87
  ║ Port : ${String(port).padEnd(24)} ║
74
88
  ║ Terminals : ${String(terminals).padEnd(24)} ║
75
89
  ║ CWD : ${cwd.length > 24 ? '...' + cwd.slice(-21) : cwd.padEnd(24)} ║
90
+ ║ Auth : ${authMode.padEnd(24)} ║
76
91
  ╚═══════════════════════════════════════╝
77
92
  `);
78
93
 
@@ -84,6 +99,14 @@ process.env.PORT = String(port);
84
99
  process.env.DEFAULT_TERMINALS = String(terminals);
85
100
  process.env.DEFAULT_CWD = cwd;
86
101
 
102
+ // Auth env vars
103
+ if (token) {
104
+ process.env.NINJA_AUTH_TOKEN = token;
105
+ }
106
+ if (offline) {
107
+ process.env.NINJA_OFFLINE = '1';
108
+ }
109
+
87
110
  // ── Auto-open browser ────────────────────────────────────────
88
111
 
89
112
  function openBrowser(url) {
package/lib/auth.js ADDED
@@ -0,0 +1,195 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Auth module — Token validation and session middleware for Ninja Terminals
5
+ // ---------------------------------------------------------------------------
6
+
7
const BACKEND_URL = process.env.NINJA_BACKEND_URL || 'https://emtchat-backend.onrender.com';

// Last successful validation per token (token -> session data).
// Consulted only as a fallback when the backend request itself fails.
const validationCache = new Map();

/**
 * Validate a token against the backend.
 *
 * POSTs the token as a Bearer credential to `${BACKEND_URL}/api/ninja/validate`
 * with a 10 second timeout. A successful response is cached so that a later
 * request failure can fall back to the last known-good result. A token the
 * backend explicitly rejects (HTTP 401/403) is evicted from that cache, so a
 * revoked token cannot be accepted again during a subsequent outage.
 *
 * @param {string} token - Bearer token to validate
 * @returns {Promise<{valid: boolean, tier: string, terminalsMax: number, features: string[], validatedAt: number}|null>}
 *   Session data on success (or the cached copy when the request fails);
 *   null when the token is empty, rejected, or cannot be verified.
 */
async function validateToken(token) {
  if (!token) return null;

  try {
    const response = await fetch(`${BACKEND_URL}/api/ninja/validate`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
      },
      signal: AbortSignal.timeout(10000), // 10s timeout
    });

    if (!response.ok) {
      // An explicit auth rejection must also invalidate the offline fallback;
      // other failures (e.g. 5xx) say nothing about the token, so keep the entry.
      if (response.status === 401 || response.status === 403) {
        validationCache.delete(token);
      }
      return null;
    }

    const data = await response.json();

    // Missing fields fall back to the most restrictive defaults.
    const result = {
      valid: true,
      tier: data.tier || 'free',
      terminalsMax: data.terminalsMax || 1,
      features: data.features || [],
      validatedAt: Date.now(),
    };

    validationCache.set(token, result);
    return result;
  } catch (err) {
    // Request failed (network error, timeout, unreadable body) — use the last
    // known-good validation for this token if there is one.
    const cached = validationCache.get(token);
    if (cached && cached.valid) {
      console.warn(`[auth] Network error validating token, using cache: ${err.message}`);
      return cached;
    }
    return null;
  }
}
61
+
62
/**
 * Create Express middleware that validates Authorization Bearer tokens.
 *
 * On success the request gains `req.ninjaUser = { tier, terminalsMax, features, token }`
 * and `next()` is called. Otherwise a 401 JSON response is sent. Session data
 * younger than five minutes is served from `sessionCache`; anything older (or
 * absent) is re-validated via `validateToken` and written back to the cache.
 *
 * @param {Map} sessionCache - Shared session cache (token -> session data)
 * @returns {import('express').RequestHandler}
 */
function createAuthMiddleware(sessionCache) {
  const CACHE_TTL = 5 * 60 * 1000; // 5 minutes
  // HTTP auth scheme names are case-insensitive, so "bearer x" must be accepted too.
  const BEARER_PATTERN = /^Bearer +(.*)$/i;

  // Project session data onto the shape exposed to route handlers.
  const toNinjaUser = (session, token) => ({
    tier: session.tier,
    terminalsMax: session.terminalsMax,
    features: session.features,
    token,
  });

  return async function authMiddleware(req, res, next) {
    try {
      // Extract Bearer token from Authorization header
      const authHeader = req.headers.authorization;
      const match = typeof authHeader === 'string' ? BEARER_PATTERN.exec(authHeader) : null;
      if (!match) {
        return res.status(401).json({ error: 'Missing or invalid Authorization header' });
      }

      const token = match[1].trim();
      if (!token) {
        return res.status(401).json({ error: 'Empty token' });
      }

      // Check session cache first
      const cached = sessionCache.get(token);
      const now = Date.now();

      if (cached && (now - cached.validatedAt) < CACHE_TTL) {
        // Cache hit and fresh
        req.ninjaUser = toNinjaUser(cached, token);
        return next();
      }

      // Cache miss or stale — validate against backend
      const result = await validateToken(token);

      if (!result || !result.valid) {
        return res.status(401).json({ error: 'Invalid or expired token' });
      }

      // Update cache
      sessionCache.set(token, {
        tier: result.tier,
        terminalsMax: result.terminalsMax,
        features: result.features,
        validatedAt: now,
      });

      req.ninjaUser = toNinjaUser(result, token);
      return next();
    } catch (err) {
      // An async handler's rejection is not caught by Express 4; route it to
      // the error pipeline instead of leaving an unhandled rejection.
      return next(err);
    }
  };
}
122
+
123
/**
 * Validate the token presented on a WebSocket upgrade request.
 *
 * A cache entry younger than five minutes is trusted as-is; otherwise the token
 * is checked with `validateToken` and the cache is refreshed on success.
 *
 * @param {string} token - Token from query param
 * @param {Map} sessionCache - Shared session cache
 * @returns {Promise<{valid: boolean, tier?: string, terminalsMax?: number, features?: string[]}>}
 */
async function validateWebSocketToken(token, sessionCache) {
  const CACHE_TTL = 5 * 60 * 1000;

  if (!token) return { valid: false };

  // Serve from the session cache while the entry is still fresh.
  const entry = sessionCache.get(token);
  const now = Date.now();
  const isFresh = Boolean(entry) && now - entry.validatedAt < CACHE_TTL;
  if (isFresh) return { valid: true, ...entry };

  // Otherwise ask the backend.
  const outcome = await validateToken(token);
  if (!(outcome && outcome.valid)) return { valid: false };

  // Remember the fresh validation for subsequent requests.
  const { tier, terminalsMax, features } = outcome;
  sessionCache.set(token, { tier, terminalsMax, features, validatedAt: now });

  return { valid: true, ...outcome };
}
160
+
161
/**
 * Start heartbeat that re-validates stored sessions.
 *
 * Every `intervalMs` each token in `sessionCache` is re-checked with
 * `validateToken`, one at a time. Sessions that no longer validate are removed
 * from the cache and reported through `onInvalid`; the others have their cached
 * tier/limits/features and `validatedAt` refreshed in place.
 *
 * A sweep that is still running when the next tick fires is not started again,
 * and failures inside a sweep (including a throwing or rejecting `onInvalid`)
 * are logged rather than surfacing as unhandled promise rejections.
 *
 * @param {Map} sessionCache - Shared session cache
 * @param {(token: string) => void} onInvalid - Callback when a session becomes invalid
 * @param {number} [intervalMs=300000] - Heartbeat interval (default 5 min)
 * @returns {NodeJS.Timeout} Interval handle
 */
function startSessionHeartbeat(sessionCache, onInvalid, intervalMs = 5 * 60 * 1000) {
  // Guards against overlapping sweeps: validations are sequential and each may
  // wait on the backend, so one sweep can outlast the interval.
  let sweepInProgress = false;

  return setInterval(async () => {
    if (sweepInProgress) return;
    sweepInProgress = true;

    try {
      for (const [token, session] of sessionCache.entries()) {
        const result = await validateToken(token);
        if (!result || !result.valid) {
          console.log(`[auth] Session invalidated during heartbeat`);
          sessionCache.delete(token);
          try {
            await onInvalid(token);
          } catch (err) {
            // One failing cleanup must not abort the rest of the sweep.
            console.error(`[auth] onInvalid callback failed: ${err.message}`);
          }
        } else {
          // Update cached data
          session.tier = result.tier;
          session.terminalsMax = result.terminalsMax;
          session.features = result.features;
          session.validatedAt = Date.now();
        }
      }
    } catch (err) {
      console.error(`[auth] Session heartbeat failed: ${err.message}`);
    } finally {
      sweepInProgress = false;
    }
  }, intervalMs);
}
188
+
189
+ module.exports = {
190
+ validateToken,
191
+ createAuthMiddleware,
192
+ validateWebSocketToken,
193
+ startSessionHeartbeat,
194
+ BACKEND_URL,
195
+ };
@@ -0,0 +1,346 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+ const { parsePlaybooks } = require('./playbook-tracker');
6
+ const { SUMMARIES_PATH } = require('./analyze-session');
7
+
8
+ const STATE_PATH = path.join(__dirname, '..', 'orchestrator', 'metrics', 'hypothesis-state.json');
9
+
10
+ // Decision thresholds from Phase 4 spec
11
+ const MIN_TEST_SESSIONS = 3;
12
+ const IMPROVEMENT_THRESHOLD = 0.10; // 10%
13
+
14
/**
 * Extract metric targets from hypothesis text.
 * Maps hypothesis claims to measurable metrics.
 *
 * Resolution order:
 *   1. Tool-specific wording (first matching pattern wins) -> type 'tool',
 *      with the captured word normalised to "Capitalised" form as the target.
 *   2. Mentions of "staggered", "dispatch" or "session time" -> type 'session'.
 *   3. Anything else -> type 'aggregate' over all tools.
 *
 * @param {string} hypothesisText - The full hypothesis section text
 * @returns {{ type: 'tool'|'session'|'aggregate', target: string, metric: string }}
 */
function extractMetricTarget(hypothesisText) {
  // Tool-specific hypotheses: "Edit has C rating", "prefer Write over Edit", "Glob is reliable"
  const toolPatterns = [
    { regex: /\b(edit|write|read|bash|glob|grep|agent)\b.*\b(rating|reliable|failure|prefer)/i, metric: 'success_rate' },
    { regex: /prefer\s+(\w+)\s+over\s+(\w+)/i, metric: 'success_rate' },
    { regex: /\b(\w+)\s+has\s+[a-s]\s+rating/i, metric: 'success_rate' },
  ];

  for (const { regex, metric } of toolPatterns) {
    const match = hypothesisText.match(regex);
    if (match) {
      // The first capture group is the tool name in every pattern.
      const [, rawName] = match;
      const toolName = rawName.charAt(0).toUpperCase() + rawName.slice(1).toLowerCase();
      return { type: 'tool', target: toolName, metric };
    }
  }

  // Session-level hypotheses: "staggered dispatch", "session time"
  const text = hypothesisText.toLowerCase();
  const sessionKeywords = ['staggered', 'dispatch', 'session time'];
  if (sessionKeywords.some((keyword) => text.includes(keyword))) {
    return { type: 'session', target: 'duration', metric: 'duration_min' };
  }

  // Default: overall tool success rate
  return { type: 'aggregate', target: 'all_tools', metric: 'success_rate' };
}
47
+
48
/**
 * Load all session summaries from NDJSON file.
 *
 * Each non-empty line is parsed as JSON. Lines that fail to parse, or that
 * parse to something other than a plain object (null, numbers, strings,
 * arrays), are skipped. When several lines share a `session_id`, the last one
 * wins but keeps the position of the first occurrence.
 *
 * @param {string} summariesPath - NDJSON file to read; falls back to SUMMARIES_PATH when falsy
 * @returns {Array<object>} Parsed session summaries (empty if the file does not exist)
 */
function loadSummaries(summariesPath) {
  const filePath = summariesPath || SUMMARIES_PATH;
  if (!fs.existsSync(filePath)) return [];

  const lines = fs.readFileSync(filePath, 'utf8').trim().split('\n').filter(Boolean);

  // Deduplicate by session_id (keep latest). A Map preserves first-insertion
  // order, which downstream code relies on as chronological session order.
  const bySessionId = new Map();

  for (const line of lines) {
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue; // skip malformed
    }

    // `null` and scalars are valid JSON but not session summaries; reading
    // `.session_id` off `null` would throw.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      continue;
    }

    bySessionId.set(parsed.session_id, parsed);
  }

  return Array.from(bySessionId.values());
}
74
+
75
/**
 * Load or initialize hypothesis tracking state.
 * Tracks when each hypothesis was first observed (by session count).
 *
 * Always returns an object whose `hypotheses` property is a plain object, even
 * when the state file is missing, unreadable, not valid JSON, or valid JSON of
 * the wrong shape — callers index into `state.hypotheses` unconditionally.
 *
 * @returns {object} { hypotheses: { [name]: { firstSeenAt: number, sessionCountAtStart: number } } }
 */
function loadState() {
  const emptyState = () => ({ hypotheses: {} });
  const isPlainObject = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  if (!fs.existsSync(STATE_PATH)) {
    return emptyState();
  }

  try {
    const parsed = JSON.parse(fs.readFileSync(STATE_PATH, 'utf8'));
    if (!isPlainObject(parsed)) {
      return emptyState();
    }
    if (!isPlainObject(parsed.hypotheses)) {
      // Keep any other persisted fields, but repair the part callers depend on.
      return { ...parsed, hypotheses: {} };
    }
    return parsed;
  } catch {
    // Unreadable or corrupt state: start tracking afresh.
    return emptyState();
  }
}
90
+
91
/**
 * Persist the hypothesis tracking state to STATE_PATH as pretty-printed
 * (2-space) JSON, creating the containing directory first when necessary.
 * @param {object} state
 */
function saveState(state) {
  fs.mkdirSync(path.dirname(STATE_PATH), { recursive: true });

  const serialized = JSON.stringify(state, null, 2);
  fs.writeFileSync(STATE_PATH, serialized, 'utf8');
}
100
+
101
/**
 * Compute aggregate metrics for a set of sessions.
 *
 * - `target.type === 'session'`: mean of `session[target.metric]` over the
 *   sessions where it is a finite number greater than zero; `sampleSize` is the
 *   number of sessions that contributed.
 * - `target.type === 'tool'`: success rate (successes / invocations) of the
 *   single tool `target.target`, summed across sessions.
 * - any other type: success rate summed over every tool in every session.
 *
 * For success rates `sampleSize` is the total invocation count. When there is
 * nothing to measure the result is `{ value: null, sampleSize: 0 }`.
 *
 * @param {Array<object>} sessions
 * @param {object} target - { type, target, metric }
 * @returns {object} { value: number|null, sampleSize: number }
 */
function computeMetric(sessions, target) {
  const noData = () => ({ value: null, sampleSize: 0 });

  if (sessions.length === 0) {
    return noData();
  }

  if (target.type === 'session') {
    // Session-level metrics (e.g., duration). Require real numbers so a stray
    // numeric string cannot turn the sum into string concatenation.
    const values = sessions
      .map((s) => s?.[target.metric])
      .filter((v) => typeof v === 'number' && Number.isFinite(v) && v > 0);
    if (values.length === 0) {
      return noData();
    }

    const avg = values.reduce((a, b) => a + b, 0) / values.length;
    return { value: avg, sampleSize: values.length };
  }

  // Tool-specific and all-tools success rates differ only in which per-tool
  // records are tallied.
  let totalInvocations = 0;
  let totalSuccesses = 0;

  for (const s of sessions) {
    const tools = s?.tools || {};
    const records = target.type === 'tool'
      ? [tools[target.target]]
      : Object.values(tools);

    for (const toolData of records) {
      if (!toolData) continue; // tool absent from this session, or a null record
      totalInvocations += toolData.invocations || 0;
      totalSuccesses += toolData.successes || 0;
    }
  }

  if (totalInvocations === 0) {
    return noData();
  }

  return {
    value: totalSuccesses / totalInvocations,
    sampleSize: totalInvocations,
  };
}
166
+
167
/**
 * Calculate the relative change between baseline and test, normalised so that
 * a positive result always means "better".
 *
 * For success rates a higher value is better, so the raw relative change is
 * returned. For `duration_min` a lower value is better, so the sign is
 * flipped: a test period that is 15% faster yields +0.15.
 *
 * @param {number} baseline
 * @param {number} test
 * @param {string} metric
 * @returns {number|null} Change as decimal (-0.15 = 15% worse, 0.15 = 15% better),
 *   or null when either value is missing / not a finite number or the baseline is 0.
 */
function calculateChange(baseline, test, metric) {
  // Non-finite inputs (null, undefined, NaN, Infinity) cannot be compared, and
  // a zero baseline has no defined relative change.
  if (!Number.isFinite(baseline) || !Number.isFinite(test) || baseline === 0) {
    return null;
  }

  const rawChange = (test - baseline) / baseline;

  // For duration, lower is better, so invert
  if (metric === 'duration_min') {
    return -rawChange;
  }

  return rawChange;
}
190
+
191
/**
 * Turn a test-session count and a normalised change into a decision.
 * Thresholds are inclusive: a change of exactly ±IMPROVEMENT_THRESHOLD decides.
 *
 * @param {number} testSessionCount - Sessions observed since the hypothesis was first seen
 * @param {number|null} change - Normalised change (positive = better), or null if unknown
 * @returns {{ decision: 'promote'|'reject'|'continue', reason: string }}
 */
function decideHypothesisOutcome(testSessionCount, change) {
  if (testSessionCount < MIN_TEST_SESSIONS) {
    return {
      decision: 'continue',
      reason: `Need ${MIN_TEST_SESSIONS}+ test sessions, have ${testSessionCount}`,
    };
  }
  if (change == null) {
    return { decision: 'continue', reason: 'Insufficient metric data for comparison' };
  }
  if (change >= IMPROVEMENT_THRESHOLD) {
    return {
      decision: 'promote',
      reason: `Improved by ${(change * 100).toFixed(1)}% (>${IMPROVEMENT_THRESHOLD * 100}% threshold)`,
    };
  }
  if (change <= -IMPROVEMENT_THRESHOLD) {
    return {
      decision: 'reject',
      reason: `Worsened by ${(-change * 100).toFixed(1)}% (>${IMPROVEMENT_THRESHOLD * 100}% threshold)`,
    };
  }
  return {
    decision: 'continue',
    reason: `Change of ${(change * 100).toFixed(1)}% within neutral zone (±${IMPROVEMENT_THRESHOLD * 100}%)`,
  };
}

/**
 * Validate all hypotheses in playbooks.md against session metrics.
 * Compares baseline (before hypothesis) vs test (after hypothesis) periods.
 *
 * A hypothesis seen for the first time is recorded with the current session
 * count; every later run treats sessions before that index as baseline and
 * sessions from that index onward as the test period. The tracking state is
 * written back to disk on every call.
 *
 * Decision rules (from Phase 4 spec):
 * - 3+ test sessions AND metric improved by at least 10% -> promote
 * - 3+ test sessions AND metric worsened by at least 10% -> reject
 * - Otherwise -> continue (need more data)
 *
 * @param {string} playbooksPath - Path to playbooks.md
 * @param {string} summariesPath - Path to summaries.ndjson
 * @returns {Array<{ hypothesis: string, status: string, decision: 'promote'|'reject'|'continue', evidence: string, metrics: object }>}
 */
function validateHypotheses(playbooksPath, summariesPath) {
  const playbooks = parsePlaybooks(playbooksPath);
  const summaries = loadSummaries(summariesPath);
  const state = loadState();
  const results = [];

  // Tolerate a state file that lacks the hypotheses map.
  if (state.hypotheses === null || typeof state.hypotheses !== 'object') {
    state.hypotheses = {};
  }

  // Filter for hypothesis/testing entries (status may contain extra text)
  const hypotheses = playbooks.filter((p) =>
    typeof p.status === 'string' &&
    (p.status.includes('hypothesis') || p.status.includes('testing'))
  );

  const currentSessionCount = summaries.length;

  for (const hyp of hypotheses) {
    // Initialize tracking if new hypothesis. Use an own-property check: a plain
    // truthiness test would mistake inherited members (e.g. a hypothesis named
    // "constructor") for existing tracking data.
    if (!Object.prototype.hasOwnProperty.call(state.hypotheses, hyp.name)) {
      state.hypotheses[hyp.name] = {
        firstSeenAt: currentSessionCount,
        sessionCountAtStart: currentSessionCount,
      };
    }

    const tracking = state.hypotheses[hyp.name];
    const target = extractMetricTarget(hyp.section);

    // Split sessions into baseline (before hypothesis) and test (after)
    const baselineSessions = summaries.slice(0, tracking.firstSeenAt);
    const testSessions = summaries.slice(tracking.firstSeenAt);

    const baselineMetric = computeMetric(baselineSessions, target);
    const testMetric = computeMetric(testSessions, target);

    const change = calculateChange(baselineMetric.value, testMetric.value, target.metric);

    // Build evidence string
    const evidenceParts = [
      `Target: ${target.target} (${target.metric})`,
      `Baseline: ${baselineMetric.value?.toFixed(3) ?? 'N/A'} (${baselineMetric.sampleSize} samples)`,
      `Test: ${testMetric.value?.toFixed(3) ?? 'N/A'} (${testMetric.sampleSize} samples)`,
    ];
    if (change != null) {
      const changePercent = (change * 100).toFixed(1);
      evidenceParts.push(`Change: ${change > 0 ? '+' : ''}${changePercent}%`);
    }
    evidenceParts.push(`Test sessions: ${testSessions.length}`);

    const { decision, reason } = decideHypothesisOutcome(testSessions.length, change);

    results.push({
      hypothesis: hyp.name,
      status: hyp.status,
      decision,
      evidence: evidenceParts.join(' | ') + ` | ${reason}`,
      metrics: {
        target,
        baseline: baselineMetric,
        test: testMetric,
        change,
        testSessionCount: testSessions.length,
      },
    });
  }

  // Save updated state
  saveState(state);

  return results;
}
287
+
288
/**
 * Condense validation results into per-decision counts plus a printable report.
 *
 * The report starts with four header lines (total, promote, reject, continue)
 * followed by one line per result tagged with its upper-cased decision.
 *
 * @param {Array} results - Output from validateHypotheses
 * @returns {object} { total, promote, reject, continue, summary: string }
 */
function summarizeResults(results) {
  // Number of results carrying the given decision.
  const countOf = (decision) =>
    results.reduce((n, r) => (r.decision === decision ? n + 1 : n), 0);

  const total = results.length;
  const promote = countOf('promote');
  const reject = countOf('reject');
  const stillTesting = countOf('continue');

  const header = [
    `Hypothesis validation: ${total} total`,
    ` Promote: ${promote}`,
    ` Reject: ${reject}`,
    ` Continue: ${stillTesting}`,
  ];
  const perResult = results.map((r) => ` - [${r.decision.toUpperCase()}] ${r.hypothesis}`);

  return {
    total,
    promote,
    reject,
    continue: stillTesting,
    summary: header.concat(perResult).join('\n'),
  };
}
314
+
315
// CLI mode: `node <this file> [playbooks.md] [summaries.ndjson]`
// Runs the validation once and prints the summary followed by per-hypothesis details.
if (require.main === module) {
  const [, , playbooksArg, summariesArg] = process.argv;
  const playbooksPath = playbooksArg || path.join(__dirname, '..', 'orchestrator', 'playbooks.md');
  const summariesPath = summariesArg || SUMMARIES_PATH;

  console.log('Validating hypotheses...');
  console.log(' Playbooks:', playbooksPath);
  console.log(' Summaries:', summariesPath);
  console.log('');

  const results = validateHypotheses(playbooksPath, summariesPath);
  const { summary } = summarizeResults(results);

  console.log(summary);
  console.log('');
  console.log('Details:');
  results.forEach(({ hypothesis, decision, evidence }) => {
    console.log(`\n${hypothesis}:`);
    console.log(` Decision: ${decision}`);
    console.log(` Evidence: ${evidence}`);
  });
}
337
+
338
+ module.exports = {
339
+ validateHypotheses,
340
+ summarizeResults,
341
+ extractMetricTarget,
342
+ loadSummaries,
343
+ computeMetric,
344
+ calculateChange,
345
+ STATE_PATH,
346
+ };