@pugi/cli 0.1.0-beta.5 → 0.1.0-beta.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/THIRD_PARTY_NOTICES.md +40 -0
- package/assets/pugi-mascot.ansi +15 -25
- package/assets/pugi-prozr2-mascot.ansi +9 -0
- package/bin/run.js +33 -1
- package/dist/commands/jobs-watch.js +201 -0
- package/dist/commands/jobs.js +15 -0
- package/dist/commands/smoke.js +133 -0
- package/dist/core/agent-progress/cleanup.js +134 -0
- package/dist/core/agent-progress/schema.js +144 -0
- package/dist/core/agent-progress/writer.js +101 -0
- package/dist/core/artifact-chain/dispatcher.js +148 -0
- package/dist/core/artifact-chain/exporter.js +164 -0
- package/dist/core/artifact-chain/state.js +243 -0
- package/dist/core/artifact-chain/steps.js +169 -0
- package/dist/core/auth/ensure-authenticated.js +129 -0
- package/dist/core/auth/env-provider.js +238 -0
- package/dist/core/auto-update/channels.js +122 -0
- package/dist/core/auto-update/checker.js +241 -0
- package/dist/core/auto-update/state.js +235 -0
- package/dist/core/bare-mode/index.js +107 -0
- package/dist/core/bash-classifier.js +400 -4
- package/dist/core/checkpoint/resumer.js +149 -0
- package/dist/core/checkpoint/rewinder.js +291 -0
- package/dist/core/codegraph/decision-store.js +248 -0
- package/dist/core/codegraph/detect-repo.js +459 -0
- package/dist/core/codegraph/install.js +134 -0
- package/dist/core/codegraph/offer-hook.js +220 -0
- package/dist/core/compact/auto-trigger.js +96 -0
- package/dist/core/compact/buffer-rewriter.js +115 -0
- package/dist/core/compact/summarizer.js +208 -0
- package/dist/core/compact/token-counter.js +108 -0
- package/dist/core/consensus/diff-capture.js +112 -3
- package/dist/core/context/index.js +7 -0
- package/dist/core/context/markdown-traverse.js +255 -0
- package/dist/core/cost/rate-card.js +129 -0
- package/dist/core/cost/tracker.js +221 -0
- package/dist/core/denial-tracking/index.js +8 -0
- package/dist/core/denial-tracking/state.js +264 -0
- package/dist/core/diagnostics/probe-runner.js +93 -0
- package/dist/core/diagnostics/probes/api.js +46 -0
- package/dist/core/diagnostics/probes/auth.js +86 -0
- package/dist/core/diagnostics/probes/bare-mode.js +42 -0
- package/dist/core/diagnostics/probes/cli-version.js +127 -0
- package/dist/core/diagnostics/probes/config.js +72 -0
- package/dist/core/diagnostics/probes/denial-tracking.js +57 -0
- package/dist/core/diagnostics/probes/disk.js +81 -0
- package/dist/core/diagnostics/probes/git.js +65 -0
- package/dist/core/diagnostics/probes/hooks.js +118 -0
- package/dist/core/diagnostics/probes/mcp.js +75 -0
- package/dist/core/diagnostics/probes/node.js +59 -0
- package/dist/core/diagnostics/probes/pnpm.js +36 -0
- package/dist/core/diagnostics/probes/pugi-md.js +89 -0
- package/dist/core/diagnostics/probes/sandbox.js +40 -0
- package/dist/core/diagnostics/probes/session.js +74 -0
- package/dist/core/diagnostics/probes/status-snapshot.js +488 -0
- package/dist/core/diagnostics/probes/workspace.js +63 -0
- package/dist/core/diagnostics/types.js +70 -0
- package/dist/core/dispatch/cache-cleanup.js +197 -0
- package/dist/core/dispatch/cache-handoff.js +295 -0
- package/dist/core/edits/dispatch.js +218 -2
- package/dist/core/edits/journal.js +199 -0
- package/dist/core/edits/layer-d-ast.js +557 -14
- package/dist/core/edits/verify-hook.js +273 -0
- package/dist/core/edits/worktree.js +322 -0
- package/dist/core/engine/anvil-client.js +115 -5
- package/dist/core/engine/auto-compact.js +179 -0
- package/dist/core/engine/budgets.js +155 -0
- package/dist/core/engine/context-prefix.js +155 -0
- package/dist/core/engine/intent.js +260 -0
- package/dist/core/engine/native-pugi.js +897 -211
- package/dist/core/engine/prompts.js +88 -2
- package/dist/core/engine/strip-internal-fields.js +124 -0
- package/dist/core/engine/tool-bridge.js +1045 -36
- package/dist/core/feedback/queue.js +177 -0
- package/dist/core/feedback/submitter.js +145 -0
- package/dist/core/file-cache.js +113 -1
- package/dist/core/hooks/events.js +44 -0
- package/dist/core/hooks/index.js +15 -0
- package/dist/core/hooks/registry.js +213 -0
- package/dist/core/hooks/runner.js +236 -0
- package/dist/core/hooks/v2/event-emitter.js +115 -0
- package/dist/core/hooks/v2/executor.js +282 -0
- package/dist/core/hooks/v2/index.js +25 -0
- package/dist/core/hooks/v2/lifecycle.js +104 -0
- package/dist/core/hooks/v2/loader.js +216 -0
- package/dist/core/hooks/v2/matcher.js +125 -0
- package/dist/core/hooks/v2/trust.js +143 -0
- package/dist/core/hooks/v2/types.js +86 -0
- package/dist/core/lsp/cache.js +105 -0
- package/dist/core/lsp/client.js +776 -0
- package/dist/core/lsp/language-detect.js +66 -0
- package/dist/core/lsp/post-edit-diagnostics.js +171 -0
- package/dist/core/mcp/client.js +75 -6
- package/dist/core/mcp/http-server.js +553 -0
- package/dist/core/mcp/orchestrator-tools.js +662 -0
- package/dist/core/mcp/permission.js +190 -0
- package/dist/core/mcp/registry.js +24 -2
- package/dist/core/mcp/server-tools.js +219 -0
- package/dist/core/mcp/server.js +397 -0
- package/dist/core/memory/dual-write.js +416 -0
- package/dist/core/memory/phase1-kinds.js +20 -0
- package/dist/core/memory-sync/queue.js +158 -0
- package/dist/core/onboarding/ensure-initialized.js +133 -0
- package/dist/core/onboarding/marker.js +111 -0
- package/dist/core/onboarding/telemetry-state.js +108 -0
- package/dist/core/output-style/presets.js +176 -0
- package/dist/core/output-style/state.js +185 -0
- package/dist/core/path-security.js +284 -2
- package/dist/core/permissions/auto-classifier.js +124 -0
- package/dist/core/permissions/circuit-breaker.js +83 -0
- package/dist/core/permissions/gate.js +278 -0
- package/dist/core/permissions/index.js +20 -0
- package/dist/core/permissions/mode.js +174 -0
- package/dist/core/permissions/state.js +241 -0
- package/dist/core/permissions/tool-class.js +93 -0
- package/dist/core/prd-check/parser.js +215 -0
- package/dist/core/prd-check/reporter.js +127 -0
- package/dist/core/prd-check/session-review.js +557 -0
- package/dist/core/prd-check/verifiers.js +223 -0
- package/dist/core/pugi-md/context-injector.js +76 -0
- package/dist/core/pugi-md/walk-up.js +207 -0
- package/dist/core/release-notes/parser.js +241 -0
- package/dist/core/release-notes/state.js +116 -0
- package/dist/core/repl/history.js +11 -1
- package/dist/core/repl/model-pricing.js +135 -0
- package/dist/core/repl/session.js +1897 -37
- package/dist/core/repl/slash-commands.js +430 -15
- package/dist/core/repl/store/session-store.js +31 -2
- package/dist/core/repl/workspace-context.js +22 -0
- package/dist/core/repo-map/build.js +125 -0
- package/dist/core/repo-map/cache.js +185 -0
- package/dist/core/repo-map/extractor.js +254 -0
- package/dist/core/repo-map/formatter.js +145 -0
- package/dist/core/repo-map/scanner.js +211 -0
- package/dist/core/retry-budget/budget.js +284 -0
- package/dist/core/retry-budget/index.js +5 -0
- package/dist/core/session.js +92 -0
- package/dist/core/settings.js +80 -0
- package/dist/core/share/formatter.js +271 -0
- package/dist/core/share/redactor.js +221 -0
- package/dist/core/share/uploader.js +267 -0
- package/dist/core/skills/defaults.js +457 -0
- package/dist/core/smoke/headless-driver.js +174 -0
- package/dist/core/smoke/orchestrator.js +194 -0
- package/dist/core/smoke/runner.js +238 -0
- package/dist/core/smoke/scenario-parser.js +316 -0
- package/dist/core/subagents/dispatcher-real.js +600 -0
- package/dist/core/subagents/dispatcher.js +113 -24
- package/dist/core/subagents/index.js +18 -5
- package/dist/core/subagents/isolation-matrix.js +213 -0
- package/dist/core/subagents/spawn.js +19 -4
- package/dist/core/telemetry/emitter.js +229 -0
- package/dist/core/telemetry/queue.js +251 -0
- package/dist/core/theme/context.js +91 -0
- package/dist/core/theme/presets.js +228 -0
- package/dist/core/theme/state.js +181 -0
- package/dist/core/todos/invariant.js +10 -0
- package/dist/core/todos/state.js +177 -0
- package/dist/core/transport/version-interceptor.js +166 -0
- package/dist/core/vim/keymap.js +288 -0
- package/dist/core/vim/state.js +92 -0
- package/dist/core/worktree-manager/cleanup.js +123 -0
- package/dist/core/worktree-manager/manager.js +303 -0
- package/dist/index.js +28 -0
- package/dist/runtime/bootstrap.js +190 -0
- package/dist/runtime/cli.js +3241 -343
- package/dist/runtime/commands/cancel.js +231 -0
- package/dist/runtime/commands/chain.js +489 -0
- package/dist/runtime/commands/codegraph-status.js +227 -0
- package/dist/runtime/commands/compact.js +297 -0
- package/dist/runtime/commands/cost.js +199 -0
- package/dist/runtime/commands/delegate.js +242 -11
- package/dist/runtime/commands/dispatch.js +126 -0
- package/dist/runtime/commands/doctor.js +412 -0
- package/dist/runtime/commands/feedback.js +184 -0
- package/dist/runtime/commands/hooks.js +184 -0
- package/dist/runtime/commands/lsp.js +368 -0
- package/dist/runtime/commands/mcp.js +879 -0
- package/dist/runtime/commands/memory.js +508 -0
- package/dist/runtime/commands/model.js +237 -0
- package/dist/runtime/commands/onboarding.js +275 -0
- package/dist/runtime/commands/patch.js +128 -0
- package/dist/runtime/commands/permissions.js +112 -0
- package/dist/runtime/commands/plan.js +143 -0
- package/dist/runtime/commands/prd-check.js +285 -0
- package/dist/runtime/commands/redo-blob-store.js +92 -0
- package/dist/runtime/commands/redo.js +361 -0
- package/dist/runtime/commands/release-notes.js +229 -0
- package/dist/runtime/commands/repo-map.js +95 -0
- package/dist/runtime/commands/report.js +299 -0
- package/dist/runtime/commands/resume.js +118 -0
- package/dist/runtime/commands/review-consensus.js +17 -2
- package/dist/runtime/commands/rewind.js +333 -0
- package/dist/runtime/commands/sessions.js +163 -0
- package/dist/runtime/commands/share.js +316 -0
- package/dist/runtime/commands/status.js +186 -0
- package/dist/runtime/commands/stickers.js +82 -0
- package/dist/runtime/commands/style.js +194 -0
- package/dist/runtime/commands/theme.js +196 -0
- package/dist/runtime/commands/undo.js +32 -0
- package/dist/runtime/commands/update.js +289 -0
- package/dist/runtime/commands/vim.js +140 -0
- package/dist/runtime/commands/worktree.js +177 -0
- package/dist/runtime/commands/worktrees.js +155 -0
- package/dist/runtime/headless-repl.js +195 -0
- package/dist/runtime/headless.js +543 -0
- package/dist/runtime/load-hooks-or-exit.js +71 -0
- package/dist/runtime/plan-decompose.js +531 -0
- package/dist/runtime/version.js +65 -0
- package/dist/tools/agent-tool.js +229 -0
- package/dist/tools/apply-patch.js +556 -0
- package/dist/tools/ask-user-question.js +213 -0
- package/dist/tools/ask-user.js +115 -0
- package/dist/tools/bash.js +203 -4
- package/dist/tools/file-tools.js +85 -14
- package/dist/tools/lsp-tools.js +189 -0
- package/dist/tools/mcp-tool.js +260 -0
- package/dist/tools/multi-edit.js +361 -0
- package/dist/tools/powershell.js +268 -0
- package/dist/tools/registry.js +51 -0
- package/dist/tools/skill-tool.js +96 -0
- package/dist/tools/tasks.js +208 -0
- package/dist/tools/todo-write.js +184 -0
- package/dist/tools/web-fetch.js +147 -2
- package/dist/tools/web-search.js +458 -0
- package/dist/tui/agent-progress-card.js +111 -0
- package/dist/tui/agent-tree.js +10 -0
- package/dist/tui/ask-modal.js +2 -2
- package/dist/tui/ask-user-question-prompt.js +192 -0
- package/dist/tui/compact-banner.js +81 -0
- package/dist/tui/conversation-pane.js +82 -8
- package/dist/tui/cost-table.js +111 -0
- package/dist/tui/doctor-table.js +46 -0
- package/dist/tui/feedback-prompt.js +156 -0
- package/dist/tui/input-box.js +218 -3
- package/dist/tui/markdown-render.js +4 -4
- package/dist/tui/onboarding-wizard.js +240 -0
- package/dist/tui/permissions-picker.js +86 -0
- package/dist/tui/render.js +35 -0
- package/dist/tui/repl-render.js +313 -35
- package/dist/tui/repl-splash-art.js +1 -1
- package/dist/tui/repl-splash-mascot.js +32 -8
- package/dist/tui/repl-splash.js +2 -2
- package/dist/tui/repl.js +85 -5
- package/dist/tui/splash.js +1 -1
- package/dist/tui/status-bar.js +94 -16
- package/dist/tui/status-table.js +7 -0
- package/dist/tui/stickers-art.js +136 -0
- package/dist/tui/style-table.js +28 -0
- package/dist/tui/theme-table.js +29 -0
- package/dist/tui/thinking-spinner.js +123 -0
- package/dist/tui/tool-stream-pane.js +52 -3
- package/dist/tui/update-banner.js +27 -2
- package/dist/tui/vim-input.js +267 -0
- package/dist/tui/welcome-banner.js +107 -0
- package/dist/tui/welcome-data.js +293 -0
- package/docs/examples/codegraph.mcp.json +10 -0
- package/package.json +13 -7
- package/test/scenarios/codegen-create-file.scenario.txt +13 -0
- package/test/scenarios/compact-force.scenario.txt +11 -0
- package/test/scenarios/identity.scenario.txt +11 -0
- package/test/scenarios/persona-handoff.scenario.txt +11 -0
- package/test/scenarios/walkback.scenario.txt +12 -0
- package/dist/core/engine/compaction-hook.js +0 -154
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smoke orchestrator — glues the scenario parser, the headless
|
|
3
|
+
* subprocess driver, and the runner into a single "load → run → report"
|
|
4
|
+
* pipeline. The CLI surface (`pugi smoke`) and the standalone script
|
|
5
|
+
* (`scripts/run-scenarios.ts`) both call into this module so the two
|
|
6
|
+
* entry points share one code path.
|
|
7
|
+
*
|
|
8
|
+
* Phase 1 boundary — `runSmoke` is responsible for:
|
|
9
|
+
*
|
|
10
|
+
* 1. Discovering scenario files under `scenariosDir` (glob match on
|
|
11
|
+
* `*.scenario.txt`).
|
|
12
|
+
* 2. Parsing each file via `parseScenario`. Parse errors are surfaced
|
|
13
|
+
* via the report but do not stop the run — every scenario gets a
|
|
14
|
+
* chance to fail with a clean diagnostic.
|
|
15
|
+
* 3. Driving each scenario through the headless executor (the
|
|
16
|
+
* executor is injected so tests can swap it for a deterministic
|
|
17
|
+
* stub; production wires `runHeadlessScenario` from
|
|
18
|
+
* `headless-driver.ts`).
|
|
19
|
+
* 4. Filtering by `--filter <pattern>` (compiles to fnmatch-lite).
|
|
20
|
+
* 5. Computing pass/fail/summary numbers.
|
|
21
|
+
*
|
|
22
|
+
* The orchestrator is intentionally synchronous (apart from the
|
|
23
|
+
* per-scenario `await`) — running scenarios in parallel is a Phase 2
|
|
24
|
+
* concern. The corpus is small and sequential output is easier to read.
|
|
25
|
+
*/
|
|
26
|
+
import { readdirSync, readFileSync, statSync } from 'node:fs';
|
|
27
|
+
import { resolve } from 'node:path';
|
|
28
|
+
import { parseScenario, } from './scenario-parser.js';
|
|
29
|
+
import { runScenario, } from './runner.js';
|
|
30
|
+
/**
 * Top-level smoke entry. Loads every scenario under `opts.scenariosDir`,
 * optionally narrows the set with `opts.filter`, drives each scenario
 * through the injected executor and the scenario runner, and returns the
 * report so the CLI can pretty-print it AND set `process.exitCode`
 * deterministically.
 *
 * A single misbehaving scenario never aborts the run: parse errors,
 * executor rejections, malformed executor output, and runner crashes are
 * each recorded as a failed result and the loop moves on.
 *
 * @param {object} opts
 * @param {string} opts.scenariosDir Directory handed to `loadScenariosFromDir`.
 * @param {Function} opts.executor Async function called with a scenario; its
 *   result must carry `envelopes` (array) and `workspaceRoot`.
 * @param {string} [opts.filter] Scenario-id filter; empty/absent runs everything.
 * @param {Function} [opts.log] Progress sink, called once per scenario.
 * @param {Function} [opts.now] Clock forwarded to the runner (defaults to `Date.now`).
 * @returns {Promise<object>} `{ total, passed, failed, skipped, results, exitCode }`
 *   where `exitCode` is 0 only when nothing failed.
 */
export async function runSmoke(opts) {
    const log = opts.log ?? noopLog;
    const now = opts.now ?? Date.now;
    const allScenarios = loadScenariosFromDir(opts.scenariosDir);
    const visible = opts.filter && opts.filter.length > 0
        ? filterByPattern(allScenarios, opts.filter)
        : allScenarios;
    const results = [];
    let passed = 0;
    let failed = 0;
    const describeError = (error) => (error instanceof Error ? error.message : String(error));
    // Shared shape for every "could not even evaluate the scenario" outcome.
    const recordExecutorError = (item, message) => {
        results.push({
            id: item.scenario.id,
            filePath: item.scenario.filePath,
            status: 'executor-error',
            durationMs: 0,
            assertionCount: 0,
            failures: [],
            executorError: message,
        });
        failed += 1;
    };
    for (const item of visible) {
        log(`pugi smoke: running ${item.scenario.id}`);
        if (item.parseErrors.length > 0) {
            results.push({
                id: item.scenario.id,
                filePath: item.scenario.filePath,
                status: 'parse-error',
                durationMs: 0,
                assertionCount: 0,
                failures: [],
                parseErrors: item.parseErrors,
            });
            failed += 1;
            continue;
        }
        let out;
        try {
            out = await opts.executor(item.scenario);
        }
        catch (error) {
            recordExecutorError(item, describeError(error));
            continue;
        }
        // The runner indexes into `envelopes`; reject anything that is not an
        // array here so the failure is attributed to this scenario.
        if (!out || !Array.isArray(out.envelopes)) {
            recordExecutorError(item, 'executor returned no envelope array');
            continue;
        }
        const workspaceRoot = typeof out.workspaceRoot === 'string' ? out.workspaceRoot : '.';
        let result;
        try {
            result = runScenario({
                scenario: item.scenario,
                envelopes: out.envelopes,
                workspaceRoot,
                now,
            });
        }
        catch (error) {
            // A throw while evaluating assertions must not take down the
            // remaining scenarios.
            recordExecutorError(item, `scenario runner crashed: ${describeError(error)}`);
            continue;
        }
        results.push({
            id: result.id,
            filePath: item.scenario.filePath,
            status: result.passed ? 'passed' : 'failed',
            durationMs: result.durationMs,
            assertionCount: result.assertionCount,
            failures: result.failures,
        });
        if (result.passed) {
            passed += 1;
        }
        else {
            failed += 1;
        }
    }
    const total = visible.length;
    const skipped = allScenarios.length - visible.length;
    const exitCode = failed === 0 ? 0 : 1;
    return { total, passed, failed, skipped, results, exitCode };
}
|
|
104
|
+
/**
 * Walk `dir` for `*.scenario.txt` files (non-recursive). Returns each
 * file's parsed scenario + collected parse errors so the orchestrator
 * can surface malformed files as failed runs rather than skipping them.
 *
 * Files that cannot be read, or for which the parser returns no scenario
 * object, are still reported: they get a placeholder scenario (id taken
 * from the file name minus the `.scenario.txt` suffix, empty `steps`) and
 * at least one parse error, so callers that check `parseErrors.length`
 * treat them as failures.
 *
 * @param {string} dir Directory to scan. An unreadable or missing
 *   directory yields an empty list (best-effort by design).
 * @returns {Array<{scenario: object, parseErrors: Array}>} Entries sorted
 *   by `scenario.id`.
 */
export function loadScenariosFromDir(dir) {
    const suffix = '.scenario.txt';
    let names = [];
    try {
        names = readdirSync(dir);
    }
    catch {
        return [];
    }
    const out = [];
    for (const name of names) {
        if (!name.endsWith(suffix))
            continue;
        const filePath = resolve(dir, name);
        let stat;
        try {
            stat = statSync(filePath);
        }
        catch {
            // Entry vanished between readdir and stat; nothing to report.
            continue;
        }
        if (!stat.isFile())
            continue;
        // NOTE(review): the real parser decides how ids are derived; the file
        // stem is used here only for entries the parser could not produce.
        const placeholder = { id: name.slice(0, -suffix.length), filePath, steps: [] };
        let body;
        try {
            body = readFileSync(filePath, 'utf8');
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            out.push({
                scenario: placeholder,
                parseErrors: [{ line: 0, message: `could not read scenario file: ${reason}` }],
            });
            continue;
        }
        const parsed = parseScenario(filePath, body);
        if (parsed.scenario) {
            out.push({ scenario: parsed.scenario, parseErrors: parsed.errors });
            continue;
        }
        const parseErrors = Array.isArray(parsed.errors) && parsed.errors.length > 0
            ? parsed.errors
            : [{ line: 0, message: 'scenario could not be parsed' }];
        out.push({ scenario: placeholder, parseErrors });
    }
    // Stable sort by id so report output is deterministic across
    // filesystems with different readdir order.
    out.sort((a, b) => a.scenario.id.localeCompare(b.scenario.id));
    return out;
}
|
|
142
|
+
/**
 * Turn a smoke report into a multi-line, human-readable string. Rendering
 * lives apart from `runSmoke` so the CLI can choose its own format (text
 * vs JSON). The text layout follows a tap-lite shape:
 *
 *   ok 1 - identity (12ms)
 *   not ok 2 - codegen-create-file (8ms)
 *    line 5: EXPECT failed — no envelope matched ...
 *
 *   pugi smoke: 1 passed, 1 failed
 *
 * Ordinals are the 1-based position in `report.results`. Results with a
 * status other than the four known ones produce no line.
 *
 * @param {object} report Report carrying `results`, `passed`, `failed`, `skipped`.
 * @returns {string} Lines joined with `\n`; the summary is the last line.
 */
export function renderReportText(report) {
    const rendered = [];
    for (const [index, entry] of report.results.entries()) {
        if (!entry)
            continue;
        const position = index + 1;
        switch (entry.status) {
            case 'passed':
                rendered.push(`ok ${position} - ${entry.id} (${entry.durationMs}ms)`);
                break;
            case 'failed':
                rendered.push(`not ok ${position} - ${entry.id} (${entry.durationMs}ms)`);
                entry.failures.forEach((failure) => {
                    rendered.push(` line ${failure.line}: ${failure.message}`);
                });
                break;
            case 'parse-error':
                rendered.push(`not ok ${position} - ${entry.id} (parse error)`);
                (entry.parseErrors ?? []).forEach((problem) => {
                    rendered.push(` line ${problem.line}: ${problem.message}`);
                });
                break;
            case 'executor-error':
                rendered.push(`not ok ${position} - ${entry.id} (executor error)`);
                rendered.push(` ${entry.executorError ?? 'unknown executor failure'}`);
                break;
            default:
                break;
        }
    }
    // Blank separator between the per-scenario lines and the summary.
    rendered.push('');
    let summary = `pugi smoke: ${report.passed} passed, ${report.failed} failed`;
    if (report.skipped > 0) {
        summary += `, ${report.skipped} skipped`;
    }
    rendered.push(summary);
    return rendered.join('\n');
}
|
|
185
|
+
/**
 * Keep the loaded scenarios whose id matches `pattern`.
 *
 * Without a `*` the pattern is a plain substring test. With a `*` the
 * pattern is anchored to the whole id and each `*` matches any run of
 * characters; every other character — including `?` — is literal.
 *
 * @param {Array<{scenario: {id: string}}>} scenarios Loaded scenario entries.
 * @param {string} pattern Non-empty filter text.
 * @returns {Array} The matching entries, in their original order.
 */
function filterByPattern(scenarios, pattern) {
    if (!pattern.includes('*')) {
        return scenarios.filter((s) => s.scenario.id.includes(pattern));
    }
    // Escape each literal chunk on its own, then join the chunks with `.*`.
    // `?` is part of the escape class: left raw it becomes a quantifier
    // (`a?*` would match ids without the `a`) or an invalid regex (`?*`).
    const source = pattern
        .split('*')
        .map((chunk) => chunk.replace(/[.+?^${}()|[\]\\]/g, '\\$&'))
        .join('.*');
    const re = new RegExp(`^${source}$`);
    return scenarios.filter((s) => re.test(s.scenario.id));
}
|
|
193
|
+
/**
 * Default log sink used when the caller supplies no `log` callback:
 * accepts a line and discards it.
 */
function noopLog(_line) {
    // Intentionally empty.
}
|
|
194
|
+
//# sourceMappingURL=orchestrator.js.map
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scenario runner for the Pugi MCP test harness (BIG TRACK 10 Phase 1,
|
|
3
|
+
* 2026-05-27).
|
|
4
|
+
*
|
|
5
|
+
* Drives a parsed scenario against a stream of `HeadlessEnvelope`
|
|
6
|
+
* objects (the same shape `pugi --headless` emits on stdout) and a
|
|
7
|
+
* filesystem checker for `EXPECT_FILE` directives. The runner is
|
|
8
|
+
* deliberately decoupled from the subprocess spawn so the spec file
|
|
9
|
+
* can inject deterministic envelope arrays without spawning a real
|
|
10
|
+
* `pugi` binary — that strategy keeps the test cycle under 200ms while
|
|
11
|
+
* still exercising the matching semantics every CI run depends on.
|
|
12
|
+
*
|
|
13
|
+
* Matching semantics (the rules the corpus authors care about):
|
|
14
|
+
*
|
|
15
|
+
* - `EXPECT:` after a `>` user-input line scans envelopes that
|
|
16
|
+
* arrived AFTER that user-input. The cursor resets on each new
|
|
17
|
+
* `>`. If no envelope satisfies the pattern, the assertion fails.
|
|
18
|
+
*
|
|
19
|
+
* - `EXPECT_NOT:` runs the inverse — passes if NO envelope in the
|
|
20
|
+
* post-`>` window satisfies the pattern. A negative assertion that
|
|
21
|
+
* fires on every input line gives the operator a clean signal when
|
|
22
|
+
* a forbidden phrase ("Mira") shows up.
|
|
23
|
+
*
|
|
24
|
+
* - `EXPECT_FILE:` runs once at the END of the scenario, against the
|
|
25
|
+
* final filesystem snapshot. The runner does not race the
|
|
26
|
+
* subprocess — by the time we evaluate file assertions the
|
|
27
|
+
* headless process has exited (or been terminated).
|
|
28
|
+
*
|
|
29
|
+
* Result shape mirrors `node:test` style: top-level pass/fail plus an
|
|
30
|
+
* array of per-assertion records so the CLI can print a grouped
|
|
31
|
+
* summary. Each failure carries the originating line number so the
|
|
32
|
+
* operator can jump straight to the scenario source.
|
|
33
|
+
*/
|
|
34
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
35
|
+
import { resolve } from 'node:path';
|
|
36
|
+
/**
 * Run the assertions in `scenario` against the given envelope stream
 * and filesystem snapshot. The only I/O is the existence check and
 * optional read that `EXPECT_FILE` steps perform.
 *
 * @param {object} inputs
 * @param {object} inputs.scenario Parsed scenario; `steps` entries have a
 *   `kind` of `user-input`, `expect`, or `expect-file`.
 * @param {Array} inputs.envelopes Envelope stream, in arrival order.
 * @param {string} inputs.workspaceRoot Base directory for `EXPECT_FILE` paths.
 * @param {Function} [inputs.now] Clock used for `durationMs` (defaults to `Date.now`).
 * @returns {object} `{ id, passed, failures, durationMs, assertionCount }`;
 *   each failure carries the scenario `line` and a `message`.
 */
export function runScenario(inputs) {
    const now = inputs.now ?? Date.now;
    const startedAt = now();
    const failures = [];
    let assertionCount = 0;
    // Group steps into runs anchored by `>` user-input lines. Each run
    // owns the EXPECT/EXPECT_NOT assertions that follow it until the
    // next `>`. EXPECT_FILE is collected globally and evaluated after
    // every user-input run is processed.
    const runs = [];
    const fileChecks = [];
    for (const step of inputs.scenario.steps) {
        if (step.kind === 'user-input') {
            runs.push({ userStep: step, expects: [] });
            continue;
        }
        if (step.kind === 'expect') {
            // Assertions that appear BEFORE any `>` attach to a synthetic
            // pre-run so the matching pass still sees them.
            if (runs.length === 0) {
                runs.push({ userStep: null, expects: [] });
            }
            runs[runs.length - 1].expects.push(step);
            continue;
        }
        if (step.kind === 'expect-file') {
            fileChecks.push(step);
        }
    }
    // Positions of the `user-turn` marker envelopes, in stream order.
    const userTurnIndices = [];
    for (let i = 0; i < inputs.envelopes.length; i += 1) {
        const env = inputs.envelopes[i];
        if (env && env.kind === 'user-turn') {
            userTurnIndices.push(i);
        }
    }
    // Counts only runs that have a real `>` step. The synthetic pre-run
    // must not consume a user-turn marker; indexing markers by position in
    // `runs` would shift every later window onto the following turn.
    let userRunOrdinal = 0;
    for (const run of runs) {
        // Default window is the whole stream: used by the pre-run and by
        // user runs for which the stream carries no matching marker.
        let start = 0;
        let end = inputs.envelopes.length;
        if (run.userStep) {
            const marker = userTurnIndices[userRunOrdinal];
            if (marker !== undefined) {
                // From just after this run's marker up to (not including)
                // the next marker.
                start = marker + 1;
                end = userTurnIndices[userRunOrdinal + 1] ?? inputs.envelopes.length;
            }
            userRunOrdinal += 1;
        }
        const window = inputs.envelopes.slice(start, end);
        for (const expectation of run.expects) {
            assertionCount += 1;
            const matched = window.some((env) => matchesEnvelope(env, expectation.pattern));
            if (expectation.polarity === 'positive' && !matched) {
                failures.push({
                    line: expectation.line,
                    message: `EXPECT failed — no envelope matched ${describePattern(expectation.pattern)}`,
                });
            }
            else if (expectation.polarity === 'negative' && matched) {
                failures.push({
                    line: expectation.line,
                    message: `EXPECT_NOT failed — envelope matched ${describePattern(expectation.pattern)}`,
                });
            }
        }
    }
    for (const check of fileChecks) {
        assertionCount += 1;
        const absolute = resolve(inputs.workspaceRoot, check.file);
        if (!existsSync(absolute)) {
            failures.push({
                line: check.line,
                message: `EXPECT_FILE failed — ${check.file} does not exist`,
            });
            continue;
        }
        if (check.content === undefined) {
            continue;
        }
        let body;
        try {
            body = readFileSync(absolute, 'utf8');
        }
        catch (error) {
            // The path exists but is a directory or is unreadable: report it
            // as a failed assertion instead of throwing out of the runner.
            const reason = error instanceof Error ? error.message : String(error);
            failures.push({
                line: check.line,
                message: `EXPECT_FILE failed — ${check.file} could not be read (${reason})`,
            });
            continue;
        }
        if (!body.includes(check.content)) {
            failures.push({
                line: check.line,
                message: `EXPECT_FILE failed — ${check.file} does not contain "${check.content}"`,
            });
        }
    }
    const durationMs = Math.max(0, now() - startedAt);
    return {
        id: inputs.scenario.id,
        passed: failures.length === 0,
        failures,
        durationMs,
        assertionCount,
    };
}
|
|
143
|
+
/**
 * Narrow a list of parsed scenarios to those whose `id` satisfies a
 * small glob-ish pattern. A missing or empty pattern means "no filter"
 * and hands back the input list itself. Otherwise the pattern is compiled
 * once by `compileFilterPattern` and applied to each scenario's `id`, so
 * `pugi smoke --filter identity` behaves as a substring search and a
 * `--filter "id*"` form behaves as a wildcard match.
 *
 * @param {Array<{id: string}>} scenarios Parsed scenarios.
 * @param {string} [pattern] Filter text.
 * @returns {Array<{id: string}>} Matching scenarios in their original order.
 */
export function filterScenarios(scenarios, pattern) {
    const hasPattern = Boolean(pattern) && pattern.length > 0;
    if (!hasPattern) {
        return scenarios;
    }
    const accepts = compileFilterPattern(pattern);
    const kept = scenarios.filter((candidate) => accepts(candidate.id));
    return kept;
}
|
|
158
|
+
/**
 * Compile a filter pattern into an `(id) => boolean` predicate.
 *
 * Without a `*` the predicate is plain substring containment. With a `*`
 * the pattern is anchored to the whole id and each `*` matches any run of
 * characters; every other character — including `?` — is literal.
 *
 * @param {string} pattern Non-empty filter text.
 * @returns {(id: string) => boolean} Predicate over scenario ids.
 */
function compileFilterPattern(pattern) {
    if (!pattern.includes('*')) {
        return (id) => id.includes(pattern);
    }
    // Escape RegExp metacharacters in each literal chunk, then join the
    // chunks with `.*`. `?` must be escaped too: left raw it acts as a
    // quantifier (`a?*` matches ids lacking the `a`) or makes the source
    // invalid (`?*` throws "Nothing to repeat").
    const source = pattern
        .split('*')
        .map((chunk) => chunk.replace(/[.+?^${}()|[\]\\]/g, '\\$&'))
        .join('.*');
    const re = new RegExp(`^${source}$`);
    return (id) => re.test(id);
}
|
|
169
|
+
/**
 * Decide whether a single envelope satisfies a pattern. Exported for
 * tests that want to probe the matching logic without building a full
 * scenario object.
 *
 * Pattern kinds:
 *  - `persona-turn-contains`: envelope is a `persona-turn` whose string
 *    body contains at least one of `pattern.substrings`.
 *  - `tool-call`: envelope is a `tool-call` whose body parses as a JSON
 *    object; `pattern.tool` (if set) must equal its `tool`, and every
 *    `pattern.argsSubset` entry must be present in its `args` with an
 *    equal stringified value.
 *  - `envelope-kind`: envelope `kind` equals `pattern.envelopeKind`.
 *
 * Malformed envelopes never throw; they simply do not match.
 *
 * @param {object} env Envelope with `kind` and (usually) `body`.
 * @param {object} pattern Parsed pattern with a `kind` discriminator.
 * @returns {boolean} True when the envelope satisfies the pattern.
 */
export function matchesEnvelope(env, pattern) {
    // A hole or non-object in the stream cannot match anything.
    if (env === null || typeof env !== 'object') {
        return false;
    }
    if (pattern.kind === 'persona-turn-contains') {
        if (env.kind !== 'persona-turn')
            return false;
        const body = env.body;
        // A persona turn without text has nothing to search.
        if (typeof body !== 'string')
            return false;
        return pattern.substrings.some((s) => body.includes(s));
    }
    if (pattern.kind === 'tool-call') {
        if (env.kind !== 'tool-call')
            return false;
        // The body is JSON. Tool calls that don't parse as JSON cannot
        // match — surface a clean fail instead of crashing.
        let parsed;
        try {
            parsed = JSON.parse(env.body);
        }
        catch {
            return false;
        }
        if (!isRecord(parsed))
            return false;
        if (pattern.tool !== undefined && parsed['tool'] !== pattern.tool) {
            return false;
        }
        if (pattern.argsSubset !== undefined) {
            const args = parsed['args'];
            if (!isRecord(args))
                return false;
            for (const [k, v] of Object.entries(pattern.argsSubset)) {
                // Require the key to be the call's own argument: a missing
                // key would otherwise stringify to "undefined" (and an
                // inherited one to its source text) and could match by accident.
                if (!Object.prototype.hasOwnProperty.call(args, k))
                    return false;
                // Expected values are compared against the stringified actual.
                if (String(args[k]) !== v)
                    return false;
            }
        }
        return true;
    }
    if (pattern.kind === 'envelope-kind') {
        return env.kind === pattern.envelopeKind;
    }
    return false;
}
|
|
213
|
+
/**
 * True for non-null, non-array values whose `typeof` is `'object'`, i.e.
 * something that can be indexed by string key.
 */
function isRecord(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === 'object';
}
|
|
216
|
+
/**
 * Produce the short human-readable description of a pattern that is
 * embedded in assertion failure messages. Unrecognised pattern kinds
 * are described as `unknown pattern`.
 */
function describePattern(pattern) {
    switch (pattern.kind) {
        case 'persona-turn-contains': {
            const quoted = pattern.substrings.map((s) => `"${s}"`).join(', ');
            return `persona-turn containing one of [${quoted}]`;
        }
        case 'tool-call': {
            const fragments = [];
            if (pattern.tool) {
                fragments.push(`tool=${pattern.tool}`);
            }
            if (pattern.argsSubset) {
                Object.entries(pattern.argsSubset).forEach(([key, value]) => {
                    fragments.push(`${key}=${value}`);
                });
            }
            // trim() drops the trailing space left when there are no fragments.
            return `tool-call ${fragments.join(' ')}`.trim();
        }
        case 'envelope-kind':
            return `envelope kind=${pattern.envelopeKind}`;
        default:
            return 'unknown pattern';
    }
}
|
|
238
|
+
//# sourceMappingURL=runner.js.map
|