@pugi/cli 0.1.0-beta.99 → 1.0.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +11 -191
- package/bin/pugi +8 -0
- package/package.json +15 -71
- package/postinstall.mjs +31 -0
- package/CHANGELOG.md +0 -132
- package/THIRD_PARTY_NOTICES.md +0 -40
- package/assets/pugi-mascot.ansi +0 -16
- package/assets/pugi-prozr2-mascot.ansi +0 -9
- package/bin/run.js +0 -34
- package/dist/commands/deploy.js +0 -439
- package/dist/commands/flatten.js +0 -191
- package/dist/commands/jobs-watch.js +0 -201
- package/dist/commands/jobs.js +0 -260
- package/dist/commands/retro.js +0 -210
- package/dist/commands/smoke.js +0 -133
- package/dist/core/agent-progress/cleanup.js +0 -134
- package/dist/core/agent-progress/schema.js +0 -144
- package/dist/core/agent-progress/writer.js +0 -101
- package/dist/core/agents/adaptive-router.js +0 -330
- package/dist/core/agents/loader.js +0 -104
- package/dist/core/agents/query-decomposer.js +0 -297
- package/dist/core/agents/registry.js +0 -69
- package/dist/core/approvals/shortcut-resolver.js +0 -98
- package/dist/core/artifact-chain/dispatcher.js +0 -148
- package/dist/core/artifact-chain/exporter.js +0 -164
- package/dist/core/artifact-chain/state.js +0 -243
- package/dist/core/artifact-chain/steps.js +0 -169
- package/dist/core/ask-user/question.js +0 -92
- package/dist/core/audit/audit-trail.js +0 -275
- package/dist/core/auth/ensure-authenticated.js +0 -129
- package/dist/core/auth/env-provider.js +0 -238
- package/dist/core/auto-open-browser.js +0 -128
- package/dist/core/auto-update/channels.js +0 -122
- package/dist/core/auto-update/checker.js +0 -241
- package/dist/core/auto-update/state.js +0 -235
- package/dist/core/bare-mode/index.js +0 -107
- package/dist/core/bash/redirect.js +0 -281
- package/dist/core/bash-classifier.js +0 -1397
- package/dist/core/checkpoint/resumer.js +0 -149
- package/dist/core/checkpoint/rewinder.js +0 -291
- package/dist/core/checkpoints/shadow-git.js +0 -670
- package/dist/core/citations/parser.js +0 -109
- package/dist/core/classifier/yolo-classifier.js +0 -88
- package/dist/core/clipboard.js +0 -70
- package/dist/core/codegraph/decision-store.js +0 -248
- package/dist/core/codegraph/detect-repo.js +0 -459
- package/dist/core/codegraph/install.js +0 -134
- package/dist/core/codegraph/offer-hook.js +0 -220
- package/dist/core/compact/auto-trigger.js +0 -96
- package/dist/core/compact/buffer-rewriter.js +0 -115
- package/dist/core/compact/summarizer.js +0 -208
- package/dist/core/compact/token-counter.js +0 -108
- package/dist/core/consensus/anvil-fanout.js +0 -276
- package/dist/core/consensus/diff-capture.js +0 -491
- package/dist/core/consensus/rubric.js +0 -233
- package/dist/core/context/builder.js +0 -114
- package/dist/core/context/compaction-events.js +0 -99
- package/dist/core/context/compaction.js +0 -602
- package/dist/core/context/index.js +0 -28
- package/dist/core/context/invariants.js +0 -250
- package/dist/core/context/markdown-loader.js +0 -288
- package/dist/core/context/markdown-traverse.js +0 -255
- package/dist/core/context/pugiignore.js +0 -316
- package/dist/core/context/repo-skeleton.js +0 -533
- package/dist/core/context/tool-eviction.js +0 -55
- package/dist/core/context/watcher.js +0 -342
- package/dist/core/context/working-set.js +0 -165
- package/dist/core/coordinator/agent-tools.js +0 -77
- package/dist/core/coordinator/agent-toolset.js +0 -65
- package/dist/core/coordinator/fsm.js +0 -73
- package/dist/core/coordinator/mode-fsm.js +0 -70
- package/dist/core/cost/rate-card.js +0 -129
- package/dist/core/cost/tracker.js +0 -221
- package/dist/core/credentials.js +0 -355
- package/dist/core/cron/scheduler.js +0 -138
- package/dist/core/denial-tracking/index.js +0 -8
- package/dist/core/denial-tracking/state.js +0 -264
- package/dist/core/diagnostics/probe-runner.js +0 -93
- package/dist/core/diagnostics/probes/api.js +0 -46
- package/dist/core/diagnostics/probes/auth.js +0 -93
- package/dist/core/diagnostics/probes/bare-mode.js +0 -42
- package/dist/core/diagnostics/probes/cli-version.js +0 -127
- package/dist/core/diagnostics/probes/config.js +0 -72
- package/dist/core/diagnostics/probes/denial-tracking.js +0 -57
- package/dist/core/diagnostics/probes/disk.js +0 -81
- package/dist/core/diagnostics/probes/engine-live.js +0 -46
- package/dist/core/diagnostics/probes/git.js +0 -65
- package/dist/core/diagnostics/probes/hooks.js +0 -118
- package/dist/core/diagnostics/probes/mcp.js +0 -75
- package/dist/core/diagnostics/probes/node.js +0 -59
- package/dist/core/diagnostics/probes/pnpm.js +0 -36
- package/dist/core/diagnostics/probes/pugi-md.js +0 -89
- package/dist/core/diagnostics/probes/sandbox.js +0 -72
- package/dist/core/diagnostics/probes/session.js +0 -74
- package/dist/core/diagnostics/probes/status-snapshot.js +0 -488
- package/dist/core/diagnostics/probes/workspace.js +0 -63
- package/dist/core/diagnostics/types.js +0 -70
- package/dist/core/dispatch/cache-cleanup.js +0 -197
- package/dist/core/dispatch/cache-handoff.js +0 -295
- package/dist/core/edits/apply-patch-layer-e.js +0 -189
- package/dist/core/edits/dispatch.js +0 -511
- package/dist/core/edits/format-detector.js +0 -260
- package/dist/core/edits/format-matrix.js +0 -26
- package/dist/core/edits/fuzzy-ladder.js +0 -650
- package/dist/core/edits/index.js +0 -19
- package/dist/core/edits/journal.js +0 -199
- package/dist/core/edits/layer-a-apply.js +0 -217
- package/dist/core/edits/layer-a-fuzzy-apply.js +0 -198
- package/dist/core/edits/layer-b-apply.js +0 -211
- package/dist/core/edits/layer-c-apply.js +0 -160
- package/dist/core/edits/layer-d-ast.js +0 -572
- package/dist/core/edits/marker-parser.js +0 -401
- package/dist/core/edits/security-gate.js +0 -223
- package/dist/core/edits/verify-hook.js +0 -273
- package/dist/core/edits/worktree.js +0 -322
- package/dist/core/engine/adapter-runner.js +0 -8
- package/dist/core/engine/anvil-client.js +0 -344
- package/dist/core/engine/auto-compact.js +0 -179
- package/dist/core/engine/budgets.js +0 -195
- package/dist/core/engine/context-prefix.js +0 -155
- package/dist/core/engine/index.js +0 -12
- package/dist/core/engine/intensity.js +0 -163
- package/dist/core/engine/intent.js +0 -260
- package/dist/core/engine/native-pugi.js +0 -1616
- package/dist/core/engine/noop.js +0 -27
- package/dist/core/engine/prompts.js +0 -236
- package/dist/core/engine/strip-internal-fields.js +0 -124
- package/dist/core/engine/tool-bridge.js +0 -2173
- package/dist/core/engine/verification-patterns.js +0 -195
- package/dist/core/evaluation/golden-dataset.js +0 -293
- package/dist/core/feedback/queue.js +0 -177
- package/dist/core/feedback/submitter.js +0 -145
- package/dist/core/file-cache.js +0 -141
- package/dist/core/flatten/flatten-repo.js +0 -439
- package/dist/core/format/osc8-link.js +0 -28
- package/dist/core/hook-chains.js +0 -392
- package/dist/core/hooks/citation-verify-hook.js +0 -138
- package/dist/core/hooks/citation-verify.js +0 -112
- package/dist/core/hooks/events.js +0 -46
- package/dist/core/hooks/index.js +0 -15
- package/dist/core/hooks/registry.js +0 -216
- package/dist/core/hooks/runner.js +0 -236
- package/dist/core/hooks/v2/event-emitter.js +0 -115
- package/dist/core/hooks/v2/executor.js +0 -282
- package/dist/core/hooks/v2/index.js +0 -25
- package/dist/core/hooks/v2/lifecycle.js +0 -104
- package/dist/core/hooks/v2/loader.js +0 -216
- package/dist/core/hooks/v2/matcher.js +0 -125
- package/dist/core/hooks/v2/trust.js +0 -143
- package/dist/core/hooks/v2/types.js +0 -86
- package/dist/core/hooks/worktree-events.js +0 -158
- package/dist/core/hooks.js +0 -415
- package/dist/core/image/renderer.js +0 -71
- package/dist/core/index-store.js +0 -260
- package/dist/core/init/detector.js +0 -582
- package/dist/core/init/template-renderer.js +0 -242
- package/dist/core/jobs/registry.js +0 -462
- package/dist/core/ledger/results-tsv.js +0 -142
- package/dist/core/log-discipline/stdout-redirect.js +0 -51
- package/dist/core/lsp/cache.js +0 -105
- package/dist/core/lsp/client.js +0 -1229
- package/dist/core/lsp/language-detect.js +0 -66
- package/dist/core/lsp/post-edit-diagnostics.js +0 -171
- package/dist/core/lsp/server-detect.js +0 -173
- package/dist/core/lsp/symbol-cache.js +0 -162
- package/dist/core/lsp/symbol-tools.js +0 -664
- package/dist/core/mcp/client.js +0 -385
- package/dist/core/mcp/http-server.js +0 -553
- package/dist/core/mcp/orchestrator-config.js +0 -192
- package/dist/core/mcp/orchestrator-tools.js +0 -806
- package/dist/core/mcp/permission.js +0 -190
- package/dist/core/mcp/registry.js +0 -193
- package/dist/core/mcp/server-tools.js +0 -219
- package/dist/core/mcp/server.js +0 -397
- package/dist/core/mcp/trust.js +0 -91
- package/dist/core/memory/dual-write.js +0 -416
- package/dist/core/memory/passive-extract.js +0 -130
- package/dist/core/memory/phase1-kinds.js +0 -20
- package/dist/core/memory/secret-scanner.js +0 -304
- package/dist/core/memory-sync/queue.js +0 -170
- package/dist/core/metrics/extract.js +0 -113
- package/dist/core/modes/roo-modes.js +0 -68
- package/dist/core/onboarding/ensure-initialized.js +0 -133
- package/dist/core/onboarding/marker.js +0 -111
- package/dist/core/onboarding/telemetry-state.js +0 -108
- package/dist/core/output-style/presets.js +0 -176
- package/dist/core/output-style/state.js +0 -185
- package/dist/core/path-security.js +0 -345
- package/dist/core/permission.js +0 -369
- package/dist/core/permissions/auto-classifier.js +0 -124
- package/dist/core/permissions/bash-parser.js +0 -371
- package/dist/core/permissions/circuit-breaker.js +0 -83
- package/dist/core/permissions/constrained-edit.js +0 -91
- package/dist/core/permissions/gate.js +0 -278
- package/dist/core/permissions/index.js +0 -20
- package/dist/core/permissions/mode.js +0 -174
- package/dist/core/permissions/network-egress.js +0 -137
- package/dist/core/permissions/state.js +0 -241
- package/dist/core/permissions/tool-class.js +0 -107
- package/dist/core/plan-mode/ui-state.js +0 -51
- package/dist/core/plans/plan-artifact.js +0 -721
- package/dist/core/policy-limits/etag-store.js +0 -122
- package/dist/core/prd-check/parser.js +0 -215
- package/dist/core/prd-check/reporter.js +0 -127
- package/dist/core/prd-check/session-review.js +0 -557
- package/dist/core/prd-check/verifiers.js +0 -223
- package/dist/core/prompt-cache/client-cache.js +0 -99
- package/dist/core/prompts/assembly.js +0 -29
- package/dist/core/prompts/registry.js +0 -364
- package/dist/core/pugi-gitignore.js +0 -52
- package/dist/core/pugi-md/cc-compat-rules.js +0 -735
- package/dist/core/pugi-md/context-injector.js +0 -76
- package/dist/core/pugi-md/walk-up.js +0 -207
- package/dist/core/python/uv-installer.js +0 -270
- package/dist/core/python/uv-resolver.js +0 -83
- package/dist/core/rate-limit/narrator.js +0 -146
- package/dist/core/recipes/cli-types.js +0 -20
- package/dist/core/recipes/loader.js +0 -103
- package/dist/core/recipes/runner.js +0 -345
- package/dist/core/recipes/schema.js +0 -587
- package/dist/core/release-notes/parser.js +0 -241
- package/dist/core/release-notes/state.js +0 -116
- package/dist/core/repl/ask.js +0 -512
- package/dist/core/repl/cancellation.js +0 -98
- package/dist/core/repl/cap-warning.js +0 -91
- package/dist/core/repl/clipboard-read.js +0 -174
- package/dist/core/repl/dispatch-fsm.js +0 -220
- package/dist/core/repl/engine-bridge.js +0 -303
- package/dist/core/repl/history-search.js +0 -175
- package/dist/core/repl/history.js +0 -182
- package/dist/core/repl/kill-ring.js +0 -138
- package/dist/core/repl/model-pricing.js +0 -135
- package/dist/core/repl/privacy-banner.js +0 -71
- package/dist/core/repl/session.js +0 -4962
- package/dist/core/repl/slash-commands.js +0 -747
- package/dist/core/repl/store/index.js +0 -12
- package/dist/core/repl/store/jsonl-log.js +0 -321
- package/dist/core/repl/store/lockfile.js +0 -155
- package/dist/core/repl/store/session-store.js +0 -821
- package/dist/core/repl/store/types.js +0 -44
- package/dist/core/repl/store/uuid-v7.js +0 -68
- package/dist/core/repl/tool-route.js +0 -382
- package/dist/core/repl/workspace-context.js +0 -206
- package/dist/core/repo-map/build.js +0 -125
- package/dist/core/repo-map/cache.js +0 -185
- package/dist/core/repo-map/extractor.js +0 -254
- package/dist/core/repo-map/formatter.js +0 -145
- package/dist/core/repo-map/page-rank.js +0 -105
- package/dist/core/repo-map/scanner.js +0 -211
- package/dist/core/retro/git-collector.js +0 -251
- package/dist/core/retro/health-card.js +0 -25
- package/dist/core/retro/metrics.js +0 -342
- package/dist/core/retro/narrative.js +0 -249
- package/dist/core/retro/plane-collector.js +0 -274
- package/dist/core/retro/pr-issue-link.js +0 -65
- package/dist/core/retro/types.js +0 -16
- package/dist/core/retry-budget/budget.js +0 -284
- package/dist/core/retry-budget/index.js +0 -5
- package/dist/core/retry-budget/retry-cap.js +0 -74
- package/dist/core/routing/lead-worker.js +0 -43
- package/dist/core/routing/pre-flight-estimator.js +0 -108
- package/dist/core/runs/run-tree.js +0 -103
- package/dist/core/sandboxing/adapter.js +0 -29
- package/dist/core/sandboxing/index.js +0 -49
- package/dist/core/sandboxing/none.js +0 -19
- package/dist/core/sandboxing/seatbelt.js +0 -183
- package/dist/core/security/injection-scanner.js +0 -367
- package/dist/core/security/output-filter.js +0 -418
- package/dist/core/session/env-file.js +0 -105
- package/dist/core/session/section-budgets.js +0 -140
- package/dist/core/session.js +0 -377
- package/dist/core/settings.js +0 -400
- package/dist/core/share/formatter.js +0 -271
- package/dist/core/share/redactor.js +0 -221
- package/dist/core/share/uploader.js +0 -267
- package/dist/core/skills/defaults.js +0 -457
- package/dist/core/skills/loader.js +0 -454
- package/dist/core/skills/sources.js +0 -480
- package/dist/core/skills/trust.js +0 -172
- package/dist/core/smoke/headless-driver.js +0 -174
- package/dist/core/smoke/orchestrator.js +0 -194
- package/dist/core/smoke/runner.js +0 -238
- package/dist/core/smoke/scenario-parser.js +0 -316
- package/dist/core/statusline.js +0 -99
- package/dist/core/subagents/dispatcher-real.js +0 -600
- package/dist/core/subagents/dispatcher.js +0 -352
- package/dist/core/subagents/index.js +0 -39
- package/dist/core/subagents/isolation-matrix.js +0 -213
- package/dist/core/subagents/spawn.js +0 -101
- package/dist/core/telemetry/emitter.js +0 -229
- package/dist/core/telemetry/queue.js +0 -251
- package/dist/core/theme/context.js +0 -91
- package/dist/core/theme/presets.js +0 -228
- package/dist/core/theme/state.js +0 -181
- package/dist/core/todos/invariant.js +0 -10
- package/dist/core/todos/state.js +0 -177
- package/dist/core/tool-schema/compressor.js +0 -89
- package/dist/core/transport/version-interceptor.js +0 -166
- package/dist/core/trust.js +0 -109
- package/dist/core/tui/thinking-block.js +0 -64
- package/dist/core/vim/keymap.js +0 -288
- package/dist/core/vim/state.js +0 -92
- package/dist/core/watch-markers/marker-watcher.js +0 -133
- package/dist/core/worktree/include-parser.js +0 -249
- package/dist/core/worktree-manager/cleanup.js +0 -123
- package/dist/core/worktree-manager/manager.js +0 -303
- package/dist/index.js +0 -44
- package/dist/runtime/bootstrap.js +0 -190
- package/dist/runtime/cli.js +0 -8121
- package/dist/runtime/commands/agents.js +0 -385
- package/dist/runtime/commands/budget.js +0 -192
- package/dist/runtime/commands/cancel.js +0 -231
- package/dist/runtime/commands/chain.js +0 -489
- package/dist/runtime/commands/codegraph-status.js +0 -227
- package/dist/runtime/commands/compact.js +0 -297
- package/dist/runtime/commands/config.js +0 -595
- package/dist/runtime/commands/cost.js +0 -199
- package/dist/runtime/commands/delegate.js +0 -312
- package/dist/runtime/commands/dispatch.js +0 -126
- package/dist/runtime/commands/doctor.js +0 -579
- package/dist/runtime/commands/feedback.js +0 -184
- package/dist/runtime/commands/hooks.js +0 -187
- package/dist/runtime/commands/init.js +0 -254
- package/dist/runtime/commands/lsp.js +0 -368
- package/dist/runtime/commands/mcp.js +0 -935
- package/dist/runtime/commands/memory.js +0 -582
- package/dist/runtime/commands/model.js +0 -237
- package/dist/runtime/commands/onboarding.js +0 -275
- package/dist/runtime/commands/patch.js +0 -128
- package/dist/runtime/commands/permissions.js +0 -112
- package/dist/runtime/commands/plan.js +0 -143
- package/dist/runtime/commands/prd-check.js +0 -285
- package/dist/runtime/commands/privacy.js +0 -107
- package/dist/runtime/commands/recipe.js +0 -325
- package/dist/runtime/commands/redo-blob-store.js +0 -92
- package/dist/runtime/commands/redo.js +0 -361
- package/dist/runtime/commands/release-notes.js +0 -229
- package/dist/runtime/commands/repo-map.js +0 -95
- package/dist/runtime/commands/report.js +0 -299
- package/dist/runtime/commands/resume.js +0 -118
- package/dist/runtime/commands/review-consensus.js +0 -414
- package/dist/runtime/commands/rewind.js +0 -333
- package/dist/runtime/commands/roster.js +0 -117
- package/dist/runtime/commands/sessions.js +0 -163
- package/dist/runtime/commands/share.js +0 -316
- package/dist/runtime/commands/skills.js +0 -401
- package/dist/runtime/commands/status.js +0 -186
- package/dist/runtime/commands/stickers.js +0 -82
- package/dist/runtime/commands/style.js +0 -194
- package/dist/runtime/commands/theme.js +0 -196
- package/dist/runtime/commands/undo.js +0 -361
- package/dist/runtime/commands/update.js +0 -289
- package/dist/runtime/commands/vim.js +0 -140
- package/dist/runtime/commands/worktree.js +0 -177
- package/dist/runtime/commands/worktrees.js +0 -155
- package/dist/runtime/deprecation-warning.js +0 -69
- package/dist/runtime/engine-exit-code.js +0 -50
- package/dist/runtime/headless-repl.js +0 -195
- package/dist/runtime/headless.js +0 -548
- package/dist/runtime/load-hooks-or-exit.js +0 -71
- package/dist/runtime/plan-decompose.js +0 -531
- package/dist/runtime/sigint-guard.js +0 -272
- package/dist/runtime/stream-renderer.js +0 -195
- package/dist/runtime/update-check.js +0 -294
- package/dist/runtime/version.js +0 -65
- package/dist/runtime/worktree-bootstrap.js +0 -579
- package/dist/skills/bundled/batch.js +0 -617
- package/dist/skills/bundled/index.js +0 -45
- package/dist/skills/bundled/loop.js +0 -358
- package/dist/skills/bundled/remember.js +0 -383
- package/dist/skills/bundled/simplify.js +0 -289
- package/dist/skills/bundled/skillify.js +0 -373
- package/dist/skills/bundled/stuck.js +0 -558
- package/dist/skills/bundled/verify.js +0 -439
- package/dist/testing/vcr.js +0 -486
- package/dist/tools/agent-tool.js +0 -229
- package/dist/tools/apply-patch.js +0 -556
- package/dist/tools/ask-user-question.js +0 -337
- package/dist/tools/ask-user.js +0 -115
- package/dist/tools/bash.js +0 -1238
- package/dist/tools/brief.js +0 -224
- package/dist/tools/cron.js +0 -433
- package/dist/tools/enter-worktree.js +0 -250
- package/dist/tools/exit-worktree.js +0 -147
- package/dist/tools/file-tools.js +0 -553
- package/dist/tools/http-request.js +0 -336
- package/dist/tools/lsp-tools.js +0 -565
- package/dist/tools/mcp-tool.js +0 -260
- package/dist/tools/multi-edit.js +0 -361
- package/dist/tools/powershell.js +0 -268
- package/dist/tools/registry.js +0 -166
- package/dist/tools/server-tools.js +0 -892
- package/dist/tools/skill-tool.js +0 -96
- package/dist/tools/sleep.js +0 -99
- package/dist/tools/synthetic-output.js +0 -133
- package/dist/tools/tasks.js +0 -208
- package/dist/tools/todo-write.js +0 -184
- package/dist/tools/verify-plan-execution.js +0 -295
- package/dist/tools/web-fetch-injection-scanner.js +0 -207
- package/dist/tools/web-fetch.js +0 -720
- package/dist/tools/web-search.js +0 -458
- package/dist/tui/agent-progress-card.js +0 -111
- package/dist/tui/agent-tree-pane.js +0 -9
- package/dist/tui/agent-tree.js +0 -87
- package/dist/tui/ask-cli.js +0 -52
- package/dist/tui/ask-modal.js +0 -211
- package/dist/tui/ask-user-question-chips.js +0 -315
- package/dist/tui/ask-user-question-prompt.js +0 -203
- package/dist/tui/compact-banner.js +0 -81
- package/dist/tui/conversation-pane.js +0 -164
- package/dist/tui/cost-table.js +0 -111
- package/dist/tui/device-flow.js +0 -142
- package/dist/tui/doctor-table.js +0 -46
- package/dist/tui/feedback-prompt.js +0 -156
- package/dist/tui/input-box.js +0 -732
- package/dist/tui/login-picker.js +0 -69
- package/dist/tui/markdown-render.js +0 -266
- package/dist/tui/multi-file-diff-approval.js +0 -375
- package/dist/tui/onboarding-wizard.js +0 -240
- package/dist/tui/permissions-picker.js +0 -86
- package/dist/tui/render.js +0 -160
- package/dist/tui/repl-render.js +0 -770
- package/dist/tui/repl-splash-art.js +0 -64
- package/dist/tui/repl-splash-mascot.js +0 -154
- package/dist/tui/repl-splash.js +0 -117
- package/dist/tui/repl.js +0 -378
- package/dist/tui/slash-palette.js +0 -106
- package/dist/tui/splash-data.js +0 -61
- package/dist/tui/splash.js +0 -31
- package/dist/tui/status-bar.js +0 -209
- package/dist/tui/status-table.js +0 -7
- package/dist/tui/stickers-art.js +0 -136
- package/dist/tui/style-table.js +0 -28
- package/dist/tui/theme-table.js +0 -29
- package/dist/tui/thinking-spinner.js +0 -123
- package/dist/tui/tool-stream-pane.js +0 -140
- package/dist/tui/update-banner.js +0 -33
- package/dist/tui/vim-input.js +0 -267
- package/dist/tui/welcome-banner.js +0 -107
- package/dist/tui/welcome-data.js +0 -293
- package/dist/tui/workspace-context.js +0 -105
- package/docs/examples/codegraph.mcp.json +0 -10
- package/test/scenarios/codegen-create-file.scenario.txt +0 -13
- package/test/scenarios/compact-force.scenario.txt +0 -12
- package/test/scenarios/identity.scenario.txt +0 -11
- package/test/scenarios/persona-handoff.scenario.txt +0 -12
- package/test/scenarios/walkback.scenario.txt +0 -12
|
@@ -1,195 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* PUGI-VERIFY-GATE — verification command detection.
|
|
3
|
-
*
|
|
4
|
-
* Background: Codex dogfood 2026-06-04 surfaced a P0 trust failure
|
|
5
|
-
* where the Pugi engine returned `status: done` + `exitCode: 0`
|
|
6
|
-
* even after `npm test` exited non-zero on a regression the agent
|
|
7
|
-
* itself had introduced. Root cause: no layer of the dispatch
|
|
8
|
-
* pipeline knew which bash invocations were verification commands,
|
|
9
|
-
* so the engine outcome had no way to gate the final status on
|
|
10
|
-
* test/lint/build pass.
|
|
11
|
-
*
|
|
12
|
-
* This module is the deterministic, configurable allowlist of regex
|
|
13
|
-
* patterns the engine uses to recognise verification commands at
|
|
14
|
-
* dispatch time. The detection is intentionally simple (anchored on
|
|
15
|
-
* the head of the command after sudo / env-prefix stripping) so the
|
|
16
|
-
* allowlist stays auditable. False negatives are recoverable (the
|
|
17
|
-
* agent can re-run with a recognised wrapper); false positives would
|
|
18
|
-
* silently down-grade unrelated commands and are forbidden.
|
|
19
|
-
*
|
|
20
|
-
* The pattern table is exported as `VERIFICATION_PATTERNS`; callers
|
|
21
|
-
* use `detectVerificationCommand(cmd)` for the boolean + tool-tag
|
|
22
|
-
* decision. Both surfaces are pure — no I/O, no session state, no
|
|
23
|
-
* environment reads.
|
|
24
|
-
*/
|
|
25
|
-
/**
 * Canonical verification allowlist.
 *
 * Each entry is `{ tool, pattern, category }`:
 *   - `tool`     — stable tag reported back to the caller on a match
 *   - `pattern`  — regex tested against the head of ONE shell component
 *   - `category` — one of 'test' | 'lint' | 'typecheck' | 'build'
 *
 * Patterns expect their input to have been normalised first (leading
 * whitespace trimmed, leading `sudo` / `time` / `env KEY=value`
 * prefixes stripped) — run the component through `extractCommandHead`
 * before matching.
 *
 * When extending: keep every regex anchored (`^`) so a path that merely
 * contains the tool name (`./scripts/npm.sh`) does not false-positive.
 *
 * Python launchers are matched as `python`, `python3` and `python3.X`,
 * because many systems ship no unversioned `python` binary and
 * `python3 -m pytest` is the usual spelling there.
 */
export const VERIFICATION_PATTERNS = [
    // ----- JavaScript / TypeScript ecosystem -----
    // npm test / npm run test / npm run lint / npm run typecheck / npm run build
    { tool: 'npm-test', pattern: /^npm\s+(?:run\s+)?test\b/, category: 'test' },
    { tool: 'npm-lint', pattern: /^npm\s+run\s+lint\b/, category: 'lint' },
    { tool: 'npm-typecheck', pattern: /^npm\s+run\s+typecheck\b/, category: 'typecheck' },
    { tool: 'npm-build', pattern: /^npm\s+run\s+build\b/, category: 'build' },
    // pnpm (with and without -C / --filter / -r prefixes — match the full head)
    { tool: 'pnpm-test', pattern: /^pnpm(?:\s+(?:-C\s+\S+|--filter(?:\s+|=)\S+|-r))*\s+(?:run\s+)?test\b/, category: 'test' },
    { tool: 'pnpm-lint', pattern: /^pnpm(?:\s+(?:-C\s+\S+|--filter(?:\s+|=)\S+|-r))*\s+(?:run\s+)?lint\b/, category: 'lint' },
    { tool: 'pnpm-typecheck', pattern: /^pnpm(?:\s+(?:-C\s+\S+|--filter(?:\s+|=)\S+|-r))*\s+(?:run\s+)?typecheck\b/, category: 'typecheck' },
    { tool: 'pnpm-build', pattern: /^pnpm(?:\s+(?:-C\s+\S+|--filter(?:\s+|=)\S+|-r))*\s+(?:run\s+)?build\b/, category: 'build' },
    // yarn
    { tool: 'yarn-test', pattern: /^yarn\s+(?:run\s+)?test\b/, category: 'test' },
    { tool: 'yarn-lint', pattern: /^yarn\s+(?:run\s+)?lint\b/, category: 'lint' },
    { tool: 'yarn-typecheck', pattern: /^yarn\s+(?:run\s+)?typecheck\b/, category: 'typecheck' },
    { tool: 'yarn-build', pattern: /^yarn\s+(?:run\s+)?build\b/, category: 'build' },
    // Direct test-runner invocations (npx and bare).
    { tool: 'jest', pattern: /^(?:npx\s+)?jest\b/, category: 'test' },
    { tool: 'vitest', pattern: /^(?:npx\s+)?vitest\b/, category: 'test' },
    { tool: 'mocha', pattern: /^(?:npx\s+)?mocha\b/, category: 'test' },
    // tsc only counts as a typecheck when it is bare or carries --noEmit.
    { tool: 'tsc-typecheck', pattern: /^(?:npx\s+)?tsc\b(?=.*--noEmit|\s*$)/, category: 'typecheck' },
    { tool: 'eslint', pattern: /^(?:npx\s+)?eslint\b/, category: 'lint' },
    { tool: 'node-test', pattern: /^node\s+--test\b/, category: 'test' },
    // ----- Python -----
    // Launcher may be `python`, `python3` or `python3.X`.
    { tool: 'pytest', pattern: /^(?:python(?:3(?:\.\d+)?)?\s+-m\s+)?pytest\b/, category: 'test' },
    { tool: 'python-unittest', pattern: /^python(?:3(?:\.\d+)?)?\s+-m\s+unittest\b/, category: 'test' },
    { tool: 'ruff', pattern: /^ruff\s+check\b/, category: 'lint' },
    { tool: 'mypy', pattern: /^mypy\b/, category: 'typecheck' },
    // ----- Rust -----
    { tool: 'cargo-test', pattern: /^cargo\s+test\b/, category: 'test' },
    { tool: 'cargo-check', pattern: /^cargo\s+check\b/, category: 'typecheck' },
    { tool: 'cargo-clippy', pattern: /^cargo\s+clippy\b/, category: 'lint' },
    { tool: 'cargo-build', pattern: /^cargo\s+build\b/, category: 'build' },
    // ----- Go -----
    { tool: 'go-test', pattern: /^go\s+test\b/, category: 'test' },
    { tool: 'go-vet', pattern: /^go\s+vet\b/, category: 'lint' },
    { tool: 'go-build', pattern: /^go\s+build\b/, category: 'build' },
    // ----- Elixir -----
    { tool: 'mix-test', pattern: /^mix\s+test\b/, category: 'test' },
    // ----- Ruby -----
    { tool: 'rspec', pattern: /^(?:bundle\s+exec\s+)?rspec\b/, category: 'test' },
    { tool: 'rubocop', pattern: /^(?:bundle\s+exec\s+)?rubocop\b/, category: 'lint' },
    // ----- Java / Kotlin / Gradle / Maven -----
    { tool: 'gradle-test', pattern: /^(?:\.\/)?gradlew?\s+test\b/, category: 'test' },
    { tool: 'gradle-build', pattern: /^(?:\.\/)?gradlew?\s+build\b/, category: 'build' },
    { tool: 'maven-test', pattern: /^mvn\s+test\b/, category: 'test' },
    { tool: 'maven-verify', pattern: /^mvn\s+verify\b/, category: 'test' },
    // ----- C/C++ / Make -----
    { tool: 'make-test', pattern: /^make\s+(?:test|check)\b/, category: 'test' },
    { tool: 'ctest', pattern: /^ctest\b/, category: 'test' },
];
|
|
88
|
-
// Operators that separate one shell component from the next.
const SHELL_SEPARATORS = /\s*(?:&&|\|\||;|\|)\s*/;
// A single inline `NAME=value` token. Names may be lower- or upper-case
// (e.g. `http_proxy=...`) and the value may be empty (`DEBUG=`).
const ENV_ASSIGN = /^[A-Za-z_][A-Za-z0-9_]*=\S*$/;
// Wrapper words that run the command that follows them. Any whitespace
// (space or tab) may separate the wrapper from the rest.
const WRAPPER_PREFIX = /^(?:sudo|time|env)\s+/;
/**
 * Reduce one shell component to the command that is actually executed,
 * so the anchored allowlist regexes can match on the verb.
 *
 * Stripped, repeatedly and in any order, from the front of the string:
 *   - the wrapper words `sudo`, `time` and `env`
 *   - inline environment assignments (`CI=1`, `FOO=bar BAZ=qux`)
 *
 * Examples:
 *   `sudo time CI=1 pnpm test`   -> `pnpm test`
 *   `env NODE_ENV=test npm test` -> `npm test`
 *
 * Wrapper flags (`sudo -u bob`, `env -i`) and quoted assignment values
 * that contain whitespace are NOT understood; such commands simply fail
 * to match the allowlist (a false negative, never a false positive).
 *
 * Pure — no side effects.
 *
 * @param {string} component - One component of a shell command line.
 * @returns {string} The component with leading wrapper noise removed.
 */
export function extractCommandHead(component) {
    let head = component.trim();
    // Every pass removes at least one character, so the loop terminates.
    while (head !== '') {
        const wrapper = WRAPPER_PREFIX.exec(head);
        if (wrapper !== null) {
            head = head.slice(wrapper[0].length);
            continue;
        }
        // Peel one assignment at a time so `FOO=bar BAZ=qux pnpm test`
        // resolves to `pnpm test`.
        const firstToken = head.split(/\s+/, 1)[0] ?? '';
        if (ENV_ASSIGN.test(firstToken)) {
            head = head.slice(firstToken.length).trimStart();
            continue;
        }
        break;
    }
    return head;
}
|
|
123
|
-
/**
 * Split a command line into its `&&` / `||` / `;` / `|`-separated
 * components, ignoring separators that appear inside single or double
 * quotes or directly after a backslash.
 *
 * Quoting matters: a naive split turns
 * `git commit -m "fix; npm test passes"` into a component that starts
 * with `npm test`, which would be a false positive.
 *
 * An unterminated quote swallows the rest of the line, so nothing after
 * it is treated as a separate command.
 *
 * @param {string} cmd - Full shell command line.
 * @returns {string[]} Raw (untrimmed) components in source order.
 */
function splitShellComponents(cmd) {
    const components = [];
    let current = '';
    let quote = null;
    const flush = () => {
        components.push(current);
        current = '';
    };
    for (let i = 0; i < cmd.length; i += 1) {
        const ch = cmd[i];
        const next = cmd[i + 1];
        if (quote !== null) {
            current += ch;
            if (ch === '\\' && quote === '"' && next !== undefined) {
                // Inside double quotes a backslash escapes the next character.
                current += next;
                i += 1;
            }
            else if (ch === quote) {
                quote = null;
            }
            continue;
        }
        if (ch === '\\' && next !== undefined) {
            // Escaped character outside quotes is always literal.
            current += ch + next;
            i += 1;
            continue;
        }
        if (ch === '"' || ch === "'") {
            quote = ch;
            current += ch;
            continue;
        }
        if (ch === ';') {
            flush();
            continue;
        }
        if (ch === '&' && next === '&') {
            flush();
            i += 1;
            continue;
        }
        if (ch === '|') {
            flush();
            if (next === '|') {
                i += 1;
            }
            continue;
        }
        current += ch;
    }
    flush();
    return components;
}
/**
 * Detect whether a shell command runs a verification step.
 *
 * Every `&&` / `;` / `||` / `|`-separated component is normalised with
 * `extractCommandHead` and tested against `VERIFICATION_PATTERNS`; the
 * first match wins. A compound command such as
 * `cd packages/foo && pnpm test` is therefore flagged on its trailing
 * component. Separators inside quotes are not split points.
 *
 * The check is intentionally shallow: it does not parse `if`, `for`,
 * function bodies, subshells or command substitution. Verification
 * wrapped inside a script (e.g. `./scripts/test.sh`) is not detected.
 *
 * @param {unknown} cmd - Shell command line; non-strings and blank
 *   strings yield a negative result.
 * @returns {{ isVerification: boolean, tool: string | null, matchedComponent: string }}
 *   `tool` is the allowlist tag and `matchedComponent` the trimmed
 *   component that matched; both are null / '' when nothing matched.
 */
export function detectVerificationCommand(cmd) {
    if (typeof cmd !== 'string' || cmd.trim() === '') {
        return { isVerification: false, tool: null, matchedComponent: '' };
    }
    for (const raw of splitShellComponents(cmd)) {
        const head = extractCommandHead(raw);
        if (head === '') {
            continue;
        }
        const entry = VERIFICATION_PATTERNS.find((candidate) => candidate.pattern.test(head));
        if (entry !== undefined) {
            return {
                isVerification: true,
                tool: entry.tool,
                matchedComponent: raw.trim(),
            };
        }
    }
    return { isVerification: false, tool: null, matchedComponent: '' };
}
|
|
157
|
-
/**
 * Wording an agent typically uses to disclaim responsibility for a
 * failing verification step ("that failure was already there").
 *
 * Intended use, per the PUGI-VERIFY-GATE contract: if any phrase occurs
 * in the final assistant text AND the agent changed files in the same
 * module as a failing test, the outcome's `regressionOwnershipDispute`
 * flag is raised so a downstream reviewer can decide on escalation.
 *
 * Entries are stored lower-case; matching is meant to be
 * case-insensitive and substring-based (`.includes()`), so surrounding
 * punctuation or words do not prevent a hit — "this is a pre-existing
 * test bug" still trips the flag. The matching itself lives in the
 * caller, not in this module.
 */
export const REGRESSION_DISPUTE_PHRASES = [
    // "pre-existing" in its three common spellings
    'pre-existing',
    'preexisting',
    'pre existing',
    // explicit denial of ownership
    'not from my changes',
    'not related to my changes',
    // "unrelated" variants
    'unrelated test failure',
    'unrelated to my changes',
    'unrelated failure',
    'not my change',
];
|
|
180
|
-
/**
 * Tail trimmer for stderr captured in verification ledger entries.
 *
 * Returns at most the last `maxBytes` bytes of `stderr`, measured in
 * UTF-8. The default cap is 2 KB, matching the PUGI-VERIFY-GATE
 * contract.
 *
 * The cut is made on a byte boundary and then moved forward past any
 * UTF-8 continuation bytes, so the result never starts in the middle of
 * a multi-byte character and never exceeds the cap. For pure-ASCII
 * input this is identical to taking the last `maxBytes` characters.
 *
 * @param {unknown} stderr - Captured stderr text; non-strings yield ''.
 * @param {number} [maxBytes=2048] - Byte cap. Zero, negative, or NaN
 *   caps yield '' (nothing fits).
 * @returns {string} The whole input when it already fits, otherwise
 *   its trailing portion of at most `maxBytes` UTF-8 bytes.
 */
export function tailStderr(stderr, maxBytes = 2048) {
    if (typeof stderr !== 'string' || stderr.length === 0) {
        return '';
    }
    // `!(x > 0)` also catches NaN. Without this guard a cap of 0 would
    // return the whole string, because `slice(-0)` is `slice(0)`.
    if (!(maxBytes > 0)) {
        return '';
    }
    const bytes = Buffer.from(stderr, 'utf8');
    if (bytes.length <= maxBytes) {
        return stderr;
    }
    let start = bytes.length - Math.floor(maxBytes);
    // Continuation bytes look like 0b10xxxxxx; skip them so decoding
    // starts on a character boundary instead of emitting U+FFFD.
    while (start < bytes.length && (bytes[start] & 0xc0) === 0x80) {
        start += 1;
    }
    return bytes.toString('utf8', start);
}
|
|
195
|
-
//# sourceMappingURL=verification-patterns.js.map
|
|
@@ -1,293 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Golden dataset primitive — pugi-eval-v1 foundation (task).
|
|
3
|
-
*
|
|
4
|
-
* Formalizes a frozen set of representative CLI tasks that we replay
|
|
5
|
-
* against the agent loop to detect quality regressions. This module is
|
|
6
|
-
* the pure data layer:
|
|
7
|
-
*
|
|
8
|
-
* 1. `GoldenTask` — typed contract for one evaluation case (id, query,
|
|
9
|
-
* expected file touches, expected output substrings, exit-code
|
|
10
|
-
* gate, soft budgets, tags).
|
|
11
|
-
* 2. `loadGoldenDataset(path)` — reads a JSON file from disk, asserts
|
|
12
|
-
* it is an array (an empty array is accepted), and runs `validateGoldenTask` on every
|
|
13
|
-
* element. Throws on the first malformed entry so the eval harness
|
|
14
|
-
* cannot silently drop a task.
|
|
15
|
-
* 3. `validateGoldenTask(raw)` — schema validator. Enforces kebab-case
|
|
16
|
-
* ids, non-empty query, optional arrays whose elements are
|
|
17
|
-
* well-formed, and rejects parent-traversal `..` segments inside
|
|
18
|
-
* `expectedFiles`. Throws `TypeError` with a specific field message.
|
|
19
|
-
* 4. `diffAgainstBaseline(current, baseline)` — turns two `TaskResult`
|
|
20
|
-
* maps into a `DriftReport` that lists regressed / improved / new /
|
|
21
|
-
* removed tasks. A regression is a task that PASSED in baseline and
|
|
22
|
-
* FAILED in current; an improvement is the inverse. The report is
|
|
23
|
-
* what the future CI gate consumes — a positive `regressed.length`
|
|
24
|
-
* blocks the merge.
|
|
25
|
-
*
|
|
26
|
-
* Out of scope here: actually running tasks against the engine. The
|
|
27
|
-
* follow-up `runOfflineEval()` consumer reads a dataset via
|
|
28
|
-
* `loadGoldenDataset`, executes each task through `pugi code/explain/
|
|
29
|
-
* plan/fix/build`, collects pass/fail/latency/tokens into a
|
|
30
|
-
* `TaskResult[]`, and feeds the previous run's results into
|
|
31
|
-
* `diffAgainstBaseline` for the drift gate.
|
|
32
|
-
*
|
|
33
|
-
* Pure functions only. No file I/O beyond `fs/promises.readFile` inside
|
|
34
|
-
* `loadGoldenDataset` so the module composes cleanly with in-memory test
|
|
35
|
-
* fixtures and engine-side consumers that already hold dataset bytes.
|
|
36
|
-
*/
|
|
37
|
-
import { readFile } from 'node:fs/promises';
|
|
38
|
-
/** Kebab-case task id: a leading lowercase letter, then lowercase letters, digits, or hyphens. */
const ID_PATTERN = /^[a-z][a-z0-9-]*$/;

/**
 * Read an optional array-of-non-empty-strings field from a raw task.
 *
 * @param {object} record - Raw task object.
 * @param {string} field - Property name to read.
 * @param {string} id - Task id, used in error messages.
 * @param {{ allowEmpty?: boolean, checkEntry?: (entry: string, idx: number) => void }} [rules]
 *   `allowEmpty` permits `[]`; `checkEntry` runs extra per-element checks
 *   right after the string check so errors surface in element order.
 * @returns {string[] | undefined} A fresh array, or `undefined` when the field is absent.
 * @throws {TypeError} When the field is present but malformed.
 */
function readStringList(record, field, id, { allowEmpty = false, checkEntry } = {}) {
    const value = record[field];
    if (value === undefined) {
        return undefined;
    }
    if (!Array.isArray(value)) {
        throw new TypeError(`golden task ${id}: \`${field}\` must be an array`);
    }
    if (!allowEmpty && value.length === 0) {
        throw new TypeError(`golden task ${id}: \`${field}\` must be non-empty (omit field instead of passing [])`);
    }
    return value.map((entry, idx) => {
        if (typeof entry !== 'string' || entry.length === 0) {
            throw new TypeError(`golden task ${id}: \`${field}[${idx}]\` must be a non-empty string`);
        }
        checkEntry?.(entry, idx);
        return entry;
    });
}

/**
 * Read an optional integer field from a raw task.
 *
 * @param {object} record - Raw task object.
 * @param {string} field - Property name to read.
 * @param {string} id - Task id, used in error messages.
 * @param {{ positive?: boolean }} [rules] - `positive` additionally requires `> 0`.
 * @returns {number | undefined} The integer, or `undefined` when the field is absent.
 * @throws {TypeError} When the field is present but not an acceptable integer.
 */
function readInteger(record, field, id, { positive = false } = {}) {
    const value = record[field];
    if (value === undefined) {
        return undefined;
    }
    const valid = typeof value === 'number'
        && Number.isInteger(value)
        && (!positive || value > 0);
    if (!valid) {
        const expectation = positive ? 'a positive integer' : 'an integer';
        throw new TypeError(`golden task ${id}: \`${field}\` must be ${expectation}`);
    }
    return value;
}

/**
 * Validate one raw value as a `GoldenTask`.
 *
 * Fields are checked in a fixed order (id, query, expectedFiles,
 * expectedSubstrings, expectedExitCode, maxTokens, maxLatencyMs, tags)
 * and the first violation throws. The validator is hand-rolled so each
 * field keeps its own specific error message.
 *
 * @param {unknown} raw - Parsed JSON value for one dataset entry.
 * @returns {object} A new task object holding `id`, `query`, and only
 *   those optional fields that were present in `raw`.
 * @throws {TypeError} With a field-specific message on any violation.
 */
export function validateGoldenTask(raw) {
    if (typeof raw !== 'object' || raw === null || Array.isArray(raw)) {
        throw new TypeError('golden task must be a JSON object');
    }
    // id — required, kebab-case.
    if (typeof raw.id !== 'string' || raw.id.length === 0) {
        throw new TypeError('golden task: `id` required (non-empty string)');
    }
    if (!ID_PATTERN.test(raw.id)) {
        throw new TypeError(`golden task ${JSON.stringify(raw.id)}: \`id\` must match ${ID_PATTERN.source} (kebab-case, leading letter)`);
    }
    const { id } = raw;
    // query — required.
    if (typeof raw.query !== 'string' || raw.query.length === 0) {
        throw new TypeError(`golden task ${id}: \`query\` required (non-empty string)`);
    }
    // Object-literal properties evaluate top to bottom, which preserves
    // the documented validation order.
    const optional = {
        expectedFiles: readStringList(raw, 'expectedFiles', id, {
            checkEntry: (entry, idx) => {
                if (entry.startsWith('/')) {
                    throw new TypeError(`golden task ${id}: \`expectedFiles[${idx}]\` must be relative (no leading /)`);
                }
                // Split on both separators so a Windows-style `a\..\b`
                // cannot slip a parent-directory segment past the check.
                if (entry.split(/[\\/]/).includes('..')) {
                    throw new TypeError(`golden task ${id}: \`expectedFiles[${idx}]\` contains \`..\` traversal`);
                }
            },
        }),
        expectedSubstrings: readStringList(raw, 'expectedSubstrings', id),
        expectedExitCode: readInteger(raw, 'expectedExitCode', id),
        maxTokens: readInteger(raw, 'maxTokens', id, { positive: true }),
        maxLatencyMs: readInteger(raw, 'maxLatencyMs', id, { positive: true }),
        // tags is the one list field that may legitimately be empty.
        tags: readStringList(raw, 'tags', id, { allowEmpty: true }),
    };
    const task = { id, query: raw.query };
    for (const [field, value] of Object.entries(optional)) {
        if (value !== undefined) {
            task[field] = value;
        }
    }
    return task;
}
|
|
163
|
-
/**
 * Load and validate a golden dataset JSON file.
 *
 * The file must parse as a JSON array; every element is passed through
 * `validateGoldenTask`. Loading stops at the first malformed entry.
 * Duplicate ids are rejected because drift comparisons key by id and a
 * duplicate would silently mask one row.
 *
 * Every failure is reported as a `TypeError` whose message names the
 * file (and the entry index where relevant). The underlying error is
 * attached as `cause`, so callers can still inspect details such as the
 * filesystem error code.
 *
 * @param {string} filePath - Path of the dataset JSON file.
 * @returns {Promise<object[]>} The validated tasks, in file order.
 * @throws {TypeError} On read failure, invalid JSON, a non-array
 *   top-level value, an invalid entry, or a duplicate id.
 */
export async function loadGoldenDataset(filePath) {
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    let raw;
    try {
        raw = await readFile(filePath, 'utf8');
    }
    catch (err) {
        throw new TypeError(`golden dataset: cannot read ${filePath}: ${describe(err)}`, { cause: err });
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (err) {
        throw new TypeError(`golden dataset ${filePath}: invalid JSON (${describe(err)})`, { cause: err });
    }
    if (!Array.isArray(parsed)) {
        throw new TypeError(`golden dataset ${filePath}: top-level value must be a JSON array`);
    }
    const tasks = [];
    const seenIds = new Set();
    for (const [index, entry] of parsed.entries()) {
        let task;
        try {
            task = validateGoldenTask(entry);
        }
        catch (err) {
            // Prefix the validator's message with the entry position so
            // the offending row is easy to find in a large dataset.
            throw new TypeError(`golden dataset ${filePath}: entry [${index}]: ${describe(err)}`, { cause: err });
        }
        if (seenIds.has(task.id)) {
            throw new TypeError(`golden dataset ${filePath}: duplicate task id ${JSON.stringify(task.id)} at entry [${index}]`);
        }
        seenIds.add(task.id);
        tasks.push(task);
    }
    return tasks;
}
|
|
208
|
-
/**
 * Compare a current run against a baseline run and return a drift
 * report. No I/O and no clock are involved.
 *
 * Rows without a non-empty string `id` are ignored. When an id occurs
 * more than once in one input, the last row wins. A task is "regressed"
 * when it passed in the baseline and fails now, and "improved" in the
 * opposite case; tasks present on only one side are listed under
 * `newTasks` / `removedTasks`.
 *
 * All four lists are sorted by id in UTF-16 code-unit order, the same
 * order the default `Array.prototype.sort()` gives strings. The order
 * therefore does not depend on the host's locale, which a
 * `localeCompare`-based sort would.
 *
 * Either input may be empty; an empty baseline yields a report in which
 * every current task is new.
 *
 * @param {Iterable<object>} currentResults - Rows from the current run (`id`, `passed`, optional `reason`).
 * @param {Iterable<object>} baselineResults - Rows from the baseline run.
 * @param {{ includeReason?: boolean }} [options] - `includeReason`
 *   (default `true`) copies the current row's `reason` onto each diff
 *   entry when the row defines one.
 * @returns {{ totalTasks: number, passedNow: number, passedBaseline: number,
 *   regressed: object[], improved: object[], newTasks: string[], removedTasks: string[] }}
 */
export function diffAgainstBaseline(currentResults, baselineResults, options) {
    const includeReason = options?.includeReason ?? true;
    // Index rows by id, dropping rows that cannot be keyed.
    const indexById = (rows) => {
        const byId = new Map();
        for (const row of rows) {
            if (typeof row.id === 'string' && row.id.length > 0) {
                byId.set(row.id, row);
            }
        }
        return byId;
    };
    const countPassed = (byId) => {
        let passed = 0;
        for (const row of byId.values()) {
            if (row.passed) {
                passed += 1;
            }
        }
        return passed;
    };
    // Locale-independent ordering, identical to default string sort.
    const byCodeUnit = (a, b) => {
        if (a.id < b.id) {
            return -1;
        }
        return a.id > b.id ? 1 : 0;
    };
    const currentById = indexById(currentResults);
    const baselineById = indexById(baselineResults);
    const regressed = [];
    const improved = [];
    const newTasks = [];
    for (const [id, currentRow] of currentById) {
        const baselineRow = baselineById.get(id);
        if (!baselineRow) {
            newTasks.push(id);
            continue;
        }
        const wasPassing = Boolean(baselineRow.passed);
        const isPassing = Boolean(currentRow.passed);
        if (wasPassing === isPassing) {
            continue;
        }
        const diff = { id, baselinePassed: wasPassing, currentPassed: isPassing };
        if (includeReason && currentRow.reason !== undefined) {
            diff.reason = currentRow.reason;
        }
        (wasPassing ? regressed : improved).push(diff);
    }
    const removedTasks = [...baselineById.keys()].filter((id) => !currentById.has(id));
    regressed.sort(byCodeUnit);
    improved.sort(byCodeUnit);
    newTasks.sort();
    removedTasks.sort();
    return {
        totalTasks: currentById.size,
        passedNow: countPassed(currentById),
        passedBaseline: countPassed(baselineById),
        regressed,
        improved,
        newTasks,
        removedTasks,
    };
}
|
|
293
|
-
//# sourceMappingURL=golden-dataset.js.map
|
|
@@ -1,177 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Local feedback queue.
|
|
3
|
-
*
|
|
4
|
-
* `pugi feedback` POSTs collected operator feedback to the admin-api
|
|
5
|
-
* `/api/pugi/feedback` route. When that round-trip fails (endpoint
|
|
6
|
-
* missing, network down, server 5xx), the submitter falls back to
|
|
7
|
-
* appending the envelope to `<cwd>/.pugi/feedback-queue.jsonl`. On the
|
|
8
|
-
* next online session the flusher drains the queue silently in the
|
|
9
|
-
* background.
|
|
10
|
-
*
|
|
11
|
-
* # Module contract
|
|
12
|
-
*
|
|
13
|
-
* - Per-workspace storage. The queue file lives at
|
|
14
|
-
* `<cwd>/.pugi/feedback-queue.jsonl` so the operator-visible state
|
|
15
|
-
* stays alongside the project's other Pugi metadata. Multi-repo
|
|
16
|
-
* operators get one queue per repo — matches the rest of `.pugi/`.
|
|
17
|
-
*
|
|
18
|
-
* - JSONL append-only format. One envelope per line. Newlines inside
|
|
19
|
-
* the comment field are escaped as `\n` by `JSON.stringify`. The
|
|
20
|
-
* enqueue path uses an atomic `O_APPEND` write so concurrent
|
|
21
|
-
* `pugi feedback` invocations from a split-screen REPL + shell do
|
|
22
|
-
* not interleave half-records.
|
|
23
|
-
*
|
|
24
|
-
* - The flusher is best-effort. It returns counts but never throws —
|
|
25
|
-
* a failed flush leaves the queue untouched and a successful flush
|
|
26
|
-
* atomically rewrites the file with the remaining (unsubmitted)
|
|
27
|
-
* envelopes. Partial-success is the normal path when the server
|
|
28
|
-
* accepts the first N but 5xx's the (N+1)th.
|
|
29
|
-
*
|
|
30
|
-
* - The queue file is intentionally NOT readable by anything beyond
|
|
31
|
-
* the flusher. The operator's free-text comments are confidential
|
|
32
|
-
* — we do not surface them in `/status` / `/doctor` / telemetry.
|
|
33
|
-
*
|
|
34
|
-
* - All filesystem writes go through `mkdirSync({recursive: true})`
|
|
35
|
-
* so the first-ever enqueue on a fresh workspace lazily creates
|
|
36
|
-
* `.pugi/` without depending on an earlier `pugi init`.
|
|
37
|
-
*/
|
|
38
|
-
import { appendFileSync, existsSync, mkdirSync, readFileSync, renameSync, writeFileSync, } from 'node:fs';
|
|
39
|
-
import { dirname, resolve } from 'node:path';
|
|
40
|
-
/**
 * Compute the absolute location of a workspace's feedback queue file.
 *
 * Kept in one place so every producer and consumer of the queue derives
 * the same path.
 *
 * @param {string} cwd - Workspace root; resolved against the process
 *   working directory when relative.
 * @returns {string} Absolute path of `<cwd>/.pugi/feedback-queue.jsonl`.
 */
export function feedbackQueuePath(cwd) {
    const relativeSegments = ['.pugi', 'feedback-queue.jsonl'];
    return resolve(cwd, ...relativeSegments);
}
|
|
47
|
-
/**
 * Add a single feedback envelope to the end of the workspace queue.
 *
 * The envelope is serialised as one JSON line and written with a single
 * `appendFileSync` call, which opens the file in append mode. The
 * parent directory is created on demand, so this works on a workspace
 * that has never been initialised.
 *
 * @param {object} env - Feedback envelope to persist.
 * @param {string} cwd - Workspace root that owns the queue.
 * @returns {string} Absolute path of the queue file that was appended
 *   to, so callers can surface it to the operator.
 */
export function enqueueFeedback(env, cwd) {
    const queueFile = feedbackQueuePath(cwd);
    // Ensure `.pugi/` exists before the first write.
    mkdirSync(dirname(queueFile), { recursive: true });
    // JSON.stringify of an object emits no raw newlines, so the trailing
    // '\n' is the only line break and acts as the record separator.
    const record = JSON.stringify(env);
    appendFileSync(queueFile, `${record}\n`, 'utf8');
    return queueFile;
}
|
|
65
|
-
/**
 * Read and parse the workspace's feedback queue.
 *
 * Blank lines are skipped. A line is accepted as an envelope when it
 * parses as JSON and carries string `category`, `comment`, `ts`, and
 * `cliVersion` fields plus a numeric `rating`. Any other non-blank line
 * is reported by its 1-based line number.
 *
 * @param {string} cwd - Workspace root that owns the queue.
 * @returns {{ envelopes: object[], parseErrors: number[] }} Accepted
 *   envelopes in file order and the line numbers of rejected lines.
 *   Both lists are empty when the queue file does not exist.
 */
export function readFeedbackQueue(cwd) {
    const queueFile = feedbackQueuePath(cwd);
    const outcome = { envelopes: [], parseErrors: [] };
    if (!existsSync(queueFile)) {
        return outcome;
    }
    // Minimal shape check only: enough to catch obvious corruption that
    // would make a line impossible to submit.
    const looksLikeEnvelope = (value) => typeof value.category === 'string'
        && typeof value.rating === 'number'
        && typeof value.comment === 'string'
        && typeof value.ts === 'string'
        && typeof value.cliVersion === 'string';
    readFileSync(queueFile, 'utf8').split('\n').forEach((line, index) => {
        const text = line.trim();
        if (!text) {
            return;
        }
        let accepted = null;
        try {
            const candidate = JSON.parse(text);
            // A JSON `null` makes the property reads throw, which lands
            // in the catch and counts as a parse error like bad JSON.
            if (looksLikeEnvelope(candidate)) {
                accepted = { value: candidate };
            }
        }
        catch {
            accepted = null;
        }
        if (accepted === null) {
            outcome.parseErrors.push(index + 1);
        }
        else {
            outcome.envelopes.push(accepted.value);
        }
    });
    return outcome;
}
|
|
100
|
-
/**
 * Replace the queue file's contents with the envelopes that still need
 * to be submitted.
 *
 * - With nothing remaining, an existing queue file is truncated to zero
 *   bytes in place (it is kept rather than deleted); a missing file is
 *   left missing.
 * - Otherwise the new contents are written to a sibling `<queue>.tmp`
 *   file, which is then renamed over the queue file. The original file
 *   is thus never opened for truncation, so a crash during the write
 *   leaves it intact. The original author's note describes the rename
 *   as atomic on POSIX and on NTFS.
 *
 * @param {object[]} remaining - Envelopes to keep, in order.
 * @param {string} cwd - Workspace root that owns the queue.
 * @returns {void}
 */
export function rewriteFeedbackQueue(remaining, cwd) {
    const queueFile = feedbackQueuePath(cwd);
    if (remaining.length > 0) {
        mkdirSync(dirname(queueFile), { recursive: true });
        const stagingFile = `${queueFile}.tmp`;
        const serialized = remaining.map((env) => JSON.stringify(env)).join('\n');
        writeFileSync(stagingFile, `${serialized}\n`, { encoding: 'utf8' });
        // Writing straight to the queue file would truncate it first and
        // expose a zero-byte window if the process died mid-write; the
        // staged file plus rename avoids that.
        renameSync(stagingFile, queueFile);
        return;
    }
    // Nothing left: empty the file but keep it, so the next read simply
    // finds no envelopes.
    if (existsSync(queueFile)) {
        writeFileSync(queueFile, '', { encoding: 'utf8' });
    }
}
|
|
130
|
-
/**
 * Drain the queue, best-effort.
 *
 * Envelopes are submitted one at a time, in queue order. A truthy
 * result from `submit` removes the envelope; a falsy result or a thrown
 * error / rejected promise keeps it for the next attempt.
 *
 * The queue file is rewritten only when at least one envelope was
 * accepted. A flush in which nothing succeeded leaves the file exactly
 * as it was. When a rewrite does happen it keeps only the well-formed
 * envelopes that failed, so lines reported in `parseErrors` are dropped
 * at that point.
 *
 * This function does not throw. Failures to read or rewrite the queue
 * file are caught and reported through an `ioError` message on the
 * result, which is present only when such a failure occurred.
 *
 * NOTE(review): envelopes appended to the queue by another process
 * while a flush is in progress are not re-read before the rewrite, so
 * they can be overwritten. Fixing that needs cross-process locking,
 * which this function does not attempt.
 *
 * @param {string} cwd - Workspace root that owns the queue.
 * @param {(env: object) => boolean | Promise<boolean>} submit - Sends
 *   one envelope; truthy means accepted.
 * @returns {Promise<{ attempted: number, succeeded: number, failed: number,
 *   failedEnvelopes: object[], parseErrors: number[], ioError?: string }>}
 */
export async function flushFeedbackQueue(cwd, submit) {
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    let queue;
    try {
        queue = readFeedbackQueue(cwd);
    }
    catch (err) {
        // An unreadable queue (permissions, a directory in its place, ...)
        // must not break the silent session-start drain.
        return {
            attempted: 0,
            succeeded: 0,
            failed: 0,
            failedEnvelopes: [],
            parseErrors: [],
            ioError: describe(err),
        };
    }
    const { envelopes, parseErrors } = queue;
    if (envelopes.length === 0) {
        return {
            attempted: 0,
            succeeded: 0,
            failed: 0,
            failedEnvelopes: [],
            parseErrors,
        };
    }
    let succeeded = 0;
    const failedEnvelopes = [];
    for (const env of envelopes) {
        let ok = false;
        try {
            ok = await submit(env);
        }
        catch {
            // A throwing submitter counts as "not accepted"; the envelope
            // stays queued and is retried on the next flush.
            ok = false;
        }
        if (ok) {
            succeeded += 1;
        }
        else {
            failedEnvelopes.push(env);
        }
    }
    const result = {
        attempted: envelopes.length,
        succeeded,
        failed: failedEnvelopes.length,
        failedEnvelopes,
        parseErrors,
    };
    // Nothing was accepted, so there is nothing to remove: leave the
    // file untouched.
    if (succeeded > 0) {
        try {
            rewriteFeedbackQueue(failedEnvelopes, cwd);
        }
        catch (err) {
            // The counts are still accurate; the accepted envelopes simply
            // remain on disk and may be submitted again next time.
            result.ioError = describe(err);
        }
    }
    return result;
}
|
|
177
|
-
//# sourceMappingURL=queue.js.map
|