@pugi/cli 0.1.0-beta.92 → 0.1.0-beta.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/engine/native-pugi.js +1 -1
- package/dist/core/engine/prompts.js +1 -1
- package/dist/core/engine/verification-patterns.js +195 -0
- package/dist/runtime/commands/compact.js +1 -1
- package/dist/runtime/commands/config.js +1 -1
- package/dist/runtime/commands/memory.js +1 -1
- package/dist/runtime/version.js +1 -1
- package/dist/skills/bundled/remember.js +2 -2
- package/package.json +2 -2
- package/test/scenarios/identity.scenario.txt +0 -1
|
@@ -1195,7 +1195,7 @@ function toCommandKind(kind) {
|
|
|
1195
1195
|
*
|
|
1196
1196
|
* The admin-api controller (`pugi-engine.controller.ts`) routes per-tag
|
|
1197
1197
|
* to a model/persona pair via
|
|
1198
|
-
* `apps/admin-api/src/
|
|
1198
|
+
* `apps/admin-api/src/pugi/routing/dispatch-tag.ts::DISPATCH_TAGS`. The
|
|
1199
1199
|
* closed `EngineChatTag` vocabulary is
|
|
1200
1200
|
* `classify | reason | codegen | summarize | vision` — note that
|
|
1201
1201
|
* `code`, `fix`, `plan`, `build`, `explain` (CLI command names) are NOT
|
|
@@ -49,7 +49,7 @@ const COMMON_LOCAL_FIRST_PREAMBLE = [
|
|
|
49
49
|
*
|
|
50
50
|
* Voice constraint: same banned-jargon list as the cabinet Pugi
|
|
51
51
|
* persona (брифую / диспатчу / шипаю and the English jargon list
|
|
52
|
-
* from BANNED_WORDS in
|
|
52
|
+
* from BANNED_WORDS in pugi.system-prompt.ts). Repeated here verbatim
|
|
53
53
|
* so the CLI surface has its own enforcement copy; the cabinet copy
|
|
54
54
|
* is the source of truth and ships through the runtime persona
|
|
55
55
|
* prompt for the cabinet UI. CLI runs DO NOT load the cabinet
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PUGI-VERIFY-GATE — verification command detection.
|
|
3
|
+
*
|
|
4
|
+
* Background: Codex dogfood 2026-06-04 surfaced a P0 trust failure
|
|
5
|
+
* where the Pugi engine returned `status: done` + `exitCode: 0` even
|
|
6
|
+
* after `npm test` exited non-zero on a regression the agent itself
|
|
7
|
+
* had introduced. Root cause: no layer of the dispatch pipeline knew
|
|
8
|
+
* which bash invocations were verification commands, so the engine
|
|
9
|
+
* outcome had no way to gate the final status on test/lint/build
|
|
10
|
+
* pass.
|
|
11
|
+
*
|
|
12
|
+
* This module is the deterministic, configurable allowlist of regex
|
|
13
|
+
* patterns the engine uses to recognise verification commands at
|
|
14
|
+
* dispatch time. The detection is intentionally simple (anchored on
|
|
15
|
+
* the head of the command after sudo / env-prefix stripping) so the
|
|
16
|
+
* allowlist stays auditable. False negatives are recoverable (the
|
|
17
|
+
* agent can re-run with a recognised wrapper); false positives would
|
|
18
|
+
* silently down-grade unrelated commands and are forbidden.
|
|
19
|
+
*
|
|
20
|
+
* The pattern table is exported as `VERIFICATION_PATTERNS`; callers
|
|
21
|
+
* use `detectVerificationCommand(cmd)` for the boolean + tool-tag
|
|
22
|
+
* decision. Both surfaces are pure — no I/O, no session state, no
|
|
23
|
+
* environment reads.
|
|
24
|
+
*/
|
|
25
|
+
/**
 * Canonical verification allowlist.
 *
 * Each entry is `{ tool, pattern, category }`:
 *   - `tool`     — stable tag reported to callers for the matched command
 *   - `pattern`  — regex anchored (`^`) on the head of ONE shell component
 *   - `category` — one of `test` | `lint` | `typecheck` | `build`
 *
 * Patterns are meant to be tested against a component that has already
 * been normalised by `extractCommandHead` (whitespace trimmed, wrapper
 * and inline env-assignment prefixes removed).
 *
 * When extending: keep the regex anchored (`^`) so a path containing
 * the tool name (`./scripts/npm.sh`) does not false-positive.
 */
export const VERIFICATION_PATTERNS = [
    // ----- JavaScript / TypeScript ecosystem -----
    // npm test / npm run test / npm run lint / npm run typecheck / npm run build
    { tool: 'npm-test', pattern: /^npm\s+(?:run\s+)?test\b/, category: 'test' },
    { tool: 'npm-lint', pattern: /^npm\s+run\s+lint\b/, category: 'lint' },
    { tool: 'npm-typecheck', pattern: /^npm\s+run\s+typecheck\b/, category: 'typecheck' },
    { tool: 'npm-build', pattern: /^npm\s+run\s+build\b/, category: 'build' },
    // pnpm (with and without -C / --filter / -r prefixes — match the full head)
    { tool: 'pnpm-test', pattern: /^pnpm(?:\s+(?:-C\s+\S+|--filter(?:\s+|=)\S+|-r))*\s+(?:run\s+)?test\b/, category: 'test' },
    { tool: 'pnpm-lint', pattern: /^pnpm(?:\s+(?:-C\s+\S+|--filter(?:\s+|=)\S+|-r))*\s+(?:run\s+)?lint\b/, category: 'lint' },
    { tool: 'pnpm-typecheck', pattern: /^pnpm(?:\s+(?:-C\s+\S+|--filter(?:\s+|=)\S+|-r))*\s+(?:run\s+)?typecheck\b/, category: 'typecheck' },
    { tool: 'pnpm-build', pattern: /^pnpm(?:\s+(?:-C\s+\S+|--filter(?:\s+|=)\S+|-r))*\s+(?:run\s+)?build\b/, category: 'build' },
    // yarn
    { tool: 'yarn-test', pattern: /^yarn\s+(?:run\s+)?test\b/, category: 'test' },
    { tool: 'yarn-lint', pattern: /^yarn\s+(?:run\s+)?lint\b/, category: 'lint' },
    { tool: 'yarn-typecheck', pattern: /^yarn\s+(?:run\s+)?typecheck\b/, category: 'typecheck' },
    { tool: 'yarn-build', pattern: /^yarn\s+(?:run\s+)?build\b/, category: 'build' },
    // Direct test-runner invocations (npx and bare).
    { tool: 'jest', pattern: /^(?:npx\s+)?jest\b/, category: 'test' },
    { tool: 'vitest', pattern: /^(?:npx\s+)?vitest\b/, category: 'test' },
    { tool: 'mocha', pattern: /^(?:npx\s+)?mocha\b/, category: 'test' },
    // tsc only counts as a typecheck when bare or when `--noEmit` is present.
    { tool: 'tsc-typecheck', pattern: /^(?:npx\s+)?tsc\b(?=.*--noEmit|\s*$)/, category: 'typecheck' },
    { tool: 'eslint', pattern: /^(?:npx\s+)?eslint\b/, category: 'lint' },
    { tool: 'node-test', pattern: /^node\s+--test\b/, category: 'test' },
    // ----- Python -----
    // Accept `python`, `python3` and versioned `python3.x` launchers: on many
    // systems only `python3` exists, so `python3 -m pytest` is the common form.
    { tool: 'pytest', pattern: /^(?:python(?:3(?:\.\d+)?)?\s+-m\s+)?pytest\b/, category: 'test' },
    { tool: 'python-unittest', pattern: /^python(?:3(?:\.\d+)?)?\s+-m\s+unittest\b/, category: 'test' },
    { tool: 'ruff', pattern: /^ruff\s+check\b/, category: 'lint' },
    { tool: 'mypy', pattern: /^mypy\b/, category: 'typecheck' },
    // ----- Rust -----
    { tool: 'cargo-test', pattern: /^cargo\s+test\b/, category: 'test' },
    { tool: 'cargo-check', pattern: /^cargo\s+check\b/, category: 'typecheck' },
    { tool: 'cargo-clippy', pattern: /^cargo\s+clippy\b/, category: 'lint' },
    { tool: 'cargo-build', pattern: /^cargo\s+build\b/, category: 'build' },
    // ----- Go -----
    { tool: 'go-test', pattern: /^go\s+test\b/, category: 'test' },
    { tool: 'go-vet', pattern: /^go\s+vet\b/, category: 'lint' },
    { tool: 'go-build', pattern: /^go\s+build\b/, category: 'build' },
    // ----- Elixir -----
    { tool: 'mix-test', pattern: /^mix\s+test\b/, category: 'test' },
    // ----- Ruby -----
    { tool: 'rspec', pattern: /^(?:bundle\s+exec\s+)?rspec\b/, category: 'test' },
    { tool: 'rubocop', pattern: /^(?:bundle\s+exec\s+)?rubocop\b/, category: 'lint' },
    // ----- Java / Kotlin / Gradle / Maven -----
    { tool: 'gradle-test', pattern: /^(?:\.\/)?gradlew?\s+test\b/, category: 'test' },
    { tool: 'gradle-build', pattern: /^(?:\.\/)?gradlew?\s+build\b/, category: 'build' },
    { tool: 'maven-test', pattern: /^mvn\s+test\b/, category: 'test' },
    { tool: 'maven-verify', pattern: /^mvn\s+verify\b/, category: 'test' },
    // ----- C/C++ / Make -----
    { tool: 'make-test', pattern: /^make\s+(?:test|check)\b/, category: 'test' },
    { tool: 'ctest', pattern: /^ctest\b/, category: 'test' },
];
|
|
88
|
+
/** Shell operators that separate independent components of a command line. */
const SHELL_SEPARATORS = /\s*(?:&&|\|\||;|\|)\s*/;
/**
 * A single inline environment assignment token (`NAME=value`). POSIX
 * variable names may use lower-case letters, so both cases are accepted.
 */
const ENV_ASSIGN = /^[A-Za-z_][A-Za-z0-9_]*=\S+$/;
/**
 * Wrapper commands that merely launch the real command. The trailing
 * `\s+` means the wrapper must be a whole word followed by more input,
 * so `sudoku`, `timeout` or a bare `env` are left untouched.
 */
const WRAPPER_PREFIX = /^(?:sudo|time|env)\s+/;
/**
 * Strip leading launcher noise so the verb is the first token of the
 * returned string. Removed, repeatedly and in any order:
 *   - `sudo`, `time` and `env` wrappers (any whitespace after the word)
 *   - inline environment assignments (`CI=1`, `FOO=bar BAZ=qux`)
 *
 * Examples: `CI=1 pnpm test` -> `pnpm test`,
 * `sudo env CI=1 npm test` -> `npm test`.
 *
 * Wrapper flags are NOT understood (`sudo -E npm test` yields
 * `-E npm test`); that is a deliberate false negative that keeps the
 * stripping rules auditable.
 *
 * Pure — no side effects.
 *
 * @param {string} component One shell-separated component of a command.
 * @returns {string} The component without leading wrappers/assignments;
 *   `''` when the input is not a string or nothing remains.
 */
export function extractCommandHead(component) {
    if (typeof component !== 'string') {
        return '';
    }
    let head = component.trim();
    for (;;) {
        const wrapper = WRAPPER_PREFIX.exec(head);
        if (wrapper !== null) {
            // The match already includes the trailing whitespace.
            head = head.slice(wrapper[0].length);
            continue;
        }
        // Peel one assignment at a time so `FOO=bar BAZ=qux pnpm test`
        // resolves to `pnpm test`.
        const firstToken = head.split(/\s+/, 1)[0] ?? '';
        if (ENV_ASSIGN.test(firstToken)) {
            head = head.slice(firstToken.length).trimStart();
            continue;
        }
        return head;
    }
}
|
|
123
|
+
/**
 * Split a command line on the unquoted shell operators `&&`, `||`, `;`
 * and `|`. Operators inside single or double quotes, or escaped with a
 * backslash, are kept as literal text so that quoted arguments (commit
 * messages, `echo` strings) never produce a component of their own.
 * An unterminated quote swallows the rest of the line.
 *
 * @param {string} cmd Raw command line.
 * @returns {string[]} Components in source order (untrimmed, may be empty).
 */
function splitShellComponents(cmd) {
    const components = [];
    let current = '';
    let quote = ''; // the active quote character, or '' when unquoted
    for (let i = 0; i < cmd.length; i += 1) {
        const ch = cmd[i];
        const next = cmd[i + 1];
        // Backslash escapes the next character everywhere except inside
        // single quotes, where it is literal.
        if (ch === '\\' && quote !== "'" && next !== undefined) {
            current += ch + next;
            i += 1;
            continue;
        }
        if (quote !== '') {
            if (ch === quote) {
                quote = '';
            }
            current += ch;
            continue;
        }
        if (ch === '"' || ch === "'") {
            quote = ch;
            current += ch;
            continue;
        }
        if ((ch === '&' || ch === '|') && next === ch) {
            components.push(current);
            current = '';
            i += 1; // consume the second character of `&&` / `||`
            continue;
        }
        if (ch === ';' || ch === '|') {
            components.push(current);
            current = '';
            continue;
        }
        current += ch;
    }
    components.push(current);
    return components;
}
/**
 * Detect whether a shell command runs a verification step. Every
 * unquoted `&&` / `;` / `||` / `|`-separated component is normalised
 * with `extractCommandHead` and tested against `VERIFICATION_PATTERNS`;
 * the first match wins, so `cd packages/foo && pnpm test` is flagged
 * on the trailing component.
 *
 * Separators inside quotes are ignored on purpose: treating
 * `git commit -m "wip; npm test still red"` as a test run would be a
 * false positive. The flip side is that verification hidden inside a
 * quoted script (`bash -c "npm test"`) or a wrapper script
 * (`./scripts/test.sh`) is not detected — a recoverable false negative.
 *
 * The check does not parse `if`, `for`, or function bodies.
 *
 * @param {unknown} cmd Raw command line.
 * @returns {{ isVerification: boolean, tool: string | null, matchedComponent: string }}
 *   `matchedComponent` is the trimmed component that matched, or `''`.
 */
export function detectVerificationCommand(cmd) {
    if (typeof cmd !== 'string' || cmd.trim() === '') {
        return { isVerification: false, tool: null, matchedComponent: '' };
    }
    for (const raw of splitShellComponents(cmd)) {
        const head = extractCommandHead(raw);
        if (head === '') {
            continue;
        }
        const entry = VERIFICATION_PATTERNS.find(({ pattern }) => pattern.test(head));
        if (entry !== undefined) {
            return {
                isVerification: true,
                tool: entry.tool,
                matchedComponent: raw.trim(),
            };
        }
    }
    return { isVerification: false, tool: null, matchedComponent: '' };
}
|
|
157
|
+
/**
 * Wording an agent tends to use when it disclaims responsibility for a
 * failing verification step.
 *
 * Intended use (performed by the consumer of this list, not here): if
 * any phrase occurs in the final assistant text AND the agent touched
 * files in the same module as a failing test, the outcome's
 * `regressionOwnershipDispute` flag is raised so a downstream reviewer
 * can decide whether to escalate.
 *
 * Matching is expected to be case-insensitive and substring-based, so
 * surrounding punctuation does not matter — "this is a pre-existing
 * test bug" still trips the flag.
 */
export const REGRESSION_DISPUTE_PHRASES = [
    // "It was already broken" — spelling variants.
    'pre-existing',
    'preexisting',
    'pre existing',
    // "It was not me."
    'not from my changes',
    'not related to my changes',
    // "It has nothing to do with this work."
    'unrelated test failure',
    'unrelated to my changes',
    'unrelated failure',
    'not my change',
];
|
|
180
|
+
/**
 * Tail trimmer for stderr captured in verification ledger entries.
 *
 * Returns the longest suffix of `stderr` whose UTF-8 encoding fits in
 * `maxBytes` (default 2 KB, per the PUGI-VERIFY-GATE contract). The cut
 * is moved forward to the next character boundary, so the result never
 * starts with a partial multi-byte sequence and never exceeds the cap.
 *
 * @param {unknown} stderr Captured stderr text; non-strings yield `''`.
 * @param {number} [maxBytes=2048] Byte budget. Fractions are floored,
 *   negative values are treated as 0, and a non-numeric / NaN value
 *   falls back to the 2048 default.
 * @returns {string} `stderr` unchanged when it already fits, otherwise
 *   its tail; `''` when the budget is 0.
 */
export function tailStderr(stderr, maxBytes = 2048) {
    if (typeof stderr !== 'string' || stderr.length === 0) {
        return '';
    }
    const limit = typeof maxBytes === 'number' && !Number.isNaN(maxBytes)
        ? Math.max(0, Math.floor(maxBytes))
        : 2048;
    const bytes = Buffer.from(stderr, 'utf8');
    if (bytes.length <= limit) {
        return stderr;
    }
    // Slice on the encoded bytes, not on UTF-16 code units: `slice(-n)` on
    // the string over-counts for non-ASCII text and `slice(-0)` returns
    // the whole string.
    let start = bytes.length - limit;
    // 0b10xxxxxx marks a UTF-8 continuation byte; skip forward until the
    // tail begins on a lead byte.
    while (start < bytes.length && (bytes[start] & 0xc0) === 0x80) {
        start += 1;
    }
    return bytes.toString('utf8', start);
}
|
|
195
|
+
//# sourceMappingURL=verification-patterns.js.map
|
|
@@ -335,7 +335,7 @@ async function runConfigMcpFlip(args, ctx, state) {
|
|
|
335
335
|
/* ------------------------------------------------------------------ */
|
|
336
336
|
/**
|
|
337
337
|
* Closed sets — match
|
|
338
|
-
* `apps/admin-api/src/
|
|
338
|
+
* `apps/admin-api/src/pugi/routing/dispatch-tag.ts` verbatim. Pinning
|
|
339
339
|
* them in the CLI lets us reject typos client-side before round-tripping
|
|
340
340
|
* to the admin-api (better UX, smaller blast radius for a wrong typo on
|
|
341
341
|
* a flaky network).
|
|
@@ -43,7 +43,7 @@ const SUB_USAGE = [
|
|
|
43
43
|
'pugi memory forget <id>',
|
|
44
44
|
'pugi memory sync',
|
|
45
45
|
].join('\n ');
|
|
46
|
-
const DEFAULT_PERSONA = '
|
|
46
|
+
const DEFAULT_PERSONA = 'pugi';
|
|
47
47
|
/** Single CLI entry — top-level `pugi memory` AND the in-REPL `/memory` slash both call this. */
|
|
48
48
|
export async function runMemoryCommand(args, ctx) {
|
|
49
49
|
const sub = (args[0] ?? '').toLowerCase();
|
package/dist/runtime/version.js
CHANGED
|
@@ -44,7 +44,7 @@ export function sanitizeSemver(raw) {
|
|
|
44
44
|
* during import). When bumping the CLI version BOTH literals must be
|
|
45
45
|
* updated; the release smoke-test (`pack:smoke`) verifies they agree.
|
|
46
46
|
*/
|
|
47
|
-
export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.
|
|
47
|
+
export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.93');
|
|
48
48
|
/**
|
|
49
49
|
* Outbound: the CLI's installed semver. Read at request time by
|
|
50
50
|
* `version-interceptor.ts` and injected on every `fetch` call.
|
|
@@ -49,7 +49,7 @@
|
|
|
49
49
|
*/
|
|
50
50
|
import { readFileSync } from 'node:fs';
|
|
51
51
|
import { PERSONA_MEMORY_KINDS, enqueueMemoryOp, } from '../../core/memory-sync/queue.js';
|
|
52
|
-
const DEFAULT_PERSONA = '
|
|
52
|
+
const DEFAULT_PERSONA = 'pugi';
|
|
53
53
|
function parseFlags(args) {
|
|
54
54
|
const flags = {
|
|
55
55
|
json: false,
|
|
@@ -372,7 +372,7 @@ const REMEMBER_USAGE = [
|
|
|
372
372
|
'',
|
|
373
373
|
'Flags:',
|
|
374
374
|
' --json Emit a JSON envelope instead of human text.',
|
|
375
|
-
' --persona <slug> Persona slug to attribute the memory to (default:
|
|
375
|
+
' --persona <slug> Persona slug to attribute the memory to (default: pugi).',
|
|
376
376
|
' --input <path> Read newline-separated candidates from a file.',
|
|
377
377
|
'',
|
|
378
378
|
'Every proposal is shown to the operator BEFORE persisting; nothing is',
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pugi/cli",
|
|
3
|
-
"version": "0.1.0-beta.
|
|
3
|
+
"version": "0.1.0-beta.93",
|
|
4
4
|
"description": "Pugi CLI - terminal-native software execution system",
|
|
5
5
|
"homepage": "https://pugi.io",
|
|
6
6
|
"repository": {
|
|
@@ -63,7 +63,7 @@
|
|
|
63
63
|
"which": "^6.0.0",
|
|
64
64
|
"zod": "^3.23.0",
|
|
65
65
|
"@pugi/personas": "0.1.2",
|
|
66
|
-
"@pugi/sdk": "0.1.0-beta.
|
|
66
|
+
"@pugi/sdk": "0.1.0-beta.93"
|
|
67
67
|
},
|
|
68
68
|
"devDependencies": {
|
|
69
69
|
"@types/node": "^22.0.0",
|