@pugi/cli 0.1.0-beta.100 → 0.1.0-beta.101

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/README.md +2 -0
  2. package/dist/core/codegraph/parser.js +574 -47
  3. package/dist/core/codegraph/queries/go.scm +57 -0
  4. package/dist/core/codegraph/queries/javascript.scm +56 -0
  5. package/dist/core/codegraph/queries/python.scm +55 -0
  6. package/dist/core/codegraph/queries/rust.scm +63 -0
  7. package/dist/core/codegraph/queries/typescript.scm +91 -0
  8. package/dist/core/codegraph/reindex.js +218 -0
  9. package/dist/core/codegraph/resolve-edges.js +107 -0
  10. package/dist/core/codegraph/watcher.js +440 -0
  11. package/dist/core/diagnostics/probes/sandbox.js +7 -12
  12. package/dist/core/engine/prompts.js +32 -0
  13. package/dist/core/eval/v1/ledger.js +83 -0
  14. package/dist/core/eval/v1/runner.js +280 -0
  15. package/dist/core/eval/v1/scoring.js +68 -0
  16. package/dist/core/eval/v1/task-loader.js +191 -0
  17. package/dist/core/eval/v1/types.js +14 -0
  18. package/dist/core/eval/v1/verifier.js +176 -0
  19. package/dist/core/eval/v1/yaml-parser.js +250 -0
  20. package/dist/core/sandboxing/adapter.js +31 -17
  21. package/dist/core/sandboxing/bubblewrap.js +209 -0
  22. package/dist/core/sandboxing/index.js +32 -3
  23. package/dist/core/sandboxing/policy.js +97 -0
  24. package/dist/core/sandboxing/seatbelt.js +69 -21
  25. package/dist/core/settings.js +31 -7
  26. package/dist/runtime/cli.js +58 -0
  27. package/dist/runtime/commands/eval-v1.js +266 -0
  28. package/dist/runtime/commands/index-cmd.js +125 -19
  29. package/dist/runtime/commands/servers-cli.js +182 -0
  30. package/dist/runtime/version.js +1 -1
  31. package/dist/tools/bash.js +187 -3
  32. package/package.json +10 -3
@@ -0,0 +1,176 @@
1
+ /**
2
+ * Verification check executor for pugi-eval-v1.
3
+ *
4
+ * Each check kind has a pure implementation. The runner calls
5
+ * `runVerifications` after the CLI subprocess exits, passing the
6
+ * post-run workspace root + the final stdout text the engine emitted.
7
+ *
8
+ * Path safety: `file_exists` and `file_contains` resolve the path
9
+ * relative to the workspace root and refuse anything that escapes the
10
+ * root (`..` traversal, absolute paths, symlinks). A task spec that
11
+ * tries to peek outside the workspace fails the check rather than
12
+ * leaking host state.
13
+ */
14
+ import { spawnSync } from 'node:child_process';
15
+ import { existsSync, readFileSync, realpathSync, statSync } from 'node:fs';
16
+ import { isAbsolute, relative, resolve } from 'node:path';
17
/**
 * Time budget, in milliseconds, applied to a `command_exit_code` check
 * whose spec does not provide its own `timeoutMs`.
 */
const DEFAULT_COMMAND_TIMEOUT_MS = 60 * 1_000;
18
/**
 * Report whether `candidateReal` is `rootReal` itself or a path nested
 * beneath it.
 *
 * @param {string} rootReal - Directory that bounds the workspace.
 * @param {string} candidateReal - Path to test against the root.
 * @returns {boolean} `true` when the candidate is the root or lives under it.
 */
function withinRoot(rootReal, candidateReal) {
    if (candidateReal === rootReal) {
        return true;
    }
    const rel = relative(rootReal, candidateReal);
    // An empty relative path means both arguments name the same directory
    // (for example one of them carries a trailing slash).
    if (rel === '') {
        return true;
    }
    // `relative` yields an absolute path when no relative route exists
    // (different drive on Windows) - that is always outside the root.
    if (isAbsolute(rel)) {
        return false;
    }
    // Only a leading `..` *segment* climbs out of the root. A plain prefix
    // test would also reject legitimate names such as `..hidden`.
    const [firstSegment] = rel.split(/[\\/]/);
    return firstSegment !== '..';
}
24
/**
 * Resolve `workspaceRoot` to its realpath once so symlinked tmp dirs
 * (macOS `/tmp` -> `/private/tmp`) compare correctly against any
 * resolved child path. Falls back to the normalised absolute form of
 * the root when the directory cannot be resolved (defensive -
 * production callers always create the tmp dir before invoking), so
 * the result is always absolute and free of trailing separators.
 *
 * @param {string} workspaceRoot - Workspace directory, absolute or relative to the cwd.
 * @returns {string} Canonical path of the workspace root.
 */
function realRoot(workspaceRoot) {
    try {
        return realpathSync(workspaceRoot);
    }
    catch {
        // realpath failed (typically the directory does not exist yet):
        // normalise the spelling so later comparisons still line up.
        return resolve(workspaceRoot);
    }
}
39
/**
 * Map a task-supplied relative `path` onto the workspace and refuse
 * anything that would land outside of it.
 *
 * @param {string} workspaceRoot - Root directory of the post-run workspace.
 * @param {string} path - Relative path taken from the task spec.
 * @returns {{ok: true, absolute: string} | {ok: false, reason: string}}
 *   The resolved absolute path, or the reason the path was refused.
 */
function resolveWorkspacePath(workspaceRoot, path) {
    const refuse = (reason) => ({ ok: false, reason });
    if (isAbsolute(path)) {
        return refuse(`absolute paths refused (${path})`);
    }
    const segments = path.split(/[\\/]/);
    if (segments.includes('..')) {
        return refuse(`path traversal refused (${path})`);
    }
    const root = realRoot(workspaceRoot);
    const absolute = resolve(root, path);
    if (!withinRoot(root, absolute)) {
        return refuse(`path escapes workspace root (${path})`);
    }
    const accepted = { ok: true, absolute };
    // Nothing on disk yet: there is no symlink to follow, so the lexical
    // check above is all that can be verified.
    if (!existsSync(absolute)) {
        return accepted;
    }
    let real;
    try {
        real = realpathSync(absolute);
    }
    catch {
        // realpath failed even though the entry was just seen: let the
        // caller's own stat/read surface the failure instead of throwing.
        return accepted;
    }
    // An entry that sits inside the workspace but points outside of it
    // (via a symlink) is refused.
    if (!withinRoot(root, real)) {
        return refuse(`symlink target escapes workspace root (${path})`);
    }
    return accepted;
}
71
/**
 * Test `haystack` against `pattern`, either as a regular expression
 * (`mode === 'regex'`) or as a literal substring (any other mode).
 *
 * A missing haystack (`null` / `undefined`) is treated as empty text.
 * Without that guard the regex branch would test against the coerced
 * word "undefined" and the literal branch would throw.
 *
 * @param {string | null | undefined} haystack - Text to search.
 * @param {string} pattern - Regex source or literal substring.
 * @param {string} [mode] - `'regex'` for regex matching; anything else is literal.
 * @returns {{ok: boolean, reason: string}} `reason` is empty on success.
 */
function matches(haystack, pattern, mode) {
    const text = haystack == null ? '' : String(haystack);
    if (mode === 'regex') {
        let re;
        try {
            re = new RegExp(pattern);
        }
        catch (err) {
            // An uncompilable pattern is a failed check, not a crash.
            return {
                ok: false,
                reason: `invalid regex ${pattern}: ${err.message}`,
            };
        }
        return re.test(text)
            ? { ok: true, reason: '' }
            : { ok: false, reason: `regex ${pattern} did not match` };
    }
    return text.includes(pattern)
        ? { ok: true, reason: '' }
        : { ok: false, reason: `literal ${JSON.stringify(pattern)} not found` };
}
91
/**
 * Execute a single verification check against the post-run workspace.
 *
 * Supported kinds:
 *  - `file_exists`: `check.path` resolves inside the workspace to a regular file.
 *  - `file_contains`: the file at `check.path` matches `check.pattern` / `check.mode`.
 *  - `output_contains`: `ctx.finalText` matches `check.pattern` / `check.mode`.
 *  - `command_exit_code`: `check.command`, run through `bash -lc` in the
 *    workspace, exits with `check.expectedExitCode`.
 *
 * @param {object} check - Verification entry; `check.kind` selects the branch.
 * @param {{workspaceRoot: string, finalText: string}} ctx - Run context.
 * @returns {{kind: string, passed: boolean, detail: string}} `detail` is
 *   empty on success and explains the failure otherwise. An unrecognised
 *   kind yields a failed result instead of `undefined`.
 */
export function runVerification(check, ctx) {
    switch (check.kind) {
        case 'file_exists': {
            const r = resolveWorkspacePath(ctx.workspaceRoot, check.path);
            if (!r.ok) {
                return { kind: check.kind, passed: false, detail: r.reason };
            }
            try {
                const stat = statSync(r.absolute);
                if (!stat.isFile()) {
                    return {
                        kind: check.kind,
                        passed: false,
                        detail: `${check.path} exists but is not a regular file`,
                    };
                }
                return { kind: check.kind, passed: true, detail: '' };
            }
            catch {
                // stat threw: report the path as missing.
                return {
                    kind: check.kind,
                    passed: false,
                    detail: `${check.path} not found`,
                };
            }
        }
        case 'file_contains': {
            const r = resolveWorkspacePath(ctx.workspaceRoot, check.path);
            if (!r.ok) {
                return { kind: check.kind, passed: false, detail: r.reason };
            }
            let body;
            try {
                body = readFileSync(r.absolute, 'utf8');
            }
            catch {
                return {
                    kind: check.kind,
                    passed: false,
                    detail: `${check.path} not readable`,
                };
            }
            const m = matches(body, check.pattern, check.mode);
            return {
                kind: check.kind,
                passed: m.ok,
                detail: m.ok ? '' : `${check.path}: ${m.reason}`,
            };
        }
        case 'output_contains': {
            const m = matches(ctx.finalText, check.pattern, check.mode);
            return {
                kind: check.kind,
                passed: m.ok,
                detail: m.ok ? '' : `final output: ${m.reason}`,
            };
        }
        case 'command_exit_code': {
            const timeout = check.timeoutMs ?? DEFAULT_COMMAND_TIMEOUT_MS;
            const result = spawnSync('bash', ['-lc', check.command], {
                cwd: ctx.workspaceRoot,
                timeout,
                // Only the exit status is inspected. Piping output that is
                // never read would let a chatty command overflow spawnSync's
                // default maxBuffer and get terminated, failing the check
                // for the wrong reason.
                stdio: 'ignore',
                env: { ...process.env, CI: '1' },
            });
            if (result.error) {
                // Spawn failure or timeout.
                return {
                    kind: check.kind,
                    passed: false,
                    detail: `${check.command}: ${result.error.message}`,
                };
            }
            // `status` is null when the process was ended by a signal.
            const exit = result.status ?? -1;
            if (exit !== check.expectedExitCode) {
                return {
                    kind: check.kind,
                    passed: false,
                    detail: `${check.command}: exit ${exit} expected ${check.expectedExitCode}`,
                };
            }
            return { kind: check.kind, passed: true, detail: '' };
        }
        default:
            // Fail closed on kinds this executor does not know about so
            // callers always receive a result object.
            return {
                kind: check.kind,
                passed: false,
                detail: `unknown verification kind ${JSON.stringify(check.kind)}`,
            };
    }
}
173
/**
 * Run every check in `checks` against the same context.
 *
 * @param {object[]} checks - Verification entries, evaluated in order.
 * @param {object} ctx - Context handed unchanged to each check.
 * @returns {object[]} One result per check, in input order.
 */
export function runVerifications(checks, ctx) {
    const verify = (check) => runVerification(check, ctx);
    return checks.map(verify);
}
176
+ //# sourceMappingURL=verifier.js.map
@@ -0,0 +1,250 @@
1
+ /**
2
+ * Minimal YAML parser for pugi-eval-v1 task files.
3
+ *
4
+ * Same rationale as `core/recipes/schema.ts`: pulling in `js-yaml`
5
+ * is ~70 KB of installed weight for a schema we control. The shape
6
+ * here is intentionally narrow:
7
+ *
8
+ * - Top-level scalars (string / number / boolean).
9
+ * - Block sequences of scalars (e.g. plain string lists).
10
+ * - Block sequences of maps (verification entries).
11
+ * - Nested block maps (fixture file map).
12
+ * - Multi-line block scalars via `|` (literal) and `>` (folded).
13
+ * - `#` line comments.
14
+ *
15
+ * Unsupported YAML features: flow collections, anchors, tags,
16
+ * directives, document separators. The parser throws on anything
17
+ * outside the supported subset so a malformed task file fails fast
18
+ * with a clear line number.
19
+ *
20
+ * The parsed value is `unknown`; downstream loader code applies a Zod
21
+ * schema to narrow it. The parser deliberately does NOT do schema
22
+ * validation - it only enforces YAML well-formedness.
23
+ */
24
/**
 * Error raised when a task YAML file falls outside the supported
 * subset. The offending line number is embedded in the message and
 * also exposed as `line`.
 */
export class TaskYamlParseError extends Error {
    /** Line number the problem was reported at. */
    line;
    /**
     * @param {string} message - Description of the problem.
     * @param {number} line - Line number to report.
     */
    constructor(message, line) {
        const text = `task YAML parse error at line ${line}: ${message}`;
        super(text);
        this.name = 'TaskYamlParseError';
        this.line = line;
    }
}
32
+ /**
33
+ * Tokenise the source into non-blank, non-comment lines + their
34
+ * indentation. Tab characters are rejected (YAML spec) so the indent
35
+ * count is unambiguous.
36
+ */
37
+ function tokenise(source) {
38
+ const physical = source.split(/\r?\n/);
39
+ const out = [];
40
+ for (let i = 0; i < physical.length; i += 1) {
41
+ const raw = physical[i] ?? '';
42
+ const lineNo = i + 1;
43
+ if (raw.includes('\t')) {
44
+ throw new TaskYamlParseError('tab characters are not allowed; use spaces for indentation', lineNo);
45
+ }
46
+ const stripped = raw.replace(/\s*#.*$/, '');
47
+ if (stripped.trim() === '')
48
+ continue;
49
+ const indent = stripped.length - stripped.replace(/^ +/, '').length;
50
+ out.push({ lineNo, indent, body: stripped.slice(indent) });
51
+ }
52
+ return out;
53
+ }
54
+ /**
55
+ * Convert a literal scalar (after the `:` or after a `-` marker) into
56
+ * a JS value. Recognises `true`/`false`, integers, floats, single-
57
+ * and double-quoted strings; everything else is returned verbatim as
58
+ * an unquoted string.
59
+ */
60
+ function coerceScalar(raw, lineNo) {
61
+ const trimmed = raw.trim();
62
+ if (trimmed === '')
63
+ return '';
64
+ if (trimmed === 'true')
65
+ return true;
66
+ if (trimmed === 'false')
67
+ return false;
68
+ if (trimmed === 'null' || trimmed === '~')
69
+ return '';
70
+ if (/^-?\d+$/.test(trimmed))
71
+ return Number(trimmed);
72
+ if (/^-?\d+\.\d+$/.test(trimmed))
73
+ return Number(trimmed);
74
+ const doubleQuoted = /^"((?:\\.|[^"\\])*)"$/.exec(trimmed);
75
+ if (doubleQuoted) {
76
+ // Decode common escape sequences. This subset matches what the
77
+ // task spec needs (newlines inside regex patterns, escaped
78
+ // backslashes).
79
+ return doubleQuoted[1].replace(/\\(.)/g, (_, c) => {
80
+ if (c === 'n')
81
+ return '\n';
82
+ if (c === 't')
83
+ return '\t';
84
+ if (c === 'r')
85
+ return '\r';
86
+ if (c === '"')
87
+ return '"';
88
+ if (c === '\\')
89
+ return '\\';
90
+ throw new TaskYamlParseError(`unknown escape \\${c} in double-quoted string`, lineNo);
91
+ });
92
+ }
93
+ const singleQuoted = /^'((?:''|[^'])*)'$/.exec(trimmed);
94
+ if (singleQuoted) {
95
+ return singleQuoted[1].replace(/''/g, "'");
96
+ }
97
+ return trimmed;
98
+ }
99
+ /**
100
+ * Block scalar reader for `|` (literal, preserves newlines) and `>`
101
+ * (folded, newlines collapse to spaces). Joins continuation lines
102
+ * whose indent is strictly greater than the parent map's indent.
103
+ */
104
+ function readBlockScalar(marker, parentIndent, lines, cursor) {
105
+ const collected = [];
106
+ let baseIndent = -1;
107
+ while (cursor.i < lines.length) {
108
+ const next = lines[cursor.i];
109
+ if (next.indent <= parentIndent)
110
+ break;
111
+ if (baseIndent < 0)
112
+ baseIndent = next.indent;
113
+ const slice = ' '.repeat(Math.max(0, next.indent - baseIndent)) + next.body;
114
+ collected.push(slice);
115
+ cursor.i += 1;
116
+ }
117
+ if (marker === '|')
118
+ return collected.join('\n');
119
+ return collected.join(' ');
120
+ }
121
+ /**
122
+ * Recursively parse a block beginning at `cursor.i` whose body lives
123
+ * strictly deeper than `parentIndent`. The return value is either a
124
+ * map (object) or a sequence (array); the caller decides which based
125
+ * on the first child line.
126
+ */
127
+ function parseBlock(lines, cursor, parentIndent) {
128
+ if (cursor.i >= lines.length)
129
+ return {};
130
+ const first = lines[cursor.i];
131
+ if (first.indent <= parentIndent)
132
+ return {};
133
+ const blockIndent = first.indent;
134
+ const isSequence = first.body.startsWith('- ') || first.body === '-';
135
+ if (isSequence) {
136
+ const arr = [];
137
+ while (cursor.i < lines.length) {
138
+ const line = lines[cursor.i];
139
+ if (line.indent < blockIndent)
140
+ break;
141
+ if (line.indent > blockIndent) {
142
+ throw new TaskYamlParseError(`unexpected indent ${line.indent} in sequence (expected ${blockIndent})`, line.lineNo);
143
+ }
144
+ if (!line.body.startsWith('-')) {
145
+ throw new TaskYamlParseError(`expected sequence marker '-' but found ${line.body.slice(0, 10)}`, line.lineNo);
146
+ }
147
+ const after = line.body.replace(/^-\s*/, '');
148
+ cursor.i += 1;
149
+ if (after === '') {
150
+ arr.push(parseBlock(lines, cursor, blockIndent));
151
+ continue;
152
+ }
153
+ // Inline `- key: value` first entry; treat the rest of the line
154
+ // as a single-key map entry, then continue collecting deeper
155
+ // siblings into the same map.
156
+ const colonIdx = after.indexOf(':');
157
+ if (colonIdx === -1) {
158
+ arr.push(coerceScalar(after, line.lineNo));
159
+ continue;
160
+ }
161
+ const key = after.slice(0, colonIdx).trim();
162
+ const valRaw = after.slice(colonIdx + 1).trim();
163
+ const entry = {};
164
+ if (valRaw === '' || valRaw === '|' || valRaw === '>') {
165
+ if (valRaw === '|' || valRaw === '>') {
166
+ entry[key] = readBlockScalar(valRaw, blockIndent, lines, cursor);
167
+ }
168
+ else {
169
+ entry[key] = parseBlock(lines, cursor, blockIndent);
170
+ }
171
+ }
172
+ else {
173
+ entry[key] = coerceScalar(valRaw, line.lineNo);
174
+ }
175
+ // Continue absorbing deeper siblings of this synthetic map.
176
+ while (cursor.i < lines.length) {
177
+ const sibling = lines[cursor.i];
178
+ // Sibling map keys live at the same indent as the inline
179
+ // `key: value` text (which is at `blockIndent + 2` relative to
180
+ // the `-` marker). Anything at blockIndent or shallower
181
+ // belongs to the parent.
182
+ const childIndent = blockIndent + 2;
183
+ if (sibling.indent < childIndent)
184
+ break;
185
+ if (sibling.indent > childIndent) {
186
+ throw new TaskYamlParseError(`unexpected indent ${sibling.indent} (expected ${childIndent})`, sibling.lineNo);
187
+ }
188
+ const sibColon = sibling.body.indexOf(':');
189
+ if (sibColon === -1) {
190
+ throw new TaskYamlParseError(`expected key:value in map but found ${sibling.body.slice(0, 20)}`, sibling.lineNo);
191
+ }
192
+ const sibKey = sibling.body.slice(0, sibColon).trim();
193
+ const sibVal = sibling.body.slice(sibColon + 1).trim();
194
+ cursor.i += 1;
195
+ if (sibVal === '' || sibVal === '|' || sibVal === '>') {
196
+ if (sibVal === '|' || sibVal === '>') {
197
+ entry[sibKey] = readBlockScalar(sibVal, childIndent, lines, cursor);
198
+ }
199
+ else {
200
+ entry[sibKey] = parseBlock(lines, cursor, childIndent);
201
+ }
202
+ }
203
+ else {
204
+ entry[sibKey] = coerceScalar(sibVal, sibling.lineNo);
205
+ }
206
+ }
207
+ arr.push(entry);
208
+ }
209
+ return arr;
210
+ }
211
+ const map = {};
212
+ while (cursor.i < lines.length) {
213
+ const line = lines[cursor.i];
214
+ if (line.indent < blockIndent)
215
+ break;
216
+ if (line.indent > blockIndent) {
217
+ throw new TaskYamlParseError(`unexpected indent ${line.indent} (expected ${blockIndent})`, line.lineNo);
218
+ }
219
+ const colonIdx = line.body.indexOf(':');
220
+ if (colonIdx === -1) {
221
+ throw new TaskYamlParseError(`expected key:value but found ${line.body.slice(0, 20)}`, line.lineNo);
222
+ }
223
+ const key = line.body.slice(0, colonIdx).trim();
224
+ const valRaw = line.body.slice(colonIdx + 1).trim();
225
+ cursor.i += 1;
226
+ if (valRaw === '') {
227
+ map[key] = parseBlock(lines, cursor, blockIndent);
228
+ continue;
229
+ }
230
+ if (valRaw === '|' || valRaw === '>') {
231
+ map[key] = readBlockScalar(valRaw, blockIndent, lines, cursor);
232
+ continue;
233
+ }
234
+ map[key] = coerceScalar(valRaw, line.lineNo);
235
+ }
236
+ return map;
237
+ }
238
/**
 * Parse the text of a task YAML file into plain JS data.
 *
 * @param {string} source - Full text of the task file.
 * @returns {unknown} The parsed document; an empty object when the
 *   source holds no meaningful lines.
 * @throws {TaskYamlParseError} When the source falls outside the
 *   supported subset or content is left over after the document body.
 */
export function parseTaskYaml(source) {
    const tokens = tokenise(source);
    const total = tokens.length;
    if (total === 0) {
        return {};
    }
    const position = { i: 0 };
    const document = parseBlock(tokens, position, -1);
    if (position.i === total) {
        return document;
    }
    // The block parser stopped before consuming every line.
    const stray = tokens[position.i];
    throw new TaskYamlParseError(`stray content after document body`, stray.lineNo);
}
250
+ //# sourceMappingURL=yaml-parser.js.map
@@ -1,25 +1,39 @@
1
1
  /**
2
- * Bash sandbox adapter interface (Trust Sprint item 6).
2
+ * Bash sandbox adapter interface (Trust Sprint item 6 + Phase 1 #302).
3
3
  *
4
- * Adapter pattern so the bash tool stays unchanged: a runner wraps the
5
- * spawn invocation with an OS-level sandbox primitive. Today's variants:
4
+ * Adapter pattern: a runner wraps the spawn invocation with an
5
+ * OS-level sandbox primitive. Today's variants:
6
6
  *
7
- * - none passthrough (existing behaviour).
8
- * - macOS-seatbelt /usr/bin/sandbox-exec with a workspace-scoped
9
- * write allowlist, read-anywhere, network-allow
10
- * profile.
11
- * - docker — Linux fallback. Throws at boot (deferred to a
12
- * follow-up PR; schema accepts the keyword so
13
- * operators can see it documented).
7
+ * - none - passthrough (legacy behaviour, default).
8
+ * - macOS-seatbelt - /usr/bin/sandbox-exec with a workspace-scoped
9
+ * write allowlist + secret-dir deny list +
10
+ * posture-conditional network rule.
11
+ * - bubblewrap - Linux `bwrap` user-namespace jail with a
12
+ * read-only bind of /usr, /lib, /etc, a tmpfs at
13
+ * /tmp, and a writable bind for workspaceRoot.
14
+ * Posture toggles `--share-net`.
15
+ * - docker - Windows fallback (not shipped in this PR;
16
+ * schema accepts the keyword so a forward-rolled
17
+ * settings.json does not error).
14
18
  *
15
- * The CLI bash tool itself is owned by a parallel agent (PUGI-VERIFY-
16
- * GATE). We intentionally do NOT modify `tools/bash.ts` here. Instead
17
- * the sandbox sits as an indirection layer between higher-level
18
- * callers (`runtime/cli.ts`, `core/bash-runner.ts` if introduced
19
- * later) and the existing bash entry-point.
19
+ * Mechanism x posture matrix:
20
20
  *
21
- * Future: replace this with native landlock bindings on Linux and
22
- * job-object on Windows. The interface is stable, the adapters
21
+ * | mechanism | strict | lenient | off |
22
+ * | --------------- | ----------------------------------- | --------------------------------- | ------------ |
23
+ * | none | passthrough (mode wins) | passthrough (mode wins) | passthrough |
24
+ * | macOS-seatbelt | workspace writes + deny network | workspace writes + allow network | passthrough |
25
+ * | bubblewrap | workspace bind + deny network | workspace bind + allow network | passthrough |
26
+ * | docker | (not shipped) | (not shipped) | passthrough |
27
+ *
28
+ * Wired into `tools/bash.ts` at the `spawn`/`spawnSync` call sites
29
+ * (foreground async, foreground sync, background). The wrap fires
30
+ * AFTER the permission gate so a refused command never reaches the
31
+ * sandbox layer; if the adapter probe returns `armed=false` and the
32
+ * configured mechanism is non-`none`, the bash tool refuses
33
+ * fail-closed instead of silently degrading.
34
+ *
35
+ * Future: replace bubblewrap with native landlock bindings on Linux
36
+ * + job-object on Windows. The interface is stable, the adapters
23
37
  * change.
24
38
  */
25
39
  export {};