@pugi/cli 0.1.0-beta.100 → 0.1.0-beta.101
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/core/codegraph/parser.js +574 -47
- package/dist/core/codegraph/queries/go.scm +57 -0
- package/dist/core/codegraph/queries/javascript.scm +56 -0
- package/dist/core/codegraph/queries/python.scm +55 -0
- package/dist/core/codegraph/queries/rust.scm +63 -0
- package/dist/core/codegraph/queries/typescript.scm +91 -0
- package/dist/core/codegraph/reindex.js +218 -0
- package/dist/core/codegraph/resolve-edges.js +107 -0
- package/dist/core/codegraph/watcher.js +440 -0
- package/dist/core/diagnostics/probes/sandbox.js +7 -12
- package/dist/core/engine/prompts.js +32 -0
- package/dist/core/eval/v1/ledger.js +83 -0
- package/dist/core/eval/v1/runner.js +280 -0
- package/dist/core/eval/v1/scoring.js +68 -0
- package/dist/core/eval/v1/task-loader.js +191 -0
- package/dist/core/eval/v1/types.js +14 -0
- package/dist/core/eval/v1/verifier.js +176 -0
- package/dist/core/eval/v1/yaml-parser.js +250 -0
- package/dist/core/sandboxing/adapter.js +31 -17
- package/dist/core/sandboxing/bubblewrap.js +209 -0
- package/dist/core/sandboxing/index.js +32 -3
- package/dist/core/sandboxing/policy.js +97 -0
- package/dist/core/sandboxing/seatbelt.js +69 -21
- package/dist/core/settings.js +31 -7
- package/dist/runtime/cli.js +58 -0
- package/dist/runtime/commands/eval-v1.js +266 -0
- package/dist/runtime/commands/index-cmd.js +125 -19
- package/dist/runtime/commands/servers-cli.js +182 -0
- package/dist/runtime/version.js +1 -1
- package/dist/tools/bash.js +187 -3
- package/package.json +10 -3
|
@@ -0,0 +1,440 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pugi local symbol index - chokidar file watcher (PR L2).
|
|
3
|
+
*
|
|
4
|
+
 * Live auto-sync layer on top of the parser (PR L1) and DB (PR L0). When
|
|
5
|
+
* the operator runs `pugi index --watch`, this module:
|
|
6
|
+
*
|
|
7
|
+
* 1. Spins up a chokidar watcher rooted at `workspaceRoot`.
|
|
8
|
+
* 2. Filters events to the v1 parseable extensions
|
|
9
|
+
* (`PARSEABLE_EXTENSIONS` from parser.ts).
|
|
10
|
+
* 3. Coalesces bursts via a 2-second debounce (configurable; tests
|
|
11
|
+
* pass `debounceMs: 50` to keep specs fast).
|
|
12
|
+
* 4. On flush: per file -> `deleteFile` cascade -> `parseFile` ->
|
|
13
|
+
* `insertSymbols` -> resolve `pendingEdges` -> `insertEdges`.
|
|
14
|
+
* 5. On `unlink`: `deleteFile` only (cascade removes symbols + edges
|
|
15
|
+
* via the schema's FK ON DELETE CASCADE).
|
|
16
|
+
*
|
|
17
|
+
* Architectural decisions:
|
|
18
|
+
*
|
|
19
|
+
* - **No nested transactions.** `deleteFile` / `insertSymbols` /
|
|
20
|
+
* `insertEdges` each open + commit their own BEGIN block. We call
|
|
21
|
+
* them sequentially and rely on per-call atomicity. A higher-level
|
|
22
|
+
 * SAVEPOINT wrapper was considered and rejected: chokidar batches are
|
|
23
|
+
* small (typically 1-5 files) and the per-file work is already
|
|
24
|
+
* transactional, so the extra level adds complexity without a
|
|
25
|
+
* customer-visible win.
|
|
26
|
+
*
|
|
27
|
+
* - **Two-step edge resolution.** `parseFile` returns `pendingEdges`
|
|
28
|
+
* keyed by name + scope. We resolve them after `insertSymbols`
|
|
29
|
+
* populates ids, using the shared `resolvePendingEdges` helper.
|
|
30
|
+
* Cross-file targets are looked up via a SQL `SELECT id FROM
|
|
31
|
+
* symbols WHERE name = ?` closure - the live DB IS the cross-file
|
|
32
|
+
* map for the watcher. Reindex builds an in-memory map because it
|
|
33
|
+
* sees every file in one pass; the watcher sees files one delta at
|
|
34
|
+
* a time.
|
|
35
|
+
*
|
|
36
|
+
* - **Path normalization.** Chokidar emits absolute or cwd-relative
|
|
37
|
+
* paths depending on options. We always work in workspace-relative
|
|
38
|
+
* POSIX form (`path.relative(root, abs).split(sep).join('/')`)
|
|
39
|
+
* because that is what `reindex.ts` writes to `Symbol.file` and
|
|
40
|
+
* `files.path`. Mismatched separators on Windows would break the
|
|
41
|
+
* FK + the `deleteFile` lookup.
|
|
42
|
+
*
|
|
43
|
+
* - **Exclude list parity.** We use the EXACT same `PRUNE_DIRS` as
|
|
44
|
+
* `reindex.ts` (extended slightly with the v1 extensions the spec
|
|
45
|
+
* pinned). Divergence between the walker and the watcher would
|
|
46
|
+
* silently produce different indexes depending on entry point.
|
|
47
|
+
*
|
|
48
|
+
* - **In-flight queue at close.** SIGINT / SIGTERM trigger
|
|
49
|
+
* `close()`. If a debounce timer is still pending, we flush it
|
|
50
|
+
* synchronously before tearing down chokidar - dropping a pending
|
|
51
|
+
* edit would leave the index out of sync until the next manual
|
|
52
|
+
* `pugi index` run.
|
|
53
|
+
*
|
|
54
|
+
* - **Initial scan is the operator's job.** `ignoreInitial: true`
|
|
55
|
+
* means we do NOT re-index the workspace on startup. The CLI
|
|
56
|
+
 * command runs `reindexWorkspace` first (covered by PR L1) and THEN
|
|
57
|
+
* starts the watcher; this avoids a double-pass over the
|
|
58
|
+
* workspace.
|
|
59
|
+
*/
|
|
60
|
+
import { existsSync } from 'node:fs';
|
|
61
|
+
import { readFile } from 'node:fs/promises';
|
|
62
|
+
import { createHash } from 'node:crypto';
|
|
63
|
+
import { relative, resolve, sep, extname } from 'node:path';
|
|
64
|
+
import chokidar from 'chokidar';
|
|
65
|
+
import { deleteFile, insertEdges, insertSymbols, upsertFile, } from './db.js';
|
|
66
|
+
import { PARSEABLE_EXTENSIONS, parseFile } from './parser.js';
|
|
67
|
+
import { buildPerFileNameMap, resolvePendingEdges, } from './resolve-edges.js';
|
|
68
|
+
/**
 * Directory names that are never watched. This list is meant to be an
 * exact mirror of `reindex.ts:PRUNE_DIRS` so that the walker and the
 * watcher agree on what "workspace" means; any change here has to be
 * made at both sites.
 */
const PRUNE_DIRS = Object.freeze([
    'node_modules', '.git',
    'dist', 'build',
    '.next', '.turbo', '.cache',
    'coverage', '.pugi',
]);
/** Default quiet period (milliseconds) between a burst of events and the flush. */
const DEFAULT_DEBOUNCE_MS = 2 * 1000;
|
|
87
|
+
/**
 * Start a chokidar watcher rooted at `workspaceRoot` against the
 * caller-owned IndexDB. Returns a handle the caller invokes to stop the
 * watcher cleanly.
 *
 * The function is sync - the watcher is "started" the moment chokidar
 * exists, even if it has not yet emitted `ready`. The first `add`/
 * `change` events flow into the debounce queue immediately.
 *
 * @param {object} opts
 * @param {string} opts.workspaceRoot - Directory to watch; resolved to an absolute path.
 * @param {object} opts.db - Index database handle, passed through to the db helpers.
 * @param {number} [opts.debounceMs] - Quiet period before a flush. Defaults to
 *   `DEFAULT_DEBOUNCE_MS`; negative values are clamped to 0.
 * @param {(text: string) => void} [opts.writeStderr] - Diagnostic sink. Defaults to `process.stderr`.
 * @param {Function} [opts.chokidarWatchFn] - Replacement for `chokidar.watch`.
 * @param {boolean} [opts.quiet] - Diagnostics are suppressed only when this is exactly `true`.
 * @returns {{ close: () => Promise<void>, flushNow: () => Promise<void>, stats: () => object }}
 *   `close` drains pending work and stops the watcher, `flushNow` drains
 *   immediately, `stats` returns a copy of the counters.
 * @throws {Error} If `workspaceRoot` does not exist.
 */
export function startWatcher(opts) {
    const root = resolve(opts.workspaceRoot);
    if (!existsSync(root)) {
        throw new Error(`startWatcher: workspaceRoot does not exist: ${root}`);
    }
    const debounceMs = Math.max(0, opts.debounceMs ?? DEFAULT_DEBOUNCE_MS);
    const writeStderr = opts.writeStderr ?? ((text) => void process.stderr.write(text));
    const watchFn = opts.chokidarWatchFn ?? chokidar.watch;
    const quiet = opts.quiet === true;
    const stats = {
        added: 0,
        changed: 0,
        removed: 0,
        reparsed: 0,
        edgesInserted: 0,
        edgesOrphaned: 0,
        parseErrors: 0,
        watcherErrors: 0,
    };
    /** Write one diagnostic line (`<what>: <error message>`) unless quiet. */
    const report = (what, err) => {
        if (quiet)
            return;
        const message = err instanceof Error ? err.message : String(err);
        writeStderr(`pugi index --watch: ${what}: ${message}\n`);
    };
    /** Workspace-relative POSIX path; '' for the root itself or anything outside it. */
    const toRelPosix = (abs) => {
        const rel = relative(root, abs);
        // Only `..` itself or a `..<sep>` prefix escapes the root. A bare
        // `startsWith('..')` would also reject in-root names such as
        // `..notes.ts`.
        if (rel.length === 0 || rel === '..' || rel.startsWith(`..${sep}`))
            return '';
        return rel.split(sep).join('/');
    };
    /**
     * Defense in depth: chokidar matches on directory globs, but a
     * watched-parent-dir event still might fire for excluded subpaths
     * on some platforms. Re-check the relative path against the prune
     * list before doing any work.
     */
    const isExcluded = (relPosix) => {
        if (relPosix.length === 0)
            return true;
        return relPosix.split('/').some((part) => PRUNE_DIRS.includes(part));
    };
    const isParseable = (relPosix) => {
        const ext = extname(relPosix).toLowerCase();
        return PARSEABLE_EXTENSIONS.includes(ext);
    };
    // ---- queue + debounce -------------------------------------------------
    // Invariant: a path is in at most one of `dirty` / `removed`. The most
    // recent event for a path decides which queue it sits in.
    /** Files queued for re-parse on the next flush. */
    const dirty = new Set();
    /** Files queued for unlink-cascade on the next flush. */
    const removed = new Set();
    let timer = null;
    /** Tracks the in-progress flush so close() can await it. */
    let flushing = null;
    /** Once closed, ignore further events. */
    let closed = false;
    const armDebounce = () => {
        if (closed)
            return;
        if (timer)
            clearTimeout(timer);
        timer = setTimeout(() => {
            timer = null;
            void flush();
        }, debounceMs);
    };
    /**
     * Drain the queue. Sequential per-file work - the parser is async,
     * SQLite handles are sync but already wrapped in per-call BEGIN/
     * COMMIT inside db.ts, so concurrency would only add lock contention
     * without throughput gain on a single-process watcher.
     */
    const flush = async () => {
        // If a flush is already in-flight, queue behind it. We serialize
        // so the dirty Set drained by one flush never collides with one
        // mutated by another.
        if (flushing) {
            await flushing;
            // Nothing new since the previous flush picked it up? Bail.
            if (dirty.size === 0 && removed.size === 0)
                return;
        }
        flushing = doFlush().finally(() => {
            flushing = null;
        });
        return flushing;
    };
    /** Store the file fingerprint row; failures are reported, not thrown. */
    const recordFile = (relPath, bytes, symbolCount) => {
        try {
            upsertFile(opts.db, {
                path: relPath,
                sha256: sha256(bytes),
                lastIndexedAt: new Date().toISOString(),
                symbolCount,
            });
        }
        catch (err) {
            report(`upsertFile(${relPath}) failed`, err);
        }
    };
    /** Re-index one queued file: read -> parse -> delete old rows -> insert. */
    const reindexFile = async (relPath) => {
        const abs = resolve(root, relPath);
        let bytes;
        try {
            bytes = await readFile(abs, 'utf8');
        }
        catch {
            // Disappeared between event and flush - treat as unlink.
            try {
                deleteFile(opts.db, relPath);
            }
            catch (err) {
                report(`post-disappear deleteFile(${relPath}) failed`, err);
            }
            return;
        }
        let result;
        try {
            result = await parseFile(abs, {
                relPath,
                quiet: true,
                sourceOverride: bytes,
            });
        }
        catch (err) {
            stats.parseErrors += 1;
            report(`parse(${relPath}) failed`, err);
            return;
        }
        // Clear prior rows for this file before re-inserting. This runs
        // for brand-new files too, where there is nothing to delete.
        try {
            deleteFile(opts.db, relPath);
        }
        catch (err) {
            report(`deleteFile(${relPath}) failed`, err);
            return;
        }
        if (result.symbols.length === 0) {
            // Record the file fingerprint so a future "did this change?"
            // query can short-circuit, but only when the parser reported
            // a language for the file.
            if (result.language !== null) {
                recordFile(relPath, bytes, 0);
            }
            stats.reparsed += 1;
            return;
        }
        let ids;
        try {
            ids = insertSymbols(opts.db, result.symbols);
        }
        catch (err) {
            report(`insertSymbols(${relPath}) failed`, err);
            return;
        }
        const perFile = buildPerFileNameMap(result.symbols, ids);
        const { resolved, orphaned } = resolvePendingEdges(result.pendingEdges, perFile, (name) => crossFileLookup(opts.db, name));
        stats.edgesOrphaned += orphaned;
        if (resolved.length > 0) {
            try {
                insertEdges(opts.db, resolved);
                stats.edgesInserted += resolved.length;
            }
            catch (err) {
                report(`insertEdges(${relPath}) failed`, err);
            }
        }
        recordFile(relPath, bytes, ids.length);
        stats.reparsed += 1;
    };
    const doFlush = async () => {
        // Snapshot and clear synchronously so events that arrive while we
        // await below land in fresh queues for the next flush.
        const snapshotDirty = Array.from(dirty);
        dirty.clear();
        const snapshotRemoved = Array.from(removed);
        removed.clear();
        // Removals first. A path that was unlinked and then re-added in
        // the same window is only in `snapshotDirty` (onChange cancels the
        // unlink marker), so it is re-parsed below rather than dropped.
        for (const relPath of snapshotRemoved) {
            try {
                deleteFile(opts.db, relPath);
            }
            catch (err) {
                report(`deleteFile(${relPath}) failed`, err);
            }
        }
        for (const relPath of snapshotDirty) {
            await reindexFile(relPath);
        }
    };
    // ---- chokidar wiring --------------------------------------------------
    // We watch the workspace root recursively and let our own filter
    // drop unparseable extensions + excluded directories. Chokidar's
    // built-in `ignored` accepts glob/regex/function; we use a function
    // form so we can re-use the prune list verbatim instead of
    // duplicating it as a glob array.
    const ignored = (p) => {
        // chokidar may pass absolute or workspace-relative paths depending
        // on platform and `cwd` option; normalize then reuse the predicate.
        const abs = resolve(p);
        const rel = toRelPosix(abs);
        if (rel.length === 0)
            return false; // root itself - keep watching
        return isExcluded(rel);
    };
    const watcher = watchFn(root, {
        ignored,
        ignoreInitial: true,
        persistent: true,
        followSymlinks: false,
        awaitWriteFinish: {
            stabilityThreshold: Math.min(50, Math.max(10, Math.floor(debounceMs / 4))),
            pollInterval: 10,
        },
    });
    const onChange = (kind, abs) => {
        if (closed)
            return;
        const rel = toRelPosix(abs);
        if (isExcluded(rel) || !isParseable(rel))
            return;
        if (kind === 'add')
            stats.added += 1;
        else
            stats.changed += 1;
        // The file exists again, so any unlink queued earlier in this
        // window is stale. Leaving it queued would delete the rows and
        // skip the re-parse, dropping a live file from the index.
        removed.delete(rel);
        dirty.add(rel);
        armDebounce();
    };
    const onUnlink = (abs) => {
        if (closed)
            return;
        const rel = toRelPosix(abs);
        if (isExcluded(rel) || !isParseable(rel))
            return;
        stats.removed += 1;
        // If the file is also pending as dirty, drop the dirty marker -
        // the unlink wins.
        dirty.delete(rel);
        removed.add(rel);
        armDebounce();
    };
    watcher.on('add', (p) => onChange('add', p));
    watcher.on('change', (p) => onChange('change', p));
    watcher.on('unlink', (p) => onUnlink(p));
    watcher.on('error', (err) => {
        stats.watcherErrors += 1;
        report('chokidar error', err);
    });
    const close = async () => {
        if (closed)
            return;
        closed = true;
        if (timer) {
            clearTimeout(timer);
            timer = null;
        }
        // Drain any pending events before closing the watcher.
        if (dirty.size > 0 || removed.size > 0) {
            try {
                await flush();
            }
            catch (err) {
                report('close-time flush failed', err);
            }
        }
        // Wait for any in-progress flush even if the queue was empty.
        if (flushing) {
            try {
                await flushing;
            }
            catch {
                /* the in-flight flush belongs to another caller, which
                   receives the rejection; close() only waits for it */
            }
        }
        await watcher.close();
    };
    const flushNow = async () => {
        if (timer) {
            clearTimeout(timer);
            timer = null;
        }
        await flush();
    };
    return {
        close,
        flushNow,
        stats: () => ({ ...stats }),
    };
}
|
|
425
|
+
/**
 * Resolve a cross-file edge target by symbol name against the live
 * `symbols` table.
 *
 * The query orders by `id` ascending and takes one row, so the lowest
 * id wins when several symbols share a name (the "first-write-wins"
 * convention the original comment attributes to `reindex.ts`).
 *
 * @param {object} db - Handle exposing a `conn.prepare(sql)` statement factory.
 * @param {string} name - Symbol name to look up.
 * @returns {*} The matching row's `id`, or `null` when no row (or no id) is found.
 */
function crossFileLookup(db, name) {
    const statement = db.conn.prepare('SELECT id FROM symbols WHERE name = ? ORDER BY id ASC LIMIT 1');
    const match = statement.get(name);
    if (match === null || match === undefined) {
        return null;
    }
    return match.id ?? null;
}
|
|
437
|
+
/**
 * Hex-encoded SHA-256 digest of `s`.
 *
 * @param {string|Buffer} s - Content to hash.
 * @returns {string} 64-character lowercase hex digest.
 */
function sha256(s) {
    const hasher = createHash('sha256');
    hasher.update(s);
    return hasher.digest('hex');
}
|
|
440
|
+
//# sourceMappingURL=watcher.js.map
|
|
@@ -24,21 +24,16 @@ export function probeSandbox(ctx) {
|
|
|
24
24
|
extraWritePaths,
|
|
25
25
|
});
|
|
26
26
|
if (state.armed) {
|
|
27
|
-
//
|
|
28
|
-
//
|
|
29
|
-
//
|
|
30
|
-
//
|
|
31
|
-
//
|
|
32
|
-
// jailed when they still run with full process privileges. We
|
|
33
|
-
// surface 'warn' with a precise reason instead and flip к 'ok'
|
|
34
|
-
// when the runner indirection lands.
|
|
27
|
+
// Phase 1 #302 — bash runner indirection is now wired (the
|
|
28
|
+
// sandbox wrap fires for every bash spawn / spawnSync site). We
|
|
29
|
+
// can finally promote 'warn' → 'ok' when the adapter probes
|
|
30
|
+
// armed. Operators get an unambiguous signal: "the OS sandbox
|
|
31
|
+
// really is wrapping your bash calls right now."
|
|
35
32
|
return {
|
|
36
33
|
name: 'SANDBOX',
|
|
37
|
-
status: '
|
|
38
|
-
detail: `
|
|
34
|
+
status: 'ok',
|
|
35
|
+
detail: `armed (mode=${state.mode}). Bash dispatches go through the OS sandbox. ` +
|
|
39
36
|
`Adapter posture: ${state.details.join('; ')}`,
|
|
40
|
-
remediation: 'The seatbelt adapter is in-tree and exercised by tests; the bash runner indirection that consumes it lands in a follow-up. ' +
|
|
41
|
-
'Bash classifier denylist + permission FSM remain in force in the meantime.',
|
|
42
37
|
};
|
|
43
38
|
}
|
|
44
39
|
// Not armed — distinguish "operator chose none" from "configured
|
|
@@ -69,6 +69,36 @@ const CUSTOMER_APP_DEFAULTS = [
|
|
|
69
69
|
' - Locale: default to en-US for date/number formatting unless overridden.',
|
|
70
70
|
'The operator\'s CHAT language is NOT the target market language. A brief sent in Russian or Ukrainian does NOT imply the generated app should ship in Russian / Ukrainian / ₽ / ₴. If the target market is ambiguous, ask one clarifying question OR ship the USD + English default and surface the choice in your final answer so the operator can override.',
|
|
71
71
|
].join('\n');
|
|
72
|
+
/**
 * PR N (2026-06-05) - simplicity criterion for customer apps (task #112).
 *
 * Prompt section that steers the dev persona (Hiroshi) away from
 * overengineering customer apps. Motivation recorded with the change:
 * the Pizza Finder dogfood demo shipped 8 features plus a multi-filter
 * UI when an "MVP" would have been 3 pizzas and a name search, and a
 * CEO directive of 2026-06-05 asked for a simplicity criterion in the
 * code/build prompt so the first turn ships the smallest shape that
 * proves the brief.
 *
 * Pattern reference: Karpathy speedrun (smallest end-to-end loop first,
 * then add capacity).
 *
 * Memory pointer: backlog task #112 (P1, Simplicity criterion baked
 * into Hiroshi system prompt).
 *
 * The value is a newline-joined string: heading, intro sentence, one
 * ` - ` bullet per preference, then the closing paragraph.
 */
const SIMPLICITY_CRITERION = (() => {
    const heading = '# Simplicity criterion';
    const intro = 'When implementing a customer feature, default to the SIMPLEST shape that proves the brief:';
    const preferences = [
        'Prefer 3 sample items over 8. Operator can ask for more.',
        'Prefer 1 filter over 5. Operator can ask for more.',
        'Prefer 1 sort option over 3. Operator can ask for more.',
        'Hardcoded data over fetching, until the operator names a backend.',
        'Inline CSS over Tailwind / library imports, for single-file demos.',
        'Single file over multi-file, until the operator names structure.',
    ];
    const closing = 'The brief tells you WHAT to build. The operator\'s NEXT brief tells you HOW MUCH to add. Shipping the maximalist version on turn 1 wastes tokens AND makes the demo harder to reason about. Ship the simplest shape; ask "want me to add X next?" only if context strongly hints the brief implies it.';
    const bullets = preferences.map((item) => ` - ${item}`);
    return [heading, intro, ...bullets, closing].join('\n');
})();
|
|
72
102
|
/**
|
|
73
103
|
* PR I (2026-06-05) — server-kill guidance.
|
|
74
104
|
*
|
|
@@ -193,6 +223,7 @@ function baseSystemPromptFor(kind) {
|
|
|
193
223
|
'Command: `pugi code`. The operator gave you a feature request or refactor. Implement it end-to-end.',
|
|
194
224
|
EDIT_FLOW_RULES,
|
|
195
225
|
CUSTOMER_APP_DEFAULTS,
|
|
226
|
+
SIMPLICITY_CRITERION,
|
|
196
227
|
SERVER_KILL_HINT,
|
|
197
228
|
'If the request is ambiguous, ask one clarifying question by returning a text answer instead of editing.',
|
|
198
229
|
].join('\n\n');
|
|
@@ -224,6 +255,7 @@ function baseSystemPromptFor(kind) {
|
|
|
224
255
|
'Command: `pugi build`. The operator wants you to scaffold a feature across multiple files.',
|
|
225
256
|
EDIT_FLOW_RULES,
|
|
226
257
|
CUSTOMER_APP_DEFAULTS,
|
|
258
|
+
SIMPLICITY_CRITERION,
|
|
227
259
|
SERVER_KILL_HINT,
|
|
228
260
|
'Group related edits, run lint/test via bash where it adds confidence, and list every file you created or modified in the final answer.',
|
|
229
261
|
].join('\n\n');
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Append-only TSV ledger for pugi-eval-v1 results.
|
|
3
|
+
*
|
|
4
|
+
* Pattern source: backlog #110 (Karpathy autoresearch). The ledger
|
|
5
|
+
* is git-tracked, never rewritten in place, never edited by hand. The
|
|
6
|
+
* column set is frozen as of schema v1; new columns must land in
|
|
7
|
+
* eval-v2 with a separate ledger file.
|
|
8
|
+
*
|
|
9
|
+
* Columns (tab-separated, in this exact order):
|
|
10
|
+
*
|
|
11
|
+
* timestamp UTC ISO 8601
|
|
12
|
+
* git_sha short sha of repo HEAD at run start
|
|
13
|
+
* task_id frozen task id (`NN-slug`)
|
|
14
|
+
* model engine model identifier or `(default)`
|
|
15
|
+
* status pass | fail | budget_exhausted | timeout | engine_error
|
|
16
|
+
* pugi_score per-task score, 2 decimal places
|
|
17
|
+
* tokens tokens reported by engine
|
|
18
|
+
* turns engine turn count
|
|
19
|
+
* tool_calls tool calls executed
|
|
20
|
+
* wall_ms wall-clock duration (ms)
|
|
21
|
+
* exit_code CLI subprocess exit code
|
|
22
|
+
* verifications `<passed>/<total>`
|
|
23
|
+
*
|
|
24
|
+
 * Runs of TAB, CR, or LF are replaced with a single space in string fields
|
|
25
|
+
* before write so a single line is always one TSV record.
|
|
26
|
+
*/
|
|
27
|
+
import { appendFileSync, existsSync, mkdirSync, writeFileSync } from 'node:fs';
|
|
28
|
+
import { dirname } from 'node:path';
|
|
29
|
+
/** Ledger column names, in the exact order cells are written to each row. */
export const LEDGER_COLUMNS = [
    // run identity
    'timestamp', 'git_sha', 'task_id', 'model',
    // outcome
    'status', 'pugi_score',
    // resource usage
    'tokens', 'turns', 'tool_calls', 'wall_ms',
    // process result
    'exit_code', 'verifications',
];
/** Tab-separated header line (no trailing newline) built from the column list. */
export const LEDGER_HEADER = LEDGER_COLUMNS.join('\t');
|
|
44
|
+
/**
 * Make a string safe to use as a single TSV cell: every run of TAB, CR,
 * or LF characters becomes one space.
 *
 * @param {string} s - Raw cell text.
 * @returns {string} The text with no tab or line-break characters.
 */
function safe(s) {
    const fragments = s.split(/[\t\r\n]+/);
    return fragments.join(' ');
}
|
|
47
|
+
/**
 * Render one ledger row as a tab-separated line (no trailing newline).
 *
 * String cells go through `safe`; the score is fixed to two decimals;
 * the last cell is `<passed>/<total>` over `result.verifications`.
 *
 * @param {object} row - `{ timestamp, gitSha, model, result }`, where `result`
 *   carries `taskId`, `status`, `pugiScore`, `tokensUsed`, `turnsUsed`,
 *   `toolCallCount`, `wallClockMs`, `exitCode` and `verifications`.
 * @returns {string} The twelve cells joined with TAB.
 */
export function formatLedgerLine(row) {
    const outcome = row.result;
    const checks = outcome.verifications;
    const passedCount = checks.reduce((count, check) => (check.passed ? count + 1 : count), 0);
    const textCells = [
        row.timestamp,
        row.gitSha,
        outcome.taskId,
        row.model,
        outcome.status,
    ].map((value) => safe(value));
    const metricCells = [
        outcome.pugiScore.toFixed(2),
        String(outcome.tokensUsed),
        String(outcome.turnsUsed),
        String(outcome.toolCallCount),
        String(outcome.wallClockMs),
        String(outcome.exitCode),
    ];
    return [...textCells, ...metricCells, `${passedCount}/${checks.length}`].join('\t');
}
|
|
67
|
+
/**
 * Append one result row to the ledger file.
 *
 * The parent directory is created if needed. When the ledger file does
 * not exist yet it is first created containing only the header line;
 * the row is then appended as a single newline-terminated line.
 * Existing content is never rewritten.
 *
 * @param {string} ledgerPath - Path of the TSV ledger file.
 * @param {object} row - Row accepted by `formatLedgerLine`.
 * @returns {void}
 */
export function appendLedgerRow(ledgerPath, row) {
    const parentDir = dirname(ledgerPath);
    mkdirSync(parentDir, { recursive: true });
    const isNewLedger = !existsSync(ledgerPath);
    if (isNewLedger) {
        writeFileSync(ledgerPath, `${LEDGER_HEADER}\n`, { mode: 0o644 });
    }
    const record = `${formatLedgerLine(row)}\n`;
    appendFileSync(ledgerPath, record);
}
|
|
79
|
+
/**
 * Append several rows to the ledger, one `appendLedgerRow` call per row,
 * in iteration order.
 *
 * @param {string} ledgerPath - Path of the TSV ledger file.
 * @param {Iterable<object>} rows - Rows to append.
 * @returns {void}
 */
export function appendLedgerRows(ledgerPath, rows) {
    for (const entry of rows) {
        appendLedgerRow(ledgerPath, entry);
    }
}
|
|
83
|
+
//# sourceMappingURL=ledger.js.map
|