@pugi/cli 0.1.0-beta.100 → 0.1.0-beta.101

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/README.md +2 -0
  2. package/dist/core/codegraph/parser.js +574 -47
  3. package/dist/core/codegraph/queries/go.scm +57 -0
  4. package/dist/core/codegraph/queries/javascript.scm +56 -0
  5. package/dist/core/codegraph/queries/python.scm +55 -0
  6. package/dist/core/codegraph/queries/rust.scm +63 -0
  7. package/dist/core/codegraph/queries/typescript.scm +91 -0
  8. package/dist/core/codegraph/reindex.js +218 -0
  9. package/dist/core/codegraph/resolve-edges.js +107 -0
  10. package/dist/core/codegraph/watcher.js +440 -0
  11. package/dist/core/diagnostics/probes/sandbox.js +7 -12
  12. package/dist/core/engine/prompts.js +32 -0
  13. package/dist/core/eval/v1/ledger.js +83 -0
  14. package/dist/core/eval/v1/runner.js +280 -0
  15. package/dist/core/eval/v1/scoring.js +68 -0
  16. package/dist/core/eval/v1/task-loader.js +191 -0
  17. package/dist/core/eval/v1/types.js +14 -0
  18. package/dist/core/eval/v1/verifier.js +176 -0
  19. package/dist/core/eval/v1/yaml-parser.js +250 -0
  20. package/dist/core/sandboxing/adapter.js +31 -17
  21. package/dist/core/sandboxing/bubblewrap.js +209 -0
  22. package/dist/core/sandboxing/index.js +32 -3
  23. package/dist/core/sandboxing/policy.js +97 -0
  24. package/dist/core/sandboxing/seatbelt.js +69 -21
  25. package/dist/core/settings.js +31 -7
  26. package/dist/runtime/cli.js +58 -0
  27. package/dist/runtime/commands/eval-v1.js +266 -0
  28. package/dist/runtime/commands/index-cmd.js +125 -19
  29. package/dist/runtime/commands/servers-cli.js +182 -0
  30. package/dist/runtime/version.js +1 -1
  31. package/dist/tools/bash.js +187 -3
  32. package/package.json +10 -3
@@ -0,0 +1,440 @@
1
+ /**
2
+ * Pugi local symbol index - chokidar file watcher (PR L2).
3
+ *
4
+ * Live auto-sync layer on top of the parser (PR L1) and DB (PR L0). When
5
+ * the operator runs `pugi index --watch`, this module:
6
+ *
7
+ * 1. Spins up a chokidar watcher rooted at `workspaceRoot`.
8
+ * 2. Filters events to the v1 parseable extensions
9
+ * (`PARSEABLE_EXTENSIONS` from parser.ts).
10
+ * 3. Coalesces bursts via a 2-second debounce (configurable; tests
11
+ * pass `debounceMs: 50` to keep specs fast).
12
+ * 4. On flush: per file -> `deleteFile` cascade -> `parseFile` ->
13
+ * `insertSymbols` -> resolve `pendingEdges` -> `insertEdges`.
14
+ * 5. On `unlink`: `deleteFile` only (cascade removes symbols + edges
15
+ * via the schema's FK ON DELETE CASCADE).
16
+ *
17
+ * Architectural decisions:
18
+ *
19
+ * - **No nested transactions.** `deleteFile` / `insertSymbols` /
20
+ * `insertEdges` each open + commit their own BEGIN block. We call
21
+ * them sequentially and rely on per-call atomicity. A higher-level
22
+ * SAVEPOINT wrapper was considered and rejected: chokidar batches are
23
+ * small (typically 1-5 files) and the per-file work is already
24
+ * transactional, so the extra level adds complexity without a
25
+ * customer-visible win.
26
+ *
27
+ * - **Two-step edge resolution.** `parseFile` returns `pendingEdges`
28
+ * keyed by name + scope. We resolve them after `insertSymbols`
29
+ * populates ids, using the shared `resolvePendingEdges` helper.
30
+ * Cross-file targets are looked up via a SQL `SELECT id FROM
31
+ * symbols WHERE name = ?` closure - the live DB IS the cross-file
32
+ * map for the watcher. Reindex builds an in-memory map because it
33
+ * sees every file in one pass; the watcher sees files one delta at
34
+ * a time.
35
+ *
36
+ * - **Path normalization.** Chokidar emits absolute or cwd-relative
37
+ * paths depending on options. We always work in workspace-relative
38
+ * POSIX form (`path.relative(root, abs).split(sep).join('/')`)
39
+ * because that is what `reindex.ts` writes to `Symbol.file` and
40
+ * `files.path`. Mismatched separators on Windows would break the
41
+ * FK + the `deleteFile` lookup.
42
+ *
43
+ * - **Exclude list parity.** We use the EXACT same `PRUNE_DIRS` as
44
+ * `reindex.ts` (extended slightly with the v1 extensions the spec
45
+ * pinned). Divergence between the walker and the watcher would
46
+ * silently produce different indexes depending on entry point.
47
+ *
48
+ * - **In-flight queue at close.** SIGINT / SIGTERM trigger
49
+ * `close()`. If a debounce timer is still pending, we flush it
50
+ * to completion before tearing down chokidar - dropping a pending
51
+ * edit would leave the index out of sync until the next manual
52
+ * `pugi index` run.
53
+ *
54
+ * - **Initial scan is the operator's job.** `ignoreInitial: true`
55
+ * means we do NOT re-index the workspace on startup. The CLI
56
+ * command runs `reindexWorkspace` first (covered by PR L1) and THEN
57
+ * starts the watcher; this avoids a double-pass over the
58
+ * workspace.
59
+ */
60
+ import { existsSync } from 'node:fs';
61
+ import { readFile } from 'node:fs/promises';
62
+ import { createHash } from 'node:crypto';
63
+ import { relative, resolve, sep, extname } from 'node:path';
64
+ import chokidar from 'chokidar';
65
+ import { deleteFile, insertEdges, insertSymbols, upsertFile, } from './db.js';
66
+ import { PARSEABLE_EXTENSIONS, parseFile } from './parser.js';
67
+ import { buildPerFileNameMap, resolvePendingEdges, } from './resolve-edges.js';
68
+ /**
69
+ * Directories pruned from the watch. EXACT mirror of
70
+ * `reindex.ts:PRUNE_DIRS` so the walker and watcher agree on what
71
+ * "workspace" means. Touching this list requires touching both
72
+ * sites.
73
+ */
74
+ const PRUNE_DIRS = Object.freeze([
75
+ 'node_modules',
76
+ '.git',
77
+ 'dist',
78
+ 'build',
79
+ '.next',
80
+ '.turbo',
81
+ '.cache',
82
+ 'coverage',
83
+ '.pugi',
84
+ ]);
85
+ /** Default debounce window between burst-of-events and flush. */
86
+ const DEFAULT_DEBOUNCE_MS = 2000;
87
+ /**
88
+ * Start a chokidar watcher rooted at `workspaceRoot` against the
89
+ * caller-owned IndexDB. Returns a handle the caller invokes to stop the
90
+ * watcher cleanly.
91
+ *
92
+ * The function is sync - the watcher is "started" the moment chokidar
93
+ * exists, even if it has not yet emitted `ready`. The first `add`/
94
+ * `change` events flow into the debounce queue immediately.
95
+ */
96
+ export function startWatcher(opts) {
97
+ const root = resolve(opts.workspaceRoot);
98
+ if (!existsSync(root)) {
99
+ throw new Error(`startWatcher: workspaceRoot does not exist: ${root}`);
100
+ }
101
+ const debounceMs = Math.max(0, opts.debounceMs ?? DEFAULT_DEBOUNCE_MS);
102
+ const writeStderr = opts.writeStderr ?? ((text) => void process.stderr.write(text));
103
+ const watchFn = opts.chokidarWatchFn ?? chokidar.watch;
104
+ const quiet = opts.quiet === true;
105
+ const stats = {
106
+ added: 0,
107
+ changed: 0,
108
+ removed: 0,
109
+ reparsed: 0,
110
+ edgesInserted: 0,
111
+ edgesOrphaned: 0,
112
+ parseErrors: 0,
113
+ watcherErrors: 0,
114
+ };
115
+ /** Workspace-relative POSIX path. */
116
+ const toRelPosix = (abs) => {
117
+ const rel = relative(root, abs);
118
+ if (rel.length === 0 || rel.startsWith('..'))
119
+ return '';
120
+ return rel.split(sep).join('/');
121
+ };
122
+ /**
123
+ * Defense in depth: chokidar matches on directory globs, but a
124
+ * watched-parent-dir event still might fire for excluded subpaths
125
+ * on some platforms. Re-check the relative path against the prune
126
+ * list before doing any work.
127
+ */
128
+ const isExcluded = (relPosix) => {
129
+ if (relPosix.length === 0)
130
+ return true;
131
+ const parts = relPosix.split('/');
132
+ for (const p of parts) {
133
+ if (PRUNE_DIRS.includes(p))
134
+ return true;
135
+ }
136
+ return false;
137
+ };
138
+ const isParseable = (relPosix) => {
139
+ const ext = extname(relPosix).toLowerCase();
140
+ return PARSEABLE_EXTENSIONS.includes(ext);
141
+ };
142
+ // ---- queue + debounce -------------------------------------------------
143
+ /** Files queued for re-parse on the next flush. */
144
+ const dirty = new Set();
145
+ /** Files queued for unlink-cascade on the next flush. */
146
+ const removed = new Set();
147
+ let timer = null;
148
+ /** Tracks the in-progress flush so close() can await it. */
149
+ let flushing = null;
150
+ /** Once closed, ignore further events. */
151
+ let closed = false;
152
+ const armDebounce = () => {
153
+ if (closed)
154
+ return;
155
+ if (timer)
156
+ clearTimeout(timer);
157
+ timer = setTimeout(() => {
158
+ timer = null;
159
+ void flush();
160
+ }, debounceMs);
161
+ };
162
+ /**
163
+ * Drain the queue. Sequential per-file work - the parser is async,
164
+ * SQLite handles are sync but already wrapped in per-call BEGIN/
165
+ * COMMIT inside db.ts, so concurrency would only add lock contention
166
+ * without throughput gain on a single-process watcher.
167
+ */
168
+ const flush = async () => {
169
+ // If a flush is already in-flight, queue behind it. We serialize
170
+ // so the dirty Set drained by one flush never collides with one
171
+ // mutated by another.
172
+ if (flushing) {
173
+ await flushing;
174
+ // Nothing new since the previous flush picked it up? Bail.
175
+ if (dirty.size === 0 && removed.size === 0)
176
+ return;
177
+ }
178
+ flushing = doFlush().finally(() => {
179
+ flushing = null;
180
+ });
181
+ return flushing;
182
+ };
183
+ const doFlush = async () => {
184
+ const snapshotDirty = Array.from(dirty);
185
+ dirty.clear();
186
+ const snapshotRemoved = Array.from(removed);
187
+ removed.clear();
188
+ // Process removals first - if a file was deleted and re-added in
189
+ // the same window we want the add half to write fresh rows.
190
+ for (const relPath of snapshotRemoved) {
191
+ try {
192
+ deleteFile(opts.db, relPath);
193
+ }
194
+ catch (err) {
195
+ const message = err instanceof Error ? err.message : String(err);
196
+ if (!quiet) {
197
+ writeStderr(`pugi index --watch: deleteFile(${relPath}) failed: ${message}\n`);
198
+ }
199
+ }
200
+ }
201
+ for (const relPath of snapshotDirty) {
202
+ // Drop files that were also removed in this batch - the unlink
203
+ // above already wiped them.
204
+ if (snapshotRemoved.includes(relPath))
205
+ continue;
206
+ const abs = resolve(root, relPath);
207
+ let bytes;
208
+ try {
209
+ bytes = await readFile(abs, 'utf8');
210
+ }
211
+ catch {
212
+ // Disappeared between event and flush - treat as unlink.
213
+ try {
214
+ deleteFile(opts.db, relPath);
215
+ }
216
+ catch (err) {
217
+ const message = err instanceof Error ? err.message : String(err);
218
+ if (!quiet) {
219
+ writeStderr(`pugi index --watch: post-disappear deleteFile(${relPath}) failed: ${message}\n`);
220
+ }
221
+ }
222
+ continue;
223
+ }
224
+ let result;
225
+ try {
226
+ result = await parseFile(abs, {
227
+ relPath,
228
+ quiet: true,
229
+ sourceOverride: bytes,
230
+ });
231
+ }
232
+ catch (err) {
233
+ stats.parseErrors += 1;
234
+ if (!quiet) {
235
+ const message = err instanceof Error ? err.message : String(err);
236
+ writeStderr(`pugi index --watch: parse(${relPath}) failed: ${message}\n`);
237
+ }
238
+ continue;
239
+ }
240
+ // Clear prior rows for this file before re-inserting. Cheap
241
+ // even if the file is new - the lookup hits the `idx_symbols_file`
242
+ // index and returns zero rows.
243
+ try {
244
+ deleteFile(opts.db, relPath);
245
+ }
246
+ catch (err) {
247
+ const message = err instanceof Error ? err.message : String(err);
248
+ if (!quiet) {
249
+ writeStderr(`pugi index --watch: deleteFile(${relPath}) failed: ${message}\n`);
250
+ }
251
+ continue;
252
+ }
253
+ if (result.symbols.length === 0) {
254
+ // Record the file fingerprint so a future "did this change?"
255
+ // query can short-circuit. Only when we got far enough to
256
+ // recognise the language - skipping unsupported extensions
257
+ // earlier means we never see them here.
258
+ if (result.language !== null) {
259
+ try {
260
+ upsertFile(opts.db, {
261
+ path: relPath,
262
+ sha256: sha256(bytes),
263
+ lastIndexedAt: new Date().toISOString(),
264
+ symbolCount: 0,
265
+ });
266
+ }
267
+ catch (err) {
268
+ const message = err instanceof Error ? err.message : String(err);
269
+ if (!quiet) {
270
+ writeStderr(`pugi index --watch: upsertFile(${relPath}) failed: ${message}\n`);
271
+ }
272
+ }
273
+ }
274
+ stats.reparsed += 1;
275
+ continue;
276
+ }
277
+ let ids;
278
+ try {
279
+ ids = insertSymbols(opts.db, result.symbols);
280
+ }
281
+ catch (err) {
282
+ const message = err instanceof Error ? err.message : String(err);
283
+ if (!quiet) {
284
+ writeStderr(`pugi index --watch: insertSymbols(${relPath}) failed: ${message}\n`);
285
+ }
286
+ continue;
287
+ }
288
+ const perFile = buildPerFileNameMap(result.symbols, ids);
289
+ const { resolved, orphaned } = resolvePendingEdges(result.pendingEdges, perFile, (name) => crossFileLookup(opts.db, name));
290
+ stats.edgesOrphaned += orphaned;
291
+ if (resolved.length > 0) {
292
+ try {
293
+ insertEdges(opts.db, resolved);
294
+ stats.edgesInserted += resolved.length;
295
+ }
296
+ catch (err) {
297
+ const message = err instanceof Error ? err.message : String(err);
298
+ if (!quiet) {
299
+ writeStderr(`pugi index --watch: insertEdges(${relPath}) failed: ${message}\n`);
300
+ }
301
+ }
302
+ }
303
+ try {
304
+ upsertFile(opts.db, {
305
+ path: relPath,
306
+ sha256: sha256(bytes),
307
+ lastIndexedAt: new Date().toISOString(),
308
+ symbolCount: ids.length,
309
+ });
310
+ }
311
+ catch (err) {
312
+ const message = err instanceof Error ? err.message : String(err);
313
+ if (!quiet) {
314
+ writeStderr(`pugi index --watch: upsertFile(${relPath}) failed: ${message}\n`);
315
+ }
316
+ }
317
+ stats.reparsed += 1;
318
+ }
319
+ };
320
+ // ---- chokidar wiring --------------------------------------------------
321
+ // We watch the workspace root recursively and let our own filter
322
+ // drop unparseable extensions + excluded directories. Chokidar's
323
+ // built-in `ignored` accepts glob/regex/function; we use a function
324
+ // form so we can re-use the prune list verbatim instead of
325
+ // duplicating it as a glob array.
326
+ const ignored = (p) => {
327
+ // chokidar may pass absolute or workspace-relative paths depending
328
+ // on platform and `cwd` option; normalize then reuse the predicate.
329
+ const abs = resolve(p);
330
+ const rel = toRelPosix(abs);
331
+ if (rel.length === 0)
332
+ return false; // root itself - keep watching
333
+ return isExcluded(rel);
334
+ };
335
+ const watcher = watchFn(root, {
336
+ ignored,
337
+ ignoreInitial: true,
338
+ persistent: true,
339
+ followSymlinks: false,
340
+ awaitWriteFinish: {
341
+ stabilityThreshold: Math.min(50, Math.max(10, Math.floor(debounceMs / 4))),
342
+ pollInterval: 10,
343
+ },
344
+ });
345
+ const onChange = (kind, abs) => {
346
+ if (closed)
347
+ return;
348
+ const rel = toRelPosix(abs);
349
+ if (isExcluded(rel) || !isParseable(rel))
350
+ return;
351
+ if (kind === 'add')
352
+ stats.added += 1;
353
+ else
354
+ stats.changed += 1;
355
+ dirty.add(rel);
356
+ armDebounce();
357
+ };
358
+ const onUnlink = (abs) => {
359
+ if (closed)
360
+ return;
361
+ const rel = toRelPosix(abs);
362
+ if (isExcluded(rel) || !isParseable(rel))
363
+ return;
364
+ stats.removed += 1;
365
+ // If the file is also pending as dirty, drop the dirty marker -
366
+ // the unlink wins.
367
+ dirty.delete(rel);
368
+ removed.add(rel);
369
+ armDebounce();
370
+ };
371
+ watcher.on('add', (p) => onChange('add', p));
372
+ watcher.on('change', (p) => onChange('change', p));
373
+ watcher.on('unlink', (p) => onUnlink(p));
374
+ watcher.on('error', (err) => {
375
+ stats.watcherErrors += 1;
376
+ if (!quiet) {
377
+ const message = err instanceof Error ? err.message : String(err);
378
+ writeStderr(`pugi index --watch: chokidar error: ${message}\n`);
379
+ }
380
+ });
381
+ const close = async () => {
382
+ if (closed)
383
+ return;
384
+ closed = true;
385
+ if (timer) {
386
+ clearTimeout(timer);
387
+ timer = null;
388
+ }
389
+ // Drain any pending events before closing the watcher.
390
+ if (dirty.size > 0 || removed.size > 0) {
391
+ try {
392
+ await flush();
393
+ }
394
+ catch (err) {
395
+ if (!quiet) {
396
+ const message = err instanceof Error ? err.message : String(err);
397
+ writeStderr(`pugi index --watch: close-time flush failed: ${message}\n`);
398
+ }
399
+ }
400
+ }
401
+ // Wait for any in-progress flush even if the queue was empty.
402
+ if (flushing) {
403
+ try {
404
+ await flushing;
405
+ }
406
+ catch {
407
+ /* already surfaced via writeStderr above */
408
+ }
409
+ }
410
+ await watcher.close();
411
+ };
412
+ const flushNow = async () => {
413
+ if (timer) {
414
+ clearTimeout(timer);
415
+ timer = null;
416
+ }
417
+ await flush();
418
+ };
419
+ return {
420
+ close,
421
+ flushNow,
422
+ stats: () => ({ ...stats }),
423
+ };
424
+ }
425
+ /**
426
+ * Cross-file target resolver backed by the live `symbols` table.
427
+ * Returns the first matching row id - deterministic stability via the
428
+ * autoincrement order, mirrors `reindex.ts`'s "first-write-wins"
429
+ * convention. `null` when no symbol carries the requested name.
430
+ */
431
+ function crossFileLookup(db, name) {
432
+ const row = db.conn
433
+ .prepare('SELECT id FROM symbols WHERE name = ? ORDER BY id ASC LIMIT 1')
434
+ .get(name);
435
+ return row?.id ?? null;
436
+ }
437
+ function sha256(s) {
438
+ return createHash('sha256').update(s).digest('hex');
439
+ }
440
+ //# sourceMappingURL=watcher.js.map
@@ -24,21 +24,16 @@ export function probeSandbox(ctx) {
24
24
  extraWritePaths,
25
25
  });
26
26
  if (state.armed) {
27
- // Discipline-gap honesty (Trust Sprint thesis): the adapter
28
- // probes ok, but spawn-wrap is NOT yet wired into the bash
29
- // runner (that file is owned by another agent on PUGI-VERIFY-
30
- // GATE). Reporting status=ok would overstate the posture — an
31
- // operator reading 'armed' would assume their bash calls were
32
- // jailed when they still run with full process privileges. We
33
- // surface 'warn' with a precise reason instead and flip к 'ok'
34
- // when the runner indirection lands.
27
+ // Phase 1 #302 bash runner indirection is now wired (the
28
+ // sandbox wrap fires for every bash spawn / spawnSync site). We
29
+ // can finally promote 'warn' to 'ok' when the adapter probes
30
+ // armed. Operators get an unambiguous signal: "the OS sandbox
31
+ // really is wrapping your bash calls right now."
35
32
  return {
36
33
  name: 'SANDBOX',
37
- status: 'warn',
38
- detail: `configured (mode=${state.mode}) but spawn-wrap not yet wired — bash dispatches still run with full process privileges. ` +
34
+ status: 'ok',
35
+ detail: `armed (mode=${state.mode}). Bash dispatches go through the OS sandbox. ` +
39
36
  `Adapter posture: ${state.details.join('; ')}`,
40
- remediation: 'The seatbelt adapter is in-tree and exercised by tests; the bash runner indirection that consumes it lands in a follow-up. ' +
41
- 'Bash classifier denylist + permission FSM remain in force in the meantime.',
42
37
  };
43
38
  }
44
39
  // Not armed — distinguish "operator chose none" from "configured
@@ -69,6 +69,36 @@ const CUSTOMER_APP_DEFAULTS = [
69
69
  ' - Locale: default to en-US for date/number formatting unless overridden.',
70
70
  'The operator\'s CHAT language is NOT the target market language. A brief sent in Russian or Ukrainian does NOT imply the generated app should ship in Russian / Ukrainian / ₽ / ₴. If the target market is ambiguous, ask one clarifying question OR ship the USD + English default and surface the choice in your final answer so the operator can override.',
71
71
  ].join('\n');
72
+ /**
73
+ * PR N (2026-06-05) - simplicity criterion for customer apps (task #112).
74
+ *
75
+ * Hiroshi (dev persona) tends to overengineer customer apps. The
76
+ * Pizza Finder dogfood demo shipped 8 features + multi-filter UI
77
+ * when "MVP" would have been 3 pizzas + name search. CEO directive
78
+ * 2026-06-05: bake a simplicity criterion into the code/build
79
+ * prompt so the first turn ships the smallest shape that proves
80
+ * the brief.
81
+ *
82
+ * Pattern reference: Karpathy speedrun (smallest end-to-end loop
83
+ * first, then add capacity). Shipping the maximalist version on
84
+ * turn 1 wastes tokens AND makes the demo harder to reason about;
85
+ * ship the simplest shape, ask "want me to add X next?" only if
86
+ * context strongly hints the brief implies it.
87
+ *
88
+ * Memory pointer: backlog task #112 (P1, Simplicity criterion baked
89
+ * into Hiroshi system prompt).
90
+ */
91
+ const SIMPLICITY_CRITERION = [
92
+ '# Simplicity criterion',
93
+ 'When implementing a customer feature, default to the SIMPLEST shape that proves the brief:',
94
+ ' - Prefer 3 sample items over 8. Operator can ask for more.',
95
+ ' - Prefer 1 filter over 5. Operator can ask for more.',
96
+ ' - Prefer 1 sort option over 3. Operator can ask for more.',
97
+ ' - Hardcoded data over fetching, until the operator names a backend.',
98
+ ' - Inline CSS over Tailwind / library imports, for single-file demos.',
99
+ ' - Single file over multi-file, until the operator names structure.',
100
+ 'The brief tells you WHAT to build. The operator\'s NEXT brief tells you HOW MUCH to add. Shipping the maximalist version on turn 1 wastes tokens AND makes the demo harder to reason about. Ship the simplest shape; ask "want me to add X next?" only if context strongly hints the brief implies it.',
101
+ ].join('\n');
72
102
  /**
73
103
  * PR I (2026-06-05) — server-kill guidance.
74
104
  *
@@ -193,6 +223,7 @@ function baseSystemPromptFor(kind) {
193
223
  'Command: `pugi code`. The operator gave you a feature request or refactor. Implement it end-to-end.',
194
224
  EDIT_FLOW_RULES,
195
225
  CUSTOMER_APP_DEFAULTS,
226
+ SIMPLICITY_CRITERION,
196
227
  SERVER_KILL_HINT,
197
228
  'If the request is ambiguous, ask one clarifying question by returning a text answer instead of editing.',
198
229
  ].join('\n\n');
@@ -224,6 +255,7 @@ function baseSystemPromptFor(kind) {
224
255
  'Command: `pugi build`. The operator wants you to scaffold a feature across multiple files.',
225
256
  EDIT_FLOW_RULES,
226
257
  CUSTOMER_APP_DEFAULTS,
258
+ SIMPLICITY_CRITERION,
227
259
  SERVER_KILL_HINT,
228
260
  'Group related edits, run lint/test via bash where it adds confidence, and list every file you created or modified in the final answer.',
229
261
  ].join('\n\n');
@@ -0,0 +1,83 @@
1
+ /**
2
+ * Append-only TSV ledger for pugi-eval-v1 results.
3
+ *
4
+ * Pattern source: backlog #110 (Karpathy autoresearch). The ledger
5
+ * is git-tracked, never rewritten in place, never edited by hand. The
6
+ * column set is frozen as of schema v1; new columns must land in
7
+ * eval-v2 with a separate ledger file.
8
+ *
9
+ * Columns (tab-separated, in this exact order):
10
+ *
11
+ * timestamp UTC ISO 8601
12
+ * git_sha short sha of repo HEAD at run start
13
+ * task_id frozen task id (`NN-slug`)
14
+ * model engine model identifier or `(default)`
15
+ * status pass | fail | budget_exhausted | timeout | engine_error
16
+ * pugi_score per-task score, 2 decimal places
17
+ * tokens tokens reported by engine
18
+ * turns engine turn count
19
+ * tool_calls tool calls executed
20
+ * wall_ms wall-clock duration (ms)
21
+ * exit_code CLI subprocess exit code
22
+ * verifications `<passed>/<total>`
23
+ *
24
+ * TAB / CR / NEWLINE runs in string fields are replaced with a space
25
+ * before write so a single line is always one TSV record.
26
+ */
27
+ import { appendFileSync, existsSync, mkdirSync, writeFileSync } from 'node:fs';
28
+ import { dirname } from 'node:path';
29
+ export const LEDGER_COLUMNS = [
30
+ 'timestamp',
31
+ 'git_sha',
32
+ 'task_id',
33
+ 'model',
34
+ 'status',
35
+ 'pugi_score',
36
+ 'tokens',
37
+ 'turns',
38
+ 'tool_calls',
39
+ 'wall_ms',
40
+ 'exit_code',
41
+ 'verifications',
42
+ ];
43
+ export const LEDGER_HEADER = LEDGER_COLUMNS.join('\t');
44
+ function safe(s) {
45
+ return s.replace(/[\t\r\n]+/g, ' ');
46
+ }
47
+ export function formatLedgerLine(row) {
48
+ const { timestamp, gitSha, model, result } = row;
49
+ const passed = result.verifications.filter((v) => v.passed).length;
50
+ const total = result.verifications.length;
51
+ const cells = [
52
+ safe(timestamp),
53
+ safe(gitSha),
54
+ safe(result.taskId),
55
+ safe(model),
56
+ safe(result.status),
57
+ result.pugiScore.toFixed(2),
58
+ String(result.tokensUsed),
59
+ String(result.turnsUsed),
60
+ String(result.toolCallCount),
61
+ String(result.wallClockMs),
62
+ String(result.exitCode),
63
+ `${passed}/${total}`,
64
+ ];
65
+ return cells.join('\t');
66
+ }
67
+ /**
68
+ * Append a single row. Creates the file with the header if it does
69
+ * not yet exist; otherwise appends a single line. Never rewrites
70
+ * existing content.
71
+ */
72
+ export function appendLedgerRow(ledgerPath, row) {
73
+ mkdirSync(dirname(ledgerPath), { recursive: true });
74
+ if (!existsSync(ledgerPath)) {
75
+ writeFileSync(ledgerPath, `${LEDGER_HEADER}\n`, { mode: 0o644 });
76
+ }
77
+ appendFileSync(ledgerPath, `${formatLedgerLine(row)}\n`);
78
+ }
79
+ export function appendLedgerRows(ledgerPath, rows) {
80
+ for (const row of rows)
81
+ appendLedgerRow(ledgerPath, row);
82
+ }
83
+ //# sourceMappingURL=ledger.js.map