@pdpp/local-collector 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +48 -0
  2. package/dist/local-collector/bin/pdpp-local-collector.js +347 -0
  3. package/dist/local-collector/src/errors.d.ts +12 -0
  4. package/dist/local-collector/src/errors.js +20 -0
  5. package/dist/local-collector/src/runner.d.ts +16 -0
  6. package/dist/local-collector/src/runner.js +59 -0
  7. package/dist/polyfill-connectors/connectors/claude_code/index.js +806 -0
  8. package/dist/polyfill-connectors/connectors/claude_code/parsers.js +224 -0
  9. package/dist/polyfill-connectors/connectors/claude_code/schemas.js +120 -0
  10. package/dist/polyfill-connectors/connectors/claude_code/types.js +1 -0
  11. package/dist/polyfill-connectors/connectors/codex/index.js +880 -0
  12. package/dist/polyfill-connectors/connectors/codex/parsers.js +159 -0
  13. package/dist/polyfill-connectors/connectors/codex/schemas.js +118 -0
  14. package/dist/polyfill-connectors/connectors/codex/types.js +1 -0
  15. package/dist/polyfill-connectors/src/auth.js +76 -0
  16. package/dist/polyfill-connectors/src/browser-handoff.js +197 -0
  17. package/dist/polyfill-connectors/src/collector-protocol.d.ts +2 -0
  18. package/dist/polyfill-connectors/src/collector-protocol.js +2 -0
  19. package/dist/polyfill-connectors/src/collector-runner.d.ts +139 -0
  20. package/dist/polyfill-connectors/src/collector-runner.js +1084 -0
  21. package/dist/polyfill-connectors/src/connector-runtime-protocol.d.ts +191 -0
  22. package/dist/polyfill-connectors/src/connector-runtime-protocol.js +1 -0
  23. package/dist/polyfill-connectors/src/connector-runtime.js +879 -0
  24. package/dist/polyfill-connectors/src/fixture-capture.js +237 -0
  25. package/dist/polyfill-connectors/src/is-main-module.d.ts +1 -0
  26. package/dist/polyfill-connectors/src/is-main-module.js +17 -0
  27. package/dist/polyfill-connectors/src/local-device-client.d.ts +126 -0
  28. package/dist/polyfill-connectors/src/local-device-client.js +132 -0
  29. package/dist/polyfill-connectors/src/local-device-envelope.d.ts +26 -0
  30. package/dist/polyfill-connectors/src/local-device-envelope.js +43 -0
  31. package/dist/polyfill-connectors/src/local-device-outbox.d.ts +115 -0
  32. package/dist/polyfill-connectors/src/local-device-outbox.js +509 -0
  33. package/dist/polyfill-connectors/src/local-device-queue.d.ts +34 -0
  34. package/dist/polyfill-connectors/src/local-device-queue.js +133 -0
  35. package/dist/polyfill-connectors/src/local-source-inventory.js +119 -0
  36. package/dist/polyfill-connectors/src/pdpp-safe-text.js +13 -0
  37. package/dist/polyfill-connectors/src/runner/index.d.ts +11 -0
  38. package/dist/polyfill-connectors/src/runner/index.js +10 -0
  39. package/dist/polyfill-connectors/src/runtime-capabilities.d.ts +40 -0
  40. package/dist/polyfill-connectors/src/runtime-capabilities.js +59 -0
  41. package/dist/polyfill-connectors/src/safe-emit.d.ts +3 -0
  42. package/dist/polyfill-connectors/src/safe-emit.js +30 -0
  43. package/dist/polyfill-connectors/src/safe-text-preview.js +156 -0
  44. package/dist/polyfill-connectors/src/schema-registry.js +17 -0
  45. package/dist/polyfill-connectors/src/scope-filters.d.ts +38 -0
  46. package/dist/polyfill-connectors/src/scope-filters.js +80 -0
  47. package/dist/polyfill-connectors/src/shutdown-hook.js +51 -0
  48. package/dist/polyfill-connectors/src/streaming-target-registration.js +161 -0
  49. package/package.json +63 -0
@@ -0,0 +1,880 @@
1
+ #!/usr/bin/env node
2
+ import { createReadStream, existsSync, statSync } from "node:fs";
3
+ import { readdir, readFile, stat } from "node:fs/promises";
4
+ import { homedir } from "node:os";
5
+ import { join } from "node:path";
6
+ import { createInterface as createFileReader, createInterface } from "node:readline";
7
+ import { DatabaseSync } from "node:sqlite";
8
+ import { isMainModule } from "../../src/is-main-module.js";
9
+ import { buildLocalSourceInventory, listDirectoryInventory, } from "../../src/local-source-inventory.js";
10
+ import { stringifyForJsonl } from "../../src/safe-emit.js";
11
+ import { resourceSet } from "../../src/scope-filters.js";
12
+ import { buildPromptRecord, buildRolloutOnlySessionRecord, buildRuleRecord, buildSkillRecord, buildThreadSessionRecord, extendTimestampRange, extractMessageText, isRolloutFile, isSkippableRulesLine, parseFrontmatter, payloadOutputPreview, RULES_SUFFIX_RE, splitRulesLines, TWO_DIGIT_DIR_RE, textPreview, YEAR_DIR_RE, } from "./parsers.js";
13
+ import { validateRecord } from "./schemas.js";
14
// Fallback quiet window, in milliseconds, returned by resolveActiveRolloutQuietMs
// when the environment variable below is unset or not a finite, non-negative number.
const DEFAULT_ACTIVE_ROLLOUT_QUIET_MS = 120_000;
// Name of the environment variable that resolveActiveRolloutQuietMs reads
// to override the quiet window.
const ACTIVE_ROLLOUT_QUIET_MS_ENV = "PDPP_CODEX_ACTIVE_ROLLOUT_QUIET_MS";
16
/**
 * Promise that settles on the next stdout "drain" event, or null when no
 * backpressure wait is currently registered.
 */
let stdoutDrainPromise = null;

/**
 * Serialise a protocol message with stringifyForJsonl and write it to stdout.
 *
 * When the write reports backpressure (returns false) and no drain wait is
 * registered yet, a one-shot "drain" listener is armed; it clears the shared
 * promise before resolving it so a later write can register a new wait.
 *
 * @param {object} m - Message to serialise and write.
 */
const emit = (m) => {
  const accepted = process.stdout.write(stringifyForJsonl(m));
  if (accepted || stdoutDrainPromise !== null) {
    return;
  }
  stdoutDrainPromise = new Promise((resolve) => {
    process.stdout.once("drain", () => {
      stdoutDrainPromise = null;
      resolve();
    });
  });
};
28
/**
 * Wait for the pending stdout drain, if one is registered.
 *
 * Returns without awaiting anything when `stdoutDrainPromise` is null.
 *
 * @returns {Promise<void>}
 */
async function waitForEmitDrain() {
  const pending = stdoutDrainPromise;
  if (pending === null) {
    return;
  }
  await pending;
}
33
/**
 * Exit the process with `code`, giving buffered stdout a chance to flush.
 *
 * If stdout still has bytes queued, exit on the next "drain" event, with an
 * unref'd 3000 ms timer as a fallback; otherwise exit immediately.
 *
 * @param {number} code - Process exit code.
 */
const flushAndExit = (code) => {
  const exitNow = () => process.exit(code);
  const hasBufferedOutput = process.stdout.writableLength > 0;
  if (!hasBufferedOutput) {
    exitNow();
    return;
  }
  process.stdout.once("drain", exitNow);
  // unref() so the fallback timer alone never keeps the process alive.
  setTimeout(exitNow, 3000).unref();
};
42
/**
 * Emit a failed DONE message (with `records_emitted: 0`) and exit with code 1.
 *
 * @param {string} m - Error message placed in `error.message`.
 * @param {boolean} [r=false] - Value placed in `error.retryable`.
 */
const fail = (m, r = false) => {
  const error = { message: m, retryable: r };
  emit({ type: "DONE", status: "failed", records_emitted: 0, error });
  flushAndExit(1);
};
51
/**
 * Catalogue of known stores under the Codex home directory.
 *
 * Each entry carries, in this key order: `store`, `relativePath`, `stream`
 * (null when the store maps to no stream), `classification` and `reason`.
 * The table below lists one row per store in that same column order.
 */
export const CODEX_KNOWN_LOCAL_STORES = [
  ["sessions", "sessions", "sessions", "collect", "declared rollout source"],
  ["state_db", "state_5.sqlite", "sessions", "collect", "declared thread metadata source opened read-only"],
  ["rules", "rules", "rules", "collect", "declared user-authored rules source"],
  ["prompts", "prompts", "prompts", "collect", "declared user-authored prompts source"],
  ["skills", "skills", "skills", "collect", "declared user-authored skills source"],
  ["history", "history.jsonl", "history", "inventory_only", "metadata-only until prompt-history payload contract is approved"],
  ["session_index", "session_index.jsonl", "session_index", "inventory_only", "metadata-only until session-index payload contract is approved"],
  ["shell_snapshots", "shell-snapshots", "shell_snapshots", "inventory_only", "shell content requires redaction review before payload collection"],
  ["memories", "memories", null, "inventory_only", "deferred private local store; diagnostics only until a general Codex memory surface is approved"],
  ["context_mode", "context-mode", null, "inventory_only", "user-specific local convention; diagnostics only, not a general Codex stream"],
  ["logs", "logs", "logs", "defer", "logs require deterministic redaction before collection"],
  ["config", "config.toml", "config_inventory", "inventory_only", "configuration is inventoried without payload content"],
  ["cache", "cache", "cache_inventory", "inventory_only", "raw cache payloads may contain sensitive tool output"],
  ["auth", "auth.json", null, "exclude", "auth-adjacent credential material is never emitted"],
].map(([store, relativePath, stream, classification, reason]) => ({
  store,
  relativePath,
  stream,
  classification,
  reason,
}));
151
/**
 * Stream a JSONL file and yield each line's parsed JSON value.
 *
 * Lines that are empty after trimming are skipped, and lines that fail to
 * parse are dropped silently.
 *
 * @param {string} path - File to read as UTF-8.
 * @yields {*} The parsed value of each valid line, in file order.
 */
async function* iterJsonlLines(path) {
  const input = createReadStream(path, { encoding: "utf8" });
  const reader = createFileReader({ input, terminal: false });
  for await (const text of reader) {
    if (text.trim().length === 0) {
      continue;
    }
    try {
      yield JSON.parse(text);
    }
    catch {
      // Malformed line: skip it and keep reading.
    }
  }
}
167
/**
 * List the entry names of a directory, tolerating failure.
 *
 * @param {string} dir - Directory to read.
 * @returns {Promise<string[] | null>} Entry names, or null when readdir fails.
 */
async function listIfExists(dir) {
  let names = null;
  try {
    names = await readdir(dir);
  }
  catch {
    // Any readdir failure is reported to the caller as null.
  }
  return names;
}
175
/**
 * Yield a descriptor for every rollout file directly inside a day directory.
 *
 * Yields nothing when the directory cannot be listed. File names are filtered
 * with isRolloutFile.
 *
 * @param {string} dayPath - Day directory to list.
 * @param {string} year - Year component, passed through to each descriptor.
 * @param {string} month - Month component, passed through to each descriptor.
 * @param {string} day - Day component, passed through to each descriptor.
 * @yields {{path: string, year: string, month: string, day: string, file: string}}
 */
async function* walkDayFiles(dayPath, year, month, day) {
  const names = await listIfExists(dayPath);
  if (names === null) {
    return;
  }
  for (const file of names) {
    if (!isRolloutFile(file)) {
      continue;
    }
    yield { path: join(dayPath, file), year, month, day, file };
  }
}
186
/**
 * Walk the day sub-directories of a month directory and yield their rollout files.
 *
 * Only entries whose name matches TWO_DIGIT_DIR_RE are descended into; nothing
 * is yielded when the month directory cannot be listed.
 *
 * @param {string} monthPath - Month directory to list.
 * @param {string} year - Year component, passed through.
 * @param {string} month - Month component, passed through.
 */
async function* walkMonthDays(monthPath, year, month) {
  const dayNames = await listIfExists(monthPath);
  for (const day of dayNames ?? []) {
    if (TWO_DIGIT_DIR_RE.test(day)) {
      yield* walkDayFiles(join(monthPath, day), year, month, day);
    }
  }
}
198
/**
 * Walk the month sub-directories of a year directory and yield their rollout files.
 *
 * Only entries whose name matches TWO_DIGIT_DIR_RE are descended into; nothing
 * is yielded when the year directory cannot be listed.
 *
 * @param {string} yearPath - Year directory to list.
 * @param {string} year - Year component, passed through.
 */
async function* walkYearMonths(yearPath, year) {
  const monthNames = await listIfExists(yearPath);
  for (const month of monthNames ?? []) {
    if (TWO_DIGIT_DIR_RE.test(month)) {
      yield* walkMonthDays(join(yearPath, month), year, month);
    }
  }
}
210
/**
 * Walk a sessions base directory (year/month/day layout) and yield every rollout file.
 *
 * Only top-level entries whose name matches YEAR_DIR_RE are descended into;
 * nothing is yielded when the base directory cannot be listed.
 *
 * @param {string} baseDir - Root directory containing the year directories.
 */
async function* walkRollouts(baseDir) {
  const yearNames = await listIfExists(baseDir);
  for (const year of yearNames ?? []) {
    if (YEAR_DIR_RE.test(year)) {
      yield* walkYearMonths(join(baseDir, year), year);
    }
  }
}
222
// SQL text selecting the per-thread metadata columns from the `threads` table.
// queryThreadsRows passes it to db.prepare() and reads all rows.
const THREADS_QUERY = `
SELECT id, rollout_path, created_at, updated_at, source, model_provider,
cwd, title, sandbox_policy, approval_mode, tokens_used,
has_user_event, archived, archived_at, git_sha, git_branch,
git_origin_url, cli_version, first_user_message, agent_nickname,
agent_role, memory_mode, model, reasoning_effort
FROM threads
`;
230
/**
 * Open the threads database read-only.
 *
 * On failure a PROGRESS message describing the error is emitted and null is
 * returned instead of throwing.
 *
 * @param {string} dbPath - Path of the SQLite file.
 * @returns {DatabaseSync | null} The open handle, or null when opening failed.
 */
function openThreadsDb(dbPath) {
  let db = null;
  try {
    db = new DatabaseSync(dbPath, { readOnly: true });
  }
  catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    emit({
      type: "PROGRESS",
      message: `state_5.sqlite unreadable (${detail}); falling back to rollouts only`,
    });
  }
  return db;
}
243
/**
 * Run THREADS_QUERY against an open database and return all rows.
 *
 * If preparing or running the statement throws, a PROGRESS message is emitted
 * and an empty array is returned.
 *
 * @param {object} db - Handle exposing `prepare(sql).all()`.
 * @returns {object[]} The result rows, or [] on failure.
 */
function queryThreadsRows(db) {
  try {
    return db.prepare(THREADS_QUERY).all();
  }
  catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    emit({
      type: "PROGRESS",
      message: `threads query failed (${detail}); falling back to rollouts only`,
    });
    return [];
  }
}
257
/**
 * Load every thread row from the state database, keyed by thread id.
 *
 * @param {string} dbPath - Path of the SQLite file.
 * @returns {{map: Map<*, object>, present: boolean}} `present` is false (with
 *   an empty map) when the file does not exist or could not be opened; the
 *   database handle is always closed before returning.
 */
function loadThreadsMap(dbPath) {
  const absent = () => ({ map: new Map(), present: false });
  if (!existsSync(dbPath)) {
    return absent();
  }
  const db = openThreadsDb(dbPath);
  if (!db) {
    return absent();
  }
  const map = new Map();
  try {
    for (const row of queryThreadsRows(db)) {
      map.set(row.id, row);
    }
  }
  finally {
    try {
      db.close();
    }
    catch {
      // A failing close is ignored.
    }
  }
  return { map, present: true };
}
280
/**
 * Stat a file and then read it as UTF-8 text.
 *
 * @param {string} path - File to inspect and read.
 * @returns {Promise<{mtimeMs: number, size: number, text: string} | null>}
 *   The file's modification time, size and contents, or null when either the
 *   stat or the read fails.
 */
async function statAndRead(path) {
  try {
    const info = await stat(path);
    const text = await readFile(path, "utf8");
    const mtimeMs = Number(info.mtimeMs);
    const size = Number(info.size);
    return { mtimeMs, size, text };
  }
  catch {
    return null;
  }
}
290
/**
 * Emit one "rules" record per non-skippable line of every `*.rules` file in a directory.
 *
 * Files that cannot be read are skipped. The record index counts only emitted
 * lines and restarts at 0 for each file. After each record the function waits
 * for any pending stdout drain.
 *
 * @param {string} rulesDir - Directory containing the rules files.
 * @param {(stream: string, record: object) => void} emitRecord - Record sink.
 */
async function emitRulesStream(rulesDir, emitRecord) {
  const names = await listIfExists(rulesDir);
  if (names === null) {
    return;
  }
  for (const fileName of names) {
    if (!fileName.endsWith(".rules")) {
      continue;
    }
    const path = join(rulesDir, fileName);
    const loaded = await statAndRead(path);
    if (!loaded) {
      continue;
    }
    // The modification time is passed on in whole seconds.
    const mtime = Math.floor(loaded.mtimeMs / 1000);
    const ruleset = fileName.replace(RULES_SUFFIX_RE, "");
    let index = 0;
    for (const rawLine of splitRulesLines(loaded.text)) {
      const line = rawLine.trim();
      if (isSkippableRulesLine(line)) {
        continue;
      }
      emitRecord("rules", buildRuleRecord({ ruleset, line, index, path, mtime }));
      await waitForEmitDrain();
      index += 1;
    }
  }
}
318
/**
 * Emit one "prompts" record per readable `*.md` file in a directory.
 *
 * Each file's text is split with parseFrontmatter before the record is built.
 * After each record the function waits for any pending stdout drain.
 *
 * @param {string} promptsDir - Directory containing the prompt files.
 * @param {(stream: string, record: object) => void} emitRecord - Record sink.
 */
async function emitPromptsStream(promptsDir, emitRecord) {
  const names = await listIfExists(promptsDir);
  if (names === null) {
    return;
  }
  const markdownNames = names.filter((name) => name.endsWith(".md"));
  for (const fileName of markdownNames) {
    const path = join(promptsDir, fileName);
    const loaded = await statAndRead(path);
    if (!loaded) {
      continue;
    }
    const { meta, body } = parseFrontmatter(loaded.text);
    const record = buildPromptRecord({ fileName, meta, body, path, mtimeMs: loaded.mtimeMs });
    emitRecord("prompts", record);
    await waitForEmitDrain();
  }
}
337
/**
 * Decide whether a skills directory entry is ignored.
 *
 * @param {{name: string}} ent - Directory entry.
 * @returns {boolean} True for dot-prefixed names and for "skills.backup".
 */
function shouldSkipSkillEntry(ent) {
  const { name } = ent;
  if (name === "skills.backup") {
    return true;
  }
  return name.startsWith(".");
}
340
/**
 * Report whether a path stats as a directory.
 *
 * @param {string} p - Path to check.
 * @returns {Promise<boolean>} False when the stat call fails.
 */
async function isDirectoryPath(p) {
  let result = false;
  try {
    const info = await stat(p);
    result = info.isDirectory();
  }
  catch {
    result = false;
  }
  return result;
}
349
/**
 * Emit one "skills" record per skill directory that contains a readable SKILL.md.
 *
 * Entries rejected by shouldSkipSkillEntry and entries that do not stat as
 * directories are ignored. Returns silently when the skills directory cannot
 * be listed. After each record the function waits for any pending stdout drain.
 *
 * @param {string} skillsDir - Directory containing one sub-directory per skill.
 * @param {(stream: string, record: object) => void} emitRecord - Record sink.
 */
async function emitSkillsStream(skillsDir, emitRecord) {
  let dirents;
  try {
    dirents = await readdir(skillsDir, { withFileTypes: true });
  }
  catch {
    return;
  }
  for (const dirent of dirents) {
    if (shouldSkipSkillEntry(dirent)) {
      continue;
    }
    const dirName = dirent.name;
    const skillDir = join(skillsDir, dirName);
    const isDir = await isDirectoryPath(skillDir);
    if (!isDir) {
      continue;
    }
    const path = join(skillDir, "SKILL.md");
    const loaded = await statAndRead(path);
    if (!loaded) {
      continue;
    }
    const { meta, body } = parseFrontmatter(loaded.text);
    emitRecord("skills", buildSkillRecord({ dirName, meta, body, path, mtimeMs: loaded.mtimeMs }));
    await waitForEmitDrain();
  }
}
375
/**
 * Create the mutable state used while parsing a single rollout file.
 *
 * @returns {object} Fresh state: null session identity and timestamps, zeroed
 *   counters, and a new empty `pendingCalls` Map.
 */
export function makeRolloutParseState() {
  const pendingCalls = new Map();
  const state = {
    // Session identity.
    sessionId: null,
    sessionMeta: null,
    // Timestamp range.
    firstTimestamp: null,
    lastTimestamp: null,
    // Counters.
    messageCount: 0,
    functionCallCount: 0,
    pendingCalls,
    lineCount: 0,
  };
  return state;
}
387
/**
 * Emit a "messages" record for a message payload.
 *
 * Does nothing when the state has no session id. The record id is
 * `<sessionId>:<lineCount>` and the content is a text preview capped with the
 * limit 5000 passed to textPreview.
 *
 * @param {object} state - Rollout parse state (reads `sessionId`, `lineCount`).
 * @param {object} payload - Message payload (reads `role` and the message text).
 * @param {*} ts - Timestamp stored on the record.
 * @param {(stream: string, record: object) => void} emitRecord - Record sink.
 */
function emitMessageRecord(state, payload, ts, emitRecord) {
  const { sessionId } = state;
  if (!sessionId) {
    return;
  }
  const record = {
    id: `${sessionId}:${state.lineCount}`,
    session_id: sessionId,
    role: payload.role || null,
    type: "message",
    content: textPreview(extractMessageText(payload), 5000),
    timestamp: ts,
  };
  emitRecord("messages", record);
}
402
/**
 * Remember a function call so that its output can be attached later.
 *
 * Does nothing when the state has no session id. The call is stored in
 * `state.pendingCalls` under `payload.call_id`, or under
 * `<sessionId>:<lineCount>` when the payload has no call id.
 *
 * @param {object} state - Rollout parse state.
 * @param {object} payload - Function-call payload (reads `call_id`, `name`, `arguments`).
 * @param {*} ts - Timestamp stored on the pending record.
 */
function registerFunctionCall(state, payload, ts) {
  const { sessionId } = state;
  if (!sessionId) {
    return;
  }
  const callId = payload.call_id || `${sessionId}:${state.lineCount}`;
  const pending = {
    id: callId,
    session_id: sessionId,
    call_id: callId,
    name: payload.name || null,
    arguments: textPreview(payload.arguments || null, 2000),
    // Filled in by applyFunctionCallOutput when the matching output arrives.
    output_preview: null,
    timestamp: ts,
  };
  state.pendingCalls.set(callId, pending);
}
418
/**
 * Attach a function-call output to its pending call, or emit it on its own.
 *
 * Does nothing when the state has no session id. When a pending call with the
 * same `call_id` exists, its `output_preview` is updated in place (and
 * `output_binary_reason` set when the preview reports one); nothing is emitted
 * in that case. Otherwise an output-only "function_calls" record with id
 * `<sessionId>:<lineCount>:output` is emitted.
 *
 * @param {object} state - Rollout parse state.
 * @param {object} payload - Output payload (reads `call_id`, `output`).
 * @param {*} ts - Timestamp stored on an output-only record.
 * @param {(stream: string, record: object) => void} emitRecord - Record sink.
 */
function applyFunctionCallOutput(state, payload, ts, emitRecord) {
  const { sessionId } = state;
  if (!sessionId) {
    return;
  }
  const callId = payload.call_id;
  const pending = callId ? state.pendingCalls.get(callId) : null;
  const { preview, binaryReason } = payloadOutputPreview(payload.output);
  if (!pending) {
    emitRecord("function_calls", {
      id: `${sessionId}:${state.lineCount}:output`,
      session_id: sessionId,
      call_id: callId || null,
      name: null,
      arguments: null,
      output_preview: preview,
      output_binary_reason: binaryReason,
      timestamp: ts,
    });
    return;
  }
  pending.output_preview = preview;
  if (binaryReason) {
    pending.output_binary_reason = binaryReason;
  }
}
444
/**
 * Dispatch one `response_item` payload by its `type`.
 *
 * Message and function-call counters are always incremented; records are only
 * produced for streams present in `deps.requested`. Function-call outputs are
 * ignored entirely unless "function_calls" is requested.
 *
 * @param {object} args
 * @param {{requested: {has(name: string): boolean}, emitRecord: Function}} args.deps
 * @param {object} args.payload - The response item payload.
 * @param {object} args.state - Rollout parse state, mutated in place.
 * @param {*} args.ts - Timestamp of the line.
 */
export function processResponseItem({ deps, payload, state, ts }) {
  switch (payload.type) {
    case "message":
      state.messageCount += 1;
      if (deps.requested.has("messages")) {
        emitMessageRecord(state, payload, ts, deps.emitRecord);
      }
      break;
    case "function_call":
      state.functionCallCount += 1;
      if (deps.requested.has("function_calls")) {
        registerFunctionCall(state, payload, ts);
      }
      break;
    case "function_call_output":
      if (deps.requested.has("function_calls")) {
        applyFunctionCallOutput(state, payload, ts, deps.emitRecord);
      }
      break;
    default:
      break;
  }
}
463
// processRolloutLine reports progress each time a file's parsed line count
// reaches a multiple of this value.
const PROGRESS_EVERY = 2000;
464
/**
 * Decide whether a rollout file was modified too recently to be parsed now.
 *
 * @param {{quietMs: number, mtimeMs: number, nowMs: number}} input
 * @returns {boolean} True when `quietMs` is positive and `mtimeMs` is strictly
 *   later than `nowMs - quietMs`.
 */
export function shouldDeferActiveRolloutFile(input) {
  const { mtimeMs, nowMs, quietMs } = input;
  if (!(quietMs > 0)) {
    return false;
  }
  const quietBoundaryMs = nowMs - quietMs;
  return mtimeMs > quietBoundaryMs;
}
467
/**
 * Fold one parsed rollout line into the parse state.
 *
 * Increments the line counter (reporting progress every PROGRESS_EVERY lines),
 * extends the first/last timestamp range, captures the session id from a
 * `session_meta` line, and forwards `response_item` lines to
 * processResponseItem once a session id is known. All other lines only affect
 * the counter and the timestamp range.
 *
 * @param {object} args
 * @param {object} args.deps - `progress`, `requested` and `emitRecord` collaborators.
 * @param {string} args.file - File name used in progress messages.
 * @param {object} args.obj - The parsed JSONL line.
 * @param {object} args.state - Rollout parse state, mutated in place.
 */
export function processRolloutLine({ deps, file, obj, state }) {
  state.lineCount += 1;
  const atProgressBoundary = state.lineCount % PROGRESS_EVERY === 0;
  if (atProgressBoundary) {
    deps.progress(` ${file}: ${state.lineCount} lines parsed`);
  }

  const ts = obj.timestamp || null;
  const range = { firstTs: state.firstTimestamp, lastTs: state.lastTimestamp };
  extendTimestampRange(range, ts);
  state.firstTimestamp = range.firstTs;
  state.lastTimestamp = range.lastTs;

  if (obj.type === "session_meta") {
    const meta = obj.payload || {};
    state.sessionMeta = meta;
    state.sessionId = meta.id || null;
    return;
  }
  // Items are only attributed once a session id has been seen.
  if (!state.sessionId || obj.type !== "response_item") {
    return;
  }
  processResponseItem({ payload: obj.payload || {}, ts, state, deps });
}
495
/**
 * Emit every pending function call as a "function_calls" record.
 *
 * Each record is a shallow copy of the pending entry; the pending map itself
 * is left untouched.
 *
 * @param {{pendingCalls: Map<*, object>}} state - Rollout parse state.
 * @param {{emitRecord: Function}} deps - Record sink holder.
 */
export function flushPendingCalls(state, deps) {
  state.pendingCalls.forEach((pending) => {
    deps.emitRecord("function_calls", { ...pending });
  });
}
500
/**
 * Emit "sessions" records from thread rows and rollout aggregates.
 *
 * Every thread produces one record, combined with the rollout aggregate of the
 * same id when there is one. Aggregates whose id was not among the threads
 * then produce rollout-only records.
 *
 * @param {object} args
 * @param {Iterable<[*, object]>} args.threadsMap - Thread rows keyed by id.
 * @param {Map<*, object>} args.rolloutAggregates - Rollout aggregates keyed by session id.
 * @param {(stream: string, record: object) => void} args.emitRecord - Record sink.
 */
export function emitSessionsFromMaps({ threadsMap, rolloutAggregates, emitRecord }) {
  const coveredIds = new Set();
  for (const [threadId, thread] of threadsMap) {
    const aggregate = rolloutAggregates.get(threadId);
    emitRecord("sessions", buildThreadSessionRecord(threadId, thread, aggregate));
    coveredIds.add(threadId);
  }
  for (const [sessionId, aggregate] of rolloutAggregates) {
    if (!coveredIds.has(sessionId)) {
      emitRecord("sessions", buildRolloutOnlySessionRecord(sessionId, aggregate));
    }
  }
}
513
/**
 * Parse one rollout file line by line, emitting records as it goes.
 *
 * After the last line, any pending function calls are flushed. If the file
 * declared a session id, an aggregate (metadata, timestamp range, counters and
 * rollout path) is stored in `args.rolloutAggregates` under that id.
 *
 * @param {object} args
 * @param {string} args.path - Full path of the rollout file.
 * @param {string} args.file - File name used in progress messages.
 * @param {{has(name: string): boolean}} args.requested - Requested streams.
 * @param {Function} args.emitRecord - Record sink.
 * @param {Map<*, object>} args.rolloutAggregates - Receives the per-session aggregate.
 */
async function parseRolloutFile(args) {
  const { path, file, requested, emitRecord, rolloutAggregates } = args;
  const state = makeRolloutParseState();
  const deps = {
    emitRecord,
    progress: (message) => {
      emit({ type: "PROGRESS", message });
    },
    requested,
  };

  for await (const obj of iterJsonlLines(path)) {
    processRolloutLine({ obj, state, deps, file });
    await waitForEmitDrain();
  }
  flushPendingCalls(state, deps);
  await waitForEmitDrain();

  if (!state.sessionId) {
    return;
  }
  rolloutAggregates.set(state.sessionId, {
    meta: state.sessionMeta || {},
    firstTs: state.firstTimestamp,
    lastTs: state.lastTimestamp,
    messageCount: state.messageCount,
    functionCallCount: state.functionCallCount,
    rolloutPath: path,
  });
}
539
/**
 * Decide what to do with one rollout file and parse it when needed.
 *
 * - "missing": the file could not be stat'ed.
 * - "skipped": its mtime equals the recorded one (the mtime is carried over to
 *   `args.newMtimes`), or it was modified within the active quiet window (a
 *   PROGRESS message is emitted and no mtime is recorded).
 * - "parsed": the file was parsed and its mtime recorded in `args.newMtimes`.
 *
 * @param {{path: string, year: string, month: string, day: string, file: string}} entry
 * @param {object} args - Scan context (`fileMtimes`, `newMtimes`, `scanStartedAtMs`,
 *   `activeQuietMs`, `requested`, `emitRecord`, `rolloutAggregates`).
 * @returns {Promise<"missing" | "skipped" | "parsed">}
 */
async function processRolloutEntry(entry, args) {
  let info;
  try {
    info = statSync(entry.path);
  }
  catch {
    return "missing";
  }
  const mtime = info.mtimeMs;
  const describe = () => `${entry.year}/${entry.month}/${entry.day}/${entry.file}`;

  const unchanged = args.fileMtimes[entry.path] === mtime;
  if (unchanged) {
    args.newMtimes[entry.path] = mtime;
    return "skipped";
  }

  const stillActive = shouldDeferActiveRolloutFile({
    mtimeMs: mtime,
    nowMs: args.scanStartedAtMs,
    quietMs: args.activeQuietMs,
  });
  if (stillActive) {
    emit({ type: "PROGRESS", message: `Deferring active rollout ${describe()}` });
    await waitForEmitDrain();
    return "skipped";
  }

  const sizeMb = (info.size / 1024 / 1024).toFixed(1);
  emit({ type: "PROGRESS", message: `Parsing ${describe()} (${sizeMb}MB)` });
  await waitForEmitDrain();
  await parseRolloutFile({
    path: entry.path,
    file: entry.file,
    requested: args.requested,
    emitRecord: args.emitRecord,
    rolloutAggregates: args.rolloutAggregates,
  });
  args.newMtimes[entry.path] = mtime;
  return "parsed";
}
575
/**
 * Walk every rollout file under `args.baseDir` and process each one.
 *
 * Emits a PROGRESS message when the base directory cannot be listed, and
 * otherwise one with the number of rollout files seen.
 *
 * @param {object} args - Scan context; forwarded unchanged to processRolloutEntry.
 * @returns {Promise<{parsedFiles: number}>} How many files were actually parsed.
 */
async function scanRollouts(args) {
  const listing = await listIfExists(args.baseDir);
  if (listing === null) {
    emit({ type: "PROGRESS", message: `${args.baseDir} not readable` });
    await waitForEmitDrain();
    return { parsedFiles: 0 };
  }

  let seenFiles = 0;
  let parsedFiles = 0;
  for await (const entry of walkRollouts(args.baseDir)) {
    seenFiles += 1;
    const outcome = await processRolloutEntry(entry, args);
    if (outcome === "parsed") {
      parsedFiles += 1;
    }
  }

  emit({ type: "PROGRESS", message: `Scanned ${seenFiles} rollout files` });
  await waitForEmitDrain();
  return { parsedFiles };
}
600
/**
 * Load the thread rows from the state database and emit session records that
 * combine them with the rollout aggregates.
 *
 * @param {object} args
 * @param {string} args.stateDbPath - Path of the SQLite state database.
 * @param {Map<*, object>} args.rolloutAggregates - Rollout aggregates keyed by session id.
 * @param {Function} args.emitRecord - Record sink.
 */
function emitSessions({ stateDbPath, rolloutAggregates, emitRecord }) {
  const threads = loadThreadsMap(stateDbPath);
  emitSessionsFromMaps({ threadsMap: threads.map, rolloutAggregates, emitRecord });
}
604
/**
 * Read the first line from the input stream and parse it as JSON.
 *
 * The readline interface is closed once the promise settles. If the input
 * closes before any line arrives, the promise rejects instead of staying
 * pending forever, so the caller can report a failure rather than exiting
 * without a final message.
 *
 * @param {import("node:stream").Readable} [input=process.stdin] - Stream to read from.
 * @returns {Promise<*>} The parsed first line.
 * @throws {SyntaxError} When the first line is not valid JSON.
 * @throws {Error} When the input closes before a line is received.
 */
async function readStartMessage(input = process.stdin) {
  const rl = createInterface({ input, terminal: false });
  try {
    return await new Promise((resolve, reject) => {
      rl.once("line", (line) => {
        try {
          resolve(JSON.parse(line));
        }
        catch (err) {
          reject(err);
        }
      });
      // "close" also fires from the finally block below; by then the promise
      // is already settled, so this rejection is a no-op in that case.
      rl.once("close", () => {
        reject(new Error("stdin closed before a START message was received"));
      });
    });
  }
  finally {
    rl.close();
  }
}
615
/**
 * Resolve the Codex home directory and the source paths derived from it.
 *
 * The home is `CODEX_HOME`, falling back to `~/.codex`. Every other path can
 * be overridden by its own environment variable and otherwise defaults to a
 * fixed name inside the home directory.
 *
 * @returns {{codexHome: string, baseDir: string, stateDbPath: string,
 *   rulesDir: string, promptsDir: string, skillsDir: string}}
 */
function resolveCodexDirs() {
  const { env } = process;
  const codexHome = env.CODEX_HOME || join(homedir(), ".codex");
  const fromEnvOrHome = (variable, leaf) => env[variable] || join(codexHome, leaf);
  return {
    codexHome,
    baseDir: fromEnvOrHome("CODEX_SESSIONS_DIR", "sessions"),
    stateDbPath: fromEnvOrHome("CODEX_STATE_DB", "state_5.sqlite"),
    rulesDir: fromEnvOrHome("CODEX_RULES_DIR", "rules"),
    promptsDir: fromEnvOrHome("CODEX_PROMPTS_DIR", "prompts"),
    skillsDir: fromEnvOrHome("CODEX_SKILLS_DIR", "skills"),
  };
}
626
/**
 * Pick the previously stored rollout file mtimes out of the START state.
 *
 * The first truthy candidate wins, in this order: `state.messages.file_mtimes`,
 * `state.function_calls.file_mtimes`, `state.sessions.file_mtimes`,
 * `state.file_mtimes`; otherwise an empty object is returned.
 *
 * @param {{state?: object}} startMsg - The START message.
 * @returns {object} The stored mtime table, or a new empty object.
 */
function readFileMtimes(startMsg) {
  const state = startMsg.state || {};
  for (const stream of ["messages", "function_calls", "sessions"]) {
    const stored = state[stream]?.file_mtimes;
    if (stored) {
      return stored;
    }
  }
  return state.file_mtimes || {};
}
634
/**
 * Resolve the quiet window, in milliseconds, used to defer active rollout files.
 *
 * Reads the variable named by ACTIVE_ROLLOUT_QUIET_MS_ENV. A missing or empty
 * value, or one that is not a finite number >= 0, yields
 * DEFAULT_ACTIVE_ROLLOUT_QUIET_MS.
 *
 * @param {object} [env=process.env] - Environment to read from.
 * @returns {number} The quiet window in milliseconds.
 */
function resolveActiveRolloutQuietMs(env = process.env) {
  const raw = env[ACTIVE_ROLLOUT_QUIET_MS_ENV];
  if (!raw) {
    return DEFAULT_ACTIVE_ROLLOUT_QUIET_MS;
  }
  const quietMs = Number(raw);
  const usable = Number.isFinite(quietMs) && quietMs >= 0;
  if (!usable) {
    return DEFAULT_ACTIVE_ROLLOUT_QUIET_MS;
  }
  return quietMs;
}
642
/**
 * Index the streams requested in the START message by stream name.
 *
 * Malformed input is ignored rather than raising an opaque TypeError: a
 * `scope.streams` value that is not an array yields an empty map, and entries
 * that are not objects with a string `name` are skipped. The caller can then
 * reject an empty result with its own explicit error.
 *
 * @param {{scope?: {streams?: Array<{name: string}>}}} startMsg - The START message.
 * @returns {Map<string, object>} Requested stream descriptors keyed by name,
 *   in request order.
 */
function buildRequestedMap(startMsg) {
  const requested = new Map();
  const streams = startMsg?.scope?.streams;
  if (!Array.isArray(streams)) {
    return requested;
  }
  for (const stream of streams) {
    const isNamedObject = stream !== null && typeof stream === "object" && typeof stream.name === "string";
    if (isNamedObject) {
      requested.set(stream.name, stream);
    }
  }
  return requested;
}
645
/**
 * Build the per-stream resource filter table.
 *
 * @param {Map<string, object>} requested - Requested stream descriptors keyed by name.
 * @returns {Map<string, *>} For each stream name, the result of `resourceSet`
 *   applied to its descriptor.
 */
function buildResourceFilters(requested) {
  const pairs = [];
  for (const [name, descriptor] of requested) {
    pairs.push([name, resourceSet(descriptor)]);
  }
  return new Map(pairs);
}
652
/**
 * Report whether a path can be stat'ed and is a directory.
 *
 * @param {string} path - Path to check.
 * @returns {Promise<boolean>} False when the stat call fails.
 */
async function isReadableDirectory(path) {
  let isDir = false;
  try {
    const info = await stat(path);
    isDir = info.isDirectory();
  }
  catch {
    isDir = false;
  }
  return isDir;
}
661
/**
 * Report whether a path can be stat'ed and is a regular file.
 *
 * @param {string} path - Path to check.
 * @returns {Promise<boolean>} False when the stat call fails.
 */
async function isReadableFile(path) {
  let isFile = false;
  try {
    const info = await stat(path);
    isFile = info.isFile();
  }
  catch {
    isFile = false;
  }
  return isFile;
}
670
/**
 * Verify that the local sources backing every requested stream are present.
 *
 * - "messages" / "function_calls" need the sessions directory.
 * - "sessions" needs the sessions directory or the state database file.
 * - "rules", "prompts" and "skills" each need their own directory.
 *
 * @param {object} dirs - Paths as returned by resolveCodexDirs.
 * @param {{has(name: string): boolean}} requested - Requested streams.
 * @throws {Error} Listing every missing or unreadable path.
 */
async function assertRequestedCodexSources(dirs, requested) {
  const problems = [];
  const wants = (name) => requested.has(name);

  const needsRollouts = wants("messages") || wants("function_calls");
  if (needsRollouts && !(await isReadableDirectory(dirs.baseDir))) {
    problems.push(`CODEX_SESSIONS_DIR=${dirs.baseDir}`);
  }

  if (wants("sessions")) {
    const rolloutsOk = await isReadableDirectory(dirs.baseDir);
    const threadsDbOk = await isReadableFile(dirs.stateDbPath);
    if (!rolloutsOk && !threadsDbOk) {
      problems.push(`CODEX_SESSIONS_DIR=${dirs.baseDir} or CODEX_STATE_DB=${dirs.stateDbPath}`);
    }
  }

  const directoryBackedStreams = [
    ["rules", "CODEX_RULES_DIR", dirs.rulesDir],
    ["prompts", "CODEX_PROMPTS_DIR", dirs.promptsDir],
    ["skills", "CODEX_SKILLS_DIR", dirs.skillsDir],
  ];
  for (const [stream, variable, dir] of directoryBackedStreams) {
    if (wants(stream) && !(await isReadableDirectory(dir))) {
      problems.push(`${variable}=${dir}`);
    }
  }

  if (problems.length === 0) {
    return;
  }
  throw new Error(`requested Codex local source path(s) are missing or unreadable: ${problems.join(", ")}`);
}
696
/**
 * Emit the STATE cursor messages for the requested streams.
 *
 * - "sessions" records the fetch time and the state database mtime.
 * - The rollout cursor (file mtimes plus fetch time) is emitted once, on
 *   "messages" when that stream is requested and otherwise on "function_calls".
 * - Every other requested stream records only a fetch time.
 *
 * @param {object} args
 * @param {{has(name: string): boolean}} args.requested - Requested streams.
 * @param {object} args.newMtimes - Rollout file mtimes to persist.
 * @param {() => string} args.nowIso - Supplies the `fetched_at` value.
 * @param {number} args.sessionsSourceMtimeMs - Mtime stored on the sessions cursor.
 */
function emitStateCursors({ requested, newMtimes, nowIso, sessionsSourceMtimeMs }) {
  const wants = (name) => requested.has(name);

  if (wants("sessions")) {
    emit({
      type: "STATE",
      stream: "sessions",
      cursor: { fetched_at: nowIso(), source_mtime_ms: sessionsSourceMtimeMs },
    });
  }

  const wantsMessages = wants("messages");
  if (wantsMessages || wants("function_calls")) {
    emit({
      type: "STATE",
      stream: wantsMessages ? "messages" : "function_calls",
      cursor: { file_mtimes: newMtimes, fetched_at: nowIso() },
    });
  }

  const timestampOnlyStreams = [
    "rules",
    "prompts",
    "skills",
    "history",
    "session_index",
    "logs",
    "shell_snapshots",
    "config_inventory",
    "cache_inventory",
    "coverage_diagnostics",
  ];
  for (const stream of timestampOnlyStreams) {
    if (wants(stream)) {
      emit({ type: "STATE", stream, cursor: { fetched_at: nowIso() } });
    }
  }
}
731
/**
 * Read the `source_mtime_ms` stored on the previous sessions cursor.
 *
 * @param {{state?: object}} startMsg - The START message.
 * @returns {number | null} The stored value when `state.sessions` is a
 *   non-array object holding a finite number there; otherwise null.
 */
function readPriorSessionsSourceMtimeMs(startMsg) {
  const sessions = (startMsg.state || {}).sessions;
  const isCursorObject = sessions && typeof sessions === "object" && !Array.isArray(sessions);
  if (!isCursorObject) {
    return null;
  }
  const stored = sessions.source_mtime_ms;
  if (typeof stored !== "number" || !Number.isFinite(stored)) {
    return null;
  }
  return stored;
}
739
/**
 * Return a file's modification time in milliseconds.
 *
 * @param {string} path - File to stat (synchronously).
 * @returns {number} The `mtimeMs` of the file, or 0 when it cannot be stat'ed.
 */
function fileMtimeMs(path) {
  let mtimeMs = 0;
  try {
    ({ mtimeMs } = statSync(path));
  }
  catch {
    mtimeMs = 0;
  }
  return mtimeMs;
}
747
/**
 * Emit the inventory-style streams built from the Codex home directory.
 *
 * Three passes, each limited to requested streams and each waiting for any
 * pending stdout drain after every record:
 *  1. records grouped by stream from buildLocalSourceInventory;
 *  2. a directory listing for the shell-snapshots store;
 *  3. the inventory's coverage records on "coverage_diagnostics".
 *
 * @param {object} input
 * @param {string} input.codexHome - Codex home directory.
 * @param {{has(name: string): boolean}} input.requested - Requested streams.
 * @param {Function} input.emitRecord - Record sink.
 */
async function emitLocalInventoryStreams(input) {
  const { codexHome, requested, emitRecord } = input;
  const emitAll = async (stream, records) => {
    for (const record of records) {
      emitRecord(stream, record);
      await waitForEmitDrain();
    }
  };

  const inventory = await buildLocalSourceInventory("codex", codexHome, CODEX_KNOWN_LOCAL_STORES);
  for (const [stream, records] of inventory.recordsByStream) {
    if (requested.has(stream)) {
      await emitAll(stream, records);
    }
  }

  const directoryStreams = [
    {
      relativeRoot: "shell-snapshots",
      store: "shell_snapshots",
      stream: "shell_snapshots",
      reason: "shell content requires redaction review before payload collection",
    },
  ];
  for (const directoryStream of directoryStreams) {
    if (!requested.has(directoryStream.stream)) {
      continue;
    }
    const listing = await listDirectoryInventory({
      tool: "codex",
      sourceHome: codexHome,
      ...directoryStream,
    });
    await emitAll(directoryStream.stream, listing);
  }

  if (requested.has("coverage_diagnostics")) {
    await emitAll("coverage_diagnostics", inventory.coverage);
  }
}
786
/**
 * Connector entry point: read START, collect the requested streams, emit
 * STATE cursors and a final DONE message, then exit.
 *
 * Order of work: validate START and the requested sources; emit inventory
 * streams; scan rollouts (when sessions, messages or function_calls are
 * requested); emit sessions when at least one rollout was parsed or the state
 * database mtime differs from the stored cursor; emit rules, prompts and
 * skills; emit STATE cursors; emit DONE and exit 0.
 *
 * Protocol violations are reported through `fail`; thrown errors propagate to
 * the caller.
 */
async function main() {
  const startMsg = await readStartMessage();
  if (startMsg.type !== "START") {
    return fail("Expected START");
  }
  const requested = buildRequestedMap(startMsg);
  if (!requested.size) {
    return fail("START.scope.streams is required");
  }
  const resFilters = buildResourceFilters(requested);
  const dirs = resolveCodexDirs();
  await assertRequestedCodexSources(dirs, requested);
  const fileMtimes = readFileMtimes(startMsg);

  let total = 0;
  const nowIso = () => new Date().toISOString();
  // One timestamp is shared by every record emitted in this run.
  const emittedAt = nowIso();

  /** Filter, validate and emit a single record, counting the emitted ones. */
  const emitRecord = (stream, data) => {
    if (data.id == null) {
      return;
    }
    const allowedIds = resFilters.get(stream);
    if (allowedIds && !allowedIds.has(String(data.id))) {
      return;
    }
    const validation = validateRecord(stream, data);
    if (!validation.ok) {
      const details = validation.issues.map((i) => `${i.path}: ${i.message}`).join("; ");
      emit({
        type: "SKIP_RESULT",
        stream,
        reason: "shape_check_failed",
        message: `${String(data.id)}: ${details}`,
      });
      return;
    }
    emit({
      type: "RECORD",
      stream,
      key: data.id,
      data,
      emitted_at: emittedAt,
    });
    total += 1;
  };

  const needRollouts = ["sessions", "messages", "function_calls"].some((name) => requested.has(name));
  const rolloutAggregates = new Map();
  const newMtimes = { ...fileMtimes };
  // Both values are captured before any collection work starts.
  const scanStartedAtMs = Date.now();
  const sessionsSourceMtimeMs = fileMtimeMs(dirs.stateDbPath);
  let parsedRolloutFiles = 0;

  await emitLocalInventoryStreams({ codexHome: dirs.codexHome, requested, emitRecord });

  if (needRollouts) {
    const { parsedFiles } = await scanRollouts({
      activeQuietMs: resolveActiveRolloutQuietMs(),
      baseDir: dirs.baseDir,
      fileMtimes,
      newMtimes,
      requested,
      emitRecord,
      rolloutAggregates,
      scanStartedAtMs,
    });
    parsedRolloutFiles = parsedFiles;
  }

  if (requested.has("sessions")) {
    const sessionsChanged = parsedRolloutFiles > 0
      || readPriorSessionsSourceMtimeMs(startMsg) !== sessionsSourceMtimeMs;
    if (sessionsChanged) {
      emitSessions({ stateDbPath: dirs.stateDbPath, rolloutAggregates, emitRecord });
      await waitForEmitDrain();
    }
  }

  const authoredStreams = [
    ["rules", emitRulesStream, dirs.rulesDir],
    ["prompts", emitPromptsStream, dirs.promptsDir],
    ["skills", emitSkillsStream, dirs.skillsDir],
  ];
  for (const [stream, emitStream, dir] of authoredStreams) {
    if (requested.has(stream)) {
      await emitStream(dir, emitRecord);
    }
  }

  emitStateCursors({ requested, newMtimes, nowIso, sessionsSourceMtimeMs });
  await waitForEmitDrain();
  emit({ type: "DONE", status: "succeeded", records_emitted: total });
  flushAndExit(0);
}
869
// Run the connector only when this file is the process entry point. Any error
// escaping main() is reported as a non-retryable failed DONE message (via
// fail, which also exits with code 1).
if (isMainModule(import.meta.url)) {
  main().catch((e) => {
    fail(e instanceof Error ? e.message : String(e));
  });
}