@pdpp/local-collector 0.1.0-beta.7 → 0.1.0-beta.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. package/dist/local-collector/bin/pdpp-local-collector.js +580 -22
  2. package/dist/local-collector/src/runner.d.ts +1 -1
  3. package/dist/local-collector/src/runner.js +15 -1
  4. package/dist/polyfill-connectors/connectors/claude_code/index.js +60 -37
  5. package/dist/polyfill-connectors/connectors/codex/index.js +390 -108
  6. package/dist/polyfill-connectors/connectors/codex/parsers.js +5 -3
  7. package/dist/polyfill-connectors/src/bounded-file-preview.js +76 -0
  8. package/dist/polyfill-connectors/src/browser-handoff.js +38 -5
  9. package/dist/polyfill-connectors/src/collector-build-info.d.ts +8 -0
  10. package/dist/polyfill-connectors/src/collector-build-info.js +10 -0
  11. package/dist/polyfill-connectors/src/collector-runner.d.ts +54 -0
  12. package/dist/polyfill-connectors/src/collector-runner.js +250 -18
  13. package/dist/polyfill-connectors/src/connector-exit.js +62 -0
  14. package/dist/polyfill-connectors/src/connector-runtime-protocol.d.ts +41 -21
  15. package/dist/polyfill-connectors/src/connector-runtime.js +241 -30
  16. package/dist/polyfill-connectors/src/fingerprint-cursor.js +107 -0
  17. package/dist/polyfill-connectors/src/local-device-client.d.ts +17 -0
  18. package/dist/polyfill-connectors/src/local-device-client.js +69 -9
  19. package/dist/polyfill-connectors/src/local-device-outbox.d.ts +59 -0
  20. package/dist/polyfill-connectors/src/local-device-outbox.js +394 -5
  21. package/dist/polyfill-connectors/src/local-source-inventory.js +8 -1
  22. package/dist/polyfill-connectors/src/runner/index.d.ts +4 -3
  23. package/dist/polyfill-connectors/src/runner/index.js +4 -3
  24. package/dist/polyfill-connectors/src/safe-text-preview.js +13 -0
  25. package/dist/polyfill-connectors/src/static-secret-injection.js +155 -0
  26. package/package.json +1 -1
@@ -1,18 +1,22 @@
1
1
  #!/usr/bin/env node
2
+ import { createHash } from "node:crypto";
2
3
  import { createReadStream, existsSync, statSync } from "node:fs";
3
4
  import { readdir, readFile, stat } from "node:fs/promises";
4
5
  import { homedir } from "node:os";
5
6
  import { join } from "node:path";
6
- import { createInterface as createFileReader, createInterface } from "node:readline";
7
+ import { createInterface } from "node:readline";
7
8
  import { DatabaseSync } from "node:sqlite";
9
+ import { flushAndExitAfterRuntimeAck } from "../../src/connector-exit.js";
10
+ import { openCarryForwardCursor } from "../../src/fingerprint-cursor.js";
8
11
  import { isMainModule } from "../../src/is-main-module.js";
9
- import { buildLocalSourceInventory, listDirectoryInventory, } from "../../src/local-source-inventory.js";
12
+ import { buildLocalSourceInventory, listDirectoryInventory, openInventoryFingerprintCursor, } from "../../src/local-source-inventory.js";
10
13
  import { stringifyForJsonl } from "../../src/safe-emit.js";
11
14
  import { resourceSet } from "../../src/scope-filters.js";
12
15
  import { buildPromptRecord, buildRolloutOnlySessionRecord, buildRuleRecord, buildSkillRecord, buildThreadSessionRecord, extendTimestampRange, extractMessageText, isRolloutFile, isSkippableRulesLine, parseFrontmatter, payloadOutputPreview, RULES_SUFFIX_RE, splitRulesLines, TWO_DIGIT_DIR_RE, textPreview, YEAR_DIR_RE, } from "./parsers.js";
13
16
  import { validateRecord } from "./schemas.js";
14
17
  const DEFAULT_ACTIVE_ROLLOUT_QUIET_MS = 120_000;
15
18
  const ACTIVE_ROLLOUT_QUIET_MS_ENV = "PDPP_CODEX_ACTIVE_ROLLOUT_QUIET_MS";
19
+ const GUARD_PREFIX_BYTES = 64 * 1024;
16
20
  let stdoutDrainPromise = null;
17
21
  const emit = (m) => {
18
22
  const ok = process.stdout.write(stringifyForJsonl(m));
@@ -31,13 +35,7 @@ async function waitForEmitDrain() {
31
35
  }
32
36
  }
33
37
  const flushAndExit = (code) => {
34
- if (process.stdout.writableLength > 0) {
35
- process.stdout.once("drain", () => process.exit(code));
36
- setTimeout(() => process.exit(code), 3000).unref();
37
- }
38
- else {
39
- process.exit(code);
40
- }
38
+ flushAndExitAfterRuntimeAck(code);
41
39
  };
42
40
  const fail = (m, r = false) => {
43
41
  emit({
@@ -148,22 +146,55 @@ export const CODEX_KNOWN_LOCAL_STORES = [
148
146
  reason: "auth-adjacent credential material is never emitted",
149
147
  },
150
148
  ];
151
- async function* iterJsonlLines(path) {
152
- const r = createFileReader({
153
- input: createReadStream(path, { encoding: "utf8" }),
154
- terminal: false,
155
- });
156
- for await (const line of r) {
157
- if (!line.trim()) {
158
- continue;
159
- }
160
- try {
161
- yield JSON.parse(line);
162
- }
163
- catch {
149
/**
 * Stream JSON values from a JSONL file, starting at byte `startOffset`.
 *
 * Only newline-terminated lines are consumed: bytes after the last "\n" are
 * neither yielded nor counted. Blank lines, lines that fail `JSON.parse`, and
 * lines that parse to a falsy value still advance the running offset but are
 * not yielded.
 *
 * @param {string} path - File to read.
 * @param {number} startOffset - Byte position passed to `createReadStream` as `start`.
 * @yields {{ obj: any, committedOffset: number }} The parsed value plus
 *   `startOffset` advanced by every byte consumed through this line's newline.
 */
export async function* iterJsonlLinesFromOffset(path, startOffset) {
    let carry = Buffer.alloc(0);
    let consumed = startOffset;
    for await (const chunk of createReadStream(path, { start: startOffset })) {
        carry = carry.length > 0 ? Buffer.concat([carry, chunk]) : chunk;
        for (let eol = carry.indexOf(0x0a); eol !== -1; eol = carry.indexOf(0x0a)) {
            const text = carry.subarray(0, eol).toString("utf8");
            // Count the line (and its newline) before deciding whether to yield it.
            consumed += eol + 1;
            carry = carry.subarray(eol + 1);
            if (!text.trim()) {
                continue;
            }
            let value;
            try {
                value = JSON.parse(text);
            }
            catch {
                continue;
            }
            if (value) {
                yield { obj: value, committedOffset: consumed };
            }
        }
    }
}
179
/**
 * SHA-256 the first `guardBytes` bytes of a file.
 *
 * @param {string} path - File whose prefix is hashed.
 * @param {number} guardBytes - Number of leading bytes to hash; values <= 0
 *   hash the empty input.
 * @returns {Promise<string | null>} Hex digest, or `null` when the prefix
 *   cannot be hashed: the file is unreadable, it holds fewer than `guardBytes`
 *   bytes, or the stream cannot be opened with the requested range.
 */
async function hashFilePrefix(path, guardBytes) {
    if (guardBytes <= 0) {
        return createHash("sha256").update(Buffer.alloc(0)).digest("hex");
    }
    return await new Promise((resolve) => {
        let stream;
        try {
            // createReadStream validates its options synchronously; a bad range
            // (e.g. fractional or NaN guardBytes) must yield null like every
            // other failure instead of rejecting into callers that do not catch.
            stream = createReadStream(path, { start: 0, end: guardBytes - 1 });
        }
        catch {
            resolve(null);
            return;
        }
        const hash = createHash("sha256");
        let read = 0;
        stream.on("data", (chunk) => {
            read += chunk.length;
            hash.update(chunk);
        });
        stream.on("error", () => resolve(null));
        stream.on("end", () => {
            // A short read means the file no longer covers the guarded prefix.
            resolve(read >= guardBytes ? hash.digest("hex") : null);
        });
    });
}
167
198
  async function listIfExists(dir) {
168
199
  try {
169
200
  return await readdir(dir);
@@ -231,11 +262,10 @@ function openThreadsDb(dbPath) {
231
262
  try {
232
263
  return new DatabaseSync(dbPath, { readOnly: true });
233
264
  }
234
- catch (err) {
235
- const msg = err instanceof Error ? err.message : String(err);
265
+ catch {
236
266
  emit({
237
267
  type: "PROGRESS",
238
- message: `state_5.sqlite unreadable (${msg}); falling back to rollouts only`,
268
+ message: "Codex phase=index pass=index state_db_readable=false fallback=rollouts_only",
239
269
  });
240
270
  return null;
241
271
  }
@@ -245,11 +275,10 @@ function queryThreadsRows(db) {
245
275
  const rawRows = db.prepare(THREADS_QUERY).all();
246
276
  return rawRows;
247
277
  }
248
- catch (err) {
249
- const msg = err instanceof Error ? err.message : String(err);
278
+ catch {
250
279
  emit({
251
280
  type: "PROGRESS",
252
- message: `threads query failed (${msg}); falling back to rollouts only`,
281
+ message: "Codex phase=index pass=index state_db_query_failed=true fallback=rollouts_only",
253
282
  });
254
283
  return [];
255
284
  }
@@ -372,16 +401,16 @@ async function emitSkillsStream(skillsDir, emitRecord) {
372
401
  await waitForEmitDrain();
373
402
  }
374
403
  }
375
- export function makeRolloutParseState() {
404
+ export function makeRolloutParseState(seed) {
376
405
  return {
377
- sessionId: null,
406
+ sessionId: seed?.sessionId ?? null,
378
407
  sessionMeta: null,
379
- firstTimestamp: null,
380
- lastTimestamp: null,
381
- messageCount: 0,
382
- functionCallCount: 0,
408
+ firstTimestamp: seed?.firstTimestamp ?? null,
409
+ lastTimestamp: seed?.lastTimestamp ?? null,
410
+ messageCount: seed?.messageCount ?? 0,
411
+ functionCallCount: seed?.functionCallCount ?? 0,
383
412
  pendingCalls: new Map(),
384
- lineCount: 0,
413
+ lineCount: seed?.lineCount ?? 0,
385
414
  };
386
415
  }
387
416
  function emitMessageRecord(state, payload, ts, emitRecord) {
@@ -428,6 +457,10 @@ function applyFunctionCallOutput(state, payload, ts, emitRecord) {
428
457
  if (previewResult.binaryReason) {
429
458
  existing.output_binary_reason = previewResult.binaryReason;
430
459
  }
460
+ if (callId) {
461
+ state.pendingCalls.delete(callId);
462
+ }
463
+ emitRecord("function_calls", { ...existing });
431
464
  return;
432
465
  }
433
466
  emitRecord("function_calls", {
@@ -464,10 +497,10 @@ const PROGRESS_EVERY = 2000;
464
497
  export function shouldDeferActiveRolloutFile(input) {
465
498
  return input.quietMs > 0 && input.mtimeMs > input.nowMs - input.quietMs;
466
499
  }
467
- export function processRolloutLine({ deps, file, obj, state }) {
500
+ export function processRolloutLine({ deps, obj, state }) {
468
501
  state.lineCount++;
469
502
  if (state.lineCount % PROGRESS_EVERY === 0) {
470
- deps.progress(` ${file}: ${state.lineCount} lines parsed`);
503
+ deps.progress(`Codex phase=emit pass=emit lines_parsed=${state.lineCount}`);
471
504
  }
472
505
  const ts = obj.timestamp || null;
473
506
  const range = { firstTs: state.firstTimestamp, lastTs: state.lastTimestamp };
@@ -475,8 +508,10 @@ export function processRolloutLine({ deps, file, obj, state }) {
475
508
  state.firstTimestamp = range.firstTs;
476
509
  state.lastTimestamp = range.lastTs;
477
510
  if (obj.type === "session_meta") {
478
- state.sessionMeta = obj.payload || {};
479
- state.sessionId = state.sessionMeta.id || null;
511
+ if (state.sessionId === null) {
512
+ state.sessionMeta = obj.payload || {};
513
+ state.sessionId = state.sessionMeta.id || null;
514
+ }
480
515
  return;
481
516
  }
482
517
  if (!state.sessionId) {
@@ -496,12 +531,42 @@ export function flushPendingCalls(state, deps) {
496
531
  for (const call of state.pendingCalls.values()) {
497
532
  deps.emitRecord("function_calls", { ...call });
498
533
  }
534
+ state.pendingCalls.clear();
499
535
  }
500
- export function emitSessionsFromMaps({ threadsMap, rolloutAggregates, emitRecord }) {
536
/**
 * Decide whether a thread's session record should be emitted again.
 *
 * Emits when there is no prior fingerprint, when a rollout aggregate is
 * supplied for the thread, or when `updated_at` differs in a way that is not
 * "older or equal" (including a value appearing or disappearing).
 *
 * @param {{ updated_at?: number | null }} thread
 * @param {object | undefined} agg - Rollout aggregate for the thread, if any.
 * @param {{ updated_at?: number | null } | undefined} priorFingerprint
 * @returns {boolean}
 */
export function shouldReemitThreadSession(thread, agg, priorFingerprint) {
    if (!priorFingerprint || agg) {
        return true;
    }
    const before = priorFingerprint.updated_at ?? null;
    const now = thread.updated_at ?? null;
    if (now === null) {
        // The timestamp vanished: re-emit only if one was recorded previously.
        return before !== null;
    }
    return before === null || now > before;
}
553
/**
 * Build the fingerprint stored for a thread after this run.
 *
 * Counts come from the rollout aggregate when one exists, otherwise they are
 * carried over from the prior fingerprint, otherwise they are `null`.
 *
 * @param {{ updated_at?: number | null }} thread
 * @param {{ messageCount?: number, functionCallCount?: number } | undefined} agg
 * @param {{ message_count?: number | null, function_call_count?: number | null } | undefined} priorFingerprint
 * @returns {{ updated_at: number | null, message_count: number | null, function_call_count: number | null }}
 */
function makeThreadFingerprint(thread, agg, priorFingerprint) {
    const updatedAt = thread.updated_at ?? null;
    const messages = agg?.messageCount ?? priorFingerprint?.message_count ?? null;
    const functionCalls = agg?.functionCallCount ?? priorFingerprint?.function_call_count ?? null;
    return {
        updated_at: updatedAt,
        message_count: messages,
        function_call_count: functionCalls,
    };
}
560
+ export function emitSessionsFromMaps({ threadsMap, rolloutAggregates, emitRecord, cursor, }) {
501
561
  const emittedSessionIds = new Set();
502
562
  for (const [id, t] of threadsMap) {
503
- emitRecord("sessions", buildThreadSessionRecord(id, t, rolloutAggregates.get(id)));
504
563
  emittedSessionIds.add(id);
564
+ const agg = rolloutAggregates.get(id);
565
+ const prior = cursor?.prior(id);
566
+ if (shouldReemitThreadSession(t, agg, prior)) {
567
+ emitRecord("sessions", buildThreadSessionRecord(id, t, agg, prior));
568
+ }
569
+ cursor?.note(id, makeThreadFingerprint(t, agg, prior));
505
570
  }
506
571
  for (const [id, agg] of rolloutAggregates) {
507
572
  if (emittedSessionIds.has(id)) {
@@ -511,7 +576,7 @@ export function emitSessionsFromMaps({ threadsMap, rolloutAggregates, emitRecord
511
576
  }
512
577
  }
513
578
  async function parseRolloutFile(args) {
514
- const state = makeRolloutParseState();
579
+ const state = makeRolloutParseState(args.seed);
515
580
  const deps = {
516
581
  emitRecord: args.emitRecord,
517
582
  progress: (message) => {
@@ -519,8 +584,10 @@ async function parseRolloutFile(args) {
519
584
  },
520
585
  requested: args.requested,
521
586
  };
522
- for await (const obj of iterJsonlLines(args.path)) {
587
+ let committedOffset = args.startOffset;
588
+ for await (const { obj, committedOffset: lineEnd } of iterJsonlLinesFromOffset(args.path, args.startOffset)) {
523
589
  processRolloutLine({ obj, state, deps, file: args.file });
590
+ committedOffset = lineEnd;
524
591
  await waitForEmitDrain();
525
592
  }
526
593
  flushPendingCalls(state, deps);
@@ -535,8 +602,84 @@ async function parseRolloutFile(args) {
535
602
  rolloutPath: args.path,
536
603
  });
537
604
  }
605
+ return {
606
+ committedOffset,
607
+ sessionId: state.sessionId,
608
+ lineCount: state.lineCount,
609
+ messageCount: state.messageCount,
610
+ functionCallCount: state.functionCallCount,
611
+ firstTimestamp: state.firstTimestamp,
612
+ lastTimestamp: state.lastTimestamp,
613
+ };
614
+ }
615
/**
 * Choose how a rollout file should be handled given its stored cursor.
 *
 * - no cursor                          -> `{ kind: "full" }`
 * - size and mtime both unchanged      -> `{ kind: "skip" }`
 * - file shrank, stored offset is past
 *   the current end, or the prefix
 *   guard did not match                -> `{ kind: "unsafe_full" }`
 * - file grew                          -> `{ kind: "append", startOffset, seed }`
 * - anything else                      -> `{ kind: "skip" }`
 *
 * @param {{ cursor?: object, sizeBytes: number, mtimeMs: number, guardMatches: boolean }} input
 * @returns {{ kind: "full" | "skip" | "unsafe_full" } | { kind: "append", startOffset: number, seed: object }}
 */
export function decideRolloutAction(input) {
    const prior = input.cursor;
    if (!prior) {
        return { kind: "full" };
    }
    const { sizeBytes, mtimeMs } = input;
    const untouched = sizeBytes === prior.size_bytes && mtimeMs === prior.mtime_ms;
    if (untouched) {
        return { kind: "skip" };
    }
    const shrank = sizeBytes < prior.size_bytes;
    const offsetPastEnd = prior.offset_bytes > sizeBytes;
    if (shrank || offsetPastEnd || !input.guardMatches) {
        return { kind: "unsafe_full" };
    }
    if (sizeBytes > prior.size_bytes) {
        // Resume where the last run stopped, restoring its running totals.
        const seed = {
            sessionId: prior.session_id,
            lineCount: prior.line_count,
            messageCount: prior.message_count,
            functionCallCount: prior.function_call_count,
            firstTimestamp: prior.first_ts,
            lastTimestamp: prior.last_ts,
        };
        return { kind: "append", startOffset: prior.offset_bytes, seed };
    }
    return { kind: "skip" };
}
642
/**
 * Keep a file's existing cursor (if it has one) in the next state and record
 * its current mtime.
 *
 * @param {{ fileCursors: object, newFileCursors: object, newMtimes: object }} args
 * @param {string} path - Key of the file in the cursor/mtime maps.
 * @param {number} mtime - Modification time to store for `path`.
 */
function carryFileCursorForward(args, path, mtime) {
    const { fileCursors, newFileCursors, newMtimes } = args;
    const carried = fileCursors[path];
    if (carried) {
        newFileCursors[path] = carried;
    }
    newMtimes[path] = mtime;
}
539
- async function processRolloutEntry(entry, args) {
649
/**
 * Build the cursor to persist for a rollout file once it has been parsed.
 *
 * Both `size_bytes` and `offset_bytes` are set to the parser's committed
 * offset. The prefix guard covers at most `GUARD_PREFIX_BYTES` of that range;
 * if the prefix cannot be hashed, `head_sha256` is stored as "". The mtime is
 * read after hashing and falls back to 0 when the file cannot be stat'ed.
 *
 * @param {string} path - Rollout file that was parsed.
 * @param {object} result - Parse summary (committedOffset, sessionId, counts, timestamps).
 * @returns {Promise<object>} Cursor in its persisted snake_case shape.
 */
async function buildFileCursorAfterParse(path, result) {
    const { committedOffset } = result;
    const guardBytes = Math.min(committedOffset, GUARD_PREFIX_BYTES);
    const digest = await hashFilePrefix(path, guardBytes);
    let observedMtimeMs;
    try {
        observedMtimeMs = statSync(path).mtimeMs;
    }
    catch {
        observedMtimeMs = 0;
    }
    return {
        mtime_ms: observedMtimeMs,
        size_bytes: committedOffset,
        offset_bytes: committedOffset,
        line_count: result.lineCount,
        head_sha256: digest ?? "",
        guard_bytes: guardBytes,
        session_id: result.sessionId,
        message_count: result.messageCount,
        function_call_count: result.functionCallCount,
        first_ts: result.firstTimestamp,
        last_ts: result.lastTimestamp,
    };
}
673
/**
 * Work out the action for a rollout file, hashing its prefix only when needed.
 *
 * The prefix guard is checked only when a cursor exists, the file is larger
 * than the cursor's recorded size, and the stored offset still lies within the
 * file; in every other case `guardMatches` is passed as `false`.
 *
 * @param {string} path - Rollout file path.
 * @param {{ size: number | bigint, mtimeMs: number }} st - Stat result for the file.
 * @param {object | undefined} cursor - Stored cursor for the file, if any.
 * @returns {Promise<object>} Result of `decideRolloutAction`.
 */
async function resolveRolloutAction(path, st, cursor) {
    const sizeBytes = Number(st.size);
    const canResume = cursor && sizeBytes > cursor.size_bytes && cursor.offset_bytes <= sizeBytes;
    let guardMatches = false;
    if (canResume) {
        const currentHead = await hashFilePrefix(path, cursor.guard_bytes);
        guardMatches = currentHead !== null && currentHead === cursor.head_sha256;
    }
    const { mtimeMs } = st;
    return decideRolloutAction({ cursor, sizeBytes, mtimeMs, guardMatches });
}
682
+ async function processRolloutEntry(entry, args, rolloutOrdinal) {
540
683
  let st;
541
684
  try {
542
685
  st = statSync(entry.path);
@@ -545,30 +688,43 @@ async function processRolloutEntry(entry, args) {
545
688
  return "missing";
546
689
  }
547
690
  const mtime = st.mtimeMs;
548
- if (args.fileMtimes[entry.path] === mtime) {
691
+ const cursor = args.fileCursors[entry.path];
692
+ if (!cursor && args.fileMtimes[entry.path] === mtime) {
549
693
  args.newMtimes[entry.path] = mtime;
550
694
  return "skipped";
551
695
  }
696
+ const action = await resolveRolloutAction(entry.path, st, cursor);
697
+ if (action.kind === "skip") {
698
+ carryFileCursorForward(args, entry.path, mtime);
699
+ return "skipped";
700
+ }
552
701
  if (shouldDeferActiveRolloutFile({ mtimeMs: mtime, nowMs: args.scanStartedAtMs, quietMs: args.activeQuietMs })) {
553
702
  emit({
554
703
  type: "PROGRESS",
555
- message: `Deferring active rollout ${entry.year}/${entry.month}/${entry.day}/${entry.file}`,
704
+ message: `Codex phase=index pass=index item=${rolloutOrdinal} backpressure=active_rollout_deferred`,
556
705
  });
557
706
  await waitForEmitDrain();
707
+ if (cursor) {
708
+ args.newFileCursors[entry.path] = cursor;
709
+ }
558
710
  return "skipped";
559
711
  }
712
+ const isAppend = action.kind === "append";
560
713
  emit({
561
714
  type: "PROGRESS",
562
- message: `Parsing ${entry.year}/${entry.month}/${entry.day}/${entry.file} (${(st.size / 1024 / 1024).toFixed(1)}MB)`,
715
+ message: `Codex phase=emit pass=emit item=${rolloutOrdinal} mode=${isAppend ? "append" : "full"} file_size_mb=${(st.size / 1024 / 1024).toFixed(1)}`,
563
716
  });
564
717
  await waitForEmitDrain();
565
- await parseRolloutFile({
718
+ const result = await parseRolloutFile({
566
719
  path: entry.path,
567
720
  file: entry.file,
568
721
  requested: args.requested,
569
722
  emitRecord: args.emitRecord,
570
723
  rolloutAggregates: args.rolloutAggregates,
724
+ startOffset: isAppend ? action.startOffset : 0,
725
+ seed: isAppend ? action.seed : undefined,
571
726
  });
727
+ args.newFileCursors[entry.path] = await buildFileCursorAfterParse(entry.path, result);
572
728
  args.newMtimes[entry.path] = mtime;
573
729
  return "parsed";
574
730
  }
@@ -577,29 +733,34 @@ async function scanRollouts(args) {
577
733
  if (!baseExists) {
578
734
  emit({
579
735
  type: "PROGRESS",
580
- message: `${args.baseDir} not readable`,
736
+ message: "Codex phase=index pass=index sessions_dir_readable=false",
581
737
  });
582
738
  await waitForEmitDrain();
583
739
  return { parsedFiles: 0 };
584
740
  }
585
- let fileCount = 0;
586
- let parsedFiles = 0;
741
+ let totalRollouts = 0;
742
+ let parsedRollouts = 0;
587
743
  for await (const entry of walkRollouts(args.baseDir)) {
588
- fileCount++;
589
- if ((await processRolloutEntry(entry, args)) === "parsed") {
590
- parsedFiles++;
744
+ totalRollouts++;
745
+ if ((await processRolloutEntry(entry, args, totalRollouts)) === "parsed") {
746
+ parsedRollouts++;
591
747
  }
592
748
  }
593
749
  emit({
594
750
  type: "PROGRESS",
595
- message: `Scanned ${fileCount} rollout files`,
751
+ message: `Codex phase=index pass=index total_items=${totalRollouts} parsed_items=${parsedRollouts}`,
596
752
  });
597
753
  await waitForEmitDrain();
598
- return { parsedFiles };
754
+ return { parsedFiles: parsedRollouts };
599
755
  }
600
- function emitSessions({ stateDbPath, rolloutAggregates, emitRecord }) {
756
+ function emitSessions({ stateDbPath, rolloutAggregates, emitRecord, cursor }) {
601
757
  const { map: threadsById } = loadThreadsMap(stateDbPath);
602
- emitSessionsFromMaps({ threadsMap: threadsById, rolloutAggregates, emitRecord });
758
+ emitSessionsFromMaps({
759
+ threadsMap: threadsById,
760
+ rolloutAggregates,
761
+ emitRecord,
762
+ cursor,
763
+ });
603
764
  }
604
765
  async function readStartMessage() {
605
766
  const rl = createInterface({ input: process.stdin, terminal: false });
@@ -631,6 +792,50 @@ function readFileMtimes(startMsg) {
631
792
  state.file_mtimes ||
632
793
  {});
633
794
  }
795
/**
 * Validate and normalise one persisted rollout file cursor.
 *
 * Required fields: `offset_bytes`, `size_bytes`, `guard_bytes` (non-negative
 * safe integers), `mtime_ms` and `line_count` (finite numbers), and
 * `head_sha256` (string). Optional fields fall back to `null` (strings) or
 * `0` (counters).
 *
 * Byte fields are checked strictly because they are later used as read-stream
 * bounds; Node rejects negative or fractional `start`/`end` values with a
 * RangeError, so a malformed cursor is dropped here rather than aborting the
 * run later.
 *
 * @param {unknown} value - Raw cursor as read from prior state.
 * @returns {object | null} Normalised cursor, or `null` when `value` is unusable.
 */
function coerceRolloutFileCursor(value) {
    if (!value || typeof value !== "object" || Array.isArray(value)) {
        return null;
    }
    const v = value;
    const num = (x) => (typeof x === "number" && Number.isFinite(x) ? x : null);
    // Byte positions/lengths must be usable as stream offsets.
    const byteCount = (x) => (Number.isSafeInteger(x) && x >= 0 ? x : null);
    const offset = byteCount(v.offset_bytes);
    const size = byteCount(v.size_bytes);
    const guardBytes = byteCount(v.guard_bytes);
    const mtime = num(v.mtime_ms);
    const line = num(v.line_count);
    const head = typeof v.head_sha256 === "string" ? v.head_sha256 : null;
    if (offset === null || size === null || mtime === null || line === null || guardBytes === null || head === null) {
        return null;
    }
    return {
        mtime_ms: mtime,
        size_bytes: size,
        offset_bytes: offset,
        line_count: line,
        head_sha256: head,
        guard_bytes: guardBytes,
        session_id: typeof v.session_id === "string" ? v.session_id : null,
        message_count: num(v.message_count) ?? 0,
        function_call_count: num(v.function_call_count) ?? 0,
        first_ts: typeof v.first_ts === "string" ? v.first_ts : null,
        last_ts: typeof v.last_ts === "string" ? v.last_ts : null,
    };
}
824
/**
 * Collect the valid per-file rollout cursors from the start message's state.
 *
 * The first truthy `file_cursors` value found under `state.messages`,
 * `state.function_calls`, then `state.sessions` is used. Entries that
 * `coerceRolloutFileCursor` rejects are dropped.
 *
 * @param {{ state?: object }} startMsg
 * @returns {Record<string, object>} Map of file path to normalised cursor
 *   (empty when nothing usable is stored).
 */
export function readPriorFileCursors(startMsg) {
    const state = startMsg.state || {};
    let stored = null;
    for (const streamState of [state.messages, state.function_calls, state.sessions]) {
        const candidate = streamState?.file_cursors;
        if (candidate) {
            stored = candidate;
            break;
        }
    }
    const cursorsByPath = {};
    const usable = stored !== null && typeof stored === "object" && !Array.isArray(stored);
    if (!usable) {
        return cursorsByPath;
    }
    for (const [filePath, rawCursor] of Object.entries(stored)) {
        const normalised = coerceRolloutFileCursor(rawCursor);
        if (normalised) {
            cursorsByPath[filePath] = normalised;
        }
    }
    return cursorsByPath;
}
634
839
  function resolveActiveRolloutQuietMs(env = process.env) {
635
840
  const raw = env[ACTIVE_ROLLOUT_QUIET_MS_ENV];
636
841
  if (!raw) {
@@ -693,12 +898,16 @@ async function assertRequestedCodexSources(dirs, requested) {
693
898
  throw new Error(`requested Codex local source path(s) are missing or unreadable: ${missing.join(", ")}`);
694
899
  }
695
900
  }
696
- function emitStateCursors({ requested, newMtimes, nowIso, sessionsSourceMtimeMs }) {
901
+ function emitStateCursors({ requested, newFileCursors, newMtimes, nowIso, sessionsSourceMtimeMs, threadFingerprints, }) {
697
902
  if (requested.has("sessions")) {
698
903
  emit({
699
904
  type: "STATE",
700
905
  stream: "sessions",
701
- cursor: { fetched_at: nowIso(), source_mtime_ms: sessionsSourceMtimeMs },
906
+ cursor: {
907
+ fetched_at: nowIso(),
908
+ source_mtime_ms: sessionsSourceMtimeMs,
909
+ thread_fingerprints: threadFingerprints.toState(),
910
+ },
702
911
  });
703
912
  }
704
913
  if (requested.has("messages") || requested.has("function_calls")) {
@@ -706,7 +915,7 @@ function emitStateCursors({ requested, newMtimes, nowIso, sessionsSourceMtimeMs
706
915
  emit({
707
916
  type: "STATE",
708
917
  stream: cursorStream,
709
- cursor: { file_mtimes: newMtimes, fetched_at: nowIso() },
918
+ cursor: { file_mtimes: newMtimes, file_cursors: newFileCursors, fetched_at: nowIso() },
710
919
  });
711
920
  }
712
921
  for (const s of ["rules", "prompts", "skills"]) {
@@ -714,18 +923,8 @@ function emitStateCursors({ requested, newMtimes, nowIso, sessionsSourceMtimeMs
714
923
  emit({ type: "STATE", stream: s, cursor: { fetched_at: nowIso() } });
715
924
  }
716
925
  }
717
- for (const s of [
718
- "history",
719
- "session_index",
720
- "logs",
721
- "shell_snapshots",
722
- "config_inventory",
723
- "cache_inventory",
724
- "coverage_diagnostics",
725
- ]) {
726
- if (requested.has(s)) {
727
- emit({ type: "STATE", stream: s, cursor: { fetched_at: nowIso() } });
728
- }
926
+ if (requested.has("coverage_diagnostics")) {
927
+ emit({ type: "STATE", stream: "coverage_diagnostics", cursor: { fetched_at: nowIso() } });
729
928
  }
730
929
  }
731
930
  function readPriorSessionsSourceMtimeMs(startMsg) {
@@ -736,6 +935,52 @@ function readPriorSessionsSourceMtimeMs(startMsg) {
736
935
  : null;
737
936
  return typeof value === "number" && Number.isFinite(value) ? value : null;
738
937
  }
938
/**
 * Return `value` when it is a finite number, otherwise `null`.
 *
 * `Number.isFinite` does not coerce, so strings, `NaN`, and infinities all
 * map to `null`.
 *
 * @param {unknown} value
 * @returns {number | null}
 */
function nullableFiniteNumber(value) {
    if (Number.isFinite(value)) {
        return value;
    }
    return null;
}
941
/**
 * Normalise one stored thread fingerprint.
 *
 * Non-object (or array) input yields `null`; otherwise each of the three
 * fields is kept only when it is a finite number and becomes `null` if not.
 *
 * @param {unknown} value - Raw fingerprint entry from prior state.
 * @returns {{ updated_at: number | null, message_count: number | null, function_call_count: number | null } | null}
 */
function coerceFingerprintEntry(value) {
    const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (!isRecord) {
        return null;
    }
    const { updated_at: updatedAt, message_count: messages, function_call_count: functionCalls } = value;
    return {
        updated_at: nullableFiniteNumber(updatedAt),
        message_count: nullableFiniteNumber(messages),
        function_call_count: nullableFiniteNumber(functionCalls),
    };
}
952
/**
 * Dig `state.sessions.thread_fingerprints` out of a start message.
 *
 * @param {unknown} startMsg
 * @returns {object | null} The raw fingerprint object, or `null` when any
 *   level of the path is missing or has the wrong shape (`sessions` and
 *   `thread_fingerprints` must be non-array objects).
 */
function rawFingerprintMap(startMsg) {
    const isObject = (x) => Boolean(x) && typeof x === "object";
    const isRecord = (x) => isObject(x) && !Array.isArray(x);
    if (!isObject(startMsg)) {
        return null;
    }
    const { state } = startMsg;
    if (!isObject(state)) {
        return null;
    }
    const { sessions } = state;
    if (!isRecord(sessions)) {
        return null;
    }
    const fingerprints = sessions.thread_fingerprints;
    return isRecord(fingerprints) ? fingerprints : null;
}
970
/**
 * Load the thread fingerprints stored by a previous run.
 *
 * @param {unknown} startMsg - Start message carrying prior state.
 * @returns {Map<string, object>} Thread id to normalised fingerprint; empty
 *   when no fingerprint map is stored. Entries that fail coercion are skipped.
 */
export function readPriorThreadFingerprints(startMsg) {
    const fingerprints = new Map();
    const stored = rawFingerprintMap(startMsg) ?? {};
    for (const [threadId, candidate] of Object.entries(stored)) {
        const entry = coerceFingerprintEntry(candidate);
        if (entry) {
            fingerprints.set(threadId, entry);
        }
    }
    return fingerprints;
}
739
984
  function fileMtimeMs(path) {
740
985
  try {
741
986
  return statSync(path).mtimeMs;
@@ -744,43 +989,61 @@ function fileMtimeMs(path) {
744
989
  return 0;
745
990
  }
746
991
  }
747
- async function emitLocalInventoryStreams(input) {
748
- const inventory = await buildLocalSourceInventory("codex", input.codexHome, CODEX_KNOWN_LOCAL_STORES);
749
- for (const [stream, records] of inventory.recordsByStream) {
750
- if (!input.requested.has(stream)) {
751
- continue;
752
- }
753
- for (const record of records) {
754
- input.emitRecord(stream, record);
992
/**
 * Emit every coverage record from the inventory on the
 * `coverage_diagnostics` stream, if that stream was requested.
 *
 * Waits for the emit drain after each record.
 *
 * @param {{ requested: { has(stream: string): boolean }, inventory: { coverage: Iterable<object> }, emitRecord: Function }} input
 * @returns {Promise<void>}
 */
async function emitCoverageDiagnostics(input) {
    const stream = "coverage_diagnostics";
    if (input.requested.has(stream)) {
        for (const coverageRecord of input.inventory.coverage) {
            input.emitRecord(stream, coverageRecord);
            await waitForEmitDrain();
        }
    }
}
1001
+ async function emitGatedInventoryStream(input) {
1002
+ const cursor = openInventoryFingerprintCursor(input.priorState);
1003
+ for (const record of input.records) {
1004
+ if (cursor.shouldEmit(record)) {
1005
+ input.emitRecord(input.stream, record);
755
1006
  await waitForEmitDrain();
756
1007
  }
757
1008
  }
758
- for (const directoryStream of [
759
- {
760
- relativeRoot: "shell-snapshots",
761
- store: "shell_snapshots",
762
- stream: "shell_snapshots",
763
- reason: "shell content requires redaction review before payload collection",
764
- },
765
- ]) {
766
- if (!input.requested.has(directoryStream.stream)) {
1009
+ cursor.pruneStale();
1010
+ const inventoryCursor = { fetched_at: input.nowIso() };
1011
+ if (cursor.size() > 0) {
1012
+ inventoryCursor.fingerprints = cursor.toState();
1013
+ }
1014
+ emit({ type: "STATE", stream: input.stream, cursor: inventoryCursor });
1015
+ await waitForEmitDrain();
1016
+ }
1017
/**
 * Inventory stream names, in the order they are listed here.
 */
export const CODEX_GATED_INVENTORY_STREAMS = [
    "history", "session_index", "shell_snapshots",
    "config_inventory", "cache_inventory", "logs",
];
1025
+ async function emitLocalInventoryStreams(input) {
1026
+ for (const stream of CODEX_GATED_INVENTORY_STREAMS) {
1027
+ if (!input.requested.has(stream)) {
767
1028
  continue;
768
1029
  }
769
- const records = await listDirectoryInventory({
770
- tool: "codex",
771
- sourceHome: input.codexHome,
772
- ...directoryStream,
1030
+ const records = stream === "shell_snapshots"
1031
+ ? await listDirectoryInventory({
1032
+ tool: "codex",
1033
+ sourceHome: input.codexHome,
1034
+ relativeRoot: "shell-snapshots",
1035
+ store: "shell_snapshots",
1036
+ stream: "shell_snapshots",
1037
+ reason: "shell content requires redaction review before payload collection",
1038
+ })
1039
+ : (input.inventory.recordsByStream.get(stream) ?? []);
1040
+ await emitGatedInventoryStream({
1041
+ emitRecord: input.emitRecord,
1042
+ nowIso: input.nowIso,
1043
+ priorState: input.state[stream],
1044
+ records,
1045
+ stream,
773
1046
  });
774
- for (const record of records) {
775
- input.emitRecord(directoryStream.stream, record);
776
- await waitForEmitDrain();
777
- }
778
- }
779
- if (input.requested.has("coverage_diagnostics")) {
780
- for (const record of inventory.coverage) {
781
- input.emitRecord("coverage_diagnostics", record);
782
- await waitForEmitDrain();
783
- }
784
1047
  }
785
1048
  }
786
1049
  async function main() {
@@ -794,8 +1057,8 @@ async function main() {
794
1057
  }
795
1058
  const resFilters = buildResourceFilters(requested);
796
1059
  const dirs = resolveCodexDirs();
797
- await assertRequestedCodexSources(dirs, requested);
798
1060
  const fileMtimes = readFileMtimes(startMsg);
1061
+ const fileCursors = readPriorFileCursors(startMsg);
799
1062
  let total = 0;
800
1063
  const nowIso = () => new Date().toISOString();
801
1064
  const emittedAt = nowIso();
@@ -830,15 +1093,29 @@ async function main() {
830
1093
  const needRollouts = requested.has("sessions") || requested.has("messages") || requested.has("function_calls");
831
1094
  const rolloutAggregates = new Map();
832
1095
  const newMtimes = { ...fileMtimes };
1096
+ const newFileCursors = {};
833
1097
  const scanStartedAtMs = Date.now();
834
1098
  const sessionsSourceMtimeMs = fileMtimeMs(dirs.stateDbPath);
835
1099
  let parsedRolloutFiles = 0;
836
- await emitLocalInventoryStreams({ codexHome: dirs.codexHome, requested, emitRecord });
1100
+ const threadFingerprints = openCarryForwardCursor(readPriorThreadFingerprints(startMsg));
1101
+ const inventory = await buildLocalSourceInventory("codex", dirs.codexHome, CODEX_KNOWN_LOCAL_STORES);
1102
+ await emitCoverageDiagnostics({ emitRecord, inventory, requested });
1103
+ await assertRequestedCodexSources(dirs, requested);
1104
+ await emitLocalInventoryStreams({
1105
+ codexHome: dirs.codexHome,
1106
+ emitRecord,
1107
+ inventory,
1108
+ nowIso,
1109
+ requested,
1110
+ state: startMsg.state || {},
1111
+ });
837
1112
  if (needRollouts) {
838
1113
  const rolloutScan = await scanRollouts({
839
1114
  activeQuietMs: resolveActiveRolloutQuietMs(),
840
1115
  baseDir: dirs.baseDir,
1116
+ fileCursors,
841
1117
  fileMtimes,
1118
+ newFileCursors,
842
1119
  newMtimes,
843
1120
  requested,
844
1121
  emitRecord,
@@ -849,7 +1126,12 @@ async function main() {
849
1126
  }
850
1127
  if (requested.has("sessions") &&
851
1128
  (parsedRolloutFiles > 0 || readPriorSessionsSourceMtimeMs(startMsg) !== sessionsSourceMtimeMs)) {
852
- emitSessions({ stateDbPath: dirs.stateDbPath, rolloutAggregates, emitRecord });
1129
+ emitSessions({
1130
+ stateDbPath: dirs.stateDbPath,
1131
+ rolloutAggregates,
1132
+ emitRecord,
1133
+ cursor: threadFingerprints,
1134
+ });
853
1135
  await waitForEmitDrain();
854
1136
  }
855
1137
  if (requested.has("rules")) {
@@ -861,7 +1143,7 @@ async function main() {
861
1143
  if (requested.has("skills")) {
862
1144
  await emitSkillsStream(dirs.skillsDir, emitRecord);
863
1145
  }
864
- emitStateCursors({ requested, newMtimes, nowIso, sessionsSourceMtimeMs });
1146
+ emitStateCursors({ requested, newFileCursors, newMtimes, nowIso, sessionsSourceMtimeMs, threadFingerprints });
865
1147
  await waitForEmitDrain();
866
1148
  emit({ type: "DONE", status: "succeeded", records_emitted: total });
867
1149
  flushAndExit(0);