@pdpp/local-collector 0.1.0-beta.7 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -8
- package/dist/local-collector/bin/pdpp-local-collector.js +580 -22
- package/dist/local-collector/src/runner.d.ts +1 -1
- package/dist/local-collector/src/runner.js +15 -1
- package/dist/polyfill-connectors/connectors/claude_code/index.js +60 -37
- package/dist/polyfill-connectors/connectors/codex/index.js +390 -108
- package/dist/polyfill-connectors/connectors/codex/parsers.js +5 -3
- package/dist/polyfill-connectors/src/bounded-file-preview.js +76 -0
- package/dist/polyfill-connectors/src/browser-handoff.js +38 -5
- package/dist/polyfill-connectors/src/collector-build-info.d.ts +8 -0
- package/dist/polyfill-connectors/src/collector-build-info.js +10 -0
- package/dist/polyfill-connectors/src/collector-runner.d.ts +54 -0
- package/dist/polyfill-connectors/src/collector-runner.js +250 -18
- package/dist/polyfill-connectors/src/connector-exit.js +62 -0
- package/dist/polyfill-connectors/src/connector-runtime-protocol.d.ts +41 -21
- package/dist/polyfill-connectors/src/connector-runtime.js +241 -30
- package/dist/polyfill-connectors/src/fingerprint-cursor.js +107 -0
- package/dist/polyfill-connectors/src/local-device-client.d.ts +17 -0
- package/dist/polyfill-connectors/src/local-device-client.js +69 -9
- package/dist/polyfill-connectors/src/local-device-outbox.d.ts +59 -0
- package/dist/polyfill-connectors/src/local-device-outbox.js +394 -5
- package/dist/polyfill-connectors/src/local-source-inventory.js +8 -1
- package/dist/polyfill-connectors/src/runner/index.d.ts +4 -3
- package/dist/polyfill-connectors/src/runner/index.js +4 -3
- package/dist/polyfill-connectors/src/safe-text-preview.js +13 -0
- package/dist/polyfill-connectors/src/static-secret-injection.js +151 -0
- package/package.json +2 -2
|
@@ -1,18 +1,22 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import { createHash } from "node:crypto";
|
|
2
3
|
import { createReadStream, existsSync, statSync } from "node:fs";
|
|
3
4
|
import { readdir, readFile, stat } from "node:fs/promises";
|
|
4
5
|
import { homedir } from "node:os";
|
|
5
6
|
import { join } from "node:path";
|
|
6
|
-
import { createInterface
|
|
7
|
+
import { createInterface } from "node:readline";
|
|
7
8
|
import { DatabaseSync } from "node:sqlite";
|
|
9
|
+
import { flushAndExitAfterRuntimeAck } from "../../src/connector-exit.js";
|
|
10
|
+
import { openCarryForwardCursor } from "../../src/fingerprint-cursor.js";
|
|
8
11
|
import { isMainModule } from "../../src/is-main-module.js";
|
|
9
|
-
import { buildLocalSourceInventory, listDirectoryInventory, } from "../../src/local-source-inventory.js";
|
|
12
|
+
import { buildLocalSourceInventory, listDirectoryInventory, openInventoryFingerprintCursor, } from "../../src/local-source-inventory.js";
|
|
10
13
|
import { stringifyForJsonl } from "../../src/safe-emit.js";
|
|
11
14
|
import { resourceSet } from "../../src/scope-filters.js";
|
|
12
15
|
import { buildPromptRecord, buildRolloutOnlySessionRecord, buildRuleRecord, buildSkillRecord, buildThreadSessionRecord, extendTimestampRange, extractMessageText, isRolloutFile, isSkippableRulesLine, parseFrontmatter, payloadOutputPreview, RULES_SUFFIX_RE, splitRulesLines, TWO_DIGIT_DIR_RE, textPreview, YEAR_DIR_RE, } from "./parsers.js";
|
|
13
16
|
import { validateRecord } from "./schemas.js";
|
|
14
17
|
const DEFAULT_ACTIVE_ROLLOUT_QUIET_MS = 120_000;
|
|
15
18
|
const ACTIVE_ROLLOUT_QUIET_MS_ENV = "PDPP_CODEX_ACTIVE_ROLLOUT_QUIET_MS";
|
|
19
|
+
const GUARD_PREFIX_BYTES = 64 * 1024;
|
|
16
20
|
let stdoutDrainPromise = null;
|
|
17
21
|
const emit = (m) => {
|
|
18
22
|
const ok = process.stdout.write(stringifyForJsonl(m));
|
|
@@ -31,13 +35,7 @@ async function waitForEmitDrain() {
|
|
|
31
35
|
}
|
|
32
36
|
}
|
|
33
37
|
/**
 * Terminate the connector process with the given exit code.
 *
 * All of the work is delegated to `flushAndExitAfterRuntimeAck` from
 * `connector-exit.js`; this wrapper only keeps the local call sites short.
 *
 * @param {number} code - Process exit code to hand to the shared helper.
 */
const flushAndExit = (code) => {
    flushAndExitAfterRuntimeAck(code);
};
|
|
42
40
|
const fail = (m, r = false) => {
|
|
43
41
|
emit({
|
|
@@ -148,22 +146,55 @@ export const CODEX_KNOWN_LOCAL_STORES = [
|
|
|
148
146
|
reason: "auth-adjacent credential material is never emitted",
|
|
149
147
|
},
|
|
150
148
|
];
|
|
151
|
-
async function*
|
|
152
|
-
const
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
149
|
+
/**
 * Stream a JSONL file starting at `startOffset` and yield each parsed line.
 *
 * Only newline-terminated lines are considered; a trailing fragment without a
 * `\n` is left unread so a later call can resume from the last committed
 * offset. Blank lines, lines that fail `JSON.parse`, and lines whose parsed
 * value is falsy are skipped, but their bytes still count toward the offset
 * reported with the next yielded line.
 *
 * @param {string} path - File to read.
 * @param {number} startOffset - Byte offset at which reading begins.
 * @yields {{ obj: any, committedOffset: number }} The parsed value and the byte
 *   offset just past the newline that terminated it.
 */
export async function* iterJsonlLinesFromOffset(path, startOffset) {
    const NEWLINE = 0x0a;
    const parseOrNull = (text) => {
        try {
            return JSON.parse(text);
        }
        catch {
            return null;
        }
    };
    let carry = Buffer.alloc(0);
    let committed = startOffset;
    for await (const chunk of createReadStream(path, { start: startOffset })) {
        // Prepend whatever partial line was left over from the previous chunk.
        carry = carry.length > 0 ? Buffer.concat([carry, chunk]) : chunk;
        let lineStart = 0;
        let lineEnd = carry.indexOf(NEWLINE, lineStart);
        while (lineEnd !== -1) {
            const text = carry.toString("utf8", lineStart, lineEnd);
            // Offsets are tracked in bytes (including the newline), not characters.
            committed += lineEnd - lineStart + 1;
            lineStart = lineEnd + 1;
            if (text.trim() !== "") {
                const parsed = parseOrNull(text);
                if (parsed) {
                    yield { obj: parsed, committedOffset: committed };
                }
            }
            lineEnd = carry.indexOf(NEWLINE, lineStart);
        }
        // Keep only the unterminated tail for the next chunk.
        carry = carry.subarray(lineStart);
    }
}
|
|
179
|
+
/**
 * Compute the SHA-256 (hex) of the first `guardBytes` bytes of a file.
 *
 * @param {string} path - File whose prefix is hashed.
 * @param {number} guardBytes - Number of leading bytes to hash.
 * @returns {Promise<string | null>} The hex digest; the digest of empty input
 *   when `guardBytes <= 0`; or `null` when the prefix cannot be hashed (read
 *   error, file shorter than `guardBytes`, or `guardBytes` is not a safe
 *   integer).
 */
async function hashFilePrefix(path, guardBytes) {
    if (guardBytes <= 0) {
        return createHash("sha256").update(Buffer.alloc(0)).digest("hex");
    }
    // `createReadStream` throws synchronously on a non-integer `end`. The guard
    // size can come from persisted cursor state, so treat a malformed value as
    // "cannot verify" instead of letting the call reject.
    if (!Number.isSafeInteger(guardBytes)) {
        return null;
    }
    const hash = createHash("sha256");
    let read = 0;
    try {
        // `end` is inclusive, hence the `- 1`.
        for await (const chunk of createReadStream(path, { start: 0, end: guardBytes - 1 })) {
            read += chunk.length;
            hash.update(chunk);
        }
    }
    catch {
        return null;
    }
    // A short read means the file no longer contains the whole guarded prefix.
    return read >= guardBytes ? hash.digest("hex") : null;
}
|
|
167
198
|
async function listIfExists(dir) {
|
|
168
199
|
try {
|
|
169
200
|
return await readdir(dir);
|
|
@@ -231,11 +262,10 @@ function openThreadsDb(dbPath) {
|
|
|
231
262
|
try {
|
|
232
263
|
return new DatabaseSync(dbPath, { readOnly: true });
|
|
233
264
|
}
|
|
234
|
-
catch
|
|
235
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
265
|
+
catch {
|
|
236
266
|
emit({
|
|
237
267
|
type: "PROGRESS",
|
|
238
|
-
message:
|
|
268
|
+
message: "Codex phase=index pass=index state_db_readable=false fallback=rollouts_only",
|
|
239
269
|
});
|
|
240
270
|
return null;
|
|
241
271
|
}
|
|
@@ -245,11 +275,10 @@ function queryThreadsRows(db) {
|
|
|
245
275
|
const rawRows = db.prepare(THREADS_QUERY).all();
|
|
246
276
|
return rawRows;
|
|
247
277
|
}
|
|
248
|
-
catch
|
|
249
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
278
|
+
catch {
|
|
250
279
|
emit({
|
|
251
280
|
type: "PROGRESS",
|
|
252
|
-
message:
|
|
281
|
+
message: "Codex phase=index pass=index state_db_query_failed=true fallback=rollouts_only",
|
|
253
282
|
});
|
|
254
283
|
return [];
|
|
255
284
|
}
|
|
@@ -372,16 +401,16 @@ async function emitSkillsStream(skillsDir, emitRecord) {
|
|
|
372
401
|
await waitForEmitDrain();
|
|
373
402
|
}
|
|
374
403
|
}
|
|
375
|
-
/**
 * Create the mutable state object used while parsing one rollout file.
 *
 * @param {object} [seed] - Optional counters and timestamps to resume from.
 *   Missing or nullish fields fall back to `null` (ids and timestamps) or `0`
 *   (counters).
 * @returns {object} Fresh parse state; `sessionMeta` always starts as `null`
 *   and `pendingCalls` always starts as an empty `Map`.
 */
export function makeRolloutParseState(seed) {
    const resume = seed ?? {};
    const state = {
        sessionId: resume.sessionId ?? null,
        sessionMeta: null,
        firstTimestamp: resume.firstTimestamp ?? null,
        lastTimestamp: resume.lastTimestamp ?? null,
        messageCount: resume.messageCount ?? 0,
        functionCallCount: resume.functionCallCount ?? 0,
        pendingCalls: new Map(),
        lineCount: resume.lineCount ?? 0,
    };
    return state;
}
|
|
387
416
|
function emitMessageRecord(state, payload, ts, emitRecord) {
|
|
@@ -428,6 +457,10 @@ function applyFunctionCallOutput(state, payload, ts, emitRecord) {
|
|
|
428
457
|
if (previewResult.binaryReason) {
|
|
429
458
|
existing.output_binary_reason = previewResult.binaryReason;
|
|
430
459
|
}
|
|
460
|
+
if (callId) {
|
|
461
|
+
state.pendingCalls.delete(callId);
|
|
462
|
+
}
|
|
463
|
+
emitRecord("function_calls", { ...existing });
|
|
431
464
|
return;
|
|
432
465
|
}
|
|
433
466
|
emitRecord("function_calls", {
|
|
@@ -464,10 +497,10 @@ const PROGRESS_EVERY = 2000;
|
|
|
464
497
|
/**
 * Decide whether a rollout file was modified too recently to parse now.
 *
 * @param {{ quietMs: number, mtimeMs: number, nowMs: number }} input
 * @returns {boolean} `true` when a positive quiet window is configured and the
 *   file's mtime falls inside it (strictly after `nowMs - quietMs`).
 */
export function shouldDeferActiveRolloutFile(input) {
    const { quietMs, mtimeMs, nowMs } = input;
    if (!(quietMs > 0)) {
        return false;
    }
    const quietWindowStart = nowMs - quietMs;
    return mtimeMs > quietWindowStart;
}
|
|
467
|
-
export function processRolloutLine({ deps,
|
|
500
|
+
export function processRolloutLine({ deps, obj, state }) {
|
|
468
501
|
state.lineCount++;
|
|
469
502
|
if (state.lineCount % PROGRESS_EVERY === 0) {
|
|
470
|
-
deps.progress(`
|
|
503
|
+
deps.progress(`Codex phase=emit pass=emit lines_parsed=${state.lineCount}`);
|
|
471
504
|
}
|
|
472
505
|
const ts = obj.timestamp || null;
|
|
473
506
|
const range = { firstTs: state.firstTimestamp, lastTs: state.lastTimestamp };
|
|
@@ -475,8 +508,10 @@ export function processRolloutLine({ deps, file, obj, state }) {
|
|
|
475
508
|
state.firstTimestamp = range.firstTs;
|
|
476
509
|
state.lastTimestamp = range.lastTs;
|
|
477
510
|
if (obj.type === "session_meta") {
|
|
478
|
-
state.
|
|
479
|
-
|
|
511
|
+
if (state.sessionId === null) {
|
|
512
|
+
state.sessionMeta = obj.payload || {};
|
|
513
|
+
state.sessionId = state.sessionMeta.id || null;
|
|
514
|
+
}
|
|
480
515
|
return;
|
|
481
516
|
}
|
|
482
517
|
if (!state.sessionId) {
|
|
@@ -496,12 +531,42 @@ export function flushPendingCalls(state, deps) {
|
|
|
496
531
|
for (const call of state.pendingCalls.values()) {
|
|
497
532
|
deps.emitRecord("function_calls", { ...call });
|
|
498
533
|
}
|
|
534
|
+
state.pendingCalls.clear();
|
|
499
535
|
}
|
|
500
|
-
export function
|
|
536
|
+
/**
 * Decide whether a thread's session record should be emitted again.
 *
 * @param {{ updated_at?: any }} thread - Current thread row.
 * @param {object | undefined} agg - Rollout aggregate for the thread from this
 *   run, if any.
 * @param {{ updated_at?: any } | undefined} priorFingerprint - Fingerprint
 *   recorded for the thread on a previous run, if any.
 * @returns {boolean} `true` when there is no prior fingerprint, when an
 *   aggregate exists, when exactly one side has a missing `updated_at`, or when
 *   the current `updated_at` is greater than the prior one.
 */
export function shouldReemitThreadSession(thread, agg, priorFingerprint) {
    if (!priorFingerprint || agg) {
        return true;
    }
    const before = priorFingerprint.updated_at ?? null;
    const now = thread.updated_at ?? null;
    if (now === null) {
        // The timestamp disappeared: re-emit only if one was recorded before.
        return before !== null;
    }
    return before === null || now > before;
}
|
|
553
|
+
/**
 * Build the fingerprint stored for a thread after this run.
 *
 * Counts come from the current rollout aggregate when one exists, otherwise
 * they are carried over from the prior fingerprint, otherwise `null`.
 *
 * @param {{ updated_at?: any }} thread
 * @param {{ messageCount?: number, functionCallCount?: number } | undefined} agg
 * @param {{ message_count?: any, function_call_count?: any } | undefined} priorFingerprint
 * @returns {{ updated_at: any, message_count: any, function_call_count: any }}
 */
function makeThreadFingerprint(thread, agg, priorFingerprint) {
    const updatedAt = thread.updated_at ?? null;
    const messageCount = agg?.messageCount ?? priorFingerprint?.message_count ?? null;
    const functionCallCount = agg?.functionCallCount ?? priorFingerprint?.function_call_count ?? null;
    return {
        updated_at: updatedAt,
        message_count: messageCount,
        function_call_count: functionCallCount,
    };
}
|
|
560
|
+
export function emitSessionsFromMaps({ threadsMap, rolloutAggregates, emitRecord, cursor, }) {
|
|
501
561
|
const emittedSessionIds = new Set();
|
|
502
562
|
for (const [id, t] of threadsMap) {
|
|
503
|
-
emitRecord("sessions", buildThreadSessionRecord(id, t, rolloutAggregates.get(id)));
|
|
504
563
|
emittedSessionIds.add(id);
|
|
564
|
+
const agg = rolloutAggregates.get(id);
|
|
565
|
+
const prior = cursor?.prior(id);
|
|
566
|
+
if (shouldReemitThreadSession(t, agg, prior)) {
|
|
567
|
+
emitRecord("sessions", buildThreadSessionRecord(id, t, agg, prior));
|
|
568
|
+
}
|
|
569
|
+
cursor?.note(id, makeThreadFingerprint(t, agg, prior));
|
|
505
570
|
}
|
|
506
571
|
for (const [id, agg] of rolloutAggregates) {
|
|
507
572
|
if (emittedSessionIds.has(id)) {
|
|
@@ -511,7 +576,7 @@ export function emitSessionsFromMaps({ threadsMap, rolloutAggregates, emitRecord
|
|
|
511
576
|
}
|
|
512
577
|
}
|
|
513
578
|
async function parseRolloutFile(args) {
|
|
514
|
-
const state = makeRolloutParseState();
|
|
579
|
+
const state = makeRolloutParseState(args.seed);
|
|
515
580
|
const deps = {
|
|
516
581
|
emitRecord: args.emitRecord,
|
|
517
582
|
progress: (message) => {
|
|
@@ -519,8 +584,10 @@ async function parseRolloutFile(args) {
|
|
|
519
584
|
},
|
|
520
585
|
requested: args.requested,
|
|
521
586
|
};
|
|
522
|
-
|
|
587
|
+
let committedOffset = args.startOffset;
|
|
588
|
+
for await (const { obj, committedOffset: lineEnd } of iterJsonlLinesFromOffset(args.path, args.startOffset)) {
|
|
523
589
|
processRolloutLine({ obj, state, deps, file: args.file });
|
|
590
|
+
committedOffset = lineEnd;
|
|
524
591
|
await waitForEmitDrain();
|
|
525
592
|
}
|
|
526
593
|
flushPendingCalls(state, deps);
|
|
@@ -535,8 +602,84 @@ async function parseRolloutFile(args) {
|
|
|
535
602
|
rolloutPath: args.path,
|
|
536
603
|
});
|
|
537
604
|
}
|
|
605
|
+
return {
|
|
606
|
+
committedOffset,
|
|
607
|
+
sessionId: state.sessionId,
|
|
608
|
+
lineCount: state.lineCount,
|
|
609
|
+
messageCount: state.messageCount,
|
|
610
|
+
functionCallCount: state.functionCallCount,
|
|
611
|
+
firstTimestamp: state.firstTimestamp,
|
|
612
|
+
lastTimestamp: state.lastTimestamp,
|
|
613
|
+
};
|
|
614
|
+
}
|
|
615
|
+
/**
 * Choose how to process a rollout file given its prior cursor and current stat.
 *
 * @param {{ cursor?: object, sizeBytes: number, mtimeMs: number, guardMatches?: boolean }} input
 * @returns {{ kind: "full" } | { kind: "skip" } | { kind: "unsafe_full" } |
 *   { kind: "append", startOffset: number, seed: object }}
 *   - `full`: no prior cursor.
 *   - `skip`: size and mtime both equal the cursor, or the file did not grow
 *     and none of the unsafe conditions hold.
 *   - `unsafe_full`: the file shrank, the stored offset lies past the end, or
 *     the prefix guard did not match.
 *   - `append`: the file grew; resume from the stored offset with seeded state.
 */
export function decideRolloutAction(input) {
    const { cursor, sizeBytes, mtimeMs } = input;
    if (!cursor) {
        return { kind: "full" };
    }
    const unchanged = sizeBytes === cursor.size_bytes && mtimeMs === cursor.mtime_ms;
    if (unchanged) {
        return { kind: "skip" };
    }
    const shrank = sizeBytes < cursor.size_bytes;
    const offsetPastEnd = cursor.offset_bytes > sizeBytes;
    if (shrank || offsetPastEnd || !input.guardMatches) {
        return { kind: "unsafe_full" };
    }
    const grew = sizeBytes > cursor.size_bytes;
    if (!grew) {
        return { kind: "skip" };
    }
    const seed = {
        sessionId: cursor.session_id,
        lineCount: cursor.line_count,
        messageCount: cursor.message_count,
        functionCallCount: cursor.function_call_count,
        firstTimestamp: cursor.first_ts,
        lastTimestamp: cursor.last_ts,
    };
    return { kind: "append", startOffset: cursor.offset_bytes, seed };
}
|
|
642
|
+
/**
 * Copy an untouched file's prior cursor into the next-state maps.
 *
 * Mutates `args.newFileCursors` (only when a prior cursor exists for `path`)
 * and always records `mtime` in `args.newMtimes`.
 *
 * @param {{ fileCursors: object, newFileCursors: object, newMtimes: object }} args
 * @param {string} path - Rollout file path used as the map key.
 * @param {number} mtime - Modification time to record for the file.
 */
function carryFileCursorForward(args, path, mtime) {
    const { fileCursors, newFileCursors, newMtimes } = args;
    const carried = fileCursors[path];
    if (carried) {
        newFileCursors[path] = carried;
    }
    newMtimes[path] = mtime;
}
|
|
539
|
-
async function
|
|
649
|
+
/**
 * Build the persisted cursor for a rollout file that was just parsed.
 *
 * Both `size_bytes` and `offset_bytes` are set to the committed offset rather
 * than the file's size on disk. The guard hash covers the first
 * `min(committedOffset, GUARD_PREFIX_BYTES)` bytes and is stored as `""` when
 * hashing yields `null`. The mtime is read after parsing and stored as `0` if
 * the stat fails.
 *
 * @param {string} path - Rollout file that was parsed.
 * @param {object} result - Parse summary (`committedOffset`, `lineCount`,
 *   `sessionId`, `messageCount`, `functionCallCount`, `firstTimestamp`,
 *   `lastTimestamp`).
 * @returns {Promise<object>} Cursor record in the snake_case persisted shape.
 */
async function buildFileCursorAfterParse(path, result) {
    const { committedOffset } = result;
    const guardBytes = Math.min(committedOffset, GUARD_PREFIX_BYTES);
    const prefixHash = await hashFilePrefix(path, guardBytes);
    let mtimeMs;
    try {
        mtimeMs = statSync(path).mtimeMs;
    }
    catch {
        mtimeMs = 0;
    }
    return {
        mtime_ms: mtimeMs,
        size_bytes: committedOffset,
        offset_bytes: committedOffset,
        line_count: result.lineCount,
        head_sha256: prefixHash ?? "",
        guard_bytes: guardBytes,
        session_id: result.sessionId,
        message_count: result.messageCount,
        function_call_count: result.functionCallCount,
        first_ts: result.firstTimestamp,
        last_ts: result.lastTimestamp,
    };
}
|
|
673
|
+
/**
 * Gather the inputs for `decideRolloutAction` and return its decision.
 *
 * The prefix hash is only computed when there is a cursor, the file is larger
 * than the cursor's recorded size, and the stored offset still lies within the
 * file; in every other case `guardMatches` is passed as `false`.
 *
 * @param {string} path - Rollout file path.
 * @param {{ size: number | bigint, mtimeMs: number }} st - Stat result for the file.
 * @param {object | undefined} cursor - Prior cursor for the file, if any.
 * @returns {Promise<object>} The action chosen by `decideRolloutAction`.
 */
async function resolveRolloutAction(path, st, cursor) {
    const sizeBytes = Number(st.size);
    const isAppendCandidate = Boolean(cursor) && sizeBytes > cursor.size_bytes && cursor.offset_bytes <= sizeBytes;
    let guardMatches = false;
    if (isAppendCandidate) {
        const currentHead = await hashFilePrefix(path, cursor.guard_bytes);
        guardMatches = currentHead !== null && currentHead === cursor.head_sha256;
    }
    return decideRolloutAction({ cursor, sizeBytes, mtimeMs: st.mtimeMs, guardMatches });
}
|
|
682
|
+
async function processRolloutEntry(entry, args, rolloutOrdinal) {
|
|
540
683
|
let st;
|
|
541
684
|
try {
|
|
542
685
|
st = statSync(entry.path);
|
|
@@ -545,30 +688,43 @@ async function processRolloutEntry(entry, args) {
|
|
|
545
688
|
return "missing";
|
|
546
689
|
}
|
|
547
690
|
const mtime = st.mtimeMs;
|
|
548
|
-
|
|
691
|
+
const cursor = args.fileCursors[entry.path];
|
|
692
|
+
if (!cursor && args.fileMtimes[entry.path] === mtime) {
|
|
549
693
|
args.newMtimes[entry.path] = mtime;
|
|
550
694
|
return "skipped";
|
|
551
695
|
}
|
|
696
|
+
const action = await resolveRolloutAction(entry.path, st, cursor);
|
|
697
|
+
if (action.kind === "skip") {
|
|
698
|
+
carryFileCursorForward(args, entry.path, mtime);
|
|
699
|
+
return "skipped";
|
|
700
|
+
}
|
|
552
701
|
if (shouldDeferActiveRolloutFile({ mtimeMs: mtime, nowMs: args.scanStartedAtMs, quietMs: args.activeQuietMs })) {
|
|
553
702
|
emit({
|
|
554
703
|
type: "PROGRESS",
|
|
555
|
-
message: `
|
|
704
|
+
message: `Codex phase=index pass=index item=${rolloutOrdinal} backpressure=active_rollout_deferred`,
|
|
556
705
|
});
|
|
557
706
|
await waitForEmitDrain();
|
|
707
|
+
if (cursor) {
|
|
708
|
+
args.newFileCursors[entry.path] = cursor;
|
|
709
|
+
}
|
|
558
710
|
return "skipped";
|
|
559
711
|
}
|
|
712
|
+
const isAppend = action.kind === "append";
|
|
560
713
|
emit({
|
|
561
714
|
type: "PROGRESS",
|
|
562
|
-
message: `
|
|
715
|
+
message: `Codex phase=emit pass=emit item=${rolloutOrdinal} mode=${isAppend ? "append" : "full"} file_size_mb=${(st.size / 1024 / 1024).toFixed(1)}`,
|
|
563
716
|
});
|
|
564
717
|
await waitForEmitDrain();
|
|
565
|
-
await parseRolloutFile({
|
|
718
|
+
const result = await parseRolloutFile({
|
|
566
719
|
path: entry.path,
|
|
567
720
|
file: entry.file,
|
|
568
721
|
requested: args.requested,
|
|
569
722
|
emitRecord: args.emitRecord,
|
|
570
723
|
rolloutAggregates: args.rolloutAggregates,
|
|
724
|
+
startOffset: isAppend ? action.startOffset : 0,
|
|
725
|
+
seed: isAppend ? action.seed : undefined,
|
|
571
726
|
});
|
|
727
|
+
args.newFileCursors[entry.path] = await buildFileCursorAfterParse(entry.path, result);
|
|
572
728
|
args.newMtimes[entry.path] = mtime;
|
|
573
729
|
return "parsed";
|
|
574
730
|
}
|
|
@@ -577,29 +733,34 @@ async function scanRollouts(args) {
|
|
|
577
733
|
if (!baseExists) {
|
|
578
734
|
emit({
|
|
579
735
|
type: "PROGRESS",
|
|
580
|
-
message:
|
|
736
|
+
message: "Codex phase=index pass=index sessions_dir_readable=false",
|
|
581
737
|
});
|
|
582
738
|
await waitForEmitDrain();
|
|
583
739
|
return { parsedFiles: 0 };
|
|
584
740
|
}
|
|
585
|
-
let
|
|
586
|
-
let
|
|
741
|
+
let totalRollouts = 0;
|
|
742
|
+
let parsedRollouts = 0;
|
|
587
743
|
for await (const entry of walkRollouts(args.baseDir)) {
|
|
588
|
-
|
|
589
|
-
if ((await processRolloutEntry(entry, args)) === "parsed") {
|
|
590
|
-
|
|
744
|
+
totalRollouts++;
|
|
745
|
+
if ((await processRolloutEntry(entry, args, totalRollouts)) === "parsed") {
|
|
746
|
+
parsedRollouts++;
|
|
591
747
|
}
|
|
592
748
|
}
|
|
593
749
|
emit({
|
|
594
750
|
type: "PROGRESS",
|
|
595
|
-
message: `
|
|
751
|
+
message: `Codex phase=index pass=index total_items=${totalRollouts} parsed_items=${parsedRollouts}`,
|
|
596
752
|
});
|
|
597
753
|
await waitForEmitDrain();
|
|
598
|
-
return { parsedFiles };
|
|
754
|
+
return { parsedFiles: parsedRollouts };
|
|
599
755
|
}
|
|
600
|
-
/**
 * Load the threads map from the state database and emit session records.
 *
 * @param {object} params
 * @param {string} params.stateDbPath - Path handed to `loadThreadsMap`.
 * @param {Map} params.rolloutAggregates - Per-session aggregates from this run.
 * @param {Function} params.emitRecord - Record sink.
 * @param {object} [params.cursor] - Thread fingerprint cursor, forwarded as-is.
 */
function emitSessions({ stateDbPath, rolloutAggregates, emitRecord, cursor }) {
    const threads = loadThreadsMap(stateDbPath);
    emitSessionsFromMaps({ threadsMap: threads.map, rolloutAggregates, emitRecord, cursor });
}
|
|
604
765
|
async function readStartMessage() {
|
|
605
766
|
const rl = createInterface({ input: process.stdin, terminal: false });
|
|
@@ -631,6 +792,50 @@ function readFileMtimes(startMsg) {
|
|
|
631
792
|
state.file_mtimes ||
|
|
632
793
|
{});
|
|
633
794
|
}
|
|
795
|
+
/**
 * Validate and normalise one persisted rollout file cursor.
 *
 * @param {unknown} value - Raw cursor entry read from connector state.
 * @returns {object | null} A normalised cursor, or `null` when the entry is not
 *   a plain object or any required field is missing or malformed. Required:
 *   `offset_bytes`, `size_bytes`, `guard_bytes` (non-negative safe integers),
 *   `mtime_ms`, `line_count` (finite numbers) and `head_sha256` (string).
 *   Optional counters default to `0`, optional strings to `null`.
 */
function coerceRolloutFileCursor(value) {
    if (!value || typeof value !== "object" || Array.isArray(value)) {
        return null;
    }
    const finiteOrNull = (x) => (typeof x === "number" && Number.isFinite(x) ? x : null);
    // Byte positions are later passed to `createReadStream`, which throws on
    // negative or fractional values. Rejecting them here makes a corrupt cursor
    // fall back to a full re-parse instead of failing the run.
    const byteCountOrNull = (x) => (Number.isSafeInteger(x) && x >= 0 ? x : null);
    const stringOrNull = (x) => (typeof x === "string" ? x : null);
    const offset = byteCountOrNull(value.offset_bytes);
    const size = byteCountOrNull(value.size_bytes);
    const mtime = finiteOrNull(value.mtime_ms);
    const line = finiteOrNull(value.line_count);
    const guardBytes = byteCountOrNull(value.guard_bytes);
    const head = stringOrNull(value.head_sha256);
    const required = [offset, size, mtime, line, guardBytes, head];
    if (required.some((field) => field === null)) {
        return null;
    }
    return {
        mtime_ms: mtime,
        size_bytes: size,
        offset_bytes: offset,
        line_count: line,
        head_sha256: head,
        guard_bytes: guardBytes,
        session_id: stringOrNull(value.session_id),
        message_count: finiteOrNull(value.message_count) ?? 0,
        function_call_count: finiteOrNull(value.function_call_count) ?? 0,
        first_ts: stringOrNull(value.first_ts),
        last_ts: stringOrNull(value.last_ts),
    };
}
|
|
824
|
+
/**
 * Read the per-file rollout cursors persisted by a previous run.
 *
 * The first truthy `file_cursors` value among the `messages`,
 * `function_calls` and `sessions` stream states (in that order) is used.
 * Entries that `coerceRolloutFileCursor` rejects are dropped.
 *
 * @param {{ state?: object }} startMsg - Start message received by the connector.
 * @returns {Record<string, object>} Valid cursors keyed by file path; empty
 *   when nothing usable was persisted.
 */
export function readPriorFileCursors(startMsg) {
    const state = startMsg.state || {};
    const persisted = state.messages?.file_cursors ||
        state.function_calls?.file_cursors ||
        state.sessions?.file_cursors ||
        null;
    const cursors = {};
    const usable = Boolean(persisted) && typeof persisted === "object" && !Array.isArray(persisted);
    if (!usable) {
        return cursors;
    }
    for (const path of Object.keys(persisted)) {
        const cursor = coerceRolloutFileCursor(persisted[path]);
        if (cursor) {
            cursors[path] = cursor;
        }
    }
    return cursors;
}
|
|
634
839
|
function resolveActiveRolloutQuietMs(env = process.env) {
|
|
635
840
|
const raw = env[ACTIVE_ROLLOUT_QUIET_MS_ENV];
|
|
636
841
|
if (!raw) {
|
|
@@ -693,12 +898,16 @@ async function assertRequestedCodexSources(dirs, requested) {
|
|
|
693
898
|
throw new Error(`requested Codex local source path(s) are missing or unreadable: ${missing.join(", ")}`);
|
|
694
899
|
}
|
|
695
900
|
}
|
|
696
|
-
function emitStateCursors({ requested, newMtimes, nowIso, sessionsSourceMtimeMs }) {
|
|
901
|
+
function emitStateCursors({ requested, newFileCursors, newMtimes, nowIso, sessionsSourceMtimeMs, threadFingerprints, }) {
|
|
697
902
|
if (requested.has("sessions")) {
|
|
698
903
|
emit({
|
|
699
904
|
type: "STATE",
|
|
700
905
|
stream: "sessions",
|
|
701
|
-
cursor: {
|
|
906
|
+
cursor: {
|
|
907
|
+
fetched_at: nowIso(),
|
|
908
|
+
source_mtime_ms: sessionsSourceMtimeMs,
|
|
909
|
+
thread_fingerprints: threadFingerprints.toState(),
|
|
910
|
+
},
|
|
702
911
|
});
|
|
703
912
|
}
|
|
704
913
|
if (requested.has("messages") || requested.has("function_calls")) {
|
|
@@ -706,7 +915,7 @@ function emitStateCursors({ requested, newMtimes, nowIso, sessionsSourceMtimeMs
|
|
|
706
915
|
emit({
|
|
707
916
|
type: "STATE",
|
|
708
917
|
stream: cursorStream,
|
|
709
|
-
cursor: { file_mtimes: newMtimes, fetched_at: nowIso() },
|
|
918
|
+
cursor: { file_mtimes: newMtimes, file_cursors: newFileCursors, fetched_at: nowIso() },
|
|
710
919
|
});
|
|
711
920
|
}
|
|
712
921
|
for (const s of ["rules", "prompts", "skills"]) {
|
|
@@ -714,18 +923,8 @@ function emitStateCursors({ requested, newMtimes, nowIso, sessionsSourceMtimeMs
|
|
|
714
923
|
emit({ type: "STATE", stream: s, cursor: { fetched_at: nowIso() } });
|
|
715
924
|
}
|
|
716
925
|
}
|
|
717
|
-
|
|
718
|
-
"
|
|
719
|
-
"session_index",
|
|
720
|
-
"logs",
|
|
721
|
-
"shell_snapshots",
|
|
722
|
-
"config_inventory",
|
|
723
|
-
"cache_inventory",
|
|
724
|
-
"coverage_diagnostics",
|
|
725
|
-
]) {
|
|
726
|
-
if (requested.has(s)) {
|
|
727
|
-
emit({ type: "STATE", stream: s, cursor: { fetched_at: nowIso() } });
|
|
728
|
-
}
|
|
926
|
+
if (requested.has("coverage_diagnostics")) {
|
|
927
|
+
emit({ type: "STATE", stream: "coverage_diagnostics", cursor: { fetched_at: nowIso() } });
|
|
729
928
|
}
|
|
730
929
|
}
|
|
731
930
|
function readPriorSessionsSourceMtimeMs(startMsg) {
|
|
@@ -736,6 +935,52 @@ function readPriorSessionsSourceMtimeMs(startMsg) {
|
|
|
736
935
|
: null;
|
|
737
936
|
return typeof value === "number" && Number.isFinite(value) ? value : null;
|
|
738
937
|
}
|
|
938
|
+
/**
 * Return `value` when it is a finite number, otherwise `null`.
 *
 * @param {unknown} value
 * @returns {number | null}
 */
function nullableFiniteNumber(value) {
    if (typeof value !== "number") {
        return null;
    }
    return Number.isFinite(value) ? value : null;
}
|
|
941
|
+
/**
 * Normalise one persisted thread fingerprint.
 *
 * @param {unknown} value - Raw fingerprint entry from connector state.
 * @returns {{ updated_at: number | null, message_count: number | null,
 *   function_call_count: number | null } | null} `null` when `value` is not a
 *   non-array object; otherwise each field is kept only if it is a finite number.
 */
function coerceFingerprintEntry(value) {
    const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (!isRecord) {
        return null;
    }
    const { updated_at: updatedAt, message_count: messageCount, function_call_count: functionCallCount } = value;
    return {
        updated_at: nullableFiniteNumber(updatedAt),
        message_count: nullableFiniteNumber(messageCount),
        function_call_count: nullableFiniteNumber(functionCallCount),
    };
}
|
|
952
|
+
/**
 * Locate `state.sessions.thread_fingerprints` in a start message.
 *
 * @param {unknown} startMsg - Start message received by the connector.
 * @returns {object | null} The raw fingerprint object, or `null` when any step
 *   of the path is missing or not an object (`sessions` and the fingerprint map
 *   must additionally not be arrays).
 */
function rawFingerprintMap(startMsg) {
    const isObject = (x) => Boolean(x) && typeof x === "object";
    const isRecord = (x) => isObject(x) && !Array.isArray(x);
    if (!isObject(startMsg) || !isObject(startMsg.state)) {
        return null;
    }
    const { sessions } = startMsg.state;
    if (!isRecord(sessions)) {
        return null;
    }
    const fingerprints = sessions.thread_fingerprints;
    return isRecord(fingerprints) ? fingerprints : null;
}
|
|
970
|
+
/**
 * Read the thread fingerprints persisted by a previous run.
 *
 * @param {unknown} startMsg - Start message received by the connector.
 * @returns {Map<string, object>} Fingerprints keyed by thread id; entries that
 *   `coerceFingerprintEntry` rejects are omitted. Empty when nothing usable was
 *   persisted.
 */
export function readPriorThreadFingerprints(startMsg) {
    const fingerprints = new Map();
    const persisted = rawFingerprintMap(startMsg);
    if (!persisted) {
        return fingerprints;
    }
    for (const id of Object.keys(persisted)) {
        const entry = coerceFingerprintEntry(persisted[id]);
        if (entry) {
            fingerprints.set(id, entry);
        }
    }
    return fingerprints;
}
|
|
739
984
|
function fileMtimeMs(path) {
|
|
740
985
|
try {
|
|
741
986
|
return statSync(path).mtimeMs;
|
|
@@ -744,43 +989,61 @@ function fileMtimeMs(path) {
|
|
|
744
989
|
return 0;
|
|
745
990
|
}
|
|
746
991
|
}
|
|
747
|
-
async function
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
992
|
+
/**
 * Emit every coverage record of the inventory on the `coverage_diagnostics`
 * stream, waiting for stdout to drain after each one.
 *
 * Does nothing unless `coverage_diagnostics` was requested.
 *
 * @param {{ requested: Set<string>, inventory: { coverage: Iterable<object> },
 *   emitRecord: Function }} input
 */
async function emitCoverageDiagnostics(input) {
    const STREAM = "coverage_diagnostics";
    const wanted = input.requested.has(STREAM);
    if (wanted) {
        for (const record of input.inventory.coverage) {
            input.emitRecord(STREAM, record);
            await waitForEmitDrain();
        }
    }
}
|
|
1001
|
+
/**
 * Emit the records of one inventory stream through a fingerprint cursor, then
 * emit that stream's STATE message.
 *
 * A record is emitted only when the cursor's `shouldEmit` accepts it. After the
 * loop the cursor is pruned, and its state is attached to the STATE cursor
 * under `fingerprints` only when the cursor is non-empty.
 *
 * @param {{ priorState: any, records: Iterable<object>, stream: string,
 *   emitRecord: Function, nowIso: () => string }} input
 */
async function emitGatedInventoryStream(input) {
    const fingerprints = openInventoryFingerprintCursor(input.priorState);
    for (const record of input.records) {
        if (!fingerprints.shouldEmit(record)) {
            continue;
        }
        input.emitRecord(input.stream, record);
        await waitForEmitDrain();
    }
    fingerprints.pruneStale();
    const fetchedAt = input.nowIso();
    const inventoryCursor = fingerprints.size() > 0
        ? { fetched_at: fetchedAt, fingerprints: fingerprints.toState() }
        : { fetched_at: fetchedAt };
    emit({ type: "STATE", stream: input.stream, cursor: inventoryCursor });
    await waitForEmitDrain();
}
|
|
1017
|
+
/**
 * Inventory stream names that `emitLocalInventoryStreams` iterates over, in
 * emission order.
 */
export const CODEX_GATED_INVENTORY_STREAMS = [
    "history",
    "session_index",
    "shell_snapshots",
    "config_inventory",
    "cache_inventory",
    "logs",
];
|
|
1025
|
+
/**
 * Emit every requested stream listed in `CODEX_GATED_INVENTORY_STREAMS`.
 *
 * `shell_snapshots` records are listed from the `shell-snapshots` directory
 * under the Codex home; every other stream takes its records from the prebuilt
 * inventory (an empty list when the inventory has none for it). Each stream is
 * then handed to `emitGatedInventoryStream` with its prior state.
 *
 * @param {{ requested: Set<string>, codexHome: string,
 *   inventory: { recordsByStream: Map<string, object[]> }, state: object,
 *   emitRecord: Function, nowIso: () => string }} input
 */
async function emitLocalInventoryStreams(input) {
    for (const stream of CODEX_GATED_INVENTORY_STREAMS) {
        if (!input.requested.has(stream)) {
            continue;
        }
        let records;
        if (stream === "shell_snapshots") {
            records = await listDirectoryInventory({
                tool: "codex",
                sourceHome: input.codexHome,
                relativeRoot: "shell-snapshots",
                store: "shell_snapshots",
                stream: "shell_snapshots",
                reason: "shell content requires redaction review before payload collection",
            });
        }
        else {
            records = input.inventory.recordsByStream.get(stream) ?? [];
        }
        await emitGatedInventoryStream({
            emitRecord: input.emitRecord,
            nowIso: input.nowIso,
            priorState: input.state[stream],
            records,
            stream,
        });
    }
}
|
|
786
1049
|
async function main() {
|
|
@@ -794,8 +1057,8 @@ async function main() {
|
|
|
794
1057
|
}
|
|
795
1058
|
const resFilters = buildResourceFilters(requested);
|
|
796
1059
|
const dirs = resolveCodexDirs();
|
|
797
|
-
await assertRequestedCodexSources(dirs, requested);
|
|
798
1060
|
const fileMtimes = readFileMtimes(startMsg);
|
|
1061
|
+
const fileCursors = readPriorFileCursors(startMsg);
|
|
799
1062
|
let total = 0;
|
|
800
1063
|
const nowIso = () => new Date().toISOString();
|
|
801
1064
|
const emittedAt = nowIso();
|
|
@@ -830,15 +1093,29 @@ async function main() {
|
|
|
830
1093
|
const needRollouts = requested.has("sessions") || requested.has("messages") || requested.has("function_calls");
|
|
831
1094
|
const rolloutAggregates = new Map();
|
|
832
1095
|
const newMtimes = { ...fileMtimes };
|
|
1096
|
+
const newFileCursors = {};
|
|
833
1097
|
const scanStartedAtMs = Date.now();
|
|
834
1098
|
const sessionsSourceMtimeMs = fileMtimeMs(dirs.stateDbPath);
|
|
835
1099
|
let parsedRolloutFiles = 0;
|
|
836
|
-
|
|
1100
|
+
const threadFingerprints = openCarryForwardCursor(readPriorThreadFingerprints(startMsg));
|
|
1101
|
+
const inventory = await buildLocalSourceInventory("codex", dirs.codexHome, CODEX_KNOWN_LOCAL_STORES);
|
|
1102
|
+
await emitCoverageDiagnostics({ emitRecord, inventory, requested });
|
|
1103
|
+
await assertRequestedCodexSources(dirs, requested);
|
|
1104
|
+
await emitLocalInventoryStreams({
|
|
1105
|
+
codexHome: dirs.codexHome,
|
|
1106
|
+
emitRecord,
|
|
1107
|
+
inventory,
|
|
1108
|
+
nowIso,
|
|
1109
|
+
requested,
|
|
1110
|
+
state: startMsg.state || {},
|
|
1111
|
+
});
|
|
837
1112
|
if (needRollouts) {
|
|
838
1113
|
const rolloutScan = await scanRollouts({
|
|
839
1114
|
activeQuietMs: resolveActiveRolloutQuietMs(),
|
|
840
1115
|
baseDir: dirs.baseDir,
|
|
1116
|
+
fileCursors,
|
|
841
1117
|
fileMtimes,
|
|
1118
|
+
newFileCursors,
|
|
842
1119
|
newMtimes,
|
|
843
1120
|
requested,
|
|
844
1121
|
emitRecord,
|
|
@@ -849,7 +1126,12 @@ async function main() {
|
|
|
849
1126
|
}
|
|
850
1127
|
if (requested.has("sessions") &&
|
|
851
1128
|
(parsedRolloutFiles > 0 || readPriorSessionsSourceMtimeMs(startMsg) !== sessionsSourceMtimeMs)) {
|
|
852
|
-
emitSessions({
|
|
1129
|
+
emitSessions({
|
|
1130
|
+
stateDbPath: dirs.stateDbPath,
|
|
1131
|
+
rolloutAggregates,
|
|
1132
|
+
emitRecord,
|
|
1133
|
+
cursor: threadFingerprints,
|
|
1134
|
+
});
|
|
853
1135
|
await waitForEmitDrain();
|
|
854
1136
|
}
|
|
855
1137
|
if (requested.has("rules")) {
|
|
@@ -861,7 +1143,7 @@ async function main() {
|
|
|
861
1143
|
if (requested.has("skills")) {
|
|
862
1144
|
await emitSkillsStream(dirs.skillsDir, emitRecord);
|
|
863
1145
|
}
|
|
864
|
-
emitStateCursors({ requested, newMtimes, nowIso, sessionsSourceMtimeMs });
|
|
1146
|
+
emitStateCursors({ requested, newFileCursors, newMtimes, nowIso, sessionsSourceMtimeMs, threadFingerprints });
|
|
865
1147
|
await waitForEmitDrain();
|
|
866
1148
|
emit({ type: "DONE", status: "succeeded", records_emitted: total });
|
|
867
1149
|
flushAndExit(0);
|