@pdpp/local-collector 0.1.0-beta.7 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. package/README.md +8 -8
  2. package/dist/local-collector/bin/pdpp-local-collector.js +580 -22
  3. package/dist/local-collector/src/runner.d.ts +1 -1
  4. package/dist/local-collector/src/runner.js +15 -1
  5. package/dist/polyfill-connectors/connectors/claude_code/index.js +60 -37
  6. package/dist/polyfill-connectors/connectors/codex/index.js +390 -108
  7. package/dist/polyfill-connectors/connectors/codex/parsers.js +5 -3
  8. package/dist/polyfill-connectors/src/bounded-file-preview.js +76 -0
  9. package/dist/polyfill-connectors/src/browser-handoff.js +38 -5
  10. package/dist/polyfill-connectors/src/collector-build-info.d.ts +8 -0
  11. package/dist/polyfill-connectors/src/collector-build-info.js +10 -0
  12. package/dist/polyfill-connectors/src/collector-runner.d.ts +54 -0
  13. package/dist/polyfill-connectors/src/collector-runner.js +250 -18
  14. package/dist/polyfill-connectors/src/connector-exit.js +62 -0
  15. package/dist/polyfill-connectors/src/connector-runtime-protocol.d.ts +41 -21
  16. package/dist/polyfill-connectors/src/connector-runtime.js +241 -30
  17. package/dist/polyfill-connectors/src/fingerprint-cursor.js +107 -0
  18. package/dist/polyfill-connectors/src/local-device-client.d.ts +17 -0
  19. package/dist/polyfill-connectors/src/local-device-client.js +69 -9
  20. package/dist/polyfill-connectors/src/local-device-outbox.d.ts +59 -0
  21. package/dist/polyfill-connectors/src/local-device-outbox.js +394 -5
  22. package/dist/polyfill-connectors/src/local-source-inventory.js +8 -1
  23. package/dist/polyfill-connectors/src/runner/index.d.ts +4 -3
  24. package/dist/polyfill-connectors/src/runner/index.js +4 -3
  25. package/dist/polyfill-connectors/src/safe-text-preview.js +13 -0
  26. package/dist/polyfill-connectors/src/static-secret-injection.js +151 -0
  27. package/package.json +2 -2
@@ -1,5 +1,5 @@
1
1
  import { type RuntimeCapabilityProfile } from "../../polyfill-connectors/src/runner/index.js";
2
- export { buildCollectorStartMessage, COLLECTOR_PROTOCOL_VERSION, CollectorStateReadError, drainCollectorQueue, emitToStdout, enrollCollector, evaluatePlacement, isMainModule, LocalDeviceClient, LocalDeviceHttpError, LocalDeviceOutbox, LocalDeviceQueue, PROVIDER_RUNTIME_CAPABILITIES, RUNTIME_CAPABILITY_MISMATCH_CODE, RuntimeCapabilityMismatchError, assertPlacementOrThrow, buildLocalDeviceRecordEnvelope, buildLocalDeviceOutboxId, canonicalJson, diffRequiredBindings, hashCanonicalJson, parseJsonlLine, resourceSet, runCollectorConnector, stringifyForJsonl, transformRecordsToCollectorEnvelopes, type CollectorChildContext, type CollectorConnectorSpec, type CollectorEnrollmentConfig, type CollectorRunConfig, type CollectorRunResult, type ConnectorPlacementInput, type ConnectorRuntimeRequirements, type EmittedMessage, type EnrollmentExchangeResponse, type LocalDeviceRecordEnvelope, type BuildLocalDeviceOutboxIdInput, type LocalDeviceOutboxClaimInput, type LocalDeviceOutboxDeadLetterInput, type LocalDeviceOutboxEnqueueInput, type LocalDeviceOutboxFailInput, type LocalDeviceOutboxItem, type LocalDeviceOutboxKind, type LocalDeviceOutboxLeaseInput, type LocalDeviceOutboxOptions, type LocalDeviceOutboxStatus, type LocalDeviceOutboxSummary, type PlacementDecision, type RuntimeBindingName, type RuntimeCapabilityProfile, type StartMessage, type StreamScope, } from "../../polyfill-connectors/src/runner/index.js";
2
+ export { buildCollectorStartMessage, COLLECTOR_COVERAGE_STATUSES, COLLECTOR_PROTOCOL_VERSION, CollectorStateReadError, drainCollectorQueue, emitToStdout, enrollCollector, evaluatePlacement, isMainModule, LocalDeviceClient, LocalDeviceHttpError, LocalDeviceOutbox, LocalDeviceQueue, PROVIDER_RUNTIME_CAPABILITIES, RUNTIME_CAPABILITY_MISMATCH_CODE, RuntimeCapabilityMismatchError, assertPlacementOrThrow, buildLocalDeviceRecordEnvelope, buildLocalDeviceOutboxId, canonicalJson, classifyDeadLetterError, deriveLocalCollectorLifecycleState, diffRequiredBindings, hashCanonicalJson, LOCAL_COLLECTOR_LIFECYCLE_STATES, parseJsonlLine, resourceSet, runCollectorConnector, stringifyForJsonl, summarizeCollectorCompleteness, transformRecordsToCollectorEnvelopes, type CollectorChildContext, type CollectorCompletenessSummary, type CollectorConnectorSpec, type CollectorCoverageStatus, type CollectorEnrollmentConfig, type CollectorRunConfig, type CollectorRunResult, type ConnectorPlacementInput, type ConnectorRuntimeRequirements, type EmittedMessage, type EnrollmentExchangeResponse, type LocalCollectorLifecycleInput, type LocalCollectorLifecycleState, type LocalDeviceRecordEnvelope, type BuildLocalDeviceOutboxIdInput, type LocalDeviceOutboxClaimInput, type LocalDeviceOutboxCompactResult, type LocalDeviceOutboxDeadLetterErrorClass, type LocalDeviceOutboxDeadLetterErrorSummary, type LocalDeviceOutboxDeadLetterErrorSummaryInput, type LocalDeviceOutboxDeadLetterInput, type LocalDeviceOutboxEnqueueInput, type LocalDeviceOutboxFailInput, type LocalDeviceOutboxItem, type LocalDeviceOutboxKind, type LocalDeviceOutboxLeaseInput, type LocalDeviceOutboxOptions, type LocalDeviceOutboxPageStats, type LocalDeviceOutboxPruneSentInput, type LocalDeviceOutboxPruneSentResult, type LocalDeviceOutboxRequeueDeadLettersInput, type LocalDeviceOutboxRequeueDeadLettersResult, type LocalDeviceOutboxStatus, type LocalDeviceOutboxSummary, type PlacementDecision, type RuntimeBindingName, type RuntimeCapabilityProfile, type StartMessage, type StreamScope, } from "../../polyfill-connectors/src/runner/index.js";
3
3
  export declare const COLLECTOR_RUNTIME_CAPABILITIES: RuntimeCapabilityProfile;
4
4
  export interface BundledConnectorEntry {
5
5
  readonly connector_id: string;
@@ -2,7 +2,7 @@ import { existsSync } from "node:fs";
2
2
  import { extname } from "node:path";
3
3
  import { fileURLToPath } from "node:url";
4
4
  import { COLLECTOR_PROTOCOL_VERSION as PROTOCOL_VERSION, COLLECTOR_RUNTIME_CAPABILITIES as POLYFILL_COLLECTOR_RUNTIME_CAPABILITIES, } from "../../polyfill-connectors/src/runner/index.js";
5
- export { buildCollectorStartMessage, COLLECTOR_PROTOCOL_VERSION, CollectorStateReadError, drainCollectorQueue, emitToStdout, enrollCollector, evaluatePlacement, isMainModule, LocalDeviceClient, LocalDeviceHttpError, LocalDeviceOutbox, LocalDeviceQueue, PROVIDER_RUNTIME_CAPABILITIES, RUNTIME_CAPABILITY_MISMATCH_CODE, RuntimeCapabilityMismatchError, assertPlacementOrThrow, buildLocalDeviceRecordEnvelope, buildLocalDeviceOutboxId, canonicalJson, diffRequiredBindings, hashCanonicalJson, parseJsonlLine, resourceSet, runCollectorConnector, stringifyForJsonl, transformRecordsToCollectorEnvelopes, } from "../../polyfill-connectors/src/runner/index.js";
5
+ export { buildCollectorStartMessage, COLLECTOR_COVERAGE_STATUSES, COLLECTOR_PROTOCOL_VERSION, CollectorStateReadError, drainCollectorQueue, emitToStdout, enrollCollector, evaluatePlacement, isMainModule, LocalDeviceClient, LocalDeviceHttpError, LocalDeviceOutbox, LocalDeviceQueue, PROVIDER_RUNTIME_CAPABILITIES, RUNTIME_CAPABILITY_MISMATCH_CODE, RuntimeCapabilityMismatchError, assertPlacementOrThrow, buildLocalDeviceRecordEnvelope, buildLocalDeviceOutboxId, canonicalJson, classifyDeadLetterError, deriveLocalCollectorLifecycleState, diffRequiredBindings, hashCanonicalJson, LOCAL_COLLECTOR_LIFECYCLE_STATES, parseJsonlLine, resourceSet, runCollectorConnector, stringifyForJsonl, summarizeCollectorCompleteness, transformRecordsToCollectorEnvelopes, } from "../../polyfill-connectors/src/runner/index.js";
6
6
  export const COLLECTOR_RUNTIME_CAPABILITIES = {
7
7
  id: POLYFILL_COLLECTOR_RUNTIME_CAPABILITIES.id,
8
8
  bindings: new Set(["network", "filesystem", "local_device"]),
@@ -32,6 +32,13 @@ export const BUNDLED_CONNECTORS = Object.freeze({
32
32
  "memory_notes",
33
33
  "skills",
34
34
  "slash_commands",
35
+ "file_history",
36
+ "cache_inventory",
37
+ "coverage_diagnostics",
38
+ "debug_artifacts",
39
+ "downloads",
40
+ "backup_inventory",
41
+ "config_inventory",
35
42
  ]),
36
43
  }),
37
44
  codex: Object.freeze({
@@ -46,6 +53,13 @@ export const BUNDLED_CONNECTORS = Object.freeze({
46
53
  "rules",
47
54
  "prompts",
48
55
  "skills",
56
+ "history",
57
+ "session_index",
58
+ "logs",
59
+ "shell_snapshots",
60
+ "config_inventory",
61
+ "cache_inventory",
62
+ "coverage_diagnostics",
49
63
  ]),
50
64
  }),
51
65
  });
@@ -4,9 +4,10 @@ import { readdir, readFile, stat } from "node:fs/promises";
4
4
  import { homedir } from "node:os";
5
5
  import { basename, join } from "node:path";
6
6
  import { createInterface as createFileReader } from "node:readline";
7
+ import { readBoundedFilePreview } from "../../src/bounded-file-preview.js";
7
8
  import { runConnector } from "../../src/connector-runtime.js";
8
9
  import { isMainModule } from "../../src/is-main-module.js";
9
- import { buildLocalSourceInventory, listDirectoryInventory, } from "../../src/local-source-inventory.js";
10
+ import { buildLocalSourceInventory, listDirectoryInventory, openInventoryFingerprintCursor, } from "../../src/local-source-inventory.js";
10
11
  import { safeTextPreview } from "../../src/safe-text-preview.js";
11
12
  import { ATTACHMENT_PREVIEW_CHARS, applyProjectDirScope, BYTES_PER_MB, buildMemoryNoteRecord, buildSkillRecord, buildSlashCommandRecord, extractContent, LINE_PROGRESS_INTERVAL, MESSAGE_CONTENT_PREVIEW_CHARS, makeEmptySessionAccumulator, mergeSessionObservations, parseCsvEnv, parseFrontmatter, SESSION_DIR_PREFIX_RE, TOOL_RESULT_PREVIEW_CHARS, textPreview, widenSessionTimeRange, } from "./parsers.js";
12
13
  import { validateRecord } from "./schemas.js";
@@ -222,16 +223,13 @@ export async function emitSessionsFromAccumulators({ emitRecord, requested, sess
222
223
  await emitRecord("sessions", { ...session });
223
224
  }
224
225
  }
225
- async function emitToolResultFile(args) {
226
- let buf;
227
- try {
228
- buf = await readFile(args.full, "utf8");
229
- }
230
- catch {
226
+ export async function emitToolResultFile(args) {
227
+ const bounded = await readBoundedFilePreview(args.full);
228
+ if (bounded === null) {
231
229
  return;
232
230
  }
233
231
  const rel = args.full.slice(args.toolResultsDir.length + 1);
234
- const previewResult = safeTextPreview(buf, TOOL_RESULT_PREVIEW_CHARS);
232
+ const previewResult = safeTextPreview(bounded.buffer, TOOL_RESULT_PREVIEW_CHARS);
235
233
  await args.emitRecord("attachments", {
236
234
  id: `tool_result_file:${args.projectDir}/${args.sessionId}/${rel}`,
237
235
  session_id: args.sessionId,
@@ -365,7 +363,7 @@ async function parseJsonlFile(args) {
365
363
  if (!buildOnly && lineCount % LINE_PROGRESS_INTERVAL === 0) {
366
364
  await emit({
367
365
  type: "PROGRESS",
368
- message: ` ${path}: ${lineCount} lines parsed`,
366
+ message: `Claude Code phase=emit pass=emit lines_parsed=${lineCount}`,
369
367
  });
370
368
  }
371
369
  const messageCountBeforeLine = obs.messageCount;
@@ -506,7 +504,7 @@ async function emitProjectMemoryNotes({ emitRecord, fileMtimes, newMtimes, proje
506
504
  await emitRecord("memory_notes", buildMemoryNoteRecord({ projectDir, relPath, frontmatter, body, path: fullPath, mtimeMs: st.mtimeMs }));
507
505
  }
508
506
  }
509
- async function processJsonlFile({ args, forcedSessionId, path, progressLabel, projectDir, }) {
507
+ async function processJsonlFile({ args, forcedSessionId, path, projectDir }) {
510
508
  let st;
511
509
  try {
512
510
  st = statSync(path);
@@ -521,7 +519,7 @@ async function processJsonlFile({ args, forcedSessionId, path, progressLabel, pr
521
519
  }
522
520
  await args.emit({
523
521
  type: "PROGRESS",
524
- message: `${args.buildOnly ? "Indexing" : "Emitting"} ${progressLabel} (${(st.size / BYTES_PER_MB).toFixed(1)}MB)`,
522
+ message: `Claude Code phase=${args.buildOnly ? "index" : "emit"} pass=${args.buildOnly ? "index" : "emit"} file_size_mb=${(st.size / BYTES_PER_MB).toFixed(1)}`,
525
523
  });
526
524
  await parseJsonlFile({
527
525
  buildOnly: args.buildOnly,
@@ -542,7 +540,6 @@ async function processTopLevelJsonl(entries, projectPath, projectDir, args) {
542
540
  args,
543
541
  forcedSessionId: null,
544
542
  path: join(projectPath, f),
545
- progressLabel: `${projectDir}/${f}`,
546
543
  projectDir,
547
544
  });
548
545
  }
@@ -561,7 +558,6 @@ async function processSessionDir(sessEnt, projectPath, projectDir, args) {
561
558
  args,
562
559
  forcedSessionId: sessionId,
563
560
  path: join(subagentsDir, f),
564
- progressLabel: `${projectDir}/${sessionId}/subagents/${f}`,
565
561
  projectDir,
566
562
  });
567
563
  }
@@ -606,13 +602,12 @@ async function listProjectDirs(baseDir, emit) {
606
602
  try {
607
603
  projectDirs = (await readdir(baseDir)).filter((name) => !name.startsWith("."));
608
604
  }
609
- catch (err) {
610
- const errMsg = err instanceof Error ? err.message : String(err);
605
+ catch {
611
606
  await emit({
612
607
  type: "SKIP_RESULT",
613
608
  stream: "sessions",
614
609
  reason: "claude_dir_not_found",
615
- message: `${baseDir} not readable: ${errMsg}`,
610
+ message: "Claude Code projects directory not readable",
616
611
  });
617
612
  return null;
618
613
  }
@@ -625,9 +620,10 @@ export async function scanProjectDirs(args) {
625
620
  if (projectDirs === null) {
626
621
  return;
627
622
  }
623
+ const totalProjectDirs = projectDirs.length;
628
624
  await args.emit({
629
625
  type: "PROGRESS",
630
- message: `${projectDirs.length} project dirs in scope`,
626
+ message: `Claude Code phase=index pass=index total_project_dirs=${totalProjectDirs}`,
631
627
  });
632
628
  for (const projectDir of projectDirs) {
633
629
  await scanProjectDir(projectDir, args);
@@ -661,15 +657,40 @@ async function assertRequestedClaudeSources(input) {
661
657
  throw new Error(`requested Claude Code local source path(s) are missing or unreadable: ${missing.join(", ")}`);
662
658
  }
663
659
  }
660
+ async function emitCoverageDiagnostics(input) {
661
+ if (!input.requested.has("coverage_diagnostics")) {
662
+ return;
663
+ }
664
+ for (const record of input.inventory.coverage) {
665
+ await input.emitRecord("coverage_diagnostics", record);
666
+ }
667
+ }
668
+ async function emitGatedInventoryStream(input) {
669
+ const cursor = openInventoryFingerprintCursor(input.priorState);
670
+ for (const record of input.records) {
671
+ if (cursor.shouldEmit(record)) {
672
+ await input.emitRecord(input.stream, record);
673
+ }
674
+ }
675
+ cursor.pruneStale();
676
+ const inventoryCursor = { fetched_at: nowIso() };
677
+ if (cursor.size() > 0) {
678
+ inventoryCursor.fingerprints = cursor.toState();
679
+ }
680
+ await input.emit({ type: "STATE", stream: input.stream, cursor: inventoryCursor });
681
+ }
664
682
  async function emitLocalInventoryStreams(input) {
665
- const inventory = await buildLocalSourceInventory("claude_code", input.claudeHome, CLAUDE_CODE_KNOWN_LOCAL_STORES);
666
- for (const [stream, records] of inventory.recordsByStream) {
683
+ for (const [stream, records] of input.inventory.recordsByStream) {
667
684
  if (!input.requested.has(stream)) {
668
685
  continue;
669
686
  }
670
- for (const record of records) {
671
- await input.emitRecord(stream, record);
672
- }
687
+ await emitGatedInventoryStream({
688
+ emit: input.emit,
689
+ emitRecord: input.emitRecord,
690
+ priorState: input.state[stream],
691
+ records,
692
+ stream,
693
+ });
673
694
  }
674
695
  if (input.requested.has("file_history")) {
675
696
  const records = await listDirectoryInventory({
@@ -680,14 +701,13 @@ async function emitLocalInventoryStreams(input) {
680
701
  stream: "file_history",
681
702
  reason: "metadata-only until payload contract is approved",
682
703
  });
683
- for (const record of records) {
684
- await input.emitRecord("file_history", record);
685
- }
686
- }
687
- if (input.requested.has("coverage_diagnostics")) {
688
- for (const record of inventory.coverage) {
689
- await input.emitRecord("coverage_diagnostics", record);
690
- }
704
+ await emitGatedInventoryStream({
705
+ emit: input.emit,
706
+ emitRecord: input.emitRecord,
707
+ priorState: input.state.file_history,
708
+ records,
709
+ stream: "file_history",
710
+ });
691
711
  }
692
712
  }
693
713
  async function runSkillsAndCommands(claudeHome, requested, emit, emitRecord, state) {
@@ -700,9 +720,8 @@ async function runSkillsAndCommands(claudeHome, requested, emit, emitRecord, sta
700
720
  newMtimes: state.newSkillsMtimes,
701
721
  });
702
722
  }
703
- catch (err) {
704
- const msg = err instanceof Error ? err.message : String(err);
705
- await emit({ type: "PROGRESS", message: `skills scan skipped: ${msg}` });
723
+ catch {
724
+ await emit({ type: "PROGRESS", message: "Claude Code phase=index pass=index stream=skills scan_skipped=true" });
706
725
  }
707
726
  try {
708
727
  await emitSlashCommands({
@@ -713,9 +732,11 @@ async function runSkillsAndCommands(claudeHome, requested, emit, emitRecord, sta
713
732
  newMtimes: state.newSlashCommandMtimes,
714
733
  });
715
734
  }
716
- catch (err) {
717
- const msg = err instanceof Error ? err.message : String(err);
718
- await emit({ type: "PROGRESS", message: `slash_commands scan skipped: ${msg}` });
735
+ catch {
736
+ await emit({
737
+ type: "PROGRESS",
738
+ message: "Claude Code phase=index pass=index stream=slash_commands scan_skipped=true",
739
+ });
719
740
  }
720
741
  if (requested.has("skills")) {
721
742
  await emit({
@@ -742,6 +763,8 @@ if (isMainModule(import.meta.url)) {
742
763
  async collect({ state, requested, emit, emitRecord }) {
743
764
  const claudeHome = process.env.CLAUDE_CODE_HOME || join(homedir(), ".claude");
744
765
  const baseDir = process.env.CLAUDE_CODE_PROJECTS_DIR || join(claudeHome, "projects");
766
+ const inventory = await buildLocalSourceInventory("claude_code", claudeHome, CLAUDE_CODE_KNOWN_LOCAL_STORES);
767
+ await emitCoverageDiagnostics({ emitRecord, inventory, requested });
745
768
  await assertRequestedClaudeSources({ baseDir, claudeHome, requested });
746
769
  const typedState = state;
747
770
  const messageFileMtimes = streamFileMtimes(typedState, "messages") ?? typedState.file_mtimes ?? {};
@@ -752,7 +775,7 @@ if (isMainModule(import.meta.url)) {
752
775
  const newSkillsMtimes = { ...skillsMtimes };
753
776
  const newSlashCommandMtimes = { ...slashCommandMtimes };
754
777
  const newMemoryNoteMtimes = { ...memoryNoteMtimes };
755
- await emitLocalInventoryStreams({ claudeHome, requested, emitRecord });
778
+ await emitLocalInventoryStreams({ claudeHome, emit, emitRecord, inventory, requested, state: typedState });
756
779
  await runSkillsAndCommands(claudeHome, requested, emit, emitRecord, {
757
780
  skillsMtimes,
758
781
  newSkillsMtimes,