@pdpp/local-collector 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +48 -0
  2. package/dist/local-collector/bin/pdpp-local-collector.js +347 -0
  3. package/dist/local-collector/src/errors.d.ts +12 -0
  4. package/dist/local-collector/src/errors.js +20 -0
  5. package/dist/local-collector/src/runner.d.ts +16 -0
  6. package/dist/local-collector/src/runner.js +59 -0
  7. package/dist/polyfill-connectors/connectors/claude_code/index.js +806 -0
  8. package/dist/polyfill-connectors/connectors/claude_code/parsers.js +224 -0
  9. package/dist/polyfill-connectors/connectors/claude_code/schemas.js +120 -0
  10. package/dist/polyfill-connectors/connectors/claude_code/types.js +1 -0
  11. package/dist/polyfill-connectors/connectors/codex/index.js +880 -0
  12. package/dist/polyfill-connectors/connectors/codex/parsers.js +159 -0
  13. package/dist/polyfill-connectors/connectors/codex/schemas.js +118 -0
  14. package/dist/polyfill-connectors/connectors/codex/types.js +1 -0
  15. package/dist/polyfill-connectors/src/auth.js +76 -0
  16. package/dist/polyfill-connectors/src/browser-handoff.js +197 -0
  17. package/dist/polyfill-connectors/src/collector-protocol.d.ts +2 -0
  18. package/dist/polyfill-connectors/src/collector-protocol.js +2 -0
  19. package/dist/polyfill-connectors/src/collector-runner.d.ts +139 -0
  20. package/dist/polyfill-connectors/src/collector-runner.js +1084 -0
  21. package/dist/polyfill-connectors/src/connector-runtime-protocol.d.ts +191 -0
  22. package/dist/polyfill-connectors/src/connector-runtime-protocol.js +1 -0
  23. package/dist/polyfill-connectors/src/connector-runtime.js +879 -0
  24. package/dist/polyfill-connectors/src/fixture-capture.js +237 -0
  25. package/dist/polyfill-connectors/src/is-main-module.d.ts +1 -0
  26. package/dist/polyfill-connectors/src/is-main-module.js +17 -0
  27. package/dist/polyfill-connectors/src/local-device-client.d.ts +126 -0
  28. package/dist/polyfill-connectors/src/local-device-client.js +132 -0
  29. package/dist/polyfill-connectors/src/local-device-envelope.d.ts +26 -0
  30. package/dist/polyfill-connectors/src/local-device-envelope.js +43 -0
  31. package/dist/polyfill-connectors/src/local-device-outbox.d.ts +115 -0
  32. package/dist/polyfill-connectors/src/local-device-outbox.js +509 -0
  33. package/dist/polyfill-connectors/src/local-device-queue.d.ts +34 -0
  34. package/dist/polyfill-connectors/src/local-device-queue.js +133 -0
  35. package/dist/polyfill-connectors/src/local-source-inventory.js +119 -0
  36. package/dist/polyfill-connectors/src/pdpp-safe-text.js +13 -0
  37. package/dist/polyfill-connectors/src/runner/index.d.ts +11 -0
  38. package/dist/polyfill-connectors/src/runner/index.js +10 -0
  39. package/dist/polyfill-connectors/src/runtime-capabilities.d.ts +40 -0
  40. package/dist/polyfill-connectors/src/runtime-capabilities.js +59 -0
  41. package/dist/polyfill-connectors/src/safe-emit.d.ts +3 -0
  42. package/dist/polyfill-connectors/src/safe-emit.js +30 -0
  43. package/dist/polyfill-connectors/src/safe-text-preview.js +156 -0
  44. package/dist/polyfill-connectors/src/schema-registry.js +17 -0
  45. package/dist/polyfill-connectors/src/scope-filters.d.ts +38 -0
  46. package/dist/polyfill-connectors/src/scope-filters.js +80 -0
  47. package/dist/polyfill-connectors/src/shutdown-hook.js +51 -0
  48. package/dist/polyfill-connectors/src/streaming-target-registration.js +161 -0
  49. package/package.json +63 -0
@@ -0,0 +1,806 @@
1
+ #!/usr/bin/env node
2
+ import { createReadStream, statSync } from "node:fs";
3
+ import { readdir, readFile, stat } from "node:fs/promises";
4
+ import { homedir } from "node:os";
5
+ import { basename, join } from "node:path";
6
+ import { createInterface as createFileReader } from "node:readline";
7
+ import { runConnector } from "../../src/connector-runtime.js";
8
+ import { isMainModule } from "../../src/is-main-module.js";
9
+ import { buildLocalSourceInventory, listDirectoryInventory, } from "../../src/local-source-inventory.js";
10
+ import { safeTextPreview } from "../../src/safe-text-preview.js";
11
+ import { ATTACHMENT_PREVIEW_CHARS, applyProjectDirScope, BYTES_PER_MB, buildMemoryNoteRecord, buildSkillRecord, buildSlashCommandRecord, extractContent, LINE_PROGRESS_INTERVAL, MESSAGE_CONTENT_PREVIEW_CHARS, makeEmptySessionAccumulator, mergeSessionObservations, parseCsvEnv, parseFrontmatter, SESSION_DIR_PREFIX_RE, TOOL_RESULT_PREVIEW_CHARS, textPreview, widenSessionTimeRange, } from "./parsers.js";
12
+ import { validateRecord } from "./schemas.js";
13
/**
 * Returns the current time as an ISO-8601 string (via `Date#toISOString`).
 * @returns {string}
 */
const nowIso = () => {
  const now = new Date();
  return now.toISOString();
};

/** Case-insensitive match for file names that end in ".md". */
const MD_FILE_RE = /\.md$/i;
15
/**
 * Descriptors for the local stores this connector knows about.
 *
 * Each entry names a store, its path (`relativePath`), the stream it maps to
 * (`null` when it has none), a classification (`collect`, `inventory_only`,
 * `defer` or `exclude`) and a human-readable reason for that classification.
 * The list is handed to `buildLocalSourceInventory` elsewhere in this file.
 */
export const CLAUDE_CODE_KNOWN_LOCAL_STORES = [
  // [store, relativePath, stream, classification, reason]
  ["projects", "projects", "sessions", "collect", "declared transcript source"],
  ["skills", "skills", "skills", "collect", "declared user-authored skills source"],
  ["commands", "commands", "slash_commands", "collect", "declared user-authored slash commands source"],
  ["file_history", "file-history", "file_history", "inventory_only", "metadata-only until payload contract is approved"],
  [
    "context_mode",
    "context-mode",
    null,
    "inventory_only",
    "user-specific local convention; diagnostics only, not a general Claude Code stream",
  ],
  ["cache", "cache", "cache_inventory", "inventory_only", "raw cache payloads may contain sensitive tool output"],
  ["backups", "backups", "backup_inventory", "inventory_only", "backup payloads require owner review before collection"],
  ["config", "settings.json", "config_inventory", "inventory_only", "configuration is inventoried without payload content"],
  ["debug", "debug", "debug_artifacts", "defer", "debug payloads require deterministic redaction before collection"],
  ["downloads", "downloads", "downloads", "defer", "download payloads require owner approval before collection"],
  ["auth", "auth.json", null, "exclude", "auth-adjacent credential material is never emitted"],
].map(([store, relativePath, stream, classification, reason]) => ({
  store,
  relativePath,
  stream,
  classification,
  reason,
}));
94
/**
 * Streams a JSONL file and yields one parsed JSON object per line.
 *
 * Skipped silently (best effort):
 *  - blank / whitespace-only lines,
 *  - lines that are not valid JSON,
 *  - lines whose JSON value is not an object (`null`, numbers, strings,
 *    booleans). Consumers in this file read properties off every yielded
 *    value, and a bare `null` line would otherwise make them throw.
 *
 * @param {string} path - File to read (decoded as UTF-8).
 * @yields {object} The parsed value of each usable line, in file order.
 */
async function* iterJsonlLines(path) {
  const reader = createFileReader({
    input: createReadStream(path, { encoding: "utf8" }),
    terminal: false,
  });
  for await (const line of reader) {
    if (!line.trim()) {
      continue;
    }
    let value;
    try {
      value = JSON.parse(line);
    }
    catch {
      // A malformed line must not abort the rest of the file.
      continue;
    }
    if (value === null || typeof value !== "object") {
      continue;
    }
    yield value;
  }
}
110
/**
 * Creates the blank per-file observation record that is filled in while a
 * JSONL file is scanned.
 *
 * @param {string | null | undefined} forcedSessionId - When truthy, used as
 *   the initial `sessionId`; otherwise `sessionId` starts as `null`.
 * @returns {object} Fresh observations with zero messages and null fields.
 */
export function makeJsonlObservations(forcedSessionId) {
  const sessionId = forcedSessionId ? forcedSessionId : null;
  return {
    sessionId,
    firstTimestamp: null,
    lastTimestamp: null,
    messageCount: 0,
    cwd: null,
    gitBranch: null,
    userType: null,
    entrypoint: null,
    version: null,
  };
}
123
/**
 * Folds one parsed JSONL entry into the running observations (mutates `obs`).
 *
 * - `sessionId` follows the entry's value unless a forced id is in effect.
 * - `cwd`, `gitBranch`, `userType`, `entrypoint` and `version` keep the first
 *   truthy value seen.
 * - `firstTimestamp` / `lastTimestamp` track the smallest / largest
 *   `timestamp` using plain `<` / `>` comparison.
 *
 * @param {object} obj - Parsed JSONL entry.
 * @param {object} obs - Observations to update in place.
 * @param {string | null | undefined} forcedSessionId
 */
export function observeJsonlFields(obj, obs, forcedSessionId) {
  if (!forcedSessionId && obj.sessionId) {
    obs.sessionId = obj.sessionId;
  }

  const firstWinsFields = ["cwd", "gitBranch", "userType", "entrypoint", "version"];
  for (const field of firstWinsFields) {
    if (obj[field] && !obs[field]) {
      obs[field] = obj[field];
    }
  }

  const stamp = obj.timestamp;
  if (!stamp) {
    return;
  }
  if (!obs.firstTimestamp || stamp < obs.firstTimestamp) {
    obs.firstTimestamp = stamp;
  }
  if (!obs.lastTimestamp || stamp > obs.lastTimestamp) {
    obs.lastTimestamp = stamp;
  }
}
151
/**
 * Tells whether a JSONL entry type denotes a conversation message.
 * @param {unknown} type
 * @returns {boolean} `true` only for `"user"` and `"assistant"`.
 */
export function isMessageType(type) {
  switch (type) {
    case "user":
    case "assistant":
      return true;
    default:
      return false;
  }
}
154
/**
 * Tells whether a JSONL entry type is one that is recorded as an attachment.
 * @param {unknown} type
 * @returns {boolean} `true` for `"attachment"`, `"file-history-snapshot"`,
 *   `"permission-mode"` and `"last-prompt"`.
 */
export function isAttachmentType(type) {
  switch (type) {
    case "attachment":
    case "file-history-snapshot":
    case "permission-mode":
    case "last-prompt":
      return true;
    default:
      return false;
  }
}
157
/**
 * Builds the `messages` stream record for one user/assistant JSONL entry.
 *
 * The content is taken from `obj.message` when present (otherwise from the
 * entry itself) and shortened with `textPreview` to
 * `MESSAGE_CONTENT_PREVIEW_CHARS`. Both `role` and `type` are filled from
 * `obj.type`.
 *
 * @param {object} obj - Parsed JSONL entry.
 * @param {string} sessionId - Session the entry belongs to.
 * @param {string} uuid - Entry id, used as the record id.
 * @returns {object} Record for the `messages` stream.
 */
export function buildMessageRecord(obj, sessionId, uuid) {
  const entryType = obj.type ?? null;
  const contentSource = obj.message || obj;
  const content = textPreview(extractContent(contentSource), MESSAGE_CONTENT_PREVIEW_CHARS);
  return {
    id: uuid,
    session_id: sessionId,
    parent_uuid: obj.parentUuid ?? null,
    role: entryType,
    type: entryType,
    content,
    timestamp: obj.timestamp || null,
    is_sidechain: obj.isSidechain ?? null,
    user_type: obj.userType ?? null,
    agent_id: obj.agentId ?? null,
  };
}
171
/**
 * Builds the `attachments` stream record for one attachment-like JSONL entry.
 *
 * Content is extracted from `obj.attachment` first and from the entry itself
 * as a fallback, then reduced by `safeTextPreview` to
 * `ATTACHMENT_PREVIEW_CHARS`. `content_binary_reason` is only set when the
 * preview result reports kind `"binary"`; `content_bytes` is always `null`
 * here.
 *
 * @param {object} obj - Parsed JSONL entry.
 * @param {string} sessionId - Session the entry belongs to.
 * @param {string} uuid - Entry id, used as the record id.
 * @returns {object} Record for the `attachments` stream.
 */
export function buildAttachmentRecord(obj, sessionId, uuid) {
  const attachment = obj.attachment || {};
  const rawContent = extractContent(attachment) || extractContent(obj);
  const preview = safeTextPreview(rawContent, ATTACHMENT_PREVIEW_CHARS);
  const binaryReason = preview.kind === "binary" ? preview.reason : null;
  return {
    id: uuid,
    session_id: sessionId,
    parent_uuid: obj.parentUuid ?? null,
    event_type: obj.type ?? null,
    hook_name: attachment.hookName || null,
    tool_use_id: attachment.toolUseID || null,
    content_preview: preview.preview,
    content_binary_reason: binaryReason,
    content_bytes: null,
    timestamp: obj.timestamp || null,
  };
}
188
/**
 * Handles one parsed JSONL entry: counts messages and, outside build-only
 * mode, emits the matching `messages` / `attachments` record.
 *
 * Nothing happens until `obs.sessionId` is known. Message entries always
 * bump `obs.messageCount`; records are emitted only when the stream was
 * requested and the entry has a `uuid`.
 *
 * @param {object} params
 * @param {boolean} params.buildOnly - When true, only counting is performed.
 * @param {{ emitRecord: Function, requested: Set<string> }} params.deps
 * @param {object} params.obj - Parsed JSONL entry.
 * @param {object} params.obs - Running observations for the file.
 * @returns {Promise<void>}
 */
export async function processJsonlLine({ buildOnly, deps, obj, obs }) {
  const { sessionId } = obs;
  if (!sessionId) {
    return;
  }
  const { uuid, type } = obj;

  if (isMessageType(type)) {
    obs.messageCount += 1;
    const wantsMessage = !buildOnly && deps.requested.has("messages") && Boolean(uuid);
    if (wantsMessage) {
      await deps.emitRecord("messages", buildMessageRecord(obj, sessionId, uuid));
    }
    return;
  }

  if (buildOnly || !isAttachmentType(type)) {
    return;
  }
  if (!deps.requested.has("attachments") || !uuid) {
    return;
  }
  await deps.emitRecord("attachments", buildAttachmentRecord(obj, sessionId, uuid));
}
206
/**
 * Emits one `sessions` record per accumulated session, if that stream was
 * requested. Each record is a shallow copy of its accumulator.
 *
 * @param {object} params
 * @param {Function} params.emitRecord - `(stream, record) => Promise`.
 * @param {Set<string>} params.requested - Requested stream names.
 * @param {Map<string, object>} params.sessionAccumulators
 * @returns {Promise<void>}
 */
export async function emitSessionsFromAccumulators({ emitRecord, requested, sessionAccumulators, }) {
  const wantsSessions = requested.has("sessions");
  if (wantsSessions) {
    for (const accumulator of sessionAccumulators.values()) {
      const record = { ...accumulator };
      await emitRecord("sessions", record);
    }
  }
}
214
/**
 * Reads one tool-result file and emits it as a `tool_result_file` record on
 * the `attachments` stream. Unreadable files are skipped silently.
 *
 * The record id combines the project dir, the session id and the file path
 * with the tool-results directory prefix sliced off. The preview comes from
 * `safeTextPreview`, limited to `TOOL_RESULT_PREVIEW_CHARS`; size and
 * timestamp come from the supplied stat result.
 *
 * @param {object} args
 * @param {string} args.full - Path of the file.
 * @param {string} args.toolResultsDir - Root directory being walked.
 * @param {string} args.projectDir
 * @param {string} args.sessionId
 * @param {import("node:fs").Stats} args.st - Stat result for `args.full`.
 * @param {Function} args.emitRecord
 * @returns {Promise<void>}
 */
async function emitToolResultFile(args) {
  const { full, toolResultsDir, projectDir, sessionId, st } = args;

  let text;
  try {
    text = await readFile(full, "utf8");
  }
  catch {
    return;
  }

  const relativePath = full.slice(toolResultsDir.length + 1);
  const preview = safeTextPreview(text, TOOL_RESULT_PREVIEW_CHARS);
  const record = {
    id: `tool_result_file:${projectDir}/${sessionId}/${relativePath}`,
    session_id: sessionId,
    parent_uuid: null,
    event_type: "tool_result_file",
    hook_name: null,
    tool_use_id: null,
    content_preview: preview.preview,
    content_binary_reason: preview.kind === "binary" ? preview.reason : null,
    content_bytes: st.size,
    timestamp: new Date(st.mtimeMs).toISOString(),
  };
  await args.emitRecord("attachments", record);
}
237
/**
 * Processes one directory entry found under a session's tool-results tree.
 *
 * Only files and symbolic links are considered. The current mtime is always
 * written to `args.newMtimes`; the file is emitted only when its mtime
 * differs from `args.fileMtimes` and the `attachments` stream was requested.
 *
 * @param {import("node:fs").Dirent} ent
 * @param {object} args - `full`, `toolResultsDir`, `projectDir`, `sessionId`,
 *   `requested`, `emitRecord`, `fileMtimes`, `newMtimes`.
 * @returns {Promise<void>}
 */
async function processToolResultEntry(ent, args) {
  const isCandidate = ent.isFile() || ent.isSymbolicLink();
  if (!isCandidate) {
    return;
  }

  const { full } = args;
  let fileStat;
  try {
    fileStat = statSync(full);
  }
  catch {
    return;
  }

  const currentMtime = fileStat.mtimeMs;
  const unchanged = args.fileMtimes[full] === currentMtime;
  args.newMtimes[full] = currentMtime;
  if (unchanged || !args.requested.has("attachments")) {
    return;
  }

  await emitToolResultFile({
    emitRecord: args.emitRecord,
    full,
    toolResultsDir: args.toolResultsDir,
    projectDir: args.projectDir,
    sessionId: args.sessionId,
    st: fileStat,
  });
}
266
/**
 * Recursively walks `<sessionDir>/tool-results` and hands every
 * non-directory entry to `processToolResultEntry`.
 *
 * Returns quietly when the directory (or any nested directory) cannot be
 * listed.
 *
 * @param {object} args - `sessionDir`, `sessionId`, `projectDir`,
 *   `requested`, `emitRecord`, `fileMtimes`, `newMtimes`.
 * @returns {Promise<void>}
 */
async function walkToolResults(args) {
  const { sessionDir, sessionId, projectDir, requested, emitRecord, fileMtimes, newMtimes } = args;
  const toolResultsDir = join(sessionDir, "tool-results");

  // Probe first: a session without a tool-results directory has nothing to walk.
  try {
    await readdir(toolResultsDir);
  }
  catch {
    return;
  }

  const shared = { toolResultsDir, projectDir, sessionId, requested, emitRecord, fileMtimes, newMtimes };

  async function descend(currentDir) {
    let dirents;
    try {
      dirents = await readdir(currentDir, { withFileTypes: true });
    }
    catch {
      return;
    }
    for (const dirent of dirents) {
      const full = join(currentDir, dirent.name);
      if (dirent.isDirectory()) {
        await descend(full);
      }
      else {
        await processToolResultEntry(dirent, { full, ...shared });
      }
    }
  }

  await descend(toolResultsDir);
}
303
/**
 * Collects files below `rootDir` that satisfy `predicate`.
 *
 * Entries are visited in `localeCompare` name order within each directory;
 * names starting with "." are ignored (files and directories alike), and
 * directories that cannot be listed are skipped.
 *
 * @param {string} rootDir - Directory to start from.
 * @param {(ent: import("node:fs").Dirent) => boolean} predicate - Applied to
 *   every non-directory entry.
 * @returns {Promise<Array<{ fullPath: string, relPath: string }>>} Matches,
 *   where `relPath` is "/"-joined relative to `rootDir`.
 */
async function readFilesRecursively(rootDir, predicate) {
  const matches = [];
  const byName = (left, right) => left.name.localeCompare(right.name);

  async function visit(currentDir, relPrefix) {
    let dirents;
    try {
      dirents = await readdir(currentDir, { withFileTypes: true });
    }
    catch {
      return;
    }
    dirents.sort(byName);
    for (const dirent of dirents) {
      const { name } = dirent;
      if (name.startsWith(".")) {
        continue;
      }
      const relPath = relPrefix ? `${relPrefix}/${name}` : name;
      const fullPath = join(currentDir, name);
      if (dirent.isDirectory()) {
        await visit(fullPath, relPath);
      }
      else if (predicate(dirent)) {
        matches.push({ fullPath, relPath });
      }
    }
  }

  await visit(rootDir, "");
  return matches;
}
331
/**
 * Merges one file's observations into the accumulator for its session,
 * creating the accumulator on first sight. Does nothing when the
 * observations carry no session id.
 *
 * @param {Map<string, object>} sessionAccumulators - Updated in place.
 * @param {string} projectDir - Passed to `makeEmptySessionAccumulator`.
 * @param {object} obs - Observations gathered from one JSONL file.
 */
function updateSessionAccumulator(sessionAccumulators, projectDir, obs) {
  const { sessionId } = obs;
  if (!sessionId) {
    return;
  }

  const existing = sessionAccumulators.get(sessionId);
  const accumulator = existing ?? makeEmptySessionAccumulator(sessionId, projectDir);

  const { cwd, entrypoint, gitBranch, userType, version } = obs;
  mergeSessionObservations(accumulator, { cwd, entrypoint, gitBranch, userType, version });
  widenSessionTimeRange(accumulator, obs.firstTimestamp, obs.lastTimestamp);
  accumulator.message_count += obs.messageCount;

  sessionAccumulators.set(sessionId, accumulator);
}
348
/**
 * Scans one JSONL file entry by entry.
 *
 * Every entry updates the running observations and is passed to
 * `processJsonlLine`. Outside build-only mode a PROGRESS message is emitted
 * every `LINE_PROGRESS_INTERVAL` entries. In build-only mode the final
 * observations are merged into `sessionAccumulators`.
 *
 * @param {object} args - `buildOnly`, `path`, `projectDir`, `requested`,
 *   `emit`, `emitRecord`, `sessionAccumulators`, `forcedSessionId`.
 * @returns {Promise<string | null>} The session id observed for the file.
 */
async function parseJsonlFile(args) {
  const { buildOnly, path, projectDir, requested, emit, emitRecord, sessionAccumulators, forcedSessionId } = args;
  const observations = makeJsonlObservations(forcedSessionId);
  const deps = { emitRecord, requested };
  let parsedLines = 0;

  for await (const entry of iterJsonlLines(path)) {
    parsedLines += 1;
    const atProgressMark = parsedLines % LINE_PROGRESS_INTERVAL === 0;
    if (atProgressMark && !buildOnly) {
      await emit({
        type: "PROGRESS",
        message: ` ${path}: ${parsedLines} lines parsed`,
      });
    }
    observeJsonlFields(entry, observations, forcedSessionId);
    await processJsonlLine({ buildOnly, deps, obj: entry, obs: observations });
  }

  if (buildOnly) {
    updateSessionAccumulator(sessionAccumulators, projectDir, observations);
  }
  return observations.sessionId;
}
368
/**
 * Records `mtime` for `path` in `newMtimes` and reports whether the file is
 * unchanged relative to `fileMtimes`.
 *
 * @param {Record<string, number>} fileMtimes - Mtimes from the prior cursor.
 * @param {Record<string, number>} newMtimes - Mtimes for the next cursor
 *   (written to).
 * @param {string} path
 * @param {number} mtime
 * @returns {boolean} `true` when the stored mtime equals `mtime`.
 */
function markFileMtimeAndShouldSkip(fileMtimes, newMtimes, path, mtime) {
  // Write first, then read, so the result is unaffected by statement order
  // even if both arguments are the same object.
  newMtimes[path] = mtime;
  const recorded = fileMtimes[path];
  return recorded === mtime;
}
372
/**
 * Emits a `skills` record for every `<claudeHome>/skills/<name>/SKILL.md`
 * whose mtime differs from the prior cursor.
 *
 * Hidden entries (names starting with ".") and entries that are neither
 * directories nor symbolic links are ignored. Missing or unreadable files are
 * skipped without failing the scan.
 *
 * The new cursor (`newMtimes`) is only advanced for a file once it has been
 * read successfully. A file that stats fine but cannot be read keeps its
 * previous cursor entry, so it is retried on the next run instead of being
 * treated as already collected.
 *
 * @param {object} params
 * @param {string} params.claudeHome
 * @param {Set<string>} params.requested - Requested stream names.
 * @param {Function} params.emitRecord - `(stream, record) => Promise`.
 * @param {Record<string, number>} params.fileMtimes - Prior cursor.
 * @param {Record<string, number>} params.newMtimes - Next cursor (mutated).
 * @returns {Promise<void>}
 */
async function emitSkills({ claudeHome, requested, emitRecord, fileMtimes, newMtimes }) {
  if (!requested.has("skills")) {
    return;
  }
  const skillsDir = join(claudeHome, "skills");
  let entries;
  try {
    entries = await readdir(skillsDir, { withFileTypes: true });
  }
  catch {
    return;
  }
  for (const ent of entries) {
    if (!(ent.isDirectory() || ent.isSymbolicLink())) {
      continue;
    }
    if (ent.name.startsWith(".")) {
      continue;
    }
    const skillPath = join(skillsDir, ent.name, "SKILL.md");
    let st;
    try {
      st = statSync(skillPath);
    }
    catch {
      continue;
    }
    const mtimeMs = st.mtimeMs;
    if (fileMtimes[skillPath] === mtimeMs) {
      // Unchanged since the last run: carry the cursor forward.
      newMtimes[skillPath] = mtimeMs;
      continue;
    }
    let raw;
    try {
      raw = await readFile(skillPath, "utf8");
    }
    catch {
      // Leave the cursor untouched so the file is retried next run.
      continue;
    }
    newMtimes[skillPath] = mtimeMs;
    const { frontmatter, body } = parseFrontmatter(raw);
    await emitRecord("skills", buildSkillRecord({ name: ent.name, frontmatter, body, path: skillPath, mtimeMs }));
  }
}
413
/**
 * Emits a `slash_commands` record for one `.md` command file when its mtime
 * differs from the prior cursor.
 *
 * Files not ending in ".md", and files that cannot be stat'ed or read, are
 * skipped. The record id path is the file's base name (without ".md"),
 * prefixed by `args.prefix` when one is given.
 *
 * The new cursor (`args.newMtimes`) is only advanced once the file has been
 * read successfully, so a failed read is retried on the next run instead of
 * being recorded as collected.
 *
 * @param {object} args
 * @param {string} args.full - Path of the file.
 * @param {string} args.name - File name.
 * @param {string} args.prefix - "/"-joined parent path, or "".
 * @param {Function} args.emitRecord
 * @param {Record<string, number>} args.fileMtimes - Prior cursor.
 * @param {Record<string, number>} args.newMtimes - Next cursor (mutated).
 * @returns {Promise<void>}
 */
async function processSlashCommandFile(args) {
  if (!args.name.endsWith(".md")) {
    return;
  }
  let st;
  try {
    st = statSync(args.full);
  }
  catch {
    return;
  }
  const mtimeMs = st.mtimeMs;
  if (args.fileMtimes[args.full] === mtimeMs) {
    // Unchanged since the last run: carry the cursor forward.
    args.newMtimes[args.full] = mtimeMs;
    return;
  }
  let raw;
  try {
    raw = await readFile(args.full, "utf8");
  }
  catch {
    // Leave the cursor untouched so the file is retried next run.
    return;
  }
  args.newMtimes[args.full] = mtimeMs;
  const { frontmatter, body } = parseFrontmatter(raw);
  const base = basename(args.name, ".md");
  const idPath = args.prefix ? `${args.prefix}/${base}` : base;
  await args.emitRecord("slash_commands", buildSlashCommandRecord({ idPath, base, frontmatter, body, path: args.full, mtimeMs }));
}
439
/**
 * Walks `<claudeHome>/commands` recursively and passes every file or
 * symbolic link to `processSlashCommandFile`, when the `slash_commands`
 * stream was requested.
 *
 * Names starting with "." are ignored and unreadable directories are
 * skipped. Nested directory names are accumulated into a "/"-joined prefix.
 *
 * @param {object} params - `claudeHome`, `requested`, `emitRecord`,
 *   `fileMtimes`, `newMtimes`.
 * @returns {Promise<void>}
 */
async function emitSlashCommands({ claudeHome, requested, emitRecord, fileMtimes, newMtimes, }) {
  if (!requested.has("slash_commands")) {
    return;
  }

  async function descend(currentDir, prefix) {
    let dirents;
    try {
      dirents = await readdir(currentDir, { withFileTypes: true });
    }
    catch {
      return;
    }
    for (const dirent of dirents) {
      const { name } = dirent;
      if (name.startsWith(".")) {
        continue;
      }
      const full = join(currentDir, name);
      if (dirent.isDirectory()) {
        const nestedPrefix = prefix ? `${prefix}/${name}` : name;
        await descend(full, nestedPrefix);
      }
      else if (dirent.isFile() || dirent.isSymbolicLink()) {
        await processSlashCommandFile({ full, name, prefix, emitRecord, fileMtimes, newMtimes });
      }
    }
  }

  await descend(join(claudeHome, "commands"), "");
}
469
/**
 * Emits a `memory_notes` record for every Markdown file under
 * `<projectPath>/memory` whose mtime differs from the prior cursor.
 *
 * Files are discovered with `readFilesRecursively` (files or symbolic links
 * whose name matches `MD_FILE_RE`). Files that cannot be stat'ed or read are
 * skipped.
 *
 * The new cursor (`newMtimes`) is only advanced once a file has been read
 * successfully, so a failed read is retried on the next run instead of being
 * recorded as collected.
 *
 * @param {object} params
 * @param {Function} params.emitRecord
 * @param {Record<string, number>} params.fileMtimes - Prior cursor.
 * @param {Record<string, number>} params.newMtimes - Next cursor (mutated).
 * @param {string} params.projectDir
 * @param {string} params.projectPath
 * @param {Set<string>} params.requested
 * @returns {Promise<void>}
 */
async function emitProjectMemoryNotes({ emitRecord, fileMtimes, newMtimes, projectDir, projectPath, requested, }) {
  if (!requested.has("memory_notes")) {
    return;
  }
  const memoryDir = join(projectPath, "memory");
  const files = await readFilesRecursively(memoryDir, (ent) => (ent.isFile() || ent.isSymbolicLink()) && MD_FILE_RE.test(ent.name));
  for (const { fullPath, relPath } of files) {
    let st;
    try {
      st = statSync(fullPath);
    }
    catch {
      continue;
    }
    const mtimeMs = st.mtimeMs;
    if (fileMtimes[fullPath] === mtimeMs) {
      // Unchanged since the last run: carry the cursor forward.
      newMtimes[fullPath] = mtimeMs;
      continue;
    }
    let raw;
    try {
      raw = await readFile(fullPath, "utf8");
    }
    catch {
      // Leave the cursor untouched so the file is retried next run.
      continue;
    }
    newMtimes[fullPath] = mtimeMs;
    const { frontmatter, body } = parseFrontmatter(raw);
    await emitRecord("memory_notes", buildMemoryNoteRecord({ projectDir, relPath, frontmatter, body, path: fullPath, mtimeMs }));
  }
}
497
/**
 * Parses one JSONL file unless its mtime matches the prior cursor, and
 * records the mtime in `args.newMtimes` either way.
 *
 * A PROGRESS message ("Indexing" in build-only mode, "Emitting" otherwise)
 * with the file size in MB is emitted before parsing. Files that cannot be
 * stat'ed are skipped without touching the cursor.
 *
 * @param {object} params
 * @param {object} params.args - Scan arguments (`buildOnly`, `emit`,
 *   `emitRecord`, `requested`, `sessionAccumulators`, `fileMtimes`,
 *   `newMtimes`).
 * @param {string | null} params.forcedSessionId
 * @param {string} params.path
 * @param {string} params.progressLabel - Label used in the PROGRESS message.
 * @param {string} params.projectDir
 * @returns {Promise<void>}
 */
async function processJsonlFile({ args, forcedSessionId, path, progressLabel, projectDir, }) {
  let fileStat;
  try {
    fileStat = statSync(path);
  }
  catch {
    return;
  }

  const { mtimeMs, size } = fileStat;
  const changed = args.fileMtimes[path] !== mtimeMs;
  if (changed) {
    const verb = args.buildOnly ? "Indexing" : "Emitting";
    const sizeMb = (size / BYTES_PER_MB).toFixed(1);
    await args.emit({
      type: "PROGRESS",
      message: `${verb} ${progressLabel} (${sizeMb}MB)`,
    });
    await parseJsonlFile({
      buildOnly: args.buildOnly,
      emit: args.emit,
      emitRecord: args.emitRecord,
      forcedSessionId,
      path,
      projectDir,
      requested: args.requested,
      sessionAccumulators: args.sessionAccumulators,
    });
  }
  // Reached for unchanged files and after a completed parse alike.
  args.newMtimes[path] = mtimeMs;
}
526
/**
 * Processes every `.jsonl` file that sits directly inside a project
 * directory, one after another, without a forced session id.
 *
 * @param {import("node:fs").Dirent[]} entries - Listing of `projectPath`.
 * @param {string} projectPath
 * @param {string} projectDir - Directory name, used in progress labels.
 * @param {object} args - Scan arguments forwarded to `processJsonlFile`.
 * @returns {Promise<void>}
 */
async function processTopLevelJsonl(entries, projectPath, projectDir, args) {
  const jsonlNames = [];
  for (const entry of entries) {
    if (entry.isFile() && entry.name.endsWith(".jsonl")) {
      jsonlNames.push(entry.name);
    }
  }
  for (const fileName of jsonlNames) {
    await processJsonlFile({
      args,
      forcedSessionId: null,
      path: join(projectPath, fileName),
      progressLabel: `${projectDir}/${fileName}`,
      projectDir,
    });
  }
}
538
/**
 * Lists the `.jsonl` files (or symbolic links) below a subagents directory.
 *
 * @param {string} subagentsDir
 * @returns {Promise<string[]>} Paths relative to `subagentsDir`.
 */
async function readSubagentFiles(subagentsDir) {
  const isJsonl = (ent) => (ent.isFile() || ent.isSymbolicLink()) && ent.name.endsWith(".jsonl");
  const found = await readFilesRecursively(subagentsDir, isJsonl);
  return found.map(({ relPath }) => relPath);
}
542
/**
 * Processes one session directory inside a project: its subagent JSONL
 * transcripts and, in the emitting pass, its tool-result files.
 *
 * Subagent transcripts are parsed with the directory name forced as their
 * session id.
 *
 * Tool-result files are walked only when `args.buildOnly` is false. The
 * caller in this file scans twice per run (build-only first, emitting
 * second) against the same prior cursor (`args.fileMtimes`), and the
 * tool-result walk itself has no build-only mode, so walking in both passes
 * would read and emit every changed tool-result file twice.
 *
 * @param {import("node:fs").Dirent} sessEnt - Entry of the session dir.
 * @param {string} projectPath
 * @param {string} projectDir - Directory name, used in progress labels.
 * @param {object} args - Scan arguments (`buildOnly`, `requested`, `emit`,
 *   `emitRecord`, `fileMtimes`, `newMtimes`, ...).
 * @returns {Promise<void>}
 */
async function processSessionDir(sessEnt, projectPath, projectDir, args) {
  const sessionId = sessEnt.name;
  const sessionDir = join(projectPath, sessionId);
  const subagentsDir = join(sessionDir, "subagents");
  const subFiles = await readSubagentFiles(subagentsDir);
  for (const f of subFiles) {
    await processJsonlFile({
      args,
      forcedSessionId: sessionId,
      path: join(subagentsDir, f),
      progressLabel: `${projectDir}/${sessionId}/subagents/${f}`,
      projectDir,
    });
  }
  if (args.buildOnly) {
    // Indexing pass: attachments are emitted by the later emitting pass.
    return;
  }
  await walkToolResults({
    sessionDir,
    sessionId,
    projectDir,
    requested: args.requested,
    emit: args.emit,
    emitRecord: args.emitRecord,
    fileMtimes: args.fileMtimes,
    newMtimes: args.newMtimes,
  });
}
567
/**
 * Scans a single project directory: memory notes (build-only pass only),
 * top-level JSONL transcripts, then each session sub-directory whose name
 * matches `SESSION_DIR_PREFIX_RE`.
 *
 * Returns quietly when the directory cannot be listed.
 *
 * @param {string} projectDir - Name of the directory under `args.baseDir`.
 * @param {object} args - Scan arguments.
 * @returns {Promise<void>}
 */
async function scanProjectDir(projectDir, args) {
  const projectPath = join(args.baseDir, projectDir);

  let entries;
  try {
    entries = await readdir(projectPath, { withFileTypes: true });
  }
  catch {
    return;
  }

  if (args.buildOnly) {
    const { requested, emitRecord } = args;
    await emitProjectMemoryNotes({
      projectDir,
      projectPath,
      requested,
      emitRecord,
      fileMtimes: args.memoryNoteMtimes ?? {},
      newMtimes: args.newMemoryNoteMtimes ?? {},
    });
  }

  await processTopLevelJsonl(entries, projectPath, projectDir, args);

  const isSessionDir = (entry) => entry.isDirectory() && SESSION_DIR_PREFIX_RE.test(entry.name);
  const sessionDirs = entries.filter(isSessionDir);
  for (const sessionEntry of sessionDirs) {
    await processSessionDir(sessionEntry, projectPath, projectDir, args);
  }
}
592
/**
 * Lists the project directory names in scope for this run.
 *
 * Names starting with "." are dropped, then the include/exclude lists from
 * the `CLAUDE_CODE_PROJECT_INCLUDE` and `CLAUDE_CODE_PROJECT_EXCLUDE`
 * environment variables are applied via `applyProjectDirScope`.
 *
 * When `baseDir` cannot be listed, a `SKIP_RESULT` for the `sessions` stream
 * is emitted and `null` is returned.
 *
 * @param {string} baseDir
 * @param {Function} emit
 * @returns {Promise<string[] | null>}
 */
async function listProjectDirs(baseDir, emit) {
  let visibleNames;
  try {
    const allNames = await readdir(baseDir);
    visibleNames = allNames.filter((name) => !name.startsWith("."));
  }
  catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    await emit({
      type: "SKIP_RESULT",
      stream: "sessions",
      reason: "claude_dir_not_found",
      message: `${baseDir} not readable: ${detail}`,
    });
    return null;
  }

  const { CLAUDE_CODE_PROJECT_INCLUDE, CLAUDE_CODE_PROJECT_EXCLUDE } = process.env;
  const include = parseCsvEnv(CLAUDE_CODE_PROJECT_INCLUDE);
  const exclude = parseCsvEnv(CLAUDE_CODE_PROJECT_EXCLUDE);
  return applyProjectDirScope(visibleNames, include, exclude);
}
611
/**
 * Scans every in-scope project directory under `args.baseDir`, one at a
 * time, after announcing how many are in scope with a PROGRESS message.
 *
 * Does nothing further when the directory listing is unavailable
 * (`listProjectDirs` returned `null`).
 *
 * @param {object} args - Scan arguments; `baseDir` and `emit` are used here,
 *   the whole object is forwarded to `scanProjectDir`.
 * @returns {Promise<void>}
 */
export async function scanProjectDirs(args) {
  const { baseDir, emit } = args;
  const inScope = await listProjectDirs(baseDir, emit);
  if (inScope === null) {
    return;
  }

  const message = `${inScope.length} project dirs in scope`;
  await args.emit({ type: "PROGRESS", message });

  for (const name of inScope) {
    await scanProjectDir(name, args);
  }
}
624
/**
 * Reports whether `path` can be stat'ed and is a directory.
 *
 * @param {string} path
 * @returns {Promise<boolean>} `false` when `stat` fails for any reason.
 */
async function isReadableDirectory(path) {
  return stat(path)
    .then((info) => info.isDirectory())
    .catch(() => false);
}
633
/**
 * Verifies that the directories backing the requested streams exist.
 *
 * - `sessions`, `messages`, `attachments`, `memory_notes` need
 *   `input.baseDir`.
 * - `skills` needs `<claudeHome>/skills`.
 * - `slash_commands` needs `<claudeHome>/commands`.
 *
 * @param {object} input
 * @param {string} input.baseDir
 * @param {string} input.claudeHome
 * @param {Set<string>} input.requested
 * @returns {Promise<void>}
 * @throws {Error} Listing every missing or unreadable directory.
 */
async function assertRequestedClaudeSources(input) {
  const missing = [];
  const requireDirectory = async (dir, description) => {
    if (!(await isReadableDirectory(dir))) {
      missing.push(description);
    }
  };

  const projectStreams = ["sessions", "messages", "attachments", "memory_notes"];
  if (projectStreams.some((stream) => input.requested.has(stream))) {
    await requireDirectory(input.baseDir, `CLAUDE_CODE_PROJECTS_DIR=${input.baseDir}`);
  }
  if (input.requested.has("skills")) {
    const skillsDir = join(input.claudeHome, "skills");
    await requireDirectory(skillsDir, `CLAUDE_CODE_HOME skills directory=${skillsDir}`);
  }
  if (input.requested.has("slash_commands")) {
    const commandsDir = join(input.claudeHome, "commands");
    await requireDirectory(commandsDir, `CLAUDE_CODE_HOME commands directory=${commandsDir}`);
  }

  if (missing.length > 0) {
    throw new Error(`requested Claude Code local source path(s) are missing or unreadable: ${missing.join(", ")}`);
  }
}
652
/**
 * Emits the inventory-style streams for the Claude Code home directory.
 *
 * In order: the per-stream records returned by `buildLocalSourceInventory`
 * (only for requested streams), a directory listing of `file-history` on the
 * `file_history` stream, and the inventory's coverage records on the
 * `coverage_diagnostics` stream.
 *
 * @param {object} input
 * @param {string} input.claudeHome
 * @param {Set<string>} input.requested
 * @param {Function} input.emitRecord
 * @returns {Promise<void>}
 */
async function emitLocalInventoryStreams(input) {
  const { claudeHome, requested } = input;
  const emitAll = async (stream, records) => {
    for (const record of records) {
      await input.emitRecord(stream, record);
    }
  };

  const inventory = await buildLocalSourceInventory("claude_code", claudeHome, CLAUDE_CODE_KNOWN_LOCAL_STORES);
  for (const [stream, records] of inventory.recordsByStream) {
    if (requested.has(stream)) {
      await emitAll(stream, records);
    }
  }

  if (requested.has("file_history")) {
    const historyRecords = await listDirectoryInventory({
      tool: "claude_code",
      sourceHome: claudeHome,
      relativeRoot: "file-history",
      store: "file_history",
      stream: "file_history",
      reason: "metadata-only until payload contract is approved",
    });
    await emitAll("file_history", historyRecords);
  }

  if (requested.has("coverage_diagnostics")) {
    await emitAll("coverage_diagnostics", inventory.coverage);
  }
}
681
/**
 * Runs the skills scan and the slash-commands scan, then emits a STATE
 * cursor for each of those streams that was requested.
 *
 * A scan that throws is reported as a PROGRESS message and does not stop the
 * other scan or the STATE emission.
 *
 * @param {string} claudeHome
 * @param {Set<string>} requested
 * @param {Function} emit
 * @param {Function} emitRecord
 * @param {object} state - `skillsMtimes`, `newSkillsMtimes`,
 *   `slashCommandMtimes`, `newSlashCommandMtimes`.
 * @returns {Promise<void>}
 */
async function runSkillsAndCommands(claudeHome, requested, emit, emitRecord, state) {
  const describeError = (err) => (err instanceof Error ? err.message : String(err));

  try {
    await emitSkills({
      claudeHome,
      requested,
      emitRecord,
      fileMtimes: state.skillsMtimes,
      newMtimes: state.newSkillsMtimes,
    });
  }
  catch (err) {
    await emit({ type: "PROGRESS", message: `skills scan skipped: ${describeError(err)}` });
  }

  try {
    await emitSlashCommands({
      claudeHome,
      requested,
      emitRecord,
      fileMtimes: state.slashCommandMtimes,
      newMtimes: state.newSlashCommandMtimes,
    });
  }
  catch (err) {
    await emit({ type: "PROGRESS", message: `slash_commands scan skipped: ${describeError(err)}` });
  }

  const cursorsByStream = [
    ["skills", state.newSkillsMtimes],
    ["slash_commands", state.newSlashCommandMtimes],
  ];
  for (const [stream, fileMtimes] of cursorsByStream) {
    if (!requested.has(stream)) {
      continue;
    }
    await emit({
      type: "STATE",
      stream,
      cursor: { file_mtimes: fileMtimes, fetched_at: nowIso() },
    });
  }
}
723
/**
 * Reads the `file_mtimes` map stored for one stream in the connector state.
 *
 * @param {object} state - Connector state keyed by stream name.
 * @param {string} stream
 * @returns {Record<string, number> | undefined} `undefined` when the stream
 *   has no state entry.
 */
function streamFileMtimes(state, stream) {
  const streamState = state[stream];
  if (streamState === undefined || streamState === null) {
    return undefined;
  }
  return streamState.file_mtimes;
}
726
// Entry point: only start the connector when this file is run directly.
if (isMainModule(import.meta.url)) {
  runConnector({
    name: "claude_code",
    validateRecord,
    /**
     * Collects the requested streams.
     *
     * Order: source checks, inventory streams, skills and slash commands,
     * then (when any project-backed stream is requested) a build-only scan
     * that feeds sessions and memory notes, followed by an emitting scan for
     * messages and attachments. A STATE cursor is emitted per stream.
     */
    async collect({ state, requested, emit, emitRecord }) {
      const { env } = process;
      const claudeHome = env.CLAUDE_CODE_HOME || join(homedir(), ".claude");
      const baseDir = env.CLAUDE_CODE_PROJECTS_DIR || join(claudeHome, "projects");
      await assertRequestedClaudeSources({ baseDir, claudeHome, requested });

      // Prior cursors. The messages cursor falls back to a top-level
      // `file_mtimes` entry in the state when no per-stream one exists.
      const typedState = state;
      const fileMtimes = streamFileMtimes(typedState, "messages") ?? typedState.file_mtimes ?? {};
      const skillsMtimes = streamFileMtimes(typedState, "skills") ?? {};
      const slashCommandMtimes = streamFileMtimes(typedState, "slash_commands") ?? {};
      const memoryNoteMtimes = streamFileMtimes(typedState, "memory_notes") ?? {};

      // Next cursors start as copies of the prior ones.
      const newSkillsMtimes = { ...skillsMtimes };
      const newSlashCommandMtimes = { ...slashCommandMtimes };
      const newMemoryNoteMtimes = { ...memoryNoteMtimes };

      await emitLocalInventoryStreams({ claudeHome, requested, emitRecord });
      await runSkillsAndCommands(claudeHome, requested, emit, emitRecord, {
        skillsMtimes,
        newSkillsMtimes,
        slashCommandMtimes,
        newSlashCommandMtimes,
      });

      const projectStreams = ["sessions", "messages", "attachments", "memory_notes"];
      if (!projectStreams.some((stream) => requested.has(stream))) {
        return;
      }

      const newMtimes = { ...fileMtimes };
      const sessionAccumulators = new Map();

      // Pass 1 (build-only): gather session data and emit memory notes.
      await scanProjectDirs({
        baseDir,
        buildOnly: true,
        emit,
        emitRecord,
        fileMtimes,
        newMtimes,
        memoryNoteMtimes,
        newMemoryNoteMtimes,
        requested,
        sessionAccumulators,
      });
      await emitSessionsFromAccumulators({ emitRecord, requested, sessionAccumulators });

      if (requested.has("sessions")) {
        await emit({
          type: "STATE",
          stream: "sessions",
          cursor: { fetched_at: nowIso() },
        });
      }
      if (requested.has("memory_notes")) {
        await emit({
          type: "STATE",
          stream: "memory_notes",
          cursor: { file_mtimes: newMemoryNoteMtimes, fetched_at: nowIso() },
        });
      }

      // Pass 2 (emitting): messages and attachments share one cursor, which
      // is stored under the "messages" stream.
      const wantsTranscripts = requested.has("messages") || requested.has("attachments");
      if (wantsTranscripts) {
        await scanProjectDirs({
          baseDir,
          buildOnly: false,
          emit,
          emitRecord,
          fileMtimes,
          newMtimes,
          requested,
          sessionAccumulators,
        });
        await emit({
          type: "STATE",
          stream: "messages",
          cursor: { file_mtimes: newMtimes, fetched_at: nowIso() },
        });
      }
    },
  });
}