@pdpp/local-collector 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49):
  1. package/README.md +48 -0
  2. package/dist/local-collector/bin/pdpp-local-collector.js +347 -0
  3. package/dist/local-collector/src/errors.d.ts +12 -0
  4. package/dist/local-collector/src/errors.js +20 -0
  5. package/dist/local-collector/src/runner.d.ts +16 -0
  6. package/dist/local-collector/src/runner.js +59 -0
  7. package/dist/polyfill-connectors/connectors/claude_code/index.js +806 -0
  8. package/dist/polyfill-connectors/connectors/claude_code/parsers.js +224 -0
  9. package/dist/polyfill-connectors/connectors/claude_code/schemas.js +120 -0
  10. package/dist/polyfill-connectors/connectors/claude_code/types.js +1 -0
  11. package/dist/polyfill-connectors/connectors/codex/index.js +880 -0
  12. package/dist/polyfill-connectors/connectors/codex/parsers.js +159 -0
  13. package/dist/polyfill-connectors/connectors/codex/schemas.js +118 -0
  14. package/dist/polyfill-connectors/connectors/codex/types.js +1 -0
  15. package/dist/polyfill-connectors/src/auth.js +76 -0
  16. package/dist/polyfill-connectors/src/browser-handoff.js +197 -0
  17. package/dist/polyfill-connectors/src/collector-protocol.d.ts +2 -0
  18. package/dist/polyfill-connectors/src/collector-protocol.js +2 -0
  19. package/dist/polyfill-connectors/src/collector-runner.d.ts +139 -0
  20. package/dist/polyfill-connectors/src/collector-runner.js +1084 -0
  21. package/dist/polyfill-connectors/src/connector-runtime-protocol.d.ts +191 -0
  22. package/dist/polyfill-connectors/src/connector-runtime-protocol.js +1 -0
  23. package/dist/polyfill-connectors/src/connector-runtime.js +879 -0
  24. package/dist/polyfill-connectors/src/fixture-capture.js +237 -0
  25. package/dist/polyfill-connectors/src/is-main-module.d.ts +1 -0
  26. package/dist/polyfill-connectors/src/is-main-module.js +17 -0
  27. package/dist/polyfill-connectors/src/local-device-client.d.ts +126 -0
  28. package/dist/polyfill-connectors/src/local-device-client.js +132 -0
  29. package/dist/polyfill-connectors/src/local-device-envelope.d.ts +26 -0
  30. package/dist/polyfill-connectors/src/local-device-envelope.js +43 -0
  31. package/dist/polyfill-connectors/src/local-device-outbox.d.ts +115 -0
  32. package/dist/polyfill-connectors/src/local-device-outbox.js +509 -0
  33. package/dist/polyfill-connectors/src/local-device-queue.d.ts +34 -0
  34. package/dist/polyfill-connectors/src/local-device-queue.js +133 -0
  35. package/dist/polyfill-connectors/src/local-source-inventory.js +119 -0
  36. package/dist/polyfill-connectors/src/pdpp-safe-text.js +13 -0
  37. package/dist/polyfill-connectors/src/runner/index.d.ts +11 -0
  38. package/dist/polyfill-connectors/src/runner/index.js +10 -0
  39. package/dist/polyfill-connectors/src/runtime-capabilities.d.ts +40 -0
  40. package/dist/polyfill-connectors/src/runtime-capabilities.js +59 -0
  41. package/dist/polyfill-connectors/src/safe-emit.d.ts +3 -0
  42. package/dist/polyfill-connectors/src/safe-emit.js +30 -0
  43. package/dist/polyfill-connectors/src/safe-text-preview.js +156 -0
  44. package/dist/polyfill-connectors/src/schema-registry.js +17 -0
  45. package/dist/polyfill-connectors/src/scope-filters.d.ts +38 -0
  46. package/dist/polyfill-connectors/src/scope-filters.js +80 -0
  47. package/dist/polyfill-connectors/src/shutdown-hook.js +51 -0
  48. package/dist/polyfill-connectors/src/streaming-target-registration.js +161 -0
  49. package/package.json +63 -0
@@ -0,0 +1,224 @@
1
+ import { safeTextPreview } from "../../src/safe-text-preview.js";
2
// ---------------------------------------------------------------------------
// Size limits
// ---------------------------------------------------------------------------

/** Default character budget used by `textPreview` when the caller gives none. */
export const SHORT_PREVIEW_CHARS = 300;

/** Preview budget for attachments; not referenced inside this module. */
export const ATTACHMENT_PREVIEW_CHARS = 500;

/** Preview budget for tool results; not referenced inside this module. */
export const TOOL_RESULT_PREVIEW_CHARS = 500;

/** Preview budget for message content; not referenced inside this module. */
export const MESSAGE_CONTENT_PREVIEW_CHARS = 5000;

/** Default cap applied by `truncateBody`, counted in `String.length` units. */
export const SKILL_BODY_MAX_CHARS = 20_000;

/** Presumably the number of lines between progress reports — confirm at the call site. */
export const LINE_PROGRESS_INTERVAL = 2000;

/** Bytes in one mebibyte (1024 * 1024). */
export const BYTES_PER_MB = 1024 ** 2;

// ---------------------------------------------------------------------------
// Patterns
// ---------------------------------------------------------------------------

/** Matches names that start with 8 hex digits, a dash, 4 hex digits and a dash. */
export const SESSION_DIR_PREFIX_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-/;

/** Splits a document into (1) the text between the `---` fences and (2) everything after. */
const CLAUDE_FRONTMATTER_RE = /^---\r?\n([\s\S]*?)\r?\n---\r?\n?([\s\S]*)$/;

/** `key: value` line; the key must start in column 0. */
const CLAUDE_FM_LINE_RE = /^([A-Za-z0-9_-]+)\s*:\s*(.*)$/;

/** Line whose first non-blank character is `#`. */
const CLAUDE_FM_COMMENT_RE = /^\s*#/;

/** Line that begins with whitespace and has some non-blank content. */
const CLAUDE_FM_INDENT_RE = /^\s+\S/;

/** Leading run of whitespace. */
const CLAUDE_FM_LEADING_WS_RE = /^\s+/;

/** Whole value wrapped in double quotes. */
const CLAUDE_FM_QUOTED_DOUBLE_RE = /^"([\s\S]*)"$/;

/** Whole value wrapped in single quotes. */
const CLAUDE_FM_QUOTED_SINGLE_RE = /^'([\s\S]*)'$/;

/** Any run of whitespace (global, for use with `replace`). */
const CLAUDE_FM_COLLAPSE_WS_RE = /\s+/g;

/** LF or CRLF line break. */
const CLAUDE_FM_LINE_SPLIT_RE = /\r?\n/;

/** Trailing `.md` extension, case-insensitive. */
const CLAUDE_MD_SUFFIX_RE = /\.md$/i;
20
/**
 * Return the `preview` field produced by `safeTextPreview` for the given input.
 *
 * @param {*} s - Value forwarded unchanged to `safeTextPreview`.
 * @param {number} [max=SHORT_PREVIEW_CHARS] - Limit forwarded to `safeTextPreview`.
 * @returns {*} Whatever `safeTextPreview(s, max).preview` holds.
 */
export function textPreview(s, max = SHORT_PREVIEW_CHARS) {
  const { preview } = safeTextPreview(s, max);
  return preview;
}
23
/**
 * Cap `body` at `max` UTF-16 code units without splitting a surrogate pair.
 *
 * Strings at or below the limit are returned untouched. When the cut would
 * land between the two halves of an astral character (emoji and the like),
 * the cut moves back one unit so the result never ends in a lone high
 * surrogate. The result is therefore at most `max` units long.
 *
 * @param {string} body - Text to cap.
 * @param {number} [max=SKILL_BODY_MAX_CHARS] - Maximum length in `String.length` units.
 * @returns {string} `body` itself, or a prefix of it.
 */
export function truncateBody(body, max = SKILL_BODY_MAX_CHARS) {
  if (!(body.length > max)) {
    return body;
  }
  let end = max;
  if (end > 0) {
    const before = body.charCodeAt(end - 1);
    const after = body.charCodeAt(end);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) {
      // Drop the orphaned high surrogate rather than emit malformed UTF-16.
      end -= 1;
    }
  }
  return body.slice(0, end);
}
26
/**
 * Turn one element of a content array into display text.
 *
 * - strings pass through unchanged;
 * - `{ type: "text" }` yields its `text` when that is truthy;
 * - `{ type: "tool_use" }` yields a `[tool_use: <name>]` marker;
 * - `{ type: "tool_result" }` yields a `[tool_result]` marker;
 * - anything else yields the empty string.
 *
 * @param {*} p - Array element to convert.
 * @returns {*} Text for the element, or `""` when nothing applies.
 */
function extractFromArrayPart(p) {
  if (typeof p === "string") {
    return p;
  }
  switch (p?.type) {
    case "text":
      return p.text || "";
    case "tool_use":
      return `[tool_use: ${p.name || "unknown"}]`;
    case "tool_result":
      return "[tool_result]";
    default:
      return "";
  }
}
42
/**
 * Convert every element with `extractFromArrayPart`, drop the falsy results
 * and join what is left with newlines.
 *
 * @param {Array<*>} arr - Content parts.
 * @returns {string|null} Joined text, or `null` when the joined text is empty.
 */
function extractFromArray(arr) {
  const pieces = [];
  for (const item of arr) {
    const text = extractFromArrayPart(item);
    if (text) {
      pieces.push(text);
    }
  }
  const joined = pieces.join("\n");
  return joined ? joined : null;
}
46
/**
 * Pull text out of a non-array object.
 *
 * A truthy `content` property is handed back to `extractContent`; otherwise a
 * string `text` property is returned as-is; otherwise the result is `null`.
 *
 * @param {object} obj - Object to inspect.
 * @returns {string|null} Extracted text, or `null`.
 */
function extractFromObject(obj) {
  if (!obj.content) {
    return typeof obj.text === "string" ? obj.text : null;
  }
  return extractContent(obj.content);
}
55
/**
 * Extract text from a value that may be a string, an array of parts, or an
 * object carrying `content` / `text`.
 *
 * @param {*} obj - Value to extract text from.
 * @returns {string|null} The extracted text; `null` for falsy input and for
 *   any type other than string, array or object.
 */
export function extractContent(obj) {
  if (!obj) {
    return null;
  }
  switch (typeof obj) {
    case "string":
      return obj;
    case "object":
      // Arrays report "object" too, so tell them apart here.
      return Array.isArray(obj) ? extractFromArray(obj) : extractFromObject(obj);
    default:
      return null;
  }
}
70
/**
 * Remove one pair of surrounding double quotes and/or single quotes from a
 * frontmatter value and trim the result.
 *
 * The value is trimmed before the quote patterns run: they are anchored to
 * both ends of the string, so trailing whitespace after the closing quote
 * would otherwise leave the quotes in place.
 *
 * @param {string} value - Raw text following `key:`.
 * @returns {string} The unquoted, trimmed value.
 */
function stripQuotes(value) {
  return value
    .trim()
    .replace(CLAUDE_FM_QUOTED_DOUBLE_RE, "$1")
    .replace(CLAUDE_FM_QUOTED_SINGLE_RE, "$1")
    .trim();
}
73
/**
 * YAML block-scalar header: `|` or `>` followed by an optional indentation
 * indicator (1-9) and an optional chomping indicator (`+` / `-`), in either order.
 */
const CLAUDE_FM_BLOCK_SCALAR_HEADER_RE = /^[|>](?:[1-9][+-]?|[+-][1-9]?)?$/;

/**
 * Tell whether a frontmatter value is a block-scalar header, meaning the real
 * value is on the indented lines that follow.
 *
 * Besides `>`, `|`, `>-` and `|-`, this accepts the keep-chomping forms
 * (`>+`, `|+`), explicit indentation indicators (`|2`, `>2-`, ...) and
 * whitespace after the marker.
 *
 * @param {string} value - Raw text following `key:`.
 * @returns {boolean} `true` when the value is a block-scalar header.
 */
function isBlockScalar(value) {
  return typeof value === "string" && CLAUDE_FM_BLOCK_SCALAR_HEADER_RE.test(value.trim());
}
76
/**
 * Collect the lines of a block scalar that starts at `startIdx`.
 *
 * Lines are consumed while they are blank or indented; the first non-blank,
 * non-indented line ends the scalar. Whitespace-only lines count as blank, so
 * stray trailing spaces on an empty line no longer cut the scalar short.
 * Leading whitespace is removed from every collected line.
 *
 * @param {string[]} lines - Frontmatter lines.
 * @param {number} startIdx - Index of the first line after the `key: >` / `key: |` header.
 * @param {string} marker - Header text; a leading `>` selects folding.
 * @returns {{ nextIndex: number, value: string }} `nextIndex` is the first
 *   line not consumed. `value` is the trimmed text: joined with single spaces
 *   (whitespace runs collapsed) when folded, joined with newlines otherwise.
 */
function readBlockScalar(lines, startIdx, marker) {
  const folded = marker.startsWith(">");
  const collected = [];
  let i = startIdx;
  for (; i < lines.length; i++) {
    const next = lines[i] ?? "";
    const isBlank = next.trim() === "";
    if (!isBlank && !CLAUDE_FM_INDENT_RE.test(next)) {
      break;
    }
    collected.push(next.replace(CLAUDE_FM_LEADING_WS_RE, ""));
  }
  const value = folded
    ? collected.join(" ").replace(CLAUDE_FM_COLLAPSE_WS_RE, " ").trim()
    : collected.join("\n").trim();
  return { nextIndex: i, value };
}
95
/**
 * Split a document into its `---` fenced frontmatter and the remaining body.
 *
 * Only flat `key: value` lines are understood. Blank lines, `#` comments and
 * lines that do not look like `key: value` are skipped. A value accepted by
 * `isBlockScalar` is read from the following lines via `readBlockScalar`;
 * every other value goes through `stripQuotes`. Later duplicates of a key
 * overwrite earlier ones.
 *
 * @param {*} text - Document text.
 * @returns {{ frontmatter: Object<string, string>, body: * }} For non-string
 *   input, `frontmatter` is empty and `body` is `text || ""`. For a string
 *   without a frontmatter fence, `body` is the whole string.
 */
export function parseFrontmatter(text) {
  if (typeof text !== "string") {
    return { frontmatter: {}, body: text || "" };
  }
  const fenced = CLAUDE_FRONTMATTER_RE.exec(text);
  if (!fenced) {
    return { frontmatter: {}, body: text };
  }
  const [, rawFm = "", body = ""] = fenced;
  const frontmatter = {};
  const lines = rawFm.split(CLAUDE_FM_LINE_SPLIT_RE);

  let cursor = 0;
  while (cursor < lines.length) {
    const line = lines[cursor] ?? "";
    const skippable = !line.trim() || CLAUDE_FM_COMMENT_RE.test(line);
    const kv = skippable ? null : CLAUDE_FM_LINE_RE.exec(line);
    if (!kv) {
      cursor += 1;
      continue;
    }
    const [, key = "", rawValue = ""] = kv;
    if (!isBlockScalar(rawValue)) {
      frontmatter[key] = stripQuotes(rawValue);
      cursor += 1;
      continue;
    }
    // The helper reports where the scalar ended; resume from there.
    const block = readBlockScalar(lines, cursor + 1, rawValue);
    frontmatter[key] = block.value;
    cursor = block.nextIndex;
  }
  return { frontmatter, body };
}
133
/**
 * Create a fresh session accumulator: identity fields filled in, the message
 * counter at zero and every other field `null`.
 *
 * @param {*} id - Stored as `id`.
 * @param {*} projectPath - Stored as `project_path`.
 * @returns {object} A new accumulator object.
 */
export function makeEmptySessionAccumulator(id, projectPath) {
  const accumulator = {
    // identity
    id,
    project_path: projectPath,
    // observed metadata, filled in later
    cwd: null,
    git_branch: null,
    version: null,
    // time range
    started_at: null,
    last_event_at: null,
    // counters and remaining metadata
    message_count: 0,
    user_type: null,
    entrypoint: null,
  };
  return accumulator;
}
147
/**
 * Copy every truthy observation from `obs` onto the accumulator, translating
 * camelCase observation names to the accumulator's snake_case fields. Falsy
 * observations leave the existing accumulator value untouched.
 *
 * @param {object} acc - Accumulator, mutated in place.
 * @param {object} obs - Reads `cwd`, `gitBranch`, `version`, `userType`, `entrypoint`.
 * @returns {void}
 */
export function mergeSessionObservations(acc, obs) {
  const fieldMap = [
    ["cwd", "cwd"],
    ["gitBranch", "git_branch"],
    ["version", "version"],
    ["userType", "user_type"],
    ["entrypoint", "entrypoint"],
  ];
  for (const [source, target] of fieldMap) {
    const observed = obs[source];
    if (observed) {
      acc[target] = observed;
    }
  }
}
164
/**
 * Stretch the accumulator's `[started_at, last_event_at]` range so that it
 * also covers the given timestamps. Falsy timestamps are ignored. Values are
 * compared with `<` / `>`.
 *
 * @param {object} acc - Accumulator, mutated in place.
 * @param {*} firstTimestamp - Candidate for `started_at`.
 * @param {*} lastTimestamp - Candidate for `last_event_at`.
 * @returns {void}
 */
export function widenSessionTimeRange(acc, firstTimestamp, lastTimestamp) {
  if (firstTimestamp) {
    const isEarlier = !acc.started_at || firstTimestamp < acc.started_at;
    if (isEarlier) {
      acc.started_at = firstTimestamp;
    }
  }
  if (lastTimestamp) {
    const isLater = !acc.last_event_at || lastTimestamp > acc.last_event_at;
    if (isLater) {
      acc.last_event_at = lastTimestamp;
    }
  }
}
172
/**
 * Assemble a skill record from a parsed file.
 *
 * @param {object} args
 * @param {string} args.name - Used in the id and as the fallback display name.
 * @param {object} args.frontmatter - `name` and `description` are read; the object is also stored whole.
 * @param {*} args.path - Stored as `path`.
 * @param {string} args.body - Stored as `content` after `truncateBody`.
 * @param {number} args.mtimeMs - Milliseconds; stored as whole seconds, rounded down.
 * @returns {object} Record with id `skills:<name>` and `source` fixed to `"user"`.
 */
export function buildSkillRecord(args) {
  const { name, frontmatter, path, body, mtimeMs } = args;
  const displayName = frontmatter.name || name;
  const description = frontmatter.description || null;
  return {
    id: `skills:${name}`,
    name: displayName,
    description,
    source: "user",
    path,
    content: truncateBody(body),
    frontmatter,
    mtime_epoch: Math.floor(mtimeMs / 1000),
  };
}
184
/**
 * Assemble a memory-note record from a parsed file.
 *
 * The display name is the frontmatter `name`, else the frontmatter `title`,
 * else the relative path with a trailing `.md` removed.
 *
 * @param {object} args
 * @param {string} args.projectDir - Stored as `project_path`; also part of the id.
 * @param {string} args.relPath - Stored as `note_path`; also part of the id.
 * @param {object} args.frontmatter - `name`, `title`, `description` are read; stored whole.
 * @param {*} args.path - Stored as `path`.
 * @param {string} args.body - Stored as `content` after `truncateBody`.
 * @param {number} args.mtimeMs - Milliseconds; stored as whole seconds, rounded down.
 * @returns {object} Record with id `memory_notes:<projectDir>/<relPath>`.
 */
export function buildMemoryNoteRecord(args) {
  const { projectDir, relPath, frontmatter, path, body, mtimeMs } = args;
  const fallbackName = relPath.replace(CLAUDE_MD_SUFFIX_RE, "");
  const displayName = frontmatter.name || frontmatter.title || fallbackName;
  return {
    id: `memory_notes:${projectDir}/${relPath}`,
    project_path: projectDir,
    note_path: relPath,
    name: displayName,
    description: frontmatter.description || null,
    path,
    content: truncateBody(body),
    frontmatter,
    mtime_epoch: Math.floor(mtimeMs / 1000),
  };
}
198
/**
 * Assemble a slash-command record from a parsed file.
 *
 * @param {object} args
 * @param {string} args.idPath - Suffix of the record id.
 * @param {string} args.base - Fallback display name when the frontmatter has no `name`.
 * @param {object} args.frontmatter - `name` and `description` are read; stored whole.
 * @param {*} args.path - Stored as `path`.
 * @param {string} args.body - Stored as `content` after `truncateBody`.
 * @param {number} args.mtimeMs - Milliseconds; stored as whole seconds, rounded down.
 * @returns {object} Record with id `commands:<idPath>`.
 */
export function buildSlashCommandRecord(args) {
  const { idPath, base, frontmatter, path, body, mtimeMs } = args;
  const displayName = frontmatter.name || base;
  const description = frontmatter.description || null;
  return {
    id: `commands:${idPath}`,
    name: displayName,
    description,
    path,
    content: truncateBody(body),
    frontmatter,
    mtime_epoch: Math.floor(mtimeMs / 1000),
  };
}
209
/**
 * Narrow a list of directory names by substring filters.
 *
 * With a non-empty `include`, a directory must contain at least one of its
 * entries; with a non-empty `exclude`, it must contain none of them. When
 * both lists are empty the input array itself is returned, not a copy.
 *
 * @param {string[]} dirs - Candidate directory names.
 * @param {string[]} include - Substrings of which at least one must match.
 * @param {string[]} exclude - Substrings of which none may match.
 * @returns {string[]} The surviving directories, in their original order.
 */
export function applyProjectDirScope(dirs, include, exclude) {
  if (!include.length && !exclude.length) {
    return dirs;
  }
  const containsAny = (dir, needles) => needles.some((needle) => dir.includes(needle));
  return dirs.filter((dir) => {
    if (include.length && !containsAny(dir, include)) {
      return false;
    }
    return !(exclude.length && containsAny(dir, exclude));
  });
}
219
/**
 * Split a comma-separated setting into trimmed, non-empty entries.
 *
 * @param {string|null|undefined} value - Raw value; falsy input is treated as empty.
 * @returns {string[]} Entries in their original order; empty when nothing is left.
 */
export function parseCsvEnv(value) {
  const entries = [];
  for (const piece of (value || "").split(",")) {
    const trimmed = piece.trim();
    if (trimmed) {
      entries.push(trimmed);
    }
  }
  return entries;
}
@@ -0,0 +1,120 @@
1
+ import { z } from "zod";
2
+ import { pdppSafeText } from "../../src/pdpp-safe-text.js";
3
+ import { PDPP_PREVIEW_MAX_CHARS, safeTextPreview } from "../../src/safe-text-preview.js";
4
+ import { makeValidateRecord } from "../../src/schema-registry.js";
5
/** Hyphenated 8-4-4-4-12 hexadecimal UUID, case-insensitive. */
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

/** `YYYY-MM-DDTHH:MM:SS.mmmZ`: exactly three fractional digits and a literal `Z`. */
const ISO_Z_RE = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$/;

/** Required string that must match `UUID_RE`. */
const uuidSchema = z.string().regex(UUID_RE, "must be valid UUID");

/** String matching `ISO_Z_RE`, or `null`. */
const isoDateTimeSchema = z
  .string()
  .regex(ISO_Z_RE, "must be ISO-8601 with millis and Z suffix")
  .nullable();

/**
 * Build a nullable `pdppSafeText` schema limited to `max` characters.
 *
 * @param {number} max - Maximum length passed to `.max()`.
 */
function stringMaxSchema(max) {
  return pdppSafeText.max(max).nullable();
}

/** Nullable safe-text path of at most 2048 characters. */
const pathSchema = stringMaxSchema(2048);
11
/**
 * Shape of a `sessions` record. Only `id` and `project_path` are required to
 * be non-null; every other field accepts `null`.
 */
export const sessionsSchema = z.object({
  // identity
  id: uuidSchema,
  project_path: pdppSafeText,

  // environment observed in the session
  cwd: pathSchema,
  git_branch: stringMaxSchema(256),
  version: stringMaxSchema(64),

  // time range
  started_at: isoDateTimeSchema,
  last_event_at: isoDateTimeSchema,

  // non-negative integer or null
  message_count: z.number().int().min(0).nullable(),

  user_type: stringMaxSchema(40),
  entrypoint: stringMaxSchema(256),
});
23
/**
 * Shape of a `messages` record. `id`, `session_id` and `is_sidechain` are
 * required; the remaining fields accept `null`.
 */
export const messagesSchema = z.object({
  // identity and threading
  id: uuidSchema,
  session_id: uuidSchema,
  parent_uuid: uuidSchema.nullable(),

  // classification
  role: stringMaxSchema(64),
  type: stringMaxSchema(64),

  // payload: safe text up to 10,000,000 characters
  content: pdppSafeText.max(10_000_000).nullable(),
  timestamp: isoDateTimeSchema,

  is_sidechain: z.boolean(),
  user_type: stringMaxSchema(40),
  agent_id: stringMaxSchema(256).nullable(),
});
35
/**
 * Refinement for `content_preview`: accept the string only when
 * `safeTextPreview` classifies it as `"text"` or `"empty"`.
 *
 * @param {string} val - Candidate preview.
 * @returns {boolean} `true` when the preview is acceptable.
 */
function isAcceptablePreview(val) {
  const { kind } = safeTextPreview(val, PDPP_PREVIEW_MAX_CHARS);
  return kind === "text" || kind === "empty";
}

/**
 * Shape of an `attachments` record. `content_binary_reason` is the only key
 * that may be omitted altogether.
 */
export const attachmentsSchema = z.object({
  // identity and threading
  id: pdppSafeText.min(1).max(2048),
  session_id: uuidSchema,
  parent_uuid: uuidSchema.nullable(),

  // origin of the attachment
  event_type: stringMaxSchema(64),
  hook_name: stringMaxSchema(256),
  tool_use_id: stringMaxSchema(256),

  // preview text: one character of slack over the preview limit, then the content check
  content_preview: z
    .string()
    .max(PDPP_PREVIEW_MAX_CHARS + 1)
    .refine(isAcceptablePreview, "content_preview contains forbidden control characters")
    .nullable(),
  content_binary_reason: pdppSafeText.max(200).nullable().optional(),
  content_bytes: z.number().int().min(0).nullable(),

  timestamp: isoDateTimeSchema,
});
54
/** Shape of a `skills` record. Only `id` is required to be non-null. */
export const skillsSchema = z.object({
  id: pdppSafeText,

  // descriptive metadata
  name: stringMaxSchema(256),
  description: stringMaxSchema(2048),
  source: stringMaxSchema(64),
  path: pathSchema,

  // file body and its frontmatter (string keys, unconstrained values)
  content: pdppSafeText.max(10_000_000).nullable(),
  frontmatter: z.record(z.string(), z.unknown()).nullable(),

  mtime_epoch: z.number().nullable(),
});
64
/**
 * Shape of a `memory_notes` record. `id`, `project_path` and `note_path` are
 * required; the remaining fields accept `null`.
 */
export const memoryNotesSchema = z.object({
  // identity
  id: pdppSafeText,
  project_path: pdppSafeText,
  note_path: pdppSafeText,

  // descriptive metadata
  name: stringMaxSchema(256),
  description: stringMaxSchema(2048),
  path: pathSchema,

  // file body and its frontmatter (string keys, unconstrained values)
  content: pdppSafeText.max(10_000_000).nullable(),
  frontmatter: z.record(z.string(), z.unknown()).nullable(),

  mtime_epoch: z.number().nullable(),
});
75
/** Shape of a `slash_commands` record. Only `id` is required to be non-null. */
export const slashCommandsSchema = z.object({
  id: pdppSafeText,

  // descriptive metadata
  name: stringMaxSchema(256),
  description: stringMaxSchema(2048),
  path: pathSchema,

  // file body and its frontmatter (string keys, unconstrained values)
  content: pdppSafeText.max(10_000_000).nullable(),
  frontmatter: z.record(z.string(), z.unknown()).nullable(),

  mtime_epoch: z.number().nullable(),
});
84
/** Allowed values of an inventory entry's `classification`. */
const inventoryClassificationSchema = z.enum(["inventory_only", "defer"]);

/** Allowed values of an inventory entry's `type`. */
const inventoryTypeSchema = z.enum(["directory", "file", "missing", "other"]);

/** Allowed values of a coverage diagnostic's `status`. */
const coverageStatusSchema = z.enum([
  "collected",
  "inventory_only",
  "excluded",
  "deferred",
  "missing",
  "unsupported",
]);

/**
 * Shape shared by the inventory-style records. Describes an entry by its
 * relative path, a hash, size and mtime.
 */
export const inventorySchema = z.object({
  // identity and location
  id: pdppSafeText,
  store: pdppSafeText,
  relative_path: pdppSafeText.max(2048),
  // 64 lowercase hex characters
  path_hash: z.string().regex(/^[a-f0-9]{64}$/),

  // entry metadata: non-negative integers or null
  type: inventoryTypeSchema,
  size_bytes: z.number().int().min(0).nullable(),
  mtime_epoch: z.number().int().min(0).nullable(),

  // why the entry was classified this way
  classification: inventoryClassificationSchema,
  reason: pdppSafeText.max(512),
});
98
/**
 * Shape of a `coverage_diagnostics` record: a status plus a reason of at most
 * 512 characters for a store, optionally tied to a stream (`stream` may be `null`).
 */
export const coverageDiagnosticsSchema = z.object({
  id: pdppSafeText,
  store: pdppSafeText,
  stream: pdppSafeText.nullable(),
  status: coverageStatusSchema,
  reason: pdppSafeText.max(512),
});
105
/**
 * Lookup table from record kind to its schema. The keys are presumably the
 * stream names handed to `validateRecord` — confirm in `schema-registry.js`.
 */
export const SCHEMAS = {
  // conversation data
  sessions: sessionsSchema,
  messages: messagesSchema,
  attachments: attachmentsSchema,

  // user-authored files
  skills: skillsSchema,
  memory_notes: memoryNotesSchema,
  slash_commands: slashCommandsSchema,

  // inventory-style entries all share one schema
  file_history: inventorySchema,
  debug_artifacts: inventorySchema,
  downloads: inventorySchema,
  cache_inventory: inventorySchema,
  backup_inventory: inventorySchema,
  config_inventory: inventorySchema,

  coverage_diagnostics: coverageDiagnosticsSchema,
};

/** Validator produced by `makeValidateRecord` from the `SCHEMAS` table. */
export const validateRecord = makeValidateRecord(SCHEMAS);