@pdpp/local-collector 0.1.0-beta.7 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/README.md +8 -8
  2. package/dist/local-collector/bin/pdpp-local-collector.js +580 -22
  3. package/dist/local-collector/src/runner.d.ts +1 -1
  4. package/dist/local-collector/src/runner.js +15 -1
  5. package/dist/polyfill-connectors/connectors/claude_code/index.js +60 -37
  6. package/dist/polyfill-connectors/connectors/codex/index.js +390 -108
  7. package/dist/polyfill-connectors/connectors/codex/parsers.js +5 -3
  8. package/dist/polyfill-connectors/src/bounded-file-preview.js +76 -0
  9. package/dist/polyfill-connectors/src/browser-handoff.js +38 -5
  10. package/dist/polyfill-connectors/src/collector-build-info.d.ts +8 -0
  11. package/dist/polyfill-connectors/src/collector-build-info.js +10 -0
  12. package/dist/polyfill-connectors/src/collector-runner.d.ts +54 -0
  13. package/dist/polyfill-connectors/src/collector-runner.js +250 -18
  14. package/dist/polyfill-connectors/src/connector-exit.js +62 -0
  15. package/dist/polyfill-connectors/src/connector-runtime-protocol.d.ts +41 -21
  16. package/dist/polyfill-connectors/src/connector-runtime.js +241 -30
  17. package/dist/polyfill-connectors/src/fingerprint-cursor.js +107 -0
  18. package/dist/polyfill-connectors/src/local-device-client.d.ts +17 -0
  19. package/dist/polyfill-connectors/src/local-device-client.js +69 -9
  20. package/dist/polyfill-connectors/src/local-device-outbox.d.ts +59 -0
  21. package/dist/polyfill-connectors/src/local-device-outbox.js +394 -5
  22. package/dist/polyfill-connectors/src/local-source-inventory.js +8 -1
  23. package/dist/polyfill-connectors/src/runner/index.d.ts +4 -3
  24. package/dist/polyfill-connectors/src/runner/index.js +4 -3
  25. package/dist/polyfill-connectors/src/safe-text-preview.js +13 -0
  26. package/dist/polyfill-connectors/src/static-secret-injection.js +151 -0
  27. package/package.json +2 -2
@@ -1,16 +1,26 @@
1
1
  #!/usr/bin/env node
2
- import { existsSync, readFileSync } from "node:fs";
3
- import { basename, dirname, extname, join } from "node:path";
2
+ import { existsSync, readFileSync, realpathSync } from "node:fs";
3
+ import { basename, dirname, extname, join, sep } from "node:path";
4
4
  import { fileURLToPath } from "node:url";
5
5
  import { ALLOW_CUSTOM_COMMAND_ENV, CollectorCustomCommandRefusedError, CollectorUsageError, } from "../src/errors.js";
6
- import { BUNDLED_CONNECTOR_IDS, COLLECTOR_PROTOCOL_VERSION, COLLECTOR_RUNTIME_CAPABILITIES, LocalDeviceOutbox, enrollCollector, getBundledConnector, isMainModule, runCollectorConnector, } from "../src/runner.js";
6
+ import { BUNDLED_CONNECTOR_IDS, COLLECTOR_PROTOCOL_VERSION, COLLECTOR_RUNTIME_CAPABILITIES, deriveLocalCollectorLifecycleState, LocalDeviceOutbox, enrollCollector, getBundledConnector, isMainModule, runCollectorConnector, } from "../src/runner.js";
7
+ const COVERAGE_DIAGNOSTICS_STREAM = "coverage_diagnostics";
7
8
  const DEFAULT_QUEUE_PATH = join(dirname(fileURLToPath(import.meta.url)), "..", ".pdpp-data", "collector-runner-queue.json");
8
9
  const LOCAL_COLLECTOR_PACKAGE_NAME = "@pdpp/local-collector";
9
10
  const LOCAL_COLLECTOR_PACKAGE_VERSION_FALLBACK = "0.0.0";
10
- export function resolveLocalCollectorPackageVersion(startUrl = import.meta.url) {
11
- let current = typeof startUrl === "string" && !startUrl.startsWith("file:")
12
- ? dirname(startUrl)
13
- : dirname(fileURLToPath(startUrl));
11
+ const LOCAL_COLLECTOR_PLACEHOLDER_VERSION = "0.0.0";
12
+ const REPO_ONLY_PACKAGE_SIBLINGS = ["src", "bin", "test", "scripts", "tsconfig.build.json"];
13
+ function resolveLocalCollectorManifest(startUrl) {
14
+ const startPath = typeof startUrl === "string" && !startUrl.startsWith("file:")
15
+ ? startUrl
16
+ : fileURLToPath(startUrl);
17
+ let realStart = startPath;
18
+ try {
19
+ realStart = realpathSync(startPath);
20
+ }
21
+ catch {
22
+ }
23
+ let current = dirname(realStart);
14
24
  for (;;) {
15
25
  const manifestPath = join(current, "package.json");
16
26
  if (existsSync(manifestPath)) {
@@ -19,7 +29,7 @@ export function resolveLocalCollectorPackageVersion(startUrl = import.meta.url)
19
29
  if (manifest.name === LOCAL_COLLECTOR_PACKAGE_NAME &&
20
30
  typeof manifest.version === "string" &&
21
31
  manifest.version) {
22
- return manifest.version;
32
+ return { packageRoot: current, version: manifest.version };
23
33
  }
24
34
  }
25
35
  catch {
@@ -27,11 +37,53 @@ export function resolveLocalCollectorPackageVersion(startUrl = import.meta.url)
27
37
  }
28
38
  const parent = dirname(current);
29
39
  if (parent === current) {
30
- return LOCAL_COLLECTOR_PACKAGE_VERSION_FALLBACK;
40
+ return { packageRoot: null, version: LOCAL_COLLECTOR_PACKAGE_VERSION_FALLBACK };
31
41
  }
32
42
  current = parent;
33
43
  }
34
44
  }
45
+ export function resolveLocalCollectorPackageVersion(startUrl = import.meta.url) {
46
+ return resolveLocalCollectorManifest(startUrl).version;
47
+ }
48
+ export function classifyLocalCollectorDeploymentPosture(startUrl = import.meta.url) {
49
+ const startPath = typeof startUrl === "string" && !startUrl.startsWith("file:")
50
+ ? startUrl
51
+ : fileURLToPath(startUrl);
52
+ const moduleBasename = basename(startPath);
53
+ const isSourceEntrypoint = extname(startPath) === ".ts";
54
+ const { packageRoot, version } = resolveLocalCollectorManifest(startUrl);
55
+ let kind;
56
+ let locationHint;
57
+ if (!packageRoot) {
58
+ kind = isSourceEntrypoint ? "repo_dist_override" : "unknown";
59
+ locationHint = "unresolved";
60
+ }
61
+ else if (isUnderNodeModulesPackage(packageRoot)) {
62
+ kind = "published_package";
63
+ locationHint = `node_modules/${LOCAL_COLLECTOR_PACKAGE_NAME}`;
64
+ }
65
+ else if (isSourceEntrypoint || hasRepoOnlySiblings(packageRoot)) {
66
+ kind = "repo_dist_override";
67
+ locationHint = `packages/${basename(packageRoot)}`;
68
+ }
69
+ else {
70
+ kind = "unknown";
71
+ locationHint = `packages/${basename(packageRoot)}`;
72
+ }
73
+ return {
74
+ kind,
75
+ is_placeholder_version: version === LOCAL_COLLECTOR_PLACEHOLDER_VERSION,
76
+ location_hint: locationHint,
77
+ module_basename: moduleBasename,
78
+ version,
79
+ };
80
+ }
81
+ function isUnderNodeModulesPackage(dir) {
82
+ return dir.split(sep).includes("node_modules");
83
+ }
84
+ function hasRepoOnlySiblings(packageRoot) {
85
+ return REPO_ONLY_PACKAGE_SIBLINGS.some((entry) => existsSync(join(packageRoot, entry)));
86
+ }
35
87
  const HELP_TEXT = `pdpp-local-collector — PDPP local collector runner.
36
88
 
37
89
  Ownership: the local device/host supervisor decides when filesystem-class
@@ -47,6 +99,25 @@ Subcommands:
47
99
  doctor Print local durable outbox operator diagnostics as JSON.
48
100
  [--queue <path>]
49
101
  [--connection-id <id>]
102
+ retry-dead-letters Requeue local dead-letter outbox rows.
103
+ [--queue <path>]
104
+ [--connection-id <id>]
105
+ [--kind record_batch|checkpoint|gap|blob_upload]
106
+ [--limit <n>]
107
+ [--apply] Dry-run by default; --apply mutates after a DB backup.
108
+ prune-sent Delete sent (succeeded) outbox rows to reclaim disk space.
109
+ [--queue <path>]
110
+ [--connection-id <id>]
111
+ [--older-than-days <n>] Delete sent rows older than N days (default: 30).
112
+ [--keep-count <n>] Keep at most N most-recent sent rows per connection.
113
+ [--apply] Dry-run by default; --apply mutates after a DB backup.
114
+ Never touches pending, leased, retrying, or dead-letter rows.
115
+ compact Rebuild the outbox SQLite file to return freed pages to disk.
116
+ [--queue <path>] prune-sent deletes rows but the file never shrinks on its own
117
+ [--connection-id <id>] (auto_vacuum=NONE); compact runs VACUUM to reclaim the freelist.
118
+ [--apply] Dry-run by default; --apply rebuilds after a DB backup.
119
+ [--force] Apply is refused while unsent (ready/leased/dead-letter) rows
120
+ exist; --force compacts anyway (VACUUM is lossless either way).
50
121
  enroll --base-url <url> Exchange a one-time enrollment code for a
51
122
  --code <code> device id + device token.
52
123
  [--device-label <label>]
@@ -80,7 +151,30 @@ async function main() {
80
151
  }
81
152
  if (options.command === "status" || options.command === "doctor") {
82
153
  const status = inspectLocalOutboxStatus(options);
83
- process.stdout.write(`${JSON.stringify(options.command === "doctor" ? buildLocalOutboxDoctor(status) : status, null, 2)}\n`);
154
+ if (options.command === "doctor") {
155
+ const errorSummary = readLocalOutboxDeadLetterErrorSummary(options);
156
+ process.stdout.write(`${JSON.stringify(buildLocalOutboxDoctor(status, errorSummary), null, 2)}\n`);
157
+ return;
158
+ }
159
+ process.stdout.write(`${JSON.stringify(status, null, 2)}\n`);
160
+ return;
161
+ }
162
+ if (options.command === "retry-dead-letters") {
163
+ const result = retryLocalOutboxDeadLetters(options);
164
+ process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
165
+ return;
166
+ }
167
+ if (options.command === "prune-sent") {
168
+ const result = pruneSentOutboxRows(options);
169
+ process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
170
+ return;
171
+ }
172
+ if (options.command === "compact") {
173
+ const result = compactOutbox(options);
174
+ process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
175
+ if (result.refused) {
176
+ process.exitCode = 1;
177
+ }
84
178
  return;
85
179
  }
86
180
  if (options.command === "enroll") {
@@ -114,12 +208,59 @@ async function main() {
114
208
  process.stdout.write(`${JSON.stringify(summarizeRunResultForCli(result), null, 2)}\n`);
115
209
  }
116
210
  export function summarizeRunResultForCli(result) {
211
+ const summary = result.outboxSummary;
212
+ const lifecycleState = deriveLocalCollectorLifecycleState({
213
+ coverageObserved: null,
214
+ recordBatchCount: 0,
215
+ summary,
216
+ });
217
+ const openWork = pendingOpenWork(summary);
218
+ const drained = openWork === 0;
117
219
  return {
118
220
  ...result,
221
+ drain_note: runDrainNote(result, summary, drained),
222
+ drained,
119
223
  flushedState: summarizeCollectorState(result.flushedState),
224
+ lifecycle_state: lifecycleState,
120
225
  priorState: summarizeCollectorState(result.priorState),
226
+ residual_backlog: {
227
+ dead_letter: summary.deadLetter,
228
+ leased: summary.leased,
229
+ ready: summary.ready,
230
+ retrying: summary.retrying,
231
+ total_open: openWork,
232
+ },
121
233
  };
122
234
  }
235
+ function runDrainNote(result, summary, drained) {
236
+ if (result.skippedScanForBacklog) {
237
+ return (`Scan was skipped: ${pendingOpenWork(summary)} open outbox row(s) from a prior run still need to drain first. ` +
238
+ "No new source work was collected this pass; re-run to continue draining.");
239
+ }
240
+ if (drained) {
241
+ return "Outbox fully drained — no ready, retrying, leased, or dead-letter work remains.";
242
+ }
243
+ const parts = [];
244
+ if (summary.ready > 0) {
245
+ parts.push(`${summary.ready} ready (drains on the next scheduled run)`);
246
+ }
247
+ if (summary.retrying > 0) {
248
+ parts.push(`${summary.retrying} retrying (waiting on backoff)`);
249
+ }
250
+ if (summary.leased > 0) {
251
+ parts.push(`${summary.leased} leased (in flight)`);
252
+ }
253
+ if (summary.deadLetter > 0) {
254
+ parts.push(`${summary.deadLetter} dead-letter (run \`retry-dead-letters\` then re-run)`);
255
+ }
256
+ const scanNote = result.scanBudgetExceeded
257
+ ? " The connector was stopped by the per-run enqueue budget, so more source work likely remains; re-run to continue."
258
+ : "";
259
+ return `Run succeeded on the source but the outbox is NOT fully drained: ${parts.join(", ")}.${scanNote}`;
260
+ }
261
+ function pendingOpenWork(summary) {
262
+ return summary.ready + summary.retrying + summary.leased + summary.deadLetter;
263
+ }
123
264
  function summarizeCollectorState(state) {
124
265
  if (!state || Object.keys(state).length === 0) {
125
266
  return null;
@@ -147,23 +288,41 @@ function summarizeCursor(cursor) {
147
288
  if (record.file_mtimes && typeof record.file_mtimes === "object" && !Array.isArray(record.file_mtimes)) {
148
289
  summary.file_mtimes_count = Object.keys(record.file_mtimes).length;
149
290
  }
291
+ if (record.file_cursors && typeof record.file_cursors === "object" && !Array.isArray(record.file_cursors)) {
292
+ summary.file_cursors_count = Object.keys(record.file_cursors).length;
293
+ }
150
294
  return summary;
151
295
  }
152
- export function inspectLocalOutboxStatus(options) {
296
+ export function inspectLocalOutboxStatus(options, deps = {}) {
153
297
  const dbPath = resolveOutboxPath(options);
154
298
  const exists = existsSync(dbPath);
155
- const summary = exists ? readOutboxSummary(dbPath, options.sourceInstanceId) : emptyOutboxSummary();
299
+ const inspection = exists
300
+ ? readOutboxInspection(dbPath, options.sourceInstanceId)
301
+ : { coverageObserved: null, recordBatchCount: 0, summary: emptyOutboxSummary() };
302
+ const summary = inspection.summary;
303
+ const lifecycleState = deriveLocalCollectorLifecycleState({
304
+ coverageObserved: inspection.coverageObserved,
305
+ recordBatchCount: inspection.recordBatchCount,
306
+ summary,
307
+ });
308
+ const deploymentPosture = deps.deploymentPosture ?? classifyLocalCollectorDeploymentPosture();
156
309
  return {
157
310
  collector_protocol_version: COLLECTOR_PROTOCOL_VERSION,
158
311
  configured_device: {
159
312
  device_id_configured: Boolean(options.deviceId),
160
313
  device_token_configured: Boolean(options.deviceToken),
161
314
  },
315
+ coverage: {
316
+ observed: inspection.coverageObserved,
317
+ record_batches: inspection.recordBatchCount,
318
+ },
162
319
  db: {
163
320
  configured: Boolean(options.queuePath),
164
321
  exists,
165
322
  path: dbPath,
166
323
  },
324
+ deployment_posture: deploymentPosture,
325
+ lifecycle_state: lifecycleState,
167
326
  outbox: {
168
327
  counts: {
169
328
  dead_letter: summary.deadLetter,
@@ -186,22 +345,360 @@ export function inspectLocalOutboxStatus(options) {
186
345
  },
187
346
  };
188
347
  }
189
- export function buildLocalOutboxDoctor(status) {
348
+ export function buildLocalOutboxDoctor(status, errorSummary) {
349
+ const posture = status.deployment_posture;
350
+ const postureDisqualifiesEvidence = posture.kind === "repo_dist_override" || posture.is_placeholder_version;
190
351
  const checks = {
352
+ coverage_diagnostics: status.lifecycle_state === "coverage_missing" ? "warn" : "ok",
353
+ deployment_posture: postureDisqualifiesEvidence ? "warn" : "ok",
191
354
  expired_leases: status.outbox.expired_leases > 0 ? "warn" : "ok",
192
355
  outbox_db: status.db.exists ? "ok" : "missing",
193
356
  outbox_failures: status.outbox.counts.dead_letter > 0 ? "fail" : "ok",
194
357
  };
358
+ const remediation = [];
359
+ if (checks.outbox_failures === "fail") {
360
+ const topClass = errorSummary?.top_classes?.[0];
361
+ const causeHint = topClass
362
+ ? ` Most common cause: ${topClass.error_class} (${topClass.count} row(s)).`
363
+ : "";
364
+ remediation.push(`${status.outbox.counts.dead_letter} dead-letter row(s) need recovery.${causeHint} ` +
365
+ "Preview with `pdpp-local-collector retry-dead-letters`, then requeue with " +
366
+ "`pdpp-local-collector retry-dead-letters --apply` (backs up the DB first), " +
367
+ "then re-run the collector to drain the requeued rows.");
368
+ }
369
+ if (checks.expired_leases === "warn") {
370
+ remediation.push(`${status.outbox.expired_leases} lease(s) are past expiry — a previous run likely crashed mid-drain. ` +
371
+ "The next `pdpp-local-collector run …` recovers expired leases automatically before scanning; " +
372
+ "no manual action is required.");
373
+ }
374
+ if (checks.coverage_diagnostics === "warn") {
375
+ remediation.push(`This lane drained ${status.coverage.record_batches} record batch(es) but never carried a ` +
376
+ "`coverage_diagnostics` record, so the dashboard can only show coverage_unknown. " +
377
+ "Re-run with a build that emits `coverage_diagnostics` by default and the default stream set (no `--streams`): " +
378
+ "`npx -y @pdpp/local-collector run …` (or `pdpp-local-collector run …` if already on a current build). " +
379
+ "Older installs may omit `coverage_diagnostics` from bundled defaults. `npx -y` fetches the latest *published* build, " +
380
+ "which can still lag the repo build — if the gap persists, confirm the published `latest` carries the fix with " +
381
+ "`pnpm release:dist-tag-check` (release owner) rather than assuming the published build is current.");
382
+ }
383
+ if (checks.deployment_posture === "warn") {
384
+ remediation.push(deploymentPostureRemediation(posture));
385
+ }
386
+ const includeSummary = Boolean(errorSummary) && status.outbox.counts.dead_letter > 0;
195
387
  return {
196
388
  ...status,
197
389
  checks,
198
- status: checks.outbox_failures === "fail"
199
- ? "critical"
200
- : checks.expired_leases === "warn" || checks.outbox_db === "missing"
201
- ? "warning"
202
- : "ok",
390
+ ...(includeSummary && errorSummary ? { dead_letter_error_summary: errorSummary } : {}),
391
+ ...(remediation.length > 0 ? { remediation } : {}),
392
+ status: doctorSeverityForChecks(checks),
203
393
  };
204
394
  }
395
+ function deploymentPostureRemediation(posture) {
396
+ const parts = [];
397
+ if (posture.kind === "repo_dist_override") {
398
+ parts.push(`This collector resolves to a repo \`dist/\` override (${posture.location_hint}), ` +
399
+ "not a published package — treat its output as dev evidence, not published " +
400
+ "operator-host evidence.");
401
+ }
402
+ if (posture.is_placeholder_version) {
403
+ parts.push(`The reported version is the \`${posture.version}\` placeholder, which is older than ` +
404
+ "every real build (left over from the npm bootstrap; upgrade to the published release).");
405
+ }
406
+ parts.push("Pin a published version before capturing operator-host evidence: " +
407
+ "`npm i -g @pdpp/local-collector` (or an explicit pinned `@<version>`). " +
408
+ "The published build can lag the repo build, so confirm it carries the " +
409
+ "fixes you need before re-pinning — `pnpm release:dist-tag-check` (release " +
410
+ "owner) reports the live dist-tag posture; a `repo_dist_override` that is " +
411
+ "ahead of the published build is dev evidence, not a build to downgrade to. " +
412
+ "See docs/local-collector.md §\"Deployment Posture: Published vs Dev\".");
413
+ return parts.join(" ");
414
+ }
415
+ function doctorSeverityForChecks(checks) {
416
+ if (checks.outbox_failures === "fail") {
417
+ return "critical";
418
+ }
419
+ if (checks.expired_leases === "warn" ||
420
+ checks.outbox_db === "missing" ||
421
+ checks.coverage_diagnostics === "warn" ||
422
+ checks.deployment_posture === "warn") {
423
+ return "warning";
424
+ }
425
+ return "ok";
426
+ }
427
+ export function readLocalOutboxDeadLetterErrorSummary(options) {
428
+ const dbPath = resolveOutboxPath(options);
429
+ if (!existsSync(dbPath)) {
430
+ return null;
431
+ }
432
+ const outbox = new LocalDeviceOutbox({ path: dbPath });
433
+ try {
434
+ const summary = outbox.deadLetterErrorSummary(options.sourceInstanceId ? { sourceInstanceId: options.sourceInstanceId } : {});
435
+ return summary.dead_letter_count > 0 ? summary : null;
436
+ }
437
+ finally {
438
+ outbox.close();
439
+ }
440
+ }
441
+ const RETRY_DEAD_LETTERS_NO_MATCH_NOTE = "No dead-letter rows matched. If the dashboard shows this connection as " +
442
+ "blocked/stalled, that is a state-read block, not a dead-letter backlog — " +
443
+ "there is nothing to requeue. Recovery is to re-run the collector " +
444
+ "(`pdpp-local-collector run …`), which re-reads prior state and clears the block.";
445
+ function retryDeadLettersMatchNote(matched, dryRun) {
446
+ if (matched === 0) {
447
+ return RETRY_DEAD_LETTERS_NO_MATCH_NOTE;
448
+ }
449
+ const requeued = dryRun
450
+ ? `${matched} dead-letter row(s) would be requeued (dry run). Re-run with --apply to requeue (backs up the DB first), `
451
+ : `${matched} dead-letter row(s) matched and were requeued to pending. `;
452
+ return `${requeued}then re-run the collector (\`pdpp-local-collector run …\`) to drain them — requeue moves rows to pending, it does not ingest.`;
453
+ }
454
+ export function retryLocalOutboxDeadLetters(options) {
455
+ const dbPath = resolveOutboxPath(options);
456
+ const exists = existsSync(dbPath);
457
+ if (!exists) {
458
+ return {
459
+ backup_path: null,
460
+ db: { exists: false, path: dbPath },
461
+ dry_run: !options.apply,
462
+ filter: {
463
+ kind: options.deadLetterKind ?? null,
464
+ limit: options.limit ?? null,
465
+ source_instance_id: options.sourceInstanceId ?? null,
466
+ },
467
+ matched: 0,
468
+ note: retryDeadLettersMatchNote(0, !options.apply),
469
+ requeued: 0,
470
+ status_after: null,
471
+ status_before: null,
472
+ };
473
+ }
474
+ const outbox = new LocalDeviceOutbox({ path: dbPath });
475
+ try {
476
+ const statusBefore = summaryCounts(outbox.summary(options.sourceInstanceId ? { sourceInstanceId: options.sourceInstanceId } : {}));
477
+ const errorSummary = outbox.deadLetterErrorSummary(options.sourceInstanceId ? { sourceInstanceId: options.sourceInstanceId } : {});
478
+ const dryRun = !options.apply;
479
+ const backupPath = dryRun ? null : backupSqliteDb(outbox, dbPath, "retry-dead-letters");
480
+ const result = outbox.requeueDeadLetters({
481
+ dryRun,
482
+ ...(options.deadLetterKind ? { kind: options.deadLetterKind } : {}),
483
+ ...(options.limit ? { limit: options.limit } : {}),
484
+ ...(options.sourceInstanceId ? { sourceInstanceId: options.sourceInstanceId } : {}),
485
+ });
486
+ const statusAfter = summaryCounts(outbox.summary(options.sourceInstanceId ? { sourceInstanceId: options.sourceInstanceId } : {}));
487
+ return {
488
+ backup_path: backupPath,
489
+ db: { exists: true, path: dbPath },
490
+ ...(errorSummary.dead_letter_count > 0 ? { dead_letter_error_summary: errorSummary } : {}),
491
+ dry_run: dryRun,
492
+ filter: {
493
+ kind: options.deadLetterKind ?? null,
494
+ limit: options.limit ?? null,
495
+ source_instance_id: options.sourceInstanceId ?? null,
496
+ },
497
+ matched: result.matched,
498
+ note: retryDeadLettersMatchNote(result.matched, dryRun),
499
+ requeued: result.requeued,
500
+ status_after: statusAfter,
501
+ status_before: statusBefore,
502
+ };
503
+ }
504
+ finally {
505
+ outbox.close();
506
+ }
507
+ }
508
+ function summaryCounts(summary) {
509
+ return {
510
+ dead_letter: summary.deadLetter,
511
+ leased: summary.leased,
512
+ pending: summary.ready,
513
+ retrying: summary.retrying,
514
+ sent: summary.succeeded,
515
+ total: summary.total,
516
+ };
517
+ }
518
+ const DEFAULT_PRUNE_SENT_OLDER_THAN_DAYS = 30;
519
+ export function pruneSentOutboxRows(options) {
520
+ const olderThanDays = options.olderThanDays ?? (options.keepCount === undefined ? DEFAULT_PRUNE_SENT_OLDER_THAN_DAYS : undefined);
521
+ const olderThanIso = olderThanDays !== undefined ? daysAgoIso(olderThanDays) : undefined;
522
+ const dbPath = resolveOutboxPath(options);
523
+ const exists = existsSync(dbPath);
524
+ const reportedOlderThanDays = olderThanDays ?? null;
525
+ const reportedOlderThanIso = olderThanIso ?? null;
526
+ if (!exists) {
527
+ return {
528
+ backup_path: null,
529
+ db: { exists: false, path: dbPath },
530
+ dry_run: !options.apply,
531
+ filter: {
532
+ keep_count: options.keepCount ?? null,
533
+ older_than_days: reportedOlderThanDays,
534
+ older_than_iso: reportedOlderThanIso,
535
+ source_instance_id: options.sourceInstanceId ?? null,
536
+ },
537
+ matched: 0,
538
+ note: "Outbox DB does not exist; nothing to prune.",
539
+ pruned: 0,
540
+ status_after: null,
541
+ status_before: null,
542
+ };
543
+ }
544
+ const outbox = new LocalDeviceOutbox({ path: dbPath });
545
+ try {
546
+ const statusBefore = summaryCounts(outbox.summary(options.sourceInstanceId ? { sourceInstanceId: options.sourceInstanceId } : {}));
547
+ const dryRun = !options.apply;
548
+ const pruneInput = {
549
+ dryRun,
550
+ ...(olderThanIso !== undefined ? { olderThanIso } : {}),
551
+ ...(options.keepCount !== undefined ? { keepCount: options.keepCount } : {}),
552
+ ...(options.sourceInstanceId ? { sourceInstanceId: options.sourceInstanceId } : {}),
553
+ };
554
+ const backupPath = dryRun ? null : backupSqliteDb(outbox, dbPath, "prune-sent");
555
+ const result = outbox.pruneSent(pruneInput);
556
+ const statusAfter = summaryCounts(outbox.summary(options.sourceInstanceId ? { sourceInstanceId: options.sourceInstanceId } : {}));
557
+ const note = pruneSentNote(result, dryRun, reportedOlderThanDays, options.keepCount);
558
+ return {
559
+ backup_path: backupPath,
560
+ db: { exists: true, path: dbPath },
561
+ dry_run: dryRun,
562
+ filter: {
563
+ keep_count: options.keepCount ?? null,
564
+ older_than_days: reportedOlderThanDays,
565
+ older_than_iso: reportedOlderThanIso,
566
+ source_instance_id: options.sourceInstanceId ?? null,
567
+ },
568
+ matched: result.matched,
569
+ note,
570
+ pruned: result.pruned,
571
+ status_after: statusAfter,
572
+ status_before: statusBefore,
573
+ };
574
+ }
575
+ finally {
576
+ outbox.close();
577
+ }
578
+ }
579
+ function pruneSentNote(result, dryRun, olderThanDays, keepCount) {
580
+ if (result.matched === 0) {
581
+ return `No sent rows matched the retention policy (${pruneSentPolicyDescription(olderThanDays, keepCount)}). Nothing to prune.`;
582
+ }
583
+ if (dryRun) {
584
+ return (`${result.matched} sent row(s) would be pruned (dry run). ` +
585
+ `Re-run with --apply to delete (backs up the DB first). ` +
586
+ `This only removes sent rows — pending, leased, retrying, and dead-letter rows are never touched.`);
587
+ }
588
+ return (`${result.pruned} sent row(s) pruned. ` +
589
+ `Pending, leased, retrying, and dead-letter rows were not touched. ` +
590
+ `Run \`pdpp-local-collector status\` to confirm the new outbox size.`);
591
+ }
592
+ function pruneSentPolicyDescription(olderThanDays, keepCount) {
593
+ const parts = [];
594
+ if (olderThanDays !== null) {
595
+ parts.push(`older than ${olderThanDays} days`);
596
+ }
597
+ if (keepCount !== undefined) {
598
+ parts.push(`keep-count ${keepCount}`);
599
+ }
600
+ return parts.length > 0 ? parts.join(", ") : "default sent-row retention";
601
+ }
602
+ function daysAgoIso(days) {
603
+ const ms = days * 24 * 60 * 60 * 1000;
604
+ return new Date(Date.now() - ms).toISOString();
605
+ }
606
+ export function compactOutbox(options) {
607
+ const dbPath = resolveOutboxPath(options);
608
+ const exists = existsSync(dbPath);
609
+ const dryRun = !options.apply;
610
+ if (!exists) {
611
+ return {
612
+ backup_path: null,
613
+ compacted: null,
614
+ db: { exists: false, path: dbPath },
615
+ dry_run: dryRun,
616
+ note: "Outbox DB does not exist; nothing to compact.",
617
+ non_succeeded_rows: 0,
618
+ page_stats: null,
619
+ reclaimed_bytes: 0,
620
+ refused: false,
621
+ };
622
+ }
623
+ const outbox = new LocalDeviceOutbox({ path: dbPath });
624
+ try {
625
+ const pageStats = outbox.pageStats();
626
+ const nonSucceeded = outbox.countNonSucceeded();
627
+ if (dryRun) {
628
+ return {
629
+ backup_path: null,
630
+ compacted: null,
631
+ db: { exists: true, path: dbPath },
632
+ dry_run: true,
633
+ note: compactDryRunNote(pageStats, nonSucceeded, Boolean(options.force)),
634
+ non_succeeded_rows: nonSucceeded,
635
+ page_stats: pageStats,
636
+ reclaimed_bytes: 0,
637
+ refused: false,
638
+ };
639
+ }
640
+ if (nonSucceeded > 0 && !options.force) {
641
+ return {
642
+ backup_path: null,
643
+ compacted: null,
644
+ db: { exists: true, path: dbPath },
645
+ dry_run: false,
646
+ note: `Refusing to compact: ${nonSucceeded} non-succeeded (ready/leased/dead-letter) row(s) are still in the outbox. ` +
647
+ "Drain the lane first (`pdpp-local-collector run …`, then `retry-dead-letters --apply` for any dead-letter rows), " +
648
+ "or pass --force to compact anyway. VACUUM is lossless — unsent rows are copied, never dropped — but compacting a " +
649
+ "live lane is refused by default so the reclaim runs on a quiet outbox.",
650
+ non_succeeded_rows: nonSucceeded,
651
+ page_stats: pageStats,
652
+ reclaimed_bytes: 0,
653
+ refused: true,
654
+ };
655
+ }
656
+ const backupPath = backupSqliteDb(outbox, dbPath, "compact");
657
+ const result = outbox.compact();
658
+ return {
659
+ backup_path: backupPath,
660
+ compacted: result.after,
661
+ db: { exists: true, path: dbPath },
662
+ dry_run: false,
663
+ note: compactAppliedNote(result, nonSucceeded, Boolean(options.force)),
664
+ non_succeeded_rows: nonSucceeded,
665
+ page_stats: result.before,
666
+ reclaimed_bytes: result.reclaimedBytes,
667
+ refused: false,
668
+ };
669
+ }
670
+ finally {
671
+ outbox.close();
672
+ }
673
+ }
674
+ function compactDryRunNote(stats, nonSucceeded, force) {
675
+ const reclaimMb = (stats.reclaimableBytes / (1024 * 1024)).toFixed(1);
676
+ if (stats.reclaimableBytes === 0) {
677
+ return "The outbox has no reclaimable free pages; a compact would return ~0 bytes. Nothing to do.";
678
+ }
679
+ const base = `~${reclaimMb} MiB of free pages can be returned to the filesystem (${stats.freelistPages} of ${stats.pageCount} pages). ` +
680
+ "Re-run with --apply to rebuild the DB in place (backs up the DB first).";
681
+ if (nonSucceeded > 0 && !force) {
682
+ return (`${base} NOTE: ${nonSucceeded} non-succeeded (unsent) row(s) are present, so --apply will be refused unless you ` +
683
+ "drain the lane first or pass --force. VACUUM never drops unsent rows; the refusal just keeps the reclaim on a quiet outbox.");
684
+ }
685
+ return base;
686
+ }
687
+ function compactAppliedNote(result, nonSucceeded, force) {
688
+ const reclaimedMb = (result.reclaimedBytes / (1024 * 1024)).toFixed(1);
689
+ const forcedNote = nonSucceeded > 0 && force
690
+ ? ` Compacted with --force while ${nonSucceeded} non-succeeded row(s) were present; VACUUM copied them losslessly.`
691
+ : "";
692
+ return (`Compacted: ~${reclaimedMb} MiB returned to the filesystem ` +
693
+ `(${result.before.pageCount} → ${result.after.pageCount} pages).${forcedNote} ` +
694
+ "Run `pdpp-local-collector status` to confirm the new outbox size.");
695
+ }
696
+ function backupSqliteDb(outbox, dbPath, label) {
697
+ const stamp = new Date().toISOString().replace(/[:.]/g, "-");
698
+ const backupPath = `${dbPath}.pre-${label}-${stamp}.bak`;
699
+ outbox.backupTo(backupPath);
700
+ return backupPath;
701
+ }
205
702
  export function buildConnectorSpec(options) {
206
703
  if (!options.connector) {
207
704
  throw new CollectorUsageError("connector required");
@@ -237,8 +734,15 @@ export function parseArgs(args) {
237
734
  process.stdout.write(HELP_TEXT);
238
735
  process.exit(0);
239
736
  }
240
- if (command !== "enroll" && command !== "run" && command !== "advertise" && command !== "status" && command !== "doctor") {
241
- throw new CollectorUsageError(`usage: pdpp-local-collector <enroll|run|advertise|status|doctor> --base-url <url> [options]`);
737
+ if (command !== "enroll" &&
738
+ command !== "run" &&
739
+ command !== "advertise" &&
740
+ command !== "status" &&
741
+ command !== "doctor" &&
742
+ command !== "retry-dead-letters" &&
743
+ command !== "prune-sent" &&
744
+ command !== "compact") {
745
+ throw new CollectorUsageError(`usage: pdpp-local-collector <enroll|run|advertise|status|doctor|retry-dead-letters|prune-sent|compact> --base-url <url> [options]`);
242
746
  }
243
747
  const options = {
244
748
  baseUrl: process.env.PDPP_REFERENCE_BASE_URL ?? "http://127.0.0.1:7662",
@@ -268,12 +772,26 @@ export function parseArgs(args) {
268
772
  if (!arg) {
269
773
  throw new CollectorUsageError("missing option");
270
774
  }
775
+ if (applyFlagOption(options, arg)) {
776
+ continue;
777
+ }
271
778
  const value = rest[index + 1];
272
779
  applyOption(options, arg, value);
273
780
  index++;
274
781
  }
275
782
  return options;
276
783
  }
784
+ function applyFlagOption(options, arg) {
785
+ if (arg === "--apply") {
786
+ options.apply = true;
787
+ return true;
788
+ }
789
+ if (arg === "--force") {
790
+ options.force = true;
791
+ return true;
792
+ }
793
+ return false;
794
+ }
277
795
  function applyOption(options, arg, value) {
278
796
  if (!value) {
279
797
  throw new CollectorUsageError(`missing option value: ${arg}`);
@@ -300,6 +818,12 @@ function applyOption(options, arg, value) {
300
818
  "--device-token": (next) => {
301
819
  options.deviceToken = next;
302
820
  },
821
+ "--kind": (next) => {
822
+ options.deadLetterKind = parseOutboxKind(next);
823
+ },
824
+ "--limit": (next) => {
825
+ options.limit = parsePositiveInteger("--limit", next);
826
+ },
303
827
  "--queue": (next) => {
304
828
  options.queuePath = next;
305
829
  },
@@ -321,6 +845,12 @@ function applyOption(options, arg, value) {
321
845
  "--args": (next) => {
322
846
  options.args = next.split(" ").filter(Boolean);
323
847
  },
848
+ "--older-than-days": (next) => {
849
+ options.olderThanDays = parseNonNegativeInteger("--older-than-days", next);
850
+ },
851
+ "--keep-count": (next) => {
852
+ options.keepCount = parseNonNegativeInteger("--keep-count", next);
853
+ },
324
854
  };
325
855
  const set = setters[arg];
326
856
  if (!set) {
@@ -328,6 +858,26 @@ function applyOption(options, arg, value) {
328
858
  }
329
859
  set(value);
330
860
  }
861
+ function parseOutboxKind(value) {
862
+ if (value === "record_batch" || value === "checkpoint" || value === "gap" || value === "blob_upload") {
863
+ return value;
864
+ }
865
+ throw new CollectorUsageError(`invalid --kind: ${value}`);
866
+ }
867
+ function parsePositiveInteger(label, value) {
868
+ const parsed = Number(value);
869
+ if (!Number.isSafeInteger(parsed) || parsed <= 0) {
870
+ throw new CollectorUsageError(`${label} must be a positive integer`);
871
+ }
872
+ return parsed;
873
+ }
874
+ function parseNonNegativeInteger(label, value) {
875
+ const parsed = Number(value);
876
+ if (!Number.isSafeInteger(parsed) || parsed < 0) {
877
+ throw new CollectorUsageError(`${label} must be a non-negative integer`);
878
+ }
879
+ return parsed;
880
+ }
331
881
  function parseCsv(value) {
332
882
  return value
333
883
  .split(",")
@@ -347,10 +897,18 @@ function resolveOutboxPath(options) {
347
897
  ? scopedDefaultQueuePath(options.queuePath, DEFAULT_QUEUE_PATH, options.sourceInstanceId)
348
898
  : options.queuePath;
349
899
  }
350
- function readOutboxSummary(path, sourceInstanceId) {
900
+ function readOutboxInspection(path, sourceInstanceId) {
351
901
  const outbox = new LocalDeviceOutbox({ path });
352
902
  try {
353
- return outbox.summary(sourceInstanceId ? { sourceInstanceId } : {});
903
+ const summary = outbox.summary(sourceInstanceId ? { sourceInstanceId } : {});
904
+ if (!sourceInstanceId) {
905
+ return { coverageObserved: null, recordBatchCount: 0, summary };
906
+ }
907
+ return {
908
+ coverageObserved: outbox.hasObservedStream({ sourceInstanceId, stream: COVERAGE_DIAGNOSTICS_STREAM }),
909
+ recordBatchCount: outbox.countRecordBatches({ sourceInstanceId }),
910
+ summary,
911
+ };
354
912
  }
355
913
  finally {
356
914
  outbox.close();