npm - ushman-ledger - Versions diffs - 1.2.1 → 1.3.0 - Mend

ushman-ledger 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

package/AGENTS.md +7 -5
package/ARCHITECTURE.md +85 -0
package/CHANGELOG.md +11 -0
package/README.md +114 -5
package/TROUBLESHOOTING.md +184 -0
package/dist/blobs.d.ts +3 -0
package/dist/blobs.d.ts.map +1 -1
package/dist/blobs.js +41 -15
package/dist/builders.d.ts +33 -0
package/dist/builders.d.ts.map +1 -1
package/dist/builders.js +10 -1
package/dist/cli.d.ts.map +1 -1
package/dist/cli.js +176 -59
package/dist/coverage.d.ts.map +1 -1
package/dist/coverage.js +3 -2
package/dist/doctor.d.ts +17 -4
package/dist/doctor.d.ts.map +1 -1
package/dist/doctor.js +263 -62
package/dist/handle.d.ts.map +1 -1
package/dist/handle.js +67 -30
package/dist/helpers.d.ts +1 -0
package/dist/helpers.d.ts.map +1 -1
package/dist/helpers.js +23 -0
package/dist/index.d.ts +4 -2
package/dist/index.d.ts.map +1 -1
package/dist/index.js +4 -2
package/dist/list.d.ts +34 -1
package/dist/list.d.ts.map +1 -1
package/dist/list.js +19 -9
package/dist/patch-resolver.d.ts.map +1 -1
package/dist/patch-resolver.js +193 -53
package/dist/process.d.ts +2 -0
package/dist/process.d.ts.map +1 -0
package/dist/process.js +16 -0
package/dist/read-index.d.ts +7 -7
package/dist/read-index.d.ts.map +1 -1
package/dist/read-index.js +18 -13
package/dist/record.js +2 -2
package/dist/recovery.d.ts +8 -0
package/dist/recovery.d.ts.map +1 -1
package/dist/recovery.js +142 -30
package/dist/render/retro.d.ts.map +1 -1
package/dist/render/retro.js +4 -1
package/dist/runtime-config.d.ts +14 -0
package/dist/runtime-config.d.ts.map +1 -0
package/dist/runtime-config.js +97 -0
package/dist/schema/entry-core.d.ts +5 -2
package/dist/schema/entry-core.d.ts.map +1 -1
package/dist/schema/entry-core.js +3 -0
package/dist/schema/entry-read.d.ts +57 -0
package/dist/schema/entry-read.d.ts.map +1 -1
package/dist/schema/entry-read.js +9 -1
package/dist/schema/entry-write.d.ts +51 -0
package/dist/schema/entry-write.d.ts.map +1 -1
package/dist/schema/entry-write.js +9 -1
package/dist/storage/filesystem.d.ts +15 -2
package/dist/storage/filesystem.d.ts.map +1 -1
package/dist/storage/filesystem.js +234 -37
package/dist/storage/lock.d.ts.map +1 -1
package/dist/storage/lock.js +38 -16
package/dist/text-lines.d.ts +8 -0
package/dist/text-lines.d.ts.map +1 -0
package/dist/text-lines.js +20 -0
package/dist/version.d.ts +1 -1
package/dist/version.d.ts.map +1 -1
package/dist/version.js +2 -1
package/package.json +4 -2

package/AGENTS.md CHANGED Viewed

@@ -29,11 +29,13 @@ An append-only ledger library and CLI for ushman v4 workspaces. It owns ledger s
 5. `src/schema/entry-migrations.ts`
 6. `src/schema/entry.ts`
 7. `src/storage/filesystem.ts`
-8. `src/record.ts`
-9. `src/handle.ts`
-10. `src/coverage.ts`
-11. `src/doctor.ts`
-12. `src/cli.ts`
+8. `src/recovery.ts`
+9. `src/record.ts`
+10. `src/handle.ts`
+11. `src/runtime-config.ts`
+12. `src/coverage.ts`
+13. `src/doctor.ts`
+14. `src/cli.ts`
 ## Commands

package/ARCHITECTURE.md ADDED Viewed

@@ -0,0 +1,85 @@
+# ushman-ledger architecture
+This package owns append-only ledger persistence for ushman v4 workspaces. It does not orchestrate pipelines or act as a general database layer.
+## Core flow
+1. `openLedger()` validates the workspace and reconciles any pending crash-recovery state.
+2. `record()` normalizes the input, resolves any patch/blob payload, and acquires the manifest lock.
+3. A pending commit journal is written before the entry file, manifest, or read index is touched, so an interrupted commit can be replayed instead of leaving the three out of step.
+4. The entry file is written atomically into the phase directory.
+5. `manifest.json` is updated atomically.
+6. `read-index.json` is updated atomically.
+7. The pending journal is removed only after the manifest and read index are durable.
+The same reconciliation path is used by reads, writes, coverage, doctor, render, and archive entrypoints so callers do not need a separate recovery command.
+## Storage model
+`<workspace>/.lab/ledger/`
+- `manifest.json`: append-order metadata, per-phase pointers, idempotency index, and archive metadata.
+- `read-index.json`: durable lightweight scan index used by list/render/coverage/doctor.
+- `external-temp-files/`: journals for custom render temp files created outside `.lab/ledger`.
+- `pending/`: append journals used to replay incomplete commits.
+- `pending-quarantine/`: unreplayable pending commit journals plus quarantine metadata.
+- `pending-archives/`: archive journals used to adopt or discard incomplete archive writes.
+- `blobs/`: content-addressed patch payloads keyed by SHA-256.
+- `<phase>/`: append-only entry files per ledger phase.
+## Append semantics
+- Entries are immutable once written. Corrections use `correction` records linked to the original entry.
+- Each phase maintains its own `prevEntryId` chain so local append history is explicit and auditable.
+- Idempotency is logical-content based by default and can be overridden with an explicit `idempotencyKey`.
+- `stage-write` records treat `filePath` as the required primary covered artifact and may extend coverage with extra normalized `links.affectedFiles`.
+- Patch payloads are de-duplicated by SHA-256 and stored once under `blobs/`.
+## Read path
+- `list`, render, coverage, and doctor iterate entries in manifest sequence order.
+- The durable read index avoids re-sorting `manifest.entryLocations` on every scan.
+- Limited reads filter from the read index first, then materialize only the last matching entries from disk.
+- Multiple readers can run concurrently. Writers serialize only around manifest mutation and recovery work that must observe a consistent durable state.
+## Recovery model
+- Atomic writes use temp files and rename.
+- Pending commit journals record the target sequence before the entry file and manifest can drift.
+- Startup reconciliation replays pending commits in manifest order, quarantines unreplayable pending commits, rebuilds missing or stale read indexes, adopts verified archives, and cleans stale temp files.
+- Manifest locks are reclaimed after stale-owner or dead-owner checks, so a newer owner is not deleted during handoff.
+- Re-opening the ledger or rerunning any CLI command triggers the same recovery path; there is no separate manual recovery subcommand.
+## Scale and tuning
+The default scan contract is intentionally conservative and can be tuned with environment variables:
+- `USHMAN_LEDGER_SCAN_BATCH_SIZE`
+- `USHMAN_LEDGER_SCAN_CONCURRENCY`
+- `USHMAN_LEDGER_READ_INDEX_REBUILD_BATCH_SIZE`
+- `USHMAN_LEDGER_READ_INDEX_REBUILD_CONCURRENCY`
+- `USHMAN_LEDGER_COVERAGE_FILE_STAT_CONCURRENCY`
+- `USHMAN_LEDGER_BLOB_HASH_CONCURRENCY`
+- `USHMAN_LEDGER_DOCTOR_CHECKPOINT_MAX_AGE_MS`
+- `USHMAN_LEDGER_DOCTOR_OPEN_ISSUE_MAX_AGE_MS`
+- `USHMAN_LEDGER_MAX_PATCH_BYTES`
+The benchmark entrypoint, `bun run bench:scale`, prints the active runtime config so measurements can be compared across different knob settings.
+## Operational ceilings
+- Manifest sequence numbers stop at `Number.MAX_SAFE_INTEGER`.
+- Patch/blob ingestion defaults to a 10 MiB ceiling unless `USHMAN_LEDGER_MAX_PATCH_BYTES` is raised.
+- Git diff capture is the only external-binary dependency and defaults to a 30 second timeout plus a 10 MiB stdout buffer.
+## Contributor map
+- `src/blobs.ts`: patch blob storage, size limits, and digest/path validation.
+- `src/storage/lock.ts`: manifest lock acquisition, stale-owner reclaim, and reclaim-marker turnover.
+- `src/record.ts`: append pipeline and idempotency behavior.
+- `src/recovery.ts`: pending journal replay, read-index rebuild, and temp cleanup.
+- `src/read-index.ts`: durable scan index construction and maintenance.
+- `src/list.ts`: manifest/read-index iteration logic.
+- `src/coverage.ts`: candidate file scan plus read-index-backed coverage calculation.
+- `src/doctor.ts`: integrity checks plus troubleshooting-oriented findings.
+- `src/cli.ts`: CLI argument handling and automation-facing output.

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,16 @@
 # Changelog
+## [1.2.0] - 2026-05-28
+- Added `stage-write` ledger records for deterministic pipeline-owned candidate writes.
+- `stage-write` now accepts `intake` alongside the later candidate-mutating stage ids so intake-owned writes such as auto-stubs and workspace runbooks can be recorded without falling back to patch semantics.
+- Read-index coverage now treats `stage-write.filePath` the same as patch `affectedFiles`, so downstream coverage checks can distinguish stage output from operator edits without losing coverage accounting.
+- Retro rendering now includes `stage-write` entries in the tool/action stream.
+- Custom render outputs now journal their external temp files so startup recovery can clean stale crash leftovers outside `.lab/ledger`.
+- Lock recovery now reclaims locks immediately when the recorded owner PID is no longer alive.
+- Pending commit recovery now quarantines unreplayable journals and reports them through `doctor` instead of failing the whole reconciliation pass.
+- Doctor stale-age windows for pre-change checkpoints and open issues are configurable through runtime environment variables.
 ## [1.1.0] - 2026-05-23
 - Added `change-log` records, narrative note subkinds, and `migration-log-md` / `workspace-narrative-md` render targets.

package/README.md CHANGED Viewed

@@ -33,8 +33,10 @@ import {
   appendSemanticCleanupSummaryNote,
   type BuildRecordInput,
   buildChangeLogRecord,
+  buildStageWriteRecord,
   buildValidatorResultRecord,
   deriveFilesChangedFromPatch,
+  getLedgerRuntimeConfig,
   openLedger,
 } from 'ushman-ledger';
@@ -74,6 +76,17 @@ await ledger.record(
   }),
 );
+await ledger.record(
+  buildStageWriteRecord({
+    emitter: { tool: 'ushman-seed', version: '1.0.0' },
+    filePath: 'src/generated/candidate.ts',
+    phase: 'seed',
+    rationale: 'Seed stage wrote the initial candidate artifact.',
+    stage: 'seed',
+    summary: 'seed candidate output',
+  }),
+);
 await ledger.record(
   buildValidatorResultRecord({
     emitter: { tool: 'ushman-doctor', version: '1.0.0' },
@@ -100,15 +113,32 @@ await ledger.render({ to: 'retro' });
 await ledger.render({ to: 'migration-log-md' });
 await ledger.renderTo({ to: 'migration-log-md', out: '/tmp/migration-log.md' });
 await ledger.archive('/tmp/ledger.tgz');
+const config = getLedgerRuntimeConfig();
+console.log(`Using scan concurrency ${config.scanConcurrency}`);
 ```
 Advanced builder wrappers can reuse the exported `BuildRecordInput<T>` helper type when they want the same input contract the built-in builders accept.
+Example:
+```ts
+import type { BuildRecordInput, LedgerRecord } from 'ushman-ledger';
+type RuntimeEventInput = BuildRecordInput<Extract<LedgerRecord, { kind: 'runtime-event' }>>;
+const buildRuntimeEventRecord = (input: RuntimeEventInput) => ({
+  ...input,
+  kind: 'runtime-event' as const,
+});
+```
 ## CLI
 ```bash
 ushman-ledger record --workspace=<ws> --kind=tool-invocation --phase=capture --summary="capture started"
 ushman-ledger record --workspace=<ws> --kind=agent-patch --phase=cleanup --summary="capture git diff" --rationale="track working tree change" --diff-from-git=HEAD
+ushman-ledger record --workspace=<ws> --kind=stage-write --phase=seed --summary="seed output" --rationale="pipeline wrote candidate" --file-path=src/generated/candidate.ts --stage=seed
 ushman-ledger record --workspace=<ws> --kind=change-log --subkind=smoke --phase=cleanup --summary="scope git diff" --diff-from-git=HEAD --git-paths=src/main.ts,src/cli.ts --git-diff-timeout-ms=10000 --git-diff-max-buffer-bytes=20971520
 ushman-ledger record --workspace=<ws> --kind=operator-decision --phase=cleanup --summary="manual override" --action=ledger-hand-edit --check-id=manual-review --rationale="operator edited the ledger after audit"
 ushman-ledger record --workspace=<ws> --kind=change-log --subkind=semantic-cleanup --phase=cleanup --summary="split schema modules" --diff=/tmp/change.patch --hypothesis="smaller schema modules keep the public API stable" --commands=$'bun test\nbun run typecheck' --smoke-result=pass --parity-status=green --rollback-plan="revert the schema split"
@@ -122,12 +152,15 @@ ushman-ledger render --workspace=<ws> --to=jsonl --out=/tmp/ledger.jsonl
 ushman-ledger render --workspace=<ws> --to=dependency-graph --out=/tmp/ledger.mmd
 ushman-ledger archive --workspace=<ws> --out=/tmp/ledger.tgz
 ushman-ledger doctor --workspace=<ws>
+ushman-ledger doctor --workspace=<ws> --json
 ```
-Valid record kinds: `tool-invocation`, `agent-patch`, `operator-patch`, `operator-decision`, `validator-result`, `runtime-event`, `note`, `correction`, `strip-decision-reverted`, `change-log`
+Valid record kinds: `tool-invocation`, `stage-write`, `agent-patch`, `operator-patch`, `operator-decision`, `validator-result`, `runtime-event`, `note`, `correction`, `strip-decision-reverted`, `change-log`
 Valid phases: `capture`, `intake`, `seed`, `vendor-extract`, `cleanup`, `parity`, `characterize`, `equiv`, `analyze`, `recover`, `ship`, `migration`
+Valid `stage-write` stages: `intake`, `seed`, `vendor-extract`, `cleanup`, `candidate-promotion`
 Valid note subkinds: `regression`, `automation`, `retro`, `operator`, `tooling-gap`, `cleanup-wave`, `verified-flow`, `open-issue`, `decomposition-wave`, `semantic-cleanup-summary`
 Valid render targets: `retro`, `jsonl`, `timeline-html`, `dependency-graph`, `migration-log-md`, `workspace-narrative-md`
@@ -166,14 +199,16 @@ Valid render targets: `retro`, `jsonl`, `timeline-html`, `dependency-graph`, `mi
 - `operator-decision` entries capture `{ action, checkId?, rationale }` in `payload`. `action` is one of `bypass-doctor`, `skip-check`, `override-strip-decision`, `override-ship-state`, `manual-parity-assertion`, `ledger-hand-edit`, `escalation`.
 - `strip-decision-reverted` entries capture `{ stripDecisionId, rationale, invalidatedStages? }`.
 - `correction` entries require `links.correctsLedgerId` pointing at the entry they correct.
+- `stage-write` entries capture `{ filePath, rationale, stage }` for deterministic pipeline-owned writes. `phase` records where the ledger event happened; `stage` records the pipeline stage that produced the artifact.
 - Patch entries (`agent-patch`, `operator-patch`) store a `diff` blob reference and a `rationale`. Patch text is stored once under `.lab/ledger/blobs/` and shared by hash.
 ## Links and coverage
 - `links.affectedFiles` should contain normalized workspace-relative paths for files changed by an `agent-patch` or `operator-patch`.
+- `stage-write.filePath` is always treated as covered output. If one stage write should cover additional normalized paths, include them in `links.affectedFiles`; coverage merges both sources.
 - Use forward slashes, do not prefix paths with `./`, and do not include trailing slashes or `..` segments.
 - Coverage only considers candidate workspace files modified after workspace initialization.
-- A modified file is considered covered when any patch entry lists it in `links.affectedFiles`.
+- A modified file is considered covered when a patch entry lists it in `links.affectedFiles` or a `stage-write` entry names it through `filePath`/`links.affectedFiles`.
 - Coverage is backed by a durable read index so repeated coverage runs do not rescan every patch entry.
 ## Append chain and concurrency
@@ -182,21 +217,22 @@ Valid render targets: `retro`, `jsonl`, `timeline-html`, `dependency-graph`, `mi
 - Manifest updates are serialized through a global ledger lock.
 - Appends use a pending-commit journal so entry files, the manifest, and the durable read index can be replayed after crashes.
 - Open, read, and append paths reconcile unfinished commits before serving ledger data.
-- Corrupt or stale manifest locks are reclaimed automatically with compare-and-swap style quarantine semantics.
+- Corrupt, stale, or dead-owner manifest locks are reclaimed automatically with compare-and-swap style quarantine semantics.
 ## Crash recovery
 - Pending commit journals live under `.lab/ledger/pending/`.
+- Pending commit journals that cannot be replayed are moved to `.lab/ledger/pending-quarantine/` and surfaced by `doctor`.
 - Pending archive journals live under `.lab/ledger/pending-archives/`.
 - Startup reconciliation replays journaled appends in manifest sequence order.
 - Startup reconciliation also rebuilds a missing or stale `read-index.json` from manifest sequence order.
 - Verified pending archives are adopted into the manifest on startup; corrupt or partial pending archives are deleted.
-- Stale temp files are cleaned for phase entries, the manifest, blobs, the read index, and render outputs.
+- Stale temp files are cleaned for phase entries, the manifest, blobs, the read index, canonical render outputs, and tracked custom render outputs.
 ## Recovery & Troubleshooting
 - `.lab/ledger/pending/` contains append journals that let the ledger recover incomplete writes after crashes. Do not edit these files by hand.
-- If startup or `doctor` reports a pending commit mismatch, re-open the ledger or rerun the command first so reconciliation can replay or discard the journal safely.
+- If startup moves a pending commit to `.lab/ledger/pending-quarantine/`, inspect the `doctor` finding and restore the journal only when its entry, sequence, and manifest base are known to be safe.
 - If `doctor` reports manifest or blob corruption, fix the underlying entry/blob mismatch first and rerun `doctor` before attempting `archive`.
 ## Scan behavior
@@ -218,9 +254,11 @@ Valid render targets: `retro`, `jsonl`, `timeline-html`, `dependency-graph`, `mi
 - `--diff-from-git=<ref>` runs `git diff <ref>` against the workspace working tree.
 - `--git-paths=<csv>` safely scopes `git diff` to literal normalized workspace-relative paths. The CLI passes them after `--`, so pathspec magic and shell token splitting are intentionally not supported here.
+- `--git-paths=<csv>` and `--files-changed=<csv>` do not support literal commas inside individual file names.
 - `--git-diff-timeout-ms=<ms>` and `--git-diff-max-buffer-bytes=<bytes>` override the default 30 second timeout and 10 MiB stdout buffer for the git diff helper.
 - The CLI reports usage errors for missing `git`, timed out git diffs, invalid scoped paths, and buffer overflows instead of surfacing raw child-process failures.
 - This is the only CLI feature that depends on an external binary. `git` must be available in `PATH`.
+- `--from-stdin` accepts structured JSON records only. Raw patch text still needs `--diff=<file>` or `--diff-from-git=<ref>`.
 ## Storage shape
@@ -229,7 +267,9 @@ Valid render targets: `retro`, `jsonl`, `timeline-html`, `dependency-graph`, `mi
   .manifest.lock
   manifest.json
   read-index.json
+  external-temp-files/
   pending/
+  pending-quarantine/
   pending-archives/
   blobs/
   render.md
@@ -254,3 +294,72 @@ bun run bench:scale
 - `bun run bench:scale` creates a temporary workspace and benchmarks large-ledger paths for population, limited reads, repeated coverage, repeated doctor runs, and markdown render paths.
 - Override the defaults with environment variables such as `LEDGER_BENCH_ENTRY_COUNT=100000` and `LEDGER_BENCH_CANDIDATE_FILE_COUNT=10000`.
+- The benchmark prints the active ledger runtime tuning so large-ledger measurements can be tied back to the scan/rebuild settings that produced them.
+## Operational limits
+- The manifest sequence counter is capped at `Number.MAX_SAFE_INTEGER`. Appends fail before overflow so sequence ordering stays exact.
+- Patch/blob ingestion is capped at 10 MiB by default across `diffText`, `diffPath`, `storePatchBlob()`, and CLI git-diff capture. Raise `USHMAN_LEDGER_MAX_PATCH_BYTES` only when the larger diff size is intentional and operationally acceptable.
+- CLI git diff capture also defaults to a 30 second timeout and a 10 MiB stdout buffer. Use `--git-diff-timeout-ms` and `--git-diff-max-buffer-bytes` when a specific capture needs more headroom.
+## Runtime tuning
+- `USHMAN_LEDGER_SCAN_BATCH_SIZE`: batch size for list/render/doctor entry reads. Default `32`.
+- `USHMAN_LEDGER_SCAN_CONCURRENCY`: concurrent entry reads for list/render/doctor. Default `16`.
+- `USHMAN_LEDGER_READ_INDEX_REBUILD_BATCH_SIZE`: batch size used when rebuilding `read-index.json`. Defaults to `USHMAN_LEDGER_SCAN_BATCH_SIZE`.
+- `USHMAN_LEDGER_READ_INDEX_REBUILD_CONCURRENCY`: concurrent entry reads used while rebuilding `read-index.json`. Defaults to `USHMAN_LEDGER_SCAN_CONCURRENCY`.
+- `USHMAN_LEDGER_COVERAGE_FILE_STAT_CONCURRENCY`: concurrent `stat()` calls during coverage scans. Defaults to `USHMAN_LEDGER_SCAN_CONCURRENCY`.
+- `USHMAN_LEDGER_BLOB_HASH_CONCURRENCY`: concurrent blob integrity hashes during `doctor`. Defaults to `USHMAN_LEDGER_SCAN_CONCURRENCY`.
+- `USHMAN_LEDGER_DOCTOR_CHECKPOINT_MAX_AGE_MS`: stale window for `pre-change-checkpoint` follow-up findings. Default `86400000`.
+- `USHMAN_LEDGER_DOCTOR_OPEN_ISSUE_MAX_AGE_MS`: stale window for unresolved `open-issue` note findings. Default `2592000000`.
+- `USHMAN_LEDGER_MAX_PATCH_BYTES`: maximum accepted patch/blob input size in bytes. Default `10485760`.
+All runtime tuning values must be positive integers. Invalid values fail fast with an explicit error so automation does not silently run with an unexpected fallback.
+## Doctor output
+- `ushman-ledger doctor` prints human-oriented findings with a stable finding code and a remediation step for each issue.
+- `ushman-ledger doctor --json` returns a machine-readable envelope with `checkedAt`, `ok`, `issueCount`, `issues`, and structured `findings`.
+- Each JSON finding includes a stable `code`, a human `message`, and a `remediation` string intended for operators and CI surfaces.
+Example:
+```bash
+ushman-ledger doctor --workspace="$WS" --json | jq '.findings[] | { code, message, remediation }'
+```
+See [TROUBLESHOOTING.md](TROUBLESHOOTING.md) for the current finding codes and the expected recovery flow.
+## Advanced usage
+Use a stable `idempotencyKey` to bracket a multi-step cleanup wave, then stream the migration log without buffering the whole render in memory:
+```ts
+import {
+  buildChangeLogRecord,
+  openLedger,
+} from 'ushman-ledger';
+const ledger = await openLedger(workspaceRoot);
+await ledger.record(
+  buildChangeLogRecord({
+    emitter: { tool: 'ushman-cli', version: '1.0.0' },
+    idempotencyKey: 'cleanup-wave-7',
+    kind: 'change-log',
+    phase: 'cleanup',
+    subkind: 'pre-change-checkpoint',
+    summary: 'Wave 7 checkpoint',
+  }),
+);
+await ledger.renderTo({
+  to: 'migration-log-md',
+  write: (chunk) => process.stdout.write(chunk),
+});
+```
+## Further reading
+- [ARCHITECTURE.md](ARCHITECTURE.md)
+- [TROUBLESHOOTING.md](TROUBLESHOOTING.md)

package/TROUBLESHOOTING.md ADDED Viewed

@@ -0,0 +1,184 @@
+# ushman-ledger troubleshooting
+Start with:
+```bash
+ushman-ledger doctor --workspace="$WS"
+```
+For automation or CI:
+```bash
+ushman-ledger doctor --workspace="$WS" --json
+```
+The JSON report contains:
+- `ok`: overall health boolean
+- `issueCount`: number of findings
+- `issues`: legacy message-only list
+- `findings`: structured findings with `code`, `message`, `remediation`, and optional metadata
+The default human-readable output prints one finding per block:
+```text
+[finding-code] human-readable message
+Next step: remediation guidance
+```
+## Common findings
+### `manifest-entry-count-mismatch`
+Meaning: `manifest.json` disagrees with the number of durable entry files on disk.
+Action:
+- Re-open the ledger or rerun the failed command first so recovery can replay pending commits.
+- If the mismatch persists, compare `manifest.entryLocations` against the phase directories and repair the missing file or location.
+### `manifest-last-sequence-mismatch`
+Meaning: the manifest sequence counter no longer matches the durable append count.
+Action:
+- Finish recovery first.
+- If recovery is already clean, repair `manifest.lastSequence` so it matches the highest durable append sequence.
+### `manifest-entry-missing-on-disk`
+Meaning: the manifest references an entry id whose file is missing.
+Action:
+- Restore the missing entry file from a known-good copy, or repair the manifest location if it was edited incorrectly.
+- Do not archive until the manifest and disk agree.
+### `manifest-entry-location-missing`
+Meaning: an entry file exists on disk, but `manifest.entryLocations` has no location for that entry id.
+Action:
+- Repair `manifest.entryLocations` so the durable entry is reachable by phase and sequence.
+- Rerun `doctor` afterward to catch any follow-on sequence or latest-pointer drift.
+### `manifest-phase-mismatch`
+Meaning: an entry file exists, but the manifest points at the wrong phase.
+Action:
+- Restore the entry to the correct phase directory or repair `manifest.entryLocations`.
+### `manifest-per-phase-latest-mismatch`
+Meaning: `perPhaseLatest` does not point at the newest entry in that phase.
+Action:
+- Repair the manifest so the latest pointer matches the highest sequence entry in the phase.
+### `manifest-sequence-mismatch`
+Meaning: manifest sequence numbers are no longer contiguous or ordered.
+Action:
+- Repair `manifest.entryLocations` so sequence numbers are gap-free and monotonic.
+### `phase-prev-entry-mismatch`
+Meaning: an entry’s `prevEntryId` no longer matches the prior append in its phase chain.
+Action:
+- Restore the edited entry or repair the phase chain.
+- For content fixes, append a `correction` entry instead of rewriting history in place.
+### `blob-missing`
+Meaning: a patch/blob referenced by an entry is missing under `.lab/ledger/blobs/`.
+Action:
+- Restore the blob file or recreate the patch entry from the original diff.
+### `blob-corrupt`
+Meaning: the blob exists but its SHA-256 digest no longer matches the entry metadata.
+Action:
+- Restore the original blob content or recreate the patch entry from the original diff.
+### `blob-unreadable`
+Meaning: the blob exists but doctor could not read it to verify the digest.
+Action:
+- Fix the filesystem permission or transient I/O problem first.
+- Rerun `doctor` before attempting `archive`.
+### `change-log-smoke-failure-missing-rollback-plan`
+Meaning: a `change-log` entry recorded `smokeResult=fail` without documenting a rollback plan.
+Action:
+- Append a correction or follow-up `change-log` entry that documents the rollback path.
+### `change-log-rollback-missing-target`
+Meaning: a rollback `change-log` entry did not declare which ledger entry it reverts.
+Action:
+- Append a correction or replacement rollback entry with `rollsBack` populated.
+### `pre-change-checkpoint-stale`
+Meaning: a pre-change checkpoint aged past the configured stale window without a follow-up entry using the same `idempotencyKey`. The default window is 24 hours.
+Action:
+- Append the follow-up change-log entry, or close the stale checkpoint with a correction entry explaining the abandoned work.
+### `open-issue-stale`
+Meaning: an `open-issue` note is older than the configured stale window and has no correction/supersession link. The default window is 30 days.
+Action:
+- Append a correction or superseding note linking back to the old issue once the follow-up is complete.
+### `pending-commit-quarantined`
+Meaning: startup recovery found a pending commit journal that could not be parsed, replayed, or reconciled with the current manifest, so it moved the journal to `.lab/ledger/pending-quarantine/` and continued reconciling the rest of the ledger.
+Action:
+- Inspect the finding metadata and matching quarantine files.
+- Start with `ushman-ledger doctor --workspace="$WS" --json` so the finding metadata shows the quarantined file path and recorded reason.
+- Restore the journal to `.lab/ledger/pending/` only if its entry, sequence, and manifest base are known to be safe.
+- Otherwise keep or remove the quarantined file after recording an operator decision for the recovery outcome.
+### `read-failure`
+Meaning: the ledger could not be parsed or reconciled cleanly before checks started.
+Action:
+- Fix invalid JSON in `manifest.json` or `read-index.json`.
+- Re-open the ledger afterward so reconciliation can rebuild any missing derived state.
+## Recovery workflow
+1. Re-open the ledger or rerun the failing ledger command once. Many issues resolve after reconciliation replays pending commits or rebuilds the read index automatically.
+2. Run `doctor --json` and capture the structured findings if the problem persists.
+3. Repair the lowest-level corruption first: missing entry file, missing blob, invalid manifest JSON, or stale manifest pointer.
+4. Rerun `doctor`.
+5. Only run `archive` after `doctor` returns `ok: true`.
+## Common operator scenarios
+### Ledger lock held by a dead process
+Rerun the original ledger command first. Lock reclamation checks the recorded owner PID and stale age during normal startup and append flows, so you usually do not need to delete lock files by hand.
+### Manifest and disk disagree after a crash
+Re-open the ledger first so pending commits can be replayed. If `doctor` still reports `manifest-entry-missing-on-disk` or `manifest-entry-location-missing`, repair the missing entry file or manifest location before archiving.
+### Custom doctor stale windows
+Use `USHMAN_LEDGER_DOCTOR_CHECKPOINT_MAX_AGE_MS` to tune `pre-change-checkpoint-stale` and `USHMAN_LEDGER_DOCTOR_OPEN_ISSUE_MAX_AGE_MS` to tune `open-issue-stale`. Both values must be positive integers expressed in milliseconds; when either variable is unset, its default window applies.
+### Manual ledger edits
+Prefer appending `correction` or `operator-decision` records instead of editing historical entry files directly. If historical files were edited already, use `doctor` to identify the damaged chain or manifest pointers before appending new records.

package/dist/blobs.d.ts CHANGED Viewed

@@ -4,6 +4,9 @@ export type StoredPatchBlob = {
     readonly bytes: number;
     readonly removedLines: number;
 };
+export declare const assertValidBlobSha256: (sha256: string) => string;
+export declare const assertPatchTextWithinLimit: (patchText: string, sourceLabel: string) => number;
+export declare const readPatchTextFromFile: (patchPath: string) => Promise<string>;
 export declare const storePatchBlob: (workspaceRoot: string, patchText: string) => Promise<StoredPatchBlob>;
 export declare const storePatchBlobFromFile: (workspaceRoot: string, patchPath: string) => Promise<StoredPatchBlob>;
 export declare const resolveBlobPath: (workspaceRoot: string, sha256: string) => string;

package/dist/blobs.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"blobs.d.ts","sourceRoot":"","sources":["../src/blobs.ts"],"names":[],"mappings":"~~AAiCA~~,MAAM,MAAM,eAAe,GAAG;IAC1B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;CACjC,CAAC;AAEF,eAAO,MAAM,cAAc,GAAU,eAAe,MAAM,EAAE,WAAW,MAAM,KAAG,OAAO,CAAC,eAAe,~~CAsBtG~~,CAAC;AAEF,eAAO,MAAM,sBAAsB,GAAU,eAAe,MAAM,EAAE,WAAW,MAAM,KAAG,OAAO,CAAC,eAAe,CAE9G,CAAC;AAEF,eAAO,MAAM,eAAe,GAAI,eAAe,MAAM,EAAE,QAAQ,MAAM,~~WAAyC~~,CAAC"}
1	+ {"version":3,"file":"blobs.d.ts","sourceRoot":"","sources":["../src/blobs.ts"],"names":[],"mappings":"AAsDA,MAAM,MAAM,eAAe,GAAG;IAC1B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;CACjC,CAAC;AAEF,eAAO,MAAM,qBAAqB,GAAI,QAAQ,MAAM,WAKnD,CAAC;AAEF,eAAO,MAAM,0BAA0B,GAAI,WAAW,MAAM,EAAE,aAAa,MAAM,WAOhF,CAAC;AAEF,eAAO,MAAM,qBAAqB,GAAU,WAAW,MAAM,oBAO5D,CAAC;AAEF,eAAO,MAAM,cAAc,GAAU,eAAe,MAAM,EAAE,WAAW,MAAM,KAAG,OAAO,CAAC,eAAe,CAmBtG,CAAC;AAEF,eAAO,MAAM,sBAAsB,GAAU,eAAe,MAAM,EAAE,WAAW,MAAM,KAAG,OAAO,CAAC,eAAe,CAE9G,CAAC;AAEF,eAAO,MAAM,eAAe,GAAI,eAAe,MAAM,EAAE,QAAQ,MAAM,WACN,CAAC"}

package/dist/blobs.js CHANGED Viewed

@@ -1,49 +1,75 @@
-import { mkdir, readFile } from 'node:fs/promises';
+import { readFile, stat } from 'node:fs/promises';
 import path from 'node:path';
-import { sha256File, sha256Hex } from "./json.js";
+import { sha256Hex } from "./json.js";
+import { getLedgerRuntimeConfig } from "./runtime-config.js";
 import { resolveLedgerPaths, writeAtomicTextFile } from "./storage/filesystem.js";
+import { forEachLine } from "./text-lines.js";
+const SHA256_HEX_PATTERN = /^[a-f0-9]{64}$/u;
 const countPatchLines = (patchText) => {
     let addedLines = 0;
     let removedLines = 0;
     let insideHunk = false;
-    for (const line of patchText.split(/\r?\n/u)) {
+    forEachLine(patchText, (line) => {
+        if (line.startsWith('diff --git ')) {
+            insideHunk = false;
+            return;
+        }
         if (line.startsWith('@@')) {
             insideHunk = true;
-            continue;
+            return;
         }
         if (!insideHunk || line.startsWith('+++') || line.startsWith('---')) {
-            continue;
+            return;
         }
         if (line.startsWith('+')) {
             addedLines += 1;
-            continue;
+            return;
         }
         if (line.startsWith('-')) {
             removedLines += 1;
         }
-    }
+    });
     return { addedLines, removedLines };
 };
+const formatPatchLimitError = ({ bytes, limitBytes, sourceLabel, }) => `Patch input from ${sourceLabel} is ${bytes} bytes, exceeding the configured limit of ${limitBytes} bytes. Reduce the diff size or increase USHMAN_LEDGER_MAX_PATCH_BYTES.`;
 const buildBlobPath = (workspaceRoot, sha256) => {
     const paths = resolveLedgerPaths(workspaceRoot);
     return path.join(paths.blobsDir, sha256.slice(0, 2), `${sha256}.patch`);
 };
-export const storePatchBlob = async (workspaceRoot, patchText) => {
+export const assertValidBlobSha256 = (sha256) => {
+    if (!SHA256_HEX_PATTERN.test(sha256)) {
+        throw new Error(`Invalid patch blob digest: ${sha256}. Expected a lowercase SHA-256 hex digest.`);
+    }
+    return sha256;
+};
+export const assertPatchTextWithinLimit = (patchText, sourceLabel) => {
+    const limitBytes = getLedgerRuntimeConfig().maxPatchBytes;
     const bytes = Buffer.byteLength(patchText, 'utf8');
+    if (bytes > limitBytes) {
+        throw new Error(formatPatchLimitError({ bytes, limitBytes, sourceLabel }));
+    }
+    return bytes;
+};
+export const readPatchTextFromFile = async (patchPath) => {
+    const fileStat = await stat(patchPath);
+    const limitBytes = getLedgerRuntimeConfig().maxPatchBytes;
+    if (fileStat.size > limitBytes) {
+        throw new Error(formatPatchLimitError({ bytes: fileStat.size, limitBytes, sourceLabel: patchPath }));
+    }
+    return readFile(patchPath, 'utf8');
+};
+export const storePatchBlob = async (workspaceRoot, patchText) => {
+    const bytes = assertPatchTextWithinLimit(patchText, 'inline diff text');
     const blobSha256 = sha256Hex(patchText);
     const blobPath = buildBlobPath(workspaceRoot, blobSha256);
-    let shouldWrite = true;
     try {
-        shouldWrite = (await sha256File(blobPath)) !== blobSha256;
+        await stat(blobPath);
     }
     catch (error) {
         const code = error.code;
         if (code !== 'ENOENT') {
             throw error;
         }
-    }
-    if (shouldWrite) {
-        await mkdir(path.dirname(blobPath), { recursive: true });
         await writeAtomicTextFile(blobPath, patchText);
     }
     return {
@@ -53,6 +79,6 @@ export const storePatchBlob = async (workspaceRoot, patchText) => {
     };
 };
 export const storePatchBlobFromFile = async (workspaceRoot, patchPath) => {
-    return storePatchBlob(workspaceRoot, await readFile(patchPath, 'utf8'));
+    return storePatchBlob(workspaceRoot, await readPatchTextFromFile(patchPath));
 };
-export const resolveBlobPath = (workspaceRoot, sha256) => buildBlobPath(workspaceRoot, sha256);
+export const resolveBlobPath = (workspaceRoot, sha256) => buildBlobPath(workspaceRoot, assertValidBlobSha256(sha256));