ushman-ledger 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/AGENTS.md +7 -5
  2. package/ARCHITECTURE.md +85 -0
  3. package/CHANGELOG.md +11 -0
  4. package/README.md +114 -5
  5. package/TROUBLESHOOTING.md +184 -0
  6. package/dist/blobs.d.ts +3 -0
  7. package/dist/blobs.d.ts.map +1 -1
  8. package/dist/blobs.js +41 -15
  9. package/dist/builders.d.ts +33 -0
  10. package/dist/builders.d.ts.map +1 -1
  11. package/dist/builders.js +10 -1
  12. package/dist/cli.d.ts.map +1 -1
  13. package/dist/cli.js +176 -59
  14. package/dist/coverage.d.ts.map +1 -1
  15. package/dist/coverage.js +3 -2
  16. package/dist/doctor.d.ts +17 -4
  17. package/dist/doctor.d.ts.map +1 -1
  18. package/dist/doctor.js +263 -62
  19. package/dist/handle.d.ts.map +1 -1
  20. package/dist/handle.js +67 -30
  21. package/dist/helpers.d.ts +1 -0
  22. package/dist/helpers.d.ts.map +1 -1
  23. package/dist/helpers.js +23 -0
  24. package/dist/index.d.ts +4 -2
  25. package/dist/index.d.ts.map +1 -1
  26. package/dist/index.js +4 -2
  27. package/dist/list.d.ts +34 -1
  28. package/dist/list.d.ts.map +1 -1
  29. package/dist/list.js +19 -9
  30. package/dist/patch-resolver.d.ts.map +1 -1
  31. package/dist/patch-resolver.js +193 -53
  32. package/dist/process.d.ts +2 -0
  33. package/dist/process.d.ts.map +1 -0
  34. package/dist/process.js +16 -0
  35. package/dist/read-index.d.ts +7 -7
  36. package/dist/read-index.d.ts.map +1 -1
  37. package/dist/read-index.js +18 -13
  38. package/dist/record.js +2 -2
  39. package/dist/recovery.d.ts +8 -0
  40. package/dist/recovery.d.ts.map +1 -1
  41. package/dist/recovery.js +142 -30
  42. package/dist/render/retro.d.ts.map +1 -1
  43. package/dist/render/retro.js +4 -1
  44. package/dist/runtime-config.d.ts +14 -0
  45. package/dist/runtime-config.d.ts.map +1 -0
  46. package/dist/runtime-config.js +97 -0
  47. package/dist/schema/entry-core.d.ts +5 -2
  48. package/dist/schema/entry-core.d.ts.map +1 -1
  49. package/dist/schema/entry-core.js +3 -0
  50. package/dist/schema/entry-read.d.ts +57 -0
  51. package/dist/schema/entry-read.d.ts.map +1 -1
  52. package/dist/schema/entry-read.js +9 -1
  53. package/dist/schema/entry-write.d.ts +51 -0
  54. package/dist/schema/entry-write.d.ts.map +1 -1
  55. package/dist/schema/entry-write.js +9 -1
  56. package/dist/storage/filesystem.d.ts +15 -2
  57. package/dist/storage/filesystem.d.ts.map +1 -1
  58. package/dist/storage/filesystem.js +234 -37
  59. package/dist/storage/lock.d.ts.map +1 -1
  60. package/dist/storage/lock.js +38 -16
  61. package/dist/text-lines.d.ts +8 -0
  62. package/dist/text-lines.d.ts.map +1 -0
  63. package/dist/text-lines.js +20 -0
  64. package/dist/version.d.ts +1 -1
  65. package/dist/version.d.ts.map +1 -1
  66. package/dist/version.js +2 -1
  67. package/package.json +4 -2
package/AGENTS.md CHANGED
@@ -29,11 +29,13 @@ An append-only ledger library and CLI for ushman v4 workspaces. It owns ledger s
29
29
  5. `src/schema/entry-migrations.ts`
30
30
  6. `src/schema/entry.ts`
31
31
  7. `src/storage/filesystem.ts`
32
- 8. `src/record.ts`
33
- 9. `src/handle.ts`
34
- 10. `src/coverage.ts`
35
- 11. `src/doctor.ts`
36
- 12. `src/cli.ts`
32
+ 8. `src/recovery.ts`
33
+ 9. `src/record.ts`
34
+ 10. `src/handle.ts`
35
+ 11. `src/runtime-config.ts`
36
+ 12. `src/coverage.ts`
37
+ 13. `src/doctor.ts`
38
+ 14. `src/cli.ts`
37
39
 
38
40
  ## Commands
39
41
 
@@ -0,0 +1,85 @@
1
+ # ushman-ledger architecture
2
+
3
+ This package owns append-only ledger persistence for ushman v4 workspaces. It does not orchestrate pipelines or act as a general database layer.
4
+
5
+ ## Core flow
6
+
7
+ 1. `openLedger()` validates the workspace and reconciles any pending crash-recovery state.
8
+ 2. `record()` normalizes the input, resolves any patch/blob payload, and acquires the manifest lock.
9
+ 3. A pending commit journal is written first, so the entry file, manifest update, and read-index update can be reconciled if a crash leaves them out of sync.
10
+ 4. The entry file is written atomically into the phase directory.
11
+ 5. `manifest.json` is updated atomically.
12
+ 6. `read-index.json` is updated atomically.
13
+ 7. The pending journal is removed only after the manifest and read index are durable.
14
+
15
+ The same reconciliation path is used by reads, writes, coverage, doctor, render, and archive entrypoints so callers do not need a separate recovery command.
16
+
17
+ ## Storage model
18
+
19
+ `<workspace>/.lab/ledger/`
20
+
21
+ - `manifest.json`: append-order metadata, per-phase pointers, idempotency index, and archive metadata.
22
+ - `read-index.json`: durable lightweight scan index used by list/render/coverage/doctor.
23
+ - `external-temp-files/`: journals for custom render temp files created outside `.lab/ledger`.
24
+ - `pending/`: append journals used to replay incomplete commits.
25
+ - `pending-quarantine/`: unreplayable pending commit journals plus quarantine metadata.
26
+ - `pending-archives/`: archive journals used to adopt or discard incomplete archive writes.
27
+ - `blobs/`: content-addressed patch payloads keyed by SHA-256.
28
+ - `<phase>/`: append-only entry files per ledger phase.
29
+
30
+ ## Append semantics
31
+
32
+ - Entries are immutable once written. Corrections use `correction` records linked to the original entry.
33
+ - Each phase maintains its own `prevEntryId` chain so local append history is explicit and auditable.
34
+ - Idempotency is logical-content based by default and can be overridden with an explicit `idempotencyKey`.
35
+ - `stage-write` records treat `filePath` as the required primary covered artifact and may extend coverage with extra normalized `links.affectedFiles`.
36
+ - Patch payloads are de-duplicated by SHA-256 and stored once under `blobs/`.
37
+
38
+ ## Read path
39
+
40
+ - `list`, render, coverage, and doctor iterate entries in manifest sequence order.
41
+ - The durable read index avoids re-sorting `manifest.entryLocations` on every scan.
42
+ - Limited reads filter from the read index first, then materialize only the last matching entries from disk.
43
+ - Multiple readers can run concurrently. Writers serialize only around manifest mutation and recovery work that must observe a consistent durable state.
44
+
45
+ ## Recovery model
46
+
47
+ - Atomic writes use temp files and rename.
48
+ - Pending commit journals record the target sequence before the entry file and manifest can drift.
49
+ - Startup reconciliation replays pending commits in manifest order, quarantines unreplayable pending commits, rebuilds missing or stale read indexes, adopts verified archives, and cleans stale temp files.
50
+ - Manifest locks are reclaimed after stale-owner or dead-owner checks, so a newer owner is not deleted during handoff.
51
+ - Re-opening the ledger or rerunning any CLI command triggers the same recovery path; there is no separate manual recovery subcommand.
52
+
53
+ ## Scale and tuning
54
+
55
+ The default scan contract is intentionally conservative and can be tuned with environment variables:
56
+
57
+ - `USHMAN_LEDGER_SCAN_BATCH_SIZE`
58
+ - `USHMAN_LEDGER_SCAN_CONCURRENCY`
59
+ - `USHMAN_LEDGER_READ_INDEX_REBUILD_BATCH_SIZE`
60
+ - `USHMAN_LEDGER_READ_INDEX_REBUILD_CONCURRENCY`
61
+ - `USHMAN_LEDGER_COVERAGE_FILE_STAT_CONCURRENCY`
62
+ - `USHMAN_LEDGER_BLOB_HASH_CONCURRENCY`
63
+ - `USHMAN_LEDGER_DOCTOR_CHECKPOINT_MAX_AGE_MS`
64
+ - `USHMAN_LEDGER_DOCTOR_OPEN_ISSUE_MAX_AGE_MS`
65
+ - `USHMAN_LEDGER_MAX_PATCH_BYTES`
66
+
67
+ The benchmark entrypoint, `bun run bench:scale`, prints the active runtime config so measurements can be compared across different knob settings.
68
+
69
+ ## Operational ceilings
70
+
71
+ - Manifest sequence numbers stop at `Number.MAX_SAFE_INTEGER`.
72
+ - Patch/blob ingestion defaults to a 10 MiB ceiling unless `USHMAN_LEDGER_MAX_PATCH_BYTES` is raised.
73
+ - Git diff capture is the only external-binary dependency and defaults to a 30 second timeout plus a 10 MiB stdout buffer.
74
+
75
+ ## Contributor map
76
+
77
+ - `src/blobs.ts`: patch blob storage, size limits, and digest/path validation.
78
+ - `src/storage/lock.ts`: manifest lock acquisition, stale-owner reclaim, and reclaim-marker turnover.
79
+ - `src/record.ts`: append pipeline and idempotency behavior.
80
+ - `src/recovery.ts`: pending journal replay, read-index rebuild, and temp cleanup.
81
+ - `src/read-index.ts`: durable scan index construction and maintenance.
82
+ - `src/list.ts`: manifest/read-index iteration logic.
83
+ - `src/coverage.ts`: candidate file scan plus read-index-backed coverage calculation.
84
+ - `src/doctor.ts`: integrity checks plus troubleshooting-oriented findings.
85
+ - `src/cli.ts`: CLI argument handling and automation-facing output.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.2.0] - 2026-05-28
4
+
5
+ - Added `stage-write` ledger records for deterministic pipeline-owned candidate writes.
6
+ - `stage-write` now accepts `intake` alongside the later candidate-mutating stage ids so intake-owned writes such as auto-stubs and workspace runbooks can be recorded without falling back to patch semantics.
7
+ - Read-index coverage now treats `stage-write.filePath` the same as patch `affectedFiles`, so downstream coverage checks can distinguish stage output from operator edits without losing coverage accounting.
8
+ - Retro rendering now includes `stage-write` entries in the tool/action stream.
9
+ - Custom render outputs now journal their external temp files so startup recovery can clean stale crash leftovers outside `.lab/ledger`.
10
+ - Lock recovery now reclaims locks immediately when the recorded owner PID is no longer alive.
11
+ - Pending commit recovery now quarantines unreplayable journals and reports them through `doctor` instead of failing the whole reconciliation pass.
12
+ - Doctor stale-age windows for pre-change checkpoints and open issues are configurable through runtime environment variables.
13
+
3
14
  ## [1.1.0] - 2026-05-23
4
15
 
5
16
  - Added `change-log` records, narrative note subkinds, and `migration-log-md` / `workspace-narrative-md` render targets.
package/README.md CHANGED
@@ -33,8 +33,10 @@ import {
33
33
  appendSemanticCleanupSummaryNote,
34
34
  type BuildRecordInput,
35
35
  buildChangeLogRecord,
36
+ buildStageWriteRecord,
36
37
  buildValidatorResultRecord,
37
38
  deriveFilesChangedFromPatch,
39
+ getLedgerRuntimeConfig,
38
40
  openLedger,
39
41
  } from 'ushman-ledger';
40
42
 
@@ -74,6 +76,17 @@ await ledger.record(
74
76
  }),
75
77
  );
76
78
 
79
+ await ledger.record(
80
+ buildStageWriteRecord({
81
+ emitter: { tool: 'ushman-seed', version: '1.0.0' },
82
+ filePath: 'src/generated/candidate.ts',
83
+ phase: 'seed',
84
+ rationale: 'Seed stage wrote the initial candidate artifact.',
85
+ stage: 'seed',
86
+ summary: 'seed candidate output',
87
+ }),
88
+ );
89
+
77
90
  await ledger.record(
78
91
  buildValidatorResultRecord({
79
92
  emitter: { tool: 'ushman-doctor', version: '1.0.0' },
@@ -100,15 +113,32 @@ await ledger.render({ to: 'retro' });
100
113
  await ledger.render({ to: 'migration-log-md' });
101
114
  await ledger.renderTo({ to: 'migration-log-md', out: '/tmp/migration-log.md' });
102
115
  await ledger.archive('/tmp/ledger.tgz');
116
+
117
+ const config = getLedgerRuntimeConfig();
118
+ console.log(`Using scan concurrency ${config.scanConcurrency}`);
103
119
  ```
104
120
 
105
121
  Advanced builder wrappers can reuse the exported `BuildRecordInput<T>` helper type when they want the same input contract the built-in builders accept.
106
122
 
123
+ Example:
124
+
125
+ ```ts
126
+ import type { BuildRecordInput, LedgerRecord } from 'ushman-ledger';
127
+
128
+ type RuntimeEventInput = BuildRecordInput<Extract<LedgerRecord, { kind: 'runtime-event' }>>;
129
+
130
+ const buildRuntimeEventRecord = (input: RuntimeEventInput) => ({
131
+ ...input,
132
+ kind: 'runtime-event' as const,
133
+ });
134
+ ```
135
+
107
136
  ## CLI
108
137
 
109
138
  ```bash
110
139
  ushman-ledger record --workspace=<ws> --kind=tool-invocation --phase=capture --summary="capture started"
111
140
  ushman-ledger record --workspace=<ws> --kind=agent-patch --phase=cleanup --summary="capture git diff" --rationale="track working tree change" --diff-from-git=HEAD
141
+ ushman-ledger record --workspace=<ws> --kind=stage-write --phase=seed --summary="seed output" --rationale="pipeline wrote candidate" --file-path=src/generated/candidate.ts --stage=seed
112
142
  ushman-ledger record --workspace=<ws> --kind=change-log --subkind=smoke --phase=cleanup --summary="scope git diff" --diff-from-git=HEAD --git-paths=src/main.ts,src/cli.ts --git-diff-timeout-ms=10000 --git-diff-max-buffer-bytes=20971520
113
143
  ushman-ledger record --workspace=<ws> --kind=operator-decision --phase=cleanup --summary="manual override" --action=ledger-hand-edit --check-id=manual-review --rationale="operator edited the ledger after audit"
114
144
  ushman-ledger record --workspace=<ws> --kind=change-log --subkind=semantic-cleanup --phase=cleanup --summary="split schema modules" --diff=/tmp/change.patch --hypothesis="smaller schema modules keep the public API stable" --commands=$'bun test\nbun run typecheck' --smoke-result=pass --parity-status=green --rollback-plan="revert the schema split"
@@ -122,12 +152,15 @@ ushman-ledger render --workspace=<ws> --to=jsonl --out=/tmp/ledger.jsonl
122
152
  ushman-ledger render --workspace=<ws> --to=dependency-graph --out=/tmp/ledger.mmd
123
153
  ushman-ledger archive --workspace=<ws> --out=/tmp/ledger.tgz
124
154
  ushman-ledger doctor --workspace=<ws>
155
+ ushman-ledger doctor --workspace=<ws> --json
125
156
  ```
126
157
 
127
- Valid record kinds: `tool-invocation`, `agent-patch`, `operator-patch`, `operator-decision`, `validator-result`, `runtime-event`, `note`, `correction`, `strip-decision-reverted`, `change-log`
158
+ Valid record kinds: `tool-invocation`, `stage-write`, `agent-patch`, `operator-patch`, `operator-decision`, `validator-result`, `runtime-event`, `note`, `correction`, `strip-decision-reverted`, `change-log`
128
159
 
129
160
  Valid phases: `capture`, `intake`, `seed`, `vendor-extract`, `cleanup`, `parity`, `characterize`, `equiv`, `analyze`, `recover`, `ship`, `migration`
130
161
 
162
+ Valid `stage-write` stages: `intake`, `seed`, `vendor-extract`, `cleanup`, `candidate-promotion`
163
+
131
164
  Valid note subkinds: `regression`, `automation`, `retro`, `operator`, `tooling-gap`, `cleanup-wave`, `verified-flow`, `open-issue`, `decomposition-wave`, `semantic-cleanup-summary`
132
165
 
133
166
  Valid render targets: `retro`, `jsonl`, `timeline-html`, `dependency-graph`, `migration-log-md`, `workspace-narrative-md`
@@ -166,14 +199,16 @@ Valid render targets: `retro`, `jsonl`, `timeline-html`, `dependency-graph`, `mi
166
199
  - `operator-decision` entries capture `{ action, checkId?, rationale }` in `payload`. `action` is one of `bypass-doctor`, `skip-check`, `override-strip-decision`, `override-ship-state`, `manual-parity-assertion`, `ledger-hand-edit`, `escalation`.
167
200
  - `strip-decision-reverted` entries capture `{ stripDecisionId, rationale, invalidatedStages? }`.
168
201
  - `correction` entries require `links.correctsLedgerId` pointing at the entry they correct.
202
+ - `stage-write` entries capture `{ filePath, rationale, stage }` for deterministic pipeline-owned writes. `phase` records where the ledger event happened; `stage` records the pipeline stage that produced the artifact.
169
203
  - Patch entries (`agent-patch`, `operator-patch`) store a `diff` blob reference and a `rationale`. Patch text is stored once under `.lab/ledger/blobs/` and shared by hash.
170
204
 
171
205
  ## Links and coverage
172
206
 
173
207
  - `links.affectedFiles` should contain normalized workspace-relative paths for files changed by an `agent-patch` or `operator-patch`.
208
+ - `stage-write.filePath` is always treated as covered output. If one stage write should cover additional normalized paths, include them in `links.affectedFiles`; coverage merges both sources.
174
209
  - Use forward slashes, do not prefix paths with `./`, and do not include trailing slashes or `..` segments.
175
210
  - Coverage only considers candidate workspace files modified after workspace initialization.
176
- - A modified file is considered covered when any patch entry lists it in `links.affectedFiles`.
211
+ - A modified file is considered covered when a patch entry lists it in `links.affectedFiles` or a `stage-write` entry names it through `filePath`/`links.affectedFiles`.
177
212
  - Coverage is backed by a durable read index so repeated coverage runs do not rescan every patch entry.
178
213
 
179
214
  ## Append chain and concurrency
@@ -182,21 +217,22 @@ Valid render targets: `retro`, `jsonl`, `timeline-html`, `dependency-graph`, `mi
182
217
  - Manifest updates are serialized through a global ledger lock.
183
218
  - Appends use a pending-commit journal so entry files, the manifest, and the durable read index can be replayed after crashes.
184
219
  - Open, read, and append paths reconcile unfinished commits before serving ledger data.
185
- - Corrupt or stale manifest locks are reclaimed automatically with compare-and-swap style quarantine semantics.
220
+ - Corrupt, stale, or dead-owner manifest locks are reclaimed automatically with compare-and-swap style quarantine semantics.
186
221
 
187
222
  ## Crash recovery
188
223
 
189
224
  - Pending commit journals live under `.lab/ledger/pending/`.
225
+ - Pending commit journals that cannot be replayed are moved to `.lab/ledger/pending-quarantine/` and surfaced by `doctor`.
190
226
  - Pending archive journals live under `.lab/ledger/pending-archives/`.
191
227
  - Startup reconciliation replays journaled appends in manifest sequence order.
192
228
  - Startup reconciliation also rebuilds a missing or stale `read-index.json` from manifest sequence order.
193
229
  - Verified pending archives are adopted into the manifest on startup; corrupt or partial pending archives are deleted.
194
- - Stale temp files are cleaned for phase entries, the manifest, blobs, the read index, and render outputs.
230
+ - Stale temp files are cleaned for phase entries, the manifest, blobs, the read index, canonical render outputs, and tracked custom render outputs.
195
231
 
196
232
  ## Recovery & Troubleshooting
197
233
 
198
234
  - `.lab/ledger/pending/` contains append journals that let the ledger recover incomplete writes after crashes. Do not edit these files by hand.
199
- - If startup or `doctor` reports a pending commit mismatch, re-open the ledger or rerun the command first so reconciliation can replay or discard the journal safely.
235
+ - If startup moves a pending commit to `.lab/ledger/pending-quarantine/`, inspect the `doctor` finding and restore the journal only when its entry, sequence, and manifest base are known to be safe.
200
236
  - If `doctor` reports manifest or blob corruption, fix the underlying entry/blob mismatch first and rerun `doctor` before attempting `archive`.
201
237
 
202
238
  ## Scan behavior
@@ -218,9 +254,11 @@ Valid render targets: `retro`, `jsonl`, `timeline-html`, `dependency-graph`, `mi
218
254
 
219
255
  - `--diff-from-git=<ref>` runs `git diff <ref>` against the workspace working tree.
220
256
  - `--git-paths=<csv>` safely scopes `git diff` to literal normalized workspace-relative paths. The CLI passes them after `--`, so pathspec magic and shell token splitting are intentionally not supported here.
257
+ - `--git-paths=<csv>` and `--files-changed=<csv>` do not support literal commas inside individual file names.
221
258
  - `--git-diff-timeout-ms=<ms>` and `--git-diff-max-buffer-bytes=<bytes>` override the default 30 second timeout and 10 MiB stdout buffer for the git diff helper.
222
259
  - The CLI reports usage errors for missing `git`, timed out git diffs, invalid scoped paths, and buffer overflows instead of surfacing raw child-process failures.
223
260
  - This is the only CLI feature that depends on an external binary. `git` must be available in `PATH`.
261
+ - `--from-stdin` accepts structured JSON records only. Raw patch text still needs `--diff=<file>` or `--diff-from-git=<ref>`.
224
262
 
225
263
  ## Storage shape
226
264
 
@@ -229,7 +267,9 @@ Valid render targets: `retro`, `jsonl`, `timeline-html`, `dependency-graph`, `mi
229
267
  .manifest.lock
230
268
  manifest.json
231
269
  read-index.json
270
+ external-temp-files/
232
271
  pending/
272
+ pending-quarantine/
233
273
  pending-archives/
234
274
  blobs/
235
275
  render.md
@@ -254,3 +294,72 @@ bun run bench:scale
254
294
 
255
295
  - `bun run bench:scale` creates a temporary workspace and benchmarks large-ledger paths for population, limited reads, repeated coverage, repeated doctor runs, and markdown render paths.
256
296
  - Override the defaults with environment variables such as `LEDGER_BENCH_ENTRY_COUNT=100000` and `LEDGER_BENCH_CANDIDATE_FILE_COUNT=10000`.
297
+ - The benchmark prints the active ledger runtime tuning so large-ledger measurements can be tied back to the scan/rebuild settings that produced them.
298
+
299
+ ## Operational limits
300
+
301
+ - The manifest sequence counter is capped at `Number.MAX_SAFE_INTEGER`. Appends fail before overflow so sequence ordering stays exact.
302
+ - Patch/blob ingestion is capped at 10 MiB by default across `diffText`, `diffPath`, `storePatchBlob()`, and CLI git-diff capture. Raise `USHMAN_LEDGER_MAX_PATCH_BYTES` only when the larger diff size is intentional and operationally acceptable.
303
+ - CLI git diff capture also defaults to a 30 second timeout and a 10 MiB stdout buffer. Use `--git-diff-timeout-ms` and `--git-diff-max-buffer-bytes` when a specific capture needs more headroom.
304
+
305
+ ## Runtime tuning
306
+
307
+ - `USHMAN_LEDGER_SCAN_BATCH_SIZE`: batch size for list/render/doctor entry reads. Default `32`.
308
+ - `USHMAN_LEDGER_SCAN_CONCURRENCY`: concurrent entry reads for list/render/doctor. Default `16`.
309
+ - `USHMAN_LEDGER_READ_INDEX_REBUILD_BATCH_SIZE`: batch size used when rebuilding `read-index.json`. Defaults to `USHMAN_LEDGER_SCAN_BATCH_SIZE`.
310
+ - `USHMAN_LEDGER_READ_INDEX_REBUILD_CONCURRENCY`: concurrent entry reads used while rebuilding `read-index.json`. Defaults to `USHMAN_LEDGER_SCAN_CONCURRENCY`.
311
+ - `USHMAN_LEDGER_COVERAGE_FILE_STAT_CONCURRENCY`: concurrent `stat()` calls during coverage scans. Defaults to `USHMAN_LEDGER_SCAN_CONCURRENCY`.
312
+ - `USHMAN_LEDGER_BLOB_HASH_CONCURRENCY`: concurrent blob integrity hashes during `doctor`. Defaults to `USHMAN_LEDGER_SCAN_CONCURRENCY`.
313
+ - `USHMAN_LEDGER_DOCTOR_CHECKPOINT_MAX_AGE_MS`: stale window for `pre-change-checkpoint` follow-up findings. Default `86400000`.
314
+ - `USHMAN_LEDGER_DOCTOR_OPEN_ISSUE_MAX_AGE_MS`: stale window for unresolved `open-issue` note findings. Default `2592000000`.
315
+ - `USHMAN_LEDGER_MAX_PATCH_BYTES`: maximum accepted patch/blob input size in bytes. Default `10485760`.
316
+
317
+ All runtime tuning values must be positive integers. Invalid values fail fast with an explicit error so automation does not silently run with an unexpected fallback.
318
+
319
+ ## Doctor output
320
+
321
+ - `ushman-ledger doctor` prints human-oriented findings with a stable finding code and a remediation step for each issue.
322
+ - `ushman-ledger doctor --json` returns a machine-readable envelope with `checkedAt`, `ok`, `issueCount`, `issues`, and structured `findings`.
323
+ - Each JSON finding includes a stable `code`, a human `message`, and a `remediation` string intended for operators and CI surfaces.
324
+
325
+ Example:
326
+
327
+ ```bash
328
+ ushman-ledger doctor --workspace="$WS" --json | jq '.findings[] | { code, message, remediation }'
329
+ ```
330
+
331
+ See [TROUBLESHOOTING.md](TROUBLESHOOTING.md) for the current finding codes and the expected recovery flow.
332
+
333
+ ## Advanced usage
334
+
335
+ Use a stable `idempotencyKey` to bracket a multi-step cleanup wave, then stream the migration log without buffering the whole render in memory:
336
+
337
+ ```ts
338
+ import {
339
+ buildChangeLogRecord,
340
+ openLedger,
341
+ } from 'ushman-ledger';
342
+
343
+ const ledger = await openLedger(workspaceRoot);
344
+
345
+ await ledger.record(
346
+ buildChangeLogRecord({
347
+ emitter: { tool: 'ushman-cli', version: '1.0.0' },
348
+ idempotencyKey: 'cleanup-wave-7',
349
+ kind: 'change-log',
350
+ phase: 'cleanup',
351
+ subkind: 'pre-change-checkpoint',
352
+ summary: 'Wave 7 checkpoint',
353
+ }),
354
+ );
355
+
356
+ await ledger.renderTo({
357
+ to: 'migration-log-md',
358
+ write: (chunk) => process.stdout.write(chunk),
359
+ });
360
+ ```
361
+
362
+ ## Further reading
363
+
364
+ - [ARCHITECTURE.md](ARCHITECTURE.md)
365
+ - [TROUBLESHOOTING.md](TROUBLESHOOTING.md)
@@ -0,0 +1,184 @@
1
+ # ushman-ledger troubleshooting
2
+
3
+ Start with:
4
+
5
+ ```bash
6
+ ushman-ledger doctor --workspace="$WS"
7
+ ```
8
+
9
+ For automation or CI:
10
+
11
+ ```bash
12
+ ushman-ledger doctor --workspace="$WS" --json
13
+ ```
14
+
15
+ The JSON report contains:
16
+
17
+ - `ok`: overall health boolean
18
+ - `issueCount`: number of findings
19
+ - `issues`: legacy message-only list
20
+ - `findings`: structured findings with `code`, `message`, `remediation`, and optional metadata
21
+
22
+ The default human-readable output prints one finding per block:
23
+
24
+ ```text
25
+ [finding-code] human-readable message
26
+ Next step: remediation guidance
27
+ ```
28
+
29
+ ## Common findings
30
+
31
+ ### `manifest-entry-count-mismatch`
32
+
33
+ Meaning: `manifest.json` disagrees with the number of durable entry files on disk.
34
+
35
+ Action:
36
+ - Re-open the ledger or rerun the failed command first so recovery can replay pending commits.
37
+ - If the mismatch persists, compare `manifest.entryLocations` against the phase directories and repair the missing file or location.
38
+
39
+ ### `manifest-last-sequence-mismatch`
40
+
41
+ Meaning: the manifest sequence counter no longer matches the durable append count.
42
+
43
+ Action:
44
+ - Finish recovery first.
45
+ - If recovery is already clean, repair `manifest.lastSequence` so it matches the highest durable append sequence.
46
+
47
+ ### `manifest-entry-missing-on-disk`
48
+
49
+ Meaning: the manifest references an entry id whose file is missing.
50
+
51
+ Action:
52
+ - Restore the missing entry file from a known-good copy, or repair the manifest location if it was edited incorrectly.
53
+ - Do not archive until the manifest and disk agree.
54
+
55
+ ### `manifest-entry-location-missing`
56
+
57
+ Meaning: an entry file exists on disk, but `manifest.entryLocations` has no location for that entry id.
58
+
59
+ Action:
60
+ - Repair `manifest.entryLocations` so the durable entry is reachable by phase and sequence.
61
+ - Rerun `doctor` afterward to catch any follow-on sequence or latest-pointer drift.
62
+
63
+ ### `manifest-phase-mismatch`
64
+
65
+ Meaning: an entry file exists, but the manifest points at the wrong phase.
66
+
67
+ Action:
68
+ - Restore the entry to the correct phase directory or repair `manifest.entryLocations`.
69
+
70
+ ### `manifest-per-phase-latest-mismatch`
71
+
72
+ Meaning: `perPhaseLatest` does not point at the newest entry in that phase.
73
+
74
+ Action:
75
+ - Repair the manifest so the latest pointer matches the highest sequence entry in the phase.
76
+
77
+ ### `manifest-sequence-mismatch`
78
+
79
+ Meaning: manifest sequence numbers are no longer contiguous or ordered.
80
+
81
+ Action:
82
+ - Repair `manifest.entryLocations` so sequence numbers are gap-free and monotonic.
83
+
84
+ ### `phase-prev-entry-mismatch`
85
+
86
+ Meaning: an entry’s `prevEntryId` no longer matches the prior append in its phase chain.
87
+
88
+ Action:
89
+ - Restore the edited entry or repair the phase chain.
90
+ - For content fixes, append a `correction` entry instead of rewriting history in place.
91
+
92
+ ### `blob-missing`
93
+
94
+ Meaning: a patch/blob referenced by an entry is missing under `.lab/ledger/blobs/`.
95
+
96
+ Action:
97
+ - Restore the blob file or recreate the patch entry from the original diff.
98
+
99
+ ### `blob-corrupt`
100
+
101
+ Meaning: the blob exists but its SHA-256 digest no longer matches the entry metadata.
102
+
103
+ Action:
104
+ - Restore the original blob content or recreate the patch entry from the original diff.
105
+
106
+ ### `blob-unreadable`
107
+
108
+ Meaning: the blob exists but doctor could not read it to verify the digest.
109
+
110
+ Action:
111
+ - Fix the filesystem permission or transient I/O problem first.
112
+ - Rerun `doctor` before attempting `archive`.
113
+
114
+ ### `change-log-smoke-failure-missing-rollback-plan`
115
+
116
+ Meaning: a `change-log` entry recorded `smokeResult=fail` without documenting a rollback plan.
117
+
118
+ Action:
119
+ - Append a correction or follow-up `change-log` entry that documents the rollback path.
120
+
121
+ ### `change-log-rollback-missing-target`
122
+
123
+ Meaning: a rollback `change-log` entry did not declare which ledger entry it reverts.
124
+
125
+ Action:
126
+ - Append a correction or replacement rollback entry with `rollsBack` populated.
127
+
128
+ ### `pre-change-checkpoint-stale`
129
+
130
+ Meaning: a pre-change checkpoint aged past the configured stale window without a follow-up entry using the same `idempotencyKey`. The default window is 24 hours.
131
+
132
+ Action:
133
+ - Append the follow-up change-log entry, or close the stale checkpoint with a correction entry explaining the abandoned work.
134
+
135
+ ### `open-issue-stale`
136
+
137
+ Meaning: an `open-issue` note is older than the configured stale window and has no correction/supersession link. The default window is 30 days.
138
+
139
+ Action:
140
+ - Append a correction or superseding note linking back to the old issue once the follow-up is complete.
141
+
142
+ ### `pending-commit-quarantined`
143
+
144
+ Meaning: startup recovery found a pending commit journal that could not be parsed, replayed, or reconciled with the current manifest, so it moved the journal to `.lab/ledger/pending-quarantine/` and continued reconciling the rest of the ledger.
145
+
146
+ Action:
147
+ - Inspect the finding metadata and matching quarantine files.
148
+ - Start with `ushman-ledger doctor --workspace="$WS" --json` so the finding metadata shows the quarantined file path and recorded reason.
149
+ - Restore the journal to `.lab/ledger/pending/` only if its entry, sequence, and manifest base are known to be safe.
150
+ - Otherwise keep or remove the quarantined file after recording an operator decision for the recovery outcome.
151
+
152
+ ### `read-failure`
153
+
154
+ Meaning: the ledger could not be parsed or reconciled cleanly before checks started.
155
+
156
+ Action:
157
+ - Fix invalid JSON in `manifest.json` or `read-index.json`.
158
+ - Re-open the ledger afterward so reconciliation can rebuild any missing derived state.
159
+
160
+ ## Recovery workflow
161
+
162
+ 1. Re-open the ledger or rerun the failing ledger command once. Many issues resolve after reconciliation replays pending commits or rebuilds the read index automatically.
163
+ 2. Run `doctor --json` and capture the structured findings if the problem persists.
164
+ 3. Repair the lowest-level corruption first: missing entry file, missing blob, invalid manifest JSON, or stale manifest pointer.
165
+ 4. Rerun `doctor`.
166
+ 5. Only run `archive` after `doctor` returns `ok: true`.
167
+
168
+ ## Common operator scenarios
169
+
170
+ ### Ledger lock held by a dead process
171
+
172
+ Rerun the original ledger command first. Lock reclamation checks the recorded owner PID and stale age during normal startup and append flows, so you usually do not need to delete lock files by hand.
173
+
174
+ ### Manifest and disk disagree after a crash
175
+
176
+ Re-open the ledger first so pending commits can be replayed. If `doctor` still reports `manifest-entry-missing-on-disk` or `manifest-entry-location-missing`, repair the missing entry file or manifest location before archiving.
177
+
178
+ ### Custom doctor stale windows
179
+
180
+ Use `USHMAN_LEDGER_DOCTOR_CHECKPOINT_MAX_AGE_MS` to tune `pre-change-checkpoint-stale` and `USHMAN_LEDGER_DOCTOR_OPEN_ISSUE_MAX_AGE_MS` to tune `open-issue-stale`. Both values must be positive integers expressed in milliseconds; when a variable is unset, its default window applies.
181
+
182
+ ### Manual ledger edits
183
+
184
+ Prefer appending `correction` or `operator-decision` records instead of editing historical entry files directly. If historical files were edited already, use `doctor` to identify the damaged chain or manifest pointers before appending new records.
package/dist/blobs.d.ts CHANGED
@@ -4,6 +4,9 @@ export type StoredPatchBlob = {
4
4
  readonly bytes: number;
5
5
  readonly removedLines: number;
6
6
  };
7
+ export declare const assertValidBlobSha256: (sha256: string) => string;
8
+ export declare const assertPatchTextWithinLimit: (patchText: string, sourceLabel: string) => number;
9
+ export declare const readPatchTextFromFile: (patchPath: string) => Promise<string>;
7
10
  export declare const storePatchBlob: (workspaceRoot: string, patchText: string) => Promise<StoredPatchBlob>;
8
11
  export declare const storePatchBlobFromFile: (workspaceRoot: string, patchPath: string) => Promise<StoredPatchBlob>;
9
12
  export declare const resolveBlobPath: (workspaceRoot: string, sha256: string) => string;
@@ -1 +1 @@
1
- {"version":3,"file":"blobs.d.ts","sourceRoot":"","sources":["../src/blobs.ts"],"names":[],"mappings":"AAiCA,MAAM,MAAM,eAAe,GAAG;IAC1B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;CACjC,CAAC;AAEF,eAAO,MAAM,cAAc,GAAU,eAAe,MAAM,EAAE,WAAW,MAAM,KAAG,OAAO,CAAC,eAAe,CAsBtG,CAAC;AAEF,eAAO,MAAM,sBAAsB,GAAU,eAAe,MAAM,EAAE,WAAW,MAAM,KAAG,OAAO,CAAC,eAAe,CAE9G,CAAC;AAEF,eAAO,MAAM,eAAe,GAAI,eAAe,MAAM,EAAE,QAAQ,MAAM,WAAyC,CAAC"}
1
+ {"version":3,"file":"blobs.d.ts","sourceRoot":"","sources":["../src/blobs.ts"],"names":[],"mappings":"AAsDA,MAAM,MAAM,eAAe,GAAG;IAC1B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;CACjC,CAAC;AAEF,eAAO,MAAM,qBAAqB,GAAI,QAAQ,MAAM,WAKnD,CAAC;AAEF,eAAO,MAAM,0BAA0B,GAAI,WAAW,MAAM,EAAE,aAAa,MAAM,WAOhF,CAAC;AAEF,eAAO,MAAM,qBAAqB,GAAU,WAAW,MAAM,oBAO5D,CAAC;AAEF,eAAO,MAAM,cAAc,GAAU,eAAe,MAAM,EAAE,WAAW,MAAM,KAAG,OAAO,CAAC,eAAe,CAmBtG,CAAC;AAEF,eAAO,MAAM,sBAAsB,GAAU,eAAe,MAAM,EAAE,WAAW,MAAM,KAAG,OAAO,CAAC,eAAe,CAE9G,CAAC;AAEF,eAAO,MAAM,eAAe,GAAI,eAAe,MAAM,EAAE,QAAQ,MAAM,WACN,CAAC"}
package/dist/blobs.js CHANGED
@@ -1,49 +1,75 @@
1
- import { mkdir, readFile } from 'node:fs/promises';
1
+ import { readFile, stat } from 'node:fs/promises';
2
2
  import path from 'node:path';
3
- import { sha256File, sha256Hex } from "./json.js";
3
+ import { sha256Hex } from "./json.js";
4
+ import { getLedgerRuntimeConfig } from "./runtime-config.js";
4
5
  import { resolveLedgerPaths, writeAtomicTextFile } from "./storage/filesystem.js";
6
+ import { forEachLine } from "./text-lines.js";
7
+ const SHA256_HEX_PATTERN = /^[a-f0-9]{64}$/u;
5
8
  const countPatchLines = (patchText) => {
6
9
  let addedLines = 0;
7
10
  let removedLines = 0;
8
11
  let insideHunk = false;
9
- for (const line of patchText.split(/\r?\n/u)) {
12
+ forEachLine(patchText, (line) => {
13
+ if (line.startsWith('diff --git ')) {
14
+ insideHunk = false;
15
+ return;
16
+ }
10
17
  if (line.startsWith('@@')) {
11
18
  insideHunk = true;
12
- continue;
19
+ return;
13
20
  }
14
21
  if (!insideHunk || line.startsWith('+++') || line.startsWith('---')) {
15
- continue;
22
+ return;
16
23
  }
17
24
  if (line.startsWith('+')) {
18
25
  addedLines += 1;
19
- continue;
26
+ return;
20
27
  }
21
28
  if (line.startsWith('-')) {
22
29
  removedLines += 1;
23
30
  }
24
- }
31
+ });
25
32
  return { addedLines, removedLines };
26
33
  };
34
+ const formatPatchLimitError = ({ bytes, limitBytes, sourceLabel, }) => `Patch input from ${sourceLabel} is ${bytes} bytes, exceeding the configured limit of ${limitBytes} bytes. Reduce the diff size or increase USHMAN_LEDGER_MAX_PATCH_BYTES.`;
27
35
  const buildBlobPath = (workspaceRoot, sha256) => {
28
36
  const paths = resolveLedgerPaths(workspaceRoot);
29
37
  return path.join(paths.blobsDir, sha256.slice(0, 2), `${sha256}.patch`);
30
38
  };
31
- export const storePatchBlob = async (workspaceRoot, patchText) => {
39
+ export const assertValidBlobSha256 = (sha256) => {
40
+ if (!SHA256_HEX_PATTERN.test(sha256)) {
41
+ throw new Error(`Invalid patch blob digest: ${sha256}. Expected a lowercase SHA-256 hex digest.`);
42
+ }
43
+ return sha256;
44
+ };
45
+ export const assertPatchTextWithinLimit = (patchText, sourceLabel) => {
46
+ const limitBytes = getLedgerRuntimeConfig().maxPatchBytes;
32
47
  const bytes = Buffer.byteLength(patchText, 'utf8');
48
+ if (bytes > limitBytes) {
49
+ throw new Error(formatPatchLimitError({ bytes, limitBytes, sourceLabel }));
50
+ }
51
+ return bytes;
52
+ };
53
+ export const readPatchTextFromFile = async (patchPath) => {
54
+ const fileStat = await stat(patchPath);
55
+ const limitBytes = getLedgerRuntimeConfig().maxPatchBytes;
56
+ if (fileStat.size > limitBytes) {
57
+ throw new Error(formatPatchLimitError({ bytes: fileStat.size, limitBytes, sourceLabel: patchPath }));
58
+ }
59
+ return readFile(patchPath, 'utf8');
60
+ };
61
+ export const storePatchBlob = async (workspaceRoot, patchText) => {
62
+ const bytes = assertPatchTextWithinLimit(patchText, 'inline diff text');
33
63
  const blobSha256 = sha256Hex(patchText);
34
64
  const blobPath = buildBlobPath(workspaceRoot, blobSha256);
35
- let shouldWrite = true;
36
65
  try {
37
- shouldWrite = (await sha256File(blobPath)) !== blobSha256;
66
+ await stat(blobPath);
38
67
  }
39
68
  catch (error) {
40
69
  const code = error.code;
41
70
  if (code !== 'ENOENT') {
42
71
  throw error;
43
72
  }
44
- }
45
- if (shouldWrite) {
46
- await mkdir(path.dirname(blobPath), { recursive: true });
47
73
  await writeAtomicTextFile(blobPath, patchText);
48
74
  }
49
75
  return {
@@ -53,6 +79,6 @@ export const storePatchBlob = async (workspaceRoot, patchText) => {
53
79
  };
54
80
  };
55
81
  export const storePatchBlobFromFile = async (workspaceRoot, patchPath) => {
56
- return storePatchBlob(workspaceRoot, await readFile(patchPath, 'utf8'));
82
+ return storePatchBlob(workspaceRoot, await readPatchTextFromFile(patchPath));
57
83
  };
58
- export const resolveBlobPath = (workspaceRoot, sha256) => buildBlobPath(workspaceRoot, sha256);
84
+ export const resolveBlobPath = (workspaceRoot, sha256) => buildBlobPath(workspaceRoot, assertValidBlobSha256(sha256));