@meetless/mla 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/dist/build-info.json +3 -3
  2. package/dist/cli.js +31 -5
  3. package/dist/commands/activate.js +39 -18
  4. package/dist/commands/agent-memory.js +333 -0
  5. package/dist/commands/enrich.js +211 -2
  6. package/dist/commands/internal-auto-index.js +64 -1
  7. package/dist/commands/internal-pretool-observe.js +86 -1
  8. package/dist/commands/internal-redact-capture.js +130 -0
  9. package/dist/commands/pilot.js +385 -0
  10. package/dist/lib/agent-memory-capture/binding.js +115 -0
  11. package/dist/lib/agent-memory-capture/classify.js +68 -0
  12. package/dist/lib/agent-memory-capture/collector.js +69 -0
  13. package/dist/lib/agent-memory-capture/containment.js +74 -0
  14. package/dist/lib/agent-memory-capture/ledger.js +43 -0
  15. package/dist/lib/agent-memory-capture/live-collector.js +148 -0
  16. package/dist/lib/agent-memory-capture/live-ledger.js +45 -0
  17. package/dist/lib/agent-memory-capture/live-pipeline.js +344 -0
  18. package/dist/lib/agent-memory-capture/lock.js +98 -0
  19. package/dist/lib/agent-memory-capture/paths.js +47 -0
  20. package/dist/lib/agent-memory-capture/pipeline.js +222 -0
  21. package/dist/lib/agent-memory-capture/report.js +131 -0
  22. package/dist/lib/agent-memory-capture/types.js +14 -0
  23. package/dist/lib/agent-memory-capture/upsert-client.js +104 -0
  24. package/dist/lib/analytics/enforcement-classify.js +65 -0
  25. package/dist/lib/analytics/enforcement-incident.js +83 -0
  26. package/dist/lib/analytics/envelope.js +55 -1
  27. package/dist/lib/analytics/pilot.js +313 -0
  28. package/dist/lib/enrichment/ingest.js +98 -13
  29. package/dist/lib/enrichment/materialize-rules.js +81 -0
  30. package/dist/lib/enrichment/plan.js +72 -15
  31. package/dist/lib/enrichment/protocol.js +85 -5
  32. package/dist/lib/enrichment/scout-brief.js +35 -6
  33. package/dist/lib/redactor.js +104 -1
  34. package/dist/lib/scanner/agent-memory.js +55 -4
  35. package/dist/lib/scanner/managed-rules.js +0 -0
  36. package/dist/lib/scanner/scan.js +52 -1
  37. package/dist/lib/scanner/score.js +41 -3
  38. package/dist/lib/scanner/scout-mission.js +9 -7
  39. package/dist/lib/upgrade-apply.js +30 -0
  40. package/dist/lib/wire.js +2 -0
  41. package/package.json +1 -1
@@ -0,0 +1,74 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.MAX_FILE_BYTES = void 0;
4
+ exports.enumerateEligibleFiles = enumerateEligibleFiles;
5
+ // src/lib/agent-memory-capture/containment.ts
6
+ //
7
+ // Enumerate the eligible memory files under a binding's directory (§4 step 3),
8
+ // with realpath containment so a symlink can never point the collector at a file
9
+ // outside the consented directory (CONTAINMENT-1). MVP scans DIRECT `.md`
10
+ // children only (the corpus is flat); nesting support is deferred until topic
11
+ // files actually nest.
12
+ const node_fs_1 = require("node:fs");
13
+ const node_path_1 = require("node:path");
14
+ // Fixed max byte size (a constant, not user-configurable yet, per SECRET-1 /
15
+ // §4). Real memory topic files are 1-6 KB; the 188 KB MEMORY.md index is
16
+ // denylisted. A file above this is a processing failure (oversized), never a
17
+ // silent truncate-and-send.
18
+ exports.MAX_FILE_BYTES = 256 * 1024;
19
+ // Never a capture source even if it somehow carried a project type: the index
20
+ // is one-line pointers, not durable claims. Type-filtering already excludes it
21
+ // (it has no frontmatter), but the explicit denylist is belt-and-suspenders.
22
+ const DENYLIST = new Set(["memory.md"]);
23
// Report whether `child` is `parentReal` itself or lies underneath it. Both
// arguments are compared as plain strings, so callers must pass already-resolved
// paths (the enumerator passes realpath results).
//
// child:      resolved path being tested.
// parentReal: resolved path of the consented directory.
// Returns true when `child` equals `parentReal` or starts with it followed by
// the platform separator.
function isContained(child, parentReal) {
    if (child === parentReal)
        return true;
    // A filesystem root ("/" or "C:\") already ends with the separator; appending
    // a second one would build a prefix ("//") that no child path starts with.
    const prefix = parentReal.endsWith(node_path_1.sep) ? parentReal : parentReal + node_path_1.sep;
    return child.startsWith(prefix);
}
26
// Enumerate the direct `.md` children of `memoryDir` that are regular files,
// realpath-contained and not denylisted, together with their byte size.
//
// memoryDir: directory to scan; it is resolved with realpathSync first.
// Returns { files, complete } where each file is
// { relativePath, absPath, realPath, bytes }. `complete` is false when the
// directory cannot be resolved/listed or when any single entry fails to
// resolve/stat, so a partial scan never drives deletions.
function enumerateEligibleFiles(memoryDir) {
    let memoryReal;
    let names;
    try {
        memoryReal = (0, node_fs_1.realpathSync)(memoryDir);
        names = (0, node_fs_1.readdirSync)(memoryReal);
    }
    catch {
        return { files: [], complete: false };
    }
    const files = [];
    let complete = true;
    for (const name of names) {
        const lowered = name.toLowerCase();
        if (!lowered.endsWith(".md") || DENYLIST.has(lowered))
            continue;
        const absPath = (0, node_path_1.join)(memoryReal, name);
        let realPath;
        let bytes;
        try {
            realPath = (0, node_fs_1.realpathSync)(absPath);
            // Symlink escape guard: the resolved target must stay inside the
            // consented directory. Checked BEFORE stat so nothing outside the
            // directory is ever touched; an escaping link is silently excluded.
            if (!isContained(realPath, memoryReal))
                continue;
            // The denylist applies to the resolved target too, otherwise an
            // in-directory alias (alias.md -> MEMORY.md) would smuggle a
            // denylisted file past the name check above.
            if (DENYLIST.has((0, node_path_1.basename)(realPath).toLowerCase()))
                continue;
            const st = (0, node_fs_1.statSync)(realPath);
            if (!st.isFile())
                continue; // directories, fifos, etc.
            bytes = st.size;
        }
        catch {
            // A single entry that vanished/raced mid-scan makes THIS pass incomplete,
            // so we do not mistake other present files' absence for deletions.
            complete = false;
            continue;
        }
        files.push({ relativePath: name, absPath, realPath, bytes });
    }
    return { files, complete };
}
@@ -0,0 +1,43 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.readLedger = readLedger;
4
+ exports.writeLedger = writeLedger;
5
+ // src/lib/agent-memory-capture/ledger.ts
6
+ //
7
+ // The thin per-binding dry-run ledger (§4). It stores only what the COLLECTOR
8
+ // needs to avoid re-emitting events for unchanged content; it deliberately does
9
+ // NOT mirror server processing/extraction state (two state machines would
10
+ // diverge). Keyed by a file's path relative to memoryDir.
11
+ const node_fs_1 = require("node:fs");
12
+ const node_path_1 = require("node:path");
13
+ const config_1 = require("../config");
14
+ const paths_1 = require("./paths");
15
// Build a brand-new version-1 ledger with no entries. A fresh object is
// created on every call so callers can mutate the result freely.
function emptyLedger() {
    const entries = {};
    return { version: 1, entries };
}
18
// Load the dry-run ledger for a binding. Never throws: a missing or unreadable
// file, invalid JSON, or a payload whose `entries` is not a plain key/value
// object all yield an empty ledger.
//
// bindingId: binding whose ledger file is read (located via ledgerPath).
// home:      base directory handed to ledgerPath; defaults to config HOME.
// Returns { version: 1, entries } with the parsed entries object.
function readLedger(bindingId, home = config_1.HOME) {
    let parsed;
    try {
        const raw = (0, node_fs_1.readFileSync)((0, paths_1.ledgerPath)(bindingId, home), "utf8");
        parsed = JSON.parse(raw);
    }
    catch {
        return emptyLedger();
    }
    const entries = parsed?.entries;
    // typeof [] is "object" as well, so arrays must be rejected explicitly;
    // otherwise their indices would be treated as tracked relative paths.
    if (typeof entries !== "object" || entries === null || Array.isArray(entries)) {
        return emptyLedger();
    }
    return { version: 1, entries };
}
37
// Persist the dry-run ledger for a binding: write a pid-suffixed temp file next
// to the destination (created with mode 0o600), then rename it over the
// destination.
//
// bindingId: binding whose ledger file is written (located via ledgerPath).
// ledger:    value serialized as pretty-printed JSON plus a trailing newline.
// home:      base directory handed to ledgerPath; defaults to config HOME.
// Throws whatever the underlying fs call throws; the temp file is removed
// first so a failed write never leaves an orphan behind.
function writeLedger(bindingId, ledger, home = config_1.HOME) {
    const dest = (0, paths_1.ledgerPath)(bindingId, home);
    (0, node_fs_1.mkdirSync)((0, node_path_1.dirname)(dest), { recursive: true });
    const tmp = `${dest}.${process.pid}.tmp`;
    try {
        (0, node_fs_1.writeFileSync)(tmp, JSON.stringify(ledger, null, 2) + "\n", { mode: 0o600 });
        (0, node_fs_1.renameSync)(tmp, dest);
    }
    catch (err) {
        try {
            (0, node_fs_1.rmSync)(tmp, { force: true });
        }
        catch {
            // Best-effort cleanup only; the original failure below is what matters.
        }
        throw err;
    }
}
@@ -0,0 +1,148 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DEFAULT_MAX_UPLOADS_PER_PASS = void 0;
4
+ exports.liveCaptureEnabled = liveCaptureEnabled;
5
+ exports.appendLiveDecisions = appendLiveDecisions;
6
+ exports.runLiveCollector = runLiveCollector;
7
+ // src/lib/agent-memory-capture/live-collector.ts
8
+ //
9
+ // Orchestrates one LIVE collection + upload pass across all enabled bindings
10
+ // (proposal §4 lifecycle + §6 Phase 2A). Mirrors the dry-run collector.ts shape
11
+ // exactly (per-binding lock, fail-soft per binding, append only the actionable
12
+ // outcomes to a metadata-only JSONL), but the per-file engine is
13
+ // collectAndUploadOnce, which ACTUALLY uploads/withdraws against intel.
14
+ //
15
+ // GATING (default OFF, fail-closed): live capture never runs unless it is
16
+ // explicitly turned on (liveCaptureEnabled). The proposal wires this into the
17
+ // existing Stop auto-index worker (NOT a new hook), so an operator opts in once
18
+ // via MEETLESS_AGENT_MEMORY_LIVE; with the flag unset the worker calls nothing.
19
+ // The flag is necessary but NOT sufficient: there must also be at least one
20
+ // consented binding (CONSENT-1) and a resolvable actor identity, or the pass is
21
+ // a no-op. We never upload anonymously.
22
+ //
23
+ // SECRET-1: the per-file engine runs the credential denylist fail-closed before
24
+ // any byte leaves the machine. This orchestrator adds a second guard, the
25
+ // no-backfill per-pass upload cap (§6), so the FIRST live pass cannot dump the
26
+ // whole backlog at once; the cap drains over successive passes.
27
+ const node_fs_1 = require("node:fs");
28
+ const node_path_1 = require("node:path");
29
+ const config_1 = require("../config");
30
+ const lock_1 = require("./lock");
31
+ const binding_1 = require("./binding");
32
+ const live_pipeline_1 = require("./live-pipeline");
33
+ const paths_1 = require("./paths");
34
+ const upsert_client_1 = require("./upsert-client");
35
+ // Default per-pass upload cap (no-backfill, §6). Conservative on purpose: the
36
+ // first live pass over a backlog uploads at most this many revisions, the rest
37
+ // defer and drain over later Stops. Override with MEETLESS_AGENT_MEMORY_MAX_UPLOADS.
38
+ exports.DEFAULT_MAX_UPLOADS_PER_PASS = 25;
39
// Gate predicate for live capture, which is OFF unless explicitly enabled.
// Reads MEETLESS_AGENT_MEMORY_LIVE from `env` (process.env by default), trims
// and lower-cases it, and answers true only for "1", "true", "yes" or "on".
// An unset variable or any other value is OFF (fail-closed). No side effects.
function liveCaptureEnabled(env = process.env) {
    const flag = env.MEETLESS_AGENT_MEMORY_LIVE ?? "";
    const normalized = flag.trim().toLowerCase();
    return ["1", "true", "yes", "on"].includes(normalized);
}
47
// Resolve the per-pass upload cap from MEETLESS_AGENT_MEMORY_MAX_UPLOADS in
// `env` (process.env by default). Only a positive integer is honoured; an
// unset, blank, non-numeric, fractional, zero or negative value falls back to
// DEFAULT_MAX_UPLOADS_PER_PASS.
function resolveMaxUploads(env = process.env) {
    const fallback = exports.DEFAULT_MAX_UPLOADS_PER_PASS;
    const text = (env.MEETLESS_AGENT_MEMORY_MAX_UPLOADS ?? "").trim();
    if (text === "")
        return fallback;
    const parsed = Number(text);
    if (!Number.isInteger(parsed) || parsed <= 0)
        return fallback;
    return parsed;
}
54
// Append one binding's actionable live outcomes to its JSONL decision log.
// Records whose outcome isLiveActionable rejects (the per-pass no-ops) are
// dropped so the log stays bounded. Each kept record is serialized as-is onto
// its own line; records are expected to carry metadata only, never raw content.
//
// bindingId: binding whose log is appended to (located via liveDecisionLogPath).
// records:   every record of the pass; only `outcome` is inspected here.
// home:      base directory handed to liveDecisionLogPath; defaults to config HOME.
// Returns the number of lines appended (0 means nothing was touched on disk).
function appendLiveDecisions(bindingId, records, home = config_1.HOME) {
    const serialized = [];
    for (const record of records) {
        if ((0, live_pipeline_1.isLiveActionable)(record.outcome)) {
            serialized.push(JSON.stringify(record));
        }
    }
    if (serialized.length === 0)
        return 0;
    const dest = (0, paths_1.liveDecisionLogPath)(bindingId, home);
    (0, node_fs_1.mkdirSync)((0, node_path_1.dirname)(dest), { recursive: true });
    (0, node_fs_1.appendFileSync)(dest, `${serialized.join("\n")}\n`, { mode: 0o600 });
    return serialized.length;
}
68
// Run the live pass for ONE binding under that binding's lock.
//
// When the lock cannot be acquired nothing runs and the result is
// { summary: null, locked: false, appended: 0 }. Otherwise the pass summary is
// produced by collectAndUploadOnce, its records are handed to
// appendLiveDecisions, and the lock is released whether or not the pass threw.
// Errors from the pass propagate to the caller.
async function runForBindingLive(binding, deps) {
    const { bindingId } = binding;
    const home = deps.home ?? config_1.HOME;
    const lock = (0, lock_1.acquireBindingLock)(bindingId, deps.nowIso, home);
    if (!lock) {
        return { bindingId, summary: null, locked: false, appended: 0 };
    }
    try {
        const summary = await (0, live_pipeline_1.collectAndUploadOnce)(binding, deps);
        const appended = appendLiveDecisions(bindingId, summary.records, home);
        return { bindingId, summary, locked: true, appended };
    }
    finally {
        lock.release();
    }
}
86
// Produce the { client, actor } pair for a live pass, or null (fail-closed).
//
// - Injected client (tests): used as-is, but only together with an actor; an
//   injected client without an actor yields null.
// - Otherwise the config comes from opts.cfg or readConfig(); a read failure
//   yields null. The actor is opts.actor, else cfg.actorUserId, trimmed; an
//   empty actor yields null so nothing is ever uploaded anonymously. The
//   client is then built from the config via createIntelUpsertClient.
function resolveClientAndActor(opts) {
    const injected = opts.client;
    if (injected) {
        return opts.actor ? { client: injected, actor: opts.actor } : null;
    }
    let cfg;
    try {
        cfg = opts.cfg ?? (0, config_1.readConfig)();
    }
    catch {
        return null;
    }
    const actor = (opts.actor ?? cfg.actorUserId ?? "").trim();
    if (actor === "")
        return null; // not logged in -> never upload anonymously.
    const client = (0, upsert_client_1.createIntelUpsertClient)(cfg);
    return { client, actor };
}
108
// Run one LIVE pass over every enabled binding, one binding at a time.
//
// Three gates are checked in order, and each returns [] before any binding is
// processed: the opt-in flag (liveCaptureEnabled), a resolvable client + actor
// (resolveClientAndActor), and at least one enabled binding. Per-binding
// failures are fail-soft: a binding whose pass throws is reported as
// { summary: null, locked: false, appended: 0 } and the remaining bindings
// still run. Resolves to one result object per enabled binding, in order.
async function runLiveCollector(opts) {
    const env = opts.env ?? process.env;
    // Gate 1: explicit opt-in. Re-checked here so a direct call cannot bypass it.
    if (!liveCaptureEnabled(env))
        return [];
    // Gate 2: never upload anonymously.
    const resolved = resolveClientAndActor(opts);
    if (!resolved)
        return [];
    const home = opts.home ?? config_1.HOME;
    // Gate 3: at least one consented binding (CONSENT-1).
    const bindings = (0, binding_1.listEnabledBindings)(home);
    if (bindings.length === 0)
        return [];
    const deps = {
        client: resolved.client,
        actor: resolved.actor,
        nowIso: opts.nowIso,
        home,
        maxUploadsPerPass: opts.maxUploadsPerPass ?? resolveMaxUploads(env),
    };
    // Scanner overrides are forwarded only when the caller supplied them, so the
    // per-file engine's own defaults apply otherwise.
    if (opts.scan)
        deps.scan = opts.scan;
    if (opts.scannerVersion)
        deps.scannerVersion = opts.scannerVersion;
    if (opts.scannerMode)
        deps.scannerMode = opts.scannerMode;
    const results = [];
    for (const binding of bindings) {
        let result;
        try {
            result = await runForBindingLive(binding, deps);
        }
        catch {
            result = { bindingId: binding.bindingId, summary: null, locked: false, appended: 0 };
        }
        results.push(result);
    }
    return results;
}
@@ -0,0 +1,45 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.readLiveLedger = readLiveLedger;
4
+ exports.writeLiveLedger = writeLiveLedger;
5
+ // src/lib/agent-memory-capture/live-ledger.ts
6
+ //
7
+ // The per-binding LIVE ledger (§4 "Live"). Unlike the dry-run ledger, this one
8
+ // tracks what the SERVER acknowledged, not what we observed: `lastUploadedHash`
9
+ // advances ONLY on a hash-matched ack (COMMIT-1), so a failed or unverified
10
+ // upload leaves the entry "unsettled" and the next pass re-attempts (RETRY-2).
11
+ // Kept in its own file (liveLedgerPath) so it can never collide with the dry-run
12
+ // ledger on the same binding. Keyed by a file's path relative to memoryDir.
13
+ const node_fs_1 = require("node:fs");
14
+ const node_path_1 = require("node:path");
15
+ const config_1 = require("../config");
16
+ const paths_1 = require("./paths");
17
// Build a brand-new version-1 live ledger with no entries. A fresh object is
// created on every call so callers can mutate the result freely.
function emptyLiveLedger() {
    const entries = {};
    return { version: 1, entries };
}
20
// Load the LIVE ledger for a binding. Never throws: a missing or unreadable
// file, invalid JSON, or a payload whose `entries` is not a plain key/value
// object all yield an empty ledger.
//
// bindingId: binding whose ledger file is read (located via liveLedgerPath).
// home:      base directory handed to liveLedgerPath; defaults to config HOME.
// Returns { version: 1, entries } with the parsed entries object.
function readLiveLedger(bindingId, home = config_1.HOME) {
    let parsed;
    try {
        const raw = (0, node_fs_1.readFileSync)((0, paths_1.liveLedgerPath)(bindingId, home), "utf8");
        parsed = JSON.parse(raw);
    }
    catch {
        return emptyLiveLedger();
    }
    const entries = parsed?.entries;
    // typeof [] is "object" as well, so arrays must be rejected explicitly;
    // otherwise their indices would be treated as tracked relative paths.
    if (typeof entries !== "object" || entries === null || Array.isArray(entries)) {
        return emptyLiveLedger();
    }
    return { version: 1, entries };
}
39
// Persist the LIVE ledger for a binding: write a pid-suffixed temp file next to
// the destination (created with mode 0o600), then rename it over the
// destination.
//
// bindingId: binding whose ledger file is written (located via liveLedgerPath).
// ledger:    value serialized as pretty-printed JSON plus a trailing newline.
// home:      base directory handed to liveLedgerPath; defaults to config HOME.
// Throws whatever the underlying fs call throws; the temp file is removed
// first so a failed write never leaves an orphan behind.
function writeLiveLedger(bindingId, ledger, home = config_1.HOME) {
    const dest = (0, paths_1.liveLedgerPath)(bindingId, home);
    (0, node_fs_1.mkdirSync)((0, node_path_1.dirname)(dest), { recursive: true });
    const tmp = `${dest}.${process.pid}.tmp`;
    try {
        (0, node_fs_1.writeFileSync)(tmp, JSON.stringify(ledger, null, 2) + "\n", { mode: 0o600 });
        (0, node_fs_1.renameSync)(tmp, dest);
    }
    catch (err) {
        try {
            (0, node_fs_1.rmSync)(tmp, { force: true });
        }
        catch {
            // Best-effort cleanup only; the original failure below is what matters.
        }
        throw err;
    }
}
@@ -0,0 +1,344 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.isLiveActionable = isLiveActionable;
4
+ exports.collectAndUploadOnce = collectAndUploadOnce;
5
+ // src/lib/agent-memory-capture/live-pipeline.ts
6
+ //
7
+ // The LIVE collection pass (Phase 2A+). Mirrors the dry-run §4 lifecycle router
8
+ // in pipeline.ts but ACTUALLY performs the network ops: it uploads eligible
9
+ // revisions (UPSERT_SOURCE_REVISION) and withdraws reclassified/deleted sources
10
+ // (WITHDRAW_SOURCE), against the injectable `UpsertClient`. It commits the LIVE
11
+ // ledger only on a verified server ack:
12
+ //
13
+ // COMMIT-1: `lastUploadedHash` advances ONLY after a successful ack whose
14
+ // server-echoed content hash equals the local hash (or, on older
15
+ // intel that omits the echo, on a success outcome alone). A failed,
16
+ // rejected, or hash-mismatched upload leaves the entry unsettled.
17
+ // RETRY-2: Because a failed upload never advances `lastUploadedHash`, the next
18
+ // pass sees the file as still-changed and re-attempts it. A blocked
19
+ // file is re-evaluated when the scanner version moves.
20
+ //
21
+ // SECRET-1: the credential denylist (`scanForCredentials`, NOT the entropy
22
+ // scanner) runs FAIL-CLOSED before any upload. A credential-format hit withholds
23
+ // the file; a scanner outage withholds the file. Nothing credential-bearing is
24
+ // handed to the client.
25
+ //
26
+ // One immutable byte buffer per file (the dry-run's TOCTOU guard): the bytes
27
+ // hashed, classified, scanned, and uploaded are provably the same bytes.
28
+ const node_fs_1 = require("node:fs");
29
+ const config_1 = require("../config");
30
+ const redactor_1 = require("../redactor");
31
+ const classify_1 = require("./classify");
32
+ const containment_1 = require("./containment");
33
+ const live_ledger_1 = require("./live-ledger");
34
+ const pipeline_1 = require("./pipeline");
35
// Decide whether an outcome deserves a line in the live JSONL. The two no-op
// outcomes, "unchanged" and "skipped", are emitted on every pass and would grow
// the log without bound, so they are excluded; every other outcome is kept.
function isLiveActionable(outcome) {
    return !(outcome === "unchanged" || outcome === "skipped");
}
41
// Run one LIVE collection + upload pass for a single binding. Enumerates the
// binding's directory, performs network ops via deps.client, and mutates the
// LIVE ledger (written back only when something changed).
//
// binding: read for bindingId, memoryDir, workspaceId and consentedAt.
// deps:    client ({ upsert, withdraw }), actor, nowIso; optional home, scan,
//          scannerVersion, scannerMode ("off" skips the scan) and
//          maxUploadsPerPass (undefined = uncapped).
// Resolves to { bindingId, memoryDir, workspaceId, scanComplete, records },
// where `records` holds one outcome per enumerated file plus one per absent
// tracked file.
//
// Withdraws are issued ONLY for entries that carry `lastUploadedHash`. An entry
// holding just a block marker was never acked by the server, so when such a
// file is reclassified or deleted its local marker is dropped without a
// network call (there is nothing on the server to withdraw).
async function collectAndUploadOnce(binding, deps) {
    const home = deps.home ?? config_1.HOME;
    const scan = deps.scan ?? redactor_1.scanForCredentials;
    const scannerVersion = deps.scannerVersion ?? redactor_1.SECRET_SCANNER_VERSION;
    // Live is fail-closed by default; "off" is a test-only escape hatch.
    const scannerMode = deps.scannerMode ?? "block";
    const now = deps.nowIso;
    // No-backfill cap (§6). undefined = uncapped; otherwise stop attempting uploads
    // once this many have been attempted this pass and defer the rest.
    const cap = deps.maxUploadsPerPass;
    let uploadAttempts = 0;
    const ledger = (0, live_ledger_1.readLiveLedger)(binding.bindingId, home);
    const { files, complete } = (0, containment_1.enumerateEligibleFiles)(binding.memoryDir);
    const records = [];
    const present = new Set();
    let mutated = false;
    // Fields shared by every per-file record.
    const base = (relativePath, bytes) => ({
        sourceId: (0, pipeline_1.syntheticSourceId)(binding.bindingId, relativePath),
        relativePath,
        bytes,
        secretRuleIds: [],
        observedAt: now,
    });
    for (const f of files) {
        // Marked present before any check, so an oversized/unreadable/malformed
        // file is never mistaken for a deletion below.
        present.add(f.relativePath);
        const sourceId = (0, pipeline_1.syntheticSourceId)(binding.bindingId, f.relativePath);
        // Oversized: known from stat; never read, never upload, never withdraw.
        if (f.bytes > containment_1.MAX_FILE_BYTES) {
            records.push({ ...base(f.relativePath, f.bytes), hash: null, outcome: "failed", reason: "oversized" });
            continue;
        }
        let buf;
        try {
            buf = (0, node_fs_1.readFileSync)(f.realPath);
        }
        catch {
            records.push({ ...base(f.relativePath, f.bytes), hash: null, outcome: "failed", reason: "unreadable" });
            continue;
        }
        // Re-check against the bytes actually read: the file may have grown since stat.
        if (buf.length > containment_1.MAX_FILE_BYTES) {
            records.push({ ...base(f.relativePath, buf.length), hash: null, outcome: "failed", reason: "oversized" });
            continue;
        }
        const hash = (0, pipeline_1.sha256Hex)(buf);
        const text = buf.toString("utf8");
        const cls = (0, classify_1.classifyMemory)(text);
        const prior = ledger.entries[f.relativePath];
        if (cls.malformed) {
            records.push({ ...base(f.relativePath, buf.length), hash, outcome: "failed", reason: "malformed_frontmatter" });
            continue;
        }
        if (cls.type !== "project") {
            if (prior?.lastUploadedHash) {
                // A previously-uploaded project file became non-project: WITHDRAW it.
                const res = await deps.client.withdraw({
                    workspaceId: binding.workspaceId,
                    actor: deps.actor,
                    relPath: sourceId,
                    reason: "reclassified",
                });
                if (res.ok) {
                    delete ledger.entries[f.relativePath];
                    mutated = true;
                    records.push({
                        ...base(f.relativePath, buf.length),
                        hash,
                        outcome: "reclassified",
                        reason: `reclassified project -> ${cls.type ?? "none"}`,
                    });
                }
                else {
                    // Leave the entry so the next pass retries the withdraw (RETRY-2).
                    ledger.entries[f.relativePath] = { ...prior, lastAttemptAt: now };
                    mutated = true;
                    records.push({
                        ...base(f.relativePath, buf.length),
                        hash,
                        outcome: "failed",
                        reason: `withdraw_failed (reclassified): ${res.reason}`,
                    });
                }
            }
            else {
                // Never uploaded. A leftover block-only marker is dropped locally
                // (no withdraw: the server holds nothing for this path); either
                // way the file is simply skipped.
                if (prior) {
                    delete ledger.entries[f.relativePath];
                    mutated = true;
                }
                records.push({
                    ...base(f.relativePath, buf.length),
                    hash,
                    outcome: "skipped",
                    reason: cls.type ? `type ${cls.type}` : "no project type",
                });
            }
            continue;
        }
        // type === project. If the exact bytes already match what the server acked,
        // it is settled and clean by construction (we never upload a credential-
        // bearing file), so short-circuit WITHOUT re-scanning. Clear any stale block
        // marker (content reverted to the uploaded version).
        //
        // LIMITATION (documented, not a bug): this does NOT retroactively re-scan or
        // withdraw already-uploaded content when the scanner version bumps. RETRY-2's
        // re-evaluation applies to BLOCKED files, not settled uploads. Once content
        // is acked it is governed by the KB review rail, not the local scanner.
        if (prior?.lastUploadedHash === hash) {
            if (prior.blockedHash || prior.blockedScannerVersion) {
                const cleared = { ...prior, lastAttemptAt: now };
                delete cleared.blockedHash;
                delete cleared.blockedScannerVersion;
                ledger.entries[f.relativePath] = cleared;
                mutated = true;
            }
            records.push({
                ...base(f.relativePath, buf.length),
                hash,
                outcome: "unchanged",
                reason: "content identical to last upload",
            });
            continue;
        }
        // Credential denylist, FAIL-CLOSED (SECRET-1). "off" is the test-only path.
        let secretRuleIds = [];
        if (scannerMode !== "off") {
            try {
                secretRuleIds = scan(text);
            }
            catch {
                // Scanner outage withholds the file: we cannot prove it is clean.
                records.push({
                    ...base(f.relativePath, buf.length),
                    hash,
                    outcome: "failed",
                    reason: "scanner_unavailable",
                });
                continue;
            }
        }
        if (scannerMode !== "off" && secretRuleIds.length > 0) {
            const alreadyBlocked = prior?.blockedHash === hash && prior?.blockedScannerVersion === scannerVersion;
            if (alreadyBlocked) {
                records.push({
                    ...base(f.relativePath, buf.length),
                    hash,
                    outcome: "unchanged",
                    reason: "blocked (unchanged, same scanner)",
                    secretRuleIds,
                });
            }
            else {
                // Set the block marker but PRESERVE any prior upload settle (a file can
                // be blocked at a new revision while an older clean revision is on the
                // server). Never advance lastUploadedHash here.
                ledger.entries[f.relativePath] = {
                    ...(prior ?? {}),
                    blockedHash: hash,
                    blockedScannerVersion: scannerVersion,
                    lastAttemptAt: now,
                };
                mutated = true;
                records.push({
                    ...base(f.relativePath, buf.length),
                    hash,
                    outcome: "blocked",
                    reason: "credential format matched",
                    secretRuleIds,
                });
            }
            continue;
        }
        // No-backfill cap (§6): once the per-pass upload budget is exhausted, DEFER
        // the remaining changed+clean files rather than uploading the whole backlog
        // in one burst. A deferred file is left UNSETTLED (the ledger is untouched),
        // so the next pass re-attempts it; the backlog drains `cap` files per pass.
        // Surfaced as a visible `deferred` count, never silently dropped.
        if (cap !== undefined && uploadAttempts >= cap) {
            records.push({
                ...base(f.relativePath, buf.length),
                hash,
                outcome: "deferred",
                reason: "per-pass upload cap reached",
                secretRuleIds,
            });
            continue;
        }
        uploadAttempts++;
        // project + clean + changed/new -> UPLOAD.
        const res = await deps.client.upsert({
            workspaceId: binding.workspaceId,
            actor: deps.actor,
            relPath: sourceId,
            content: text,
            contentHash: hash,
            bindingId: binding.bindingId,
            consentedAt: binding.consentedAt,
        });
        if (!res.ok || res.outcome === "failed") {
            // RETRY-2: do NOT advance lastUploadedHash; only stamp an attempt on an
            // existing entry (never create a bare entry for a never-settled file).
            if (prior) {
                ledger.entries[f.relativePath] = { ...prior, lastAttemptAt: now };
                mutated = true;
            }
            records.push({
                ...base(f.relativePath, buf.length),
                hash,
                outcome: "failed",
                reason: res.ok ? `server_rejected: ${res.reason}` : res.reason,
                secretRuleIds,
            });
            continue;
        }
        // COMMIT-1: if the server echoed its content hash, it MUST equal ours.
        if (res.serverContentHash !== null && res.serverContentHash !== hash) {
            if (prior) {
                ledger.entries[f.relativePath] = { ...prior, lastAttemptAt: now };
                mutated = true;
            }
            records.push({
                ...base(f.relativePath, buf.length),
                hash,
                outcome: "failed",
                reason: "hash_mismatch",
                secretRuleIds,
            });
            continue;
        }
        // COMMIT-1 satisfied. Settle the ledger to this hash; clear any block marker.
        ledger.entries[f.relativePath] = {
            lastUploadedHash: hash,
            lastUploadedRevisionId: res.revisionId ?? undefined,
            lastLogicalSourceId: res.logicalSourceId ?? undefined,
            lastSourceId: sourceId,
            lastAttemptAt: now,
        };
        mutated = true;
        records.push({
            ...base(f.relativePath, buf.length),
            hash,
            outcome: "uploaded",
            reason: prior?.lastUploadedHash ? "changed" : "new",
            secretRuleIds,
            revisionId: res.revisionId,
            // Map the upsert vocabulary ("created"|"unchanged") onto the record's
            // create/dedup vocabulary; "unchanged" here means the server already held
            // these exact bytes under this path (a benign dedup), recorded as such.
            serverOutcome: res.outcome === "created" ? "created" : "already_exists",
        });
    }
    // Deletions: only when the scan completed (a partial scan must never mistake an
    // un-enumerated file for a deletion). WITHDRAW each absent uploaded source; keep
    // the entry on a failed withdraw so the next complete pass retries it.
    if (complete) {
        for (const rel of Object.keys(ledger.entries)) {
            if (present.has(rel))
                continue;
            const sourceId = (0, pipeline_1.syntheticSourceId)(binding.bindingId, rel);
            const entry = ledger.entries[rel];
            if (!entry?.lastUploadedHash) {
                // Block-only marker for a file that was never uploaded: forget it
                // locally instead of withdrawing something the server never received.
                delete ledger.entries[rel];
                mutated = true;
                records.push({
                    sourceId,
                    relativePath: rel,
                    bytes: 0,
                    hash: null,
                    outcome: "deleted",
                    reason: "absent after complete scan (never uploaded)",
                    secretRuleIds: [],
                    observedAt: now,
                });
                continue;
            }
            const res = await deps.client.withdraw({
                workspaceId: binding.workspaceId,
                actor: deps.actor,
                relPath: sourceId,
                reason: "deleted",
            });
            if (res.ok) {
                delete ledger.entries[rel];
                mutated = true;
                records.push({
                    sourceId,
                    relativePath: rel,
                    bytes: 0,
                    hash: null,
                    outcome: "deleted",
                    reason: "absent after complete scan",
                    secretRuleIds: [],
                    observedAt: now,
                });
            }
            else {
                ledger.entries[rel] = { ...entry, lastAttemptAt: now };
                mutated = true;
                records.push({
                    sourceId,
                    relativePath: rel,
                    bytes: 0,
                    hash: null,
                    outcome: "failed",
                    reason: `withdraw_failed (deleted): ${res.reason}`,
                    secretRuleIds: [],
                    observedAt: now,
                });
            }
        }
    }
    if (mutated)
        (0, live_ledger_1.writeLiveLedger)(binding.bindingId, ledger, home);
    return {
        bindingId: binding.bindingId,
        memoryDir: binding.memoryDir,
        workspaceId: binding.workspaceId,
        scanComplete: complete,
        records,
    };
}