@meetless/mla 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-info.json +3 -3
- package/dist/cli.js +31 -5
- package/dist/commands/activate.js +39 -18
- package/dist/commands/agent-memory.js +333 -0
- package/dist/commands/enrich.js +211 -2
- package/dist/commands/internal-auto-index.js +64 -1
- package/dist/commands/internal-pretool-observe.js +86 -1
- package/dist/commands/internal-redact-capture.js +130 -0
- package/dist/commands/pilot.js +385 -0
- package/dist/lib/agent-memory-capture/binding.js +115 -0
- package/dist/lib/agent-memory-capture/classify.js +68 -0
- package/dist/lib/agent-memory-capture/collector.js +69 -0
- package/dist/lib/agent-memory-capture/containment.js +74 -0
- package/dist/lib/agent-memory-capture/ledger.js +43 -0
- package/dist/lib/agent-memory-capture/live-collector.js +148 -0
- package/dist/lib/agent-memory-capture/live-ledger.js +45 -0
- package/dist/lib/agent-memory-capture/live-pipeline.js +344 -0
- package/dist/lib/agent-memory-capture/lock.js +98 -0
- package/dist/lib/agent-memory-capture/paths.js +47 -0
- package/dist/lib/agent-memory-capture/pipeline.js +222 -0
- package/dist/lib/agent-memory-capture/report.js +131 -0
- package/dist/lib/agent-memory-capture/types.js +14 -0
- package/dist/lib/agent-memory-capture/upsert-client.js +104 -0
- package/dist/lib/analytics/enforcement-classify.js +65 -0
- package/dist/lib/analytics/enforcement-incident.js +83 -0
- package/dist/lib/analytics/envelope.js +55 -1
- package/dist/lib/analytics/pilot.js +313 -0
- package/dist/lib/enrichment/ingest.js +98 -13
- package/dist/lib/enrichment/materialize-rules.js +81 -0
- package/dist/lib/enrichment/plan.js +72 -15
- package/dist/lib/enrichment/protocol.js +85 -5
- package/dist/lib/enrichment/scout-brief.js +35 -6
- package/dist/lib/redactor.js +104 -1
- package/dist/lib/scanner/agent-memory.js +55 -4
- package/dist/lib/scanner/managed-rules.js +0 -0
- package/dist/lib/scanner/scan.js +52 -1
- package/dist/lib/scanner/score.js +41 -3
- package/dist/lib/scanner/scout-mission.js +9 -7
- package/dist/lib/upgrade-apply.js +30 -0
- package/dist/lib/wire.js +2 -0
- package/package.json +3 -3
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.MAX_FILE_BYTES = void 0;
|
|
4
|
+
exports.enumerateEligibleFiles = enumerateEligibleFiles;
|
|
5
|
+
// src/lib/agent-memory-capture/containment.ts
|
|
6
|
+
//
|
|
7
|
+
// Enumerate the eligible memory files under a binding's directory (§4 step 3),
|
|
8
|
+
// with realpath containment so a symlink can never point the collector at a file
|
|
9
|
+
// outside the consented directory (CONTAINMENT-1). MVP scans DIRECT `.md`
|
|
10
|
+
// children only (the corpus is flat); nesting support is deferred until topic
|
|
11
|
+
// files actually nest.
|
|
12
|
+
const node_fs_1 = require("node:fs");
|
|
13
|
+
const node_path_1 = require("node:path");
|
|
14
|
+
// Fixed max byte size (a constant, not user-configurable yet, per SECRET-1 /
|
|
15
|
+
// §4). Real memory topic files are 1-6 KB; the 188 KB MEMORY.md index is
|
|
16
|
+
// denylisted. A file above this is a processing failure (oversized), never a
|
|
17
|
+
// silent truncate-and-send.
|
|
18
|
+
exports.MAX_FILE_BYTES = 256 * 1024;
|
|
19
|
+
// Never a capture source even if it somehow carried a project type: the index
|
|
20
|
+
// is one-line pointers, not durable claims. Type-filtering already excludes it
|
|
21
|
+
// (it has no frontmatter), but the explicit denylist is belt-and-suspenders.
|
|
22
|
+
const DENYLIST = new Set(["memory.md"]);
|
|
23
|
+
/**
 * Report whether an already-resolved path is the resolved parent directory
 * itself or lies somewhere beneath it.
 *
 * Both arguments are compared as plain strings, so callers must pass
 * realpath-resolved values (no `..` segments, no unresolved symlinks).
 *
 * @param {string} child - Resolved path of the candidate entry.
 * @param {string} parentReal - Resolved path of the consented directory.
 * @returns {boolean} true when `child` equals `parentReal` or starts with
 *   `parentReal` followed by a path separator.
 */
function isContained(child, parentReal) {
    if (child === parentReal)
        return true;
    // A parent that already ends in a separator (a filesystem root such as "/")
    // must not get a second one appended, otherwise the prefix becomes "//" and
    // no child could ever match.
    const prefix = parentReal.endsWith(node_path_1.sep) ? parentReal : parentReal + node_path_1.sep;
    return child.startsWith(prefix);
}
|
|
26
|
+
// Enumerate direct `.md` children that are regular files, realpath-contained,
|
|
27
|
+
// not denylisted, with their byte size. Returns complete=false on ANY iteration
|
|
28
|
+
// error so a partial scan never drives deletions.
|
|
29
|
+
/**
 * List the direct `.md` children of `memoryDir` that may be collected.
 *
 * An entry is returned only when its name ends in `.md` (case-insensitive),
 * neither its name nor the basename of its resolved target is in DENYLIST,
 * its resolved target is a regular file, and that target stays inside the
 * resolved `memoryDir`.
 *
 * @param {string} memoryDir - Directory to scan (resolved with realpath first).
 * @returns {{files: Array<{relativePath: string, absPath: string, realPath: string, bytes: number}>, complete: boolean}}
 *   `complete` is false when the directory could not be resolved or listed, or
 *   when any candidate entry failed to resolve/stat during the scan.
 */
function enumerateEligibleFiles(memoryDir) {
    let memoryReal;
    let names;
    try {
        memoryReal = (0, node_fs_1.realpathSync)(memoryDir);
        names = (0, node_fs_1.readdirSync)(memoryReal);
    }
    catch {
        // Directory missing or unreadable: nothing enumerated, scan incomplete.
        return { files: [], complete: false };
    }
    const files = [];
    let complete = true;
    for (const name of names) {
        const lowered = name.toLowerCase();
        if (!lowered.endsWith(".md"))
            continue;
        if (DENYLIST.has(lowered))
            continue;
        const absPath = (0, node_path_1.join)(memoryReal, name);
        let realPath;
        let bytes;
        try {
            realPath = (0, node_fs_1.realpathSync)(absPath);
            const st = (0, node_fs_1.statSync)(realPath);
            if (!st.isFile())
                continue; // directories, fifos, etc.
            bytes = st.size;
        }
        catch {
            // A single entry that vanished/raced mid-scan makes THIS pass incomplete,
            // so the caller does not mistake other files' absence for deletions.
            complete = false;
            continue;
        }
        // Symlink escape guard: the resolved target must stay inside the consented
        // directory. A symlink pointing outside is silently excluded.
        if (!isContained(realPath, memoryReal))
            continue;
        // The name-based denylist check above only sees the link name. A contained
        // symlink (e.g. `alias.md -> MEMORY.md`) would otherwise smuggle a
        // denylisted file in under a different name, so check the target as well.
        if (DENYLIST.has((0, node_path_1.basename)(realPath).toLowerCase()))
            continue;
        files.push({ relativePath: name, absPath, realPath, bytes });
    }
    return { files, complete };
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.readLedger = readLedger;
|
|
4
|
+
exports.writeLedger = writeLedger;
|
|
5
|
+
// src/lib/agent-memory-capture/ledger.ts
|
|
6
|
+
//
|
|
7
|
+
// The thin per-binding dry-run ledger (§4). It stores only what the COLLECTOR
|
|
8
|
+
// needs to avoid re-emitting events for unchanged content; it deliberately does
|
|
9
|
+
// NOT mirror server processing/extraction state (two state machines would
|
|
10
|
+
// diverge). Keyed by a file's path relative to memoryDir.
|
|
11
|
+
const node_fs_1 = require("node:fs");
|
|
12
|
+
const node_path_1 = require("node:path");
|
|
13
|
+
const config_1 = require("../config");
|
|
14
|
+
const paths_1 = require("./paths");
|
|
15
|
+
/**
 * Build a fresh ledger value: version 1 with no entries. A new object is
 * created on every call so callers can mutate the result freely.
 */
function emptyLedger() {
    const entries = {};
    return { version: 1, entries };
}
|
|
18
|
+
/**
 * Load the dry-run ledger for a binding, falling back to an empty ledger
 * whenever the file cannot be read, is not valid JSON, or does not carry a
 * plain-object `entries` map.
 *
 * @param {string} bindingId - Binding whose ledger file is read.
 * @param {string} [home] - Base directory passed to `ledgerPath`; defaults to
 *   the configured HOME.
 * @returns {{version: 1, entries: object}} The stored entries, or an empty ledger.
 */
function readLedger(bindingId, home = config_1.HOME) {
    let raw;
    try {
        raw = (0, node_fs_1.readFileSync)((0, paths_1.ledgerPath)(bindingId, home), "utf8");
    }
    catch {
        // No ledger yet (or unreadable): start from scratch.
        return emptyLedger();
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch {
        return emptyLedger();
    }
    const entries = parsed ? parsed.entries : undefined;
    // Arrays are `typeof "object"` too, but the ledger is keyed by relative path:
    // string-keyed writes onto an array are dropped by JSON.stringify, so an
    // array here would make every later write lose its entries.
    if (typeof entries !== "object" || entries === null || Array.isArray(entries)) {
        return emptyLedger();
    }
    return { version: 1, entries };
}
|
|
37
|
+
/**
 * Persist the dry-run ledger for a binding.
 *
 * The JSON is written to a pid-suffixed temp file next to the destination and
 * then renamed over it, so a reader never observes a half-written ledger.
 *
 * @param {string} bindingId - Binding whose ledger file is written.
 * @param {object} ledger - Ledger value to serialize (pretty-printed JSON).
 * @param {string} [home] - Base directory passed to `ledgerPath`; defaults to
 *   the configured HOME.
 * @throws Re-throws any filesystem error from the write or the rename.
 */
function writeLedger(bindingId, ledger, home = config_1.HOME) {
    const dest = (0, paths_1.ledgerPath)(bindingId, home);
    (0, node_fs_1.mkdirSync)((0, node_path_1.dirname)(dest), { recursive: true });
    const tmp = `${dest}.${process.pid}.tmp`;
    try {
        (0, node_fs_1.writeFileSync)(tmp, JSON.stringify(ledger, null, 2) + "\n", { mode: 0o600 });
        (0, node_fs_1.renameSync)(tmp, dest);
    }
    catch (err) {
        // Do not leave a stray temp file behind when the write or rename fails.
        // Cleanup is best-effort: the original error is the one worth reporting.
        try {
            (0, node_fs_1.rmSync)(tmp, { force: true });
        }
        catch {
            // ignore: nothing more can be done about the temp file here
        }
        throw err;
    }
}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DEFAULT_MAX_UPLOADS_PER_PASS = void 0;
|
|
4
|
+
exports.liveCaptureEnabled = liveCaptureEnabled;
|
|
5
|
+
exports.appendLiveDecisions = appendLiveDecisions;
|
|
6
|
+
exports.runLiveCollector = runLiveCollector;
|
|
7
|
+
// src/lib/agent-memory-capture/live-collector.ts
|
|
8
|
+
//
|
|
9
|
+
// Orchestrates one LIVE collection + upload pass across all enabled bindings
|
|
10
|
+
// (proposal §4 lifecycle + §6 Phase 2A). Mirrors the dry-run collector.ts shape
|
|
11
|
+
// exactly (per-binding lock, fail-soft per binding, append only the actionable
|
|
12
|
+
// outcomes to a metadata-only JSONL), but the per-file engine is
|
|
13
|
+
// collectAndUploadOnce, which ACTUALLY uploads/withdraws against intel.
|
|
14
|
+
//
|
|
15
|
+
// GATING (default OFF, fail-closed): live capture never runs unless it is
|
|
16
|
+
// explicitly turned on (liveCaptureEnabled). The proposal wires this into the
|
|
17
|
+
// existing Stop auto-index worker (NOT a new hook), so an operator opts in once
|
|
18
|
+
// via MEETLESS_AGENT_MEMORY_LIVE; with the flag unset the worker calls nothing.
|
|
19
|
+
// The flag is necessary but NOT sufficient: there must also be at least one
|
|
20
|
+
// consented binding (CONSENT-1) and a resolvable actor identity, or the pass is
|
|
21
|
+
// a no-op. We never upload anonymously.
|
|
22
|
+
//
|
|
23
|
+
// SECRET-1: the per-file engine runs the credential denylist fail-closed before
|
|
24
|
+
// any byte leaves the machine. This orchestrator adds a second guard, the
|
|
25
|
+
// no-backfill per-pass upload cap (§6), so the FIRST live pass cannot dump the
|
|
26
|
+
// whole backlog at once; the cap drains over successive passes.
|
|
27
|
+
const node_fs_1 = require("node:fs");
|
|
28
|
+
const node_path_1 = require("node:path");
|
|
29
|
+
const config_1 = require("../config");
|
|
30
|
+
const lock_1 = require("./lock");
|
|
31
|
+
const binding_1 = require("./binding");
|
|
32
|
+
const live_pipeline_1 = require("./live-pipeline");
|
|
33
|
+
const paths_1 = require("./paths");
|
|
34
|
+
const upsert_client_1 = require("./upsert-client");
|
|
35
|
+
// Default per-pass upload cap (no-backfill, §6). Conservative on purpose: the
|
|
36
|
+
// first live pass over a backlog uploads at most this many revisions, the rest
|
|
37
|
+
// defer and drain over later Stops. Override with MEETLESS_AGENT_MEMORY_MAX_UPLOADS.
|
|
38
|
+
exports.DEFAULT_MAX_UPLOADS_PER_PASS = 25;
|
|
39
|
+
// Live capture is DEFAULT OFF. Returns true only when the flag is explicitly an
|
|
40
|
+
// affirmative value; unset / "0" / "false" / anything else is OFF (fail-closed).
|
|
41
|
+
// Pure predicate so the worker and the CLI can share one gate and a test can
|
|
42
|
+
// assert it without side effects.
|
|
43
|
+
/**
 * Decide whether live capture has been explicitly switched on.
 *
 * Reads MEETLESS_AGENT_MEMORY_LIVE from `env`, trims and lower-cases it, and
 * accepts only "1", "true", "yes" or "on". Anything else, including an unset
 * variable, means OFF.
 *
 * @param {object} [env] - Environment map; defaults to `process.env`.
 * @returns {boolean}
 */
function liveCaptureEnabled(env = process.env) {
    const flag = (env.MEETLESS_AGENT_MEMORY_LIVE ?? "").trim().toLowerCase();
    switch (flag) {
        case "1":
        case "true":
        case "yes":
        case "on":
            return true;
        default:
            return false;
    }
}
|
|
47
|
+
/**
 * Work out the per-pass upload cap.
 *
 * Uses MEETLESS_AGENT_MEMORY_MAX_UPLOADS from `env` when it parses (via
 * `Number`) to a positive integer; otherwise falls back to
 * DEFAULT_MAX_UPLOADS_PER_PASS.
 *
 * @param {object} [env] - Environment map; defaults to `process.env`.
 * @returns {number}
 */
function resolveMaxUploads(env = process.env) {
    const text = (env.MEETLESS_AGENT_MEMORY_MAX_UPLOADS ?? "").trim();
    if (text.length === 0) {
        return exports.DEFAULT_MAX_UPLOADS_PER_PASS;
    }
    const parsed = Number(text);
    if (!Number.isInteger(parsed) || parsed <= 0) {
        return exports.DEFAULT_MAX_UPLOADS_PER_PASS;
    }
    return parsed;
}
|
|
54
|
+
// Append the actionable live outcomes for one binding to its JSONL. Metadata
|
|
55
|
+
// only (the LiveRecord shape: sourceId, relativePath, hash, bytes, outcome,
|
|
56
|
+
// reason, secretRuleIds, revisionId, serverOutcome, observedAt). NEVER raw
|
|
57
|
+
// content. unchanged/skipped no-ops are dropped so the log stays bounded.
|
|
58
|
+
/**
 * Append one binding's actionable records to its live decision log as JSONL.
 *
 * Records whose outcome is not actionable (per `isLiveActionable`) are left
 * out; when none remain nothing is written and no directory is created.
 *
 * @param {string} bindingId - Binding whose log file receives the lines.
 * @param {Array<object>} records - Per-file records from one pass.
 * @param {string} [home] - Base directory passed to `liveDecisionLogPath`;
 *   defaults to the configured HOME.
 * @returns {number} How many records were appended.
 */
function appendLiveDecisions(bindingId, records, home = config_1.HOME) {
    const keep = [];
    for (const record of records) {
        if ((0, live_pipeline_1.isLiveActionable)(record.outcome)) {
            keep.push(record);
        }
    }
    const count = keep.length;
    if (count === 0) {
        return 0;
    }
    const logPath = (0, paths_1.liveDecisionLogPath)(bindingId, home);
    (0, node_fs_1.mkdirSync)((0, node_path_1.dirname)(logPath), { recursive: true });
    // One JSON document per line, with a trailing newline after the last one.
    const serialized = keep.map((record) => JSON.stringify(record));
    const payload = serialized.join("\n") + "\n";
    (0, node_fs_1.appendFileSync)(logPath, payload, { mode: 0o600 });
    return count;
}
|
|
68
|
+
/**
 * Run one live pass for a single binding while holding its per-binding lock.
 *
 * When the lock cannot be acquired the binding is skipped and the result has
 * `summary: null` and `locked: false`. Otherwise the pass runs, its records
 * are appended to the decision log, and the lock is released even if the pass
 * throws.
 *
 * @param {object} binding - Binding to process; `bindingId` is read from it.
 * @param {object} deps - Pass dependencies; `home` and `nowIso` are read here,
 *   the whole object is forwarded to `collectAndUploadOnce`.
 * @returns {Promise<{bindingId: string, summary: object|null, locked: boolean, appended: number}>}
 */
async function runForBindingLive(binding, deps) {
    const home = deps.home ?? config_1.HOME;
    const { bindingId } = binding;
    // The lock namespace is shared with the dry-run collector, so a dry-run and
    // a live pass cannot interleave on the same binding.
    const held = (0, lock_1.acquireBindingLock)(bindingId, deps.nowIso, home);
    if (held) {
        try {
            const summary = await (0, live_pipeline_1.collectAndUploadOnce)(binding, deps);
            const appended = appendLiveDecisions(bindingId, summary.records, home);
            return { bindingId, summary, locked: true, appended };
        }
        finally {
            held.release();
        }
    }
    return { bindingId, summary: null, locked: false, appended: 0 };
}
|
|
86
|
+
// Build the real UpsertClient + actor from config, or fail-closed. Returns null
|
|
87
|
+
// when there is no resolvable actor identity (never upload anonymously) or the
|
|
88
|
+
// config cannot be read. Skipped entirely when a client is injected (tests).
|
|
89
|
+
/**
 * Resolve the upload client and the actor identity for a live pass, or return
 * null so the caller skips the pass (fail-closed).
 *
 * - With an injected `opts.client`, `opts.actor` must also be supplied.
 * - Otherwise the config is taken from `opts.cfg` or read via `readConfig()`,
 *   the actor is `opts.actor` or the config's `actorUserId`, and the client is
 *   built from that config.
 *
 * In both branches the actor must be a string that is non-empty after
 * trimming, and the trimmed value is what gets returned.
 *
 * @param {object} opts - Options; reads `client`, `actor` and `cfg`.
 * @returns {{client: object, actor: string} | null}
 */
function resolveClientAndActor(opts) {
    // A non-string or whitespace-only identity counts as "no actor".
    const normalizeActor = (value) => (typeof value === "string" ? value.trim() : "");
    if (opts.client) {
        // An injected client with no usable actor is a misconfiguration; require
        // both, applying the same non-blank rule as the config branch below.
        const injectedActor = normalizeActor(opts.actor);
        if (!injectedActor)
            return null;
        return { client: opts.client, actor: injectedActor };
    }
    let cfg;
    try {
        cfg = opts.cfg ?? (0, config_1.readConfig)();
    }
    catch {
        return null;
    }
    // A config that resolved to nothing is treated like one that could not be read.
    if (!cfg)
        return null;
    const actor = normalizeActor(opts.actor ?? cfg.actorUserId);
    if (!actor)
        return null; // not logged in -> never upload anonymously.
    return { client: (0, upsert_client_1.createIntelUpsertClient)(cfg), actor };
}
|
|
108
|
+
// Run a LIVE pass over every enabled binding. Fail-soft per binding: one
|
|
109
|
+
// binding's error never aborts the others. Returns [] WITHOUT touching the
|
|
110
|
+
// network when live capture is gated off, when there is no resolvable actor, or
|
|
111
|
+
// when there are no enabled bindings. Async because the per-file engine awaits
|
|
112
|
+
// the network.
|
|
113
|
+
/**
 * Run one live pass over every enabled binding, one binding at a time.
 *
 * Returns `[]` without running any binding when any gate fails: the opt-in
 * flag is off, no client/actor can be resolved, or there are no enabled
 * bindings. A binding whose pass throws does not stop the others; it is
 * reported with `summary: null`, `locked: false` and an `error` message so it
 * can be told apart from plain lock contention.
 *
 * @param {object} opts - Options; reads `env`, `home`, `nowIso`,
 *   `maxUploadsPerPass`, `scan`, `scannerVersion`, `scannerMode`, plus whatever
 *   `resolveClientAndActor` reads.
 * @returns {Promise<Array<{bindingId: string, summary: object|null, locked: boolean, appended: number, error?: string}>>}
 */
async function runLiveCollector(opts) {
    const env = opts.env ?? process.env;
    // Gate 1: the explicit opt-in flag (default off). Re-checked here so a
    // direct call can never bypass it.
    if (!liveCaptureEnabled(env))
        return [];
    // Gate 2: a resolvable client + actor (never upload anonymously).
    const resolved = resolveClientAndActor(opts);
    if (!resolved)
        return [];
    const home = opts.home ?? config_1.HOME;
    // Gate 3: at least one consented binding (CONSENT-1).
    const bindings = (0, binding_1.listEnabledBindings)(home);
    if (bindings.length === 0)
        return [];
    const deps = {
        client: resolved.client,
        actor: resolved.actor,
        nowIso: opts.nowIso,
        home,
        maxUploadsPerPass: opts.maxUploadsPerPass ?? resolveMaxUploads(env),
        // Optional overrides are only forwarded when set, so the per-file engine
        // applies its own defaults otherwise.
        ...(opts.scan ? { scan: opts.scan } : {}),
        ...(opts.scannerVersion ? { scannerVersion: opts.scannerVersion } : {}),
        ...(opts.scannerMode ? { scannerMode: opts.scannerMode } : {}),
    };
    const out = [];
    // Sequential on purpose: each binding is awaited before the next starts.
    for (const b of bindings) {
        try {
            out.push(await runForBindingLive(b, deps));
        }
        catch (err) {
            // Fail-soft per binding, but keep the cause instead of discarding it.
            const error = err instanceof Error ? err.message : String(err);
            out.push({ bindingId: b.bindingId, summary: null, locked: false, appended: 0, error });
        }
    }
    return out;
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.readLiveLedger = readLiveLedger;
|
|
4
|
+
exports.writeLiveLedger = writeLiveLedger;
|
|
5
|
+
// src/lib/agent-memory-capture/live-ledger.ts
|
|
6
|
+
//
|
|
7
|
+
// The per-binding LIVE ledger (§4 "Live"). Unlike the dry-run ledger, this one
|
|
8
|
+
// tracks what the SERVER acknowledged, not what we observed: `lastUploadedHash`
|
|
9
|
+
// advances ONLY on a hash-matched ack (COMMIT-1), so a failed or unverified
|
|
10
|
+
// upload leaves the entry "unsettled" and the next pass re-attempts (RETRY-2).
|
|
11
|
+
// Kept in its own file (liveLedgerPath) so it can never collide with the dry-run
|
|
12
|
+
// ledger on the same binding. Keyed by a file's path relative to memoryDir.
|
|
13
|
+
const node_fs_1 = require("node:fs");
|
|
14
|
+
const node_path_1 = require("node:path");
|
|
15
|
+
const config_1 = require("../config");
|
|
16
|
+
const paths_1 = require("./paths");
|
|
17
|
+
/**
 * Build a fresh live ledger value: version 1 with no entries. A new object is
 * created on every call so callers can mutate the result freely.
 */
function emptyLiveLedger() {
    const entries = {};
    return { version: 1, entries };
}
|
|
20
|
+
/**
 * Load the live ledger for a binding, falling back to an empty ledger whenever
 * the file cannot be read, is not valid JSON, or does not carry a plain-object
 * `entries` map.
 *
 * @param {string} bindingId - Binding whose live ledger file is read.
 * @param {string} [home] - Base directory passed to `liveLedgerPath`; defaults
 *   to the configured HOME.
 * @returns {{version: 1, entries: object}} The stored entries, or an empty ledger.
 */
function readLiveLedger(bindingId, home = config_1.HOME) {
    let raw;
    try {
        raw = (0, node_fs_1.readFileSync)((0, paths_1.liveLedgerPath)(bindingId, home), "utf8");
    }
    catch {
        // No ledger yet (or unreadable): start from scratch.
        return emptyLiveLedger();
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch {
        return emptyLiveLedger();
    }
    const entries = parsed ? parsed.entries : undefined;
    // Arrays are `typeof "object"` too, but the ledger is keyed by relative path:
    // string-keyed writes onto an array are dropped by JSON.stringify, so an
    // array here would make every later write lose its settled entries.
    if (typeof entries !== "object" || entries === null || Array.isArray(entries)) {
        return emptyLiveLedger();
    }
    return { version: 1, entries };
}
|
|
39
|
+
/**
 * Persist the live ledger for a binding.
 *
 * The JSON is written to a pid-suffixed temp file next to the destination and
 * then renamed over it, so a reader never observes a half-written ledger.
 *
 * @param {string} bindingId - Binding whose live ledger file is written.
 * @param {object} ledger - Ledger value to serialize (pretty-printed JSON).
 * @param {string} [home] - Base directory passed to `liveLedgerPath`; defaults
 *   to the configured HOME.
 * @throws Re-throws any filesystem error from the write or the rename.
 */
function writeLiveLedger(bindingId, ledger, home = config_1.HOME) {
    const dest = (0, paths_1.liveLedgerPath)(bindingId, home);
    (0, node_fs_1.mkdirSync)((0, node_path_1.dirname)(dest), { recursive: true });
    const tmp = `${dest}.${process.pid}.tmp`;
    try {
        (0, node_fs_1.writeFileSync)(tmp, JSON.stringify(ledger, null, 2) + "\n", { mode: 0o600 });
        (0, node_fs_1.renameSync)(tmp, dest);
    }
    catch (err) {
        // Do not leave a stray temp file behind when the write or rename fails.
        // Cleanup is best-effort: the original error is the one worth reporting.
        try {
            (0, node_fs_1.rmSync)(tmp, { force: true });
        }
        catch {
            // ignore: nothing more can be done about the temp file here
        }
        throw err;
    }
}
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.isLiveActionable = isLiveActionable;
|
|
4
|
+
exports.collectAndUploadOnce = collectAndUploadOnce;
|
|
5
|
+
// src/lib/agent-memory-capture/live-pipeline.ts
|
|
6
|
+
//
|
|
7
|
+
// The LIVE collection pass (Phase 2A+). Mirrors the dry-run §4 lifecycle router
|
|
8
|
+
// in pipeline.ts but ACTUALLY performs the network ops: it uploads eligible
|
|
9
|
+
// revisions (UPSERT_SOURCE_REVISION) and withdraws reclassified/deleted sources
|
|
10
|
+
// (WITHDRAW_SOURCE), against the injectable `UpsertClient`. It commits the LIVE
|
|
11
|
+
// ledger only on a verified server ack:
|
|
12
|
+
//
|
|
13
|
+
// COMMIT-1: `lastUploadedHash` advances ONLY after a successful ack whose
|
|
14
|
+
// server-echoed content hash equals the local hash (or, on older
|
|
15
|
+
// intel that omits the echo, on a success outcome alone). A failed,
|
|
16
|
+
// rejected, or hash-mismatched upload leaves the entry unsettled.
|
|
17
|
+
// RETRY-2: Because a failed upload never advances `lastUploadedHash`, the next
|
|
18
|
+
// pass sees the file as still-changed and re-attempts it. A blocked
|
|
19
|
+
// file is re-evaluated when the scanner version moves.
|
|
20
|
+
//
|
|
21
|
+
// SECRET-1: the credential denylist (`scanForCredentials`, NOT the entropy
|
|
22
|
+
// scanner) runs FAIL-CLOSED before any upload. A credential-format hit withholds
|
|
23
|
+
// the file; a scanner outage withholds the file. Nothing credential-bearing is
|
|
24
|
+
// handed to the client.
|
|
25
|
+
//
|
|
26
|
+
// One immutable byte buffer per file (the dry-run's TOCTOU guard): the bytes
|
|
27
|
+
// hashed, classified, scanned, and uploaded are provably the same bytes.
|
|
28
|
+
const node_fs_1 = require("node:fs");
|
|
29
|
+
const config_1 = require("../config");
|
|
30
|
+
const redactor_1 = require("../redactor");
|
|
31
|
+
const classify_1 = require("./classify");
|
|
32
|
+
const containment_1 = require("./containment");
|
|
33
|
+
const live_ledger_1 = require("./live-ledger");
|
|
34
|
+
const pipeline_1 = require("./pipeline");
|
|
35
|
+
// Only outcomes that represent an actual event are worth persisting to the live
|
|
36
|
+
// JSONL. "unchanged" and "skipped" are emitted every pass; persisting them would
|
|
37
|
+
// grow the log without bound.
|
|
38
|
+
/**
 * Tell whether a per-file outcome is worth persisting to the live JSONL log.
 *
 * @param {string} outcome - Outcome label of one record.
 * @returns {boolean} false for "unchanged" and "skipped", true for anything else.
 */
function isLiveActionable(outcome) {
    switch (outcome) {
        case "unchanged":
        case "skipped":
            return false;
        default:
            return true;
    }
}
|
|
41
|
+
// Run one LIVE collection + upload pass for a single binding. Reads the real
|
|
42
|
+
// directory, performs network ops via the client, and mutates the LIVE ledger.
|
|
43
|
+
// Returns every file's outcome (the collector persists only the actionable ones).
|
|
44
|
+
/**
 * Invoke one client operation and fold a thrown/rejected error (or an empty
 * result) into the `{ ok: false, reason }` shape that the pass already handles
 * for reported failures. This keeps one misbehaving call from aborting the
 * pass before the ledger is written.
 *
 * @param {() => any} operation - Thunk performing the client call.
 * @returns {Promise<object>} The client's result, or a synthetic failure.
 */
async function callClientSafely(operation) {
    try {
        const result = await operation();
        return result ?? { ok: false, reason: "client_error: empty response" };
    }
    catch (err) {
        const detail = err instanceof Error ? err.message : String(err);
        return { ok: false, reason: `client_error: ${detail}` };
    }
}
/**
 * Run one LIVE collection + upload pass for a single binding.
 *
 * For every enumerated file the pass produces exactly one record. Project-typed
 * files that are new or changed and pass the credential scan are uploaded via
 * `deps.client.upsert`; files that stopped being project-typed, or that are
 * absent after a complete scan, are withdrawn via `deps.client.withdraw` when
 * the ledger shows a settled upload for them. The live ledger is rewritten once
 * at the end, and only if something changed.
 *
 * @param {object} binding - Reads `bindingId`, `memoryDir`, `workspaceId`,
 *   `consentedAt`.
 * @param {object} deps - Reads `client`, `actor`, `nowIso`, and the optional
 *   `home`, `scan`, `scannerVersion`, `scannerMode`, `maxUploadsPerPass`.
 * @returns {Promise<{bindingId: string, memoryDir: string, workspaceId: string, scanComplete: boolean, records: Array<object>}>}
 */
async function collectAndUploadOnce(binding, deps) {
    const home = deps.home ?? config_1.HOME;
    const scan = deps.scan ?? redactor_1.scanForCredentials;
    const scannerVersion = deps.scannerVersion ?? redactor_1.SECRET_SCANNER_VERSION;
    // Live is fail-closed by default; "off" is a test-only escape hatch.
    const scannerMode = deps.scannerMode ?? "block";
    const now = deps.nowIso;
    // No-backfill cap (§6). undefined = uncapped; otherwise stop attempting uploads
    // once this many have been attempted this pass and defer the rest.
    const cap = deps.maxUploadsPerPass;
    let uploadAttempts = 0;
    const ledger = (0, live_ledger_1.readLiveLedger)(binding.bindingId, home);
    const { files, complete } = (0, containment_1.enumerateEligibleFiles)(binding.memoryDir);
    const records = [];
    const present = new Set();
    let mutated = false;
    // Fields shared by every per-file record.
    const base = (relativePath, bytes) => ({
        sourceId: (0, pipeline_1.syntheticSourceId)(binding.bindingId, relativePath),
        relativePath,
        bytes,
        secretRuleIds: [],
        observedAt: now,
    });
    for (const f of files) {
        present.add(f.relativePath);
        const sourceId = (0, pipeline_1.syntheticSourceId)(binding.bindingId, f.relativePath);
        // Oversized: known from stat; never read, never upload, never withdraw.
        if (f.bytes > containment_1.MAX_FILE_BYTES) {
            records.push({ ...base(f.relativePath, f.bytes), hash: null, outcome: "failed", reason: "oversized" });
            continue;
        }
        let buf;
        try {
            buf = (0, node_fs_1.readFileSync)(f.realPath);
        }
        catch {
            records.push({ ...base(f.relativePath, f.bytes), hash: null, outcome: "failed", reason: "unreadable" });
            continue;
        }
        // Re-check against the bytes actually read: the file may have grown since
        // it was stat'ed during enumeration.
        if (buf.length > containment_1.MAX_FILE_BYTES) {
            records.push({ ...base(f.relativePath, buf.length), hash: null, outcome: "failed", reason: "oversized" });
            continue;
        }
        // Everything below (hash, classification, scan, upload) works from this
        // single buffer.
        const hash = (0, pipeline_1.sha256Hex)(buf);
        const text = buf.toString("utf8");
        const cls = (0, classify_1.classifyMemory)(text);
        const prior = ledger.entries[f.relativePath];
        if (cls.malformed) {
            records.push({ ...base(f.relativePath, buf.length), hash, outcome: "failed", reason: "malformed_frontmatter" });
            continue;
        }
        if (cls.type !== "project") {
            // A previously-UPLOADED project file became non-project: WITHDRAW it.
            // Only an entry with a settled `lastUploadedHash` has anything on the
            // server; an entry holding just a block marker was never uploaded.
            if (prior?.lastUploadedHash) {
                const res = await callClientSafely(() => deps.client.withdraw({
                    workspaceId: binding.workspaceId,
                    actor: deps.actor,
                    relPath: sourceId,
                    reason: "reclassified",
                }));
                if (res.ok) {
                    delete ledger.entries[f.relativePath];
                    mutated = true;
                    records.push({
                        ...base(f.relativePath, buf.length),
                        hash,
                        outcome: "reclassified",
                        reason: `reclassified project -> ${cls.type ?? "none"}`,
                    });
                }
                else {
                    // Leave the entry so the next pass retries the withdraw (RETRY-2).
                    ledger.entries[f.relativePath] = { ...prior, lastAttemptAt: now };
                    mutated = true;
                    records.push({
                        ...base(f.relativePath, buf.length),
                        hash,
                        outcome: "failed",
                        reason: `withdraw_failed (reclassified): ${res.reason}`,
                    });
                }
            }
            else {
                if (prior) {
                    // Block-marker-only entry: nothing to withdraw, just drop the
                    // stale local marker.
                    delete ledger.entries[f.relativePath];
                    mutated = true;
                }
                records.push({
                    ...base(f.relativePath, buf.length),
                    hash,
                    outcome: "skipped",
                    reason: cls.type ? `type ${cls.type}` : "no project type",
                });
            }
            continue;
        }
        // type === project. If the exact bytes already match what the server acked,
        // it is settled, so short-circuit WITHOUT re-scanning. Clear any stale block
        // marker (content reverted to the uploaded version).
        //
        // LIMITATION (documented, not a bug): this does NOT retroactively re-scan or
        // withdraw already-uploaded content when the scanner version bumps.
        if (prior?.lastUploadedHash === hash) {
            if (prior.blockedHash || prior.blockedScannerVersion) {
                const cleared = { ...prior, lastAttemptAt: now };
                delete cleared.blockedHash;
                delete cleared.blockedScannerVersion;
                ledger.entries[f.relativePath] = cleared;
                mutated = true;
            }
            records.push({
                ...base(f.relativePath, buf.length),
                hash,
                outcome: "unchanged",
                reason: "content identical to last upload",
            });
            continue;
        }
        // Credential denylist, FAIL-CLOSED (SECRET-1). "off" is the test-only path.
        let secretRuleIds = [];
        if (scannerMode !== "off") {
            try {
                secretRuleIds = scan(text);
            }
            catch {
                // Scanner outage withholds the file: we cannot prove it is clean.
                records.push({
                    ...base(f.relativePath, buf.length),
                    hash,
                    outcome: "failed",
                    reason: "scanner_unavailable",
                });
                continue;
            }
        }
        if (scannerMode !== "off" && secretRuleIds.length > 0) {
            const alreadyBlocked = prior?.blockedHash === hash && prior?.blockedScannerVersion === scannerVersion;
            if (alreadyBlocked) {
                records.push({
                    ...base(f.relativePath, buf.length),
                    hash,
                    outcome: "unchanged",
                    reason: "blocked (unchanged, same scanner)",
                    secretRuleIds,
                });
            }
            else {
                // Set the block marker but PRESERVE any prior upload settle (a file can
                // be blocked at a new revision while an older clean revision is on the
                // server). Never advance lastUploadedHash here.
                ledger.entries[f.relativePath] = {
                    ...(prior ?? {}),
                    blockedHash: hash,
                    blockedScannerVersion: scannerVersion,
                    lastAttemptAt: now,
                };
                mutated = true;
                records.push({
                    ...base(f.relativePath, buf.length),
                    hash,
                    outcome: "blocked",
                    reason: "credential format matched",
                    secretRuleIds,
                });
            }
            continue;
        }
        // No-backfill cap (§6): once the per-pass upload budget is exhausted, DEFER
        // the remaining changed+clean files. A deferred file leaves the ledger
        // untouched, so the next pass re-attempts it.
        if (cap !== undefined && uploadAttempts >= cap) {
            records.push({
                ...base(f.relativePath, buf.length),
                hash,
                outcome: "deferred",
                reason: "per-pass upload cap reached",
                secretRuleIds,
            });
            continue;
        }
        uploadAttempts++;
        // project + clean + changed/new -> UPLOAD.
        const res = await callClientSafely(() => deps.client.upsert({
            workspaceId: binding.workspaceId,
            actor: deps.actor,
            relPath: sourceId,
            content: text,
            contentHash: hash,
            bindingId: binding.bindingId,
            consentedAt: binding.consentedAt,
        }));
        if (!res.ok || res.outcome === "failed") {
            // RETRY-2: do NOT advance lastUploadedHash; only stamp an attempt on an
            // existing entry (never create a bare entry for a never-settled file, so
            // deletion reconciliation cannot later withdraw something never uploaded).
            if (prior) {
                ledger.entries[f.relativePath] = { ...prior, lastAttemptAt: now };
                mutated = true;
            }
            records.push({
                ...base(f.relativePath, buf.length),
                hash,
                outcome: "failed",
                reason: res.ok ? `server_rejected: ${res.reason}` : res.reason,
                secretRuleIds,
            });
            continue;
        }
        // COMMIT-1: if the server echoed its content hash, it MUST equal ours.
        if (res.serverContentHash !== null && res.serverContentHash !== hash) {
            if (prior) {
                ledger.entries[f.relativePath] = { ...prior, lastAttemptAt: now };
                mutated = true;
            }
            records.push({
                ...base(f.relativePath, buf.length),
                hash,
                outcome: "failed",
                reason: "hash_mismatch",
                secretRuleIds,
            });
            continue;
        }
        // COMMIT-1 satisfied. Settle the ledger to this hash; the fresh object also
        // drops any block marker the prior entry carried.
        ledger.entries[f.relativePath] = {
            lastUploadedHash: hash,
            lastUploadedRevisionId: res.revisionId ?? undefined,
            lastLogicalSourceId: res.logicalSourceId ?? undefined,
            lastSourceId: sourceId,
            lastAttemptAt: now,
        };
        mutated = true;
        records.push({
            ...base(f.relativePath, buf.length),
            hash,
            outcome: "uploaded",
            reason: prior?.lastUploadedHash ? "changed" : "new",
            secretRuleIds,
            revisionId: res.revisionId,
            // Any successful outcome other than "created" is recorded as
            // "already_exists" (the server already held these bytes).
            serverOutcome: res.outcome === "created" ? "created" : "already_exists",
        });
    }
    // Deletions: only when the scan completed (a partial scan must never mistake an
    // un-enumerated file for a deletion). WITHDRAW each absent settled source; keep
    // the entry on a failed withdraw so the next complete pass retries it.
    if (complete) {
        for (const rel of Object.keys(ledger.entries)) {
            if (present.has(rel))
                continue;
            const entry = ledger.entries[rel];
            if (!entry?.lastUploadedHash) {
                // Never settled (e.g. only ever blocked): there is nothing to
                // withdraw, so this is a local-only cleanup with no record.
                delete ledger.entries[rel];
                mutated = true;
                continue;
            }
            const sourceId = (0, pipeline_1.syntheticSourceId)(binding.bindingId, rel);
            const res = await callClientSafely(() => deps.client.withdraw({
                workspaceId: binding.workspaceId,
                actor: deps.actor,
                relPath: sourceId,
                reason: "deleted",
            }));
            if (res.ok) {
                delete ledger.entries[rel];
                mutated = true;
                records.push({
                    sourceId,
                    relativePath: rel,
                    bytes: 0,
                    hash: null,
                    outcome: "deleted",
                    reason: "absent after complete scan",
                    secretRuleIds: [],
                    observedAt: now,
                });
            }
            else {
                ledger.entries[rel] = { ...entry, lastAttemptAt: now };
                mutated = true;
                records.push({
                    sourceId,
                    relativePath: rel,
                    bytes: 0,
                    hash: null,
                    outcome: "failed",
                    reason: `withdraw_failed (deleted): ${res.reason}`,
                    secretRuleIds: [],
                    observedAt: now,
                });
            }
        }
    }
    if (mutated)
        (0, live_ledger_1.writeLiveLedger)(binding.bindingId, ledger, home);
    return {
        bindingId: binding.bindingId,
        memoryDir: binding.memoryDir,
        workspaceId: binding.workspaceId,
        scanComplete: complete,
        records,
    };
}
|