@meetless/mla 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-info.json +3 -3
- package/dist/cli.js +31 -5
- package/dist/commands/activate.js +39 -18
- package/dist/commands/agent-memory.js +333 -0
- package/dist/commands/enrich.js +211 -2
- package/dist/commands/internal-auto-index.js +64 -1
- package/dist/commands/internal-pretool-observe.js +86 -1
- package/dist/commands/internal-redact-capture.js +130 -0
- package/dist/commands/pilot.js +385 -0
- package/dist/lib/agent-memory-capture/binding.js +115 -0
- package/dist/lib/agent-memory-capture/classify.js +68 -0
- package/dist/lib/agent-memory-capture/collector.js +69 -0
- package/dist/lib/agent-memory-capture/containment.js +74 -0
- package/dist/lib/agent-memory-capture/ledger.js +43 -0
- package/dist/lib/agent-memory-capture/live-collector.js +148 -0
- package/dist/lib/agent-memory-capture/live-ledger.js +45 -0
- package/dist/lib/agent-memory-capture/live-pipeline.js +344 -0
- package/dist/lib/agent-memory-capture/lock.js +98 -0
- package/dist/lib/agent-memory-capture/paths.js +47 -0
- package/dist/lib/agent-memory-capture/pipeline.js +222 -0
- package/dist/lib/agent-memory-capture/report.js +131 -0
- package/dist/lib/agent-memory-capture/types.js +14 -0
- package/dist/lib/agent-memory-capture/upsert-client.js +104 -0
- package/dist/lib/analytics/enforcement-classify.js +65 -0
- package/dist/lib/analytics/enforcement-incident.js +83 -0
- package/dist/lib/analytics/envelope.js +55 -1
- package/dist/lib/analytics/pilot.js +313 -0
- package/dist/lib/enrichment/ingest.js +98 -13
- package/dist/lib/enrichment/materialize-rules.js +81 -0
- package/dist/lib/enrichment/plan.js +72 -15
- package/dist/lib/enrichment/protocol.js +85 -5
- package/dist/lib/enrichment/scout-brief.js +35 -6
- package/dist/lib/redactor.js +104 -1
- package/dist/lib/scanner/agent-memory.js +55 -4
- package/dist/lib/scanner/managed-rules.js +0 -0
- package/dist/lib/scanner/scan.js +52 -1
- package/dist/lib/scanner/score.js +41 -3
- package/dist/lib/scanner/scout-mission.js +9 -7
- package/dist/lib/upgrade-apply.js +30 -0
- package/dist/lib/wire.js +2 -0
- package/package.json +1 -1
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.acquireBindingLock = acquireBindingLock;
|
|
4
|
+
// src/lib/agent-memory-capture/lock.ts
|
|
5
|
+
//
|
|
6
|
+
// Per-bindingId mutual exclusion (CONCURRENCY-1): overlapping Stops must not
|
|
7
|
+
// double-process or corrupt the ledger, and a holder that dies must release
|
|
8
|
+
// automatically.
|
|
9
|
+
//
|
|
10
|
+
// The proposal asks for an OS advisory flock "so it self-releases on death (not
|
|
11
|
+
// a PID file)." Node ships no portable flock(2) (and macOS, the dogfood
|
|
12
|
+
// platform, has no `flock` binary either), so this implements the SAME
|
|
13
|
+
// invariant a different way: an exclusive-create lockfile whose holder is
|
|
14
|
+
// liveness-checked. A stale lockfile from a dead PID is immediately stealable
|
|
15
|
+
// (`process.kill(pid, 0)` throws ESRCH), which gives the "self-releases on
|
|
16
|
+
// death" property the design requires. Acquisition is non-blocking: a live
|
|
17
|
+
// holder means another collector is running, so we skip this pass and let the
|
|
18
|
+
// next Stop rescan.
|
|
19
|
+
const node_fs_1 = require("node:fs");
|
|
20
|
+
const node_path_1 = require("node:path");
|
|
21
|
+
const config_1 = require("../config");
|
|
22
|
+
const paths_1 = require("./paths");
|
|
23
|
+
// Liveness probe for a lock holder. Returns false outright for anything that is
// not a positive integer; otherwise sends signal 0 to `pid` and reports true
// when the call succeeds or fails with EPERM, false for any other error code.
function pidIsAlive(pid) {
    const plausible = Number.isInteger(pid) && pid > 0;
    if (!plausible) {
        return false;
    }
    try {
        // Signal 0 delivers nothing; it only runs the existence/permission checks.
        process.kill(pid, 0);
        return true;
    }
    catch (probeError) {
        // EPERM: the process is there, we just are not allowed to signal it.
        const { code } = probeError;
        return code === "EPERM";
    }
}
|
|
37
|
+
// Read the holder PID recorded on the first line of the lockfile at `path`.
// Returns the PID as a positive integer, or null when the file cannot be read
// or its first line does not convert to a positive integer.
function readHolderPid(path) {
    let contents;
    try {
        contents = (0, node_fs_1.readFileSync)(path, "utf8");
    }
    catch {
        return null;
    }
    const [firstLine] = contents.split("\n");
    const candidate = Number(firstLine?.trim());
    if (Number.isInteger(candidate) && candidate > 0) {
        return candidate;
    }
    return null;
}
|
|
47
|
+
// Try to take the lock by exclusively creating the lockfile at `path`.
//
// Returns null when the file already exists (EEXIST); any other open error is
// rethrown. On success the file is stamped with "<pid>\n<nowIso>\n" and a handle
// is returned whose release() is idempotent.
//
// release() only unlinks the file while it still carries the exact stamp this
// handle wrote. If the file was removed, or replaced by a different holder's
// lockfile, release() leaves the path alone so it can never delete a lock that
// belongs to someone else.
function tryCreate(path, nowIso) {
    let fd;
    try {
        fd = (0, node_fs_1.openSync)(path, "wx"); // exclusive create; EEXIST if held
    }
    catch (e) {
        if (e.code === "EEXIST")
            return null;
        throw e;
    }
    const stamp = `${process.pid}\n${nowIso}\n`;
    try {
        (0, node_fs_1.writeSync)(fd, stamp);
    }
    finally {
        (0, node_fs_1.closeSync)(fd);
    }
    let released = false;
    return {
        release() {
            if (released)
                return;
            released = true;
            try {
                // Ownership check: only remove the lockfile we wrote ourselves.
                if ((0, node_fs_1.readFileSync)(path, "utf8") !== stamp)
                    return;
                (0, node_fs_1.rmSync)(path);
            }
            catch {
                // Already gone (stolen as stale, or never existed): nothing to do.
            }
        },
    };
}
|
|
78
|
+
// Acquire the per-binding lock without blocking. Returns null when a LIVE holder
|
|
79
|
+
// already owns it. A stale lockfile (dead holder) is stolen exactly once and the
|
|
80
|
+
// acquisition retried.
|
|
81
|
+
// Non-blocking acquisition of the lock for `bindingId` under `home`.
//
// Ensures the lock directory exists, then attempts an exclusive create. When the
// lockfile already exists, the recorded holder is inspected: a live holder means
// null is returned; otherwise the file is removed (errors ignored) and exactly
// one more create is attempted, whose result (handle or null) is returned.
//
// NOTE(review): the holder check and the removal are separate steps, so two
// contenders that both observe the same stale file could each remove and
// re-create it — confirm whether callers tolerate that window.
function acquireBindingLock(bindingId, nowIso, home = config_1.HOME) {
    const { mkdirSync, rmSync } = node_fs_1;
    const { dirname } = node_path_1;
    const { lockPath } = paths_1;
    const lockFile = lockPath(bindingId, home);
    mkdirSync(dirname(lockFile), { recursive: true });
    const firstAttempt = tryCreate(lockFile, nowIso);
    if (firstAttempt) {
        return firstAttempt;
    }
    // Somebody holds it: back off unless the recorded holder is dead/unreadable.
    const holderPid = readHolderPid(lockFile);
    const heldByLiveProcess = holderPid !== null && pidIsAlive(holderPid);
    if (heldByLiveProcess) {
        return null;
    }
    try {
        rmSync(lockFile);
    }
    catch {
        // Someone else just cleared/replaced it; the single retry below decides.
    }
    return tryCreate(lockFile, nowIso);
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.bindingsPath = bindingsPath;
|
|
4
|
+
exports.ledgerPath = ledgerPath;
|
|
5
|
+
exports.lockPath = lockPath;
|
|
6
|
+
exports.decisionLogPath = decisionLogPath;
|
|
7
|
+
exports.liveLedgerPath = liveLedgerPath;
|
|
8
|
+
exports.liveDecisionLogPath = liveDecisionLogPath;
|
|
9
|
+
// src/lib/agent-memory-capture/paths.ts
|
|
10
|
+
//
|
|
11
|
+
// All capture-local state lives under the mla HOME (~/.meetless by default),
|
|
12
|
+
// in its OWN files, deliberately NOT folded into cli-config.json. cli-config is
|
|
13
|
+
// a carefully-guarded credential surface; capture bindings are non-credential
|
|
14
|
+
// local state and get their own file, matching how SESSION_GATE_DIR / QUEUE_DIR
|
|
15
|
+
// are kept separate from the config.
|
|
16
|
+
const node_path_1 = require("node:path");
|
|
17
|
+
const config_1 = require("../config");
|
|
18
|
+
// The binding registry: { version, bindings: MemoryBinding[] }.
|
|
19
|
+
// Location of the binding registry file directly under `home`.
function bindingsPath(home = config_1.HOME) {
    const { join } = node_path_1;
    const registryFile = "agent-memory-bindings.json";
    return join(home, registryFile);
}
|
|
22
|
+
// Per-binding thin dry-run ledger: <home>/agent-memory/ledger/<bindingId>.json.
|
|
23
|
+
// Dry-run ledger file for one binding: <home>/agent-memory/ledger/<bindingId>.json.
function ledgerPath(bindingId, home = config_1.HOME) {
    const { join } = node_path_1;
    const fileName = `${bindingId}.json`;
    return join(home, "agent-memory", "ledger", fileName);
}
|
|
26
|
+
// Per-binding advisory lockfile (PID-liveness; self-releases on process death).
|
|
27
|
+
// Lockfile for one binding: <home>/agent-memory/locks/<bindingId>.lock.
function lockPath(bindingId, home = config_1.HOME) {
    const { join } = node_path_1;
    const fileName = `${bindingId}.lock`;
    return join(home, "agent-memory", "locks", fileName);
}
|
|
30
|
+
// Append-only, metadata-only decision log for the dry-run collector. One JSONL
|
|
31
|
+
// per binding so volume analysis is per-source. NEVER contains raw content.
|
|
32
|
+
// Dry-run decision log for one binding: <home>/agent-memory/decisions/<bindingId>.jsonl.
function decisionLogPath(bindingId, home = config_1.HOME) {
    const { join } = node_path_1;
    const fileName = `${bindingId}.jsonl`;
    return join(home, "agent-memory", "decisions", fileName);
}
|
|
35
|
+
// Per-binding LIVE ledger (Phase 2A+): <home>/agent-memory/live-ledger/<bindingId>.json.
|
|
36
|
+
// Kept separate from the dry-run ledger so the two state shapes can never collide
|
|
37
|
+
// on the same binding (§4) and a binding's live progress is never clobbered by an
|
|
38
|
+
// earlier dry-run pass (or vice versa).
|
|
39
|
+
// Live ledger file for one binding: <home>/agent-memory/live-ledger/<bindingId>.json.
function liveLedgerPath(bindingId, home = config_1.HOME) {
    const { join } = node_path_1;
    const fileName = `${bindingId}.json`;
    return join(home, "agent-memory", "live-ledger", fileName);
}
|
|
42
|
+
// Append-only, metadata-only outcome log for the LIVE collector. NEVER contains
|
|
43
|
+
// raw content; carries hashes, byte counts, matched credential rule ids, and the
|
|
44
|
+
// server revision ids of acked uploads.
|
|
45
|
+
// Live outcome log for one binding: <home>/agent-memory/live-decisions/<bindingId>.jsonl.
function liveDecisionLogPath(bindingId, home = config_1.HOME) {
    const { join } = node_path_1;
    const fileName = `${bindingId}.jsonl`;
    return join(home, "agent-memory", "live-decisions", fileName);
}
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.sha256Hex = sha256Hex;
|
|
4
|
+
exports.syntheticSourceId = syntheticSourceId;
|
|
5
|
+
exports.isActionable = isActionable;
|
|
6
|
+
exports.collectOnce = collectOnce;
|
|
7
|
+
// src/lib/agent-memory-capture/pipeline.ts
|
|
8
|
+
//
|
|
9
|
+
// The exact-byte collection flow + the §4 lifecycle transition router. One
|
|
10
|
+
// immutable byte buffer per file: the bytes hashed, parsed, and scanned are
|
|
11
|
+
// provably the bytes that WOULD be uploaded, closing the TOCTOU where an editor
|
|
12
|
+
// rewrites the file between a path scan and a re-read. A scan failure must never
|
|
13
|
+
// mutate lifecycle (no upload, no retire) — that invariant lives in the routing
|
|
14
|
+
// table below.
|
|
15
|
+
//
|
|
16
|
+
// Phase 1 is DRY-RUN: this computes a decision per file and updates the thin
|
|
17
|
+
// ledger, but uploads nothing. The synthetic source id and content hash are
|
|
18
|
+
// computed so the dry-run records what a live upload WOULD address.
|
|
19
|
+
const node_crypto_1 = require("node:crypto");
|
|
20
|
+
const node_fs_1 = require("node:fs");
|
|
21
|
+
const config_1 = require("../config");
|
|
22
|
+
const redactor_1 = require("../redactor");
|
|
23
|
+
const classify_1 = require("./classify");
|
|
24
|
+
const containment_1 = require("./containment");
|
|
25
|
+
const ledger_1 = require("./ledger");
|
|
26
|
+
// Hex-encoded SHA-256 digest of `buf`.
function sha256Hex(buf) {
    const { createHash } = node_crypto_1;
    const hasher = createHash("sha256");
    hasher.update(buf);
    return hasher.digest("hex");
}
|
|
29
|
+
// Build the synthetic source id for a file of a binding:
// "_external/agent-auto-memory/<bindingId>/<relativePath>".
function syntheticSourceId(bindingId, relativePath) {
    const namespace = "_external/agent-auto-memory";
    return `${namespace}/${bindingId}/${relativePath}`;
}
|
|
32
|
+
// Only these decisions represent an event worth persisting to the JSONL.
|
|
33
|
+
// `unchanged` and `skipped` are no-ops emitted every scan; persisting them would
|
|
34
|
+
// grow the log without bound (a Phase 1 exit criterion).
|
|
35
|
+
// True for every decision except the two no-op ones, "unchanged" and "skipped".
function isActionable(decision) {
    const isNoOp = decision === "unchanged" || decision === "skipped";
    return !isNoOp;
}
|
|
38
|
+
// Run one dry-run collection pass for a single binding. Pure with respect to the
|
|
39
|
+
// network (uploads nothing); reads the real directory and mutates the local
|
|
40
|
+
// ledger. Returns every file's decision (the writer persists only the
|
|
41
|
+
// actionable ones).
|
|
42
|
+
// One dry-run collection pass over a single binding's memory directory.
//
// Inputs:
//   binding - reads bindingId, memoryDir, workspaceId.
//   deps    - nowIso (stamped on every record / ledger entry) plus optional
//             overrides: home, scan, scannerVersion, scannerMode ("observe" when
//             omitted).
//
// For each enumerated file a single record is produced (failed / skipped /
// reclassified / blocked / unchanged / eligible). Ledger entries are only
// written for project files and only removed on reclassification or, when the
// enumeration reports `complete`, for entries whose file was not enumerated.
// The ledger is persisted once at the end, and only if something changed.
//
// Returns { bindingId, memoryDir, workspaceId, scanComplete, records }.
function collectOnce(binding, deps) {
    const { readLedger, writeLedger } = ledger_1;
    const { classifyMemory } = classify_1;
    const { readFileSync } = node_fs_1;
    const home = deps.home ?? config_1.HOME;
    const scan = deps.scan ?? redactor_1.scanForSecrets;
    const scannerVersion = deps.scannerVersion ?? redactor_1.SECRET_SCANNER_VERSION;
    // Local phases (0A/1) observe; only the future pre-upload path blocks.
    const scannerMode = deps.scannerMode ?? "observe";
    const observedAt = deps.nowIso;
    const ledger = readLedger(binding.bindingId, home);
    const { enumerateEligibleFiles, MAX_FILE_BYTES: sizeLimit } = containment_1;
    const { files, complete } = enumerateEligibleFiles(binding.memoryDir);
    const records = [];
    const seen = new Set();
    let ledgerDirty = false;

    // Append one record; `outcome` keys land after (or override) the common ones.
    const emit = (relativePath, bytes, outcome) => {
        records.push({
            sourceId: syntheticSourceId(binding.bindingId, relativePath),
            relativePath,
            bytes,
            secretRuleIds: [],
            observedAt,
            ...outcome,
        });
    };
    const remember = (relativePath, entry) => {
        ledger.entries[relativePath] = entry;
        ledgerDirty = true;
    };
    const forget = (relativePath) => {
        delete ledger.entries[relativePath];
        ledgerDirty = true;
    };

    // Route a single enumerated file to exactly one decision.
    const inspect = (file) => {
        const { relativePath } = file;
        seen.add(relativePath);
        // Oversized per stat: never read, never upload, never retire.
        if (file.bytes > sizeLimit) {
            emit(relativePath, file.bytes, { hash: null, decision: "failed", reason: "oversized" });
            return;
        }
        let buf;
        try {
            buf = readFileSync(file.realPath);
        }
        catch {
            emit(relativePath, file.bytes, { hash: null, decision: "failed", reason: "unreadable" });
            return;
        }
        const size = buf.length;
        // The file may have grown between stat and read.
        if (size > sizeLimit) {
            emit(relativePath, size, { hash: null, decision: "failed", reason: "oversized" });
            return;
        }
        const hash = sha256Hex(buf);
        const text = buf.toString("utf8");
        const cls = classifyMemory(text);
        const prior = ledger.entries[relativePath];
        if (cls.malformed) {
            emit(relativePath, size, { hash, decision: "failed", reason: "malformed_frontmatter" });
            return;
        }
        if (cls.type !== "project") {
            if (!prior) {
                emit(relativePath, size, {
                    hash,
                    decision: "skipped",
                    reason: cls.type ? `type ${cls.type}` : "no project type",
                });
                return;
            }
            // A ledger entry exists, so this path was tracked as a project file before.
            forget(relativePath);
            emit(relativePath, size, {
                hash,
                decision: "reclassified",
                reason: `reclassified project -> ${cls.type ?? "none"}`,
            });
            return;
        }
        // Project file: scan the exact text unless scanning is off. A scanner
        // failure only fails the file in "block" mode; otherwise it counts as no hit.
        let secretRuleIds = [];
        if (scannerMode !== "off") {
            try {
                secretRuleIds = scan(text);
            }
            catch {
                if (scannerMode === "block") {
                    emit(relativePath, size, { hash, decision: "failed", reason: "scanner_unavailable" });
                    return;
                }
                secretRuleIds = [];
            }
        }
        if (scannerMode === "block" && secretRuleIds.length > 0) {
            const sameBlock = prior?.lastDecision === "blocked" &&
                prior.lastObservedHash === hash &&
                prior.blockedScannerVersion === scannerVersion;
            if (sameBlock) {
                emit(relativePath, size, {
                    hash,
                    decision: "unchanged",
                    reason: "blocked (unchanged, same scanner)",
                    secretRuleIds,
                });
                return;
            }
            remember(relativePath, {
                lastObservedHash: hash,
                lastDecision: "blocked",
                blockedScannerVersion: scannerVersion,
                lastObservedAt: observedAt,
            });
            emit(relativePath, size, {
                hash,
                decision: "blocked",
                reason: "secret pattern matched",
                secretRuleIds,
            });
            return;
        }
        // Not blocked: rule ids (if any) ride along as telemetry on the record.
        if (prior && prior.lastObservedHash === hash && prior.lastDecision !== "blocked") {
            emit(relativePath, size, {
                hash,
                decision: "unchanged",
                reason: "content identical",
                secretRuleIds,
            });
            return;
        }
        remember(relativePath, {
            lastObservedHash: hash,
            lastDecision: "eligible",
            lastObservedAt: observedAt,
        });
        emit(relativePath, size, {
            hash,
            decision: "eligible",
            reason: prior ? "changed" : "new",
            secretRuleIds,
        });
    };

    for (const file of files) {
        inspect(file);
    }

    // Deletions are reconciled only after a complete enumeration, so a file that
    // simply was not enumerated is never mistaken for a removed one.
    if (complete) {
        const vanished = Object.keys(ledger.entries).filter((rel) => !seen.has(rel));
        for (const rel of vanished) {
            forget(rel);
            records.push({
                sourceId: syntheticSourceId(binding.bindingId, rel),
                relativePath: rel,
                bytes: 0,
                hash: null,
                decision: "deleted",
                reason: "absent after complete scan",
                secretRuleIds: [],
                observedAt,
            });
        }
    }
    if (ledgerDirty) {
        writeLedger(binding.bindingId, ledger, home);
    }
    return {
        bindingId: binding.bindingId,
        memoryDir: binding.memoryDir,
        workspaceId: binding.workspaceId,
        scanComplete: complete,
        records,
    };
}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.analyzeCorpus = analyzeCorpus;
|
|
4
|
+
// src/lib/agent-memory-capture/report.ts
|
|
5
|
+
//
|
|
6
|
+
// Phase 0A static corpus value gate (§6): read-only statistics over a memory
|
|
7
|
+
// directory, sends nothing. It measures the STATIC properties the doc allows to
|
|
8
|
+
// be computed automatically (counts by type, size distribution) and explicitly
|
|
9
|
+
// leaves the judgement gates (net-new reviewable %, overlap with session
|
|
10
|
+
// capture, mixed-content rate) to a manual stratified review, which it reminds
|
|
11
|
+
// the operator to run. Dynamic volume is NOT measured here (the memory dir has
|
|
12
|
+
// no reliable revision history); that lives in the Phase 1 dry-run collector.
|
|
13
|
+
//
|
|
14
|
+
// Secret scanning here is OBSERVE-ONLY (An's verdict 2026-06-27): a hit is a
|
|
15
|
+
// reported signal, never a value gate and never a block. The local phases upload
|
|
16
|
+
// nothing, so there is nothing to protect; pre-upload credential blocking is a
|
|
17
|
+
// Phase 2B concern. The named-fixture check is kept only as a Phase 2B
|
|
18
|
+
// readiness probe (does the scanner still catch the known live credential?).
|
|
19
|
+
const node_fs_1 = require("node:fs");
|
|
20
|
+
const node_path_1 = require("node:path");
|
|
21
|
+
const redactor_1 = require("../redactor");
|
|
22
|
+
const classify_1 = require("./classify");
|
|
23
|
+
// Tokens that the future Phase 2B pre-upload credential denylist MUST catch
|
|
24
|
+
// (the credential-denylist readiness probe). The space-delimited Redis
|
|
25
|
+
// `requirepass` directive is the canonical hard fixture: it is the live secret
|
|
26
|
+
// known to sit in the real corpus and it slips past the uppercase
|
|
27
|
+
// env_assignment + 32-char entropy patterns, so the scanner needs its directive
|
|
28
|
+
// pattern to catch it. If a file carries one of these tokens but the scanner
|
|
29
|
+
// does NOT match it, the probe fails loudly so the gap is visible before any
|
|
30
|
+
// remote capture is ever enabled.
|
|
31
|
+
const FIXTURE_TOKENS = ["requirepass", "masterauth", "masteruser"];
|
|
32
|
+
// Nearest-rank percentile of an ascending-sorted numeric array.
//
//   sortedAsc - values sorted in ascending order.
//   p         - percentile in [0, 100].
//
// Returns 0 for an empty array. Otherwise returns the element at 1-based rank
// ceil(p/100 * n), clamped into the array. Using the ceiling (rather than the
// floor as a 0-based index) avoids overshooting by one element whenever
// p/100 * n is a whole number — e.g. the p90 of ten values is the 9th value,
// not the maximum.
function percentile(sortedAsc, p) {
    const n = sortedAsc.length;
    if (n === 0)
        return 0;
    // Multiply before dividing so whole-number ranks stay exact in floating point.
    const rank = Math.ceil((p * n) / 100);
    const idx = Math.min(n - 1, Math.max(0, rank - 1));
    return sortedAsc[idx];
}
|
|
38
|
+
// Read-only statistics over the `.md` files directly inside `memoryDir`.
//
// Returns a report object. When the directory cannot be listed the report has
// `exists: false`, zeroed counters and empty lists. Otherwise it carries counts
// by classified type, a size summary (min / median / p90 / max of stat sizes),
// the project files with scanner hits, the credential-probe misses, the manual
// gate reminders and the per-file rows sorted by file name.
//
// Each file is read once and scanned at most once: project files are always
// scanned, and any file containing a fixture token is scanned for the
// credential probe using that same text. A file counts as a probe miss when it
// contains a fixture token and the scanner returns no rule ids for it.
// Entries that fail stat/read, or are not regular files, are skipped.
function analyzeCorpus(memoryDir) {
    let names;
    try {
        names = (0, node_fs_1.readdirSync)(memoryDir);
    }
    catch {
        return {
            memoryDir,
            exists: false,
            totalMdFiles: 0,
            byType: {},
            projectFiles: 0,
            malformedFiles: 0,
            sizeBytes: { min: 0, median: 0, p90: 0, max: 0 },
            secretSignalFiles: [],
            credentialProbeMisses: [],
            credentialProbePass: true,
            manualGates: [],
            files: [],
        };
    }
    const files = [];
    const byType = {};
    const sizes = [];
    // Names of files that carry a fixture token the scanner did not match.
    const probeMissNames = new Set();
    for (const name of names) {
        if (!name.toLowerCase().endsWith(".md"))
            continue;
        const abs = (0, node_path_1.join)(memoryDir, name);
        let bytes;
        let text;
        try {
            const st = (0, node_fs_1.statSync)(abs);
            if (!st.isFile())
                continue;
            bytes = st.size;
            text = (0, node_fs_1.readFileSync)(abs, "utf8");
        }
        catch {
            continue;
        }
        const cls = (0, classify_1.classifyMemory)(text);
        const typeKey = cls.malformed ? "malformed" : (cls.type ?? "none");
        byType[typeKey] = (byType[typeKey] ?? 0) + 1;
        sizes.push(bytes);
        const isProject = cls.type === "project";
        const lower = text.toLowerCase();
        const hasFixtureToken = FIXTURE_TOKENS.some((t) => lower.includes(t));
        // One scan serves both the project signal and the fixture cross-check
        // (which also covers non-project files so a mistyped secret file shows up).
        const matchedRuleIds = isProject || hasFixtureToken ? (0, redactor_1.scanForSecrets)(text) : [];
        const blockRuleIds = isProject ? matchedRuleIds : [];
        if (hasFixtureToken && matchedRuleIds.length === 0)
            probeMissNames.add(name);
        files.push({ file: name, type: cls.type, malformed: cls.malformed, bytes, blockRuleIds, hasFixtureToken });
    }
    files.sort((a, b) => a.file.localeCompare(b.file));
    sizes.sort((a, b) => a - b);
    const secretSignalFiles = files
        .filter((f) => f.type === "project" && f.blockRuleIds.length > 0)
        .map((f) => ({ file: f.file, ruleIds: f.blockRuleIds }));
    // Derived from the sorted rows so misses are listed in file-name order.
    const credentialProbeMisses = files
        .filter((f) => probeMissNames.has(f.file))
        .map((f) => f.file);
    return {
        memoryDir,
        exists: true,
        totalMdFiles: files.length,
        byType,
        projectFiles: byType["project"] ?? 0,
        malformedFiles: byType["malformed"] ?? 0,
        sizeBytes: {
            min: sizes[0] ?? 0,
            median: percentile(sizes, 50),
            p90: percentile(sizes, 90),
            max: sizes[sizes.length - 1] ?? 0,
        },
        secretSignalFiles,
        credentialProbeMisses,
        credentialProbePass: credentialProbeMisses.length === 0,
        manualGates: [
            "Gate 1 (>= 15% of sampled project files carry a net-new reviewable durable item): MANUAL stratified review of 50-75 files.",
            "Gate 2 (existing session capture does not already cover the large majority of valuable findings): MANUAL cross-reference against session-derived KB.",
            "Mixed-content rate (durable claim + transient log in one file): MANUAL design finding, not a numeric gate.",
        ],
        files,
    };
}
|
|
124
|
+
// Read `<dir>/<name>` as UTF-8 text; any failure while building the path or
// reading the file yields the empty string instead of an exception.
function safeRead(dir, name) {
    const { readFileSync } = node_fs_1;
    const { join } = node_path_1;
    let contents = "";
    try {
        const target = join(dir, name);
        contents = readFileSync(target, "utf8");
    }
    catch {
        contents = "";
    }
    return contents;
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// src/lib/agent-memory-capture/types.ts
|
|
3
|
+
//
|
|
4
|
+
// Shared types for the agent-memory capture pipeline
|
|
5
|
+
// (notes/20260626-agent-memory-auto-capture-proposal.md). This subsystem routes
|
|
6
|
+
// the coding agent's own private auto-memory writes
|
|
7
|
+
// (`~/.claude/projects/<encoded-cwd>/memory/*.md`) into the governed KB, walled
|
|
8
|
+
// off from grounding any session until a human accepts a derived claim.
|
|
9
|
+
//
|
|
10
|
+
// Phase 1 here is DRY-RUN ONLY: it observes, classifies, secret-scans, and
|
|
11
|
+
// records a metadata-only decision per file. It never uploads. Live ingestion
|
|
12
|
+
// (Phase 2A+) is blocked upstream by the missing cross-revision claim-grain
|
|
13
|
+
// idempotency (DERIVED-IDEMPOTENCY-1, §5.2) and is intentionally not built here.
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CAPTURE_METHOD = void 0;
|
|
4
|
+
exports.createIntelUpsertClient = createIntelUpsertClient;
|
|
5
|
+
const http_1 = require("../http");
|
|
6
|
+
// The capture method the route uses to branch provenance + non-publication.
|
|
7
|
+
exports.CAPTURE_METHOD = "agent_auto_memory";
|
|
8
|
+
// Default LDM profile; agent-memory bodies are markdown like notes. Kept here so
|
|
9
|
+
// the route input is explicit rather than relying on a server default.
|
|
10
|
+
const DEFAULT_PROFILE = "markdown_atomic_v1";
|
|
11
|
+
// Translate a server receipt outcome into the client-side outcome label:
// "ingested" -> "created", "noop_unchanged" -> "unchanged", anything else ->
// "failed".
function mapOutcome(outcome) {
    switch (outcome) {
        case "ingested":
            return "created";
        case "noop_unchanged":
            return "unchanged";
        default:
            return "failed";
    }
}
|
|
18
|
+
// The real client. Closes over a CliConfig-compatible config (controlToken +
|
|
19
|
+
// intelUrl + auth) and reuses intelPost, so it inherits the intel auth fail-fast,
|
|
20
|
+
// trace/session headers, and timeout handling already proven on `mla kb add`.
|
|
21
|
+
// Build the upsert/withdraw client around `post(cfg, route, body)`.
//
//   cfg  - passed through untouched as the first argument of every `post` call.
//   post - transport function; defaults to http_1.intelPost.
//
// Both methods resolve to a result object and are written not to reject:
//   upsert(input)   -> { ok, outcome, serverContentHash, revisionId,
//                        logicalSourceId, reason }
//   withdraw(input) -> { ok, withdrawn, retiredPendingDerived, reason }
//
// A transport error becomes ok:false with reason "upload_failed: …" /
// "withdraw_failed: …". An upsert response without a first receipt — including
// a null/undefined response — becomes ok:false with reason "no_receipt".
//
// NOTE(review): a receipt whose outcome maps to "failed" is still returned with
// ok:true — confirm callers check `outcome` and not only `ok`.
function createIntelUpsertClient(cfg, post = http_1.intelPost) {
    // Readable text for whatever was thrown (Error, string, or anything else).
    const describeError = (e) => (typeof e?.message === "string" ? e.message : String(e));
    const failedUpsert = (reason) => ({
        ok: false,
        outcome: "failed",
        serverContentHash: null,
        revisionId: null,
        logicalSourceId: null,
        reason,
    });
    return {
        async upsert(input) {
            const body = {
                workspaceId: input.workspaceId,
                actor: input.actor,
                captureMethod: exports.CAPTURE_METHOD,
                bindingId: input.bindingId,
                consentedAt: input.consentedAt,
                provenance: exports.CAPTURE_METHOD, // advisory; the server derives the recorded value
                profile: DEFAULT_PROFILE,
                mode: "file",
                documents: [
                    {
                        relPath: input.relPath,
                        content: input.content,
                        // Sent so the server can verify it received the exact bytes and echo
                        // its own sha256 back for the COMMIT-1 hash check.
                        contentSha256: input.contentHash,
                    },
                ],
            };
            let res;
            try {
                res = await post(cfg, "/internal/v1/kb/add", body);
            }
            catch (e) {
                return failedUpsert(`upload_failed: ${describeError(e)}`);
            }
            // Tolerate an empty/absent response body instead of throwing on it.
            const receipt = res?.receipts?.[0];
            if (!receipt) {
                return failedUpsert("no_receipt");
            }
            return {
                ok: true,
                outcome: mapOutcome(receipt.outcome),
                serverContentHash: receipt.contentSha256 ?? null,
                revisionId: receipt.revisionId ?? null,
                logicalSourceId: receipt.documentId ?? null,
                reason: receipt.reason ?? receipt.outcome ?? "",
            };
        },
        async withdraw(input) {
            const body = {
                workspaceId: input.workspaceId,
                actor: input.actor,
                captureMethod: exports.CAPTURE_METHOD,
                relPath: input.relPath,
                reason: input.reason,
            };
            try {
                const res = await post(cfg, "/internal/v1/kb/withdraw", body);
                return {
                    ok: true,
                    withdrawn: res.withdrawn === true,
                    retiredPendingDerived: res.retiredPendingDerived ?? null,
                    reason: res.reason ?? "",
                };
            }
            catch (e) {
                return {
                    ok: false,
                    withdrawn: false,
                    retiredPendingDerived: null,
                    reason: `withdraw_failed: ${describeError(e)}`,
                };
            }
        },
    };
}
|