@meetless/mla 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-info.json +3 -3
- package/dist/cli.js +31 -5
- package/dist/commands/activate.js +39 -18
- package/dist/commands/agent-memory.js +333 -0
- package/dist/commands/enrich.js +211 -2
- package/dist/commands/internal-auto-index.js +64 -1
- package/dist/commands/internal-pretool-observe.js +86 -1
- package/dist/commands/internal-redact-capture.js +130 -0
- package/dist/commands/pilot.js +385 -0
- package/dist/lib/agent-memory-capture/binding.js +115 -0
- package/dist/lib/agent-memory-capture/classify.js +68 -0
- package/dist/lib/agent-memory-capture/collector.js +69 -0
- package/dist/lib/agent-memory-capture/containment.js +74 -0
- package/dist/lib/agent-memory-capture/ledger.js +43 -0
- package/dist/lib/agent-memory-capture/live-collector.js +148 -0
- package/dist/lib/agent-memory-capture/live-ledger.js +45 -0
- package/dist/lib/agent-memory-capture/live-pipeline.js +344 -0
- package/dist/lib/agent-memory-capture/lock.js +98 -0
- package/dist/lib/agent-memory-capture/paths.js +47 -0
- package/dist/lib/agent-memory-capture/pipeline.js +222 -0
- package/dist/lib/agent-memory-capture/report.js +131 -0
- package/dist/lib/agent-memory-capture/types.js +14 -0
- package/dist/lib/agent-memory-capture/upsert-client.js +104 -0
- package/dist/lib/analytics/enforcement-classify.js +65 -0
- package/dist/lib/analytics/enforcement-incident.js +83 -0
- package/dist/lib/analytics/envelope.js +55 -1
- package/dist/lib/analytics/pilot.js +313 -0
- package/dist/lib/enrichment/ingest.js +98 -13
- package/dist/lib/enrichment/materialize-rules.js +81 -0
- package/dist/lib/enrichment/plan.js +72 -15
- package/dist/lib/enrichment/protocol.js +85 -5
- package/dist/lib/enrichment/scout-brief.js +35 -6
- package/dist/lib/redactor.js +104 -1
- package/dist/lib/scanner/agent-memory.js +55 -4
- package/dist/lib/scanner/managed-rules.js +0 -0
- package/dist/lib/scanner/scan.js +52 -1
- package/dist/lib/scanner/score.js +41 -3
- package/dist/lib/scanner/scout-mission.js +9 -7
- package/dist/lib/upgrade-apply.js +30 -0
- package/dist/lib/wire.js +2 -0
- package/package.json +1 -1
package/dist/lib/redactor.js
CHANGED
|
@@ -6,8 +6,10 @@
|
|
|
6
6
|
// captured content. Cross-plane parity is locked by a shared fixture test
|
|
7
7
|
// (tools/meetless-agent/test/lib/redactor-parity.spec.ts).
|
|
8
8
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
-
exports.REDACTED = void 0;
|
|
9
|
+
exports.CREDENTIAL_RULE_IDS = exports.SECRET_SCANNER_VERSION = exports.REDACTED = void 0;
|
|
10
10
|
exports.redact = redact;
|
|
11
|
+
exports.scanForSecrets = scanForSecrets;
|
|
12
|
+
exports.scanForCredentials = scanForCredentials;
|
|
11
13
|
exports.redactPayload = redactPayload;
|
|
12
14
|
exports.REDACTED = "[REDACTED]";
|
|
13
15
|
// Order matters: env_assignment runs first so KEY=value pairs are redacted
|
|
@@ -73,6 +75,107 @@ function redact(text) {
|
|
|
73
75
|
out = out.replace(ENTROPY_TOKEN, (m) => (looksHighEntropy(m) ? exports.REDACTED : m));
|
|
74
76
|
return out;
|
|
75
77
|
}
|
|
78
|
+
// --- Block-on-detect secret scanner (SECRET-1) ---
|
|
79
|
+
//
|
|
80
|
+
// The agent-memory capture pipeline
|
|
81
|
+
// (notes/20260626-agent-memory-auto-capture-proposal.md) must BLOCK a file from
|
|
82
|
+
// leaving the machine when it contains a known high-risk secret, rather than
|
|
83
|
+
// silently redact-and-send. This reuses the parity-locked PATTERNS + entropy
|
|
84
|
+
// heuristic above for detection and adds directive-style secrets the
|
|
85
|
+
// substitution redactor does not carry.
|
|
86
|
+
//
|
|
87
|
+
// HONEST SCOPE (do not overstate to users): this blocks KNOWN secret PATTERNS
|
|
88
|
+
// locally; it is NOT a guarantee that "secrets cannot leave the machine." A
|
|
89
|
+
// novel or low-entropy credential can still pass. Returns the set of matched
|
|
90
|
+
// rule ids, sorted + de-duplicated; the matched secret text is NEVER returned,
|
|
91
|
+
// so a caller that logs findings cannot leak the secret. Empty array == clean.
|
|
92
|
+
// Directive-style secrets the substitution redactor intentionally omits (it
|
|
93
|
+
// substitutes; this one only blocks). requirepass/masterauth/masteruser are
|
|
94
|
+
// Redis/Sentinel config directives: a lowercase keyword + space + value, which
|
|
95
|
+
// the uppercase env_assignment pattern and the 32-char entropy gate both miss
|
|
96
|
+
// (e.g. an 8-char `requirepass <value>` slips past both).
|
|
97
|
+
const BLOCK_DIRECTIVE_PATTERNS = [
|
|
98
|
+
["redis_directive", /\b(requirepass|masterauth|masteruser)\s+('[^']*'|"[^"]*"|\S+)/gi],
|
|
99
|
+
];
|
|
100
|
+
// A pure-hex token (git SHA, content hash, digest) is not a secret, and the
|
|
101
|
+
// agent-memory corpus is dense with them. Excluding hex from the entropy block
|
|
102
|
+
// keeps the dry-run from blocking nearly every file on an incidental 40-char
|
|
103
|
+
// hash while still catching base64/mixed-class credential blobs.
|
|
104
|
+
// True when `token` is made up exclusively of hexadecimal digits (0-9, a-f, either
// case) and is at least one character long. Used by the entropy scan to skip
// hash-like tokens.
function isHexToken(token) {
    const hexOnly = /^[0-9a-f]+$/i;
    return hexOnly.test(token);
}
|
|
107
|
+
// Bump when the block-on-detect pattern set or entropy heuristic changes. The
|
|
108
|
+
// capture ledger stores this alongside a blocked file so a policy upgrade
|
|
109
|
+
// re-evaluates content blocked under an older version (RETRY-2 for blocks).
|
|
110
|
+
exports.SECRET_SCANNER_VERSION = "2026-06-27.1";
|
|
111
|
+
// Scan `text` for known secret patterns and return the matched rule ids, sorted and
// de-duplicated. The matched secret text itself is never returned. An empty array
// means no known pattern matched.
//
// Checks, in order: every [name, regex] pair in PATTERNS, every pair in
// BLOCK_DIRECTIVE_PATTERNS, then each ENTROPY_TOKEN match that is not pure hex and
// that looksHighEntropy() accepts (reported once as "high_entropy_token").
//
// The regexes are shared module-level objects carrying the `g` flag, so `.test()` and
// `.exec()` advance `lastIndex` statefully. It is reset BEFORE each use (so a previous
// caller cannot make us start mid-string) and again AFTER (so we do not hand a dirty
// cursor to the next user of the same regex object).
function scanForSecrets(text) {
    if (!text)
        return [];
    const hits = new Set();
    for (const [name, pat] of [...PATTERNS, ...BLOCK_DIRECTIVE_PATTERNS]) {
        pat.lastIndex = 0;
        if (pat.test(text))
            hits.add(name);
        pat.lastIndex = 0;
    }
    ENTROPY_TOKEN.lastIndex = 0;
    try {
        let m;
        while ((m = ENTROPY_TOKEN.exec(text)) !== null) {
            const tok = m[0];
            if (!isHexToken(tok) && looksHighEntropy(tok)) {
                hits.add("high_entropy_token");
                // One hit is enough: only the rule id is reported, not each token.
                break;
            }
        }
    }
    finally {
        // The early break above would otherwise leave the cursor mid-string.
        ENTROPY_TOKEN.lastIndex = 0;
    }
    return [...hits].sort();
}
|
|
136
|
+
// --- Pre-upload credential denylist (Phase 2A/2B, proposal §4/§6) ---
|
|
137
|
+
//
|
|
138
|
+
// The LIVE capture path (Phase 2A+) must withhold a file from upload when it
|
|
139
|
+
// carries a KNOWN, high-confidence credential FORMAT, because the real corpus
|
|
140
|
+
// contains a live credential (SECRET-1). This is DELIBERATELY NOT scanForSecrets:
|
|
141
|
+
// it excludes the generic Shannon-entropy heuristic, which over-blocked 99.2% of
|
|
142
|
+
// the corpus in the Phase 0A static audit and is explicitly rejected for the
|
|
143
|
+
// blocking path. It runs ONLY the precision-first format matchers: provider-token
|
|
144
|
+
// prefixes (sk-/ghp_/AKIA/...), Authorization headers (Bearer/Basic), cookies,
|
|
145
|
+
// PEM private-key blocks, the Redis `requirepass`/`masterauth`/`masteruser`
|
|
146
|
+
// directives, and credential-named env assignments.
|
|
147
|
+
//
|
|
148
|
+
// HONEST SCOPE (do not overstate to users): a clean result means "none of these
|
|
149
|
+
// known formats are present," NOT "no secret exists." A novel or unformatted
|
|
150
|
+
// credential can still pass; that is an accepted, documented limit (§4 SECRET-1).
|
|
151
|
+
// Returns the matched rule ids, sorted + de-duplicated; the secret text is NEVER
|
|
152
|
+
// returned. Empty array == clean (eligible for upload). Reuses the parity-safe
|
|
153
|
+
// PATTERNS + BLOCK_DIRECTIVE_PATTERNS so the block formats stay in lockstep with
|
|
154
|
+
// the observe-only scanner, minus entropy.
|
|
155
|
+
exports.CREDENTIAL_RULE_IDS = [
|
|
156
|
+
"env_assignment",
|
|
157
|
+
"bearer",
|
|
158
|
+
"provider_token",
|
|
159
|
+
"cookie",
|
|
160
|
+
"pem_key",
|
|
161
|
+
"redis_directive",
|
|
162
|
+
];
|
|
163
|
+
// Scan `text` with the format matchers only (PATTERNS + BLOCK_DIRECTIVE_PATTERNS, no
// entropy heuristic) and return the matched rule ids, sorted and de-duplicated. The
// matched text is never returned. An empty array means none of the known formats
// matched.
//
// NOTE(review): this function does not consult CREDENTIAL_RULE_IDS; it reports every
// rule name present in PATTERNS. That matches the documented id list only as long as
// PATTERNS contains exactly those rules. Confirm against the PATTERNS definition.
//
// The regexes are shared module-level objects carrying the `g` flag, so `.test()`
// advances `lastIndex` statefully. It is reset before each use and again afterwards,
// so no dirty cursor is left behind for the next user of the same regex object.
function scanForCredentials(text) {
    if (!text)
        return [];
    const hits = new Set();
    for (const [name, pat] of [...PATTERNS, ...BLOCK_DIRECTIVE_PATTERNS]) {
        pat.lastIndex = 0;
        if (pat.test(text))
            hits.add(name);
        pat.lastIndex = 0;
    }
    return [...hits].sort();
}
|
|
76
179
|
function redactPayload(value) {
|
|
77
180
|
if (typeof value === "string")
|
|
78
181
|
return redact(value);
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DEFAULT_AGENT_MEMORY_PROVIDERS = exports.claudeCodeProvider = void 0;
|
|
3
4
|
exports.agentMemoryDir = agentMemoryDir;
|
|
4
5
|
exports.readAgentMemoryFiles = readAgentMemoryFiles;
|
|
5
6
|
exports.parseAgentMemoryDirectives = parseAgentMemoryDirectives;
|
|
7
|
+
exports.collectAgentMemoryFiles = collectAgentMemoryFiles;
|
|
6
8
|
exports.discoverAgentMemoryDirectives = discoverAgentMemoryDirectives;
|
|
7
9
|
// src/lib/scanner/agent-memory.ts
|
|
8
10
|
//
|
|
@@ -18,6 +20,7 @@ exports.discoverAgentMemoryDirectives = discoverAgentMemoryDirectives;
|
|
|
18
20
|
// "not attested" and can NEVER earn must-follow; it rides advisory until a human attests.
|
|
19
21
|
// `render.ts` already enforces this (must-follow requires `human_attested`), and
|
|
20
22
|
// `scanWorkspace` keeps these out of the auto-injected `confirmedRulesXml` pack entirely.
|
|
23
|
+
const node_crypto_1 = require("node:crypto");
|
|
21
24
|
const node_fs_1 = require("node:fs");
|
|
22
25
|
const node_os_1 = require("node:os");
|
|
23
26
|
const node_path_1 = require("node:path");
|
|
@@ -27,11 +30,27 @@ const frontmatter_1 = require("./frontmatter");
|
|
|
27
30
|
// Mirrors parse-directives.ts MUST_TOKENS so strength is consistent across sources.
|
|
28
31
|
const MUST_TOKENS = /\b(MUST|NEVER|ALWAYS|REQUIRED|DO NOT|DON'?T|FORBIDDEN|NON-NEGOTIABLE)\b/;
|
|
29
32
|
// Resolve the agent-memory dir for a workspace cwd. Replicates Claude Code's projects-dir
|
|
30
|
-
// encoding (slashes AND dots become dashes). `home` is injectable for tests.
|
|
33
|
+
// encoding (slashes AND dots become dashes). `home` is injectable for tests. This is an
|
|
34
|
+
// implementation detail of the Claude Code provider below, NOT a workspace identity: the
|
|
35
|
+
// same repo encodes to different dirs from its git root, a nested dir, a worktree, or a
|
|
36
|
+
// symlinked clone (memo Phase 2). Discovery keeps it behind the provider seam so the
|
|
37
|
+
// path convention never leaks into the workspace model.
|
|
31
38
|
// Build `<home>/.claude/projects/<encoded-cwd>/memory`, where the encoded cwd is `cwd`
// with every "/" and "." replaced by "-". `home` defaults to the OS home directory.
function agentMemoryDir(cwd, home = (0, node_os_1.homedir)()) {
    const projectSlug = cwd.split(/[/.]/).join("-");
    const segments = [home, ".claude", "projects", projectSlug, "memory"];
    return (0, node_path_1.join)(...segments);
}
|
|
42
|
+
// Claude Code: ~/.claude/projects/<encoded-cwd>/memory/.
|
|
43
|
+
exports.claudeCodeProvider = {
|
|
44
|
+
name: "claude-code",
|
|
45
|
+
memoryDirs(searchPath, home) {
|
|
46
|
+
return [agentMemoryDir(searchPath, home)];
|
|
47
|
+
},
|
|
48
|
+
};
|
|
49
|
+
// The default provider set. Today only Claude Code; the list is the extension point.
|
|
50
|
+
exports.DEFAULT_AGENT_MEMORY_PROVIDERS = [exports.claudeCodeProvider];
|
|
51
|
+
// SHA-256 of `text` (encoded as UTF-8), returned as a lowercase hex string.
function contentFingerprint(text) {
    const hasher = (0, node_crypto_1.createHash)("sha256");
    hasher.update(text, "utf8");
    return hasher.digest("hex");
}
|
|
35
54
|
// Read the `feedback_*.md` topic files (the "rules the user gave" bucket) from an
|
|
36
55
|
// agent-memory dir, sorted for a stable/diffable worklist. Fails open to []: a missing
|
|
37
56
|
// dir (fresh machine, no prior agent memory) is the common case and must never abort the
|
|
@@ -92,8 +111,40 @@ function parseAgentMemoryDirectives(files) {
|
|
|
92
111
|
// surface in full; bounding scan-time I/O only matters at the absurd end. Scan runs on
|
|
93
112
|
// `mla activate`, not the per-Write hot path, so reading a few hundred small files is cheap.
|
|
94
113
|
const DEFAULT_AGENT_MEMORY_CAP = 500;
|
|
95
|
-
//
|
|
114
|
+
// Collect feedback memory files for a workspace across every provider and search path,
|
|
115
|
+
// deduped by CONTENT fingerprint: the same memory found under two encoded paths (e.g. the
|
|
116
|
+
// repo opened once at its root and once at a nested dir) collapses to a single entry,
|
|
117
|
+
// tagged with the first provider/path that surfaced it. Missing memory is a harmless
|
|
118
|
+
// zero-result. Sorted by name (then sourcePath) for a stable, diffable worklist.
|
|
119
|
+
// Gather memory files for `cwd` (and `opts.canonicalRoot`, when given and different)
// from every provider in `opts.providers` (default: DEFAULT_AGENT_MEMORY_PROVIDERS).
// Files with identical content are kept once, attributed to the first provider/dir
// that yielded them. Each returned entry is the file record plus `provider` and
// `sourcePath`. The result is ordered by name, then sourcePath.
function collectAgentMemoryFiles(cwd, home = (0, node_os_1.homedir)(), opts = {}) {
    const providers = opts.providers ?? exports.DEFAULT_AGENT_MEMORY_PROVIDERS;
    // Truthy search paths only, first occurrence wins, original order kept.
    const searchPaths = [cwd, opts.canonicalRoot].filter((p, idx, all) => p && all.indexOf(p) === idx);
    // Lazily walk provider -> search path -> dir -> file in that nesting order.
    function* candidates() {
        for (const provider of providers) {
            for (const searchPath of searchPaths) {
                for (const dir of provider.memoryDirs(searchPath, home)) {
                    for (const file of readAgentMemoryFiles(dir)) {
                        yield { provider, dir, file };
                    }
                }
            }
        }
    }
    const fingerprints = new Set();
    const collected = [];
    for (const { provider, dir, file } of candidates()) {
        const digest = contentFingerprint(file.text);
        if (!fingerprints.has(digest)) {
            fingerprints.add(digest);
            collected.push({
                ...file,
                provider: provider.name,
                sourcePath: (0, node_path_1.join)(dir, file.name),
            });
        }
    }
    const byNameThenPath = (a, b) => {
        const byName = a.name.localeCompare(b.name);
        return byName || (a.sourcePath ?? "").localeCompare(b.sourcePath ?? "");
    };
    return collected.sort(byNameThenPath);
}
|
|
144
|
+
// Discover + parse the agent-memory rules for a workspace. The advisory worklist is
|
|
96
145
|
// surfaced for review, never bulk-injected. Fully fail-open via readAgentMemoryFiles.
|
|
97
|
-
|
|
98
|
-
|
|
146
|
+
// The default 3-arg call (cwd, home, cap) is preserved; pass opts to search a canonical
|
|
147
|
+
// root or a custom provider set.
|
|
148
|
+
function discoverAgentMemoryDirectives(cwd, home = (0, node_os_1.homedir)(), cap = DEFAULT_AGENT_MEMORY_CAP, opts = {}) {
    // Collect first, parse second, then keep at most `cap` directives from the front.
    const memoryFiles = collectAgentMemoryFiles(cwd, home, opts);
    const directives = parseAgentMemoryDirectives(memoryFiles);
    return directives.slice(0, cap);
}
|
|
Binary file
|
package/dist/lib/scanner/scan.js
CHANGED
|
@@ -12,6 +12,7 @@ const parse_directives_1 = require("./parse-directives");
|
|
|
12
12
|
const parse_structured_1 = require("./parse-structured");
|
|
13
13
|
const render_1 = require("./render");
|
|
14
14
|
const agent_memory_1 = require("./agent-memory");
|
|
15
|
+
const managed_rules_1 = require("./managed-rules");
|
|
15
16
|
const MAX_FILE_BYTES = 256 * 1024; // skip large files for the free pass
|
|
16
17
|
function scanWorkspace(cwd, opts) {
|
|
17
18
|
const tracked = gitLsFiles(cwd);
|
|
@@ -21,6 +22,11 @@ function scanWorkspace(cwd, opts) {
|
|
|
21
22
|
let decisionDocs = 0;
|
|
22
23
|
let legacyNotes = 0;
|
|
23
24
|
for (const rel of tracked) {
|
|
25
|
+
// The mla-managed rule file is handled by its own parser below (read directly from disk
|
|
26
|
+
// so it is effective before it is committed). Skip it here so it is not also processed as
|
|
27
|
+
// a generic T2 prose doc, which would double-count it and run stale detection on it.
|
|
28
|
+
if (rel === managed_rules_1.MANAGED_RULES_PATH)
|
|
29
|
+
continue;
|
|
24
30
|
const tier = (0, score_1.classifyTier)(rel);
|
|
25
31
|
if (!tier)
|
|
26
32
|
continue;
|
|
@@ -70,6 +76,14 @@ function scanWorkspace(cwd, opts) {
|
|
|
70
76
|
seenSources.add(s.source);
|
|
71
77
|
return true;
|
|
72
78
|
});
|
|
79
|
+
// The mla-managed rule file (.meetless/rules.md) is the canonical source of durable repo
|
|
80
|
+
// policy (memo Phase 1). Read it DIRECTLY from disk (not via git ls-files) so a rule is
|
|
81
|
+
// effective locally the moment it is written, before the human commits and pushes to share
|
|
82
|
+
// it. Every managed rule is human_attested, so a MUST_FOLLOW one earns must-follow injection.
|
|
83
|
+
// Searched at the active path and the canonical root, the same as agent-memory, and folded
|
|
84
|
+
// into the directive list BEFORE dedupe so it participates in authority ranking.
|
|
85
|
+
const canonicalRoot = gitToplevel(cwd);
|
|
86
|
+
directives.push(...readManagedDirectives(cwd, canonicalRoot));
|
|
73
87
|
// Collapse the same rule attested by multiple instruction files into one, so
|
|
74
88
|
// the stored array, the reported rule count, and the grounding pack all agree
|
|
75
89
|
// on distinct rules rather than per-file occurrences.
|
|
@@ -80,7 +94,13 @@ function scanWorkspace(cwd, opts) {
|
|
|
80
94
|
// human review and deliberately excluded from `confirmedRulesXml` (never auto-injected as
|
|
81
95
|
// must-follow): untracked => not attested => ingest is not accept.
|
|
82
96
|
const committedTexts = new Set(dedupedDirectives.map((d) => d.text));
|
|
83
|
-
|
|
97
|
+
// Search the active session path AND the canonical repo root: the same repo opened at a
|
|
98
|
+
// nested dir or a worktree encodes to a different agent-memory dir, so binding identity to
|
|
99
|
+
// a single path would silently miss memory (memo Phase 2). Discovery dedupes by content.
|
|
100
|
+
// canonicalRoot was resolved above for the managed-rules read; reuse it.
|
|
101
|
+
const advisoryDirectives = (0, agent_memory_1.discoverAgentMemoryDirectives)(cwd, opts.home, undefined, {
|
|
102
|
+
canonicalRoot,
|
|
103
|
+
}).filter((d) => !committedTexts.has(d.text));
|
|
84
104
|
const inventory = {
|
|
85
105
|
instructionFiles,
|
|
86
106
|
decisionDocs,
|
|
@@ -112,6 +132,37 @@ function gitLsFiles(cwd) {
|
|
|
112
132
|
return [];
|
|
113
133
|
}
|
|
114
134
|
}
|
|
135
|
+
// The canonical repo root (`git rev-parse --show-toplevel`), or undefined when the
// command fails (outside a repo, git missing, unreadable cwd) or prints nothing.
// scanWorkspace uses it to widen both the managed-rules read and agent-memory
// discovery beyond the active session path; it does NOT define workspace identity.
function gitToplevel(cwd) {
    try {
        const top = (0, node_child_process_1.execFileSync)("git", ["rev-parse", "--show-toplevel"], {
            cwd,
            encoding: "utf8",
            // Without an explicit stdio, the child's stderr is forwarded to our own
            // stderr, so git's "fatal: not a git repository" would reach the user's
            // terminal even though the failure is handled below. Capture stdout only.
            stdio: ["ignore", "pipe", "ignore"],
        }).trim();
        return top || undefined;
    }
    catch {
        // Not being in a repo is an expected, harmless outcome here.
        return undefined;
    }
}
|
|
146
|
+
// Read + parse the mla-managed rule file from disk at the active path and (if distinct)
// the canonical root, concatenating the directives from both. Reading from disk (not
// git) is deliberate: a freshly written rule is effective locally before it is committed.
// A missing file is a harmless zero-result.
//
// Roots are compared by their resolved absolute form, so the same directory spelled two
// ways (trailing slash, relative vs absolute) is read once rather than twice.
// NOTE(review): each file is parsed independently here, so a rule present in BOTH files
// is returned twice by this function. Collapsing it appears to rely on the directive
// dedupe scanWorkspace runs after this call. Confirm.
function readManagedDirectives(cwd, canonicalRoot) {
    const visited = new Set();
    const directives = [];
    for (const root of [cwd, canonicalRoot]) {
        if (!root)
            continue;
        // Dedupe key only; the read below still uses the caller-supplied root.
        const key = (0, node_path_1.resolve)(root);
        if (visited.has(key))
            continue;
        visited.add(key);
        const text = safeRead((0, node_path_1.join)(root, managed_rules_1.MANAGED_RULES_PATH));
        if (text === null)
            continue;
        directives.push(...(0, managed_rules_1.managedRulesToDirectives)((0, managed_rules_1.parseManagedRules)(text)));
    }
    return directives;
}
|
|
115
166
|
function gitHead(cwd) {
|
|
116
167
|
try {
|
|
117
168
|
return (0, node_child_process_1.execFileSync)("git", ["rev-parse", "HEAD"], { cwd, encoding: "utf8" }).trim();
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.isCuratedDoc = isCuratedDoc;
|
|
3
4
|
exports.classifyTier = classifyTier;
|
|
4
5
|
exports.isInstructionFile = isInstructionFile;
|
|
5
6
|
// Agent-instruction filenames matched by BASENAME, not full path: real monorepos
|
|
@@ -9,23 +10,60 @@ exports.isInstructionFile = isInstructionFile;
|
|
|
9
10
|
const T1_BASENAMES = new Set([
|
|
10
11
|
"CLAUDE.md", "AGENTS.md", "GEMINI.md", "memory.md", "copilot-instructions.md",
|
|
11
12
|
]);
|
|
12
|
-
|
|
13
|
+
// Curated decision / instruction-adjacent docs: known high-signal BASENAMES plus
|
|
14
|
+
// decision-record directories. Basenames are matched by basename (not full path) for
|
|
15
|
+
// the SAME reason as T1_BASENAMES above: a monorepo keeps a README.md / ARCHITECTURE.md
|
|
16
|
+
// per package, and full-path matching would treat every nested one as anonymous prose.
|
|
17
|
+
// These are still tier T2, but `isCuratedDoc` lets the enrichment ranker float them
|
|
18
|
+
// above generic prose so a tight target budget is not spent on arbitrary marketing .md
|
|
19
|
+
// while a real ADR or package README is crowded out (plan §5b).
|
|
20
|
+
const T2_BASENAMES = new Set(["README.md", "README", "ARCHITECTURE.md", "CONTRIBUTING.md"]);
|
|
13
21
|
const T2_DIRS = ["docs/adr/", "docs/rfc/", "docs/decisions/", "docs/specs/", "docs/runbooks/"];
|
|
14
22
|
const T3_NAMES = new Set(["package.json", "prisma/schema.prisma", "docker-compose.yml", ".env.example"]);
|
|
15
23
|
const DENY_EXT = /\.(ts|tsx|js|jsx|py|go|rs|java|lock|map|png|jpg|svg|snap)$/i;
|
|
16
24
|
const DENY_NAME = /(^|\/)(pnpm-lock\.yaml|package-lock\.json|yarn\.lock)$/i;
|
|
25
|
+
// Generated / vendored output directories. Even when a repo commits them (e.g. a
|
|
26
|
+
// Forge app that checks in its webpack bundle), nothing under them is a governance
|
|
27
|
+
// doc: it is third-party or machine-emitted. Match the segment anywhere in the path
|
|
28
|
+
// so a committed `apps/x/build/static/...` is excluded the same as a root `dist/`.
|
|
29
|
+
const DENY_DIR = /(^|\/)(node_modules|dist|build|out|coverage|vendor|\.next|\.nuxt|\.svelte-kit|\.turbo|\.cache|\.output)\//i;
|
|
30
|
+
// Minified-bundle license sidecars (terser/webpack emit `<chunk>.js.LICENSE.txt`):
|
|
31
|
+
// pure third-party license boilerplate, never a governance doc, and they can sit
|
|
32
|
+
// outside a build dir when an app serves its bundle from a tracked static folder.
|
|
33
|
+
const DENY_GENERATED = /\.LICENSE\.txt$/i;
|
|
34
|
+
// Test / eval / fixture corpora. These trees hold test data, eval benchmarks, and
|
|
35
|
+
// deliberately-broken fixtures (e.g. an eval's `broken_outputs/*.txt` are MALFORMED
|
|
36
|
+
// answers used to test the harness): reading them as governance docs both drowns the
|
|
37
|
+
// real docs and risks minting false claims from poison. The intel repo proved it: 171
|
|
38
|
+
// of 179 tracked .md files live under `evals/`, crowding a real `notes/` doc out of
|
|
39
|
+
// the top 20. Match whole path SEGMENTS so `latest/` or a `docs/testing-policy.md` are
|
|
40
|
+
// untouched. Deliberately EXCLUDES `spec`/`specs` (overloaded: `docs/specs/` is a
|
|
41
|
+
// governance T2_DIR, and OpenAPI/spec docs are real). Widen only on real dogfood need.
|
|
42
|
+
const DENY_TESTDIR = /(^|\/)(evals?|tests?|__tests__|e2e|fixtures|__fixtures__|testdata|test[-_]data|__mocks__|__snapshots__)\//i;
|
|
17
43
|
// Final "/"-separated segment of `p`; `p` itself when it contains no "/".
function basename(p) {
    const lastSlash = p.lastIndexOf("/");
    if (lastSlash < 0) {
        return p;
    }
    return p.slice(lastSlash + 1);
}
|
|
47
|
+
// A curated T2 doc: a known high-signal doc name (at any depth) or a file under a
|
|
48
|
+
// decision-record directory, as opposed to arbitrary prose that merely ends in .md.
|
|
49
|
+
// Used by the enrichment ranker to order curated docs above generic prose within T2.
|
|
50
|
+
// Pure name/path test: it does NOT run the DENY checks, so call it only on a path
|
|
51
|
+
// `classifyTier` has already accepted (the deny gates ran there first).
|
|
52
|
+
function isCuratedDoc(p) {
    // Known doc basename at any depth.
    if (T2_BASENAMES.has(basename(p))) {
        return true;
    }
    // Otherwise the path must begin with one of the decision-record directories.
    for (const dirPrefix of T2_DIRS) {
        if (p.startsWith(dirPrefix)) {
            return true;
        }
    }
    return false;
}
|
|
21
55
|
function classifyTier(p) {
|
|
22
|
-
if (DENY_NAME.test(p) ||
|
|
56
|
+
if (DENY_NAME.test(p) ||
|
|
57
|
+
DENY_EXT.test(p) ||
|
|
58
|
+
DENY_DIR.test(p) ||
|
|
59
|
+
DENY_GENERATED.test(p) ||
|
|
60
|
+
DENY_TESTDIR.test(p))
|
|
23
61
|
return null;
|
|
24
62
|
if (T1_BASENAMES.has(basename(p)) || p.startsWith(".claude/rules/") || p.startsWith(".cursor/rules/"))
|
|
25
63
|
return "T1";
|
|
26
64
|
if (p.startsWith("notes/"))
|
|
27
65
|
return "T4";
|
|
28
|
-
if (
|
|
66
|
+
if (isCuratedDoc(p))
|
|
29
67
|
return "T2";
|
|
30
68
|
if (T3_NAMES.has(p) || p.startsWith(".github/workflows/"))
|
|
31
69
|
return "T3";
|
|
@@ -71,12 +71,13 @@ function renderCategories(policy) {
|
|
|
71
71
|
return policy.categories.map((c) => ` • ${c.kind}: ${c.gloss}`);
|
|
72
72
|
}
|
|
73
73
|
/**
|
|
74
|
-
* The default `fast` tier invitation to go
|
|
75
|
-
*
|
|
76
|
-
*
|
|
77
|
-
*
|
|
78
|
-
*
|
|
79
|
-
* When there is nothing deep to scout, return null so the bundle
|
|
74
|
+
* The default `fast` tier invitation to go deeper. When the deterministic pass left
|
|
75
|
+
* deep docs unread (decision/spec docs or legacy notes it could only count), return a
|
|
76
|
+
* one-line nudge naming the consolidated `/mla onboard` flow and quantifying the
|
|
77
|
+
* unread surface, so the deeper read is discoverable without reading `--help`
|
|
78
|
+
* (notes/20260624-mla-new-user-value-and-brownfield-proof.md, Phase 2: one public
|
|
79
|
+
* onboarding flow). When there is nothing deep to scout, return null so the bundle
|
|
80
|
+
* does not nag.
|
|
80
81
|
*/
|
|
81
82
|
function renderAgenticInvitation(scan) {
|
|
82
83
|
const docs = scan.inventory.decisionDocs;
|
|
@@ -86,7 +87,8 @@ function renderAgenticInvitation(scan) {
|
|
|
86
87
|
}
|
|
87
88
|
return (`Deeper docs went unread in this fast pass ` +
|
|
88
89
|
`(${pluralize(docs, "decision/spec doc")}, ${pluralize(notes, "legacy note")}). ` +
|
|
89
|
-
"Run
|
|
90
|
+
"Run `/mla onboard` inside a Claude Code session to dispatch two read-only scouts " +
|
|
91
|
+
"that dig into them and surface candidates born PENDING for review.");
|
|
90
92
|
}
|
|
91
93
|
/**
|
|
92
94
|
* Render the agentic scout mission for `mla activate --bootstrap agentic`. Pure
|
|
@@ -37,6 +37,7 @@ exports.REEXEC_GUARD_ENV = void 0;
|
|
|
37
37
|
exports.stateFilePath = stateFilePath;
|
|
38
38
|
exports.readUpdateState = readUpdateState;
|
|
39
39
|
exports.writeUpdateState = writeUpdateState;
|
|
40
|
+
exports.stampLatestFromManifest = stampLatestFromManifest;
|
|
40
41
|
exports.liveBinaryPath = liveBinaryPath;
|
|
41
42
|
exports.prevBinaryPath = prevBinaryPath;
|
|
42
43
|
exports.stagedDir = stagedDir;
|
|
@@ -105,6 +106,30 @@ function writeUpdateState(state) {
|
|
|
105
106
|
// best-effort; a read-only HOME just means we re-check next time.
|
|
106
107
|
}
|
|
107
108
|
}
|
|
109
|
+
// Persist the latest published version + minimum-supported floor learned from a
|
|
110
|
+
// freshly VERIFIED manifest into the update cache. The passive nag reads ONLY the
|
|
111
|
+
// cache; a foreground `mla upgrade [--check]` fetches the manifest LIVE. Without
|
|
112
|
+
// this, the two disagree: `--check` can see a new release while the throttled
|
|
113
|
+
// background check leaves the cache stale, so the nag never fires (it has nothing
|
|
114
|
+
// newer to report). Stamping here makes any foreground upgrade path refresh the
|
|
115
|
+
// cache as a side effect, so the manual and passive surfaces always agree.
|
|
116
|
+
//
|
|
117
|
+
// Preserves lastCheckedAt (the background-check throttle is a separate concern)
|
|
118
|
+
// and any staged binary. Best-effort: a cache write must never fail an otherwise
|
|
119
|
+
// successful upgrade command.
|
|
120
|
+
function stampLatestFromManifest(manifest) {
    try {
        // Start from whatever is cached so unrelated fields carry over untouched.
        const cached = readUpdateState();
        const { version: latestVersion, minVersion } = manifest;
        writeUpdateState({ ...cached, latestVersion, minVersion });
    }
    catch {
        // Deliberately best-effort: a failed cache refresh must not surface to the caller.
    }
}
|
|
108
133
|
// --- filesystem layout -------------------------------------------------------
|
|
109
134
|
// The curl install puts the binary here (install.sh: $HOME/.meetless/bin/mla),
|
|
110
135
|
// as a plain file (no symlink), so rename(2) over this path atomically swaps the
|
|
@@ -574,6 +599,11 @@ async function runUpgrade(opts) {
|
|
|
574
599
|
return 1;
|
|
575
600
|
}
|
|
576
601
|
const manifest = verified.manifest;
|
|
602
|
+
// We just verified the authoritative "latest" pointer live. Refresh the cache
|
|
603
|
+
// the passive nag reads BEFORE any of the early returns below (--check,
|
|
604
|
+
// up-to-date, no-artifact, ...), so a manual `mla upgrade --check` un-sticks a
|
|
605
|
+
// stale nag instead of discarding what it learned.
|
|
606
|
+
stampLatestFromManifest(manifest);
|
|
577
607
|
const triple = (0, update_check_1.currentTriple)(process.platform, process.arch);
|
|
578
608
|
const plan = (0, update_check_1.planUpgrade)({ current, manifest, triple, force: args.force });
|
|
579
609
|
switch (plan.action) {
|
package/dist/lib/wire.js
CHANGED
|
@@ -856,6 +856,8 @@ There are exactly two scouts: \`documentation\` and \`history\`. For each role:
|
|
|
856
856
|
c. The subagent returns exactly one JSON object (a scout result). Capture it verbatim. If it wrapped the JSON in prose, extract only the JSON object.
|
|
857
857
|
Do NOT pass a scout anything other than the brief from step 2a. The brief is the exact contract \`enrich ingest\` validates against; adding your own files or instructions breaks that contract. Do NOT edit a scout's returned JSON.
|
|
858
858
|
|
|
859
|
+
If a dispatch fails with "Agent type '${scout_brief_1.SCOUT_AGENT_NAME.documentation}' (or '${scout_brief_1.SCOUT_AGENT_NAME.history}') not found", do NOT fall back to \`general-purpose\` or any other agent: the scouts' tool boundary (doc scout = Read only; history scout = no tools) is enforced by those subagent definitions, and a substitute would run the scout with the wrong capabilities. This failure means the scout agents were installed (by \`mla init\`/\`mla rewire\`) AFTER this Claude Code session started, and Claude Code loads agent definitions only at session start. Stop and tell An: the scout agents are installed but not yet loaded; restart Claude Code (or open a new session), then re-run \`/mla onboard\`. The run record from Step 1 is durable, so nothing is lost.
|
|
860
|
+
|
|
859
861
|
Step 3: Ingest.
|
|
860
862
|
Assemble one JSON object: \`{"runId": "<runId>", "results": [<documentation result>, <history result>]}\`. Write it to a temporary file (for example \`/tmp/mla-onboard-<runId>.json\`) with the Write tool, then run \`mla enrich ingest --run-id <runId> --results-file <that file>\`. Print its summary verbatim. It reports, per scout, how many candidates were accepted, rejected, and persisted born PENDING.
|
|
861
863
|
|