@pugi/cli 0.1.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +172 -0
- package/bin/run.js +2 -0
- package/dist/commands/jobs.js +245 -0
- package/dist/core/agents/loader.js +104 -0
- package/dist/core/agents/registry.js +69 -0
- package/dist/core/auto-open-browser.js +128 -0
- package/dist/core/bash-classifier.js +1001 -0
- package/dist/core/clipboard.js +70 -0
- package/dist/core/context/builder.js +114 -0
- package/dist/core/context/compaction-events.js +99 -0
- package/dist/core/context/compaction.js +602 -0
- package/dist/core/context/invariants.js +250 -0
- package/dist/core/context/markdown-loader.js +270 -0
- package/dist/core/credentials.js +355 -0
- package/dist/core/engine/adapter-runner.js +8 -0
- package/dist/core/engine/anvil-client.js +156 -0
- package/dist/core/engine/compaction-hook.js +154 -0
- package/dist/core/engine/index.js +12 -0
- package/dist/core/engine/native-pugi.js +369 -0
- package/dist/core/engine/noop.js +27 -0
- package/dist/core/engine/prompts.js +118 -0
- package/dist/core/engine/tool-bridge.js +313 -0
- package/dist/core/file-cache.js +29 -0
- package/dist/core/hooks.js +415 -0
- package/dist/core/index-store.js +260 -0
- package/dist/core/jobs/registry.js +462 -0
- package/dist/core/mcp/client.js +316 -0
- package/dist/core/mcp/registry.js +171 -0
- package/dist/core/mcp/trust.js +91 -0
- package/dist/core/path-security.js +63 -0
- package/dist/core/permission.js +309 -0
- package/dist/core/repl/cap-warning.js +91 -0
- package/dist/core/repl/clipboard-read.js +174 -0
- package/dist/core/repl/history-search.js +175 -0
- package/dist/core/repl/history.js +172 -0
- package/dist/core/repl/kill-ring.js +138 -0
- package/dist/core/repl/session.js +618 -0
- package/dist/core/repl/slash-commands.js +227 -0
- package/dist/core/repl/workspace-context.js +113 -0
- package/dist/core/session.js +258 -0
- package/dist/core/settings.js +59 -0
- package/dist/core/skills/loader.js +454 -0
- package/dist/core/skills/sources.js +480 -0
- package/dist/core/skills/trust.js +172 -0
- package/dist/core/subagents/dispatcher.js +258 -0
- package/dist/core/subagents/index.js +26 -0
- package/dist/core/subagents/spawn.js +86 -0
- package/dist/core/trust.js +109 -0
- package/dist/index.js +8 -0
- package/dist/runtime/cli.js +3405 -0
- package/dist/runtime/commands/agents.js +385 -0
- package/dist/runtime/commands/budget.js +192 -0
- package/dist/runtime/commands/config.js +231 -0
- package/dist/runtime/commands/privacy.js +107 -0
- package/dist/runtime/commands/skills.js +401 -0
- package/dist/runtime/commands/undo.js +329 -0
- package/dist/runtime/update-check.js +294 -0
- package/dist/tools/bash.js +660 -0
- package/dist/tools/file-tools.js +346 -0
- package/dist/tools/registry.js +25 -0
- package/dist/tools/web-fetch.js +535 -0
- package/dist/tui/agent-tree.js +66 -0
- package/dist/tui/conversation-pane.js +45 -0
- package/dist/tui/device-flow.js +142 -0
- package/dist/tui/input-box.js +474 -0
- package/dist/tui/login-picker.js +69 -0
- package/dist/tui/render.js +125 -0
- package/dist/tui/repl-render.js +240 -0
- package/dist/tui/repl-splash-art.js +64 -0
- package/dist/tui/repl-splash.js +111 -0
- package/dist/tui/repl.js +214 -0
- package/dist/tui/slash-palette.js +106 -0
- package/dist/tui/splash-data.js +61 -0
- package/dist/tui/splash.js +31 -0
- package/dist/tui/status-bar.js +71 -0
- package/dist/tui/update-banner.js +8 -0
- package/dist/tui/workspace-context.js +105 -0
- package/package.json +71 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Compaction invariants — the safety belt that decides whether a
|
|
3
|
+
* compaction result is allowed to replace the original transcript.
|
|
4
|
+
*
|
|
5
|
+
* Per pattern card §2:
|
|
6
|
+
* - principle 4: never summarize secrets into durable memory
|
|
7
|
+
* - principle 5: do not erase open decisions
|
|
8
|
+
* - principle 6: cache stable prompt parts (static hash must survive
|
|
9
|
+
* compaction unchanged)
|
|
10
|
+
*
|
|
11
|
+
* Plus our own physical-integrity invariant: artifact refs emitted by
|
|
12
|
+
* the compaction must point to files that exist on disk and match the
|
|
13
|
+
* SHA256 stored in the ref.
|
|
14
|
+
*
|
|
15
|
+
* Violations fire `compaction.invariant_violated` events upstream; the
|
|
16
|
+
* engine loop discards the compaction result and tries again on the
|
|
17
|
+
* next turn.
|
|
18
|
+
*/
|
|
19
|
+
import { createHash } from 'node:crypto';
|
|
20
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
21
|
+
/**
 * Credential-shaped regular expressions swept over a compaction summary.
 *
 * The list is intentionally biased towards over-reporting. Two families
 * of shapes are covered:
 *
 *   - "cooperative" pairs where a telling keyword is followed by `:` or
 *     `=` and a value (api_key, access_token, password, client_secret,
 *     private_key, `.env.*`, ...);
 *   - bare provider tokens that carry no keyword at all (Bearer tokens,
 *     AWS access keys, GitHub, Slack, Stripe, Anthropic / OpenAI keys and
 *     PEM private-key blocks).
 *
 * A final catch-all flags long base64-looking blobs that follow `:` or
 * `=`, so keys tucked into raw JSON (`{"x":"abc...40chars..."}`) still
 * surface.
 *
 * Every entry carries the `g` flag and is therefore stateful through
 * `lastIndex`. `findFirstSecret` walks the array in order and returns the
 * first non-empty match of the first pattern that hits, so array order
 * decides which match is reported.
 */
const SECRET_PATTERNS = [
    // --- Keyword followed by `:` / `=` and a non-whitespace value. ---
    // `token` and `secret` only count when not glued to a preceding
    // lowercase letter (negative lookbehind).
    /(api[_-]?key|access[_-]?token|id[_-]?token|password|passwd|client[_-]?secret|private[_-]?key|\.env\.[A-Z_]+|(?<![a-z])token|(?<![a-z])secret)\s*[:=]\s*\S+/gi,

    // --- Bearer credentials. ---
    // Full `Authorization: Bearer <anything>` header line.
    /Authorization\s*:\s*Bearer\s+\S+/gi,
    // Stand-alone `Bearer <token>`: either JWT-shaped (`eyJ...`) or an
    // opaque run of 20+ token characters.
    /\bBearer\s+(?:eyJ[A-Za-z0-9_\-.]{16,}|[A-Za-z0-9_\-.]{20,})/g,

    // --- AWS. ---
    // `AKIA` + 16 upper-case alphanumerics, plus the two key names used
    // in credentials files.
    /\bAKIA[0-9A-Z]{16}\b/g,
    /aws_access_key_id\s*[:=]\s*\S+/gi,
    /aws_secret_access_key\s*[:=]\s*\S+/gi,

    // --- GitHub tokens: one pattern per `gh?_` prefix, then `github_pat_`. ---
    /\bghp_[A-Za-z0-9]{36}\b/g,
    /\bgho_[A-Za-z0-9]{36}\b/g,
    /\bghs_[A-Za-z0-9]{36}\b/g,
    /\bghu_[A-Za-z0-9]{36}\b/g,
    /\bghr_[A-Za-z0-9]{36}\b/g,
    /\bgithub_pat_[A-Za-z0-9_]{22,}\b/g,

    // --- Slack tokens (xoxa / xoxb / xoxp / xoxr / xoxs / xoxe). ---
    /\bxox[abprse]-[A-Za-z0-9-]{10,}/g,

    // --- Stripe live / test secret keys (underscore-separated `sk_`). ---
    /\bsk_(?:live|test)_[A-Za-z0-9]{24,}\b/g,

    // --- Anthropic (`sk-ant-`) and other dash-separated `sk-` keys. ---
    // Kept as two patterns; the second explicitly excludes the `ant-`
    // prefix so the two never report the same token.
    /\bsk-ant-[A-Za-z0-9_-]{32,}\b/g,
    /\bsk-(?!ant-)[A-Za-z0-9]{40,}\b/g,

    // --- PEM private keys. ---
    // Whole block from BEGIN to the nearest END marker. The body class
    // accepts any character, so real newlines and literal `\n` escapes
    // are both spanned.
    /-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED |)PRIVATE KEY-----[\s\S\\n]*?-----END (?:RSA |EC |DSA |OPENSSH |ENCRYPTED |)PRIVATE KEY-----/g,
    // A BEGIN marker on its own, for blocks whose END marker is absent.
    /-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED |)PRIVATE KEY-----/g,

    // --- Defence in depth: long base64-ish blob after `:` or `=`. ---
    // 40+ characters from the base64 / base64url alphabet (about 6 bits
    // per character), optionally quoted, and followed by whitespace, `,`,
    // `}` or end of input.
    /[:=]\s*"?([A-Za-z0-9_\-+/]{40,}={0,2})"?(?=\s|,|\}|$)/g,
];
|
|
88
|
+
/**
 * Recognises a transcript line that opens with one of the decision
 * markers compaction has to carry over verbatim: `DECISION:`, `OPEN:`,
 * `BLOCKED:` or `REJECTED:`.
 *
 * The pattern is anchored at the start of the line and tolerates only
 * leading whitespace, so a marker mentioned mid-sentence does not count.
 * Matching is case-sensitive. There is no `g` flag, so `.test()` keeps no
 * state between calls.
 *
 * NOTE(review): a Markdown-quoted line such as `> DECISION: ...` does NOT
 * match, because `>` is not whitespace — confirm whether quoted decisions
 * are supposed to be tracked.
 */
const DECISION_RX = /^\s*(?:DECISION|OPEN|BLOCKED|REJECTED):/;
|
|
94
|
+
/**
 * Decide whether a compaction result may replace the original transcript.
 *
 * Four checks run in a fixed order and every failure is appended to the
 * returned list; an empty list means the compaction is safe to commit.
 *
 *   1. `secrets-never-summarize`   — `summaryText` must not contain a
 *      credential-shaped substring (only the text about to be persisted
 *      is scanned, not the input transcript).
 *   2. `open-decisions-preserved`  — every DECISION / OPEN / BLOCKED /
 *      REJECTED line of `before.transcript` must reappear, trimmed, either
 *      as a substring of `summaryText` or as an entry of
 *      `after.decisionsPreserved`.
 *   3. `artifact-refs-resolvable`  — each entry of `after.artifactsCreated`
 *      is verified against the file on disk.
 *   4. `static-hash-unchanged`     — `instructionsHash` and
 *      `toolSchemaHash` must be identical before and after.
 *
 * @param {object} args
 * @param {{ transcript: Iterable<{ content: string }> }} args.before
 *   Snapshot taken before compaction.
 * @param {{ decisionsPreserved: string[], artifactsCreated: Iterable<object> }} args.after
 *   Result produced by the compaction tier.
 * @param {string} args.summaryText
 *   Summary written by the compaction; an empty string skips the secret sweep.
 * @param {{ instructionsHash: string, toolSchemaHash: string }} args.staticHashBefore
 * @param {{ instructionsHash: string, toolSchemaHash: string }} args.staticHashAfter
 * @returns {Array<{ invariant: string, evidence: string, artifactRef?: string }>}
 *   Violations in check order; empty when everything holds.
 */
export function checkInvariants(args) {
    const { before, after, summaryText, staticHashBefore, staticHashAfter } = args;
    const violations = [];

    // 1. Secret sweep. The evidence is redacted before it leaves this function.
    if (summaryText.length > 0) {
        const leaked = findFirstSecret(summaryText);
        if (leaked !== null) {
            violations.push({ invariant: 'secrets-never-summarize', evidence: redact(leaked) });
        }
    }

    // 2. Decision markers. Collect the trimmed marker lines first, then
    //    look each one up in the summary and in the preserved list.
    const requiredDecisions = [];
    for (const turn of before.transcript) {
        const markerLines = turn.content.split('\n').filter((line) => DECISION_RX.test(line));
        for (const markerLine of markerLines) {
            requiredDecisions.push(markerLine.trim());
        }
    }
    const carriedOver = new Set(after.decisionsPreserved.map((d) => d.trim()));
    for (const decision of requiredDecisions) {
        if (summaryText.includes(decision) || carriedOver.has(decision)) {
            continue;
        }
        violations.push({ invariant: 'open-decisions-preserved', evidence: decision });
    }

    // 3. Artifact references are checked physically by `verifyArtifact`,
    //    which yields a violation object or a falsy value.
    for (const ref of after.artifactsCreated) {
        const problem = verifyArtifact(ref);
        if (problem) {
            violations.push(problem);
        }
    }

    // 4. Static prompt hashes, compared field by field in a fixed order.
    for (const field of ['instructionsHash', 'toolSchemaHash']) {
        if (staticHashBefore[field] === staticHashAfter[field]) {
            continue;
        }
        violations.push({
            invariant: 'static-hash-unchanged',
            evidence: `${field} ${staticHashBefore[field]} -> ${staticHashAfter[field]}`,
        });
    }

    return violations;
}
|
|
175
|
+
/**
 * Physically verify one artifact reference.
 *
 * The file at `ref.path` is read and hashed with SHA-256, and the hex
 * digest is compared with `ref.sha256` (an optional `sha256:` prefix is
 * ignored, and the comparison is case-insensitive because hex digests
 * carry no case information).
 *
 * Every failure mode is reported as an `artifact-refs-resolvable`
 * violation rather than thrown, so one unreadable artifact cannot crash
 * the whole invariant check.
 *
 * @param {{ path?: string, sha256?: string } | null | undefined} ref
 *   Artifact reference emitted by the compaction.
 * @returns {{ invariant: string, evidence: string, artifactRef: (string|undefined) } | null}
 *   A violation object, or `null` when the file exists and its digest matches.
 */
function verifyArtifact(ref) {
    const declared = ref?.sha256;
    // A non-string digest is treated like a missing one; it could never
    // match and would otherwise throw on `.startsWith`.
    if (!ref?.path || typeof declared !== 'string' || declared.length === 0) {
        return {
            invariant: 'artifact-refs-resolvable',
            evidence: `artifact ref missing path or sha256: ${JSON.stringify(ref)}`,
            artifactRef: declared,
        };
    }
    if (!existsSync(ref.path)) {
        return {
            invariant: 'artifact-refs-resolvable',
            evidence: `artifact path does not exist: ${ref.path}`,
            artifactRef: declared,
        };
    }
    let physical;
    try {
        physical = createHash('sha256').update(readFileSync(ref.path)).digest('hex');
    }
    catch (error) {
        // The path exists but cannot be read as a file (directory,
        // permission problem, or removed since the existence check).
        const reason = error instanceof Error ? error.message : String(error);
        return {
            invariant: 'artifact-refs-resolvable',
            evidence: `artifact path could not be read: ${ref.path} (${reason})`,
            artifactRef: declared,
        };
    }
    // Refs are stored as `sha256:<hex>`; strip the prefix and normalise
    // case before comparing with the lowercase digest computed above.
    const prefix = 'sha256:';
    const expected = (declared.startsWith(prefix) ? declared.slice(prefix.length) : declared).toLowerCase();
    if (physical !== expected) {
        return {
            invariant: 'artifact-refs-resolvable',
            evidence: `sha256 mismatch at ${ref.path}: expected ${declared}, got ${physical}`,
            artifactRef: declared,
        };
    }
    return null;
}
|
|
204
|
+
/**
 * Scan `summaryText` with every entry of `SECRET_PATTERNS`, in array
 * order, and hand back the text of the first non-empty match.
 *
 * A pattern that finds nothing (or only an empty match) does not stop the
 * sweep; the next pattern is tried. Because the patterns carry the `g`
 * flag, `lastIndex` is rewound to 0 before each `exec` so the result
 * never depends on earlier calls.
 *
 * @param {string} summaryText Text to scan.
 * @returns {string | null} The matched substring, or `null` when no pattern hits.
 */
function findFirstSecret(summaryText) {
    for (let index = 0; index < SECRET_PATTERNS.length; index += 1) {
        const rx = SECRET_PATTERNS[index];
        rx.lastIndex = 0;
        const hit = rx.exec(summaryText);
        if (hit === null || hit[0].length === 0) {
            continue;
        }
        return hit[0];
    }
    return null;
}
|
|
219
|
+
/**
 * Mask a matched secret so it can be quoted as violation evidence.
 *
 * Three shapes are handled, in this order:
 *
 *   1. PEM block (input starts with `-----BEGIN`): only the BEGIN header
 *      line is kept; everything after it is replaced by a fixed marker.
 *   2. `keyword: value` / `keyword=value`: the keyword and separator stay
 *      visible, the value is reduced to its first and last two characters
 *      (or `***` when it has four characters or fewer).
 *   3. Bare token: first and last two characters of the whole input (or
 *      `***` when it has four characters or fewer).
 *
 * @param {string} input The raw matched text.
 * @returns {string} A redacted rendering of `input`.
 */
function redact(input) {
    if (input.startsWith('-----BEGIN')) {
        // Take the header straight from the start of the input instead of
        // searching for a line break. The label may not contain dashes,
        // line breaks or backslashes, so the match stops at the header's
        // closing dashes whether the body follows after a real newline, an
        // escaped `\n` / `\r\n`, a space, or nothing at all. Searching for
        // "dashes followed by a newline or end of input" used to land on
        // the END marker for escaped or single-line blocks and leaked the
        // entire key body.
        const header = /^-----BEGIN[^-\r\n\\]*-----/.exec(input);
        const firstLine = header ? header[0] : '-----BEGIN PRIVATE KEY-----';
        return `${firstLine} ***PEM_BODY_REDACTED***`;
    }
    const mask = (text) => (text.length > 4 ? `${text.slice(0, 2)}***${text.slice(-2)}` : '***');
    // Keyword shape: everything up to the first `:` / `=` (plus trailing
    // whitespace) is the visible head, the next non-whitespace run is the
    // value. Anything after that run is dropped from the evidence.
    const kv = /^([^:=]+[:=]\s*)(\S+)/.exec(input);
    if (kv) {
        return `${kv[1]}${mask(kv[2])}`;
    }
    // Bare-token shape: mask the middle of the whole input.
    return mask(input);
}
|
|
250
|
+
//# sourceMappingURL=invariants.js.map
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PUGI.md / AGENTS.md context loader with bounded @import resolution.
|
|
3
|
+
*
|
|
4
|
+
* Per pattern card `docs/research/pugi-cli-corpus/patterns/context-compaction.md`
|
|
5
|
+
* §6 (prompt cache boundary), PUGI.md and AGENTS.md belong to the static
|
|
6
|
+
* context block. They are loaded once per session, deterministically, with
|
|
7
|
+
* the following safety budget:
|
|
8
|
+
*
|
|
9
|
+
* - max import depth: 3 (deeper chains are skipped, not fatal)
|
|
10
|
+
* - max total loaded bytes: 64 KB across PUGI.md + AGENTS.md + all imports
|
|
11
|
+
* - HTML comment stripping (`<!-- ... -->`) — comments often carry stale
|
|
12
|
+
* annotations that bias the model long after they go out of date
|
|
13
|
+
* - workspace containment — `@import ../../../etc/passwd` is rejected
|
|
14
|
+
*
|
|
15
|
+
* Missing files are not errors. If `PUGI.md` is absent we simply skip it
|
|
16
|
+
* and record a `file_missing` warning; the engine builds its context from instructions +
|
|
17
|
+
* tool schemas alone.
|
|
18
|
+
*
|
|
19
|
+
* This module is pure: no logging, no side effects beyond fs reads. The
|
|
20
|
+
* caller (context builder) decides how to emit warning events.
|
|
21
|
+
*/
|
|
22
|
+
import { existsSync, readFileSync, realpathSync, statSync } from 'node:fs';
|
|
23
|
+
import { dirname, isAbsolute, relative, resolve } from 'node:path';
|
|
24
|
+
/** Deepest `@import` nesting level that is still expanded; deeper directives are skipped with a warning. */
export const MAX_IMPORT_DEPTH = 3;
/** Byte budget (64 KiB) shared by all root markdown files and everything they import. */
export const MAX_TOTAL_BYTES = 64 * 1024;
/**
 * File names probed at the workspace root, in load order. `PUGI.md` is
 * the Pugi-native file and comes first; `AGENTS.md` is the compatibility
 * file shared with other agentic CLIs.
 */
export const MARKDOWN_SOURCES = ['PUGI.md', 'AGENTS.md'];
|
|
32
|
+
/**
 * Load the markdown context files listed in `MARKDOWN_SOURCES` from the
 * root of a workspace.
 *
 * Files are processed in list order and share one byte budget of
 * `MAX_TOTAL_BYTES` as well as one visited-set, so a file already
 * expanded for an earlier source is not expanded again. Nothing here
 * throws for an absent file: a missing source yields a `file_missing`
 * warning, and a source reached after the budget is spent yields a
 * `budget_exhausted` warning.
 *
 * @param {string} workspaceRoot Workspace directory; resolved to an absolute path.
 * @returns {Promise<{ loaded: object[], warnings: object[], totalBytes: number }>}
 *   `loaded` holds one entry per source that was expanded, `warnings` the
 *   flat list of everything noteworthy, `totalBytes` the budget consumed.
 */
export async function loadMarkdownContext(workspaceRoot) {
    const rootDir = resolve(workspaceRoot);
    const seenFiles = new Set();
    const warnings = [];
    const loaded = [];
    let remaining = MAX_TOTAL_BYTES;

    for (const source of MARKDOWN_SOURCES) {
        const candidate = resolve(rootDir, source);
        if (!existsSync(candidate)) {
            warnings.push({
                kind: 'file_missing',
                message: `${source} not found at workspace root`,
                path: candidate,
            });
        }
        else if (remaining <= 0) {
            warnings.push({
                kind: 'budget_exhausted',
                message: `skipped ${source}: 64 KB total budget already consumed by earlier file`,
                path: candidate,
            });
        }
        else {
            // Expansion appends its own warnings to the shared list and
            // reports how much of the budget it used.
            const { rootRawBytes, bytesConsumed, imports, truncated, content } = expandFile({
                filePath: candidate,
                workspaceRoot: rootDir,
                depth: 0,
                visited: seenFiles,
                warnings,
                budgetRemaining: remaining,
            });
            loaded.push({
                source,
                resolvedPath: candidate,
                rawBytes: rootRawBytes,
                loadedBytes: bytesConsumed,
                imports,
                truncated,
                content,
            });
            remaining -= bytesConsumed;
        }
    }

    return { loaded, warnings, totalBytes: MAX_TOTAL_BYTES - remaining };
}
|
|
87
|
+
/**
 * Read one markdown file, strip HTML comments, inline its `@import`
 * directives recursively and charge everything against a byte budget.
 *
 * Plain lines are charged at their UTF-8 byte length plus one byte for
 * the line break. When a line does not fit, as much of it as fits is kept
 * (cut on a code point boundary, measured in bytes) and expansion stops
 * with `truncated` set.
 *
 * An `@import <path>` line (optionally indented, nothing else on the
 * line) is replaced by the expanded content of the target. A directive is
 * skipped with a warning when it would exceed `MAX_IMPORT_DEPTH`, uses an
 * absolute path, resolves outside the workspace (lexically or after
 * following symlinks), or names a file that does not exist. Directives
 * are recognised on any line, including lines inside code fences.
 *
 * @param {object} input
 * @param {string} input.filePath Absolute path of the file to expand.
 * @param {string} input.workspaceRoot Absolute workspace root used for containment checks.
 * @param {number} input.depth Import depth of `filePath` (0 for a root file).
 * @param {Set<string>} input.visited Paths already expanded in this load; mutated.
 * @param {object[]} input.warnings Shared warning list; appended to.
 * @param {number} input.budgetRemaining Bytes still available to this subtree.
 * @returns {{ content: string, rootRawBytes: number, bytesConsumed: number, imports: object[], truncated: boolean }}
 *   `rootRawBytes` is the on-disk size of `filePath`; `imports` lists every
 *   file inlined beneath it (direct and transitive).
 */
function expandFile(input) {
    const { filePath, workspaceRoot, depth, visited, warnings } = input;
    let budgetRemaining = input.budgetRemaining;
    const imports = [];
    let truncated = false;
    // Cycle guard: a file already expanded during this load contributes
    // nothing the second time, which also stops import cycles.
    if (visited.has(filePath)) {
        return {
            content: '',
            rootRawBytes: 0,
            bytesConsumed: 0,
            imports,
            truncated: false,
        };
    }
    visited.add(filePath);
    let raw;
    let rawBytes;
    try {
        raw = readFileSync(filePath, 'utf8');
        rawBytes = statSync(filePath).size;
    }
    catch (error) {
        warnings.push({
            kind: 'read_error',
            message: `could not read ${filePath}: ${error.message}`,
            path: filePath,
        });
        return {
            content: '',
            rootRawBytes: 0,
            bytesConsumed: 0,
            imports,
            truncated: false,
        };
    }
    // Comments are removed before any accounting so they never cost budget.
    const stripped = stripHtmlComments(raw);
    const lines = stripped.split('\n');
    const expandedLines = [];
    for (const line of lines) {
        const match = /^\s*@import\s+(\S+)\s*$/.exec(line);
        if (!match) {
            // Plain content line, charged line by line so an oversized
            // file is captured up to the cap instead of being dropped.
            const lineBytes = Buffer.byteLength(line, 'utf8') + 1;
            if (budgetRemaining <= 0) {
                truncated = true;
                break;
            }
            if (lineBytes > budgetRemaining) {
                // Keep what fits, reserving one byte for the line break.
                // The cut is made in bytes, not UTF-16 code units, so
                // multi-byte text cannot overshoot the budget.
                const room = Math.max(0, budgetRemaining - 1);
                expandedLines.push(truncateUtf8(line, room));
                budgetRemaining = 0;
                truncated = true;
                break;
            }
            expandedLines.push(line);
            budgetRemaining -= lineBytes;
            continue;
        }
        const importPath = match[1] ?? '';
        const nextDepth = depth + 1;
        if (nextDepth > MAX_IMPORT_DEPTH) {
            warnings.push({
                kind: 'import_depth_exceeded',
                message: `@import depth ${nextDepth} exceeds max ${MAX_IMPORT_DEPTH} at ${filePath} -> ${importPath}`,
                path: importPath,
            });
            continue;
        }
        if (isAbsolute(importPath)) {
            warnings.push({
                kind: 'import_escapes_workspace',
                message: `@import absolute path rejected: ${importPath} (from ${filePath})`,
                path: importPath,
            });
            continue;
        }
        // Lexical containment check on the path as written.
        const targetPath = resolve(dirname(filePath), importPath);
        if (escapesWorkspace(relative(workspaceRoot, targetPath))) {
            warnings.push({
                kind: 'import_escapes_workspace',
                message: `@import escapes workspace: ${importPath} (from ${filePath})`,
                path: importPath,
            });
            continue;
        }
        if (!existsSync(targetPath)) {
            warnings.push({
                kind: 'import_missing',
                message: `@import target not found: ${importPath} (from ${filePath})`,
                path: targetPath,
            });
            continue;
        }
        // Physical containment check: a symlink inside the workspace can
        // point anywhere, so compare the real paths of target and root.
        let realTarget;
        let realRoot;
        try {
            realTarget = realpathSync(targetPath);
            realRoot = realpathSync(workspaceRoot);
        }
        catch (error) {
            warnings.push({
                kind: 'read_error',
                message: `realpath failed for @import: ${error.message}`,
                path: targetPath,
            });
            continue;
        }
        if (escapesWorkspace(relative(realRoot, realTarget))) {
            warnings.push({
                kind: 'import_escapes_workspace',
                message: `@import escapes workspace via symlink: ${importPath} -> ${realTarget} (from ${filePath})`,
                path: importPath,
            });
            continue;
        }
        const subtree = expandFile({
            filePath: targetPath,
            workspaceRoot,
            depth: nextDepth,
            visited,
            warnings,
            budgetRemaining,
        });
        expandedLines.push(subtree.content);
        imports.push({
            resolvedPath: targetPath,
            depth: nextDepth,
            rawBytes: subtree.rootRawBytes,
            loadedBytes: subtree.bytesConsumed,
        });
        imports.push(...subtree.imports);
        budgetRemaining -= subtree.bytesConsumed;
        if (subtree.truncated) {
            truncated = true;
        }
        if (budgetRemaining <= 0) {
            truncated = true;
            break;
        }
    }
    const content = expandedLines.join('\n');
    const bytesConsumed = input.budgetRemaining - budgetRemaining;
    return {
        content,
        rootRawBytes: rawBytes,
        bytesConsumed,
        imports,
        truncated,
    };
}
/**
 * True when a path produced by `path.relative(root, target)` points
 * outside `root`: it is absolute (e.g. another drive) or its first
 * segment is exactly `..`. A name that merely begins with two dots, such
 * as `..notes.md`, stays inside.
 */
function escapesWorkspace(rel) {
    return isAbsolute(rel) || /^\.\.(?:[\\/]|$)/.test(rel);
}
/**
 * Clip `text` so that its UTF-8 encoding occupies at most `maxBytes`
 * bytes, backing up to the previous code point boundary when the cut
 * would land inside a multi-byte sequence.
 */
function truncateUtf8(text, maxBytes) {
    if (maxBytes <= 0) {
        return '';
    }
    const encoded = Buffer.from(text, 'utf8');
    if (encoded.length <= maxBytes) {
        return text;
    }
    let end = maxBytes;
    // UTF-8 continuation bytes have the form 10xxxxxx. If the first
    // excluded byte is one, the cut is mid-sequence: step back to its lead.
    while (end > 0 && (encoded[end] & 0xc0) === 0x80) {
        end -= 1;
    }
    return encoded.toString('utf8', 0, end);
}
|
|
261
|
+
/**
 * Delete every HTML comment (`<!-- ... -->`) from a markdown string.
 *
 * Each comment ends at the nearest `-->` (non-greedy) and may span
 * several lines. An opening `<!--` with no closing `-->` is left in place.
 *
 * @param {string} input Markdown source.
 * @returns {string} `input` with all complete comments removed.
 */
export function stripHtmlComments(input) {
    const htmlComment = /<!--[\s\S]*?-->/g;
    return input.replace(htmlComment, '');
}
|
|
270
|
+
//# sourceMappingURL=markdown-loader.js.map
|