@bookedsolid/rea 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.husky/pre-push +15 -18
- package/README.md +41 -1
- package/THREAT_MODEL.md +100 -29
- package/dist/audit/append.d.ts +21 -8
- package/dist/audit/append.js +48 -83
- package/dist/audit/fs.d.ts +68 -0
- package/dist/audit/fs.js +171 -0
- package/dist/cli/audit.d.ts +40 -0
- package/dist/cli/audit.js +205 -0
- package/dist/cli/doctor.d.ts +19 -4
- package/dist/cli/doctor.js +172 -5
- package/dist/cli/index.js +26 -1
- package/dist/cli/init.js +93 -7
- package/dist/cli/install/pre-push.d.ts +335 -0
- package/dist/cli/install/pre-push.js +2818 -0
- package/dist/cli/serve.d.ts +64 -0
- package/dist/cli/serve.js +270 -2
- package/dist/cli/status.d.ts +90 -0
- package/dist/cli/status.js +399 -0
- package/dist/cli/utils.d.ts +4 -0
- package/dist/cli/utils.js +4 -0
- package/dist/gateway/audit/rotator.d.ts +116 -0
- package/dist/gateway/audit/rotator.js +289 -0
- package/dist/gateway/circuit-breaker.d.ts +17 -0
- package/dist/gateway/circuit-breaker.js +32 -3
- package/dist/gateway/downstream-pool.d.ts +2 -1
- package/dist/gateway/downstream-pool.js +2 -2
- package/dist/gateway/downstream.d.ts +39 -3
- package/dist/gateway/downstream.js +73 -14
- package/dist/gateway/log.d.ts +122 -0
- package/dist/gateway/log.js +334 -0
- package/dist/gateway/middleware/audit.d.ts +24 -1
- package/dist/gateway/middleware/audit.js +103 -58
- package/dist/gateway/middleware/blocked-paths.d.ts +0 -9
- package/dist/gateway/middleware/blocked-paths.js +439 -67
- package/dist/gateway/middleware/injection.d.ts +218 -13
- package/dist/gateway/middleware/injection.js +433 -51
- package/dist/gateway/middleware/kill-switch.d.ts +10 -1
- package/dist/gateway/middleware/kill-switch.js +20 -1
- package/dist/gateway/observability/metrics.d.ts +125 -0
- package/dist/gateway/observability/metrics.js +321 -0
- package/dist/gateway/server.d.ts +19 -0
- package/dist/gateway/server.js +99 -15
- package/dist/policy/loader.d.ts +47 -0
- package/dist/policy/loader.js +47 -0
- package/dist/policy/profiles.d.ts +13 -0
- package/dist/policy/profiles.js +12 -0
- package/dist/policy/types.d.ts +52 -0
- package/dist/registry/fingerprint.d.ts +73 -0
- package/dist/registry/fingerprint.js +81 -0
- package/dist/registry/fingerprints-store.d.ts +62 -0
- package/dist/registry/fingerprints-store.js +111 -0
- package/dist/registry/interpolate.d.ts +58 -0
- package/dist/registry/interpolate.js +121 -0
- package/dist/registry/loader.d.ts +2 -2
- package/dist/registry/loader.js +22 -1
- package/dist/registry/tofu-gate.d.ts +41 -0
- package/dist/registry/tofu-gate.js +189 -0
- package/dist/registry/tofu.d.ts +111 -0
- package/dist/registry/tofu.js +173 -0
- package/dist/registry/types.d.ts +9 -1
- package/package.json +3 -1
- package/profiles/bst-internal-no-codex.yaml +5 -0
- package/profiles/bst-internal.yaml +7 -0
- package/scripts/tarball-smoke.sh +197 -0
|
@@ -2,36 +2,160 @@ import path from 'node:path';
|
|
|
2
2
|
import { InvocationStatus } from '../../policy/types.js';
|
|
3
3
|
import { loadPolicyAsync } from '../../policy/loader.js';
|
|
4
4
|
/**
|
|
5
|
-
* Pre-execution middleware: denies tool invocations whose arguments
|
|
6
|
-
* reference paths
|
|
5
|
+
* Pre-execution middleware: denies tool invocations whose path-shaped arguments
|
|
6
|
+
* reference paths in the policy's `blocked_paths` list.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
8
|
+
* BUG-001 (0.3.x): earlier versions substring-matched blocked patterns against
|
|
9
|
+
* EVERY string value in the argument tree — including free-form `content` and
|
|
10
|
+
* `body` fields. Combined with a fallback that stripped the leading `.` from
|
|
11
|
+
* `.env`, a note containing the word "environment" tripped the guard. This
|
|
12
|
+
* version restricts enforcement to:
|
|
13
|
+
* 1. Arguments whose leaf key name is a known path-like identifier
|
|
14
|
+
* (`path`, `file_path`, `filename`, `folder`, …), OR
|
|
15
|
+
* 2. Arguments whose value LOOKS like a filesystem path (contains a slash,
|
|
16
|
+
* starts with `.` + alnum, `~`, `/`, or `./`).
|
|
17
|
+
*
|
|
18
|
+
* Post-merge hardening (0.4.0, PR #24 round-1 Codex blockers):
|
|
19
|
+
* - "Content" keys (content/body/text/message/name/value/label/tag/tags/
|
|
20
|
+
* title/description/...) are ALWAYS skipped — they are never path
|
|
21
|
+
* destinations. Scanning them by value shape caused availability regressions
|
|
22
|
+
* on every tool call that used these keys as metadata (e.g. messaging tools
|
|
23
|
+
* with `message: "/some/path"`). The accepted tradeoff: false negatives on
|
|
24
|
+
* content-key bypasses are preferable to false positives across the gateway.
|
|
25
|
+
* - Absolute-path blocked_paths entries (e.g. `/etc/passwd`, `/var/log/`)
|
|
26
|
+
* match absolute-path values anchored at the filesystem root. The BUG-001
|
|
27
|
+
* narrowing dropped the leading `/` during segmentation and silently
|
|
28
|
+
* regressed these entries; restored by carrying an absolute flag through.
|
|
29
|
+
* - Malformed `%XX` URL-escape sequences now FAIL CLOSED (request blocked)
|
|
30
|
+
* rather than falling through with undecoded content, which previously
|
|
31
|
+
* allowed `.rea/` trust-root bypass via crafted escapes like `.rea%ZZ/foo`.
|
|
32
|
+
*
|
|
33
|
+
* SECURITY: .rea/ is always blocked regardless of policy (trust root).
|
|
34
|
+
* SECURITY: Matching is path-segment aware — no substring false positives.
|
|
35
|
+
* SECURITY: Absolute-path patterns are anchored at `/`, not just basename.
|
|
36
|
+
* SECURITY: Glob patterns (`*`, `?`) in blocked_paths are interpreted as
|
|
37
|
+
* single-segment globs (`*` = any chars except `/`, `?` = one non-`/` char).
|
|
38
|
+
* SECURITY: URL-encoded separators and case variants are normalized first.
|
|
39
|
+
* SECURITY: Triple+ encoded separators (%25252F → … → /) are decoded via an
|
|
40
|
+
* iterative decode-until-stable loop (no arbitrary cap) so they cannot escape
|
|
41
|
+
* the normalizer regardless of encoding depth.
|
|
42
|
+
* SECURITY: Only `file:` URIs are mapped to local filesystem paths. All other
|
|
43
|
+
* URI schemes (http:, https:, ftp:, etc.) reference remote resources and are
|
|
44
|
+
* returned as empty string so they never match any blocked_paths entry.
|
|
45
|
+
* SECURITY: `file:` URI authority forms (file://host/path, file:///path,
|
|
46
|
+
* file:/path) are all stripped to a bare path before decoding.
|
|
47
|
+
* SECURITY: Query strings and fragments in `file:` URIs
|
|
48
|
+
* (`file:///etc/passwd?dl=1#x`) are stripped before normalization so the
|
|
49
|
+
* path component is compared cleanly against blocked entries.
|
|
50
|
+
* SECURITY: C0 control characters (including null bytes) are stripped after
|
|
51
|
+
* decoding so they cannot smuggle segment prefixes past equality checks.
|
|
52
|
+
* SECURITY: Malformed URL-escapes are treated as hostile (request blocked).
|
|
53
|
+
* SECURITY: Paths with `%` that are not full `%XX` sequences (e.g.
|
|
54
|
+
* `/builds/50%complete/`) trigger the malformed-escape fail-closed gate.
|
|
55
|
+
* This is intentional: such values are structurally ambiguous and treated
|
|
56
|
+
* as hostile. Callers that need literal `%` in paths must percent-encode
|
|
57
|
+
* it as `%25`.
|
|
58
|
+
* SECURITY: Hot-reloads blocked_paths from policy.yaml when baseDir is given.
|
|
59
|
+
*/
|
|
60
|
+
// Argument key names (looked up lower-cased) whose string values are always
// checked against blocked_paths, whatever the value itself looks like.
const PATH_LIKE_KEYS = new Set([
  // files and generic paths
  'path', 'paths', 'file', 'files', 'file_path', 'filepath', 'filename', 'filenames',
  // directories
  'folder', 'folders', 'dir', 'directory', 'directories',
  // copy / move endpoints
  'src', 'source', 'dst', 'dest', 'destination', 'target',
  'input_path', 'output_path', 'from', 'to',
  // matchers and locators
  'pattern', 'glob', 'uri', 'url',
]);
|
|
89
|
+
/**
 * Argument key names (looked up lower-cased) that carry free-form prose or
 * tool metadata rather than a path destination. String values under these
 * keys are skipped unconditionally, even when they are shaped like a path
 * (`message: "/some/path"`, `tag: ".env"`), because scanning them by shape
 * denied legitimate tool calls across the gateway.
 *
 * Accepted tradeoff: a possible false negative through a content key is
 * preferred over false positives on every tool call.
 */
const CONTENT_KEYS = new Set([
  'content', 'contents', 'body', 'text', 'message',
  'note', 'notes', 'description', 'summary', 'title',
  'query', 'prompt', 'search', 'q',
  'comment', 'caption', 'subject',
  'name', 'label', 'tag', 'tags', 'value', 'reason',
]);
|
|
13
123
|
export function createBlockedPathsMiddleware(initialPolicy, baseDir) {
|
|
14
124
|
return async (ctx, next) => {
|
|
15
|
-
// Hot-reload blocked_paths from policy.yaml if baseDir is available
|
|
16
125
|
let blockedPaths = initialPolicy.blocked_paths;
|
|
17
|
-
if (baseDir) {
|
|
126
|
+
if (baseDir !== undefined) {
|
|
18
127
|
try {
|
|
19
128
|
const policy = await loadPolicyAsync(baseDir);
|
|
20
129
|
blockedPaths = policy.blocked_paths;
|
|
21
130
|
}
|
|
22
131
|
catch {
|
|
23
|
-
// Fall back to initial policy's blocked_paths on read failure
|
|
132
|
+
// Fall back to initial policy's blocked_paths on read failure.
|
|
24
133
|
}
|
|
25
134
|
}
|
|
26
|
-
|
|
27
|
-
const
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
135
|
+
const patterns = [...new Set([...blockedPaths, '.rea/'])];
|
|
136
|
+
for (const [key, value] of extractScannableStrings(ctx.arguments)) {
|
|
137
|
+
// Fail closed on malformed URL-escape sequences: silently falling back
|
|
138
|
+
// to undecoded content previously allowed `.rea/` trust-root bypass via
|
|
139
|
+
// crafted escapes like `.rea%ZZ/foo`. A malformed escape in a path-
|
|
140
|
+
// shaped value is treated as hostile.
|
|
141
|
+
if (hasMalformedEscape(value)) {
|
|
142
|
+
ctx.status = InvocationStatus.Denied;
|
|
143
|
+
ctx.error = `Argument "${key}" contains malformed URL-escape; blocked as hostile. Tool: ${ctx.tool_name}`;
|
|
144
|
+
return;
|
|
145
|
+
}
|
|
146
|
+
// Fail closed: if encoded path separators (%2f / %5c) remain after a
|
|
147
|
+
// full iterative decode, the value is using evasion-level encoding
|
|
148
|
+
// that the decode loop could not fully unwrap (it stops early on a URIError). Treat as hostile
|
|
149
|
+
// rather than risk a miss.
|
|
150
|
+
if (hasDeepEncodedSeparator(value)) {
|
|
151
|
+
ctx.status = InvocationStatus.Denied;
|
|
152
|
+
ctx.error = `Argument "${key}" contains deeply-encoded path separator; blocked as hostile. Tool: ${ctx.tool_name}`;
|
|
153
|
+
return;
|
|
154
|
+
}
|
|
155
|
+
for (const pattern of patterns) {
|
|
156
|
+
if (matchesBlockedPattern(value, pattern)) {
|
|
33
157
|
ctx.status = InvocationStatus.Denied;
|
|
34
|
-
ctx.error = `Argument "${key}" references blocked path "${
|
|
158
|
+
ctx.error = `Argument "${key}" references blocked path "${pattern}". Tool: ${ctx.tool_name}`;
|
|
35
159
|
return;
|
|
36
160
|
}
|
|
37
161
|
}
|
|
@@ -40,78 +164,326 @@ export function createBlockedPathsMiddleware(initialPolicy, baseDir) {
|
|
|
40
164
|
};
|
|
41
165
|
}
|
|
42
166
|
/**
 * Walk the argument tree and collect `[keyPath, value]` pairs for every
 * string that must be checked against blocked_paths.
 *
 * Routing is decided by the nearest enclosing object key, lower-cased (array
 * elements inherit the key of the array they sit in):
 *   - key in CONTENT_KEYS   -> never scanned, whatever the value looks like.
 *   - key in PATH_LIKE_KEYS -> always scanned.
 *   - any other key         -> scanned only when `looksLikePath(value)`.
 *
 * `keyPath` is the dotted / bracketed location of the string (`a.b[0]`); a
 * bare top-level string is reported under the key `value`.
 *
 * `seen` tracks only the containers on the CURRENT descent path. That is
 * enough to stop on reference cycles, and it means a container that is
 * referenced from several keys is evaluated under each of them. Marking
 * containers as visited permanently would let `{ message: x, path: x }` hide
 * `x` behind the content key that happened to be walked first.
 *
 * @param {unknown} obj Argument tree (or subtree) to inspect.
 * @param {string} [prefix=''] Key path of `obj` within the full tree.
 * @param {string} [inheritedKey=''] Nearest enclosing object key.
 * @param {WeakSet<object>} [seen] Containers currently being descended into.
 * @returns {Array<[string, string]>} Scannable strings in depth-first order.
 */
function extractScannableStrings(obj, prefix = '', inheritedKey = '', seen = new WeakSet()) {
  const out = [];
  collectScannableStrings(obj, prefix, inheritedKey, seen, out);
  return out;
}

/**
 * Recursive worker for `extractScannableStrings`. Appends into the shared
 * `out` array instead of spreading child results into `push(...)`, which can
 * exceed the engine's argument limit on very large arrays.
 */
function collectScannableStrings(node, keyPath, leafKey, ancestors, out) {
  if (node === null || node === undefined) {
    return;
  }
  if (typeof node === 'string') {
    const leaf = leafKey.toLowerCase();
    // Content keys win over everything else, including value shape.
    if (CONTENT_KEYS.has(leaf)) {
      return;
    }
    if (PATH_LIKE_KEYS.has(leaf) || looksLikePath(node)) {
      out.push([keyPath || 'value', node]);
    }
    return;
  }
  if (typeof node !== 'object') {
    return;
  }
  // Already on the current descent path: this is a cycle, stop here.
  if (ancestors.has(node)) {
    return;
  }
  ancestors.add(node);
  if (Array.isArray(node)) {
    for (let i = 0; i < node.length; i++) {
      collectScannableStrings(node[i], `${keyPath}[${i}]`, leafKey, ancestors, out);
    }
  } else {
    for (const [key, value] of Object.entries(node)) {
      const childPath = keyPath ? `${keyPath}.${key}` : key;
      collectScannableStrings(value, childPath, key, ancestors, out);
    }
  }
  // Unwind so the same container can be reached again through another key.
  ancestors.delete(node);
}
|
|
74
214
|
/**
 * Heuristic: does this string look like a filesystem path rather than prose?
 *
 * The value must be 1..1024 characters long and contain no whitespace. It is
 * then path-shaped when either its raw form or its fully URL-decoded form:
 *   - contains `/` or `\`
 *   - starts with `~`
 *   - starts with `.` followed by an alnum, `_` or `-` (dotfile)
 *   - starts with a Windows drive prefix (`C:\`, `c:/`)
 * or when the raw form contains an encoded separator or dot (`%2f`, `%5c`,
 * `%2e`).
 *
 * The decoded form is obtained with the same decode-until-stable loop the
 * matcher uses when normalizing, so multiply-encoded values such as
 * `%252erea` or `foo%25252Fbar` cannot slip past the shape gate and then
 * decode to a blocked path.
 *
 * @param {string} value Candidate argument value.
 * @returns {boolean} True when the value should be scanned as a path.
 */
function looksLikePath(value) {
  if (value.length === 0 || value.length > 1024) {
    return false;
  }
  if (/\s/.test(value)) {
    return false;
  }
  if (hasPathShape(value)) {
    return true;
  }
  // Singly-encoded separators / dot prefix: `.%72ea/foo`, `%2erea%2ffoo`.
  if (/%2[ef]|%5c/i.test(value)) {
    return true;
  }
  if (!value.includes('%')) {
    return false;
  }
  // Deeper encodings: unwrap until stable. Stop on URIError and keep the last
  // successfully decoded form.
  let decoded = value;
  for (;;) {
    let next;
    try {
      next = decodeURIComponent(decoded);
    } catch {
      break;
    }
    if (next === decoded) {
      break;
    }
    decoded = next;
  }
  return decoded !== value && hasPathShape(decoded);
}

/** Structural path markers shared by the raw and decoded checks. */
function hasPathShape(text) {
  return (
    text.includes('/') ||
    text.includes('\\') ||
    text.startsWith('~') ||
    /^\.[a-zA-Z0-9_-]/.test(text) ||
    /^[a-zA-Z]:[/\\]/.test(text)
  );
}
|
|
94
243
|
/**
 * Report whether `value` contains a malformed `%XX` URL-escape.
 *
 * Two ways to be malformed:
 *   1. structural: some `%` is not followed by two hex digits;
 *   2. semantic: every escape is well-formed, but `decodeURIComponent`
 *      rejects the string (it throws on byte sequences that are not valid
 *      UTF-8).
 *
 * A string without any `%` is well-formed by definition.
 *
 * Why this exists: normalization used to swallow decode errors and carry on
 * with the raw text, so `.rea%ZZ/foo` split into `.rea%zz` + `foo` and never
 * equalled `.rea`. Callers reject such values up front instead.
 *
 * @param {string} value Untrusted argument value.
 * @returns {boolean} True when the value must be treated as hostile.
 */
function hasMalformedEscape(value) {
  if (value.indexOf('%') === -1) {
    return false;
  }
  const danglingPercent = /%(?![0-9a-fA-F]{2})/;
  if (danglingPercent.test(value)) {
    return true;
  }
  let decodable = true;
  try {
    decodeURIComponent(value);
  } catch {
    decodable = false;
  }
  return !decodable;
}
|
|
272
|
+
/**
 * Detect an encoded path separator (`%2f` / `%5c`) that is still present
 * after URL-decoding the value as far as it will go.
 *
 * The value is decoded repeatedly until a pass changes nothing or
 * `decodeURIComponent` throws. The loop has no iteration cap, so the nesting
 * depth of the encoding does not matter. Whatever form is left when the loop
 * ends is then searched for an encoded separator; finding one means the
 * separator could not be unwrapped and the value is reported as evasive.
 *
 * Strings without `%` return false immediately. A URIError only ends the
 * loop; it is never propagated.
 *
 * @param {string} value Untrusted argument value.
 * @returns {boolean} True when an encoded separator survives decoding.
 */
function hasDeepEncodedSeparator(value) {
  if (value.indexOf('%') === -1) {
    return false;
  }
  let current = value;
  let settled = false;
  while (!settled) {
    let decoded;
    try {
      decoded = decodeURIComponent(current);
    } catch {
      // Cannot decode any further; inspect what we have.
      break;
    }
    if (decoded === current) {
      settled = true;
    } else {
      current = decoded;
    }
  }
  return /%2[fF]|%5[cC]/i.test(current);
}
|
|
303
|
+
/**
 * Check a candidate value against one blocked-path pattern, comparing whole
 * path segments (never substrings). Both sides go through `normalizePath`
 * first.
 *
 * Pattern syntax:
 *   - `*` matches any run of characters inside one segment, `?` exactly one;
 *     neither crosses a `/`.
 *   - A trailing `/` makes it a directory pattern: the directory itself and
 *     everything beneath it match.
 *   - A leading `/` anchors the pattern at the filesystem root: it is
 *     compared positionally from the first segment and only against values
 *     that are themselves absolute (`/etc/passwd` never matches
 *     `/project/etc/passwd` or `etc/passwd`).
 *   - Any other pattern is relative and may match at any depth. Without a
 *     trailing `/` it must line up with the END of the value (`.env` matches
 *     `/project/.env`); as a directory pattern it may be followed by further
 *     segments (`.rea/` matches `src/.rea/policy.yaml`, `conf/*` followed by
 *     `/` matches `conf/x/y`).
 *   - Kept from earlier behaviour: a relative single-segment glob matches
 *     any segment of the value, even without a trailing `/`.
 *
 * @param {string} value Argument value to test.
 * @param {string} pattern One entry of the effective blocked_paths list.
 * @returns {boolean} True when the value falls under the pattern.
 */
function matchesBlockedPattern(value, pattern) {
  const nv = normalizePath(value);
  const np = normalizePath(pattern);
  if (np.length === 0) {
    return false;
  }
  const dirPattern = np.endsWith('/');
  const base = dirPattern ? np.slice(0, -1) : np;
  if (base.length === 0) {
    return false;
  }
  const nonEmpty = (s) => s.length > 0;
  const segs = nv.split('/').filter(nonEmpty);
  const patternSegs = base.split('/').filter(nonEmpty);
  if (patternSegs.length === 0) {
    return false;
  }
  const hasGlob = /[*?]/.test(base);
  // One matcher per pattern segment: compiled glob, or plain equality.
  const matchers = hasGlob
    ? patternSegs.map((pseg) => {
        const re = globToRegex(pseg);
        return (seg) => re.test(seg);
      })
    : patternSegs.map((pseg) => (seg) => seg === pseg);
  // True when the pattern segments line up with the value starting at `start`.
  const alignsAt = (start) =>
    start + matchers.length <= segs.length &&
    matchers.every((matches, j) => matches(segs[start + j]));

  if (np.startsWith('/')) {
    // Rooted pattern: only absolute values qualify, compared from segment 0.
    if (!nv.startsWith('/') || !alignsAt(0)) {
      return false;
    }
    return dirPattern || segs.length === patternSegs.length;
  }

  const anySegmentGlob = hasGlob && patternSegs.length === 1;
  for (let start = 0; start + patternSegs.length <= segs.length; start++) {
    if (!alignsAt(start)) {
      continue;
    }
    const tailAligned = start + patternSegs.length === segs.length;
    if (tailAligned || dirPattern || anySegmentGlob) {
      return true;
    }
  }
  return false;
}
|
|
393
|
+
/**
 * Compile a single-segment glob into a RegExp anchored at both ends.
 *
 * `*` becomes "any run of non-`/` characters" and `?` becomes "one non-`/`
 * character". Every other regex metacharacter is backslash-escaped so it
 * matches literally; all remaining characters pass through unchanged.
 *
 * @param {string} glob Glob text.
 * @returns {RegExp} Anchored matcher for the glob.
 */
function globToRegex(glob) {
  const regexMeta = /[.+^${}()|[\]\\]/;
  const translate = (ch) => {
    switch (ch) {
      case '*':
        return '[^/]*';
      case '?':
        return '[^/]';
      default:
        return regexMeta.test(ch) ? `\\${ch}` : ch;
    }
  };
  const body = Array.from(glob, (ch) => translate(ch)).join('');
  return new RegExp(`^${body}$`);
}
|
|
412
|
+
/**
 * Reduce a value or pattern to a canonical, lower-cased POSIX-style path so
 * both sides of a blocked-path comparison are in the same form.
 *
 * IMPORTANT: for untrusted input, callers must reject malformed URL-escapes
 * with `hasMalformedEscape()` BEFORE calling this. Here a decode failure only
 * stops the decode loop; the last successfully decoded text is kept.
 *
 * Steps, in order:
 *   1. URI scheme dispatch (scheme names are matched case-insensitively):
 *      - `scheme://...` other than `file:` -> returns `''`, so it can never
 *        match a blocked entry.
 *      - `file:` -> the scheme and optional authority are removed
 *        (`file:///p`, `file://host/p`, `file:/p` all become `/p`), then the
 *        query string and fragment are dropped, including when they contain
 *        line breaks.
 *      - anything else is taken as-is.
 *   2. URL-decode repeatedly until the text stops changing (no depth cap),
 *      so `%25252F` -> `%252F` -> `%2F` -> `/`.
 *   3. Remove C0 control characters (`\x00`-`\x1f`) so they cannot prefix a
 *      segment and defeat equality checks (`\x00.gitignore`).
 *   4. Convert `\` to `/`, resolve `.` / `..` and repeated slashes with
 *      `path.posix.normalize`, and lower-case the result.
 *
 * @param {string} raw Value or pattern to normalize.
 * @returns {string} Normalized path, or `''` for non-file URIs.
 */
function normalizePath(raw) {
  // Step 1: URI scheme dispatch.
  let v = raw;
  if (/^file:/i.test(raw)) {
    // The `i` flag matters: URI schemes are case-insensitive, and an
    // unstripped `FILE:` prefix would leave the value non-absolute so that it
    // evades every absolute blocked pattern.
    v = v.replace(/^file:(?:\/\/[^/?#]*)?(?=\/)/i, '');
    // `[\s\S]` instead of `.` so a line break inside the query or fragment
    // cannot stop the strip short.
    v = v.replace(/[?#][\s\S]*$/, '');
  } else if (/^[a-zA-Z][a-zA-Z0-9+\-.]*:\/\//i.test(raw)) {
    // Remote / non-filesystem resource: never maps to a local path.
    return '';
  }

  // Step 2: decode until stable. Stop when a pass changes nothing or when
  // decodeURIComponent throws.
  for (;;) {
    let next;
    try {
      next = decodeURIComponent(v);
    } catch {
      break;
    }
    if (next === v) {
      break;
    }
    v = next;
  }

  // Step 3: strip C0 control characters (including NUL).
  v = v.replace(/[\x00-\x1f]/g, '');

  // Step 4: unify separators, collapse dot segments, fold case.
  v = v.replace(/\\/g, '/');
  v = path.posix.normalize(v);
  return v.toLowerCase();
}
|