@blamejs/exceptd-skills 0.15.49 → 0.15.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,262 @@
1
+ "use strict";
2
+ /**
3
+ * codepoint-class — shared codepoint-table threat catalog and regex
4
+ * compiler for the guard-* family.
5
+ *
6
+ * Threat detectors that need to match Unicode bidi overrides, C0
7
+ * control characters, zero-width / invisible chars, etc. compose
8
+ * regex character classes from numeric codepoint range tables here
9
+ * instead of embedding the attack characters directly in their
10
+ * source files. Centralizing the tables means:
11
+ *
12
+ * - Source files in lib/guard-* stay pure ASCII (zero
13
+ * irregular-whitespace lint findings, no eslint-disable comments
14
+ * for this category).
15
+ * - Adding / removing a codepoint from the catalog is a single
16
+ * edit; every guard picks up the change.
17
+ * - The detector composes the way an attacker would compose the
18
+ * payload (programmatic codepoint emission, not literal typing).
19
+ *
20
+ * Surface:
21
+ *
22
+ * hex4(cp) -> "\\uXXXX" escape for a single codepoint
23
+ * charClass(ranges) -> regex character class body for a range
24
+ * table (e.g. [0x200E, [0x202A,0x202E]])
25
+ * fromCp(cp) -> String.fromCharCode shorthand
26
+ * range tables -> BIDI_RANGES, C0_CTRL_RANGES,
27
+ * ZERO_WIDTH_RANGES
28
+ * compiled values -> BIDI_RE, BIDI_RE_G, C0_CTRL_RE,
29
+ * C0_CTRL_RE_G, ZERO_WIDTH_RE, ZW_RE_G,
30
+ * NULL_RE_G, NULL_BYTE, BOM_CHAR
31
+ *
32
+ * The range tables and compiled values are exported by name (there is
33
+ * no ranges() or compiled() function). The regex exports are RegExp instances built from the codepoint tables at module load; NULL_BYTE and BOM_CHAR are one-character strings. Consumers grab them once at boot.
34
+ *
35
+ * Codepoint tables:
36
+ *
37
+ * BIDI_RANGES — Unicode bidi-override family (CVE-2021-42574
38
+ * Trojan Source). LRM U+200E / RLM U+200F / ALM U+061C / LRE
39
+ * U+202A / RLE U+202B / PDF U+202C / LRO U+202D / RLO U+202E /
40
+ * LRI U+2066 / RLI U+2067 / FSI U+2068 / PDI U+2069.
41
+ *
42
+ * C0_CTRL_RANGES — C0 control characters minus tab (U+09) / lf
43
+ * (U+0A) / cr (U+0D) — those are dialect-shaped chars that
44
+ * parsers handle separately. Everything else (U+00, U+01-U+08,
45
+ * U+0B-U+0C, U+0E-U+1F) flagged as control-byte injection.
46
+ *
47
+ * ZERO_WIDTH_RANGES — invisible-formatting / zero-width chars
48
+ * attackers use to hide payloads:
49
+ * SHY U+00AD ZWSP U+200B ZWNJ U+200C ZWJ U+200D
50
+ * WJ U+2060 BOM U+FEFF
51
+ */
52
+
var HEX_RADIX = 16; // base-16 radix, not byte size

// hex4(cp) — render a codepoint as regex-source text of the form
// backslash + "u" + hex digits. The digits are upper-case and
// left-padded with zeros to a minimum width of four. The return value
// is pattern text meant to be spliced into a `new RegExp(...)` source;
// it is not the character itself.
//
// Values above 0xFFFF come out with more than four digits, which a
// pattern compiled without the "u" flag does not read as a single
// escape. Callers are expected to pass BMP codepoints.
function hex4(cp) {
  var digits = cp.toString(HEX_RADIX).toUpperCase();
  var zeros = "";
  for (var missing = 4 - digits.length; missing > 0; missing -= 1) {
    zeros += "0";
  }
  return "\\u" + zeros + digits;
}
// charClass(rangeList) — build the inside of a regex character class
// from a codepoint table. Each table entry is either a single codepoint
// (emitted as one hex4 escape) or a two-element array, emitted as
// "first-last" using the hex4 escape of each end. The pieces are
// concatenated with no separator and without the surrounding brackets;
// the caller adds "[" and "]".
function charClass(rangeList) {
  return rangeList.reduce(function (body, entry) {
    if (Array.isArray(entry)) {
      return body + hex4(entry[0]) + "-" + hex4(entry[1]);
    }
    return body + hex4(entry);
  }, "");
}
// fromCp(cp) — shorthand for String.fromCharCode: returns the
// one-code-unit string for the given numeric value.
function fromCp(cp) {
  var ch = String.fromCharCode(cp);
  return ch;
}
66
+
// Codepoint tables. Each entry is either one codepoint (a number) or an
// inclusive [first, last] pair, which is the shape charClass() consumes.
var BIDI_RANGES = [
  0x200E,           // LRM
  0x200F,           // RLM
  0x061C,           // ALM
  [0x202A, 0x202E], // LRE, RLE, PDF, LRO, RLO
  [0x2066, 0x2069]  // LRI, RLI, FSI, PDI
];
var C0_CTRL_RANGES = [
  [0x0000, 0x0008], // U+00 through U+08
  0x000B,           // tab U+09 and lf U+0A are left out
  0x000C,           // cr U+0D is left out
  [0x000E, 0x001F]  // U+0E through U+1F
];
var ZERO_WIDTH_RANGES = [
  0x00AD,           // SHY
  [0x200B, 0x200D], // ZWSP, ZWNJ, ZWJ
  0x2060,           // WJ
  0xFEFF            // BOM
];

// Bracketed class source for each table, computed once and shared by
// the plain and the global ("g") variant of each pattern.
var BIDI_CLASS = "[" + charClass(BIDI_RANGES) + "]";
var C0_CTRL_CLASS = "[" + charClass(C0_CTRL_RANGES) + "]";
var ZERO_WIDTH_CLASS = "[" + charClass(ZERO_WIDTH_RANGES) + "]";

// allow:dynamic-regex — codepoints from BIDI_RANGES literal table
var BIDI_RE = new RegExp(BIDI_CLASS);
// allow:dynamic-regex — codepoints from BIDI_RANGES literal table
var BIDI_RE_G = new RegExp(BIDI_CLASS, "g");
// allow:dynamic-regex — codepoints from C0_CTRL_RANGES literal table
var C0_CTRL_RE = new RegExp(C0_CTRL_CLASS);
// allow:dynamic-regex — codepoints from C0_CTRL_RANGES literal table
var C0_CTRL_RE_G = new RegExp(C0_CTRL_CLASS, "g");
// allow:dynamic-regex — codepoints from ZERO_WIDTH_RANGES literal table
var ZERO_WIDTH_RE = new RegExp(ZERO_WIDTH_CLASS);
// allow:dynamic-regex — codepoints from ZERO_WIDTH_RANGES literal table
var ZW_RE_G = new RegExp(ZERO_WIDTH_CLASS, "g");
// allow:dynamic-regex — single literal codepoint U+0000
var NULL_RE_G = new RegExp(hex4(0x0000), "g");

// The same two codepoints as actual one-character strings, for callers
// that use indexOf / string comparison instead of a regex.
var NULL_BYTE = fromCp(0x0000);
var BOM_CHAR = fromCp(0xFEFF);
88
+
// Script catalog for IDN-homograph / mixed-script confusable detection
// (UTS #39). Each key is a script name; its value is a list of
// inclusive [first, last] codepoint pairs. Consumers named by the
// original author: guard-domain, guard-email, safe-url IDN host-label
// classification. Keeping the table here means adding a script is a
// single edit.
//
// NOTE(review): the 0x00C0-0x024F Latin pair also spans U+00D7 and
// U+00F7 (multiplication / division signs) — confirm whether those
// should count as Latin for classification purposes.
var SCRIPT_RANGES = {
  // Latin: A-Z, a-z, U+00C0-U+024F, U+1E00-U+1EFF
  latin: [
    [0x0041, 0x005A],
    [0x0061, 0x007A],
    [0x00C0, 0x024F],
    [0x1E00, 0x1EFF]
  ],
  // Cyrillic + Cyrillic Supplement
  cyrillic: [
    [0x0400, 0x04FF],
    [0x0500, 0x052F]
  ],
  // Greek + Greek Extended
  greek: [
    [0x0370, 0x03FF],
    [0x1F00, 0x1FFF]
  ],
  // Armenian
  armenian: [
    [0x0530, 0x058F]
  ],
  // Cherokee + Cherokee Supplement
  cherokee: [
    [0x13A0, 0x13FF],
    [0xAB70, 0xABBF]
  ],
  // CJK Unified Ideographs
  han: [
    [0x4E00, 0x9FFF]
  ],
  // Hiragana
  hiragana: [
    [0x3040, 0x309F]
  ],
  // Katakana
  katakana: [
    [0x30A0, 0x30FF]
  ],
  // Hangul Syllables
  hangul: [
    [0xAC00, 0xD7AF]
  ],
  // Arabic
  arabic: [
    [0x0600, 0x06FF]
  ],
  // Hebrew
  hebrew: [
    [0x0590, 0x05FF]
  ],
};
109
+
// scriptFor(cp) — look a codepoint up in SCRIPT_RANGES and return the
// name of the first script (in table key order) that has an inclusive
// range containing it. Returns null when no range matches, which is
// the case for digits, punctuation, symbols and any script that is not
// in the catalog.
function scriptFor(cp) {
  var match = null;
  Object.keys(SCRIPT_RANGES).some(function (name) {
    var inScript = SCRIPT_RANGES[name].some(function (span) {
      return cp >= span[0] && cp <= span[1];
    });
    if (inScript) match = name;
    return inScript; // stop scanning at the first script that matches
  });
  return match;
}
123
+
// detectMixedScripts(label, allowedScripts?) — report whether a label
// mixes writing systems (the homograph shape: a Cyrillic letter hidden
// inside an otherwise-Latin label, and so on).
//
// Returns null when:
//   - label is not a string or is empty,
//   - at most one catalogued script occurs in it, or
//   - allowedScripts is supplied and every script seen is on it
//     (legitimate mixed-script content, e.g. an English word inside a
//     Japanese label).
// Otherwise returns an array holding the FULL set of script names seen,
// in order of first appearance; the caller decides refuse / audit /
// strip.
//
// The label is walked one UTF-16 code unit at a time, and code units
// that scriptFor() maps to null do not count toward any script.
function detectMixedScripts(label, allowedScripts) {
  if (typeof label !== "string" || label.length === 0) return null;

  var found = [];
  label.split("").forEach(function (unit) {
    var name = scriptFor(unit.charCodeAt(0));
    if (name !== null && found.indexOf(name) === -1) found.push(name);
  });

  if (found.length <= 1) return null;
  if (!allowedScripts) return found;

  var everyAllowed = found.every(function (name) {
    return allowedScripts.indexOf(name) !== -1;
  });
  return everyAllowed ? null : found;
}
151
+
// detectCharThreats(text, opts, codePrefix) — scan text for
// character-class threats (bidi override / null byte / C0 control) and
// return an array of issue objects, at most one per class. Meant for a
// guard-* primitive's detection pass so each guard does not repeat the
// per-class match-and-push block.
//
//   text        value to scan; anything that is not a string yields [].
//   opts        policy object. A class is skipped only when its policy
//               is exactly "allow":
//                 opts.bidiPolicy / opts.nullBytePolicy /
//                 opts.controlPolicy
//               A missing policy key means "detect". A missing opts
//               object is treated the same as {}; the detector must
//               not fail open just because the argument was omitted.
//   codePrefix  prepended to the rule suffix to form ruleId
//               (".bidi", ".null-byte", ".control").
//
// Issue shape mirrors guard-* convention:
//   { kind, severity, ruleId, location, snippet }
// where location is the index of the first offending code unit in text.
//
// A NUL character is part of the C0 control table as well, so when both
// the null-byte and the control checks are active it is reported by
// each of them.
//
//   issues.push.apply(issues,
//     codepointClass.detectCharThreats(text, opts, "html"));
function detectCharThreats(text, opts, codePrefix) {
  var issues = [];
  if (typeof text !== "string") return issues;

  // Absent opts behaves like an empty policy object: every class is
  // checked, exactly as when opts is {}.
  var policy = opts || {};

  if (policy.bidiPolicy !== "allow") {
    var bidiMatch = text.match(BIDI_RE);
    if (bidiMatch) {
      issues.push({
        kind: "bidi-override", severity: "critical",
        ruleId: codePrefix + ".bidi",
        location: bidiMatch.index,
        snippet: "Unicode bidi override (CVE-2021-42574 Trojan Source)",
      });
    }
  }
  if (policy.nullBytePolicy !== "allow") {
    var nullIdx = text.indexOf(NULL_BYTE);
    if (nullIdx >= 0) {
      issues.push({
        kind: "null-byte", severity: "critical",
        ruleId: codePrefix + ".null-byte",
        location: nullIdx,
        snippet: "null byte at byte " + nullIdx,
      });
    }
  }
  if (policy.controlPolicy !== "allow") {
    var ctrlMatch = text.match(C0_CTRL_RE);
    if (ctrlMatch) {
      issues.push({
        kind: "control-char", severity: "high",
        ruleId: codePrefix + ".control",
        location: ctrlMatch.index,
        snippet: "C0 control char U+" + ctrlMatch[0].charCodeAt(0).toString(HEX_RADIX),
      });
    }
  }
  return issues;
}
200
+
// assertNoCharThreats(text, opts, errorFactory, codePrefix) — throw
// when text contains a character class whose policy in opts is exactly
// "reject". The thrown value is whatever errorFactory(code, message)
// returns; code is codePrefix plus ".bidi", ".null-byte" or ".control".
//
// Checks run in the order bidi, null byte, C0 control, and the first
// hit throws. Non-string text, a missing opts object, and any policy
// value other than "reject" are all no-ops.
//
// Opt-name vocabulary: bidiPolicy / nullBytePolicy / controlPolicy
// (the standard guard-* family naming; older guard-csv uses different
// names and keeps its inline checks).
function assertNoCharThreats(text, opts, errorFactory, codePrefix) {
  if (typeof text !== "string" || !opts) return;

  if (opts.bidiPolicy === "reject") {
    if (BIDI_RE.test(text)) { // allow:regex-no-length-cap — caller bounds length before invoking
      throw errorFactory(codePrefix + ".bidi",
        "input contains Unicode bidi override (CVE-2021-42574)");
    }
  }

  if (opts.nullBytePolicy === "reject") {
    if (text.indexOf(NULL_BYTE) >= 0) {
      throw errorFactory(codePrefix + ".null-byte",
        "input contains null byte");
    }
  }

  if (opts.controlPolicy === "reject") {
    if (C0_CTRL_RE.test(text)) { // allow:regex-no-length-cap — caller bounds length before invoking
      throw errorFactory(codePrefix + ".control",
        "input contains C0 control character");
    }
  }
}
221
+
// applyCharStripPolicies(text, opts) — remove every character class
// whose policy in opts is exactly "strip" and return the cleaned
// string. Passes run in this order, each on the output of the previous:
//
//   opts.bidiPolicy      -> bidi overrides      (BIDI_RE_G)
//   opts.controlPolicy   -> C0 controls         (C0_CTRL_RE_G)
//   opts.nullBytePolicy  -> null bytes          (NULL_RE_G)
//   opts.zeroWidthPolicy -> zero-width chars    (ZW_RE_G)
//
// Non-string text is handed back untouched, as is any text when opts is
// missing. Shared by the guards' sanitize paths so the replace()
// sequence lives in one place.
function applyCharStripPolicies(text, opts) {
  if (typeof text !== "string" || !opts) return text;

  var passes = [
    { policy: "bidiPolicy", pattern: BIDI_RE_G },
    { policy: "controlPolicy", pattern: C0_CTRL_RE_G },
    { policy: "nullBytePolicy", pattern: NULL_RE_G },
    { policy: "zeroWidthPolicy", pattern: ZW_RE_G }
  ];

  return passes.reduce(function (cleaned, pass) {
    if (opts[pass.policy] !== "strip") return cleaned;
    return cleaned.replace(pass.pattern, "");
  }, text);
}
239
+
// Public surface of the module. Key order is kept as originally
// published.
var publicApi = {
  // escape / class builders
  hex4: hex4,
  charClass: charClass,
  fromCp: fromCp,

  // codepoint tables
  BIDI_RANGES: BIDI_RANGES,
  C0_CTRL_RANGES: C0_CTRL_RANGES,
  ZERO_WIDTH_RANGES: ZERO_WIDTH_RANGES,

  // compiled patterns and single-character constants
  BIDI_RE: BIDI_RE,
  BIDI_RE_G: BIDI_RE_G,
  C0_CTRL_RE: C0_CTRL_RE,
  C0_CTRL_RE_G: C0_CTRL_RE_G,
  ZERO_WIDTH_RE: ZERO_WIDTH_RE,
  ZW_RE_G: ZW_RE_G,
  NULL_RE_G: NULL_RE_G,
  NULL_BYTE: NULL_BYTE,
  BOM_CHAR: BOM_CHAR,

  // policy helpers
  applyCharStripPolicies: applyCharStripPolicies,
  assertNoCharThreats: assertNoCharThreats,
  detectCharThreats: detectCharThreats,

  // script classification
  SCRIPT_RANGES: SCRIPT_RANGES,
  scriptFor: scriptFor,
  detectMixedScripts: detectMixedScripts,
};

module.exports = publicApi;