namespace-guard 0.6.1 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Paul Wood FRSA
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -5,7 +5,7 @@
5
5
  [![TypeScript](https://img.shields.io/badge/TypeScript-5.0+-blue.svg)](https://www.typescriptlang.org/)
6
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7
7
 
8
- **[Live Demo](https://paultendo.github.io/namespace-guard/)** — try it in your browser
8
+ **[Live Demo](https://paultendo.github.io/namespace-guard/)** — try it in your browser | **[Blog Post](https://paultendo.github.io/posts/namespace-guard-launch/)** — why this exists
9
9
 
10
10
  **Check slug/handle uniqueness across multiple database tables with reserved name protection.**
11
11
 
@@ -60,7 +60,7 @@ if (result.available) {
60
60
  // Create the org
61
61
  } else {
62
62
  // Show error: result.message
63
- // e.g., "That name is reserved." or "That name is already in use."
63
+ // e.g., "That name is reserved. Try another one." or "That name is already in use."
64
64
  }
65
65
  ```
66
66
 
@@ -98,6 +98,7 @@ import { createDrizzleAdapter } from "namespace-guard/adapters/drizzle";
98
98
  import { db } from "./db";
99
99
  import { users, organizations } from "./schema";
100
100
 
101
+ // Pass eq directly, or use { eq, ilike } for case-insensitive support
101
102
  const adapter = createDrizzleAdapter(db, { users, organizations }, eq);
102
103
  ```
103
104
 
@@ -296,6 +297,100 @@ const guard = createNamespaceGuard({
296
297
 
297
298
  No words are bundled — use any word list you like (e.g., the `bad-words` npm package, your own list, or an external API wrapped in a custom validator).
298
299
 
300
+ ### Built-in Homoglyph Validator
301
+
302
+ Prevent spoofing attacks where visually similar characters from any Unicode script are substituted for Latin letters (e.g., Cyrillic "а" for Latin "a" in "admin"):
303
+
304
+ ```typescript
305
+ import { createNamespaceGuard, createHomoglyphValidator } from "namespace-guard";
306
+
307
+ const guard = createNamespaceGuard({
308
+ sources: [/* ... */],
309
+ validators: [
310
+ createHomoglyphValidator(),
311
+ ],
312
+ }, adapter);
313
+ ```
314
+
315
+ Options:
316
+
317
+ ```typescript
318
+ createHomoglyphValidator({
319
+ message: "Custom rejection message.", // optional
320
+ additionalMappings: { "\u0261": "g" }, // extend the built-in map
321
+ rejectMixedScript: true, // also reject Latin + non-Latin script mixing
322
+ })
323
+ ```
324
+
325
+ The built-in `CONFUSABLE_MAP` contains 613 character pairs generated from [Unicode TR39 confusables.txt](https://unicode.org/reports/tr39/) plus supplemental Latin small capitals. It covers Cyrillic, Greek, Armenian, Cherokee, IPA, Coptic, Lisu, Canadian Syllabics, Georgian, and 20+ other scripts. The map is exported for inspection or extension, and is regenerable for new Unicode versions with `npx tsx scripts/generate-confusables.ts`.
326
+
327
+ ### How the anti-spoofing pipeline works
328
+
329
+ Most confusable-detection libraries apply a character map in isolation. namespace-guard uses a three-stage pipeline where each stage is aware of the others:
330
+
331
+ ```
332
+ Input → NFKC normalize → Confusable map → Mixed-script reject
333
+ (stage 1) (stage 2) (stage 3)
334
+ ```
335
+
336
+ **Stage 1: NFKC normalization** collapses full-width characters (`Ｉ` U+FF29 → `I`), ligatures (`ﬁ` U+FB01 → `fi`), superscripts, and other Unicode compatibility forms to their canonical equivalents. This runs first, before any confusable check.
337
+
338
+ **Stage 2: Confusable map** catches characters that survive NFKC but visually mimic Latin letters — Cyrillic `а` for `a`, Greek `ο` for `o`, Cherokee `Ꭺ` for `A`, and 600+ others from the Unicode Consortium's [confusables.txt](https://unicode.org/Public/security/latest/confusables.txt).
339
+
340
+ **Stage 3: Mixed-script rejection** (`rejectMixedScript: true`) blocks identifiers that mix Latin with non-Latin scripts (Hebrew, Arabic, Devanagari, Thai, Georgian, Ethiopic, etc.) even if the specific characters aren't in the confusable map. This catches novel homoglyphs that the map doesn't cover.
341
+
342
+ #### Why NFKC-aware filtering matters
343
+
344
+ The key insight: TR39's confusables.txt and NFKC normalization sometimes disagree. For example, Unicode says capital `I` (U+0049) is confusable with lowercase `l` — visually true in many fonts. But NFKC maps Mathematical Bold `𝐈` (U+1D408) to `I`, not `l`. If you naively ship the TR39 mapping (`𝐈` → `l`), the confusable check will never see that character — NFKC already converted it to `I` in stage 1.
345
+
346
+ We found 31 entries where this happens:
347
+
348
+ | Character | TR39 says | NFKC says | Winner |
349
+ |-----------|-----------|-----------|--------|
350
+ | `ſ` Long S (U+017F) | `f` | `s` | NFKC (`s` is correct) |
351
+ | `Ⅰ` Roman Numeral I (U+2160) | `l` | `i` | NFKC (`i` is correct) |
352
+ | `Ｉ` Fullwidth I (U+FF29) | `l` | `i` | NFKC (`i` is correct) |
353
+ | `𝟎` Math Bold 0 (U+1D7CE) | `o` | `0` | NFKC (`0` is correct) |
354
+ | 11 Mathematical I variants | `l` | `i` | NFKC |
355
+ | 12 Mathematical 0/1 variants | `o`/`l` | `0`/`1` | NFKC |
356
+
357
+ These entries are dead code in any pipeline that runs NFKC first — and worse, they encode the *wrong* mapping. The generate script (`scripts/generate-confusables.ts`) automatically detects and excludes them.
358
+
359
+ ## Unicode Normalization
360
+
361
+ By default, `normalize()` applies [NFKC normalization](https://unicode.org/reports/tr15/) before lowercasing. This collapses full-width characters, ligatures, superscripts, and other Unicode compatibility forms to their canonical equivalents:
362
+
363
+ ```typescript
364
+ normalize("\uff48\uff45\uff4c\uff4c\uff4f"); // "hello" (full-width → ASCII)
365
+ normalize("\ufb01nance"); // "finance" (fi ligature → fi)
366
+ ```
367
+
368
+ NFKC is a no-op for ASCII input and matches what ENS, GitHub, and Unicode IDNA standards mandate. To opt out:
369
+
370
+ ```typescript
371
+ const guard = createNamespaceGuard({
372
+ sources: [/* ... */],
373
+ normalizeUnicode: false,
374
+ }, adapter);
375
+ ```
376
+
377
+ ## Rejecting Purely Numeric Identifiers
378
+
379
+ Twitter/X blocks purely numeric handles. Enable this with `allowPurelyNumeric: false`:
380
+
381
+ ```typescript
382
+ const guard = createNamespaceGuard({
383
+ sources: [/* ... */],
384
+ allowPurelyNumeric: false,
385
+ messages: {
386
+ purelyNumeric: "Handles cannot be all numbers.", // optional custom message
387
+ },
388
+ }, adapter);
389
+
390
+ await guard.check("123456"); // { available: false, reason: "invalid", message: "Handles cannot be all numbers." }
391
+ await guard.check("abc123"); // available (has letters)
392
+ ```
393
+
299
394
  ## Conflict Suggestions
300
395
 
301
396
  When a slug is taken, automatically suggest available alternatives using pluggable strategies:
@@ -512,9 +607,9 @@ Validate format only (no database queries).
512
607
 
513
608
  ---
514
609
 
515
- ### `normalize(identifier)`
610
+ ### `normalize(identifier, options?)`
516
611
 
517
- Utility function to normalize identifiers. Trims whitespace, lowercases, and strips leading `@` symbols.
612
+ Utility function to normalize identifiers. Trims whitespace, applies NFKC Unicode normalization (by default), lowercases, and strips leading `@` symbols. Pass `{ unicode: false }` to skip NFKC.
518
613
 
519
614
  ```typescript
520
615
  import { normalize } from "namespace-guard";
@@ -664,6 +759,8 @@ Full TypeScript support with exported types:
664
759
  import {
665
760
  createNamespaceGuard,
666
761
  createProfanityValidator,
762
+ createHomoglyphValidator,
763
+ CONFUSABLE_MAP,
667
764
  normalize,
668
765
  type NamespaceConfig,
669
766
  type NamespaceSource,
package/dist/cli.js CHANGED
@@ -266,8 +266,10 @@ function resolveGenerator(suggest, pattern) {
266
266
  return result;
267
267
  };
268
268
  }
269
- function normalize(raw) {
270
- return raw.trim().toLowerCase().replace(/^@+/, "");
269
+ function normalize(raw, options) {
270
+ const trimmed = raw.trim();
271
+ const nfkc = options?.unicode ?? true ? trimmed.normalize("NFKC") : trimmed;
272
+ return nfkc.toLowerCase().replace(/^@+/, "");
271
273
  }
272
274
  function buildReservedMap(reserved) {
273
275
  const map = /* @__PURE__ */ new Map();
@@ -291,6 +293,9 @@ function createNamespaceGuard(config, adapter) {
291
293
  const invalidMsg = configMessages.invalid ?? DEFAULT_MESSAGES.invalid;
292
294
  const takenMsg = configMessages.taken ?? DEFAULT_MESSAGES.taken;
293
295
  const validators = config.validators ?? [];
296
+ const normalizeOpts = { unicode: config.normalizeUnicode ?? true };
297
+ const allowPurelyNumeric = config.allowPurelyNumeric ?? true;
298
+ const purelyNumericMsg = configMessages.purelyNumeric ?? "Identifiers cannot be purely numeric.";
294
299
  const cacheEnabled = !!config.cache;
295
300
  const cacheTtl = config.cache?.ttl ?? 5e3;
296
301
  const cacheMaxSize = 1e3;
@@ -324,38 +329,44 @@ function createNamespaceGuard(config, adapter) {
324
329
  return defaultReservedMsg;
325
330
  }
326
331
  function validateFormat(identifier) {
327
- const normalized = normalize(identifier);
332
+ const normalized = normalize(identifier, normalizeOpts);
328
333
  if (!pattern.test(normalized)) {
329
334
  return invalidMsg;
330
335
  }
336
+ if (!allowPurelyNumeric && /^\d+(-\d+)*$/.test(normalized)) {
337
+ return purelyNumericMsg;
338
+ }
331
339
  if (reservedMap.has(normalized)) {
332
340
  return getReservedMessage(reservedMap.get(normalized));
333
341
  }
334
342
  return null;
335
343
  }
344
+ function isOwnedByScope(existing, source, scope) {
345
+ if (!source.scopeKey) return false;
346
+ const scopeValue = scope[source.scopeKey];
347
+ const idColumn = source.idColumn ?? "id";
348
+ const existingId = existing[idColumn];
349
+ return !!(scopeValue && existingId && scopeValue === String(existingId));
350
+ }
336
351
  async function checkDbOnly(value, scope) {
337
352
  const findOptions = config.caseInsensitive ? { caseInsensitive: true } : void 0;
338
353
  const checks = config.sources.map(async (source) => {
339
354
  const existing = await cachedFindOne(source, value, findOptions);
340
355
  if (!existing) return null;
341
- if (source.scopeKey) {
342
- const scopeValue = scope[source.scopeKey];
343
- const idColumn = source.idColumn ?? "id";
344
- const existingId = existing[idColumn];
345
- if (scopeValue && existingId && scopeValue === String(existingId)) {
346
- return null;
347
- }
348
- }
356
+ if (isOwnedByScope(existing, source, scope)) return null;
349
357
  return source.name;
350
358
  });
351
359
  const results = await Promise.all(checks);
352
360
  return !results.some((r) => r !== null);
353
361
  }
354
362
  async function check(identifier, scope = {}, options) {
355
- const normalized = normalize(identifier);
363
+ const normalized = normalize(identifier, normalizeOpts);
356
364
  if (!pattern.test(normalized)) {
357
365
  return { available: false, reason: "invalid", message: invalidMsg };
358
366
  }
367
+ if (!allowPurelyNumeric && /^\d+(-\d+)*$/.test(normalized)) {
368
+ return { available: false, reason: "invalid", message: purelyNumericMsg };
369
+ }
359
370
  const reservedCategory = reservedMap.get(normalized);
360
371
  if (reservedCategory) {
361
372
  return {
@@ -380,14 +391,7 @@ function createNamespaceGuard(config, adapter) {
380
391
  const checks = config.sources.map(async (source) => {
381
392
  const existing = await cachedFindOne(source, normalized, findOptions);
382
393
  if (!existing) return null;
383
- if (source.scopeKey) {
384
- const scopeValue = scope[source.scopeKey];
385
- const idColumn = source.idColumn ?? "id";
386
- const existingId = existing[idColumn];
387
- if (scopeValue && existingId && scopeValue === String(existingId)) {
388
- return null;
389
- }
390
- }
394
+ if (isOwnedByScope(existing, source, scope)) return null;
391
395
  return source.name;
392
396
  });
393
397
  const results = await Promise.all(checks);
@@ -405,7 +409,7 @@ function createNamespaceGuard(config, adapter) {
405
409
  const candidates = generate(normalized);
406
410
  const suggestions = [];
407
411
  const passedSync = candidates.filter(
408
- (c) => pattern.test(c) && !reservedMap.has(c)
412
+ (c) => pattern.test(c) && !reservedMap.has(c) && (allowPurelyNumeric || !/^\d+(-\d+)*$/.test(c))
409
413
  );
410
414
  for (let i = 0; i < passedSync.length && suggestions.length < max; i += max) {
411
415
  const batch = passedSync.slice(i, i + max);
package/dist/cli.mjs CHANGED
@@ -243,8 +243,10 @@ function resolveGenerator(suggest, pattern) {
243
243
  return result;
244
244
  };
245
245
  }
246
- function normalize(raw) {
247
- return raw.trim().toLowerCase().replace(/^@+/, "");
246
+ function normalize(raw, options) {
247
+ const trimmed = raw.trim();
248
+ const nfkc = options?.unicode ?? true ? trimmed.normalize("NFKC") : trimmed;
249
+ return nfkc.toLowerCase().replace(/^@+/, "");
248
250
  }
249
251
  function buildReservedMap(reserved) {
250
252
  const map = /* @__PURE__ */ new Map();
@@ -268,6 +270,9 @@ function createNamespaceGuard(config, adapter) {
268
270
  const invalidMsg = configMessages.invalid ?? DEFAULT_MESSAGES.invalid;
269
271
  const takenMsg = configMessages.taken ?? DEFAULT_MESSAGES.taken;
270
272
  const validators = config.validators ?? [];
273
+ const normalizeOpts = { unicode: config.normalizeUnicode ?? true };
274
+ const allowPurelyNumeric = config.allowPurelyNumeric ?? true;
275
+ const purelyNumericMsg = configMessages.purelyNumeric ?? "Identifiers cannot be purely numeric.";
271
276
  const cacheEnabled = !!config.cache;
272
277
  const cacheTtl = config.cache?.ttl ?? 5e3;
273
278
  const cacheMaxSize = 1e3;
@@ -301,38 +306,44 @@ function createNamespaceGuard(config, adapter) {
301
306
  return defaultReservedMsg;
302
307
  }
303
308
  function validateFormat(identifier) {
304
- const normalized = normalize(identifier);
309
+ const normalized = normalize(identifier, normalizeOpts);
305
310
  if (!pattern.test(normalized)) {
306
311
  return invalidMsg;
307
312
  }
313
+ if (!allowPurelyNumeric && /^\d+(-\d+)*$/.test(normalized)) {
314
+ return purelyNumericMsg;
315
+ }
308
316
  if (reservedMap.has(normalized)) {
309
317
  return getReservedMessage(reservedMap.get(normalized));
310
318
  }
311
319
  return null;
312
320
  }
321
+ function isOwnedByScope(existing, source, scope) {
322
+ if (!source.scopeKey) return false;
323
+ const scopeValue = scope[source.scopeKey];
324
+ const idColumn = source.idColumn ?? "id";
325
+ const existingId = existing[idColumn];
326
+ return !!(scopeValue && existingId && scopeValue === String(existingId));
327
+ }
313
328
  async function checkDbOnly(value, scope) {
314
329
  const findOptions = config.caseInsensitive ? { caseInsensitive: true } : void 0;
315
330
  const checks = config.sources.map(async (source) => {
316
331
  const existing = await cachedFindOne(source, value, findOptions);
317
332
  if (!existing) return null;
318
- if (source.scopeKey) {
319
- const scopeValue = scope[source.scopeKey];
320
- const idColumn = source.idColumn ?? "id";
321
- const existingId = existing[idColumn];
322
- if (scopeValue && existingId && scopeValue === String(existingId)) {
323
- return null;
324
- }
325
- }
333
+ if (isOwnedByScope(existing, source, scope)) return null;
326
334
  return source.name;
327
335
  });
328
336
  const results = await Promise.all(checks);
329
337
  return !results.some((r) => r !== null);
330
338
  }
331
339
  async function check(identifier, scope = {}, options) {
332
- const normalized = normalize(identifier);
340
+ const normalized = normalize(identifier, normalizeOpts);
333
341
  if (!pattern.test(normalized)) {
334
342
  return { available: false, reason: "invalid", message: invalidMsg };
335
343
  }
344
+ if (!allowPurelyNumeric && /^\d+(-\d+)*$/.test(normalized)) {
345
+ return { available: false, reason: "invalid", message: purelyNumericMsg };
346
+ }
336
347
  const reservedCategory = reservedMap.get(normalized);
337
348
  if (reservedCategory) {
338
349
  return {
@@ -357,14 +368,7 @@ function createNamespaceGuard(config, adapter) {
357
368
  const checks = config.sources.map(async (source) => {
358
369
  const existing = await cachedFindOne(source, normalized, findOptions);
359
370
  if (!existing) return null;
360
- if (source.scopeKey) {
361
- const scopeValue = scope[source.scopeKey];
362
- const idColumn = source.idColumn ?? "id";
363
- const existingId = existing[idColumn];
364
- if (scopeValue && existingId && scopeValue === String(existingId)) {
365
- return null;
366
- }
367
- }
371
+ if (isOwnedByScope(existing, source, scope)) return null;
368
372
  return source.name;
369
373
  });
370
374
  const results = await Promise.all(checks);
@@ -382,7 +386,7 @@ function createNamespaceGuard(config, adapter) {
382
386
  const candidates = generate(normalized);
383
387
  const suggestions = [];
384
388
  const passedSync = candidates.filter(
385
- (c) => pattern.test(c) && !reservedMap.has(c)
389
+ (c) => pattern.test(c) && !reservedMap.has(c) && (allowPurelyNumeric || !/^\d+(-\d+)*$/.test(c))
386
390
  );
387
391
  for (let i = 0; i < passedSync.length && suggestions.length < max; i += max) {
388
392
  const batch = passedSync.slice(i, i + max);
package/dist/index.d.mts CHANGED
@@ -21,11 +21,19 @@ type NamespaceConfig = {
21
21
  pattern?: RegExp;
22
22
  /** Use case-insensitive matching in database queries (default: false) */
23
23
  caseInsensitive?: boolean;
24
+ /** Apply NFKC Unicode normalization during normalize() (default: true).
25
+ * Collapses full-width characters, ligatures, and compatibility forms to their canonical equivalents. */
26
+ normalizeUnicode?: boolean;
27
+ /** Allow purely numeric identifiers like "123" or "12-34" (default: true).
28
+ * Set to false to reject them, matching Twitter/X handle rules. */
29
+ allowPurelyNumeric?: boolean;
24
30
  /** Custom error messages */
25
31
  messages?: {
26
32
  invalid?: string;
27
33
  reserved?: string | Record<string, string>;
28
34
  taken?: (sourceName: string) => string;
35
+ /** Message shown when a purely numeric identifier is rejected (default: "Identifiers cannot be purely numeric.") */
36
+ purelyNumeric?: string;
29
37
  };
30
38
  /** Async validation hooks — run after format/reserved checks, before DB */
31
39
  validators?: Array<(value: string) => Promise<{
@@ -73,18 +81,28 @@ type CheckResult = {
73
81
  suggestions?: string[];
74
82
  };
75
83
  /**
76
- * Normalize a raw identifier: trims whitespace, lowercases, and strips leading `@` symbols.
84
+ * Normalize a raw identifier: trims whitespace, applies NFKC Unicode normalization,
85
+ * lowercases, and strips leading `@` symbols.
86
+ *
87
+ * NFKC normalization collapses full-width characters, ligatures, superscripts,
88
+ * and other compatibility forms to their canonical equivalents. This is a no-op
89
+ * for ASCII-only input.
77
90
  *
78
91
  * @param raw - The raw user input
92
+ * @param options - Optional settings
93
+ * @param options.unicode - Apply NFKC Unicode normalization (default: true)
79
94
  * @returns The normalized identifier
80
95
  *
81
96
  * @example
82
97
  * ```ts
83
98
  * normalize(" @Sarah "); // "sarah"
84
99
  * normalize("ACME-Corp"); // "acme-corp"
100
+ * normalize("\uff48\uff45\uff4c\uff4c\uff4f"); // "hello" (full-width → ASCII)
85
101
  * ```
86
102
  */
87
- declare function normalize(raw: string): string;
103
+ declare function normalize(raw: string, options?: {
104
+ unicode?: boolean;
105
+ }): string;
88
106
  /**
89
107
  * Create a validator that rejects identifiers containing profanity or offensive words.
90
108
  *
@@ -117,6 +135,48 @@ declare function createProfanityValidator(words: string[], options?: {
117
135
  available: false;
118
136
  message: string;
119
137
  } | null>;
138
+ /**
139
+ * Mapping of visually confusable Unicode characters to their Latin/digit equivalents.
140
+ * Generated from Unicode TR39 confusables.txt + supplemental Latin small capitals.
141
+ * Covers every single-character mapping to a lowercase Latin letter or digit,
142
+ * excluding characters already handled by NFKC normalization (either collapsed
143
+ * to the same target, or mapped to a different valid Latin char/digit).
144
+ * Regenerate: `npx tsx scripts/generate-confusables.ts`
145
+ */
146
+ declare const CONFUSABLE_MAP: Record<string, string>;
147
+ /**
148
+ * Create a validator that rejects identifiers containing homoglyph/confusable characters.
149
+ *
150
+ * Catches spoofing attacks where characters from other scripts are substituted for
151
+ * visually identical Latin characters (e.g., Cyrillic "а" for Latin "a" in "admin").
152
+ * Uses a comprehensive mapping of 613 character pairs generated from Unicode TR39
153
+ * confusables.txt, covering Cyrillic, Greek, Armenian, Cherokee, IPA, Latin small
154
+ * capitals, Canadian Syllabics, Georgian, Lisu, Coptic, and many other scripts.
155
+ *
156
+ * @param options - Optional settings
157
+ * @param options.message - Custom rejection message (default: "That name contains characters that could be confused with other letters.")
158
+ * @param options.additionalMappings - Extra confusable pairs to merge with the built-in map
159
+ * @param options.rejectMixedScript - Also reject identifiers that mix Latin with non-Latin characters from any covered script (Cyrillic, Greek, Armenian, Hebrew, Arabic, Georgian, Cherokee, Canadian Syllabics, Ethiopic, Coptic, Lisu, and more) (default: false)
160
+ * @returns An async validator function for use in `config.validators`
161
+ *
162
+ * @example
163
+ * ```ts
164
+ * const guard = createNamespaceGuard({
165
+ * sources: [{ name: "user", column: "handle" }],
166
+ * validators: [
167
+ * createHomoglyphValidator(),
168
+ * ],
169
+ * }, adapter);
170
+ * ```
171
+ */
172
+ declare function createHomoglyphValidator(options?: {
173
+ message?: string;
174
+ additionalMappings?: Record<string, string>;
175
+ rejectMixedScript?: boolean;
176
+ }): (value: string) => Promise<{
177
+ available: false;
178
+ message: string;
179
+ } | null>;
120
180
  /**
121
181
  * Create a namespace guard instance for checking slug/handle uniqueness
122
182
  * across multiple database tables with reserved name protection.
@@ -165,4 +225,4 @@ declare function createNamespaceGuard(config: NamespaceConfig, adapter: Namespac
165
225
  /** The guard instance returned by `createNamespaceGuard`. */
166
226
  type NamespaceGuard = ReturnType<typeof createNamespaceGuard>;
167
227
 
168
- export { type CheckResult, type FindOneOptions, type NamespaceAdapter, type NamespaceConfig, type NamespaceGuard, type NamespaceSource, type OwnershipScope, type SuggestStrategyName, createNamespaceGuard, createProfanityValidator, normalize };
228
+ export { CONFUSABLE_MAP, type CheckResult, type FindOneOptions, type NamespaceAdapter, type NamespaceConfig, type NamespaceGuard, type NamespaceSource, type OwnershipScope, type SuggestStrategyName, createHomoglyphValidator, createNamespaceGuard, createProfanityValidator, normalize };
package/dist/index.d.ts CHANGED
@@ -21,11 +21,19 @@ type NamespaceConfig = {
21
21
  pattern?: RegExp;
22
22
  /** Use case-insensitive matching in database queries (default: false) */
23
23
  caseInsensitive?: boolean;
24
+ /** Apply NFKC Unicode normalization during normalize() (default: true).
25
+ * Collapses full-width characters, ligatures, and compatibility forms to their canonical equivalents. */
26
+ normalizeUnicode?: boolean;
27
+ /** Allow purely numeric identifiers like "123" or "12-34" (default: true).
28
+ * Set to false to reject them, matching Twitter/X handle rules. */
29
+ allowPurelyNumeric?: boolean;
24
30
  /** Custom error messages */
25
31
  messages?: {
26
32
  invalid?: string;
27
33
  reserved?: string | Record<string, string>;
28
34
  taken?: (sourceName: string) => string;
35
+ /** Message shown when a purely numeric identifier is rejected (default: "Identifiers cannot be purely numeric.") */
36
+ purelyNumeric?: string;
29
37
  };
30
38
  /** Async validation hooks — run after format/reserved checks, before DB */
31
39
  validators?: Array<(value: string) => Promise<{
@@ -73,18 +81,28 @@ type CheckResult = {
73
81
  suggestions?: string[];
74
82
  };
75
83
  /**
76
- * Normalize a raw identifier: trims whitespace, lowercases, and strips leading `@` symbols.
84
+ * Normalize a raw identifier: trims whitespace, applies NFKC Unicode normalization,
85
+ * lowercases, and strips leading `@` symbols.
86
+ *
87
+ * NFKC normalization collapses full-width characters, ligatures, superscripts,
88
+ * and other compatibility forms to their canonical equivalents. This is a no-op
89
+ * for ASCII-only input.
77
90
  *
78
91
  * @param raw - The raw user input
92
+ * @param options - Optional settings
93
+ * @param options.unicode - Apply NFKC Unicode normalization (default: true)
79
94
  * @returns The normalized identifier
80
95
  *
81
96
  * @example
82
97
  * ```ts
83
98
  * normalize(" @Sarah "); // "sarah"
84
99
  * normalize("ACME-Corp"); // "acme-corp"
100
+ * normalize("\uff48\uff45\uff4c\uff4c\uff4f"); // "hello" (full-width → ASCII)
85
101
  * ```
86
102
  */
87
- declare function normalize(raw: string): string;
103
+ declare function normalize(raw: string, options?: {
104
+ unicode?: boolean;
105
+ }): string;
88
106
  /**
89
107
  * Create a validator that rejects identifiers containing profanity or offensive words.
90
108
  *
@@ -117,6 +135,48 @@ declare function createProfanityValidator(words: string[], options?: {
117
135
  available: false;
118
136
  message: string;
119
137
  } | null>;
138
+ /**
139
+ * Mapping of visually confusable Unicode characters to their Latin/digit equivalents.
140
+ * Generated from Unicode TR39 confusables.txt + supplemental Latin small capitals.
141
+ * Covers every single-character mapping to a lowercase Latin letter or digit,
142
+ * excluding characters already handled by NFKC normalization (either collapsed
143
+ * to the same target, or mapped to a different valid Latin char/digit).
144
+ * Regenerate: `npx tsx scripts/generate-confusables.ts`
145
+ */
146
+ declare const CONFUSABLE_MAP: Record<string, string>;
147
+ /**
148
+ * Create a validator that rejects identifiers containing homoglyph/confusable characters.
149
+ *
150
+ * Catches spoofing attacks where characters from other scripts are substituted for
151
+ * visually identical Latin characters (e.g., Cyrillic "а" for Latin "a" in "admin").
152
+ * Uses a comprehensive mapping of 613 character pairs generated from Unicode TR39
153
+ * confusables.txt, covering Cyrillic, Greek, Armenian, Cherokee, IPA, Latin small
154
+ * capitals, Canadian Syllabics, Georgian, Lisu, Coptic, and many other scripts.
155
+ *
156
+ * @param options - Optional settings
157
+ * @param options.message - Custom rejection message (default: "That name contains characters that could be confused with other letters.")
158
+ * @param options.additionalMappings - Extra confusable pairs to merge with the built-in map
159
+ * @param options.rejectMixedScript - Also reject identifiers that mix Latin with non-Latin characters from any covered script (Cyrillic, Greek, Armenian, Hebrew, Arabic, Georgian, Cherokee, Canadian Syllabics, Ethiopic, Coptic, Lisu, and more) (default: false)
160
+ * @returns An async validator function for use in `config.validators`
161
+ *
162
+ * @example
163
+ * ```ts
164
+ * const guard = createNamespaceGuard({
165
+ * sources: [{ name: "user", column: "handle" }],
166
+ * validators: [
167
+ * createHomoglyphValidator(),
168
+ * ],
169
+ * }, adapter);
170
+ * ```
171
+ */
172
+ declare function createHomoglyphValidator(options?: {
173
+ message?: string;
174
+ additionalMappings?: Record<string, string>;
175
+ rejectMixedScript?: boolean;
176
+ }): (value: string) => Promise<{
177
+ available: false;
178
+ message: string;
179
+ } | null>;
120
180
  /**
121
181
  * Create a namespace guard instance for checking slug/handle uniqueness
122
182
  * across multiple database tables with reserved name protection.
@@ -165,4 +225,4 @@ declare function createNamespaceGuard(config: NamespaceConfig, adapter: Namespac
165
225
  /** The guard instance returned by `createNamespaceGuard`. */
166
226
  type NamespaceGuard = ReturnType<typeof createNamespaceGuard>;
167
227
 
168
- export { type CheckResult, type FindOneOptions, type NamespaceAdapter, type NamespaceConfig, type NamespaceGuard, type NamespaceSource, type OwnershipScope, type SuggestStrategyName, createNamespaceGuard, createProfanityValidator, normalize };
228
+ export { CONFUSABLE_MAP, type CheckResult, type FindOneOptions, type NamespaceAdapter, type NamespaceConfig, type NamespaceGuard, type NamespaceSource, type OwnershipScope, type SuggestStrategyName, createHomoglyphValidator, createNamespaceGuard, createProfanityValidator, normalize };