npm - zero-contact - Versions diffs - 0.1.0 - Mend

zero-contact 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/README.md +108 -0
package/dist/index.cjs +572 -0
package/dist/index.cjs.map +1 -0
package/dist/index.d.cts +12 -0
package/dist/index.d.ts +12 -0
package/dist/index.js +566 -0
package/dist/index.js.map +1 -0
package/dist/locales/en.cjs +48 -0
package/dist/locales/en.cjs.map +1 -0
package/dist/locales/en.d.cts +5 -0
package/dist/locales/en.d.ts +5 -0
package/dist/locales/en.js +46 -0
package/dist/locales/en.js.map +1 -0
package/dist/locales/index.cjs +79 -0
package/dist/locales/index.cjs.map +1 -0
package/dist/locales/index.d.cts +3 -0
package/dist/locales/index.d.ts +3 -0
package/dist/locales/index.js +76 -0
package/dist/locales/index.js.map +1 -0
package/dist/locales/tr.cjs +55 -0
package/dist/locales/tr.cjs.map +1 -0
package/dist/locales/tr.d.cts +5 -0
package/dist/locales/tr.d.ts +5 -0
package/dist/locales/tr.js +53 -0
package/dist/locales/tr.js.map +1 -0
package/dist/types-C4A9Bbv7.d.cts +30 -0
package/dist/types-C4A9Bbv7.d.ts +30 -0
package/package.json +97 -0

package/README.md ADDED Viewed

@@ -0,0 +1,108 @@
+# zero-contact
+Detect obfuscated phone numbers in user-generated text. Built for moderation pipelines where contact sharing is restricted.
+Supports numeric, word-spelled, emoji, homoglyph, and mixed obfuscation formats in Turkish and English.
+## Install
+```bash
+npm install zero-contact
+```
+## Quick start
+```ts
+import { detect, mask, quickCheck } from 'zero-contact';
+quickCheck('hello world'); // false
+detect('0532 123 45 67');
+// { detected: true, matches: [{ normalized: '05321234567', confidence: 0.9, ... }] }
+detect('beş üç iki bir iki üç dört beş altı yedi');
+// { detected: true, matches: [{ normalized: '5321234567', types: ['word'], ... }] }
+mask('Arayın: 0532 123 45 67');
+// 'Arayın: #### ### ## ##'
+```
+## API
+### `quickCheck(text, options?)`
+Fast pre-scan. Returns `true` when the text may contain a phone number. Use this as a cheap gate before `detect()`.
+### `detect(text, options?)`
+Full pipeline. Returns `{ detected, matches }` with confidence scores and source ranges.
+### `mask(text, options?)`
+Detects phone-like sequences and masks them in place. Whitespace inside a match is kept; every other character is replaced with the mask character (`#` by default), so the string length is preserved when the mask is a single character.
+### Options
+| Option | Default | Description |
+|---|---|---|
+| `locales` | `[tr, en]` | Locale dictionaries for word-digit matching |
+| `minDigits` | `7` | Minimum consecutive digits to flag |
+| `minConfidence` | `0.6` | Minimum confidence score (0–1) |
+| `countryHint` | `'TR'` | Scoring hint for national formats |
+| `char` | `'#'` | Mask character (`mask` only) |
+## Tree-shakeable locales
+Import only the locales you need:
+```ts
+import { detect } from 'zero-contact';
+import { tr } from 'zero-contact/locales/tr';
+detect('beş üç iki bir iki üç dört beş altı yedi', { locales: [tr] });
+```
+## What it detects
+- Plain numbers: `0532 123 45 67`, `+90 532 123 45 67`
+- Unicode digits: `٠٥٣٢ ١٢٣ ٤٥ ٦٧`
+- Word spelling: `five three two...`, `beş üç iki...`
+- Emoji keycaps: `5️⃣3️⃣2️⃣1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣`
+- Circled digits: `⑤③②①②③④⑤⑥⑦`
+- Homoglyphs: `O5³2 l23 4567`
+- Mixed obfuscation: `beş 5 üç iki bir...`
+- Multipliers: `double five` → `55`, `çift bir` → `11`
+## False-positive guards
+Filters out common non-phone patterns:
+- Dates: `01/02/2024`
+- Times: `14:30`
+- IPv4 addresses: `192.168.1.1`
+- Isolated digit words in prose: `bu ürün beş yıldız`
+## Performance
+Benchmarks on a typical dev machine:
+| Scenario | ~Mean |
+|---|---|
+| `quickCheck` on clean 200-char comment | 0.017 ms |
+| `detect` on obfuscated phone | 0.043 ms |
+| `detect` on 2 KB text | 1.4 ms |
+Zero runtime dependencies.
+## Development
+```bash
+npm install
+npm test
+npm run bench
+npm run build
+```
+## License
+MIT

package/dist/index.cjs ADDED Viewed

@@ -0,0 +1,572 @@
+'use strict';
+// src/match/false-positives.ts
+var DATE_PATTERNS = [
+  /\b\d{1,2}[/.-]\d{1,2}[/.-]\d{2,4}\b/,
+  /\b\d{4}[/.-]\d{1,2}[/.-]\d{1,2}\b/
+];
+var TIME_PATTERNS = [
+  /\b\d{1,2}:\d{2}(?::\d{2})?\b/,
+  /\b\d{1,2}\.\d{2}\b/
+];
+var IPV4_PATTERN = /\b(?:\d{1,3}\.){3}\d{1,3}\b/;
+function isFalsePositive(sequence, sourceText) {
+  const snippet = sourceText.slice(sequence.start, sequence.end);
+  for (const pattern of DATE_PATTERNS) {
+    if (pattern.test(snippet)) {
+      return true;
+    }
+  }
+  for (const pattern of TIME_PATTERNS) {
+    if (pattern.test(snippet)) {
+      return true;
+    }
+  }
+  if (IPV4_PATTERN.test(snippet)) {
+    return true;
+  }
+  if (/^(\d)\1{11,}$/.test(sequence.normalized)) {
+    return true;
+  }
+  return false;
+}
+// src/match/scorer.ts
+function isTrMobile(normalized) {
+  const digits = normalized.replace(/\D/g, "");
+  if (digits.length === 10 && /^5\d{9}$/.test(digits)) {
+    return true;
+  }
+  if (digits.length === 11 && /^05\d{9}$/.test(digits)) {
+    return true;
+  }
+  if (digits.length === 12 && /^905\d{9}$/.test(digits)) {
+    return true;
+  }
+  if (digits.length === 13 && /^\+?905\d{9}$/.test(digits)) {
+    return true;
+  }
+  return false;
+}
+function hasPrefix(sourceText, sequence) {
+  const before = sourceText.slice(Math.max(0, sequence.start - 4), sequence.start);
+  return /\+/.test(before) || /^0/.test(sequence.normalized);
+}
+function countObfuscationTypes(types) {
+  if (types.includes("mixed")) {
+    return 3;
+  }
+  return types.length;
+}
+function scoreSequence(sequence, sourceText, options) {
+  let score = 0.35;
+  const digits = sequence.normalized;
+  const digitCount = digits.length;
+  const countryHint = options.countryHint ?? "TR";
+  if (countryHint === "TR" && isTrMobile(digits)) {
+    score += 0.4;
+  } else if (digitCount >= 10 && digitCount <= 15) {
+    score += 0.25;
+  } else if (digitCount >= 7) {
+    score += 0.15;
+  }
+  if (hasPrefix(sourceText, sequence)) {
+    score += 0.15;
+  }
+  const typeCount = countObfuscationTypes(sequence.types);
+  if (typeCount >= 2 || sequence.types.includes("mixed")) {
+    score += 0.2;
+  } else if (sequence.types[0] !== "numeric") {
+    score += 0.15;
+  }
+  if (sequence.types.length === 1 && sequence.types[0] === "numeric" && digitCount === 10) {
+    score += 0.1;
+  }
+  if (digitCount < 8) {
+    score -= 0.15;
+  }
+  return Math.max(0, Math.min(1, score));
+}
+// src/locales/types.ts
+// Characters allowed between two digit tokens of the same sequence: space, tab,
+// "-", ".", ",", "/", "(", ")", "+" and the no-break space (\xA0).
+var SEPARATORS = /* @__PURE__ */ new Set([" ", "	", "-", ".", ",", "/", "(", ")", "+", "\xA0"]);
+// Fallbacks used by resolveOptions when the caller does not supply a value.
+var DEFAULT_MIN_DIGITS = 7;
+var DEFAULT_MIN_CONFIDENCE = 0.6;
+// src/normalize/emoji.ts
+// Maps the code point of a circled digit to its ASCII digit.
+//   9450         = U+24EA (circled zero)
+//   9312..9320   = U+2460..U+2468 (circled 1-9)
+//   10102..10110 = U+2776..U+277E (negative circled 1-9; this range has no zero)
+var CIRCLED_DIGITS = {
+  9450: "0",
+  9312: "1",
+  9313: "2",
+  9314: "3",
+  9315: "4",
+  9316: "5",
+  9317: "6",
+  9318: "7",
+  9319: "8",
+  9320: "9",
+  10102: "1",
+  10103: "2",
+  10104: "3",
+  10105: "4",
+  10106: "5",
+  10107: "6",
+  10108: "7",
+  10109: "8",
+  10110: "9"
+};
+function isKeycapStart(text, index) {
+  const char = text[index];
+  if (!char || !/[0-9#*]/.test(char)) return null;
+  let consumed = 1;
+  if (text[index + 1] === "\uFE0F") {
+    consumed += 1;
+  }
+  if (text[index + consumed] === "\u20E3") {
+    consumed += 1;
+    if (char === "#" || char === "*") return null;
+    return consumed;
+  }
+  return null;
+}
+function circledDigitToAscii(text, index) {
+  const code = text.codePointAt(index);
+  if (code === void 0) return null;
+  const digit = CIRCLED_DIGITS[code];
+  if (!digit) return null;
+  const length = code > 65535 ? 2 : 1;
+  return { digit, length };
+}
+// Cheap pre-scan: matches a keycap sequence ([0-9#*], optional U+FE0F, U+20E3)
+// or a circled digit (U+2460..U+2468, U+24EA, U+2776..U+277E).
+var EMOJI_PRESCAN = /(?:[0-9#*]\uFE0F?\u20E3|[\u2460-\u2468\u24EA\u2776-\u277E])/u;
+// src/normalize/homoglyph.ts
+var HOMOGLYPH_MAP = {
+  O: "0",
+  o: "0",
+  l: "1",
+  I: "1",
+  i: "1",
+  "|": "1",
+  "!": "1",
+  S: "5",
+  s: "5",
+  B: "8",
+  Z: "2",
+  z: "2",
+  g: "9",
+  q: "9"
+};
+function homoglyphToDigit(char) {
+  return HOMOGLYPH_MAP[char] ?? null;
+}
+function isDigitLikeContext(text, index) {
+  const prev = text[index - 1];
+  const next = text[index + 1];
+  const prevIsDigitLike = prev !== void 0 && (/[0-9]/.test(prev) || homoglyphToDigit(prev) !== null || /[\s\-.,/()+]/.test(prev));
+  const nextIsDigitLike = next !== void 0 && (/[0-9]/.test(next) || homoglyphToDigit(next) !== null || /[\s\-.,/()+]/.test(next));
+  return prevIsDigitLike || nextIsDigitLike;
+}
+// src/normalize/unicode.ts
+var INVISIBLE_CHARS = /[\u200B-\u200D\uFEFF\u2060\u180E]/g;
+function nfkc(text) {
+  return text.normalize("NFKC");
+}
+function stripInvisible(text) {
+  return text.replace(INVISIBLE_CHARS, "");
+}
+function unicodeDigitToAscii(char) {
+  const code = char.codePointAt(0);
+  if (code === void 0) return null;
+  if (code >= 48 && code <= 57) {
+    return char;
+  }
+  if (code >= 65296 && code <= 65305) {
+    return String.fromCharCode(code - 65296 + 48);
+  }
+  if (code >= 1632 && code <= 1641) {
+    return String.fromCharCode(code - 1632 + 48);
+  }
+  if (code >= 1776 && code <= 1785) {
+    return String.fromCharCode(code - 1776 + 48);
+  }
+  if (code >= 2406 && code <= 2415) {
+    return String.fromCharCode(code - 2406 + 48);
+  }
+  const superscriptMap = {
+    8304: "0",
+    185: "1",
+    178: "2",
+    179: "3",
+    8308: "4",
+    8309: "5",
+    8310: "6",
+    8311: "7",
+    8312: "8",
+    8313: "9"
+  };
+  const superscript = superscriptMap[code];
+  if (superscript) {
+    return superscript;
+  }
+  return null;
+}
+function foldTurkish(text) {
+  return text.replace(/ı/g, "i").replace(/İ/g, "i").replace(/ş/g, "s").replace(/Ş/g, "s").replace(/ğ/g, "g").replace(/Ğ/g, "g").replace(/ü/g, "u").replace(/Ü/g, "u").replace(/ö/g, "o").replace(/Ö/g, "o").replace(/ç/g, "c").replace(/Ç/g, "c");
+}
+function normalizeForLookup(text) {
+  return foldTurkish(nfkc(stripInvisible(text)).toLowerCase());
+}
+// src/normalize/word-digits.ts
+function buildLookup(locales) {
+  const digitWords = /* @__PURE__ */ new Map();
+  const multipliers = /* @__PURE__ */ new Map();
+  for (const locale of locales) {
+    for (const [word, digit] of locale.digitWords) {
+      digitWords.set(normalizeForLookup(word), digit);
+    }
+    for (const [word, count] of locale.multipliers) {
+      multipliers.set(normalizeForLookup(word), count);
+    }
+  }
+  const words = [.../* @__PURE__ */ new Set([...digitWords.keys(), ...multipliers.keys()])].sort((a, b) => b.length - a.length).map((word) => word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
+  const wordPattern = new RegExp(`\\b(?:${words.join("|")})\\b`, "giu");
+  return { digitWords, multipliers, wordPattern };
+}
+function isSeparator(char) {
+  return /[\s\-.,/()+]/.test(char) || char === "\xA0";
+}
+function isWordChar(char) {
+  return /[\p{L}_]/u.test(char);
+}
+function pushDigit(tokens, digit, start, end, type, repeat = 1) {
+  for (let i = 0; i < repeat; i += 1) {
+    tokens.push({ digit, start, end, type });
+  }
+}
+/**
+ * Scans `text` left to right and emits one token per recognised digit.
+ *
+ * Each token is `{ digit, start, end, type }`, where `start`/`end` index into
+ * `text` and `type` is "emoji", "homoglyph", "word" or "numeric". A multiplier
+ * word sets `pendingRepeat`, which makes the next digit emit that many tokens.
+ *
+ * At every position the checks run in this order: keycap, circled digit, word
+ * (letters/underscore), Unicode digit, homoglyph, separator.
+ *
+ * @param text Text to scan.
+ * @param locales Locales whose dictionaries are merged by buildLookup.
+ * @returns Array of tokens in source order.
+ */
+function tokenizeDigits(text, locales) {
+  const lookup = buildLookup(locales);
+  const tokens = [];
+  // Repeat count requested by a preceding multiplier word (1 = no repeat).
+  let pendingRepeat = 1;
+  let index = 0;
+  while (index < text.length) {
+    // Keycap sequence: the digit is the first character of the sequence.
+    const keycapLength = isKeycapStart(text, index);
+    if (keycapLength !== null) {
+      const digit = text[index];
+      pushDigit(tokens, digit, index, index + keycapLength, "emoji", pendingRepeat);
+      pendingRepeat = 1;
+      index += keycapLength;
+      continue;
+    }
+    // Circled digits are reported with the same "emoji" type.
+    const circled = circledDigitToAscii(text, index);
+    if (circled) {
+      pushDigit(tokens, circled.digit, index, index + circled.length, "emoji", pendingRepeat);
+      pendingRepeat = 1;
+      index += circled.length;
+      continue;
+    }
+    const char = text[index];
+    if (isWordChar(char)) {
+      // Extend to the end of the run of word characters.
+      let end = index + 1;
+      while (end < text.length && isWordChar(text[end])) {
+        end += 1;
+      }
+      // A one-letter run may be a look-alike digit when its neighbours are digit-like.
+      if (end === index + 1) {
+        const homoglyph2 = homoglyphToDigit(char);
+        if (homoglyph2 && isDigitLikeContext(text, index)) {
+          pushDigit(tokens, homoglyph2, index, index + 1, "homoglyph", pendingRepeat);
+          pendingRepeat = 1;
+          index += 1;
+          continue;
+        }
+      }
+      const rawWord = text.slice(index, end);
+      const normalizedWord = normalizeForLookup(rawWord);
+      // Multiplier words emit nothing themselves; they arm the repeat for the next digit.
+      const multiplier = lookup.multipliers.get(normalizedWord);
+      if (multiplier !== void 0) {
+        pendingRepeat = multiplier;
+        index = end;
+        continue;
+      }
+      const digit = lookup.digitWords.get(normalizedWord);
+      if (digit !== void 0) {
+        pushDigit(tokens, digit, index, end, "word", pendingRepeat);
+        pendingRepeat = 1;
+        index = end;
+        continue;
+      }
+      // Unknown word: skipped; pendingRepeat is left unchanged here.
+      index = end;
+      continue;
+    }
+    const asciiDigit = unicodeDigitToAscii(char);
+    if (asciiDigit) {
+      pushDigit(tokens, asciiDigit, index, index + 1, "numeric", pendingRepeat);
+      pendingRepeat = 1;
+      index += 1;
+      continue;
+    }
+    // Non-letter look-alikes (e.g. "|" or "!") are handled here.
+    const homoglyph = homoglyphToDigit(char);
+    if (homoglyph && isDigitLikeContext(text, index)) {
+      pushDigit(tokens, homoglyph, index, index + 1, "homoglyph", pendingRepeat);
+      pendingRepeat = 1;
+      index += 1;
+      continue;
+    }
+    // Separators are skipped without clearing a pending multiplier.
+    if (isSeparator(char)) {
+      index += 1;
+      continue;
+    }
+    // Any other character cancels a pending multiplier.
+    pendingRepeat = 1;
+    index += 1;
+  }
+  return tokens;
+}
+function hasLocaleWordSignal(text, locales) {
+  const lookup = buildLookup(locales);
+  const pattern = lookup.wordPattern;
+  pattern.lastIndex = 0;
+  return pattern.test(text);
+}
+function isMultiplierSpan(text, from, to, locales) {
+  const slice = text.slice(from, to).trim();
+  if (!slice) {
+    return true;
+  }
+  const lookup = buildLookup(locales);
+  const words = slice.split(/\s+/);
+  return words.every((word) => lookup.multipliers.has(normalizeForLookup(word)));
+}
+// src/match/sequence.ts
+function onlySeparatorsBetween(text, from, to, locales) {
+  if (from >= to) {
+    return true;
+  }
+  if (isMultiplierSpan(text, from, to, locales)) {
+    return true;
+  }
+  for (let i = from; i < to; i += 1) {
+    const char = text[i];
+    if (!SEPARATORS.has(char)) {
+      return false;
+    }
+  }
+  return true;
+}
+function collectTypes(tokens) {
+  const unique = new Set(tokens.map((token) => token.type));
+  if (unique.size > 1) {
+    return ["mixed"];
+  }
+  return [...unique];
+}
+function findDigitSequences(text, tokens, minDigits, locales = []) {
+  if (tokens.length === 0) {
+    return [];
+  }
+  const sequences = [];
+  let group = [tokens[0]];
+  for (let i = 1; i < tokens.length; i += 1) {
+    const prev = tokens[i - 1];
+    const current = tokens[i];
+    if (onlySeparatorsBetween(text, prev.end, current.start, locales)) {
+      group.push(current);
+      continue;
+    }
+    if (group.length >= minDigits) {
+      sequences.push(toSequence(group));
+    }
+    group = [current];
+  }
+  if (group.length >= minDigits) {
+    sequences.push(toSequence(group));
+  }
+  return sequences;
+}
+function toSequence(tokens) {
+  const first = tokens[0];
+  const last = tokens[tokens.length - 1];
+  return {
+    tokens,
+    start: first.start,
+    end: last.end,
+    normalized: tokens.map((token) => token.digit).join(""),
+    types: collectTypes(tokens)
+  };
+}
+// src/locales/build-locale.ts
+function createLocale(id, digitWords, multipliers = {}) {
+  const digitMap = new Map(Object.entries(digitWords));
+  const multiplierMap = new Map(Object.entries(multipliers));
+  const words = [...digitMap.keys(), ...multiplierMap.keys()].sort((a, b) => b.length - a.length).map(escapeRegex);
+  const preScanPattern = new RegExp(
+    `(?:${words.join("|")})`,
+    "iu"
+  );
+  return {
+    id,
+    digitWords: digitMap,
+    multipliers: multiplierMap,
+    preScanPattern
+  };
+}
+function escapeRegex(value) {
+  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+// src/locales/en.ts
+// English locale: digit words (including "oh" for 0) and repeat multipliers.
+var en = createLocale(
+  "en",
+  {
+    zero: "0",
+    oh: "0",
+    one: "1",
+    two: "2",
+    three: "3",
+    four: "4",
+    five: "5",
+    six: "6",
+    seven: "7",
+    eight: "8",
+    nine: "9"
+  },
+  {
+    // The next digit is emitted this many times.
+    double: 2,
+    triple: 3,
+    quad: 4
+  }
+);
+// src/locales/tr.ts
+// Turkish locale. Words containing Turkish letters are listed twice: once in
+// ASCII and once with the native letters written as Unicode escapes
+// (\u0131 = ı, \u00FC = ü, \u00E7 = ç, \u00F6 = ö, \u015F = ş).
+var tr = createLocale(
+  "tr",
+  {
+    sifir: "0",
+    s\u0131f\u0131r: "0",
+    bir: "1",
+    iki: "2",
+    uc: "3",
+    \u00FC\u00E7: "3",
+    dort: "4",
+    d\u00F6rt: "4",
+    bes: "5",
+    be\u015F: "5",
+    alti: "6",
+    alt\u0131: "6",
+    yedi: "7",
+    sekiz: "8",
+    dokuz: "9"
+  },
+  {
+    // The next digit is emitted this many times.
+    cift: 2,
+    \u00E7ift: 2,
+    uclu: 3,
+    \u00FC\u00E7l\u00FC: 3,
+    dortlu: 4,
+    d\u00F6rtl\u00FC: 4
+  }
+);
+// src/pipeline/options.ts
+function resolveOptions(options) {
+  return {
+    locales: options?.locales ?? [tr, en],
+    minDigits: options?.minDigits ?? DEFAULT_MIN_DIGITS,
+    minConfidence: options?.minConfidence ?? DEFAULT_MIN_CONFIDENCE,
+    countryHint: options?.countryHint ?? "TR"
+  };
+}
+// src/pipeline/quick-check.ts
+function quickCheck(text, options) {
+  if (!text || text.length === 0) {
+    return false;
+  }
+  const resolved = resolveOptions(options);
+  if (/\d/.test(text)) {
+    return true;
+  }
+  if (/\p{Nd}/u.test(text)) {
+    return true;
+  }
+  if (EMOJI_PRESCAN.test(text)) {
+    return true;
+  }
+  if (hasLocaleWordSignal(text, resolved.locales)) {
+    return true;
+  }
+  for (const locale of resolved.locales) {
+    locale.preScanPattern.lastIndex = 0;
+    if (locale.preScanPattern.test(text)) {
+      return true;
+    }
+  }
+  if (/[+]\s*[\d\p{Nd}\s\-().]{4,}/u.test(text)) {
+    return true;
+  }
+  if (/(?:[\d\p{Nd}][\s.\-/)]{0,3}){3,}[\d\p{Nd}]/u.test(text)) {
+    return true;
+  }
+  return false;
+}
+// src/pipeline/detect.ts
+function detect(text, options) {
+  if (!quickCheck(text, options)) {
+    return { detected: false, matches: [] };
+  }
+  const resolved = resolveOptions(options);
+  const tokens = tokenizeDigits(text, resolved.locales);
+  const sequences = findDigitSequences(text, tokens, resolved.minDigits, resolved.locales);
+  const matches = [];
+  for (const sequence of sequences) {
+    if (isFalsePositive(sequence, text)) {
+      continue;
+    }
+    const confidence = scoreSequence(sequence, text, resolved);
+    if (confidence < resolved.minConfidence) {
+      continue;
+    }
+    matches.push({
+      start: sequence.start,
+      end: sequence.end,
+      text: text.slice(sequence.start, sequence.end),
+      normalized: sequence.normalized,
+      confidence,
+      types: sequence.types
+    });
+  }
+  return {
+    detected: matches.length > 0,
+    matches
+  };
+}
+// src/pipeline/mask.ts
+function mask(text, options) {
+  const result = detect(text, options);
+  if (!result.detected) {
+    return text;
+  }
+  const char = options?.char ?? "#";
+  const chars = [...text];
+  const sorted = [...result.matches].sort((a, b) => b.start - a.start);
+  for (const match of sorted) {
+    for (let i = match.start; i < match.end; i += 1) {
+      if (!/\s/.test(chars[i] ?? "")) {
+        chars[i] = char;
+      }
+    }
+  }
+  return chars.join("");
+}
+// Public CommonJS API: the three pipeline entry points and the bundled locales.
+exports.detect = detect;
+exports.en = en;
+exports.mask = mask;
+exports.quickCheck = quickCheck;
+exports.tr = tr;
+//# sourceMappingURL=index.cjs.map
+//# sourceMappingURL=index.cjs.map