npm - typesecure - Versions diffs - 0.2.2 → 0.2.3 - Mend

typesecure 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/README.md CHANGED Viewed

@@ -27,6 +27,8 @@ You “type” your data as `public | pii | secret | token | credential`, and `t
 ## Installation
+Requires Node.js `>=18.18.0`.
 ```bash
 # Using npm
 npm install typesecure
@@ -113,8 +115,39 @@ app.use((req, _res, next) => {
 ### Redaction
 - `redact(value): value` (deep traversal)
+- `redactText(value): string` (mask sensitive fragments in plain text)
+- `detectText(value): StringDetection[]` (return ranges/kinds for audit workflows)
 - `safeJsonStringify(value): string`
 - `safeLoggerAdapter(consoleLike)`
+- Redaction options:
+  - `guessByKey` (default `true`): redact suspicious keys like `password`, `token`, `apiKey`.
+  - `guessByValue` (default `true`): auto-detect and redact sensitive-looking values.
+  - `useDefaultValueDetector` (default `true`): turn the built-in rule-based detectors on or off.
+  - `stringDetectors`: add custom detectors (for NER/ML or domain-specific logic).
+  - `minDetectionConfidence` (default `0`): ignore low-confidence custom detections.
+- Value detection masks only the sensitive fragments inside a larger string (instead of replacing the whole text). The built-in detectors cover:
+  - PII: email, phone, SSN, date of birth (`YYYY-MM-DD`), IPv4 address, payment card numbers (Luhn-validated).
+  - Secrets/tokens: JWTs, private key PEM blocks, GitHub tokens, AWS access keys, Stripe secret keys, OpenAI-style `sk-...` keys, credential pairs (`user:pass`), high-entropy token-like strings.
+Example custom detector (NER/ML-style integration):
+```typescript
+const out = redact(
+  { text: "Customer Jane Doe uses jane@example.com" },
+  {
+    stringDetectors: [
+      (value) => {
+        const name = "Jane Doe";
+        const idx = value.indexOf(name);
+        return idx >= 0
+          ? [{ start: idx, end: idx + name.length, kind: "pii", confidence: 0.92, source: "ml.ner" }]
+          : [];
+      },
+    ],
+    minDetectionConfidence: 0.8,
+  },
+);
+```
 ### Policy
@@ -145,9 +178,47 @@ To contribute to this project:
 2. Install dependencies with `pnpm install`
 3. Run tests with `pnpm test`
 4. Build the package with `pnpm build`
+5. Run Enron dataset integration tests with `pnpm test:data`
+### Optional: external dataset setup
+For larger redaction/policy experiments (Enron + Synthea FHIR), fetch datasets locally:
+```bash
+pnpm data:setup
+```
+This command downloads and extracts to:
+- `data/enron-maildir`
+- `data/synthea_sample_data_fhir_latest`
+Notes:
+- `data/` is gitignored and not published to npm.
+- `pnpm test` excludes dataset suites by default.
+- `pnpm test:data` runs Enron dataset tests with verbose output.
+- `pnpm test:data:synthea` runs Synthea-specific dataset tests.
+- `pnpm test:data:all` runs all dataset suites.
+- You can override source URLs with `ENRON_URL=...` and/or `SYNTHEA_FHIR_URL=...`.
+- You can change destination with `DATA_DIR=/path/to/data`.
+Dataset sources:
+- Enron: [https://www.cs.cmu.edu/~enron/](https://www.cs.cmu.edu/~enron/)
+- Synthea: [https://github.com/synthetichealth/synthea-sample-data/](https://github.com/synthetichealth/synthea-sample-data/)
 This project uses TypeScript for type safety, Jest for testing, and ESLint for code quality.
+## Dataset Acknowledgements
+We use these public datasets for redaction and policy testing:
+- [CMU Enron Email Dataset](https://www.cs.cmu.edu/~enron/)
+- [Synthea Sample Data](https://github.com/synthetichealth/synthea-sample-data/)
+Personal note: I am especially interested in the historical context around Enron, including how it was able to happen and the improvements in governance and controls that followed.
 ## License
 MIT © [Arvid Berndtsson](https://github.com/arvid-berndtsson)

package/dist/index.d.mts CHANGED Viewed

@@ -50,12 +50,50 @@ declare function secretText(value: string): SecretString;
 declare function token(value: string): TokenString;
 declare function credential(value: string): CredentialString;
+type StringDetection = Readonly<{
+    start: number;
+    end: number;
+    kind: DataClassification | "unknown";
+    confidence?: number;
+    source?: string;
+}>;
+type StringDetector = (value: string, context: Readonly<{
+    keyHint?: string;
+    depth: number;
+}>) => readonly StringDetection[];
+type StringDetectionOptions = Readonly<{
+    useDefaultValueDetector?: boolean;
+    stringDetectors?: readonly StringDetector[];
+    minDetectionConfidence?: number;
+}>;
 type RedactOptions = Readonly<{
     /**
      * If true, redact values for suspicious keys even if they aren't classified.
      * Defaults to true.
      */
     guessByKey?: boolean;
+    /**
+     * If true, redact suspicious string values even when keys are not suspicious.
+     * Defaults to true.
+     */
+    guessByValue?: boolean;
+    /**
+     * Additional string detectors (for custom heuristics, NER, ML models, etc).
+     * Detectors return match ranges to mask in text.
+     */
+    stringDetectors?: readonly StringDetector[];
+    /**
+     * If false, disables the built-in rule-based value detector.
+     * Defaults to true.
+     */
+    useDefaultValueDetector?: boolean;
+    /**
+     * Minimum confidence for detections from string detectors.
+     * Defaults to 0.
+     */
+    minDetectionConfidence?: number;
     /**
      * Placeholder format for redacted values.
      * Defaults to "[REDACTED:<kind>]".
@@ -66,8 +104,10 @@ type RedactOptions = Readonly<{
      * Defaults to 25.
      */
     maxDepth?: number;
-}>;
+}> & StringDetectionOptions;
 declare function redact<T>(value: T, options?: RedactOptions): T;
+declare function detectText(value: string, options?: Pick<RedactOptions, "useDefaultValueDetector" | "stringDetectors" | "minDetectionConfidence">): StringDetection[];
+declare function redactText(value: string, options?: Pick<RedactOptions, "guessByValue" | "useDefaultValueDetector" | "stringDetectors" | "minDetectionConfidence" | "placeholder">): string;
 declare function safeJsonStringify(value: unknown, options?: RedactOptions, space?: number): string;
 /**
  * Convenience logger that will redact classified data and suspicious keys.
@@ -113,4 +153,4 @@ declare function audit(policy: Policy, action: PolicyAction, data: unknown): Aud
  */
 declare function policyLog(policy: Policy, logger: Pick<Console, "info" | "warn" | "error" | "debug" | "log">, level: keyof Pick<Console, "info" | "warn" | "error" | "debug" | "log">, ...args: unknown[]): void;
-export { type AuditEvent, type Classified, type CredentialString, CredentialStringSchema, type DataClassification, type PIIString, PIIStringSchema, type Policy, type PolicyAction, type PolicyDecision, type PublicString, PublicStringSchema, type RedactOptions, type SecretString, SecretStringSchema, type TokenString, TokenStringSchema, assertAllowed, audit, classificationOf, credential, decide, defaultPolicy, httpAuthorizationBearer, isClassified, piiText, policyLog, publicText, redact, reveal, safeJsonStringify, safeLoggerAdapter, secretText, token };
+export { type AuditEvent, type Classified, type CredentialString, CredentialStringSchema, type DataClassification, type PIIString, PIIStringSchema, type Policy, type PolicyAction, type PolicyDecision, type PublicString, PublicStringSchema, type RedactOptions, type SecretString, SecretStringSchema, type StringDetection, type StringDetector, type TokenString, TokenStringSchema, assertAllowed, audit, classificationOf, credential, decide, defaultPolicy, detectText, httpAuthorizationBearer, isClassified, piiText, policyLog, publicText, redact, redactText, reveal, safeJsonStringify, safeLoggerAdapter, secretText, token };

package/dist/index.d.ts CHANGED Viewed

@@ -50,12 +50,50 @@ declare function secretText(value: string): SecretString;
 declare function token(value: string): TokenString;
 declare function credential(value: string): CredentialString;
+type StringDetection = Readonly<{
+    start: number;
+    end: number;
+    kind: DataClassification | "unknown";
+    confidence?: number;
+    source?: string;
+}>;
+type StringDetector = (value: string, context: Readonly<{
+    keyHint?: string;
+    depth: number;
+}>) => readonly StringDetection[];
+type StringDetectionOptions = Readonly<{
+    useDefaultValueDetector?: boolean;
+    stringDetectors?: readonly StringDetector[];
+    minDetectionConfidence?: number;
+}>;
 type RedactOptions = Readonly<{
     /**
      * If true, redact values for suspicious keys even if they aren't classified.
      * Defaults to true.
      */
     guessByKey?: boolean;
+    /**
+     * If true, redact suspicious string values even when keys are not suspicious.
+     * Defaults to true.
+     */
+    guessByValue?: boolean;
+    /**
+     * Additional string detectors (for custom heuristics, NER, ML models, etc).
+     * Detectors return match ranges to mask in text.
+     */
+    stringDetectors?: readonly StringDetector[];
+    /**
+     * If false, disables the built-in rule-based value detector.
+     * Defaults to true.
+     */
+    useDefaultValueDetector?: boolean;
+    /**
+     * Minimum confidence for detections from string detectors.
+     * Defaults to 0.
+     */
+    minDetectionConfidence?: number;
     /**
      * Placeholder format for redacted values.
      * Defaults to "[REDACTED:<kind>]".
@@ -66,8 +104,10 @@ type RedactOptions = Readonly<{
      * Defaults to 25.
      */
     maxDepth?: number;
-}>;
+}> & StringDetectionOptions;
 declare function redact<T>(value: T, options?: RedactOptions): T;
+declare function detectText(value: string, options?: Pick<RedactOptions, "useDefaultValueDetector" | "stringDetectors" | "minDetectionConfidence">): StringDetection[];
+declare function redactText(value: string, options?: Pick<RedactOptions, "guessByValue" | "useDefaultValueDetector" | "stringDetectors" | "minDetectionConfidence" | "placeholder">): string;
 declare function safeJsonStringify(value: unknown, options?: RedactOptions, space?: number): string;
 /**
  * Convenience logger that will redact classified data and suspicious keys.
@@ -113,4 +153,4 @@ declare function audit(policy: Policy, action: PolicyAction, data: unknown): Aud
  */
 declare function policyLog(policy: Policy, logger: Pick<Console, "info" | "warn" | "error" | "debug" | "log">, level: keyof Pick<Console, "info" | "warn" | "error" | "debug" | "log">, ...args: unknown[]): void;
-export { type AuditEvent, type Classified, type CredentialString, CredentialStringSchema, type DataClassification, type PIIString, PIIStringSchema, type Policy, type PolicyAction, type PolicyDecision, type PublicString, PublicStringSchema, type RedactOptions, type SecretString, SecretStringSchema, type TokenString, TokenStringSchema, assertAllowed, audit, classificationOf, credential, decide, defaultPolicy, httpAuthorizationBearer, isClassified, piiText, policyLog, publicText, redact, reveal, safeJsonStringify, safeLoggerAdapter, secretText, token };
+export { type AuditEvent, type Classified, type CredentialString, CredentialStringSchema, type DataClassification, type PIIString, PIIStringSchema, type Policy, type PolicyAction, type PolicyDecision, type PublicString, PublicStringSchema, type RedactOptions, type SecretString, SecretStringSchema, type StringDetection, type StringDetector, type TokenString, TokenStringSchema, assertAllowed, audit, classificationOf, credential, decide, defaultPolicy, detectText, httpAuthorizationBearer, isClassified, piiText, policyLog, publicText, redact, redactText, reveal, safeJsonStringify, safeLoggerAdapter, secretText, token };

package/dist/index.js CHANGED Viewed

@@ -31,12 +31,14 @@ __export(index_exports, {
   credential: () => credential,
   decide: () => decide,
   defaultPolicy: () => defaultPolicy,
+  detectText: () => detectText,
   httpAuthorizationBearer: () => httpAuthorizationBearer,
   isClassified: () => isClassified,
   piiText: () => piiText,
   policyLog: () => policyLog,
   publicText: () => publicText,
   redact: () => redact,
+  redactText: () => redactText,
   reveal: () => reveal,
   safeJsonStringify: () => safeJsonStringify,
   safeLoggerAdapter: () => safeLoggerAdapter,
@@ -94,6 +96,179 @@ function credential(value) {
   return CredentialStringSchema.parse(value);
 }
+// src/detectors/rules.ts
+var EMAIL_RE = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/gi;
+var PHONE_RE = /\b(?:\+?\d{1,3}[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?)\d{3}[-.\s]?\d{4}\b/g;
+var SSN_RE = /\b\d{3}-\d{2}-\d{4}\b/g;
+var DOB_RE = /\b(?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])\b/g;
+var IPV4_RE = /\b(?:\d{1,3}\.){3}\d{1,3}\b/g;
+var CREDIT_CARD_RE = /\b(?:\d[ -]*?){13,19}\b/g;
+var JWT_RE = /\b[A-Za-z0-9_-]{12,}\.[A-Za-z0-9_-]{12,}\.[A-Za-z0-9_-]{12,}\b/g;
+var PRIVATE_KEY_BLOCK_RE = /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g;
+var AWS_ACCESS_KEY_RE = /\bAKIA[0-9A-Z]{16}\b/g;
+var GITHUB_TOKEN_RE = /\bgh[pousr]_[A-Za-z0-9]{20,}\b/g;
+var STRIPE_SECRET_RE = /\bsk_(?:live|test)_[A-Za-z0-9]{16,}\b/g;
+var OPENAI_KEY_RE = /\bsk-[A-Za-z0-9]{20,}\b/g;
+var CREDENTIAL_PAIR_RE = /\b[^:\s]{1,128}:[^\s:]{1,256}\b/g;
+var HIGH_ENTROPY_TOKEN_RE = /\b[A-Za-z0-9+/=_-]{28,}\b/g;
+function luhnValid(input) {
+  const digits = input.replace(/[ -]/g, "");
+  if (!/^\d{13,19}$/.test(digits)) return false;
+  let sum = 0;
+  let shouldDouble = false;
+  for (let i = digits.length - 1; i >= 0; i -= 1) {
+    let d = Number(digits[i]);
+    if (shouldDouble) {
+      d *= 2;
+      if (d > 9) d -= 9;
+    }
+    sum += d;
+    shouldDouble = !shouldDouble;
+  }
+  return sum % 10 === 0;
+}
+function placeholderRanges(text) {
+  const ranges = [];
+  const re = /\[REDACTED:[^\]]+\]/g;
+  let m;
+  while ((m = re.exec(text)) !== null) {
+    ranges.push({ start: m.index, end: m.index + m[0].length });
+  }
+  return ranges;
+}
+function overlapsRanges(start, end, ranges) {
+  return ranges.some((r) => start < r.end && end > r.start);
+}
+function detectRegexRanges(value, re, kind, source, guard) {
+  const out = [];
+  const execRe = new RegExp(re.source, re.flags);
+  const protectedRanges = placeholderRanges(value);
+  let m;
+  while ((m = execRe.exec(value)) !== null) {
+    const match = m[0];
+    const start = m.index;
+    const end = start + match.length;
+    if (!match || guard && !guard(match) || overlapsRanges(start, end, protectedRanges)) {
+      continue;
+    }
+    out.push({ start, end, kind, source, confidence: 1 });
+    if (execRe.lastIndex === m.index) execRe.lastIndex += 1;
+  }
+  return out;
+}
+var defaultRuleStringDetector = (value) => {
+  const out = [];
+  out.push(
+    ...detectRegexRanges(
+      value,
+      PRIVATE_KEY_BLOCK_RE,
+      "secret",
+      "rule.private-key"
+    ),
+    ...detectRegexRanges(value, JWT_RE, "token", "rule.jwt"),
+    ...detectRegexRanges(
+      value,
+      AWS_ACCESS_KEY_RE,
+      "credential",
+      "rule.aws-access-key"
+    ),
+    ...detectRegexRanges(value, GITHUB_TOKEN_RE, "token", "rule.github-token"),
+    ...detectRegexRanges(
+      value,
+      STRIPE_SECRET_RE,
+      "secret",
+      "rule.stripe-secret"
+    ),
+    ...detectRegexRanges(value, OPENAI_KEY_RE, "secret", "rule.openai-key"),
+    ...detectRegexRanges(
+      value,
+      CREDENTIAL_PAIR_RE,
+      "credential",
+      "rule.credential-pair"
+    )
+  );
+  out.push(
+    ...detectRegexRanges(value, EMAIL_RE, "pii", "rule.email"),
+    ...detectRegexRanges(value, PHONE_RE, "pii", "rule.phone"),
+    ...detectRegexRanges(value, SSN_RE, "pii", "rule.ssn"),
+    ...detectRegexRanges(value, DOB_RE, "pii", "rule.dob"),
+    ...detectRegexRanges(
+      value,
+      IPV4_RE,
+      "pii",
+      "rule.ipv4",
+      (m) => m.split(".").every((p) => Number(p) >= 0 && Number(p) <= 255)
+    ),
+    ...detectRegexRanges(
+      value,
+      CREDIT_CARD_RE,
+      "pii",
+      "rule.credit-card",
+      luhnValid
+    )
+  );
+  out.push(
+    ...detectRegexRanges(
+      value,
+      HIGH_ENTROPY_TOKEN_RE,
+      "token",
+      "rule.high-entropy",
+      (m) => {
+        if (/^[a-z]+$/i.test(m)) return false;
+        return /\d/.test(m);
+      }
+    )
+  );
+  return out;
+};
+// src/detectors/engine.ts
+function collectStringDetections(value, context, options) {
+  const useDefaultValueDetector = options?.useDefaultValueDetector ?? true;
+  const customDetectors = options?.stringDetectors ?? [];
+  const minDetectionConfidence = options?.minDetectionConfidence ?? 0;
+  const detections = [];
+  if (useDefaultValueDetector) {
+    detections.push(...defaultRuleStringDetector(value, context));
+  }
+  for (const detector of customDetectors) {
+    detections.push(...detector(value, context));
+  }
+  return detections.filter(
+    (d) => (d.confidence ?? 1) >= minDetectionConfidence && Number.isFinite(d.start) && Number.isFinite(d.end)
+  );
+}
+function applyDetectionsToString(value, detections, placeholder) {
+  if (detections.length === 0) return value;
+  const owner = new Array(value.length).fill(-1);
+  const normalized = [];
+  for (const d of detections) {
+    const start = Math.max(0, Math.min(value.length, Math.trunc(d.start)));
+    const end = Math.max(start, Math.min(value.length, Math.trunc(d.end)));
+    if (end <= start) continue;
+    normalized.push({ start, end, kind: d.kind });
+  }
+  normalized.forEach((d, idx) => {
+    for (let i2 = d.start; i2 < d.end; i2 += 1) {
+      if (owner[i2] === -1) owner[i2] = idx;
+    }
+  });
+  let out = "";
+  let i = 0;
+  while (i < value.length) {
+    const idx = owner[i];
+    if (idx < 0) {
+      out += value[i];
+      i += 1;
+      continue;
+    }
+    const d = normalized[idx];
+    out += placeholder(d.kind);
+    i = d.end;
+  }
+  return out;
+}
 // src/redaction.ts
 var DEFAULT_SUSPICIOUS_KEY = /pass(word)?|pwd|secret|token|api[_-]?key|auth|bearer|cookie|session|private[_-]?key|ssh|credential/i;
 function defaultPlaceholder(kind) {
@@ -104,6 +279,7 @@ function isPlainObject(value) {
 }
 function redact(value, options) {
   const guessByKey = options?.guessByKey ?? true;
+  const guessByValue = options?.guessByValue ?? true;
   const placeholder = options?.placeholder ?? defaultPlaceholder;
   const maxDepth = options?.maxDepth ?? 25;
   const seen = /* @__PURE__ */ new WeakMap();
@@ -118,6 +294,15 @@ function redact(value, options) {
         return placeholder("unknown");
       }
     }
+    if (guessByValue && typeof v === "string") {
+      const detections = collectStringDetections(
+        v,
+        { keyHint, depth },
+        options
+      );
+      const masked = applyDetectionsToString(v, detections, placeholder);
+      if (masked !== v) return masked;
+    }
     if (Array.isArray(v)) {
       return v.map((item) => walk(item, depth + 1));
     }
@@ -137,6 +322,16 @@ function redact(value, options) {
   };
   return walk(value, 0);
 }
+function detectText(value, options) {
+  return collectStringDetections(value, { depth: 0 }, options);
+}
+function redactText(value, options) {
+  const guessByValue = options?.guessByValue ?? true;
+  if (!guessByValue) return value;
+  const placeholder = options?.placeholder ?? defaultPlaceholder;
+  const detections = detectText(value, options);
+  return applyDetectionsToString(value, detections, placeholder);
+}
 function safeJsonStringify(value, options, space) {
   return JSON.stringify(redact(value, options), null, space);
 }
@@ -243,12 +438,14 @@ function policyLog(policy, logger, level, ...args) {
   credential,
   decide,
   defaultPolicy,
+  detectText,
   httpAuthorizationBearer,
   isClassified,
   piiText,
   policyLog,
   publicText,
   redact,
+  redactText,
   reveal,
   safeJsonStringify,
   safeLoggerAdapter,

package/dist/index.mjs CHANGED Viewed

@@ -47,6 +47,179 @@ function credential(value) {
   return CredentialStringSchema.parse(value);
 }
+// src/detectors/rules.ts
+var EMAIL_RE = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/gi;
+var PHONE_RE = /\b(?:\+?\d{1,3}[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?)\d{3}[-.\s]?\d{4}\b/g;
+var SSN_RE = /\b\d{3}-\d{2}-\d{4}\b/g;
+var DOB_RE = /\b(?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])\b/g;
+var IPV4_RE = /\b(?:\d{1,3}\.){3}\d{1,3}\b/g;
+var CREDIT_CARD_RE = /\b(?:\d[ -]*?){13,19}\b/g;
+var JWT_RE = /\b[A-Za-z0-9_-]{12,}\.[A-Za-z0-9_-]{12,}\.[A-Za-z0-9_-]{12,}\b/g;
+var PRIVATE_KEY_BLOCK_RE = /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g;
+var AWS_ACCESS_KEY_RE = /\bAKIA[0-9A-Z]{16}\b/g;
+var GITHUB_TOKEN_RE = /\bgh[pousr]_[A-Za-z0-9]{20,}\b/g;
+var STRIPE_SECRET_RE = /\bsk_(?:live|test)_[A-Za-z0-9]{16,}\b/g;
+var OPENAI_KEY_RE = /\bsk-[A-Za-z0-9]{20,}\b/g;
+var CREDENTIAL_PAIR_RE = /\b[^:\s]{1,128}:[^\s:]{1,256}\b/g;
+var HIGH_ENTROPY_TOKEN_RE = /\b[A-Za-z0-9+/=_-]{28,}\b/g;
+function luhnValid(input) {
+  const digits = input.replace(/[ -]/g, "");
+  if (!/^\d{13,19}$/.test(digits)) return false;
+  let sum = 0;
+  let shouldDouble = false;
+  for (let i = digits.length - 1; i >= 0; i -= 1) {
+    let d = Number(digits[i]);
+    if (shouldDouble) {
+      d *= 2;
+      if (d > 9) d -= 9;
+    }
+    sum += d;
+    shouldDouble = !shouldDouble;
+  }
+  return sum % 10 === 0;
+}
+function placeholderRanges(text) {
+  const ranges = [];
+  const re = /\[REDACTED:[^\]]+\]/g;
+  let m;
+  while ((m = re.exec(text)) !== null) {
+    ranges.push({ start: m.index, end: m.index + m[0].length });
+  }
+  return ranges;
+}
+function overlapsRanges(start, end, ranges) {
+  return ranges.some((r) => start < r.end && end > r.start);
+}
+function detectRegexRanges(value, re, kind, source, guard) {
+  const out = [];
+  const execRe = new RegExp(re.source, re.flags);
+  const protectedRanges = placeholderRanges(value);
+  let m;
+  while ((m = execRe.exec(value)) !== null) {
+    const match = m[0];
+    const start = m.index;
+    const end = start + match.length;
+    if (!match || guard && !guard(match) || overlapsRanges(start, end, protectedRanges)) {
+      continue;
+    }
+    out.push({ start, end, kind, source, confidence: 1 });
+    if (execRe.lastIndex === m.index) execRe.lastIndex += 1;
+  }
+  return out;
+}
+var defaultRuleStringDetector = (value) => {
+  const out = [];
+  out.push(
+    ...detectRegexRanges(
+      value,
+      PRIVATE_KEY_BLOCK_RE,
+      "secret",
+      "rule.private-key"
+    ),
+    ...detectRegexRanges(value, JWT_RE, "token", "rule.jwt"),
+    ...detectRegexRanges(
+      value,
+      AWS_ACCESS_KEY_RE,
+      "credential",
+      "rule.aws-access-key"
+    ),
+    ...detectRegexRanges(value, GITHUB_TOKEN_RE, "token", "rule.github-token"),
+    ...detectRegexRanges(
+      value,
+      STRIPE_SECRET_RE,
+      "secret",
+      "rule.stripe-secret"
+    ),
+    ...detectRegexRanges(value, OPENAI_KEY_RE, "secret", "rule.openai-key"),
+    ...detectRegexRanges(
+      value,
+      CREDENTIAL_PAIR_RE,
+      "credential",
+      "rule.credential-pair"
+    )
+  );
+  out.push(
+    ...detectRegexRanges(value, EMAIL_RE, "pii", "rule.email"),
+    ...detectRegexRanges(value, PHONE_RE, "pii", "rule.phone"),
+    ...detectRegexRanges(value, SSN_RE, "pii", "rule.ssn"),
+    ...detectRegexRanges(value, DOB_RE, "pii", "rule.dob"),
+    ...detectRegexRanges(
+      value,
+      IPV4_RE,
+      "pii",
+      "rule.ipv4",
+      (m) => m.split(".").every((p) => Number(p) >= 0 && Number(p) <= 255)
+    ),
+    ...detectRegexRanges(
+      value,
+      CREDIT_CARD_RE,
+      "pii",
+      "rule.credit-card",
+      luhnValid
+    )
+  );
+  out.push(
+    ...detectRegexRanges(
+      value,
+      HIGH_ENTROPY_TOKEN_RE,
+      "token",
+      "rule.high-entropy",
+      (m) => {
+        if (/^[a-z]+$/i.test(m)) return false;
+        return /\d/.test(m);
+      }
+    )
+  );
+  return out;
+};
+// src/detectors/engine.ts
+function collectStringDetections(value, context, options) {
+  const useDefaultValueDetector = options?.useDefaultValueDetector ?? true;
+  const customDetectors = options?.stringDetectors ?? [];
+  const minDetectionConfidence = options?.minDetectionConfidence ?? 0;
+  const detections = [];
+  if (useDefaultValueDetector) {
+    detections.push(...defaultRuleStringDetector(value, context));
+  }
+  for (const detector of customDetectors) {
+    detections.push(...detector(value, context));
+  }
+  return detections.filter(
+    (d) => (d.confidence ?? 1) >= minDetectionConfidence && Number.isFinite(d.start) && Number.isFinite(d.end)
+  );
+}
+function applyDetectionsToString(value, detections, placeholder) {
+  if (detections.length === 0) return value;
+  const owner = new Array(value.length).fill(-1);
+  const normalized = [];
+  for (const d of detections) {
+    const start = Math.max(0, Math.min(value.length, Math.trunc(d.start)));
+    const end = Math.max(start, Math.min(value.length, Math.trunc(d.end)));
+    if (end <= start) continue;
+    normalized.push({ start, end, kind: d.kind });
+  }
+  normalized.forEach((d, idx) => {
+    for (let i2 = d.start; i2 < d.end; i2 += 1) {
+      if (owner[i2] === -1) owner[i2] = idx;
+    }
+  });
+  let out = "";
+  let i = 0;
+  while (i < value.length) {
+    const idx = owner[i];
+    if (idx < 0) {
+      out += value[i];
+      i += 1;
+      continue;
+    }
+    const d = normalized[idx];
+    out += placeholder(d.kind);
+    i = d.end;
+  }
+  return out;
+}
 // src/redaction.ts
 var DEFAULT_SUSPICIOUS_KEY = /pass(word)?|pwd|secret|token|api[_-]?key|auth|bearer|cookie|session|private[_-]?key|ssh|credential/i;
 function defaultPlaceholder(kind) {
@@ -57,6 +230,7 @@ function isPlainObject(value) {
 }
 function redact(value, options) {
   const guessByKey = options?.guessByKey ?? true;
+  const guessByValue = options?.guessByValue ?? true;
   const placeholder = options?.placeholder ?? defaultPlaceholder;
   const maxDepth = options?.maxDepth ?? 25;
   const seen = /* @__PURE__ */ new WeakMap();
@@ -71,6 +245,15 @@ function redact(value, options) {
         return placeholder("unknown");
       }
     }
+    if (guessByValue && typeof v === "string") {
+      const detections = collectStringDetections(
+        v,
+        { keyHint, depth },
+        options
+      );
+      const masked = applyDetectionsToString(v, detections, placeholder);
+      if (masked !== v) return masked;
+    }
     if (Array.isArray(v)) {
       return v.map((item) => walk(item, depth + 1));
     }
@@ -90,6 +273,16 @@ function redact(value, options) {
   };
   return walk(value, 0);
 }
+function detectText(value, options) {
+  return collectStringDetections(value, { depth: 0 }, options);
+}
+function redactText(value, options) {
+  const guessByValue = options?.guessByValue ?? true;
+  if (!guessByValue) return value;
+  const placeholder = options?.placeholder ?? defaultPlaceholder;
+  const detections = detectText(value, options);
+  return applyDetectionsToString(value, detections, placeholder);
+}
 function safeJsonStringify(value, options, space) {
   return JSON.stringify(redact(value, options), null, space);
 }
@@ -195,12 +388,14 @@ export {
   credential,
   decide,
   defaultPolicy,
+  detectText,
   httpAuthorizationBearer,
   isClassified,
   piiText,
   policyLog,
   publicText,
   redact,
+  redactText,
   reveal,
   safeJsonStringify,
   safeLoggerAdapter,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "typesecure",
-  "version": "0.2.2",
+  "version": "0.2.3",
   "description": "Type-safe data classification and security enforcement for TypeScript - prevent secrets and PII leaks with compile-time and runtime guarantees",
   "main": "dist/index.js",
   "module": "dist/index.mjs",
@@ -13,7 +13,14 @@
     "lint": "eslint src tests --ext .ts",
     "lint:fix": "eslint src tests --ext .ts --fix",
     "dev": "tsup src/index.ts --format cjs,esm --dts --watch",
-    "test": "jest --config jest.config.js",
+    "data:setup": "bash scripts/setup-datasets.sh",
+    "test": "jest --config jest.config.js --testPathIgnorePatterns tests/datasets",
+    "test:data": "pnpm run test:data:enron",
+    "test:data:enron": "jest --config jest.datasets.config.js --runInBand --verbose tests/datasets/enron.dataset.test.ts",
+    "test:data:enron:full": "jest --config jest.datasets.config.js --runInBand --verbose tests/datasets/enron.fullscan.test.ts",
+    "test:data:synthea": "jest --config jest.datasets.config.js --runInBand --verbose tests/datasets/synthea.dataset.test.ts",
+    "test:data:all": "jest --config jest.datasets.config.js --runInBand --verbose tests/datasets",
+    "test:datasets": "pnpm run test:data:all",
     "test:watch": "jest --config jest.config.js --watch",
     "test:coverage": "jest --config jest.config.js --coverage",
     "format": "prettier --write \"src/**/*.ts\" \"tests/**/*.ts\"",
@@ -65,6 +72,6 @@
     "zod": "^3.25.76"
   },
   "engines": {
-    "node": ">=24.0.0"
+    "node": ">=18.18.0"
   }
 }