typesecure 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -27,6 +27,8 @@ You “type” your data as `public | pii | secret | token | credential`, and `t
27
27
 
28
28
  ## Installation
29
29
 
30
+ Requires Node.js `>=18.18.0`.
31
+
30
32
  ```bash
31
33
  # Using npm
32
34
  npm install typesecure
@@ -113,8 +115,39 @@ app.use((req, _res, next) => {
113
115
  ### Redaction
114
116
 
115
117
  - `redact(value): value` (deep traversal)
118
+ - `redactText(value): string` (mask sensitive fragments in plain text)
119
+ - `detectText(value): StringDetection[]` (return ranges/kinds for audit workflows)
116
120
  - `safeJsonStringify(value): string`
117
121
  - `safeLoggerAdapter(consoleLike)`
122
+ - Redaction options:
123
+ - `guessByKey` (default `true`): redact suspicious keys like `password`, `token`, `apiKey`.
124
+ - `guessByValue` (default `true`): auto-detect and redact sensitive-looking values.
125
+ - `useDefaultValueDetector` (default `true`): enable or disable the built-in rule-based detectors.
126
+ - `stringDetectors`: add custom detectors (for NER/ML or domain-specific logic).
127
+ - `minDetectionConfidence` (default `0`): drop detections whose `confidence` is below this threshold (detections without a `confidence` count as `1`).
128
+ - Value detection masks only the sensitive fragments inside a larger string (instead of replacing the whole text), including:
129
+ - PII: email, phone, SSN, date of birth (`YYYY-MM-DD`), IPv4 address, payment card numbers (Luhn-validated).
130
+ - Secrets/tokens: JWTs, private key PEM blocks, GitHub tokens, AWS access keys, Stripe secret keys, OpenAI-style `sk-...` keys, credential pairs (`user:pass`), high-entropy token-like strings.
131
+
132
+ Example custom detector (NER/ML-style integration):
133
+
134
+ ```typescript
135
+ const out = redact(
136
+ { text: "Customer Jane Doe uses jane@example.com" },
137
+ {
138
+ stringDetectors: [
139
+ (value) => {
140
+ const name = "Jane Doe";
141
+ const idx = value.indexOf(name);
142
+ return idx >= 0
143
+ ? [{ start: idx, end: idx + name.length, kind: "pii", confidence: 0.92, source: "ml.ner" }]
144
+ : [];
145
+ },
146
+ ],
147
+ minDetectionConfidence: 0.8,
148
+ },
149
+ );
150
+ ```
118
151
 
119
152
  ### Policy
120
153
 
@@ -145,9 +178,47 @@ To contribute to this project:
145
178
  2. Install dependencies with `pnpm install`
146
179
  3. Run tests with `pnpm test`
147
180
  4. Build the package with `pnpm build`
181
+ 5. Run Enron dataset integration tests with `pnpm test:data`
182
+
183
+ ### Optional: external dataset setup
184
+
185
+ For larger redaction/policy experiments (Enron + Synthea FHIR), fetch datasets locally:
186
+
187
+ ```bash
188
+ pnpm data:setup
189
+ ```
190
+
191
+ This command downloads and extracts the datasets to:
192
+
193
+ - `data/enron-maildir`
194
+ - `data/synthea_sample_data_fhir_latest`
195
+
196
+ Notes:
197
+
198
+ - `data/` is gitignored and not published to npm.
199
+ - `pnpm test` excludes dataset suites by default.
200
+ - `pnpm test:data` runs Enron dataset tests with verbose output.
201
+ - `pnpm test:data:synthea` runs Synthea-specific dataset tests.
202
+ - `pnpm test:data:all` runs all dataset suites.
203
+ - You can override source URLs with `ENRON_URL=...` and/or `SYNTHEA_FHIR_URL=...`.
204
+ - You can change destination with `DATA_DIR=/path/to/data`.
205
+
206
+ Dataset sources:
207
+
208
+ - Enron: [https://www.cs.cmu.edu/~enron/](https://www.cs.cmu.edu/~enron/)
209
+ - Synthea: [https://github.com/synthetichealth/synthea-sample-data/](https://github.com/synthetichealth/synthea-sample-data/)
148
210
 
149
211
  This project uses TypeScript for type safety, Jest for testing, and ESLint for code quality.
150
212
 
213
+ ## Dataset Acknowledgements
214
+
215
+ We use these public datasets for redaction and policy testing:
216
+
217
+ - [CMU Enron Email Dataset](https://www.cs.cmu.edu/~enron/)
218
+ - [Synthea Sample Data](https://github.com/synthetichealth/synthea-sample-data/)
219
+
220
+ Personal note: I am especially interested in the historical context around Enron, including how it was able to happen and the improvements in governance and controls that followed.
221
+
151
222
  ## License
152
223
 
153
224
  MIT © [Arvid Berndtsson](https://github.com/arvid-berndtsson)
package/dist/index.d.mts CHANGED
@@ -50,12 +50,50 @@ declare function secretText(value: string): SecretString;
50
50
  declare function token(value: string): TokenString;
51
51
  declare function credential(value: string): CredentialString;
52
52
 
53
/**
 * One sensitive span reported by a string detector.
 *
 * When detections are applied, characters at indices `start <= i < end` are
 * replaced by a placeholder; both bounds are truncated to integers and clamped
 * to the string length first.
 */
+ type StringDetection = Readonly<{
54
/** Index of the first character of the span. */
+ start: number;
55
/** Index just past the last character of the span (exclusive). */
+ end: number;
56
/** Classification handed to the placeholder function for this span. */
+ kind: DataClassification | "unknown";
57
/** Compared against `minDetectionConfidence`; treated as `1` when omitted. */
+ confidence?: number;
58
/** Free-form label for the origin of the detection (built-in rules use `rule.*` names). */
+ source?: string;
59
+ }>;
60
/**
 * A function that inspects one string and returns the spans it considers sensitive.
 *
 * `redact` calls detectors with `{ keyHint, depth }` for each string it visits;
 * `detectText` calls them with `{ depth: 0 }`.
 */
+ type StringDetector = (value: string, context: Readonly<{
61
/** NOTE(review): presumably the property name the string was found under — confirm against the `redact` walker. */
+ keyHint?: string;
62
/** Traversal depth at which the string was encountered (`0` for plain-text calls). */
+ depth: number;
63
+ }>) => readonly StringDetection[];
64
+
65
/**
 * Options that control which string detectors run and which of their results are kept.
 */
+ type StringDetectionOptions = Readonly<{
66
/** Run the built-in rule-based detector. Defaults to true. */
+ useDefaultValueDetector?: boolean;
67
/** Extra detectors, run after the built-in one. Defaults to none. */
+ stringDetectors?: readonly StringDetector[];
68
/** Detections with a lower confidence are discarded. Defaults to 0. */
+ minDetectionConfidence?: number;
69
+ }>;
70
+
53
71
  type RedactOptions = Readonly<{
54
72
  /**
55
73
  * If true, redact values for suspicious keys even if they aren't classified.
56
74
  * Defaults to true.
57
75
  */
58
76
  guessByKey?: boolean;
77
+ /**
78
+ * If true, redact suspicious string values even when keys are not suspicious.
79
+ * Defaults to true.
80
+ */
81
+ guessByValue?: boolean;
82
+ /**
83
+ * Additional string detectors (for custom heuristics, NER, ML models, etc).
84
+ * Detectors return match ranges to mask in text.
85
+ */
86
+ stringDetectors?: readonly StringDetector[];
87
+ /**
88
+ * If false, disables the built-in rule-based value detector.
89
+ * Defaults to true.
90
+ */
91
+ useDefaultValueDetector?: boolean;
92
+ /**
93
+ * Minimum confidence for detections from string detectors.
94
+ * Defaults to 0.
95
+ */
96
+ minDetectionConfidence?: number;
59
97
  /**
60
98
  * Placeholder format for redacted values.
61
99
  * Defaults to "[REDACTED:<kind>]".
@@ -66,8 +104,10 @@ type RedactOptions = Readonly<{
66
104
  * Defaults to 25.
67
105
  */
68
106
  maxDepth?: number;
69
- }>;
107
+ }> & StringDetectionOptions;
70
108
  declare function redact<T>(value: T, options?: RedactOptions): T;
109
/**
 * Runs the configured string detectors over plain text and returns the detected
 * spans without modifying the text.
 */
+ declare function detectText(value: string, options?: Pick<RedactOptions, "useDefaultValueDetector" | "stringDetectors" | "minDetectionConfidence">): StringDetection[];
110
/**
 * Returns `value` with every detected sensitive span replaced by a placeholder.
 * When `guessByValue` is false the text is returned unchanged.
 */
+ declare function redactText(value: string, options?: Pick<RedactOptions, "guessByValue" | "useDefaultValueDetector" | "stringDetectors" | "minDetectionConfidence" | "placeholder">): string;
71
111
  declare function safeJsonStringify(value: unknown, options?: RedactOptions, space?: number): string;
72
112
  /**
73
113
  * Convenience logger that will redact classified data and suspicious keys.
@@ -113,4 +153,4 @@ declare function audit(policy: Policy, action: PolicyAction, data: unknown): Aud
113
153
  */
114
154
  declare function policyLog(policy: Policy, logger: Pick<Console, "info" | "warn" | "error" | "debug" | "log">, level: keyof Pick<Console, "info" | "warn" | "error" | "debug" | "log">, ...args: unknown[]): void;
115
155
 
116
- export { type AuditEvent, type Classified, type CredentialString, CredentialStringSchema, type DataClassification, type PIIString, PIIStringSchema, type Policy, type PolicyAction, type PolicyDecision, type PublicString, PublicStringSchema, type RedactOptions, type SecretString, SecretStringSchema, type TokenString, TokenStringSchema, assertAllowed, audit, classificationOf, credential, decide, defaultPolicy, httpAuthorizationBearer, isClassified, piiText, policyLog, publicText, redact, reveal, safeJsonStringify, safeLoggerAdapter, secretText, token };
156
+ export { type AuditEvent, type Classified, type CredentialString, CredentialStringSchema, type DataClassification, type PIIString, PIIStringSchema, type Policy, type PolicyAction, type PolicyDecision, type PublicString, PublicStringSchema, type RedactOptions, type SecretString, SecretStringSchema, type StringDetection, type StringDetector, type TokenString, TokenStringSchema, assertAllowed, audit, classificationOf, credential, decide, defaultPolicy, detectText, httpAuthorizationBearer, isClassified, piiText, policyLog, publicText, redact, redactText, reveal, safeJsonStringify, safeLoggerAdapter, secretText, token };
package/dist/index.d.ts CHANGED
@@ -50,12 +50,50 @@ declare function secretText(value: string): SecretString;
50
50
  declare function token(value: string): TokenString;
51
51
  declare function credential(value: string): CredentialString;
52
52
 
53
/**
 * One sensitive span reported by a string detector.
 *
 * When detections are applied, characters at indices `start <= i < end` are
 * replaced by a placeholder; both bounds are truncated to integers and clamped
 * to the string length first.
 */
+ type StringDetection = Readonly<{
54
/** Index of the first character of the span. */
+ start: number;
55
/** Index just past the last character of the span (exclusive). */
+ end: number;
56
/** Classification handed to the placeholder function for this span. */
+ kind: DataClassification | "unknown";
57
/** Compared against `minDetectionConfidence`; treated as `1` when omitted. */
+ confidence?: number;
58
/** Free-form label for the origin of the detection (built-in rules use `rule.*` names). */
+ source?: string;
59
+ }>;
60
/**
 * A function that inspects one string and returns the spans it considers sensitive.
 *
 * `redact` calls detectors with `{ keyHint, depth }` for each string it visits;
 * `detectText` calls them with `{ depth: 0 }`.
 */
+ type StringDetector = (value: string, context: Readonly<{
61
/** NOTE(review): presumably the property name the string was found under — confirm against the `redact` walker. */
+ keyHint?: string;
62
/** Traversal depth at which the string was encountered (`0` for plain-text calls). */
+ depth: number;
63
+ }>) => readonly StringDetection[];
64
+
65
/**
 * Options that control which string detectors run and which of their results are kept.
 */
+ type StringDetectionOptions = Readonly<{
66
/** Run the built-in rule-based detector. Defaults to true. */
+ useDefaultValueDetector?: boolean;
67
/** Extra detectors, run after the built-in one. Defaults to none. */
+ stringDetectors?: readonly StringDetector[];
68
/** Detections with a lower confidence are discarded. Defaults to 0. */
+ minDetectionConfidence?: number;
69
+ }>;
70
+
53
71
  type RedactOptions = Readonly<{
54
72
  /**
55
73
  * If true, redact values for suspicious keys even if they aren't classified.
56
74
  * Defaults to true.
57
75
  */
58
76
  guessByKey?: boolean;
77
+ /**
78
+ * If true, redact suspicious string values even when keys are not suspicious.
79
+ * Defaults to true.
80
+ */
81
+ guessByValue?: boolean;
82
+ /**
83
+ * Additional string detectors (for custom heuristics, NER, ML models, etc).
84
+ * Detectors return match ranges to mask in text.
85
+ */
86
+ stringDetectors?: readonly StringDetector[];
87
+ /**
88
+ * If false, disables the built-in rule-based value detector.
89
+ * Defaults to true.
90
+ */
91
+ useDefaultValueDetector?: boolean;
92
+ /**
93
+ * Minimum confidence for detections from string detectors.
94
+ * Defaults to 0.
95
+ */
96
+ minDetectionConfidence?: number;
59
97
  /**
60
98
  * Placeholder format for redacted values.
61
99
  * Defaults to "[REDACTED:<kind>]".
@@ -66,8 +104,10 @@ type RedactOptions = Readonly<{
66
104
  * Defaults to 25.
67
105
  */
68
106
  maxDepth?: number;
69
- }>;
107
+ }> & StringDetectionOptions;
70
108
  declare function redact<T>(value: T, options?: RedactOptions): T;
109
/**
 * Runs the configured string detectors over plain text and returns the detected
 * spans without modifying the text.
 */
+ declare function detectText(value: string, options?: Pick<RedactOptions, "useDefaultValueDetector" | "stringDetectors" | "minDetectionConfidence">): StringDetection[];
110
/**
 * Returns `value` with every detected sensitive span replaced by a placeholder.
 * When `guessByValue` is false the text is returned unchanged.
 */
+ declare function redactText(value: string, options?: Pick<RedactOptions, "guessByValue" | "useDefaultValueDetector" | "stringDetectors" | "minDetectionConfidence" | "placeholder">): string;
71
111
  declare function safeJsonStringify(value: unknown, options?: RedactOptions, space?: number): string;
72
112
  /**
73
113
  * Convenience logger that will redact classified data and suspicious keys.
@@ -113,4 +153,4 @@ declare function audit(policy: Policy, action: PolicyAction, data: unknown): Aud
113
153
  */
114
154
  declare function policyLog(policy: Policy, logger: Pick<Console, "info" | "warn" | "error" | "debug" | "log">, level: keyof Pick<Console, "info" | "warn" | "error" | "debug" | "log">, ...args: unknown[]): void;
115
155
 
116
- export { type AuditEvent, type Classified, type CredentialString, CredentialStringSchema, type DataClassification, type PIIString, PIIStringSchema, type Policy, type PolicyAction, type PolicyDecision, type PublicString, PublicStringSchema, type RedactOptions, type SecretString, SecretStringSchema, type TokenString, TokenStringSchema, assertAllowed, audit, classificationOf, credential, decide, defaultPolicy, httpAuthorizationBearer, isClassified, piiText, policyLog, publicText, redact, reveal, safeJsonStringify, safeLoggerAdapter, secretText, token };
156
+ export { type AuditEvent, type Classified, type CredentialString, CredentialStringSchema, type DataClassification, type PIIString, PIIStringSchema, type Policy, type PolicyAction, type PolicyDecision, type PublicString, PublicStringSchema, type RedactOptions, type SecretString, SecretStringSchema, type StringDetection, type StringDetector, type TokenString, TokenStringSchema, assertAllowed, audit, classificationOf, credential, decide, defaultPolicy, detectText, httpAuthorizationBearer, isClassified, piiText, policyLog, publicText, redact, redactText, reveal, safeJsonStringify, safeLoggerAdapter, secretText, token };
package/dist/index.js CHANGED
@@ -31,12 +31,14 @@ __export(index_exports, {
31
31
  credential: () => credential,
32
32
  decide: () => decide,
33
33
  defaultPolicy: () => defaultPolicy,
34
+ detectText: () => detectText,
34
35
  httpAuthorizationBearer: () => httpAuthorizationBearer,
35
36
  isClassified: () => isClassified,
36
37
  piiText: () => piiText,
37
38
  policyLog: () => policyLog,
38
39
  publicText: () => publicText,
39
40
  redact: () => redact,
41
+ redactText: () => redactText,
40
42
  reveal: () => reveal,
41
43
  safeJsonStringify: () => safeJsonStringify,
42
44
  safeLoggerAdapter: () => safeLoggerAdapter,
@@ -94,6 +96,179 @@ function credential(value) {
94
96
  return CredentialStringSchema.parse(value);
95
97
  }
96
98
 
99
+ // src/detectors/rules.ts
100
// Email address: local part, "@", domain and a TLD of at least two letters (case-insensitive).
+ var EMAIL_RE = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/gi;
101
// Phone number: optional 1-3 digit country code, a 3-digit area code (optionally parenthesised),
// then 3 + 4 digits, with optional "-", "." or whitespace separators.
+ var PHONE_RE = /\b(?:\+?\d{1,3}[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?)\d{3}[-.\s]?\d{4}\b/g;
102
// US Social Security number in the ddd-dd-dddd layout.
+ var SSN_RE = /\b\d{3}-\d{2}-\d{4}\b/g;
103
// YYYY-MM-DD date with a 19xx/20xx year, month 01-12 and day 01-31. Any date in this
// layout matches, not only birth dates.
+ var DOB_RE = /\b(?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])\b/g;
104
// Four dot-separated groups of 1-3 digits; the pattern itself does not range-check the groups.
+ var IPV4_RE = /\b(?:\d{1,3}\.){3}\d{1,3}\b/g;
105
// 13-19 digits, each optionally followed by spaces or hyphens; the checksum is not verified here.
+ var CREDIT_CARD_RE = /\b(?:\d[ -]*?){13,19}\b/g;
106
// Three dot-separated segments of at least 12 characters from [A-Za-z0-9_-] (JWT-shaped).
+ var JWT_RE = /\b[A-Za-z0-9_-]{12,}\.[A-Za-z0-9_-]{12,}\.[A-Za-z0-9_-]{12,}\b/g;
107
// PEM block from "-----BEGIN ... PRIVATE KEY-----" to the nearest matching END line, newlines included.
+ var PRIVATE_KEY_BLOCK_RE = /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g;
108
// "AKIA" followed by 16 upper-case letters or digits.
+ var AWS_ACCESS_KEY_RE = /\bAKIA[0-9A-Z]{16}\b/g;
109
// "gh" + one of p/o/u/s/r + "_" + at least 20 alphanumerics.
+ var GITHUB_TOKEN_RE = /\bgh[pousr]_[A-Za-z0-9]{20,}\b/g;
110
// "sk_live_" or "sk_test_" followed by at least 16 alphanumerics.
+ var STRIPE_SECRET_RE = /\bsk_(?:live|test)_[A-Za-z0-9]{16,}\b/g;
111
// "sk-" followed by at least 20 alphanumerics.
+ var OPENAI_KEY_RE = /\bsk-[A-Za-z0-9]{20,}\b/g;
112
// Two runs of non-whitespace, non-colon characters joined by a single ":" (left 1-128, right 1-256).
// NOTE(review): this shape also matches inputs such as "https://example.com" or "12:30" — confirm that is intended.
+ var CREDENTIAL_PAIR_RE = /\b[^:\s]{1,128}:[^\s:]{1,256}\b/g;
113
// Runs of 28 or more characters from [A-Za-z0-9+/=_-].
+ var HIGH_ENTROPY_TOKEN_RE = /\b[A-Za-z0-9+/=_-]{28,}\b/g;
114
/**
 * Checks whether a candidate payment card number passes the Luhn checksum.
 *
 * Spaces and hyphens are stripped first; what remains must be 13-19 digits.
 *
 * @param {string} input Candidate number, optionally grouped with spaces or hyphens.
 * @returns {boolean} `true` when the cleaned digits satisfy the Luhn check.
 */
function luhnValid(input) {
  const compact = input.replace(/[ -]/g, "");
  if (!/^\d{13,19}$/.test(compact)) {
    return false;
  }
  // Counting from the rightmost digit (position 0), every odd position is doubled
  // and reduced by 9 when the doubled value exceeds 9.
  const total = [...compact].reverse().reduce((acc, ch, pos) => {
    const digit = Number(ch);
    if (pos % 2 === 0) return acc + digit;
    const doubled = digit * 2;
    return acc + (doubled > 9 ? doubled - 9 : doubled);
  }, 0);
  return total % 10 === 0;
}
130
/**
 * Locates every `[REDACTED:...]` placeholder already present in a string.
 *
 * @param {string} text Text to scan.
 * @returns {{start: number, end: number}[]} Half-open index ranges, in order of appearance.
 */
function placeholderRanges(text) {
  const pattern = /\[REDACTED:[^\]]+\]/g;
  const found = [];
  for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
    const start = hit.index;
    found.push({ start, end: start + hit[0].length });
  }
  return found;
}
139
/**
 * Tells whether the half-open span `[start, end)` intersects any of the given ranges.
 *
 * @param {number} start Inclusive start of the span.
 * @param {number} end Exclusive end of the span.
 * @param {{start: number, end: number}[]} ranges Half-open ranges to test against.
 * @returns {boolean} `true` if at least one range shares a position with the span.
 */
function overlapsRanges(start, end, ranges) {
  for (const range of ranges) {
    // Ranges that merely touch at a boundary do not count as overlapping.
    if (range.end > start && range.start < end) return true;
  }
  return false;
}
142
/**
 * Collects every match of `re` in `value` as a detection with confidence 1.
 *
 * Matches are dropped when they are empty, when `guard` rejects them, or when
 * they overlap a `[REDACTED:...]` placeholder already present in `value`.
 *
 * @param {string} value Text to scan.
 * @param {RegExp} re Pattern to apply; it is copied, so its own `lastIndex` is never modified.
 * @param {string} kind Classification recorded on each detection.
 * @param {string} source Label recorded on each detection.
 * @param {(match: string) => boolean} [guard] Optional extra check on the matched text.
 * @returns {{start: number, end: number, kind: string, source: string, confidence: number}[]}
 *   Detections in match order.
 */
function detectRegexRanges(value, re, kind, source, guard) {
  const out = [];
  // exec() only advances through the input for global patterns; force the flag so a
  // non-global pattern cannot make the loop below spin on the same match forever.
  const flags = re.flags.includes("g") ? re.flags : `${re.flags}g`;
  const execRe = new RegExp(re.source, flags);
  const protectedRanges = placeholderRanges(value);
  let m;
  while ((m = execRe.exec(value)) !== null) {
    const match = m[0];
    const start = m.index;
    const end = start + match.length;
    // A zero-length match leaves lastIndex where it was. Step past it BEFORE any
    // `continue`, otherwise the empty-match skip below would loop forever.
    if (execRe.lastIndex === start) execRe.lastIndex += 1;
    if (!match || (guard && !guard(match)) || overlapsRanges(start, end, protectedRanges)) {
      continue;
    }
    out.push({ start, end, kind, source, confidence: 1 });
  }
  return out;
}
159
/**
 * Built-in rule-based detector: runs every regex rule over `value` and returns all
 * detections, grouped by rule in the order the rules are listed below.
 *
 * @param {string} value Text to scan.
 * @returns {object[]} Detections produced by `detectRegexRanges` for each rule.
 */
var defaultRuleStringDetector = (value) => {
  // Every dot-separated group must be a number in 0..255.
  const octetsInRange = (m) => m.split(".").every((p) => Number(p) >= 0 && Number(p) <= 255);
  // A long run only counts as a token when it contains a digit (letters-only runs never do).
  const containsDigit = (m) => /\d/.test(m);

  // [pattern, kind, source, optional guard]; secrets and tokens first, then PII,
  // then the generic high-entropy fallback.
  const rules = [
    [PRIVATE_KEY_BLOCK_RE, "secret", "rule.private-key"],
    [JWT_RE, "token", "rule.jwt"],
    [AWS_ACCESS_KEY_RE, "credential", "rule.aws-access-key"],
    [GITHUB_TOKEN_RE, "token", "rule.github-token"],
    [STRIPE_SECRET_RE, "secret", "rule.stripe-secret"],
    [OPENAI_KEY_RE, "secret", "rule.openai-key"],
    [CREDENTIAL_PAIR_RE, "credential", "rule.credential-pair"],
    [EMAIL_RE, "pii", "rule.email"],
    [PHONE_RE, "pii", "rule.phone"],
    [SSN_RE, "pii", "rule.ssn"],
    [DOB_RE, "pii", "rule.dob"],
    [IPV4_RE, "pii", "rule.ipv4", octetsInRange],
    [CREDIT_CARD_RE, "pii", "rule.credit-card", luhnValid],
    [HIGH_ENTROPY_TOKEN_RE, "token", "rule.high-entropy", containsDigit],
  ];

  const found = [];
  for (const [re, kind, source, guard] of rules) {
    found.push(...detectRegexRanges(value, re, kind, source, guard));
  }
  return found;
};
224
+
225
+ // src/detectors/engine.ts
226
/**
 * Runs the built-in detector (unless disabled) followed by any custom detectors,
 * then keeps only usable detections.
 *
 * @param {string} value Text to scan.
 * @param {object} context Context object forwarded unchanged to every detector.
 * @param {object} [options] Reads `useDefaultValueDetector` (default `true`),
 *   `stringDetectors` (default none) and `minDetectionConfidence` (default `0`).
 * @returns {object[]} Detections whose confidence (1 when absent) meets the threshold
 *   and whose `start` and `end` are finite numbers.
 */
function collectStringDetections(value, context, options) {
  const threshold = options?.minDetectionConfidence ?? 0;
  const builtInEnabled = options?.useDefaultValueDetector ?? true;
  const extraDetectors = options?.stringDetectors ?? [];

  const candidates = builtInEnabled ? [...defaultRuleStringDetector(value, context)] : [];
  for (const detect of extraDetectors) {
    for (const hit of detect(value, context)) {
      candidates.push(hit);
    }
  }

  return candidates.filter((d) => {
    if (!((d.confidence ?? 1) >= threshold)) return false;
    return Number.isFinite(d.start) && Number.isFinite(d.end);
  });
}
241
+ function applyDetectionsToString(value, detections, placeholder) {
242
+ if (detections.length === 0) return value;
243
+ const owner = new Array(value.length).fill(-1);
244
+ const normalized = [];
245
+ for (const d of detections) {
246
+ const start = Math.max(0, Math.min(value.length, Math.trunc(d.start)));
247
+ const end = Math.max(start, Math.min(value.length, Math.trunc(d.end)));
248
+ if (end <= start) continue;
249
+ normalized.push({ start, end, kind: d.kind });
250
+ }
251
+ normalized.forEach((d, idx) => {
252
+ for (let i2 = d.start; i2 < d.end; i2 += 1) {
253
+ if (owner[i2] === -1) owner[i2] = idx;
254
+ }
255
+ });
256
+ let out = "";
257
+ let i = 0;
258
+ while (i < value.length) {
259
+ const idx = owner[i];
260
+ if (idx < 0) {
261
+ out += value[i];
262
+ i += 1;
263
+ continue;
264
+ }
265
+ const d = normalized[idx];
266
+ out += placeholder(d.kind);
267
+ i = d.end;
268
+ }
269
+ return out;
270
+ }
271
+
97
272
  // src/redaction.ts
98
273
  var DEFAULT_SUSPICIOUS_KEY = /pass(word)?|pwd|secret|token|api[_-]?key|auth|bearer|cookie|session|private[_-]?key|ssh|credential/i;
99
274
  function defaultPlaceholder(kind) {
@@ -104,6 +279,7 @@ function isPlainObject(value) {
104
279
  }
105
280
  function redact(value, options) {
106
281
  const guessByKey = options?.guessByKey ?? true;
282
+ const guessByValue = options?.guessByValue ?? true;
107
283
  const placeholder = options?.placeholder ?? defaultPlaceholder;
108
284
  const maxDepth = options?.maxDepth ?? 25;
109
285
  const seen = /* @__PURE__ */ new WeakMap();
@@ -118,6 +294,15 @@ function redact(value, options) {
118
294
  return placeholder("unknown");
119
295
  }
120
296
  }
297
+ if (guessByValue && typeof v === "string") {
298
+ const detections = collectStringDetections(
299
+ v,
300
+ { keyHint, depth },
301
+ options
302
+ );
303
+ const masked = applyDetectionsToString(v, detections, placeholder);
304
+ if (masked !== v) return masked;
305
+ }
121
306
  if (Array.isArray(v)) {
122
307
  return v.map((item) => walk(item, depth + 1));
123
308
  }
@@ -137,6 +322,16 @@ function redact(value, options) {
137
322
  };
138
323
  return walk(value, 0);
139
324
  }
325
/**
 * Returns the detections found in a plain string without modifying it.
 *
 * @param {string} value Text to scan.
 * @param {object} [options] Detection options forwarded to `collectStringDetections`.
 * @returns {object[]} Detections that passed the confidence and finiteness filters.
 */
function detectText(value, options) {
  // Plain text has no surrounding object: detectors get depth 0 and no key hint.
  const rootContext = { depth: 0 };
  return collectStringDetections(value, rootContext, options);
}
328
/**
 * Masks the sensitive fragments of a plain string.
 *
 * @param {string} value Text to redact.
 * @param {object} [options] Reads `guessByValue` (default `true`) and `placeholder`
 *   (default `defaultPlaceholder`); the whole object is also forwarded to `detectText`.
 * @returns {string} The masked text, or `value` unchanged when `guessByValue` is falsy.
 */
function redactText(value, options) {
  const enabled = options?.guessByValue ?? true;
  if (enabled) {
    const render = options?.placeholder ?? defaultPlaceholder;
    return applyDetectionsToString(value, detectText(value, options), render);
  }
  return value;
}
140
335
  function safeJsonStringify(value, options, space) {
141
336
  return JSON.stringify(redact(value, options), null, space);
142
337
  }
@@ -243,12 +438,14 @@ function policyLog(policy, logger, level, ...args) {
243
438
  credential,
244
439
  decide,
245
440
  defaultPolicy,
441
+ detectText,
246
442
  httpAuthorizationBearer,
247
443
  isClassified,
248
444
  piiText,
249
445
  policyLog,
250
446
  publicText,
251
447
  redact,
448
+ redactText,
252
449
  reveal,
253
450
  safeJsonStringify,
254
451
  safeLoggerAdapter,
package/dist/index.mjs CHANGED
@@ -47,6 +47,179 @@ function credential(value) {
47
47
  return CredentialStringSchema.parse(value);
48
48
  }
49
49
 
50
+ // src/detectors/rules.ts
51
// Email address: local part, "@", domain and a TLD of at least two letters (case-insensitive).
+ var EMAIL_RE = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/gi;
52
// Phone number: optional 1-3 digit country code, a 3-digit area code (optionally parenthesised),
// then 3 + 4 digits, with optional "-", "." or whitespace separators.
+ var PHONE_RE = /\b(?:\+?\d{1,3}[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?)\d{3}[-.\s]?\d{4}\b/g;
53
// US Social Security number in the ddd-dd-dddd layout.
+ var SSN_RE = /\b\d{3}-\d{2}-\d{4}\b/g;
54
// YYYY-MM-DD date with a 19xx/20xx year, month 01-12 and day 01-31. Any date in this
// layout matches, not only birth dates.
+ var DOB_RE = /\b(?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])\b/g;
55
// Four dot-separated groups of 1-3 digits; the pattern itself does not range-check the groups.
+ var IPV4_RE = /\b(?:\d{1,3}\.){3}\d{1,3}\b/g;
56
// 13-19 digits, each optionally followed by spaces or hyphens; the checksum is not verified here.
+ var CREDIT_CARD_RE = /\b(?:\d[ -]*?){13,19}\b/g;
57
// Three dot-separated segments of at least 12 characters from [A-Za-z0-9_-] (JWT-shaped).
+ var JWT_RE = /\b[A-Za-z0-9_-]{12,}\.[A-Za-z0-9_-]{12,}\.[A-Za-z0-9_-]{12,}\b/g;
58
// PEM block from "-----BEGIN ... PRIVATE KEY-----" to the nearest matching END line, newlines included.
+ var PRIVATE_KEY_BLOCK_RE = /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g;
59
// "AKIA" followed by 16 upper-case letters or digits.
+ var AWS_ACCESS_KEY_RE = /\bAKIA[0-9A-Z]{16}\b/g;
60
// "gh" + one of p/o/u/s/r + "_" + at least 20 alphanumerics.
+ var GITHUB_TOKEN_RE = /\bgh[pousr]_[A-Za-z0-9]{20,}\b/g;
61
// "sk_live_" or "sk_test_" followed by at least 16 alphanumerics.
+ var STRIPE_SECRET_RE = /\bsk_(?:live|test)_[A-Za-z0-9]{16,}\b/g;
62
// "sk-" followed by at least 20 alphanumerics.
+ var OPENAI_KEY_RE = /\bsk-[A-Za-z0-9]{20,}\b/g;
63
// Two runs of non-whitespace, non-colon characters joined by a single ":" (left 1-128, right 1-256).
// NOTE(review): this shape also matches inputs such as "https://example.com" or "12:30" — confirm that is intended.
+ var CREDENTIAL_PAIR_RE = /\b[^:\s]{1,128}:[^\s:]{1,256}\b/g;
64
// Runs of 28 or more characters from [A-Za-z0-9+/=_-].
+ var HIGH_ENTROPY_TOKEN_RE = /\b[A-Za-z0-9+/=_-]{28,}\b/g;
65
/**
 * Checks whether a candidate payment card number passes the Luhn checksum.
 *
 * Spaces and hyphens are stripped first; what remains must be 13-19 digits.
 *
 * @param {string} input Candidate number, optionally grouped with spaces or hyphens.
 * @returns {boolean} `true` when the cleaned digits satisfy the Luhn check.
 */
function luhnValid(input) {
  const compact = input.replace(/[ -]/g, "");
  if (!/^\d{13,19}$/.test(compact)) {
    return false;
  }
  // Counting from the rightmost digit (position 0), every odd position is doubled
  // and reduced by 9 when the doubled value exceeds 9.
  const total = [...compact].reverse().reduce((acc, ch, pos) => {
    const digit = Number(ch);
    if (pos % 2 === 0) return acc + digit;
    const doubled = digit * 2;
    return acc + (doubled > 9 ? doubled - 9 : doubled);
  }, 0);
  return total % 10 === 0;
}
81
/**
 * Locates every `[REDACTED:...]` placeholder already present in a string.
 *
 * @param {string} text Text to scan.
 * @returns {{start: number, end: number}[]} Half-open index ranges, in order of appearance.
 */
function placeholderRanges(text) {
  const pattern = /\[REDACTED:[^\]]+\]/g;
  const found = [];
  for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
    const start = hit.index;
    found.push({ start, end: start + hit[0].length });
  }
  return found;
}
90
/**
 * Tells whether the half-open span `[start, end)` intersects any of the given ranges.
 *
 * @param {number} start Inclusive start of the span.
 * @param {number} end Exclusive end of the span.
 * @param {{start: number, end: number}[]} ranges Half-open ranges to test against.
 * @returns {boolean} `true` if at least one range shares a position with the span.
 */
function overlapsRanges(start, end, ranges) {
  for (const range of ranges) {
    // Ranges that merely touch at a boundary do not count as overlapping.
    if (range.end > start && range.start < end) return true;
  }
  return false;
}
93
/**
 * Collects every match of `re` in `value` as a detection with confidence 1.
 *
 * Matches are dropped when they are empty, when `guard` rejects them, or when
 * they overlap a `[REDACTED:...]` placeholder already present in `value`.
 *
 * @param {string} value Text to scan.
 * @param {RegExp} re Pattern to apply; it is copied, so its own `lastIndex` is never modified.
 * @param {string} kind Classification recorded on each detection.
 * @param {string} source Label recorded on each detection.
 * @param {(match: string) => boolean} [guard] Optional extra check on the matched text.
 * @returns {{start: number, end: number, kind: string, source: string, confidence: number}[]}
 *   Detections in match order.
 */
function detectRegexRanges(value, re, kind, source, guard) {
  const out = [];
  // exec() only advances through the input for global patterns; force the flag so a
  // non-global pattern cannot make the loop below spin on the same match forever.
  const flags = re.flags.includes("g") ? re.flags : `${re.flags}g`;
  const execRe = new RegExp(re.source, flags);
  const protectedRanges = placeholderRanges(value);
  let m;
  while ((m = execRe.exec(value)) !== null) {
    const match = m[0];
    const start = m.index;
    const end = start + match.length;
    // A zero-length match leaves lastIndex where it was. Step past it BEFORE any
    // `continue`, otherwise the empty-match skip below would loop forever.
    if (execRe.lastIndex === start) execRe.lastIndex += 1;
    if (!match || (guard && !guard(match)) || overlapsRanges(start, end, protectedRanges)) {
      continue;
    }
    out.push({ start, end, kind, source, confidence: 1 });
  }
  return out;
}
110
/**
 * Built-in rule-based detector: runs every regex rule over `value` and returns all
 * detections, grouped by rule in the order the rules are listed below.
 *
 * @param {string} value Text to scan.
 * @returns {object[]} Detections produced by `detectRegexRanges` for each rule.
 */
var defaultRuleStringDetector = (value) => {
  // Every dot-separated group must be a number in 0..255.
  const octetsInRange = (m) => m.split(".").every((p) => Number(p) >= 0 && Number(p) <= 255);
  // A long run only counts as a token when it contains a digit (letters-only runs never do).
  const containsDigit = (m) => /\d/.test(m);

  // [pattern, kind, source, optional guard]; secrets and tokens first, then PII,
  // then the generic high-entropy fallback.
  const rules = [
    [PRIVATE_KEY_BLOCK_RE, "secret", "rule.private-key"],
    [JWT_RE, "token", "rule.jwt"],
    [AWS_ACCESS_KEY_RE, "credential", "rule.aws-access-key"],
    [GITHUB_TOKEN_RE, "token", "rule.github-token"],
    [STRIPE_SECRET_RE, "secret", "rule.stripe-secret"],
    [OPENAI_KEY_RE, "secret", "rule.openai-key"],
    [CREDENTIAL_PAIR_RE, "credential", "rule.credential-pair"],
    [EMAIL_RE, "pii", "rule.email"],
    [PHONE_RE, "pii", "rule.phone"],
    [SSN_RE, "pii", "rule.ssn"],
    [DOB_RE, "pii", "rule.dob"],
    [IPV4_RE, "pii", "rule.ipv4", octetsInRange],
    [CREDIT_CARD_RE, "pii", "rule.credit-card", luhnValid],
    [HIGH_ENTROPY_TOKEN_RE, "token", "rule.high-entropy", containsDigit],
  ];

  const found = [];
  for (const [re, kind, source, guard] of rules) {
    found.push(...detectRegexRanges(value, re, kind, source, guard));
  }
  return found;
};
175
+
176
+ // src/detectors/engine.ts
177
/**
 * Runs the built-in detector (unless disabled) followed by any custom detectors,
 * then keeps only usable detections.
 *
 * @param {string} value Text to scan.
 * @param {object} context Context object forwarded unchanged to every detector.
 * @param {object} [options] Reads `useDefaultValueDetector` (default `true`),
 *   `stringDetectors` (default none) and `minDetectionConfidence` (default `0`).
 * @returns {object[]} Detections whose confidence (1 when absent) meets the threshold
 *   and whose `start` and `end` are finite numbers.
 */
function collectStringDetections(value, context, options) {
  const threshold = options?.minDetectionConfidence ?? 0;
  const builtInEnabled = options?.useDefaultValueDetector ?? true;
  const extraDetectors = options?.stringDetectors ?? [];

  const candidates = builtInEnabled ? [...defaultRuleStringDetector(value, context)] : [];
  for (const detect of extraDetectors) {
    for (const hit of detect(value, context)) {
      candidates.push(hit);
    }
  }

  return candidates.filter((d) => {
    if (!((d.confidence ?? 1) >= threshold)) return false;
    return Number.isFinite(d.start) && Number.isFinite(d.end);
  });
}
192
+ function applyDetectionsToString(value, detections, placeholder) {
193
+ if (detections.length === 0) return value;
194
+ const owner = new Array(value.length).fill(-1);
195
+ const normalized = [];
196
+ for (const d of detections) {
197
+ const start = Math.max(0, Math.min(value.length, Math.trunc(d.start)));
198
+ const end = Math.max(start, Math.min(value.length, Math.trunc(d.end)));
199
+ if (end <= start) continue;
200
+ normalized.push({ start, end, kind: d.kind });
201
+ }
202
+ normalized.forEach((d, idx) => {
203
+ for (let i2 = d.start; i2 < d.end; i2 += 1) {
204
+ if (owner[i2] === -1) owner[i2] = idx;
205
+ }
206
+ });
207
+ let out = "";
208
+ let i = 0;
209
+ while (i < value.length) {
210
+ const idx = owner[i];
211
+ if (idx < 0) {
212
+ out += value[i];
213
+ i += 1;
214
+ continue;
215
+ }
216
+ const d = normalized[idx];
217
+ out += placeholder(d.kind);
218
+ i = d.end;
219
+ }
220
+ return out;
221
+ }
222
+
50
223
  // src/redaction.ts
51
224
  var DEFAULT_SUSPICIOUS_KEY = /pass(word)?|pwd|secret|token|api[_-]?key|auth|bearer|cookie|session|private[_-]?key|ssh|credential/i;
52
225
  function defaultPlaceholder(kind) {
@@ -57,6 +230,7 @@ function isPlainObject(value) {
57
230
  }
58
231
  function redact(value, options) {
59
232
  const guessByKey = options?.guessByKey ?? true;
233
+ const guessByValue = options?.guessByValue ?? true;
60
234
  const placeholder = options?.placeholder ?? defaultPlaceholder;
61
235
  const maxDepth = options?.maxDepth ?? 25;
62
236
  const seen = /* @__PURE__ */ new WeakMap();
@@ -71,6 +245,15 @@ function redact(value, options) {
71
245
  return placeholder("unknown");
72
246
  }
73
247
  }
248
+ if (guessByValue && typeof v === "string") {
249
+ const detections = collectStringDetections(
250
+ v,
251
+ { keyHint, depth },
252
+ options
253
+ );
254
+ const masked = applyDetectionsToString(v, detections, placeholder);
255
+ if (masked !== v) return masked;
256
+ }
74
257
  if (Array.isArray(v)) {
75
258
  return v.map((item) => walk(item, depth + 1));
76
259
  }
@@ -90,6 +273,16 @@ function redact(value, options) {
90
273
  };
91
274
  return walk(value, 0);
92
275
  }
276
/**
 * Returns the detections found in a plain string without modifying it.
 *
 * @param {string} value Text to scan.
 * @param {object} [options] Detection options forwarded to `collectStringDetections`.
 * @returns {object[]} Detections that passed the confidence and finiteness filters.
 */
function detectText(value, options) {
  // Plain text has no surrounding object: detectors get depth 0 and no key hint.
  const rootContext = { depth: 0 };
  return collectStringDetections(value, rootContext, options);
}
279
/**
 * Masks the sensitive fragments of a plain string.
 *
 * @param {string} value Text to redact.
 * @param {object} [options] Reads `guessByValue` (default `true`) and `placeholder`
 *   (default `defaultPlaceholder`); the whole object is also forwarded to `detectText`.
 * @returns {string} The masked text, or `value` unchanged when `guessByValue` is falsy.
 */
function redactText(value, options) {
  const enabled = options?.guessByValue ?? true;
  if (enabled) {
    const render = options?.placeholder ?? defaultPlaceholder;
    return applyDetectionsToString(value, detectText(value, options), render);
  }
  return value;
}
93
286
  function safeJsonStringify(value, options, space) {
94
287
  return JSON.stringify(redact(value, options), null, space);
95
288
  }
@@ -195,12 +388,14 @@ export {
195
388
  credential,
196
389
  decide,
197
390
  defaultPolicy,
391
+ detectText,
198
392
  httpAuthorizationBearer,
199
393
  isClassified,
200
394
  piiText,
201
395
  policyLog,
202
396
  publicText,
203
397
  redact,
398
+ redactText,
204
399
  reveal,
205
400
  safeJsonStringify,
206
401
  safeLoggerAdapter,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "typesecure",
3
- "version": "0.2.2",
3
+ "version": "0.2.3",
4
4
  "description": "Type-safe data classification and security enforcement for TypeScript - prevent secrets and PII leaks with compile-time and runtime guarantees",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
@@ -13,7 +13,14 @@
13
13
  "lint": "eslint src tests --ext .ts",
14
14
  "lint:fix": "eslint src tests --ext .ts --fix",
15
15
  "dev": "tsup src/index.ts --format cjs,esm --dts --watch",
16
- "test": "jest --config jest.config.js",
16
+ "data:setup": "bash scripts/setup-datasets.sh",
17
+ "test": "jest --config jest.config.js --testPathIgnorePatterns tests/datasets",
18
+ "test:data": "pnpm run test:data:enron",
19
+ "test:data:enron": "jest --config jest.datasets.config.js --runInBand --verbose tests/datasets/enron.dataset.test.ts",
20
+ "test:data:enron:full": "jest --config jest.datasets.config.js --runInBand --verbose tests/datasets/enron.fullscan.test.ts",
21
+ "test:data:synthea": "jest --config jest.datasets.config.js --runInBand --verbose tests/datasets/synthea.dataset.test.ts",
22
+ "test:data:all": "jest --config jest.datasets.config.js --runInBand --verbose tests/datasets",
23
+ "test:datasets": "pnpm run test:data:all",
17
24
  "test:watch": "jest --config jest.config.js --watch",
18
25
  "test:coverage": "jest --config jest.config.js --coverage",
19
26
  "format": "prettier --write \"src/**/*.ts\" \"tests/**/*.ts\"",
@@ -65,6 +72,6 @@
65
72
  "zod": "^3.25.76"
66
73
  },
67
74
  "engines": {
68
- "node": ">=24.0.0"
75
+ "node": ">=18.18.0"
69
76
  }
70
77
  }