typesecure 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -0
- package/dist/index.d.mts +42 -2
- package/dist/index.d.ts +42 -2
- package/dist/index.js +197 -0
- package/dist/index.mjs +195 -0
- package/package.json +10 -3
package/README.md
CHANGED
|
@@ -27,6 +27,8 @@ You “type” your data as `public | pii | secret | token | credential`, and `t
|
|
|
27
27
|
|
|
28
28
|
## Installation
|
|
29
29
|
|
|
30
|
+
Requires Node.js `>=18.18.0`.
|
|
31
|
+
|
|
30
32
|
```bash
|
|
31
33
|
# Using npm
|
|
32
34
|
npm install typesecure
|
|
@@ -113,8 +115,39 @@ app.use((req, _res, next) => {
|
|
|
113
115
|
### Redaction
|
|
114
116
|
|
|
115
117
|
- `redact(value): value` (deep traversal)
|
|
118
|
+
- `redactText(value): string` (mask sensitive fragments in plain text)
|
|
119
|
+
- `detectText(value): StringDetection[]` (return ranges/kinds for audit workflows)
|
|
116
120
|
- `safeJsonStringify(value): string`
|
|
117
121
|
- `safeLoggerAdapter(consoleLike)`
|
|
122
|
+
- Redaction options:
|
|
123
|
+
- `guessByKey` (default `true`): redact suspicious keys like `password`, `token`, `apiKey`.
|
|
124
|
+
- `guessByValue` (default `true`): auto-detect and redact sensitive-looking values.
|
|
125
|
+
- `useDefaultValueDetector` (default `true`): set to `false` to turn off the built-in rule-based detectors.
|
|
126
|
+
- `stringDetectors`: add custom detectors (for NER/ML or domain-specific logic).
|
|
127
|
+
- `minDetectionConfidence` (default `0`): ignore low-confidence custom detections.
|
|
128
|
+
- Value detection masks only the sensitive fragments inside a larger string (instead of replacing the whole text), including:
|
|
129
|
+
- PII: email, phone, SSN, date of birth (`YYYY-MM-DD`), IPv4 address, payment card numbers (Luhn-validated).
|
|
130
|
+
- Secrets/tokens: JWTs, private key PEM blocks, GitHub tokens, AWS access keys, Stripe secret keys, OpenAI-style `sk-...` keys, credential pairs (`user:pass`), high-entropy token-like strings.
|
|
131
|
+
|
|
132
|
+
Example custom detector (NER/ML-style integration):
|
|
133
|
+
|
|
134
|
+
```typescript
|
|
135
|
+
const out = redact(
|
|
136
|
+
{ text: "Customer Jane Doe uses jane@example.com" },
|
|
137
|
+
{
|
|
138
|
+
stringDetectors: [
|
|
139
|
+
(value) => {
|
|
140
|
+
const name = "Jane Doe";
|
|
141
|
+
const idx = value.indexOf(name);
|
|
142
|
+
return idx >= 0
|
|
143
|
+
? [{ start: idx, end: idx + name.length, kind: "pii", confidence: 0.92, source: "ml.ner" }]
|
|
144
|
+
: [];
|
|
145
|
+
},
|
|
146
|
+
],
|
|
147
|
+
minDetectionConfidence: 0.8,
|
|
148
|
+
},
|
|
149
|
+
);
|
|
150
|
+
```
|
|
118
151
|
|
|
119
152
|
### Policy
|
|
120
153
|
|
|
@@ -145,9 +178,47 @@ To contribute to this project:
|
|
|
145
178
|
2. Install dependencies with `pnpm install`
|
|
146
179
|
3. Run tests with `pnpm test`
|
|
147
180
|
4. Build the package with `pnpm build`
|
|
181
|
+
5. Run Enron dataset integration tests with `pnpm test:data`
|
|
182
|
+
|
|
183
|
+
### Optional: external dataset setup
|
|
184
|
+
|
|
185
|
+
For larger redaction/policy experiments (Enron + Synthea FHIR), fetch datasets locally:
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
pnpm data:setup
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
This command downloads and extracts to:
|
|
192
|
+
|
|
193
|
+
- `data/enron-maildir`
|
|
194
|
+
- `data/synthea_sample_data_fhir_latest`
|
|
195
|
+
|
|
196
|
+
Notes:
|
|
197
|
+
|
|
198
|
+
- `data/` is gitignored and not published to npm.
|
|
199
|
+
- `pnpm test` excludes dataset suites by default.
|
|
200
|
+
- `pnpm test:data` runs Enron dataset tests with verbose output.
|
|
201
|
+
- `pnpm test:data:synthea` runs Synthea-specific dataset tests.
|
|
202
|
+
- `pnpm test:data:all` runs all dataset suites.
|
|
203
|
+
- You can override source URLs with `ENRON_URL=...` and/or `SYNTHEA_FHIR_URL=...`.
|
|
204
|
+
- You can change destination with `DATA_DIR=/path/to/data`.
|
|
205
|
+
|
|
206
|
+
Dataset sources:
|
|
207
|
+
|
|
208
|
+
- Enron: [https://www.cs.cmu.edu/~enron/](https://www.cs.cmu.edu/~enron/)
|
|
209
|
+
- Synthea: [https://github.com/synthetichealth/synthea-sample-data/](https://github.com/synthetichealth/synthea-sample-data/)
|
|
148
210
|
|
|
149
211
|
This project uses TypeScript for type safety, Jest for testing, and ESLint for code quality.
|
|
150
212
|
|
|
213
|
+
## Dataset Acknowledgements
|
|
214
|
+
|
|
215
|
+
We use these public datasets for redaction and policy testing:
|
|
216
|
+
|
|
217
|
+
- [CMU Enron Email Dataset](https://www.cs.cmu.edu/~enron/)
|
|
218
|
+
- [Synthea Sample Data](https://github.com/synthetichealth/synthea-sample-data/)
|
|
219
|
+
|
|
220
|
+
Personal note: I am especially interested in the historical context around Enron, including how it was able to happen and the improvements in governance and controls that followed.
|
|
221
|
+
|
|
151
222
|
## License
|
|
152
223
|
|
|
153
224
|
MIT © [Arvid Berndtsson](https://github.com/arvid-berndtsson)
|
package/dist/index.d.mts
CHANGED
|
@@ -50,12 +50,50 @@ declare function secretText(value: string): SecretString;
|
|
|
50
50
|
declare function token(value: string): TokenString;
|
|
51
51
|
declare function credential(value: string): CredentialString;
|
|
52
52
|
|
|
53
|
+
/**
 * One sensitive span located inside a string.
 */
type StringDetection = {
    /** Offset of the first character of the span (inclusive). */
    readonly start: number;
    /** Offset just past the last character of the span (exclusive). */
    readonly end: number;
    /** Classification handed to the placeholder when the span is masked. */
    readonly kind: DataClassification | "unknown";
    /** Detector certainty; compared against `minDetectionConfidence`. */
    readonly confidence?: number;
    /** Free-form label of the detector that produced the span. */
    readonly source?: string;
};
/**
 * A pluggable detector: receives the string plus traversal context and
 * returns the spans that should be masked.
 */
type StringDetector = (value: string, context: {
    readonly keyHint?: string;
    readonly depth: number;
}) => readonly StringDetection[];

/**
 * Options that control which string detectors run and which of their
 * results are kept.
 */
type StringDetectionOptions = {
    /** If false, disables the built-in rule-based value detector. */
    readonly useDefaultValueDetector?: boolean;
    /** Additional detectors (custom heuristics, NER, ML models, etc). */
    readonly stringDetectors?: readonly StringDetector[];
    /** Minimum confidence a detection needs in order to be kept. */
    readonly minDetectionConfidence?: number;
};
|
|
70
|
+
|
|
53
71
|
type RedactOptions = Readonly<{
|
|
54
72
|
/**
|
|
55
73
|
* If true, redact values for suspicious keys even if they aren't classified.
|
|
56
74
|
* Defaults to true.
|
|
57
75
|
*/
|
|
58
76
|
guessByKey?: boolean;
|
|
77
|
+
/**
|
|
78
|
+
* If true, redact suspicious string values even when keys are not suspicious.
|
|
79
|
+
* Defaults to true.
|
|
80
|
+
*/
|
|
81
|
+
guessByValue?: boolean;
|
|
82
|
+
/**
|
|
83
|
+
* Additional string detectors (for custom heuristics, NER, ML models, etc).
|
|
84
|
+
* Detectors return match ranges to mask in text.
|
|
85
|
+
*/
|
|
86
|
+
stringDetectors?: readonly StringDetector[];
|
|
87
|
+
/**
|
|
88
|
+
* If false, disables the built-in rule-based value detector.
|
|
89
|
+
* Defaults to true.
|
|
90
|
+
*/
|
|
91
|
+
useDefaultValueDetector?: boolean;
|
|
92
|
+
/**
|
|
93
|
+
* Minimum confidence for detections from string detectors.
|
|
94
|
+
* Defaults to 0.
|
|
95
|
+
*/
|
|
96
|
+
minDetectionConfidence?: number;
|
|
59
97
|
/**
|
|
60
98
|
* Placeholder format for redacted values.
|
|
61
99
|
* Defaults to "[REDACTED:<kind>]".
|
|
@@ -66,8 +104,10 @@ type RedactOptions = Readonly<{
|
|
|
66
104
|
* Defaults to 25.
|
|
67
105
|
*/
|
|
68
106
|
maxDepth?: number;
|
|
69
|
-
}
|
|
107
|
+
}> & StringDetectionOptions;
|
|
70
108
|
declare function redact<T>(value: T, options?: RedactOptions): T;
|
|
109
|
+
declare function detectText(value: string, options?: Pick<RedactOptions, "useDefaultValueDetector" | "stringDetectors" | "minDetectionConfidence">): StringDetection[];
|
|
110
|
+
declare function redactText(value: string, options?: Pick<RedactOptions, "guessByValue" | "useDefaultValueDetector" | "stringDetectors" | "minDetectionConfidence" | "placeholder">): string;
|
|
71
111
|
declare function safeJsonStringify(value: unknown, options?: RedactOptions, space?: number): string;
|
|
72
112
|
/**
|
|
73
113
|
* Convenience logger that will redact classified data and suspicious keys.
|
|
@@ -113,4 +153,4 @@ declare function audit(policy: Policy, action: PolicyAction, data: unknown): Aud
|
|
|
113
153
|
*/
|
|
114
154
|
declare function policyLog(policy: Policy, logger: Pick<Console, "info" | "warn" | "error" | "debug" | "log">, level: keyof Pick<Console, "info" | "warn" | "error" | "debug" | "log">, ...args: unknown[]): void;
|
|
115
155
|
|
|
116
|
-
export { type AuditEvent, type Classified, type CredentialString, CredentialStringSchema, type DataClassification, type PIIString, PIIStringSchema, type Policy, type PolicyAction, type PolicyDecision, type PublicString, PublicStringSchema, type RedactOptions, type SecretString, SecretStringSchema, type TokenString, TokenStringSchema, assertAllowed, audit, classificationOf, credential, decide, defaultPolicy, httpAuthorizationBearer, isClassified, piiText, policyLog, publicText, redact, reveal, safeJsonStringify, safeLoggerAdapter, secretText, token };
|
|
156
|
+
export { type AuditEvent, type Classified, type CredentialString, CredentialStringSchema, type DataClassification, type PIIString, PIIStringSchema, type Policy, type PolicyAction, type PolicyDecision, type PublicString, PublicStringSchema, type RedactOptions, type SecretString, SecretStringSchema, type StringDetection, type StringDetector, type TokenString, TokenStringSchema, assertAllowed, audit, classificationOf, credential, decide, defaultPolicy, detectText, httpAuthorizationBearer, isClassified, piiText, policyLog, publicText, redact, redactText, reveal, safeJsonStringify, safeLoggerAdapter, secretText, token };
|
package/dist/index.d.ts
CHANGED
|
@@ -50,12 +50,50 @@ declare function secretText(value: string): SecretString;
|
|
|
50
50
|
declare function token(value: string): TokenString;
|
|
51
51
|
declare function credential(value: string): CredentialString;
|
|
52
52
|
|
|
53
|
+
/**
 * One sensitive span located inside a string.
 */
type StringDetection = {
    /** Offset of the first character of the span (inclusive). */
    readonly start: number;
    /** Offset just past the last character of the span (exclusive). */
    readonly end: number;
    /** Classification handed to the placeholder when the span is masked. */
    readonly kind: DataClassification | "unknown";
    /** Detector certainty; compared against `minDetectionConfidence`. */
    readonly confidence?: number;
    /** Free-form label of the detector that produced the span. */
    readonly source?: string;
};
/**
 * A pluggable detector: receives the string plus traversal context and
 * returns the spans that should be masked.
 */
type StringDetector = (value: string, context: {
    readonly keyHint?: string;
    readonly depth: number;
}) => readonly StringDetection[];

/**
 * Options that control which string detectors run and which of their
 * results are kept.
 */
type StringDetectionOptions = {
    /** If false, disables the built-in rule-based value detector. */
    readonly useDefaultValueDetector?: boolean;
    /** Additional detectors (custom heuristics, NER, ML models, etc). */
    readonly stringDetectors?: readonly StringDetector[];
    /** Minimum confidence a detection needs in order to be kept. */
    readonly minDetectionConfidence?: number;
};
|
|
70
|
+
|
|
53
71
|
type RedactOptions = Readonly<{
|
|
54
72
|
/**
|
|
55
73
|
* If true, redact values for suspicious keys even if they aren't classified.
|
|
56
74
|
* Defaults to true.
|
|
57
75
|
*/
|
|
58
76
|
guessByKey?: boolean;
|
|
77
|
+
/**
|
|
78
|
+
* If true, redact suspicious string values even when keys are not suspicious.
|
|
79
|
+
* Defaults to true.
|
|
80
|
+
*/
|
|
81
|
+
guessByValue?: boolean;
|
|
82
|
+
/**
|
|
83
|
+
* Additional string detectors (for custom heuristics, NER, ML models, etc).
|
|
84
|
+
* Detectors return match ranges to mask in text.
|
|
85
|
+
*/
|
|
86
|
+
stringDetectors?: readonly StringDetector[];
|
|
87
|
+
/**
|
|
88
|
+
* If false, disables the built-in rule-based value detector.
|
|
89
|
+
* Defaults to true.
|
|
90
|
+
*/
|
|
91
|
+
useDefaultValueDetector?: boolean;
|
|
92
|
+
/**
|
|
93
|
+
* Minimum confidence for detections from string detectors.
|
|
94
|
+
* Defaults to 0.
|
|
95
|
+
*/
|
|
96
|
+
minDetectionConfidence?: number;
|
|
59
97
|
/**
|
|
60
98
|
* Placeholder format for redacted values.
|
|
61
99
|
* Defaults to "[REDACTED:<kind>]".
|
|
@@ -66,8 +104,10 @@ type RedactOptions = Readonly<{
|
|
|
66
104
|
* Defaults to 25.
|
|
67
105
|
*/
|
|
68
106
|
maxDepth?: number;
|
|
69
|
-
}
|
|
107
|
+
}> & StringDetectionOptions;
|
|
70
108
|
declare function redact<T>(value: T, options?: RedactOptions): T;
|
|
109
|
+
declare function detectText(value: string, options?: Pick<RedactOptions, "useDefaultValueDetector" | "stringDetectors" | "minDetectionConfidence">): StringDetection[];
|
|
110
|
+
declare function redactText(value: string, options?: Pick<RedactOptions, "guessByValue" | "useDefaultValueDetector" | "stringDetectors" | "minDetectionConfidence" | "placeholder">): string;
|
|
71
111
|
declare function safeJsonStringify(value: unknown, options?: RedactOptions, space?: number): string;
|
|
72
112
|
/**
|
|
73
113
|
* Convenience logger that will redact classified data and suspicious keys.
|
|
@@ -113,4 +153,4 @@ declare function audit(policy: Policy, action: PolicyAction, data: unknown): Aud
|
|
|
113
153
|
*/
|
|
114
154
|
declare function policyLog(policy: Policy, logger: Pick<Console, "info" | "warn" | "error" | "debug" | "log">, level: keyof Pick<Console, "info" | "warn" | "error" | "debug" | "log">, ...args: unknown[]): void;
|
|
115
155
|
|
|
116
|
-
export { type AuditEvent, type Classified, type CredentialString, CredentialStringSchema, type DataClassification, type PIIString, PIIStringSchema, type Policy, type PolicyAction, type PolicyDecision, type PublicString, PublicStringSchema, type RedactOptions, type SecretString, SecretStringSchema, type TokenString, TokenStringSchema, assertAllowed, audit, classificationOf, credential, decide, defaultPolicy, httpAuthorizationBearer, isClassified, piiText, policyLog, publicText, redact, reveal, safeJsonStringify, safeLoggerAdapter, secretText, token };
|
|
156
|
+
export { type AuditEvent, type Classified, type CredentialString, CredentialStringSchema, type DataClassification, type PIIString, PIIStringSchema, type Policy, type PolicyAction, type PolicyDecision, type PublicString, PublicStringSchema, type RedactOptions, type SecretString, SecretStringSchema, type StringDetection, type StringDetector, type TokenString, TokenStringSchema, assertAllowed, audit, classificationOf, credential, decide, defaultPolicy, detectText, httpAuthorizationBearer, isClassified, piiText, policyLog, publicText, redact, redactText, reveal, safeJsonStringify, safeLoggerAdapter, secretText, token };
|
package/dist/index.js
CHANGED
|
@@ -31,12 +31,14 @@ __export(index_exports, {
|
|
|
31
31
|
credential: () => credential,
|
|
32
32
|
decide: () => decide,
|
|
33
33
|
defaultPolicy: () => defaultPolicy,
|
|
34
|
+
detectText: () => detectText,
|
|
34
35
|
httpAuthorizationBearer: () => httpAuthorizationBearer,
|
|
35
36
|
isClassified: () => isClassified,
|
|
36
37
|
piiText: () => piiText,
|
|
37
38
|
policyLog: () => policyLog,
|
|
38
39
|
publicText: () => publicText,
|
|
39
40
|
redact: () => redact,
|
|
41
|
+
redactText: () => redactText,
|
|
40
42
|
reveal: () => reveal,
|
|
41
43
|
safeJsonStringify: () => safeJsonStringify,
|
|
42
44
|
safeLoggerAdapter: () => safeLoggerAdapter,
|
|
@@ -94,6 +96,179 @@ function credential(value) {
|
|
|
94
96
|
return CredentialStringSchema.parse(value);
|
|
95
97
|
}
|
|
96
98
|
|
|
99
|
+
// src/detectors/rules.ts
|
|
100
|
+
var EMAIL_RE = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/gi;
|
|
101
|
+
var PHONE_RE = /\b(?:\+?\d{1,3}[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?)\d{3}[-.\s]?\d{4}\b/g;
|
|
102
|
+
var SSN_RE = /\b\d{3}-\d{2}-\d{4}\b/g;
|
|
103
|
+
var DOB_RE = /\b(?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])\b/g;
|
|
104
|
+
var IPV4_RE = /\b(?:\d{1,3}\.){3}\d{1,3}\b/g;
|
|
105
|
+
var CREDIT_CARD_RE = /\b(?:\d[ -]*?){13,19}\b/g;
|
|
106
|
+
var JWT_RE = /\b[A-Za-z0-9_-]{12,}\.[A-Za-z0-9_-]{12,}\.[A-Za-z0-9_-]{12,}\b/g;
|
|
107
|
+
var PRIVATE_KEY_BLOCK_RE = /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g;
|
|
108
|
+
var AWS_ACCESS_KEY_RE = /\bAKIA[0-9A-Z]{16}\b/g;
|
|
109
|
+
var GITHUB_TOKEN_RE = /\bgh[pousr]_[A-Za-z0-9]{20,}\b/g;
|
|
110
|
+
var STRIPE_SECRET_RE = /\bsk_(?:live|test)_[A-Za-z0-9]{16,}\b/g;
|
|
111
|
+
var OPENAI_KEY_RE = /\bsk-[A-Za-z0-9]{20,}\b/g;
|
|
112
|
+
var CREDENTIAL_PAIR_RE = /\b[^:\s]{1,128}:[^\s:]{1,256}\b/g;
|
|
113
|
+
var HIGH_ENTROPY_TOKEN_RE = /\b[A-Za-z0-9+/=_-]{28,}\b/g;
|
|
114
|
+
/**
 * Checks whether a candidate card number passes the Luhn checksum.
 *
 * Spaces and hyphens are ignored; what remains must be 13 to 19 digits.
 *
 * @param input Matched text, possibly containing space/hyphen separators.
 * @returns `true` when the digits are well-formed and the Luhn sum is a
 *   multiple of 10, otherwise `false`.
 */
function luhnValid(input) {
  const compact = input.split(/[ -]/).join("");
  if (compact.length < 13 || compact.length > 19 || /\D/.test(compact)) {
    return false;
  }
  let total = 0;
  // Walk right to left; every second digit counted from the right is doubled.
  Array.from(compact).reverse().forEach((ch, pos) => {
    let digit = Number(ch);
    if (pos % 2 === 1) {
      digit *= 2;
      if (digit > 9) digit -= 9;
    }
    total += digit;
  });
  return total % 10 === 0;
}
|
|
130
|
+
function placeholderRanges(text) {
|
|
131
|
+
const ranges = [];
|
|
132
|
+
const re = /\[REDACTED:[^\]]+\]/g;
|
|
133
|
+
let m;
|
|
134
|
+
while ((m = re.exec(text)) !== null) {
|
|
135
|
+
ranges.push({ start: m.index, end: m.index + m[0].length });
|
|
136
|
+
}
|
|
137
|
+
return ranges;
|
|
138
|
+
}
|
|
139
|
+
function overlapsRanges(start, end, ranges) {
|
|
140
|
+
return ranges.some((r) => start < r.end && end > r.start);
|
|
141
|
+
}
|
|
142
|
+
function detectRegexRanges(value, re, kind, source, guard) {
|
|
143
|
+
const out = [];
|
|
144
|
+
const execRe = new RegExp(re.source, re.flags);
|
|
145
|
+
const protectedRanges = placeholderRanges(value);
|
|
146
|
+
let m;
|
|
147
|
+
while ((m = execRe.exec(value)) !== null) {
|
|
148
|
+
const match = m[0];
|
|
149
|
+
const start = m.index;
|
|
150
|
+
const end = start + match.length;
|
|
151
|
+
if (!match || guard && !guard(match) || overlapsRanges(start, end, protectedRanges)) {
|
|
152
|
+
continue;
|
|
153
|
+
}
|
|
154
|
+
out.push({ start, end, kind, source, confidence: 1 });
|
|
155
|
+
if (execRe.lastIndex === m.index) execRe.lastIndex += 1;
|
|
156
|
+
}
|
|
157
|
+
return out;
|
|
158
|
+
}
|
|
159
|
+
/**
 * Built-in rule-based detector: applies every bundled pattern to the string
 * and returns all resulting detections.
 *
 * Rules run in a fixed order (secrets/tokens/credentials, then PII, then the
 * generic high-entropy rule) and the output keeps that order.
 *
 * @param value String to scan.
 * @returns Detections from all rules, concatenated in rule order.
 */
var defaultRuleStringDetector = (value) => {
  // Each dot-separated part must be a number in 0..255.
  const isIpv4 = (m) => m.split(".").every((part) => {
    const n = Number(part);
    return n >= 0 && n <= 255;
  });
  // Reject purely alphabetic runs and anything without at least one digit.
  const looksLikeToken = (m) => !/^[a-z]+$/i.test(m) && /\d/.test(m);
  // [pattern, kind, source, optional guard]
  const rules = [
    [PRIVATE_KEY_BLOCK_RE, "secret", "rule.private-key"],
    [JWT_RE, "token", "rule.jwt"],
    [AWS_ACCESS_KEY_RE, "credential", "rule.aws-access-key"],
    [GITHUB_TOKEN_RE, "token", "rule.github-token"],
    [STRIPE_SECRET_RE, "secret", "rule.stripe-secret"],
    [OPENAI_KEY_RE, "secret", "rule.openai-key"],
    [CREDENTIAL_PAIR_RE, "credential", "rule.credential-pair"],
    [EMAIL_RE, "pii", "rule.email"],
    [PHONE_RE, "pii", "rule.phone"],
    [SSN_RE, "pii", "rule.ssn"],
    [DOB_RE, "pii", "rule.dob"],
    [IPV4_RE, "pii", "rule.ipv4", isIpv4],
    [CREDIT_CARD_RE, "pii", "rule.credit-card", luhnValid],
    [HIGH_ENTROPY_TOKEN_RE, "token", "rule.high-entropy", looksLikeToken]
  ];
  const out = [];
  for (const [re, kind, source, guard] of rules) {
    // Append one by one: spreading a large result into push() passes every
    // element as a call argument and can exceed the engine's argument limit.
    for (const hit of detectRegexRanges(value, re, kind, source, guard)) {
      out.push(hit);
    }
  }
  return out;
};
|
|
224
|
+
|
|
225
|
+
// src/detectors/engine.ts
|
|
226
|
+
/**
 * Runs the enabled detectors over a string and returns the detections that
 * pass the confidence and sanity filters.
 *
 * The built-in rule detector runs first (unless disabled), followed by the
 * custom detectors in the order given.
 *
 * @param value String to scan.
 * @param context Context object forwarded unchanged to every detector.
 * @param options Optional settings: `useDefaultValueDetector` (default true),
 *   `stringDetectors` (default none), `minDetectionConfidence` (default 0).
 * @returns Detections whose confidence (1 when absent) is at least the
 *   minimum and whose `start`/`end` are finite numbers.
 */
function collectStringDetections(value, context, options) {
  const useDefaultValueDetector = options?.useDefaultValueDetector ?? true;
  const customDetectors = options?.stringDetectors ?? [];
  const minDetectionConfidence = options?.minDetectionConfidence ?? 0;
  const detectors = useDefaultValueDetector ? [defaultRuleStringDetector, ...customDetectors] : customDetectors;
  const detections = [];
  for (const detector of detectors) {
    // Iterate instead of push(...results): a detector may return more
    // elements than the engine accepts as call arguments.
    for (const d of detector(value, context)) {
      if ((d.confidence ?? 1) >= minDetectionConfidence && Number.isFinite(d.start) && Number.isFinite(d.end)) {
        detections.push(d);
      }
    }
  }
  return detections;
}
|
|
241
|
+
function applyDetectionsToString(value, detections, placeholder) {
|
|
242
|
+
if (detections.length === 0) return value;
|
|
243
|
+
const owner = new Array(value.length).fill(-1);
|
|
244
|
+
const normalized = [];
|
|
245
|
+
for (const d of detections) {
|
|
246
|
+
const start = Math.max(0, Math.min(value.length, Math.trunc(d.start)));
|
|
247
|
+
const end = Math.max(start, Math.min(value.length, Math.trunc(d.end)));
|
|
248
|
+
if (end <= start) continue;
|
|
249
|
+
normalized.push({ start, end, kind: d.kind });
|
|
250
|
+
}
|
|
251
|
+
normalized.forEach((d, idx) => {
|
|
252
|
+
for (let i2 = d.start; i2 < d.end; i2 += 1) {
|
|
253
|
+
if (owner[i2] === -1) owner[i2] = idx;
|
|
254
|
+
}
|
|
255
|
+
});
|
|
256
|
+
let out = "";
|
|
257
|
+
let i = 0;
|
|
258
|
+
while (i < value.length) {
|
|
259
|
+
const idx = owner[i];
|
|
260
|
+
if (idx < 0) {
|
|
261
|
+
out += value[i];
|
|
262
|
+
i += 1;
|
|
263
|
+
continue;
|
|
264
|
+
}
|
|
265
|
+
const d = normalized[idx];
|
|
266
|
+
out += placeholder(d.kind);
|
|
267
|
+
i = d.end;
|
|
268
|
+
}
|
|
269
|
+
return out;
|
|
270
|
+
}
|
|
271
|
+
|
|
97
272
|
// src/redaction.ts
|
|
98
273
|
var DEFAULT_SUSPICIOUS_KEY = /pass(word)?|pwd|secret|token|api[_-]?key|auth|bearer|cookie|session|private[_-]?key|ssh|credential/i;
|
|
99
274
|
function defaultPlaceholder(kind) {
|
|
@@ -104,6 +279,7 @@ function isPlainObject(value) {
|
|
|
104
279
|
}
|
|
105
280
|
function redact(value, options) {
|
|
106
281
|
const guessByKey = options?.guessByKey ?? true;
|
|
282
|
+
const guessByValue = options?.guessByValue ?? true;
|
|
107
283
|
const placeholder = options?.placeholder ?? defaultPlaceholder;
|
|
108
284
|
const maxDepth = options?.maxDepth ?? 25;
|
|
109
285
|
const seen = /* @__PURE__ */ new WeakMap();
|
|
@@ -118,6 +294,15 @@ function redact(value, options) {
|
|
|
118
294
|
return placeholder("unknown");
|
|
119
295
|
}
|
|
120
296
|
}
|
|
297
|
+
if (guessByValue && typeof v === "string") {
|
|
298
|
+
const detections = collectStringDetections(
|
|
299
|
+
v,
|
|
300
|
+
{ keyHint, depth },
|
|
301
|
+
options
|
|
302
|
+
);
|
|
303
|
+
const masked = applyDetectionsToString(v, detections, placeholder);
|
|
304
|
+
if (masked !== v) return masked;
|
|
305
|
+
}
|
|
121
306
|
if (Array.isArray(v)) {
|
|
122
307
|
return v.map((item) => walk(item, depth + 1));
|
|
123
308
|
}
|
|
@@ -137,6 +322,16 @@ function redact(value, options) {
|
|
|
137
322
|
};
|
|
138
323
|
return walk(value, 0);
|
|
139
324
|
}
|
|
325
|
+
/**
 * Scans plain text and returns the detections found, without modifying it.
 *
 * @param value Text to scan.
 * @param options Optional detector settings, forwarded unchanged.
 * @returns The detections produced for `value`, scanned at depth 0.
 */
function detectText(value, options) {
  const topLevelContext = { depth: 0 };
  return collectStringDetections(value, topLevelContext, options);
}
|
|
328
|
+
/**
 * Masks the sensitive fragments of a plain-text string.
 *
 * @param value Text to redact.
 * @param options Optional settings: `guessByValue` (default true; when falsy
 *   the text is returned untouched), `placeholder` (defaults to the default
 *   placeholder function), plus the detector options used by `detectText`.
 * @returns The text with each detected span replaced by a placeholder.
 */
function redactText(value, options) {
  const enabled = options?.guessByValue ?? true;
  if (!enabled) {
    return value;
  }
  const mask = options?.placeholder ?? defaultPlaceholder;
  return applyDetectionsToString(value, detectText(value, options), mask);
}
|
|
140
335
|
function safeJsonStringify(value, options, space) {
|
|
141
336
|
return JSON.stringify(redact(value, options), null, space);
|
|
142
337
|
}
|
|
@@ -243,12 +438,14 @@ function policyLog(policy, logger, level, ...args) {
|
|
|
243
438
|
credential,
|
|
244
439
|
decide,
|
|
245
440
|
defaultPolicy,
|
|
441
|
+
detectText,
|
|
246
442
|
httpAuthorizationBearer,
|
|
247
443
|
isClassified,
|
|
248
444
|
piiText,
|
|
249
445
|
policyLog,
|
|
250
446
|
publicText,
|
|
251
447
|
redact,
|
|
448
|
+
redactText,
|
|
252
449
|
reveal,
|
|
253
450
|
safeJsonStringify,
|
|
254
451
|
safeLoggerAdapter,
|
package/dist/index.mjs
CHANGED
|
@@ -47,6 +47,179 @@ function credential(value) {
|
|
|
47
47
|
return CredentialStringSchema.parse(value);
|
|
48
48
|
}
|
|
49
49
|
|
|
50
|
+
// src/detectors/rules.ts
|
|
51
|
+
var EMAIL_RE = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/gi;
|
|
52
|
+
var PHONE_RE = /\b(?:\+?\d{1,3}[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?)\d{3}[-.\s]?\d{4}\b/g;
|
|
53
|
+
var SSN_RE = /\b\d{3}-\d{2}-\d{4}\b/g;
|
|
54
|
+
var DOB_RE = /\b(?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])\b/g;
|
|
55
|
+
var IPV4_RE = /\b(?:\d{1,3}\.){3}\d{1,3}\b/g;
|
|
56
|
+
var CREDIT_CARD_RE = /\b(?:\d[ -]*?){13,19}\b/g;
|
|
57
|
+
var JWT_RE = /\b[A-Za-z0-9_-]{12,}\.[A-Za-z0-9_-]{12,}\.[A-Za-z0-9_-]{12,}\b/g;
|
|
58
|
+
var PRIVATE_KEY_BLOCK_RE = /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g;
|
|
59
|
+
var AWS_ACCESS_KEY_RE = /\bAKIA[0-9A-Z]{16}\b/g;
|
|
60
|
+
var GITHUB_TOKEN_RE = /\bgh[pousr]_[A-Za-z0-9]{20,}\b/g;
|
|
61
|
+
var STRIPE_SECRET_RE = /\bsk_(?:live|test)_[A-Za-z0-9]{16,}\b/g;
|
|
62
|
+
var OPENAI_KEY_RE = /\bsk-[A-Za-z0-9]{20,}\b/g;
|
|
63
|
+
var CREDENTIAL_PAIR_RE = /\b[^:\s]{1,128}:[^\s:]{1,256}\b/g;
|
|
64
|
+
var HIGH_ENTROPY_TOKEN_RE = /\b[A-Za-z0-9+/=_-]{28,}\b/g;
|
|
65
|
+
/**
 * Checks whether a candidate card number passes the Luhn checksum.
 *
 * Spaces and hyphens are ignored; what remains must be 13 to 19 digits.
 *
 * @param input Matched text, possibly containing space/hyphen separators.
 * @returns `true` when the digits are well-formed and the Luhn sum is a
 *   multiple of 10, otherwise `false`.
 */
function luhnValid(input) {
  const compact = input.split(/[ -]/).join("");
  if (compact.length < 13 || compact.length > 19 || /\D/.test(compact)) {
    return false;
  }
  let total = 0;
  // Walk right to left; every second digit counted from the right is doubled.
  Array.from(compact).reverse().forEach((ch, pos) => {
    let digit = Number(ch);
    if (pos % 2 === 1) {
      digit *= 2;
      if (digit > 9) digit -= 9;
    }
    total += digit;
  });
  return total % 10 === 0;
}
|
|
81
|
+
function placeholderRanges(text) {
|
|
82
|
+
const ranges = [];
|
|
83
|
+
const re = /\[REDACTED:[^\]]+\]/g;
|
|
84
|
+
let m;
|
|
85
|
+
while ((m = re.exec(text)) !== null) {
|
|
86
|
+
ranges.push({ start: m.index, end: m.index + m[0].length });
|
|
87
|
+
}
|
|
88
|
+
return ranges;
|
|
89
|
+
}
|
|
90
|
+
function overlapsRanges(start, end, ranges) {
|
|
91
|
+
return ranges.some((r) => start < r.end && end > r.start);
|
|
92
|
+
}
|
|
93
|
+
function detectRegexRanges(value, re, kind, source, guard) {
|
|
94
|
+
const out = [];
|
|
95
|
+
const execRe = new RegExp(re.source, re.flags);
|
|
96
|
+
const protectedRanges = placeholderRanges(value);
|
|
97
|
+
let m;
|
|
98
|
+
while ((m = execRe.exec(value)) !== null) {
|
|
99
|
+
const match = m[0];
|
|
100
|
+
const start = m.index;
|
|
101
|
+
const end = start + match.length;
|
|
102
|
+
if (!match || guard && !guard(match) || overlapsRanges(start, end, protectedRanges)) {
|
|
103
|
+
continue;
|
|
104
|
+
}
|
|
105
|
+
out.push({ start, end, kind, source, confidence: 1 });
|
|
106
|
+
if (execRe.lastIndex === m.index) execRe.lastIndex += 1;
|
|
107
|
+
}
|
|
108
|
+
return out;
|
|
109
|
+
}
|
|
110
|
+
/**
 * Built-in rule-based detector: applies every bundled pattern to the string
 * and returns all resulting detections.
 *
 * Rules run in a fixed order (secrets/tokens/credentials, then PII, then the
 * generic high-entropy rule) and the output keeps that order.
 *
 * @param value String to scan.
 * @returns Detections from all rules, concatenated in rule order.
 */
var defaultRuleStringDetector = (value) => {
  // Each dot-separated part must be a number in 0..255.
  const isIpv4 = (m) => m.split(".").every((part) => {
    const n = Number(part);
    return n >= 0 && n <= 255;
  });
  // Reject purely alphabetic runs and anything without at least one digit.
  const looksLikeToken = (m) => !/^[a-z]+$/i.test(m) && /\d/.test(m);
  // [pattern, kind, source, optional guard]
  const rules = [
    [PRIVATE_KEY_BLOCK_RE, "secret", "rule.private-key"],
    [JWT_RE, "token", "rule.jwt"],
    [AWS_ACCESS_KEY_RE, "credential", "rule.aws-access-key"],
    [GITHUB_TOKEN_RE, "token", "rule.github-token"],
    [STRIPE_SECRET_RE, "secret", "rule.stripe-secret"],
    [OPENAI_KEY_RE, "secret", "rule.openai-key"],
    [CREDENTIAL_PAIR_RE, "credential", "rule.credential-pair"],
    [EMAIL_RE, "pii", "rule.email"],
    [PHONE_RE, "pii", "rule.phone"],
    [SSN_RE, "pii", "rule.ssn"],
    [DOB_RE, "pii", "rule.dob"],
    [IPV4_RE, "pii", "rule.ipv4", isIpv4],
    [CREDIT_CARD_RE, "pii", "rule.credit-card", luhnValid],
    [HIGH_ENTROPY_TOKEN_RE, "token", "rule.high-entropy", looksLikeToken]
  ];
  const out = [];
  for (const [re, kind, source, guard] of rules) {
    // Append one by one: spreading a large result into push() passes every
    // element as a call argument and can exceed the engine's argument limit.
    for (const hit of detectRegexRanges(value, re, kind, source, guard)) {
      out.push(hit);
    }
  }
  return out;
};
|
|
175
|
+
|
|
176
|
+
// src/detectors/engine.ts
|
|
177
|
+
/**
 * Runs the enabled detectors over a string and returns the detections that
 * pass the confidence and sanity filters.
 *
 * The built-in rule detector runs first (unless disabled), followed by the
 * custom detectors in the order given.
 *
 * @param value String to scan.
 * @param context Context object forwarded unchanged to every detector.
 * @param options Optional settings: `useDefaultValueDetector` (default true),
 *   `stringDetectors` (default none), `minDetectionConfidence` (default 0).
 * @returns Detections whose confidence (1 when absent) is at least the
 *   minimum and whose `start`/`end` are finite numbers.
 */
function collectStringDetections(value, context, options) {
  const useDefaultValueDetector = options?.useDefaultValueDetector ?? true;
  const customDetectors = options?.stringDetectors ?? [];
  const minDetectionConfidence = options?.minDetectionConfidence ?? 0;
  const detectors = useDefaultValueDetector ? [defaultRuleStringDetector, ...customDetectors] : customDetectors;
  const detections = [];
  for (const detector of detectors) {
    // Iterate instead of push(...results): a detector may return more
    // elements than the engine accepts as call arguments.
    for (const d of detector(value, context)) {
      if ((d.confidence ?? 1) >= minDetectionConfidence && Number.isFinite(d.start) && Number.isFinite(d.end)) {
        detections.push(d);
      }
    }
  }
  return detections;
}
|
|
192
|
+
function applyDetectionsToString(value, detections, placeholder) {
|
|
193
|
+
if (detections.length === 0) return value;
|
|
194
|
+
const owner = new Array(value.length).fill(-1);
|
|
195
|
+
const normalized = [];
|
|
196
|
+
for (const d of detections) {
|
|
197
|
+
const start = Math.max(0, Math.min(value.length, Math.trunc(d.start)));
|
|
198
|
+
const end = Math.max(start, Math.min(value.length, Math.trunc(d.end)));
|
|
199
|
+
if (end <= start) continue;
|
|
200
|
+
normalized.push({ start, end, kind: d.kind });
|
|
201
|
+
}
|
|
202
|
+
normalized.forEach((d, idx) => {
|
|
203
|
+
for (let i2 = d.start; i2 < d.end; i2 += 1) {
|
|
204
|
+
if (owner[i2] === -1) owner[i2] = idx;
|
|
205
|
+
}
|
|
206
|
+
});
|
|
207
|
+
let out = "";
|
|
208
|
+
let i = 0;
|
|
209
|
+
while (i < value.length) {
|
|
210
|
+
const idx = owner[i];
|
|
211
|
+
if (idx < 0) {
|
|
212
|
+
out += value[i];
|
|
213
|
+
i += 1;
|
|
214
|
+
continue;
|
|
215
|
+
}
|
|
216
|
+
const d = normalized[idx];
|
|
217
|
+
out += placeholder(d.kind);
|
|
218
|
+
i = d.end;
|
|
219
|
+
}
|
|
220
|
+
return out;
|
|
221
|
+
}
|
|
222
|
+
|
|
50
223
|
// src/redaction.ts
|
|
51
224
|
var DEFAULT_SUSPICIOUS_KEY = /pass(word)?|pwd|secret|token|api[_-]?key|auth|bearer|cookie|session|private[_-]?key|ssh|credential/i;
|
|
52
225
|
function defaultPlaceholder(kind) {
|
|
@@ -57,6 +230,7 @@ function isPlainObject(value) {
|
|
|
57
230
|
}
|
|
58
231
|
function redact(value, options) {
|
|
59
232
|
const guessByKey = options?.guessByKey ?? true;
|
|
233
|
+
const guessByValue = options?.guessByValue ?? true;
|
|
60
234
|
const placeholder = options?.placeholder ?? defaultPlaceholder;
|
|
61
235
|
const maxDepth = options?.maxDepth ?? 25;
|
|
62
236
|
const seen = /* @__PURE__ */ new WeakMap();
|
|
@@ -71,6 +245,15 @@ function redact(value, options) {
|
|
|
71
245
|
return placeholder("unknown");
|
|
72
246
|
}
|
|
73
247
|
}
|
|
248
|
+
if (guessByValue && typeof v === "string") {
|
|
249
|
+
const detections = collectStringDetections(
|
|
250
|
+
v,
|
|
251
|
+
{ keyHint, depth },
|
|
252
|
+
options
|
|
253
|
+
);
|
|
254
|
+
const masked = applyDetectionsToString(v, detections, placeholder);
|
|
255
|
+
if (masked !== v) return masked;
|
|
256
|
+
}
|
|
74
257
|
if (Array.isArray(v)) {
|
|
75
258
|
return v.map((item) => walk(item, depth + 1));
|
|
76
259
|
}
|
|
@@ -90,6 +273,16 @@ function redact(value, options) {
|
|
|
90
273
|
};
|
|
91
274
|
return walk(value, 0);
|
|
92
275
|
}
|
|
276
|
+
/**
 * Scans plain text and returns the detections found, without modifying it.
 *
 * @param value Text to scan.
 * @param options Optional detector settings, forwarded unchanged.
 * @returns The detections produced for `value`, scanned at depth 0.
 */
function detectText(value, options) {
  const topLevelContext = { depth: 0 };
  return collectStringDetections(value, topLevelContext, options);
}
|
|
279
|
+
/**
 * Masks the sensitive fragments of a plain-text string.
 *
 * @param value Text to redact.
 * @param options Optional settings: `guessByValue` (default true; when falsy
 *   the text is returned untouched), `placeholder` (defaults to the default
 *   placeholder function), plus the detector options used by `detectText`.
 * @returns The text with each detected span replaced by a placeholder.
 */
function redactText(value, options) {
  const enabled = options?.guessByValue ?? true;
  if (!enabled) {
    return value;
  }
  const mask = options?.placeholder ?? defaultPlaceholder;
  return applyDetectionsToString(value, detectText(value, options), mask);
}
|
|
93
286
|
function safeJsonStringify(value, options, space) {
|
|
94
287
|
return JSON.stringify(redact(value, options), null, space);
|
|
95
288
|
}
|
|
@@ -195,12 +388,14 @@ export {
|
|
|
195
388
|
credential,
|
|
196
389
|
decide,
|
|
197
390
|
defaultPolicy,
|
|
391
|
+
detectText,
|
|
198
392
|
httpAuthorizationBearer,
|
|
199
393
|
isClassified,
|
|
200
394
|
piiText,
|
|
201
395
|
policyLog,
|
|
202
396
|
publicText,
|
|
203
397
|
redact,
|
|
398
|
+
redactText,
|
|
204
399
|
reveal,
|
|
205
400
|
safeJsonStringify,
|
|
206
401
|
safeLoggerAdapter,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "typesecure",
|
|
3
|
-
"version": "0.2.2",
|
|
3
|
+
"version": "0.2.3",
|
|
4
4
|
"description": "Type-safe data classification and security enforcement for TypeScript - prevent secrets and PII leaks with compile-time and runtime guarantees",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
|
@@ -13,7 +13,14 @@
|
|
|
13
13
|
"lint": "eslint src tests --ext .ts",
|
|
14
14
|
"lint:fix": "eslint src tests --ext .ts --fix",
|
|
15
15
|
"dev": "tsup src/index.ts --format cjs,esm --dts --watch",
|
|
16
|
-
"
|
|
16
|
+
"data:setup": "bash scripts/setup-datasets.sh",
|
|
17
|
+
"test": "jest --config jest.config.js --testPathIgnorePatterns tests/datasets",
|
|
18
|
+
"test:data": "pnpm run test:data:enron",
|
|
19
|
+
"test:data:enron": "jest --config jest.datasets.config.js --runInBand --verbose tests/datasets/enron.dataset.test.ts",
|
|
20
|
+
"test:data:enron:full": "jest --config jest.datasets.config.js --runInBand --verbose tests/datasets/enron.fullscan.test.ts",
|
|
21
|
+
"test:data:synthea": "jest --config jest.datasets.config.js --runInBand --verbose tests/datasets/synthea.dataset.test.ts",
|
|
22
|
+
"test:data:all": "jest --config jest.datasets.config.js --runInBand --verbose tests/datasets",
|
|
23
|
+
"test:datasets": "pnpm run test:data:all",
|
|
17
24
|
"test:watch": "jest --config jest.config.js --watch",
|
|
18
25
|
"test:coverage": "jest --config jest.config.js --coverage",
|
|
19
26
|
"format": "prettier --write \"src/**/*.ts\" \"tests/**/*.ts\"",
|
|
@@ -65,6 +72,6 @@
|
|
|
65
72
|
"zod": "^3.25.76"
|
|
66
73
|
},
|
|
67
74
|
"engines": {
|
|
68
|
-
"node": ">=
|
|
75
|
+
"node": ">=18.18.0"
|
|
69
76
|
}
|
|
70
77
|
}
|