gavio 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. package/README.md +95 -0
  2. package/dist/cjs/context.js +47 -0
  3. package/dist/cjs/errors.js +57 -0
  4. package/dist/cjs/gateway.js +127 -0
  5. package/dist/cjs/ids.js +60 -0
  6. package/dist/cjs/index.js +49 -0
  7. package/dist/cjs/interceptors/audit/index.js +12 -0
  8. package/dist/cjs/interceptors/audit/interceptor.js +77 -0
  9. package/dist/cjs/interceptors/audit/record.js +107 -0
  10. package/dist/cjs/interceptors/audit/sink.js +3 -0
  11. package/dist/cjs/interceptors/audit/sinks/index.js +5 -0
  12. package/dist/cjs/interceptors/audit/sinks/stdout.js +33 -0
  13. package/dist/cjs/interceptors/base.js +7 -0
  14. package/dist/cjs/interceptors/cache/backend.js +9 -0
  15. package/dist/cjs/interceptors/cache/backends/index.js +5 -0
  16. package/dist/cjs/interceptors/cache/backends/memory.js +53 -0
  17. package/dist/cjs/interceptors/cache/index.js +9 -0
  18. package/dist/cjs/interceptors/chain.js +57 -0
  19. package/dist/cjs/interceptors/index.js +18 -0
  20. package/dist/cjs/interceptors/pii/context.js +25 -0
  21. package/dist/cjs/interceptors/pii/guard.js +161 -0
  22. package/dist/cjs/interceptors/pii/index.js +28 -0
  23. package/dist/cjs/interceptors/pii/match.js +21 -0
  24. package/dist/cjs/interceptors/pii/scanner.js +31 -0
  25. package/dist/cjs/interceptors/pii/scanners/bsn.js +41 -0
  26. package/dist/cjs/interceptors/pii/scanners/credit-card.js +51 -0
  27. package/dist/cjs/interceptors/pii/scanners/email.js +26 -0
  28. package/dist/cjs/interceptors/pii/scanners/iban.js +58 -0
  29. package/dist/cjs/interceptors/pii/scanners/index.js +45 -0
  30. package/dist/cjs/interceptors/pii/scanners/ip-address.js +36 -0
  31. package/dist/cjs/interceptors/pii/scanners/phone.js +37 -0
  32. package/dist/cjs/interceptors/pii/scanners/secret.js +46 -0
  33. package/dist/cjs/interceptors/pii/scanners/ssn.js +28 -0
  34. package/dist/cjs/interceptors/reliability/fallback.js +53 -0
  35. package/dist/cjs/interceptors/reliability/index.js +11 -0
  36. package/dist/cjs/interceptors/reliability/retry.js +69 -0
  37. package/dist/cjs/interceptors/reliability/timeout.js +41 -0
  38. package/dist/cjs/package.json +3 -0
  39. package/dist/cjs/pricing.js +70 -0
  40. package/dist/cjs/providers/anthropic.js +80 -0
  41. package/dist/cjs/providers/base.js +30 -0
  42. package/dist/cjs/providers/http.js +42 -0
  43. package/dist/cjs/providers/index.js +34 -0
  44. package/dist/cjs/providers/mock.js +54 -0
  45. package/dist/cjs/providers/openai.js +63 -0
  46. package/dist/cjs/request.js +60 -0
  47. package/dist/cjs/response.js +55 -0
  48. package/dist/cjs/testing/harness.js +70 -0
  49. package/dist/cjs/testing/index.js +8 -0
  50. package/dist/cjs/types.js +61 -0
  51. package/dist/esm/context.d.ts +33 -0
  52. package/dist/esm/context.js +43 -0
  53. package/dist/esm/errors.d.ts +36 -0
  54. package/dist/esm/errors.js +44 -0
  55. package/dist/esm/gateway.d.ts +54 -0
  56. package/dist/esm/gateway.js +123 -0
  57. package/dist/esm/ids.d.ts +11 -0
  58. package/dist/esm/ids.js +56 -0
  59. package/dist/esm/index.d.ts +25 -0
  60. package/dist/esm/index.js +20 -0
  61. package/dist/esm/interceptors/audit/index.d.ts +7 -0
  62. package/dist/esm/interceptors/audit/index.js +3 -0
  63. package/dist/esm/interceptors/audit/interceptor.d.ts +11 -0
  64. package/dist/esm/interceptors/audit/interceptor.js +72 -0
  65. package/dist/esm/interceptors/audit/record.d.ts +66 -0
  66. package/dist/esm/interceptors/audit/record.js +103 -0
  67. package/dist/esm/interceptors/audit/sink.d.ts +8 -0
  68. package/dist/esm/interceptors/audit/sink.js +2 -0
  69. package/dist/esm/interceptors/audit/sinks/index.d.ts +2 -0
  70. package/dist/esm/interceptors/audit/sinks/index.js +1 -0
  71. package/dist/esm/interceptors/audit/sinks/stdout.d.ts +8 -0
  72. package/dist/esm/interceptors/audit/sinks/stdout.js +30 -0
  73. package/dist/esm/interceptors/base.d.ts +37 -0
  74. package/dist/esm/interceptors/base.js +4 -0
  75. package/dist/esm/interceptors/cache/backend.d.ts +14 -0
  76. package/dist/esm/interceptors/cache/backend.js +8 -0
  77. package/dist/esm/interceptors/cache/backends/index.d.ts +2 -0
  78. package/dist/esm/interceptors/cache/backends/index.js +1 -0
  79. package/dist/esm/interceptors/cache/backends/memory.d.ts +7 -0
  80. package/dist/esm/interceptors/cache/backends/memory.js +50 -0
  81. package/dist/esm/interceptors/cache/index.d.ts +7 -0
  82. package/dist/esm/interceptors/cache/index.js +5 -0
  83. package/dist/esm/interceptors/chain.d.ts +17 -0
  84. package/dist/esm/interceptors/chain.js +53 -0
  85. package/dist/esm/interceptors/index.d.ts +8 -0
  86. package/dist/esm/interceptors/index.js +7 -0
  87. package/dist/esm/interceptors/pii/context.d.ts +15 -0
  88. package/dist/esm/interceptors/pii/context.js +21 -0
  89. package/dist/esm/interceptors/pii/guard.d.ts +30 -0
  90. package/dist/esm/interceptors/pii/guard.js +157 -0
  91. package/dist/esm/interceptors/pii/index.d.ts +10 -0
  92. package/dist/esm/interceptors/pii/index.js +7 -0
  93. package/dist/esm/interceptors/pii/match.d.ts +26 -0
  94. package/dist/esm/interceptors/pii/match.js +17 -0
  95. package/dist/esm/interceptors/pii/scanner.d.ts +32 -0
  96. package/dist/esm/interceptors/pii/scanner.js +26 -0
  97. package/dist/esm/interceptors/pii/scanners/bsn.d.ts +5 -0
  98. package/dist/esm/interceptors/pii/scanners/bsn.js +37 -0
  99. package/dist/esm/interceptors/pii/scanners/credit-card.d.ts +4 -0
  100. package/dist/esm/interceptors/pii/scanners/credit-card.js +47 -0
  101. package/dist/esm/interceptors/pii/scanners/email.d.ts +3 -0
  102. package/dist/esm/interceptors/pii/scanners/email.js +23 -0
  103. package/dist/esm/interceptors/pii/scanners/iban.d.ts +5 -0
  104. package/dist/esm/interceptors/pii/scanners/iban.js +54 -0
  105. package/dist/esm/interceptors/pii/scanners/index.d.ts +13 -0
  106. package/dist/esm/interceptors/pii/scanners/index.js +30 -0
  107. package/dist/esm/interceptors/pii/scanners/ip-address.d.ts +3 -0
  108. package/dist/esm/interceptors/pii/scanners/ip-address.js +33 -0
  109. package/dist/esm/interceptors/pii/scanners/phone.d.ts +6 -0
  110. package/dist/esm/interceptors/pii/scanners/phone.js +34 -0
  111. package/dist/esm/interceptors/pii/scanners/secret.d.ts +9 -0
  112. package/dist/esm/interceptors/pii/scanners/secret.js +43 -0
  113. package/dist/esm/interceptors/pii/scanners/ssn.d.ts +3 -0
  114. package/dist/esm/interceptors/pii/scanners/ssn.js +25 -0
  115. package/dist/esm/interceptors/reliability/fallback.d.ts +9 -0
  116. package/dist/esm/interceptors/reliability/fallback.js +50 -0
  117. package/dist/esm/interceptors/reliability/index.d.ts +7 -0
  118. package/dist/esm/interceptors/reliability/index.js +4 -0
  119. package/dist/esm/interceptors/reliability/retry.d.ts +13 -0
  120. package/dist/esm/interceptors/reliability/retry.js +66 -0
  121. package/dist/esm/interceptors/reliability/timeout.d.ts +9 -0
  122. package/dist/esm/interceptors/reliability/timeout.js +37 -0
  123. package/dist/esm/package.json +3 -0
  124. package/dist/esm/pricing.d.ts +19 -0
  125. package/dist/esm/pricing.js +65 -0
  126. package/dist/esm/providers/anthropic.d.ts +30 -0
  127. package/dist/esm/providers/anthropic.js +77 -0
  128. package/dist/esm/providers/base.d.ts +23 -0
  129. package/dist/esm/providers/base.js +28 -0
  130. package/dist/esm/providers/http.d.ts +8 -0
  131. package/dist/esm/providers/http.js +39 -0
  132. package/dist/esm/providers/index.d.ts +15 -0
  133. package/dist/esm/providers/index.js +25 -0
  134. package/dist/esm/providers/mock.d.ts +31 -0
  135. package/dist/esm/providers/mock.js +51 -0
  136. package/dist/esm/providers/openai.d.ts +26 -0
  137. package/dist/esm/providers/openai.js +60 -0
  138. package/dist/esm/request.d.ts +36 -0
  139. package/dist/esm/request.js +56 -0
  140. package/dist/esm/response.d.ts +38 -0
  141. package/dist/esm/response.js +51 -0
  142. package/dist/esm/testing/harness.d.ts +37 -0
  143. package/dist/esm/testing/harness.js +66 -0
  144. package/dist/esm/testing/index.d.ts +5 -0
  145. package/dist/esm/testing/index.js +3 -0
  146. package/dist/esm/types.d.ts +58 -0
  147. package/dist/esm/types.js +56 -0
  148. package/package.json +115 -0
  149. package/src/context.ts +57 -0
  150. package/src/errors.ts +47 -0
  151. package/src/gateway.ts +174 -0
  152. package/src/ids.ts +69 -0
  153. package/src/index.ts +52 -0
  154. package/src/interceptors/audit/index.ts +7 -0
  155. package/src/interceptors/audit/interceptor.ts +93 -0
  156. package/src/interceptors/audit/record.ts +138 -0
  157. package/src/interceptors/audit/sink.ts +10 -0
  158. package/src/interceptors/audit/sinks/index.ts +2 -0
  159. package/src/interceptors/audit/sinks/stdout.ts +42 -0
  160. package/src/interceptors/base.ts +58 -0
  161. package/src/interceptors/cache/backend.ts +15 -0
  162. package/src/interceptors/cache/backends/index.ts +2 -0
  163. package/src/interceptors/cache/backends/memory.ts +68 -0
  164. package/src/interceptors/cache/index.ts +8 -0
  165. package/src/interceptors/chain.ts +65 -0
  166. package/src/interceptors/index.ts +9 -0
  167. package/src/interceptors/pii/context.ts +24 -0
  168. package/src/interceptors/pii/guard.ts +201 -0
  169. package/src/interceptors/pii/index.ts +21 -0
  170. package/src/interceptors/pii/match.ts +43 -0
  171. package/src/interceptors/pii/scanner.ts +54 -0
  172. package/src/interceptors/pii/scanners/bsn.ts +44 -0
  173. package/src/interceptors/pii/scanners/credit-card.ts +52 -0
  174. package/src/interceptors/pii/scanners/email.ts +31 -0
  175. package/src/interceptors/pii/scanners/iban.ts +60 -0
  176. package/src/interceptors/pii/scanners/index.ts +35 -0
  177. package/src/interceptors/pii/scanners/ip-address.ts +41 -0
  178. package/src/interceptors/pii/scanners/phone.ts +46 -0
  179. package/src/interceptors/pii/scanners/secret.ts +51 -0
  180. package/src/interceptors/pii/scanners/ssn.ts +33 -0
  181. package/src/interceptors/reliability/fallback.ts +66 -0
  182. package/src/interceptors/reliability/index.ts +8 -0
  183. package/src/interceptors/reliability/retry.ts +97 -0
  184. package/src/interceptors/reliability/timeout.ts +53 -0
  185. package/src/pricing.ts +72 -0
  186. package/src/providers/anthropic.ts +113 -0
  187. package/src/providers/base.ts +52 -0
  188. package/src/providers/http.ts +50 -0
  189. package/src/providers/index.ts +39 -0
  190. package/src/providers/mock.ts +73 -0
  191. package/src/providers/openai.ts +94 -0
  192. package/src/request.ts +76 -0
  193. package/src/response.ts +73 -0
  194. package/src/testing/harness.ts +98 -0
  195. package/src/testing/index.ts +6 -0
  196. package/src/types.ts +83 -0
@@ -0,0 +1,201 @@
1
+ /**
2
+ * PiiGuard — the pre/post interceptor that detects and redacts PII.
3
+ *
4
+ * Pipeline rule (privacy): PII is scanned on every request before it reaches
5
+ * the provider. Detected entities are redacted/masked/tagged or blocked. In
6
+ * REDACT mode the original values are restored in the response.
7
+ */
8
+
9
+ import type { InterceptorContext } from '../../context.js'
10
+ import { PiiBlockedError } from '../../errors.js'
11
+ import type { GavioRequest } from '../../request.js'
12
+ import type { GavioResponse } from '../../response.js'
13
+ import { PiiMode, Sensitivity } from '../../types.js'
14
+ import type { Message } from '../../types.js'
15
+ import type { Interceptor } from '../base.js'
16
+ import { ScanContext } from './context.js'
17
+ import { matchLength } from './match.js'
18
+ import type { PiiMatch } from './match.js'
19
+ import { scannerTier } from './scanner.js'
20
+ import type { PiiScanner } from './scanner.js'
21
+ import { defaultScanners } from './scanners/index.js'
22
+
23
/** Key in `ctx.state` under which the token → original-value map is kept. */
const STATE_KEY = 'pii_replacements'

/**
 * Minimum confidence, per sensitivity level, that a match must reach to be
 * acted on. Matches scoring below the floor are dropped while scanning.
 */
const CONFIDENCE_FLOOR: Record<Sensitivity, number> = {
  [Sensitivity.STRICT]: 0.0,
  [Sensitivity.BALANCED]: 0.6,
  [Sensitivity.PERMISSIVE]: 0.9,
}

/** Options accepted by the PII guard; every field is optional. */
export interface PiiGuardOptions {
  /** Scanners to run. The guard uses `defaultScanners()` when omitted. */
  scanners?: PiiScanner[]
  /** Selects the confidence floor. The guard defaults to `Sensitivity.STRICT`. */
  sensitivity?: Sensitivity
  /** What to do with detected entities. The guard defaults to `PiiMode.REDACT`. */
  mode?: PiiMode
  /** Put original values back into the response (REDACT mode). Defaults to true. */
  restoreOnResponse?: boolean
  /** Log the detected entity type names (never the values). Defaults to true. */
  logEntityTypes?: boolean
  /** Detect and record only; leave the request untouched. Defaults to false. */
  dryRun?: boolean
  /** Locale handed to the scan context. The guard defaults to 'NL'. */
  locale?: string
  /** Language handed to the scan context. The guard defaults to 'en'. */
  language?: string
}
42
+
43
/**
 * Pre/post interceptor that detects PII in request messages and handles it
 * according to the configured mode (redact, mask, tag, or block).
 *
 * In REDACT mode the placeholder → original mapping is stored in `ctx.state`
 * so that `after` can put the original values back into the response.
 */
class PiiGuard implements Interceptor {
  readonly name = 'pii_guard'
  readonly dryRunSafe = true

  private readonly scanners: PiiScanner[]
  private readonly sensitivity: Sensitivity
  private readonly mode: PiiMode
  private readonly restoreOnResponse: boolean
  private readonly logEntityTypes: boolean
  private readonly ownDryRun: boolean
  private readonly locale: string
  private readonly language: string

  /** Resolve every option to its default when the caller left it out. */
  constructor(options: PiiGuardOptions = {}) {
    this.scanners = options.scanners ?? defaultScanners()
    this.sensitivity = options.sensitivity ?? Sensitivity.STRICT
    this.mode = options.mode ?? PiiMode.REDACT
    this.restoreOnResponse = options.restoreOnResponse ?? true
    this.logEntityTypes = options.logEntityTypes ?? true
    this.ownDryRun = options.dryRun ?? false
    this.locale = options.locale ?? 'NL'
    this.language = options.language ?? 'en'
  }

  /**
   * Scan every message of the request and rewrite its content per the mode.
   *
   * @returns the original request in dry-run, otherwise a copy carrying the
   *   rewritten messages.
   * @throws PiiBlockedError in BLOCK mode as soon as a message contains a
   *   match (this happens in dry-run too).
   */
  async before(request: GavioRequest, ctx: InterceptorContext): Promise<GavioRequest> {
    // One scan context for the whole request, shared by all messages.
    const scanCtx = new ScanContext(this.language, this.locale)
    const floor = CONFIDENCE_FLOOR[this.sensitivity]

    const newMessages: Message[] = []
    const allTypes: string[] = []
    // Extend a mapping already present on the context rather than replacing it.
    const replacements: Record<string, string> =
      (ctx.state[STATE_KEY] as Record<string, string> | undefined) ?? {}

    // Dry-run can be requested on this guard or on the whole call.
    const isDryRun = this.ownDryRun || ctx.dryRun

    for (const message of request.messages) {
      const content = message.content ?? ''
      const matches = await this.scanText(content, scanCtx, floor)
      for (const m of matches) allTypes.push(m.entityType)

      if (matches.length > 0 && this.mode === PiiMode.BLOCK) {
        const types = matches.map((m) => m.entityType)
        throw new PiiBlockedError(types)
      }

      let redacted = content
      if (matches.length > 0 && !isDryRun) {
        redacted = this.apply(content, matches, replacements)
      }

      newMessages.push({ ...message, content: redacted })
    }

    if (allTypes.length > 0) {
      ctx.recordPii(allTypes)
      if (this.logEntityTypes) {
        // Only entity type names are logged, never the matched values.
        const unique = Array.from(new Set(allTypes)).sort()
        // eslint-disable-next-line no-console
        console.info(`[gavio:pii] detected entity types: ${unique.join(', ')}`)
      }
    }

    if (this.restoreOnResponse && Object.keys(replacements).length > 0) {
      ctx.state[STATE_KEY] = replacements
    }

    if (isDryRun) return request
    return request.copyWithMessages(newMessages)
  }

  /**
   * Put original values back in place of their placeholder tokens.
   *
   * Only active in REDACT mode with `restoreOnResponse` enabled. All tokens
   * are substituted in a single pass, so text inside an already restored value
   * is never substituted a second time.
   *
   * @returns the same response object when nothing changed, otherwise a copy
   *   with the restored content.
   */
  async after(
    response: GavioResponse,
    ctx: InterceptorContext,
  ): Promise<GavioResponse> {
    if (!this.restoreOnResponse || this.mode !== PiiMode.REDACT) return response
    const replacements = ctx.state[STATE_KEY] as Record<string, string> | undefined
    if (!replacements) return response

    // Longest token first, so a token that prefixes another cannot shadow it.
    const tokens = Object.keys(replacements)
      .filter((token) => token.length > 0)
      .sort((a, b) => b.length - a.length)
    if (tokens.length === 0) return response

    const pattern = new RegExp(
      tokens.map((token) => PiiGuard.escapeRegExp(token)).join('|'),
      'g',
    )
    // A replacer function is used so `$` sequences in values are taken literally.
    const content = response.content.replace(
      pattern,
      (token: string) => replacements[token] ?? token,
    )
    if (content === response.content) return response
    return response.copyWithContent(content)
  }

  /**
   * Run all scanners (lowest tier first) over `text`, keep the matches that
   * reach the confidence floor, and resolve overlapping spans.
   */
  private async scanText(
    text: string,
    scanCtx: ScanContext,
    floor: number,
  ): Promise<PiiMatch[]> {
    const raw: PiiMatch[] = []
    const ordered = [...this.scanners].sort((a, b) => scannerTier(a) - scannerTier(b))
    for (const scanner of ordered) {
      const found = await scanner.scan(text, scanCtx)
      for (const match of found) {
        if (match.confidence >= floor) raw.push(match)
      }
    }
    return resolveOverlaps(raw)
  }

  /**
   * Substitute each match span in `text` with its token.
   *
   * In REDACT mode the token → original pair is recorded in `replacements`
   * (mutated in place) so it can be restored later.
   */
  private apply(
    text: string,
    matches: PiiMatch[],
    replacements: Record<string, string>,
  ): string {
    // Replace right-to-left so earlier offsets stay valid.
    const ordered = [...matches].sort((a, b) => b.start - a.start)
    let out = text
    for (const match of ordered) {
      let token = this.tokenFor(match)
      if (this.mode === PiiMode.REDACT) {
        // Two different values must never share one token, or restore would
        // hand back the wrong original.
        token = PiiGuard.uniqueToken(token, match.value, replacements)
        replacements[token] = match.value
      }
      out = out.slice(0, match.start) + token + out.slice(match.end)
    }
    return out
  }

  /** Build the text that stands in for `match` under the current mode. */
  private tokenFor(match: PiiMatch): string {
    if (this.mode === PiiMode.MASK) {
      return '*'.repeat(Math.max(matchLength(match), 1))
    }
    if (this.mode === PiiMode.TAG) {
      return `<${match.entityType}>${match.value}</${match.entityType}>`
    }
    // REDACT (default)
    return match.replacement || `[${match.entityType}]`
  }

  /**
   * Return `token` if it is free or already maps to `value`; otherwise derive
   * a numbered variant (`[NAME]` → `[NAME_2]`, `[NAME_3]`, …) that is.
   */
  private static uniqueToken(
    token: string,
    value: string,
    replacements: Record<string, string>,
  ): string {
    const taken = replacements[token]
    if (taken === undefined || taken === value) return token
    const bracketed = token.endsWith(']')
    const stem = bracketed ? token.slice(0, -1) : token
    const tail = bracketed ? ']' : ''
    for (let n = 2; ; n++) {
      const candidate = `${stem}_${n}${tail}`
      const existing = replacements[candidate]
      if (existing === undefined || existing === value) return candidate
    }
  }

  /** Escape `text` so it matches literally inside a regular expression. */
  private static escapeRegExp(text: string): string {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  }
}
173
+
174
/**
 * Reduce a set of matches to a non-overlapping subset.
 *
 * Candidates are ranked by start offset, then longest span first, then
 * highest confidence. They are then accepted left to right, skipping any
 * candidate that begins before the end of the last accepted one.
 */
export function resolveOverlaps(matches: PiiMatch[]): PiiMatch[] {
  const ranked = matches.slice().sort((left, right) => {
    if (left.start === right.start) {
      const lengthGap = matchLength(right) - matchLength(left)
      return lengthGap === 0 ? right.confidence - left.confidence : lengthGap
    }
    return left.start - right.start
  })

  // End offset of the most recently accepted match.
  let frontier = -1
  return ranked.filter((candidate) => {
    const fits = candidate.start >= frontier
    if (fits) frontier = candidate.end
    return fits
  })
}
197
+
198
/**
 * Create a PII guard interceptor.
 *
 * @param options guard configuration; defaults apply to anything omitted.
 */
export function piiGuard(options: PiiGuardOptions = {}): Interceptor {
  const guard: Interceptor = new PiiGuard(options)
  return guard
}
@@ -0,0 +1,21 @@
1
+ /** PII guard public surface. */
2
+
3
+ export { piiGuard, resolveOverlaps } from './guard.js'
4
+ export type { PiiGuardOptions } from './guard.js'
5
+ export { ScanContext } from './context.js'
6
+ export { ScannerRegistry, scannerTier } from './scanner.js'
7
+ export type { PiiScanner } from './scanner.js'
8
+ export { makeMatch, matchLength } from './match.js'
9
+ export type { PiiMatch } from './match.js'
10
+ export { PiiMode, Sensitivity } from '../../types.js'
11
+ export {
12
+ bsnScanner,
13
+ creditCardScanner,
14
+ emailScanner,
15
+ ibanScanner,
16
+ ipAddressScanner,
17
+ phoneScanner,
18
+ secretScanner,
19
+ ssnScanner,
20
+ defaultScanners,
21
+ } from './scanners/index.js'
@@ -0,0 +1,43 @@
1
+ /** PiiMatch — a single detected PII entity within a span of text. */
2
+
3
/**
 * A single detected entity inside a piece of scanned text.
 *
 * The span is half-open: `start` is the first character of the entity and
 * `end` is one past its last character.
 */
export interface PiiMatch {
  /** Entity class label. */
  entityType: string
  /** Offset of the first character of the entity in the scanned text. */
  start: number
  /** Offset one past the last character of the entity. */
  end: number
  /** The original text — never logged, used only for restore. */
  value: string
  /** Detection confidence; compared against the sensitivity floor. */
  confidence: number
  /** Placeholder used in REDACT mode, e.g. '[EMAIL_1]'. */
  replacement: string
}
19
+
20
/** Number of characters covered by the match's half-open span. */
export function matchLength(m: PiiMatch): number {
  const { start, end } = m
  return end - start
}
23
+
24
/**
 * Build a `PiiMatch`, validating its span and defaulting `confidence` to 1.0.
 *
 * @param init field values for the match; `confidence` may be omitted.
 * @returns a new match object holding the given fields.
 * @throws Error when `start`/`end` are not integers, `start` is negative, or
 *   `end` lies before `start`.
 */
export function makeMatch(init: {
  entityType: string
  start: number
  end: number
  value: string
  confidence?: number
  replacement: string
}): PiiMatch {
  const { start, end } = init
  // The integer check also rejects NaN/undefined offsets, which every plain
  // `<` comparison would silently let through.
  const spanOk =
    Number.isInteger(start) && Number.isInteger(end) && start >= 0 && end >= start
  if (!spanOk) {
    throw new Error(`Invalid PiiMatch span: start=${init.start}, end=${init.end}`)
  }
  return {
    entityType: init.entityType,
    start,
    end,
    value: init.value,
    confidence: init.confidence ?? 1.0,
    replacement: init.replacement,
  }
}
@@ -0,0 +1,54 @@
1
+ /** PiiScanner interface and ScannerRegistry. */
2
+
3
+ import type { ScanContext } from './context.js'
4
+ import type { PiiMatch } from './match.js'
5
+
6
/**
 * Contract for a detector of one class of PII entity.
 *
 * Scanners are grouped into tiers: 1 = regex, 2 = NER/ML, 3 = LLM. Lower tiers
 * run first so cheap deterministic matches are found before expensive ones.
 * v0.1.0 ships only tier-1 regex scanners.
 */
export interface PiiScanner {
  /** Entity label, e.g. 'EMAIL', 'IBAN', 'BSN'. */
  readonly entityType: string
  /** Execution tier; treated as 1 when omitted. */
  readonly tier?: 1 | 2 | 3
  /** Find entities in `text`; the result may be returned directly or as a promise. */
  scan(text: string, ctx: ScanContext): PiiMatch[] | Promise<PiiMatch[]>
  /** default: 1.0 */
  readonly confidence?: number
  /** Optional hook: does the scanner handle language `lang`? */
  supportsLanguage?(lang: string): boolean
  /** Optional hook: does the scanner handle locale `locale`? */
  supportsLocale?(locale: string): boolean
}
24
+
25
/** Tier of a scanner, falling back to 1 when the scanner declares none. */
export function scannerTier(s: PiiScanner): number {
  const { tier } = s
  if (tier == null) return 1
  return tier
}
28
+
29
/** Holds scanners and lets callers look them up by entity type at runtime. */
export class ScannerRegistry {
  private scanners: PiiScanner[] = []

  /** Optionally seed the registry; scanners are registered in the given order. */
  constructor(scanners?: PiiScanner[]) {
    const initial = scanners ?? []
    for (const entry of initial) {
      this.register(entry)
    }
  }

  /** Append a scanner and return the registry so calls can be chained. */
  register(scanner: PiiScanner): this {
    this.scanners.push(scanner)
    return this
  }

  /** A fresh array of all scanners, ordered by ascending tier. */
  all(): PiiScanner[] {
    const snapshot = this.scanners.slice()
    snapshot.sort((left, right) => scannerTier(left) - scannerTier(right))
    return snapshot
  }

  /** All registered scanners whose `entityType` equals the given label. */
  byEntityType(entityType: string): PiiScanner[] {
    const hits: PiiScanner[] = []
    for (const candidate of this.scanners) {
      if (candidate.entityType === entityType) hits.push(candidate)
    }
    return hits
  }

  /** Number of registered scanners. */
  get size(): number {
    return this.scanners.length
  }
}
@@ -0,0 +1,44 @@
1
+ /** Dutch BSN scanner — regex + 11-proef (eleven-test) checksum. */
2
+
3
+ import type { ScanContext } from '../context.js'
4
+ import { makeMatch } from '../match.js'
5
+ import type { PiiMatch } from '../match.js'
6
+ import type { PiiScanner } from '../scanner.js'
7
+
8
// BSN is 8 or 9 digits; only the 9-digit form is matched and validated here.
const BSN = /\b\d{9}\b/g

/**
 * 11-proef: sum of digit*weight (9,8,...,2,-1) must be divisible by 11.
 *
 * @param digits candidate number; must be exactly nine ASCII digits.
 * @returns true when the candidate passes the checksum. The all-zero number
 *   is rejected even though its weighted sum (0) is divisible by 11.
 */
export function validBsn(digits: string): boolean {
  // Digits only: Number(' ') is 0, so whitespace would otherwise count as a
  // zero digit and slip through the checksum.
  if (!/^\d{9}$/.test(digits)) return false
  if (digits === '000000000') return false
  const weights = [9, 8, 7, 6, 5, 4, 3, 2, -1]
  let total = 0
  for (let i = 0; i < 9; i++) {
    total += Number(digits[i]) * weights[i]!
  }
  return total % 11 === 0
}
21
+
22
/**
 * Tier-1 scanner for Dutch BSN numbers: nine-digit candidates that pass the
 * 11-proef checksum. Each accepted hit gets a `[BSN_<n>]` placeholder whose
 * index comes from the scan context.
 */
export function bsnScanner(): PiiScanner {
  const entityType = 'BSN'

  const scan = (text: string, ctx: ScanContext): PiiMatch[] =>
    Array.from(text.matchAll(BSN)).flatMap((hit) => {
      const digits = hit[0]
      // Candidates failing the checksum are not BSNs; skip them.
      if (!validBsn(digits)) return []
      const begin = hit.index
      return [
        makeMatch({
          entityType,
          start: begin,
          end: begin + digits.length,
          value: digits,
          replacement: `[BSN_${ctx.nextIndex(entityType)}]`,
        }),
      ]
    })

  return { entityType, tier: 1, scan }
}
@@ -0,0 +1,52 @@
1
+ /** Credit card scanner — regex candidate + Luhn checksum validation. */
2
+
3
+ import type { ScanContext } from '../context.js'
4
+ import { makeMatch } from '../match.js'
5
+ import type { PiiMatch } from '../match.js'
6
+ import type { PiiScanner } from '../scanner.js'
7
+
8
// 13–19 digits, optionally separated by single spaces or hyphens.
const CARD = /\b(?:\d[ -]?){12,18}\d\b/g

/**
 * Luhn checksum over the ASCII digits of `number`; all other characters
 * (spaces, hyphens, …) are ignored.
 *
 * @returns false unless 13–19 digits are present and the checksum is a
 *   multiple of 10.
 */
export function luhnValid(number: string): boolean {
  const digits = Array.from(number)
    .filter((ch) => ch >= '0' && ch <= '9')
    .map((ch) => ch.charCodeAt(0) - 48)
  const count = digits.length
  if (count < 13 || count > 19) return false

  // Walk from the rightmost digit; every second digit is doubled.
  let sum = 0
  for (let offset = 0; offset < count; offset++) {
    const digit = digits[count - 1 - offset]!
    if (offset % 2 === 1) {
      const doubled = digit * 2
      sum += doubled > 9 ? doubled - 9 : doubled
    } else {
      sum += digit
    }
  }
  return sum % 10 === 0
}
29
+
30
/**
 * Tier-1 scanner for payment card numbers: regex candidates that pass the
 * Luhn checksum. Each accepted hit gets a `[CREDIT_CARD_<n>]` placeholder
 * whose index comes from the scan context.
 */
export function creditCardScanner(): PiiScanner {
  const entityType = 'CREDIT_CARD'

  const scan = (text: string, ctx: ScanContext): PiiMatch[] =>
    Array.from(text.matchAll(CARD)).flatMap((hit) => {
      const raw = hit[0]
      // Digit runs that fail Luhn are not card numbers; skip them.
      if (!luhnValid(raw)) return []
      const begin = hit.index
      return [
        makeMatch({
          entityType,
          start: begin,
          end: begin + raw.length,
          value: raw,
          replacement: `[CREDIT_CARD_${ctx.nextIndex(entityType)}]`,
        }),
      ]
    })

  return { entityType, tier: 1, scan }
}
@@ -0,0 +1,31 @@
1
+ /** Email address scanner (RFC 5322 pragmatic subset). */
2
+
3
+ import type { ScanContext } from '../context.js'
4
+ import { makeMatch } from '../match.js'
5
+ import type { PiiMatch } from '../match.js'
6
+ import type { PiiScanner } from '../scanner.js'
7
+
8
const EMAIL = /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/g

/**
 * Tier-1 scanner for e-mail addresses. Every regex hit is reported; each gets
 * an `[EMAIL_<n>]` placeholder whose index comes from the scan context.
 */
export function emailScanner(): PiiScanner {
  const entityType = 'EMAIL'

  const scan = (text: string, ctx: ScanContext): PiiMatch[] =>
    Array.from(text.matchAll(EMAIL)).map((hit) => {
      const address = hit[0]
      const begin = hit.index
      return makeMatch({
        entityType,
        start: begin,
        end: begin + address.length,
        value: address,
        replacement: `[EMAIL_${ctx.nextIndex(entityType)}]`,
      })
    })

  return { entityType, tier: 1, scan }
}
@@ -0,0 +1,60 @@
1
+ /** IBAN scanner — regex candidate + ISO 13616 mod-97 checksum validation. */
2
+
3
+ import type { ScanContext } from '../context.js'
4
+ import { makeMatch } from '../match.js'
5
+ import type { PiiMatch } from '../match.js'
6
+ import type { PiiScanner } from '../scanner.js'
7
+
8
// Candidate: 2 letters, 2 check digits, 11–30 alphanumerics (optionally spaced).
// The pattern is greedy, so a candidate may run on into following upper-case
// words or digits; ibanScanner trims such candidates back to a valid prefix.
const IBAN = /\b[A-Z]{2}\d{2}(?:[ ]?[A-Z0-9]){11,30}\b/g

/**
 * ISO 13616 mod-97: rearrange, convert letters to numbers, check %97 == 1.
 *
 * @param candidate IBAN text; spaces are ignored and letters are upper-cased.
 * @returns true when the cleaned candidate is 15–34 characters long, purely
 *   alphanumeric, and its mod-97 remainder is 1.
 */
export function validIban(candidate: string): boolean {
  const cleaned = candidate.replace(/ /g, '').toUpperCase()
  if (cleaned.length < 15 || cleaned.length > 34) return false
  // The first four characters (country + check digits) move to the end.
  const rearranged = cleaned.slice(4) + cleaned.slice(0, 4)
  let digits = ''
  for (const ch of rearranged) {
    if (ch >= 'A' && ch <= 'Z') {
      // A → 10, B → 11, …, Z → 35
      digits += (ch.charCodeAt(0) - 55).toString()
    } else if (ch >= '0' && ch <= '9') {
      digits += ch
    } else {
      return false
    }
  }
  return mod97(digits) === 1
}

/** Compute n % 97 over a decimal digit string, one digit at a time. */
function mod97(numeric: string): number {
  let remainder = 0
  for (const ch of numeric) {
    remainder = (remainder * 10 + (ch.charCodeAt(0) - 48)) % 97
  }
  return remainder
}

/**
 * Longest prefix of `candidate` that is a valid IBAN, cutting only at spaces.
 *
 * @returns the valid prefix (possibly the whole candidate), or undefined when
 *   no prefix validates.
 */
function longestValidIban(candidate: string): string | undefined {
  let prefix = candidate
  for (;;) {
    if (validIban(prefix)) return prefix
    const cut = prefix.lastIndexOf(' ')
    if (cut < 0) return undefined
    prefix = prefix.slice(0, cut)
  }
}

/**
 * Tier-1 scanner for IBANs: regex candidates validated with the mod-97 check.
 *
 * When a candidate fails as a whole, trailing space-separated groups are
 * dropped until a valid IBAN remains, and scanning resumes right after it, so
 * text following an IBAN cannot hide it or the next one.
 */
export function ibanScanner(): PiiScanner {
  return {
    entityType: 'IBAN',
    tier: 1,
    scan(text: string, ctx: ScanContext): PiiMatch[] {
      const out: PiiMatch[] = []
      // Private copy: lastIndex is moved by hand below.
      const pattern = new RegExp(IBAN.source, IBAN.flags)
      let m: RegExpExecArray | null
      while ((m = pattern.exec(text)) !== null) {
        const value = longestValidIban(m[0])
        if (value === undefined) {
          // Retry after the first group so an IBAN later in this run is seen.
          const gap = m[0].indexOf(' ')
          if (gap >= 0) pattern.lastIndex = m.index + gap + 1
          continue
        }
        const idx = ctx.nextIndex('IBAN')
        out.push(
          makeMatch({
            entityType: 'IBAN',
            start: m.index,
            end: m.index + value.length,
            value,
            replacement: `[IBAN_${idx}]`,
          }),
        )
        // Re-scan whatever the greedy candidate swallowed after the IBAN.
        pattern.lastIndex = m.index + value.length
      }
      return out
    },
  }
}
@@ -0,0 +1,35 @@
1
+ /** Built-in tier-1 (regex) PII scanners. */
2
+
3
+ import type { PiiScanner } from '../scanner.js'
4
+ import { bsnScanner } from './bsn.js'
5
+ import { creditCardScanner } from './credit-card.js'
6
+ import { emailScanner } from './email.js'
7
+ import { ibanScanner } from './iban.js'
8
+ import { ipAddressScanner } from './ip-address.js'
9
+ import { phoneScanner } from './phone.js'
10
+ import { secretScanner } from './secret.js'
11
+ import { ssnScanner } from './ssn.js'
12
+
13
+ export { bsnScanner, validBsn } from './bsn.js'
14
+ export { creditCardScanner, luhnValid } from './credit-card.js'
15
+ export { emailScanner } from './email.js'
16
+ export { ibanScanner, validIban } from './iban.js'
17
+ export { ipAddressScanner } from './ip-address.js'
18
+ export { phoneScanner } from './phone.js'
19
+ export type { PhoneScannerOptions } from './phone.js'
20
+ export { secretScanner } from './secret.js'
21
+ export { ssnScanner } from './ssn.js'
22
+
23
/**
 * Build the scanner set PiiGuard uses when the caller supplies none.
 *
 * Every call creates fresh scanner instances, in this fixed order.
 */
export function defaultScanners(): PiiScanner[] {
  const factories: Array<() => PiiScanner> = [
    secretScanner,
    emailScanner,
    ibanScanner,
    bsnScanner,
    creditCardScanner,
    ssnScanner,
    phoneScanner,
    ipAddressScanner,
  ]
  return factories.map((create) => create())
}
@@ -0,0 +1,41 @@
1
+ /** IP address scanner — IPv4 and IPv6, validated via node:net isIP. */
2
+
3
+ import { isIP } from 'node:net'
4
+ import type { ScanContext } from '../context.js'
5
+ import { makeMatch } from '../match.js'
6
+ import type { PiiMatch } from '../match.js'
7
+ import type { PiiScanner } from '../scanner.js'
8
+
9
const IPV4 = String.raw`(?:\d{1,3}\.){3}\d{1,3}`
// Deliberately loose IPv6 candidate: groups may be empty so "::" compression
// is accepted. Anything that is not a real address is rejected by validIp.
const IPV6 = String.raw`(?:[A-Fa-f0-9]{0,4}:){2,7}[A-Fa-f0-9]{0,4}`
const IP = new RegExp(String.raw`(?<![\w.])(?:${IPV6}|${IPV4})(?![\w.])`, 'g')

/** True when `candidate` is a well-formed IPv4 or IPv6 address per `isIP`. */
function validIp(candidate: string): boolean {
  const family = isIP(candidate)
  return family === 4 || family === 6
}
18
+
19
/**
 * Tier-1 scanner for IPv4/IPv6 addresses: regex candidates confirmed by
 * `validIp`. Each accepted hit gets an `[IP_ADDRESS_<n>]` placeholder whose
 * index comes from the scan context.
 */
export function ipAddressScanner(): PiiScanner {
  const entityType = 'IP_ADDRESS'

  const scan = (text: string, ctx: ScanContext): PiiMatch[] =>
    Array.from(text.matchAll(IP)).flatMap((hit) => {
      const address = hit[0]
      // The candidate pattern is permissive; drop anything that is not an IP.
      if (!validIp(address)) return []
      const begin = hit.index
      return [
        makeMatch({
          entityType,
          start: begin,
          end: begin + address.length,
          value: address,
          replacement: `[IP_ADDRESS_${ctx.nextIndex(entityType)}]`,
        }),
      ]
    })

  return { entityType, tier: 1, scan }
}
@@ -0,0 +1,46 @@
1
+ /** Phone number scanner — E.164 and common national formats. */
2
+
3
+ import type { ScanContext } from '../context.js'
4
+ import { makeMatch } from '../match.js'
5
+ import type { PiiMatch } from '../match.js'
6
+ import type { PiiScanner } from '../scanner.js'
7
+
8
// Candidate pattern: E.164 (+CC...) or national groupings with separators.
// Short numbers and years are weeded out afterwards by the 7–15 digit filter.
const PHONE =
  /(?<![\w.])(?:\+?\d{1,3}[ .-]?)?(?:\(\d{1,4}\)[ .-]?)?\d{2,4}(?:[ .-]?\d{2,4}){2,4}(?![\w])/g

/** Options for the phone scanner. */
export interface PhoneScannerOptions {
  /** Locales reported as supported; defaults to NL, DE, GB and US. */
  locales?: string[]
}

/**
 * Tier-1 scanner for phone numbers.
 *
 * A regex hit is kept only when it contains 7–15 digits. Matches carry a
 * confidence of 0.85 and a `[PHONE_<n>]` placeholder whose index comes from
 * the scan context.
 */
export function phoneScanner(options: PhoneScannerOptions = {}): PiiScanner {
  const entityType = 'PHONE'
  const confidence = 0.85
  const locales = options.locales ?? ['NL', 'DE', 'GB', 'US']

  const scan = (text: string, ctx: ScanContext): PiiMatch[] =>
    Array.from(text.matchAll(PHONE)).flatMap((hit) => {
      const raw = hit[0]
      // Count digits only; separators and brackets do not count.
      const digitCount = raw.replace(/\D/g, '').length
      if (digitCount < 7 || digitCount > 15) return []
      const begin = hit.index
      return [
        makeMatch({
          entityType,
          start: begin,
          end: begin + raw.length,
          value: raw,
          confidence,
          replacement: `[PHONE_${ctx.nextIndex(entityType)}]`,
        }),
      ]
    })

  // Locale comparison is case-insensitive on the caller's side only.
  const supportsLocale = (locale: string): boolean =>
    locales.includes(locale.toUpperCase())

  return { entityType, tier: 1, confidence, scan, supportsLocale }
}
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Secret / credential scanner (F-SEC-04).
3
+ *
4
+ * Detects API keys, tokens, JWTs, PEM private keys, and database connection
5
+ * strings. These must never leave the device, so SecretScanner is tier 1 and
6
+ * runs by default.
7
+ */
8
+
9
+ import type { ScanContext } from '../context.js'
10
+ import { makeMatch } from '../match.js'
11
+ import type { PiiMatch } from '../match.js'
12
+ import type { PiiScanner } from '../scanner.js'
13
+
14
// [label, pattern] — ordered most-specific first. All patterns are global.
const PATTERNS: Array<[string, RegExp]> = [
  ['ANTHROPIC_KEY', /\bsk-ant-[A-Za-z0-9_-]{20,}\b/g],
  // `sk-ant-` keys belong to the pattern above; excluding them here keeps one
  // key from being reported twice.
  ['OPENAI_KEY', /\bsk-(?!ant-)(?:proj-)?[A-Za-z0-9_-]{20,}\b/g],
  ['AWS_ACCESS_KEY', /\b(?:AKIA|ASIA)[0-9A-Z]{16}\b/g],
  ['GITHUB_TOKEN', /\bgh[pousr]_[A-Za-z0-9]{36,}\b/g],
  ['JWT', /\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\b/g],
  // Covers the whole armored block — header, key material and footer — so the
  // key body is redacted too. Without a footer the match runs to end of text.
  [
    'PRIVATE_KEY',
    /-----BEGIN (?:[A-Z0-9]+ )*PRIVATE KEY(?: BLOCK)?-----[\s\S]*?(?:-----END (?:[A-Z0-9]+ )*PRIVATE KEY(?: BLOCK)?-----|$)/g,
  ],
  [
    'DB_CONNECTION_STRING',
    /\b(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis):\/\/[^\s"']+/g,
  ],
]

/**
 * Tier-1 scanner for secrets and credentials (API keys, tokens, JWTs, PEM
 * private keys, database connection strings).
 *
 * Every pattern is run over the text in turn; all hits are reported under the
 * single entity type 'SECRET' with a `[SECRET_<n>]` placeholder whose index
 * comes from the scan context.
 */
export function secretScanner(): PiiScanner {
  return {
    entityType: 'SECRET',
    tier: 1,
    scan(text: string, ctx: ScanContext): PiiMatch[] {
      const out: PiiMatch[] = []
      // The label is informational only; matches are not distinguished by it.
      for (const [, pattern] of PATTERNS) {
        for (const m of text.matchAll(pattern)) {
          const idx = ctx.nextIndex('SECRET')
          out.push(
            makeMatch({
              entityType: 'SECRET',
              start: m.index,
              end: m.index + m[0].length,
              value: m[0],
              replacement: `[SECRET_${idx}]`,
            }),
          )
        }
      }
      return out
    },
  }
}