@nationaldesignstudio/rampart 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +402 -0
- package/MODEL_CARD.md +422 -0
- package/README.md +279 -0
- package/RELEASE.md +97 -0
- package/WHITEPAPER.md +316 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +35639 -0
- package/dist/index.js.map +36 -0
- package/dist/src/guard.d.ts +94 -0
- package/dist/src/guard.d.ts.map +1 -0
- package/dist/src/heuristics.d.ts +14 -0
- package/dist/src/heuristics.d.ts.map +1 -0
- package/dist/src/ner/classifier.d.ts +92 -0
- package/dist/src/ner/classifier.d.ts.map +1 -0
- package/dist/src/ner/worker.d.ts +44 -0
- package/dist/src/ner/worker.d.ts.map +1 -0
- package/dist/src/ner/worker.js +35302 -0
- package/dist/src/ner/worker.js.map +30 -0
- package/dist/src/pipeline.d.ts +76 -0
- package/dist/src/pipeline.d.ts.map +1 -0
- package/dist/src/policy.d.ts +27 -0
- package/dist/src/policy.d.ts.map +1 -0
- package/dist/src/premask.d.ts +48 -0
- package/dist/src/premask.d.ts.map +1 -0
- package/dist/src/session.d.ts +60 -0
- package/dist/src/session.d.ts.map +1 -0
- package/dist/src/streaming.d.ts +32 -0
- package/dist/src/streaming.d.ts.map +1 -0
- package/dist/src/types.d.ts +43 -0
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/validators.d.ts +16 -0
- package/dist/src/validators.d.ts.map +1 -0
- package/eval/bench/README.md +91 -0
- package/eval/bench/fetch.ts +152 -0
- package/eval/bench/labels.ts +45 -0
- package/eval/bench/run.ts +146 -0
- package/eval/bench/runs/m06-v3-30k/by_language.json +303 -0
- package/eval/bench/runs/m06-v3-30k/summary.json +56 -0
- package/eval/bench/runs/sample-900/by_language.json +303 -0
- package/eval/bench/runs/sample-900/manifest.json +926 -0
- package/eval/bench/runs/sample-900/summary.json +56 -0
- package/eval/bench/score.ts +197 -0
- package/eval/bench/webgpu/entry.ts +70 -0
- package/eval/bench/webgpu/index.html +12 -0
- package/eval/bench/webgpu.ts +209 -0
- package/eval/public-cases.ts +412 -0
- package/eval/run-public-eval.ts +140 -0
- package/examples/basic-chat.ts +12 -0
- package/examples/pii-worker.ts +3 -0
- package/index.ts +47 -0
- package/package.json +103 -0
- package/src/guard.ts +170 -0
- package/src/heuristics.ts +141 -0
- package/src/ner/classifier.ts +580 -0
- package/src/ner/worker.ts +130 -0
- package/src/policy.ts +64 -0
- package/src/premask.ts +90 -0
- package/src/session.ts +99 -0
- package/src/streaming.ts +73 -0
- package/src/types.ts +74 -0
- package/src/validators.ts +40 -0
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
import type { PiiLabel } from "../src/types";
|
|
2
|
+
|
|
3
|
+
export interface EvalTerm {
|
|
4
|
+
readonly text: string;
|
|
5
|
+
readonly label: PiiLabel;
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
export interface PublicEvalCase {
|
|
9
|
+
readonly id: string;
|
|
10
|
+
readonly input: string;
|
|
11
|
+
readonly privateTerms: readonly EvalTerm[];
|
|
12
|
+
readonly publicTerms: readonly string[];
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export const PUBLIC_E2E_CASES: readonly PublicEvalCase[] = [
|
|
16
|
+
{
|
|
17
|
+
id: "chat-name-email-income",
|
|
18
|
+
input: "My name is Alex Rivera, my email is alex.rivera@example.com, and my monthly income is $1,950.",
|
|
19
|
+
privateTerms: [
|
|
20
|
+
{ text: "Alex", label: "GIVEN_NAME" },
|
|
21
|
+
{ text: "Rivera", label: "SURNAME" },
|
|
22
|
+
{ text: "alex.rivera@example.com", label: "EMAIL" },
|
|
23
|
+
],
|
|
24
|
+
publicTerms: ["$1,950"],
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
id: "chat-ssn-phone",
|
|
28
|
+
input: "My SSN is 472-81-0094 and you can call me at (202) 555-0188.",
|
|
29
|
+
privateTerms: [
|
|
30
|
+
{ text: "472-81-0094", label: "SSN" },
|
|
31
|
+
{ text: "(202) 555-0188", label: "PHONE" },
|
|
32
|
+
],
|
|
33
|
+
publicTerms: [],
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
id: "chat-card-spaces",
|
|
37
|
+
input: "The card number is 4111 1111 1111 1111, but my eligibility question is about rent.",
|
|
38
|
+
privateTerms: [{ text: "4111 1111 1111 1111", label: "CREDIT_CARD" }],
|
|
39
|
+
publicTerms: ["eligibility question", "rent"],
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
id: "chat-routing-number",
|
|
43
|
+
input: "My bank routing number is 021000021 for direct deposit.",
|
|
44
|
+
privateTerms: [{ text: "021000021", label: "ROUTING_NUMBER" }],
|
|
45
|
+
publicTerms: ["direct deposit"],
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
id: "chat-street-keeps-geography",
|
|
49
|
+
input: "I live at 31 Birchwood Avenue Old Lyme CT 06371 and make $45,000.",
|
|
50
|
+
privateTerms: [
|
|
51
|
+
{ text: "31", label: "BUILDING_NUMBER" },
|
|
52
|
+
{ text: "Birchwood Avenue", label: "STREET_NAME" },
|
|
53
|
+
],
|
|
54
|
+
publicTerms: ["Old Lyme", "CT", "06371", "$45,000"],
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
id: "chat-city-state-zip-only",
|
|
58
|
+
input: "I live in Old Lyme CT 06371 and make $45,000.",
|
|
59
|
+
privateTerms: [],
|
|
60
|
+
publicTerms: ["Old Lyme", "CT", "06371", "$45,000"],
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
id: "chat-hyphenated-name",
|
|
64
|
+
input: "My name is Thanh-Nghiem Quoc-Bao and I need help with my application.",
|
|
65
|
+
privateTerms: [
|
|
66
|
+
{ text: "Thanh-Nghiem", label: "GIVEN_NAME" },
|
|
67
|
+
{ text: "Quoc-Bao", label: "SURNAME" },
|
|
68
|
+
],
|
|
69
|
+
publicTerms: ["need help with my application"],
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
id: "chat-particle-name",
|
|
73
|
+
input: "Please update the record for Jean-Baptiste De La Croix.",
|
|
74
|
+
privateTerms: [
|
|
75
|
+
{ text: "Jean-Baptiste", label: "GIVEN_NAME" },
|
|
76
|
+
{ text: "De La Croix", label: "SURNAME" },
|
|
77
|
+
],
|
|
78
|
+
publicTerms: ["update the record"],
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
id: "chat-apostrophe-name",
|
|
82
|
+
input: "The applicant is Saoirse O'Neill and the household size is four.",
|
|
83
|
+
privateTerms: [
|
|
84
|
+
{ text: "Saoirse", label: "GIVEN_NAME" },
|
|
85
|
+
{ text: "O'Neill", label: "SURNAME" },
|
|
86
|
+
],
|
|
87
|
+
publicTerms: ["household size is four"],
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
id: "chat-diacritic-name",
|
|
91
|
+
input: "I am Saoirse Ni Bhraonain and I have a voucher question.",
|
|
92
|
+
privateTerms: [
|
|
93
|
+
{ text: "Saoirse", label: "GIVEN_NAME" },
|
|
94
|
+
{ text: "Ni Bhraonain", label: "SURNAME" },
|
|
95
|
+
],
|
|
96
|
+
publicTerms: ["voucher question"],
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
id: "chat-east-asian-name",
|
|
100
|
+
input: "Yu-Shan Liang is listed as the head of household.",
|
|
101
|
+
privateTerms: [
|
|
102
|
+
{ text: "Yu-Shan", label: "GIVEN_NAME" },
|
|
103
|
+
{ text: "Liang", label: "SURNAME" },
|
|
104
|
+
],
|
|
105
|
+
publicTerms: ["head of household"],
|
|
106
|
+
},
|
|
107
|
+
{
|
|
108
|
+
id: "chat-south-asian-name",
|
|
109
|
+
input: "Priyanka Venkataraman submitted the form yesterday.",
|
|
110
|
+
privateTerms: [
|
|
111
|
+
{ text: "Priyanka", label: "GIVEN_NAME" },
|
|
112
|
+
{ text: "Venkataraman", label: "SURNAME" },
|
|
113
|
+
],
|
|
114
|
+
publicTerms: ["submitted the form"],
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
id: "chat-west-african-name",
|
|
118
|
+
input: "Chukwuemeka Okonkwo-Adeyemi needs to update household income.",
|
|
119
|
+
privateTerms: [
|
|
120
|
+
{ text: "Chukwuemeka", label: "GIVEN_NAME" },
|
|
121
|
+
{ text: "Okonkwo-Adeyemi", label: "SURNAME" },
|
|
122
|
+
],
|
|
123
|
+
publicTerms: ["household income"],
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
id: "chat-hispanic-name",
|
|
127
|
+
input: "Luis Garcia has a Section 8 voucher appointment next week.",
|
|
128
|
+
privateTerms: [
|
|
129
|
+
{ text: "Luis", label: "GIVEN_NAME" },
|
|
130
|
+
{ text: "Garcia", label: "SURNAME" },
|
|
131
|
+
],
|
|
132
|
+
publicTerms: ["Section 8 voucher appointment"],
|
|
133
|
+
},
|
|
134
|
+
{
|
|
135
|
+
id: "chat-middle-eastern-name",
|
|
136
|
+
input: "Noura Al-Hassan asked about utility allowances.",
|
|
137
|
+
privateTerms: [
|
|
138
|
+
{ text: "Noura", label: "GIVEN_NAME" },
|
|
139
|
+
{ text: "Al-Hassan", label: "SURNAME" },
|
|
140
|
+
],
|
|
141
|
+
publicTerms: ["utility allowances"],
|
|
142
|
+
},
|
|
143
|
+
{
|
|
144
|
+
id: "chat-public-official-kept-by-policy-fixture",
|
|
145
|
+
input: "The Housing Secretary spoke in Washington about housing.",
|
|
146
|
+
privateTerms: [],
|
|
147
|
+
publicTerms: ["The Housing Secretary", "Washington", "housing"],
|
|
148
|
+
},
|
|
149
|
+
{
|
|
150
|
+
id: "chat-private-landlord",
|
|
151
|
+
input: "My landlord Thomas Green filed an eviction notice.",
|
|
152
|
+
privateTerms: [
|
|
153
|
+
{ text: "Thomas", label: "GIVEN_NAME" },
|
|
154
|
+
{ text: "Green", label: "SURNAME" },
|
|
155
|
+
],
|
|
156
|
+
publicTerms: ["eviction notice"],
|
|
157
|
+
},
|
|
158
|
+
{
|
|
159
|
+
id: "chat-public-organization",
|
|
160
|
+
input: "I found this information on benefits.gov and the state housing website.",
|
|
161
|
+
privateTerms: [],
|
|
162
|
+
publicTerms: ["benefits.gov", "state housing website"],
|
|
163
|
+
},
|
|
164
|
+
{
|
|
165
|
+
id: "chat-case-number",
|
|
166
|
+
input: "My case number is AGY-2026-009871 and my household member is Maya Chen.",
|
|
167
|
+
privateTerms: [
|
|
168
|
+
{ text: "AGY-2026-009871", label: "GOVERNMENT_ID" },
|
|
169
|
+
{ text: "Maya", label: "GIVEN_NAME" },
|
|
170
|
+
{ text: "Chen", label: "SURNAME" },
|
|
171
|
+
],
|
|
172
|
+
publicTerms: ["household member"],
|
|
173
|
+
},
|
|
174
|
+
{
|
|
175
|
+
id: "chat-uscis-receipt",
|
|
176
|
+
input: "My USCIS receipt is IOE0912345678, and I need to know if that affects housing eligibility.",
|
|
177
|
+
privateTerms: [{ text: "IOE0912345678", label: "GOVERNMENT_ID" }],
|
|
178
|
+
publicTerms: ["housing eligibility"],
|
|
179
|
+
},
|
|
180
|
+
{
|
|
181
|
+
id: "chat-medicare-number",
|
|
182
|
+
input: "My Medicare number is 1EG4-TE5-MK73 and I am applying for housing.",
|
|
183
|
+
privateTerms: [{ text: "1EG4-TE5-MK73", label: "GOVERNMENT_ID" }],
|
|
184
|
+
publicTerms: ["applying for housing"],
|
|
185
|
+
},
|
|
186
|
+
{
|
|
187
|
+
id: "chat-passport",
|
|
188
|
+
input: "My passport number is X12345678 and my question is about local rent limits.",
|
|
189
|
+
privateTerms: [{ text: "X12345678", label: "PASSPORT" }],
|
|
190
|
+
publicTerms: ["local rent limits"],
|
|
191
|
+
},
|
|
192
|
+
{
|
|
193
|
+
id: "chat-drivers-license",
|
|
194
|
+
input: "The form asks for my driver's license D1234567, but I only want rent guidance.",
|
|
195
|
+
privateTerms: [{ text: "D1234567", label: "DRIVERS_LICENSE" }],
|
|
196
|
+
publicTerms: ["rent guidance"],
|
|
197
|
+
},
|
|
198
|
+
{
|
|
199
|
+
id: "chat-age-kept",
|
|
200
|
+
input: "I am 63 and my mother who lives with us is 81 years old.",
|
|
201
|
+
privateTerms: [],
|
|
202
|
+
publicTerms: ["63", "81 years old"],
|
|
203
|
+
},
|
|
204
|
+
{
|
|
205
|
+
id: "chat-income-words-kept",
|
|
206
|
+
input: "I bring home about forty thousand dollars annually.",
|
|
207
|
+
privateTerms: [],
|
|
208
|
+
publicTerms: ["forty thousand dollars"],
|
|
209
|
+
},
|
|
210
|
+
{
|
|
211
|
+
id: "chat-low-income-kept",
|
|
212
|
+
input: "My annual income is $18,200 and I live in Newark NJ.",
|
|
213
|
+
privateTerms: [],
|
|
214
|
+
publicTerms: ["$18,200", "Newark NJ"],
|
|
215
|
+
},
|
|
216
|
+
{
|
|
217
|
+
id: "chat-url-private",
|
|
218
|
+
input: "My uploaded documents are at https://files.example.com/private/alex.",
|
|
219
|
+
privateTerms: [{ text: "https://files.example.com/private/alex.", label: "URL" }],
|
|
220
|
+
publicTerms: ["uploaded documents"],
|
|
221
|
+
},
|
|
222
|
+
{
|
|
223
|
+
id: "chat-ip-private",
|
|
224
|
+
input: "The support form recorded IP 192.168.0.14 with my application.",
|
|
225
|
+
privateTerms: [{ text: "192.168.0.14", label: "IP_ADDRESS" }],
|
|
226
|
+
publicTerms: ["support form"],
|
|
227
|
+
},
|
|
228
|
+
{
|
|
229
|
+
id: "chat-email-unusual-tld",
|
|
230
|
+
input: "Contact dr.okeke@clinic.health about the tenant records.",
|
|
231
|
+
privateTerms: [{ text: "dr.okeke@clinic.health", label: "EMAIL" }],
|
|
232
|
+
publicTerms: ["tenant records"],
|
|
233
|
+
},
|
|
234
|
+
{
|
|
235
|
+
id: "chat-phone-dots",
|
|
236
|
+
input: "Reach me on 312.555.7741 after five.",
|
|
237
|
+
privateTerms: [{ text: "312.555.7741", label: "PHONE" }],
|
|
238
|
+
publicTerms: ["after five"],
|
|
239
|
+
},
|
|
240
|
+
{
|
|
241
|
+
id: "chat-phone-bare",
|
|
242
|
+
input: "My phone is 2025550188 and I need a callback.",
|
|
243
|
+
privateTerms: [{ text: "2025550188", label: "PHONE" }],
|
|
244
|
+
publicTerms: ["callback"],
|
|
245
|
+
},
|
|
246
|
+
{
|
|
247
|
+
id: "chat-ssn-spaces",
|
|
248
|
+
input: "My social is 472 81 0094.",
|
|
249
|
+
privateTerms: [{ text: "472 81 0094", label: "SSN" }],
|
|
250
|
+
publicTerms: [],
|
|
251
|
+
},
|
|
252
|
+
{
|
|
253
|
+
id: "chat-ssn-dots",
|
|
254
|
+
input: "SSN: 601.23.7788 for the household head.",
|
|
255
|
+
privateTerms: [{ text: "601.23.7788", label: "SSN" }],
|
|
256
|
+
publicTerms: ["household head"],
|
|
257
|
+
},
|
|
258
|
+
{
|
|
259
|
+
id: "chat-invalid-ssn-kept",
|
|
260
|
+
input: "The order confirmation is 000-12-3456, not a social security number.",
|
|
261
|
+
privateTerms: [],
|
|
262
|
+
publicTerms: ["000-12-3456", "order confirmation"],
|
|
263
|
+
},
|
|
264
|
+
{
|
|
265
|
+
id: "chat-order-number-kept",
|
|
266
|
+
input: "My order confirmation is 482910337 and it shipped yesterday.",
|
|
267
|
+
privateTerms: [],
|
|
268
|
+
publicTerms: ["shipped yesterday"],
|
|
269
|
+
},
|
|
270
|
+
{
|
|
271
|
+
id: "chat-card-no-spaces",
|
|
272
|
+
input: "The card on file is 5500005555555559.",
|
|
273
|
+
privateTerms: [{ text: "5500005555555559", label: "CREDIT_CARD" }],
|
|
274
|
+
publicTerms: [],
|
|
275
|
+
},
|
|
276
|
+
{
|
|
277
|
+
id: "chat-invalid-card-kept",
|
|
278
|
+
input: "The reference number is 4111111111111112 for the document.",
|
|
279
|
+
privateTerms: [],
|
|
280
|
+
publicTerms: ["4111111111111112", "document"],
|
|
281
|
+
},
|
|
282
|
+
{
|
|
283
|
+
id: "chat-street-apartment",
|
|
284
|
+
input: "Send mail to 4417 Westmoreland Blvd Apt 12C, the unit is accessible.",
|
|
285
|
+
privateTerms: [
|
|
286
|
+
{ text: "4417", label: "BUILDING_NUMBER" },
|
|
287
|
+
{ text: "Westmoreland Blvd", label: "STREET_NAME" },
|
|
288
|
+
],
|
|
289
|
+
publicTerms: ["unit is accessible"],
|
|
290
|
+
},
|
|
291
|
+
{
|
|
292
|
+
id: "chat-street-court-state-collision",
|
|
293
|
+
input: "I live at 14 Maple Court Hartford CT 06103.",
|
|
294
|
+
privateTerms: [
|
|
295
|
+
{ text: "14", label: "BUILDING_NUMBER" },
|
|
296
|
+
{ text: "Maple Court", label: "STREET_NAME" },
|
|
297
|
+
],
|
|
298
|
+
publicTerms: ["Hartford", "CT", "06103"],
|
|
299
|
+
},
|
|
300
|
+
{
|
|
301
|
+
id: "chat-city-state-zip-kept",
|
|
302
|
+
input: "My mailing city is Hartford CT 06103.",
|
|
303
|
+
privateTerms: [],
|
|
304
|
+
publicTerms: ["Hartford", "CT", "06103"],
|
|
305
|
+
},
|
|
306
|
+
{
|
|
307
|
+
id: "chat-household-members",
|
|
308
|
+
input: "My children are Ana Rivera and Mateo Rivera, ages 5 and 9.",
|
|
309
|
+
privateTerms: [
|
|
310
|
+
{ text: "Ana", label: "GIVEN_NAME" },
|
|
311
|
+
{ text: "Rivera", label: "SURNAME" },
|
|
312
|
+
{ text: "Mateo", label: "GIVEN_NAME" },
|
|
313
|
+
{ text: "Rivera", label: "SURNAME" },
|
|
314
|
+
],
|
|
315
|
+
publicTerms: ["ages 5 and 9"],
|
|
316
|
+
},
|
|
317
|
+
{
|
|
318
|
+
id: "chat-two-emails",
|
|
319
|
+
input: "Use maya@example.com first, then backup maya.backup@example.org.",
|
|
320
|
+
privateTerms: [
|
|
321
|
+
{ text: "maya@example.com", label: "EMAIL" },
|
|
322
|
+
{ text: "maya.backup@example.org", label: "EMAIL" },
|
|
323
|
+
],
|
|
324
|
+
publicTerms: ["backup"],
|
|
325
|
+
},
|
|
326
|
+
{
|
|
327
|
+
id: "chat-bank-account",
|
|
328
|
+
input: "My bank account number is 123456789012 and my rent is due Friday.",
|
|
329
|
+
privateTerms: [{ text: "123456789012", label: "BANK_ACCOUNT" }],
|
|
330
|
+
publicTerms: ["rent is due Friday"],
|
|
331
|
+
},
|
|
332
|
+
{
|
|
333
|
+
id: "chat-card-hyphenated",
|
|
334
|
+
input: "The payment card field shows 4111-1111-1111-1111.",
|
|
335
|
+
privateTerms: [{ text: "4111-1111-1111-1111", label: "CREDIT_CARD" }],
|
|
336
|
+
publicTerms: ["payment card field"],
|
|
337
|
+
},
|
|
338
|
+
{
|
|
339
|
+
id: "chat-email-plus-subdomain",
|
|
340
|
+
input: "Use alex+housing@sub.example.gov for the intake receipt.",
|
|
341
|
+
privateTerms: [{ text: "alex+housing@sub.example.gov", label: "EMAIL" }],
|
|
342
|
+
publicTerms: ["intake receipt"],
|
|
343
|
+
},
|
|
344
|
+
{
|
|
345
|
+
id: "chat-www-url",
|
|
346
|
+
input: "The uploaded packet is at www.example.org/private/maya.",
|
|
347
|
+
privateTerms: [{ text: "www.example.org/private/maya.", label: "URL" }],
|
|
348
|
+
publicTerms: ["uploaded packet"],
|
|
349
|
+
},
|
|
350
|
+
{
|
|
351
|
+
id: "chat-po-box-dotted",
|
|
352
|
+
input: "Please send notices to P.O. Box 88, Salem OR 97301.",
|
|
353
|
+
privateTerms: [{ text: "P.O. Box 88", label: "STREET_NAME" }],
|
|
354
|
+
publicTerms: ["Salem OR 97301"],
|
|
355
|
+
},
|
|
356
|
+
{
|
|
357
|
+
id: "chat-rural-route-abbrev",
|
|
358
|
+
input: "The mailing line is RR 2 in Taos NM 87571.",
|
|
359
|
+
privateTerms: [{ text: "RR 2", label: "STREET_NAME" }],
|
|
360
|
+
publicTerms: ["Taos NM 87571"],
|
|
361
|
+
},
|
|
362
|
+
{
|
|
363
|
+
id: "chat-account-number-context",
|
|
364
|
+
input: "The account number is 987654321098 for the benefits deposit.",
|
|
365
|
+
privateTerms: [{ text: "987654321098", label: "BANK_ACCOUNT" }],
|
|
366
|
+
publicTerms: ["benefits deposit"],
|
|
367
|
+
},
|
|
368
|
+
{
|
|
369
|
+
id: "chat-public-statute-control",
|
|
370
|
+
input: "Section 42 U.S.C. 1437f covers the voucher program.",
|
|
371
|
+
privateTerms: [],
|
|
372
|
+
publicTerms: ["42 U.S.C. 1437f", "voucher program"],
|
|
373
|
+
},
|
|
374
|
+
{
|
|
375
|
+
id: "chat-public-form-year-control",
|
|
376
|
+
input: "Use Form GOV-9886 for the 2026 recertification packet.",
|
|
377
|
+
privateTerms: [],
|
|
378
|
+
publicTerms: ["Form GOV-9886", "2026", "recertification packet"],
|
|
379
|
+
},
|
|
380
|
+
{
|
|
381
|
+
id: "chat-no-pii-weather",
|
|
382
|
+
input: "The weather is clear today and housing applications are due next month.",
|
|
383
|
+
privateTerms: [],
|
|
384
|
+
publicTerms: ["weather", "housing applications"],
|
|
385
|
+
},
|
|
386
|
+
{
|
|
387
|
+
id: "chat-no-pii-form-number",
|
|
388
|
+
input: "Form 8823 is due in 2026 and covers buildings over 50 units.",
|
|
389
|
+
privateTerms: [],
|
|
390
|
+
publicTerms: ["Form 8823", "2026", "50 units"],
|
|
391
|
+
},
|
|
392
|
+
{
|
|
393
|
+
id: "chat-location-public",
|
|
394
|
+
input: "I live in Placentia, California and make $38,500 a year.",
|
|
395
|
+
privateTerms: [],
|
|
396
|
+
publicTerms: ["Placentia, California", "$38,500"],
|
|
397
|
+
},
|
|
398
|
+
{
|
|
399
|
+
id: "chat-mixed-dense",
|
|
400
|
+
input:
|
|
401
|
+
"I'm Priyanka Venkataraman, email pv.ranga@fastmail.io, cell 646-555-0199, " +
|
|
402
|
+
"SSN 533-22-1847, living at 88 Larkspur Lane, age 29, income $52,000.",
|
|
403
|
+
privateTerms: [
|
|
404
|
+
{ text: "Priyanka Venkataraman", label: "GIVEN_NAME" },
|
|
405
|
+
{ text: "pv.ranga@fastmail.io", label: "EMAIL" },
|
|
406
|
+
{ text: "646-555-0199", label: "PHONE" },
|
|
407
|
+
{ text: "533-22-1847", label: "SSN" },
|
|
408
|
+
{ text: "88 Larkspur Lane", label: "STREET_NAME" },
|
|
409
|
+
],
|
|
410
|
+
publicTerms: ["age 29", "$52,000"],
|
|
411
|
+
},
|
|
412
|
+
];
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import { PUBLIC_E2E_CASES } from "./public-cases";
|
|
2
|
+
import { detectNer, loadNerClassifier } from "../src/ner/classifier";
|
|
3
|
+
import { ChatGuard } from "../src/guard";
|
|
4
|
+
|
|
5
|
+
// Load the weights committed to this repo (and pulled via git-lfs in CI) rather
|
|
6
|
+
// than the published Hugging Face repo, keeping the eval hermetic and runnable
|
|
7
|
+
// without an HF token.
|
|
8
|
+
const LOCAL_MODEL = "./model";
|
|
9
|
+
|
|
10
|
+
interface Score {
|
|
11
|
+
readonly privateFound: number;
|
|
12
|
+
readonly privateTotal: number;
|
|
13
|
+
readonly publicKept: number;
|
|
14
|
+
readonly publicTotal: number;
|
|
15
|
+
readonly leakRows: readonly string[];
|
|
16
|
+
readonly overRedactionRows: readonly string[];
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
function hasFlag(args: readonly string[], flag: string): boolean {
|
|
20
|
+
return args.includes(flag);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
function pct(numerator: number, denominator: number): string {
|
|
24
|
+
if (denominator === 0) return "n/a";
|
|
25
|
+
return `${((numerator / denominator) * 100).toFixed(1)}%`;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
async function score(noPrefilter: boolean): Promise<Score> {
|
|
29
|
+
const classifier = await loadNerClassifier({ model: LOCAL_MODEL, device: "cpu" });
|
|
30
|
+
const guard = new ChatGuard({
|
|
31
|
+
ner: (text) => detectNer(text, classifier),
|
|
32
|
+
aliases: {},
|
|
33
|
+
noPrefilter,
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
let privateFound = 0;
|
|
37
|
+
let privateTotal = 0;
|
|
38
|
+
let publicKept = 0;
|
|
39
|
+
let publicTotal = 0;
|
|
40
|
+
const leakRows: string[] = [];
|
|
41
|
+
const overRedactionRows: string[] = [];
|
|
42
|
+
|
|
43
|
+
for (const testCase of PUBLIC_E2E_CASES) {
|
|
44
|
+
const result = await guard.protect(testCase.input);
|
|
45
|
+
const leaked = testCase.privateTerms.filter((term) => result.text.includes(term.text));
|
|
46
|
+
const overRedacted = testCase.publicTerms.filter((term) => !result.text.includes(term));
|
|
47
|
+
|
|
48
|
+
privateFound += testCase.privateTerms.length - leaked.length;
|
|
49
|
+
privateTotal += testCase.privateTerms.length;
|
|
50
|
+
publicKept += testCase.publicTerms.length - overRedacted.length;
|
|
51
|
+
publicTotal += testCase.publicTerms.length;
|
|
52
|
+
|
|
53
|
+
if (leaked.length > 0) {
|
|
54
|
+
leakRows.push(`${testCase.id}: ${leaked.map((term) => term.text).join(", ")}`);
|
|
55
|
+
}
|
|
56
|
+
if (overRedacted.length > 0) {
|
|
57
|
+
overRedactionRows.push(`${testCase.id}: ${overRedacted.join(", ")}`);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
return { privateFound, privateTotal, publicKept, publicTotal, leakRows, overRedactionRows };
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
const args = Bun.argv.slice(2);
|
|
65
|
+
const strict = hasFlag(args, "--strict");
|
|
66
|
+
const noPrefilter = hasFlag(args, "--no-prefilter");
|
|
67
|
+
const jsonOnly = hasFlag(args, "--json");
|
|
68
|
+
const result = await score(noPrefilter);
|
|
69
|
+
const recallPct = result.privateTotal === 0 ? 1 : result.privateFound / result.privateTotal;
|
|
70
|
+
const retentionPct = result.publicTotal === 0 ? 1 : result.publicKept / result.publicTotal;
|
|
71
|
+
|
|
72
|
+
if (jsonOnly) {
|
|
73
|
+
console.log(JSON.stringify({
|
|
74
|
+
model: LOCAL_MODEL,
|
|
75
|
+
no_prefilter: noPrefilter,
|
|
76
|
+
cases: PUBLIC_E2E_CASES.length,
|
|
77
|
+
private_found: result.privateFound,
|
|
78
|
+
private_total: result.privateTotal,
|
|
79
|
+
private_recall: recallPct,
|
|
80
|
+
public_kept: result.publicKept,
|
|
81
|
+
public_total: result.publicTotal,
|
|
82
|
+
public_retention: retentionPct,
|
|
83
|
+
leaks: result.leakRows,
|
|
84
|
+
over_redactions: result.overRedactionRows,
|
|
85
|
+
}));
|
|
86
|
+
} else {
|
|
87
|
+
console.log(`Model: ${LOCAL_MODEL}${noPrefilter ? " (no-prefilter runtime)" : ""}`);
|
|
88
|
+
console.log(`Cases: ${PUBLIC_E2E_CASES.length}`);
|
|
89
|
+
console.log(
|
|
90
|
+
`Private recall: ${result.privateFound}/${result.privateTotal} (${pct(result.privateFound, result.privateTotal)})`,
|
|
91
|
+
);
|
|
92
|
+
console.log(
|
|
93
|
+
`Public retention: ${result.publicKept}/${result.publicTotal} (${pct(result.publicKept, result.publicTotal)})`,
|
|
94
|
+
);
|
|
95
|
+
if (result.leakRows.length > 0) {
|
|
96
|
+
console.log("\nLeaks:");
|
|
97
|
+
for (const row of result.leakRows) console.log(`- ${row}`);
|
|
98
|
+
}
|
|
99
|
+
if (result.overRedactionRows.length > 0) {
|
|
100
|
+
console.log("\nOver-redaction:");
|
|
101
|
+
for (const row of result.overRedactionRows) console.log(`- ${row}`);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// Strict gate enforces minimum recall/retention thresholds rather than zero
|
|
106
|
+
// leaks. The current public-cases corpus has a small number of irreducible
|
|
107
|
+
// edge cases the shipped quantized model does not perfectly handle. After the
|
|
108
|
+
// EMAIL/URL heuristic detectors were added (text-shaped PII the model was weak
|
|
109
|
+
// on), the residual leaks on the 55-case suite are two exotic structured IDs —
|
|
110
|
+
// an agency case-number (`AGY-2026-009871`) and a Medicare-style identifier
|
|
111
|
+
// (`1EG4-TE5-MK73`) — that neither a checksum heuristic nor the model reliably
|
|
112
|
+
// catches. The thresholds below are pinned to current measured performance so
|
|
113
|
+
// CI fails on regressions but does not require model retrains for
|
|
114
|
+
// known-acceptable behavior.
|
|
115
|
+
//
|
|
116
|
+
// Floor history: 0.98 (original lineage) -> 0.95 (m06 + format-cases "v3"
|
|
117
|
+
// release with EMAIL/URL heuristics; measured 97.0% on the 55-case suite — see
|
|
118
|
+
// eval/bench/runs/m06-v3-30k for the headline OpenPII numbers this model ships
|
|
119
|
+
// on).
|
|
120
|
+
const STRICT_MIN_PRIVATE_RECALL = 0.95;
|
|
121
|
+
const STRICT_MIN_PUBLIC_RETENTION = 0.95;
|
|
122
|
+
|
|
123
|
+
if (strict) {
|
|
124
|
+
const recall = result.privateTotal === 0 ? 1 : result.privateFound / result.privateTotal;
|
|
125
|
+
const retention = result.publicTotal === 0 ? 1 : result.publicKept / result.publicTotal;
|
|
126
|
+
let failed = false;
|
|
127
|
+
if (recall < STRICT_MIN_PRIVATE_RECALL) {
|
|
128
|
+
console.log(
|
|
129
|
+
`\nStrict: private recall ${pct(result.privateFound, result.privateTotal)} below ${STRICT_MIN_PRIVATE_RECALL * 100}%`,
|
|
130
|
+
);
|
|
131
|
+
failed = true;
|
|
132
|
+
}
|
|
133
|
+
if (retention < STRICT_MIN_PUBLIC_RETENTION) {
|
|
134
|
+
console.log(
|
|
135
|
+
`\nStrict: public retention ${pct(result.publicKept, result.publicTotal)} below ${STRICT_MIN_PUBLIC_RETENTION * 100}%`,
|
|
136
|
+
);
|
|
137
|
+
failed = true;
|
|
138
|
+
}
|
|
139
|
+
if (failed) process.exit(1);
|
|
140
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { createGuard } from "../index.ts";
|
|
2
|
+
|
|
3
|
+
const guard = await createGuard();
|
|
4
|
+
|
|
5
|
+
const safe = await guard.protect("My name is Alex Rivera. My SSN is 472-81-0094.");
|
|
6
|
+
const reply = await llm(safe.text);
|
|
7
|
+
|
|
8
|
+
console.log(guard.reveal(reply));
|
|
9
|
+
|
|
10
|
+
async function llm(text: string): Promise<string> {
|
|
11
|
+
return "Got it, [GIVEN_NAME_1].";
|
|
12
|
+
}
|
package/index.ts
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @nationaldesignstudio/rampart
|
|
3
|
+
*
|
|
4
|
+
* Client-side PII filter for LLM chat. Strips names, SSNs, card numbers, and
|
|
5
|
+
* every other PII class out of text before it leaves the device — keeping only
|
|
6
|
+
* {city, state, zip} — so raw PII never reaches our servers or logs.
|
|
7
|
+
*
|
|
8
|
+
* Layers: offset-preserving heuristics + validators (structured PII, any
|
|
9
|
+
* separator form), an optional small wasm token-classifier (contextual PII),
|
|
10
|
+
* a default-deny keep-set policy, and a reversible placeholder/rehydrate
|
|
11
|
+
* session table for coherent multi-turn chat.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
export {
|
|
15
|
+
ChatGuard,
|
|
16
|
+
createGuard,
|
|
17
|
+
DEFAULT_ALIASES,
|
|
18
|
+
type GuardOptions,
|
|
19
|
+
type NerDetector,
|
|
20
|
+
} from "./src/guard";
|
|
21
|
+
export {
|
|
22
|
+
StreamingReveal,
|
|
23
|
+
createRevealTransform,
|
|
24
|
+
type PlaceholderResolver,
|
|
25
|
+
} from "./src/streaming";
|
|
26
|
+
export {
|
|
27
|
+
SessionEntityTable,
|
|
28
|
+
PLACEHOLDER_PATTERN,
|
|
29
|
+
type ScrubResult,
|
|
30
|
+
type PlaceholderAliases,
|
|
31
|
+
} from "./src/session";
|
|
32
|
+
export { detectHeuristics } from "./src/heuristics";
|
|
33
|
+
export { mergeSpans, applyPolicy } from "./src/policy";
|
|
34
|
+
export { premask, projectMaskedSpan, sentinelFor, type PremaskResult } from "./src/premask";
|
|
35
|
+
export { KEEP_LABELS, resolveKeepLabels, shouldRedact, type PiiLabel, type Span } from "./src/types";
|
|
36
|
+
export {
|
|
37
|
+
detectNer,
|
|
38
|
+
loadNerClassifier,
|
|
39
|
+
RAMPART_MODEL_ID,
|
|
40
|
+
NER_TOKEN_BUDGET,
|
|
41
|
+
NER_TOKEN_OVERLAP,
|
|
42
|
+
type NerOptions,
|
|
43
|
+
type TokenClassifier,
|
|
44
|
+
type TokenCounter,
|
|
45
|
+
} from "./src/ner/classifier";
|
|
46
|
+
export { registerNerWorker, createWorkerClassifier } from "./src/ner/worker";
|
|
47
|
+
export { isLuhnValid, isValidSsn } from "./src/validators";
|