@nationaldesignstudio/rampart 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/LICENSE +402 -0
  2. package/MODEL_CARD.md +422 -0
  3. package/README.md +279 -0
  4. package/RELEASE.md +97 -0
  5. package/WHITEPAPER.md +316 -0
  6. package/dist/index.d.ts +23 -0
  7. package/dist/index.d.ts.map +1 -0
  8. package/dist/index.js +35639 -0
  9. package/dist/index.js.map +36 -0
  10. package/dist/src/guard.d.ts +94 -0
  11. package/dist/src/guard.d.ts.map +1 -0
  12. package/dist/src/heuristics.d.ts +14 -0
  13. package/dist/src/heuristics.d.ts.map +1 -0
  14. package/dist/src/ner/classifier.d.ts +92 -0
  15. package/dist/src/ner/classifier.d.ts.map +1 -0
  16. package/dist/src/ner/worker.d.ts +44 -0
  17. package/dist/src/ner/worker.d.ts.map +1 -0
  18. package/dist/src/ner/worker.js +35302 -0
  19. package/dist/src/ner/worker.js.map +30 -0
  20. package/dist/src/pipeline.d.ts +76 -0
  21. package/dist/src/pipeline.d.ts.map +1 -0
  22. package/dist/src/policy.d.ts +27 -0
  23. package/dist/src/policy.d.ts.map +1 -0
  24. package/dist/src/premask.d.ts +48 -0
  25. package/dist/src/premask.d.ts.map +1 -0
  26. package/dist/src/session.d.ts +60 -0
  27. package/dist/src/session.d.ts.map +1 -0
  28. package/dist/src/streaming.d.ts +32 -0
  29. package/dist/src/streaming.d.ts.map +1 -0
  30. package/dist/src/types.d.ts +43 -0
  31. package/dist/src/types.d.ts.map +1 -0
  32. package/dist/src/validators.d.ts +16 -0
  33. package/dist/src/validators.d.ts.map +1 -0
  34. package/eval/bench/README.md +91 -0
  35. package/eval/bench/fetch.ts +152 -0
  36. package/eval/bench/labels.ts +45 -0
  37. package/eval/bench/run.ts +146 -0
  38. package/eval/bench/runs/m06-v3-30k/by_language.json +303 -0
  39. package/eval/bench/runs/m06-v3-30k/summary.json +56 -0
  40. package/eval/bench/runs/sample-900/by_language.json +303 -0
  41. package/eval/bench/runs/sample-900/manifest.json +926 -0
  42. package/eval/bench/runs/sample-900/summary.json +56 -0
  43. package/eval/bench/score.ts +197 -0
  44. package/eval/bench/webgpu/entry.ts +70 -0
  45. package/eval/bench/webgpu/index.html +12 -0
  46. package/eval/bench/webgpu.ts +209 -0
  47. package/eval/public-cases.ts +412 -0
  48. package/eval/run-public-eval.ts +140 -0
  49. package/examples/basic-chat.ts +12 -0
  50. package/examples/pii-worker.ts +3 -0
  51. package/index.ts +47 -0
  52. package/package.json +103 -0
  53. package/src/guard.ts +170 -0
  54. package/src/heuristics.ts +141 -0
  55. package/src/ner/classifier.ts +580 -0
  56. package/src/ner/worker.ts +130 -0
  57. package/src/policy.ts +64 -0
  58. package/src/premask.ts +90 -0
  59. package/src/session.ts +99 -0
  60. package/src/streaming.ts +73 -0
  61. package/src/types.ts +74 -0
  62. package/src/validators.ts +40 -0
@@ -0,0 +1,146 @@
1
+ /**
2
+ * The native bench: run the shipped pipeline over the frozen held-out set and
3
+ * emit `summary.json` / `by_language.json`. The predictions come from the exact
4
+ * TypeScript code that ships, scored by the TypeScript code under test.
5
+ *
6
+ * bun eval/bench/run.ts --out eval/bench/runs/native
7
+ */
8
+
9
+ import { mkdir, readFile, writeFile } from "node:fs/promises";
10
+ import { join } from "node:path";
11
+ import { detectHeuristics } from "../../src/heuristics";
12
+ import { detectNer, loadNerClassifier, RAMPART_MODEL_ID } from "../../src/ner/classifier";
13
+ import { applyPolicy } from "../../src/policy";
14
+ import { premask, projectMaskedSpan } from "../../src/premask";
15
+ import type { Span } from "../../src/types";
16
+ import { mapOpenPiiLabel } from "./labels";
17
+ import { type GoldSpan, type PredSpan, percentile, redactText, Stratum, termPresence, expectedCalibrationError } from "./score";
18
+
19
/**
 * IoU cut-offs handed to `Stratum.addSpans` for span-level scoring, ordered
 * from the strictest (1) to the loosest (0).
 */
const IOU_THRESHOLDS = [1, 0.5, 0];

/** One record parsed from a line of the held-out JSONL file. */
interface Row {
  uid: number;
  /** Key used to bucket results into per-language strata. */
  language: string;
  /** The raw text that is run through the detectors and then redacted. */
  source_text: string;
  /** Annotated entities; each `label` is translated with `mapOpenPiiLabel`. */
  privacy_mask: Array<{ label: string; start: number; end: number; value: string }>;
}
27
+
28
/**
 * Read a string-valued command-line option.
 *
 * Both spellings are accepted: `--name=value` and `--name value`. The inline
 * `=` form wins when both are present. In the two-token form the following
 * token is only taken as the value when it exists and does not itself start
 * with `--` (so `--out --model-only` does not swallow the next flag).
 *
 * Arguments are read from `process.argv`, the same source `main` uses for its
 * boolean flags, so the helper does not depend on the `Bun` global.
 *
 * @param name     Option name without the leading dashes.
 * @param fallback Value returned when the option is absent or has no value.
 * @returns The option's value, or `fallback`.
 */
function arg(name: string, fallback: string): string {
  const argv = process.argv;
  const inlinePrefix = `--${name}=`;

  const inline = argv.find((token) => token.startsWith(inlinePrefix));
  if (inline !== undefined) return inline.slice(inlinePrefix.length);

  const flagIndex = argv.indexOf(`--${name}`);
  const next = flagIndex >= 0 ? argv[flagIndex + 1] : undefined;
  return next && !next.startsWith("--") ? next : fallback;
}
34
+
35
/**
 * Locate the start offset of a gold value inside the row text.
 *
 * The annotated `[start, end)` window is preferred: when it lies inside the
 * text and contains the (trimmed) value, the value's position within that
 * window is used. Otherwise the first occurrence in the whole text is used.
 * NOTE(review): the offsets are treated as JS string indices; if the dataset
 * counts something else the window will not verify and the fallback applies.
 */
function locateGoldStart(text: string, entry: { start: number; end: number }, value: string): number {
  const { start, end } = entry;
  const windowIsUsable =
    Number.isInteger(start) && Number.isInteger(end) && start >= 0 && start < end && end <= text.length;
  if (windowIsUsable) {
    const within = text.slice(start, end).indexOf(value);
    if (within >= 0) return start + within;
  }
  return text.indexOf(value);
}

/**
 * Build gold terms + redact-target spans for one row from the OpenPII privacy mask.
 *
 * Entries whose trimmed value is empty or does not occur in `source_text` are
 * skipped. Entries whose label maps to `"O"` are collected as public terms;
 * all others are private terms and also yield a gold span.
 *
 * Span positions come from each entry's annotated offsets when they can be
 * verified against the text. A plain `indexOf` lookup would place every
 * repetition of the same value on its first occurrence, which distorts
 * span-level scoring.
 *
 * @param row One dataset record.
 * @returns `priv` / `pub` term lists and the gold spans for the private terms.
 */
function buildGold(row: Row): { priv: string[]; pub: string[]; spans: GoldSpan[] } {
  const priv: string[] = [];
  const pub: string[] = [];
  const spans: GoldSpan[] = [];
  const text = row.source_text;

  for (const entry of row.privacy_mask) {
    const value = (entry.value ?? "").trim();
    if (!value || !text.includes(value)) continue;

    const ours = mapOpenPiiLabel(entry.label);
    if (ours === "O") {
      pub.push(value);
      continue;
    }

    priv.push(value);
    const start = locateGoldStart(text, entry, value);
    spans.push({ label: ours, start, end: start + value.length });
  }

  return { priv, pub, spans };
}
54
+
55
/**
 * Parse JSONL text into benchmark rows, skipping blank lines.
 * The parsed objects are trusted to match `Row`; no schema validation is done.
 */
function parseRows(jsonl: string): Row[] {
  return jsonl
    .split("\n")
    .filter((line) => line.trim().length > 0)
    .map((line) => JSON.parse(line) as Row);
}

/**
 * Run the benchmark end to end.
 *
 * Options (see `arg`): `--data` (JSONL input), `--out` (output directory),
 * `--model` (model id/path). Flags: `--no-prefilter` feeds the raw text to the
 * model instead of the premasked text; `--model-only` additionally drops the
 * heuristic detector.
 *
 * For every row the detectors are run, `applyPolicy` picks the spans to
 * redact, and the result is accumulated overall and per language. Finally
 * `summary.json` and `by_language.json` are written to the output directory
 * and a short report is printed.
 *
 * @throws Error when the data file contains no rows.
 */
async function main(): Promise<void> {
  const dataPath = arg("data", "eval/bench/data/heldout.jsonl");
  const outDir = arg("out", "eval/bench/runs/native");
  const noPrefilter = process.argv.includes("--no-prefilter");
  const modelOnly = process.argv.includes("--model-only");

  const model = arg("model", RAMPART_MODEL_ID);
  const rows = parseRows(await readFile(dataPath, "utf8"));
  if (rows.length === 0) throw new Error(`no rows found in ${dataPath}`);
  const classifier = await loadNerClassifier({ model, device: "cpu" });

  const overall = new Stratum();
  const byLang = new Map<string, Stratum>();
  const latencies: number[] = [];
  const calibration: [number, boolean][] = [];

  for (const row of rows) {
    const { priv, pub, spans: gold } = buildGold(row);

    const t0 = performance.now();
    // Match the shipped runtime: premask SSN/CC/IP_ADDRESS to sentinels before
    // the model unless --no-prefilter is set (npf models trained on raw digits).
    // --model-only skips heuristics entirely (model is the only detector).
    const heuristic = modelOnly ? [] : detectHeuristics(row.source_text);
    let modelSpans: Span[];
    if (noPrefilter || modelOnly) {
      modelSpans = await detectNer(row.source_text, classifier);
    } else {
      const map = premask(row.source_text, heuristic);
      const masked = await detectNer(map.masked, classifier);
      modelSpans = [];
      for (const s of masked) {
        // Spans found on the masked text are mapped back onto the original
        // text; a null projection is dropped.
        const projected = projectMaskedSpan(s, row.source_text, map);
        if (projected !== null) modelSpans.push(projected);
      }
    }
    const detected: Span[] = [...heuristic, ...modelSpans];
    const redactSpans = applyPolicy(detected); // non-overlapping, keep-set filtered — exactly what ships
    latencies.push(performance.now() - t0);

    const pred: PredSpan[] = redactSpans.map((s) => ({ label: s.label, start: s.start, end: s.end, score: s.score }));
    const redacted = redactText(row.source_text, redactSpans);

    const term = termPresence(redacted, priv, pub);
    overall.addTerm(term);
    overall.addSpans(gold, pred, IOU_THRESHOLDS);
    const stratum = byLang.get(row.language) ?? new Stratum();
    stratum.addTerm(term);
    stratum.addSpans(gold, pred, IOU_THRESHOLDS);
    byLang.set(row.language, stratum);

    // Calibration: per predicted span, score vs. whether it hit a gold span (IoU>=0.5).
    for (const p of pred) {
      const correct = gold.some((g) => g.label === p.label && spanIou(p, g) >= 0.5);
      calibration.push([Math.min(1, Math.max(0, p.score)), correct]);
    }
  }

  // Latencies are still in row order here, so the first entry is the first
  // (cold) call. Reading it after the sort would report the maximum instead.
  const cold = latencies[0] ?? 0;
  latencies.sort((a, b) => a - b);
  const summary = {
    ...overall.report(),
    calibration: { ece: expectedCalibrationError(calibration), n_pairs: calibration.length },
    latency_ms: {
      cold,
      p50: percentile(latencies, 0.5),
      p95: percentile(latencies, 0.95),
      p99: percentile(latencies, 0.99),
      mean: latencies.reduce((a, b) => a + b, 0) / (latencies.length || 1),
    },
    model,
    rows_scored: rows.length,
  };

  // Sort by language code explicitly (the default comparator would stringify
  // the [key, Stratum] pairs) and build each per-language report only once.
  const languageReports = [...byLang.entries()]
    .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
    .map(([lang, s]): [string, Record<string, any>] => [lang, s.report() as Record<string, any>]);
  const byLanguage = Object.fromEntries(languageReports);

  await mkdir(outDir, { recursive: true });
  await writeFile(join(outDir, "summary.json"), JSON.stringify(summary, null, 2) + "\n");
  await writeFile(join(outDir, "by_language.json"), JSON.stringify(byLanguage, null, 2) + "\n");

  // The report's shape is defined by `Stratum.report()` elsewhere; it is read loosely here.
  const r = summary as Record<string, any>;
  console.log(`\nRows ${r.rows} private ${r.private_total} public ${r.public_total}`);
  console.log(`Private recall ${(r.private_recall * 100).toFixed(2)}% Wilson95 [${(r.private_recall_wilson95[0] * 100).toFixed(2)}, ${(r.private_recall_wilson95[1] * 100).toFixed(2)}] (leaked ${r.leaked})`);
  console.log(`Public retention ${(r.public_retained * 100).toFixed(2)}%`);
  console.log(`Span-F1 strict ${r.span_f1["iou_1.00"].f1.toFixed(3)} latency p50 ${r.latency_ms.p50.toFixed(1)} ms`);
  console.log(`By language: ${languageReports.map(([l, x]) => `${l} ${(x.private_recall * 100).toFixed(1)}%`).join(" ")}`);
  console.log(`\nwrote ${join(outDir, "summary.json")} + by_language.json`);
}
140
+
141
/**
 * Intersection-over-union of two character spans.
 *
 * Only `start` and `end` are read. The denominator is the full extent from
 * the smaller start to the larger end, which equals the union whenever the
 * spans overlap.
 *
 * @returns 0 when the spans share no characters, otherwise a ratio in (0, 1].
 */
function spanIou(a: GoldSpan, b: GoldSpan): number {
  const overlapStart = Math.max(a.start, b.start);
  const overlapEnd = Math.min(a.end, b.end);
  if (overlapEnd <= overlapStart) return 0;

  const extent = Math.max(a.end, b.end) - Math.min(a.start, b.start);
  return (overlapEnd - overlapStart) / extent;
}
145
+
146
+ await main();
@@ -0,0 +1,303 @@
1
+ {
2
+ "de": {
3
+ "rows": 4260,
4
+ "private_total": 17347,
5
+ "private_recall": 0.9794200726350377,
6
+ "private_recall_wilson95": [
7
+ 0.977198733783712,
8
+ 0.9814291174439769
9
+ ],
10
+ "private_recall_bootstrap95": [
11
+ 0.9772871389865683,
12
+ 0.9816682999942353
13
+ ],
14
+ "leaked": 357,
15
+ "public_total": 11464,
16
+ "public_retained": 0.9166085136078158,
17
+ "over_redacted": 956,
18
+ "span_f1": {
19
+ "iou_1.00": {
20
+ "tp": 8626,
21
+ "fp": 7911,
22
+ "fn": 8721,
23
+ "precision": 0.5216181895144222,
24
+ "recall": 0.49726177437020813,
25
+ "f1": 0.5091488608192657
26
+ },
27
+ "iou_0.50": {
28
+ "tp": 10797,
29
+ "fp": 5740,
30
+ "fn": 6550,
31
+ "precision": 0.6528995585656406,
32
+ "recall": 0.6224130973655387,
33
+ "f1": 0.6372919371974973
34
+ },
35
+ "iou_0.00": {
36
+ "tp": 12407,
37
+ "fp": 4130,
38
+ "fn": 4940,
39
+ "precision": 0.7502569994557658,
40
+ "recall": 0.7152245345016429,
41
+ "f1": 0.7323220399008381
42
+ }
43
+ }
44
+ },
45
+ "en": {
46
+ "rows": 11569,
47
+ "private_total": 53877,
48
+ "private_recall": 0.9885294281418787,
49
+ "private_recall_wilson95": [
50
+ 0.9875947848056238,
51
+ 0.9893944090676955
52
+ ],
53
+ "private_recall_bootstrap95": [
54
+ 0.9876570707351932,
55
+ 0.9894574679362251
56
+ ],
57
+ "leaked": 618,
58
+ "public_total": 35153,
59
+ "public_retained": 0.9053565840753278,
60
+ "over_redacted": 3327,
61
+ "span_f1": {
62
+ "iou_1.00": {
63
+ "tp": 28381,
64
+ "fp": 21760,
65
+ "fn": 25496,
66
+ "precision": 0.5660238128477693,
67
+ "recall": 0.5267739480668931,
68
+ "f1": 0.5456940144974908
69
+ },
70
+ "iou_0.50": {
71
+ "tp": 34591,
72
+ "fp": 15550,
73
+ "fn": 19286,
74
+ "precision": 0.6898745537584013,
75
+ "recall": 0.6420364905247137,
76
+ "f1": 0.665096425618643
77
+ },
78
+ "iou_0.00": {
79
+ "tp": 40179,
80
+ "fp": 9962,
81
+ "fn": 13698,
82
+ "precision": 0.8013202768193693,
83
+ "recall": 0.7457542179408653,
84
+ "f1": 0.7725393681862754
85
+ }
86
+ }
87
+ },
88
+ "es": {
89
+ "rows": 3234,
90
+ "private_total": 13736,
91
+ "private_recall": 0.988351776354106,
92
+ "private_recall_wilson95": [
93
+ 0.9864159298109118,
94
+ 0.9900145408285751
95
+ ],
96
+ "private_recall_bootstrap95": [
97
+ 0.9865317414094351,
98
+ 0.9900262085032032
99
+ ],
100
+ "leaked": 160,
101
+ "public_total": 9160,
102
+ "public_retained": 0.9161572052401746,
103
+ "over_redacted": 768,
104
+ "span_f1": {
105
+ "iou_1.00": {
106
+ "tp": 6958,
107
+ "fp": 6030,
108
+ "fn": 6778,
109
+ "precision": 0.5357252848783493,
110
+ "recall": 0.5065521258008153,
111
+ "f1": 0.5207304295764107
112
+ },
113
+ "iou_0.50": {
114
+ "tp": 8954,
115
+ "fp": 4034,
116
+ "fn": 4782,
117
+ "precision": 0.6894056051740067,
118
+ "recall": 0.651863715783343,
119
+ "f1": 0.6701092650800778
120
+ },
121
+ "iou_0.00": {
122
+ "tp": 10330,
123
+ "fp": 2658,
124
+ "fn": 3406,
125
+ "precision": 0.795349553433939,
126
+ "recall": 0.7520384391380315,
127
+ "f1": 0.7730878610986379
128
+ }
129
+ }
130
+ },
131
+ "fr": {
132
+ "rows": 4708,
133
+ "private_total": 19906,
134
+ "private_recall": 0.9840751532201346,
135
+ "private_recall_wilson95": [
136
+ 0.9822403470424554,
137
+ 0.9857231549876158
138
+ ],
139
+ "private_recall_bootstrap95": [
140
+ 0.9823671254898021,
141
+ 0.9857831809504672
142
+ ],
143
+ "leaked": 317,
144
+ "public_total": 13740,
145
+ "public_retained": 0.9282387190684134,
146
+ "over_redacted": 986,
147
+ "span_f1": {
148
+ "iou_1.00": {
149
+ "tp": 10066,
150
+ "fp": 8581,
151
+ "fn": 9840,
152
+ "precision": 0.5398187375985413,
153
+ "recall": 0.50567668039787,
154
+ "f1": 0.5221902316291859
155
+ },
156
+ "iou_0.50": {
157
+ "tp": 12725,
158
+ "fp": 5922,
159
+ "fn": 7181,
160
+ "precision": 0.682415401941331,
161
+ "recall": 0.6392544961318195,
162
+ "f1": 0.6601302103597644
163
+ },
164
+ "iou_0.00": {
165
+ "tp": 14517,
166
+ "fp": 4130,
167
+ "fn": 5389,
168
+ "precision": 0.7785166514720867,
169
+ "recall": 0.7292776047422888,
170
+ "f1": 0.7530931445023734
171
+ }
172
+ }
173
+ },
174
+ "it": {
175
+ "rows": 3218,
176
+ "private_total": 13855,
177
+ "private_recall": 0.9782749909779863,
178
+ "private_recall_wilson95": [
179
+ 0.9757116115600604,
180
+ 0.9805732196339119
181
+ ],
182
+ "private_recall_bootstrap95": [
183
+ 0.975821003247925,
184
+ 0.9807289787080476
185
+ ],
186
+ "leaked": 301,
187
+ "public_total": 9198,
188
+ "public_retained": 0.9406392694063926,
189
+ "over_redacted": 546,
190
+ "span_f1": {
191
+ "iou_1.00": {
192
+ "tp": 6962,
193
+ "fp": 6111,
194
+ "fn": 6893,
195
+ "precision": 0.5325479996940259,
196
+ "recall": 0.5024900757849152,
197
+ "f1": 0.5170825906120025
198
+ },
199
+ "iou_0.50": {
200
+ "tp": 8880,
201
+ "fp": 4193,
202
+ "fn": 4975,
203
+ "precision": 0.6792626023101048,
204
+ "recall": 0.6409238542042583,
205
+ "f1": 0.659536541889483
206
+ },
207
+ "iou_0.00": {
208
+ "tp": 10122,
209
+ "fp": 2951,
210
+ "fn": 3733,
211
+ "precision": 0.7742675743899641,
212
+ "recall": 0.7305665824612053,
213
+ "f1": 0.7517825311942958
214
+ }
215
+ }
216
+ },
217
+ "nl": {
218
+ "rows": 1526,
219
+ "private_total": 6519,
220
+ "private_recall": 0.9720816076085289,
221
+ "private_recall_wilson95": [
222
+ 0.9677960000335368,
223
+ 0.9758111542471986
224
+ ],
225
+ "private_recall_bootstrap95": [
226
+ 0.9682466635987115,
227
+ 0.9762233471391317
228
+ ],
229
+ "leaked": 182,
230
+ "public_total": 4302,
231
+ "public_retained": 0.9193398419339842,
232
+ "over_redacted": 347,
233
+ "span_f1": {
234
+ "iou_1.00": {
235
+ "tp": 3334,
236
+ "fp": 2747,
237
+ "fn": 3185,
238
+ "precision": 0.5482650879789508,
239
+ "recall": 0.5114281331492561,
240
+ "f1": 0.5292063492063492
241
+ },
242
+ "iou_0.50": {
243
+ "tp": 4061,
244
+ "fp": 2020,
245
+ "fn": 2458,
246
+ "precision": 0.6678177931261305,
247
+ "recall": 0.6229483049547476,
248
+ "f1": 0.6446031746031745
249
+ },
250
+ "iou_0.00": {
251
+ "tp": 4631,
252
+ "fp": 1450,
253
+ "fn": 1888,
254
+ "precision": 0.7615523762539056,
255
+ "recall": 0.7103850283785856,
256
+ "f1": 0.7350793650793651
257
+ }
258
+ }
259
+ },
260
+ "pt": {
261
+ "rows": 1485,
262
+ "private_total": 6467,
263
+ "private_recall": 0.9772692129271687,
264
+ "private_recall_wilson95": [
265
+ 0.9733432981324541,
266
+ 0.9806284387124102
267
+ ],
268
+ "private_recall_bootstrap95": [
269
+ 0.9737126952218957,
270
+ 0.9808257306324416
271
+ ],
272
+ "leaked": 147,
273
+ "public_total": 4190,
274
+ "public_retained": 0.9250596658711218,
275
+ "over_redacted": 314,
276
+ "span_f1": {
277
+ "iou_1.00": {
278
+ "tp": 3012,
279
+ "fp": 2683,
280
+ "fn": 3455,
281
+ "precision": 0.5288849868305531,
282
+ "recall": 0.46574918818617594,
283
+ "f1": 0.4953132708436112
284
+ },
285
+ "iou_0.50": {
286
+ "tp": 3707,
287
+ "fp": 1988,
288
+ "fn": 2760,
289
+ "precision": 0.6509218612818262,
290
+ "recall": 0.5732178753672491,
291
+ "f1": 0.6096036836046703
292
+ },
293
+ "iou_0.00": {
294
+ "tp": 4375,
295
+ "fp": 1320,
296
+ "fn": 2092,
297
+ "precision": 0.7682177348551361,
298
+ "recall": 0.676511520024741,
299
+ "f1": 0.71945403716494
300
+ }
301
+ }
302
+ }
303
+ }
@@ -0,0 +1,56 @@
1
+ {
2
+ "rows": 30000,
3
+ "private_total": 131707,
4
+ "private_recall": 0.9841921841663693,
5
+ "private_recall_wilson95": [
6
+ 0.9835042840945342,
7
+ 0.9848518394457167
8
+ ],
9
+ "private_recall_bootstrap95": [
10
+ 0.9835088491879702,
11
+ 0.9848451487012839
12
+ ],
13
+ "leaked": 2082,
14
+ "public_total": 87207,
15
+ "public_retained": 0.9169332737050925,
16
+ "over_redacted": 7244,
17
+ "span_f1": {
18
+ "iou_1.00": {
19
+ "tp": 67339,
20
+ "fp": 55823,
21
+ "fn": 64368,
22
+ "precision": 0.546751433071889,
23
+ "recall": 0.5112788234490194,
24
+ "f1": 0.5284204826793372
25
+ },
26
+ "iou_0.50": {
27
+ "tp": 83715,
28
+ "fp": 39447,
29
+ "fn": 47992,
30
+ "precision": 0.6797145223364349,
31
+ "recall": 0.6356154190741571,
32
+ "f1": 0.656925714778965
33
+ },
34
+ "iou_0.00": {
35
+ "tp": 96561,
36
+ "fp": 26601,
37
+ "fn": 35146,
38
+ "precision": 0.784016173819847,
39
+ "recall": 0.7331500983243108,
40
+ "f1": 0.7577304419132965
41
+ }
42
+ },
43
+ "calibration": {
44
+ "ece": 0.291236456105258,
45
+ "n_pairs": 123162
46
+ },
47
+ "latency_ms": {
48
+ "cold": 62.28677499999992,
49
+ "p50": 6.577430999997887,
50
+ "p95": 12.474294999999984,
51
+ "p99": 15.152568000004976,
52
+ "mean": 7.214940486199942
53
+ },
54
+ "model": "./model",
55
+ "rows_scored": 30000
56
+ }