npm - @nationaldesignstudio/rampart - Versions diffs - 0.1.1 - Mend

@nationaldesignstudio/rampart 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

package/LICENSE +402 -0
package/MODEL_CARD.md +422 -0
package/README.md +279 -0
package/RELEASE.md +97 -0
package/WHITEPAPER.md +316 -0
package/dist/index.d.ts +23 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +35639 -0
package/dist/index.js.map +36 -0
package/dist/src/guard.d.ts +94 -0
package/dist/src/guard.d.ts.map +1 -0
package/dist/src/heuristics.d.ts +14 -0
package/dist/src/heuristics.d.ts.map +1 -0
package/dist/src/ner/classifier.d.ts +92 -0
package/dist/src/ner/classifier.d.ts.map +1 -0
package/dist/src/ner/worker.d.ts +44 -0
package/dist/src/ner/worker.d.ts.map +1 -0
package/dist/src/ner/worker.js +35302 -0
package/dist/src/ner/worker.js.map +30 -0
package/dist/src/pipeline.d.ts +76 -0
package/dist/src/pipeline.d.ts.map +1 -0
package/dist/src/policy.d.ts +27 -0
package/dist/src/policy.d.ts.map +1 -0
package/dist/src/premask.d.ts +48 -0
package/dist/src/premask.d.ts.map +1 -0
package/dist/src/session.d.ts +60 -0
package/dist/src/session.d.ts.map +1 -0
package/dist/src/streaming.d.ts +32 -0
package/dist/src/streaming.d.ts.map +1 -0
package/dist/src/types.d.ts +43 -0
package/dist/src/types.d.ts.map +1 -0
package/dist/src/validators.d.ts +16 -0
package/dist/src/validators.d.ts.map +1 -0
package/eval/bench/README.md +91 -0
package/eval/bench/fetch.ts +152 -0
package/eval/bench/labels.ts +45 -0
package/eval/bench/run.ts +146 -0
package/eval/bench/runs/m06-v3-30k/by_language.json +303 -0
package/eval/bench/runs/m06-v3-30k/summary.json +56 -0
package/eval/bench/runs/sample-900/by_language.json +303 -0
package/eval/bench/runs/sample-900/manifest.json +926 -0
package/eval/bench/runs/sample-900/summary.json +56 -0
package/eval/bench/score.ts +197 -0
package/eval/bench/webgpu/entry.ts +70 -0
package/eval/bench/webgpu/index.html +12 -0
package/eval/bench/webgpu.ts +209 -0
package/eval/public-cases.ts +412 -0
package/eval/run-public-eval.ts +140 -0
package/examples/basic-chat.ts +12 -0
package/examples/pii-worker.ts +3 -0
package/index.ts +47 -0
package/package.json +103 -0
package/src/guard.ts +170 -0
package/src/heuristics.ts +141 -0
package/src/ner/classifier.ts +580 -0
package/src/ner/worker.ts +130 -0
package/src/policy.ts +64 -0
package/src/premask.ts +90 -0
package/src/session.ts +99 -0
package/src/streaming.ts +73 -0
package/src/types.ts +74 -0
package/src/validators.ts +40 -0

package/eval/bench/run.ts ADDED Viewed

@@ -0,0 +1,146 @@
+/**
+ * The native bench: run the shipped pipeline over the frozen held-out set and
+ * emit `summary.json` / `by_language.json`. The predictions come from the exact
+ * TypeScript code that ships, scored by the TypeScript code under test.
+ *
+ *   bun eval/bench/run.ts --out eval/bench/runs/native
+ */
+import { mkdir, readFile, writeFile } from "node:fs/promises";
+import { join } from "node:path";
+import { detectHeuristics } from "../../src/heuristics";
+import { detectNer, loadNerClassifier, RAMPART_MODEL_ID } from "../../src/ner/classifier";
+import { applyPolicy } from "../../src/policy";
+import { premask, projectMaskedSpan } from "../../src/premask";
+import type { Span } from "../../src/types";
+import { mapOpenPiiLabel } from "./labels";
+import { type GoldSpan, type PredSpan, percentile, redactText, Stratum, termPresence, expectedCalibrationError } from "./score";
+/** IoU thresholds handed to `Stratum.addSpans` for every row (overall and per language). */
+const IOU_THRESHOLDS: number[] = [1, 0.5, 0];
+/**
+ * One record of the held-out JSONL file, as consumed by this bench.
+ * Each line of the data file is parsed into this shape without validation.
+ */
+interface Row {
+  /** Row identifier (declared, but not read by the code in this file). */
+  uid: number;
+  /** Language tag; used as the grouping key for `by_language.json`. */
+  language: string;
+  /** Raw input text the detection pipeline is run over. */
+  source_text: string;
+  /**
+   * Gold annotations for the row: a dataset label and the annotated surface string.
+   * NOTE(review): `start`/`end` are presumably character offsets into `source_text` — confirm against the dataset.
+   */
+  privacy_mask: Array<{ label: string; start: number; end: number; value: string }>;
+}
+/**
+ * Read a string-valued command-line option from `Bun.argv`.
+ *
+ * Both `--name=value` and `--name value` are accepted; the `=` form wins when
+ * both are present.
+ *
+ * @param name option name without the leading dashes
+ * @param fallback value returned when the option is absent, or when the
+ *   space-separated form is followed by nothing, an empty string, or another `--flag`
+ * @returns the option value, or `fallback`
+ */
+function arg(name: string, fallback: string): string {
+  const flag = `--${name}`;
+  const inlinePrefix = `${flag}=`;
+
+  // `--name=value` form. A match is never an empty string because it starts with the prefix.
+  const inline = Bun.argv.find((token) => token.startsWith(inlinePrefix));
+  if (inline) return inline.slice(inlinePrefix.length);
+
+  // `--name value` form: the value is the next token, unless that token is itself a flag.
+  const position = Bun.argv.indexOf(flag);
+  if (position < 0) return fallback;
+  const candidate = Bun.argv[position + 1];
+  if (!candidate || candidate.startsWith("--")) return fallback;
+  return candidate;
+}
+/**
+ * Build gold terms + redact-target spans for one row from the OpenPII privacy mask.
+ *
+ * Entries whose trimmed value is empty, or does not occur anywhere in
+ * `row.source_text`, are skipped. An entry whose label maps to `"O"` adds its
+ * value to `pub`; every other entry adds its value to `priv` and one span to
+ * `spans`.
+ *
+ * Span placement: the annotated `start` offset is used when the value really
+ * sits at that position in the text. Otherwise the first occurrence of the
+ * value is used. Relying on the first occurrence alone mis-places the gold
+ * span whenever the same string appears more than once in the row (or earlier
+ * as a substring of a longer token).
+ *
+ * @param row one held-out record
+ * @returns `priv` — values of entries with a non-"O" label; `pub` — values of
+ *   entries mapped to "O"; `spans` — one `[start, end)` span per `priv` entry
+ */
+function buildGold(row: Row): { priv: string[]; pub: string[]; spans: GoldSpan[] } {
+  const priv: string[] = [];
+  const pub: string[] = [];
+  const spans: GoldSpan[] = [];
+  const text = row.source_text;
+  for (const entry of row.privacy_mask) {
+    const raw = entry.value ?? "";
+    const value = raw.trim();
+    if (!value || !text.includes(value)) continue;
+    const ours = mapOpenPiiLabel(entry.label);
+    if (ours === "O") {
+      pub.push(value);
+      continue;
+    }
+    priv.push(value);
+    // The value was trimmed, so shift the annotated offset past any leading whitespace.
+    const hinted = entry.start + (raw.length - raw.trimStart().length);
+    // Trust the annotation only when it verifies against the text; a missing or
+    // out-of-range offset (NaN, negative, mismatch) falls back to the first occurrence.
+    const verified = Number.isInteger(hinted) && hinted >= 0 && text.startsWith(value, hinted);
+    const start = verified ? hinted : text.indexOf(value);
+    spans.push({ label: ours, start, end: start + value.length });
+  }
+  return { priv, pub, spans };
+}
+/**
+ * Run the bench end to end.
+ *
+ * Reads the JSONL data set (`--data`), loads the classifier (`--model`, on CPU),
+ * runs heuristics + model + policy over every row, accumulates term, span and
+ * calibration statistics overall and per language, then writes `summary.json`
+ * and `by_language.json` into `--out` and prints a short report.
+ *
+ * Flags: `--no-prefilter` feeds the raw text to the model instead of the
+ * premasked text; `--model-only` additionally drops heuristic detections.
+ *
+ * @throws Error when the data file contains no rows; `JSON.parse` errors for
+ *   malformed lines propagate unchanged.
+ */
+async function main(): Promise<void> {
+  const dataPath = arg("data", "eval/bench/data/heldout.jsonl");
+  const outDir = arg("out", "eval/bench/runs/native");
+  const noPrefilter = process.argv.includes("--no-prefilter");
+  const modelOnly = process.argv.includes("--model-only");
+  const model = arg("model", RAMPART_MODEL_ID);
+  // One JSON object per line. Blank lines (trailing newline, accidental gaps) are
+  // skipped rather than handed to JSON.parse, which would throw on them.
+  const rows: Row[] = (await readFile(dataPath, "utf8"))
+    .split("\n")
+    .filter((line) => line.trim() !== "")
+    .map((line) => JSON.parse(line));
+  if (rows.length === 0) throw new Error(`no rows found in ${dataPath}`);
+  const classifier = await loadNerClassifier({ model, device: "cpu" });
+  const overall = new Stratum();
+  const byLang = new Map<string, Stratum>();
+  const latencies: number[] = [];
+  const calibration: [number, boolean][] = [];
+  for (const row of rows) {
+    const { priv, pub, spans: gold } = buildGold(row);
+    const t0 = performance.now();
+    // Match the shipped runtime: premask SSN/CC/IP_ADDRESS to sentinels before
+    // the model unless --no-prefilter is set (npf models trained on raw digits).
+    // --model-only skips heuristics entirely (model is the only detector).
+    const heuristic = modelOnly ? [] : detectHeuristics(row.source_text);
+    let modelSpans: Span[];
+    if (noPrefilter || modelOnly) {
+      modelSpans = await detectNer(row.source_text, classifier);
+    } else {
+      const map = premask(row.source_text, heuristic);
+      const masked = await detectNer(map.masked, classifier);
+      modelSpans = [];
+      // Spans were found in the masked text; project them back onto the source
+      // text and drop the ones that cannot be projected (null).
+      for (const s of masked) {
+        const projected = projectMaskedSpan(s, row.source_text, map);
+        if (projected !== null) modelSpans.push(projected);
+      }
+    }
+    const detected: Span[] = [...heuristic, ...modelSpans];
+    const redactSpans = applyPolicy(detected); // non-overlapping, keep-set filtered — exactly what ships
+    // Timed region ends here: detection + policy only, scoring is excluded.
+    latencies.push(performance.now() - t0);
+    const pred: PredSpan[] = redactSpans.map((s) => ({ label: s.label, start: s.start, end: s.end, score: s.score }));
+    const redacted = redactText(row.source_text, redactSpans);
+    const term = termPresence(redacted, priv, pub);
+    overall.addTerm(term);
+    overall.addSpans(gold, pred, IOU_THRESHOLDS);
+    const stratum = byLang.get(row.language) ?? new Stratum();
+    stratum.addTerm(term);
+    stratum.addSpans(gold, pred, IOU_THRESHOLDS);
+    byLang.set(row.language, stratum);
+    // Calibration: per predicted span, score vs. whether it hit a gold span (IoU>=0.5).
+    for (const p of pred) {
+      const correct = gold.some((g) => g.label === p.label && spanIou(p, g) >= 0.5);
+      // Scores are clamped into [0, 1] before being paired with the outcome.
+      calibration.push([Math.min(1, Math.max(0, p.score)), correct]);
+    }
+  }
+  // "cold" is the first row's latency (the first model call), so it must be read
+  // before sorting; the last element of the sorted array is only the maximum.
+  const cold = latencies[0] ?? 0;
+  latencies.sort((a, b) => a - b);
+  const summary = {
+    ...overall.report(),
+    calibration: { ece: expectedCalibrationError(calibration), n_pairs: calibration.length },
+    latency_ms: {
+      cold,
+      p50: percentile(latencies, 0.5),
+      p95: percentile(latencies, 0.95),
+      p99: percentile(latencies, 0.99),
+      mean: latencies.reduce((a, b) => a + b, 0) / (latencies.length || 1),
+    },
+    model,
+    rows_scored: rows.length,
+  };
+  const byLanguage = Object.fromEntries([...byLang.entries()].sort().map(([lang, s]) => [lang, s.report()]));
+  await mkdir(outDir, { recursive: true });
+  await writeFile(join(outDir, "summary.json"), JSON.stringify(summary, null, 2) + "\n");
+  await writeFile(join(outDir, "by_language.json"), JSON.stringify(byLanguage, null, 2) + "\n");
+  // The report shape comes from Stratum.report(); it is read loosely here for printing only.
+  const r = summary as Record<string, any>;
+  console.log(`\nRows ${r.rows}  private ${r.private_total}  public ${r.public_total}`);
+  console.log(`Private recall   ${(r.private_recall * 100).toFixed(2)}%  Wilson95 [${(r.private_recall_wilson95[0] * 100).toFixed(2)}, ${(r.private_recall_wilson95[1] * 100).toFixed(2)}]  (leaked ${r.leaked})`);
+  console.log(`Public retention ${(r.public_retained * 100).toFixed(2)}%`);
+  console.log(`Span-F1 strict   ${r.span_f1["iou_1.00"].f1.toFixed(3)}   latency p50 ${r.latency_ms.p50.toFixed(1)} ms`);
+  console.log(`By language: ${[...byLang.entries()].sort().map(([l, s]) => { const x = s.report() as any; return `${l} ${(x.private_recall * 100).toFixed(1)}%`; }).join("  ")}`);
+  console.log(`\nwrote ${join(outDir, "summary.json")} + by_language.json`);
+}
+/**
+ * Intersection-over-union of two character ranges.
+ *
+ * @param a first span (`start`/`end` are read)
+ * @param b second span (`start`/`end` are read)
+ * @returns 0 when the ranges share no characters; otherwise the overlap length
+ *   divided by the length of the range covering both spans
+ */
+function spanIou(a: GoldSpan, b: GoldSpan): number {
+  const overlapStart = Math.max(a.start, b.start);
+  const overlapEnd = Math.min(a.end, b.end);
+  const overlap = Math.max(0, overlapEnd - overlapStart);
+  if (overlap === 0) return 0;
+  // The spans overlap here, so the covering range is exactly their union.
+  const covering = Math.max(a.end, b.end) - Math.min(a.start, b.start);
+  return overlap / covering;
+}
+// Entry point: run the bench when this module is executed (top-level await).
+await main();

package/eval/bench/runs/m06-v3-30k/by_language.json ADDED Viewed

@@ -0,0 +1,303 @@
+{
+  "de": {
+    "rows": 4260,
+    "private_total": 17347,
+    "private_recall": 0.9794200726350377,
+    "private_recall_wilson95": [
+      0.977198733783712,
+      0.9814291174439769
+    ],
+    "private_recall_bootstrap95": [
+      0.9772871389865683,
+      0.9816682999942353
+    ],
+    "leaked": 357,
+    "public_total": 11464,
+    "public_retained": 0.9166085136078158,
+    "over_redacted": 956,
+    "span_f1": {
+      "iou_1.00": {
+        "tp": 8626,
+        "fp": 7911,
+        "fn": 8721,
+        "precision": 0.5216181895144222,
+        "recall": 0.49726177437020813,
+        "f1": 0.5091488608192657
+      },
+      "iou_0.50": {
+        "tp": 10797,
+        "fp": 5740,
+        "fn": 6550,
+        "precision": 0.6528995585656406,
+        "recall": 0.6224130973655387,
+        "f1": 0.6372919371974973
+      },
+      "iou_0.00": {
+        "tp": 12407,
+        "fp": 4130,
+        "fn": 4940,
+        "precision": 0.7502569994557658,
+        "recall": 0.7152245345016429,
+        "f1": 0.7323220399008381
+      }
+    }
+  },
+  "en": {
+    "rows": 11569,
+    "private_total": 53877,
+    "private_recall": 0.9885294281418787,
+    "private_recall_wilson95": [
+      0.9875947848056238,
+      0.9893944090676955
+    ],
+    "private_recall_bootstrap95": [
+      0.9876570707351932,
+      0.9894574679362251
+    ],
+    "leaked": 618,
+    "public_total": 35153,
+    "public_retained": 0.9053565840753278,
+    "over_redacted": 3327,
+    "span_f1": {
+      "iou_1.00": {
+        "tp": 28381,
+        "fp": 21760,
+        "fn": 25496,
+        "precision": 0.5660238128477693,
+        "recall": 0.5267739480668931,
+        "f1": 0.5456940144974908
+      },
+      "iou_0.50": {
+        "tp": 34591,
+        "fp": 15550,
+        "fn": 19286,
+        "precision": 0.6898745537584013,
+        "recall": 0.6420364905247137,
+        "f1": 0.665096425618643
+      },
+      "iou_0.00": {
+        "tp": 40179,
+        "fp": 9962,
+        "fn": 13698,
+        "precision": 0.8013202768193693,
+        "recall": 0.7457542179408653,
+        "f1": 0.7725393681862754
+      }
+    }
+  },
+  "es": {
+    "rows": 3234,
+    "private_total": 13736,
+    "private_recall": 0.988351776354106,
+    "private_recall_wilson95": [
+      0.9864159298109118,
+      0.9900145408285751
+    ],
+    "private_recall_bootstrap95": [
+      0.9865317414094351,
+      0.9900262085032032
+    ],
+    "leaked": 160,
+    "public_total": 9160,
+    "public_retained": 0.9161572052401746,
+    "over_redacted": 768,
+    "span_f1": {
+      "iou_1.00": {
+        "tp": 6958,
+        "fp": 6030,
+        "fn": 6778,
+        "precision": 0.5357252848783493,
+        "recall": 0.5065521258008153,
+        "f1": 0.5207304295764107
+      },
+      "iou_0.50": {
+        "tp": 8954,
+        "fp": 4034,
+        "fn": 4782,
+        "precision": 0.6894056051740067,
+        "recall": 0.651863715783343,
+        "f1": 0.6701092650800778
+      },
+      "iou_0.00": {
+        "tp": 10330,
+        "fp": 2658,
+        "fn": 3406,
+        "precision": 0.795349553433939,
+        "recall": 0.7520384391380315,
+        "f1": 0.7730878610986379
+      }
+    }
+  },
+  "fr": {
+    "rows": 4708,
+    "private_total": 19906,
+    "private_recall": 0.9840751532201346,
+    "private_recall_wilson95": [
+      0.9822403470424554,
+      0.9857231549876158
+    ],
+    "private_recall_bootstrap95": [
+      0.9823671254898021,
+      0.9857831809504672
+    ],
+    "leaked": 317,
+    "public_total": 13740,
+    "public_retained": 0.9282387190684134,
+    "over_redacted": 986,
+    "span_f1": {
+      "iou_1.00": {
+        "tp": 10066,
+        "fp": 8581,
+        "fn": 9840,
+        "precision": 0.5398187375985413,
+        "recall": 0.50567668039787,
+        "f1": 0.5221902316291859
+      },
+      "iou_0.50": {
+        "tp": 12725,
+        "fp": 5922,
+        "fn": 7181,
+        "precision": 0.682415401941331,
+        "recall": 0.6392544961318195,
+        "f1": 0.6601302103597644
+      },
+      "iou_0.00": {
+        "tp": 14517,
+        "fp": 4130,
+        "fn": 5389,
+        "precision": 0.7785166514720867,
+        "recall": 0.7292776047422888,
+        "f1": 0.7530931445023734
+      }
+    }
+  },
+  "it": {
+    "rows": 3218,
+    "private_total": 13855,
+    "private_recall": 0.9782749909779863,
+    "private_recall_wilson95": [
+      0.9757116115600604,
+      0.9805732196339119
+    ],
+    "private_recall_bootstrap95": [
+      0.975821003247925,
+      0.9807289787080476
+    ],
+    "leaked": 301,
+    "public_total": 9198,
+    "public_retained": 0.9406392694063926,
+    "over_redacted": 546,
+    "span_f1": {
+      "iou_1.00": {
+        "tp": 6962,
+        "fp": 6111,
+        "fn": 6893,
+        "precision": 0.5325479996940259,
+        "recall": 0.5024900757849152,
+        "f1": 0.5170825906120025
+      },
+      "iou_0.50": {
+        "tp": 8880,
+        "fp": 4193,
+        "fn": 4975,
+        "precision": 0.6792626023101048,
+        "recall": 0.6409238542042583,
+        "f1": 0.659536541889483
+      },
+      "iou_0.00": {
+        "tp": 10122,
+        "fp": 2951,
+        "fn": 3733,
+        "precision": 0.7742675743899641,
+        "recall": 0.7305665824612053,
+        "f1": 0.7517825311942958
+      }
+    }
+  },
+  "nl": {
+    "rows": 1526,
+    "private_total": 6519,
+    "private_recall": 0.9720816076085289,
+    "private_recall_wilson95": [
+      0.9677960000335368,
+      0.9758111542471986
+    ],
+    "private_recall_bootstrap95": [
+      0.9682466635987115,
+      0.9762233471391317
+    ],
+    "leaked": 182,
+    "public_total": 4302,
+    "public_retained": 0.9193398419339842,
+    "over_redacted": 347,
+    "span_f1": {
+      "iou_1.00": {
+        "tp": 3334,
+        "fp": 2747,
+        "fn": 3185,
+        "precision": 0.5482650879789508,
+        "recall": 0.5114281331492561,
+        "f1": 0.5292063492063492
+      },
+      "iou_0.50": {
+        "tp": 4061,
+        "fp": 2020,
+        "fn": 2458,
+        "precision": 0.6678177931261305,
+        "recall": 0.6229483049547476,
+        "f1": 0.6446031746031745
+      },
+      "iou_0.00": {
+        "tp": 4631,
+        "fp": 1450,
+        "fn": 1888,
+        "precision": 0.7615523762539056,
+        "recall": 0.7103850283785856,
+        "f1": 0.7350793650793651
+      }
+    }
+  },
+  "pt": {
+    "rows": 1485,
+    "private_total": 6467,
+    "private_recall": 0.9772692129271687,
+    "private_recall_wilson95": [
+      0.9733432981324541,
+      0.9806284387124102
+    ],
+    "private_recall_bootstrap95": [
+      0.9737126952218957,
+      0.9808257306324416
+    ],
+    "leaked": 147,
+    "public_total": 4190,
+    "public_retained": 0.9250596658711218,
+    "over_redacted": 314,
+    "span_f1": {
+      "iou_1.00": {
+        "tp": 3012,
+        "fp": 2683,
+        "fn": 3455,
+        "precision": 0.5288849868305531,
+        "recall": 0.46574918818617594,
+        "f1": 0.4953132708436112
+      },
+      "iou_0.50": {
+        "tp": 3707,
+        "fp": 1988,
+        "fn": 2760,
+        "precision": 0.6509218612818262,
+        "recall": 0.5732178753672491,
+        "f1": 0.6096036836046703
+      },
+      "iou_0.00": {
+        "tp": 4375,
+        "fp": 1320,
+        "fn": 2092,
+        "precision": 0.7682177348551361,
+        "recall": 0.676511520024741,
+        "f1": 0.71945403716494
+      }
+    }
+  }
+}

package/eval/bench/runs/m06-v3-30k/summary.json ADDED Viewed

@@ -0,0 +1,56 @@
+{
+  "rows": 30000,
+  "private_total": 131707,
+  "private_recall": 0.9841921841663693,
+  "private_recall_wilson95": [
+    0.9835042840945342,
+    0.9848518394457167
+  ],
+  "private_recall_bootstrap95": [
+    0.9835088491879702,
+    0.9848451487012839
+  ],
+  "leaked": 2082,
+  "public_total": 87207,
+  "public_retained": 0.9169332737050925,
+  "over_redacted": 7244,
+  "span_f1": {
+    "iou_1.00": {
+      "tp": 67339,
+      "fp": 55823,
+      "fn": 64368,
+      "precision": 0.546751433071889,
+      "recall": 0.5112788234490194,
+      "f1": 0.5284204826793372
+    },
+    "iou_0.50": {
+      "tp": 83715,
+      "fp": 39447,
+      "fn": 47992,
+      "precision": 0.6797145223364349,
+      "recall": 0.6356154190741571,
+      "f1": 0.656925714778965
+    },
+    "iou_0.00": {
+      "tp": 96561,
+      "fp": 26601,
+      "fn": 35146,
+      "precision": 0.784016173819847,
+      "recall": 0.7331500983243108,
+      "f1": 0.7577304419132965
+    }
+  },
+  "calibration": {
+    "ece": 0.291236456105258,
+    "n_pairs": 123162
+  },
+  "latency_ms": {
+    "cold": 62.28677499999992,
+    "p50": 6.577430999997887,
+    "p95": 12.474294999999984,
+    "p99": 15.152568000004976,
+    "mean": 7.214940486199942
+  },
+  "model": "./model",
+  "rows_scored": 30000
+}