@holoscript/plugin-insurance 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,342 @@
1
+ /**
2
+ * Insurance underwriting fairness adapter — integration test (D.057 tracer-bullet)
3
+ *
4
+ * Tests the NAIC AI Systems Evaluation Tool pilot slice end-to-end:
5
+ * 1. Adapter importable; cohort ≥200 rows with correct shape.
6
+ * 2. Biased model → FLAG-DISPARATE-IMPACT on the race_proxy attribute.
7
+ * 3. Remediated model → PASS on the same cohort.
8
+ * 4. FairnessReceipt integrity: receiptHash verifies + replayFingerprint stable.
9
+ * 5. Replay determinism: re-run from same inputs reproduces fingerprint (MATCH).
10
+ * 6. Regulatory crosswalk: NAIC 2026 pilot + CO SB21-169 keys present.
11
+ * 7. Robustness band: biased model ROBUSTLY-UNFAIR, remediated ROBUSTLY-FAIR.
12
+ * 8. Ensemble replay: a second run reproduces the band byte-identically (same ensembleHash and receiptHash).
13
+ *
14
+ * F-flags:
15
+ * F-1 carried (synthetic data only — no external model assertions).
16
+ * F-2 partially closed (realistic synthetic; ZIP-income decile proxy mirrors
17
+ * NAIC focus area; seasonal drift + credit-bureau noise are domain-real).
18
+ *
19
+ * Validation evidence: this file + pnpm test output (passed locally).
20
+ */
21
+
22
+ import { describe, it, expect } from 'vitest';
23
+ import { Simulation } from '@holoscript/engine';
24
+
25
+ import {
26
+ createHomeownersModel,
27
+ makeInsuranceCohort,
28
+ insuranceUnderwritingPerturber,
29
+ BIASED_WEIGHTS,
30
+ REMEDIATED_WEIGHTS,
31
+ NAIC_INSURANCE_CROSSWALK,
32
+ PROTECTED_ATTRIBUTE,
33
+ } from '../fairness-underwriting';
34
+
35
// Fixed inputs shared by every suite in this file, so each run is fed the
// same seed, issue timestamp, and cohort size.
const SEED = 202601;
const ISSUED = '2026-06-08T00:00:00Z';
// 400 rows; the group-split test in this file expects exactly half per group.
// NOTE(review): the original author states 200 per group is well above the
// NAIC minimum — that threshold is not visible in this file; confirm.
const COHORT_SIZE = 400;

// Engine entry points used by the suites below, taken off the Simulation
// namespace one at a time.
const runFairnessSweep = Simulation.runFairnessSweep;
const runFairnessRobustness = Simulation.runFairnessRobustness;
const replayFairnessReceipt = Simulation.replayFairnessReceipt;
const verifyReceiptIntegrity = Simulation.verifyReceiptIntegrity;
const verifyReplayExecution = Simulation.verifyReplayExecution;
const computeDecisionDigest = Simulation.computeDecisionDigest;
47
+
48
+ // ── 1. Adapter and cohort shape ───────────────────────────────────────────────
49
+
50
/**
 * Suite 1 — shape of the synthetic cohort returned by makeInsuranceCohort:
 * row count, group labels, numeric features within [0, 1], an even group
 * split, and a higher mean zip_risk for the high-decile group.
 */
describe('insurance-underwriting adapter — cohort shape', () => {
  it('generates ≥200 rows with correct feature keys and group labels', () => {
    const rows = makeInsuranceCohort(SEED, COHORT_SIZE);
    expect(rows.length).toBe(COHORT_SIZE);

    // Checked in the same order for every row: label, per-key type, then range.
    const numericKeys = ['prior_claims', 'credit_tier', 'zip_risk', 'age_band'] as const;
    rows.forEach(({ group, features }) => {
      expect(['low-decile', 'high-decile']).toContain(group);
      for (const key of numericKeys) {
        expect(typeof features[key]).toBe('number');
      }
      // Every feature value (not only the four named above) must lie in [0, 1].
      Object.values(features).forEach((value) => {
        expect(value).toBeGreaterThanOrEqual(0);
        expect(value).toBeLessThanOrEqual(1);
      });
    });
  });

  it('has ~50/50 group split', () => {
    const rows = makeInsuranceCohort(SEED, COHORT_SIZE);
    const countOf = (label: string) => rows.filter((entry) => entry.group === label).length;

    const lowCount = countOf('low-decile');
    const highCount = countOf('high-decile');

    // The split is asserted to be exact, not approximate.
    expect(lowCount).toBe(COHORT_SIZE / 2);
    expect(highCount).toBe(COHORT_SIZE / 2);
  });

  it('high-decile has higher mean zip_risk than low-decile (bias proxy)', () => {
    const rows = makeInsuranceCohort(SEED, COHORT_SIZE);

    // Arithmetic mean of zip_risk over the rows carrying the given group label.
    const meanZipRisk = (label: string) => {
      let total = 0;
      let count = 0;
      for (const entry of rows) {
        if (entry.group === label) {
          total += entry.features.zip_risk;
          count += 1;
        }
      }
      return total / count;
    };

    expect(meanZipRisk('high-decile')).toBeGreaterThan(meanZipRisk('low-decile'));
  });
});
85
+
86
+ // ── 2. Biased model — FLAG-DISPARATE-IMPACT ───────────────────────────────────
87
+
88
/**
 * Suite 2 — a model built from BIASED_WEIGHTS must be flagged: the sweep's
 * adverse-impact ratio falls below 0.8 and the receipt records
 * FLAG-DISPARATE-IMPACT on the race_proxy attribute.
 */
describe('insurance-underwriting — biased model flags disparate impact', () => {
  it('adverse-impact ratio < 0.8 and receipt decision = FLAG-DISPARATE-IMPACT', async () => {
    const applicants = makeInsuranceCohort(SEED, COHORT_SIZE);
    const biasedScorer = createHomeownersModel(BIASED_WEIGHTS);

    const sweepOptions = {
      seed: SEED,
      issuedAt: ISSUED,
      protectedAttribute: PROTECTED_ATTRIBUTE,
      hashMode: 'sha256' as const,
      verifyDeterminism: true,
      regulatoryMapping: NAIC_INSURANCE_CROSSWALK,
    };
    const sweep = await runFairnessSweep(biasedScorer, applicants, sweepOptions);

    expect(sweep.metrics.adverseImpactRatio).toBeLessThan(0.8);
    expect(sweep.receipt.decision).toBe('FLAG-DISPARATE-IMPACT');
    expect(sweep.receipt.protectedAttribute).toBe('race_proxy');
    expect(sweep.receipt.kind).toBe('fairness.receipt.v1');
    // With verifyDeterminism on, the receipt is expected to report exact
    // replay with zero tolerance.
    expect(sweep.receipt.replayDeterminism).toBe('exact');
    expect(sweep.receipt.replayTolerance).toBe(0);
  });
});
110
+
111
+ // ── 3. Remediated model — PASS ────────────────────────────────────────────────
112
+
113
/**
 * Suite 3 — a model built from REMEDIATED_WEIGHTS, swept over the same
 * cohort inputs, must reach an adverse-impact ratio of at least 0.8 and a
 * PASS decision.
 */
describe('insurance-underwriting — remediated model passes', () => {
  it('adverse-impact ratio ≥ 0.8 and receipt decision = PASS', async () => {
    const applicants = makeInsuranceCohort(SEED, COHORT_SIZE);
    const remediatedScorer = createHomeownersModel(REMEDIATED_WEIGHTS, 'homeowners-remediated');

    const sweepOptions = {
      seed: SEED,
      issuedAt: ISSUED,
      protectedAttribute: PROTECTED_ATTRIBUTE,
      hashMode: 'sha256' as const,
      regulatoryMapping: NAIC_INSURANCE_CROSSWALK,
    };
    const outcome = await runFairnessSweep(remediatedScorer, applicants, sweepOptions);

    expect(outcome.metrics.adverseImpactRatio).toBeGreaterThanOrEqual(0.8);
    expect(outcome.receipt.decision).toBe('PASS');
  });
});
129
+
130
+ // ── 4. Receipt integrity ──────────────────────────────────────────────────────
131
+
132
/**
 * Suite 4 — receipt integrity: a freshly emitted receipt verifies, and a
 * copy whose adverseImpactRatio metric has been overwritten does not.
 */
describe('insurance-underwriting — receipt integrity', () => {
  it('verifyReceiptIntegrity returns true for a freshly emitted receipt', async () => {
    const applicants = makeInsuranceCohort(SEED, COHORT_SIZE);
    const scorer = createHomeownersModel(BIASED_WEIGHTS);

    const sweep = await runFairnessSweep(scorer, applicants, {
      seed: SEED,
      issuedAt: ISSUED,
      hashMode: 'sha256',
      regulatoryMapping: NAIC_INSURANCE_CROSSWALK,
    });

    const intact = verifyReceiptIntegrity(sweep.receipt);
    expect(intact).toBe(true);
  });

  it('mutating a metric field fails the integrity check (tamper detection)', async () => {
    const applicants = makeInsuranceCohort(SEED, COHORT_SIZE);
    const scorer = createHomeownersModel(BIASED_WEIGHTS);

    const sweep = await runFairnessSweep(scorer, applicants, {
      seed: SEED,
      issuedAt: ISSUED,
      hashMode: 'sha256',
    });
    const original = sweep.receipt;

    // Shallow copy with only the adverse-impact ratio overwritten; all other
    // fields are carried over from the emitted receipt unchanged.
    const forgedMetrics = { ...original.metrics, adverseImpactRatio: 0.99 };
    const forged = { ...original, metrics: forgedMetrics };

    expect(verifyReceiptIntegrity(forged)).toBe(false);
  });
});
159
+
160
+ // ── 5. Replay determinism — MATCH on same inputs ──────────────────────────────
161
+
162
/**
 * Suite 5 — replay determinism: identical inputs reproduce the fingerprint
 * and receipt hash (MATCH), a recomputed decision digest matches the
 * receipt, and a single perturbed input record is reported as DRIFT.
 */
describe('insurance-underwriting — replay determinism', () => {
  it('re-running from the same inputs produces the same replayFingerprint (MATCH)', async () => {
    const applicants = makeInsuranceCohort(SEED, COHORT_SIZE);
    const scorer = createHomeownersModel(BIASED_WEIGHTS);
    // One options object is reused for both sweeps.
    const sharedOptions = {
      seed: SEED,
      issuedAt: ISSUED,
      hashMode: 'sha256' as const,
      regulatoryMapping: NAIC_INSURANCE_CROSSWALK,
    };

    const firstRun = await runFairnessSweep(scorer, applicants, sharedOptions);
    const secondRun = await runFairnessSweep(scorer, applicants, sharedOptions);

    expect(secondRun.receipt.replayFingerprint).toBe(firstRun.receipt.replayFingerprint);
    expect(secondRun.receipt.receiptHash).toBe(firstRun.receipt.receiptHash);

    // Replay the first receipt against the identifiers produced by the second run.
    const replayInputs = {
      modelHash: secondRun.modelHash,
      seed: SEED,
      inputHash: secondRun.inputHash,
      weightStrategy: secondRun.weightStrategy,
    };
    const outcome = replayFairnessReceipt(firstRun.receipt, replayInputs);
    expect(outcome).toBe('MATCH');
  });

  it('verifyReplayExecution — re-run decision digest matches (exact model)', async () => {
    const applicants = makeInsuranceCohort(SEED, COHORT_SIZE);
    const scorer = createHomeownersModel(BIASED_WEIGHTS);
    const sweep = await runFairnessSweep(scorer, applicants, {
      seed: SEED,
      issuedAt: ISSUED,
      hashMode: 'sha256',
      verifyDeterminism: true,
    });

    // Recompute the decisions row by row, as a validator re-running the model
    // would, and digest them with the same hash mode.
    const decisions = applicants.map((entry) => scorer.decide(entry.features));
    const recomputedDigest = computeDecisionDigest(decisions, 'sha256');

    const outcome = verifyReplayExecution(sweep.receipt, {
      decisionDigest: recomputedDigest,
      adverseImpactRatio: sweep.metrics.adverseImpactRatio,
    });
    expect(outcome).toBe('MATCH');
  });

  it('perturbing one input record breaks the fingerprint (DRIFT)', async () => {
    const applicants = makeInsuranceCohort(SEED, COHORT_SIZE);
    const scorer = createHomeownersModel(BIASED_WEIGHTS);
    const baseline = await runFairnessSweep(scorer, applicants, {
      seed: SEED,
      issuedAt: ISSUED,
      hashMode: 'sha256',
    });

    // Nudge zip_risk on the first record only; every other record is passed
    // through by reference.
    const perturbed = applicants.map((entry, index) => {
      if (index !== 0) {
        return entry;
      }
      const nudgedFeatures = { ...entry.features, zip_risk: entry.features.zip_risk + 1e-4 };
      return { group: entry.group, features: nudgedFeatures };
    });
    const drifted = await runFairnessSweep(scorer, perturbed, {
      seed: SEED,
      issuedAt: ISSUED,
      hashMode: 'sha256',
    });

    expect(drifted.inputHash).not.toBe(baseline.inputHash);

    const outcome = replayFairnessReceipt(baseline.receipt, {
      modelHash: drifted.modelHash,
      seed: SEED,
      inputHash: drifted.inputHash,
      weightStrategy: drifted.weightStrategy,
    });
    expect(outcome).toBe('DRIFT');
  });
});
241
+
242
+ // ── 6. Regulatory crosswalk — NAIC 2026 pilot + CO SB21-169 ──────────────────
243
+
244
/**
 * Suite 6 — regulatory crosswalk: the emitted receipt carries keys mentioning
 * the NAIC 2026 pilot and Colorado SB21-169, and the exported crosswalk
 * entries mention the expected scorer / attribute / proxy names.
 */
describe('insurance-underwriting — regulatory crosswalk keys', () => {
  it('receipt contains NAIC 2026 AI pilot crosswalk key', async () => {
    const applicants = makeInsuranceCohort(SEED, COHORT_SIZE);
    const scorer = createHomeownersModel(BIASED_WEIGHTS);
    const sweep = await runFairnessSweep(scorer, applicants, {
      seed: SEED,
      issuedAt: ISSUED,
      hashMode: 'sha256',
      regulatoryMapping: NAIC_INSURANCE_CROSSWALK,
    });

    const mappingKeys = Object.keys(sweep.receipt.regulatoryMapping);
    // True when at least one mapping key contains the given fragment.
    const hasKeyContaining = (fragment: string) =>
      mappingKeys.some((key) => key.includes(fragment));

    expect(hasKeyContaining('NAIC 2026')).toBe(true);
    expect(hasKeyContaining('Colorado SB21-169')).toBe(true);
    // At least eight entries are expected in total (per the original author:
    // the standard crosswalk entries plus the NAIC pilot additions).
    expect(mappingKeys.length).toBeGreaterThanOrEqual(8);
  });

  it('NAIC crosswalk entry references race_proxy and homeowners-underwriting-scorer', () => {
    const naicKey = 'NAIC AI Systems Evaluation Tool (sample case file + bias audit)';
    const entry = NAIC_INSURANCE_CROSSWALK[naicKey];

    expect(entry).toBeTruthy();
    expect(entry).toContain('homeowners-underwriting-scorer');
    expect(entry).toContain('race_proxy');
  });

  it('CO SB21-169 crosswalk entry references zip_risk proxy', () => {
    const coloradoKey = 'Colorado SB21-169 / Reg 10-1-1 (unfair-discrimination testing)';
    const entry = NAIC_INSURANCE_CROSSWALK[coloradoKey];

    expect(entry).toBeTruthy();
    expect(entry).toContain('zip_risk');
  });
});
275
+
276
+ // ── 7. Robustness band ────────────────────────────────────────────────────────
277
+
278
/**
 * Suite 7 — robustness band: under insuranceUnderwritingPerturber the biased
 * model's verdict is ROBUSTLY-UNFAIR and the remediated model's is
 * ROBUSTLY-FAIR, with both receipts passing the integrity check.
 */
describe('insurance-underwriting — robustness band', () => {
  // Replicate count used by both tests; the original author notes it is
  // reduced for test speed and that the full demo uses ≥200.
  const REPLICATES = 80;

  // Builds a fresh options object per call so the two tests never share one.
  const robustnessOptions = () => ({
    seed: SEED,
    issuedAt: ISSUED,
    hashMode: 'sha256' as const,
    replicates: REPLICATES,
    perturber: insuranceUnderwritingPerturber,
    regulatoryMapping: NAIC_INSURANCE_CROSSWALK,
  });

  it('biased model is ROBUSTLY-UNFAIR under seasonal drift + credit noise', async () => {
    const applicants = makeInsuranceCohort(SEED, COHORT_SIZE);
    const biasedScorer = createHomeownersModel(BIASED_WEIGHTS);
    const outcome = await runFairnessRobustness(biasedScorer, applicants, robustnessOptions());

    expect(outcome.verdict).toBe('ROBUSTLY-UNFAIR');
    // Second element of the 90% CI pair (treated as the upper bound) must sit
    // below 0.8.
    const upperBound = outcome.band.ci90[1];
    expect(upperBound).toBeLessThan(0.8);
    expect(verifyReceiptIntegrity(outcome.receipt)).toBe(true);
    expect(outcome.receipt.kind).toBe('fairness.robustness.v1');
  });

  it('remediated model is ROBUSTLY-FAIR under the same drift space', async () => {
    const applicants = makeInsuranceCohort(SEED, COHORT_SIZE);
    const remediatedScorer = createHomeownersModel(REMEDIATED_WEIGHTS, 'homeowners-remediated');
    const outcome = await runFairnessRobustness(remediatedScorer, applicants, robustnessOptions());

    expect(outcome.verdict).toBe('ROBUSTLY-FAIR');
    // First element of the 90% CI pair (treated as the lower bound) must be
    // at or above 0.8.
    const lowerBound = outcome.band.ci90[0];
    expect(lowerBound).toBeGreaterThanOrEqual(0.8);
    expect(verifyReceiptIntegrity(outcome.receipt)).toBe(true);
  });
});
318
+
319
+ // ── 8. Ensemble replay ────────────────────────────────────────────────────────
320
+
321
/**
 * Suite 8 — ensemble replay: running the robustness sweep twice with the
 * same model, cohort, and options object yields identical ensembleHash and
 * receiptHash values.
 */
describe('insurance-underwriting — ensemble replay', () => {
  const REPLICATES = 60;

  it('re-running the robustness sweep produces the same ensembleHash and receiptHash', async () => {
    const applicants = makeInsuranceCohort(SEED, COHORT_SIZE);
    const scorer = createHomeownersModel(BIASED_WEIGHTS);
    // The very same options object is handed to both runs.
    const sharedOptions = {
      seed: SEED,
      issuedAt: ISSUED,
      hashMode: 'sha256' as const,
      replicates: REPLICATES,
      perturber: insuranceUnderwritingPerturber,
      regulatoryMapping: NAIC_INSURANCE_CROSSWALK,
    };

    const initialRun = await runFairnessRobustness(scorer, applicants, sharedOptions);
    const secondRun = await runFairnessRobustness(scorer, applicants, sharedOptions);

    expect(secondRun.ensembleHash).toBe(initialRun.ensembleHash);
    expect(secondRun.receipt.receiptHash).toBe(initialRun.receipt.receiptHash);
  });
});