@cyanheads/sanctions-screening-mcp-server 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/AGENTS.md +452 -0
  2. package/CLAUDE.md +452 -0
  3. package/Dockerfile +126 -0
  4. package/LICENSE +201 -0
  5. package/README.md +354 -0
  6. package/changelog/0.1.x/0.1.0.md +26 -0
  7. package/changelog/template.md +127 -0
  8. package/dist/config/server-config.d.ts +37 -0
  9. package/dist/config/server-config.d.ts.map +1 -0
  10. package/dist/config/server-config.js +87 -0
  11. package/dist/config/server-config.js.map +1 -0
  12. package/dist/index.d.ts +11 -0
  13. package/dist/index.d.ts.map +1 -0
  14. package/dist/index.js +70 -0
  15. package/dist/index.js.map +1 -0
  16. package/dist/mcp-server/prompts/definitions/index.d.ts +12 -0
  17. package/dist/mcp-server/prompts/definitions/index.d.ts.map +1 -0
  18. package/dist/mcp-server/prompts/definitions/index.js +9 -0
  19. package/dist/mcp-server/prompts/definitions/index.js.map +1 -0
  20. package/dist/mcp-server/prompts/definitions/vet-counterparty.prompt.d.ts +14 -0
  21. package/dist/mcp-server/prompts/definitions/vet-counterparty.prompt.d.ts.map +1 -0
  22. package/dist/mcp-server/prompts/definitions/vet-counterparty.prompt.js +42 -0
  23. package/dist/mcp-server/prompts/definitions/vet-counterparty.prompt.js.map +1 -0
  24. package/dist/mcp-server/resources/definitions/designation.resource.d.ts +25 -0
  25. package/dist/mcp-server/resources/definitions/designation.resource.d.ts.map +1 -0
  26. package/dist/mcp-server/resources/definitions/designation.resource.js +57 -0
  27. package/dist/mcp-server/resources/definitions/designation.resource.js.map +1 -0
  28. package/dist/mcp-server/resources/definitions/entity.resource.d.ts +17 -0
  29. package/dist/mcp-server/resources/definitions/entity.resource.d.ts.map +1 -0
  30. package/dist/mcp-server/resources/definitions/entity.resource.js +40 -0
  31. package/dist/mcp-server/resources/definitions/entity.resource.js.map +1 -0
  32. package/dist/mcp-server/resources/definitions/index.d.ts +32 -0
  33. package/dist/mcp-server/resources/definitions/index.d.ts.map +1 -0
  34. package/dist/mcp-server/resources/definitions/index.js +11 -0
  35. package/dist/mcp-server/resources/definitions/index.js.map +1 -0
  36. package/dist/mcp-server/resources/definitions/sources.resource.d.ts +9 -0
  37. package/dist/mcp-server/resources/definitions/sources.resource.d.ts.map +1 -0
  38. package/dist/mcp-server/resources/definitions/sources.resource.js +50 -0
  39. package/dist/mcp-server/resources/definitions/sources.resource.js.map +1 -0
  40. package/dist/mcp-server/tools/definitions/_shared.d.ts +13 -0
  41. package/dist/mcp-server/tools/definitions/_shared.d.ts.map +1 -0
  42. package/dist/mcp-server/tools/definitions/_shared.js +13 -0
  43. package/dist/mcp-server/tools/definitions/_shared.js.map +1 -0
  44. package/dist/mcp-server/tools/definitions/get-designation.tool.d.ts +78 -0
  45. package/dist/mcp-server/tools/definitions/get-designation.tool.d.ts.map +1 -0
  46. package/dist/mcp-server/tools/definitions/get-designation.tool.js +168 -0
  47. package/dist/mcp-server/tools/definitions/get-designation.tool.js.map +1 -0
  48. package/dist/mcp-server/tools/definitions/get-entity.tool.d.ts +55 -0
  49. package/dist/mcp-server/tools/definitions/get-entity.tool.d.ts.map +1 -0
  50. package/dist/mcp-server/tools/definitions/get-entity.tool.js +176 -0
  51. package/dist/mcp-server/tools/definitions/get-entity.tool.js.map +1 -0
  52. package/dist/mcp-server/tools/definitions/index.d.ts +306 -0
  53. package/dist/mcp-server/tools/definitions/index.d.ts.map +1 -0
  54. package/dist/mcp-server/tools/definitions/index.js +21 -0
  55. package/dist/mcp-server/tools/definitions/index.js.map +1 -0
  56. package/dist/mcp-server/tools/definitions/list-sources.tool.d.ts +23 -0
  57. package/dist/mcp-server/tools/definitions/list-sources.tool.d.ts.map +1 -0
  58. package/dist/mcp-server/tools/definitions/list-sources.tool.js +106 -0
  59. package/dist/mcp-server/tools/definitions/list-sources.tool.js.map +1 -0
  60. package/dist/mcp-server/tools/definitions/resolve-entity.tool.d.ts +51 -0
  61. package/dist/mcp-server/tools/definitions/resolve-entity.tool.d.ts.map +1 -0
  62. package/dist/mcp-server/tools/definitions/resolve-entity.tool.js +148 -0
  63. package/dist/mcp-server/tools/definitions/resolve-entity.tool.js.map +1 -0
  64. package/dist/mcp-server/tools/definitions/screen-name.tool.d.ts +82 -0
  65. package/dist/mcp-server/tools/definitions/screen-name.tool.d.ts.map +1 -0
  66. package/dist/mcp-server/tools/definitions/screen-name.tool.js +172 -0
  67. package/dist/mcp-server/tools/definitions/screen-name.tool.js.map +1 -0
  68. package/dist/mcp-server/tools/definitions/trace-ownership.tool.d.ts +74 -0
  69. package/dist/mcp-server/tools/definitions/trace-ownership.tool.d.ts.map +1 -0
  70. package/dist/mcp-server/tools/definitions/trace-ownership.tool.js +273 -0
  71. package/dist/mcp-server/tools/definitions/trace-ownership.tool.js.map +1 -0
  72. package/dist/services/screening/fixtures.d.ts +17 -0
  73. package/dist/services/screening/fixtures.d.ts.map +1 -0
  74. package/dist/services/screening/fixtures.js +162 -0
  75. package/dist/services/screening/fixtures.js.map +1 -0
  76. package/dist/services/screening/gleif-ingest.d.ts +68 -0
  77. package/dist/services/screening/gleif-ingest.d.ts.map +1 -0
  78. package/dist/services/screening/gleif-ingest.js +251 -0
  79. package/dist/services/screening/gleif-ingest.js.map +1 -0
  80. package/dist/services/screening/sanctions-ingest.d.ts +46 -0
  81. package/dist/services/screening/sanctions-ingest.d.ts.map +1 -0
  82. package/dist/services/screening/sanctions-ingest.js +688 -0
  83. package/dist/services/screening/sanctions-ingest.js.map +1 -0
  84. package/dist/services/screening/schema.d.ts +52 -0
  85. package/dist/services/screening/schema.d.ts.map +1 -0
  86. package/dist/services/screening/schema.js +125 -0
  87. package/dist/services/screening/schema.js.map +1 -0
  88. package/dist/services/screening/screening-service.d.ts +203 -0
  89. package/dist/services/screening/screening-service.d.ts.map +1 -0
  90. package/dist/services/screening/screening-service.js +702 -0
  91. package/dist/services/screening/screening-service.js.map +1 -0
  92. package/dist/services/screening/text-matching.d.ts +53 -0
  93. package/dist/services/screening/text-matching.d.ts.map +1 -0
  94. package/dist/services/screening/text-matching.js +514 -0
  95. package/dist/services/screening/text-matching.js.map +1 -0
  96. package/dist/services/screening/types.d.ts +154 -0
  97. package/dist/services/screening/types.d.ts.map +1 -0
  98. package/dist/services/screening/types.js +24 -0
  99. package/dist/services/screening/types.js.map +1 -0
  100. package/dist/services/screening/xml.d.ts +29 -0
  101. package/dist/services/screening/xml.d.ts.map +1 -0
  102. package/dist/services/screening/xml.js +46 -0
  103. package/dist/services/screening/xml.js.map +1 -0
  104. package/package.json +119 -0
  105. package/scripts/_mirror-context.ts +21 -0
  106. package/scripts/mirror-init.ts +66 -0
  107. package/scripts/mirror-refresh.ts +56 -0
  108. package/scripts/mirror-seed.ts +36 -0
  109. package/scripts/mirror-verify.ts +44 -0
  110. package/server.json +148 -0
@@ -0,0 +1,702 @@
1
+ /**
2
+ * @fileoverview The screening service — owns the two local mirrors (sanctions
3
+ * `designation` + GLEIF `lei_entity`, both SQLite + FTS5 via the framework
4
+ * MirrorService), the normalized-schema write path that keeps the per-alias
5
+ * `name` index and `lei_relationship` table in lockstep, and the matching
6
+ * engine (exact → strict-token → scored Jaro-Winkler / phonetic fuzzy). All six
7
+ * tools compose against this service; the agent never sees the source boundary.
8
+ *
9
+ * The matching engine surfaces only real signal: exact/strong hits are
10
+ * deterministic and unscored; approximate hits carry the raw Jaro-Winkler
11
+ * similarity (0–1). There is no fabricated composite "confidence".
12
+ * @module services/screening/screening-service
13
+ */
14
+ import { defineMirror, sqliteMirrorStore } from '@cyanheads/mcp-ts-core/mirror';
15
+ import { logger } from '@cyanheads/mcp-ts-core/utils';
16
+ import { getServerConfig } from '../../config/server-config.js';
17
+ import { createSanctionsSync, } from '../../services/screening/sanctions-ingest.js';
18
+ import { designationStoreSpec, ensureDesignationAuxSchema, ensureLeiAuxSchema, LEI_RELATIONSHIP_TABLE, leiStoreSpec, NAME_FTS_TABLE, NAME_TABLE, } from '../../services/screening/schema.js';
19
+ import { bestTokenScore, buildFtsMatch, doubleMetaphone, fold, jaroWinkler, tokenize, } from '../../services/screening/text-matching.js';
20
+ import { SOURCE_CODES } from '../../services/screening/types.js';
21
+ /**
22
+ * The screening service. Holds both mirrors and the matching engine. Initialized
23
+ * once in `setup()`; tools access it via {@link getScreeningService}.
24
+ */
25
+ export class ScreeningService {
26
+ config;
27
+ designationMirror;
28
+ leiMirror;
29
+ designationAuxReady = false;
30
+ leiAuxReady = false;
31
+ constructor(config) {
32
+ this.config = config;
33
+ // The two mirrors use SEPARATE database files. `mirror_sync_state` is a
34
+ // single-row table per database, so sharing one file would make the
35
+ // sanctions and GLEIF readiness/sync-state clobber each other — and their
36
+ // lifecycles are independent (sanctions re-harvests in full; GLEIF inits +
37
+ // applies deltas, and is far larger). The GLEIF file is a sibling of the
38
+ // configured sanctions path.
39
+ this.designationMirror = defineMirror({
40
+ name: 'sanctions-designations',
41
+ store: sqliteMirrorStore({ path: config.mirrorPath, ...designationStoreSpec }),
42
+ sync: createSanctionsSync(),
43
+ });
44
+ this.leiMirror = defineMirror({
45
+ name: 'gleif-entities',
46
+ store: sqliteMirrorStore({ path: gleifPath(config.mirrorPath), ...leiStoreSpec }),
47
+ // GLEIF ingest is driven directly via ingestLeiEntities/ingestLeiRelationships
48
+ // (golden-copy init + delta refresh), so the mirror's own sync yields no
49
+ // pages — the lifecycle scripts call the ingest methods.
50
+ sync: emptySync,
51
+ });
52
+ }
53
+ /**
54
+ * Open the designation mirror's raw handle, ensuring the auxiliary `name` index
55
+ * + FTS exist first. The framework's migration runner skips migrations on a
56
+ * fresh DB, so the aux DDL is applied here (idempotently) on first use.
57
+ */
58
+ async designationHandle() {
59
+ const raw = await this.designationMirror.raw();
60
+ if (!this.designationAuxReady) {
61
+ ensureDesignationAuxSchema(raw);
62
+ this.designationAuxReady = true;
63
+ }
64
+ return raw;
65
+ }
66
+ /** Open the GLEIF mirror's raw handle, ensuring `lei_relationship` exists first. */
67
+ async leiHandle() {
68
+ const raw = await this.leiMirror.raw();
69
+ if (!this.leiAuxReady) {
70
+ ensureLeiAuxSchema(raw);
71
+ this.leiAuxReady = true;
72
+ }
73
+ return raw;
74
+ }
75
+ /** The sanctions designation mirror (for sync lifecycle scripts). */
76
+ get designations() {
77
+ return this.designationMirror;
78
+ }
79
+ /** The GLEIF entity mirror (for sync lifecycle scripts). */
80
+ get leiEntities() {
81
+ return this.leiMirror;
82
+ }
83
+ /** True once the sanctions mirror has ever completed a full sync. */
84
+ sanctionsReady() {
85
+ return this.designationMirror.ready();
86
+ }
87
+ /** True once the GLEIF mirror has ever completed a full sync. */
88
+ leiReady() {
89
+ return this.leiMirror.ready();
90
+ }
91
+ // ─── Ingest write path ───────────────────────────────────────────────────
92
+ /**
93
+ * Apply a batch of normalized designations. Writes the primary `designation`
94
+ * rows via the mirror store, then refreshes the per-alias `name` index for
95
+ * exactly those designations — all in one transaction. Idempotent per id.
96
+ */
97
+ async ingestDesignations(designations) {
98
+ if (designations.length === 0)
99
+ return;
100
+ const handle = await this.designationHandle();
101
+ handle.transaction(() => {
102
+ const upsert = handle.prepare(`INSERT INTO designation
103
+ (id, source, source_entry_id, entity_type, primary_name, normalized_name,
104
+ program, legal_basis, designation_date, payload)
105
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
106
+ ON CONFLICT(id) DO UPDATE SET
107
+ source=excluded.source, source_entry_id=excluded.source_entry_id,
108
+ entity_type=excluded.entity_type, primary_name=excluded.primary_name,
109
+ normalized_name=excluded.normalized_name, program=excluded.program,
110
+ legal_basis=excluded.legal_basis, designation_date=excluded.designation_date,
111
+ payload=excluded.payload`);
112
+ const deleteNames = handle.prepare(`DELETE FROM ${NAME_TABLE} WHERE designation_id = ?`);
113
+ const insertName = handle.prepare(`INSERT INTO ${NAME_TABLE} (designation_id, name, normalized, phonetic, name_type)
114
+ VALUES (?, ?, ?, ?, ?)`);
115
+ for (const d of designations) {
116
+ const normalizedPrimary = fold(d.primaryName);
117
+ upsert.run(d.id, d.source, d.sourceEntryId, d.entityType, d.primaryName, normalizedPrimary, d.program ?? null, d.legalBasis ?? null, d.designationDate ?? null, JSON.stringify(d.payload));
118
+ deleteNames.run(d.id);
119
+ for (const rec of this.allNames(d)) {
120
+ const normalized = fold(rec.name);
121
+ if (!normalized)
122
+ continue;
123
+ insertName.run(d.id, rec.name, normalized, doubleMetaphone(normalized), rec.nameType);
124
+ }
125
+ }
126
+ });
127
+ }
128
+ /**
129
+ * Rebuild the per-alias `name` index from the current `designation` table.
130
+ * The MirrorService `sync` path only writes the primary `designation` rows, so
131
+ * after a `runSync` the lifecycle scripts (and the refresh cron) call this to
132
+ * regenerate the matching index — including the Double-Metaphone phonetic keys
133
+ * that can't be computed in SQL. Idempotent: clears and repopulates `name`.
134
+ */
135
+ async rebuildNameIndex() {
136
+ const handle = await this.designationHandle();
137
+ const rows = handle
138
+ .prepare(`SELECT id, primary_name, payload FROM designation`)
139
+ .all();
140
+ handle.transaction(() => {
141
+ handle.exec(`DELETE FROM ${NAME_TABLE}`);
142
+ const insertName = handle.prepare(`INSERT INTO ${NAME_TABLE} (designation_id, name, normalized, phonetic, name_type)
143
+ VALUES (?, ?, ?, ?, ?)`);
144
+ for (const row of rows) {
145
+ const payload = JSON.parse(row.payload);
146
+ const names = [
147
+ { name: row.primary_name, nameType: 'primary' },
148
+ ...payload.aliases,
149
+ ];
150
+ for (const rec of names) {
151
+ const normalized = fold(rec.name);
152
+ if (!normalized)
153
+ continue;
154
+ insertName.run(row.id, rec.name, normalized, doubleMetaphone(normalized), rec.nameType);
155
+ }
156
+ }
157
+ });
158
+ }
159
+ /** Apply a batch of GLEIF Level 1 entity records via the LEI mirror store. */
160
+ async ingestLeiEntities(entities) {
161
+ if (entities.length === 0)
162
+ return;
163
+ await this.leiMirror.store.applyBatch(entities.map((e) => ({
164
+ lei: e.lei,
165
+ legal_name: e.legalName,
166
+ normalized_name: fold(e.legalName),
167
+ other_names: JSON.stringify(e.otherNames),
168
+ jurisdiction: e.jurisdiction ?? null,
169
+ status: e.status ?? null,
170
+ legal_address: e.legalAddress ?? null,
171
+ headquarters_address: e.headquartersAddress ?? null,
172
+ registration_authority_id: e.registrationAuthorityId ?? null,
173
+ registration_authority_entity_id: e.registrationAuthorityEntityId ?? null,
174
+ last_update: e.lastUpdate ?? null,
175
+ payload: JSON.stringify(e),
176
+ })), []);
177
+ }
178
+ /**
179
+ * Apply a batch of GLEIF Level 2 relationships. Replaces all rows for each
180
+ * child LEI present in the batch (so a refresh that re-states a child's
181
+ * relationships is idempotent).
182
+ */
183
+ async ingestLeiRelationships(relationships) {
184
+ if (relationships.length === 0)
185
+ return;
186
+ const handle = await this.leiHandle();
187
+ const children = [...new Set(relationships.map((r) => r.childLei))];
188
+ handle.transaction(() => {
189
+ const clear = handle.prepare(`DELETE FROM ${LEI_RELATIONSHIP_TABLE} WHERE child_lei = ?`);
190
+ for (const child of children)
191
+ clear.run(child);
192
+ const insert = handle.prepare(`INSERT OR REPLACE INTO ${LEI_RELATIONSHIP_TABLE}
193
+ (child_lei, parent_lei, relationship_type, relationship_status, relationship_period)
194
+ VALUES (?, ?, ?, ?, ?)`);
195
+ for (const r of relationships) {
196
+ insert.run(r.childLei, r.parentLei, r.relationshipType, r.relationshipStatus ?? null, r.relationshipPeriod ?? null);
197
+ }
198
+ });
199
+ }
200
+ /** Primary name + aliases as one list, primary first. */
201
+ allNames(d) {
202
+ return [{ name: d.primaryName, nameType: 'primary' }, ...d.payload.aliases];
203
+ }
204
+ /**
205
+ * Mark the sanctions mirror's sync state complete (sets `completedAt`/`total`),
206
+ * so the read path's `ready()` gate opens. The MirrorService `runSync` path
207
+ * sets this automatically; the fixture-seed and lifecycle-rebuild paths call it
208
+ * explicitly after a direct ingest.
209
+ */
210
+ async markSanctionsReady(total) {
211
+ await this.designationMirror.store.writeState({
212
+ status: 'complete',
213
+ completedAt: new Date().toISOString(),
214
+ total,
215
+ });
216
+ }
217
+ /** Mark the GLEIF mirror's sync state complete — see {@link markSanctionsReady}. */
218
+ async markLeiReady(total) {
219
+ await this.leiMirror.store.writeState({
220
+ status: 'complete',
221
+ completedAt: new Date().toISOString(),
222
+ total,
223
+ });
224
+ }
225
+ /**
226
+ * Load a synthetic fixture into both mirrors and mark them ready. For tests and
227
+ * a quick local smoke run — NOT the real corpus, which loads via `mirror:init`.
228
+ */
229
+ async seedFixtures(fixtures) {
230
+ await this.ingestDesignations(fixtures.designations);
231
+ await this.ingestLeiEntities(fixtures.leiEntities);
232
+ await this.ingestLeiRelationships(fixtures.leiRelationships);
233
+ await this.markSanctionsReady(fixtures.designations.length);
234
+ await this.markLeiReady(fixtures.leiEntities.length);
235
+ }
236
+ // ─── Matching engine: screen a name against the sanctions lists ────────────
237
+ /**
238
+ * Screen a name against the loaded sanctions lists. Strict mode runs exact
239
+ * then all-tokens-present (FTS5). Fuzzy mode (explicit, or auto when strict is
240
+ * empty) adds Jaro-Winkler + phonetic scoring against the per-alias index.
241
+ */
242
+ async screenName(opts, ctx) {
243
+ const normalizedQuery = fold(opts.query);
244
+ const queryTokens = tokenize(normalizedQuery);
245
+ const handle = await this.designationHandle();
246
+ const sourceFilter = this.sourceFilterClause(opts.sources);
247
+ // entityType is enum-constrained at the tool boundary; escape at the SQL sink
248
+ // anyway so the service stays injection-safe for any future caller that
249
+ // reaches it without re-validating (matches the jurisdiction handling below).
250
+ const typeFilter = opts.entityType === 'any'
251
+ ? ''
252
+ : ` AND d.entity_type = '${this.escapeLiteral(opts.entityType)}'`;
253
+ // Step 1+2: exact-normalized, then strict all-tokens-present (FTS5 AND).
254
+ const strictHits = this.runStrict(handle, {
255
+ normalizedQuery,
256
+ queryTokens,
257
+ sourceFilter,
258
+ typeFilter,
259
+ limit: opts.limit,
260
+ });
261
+ // Explicit fuzzy always runs fuzzy; strict auto-upgrades to fuzzy on an empty
262
+ // result ONLY when auto-fallback is enabled (the default — off for internal
263
+ // cross-reference screens, see ScreenNameOptions.autoFallback).
264
+ const wantFuzzy = opts.matchMode === 'fuzzy' || (opts.autoFallback !== false && strictHits.length === 0);
265
+ if (!wantFuzzy || queryTokens.length === 0) {
266
+ ctx.log.debug('Strict screening complete', {
267
+ normalizedQuery,
268
+ hitCount: strictHits.length,
269
+ });
270
+ return {
271
+ hits: strictHits.slice(0, opts.limit),
272
+ modeUsed: 'strict',
273
+ normalizedQuery,
274
+ fuzzyFallbackTriggered: false,
275
+ };
276
+ }
277
+ // Step 3/3b: fuzzy (Jaro-Winkler) + phonetic over the candidate pool.
278
+ const minScore = opts.minScore ?? this.config.fuzzyMinScore;
279
+ const fuzzyHits = this.runFuzzy(handle, {
280
+ normalizedQuery,
281
+ queryTokens,
282
+ sourceFilter,
283
+ typeFilter,
284
+ minScore,
285
+ cap: this.config.fuzzyMaxResults,
286
+ });
287
+ // Merge: keep strict hits (deterministic, unscored) ahead of fuzzy, dedup by id.
288
+ const merged = this.mergeHits(strictHits, fuzzyHits);
289
+ ctx.log.debug('Fuzzy screening complete', {
290
+ normalizedQuery,
291
+ strictCount: strictHits.length,
292
+ fuzzyCount: fuzzyHits.length,
293
+ minScore,
294
+ });
295
+ return {
296
+ hits: merged.slice(0, opts.limit),
297
+ modeUsed: 'fuzzy',
298
+ normalizedQuery,
299
+ fuzzyFallbackTriggered: opts.matchMode === 'strict' && strictHits.length === 0,
300
+ };
301
+ }
302
+ sourceFilterClause(sources) {
303
+ if (sources.length === 0 || sources.length === SOURCE_CODES.length)
304
+ return '';
305
+ // Source codes are enum-constrained upstream; escape at the sink regardless
306
+ // so the IN-list stays injection-safe independent of the caller.
307
+ const list = sources.map((s) => `'${this.escapeLiteral(s)}'`).join(', ');
308
+ return ` AND d.source IN (${list})`;
309
+ }
310
+ runStrict(handle, args) {
311
+ const match = buildFtsMatch(args.normalizedQuery);
312
+ if (!match)
313
+ return [];
314
+ // FTS over the name index; join back to name + designation. Classify each
315
+ // matched name as exact (normalized equality) or strong (all tokens present).
316
+ const rows = handle
317
+ .prepare(`SELECT n.designation_id, n.name, n.normalized, n.phonetic, n.name_type,
318
+ d.source, d.source_entry_id, d.entity_type, d.primary_name,
319
+ d.program, d.designation_date
320
+ FROM ${NAME_FTS_TABLE} f
321
+ JOIN ${NAME_TABLE} n ON n.rowid = f.rowid
322
+ JOIN designation d ON d.id = n.designation_id
323
+ WHERE ${NAME_FTS_TABLE} MATCH ?${args.sourceFilter}${args.typeFilter}
324
+ LIMIT 5000`)
325
+ .all(match);
326
+ const byDesignation = new Map();
327
+ for (const row of rows) {
328
+ const isExact = row.normalized === args.normalizedQuery;
329
+ const hit = this.rowToHit(row, isExact ? 'exact' : 'strong');
330
+ const existing = byDesignation.get(row.designation_id);
331
+ // Prefer the higher-confidence match type per designation.
332
+ if (!existing || (isExact && existing.matchType !== 'exact')) {
333
+ byDesignation.set(row.designation_id, hit);
334
+ }
335
+ }
336
+ // Exact hits first, then strong; stable within each band.
337
+ return [...byDesignation.values()].sort((a, b) => matchRank(b.matchType) - matchRank(a.matchType));
338
+ }
339
+ runFuzzy(handle, args) {
340
+ const queryPhonetic = doubleMetaphone(args.normalizedQuery);
341
+ const phoneticKeys = [...new Set(queryPhonetic.split(/\s+/).filter(Boolean))];
342
+ // Candidate pool from two blocking strategies, each query SEPARATELY so no
343
+ // one strategy starves the others under the row cap:
344
+ // (a) phonetic-key equality — catches transliteration-class variants whose
345
+ // Jaro-Winkler similarity is below the floor (e.g. Mohammed/Muhammad).
346
+ // (b) a leading-trigram prefix shared with any query token — pulls the
347
+ // JW-near candidates whose phonetic key differs (e.g. Volkov/Volkow).
348
+ // A single OR'd query with one shared LIMIT let the first clause (e.g. a
349
+ // common given-name prefix) exhaust the cap before later tokens' rows were
350
+ // scanned, dropping the true multi-token match from the pool entirely
351
+ // (e.g. "nikolas maduro moros" never reaching "MADURO MOROS Nicolas"). Each
352
+ // strategy now gets its own bounded query and the rows are merged, deduped by
353
+ // rowid — every query token contributes candidates.
354
+ const select = `SELECT n.rowid AS rowid, n.designation_id, n.name, n.normalized,
355
+ n.phonetic, n.name_type, d.source, d.source_entry_id, d.entity_type,
356
+ d.primary_name, d.program, d.designation_date
357
+ FROM ${NAME_TABLE} n
358
+ JOIN designation d ON d.id = n.designation_id`;
359
+ const prefixes = [
360
+ ...new Set(args.queryTokens.map((t) => t.slice(0, 3)).filter((p) => p.length >= 2)),
361
+ ];
362
+ // Per-strategy budget keeps total work bounded while guaranteeing fair
363
+ // representation; the final scored set is still capped to `args.cap`.
364
+ const perStrategyLimit = Math.max(args.cap * 4, 200);
365
+ const byRowid = new Map();
366
+ const collect = (rows) => {
367
+ for (const r of rows)
368
+ if (!byRowid.has(r.rowid))
369
+ byRowid.set(r.rowid, r);
370
+ };
371
+ if (phoneticKeys.length > 0) {
372
+ const placeholders = phoneticKeys.map(() => '?').join(', ');
373
+ collect(handle
374
+ .prepare(`${select} WHERE n.phonetic IN (${placeholders})${args.sourceFilter}${args.typeFilter} LIMIT ?`)
375
+ .all(...phoneticKeys, perStrategyLimit));
376
+ }
377
+ for (const prefix of prefixes) {
378
+ collect(handle
379
+ .prepare(`${select} WHERE n.normalized LIKE ?${args.sourceFilter}${args.typeFilter} LIMIT ?`)
380
+ .all(`%${prefix}%`, perStrategyLimit));
381
+ }
382
+ if (byRowid.size === 0)
383
+ return [];
384
+ const rows = [...byRowid.values()];
385
+ const phoneticSet = new Set(phoneticKeys);
386
+ const scored = [];
387
+ for (const row of rows) {
388
+ const candidateTokens = tokenize(row.normalized);
389
+ const tokenScore = bestTokenScore(args.queryTokens, candidateTokens);
390
+ const wholeScore = jaroWinkler(args.normalizedQuery, row.normalized);
391
+ const score = Math.max(tokenScore, wholeScore);
392
+ const phoneticHit = row.phonetic.split(/\s+/).some((k) => k && phoneticSet.has(k));
393
+ if (score >= args.minScore || phoneticHit) {
394
+ const hit = this.rowToHit(row, 'approximate');
395
+ hit.score = Number(score.toFixed(4));
396
+ scored.push(hit);
397
+ }
398
+ }
399
+ // Best score per designation, then sort by score desc, cap.
400
+ const byDesignation = new Map();
401
+ for (const hit of scored) {
402
+ const existing = byDesignation.get(hit.designationId);
403
+ if (!existing || (hit.score ?? 0) > (existing.score ?? 0)) {
404
+ byDesignation.set(hit.designationId, hit);
405
+ }
406
+ }
407
+ return [...byDesignation.values()]
408
+ .sort((a, b) => (b.score ?? 0) - (a.score ?? 0))
409
+ .slice(0, args.cap);
410
+ }
411
+ mergeHits(strict, fuzzy) {
412
+ const seen = new Set(strict.map((h) => h.designationId));
413
+ const out = [...strict];
414
+ for (const hit of fuzzy) {
415
+ if (!seen.has(hit.designationId)) {
416
+ out.push(hit);
417
+ seen.add(hit.designationId);
418
+ }
419
+ }
420
+ return out;
421
+ }
422
+ rowToHit(row, matchType) {
423
+ return {
424
+ designationId: row.designation_id,
425
+ source: row.source,
426
+ sourceEntryId: row.source_entry_id,
427
+ entityType: row.entity_type,
428
+ primaryName: row.primary_name,
429
+ matchedName: row.name,
430
+ matchedNameType: row.name_type,
431
+ matchType,
432
+ ...(row.program ? { program: row.program } : {}),
433
+ ...(row.designation_date ? { designationDate: row.designation_date } : {}),
434
+ };
435
+ }
436
+ // ─── Designation detail ────────────────────────────────────────────────────
437
+ /** Full normalized designation by source + entry id, or null if absent. */
438
+ async getDesignation(source, entryId) {
439
+ const rows = await this.designationMirror.getByIds([`${source}:${entryId}`]);
440
+ const row = rows[0];
441
+ if (!row)
442
+ return null;
443
+ return {
444
+ id: String(row.id),
445
+ source: String(row.source),
446
+ sourceEntryId: String(row.source_entry_id),
447
+ entityType: String(row.entity_type),
448
+ primaryName: String(row.primary_name),
449
+ ...(row.program ? { program: String(row.program) } : {}),
450
+ ...(row.legal_basis ? { legalBasis: String(row.legal_basis) } : {}),
451
+ ...(row.designation_date ? { designationDate: String(row.designation_date) } : {}),
452
+ payload: JSON.parse(String(row.payload)),
453
+ };
454
+ }
455
+ // ─── LEI resolution ──────────────────────────────────────────────────────
456
+ /** Resolve a company name to ranked GLEIF LEI candidates. */
457
+ async resolveEntity(opts, ctx) {
458
+ const normalizedQuery = fold(opts.query);
459
+ const queryTokens = tokenize(normalizedQuery);
460
+ const handle = await this.leiHandle();
461
+ const filters = [];
462
+ if (opts.jurisdiction)
463
+ filters.push(`e.jurisdiction = '${this.escapeLiteral(opts.jurisdiction)}'`);
464
+ if (opts.status === 'issued')
465
+ filters.push(`UPPER(e.status) = 'ISSUED'`);
466
+ else if (opts.status === 'lapsed')
467
+ filters.push(`UPPER(e.status) != 'ISSUED'`);
468
+ const filterClause = filters.length ? ` AND ${filters.join(' AND ')}` : '';
469
+ const strict = this.runLeiStrict(handle, { normalizedQuery, filterClause, limit: opts.limit });
470
+ const wantFuzzy = opts.matchMode === 'fuzzy' || strict.length === 0;
471
+ if (!wantFuzzy || queryTokens.length === 0) {
472
+ return {
473
+ matches: strict.slice(0, opts.limit),
474
+ modeUsed: 'strict',
475
+ normalizedQuery,
476
+ fuzzyFallbackTriggered: false,
477
+ };
478
+ }
479
+ const minScore = opts.minScore ?? this.config.fuzzyMinScore;
480
+ const fuzzy = this.runLeiFuzzy(handle, {
481
+ normalizedQuery,
482
+ queryTokens,
483
+ filterClause,
484
+ minScore,
485
+ cap: this.config.fuzzyMaxResults,
486
+ });
487
+ const seen = new Set(strict.map((m) => m.lei));
488
+ const merged = [...strict, ...fuzzy.filter((m) => !seen.has(m.lei))];
489
+ ctx.log.debug('LEI resolution complete', {
490
+ normalizedQuery,
491
+ strictCount: strict.length,
492
+ fuzzyCount: fuzzy.length,
493
+ });
494
+ return {
495
+ matches: merged.slice(0, opts.limit),
496
+ modeUsed: 'fuzzy',
497
+ normalizedQuery,
498
+ fuzzyFallbackTriggered: opts.matchMode === 'strict' && strict.length === 0,
499
+ };
500
+ }
501
+ runLeiStrict(handle, args) {
502
+ const match = buildFtsMatch(args.normalizedQuery);
503
+ if (!match)
504
+ return [];
505
+ const rows = handle
506
+ .prepare(`SELECT e.lei, e.legal_name, e.normalized_name, e.other_names, e.jurisdiction, e.status
507
+ FROM ${leiStoreSpec.table}_fts f
508
+ JOIN ${leiStoreSpec.table} e ON e.rowid = f.rowid
509
+ WHERE ${leiStoreSpec.table}_fts MATCH ?${args.filterClause}
510
+ LIMIT 2000`)
511
+ .all(match);
512
+ return rows
513
+ .map((row) => {
514
+ const isExact = row.normalized_name === args.normalizedQuery;
515
+ return this.leiRowToMatch(row, isExact ? 'exact' : 'strong', row.legal_name);
516
+ })
517
+ .sort((a, b) => matchRank(b.matchType) - matchRank(a.matchType));
518
+ }
519
+ runLeiFuzzy(handle, args) {
520
+ // Block on EVERY query token's leading 3 chars, one bounded query each, then
521
+ // merge deduped by LEI. Blocking on only the first token starved the pool
522
+ // when the first token wasn't the entity's leading word (order swaps) or was
523
+ // a common word that exhausted the cap before the distinctive token's rows.
524
+ const prefixes = [
525
+ ...new Set(args.queryTokens.map((t) => t.slice(0, 3)).filter((p) => p.length >= 2)),
526
+ ];
527
+ const perStrategyLimit = Math.max(args.cap * 4, 200);
528
+ const byLei = new Map();
529
+ for (const prefix of prefixes) {
530
+ const part = handle
531
+ .prepare(`SELECT e.lei, e.legal_name, e.normalized_name, e.other_names, e.jurisdiction, e.status
532
+ FROM ${leiStoreSpec.table} e
533
+ WHERE e.normalized_name LIKE ?${args.filterClause}
534
+ LIMIT ?`)
535
+ .all(`%${prefix}%`, perStrategyLimit);
536
+ for (const r of part)
537
+ if (!byLei.has(r.lei))
538
+ byLei.set(r.lei, r);
539
+ }
540
+ const rows = [...byLei.values()];
541
+ const scored = [];
542
+ for (const row of rows) {
543
+ const names = [row.legal_name, ...JSON.parse(row.other_names || '[]')];
544
+ let best = 0;
545
+ let bestName = row.legal_name;
546
+ for (const name of names) {
547
+ const folded = fold(name);
548
+ const s = Math.max(jaroWinkler(args.normalizedQuery, folded), bestTokenScore(args.queryTokens, tokenize(folded)));
549
+ if (s > best) {
550
+ best = s;
551
+ bestName = name;
552
+ }
553
+ }
554
+ if (best >= args.minScore) {
555
+ const m = this.leiRowToMatch(row, 'approximate', bestName);
556
+ m.score = Number(best.toFixed(4));
557
+ scored.push(m);
558
+ }
559
+ }
560
+ return scored.sort((a, b) => (b.score ?? 0) - (a.score ?? 0)).slice(0, args.cap);
561
+ }
562
+ leiRowToMatch(row, matchType, matchedName) {
563
+ return {
564
+ lei: row.lei,
565
+ legalName: row.legal_name,
566
+ matchedName,
567
+ matchType,
568
+ ...(row.jurisdiction ? { jurisdiction: row.jurisdiction } : {}),
569
+ ...(row.status ? { status: row.status } : {}),
570
+ };
571
+ }
572
+ /** Full GLEIF Level 1 entity by LEI, or null. */
573
+ async getLeiEntity(lei) {
574
+ const rows = await this.leiMirror.getByIds([lei]);
575
+ const row = rows[0];
576
+ if (!row?.payload)
577
+ return null;
578
+ return JSON.parse(String(row.payload));
579
+ }
580
+ /** Direct relationship edges for an LEI in the requested direction(s). */
581
+ async getRelationships(lei, direction) {
582
+ const handle = await this.leiHandle();
583
+ const out = [];
584
+ const mapRel = (r) => ({
585
+ childLei: r.child_lei,
586
+ parentLei: r.parent_lei,
587
+ relationshipType: r.relationship_type,
588
+ ...(r.relationship_status ? { relationshipStatus: r.relationship_status } : {}),
589
+ ...(r.relationship_period ? { relationshipPeriod: r.relationship_period } : {}),
590
+ });
591
+ if (direction === 'parents' || direction === 'both') {
592
+ out.push(...handle
593
+ .prepare(`SELECT * FROM ${LEI_RELATIONSHIP_TABLE} WHERE child_lei = ?`)
594
+ .all(lei)
595
+ .map(mapRel));
596
+ }
597
+ if (direction === 'children' || direction === 'both') {
598
+ out.push(...handle
599
+ .prepare(`SELECT * FROM ${LEI_RELATIONSHIP_TABLE} WHERE parent_lei = ?`)
600
+ .all(lei)
601
+ .map(mapRel));
602
+ }
603
+ return out;
604
+ }
605
+ /** Hydrate multiple LEIs to name/jurisdiction/status, preserving order. */
606
+ async getLeiEntitiesBatch(leis) {
607
+ if (leis.length === 0)
608
+ return [];
609
+ const rows = await this.leiMirror.getByIds(leis);
610
+ return rows
611
+ .filter((r) => r.payload)
612
+ .map((r) => JSON.parse(String(r.payload)));
613
+ }
614
+ // ─── Sources / freshness ───────────────────────────────────────────────────
615
+ /** Per-source record counts in the sanctions mirror. */
616
+ async sourceCounts() {
617
+ const handle = await this.designationHandle();
618
+ const rows = handle
619
+ .prepare(`SELECT source, COUNT(*) AS n FROM designation GROUP BY source`)
620
+ .all();
621
+ const bySource = new Map(rows.map((r) => [r.source, r.n]));
622
+ return SOURCE_CODES.map((code) => ({ code, recordCount: bySource.get(code) ?? 0 }));
623
+ }
624
+ /** Sanctions mirror readiness + freshness. */
625
+ async sanctionsReadiness() {
626
+ return this.toReadiness(await this.designationMirror.status());
627
+ }
628
+ /** GLEIF mirror readiness + freshness, plus L1/L2 counts. */
629
+ async leiReadiness() {
630
+ const status = this.toReadiness(await this.leiMirror.status());
631
+ const handle = await this.leiHandle();
632
+ const entityCount = handle.prepare(`SELECT COUNT(*) AS n FROM ${leiStoreSpec.table}`).get()?.n ??
633
+ 0;
634
+ const relationshipCount = handle.prepare(`SELECT COUNT(*) AS n FROM ${LEI_RELATIONSHIP_TABLE}`).get()
635
+ ?.n ?? 0;
636
+ return { ...status, entityCount, relationshipCount };
637
+ }
638
+ toReadiness(s) {
639
+ return {
640
+ ready: s.ready,
641
+ total: s.total ?? 0,
642
+ status: s.status,
643
+ ...(s.completedAt ? { completedAt: s.completedAt } : {}),
644
+ ...(s.error ? { error: s.error } : {}),
645
+ };
646
+ }
647
+ escapeLiteral(value) {
648
+ return value.replace(/'/g, "''");
649
+ }
650
+ /** Close both mirrors (lifecycle scripts / shutdown). */
651
+ async close() {
652
+ await Promise.allSettled([this.designationMirror.close(), this.leiMirror.close()]);
653
+ }
654
+ }
655
/**
 * Numeric rank used to order match types: `exact` (3) > `strong` (2) >
 * everything else (1), which includes `approximate`.
 */
function matchRank(type) {
    switch (type) {
        case 'exact':
            return 3;
        case 'strong':
            return 2;
        default:
            return 1;
    }
}
659
/**
 * Derive the GLEIF mirror's database path from the configured sanctions path by
 * inserting `.gleif` before the extension (`./data/sanctions.db` →
 * `./data/sanctions.gleif.db`). Keeps the two mirrors' sync-state independent.
 *
 * When the base name has no extension, `.gleif` is appended instead. A dot
 * that is the first character of the base name marks a dot-file, not an
 * extension (`./data/.sanctions` → `./data/.sanctions.gleif`). Both `/` and
 * `\` are recognised as directory separators.
 *
 * @param {string} sanctionsPath Configured sanctions database path.
 * @returns {string} Path for the GLEIF mirror database.
 */
function gleifPath(sanctionsPath) {
    const slash = Math.max(sanctionsPath.lastIndexOf('/'), sanctionsPath.lastIndexOf('\\'));
    const dot = sanctionsPath.lastIndexOf('.');
    // The dot must sit strictly inside the base name: `slash + 1` is the base
    // name's first character, and a dot there is a dot-file prefix.
    const hasExtension = dot > slash + 1;
    if (!hasExtension) {
        return `${sanctionsPath}.gleif`;
    }
    const stem = sanctionsPath.slice(0, dot);
    const extension = sanctionsPath.slice(dot);
    return `${stem}.gleif${extension}`;
}
671
/**
 * The GLEIF mirror's {@link SyncGenerator}: an async generator that completes
 * immediately without yielding a single page. GLEIF data is ingested via the
 * service's `ingestLeiEntities`/`ingestLeiRelationships` methods (called by
 * the lifecycle scripts), not through `runSync`.
 */
async function* emptySync() {
    // Finish straight away — there are no pages to hand out.
    return;
}
679
+ // ─── Init / accessor ─────────────────────────────────────────────────────────
680
+ let _service;
681
/**
 * Create the module-level screening service from the current server config.
 * Call from `createApp()` `setup()`. Both parameters are accepted but unused;
 * calling again replaces the previous instance.
 */
export function initScreeningService(_config, _storage) {
    const serverConfig = getServerConfig();
    _service = new ScreeningService(serverConfig);
}
685
/**
 * Return the module-level screening service.
 *
 * @throws {Error} When `initScreeningService()` has not been called (or the
 *   service has been reset).
 */
export function getScreeningService() {
    if (_service) {
        return _service;
    }
    throw new Error('ScreeningService not initialized — call initScreeningService() in setup()');
}
692
/**
 * Construct a fresh, standalone service from the current server config
 * without touching the module-level instance (lifecycle scripts run outside
 * `createApp`).
 */
export function buildScreeningService() {
    const serverConfig = getServerConfig();
    return new ScreeningService(serverConfig);
}
696
/**
 * Drop the module-level service reference so the next
 * `getScreeningService()` throws until it is initialized again. Intended for
 * test isolation only; the dropped instance is not closed here.
 */
export function resetScreeningService() {
    _service = undefined;
}
700
+ /** The framework logger, re-exported for lifecycle scripts. */
701
+ export { logger };
702
+ //# sourceMappingURL=screening-service.js.map