@cyanheads/sanctions-screening-mcp-server 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +452 -0
- package/CLAUDE.md +452 -0
- package/Dockerfile +126 -0
- package/LICENSE +201 -0
- package/README.md +354 -0
- package/changelog/0.1.x/0.1.0.md +26 -0
- package/changelog/template.md +127 -0
- package/dist/config/server-config.d.ts +37 -0
- package/dist/config/server-config.d.ts.map +1 -0
- package/dist/config/server-config.js +87 -0
- package/dist/config/server-config.js.map +1 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +70 -0
- package/dist/index.js.map +1 -0
- package/dist/mcp-server/prompts/definitions/index.d.ts +12 -0
- package/dist/mcp-server/prompts/definitions/index.d.ts.map +1 -0
- package/dist/mcp-server/prompts/definitions/index.js +9 -0
- package/dist/mcp-server/prompts/definitions/index.js.map +1 -0
- package/dist/mcp-server/prompts/definitions/vet-counterparty.prompt.d.ts +14 -0
- package/dist/mcp-server/prompts/definitions/vet-counterparty.prompt.d.ts.map +1 -0
- package/dist/mcp-server/prompts/definitions/vet-counterparty.prompt.js +42 -0
- package/dist/mcp-server/prompts/definitions/vet-counterparty.prompt.js.map +1 -0
- package/dist/mcp-server/resources/definitions/designation.resource.d.ts +25 -0
- package/dist/mcp-server/resources/definitions/designation.resource.d.ts.map +1 -0
- package/dist/mcp-server/resources/definitions/designation.resource.js +57 -0
- package/dist/mcp-server/resources/definitions/designation.resource.js.map +1 -0
- package/dist/mcp-server/resources/definitions/entity.resource.d.ts +17 -0
- package/dist/mcp-server/resources/definitions/entity.resource.d.ts.map +1 -0
- package/dist/mcp-server/resources/definitions/entity.resource.js +40 -0
- package/dist/mcp-server/resources/definitions/entity.resource.js.map +1 -0
- package/dist/mcp-server/resources/definitions/index.d.ts +32 -0
- package/dist/mcp-server/resources/definitions/index.d.ts.map +1 -0
- package/dist/mcp-server/resources/definitions/index.js +11 -0
- package/dist/mcp-server/resources/definitions/index.js.map +1 -0
- package/dist/mcp-server/resources/definitions/sources.resource.d.ts +9 -0
- package/dist/mcp-server/resources/definitions/sources.resource.d.ts.map +1 -0
- package/dist/mcp-server/resources/definitions/sources.resource.js +50 -0
- package/dist/mcp-server/resources/definitions/sources.resource.js.map +1 -0
- package/dist/mcp-server/tools/definitions/_shared.d.ts +13 -0
- package/dist/mcp-server/tools/definitions/_shared.d.ts.map +1 -0
- package/dist/mcp-server/tools/definitions/_shared.js +13 -0
- package/dist/mcp-server/tools/definitions/_shared.js.map +1 -0
- package/dist/mcp-server/tools/definitions/get-designation.tool.d.ts +78 -0
- package/dist/mcp-server/tools/definitions/get-designation.tool.d.ts.map +1 -0
- package/dist/mcp-server/tools/definitions/get-designation.tool.js +168 -0
- package/dist/mcp-server/tools/definitions/get-designation.tool.js.map +1 -0
- package/dist/mcp-server/tools/definitions/get-entity.tool.d.ts +55 -0
- package/dist/mcp-server/tools/definitions/get-entity.tool.d.ts.map +1 -0
- package/dist/mcp-server/tools/definitions/get-entity.tool.js +176 -0
- package/dist/mcp-server/tools/definitions/get-entity.tool.js.map +1 -0
- package/dist/mcp-server/tools/definitions/index.d.ts +306 -0
- package/dist/mcp-server/tools/definitions/index.d.ts.map +1 -0
- package/dist/mcp-server/tools/definitions/index.js +21 -0
- package/dist/mcp-server/tools/definitions/index.js.map +1 -0
- package/dist/mcp-server/tools/definitions/list-sources.tool.d.ts +23 -0
- package/dist/mcp-server/tools/definitions/list-sources.tool.d.ts.map +1 -0
- package/dist/mcp-server/tools/definitions/list-sources.tool.js +106 -0
- package/dist/mcp-server/tools/definitions/list-sources.tool.js.map +1 -0
- package/dist/mcp-server/tools/definitions/resolve-entity.tool.d.ts +51 -0
- package/dist/mcp-server/tools/definitions/resolve-entity.tool.d.ts.map +1 -0
- package/dist/mcp-server/tools/definitions/resolve-entity.tool.js +148 -0
- package/dist/mcp-server/tools/definitions/resolve-entity.tool.js.map +1 -0
- package/dist/mcp-server/tools/definitions/screen-name.tool.d.ts +82 -0
- package/dist/mcp-server/tools/definitions/screen-name.tool.d.ts.map +1 -0
- package/dist/mcp-server/tools/definitions/screen-name.tool.js +172 -0
- package/dist/mcp-server/tools/definitions/screen-name.tool.js.map +1 -0
- package/dist/mcp-server/tools/definitions/trace-ownership.tool.d.ts +74 -0
- package/dist/mcp-server/tools/definitions/trace-ownership.tool.d.ts.map +1 -0
- package/dist/mcp-server/tools/definitions/trace-ownership.tool.js +273 -0
- package/dist/mcp-server/tools/definitions/trace-ownership.tool.js.map +1 -0
- package/dist/services/screening/fixtures.d.ts +17 -0
- package/dist/services/screening/fixtures.d.ts.map +1 -0
- package/dist/services/screening/fixtures.js +162 -0
- package/dist/services/screening/fixtures.js.map +1 -0
- package/dist/services/screening/gleif-ingest.d.ts +68 -0
- package/dist/services/screening/gleif-ingest.d.ts.map +1 -0
- package/dist/services/screening/gleif-ingest.js +251 -0
- package/dist/services/screening/gleif-ingest.js.map +1 -0
- package/dist/services/screening/sanctions-ingest.d.ts +46 -0
- package/dist/services/screening/sanctions-ingest.d.ts.map +1 -0
- package/dist/services/screening/sanctions-ingest.js +688 -0
- package/dist/services/screening/sanctions-ingest.js.map +1 -0
- package/dist/services/screening/schema.d.ts +52 -0
- package/dist/services/screening/schema.d.ts.map +1 -0
- package/dist/services/screening/schema.js +125 -0
- package/dist/services/screening/schema.js.map +1 -0
- package/dist/services/screening/screening-service.d.ts +203 -0
- package/dist/services/screening/screening-service.d.ts.map +1 -0
- package/dist/services/screening/screening-service.js +702 -0
- package/dist/services/screening/screening-service.js.map +1 -0
- package/dist/services/screening/text-matching.d.ts +53 -0
- package/dist/services/screening/text-matching.d.ts.map +1 -0
- package/dist/services/screening/text-matching.js +514 -0
- package/dist/services/screening/text-matching.js.map +1 -0
- package/dist/services/screening/types.d.ts +154 -0
- package/dist/services/screening/types.d.ts.map +1 -0
- package/dist/services/screening/types.js +24 -0
- package/dist/services/screening/types.js.map +1 -0
- package/dist/services/screening/xml.d.ts +29 -0
- package/dist/services/screening/xml.d.ts.map +1 -0
- package/dist/services/screening/xml.js +46 -0
- package/dist/services/screening/xml.js.map +1 -0
- package/package.json +119 -0
- package/scripts/_mirror-context.ts +21 -0
- package/scripts/mirror-init.ts +66 -0
- package/scripts/mirror-refresh.ts +56 -0
- package/scripts/mirror-seed.ts +36 -0
- package/scripts/mirror-verify.ts +44 -0
- package/server.json +148 -0
|
@@ -0,0 +1,688 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Sanctions ingesters — one per source (OFAC SDN, OFAC
|
|
3
|
+
* Consolidated, EU FSF, UK Sanctions List, UN SC Consolidated). Each fetches the
|
|
4
|
+
* source file, parses the XML, and maps records onto the common
|
|
5
|
+
* {@link NormalizedDesignation} schema. The {@link createSanctionsSync} factory
|
|
6
|
+
* wires them into the MirrorService `sync` generator: each refresh re-harvests
|
|
7
|
+
* all sources in full (the combined corpus is tens of thousands of rows — no
|
|
8
|
+
* delta logic needed), yielding one page per source.
|
|
9
|
+
*
|
|
10
|
+
* The XML shapes differ wildly across sources; each parser is defensive about
|
|
11
|
+
* sparsity and arrays-of-one (fast-xml-parser collapses single children to
|
|
12
|
+
* objects), and preserves absence rather than fabricating fields.
|
|
13
|
+
* @module services/screening/sanctions-ingest
|
|
14
|
+
*/
|
|
15
|
+
import { serviceUnavailable } from '@cyanheads/mcp-ts-core/errors';
|
|
16
|
+
import { fetchWithTimeout, requestContextService, withRetry } from '@cyanheads/mcp-ts-core/utils';
|
|
17
|
+
import { getServerConfig } from '../../config/server-config.js';
|
|
18
|
+
import { fold } from '../../services/screening/text-matching.js';
|
|
19
|
+
import { parseXml } from '../../services/screening/xml.js';
|
|
20
|
+
/** Browser-style UA — the UN SC domain returns 404 to bare requests. */
|
|
21
|
+
const BROWSER_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36';
|
|
22
|
+
const FETCH_TIMEOUT_MS = 120_000;
|
|
23
|
+
/**
 * Normalize a parsed XML child into an array: arrays pass through untouched,
 * `null`/`undefined` become an empty array, and any other value is wrapped in a
 * one-element array.
 *
 * @param {*} value - Parsed node, list of nodes, or nothing.
 * @returns {Array} Always an array.
 */
function asArray(value) {
    if (Array.isArray(value)) {
        return value;
    }
    return value == null ? [] : [value];
}
|
|
29
|
+
/**
 * Coerce a scalar XML node to a trimmed string.
 *
 * Accepts strings (trimmed; blank becomes `undefined`), numbers and booleans
 * (stringified), and objects carrying a `#text` member (unwrapped recursively).
 * Booleans are handled because callers in this module compare the result against
 * the literals `'true'` / `'false'`; without this a parser-coerced boolean would
 * be silently dropped.
 *
 * @param {*} value - Parsed node value.
 * @returns {string|undefined} The text, or `undefined` when there is none.
 */
function asText(value) {
    switch (typeof value) {
        case 'string':
            return value.trim() || undefined;
        case 'number':
        case 'boolean':
            return String(value);
        case 'object':
            // `typeof null` is 'object', so guard before probing for `#text`.
            return value !== null && '#text' in value ? asText(value['#text']) : undefined;
        default:
            return undefined;
    }
}
|
|
42
|
+
/**
 * Build a fragment for conditional object spreading: `{ [key]: value }` when
 * `value` is truthy, otherwise an empty object. Lets callers leave an optional
 * property absent instead of setting it to `undefined`.
 *
 * @param {string} key - Property name to emit.
 * @param {*} value - Candidate value; any falsy value yields `{}`.
 * @returns {object} A zero- or one-property object.
 */
function opt(key, value) {
    if (!value) {
        return {};
    }
    return { [key]: value };
}
|
|
51
|
+
/**
 * Download a source document as text.
 *
 * Each attempt sends the browser-style User-Agent, follows redirects, and is
 * bounded by `FETCH_TIMEOUT_MS`. Attempts run through `withRetry` (base delay
 * 2000 ms). A body that starts like an HTML page is treated as a failure and
 * raised as `serviceUnavailable`.
 *
 * @param {string} url - Document location.
 * @param {AbortSignal} [signal] - Passed to both the fetch and the retry helper.
 * @param {string} source - Source key, used in the operation name and error text.
 * @returns {Promise<string>} The response body.
 */
function fetchXml(url, signal, source) {
    const operation = `harvest:${source}`;
    const reqCtx = requestContextService.createRequestContext({ operation });
    const attempt = async () => {
        const response = await fetchWithTimeout(url, FETCH_TIMEOUT_MS, reqCtx, {
            signal,
            headers: { 'User-Agent': BROWSER_UA, Accept: 'application/xml, text/xml, */*' },
            redirect: 'follow',
        });
        const body = await response.text();
        const looksLikeHtml = /^\s*<(!DOCTYPE\s+html|html[\s>])/i.test(body);
        if (looksLikeHtml) {
            throw serviceUnavailable(`${source} returned HTML instead of XML — likely rate-limited.`);
        }
        return body;
    };
    return withRetry(attempt, { operation, baseDelayMs: 2000, signal });
}
|
|
67
|
+
// ─── OFAC (SDN + Consolidated, advanced UN 1267/1988 schema) ────────────────────
|
|
68
|
+
/**
 * Create an ingester for one OFAC list.
 *
 * The returned object exposes the `source` key, the `url` getter exactly as
 * supplied, and a `harvest(signal)` method that fetches the document at
 * `urlGetter()`, parses the XML, and hands the result to `parseOfac`.
 *
 * @param {string} source - Source key for this list.
 * @param {() => string} urlGetter - Returns the document URL; evaluated on each harvest.
 * @returns {{source: string, url: Function, harvest: Function}} The ingester.
 */
function buildOfacIngester(source, urlGetter) {
    const harvest = async (signal) => {
        const body = await fetchXml(urlGetter(), signal, source);
        return parseOfac(parseXml(body), source);
    };
    return { source, url: urlGetter, harvest };
}
|
|
84
|
+
/**
 * Parse an OFAC document in either of its two shapes.
 *
 * A `sdnList` / `SDNList` root is treated as the standard schema and each
 * `sdnEntry` is mapped with `parseOfacStandard`. Otherwise a `Sanctions` /
 * `sanctions` root is treated as the advanced schema: reference sets and the
 * programme index are built once, then each `DistinctParty` is mapped with
 * `parseOfacAdvanced`, dropping parties that yield nothing.
 *
 * @param {object} doc - Parsed XML document.
 * @param {string} source - Source key stamped on every record.
 * @returns {Array<object>} Normalized designations; empty when neither root is present.
 */
export function parseOfac(doc, source) {
    const standardRoot = doc.sdnList ?? doc.SDNList;
    if (standardRoot) {
        return asArray(standardRoot.sdnEntry).map((entry) => parseOfacStandard(entry, source));
    }
    const advancedRoot = doc.Sanctions ?? doc.sanctions;
    if (!advancedRoot) {
        return [];
    }
    const refs = buildOfacReferenceSets(advancedRoot);
    const programsByProfile = buildOfacProgramIndex(advancedRoot);
    const designations = [];
    for (const party of asArray(advancedRoot.DistinctParties?.DistinctParty)) {
        const parsed = parseOfacAdvanced(party, source, refs, programsByProfile);
        if (parsed) {
            designations.push(parsed);
        }
    }
    return designations;
}
|
|
101
|
+
/**
 * Index the advanced schema's `ReferenceValueSets` into lookup maps.
 *
 * @param {object} sanctions - The advanced-schema root node.
 * @returns {{aliasType: Map, featureType: Map, subTypeToPartyType: Map, subTypeLabel: Map}}
 *   `aliasType` and `featureType` map id → label; `subTypeToPartyType` maps a
 *   party sub-type id → its `PartyTypeID`; `subTypeLabel` maps a party sub-type
 *   id → label.
 */
function buildOfacReferenceSets(sanctions) {
    const sets = sanctions.ReferenceValueSets ?? {};
    // Shared id → label indexing; entries missing either half are skipped.
    const labelsById = (nodes) => {
        const index = new Map();
        for (const node of asArray(nodes)) {
            const id = asText(node['@_ID']);
            const label = asText(node['#text'] ?? node);
            if (id && label) {
                index.set(id, label);
            }
        }
        return index;
    };
    const aliasType = labelsById(sets.AliasTypeValues?.AliasType);
    const featureType = labelsById(sets.FeatureTypeValues?.FeatureType);
    const subTypeToPartyType = new Map();
    const subTypeLabel = new Map();
    asArray(sets.PartySubTypeValues?.PartySubType).forEach((sub) => {
        const id = asText(sub['@_ID']);
        if (!id) {
            return;
        }
        const partyTypeId = asText(sub['@_PartyTypeID']);
        if (partyTypeId) {
            subTypeToPartyType.set(id, partyTypeId);
        }
        const label = asText(sub['#text'] ?? sub);
        if (label) {
            subTypeLabel.set(id, label);
        }
    });
    return { aliasType, featureType, subTypeToPartyType, subTypeLabel };
}
|
|
133
|
+
/**
 * Build a `profileId → { program, designationDate }` index from the advanced
 * schema's `<SanctionsEntries>`.
 *
 * Programme names are read from each `<SanctionsMeasure><Comment>`; the
 * designation date from the `<EntryEvent><Date>` (Year/Month/Day). Entries are
 * keyed by their `ProfileID` attribute.
 *
 * When several entries share a profile, their programme names are accumulated
 * (de-duplicated, first-seen order) rather than the later entry replacing the
 * earlier one. `EntryEvent` may be a single node or a list; the first event
 * that yields a date is used. A later entry's date still replaces an earlier one.
 *
 * @param {object} sanctions - The advanced-schema root node.
 * @returns {Map<string, {program?: string, designationDate?: string}>} The index.
 */
function buildOfacProgramIndex(sanctions) {
    const out = new Map();
    // Per-profile programme names, kept as sets so merging never has to re-split
    // the joined string.
    const programSets = new Map();
    const entries = sanctions.SanctionsEntries ?? {};
    for (const entry of asArray(entries.SanctionsEntry)) {
        const profileId = asText(entry['@_ProfileID']);
        if (!profileId) {
            continue;
        }
        const programs = programSets.get(profileId) ?? new Set();
        programSets.set(profileId, programs);
        for (const measure of asArray(entry.SanctionsMeasure)) {
            const name = asText(measure.Comment);
            if (name) {
                programs.add(name);
            }
        }
        let designationDate;
        for (const event of asArray(entry.EntryEvent)) {
            designationDate = composeOfacDate(event.Date);
            if (designationDate) {
                break;
            }
        }
        out.set(profileId, {
            ...(out.get(profileId) ?? {}),
            ...(programs.size ? { program: [...programs].join(', ') } : {}),
            ...(designationDate ? { designationDate } : {}),
        });
    }
    return out;
}
|
|
162
|
+
/**
 * Turn a node with `Year` / `Month` / `Day` children into an ISO-like string.
 *
 * Precision follows what is present: `YYYY-MM-DD` with month and day, `YYYY-MM`
 * with month only, bare year otherwise. Month and day are left-padded to two
 * digits; the year is emitted as found.
 *
 * @param {object} [date] - Parsed date node.
 * @returns {string|undefined} The composed date, or `undefined` without a year.
 */
function composeOfacDate(date) {
    if (!date) {
        return undefined;
    }
    const year = asText(date.Year);
    if (!year) {
        return undefined;
    }
    const month = asText(date.Month);
    if (!month) {
        // A day without a month is not representable; fall back to the year.
        return year;
    }
    const segments = [year, month.padStart(2, '0')];
    const day = asText(date.Day);
    if (day) {
        segments.push(day.padStart(2, '0'));
    }
    return segments.join('-');
}
|
|
177
|
+
/**
 * Map one standard-schema `<sdnEntry>` node onto the normalized designation shape.
 *
 * Programmes are read from `<programList><program>` (one or many), falling back
 * to a bare `<program>` child, de-duplicated and joined with ', '. Address lines
 * 1–3 are all folded into the `full` address string. The designation date is
 * whatever `extractDateFromRemarks` finds in the remarks text.
 *
 * @param {object} e - Parsed `<sdnEntry>` node.
 * @param {string} source - Source key, also used as the id prefix.
 * @returns {object} Normalized designation. A random UUID stands in for a
 *   missing `uid`, and 'Unknown' for a missing name.
 */
function parseOfacStandard(e, source) {
    const uid = asText(e.uid) ?? crypto.randomUUID();
    const sdnType = asText(e.sdnType)?.toLowerCase();
    // asText already trims and drops blanks, so the joined name needs no further cleanup.
    const primaryName = [asText(e.firstName), asText(e.lastName)].filter(Boolean).join(' ') || 'Unknown';
    const programs = asArray(e.programList?.program ?? e.program)
        .map((p) => asText(p))
        .filter(Boolean);
    const program = [...new Set(programs)].join(', ');
    const aliases = asArray(e.akaList?.aka)
        .map((aka) => ({
            name: [asText(aka.firstName), asText(aka.lastName)].filter(Boolean).join(' '),
            nameType: asText(aka.category)?.toLowerCase() === 'weak' ? 'low-quality-aka' : 'aka',
        }))
        .filter((a) => a.name);
    const identifiers = asArray(e.idList?.id)
        .map((i) => ({
            type: asText(i.idType) ?? 'ID',
            value: asText(i.idNumber) ?? '',
            ...opt('country', asText(i.idCountry)),
        }))
        .filter((i) => i.value);
    const addresses = asArray(e.addressList?.address)
        .map((a) => {
            const country = asText(a.country);
            const parts = [
                asText(a.address1),
                asText(a.address2),
                asText(a.address3),
                asText(a.city),
                asText(a.stateOrProvince),
                asText(a.postalCode),
                country,
            ].filter(Boolean);
            return {
                full: parts.join(', '),
                ...opt('country', country),
            };
        })
        .filter((a) => a.full);
    const dobs = asArray(e.dateOfBirthList?.dateOfBirthItem)
        .map((d) => opt('date', asText(d.dateOfBirth)))
        .filter((d) => d.date);
    const nationalities = asArray(e.nationalityList?.nationality)
        .map((n) => asText(n.country))
        .filter((x) => Boolean(x));
    const remarks = asText(e.remarks);
    const designationDate = remarks ? extractDateFromRemarks(remarks) : undefined;
    return {
        id: `${source}:${uid}`,
        source,
        sourceEntryId: uid,
        entityType: mapOfacType(sdnType),
        primaryName,
        ...opt('program', program),
        ...(designationDate ? { designationDate } : {}),
        payload: {
            aliases,
            identifiers,
            addresses,
            datesOfBirth: dobs,
            nationalities,
            ...opt('remarks', remarks),
        },
    };
}
|
|
250
|
+
/**
 * Parse one advanced-schema `<DistinctParty>`.
 *
 * Reads the entry id (`FixedRef`, then the party `ID`, then the Profile `ID`,
 * and only then a random UUID), the entity type (via the Profile's
 * `PartySubTypeID`), every documented name (typed via `AliasTypeID`,
 * `LowQuality` and `Primary`), and dates/places of birth from the Profile's
 * features. Programme and designation date come from `programsByProfile`; the
 * lookup tries the entry id first and then the Profile `ID`, since the index is
 * keyed by the entries' `ProfileID`.
 *
 * @param {object} p - Parsed `<DistinctParty>` node.
 * @param {string} source - Source key, also used as the id prefix.
 * @param {object} refs - Lookup maps from `buildOfacReferenceSets`.
 * @param {Map} programsByProfile - Index from `buildOfacProgramIndex`.
 * @returns {object|null} Normalized designation, or `null` when the party has no name.
 */
function parseOfacAdvanced(p, source, refs, programsByProfile) {
    const profile = p.Profile ?? p.profile;
    const profileId = asText(profile?.['@_ID']);
    const id = asText(p['@_FixedRef']) ?? asText(p['@_ID']) ?? profileId ?? crypto.randomUUID();
    const collected = [];
    for (const ident of asArray(profile?.Identity ?? profile?.identity)) {
        for (const alias of asArray(ident.Alias)) {
            const aliasLabel = refs.aliasType.get(asText(alias['@_AliasTypeID']) ?? '');
            const lowQuality = asText(alias['@_LowQuality']) === 'true';
            const aliasPrimary = asText(alias['@_Primary']) === 'true';
            for (const dn of asArray(alias.DocumentedName)) {
                const name = asArray(dn.DocumentedNamePart)
                    .map((np) => asText(np.NamePartValue?.['#text'] ?? np.NamePartValue))
                    .filter(Boolean)
                    .join(' ')
                    .trim();
                if (!name) {
                    continue;
                }
                collected.push({
                    name,
                    isPrimary: aliasPrimary,
                    nameType: ofacAliasNameType(aliasLabel, lowQuality, aliasPrimary),
                });
            }
        }
    }
    if (collected.length === 0) {
        return null;
    }
    // Primary = the first name flagged Primary; fall back to the first name seen.
    const primaryEntry = collected.find((n) => n.isPrimary) ?? collected[0];
    const aliases = collected
        .filter((n) => n !== primaryEntry)
        .map((n) => ({ name: n.name, nameType: n.nameType }));
    const { datesOfBirth, placesOfBirth } = extractOfacFeatures(profile, refs);
    const program = programsByProfile.get(id)
        ?? (profileId ? programsByProfile.get(profileId) : undefined);
    return {
        id: `${source}:${id}`,
        source,
        sourceEntryId: id,
        entityType: mapOfacPartySubType(asText(profile?.['@_PartySubTypeID']), refs),
        primaryName: primaryEntry.name,
        ...(program?.program ? { program: program.program } : {}),
        ...(program?.designationDate ? { designationDate: program.designationDate } : {}),
        payload: {
            aliases,
            identifiers: [],
            addresses: [],
            // mergeDobPob already returns [] when both lists are empty.
            datesOfBirth: mergeDobPob(datesOfBirth, placesOfBirth),
            nationalities: [],
        },
    };
}
|
|
310
|
+
/**
 * Classify an advanced-schema alias as a normalized name type.
 *
 * Precedence: the primary flag wins, then the low-quality flag; otherwise the
 * alias-type label decides, with dots ignored and case folded so "F.K.A." and
 * "fka" both map to 'fka'. Everything else is 'aka'.
 *
 * @param {string} [aliasLabel] - Label resolved from the alias type id.
 * @param {boolean} lowQuality - Whether the alias is flagged low quality.
 * @param {boolean} isPrimary - Whether the alias is flagged primary.
 * @returns {'primary'|'low-quality-aka'|'fka'|'aka'} The name type.
 */
function ofacAliasNameType(aliasLabel, lowQuality, isPrimary) {
    if (isPrimary) {
        return 'primary';
    }
    if (lowQuality) {
        return 'low-quality-aka';
    }
    const normalized = aliasLabel?.toUpperCase().replaceAll('.', '');
    return normalized === 'FKA' ? 'fka' : 'aka';
}
|
|
321
|
+
/**
 * Resolve a `PartySubTypeID` to a coarse entity type.
 *
 * A sub-type labelled "vessel" or "aircraft" (case-insensitive) maps directly.
 * Otherwise the sub-type's parent party type id decides: '1' → person,
 * '2' or '5' → organization, '4' → vessel. Anything else is 'unknown'.
 *
 * @param {string} [subTypeId] - The profile's party sub-type id.
 * @param {{subTypeLabel: Map, subTypeToPartyType: Map}} refs - Reference lookups.
 * @returns {'person'|'organization'|'vessel'|'aircraft'|'unknown'} Entity type.
 */
function mapOfacPartySubType(subTypeId, refs) {
    if (!subTypeId) {
        return 'unknown';
    }
    const subLabel = refs.subTypeLabel.get(subTypeId)?.toLowerCase();
    if (subLabel === 'vessel' || subLabel === 'aircraft') {
        return subLabel;
    }
    switch (refs.subTypeToPartyType.get(subTypeId)) {
        case '1':
            return 'person';
        case '2':
        case '5':
            return 'organization';
        case '4':
            // Transport party type without a specific sub-type label.
            return 'vessel';
        default:
            return 'unknown';
    }
}
|
|
343
|
+
/**
 * Collect birth dates and places of birth from a profile's `<Feature>` nodes.
 *
 * Each feature's type id is resolved to a label through `refs.featureType`;
 * only the labels "birthdate" and "place of birth" (case-insensitive) are read.
 * For a place, the `VersionDetail` text is preferred and `VersionLocation` text
 * is the fallback.
 *
 * @param {object} [profile] - Parsed Profile node.
 * @param {{featureType: Map}} refs - Reference lookups.
 * @returns {{datesOfBirth: string[], placesOfBirth: string[]}} Values in document order.
 */
function extractOfacFeatures(profile, refs) {
    const datesOfBirth = [];
    const placesOfBirth = [];
    for (const feature of asArray(profile?.Feature)) {
        const typeId = asText(feature['@_FeatureTypeID']) ?? '';
        const kind = refs.featureType.get(typeId)?.toLowerCase();
        switch (kind) {
            case 'birthdate': {
                const date = ofacFeatureDate(feature);
                if (date) {
                    datesOfBirth.push(date);
                }
                break;
            }
            case 'place of birth': {
                const version = feature.FeatureVersion;
                const detail = asText(version?.VersionDetail?.['#text'] ?? version?.VersionDetail);
                const pob = detail ?? asText(version?.VersionLocation);
                if (pob) {
                    placesOfBirth.push(pob);
                }
                break;
            }
            default:
                break;
        }
    }
    return { datesOfBirth, placesOfBirth };
}
|
|
366
|
+
/**
 * Read a feature's date from `FeatureVersion → DatePeriod → Start → From` and
 * compose it with `composeOfacDate`. Any missing level is treated as an empty
 * node, which yields `undefined`.
 *
 * @param {object} feat - Parsed `<Feature>` node.
 * @returns {string|undefined} ISO-like date, if the path holds a year.
 */
function ofacFeatureDate(feat) {
    const from = feat.FeatureVersion?.DatePeriod?.Start?.From;
    return composeOfacDate(from ?? {});
}
|
|
374
|
+
/**
 * Pair birth dates with places of birth by position.
 *
 * The result has one record per index up to the longer list; each record
 * carries `date` and/or `place` only when that slot holds a value, and records
 * with neither are dropped.
 *
 * @param {string[]} dates - Birth dates.
 * @param {string[]} places - Places of birth.
 * @returns {Array<{date?: string, place?: string}>} Paired records.
 */
function mergeDobPob(dates, places) {
    const count = Math.max(dates.length, places.length);
    const records = Array.from({ length: count }, (_, i) => ({
        ...opt('date', dates[i]),
        ...opt('place', places[i]),
    }));
    return records.filter((rec) => rec.date || rec.place);
}
|
|
383
|
+
/**
 * Map a lower-cased standard-schema `sdnType` to a normalized entity type.
 *
 * @param {string} [t] - 'individual', 'entity', 'vessel' or 'aircraft'.
 * @returns {'person'|'organization'|'vessel'|'aircraft'|'unknown'} Entity type;
 *   'unknown' for anything unrecognized.
 */
function mapOfacType(t) {
    if (t === 'individual') {
        return 'person';
    }
    if (t === 'entity') {
        return 'organization';
    }
    if (t === 'vessel' || t === 'aircraft') {
        return t;
    }
    return 'unknown';
}
|
|
397
|
+
/**
 * Pull the first date-looking token out of free-text remarks.
 *
 * Two forms are recognized: `D Month YYYY` (one- or two-digit day, alphabetic
 * month word) and `YYYY-MM-DD`. The month must be letters only, and both forms
 * must sit on word boundaries, so digit runs such as phone numbers are not
 * mistaken for dates.
 *
 * @param {string} remarks - Remarks text.
 * @returns {string|undefined} The matched text verbatim, or `undefined`.
 */
function extractDateFromRemarks(remarks) {
    const match = remarks.match(/\b\d{1,2}\s+[A-Za-z]+\s+\d{4}\b|\b\d{4}-\d{2}-\d{2}\b/);
    return match ? match[0] : undefined;
}
|
|
402
|
+
// ─── EU (xmlFullSanctionsList_1_1) ──────────────────────────────────────────────
|
|
403
|
+
/**
 * Create the EU ingester.
 *
 * `url` reads `euFsfUrl` from the server config each time it is called;
 * `harvest(signal)` fetches that document, parses the XML, and maps it with
 * `parseEu`.
 *
 * @returns {{source: 'eu', url: Function, harvest: Function}} The ingester.
 */
function buildEuIngester() {
    const source = 'eu';
    const url = () => getServerConfig().euFsfUrl;
    return {
        source,
        url,
        async harvest(signal) {
            const body = await fetchXml(url(), signal, source);
            return parseEu(parseXml(body));
        },
    };
}
|
|
414
|
+
/**
 * Map an EU sanctions document onto normalized designations.
 *
 * Entities are read from `sanctionEntity` / `SanctionEntity` under the `export`
 * root (or the document itself). Each `nameAlias` contributes a name taken from
 * `wholeName`, or else first + last name; the first usable name becomes the
 * primary and the rest become aliases, marked 'low-quality-aka' when the
 * alias's `strong` attribute is 'false'. Entities with no usable name are skipped.
 *
 * @param {object} doc - Parsed XML document.
 * @returns {Array<object>} Normalized designations.
 */
export function parseEu(doc) {
    const root = doc.export ?? doc;
    const designations = [];
    for (const entity of asArray(root.sanctionEntity ?? root.SanctionEntity)) {
        const id = asText(entity['@_logicalId']) ?? asText(entity['@_euReferenceNumber']) ?? crypto.randomUUID();
        const names = [];
        for (const alias of asArray(entity.nameAlias)) {
            const name = asText(alias['@_wholeName'])
                ?? [asText(alias['@_firstName']), asText(alias['@_lastName'])].filter(Boolean).join(' ');
            if (name) {
                names.push({ name, strong: asText(alias['@_strong']) !== 'false' });
            }
        }
        if (names.length === 0) {
            continue;
        }
        const [primary, ...others] = names;
        const birthdates = asArray(entity.birthdate)
            .map((b) => asText(b['@_birthdate']))
            .filter(Boolean);
        const citizenships = asArray(entity.citizenship)
            .map((c) => asText(c['@_countryDescription']))
            .filter(Boolean);
        designations.push({
            id: `eu:${id}`,
            source: 'eu',
            sourceEntryId: id,
            entityType: mapEuType(asText(entity.subjectType?.['@_code'])),
            primaryName: primary.name,
            ...opt('program', asText(entity.regulation?.['@_programme'])),
            ...opt('designationDate', asText(entity.regulation?.['@_publicationDate'])),
            payload: {
                aliases: others.map((n) => ({
                    name: n.name,
                    nameType: n.strong ? 'aka' : 'low-quality-aka',
                })),
                identifiers: [],
                addresses: [],
                datesOfBirth: birthdates.map((date) => ({ date })),
                nationalities: citizenships,
            },
        });
    }
    return designations;
}
|
|
465
|
+
/**
 * Map an EU subject-type code to a normalized entity type.
 *
 * Accepts the exact one-letter codes 'P' and 'E', or the words "person" and
 * "enterprise" in any case.
 *
 * @param {string} [code] - Subject-type code.
 * @returns {'person'|'organization'|'unknown'} Entity type.
 */
function mapEuType(code) {
    const word = code?.toLowerCase();
    if (code === 'P' || word === 'person') {
        return 'person';
    }
    if (code === 'E' || word === 'enterprise') {
        return 'organization';
    }
    return 'unknown';
}
|
|
472
|
+
// ─── UK Sanctions List (UKSL, FCDO) ─────────────────────────────────────────────
|
|
473
|
+
/**
 * Create the UK ingester.
 *
 * `url` reads `ukSanctionsUrl` from the server config each time it is called;
 * `harvest(signal)` fetches that document, parses the XML, and maps it with
 * `parseUk`.
 *
 * @returns {{source: 'uk', url: Function, harvest: Function}} The ingester.
 */
function buildUkIngester() {
    const source = 'uk';
    const url = () => getServerConfig().ukSanctionsUrl;
    return {
        source,
        url,
        async harvest(signal) {
            const body = await fetchXml(url(), signal, source);
            return parseUk(parseXml(body));
        },
    };
}
|
|
484
|
+
/**
 * Map a UK Sanctions List document onto normalized designations.
 *
 * Designations are looked up under `Designations`, `UKSanctionsList`, or the
 * document itself, and additionally under a `Designations` wrapper nested
 * inside the chosen root. Every field is read defensively.
 *
 * Each `<Name>` is assembled from `Name1`…`Name6`. The primary name is the
 * first entry whose `NameType` is "Primary name" (case-insensitive), falling
 * back to the first listed name; all remaining names become 'aka' aliases.
 * Designations with no usable name are skipped.
 *
 * @param {object} doc - Parsed XML document.
 * @returns {Array<object>} Normalized designations.
 */
export function parseUk(doc) {
    const root = doc.Designations ?? doc.UKSanctionsList ?? doc;
    const direct = asArray(root.Designation ?? root.designation);
    // Covers a wrapper root that holds <Designations> as a child.
    const list = direct.length ? direct : asArray(root.Designations?.Designation);
    const isPrimaryType = (type) => type?.toLowerCase() === 'primary name';
    const designations = [];
    for (const d of list) {
        const id = asText(d.UniqueID)
            ?? asText(d.OFSIGroupID)
            ?? asText(d['@_UniqueID'])
            ?? crypto.randomUUID();
        const names = asArray(d.Names?.Name)
            .map((nm) => {
                const type = asText(nm.NameType);
                const joined = [nm.Name1, nm.Name2, nm.Name3, nm.Name4, nm.Name5, nm.Name6]
                    .map((part) => asText(part))
                    .filter(Boolean)
                    .join(' ');
                // Typed names use the assembled parts; untyped ones prefer WholeName.
                const name = type && joined ? joined : (asText(nm.WholeName) ?? joined);
                return { name, type };
            })
            .filter((n) => Boolean(n.name));
        const fallbackName = asText(d.Name) ?? asText(d.Names?.WholeName);
        const allNames = names.length
            ? names
            : fallbackName
                ? [{ name: fallbackName, type: 'Primary name' }]
                : [];
        const primaryEntry = allNames.find((n) => isPrimaryType(n.type)) ?? allNames[0];
        if (!primaryEntry) {
            continue;
        }
        designations.push({
            id: `uk:${id}`,
            source: 'uk',
            sourceEntryId: id,
            entityType: mapUkType(asText(d.IndividualEntityShip ?? d.GroupType)),
            primaryName: primaryEntry.name,
            ...opt('program', asText(d.RegimeName)),
            ...opt('designationDate', asText(d.DateDesignated ?? d.LastUpdated)),
            payload: {
                aliases: allNames
                    .filter((n) => n !== primaryEntry)
                    .map((n) => ({ name: n.name, nameType: 'aka' })),
                identifiers: [],
                addresses: [],
                datesOfBirth: [],
                nationalities: asArray(d.Nationalities?.Nationality)
                    .map((x) => asText(x))
                    .filter((x) => Boolean(x)),
                ...opt('remarks', asText(d.OtherInformation)),
            },
        });
    }
    return designations;
}
|
|
551
|
+
/**
 * Map a UK designation type to a normalized entity type (case-insensitive).
 *
 * @param {string} [t] - e.g. "Individual", "Entity", "Ship".
 * @returns {'person'|'organization'|'vessel'|'unknown'} Entity type.
 */
function mapUkType(t) {
    switch (t?.toLowerCase()) {
        case 'individual':
        case 'person':
            return 'person';
        case 'entity':
        case 'organisation':
        case 'organization':
            return 'organization';
        case 'ship':
        case 'vessel':
            return 'vessel';
        default:
            return 'unknown';
    }
}
|
|
561
|
+
// ─── UN Security Council Consolidated List ───────────────────────────────────────
|
|
562
|
+
/**
 * Build the ingester descriptor for the UN list (`source: 'un'`).
 *
 * The feed URL is looked up through `getServerConfig()` every time it is
 * needed rather than being captured when the ingester is built.
 *
 * @returns {{ source: 'un', url: () => string, harvest: (signal: AbortSignal) => Promise<Array<object>> }}
 *   Descriptor whose `harvest` fetches the XML feed, parses it and returns the
 *   designations produced by `parseUn`.
 */
function buildUnIngester() {
    const resolveUrl = () => getServerConfig().unScUrl;
    return {
        source: 'un',
        url: resolveUrl,
        async harvest(signal) {
            const xml = await fetchXml(resolveUrl(), signal, 'un');
            return parseUn(parseXml(xml));
        },
    };
}
|
|
573
|
+
/**
 * Convert a parsed UN list document into normalized designations.
 *
 * Accepts either the document wrapper (with a `CONSOLIDATED_LIST` property) or
 * the list node itself. Individuals are emitted first as `'person'`, followed
 * by entities as `'organization'`; entries that `parseUnEntry` rejects
 * (returns a falsy value for) are dropped.
 *
 * @param {object} doc - Parsed XML document or its `CONSOLIDATED_LIST` node.
 * @returns {Array<object>} Designations in source order, individuals before entities.
 */
export function parseUn(doc) {
    const list = doc.CONSOLIDATED_LIST ?? doc;
    const collect = (raw, entityType) => asArray(raw).map((entry) => parseUnEntry(entry, entityType));
    const people = collect(list.INDIVIDUALS?.INDIVIDUAL, 'person');
    const organizations = collect(list.ENTITIES?.ENTITY, 'organization');
    return people.concat(organizations).filter(Boolean);
}
|
|
579
|
+
/**
 * Normalize one `INDIVIDUAL` or `ENTITY` node of the UN list.
 *
 * The id comes from `DATAID`, then `REFERENCE_NUMBER`, then a freshly
 * generated UUID.
 * NOTE(review): a generated UUID differs on every call, so entries lacking
 * both source ids will not keep a stable id between harvests; confirm that
 * this is acceptable for the consumer.
 *
 * The primary name is the space-joined, non-empty `FIRST_NAME`..`FOURTH_NAME`
 * values; for `'organization'` entries `FIRST_NAME` alone is preferred when
 * present. Entries without a primary name yield `null`.
 *
 * @param {object} e - Parsed XML node for the entry.
 * @param {string} entityType - Normalized type assigned by the caller.
 * @returns {object | null} Normalized designation, or `null` when unnamed.
 */
function parseUnEntry(e, entityType) {
    const id = asText(e.DATAID) ?? asText(e.REFERENCE_NUMBER) ?? crypto.randomUUID();

    const nameParts = ['FIRST_NAME', 'SECOND_NAME', 'THIRD_NAME', 'FOURTH_NAME']
        .map((field) => asText(e[field]))
        .filter(Boolean);
    const joinedName = nameParts.join(' ');
    let primary = joinedName;
    if (entityType === 'organization') {
        primary = asText(e.FIRST_NAME) ?? joinedName;
    }
    if (!primary) {
        return null;
    }

    // Aliases without a usable name are discarded; only QUALITY "low" is flagged.
    const aliases = asArray(e.INDIVIDUAL_ALIAS ?? e.ENTITY_ALIAS).flatMap((alias) => {
        const isLowQuality = asText(alias.QUALITY)?.toLowerCase() === 'low';
        const name = asText(alias.ALIAS_NAME) ?? '';
        return name ? [{ name, nameType: isLowQuality ? 'low-quality-aka' : 'aka' }] : [];
    });

    // A full DATE wins over a bare YEAR; entries with neither are discarded.
    const dobs = asArray(e.INDIVIDUAL_DATE_OF_BIRTH).flatMap((birth) => {
        const record = opt('date', asText(birth.DATE) ?? asText(birth.YEAR));
        return record.date ? [record] : [];
    });

    const nationalities = asArray(e.NATIONALITY?.VALUE)
        .map((value) => asText(value))
        .filter(Boolean);

    // Documents without a number are discarded; a missing type defaults to "Document".
    const identifiers = asArray(e.INDIVIDUAL_DOCUMENT).flatMap((documentNode) => {
        const type = asText(documentNode.TYPE_OF_DOCUMENT) ?? 'Document';
        const value = asText(documentNode.NUMBER) ?? '';
        const country = opt('country', asText(documentNode.ISSUING_COUNTRY));
        return value ? [{ type, value, ...country }] : [];
    });

    const payload = {
        aliases,
        identifiers,
        addresses: [],
        datesOfBirth: dobs,
        nationalities,
        ...opt('remarks', asText(e.COMMENTS1)),
    };

    return {
        id: `un:${id}`,
        source: 'un',
        sourceEntryId: id,
        entityType,
        primaryName: primary,
        ...opt('program', asText(e.UN_LIST_TYPE)),
        ...opt('designationDate', asText(e.LISTED_ON)),
        payload,
    };
}
|
|
638
|
+
// ─── Registry + sync factory ─────────────────────────────────────────────────
|
|
639
|
+
/**
 * Assemble all five sanctions ingesters from the current server config.
 *
 * The result is ordered: OFAC SDN, OFAC consolidated, EU, UK, UN. The server
 * config is fetched once here; the two OFAC ingesters receive URL getters that
 * read their URL from that config object each time they are invoked.
 *
 * @returns {Array<object>} The five ingester descriptors, in the order above.
 */
export function buildSanctionsIngesters() {
    const config = getServerConfig();
    const ofacSdn = buildOfacIngester('ofac_sdn', () => config.ofacSdnUrl);
    const ofacConsolidated = buildOfacIngester('ofac_consolidated', () => config.ofacConsolidatedUrl);
    const eu = buildEuIngester();
    const uk = buildUkIngester();
    const un = buildUnIngester();
    return [ofacSdn, ofacConsolidated, eu, uk, un];
}
|
|
650
|
+
/**
 * Create the MirrorService `sync` generator for the sanctions designation
 * mirror.
 *
 * Every run harvests each source in full, one after another, and yields a
 * single page per source; all pages of a run share the same ISO-8601
 * checkpoint taken when the run started. The generator stops early, without
 * yielding further pages, once `ctx.signal` is aborted. The mirror upserts the
 * `designation` rows; the per-alias `name` index is rebuilt afterwards from
 * `designation.payload` by the service's `rebuildNameIndex()` (called by the
 * lifecycle scripts and the refresh cron). `init` and `refresh` behave
 * identically because these are small corpora that are always re-harvested in
 * full.
 *
 * @returns {(ctx: { signal: AbortSignal }) => AsyncGenerator<{ records: Array<object>, checkpoint: string }>}
 *   The async generator function to hand to the mirror.
 */
export function createSanctionsSync() {
    async function* sync(ctx) {
        const sources = buildSanctionsIngesters();
        const checkpoint = new Date().toISOString();
        for (const source of sources) {
            if (ctx.signal.aborted) {
                return;
            }
            const harvested = await source.harvest(ctx.signal);
            const records = harvested.map(toDesignationRow);
            yield { records, checkpoint };
        }
    }
    return sync;
}
|
|
673
|
+
/**
 * Map a normalized designation to its primary-table row (no aux fields).
 *
 * Column names are snake_case. `normalized_name` is the folded primary name,
 * absent optional fields (`program`, `legalBasis`, `designationDate`) are
 * stored as `null`, and `payload` is stored as a JSON string.
 *
 * @param {object} d - Normalized designation.
 * @returns {object} Row object for the `designation` table.
 */
export function toDesignationRow(d) {
    const {
        id,
        source,
        sourceEntryId,
        entityType,
        primaryName,
        // A destructuring default only replaces `undefined`; an explicit `null` stays `null`.
        program = null,
        legalBasis = null,
        designationDate = null,
        payload,
    } = d;
    return {
        id,
        source,
        source_entry_id: sourceEntryId,
        entity_type: entityType,
        primary_name: primaryName,
        normalized_name: fold(primaryName),
        program,
        legal_basis: legalBasis,
        designation_date: designationDate,
        payload: JSON.stringify(payload),
    };
}
|
|
688
|
+
//# sourceMappingURL=sanctions-ingest.js.map
|