glossarist 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/concept-collection.js +36 -28
- package/src/concept-parser.js +6 -11
- package/src/gcr-reader.js +32 -2
- package/src/index.d.ts +10 -0
- package/src/index.js +4 -2
- package/src/models/concept-ref.js +9 -2
- package/src/models/concept-source.js +2 -0
- package/src/models/concept.js +44 -0
- package/src/models/designation-relationship.js +27 -0
- package/src/models/designation.js +6 -1
- package/src/models/index.d.ts +11 -1
- package/src/models/index.js +1 -0
- package/src/models/localized-concept.js +38 -82
- package/src/models/related-concept.js +0 -2
- package/src/reference-mention.js +88 -0
- package/src/reference-resolver.js +285 -26
- package/src/render-classification.js +106 -0
- package/src/validators/concept-validator.js +51 -44
- package/src/validators/gcr-validator.js +6 -6
- package/src/validators/index.js +7 -1
- package/src/validators/register-validator.js +7 -11
- package/src/validators/relationship-type-rule.js +39 -0
- package/src/validators/v3-rules.js +171 -107
- package/src/validators/validation-error.js +4 -0
- package/src/validators/validation-result.js +29 -11
- package/src/validators/validation-rule.js +7 -5
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mention parser for {{...}} inline references in concept text.
|
|
3
|
+
*
|
|
4
|
+
* Pure function: takes a raw mention body (the text inside
|
|
5
|
+
* {{...}}) and returns a structured MentionParseResult. The
|
|
6
|
+
* extractor (in src/reference-resolver.js) consumes the
|
|
7
|
+
* structured form to emit Reference objects.
|
|
8
|
+
*
|
|
9
|
+
* Three outcomes in v8:
|
|
10
|
+
* - 'cite-ref': the mention is {{cite:<key>}} or
|
|
11
|
+
* {{cite:<key>,<label>}}. The extractor looks the key up in
|
|
12
|
+
* the current concept's sources list.
|
|
13
|
+
* - 'numeric': the mention is a bare dotted or dashed id
|
|
14
|
+
* like {{3.1.1.1}} or {{103-01-02}}. Resolves to a
|
|
15
|
+
* same-dataset concept.
|
|
16
|
+
* - 'unresolved': the mention did not match a recognized
|
|
17
|
+
* form. The extractor silently drops it.
|
|
18
|
+
*
|
|
19
|
+
* The full v6 form-aware parser (URI schemes, short-ids,
|
|
20
|
+
* quoting) is aspirational; v8 only supports the two forms
|
|
21
|
+
* above plus a catch-all unresolved case.
|
|
22
|
+
*
|
|
23
|
+
* @typedef {Object} MentionParseResult
|
|
24
|
+
* @property {'cite-ref' | 'numeric' | 'unresolved'} kind
|
|
25
|
+
* @property {string} [key] — for 'cite-ref': the local key
|
|
26
|
+
* @property {string} [label] — for 'cite-ref': the inline label
|
|
27
|
+
* @property {string} [id] — for 'numeric': the bare id
|
|
28
|
+
* @property {string} raw — the original mention body
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
// Bare concept id: two or more digit groups joined by '.' or '-'
// (e.g. 3.1.1.1 or 103-01-02). A single number such as "42" does not match.
const NUMERIC_RE = /^\d+(?:[.-]\d+)+$/;

// cite:<key>[,<label>]. The key is everything up to the first comma and may
// not contain ',' or '}'. The label is everything after that first comma.
// The `s` (dotAll) flag lets a label span a line break, so a mention wrapped
// across lines is still recognized instead of falling through to 'unresolved'.
const CITE_RE = /^cite:([^,}]+)(?:,(.*))?$/s;

/**
 * Parse the body of a {{...}} mention (without the braces).
 *
 * The function is pure: no I/O, no model lookups, no state.
 * Resolution of the parsed result is the extractor's job.
 *
 * Recognized forms, checked in this order:
 *  1. `cite:<key>` or `cite:<key>,<label>` → kind 'cite-ref'. `label` is
 *     null when no comma is present and may be the empty string when the
 *     comma is followed by nothing. A label wrapped in double quotes is
 *     unquoted CSV-style ("" becomes ").
 *  2. A dotted or dashed numeric id → kind 'numeric'.
 *  3. Anything else → kind 'unresolved'.
 *
 * @param {string} raw — the text inside {{...}}; surrounding whitespace is
 *   trimmed here. `null`/`undefined` are treated as the empty string and
 *   other non-string values are stringified, so the parser never throws.
 * @returns {MentionParseResult}
 */
export function parseMention(raw) {
  const body = String(raw ?? '').trim();

  // 1. cite:<key>[,<label>]. A key that is empty after trimming
  //    (e.g. "cite: ,label") is not a usable lookup key, so the mention
  //    falls through to the remaining forms instead of yielding key === ''.
  const citeMatch = CITE_RE.exec(body);
  const key = citeMatch?.[1].trim();
  if (key) {
    const rawLabel = citeMatch[2];
    return {
      kind: 'cite-ref',
      key,
      label: rawLabel === undefined ? null : unquoteLabel(rawLabel.trim()),
      raw: body,
    };
  }

  // 2. Bare numeric id: same-dataset concept id.
  if (NUMERIC_RE.test(body)) {
    return {
      kind: 'numeric',
      id: body,
      raw: body,
    };
  }

  // 3. Anything else is unresolved at the parse layer.
  return {
    kind: 'unresolved',
    raw: body,
  };
}

/**
 * Strip surrounding double quotes from a label, unescaping
 * CSV-style "" to a single ". If the input is not quoted
 * (or is a lone quote character), return it unchanged.
 *
 * @param {string} label — the already-trimmed label text
 * @returns {string}
 */
function unquoteLabel(label) {
  const isQuoted = label.length >= 2 && label.startsWith('"') && label.endsWith('"');
  if (!isQuoted) {
    return label;
  }
  return label.slice(1, -1).replace(/""/g, '"');
}
|
|
@@ -1,11 +1,35 @@
|
|
|
1
1
|
import { ConceptRef } from './models/concept-ref.js';
|
|
2
|
+
import { parseMention } from './reference-mention.js';
|
|
2
3
|
|
|
3
4
|
export class Reference {
  /**
   * A single reference extracted from a concept.
   *
   * @param {string} type — the structural kind of the reference
   *   ('concept', 'dataset', 'bibliography', 'typed-ref',
   *   'standard').
   * @param {string | null} target — the legacy flat display
   *   string. Kept for backward compat with callers that only
   *   read `r.target`.
   * @param {string | null} [relationship] — the type of the
   *   relationship that produced this reference (e.g. 'see',
   *   'supersedes', 'source'). Stored as null when omitted.
   * @param {string | null} [source] — a JSON-pointer-ish path
   *   indicating where in the concept the reference was
   *   extracted from. Stored as null when omitted.
   * @param {object | null} [extras] — additional fields (v8+). Only
   *   `uri`, `citation`, `sourceId`, `resolution` and `lookupKey` are
   *   copied onto the instance, each defaulting to null; any other
   *   keys in `extras` are ignored by this constructor.
   */
  constructor(type, target, relationship, source, extras = {}) {
    // The default parameter only covers `undefined`; an explicit `null`
    // must not crash the property reads below.
    const { uri, citation, sourceId, resolution, lookupKey } = extras ?? {};

    this.type = type;
    this.target = target;
    this.relationship = relationship ?? null;
    this.source = source ?? null;

    this.uri = uri ?? null;
    this.citation = citation ?? null;
    this.sourceId = sourceId ?? null;
    this.resolution = resolution ?? null;
    this.lookupKey = lookupKey ?? null;
  }
}
|
|
11
35
|
|
|
@@ -18,6 +42,21 @@ function refTarget(rc) {
|
|
|
18
42
|
}
|
|
19
43
|
|
|
20
44
|
export class ReferenceResolver {
|
|
45
|
+
/**
|
|
46
|
+
* Extract all embedded references from a concept's localizations.
|
|
47
|
+
*
|
|
48
|
+
* Walks definitions, notes, examples, and annotations text.
|
|
49
|
+
* For each `{{...}}` mention, runs `parseMention` to
|
|
50
|
+
* classify the form, then dispatches:
|
|
51
|
+
* - 'cite-ref' → look up the key in concept.sources; emit
|
|
52
|
+
* Bibliography Reference with the Citation.
|
|
53
|
+
* - 'numeric' → emit Concept Reference with the bare id
|
|
54
|
+
* (existing behavior).
|
|
55
|
+
* - 'unresolved' → do not emit a Reference.
|
|
56
|
+
*
|
|
57
|
+
* @param {Concept} concept
|
|
58
|
+
* @returns {Reference[]}
|
|
59
|
+
*/
|
|
21
60
|
extractReferences(concept) {
|
|
22
61
|
const refs = [];
|
|
23
62
|
|
|
@@ -42,47 +81,267 @@ export class ReferenceResolver {
|
|
|
42
81
|
}
|
|
43
82
|
}
|
|
44
83
|
|
|
45
|
-
const texts =
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
refs.push(..._extractEmbedded(text));
|
|
84
|
+
const texts = this._collectTexts(lc, lang);
|
|
85
|
+
for (const { text, source } of texts) {
|
|
86
|
+
for (const ref of this._extractFromText(text, source, concept)) {
|
|
87
|
+
refs.push(ref);
|
|
88
|
+
}
|
|
51
89
|
}
|
|
52
90
|
}
|
|
53
91
|
|
|
54
92
|
return refs;
|
|
55
93
|
}
|
|
56
94
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
95
|
+
/**
|
|
96
|
+
* Collect all text fields from a localized concept, paired
|
|
97
|
+
* with diagnostic source paths.
|
|
98
|
+
*
|
|
99
|
+
* @param {LocalizedConcept} lc
|
|
100
|
+
* @param {string} lang
|
|
101
|
+
* @returns {{text: string, source: string}[]}
|
|
102
|
+
*/
|
|
103
|
+
_collectTexts(lc, lang) {
|
|
104
|
+
const out = [];
|
|
105
|
+
for (let i = 0; (lc.definitions ?? [])[i]; i++) {
|
|
106
|
+
const content = lc.definitions[i]?.content;
|
|
107
|
+
if (typeof content === 'string') {
|
|
108
|
+
out.push({ text: content, source: `localizations.${lang}.definitions[${i}].content` });
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
for (let i = 0; (lc.notes ?? [])[i]; i++) {
|
|
112
|
+
const content = typeof lc.notes[i] === 'object'
|
|
113
|
+
? (lc.notes[i]?.content ?? '')
|
|
114
|
+
: String(lc.notes[i] ?? '');
|
|
115
|
+
if (content) {
|
|
116
|
+
out.push({ text: content, source: `localizations.${lang}.notes[${i}].content` });
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
for (let i = 0; (lc.examples ?? [])[i]; i++) {
|
|
120
|
+
const content = lc.examples[i]?.content;
|
|
121
|
+
if (typeof content === 'string') {
|
|
122
|
+
out.push({ text: content, source: `localizations.${lang}.examples[${i}].content` });
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
for (let i = 0; (lc.annotations ?? [])[i]; i++) {
|
|
126
|
+
const content = lc.annotations[i]?.content;
|
|
127
|
+
if (typeof content === 'string') {
|
|
128
|
+
out.push({ text: content, source: `localizations.${lang}.annotations[${i}].content` });
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
return out;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
/**
|
|
135
|
+
* Walk a single text string and emit References for each
|
|
136
|
+
* `{{...}}` mention.
|
|
137
|
+
*
|
|
138
|
+
* @param {string} text
|
|
139
|
+
* @param {string} source — diagnostic path
|
|
140
|
+
* @param {Concept} concept — the owning concept (for cite-ref lookup)
|
|
141
|
+
* @returns {Reference[]}
|
|
142
|
+
*/
|
|
143
|
+
_extractFromText(text, source, concept) {
|
|
144
|
+
const refs = [];
|
|
145
|
+
const re = /\{\{([^{}]*?)\}\}/g;
|
|
146
|
+
let m;
|
|
147
|
+
while ((m = re.exec(text)) !== null) {
|
|
148
|
+
const parsed = parseMention(m[1]);
|
|
149
|
+
switch (parsed.kind) {
|
|
150
|
+
case 'cite-ref':
|
|
151
|
+
refs.push(this._resolveCiteRef(parsed, source, concept));
|
|
152
|
+
break;
|
|
153
|
+
case 'numeric':
|
|
154
|
+
refs.push(new Reference('concept', parsed.id, 'embedded', source));
|
|
155
|
+
break;
|
|
156
|
+
case 'unresolved':
|
|
157
|
+
// Silently dropped. The mention is either non-reference
|
|
158
|
+
// text (e.g. math, code) or a form we don't support.
|
|
159
|
+
break;
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
return refs;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Resolve a `cite-ref` parser result against the concept's
|
|
167
|
+
* sources list. Emits a Bibliography Reference with the
|
|
168
|
+
* resolved Citation (if found) or an unresolved Reference
|
|
169
|
+
* (if not).
|
|
170
|
+
*
|
|
171
|
+
* @param {MentionParseResult} parsed
|
|
172
|
+
* @param {string} source — diagnostic path
|
|
173
|
+
* @param {Concept} concept — the owning concept
|
|
174
|
+
* @returns {Reference}
|
|
175
|
+
*/
|
|
176
|
+
_resolveCiteRef(parsed, source, concept) {
|
|
177
|
+
const sourceEntry = concept?.findSourceById(parsed.key) ?? null;
|
|
178
|
+
if (!sourceEntry) {
|
|
179
|
+
return new Reference(
|
|
180
|
+
'bibliography',
|
|
181
|
+
parsed.label ?? parsed.key,
|
|
182
|
+
null,
|
|
183
|
+
source,
|
|
184
|
+
{
|
|
185
|
+
sourceId: parsed.key,
|
|
186
|
+
citation: null,
|
|
187
|
+
resolution: { kind: 'unresolved', reason: 'no-source' },
|
|
188
|
+
},
|
|
189
|
+
);
|
|
190
|
+
}
|
|
191
|
+
const displayTarget = parsed.label
|
|
192
|
+
?? sourceEntry.origin?.toString()
|
|
193
|
+
?? sourceEntry.id;
|
|
194
|
+
return new Reference(
|
|
195
|
+
'bibliography',
|
|
196
|
+
displayTarget,
|
|
197
|
+
null,
|
|
198
|
+
source,
|
|
199
|
+
{
|
|
200
|
+
sourceId: sourceEntry.id,
|
|
201
|
+
citation: sourceEntry.origin,
|
|
202
|
+
resolution: { kind: 'resolved', sourceId: sourceEntry.id },
|
|
203
|
+
},
|
|
204
|
+
);
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
/**
|
|
208
|
+
* Resolve a single reference against a registry (a map of
|
|
209
|
+
* datasetId → { concepts, register? }). The registry may also
|
|
210
|
+
* include 'bibliography:<source>' keys for bibliographic
|
|
211
|
+
* datasets.
|
|
212
|
+
*
|
|
213
|
+
* For a `type: 'bibliography'` Reference with an inline
|
|
214
|
+
* `citation`, the resolver first tries the bibliography
|
|
215
|
+
* registry (matching `citation.ref` by source/id/version);
|
|
216
|
+
* if not found, returns the inline Citation as a
|
|
217
|
+
* self-contained fallback.
|
|
218
|
+
*
|
|
219
|
+
* For a `type: 'bibliography'` Reference with a `uri` and
|
|
220
|
+
* `resolution.kind === 'bibliography-namespace'`, the
|
|
221
|
+
* resolver tries the bibliography registry by
|
|
222
|
+
* `resolution.source/id/version`.
|
|
223
|
+
*
|
|
224
|
+
* For `type: 'concept'` References with a `lookupKey.id`
|
|
225
|
+
* (id-match, short-id, or numeric), the resolver looks up
|
|
226
|
+
* the id in `lookupKey.dataset`'s ConceptCollection.
|
|
227
|
+
*
|
|
228
|
+
* Backward compat: when the second argument is a
|
|
229
|
+
* ConceptCollection (has `byId` but no `concepts` field), it
|
|
230
|
+
* is treated as a one-key registry of one default dataset.
|
|
231
|
+
*/
|
|
232
|
+
resolveReference(ref, registry) {
|
|
233
|
+
if (ref == null) return null;
|
|
234
|
+
|
|
235
|
+
// Backward-compat: single ConceptCollection becomes a
|
|
236
|
+
// one-key registry.
|
|
237
|
+
if (isConceptCollection(registry)) {
|
|
238
|
+
registry = { _default: { concepts: registry } };
|
|
239
|
+
}
|
|
240
|
+
if (registry == null) return null;
|
|
241
|
+
|
|
242
|
+
// 1. cite:key form (Bibliography with inline Citation).
|
|
243
|
+
if (ref.type === 'bibliography' && ref.citation) {
|
|
244
|
+
const bioRecord = this._resolveBibliographyRecord(
|
|
245
|
+
ref.citation.ref,
|
|
246
|
+
registry,
|
|
247
|
+
);
|
|
248
|
+
if (bioRecord) return bioRecord;
|
|
249
|
+
return ref.citation;
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
// 2. URI form (urn:... or https:...) with
|
|
253
|
+
// bibliography-namespace resolution.
|
|
254
|
+
if (ref.uri) {
|
|
255
|
+
if (ref.resolution?.kind === 'bibliography-namespace'
|
|
256
|
+
|| (ref.resolution?.source && !ref.resolution?.datasetId)) {
|
|
257
|
+
const bioRecord = this._resolveBibliographyRecord(
|
|
258
|
+
ref.resolution,
|
|
259
|
+
registry,
|
|
260
|
+
);
|
|
261
|
+
if (bioRecord) return bioRecord;
|
|
262
|
+
}
|
|
263
|
+
// Concept URI lookup (for non-bibliography URIs).
|
|
264
|
+
if (ref.resolution?.datasetId) {
|
|
265
|
+
const coll = registry[ref.resolution.datasetId]?.concepts;
|
|
266
|
+
if (coll) {
|
|
267
|
+
return coll.byId(ref.resolution.conceptId);
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
return null;
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
// 3. Same-dataset concept id (numeric mention, id-match, etc.).
|
|
274
|
+
if (ref.lookupKey?.id) {
|
|
275
|
+
const coll = registry[ref.lookupKey.dataset]?.concepts;
|
|
276
|
+
if (coll) return coll.byId(ref.lookupKey.id);
|
|
277
|
+
return null;
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
// 3b. Backward-compat: a concept ref with a `target` (id)
|
|
281
|
+
// but no `lookupKey` is looked up in the single
|
|
282
|
+
// collection (backward-compat one-key registry).
|
|
283
|
+
if (ref.type === 'concept' && ref.target) {
|
|
284
|
+
const defaultColl = registry._default?.concepts;
|
|
285
|
+
if (defaultColl) return defaultColl.byId(ref.target);
|
|
286
|
+
// Try every dataset in the registry as a fallback.
|
|
287
|
+
for (const entry of Object.values(registry)) {
|
|
288
|
+
if (entry?.concepts?.byId(ref.target)) {
|
|
289
|
+
return entry.concepts.byId(ref.target);
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
return null;
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
// 4. Unanchored designation — search is a separate concern
|
|
296
|
+
// (plan 06). For v8, return null.
|
|
297
|
+
if (ref.lookupKey?.designation) {
|
|
298
|
+
return null;
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
return null;
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
/**
|
|
305
|
+
* Try to resolve a Citation::Ref against the bibliography
|
|
306
|
+
* registry. The ref has shape { source, id, version? }.
|
|
307
|
+
*
|
|
308
|
+
* Returns the matching bibliographic record (a Concept), or
|
|
309
|
+
* null if no match.
|
|
310
|
+
*/
|
|
311
|
+
_resolveBibliographyRecord(citationRef, registry) {
|
|
312
|
+
if (!citationRef?.source || !citationRef?.id) return null;
|
|
313
|
+
const bioColl = registry[`bibliography:${citationRef.source}`]?.concepts;
|
|
314
|
+
if (!bioColl) return null;
|
|
315
|
+
if (citationRef.version) {
|
|
316
|
+
return bioColl.byIdAnd(citationRef.id, citationRef.version);
|
|
317
|
+
}
|
|
318
|
+
return bioColl.byId(citationRef.id);
|
|
60
319
|
}
|
|
61
320
|
|
|
62
|
-
resolveAll(concept,
|
|
321
|
+
resolveAll(concept, registry) {
|
|
63
322
|
const resolved = new Map();
|
|
64
323
|
for (const ref of this.extractReferences(concept)) {
|
|
65
|
-
if (ref.type === 'concept') {
|
|
66
|
-
|
|
324
|
+
if (ref.type === 'concept' || ref.type === 'bibliography') {
|
|
325
|
+
const target = this.resolveReference(ref, registry);
|
|
326
|
+
if (target != null) {
|
|
327
|
+
const key = ref.target ?? ref.uri ?? ref.sourceId;
|
|
328
|
+
if (key != null) resolved.set(key, target);
|
|
329
|
+
}
|
|
67
330
|
}
|
|
68
331
|
}
|
|
69
332
|
return resolved;
|
|
70
333
|
}
|
|
71
334
|
}
|
|
72
335
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
refs.push(new Reference('concept', target, 'embedded', null));
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
return refs;
|
|
336
|
+
/**
|
|
337
|
+
* Type-guard for the single-collection case (backward compat).
|
|
338
|
+
* A ConceptCollection has `byId` but no `concepts` field.
|
|
339
|
+
*/
|
|
340
|
+
function isConceptCollection(x) {
  // Must be a non-null object...
  if (x == null || typeof x !== 'object') {
    return false;
  }
  // ...exposing a `byId` method...
  if (typeof x.byId !== 'function') {
    return false;
  }
  // ...and no `concepts` field (own or inherited), which marks a registry entry.
  return !('concepts' in x);
}
|
|
87
346
|
|
|
88
347
|
export const referenceResolver = new ReferenceResolver();
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Classify a Reference for rendering.
|
|
3
|
+
*
|
|
4
|
+
* The classifier is constructed once per render with a registry
|
|
5
|
+
* (and optional source dataset id). The classify() method is
|
|
6
|
+
* pure and side-effect-free.
|
|
7
|
+
*
|
|
8
|
+
* Each `Reference.type` is its own `_classifyXxx` method. The
|
|
9
|
+
* dispatch in classify() is closed for modification.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
export class ReferenceClassifier {
  /**
   * @param {object} registry — the deployment's dataset registry, keyed by
   *   dataset id (bibliographic datasets under 'bibliography:<source>').
   *   `null`/`undefined` are treated as an empty registry.
   * @param {string} [sourceDatasetId] — the dataset the source
   *   concept belongs to; used to determine "same-dataset".
   * @param {object} [options] — additional options; stored on the
   *   instance but not read by any method of this class.
   */
  constructor(registry = {}, sourceDatasetId = null, options = {}) {
    // Default parameters only cover `undefined`; an explicit null registry
    // must not make every later `this.registry[...]` lookup throw.
    this.registry = registry ?? {};
    this.sourceDatasetId = sourceDatasetId;
    this.options = options ?? {};
  }

  /**
   * Dispatch on `ref.type` to the matching `_classifyXxx` method.
   *
   * @param {Reference} ref
   * @returns {string} — the classification (e.g. 'same-dataset',
   *   'internal-citation', 'unresolved', etc.); 'unknown' for a
   *   null ref or an unrecognized type.
   */
  classify(ref) {
    if (ref == null) return 'unknown';

    switch (ref.type) {
      case 'concept': return this._classifyConcept(ref);
      case 'dataset': return this._classifyDataset(ref);
      case 'bibliography': return this._classifyBibliography(ref);
      case 'typed-ref': return this._classifyTypedRef(ref);
      case 'standard': return 'legacy-standard';
      default: return 'unknown';
    }
  }

  /**
   * Classify a concept reference by where its dataset sits relative to
   * the registry and the source dataset.
   *
   * @param {Reference} ref
   * @returns {string}
   */
  _classifyConcept(ref) {
    // 1. URI form, resolved to a dataset.
    if (ref.uri) {
      const dsId = ref.resolution?.datasetId;
      if (!dsId) return 'unresolved';
      if (!this.registry[dsId]) return 'external-citation';
      return dsId === this.sourceDatasetId ? 'same-dataset' : 'cross-dataset';
    }
    // 2. Unanchored designation.
    if (ref.lookupKey?.designation) {
      return 'unresolved-designation';
    }
    // 3. Id-style lookup key.
    if (ref.lookupKey?.id) {
      const dsId = ref.lookupKey.dataset;
      if (!this.registry[dsId]) return 'unresolved';
      return dsId === this.sourceDatasetId ? 'same-dataset' : 'cross-dataset';
    }
    // 4. Anything else, including legacy refs that carry only a `target`,
    //    has no dataset information to classify against.
    return 'unresolved';
  }

  /**
   * Classify a dataset reference. Both the 'dataset-self' and
   * 'dataset-namespace' resolution kinds map to 'dataset-self'.
   *
   * @param {Reference} ref
   * @returns {string}
   */
  _classifyDataset(ref) {
    const kind = ref.resolution?.kind;
    if (kind === 'dataset-self' || kind === 'dataset-namespace') {
      return 'dataset-self';
    }
    return 'unknown';
  }

  /**
   * Classify a bibliography reference.
   *
   * @param {Reference} ref
   * @returns {string}
   */
  _classifyBibliography(ref) {
    // 1. cite:key form: the inline citation either matches a record in the
    //    bibliography registry or stands on its own.
    if (ref.citation) {
      return this._tryBibliography(ref.citation.ref)
        ? 'internal-citation'
        : 'self-contained-citation';
    }
    // 2. URI form: try the bibliography registry using the resolution's
    //    source/id/version; otherwise the citation is external.
    if (ref.uri) {
      return this._tryBibliography(ref.resolution)
        ? 'internal-citation'
        : 'external-citation';
    }
    return 'unresolved-citation';
  }

  /**
   * Typed refs all share one classification.
   *
   * @param {Reference} _ref — unused
   * @returns {string}
   */
  _classifyTypedRef(_ref) {
    return 'typed-ref';
  }

  /**
   * Look up a { source, id, version? } ref in the bibliography registry.
   *
   * @param {object | null | undefined} citationRef
   * @returns {object | null} the matching record, or null when the ref is
   *   incomplete, the source has no registered collection, or nothing matches.
   */
  _tryBibliography(citationRef) {
    if (!citationRef?.source || !citationRef?.id) return null;
    const bioColl = this.registry[`bibliography:${citationRef.source}`]?.concepts;
    if (!bioColl) return null;
    if (citationRef.version) {
      return bioColl.byIdAnd(citationRef.id, citationRef.version) ?? null;
    }
    return bioColl.byId(citationRef.id) ?? null;
  }
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { ValidationRule } from './validation-rule.js';
|
|
2
|
-
import {
|
|
2
|
+
import { ValidationResult } from './validation-result.js';
|
|
3
3
|
|
|
4
4
|
const VALID_DESIGNATION_TYPES = new Set([
|
|
5
5
|
'expression', 'abbreviation', 'symbol', 'graphical symbol', 'graphical_symbol',
|
|
@@ -9,51 +9,55 @@ const VALID_ENTRY_STATUSES = new Set([
|
|
|
9
9
|
'valid', 'draft', 'retired', 'notValid', 'superseded', 'withdrawn',
|
|
10
10
|
]);
|
|
11
11
|
|
|
12
|
+
// Language codes of a concept: its `languages` value when that is set,
// otherwise the keys of its `localizations` object, otherwise none.
const _langs = (c) => {
  const declared = c.languages;
  if (declared != null) {
    return declared;
  }
  return c.localizations ? Object.keys(c.localizations) : [];
};
|
|
14
|
+
|
|
15
|
+
// Localization of a concept for one language: via its `localization(lang)`
// method when it has one, otherwise by key from its `localizations` object.
const _loc = (c, lang) => {
  if (typeof c.localization === 'function') {
    return c.localization(lang);
  }
  return c.localizations?.[lang];
};
|
|
17
|
+
|
|
12
18
|
/**
 * Reports an error for every language code of the concept that is not
 * exactly three lowercase ASCII letters.
 */
export class LanguageCodeRule extends ValidationRule {
  constructor() {
    super('language-code');
  }

  /**
   * @param {object} concept — the concept whose language codes are checked
   * @param {string} path — prefix prepended to each reported error path
   * @param {object} result — collector; errors are reported via `addError`
   */
  validate(concept, path, result) {
    const threeLowercaseLetters = /^[a-z]{3}$/;
    for (const lang of _langs(concept)) {
      if (threeLowercaseLetters.test(lang)) {
        continue;
      }
      result.addError(
        `${path}localizations.${lang}`,
        `Invalid language code '${lang}': expected ISO 639-3 (3 lowercase letters)`,
      );
    }
  }
}
|
|
26
29
|
|
|
27
30
|
/**
 * Reports an error for every term whose designation type is set but is
 * not a member of VALID_DESIGNATION_TYPES.
 */
export class DesignationTypeRule extends ValidationRule {
  constructor() {
    super('designation-type');
  }

  /**
   * @param {object} concept — the concept whose terms are checked
   * @param {string} path — prefix prepended to each reported error path
   * @param {object} result — collector; errors are reported via `addError`
   */
  validate(concept, path, result) {
    for (const lang of _langs(concept)) {
      const localized = _loc(concept, lang);
      if (!localized) {
        continue;
      }

      const terms = localized.terms ?? [];
      for (let index = 0; index < terms.length; index++) {
        const term = terms[index];

        // Prefer the term's own `type`; fall back to its JSON form when
        // the term provides a `toJSON` method.
        let type = term.type;
        if (type == null && typeof term.toJSON === 'function') {
          type = term.toJSON().type;
        }

        // A missing/empty type is not reported by this rule.
        if (!type || VALID_DESIGNATION_TYPES.has(type)) {
          continue;
        }
        result.addError(
          `${path}localizations.${lang}.terms[${index}].type`,
          `Unknown designation type '${type}'`,
        );
      }
    }
  }
}
|
|
44
48
|
|
|
45
49
|
/**
 * Reports an error for every localization whose entry status is set but
 * is not a member of VALID_ENTRY_STATUSES.
 */
export class EntryStatusRule extends ValidationRule {
  constructor() {
    super('entry-status');
  }

  /**
   * @param {object} concept — the concept whose localizations are checked
   * @param {string} path — prefix prepended to each reported error path
   * @param {object} result — collector; errors are reported via `addError`
   */
  validate(concept, path, result) {
    for (const lang of _langs(concept)) {
      const localized = _loc(concept, lang);
      if (!localized) {
        continue;
      }

      // Accept either the camelCase or the snake_case field name.
      const status = localized.entryStatus ?? localized.entry_status;
      const isAcceptable = !status || VALID_ENTRY_STATUSES.has(status);
      if (!isAcceptable) {
        result.addError(
          `${path}localizations.${lang}.entry_status`,
          `Unknown entry status '${status}'`,
        );
      }
    }
  }
}
|
|
59
63
|
|
|
@@ -66,34 +70,37 @@ export class ConceptValidator {
|
|
|
66
70
|
}
|
|
67
71
|
|
|
68
72
|
validate(concept) {
|
|
69
|
-
const
|
|
70
|
-
const
|
|
73
|
+
const result = new ValidationResult();
|
|
74
|
+
const hasModelApi = typeof concept.localization === 'function';
|
|
71
75
|
|
|
72
|
-
if (!
|
|
73
|
-
|
|
76
|
+
if (!concept.id) {
|
|
77
|
+
result.addError('id', 'Concept must have an id');
|
|
74
78
|
}
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
79
|
+
|
|
80
|
+
const langs = hasModelApi ? concept.languages : Object.keys(concept.localizations ?? {});
|
|
81
|
+
if (langs.length === 0) {
|
|
82
|
+
result.addWarning('localizations', 'Concept must have at least one localization');
|
|
83
|
+
} else if (hasModelApi) {
|
|
84
|
+
for (const lang of langs) {
|
|
85
|
+
const lc = concept.localization(lang);
|
|
86
|
+
if (!lc || lc.terms.length === 0) {
|
|
87
|
+
result.addWarning(`localizations.${lang}.terms`,
|
|
88
|
+
`Localization '${lang}' must have at least one term`);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
78
91
|
} else {
|
|
79
|
-
for (const [lang, lc] of Object.entries(
|
|
92
|
+
for (const [lang, lc] of Object.entries(concept.localizations ?? {})) {
|
|
80
93
|
if (!lc.terms || lc.terms.length === 0) {
|
|
81
|
-
|
|
82
|
-
`
|
|
83
|
-
`Localization '${lang}' must have at least one term`, 'warning'));
|
|
94
|
+
result.addWarning(`localizations.${lang}.terms`,
|
|
95
|
+
`Localization '${lang}' must have at least one term`);
|
|
84
96
|
}
|
|
85
97
|
}
|
|
86
98
|
}
|
|
87
99
|
|
|
88
100
|
for (const rule of this._rules) {
|
|
89
|
-
|
|
101
|
+
rule.validate(concept, '', result);
|
|
90
102
|
}
|
|
91
103
|
|
|
92
|
-
return
|
|
93
|
-
valid: errors.filter(e => e.severity === 'error').length === 0,
|
|
94
|
-
errors: errors.filter(e => e.severity === 'error'),
|
|
95
|
-
warnings: errors.filter(e => e.severity === 'warning'),
|
|
96
|
-
};
|
|
104
|
+
return result;
|
|
97
105
|
}
|
|
98
106
|
}
|
|
99
|
-
|