glossarist 0.3.7 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/concept-reader.js +1 -1
- package/src/gcr-reader.js +1 -1
- package/src/index.js +1 -1
- package/src/models/base.js +14 -0
- package/src/models/concept.js +0 -5
- package/src/models/localized-concept.js +0 -14
- package/src/reference-resolver.js +58 -183
- package/src/render-classification.js +12 -52
- package/src/validators/concept-validator.js +18 -33
- package/src/validators/v3-rules.js +32 -43
package/package.json
CHANGED
package/src/concept-reader.js
CHANGED
|
@@ -20,7 +20,7 @@ function assertDir(dir, fnName) {
|
|
|
20
20
|
*
|
|
21
21
|
* @example
|
|
22
22
|
* const concepts = readConcepts('./geolexica-v2/');
|
|
23
|
-
* console.log(concepts[0].
|
|
23
|
+
* console.log(concepts[0].localization('eng').terms[0].designation);
|
|
24
24
|
*/
|
|
25
25
|
export function readConcepts(dir) {
|
|
26
26
|
assertDir(dir, 'readConcepts');
|
package/src/gcr-reader.js
CHANGED
|
@@ -418,7 +418,7 @@ export class GcrPackage {
|
|
|
418
418
|
*
|
|
419
419
|
* @example
|
|
420
420
|
* const concept = parseConceptYaml('termid: "001"\neng:\n terms:\n - designation: test', '001');
|
|
421
|
-
* console.log(concept.
|
|
421
|
+
* console.log(concept.localization('eng').terms[0].designation); // "test"
|
|
422
422
|
*/
|
|
423
423
|
export function parseConceptYaml(raw, context) {
|
|
424
424
|
return conceptParser.parse(raw, context);
|
package/src/index.js
CHANGED
|
@@ -7,7 +7,7 @@ export { ConceptCollection } from './concept-collection.js';
|
|
|
7
7
|
export { ManagedConceptCollection } from './managed-concept-collection.js';
|
|
8
8
|
export { validateConcept, validateRegister, validateGcrPackage, createConceptValidator, ValidationError, ValidationRule, ValidationResult, RegisterValidator, GcrValidator } from './validators/index.js';
|
|
9
9
|
export { conceptUuid, localizedConceptUuid, uuidV5 } from './uuid.js';
|
|
10
|
-
export { ReferenceResolver, Reference, referenceResolver } from './reference-resolver.js';
|
|
10
|
+
export { ReferenceResolver, Reference, referenceResolver, resolveBibliographyRecord } from './reference-resolver.js';
|
|
11
11
|
export { parseMention } from './reference-mention.js';
|
|
12
12
|
export { ReferenceClassifier } from './render-classification.js';
|
|
13
13
|
export { V1Reader, migrateV1ToV2 } from './v1-reader.js';
|
package/src/models/base.js
CHANGED
|
@@ -15,4 +15,18 @@ export class GlossaristModel {
|
|
|
15
15
|
clone() {
|
|
16
16
|
return this.constructor.fromJSON(JSON.parse(JSON.stringify(this.toJSON())));
|
|
17
17
|
}
|
|
18
|
+
|
|
19
|
+
_lazy(cacheKey, rawKey, wrapFn) {
|
|
20
|
+
if (this[cacheKey] === null) {
|
|
21
|
+
this[cacheKey] = this[rawKey].map(wrapFn);
|
|
22
|
+
}
|
|
23
|
+
return this[cacheKey];
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
_serialize(obj, jsonKey, cacheKey, rawKey) {
|
|
27
|
+
const items = this[cacheKey] ?? (this[rawKey].length > 0 ? this[rawKey] : []);
|
|
28
|
+
if (items.length > 0) {
|
|
29
|
+
obj[jsonKey] = items.map(i => (i instanceof GlossaristModel) ? i.toJSON() : i);
|
|
30
|
+
}
|
|
31
|
+
}
|
|
18
32
|
}
|
package/src/models/concept.js
CHANGED
|
@@ -34,11 +34,6 @@ export class Concept extends GlossaristModel {
|
|
|
34
34
|
return this.languages;
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
-
/** @deprecated Use localization(lang) for model access, or toJSON().localizations for raw data */
|
|
38
|
-
get localizations() {
|
|
39
|
-
return this._rawLocalizations;
|
|
40
|
-
}
|
|
41
|
-
|
|
42
37
|
localization(lang) {
|
|
43
38
|
if (!(lang in this._rawLocalizations)) return undefined;
|
|
44
39
|
if (!this._cache[lang]) {
|
package/src/models/localized-concept.js
CHANGED
|
@@ -131,20 +131,6 @@ export class LocalizedConcept extends GlossaristModel {
|
|
|
131
131
|
return obj;
|
|
132
132
|
}
|
|
133
133
|
|
|
134
|
-
_lazy(cacheKey, rawKey, wrapFn) {
|
|
135
|
-
if (this[cacheKey] === null) {
|
|
136
|
-
this[cacheKey] = this[rawKey].map(wrapFn);
|
|
137
|
-
}
|
|
138
|
-
return this[cacheKey];
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
_serialize(obj, jsonKey, cacheKey, rawKey) {
|
|
142
|
-
const items = this[cacheKey] ?? (this[rawKey].length > 0 ? this[rawKey] : []);
|
|
143
|
-
if (items.length > 0) {
|
|
144
|
-
obj[jsonKey] = items.map(i => (typeof i.toJSON === 'function') ? i.toJSON() : i);
|
|
145
|
-
}
|
|
146
|
-
}
|
|
147
|
-
|
|
148
134
|
static fromJSON(data) {
|
|
149
135
|
return new LocalizedConcept(data);
|
|
150
136
|
}
|
package/src/reference-resolver.js
CHANGED
|
@@ -2,29 +2,11 @@ import { ConceptRef } from './models/concept-ref.js';
|
|
|
2
2
|
import { parseMention } from './reference-mention.js';
|
|
3
3
|
|
|
4
4
|
export class Reference {
|
|
5
|
-
/**
|
|
6
|
-
* @param {string} type — the structural kind of the reference
|
|
7
|
-
* ('concept', 'dataset', 'bibliography', 'typed-ref',
|
|
8
|
-
* 'standard').
|
|
9
|
-
* @param {string | null} target — the legacy flat display
|
|
10
|
-
* string. Kept for backward compat with callers that only
|
|
11
|
-
* read `r.target`.
|
|
12
|
-
* @param {string | null} [relationship] — the type of the
|
|
13
|
-
* relationship that produced this reference (e.g. 'see',
|
|
14
|
-
* 'supersedes', 'source').
|
|
15
|
-
* @param {string | null} [source] — a JSON-pointer-ish path
|
|
16
|
-
* indicating where in the concept the reference was
|
|
17
|
-
* extracted from.
|
|
18
|
-
* @param {object} [extras] — additional fields (v8+):
|
|
19
|
-
* `citation`, `sourceId`, `resolution`, `lookupKey`,
|
|
20
|
-
* `label`, `quoted`, `uri`. All optional.
|
|
21
|
-
*/
|
|
22
5
|
constructor(type, target, relationship, source, extras = {}) {
|
|
23
6
|
this.type = type;
|
|
24
7
|
this.target = target;
|
|
25
8
|
this.relationship = relationship ?? null;
|
|
26
9
|
this.source = source ?? null;
|
|
27
|
-
|
|
28
10
|
this.uri = extras.uri ?? null;
|
|
29
11
|
this.citation = extras.citation ?? null;
|
|
30
12
|
this.sourceId = extras.sourceId ?? null;
|
|
@@ -41,29 +23,26 @@ function refTarget(rc) {
|
|
|
41
23
|
return '';
|
|
42
24
|
}
|
|
43
25
|
|
|
26
|
+
export function resolveBibliographyRecord(citationRef, registry) {
|
|
27
|
+
if (!citationRef?.source || !citationRef?.id) return null;
|
|
28
|
+
const bioColl = registry[`bibliography:${citationRef.source}`]?.concepts;
|
|
29
|
+
if (!bioColl) return null;
|
|
30
|
+
if (citationRef.version) {
|
|
31
|
+
return bioColl.byIdAnd(citationRef.id, citationRef.version);
|
|
32
|
+
}
|
|
33
|
+
return bioColl.byId(citationRef.id);
|
|
34
|
+
}
|
|
35
|
+
|
|
44
36
|
export class ReferenceResolver {
|
|
45
|
-
/**
|
|
46
|
-
* Extract all embedded references from a concept's localizations.
|
|
47
|
-
*
|
|
48
|
-
* Walks definitions, notes, examples, and annotations text.
|
|
49
|
-
* For each `{{...}}` mention, runs `parseMention` to
|
|
50
|
-
* classify the form, then dispatches:
|
|
51
|
-
* - 'cite-ref' → look up the key in concept.sources; emit
|
|
52
|
-
* Bibliography Reference with the Citation.
|
|
53
|
-
* - 'numeric' → emit Concept Reference with the bare id
|
|
54
|
-
* (existing behavior).
|
|
55
|
-
* - 'unresolved' → do not emit a Reference.
|
|
56
|
-
*
|
|
57
|
-
* @param {Concept} concept
|
|
58
|
-
* @returns {Reference[]}
|
|
59
|
-
*/
|
|
60
37
|
extractReferences(concept) {
|
|
61
38
|
const refs = [];
|
|
62
39
|
|
|
63
40
|
for (const rc of concept.relatedConcepts) {
|
|
64
41
|
const target = refTarget(rc);
|
|
65
42
|
if (target) {
|
|
66
|
-
refs.push(new Reference('concept', target, rc.type, 'relatedConcepts'
|
|
43
|
+
refs.push(new Reference('concept', target, rc.type, 'relatedConcepts', {
|
|
44
|
+
lookupKey: { id: target },
|
|
45
|
+
}));
|
|
67
46
|
}
|
|
68
47
|
}
|
|
69
48
|
|
|
@@ -71,13 +50,11 @@ export class ReferenceResolver {
|
|
|
71
50
|
const lc = concept.localization(lang);
|
|
72
51
|
if (!lc) continue;
|
|
73
52
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
refs.push(new Reference('standard', ref, src.type, `localizations.${lang}.sources[${i}]`));
|
|
80
|
-
}
|
|
53
|
+
for (let i = 0; i < lc.sources.length; i++) {
|
|
54
|
+
const src = lc.sources[i];
|
|
55
|
+
const ref = src.origin?.toString() ?? '';
|
|
56
|
+
if (ref) {
|
|
57
|
+
refs.push(new Reference('standard', ref, src.type, `localizations.${lang}.sources[${i}]`));
|
|
81
58
|
}
|
|
82
59
|
}
|
|
83
60
|
|
|
@@ -92,37 +69,27 @@ export class ReferenceResolver {
|
|
|
92
69
|
return refs;
|
|
93
70
|
}
|
|
94
71
|
|
|
95
|
-
/**
|
|
96
|
-
* Collect all text fields from a localized concept, paired
|
|
97
|
-
* with diagnostic source paths.
|
|
98
|
-
*
|
|
99
|
-
* @param {LocalizedConcept} lc
|
|
100
|
-
* @param {string} lang
|
|
101
|
-
* @returns {{text: string, source: string}[]}
|
|
102
|
-
*/
|
|
103
72
|
_collectTexts(lc, lang) {
|
|
104
73
|
const out = [];
|
|
105
|
-
for (let i = 0;
|
|
74
|
+
for (let i = 0; i < lc.definitions.length; i++) {
|
|
106
75
|
const content = lc.definitions[i]?.content;
|
|
107
76
|
if (typeof content === 'string') {
|
|
108
77
|
out.push({ text: content, source: `localizations.${lang}.definitions[${i}].content` });
|
|
109
78
|
}
|
|
110
79
|
}
|
|
111
|
-
for (let i = 0;
|
|
112
|
-
const content =
|
|
113
|
-
? (lc.notes[i]?.content ?? '')
|
|
114
|
-
: String(lc.notes[i] ?? '');
|
|
80
|
+
for (let i = 0; i < lc.notes.length; i++) {
|
|
81
|
+
const content = lc.notes[i]?.content;
|
|
115
82
|
if (content) {
|
|
116
83
|
out.push({ text: content, source: `localizations.${lang}.notes[${i}].content` });
|
|
117
84
|
}
|
|
118
85
|
}
|
|
119
|
-
for (let i = 0;
|
|
86
|
+
for (let i = 0; i < lc.examples.length; i++) {
|
|
120
87
|
const content = lc.examples[i]?.content;
|
|
121
88
|
if (typeof content === 'string') {
|
|
122
89
|
out.push({ text: content, source: `localizations.${lang}.examples[${i}].content` });
|
|
123
90
|
}
|
|
124
91
|
}
|
|
125
|
-
for (let i = 0;
|
|
92
|
+
for (let i = 0; i < lc.annotations.length; i++) {
|
|
126
93
|
const content = lc.annotations[i]?.content;
|
|
127
94
|
if (typeof content === 'string') {
|
|
128
95
|
out.push({ text: content, source: `localizations.${lang}.annotations[${i}].content` });
|
|
@@ -131,15 +98,6 @@ export class ReferenceResolver {
|
|
|
131
98
|
return out;
|
|
132
99
|
}
|
|
133
100
|
|
|
134
|
-
/**
|
|
135
|
-
* Walk a single text string and emit References for each
|
|
136
|
-
* `{{...}}` mention.
|
|
137
|
-
*
|
|
138
|
-
* @param {string} text
|
|
139
|
-
* @param {string} source — diagnostic path
|
|
140
|
-
* @param {Concept} concept — the owning concept (for cite-ref lookup)
|
|
141
|
-
* @returns {Reference[]}
|
|
142
|
-
*/
|
|
143
101
|
_extractFromText(text, source, concept) {
|
|
144
102
|
const refs = [];
|
|
145
103
|
const re = /\{\{([^{}]*?)\}\}/g;
|
|
@@ -173,17 +131,6 @@ export class ReferenceResolver {
|
|
|
173
131
|
return refs;
|
|
174
132
|
}
|
|
175
133
|
|
|
176
|
-
/**
|
|
177
|
-
* Resolve a `cite-ref` parser result against the concept's
|
|
178
|
-
* sources list. Emits a Bibliography Reference with the
|
|
179
|
-
* resolved Citation (if found) or an unresolved Reference
|
|
180
|
-
* (if not).
|
|
181
|
-
*
|
|
182
|
-
* @param {MentionParseResult} parsed
|
|
183
|
-
* @param {string} source — diagnostic path
|
|
184
|
-
* @param {Concept} concept — the owning concept
|
|
185
|
-
* @returns {Reference}
|
|
186
|
-
*/
|
|
187
134
|
_resolveCiteRef(parsed, source, concept) {
|
|
188
135
|
const sourceEntry = concept?.findSourceById(parsed.key) ?? null;
|
|
189
136
|
if (!sourceEntry) {
|
|
@@ -215,118 +162,57 @@ export class ReferenceResolver {
|
|
|
215
162
|
);
|
|
216
163
|
}
|
|
217
164
|
|
|
218
|
-
/**
|
|
219
|
-
* Resolve a single reference against a registry (a map of
|
|
220
|
-
* datasetId → { concepts, register? }). The registry may also
|
|
221
|
-
* include 'bibliography:<source>' keys for bibliographic
|
|
222
|
-
* datasets.
|
|
223
|
-
*
|
|
224
|
-
* For a `type: 'bibliography'` Reference with an inline
|
|
225
|
-
* `citation`, the resolver first tries the bibliography
|
|
226
|
-
* registry (matching `citation.ref` by source/id/version);
|
|
227
|
-
* if not found, returns the inline Citation as a
|
|
228
|
-
* self-contained fallback.
|
|
229
|
-
*
|
|
230
|
-
* For a `type: 'bibliography'` Reference with a `uri` and
|
|
231
|
-
* `resolution.kind === 'bibliography-namespace'`, the
|
|
232
|
-
* resolver tries the bibliography registry by
|
|
233
|
-
* `resolution.source/id/version`.
|
|
234
|
-
*
|
|
235
|
-
* For `type: 'concept'` References with a `lookupKey.id`
|
|
236
|
-
* (id-match, short-id, or numeric), the resolver looks up
|
|
237
|
-
* the id in `lookupKey.dataset`'s ConceptCollection.
|
|
238
|
-
*
|
|
239
|
-
* Backward compat: when the second argument is a
|
|
240
|
-
* ConceptCollection (has `byId` but no `concepts` field), it
|
|
241
|
-
* is treated as a one-key registry of one default dataset.
|
|
242
|
-
*/
|
|
243
165
|
resolveReference(ref, registry) {
|
|
244
|
-
if (ref == null) return null;
|
|
166
|
+
if (ref == null || registry == null) return null;
|
|
245
167
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
// 1. cite:key form (Bibliography with inline Citation).
|
|
254
|
-
if (ref.type === 'bibliography' && ref.citation) {
|
|
255
|
-
const bioRecord = this._resolveBibliographyRecord(
|
|
256
|
-
ref.citation.ref,
|
|
257
|
-
registry,
|
|
258
|
-
);
|
|
259
|
-
if (bioRecord) return bioRecord;
|
|
260
|
-
return ref.citation;
|
|
168
|
+
switch (ref.type) {
|
|
169
|
+
case 'concept': return this._resolveConcept(ref, registry);
|
|
170
|
+
case 'bibliography': return this._resolveBibliography(ref, registry);
|
|
171
|
+
case 'dataset': return this._resolveDataset(ref, registry);
|
|
172
|
+
case 'typed-ref': return this._resolveTypedRef(ref, registry);
|
|
173
|
+
case 'standard': return this._resolveStandard(ref, registry);
|
|
174
|
+
default: return null;
|
|
261
175
|
}
|
|
176
|
+
}
|
|
262
177
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
if (
|
|
267
|
-
|
|
268
|
-
const bioRecord = this._resolveBibliographyRecord(
|
|
269
|
-
ref.resolution,
|
|
270
|
-
registry,
|
|
271
|
-
);
|
|
272
|
-
if (bioRecord) return bioRecord;
|
|
178
|
+
_resolveConcept(ref, registry) {
|
|
179
|
+
if (ref.lookupKey?.id) {
|
|
180
|
+
const dataset = ref.lookupKey.dataset;
|
|
181
|
+
if (dataset) {
|
|
182
|
+
return registry[dataset]?.concepts?.byId(ref.lookupKey.id) ?? null;
|
|
273
183
|
}
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
if (coll) {
|
|
278
|
-
return coll.byId(ref.resolution.conceptId);
|
|
279
|
-
}
|
|
184
|
+
for (const entry of Object.values(registry)) {
|
|
185
|
+
const found = entry?.concepts?.byId(ref.lookupKey.id);
|
|
186
|
+
if (found) return found;
|
|
280
187
|
}
|
|
281
188
|
return null;
|
|
282
189
|
}
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
if (ref.lookupKey?.id) {
|
|
286
|
-
const coll = registry[ref.lookupKey.dataset]?.concepts;
|
|
287
|
-
if (coll) return coll.byId(ref.lookupKey.id);
|
|
288
|
-
return null;
|
|
190
|
+
if (ref.uri && ref.resolution?.datasetId) {
|
|
191
|
+
return registry[ref.resolution.datasetId]?.concepts?.byId(ref.resolution.conceptId) ?? null;
|
|
289
192
|
}
|
|
193
|
+
return null;
|
|
194
|
+
}
|
|
290
195
|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
if (ref.type === 'concept' && ref.target) {
|
|
295
|
-
const defaultColl = registry._default?.concepts;
|
|
296
|
-
if (defaultColl) return defaultColl.byId(ref.target);
|
|
297
|
-
// Try every dataset in the registry as a fallback.
|
|
298
|
-
for (const entry of Object.values(registry)) {
|
|
299
|
-
if (entry?.concepts?.byId(ref.target)) {
|
|
300
|
-
return entry.concepts.byId(ref.target);
|
|
301
|
-
}
|
|
302
|
-
}
|
|
303
|
-
return null;
|
|
196
|
+
_resolveBibliography(ref, registry) {
|
|
197
|
+
if (ref.citation) {
|
|
198
|
+
return resolveBibliographyRecord(ref.citation.ref, registry) ?? ref.citation;
|
|
304
199
|
}
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
// (plan 06). For v8, return null.
|
|
308
|
-
if (ref.lookupKey?.designation) {
|
|
309
|
-
return null;
|
|
200
|
+
if (ref.uri && ref.resolution?.source) {
|
|
201
|
+
return resolveBibliographyRecord(ref.resolution, registry) ?? null;
|
|
310
202
|
}
|
|
203
|
+
return null;
|
|
204
|
+
}
|
|
311
205
|
|
|
206
|
+
_resolveDataset(_ref, _registry) {
|
|
312
207
|
return null;
|
|
313
208
|
}
|
|
314
209
|
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
*/
|
|
322
|
-
_resolveBibliographyRecord(citationRef, registry) {
|
|
323
|
-
if (!citationRef?.source || !citationRef?.id) return null;
|
|
324
|
-
const bioColl = registry[`bibliography:${citationRef.source}`]?.concepts;
|
|
325
|
-
if (!bioColl) return null;
|
|
326
|
-
if (citationRef.version) {
|
|
327
|
-
return bioColl.byIdAnd(citationRef.id, citationRef.version);
|
|
328
|
-
}
|
|
329
|
-
return bioColl.byId(citationRef.id);
|
|
210
|
+
_resolveTypedRef(_ref, _registry) {
|
|
211
|
+
return null;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
_resolveStandard(_ref, _registry) {
|
|
215
|
+
return null;
|
|
330
216
|
}
|
|
331
217
|
|
|
332
218
|
resolveAll(concept, registry) {
|
|
@@ -344,15 +230,4 @@ export class ReferenceResolver {
|
|
|
344
230
|
}
|
|
345
231
|
}
|
|
346
232
|
|
|
347
|
-
/**
|
|
348
|
-
* Type-guard for the single-collection case (backward compat).
|
|
349
|
-
* A ConceptCollection has `byId` but no `concepts` field.
|
|
350
|
-
*/
|
|
351
|
-
function isConceptCollection(x) {
|
|
352
|
-
return x != null
|
|
353
|
-
&& typeof x === 'object'
|
|
354
|
-
&& typeof x.byId === 'function'
|
|
355
|
-
&& !('concepts' in x);
|
|
356
|
-
}
|
|
357
|
-
|
|
358
233
|
export const referenceResolver = new ReferenceResolver();
|
package/src/render-classification.js
CHANGED
|
@@ -1,47 +1,26 @@
|
|
|
1
|
-
|
|
2
|
-
* Classify a Reference for rendering.
|
|
3
|
-
*
|
|
4
|
-
* The classifier is constructed once per render with a registry
|
|
5
|
-
* (and optional source dataset id). The classify() method is
|
|
6
|
-
* pure and side-effect-free.
|
|
7
|
-
*
|
|
8
|
-
* Each `Reference.type` is its own `_classifyXxx` method. The
|
|
9
|
-
* dispatch in classify() is closed for modification.
|
|
10
|
-
*/
|
|
1
|
+
import { resolveBibliographyRecord } from './reference-resolver.js';
|
|
11
2
|
|
|
12
3
|
export class ReferenceClassifier {
|
|
13
|
-
/**
|
|
14
|
-
* @param {object} registry — the deployment's dataset registry.
|
|
15
|
-
* @param {string} [sourceDatasetId] — the dataset the source
|
|
16
|
-
* concept belongs to; used to determine "same-dataset".
|
|
17
|
-
* @param {object} [options] — additional options (e.g. scope).
|
|
18
|
-
*/
|
|
19
4
|
constructor(registry = {}, sourceDatasetId = null, options = {}) {
|
|
20
5
|
this.registry = registry;
|
|
21
6
|
this.sourceDatasetId = sourceDatasetId;
|
|
22
7
|
this.options = options;
|
|
23
8
|
}
|
|
24
9
|
|
|
25
|
-
/**
|
|
26
|
-
* @param {Reference} ref
|
|
27
|
-
* @returns {string} — the classification (e.g. 'same-dataset',
|
|
28
|
-
* 'internal-citation', 'unresolved', etc.)
|
|
29
|
-
*/
|
|
30
10
|
classify(ref) {
|
|
31
11
|
if (ref == null) return 'unknown';
|
|
32
12
|
|
|
33
13
|
switch (ref.type) {
|
|
34
|
-
case 'concept':
|
|
35
|
-
case 'dataset':
|
|
14
|
+
case 'concept': return this._classifyConcept(ref);
|
|
15
|
+
case 'dataset': return this._classifyDataset(ref);
|
|
36
16
|
case 'bibliography': return this._classifyBibliography(ref);
|
|
37
|
-
case 'typed-ref':
|
|
38
|
-
case 'standard':
|
|
39
|
-
default:
|
|
17
|
+
case 'typed-ref': return this._classifyTypedRef(ref);
|
|
18
|
+
case 'standard': return 'legacy-standard';
|
|
19
|
+
default: return 'unknown';
|
|
40
20
|
}
|
|
41
21
|
}
|
|
42
22
|
|
|
43
23
|
_classifyConcept(ref) {
|
|
44
|
-
// 1. URI form, resolved to a dataset.
|
|
45
24
|
if (ref.uri) {
|
|
46
25
|
const dsId = ref.resolution?.datasetId;
|
|
47
26
|
if (!dsId) return 'unresolved';
|
|
@@ -49,21 +28,15 @@ export class ReferenceClassifier {
|
|
|
49
28
|
if (dsId === this.sourceDatasetId) return 'same-dataset';
|
|
50
29
|
return 'cross-dataset';
|
|
51
30
|
}
|
|
52
|
-
// 2. Unanchored designation.
|
|
53
31
|
if (ref.lookupKey?.designation) {
|
|
54
32
|
return 'unresolved-designation';
|
|
55
33
|
}
|
|
56
|
-
// 3. Id-style (id-match, short-id, numeric).
|
|
57
34
|
if (ref.lookupKey?.id) {
|
|
58
35
|
const dsId = ref.lookupKey.dataset;
|
|
59
36
|
if (!this.registry[dsId]) return 'unresolved';
|
|
60
37
|
if (dsId === this.sourceDatasetId) return 'same-dataset';
|
|
61
38
|
return 'cross-dataset';
|
|
62
39
|
}
|
|
63
|
-
// 4. Concept ref with target (legacy).
|
|
64
|
-
if (ref.target) {
|
|
65
|
-
return 'unresolved';
|
|
66
|
-
}
|
|
67
40
|
return 'unresolved';
|
|
68
41
|
}
|
|
69
42
|
|
|
@@ -74,18 +47,15 @@ export class ReferenceClassifier {
|
|
|
74
47
|
}
|
|
75
48
|
|
|
76
49
|
_classifyBibliography(ref) {
|
|
77
|
-
// 1. cite:key form: try the bibliography registry.
|
|
78
50
|
if (ref.citation) {
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
51
|
+
return resolveBibliographyRecord(ref.citation.ref, this.registry)
|
|
52
|
+
? 'internal-citation'
|
|
53
|
+
: 'self-contained-citation';
|
|
82
54
|
}
|
|
83
|
-
// 2. URI form: try the bibliography registry, then the
|
|
84
|
-
// resolution's datasetId (if it's a concept URI), else null.
|
|
85
55
|
if (ref.uri) {
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
56
|
+
return resolveBibliographyRecord(ref.resolution, this.registry)
|
|
57
|
+
? 'internal-citation'
|
|
58
|
+
: 'external-citation';
|
|
89
59
|
}
|
|
90
60
|
return 'unresolved-citation';
|
|
91
61
|
}
|
|
@@ -93,14 +63,4 @@ export class ReferenceClassifier {
|
|
|
93
63
|
_classifyTypedRef(_ref) {
|
|
94
64
|
return 'typed-ref';
|
|
95
65
|
}
|
|
96
|
-
|
|
97
|
-
_tryBibliography(citationRef) {
|
|
98
|
-
if (!citationRef?.source || !citationRef?.id) return null;
|
|
99
|
-
const bioColl = this.registry[`bibliography:${citationRef.source}`]?.concepts;
|
|
100
|
-
if (!bioColl) return null;
|
|
101
|
-
if (citationRef.version) {
|
|
102
|
-
return bioColl.byIdAnd(citationRef.id, citationRef.version) ?? null;
|
|
103
|
-
}
|
|
104
|
-
return bioColl.byId(citationRef.id) ?? null;
|
|
105
|
-
}
|
|
106
66
|
}
|
package/src/validators/concept-validator.js
CHANGED
|
@@ -9,18 +9,13 @@ const VALID_ENTRY_STATUSES = new Set([
|
|
|
9
9
|
'valid', 'draft', 'retired', 'notValid', 'superseded', 'withdrawn',
|
|
10
10
|
]);
|
|
11
11
|
|
|
12
|
-
const _langs = (c) =>
|
|
13
|
-
c.languages ?? (c.localizations ? Object.keys(c.localizations) : []);
|
|
14
|
-
|
|
15
|
-
const _loc = (c, lang) =>
|
|
16
|
-
typeof c.localization === 'function' ? c.localization(lang) : c.localizations?.[lang];
|
|
17
|
-
|
|
18
12
|
export class LanguageCodeRule extends ValidationRule {
|
|
19
13
|
constructor() { super('language-code'); }
|
|
20
14
|
validate(concept, path, result) {
|
|
21
|
-
for (const lang of
|
|
15
|
+
for (const lang of concept.languages) {
|
|
22
16
|
if (!/^[a-z]{3}$/.test(lang)) {
|
|
23
|
-
|
|
17
|
+
this.addIssue(result,
|
|
18
|
+
`${path}localizations.${lang}`,
|
|
24
19
|
`Invalid language code '${lang}': expected ISO 639-3 (3 lowercase letters)`);
|
|
25
20
|
}
|
|
26
21
|
}
|
|
@@ -30,15 +25,14 @@ export class LanguageCodeRule extends ValidationRule {
|
|
|
30
25
|
export class DesignationTypeRule extends ValidationRule {
|
|
31
26
|
constructor() { super('designation-type'); }
|
|
32
27
|
validate(concept, path, result) {
|
|
33
|
-
for (const lang of
|
|
34
|
-
const lc =
|
|
28
|
+
for (const lang of concept.languages) {
|
|
29
|
+
const lc = concept.localization(lang);
|
|
35
30
|
if (!lc) continue;
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
const t = terms[i];
|
|
39
|
-
const type = t.type ?? (typeof t.toJSON === 'function' ? t.toJSON().type : undefined);
|
|
31
|
+
for (let i = 0; i < lc.terms.length; i++) {
|
|
32
|
+
const type = lc.terms[i].type;
|
|
40
33
|
if (type && !VALID_DESIGNATION_TYPES.has(type)) {
|
|
41
|
-
|
|
34
|
+
this.addIssue(result,
|
|
35
|
+
`${path}localizations.${lang}.terms[${i}].type`,
|
|
42
36
|
`Unknown designation type '${type}'`);
|
|
43
37
|
}
|
|
44
38
|
}
|
|
@@ -49,13 +43,13 @@ export class DesignationTypeRule extends ValidationRule {
|
|
|
49
43
|
export class EntryStatusRule extends ValidationRule {
|
|
50
44
|
constructor() { super('entry-status'); }
|
|
51
45
|
validate(concept, path, result) {
|
|
52
|
-
for (const lang of
|
|
53
|
-
const lc =
|
|
46
|
+
for (const lang of concept.languages) {
|
|
47
|
+
const lc = concept.localization(lang);
|
|
54
48
|
if (!lc) continue;
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
`Unknown entry status '${
|
|
49
|
+
if (lc.entryStatus && !VALID_ENTRY_STATUSES.has(lc.entryStatus)) {
|
|
50
|
+
this.addIssue(result,
|
|
51
|
+
`${path}localizations.${lang}.entry_status`,
|
|
52
|
+
`Unknown entry status '${lc.entryStatus}'`);
|
|
59
53
|
}
|
|
60
54
|
}
|
|
61
55
|
}
|
|
@@ -71,30 +65,21 @@ export class ConceptValidator {
|
|
|
71
65
|
|
|
72
66
|
validate(concept) {
|
|
73
67
|
const result = new ValidationResult();
|
|
74
|
-
const hasModelApi = typeof concept.localization === 'function';
|
|
75
68
|
|
|
76
69
|
if (!concept.id) {
|
|
77
70
|
result.addError('id', 'Concept must have an id');
|
|
78
71
|
}
|
|
79
72
|
|
|
80
|
-
|
|
81
|
-
if (langs.length === 0) {
|
|
73
|
+
if (concept.languages.length === 0) {
|
|
82
74
|
result.addWarning('localizations', 'Concept must have at least one localization');
|
|
83
|
-
} else
|
|
84
|
-
for (const lang of
|
|
75
|
+
} else {
|
|
76
|
+
for (const lang of concept.languages) {
|
|
85
77
|
const lc = concept.localization(lang);
|
|
86
78
|
if (!lc || lc.terms.length === 0) {
|
|
87
79
|
result.addWarning(`localizations.${lang}.terms`,
|
|
88
80
|
`Localization '${lang}' must have at least one term`);
|
|
89
81
|
}
|
|
90
82
|
}
|
|
91
|
-
} else {
|
|
92
|
-
for (const [lang, lc] of Object.entries(concept.localizations ?? {})) {
|
|
93
|
-
if (!lc.terms || lc.terms.length === 0) {
|
|
94
|
-
result.addWarning(`localizations.${lang}.terms`,
|
|
95
|
-
`Localization '${lang}' must have at least one term`);
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
83
|
}
|
|
99
84
|
|
|
100
85
|
for (const rule of this._rules) {
|
package/src/validators/v3-rules.js
CHANGED
|
@@ -1,14 +1,8 @@
|
|
|
1
1
|
import { ValidationRule } from './validation-rule.js';
|
|
2
2
|
|
|
3
|
-
const _langs = (concept) =>
|
|
4
|
-
concept.languages ?? (concept.localizations ? Object.keys(concept.localizations) : []);
|
|
5
|
-
|
|
6
|
-
const _loc = (concept, lang) =>
|
|
7
|
-
typeof concept.localization === 'function' ? concept.localization(lang) : concept.localizations?.[lang];
|
|
8
|
-
|
|
9
3
|
const _eachLocalization = (concept, fn) => {
|
|
10
|
-
for (const lang of
|
|
11
|
-
const lc =
|
|
4
|
+
for (const lang of concept.languages) {
|
|
5
|
+
const lc = concept.localization(lang);
|
|
12
6
|
if (lc) fn(lang, lc);
|
|
13
7
|
}
|
|
14
8
|
};
|
|
@@ -19,7 +13,7 @@ export class RefShapeRule extends ValidationRule {
|
|
|
19
13
|
validate(concept, path, result) {
|
|
20
14
|
let sourceIdx = 0;
|
|
21
15
|
_eachLocalization(concept, (lang, lc) => {
|
|
22
|
-
const sources = lc.sources
|
|
16
|
+
const sources = lc.sources;
|
|
23
17
|
for (let i = 0; i < sources.length; i++) {
|
|
24
18
|
sourceIdx++;
|
|
25
19
|
const origin = sources[i].origin;
|
|
@@ -38,7 +32,7 @@ export class RefShapeRule extends ValidationRule {
|
|
|
38
32
|
}
|
|
39
33
|
});
|
|
40
34
|
|
|
41
|
-
const related = concept.relatedConcepts
|
|
35
|
+
const related = concept.relatedConcepts;
|
|
42
36
|
for (let i = 0; i < related.length; i++) {
|
|
43
37
|
const ref = related[i].ref;
|
|
44
38
|
if (!ref) continue;
|
|
@@ -56,7 +50,7 @@ export class LocalityCompletenessRule extends ValidationRule {
|
|
|
56
50
|
|
|
57
51
|
validate(concept, path, result) {
|
|
58
52
|
_eachLocalization(concept, (lang, lc) => {
|
|
59
|
-
const sources = lc.sources
|
|
53
|
+
const sources = lc.sources;
|
|
60
54
|
for (let i = 0; i < sources.length; i++) {
|
|
61
55
|
const origin = sources[i].origin;
|
|
62
56
|
if (!origin || !origin.locality) continue;
|
|
@@ -81,14 +75,14 @@ export class LocalizationConsistencyRule extends ValidationRule {
|
|
|
81
75
|
constructor() { super('localization-consistency'); }
|
|
82
76
|
|
|
83
77
|
validate(concept, path, result) {
|
|
84
|
-
const langs =
|
|
85
|
-
const data = concept.raw?.data ||
|
|
78
|
+
const langs = concept.languages;
|
|
79
|
+
const data = concept.raw?.data || {};
|
|
86
80
|
const declaredLangs = data.localized_concepts
|
|
87
81
|
? Object.keys(data.localized_concepts)
|
|
88
82
|
: langs;
|
|
89
83
|
|
|
90
84
|
for (const lang of declaredLangs) {
|
|
91
|
-
if (!concept.hasLocalization
|
|
85
|
+
if (!concept.hasLocalization(lang)) {
|
|
92
86
|
this.addIssue(result,
|
|
93
87
|
`${path}localizations.${lang}`,
|
|
94
88
|
`localized_concepts map has '${lang}' but no localization loaded`);
|
|
@@ -101,12 +95,10 @@ export class SchemaVersionRule extends ValidationRule {
|
|
|
101
95
|
constructor() { super('schema-version', 'warning'); }
|
|
102
96
|
|
|
103
97
|
validate(concept, path, result) {
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
if (version && String(version) !== '3') {
|
|
98
|
+
if (concept.schemaVersion && String(concept.schemaVersion) !== '3') {
|
|
107
99
|
this.addIssue(result,
|
|
108
100
|
`${path}schema_version`,
|
|
109
|
-
`schema_version is '${
|
|
101
|
+
`schema_version is '${concept.schemaVersion}', expected '3'`);
|
|
110
102
|
}
|
|
111
103
|
}
|
|
112
104
|
}
|
|
@@ -115,11 +107,8 @@ export class DomainRefRule extends ValidationRule {
|
|
|
115
107
|
constructor() { super('domain-ref', 'warning'); }
|
|
116
108
|
|
|
117
109
|
validate(concept, path, result) {
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
for (let i = 0; i < domains.length; i++) {
|
|
121
|
-
const domain = domains[i];
|
|
122
|
-
const json = typeof domain.toJSON === 'function' ? domain.toJSON() : domain;
|
|
110
|
+
for (let i = 0; i < concept.domains.length; i++) {
|
|
111
|
+
const json = concept.domains[i].toJSON();
|
|
123
112
|
if (!json.concept_id && !json.urn) {
|
|
124
113
|
this.addIssue(result,
|
|
125
114
|
`${path}domains[${i}]`,
|
|
@@ -134,7 +123,7 @@ export class UuidFormatRule extends ValidationRule {
|
|
|
134
123
|
|
|
135
124
|
validate(concept, path, result) {
|
|
136
125
|
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
137
|
-
const id = concept.id
|
|
126
|
+
const id = concept.id;
|
|
138
127
|
|
|
139
128
|
if (id && !UUID_RE.test(String(id))) {
|
|
140
129
|
if (String(id).includes('-') && String(id).length > 20) {
|
|
@@ -153,7 +142,7 @@ export class SourceUrnFormatRule extends ValidationRule {
|
|
|
153
142
|
const URN_RE = /^urn:[a-z0-9][a-z0-9-]{0,31}:[a-z0-9()+,\-.:=@;$_!*'%/?#]+$/i;
|
|
154
143
|
|
|
155
144
|
_eachLocalization(concept, (lang, lc) => {
|
|
156
|
-
const sources = lc.sources
|
|
145
|
+
const sources = lc.sources;
|
|
157
146
|
for (let i = 0; i < sources.length; i++) {
|
|
158
147
|
const source = lc.sources[i].origin?.ref?.source;
|
|
159
148
|
if (!source || !source.startsWith('urn:')) continue;
|
|
@@ -180,23 +169,23 @@ function _findCiteMentions(concept) {
|
|
|
180
169
|
}
|
|
181
170
|
};
|
|
182
171
|
|
|
183
|
-
for (const lang of
|
|
184
|
-
const lc =
|
|
172
|
+
for (const lang of concept.languages) {
|
|
173
|
+
const lc = concept.localization(lang);
|
|
185
174
|
if (!lc) continue;
|
|
186
175
|
|
|
187
|
-
for (let i = 0;
|
|
176
|
+
for (let i = 0; lc.definitions[i]; i++) {
|
|
188
177
|
walkText(lc.definitions[i]?.content, `localizations.${lang}.definitions[${i}].content`);
|
|
189
178
|
}
|
|
190
|
-
for (let i = 0;
|
|
179
|
+
for (let i = 0; lc.notes[i]; i++) {
|
|
191
180
|
const content = typeof lc.notes[i] === 'object'
|
|
192
181
|
? (lc.notes[i]?.content ?? '')
|
|
193
182
|
: String(lc.notes[i] ?? '');
|
|
194
183
|
walkText(content, `localizations.${lang}.notes[${i}].content`);
|
|
195
184
|
}
|
|
196
|
-
for (let i = 0;
|
|
185
|
+
for (let i = 0; lc.examples[i]; i++) {
|
|
197
186
|
walkText(lc.examples[i]?.content, `localizations.${lang}.examples[${i}].content`);
|
|
198
187
|
}
|
|
199
|
-
for (let i = 0;
|
|
188
|
+
for (let i = 0; lc.annotations[i]; i++) {
|
|
200
189
|
walkText(lc.annotations[i]?.content, `localizations.${lang}.annotations[${i}].content`);
|
|
201
190
|
}
|
|
202
191
|
}
|
|
@@ -212,13 +201,13 @@ function _findDuplicateSourceIds(concept) {
|
|
|
212
201
|
seen.get(source.id).push(source);
|
|
213
202
|
};
|
|
214
203
|
|
|
215
|
-
for (const source of
|
|
216
|
-
for (const lang of
|
|
217
|
-
const lc =
|
|
204
|
+
for (const source of concept.sources) record(source);
|
|
205
|
+
for (const lang of concept.languages) {
|
|
206
|
+
const lc = concept.localization(lang);
|
|
218
207
|
if (!lc) continue;
|
|
219
|
-
for (const source of (lc.sources
|
|
220
|
-
for (const designation of
|
|
221
|
-
for (const source of
|
|
208
|
+
for (const source of (lc.sources)) record(source);
|
|
209
|
+
for (const designation of lc.terms) {
|
|
210
|
+
for (const source of designation.sources) record(source);
|
|
222
211
|
}
|
|
223
212
|
}
|
|
224
213
|
|
|
@@ -231,17 +220,17 @@ function _findDuplicateSourceIds(concept) {
|
|
|
231
220
|
|
|
232
221
|
function _collectSourceIds(concept) {
|
|
233
222
|
const ids = new Set();
|
|
234
|
-
for (const source of
|
|
223
|
+
for (const source of concept.sources) {
|
|
235
224
|
if (source?.id != null) ids.add(source.id);
|
|
236
225
|
}
|
|
237
|
-
for (const lang of
|
|
238
|
-
const lc =
|
|
226
|
+
for (const lang of concept.languages) {
|
|
227
|
+
const lc = concept.localization(lang);
|
|
239
228
|
if (!lc) continue;
|
|
240
|
-
for (const source of (lc.sources
|
|
229
|
+
for (const source of (lc.sources)) {
|
|
241
230
|
if (source?.id != null) ids.add(source.id);
|
|
242
231
|
}
|
|
243
|
-
for (const designation of
|
|
244
|
-
for (const source of
|
|
232
|
+
for (const designation of lc.terms) {
|
|
233
|
+
for (const source of designation.sources) {
|
|
245
234
|
if (source?.id != null) ids.add(source.id);
|
|
246
235
|
}
|
|
247
236
|
}
|