glossarist 0.3.8 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/concept-parser.js +7 -1
- package/src/dataset-asset.js +3 -0
- package/src/entity-directory.js +43 -0
- package/src/gcr-reader.js +52 -4
- package/src/gcr-writer.js +3 -1
- package/src/index.d.ts +40 -7
- package/src/index.js +19 -2
- package/src/managed-concept-collection.js +11 -2
- package/src/models/bibliography-data.js +43 -0
- package/src/models/bibliography-entry.js +26 -0
- package/src/models/concept.js +27 -0
- package/src/models/designation.js +2 -14
- package/src/models/figure.js +71 -0
- package/src/models/formula.js +21 -0
- package/src/models/index.js +12 -1
- package/src/models/localized-string.js +19 -0
- package/src/models/non-verb-rep.js +15 -13
- package/src/models/non-verbal-entity.js +39 -0
- package/src/models/non-verbal-reference.js +35 -0
- package/src/models/non-verbal-references.js +16 -0
- package/src/models/registrable.js +25 -0
- package/src/models/shared-non-verbal-entity.js +28 -0
- package/src/models/table.js +21 -0
- package/src/reference-mention.js +19 -3
- package/src/reference-resolver.js +78 -2
- package/src/render-classification.js +10 -1
- package/src/validators/asset-index.js +66 -0
- package/src/validators/index.js +5 -1
- package/src/validators/v3-rules.js +147 -0
package/package.json
CHANGED
package/src/concept-parser.js
CHANGED
|
@@ -3,7 +3,7 @@ import { Concept } from './models/concept.js';
|
|
|
3
3
|
import { RelatedConcept } from './models/related-concept.js';
|
|
4
4
|
import { InvalidInputError, YamlParseError } from './errors.js';
|
|
5
5
|
|
|
6
|
-
const STRUCTURAL_KEYS = new Set(['termid', 'term']);
|
|
6
|
+
const STRUCTURAL_KEYS = new Set(['termid', 'term', 'figures', 'tables', 'formulas']);
|
|
7
7
|
|
|
8
8
|
export class ConceptParser {
|
|
9
9
|
parse(raw, context) {
|
|
@@ -51,6 +51,9 @@ export class ConceptParser {
|
|
|
51
51
|
id: String(doc.termid),
|
|
52
52
|
term: doc.term || null,
|
|
53
53
|
localizations,
|
|
54
|
+
figures: doc.figures,
|
|
55
|
+
tables: doc.tables,
|
|
56
|
+
formulas: doc.formulas,
|
|
54
57
|
raw: doc,
|
|
55
58
|
});
|
|
56
59
|
}
|
|
@@ -76,6 +79,9 @@ export class ConceptParser {
|
|
|
76
79
|
groups: mc.data.groups,
|
|
77
80
|
dates: mc.dates ?? mc.data?.dates,
|
|
78
81
|
sources: mc.sources ?? mc.data?.sources,
|
|
82
|
+
figures: mc.data?.figures,
|
|
83
|
+
tables: mc.data?.tables,
|
|
84
|
+
formulas: mc.data?.formulas,
|
|
79
85
|
status: mc.status,
|
|
80
86
|
schemaVersion: mc.schema_version,
|
|
81
87
|
raw: mc,
|
package/src/dataset-asset.js
CHANGED
|
@@ -12,6 +12,9 @@
|
|
|
12
12
|
// Frozen list of dataset asset descriptors. Each entry pairs a `path`
// with a `type` of either 'file' or 'directory'.
// NOTE(review): paths look relative to the package root — confirm against callers.
const DATASET_ASSETS = Object.freeze([
|
|
13
13
|
{ path: 'bibliography.yaml', type: 'file' },
|
|
14
14
|
{ path: 'images', type: 'directory' },
|
|
15
|
+
{ path: 'figures', type: 'directory' },
|
|
16
|
+
{ path: 'tables', type: 'directory' },
|
|
17
|
+
{ path: 'formulas', type: 'directory' },
|
|
15
18
|
]);
|
|
16
19
|
|
|
17
20
|
const FILE_ASSETS = Object.freeze(
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/** File extension shared by every entity document path built or parsed here. */
const YAML_EXT = '.yaml';

/**
 * Maps each non-verbal entity type to the directory holding its YAML files.
 *
 * NOTE: Object.freeze() does not disable Map#set / Map#delete, so this Map is
 * read-only by convention only. Do not mutate it.
 */
const ENTITY_DIRECTORIES = Object.freeze(new Map([
  ['figure', 'figures'],
  ['table', 'tables'],
  ['formula', 'formulas'],
]));

/** Known entity types, in the insertion order of ENTITY_DIRECTORIES. */
const ENTITY_TYPES = Object.freeze([...ENTITY_DIRECTORIES.keys()]);

/**
 * Look up the directory name for an entity type.
 *
 * @param {string} type - Entity type, e.g. 'figure'.
 * @returns {string} Directory name, e.g. 'figures'.
 * @throws {RangeError} If `type` is not a key of ENTITY_DIRECTORIES.
 */
function entityDir(type) {
  const dir = ENTITY_DIRECTORIES.get(type);
  if (!dir) throw new RangeError(`Unknown entity type: ${type}`);
  return dir;
}

/**
 * Build the path of an entity document: `<dir>/<id>.yaml`.
 *
 * The id is interpolated as-is; no validation is performed on it.
 *
 * @param {string} type - Entity type.
 * @param {string} id - Entity id.
 * @returns {string} Path of the entity's YAML file.
 * @throws {RangeError} If `type` is unknown.
 */
function entityPath(type, id) {
  return `${entityDir(type)}/${id}${YAML_EXT}`;
}

/**
 * Tell whether `type` is a registered entity type.
 *
 * @param {string} type
 * @returns {boolean}
 */
function isKnownEntityType(type) {
  return ENTITY_DIRECTORIES.has(type);
}

/**
 * Inverse of entityPath(): recover `{ type, id }` from a path.
 *
 * Returns null when the path is not a string, is not under a known entity
 * directory, does not end in `.yaml`, or has an empty id.
 *
 * @param {string} zipPath - Path to inspect.
 * @returns {{ type: string, id: string } | null}
 */
function parseEntityPath(zipPath) {
  // Non-string input is "not an entity path", not a programming error.
  if (typeof zipPath !== 'string') return null;
  for (const [type, dir] of ENTITY_DIRECTORIES) {
    const prefix = `${dir}/`;
    if (!zipPath.startsWith(prefix) || !zipPath.endsWith(YAML_EXT)) continue;
    const id = zipPath.slice(prefix.length, -YAML_EXT.length);
    if (id) return { type, id };
  }
  return null;
}

export {
  ENTITY_DIRECTORIES,
  ENTITY_TYPES,
  entityDir,
  entityPath,
  isKnownEntityType,
  parseEntityPath,
};
|
package/src/gcr-reader.js
CHANGED
|
@@ -6,6 +6,9 @@ import { COMPILED_FORMATS, parseCompiledPath, compiledPath } from './compiled-fo
|
|
|
6
6
|
import { DATASET_ASSETS, findFileAsset, findDirectoryAssetPath } from './dataset-asset.js';
|
|
7
7
|
import { GcrMetadata } from './models/gcr-metadata.js';
|
|
8
8
|
import { naturalSort } from './sort.js';
|
|
9
|
+
import { NonVerbalEntity } from './models/non-verbal-entity.js';
|
|
10
|
+
import { BibliographyData } from './models/bibliography-data.js';
|
|
11
|
+
import { entityDir, entityPath, ENTITY_TYPES, parseEntityPath } from './entity-directory.js';
|
|
9
12
|
|
|
10
13
|
export { naturalSort } from './sort.js';
|
|
11
14
|
|
|
@@ -283,12 +286,13 @@ export class GcrPackage {
|
|
|
283
286
|
}
|
|
284
287
|
|
|
285
288
|
/**
|
|
286
|
-
* Read bibliography.yaml from the package as a
|
|
287
|
-
* @returns {Promise<
|
|
289
|
+
* Read and parse bibliography.yaml from the package as a BibliographyData instance.
|
|
290
|
+
* @returns {Promise<BibliographyData | null>}
|
|
288
291
|
*/
|
|
289
292
|
async bibliography() {
|
|
290
|
-
const
|
|
291
|
-
|
|
293
|
+
const raw = await this._readText('bibliography.yaml');
|
|
294
|
+
if (raw === null) return null;
|
|
295
|
+
return BibliographyData.fromYAML(raw);
|
|
292
296
|
}
|
|
293
297
|
|
|
294
298
|
/**
|
|
@@ -403,6 +407,50 @@ export class GcrPackage {
|
|
|
403
407
|
// Internal alias: forwards `filePath` to this.readText() and returns its result unchanged.
async _readText(filePath) {
|
|
404
408
|
return this.readText(filePath);
|
|
405
409
|
}
|
|
410
|
+
|
|
411
|
+
// --- Non-verbal entity directories (figures/, tables/, formulas/) ---
|
|
412
|
+
|
|
413
|
+
async entityIds(type) {
|
|
414
|
+
const prefix = `${entityDir(type)}/`;
|
|
415
|
+
const ids = [];
|
|
416
|
+
this._zip.forEach((relativePath, entry) => {
|
|
417
|
+
if (!entry.dir && relativePath.startsWith(prefix) && relativePath.endsWith('.yaml')) {
|
|
418
|
+
ids.push(relativePath.slice(prefix.length, -'.yaml'.length));
|
|
419
|
+
}
|
|
420
|
+
});
|
|
421
|
+
return ids.sort(naturalSort);
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
async entity(type, id) {
|
|
425
|
+
const raw = await this.readText(entityPath(type, id));
|
|
426
|
+
if (raw === null) return null;
|
|
427
|
+
const yamlData = yaml.load(raw);
|
|
428
|
+
return NonVerbalEntity.fromData({ ...yamlData, type });
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
async eachEntity(type, callback) {
|
|
432
|
+
for (const id of await this.entityIds(type)) {
|
|
433
|
+
const entity = await this.entity(type, id);
|
|
434
|
+
if (entity) await callback(entity, id);
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
async allEntities(type) {
|
|
439
|
+
const entities = [];
|
|
440
|
+
await this.eachEntity(type, (e) => { entities.push(e); });
|
|
441
|
+
return entities;
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
async entityTypes() {
|
|
445
|
+
const seen = new Set();
|
|
446
|
+
this._zip.forEach((relativePath, entry) => {
|
|
447
|
+
if (!entry.dir) {
|
|
448
|
+
const parsed = parseEntityPath(relativePath);
|
|
449
|
+
if (parsed) seen.add(parsed.type);
|
|
450
|
+
}
|
|
451
|
+
});
|
|
452
|
+
return ENTITY_TYPES.filter((t) => seen.has(t));
|
|
453
|
+
}
|
|
406
454
|
}
|
|
407
455
|
|
|
408
456
|
// --- Concept YAML parsing ---
|
package/src/gcr-writer.js
CHANGED
|
@@ -38,7 +38,9 @@ export class GcrWriter {
|
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
if (options.bibliography) {
|
|
41
|
-
|
|
41
|
+
const bib = options.bibliography;
|
|
42
|
+
const yamlStr = bib.toYAML ? bib.toYAML() : String(bib);
|
|
43
|
+
zip.file('bibliography.yaml', yamlStr);
|
|
42
44
|
}
|
|
43
45
|
|
|
44
46
|
if (options.images) {
|
package/src/index.d.ts
CHANGED
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
// Models
|
|
2
2
|
export {
|
|
3
3
|
GlossaristModel,
|
|
4
|
+
RegistrableModel,
|
|
4
5
|
Concept, LocalizedConcept,
|
|
5
|
-
Designation, Expression, Abbreviation, Symbol, GraphicalSymbol,
|
|
6
|
-
Citation, ConceptRef, ConceptSource, RelatedConcept,
|
|
7
|
-
|
|
6
|
+
Designation, Expression, Abbreviation, Symbol, LetterSymbol, GraphicalSymbol,
|
|
7
|
+
Citation, ConceptRef, ConceptSource, RelatedConcept,
|
|
8
|
+
DesignationRelationship, ConceptReference, ConceptDate,
|
|
9
|
+
DetailedDefinition, NonVerbRep, NON_VERBAL_TYPES,
|
|
10
|
+
NonVerbalEntity, SharedNonVerbalEntity,
|
|
11
|
+
Figure, FigureImage, Table, Formula,
|
|
12
|
+
NonVerbalReference, FigureReference, TableReference, FormulaReference,
|
|
13
|
+
BibliographyEntry, BibliographyData,
|
|
8
14
|
GcrMetadata, GcrStatistics,
|
|
9
|
-
RELATIONSHIP_TYPES, DATE_TYPES,
|
|
15
|
+
RELATIONSHIP_TYPES, DESIGNATION_RELATIONSHIP_TYPES, DATE_TYPES,
|
|
10
16
|
} from './models/index';
|
|
11
17
|
|
|
12
18
|
// GCR reader
|
|
@@ -26,16 +32,27 @@ export { ConceptCollection } from './concept-collection';
|
|
|
26
32
|
export { ManagedConceptCollection } from './managed-concept-collection';
|
|
27
33
|
|
|
28
34
|
// Validators
|
|
29
|
-
export {
|
|
35
|
+
export {
|
|
36
|
+
validateConcept, validateRegister, validateGcrPackage,
|
|
37
|
+
createConceptValidator,
|
|
38
|
+
ValidationError, ValidationRule, ValidationResult,
|
|
39
|
+
RegisterValidator, GcrValidator,
|
|
40
|
+
NonVerbalRefIntegrityRule, OrphanedImagesRule,
|
|
41
|
+
} from './validators/index';
|
|
42
|
+
export { AssetIndex } from './validators/asset-index';
|
|
30
43
|
|
|
31
44
|
// UUID
|
|
32
45
|
export { conceptUuid, localizedConceptUuid, uuidV5 } from './uuid';
|
|
33
46
|
|
|
34
47
|
// Reference resolution
|
|
35
|
-
export {
|
|
48
|
+
export {
|
|
49
|
+
ReferenceResolver, Reference, referenceResolver,
|
|
50
|
+
resolveBibliographyRecord, findNonVerbalEntity,
|
|
51
|
+
} from './reference-resolver';
|
|
36
52
|
|
|
37
53
|
export type MentionParseResult = {
|
|
38
|
-
kind: 'cite-ref' | 'urn-ref' | '
|
|
54
|
+
kind: 'cite-ref' | 'urn-ref' | 'fig-ref' | 'table-ref' | 'formula-ref'
|
|
55
|
+
| 'numeric' | 'designation' | 'unresolved';
|
|
39
56
|
key?: string;
|
|
40
57
|
uri?: string;
|
|
41
58
|
label?: string | null;
|
|
@@ -45,6 +62,14 @@ export type MentionParseResult = {
|
|
|
45
62
|
|
|
46
63
|
export function parseMention(raw: string): MentionParseResult;
|
|
47
64
|
|
|
65
|
+
export function fetchLocalizedString(
|
|
66
|
+
hash: Record<string, string> | null,
|
|
67
|
+
lang: string,
|
|
68
|
+
fallback?: string,
|
|
69
|
+
): string | null;
|
|
70
|
+
export function localizedStringIsEmpty(hash: Record<string, string> | null): boolean;
|
|
71
|
+
export function localizedStringIsPresent(hash: Record<string, string> | null): boolean;
|
|
72
|
+
|
|
48
73
|
// V1 support
|
|
49
74
|
export { V1Reader, migrateV1ToV2 } from './v1-reader';
|
|
50
75
|
|
|
@@ -66,3 +91,11 @@ export const DIRECTORY_ASSETS: readonly { path: string; type: string }[];
|
|
|
66
91
|
export function findFileAsset(path: string): { path: string; type: string } | undefined;
|
|
67
92
|
export function findDirectoryAssetPath(zipPath: string): { path: string; type: string } | undefined;
|
|
68
93
|
export function isDatasetAssetPath(zipPath: string): boolean;
|
|
94
|
+
|
|
95
|
+
// Entity directory registry
|
|
96
|
+
export const ENTITY_DIRECTORIES: ReadonlyMap<string, string>;
|
|
97
|
+
export const ENTITY_TYPES: readonly string[];
|
|
98
|
+
export function entityDir(type: string): string;
|
|
99
|
+
export function entityPath(type: string, id: string): string;
|
|
100
|
+
export function isKnownEntityType(type: string): boolean;
|
|
101
|
+
export function parseEntityPath(zipPath: string): { type: string; id: string } | null;
|
package/src/index.js
CHANGED
|
@@ -7,7 +7,7 @@ export { ConceptCollection } from './concept-collection.js';
|
|
|
7
7
|
export { ManagedConceptCollection } from './managed-concept-collection.js';
|
|
8
8
|
export { validateConcept, validateRegister, validateGcrPackage, createConceptValidator, ValidationError, ValidationRule, ValidationResult, RegisterValidator, GcrValidator } from './validators/index.js';
|
|
9
9
|
export { conceptUuid, localizedConceptUuid, uuidV5 } from './uuid.js';
|
|
10
|
-
export { ReferenceResolver, Reference, referenceResolver, resolveBibliographyRecord } from './reference-resolver.js';
|
|
10
|
+
export { ReferenceResolver, Reference, referenceResolver, resolveBibliographyRecord, findNonVerbalEntity } from './reference-resolver.js';
|
|
11
11
|
export { parseMention } from './reference-mention.js';
|
|
12
12
|
export { ReferenceClassifier } from './render-classification.js';
|
|
13
13
|
export { V1Reader, migrateV1ToV2 } from './v1-reader.js';
|
|
@@ -30,12 +30,29 @@ export {
|
|
|
30
30
|
|
|
31
31
|
export {
|
|
32
32
|
GlossaristModel,
|
|
33
|
+
RegistrableModel,
|
|
33
34
|
Register, Section,
|
|
34
35
|
REGISTER_STATUSES, ORDERING_METHODS,
|
|
35
36
|
Concept, LocalizedConcept,
|
|
36
37
|
Designation, Expression, Abbreviation, Symbol, GraphicalSymbol,
|
|
37
38
|
Citation, ConceptRef, ConceptSource, RelatedConcept, DesignationRelationship, ConceptReference, ConceptDate,
|
|
38
|
-
DetailedDefinition, NonVerbRep,
|
|
39
|
+
DetailedDefinition, NonVerbRep, NON_VERBAL_TYPES,
|
|
40
|
+
NonVerbalEntity, SharedNonVerbalEntity,
|
|
41
|
+
Figure, FigureImage, Table, Formula,
|
|
42
|
+
NonVerbalReference, FigureReference, TableReference, FormulaReference,
|
|
43
|
+
BibliographyEntry, BibliographyData,
|
|
44
|
+
fetchLocalizedString, localizedStringIsEmpty, localizedStringIsPresent,
|
|
39
45
|
GcrMetadata, GcrStatistics,
|
|
40
46
|
RELATIONSHIP_TYPES, DESIGNATION_RELATIONSHIP_TYPES, DATE_TYPES,
|
|
41
47
|
} from './models/index.js';
|
|
48
|
+
|
|
49
|
+
export { AssetIndex } from './validators/asset-index.js';
|
|
50
|
+
|
|
51
|
+
export {
|
|
52
|
+
ENTITY_DIRECTORIES,
|
|
53
|
+
ENTITY_TYPES,
|
|
54
|
+
entityDir,
|
|
55
|
+
entityPath,
|
|
56
|
+
isKnownEntityType,
|
|
57
|
+
parseEntityPath,
|
|
58
|
+
} from './entity-directory.js';
|
|
@@ -3,6 +3,7 @@ import { readConcepts, readRegister } from './concept-reader.js';
|
|
|
3
3
|
import { writeConcepts } from './concept-writer.js';
|
|
4
4
|
import { loadGcr } from './gcr-reader.js';
|
|
5
5
|
import { GcrWriter } from './gcr-writer.js';
|
|
6
|
+
import { BibliographyData } from './models/bibliography-data.js';
|
|
6
7
|
|
|
7
8
|
export class ManagedConceptCollection {
|
|
8
9
|
constructor() {
|
|
@@ -73,8 +74,16 @@ export class ManagedConceptCollection {
|
|
|
73
74
|
return this;
|
|
74
75
|
}
|
|
75
76
|
|
|
76
|
-
setBibliography(
|
|
77
|
-
|
|
77
|
+
setBibliography(bib) {
|
|
78
|
+
if (bib instanceof BibliographyData) {
|
|
79
|
+
this._bibliography = bib;
|
|
80
|
+
} else if (typeof bib === 'string') {
|
|
81
|
+
this._bibliography = BibliographyData.fromYAML(bib);
|
|
82
|
+
} else if (bib == null) {
|
|
83
|
+
this._bibliography = null;
|
|
84
|
+
} else {
|
|
85
|
+
this._bibliography = new BibliographyData(bib);
|
|
86
|
+
}
|
|
78
87
|
return this;
|
|
79
88
|
}
|
|
80
89
|
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import yaml from 'js-yaml';
|
|
2
|
+
import { GlossaristModel } from './base.js';
|
|
3
|
+
import { BibliographyEntry } from './bibliography-entry.js';
|
|
4
|
+
|
|
5
|
+
/**
 * A list of bibliography entries, serialized under a `bibliography` key.
 */
export class BibliographyData extends GlossaristModel {
  /**
   * @param {object | null} [data] - Source object. Entries are read from
   *   `data.bibliography`, falling back to `data.entries`; a value that is
   *   not an array is treated as an empty list. `null` behaves like `{}`.
   */
  constructor(data = {}) {
    super();
    // A default parameter only covers `undefined`; guard `null` explicitly.
    const source = data ?? {};
    const entriesData = source.bibliography ?? source.entries ?? [];
    this._rawEntries = Array.isArray(entriesData) ? entriesData : [];
    this._entries = null;
  }

  /**
   * Entries as BibliographyEntry instances, built from the raw list through
   * the `_lazy` helper.
   * NOTE(review): `_lazy` is defined outside this file — presumably it caches
   * the converted list in `_entries`; confirm.
   */
  get entries() {
    return this._lazy('_entries', '_rawEntries',
      e => e instanceof BibliographyEntry ? e : new BibliographyEntry(e));
  }

  /**
   * @param {*} id - Id to look for (compared with `===`).
   * @returns {BibliographyEntry | null} First entry with that id, or null.
   */
  find(id) {
    return this.entries.find(e => e.id === id) ?? null;
  }

  /** @returns {Array} The `id` of every entry, in entry order. */
  get keys() {
    return this.entries.map(e => e.id);
  }

  /** @returns {{ bibliography: object[] }} Plain-object form. */
  toJSON() {
    // Mapping an empty list already yields [], so no special case is needed.
    return { bibliography: this.entries.map(e => e.toJSON()) };
  }

  /** @returns {string} YAML dump of toJSON(). */
  toYAML() {
    return yaml.dump(this.toJSON());
  }

  /**
   * @param {string} yamlString - YAML document text.
   * @returns {BibliographyData} An empty instance when the document parses
   *   to null or undefined.
   */
  static fromYAML(yamlString) {
    const parsed = yaml.load(yamlString);
    return new BibliographyData(parsed ?? {});
  }

  /**
   * @param {object | null} data - Same shape the constructor accepts.
   * @returns {BibliographyData}
   */
  static fromJSON(data) {
    return new BibliographyData(data);
  }
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { GlossaristModel } from './base.js';
|
|
2
|
+
|
|
3
|
+
/**
 * One bibliography record with the fields `id`, `reference`, `title`,
 * `link` and `type`. Missing fields are stored as null.
 */
export class BibliographyEntry extends GlossaristModel {
  /**
   * @param {object} [data] - Source object; absent fields become null.
   */
  constructor(data = {}) {
    super();
    const { id, reference, title, link, type } = data;
    this.id = id ?? null;
    this.reference = reference ?? null;
    this.title = title ?? null;
    this.link = link ?? null;
    this.type = type ?? null;
  }

  /**
   * @returns {object} Plain object holding only the non-null fields, in the
   *   order id, reference, title, link, type.
   */
  toJSON() {
    const out = {};
    for (const field of ['id', 'reference', 'title', 'link', 'type']) {
      const value = this[field];
      if (value != null) out[field] = value;
    }
    return out;
  }

  /**
   * @param {object} data - Same shape the constructor accepts.
   * @returns {BibliographyEntry}
   */
  static fromJSON(data) {
    return new BibliographyEntry(data);
  }
}
|
package/src/models/concept.js
CHANGED
|
@@ -4,6 +4,9 @@ import { RelatedConcept } from './related-concept.js';
|
|
|
4
4
|
import { ConceptReference } from './concept-reference.js';
|
|
5
5
|
import { ConceptDate } from './concept-date.js';
|
|
6
6
|
import { ConceptSource } from './concept-source.js';
|
|
7
|
+
import { FigureReference } from './non-verbal-references.js';
|
|
8
|
+
import { TableReference } from './non-verbal-references.js';
|
|
9
|
+
import { FormulaReference } from './non-verbal-references.js';
|
|
7
10
|
|
|
8
11
|
export class Concept extends GlossaristModel {
|
|
9
12
|
constructor(data = {}) {
|
|
@@ -19,6 +22,12 @@ export class Concept extends GlossaristModel {
|
|
|
19
22
|
this.tags = Array.isArray(data.tags) ? [...data.tags] : [];
|
|
20
23
|
this.dates = _mapInstances(data.dates ?? [], ConceptDate);
|
|
21
24
|
this.sources = _mapInstances(data.sources ?? [], ConceptSource);
|
|
25
|
+
this._rawFigures = data.figures ?? [];
|
|
26
|
+
this._rawTables = data.tables ?? [];
|
|
27
|
+
this._rawFormulas = data.formulas ?? [];
|
|
28
|
+
this._figures = null;
|
|
29
|
+
this._tables = null;
|
|
30
|
+
this._formulas = null;
|
|
22
31
|
this.status = data.status ?? null;
|
|
23
32
|
this.schemaVersion = data.schemaVersion ?? data.schema_version ?? '3';
|
|
24
33
|
this.raw = data.raw ?? null;
|
|
@@ -107,6 +116,21 @@ export class Concept extends GlossaristModel {
|
|
|
107
116
|
return null;
|
|
108
117
|
}
|
|
109
118
|
|
|
119
|
+
get figures() {
|
|
120
|
+
return this._lazy('_figures', '_rawFigures',
|
|
121
|
+
r => FigureReference.fromJSON(r));
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
get tables() {
|
|
125
|
+
return this._lazy('_tables', '_rawTables',
|
|
126
|
+
r => TableReference.fromJSON(r));
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
get formulas() {
|
|
130
|
+
return this._lazy('_formulas', '_rawFormulas',
|
|
131
|
+
r => FormulaReference.fromJSON(r));
|
|
132
|
+
}
|
|
133
|
+
|
|
110
134
|
toJSON() {
|
|
111
135
|
const obj = { id: this.id };
|
|
112
136
|
if (this.term != null) obj.term = this.term;
|
|
@@ -141,6 +165,9 @@ export class Concept extends GlossaristModel {
|
|
|
141
165
|
if (this.sources.length > 0) {
|
|
142
166
|
obj.sources = this.sources.map(s => s.toJSON());
|
|
143
167
|
}
|
|
168
|
+
this._serialize(obj, 'figures', '_figures', '_rawFigures');
|
|
169
|
+
this._serialize(obj, 'tables', '_tables', '_rawTables');
|
|
170
|
+
this._serialize(obj, 'formulas', '_formulas', '_rawFormulas');
|
|
144
171
|
if (this.status != null) obj.status = this.status;
|
|
145
172
|
obj.schema_version = this.schemaVersion;
|
|
146
173
|
return obj;
|
|
@@ -1,23 +1,11 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { RegistrableModel } from './registrable.js';
|
|
2
2
|
import { ConceptSource } from './concept-source.js';
|
|
3
3
|
import { Pronunciation } from './pronunciation.js';
|
|
4
4
|
import { GrammarInfo } from './grammar-info.js';
|
|
5
5
|
import { RelatedConcept } from './related-concept.js';
|
|
6
6
|
import { DesignationRelationship, DESIGNATION_RELATIONSHIP_TYPES } from './designation-relationship.js';
|
|
7
7
|
|
|
8
|
-
export class Designation extends
|
|
9
|
-
static _registry = new Map();
|
|
10
|
-
|
|
11
|
-
static register(type, cls) {
|
|
12
|
-
Designation._registry.set(type, cls);
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
static fromData(data) {
|
|
16
|
-
if (data instanceof Designation) return data;
|
|
17
|
-
const Cls = Designation._registry.get(data?.type) ?? Designation;
|
|
18
|
-
return new Cls(data);
|
|
19
|
-
}
|
|
20
|
-
|
|
8
|
+
export class Designation extends RegistrableModel {
|
|
21
9
|
constructor(data = {}) {
|
|
22
10
|
super();
|
|
23
11
|
this.designation = data.designation ?? '';
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { SharedNonVerbalEntity } from './shared-non-verbal-entity.js';
|
|
2
|
+
import { NonVerbalEntity } from './non-verbal-entity.js';
|
|
3
|
+
|
|
4
|
+
/**
 * One image attached to a figure. Holds the fields `src`, `format`, `role`,
 * `width`, `height` and `scale`; absent fields are stored as null.
 */
export class FigureImage {
  /**
   * @param {object | null} [data] - Source object. `null` behaves like `{}`
   *   so that a null item in an image list does not throw.
   */
  constructor(data = {}) {
    // A default parameter only covers `undefined`; guard `null` explicitly.
    const source = data ?? {};
    this.src = source.src ?? null;
    this.format = source.format ?? null;
    this.role = source.role ?? null;
    this.width = source.width ?? null;
    this.height = source.height ?? null;
    this.scale = source.scale ?? null;
  }

  /**
   * @returns {object} Plain object with only the non-null fields. Falsy but
   *   non-null values such as 0 or '' are kept.
   */
  toJSON() {
    const obj = {};
    if (this.src != null) obj.src = this.src;
    if (this.format != null) obj.format = this.format;
    if (this.role != null) obj.role = this.role;
    if (this.width != null) obj.width = this.width;
    if (this.height != null) obj.height = this.height;
    if (this.scale != null) obj.scale = this.scale;
    return obj;
  }

  /**
   * @param {object | null} data - Same shape the constructor accepts.
   * @returns {FigureImage}
   */
  static fromJSON(data) { return new FigureImage(data); }
}
|
|
27
|
+
|
|
28
|
+
/**
 * A figure entity: a shared non-verbal entity that additionally carries a
 * list of images and a list of nested sub-figures.
 */
export class Figure extends SharedNonVerbalEntity {
  /**
   * @param {object} [data] - Source object; `images` and `subfigures`
   *   default to empty lists and are converted on first access.
   */
  constructor(data = {}) {
    super(data);
    const { images, subfigures } = data;
    this._rawImages = images ?? [];
    this._rawSubfigures = subfigures ?? [];
    this._images = null;
    this._subfigures = null;
  }

  /** Images as FigureImage instances (converted through `_lazy`). */
  get images() {
    const toImage = (item) => (item instanceof FigureImage ? item : new FigureImage(item));
    return this._lazy('_images', '_rawImages', toImage);
  }

  /** Sub-figures as Figure instances (converted through `_lazy`). */
  get subfigures() {
    const toFigure = (item) => (item instanceof Figure ? item : new Figure(item));
    return this._lazy('_subfigures', '_rawSubfigures', toFigure);
  }

  /**
   * Depth-first search for a figure by id, starting with this figure.
   *
   * @param {*} targetId - Id to match with `===`.
   * @returns {Figure | null} The matching figure, or null.
   */
  findById(targetId) {
    if (this.id === targetId) return this;
    for (const child of this.subfigures) {
      const hit = child.findById(targetId);
      if (hit) return hit;
    }
    return null;
  }

  /**
   * @returns {Array} This figure's id (when not null) followed by the ids of
   *   all nested sub-figures, depth-first.
   */
  allIds() {
    const collected = [];
    if (this.id != null) collected.push(this.id);
    for (const child of this.subfigures) {
      collected.push(...child.allIds());
    }
    return collected;
  }

  /** @returns {object} Parent serialization plus `images` and `subfigures`. */
  toJSON() {
    const out = super.toJSON();
    this._serialize(out, 'images', '_images', '_rawImages');
    this._serialize(out, 'subfigures', '_subfigures', '_rawSubfigures');
    return out;
  }

  /**
   * @param {object} data - Same shape the constructor accepts.
   * @returns {Figure}
   */
  static fromJSON(data) {
    return new Figure(data);
  }
}

// Let NonVerbalEntity.fromData() build a Figure for data typed 'figure'.
NonVerbalEntity.register('figure', Figure);
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { SharedNonVerbalEntity } from './shared-non-verbal-entity.js';
|
|
2
|
+
import { NonVerbalEntity } from './non-verbal-entity.js';
|
|
3
|
+
|
|
4
|
+
/**
 * A formula entity: a shared non-verbal entity with an `expression` and a
 * `notation` field, each null when absent.
 */
export class Formula extends SharedNonVerbalEntity {
  /**
   * @param {object} [data] - Source object.
   */
  constructor(data = {}) {
    super(data);
    const { expression, notation } = data;
    this.expression = expression ?? null;
    this.notation = notation ?? null;
  }

  /** @returns {object} Parent serialization plus the non-null own fields. */
  toJSON() {
    const out = super.toJSON();
    for (const field of ['expression', 'notation']) {
      if (this[field] != null) out[field] = this[field];
    }
    return out;
  }

  /**
   * @param {object} data - Same shape the constructor accepts.
   * @returns {Formula}
   */
  static fromJSON(data) {
    return new Formula(data);
  }
}

// Let NonVerbalEntity.fromData() build a Formula for data typed 'formula'.
NonVerbalEntity.register('formula', Formula);
|
package/src/models/index.js
CHANGED
|
@@ -12,9 +12,20 @@ export { DesignationRelationship, DESIGNATION_RELATIONSHIP_TYPES } from './desig
|
|
|
12
12
|
export { ConceptReference } from './concept-reference.js';
|
|
13
13
|
export { ConceptDate, DATE_TYPES } from './concept-date.js';
|
|
14
14
|
export { DetailedDefinition } from './detailed-definition.js';
|
|
15
|
-
export { NonVerbRep } from './non-verb-rep.js';
|
|
16
15
|
export { Pronunciation } from './pronunciation.js';
|
|
17
16
|
export { GrammarInfo, GRAMMAR_GENDERS, GRAMMAR_NUMBERS, GRAMMAR_PARTS_OF_SPEECH } from './grammar-info.js';
|
|
18
17
|
export { Locality } from './locality.js';
|
|
19
18
|
export { GcrMetadata } from './gcr-metadata.js';
|
|
20
19
|
export { GcrStatistics } from './gcr-statistics.js';
|
|
20
|
+
export { RegistrableModel } from './registrable.js';
|
|
21
|
+
export { NonVerbalEntity } from './non-verbal-entity.js';
|
|
22
|
+
export { SharedNonVerbalEntity } from './shared-non-verbal-entity.js';
|
|
23
|
+
export { Figure, FigureImage } from './figure.js';
|
|
24
|
+
export { Table } from './table.js';
|
|
25
|
+
export { Formula } from './formula.js';
|
|
26
|
+
export { NonVerbRep, NON_VERBAL_TYPES } from './non-verb-rep.js';
|
|
27
|
+
export { NonVerbalReference } from './non-verbal-reference.js';
|
|
28
|
+
export { FigureReference, TableReference, FormulaReference } from './non-verbal-references.js';
|
|
29
|
+
export { BibliographyEntry } from './bibliography-entry.js';
|
|
30
|
+
export { BibliographyData } from './bibliography-data.js';
|
|
31
|
+
export { fetchLocalizedString, localizedStringIsEmpty, localizedStringIsPresent } from './localized-string.js';
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/** Language code tried when the requested language has no value. */
const FALLBACK_LANG = 'eng';

/**
 * Read an own property of `hash`, ignoring anything inherited from the
 * prototype chain (so a language code such as 'toString' or 'constructor'
 * cannot resolve to an Object.prototype member).
 */
function ownValue(hash, key) {
  return Object.prototype.hasOwnProperty.call(hash, key) ? hash[key] : undefined;
}

/**
 * Pick the string for `lang` from a language-code -> string map.
 *
 * @param {Record<string, string> | null} hash - Localized values by language code.
 * @param {string} lang - Requested language code.
 * @param {string | null} [fallback='eng'] - Code tried when `lang` has no
 *   value; pass null to disable the fallback.
 * @returns {string | null} The value for `lang`, else the value for
 *   `fallback`, else null. Also null when `hash` is not an object.
 */
export function fetchLocalizedString(hash, lang, fallback = FALLBACK_LANG) {
  if (hash == null || typeof hash !== 'object') return null;
  const direct = ownValue(hash, lang);
  if (direct != null) return direct;
  if (fallback != null && fallback !== lang) {
    return ownValue(hash, fallback) ?? null;
  }
  return null;
}

/**
 * @param {Record<string, string> | null} hash
 * @returns {boolean} True for null/undefined and for objects without own
 *   enumerable keys.
 */
export function localizedStringIsEmpty(hash) {
  return hash == null || (typeof hash === 'object' && Object.keys(hash).length === 0);
}

/**
 * @param {Record<string, string> | null} hash
 * @returns {boolean} Negation of localizedStringIsEmpty().
 */
export function localizedStringIsPresent(hash) {
  return !localizedStringIsEmpty(hash);
}
|
|
@@ -1,23 +1,25 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { NonVerbalEntity } from './non-verbal-entity.js';
|
|
2
|
+
import { FigureImage } from './figure.js';
|
|
3
3
|
|
|
4
|
-
export
|
|
4
|
+
export const NON_VERBAL_TYPES = Object.freeze(['image', 'table', 'formula']);
|
|
5
|
+
|
|
6
|
+
export class NonVerbRep extends NonVerbalEntity {
|
|
5
7
|
constructor(data = {}) {
|
|
6
|
-
super();
|
|
8
|
+
super(data);
|
|
7
9
|
this.type = data.type ?? null;
|
|
8
|
-
this.
|
|
9
|
-
this.
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
this._rawImages = data.images ?? [];
|
|
11
|
+
this._images = null;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
get images() {
|
|
15
|
+
return this._lazy('_images', '_rawImages',
|
|
16
|
+
i => i instanceof FigureImage ? i : new FigureImage(i));
|
|
13
17
|
}
|
|
14
18
|
|
|
15
19
|
toJSON() {
|
|
16
|
-
const obj =
|
|
20
|
+
const obj = super.toJSON();
|
|
17
21
|
if (this.type != null) obj.type = this.type;
|
|
18
|
-
|
|
19
|
-
if (this.text != null) obj.text = this.text;
|
|
20
|
-
if (this.sources.length > 0) obj.sources = this.sources.map(s => s.toJSON());
|
|
22
|
+
this._serialize(obj, 'images', '_images', '_rawImages');
|
|
21
23
|
return obj;
|
|
22
24
|
}
|
|
23
25
|
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { RegistrableModel } from './registrable.js';
|
|
2
|
+
import { ConceptSource } from './concept-source.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Base model for non-verbal entities. Carries `caption`, `description` and
 * `alt` (null when absent) plus a list of sources converted on access.
 */
export class NonVerbalEntity extends RegistrableModel {
  /**
   * @param {object} [data] - Source object; `sources` defaults to an empty list.
   */
  constructor(data = {}) {
    super();
    const { caption, description, alt, sources } = data;
    this.caption = caption ?? null;
    this.description = description ?? null;
    this.alt = alt ?? null;
    this._rawSources = sources ?? [];
    this._sources = null;
  }

  /** Sources as ConceptSource instances (converted through `_lazy`). */
  get sources() {
    const toSource = (item) => (item instanceof ConceptSource ? item : new ConceptSource(item));
    return this._lazy('_sources', '_rawSources', toSource);
  }

  /**
   * The base entity has no id, so nothing can match.
   *
   * @param {*} _targetId - Ignored.
   * @returns {null}
   */
  findById(_targetId) {
    return null;
  }

  /** @returns {Array} Always an empty list for the base entity. */
  allIds() {
    return [];
  }

  /** @returns {object} Non-null text fields plus serialized `sources`. */
  toJSON() {
    const out = {};
    for (const field of ['caption', 'description', 'alt']) {
      if (this[field] != null) out[field] = this[field];
    }
    this._serialize(out, 'sources', '_sources', '_rawSources');
    return out;
  }

  /**
   * @param {object} data - Raw data or an existing entity.
   * @returns {NonVerbalEntity} Result of NonVerbalEntity.fromData(data).
   */
  static fromJSON(data) {
    return NonVerbalEntity.fromData(data);
  }
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { RegistrableModel } from './registrable.js';
|
|
2
|
+
|
|
3
|
+
/**
 * A reference from a concept to a non-verbal entity, identified by
 * `entityId`, with an optional `display` value.
 */
export class NonVerbalReference extends RegistrableModel {
  /**
   * @param {object | null} [data] - Source object. The id is taken from the
   *   first non-null of `entityId`, `entity_id`, `ref`, `id`. `null` behaves
   *   like `{}`.
   */
  constructor(data = {}) {
    super();
    // A default parameter only covers `undefined`; guard `null` explicitly.
    const source = data ?? {};
    this.entityId = source.entityId ?? source.entity_id ?? source.ref ?? source.id ?? null;
    this.display = source.display ?? null;
  }

  /** @returns {Array} `[constructor name, entityId]`, for de-duplication. */
  get dedupKey() {
    return [this.constructor.name, this.entityId];
  }

  /**
   * @returns {object | *} `{ ref, display }` when a display value is set,
   *   otherwise the bare entity id.
   */
  toJSON() {
    if (this.display != null) {
      return { ref: this.entityId, display: this.display };
    }
    return this.entityId;
  }

  /**
   * Build a reference from its serialized form.
   *
   * Accepts an existing reference (returned as-is), a bare id, or an object
   * with an id key and optional `display` / `type`. When `type` names a class
   * registered on the receiver, that class is instantiated.
   *
   * @param {NonVerbalReference | string | number | bigint | object | null} data
   * @returns {NonVerbalReference}
   */
  static fromJSON(data) {
    if (data instanceof NonVerbalReference) return data;
    // Bare ids: YAML parses an unquoted `12` as a number, so accept numeric
    // scalars as well as strings instead of silently dropping the id.
    const kind = typeof data;
    if (kind === 'string' || kind === 'number' || kind === 'bigint') {
      return new NonVerbalReference({ entityId: data });
    }
    // A null list item becomes an empty reference rather than a TypeError.
    if (data == null) return new NonVerbalReference();
    const entityId = data.entityId ?? data.entity_id ?? data.ref ?? data.id ?? null;
    const display = data.display ?? null;
    const Cls = data.type ? this._registry().get(data.type) : undefined;
    if (Cls) {
      return new Cls({ entityId, display });
    }
    return new NonVerbalReference({ entityId, display });
  }
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { NonVerbalReference } from './non-verbal-reference.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Build a reference through NonVerbalReference.fromJSON, then make sure
 * untyped input ends up as an instance of `Cls`.
 *
 * An existing reference instance is returned untouched, and data whose
 * `type` selected a registered class keeps that class. Only the plain base
 * result is re-wrapped, so that `instanceof` and `dedupKey` distinguish a
 * figure from a table that share an id.
 */
function typedReference(Cls, data) {
  if (data instanceof NonVerbalReference) return data;
  const ref = NonVerbalReference.fromJSON(data);
  if (ref.constructor !== NonVerbalReference) return ref;
  return new Cls({ entityId: ref.entityId, display: ref.display });
}

/** Reference to a figure entity. */
export class FigureReference extends NonVerbalReference {
  /**
   * @param {NonVerbalReference | string | object} data
   * @returns {NonVerbalReference} A FigureReference for untyped input.
   */
  static fromJSON(data) { return typedReference(FigureReference, data); }
}
NonVerbalReference.register('figure', FigureReference);

/** Reference to a table entity. */
export class TableReference extends NonVerbalReference {
  /**
   * @param {NonVerbalReference | string | object} data
   * @returns {NonVerbalReference} A TableReference for untyped input.
   */
  static fromJSON(data) { return typedReference(TableReference, data); }
}
NonVerbalReference.register('table', TableReference);

/** Reference to a formula entity. */
export class FormulaReference extends NonVerbalReference {
  /**
   * @param {NonVerbalReference | string | object} data
   * @returns {NonVerbalReference} A FormulaReference for untyped input.
   */
  static fromJSON(data) { return typedReference(FormulaReference, data); }
}
NonVerbalReference.register('formula', FormulaReference);
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { GlossaristModel } from './base.js';
|
|
2
|
+
|
|
3
|
+
// One type -> class Map per constructor, created lazily by _registry().
const _registries = new WeakMap();

/**
 * Base class for models whose concrete class is selected by a `type`
 * field in the raw data.
 */
export class RegistrableModel extends GlossaristModel {
  /**
   * Returns the registry Map owned by the receiver class, creating it on
   * first use. Each class has its own Map; nothing is shared implicitly.
   *
   * @returns {Map<string, Function>}
   */
  static _registry() {
    let map = _registries.get(this);
    if (!map) {
      map = new Map();
      _registries.set(this, map);
    }
    return map;
  }

  /**
   * Registers `cls` under `type` in the receiver's own registry.
   *
   * @param {string} type
   * @param {Function} cls
   */
  static register(type, cls) {
    this._registry().set(type, cls);
  }

  /**
   * Finds the class registered for `type`, starting at the receiver and
   * then walking up its constructor chain.
   *
   * An entry in the receiver's own registry is always honoured. An entry
   * found on an ancestor is only used when it is the receiver itself or a
   * subclass of it, so `Sub.fromData()` never produces an instance of an
   * unrelated sibling class.
   *
   * @param {string} type
   * @returns {Function|null}
   */
  static _lookup(type) {
    for (
      let owner = this;
      typeof owner === 'function';
      owner = Object.getPrototypeOf(owner)
    ) {
      // Read-only lookup: do not create registries for ancestors.
      const Cls = _registries.get(owner)?.get(type);
      if (!Cls) continue;
      if (owner === this || Cls === this || Cls.prototype instanceof this) {
        return Cls;
      }
    }
    return null;
  }

  /**
   * Builds an instance from raw data, dispatching on `data.type`.
   *
   * Instances of the receiver are returned as-is. When `type` is absent
   * or resolves to nothing, the receiver class itself is instantiated.
   *
   * @param {object} data
   * @returns {RegistrableModel}
   */
  static fromData(data) {
    if (data instanceof this) return data;
    const type = data?.type;
    const Cls = (type ? this._lookup(type) : null) ?? this;
    return new Cls(data);
  }
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { NonVerbalEntity } from './non-verbal-entity.js';
|
|
2
|
+
|
|
3
|
+
/**
 * A non-verbal entity that carries its own `id` and `identifier`.
 */
export class SharedNonVerbalEntity extends NonVerbalEntity {
  /**
   * @param {object} [data] - Raw fields; `id` and `identifier` default to null.
   */
  constructor(data = {}) {
    super(data);
    const { id = null, identifier = null } = data;
    this.id = id;
    this.identifier = identifier;
  }

  /**
   * @param {*} targetId
   * @returns {SharedNonVerbalEntity|null} This entity when its id is
   *   strictly equal to `targetId`, otherwise null.
   */
  findById(targetId) {
    if (this.id !== targetId) return null;
    return this;
  }

  /** @returns {Array} A one-element list with this entity's id, or an empty list. */
  allIds() {
    return this.id == null ? [] : [this.id];
  }

  /** Extends the parent serialisation with `id` and `identifier` when set. */
  toJSON() {
    const json = super.toJSON();
    for (const key of ['id', 'identifier']) {
      if (this[key] != null) json[key] = this[key];
    }
    return json;
  }

  /**
   * @param {object} data
   * @returns {SharedNonVerbalEntity} Result of `SharedNonVerbalEntity.fromData`.
   */
  static fromJSON(data) {
    const entity = SharedNonVerbalEntity.fromData(data);
    return entity;
  }
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { SharedNonVerbalEntity } from './shared-non-verbal-entity.js';
|
|
2
|
+
import { NonVerbalEntity } from './non-verbal-entity.js';
|
|
3
|
+
|
|
4
|
+
/**
 * A table entity: a shared non-verbal entity with `content` and `format`.
 */
export class Table extends SharedNonVerbalEntity {
  /**
   * @param {object} [data] - Raw fields; `content` and `format` default to null.
   */
  constructor(data = {}) {
    super(data);
    const { content = null, format = null } = data;
    this.content = content;
    this.format = format;
  }

  /** Extends the parent serialisation with `content` and `format` when set. */
  toJSON() {
    const json = super.toJSON();
    for (const key of ['content', 'format']) {
      if (this[key] != null) json[key] = this[key];
    }
    return json;
  }

  /**
   * @param {object} data
   * @returns {Table} Always a new Table built from `data`.
   */
  static fromJSON(data) {
    const table = new Table(data);
    return table;
  }
}

// Make `type: 'table'` resolve to Table in the NonVerbalEntity registry.
NonVerbalEntity.register('table', Table);
|
package/src/reference-mention.js
CHANGED
|
@@ -27,6 +27,12 @@
|
|
|
27
27
|
|
|
28
28
|
const NUMERIC_RE = /^\d+(?:[.-]\d+)+$/;
|
|
29
29
|
|
|
30
|
+
// Mention prefixes for non-verbal references, paired with the parse
// `kind` each one maps to. Order is the order they are listed here.
const NVR_PREFIXES = Object.freeze(
  [
    ['fig:', 'fig-ref'],
    ['table:', 'table-ref'],
    ['formula:', 'formula-ref'],
  ].map(([prefix, kind]) => ({ prefix, kind })),
);
|
|
35
|
+
|
|
30
36
|
/**
|
|
31
37
|
* Parse the body of a {{...}} mention (without the braces).
|
|
32
38
|
*
|
|
@@ -63,7 +69,17 @@ export function parseMention(raw) {
|
|
|
63
69
|
};
|
|
64
70
|
}
|
|
65
71
|
|
|
66
|
-
// 3.
|
|
72
|
+
// 3. NVR prefixes (fig:/table:/formula:) — config-driven dispatch.
|
|
73
|
+
for (const { prefix, kind } of NVR_PREFIXES) {
|
|
74
|
+
const escPrefix = prefix.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
75
|
+
const match = body.match(new RegExp(`^${escPrefix}([^,}]+)(?:,(.*))?$`));
|
|
76
|
+
if (match) {
|
|
77
|
+
const label = match[2] !== undefined ? unquoteLabel(match[2].trim()) : null;
|
|
78
|
+
return { kind, key: match[1].trim(), label, raw: body };
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// 4. Comma-separated form: {{id, render}}.
|
|
67
83
|
// ID always comes first, render text always comes last.
|
|
68
84
|
const commaIdx = body.indexOf(',');
|
|
69
85
|
if (commaIdx !== -1) {
|
|
@@ -76,12 +92,12 @@ export function parseMention(raw) {
|
|
|
76
92
|
return { kind: 'designation', id, label, raw: body };
|
|
77
93
|
}
|
|
78
94
|
|
|
79
|
-
//
|
|
95
|
+
// 5. Bare numeric id.
|
|
80
96
|
if (NUMERIC_RE.test(body)) {
|
|
81
97
|
return { kind: 'numeric', id: body, label: null, raw: body };
|
|
82
98
|
}
|
|
83
99
|
|
|
84
|
-
//
|
|
100
|
+
// 6. Anything else is unresolved at the parse layer.
|
|
85
101
|
return { kind: 'unresolved', raw: body };
|
|
86
102
|
}
|
|
87
103
|
|
|
@@ -13,6 +13,21 @@ export class Reference {
|
|
|
13
13
|
this.resolution = extras.resolution ?? null;
|
|
14
14
|
this.lookupKey = extras.lookupKey ?? null;
|
|
15
15
|
}
|
|
16
|
+
|
|
17
|
+
get dedupKey() {
|
|
18
|
+
if (this.type === 'bibliography') {
|
|
19
|
+
return ['bibliography',
|
|
20
|
+
this.sourceId ?? this.citation?.ref?.id ?? this.target];
|
|
21
|
+
}
|
|
22
|
+
if (this.type === 'figure' || this.type === 'table' || this.type === 'formula') {
|
|
23
|
+
return [this.type, this.lookupKey?.entityId ?? this.target];
|
|
24
|
+
}
|
|
25
|
+
if (this.type === 'concept') {
|
|
26
|
+
return ['concept',
|
|
27
|
+
this.lookupKey?.id ?? this.lookupKey?.designation ?? this.target];
|
|
28
|
+
}
|
|
29
|
+
return [this.type, this.target];
|
|
30
|
+
}
|
|
16
31
|
}
|
|
17
32
|
|
|
18
33
|
function refTarget(rc) {
|
|
@@ -33,6 +48,18 @@ export function resolveBibliographyRecord(citationRef, registry) {
|
|
|
33
48
|
return bioColl.byId(citationRef.id);
|
|
34
49
|
}
|
|
35
50
|
|
|
51
|
+
/**
 * Looks up the entity a non-verbal reference points at.
 *
 * The reference's `lookupKey` supplies `entityType` and `entityId`; the
 * candidates are read from `registry['nvr:<entityType>']` and each one is
 * asked via `findById`. The first truthy answer wins.
 *
 * @param {object} ref - Reference carrying an optional `lookupKey`.
 * @param {object} registry - Collections keyed by `nvr:<entityType>`.
 * @returns {*} The value returned by the matching `findById`, or null.
 */
export function findNonVerbalEntity(ref, registry) {
  const lookup = ref.lookupKey ?? {};
  const wantedType = lookup.entityType;
  const wantedId = lookup.entityId;
  if (!(wantedType && wantedId)) return null;

  const candidates = registry[`nvr:${wantedType}`];
  if (candidates) {
    for (const candidate of candidates) {
      const hit = candidate.findById(wantedId);
      if (hit) return hit;
    }
  }
  return null;
}
|
|
62
|
+
|
|
36
63
|
export class ReferenceResolver {
|
|
37
64
|
extractReferences(concept) {
|
|
38
65
|
const refs = [];
|
|
@@ -46,6 +73,22 @@ export class ReferenceResolver {
|
|
|
46
73
|
}
|
|
47
74
|
}
|
|
48
75
|
|
|
76
|
+
for (const ref of concept.figures) {
|
|
77
|
+
refs.push(new Reference('figure', ref.display ?? ref.entityId, 'structural', 'figures', {
|
|
78
|
+
lookupKey: { entityType: 'figure', entityId: ref.entityId },
|
|
79
|
+
}));
|
|
80
|
+
}
|
|
81
|
+
for (const ref of concept.tables) {
|
|
82
|
+
refs.push(new Reference('table', ref.display ?? ref.entityId, 'structural', 'tables', {
|
|
83
|
+
lookupKey: { entityType: 'table', entityId: ref.entityId },
|
|
84
|
+
}));
|
|
85
|
+
}
|
|
86
|
+
for (const ref of concept.formulas) {
|
|
87
|
+
refs.push(new Reference('formula', ref.display ?? ref.entityId, 'structural', 'formulas', {
|
|
88
|
+
lookupKey: { entityType: 'formula', entityId: ref.entityId },
|
|
89
|
+
}));
|
|
90
|
+
}
|
|
91
|
+
|
|
49
92
|
for (const lang of concept.languages) {
|
|
50
93
|
const lc = concept.localization(lang);
|
|
51
94
|
if (!lc) continue;
|
|
@@ -66,7 +109,17 @@ export class ReferenceResolver {
|
|
|
66
109
|
}
|
|
67
110
|
}
|
|
68
111
|
|
|
69
|
-
return refs;
|
|
112
|
+
return this._dedup(refs);
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
_dedup(refs) {
|
|
116
|
+
const seen = new Set();
|
|
117
|
+
return refs.filter(ref => {
|
|
118
|
+
const key = JSON.stringify(ref.dedupKey);
|
|
119
|
+
if (seen.has(key)) return false;
|
|
120
|
+
seen.add(key);
|
|
121
|
+
return true;
|
|
122
|
+
});
|
|
70
123
|
}
|
|
71
124
|
|
|
72
125
|
_collectTexts(lc, lang) {
|
|
@@ -114,6 +167,21 @@ export class ReferenceResolver {
|
|
|
114
167
|
resolution: null,
|
|
115
168
|
}));
|
|
116
169
|
break;
|
|
170
|
+
case 'fig-ref':
|
|
171
|
+
refs.push(new Reference('figure', parsed.label ?? parsed.key, 'embedded', source, {
|
|
172
|
+
lookupKey: { entityType: 'figure', entityId: parsed.key },
|
|
173
|
+
}));
|
|
174
|
+
break;
|
|
175
|
+
case 'table-ref':
|
|
176
|
+
refs.push(new Reference('table', parsed.label ?? parsed.key, 'embedded', source, {
|
|
177
|
+
lookupKey: { entityType: 'table', entityId: parsed.key },
|
|
178
|
+
}));
|
|
179
|
+
break;
|
|
180
|
+
case 'formula-ref':
|
|
181
|
+
refs.push(new Reference('formula', parsed.label ?? parsed.key, 'embedded', source, {
|
|
182
|
+
lookupKey: { entityType: 'formula', entityId: parsed.key },
|
|
183
|
+
}));
|
|
184
|
+
break;
|
|
117
185
|
case 'numeric':
|
|
118
186
|
refs.push(new Reference('concept', parsed.label ?? parsed.id, 'embedded', source, {
|
|
119
187
|
lookupKey: { id: parsed.id },
|
|
@@ -168,6 +236,9 @@ export class ReferenceResolver {
|
|
|
168
236
|
switch (ref.type) {
|
|
169
237
|
case 'concept': return this._resolveConcept(ref, registry);
|
|
170
238
|
case 'bibliography': return this._resolveBibliography(ref, registry);
|
|
239
|
+
case 'figure':
|
|
240
|
+
case 'table':
|
|
241
|
+
case 'formula': return this._resolveNonVerbal(ref, registry);
|
|
171
242
|
case 'dataset': return this._resolveDataset(ref, registry);
|
|
172
243
|
case 'typed-ref': return this._resolveTypedRef(ref, registry);
|
|
173
244
|
case 'standard': return this._resolveStandard(ref, registry);
|
|
@@ -175,6 +246,10 @@ export class ReferenceResolver {
|
|
|
175
246
|
}
|
|
176
247
|
}
|
|
177
248
|
|
|
249
|
+
_resolveNonVerbal(ref, registry) {
|
|
250
|
+
return findNonVerbalEntity(ref, registry);
|
|
251
|
+
}
|
|
252
|
+
|
|
178
253
|
_resolveConcept(ref, registry) {
|
|
179
254
|
if (ref.lookupKey?.id) {
|
|
180
255
|
const dataset = ref.lookupKey.dataset;
|
|
@@ -218,7 +293,8 @@ export class ReferenceResolver {
|
|
|
218
293
|
resolveAll(concept, registry) {
|
|
219
294
|
const resolved = new Map();
|
|
220
295
|
for (const ref of this.extractReferences(concept)) {
|
|
221
|
-
if (ref.type === 'concept' || ref.type === 'bibliography'
|
|
296
|
+
if (ref.type === 'concept' || ref.type === 'bibliography'
|
|
297
|
+
|| ref.type === 'figure' || ref.type === 'table' || ref.type === 'formula') {
|
|
222
298
|
const target = this.resolveReference(ref, registry);
|
|
223
299
|
if (target != null) {
|
|
224
300
|
const key = ref.target ?? ref.uri ?? ref.sourceId;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { resolveBibliographyRecord } from './reference-resolver.js';
|
|
1
|
+
import { resolveBibliographyRecord, findNonVerbalEntity } from './reference-resolver.js';
|
|
2
2
|
|
|
3
3
|
export class ReferenceClassifier {
|
|
4
4
|
constructor(registry = {}, sourceDatasetId = null, options = {}) {
|
|
@@ -14,6 +14,9 @@ export class ReferenceClassifier {
|
|
|
14
14
|
case 'concept': return this._classifyConcept(ref);
|
|
15
15
|
case 'dataset': return this._classifyDataset(ref);
|
|
16
16
|
case 'bibliography': return this._classifyBibliography(ref);
|
|
17
|
+
case 'figure':
|
|
18
|
+
case 'table':
|
|
19
|
+
case 'formula': return this._classifyNonVerbal(ref);
|
|
17
20
|
case 'typed-ref': return this._classifyTypedRef(ref);
|
|
18
21
|
case 'standard': return 'legacy-standard';
|
|
19
22
|
default: return 'unknown';
|
|
@@ -63,4 +66,10 @@ export class ReferenceClassifier {
|
|
|
63
66
|
_classifyTypedRef(_ref) {
|
|
64
67
|
return 'typed-ref';
|
|
65
68
|
}
|
|
69
|
+
|
|
70
|
+
_classifyNonVerbal(ref) {
|
|
71
|
+
return findNonVerbalEntity(ref, this.registry)
|
|
72
|
+
? 'internal-citation'
|
|
73
|
+
: 'external-citation';
|
|
74
|
+
}
|
|
66
75
|
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { DATASET_ASSETS } from '../dataset-asset.js';
|
|
2
|
+
|
|
3
|
+
/**
 * A set of asset paths, stored with a single leading `/` removed so that
 * `/images/a.png` and `images/a.png` are the same entry.
 */
export class AssetIndex {
  constructor() {
    this._paths = new Set();
  }

  /** @returns {string[]} All stored paths as a freshly sorted array. */
  get paths() {
    return Array.from(this._paths).sort();
  }

  /** @returns {number} Number of distinct stored paths. */
  get size() {
    return this._paths.size;
  }

  /**
   * Adds a path to the index; null and undefined are ignored.
   *
   * @param {*} path
   */
  register(path) {
    if (path != null) {
      this._paths.add(this._normalize(path));
    }
  }

  /**
   * @param {*} path
   * @returns {boolean} Whether the normalised path is stored; false for
   *   null and undefined.
   */
  has(path) {
    return path != null && this._paths.has(this._normalize(path));
  }

  /** Iterates the stored paths in insertion order. */
  [Symbol.iterator]() {
    return this._paths.values();
  }

  /**
   * Converts the value to a string and strips one leading `/`, if any.
   *
   * @param {*} path
   * @returns {string}
   */
  _normalize(path) {
    const text = String(path);
    return text.startsWith('/') ? text.slice(1) : text;
  }

  /**
   * Builds an index from the names reported by `pkg.imageFileNames()`.
   *
   * @param {object} pkg - Object exposing `imageFileNames()`; the result is awaited.
   * @returns {Promise<AssetIndex>}
   */
  static async fromGcrPackage(pkg) {
    const index = new AssetIndex();
    for (const name of await pkg.imageFileNames()) {
      index.register(name);
    }
    return index;
  }

  /**
   * Builds an index by walking the dataset's images directory.
   *
   * The directory is taken from the DATASET_ASSETS entry with type
   * 'directory' and path 'images'; without such an entry the index stays
   * empty.
   *
   * @param {string} datasetPath - Root of the dataset.
   * @param {object} fs - Provides `existsSync` and `readdirSync`.
   * @returns {AssetIndex}
   */
  static fromDirectory(datasetPath, fs) {
    const index = new AssetIndex();
    const imagesAsset = DATASET_ASSETS.find(
      ({ type, path }) => type === 'directory' && path === 'images',
    );
    if (imagesAsset) {
      const imagesDir = `${datasetPath}/${imagesAsset.path}`;
      _walkDir(fs, imagesDir, imagesAsset.path, index);
    }
    return index;
  }
}
|
|
54
|
+
|
|
55
|
+
/**
 * Recursively registers every non-directory entry under `dirPath`.
 *
 * Does nothing when `dirPath` does not exist. Registered paths are built
 * from `relativePrefix` plus the entry names, joined with `/`.
 *
 * @param {object} fs - Provides `existsSync` and `readdirSync`.
 * @param {string} dirPath - Directory to read.
 * @param {string} relativePrefix - Prefix for the registered paths.
 * @param {object} index - Receives each path through `register`.
 */
function _walkDir(fs, dirPath, relativePrefix, index) {
  if (fs.existsSync(dirPath)) {
    const children = fs.readdirSync(dirPath, { withFileTypes: true });
    for (const child of children) {
      const childRel = `${relativePrefix}/${child.name}`;
      if (!child.isDirectory()) {
        index.register(childRel);
        continue;
      }
      _walkDir(fs, `${dirPath}/${child.name}`, childRel, index);
    }
  }
}
|
package/src/validators/index.js
CHANGED
|
@@ -14,6 +14,8 @@ export {
|
|
|
14
14
|
UuidFormatRule,
|
|
15
15
|
SourceUrnFormatRule,
|
|
16
16
|
CiteRefIntegrityRule,
|
|
17
|
+
NonVerbalRefIntegrityRule,
|
|
18
|
+
OrphanedImagesRule,
|
|
17
19
|
} from './v3-rules.js';
|
|
18
20
|
|
|
19
21
|
import { ConceptValidator, LanguageCodeRule, DesignationTypeRule, EntryStatusRule } from './concept-validator.js';
|
|
@@ -29,6 +31,7 @@ import {
|
|
|
29
31
|
UuidFormatRule,
|
|
30
32
|
SourceUrnFormatRule,
|
|
31
33
|
CiteRefIntegrityRule,
|
|
34
|
+
NonVerbalRefIntegrityRule,
|
|
32
35
|
} from './v3-rules.js';
|
|
33
36
|
|
|
34
37
|
const _default = new ConceptValidator()
|
|
@@ -43,7 +46,8 @@ const _default = new ConceptValidator()
|
|
|
43
46
|
.addRule(new UuidFormatRule())
|
|
44
47
|
.addRule(new SourceUrnFormatRule())
|
|
45
48
|
.addRule(new RelationshipTypeRule())
|
|
46
|
-
.addRule(new CiteRefIntegrityRule())
|
|
49
|
+
.addRule(new CiteRefIntegrityRule())
|
|
50
|
+
.addRule(new NonVerbalRefIntegrityRule());
|
|
47
51
|
|
|
48
52
|
export function validateConcept(concept) {
|
|
49
53
|
return _default.validate(concept);
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import { ValidationRule } from './validation-rule.js';
|
|
2
|
+
import { parseMention } from '../reference-mention.js';
|
|
3
|
+
import { GraphicalSymbol } from '../models/designation.js';
|
|
2
4
|
|
|
3
5
|
const _eachLocalization = (concept, fn) => {
|
|
4
6
|
for (const lang of concept.languages) {
|
|
@@ -267,3 +269,148 @@ export class CiteRefIntegrityRule extends ValidationRule {
|
|
|
267
269
|
}
|
|
268
270
|
}
|
|
269
271
|
}
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
// ── NonVerbalRefIntegrityRule ────────────────────────────────────────
|
|
275
|
+
// Uses parseMention for classification (no regex duplication).
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
// The concept arrays that hold non-verbal references, each paired with
// the entity type it refers to.
const NVR_ARRAYS = Object.freeze(
  [
    ['figures', 'figure'],
    ['tables', 'table'],
    ['formulas', 'formula'],
  ].map(([name, entityType]) => ({ name, entityType })),
);
|
|
283
|
+
|
|
284
|
+
/**
 * Collects the inline `{{...}}` mentions that `parseMention` classifies
 * as figure, table or formula references.
 *
 * For every language with a localization, the `content` of each entry in
 * `definitions`, `notes`, `examples` and `annotations` is scanned, in that
 * order. Non-string and empty contents are skipped.
 *
 * @param {object} concept - Exposes `languages` and `localization(lang)`.
 * @returns {Array<{key: string, source: string}>} One record per mention,
 *   with the parsed key and the dotted path of the text it came from.
 */
function _findNvrMentions(concept) {
  const nvrKinds = new Set(['fig-ref', 'table-ref', 'formula-ref']);
  const textFields = ['definitions', 'notes', 'examples', 'annotations'];
  const found = [];

  for (const lang of concept.languages) {
    const lc = concept.localization(lang);
    if (!lc) continue;

    for (const field of textFields) {
      const entries = lc[field];
      for (let i = 0; i < entries.length; i++) {
        const text = entries[i]?.content;
        if (typeof text !== 'string' || text.length === 0) continue;

        const source = `localizations.${lang}.${field}[${i}].content`;
        // Capture group 1 is the mention body without the braces.
        for (const [, body] of text.matchAll(/\{\{([^{}]*?)\}\}/g)) {
          const parsed = parseMention(body);
          if (nvrKinds.has(parsed.kind)) {
            found.push({ key: parsed.key, source });
          }
        }
      }
    }
  }
  return found;
}
|
|
322
|
+
|
|
323
|
+
/**
 * Warning-level rule over a concept's figure, table and formula
 * references. It reports:
 *
 * 1. an entity id that appears more than once within one of the arrays
 *    listed in NVR_ARRAYS, and
 * 2. an inline mention whose key matches no entity id in any of them.
 */
export class NonVerbalRefIntegrityRule extends ValidationRule {
  constructor() {
    super('nvr-integrity', 'warning');
  }

  /**
   * @param {object} concept - Concept exposing the NVR_ARRAYS properties.
   * @param {string} path - Prefix for the paths of reported issues.
   * @param {object} result - Sink passed through to `addIssue`.
   */
  validate(concept, path, result) {
    // Ids across all arrays; inline mentions are checked against this union.
    const declaredIds = new Set();

    for (const { name } of NVR_ARRAYS) {
      const tally = new Map();
      for (const { entityId } of concept[name]) {
        if (entityId == null) continue;
        tally.set(entityId, (tally.get(entityId) ?? 0) + 1);
        declaredIds.add(entityId);
      }
      tally.forEach((count, id) => {
        if (count > 1) {
          this.addIssue(result, `${path}${name}`,
            `duplicate ${name} reference id "${id}" appears ${count} times`);
        }
      });
    }

    _findNvrMentions(concept)
      .filter(({ key }) => !declaredIds.has(key))
      .forEach(({ key, source }) => {
        this.addIssue(result, source,
          `inline NVR mention "${key}" does not resolve to any figures/tables/formulas entry`);
      });
  }
}
|
|
361
|
+
|
|
362
|
+
// ── OrphanedImagesRule ───────────────────────────────────────────────
|
|
363
|
+
// Collection-scope rule: needs AssetIndex + all concepts. Called
|
|
364
|
+
// directly by GcrValidator (not in concept validator chain).
|
|
365
|
+
|
|
366
|
+
/**
 * Collection-scope rule that reports images present in the asset index
 * but referenced by no concept.
 */
export class OrphanedImagesRule {
  constructor() {
    this.name = 'orphaned-images';
    this.severity = 'warning';
  }

  /**
   * Compares the asset index against every image path the concepts use.
   *
   * A path counts as referenced when it is:
   * - the target of an extracted reference containing `images/` (only
   *   when a resolver is supplied),
   * - the `src` of an image in a localization's non-verbal
   *   representations, or
   * - the `image` of a GraphicalSymbol term.
   *
   * Paths are compared with one leading `/` removed. Non-string and empty
   * paths are ignored, and a missing `concepts` list or a missing
   * per-localization collection is treated as empty.
   *
   * @param {object} context
   * @param {object} context.assetIndex - Exposes `size` and `paths`.
   * @param {Iterable} [context.concepts] - Concepts to scan.
   * @param {object} [context.resolver] - Exposes `extractReferences(concept)`.
   * @returns {Array<{path: string, severity: string, message: string}>}
   *   One issue per unreferenced asset path, carrying this rule's severity.
   */
  check(context) {
    const { assetIndex, concepts, resolver } = context;
    if (!assetIndex || assetIndex.size === 0) return [];

    const referenced = new Set();
    const markReferenced = (candidate) => {
      if (typeof candidate === 'string' && candidate.length > 0) {
        referenced.add(candidate.replace(/^\//, ''));
      }
    };

    for (const concept of concepts ?? []) {
      if (resolver) {
        for (const ref of resolver.extractReferences(concept)) {
          const { target } = ref;
          if (typeof target === 'string' && target.includes('images/')) {
            markReferenced(target);
          }
        }
      }

      for (const lang of concept.languages) {
        const lc = concept.localization(lang);
        if (!lc) continue;

        for (const nvr of lc.nonVerbalRep ?? []) {
          for (const img of nvr.images ?? []) {
            markReferenced(img.src);
          }
        }
        for (const term of lc.terms ?? []) {
          if (term instanceof GraphicalSymbol) {
            markReferenced(term.image);
          }
        }
      }
    }

    const issues = [];
    for (const imgPath of assetIndex.paths) {
      if (referenced.has(imgPath)) continue;
      issues.push({
        path: imgPath,
        // Use the instance severity so reconfiguring the rule takes effect.
        severity: this.severity,
        message: `orphaned image: ${imgPath} (not referenced by any concept)`,
      });
    }
    return issues;
  }
}
|