glossarist 0.3.8 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/concept-collection.js +3 -11
- package/src/concept-parser.js +7 -1
- package/src/dataset-asset.js +3 -0
- package/src/entity-directory.js +43 -0
- package/src/gcr-reader.js +53 -5
- package/src/gcr-writer.js +3 -1
- package/src/index.d.ts +40 -7
- package/src/index.js +19 -2
- package/src/managed-concept-collection.js +11 -2
- package/src/models/bibliography-data.js +43 -0
- package/src/models/bibliography-entry.js +26 -0
- package/src/models/concept.js +44 -0
- package/src/models/designation.js +2 -14
- package/src/models/detailed-definition.js +30 -6
- package/src/models/figure.js +71 -0
- package/src/models/formula.js +21 -0
- package/src/models/index.d.ts +5 -0
- package/src/models/index.js +12 -1
- package/src/models/localized-concept.js +21 -0
- package/src/models/localized-string.js +19 -0
- package/src/models/non-verb-rep.js +15 -13
- package/src/models/non-verbal-entity.js +39 -0
- package/src/models/non-verbal-reference.js +35 -0
- package/src/models/non-verbal-references.js +16 -0
- package/src/models/registrable.js +25 -0
- package/src/models/shared-non-verbal-entity.js +28 -0
- package/src/models/table.js +21 -0
- package/src/reference-mention.js +19 -3
- package/src/reference-resolver.js +77 -31
- package/src/render-classification.js +10 -1
- package/src/validators/asset-index.js +66 -0
- package/src/validators/index.js +5 -1
- package/src/validators/v3-rules.js +132 -24
package/src/models/index.d.ts
CHANGED
|
@@ -65,6 +65,7 @@ export class Concept extends GlossaristModel {
|
|
|
65
65
|
definition(lang: string): string | null;
|
|
66
66
|
setLocalization(lang: string, lc: LocalizedConcept | Record<string, unknown>): this;
|
|
67
67
|
hasLocalization(lang: string): boolean;
|
|
68
|
+
walkTexts(): Iterable<{ text: string; source: string }>;
|
|
68
69
|
static fromJSON(data: Record<string, unknown>): Concept;
|
|
69
70
|
}
|
|
70
71
|
|
|
@@ -96,6 +97,7 @@ export class LocalizedConcept extends GlossaristModel {
|
|
|
96
97
|
readonly related: RelatedConcept[];
|
|
97
98
|
readonly primaryDesignation: string | null;
|
|
98
99
|
readonly primaryDefinition: string | null;
|
|
100
|
+
walkTexts(basePath: string): Iterable<{ text: string; source: string }>;
|
|
99
101
|
static fromJSON(data: Record<string, unknown>): LocalizedConcept;
|
|
100
102
|
}
|
|
101
103
|
|
|
@@ -243,6 +245,9 @@ export class ConceptDate extends GlossaristModel {
|
|
|
243
245
|
export class DetailedDefinition extends GlossaristModel {
|
|
244
246
|
readonly content: string;
|
|
245
247
|
readonly sources: ConceptSource[];
|
|
248
|
+
readonly examples: DetailedDefinition[];
|
|
249
|
+
walkTexts(path: string): Iterable<{ text: string; source: string }>;
|
|
250
|
+
static fromJSON(data: Record<string, unknown>): DetailedDefinition;
|
|
246
251
|
}
|
|
247
252
|
|
|
248
253
|
export class NonVerbRep extends GlossaristModel {
|
package/src/models/index.js
CHANGED
|
@@ -12,9 +12,20 @@ export { DesignationRelationship, DESIGNATION_RELATIONSHIP_TYPES } from './desig
|
|
|
12
12
|
export { ConceptReference } from './concept-reference.js';
|
|
13
13
|
export { ConceptDate, DATE_TYPES } from './concept-date.js';
|
|
14
14
|
export { DetailedDefinition } from './detailed-definition.js';
|
|
15
|
-
export { NonVerbRep } from './non-verb-rep.js';
|
|
16
15
|
export { Pronunciation } from './pronunciation.js';
|
|
17
16
|
export { GrammarInfo, GRAMMAR_GENDERS, GRAMMAR_NUMBERS, GRAMMAR_PARTS_OF_SPEECH } from './grammar-info.js';
|
|
18
17
|
export { Locality } from './locality.js';
|
|
19
18
|
export { GcrMetadata } from './gcr-metadata.js';
|
|
20
19
|
export { GcrStatistics } from './gcr-statistics.js';
|
|
20
|
+
export { RegistrableModel } from './registrable.js';
|
|
21
|
+
export { NonVerbalEntity } from './non-verbal-entity.js';
|
|
22
|
+
export { SharedNonVerbalEntity } from './shared-non-verbal-entity.js';
|
|
23
|
+
export { Figure, FigureImage } from './figure.js';
|
|
24
|
+
export { Table } from './table.js';
|
|
25
|
+
export { Formula } from './formula.js';
|
|
26
|
+
export { NonVerbRep, NON_VERBAL_TYPES } from './non-verb-rep.js';
|
|
27
|
+
export { NonVerbalReference } from './non-verbal-reference.js';
|
|
28
|
+
export { FigureReference, TableReference, FormulaReference } from './non-verbal-references.js';
|
|
29
|
+
export { BibliographyEntry } from './bibliography-entry.js';
|
|
30
|
+
export { BibliographyData } from './bibliography-data.js';
|
|
31
|
+
export { fetchLocalizedString, localizedStringIsEmpty, localizedStringIsPresent } from './localized-string.js';
|
|
@@ -99,6 +99,27 @@ export class LocalizedConcept extends GlossaristModel {
|
|
|
99
99
|
return this.definitions[0]?.content ?? null;
|
|
100
100
|
}
|
|
101
101
|
|
|
102
|
+
*_textSlots() {
|
|
103
|
+
yield ['definitions', this.definitions];
|
|
104
|
+
yield ['notes', this.notes];
|
|
105
|
+
yield ['examples', this.examples];
|
|
106
|
+
yield ['annotations', this.annotations];
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Yield every content-text fragment in this localization, recursing
|
|
111
|
+
* through nested examples. `basePath` prefixes every emitted
|
|
112
|
+
* `source` path; pass `localizations.<lang>` to get paths consistent
|
|
113
|
+
* with the rest of the codebase. Designations are not included.
|
|
114
|
+
*/
|
|
115
|
+
*walkTexts(basePath) {
|
|
116
|
+
for (const [name, arr] of this._textSlots()) {
|
|
117
|
+
for (let i = 0; i < arr.length; i++) {
|
|
118
|
+
yield* arr[i].walkTexts(`${basePath}.${name}[${i}]`);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
102
123
|
toJSON() {
|
|
103
124
|
const obj = {};
|
|
104
125
|
if (this.languageCode) obj.language_code = this.languageCode;
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
const FALLBACK_LANG = 'eng';
|
|
2
|
+
|
|
3
|
+
export function fetchLocalizedString(hash, lang, fallback = FALLBACK_LANG) {
|
|
4
|
+
if (hash == null || typeof hash !== 'object') return null;
|
|
5
|
+
const direct = hash[lang] ?? hash[String(lang)];
|
|
6
|
+
if (direct != null) return direct;
|
|
7
|
+
if (fallback != null && fallback !== lang) {
|
|
8
|
+
return hash[fallback] ?? hash[String(fallback)] ?? null;
|
|
9
|
+
}
|
|
10
|
+
return null;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export function localizedStringIsEmpty(hash) {
|
|
14
|
+
return hash == null || (typeof hash === 'object' && Object.keys(hash).length === 0);
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/**
 * Logical negation of `localizedStringIsEmpty`: true when the value is
 * not considered empty by that function.
 */
export function localizedStringIsPresent(hash) {
  const empty = localizedStringIsEmpty(hash);
  return !empty;
}
|
|
@@ -1,23 +1,25 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { NonVerbalEntity } from './non-verbal-entity.js';
|
|
2
|
+
import { FigureImage } from './figure.js';
|
|
3
3
|
|
|
4
|
-
export
|
|
4
|
+
export const NON_VERBAL_TYPES = Object.freeze(['image', 'table', 'formula']);
|
|
5
|
+
|
|
6
|
+
export class NonVerbRep extends NonVerbalEntity {
|
|
5
7
|
constructor(data = {}) {
|
|
6
|
-
super();
|
|
8
|
+
super(data);
|
|
7
9
|
this.type = data.type ?? null;
|
|
8
|
-
this.
|
|
9
|
-
this.
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
this._rawImages = data.images ?? [];
|
|
11
|
+
this._images = null;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
get images() {
|
|
15
|
+
return this._lazy('_images', '_rawImages',
|
|
16
|
+
i => i instanceof FigureImage ? i : new FigureImage(i));
|
|
13
17
|
}
|
|
14
18
|
|
|
15
19
|
toJSON() {
|
|
16
|
-
const obj =
|
|
20
|
+
const obj = super.toJSON();
|
|
17
21
|
if (this.type != null) obj.type = this.type;
|
|
18
|
-
|
|
19
|
-
if (this.text != null) obj.text = this.text;
|
|
20
|
-
if (this.sources.length > 0) obj.sources = this.sources.map(s => s.toJSON());
|
|
22
|
+
this._serialize(obj, 'images', '_images', '_rawImages');
|
|
21
23
|
return obj;
|
|
22
24
|
}
|
|
23
25
|
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { RegistrableModel } from './registrable.js';
|
|
2
|
+
import { ConceptSource } from './concept-source.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Model holding a caption, description, alt text and a list of sources.
 *
 * This base class carries no identifier: `findById` never matches and
 * `allIds` is empty. Relies on `_lazy` / `_serialize`, which are not
 * defined here (presumably inherited from the base model — confirm).
 */
export class NonVerbalEntity extends RegistrableModel {
  constructor(data = {}) {
    super();
    for (const field of ['caption', 'description', 'alt']) {
      this[field] = data[field] ?? null;
    }
    // Raw source records are kept as given; `sources` wraps them on demand.
    this._rawSources = data.sources ?? [];
    this._sources = null;
  }

  /** Sources, with each raw record wrapped in a `ConceptSource`. */
  get sources() {
    const toSource = raw => (raw instanceof ConceptSource ? raw : new ConceptSource(raw));
    return this._lazy('_sources', '_rawSources', toSource);
  }

  /** This class has no id, so no target id can match. */
  findById(_targetId) {
    return null;
  }

  /** This class has no id to report. */
  allIds() {
    return [];
  }

  /** Plain-object form; null/undefined scalar fields are omitted. */
  toJSON() {
    const obj = {};
    for (const field of ['caption', 'description', 'alt']) {
      const value = this[field];
      if (value != null) obj[field] = value;
    }
    this._serialize(obj, 'sources', '_sources', '_rawSources');
    return obj;
  }

  /** Build through `fromData`, dispatching on this class's type registry. */
  static fromJSON(data) {
    return NonVerbalEntity.fromData(data);
  }
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { RegistrableModel } from './registrable.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Reference to a non-verbal entity by id, with an optional display text.
 *
 * Serializes to a bare id string, or to `{ ref, display }` when a display
 * text is set.
 */
export class NonVerbalReference extends RegistrableModel {
  /**
   * @param data Object carrying the id under `entityId`, `entity_id`,
   *   `ref` or `id` (first non-nullish wins) and an optional `display`.
   */
  constructor(data = {}) {
    super();
    this.entityId = NonVerbalReference._entityIdOf(data);
    this.display = data.display ?? null;
  }

  /** Resolve the entity id from any of the accepted key spellings. */
  static _entityIdOf(data) {
    return data.entityId ?? data.entity_id ?? data.ref ?? data.id ?? null;
  }

  /** `[className, entityId]` pair identifying this reference for dedup. */
  get dedupKey() {
    return [this.constructor.name, this.entityId];
  }

  toJSON() {
    if (this.display != null) {
      return { ref: this.entityId, display: this.display };
    }
    return this.entityId;
  }

  /**
   * Build a reference from a string id, a plain object, or an existing
   * instance (returned unchanged). When `data.type` names a class
   * registered on `NonVerbalReference`, that class is instantiated.
   *
   * The registry is addressed through `NonVerbalReference` rather than
   * `this`: registries are stored per class, and `this` is undefined when
   * the method is passed around detached (e.g. `list.map(X.fromJSON)`).
   * A null/undefined `data` yields a reference with a null `entityId`,
   * matching what the constructor does when called without arguments.
   */
  static fromJSON(data) {
    if (data instanceof NonVerbalReference) return data;
    if (typeof data === 'string') {
      return new NonVerbalReference({ entityId: data });
    }

    const source = data ?? {};
    const fields = {
      entityId: NonVerbalReference._entityIdOf(source),
      display: source.display ?? null,
    };

    const registry = NonVerbalReference._registry();
    const type = source.type;
    if (type && registry.has(type)) {
      const Cls = registry.get(type);
      return new Cls(fields);
    }
    return new NonVerbalReference(fields);
  }
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { NonVerbalReference } from './non-verbal-reference.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Reference subclass registered under the type `'figure'`.
 * `fromJSON` defers entirely to `NonVerbalReference.fromJSON`.
 */
export class FigureReference extends NonVerbalReference {
  static fromJSON(data) {
    return NonVerbalReference.fromJSON(data);
  }
}
NonVerbalReference.register('figure', FigureReference);

/**
 * Reference subclass registered under the type `'table'`.
 * `fromJSON` defers entirely to `NonVerbalReference.fromJSON`.
 */
export class TableReference extends NonVerbalReference {
  static fromJSON(data) {
    return NonVerbalReference.fromJSON(data);
  }
}
NonVerbalReference.register('table', TableReference);

/**
 * Reference subclass registered under the type `'formula'`.
 * `fromJSON` defers entirely to `NonVerbalReference.fromJSON`.
 */
export class FormulaReference extends NonVerbalReference {
  static fromJSON(data) {
    return NonVerbalReference.fromJSON(data);
  }
}
NonVerbalReference.register('formula', FormulaReference);
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { GlossaristModel } from './base.js';
|
|
2
|
+
|
|
3
|
+
// One type→class Map per registering class, keyed by the class object
// itself. A subclass therefore does NOT see entries registered on its
// parent; each class that calls `register` gets its own Map.
const _registries = new WeakMap();

/**
 * Model base that lets a class register implementations by `type` and
 * instantiate the matching one from raw data.
 */
export class RegistrableModel extends GlossaristModel {
  /** Registry Map of the class this is called on, created on first use. */
  static _registry() {
    if (!_registries.has(this)) {
      _registries.set(this, new Map());
    }
    return _registries.get(this);
  }

  /** Associate `type` with `cls` in the calling class's registry. */
  static register(type, cls) {
    this._registry().set(type, cls);
  }

  /**
   * Instantiate from raw data, choosing the class registered for
   * `data.type` on the calling class and defaulting to the calling class.
   * An existing instance of the calling class is returned unchanged.
   *
   * `null` is forwarded to the constructor as `undefined` so that
   * constructors declaring a `data = {}` default receive that default
   * rather than dereferencing null.
   */
  static fromData(data) {
    if (data instanceof this) return data;
    const type = data?.type;
    const registered = type ? this._registry().get(type) : undefined;
    const Cls = registered ?? this;
    return new Cls(data ?? undefined);
  }
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { NonVerbalEntity } from './non-verbal-entity.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Non-verbal entity that additionally carries an `id` and an
 * `identifier`, and can be located by that `id`.
 */
export class SharedNonVerbalEntity extends NonVerbalEntity {
  constructor(data = {}) {
    super(data);
    this.id = data.id ?? null;
    this.identifier = data.identifier ?? null;
  }

  /** Return this entity when its `id` strictly equals `targetId`, else null. */
  findById(targetId) {
    if (this.id === targetId) return this;
    return null;
  }

  /** Single-element list with this entity's `id`, or empty when unset. */
  allIds() {
    return this.id == null ? [] : [this.id];
  }

  /** Parent serialization plus `id` / `identifier` when they are set. */
  toJSON() {
    const obj = super.toJSON();
    for (const key of ['id', 'identifier']) {
      if (this[key] != null) obj[key] = this[key];
    }
    return obj;
  }

  static fromJSON(data) {
    return SharedNonVerbalEntity.fromData(data);
  }
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { SharedNonVerbalEntity } from './shared-non-verbal-entity.js';
|
|
2
|
+
import { NonVerbalEntity } from './non-verbal-entity.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Shared non-verbal entity with `content` and `format` fields,
 * registered on `NonVerbalEntity` under the type `'table'`.
 */
export class Table extends SharedNonVerbalEntity {
  constructor(data = {}) {
    super(data);
    this.content = data.content ?? null;
    this.format = data.format ?? null;
  }

  /** Parent serialization plus `content` / `format` when they are set. */
  toJSON() {
    const obj = super.toJSON();
    for (const key of ['content', 'format']) {
      if (this[key] != null) obj[key] = this[key];
    }
    return obj;
  }

  /** Always constructs a new `Table` from `data`. */
  static fromJSON(data) {
    return new Table(data);
  }
}

NonVerbalEntity.register('table', Table);
|
package/src/reference-mention.js
CHANGED
|
@@ -27,6 +27,12 @@
|
|
|
27
27
|
|
|
28
28
|
const NUMERIC_RE = /^\d+(?:[.-]\d+)+$/;
|
|
29
29
|
|
|
30
|
+
// Mention prefixes for non-verbal references, each paired with the parse
// `kind` reported for a mention that starts with it. The list is frozen.
const NVR_PREFIXES = Object.freeze(
  [
    ['fig:', 'fig-ref'],
    ['table:', 'table-ref'],
    ['formula:', 'formula-ref'],
  ].map(([prefix, kind]) => ({ prefix, kind })),
);
|
|
35
|
+
|
|
30
36
|
/**
|
|
31
37
|
* Parse the body of a {{...}} mention (without the braces).
|
|
32
38
|
*
|
|
@@ -63,7 +69,17 @@ export function parseMention(raw) {
|
|
|
63
69
|
};
|
|
64
70
|
}
|
|
65
71
|
|
|
66
|
-
// 3.
|
|
72
|
+
// 3. NVR prefixes (fig:/table:/formula:) — config-driven dispatch.
|
|
73
|
+
for (const { prefix, kind } of NVR_PREFIXES) {
|
|
74
|
+
const escPrefix = prefix.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
75
|
+
const match = body.match(new RegExp(`^${escPrefix}([^,}]+)(?:,(.*))?$`));
|
|
76
|
+
if (match) {
|
|
77
|
+
const label = match[2] !== undefined ? unquoteLabel(match[2].trim()) : null;
|
|
78
|
+
return { kind, key: match[1].trim(), label, raw: body };
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// 4. Comma-separated form: {{id, render}}.
|
|
67
83
|
// ID always comes first, render text always comes last.
|
|
68
84
|
const commaIdx = body.indexOf(',');
|
|
69
85
|
if (commaIdx !== -1) {
|
|
@@ -76,12 +92,12 @@ export function parseMention(raw) {
|
|
|
76
92
|
return { kind: 'designation', id, label, raw: body };
|
|
77
93
|
}
|
|
78
94
|
|
|
79
|
-
//
|
|
95
|
+
// 5. Bare numeric id.
|
|
80
96
|
if (NUMERIC_RE.test(body)) {
|
|
81
97
|
return { kind: 'numeric', id: body, label: null, raw: body };
|
|
82
98
|
}
|
|
83
99
|
|
|
84
|
-
//
|
|
100
|
+
// 6. Anything else is unresolved at the parse layer.
|
|
85
101
|
return { kind: 'unresolved', raw: body };
|
|
86
102
|
}
|
|
87
103
|
|
|
@@ -13,6 +13,21 @@ export class Reference {
|
|
|
13
13
|
this.resolution = extras.resolution ?? null;
|
|
14
14
|
this.lookupKey = extras.lookupKey ?? null;
|
|
15
15
|
}
|
|
16
|
+
|
|
17
|
+
get dedupKey() {
|
|
18
|
+
if (this.type === 'bibliography') {
|
|
19
|
+
return ['bibliography',
|
|
20
|
+
this.sourceId ?? this.citation?.ref?.id ?? this.target];
|
|
21
|
+
}
|
|
22
|
+
if (this.type === 'figure' || this.type === 'table' || this.type === 'formula') {
|
|
23
|
+
return [this.type, this.lookupKey?.entityId ?? this.target];
|
|
24
|
+
}
|
|
25
|
+
if (this.type === 'concept') {
|
|
26
|
+
return ['concept',
|
|
27
|
+
this.lookupKey?.id ?? this.lookupKey?.designation ?? this.target];
|
|
28
|
+
}
|
|
29
|
+
return [this.type, this.target];
|
|
30
|
+
}
|
|
16
31
|
}
|
|
17
32
|
|
|
18
33
|
function refTarget(rc) {
|
|
@@ -33,6 +48,18 @@ export function resolveBibliographyRecord(citationRef, registry) {
|
|
|
33
48
|
return bioColl.byId(citationRef.id);
|
|
34
49
|
}
|
|
35
50
|
|
|
51
|
+
export function findNonVerbalEntity(ref, registry) {
|
|
52
|
+
const { entityType, entityId } = ref.lookupKey ?? {};
|
|
53
|
+
if (!entityType || !entityId) return null;
|
|
54
|
+
const collection = registry[`nvr:${entityType}`];
|
|
55
|
+
if (!collection) return null;
|
|
56
|
+
for (const entity of collection) {
|
|
57
|
+
const found = entity.findById(entityId);
|
|
58
|
+
if (found) return found;
|
|
59
|
+
}
|
|
60
|
+
return null;
|
|
61
|
+
}
|
|
62
|
+
|
|
36
63
|
export class ReferenceResolver {
|
|
37
64
|
extractReferences(concept) {
|
|
38
65
|
const refs = [];
|
|
@@ -46,6 +73,22 @@ export class ReferenceResolver {
|
|
|
46
73
|
}
|
|
47
74
|
}
|
|
48
75
|
|
|
76
|
+
for (const ref of concept.figures) {
|
|
77
|
+
refs.push(new Reference('figure', ref.display ?? ref.entityId, 'structural', 'figures', {
|
|
78
|
+
lookupKey: { entityType: 'figure', entityId: ref.entityId },
|
|
79
|
+
}));
|
|
80
|
+
}
|
|
81
|
+
for (const ref of concept.tables) {
|
|
82
|
+
refs.push(new Reference('table', ref.display ?? ref.entityId, 'structural', 'tables', {
|
|
83
|
+
lookupKey: { entityType: 'table', entityId: ref.entityId },
|
|
84
|
+
}));
|
|
85
|
+
}
|
|
86
|
+
for (const ref of concept.formulas) {
|
|
87
|
+
refs.push(new Reference('formula', ref.display ?? ref.entityId, 'structural', 'formulas', {
|
|
88
|
+
lookupKey: { entityType: 'formula', entityId: ref.entityId },
|
|
89
|
+
}));
|
|
90
|
+
}
|
|
91
|
+
|
|
49
92
|
for (const lang of concept.languages) {
|
|
50
93
|
const lc = concept.localization(lang);
|
|
51
94
|
if (!lc) continue;
|
|
@@ -58,44 +101,24 @@ export class ReferenceResolver {
|
|
|
58
101
|
}
|
|
59
102
|
}
|
|
60
103
|
|
|
61
|
-
const
|
|
62
|
-
for (const { text, source } of texts) {
|
|
104
|
+
for (const { text, source } of lc.walkTexts(`localizations.${lang}`)) {
|
|
63
105
|
for (const ref of this._extractFromText(text, source, concept)) {
|
|
64
106
|
refs.push(ref);
|
|
65
107
|
}
|
|
66
108
|
}
|
|
67
109
|
}
|
|
68
110
|
|
|
69
|
-
return refs;
|
|
111
|
+
return this._dedup(refs);
|
|
70
112
|
}
|
|
71
113
|
|
|
72
|
-
|
|
73
|
-
const
|
|
74
|
-
|
|
75
|
-
const
|
|
76
|
-
if (
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
}
|
|
80
|
-
for (let i = 0; i < lc.notes.length; i++) {
|
|
81
|
-
const content = lc.notes[i]?.content;
|
|
82
|
-
if (content) {
|
|
83
|
-
out.push({ text: content, source: `localizations.${lang}.notes[${i}].content` });
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
for (let i = 0; i < lc.examples.length; i++) {
|
|
87
|
-
const content = lc.examples[i]?.content;
|
|
88
|
-
if (typeof content === 'string') {
|
|
89
|
-
out.push({ text: content, source: `localizations.${lang}.examples[${i}].content` });
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
for (let i = 0; i < lc.annotations.length; i++) {
|
|
93
|
-
const content = lc.annotations[i]?.content;
|
|
94
|
-
if (typeof content === 'string') {
|
|
95
|
-
out.push({ text: content, source: `localizations.${lang}.annotations[${i}].content` });
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
return out;
|
|
114
|
+
_dedup(refs) {
|
|
115
|
+
const seen = new Set();
|
|
116
|
+
return refs.filter(ref => {
|
|
117
|
+
const key = JSON.stringify(ref.dedupKey);
|
|
118
|
+
if (seen.has(key)) return false;
|
|
119
|
+
seen.add(key);
|
|
120
|
+
return true;
|
|
121
|
+
});
|
|
99
122
|
}
|
|
100
123
|
|
|
101
124
|
_extractFromText(text, source, concept) {
|
|
@@ -114,6 +137,21 @@ export class ReferenceResolver {
|
|
|
114
137
|
resolution: null,
|
|
115
138
|
}));
|
|
116
139
|
break;
|
|
140
|
+
case 'fig-ref':
|
|
141
|
+
refs.push(new Reference('figure', parsed.label ?? parsed.key, 'embedded', source, {
|
|
142
|
+
lookupKey: { entityType: 'figure', entityId: parsed.key },
|
|
143
|
+
}));
|
|
144
|
+
break;
|
|
145
|
+
case 'table-ref':
|
|
146
|
+
refs.push(new Reference('table', parsed.label ?? parsed.key, 'embedded', source, {
|
|
147
|
+
lookupKey: { entityType: 'table', entityId: parsed.key },
|
|
148
|
+
}));
|
|
149
|
+
break;
|
|
150
|
+
case 'formula-ref':
|
|
151
|
+
refs.push(new Reference('formula', parsed.label ?? parsed.key, 'embedded', source, {
|
|
152
|
+
lookupKey: { entityType: 'formula', entityId: parsed.key },
|
|
153
|
+
}));
|
|
154
|
+
break;
|
|
117
155
|
case 'numeric':
|
|
118
156
|
refs.push(new Reference('concept', parsed.label ?? parsed.id, 'embedded', source, {
|
|
119
157
|
lookupKey: { id: parsed.id },
|
|
@@ -168,6 +206,9 @@ export class ReferenceResolver {
|
|
|
168
206
|
switch (ref.type) {
|
|
169
207
|
case 'concept': return this._resolveConcept(ref, registry);
|
|
170
208
|
case 'bibliography': return this._resolveBibliography(ref, registry);
|
|
209
|
+
case 'figure':
|
|
210
|
+
case 'table':
|
|
211
|
+
case 'formula': return this._resolveNonVerbal(ref, registry);
|
|
171
212
|
case 'dataset': return this._resolveDataset(ref, registry);
|
|
172
213
|
case 'typed-ref': return this._resolveTypedRef(ref, registry);
|
|
173
214
|
case 'standard': return this._resolveStandard(ref, registry);
|
|
@@ -175,6 +216,10 @@ export class ReferenceResolver {
|
|
|
175
216
|
}
|
|
176
217
|
}
|
|
177
218
|
|
|
219
|
+
_resolveNonVerbal(ref, registry) {
|
|
220
|
+
return findNonVerbalEntity(ref, registry);
|
|
221
|
+
}
|
|
222
|
+
|
|
178
223
|
_resolveConcept(ref, registry) {
|
|
179
224
|
if (ref.lookupKey?.id) {
|
|
180
225
|
const dataset = ref.lookupKey.dataset;
|
|
@@ -218,7 +263,8 @@ export class ReferenceResolver {
|
|
|
218
263
|
resolveAll(concept, registry) {
|
|
219
264
|
const resolved = new Map();
|
|
220
265
|
for (const ref of this.extractReferences(concept)) {
|
|
221
|
-
if (ref.type === 'concept' || ref.type === 'bibliography'
|
|
266
|
+
if (ref.type === 'concept' || ref.type === 'bibliography'
|
|
267
|
+
|| ref.type === 'figure' || ref.type === 'table' || ref.type === 'formula') {
|
|
222
268
|
const target = this.resolveReference(ref, registry);
|
|
223
269
|
if (target != null) {
|
|
224
270
|
const key = ref.target ?? ref.uri ?? ref.sourceId;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { resolveBibliographyRecord } from './reference-resolver.js';
|
|
1
|
+
import { resolveBibliographyRecord, findNonVerbalEntity } from './reference-resolver.js';
|
|
2
2
|
|
|
3
3
|
export class ReferenceClassifier {
|
|
4
4
|
constructor(registry = {}, sourceDatasetId = null, options = {}) {
|
|
@@ -14,6 +14,9 @@ export class ReferenceClassifier {
|
|
|
14
14
|
case 'concept': return this._classifyConcept(ref);
|
|
15
15
|
case 'dataset': return this._classifyDataset(ref);
|
|
16
16
|
case 'bibliography': return this._classifyBibliography(ref);
|
|
17
|
+
case 'figure':
|
|
18
|
+
case 'table':
|
|
19
|
+
case 'formula': return this._classifyNonVerbal(ref);
|
|
17
20
|
case 'typed-ref': return this._classifyTypedRef(ref);
|
|
18
21
|
case 'standard': return 'legacy-standard';
|
|
19
22
|
default: return 'unknown';
|
|
@@ -63,4 +66,10 @@ export class ReferenceClassifier {
|
|
|
63
66
|
_classifyTypedRef(_ref) {
|
|
64
67
|
return 'typed-ref';
|
|
65
68
|
}
|
|
69
|
+
|
|
70
|
+
_classifyNonVerbal(ref) {
|
|
71
|
+
return findNonVerbalEntity(ref, this.registry)
|
|
72
|
+
? 'internal-citation'
|
|
73
|
+
: 'external-citation';
|
|
74
|
+
}
|
|
66
75
|
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { DATASET_ASSETS } from '../dataset-asset.js';
|
|
2
|
+
|
|
3
|
+
export class AssetIndex {
|
|
4
|
+
constructor() {
|
|
5
|
+
this._paths = new Set();
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
get paths() {
|
|
9
|
+
return [...this._paths].sort();
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
get size() {
|
|
13
|
+
return this._paths.size;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
register(path) {
|
|
17
|
+
if (path == null) return;
|
|
18
|
+
this._paths.add(this._normalize(path));
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
has(path) {
|
|
22
|
+
if (path == null) return false;
|
|
23
|
+
return this._paths.has(this._normalize(path));
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
[Symbol.iterator]() {
|
|
27
|
+
return this._paths[Symbol.iterator]();
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
_normalize(path) {
|
|
31
|
+
return String(path).replace(/^\//, '');
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
static async fromGcrPackage(pkg) {
|
|
35
|
+
const index = new AssetIndex();
|
|
36
|
+
const names = await pkg.imageFileNames();
|
|
37
|
+
for (const name of names) {
|
|
38
|
+
index.register(name);
|
|
39
|
+
}
|
|
40
|
+
return index;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
static fromDirectory(datasetPath, fs) {
|
|
44
|
+
const index = new AssetIndex();
|
|
45
|
+
const imagesAsset = DATASET_ASSETS.find(
|
|
46
|
+
a => a.type === 'directory' && a.path === 'images');
|
|
47
|
+
if (!imagesAsset) return index;
|
|
48
|
+
|
|
49
|
+
const imagesDir = `${datasetPath}/${imagesAsset.path}`;
|
|
50
|
+
_walkDir(fs, imagesDir, imagesAsset.path, index);
|
|
51
|
+
return index;
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
function _walkDir(fs, dirPath, relativePrefix, index) {
|
|
56
|
+
if (!fs.existsSync(dirPath)) return;
|
|
57
|
+
for (const entry of fs.readdirSync(dirPath, { withFileTypes: true })) {
|
|
58
|
+
const fullPath = `${dirPath}/${entry.name}`;
|
|
59
|
+
const relPath = `${relativePrefix}/${entry.name}`;
|
|
60
|
+
if (entry.isDirectory()) {
|
|
61
|
+
_walkDir(fs, fullPath, relPath, index);
|
|
62
|
+
} else {
|
|
63
|
+
index.register(relPath);
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
}
|
package/src/validators/index.js
CHANGED
|
@@ -14,6 +14,8 @@ export {
|
|
|
14
14
|
UuidFormatRule,
|
|
15
15
|
SourceUrnFormatRule,
|
|
16
16
|
CiteRefIntegrityRule,
|
|
17
|
+
NonVerbalRefIntegrityRule,
|
|
18
|
+
OrphanedImagesRule,
|
|
17
19
|
} from './v3-rules.js';
|
|
18
20
|
|
|
19
21
|
import { ConceptValidator, LanguageCodeRule, DesignationTypeRule, EntryStatusRule } from './concept-validator.js';
|
|
@@ -29,6 +31,7 @@ import {
|
|
|
29
31
|
UuidFormatRule,
|
|
30
32
|
SourceUrnFormatRule,
|
|
31
33
|
CiteRefIntegrityRule,
|
|
34
|
+
NonVerbalRefIntegrityRule,
|
|
32
35
|
} from './v3-rules.js';
|
|
33
36
|
|
|
34
37
|
const _default = new ConceptValidator()
|
|
@@ -43,7 +46,8 @@ const _default = new ConceptValidator()
|
|
|
43
46
|
.addRule(new UuidFormatRule())
|
|
44
47
|
.addRule(new SourceUrnFormatRule())
|
|
45
48
|
.addRule(new RelationshipTypeRule())
|
|
46
|
-
.addRule(new CiteRefIntegrityRule())
|
|
49
|
+
.addRule(new CiteRefIntegrityRule())
|
|
50
|
+
.addRule(new NonVerbalRefIntegrityRule());
|
|
47
51
|
|
|
48
52
|
export function validateConcept(concept) {
|
|
49
53
|
return _default.validate(concept);
|