@glossarist/concept-browser 0.3.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/cli/index.mjs +2 -1
- package/env.d.ts +5 -0
- package/package.json +4 -3
- package/scripts/build-edges.js +78 -10
- package/scripts/generate-data.mjs +152 -20
- package/scripts/generate-ontology-data.mjs +184 -0
- package/scripts/generate-ontology-schema.mjs +315 -0
- package/src/__tests__/about-view.test.ts +98 -0
- package/src/__tests__/app-footer.test.ts +38 -0
- package/src/__tests__/app-header.test.ts +130 -0
- package/src/__tests__/app-sidebar.test.ts +159 -0
- package/src/__tests__/asciidoc-lite.test.ts +1 -1
- package/src/__tests__/concept-card.test.ts +115 -0
- package/src/__tests__/concept-detail-interaction.test.ts +273 -0
- package/src/__tests__/concept-formats.test.ts +32 -30
- package/src/__tests__/concept-timeline.test.ts +200 -0
- package/src/__tests__/concept-view.test.ts +88 -0
- package/src/__tests__/contributors-view.test.ts +103 -0
- package/src/__tests__/dataset-adapter.test.ts +172 -23
- package/src/__tests__/dataset-view.test.ts +232 -0
- package/src/__tests__/designation-registry.test.ts +161 -0
- package/src/__tests__/format-downloads.test.ts +98 -0
- package/src/__tests__/graph-view.test.ts +69 -0
- package/src/__tests__/graph.test.ts +62 -0
- package/src/__tests__/home-interaction.test.ts +157 -0
- package/src/__tests__/language-detail.test.ts +203 -0
- package/src/__tests__/nav-icon.test.ts +48 -0
- package/src/__tests__/news-view.test.ts +87 -0
- package/src/__tests__/ontology-registry.test.ts +109 -0
- package/src/__tests__/page-view.test.ts +83 -0
- package/src/__tests__/relationship-categories.test.ts +62 -0
- package/src/__tests__/resolve-view.test.ts +77 -0
- package/src/__tests__/router.test.ts +65 -0
- package/src/__tests__/search-bar.test.ts +219 -0
- package/src/__tests__/search-view.test.ts +41 -0
- package/src/__tests__/stats-view.test.ts +77 -0
- package/src/__tests__/test-helpers.ts +171 -0
- package/src/__tests__/ui-store.test.ts +100 -0
- package/src/__tests__/v-math.test.ts +8 -7
- package/src/adapters/DatasetAdapter.ts +188 -63
- package/src/adapters/model-bridge.ts +277 -0
- package/src/adapters/ontology-registry.ts +75 -0
- package/src/adapters/ontology-schema.ts +100 -0
- package/src/adapters/types.ts +53 -78
- package/src/components/AppSidebar.vue +1 -1
- package/src/components/CitationDisplay.vue +35 -0
- package/src/components/ConceptDetail.vue +349 -146
- package/src/components/ConceptRdfView.vue +397 -0
- package/src/components/ConceptTimeline.vue +57 -60
- package/src/components/GraphPanel.vue +96 -31
- package/src/components/LanguageDetail.vue +46 -61
- package/src/components/NavIcon.vue +1 -0
- package/src/components/NonVerbalRepDisplay.vue +38 -0
- package/src/components/RelationshipList.vue +99 -0
- package/src/composables/use-render-options.ts +1 -4
- package/src/config/use-site-config.ts +3 -0
- package/src/data/ontology-schema.json +1551 -0
- package/src/data/taxonomies.json +543 -0
- package/src/graph/GraphEngine.ts +7 -4
- package/src/router/index.ts +6 -1
- package/src/shims/empty.ts +1 -0
- package/src/shims/node-crypto.ts +6 -0
- package/src/shims/node-path.ts +10 -0
- package/src/stores/vocabulary.ts +82 -32
- package/src/style.css +74 -20
- package/src/utils/asciidoc-lite.ts +17 -19
- package/src/utils/concept-formats.ts +22 -20
- package/src/utils/concept-helpers.ts +54 -0
- package/src/utils/designation-registry.ts +124 -0
- package/src/utils/escape.ts +7 -0
- package/src/utils/markdown-lite.ts +1 -3
- package/src/utils/math.ts +2 -11
- package/src/utils/plurimath.ts +2 -7
- package/src/utils/relationship-categories.ts +84 -0
- package/src/views/ConceptView.vue +22 -1
- package/src/views/DatasetView.vue +7 -2
- package/src/views/OntologySchemaView.vue +302 -0
- package/src/views/PageView.vue +28 -17
- package/src/views/StatsView.vue +34 -12
- package/vite.config.ts +8 -0
package/README.md
CHANGED
|
@@ -57,9 +57,10 @@ datasets.yml
|
|
|
57
57
|
└─> public/data/{id}/
|
|
58
58
|
├── manifest.json Dataset metadata
|
|
59
59
|
├── index.json Concept listing (chunked for large sets)
|
|
60
|
-
├── edges.json Pre-computed cross-reference edges
|
|
60
|
+
├── edges.json Pre-computed cross-reference + domain edges
|
|
61
|
+
├── domain-nodes.json Domain classification nodes
|
|
61
62
|
└── concepts/*.json Individual concept documents
|
|
62
|
-
|
|
63
|
+
└─> scripts/build-edges.js (extract graph + domain edges)
|
|
63
64
|
```
|
|
64
65
|
|
|
65
66
|
### Step-by-step
|
package/cli/index.mjs
CHANGED
|
@@ -100,7 +100,8 @@ Environment:
|
|
|
100
100
|
// Run vite build using the package's vite.config.ts
|
|
101
101
|
console.log(`\n=== BUILD SPA ===\n`);
|
|
102
102
|
const viteConfig = resolve(pkgRoot, 'vite.config.ts');
|
|
103
|
-
|
|
103
|
+
const viteBin = resolve(pkgRoot, 'node_modules', '.bin', 'vite');
|
|
104
|
+
execSync(`${viteBin} build --config ${viteConfig}`, {
|
|
104
105
|
stdio: 'inherit',
|
|
105
106
|
env: { ...process.env },
|
|
106
107
|
});
|
package/env.d.ts
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@glossarist/concept-browser",
|
|
3
|
-
"version": "0.3.4",
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "Vue SPA for browsing Glossarist terminology datasets with cross-reference resolution, graph visualization, and multi-language support",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -13,7 +13,8 @@
|
|
|
13
13
|
"preview": "vite preview",
|
|
14
14
|
"fetch-datasets": "node scripts/fetch-datasets.mjs",
|
|
15
15
|
"generate-data": "node scripts/generate-data.mjs",
|
|
16
|
-
"
|
|
16
|
+
"generate-ontology": "node scripts/generate-ontology-data.mjs && node scripts/generate-ontology-schema.mjs",
|
|
17
|
+
"build:full": "npm run generate-ontology && npm run fetch-datasets && npm run generate-data && node scripts/build-edges.js && npm run build",
|
|
17
18
|
"build:site": "concept-browser --site build",
|
|
18
19
|
"test": "vitest run",
|
|
19
20
|
"test:watch": "vitest"
|
|
@@ -23,7 +24,7 @@
|
|
|
23
24
|
"@vitejs/plugin-vue": "^5.2.3",
|
|
24
25
|
"autoprefixer": "^10.4.21",
|
|
25
26
|
"d3": "^7.9.0",
|
|
26
|
-
"glossarist": "^0.
|
|
27
|
+
"glossarist": "^0.3.0",
|
|
27
28
|
"js-yaml": "^4.1.0",
|
|
28
29
|
"pinia": "^2.3.1",
|
|
29
30
|
"postcss": "^8.5.3",
|
package/scripts/build-edges.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Pre-computes cross-reference edges for each dataset.
|
|
3
|
-
* Reads all concept JSON files, extracts structured and
|
|
4
|
-
* and writes edges.json
|
|
2
|
+
* Pre-computes cross-reference and domain edges for each dataset.
|
|
3
|
+
* Reads all concept JSON files, extracts structured references and
|
|
4
|
+
* authoritative sources (domains), and writes edges.json + domain-nodes.json.
|
|
5
5
|
*
|
|
6
6
|
* Usage: node scripts/build-edges.js
|
|
7
7
|
*/
|
|
@@ -13,12 +13,18 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
|
13
13
|
const ROOT = process.cwd();
|
|
14
14
|
const DATA_DIR = join(ROOT, 'public', 'data');
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
// --- Normalization ---
|
|
17
|
+
|
|
18
|
+
function slugify(text) {
|
|
19
|
+
return text.toLowerCase().replace(/[^\w\s-]/g, '').replace(/[\s/]+/g, '-');
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
// --- Extractors (open/closed: add new extractors to EXTRACTORS array) ---
|
|
23
|
+
|
|
24
|
+
function extractReferences(concept, registerId) {
|
|
17
25
|
const edges = [];
|
|
18
26
|
const sourceUri = concept['@id'];
|
|
19
|
-
|
|
20
|
-
for (const [_lang, lc] of Object.entries(concept['gl:localizedConcept'] || {})) {
|
|
21
|
-
// Structured cross-references (gl:references array, pre-computed during data generation)
|
|
27
|
+
for (const [lang, lc] of Object.entries(concept['gl:localizedConcept'] || {})) {
|
|
22
28
|
if (lc['gl:references']) {
|
|
23
29
|
for (const ref of lc['gl:references']) {
|
|
24
30
|
if (ref['@id'] && ref['@id'] !== sourceUri) {
|
|
@@ -28,15 +34,45 @@ function extractEdgesFromConcept(concept, registerId) {
|
|
|
28
34
|
type: 'references',
|
|
29
35
|
label: ref['gl:term'] || undefined,
|
|
30
36
|
register: registerId,
|
|
37
|
+
lang,
|
|
31
38
|
});
|
|
32
39
|
}
|
|
33
40
|
}
|
|
34
41
|
}
|
|
35
42
|
}
|
|
43
|
+
return edges;
|
|
44
|
+
}
|
|
36
45
|
|
|
46
|
+
function extractDomains(concept, registerId) {
|
|
47
|
+
const edges = [];
|
|
48
|
+
const sourceUri = concept['@id'];
|
|
49
|
+
const lcs = concept['gl:localizedConcept'] || {};
|
|
50
|
+
const langs = Object.keys(lcs);
|
|
51
|
+
const seen = new Set();
|
|
52
|
+
for (const lang of langs) {
|
|
53
|
+
const domain = lcs[lang]['gl:domain'];
|
|
54
|
+
if (domain && !seen.has(domain)) {
|
|
55
|
+
seen.add(domain);
|
|
56
|
+
edges.push({
|
|
57
|
+
source: sourceUri,
|
|
58
|
+
target: `https://glossarist.org/${registerId}/domain/${slugify(domain)}`,
|
|
59
|
+
type: 'domain',
|
|
60
|
+
label: domain,
|
|
61
|
+
register: registerId,
|
|
62
|
+
});
|
|
63
|
+
}
|
|
64
|
+
}
|
|
37
65
|
return edges;
|
|
38
66
|
}
|
|
39
67
|
|
|
68
|
+
const EXTRACTORS = [extractReferences, extractDomains];
|
|
69
|
+
|
|
70
|
+
function extractAllEdges(concept, registerId) {
|
|
71
|
+
return EXTRACTORS.flatMap(fn => fn(concept, registerId));
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
// --- Build ---
|
|
75
|
+
|
|
40
76
|
function buildEdgesForDataset(datasetDir, registerId) {
|
|
41
77
|
const conceptsDir = join(datasetDir, 'concepts');
|
|
42
78
|
if (!existsSync(conceptsDir)) {
|
|
@@ -48,13 +84,20 @@ function buildEdgesForDataset(datasetDir, registerId) {
|
|
|
48
84
|
console.log(` Processing ${files.length} concepts...`);
|
|
49
85
|
|
|
50
86
|
const allEdges = [];
|
|
87
|
+
const domainConceptCount = new Map();
|
|
51
88
|
let processed = 0;
|
|
52
89
|
|
|
53
90
|
for (const file of files) {
|
|
54
91
|
try {
|
|
55
92
|
const data = JSON.parse(readFileSync(join(conceptsDir, file), 'utf-8'));
|
|
56
|
-
const edges = extractEdgesFromConcept(data, registerId);
|
|
93
|
+
const edges = extractAllEdges(data, registerId);
|
|
57
94
|
allEdges.push(...edges);
|
|
95
|
+
|
|
96
|
+
for (const edge of edges) {
|
|
97
|
+
if (edge.type === 'domain') {
|
|
98
|
+
domainConceptCount.set(edge.target, (domainConceptCount.get(edge.target) || 0) + 1);
|
|
99
|
+
}
|
|
100
|
+
}
|
|
58
101
|
} catch (e) {
|
|
59
102
|
console.error(` Error processing ${file}: ${e.message}`);
|
|
60
103
|
}
|
|
@@ -64,11 +107,11 @@ function buildEdgesForDataset(datasetDir, registerId) {
|
|
|
64
107
|
}
|
|
65
108
|
}
|
|
66
109
|
|
|
67
|
-
// Deduplicate edges by source+target
|
|
110
|
+
// Deduplicate edges by source+target+type+lang
|
|
68
111
|
const seen = new Set();
|
|
69
112
|
const deduped = [];
|
|
70
113
|
for (const edge of allEdges) {
|
|
71
|
-
const key = `${edge.source}→${edge.target}`;
|
|
114
|
+
const key = `${edge.source}→${edge.target}→${edge.type}→${edge.lang || ''}`;
|
|
72
115
|
if (!seen.has(key)) {
|
|
73
116
|
seen.add(key);
|
|
74
117
|
deduped.push(edge);
|
|
@@ -84,6 +127,31 @@ function buildEdgesForDataset(datasetDir, registerId) {
|
|
|
84
127
|
const outputPath = join(datasetDir, 'edges.json');
|
|
85
128
|
writeFileSync(outputPath, JSON.stringify(output, null, 2));
|
|
86
129
|
console.log(` Written ${deduped.length} edges to edges.json (${(JSON.stringify(output).length / 1024).toFixed(1)} KB)`);
|
|
130
|
+
|
|
131
|
+
// Build domain-nodes.json
|
|
132
|
+
const domainEdgeMap = new Map();
|
|
133
|
+
for (const edge of deduped) {
|
|
134
|
+
if (edge.type === 'domain') {
|
|
135
|
+
const existing = domainEdgeMap.get(edge.target);
|
|
136
|
+
if (existing) {
|
|
137
|
+
existing.labels.add(edge.label);
|
|
138
|
+
} else {
|
|
139
|
+
domainEdgeMap.set(edge.target, { uri: edge.target, labels: new Set([edge.label]), registerId });
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
const domainNodes = [...domainEdgeMap.values()].map(d => ({
|
|
145
|
+
uri: d.uri,
|
|
146
|
+
label: [...d.labels][0],
|
|
147
|
+
registerId: d.registerId,
|
|
148
|
+
conceptCount: domainConceptCount.get(d.uri) || 0,
|
|
149
|
+
})).sort((a, b) => b.conceptCount - a.conceptCount);
|
|
150
|
+
|
|
151
|
+
const domainOutput = { registerId, domainNodes };
|
|
152
|
+
const domainPath = join(datasetDir, 'domain-nodes.json');
|
|
153
|
+
writeFileSync(domainPath, JSON.stringify(domainOutput, null, 2));
|
|
154
|
+
console.log(` Written ${domainNodes.length} domain nodes to domain-nodes.json`);
|
|
87
155
|
}
|
|
88
156
|
|
|
89
157
|
// Main
|
|
package/scripts/generate-data.mjs
CHANGED
|
@@ -30,6 +30,16 @@ function loadConceptFile(filePath) {
|
|
|
30
30
|
if (docs.length >= 1 && docs[0].data && docs[0].data.identifier !== undefined) {
|
|
31
31
|
const mc = docs[0];
|
|
32
32
|
const result = { termid: String(mc.data.identifier) };
|
|
33
|
+
|
|
34
|
+
// Managed concept-level fields
|
|
35
|
+
if (mc.related) result._related = mc.related;
|
|
36
|
+
if (mc.data.domains) result._domains = mc.data.domains;
|
|
37
|
+
if (mc.dates) result._dates = mc.dates;
|
|
38
|
+
if (mc.sources) result._sources = mc.sources;
|
|
39
|
+
if (mc.status) result._status = mc.status;
|
|
40
|
+
if (mc.schema_version) result._schemaVersion = mc.schema_version;
|
|
41
|
+
if (mc.date_accepted) result._dateAccepted = mc.date_accepted;
|
|
42
|
+
|
|
33
43
|
for (const doc of docs.slice(1)) {
|
|
34
44
|
if (!doc || !doc.data || !doc.data.language_code) continue;
|
|
35
45
|
const lang = doc.data.language_code;
|
|
@@ -49,17 +59,46 @@ function writeJson(filePath, data) {
|
|
|
49
59
|
}
|
|
50
60
|
|
|
51
61
|
function termToDesignation(term) {
|
|
62
|
+
const typeMap = {
|
|
63
|
+
expression: 'gl:Expression',
|
|
64
|
+
abbreviation: 'gl:Abbreviation',
|
|
65
|
+
symbol: 'gl:Symbol',
|
|
66
|
+
letter_symbol: 'gl:LetterSymbol',
|
|
67
|
+
'graphical symbol': 'gl:GraphicalSymbol',
|
|
68
|
+
};
|
|
52
69
|
const doc = {
|
|
53
|
-
'@type': term.type
|
|
54
|
-
: term.type === 'symbol' ? 'gl:Symbol'
|
|
55
|
-
: term.type === 'abbreviation' ? 'gl:Abbreviation'
|
|
56
|
-
: 'gl:Designation',
|
|
70
|
+
'@type': typeMap[term.type] || 'gl:Designation',
|
|
57
71
|
'gl:normativeStatus': term.normative_status || 'preferred',
|
|
58
72
|
'gl:term': term.designation,
|
|
59
73
|
};
|
|
60
|
-
|
|
61
|
-
if (term.
|
|
74
|
+
|
|
75
|
+
if (term.grammar_info && term.grammar_info.length > 0) {
|
|
76
|
+
doc['gl:grammarInfo'] = term.grammar_info.map(gi => {
|
|
77
|
+
const g = {};
|
|
78
|
+
if (gi.gender) g['gl:gender'] = gi.gender;
|
|
79
|
+
if (gi.number) g['gl:number'] = gi.number;
|
|
80
|
+
for (const pos of ['noun', 'verb', 'adj', 'adverb', 'preposition', 'participle']) {
|
|
81
|
+
if (gi[pos]) g[`gl:${pos}`] = gi[pos];
|
|
82
|
+
}
|
|
83
|
+
return g;
|
|
84
|
+
});
|
|
85
|
+
}
|
|
86
|
+
|
|
62
87
|
if (term.international !== undefined) doc['gl:international'] = term.international;
|
|
88
|
+
if (term.absent !== undefined) doc['gl:absent'] = term.absent;
|
|
89
|
+
if (term.geographical_area) doc['gl:geographicalArea'] = term.geographical_area;
|
|
90
|
+
if (term.term_type) doc['gl:termType'] = term.term_type;
|
|
91
|
+
if (term.prefix) doc['gl:prefix'] = term.prefix;
|
|
92
|
+
if (term.usage_info) doc['gl:usageInfo'] = term.usage_info;
|
|
93
|
+
if (term.field_of_application) doc['gl:fieldOfApplication'] = term.field_of_application;
|
|
94
|
+
|
|
95
|
+
if (term.acronym !== undefined) doc['gl:acronym'] = term.acronym;
|
|
96
|
+
if (term.initialism !== undefined) doc['gl:initialism'] = term.initialism;
|
|
97
|
+
if (term.truncation !== undefined) doc['gl:truncation'] = term.truncation;
|
|
98
|
+
|
|
99
|
+
if (term.text) doc['gl:text'] = term.text;
|
|
100
|
+
if (term.image) doc['gl:image'] = term.image;
|
|
101
|
+
|
|
63
102
|
return doc;
|
|
64
103
|
}
|
|
65
104
|
|
|
@@ -81,8 +120,24 @@ function sourcesToJsonLd(sources) {
|
|
|
81
120
|
if (s.status) doc['gl:sourceStatus'] = s.status;
|
|
82
121
|
if (s.origin) {
|
|
83
122
|
const origin = { '@type': 'gl:Citation' };
|
|
84
|
-
if (s.origin.ref)
|
|
85
|
-
|
|
123
|
+
if (s.origin.ref) {
|
|
124
|
+
const ref = s.origin.ref;
|
|
125
|
+
const refObj = { '@type': 'gl:Ref' };
|
|
126
|
+
if (ref.source) refObj['gl:source'] = ref.source;
|
|
127
|
+
if (ref.id) refObj['gl:id'] = ref.id;
|
|
128
|
+
if (ref.version) refObj['gl:version'] = ref.version;
|
|
129
|
+
origin['gl:ref'] = refObj;
|
|
130
|
+
}
|
|
131
|
+
if (s.origin.locality) {
|
|
132
|
+
const loc = s.origin.locality;
|
|
133
|
+
const locObj = {};
|
|
134
|
+
if (loc.type) locObj['gl:localityType'] = loc.type;
|
|
135
|
+
if (loc.reference_from) locObj['gl:referenceFrom'] = loc.reference_from;
|
|
136
|
+
if (loc.referenceFrom) locObj['gl:referenceFrom'] = loc.referenceFrom;
|
|
137
|
+
if (loc.reference_to) locObj['gl:referenceTo'] = loc.reference_to;
|
|
138
|
+
if (loc.referenceTo) locObj['gl:referenceTo'] = loc.referenceTo;
|
|
139
|
+
origin['gl:locality'] = locObj;
|
|
140
|
+
}
|
|
86
141
|
if (s.origin.link) origin['gl:link'] = s.origin.link;
|
|
87
142
|
doc['gl:origin'] = origin;
|
|
88
143
|
}
|
|
@@ -143,7 +198,7 @@ function buildRefMaps(config) {
|
|
|
143
198
|
if (xref.urnStandardMap) Object.assign(urnStandardMap, xref.urnStandardMap);
|
|
144
199
|
|
|
145
200
|
const uriBase = config.uriBase || `https://${config.domain}`;
|
|
146
|
-
return { refPrefixMap, urnStandardMap, uriBase };
|
|
201
|
+
return { refPrefixMap, urnStandardMap, uriBase, register: null };
|
|
147
202
|
}
|
|
148
203
|
|
|
149
204
|
function extractInlineRefs(localizedData, refMaps) {
|
|
@@ -178,6 +233,18 @@ function extractInlineRefs(localizedData, refMaps) {
|
|
|
178
233
|
if (datasetId) refs.push({ id: `${uriBase}/${datasetId}/concept/${m[2]}`, term: (m[4] || m[3]).trim() });
|
|
179
234
|
}
|
|
180
235
|
|
|
236
|
+
// Generic {{term, concept_id}} — same-dataset cross-reference (e.g. VIML)
|
|
237
|
+
const register = refMaps.register;
|
|
238
|
+
for (const m of fullText.matchAll(/\{\{([^,}]+),\s*([A-Za-z0-9.]+)\}\}/g)) {
|
|
239
|
+
const termName = m[1].trim();
|
|
240
|
+
const conceptId = m[2].trim();
|
|
241
|
+
// Skip if already matched by IEV or URN patterns
|
|
242
|
+
if (refPrefixMap && refPrefixMap[termName]) continue;
|
|
243
|
+
if (/^\d/.test(conceptId) || /^[A-Z]\.\d/.test(conceptId)) {
|
|
244
|
+
refs.push({ id: `${uriBase}/${register}/concept/${conceptId}`, term: termName });
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
|
|
181
248
|
const seen = new Set();
|
|
182
249
|
return refs.filter(r => {
|
|
183
250
|
if (seen.has(r.id)) return false;
|
|
@@ -210,6 +277,10 @@ function yamlToJsonLd(conceptYaml, register, refMaps) {
|
|
|
210
277
|
};
|
|
211
278
|
|
|
212
279
|
if (lc.entry_status) lDoc['gl:entryStatus'] = lc.entry_status;
|
|
280
|
+
if (lc.classification) lDoc['gl:classification'] = lc.classification;
|
|
281
|
+
if (lc.review_type) lDoc['gl:reviewType'] = lc.review_type;
|
|
282
|
+
if (lc.script) lDoc['gl:script'] = lc.script;
|
|
283
|
+
if (lc.system) lDoc['gl:system'] = lc.system;
|
|
213
284
|
if (lc.terms && lc.terms.length > 0) lDoc['gl:designation'] = lc.terms.map(termToDesignation);
|
|
214
285
|
if (lc.definition) lDoc['gl:definition'] = defsToJsonLd(lc.definition);
|
|
215
286
|
if (lc.notes && lc.notes.length > 0) lDoc['gl:notes'] = defsToJsonLd(lc.notes);
|
|
@@ -223,6 +294,7 @@ function yamlToJsonLd(conceptYaml, register, refMaps) {
|
|
|
223
294
|
if (lc.review_status) lDoc['gl:reviewStatus'] = lc.review_status;
|
|
224
295
|
if (lc.review_decision) lDoc['gl:reviewDecision'] = lc.review_decision;
|
|
225
296
|
if (lc.review_decision_notes) lDoc['gl:reviewDecisionNotes'] = lc.review_decision_notes;
|
|
297
|
+
if (lc.domain) lDoc['gl:domain'] = lc.domain;
|
|
226
298
|
if (lc.dates && lc.dates.length > 0) {
|
|
227
299
|
lDoc['gl:dates'] = lc.dates.map(d => ({
|
|
228
300
|
'gl:dateType': d.type,
|
|
@@ -245,6 +317,48 @@ function yamlToJsonLd(conceptYaml, register, refMaps) {
|
|
|
245
317
|
doc['gl:localizedConcept'] = localizations;
|
|
246
318
|
}
|
|
247
319
|
|
|
320
|
+
// Managed concept-level fields (v3)
|
|
321
|
+
if (conceptYaml._status) doc['gl:status'] = conceptYaml._status;
|
|
322
|
+
if (conceptYaml._schemaVersion) doc['gl:schemaVersion'] = conceptYaml._schemaVersion;
|
|
323
|
+
if (conceptYaml._dateAccepted) doc['gl:dateAccepted'] = conceptYaml._dateAccepted;
|
|
324
|
+
|
|
325
|
+
if (conceptYaml._dates && conceptYaml._dates.length > 0) {
|
|
326
|
+
doc['gl:dates'] = conceptYaml._dates.map(d => ({
|
|
327
|
+
'gl:dateType': d.type,
|
|
328
|
+
'gl:date': d.date,
|
|
329
|
+
}));
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
if (conceptYaml._sources && conceptYaml._sources.length > 0) {
|
|
333
|
+
doc['gl:source'] = sourcesToJsonLd(conceptYaml._sources);
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
if (conceptYaml._domains && conceptYaml._domains.length > 0) {
|
|
337
|
+
doc['gl:domain'] = conceptYaml._domains.map(d => {
|
|
338
|
+
const domain = { '@type': 'gl:ConceptReference' };
|
|
339
|
+
if (d.concept_id) domain['gl:conceptId'] = d.concept_id;
|
|
340
|
+
if (d.source) domain['gl:source'] = d.source;
|
|
341
|
+
if (d.urn) domain['gl:urn'] = d.urn;
|
|
342
|
+
if (d.ref_type) domain['gl:refType'] = d.ref_type;
|
|
343
|
+
return domain;
|
|
344
|
+
});
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
if (conceptYaml._related && conceptYaml._related.length > 0) {
|
|
348
|
+
doc['gl:related'] = conceptYaml._related.map(r => {
|
|
349
|
+
const rel = { '@type': 'gl:RelatedConcept' };
|
|
350
|
+
if (r.type) rel['gl:relationshipType'] = r.type;
|
|
351
|
+
if (r.content) rel['gl:content'] = r.content;
|
|
352
|
+
if (r.ref) {
|
|
353
|
+
const ref = {};
|
|
354
|
+
if (r.ref.source) ref['gl:source'] = r.ref.source;
|
|
355
|
+
if (r.ref.id) ref['gl:id'] = r.ref.id;
|
|
356
|
+
rel['gl:ref'] = ref;
|
|
357
|
+
}
|
|
358
|
+
return rel;
|
|
359
|
+
});
|
|
360
|
+
}
|
|
361
|
+
|
|
248
362
|
return doc;
|
|
249
363
|
}
|
|
250
364
|
|
|
@@ -352,7 +466,8 @@ function conceptJsonToSkosJsonLd(concept) {
|
|
|
352
466
|
}
|
|
353
467
|
|
|
354
468
|
function escapeXml(s) {
|
|
355
|
-
|
|
469
|
+
const str = Array.isArray(s) ? s.join(', ') : String(s ?? '');
|
|
470
|
+
return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
|
|
356
471
|
}
|
|
357
472
|
|
|
358
473
|
function conceptJsonToTbx(concept) {
|
|
@@ -378,15 +493,22 @@ function conceptJsonToTbx(concept) {
|
|
|
378
493
|
const status = d['gl:normativeStatus'] || '';
|
|
379
494
|
const type = d['@type'] || '';
|
|
380
495
|
let gramGrp = '';
|
|
381
|
-
if (d['gl:
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
496
|
+
if (d['gl:grammarInfo'] && d['gl:grammarInfo'].length > 0) {
|
|
497
|
+
const gi = d['gl:grammarInfo'][0];
|
|
498
|
+
if (gi['gl:gender']) gramGrp = `\n <grammaticalGender>${escapeXml(gi['gl:gender'])}</grammaticalGender>`;
|
|
499
|
+
if (gi['gl:number']) gramGrp += `\n <grammaticalNumber>${escapeXml(gi['gl:number'])}</grammaticalNumber>`;
|
|
500
|
+
for (const pos of ['noun', 'verb', 'adj', 'adverb', 'preposition', 'participle']) {
|
|
501
|
+
if (gi[`gl:${pos}`]) gramGrp += `\n <partOfSpeech>${pos}</partOfSpeech>`;
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
let posBlock = '';
|
|
505
|
+
if (type.includes('Abbreviation')) posBlock = '\n <partOfSpeech>abbreviation</partOfSpeech>';
|
|
506
|
+
if (type.includes('Symbol')) posBlock = '\n <partOfSpeech>symbol</partOfSpeech>';
|
|
385
507
|
|
|
386
508
|
termEntries.push(` <termEntry>
|
|
387
509
|
<langSet xml:lang="${lang}">
|
|
388
510
|
<tig>
|
|
389
|
-
<term>${escapeXml(term)}</term>${gramGrp}${
|
|
511
|
+
<term>${escapeXml(term)}</term>${gramGrp}${posBlock}
|
|
390
512
|
</tig>
|
|
391
513
|
</langSet>
|
|
392
514
|
</termEntry>`);
|
|
@@ -410,10 +532,19 @@ function conceptJsonToTbx(concept) {
|
|
|
410
532
|
for (const src of sources) {
|
|
411
533
|
const origin = src['gl:origin'] || {};
|
|
412
534
|
const parts = [];
|
|
413
|
-
|
|
414
|
-
if (
|
|
415
|
-
|
|
416
|
-
|
|
535
|
+
const ref = origin['gl:ref'];
|
|
536
|
+
if (ref) {
|
|
537
|
+
const refParts = [];
|
|
538
|
+
if (ref['gl:source']) refParts.push(ref['gl:source']);
|
|
539
|
+
if (ref['gl:id']) refParts.push(ref['gl:id']);
|
|
540
|
+
parts.push(refParts.join(' ') || '');
|
|
541
|
+
}
|
|
542
|
+
if (origin['gl:locality']) {
|
|
543
|
+
const loc = origin['gl:locality'];
|
|
544
|
+
if (loc['gl:referenceFrom']) parts.push(loc['gl:localityType'] ? `${loc['gl:localityType']} ${loc['gl:referenceFrom']}` : loc['gl:referenceFrom']);
|
|
545
|
+
}
|
|
546
|
+
if (parts.filter(Boolean).length) {
|
|
547
|
+
sourceBlock += `\n <ref>${escapeXml(parts.filter(Boolean).join(', '))}</ref>`;
|
|
417
548
|
}
|
|
418
549
|
}
|
|
419
550
|
|
|
@@ -460,6 +591,7 @@ function processDataset(dir, register, opts) {
|
|
|
460
591
|
const langTermCounts = {};
|
|
461
592
|
const langDefCounts = {};
|
|
462
593
|
const availableFormats = ['ttl', 'jsonld', 'yaml', 'tbx'];
|
|
594
|
+
const dsRefMaps = { ...refMaps, register };
|
|
463
595
|
|
|
464
596
|
for (let i = 0; i < files.length; i++) {
|
|
465
597
|
const file = files[i];
|
|
@@ -468,7 +600,7 @@ function processDataset(dir, register, opts) {
|
|
|
468
600
|
if (!conceptYaml || !conceptYaml.termid) continue;
|
|
469
601
|
|
|
470
602
|
const termid = String(conceptYaml.termid);
|
|
471
|
-
const jsonld = yamlToJsonLd(conceptYaml, register, refMaps);
|
|
603
|
+
const jsonld = yamlToJsonLd(conceptYaml, register, dsRefMaps);
|
|
472
604
|
writeJson(path.join(conceptsDir, `${termid}.json`), jsonld);
|
|
473
605
|
|
|
474
606
|
// Generate Turtle format
|
|
package/scripts/generate-ontology-data.mjs
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Extract SKOS taxonomy data from concept-model TTL files into JSON
|
|
4
|
+
* for browser consumption via the OntologyRegistry.
|
|
5
|
+
*
|
|
6
|
+
* Reads from: ../concept-model/ontologies/taxonomies/*.ttl
|
|
7
|
+
* Writes to: src/data/taxonomies.json
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs';
|
|
11
|
+
import { resolve, dirname, join } from 'path';
|
|
12
|
+
import { fileURLToPath } from 'url';
|
|
13
|
+
|
|
14
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
15
|
+
const ROOT = resolve(__dirname, '..');
|
|
16
|
+
const TAXONOMY_DIR = resolve(ROOT, '..', 'concept-model', 'ontologies', 'taxonomies');
|
|
17
|
+
const OUTPUT = resolve(ROOT, 'src', 'data', 'taxonomies.json');
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Minimal Turtle parser for SKOS taxonomy files.
|
|
21
|
+
* Handles the specific patterns used in our taxonomy TTL files:
|
|
22
|
+
* - Subject blocks terminated by "."
|
|
23
|
+
* - Predicate-object pairs separated by ";"
|
|
24
|
+
* - Quoted strings with @lang tags
|
|
25
|
+
* - Comma-separated objects
|
|
26
|
+
*/
|
|
27
|
+
function parseTurtle(text) {
|
|
28
|
+
// Remove comments
|
|
29
|
+
const cleaned = text.replace(/#[^\n]*/g, '');
|
|
30
|
+
// Split into subject blocks (terminated by ".")
|
|
31
|
+
const subjectBlocks = splitSubjects(cleaned);
|
|
32
|
+
|
|
33
|
+
const concepts = {};
|
|
34
|
+
let scheme = null;
|
|
35
|
+
let schemeLabel = null;
|
|
36
|
+
let schemeDefinition = null;
|
|
37
|
+
|
|
38
|
+
for (const block of subjectBlocks) {
|
|
39
|
+
const trimmed = block.trim();
|
|
40
|
+
if (!trimmed) continue;
|
|
41
|
+
|
|
42
|
+
// Parse subject and type
|
|
43
|
+
const subjectMatch = trimmed.match(/^([^\s]+)\s+a\s+(.+?)(?:\s*[;.,]|$)/s);
|
|
44
|
+
if (!subjectMatch) continue;
|
|
45
|
+
|
|
46
|
+
const subject = subjectMatch[1];
|
|
47
|
+
const types = subjectMatch[2];
|
|
48
|
+
|
|
49
|
+
const isScheme = /\bskos:ConceptScheme\b/.test(types);
|
|
50
|
+
const isConcept = /\bskos:Concept\b/.test(types);
|
|
51
|
+
|
|
52
|
+
if (isScheme) {
|
|
53
|
+
scheme = subject;
|
|
54
|
+
schemeLabel = findLiteral(trimmed, 'skos:prefLabel');
|
|
55
|
+
schemeDefinition = findLiteral(trimmed, 'skos:definition');
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
if (isConcept && !isScheme) {
|
|
59
|
+
const id = subject.includes('/') ? subject.split('/').pop() : subject;
|
|
60
|
+
const prefLabel = findLiteral(trimmed, 'skos:prefLabel');
|
|
61
|
+
const altLabel = findLiteral(trimmed, 'skos:altLabel');
|
|
62
|
+
const definition = findLiteral(trimmed, 'skos:definition');
|
|
63
|
+
const broader = findResource(trimmed, 'skos:broader');
|
|
64
|
+
|
|
65
|
+
concepts[id] = {
|
|
66
|
+
id,
|
|
67
|
+
iri: subject,
|
|
68
|
+
prefLabel: prefLabel || id,
|
|
69
|
+
...(altLabel && { altLabel }),
|
|
70
|
+
...(definition && { definition }),
|
|
71
|
+
...(broader && { broader }),
|
|
72
|
+
};
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
return { scheme, schemeLabel, schemeDefinition, concepts };
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
function splitSubjects(text) {
|
|
80
|
+
const blocks = [];
|
|
81
|
+
let depth = 0;
|
|
82
|
+
let start = -1;
|
|
83
|
+
|
|
84
|
+
for (let i = 0; i < text.length; i++) {
|
|
85
|
+
const ch = text[i];
|
|
86
|
+
|
|
87
|
+
if (ch === '"') {
|
|
88
|
+
// Skip quoted strings
|
|
89
|
+
i++;
|
|
90
|
+
while (i < text.length && text[i] !== '"') {
|
|
91
|
+
if (text[i] === '\\') i++;
|
|
92
|
+
i++;
|
|
93
|
+
}
|
|
94
|
+
continue;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
if (ch === '[') depth++;
|
|
98
|
+
if (ch === ']') depth--;
|
|
99
|
+
if (ch === '(') depth++;
|
|
100
|
+
if (ch === ')') depth--;
|
|
101
|
+
|
|
102
|
+
if (depth === 0 && ch === '.' && start >= 0) {
|
|
103
|
+
blocks.push(text.slice(start, i));
|
|
104
|
+
start = -1;
|
|
105
|
+
} else if (start < 0 && ch === '\n') {
|
|
106
|
+
// Skip blank lines
|
|
107
|
+
const rest = text.slice(i).trimStart();
|
|
108
|
+
if (rest && !rest.startsWith('#')) {
|
|
109
|
+
start = i + 1;
|
|
110
|
+
}
|
|
111
|
+
} else if (start < 0 && /[^\s]/.test(ch)) {
|
|
112
|
+
start = i;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return blocks;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
function findLiteral(block, predicate) {
|
|
120
|
+
// Match: predicate "value"@lang or predicate """value"""@lang
|
|
121
|
+
const tripleQuoted = new RegExp(predicate + '\\s+"""([^"]*?)"""@en');
|
|
122
|
+
let m = block.match(tripleQuoted);
|
|
123
|
+
if (m) return m[1].replace(/\s+/g, ' ').trim();
|
|
124
|
+
|
|
125
|
+
const singleQuoted = new RegExp(predicate + '\\s+"([^"]*?)"@en');
|
|
126
|
+
m = block.match(singleQuoted);
|
|
127
|
+
return m ? m[1].replace(/\s+/g, ' ').trim() : null;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
function findResource(block, predicate) {
|
|
131
|
+
const re = new RegExp(predicate + '\\s+([^\\s,;]+)');
|
|
132
|
+
const m = block.match(re);
|
|
133
|
+
if (!m) return null;
|
|
134
|
+
const val = m[1].replace(/[;.]+$/, '');
|
|
135
|
+
return val.includes('/') ? val.split('/').pop() : val;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
const TAXONOMY_MAP = {
|
|
139
|
+
'concept-status.ttl': 'conceptStatus',
|
|
140
|
+
'entry-status.ttl': 'entryStatus',
|
|
141
|
+
'normative-status.ttl': 'normativeStatus',
|
|
142
|
+
'source-type.ttl': 'sourceType',
|
|
143
|
+
'source-status.ttl': 'sourceStatus',
|
|
144
|
+
'relationship-type.ttl': 'relationshipType',
|
|
145
|
+
'designation-type.ttl': 'designationType',
|
|
146
|
+
'term-type.ttl': 'termType',
|
|
147
|
+
'grammar-gender.ttl': 'grammarGender',
|
|
148
|
+
'grammar-number.ttl': 'grammarNumber',
|
|
149
|
+
};
|
|
150
|
+
|
|
151
|
+
function main() {
|
|
152
|
+
if (!existsSync(TAXONOMY_DIR)) {
|
|
153
|
+
console.error(`Taxonomy directory not found: ${TAXONOMY_DIR}`);
|
|
154
|
+
console.error('Ensure concept-model is available at ../concept-model/');
|
|
155
|
+
process.exit(1);
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
const result = {};
|
|
159
|
+
|
|
160
|
+
for (const [filename, key] of Object.entries(TAXONOMY_MAP)) {
|
|
161
|
+
const filepath = join(TAXONOMY_DIR, filename);
|
|
162
|
+
if (!existsSync(filepath)) {
|
|
163
|
+
console.warn(` Skipping ${filename} (not found)`);
|
|
164
|
+
continue;
|
|
165
|
+
}
|
|
166
|
+
const text = readFileSync(filepath, 'utf-8');
|
|
167
|
+
const parsed = parseTurtle(text);
|
|
168
|
+
result[key] = {
|
|
169
|
+
scheme: parsed.scheme,
|
|
170
|
+
schemeLabel: parsed.schemeLabel,
|
|
171
|
+
schemeDefinition: parsed.schemeDefinition,
|
|
172
|
+
concepts: parsed.concepts,
|
|
173
|
+
};
|
|
174
|
+
const count = Object.keys(parsed.concepts).length;
|
|
175
|
+
console.log(` ${key}: ${count} concepts`);
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
mkdirSync(dirname(OUTPUT), { recursive: true });
|
|
179
|
+
writeFileSync(OUTPUT, JSON.stringify(result, null, 2) + '\n');
|
|
180
|
+
console.log(`\nWrote ${OUTPUT}`);
|
|
181
|
+
console.log(`Total taxonomies: ${Object.keys(result).length}`);
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
main();
|