@glossarist/concept-browser 0.3.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/README.md +3 -2
  2. package/cli/index.mjs +2 -1
  3. package/env.d.ts +5 -0
  4. package/package.json +4 -3
  5. package/scripts/build-edges.js +78 -10
  6. package/scripts/generate-data.mjs +152 -20
  7. package/scripts/generate-ontology-data.mjs +184 -0
  8. package/scripts/generate-ontology-schema.mjs +315 -0
  9. package/src/__tests__/about-view.test.ts +98 -0
  10. package/src/__tests__/app-footer.test.ts +38 -0
  11. package/src/__tests__/app-header.test.ts +130 -0
  12. package/src/__tests__/app-sidebar.test.ts +159 -0
  13. package/src/__tests__/asciidoc-lite.test.ts +1 -1
  14. package/src/__tests__/concept-card.test.ts +115 -0
  15. package/src/__tests__/concept-detail-interaction.test.ts +273 -0
  16. package/src/__tests__/concept-formats.test.ts +32 -30
  17. package/src/__tests__/concept-timeline.test.ts +200 -0
  18. package/src/__tests__/concept-view.test.ts +88 -0
  19. package/src/__tests__/contributors-view.test.ts +103 -0
  20. package/src/__tests__/dataset-adapter.test.ts +172 -23
  21. package/src/__tests__/dataset-view.test.ts +232 -0
  22. package/src/__tests__/designation-registry.test.ts +161 -0
  23. package/src/__tests__/format-downloads.test.ts +98 -0
  24. package/src/__tests__/graph-view.test.ts +69 -0
  25. package/src/__tests__/graph.test.ts +62 -0
  26. package/src/__tests__/home-interaction.test.ts +157 -0
  27. package/src/__tests__/language-detail.test.ts +203 -0
  28. package/src/__tests__/nav-icon.test.ts +48 -0
  29. package/src/__tests__/news-view.test.ts +87 -0
  30. package/src/__tests__/ontology-registry.test.ts +109 -0
  31. package/src/__tests__/page-view.test.ts +83 -0
  32. package/src/__tests__/relationship-categories.test.ts +62 -0
  33. package/src/__tests__/resolve-view.test.ts +77 -0
  34. package/src/__tests__/router.test.ts +65 -0
  35. package/src/__tests__/search-bar.test.ts +219 -0
  36. package/src/__tests__/search-view.test.ts +41 -0
  37. package/src/__tests__/stats-view.test.ts +77 -0
  38. package/src/__tests__/test-helpers.ts +171 -0
  39. package/src/__tests__/ui-store.test.ts +100 -0
  40. package/src/__tests__/v-math.test.ts +8 -7
  41. package/src/adapters/DatasetAdapter.ts +188 -63
  42. package/src/adapters/model-bridge.ts +277 -0
  43. package/src/adapters/ontology-registry.ts +75 -0
  44. package/src/adapters/ontology-schema.ts +100 -0
  45. package/src/adapters/types.ts +53 -78
  46. package/src/components/AppSidebar.vue +1 -1
  47. package/src/components/CitationDisplay.vue +35 -0
  48. package/src/components/ConceptDetail.vue +349 -146
  49. package/src/components/ConceptRdfView.vue +397 -0
  50. package/src/components/ConceptTimeline.vue +57 -60
  51. package/src/components/GraphPanel.vue +96 -31
  52. package/src/components/LanguageDetail.vue +46 -61
  53. package/src/components/NavIcon.vue +1 -0
  54. package/src/components/NonVerbalRepDisplay.vue +38 -0
  55. package/src/components/RelationshipList.vue +99 -0
  56. package/src/composables/use-render-options.ts +1 -4
  57. package/src/config/use-site-config.ts +3 -0
  58. package/src/data/ontology-schema.json +1551 -0
  59. package/src/data/taxonomies.json +543 -0
  60. package/src/graph/GraphEngine.ts +7 -4
  61. package/src/router/index.ts +6 -1
  62. package/src/shims/empty.ts +1 -0
  63. package/src/shims/node-crypto.ts +6 -0
  64. package/src/shims/node-path.ts +10 -0
  65. package/src/stores/vocabulary.ts +82 -32
  66. package/src/style.css +74 -20
  67. package/src/utils/asciidoc-lite.ts +17 -19
  68. package/src/utils/concept-formats.ts +22 -20
  69. package/src/utils/concept-helpers.ts +54 -0
  70. package/src/utils/designation-registry.ts +124 -0
  71. package/src/utils/escape.ts +7 -0
  72. package/src/utils/markdown-lite.ts +1 -3
  73. package/src/utils/math.ts +2 -11
  74. package/src/utils/plurimath.ts +2 -7
  75. package/src/utils/relationship-categories.ts +84 -0
  76. package/src/views/ConceptView.vue +22 -1
  77. package/src/views/DatasetView.vue +7 -2
  78. package/src/views/OntologySchemaView.vue +302 -0
  79. package/src/views/PageView.vue +28 -17
  80. package/src/views/StatsView.vue +34 -12
  81. package/vite.config.ts +8 -0
package/README.md CHANGED
@@ -57,9 +57,10 @@ datasets.yml
57
57
  └─> public/data/{id}/
58
58
  ├── manifest.json Dataset metadata
59
59
  ├── index.json Concept listing (chunked for large sets)
60
- ├── edges.json Pre-computed cross-references
60
+ ├── edges.json Pre-computed cross-reference + domain edges
61
+ ├── domain-nodes.json Domain classification nodes
61
62
  └── concepts/*.json Individual concept documents
62
- └─> scripts/build-edges.js (extract graph edges)
63
+ └─> scripts/build-edges.js (extract graph + domain edges)
63
64
  ```
64
65
 
65
66
  ### Step-by-step
package/cli/index.mjs CHANGED
@@ -100,7 +100,8 @@ Environment:
100
100
  // Run vite build using the package's vite.config.ts
101
101
  console.log(`\n=== BUILD SPA ===\n`);
102
102
  const viteConfig = resolve(pkgRoot, 'vite.config.ts');
103
- execSync(`npx vite build --config ${viteConfig}`, {
103
+ const viteBin = resolve(pkgRoot, 'node_modules', '.bin', 'vite');
104
+ execSync(`${viteBin} build --config ${viteConfig}`, {
104
105
  stdio: 'inherit',
105
106
  env: { ...process.env },
106
107
  });
package/env.d.ts CHANGED
@@ -1,5 +1,10 @@
1
1
  /// <reference types="vite/client" />
2
2
 
3
+ declare module '*.json' {
4
+ const value: any;
5
+ export default value;
6
+ }
7
+
3
8
  declare module '*.vue' {
4
9
  import type { DefineComponent } from 'vue'
5
10
  const component: DefineComponent<{}, {}, any>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@glossarist/concept-browser",
3
- "version": "0.3.4",
3
+ "version": "0.4.0",
4
4
  "description": "Vue SPA for browsing Glossarist terminology datasets with cross-reference resolution, graph visualization, and multi-language support",
5
5
  "type": "module",
6
6
  "bin": {
@@ -13,7 +13,8 @@
13
13
  "preview": "vite preview",
14
14
  "fetch-datasets": "node scripts/fetch-datasets.mjs",
15
15
  "generate-data": "node scripts/generate-data.mjs",
16
- "build:full": "npm run fetch-datasets && npm run generate-data && node scripts/build-edges.js && npm run build",
16
+ "generate-ontology": "node scripts/generate-ontology-data.mjs && node scripts/generate-ontology-schema.mjs",
17
+ "build:full": "npm run generate-ontology && npm run fetch-datasets && npm run generate-data && node scripts/build-edges.js && npm run build",
17
18
  "build:site": "concept-browser --site build",
18
19
  "test": "vitest run",
19
20
  "test:watch": "vitest"
@@ -23,7 +24,7 @@
23
24
  "@vitejs/plugin-vue": "^5.2.3",
24
25
  "autoprefixer": "^10.4.21",
25
26
  "d3": "^7.9.0",
26
- "glossarist": "^0.2.0",
27
+ "glossarist": "^0.3.0",
27
28
  "js-yaml": "^4.1.0",
28
29
  "pinia": "^2.3.1",
29
30
  "postcss": "^8.5.3",
package/scripts/build-edges.js CHANGED
@@ -1,7 +1,7 @@
1
1
  /**
2
- * Pre-computes cross-reference edges for each dataset.
3
- * Reads all concept JSON files, extracts structured and inline references,
4
- * and writes edges.json for each dataset.
2
+ * Pre-computes cross-reference and domain edges for each dataset.
3
+ * Reads all concept JSON files, extracts structured references and
4
+ * authoritative sources (domains), and writes edges.json + domain-nodes.json.
5
5
  *
6
6
  * Usage: node scripts/build-edges.js
7
7
  */
@@ -13,12 +13,18 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
13
13
  const ROOT = process.cwd();
14
14
  const DATA_DIR = join(ROOT, 'public', 'data');
15
15
 
16
- function extractEdgesFromConcept(concept, registerId) {
16
+ // --- Normalization ---
17
+
18
+ function slugify(text) {
19
+ return text.toLowerCase().replace(/[^\w\s-]/g, '').replace(/[\s/]+/g, '-');
20
+ }
21
+
22
+ // --- Extractors (open/closed: add new extractors to EXTRACTORS array) ---
23
+
24
+ function extractReferences(concept, registerId) {
17
25
  const edges = [];
18
26
  const sourceUri = concept['@id'];
19
-
20
- for (const [_lang, lc] of Object.entries(concept['gl:localizedConcept'] || {})) {
21
- // Structured cross-references (gl:references array, pre-computed during data generation)
27
+ for (const [lang, lc] of Object.entries(concept['gl:localizedConcept'] || {})) {
22
28
  if (lc['gl:references']) {
23
29
  for (const ref of lc['gl:references']) {
24
30
  if (ref['@id'] && ref['@id'] !== sourceUri) {
@@ -28,15 +34,45 @@ function extractEdgesFromConcept(concept, registerId) {
28
34
  type: 'references',
29
35
  label: ref['gl:term'] || undefined,
30
36
  register: registerId,
37
+ lang,
31
38
  });
32
39
  }
33
40
  }
34
41
  }
35
42
  }
43
+ return edges;
44
+ }
36
45
 
46
+ function extractDomains(concept, registerId) {
47
+ const edges = [];
48
+ const sourceUri = concept['@id'];
49
+ const lcs = concept['gl:localizedConcept'] || {};
50
+ const langs = Object.keys(lcs);
51
+ const seen = new Set();
52
+ for (const lang of langs) {
53
+ const domain = lcs[lang]['gl:domain'];
54
+ if (domain && !seen.has(domain)) {
55
+ seen.add(domain);
56
+ edges.push({
57
+ source: sourceUri,
58
+ target: `https://glossarist.org/${registerId}/domain/${slugify(domain)}`,
59
+ type: 'domain',
60
+ label: domain,
61
+ register: registerId,
62
+ });
63
+ }
64
+ }
37
65
  return edges;
38
66
  }
39
67
 
68
+ const EXTRACTORS = [extractReferences, extractDomains];
69
+
70
+ function extractAllEdges(concept, registerId) {
71
+ return EXTRACTORS.flatMap(fn => fn(concept, registerId));
72
+ }
73
+
74
+ // --- Build ---
75
+
40
76
  function buildEdgesForDataset(datasetDir, registerId) {
41
77
  const conceptsDir = join(datasetDir, 'concepts');
42
78
  if (!existsSync(conceptsDir)) {
@@ -48,13 +84,20 @@ function buildEdgesForDataset(datasetDir, registerId) {
48
84
  console.log(` Processing ${files.length} concepts...`);
49
85
 
50
86
  const allEdges = [];
87
+ const domainConceptCount = new Map();
51
88
  let processed = 0;
52
89
 
53
90
  for (const file of files) {
54
91
  try {
55
92
  const data = JSON.parse(readFileSync(join(conceptsDir, file), 'utf-8'));
56
- const edges = extractEdgesFromConcept(data, registerId);
93
+ const edges = extractAllEdges(data, registerId);
57
94
  allEdges.push(...edges);
95
+
96
+ for (const edge of edges) {
97
+ if (edge.type === 'domain') {
98
+ domainConceptCount.set(edge.target, (domainConceptCount.get(edge.target) || 0) + 1);
99
+ }
100
+ }
58
101
  } catch (e) {
59
102
  console.error(` Error processing ${file}: ${e.message}`);
60
103
  }
@@ -64,11 +107,11 @@ function buildEdgesForDataset(datasetDir, registerId) {
64
107
  }
65
108
  }
66
109
 
67
- // Deduplicate edges by source+target pair
110
+ // Deduplicate edges by source+target+type+lang
68
111
  const seen = new Set();
69
112
  const deduped = [];
70
113
  for (const edge of allEdges) {
71
- const key = `${edge.source}→${edge.target}`;
114
+ const key = `${edge.source}→${edge.target}→${edge.type}→${edge.lang || ''}`;
72
115
  if (!seen.has(key)) {
73
116
  seen.add(key);
74
117
  deduped.push(edge);
@@ -84,6 +127,31 @@ function buildEdgesForDataset(datasetDir, registerId) {
84
127
  const outputPath = join(datasetDir, 'edges.json');
85
128
  writeFileSync(outputPath, JSON.stringify(output, null, 2));
86
129
  console.log(` Written ${deduped.length} edges to edges.json (${(JSON.stringify(output).length / 1024).toFixed(1)} KB)`);
130
+
131
+ // Build domain-nodes.json
132
+ const domainEdgeMap = new Map();
133
+ for (const edge of deduped) {
134
+ if (edge.type === 'domain') {
135
+ const existing = domainEdgeMap.get(edge.target);
136
+ if (existing) {
137
+ existing.labels.add(edge.label);
138
+ } else {
139
+ domainEdgeMap.set(edge.target, { uri: edge.target, labels: new Set([edge.label]), registerId });
140
+ }
141
+ }
142
+ }
143
+
144
+ const domainNodes = [...domainEdgeMap.values()].map(d => ({
145
+ uri: d.uri,
146
+ label: [...d.labels][0],
147
+ registerId: d.registerId,
148
+ conceptCount: domainConceptCount.get(d.uri) || 0,
149
+ })).sort((a, b) => b.conceptCount - a.conceptCount);
150
+
151
+ const domainOutput = { registerId, domainNodes };
152
+ const domainPath = join(datasetDir, 'domain-nodes.json');
153
+ writeFileSync(domainPath, JSON.stringify(domainOutput, null, 2));
154
+ console.log(` Written ${domainNodes.length} domain nodes to domain-nodes.json`);
87
155
  }
88
156
 
89
157
  // Main
package/scripts/generate-data.mjs CHANGED
@@ -30,6 +30,16 @@ function loadConceptFile(filePath) {
30
30
  if (docs.length >= 1 && docs[0].data && docs[0].data.identifier !== undefined) {
31
31
  const mc = docs[0];
32
32
  const result = { termid: String(mc.data.identifier) };
33
+
34
+ // Managed concept-level fields
35
+ if (mc.related) result._related = mc.related;
36
+ if (mc.data.domains) result._domains = mc.data.domains;
37
+ if (mc.dates) result._dates = mc.dates;
38
+ if (mc.sources) result._sources = mc.sources;
39
+ if (mc.status) result._status = mc.status;
40
+ if (mc.schema_version) result._schemaVersion = mc.schema_version;
41
+ if (mc.date_accepted) result._dateAccepted = mc.date_accepted;
42
+
33
43
  for (const doc of docs.slice(1)) {
34
44
  if (!doc || !doc.data || !doc.data.language_code) continue;
35
45
  const lang = doc.data.language_code;
@@ -49,17 +59,46 @@ function writeJson(filePath, data) {
49
59
  }
50
60
 
51
61
  function termToDesignation(term) {
62
+ const typeMap = {
63
+ expression: 'gl:Expression',
64
+ abbreviation: 'gl:Abbreviation',
65
+ symbol: 'gl:Symbol',
66
+ letter_symbol: 'gl:LetterSymbol',
67
+ 'graphical symbol': 'gl:GraphicalSymbol',
68
+ };
52
69
  const doc = {
53
- '@type': term.type === 'expression' ? 'gl:Expression'
54
- : term.type === 'symbol' ? 'gl:Symbol'
55
- : term.type === 'abbreviation' ? 'gl:Abbreviation'
56
- : 'gl:Designation',
70
+ '@type': typeMap[term.type] || 'gl:Designation',
57
71
  'gl:normativeStatus': term.normative_status || 'preferred',
58
72
  'gl:term': term.designation,
59
73
  };
60
- if (term.gender) doc['gl:gender'] = term.gender;
61
- if (term.plurality) doc['gl:plurality'] = term.plurality;
74
+
75
+ if (term.grammar_info && term.grammar_info.length > 0) {
76
+ doc['gl:grammarInfo'] = term.grammar_info.map(gi => {
77
+ const g = {};
78
+ if (gi.gender) g['gl:gender'] = gi.gender;
79
+ if (gi.number) g['gl:number'] = gi.number;
80
+ for (const pos of ['noun', 'verb', 'adj', 'adverb', 'preposition', 'participle']) {
81
+ if (gi[pos]) g[`gl:${pos}`] = gi[pos];
82
+ }
83
+ return g;
84
+ });
85
+ }
86
+
62
87
  if (term.international !== undefined) doc['gl:international'] = term.international;
88
+ if (term.absent !== undefined) doc['gl:absent'] = term.absent;
89
+ if (term.geographical_area) doc['gl:geographicalArea'] = term.geographical_area;
90
+ if (term.term_type) doc['gl:termType'] = term.term_type;
91
+ if (term.prefix) doc['gl:prefix'] = term.prefix;
92
+ if (term.usage_info) doc['gl:usageInfo'] = term.usage_info;
93
+ if (term.field_of_application) doc['gl:fieldOfApplication'] = term.field_of_application;
94
+
95
+ if (term.acronym !== undefined) doc['gl:acronym'] = term.acronym;
96
+ if (term.initialism !== undefined) doc['gl:initialism'] = term.initialism;
97
+ if (term.truncation !== undefined) doc['gl:truncation'] = term.truncation;
98
+
99
+ if (term.text) doc['gl:text'] = term.text;
100
+ if (term.image) doc['gl:image'] = term.image;
101
+
63
102
  return doc;
64
103
  }
65
104
 
@@ -81,8 +120,24 @@ function sourcesToJsonLd(sources) {
81
120
  if (s.status) doc['gl:sourceStatus'] = s.status;
82
121
  if (s.origin) {
83
122
  const origin = { '@type': 'gl:Citation' };
84
- if (s.origin.ref) origin['gl:ref'] = s.origin.ref;
85
- if (s.origin.clause) origin['gl:clause'] = s.origin.clause;
123
+ if (s.origin.ref) {
124
+ const ref = s.origin.ref;
125
+ const refObj = { '@type': 'gl:Ref' };
126
+ if (ref.source) refObj['gl:source'] = ref.source;
127
+ if (ref.id) refObj['gl:id'] = ref.id;
128
+ if (ref.version) refObj['gl:version'] = ref.version;
129
+ origin['gl:ref'] = refObj;
130
+ }
131
+ if (s.origin.locality) {
132
+ const loc = s.origin.locality;
133
+ const locObj = {};
134
+ if (loc.type) locObj['gl:localityType'] = loc.type;
135
+ if (loc.reference_from) locObj['gl:referenceFrom'] = loc.reference_from;
136
+ if (loc.referenceFrom) locObj['gl:referenceFrom'] = loc.referenceFrom;
137
+ if (loc.reference_to) locObj['gl:referenceTo'] = loc.reference_to;
138
+ if (loc.referenceTo) locObj['gl:referenceTo'] = loc.referenceTo;
139
+ origin['gl:locality'] = locObj;
140
+ }
86
141
  if (s.origin.link) origin['gl:link'] = s.origin.link;
87
142
  doc['gl:origin'] = origin;
88
143
  }
@@ -143,7 +198,7 @@ function buildRefMaps(config) {
143
198
  if (xref.urnStandardMap) Object.assign(urnStandardMap, xref.urnStandardMap);
144
199
 
145
200
  const uriBase = config.uriBase || `https://${config.domain}`;
146
- return { refPrefixMap, urnStandardMap, uriBase };
201
+ return { refPrefixMap, urnStandardMap, uriBase, register: null };
147
202
  }
148
203
 
149
204
  function extractInlineRefs(localizedData, refMaps) {
@@ -178,6 +233,18 @@ function extractInlineRefs(localizedData, refMaps) {
178
233
  if (datasetId) refs.push({ id: `${uriBase}/${datasetId}/concept/${m[2]}`, term: (m[4] || m[3]).trim() });
179
234
  }
180
235
 
236
+ // Generic {{term, concept_id}} — same-dataset cross-reference (e.g. VIML)
237
+ const register = refMaps.register;
238
+ for (const m of fullText.matchAll(/\{\{([^,}]+),\s*([A-Za-z0-9.]+)\}\}/g)) {
239
+ const termName = m[1].trim();
240
+ const conceptId = m[2].trim();
241
+ // Skip if already matched by IEV or URN patterns
242
+ if (refPrefixMap && refPrefixMap[termName]) continue;
243
+ if (/^\d/.test(conceptId) || /^[A-Z]\.\d/.test(conceptId)) {
244
+ refs.push({ id: `${uriBase}/${register}/concept/${conceptId}`, term: termName });
245
+ }
246
+ }
247
+
181
248
  const seen = new Set();
182
249
  return refs.filter(r => {
183
250
  if (seen.has(r.id)) return false;
@@ -210,6 +277,10 @@ function yamlToJsonLd(conceptYaml, register, refMaps) {
210
277
  };
211
278
 
212
279
  if (lc.entry_status) lDoc['gl:entryStatus'] = lc.entry_status;
280
+ if (lc.classification) lDoc['gl:classification'] = lc.classification;
281
+ if (lc.review_type) lDoc['gl:reviewType'] = lc.review_type;
282
+ if (lc.script) lDoc['gl:script'] = lc.script;
283
+ if (lc.system) lDoc['gl:system'] = lc.system;
213
284
  if (lc.terms && lc.terms.length > 0) lDoc['gl:designation'] = lc.terms.map(termToDesignation);
214
285
  if (lc.definition) lDoc['gl:definition'] = defsToJsonLd(lc.definition);
215
286
  if (lc.notes && lc.notes.length > 0) lDoc['gl:notes'] = defsToJsonLd(lc.notes);
@@ -223,6 +294,7 @@ function yamlToJsonLd(conceptYaml, register, refMaps) {
223
294
  if (lc.review_status) lDoc['gl:reviewStatus'] = lc.review_status;
224
295
  if (lc.review_decision) lDoc['gl:reviewDecision'] = lc.review_decision;
225
296
  if (lc.review_decision_notes) lDoc['gl:reviewDecisionNotes'] = lc.review_decision_notes;
297
+ if (lc.domain) lDoc['gl:domain'] = lc.domain;
226
298
  if (lc.dates && lc.dates.length > 0) {
227
299
  lDoc['gl:dates'] = lc.dates.map(d => ({
228
300
  'gl:dateType': d.type,
@@ -245,6 +317,48 @@ function yamlToJsonLd(conceptYaml, register, refMaps) {
245
317
  doc['gl:localizedConcept'] = localizations;
246
318
  }
247
319
 
320
+ // Managed concept-level fields (v3)
321
+ if (conceptYaml._status) doc['gl:status'] = conceptYaml._status;
322
+ if (conceptYaml._schemaVersion) doc['gl:schemaVersion'] = conceptYaml._schemaVersion;
323
+ if (conceptYaml._dateAccepted) doc['gl:dateAccepted'] = conceptYaml._dateAccepted;
324
+
325
+ if (conceptYaml._dates && conceptYaml._dates.length > 0) {
326
+ doc['gl:dates'] = conceptYaml._dates.map(d => ({
327
+ 'gl:dateType': d.type,
328
+ 'gl:date': d.date,
329
+ }));
330
+ }
331
+
332
+ if (conceptYaml._sources && conceptYaml._sources.length > 0) {
333
+ doc['gl:source'] = sourcesToJsonLd(conceptYaml._sources);
334
+ }
335
+
336
+ if (conceptYaml._domains && conceptYaml._domains.length > 0) {
337
+ doc['gl:domain'] = conceptYaml._domains.map(d => {
338
+ const domain = { '@type': 'gl:ConceptReference' };
339
+ if (d.concept_id) domain['gl:conceptId'] = d.concept_id;
340
+ if (d.source) domain['gl:source'] = d.source;
341
+ if (d.urn) domain['gl:urn'] = d.urn;
342
+ if (d.ref_type) domain['gl:refType'] = d.ref_type;
343
+ return domain;
344
+ });
345
+ }
346
+
347
+ if (conceptYaml._related && conceptYaml._related.length > 0) {
348
+ doc['gl:related'] = conceptYaml._related.map(r => {
349
+ const rel = { '@type': 'gl:RelatedConcept' };
350
+ if (r.type) rel['gl:relationshipType'] = r.type;
351
+ if (r.content) rel['gl:content'] = r.content;
352
+ if (r.ref) {
353
+ const ref = {};
354
+ if (r.ref.source) ref['gl:source'] = r.ref.source;
355
+ if (r.ref.id) ref['gl:id'] = r.ref.id;
356
+ rel['gl:ref'] = ref;
357
+ }
358
+ return rel;
359
+ });
360
+ }
361
+
248
362
  return doc;
249
363
  }
250
364
 
@@ -352,7 +466,8 @@ function conceptJsonToSkosJsonLd(concept) {
352
466
  }
353
467
 
354
468
  function escapeXml(s) {
355
- return s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
469
+ const str = Array.isArray(s) ? s.join(', ') : String(s ?? '');
470
+ return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
356
471
  }
357
472
 
358
473
  function conceptJsonToTbx(concept) {
@@ -378,15 +493,22 @@ function conceptJsonToTbx(concept) {
378
493
  const status = d['gl:normativeStatus'] || '';
379
494
  const type = d['@type'] || '';
380
495
  let gramGrp = '';
381
- if (d['gl:gender']) gramGrp = `\n <grammaticalGender>${escapeXml(d['gl:gender'])}</grammaticalGender>`;
382
- let partOfSpeech = '';
383
- if (type.includes('Abbreviation')) partOfSpeech = '\n <partOfSpeech>abbreviation</partOfSpeech>';
384
- if (type.includes('Symbol')) partOfSpeech = '\n <partOfSpeech>symbol</partOfSpeech>';
496
+ if (d['gl:grammarInfo'] && d['gl:grammarInfo'].length > 0) {
497
+ const gi = d['gl:grammarInfo'][0];
498
+ if (gi['gl:gender']) gramGrp = `\n <grammaticalGender>${escapeXml(gi['gl:gender'])}</grammaticalGender>`;
499
+ if (gi['gl:number']) gramGrp += `\n <grammaticalNumber>${escapeXml(gi['gl:number'])}</grammaticalNumber>`;
500
+ for (const pos of ['noun', 'verb', 'adj', 'adverb', 'preposition', 'participle']) {
501
+ if (gi[`gl:${pos}`]) gramGrp += `\n <partOfSpeech>${pos}</partOfSpeech>`;
502
+ }
503
+ }
504
+ let posBlock = '';
505
+ if (type.includes('Abbreviation')) posBlock = '\n <partOfSpeech>abbreviation</partOfSpeech>';
506
+ if (type.includes('Symbol')) posBlock = '\n <partOfSpeech>symbol</partOfSpeech>';
385
507
 
386
508
  termEntries.push(` <termEntry>
387
509
  <langSet xml:lang="${lang}">
388
510
  <tig>
389
- <term>${escapeXml(term)}</term>${gramGrp}${partOfSpeech}
511
+ <term>${escapeXml(term)}</term>${gramGrp}${posBlock}
390
512
  </tig>
391
513
  </langSet>
392
514
  </termEntry>`);
@@ -410,10 +532,19 @@ function conceptJsonToTbx(concept) {
410
532
  for (const src of sources) {
411
533
  const origin = src['gl:origin'] || {};
412
534
  const parts = [];
413
- if (origin['gl:ref']) parts.push(origin['gl:ref']);
414
- if (origin['gl:clause']) parts.push(origin['gl:clause']);
415
- if (parts.length) {
416
- sourceBlock += `\n <ref>${escapeXml(parts.join(', '))}</ref>`;
535
+ const ref = origin['gl:ref'];
536
+ if (ref) {
537
+ const refParts = [];
538
+ if (ref['gl:source']) refParts.push(ref['gl:source']);
539
+ if (ref['gl:id']) refParts.push(ref['gl:id']);
540
+ parts.push(refParts.join(' ') || '');
541
+ }
542
+ if (origin['gl:locality']) {
543
+ const loc = origin['gl:locality'];
544
+ if (loc['gl:referenceFrom']) parts.push(loc['gl:localityType'] ? `${loc['gl:localityType']} ${loc['gl:referenceFrom']}` : loc['gl:referenceFrom']);
545
+ }
546
+ if (parts.filter(Boolean).length) {
547
+ sourceBlock += `\n <ref>${escapeXml(parts.filter(Boolean).join(', '))}</ref>`;
417
548
  }
418
549
  }
419
550
 
@@ -460,6 +591,7 @@ function processDataset(dir, register, opts) {
460
591
  const langTermCounts = {};
461
592
  const langDefCounts = {};
462
593
  const availableFormats = ['ttl', 'jsonld', 'yaml', 'tbx'];
594
+ const dsRefMaps = { ...refMaps, register };
463
595
 
464
596
  for (let i = 0; i < files.length; i++) {
465
597
  const file = files[i];
@@ -468,7 +600,7 @@ function processDataset(dir, register, opts) {
468
600
  if (!conceptYaml || !conceptYaml.termid) continue;
469
601
 
470
602
  const termid = String(conceptYaml.termid);
471
- const jsonld = yamlToJsonLd(conceptYaml, register, refMaps);
603
+ const jsonld = yamlToJsonLd(conceptYaml, register, dsRefMaps);
472
604
  writeJson(path.join(conceptsDir, `${termid}.json`), jsonld);
473
605
 
474
606
  // Generate Turtle format
package/scripts/generate-ontology-data.mjs ADDED
@@ -0,0 +1,184 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Extract SKOS taxonomy data from concept-model TTL files into JSON
4
+ * for browser consumption via the OntologyRegistry.
5
+ *
6
+ * Reads from: ../concept-model/ontologies/taxonomies/*.ttl
7
+ * Writes to: src/data/taxonomies.json
8
+ */
9
+
10
+ import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs';
11
+ import { resolve, dirname, join } from 'path';
12
+ import { fileURLToPath } from 'url';
13
+
14
+ const __dirname = dirname(fileURLToPath(import.meta.url));
15
+ const ROOT = resolve(__dirname, '..');
16
+ const TAXONOMY_DIR = resolve(ROOT, '..', 'concept-model', 'ontologies', 'taxonomies');
17
+ const OUTPUT = resolve(ROOT, 'src', 'data', 'taxonomies.json');
18
+
19
+ /**
20
+ * Minimal Turtle parser for SKOS taxonomy files.
21
+ * Handles the specific patterns used in our taxonomy TTL files:
22
+ * - Subject blocks terminated by "."
23
+ * - Predicate-object pairs separated by ";"
24
+ * - Quoted strings with @lang tags
25
+ * - Comma-separated objects
26
+ */
27
+ function parseTurtle(text) {
28
+ // Remove comments
29
+ const cleaned = text.replace(/#[^\n]*/g, '');
30
+ // Split into subject blocks (terminated by ".")
31
+ const subjectBlocks = splitSubjects(cleaned);
32
+
33
+ const concepts = {};
34
+ let scheme = null;
35
+ let schemeLabel = null;
36
+ let schemeDefinition = null;
37
+
38
+ for (const block of subjectBlocks) {
39
+ const trimmed = block.trim();
40
+ if (!trimmed) continue;
41
+
42
+ // Parse subject and type
43
+ const subjectMatch = trimmed.match(/^([^\s]+)\s+a\s+(.+?)(?:\s*[;.,]|$)/s);
44
+ if (!subjectMatch) continue;
45
+
46
+ const subject = subjectMatch[1];
47
+ const types = subjectMatch[2];
48
+
49
+ const isScheme = /\bskos:ConceptScheme\b/.test(types);
50
+ const isConcept = /\bskos:Concept\b/.test(types);
51
+
52
+ if (isScheme) {
53
+ scheme = subject;
54
+ schemeLabel = findLiteral(trimmed, 'skos:prefLabel');
55
+ schemeDefinition = findLiteral(trimmed, 'skos:definition');
56
+ }
57
+
58
+ if (isConcept && !isScheme) {
59
+ const id = subject.includes('/') ? subject.split('/').pop() : subject;
60
+ const prefLabel = findLiteral(trimmed, 'skos:prefLabel');
61
+ const altLabel = findLiteral(trimmed, 'skos:altLabel');
62
+ const definition = findLiteral(trimmed, 'skos:definition');
63
+ const broader = findResource(trimmed, 'skos:broader');
64
+
65
+ concepts[id] = {
66
+ id,
67
+ iri: subject,
68
+ prefLabel: prefLabel || id,
69
+ ...(altLabel && { altLabel }),
70
+ ...(definition && { definition }),
71
+ ...(broader && { broader }),
72
+ };
73
+ }
74
+ }
75
+
76
+ return { scheme, schemeLabel, schemeDefinition, concepts };
77
+ }
78
+
79
+ function splitSubjects(text) {
80
+ const blocks = [];
81
+ let depth = 0;
82
+ let start = -1;
83
+
84
+ for (let i = 0; i < text.length; i++) {
85
+ const ch = text[i];
86
+
87
+ if (ch === '"') {
88
+ // Skip quoted strings
89
+ i++;
90
+ while (i < text.length && text[i] !== '"') {
91
+ if (text[i] === '\\') i++;
92
+ i++;
93
+ }
94
+ continue;
95
+ }
96
+
97
+ if (ch === '[') depth++;
98
+ if (ch === ']') depth--;
99
+ if (ch === '(') depth++;
100
+ if (ch === ')') depth--;
101
+
102
+ if (depth === 0 && ch === '.' && start >= 0) {
103
+ blocks.push(text.slice(start, i));
104
+ start = -1;
105
+ } else if (start < 0 && ch === '\n') {
106
+ // Skip blank lines
107
+ const rest = text.slice(i).trimStart();
108
+ if (rest && !rest.startsWith('#')) {
109
+ start = i + 1;
110
+ }
111
+ } else if (start < 0 && /[^\s]/.test(ch)) {
112
+ start = i;
113
+ }
114
+ }
115
+
116
+ return blocks;
117
+ }
118
+
119
+ function findLiteral(block, predicate) {
120
+ // Match: predicate "value"@lang or predicate """value"""@lang
121
+ const tripleQuoted = new RegExp(predicate + '\\s+"""([^"]*?)"""@en');
122
+ let m = block.match(tripleQuoted);
123
+ if (m) return m[1].replace(/\s+/g, ' ').trim();
124
+
125
+ const singleQuoted = new RegExp(predicate + '\\s+"([^"]*?)"@en');
126
+ m = block.match(singleQuoted);
127
+ return m ? m[1].replace(/\s+/g, ' ').trim() : null;
128
+ }
129
+
130
+ function findResource(block, predicate) {
131
+ const re = new RegExp(predicate + '\\s+([^\\s,;]+)');
132
+ const m = block.match(re);
133
+ if (!m) return null;
134
+ const val = m[1].replace(/[;.]+$/, '');
135
+ return val.includes('/') ? val.split('/').pop() : val;
136
+ }
137
+
138
+ const TAXONOMY_MAP = {
139
+ 'concept-status.ttl': 'conceptStatus',
140
+ 'entry-status.ttl': 'entryStatus',
141
+ 'normative-status.ttl': 'normativeStatus',
142
+ 'source-type.ttl': 'sourceType',
143
+ 'source-status.ttl': 'sourceStatus',
144
+ 'relationship-type.ttl': 'relationshipType',
145
+ 'designation-type.ttl': 'designationType',
146
+ 'term-type.ttl': 'termType',
147
+ 'grammar-gender.ttl': 'grammarGender',
148
+ 'grammar-number.ttl': 'grammarNumber',
149
+ };
150
+
151
+ function main() {
152
+ if (!existsSync(TAXONOMY_DIR)) {
153
+ console.error(`Taxonomy directory not found: ${TAXONOMY_DIR}`);
154
+ console.error('Ensure concept-model is available at ../concept-model/');
155
+ process.exit(1);
156
+ }
157
+
158
+ const result = {};
159
+
160
+ for (const [filename, key] of Object.entries(TAXONOMY_MAP)) {
161
+ const filepath = join(TAXONOMY_DIR, filename);
162
+ if (!existsSync(filepath)) {
163
+ console.warn(` Skipping ${filename} (not found)`);
164
+ continue;
165
+ }
166
+ const text = readFileSync(filepath, 'utf-8');
167
+ const parsed = parseTurtle(text);
168
+ result[key] = {
169
+ scheme: parsed.scheme,
170
+ schemeLabel: parsed.schemeLabel,
171
+ schemeDefinition: parsed.schemeDefinition,
172
+ concepts: parsed.concepts,
173
+ };
174
+ const count = Object.keys(parsed.concepts).length;
175
+ console.log(` ${key}: ${count} concepts`);
176
+ }
177
+
178
+ mkdirSync(dirname(OUTPUT), { recursive: true });
179
+ writeFileSync(OUTPUT, JSON.stringify(result, null, 2) + '\n');
180
+ console.log(`\nWrote ${OUTPUT}`);
181
+ console.log(`Total taxonomies: ${Object.keys(result).length}`);
182
+ }
183
+
184
+ main();