glossarist 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -118,6 +118,51 @@ COMPILED_EXTENSIONS.get('turtle'); // 'ttl'
118
118
  isKnownFormat('csv'); // false
119
119
  ```
120
120
 
121
+ ### Bibliography and images in GCR
122
+
123
+ GCR packages can contain a `bibliography.yaml` file and an `images/` directory, making the archive fully self-contained. This mirrors the Ruby glossarist gem's `DATASET_ASSETS` convention.
124
+
125
+ ```js
126
+ import { loadGcr } from 'glossarist';
127
+
128
+ const pkg = await loadGcr(fs.readFileSync('dataset.gcr'));
129
+
130
+ // Bibliography (raw YAML string)
131
+ const bib = await pkg.bibliography(); // 'ISO_19111_2019:\n ...' or null
132
+
133
+ // Images
134
+ await pkg.hasImages(); // true
135
+ const names = await pkg.imageFileNames(); // ['images/fig1.png', ...]
136
+ const img = await pkg.imageFile('fig1.png'); // Uint8Array or null
137
+ const allImages = await pkg.allImageFiles(); // Map<string, Uint8Array>
138
+ ```
139
+
140
+ #### Write bibliography and images into a GCR package
141
+
142
+ ```js
143
+ import { GcrWriter } from 'glossarist';
144
+
145
+ const buf = await GcrWriter.createBuffer({
146
+ concepts: [...],
147
+ metadata: { shortname: 'my-dataset' },
148
+ bibliography: 'ISO_19111_2019:\n title: Geographic information',
149
+ images: {
150
+ 'figure1.png': pngBuffer,
151
+ 'diagrams/schema.svg': svgString,
152
+ },
153
+ });
154
+ ```
155
+
156
+ #### Dataset asset registry
157
+
158
+ ```js
159
+ import { DATASET_ASSETS, FILE_ASSETS, DIRECTORY_ASSETS } from 'glossarist';
160
+
161
+ DATASET_ASSETS; // [{ path: 'bibliography.yaml', type: 'file' }, { path: 'images', type: 'directory' }]
162
+ FILE_ASSETS; // [{ path: 'bibliography.yaml', type: 'file' }]
163
+ DIRECTORY_ASSETS; // [{ path: 'images', type: 'directory' }]
164
+ ```
165
+
121
166
  ### Domain model
122
167
 
123
168
  Every domain entity is a class instance with `toJSON()`, `fromJSON()`, `equals()`, and `clone()`:
@@ -269,6 +314,7 @@ Public API (index.js)
269
314
  ├── Serialization → ConceptSerializer (canonical + managed YAML output)
270
315
  ├── I/O → loadGcr, readConcepts, createGcr, writeConcepts
271
316
  ├── Compiled formats → CompiledFormatRegistry (TBX, JSON-LD, Turtle, JSONL in GCR)
317
+ ├── Dataset assets → DATASET_ASSETS registry (bibliography.yaml, images/ in GCR)
272
318
  ├── Collections → ConceptCollection (Proxy-based, queryable), ManagedConceptCollection
273
319
  ├── Validation → ConceptValidator, RegisterValidator, ValidationRule (pluggable)
274
320
  ├── Utilities → conceptUuid, referenceResolver, V1Reader
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "glossarist",
3
- "version": "0.1.6",
3
+ "version": "0.2.0",
4
4
  "description": "JavaScript SDK for Glossarist GCR packages — read, write, validate, and manage terminology concepts",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -1,4 +1,4 @@
1
- import { naturalSort } from './gcr-reader.js';
1
+ import { naturalSort } from './sort.js';
2
2
 
3
3
  const _items = Symbol('items');
4
4
 
@@ -1,4 +1,4 @@
1
- import type { Concept } from './gcr-reader';
1
+ import type { Concept } from './models/index';
2
2
 
3
3
  /** Read all concept YAML files from a directory. */
4
4
  export function readConcepts(dir: string): Concept[];
@@ -2,7 +2,7 @@ import fs from 'fs';
2
2
  import path from 'path';
3
3
  import yaml from 'js-yaml';
4
4
  import { conceptParser } from './concept-parser.js';
5
- import { naturalSort } from './gcr-reader.js';
5
+ import { naturalSort } from './sort.js';
6
6
  import { InvalidInputError } from './errors.js';
7
7
 
8
8
  function assertDir(dir, fnName) {
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Registry of dataset assets that can be bundled inside a GCR package.
3
+ *
4
+ * Mirrors the Ruby glossarist gem's GcrPackage::DATASET_ASSETS.
5
+ * Asset types:
6
+ * - file: a single named file at the GCR root (e.g. bibliography.yaml)
7
+ * - directory: a named directory with arbitrary nested files (e.g. images/)
8
+ *
9
+ * New asset types can be added by appending to DATASET_ASSETS (open/closed).
10
+ */
11
+
12
+ const DATASET_ASSETS = Object.freeze([
13
+ { path: 'bibliography.yaml', type: 'file' },
14
+ { path: 'images', type: 'directory' },
15
+ ]);
16
+
17
+ const FILE_ASSETS = Object.freeze(
18
+ DATASET_ASSETS.filter((a) => a.type === 'file'),
19
+ );
20
+
21
+ const DIRECTORY_ASSETS = Object.freeze(
22
+ DATASET_ASSETS.filter((a) => a.type === 'directory'),
23
+ );
24
+
25
+ /**
26
+ * Find a file asset descriptor by its ZIP path.
27
+ * @param {string} path
28
+ * @returns {{ path: string, type: string } | undefined}
29
+ */
30
+ function findFileAsset(path) {
31
+ return FILE_ASSETS.find((a) => a.path === path);
32
+ }
33
+
34
+ /**
35
+ * Check whether a ZIP path belongs to a directory asset.
36
+ * Returns the asset descriptor if so, undefined otherwise.
37
+ * @param {string} zipPath
38
+ * @returns {{ path: string, type: string } | undefined}
39
+ */
40
+ function findDirectoryAssetPath(zipPath) {
41
+ for (const asset of DIRECTORY_ASSETS) {
42
+ if (zipPath === asset.path || zipPath.startsWith(`${asset.path}/`)) {
43
+ return asset;
44
+ }
45
+ }
46
+ return undefined;
47
+ }
48
+
49
+ /**
50
+ * Check if a ZIP path is a dataset asset entry.
51
+ * @param {string} zipPath
52
+ * @returns {boolean}
53
+ */
54
+ function isDatasetAssetPath(zipPath) {
55
+ return findFileAsset(zipPath) !== undefined || findDirectoryAssetPath(zipPath) !== undefined;
56
+ }
57
+
58
+ export {
59
+ DATASET_ASSETS,
60
+ FILE_ASSETS,
61
+ DIRECTORY_ASSETS,
62
+ findFileAsset,
63
+ findDirectoryAssetPath,
64
+ isDatasetAssetPath,
65
+ };
@@ -1,52 +1,4 @@
1
- /** A single term designation. */
2
- export interface Term {
3
- type: string;
4
- designation: string;
5
- normative_status?: string;
6
- }
7
-
8
- /** A definition content block. */
9
- export interface Definition {
10
- content: string;
11
- }
12
-
13
- /** A bibliographic source reference. */
14
- export interface Source {
15
- type: string;
16
- origin?: { ref: string };
17
- }
18
-
19
- /** Localized concept data for a single language. */
20
- export interface Localization {
21
- terms: Term[];
22
- definition?: Definition[];
23
- notes?: { content: string }[];
24
- examples?: { content: string }[];
25
- sources?: Source[];
26
- entry_status?: string;
27
- normative_status?: string;
28
- }
29
-
30
- /** A normalized glossarist concept. */
31
- export interface Concept {
32
- termid: string;
33
- term: string | null;
34
- localizations: Record<string, Localization>;
35
- raw: Record<string, unknown>;
36
- }
37
-
38
- /** GCR package metadata from metadata.yaml. */
39
- export interface GcrMetadata {
40
- shortname: string;
41
- version?: string;
42
- title?: string;
43
- concept_count?: number;
44
- languages?: string[];
45
- schema_version?: string;
46
- glossarist_version?: string;
47
- created_at?: string;
48
- statistics?: Record<string, unknown>;
49
- }
1
+ import { Concept, GcrMetadata } from './models/index';
50
2
 
51
3
  /**
52
4
  * Load a GCR package from a ZIP archive.
@@ -56,7 +8,7 @@ export function loadGcr(input: Buffer | ArrayBuffer | Uint8Array | Blob | string
56
8
 
57
9
  /** A loaded GCR package (ZIP archive of glossarist concept data). */
58
10
  export class GcrPackage {
59
- /** Read and parse metadata.yaml. */
11
+ /** Read and parse metadata.yaml as a GcrMetadata instance. */
60
12
  metadata(): Promise<GcrMetadata | null>;
61
13
  /** Read and parse optional register.yaml. */
62
14
  register(): Promise<Record<string, unknown> | null>;
@@ -85,9 +37,28 @@ export class GcrPackage {
85
37
  eachCompiledFile(format: string, callback: (id: string, content: string) => void | Promise<void>): Promise<void>;
86
38
  /** Load all entries for a compiled format into a Map. */
87
39
  allCompiledFiles(format: string): Promise<Map<string, string>>;
40
+
41
+ // Dataset assets (bibliography, images)
42
+
43
+ /** List all dataset asset entries found in this package. */
44
+ datasetAssetEntries(): Promise<Array<{ path: string; type: 'file' | 'directory'; asset: { path: string; type: string } }>>;
45
+ /** Read a file-type dataset asset by its registered path. */
46
+ readDatasetFileAsset(assetPath: string): Promise<string | null>;
47
+ /** Read bibliography.yaml from the package as raw YAML string. */
48
+ bibliography(): Promise<string | null>;
49
+ /** Check whether the images/ directory is present and non-empty. */
50
+ hasImages(): Promise<boolean>;
51
+ /** List all image file paths (relative to ZIP root). */
52
+ imageFileNames(): Promise<string[]>;
53
+ /** Read a single image file as Uint8Array. */
54
+ imageFile(path: string): Promise<Uint8Array | null>;
55
+ /** Iterate all image files. */
56
+ eachImageFile(callback: (path: string, content: Uint8Array) => void | Promise<void>): Promise<void>;
57
+ /** Load all image files into a Map (path → Uint8Array). */
58
+ allImageFiles(): Promise<Map<string, Uint8Array>>;
88
59
  }
89
60
 
90
- /** Parse raw concept YAML (canonical or managed format) into a normalized Concept. */
61
+ /** Parse raw concept YAML (canonical or managed format) into a Concept. */
91
62
  export function parseConceptYaml(raw: string, context?: string): Concept;
92
63
 
93
64
  /** Natural sort comparator for concept IDs like "3.1.1.1", "551-12-39". */
package/src/gcr-reader.js CHANGED
@@ -2,12 +2,14 @@ import JSZip from 'jszip';
2
2
  import yaml from 'js-yaml';
3
3
  import { conceptParser } from './concept-parser.js';
4
4
  import { InvalidInputError } from './errors.js';
5
- import { COMPILED_EXTENSIONS, parseCompiledPath, compiledPath } from './compiled-format.js';
5
+ import { COMPILED_FORMATS, parseCompiledPath, compiledPath } from './compiled-format.js';
6
+ import { DATASET_ASSETS, findFileAsset, findDirectoryAssetPath } from './dataset-asset.js';
7
+ import { GcrMetadata } from './models/gcr-metadata.js';
8
+ import { naturalSort } from './sort.js';
6
9
 
7
- const BASE64_RE = /^[A-Za-z0-9+/]{100,}={0,2}$/;
10
+ export { naturalSort } from './sort.js';
8
11
 
9
- const NATURAL_SORT_RE = /(\d+|\D+)/g;
10
- const DIGIT_RE = /^\d+$/;
12
+ const BASE64_RE = /^[A-Za-z0-9+/]{100,}={0,2}$/;
11
13
 
12
14
  /**
13
15
  * @typedef {Object} Term
@@ -81,12 +83,12 @@ export class GcrPackage {
81
83
  }
82
84
 
83
85
  /**
84
- * Read and parse metadata.yaml from the package.
86
+ * Read and parse metadata.yaml from the package as a GcrMetadata instance.
85
87
  * @returns {Promise<GcrMetadata | null>}
86
88
  */
87
89
  async metadata() {
88
90
  const raw = await this._readText('metadata.yaml');
89
- return raw ? yaml.load(raw) : null;
91
+ return raw ? GcrMetadata.fromYaml(raw) : null;
90
92
  }
91
93
 
92
94
  /**
@@ -154,7 +156,6 @@ export class GcrPackage {
154
156
 
155
157
  /**
156
158
  * List compiled format directories present in this package.
157
- * Only returns formats whose `compiled/{format}/` directory contains at least one file.
158
159
  * @returns {Promise<string[]>}
159
160
  */
160
161
  async compiledFormats() {
@@ -165,9 +166,7 @@ export class GcrPackage {
165
166
  if (parsed) seen.add(parsed.format);
166
167
  }
167
168
  });
168
- return COMPILED_EXTENSIONS.keys
169
- ? [...COMPILED_EXTENSIONS.keys()].filter((f) => seen.has(f))
170
- : [...seen];
169
+ return COMPILED_FORMATS.filter((f) => seen.has(f));
171
170
  }
172
171
 
173
172
  /**
@@ -250,6 +249,124 @@ export class GcrPackage {
250
249
  return map;
251
250
  }
252
251
 
252
+ // --- Dataset assets (bibliography, images, etc.) ---
253
+
254
+ /**
255
+ * List all dataset asset entries found in this package.
256
+ * Each entry has { path, type, asset } where asset is the registry descriptor.
257
+ * @returns {Promise<Array<{ path: string, type: 'file' | 'directory', asset: { path: string, type: string } }>>}
258
+ */
259
+ async datasetAssetEntries() {
260
+ const entries = [];
261
+ this._zip.forEach((relativePath, zipEntry) => {
262
+ if (zipEntry.dir) return;
263
+ const fileAsset = findFileAsset(relativePath);
264
+ if (fileAsset) {
265
+ entries.push({ path: relativePath, type: 'file', asset: fileAsset });
266
+ return;
267
+ }
268
+ const dirAsset = findDirectoryAssetPath(relativePath);
269
+ if (dirAsset) {
270
+ entries.push({ path: relativePath, type: 'directory', asset: dirAsset });
271
+ }
272
+ });
273
+ return entries;
274
+ }
275
+
276
+ /**
277
+ * Read a file-type dataset asset by its registered path (e.g. 'bibliography.yaml').
278
+ * @param {string} assetPath - registered file asset path
279
+ * @returns {Promise<string | null>}
280
+ */
281
+ async readDatasetFileAsset(assetPath) {
282
+ return this._readText(assetPath);
283
+ }
284
+
285
+ /**
286
+ * Read bibliography.yaml from the package as a string (raw YAML).
287
+ * @returns {Promise<string | null>}
288
+ */
289
+ async bibliography() {
290
+ const fileAsset = DATASET_ASSETS.find((a) => a.type === 'file' && a.path === 'bibliography.yaml');
291
+ return fileAsset ? this._readText(fileAsset.path) : null;
292
+ }
293
+
294
+ /**
295
+ * Check whether the images/ directory is present and non-empty.
296
+ * Uses the dataset-asset registry to find the images directory.
297
+ * @returns {Promise<boolean>}
298
+ */
299
+ async hasImages() {
300
+ const asset = DATASET_ASSETS.find((a) => a.type === 'directory' && a.path === 'images');
301
+ if (!asset) return false;
302
+ const prefix = `${asset.path}/`;
303
+ let found = false;
304
+ this._zip.forEach((relativePath, entry) => {
305
+ if (!found && !entry.dir && relativePath.startsWith(prefix)) {
306
+ found = true;
307
+ }
308
+ });
309
+ return found;
310
+ }
311
+
312
+ /**
313
+ * List all image file paths (relative to ZIP root).
314
+ * Uses the dataset-asset registry to find the images directory.
315
+ * @returns {Promise<string[]>}
316
+ */
317
+ async imageFileNames() {
318
+ const asset = DATASET_ASSETS.find((a) => a.type === 'directory' && a.path === 'images');
319
+ if (!asset) return [];
320
+ const prefix = `${asset.path}/`;
321
+ const names = [];
322
+ this._zip.forEach((relativePath, entry) => {
323
+ if (!entry.dir && relativePath.startsWith(prefix)) {
324
+ names.push(relativePath);
325
+ }
326
+ });
327
+ return names.sort();
328
+ }
329
+
330
+ /**
331
+ * Read a single image file as a Uint8Array.
332
+ * @param {string} path - relative path starting with 'images/' or just the filename
333
+ * @returns {Promise<Uint8Array | null>}
334
+ */
335
+ async imageFile(path) {
336
+ const asset = DATASET_ASSETS.find((a) => a.type === 'directory' && a.path === 'images');
337
+ if (!asset) return null;
338
+ const fullPath = path.startsWith(`${asset.path}/`) ? path : `${asset.path}/${path}`;
339
+ const entry = this._zip.file(fullPath);
340
+ if (!entry) return null;
341
+ return entry.async('uint8array');
342
+ }
343
+
344
+ /**
345
+ * Iterate all image files.
346
+ * @param {(path: string, content: Uint8Array) => void | Promise<void>} callback
347
+ * @returns {Promise<void>}
348
+ */
349
+ async eachImageFile(callback) {
350
+ const names = await this.imageFileNames();
351
+ for (const name of names) {
352
+ const entry = this._zip.file(name);
353
+ if (entry) {
354
+ const content = await entry.async('uint8array');
355
+ await callback(name, content);
356
+ }
357
+ }
358
+ }
359
+
360
+ /**
361
+ * Load all image files into a Map (path → Uint8Array).
362
+ * @returns {Promise<Map<string, Uint8Array>>}
363
+ */
364
+ async allImageFiles() {
365
+ const map = new Map();
366
+ await this.eachImageFile((path, content) => { map.set(path, content); });
367
+ return map;
368
+ }
369
+
253
370
  /** @private @param {string} filePath @returns {Promise<string | null>} */
254
371
  async _readText(filePath) {
255
372
  const entry = this._zip.file(filePath);
@@ -277,34 +394,6 @@ export function parseConceptYaml(raw, context) {
277
394
  return conceptParser.parse(raw, context);
278
395
  }
279
396
 
280
- // --- Helpers ---
281
-
282
- /**
283
- * Natural sort comparator for concept IDs like "3.1.1.1", "551-12-39".
284
- * @param {string} a
285
- * @param {string} b
286
- * @returns {number}
287
- *
288
- * @example
289
- * ['3.1.10', '3.1.2', '3.1.1'].sort(naturalSort); // ['3.1.1', '3.1.2', '3.1.10']
290
- */
291
- export function naturalSort(a, b) {
292
- const pa = a.match(NATURAL_SORT_RE) || [];
293
- const pb = b.match(NATURAL_SORT_RE) || [];
294
- for (let i = 0; i < Math.max(pa.length, pb.length); i++) {
295
- const na = pa[i] || '';
296
- const nb = pb[i] || '';
297
- if (DIGIT_RE.test(na) && DIGIT_RE.test(nb)) {
298
- const diff = parseInt(na, 10) - parseInt(nb, 10);
299
- if (diff !== 0) return diff;
300
- } else {
301
- const cmp = na.localeCompare(nb);
302
- if (cmp !== 0) return cmp;
303
- }
304
- }
305
- return 0;
306
- }
307
-
308
397
  /**
309
398
  * @typedef {Object} GcrMetadata
310
399
  * @property {string} shortname - dataset short name
@@ -1,16 +1,21 @@
1
- import { Concept } from './models/index';
1
+ import { Concept, GcrMetadata } from './models/index';
2
2
 
3
3
  /** Compiled formats map: format name → id → content string. */
4
4
  export type CompiledFormatsMap = Record<string, Record<string, string> | Map<string, string>>;
5
5
 
6
+ /** Images map: relative path → binary content. */
7
+ export type ImagesMap = Record<string, Uint8Array | string | ArrayBuffer> | Map<string, Uint8Array | string | ArrayBuffer>;
8
+
6
9
  export class GcrWriter {
7
10
  static createBuffer(options: {
8
11
  concepts: Concept[];
9
- metadata?: Record<string, unknown>;
12
+ metadata?: GcrMetadata | Record<string, unknown>;
10
13
  register?: Record<string, unknown>;
11
14
  uuidFn?: () => string;
12
15
  format?: 'canonical' | 'managed' | 'auto';
13
16
  compiledFormats?: CompiledFormatsMap;
17
+ bibliography?: string;
18
+ images?: ImagesMap;
14
19
  }): Promise<Uint8Array>;
15
20
  }
16
21
 
package/src/gcr-writer.js CHANGED
@@ -2,6 +2,8 @@ import JSZip from 'jszip';
2
2
  import { conceptSerializer } from './concept-serializer.js';
3
3
  import { InvalidInputError } from './errors.js';
4
4
  import { compiledPath, isKnownFormat } from './compiled-format.js';
5
+ import { GcrMetadata } from './models/gcr-metadata.js';
6
+ import { GcrStatistics } from './models/gcr-statistics.js';
5
7
 
6
8
  export class GcrWriter {
7
9
  static async createBuffer(options) {
@@ -15,7 +17,8 @@ export class GcrWriter {
15
17
  const zip = new JSZip();
16
18
 
17
19
  if (options.metadata) {
18
- zip.file('metadata.yaml', conceptSerializer.toRegisterYaml(options.metadata));
20
+ const meta = GcrWriter._normalizeMetadata(options.metadata, options.concepts);
21
+ zip.file('metadata.yaml', conceptSerializer.toRegisterYaml(meta));
19
22
  }
20
23
  if (options.register) {
21
24
  zip.file('register.yaml', conceptSerializer.toRegisterYaml(options.register));
@@ -34,9 +37,37 @@ export class GcrWriter {
34
37
  GcrWriter._writeCompiledFormats(zip, options.compiledFormats);
35
38
  }
36
39
 
40
+ if (options.bibliography) {
41
+ zip.file('bibliography.yaml', options.bibliography);
42
+ }
43
+
44
+ if (options.images) {
45
+ GcrWriter._writeImages(zip, options.images);
46
+ }
47
+
37
48
  return zip.generateAsync({ type: 'uint8array' });
38
49
  }
39
50
 
51
+ static _normalizeMetadata(metadata, concepts) {
52
+ if (metadata instanceof GcrMetadata) {
53
+ const meta = metadata.clone();
54
+ if (!meta.statistics && concepts.length > 0) {
55
+ meta.statistics = GcrStatistics.fromConcepts(concepts);
56
+ }
57
+ if (!meta.conceptCount) meta.conceptCount = concepts.length;
58
+ return meta.toJSON();
59
+ }
60
+
61
+ const data = { ...metadata };
62
+ if (!data.statistics && concepts.length > 0) {
63
+ data.statistics = GcrStatistics.fromConcepts(concepts).toJSON();
64
+ }
65
+ if (!data.concept_count && concepts.length > 0) {
66
+ data.concept_count = concepts.length;
67
+ }
68
+ return data;
69
+ }
70
+
40
71
  static _writeCompiledFormats(zip, compiledFormats) {
41
72
  for (const [format, entries] of Object.entries(compiledFormats)) {
42
73
  if (!isKnownFormat(format)) {
@@ -48,6 +79,14 @@ export class GcrWriter {
48
79
  }
49
80
  }
50
81
  }
82
+
83
+ static _writeImages(zip, images) {
84
+ const map = images instanceof Map ? images : new Map(Object.entries(images));
85
+ for (const [path, content] of map) {
86
+ const fullPath = path.startsWith('images/') ? path : `images/${path}`;
87
+ zip.file(fullPath, content);
88
+ }
89
+ }
51
90
  }
52
91
 
53
92
  export async function createGcr(concepts, metadata) {
package/src/index.d.ts CHANGED
@@ -5,12 +5,12 @@ export {
5
5
  Designation, Expression, Abbreviation, Symbol, GraphicalSymbol,
6
6
  Citation, ConceptSource, RelatedConcept, ConceptDate,
7
7
  DetailedDefinition, NonVerbRep,
8
+ GcrMetadata, GcrStatistics,
8
9
  RELATIONSHIP_TYPES, DATE_TYPES,
9
10
  } from './models/index';
10
11
 
11
12
  // GCR reader
12
13
  export { loadGcr, GcrPackage, parseConceptYaml, naturalSort } from './gcr-reader';
13
- export type { GcrMetadata } from './gcr-reader';
14
14
 
15
15
  // GCR writer
16
16
  export { createGcr, GcrWriter } from './gcr-writer';
@@ -47,3 +47,11 @@ export function isKnownFormat(format: string): boolean;
47
47
  export function compiledFilename(format: string, id: string): string;
48
48
  export function compiledPath(format: string, id: string): string;
49
49
  export function parseCompiledPath(zipPath: string): { format: string; id: string } | null;
50
+
51
+ // Dataset asset registry
52
+ export const DATASET_ASSETS: readonly { path: string; type: string }[];
53
+ export const FILE_ASSETS: readonly { path: string; type: string }[];
54
+ export const DIRECTORY_ASSETS: readonly { path: string; type: string }[];
55
+ export function findFileAsset(path: string): { path: string; type: string } | undefined;
56
+ export function findDirectoryAssetPath(zipPath: string): { path: string; type: string } | undefined;
57
+ export function isDatasetAssetPath(zipPath: string): boolean;
package/src/index.js CHANGED
@@ -1,4 +1,5 @@
1
- export { loadGcr, GcrPackage, parseConceptYaml, naturalSort } from './gcr-reader.js';
1
+ export { naturalSort } from './sort.js';
2
+ export { loadGcr, GcrPackage, parseConceptYaml } from './gcr-reader.js';
2
3
  export { readConcepts, readConcept, listConceptIds, readRegister } from './concept-reader.js';
3
4
  export { writeConcept, writeConcepts } from './concept-writer.js';
4
5
  export { createGcr, GcrWriter } from './gcr-writer.js';
@@ -19,11 +20,18 @@ export {
19
20
  parseCompiledPath,
20
21
  } from './compiled-format.js';
21
22
 
23
+ export {
24
+ DATASET_ASSETS,
25
+ FILE_ASSETS,
26
+ DIRECTORY_ASSETS,
27
+ } from './dataset-asset.js';
28
+
22
29
  export {
23
30
  GlossaristModel,
24
31
  Concept, LocalizedConcept,
25
32
  Designation, Expression, Abbreviation, Symbol, GraphicalSymbol,
26
33
  Citation, ConceptSource, RelatedConcept, ConceptDate,
27
34
  DetailedDefinition, NonVerbRep,
35
+ GcrMetadata, GcrStatistics,
28
36
  RELATIONSHIP_TYPES, DATE_TYPES,
29
37
  } from './models/index.js';
@@ -2,16 +2,20 @@ import { ConceptCollection } from './concept-collection.js';
2
2
  import { readConcepts, readRegister } from './concept-reader.js';
3
3
  import { writeConcepts } from './concept-writer.js';
4
4
  import { loadGcr } from './gcr-reader.js';
5
- import { createGcr } from './gcr-writer.js';
5
+ import { GcrWriter } from './gcr-writer.js';
6
6
 
7
7
  export class ManagedConceptCollection {
8
8
  constructor() {
9
9
  this._concepts = new ConceptCollection();
10
10
  this._register = null;
11
+ this._bibliography = null;
12
+ this._images = null;
11
13
  }
12
14
 
13
15
  get concepts() { return this._concepts; }
14
16
  get register() { return this._register; }
17
+ get bibliography() { return this._bibliography; }
18
+ get images() { return this._images; }
15
19
 
16
20
  loadFromDirectory(dir) {
17
21
  this._concepts = new ConceptCollection(readConcepts(dir));
@@ -23,6 +27,8 @@ export class ManagedConceptCollection {
23
27
  const pkg = await loadGcr(input);
24
28
  this._concepts = new ConceptCollection(await pkg.allConcepts());
25
29
  this._register = await pkg.register();
30
+ this._bibliography = await pkg.bibliography();
31
+ this._images = await pkg.allImageFiles();
26
32
  return this;
27
33
  }
28
34
 
@@ -34,7 +40,15 @@ export class ManagedConceptCollection {
34
40
  }
35
41
 
36
42
  async saveToGcr(options = {}) {
37
- return createGcr(this._concepts, options.metadata);
43
+ return GcrWriter.createBuffer({
44
+ concepts: this._concepts,
45
+ metadata: options.metadata,
46
+ register: this._register,
47
+ format: options.format,
48
+ compiledFormats: options.compiledFormats,
49
+ bibliography: this._bibliography,
50
+ images: this._images,
51
+ });
38
52
  }
39
53
 
40
54
  add(concept) {
@@ -58,4 +72,14 @@ export class ManagedConceptCollection {
58
72
  this._register = data;
59
73
  return this;
60
74
  }
75
+
76
+ setBibliography(yamlString) {
77
+ this._bibliography = yamlString;
78
+ return this;
79
+ }
80
+
81
+ setImages(images) {
82
+ this._images = images instanceof Map ? images : new Map(Object.entries(images));
83
+ return this;
84
+ }
61
85
  }
@@ -0,0 +1,68 @@
1
+ import yaml from 'js-yaml';
2
+ import { GlossaristModel } from './base.js';
3
+ import { GcrStatistics } from './gcr-statistics.js';
4
+
5
+ export class GcrMetadata extends GlossaristModel {
6
+ constructor(data = {}) {
7
+ super();
8
+ this.shortname = data.shortname ?? null;
9
+ this.version = data.version ?? null;
10
+ this.title = data.title ?? null;
11
+ this.description = data.description ?? null;
12
+ this.owner = data.owner ?? null;
13
+ this.tags = data.tags ?? [];
14
+ this.conceptCount = data.concept_count ?? data.conceptCount ?? 0;
15
+ this.languages = data.languages ?? [];
16
+ this.createdAt = data.created_at ?? data.createdAt ?? null;
17
+ this.glossaristVersion = data.glossarist_version ?? data.glossaristVersion ?? null;
18
+ this.schemaVersion = data.schema_version ?? data.schemaVersion ?? '1';
19
+ this.homepage = data.homepage ?? null;
20
+ this.repository = data.repository ?? null;
21
+ this.license = data.license ?? null;
22
+ this.uriPrefix = data.uri_prefix ?? data.uriPrefix ?? null;
23
+ this.conceptUriTemplate = data.concept_uri_template ?? data.conceptUriTemplate ?? null;
24
+ this.compiledFormats = data.compiled_formats ?? data.compiledFormats ?? [];
25
+ this.statistics = data.statistics
26
+ ? (data.statistics instanceof GcrStatistics ? data.statistics : new GcrStatistics(data.statistics))
27
+ : null;
28
+ }
29
+
30
+ get concept_count() { return this.conceptCount; }
31
+ get created_at() { return this.createdAt; }
32
+ get glossarist_version() { return this.glossaristVersion; }
33
+ get schema_version() { return this.schemaVersion; }
34
+ get uri_prefix() { return this.uriPrefix; }
35
+ get concept_uri_template() { return this.conceptUriTemplate; }
36
+ get compiled_formats() { return this.compiledFormats; }
37
+
38
+ toJSON() {
39
+ const obj = {};
40
+ if (this.shortname != null) obj.shortname = this.shortname;
41
+ if (this.version != null) obj.version = this.version;
42
+ if (this.title != null) obj.title = this.title;
43
+ if (this.description != null) obj.description = this.description;
44
+ if (this.owner != null) obj.owner = this.owner;
45
+ if (this.tags.length > 0) obj.tags = this.tags;
46
+ if (this.conceptCount > 0) obj.concept_count = this.conceptCount;
47
+ if (this.languages.length > 0) obj.languages = this.languages;
48
+ if (this.createdAt != null) obj.created_at = this.createdAt;
49
+ if (this.glossaristVersion != null) obj.glossarist_version = this.glossaristVersion;
50
+ if (this.schemaVersion != null) obj.schema_version = this.schemaVersion;
51
+ if (this.homepage != null) obj.homepage = this.homepage;
52
+ if (this.repository != null) obj.repository = this.repository;
53
+ if (this.license != null) obj.license = this.license;
54
+ if (this.uriPrefix != null) obj.uri_prefix = this.uriPrefix;
55
+ if (this.conceptUriTemplate != null) obj.concept_uri_template = this.conceptUriTemplate;
56
+ if (this.compiledFormats.length > 0) obj.compiled_formats = this.compiledFormats;
57
+ if (this.statistics != null) obj.statistics = this.statistics.toJSON();
58
+ return obj;
59
+ }
60
+
61
+ static fromJSON(data) {
62
+ return new GcrMetadata(data);
63
+ }
64
+
65
+ static fromYaml(yamlString) {
66
+ return new GcrMetadata(yaml.load(yamlString));
67
+ }
68
+ }
@@ -0,0 +1,51 @@
1
+ import { GlossaristModel } from './base.js';
2
+
3
+ export class GcrStatistics extends GlossaristModel {
4
+ constructor(data = {}) {
5
+ super();
6
+ this.totalConcepts = data.total_concepts ?? data.totalConcepts ?? 0;
7
+ this.conceptsWithDefinitions = data.concepts_with_definitions ?? data.conceptsWithDefinitions ?? 0;
8
+ this.conceptsByStatus = data.concepts_by_status ?? data.conceptsByStatus ?? {};
9
+ }
10
+
11
+ get total_concepts() { return this.totalConcepts; }
12
+ get concepts_with_definitions() { return this.conceptsWithDefinitions; }
13
+ get concepts_by_status() { return this.conceptsByStatus; }
14
+
15
+ toJSON() {
16
+ const obj = { total_concepts: this.totalConcepts };
17
+ if (this.conceptsWithDefinitions > 0) {
18
+ obj.concepts_with_definitions = this.conceptsWithDefinitions;
19
+ }
20
+ if (Object.keys(this.conceptsByStatus).length > 0) {
21
+ obj.concepts_by_status = this.conceptsByStatus;
22
+ }
23
+ return obj;
24
+ }
25
+
26
+ static fromJSON(data) {
27
+ return new GcrStatistics(data);
28
+ }
29
+
30
+ static fromConcepts(concepts) {
31
+ const langs = new Set();
32
+ let withDefs = 0;
33
+ const byStatus = {};
34
+
35
+ for (const concept of concepts) {
36
+ for (const lang of concept.languages) {
37
+ langs.add(lang);
38
+ const lc = concept.localization(lang);
39
+ if (lc && lc.definitions.length > 0) withDefs++;
40
+ const status = lc?.entryStatus ?? 'unknown';
41
+ byStatus[status] = (byStatus[status] ?? 0) + 1;
42
+ }
43
+ }
44
+
45
+ return new GcrStatistics({
46
+ total_concepts: concepts.length,
47
+ concepts_with_definitions: withDefs,
48
+ concepts_by_status: byStatus,
49
+ });
50
+ }
51
+ }
@@ -106,3 +106,42 @@ export class NonVerbRep extends GlossaristModel {
106
106
  readonly formula: string | null;
107
107
  readonly sources: Citation[];
108
108
  }
109
+
110
+ export class GcrStatistics extends GlossaristModel {
111
+ readonly totalConcepts: number;
112
+ readonly conceptsWithDefinitions: number;
113
+ readonly conceptsByStatus: Record<string, number>;
114
+ readonly total_concepts: number;
115
+ readonly concepts_with_definitions: number;
116
+ readonly concepts_by_status: Record<string, number>;
117
+ static fromConcepts(concepts: Concept[]): GcrStatistics;
118
+ static fromJSON(data: Record<string, unknown>): GcrStatistics;
119
+ }
120
+
121
+ export class GcrMetadata extends GlossaristModel {
122
+ readonly shortname: string | null;
123
+ readonly version: string | null;
124
+ readonly title: string | null;
125
+ readonly description: string | null;
126
+ readonly owner: string | null;
127
+ readonly tags: string[];
128
+ readonly conceptCount: number;
129
+ readonly languages: string[];
130
+ readonly createdAt: string | null;
131
+ readonly glossaristVersion: string | null;
132
+ readonly schemaVersion: string;
133
+ readonly homepage: string | null;
134
+ readonly repository: string | null;
135
+ readonly license: string | null;
136
+ readonly uriPrefix: string | null;
137
+ readonly conceptUriTemplate: string | null;
138
+ readonly compiledFormats: string[];
139
+ readonly statistics: GcrStatistics | null;
140
+ readonly concept_count: number;
141
+ readonly created_at: string | null;
142
+ readonly glossarist_version: string | null;
143
+ readonly schema_version: string;
144
+ readonly compiled_formats: string[];
145
+ static fromYaml(yamlString: string): GcrMetadata;
146
+ static fromJSON(data: Record<string, unknown>): GcrMetadata;
147
+ }
@@ -8,3 +8,5 @@ export { RelatedConcept, RELATIONSHIP_TYPES } from './related-concept.js';
8
8
  export { ConceptDate, DATE_TYPES } from './concept-date.js';
9
9
  export { DetailedDefinition } from './detailed-definition.js';
10
10
  export { NonVerbRep } from './non-verb-rep.js';
11
+ export { GcrMetadata } from './gcr-metadata.js';
12
+ export { GcrStatistics } from './gcr-statistics.js';;
package/src/sort.js ADDED
@@ -0,0 +1,25 @@
1
+ const NATURAL_SORT_RE = /(\d+|\D+)/g;
2
+ const DIGIT_RE = /^\d+$/;
3
+
4
+ /**
5
+ * Natural sort comparator for concept IDs like "3.1.1.1", "551-12-39".
6
+ * @param {string} a
7
+ * @param {string} b
8
+ * @returns {number}
9
+ */
10
+ export function naturalSort(a, b) {
11
+ const pa = a.match(NATURAL_SORT_RE) || [];
12
+ const pb = b.match(NATURAL_SORT_RE) || [];
13
+ for (let i = 0; i < Math.max(pa.length, pb.length); i++) {
14
+ const na = pa[i] || '';
15
+ const nb = pb[i] || '';
16
+ if (DIGIT_RE.test(na) && DIGIT_RE.test(nb)) {
17
+ const diff = parseInt(na, 10) - parseInt(nb, 10);
18
+ if (diff !== 0) return diff;
19
+ } else {
20
+ const cmp = na.localeCompare(nb);
21
+ if (cmp !== 0) return cmp;
22
+ }
23
+ }
24
+ return 0;
25
+ }