glossarist 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -0
- package/package.json +1 -1
- package/src/dataset-asset.js +65 -0
- package/src/gcr-reader.d.ts +15 -0
- package/src/gcr-reader.js +78 -5
- package/src/gcr-writer.d.ts +5 -0
- package/src/gcr-writer.js +16 -0
- package/src/index.d.ts +5 -0
- package/src/index.js +6 -0
- package/src/managed-concept-collection.js +26 -2
package/README.md
CHANGED
|
@@ -118,6 +118,51 @@ COMPILED_EXTENSIONS.get('turtle'); // 'ttl'
|
|
|
118
118
|
isKnownFormat('csv'); // false
|
|
119
119
|
```
|
|
120
120
|
|
|
121
|
+
### Bibliography and images in GCR
|
|
122
|
+
|
|
123
|
+
GCR packages can contain a `bibliography.yaml` file and an `images/` directory, making the archive fully self-contained. This mirrors the Ruby glossarist gem's `DATASET_ASSETS` convention.
|
|
124
|
+
|
|
125
|
+
```js
|
|
126
|
+
import { loadGcr } from 'glossarist';
|
|
127
|
+
|
|
128
|
+
const pkg = await loadGcr(fs.readFileSync('dataset.gcr'));
|
|
129
|
+
|
|
130
|
+
// Bibliography (raw YAML string)
|
|
131
|
+
const bib = await pkg.bibliography(); // 'ISO_19111_2019:\n ...' or null
|
|
132
|
+
|
|
133
|
+
// Images
|
|
134
|
+
await pkg.hasImages(); // true
|
|
135
|
+
const names = await pkg.imageFileNames(); // ['images/fig1.png', ...]
|
|
136
|
+
const img = await pkg.imageFile('fig1.png'); // Uint8Array or null
|
|
137
|
+
const allImages = await pkg.allImageFiles(); // Map<string, Uint8Array>
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
#### Write bibliography and images into a GCR package
|
|
141
|
+
|
|
142
|
+
```js
|
|
143
|
+
import { GcrWriter } from 'glossarist';
|
|
144
|
+
|
|
145
|
+
const buf = await GcrWriter.createBuffer({
|
|
146
|
+
concepts: [...],
|
|
147
|
+
metadata: { shortname: 'my-dataset' },
|
|
148
|
+
bibliography: 'ISO_19111_2019:\n title: Geographic information',
|
|
149
|
+
images: {
|
|
150
|
+
'figure1.png': pngBuffer,
|
|
151
|
+
'diagrams/schema.svg': svgString,
|
|
152
|
+
},
|
|
153
|
+
});
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
#### Dataset asset registry
|
|
157
|
+
|
|
158
|
+
```js
|
|
159
|
+
import { DATASET_ASSETS, FILE_ASSETS, DIRECTORY_ASSETS } from 'glossarist';
|
|
160
|
+
|
|
161
|
+
DATASET_ASSETS; // [{ path: 'bibliography.yaml', type: 'file' }, { path: 'images', type: 'directory' }]
|
|
162
|
+
FILE_ASSETS; // [{ path: 'bibliography.yaml', type: 'file' }]
|
|
163
|
+
DIRECTORY_ASSETS; // [{ path: 'images', type: 'directory' }]
|
|
164
|
+
```
|
|
165
|
+
|
|
121
166
|
### Domain model
|
|
122
167
|
|
|
123
168
|
Every domain entity is a class instance with `toJSON()`, `fromJSON()`, `equals()`, and `clone()`:
|
|
@@ -269,6 +314,7 @@ Public API (index.js)
|
|
|
269
314
|
├── Serialization → ConceptSerializer (canonical + managed YAML output)
|
|
270
315
|
├── I/O → loadGcr, readConcepts, createGcr, writeConcepts
|
|
271
316
|
├── Compiled formats → CompiledFormatRegistry (TBX, JSON-LD, Turtle, JSONL in GCR)
|
|
317
|
+
├── Dataset assets → DATASET_ASSETS registry (bibliography.yaml, images/ in GCR)
|
|
272
318
|
├── Collections → ConceptCollection (Proxy-based, queryable), ManagedConceptCollection
|
|
273
319
|
├── Validation → ConceptValidator, RegisterValidator, ValidationRule (pluggable)
|
|
274
320
|
├── Utilities → conceptUuid, referenceResolver, V1Reader
|
package/package.json
CHANGED
(+1 -1; hunk not captured in this view)
package/src/dataset-asset.js
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
 * Registry of dataset assets that can be bundled inside a GCR package.
 *
 * Mirrors the Ruby glossarist gem's GcrPackage::DATASET_ASSETS.
 * Asset types:
 *   - file: a single named file at the GCR root (e.g. bibliography.yaml)
 *   - directory: a named directory with arbitrary nested files (e.g. images/)
 *
 * New asset types can be added by appending to DATASET_ASSETS (open/closed).
 *
 * Both the list and each descriptor are frozen: Object.freeze is shallow, so
 * freezing only the array would still let any importer rewrite a descriptor's
 * `path` or `type` and corrupt the registry for every other consumer.
 */

const DATASET_ASSETS = Object.freeze([
  Object.freeze({ path: 'bibliography.yaml', type: 'file' }),
  Object.freeze({ path: 'images', type: 'directory' }),
]);

/** Descriptors from DATASET_ASSETS whose type is 'file'. */
const FILE_ASSETS = Object.freeze(
  DATASET_ASSETS.filter((asset) => asset.type === 'file'),
);

/** Descriptors from DATASET_ASSETS whose type is 'directory'. */
const DIRECTORY_ASSETS = Object.freeze(
  DATASET_ASSETS.filter((asset) => asset.type === 'directory'),
);

/**
 * Find a file asset descriptor by its ZIP path.
 * The match is exact; non-string input simply matches nothing.
 * @param {string} path
 * @returns {{ path: string, type: string } | undefined}
 */
function findFileAsset(path) {
  return FILE_ASSETS.find((asset) => asset.path === path);
}

/**
 * Check whether a ZIP path belongs to a directory asset.
 * Returns the asset descriptor if so, undefined otherwise.
 *
 * A path belongs to a directory asset when it equals the directory name or
 * starts with the directory name followed by '/'. The trailing slash in the
 * prefix test keeps siblings such as 'imagesX/a.png' from matching 'images'.
 * @param {string} zipPath
 * @returns {{ path: string, type: string } | undefined}
 */
function findDirectoryAssetPath(zipPath) {
  // Non-string input cannot name a ZIP entry; report "no match" instead of throwing.
  if (typeof zipPath !== 'string') return undefined;
  return DIRECTORY_ASSETS.find(
    (asset) => zipPath === asset.path || zipPath.startsWith(`${asset.path}/`),
  );
}

/**
 * Check if a ZIP path is a dataset asset entry (file asset or inside a directory asset).
 * @param {string} zipPath
 * @returns {boolean}
 */
function isDatasetAssetPath(zipPath) {
  return findFileAsset(zipPath) !== undefined || findDirectoryAssetPath(zipPath) !== undefined;
}

export {
  DATASET_ASSETS,
  FILE_ASSETS,
  DIRECTORY_ASSETS,
  findFileAsset,
  findDirectoryAssetPath,
  isDatasetAssetPath,
};
|
package/src/gcr-reader.d.ts
CHANGED
|
@@ -85,6 +85,21 @@ export class GcrPackage {
|
|
|
85
85
|
eachCompiledFile(format: string, callback: (id: string, content: string) => void | Promise<void>): Promise<void>;
|
|
86
86
|
/** Load all entries for a compiled format into a Map. */
|
|
87
87
|
allCompiledFiles(format: string): Promise<Map<string, string>>;
|
|
88
|
+
|
|
89
|
+
// Dataset assets (bibliography, images)
|
|
90
|
+
|
|
91
|
+
/** Read bibliography.yaml from the package as raw YAML string. */
|
|
92
|
+
bibliography(): Promise<string | null>;
|
|
93
|
+
/** Check whether the images/ directory is present and non-empty. */
|
|
94
|
+
hasImages(): Promise<boolean>;
|
|
95
|
+
/** List all image file paths (relative to ZIP root). */
|
|
96
|
+
imageFileNames(): Promise<string[]>;
|
|
97
|
+
/** Read a single image file as Uint8Array. */
|
|
98
|
+
imageFile(path: string): Promise<Uint8Array | null>;
|
|
99
|
+
/** Iterate all image files. */
|
|
100
|
+
eachImageFile(callback: (path: string, content: Uint8Array) => void | Promise<void>): Promise<void>;
|
|
101
|
+
/** Load all image files into a Map (path → Uint8Array). */
|
|
102
|
+
allImageFiles(): Promise<Map<string, Uint8Array>>;
|
|
88
103
|
}
|
|
89
104
|
|
|
90
105
|
/** Parse raw concept YAML (canonical or managed format) into a normalized Concept. */
|
package/src/gcr-reader.js
CHANGED
|
@@ -2,7 +2,7 @@ import JSZip from 'jszip';
|
|
|
2
2
|
import yaml from 'js-yaml';
|
|
3
3
|
import { conceptParser } from './concept-parser.js';
|
|
4
4
|
import { InvalidInputError } from './errors.js';
|
|
5
|
-
import {
|
|
5
|
+
import { COMPILED_FORMATS, parseCompiledPath, compiledPath } from './compiled-format.js';
|
|
6
6
|
|
|
7
7
|
const BASE64_RE = /^[A-Za-z0-9+/]{100,}={0,2}$/;
|
|
8
8
|
|
|
@@ -154,7 +154,6 @@ export class GcrPackage {
|
|
|
154
154
|
|
|
155
155
|
/**
|
|
156
156
|
* List compiled format directories present in this package.
|
|
157
|
-
* Only returns formats whose `compiled/{format}/` directory contains at least one file.
|
|
158
157
|
* @returns {Promise<string[]>}
|
|
159
158
|
*/
|
|
160
159
|
async compiledFormats() {
|
|
@@ -165,9 +164,7 @@ export class GcrPackage {
|
|
|
165
164
|
if (parsed) seen.add(parsed.format);
|
|
166
165
|
}
|
|
167
166
|
});
|
|
168
|
-
return
|
|
169
|
-
? [...COMPILED_EXTENSIONS.keys()].filter((f) => seen.has(f))
|
|
170
|
-
: [...seen];
|
|
167
|
+
return COMPILED_FORMATS.filter((f) => seen.has(f));
|
|
171
168
|
}
|
|
172
169
|
|
|
173
170
|
/**
|
|
@@ -250,6 +247,82 @@ export class GcrPackage {
|
|
|
250
247
|
return map;
|
|
251
248
|
}
|
|
252
249
|
|
|
250
|
+
// --- Dataset assets (bibliography, images, etc.) ---
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* Read bibliography.yaml from the package as a string (raw YAML).
|
|
254
|
+
* @returns {Promise<string | null>}
|
|
255
|
+
*/
|
|
256
|
+
async bibliography() {
|
|
257
|
+
return this._readText('bibliography.yaml');
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
/**
|
|
261
|
+
* Check whether the images/ directory is present and non-empty.
|
|
262
|
+
* @returns {Promise<boolean>}
|
|
263
|
+
*/
|
|
264
|
+
async hasImages() {
|
|
265
|
+
let found = false;
|
|
266
|
+
this._zip.forEach((relativePath, entry) => {
|
|
267
|
+
if (!found && !entry.dir && relativePath.startsWith('images/')) {
|
|
268
|
+
found = true;
|
|
269
|
+
}
|
|
270
|
+
});
|
|
271
|
+
return found;
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
/**
|
|
275
|
+
* List all image file paths (relative to ZIP root).
|
|
276
|
+
* @returns {Promise<string[]>}
|
|
277
|
+
*/
|
|
278
|
+
async imageFileNames() {
|
|
279
|
+
const names = [];
|
|
280
|
+
this._zip.forEach((relativePath, entry) => {
|
|
281
|
+
if (!entry.dir && relativePath.startsWith('images/')) {
|
|
282
|
+
names.push(relativePath);
|
|
283
|
+
}
|
|
284
|
+
});
|
|
285
|
+
return names.sort();
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/**
|
|
289
|
+
* Read a single image file as a Uint8Array.
|
|
290
|
+
* @param {string} path - relative path starting with 'images/' or just the filename
|
|
291
|
+
* @returns {Promise<Uint8Array | null>}
|
|
292
|
+
*/
|
|
293
|
+
async imageFile(path) {
|
|
294
|
+
const fullPath = path.startsWith('images/') ? path : `images/${path}`;
|
|
295
|
+
const entry = this._zip.file(fullPath);
|
|
296
|
+
if (!entry) return null;
|
|
297
|
+
return entry.async('uint8array');
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
/**
|
|
301
|
+
* Iterate all image files.
|
|
302
|
+
* @param {(path: string, content: Uint8Array) => void | Promise<void>} callback
|
|
303
|
+
* @returns {Promise<void>}
|
|
304
|
+
*/
|
|
305
|
+
async eachImageFile(callback) {
|
|
306
|
+
const names = await this.imageFileNames();
|
|
307
|
+
for (const name of names) {
|
|
308
|
+
const entry = this._zip.file(name);
|
|
309
|
+
if (entry) {
|
|
310
|
+
const content = await entry.async('uint8array');
|
|
311
|
+
await callback(name, content);
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
/**
|
|
317
|
+
* Load all image files into a Map (path → Uint8Array).
|
|
318
|
+
* @returns {Promise<Map<string, Uint8Array>>}
|
|
319
|
+
*/
|
|
320
|
+
async allImageFiles() {
|
|
321
|
+
const map = new Map();
|
|
322
|
+
await this.eachImageFile((path, content) => { map.set(path, content); });
|
|
323
|
+
return map;
|
|
324
|
+
}
|
|
325
|
+
|
|
253
326
|
/** @private @param {string} filePath @returns {Promise<string | null>} */
|
|
254
327
|
async _readText(filePath) {
|
|
255
328
|
const entry = this._zip.file(filePath);
|
package/src/gcr-writer.d.ts
CHANGED
|
@@ -3,6 +3,9 @@ import { Concept } from './models/index';
|
|
|
3
3
|
/** Compiled formats map: format name → id → content string. */
|
|
4
4
|
export type CompiledFormatsMap = Record<string, Record<string, string> | Map<string, string>>;
|
|
5
5
|
|
|
6
|
+
/** Images map: relative path → binary content. */
|
|
7
|
+
export type ImagesMap = Record<string, Uint8Array | string | ArrayBuffer> | Map<string, Uint8Array | string | ArrayBuffer>;
|
|
8
|
+
|
|
6
9
|
export class GcrWriter {
|
|
7
10
|
static createBuffer(options: {
|
|
8
11
|
concepts: Concept[];
|
|
@@ -11,6 +14,8 @@ export class GcrWriter {
|
|
|
11
14
|
uuidFn?: () => string;
|
|
12
15
|
format?: 'canonical' | 'managed' | 'auto';
|
|
13
16
|
compiledFormats?: CompiledFormatsMap;
|
|
17
|
+
bibliography?: string;
|
|
18
|
+
images?: ImagesMap;
|
|
14
19
|
}): Promise<Uint8Array>;
|
|
15
20
|
}
|
|
16
21
|
|
package/src/gcr-writer.js
CHANGED
|
@@ -34,6 +34,14 @@ export class GcrWriter {
|
|
|
34
34
|
GcrWriter._writeCompiledFormats(zip, options.compiledFormats);
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
+
if (options.bibliography) {
|
|
38
|
+
zip.file('bibliography.yaml', options.bibliography);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
if (options.images) {
|
|
42
|
+
GcrWriter._writeImages(zip, options.images);
|
|
43
|
+
}
|
|
44
|
+
|
|
37
45
|
return zip.generateAsync({ type: 'uint8array' });
|
|
38
46
|
}
|
|
39
47
|
|
|
@@ -48,6 +56,14 @@ export class GcrWriter {
|
|
|
48
56
|
}
|
|
49
57
|
}
|
|
50
58
|
}
|
|
59
|
+
|
|
60
|
+
static _writeImages(zip, images) {
|
|
61
|
+
const map = images instanceof Map ? images : new Map(Object.entries(images));
|
|
62
|
+
for (const [path, content] of map) {
|
|
63
|
+
const fullPath = path.startsWith('images/') ? path : `images/${path}`;
|
|
64
|
+
zip.file(fullPath, content);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
51
67
|
}
|
|
52
68
|
|
|
53
69
|
export async function createGcr(concepts, metadata) {
|
package/src/index.d.ts
CHANGED
|
@@ -47,3 +47,8 @@ export function isKnownFormat(format: string): boolean;
|
|
|
47
47
|
export function compiledFilename(format: string, id: string): string;
|
|
48
48
|
export function compiledPath(format: string, id: string): string;
|
|
49
49
|
export function parseCompiledPath(zipPath: string): { format: string; id: string } | null;
|
|
50
|
+
|
|
51
|
+
// Dataset asset registry
|
|
52
|
+
export const DATASET_ASSETS: readonly { path: string; type: string }[];
|
|
53
|
+
export const FILE_ASSETS: readonly { path: string; type: string }[];
|
|
54
|
+
export const DIRECTORY_ASSETS: readonly { path: string; type: string }[];
|
package/src/index.js
CHANGED
(+6 -0; hunk not captured in this view)
package/src/managed-concept-collection.js
CHANGED
|
@@ -2,16 +2,20 @@ import { ConceptCollection } from './concept-collection.js';
|
|
|
2
2
|
import { readConcepts, readRegister } from './concept-reader.js';
|
|
3
3
|
import { writeConcepts } from './concept-writer.js';
|
|
4
4
|
import { loadGcr } from './gcr-reader.js';
|
|
5
|
-
import {
|
|
5
|
+
import { GcrWriter } from './gcr-writer.js';
|
|
6
6
|
|
|
7
7
|
export class ManagedConceptCollection {
|
|
8
8
|
constructor() {
|
|
9
9
|
this._concepts = new ConceptCollection();
|
|
10
10
|
this._register = null;
|
|
11
|
+
this._bibliography = null;
|
|
12
|
+
this._images = null;
|
|
11
13
|
}
|
|
12
14
|
|
|
13
15
|
get concepts() { return this._concepts; }
|
|
14
16
|
get register() { return this._register; }
|
|
17
|
+
get bibliography() { return this._bibliography; }
|
|
18
|
+
get images() { return this._images; }
|
|
15
19
|
|
|
16
20
|
loadFromDirectory(dir) {
|
|
17
21
|
this._concepts = new ConceptCollection(readConcepts(dir));
|
|
@@ -23,6 +27,8 @@ export class ManagedConceptCollection {
|
|
|
23
27
|
const pkg = await loadGcr(input);
|
|
24
28
|
this._concepts = new ConceptCollection(await pkg.allConcepts());
|
|
25
29
|
this._register = await pkg.register();
|
|
30
|
+
this._bibliography = await pkg.bibliography();
|
|
31
|
+
this._images = await pkg.allImageFiles();
|
|
26
32
|
return this;
|
|
27
33
|
}
|
|
28
34
|
|
|
@@ -34,7 +40,15 @@ export class ManagedConceptCollection {
|
|
|
34
40
|
}
|
|
35
41
|
|
|
36
42
|
async saveToGcr(options = {}) {
|
|
37
|
-
return
|
|
43
|
+
return GcrWriter.createBuffer({
|
|
44
|
+
concepts: this._concepts,
|
|
45
|
+
metadata: options.metadata,
|
|
46
|
+
register: this._register,
|
|
47
|
+
format: options.format,
|
|
48
|
+
compiledFormats: options.compiledFormats,
|
|
49
|
+
bibliography: this._bibliography,
|
|
50
|
+
images: this._images,
|
|
51
|
+
});
|
|
38
52
|
}
|
|
39
53
|
|
|
40
54
|
add(concept) {
|
|
@@ -58,4 +72,14 @@ export class ManagedConceptCollection {
|
|
|
58
72
|
this._register = data;
|
|
59
73
|
return this;
|
|
60
74
|
}
|
|
75
|
+
|
|
76
|
+
setBibliography(yamlString) {
|
|
77
|
+
this._bibliography = yamlString;
|
|
78
|
+
return this;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
setImages(images) {
|
|
82
|
+
this._images = images instanceof Map ? images : new Map(Object.entries(images));
|
|
83
|
+
return this;
|
|
84
|
+
}
|
|
61
85
|
}
|