glossarist 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -0
- package/package.json +1 -1
- package/src/compiled-format.js +89 -0
- package/src/dataset-asset.js +65 -0
- package/src/gcr-reader.d.ts +32 -0
- package/src/gcr-reader.js +174 -0
- package/src/gcr-writer.d.ts +9 -0
- package/src/gcr-writer.js +33 -0
- package/src/index.d.ts +13 -0
- package/src/index.js +15 -0
- package/src/managed-concept-collection.js +26 -2
package/README.md
CHANGED
|
@@ -58,6 +58,111 @@ const buf = await createGcr([concept], { shortname: 'test' });
|
|
|
58
58
|
fs.writeFileSync('out.gcr', buf);
|
|
59
59
|
```
|
|
60
60
|
|
|
61
|
+
### Compiled / machine formats in GCR
|
|
62
|
+
|
|
63
|
+
GCR packages can contain pre-compiled machine formats (TBX, JSON-LD, Turtle, JSONL) inside a `compiled/` directory. This mirrors the Ruby glossarist gem's `COMPILED_EXTENSIONS` convention.
|
|
64
|
+
|
|
65
|
+
```js
|
|
66
|
+
import { loadGcr } from 'glossarist';
|
|
67
|
+
|
|
68
|
+
const pkg = await loadGcr(fs.readFileSync('dataset.gcr'));
|
|
69
|
+
|
|
70
|
+
// Discover which compiled formats are present
|
|
71
|
+
const formats = await pkg.compiledFormats(); // ['tbx', 'jsonld', 'turtle']
|
|
72
|
+
|
|
73
|
+
// List entry IDs for a specific format
|
|
74
|
+
const ids = await pkg.compiledFormatIds('jsonld'); // ['3.1.1.1', '3.1.1.2']
|
|
75
|
+
|
|
76
|
+
// Read a single compiled file as string
|
|
77
|
+
const jsonld = await pkg.compiledFile('jsonld', '3.1.1.1');
|
|
78
|
+
|
|
79
|
+
// Read a single compiled file as binary
|
|
80
|
+
const buf = await pkg.compiledFileBuffer('jsonld', '3.1.1.1');
|
|
81
|
+
|
|
82
|
+
// Iterate all entries for a format
|
|
83
|
+
await pkg.eachCompiledFile('turtle', (id, content) => {
|
|
84
|
+
console.log(id, content.length);
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
// Load all entries for a format into a Map
|
|
88
|
+
const allTurtle = await pkg.allCompiledFiles('turtle');
|
|
89
|
+
|
|
90
|
+
// Check if a format is present
|
|
91
|
+
await pkg.hasCompiledFormat('tbx'); // true
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
#### Write compiled formats into a GCR package
|
|
95
|
+
|
|
96
|
+
```js
|
|
97
|
+
import { GcrWriter } from 'glossarist';
|
|
98
|
+
|
|
99
|
+
const buf = await GcrWriter.createBuffer({
|
|
100
|
+
concepts: [...],
|
|
101
|
+
metadata: { shortname: 'my-dataset' },
|
|
102
|
+
compiledFormats: {
|
|
103
|
+
tbx: { 'my-dataset': tbxXmlString },
|
|
104
|
+
jsonld: { '3.1.1.1': jsonldString, '3.1.1.2': jsonldString },
|
|
105
|
+
turtle: { '3.1.1.1': ttlString },
|
|
106
|
+
},
|
|
107
|
+
});
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
#### Format registry
|
|
111
|
+
|
|
112
|
+
```js
|
|
113
|
+
import { COMPILED_FORMATS, COMPILED_EXTENSIONS, isKnownFormat } from 'glossarist';
|
|
114
|
+
|
|
115
|
+
COMPILED_FORMATS; // ['tbx', 'jsonld', 'turtle', 'jsonl']
|
|
116
|
+
COMPILED_EXTENSIONS.get('tbx'); // 'tbx.xml'
|
|
117
|
+
COMPILED_EXTENSIONS.get('turtle'); // 'ttl'
|
|
118
|
+
isKnownFormat('csv'); // false
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Bibliography and images in GCR
|
|
122
|
+
|
|
123
|
+
GCR packages can contain a `bibliography.yaml` file and an `images/` directory, making the archive fully self-contained. This mirrors the Ruby glossarist gem's `DATASET_ASSETS` convention.
|
|
124
|
+
|
|
125
|
+
```js
|
|
126
|
+
import { loadGcr } from 'glossarist';
|
|
127
|
+
|
|
128
|
+
const pkg = await loadGcr(fs.readFileSync('dataset.gcr'));
|
|
129
|
+
|
|
130
|
+
// Bibliography (raw YAML string)
|
|
131
|
+
const bib = await pkg.bibliography(); // 'ISO_19111_2019:\n ...' or null
|
|
132
|
+
|
|
133
|
+
// Images
|
|
134
|
+
await pkg.hasImages(); // true
|
|
135
|
+
const names = await pkg.imageFileNames(); // ['images/fig1.png', ...]
|
|
136
|
+
const img = await pkg.imageFile('fig1.png'); // Uint8Array or null
|
|
137
|
+
const allImages = await pkg.allImageFiles(); // Map<string, Uint8Array>
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
#### Write bibliography and images into a GCR package
|
|
141
|
+
|
|
142
|
+
```js
|
|
143
|
+
import { GcrWriter } from 'glossarist';
|
|
144
|
+
|
|
145
|
+
const buf = await GcrWriter.createBuffer({
|
|
146
|
+
concepts: [...],
|
|
147
|
+
metadata: { shortname: 'my-dataset' },
|
|
148
|
+
bibliography: 'ISO_19111_2019:\n title: Geographic information',
|
|
149
|
+
images: {
|
|
150
|
+
'figure1.png': pngBuffer,
|
|
151
|
+
'diagrams/schema.svg': svgString,
|
|
152
|
+
},
|
|
153
|
+
});
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
#### Dataset asset registry
|
|
157
|
+
|
|
158
|
+
```js
|
|
159
|
+
import { DATASET_ASSETS, FILE_ASSETS, DIRECTORY_ASSETS } from 'glossarist';
|
|
160
|
+
|
|
161
|
+
DATASET_ASSETS; // [{ path: 'bibliography.yaml', type: 'file' }, { path: 'images', type: 'directory' }]
|
|
162
|
+
FILE_ASSETS; // [{ path: 'bibliography.yaml', type: 'file' }]
|
|
163
|
+
DIRECTORY_ASSETS; // [{ path: 'images', type: 'directory' }]
|
|
164
|
+
```
|
|
165
|
+
|
|
61
166
|
### Domain model
|
|
62
167
|
|
|
63
168
|
Every domain entity is a class instance with `toJSON()`, `fromJSON()`, `equals()`, and `clone()`:
|
|
@@ -208,6 +313,8 @@ Public API (index.js)
|
|
|
208
313
|
├── Parsing → ConceptParser (canonical + managed format detection)
|
|
209
314
|
├── Serialization → ConceptSerializer (canonical + managed YAML output)
|
|
210
315
|
├── I/O → loadGcr, readConcepts, createGcr, writeConcepts
|
|
316
|
+
├── Compiled formats → CompiledFormatRegistry (TBX, JSON-LD, Turtle, JSONL in GCR)
|
|
317
|
+
├── Dataset assets → DATASET_ASSETS registry (bibliography.yaml, images/ in GCR)
|
|
211
318
|
├── Collections → ConceptCollection (Proxy-based, queryable), ManagedConceptCollection
|
|
212
319
|
├── Validation → ConceptValidator, RegisterValidator, ValidationRule (pluggable)
|
|
213
320
|
├── Utilities → conceptUuid, referenceResolver, V1Reader
|
package/package.json
CHANGED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Registry of compiled/machine formats that can be bundled inside a GCR package.
|
|
3
|
+
*
|
|
4
|
+
* Mirrors the Ruby glossarist gem's GcrPackage::COMPILED_EXTENSIONS:
|
|
5
|
+
* tbx → TBX-Basic XML (ISO 30042)
|
|
6
|
+
* jsonld → JSON-LD (SKOS vocabulary)
|
|
7
|
+
* turtle → Turtle/RDF (SKOS vocabulary)
|
|
8
|
+
* jsonl → JSON Lines (one concept per line)
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
 * Maps each canonical format identifier to the file extension used inside the
 * GCR `compiled/` directory. Values carry NO leading dot; TBX uses the
 * multi-part extension `tbx.xml`.
 *
 * `Object.freeze` alone does not disable `Map.prototype.set/delete/clear`
 * (they act on the Map's internal storage, not on properties), so the
 * mutators are shadowed with throwing own-properties before freezing. This
 * keeps the registry in step with COMPILED_FORMATS, which is derived from it
 * exactly once below.
 * @type {ReadonlyMap<string, string>}
 */
const COMPILED_EXTENSIONS = (() => {
  const registry = new Map([
    ['tbx', 'tbx.xml'],
    ['jsonld', 'jsonld'],
    ['turtle', 'ttl'],
    ['jsonl', 'jsonl'],
  ]);
  const rejectMutation = () => {
    throw new TypeError('COMPILED_EXTENSIONS is read-only');
  };
  for (const mutator of ['set', 'delete', 'clear']) {
    // defineProperty defaults make the shadowing property non-writable.
    Object.defineProperty(registry, mutator, { value: rejectMutation });
  }
  return Object.freeze(registry);
})();

/** Canonical format identifiers, in the registry's insertion order. */
const COMPILED_FORMATS = Object.freeze([...COMPILED_EXTENSIONS.keys()]);
|
|
26
|
+
|
|
27
|
+
/**
 * Builds the bare filename (no directory part) of a compiled-format entry by
 * appending the format's registered extension to the entry ID.
 *
 * @param {string} format - e.g. 'tbx', 'jsonld'
 * @param {string} id - concept ID or document name (e.g. '3.1.1.1', 'glossary')
 * @returns {string} e.g. '3.1.1.1.jsonld', 'glossary.tbx.xml'
 * @throws {RangeError} when `format` has no entry in COMPILED_EXTENSIONS
 */
function compiledFilename(format, id) {
  const extension = COMPILED_EXTENSIONS.get(format);
  if (extension) {
    return `${id}.${extension}`;
  }
  throw new RangeError(`Unknown compiled format: ${format}`);
}
|
|
39
|
+
|
|
40
|
+
/**
 * Builds the full ZIP path of a compiled-format entry:
 * `compiled/<format>/<filename>`.
 *
 * @param {string} format
 * @param {string} id
 * @returns {string} e.g. 'compiled/jsonld/3.1.1.1.jsonld'
 * @throws {RangeError} propagated from compiledFilename for an unknown format
 */
function compiledPath(format, id) {
  const filename = compiledFilename(format, id);
  return `compiled/${format}/${filename}`;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Tells whether `format` is one of the keys of COMPILED_EXTENSIONS.
 * @param {string} format
 * @returns {boolean}
 */
function isKnownFormat(format) {
  const registered = COMPILED_EXTENSIONS.has(format);
  return registered;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Splits a compiled-format ZIP path into its format name and entry ID
 * (concept ID or document name).
 *
 * Returns null when the input is not a string, does not start with
 * `compiled/`, names a format missing from COMPILED_EXTENSIONS, does not end
 * with that format's extension, or would produce an empty ID.
 *
 * @param {string} zipPath - e.g. 'compiled/jsonld/3.1.1.1.jsonld'
 * @returns {{ format: string, id: string } | null}
 */
function parseCompiledPath(zipPath) {
  const root = 'compiled/';
  if (typeof zipPath !== 'string' || !zipPath.startsWith(root)) return null;

  const rest = zipPath.slice(root.length);
  const slash = rest.indexOf('/');
  if (slash === -1) return null;

  const format = rest.slice(0, slash);
  const ext = COMPILED_EXTENSIONS.get(format);
  if (!ext) return null;

  const filename = rest.slice(slash + 1);
  const suffix = `.${ext}`;
  // A filename that is only the suffix (e.g. '.jsonld') has no ID in front of
  // it; reporting '' as an ID would create an entry nobody can address.
  if (filename.length <= suffix.length || !filename.endsWith(suffix)) return null;

  return { format, id: filename.slice(0, -suffix.length) };
}
|
|
81
|
+
|
|
82
|
+
export {
|
|
83
|
+
COMPILED_EXTENSIONS,
|
|
84
|
+
COMPILED_FORMATS,
|
|
85
|
+
compiledFilename,
|
|
86
|
+
compiledPath,
|
|
87
|
+
isKnownFormat,
|
|
88
|
+
parseCompiledPath,
|
|
89
|
+
};
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Registry of dataset assets that can be bundled inside a GCR package.
|
|
3
|
+
*
|
|
4
|
+
* Mirrors the Ruby glossarist gem's GcrPackage::DATASET_ASSETS.
|
|
5
|
+
* Asset types:
|
|
6
|
+
* - file: a single named file at the GCR root (e.g. bibliography.yaml)
|
|
7
|
+
* - directory: a named directory with arbitrary nested files (e.g. images/)
|
|
8
|
+
*
|
|
9
|
+
* New asset types can be added by appending to DATASET_ASSETS (open/closed).
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
/**
 * Every dataset asset a GCR package may carry. Each descriptor is frozen
 * individually because freezing only the array would leave `path`/`type`
 * writable on the exported objects.
 * @type {ReadonlyArray<Readonly<{ path: string, type: string }>>}
 */
const DATASET_ASSETS = Object.freeze([
  Object.freeze({ path: 'bibliography.yaml', type: 'file' }),
  Object.freeze({ path: 'images', type: 'directory' }),
]);

/** Subset of DATASET_ASSETS whose `type` is 'file' (same descriptor objects). */
const FILE_ASSETS = Object.freeze(
  DATASET_ASSETS.filter((asset) => asset.type === 'file'),
);

/** Subset of DATASET_ASSETS whose `type` is 'directory' (same descriptor objects). */
const DIRECTORY_ASSETS = Object.freeze(
  DATASET_ASSETS.filter((asset) => asset.type === 'directory'),
);
|
|
24
|
+
|
|
25
|
+
/**
 * Looks up the file-asset descriptor whose `path` equals the given ZIP path
 * exactly.
 * @param {string} path
 * @returns {{ path: string, type: string } | undefined} undefined when no file asset matches
 */
function findFileAsset(path) {
  for (const asset of FILE_ASSETS) {
    if (asset.path === path) return asset;
  }
  return undefined;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Finds the directory asset a ZIP path belongs to: the path either equals the
 * asset's directory name or starts with that name followed by '/'.
 * @param {string} zipPath
 * @returns {{ path: string, type: string } | undefined} the first matching descriptor, if any
 */
function findDirectoryAssetPath(zipPath) {
  return DIRECTORY_ASSETS.find(
    ({ path }) => zipPath === path || zipPath.startsWith(`${path}/`),
  );
}
|
|
48
|
+
|
|
49
|
+
/**
 * Tells whether a ZIP path is a dataset asset entry, i.e. matches a file
 * asset or falls under a directory asset.
 * @param {string} zipPath
 * @returns {boolean}
 */
function isDatasetAssetPath(zipPath) {
  if (findFileAsset(zipPath) !== undefined) return true;
  return findDirectoryAssetPath(zipPath) !== undefined;
}
|
|
57
|
+
|
|
58
|
+
export {
|
|
59
|
+
DATASET_ASSETS,
|
|
60
|
+
FILE_ASSETS,
|
|
61
|
+
DIRECTORY_ASSETS,
|
|
62
|
+
findFileAsset,
|
|
63
|
+
findDirectoryAssetPath,
|
|
64
|
+
isDatasetAssetPath,
|
|
65
|
+
};
|
package/src/gcr-reader.d.ts
CHANGED
|
@@ -68,6 +68,38 @@ export class GcrPackage {
|
|
|
68
68
|
eachConcept(callback: (concept: Concept, index: number) => void | Promise<void>): Promise<void>;
|
|
69
69
|
/** Load all concepts into an array. */
|
|
70
70
|
allConcepts(): Promise<Concept[]>;
|
|
71
|
+
|
|
72
|
+
// Compiled / machine formats (TBX, JSON-LD, Turtle, JSONL)
|
|
73
|
+
|
|
74
|
+
/** List compiled format directories present in this package. */
|
|
75
|
+
compiledFormats(): Promise<string[]>;
|
|
76
|
+
/** List entry IDs for a given compiled format. */
|
|
77
|
+
compiledFormatIds(format: string): Promise<string[]>;
|
|
78
|
+
/** Check whether a compiled format is present. */
|
|
79
|
+
hasCompiledFormat(format: string): Promise<boolean>;
|
|
80
|
+
/** Read a single compiled-format file as a string. */
|
|
81
|
+
compiledFile(format: string, id: string): Promise<string | null>;
|
|
82
|
+
/** Read a single compiled-format file as a Uint8Array. */
|
|
83
|
+
compiledFileBuffer(format: string, id: string): Promise<Uint8Array | null>;
|
|
84
|
+
/** Iterate all entries for a compiled format. */
|
|
85
|
+
eachCompiledFile(format: string, callback: (id: string, content: string) => void | Promise<void>): Promise<void>;
|
|
86
|
+
/** Load all entries for a compiled format into a Map. */
|
|
87
|
+
allCompiledFiles(format: string): Promise<Map<string, string>>;
|
|
88
|
+
|
|
89
|
+
// Dataset assets (bibliography, images)
|
|
90
|
+
|
|
91
|
+
/** Read bibliography.yaml from the package as raw YAML string. */
|
|
92
|
+
bibliography(): Promise<string | null>;
|
|
93
|
+
/** Check whether the images/ directory is present and non-empty. */
|
|
94
|
+
hasImages(): Promise<boolean>;
|
|
95
|
+
/** List all image file paths (relative to ZIP root). */
|
|
96
|
+
imageFileNames(): Promise<string[]>;
|
|
97
|
+
/** Read a single image file as Uint8Array. */
|
|
98
|
+
imageFile(path: string): Promise<Uint8Array | null>;
|
|
99
|
+
/** Iterate all image files. */
|
|
100
|
+
eachImageFile(callback: (path: string, content: Uint8Array) => void | Promise<void>): Promise<void>;
|
|
101
|
+
/** Load all image files into a Map (path → Uint8Array). */
|
|
102
|
+
allImageFiles(): Promise<Map<string, Uint8Array>>;
|
|
71
103
|
}
|
|
72
104
|
|
|
73
105
|
/** Parse raw concept YAML (canonical or managed format) into a normalized Concept. */
|
package/src/gcr-reader.js
CHANGED
|
@@ -2,6 +2,7 @@ import JSZip from 'jszip';
|
|
|
2
2
|
import yaml from 'js-yaml';
|
|
3
3
|
import { conceptParser } from './concept-parser.js';
|
|
4
4
|
import { InvalidInputError } from './errors.js';
|
|
5
|
+
import { COMPILED_FORMATS, parseCompiledPath, compiledPath } from './compiled-format.js';
|
|
5
6
|
|
|
6
7
|
const BASE64_RE = /^[A-Za-z0-9+/]{100,}={0,2}$/;
|
|
7
8
|
|
|
@@ -149,6 +150,179 @@ export class GcrPackage {
|
|
|
149
150
|
return concepts;
|
|
150
151
|
}
|
|
151
152
|
|
|
153
|
+
// --- Compiled / machine formats (TBX, JSON-LD, Turtle, JSONL) ---
|
|
154
|
+
|
|
155
|
+
/**
|
|
156
|
+
* List compiled format directories present in this package.
|
|
157
|
+
* @returns {Promise<string[]>}
|
|
158
|
+
*/
|
|
159
|
+
async compiledFormats() {
|
|
160
|
+
const seen = new Set();
|
|
161
|
+
this._zip.forEach((relativePath, entry) => {
|
|
162
|
+
if (!entry.dir) {
|
|
163
|
+
const parsed = parseCompiledPath(relativePath);
|
|
164
|
+
if (parsed) seen.add(parsed.format);
|
|
165
|
+
}
|
|
166
|
+
});
|
|
167
|
+
return COMPILED_FORMATS.filter((f) => seen.has(f));
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
/**
|
|
171
|
+
* List entry IDs for a given compiled format.
|
|
172
|
+
* @param {string} format - e.g. 'tbx', 'jsonld', 'turtle', 'jsonl'
|
|
173
|
+
* @returns {Promise<string[]>}
|
|
174
|
+
*/
|
|
175
|
+
async compiledFormatIds(format) {
|
|
176
|
+
const prefix = `compiled/${format}/`;
|
|
177
|
+
const ids = [];
|
|
178
|
+
this._zip.forEach((relativePath, entry) => {
|
|
179
|
+
if (!entry.dir && relativePath.startsWith(prefix)) {
|
|
180
|
+
const parsed = parseCompiledPath(relativePath);
|
|
181
|
+
if (parsed && parsed.format === format) ids.push(parsed.id);
|
|
182
|
+
}
|
|
183
|
+
});
|
|
184
|
+
return ids.sort(naturalSort);
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
/**
|
|
188
|
+
* Check whether a compiled format is present.
|
|
189
|
+
* @param {string} format
|
|
190
|
+
* @returns {Promise<boolean>}
|
|
191
|
+
*/
|
|
192
|
+
async hasCompiledFormat(format) {
|
|
193
|
+
const prefix = `compiled/${format}/`;
|
|
194
|
+
let found = false;
|
|
195
|
+
this._zip.forEach((relativePath, entry) => {
|
|
196
|
+
if (!found && !entry.dir && relativePath.startsWith(prefix)) {
|
|
197
|
+
found = true;
|
|
198
|
+
}
|
|
199
|
+
});
|
|
200
|
+
return found;
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* Read a single compiled-format file as a string.
|
|
205
|
+
* @param {string} format - e.g. 'jsonld'
|
|
206
|
+
* @param {string} id - concept ID or document name (e.g. '3.1.1.1', 'glossary')
|
|
207
|
+
* @returns {Promise<string | null>} null if the file doesn't exist
|
|
208
|
+
*/
|
|
209
|
+
async compiledFile(format, id) {
|
|
210
|
+
return this._readText(compiledPath(format, id));
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
/**
|
|
214
|
+
* Read a single compiled-format file as a Uint8Array (for binary content).
|
|
215
|
+
* @param {string} format
|
|
216
|
+
* @param {string} id
|
|
217
|
+
* @returns {Promise<Uint8Array | null>}
|
|
218
|
+
*/
|
|
219
|
+
async compiledFileBuffer(format, id) {
|
|
220
|
+
const entry = this._zip.file(compiledPath(format, id));
|
|
221
|
+
if (!entry) return null;
|
|
222
|
+
return entry.async('uint8array');
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
/**
|
|
226
|
+
* Iterate all entries for a compiled format.
|
|
227
|
+
* @param {string} format
|
|
228
|
+
* @param {(id: string, content: string) => void | Promise<void>} callback
|
|
229
|
+
* @returns {Promise<void>}
|
|
230
|
+
*/
|
|
231
|
+
async eachCompiledFile(format, callback) {
|
|
232
|
+
const ids = await this.compiledFormatIds(format);
|
|
233
|
+
for (const id of ids) {
|
|
234
|
+
const content = await this.compiledFile(format, id);
|
|
235
|
+
if (content !== null) await callback(id, content);
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
/**
|
|
240
|
+
* Load all entries for a compiled format into a Map (id → content).
|
|
241
|
+
* @param {string} format
|
|
242
|
+
* @returns {Promise<Map<string, string>>}
|
|
243
|
+
*/
|
|
244
|
+
async allCompiledFiles(format) {
|
|
245
|
+
const map = new Map();
|
|
246
|
+
await this.eachCompiledFile(format, (id, content) => { map.set(id, content); });
|
|
247
|
+
return map;
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
// --- Dataset assets (bibliography, images, etc.) ---
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* Read bibliography.yaml from the package as a string (raw YAML).
|
|
254
|
+
* @returns {Promise<string | null>}
|
|
255
|
+
*/
|
|
256
|
+
async bibliography() {
|
|
257
|
+
return this._readText('bibliography.yaml');
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
/**
|
|
261
|
+
* Check whether the images/ directory is present and non-empty.
|
|
262
|
+
* @returns {Promise<boolean>}
|
|
263
|
+
*/
|
|
264
|
+
async hasImages() {
|
|
265
|
+
let found = false;
|
|
266
|
+
this._zip.forEach((relativePath, entry) => {
|
|
267
|
+
if (!found && !entry.dir && relativePath.startsWith('images/')) {
|
|
268
|
+
found = true;
|
|
269
|
+
}
|
|
270
|
+
});
|
|
271
|
+
return found;
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
/**
|
|
275
|
+
* List all image file paths (relative to ZIP root).
|
|
276
|
+
* @returns {Promise<string[]>}
|
|
277
|
+
*/
|
|
278
|
+
async imageFileNames() {
|
|
279
|
+
const names = [];
|
|
280
|
+
this._zip.forEach((relativePath, entry) => {
|
|
281
|
+
if (!entry.dir && relativePath.startsWith('images/')) {
|
|
282
|
+
names.push(relativePath);
|
|
283
|
+
}
|
|
284
|
+
});
|
|
285
|
+
return names.sort();
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/**
|
|
289
|
+
* Read a single image file as a Uint8Array.
|
|
290
|
+
* @param {string} path - relative path starting with 'images/' or just the filename
|
|
291
|
+
* @returns {Promise<Uint8Array | null>}
|
|
292
|
+
*/
|
|
293
|
+
async imageFile(path) {
|
|
294
|
+
const fullPath = path.startsWith('images/') ? path : `images/${path}`;
|
|
295
|
+
const entry = this._zip.file(fullPath);
|
|
296
|
+
if (!entry) return null;
|
|
297
|
+
return entry.async('uint8array');
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
/**
|
|
301
|
+
* Iterate all image files.
|
|
302
|
+
* @param {(path: string, content: Uint8Array) => void | Promise<void>} callback
|
|
303
|
+
* @returns {Promise<void>}
|
|
304
|
+
*/
|
|
305
|
+
async eachImageFile(callback) {
|
|
306
|
+
const names = await this.imageFileNames();
|
|
307
|
+
for (const name of names) {
|
|
308
|
+
const entry = this._zip.file(name);
|
|
309
|
+
if (entry) {
|
|
310
|
+
const content = await entry.async('uint8array');
|
|
311
|
+
await callback(name, content);
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
/**
|
|
317
|
+
* Load all image files into a Map (path → Uint8Array).
|
|
318
|
+
* @returns {Promise<Map<string, Uint8Array>>}
|
|
319
|
+
*/
|
|
320
|
+
async allImageFiles() {
|
|
321
|
+
const map = new Map();
|
|
322
|
+
await this.eachImageFile((path, content) => { map.set(path, content); });
|
|
323
|
+
return map;
|
|
324
|
+
}
|
|
325
|
+
|
|
152
326
|
/** @private @param {string} filePath @returns {Promise<string | null>} */
|
|
153
327
|
async _readText(filePath) {
|
|
154
328
|
const entry = this._zip.file(filePath);
|
package/src/gcr-writer.d.ts
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
import { Concept } from './models/index';
|
|
2
2
|
|
|
3
|
+
/** Compiled formats map: format name → id → content string. */
|
|
4
|
+
export type CompiledFormatsMap = Record<string, Record<string, string> | Map<string, string>>;
|
|
5
|
+
|
|
6
|
+
/** Images map: relative path → binary content. */
|
|
7
|
+
export type ImagesMap = Record<string, Uint8Array | string | ArrayBuffer> | Map<string, Uint8Array | string | ArrayBuffer>;
|
|
8
|
+
|
|
3
9
|
export class GcrWriter {
|
|
4
10
|
static createBuffer(options: {
|
|
5
11
|
concepts: Concept[];
|
|
@@ -7,6 +13,9 @@ export class GcrWriter {
|
|
|
7
13
|
register?: Record<string, unknown>;
|
|
8
14
|
uuidFn?: () => string;
|
|
9
15
|
format?: 'canonical' | 'managed' | 'auto';
|
|
16
|
+
compiledFormats?: CompiledFormatsMap;
|
|
17
|
+
bibliography?: string;
|
|
18
|
+
images?: ImagesMap;
|
|
10
19
|
}): Promise<Uint8Array>;
|
|
11
20
|
}
|
|
12
21
|
|
package/src/gcr-writer.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import JSZip from 'jszip';
|
|
2
2
|
import { conceptSerializer } from './concept-serializer.js';
|
|
3
3
|
import { InvalidInputError } from './errors.js';
|
|
4
|
+
import { compiledPath, isKnownFormat } from './compiled-format.js';
|
|
4
5
|
|
|
5
6
|
export class GcrWriter {
|
|
6
7
|
static async createBuffer(options) {
|
|
@@ -29,8 +30,40 @@ export class GcrWriter {
|
|
|
29
30
|
zip.file(`concepts/${concept.id}.yaml`, y);
|
|
30
31
|
}
|
|
31
32
|
|
|
33
|
+
if (options.compiledFormats) {
|
|
34
|
+
GcrWriter._writeCompiledFormats(zip, options.compiledFormats);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
if (options.bibliography) {
|
|
38
|
+
zip.file('bibliography.yaml', options.bibliography);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
if (options.images) {
|
|
42
|
+
GcrWriter._writeImages(zip, options.images);
|
|
43
|
+
}
|
|
44
|
+
|
|
32
45
|
return zip.generateAsync({ type: 'uint8array' });
|
|
33
46
|
}
|
|
47
|
+
|
|
48
|
+
static _writeCompiledFormats(zip, compiledFormats) {
|
|
49
|
+
for (const [format, entries] of Object.entries(compiledFormats)) {
|
|
50
|
+
if (!isKnownFormat(format)) {
|
|
51
|
+
throw new RangeError(`Unknown compiled format: ${format}`);
|
|
52
|
+
}
|
|
53
|
+
const map = entries instanceof Map ? entries : new Map(Object.entries(entries));
|
|
54
|
+
for (const [id, content] of map) {
|
|
55
|
+
zip.file(compiledPath(format, id), content);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
static _writeImages(zip, images) {
|
|
61
|
+
const map = images instanceof Map ? images : new Map(Object.entries(images));
|
|
62
|
+
for (const [path, content] of map) {
|
|
63
|
+
const fullPath = path.startsWith('images/') ? path : `images/${path}`;
|
|
64
|
+
zip.file(fullPath, content);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
34
67
|
}
|
|
35
68
|
|
|
36
69
|
export async function createGcr(concepts, metadata) {
|
package/src/index.d.ts
CHANGED
|
@@ -39,3 +39,16 @@ export { V1Reader, migrateV1ToV2 } from './v1-reader';
|
|
|
39
39
|
|
|
40
40
|
// Errors
|
|
41
41
|
export { GlossaristError, InvalidInputError, YamlParseError } from './errors';
|
|
42
|
+
|
|
43
|
+
// Compiled format registry
|
|
44
|
+
export const COMPILED_EXTENSIONS: ReadonlyMap<string, string>;
|
|
45
|
+
export const COMPILED_FORMATS: readonly string[];
|
|
46
|
+
export function isKnownFormat(format: string): boolean;
|
|
47
|
+
export function compiledFilename(format: string, id: string): string;
|
|
48
|
+
export function compiledPath(format: string, id: string): string;
|
|
49
|
+
export function parseCompiledPath(zipPath: string): { format: string; id: string } | null;
|
|
50
|
+
|
|
51
|
+
// Dataset asset registry
|
|
52
|
+
export const DATASET_ASSETS: readonly { path: string; type: string }[];
|
|
53
|
+
export const FILE_ASSETS: readonly { path: string; type: string }[];
|
|
54
|
+
export const DIRECTORY_ASSETS: readonly { path: string; type: string }[];
|
package/src/index.js
CHANGED
|
@@ -10,6 +10,21 @@ export { ReferenceResolver, Reference, referenceResolver } from './reference-res
|
|
|
10
10
|
export { V1Reader, migrateV1ToV2 } from './v1-reader.js';
|
|
11
11
|
export { GlossaristError, InvalidInputError, YamlParseError } from './errors.js';
|
|
12
12
|
|
|
13
|
+
export {
|
|
14
|
+
COMPILED_EXTENSIONS,
|
|
15
|
+
COMPILED_FORMATS,
|
|
16
|
+
isKnownFormat,
|
|
17
|
+
compiledFilename,
|
|
18
|
+
compiledPath,
|
|
19
|
+
parseCompiledPath,
|
|
20
|
+
} from './compiled-format.js';
|
|
21
|
+
|
|
22
|
+
export {
|
|
23
|
+
DATASET_ASSETS,
|
|
24
|
+
FILE_ASSETS,
|
|
25
|
+
DIRECTORY_ASSETS,
|
|
26
|
+
} from './dataset-asset.js';
|
|
27
|
+
|
|
13
28
|
export {
|
|
14
29
|
GlossaristModel,
|
|
15
30
|
Concept, LocalizedConcept,
|
|
@@ -2,16 +2,20 @@ import { ConceptCollection } from './concept-collection.js';
|
|
|
2
2
|
import { readConcepts, readRegister } from './concept-reader.js';
|
|
3
3
|
import { writeConcepts } from './concept-writer.js';
|
|
4
4
|
import { loadGcr } from './gcr-reader.js';
|
|
5
|
-
import {
|
|
5
|
+
import { GcrWriter } from './gcr-writer.js';
|
|
6
6
|
|
|
7
7
|
export class ManagedConceptCollection {
|
|
8
8
|
constructor() {
|
|
9
9
|
this._concepts = new ConceptCollection();
|
|
10
10
|
this._register = null;
|
|
11
|
+
this._bibliography = null;
|
|
12
|
+
this._images = null;
|
|
11
13
|
}
|
|
12
14
|
|
|
13
15
|
get concepts() { return this._concepts; }
|
|
14
16
|
get register() { return this._register; }
|
|
17
|
+
get bibliography() { return this._bibliography; }
|
|
18
|
+
get images() { return this._images; }
|
|
15
19
|
|
|
16
20
|
loadFromDirectory(dir) {
|
|
17
21
|
this._concepts = new ConceptCollection(readConcepts(dir));
|
|
@@ -23,6 +27,8 @@ export class ManagedConceptCollection {
|
|
|
23
27
|
const pkg = await loadGcr(input);
|
|
24
28
|
this._concepts = new ConceptCollection(await pkg.allConcepts());
|
|
25
29
|
this._register = await pkg.register();
|
|
30
|
+
this._bibliography = await pkg.bibliography();
|
|
31
|
+
this._images = await pkg.allImageFiles();
|
|
26
32
|
return this;
|
|
27
33
|
}
|
|
28
34
|
|
|
@@ -34,7 +40,15 @@ export class ManagedConceptCollection {
|
|
|
34
40
|
}
|
|
35
41
|
|
|
36
42
|
async saveToGcr(options = {}) {
|
|
37
|
-
return
|
|
43
|
+
return GcrWriter.createBuffer({
|
|
44
|
+
concepts: this._concepts,
|
|
45
|
+
metadata: options.metadata,
|
|
46
|
+
register: this._register,
|
|
47
|
+
format: options.format,
|
|
48
|
+
compiledFormats: options.compiledFormats,
|
|
49
|
+
bibliography: this._bibliography,
|
|
50
|
+
images: this._images,
|
|
51
|
+
});
|
|
38
52
|
}
|
|
39
53
|
|
|
40
54
|
add(concept) {
|
|
@@ -58,4 +72,14 @@ export class ManagedConceptCollection {
|
|
|
58
72
|
this._register = data;
|
|
59
73
|
return this;
|
|
60
74
|
}
|
|
75
|
+
|
|
76
|
+
setBibliography(yamlString) {
|
|
77
|
+
this._bibliography = yamlString;
|
|
78
|
+
return this;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
setImages(images) {
|
|
82
|
+
this._images = images instanceof Map ? images : new Map(Object.entries(images));
|
|
83
|
+
return this;
|
|
84
|
+
}
|
|
61
85
|
}
|