glossarist 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -0
- package/package.json +1 -1
- package/src/compiled-format.js +89 -0
- package/src/gcr-reader.d.ts +17 -0
- package/src/gcr-reader.js +101 -0
- package/src/gcr-writer.d.ts +4 -0
- package/src/gcr-writer.js +17 -0
- package/src/index.d.ts +8 -0
- package/src/index.js +9 -0
- package/src/models/citation.js +4 -0
- package/src/models/index.d.ts +1 -0
package/README.md
CHANGED
|
@@ -58,6 +58,66 @@ const buf = await createGcr([concept], { shortname: 'test' });
|
|
|
58
58
|
fs.writeFileSync('out.gcr', buf);
|
|
59
59
|
```
|
|
60
60
|
|
|
61
|
+
### Compiled / machine formats in GCR
|
|
62
|
+
|
|
63
|
+
GCR packages can contain pre-compiled machine formats (TBX, JSON-LD, Turtle, JSONL) inside a `compiled/` directory. This mirrors the Ruby glossarist gem's `COMPILED_EXTENSIONS` convention.
|
|
64
|
+
|
|
65
|
+
```js
|
|
66
|
+
import { loadGcr } from 'glossarist';
|
|
67
|
+
|
|
68
|
+
const pkg = await loadGcr(fs.readFileSync('dataset.gcr'));
|
|
69
|
+
|
|
70
|
+
// Discover which compiled formats are present
|
|
71
|
+
const formats = await pkg.compiledFormats(); // ['tbx', 'jsonld', 'turtle']
|
|
72
|
+
|
|
73
|
+
// List entry IDs for a specific format
|
|
74
|
+
const ids = await pkg.compiledFormatIds('jsonld'); // ['3.1.1.1', '3.1.1.2']
|
|
75
|
+
|
|
76
|
+
// Read a single compiled file as string
|
|
77
|
+
const jsonld = await pkg.compiledFile('jsonld', '3.1.1.1');
|
|
78
|
+
|
|
79
|
+
// Read a single compiled file as binary
|
|
80
|
+
const buf = await pkg.compiledFileBuffer('jsonld', '3.1.1.1');
|
|
81
|
+
|
|
82
|
+
// Iterate all entries for a format
|
|
83
|
+
await pkg.eachCompiledFile('turtle', (id, content) => {
|
|
84
|
+
console.log(id, content.length);
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
// Load all entries for a format into a Map
|
|
88
|
+
const allTurtle = await pkg.allCompiledFiles('turtle');
|
|
89
|
+
|
|
90
|
+
// Check if a format is present
|
|
91
|
+
await pkg.hasCompiledFormat('tbx'); // true
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
#### Write compiled formats into a GCR package
|
|
95
|
+
|
|
96
|
+
```js
|
|
97
|
+
import { GcrWriter } from 'glossarist';
|
|
98
|
+
|
|
99
|
+
const buf = await GcrWriter.createBuffer({
|
|
100
|
+
concepts: [...],
|
|
101
|
+
metadata: { shortname: 'my-dataset' },
|
|
102
|
+
compiledFormats: {
|
|
103
|
+
tbx: { 'my-dataset': tbxXmlString },
|
|
104
|
+
jsonld: { '3.1.1.1': jsonldString, '3.1.1.2': jsonldString },
|
|
105
|
+
turtle: { '3.1.1.1': ttlString },
|
|
106
|
+
},
|
|
107
|
+
});
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
#### Format registry
|
|
111
|
+
|
|
112
|
+
```js
|
|
113
|
+
import { COMPILED_FORMATS, COMPILED_EXTENSIONS, isKnownFormat } from 'glossarist';
|
|
114
|
+
|
|
115
|
+
COMPILED_FORMATS; // ['tbx', 'jsonld', 'turtle', 'jsonl']
|
|
116
|
+
COMPILED_EXTENSIONS.get('tbx'); // 'tbx.xml'
|
|
117
|
+
COMPILED_EXTENSIONS.get('turtle'); // 'ttl'
|
|
118
|
+
isKnownFormat('csv'); // false
|
|
119
|
+
```
|
|
120
|
+
|
|
61
121
|
### Domain model
|
|
62
122
|
|
|
63
123
|
Every domain entity is a class instance with `toJSON()`, `fromJSON()`, `equals()`, and `clone()`:
|
|
@@ -208,6 +268,7 @@ Public API (index.js)
|
|
|
208
268
|
├── Parsing → ConceptParser (canonical + managed format detection)
|
|
209
269
|
├── Serialization → ConceptSerializer (canonical + managed YAML output)
|
|
210
270
|
├── I/O → loadGcr, readConcepts, createGcr, writeConcepts
|
|
271
|
+
├── Compiled formats → COMPILED_FORMATS, COMPILED_EXTENSIONS, isKnownFormat, compiledPath, parseCompiledPath (TBX, JSON-LD, Turtle, JSONL in GCR)
|
|
211
272
|
├── Collections → ConceptCollection (Proxy-based, queryable), ManagedConceptCollection
|
|
212
273
|
├── Validation → ConceptValidator, RegisterValidator, ValidationRule (pluggable)
|
|
213
274
|
├── Utilities → conceptUuid, referenceResolver, V1Reader
|
package/package.json
CHANGED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Registry of compiled/machine formats that can be bundled inside a GCR package.
|
|
3
|
+
*
|
|
4
|
+
* Mirrors the Ruby glossarist gem's GcrPackage::COMPILED_EXTENSIONS:
|
|
5
|
+
* tbx → TBX-Basic XML (ISO 30042)
|
|
6
|
+
* jsonld → JSON-LD (SKOS vocabulary)
|
|
7
|
+
* turtle → Turtle/RDF (SKOS vocabulary)
|
|
8
|
+
* jsonl → JSON Lines (one concept per line)
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
 * Maps each compiled format name to the file extension used inside the GCR
 * `compiled/` directory.
 * Keys are the canonical format identifiers; values are the extension WITHOUT
 * a leading dot (e.g. 'ttl'), which for TBX is the multi-part 'tbx.xml'.
 *
 * `Object.freeze` on its own does not protect a Map's entries (it only locks
 * own properties), so the mutating methods are shadowed with a function that
 * throws. The registry is therefore read-only at runtime, not just by type.
 * @type {ReadonlyMap<string, string>}
 */
const COMPILED_EXTENSIONS = (() => {
  const registry = new Map([
    ['tbx', 'tbx.xml'],
    ['jsonld', 'jsonld'],
    ['turtle', 'ttl'],
    ['jsonl', 'jsonl'],
  ]);
  // Installed after construction: the Map constructor itself calls `set`.
  const rejectMutation = () => {
    throw new TypeError('COMPILED_EXTENSIONS is read-only');
  };
  return Object.freeze(
    Object.assign(registry, {
      set: rejectMutation,
      delete: rejectMutation,
      clear: rejectMutation,
    }),
  );
})();
|
|
23
|
+
|
|
24
|
+
/** Frozen list of the canonical format identifiers, in registry key order. */
const COMPILED_FORMATS = Object.freeze(Array.from(COMPILED_EXTENSIONS.keys()));
|
|
26
|
+
|
|
27
|
+
/**
 * Builds the bare filename (no directory part) of a compiled-format entry.
 *
 * @param {string} format - e.g. 'tbx', 'jsonld'
 * @param {string} id - concept ID or document name (e.g. '3.1.1.1', 'glossary')
 * @returns {string} e.g. '3.1.1.1.jsonld', 'glossary.tbx.xml'
 * @throws {RangeError} when `format` has no registered extension
 */
function compiledFilename(format, id) {
  const extension = COMPILED_EXTENSIONS.get(format);
  if (extension) {
    return `${id}.${extension}`;
  }
  throw new RangeError(`Unknown compiled format: ${format}`);
}
|
|
39
|
+
|
|
40
|
+
/**
 * Builds the ZIP path of a compiled-format entry: the `compiled/` root, the
 * format directory, then the filename produced by `compiledFilename`.
 *
 * @param {string} format
 * @param {string} id
 * @returns {string} e.g. 'compiled/jsonld/3.1.1.1.jsonld'
 * @throws {RangeError} propagated from `compiledFilename` for an unknown format
 */
function compiledPath(format, id) {
  const filename = compiledFilename(format, id);
  return `compiled/${format}/${filename}`;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Tells whether `format` is a key of the compiled-format registry.
 * @param {string} format
 * @returns {boolean}
 */
function isKnownFormat(format) {
  const registered = COMPILED_EXTENSIONS.has(format);
  return registered;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Splits a compiled-format ZIP path into its format and entry ID
 * (concept ID or document name).
 * Yields null when the path is not under `compiled/`, has no format
 * directory, names an unregistered format, or lacks that format's extension.
 *
 * @param {string} zipPath - e.g. 'compiled/jsonld/3.1.1.1.jsonld'
 * @returns {{ format: string, id: string } | null}
 */
function parseCompiledPath(zipPath) {
  const root = 'compiled/';
  if (!zipPath.startsWith(root)) return null;

  // First segment is the format directory; everything after the first slash
  // is the filename (further slashes, if any, stay part of it).
  const [format, ...tail] = zipPath.slice(root.length).split('/');
  if (tail.length === 0) return null;

  const extension = COMPILED_EXTENSIONS.get(format);
  if (!extension) return null;

  const filename = tail.join('/');
  const suffix = `.${extension}`;
  return filename.endsWith(suffix)
    ? { format, id: filename.slice(0, filename.length - suffix.length) }
    : null;
}
|
|
81
|
+
|
|
82
|
+
export {
|
|
83
|
+
COMPILED_EXTENSIONS,
|
|
84
|
+
COMPILED_FORMATS,
|
|
85
|
+
compiledFilename,
|
|
86
|
+
compiledPath,
|
|
87
|
+
isKnownFormat,
|
|
88
|
+
parseCompiledPath,
|
|
89
|
+
};
|
package/src/gcr-reader.d.ts
CHANGED
|
@@ -68,6 +68,23 @@ export class GcrPackage {
|
|
|
68
68
|
eachConcept(callback: (concept: Concept, index: number) => void | Promise<void>): Promise<void>;
|
|
69
69
|
/** Load all concepts into an array. */
|
|
70
70
|
allConcepts(): Promise<Concept[]>;
|
|
71
|
+
|
|
72
|
+
// Compiled / machine formats (TBX, JSON-LD, Turtle, JSONL)
|
|
73
|
+
|
|
74
|
+
/** List compiled format directories present in this package. */
|
|
75
|
+
compiledFormats(): Promise<string[]>;
|
|
76
|
+
/** List entry IDs for a given compiled format. */
|
|
77
|
+
compiledFormatIds(format: string): Promise<string[]>;
|
|
78
|
+
/** Check whether a compiled format is present. */
|
|
79
|
+
hasCompiledFormat(format: string): Promise<boolean>;
|
|
80
|
+
/** Read a single compiled-format file as a string. */
|
|
81
|
+
compiledFile(format: string, id: string): Promise<string | null>;
|
|
82
|
+
/** Read a single compiled-format file as a Uint8Array. */
|
|
83
|
+
compiledFileBuffer(format: string, id: string): Promise<Uint8Array | null>;
|
|
84
|
+
/** Iterate all entries for a compiled format. */
|
|
85
|
+
eachCompiledFile(format: string, callback: (id: string, content: string) => void | Promise<void>): Promise<void>;
|
|
86
|
+
/** Load all entries for a compiled format into a Map. */
|
|
87
|
+
allCompiledFiles(format: string): Promise<Map<string, string>>;
|
|
71
88
|
}
|
|
72
89
|
|
|
73
90
|
/** Parse raw concept YAML (canonical or managed format) into a normalized Concept. */
|
package/src/gcr-reader.js
CHANGED
|
@@ -2,6 +2,7 @@ import JSZip from 'jszip';
|
|
|
2
2
|
import yaml from 'js-yaml';
|
|
3
3
|
import { conceptParser } from './concept-parser.js';
|
|
4
4
|
import { InvalidInputError } from './errors.js';
|
|
5
|
+
import { COMPILED_EXTENSIONS, parseCompiledPath, compiledPath } from './compiled-format.js';
|
|
5
6
|
|
|
6
7
|
const BASE64_RE = /^[A-Za-z0-9+/]{100,}={0,2}$/;
|
|
7
8
|
|
|
@@ -149,6 +150,106 @@ export class GcrPackage {
|
|
|
149
150
|
return concepts;
|
|
150
151
|
}
|
|
151
152
|
|
|
153
|
+
// --- Compiled / machine formats (TBX, JSON-LD, Turtle, JSONL) ---
|
|
154
|
+
|
|
155
|
+
/**
|
|
156
|
+
* List compiled format directories present in this package.
|
|
157
|
+
* Only returns formats whose `compiled/{format}/` directory contains at least one file.
|
|
158
|
+
* @returns {Promise<string[]>}
|
|
159
|
+
*/
|
|
160
|
+
async compiledFormats() {
|
|
161
|
+
const seen = new Set();
|
|
162
|
+
this._zip.forEach((relativePath, entry) => {
|
|
163
|
+
if (!entry.dir) {
|
|
164
|
+
const parsed = parseCompiledPath(relativePath);
|
|
165
|
+
if (parsed) seen.add(parsed.format);
|
|
166
|
+
}
|
|
167
|
+
});
|
|
168
|
+
return COMPILED_EXTENSIONS.keys
|
|
169
|
+
? [...COMPILED_EXTENSIONS.keys()].filter((f) => seen.has(f))
|
|
170
|
+
: [...seen];
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
/**
|
|
174
|
+
* List entry IDs for a given compiled format.
|
|
175
|
+
* @param {string} format - e.g. 'tbx', 'jsonld', 'turtle', 'jsonl'
|
|
176
|
+
* @returns {Promise<string[]>}
|
|
177
|
+
*/
|
|
178
|
+
async compiledFormatIds(format) {
|
|
179
|
+
const prefix = `compiled/${format}/`;
|
|
180
|
+
const ids = [];
|
|
181
|
+
this._zip.forEach((relativePath, entry) => {
|
|
182
|
+
if (!entry.dir && relativePath.startsWith(prefix)) {
|
|
183
|
+
const parsed = parseCompiledPath(relativePath);
|
|
184
|
+
if (parsed && parsed.format === format) ids.push(parsed.id);
|
|
185
|
+
}
|
|
186
|
+
});
|
|
187
|
+
return ids.sort(naturalSort);
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Check whether a compiled format is present.
|
|
192
|
+
* @param {string} format
|
|
193
|
+
* @returns {Promise<boolean>}
|
|
194
|
+
*/
|
|
195
|
+
async hasCompiledFormat(format) {
|
|
196
|
+
const prefix = `compiled/${format}/`;
|
|
197
|
+
let found = false;
|
|
198
|
+
this._zip.forEach((relativePath, entry) => {
|
|
199
|
+
if (!found && !entry.dir && relativePath.startsWith(prefix)) {
|
|
200
|
+
found = true;
|
|
201
|
+
}
|
|
202
|
+
});
|
|
203
|
+
return found;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
/**
|
|
207
|
+
* Read a single compiled-format file as a string.
|
|
208
|
+
* @param {string} format - e.g. 'jsonld'
|
|
209
|
+
* @param {string} id - concept ID or document name (e.g. '3.1.1.1', 'glossary')
|
|
210
|
+
* @returns {Promise<string | null>} null if the file doesn't exist
|
|
211
|
+
*/
|
|
212
|
+
async compiledFile(format, id) {
|
|
213
|
+
return this._readText(compiledPath(format, id));
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
/**
|
|
217
|
+
* Read a single compiled-format file as a Uint8Array (for binary content).
|
|
218
|
+
* @param {string} format
|
|
219
|
+
* @param {string} id
|
|
220
|
+
* @returns {Promise<Uint8Array | null>}
|
|
221
|
+
*/
|
|
222
|
+
async compiledFileBuffer(format, id) {
|
|
223
|
+
const entry = this._zip.file(compiledPath(format, id));
|
|
224
|
+
if (!entry) return null;
|
|
225
|
+
return entry.async('uint8array');
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
/**
|
|
229
|
+
* Iterate all entries for a compiled format.
|
|
230
|
+
* @param {string} format
|
|
231
|
+
* @param {(id: string, content: string) => void | Promise<void>} callback
|
|
232
|
+
* @returns {Promise<void>}
|
|
233
|
+
*/
|
|
234
|
+
async eachCompiledFile(format, callback) {
|
|
235
|
+
const ids = await this.compiledFormatIds(format);
|
|
236
|
+
for (const id of ids) {
|
|
237
|
+
const content = await this.compiledFile(format, id);
|
|
238
|
+
if (content !== null) await callback(id, content);
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
/**
|
|
243
|
+
* Load all entries for a compiled format into a Map (id → content).
|
|
244
|
+
* @param {string} format
|
|
245
|
+
* @returns {Promise<Map<string, string>>}
|
|
246
|
+
*/
|
|
247
|
+
async allCompiledFiles(format) {
|
|
248
|
+
const map = new Map();
|
|
249
|
+
await this.eachCompiledFile(format, (id, content) => { map.set(id, content); });
|
|
250
|
+
return map;
|
|
251
|
+
}
|
|
252
|
+
|
|
152
253
|
/** @private @param {string} filePath @returns {Promise<string | null>} */
|
|
153
254
|
async _readText(filePath) {
|
|
154
255
|
const entry = this._zip.file(filePath);
|
package/src/gcr-writer.d.ts
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import { Concept } from './models/index';
|
|
2
2
|
|
|
3
|
+
/** Compiled formats map: format name → id → content string. */
|
|
4
|
+
export type CompiledFormatsMap = Record<string, Record<string, string> | Map<string, string>>;
|
|
5
|
+
|
|
3
6
|
export class GcrWriter {
|
|
4
7
|
static createBuffer(options: {
|
|
5
8
|
concepts: Concept[];
|
|
@@ -7,6 +10,7 @@ export class GcrWriter {
|
|
|
7
10
|
register?: Record<string, unknown>;
|
|
8
11
|
uuidFn?: () => string;
|
|
9
12
|
format?: 'canonical' | 'managed' | 'auto';
|
|
13
|
+
compiledFormats?: CompiledFormatsMap;
|
|
10
14
|
}): Promise<Uint8Array>;
|
|
11
15
|
}
|
|
12
16
|
|
package/src/gcr-writer.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import JSZip from 'jszip';
|
|
2
2
|
import { conceptSerializer } from './concept-serializer.js';
|
|
3
3
|
import { InvalidInputError } from './errors.js';
|
|
4
|
+
import { compiledPath, isKnownFormat } from './compiled-format.js';
|
|
4
5
|
|
|
5
6
|
export class GcrWriter {
|
|
6
7
|
static async createBuffer(options) {
|
|
@@ -29,8 +30,24 @@ export class GcrWriter {
|
|
|
29
30
|
zip.file(`concepts/${concept.id}.yaml`, y);
|
|
30
31
|
}
|
|
31
32
|
|
|
33
|
+
if (options.compiledFormats) {
|
|
34
|
+
GcrWriter._writeCompiledFormats(zip, options.compiledFormats);
|
|
35
|
+
}
|
|
36
|
+
|
|
32
37
|
return zip.generateAsync({ type: 'uint8array' });
|
|
33
38
|
}
|
|
39
|
+
|
|
40
|
+
static _writeCompiledFormats(zip, compiledFormats) {
|
|
41
|
+
for (const [format, entries] of Object.entries(compiledFormats)) {
|
|
42
|
+
if (!isKnownFormat(format)) {
|
|
43
|
+
throw new RangeError(`Unknown compiled format: ${format}`);
|
|
44
|
+
}
|
|
45
|
+
const map = entries instanceof Map ? entries : new Map(Object.entries(entries));
|
|
46
|
+
for (const [id, content] of map) {
|
|
47
|
+
zip.file(compiledPath(format, id), content);
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
}
|
|
34
51
|
}
|
|
35
52
|
|
|
36
53
|
export async function createGcr(concepts, metadata) {
|
package/src/index.d.ts
CHANGED
|
@@ -39,3 +39,11 @@ export { V1Reader, migrateV1ToV2 } from './v1-reader';
|
|
|
39
39
|
|
|
40
40
|
// Errors
|
|
41
41
|
export { GlossaristError, InvalidInputError, YamlParseError } from './errors';
|
|
42
|
+
|
|
43
|
+
// Compiled format registry
|
|
44
|
+
export const COMPILED_EXTENSIONS: ReadonlyMap<string, string>;
|
|
45
|
+
export const COMPILED_FORMATS: readonly string[];
|
|
46
|
+
export function isKnownFormat(format: string): boolean;
|
|
47
|
+
export function compiledFilename(format: string, id: string): string;
|
|
48
|
+
export function compiledPath(format: string, id: string): string;
|
|
49
|
+
export function parseCompiledPath(zipPath: string): { format: string; id: string } | null;
|
package/src/index.js
CHANGED
|
@@ -10,6 +10,15 @@ export { ReferenceResolver, Reference, referenceResolver } from './reference-res
|
|
|
10
10
|
export { V1Reader, migrateV1ToV2 } from './v1-reader.js';
|
|
11
11
|
export { GlossaristError, InvalidInputError, YamlParseError } from './errors.js';
|
|
12
12
|
|
|
13
|
+
export {
|
|
14
|
+
COMPILED_EXTENSIONS,
|
|
15
|
+
COMPILED_FORMATS,
|
|
16
|
+
isKnownFormat,
|
|
17
|
+
compiledFilename,
|
|
18
|
+
compiledPath,
|
|
19
|
+
parseCompiledPath,
|
|
20
|
+
} from './compiled-format.js';
|
|
21
|
+
|
|
13
22
|
export {
|
|
14
23
|
GlossaristModel,
|
|
15
24
|
Concept, LocalizedConcept,
|
package/src/models/citation.js
CHANGED
|
@@ -21,6 +21,10 @@ export class Citation extends GlossaristModel {
|
|
|
21
21
|
}
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
+
get isStructured() {
|
|
25
|
+
return typeof this.source === 'object' && this.source !== null;
|
|
26
|
+
}
|
|
27
|
+
|
|
24
28
|
toString() {
|
|
25
29
|
if (this.ref) return this.ref;
|
|
26
30
|
if (typeof this.source === 'string') return this.source;
|