glossarist 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -58,6 +58,66 @@ const buf = await createGcr([concept], { shortname: 'test' });
58
58
  fs.writeFileSync('out.gcr', buf);
59
59
  ```
60
60
 
61
+ ### Compiled / machine formats in GCR
62
+
63
+ GCR packages can contain pre-compiled machine formats (TBX, JSON-LD, Turtle, JSONL) inside a `compiled/` directory, with one subdirectory per format (for example `compiled/jsonld/3.1.1.1.jsonld`). This mirrors the Ruby glossarist gem's `COMPILED_EXTENSIONS` convention.
64
+
65
+ ```js
66
+ import { loadGcr } from 'glossarist';
67
+
68
+ const pkg = await loadGcr(fs.readFileSync('dataset.gcr'));
69
+
70
+ // Discover which compiled formats are present
71
+ const formats = await pkg.compiledFormats(); // ['tbx', 'jsonld', 'turtle']
72
+
73
+ // List entry IDs for a specific format
74
+ const ids = await pkg.compiledFormatIds('jsonld'); // ['3.1.1.1', '3.1.1.2']
75
+
76
+ // Read a single compiled file as string
77
+ const jsonld = await pkg.compiledFile('jsonld', '3.1.1.1');
78
+
79
+ // Read a single compiled file as binary
80
+ const buf = await pkg.compiledFileBuffer('jsonld', '3.1.1.1');
81
+
82
+ // Iterate all entries for a format
83
+ await pkg.eachCompiledFile('turtle', (id, content) => {
84
+ console.log(id, content.length);
85
+ });
86
+
87
+ // Load all entries for a format into a Map
88
+ const allTurtle = await pkg.allCompiledFiles('turtle');
89
+
90
+ // Check if a format is present
91
+ await pkg.hasCompiledFormat('tbx'); // true
92
+ ```
93
+
94
+ #### Write compiled formats into a GCR package
95
+
96
+ ```js
97
+ import { GcrWriter } from 'glossarist';
98
+
99
+ const buf = await GcrWriter.createBuffer({
100
+ concepts: [...],
101
+ metadata: { shortname: 'my-dataset' },
102
+ compiledFormats: {
103
+ tbx: { 'my-dataset': tbxXmlString },
104
+ jsonld: { '3.1.1.1': jsonldString, '3.1.1.2': jsonldString },
105
+ turtle: { '3.1.1.1': ttlString },
106
+ },
107
+ });
108
+ ```
109
+
110
+ #### Format registry
111
+
112
+ ```js
113
+ import { COMPILED_FORMATS, COMPILED_EXTENSIONS, isKnownFormat } from 'glossarist';
114
+
115
+ COMPILED_FORMATS; // ['tbx', 'jsonld', 'turtle', 'jsonl']
116
+ COMPILED_EXTENSIONS.get('tbx'); // 'tbx.xml'
117
+ COMPILED_EXTENSIONS.get('turtle'); // 'ttl'
118
+ isKnownFormat('csv'); // false
119
+ ```
120
+
61
121
  ### Domain model
62
122
 
63
123
  Every domain entity is a class instance with `toJSON()`, `fromJSON()`, `equals()`, and `clone()`:
@@ -208,6 +268,7 @@ Public API (index.js)
208
268
  ├── Parsing → ConceptParser (canonical + managed format detection)
209
269
  ├── Serialization → ConceptSerializer (canonical + managed YAML output)
210
270
  ├── I/O → loadGcr, readConcepts, createGcr, writeConcepts
271
+ ├── Compiled formats → COMPILED_FORMATS, COMPILED_EXTENSIONS, isKnownFormat (TBX, JSON-LD, Turtle, JSONL in GCR)
211
272
  ├── Collections → ConceptCollection (Proxy-based, queryable), ManagedConceptCollection
212
273
  ├── Validation → ConceptValidator, RegisterValidator, ValidationRule (pluggable)
213
274
  ├── Utilities → conceptUuid, referenceResolver, V1Reader
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "glossarist",
3
- "version": "0.1.4",
3
+ "version": "0.1.6",
4
4
  "description": "JavaScript SDK for Glossarist GCR packages — read, write, validate, and manage terminology concepts",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Registry of compiled/machine formats that can be bundled inside a GCR package.
3
+ *
4
+ * Mirrors the Ruby glossarist gem's GcrPackage::COMPILED_EXTENSIONS:
5
+ * tbx → TBX-Basic XML (ISO 30042)
6
+ * jsonld → JSON-LD (SKOS vocabulary)
7
+ * turtle → Turtle/RDF (SKOS vocabulary)
8
+ * jsonl → JSON Lines (one concept per line)
9
+ */
10
+
/**
 * Maps each canonical format identifier to the file extension used inside the
 * GCR `compiled/` directory.
 *
 * Values are stored WITHOUT a leading dot (`'ttl'`, not `'.ttl'`); code that
 * builds a filename inserts the separator itself. TBX uses the multi-part
 * extension `'tbx.xml'`.
 *
 * `Object.freeze` alone does not make a Map immutable: entries live in an
 * internal slot, so `set` / `delete` / `clear` keep working on a frozen
 * instance. The mutators are therefore shadowed with own properties that
 * throw, and the freeze makes those overrides permanent.
 * @type {ReadonlyMap<string, string>}
 */
const COMPILED_EXTENSIONS = (() => {
  const extensions = new Map([
    ['tbx', 'tbx.xml'],
    ['jsonld', 'jsonld'],
    ['turtle', 'ttl'],
    ['jsonl', 'jsonl'],
  ]);
  const rejectMutation = () => {
    throw new TypeError('COMPILED_EXTENSIONS is read-only');
  };
  // Shadow the mutators only after construction: the Map constructor itself
  // populates the initial entries through `set`.
  extensions.set = rejectMutation;
  extensions.delete = rejectMutation;
  extensions.clear = rejectMutation;
  return Object.freeze(extensions);
})();

/**
 * Canonical format identifiers, in registry (insertion) order.
 * This is a frozen snapshot taken once from the keys of COMPILED_EXTENSIONS.
 */
const COMPILED_FORMATS = Object.freeze([...COMPILED_EXTENSIONS.keys()]);
26
+
/**
 * Builds the bare filename (no directory part) of a compiled-format entry.
 *
 * @param {string} format - canonical format identifier, e.g. 'tbx', 'jsonld'
 * @param {string} id - concept ID or document name, e.g. '3.1.1.1', 'glossary'
 * @returns {string} `{id}.{extension}`, e.g. '3.1.1.1.jsonld', 'glossary.tbx.xml'
 * @throws {RangeError} when `format` has no extension in COMPILED_EXTENSIONS
 */
function compiledFilename(format, id) {
  const extension = COMPILED_EXTENSIONS.get(format);
  if (extension) {
    return `${id}.${extension}`;
  }
  throw new RangeError(`Unknown compiled format: ${format}`);
}
39
+
/**
 * Builds the full in-archive path of a compiled-format entry:
 * `compiled/{format}/{filename}`.
 *
 * @param {string} format - canonical format identifier
 * @param {string} id - concept ID or document name
 * @returns {string} e.g. 'compiled/jsonld/3.1.1.1.jsonld'
 * @throws {RangeError} propagated from compiledFilename for an unknown format
 */
function compiledPath(format, id) {
  const filename = compiledFilename(format, id);
  return `compiled/${format}/${filename}`;
}
50
+
/**
 * Tells whether `format` is one of the registered compiled formats,
 * i.e. a key of COMPILED_EXTENSIONS.
 *
 * @param {string} format
 * @returns {boolean}
 */
function isKnownFormat(format) {
  const registered = COMPILED_EXTENSIONS.has(format);
  return registered;
}
59
+
/**
 * Splits a compiled-format ZIP path into its format and entry ID.
 *
 * The path must look like `compiled/{format}/{id}.{ext}`, where `{format}` is
 * a registered format and `{ext}` is that format's registered extension.
 * Everything after the format directory is treated as the filename, so any
 * further path segments stay part of the returned `id`.
 *
 * @param {string} zipPath - e.g. 'compiled/jsonld/3.1.1.1.jsonld'
 * @returns {{ format: string, id: string } | null} null when the path does not
 *   match the pattern above
 */
function parseCompiledPath(zipPath) {
  const root = 'compiled/';
  if (!zipPath.startsWith(root)) return null;

  const remainder = zipPath.slice(root.length);
  const separatorAt = remainder.indexOf('/');
  if (separatorAt === -1) return null;

  const format = remainder.slice(0, separatorAt);
  const extension = COMPILED_EXTENSIONS.get(format);
  if (!extension) return null;

  const filename = remainder.slice(separatorAt + 1);
  const suffix = `.${extension}`;
  // endsWith guarantees the filename is at least as long as the suffix.
  return filename.endsWith(suffix)
    ? { format, id: filename.slice(0, filename.length - suffix.length) }
    : null;
}
81
+
82
+ export {
83
+ COMPILED_EXTENSIONS,
84
+ COMPILED_FORMATS,
85
+ compiledFilename,
86
+ compiledPath,
87
+ isKnownFormat,
88
+ parseCompiledPath,
89
+ };
@@ -68,6 +68,23 @@ export class GcrPackage {
68
68
  eachConcept(callback: (concept: Concept, index: number) => void | Promise<void>): Promise<void>;
69
69
  /** Load all concepts into an array. */
70
70
  allConcepts(): Promise<Concept[]>;
71
+
72
+ // Compiled / machine formats (TBX, JSON-LD, Turtle, JSONL)
73
+
74
+ /** List compiled format directories present in this package. */
75
+ compiledFormats(): Promise<string[]>;
76
+ /** List entry IDs for a given compiled format. */
77
+ compiledFormatIds(format: string): Promise<string[]>;
78
+ /** Check whether a compiled format is present. */
79
+ hasCompiledFormat(format: string): Promise<boolean>;
80
+ /** Read a single compiled-format file as a string. Resolves to null when the entry is absent; rejects with RangeError for an unknown format. */
81
+ compiledFile(format: string, id: string): Promise<string | null>;
82
+ /** Read a single compiled-format file as a Uint8Array. Resolves to null when the entry is absent; rejects with RangeError for an unknown format. */
83
+ compiledFileBuffer(format: string, id: string): Promise<Uint8Array | null>;
84
+ /** Iterate all entries for a compiled format. */
85
+ eachCompiledFile(format: string, callback: (id: string, content: string) => void | Promise<void>): Promise<void>;
86
+ /** Load all entries for a compiled format into a Map. */
87
+ allCompiledFiles(format: string): Promise<Map<string, string>>;
71
88
  }
72
89
 
73
90
  /** Parse raw concept YAML (canonical or managed format) into a normalized Concept. */
package/src/gcr-reader.js CHANGED
@@ -2,6 +2,7 @@ import JSZip from 'jszip';
2
2
  import yaml from 'js-yaml';
3
3
  import { conceptParser } from './concept-parser.js';
4
4
  import { InvalidInputError } from './errors.js';
5
+ import { COMPILED_EXTENSIONS, parseCompiledPath, compiledPath } from './compiled-format.js';
5
6
 
6
7
  const BASE64_RE = /^[A-Za-z0-9+/]{100,}={0,2}$/;
7
8
 
@@ -149,6 +150,106 @@ export class GcrPackage {
149
150
  return concepts;
150
151
  }
151
152
 
153
+ // --- Compiled / machine formats (TBX, JSON-LD, Turtle, JSONL) ---
154
+
155
+ /**
156
+ * List compiled format directories present in this package.
157
+ * Only returns formats whose `compiled/{format}/` directory contains at least one file.
158
+ * @returns {Promise<string[]>}
159
+ */
160
+ async compiledFormats() {
161
+ const seen = new Set();
162
+ this._zip.forEach((relativePath, entry) => {
163
+ if (!entry.dir) {
164
+ const parsed = parseCompiledPath(relativePath);
165
+ if (parsed) seen.add(parsed.format);
166
+ }
167
+ });
168
+ return COMPILED_EXTENSIONS.keys
169
+ ? [...COMPILED_EXTENSIONS.keys()].filter((f) => seen.has(f))
170
+ : [...seen];
171
+ }
172
+
173
+ /**
174
+ * List entry IDs for a given compiled format.
175
+ * @param {string} format - e.g. 'tbx', 'jsonld', 'turtle', 'jsonl'
176
+ * @returns {Promise<string[]>}
177
+ */
178
+ async compiledFormatIds(format) {
179
+ const prefix = `compiled/${format}/`;
180
+ const ids = [];
181
+ this._zip.forEach((relativePath, entry) => {
182
+ if (!entry.dir && relativePath.startsWith(prefix)) {
183
+ const parsed = parseCompiledPath(relativePath);
184
+ if (parsed && parsed.format === format) ids.push(parsed.id);
185
+ }
186
+ });
187
+ return ids.sort(naturalSort);
188
+ }
189
+
190
+ /**
191
+ * Check whether a compiled format is present.
192
+ * @param {string} format
193
+ * @returns {Promise<boolean>}
194
+ */
195
+ async hasCompiledFormat(format) {
196
+ const prefix = `compiled/${format}/`;
197
+ let found = false;
198
+ this._zip.forEach((relativePath, entry) => {
199
+ if (!found && !entry.dir && relativePath.startsWith(prefix)) {
200
+ found = true;
201
+ }
202
+ });
203
+ return found;
204
+ }
205
+
206
+ /**
207
+ * Read a single compiled-format file as a string.
208
+ * @param {string} format - e.g. 'jsonld'
209
+ * @param {string} id - concept ID or document name (e.g. '3.1.1.1', 'glossary')
210
+ * @returns {Promise<string | null>} null if the file doesn't exist
211
+ */
212
+ async compiledFile(format, id) {
213
+ return this._readText(compiledPath(format, id));
214
+ }
215
+
216
+ /**
217
+ * Read a single compiled-format file as a Uint8Array (for binary content).
218
+ * @param {string} format
219
+ * @param {string} id
220
+ * @returns {Promise<Uint8Array | null>}
221
+ */
222
+ async compiledFileBuffer(format, id) {
223
+ const entry = this._zip.file(compiledPath(format, id));
224
+ if (!entry) return null;
225
+ return entry.async('uint8array');
226
+ }
227
+
228
+ /**
229
+ * Iterate all entries for a compiled format.
230
+ * @param {string} format
231
+ * @param {(id: string, content: string) => void | Promise<void>} callback
232
+ * @returns {Promise<void>}
233
+ */
234
+ async eachCompiledFile(format, callback) {
235
+ const ids = await this.compiledFormatIds(format);
236
+ for (const id of ids) {
237
+ const content = await this.compiledFile(format, id);
238
+ if (content !== null) await callback(id, content);
239
+ }
240
+ }
241
+
242
+ /**
243
+ * Load all entries for a compiled format into a Map (id → content).
244
+ * @param {string} format
245
+ * @returns {Promise<Map<string, string>>}
246
+ */
247
+ async allCompiledFiles(format) {
248
+ const map = new Map();
249
+ await this.eachCompiledFile(format, (id, content) => { map.set(id, content); });
250
+ return map;
251
+ }
252
+
152
253
  /** @private @param {string} filePath @returns {Promise<string | null>} */
153
254
  async _readText(filePath) {
154
255
  const entry = this._zip.file(filePath);
@@ -1,5 +1,8 @@
1
1
  import { Concept } from './models/index';
2
2
 
3
+ /** Compiled formats map: format name → id → content string. */
4
+ export type CompiledFormatsMap = Record<string, Record<string, string> | Map<string, string>>;
5
+
3
6
  export class GcrWriter {
4
7
  static createBuffer(options: {
5
8
  concepts: Concept[];
@@ -7,6 +10,7 @@ export class GcrWriter {
7
10
  register?: Record<string, unknown>;
8
11
  uuidFn?: () => string;
9
12
  format?: 'canonical' | 'managed' | 'auto';
13
+ compiledFormats?: CompiledFormatsMap;
10
14
  }): Promise<Uint8Array>;
11
15
  }
12
16
 
package/src/gcr-writer.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import JSZip from 'jszip';
2
2
  import { conceptSerializer } from './concept-serializer.js';
3
3
  import { InvalidInputError } from './errors.js';
4
+ import { compiledPath, isKnownFormat } from './compiled-format.js';
4
5
 
5
6
  export class GcrWriter {
6
7
  static async createBuffer(options) {
@@ -29,8 +30,24 @@ export class GcrWriter {
29
30
  zip.file(`concepts/${concept.id}.yaml`, y);
30
31
  }
31
32
 
33
+ if (options.compiledFormats) {
34
+ GcrWriter._writeCompiledFormats(zip, options.compiledFormats);
35
+ }
36
+
32
37
  return zip.generateAsync({ type: 'uint8array' });
33
38
  }
39
+
40
+ static _writeCompiledFormats(zip, compiledFormats) {
41
+ for (const [format, entries] of Object.entries(compiledFormats)) {
42
+ if (!isKnownFormat(format)) {
43
+ throw new RangeError(`Unknown compiled format: ${format}`);
44
+ }
45
+ const map = entries instanceof Map ? entries : new Map(Object.entries(entries));
46
+ for (const [id, content] of map) {
47
+ zip.file(compiledPath(format, id), content);
48
+ }
49
+ }
50
+ }
34
51
  }
35
52
 
36
53
  export async function createGcr(concepts, metadata) {
package/src/index.d.ts CHANGED
@@ -39,3 +39,11 @@ export { V1Reader, migrateV1ToV2 } from './v1-reader';
39
39
 
40
40
  // Errors
41
41
  export { GlossaristError, InvalidInputError, YamlParseError } from './errors';
42
+
43
+ // Compiled format registry
44
+ export const COMPILED_EXTENSIONS: ReadonlyMap<string, string>;
45
+ export const COMPILED_FORMATS: readonly string[];
46
+ export function isKnownFormat(format: string): boolean;
47
+ export function compiledFilename(format: string, id: string): string;
48
+ export function compiledPath(format: string, id: string): string;
49
+ export function parseCompiledPath(zipPath: string): { format: string; id: string } | null;
package/src/index.js CHANGED
@@ -10,6 +10,15 @@ export { ReferenceResolver, Reference, referenceResolver } from './reference-res
10
10
  export { V1Reader, migrateV1ToV2 } from './v1-reader.js';
11
11
  export { GlossaristError, InvalidInputError, YamlParseError } from './errors.js';
12
12
 
13
+ export {
14
+ COMPILED_EXTENSIONS,
15
+ COMPILED_FORMATS,
16
+ isKnownFormat,
17
+ compiledFilename,
18
+ compiledPath,
19
+ parseCompiledPath,
20
+ } from './compiled-format.js';
21
+
13
22
  export {
14
23
  GlossaristModel,
15
24
  Concept, LocalizedConcept,
@@ -21,6 +21,10 @@ export class Citation extends GlossaristModel {
21
21
  }
22
22
  }
23
23
 
24
+ get isStructured() {
25
+ return typeof this.source === 'object' && this.source !== null;
26
+ }
27
+
24
28
  toString() {
25
29
  if (this.ref) return this.ref;
26
30
  if (typeof this.source === 'string') return this.source;
@@ -70,6 +70,7 @@ export class Citation extends GlossaristModel {
70
70
  readonly version: string | null;
71
71
  readonly clause: string | null;
72
72
  readonly link: string | null;
73
+ readonly isStructured: boolean;
73
74
  toString(): string;
74
75
  }
75
76