glossarist 0.4.0 → 0.4.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "glossarist",
3
- "version": "0.4.0",
3
+ "version": "0.4.2",
4
4
  "description": "JavaScript SDK for Glossarist GCR packages — read, write, validate, and manage terminology concepts",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -68,7 +68,7 @@
68
68
  "access": "public"
69
69
  },
70
70
  "dependencies": {
71
- "js-yaml": "^4.1.0",
71
+ "js-yaml": "^5.0.0",
72
72
  "jszip": "^3.10.1"
73
73
  },
74
74
  "devDependencies": {
@@ -86,22 +86,14 @@ export class ConceptCollection {
86
86
  for (const t of lc.terms) {
87
87
  if ((t.designation ?? '').toLowerCase().includes(q)) return true;
88
88
  }
89
- if (this._searchDefs(lc.definitions, q)) return true;
90
- if (this._searchDefs(lc.notes, q)) return true;
91
- if (this._searchDefs(lc.examples, q)) return true;
92
- if (this._searchDefs(lc.annotations, q)) return true;
89
+ }
90
+ for (const { text } of c.walkTexts()) {
91
+ if (text.toLowerCase().includes(q)) return true;
93
92
  }
94
93
  return false;
95
94
  }));
96
95
  }
97
96
 
98
- _searchDefs(arr, q) {
99
- for (const d of arr) {
100
- if ((d.content ?? '').toLowerCase().includes(q)) return true;
101
- }
102
- return false;
103
- }
104
-
105
97
  allLanguages() {
106
98
  const set = new Set();
107
99
  for (const c of this[_items]) {
@@ -1,4 +1,4 @@
1
- import yaml from 'js-yaml';
1
+ import * as yaml from 'js-yaml';
2
2
  import { Concept } from './models/concept.js';
3
3
  import { RelatedConcept } from './models/related-concept.js';
4
4
  import { InvalidInputError, YamlParseError } from './errors.js';
@@ -1,6 +1,6 @@
1
1
  import fs from 'fs';
2
2
  import path from 'path';
3
- import yaml from 'js-yaml';
3
+ import * as yaml from 'js-yaml';
4
4
  import { conceptParser } from './concept-parser.js';
5
5
  import { naturalSort } from './sort.js';
6
6
  import { InvalidInputError } from './errors.js';
@@ -1,4 +1,4 @@
1
- import yaml from 'js-yaml';
1
+ import * as yaml from 'js-yaml';
2
2
 
3
3
  const DUMP_OPTS = { lineWidth: -1, noRefs: true, sortKeys: false, skipInvalid: true };
4
4
 
package/src/gcr-reader.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import JSZip from 'jszip';
2
- import yaml from 'js-yaml';
2
+ import * as yaml from 'js-yaml';
3
3
  import { conceptParser } from './concept-parser.js';
4
4
  import { InvalidInputError } from './errors.js';
5
5
  import { COMPILED_FORMATS, parseCompiledPath, compiledPath } from './compiled-format.js';
@@ -37,7 +37,7 @@ const BASE64_RE = /^[A-Za-z0-9+/]{100,}={0,2}$/;
37
37
  * @property {Term[]} terms - term designations
38
38
  * @property {Definition[]} [definition] - definition content
39
39
  * @property {{ content: string }[]} [notes] - editorial notes
40
- * @property {{ content: string }[]} [examples] - usage examples
40
+ * @property {Array<{ content: string, examples?: object[], sources?: object[] }>} [examples] - usage examples (may themselves nest examples)
41
41
  * @property {Source[]} [sources] - bibliographic sources
42
42
  * @property {string} [entry_status] - e.g. 'valid', 'draft'
43
43
  * @property {string} [normative_status] - e.g. 'preferred', 'admitted'
@@ -1,4 +1,4 @@
1
- import yaml from 'js-yaml';
1
+ import * as yaml from 'js-yaml';
2
2
  import { GlossaristModel } from './base.js';
3
3
  import { BibliographyEntry } from './bibliography-entry.js';
4
4
 
@@ -131,6 +131,23 @@ export class Concept extends GlossaristModel {
131
131
  r => FormulaReference.fromJSON(r));
132
132
  }
133
133
 
134
+ /**
135
+ * Yield every content-text fragment in this concept across all
136
+ * localizations, recursing through nested examples. Each yielded
137
+ * `{ text, source }` carries a dotted path rooted at
138
+ * `localizations.<lang>.<slot>[i]...content`.
139
+ *
140
+ * Designations are not included; they live on `LocalizedConcept.terms`
141
+ * and have a different shape.
142
+ */
143
+ *walkTexts() {
144
+ for (const lang of this.languages) {
145
+ const lc = this.localization(lang);
146
+ if (!lc) continue;
147
+ yield* lc.walkTexts(`localizations.${lang}`);
148
+ }
149
+ }
150
+
134
151
  toJSON() {
135
152
  const obj = { id: this.id };
136
153
  if (this.term != null) obj.term = this.term;
@@ -5,19 +5,43 @@ export class DetailedDefinition extends GlossaristModel {
5
5
  constructor(data = {}) {
6
6
  super();
7
7
  this.content = data.content ?? '';
8
- this.sources = (data.sources ?? []).map(
9
- s => s instanceof Citation ? s : new Citation(s)
10
- );
8
+ this._rawSources = data.sources ?? [];
9
+ this._rawExamples = data.examples ?? [];
10
+ this._sources = null;
11
+ this._examples = null;
12
+ }
13
+
14
+ get sources() {
15
+ return this._lazy('_sources', '_rawSources',
16
+ s => s instanceof Citation ? s : new Citation(s));
17
+ }
18
+
19
+ get examples() {
20
+ return this._lazy('_examples', '_rawExamples',
21
+ e => e instanceof DetailedDefinition ? e : new DetailedDefinition(e));
11
22
  }
12
23
 
13
24
  toJSON() {
14
25
  const obj = { content: this.content };
15
- if (this.sources.length > 0) {
16
- obj.sources = this.sources.map(s => s.toJSON());
17
- }
26
+ this._serialize(obj, 'sources', '_sources', '_rawSources');
27
+ this._serialize(obj, 'examples', '_examples', '_rawExamples');
18
28
  return obj;
19
29
  }
20
30
 
31
+ /**
32
+ * Yield this definition's content and the content of every nested
33
+ * example (recursively). Each item carries `{ text, source }` where
34
+ * `source` is `<path>.content` rooted at the `path` argument.
35
+ */
36
+ *walkTexts(path) {
37
+ if (typeof this.content === 'string' && this.content.length > 0) {
38
+ yield { text: this.content, source: `${path}.content` };
39
+ }
40
+ for (let i = 0; i < this.examples.length; i++) {
41
+ yield* this.examples[i].walkTexts(`${path}.examples[${i}]`);
42
+ }
43
+ }
44
+
21
45
  static fromJSON(data) {
22
46
  return new DetailedDefinition(data);
23
47
  }
@@ -1,4 +1,4 @@
1
- import yaml from 'js-yaml';
1
+ import * as yaml from 'js-yaml';
2
2
  import { GlossaristModel } from './base.js';
3
3
  import { GcrStatistics } from './gcr-statistics.js';
4
4
 
@@ -65,6 +65,7 @@ export class Concept extends GlossaristModel {
65
65
  definition(lang: string): string | null;
66
66
  setLocalization(lang: string, lc: LocalizedConcept | Record<string, unknown>): this;
67
67
  hasLocalization(lang: string): boolean;
68
+ walkTexts(): Iterable<{ text: string; source: string }>;
68
69
  static fromJSON(data: Record<string, unknown>): Concept;
69
70
  }
70
71
 
@@ -96,6 +97,7 @@ export class LocalizedConcept extends GlossaristModel {
96
97
  readonly related: RelatedConcept[];
97
98
  readonly primaryDesignation: string | null;
98
99
  readonly primaryDefinition: string | null;
100
+ walkTexts(basePath: string): Iterable<{ text: string; source: string }>;
99
101
  static fromJSON(data: Record<string, unknown>): LocalizedConcept;
100
102
  }
101
103
 
@@ -243,6 +245,9 @@ export class ConceptDate extends GlossaristModel {
243
245
  export class DetailedDefinition extends GlossaristModel {
244
246
  readonly content: string;
245
247
  readonly sources: ConceptSource[];
248
+ readonly examples: DetailedDefinition[];
249
+ walkTexts(path: string): Iterable<{ text: string; source: string }>;
250
+ static fromJSON(data: Record<string, unknown>): DetailedDefinition;
246
251
  }
247
252
 
248
253
  export class NonVerbRep extends GlossaristModel {
@@ -99,6 +99,27 @@ export class LocalizedConcept extends GlossaristModel {
99
99
  return this.definitions[0]?.content ?? null;
100
100
  }
101
101
 
102
+ *_textSlots() {
103
+ yield ['definitions', this.definitions];
104
+ yield ['notes', this.notes];
105
+ yield ['examples', this.examples];
106
+ yield ['annotations', this.annotations];
107
+ }
108
+
109
+ /**
110
+ * Yield every content-text fragment in this localization, recursing
111
+ * through nested examples. `basePath` prefixes every emitted
112
+ * `source` path; pass `localizations.<lang>` to get paths consistent
113
+ * with the rest of the codebase. Designations are not included.
114
+ */
115
+ *walkTexts(basePath) {
116
+ for (const [name, arr] of this._textSlots()) {
117
+ for (let i = 0; i < arr.length; i++) {
118
+ yield* arr[i].walkTexts(`${basePath}.${name}[${i}]`);
119
+ }
120
+ }
121
+ }
122
+
102
123
  toJSON() {
103
124
  const obj = {};
104
125
  if (this.languageCode) obj.language_code = this.languageCode;
@@ -101,8 +101,7 @@ export class ReferenceResolver {
101
101
  }
102
102
  }
103
103
 
104
- const texts = this._collectTexts(lc, lang);
105
- for (const { text, source } of texts) {
104
+ for (const { text, source } of lc.walkTexts(`localizations.${lang}`)) {
106
105
  for (const ref of this._extractFromText(text, source, concept)) {
107
106
  refs.push(ref);
108
107
  }
@@ -122,35 +121,6 @@ export class ReferenceResolver {
122
121
  });
123
122
  }
124
123
 
125
- _collectTexts(lc, lang) {
126
- const out = [];
127
- for (let i = 0; i < lc.definitions.length; i++) {
128
- const content = lc.definitions[i]?.content;
129
- if (typeof content === 'string') {
130
- out.push({ text: content, source: `localizations.${lang}.definitions[${i}].content` });
131
- }
132
- }
133
- for (let i = 0; i < lc.notes.length; i++) {
134
- const content = lc.notes[i]?.content;
135
- if (content) {
136
- out.push({ text: content, source: `localizations.${lang}.notes[${i}].content` });
137
- }
138
- }
139
- for (let i = 0; i < lc.examples.length; i++) {
140
- const content = lc.examples[i]?.content;
141
- if (typeof content === 'string') {
142
- out.push({ text: content, source: `localizations.${lang}.examples[${i}].content` });
143
- }
144
- }
145
- for (let i = 0; i < lc.annotations.length; i++) {
146
- const content = lc.annotations[i]?.content;
147
- if (typeof content === 'string') {
148
- out.push({ text: content, source: `localizations.${lang}.annotations[${i}].content` });
149
- }
150
- }
151
- return out;
152
- }
153
-
154
124
  _extractFromText(text, source, concept) {
155
125
  const refs = [];
156
126
  const re = /\{\{([^{}]*?)\}\}/g;
package/src/v1-reader.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import fs from 'fs';
2
2
  import path from 'path';
3
- import yaml from 'js-yaml';
3
+ import * as yaml from 'js-yaml';
4
4
  import { Concept } from './models/concept.js';
5
5
  import { InvalidInputError } from './errors.js';
6
6
 
@@ -1,4 +1,4 @@
1
- import yaml from 'js-yaml';
1
+ import * as yaml from 'js-yaml';
2
2
  import { DATASET_ASSETS } from '../dataset-asset.js';
3
3
  import { ValidationResult } from './validation-result.js';
4
4
 
@@ -162,36 +162,14 @@ const CITE_MENTION_RE = /\{\{\s*cite:([^,}\s]+)[^}]*?\}\}/g;
162
162
 
163
163
  function _findCiteMentions(concept) {
164
164
  const mentions = [];
165
- const walkText = (text, source) => {
166
- if (typeof text !== 'string' || text.length === 0) return;
165
+ for (const { text, source } of concept.walkTexts()) {
166
+ if (typeof text !== 'string' || text.length === 0) continue;
167
167
  CITE_MENTION_RE.lastIndex = 0;
168
168
  let m;
169
169
  while ((m = CITE_MENTION_RE.exec(text)) !== null) {
170
170
  mentions.push({ key: m[1].trim(), source });
171
171
  }
172
- };
173
-
174
- for (const lang of concept.languages) {
175
- const lc = concept.localization(lang);
176
- if (!lc) continue;
177
-
178
- for (let i = 0; lc.definitions[i]; i++) {
179
- walkText(lc.definitions[i]?.content, `localizations.${lang}.definitions[${i}].content`);
180
- }
181
- for (let i = 0; lc.notes[i]; i++) {
182
- const content = typeof lc.notes[i] === 'object'
183
- ? (lc.notes[i]?.content ?? '')
184
- : String(lc.notes[i] ?? '');
185
- walkText(content, `localizations.${lang}.notes[${i}].content`);
186
- }
187
- for (let i = 0; lc.examples[i]; i++) {
188
- walkText(lc.examples[i]?.content, `localizations.${lang}.examples[${i}].content`);
189
- }
190
- for (let i = 0; lc.annotations[i]; i++) {
191
- walkText(lc.annotations[i]?.content, `localizations.${lang}.annotations[${i}].content`);
192
- }
193
172
  }
194
-
195
173
  return mentions;
196
174
  }
197
175
 
@@ -297,25 +275,8 @@ function _findNvrMentions(concept) {
297
275
  }
298
276
  };
299
277
 
300
- for (const lang of concept.languages) {
301
- const lc = concept.localization(lang);
302
- if (!lc) continue;
303
- for (let i = 0; i < lc.definitions.length; i++) {
304
- walkText(lc.definitions[i]?.content,
305
- `localizations.${lang}.definitions[${i}].content`);
306
- }
307
- for (let i = 0; i < lc.notes.length; i++) {
308
- walkText(lc.notes[i]?.content,
309
- `localizations.${lang}.notes[${i}].content`);
310
- }
311
- for (let i = 0; i < lc.examples.length; i++) {
312
- walkText(lc.examples[i]?.content,
313
- `localizations.${lang}.examples[${i}].content`);
314
- }
315
- for (let i = 0; i < lc.annotations.length; i++) {
316
- walkText(lc.annotations[i]?.content,
317
- `localizations.${lang}.annotations[${i}].content`);
318
- }
278
+ for (const { text, source } of concept.walkTexts()) {
279
+ walkText(text, source);
319
280
  }
320
281
  return mentions;
321
282
  }