@sc-voice/tools 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,362 @@
1
+ import { DBG } from '../defines.mjs';
2
+ import { BilaraPath } from './bilara-path.mjs';
3
+
4
/**
 * Compare two lists of numbers element by element.
 * A list that runs out first sorts before the longer list unless the
 * longer list's next number is negative.
 *
 * @param {number[]} adig - numbers of the first id
 * @param {number[]} bdig - numbers of the second id
 * @returns {number} negative, zero or positive
 */
function compareNumberLists(adig, bdig) {
  const n = Math.max(adig.length, bdig.length);
  for (let i = 0; i < n; i++) {
    const ai = adig[i];
    const bi = bdig[i];
    if (ai === bi) {
      continue;
    }
    if (ai === undefined) {
      return -bi || -1; // -1 when bi is 0 so the result is never 0
    }
    if (bi === undefined) {
      return ai || 1; // 1 when ai is 0 so the result is never 0
    }
    return ai - bi;
  }
  return 0;
}

/**
 * Wrapper around a SuttaCentral id string such as "mn1:2.3" together
 * with static helpers for parsing, matching and comparing such ids.
 */
export class SuttaCentralId {
  /**
   * @param {string} scid - the id to wrap
   * @throws {Error} when scid is null or undefined
   */
  constructor(scid = null) {
    if (scid == null) {
      throw new Error(`required scid:${scid}`);
    }
    this.scid = scid;
  }

  /**
   * @param {string} fp - slash-separated path
   * @returns {string} the text after the last '/'
   */
  static basename(fp) {
    return fp.split('/').pop();
  }

  /**
   * Test whether an id falls within a pattern.
   * The pattern may be a comma-separated list, in which case any
   * matching alternative suffices. When the pattern has no ':' only
   * the part of scid before ':' is compared.
   *
   * @param {string} scid - id (possibly a range) to test
   * @param {string} pat - id pattern, range or comma-separated list
   * @returns {boolean} true if the id range overlaps the pattern range
   */
  static match(scid, pat) {
    const id = pat.indexOf(':') >= 0 ? scid : scid.split(':')[0];
    const pats = pat.split(/, */);
    if (pats.length > 1) {
      return pats.some((p) => SuttaCentralId.match(scid, p));
    }
    const scidPat = pat
      .replace(/\/[^:]*/, '') // remove language and translator
      .replace(/ */g, ''); // "thig 1.1" => "thig1.1"
    const scidLow = SuttaCentralId.rangeLow(id);
    const scidHigh = SuttaCentralId.rangeHigh(id);
    const matchLow = SuttaCentralId.rangeLow(scidPat);
    const matchHigh = SuttaCentralId.rangeHigh(scidPat);
    const cmpL = SuttaCentralId.compareLow(scidHigh, matchLow);
    const cmpH = SuttaCentralId.compareHigh(scidLow, matchHigh);
    return 0 <= cmpL && cmpH <= 0;
  }

  /**
   * Upper bound of an id range. "N-" range starts are removed so that
   * "an1.2-11" becomes "an1.11". For ids with a ':' part, ".9999" is
   * appended. Anything after the first '/' is re-attached unchanged.
   *
   * @param {string} scid - id or range ("--" separates extended ranges)
   * @returns {string} the high end of the range
   * @throws {Error} when the id contains more than one "--"
   */
  static rangeHigh(scid) {
    const [scidMain, ...slashRest] = scid.split('/');
    const suffix = slashRest.join('/');
    const extRanges = scidMain.split('--');
    if (extRanges.length > 2) {
      throw new Error(`Invalid SuttaCentral reference:${scid}`);
    }
    const [c0, c1] = extRanges.map((er) => er.split(':'));
    let result = extRanges[extRanges.length - 1];
    if (c1 && c0.length > 1 && c1.length < 2) {
      // the high end omitted the document id; borrow it from the low end
      result = `${c0[0]}:${result}`;
    }
    result = result.replace(/[0-9]+-/g, '');
    if (c0.length > 1) {
      result += '.9999';
    }
    return suffix ? `${result}/${suffix}` : result;
  }

  /**
   * Lower bound of an id range. "-N" range ends are removed so that
   * "an1.2-11" becomes "an1.2". Anything after the first '/' is
   * re-attached unchanged.
   *
   * @param {string} scid - id or range
   * @returns {string} the low end of the range
   */
  static rangeLow(scid) {
    const [scidMain, ...slashRest] = scid.split('/');
    const suffix = slashRest.join('/');
    const result = scidMain.split('--')[0].replace(/-[0-9]+/g, '');
    return suffix ? `${result}/${suffix}` : result;
  }

  /**
   * Test whether text is a comma-separated list in which every item
   * looks like an id: letters/hyphens, an optional space, digits, then
   * any of "-0-9a-z.:/".
   *
   * @param {string} text - candidate text
   * @returns {boolean} true if every comma-separated item matches
   * @throws {Error} when text is not a string
   */
  static test(text) {
    if (typeof text !== 'string') {
      throw new Error(`Expected string:${text}`);
    }
    return text
      .toLowerCase()
      .split(',')
      .map((p) => p.trim().replace(/\. */gu, '.'))
      .every((part) => /^[-a-z]+ ?[0-9]+[-0-9a-z.:\/]*$/i.test(part));
  }

  /**
   * Collect the distinct second '/'-separated components of each
   * comma-separated item (e.g. "en" in "mn1/en/sujato").
   *
   * @param {string} text - comma-separated id list
   * @returns {string[]} components in first-seen order; empty when
   *   text fails SuttaCentralId.test()
   */
  static languages(text) {
    if (!SuttaCentralId.test(text)) {
      return [];
    }
    const langs = [];
    for (const item of text.toLowerCase().split(',')) {
      const lang = item.trim().split('/')[1];
      if (lang && !langs.includes(lang)) {
        langs.push(lang);
      }
    }
    return langs;
  }

  /**
   * Convert a wildcard pattern to a RegExp: '.' is literal, '*' matches
   * any run of characters, '?' matches one character, '$' and '^' are
   * literal. Other characters are passed to RegExp unchanged.
   *
   * @param {string} [pat] - wildcard pattern
   * @returns {RegExp} match-everything expression when pat is falsy
   */
  static scidRegExp(pat) {
    if (!pat) {
      return /.*/;
    }
    const source = pat
      .replace(/\./g, '\\.')
      .replace(/\*/g, '.*')
      .replace(/\?/g, '.')
      .replace(/[$^]/g, '\\$&');
    return new RegExp(source);
  }

  /**
   * Convert one dot-separated id part into comparable numbers.
   * - A plain number yields [n].
   * - Digits with letters (e.g. "1a") yield [digits, ordinal of the
   *   first letter with a=1].
   * - "N^c" yields [N, charCode(c) - charCode('z') - 1]; this branch
   *   performs no NaN validation.
   *
   * @param {string} part - id part
   * @param {string} id - full id, used only in the error message
   * @returns {number[]} one or two numbers
   * @throws {Error} when a caret-less part cannot be parsed
   */
  static partNumber(part, id) {
    const n = Number(part);
    if (!Number.isNaN(n)) {
      return [n];
    }
    const [c0, c1] = part.split('^');
    let n0;
    let n1;
    if (c1 == null) {
      const c0dig = c0.replace(/[a-z]*/giu, '');
      const c0let = c0.replace(/[0-9]*/gu, '').toLowerCase();
      n0 = Number(c0dig);
      n1 = c0let.charCodeAt(0) - 'a'.charCodeAt(0) + 1;
      if (Number.isNaN(n0) || Number.isNaN(n1)) {
        throw new Error(
          `partNumber() cannot parse ${part} in ${id}`,
        );
      }
    } else {
      n0 = Number(c0);
      n1 = c1.charCodeAt(0) - 'z'.charCodeAt(0) - 1;
    }
    return [n0, n1];
  }

  /**
   * Numbers of an id using the low end of every "a-b" range part.
   * The id is taken from BilaraPath.pathParts(id_or_path).suid.
   *
   * @param {string} id_or_path - id or path
   * @returns {number[]} flattened part numbers
   */
  static scidNumbersLow(id_or_path) {
    const scid = BilaraPath.pathParts(id_or_path).suid;
    return scid
      .replace(/^[-a-z]*/, '')
      .split(':')
      .flatMap((c) => c.split('.'))
      .flatMap((n) =>
        SuttaCentralId.partNumber(n.split('-')[0], id_or_path),
      );
  }

  /**
   * Numbers of an id using the high end of every "a-b" range part.
   * The id is taken from BilaraPath.pathParts(id_or_path).suid.
   *
   * @param {string} id_or_path - id or path
   * @returns {number[]} flattened part numbers
   */
  static scidNumbersHigh(id_or_path) {
    const scid = BilaraPath.pathParts(id_or_path).suid;
    return scid
      .replace(/^[-a-z]*/, '')
      .split(':')
      .flatMap((c) => c.split('.'))
      .flatMap((n) =>
        SuttaCentralId.partNumber(n.split('-').pop(), id_or_path),
      );
  }

  /**
   * Compare two ids by letter prefix, then by the high end of their
   * numeric parts.
   *
   * @param {string} a - id or path
   * @param {string} b - id or path
   * @returns {number} negative, zero or positive
   */
  static compareHigh(a, b) {
    const msg = 'SuttaCentralId.compareHigh()';
    const dbg = DBG.COMPARE;
    const abase = SuttaCentralId.basename(a);
    const bbase = SuttaCentralId.basename(b);
    // NOTE(review): with no digit (or a leading digit) the prefix falls
    // back to the full argument, whereas compareLow uses the basename.
    const aprefix = abase.substring(0, abase.search(/[0-9]/)) || a;
    const bprefix = bbase.substring(0, bbase.search(/[0-9]/)) || b;
    const cmp = aprefix.localeCompare(bprefix);
    dbg &&
      console.log(msg, '[1]cmp prefix', {
        cmp,
        abase,
        bbase,
        aprefix,
        bprefix,
      });
    if (cmp !== 0) {
      return cmp;
    }
    return compareNumberLists(
      SuttaCentralId.scidNumbersHigh(a),
      SuttaCentralId.scidNumbersHigh(b),
    );
  }

  /**
   * Compare two ids by letter prefix, then by the low end of their
   * numeric parts. Only the basename of each argument is used.
   *
   * @param {string} a - id or path
   * @param {string} b - id or path
   * @returns {number} negative, zero or positive
   */
  static compareLow(a, b) {
    const msg = 'SuttaCentralId.compareLow()';
    const dbg = DBG.COMPARE;
    const abase = SuttaCentralId.basename(a);
    const bbase = SuttaCentralId.basename(b);
    const adigit = abase.search(/[0-9]/);
    const bdigit = bbase.search(/[0-9]/);
    const aprefix = adigit < 0 ? abase : abase.substring(0, adigit);
    const bprefix = bdigit < 0 ? bbase : bbase.substring(0, bdigit);
    const cmp = aprefix.localeCompare(bprefix);
    dbg &&
      console.log(msg, '[1]cmp prefix', {
        cmp,
        abase,
        bbase,
        aprefix,
        bprefix,
      });
    if (cmp !== 0) {
      return cmp;
    }
    return compareNumberLists(
      SuttaCentralId.scidNumbersLow(abase),
      SuttaCentralId.scidNumbersLow(bbase),
    );
  }

  /**
   * Dot-separated parts of the text after the first ':', or null when
   * there is no such text.
   */
  get groups() {
    const tokens = this.scid?.split(':');
    return tokens?.[1] ? tokens[1].split('.') : null;
  }

  /** The sutta id with trailing digits, dots and hyphens removed. */
  get nikaya() {
    return this.sutta.replace(/[-0-9.]*$/, '');
  }

  /**
   * DEPRECATED. Folder for the sutta: a fixed lookup for the listed
   * prefixes, otherwise the prefix alone or "prefix/majorid".
   */
  get nikayaFolder() {
    const majorid = this.sutta.split('.')[0];
    const prefix = majorid.replace(/[0-9-.:]*$/, '');
    const folder =
      {
        bv: `kn/bv`,
        cnd: `kn/cnd`,
        cp: `kn/cp`,
        iti: `kn/iti`,
        ja: `kn/ja`,
        kp: `kn/kp`,
        mil: `kn/mil`,
        mnd: `kn/mnd`,
        ne: `kn/ne`,
        pe: `kn/pe`,
        ps: `kn/ps`,
        pv: `kn/pv`,
        snp: `kn/snp`,
        'tha-ap': `kn/tha-ap`,
        thag: `kn/thag`,
        'thi-ap': `kn/thi-ap`,
        thig: `kn/thig`,
        ud: `kn/ud`,
        vv: `kn/vv`,
        'pli-tv-bi-vb-sk': `pli-tv-bi-vb/pli-tv-bi-vb-sk`,
      }[prefix] ||
      (majorid === this.sutta ? prefix : `${prefix}/${majorid}`);
    return folder;
  }

  /** The part of the id before the first ':'. */
  get sutta() {
    return this.scid?.split(':')[0];
  }

  /**
   * Id of the enclosing group: the last non-empty group is dropped and
   * a trailing '.' marks the result as a group ("mn1:2.3.4" =>
   * "mn1:2.3."). Returns null when the id has no groups.
   */
  get parent() {
    const groups = this.groups;
    if (groups == null) {
      return null;
    }
    // a trailing '.' leaves an empty last group; drop it and its parent
    !groups.pop() && groups.pop();
    if (groups.length === 0) {
      return new SuttaCentralId(`${this.sutta}:`);
    }
    return new SuttaCentralId(`${this.sutta}:${groups.join('.')}.`);
  }

  /**
   * The id with its leading collection prefix in conventional
   * capitalization (sn=>SN, mn=>MN, dn=>DN, an=>AN, thig=>Thig,
   * thag=>Thag). Only the complete leading run of letters/hyphens is
   * considered, so ids such as "snp1.1" or a translator name later in
   * the id are left untouched.
   *
   * @returns {string} the standardized id
   */
  standardForm() {
    const std = new Map([
      ['sn', 'SN'],
      ['mn', 'MN'],
      ['dn', 'DN'],
      ['an', 'AN'],
      ['thig', 'Thig'],
      ['thag', 'Thag'],
    ]);
    return this.scid.replace(
      /^[-a-z]+/,
      (prefix) => std.get(prefix) ?? prefix,
    );
  }

  /** @returns {string[]} dot-separated parts of the text before ':' */
  sectionParts() {
    return this.scid.split(':')[0].split('.');
  }

  /**
   * @returns {string[]} dot-separated parts of the text after ':'
   * @throws {TypeError} when the id has no ':'
   */
  segmentParts() {
    const segid = this.scid.split(':')[1];
    return segid.split('.');
  }

  /**
   * Create a new id by adding the given increments to the numeric parts.
   * For a segment id (with ':') the increments apply to the segment
   * parts and parts without an increment are reset to 0. For a document
   * id they apply to the leading document parts and the rest are kept.
   *
   * @param {...number} args - increment per dot-separated part
   * @returns {SuttaCentralId} the new id
   */
  add(...args) {
    const prefix = this.nikaya;
    const colonParts = this.scid.substring(prefix.length).split(':');
    let id2;
    if (colonParts.length > 1) {
      // segment id
      const dotParts = colonParts[1]
        .split('.')
        .map((part, i) =>
          i < args.length ? Number(part) + Number(args[i]) : 0,
        );
      id2 = `${colonParts[0]}:${dotParts.join('.')}`;
    } else {
      // document id
      const dotParts = colonParts[0]
        .split('.')
        .map((part, i) =>
          i < args.length ? Number(part) + Number(args[i]) : part,
        );
      id2 = `${dotParts.join('.')}`;
    }
    return new SuttaCentralId(`${prefix}${id2}`);
  }

  /** @returns {string} the wrapped id */
  toString() {
    return this.scid;
  }
} // class SuttaCentralId
@@ -0,0 +1,370 @@
1
// Per-character metadata, keyed by the character itself.
// Flags seen in this table: isWord, isWordTrim, cuddle ('left' | 'right'
// | 'both'), endSegment, ellipsisBreak, eol.
const SYMBOLS = {
  0: { name: 'digit' },
  1: { name: 'digit' },
  2: { name: 'digit' },
  3: { name: 'digit' },
  4: { name: 'digit' },
  5: { name: 'digit' },
  6: { name: 'digit' },
  7: { name: 'digit' },
  8: { name: 'digit' },
  9: { name: 'digit' },
  '\n': { name: 'end of line', eol: true },
  '!': {
    name: 'exclamation mark',
    isWordTrim: true,
    cuddle: 'left',
    endSegment: true,
  },
  '"': { name: 'quotation mark' },
  '#': { name: 'number sign' },
  $: { name: 'dollar sign' },
  '%': { name: 'percent' },
  '&': { name: 'ampersand' },
  "'": { name: 'apostrophe', isWordTrim: true, isWord: false },
  '(': { name: 'left parenthesis', cuddle: 'right' },
  ')': { name: 'right parenthesis', cuddle: 'left' },
  '*': { name: 'asterisk' },
  '+': { name: 'plus sign' },
  ',': { name: 'comma', isWordTrim: true, cuddle: 'left' },
  '-': { name: 'hyphen minus', isWord: true },
  '{': { name: 'left brace' },
  '}': { name: 'right brace' },
  '.': {
    name: 'full stop',
    isWordTrim: true,
    endSegment: true,
    cuddle: 'left',
  },
  '/': { name: 'solidus' },

  ':': { name: 'colon' },
  ';': { isWordTrim: true, name: 'semicolon' },
  '<': { name: 'less-than sign' },
  '=': { name: 'equals sign' },
  '>': { name: 'greater-than sign' },
  '?': {
    name: 'question mark',
    isWordTrim: true,
    endSegment: true,
    cuddle: 'left',
  },
  '@': { name: 'commercial at' },

  '[': { name: 'left square bracket', cuddle: 'right' },
  ']': { name: 'right square bracket' },

  '\u2014': { name: 'em dash', cuddle: 'both' },
  '\u2018': { name: 'left single quote', cuddle: 'right' },
  '\u2019': {
    name: 'right single quote',
    isWordTrim: true,
    cuddle: 'left',
    isWord: true,
  },
  '\u02bc': { name: 'apostrophe', cuddle: 'left', isWord: true },
  '\u201C': { name: 'left double quote', cuddle: 'right' },
  '\u201D': { name: 'right double quote', cuddle: 'left' },
  '\u2026': { name: 'ellipsis', ellipsisBreak: true, cuddle: 'left' },
};

// Lowercase letters with diacritics and the plain letter that replaces
// each of them in Unicode#romanize().
const ROMANIZE_MAP = {
  '\u0101': 'a',
  '\u1e0d': 'd',
  '\u1e25': 'h',
  '\u012b': 'i',
  '\u1e37': 'l',
  '\u1e39': 'l',
  '\u1e41': 'm',
  '\u1e43': 'm',
  '\u1e47': 'n',
  '\u1e45': 'n',
  '\u00f1': 'n',
  '\u1e5b': 'r',
  '\u1e5d': 'r',
  '\u1e63': 's',
  '\u015b': 's',
  '\u1e6d': 't',
  '\u016b': 'u',
};

/**
 * Named Unicode characters plus helpers that strip symbol characters
 * and replace letters with diacritics by plain letters.
 */
export class Unicode {
  static get LSQUOTE() {
    return '\u2018';
  }
  static get RSQUOTE() {
    return '\u2019';
  }
  static get LDQUOTE() {
    return '\u201C';
  }
  static get RDQUOTE() {
    return '\u201D';
  }
  static get HYPHEN() {
    return '\u2010';
  }
  static get APOSTROPHE() {
    return '\u02BC';
  }
  static get ENDASH() {
    return '\u2013';
  }
  static get EMDASH() {
    return '\u2014';
  }
  static get ELLIPSIS() {
    return '\u2026';
  }
  static get A_MACRON() {
    return '\u0100';
  }
  static get a_MACRON() {
    return '\u0101';
  }
  static get u_MACRON() {
    // U+016B is u with macron (U+016D is u with breve); this is the
    // same code point ROMANIZE_MAP maps to 'u'.
    return '\u016b'; /* UTF-8 c5ab */
  }
  static get LEFT_ARROW() {
    return '\u2190';
  }
  static get RIGHT_ARROW() {
    return '\u2192';
  }
  static get LEFT_RIGHT_ARROW() {
    return '\u2194';
  }
  static get CHECKMARK() {
    return '\u2713';
  }
  static get WARNING() {
    return '\u26A0';
  }
  static get RED_X() {
    return '\u274c';
  }
  static get GREEN_CHECKBOX() {
    return '\u2705';
  }
  static get LEFT_GUILLEMET() {
    return '\u00ab';
  }
  static get RIGHT_GUILLEMET() {
    return '\u00bb';
  }

  /** ANSI escape sequences for terminal colors (new object per call). */
  static get LINUX_COLOR() {
    return {
      BLACK: '\u001b[30m',
      RED: '\u001b[31m',
      GREEN: '\u001b[32m',
      YELLOW: '\u001b[33m',
      BLUE: '\u001b[34m',
      MAGENTA: '\u001b[35m',
      CYAN: '\u001b[36m',
      WHITE: '\u001b[37m',
      NO_COLOR: '\u001b[0m',
    };
  }

  /**
   * @param {object} [opts]
   * @param {object} [opts.romanizeMap] - source character => replacement;
   *   defaults to Unicode.ROMANIZE_MAP
   * @param {object} [opts.symbols] - table keyed by symbol character;
   *   defaults to Unicode.SYMBOLS
   */
  constructor(opts = {}) {
    // Defaults are attached as non-enumerable, read-only properties;
    // caller-supplied tables become ordinary enumerable properties.
    if (opts.romanizeMap == null) {
      Object.defineProperty(this, 'romanizeMap', {
        value: Unicode.ROMANIZE_MAP,
      });
    } else {
      this.romanizeMap = opts.romanizeMap;
    }
    if (opts.symbols == null) {
      Object.defineProperty(this, 'symbols', {
        value: Unicode.SYMBOLS,
      });
    } else {
      this.symbols = opts.symbols; // enumerable
    }
    const syms = Object.keys(this.symbols)
      .sort((a, b) => {
        if (a === b) {
          return 0;
        }
        if (a === '-') {
          return -1;
        }
        if (b === '-') {
          return 1;
        }
        return a.localeCompare(b);
      })
      .join('')
      // Escape every character that is special inside a character
      // class so custom symbol tables cannot corrupt the expression.
      .replace(/[\\\]^-]/g, '\\$&');
    Object.defineProperty(this, 'reSymbols', {
      value: new RegExp(`[${syms}]`, 'ugm'),
    });
  }

  static get ROMANIZE_MAP() {
    return ROMANIZE_MAP;
  }

  static get SYMBOLS() {
    return SYMBOLS;
  }

  get LSQUOTE() {
    return Unicode.LSQUOTE;
  }
  get RSQUOTE() {
    return Unicode.RSQUOTE;
  }
  get LDQUOTE() {
    return Unicode.LDQUOTE;
  }
  get RDQUOTE() {
    return Unicode.RDQUOTE;
  }
  get HYPHEN() {
    return Unicode.HYPHEN;
  }
  get APOSTROPHE() {
    return Unicode.APOSTROPHE;
  }
  get ENDASH() {
    return Unicode.ENDASH;
  }
  get EMDASH() {
    return Unicode.EMDASH;
  }
  get ELLIPSIS() {
    return Unicode.ELLIPSIS;
  }
  get A_MACRON() {
    return Unicode.A_MACRON;
  }
  get a_MACRON() {
    return Unicode.a_MACRON;
  }
  get u_MACRON() {
    return Unicode.u_MACRON;
  }

  /**
   * @param {string} text
   * @returns {string} text with every character that is a key of the
   *   symbols table removed
   */
  stripSymbols(text) {
    return text.replace(this.reSymbols, '');
  }

  /**
   * Lowercase the text and replace every romanizeMap key by its value.
   * Keys are used as regular-expression sources (flags "gui"); the
   * compiled patterns are cached on first use.
   *
   * @param {string} text
   * @returns {string} lowercased, romanized text
   */
  romanize(text) {
    if (this.romanizePats == null) {
      const patterns = Object.keys(this.romanizeMap).map((c) => ({
        rep: this.romanizeMap[c],
        pat: new RegExp(c, 'gui'),
      }));
      Object.defineProperty(this, 'romanizePats', { value: patterns });
    }
    let result = text.toLowerCase();
    for (const { pat, rep } of this.romanizePats) {
      result = result.replace(pat, rep);
    }
    return result;
  }
}