@sc-voice/tools 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.mjs CHANGED
@@ -5,9 +5,6 @@ export const ScvMath = {
5
5
  Fraction,
6
6
  };
7
7
 
8
- import {
9
- Aligner, Alignment, AlignmentStatus
10
- } from './src/text/aligner.mjs';
11
8
  import { BilaraPath } from './src/text/bilara-path.mjs';
12
9
  import { EbtDoc } from './src/text/ebt-doc.mjs';
13
10
  import { LegacyDoc } from './src/text/legacy-doc.mjs';
@@ -17,9 +14,6 @@ import { Unicode } from './src/text/unicode.mjs';
17
14
  import { WordSpace } from './src/text/word-space.mjs';
18
15
 
19
16
  export const Text = {
20
- Aligner,
21
- Alignment,
22
- AlignmentStatus,
23
17
  BilaraPath,
24
18
  EbtDoc,
25
19
  LegacyDoc,
@@ -29,3 +23,24 @@ export const Text = {
29
23
  WordSpace,
30
24
  };
31
25
 
26
+ import { default as Sankey } from './src/graph/sankey.mjs';
27
+ export const Graph = {
28
+ Sankey,
29
+ }
30
+
31
+ import {
32
+ Aligner, Alignment, AlignmentStatus
33
+ } from './src/translate/aligner.mjs';
34
+ import { DpdTransformer } from './src/translate/dpd-transformer.mjs';
35
+ import { MockDeepL } from './src/translate/mock-deepl.mjs';
36
+ import { DeepLAdapter } from './src/translate/deepl-adapter.mjs';
37
+ import { QuoteParser } from './src/translate/quote-parser.mjs';
38
+ export const Translate = {
39
+ Aligner,
40
+ Alignment,
41
+ AlignmentStatus,
42
+ DeepLAdapter,
43
+ DpdTransformer,
44
+ MockDeepL,
45
+ QuoteParser,
46
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sc-voice/tools",
3
- "version": "1.1.0",
3
+ "version": "1.3.0",
4
4
  "description": "Utilities for SC-Voice",
5
5
  "main": "index.mjs",
6
6
  "files": [
@@ -31,6 +31,7 @@
31
31
  "homepage": "https://github.com/sc-voice/tools#readme",
32
32
  "devDependencies": {
33
33
  "@biomejs/biome": "1.9.4",
34
+ "deepl-node": "^1.15.0",
34
35
  "eslint": "^9.17.0",
35
36
  "mocha": "^11.0.1",
36
37
  "should": "^13.2.3"
package/src/defines.mjs CHANGED
@@ -1,9 +1,14 @@
1
1
 
2
2
  export const DBG = {
3
- APPLY_WORD_MAP: 0,
4
3
  ALIGN_ALL: 0,
5
4
  ALIGN_LINE: 0,
6
5
  ML_DOC_VECTORS: 0, // 'mn8:3.4',
7
6
  MN8_MOHAN: 0,
7
+ DEEPL_ADAPTER: 0,
8
+ DEEPL_MOCK: 0, // use mock-deepl
9
+ DEEPL_MOCK_XLT: 0, // use mock translation
10
+ DEEPL_TEST_API: 0, // test with live DeepL API ($$$)
11
+ DEEPL_XLT: 0, // test live translation
12
+ WORD_MAP_TRANSFORMER: 0,
8
13
  };
9
14
 
@@ -0,0 +1,56 @@
1
+ import { DBG } from '../defines.mjs';
2
+
3
+ export default class Sankey {
4
+ constructor(opts = {}) {
5
+ let { links = [] } = opts;
6
+
7
+ Object.assign(this, {
8
+ links: [...links],
9
+ });
10
+
11
+ let nodeMap = {};
12
+ Object.defineProperty(this, 'nodeMap', { value: nodeMap });
13
+
14
+ // organize links by source/target
15
+ let linkMap = {};
16
+ Object.defineProperty(this, 'linkMap', { value: linkMap });
17
+ this.links.forEach((link) => {
18
+ let { source, target } = link;
19
+ let key = `${source}|${target}`;
20
+ linkMap[key] = link;
21
+ nodeMap[source] = (nodeMap[source] || 0) + 1;
22
+ nodeMap[target] = (nodeMap[target] || 0) + 1;
23
+ });
24
+ }
25
+
26
+ get nodes() {
27
+ let { nodeMap } = this;
28
+ let nodes = Object.keys(nodeMap).map((n) => ({ id: n }));
29
+ return nodes;
30
+ }
31
+
32
+ addLink({ source, target, value = 0 }) {
33
+ const msg = 'Sankey.addLink()';
34
+ const dbg = DBG.SANKEY;
35
+ let { links, linkMap, nodeMap } = this;
36
+ if (source == null) {
37
+ throw new Error(`${msg} source? [${source}]`);
38
+ }
39
+ nodeMap[source] = (nodeMap[source] || 0) + 1;
40
+ if (target == null) {
41
+ throw new Error(`${msg} target? [${target}]`);
42
+ }
43
+ if (Number.isNaN(value)) {
44
+ throw new Error(`${msg} value? [${value}]`);
45
+ }
46
+ nodeMap[target] = (nodeMap[target] || 0) + 1;
47
+
48
+ let key = `${source}|${target}`;
49
+ let link = linkMap[key];
50
+ if (link == null) {
51
+ link = linkMap[key] = { source, target, value: 0 };
52
+ links.push(link);
53
+ }
54
+ link.value += value;
55
+ }
56
+ }
@@ -41,10 +41,18 @@ export class Fraction {
41
41
  return this.numerator;
42
42
  }
43
43
 
44
+ set n(value) {
45
+ return this.numerator = Number(value);
46
+ }
47
+
44
48
  get d() {
45
49
  return this.denominator;
46
50
  }
47
51
 
52
+ set d(value) {
53
+ return this.denominator = Number(value);
54
+ }
55
+
48
56
  get value() {
49
57
  let { numerator, denominator } = this;
50
58
  return numerator / denominator;
@@ -3,7 +3,11 @@ import { SuttaCentralId } from './sutta-central-id.mjs';
3
3
  let privateCtor = false;
4
4
 
5
5
  const INHERITED_KEYS = [
6
- 'lang', 'author', 'author_uid', 'wordSpace'
6
+ 'lang',
7
+ 'author',
8
+ 'author_uid',
9
+ 'wordSpace',
10
+ 'footer',
7
11
  ];
8
12
 
9
13
  const HDR_KEY = '__header__';
@@ -27,7 +31,7 @@ export class EbtDoc {
27
31
 
28
32
  static create(opts = {}) {
29
33
  const msg = 'E4c.create:';
30
- let { suid, bilaraPath, segMap = {}, parent = {} } = opts;
34
+ let { segMap = {}, parent = {}, suid, bilaraPath } = opts;
31
35
  if (segMap == null) {
32
36
  throw new Error(`${msg} segMap?`);
33
37
  }
@@ -1,3 +1,5 @@
1
+ import { DBG } from '../defines.mjs';
2
+
1
3
  let privateCtor = false;
2
4
 
3
5
  const HTML_FILTER = (() => {
@@ -35,13 +37,38 @@ export class LegacyDoc {
35
37
  return true;
36
38
  }
37
39
 
38
- static create(rawDoc) {
40
+ static async fetchLegacy(opts = {}) {
41
+ const msg = 'L7c.fetch:';
42
+ const dbg = DBG.FETCH_LEGACY;
43
+ let {
44
+ endPoint = 'https://suttacentral.net/api/suttas',
45
+ sutta_uid,
46
+ lang,
47
+ author,
48
+ maxBuffer = 10 * 1024 * 1024,
49
+ } = opts;
50
+ let url = [endPoint, sutta_uid, `${author}?lang=${lang}`].join(
51
+ '/',
52
+ );
53
+ let res = await fetch(url);
54
+ if (!res.ok) {
55
+ throw new Error(`${msg} ${res.status} ${url}`);
56
+ }
57
+ let json = await res.json();
58
+ let { translation } = json;
59
+ return LegacyDoc.create(translation);
60
+ }
61
+
62
+ static create(translation) {
39
63
  const msg = 'LegacyDoc.create:';
40
64
  if (typeof legacy === 'string') {
41
65
  legacy = JSON.parse(legacy);
42
66
  }
43
67
 
44
- let { uid, lang, title, author, author_uid, text } = rawDoc;
68
+ let { uid, lang, title, author, author_uid, text } = translation;
69
+ if (typeof text === 'string') {
70
+ text = text.split('\n');
71
+ }
45
72
 
46
73
  let para;
47
74
  let lines = text.filter((line) => !HTML_FILTER.test(line));
@@ -63,7 +63,7 @@ class Vector extends Object {
63
63
  }, 0);
64
64
  }
65
65
 
66
- intersect(vec2) {
66
+ intersect(vec2 = {}) {
67
67
  let keys = Object.keys(this);
68
68
  return keys.reduce((a, k) => {
69
69
  let v1 = this[k];
@@ -89,26 +89,21 @@ class Vector extends Object {
89
89
  }
90
90
  } // Vector
91
91
 
92
- export class WordSpace {
93
- constructor(opts = {}) {
94
- let {
95
- lang, // 2-letter code: fr, en, es, pt
96
- minWord = 4, // minimum word length
97
- normalize,
98
- normalizeVector = WordSpace.normalizeVector,
99
- wordMap = {}, // word replacement map
100
- reWordMap,
101
- } = opts;
102
-
103
- wordMap = Object.keys(wordMap).reduce((a, w) => {
92
+ export class WordMapTransformer {
93
+ // DEPRECATED
94
+ constructor(oWordMap = {}, opts = {}) {
95
+ let wordMap = Object.keys(oWordMap).reduce((a, w) => {
104
96
  let wLow = w.toLowerCase();
105
- a[wLow] = wordMap[w].toLowerCase();
97
+ a[wLow] = oWordMap[w].toLowerCase();
106
98
  return a;
107
99
  }, {});
100
+
101
+ let { lang = 'en', normalize } = opts;
102
+
108
103
  if (!normalize) {
109
104
  switch (lang) {
110
105
  case 'fr':
111
- normalize = WordSpace.normalizeFR;
106
+ normalize = WordMapTransformer.normalizeFR;
112
107
  break;
113
108
  default:
114
109
  normalize = (s) => s;
@@ -116,27 +111,8 @@ export class WordSpace {
116
111
  }
117
112
  }
118
113
 
119
- Object.assign(this, {
120
- lang,
121
- minWord,
122
- normalize,
123
- normalizeVector,
124
- reWordMap,
125
- wordMap,
126
- });
127
- }
128
-
129
- static compileWordMap(wordMap) {
130
- return (
131
- wordMap &&
132
- Object.keys(wordMap).map((pat) => {
133
- let rep = wordMap[pat];
134
- return {
135
- re: new RegExp(pat, 'iugm'),
136
- rep,
137
- };
138
- })
139
- );
114
+ this.wordMap = wordMap;
115
+ this.normalize = normalize;
140
116
  }
141
117
 
142
118
  static normalizeFR(s) {
@@ -150,34 +126,92 @@ export class WordSpace {
150
126
  .trim();
151
127
  }
152
128
 
153
- applyWordMap(text) {
154
- const msg = 'W7e.applyWordMap:';
155
- const dbg = DBG.APPLY_WORD_MAP;
156
- let { wordMap, reWordMap } = this;
129
+ #compileWordMap() {
130
+ let { wordMap } = this;
131
+ return Object.keys(wordMap).map((pat) => {
132
+ let rep = wordMap[pat];
133
+ return {
134
+ re: new RegExp(pat, 'iugm'),
135
+ rep,
136
+ };
137
+ });
138
+ }
139
+
140
+ transform(text) {
141
+ const msg = 'W16r.transform:';
142
+ const dbg = DBG.WORD_MAP_TRANSFORMER;
143
+ let { wordMap, reWordMap, normalize } = this;
157
144
  if (reWordMap == null) {
158
- reWordMap = WordSpace.compileWordMap(wordMap);
145
+ reWordMap = this.#compileWordMap();
159
146
  this.reWordMap = reWordMap;
160
147
  }
161
148
  dbg && console.log(msg, { text });
162
- let rslt = text;
149
+ let textMapped = text;
163
150
  for (let i = 0; i < reWordMap.length; i++) {
164
151
  let { re, rep } = reWordMap[i];
165
- rslt = rslt.replaceAll(re, rep);
166
- dbg && console.log(msg, { i, rslt, re });
152
+ textMapped = textMapped.replaceAll(re, rep);
153
+ dbg && console.log(msg, { i, textMapped, re });
167
154
  }
155
+ let rslt = normalize(textMapped)
156
+ .toLowerCase()
157
+ .trim()
158
+ .replace(/[-]/g, ' ')
159
+ .replace(/[.,_:;"'“”‘’!?]/g, '');
168
160
  return rslt;
169
161
  }
162
+ }
163
+
164
+ export class WordSpace {
165
+ constructor(opts = {}) {
166
+ let {
167
+ lang, // 2-letter code: fr, en, es, pt
168
+ minWord = 4, // minimum word length
169
+ normalize,
170
+ normalizeVector = WordSpace.normalizeVector,
171
+ transformText,
172
+ transformer,
173
+ reWordMap,
174
+ } = opts;
175
+
176
+ if (transformer == null) {
177
+ let wordMap = opts.wordMap;
178
+ transformer = new WordMapTransformer(wordMap, {
179
+ lang,
180
+ normalize,
181
+ });
182
+ if (transformText == null) {
183
+ transformText = (text) => transformer.transform(text);
184
+ }
185
+ }
186
+ Object.defineProperty(this, 'transformText', {
187
+ value: transformText,
188
+ });
189
+
190
+ Object.assign(this, {
191
+ lang,
192
+ minWord,
193
+ normalizeVector,
194
+ reWordMap,
195
+ transformer,
196
+ wordMap: opts.wordMap, // DEPRECATED
197
+ });
198
+ }
199
+
200
+ static get WordMapTransformer() {
201
+ return WordMapTransformer;
202
+ }
170
203
 
171
204
  static get Vector() {
172
205
  return Vector;
173
206
  }
174
207
 
175
- static normalizeVector(v) {
176
- let tau = 0.618034; // Golden ratio
208
+ // Golden Ratio fudge factor scales a count of 1 to ~0.8
209
+ // 1.6180339887498948482045868343656381177203091798057628621354
210
+ static normalizeVector(v, scale=1.618033988749895) {
177
211
  let vNew = new Vector(v);
178
212
  Object.entries(v).forEach((e) => {
179
213
  let [key, value] = e;
180
- vNew[key] = 1 - Math.exp(-value / tau);
214
+ vNew[key] = 1 - Math.exp(-value * scale);
181
215
  });
182
216
 
183
217
  return vNew;
@@ -189,13 +223,8 @@ export class WordSpace {
189
223
  throw new Error(`${msg} str?`);
190
224
  }
191
225
  let dbg = 0;
192
- let { normalize, normalizeVector, minWord, wordMap } = this;
193
- let sWordMap = this.applyWordMap(str);
194
- let sNorm = normalize(sWordMap)
195
- .toLowerCase()
196
- .trim()
197
- .replace(/[-]/g, ' ')
198
- .replace(/[.,_:;"'“”‘’!?]/g, '');
226
+ let { normalize, normalizeVector, minWord } = this;
227
+ let sNorm = this.transformText(str);
199
228
  let words = sNorm.split(' ');
200
229
  let v = words.reduce((a, w) => {
201
230
  if (w.length >= minWord) {