@sc-voice/tools 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,12 +1,38 @@
1
- # merkle-json
1
+ # @sc-voice/tools
2
+ JavaScript library for SC-Voice applications
3
+
4
+ * Math
5
+ * Text
6
+ * Graph
7
+
8
+ ## Math.Fraction
9
+ ```
10
+ let f = new Fraction(9, 240, 'segments');
11
+ console.log(f.value); // 0.0375
12
+ console.log(f.numerator, f.denominator); // 9 240
13
+ console.log(f.n, f.d); // 9 240
14
+ console.log(f.toString()); // 1/2 segments
15
+ console.log(Fraction.gcd(9, 240)); // 3
16
+ console.log(f.difference); // -150
17
+ console.log(f.remainder); // 9
18
+ console.log(f.percent); // '4%'
19
+ console.log(f.add(new Fraction(1,80))); // new Fraction(4,80)
20
+ ```
21
+
22
+ ## Text.WordSpace
23
+ Used for text similarity comparison, WordSpace creates and compares
24
+ Vectors of words weighted by normalized frequency of occurrence.
25
+ Weights and scores are normalized to the interval [0..1].
26
+ (NOTE: Odd how bag-of-words works better here than fastText?)
27
+
28
+ ## Text.MerkleJson
2
29
  Computing the hash of JSON objects can be tricky because JSON.stringify()
3
30
  does not have a guaranteed string representation of a Javascript object.
4
31
  Specifically, the following are equivalent and valid outputs of JSON.stringify():
5
32
 
6
33
  ```js
7
34
  var json = "{size:{w:100,h:200}}";
8
- var json = "{size:{h:100,w:200}}";
9
- ```
35
+ var json = "{size:{h:100,w:200}}";
+ ```
10
36
 
11
37
  MerkleJson guarantees a unique hash code for any Javascript object.
12
38
  In addition, MerkleJson is efficient in that it only recalculates
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sc-voice/tools",
3
- "version": "2.0.0",
3
+ "version": "2.2.0",
4
4
  "description": "Utilities for SC-Voice",
5
5
  "main": "index.mjs",
6
6
  "files": [
@@ -41,24 +41,51 @@ export class LogEntry {
41
41
  }
42
42
  }
43
43
 
44
+ const LEVEL_DEBUG = { id: 'D', priority: -1 };
45
+ const LEVEL_INFO = { id: 'I', priority: 0 };
46
+ const LEVEL_WARN = { id: 'W', priority: 1 };
47
+ const LEVEL_ERROR = { id: 'E', priority: 2 };
48
+ const LEVEL_LOG = { id: 'L', priority: 3 };
49
+
44
50
  export class Logger {
45
51
  constructor(opts = {}) {
46
- let { sink = console, msBase = Date.now() } = opts;
52
+ let {
53
+ sink = console,
54
+ msBase = Date.now(),
55
+ logLevel = Logger.LEVEL_WARN,
56
+ } = opts;
47
57
  Object.assign(this, {
48
58
  history: [],
49
59
  sink,
50
60
  msBase,
61
+ logLevel,
51
62
  });
52
63
  }
53
64
 
65
+ static get LEVEL_DEBUG() {
66
+ return LEVEL_DEBUG;
67
+ }
68
+ static get LEVEL_INFO() {
69
+ return LEVEL_INFO;
70
+ }
71
+ static get LEVEL_WARN() {
72
+ return LEVEL_WARN;
73
+ }
74
+ static get LEVEL_ERROR() {
75
+ return LEVEL_ERROR;
76
+ }
77
+ static get LEVEL_LOG() {
78
+ return LEVEL_LOG;
79
+ }
80
+
54
81
  addEntry(level, args, fSink) {
55
82
  const msg = 'l4r.addEntry;';
56
83
  const dbg = DBG.L4R_ADD_ENTRY;
57
- let { history, sink, msBase } = this;
84
+ let { logLevel, history, sink, msBase } = this;
58
85
  let ms = Date.now() - msBase;
59
86
  let entry = LogEntry.fromArgs(level, args, ms);
60
87
  history.push(entry);
61
- if (sink) {
88
+ if (sink && level.priority >= logLevel.priority) {
62
89
  dbg && console.log(msg, 'sink');
63
90
  fSink?.apply(sink, args);
64
91
  }
@@ -66,22 +93,22 @@ export class Logger {
66
93
  }
67
94
 
68
95
  debug(...args) {
69
- return this.addEntry('D', args, this.sink?.debug);
96
+ return this.addEntry(LEVEL_DEBUG, args, this.sink?.debug);
70
97
  }
71
98
 
72
99
  info(...args) {
73
- return this.addEntry('I', args, this.sink?.info);
74
- }
75
-
76
- log(...args) {
77
- return this.addEntry('L', args, this.sink?.log);
100
+ return this.addEntry(LEVEL_INFO, args, this.sink?.info);
78
101
  }
79
102
 
80
103
  warn(...args) {
81
- return this.addEntry('W', args, this.sink?.warn);
104
+ return this.addEntry(LEVEL_WARN, args, this.sink?.warn);
82
105
  }
83
106
 
84
107
  error(...args) {
85
- return this.addEntry('E', args, this.sink?.error);
108
+ return this.addEntry(LEVEL_ERROR, args, this.sink?.error);
109
+ }
110
+
111
+ log(...args) {
112
+ return this.addEntry(LEVEL_INFO, args, this.sink?.log);
86
113
  }
87
114
  }
@@ -1,5 +1,9 @@
1
1
  import { DBG } from '../defines.mjs';
2
2
 
3
+ // The golden ratio is pretty.
4
+ // 1.6180339887498948482045868343656381177203091798057628621354;
5
+ const GOLDEN_FUDGE = 1.618033988749895;
6
+
3
7
  class Vector extends Object {
4
8
  constructor(props) {
5
9
  super();
@@ -49,6 +53,17 @@ class Vector extends Object {
49
53
  }, new Vector(this));
50
54
  }
51
55
 
56
+ increment(vec2) {
57
+ let keys = Object.keys(vec2);
58
+ return keys.reduce((a, k) => {
59
+ let v2 = vec2[k];
60
+ if (v2) {
61
+ a[k] = (a[k] || 0) + v2;
62
+ }
63
+ return a;
64
+ }, this);
65
+ }
66
+
52
67
  dot(vec2) {
53
68
  const msg = 'V4r.dot:';
54
69
  if (vec2 == null) {
@@ -165,6 +180,9 @@ export class WordSpace {
165
180
  constructor(opts = {}) {
166
181
  let {
167
182
  lang, // 2-letter code: fr, en, es, pt
183
+ corpusBow = new Vector(), // corpus bag of words
184
+ corpusSize = 0, // number of retrieval units (docs, segments, etc.)
185
+ idfWeight = GOLDEN_FUDGE, // IDF dampening
168
186
  minWord = 4, // minimum word length
169
187
  normalize,
170
188
  normalizeVector = WordSpace.normalizeVector,
@@ -189,6 +207,9 @@ export class WordSpace {
189
207
 
190
208
  Object.assign(this, {
191
209
  lang,
210
+ corpusBow,
211
+ corpusSize,
212
+ idfWeight,
192
213
  minWord,
193
214
  normalizeVector,
194
215
  reWordMap,
@@ -197,6 +218,12 @@ export class WordSpace {
197
218
  });
198
219
  }
199
220
 
221
+ static createTfIdf() {
222
+ let minWord = 1;
223
+ let normalizeVector = (v) => v;
224
+ return new WordSpace({ minWord, normalizeVector });
225
+ }
226
+
200
227
  static get WordMapTransformer() {
201
228
  return WordMapTransformer;
202
229
  }
@@ -206,8 +233,7 @@ export class WordSpace {
206
233
  }
207
234
 
208
235
  // Golden Ratio fudge factor scales a count of 1 to ~0.8
209
- // 1.6180339887498948482045868343656381177203091798057628621354
210
- static normalizeVector(v, scale = 1.618033988749895) {
236
+ static normalizeVector(v, scale = GOLDEN_FUDGE) {
211
237
  let vNew = new Vector(v);
212
238
  Object.entries(v).forEach((e) => {
213
239
  let [key, value] = e;
@@ -217,8 +243,83 @@ export class WordSpace {
217
243
  return vNew;
218
244
  }
219
245
 
246
+ addDocument(doc) {
247
+ let { corpusBow } = this;
248
+ this.corpusSize += 1;
249
+ let { bow } = this.countWords(doc, 1); // one-hot
250
+ corpusBow.increment(bow);
251
+
252
+ return this;
253
+ }
254
+
255
+ inverseDocumentFrequency(word, idfWeight) {
256
+ return this.idf(word, idfWeight);
257
+ }
258
+
259
+ idf(word, idfWeight = this.idfWeight) {
260
+ const msg = 'w7e.idf:';
261
+ let { corpusBow, corpusSize } = this;
262
+ let wCount = corpusBow[word] || 0;
263
+ // Map to [0:ignore..1:important]
264
+ return corpusSize
265
+ ? 1 - Math.exp(((wCount - corpusSize) / wCount) * idfWeight)
266
+ : 1;
267
+ }
268
+
269
+ termFrequency(word, document) {
270
+ return this.tf(word, document);
271
+ }
272
+
273
+ tf(word, doc) {
274
+ let { bow, words } = this.countWords(doc);
275
+ let count = bow[word] || 0;
276
+ return count ? count / words.length : 0;
277
+ }
278
+
279
+ tfidf(doc) {
280
+ const msg = 'w7e.tfidf:';
281
+ let { corpusBow, corpusSize, idfWeight } = this;
282
+
283
+ // More efficient implementation of tf * idf
284
+ let { bow, words } = this.countWords(doc);
285
+ let nWords = words.length;
286
+
287
+ let vTfIdf = words.reduce((a, word) => {
288
+ let wd = bow[word] || 0;
289
+ let tf = wd ? wd / nWords : 0;
290
+ let wc = corpusBow[word] || 0;
291
+ let idf = corpusSize
292
+ ? 1 - Math.exp(((wc - corpusSize) / wc) * idfWeight)
293
+ : 1;
294
+ let tfidf = tf * idf;
295
+ if (tfidf) {
296
+ a[word] = tfidf;
297
+ }
298
+ return a;
299
+ }, new Vector());
300
+
301
+ return vTfIdf;
302
+ }
303
+
304
+ countWords(str, maxCount) {
305
+ const msg = 'w7e.countWords:';
306
+ if (str == null) {
307
+ throw new Error(`${msg} str?`);
308
+ }
309
+ let dbg = 0;
310
+ let sNorm = this.transformText(str);
311
+ let words = sNorm.split(' ');
312
+ let bow = words.reduce((a, w) => {
313
+ let count = (a[w] || 0) + 1;
314
+ a[w] = maxCount ? Math.min(maxCount, count) : count;
315
+ return a;
316
+ }, new Vector());
317
+
318
+ return { bow, words };
319
+ }
320
+
220
321
  string2Vector(str, scale = 1) {
221
- const msg = 'W7e.string2Vector:';
322
+ const msg = 'w7e.string2Vector:';
222
323
  if (str == null) {
223
324
  throw new Error(`${msg} str?`);
224
325
  }