@sc-voice/tools 3.30.0 → 3.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sc-voice/tools",
3
- "version": "3.30.0",
3
+ "version": "3.31.0",
4
4
  "description": "Utilities for SC-Voice",
5
5
  "main": "index.mjs",
6
6
  "files": [
package/src/defines.mjs CHANGED
@@ -15,5 +15,6 @@ export const DBG = {
15
15
  DEEPL_XLT: 0, // test live translation
16
16
  L7C_FETCH_LEGACY: 0,
17
17
  L7C_FETCH_LEGACY_SC: 0, // ignore test cache and use SC
18
+ W7E_BOW_OF_TEXT: 0,
18
19
  WORD_MAP_TRANFORMER: 0,
19
20
  };
@@ -2,14 +2,14 @@ import { Unicode } from '../text/unicode.mjs';
2
2
  const { EMPTY_SET, INFINITY } = Unicode;
3
3
  import { ColorConsole } from '../text/color-console.mjs';
4
4
  const { cc } = ColorConsole;
5
- import { DBG } from '../defines.mjs';
6
5
  import util from 'node:util';
6
+ import { DBG } from '../defines.mjs';
7
7
 
8
8
  const MINUS_INFINITY = `-${INFINITY}`;
9
9
  const PLUS_INFINITY = `+${INFINITY}`;
10
10
 
11
11
  export class Interval {
12
- static styleText = (text)=>text;
12
+ static styleText; // (text) => text
13
13
  static collapseDegenerate = false;
14
14
 
15
15
  constructor(a, b, opts = {}) {
@@ -69,7 +69,7 @@ export class Interval {
69
69
  return INFINITY;
70
70
  }
71
71
 
72
- get size(){
72
+ get size() {
73
73
  return this.hi - this.lo;
74
74
  }
75
75
 
@@ -1,6 +1,8 @@
1
1
  import { DBG } from '../defines.mjs';
2
2
  import { Corpus } from './corpus.mjs';
3
3
  import { WordVector } from './word-vector.mjs';
4
+ import { ColorConsole } from './color-console.mjs';
5
+ const { cc } = ColorConsole;
4
6
 
5
7
  // The golden ratio is pretty.
6
8
  // 1.6180339887498948482045868343656381177203091798057628621354;
@@ -48,6 +50,22 @@ export class TfidfSpace {
48
50
  });
49
51
  }
50
52
 
53
+ // Create a wordWeight function in which the first prefixLength words share
54
+ // prefixBias of a document's total weight and the remaining words share the rest
55
+ static wordWeightFromPrefix(prefixLength, prefixBias=0.5) {
56
+ const msg = 't8e.wordWeightFromPrefix';
57
+
58
+ let wordWeight = (w,i,nWords) => {
59
+ const nWeighted = Math.min(nWords, prefixLength);
60
+ const nUnweighted = nWords - nWeighted;
61
+ const wf = nUnweighted ? prefixBias : 1;
62
+ return i < nWeighted
63
+ ? wf * nWords / nWeighted
64
+ : (1 - wf) * nWords / nUnweighted;
65
+ }
66
+ return wordWeight;
67
+ }
68
+
51
69
  static removeHtml(s) {
52
70
  return s.replace(/<[^>]*>/gi, '');
53
71
  }
@@ -190,16 +208,22 @@ export class TfidfSpace {
190
208
  return { bow, words };
191
209
  }
192
210
 
193
- bowOfText(text) {
211
+ bowOfText(text, opts={}) {
194
212
  const msg = 'w7e.bowOfText:';
213
+ let dbg = DBG.W7E_BOW_OF_TEXT;
195
214
  if (text == null) {
196
215
  throw new Error(`${msg} text?`);
197
216
  }
198
- let dbg = 0;
217
+ let {
218
+ wordWeight = (word,i,n) => 1,
219
+ } = opts;
199
220
  let sNorm = this.normalizeText(text);
200
221
  let words = sNorm.split(' ');
201
- let bow = words.reduce((a, w) => {
202
- a[w] = (a[w] || 0) + 1;
222
+ let nWords = words.length;
223
+ let bow = words.reduce((a, word, i) => {
224
+ let ww = wordWeight(word, i, nWords);
225
+ a[word] = (a[word] || 0) + ww;
226
+ dbg && cc.fyi1(msg+0.1, {i, word, ww, sum:a[word]});
203
227
  return a;
204
228
  }, new WordVector());
205
229