@sc-voice/tools 3.30.0 → 3.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/defines.mjs +1 -0
- package/src/math/interval.mjs +3 -3
- package/src/text/color-console.mjs +5 -1
- package/src/text/tfidf-space.mjs +28 -4
package/package.json
CHANGED
package/src/defines.mjs
CHANGED
package/src/math/interval.mjs
CHANGED
|
@@ -2,14 +2,14 @@ import { Unicode } from '../text/unicode.mjs';
|
|
|
2
2
|
const { EMPTY_SET, INFINITY } = Unicode;
|
|
3
3
|
import { ColorConsole } from '../text/color-console.mjs';
|
|
4
4
|
const { cc } = ColorConsole;
|
|
5
|
-
import { DBG } from '../defines.mjs';
|
|
6
5
|
import util from 'node:util';
|
|
6
|
+
import { DBG } from '../defines.mjs';
|
|
7
7
|
|
|
8
8
|
const MINUS_INFINITY = `-${INFINITY}`;
|
|
9
9
|
const PLUS_INFINITY = `+${INFINITY}`;
|
|
10
10
|
|
|
11
11
|
export class Interval {
|
|
12
|
-
static styleText
|
|
12
|
+
static styleText; // (text) => text
|
|
13
13
|
static collapseDegenerate = false;
|
|
14
14
|
|
|
15
15
|
constructor(a, b, opts = {}) {
|
|
@@ -69,7 +69,7 @@ export class Interval {
|
|
|
69
69
|
return INFINITY;
|
|
70
70
|
}
|
|
71
71
|
|
|
72
|
-
get size(){
|
|
72
|
+
get size() {
|
|
73
73
|
return this.hi - this.lo;
|
|
74
74
|
}
|
|
75
75
|
|
|
@@ -131,7 +131,11 @@ export class ColorConsole {
|
|
|
131
131
|
}
|
|
132
132
|
}
|
|
133
133
|
|
|
134
|
-
isOk(thing, tf
|
|
134
|
+
isOk(thing, tf) {
|
|
135
|
+
if (tf === undefined) {
|
|
136
|
+
tf = thing;
|
|
137
|
+
}
|
|
138
|
+
|
|
135
139
|
let v = this.valueOf(thing);
|
|
136
140
|
let color = tf ? this.okColor2 : this.badColor2;
|
|
137
141
|
return color + v;
|
package/src/text/tfidf-space.mjs
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { DBG } from '../defines.mjs';
|
|
2
2
|
import { Corpus } from './corpus.mjs';
|
|
3
3
|
import { WordVector } from './word-vector.mjs';
|
|
4
|
+
import { ColorConsole } from './color-console.mjs';
|
|
5
|
+
const { cc } = ColorConsole;
|
|
4
6
|
|
|
5
7
|
// The golden ratio is pretty.
|
|
6
8
|
// 1.6180339887498948482045868343656381177203091798057628621354;
|
|
@@ -48,6 +50,22 @@ export class TfidfSpace {
|
|
|
48
50
|
});
|
|
49
51
|
}
|
|
50
52
|
|
|
53
|
+
// Create a wordWeight function that gives the first prefixLength words
|
|
54
|
+
// of a document a prefixBias share of the document's total weight
|
|
55
|
+
static wordWeightFromPrefix(prefixLength, prefixBias=0.5) {
|
|
56
|
+
const msg = 't8e.wordWeightFromPrefix';
|
|
57
|
+
|
|
58
|
+
let wordWeight = (w,i,nWords) => {
|
|
59
|
+
const nWeighted = Math.min(nWords, prefixLength);
|
|
60
|
+
const nUnweighted = nWords - nWeighted;
|
|
61
|
+
const wf = nUnweighted ? prefixBias : 1;
|
|
62
|
+
return i < nWeighted
|
|
63
|
+
? wf * nWords / nWeighted
|
|
64
|
+
: (1 - wf) * nWords / nUnweighted;
|
|
65
|
+
}
|
|
66
|
+
return wordWeight;
|
|
67
|
+
}
|
|
68
|
+
|
|
51
69
|
static removeHtml(s) {
|
|
52
70
|
return s.replace(/<[^>]*>/gi, '');
|
|
53
71
|
}
|
|
@@ -190,16 +208,22 @@ export class TfidfSpace {
|
|
|
190
208
|
return { bow, words };
|
|
191
209
|
}
|
|
192
210
|
|
|
193
|
-
bowOfText(text) {
|
|
211
|
+
bowOfText(text, opts={}) {
|
|
194
212
|
const msg = 'w7e.bowOfText:';
|
|
213
|
+
let dbg = DBG.W7E_BOW_OF_TEXT;
|
|
195
214
|
if (text == null) {
|
|
196
215
|
throw new Error(`${msg} text?`);
|
|
197
216
|
}
|
|
198
|
-
let
|
|
217
|
+
let {
|
|
218
|
+
wordWeight = (word,i,n) => 1,
|
|
219
|
+
} = opts;
|
|
199
220
|
let sNorm = this.normalizeText(text);
|
|
200
221
|
let words = sNorm.split(' ');
|
|
201
|
-
let
|
|
202
|
-
|
|
222
|
+
let nWords = words.length;
|
|
223
|
+
let bow = words.reduce((a, word, i) => {
|
|
224
|
+
let ww = wordWeight(word, i, nWords);
|
|
225
|
+
a[word] = (a[word] || 0) + ww;
|
|
226
|
+
dbg && cc.fyi1(msg+0.1, {i, word, ww, sum:a[word]});
|
|
203
227
|
return a;
|
|
204
228
|
}, new WordVector());
|
|
205
229
|
|