wingbot 3.67.19 → 3.67.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/AiMatching.js +17 -3
- package/src/Request.js +4 -1
- package/src/fuzzy/factoryFuzzySearch.js +3 -3
- package/src/fuzzy/levenshtein.js +60 -17
package/package.json
CHANGED
package/src/AiMatching.js
CHANGED
|
@@ -123,6 +123,8 @@ const COMPARE = {
|
|
|
123
123
|
* @prop {number} confidence
|
|
124
124
|
*/
|
|
125
125
|
|
|
126
|
+
const ENTITY_OK = 0.79; // 0.835 on NLP;
|
|
127
|
+
|
|
126
128
|
/**
|
|
127
129
|
* @class {AiMatching}
|
|
128
130
|
*
|
|
@@ -650,6 +652,7 @@ class AiMatching {
|
|
|
650
652
|
|
|
651
653
|
const allOptional = wantedEntities.every((e) => e.optional
|
|
652
654
|
&& (!e.op || useEntities.every((n) => n.entity !== e.entity)));
|
|
655
|
+
|
|
653
656
|
if (entitiesScore <= 0 && !allOptional) {
|
|
654
657
|
return { score: 0, entities: [] };
|
|
655
658
|
}
|
|
@@ -698,6 +701,8 @@ class AiMatching {
|
|
|
698
701
|
let fromState = 0;
|
|
699
702
|
let metl = 0;
|
|
700
703
|
|
|
704
|
+
let optHandicap = 0;
|
|
705
|
+
|
|
701
706
|
for (const wanted of wantedEntities) {
|
|
702
707
|
const usedIndexes = occurences.has(wanted.entity)
|
|
703
708
|
? occurences.get(wanted.entity)
|
|
@@ -706,7 +711,9 @@ class AiMatching {
|
|
|
706
711
|
let entityExists = false;
|
|
707
712
|
const index = requestEntities
|
|
708
713
|
.findIndex((e, i) => {
|
|
709
|
-
if (e.entity !== wanted.entity
|
|
714
|
+
if (e.entity !== wanted.entity
|
|
715
|
+
|| usedIndexes.includes(i)
|
|
716
|
+
|| e.score < ENTITY_OK) {
|
|
710
717
|
return false;
|
|
711
718
|
}
|
|
712
719
|
entityExists = true;
|
|
@@ -754,14 +761,21 @@ class AiMatching {
|
|
|
754
761
|
}
|
|
755
762
|
|
|
756
763
|
if (!matching) { // && optional && !entityExists
|
|
757
|
-
|
|
764
|
+
if (optHandicap < this.redundantEntityHandicap) {
|
|
765
|
+
handicap += this.redundantEntityHandicap;
|
|
766
|
+
} else {
|
|
767
|
+
handicap += this.optionalHandicap;
|
|
768
|
+
}
|
|
769
|
+
optHandicap += this.redundantEntityHandicap;
|
|
758
770
|
continue;
|
|
759
771
|
}
|
|
760
772
|
|
|
761
773
|
if (wanted.optional) {
|
|
762
|
-
|
|
774
|
+
const oph = wanted.op
|
|
763
775
|
? this.optionalEqualityHandicap
|
|
764
776
|
: this.optionalHandicap;
|
|
777
|
+
|
|
778
|
+
handicap += oph;
|
|
765
779
|
}
|
|
766
780
|
|
|
767
781
|
if (wanted.op === COMPARE.NOT_EQUAL) {
|
package/src/Request.js
CHANGED
|
@@ -1649,11 +1649,14 @@ It looks like the bot isn't connected to class BotApp or the Processor is used w
|
|
|
1649
1649
|
entity,
|
|
1650
1650
|
value,
|
|
1651
1651
|
score = 1,
|
|
1652
|
+
entityScore = Math.max(score, 0.8),
|
|
1652
1653
|
timestamp = makeTimestamp()
|
|
1653
1654
|
) {
|
|
1654
1655
|
const res = Request.text(senderId, text, timestamp);
|
|
1655
1656
|
|
|
1656
|
-
return Request.addIntentToRequest(res, intent, [
|
|
1657
|
+
return Request.addIntentToRequest(res, intent, [
|
|
1658
|
+
{ entity, value, score: entityScore }
|
|
1659
|
+
], score);
|
|
1657
1660
|
}
|
|
1658
1661
|
|
|
1659
1662
|
static quickReply (senderId, action, data = {}, timestamp = makeTimestamp()) {
|
package/src/fuzzy/factoryFuzzySearch.js
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
const { shortArrayIndex, splitToNgrams, cleanup } = require('./fuzzyUtils');
|
|
7
7
|
const {
|
|
8
|
-
|
|
8
|
+
multiwordLevenshtein, SEED_FUZZY, SEED_FUZZY_MULTIPLICATOR, WORD_HANDICAP_K_FUZZY
|
|
9
9
|
} = require('./levenshtein');
|
|
10
10
|
|
|
11
11
|
const LOWER_DUPLICATES = 0.9;
|
|
@@ -93,7 +93,7 @@ function searchFnFactory (indexMap, ngramCounts, entities, maxIdf, {
|
|
|
93
93
|
const [, entityIndex, cleanText] = ngramCounts[id];
|
|
94
94
|
const [entity, value] = entities[entityIndex];
|
|
95
95
|
const relIdf = (idf / cnt) / maxIdf;
|
|
96
|
-
let score =
|
|
96
|
+
let score = multiwordLevenshtein(
|
|
97
97
|
cleanText,
|
|
98
98
|
cleanQuery,
|
|
99
99
|
levenshteinSeed,
|
|
@@ -103,7 +103,7 @@ function searchFnFactory (indexMap, ngramCounts, entities, maxIdf, {
|
|
|
103
103
|
|
|
104
104
|
if (cleanQuery.match(/^[^\s]{1,3}\s+.{6,}$/)) {
|
|
105
105
|
const without = cleanQuery.replace(/^[^\s]{1,3}\s+/, '');
|
|
106
|
-
const altScore =
|
|
106
|
+
const altScore = multiwordLevenshtein(
|
|
107
107
|
cleanText,
|
|
108
108
|
without,
|
|
109
109
|
levenshteinSeed,
|
package/src/fuzzy/levenshtein.js
CHANGED
|
@@ -128,42 +128,68 @@ function addSeed (seed, len, value, base = seed) {
|
|
|
128
128
|
return base + (((len - value) / len) * (1 - seed));
|
|
129
129
|
}
|
|
130
130
|
|
|
131
|
+
function numStats (left, right) {
|
|
132
|
+
const len = Math.max(left.length, right.length);
|
|
133
|
+
const leftNum = left.replace(/[^0-9]+/g, '');
|
|
134
|
+
const rightNum = right.replace(/[^0-9]+/g, '');
|
|
135
|
+
const numLen = Math.max(leftNum.length, rightNum.length);
|
|
136
|
+
const numLev = numLen ? levenshtein(leftNum, rightNum) * NUMERIC_KOEF : 0;
|
|
137
|
+
const numRelLen = len ? numLen / len : 0;
|
|
138
|
+
|
|
139
|
+
return {
|
|
140
|
+
len,
|
|
141
|
+
leftNum,
|
|
142
|
+
rightNum,
|
|
143
|
+
numLen,
|
|
144
|
+
numLev,
|
|
145
|
+
numRelLen
|
|
146
|
+
};
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
const STRING_REPLACER = /[^\s-]+/g;
|
|
150
|
+
const STRING_SPLITTER = /\s+/g;
|
|
151
|
+
|
|
131
152
|
/**
|
|
132
153
|
*
|
|
133
154
|
* @param {string} left - training data
|
|
134
155
|
* @param {string} right - query
|
|
135
156
|
* @param {number} [seed]
|
|
136
157
|
* @param {number} [wordKoef]
|
|
158
|
+
* @param {object} [nums]
|
|
137
159
|
* @returns {number}
|
|
138
160
|
*/
|
|
139
161
|
function relativeLevenshtein (
|
|
140
162
|
left,
|
|
141
163
|
right,
|
|
142
164
|
seed = SEED_DEFAULT,
|
|
143
|
-
wordKoef = WORD_HANDICAP_K_DEFAULT
|
|
165
|
+
wordKoef = WORD_HANDICAP_K_DEFAULT,
|
|
166
|
+
nums = numStats(left, right)
|
|
144
167
|
) {
|
|
145
|
-
|
|
146
|
-
if (!len) {
|
|
168
|
+
if (nums.len === 0) {
|
|
147
169
|
return 0;
|
|
148
170
|
}
|
|
149
171
|
let stemLen = Math.min(left.length, right.length);
|
|
150
172
|
|
|
151
|
-
const leftWordCount = (left.match(
|
|
152
|
-
const rightWordCount = (right.match(
|
|
173
|
+
const leftWordCount = (left.match(STRING_REPLACER) || ['']).length;
|
|
174
|
+
const rightWordCount = (right.match(STRING_REPLACER) || ['']).length;
|
|
153
175
|
|
|
154
176
|
const wordDiff = Math.max(0, rightWordCount - leftWordCount);
|
|
155
177
|
const wordHandicap = (wordKoef ** wordDiff);
|
|
156
178
|
|
|
157
|
-
const
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
179
|
+
const l = levenshtein(left, right);
|
|
180
|
+
|
|
181
|
+
if (nums.numRelLen >= 0.5) {
|
|
182
|
+
const max = nums.len * 0.2; // 1/5 allowed error
|
|
183
|
+
const s = (l / max) * 0.25;
|
|
184
|
+
|
|
185
|
+
return Math.max(0, 1 - s) * wordHandicap;
|
|
186
|
+
}
|
|
161
187
|
|
|
162
188
|
if (stemLen < 3) {
|
|
163
|
-
return addSeed(seed, len + numLen,
|
|
189
|
+
return addSeed(seed, nums.len + nums.numLen, l + nums.numLev) * wordHandicap;
|
|
164
190
|
}
|
|
165
191
|
|
|
166
|
-
let diff = len - stemLen;
|
|
192
|
+
let diff = nums.len - stemLen;
|
|
167
193
|
|
|
168
194
|
if (diff <= 2) {
|
|
169
195
|
diff += 2;
|
|
@@ -184,7 +210,7 @@ function relativeLevenshtein (
|
|
|
184
210
|
diffWeight = (diff - 1) * SUFFIX_WEIGHT;
|
|
185
211
|
}
|
|
186
212
|
|
|
187
|
-
const vStem = addSeed(seed, stemLen + numLen, stemLev + numLev, seed - diffWeight);
|
|
213
|
+
const vStem = addSeed(seed, stemLen + nums.numLen, stemLev + nums.numLev, seed - diffWeight);
|
|
188
214
|
const vSuffix = addSeed(1 - diffWeight, diff, suffLev, 0);
|
|
189
215
|
|
|
190
216
|
const r = (vStem + vSuffix) * wordHandicap;
|
|
@@ -200,18 +226,35 @@ function relativeLevenshtein (
|
|
|
200
226
|
* @param {number} [wordKoef]
|
|
201
227
|
* @returns {number}
|
|
202
228
|
*/
|
|
203
|
-
function multiwordLevenshtein (left, right, seed, wordKoef = undefined) {
|
|
204
|
-
const leftSplit = `${left}`.split(
|
|
205
|
-
const rightSplit = `${right}`.split(
|
|
229
|
+
function multiwordLevenshtein (left, right, seed = SEED_DEFAULT, wordKoef = undefined) {
|
|
230
|
+
const leftSplit = `${left}`.split(STRING_SPLITTER);
|
|
231
|
+
const rightSplit = `${right}`.split(STRING_SPLITTER);
|
|
206
232
|
|
|
207
233
|
let sum = 0;
|
|
234
|
+
let sumNums = 0;
|
|
235
|
+
let cntNums = 0;
|
|
208
236
|
|
|
209
237
|
const max = Math.max(leftSplit.length, rightSplit.length, 1);
|
|
210
238
|
for (let i = 0; i < max; i++) {
|
|
211
|
-
|
|
239
|
+
const ls = leftSplit[i] || '';
|
|
240
|
+
const rs = rightSplit[i] || '';
|
|
241
|
+
const nums = numStats(ls, rs);
|
|
242
|
+
|
|
243
|
+
const l = relativeLevenshtein(ls, rs, seed, wordKoef, nums);
|
|
244
|
+
|
|
245
|
+
if (nums.numRelLen >= 0.25) {
|
|
246
|
+
cntNums++;
|
|
247
|
+
sumNums += l;
|
|
248
|
+
sum += Math.max(0.85, l);
|
|
249
|
+
} else {
|
|
250
|
+
sum += l;
|
|
251
|
+
}
|
|
212
252
|
}
|
|
213
253
|
|
|
214
|
-
|
|
254
|
+
const total = sum / max;
|
|
255
|
+
const numeric = cntNums ? (sumNums / cntNums) : 1;
|
|
256
|
+
|
|
257
|
+
return total * numeric;
|
|
215
258
|
}
|
|
216
259
|
|
|
217
260
|
module.exports = {
|