wingbot 3.67.19 → 3.67.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/AiMatching.js +12 -2
- package/src/fuzzy/factoryFuzzySearch.js +3 -3
- package/src/fuzzy/levenshtein.js +60 -17
package/package.json
CHANGED
package/src/AiMatching.js
CHANGED
|
@@ -650,6 +650,7 @@ class AiMatching {
|
|
|
650
650
|
|
|
651
651
|
const allOptional = wantedEntities.every((e) => e.optional
|
|
652
652
|
&& (!e.op || useEntities.every((n) => n.entity !== e.entity)));
|
|
653
|
+
|
|
653
654
|
if (entitiesScore <= 0 && !allOptional) {
|
|
654
655
|
return { score: 0, entities: [] };
|
|
655
656
|
}
|
|
@@ -698,6 +699,8 @@ class AiMatching {
|
|
|
698
699
|
let fromState = 0;
|
|
699
700
|
let metl = 0;
|
|
700
701
|
|
|
702
|
+
let optHandicap = 0;
|
|
703
|
+
|
|
701
704
|
for (const wanted of wantedEntities) {
|
|
702
705
|
const usedIndexes = occurences.has(wanted.entity)
|
|
703
706
|
? occurences.get(wanted.entity)
|
|
@@ -754,14 +757,21 @@ class AiMatching {
|
|
|
754
757
|
}
|
|
755
758
|
|
|
756
759
|
if (!matching) { // && optional && !entityExists
|
|
757
|
-
|
|
760
|
+
if (optHandicap < this.redundantEntityHandicap) {
|
|
761
|
+
handicap += this.redundantEntityHandicap;
|
|
762
|
+
} else {
|
|
763
|
+
handicap += this.optionalHandicap;
|
|
764
|
+
}
|
|
765
|
+
optHandicap += this.redundantEntityHandicap;
|
|
758
766
|
continue;
|
|
759
767
|
}
|
|
760
768
|
|
|
761
769
|
if (wanted.optional) {
|
|
762
|
-
|
|
770
|
+
const oph = wanted.op
|
|
763
771
|
? this.optionalEqualityHandicap
|
|
764
772
|
: this.optionalHandicap;
|
|
773
|
+
|
|
774
|
+
handicap += oph;
|
|
765
775
|
}
|
|
766
776
|
|
|
767
777
|
if (wanted.op === COMPARE.NOT_EQUAL) {
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
const { shortArrayIndex, splitToNgrams, cleanup } = require('./fuzzyUtils');
|
|
7
7
|
const {
|
|
8
|
-
|
|
8
|
+
multiwordLevenshtein, SEED_FUZZY, SEED_FUZZY_MULTIPLICATOR, WORD_HANDICAP_K_FUZZY
|
|
9
9
|
} = require('./levenshtein');
|
|
10
10
|
|
|
11
11
|
const LOWER_DUPLICATES = 0.9;
|
|
@@ -93,7 +93,7 @@ function searchFnFactory (indexMap, ngramCounts, entities, maxIdf, {
|
|
|
93
93
|
const [, entityIndex, cleanText] = ngramCounts[id];
|
|
94
94
|
const [entity, value] = entities[entityIndex];
|
|
95
95
|
const relIdf = (idf / cnt) / maxIdf;
|
|
96
|
-
let score =
|
|
96
|
+
let score = multiwordLevenshtein(
|
|
97
97
|
cleanText,
|
|
98
98
|
cleanQuery,
|
|
99
99
|
levenshteinSeed,
|
|
@@ -103,7 +103,7 @@ function searchFnFactory (indexMap, ngramCounts, entities, maxIdf, {
|
|
|
103
103
|
|
|
104
104
|
if (cleanQuery.match(/^[^\s]{1,3}\s+.{6,}$/)) {
|
|
105
105
|
const without = cleanQuery.replace(/^[^\s]{1,3}\s+/, '');
|
|
106
|
-
const altScore =
|
|
106
|
+
const altScore = multiwordLevenshtein(
|
|
107
107
|
cleanText,
|
|
108
108
|
without,
|
|
109
109
|
levenshteinSeed,
|
package/src/fuzzy/levenshtein.js
CHANGED
|
@@ -128,42 +128,68 @@ function addSeed (seed, len, value, base = seed) {
|
|
|
128
128
|
return base + (((len - value) / len) * (1 - seed));
|
|
129
129
|
}
|
|
130
130
|
|
|
131
|
+
function numStats (left, right) {
|
|
132
|
+
const len = Math.max(left.length, right.length);
|
|
133
|
+
const leftNum = left.replace(/[^0-9]+/g, '');
|
|
134
|
+
const rightNum = right.replace(/[^0-9]+/g, '');
|
|
135
|
+
const numLen = Math.max(leftNum.length, rightNum.length);
|
|
136
|
+
const numLev = numLen ? levenshtein(leftNum, rightNum) * NUMERIC_KOEF : 0;
|
|
137
|
+
const numRelLen = len ? numLen / len : 0;
|
|
138
|
+
|
|
139
|
+
return {
|
|
140
|
+
len,
|
|
141
|
+
leftNum,
|
|
142
|
+
rightNum,
|
|
143
|
+
numLen,
|
|
144
|
+
numLev,
|
|
145
|
+
numRelLen
|
|
146
|
+
};
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
const STRING_REPLACER = /[^\s-]+/g;
|
|
150
|
+
const STRING_SPLITTER = /\s+/g;
|
|
151
|
+
|
|
131
152
|
/**
|
|
132
153
|
*
|
|
133
154
|
* @param {string} left - training data
|
|
134
155
|
* @param {string} right - query
|
|
135
156
|
* @param {number} [seed]
|
|
136
157
|
* @param {number} [wordKoef]
|
|
158
|
+
* @param {object} [nums]
|
|
137
159
|
* @returns {number}
|
|
138
160
|
*/
|
|
139
161
|
function relativeLevenshtein (
|
|
140
162
|
left,
|
|
141
163
|
right,
|
|
142
164
|
seed = SEED_DEFAULT,
|
|
143
|
-
wordKoef = WORD_HANDICAP_K_DEFAULT
|
|
165
|
+
wordKoef = WORD_HANDICAP_K_DEFAULT,
|
|
166
|
+
nums = numStats(left, right)
|
|
144
167
|
) {
|
|
145
|
-
|
|
146
|
-
if (!len) {
|
|
168
|
+
if (nums.len === 0) {
|
|
147
169
|
return 0;
|
|
148
170
|
}
|
|
149
171
|
let stemLen = Math.min(left.length, right.length);
|
|
150
172
|
|
|
151
|
-
const leftWordCount = (left.match(
|
|
152
|
-
const rightWordCount = (right.match(
|
|
173
|
+
const leftWordCount = (left.match(STRING_REPLACER) || ['']).length;
|
|
174
|
+
const rightWordCount = (right.match(STRING_REPLACER) || ['']).length;
|
|
153
175
|
|
|
154
176
|
const wordDiff = Math.max(0, rightWordCount - leftWordCount);
|
|
155
177
|
const wordHandicap = (wordKoef ** wordDiff);
|
|
156
178
|
|
|
157
|
-
const
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
179
|
+
const l = levenshtein(left, right);
|
|
180
|
+
|
|
181
|
+
if (nums.numRelLen >= 0.5) {
|
|
182
|
+
const max = nums.len * 0.2; // 1/5 allowed error
|
|
183
|
+
const s = (l / max) * 0.25;
|
|
184
|
+
|
|
185
|
+
return Math.max(0, 1 - s) * wordHandicap;
|
|
186
|
+
}
|
|
161
187
|
|
|
162
188
|
if (stemLen < 3) {
|
|
163
|
-
return addSeed(seed, len + numLen,
|
|
189
|
+
return addSeed(seed, nums.len + nums.numLen, l + nums.numLev) * wordHandicap;
|
|
164
190
|
}
|
|
165
191
|
|
|
166
|
-
let diff = len - stemLen;
|
|
192
|
+
let diff = nums.len - stemLen;
|
|
167
193
|
|
|
168
194
|
if (diff <= 2) {
|
|
169
195
|
diff += 2;
|
|
@@ -184,7 +210,7 @@ function relativeLevenshtein (
|
|
|
184
210
|
diffWeight = (diff - 1) * SUFFIX_WEIGHT;
|
|
185
211
|
}
|
|
186
212
|
|
|
187
|
-
const vStem = addSeed(seed, stemLen + numLen, stemLev + numLev, seed - diffWeight);
|
|
213
|
+
const vStem = addSeed(seed, stemLen + nums.numLen, stemLev + nums.numLev, seed - diffWeight);
|
|
188
214
|
const vSuffix = addSeed(1 - diffWeight, diff, suffLev, 0);
|
|
189
215
|
|
|
190
216
|
const r = (vStem + vSuffix) * wordHandicap;
|
|
@@ -200,18 +226,35 @@ function relativeLevenshtein (
|
|
|
200
226
|
* @param {number} [wordKoef]
|
|
201
227
|
* @returns {number}
|
|
202
228
|
*/
|
|
203
|
-
function multiwordLevenshtein (left, right, seed, wordKoef = undefined) {
|
|
204
|
-
const leftSplit = `${left}`.split(
|
|
205
|
-
const rightSplit = `${right}`.split(
|
|
229
|
+
function multiwordLevenshtein (left, right, seed = SEED_DEFAULT, wordKoef = undefined) {
|
|
230
|
+
const leftSplit = `${left}`.split(STRING_SPLITTER);
|
|
231
|
+
const rightSplit = `${right}`.split(STRING_SPLITTER);
|
|
206
232
|
|
|
207
233
|
let sum = 0;
|
|
234
|
+
let sumNums = 0;
|
|
235
|
+
let cntNums = 0;
|
|
208
236
|
|
|
209
237
|
const max = Math.max(leftSplit.length, rightSplit.length, 1);
|
|
210
238
|
for (let i = 0; i < max; i++) {
|
|
211
|
-
|
|
239
|
+
const ls = leftSplit[i] || '';
|
|
240
|
+
const rs = rightSplit[i] || '';
|
|
241
|
+
const nums = numStats(ls, rs);
|
|
242
|
+
|
|
243
|
+
const l = relativeLevenshtein(ls, rs, seed, wordKoef, nums);
|
|
244
|
+
|
|
245
|
+
if (nums.numRelLen >= 0.25) {
|
|
246
|
+
cntNums++;
|
|
247
|
+
sumNums += l;
|
|
248
|
+
sum += Math.max(0.85, l);
|
|
249
|
+
} else {
|
|
250
|
+
sum += l;
|
|
251
|
+
}
|
|
212
252
|
}
|
|
213
253
|
|
|
214
|
-
|
|
254
|
+
const total = sum / max;
|
|
255
|
+
const numeric = cntNums ? (sumNums / cntNums) : 1;
|
|
256
|
+
|
|
257
|
+
return total * numeric;
|
|
215
258
|
}
|
|
216
259
|
|
|
217
260
|
module.exports = {
|