@inseefr/lunatic 3.12.0 → 3.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/utils/search/SearchMiniSearch.spec.js +16 -1
- package/esm/utils/search/SearchMiniSearch.spec.js.map +1 -1
- package/esm/utils/search/SearchMinisearch.js +26 -1
- package/esm/utils/search/SearchMinisearch.js.map +1 -1
- package/esm/utils/search/tokenizer.js +8 -17
- package/esm/utils/search/tokenizer.js.map +1 -1
- package/esm/utils/search/tokenizer.spec.js +5 -0
- package/esm/utils/search/tokenizer.spec.js.map +1 -1
- package/esm/utils/search/utils.d.ts +7 -0
- package/esm/utils/search/utils.js +15 -0
- package/esm/utils/search/utils.js.map +1 -0
- package/package.json +8 -1
- package/src/utils/search/SearchMiniSearch.spec.ts +21 -1
- package/src/utils/search/SearchMinisearch.ts +34 -1
- package/src/utils/search/tokenizer.spec.ts +7 -0
- package/src/utils/search/tokenizer.ts +12 -18
- package/src/utils/search/utils.ts +14 -0
- package/tsconfig.build.tsbuildinfo +1 -1
- package/utils/search/SearchMiniSearch.spec.js +15 -1
- package/utils/search/SearchMiniSearch.spec.js.map +1 -1
- package/utils/search/SearchMinisearch.js +26 -1
- package/utils/search/SearchMinisearch.js.map +1 -1
- package/utils/search/tokenizer.js +10 -19
- package/utils/search/tokenizer.js.map +1 -1
- package/utils/search/tokenizer.spec.js +5 -0
- package/utils/search/tokenizer.spec.js.map +1 -1
- package/utils/search/utils.d.ts +7 -0
- package/utils/search/utils.js +19 -0
- package/utils/search/utils.js.map +1 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { describe, it, expect, vi, beforeAll, afterEach } from 'vitest';
|
|
2
2
|
import { SearchMinisearch } from './SearchMinisearch';
|
|
3
|
+
import { applyMelauto } from './melauto';
|
|
3
4
|
vi.mock('minisearch', () => {
|
|
4
5
|
return {
|
|
5
6
|
default: vi.fn().mockImplementation(() => ({
|
|
@@ -20,7 +21,13 @@ describe('SearchMinisearch', () => {
|
|
|
20
21
|
beforeAll(() => {
|
|
21
22
|
searchInstance = new SearchMinisearch({
|
|
22
23
|
name: 'test-suggester',
|
|
23
|
-
fields: [
|
|
24
|
+
fields: [
|
|
25
|
+
{ name: 'id' },
|
|
26
|
+
{
|
|
27
|
+
name: 'label',
|
|
28
|
+
synonyms: { accueil: ['ACCEUIL', 'ACUEIL'] },
|
|
29
|
+
},
|
|
30
|
+
],
|
|
24
31
|
queryParser: {
|
|
25
32
|
type: 'tokenized',
|
|
26
33
|
params: { language: 'English', pattern: '\\w+', min: 1 },
|
|
@@ -47,5 +54,13 @@ describe('SearchMinisearch', () => {
|
|
|
47
54
|
await searchInstance.index(mockData);
|
|
48
55
|
expect((_a = searchInstance.db) === null || _a === void 0 ? void 0 : _a.addAll).not.toHaveBeenCalled();
|
|
49
56
|
});
|
|
57
|
+
it('should expand query synonyms before melauto sorting', async () => {
|
|
58
|
+
var _a;
|
|
59
|
+
await searchInstance.index(mockData);
|
|
60
|
+
((_a = searchInstance.db) === null || _a === void 0 ? void 0 : _a.search).mockReturnValue(mockData);
|
|
61
|
+
vi.mocked(applyMelauto).mockReturnValue(mockData);
|
|
62
|
+
await searchInstance.search('agent acceuil');
|
|
63
|
+
expect(applyMelauto).toHaveBeenCalledWith('agent acceuil accueil', mockData);
|
|
64
|
+
});
|
|
50
65
|
});
|
|
51
66
|
//# sourceMappingURL=SearchMiniSearch.spec.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"SearchMiniSearch.spec.js","sourceRoot":"","sources":["../../../src/utils/search/SearchMiniSearch.spec.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACxE,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"SearchMiniSearch.spec.js","sourceRoot":"","sources":["../../../src/utils/search/SearchMiniSearch.spec.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACxE,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAEzC,EAAE,CAAC,IAAI,CAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,OAAO;QACN,OAAO,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,GAAG,EAAE,CAAC,CAAC;YAC1C,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE;YACf,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE;SACf,CAAC,CAAC;KACH,CAAC;AACH,CAAC,CAAC,CAAC;AAEH,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,GAAG,EAAE,CAAC,CAAC;IAC3B,YAAY,EAAE,EAAE,CAAC,EAAE,EAAE;CACrB,CAAC,CAAC,CAAC;AAEJ,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;IACjC,IAAI,cAAqC,CAAC;IAC1C,MAAM,QAAQ,GAAG;QAChB,EAAE,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,YAAY,EAAE;QAChC,EAAE,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,aAAa,EAAE;KACjC,CAAC;IAEF,SAAS,CAAC,GAAG,EAAE;QACd,cAAc,GAAG,IAAI,gBAAgB,CAAC;YACrC,IAAI,EAAE,gBAAgB;YACtB,MAAM,EAAE;gBACP,EAAE,IAAI,EAAE,IAAI,EAAE;gBACd;oBACC,IAAI,EAAE,OAAO;oBACb,QAAQ,EAAE,EAAE,OAAO,EAAE,CAAC,SAAS,EAAE,QAAQ,CAAC,EAAE;iBAC5C;aACD;YACD,WAAW,EAAE;gBACZ,IAAI,EAAE,WAAW;gBACjB,MAAM,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE;aACxD;YACD,GAAG,EAAE,EAAE;SACP,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,GAAG,EAAE;QACd,MAAM,cAAc,GAAG,cAAc,CAAC,EAAS,CAAC;QAChD,cAAc,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;;QAC3D,MAAM,cAAc,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QAErC,uDAAuD;QACvD,MAAM,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QACzC,MAAM,CAAC,cAAc,CAAC,SAAS,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE9C,mDAAmD;QACnD,MAAM,CAAC,MAAA,cAAc,CAAC,EAAE,0CAAE,MAAM,CAAC,CAAC,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IAClE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,KAAK,IAAI,EAAE;;QACvD,cAAc,CAAC,OAAO,GAAG,IAAI,CAAC;QAC9B,MAAM,cAAc,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QAErC,MAAM,CAAC,MAAA,cAAc,CAAC,EAAE,0CAAE,MAAM,CAAC,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;IAC1D,CAAC,CAAC,CAAC;IAE
H,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;;QACpE,MAAM,cAAc,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACrC,CAAC,MAAA,cAAc,CAAC,EAAE,0CAAE,MAAc,CAAA,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;QAC7D,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,eAAe,CAAC,QAAe,CAAC,CAAC;QAEzD,MAAM,cAAc,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;QAE7C,MAAM,CAAC,YAAY,CAAC,CAAC,oBAAoB,CACxC,uBAAuB,EACvB,QAAQ,CACR,CAAC;IACH,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC"}
|
|
@@ -1,6 +1,31 @@
|
|
|
1
1
|
import { applyMelauto } from './melauto';
|
|
2
2
|
import MiniSearch from 'minisearch';
|
|
3
3
|
import { tokenizer } from './tokenizer';
|
|
4
|
+
import { normalizeStr } from './utils';
|
|
5
|
+
function getMelautoQuery(query, info) {
|
|
6
|
+
const tokens = tokenizer(info)(query);
|
|
7
|
+
// Start from the query tokens (already tokenized and normalized by the tokenizer).
|
|
8
|
+
const expandedTokens = new Set(tokens);
|
|
9
|
+
// Expand the token set with synonyms, in both directions, before the query is handed to melauto.
|
|
10
|
+
for (const field of info.fields) {
|
|
11
|
+
if (!field.synonyms) {
|
|
12
|
+
continue;
|
|
13
|
+
}
|
|
14
|
+
for (const source in field.synonyms) {
|
|
15
|
+
const normalizedSource = normalizeStr(source);
|
|
16
|
+
const normalizedSynonyms = field.synonyms[source].map((synonym) => normalizeStr(synonym));
|
|
17
|
+
// source -> synonyms
|
|
18
|
+
if (expandedTokens.has(normalizedSource)) {
|
|
19
|
+
normalizedSynonyms.forEach((synonym) => expandedTokens.add(synonym));
|
|
20
|
+
}
|
|
21
|
+
// synonym -> source
|
|
22
|
+
if (normalizedSynonyms.some((synonym) => expandedTokens.has(synonym))) {
|
|
23
|
+
expandedTokens.add(normalizedSource);
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
return Array.from(expandedTokens).join(' ');
|
|
28
|
+
}
|
|
4
29
|
export class SearchMinisearch {
|
|
5
30
|
constructor(info) {
|
|
6
31
|
this.db = null;
|
|
@@ -31,7 +56,7 @@ export class SearchMinisearch {
|
|
|
31
56
|
prefix: (term) => term.length > 2,
|
|
32
57
|
});
|
|
33
58
|
// Apply melauto to classify results
|
|
34
|
-
data = applyMelauto(q, data);
|
|
59
|
+
data = applyMelauto(getMelautoQuery(q, this.info), data);
|
|
35
60
|
data = data.slice(0, this.info.max);
|
|
36
61
|
return data;
|
|
37
62
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"SearchMinisearch.js","sourceRoot":"","sources":["../../../src/utils/search/SearchMinisearch.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AACzC,OAAO,UAAU,MAAM,YAAY,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"SearchMinisearch.js","sourceRoot":"","sources":["../../../src/utils/search/SearchMinisearch.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AACzC,OAAO,UAAU,MAAM,YAAY,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAEvC,SAAS,eAAe,CAAC,KAAa,EAAE,IAAgB;IACvD,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC;IAEtC,qEAAqE;IACrE,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;IAEvC,wCAAwC;IACxC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;QACjC,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;YACrB,SAAS;QACV,CAAC;QACD,KAAK,MAAM,MAAM,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACrC,MAAM,gBAAgB,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;YAC9C,MAAM,kBAAkB,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CACjE,YAAY,CAAC,OAAO,CAAC,CACrB,CAAC;YAEF,qBAAqB;YACrB,IAAI,cAAc,CAAC,GAAG,CAAC,gBAAgB,CAAC,EAAE,CAAC;gBAC1C,kBAAkB,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YACtE,CAAC;YAED,oBAAoB;YACpB,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC;gBACvE,cAAc,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;YACtC,CAAC;QACF,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC7C,CAAC;AAED,MAAM,OAAO,gBAAgB;IAO5B,YAAY,IAAgB;QAJ5B,OAAE,GAAyB,IAAI,CAAC;QAEhC,YAAO,GAAG,KAAK,CAAC;QAGf,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IAClB,CAAC;IAED,SAAS;QACR,OAAO,IAAI,CAAC,OAAO,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAS;QACpB,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAClB,OAAO,OAAO,CAAC,OAAO,EAAE,CAAC;QAC1B,CAAC;QACD,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;QAC5D,IAAI,CAAC,EAAE,GAAG,IAAI,UAAU,CAAC;YACxB,MAAM,EAAE,UAAU;YAClB,WAAW,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU;YACxD,QAAQ,EAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC;SAC9B,CAAC,CAAC;QACH,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;IACrB,CAAC;IAED,
KAAK,CAAC,MAAM,CAAC,CAAS;QACrB,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACxC,OAAO,EAAE,CAAC;QACX,CAAC;QACD,IAAI,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC,EAAE;YAC5B,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC;SACjC,CAAe,CAAC;QAEjB,oCAAoC;QACpC,IAAI,GAAG,YAAY,CAAC,eAAe,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,CAAC,CAAC;QAEzD,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEpC,OAAO,IAAI,CAAC;IACb,CAAC;IAED,aAAa,CAAC,EAAO;QACpB,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC;YACd,OAAO,EAAO,CAAC;QAChB,CAAC;QACD,OAAO,IAAI,CAAC,EAAE,CAAC,eAAe,CAAC,EAAE,CAAM,CAAC;IACzC,CAAC;CACD"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { normalizeStr } from './utils';
|
|
1
2
|
/**
|
|
2
3
|
* Generates a tokenize method.
|
|
3
4
|
* When used to tokenize a search query rather than to index a field, the fieldName is undefined.
|
|
@@ -31,6 +32,7 @@ export const tokenizeQuery = (str, info) => {
|
|
|
31
32
|
*/
|
|
32
33
|
export const tokenizeIndex = (str, info, stopWords) => {
|
|
33
34
|
var _a, _b, _c;
|
|
35
|
+
let normalizedStr = normalizeStr(str);
|
|
34
36
|
const wordRegex = info.rules && info.rules !== 'soft'
|
|
35
37
|
? new RegExp(info.rules[0], 'gi')
|
|
36
38
|
: /\w+/gi;
|
|
@@ -38,28 +40,17 @@ export const tokenizeIndex = (str, info, stopWords) => {
|
|
|
38
40
|
// For synonyms, add the synonyms to the string
|
|
39
41
|
if (info.synonyms) {
|
|
40
42
|
for (const source in info.synonyms) {
|
|
41
|
-
const
|
|
42
|
-
|
|
43
|
+
const normalizedSource = normalizeStr(source);
|
|
44
|
+
const synonyms = info.synonyms[source]
|
|
45
|
+
.map((synonym) => normalizeStr(synonym))
|
|
46
|
+
.join(' ');
|
|
47
|
+
normalizedStr = normalizedStr.replaceAll(normalizedSource, `${normalizedSource} ${synonyms}`);
|
|
43
48
|
}
|
|
44
49
|
}
|
|
45
50
|
// We remove the stopWords from the string
|
|
46
|
-
return ((_c = (_b = filterStopWords(
|
|
51
|
+
return ((_c = (_b = filterStopWords(normalizedStr, stopWords)
|
|
47
52
|
.match(wordRegex)) === null || _b === void 0 ? void 0 : _b.filter((w) => w.length >= minLength)) !== null && _c !== void 0 ? _c : []);
|
|
48
53
|
};
|
|
49
|
-
/**
|
|
50
|
-
* Normalize a string
|
|
51
|
-
* - Remove accent (é => e, à => a)
|
|
52
|
-
* - Remove ligatures (æ => ae, Æ => ae, œ => oe, Œ => oe)
|
|
53
|
-
* - Lowercase
|
|
54
|
-
*/
|
|
55
|
-
const normalizeStr = (str) => {
|
|
56
|
-
return str
|
|
57
|
-
.toLowerCase()
|
|
58
|
-
.replaceAll('œ', 'oe')
|
|
59
|
-
.replaceAll('æ', 'ae')
|
|
60
|
-
.normalize('NFD')
|
|
61
|
-
.replace(/[\u0300-\u036f]/g, '');
|
|
62
|
-
};
|
|
63
54
|
/**
|
|
64
55
|
* remove from a string all the words that are included in a stopwords list
|
|
65
56
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../../src/utils/search/tokenizer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../../src/utils/search/tokenizer.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAEvC;;;GAGG;AACH,MAAM,CAAC,MAAM,SAAS,GACrB,CAAC,IAAgB,EAAE,EAAE,CAAC,CAAC,GAAW,EAAE,SAAkB,EAAE,EAAE;IACzD,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC;IAC5D,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;IAEjC,OAAO,KAAK;QACX,CAAC,CAAC,aAAa,CAAC,GAAG,EAAE,KAAK,EAAE,SAAS,CAAC;QACtC,CAAC,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;AACzC,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG,CAAC,GAAW,EAAE,IAA+B,EAAE,EAAE;;IAC7E,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QAC1B,OAAO,YAAY,CAAC,GAAG,CAAC;aACtB,KAAK,CAAC,YAAY,CAAC;aACnB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC/B,CAAC;IAED,MAAM,SAAS,GACd,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,KAAK,MAAM;QACpD,CAAC,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC;QACvC,CAAC,CAAC,OAAO,CAAC;IACZ,MAAM,SAAS,GAAG,MAAA,IAAI,CAAC,MAAM,CAAC,GAAG,mCAAI,CAAC,CAAC;IAEvC,OAAO,CACN,MAAA,MAAA,YAAY,CAAC,GAAG,CAAC;SACf,KAAK,CAAC,SAAS,CAAC,0CACf,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,SAAS,CAAC,mCAAI,EAAE,CAC7C,CAAC;AACH,CAAC,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG,CAC5B,GAAW,EACX,IAAkC,EAClC,SAAoB,EACnB,EAAE;;IACH,IAAI,aAAa,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IACtC,MAAM,SAAS,GACd,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,KAAK,MAAM;QAClC,CAAC,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,KAAM,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC;QAClC,CAAC,CAAC,OAAO,CAAC;IACZ,MAAM,SAAS,GAAG,MAAA,IAAI,CAAC,GAAG,mCAAI,CAAC,CAAC;IAEhC,+CAA+C;IAC/C,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QACnB,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACpC,MAAM,gBAAgB,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;YAC9C,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;iBACpC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;iBACvC,IAAI,CAAC,GAAG,CAAC,CAAC;YAEZ,aAAa,GAAG,aAAa,CAAC,UAAU,CACvC,gBAAgB,EAChB,GAAG,gBAAgB,IAAI,QAAQ,EAAE,CACjC,CAAC;Q
ACH,CAAC;IACF,CAAC;IAED,0CAA0C;IAC1C,OAAO,CACN,MAAA,MAAA,eAAe,CAAC,aAAa,EAAE,SAAS,CAAC;SACvC,KAAK,CAAC,SAAS,CAAC,0CACf,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,SAAS,CAAC,mCAAI,EAAE,CAC7C,CAAC;AACH,CAAC,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,KAAa,EAAE,SAAoB;IAClE,IAAI,CAAC,SAAS,EAAE,CAAC;QAChB,OAAO,KAAK,CAAC;IACd,CAAC;IACD,MAAM,kBAAkB,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;IACvE,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACjC,MAAM,aAAa,GAAG,KAAK,CAAC,MAAM,CACjC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,kBAAkB,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAC1D,CAAC;IACF,OAAO,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAChC,CAAC"}
|
|
@@ -123,6 +123,11 @@ describe('tokenizeIndex', () => {
|
|
|
123
123
|
const result = tokenizeIndex('The car is fast', fieldInfo);
|
|
124
124
|
expect(result).toEqual(['the', 'car', 'vehicle', 'automobile', 'fast']);
|
|
125
125
|
});
|
|
126
|
+
it('should tokenize and apply synonyms regardless of case', () => {
|
|
127
|
+
const fieldInfo = mockSearchInfo.fields[0];
|
|
128
|
+
const result = tokenizeIndex('The Car is fast', fieldInfo);
|
|
129
|
+
expect(result).toEqual(['the', 'car', 'vehicle', 'automobile', 'fast']);
|
|
130
|
+
});
|
|
126
131
|
it('should normalize the input', () => {
|
|
127
132
|
const fieldInfo = mockSearchInfo.fields[0];
|
|
128
133
|
const result = tokenizeIndex('Élève Étudiant!', fieldInfo);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tokenizer.spec.js","sourceRoot":"","sources":["../../../src/utils/search/tokenizer.spec.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EACN,SAAS,EACT,aAAa,EACb,aAAa,EACb,eAAe,GACf,MAAM,aAAa,CAAC;AAGrB,MAAM,cAAc,GAAe;IAClC,IAAI,EAAE,UAAU;IAChB,MAAM,EAAE;QACP;YACC,IAAI,EAAE,OAAO;YACb,GAAG,EAAE,CAAC;YACN,KAAK,EAAE,CAAC,QAAQ,CAAC;YACjB,QAAQ,EAAE;gBACT,GAAG,EAAE,CAAC,SAAS,EAAE,YAAY,CAAC;aAC9B;SACD;KACD;IACD,WAAW,EAAE;QACZ,IAAI,EAAE,WAAW;QACjB,MAAM,EAAE;YACP,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,SAAS;YAClB,GAAG,EAAE,CAAC;SACN;KACD;CACD,CAAC;AAEF,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAChC,EAAE,CAAC,oDAAoD,EAAE,GAAG,EAAE;QAC7D,MAAM,KAAK,GAAG,iBAAiB,CAAC;QAChC,MAAM,SAAS,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC9B,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yDAAyD,EAAE,GAAG,EAAE;QAClE,MAAM,KAAK,GAAG,2BAA2B,CAAC;QAC1C,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3B,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACrC,MAAM,KAAK,GAAG,iBAAiB,CAAC;QAChC,MAAM,SAAS,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC9B,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oEAAoE,EAAE,GAAG,EAAE;QAC7E,MAAM,KAAK,GAAG,iBAAiB,CAAC;QAChC,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QACtC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wEAAwE,EAAE,GAAG,EAAE;QACjF,MAAM,KAAK,GAAG,iBAAiB,CAAC;QAChC,MAAM,SAAS,GAAa,EAAE,CAAC;QAC/B,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0DAA0D,EAAE,GAAG,EAAE;QACnE,MAAM,KAAK,GAAG,gBAAgB,CAAC;QAC/B,MAAM,SAAS,GAAG,CAAC,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC9C,MAAM,MAAM,GAAG,eAAe,CAAC,KA
AK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACzB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sDAAsD,EAAE,GAAG,EAAE;QAC/D,MAAM,KAAK,GAAG,qBAAqB,CAAC;QACpC,MAAM,SAAS,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC9B,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC3C,MAAM,KAAK,GAAG,EAAE,CAAC;QACjB,MAAM,SAAS,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC9B,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACzB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC9C,MAAM,KAAK,GAAG,+BAA+B,CAAC;QAC9C,MAAM,SAAS,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC9B,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC9B,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;QAC7C,MAAM,WAAW,GAAG,EAAE,IAAI,EAAE,MAAM,EAA+B,CAAC;QAElE,MAAM,MAAM,GAAG,aAAa,CAAC,iBAAiB,EAAE,WAAW,CAAC,CAAC;QAC7D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,GAAG,EAAE;QACxD,MAAM,WAAW,GAAG;YACnB,IAAI,EAAE,WAAW;YACjB,MAAM,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,EAAE;SACT,CAAC;QAE/B,MAAM,MAAM,GAAG,aAAa,CAAC,kBAAkB,EAAE,WAAW,CAAC,CAAC;QAC9D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACrC,MAAM,WAAW,GAAG;YACnB,IAAI,EAAE,WAAW;YACjB,MAAM,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE;SACN,CAAC;QAE/B,MAAM,MAAM,GAAG,aAAa,CAAC,iBAAiB,EAAE,WAAW,CAAC,CAAC;QAC7D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QAClD,MAAM,WAAW,GAAG,EAAE,IAAI,EAAE,MAAM,EAA+B,CAAC;QAElE,MAAM,MAAM,GAAG,aAAa,CAAC,6BAA6B,EAAE,WAAW,CAAC,CAAC;QACzE,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,QAAQ,
EAAE,UAAU,EAAE,UAAU,CAAC,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,GAAG,EAAE;QAC9D,MAAM,WAAW,GAAG;YACnB,IAAI,EAAE,WAAW;YACjB,MAAM,EAAE,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,cAAc;SAC7C,CAAC;QAE/B,MAAM,MAAM,GAAG,aAAa,CAAC,kBAAkB,EAAE,WAAW,CAAC,CAAC;QAC9D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC5B,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC9B,EAAE,CAAC,kEAAkE,EAAE,GAAG,EAAE;QAC3E,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,aAAa,CAAC,oBAAoB,EAAE,SAAS,CAAC,CAAC;QAC9D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;QAC7C,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,aAAa,CAAC,iBAAiB,EAAE,SAAS,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IACzE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACrC,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,aAAa,CAAC,iBAAiB,EAAE,SAAS,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QAClD,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,aAAa,CAAC,6BAA6B,EAAE,SAAS,CAAC,CAAC;QACvE,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,UAAU,CAAC,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACtC,MAAM,SAAS,GAAG,EAAE,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC;QAC1D,MAAM,SAAS,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,CAAC;QAEnD,MAAM,MAAM,GAAG,aAAa,CAC3B,+BAA+B,EAC/B,SAAS,EACT,SAAS,CACT,CAAC;QACF,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,GAAG,EAAE;QAC9D,MAAM,SAAS,GAAG,EAAE,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,eA
Ae;QAEnF,MAAM,MAAM,GAAG,aAAa,CAAC,iBAAiB,EAAE,SAAS,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC5B,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,WAAW,EAAE,GAAG,EAAE;IAC1B,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC5C,MAAM,QAAQ,GAAG,SAAS,CAAC,cAAc,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,QAAQ,CAAC,iBAAiB,EAAE,OAAO,CAAC,CAAC;QACpD,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IACzE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4DAA4D,EAAE,GAAG,EAAE;QACrE,MAAM,QAAQ,GAAG,SAAS,CAAC,cAAc,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACrC,MAAM,QAAQ,GAAG,SAAS,CAAC,cAAc,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACtC,MAAM,QAAQ,GAAG,SAAS,CAAC,cAAc,CAAC,CAAC;QAE3C,MAAM,CAAC,QAAQ,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAC1C,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAClC,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC"}
|
|
1
|
+
{"version":3,"file":"tokenizer.spec.js","sourceRoot":"","sources":["../../../src/utils/search/tokenizer.spec.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EACN,SAAS,EACT,aAAa,EACb,aAAa,EACb,eAAe,GACf,MAAM,aAAa,CAAC;AAGrB,MAAM,cAAc,GAAe;IAClC,IAAI,EAAE,UAAU;IAChB,MAAM,EAAE;QACP;YACC,IAAI,EAAE,OAAO;YACb,GAAG,EAAE,CAAC;YACN,KAAK,EAAE,CAAC,QAAQ,CAAC;YACjB,QAAQ,EAAE;gBACT,GAAG,EAAE,CAAC,SAAS,EAAE,YAAY,CAAC;aAC9B;SACD;KACD;IACD,WAAW,EAAE;QACZ,IAAI,EAAE,WAAW;QACjB,MAAM,EAAE;YACP,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,SAAS;YAClB,GAAG,EAAE,CAAC;SACN;KACD;CACD,CAAC;AAEF,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAChC,EAAE,CAAC,oDAAoD,EAAE,GAAG,EAAE;QAC7D,MAAM,KAAK,GAAG,iBAAiB,CAAC;QAChC,MAAM,SAAS,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC9B,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yDAAyD,EAAE,GAAG,EAAE;QAClE,MAAM,KAAK,GAAG,2BAA2B,CAAC;QAC1C,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3B,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACrC,MAAM,KAAK,GAAG,iBAAiB,CAAC;QAChC,MAAM,SAAS,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC9B,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oEAAoE,EAAE,GAAG,EAAE;QAC7E,MAAM,KAAK,GAAG,iBAAiB,CAAC;QAChC,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QACtC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wEAAwE,EAAE,GAAG,EAAE;QACjF,MAAM,KAAK,GAAG,iBAAiB,CAAC;QAChC,MAAM,SAAS,GAAa,EAAE,CAAC;QAC/B,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0DAA0D,EAAE,GAAG,EAAE;QACnE,MAAM,KAAK,GAAG,gBAAgB,CAAC;QAC/B,MAAM,SAAS,GAAG,CAAC,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC9C,MAAM,MAAM,GAAG,eAAe,CAAC,KA
AK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACzB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sDAAsD,EAAE,GAAG,EAAE;QAC/D,MAAM,KAAK,GAAG,qBAAqB,CAAC;QACpC,MAAM,SAAS,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC9B,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC3C,MAAM,KAAK,GAAG,EAAE,CAAC;QACjB,MAAM,SAAS,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC9B,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACzB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC9C,MAAM,KAAK,GAAG,+BAA+B,CAAC;QAC9C,MAAM,SAAS,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC9B,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC9B,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;QAC7C,MAAM,WAAW,GAAG,EAAE,IAAI,EAAE,MAAM,EAA+B,CAAC;QAElE,MAAM,MAAM,GAAG,aAAa,CAAC,iBAAiB,EAAE,WAAW,CAAC,CAAC;QAC7D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,GAAG,EAAE;QACxD,MAAM,WAAW,GAAG;YACnB,IAAI,EAAE,WAAW;YACjB,MAAM,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,EAAE;SACT,CAAC;QAE/B,MAAM,MAAM,GAAG,aAAa,CAAC,kBAAkB,EAAE,WAAW,CAAC,CAAC;QAC9D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACrC,MAAM,WAAW,GAAG;YACnB,IAAI,EAAE,WAAW;YACjB,MAAM,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE;SACN,CAAC;QAE/B,MAAM,MAAM,GAAG,aAAa,CAAC,iBAAiB,EAAE,WAAW,CAAC,CAAC;QAC7D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QAClD,MAAM,WAAW,GAAG,EAAE,IAAI,EAAE,MAAM,EAA+B,CAAC;QAElE,MAAM,MAAM,GAAG,aAAa,CAAC,6BAA6B,EAAE,WAAW,CAAC,CAAC;QACzE,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,QAAQ,
EAAE,UAAU,EAAE,UAAU,CAAC,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,GAAG,EAAE;QAC9D,MAAM,WAAW,GAAG;YACnB,IAAI,EAAE,WAAW;YACjB,MAAM,EAAE,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,cAAc;SAC7C,CAAC;QAE/B,MAAM,MAAM,GAAG,aAAa,CAAC,kBAAkB,EAAE,WAAW,CAAC,CAAC;QAC9D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC5B,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC9B,EAAE,CAAC,kEAAkE,EAAE,GAAG,EAAE;QAC3E,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,aAAa,CAAC,oBAAoB,EAAE,SAAS,CAAC,CAAC;QAC9D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;QAC7C,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,aAAa,CAAC,iBAAiB,EAAE,SAAS,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IACzE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,GAAG,EAAE;QAChE,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,aAAa,CAAC,iBAAiB,EAAE,SAAS,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IACzE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACrC,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,aAAa,CAAC,iBAAiB,EAAE,SAAS,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QAClD,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,aAAa,CAAC,6BAA6B,EAAE,SAAS,CAAC,CAAC;QACvE,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,UAAU,CAAC,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACtC,MAAM,SAAS,GAAG,EAAE,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC;QAC1D,MAAM,SAAS,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,CAAC;QAEnD,MAAM,MAAM,GAAG,aAAa,CAC3B,+
BAA+B,EAC/B,SAAS,EACT,SAAS,CACT,CAAC;QACF,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,GAAG,EAAE;QAC9D,MAAM,SAAS,GAAG,EAAE,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,eAAe;QAEnF,MAAM,MAAM,GAAG,aAAa,CAAC,iBAAiB,EAAE,SAAS,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC5B,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,WAAW,EAAE,GAAG,EAAE;IAC1B,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC5C,MAAM,QAAQ,GAAG,SAAS,CAAC,cAAc,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,QAAQ,CAAC,iBAAiB,EAAE,OAAO,CAAC,CAAC;QACpD,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IACzE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4DAA4D,EAAE,GAAG,EAAE;QACrE,MAAM,QAAQ,GAAG,SAAS,CAAC,cAAc,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACrC,MAAM,QAAQ,GAAG,SAAS,CAAC,cAAc,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACtC,MAAM,QAAQ,GAAG,SAAS,CAAC,cAAc,CAAC,CAAC;QAE3C,MAAM,CAAC,QAAQ,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAC1C,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAClC,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Normalize a string
|
|
3
|
+
* - Remove accent (é => e, à => a)
|
|
4
|
+
* - Remove ligatures (æ => ae, Æ => ae, œ => oe, Œ => oe)
|
|
5
|
+
* - Lowercase
|
|
6
|
+
*/
|
|
7
|
+
export const normalizeStr = (str) => {
|
|
8
|
+
return str
|
|
9
|
+
.toLowerCase()
|
|
10
|
+
.replaceAll('œ', 'oe')
|
|
11
|
+
.replaceAll('æ', 'ae')
|
|
12
|
+
.normalize('NFD')
|
|
13
|
+
.replace(/[\u0300-\u036f]/g, '');
|
|
14
|
+
};
|
|
15
|
+
//# sourceMappingURL=utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../../src/utils/search/utils.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,CAAC,GAAW,EAAE,EAAE;IAC3C,OAAO,GAAG;SACR,WAAW,EAAE;SACb,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC;SACrB,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC;SACrB,SAAS,CAAC,KAAK,CAAC;SAChB,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC;AACnC,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@inseefr/lunatic",
|
|
3
|
-
"version": "3.12.0",
|
|
3
|
+
"version": "3.12.2",
|
|
4
4
|
"description": "Library of questionnaire components",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -462,6 +462,7 @@
|
|
|
462
462
|
"src/utils/search/melauto.ts",
|
|
463
463
|
"src/utils/search/tokenizer.spec.ts",
|
|
464
464
|
"src/utils/search/tokenizer.ts",
|
|
465
|
+
"src/utils/search/utils.ts",
|
|
465
466
|
"src/utils/variables.spec.ts",
|
|
466
467
|
"src/utils/variables.ts",
|
|
467
468
|
"src/utils/vtl.ts",
|
|
@@ -1667,6 +1668,9 @@
|
|
|
1667
1668
|
"esm/utils/search/tokenizer.spec.d.ts",
|
|
1668
1669
|
"esm/utils/search/tokenizer.spec.js",
|
|
1669
1670
|
"esm/utils/search/tokenizer.spec.js.map",
|
|
1671
|
+
"esm/utils/search/utils.d.ts",
|
|
1672
|
+
"esm/utils/search/utils.js",
|
|
1673
|
+
"esm/utils/search/utils.js.map",
|
|
1670
1674
|
"esm/utils/variables.d.ts",
|
|
1671
1675
|
"esm/utils/variables.js",
|
|
1672
1676
|
"esm/utils/variables.js.map",
|
|
@@ -2009,6 +2013,9 @@
|
|
|
2009
2013
|
"utils/search/tokenizer.spec.d.ts",
|
|
2010
2014
|
"utils/search/tokenizer.spec.js",
|
|
2011
2015
|
"utils/search/tokenizer.spec.js.map",
|
|
2016
|
+
"utils/search/utils.d.ts",
|
|
2017
|
+
"utils/search/utils.js",
|
|
2018
|
+
"utils/search/utils.js.map",
|
|
2012
2019
|
"utils/variables.d.ts",
|
|
2013
2020
|
"utils/variables.js",
|
|
2014
2021
|
"utils/variables.js.map",
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { describe, it, expect, vi, beforeAll, afterEach } from 'vitest';
|
|
2
2
|
import { SearchMinisearch } from './SearchMinisearch';
|
|
3
|
+
import { applyMelauto } from './melauto';
|
|
3
4
|
|
|
4
5
|
vi.mock('minisearch', () => {
|
|
5
6
|
return {
|
|
@@ -24,7 +25,13 @@ describe('SearchMinisearch', () => {
|
|
|
24
25
|
beforeAll(() => {
|
|
25
26
|
searchInstance = new SearchMinisearch({
|
|
26
27
|
name: 'test-suggester',
|
|
27
|
-
fields: [
|
|
28
|
+
fields: [
|
|
29
|
+
{ name: 'id' },
|
|
30
|
+
{
|
|
31
|
+
name: 'label',
|
|
32
|
+
synonyms: { accueil: ['ACCEUIL', 'ACUEIL'] },
|
|
33
|
+
},
|
|
34
|
+
],
|
|
28
35
|
queryParser: {
|
|
29
36
|
type: 'tokenized',
|
|
30
37
|
params: { language: 'English', pattern: '\\w+', min: 1 },
|
|
@@ -55,4 +62,17 @@ describe('SearchMinisearch', () => {
|
|
|
55
62
|
|
|
56
63
|
expect(searchInstance.db?.addAll).not.toHaveBeenCalled();
|
|
57
64
|
});
|
|
65
|
+
|
|
66
|
+
it('should expand query synonyms before melauto sorting', async () => {
|
|
67
|
+
await searchInstance.index(mockData);
|
|
68
|
+
(searchInstance.db?.search as any).mockReturnValue(mockData);
|
|
69
|
+
vi.mocked(applyMelauto).mockReturnValue(mockData as any);
|
|
70
|
+
|
|
71
|
+
await searchInstance.search('agent acceuil');
|
|
72
|
+
|
|
73
|
+
expect(applyMelauto).toHaveBeenCalledWith(
|
|
74
|
+
'agent acceuil accueil',
|
|
75
|
+
mockData
|
|
76
|
+
);
|
|
77
|
+
});
|
|
58
78
|
});
|
|
@@ -6,6 +6,39 @@ import type {
|
|
|
6
6
|
import { applyMelauto } from './melauto';
|
|
7
7
|
import MiniSearch from 'minisearch';
|
|
8
8
|
import { tokenizer } from './tokenizer';
|
|
9
|
+
import { normalizeStr } from './utils';
|
|
10
|
+
|
|
11
|
+
/**
 * Builds the query string given to melauto: the tokens of `query`, plus the
 * synonym terms configured on the search fields that relate to those tokens.
 *
 * For each synonym entry, sources and synonyms are compared after
 * `normalizeStr`:
 * - when the source is among the terms, all its synonyms are added
 * - otherwise, when one of its synonyms is among the terms, the source is added
 *
 * Lookups are made against the growing set, so a term added by one entry can
 * trigger a later entry.
 *
 * @param query - raw query string
 * @param info - search configuration (tokenizer settings and fields)
 * @returns the expanded terms, de-duplicated and joined with single spaces
 */
function getMelautoQuery(query: string, info: SearchInfo): string {
	const tokenize = tokenizer(info);
	const terms = new Set(tokenize(query));

	for (const field of info.fields) {
		const dictionary = field.synonyms;
		if (!dictionary) {
			continue;
		}

		for (const key in dictionary) {
			const canonical = normalizeStr(key);
			const variants = dictionary[key].map((variant) => normalizeStr(variant));

			if (terms.has(canonical)) {
				// source -> synonyms
				variants.forEach((variant) => terms.add(variant));
			} else if (variants.some((variant) => terms.has(variant))) {
				// synonym -> source
				terms.add(canonical);
			}
		}
	}

	return Array.from(terms).join(' ');
}
|
|
9
42
|
|
|
10
43
|
export class SearchMinisearch<T extends IndexEntry>
|
|
11
44
|
implements SearchInterface<T>
|
|
@@ -45,7 +78,7 @@ export class SearchMinisearch<T extends IndexEntry>
|
|
|
45
78
|
}) as any as T[];
|
|
46
79
|
|
|
47
80
|
// Apply melauto to classify results
|
|
48
|
-
data = applyMelauto(q, data);
|
|
81
|
+
data = applyMelauto(getMelautoQuery(q, this.info), data);
|
|
49
82
|
|
|
50
83
|
data = data.slice(0, this.info.max);
|
|
51
84
|
|
|
@@ -154,6 +154,13 @@ describe('tokenizeIndex', () => {
|
|
|
154
154
|
expect(result).toEqual(['the', 'car', 'vehicle', 'automobile', 'fast']);
|
|
155
155
|
});
|
|
156
156
|
|
|
157
|
+
it('should tokenize and apply synonyms regardless of case', () => {
|
|
158
|
+
const fieldInfo = mockSearchInfo.fields[0];
|
|
159
|
+
|
|
160
|
+
const result = tokenizeIndex('The Car is fast', fieldInfo);
|
|
161
|
+
expect(result).toEqual(['the', 'car', 'vehicle', 'automobile', 'fast']);
|
|
162
|
+
});
|
|
163
|
+
|
|
157
164
|
it('should normalize the input', () => {
|
|
158
165
|
const fieldInfo = mockSearchInfo.fields[0];
|
|
159
166
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { SearchInfo } from './SearchInterface';
|
|
2
2
|
import type { ItemOf } from '../../type.utils';
|
|
3
|
+
import { normalizeStr } from './utils';
|
|
3
4
|
|
|
4
5
|
/**
|
|
5
6
|
* Generates a tokenize method.
|
|
@@ -46,6 +47,7 @@ export const tokenizeIndex = (
|
|
|
46
47
|
info: ItemOf<SearchInfo['fields']>,
|
|
47
48
|
stopWords?: string[]
|
|
48
49
|
) => {
|
|
50
|
+
let normalizedStr = normalizeStr(str);
|
|
49
51
|
const wordRegex =
|
|
50
52
|
info.rules && info.rules !== 'soft'
|
|
51
53
|
? new RegExp(info.rules![0], 'gi')
|
|
@@ -55,34 +57,26 @@ export const tokenizeIndex = (
|
|
|
55
57
|
// For synonyms, add the synonyms to the string
|
|
56
58
|
if (info.synonyms) {
|
|
57
59
|
for (const source in info.synonyms) {
|
|
58
|
-
const
|
|
59
|
-
|
|
60
|
+
const normalizedSource = normalizeStr(source);
|
|
61
|
+
const synonyms = info.synonyms[source]
|
|
62
|
+
.map((synonym) => normalizeStr(synonym))
|
|
63
|
+
.join(' ');
|
|
64
|
+
|
|
65
|
+
normalizedStr = normalizedStr.replaceAll(
|
|
66
|
+
normalizedSource,
|
|
67
|
+
`${normalizedSource} ${synonyms}`
|
|
68
|
+
);
|
|
60
69
|
}
|
|
61
70
|
}
|
|
62
71
|
|
|
63
72
|
// We remove the stopWords from the string
|
|
64
73
|
return (
|
|
65
|
-
filterStopWords(
|
|
74
|
+
filterStopWords(normalizedStr, stopWords)
|
|
66
75
|
.match(wordRegex)
|
|
67
76
|
?.filter((w) => w.length >= minLength) ?? []
|
|
68
77
|
);
|
|
69
78
|
};
|
|
70
79
|
|
|
71
|
-
/**
|
|
72
|
-
* Normalize a string
|
|
73
|
-
* - Remove accent (é => e, à => a)
|
|
74
|
-
* - remove ligatures (æ => ae, , Æ => ae, œ => oe, Œ => oe)
|
|
75
|
-
* - Lowercase
|
|
76
|
-
*/
|
|
77
|
-
const normalizeStr = (str: string) => {
|
|
78
|
-
return str
|
|
79
|
-
.toLowerCase()
|
|
80
|
-
.replaceAll('œ', 'oe')
|
|
81
|
-
.replaceAll('æ', 'ae')
|
|
82
|
-
.normalize('NFD')
|
|
83
|
-
.replace(/[\u0300-\u036f]/g, '');
|
|
84
|
-
};
|
|
85
|
-
|
|
86
80
|
/**
|
|
87
81
|
* remove from a string all the words that are included in a stopwords list
|
|
88
82
|
*/
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
 * Produces the canonical form of a string.
 *
 * Steps, applied in this order:
 * - lowercase the input
 * - expand the ligatures œ => oe and æ => ae (Œ and Æ are handled too,
 *   because lowercasing runs first)
 * - decompose to NFD and drop the combining marks U+0300-U+036F, which
 *   strips accents (é => e, à => a)
 */
export const normalizeStr = (str: string): string => {
	const lowered = str.toLowerCase();
	// split/join replaces every occurrence of the literal ligature.
	const withoutLigatures = lowered.split('œ').join('oe').split('æ').join('ae');
	const decomposed = withoutLigatures.normalize('NFD');
	return decomposed.replace(/[\u0300-\u036f]/g, '');
};
|