@inseefr/lunatic 3.12.1 → 3.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/utils/search/SearchMiniSearch.spec.js +16 -1
- package/esm/utils/search/SearchMiniSearch.spec.js.map +1 -1
- package/esm/utils/search/SearchMinisearch.js +26 -1
- package/esm/utils/search/SearchMinisearch.js.map +1 -1
- package/esm/utils/search/tokenizer.js +1 -14
- package/esm/utils/search/tokenizer.js.map +1 -1
- package/esm/utils/search/utils.d.ts +7 -0
- package/esm/utils/search/utils.js +15 -0
- package/esm/utils/search/utils.js.map +1 -0
- package/package.json +9 -2
- package/src/utils/search/SearchMiniSearch.spec.ts +21 -1
- package/src/utils/search/SearchMinisearch.ts +34 -1
- package/src/utils/search/tokenizer.ts +1 -15
- package/src/utils/search/utils.ts +14 -0
- package/tsconfig.build.tsbuildinfo +1 -1
- package/utils/search/SearchMiniSearch.spec.js +15 -1
- package/utils/search/SearchMiniSearch.spec.js.map +1 -1
- package/utils/search/SearchMinisearch.js +26 -1
- package/utils/search/SearchMinisearch.js.map +1 -1
- package/utils/search/tokenizer.js +6 -19
- package/utils/search/tokenizer.js.map +1 -1
- package/utils/search/utils.d.ts +7 -0
- package/utils/search/utils.js +19 -0
- package/utils/search/utils.js.map +1 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { describe, it, expect, vi, beforeAll, afterEach } from 'vitest';
|
|
2
2
|
import { SearchMinisearch } from './SearchMinisearch';
|
|
3
|
+
import { applyMelauto } from './melauto';
|
|
3
4
|
vi.mock('minisearch', () => {
|
|
4
5
|
return {
|
|
5
6
|
default: vi.fn().mockImplementation(() => ({
|
|
@@ -20,7 +21,13 @@ describe('SearchMinisearch', () => {
|
|
|
20
21
|
beforeAll(() => {
|
|
21
22
|
searchInstance = new SearchMinisearch({
|
|
22
23
|
name: 'test-suggester',
|
|
23
|
-
fields: [
|
|
24
|
+
fields: [
|
|
25
|
+
{ name: 'id' },
|
|
26
|
+
{
|
|
27
|
+
name: 'label',
|
|
28
|
+
synonyms: { accueil: ['ACCEUIL', 'ACUEIL'] },
|
|
29
|
+
},
|
|
30
|
+
],
|
|
24
31
|
queryParser: {
|
|
25
32
|
type: 'tokenized',
|
|
26
33
|
params: { language: 'English', pattern: '\\w+', min: 1 },
|
|
@@ -47,5 +54,13 @@ describe('SearchMinisearch', () => {
|
|
|
47
54
|
await searchInstance.index(mockData);
|
|
48
55
|
expect((_a = searchInstance.db) === null || _a === void 0 ? void 0 : _a.addAll).not.toHaveBeenCalled();
|
|
49
56
|
});
|
|
57
|
+
it('should expand query synonyms before melauto sorting', async () => {
|
|
58
|
+
var _a;
|
|
59
|
+
await searchInstance.index(mockData);
|
|
60
|
+
((_a = searchInstance.db) === null || _a === void 0 ? void 0 : _a.search).mockReturnValue(mockData);
|
|
61
|
+
vi.mocked(applyMelauto).mockReturnValue(mockData);
|
|
62
|
+
await searchInstance.search('agent acceuil');
|
|
63
|
+
expect(applyMelauto).toHaveBeenCalledWith('agent acceuil accueil', mockData);
|
|
64
|
+
});
|
|
50
65
|
});
|
|
51
66
|
//# sourceMappingURL=SearchMiniSearch.spec.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"SearchMiniSearch.spec.js","sourceRoot":"","sources":["../../../src/utils/search/SearchMiniSearch.spec.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACxE,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"SearchMiniSearch.spec.js","sourceRoot":"","sources":["../../../src/utils/search/SearchMiniSearch.spec.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACxE,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAEzC,EAAE,CAAC,IAAI,CAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,OAAO;QACN,OAAO,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,GAAG,EAAE,CAAC,CAAC;YAC1C,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE;YACf,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE;SACf,CAAC,CAAC;KACH,CAAC;AACH,CAAC,CAAC,CAAC;AAEH,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,GAAG,EAAE,CAAC,CAAC;IAC3B,YAAY,EAAE,EAAE,CAAC,EAAE,EAAE;CACrB,CAAC,CAAC,CAAC;AAEJ,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;IACjC,IAAI,cAAqC,CAAC;IAC1C,MAAM,QAAQ,GAAG;QAChB,EAAE,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,YAAY,EAAE;QAChC,EAAE,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,aAAa,EAAE;KACjC,CAAC;IAEF,SAAS,CAAC,GAAG,EAAE;QACd,cAAc,GAAG,IAAI,gBAAgB,CAAC;YACrC,IAAI,EAAE,gBAAgB;YACtB,MAAM,EAAE;gBACP,EAAE,IAAI,EAAE,IAAI,EAAE;gBACd;oBACC,IAAI,EAAE,OAAO;oBACb,QAAQ,EAAE,EAAE,OAAO,EAAE,CAAC,SAAS,EAAE,QAAQ,CAAC,EAAE;iBAC5C;aACD;YACD,WAAW,EAAE;gBACZ,IAAI,EAAE,WAAW;gBACjB,MAAM,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE;aACxD;YACD,GAAG,EAAE,EAAE;SACP,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,GAAG,EAAE;QACd,MAAM,cAAc,GAAG,cAAc,CAAC,EAAS,CAAC;QAChD,cAAc,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;;QAC3D,MAAM,cAAc,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QAErC,uDAAuD;QACvD,MAAM,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QACzC,MAAM,CAAC,cAAc,CAAC,SAAS,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE9C,mDAAmD;QACnD,MAAM,CAAC,MAAA,cAAc,CAAC,EAAE,0CAAE,MAAM,CAAC,CAAC,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IAClE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,KAAK,IAAI,EAAE;;QACvD,cAAc,CAAC,OAAO,GAAG,IAAI,CAAC;QAC9B,MAAM,cAAc,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QAErC,MAAM,CAAC,MAAA,cAAc,CAAC,EAAE,0CAAE,MAAM,CAAC,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;IAC1D,CAAC,CAAC,CAAC;IAE
H,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;;QACpE,MAAM,cAAc,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACrC,CAAC,MAAA,cAAc,CAAC,EAAE,0CAAE,MAAc,CAAA,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;QAC7D,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,eAAe,CAAC,QAAe,CAAC,CAAC;QAEzD,MAAM,cAAc,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;QAE7C,MAAM,CAAC,YAAY,CAAC,CAAC,oBAAoB,CACxC,uBAAuB,EACvB,QAAQ,CACR,CAAC;IACH,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC"}
|
|
@@ -1,6 +1,31 @@
|
|
|
1
1
|
import { applyMelauto } from './melauto';
|
|
2
2
|
import MiniSearch from 'minisearch';
|
|
3
3
|
import { tokenizer } from './tokenizer';
|
|
4
|
+
import { normalizeStr } from './utils';
|
|
5
|
+
function getMelautoQuery(query, info) {
|
|
6
|
+
const tokens = tokenizer(info)(query);
|
|
7
|
+
// existing query tokens (already tokenized/normalized by tokenizer).
|
|
8
|
+
const expandedTokens = new Set(tokens);
|
|
9
|
+
// add synonyms to keep melauto ranking.
|
|
10
|
+
for (const field of info.fields) {
|
|
11
|
+
if (!field.synonyms) {
|
|
12
|
+
continue;
|
|
13
|
+
}
|
|
14
|
+
for (const source in field.synonyms) {
|
|
15
|
+
const normalizedSource = normalizeStr(source);
|
|
16
|
+
const normalizedSynonyms = field.synonyms[source].map((synonym) => normalizeStr(synonym));
|
|
17
|
+
// source -> synonyms
|
|
18
|
+
if (expandedTokens.has(normalizedSource)) {
|
|
19
|
+
normalizedSynonyms.forEach((synonym) => expandedTokens.add(synonym));
|
|
20
|
+
}
|
|
21
|
+
// synonym -> source
|
|
22
|
+
if (normalizedSynonyms.some((synonym) => expandedTokens.has(synonym))) {
|
|
23
|
+
expandedTokens.add(normalizedSource);
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
return Array.from(expandedTokens).join(' ');
|
|
28
|
+
}
|
|
4
29
|
export class SearchMinisearch {
|
|
5
30
|
constructor(info) {
|
|
6
31
|
this.db = null;
|
|
@@ -31,7 +56,7 @@ export class SearchMinisearch {
|
|
|
31
56
|
prefix: (term) => term.length > 2,
|
|
32
57
|
});
|
|
33
58
|
// Apply melauto to classify results
|
|
34
|
-
data = applyMelauto(q, data);
|
|
59
|
+
data = applyMelauto(getMelautoQuery(q, this.info), data);
|
|
35
60
|
data = data.slice(0, this.info.max);
|
|
36
61
|
return data;
|
|
37
62
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"SearchMinisearch.js","sourceRoot":"","sources":["../../../src/utils/search/SearchMinisearch.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AACzC,OAAO,UAAU,MAAM,YAAY,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"SearchMinisearch.js","sourceRoot":"","sources":["../../../src/utils/search/SearchMinisearch.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AACzC,OAAO,UAAU,MAAM,YAAY,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAEvC,SAAS,eAAe,CAAC,KAAa,EAAE,IAAgB;IACvD,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC;IAEtC,qEAAqE;IACrE,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;IAEvC,wCAAwC;IACxC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;QACjC,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;YACrB,SAAS;QACV,CAAC;QACD,KAAK,MAAM,MAAM,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACrC,MAAM,gBAAgB,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;YAC9C,MAAM,kBAAkB,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CACjE,YAAY,CAAC,OAAO,CAAC,CACrB,CAAC;YAEF,qBAAqB;YACrB,IAAI,cAAc,CAAC,GAAG,CAAC,gBAAgB,CAAC,EAAE,CAAC;gBAC1C,kBAAkB,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YACtE,CAAC;YAED,oBAAoB;YACpB,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC;gBACvE,cAAc,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;YACtC,CAAC;QACF,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC7C,CAAC;AAED,MAAM,OAAO,gBAAgB;IAO5B,YAAY,IAAgB;QAJ5B,OAAE,GAAyB,IAAI,CAAC;QAEhC,YAAO,GAAG,KAAK,CAAC;QAGf,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IAClB,CAAC;IAED,SAAS;QACR,OAAO,IAAI,CAAC,OAAO,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAS;QACpB,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAClB,OAAO,OAAO,CAAC,OAAO,EAAE,CAAC;QAC1B,CAAC;QACD,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;QAC5D,IAAI,CAAC,EAAE,GAAG,IAAI,UAAU,CAAC;YACxB,MAAM,EAAE,UAAU;YAClB,WAAW,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU;YACxD,QAAQ,EAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC;SAC9B,CAAC,CAAC;QACH,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;IACrB,CAAC;IAED,
KAAK,CAAC,MAAM,CAAC,CAAS;QACrB,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACxC,OAAO,EAAE,CAAC;QACX,CAAC;QACD,IAAI,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC,EAAE;YAC5B,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC;SACjC,CAAe,CAAC;QAEjB,oCAAoC;QACpC,IAAI,GAAG,YAAY,CAAC,eAAe,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,CAAC,CAAC;QAEzD,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEpC,OAAO,IAAI,CAAC;IACb,CAAC;IAED,aAAa,CAAC,EAAO;QACpB,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC;YACd,OAAO,EAAO,CAAC;QAChB,CAAC;QACD,OAAO,IAAI,CAAC,EAAE,CAAC,eAAe,CAAC,EAAE,CAAM,CAAC;IACzC,CAAC;CACD"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { normalizeStr } from './utils';
|
|
1
2
|
/**
|
|
2
3
|
* Generates a tokenize method.
|
|
3
4
|
* When used for tokenizing a search query instead of the indexing, the fieldName is undefined.
|
|
@@ -50,20 +51,6 @@ export const tokenizeIndex = (str, info, stopWords) => {
|
|
|
50
51
|
return ((_c = (_b = filterStopWords(normalizedStr, stopWords)
|
|
51
52
|
.match(wordRegex)) === null || _b === void 0 ? void 0 : _b.filter((w) => w.length >= minLength)) !== null && _c !== void 0 ? _c : []);
|
|
52
53
|
};
|
|
53
|
-
/**
|
|
54
|
-
* Normalize a string
|
|
55
|
-
* - Remove accent (é => e, à => a)
|
|
56
|
-
* - remove ligatures (æ => ae, , Æ => ae, œ => oe, Œ => oe)
|
|
57
|
-
* - Lowercase
|
|
58
|
-
*/
|
|
59
|
-
const normalizeStr = (str) => {
|
|
60
|
-
return str
|
|
61
|
-
.toLowerCase()
|
|
62
|
-
.replaceAll('œ', 'oe')
|
|
63
|
-
.replaceAll('æ', 'ae')
|
|
64
|
-
.normalize('NFD')
|
|
65
|
-
.replace(/[\u0300-\u036f]/g, '');
|
|
66
|
-
};
|
|
67
54
|
/**
|
|
68
55
|
* remove from a string all the words that are included in a stopwords list
|
|
69
56
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../../src/utils/search/tokenizer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../../src/utils/search/tokenizer.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAEvC;;;GAGG;AACH,MAAM,CAAC,MAAM,SAAS,GACrB,CAAC,IAAgB,EAAE,EAAE,CAAC,CAAC,GAAW,EAAE,SAAkB,EAAE,EAAE;IACzD,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC;IAC5D,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;IAEjC,OAAO,KAAK;QACX,CAAC,CAAC,aAAa,CAAC,GAAG,EAAE,KAAK,EAAE,SAAS,CAAC;QACtC,CAAC,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;AACzC,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG,CAAC,GAAW,EAAE,IAA+B,EAAE,EAAE;;IAC7E,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QAC1B,OAAO,YAAY,CAAC,GAAG,CAAC;aACtB,KAAK,CAAC,YAAY,CAAC;aACnB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC/B,CAAC;IAED,MAAM,SAAS,GACd,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,KAAK,MAAM;QACpD,CAAC,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC;QACvC,CAAC,CAAC,OAAO,CAAC;IACZ,MAAM,SAAS,GAAG,MAAA,IAAI,CAAC,MAAM,CAAC,GAAG,mCAAI,CAAC,CAAC;IAEvC,OAAO,CACN,MAAA,MAAA,YAAY,CAAC,GAAG,CAAC;SACf,KAAK,CAAC,SAAS,CAAC,0CACf,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,SAAS,CAAC,mCAAI,EAAE,CAC7C,CAAC;AACH,CAAC,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG,CAC5B,GAAW,EACX,IAAkC,EAClC,SAAoB,EACnB,EAAE;;IACH,IAAI,aAAa,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IACtC,MAAM,SAAS,GACd,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,KAAK,MAAM;QAClC,CAAC,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,KAAM,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC;QAClC,CAAC,CAAC,OAAO,CAAC;IACZ,MAAM,SAAS,GAAG,MAAA,IAAI,CAAC,GAAG,mCAAI,CAAC,CAAC;IAEhC,+CAA+C;IAC/C,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QACnB,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACpC,MAAM,gBAAgB,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;YAC9C,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;iBACpC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;iBACvC,IAAI,CAAC,GAAG,CAAC,CAAC;YAEZ,aAAa,GAAG,aAAa,CAAC,UAAU,CACvC,gBAAgB,EAChB,GAAG,gBAAgB,IAAI,QAAQ,EAAE,CACjC,CAAC;Q
ACH,CAAC;IACF,CAAC;IAED,0CAA0C;IAC1C,OAAO,CACN,MAAA,MAAA,eAAe,CAAC,aAAa,EAAE,SAAS,CAAC;SACvC,KAAK,CAAC,SAAS,CAAC,0CACf,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,SAAS,CAAC,mCAAI,EAAE,CAC7C,CAAC;AACH,CAAC,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,KAAa,EAAE,SAAoB;IAClE,IAAI,CAAC,SAAS,EAAE,CAAC;QAChB,OAAO,KAAK,CAAC;IACd,CAAC;IACD,MAAM,kBAAkB,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;IACvE,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACjC,MAAM,aAAa,GAAG,KAAK,CAAC,MAAM,CACjC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,kBAAkB,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAC1D,CAAC;IACF,OAAO,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAChC,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Normalize a string
|
|
3
|
+
* - Remove accent (é => e, à => a)
|
|
4
|
+
* - remove ligatures (æ => ae, , Æ => ae, œ => oe, Œ => oe)
|
|
5
|
+
* - Lowercase
|
|
6
|
+
*/
|
|
7
|
+
export const normalizeStr = (str) => {
|
|
8
|
+
return str
|
|
9
|
+
.toLowerCase()
|
|
10
|
+
.replaceAll('œ', 'oe')
|
|
11
|
+
.replaceAll('æ', 'ae')
|
|
12
|
+
.normalize('NFD')
|
|
13
|
+
.replace(/[\u0300-\u036f]/g, '');
|
|
14
|
+
};
|
|
15
|
+
//# sourceMappingURL=utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../../src/utils/search/utils.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,CAAC,GAAW,EAAE,EAAE;IAC3C,OAAO,GAAG;SACR,WAAW,EAAE;SACb,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC;SACrB,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC;SACrB,SAAS,CAAC,KAAK,CAAC;SAChB,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC;AACnC,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@inseefr/lunatic",
|
|
3
|
-
"version": "3.12.1",
|
|
3
|
+
"version": "3.12.2",
|
|
4
4
|
"description": "Library of questionnaire components",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -462,6 +462,7 @@
|
|
|
462
462
|
"src/utils/search/melauto.ts",
|
|
463
463
|
"src/utils/search/tokenizer.spec.ts",
|
|
464
464
|
"src/utils/search/tokenizer.ts",
|
|
465
|
+
"src/utils/search/utils.ts",
|
|
465
466
|
"src/utils/variables.spec.ts",
|
|
466
467
|
"src/utils/variables.ts",
|
|
467
468
|
"src/utils/vtl.ts",
|
|
@@ -1667,6 +1668,9 @@
|
|
|
1667
1668
|
"esm/utils/search/tokenizer.spec.d.ts",
|
|
1668
1669
|
"esm/utils/search/tokenizer.spec.js",
|
|
1669
1670
|
"esm/utils/search/tokenizer.spec.js.map",
|
|
1671
|
+
"esm/utils/search/utils.d.ts",
|
|
1672
|
+
"esm/utils/search/utils.js",
|
|
1673
|
+
"esm/utils/search/utils.js.map",
|
|
1670
1674
|
"esm/utils/variables.d.ts",
|
|
1671
1675
|
"esm/utils/variables.js",
|
|
1672
1676
|
"esm/utils/variables.js.map",
|
|
@@ -2009,6 +2013,9 @@
|
|
|
2009
2013
|
"utils/search/tokenizer.spec.d.ts",
|
|
2010
2014
|
"utils/search/tokenizer.spec.js",
|
|
2011
2015
|
"utils/search/tokenizer.spec.js.map",
|
|
2016
|
+
"utils/search/utils.d.ts",
|
|
2017
|
+
"utils/search/utils.js",
|
|
2018
|
+
"utils/search/utils.js.map",
|
|
2012
2019
|
"utils/variables.d.ts",
|
|
2013
2020
|
"utils/variables.js",
|
|
2014
2021
|
"utils/variables.js.map",
|
|
@@ -2104,4 +2111,4 @@
|
|
|
2104
2111
|
"node": "20.16.0",
|
|
2105
2112
|
"pnpm": "9.15.0"
|
|
2106
2113
|
}
|
|
2107
|
-
}
|
|
2114
|
+
}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { describe, it, expect, vi, beforeAll, afterEach } from 'vitest';
|
|
2
2
|
import { SearchMinisearch } from './SearchMinisearch';
|
|
3
|
+
import { applyMelauto } from './melauto';
|
|
3
4
|
|
|
4
5
|
vi.mock('minisearch', () => {
|
|
5
6
|
return {
|
|
@@ -24,7 +25,13 @@ describe('SearchMinisearch', () => {
|
|
|
24
25
|
beforeAll(() => {
|
|
25
26
|
searchInstance = new SearchMinisearch({
|
|
26
27
|
name: 'test-suggester',
|
|
27
|
-
fields: [
|
|
28
|
+
fields: [
|
|
29
|
+
{ name: 'id' },
|
|
30
|
+
{
|
|
31
|
+
name: 'label',
|
|
32
|
+
synonyms: { accueil: ['ACCEUIL', 'ACUEIL'] },
|
|
33
|
+
},
|
|
34
|
+
],
|
|
28
35
|
queryParser: {
|
|
29
36
|
type: 'tokenized',
|
|
30
37
|
params: { language: 'English', pattern: '\\w+', min: 1 },
|
|
@@ -55,4 +62,17 @@ describe('SearchMinisearch', () => {
|
|
|
55
62
|
|
|
56
63
|
expect(searchInstance.db?.addAll).not.toHaveBeenCalled();
|
|
57
64
|
});
|
|
65
|
+
|
|
66
|
+
it('should expand query synonyms before melauto sorting', async () => {
|
|
67
|
+
await searchInstance.index(mockData);
|
|
68
|
+
(searchInstance.db?.search as any).mockReturnValue(mockData);
|
|
69
|
+
vi.mocked(applyMelauto).mockReturnValue(mockData as any);
|
|
70
|
+
|
|
71
|
+
await searchInstance.search('agent acceuil');
|
|
72
|
+
|
|
73
|
+
expect(applyMelauto).toHaveBeenCalledWith(
|
|
74
|
+
'agent acceuil accueil',
|
|
75
|
+
mockData
|
|
76
|
+
);
|
|
77
|
+
});
|
|
58
78
|
});
|
|
@@ -6,6 +6,39 @@ import type {
|
|
|
6
6
|
import { applyMelauto } from './melauto';
|
|
7
7
|
import MiniSearch from 'minisearch';
|
|
8
8
|
import { tokenizer } from './tokenizer';
|
|
9
|
+
import { normalizeStr } from './utils';
|
|
10
|
+
|
|
11
|
+
function getMelautoQuery(query: string, info: SearchInfo) {
|
|
12
|
+
const tokens = tokenizer(info)(query);
|
|
13
|
+
|
|
14
|
+
// existing query tokens (already tokenized/normalized by tokenizer).
|
|
15
|
+
const expandedTokens = new Set(tokens);
|
|
16
|
+
|
|
17
|
+
// add synonyms to keep melauto ranking.
|
|
18
|
+
for (const field of info.fields) {
|
|
19
|
+
if (!field.synonyms) {
|
|
20
|
+
continue;
|
|
21
|
+
}
|
|
22
|
+
for (const source in field.synonyms) {
|
|
23
|
+
const normalizedSource = normalizeStr(source);
|
|
24
|
+
const normalizedSynonyms = field.synonyms[source].map((synonym) =>
|
|
25
|
+
normalizeStr(synonym)
|
|
26
|
+
);
|
|
27
|
+
|
|
28
|
+
// source -> synonyms
|
|
29
|
+
if (expandedTokens.has(normalizedSource)) {
|
|
30
|
+
normalizedSynonyms.forEach((synonym) => expandedTokens.add(synonym));
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
// synonym -> source
|
|
34
|
+
if (normalizedSynonyms.some((synonym) => expandedTokens.has(synonym))) {
|
|
35
|
+
expandedTokens.add(normalizedSource);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
return Array.from(expandedTokens).join(' ');
|
|
41
|
+
}
|
|
9
42
|
|
|
10
43
|
export class SearchMinisearch<T extends IndexEntry>
|
|
11
44
|
implements SearchInterface<T>
|
|
@@ -45,7 +78,7 @@ export class SearchMinisearch<T extends IndexEntry>
|
|
|
45
78
|
}) as any as T[];
|
|
46
79
|
|
|
47
80
|
// Apply melauto to classify results
|
|
48
|
-
data = applyMelauto(q, data);
|
|
81
|
+
data = applyMelauto(getMelautoQuery(q, this.info), data);
|
|
49
82
|
|
|
50
83
|
data = data.slice(0, this.info.max);
|
|
51
84
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { SearchInfo } from './SearchInterface';
|
|
2
2
|
import type { ItemOf } from '../../type.utils';
|
|
3
|
+
import { normalizeStr } from './utils';
|
|
3
4
|
|
|
4
5
|
/**
|
|
5
6
|
* Generates a tokenize method.
|
|
@@ -76,21 +77,6 @@ export const tokenizeIndex = (
|
|
|
76
77
|
);
|
|
77
78
|
};
|
|
78
79
|
|
|
79
|
-
/**
|
|
80
|
-
* Normalize a string
|
|
81
|
-
* - Remove accent (é => e, à => a)
|
|
82
|
-
* - remove ligatures (æ => ae, , Æ => ae, œ => oe, Œ => oe)
|
|
83
|
-
* - Lowercase
|
|
84
|
-
*/
|
|
85
|
-
const normalizeStr = (str: string) => {
|
|
86
|
-
return str
|
|
87
|
-
.toLowerCase()
|
|
88
|
-
.replaceAll('œ', 'oe')
|
|
89
|
-
.replaceAll('æ', 'ae')
|
|
90
|
-
.normalize('NFD')
|
|
91
|
-
.replace(/[\u0300-\u036f]/g, '');
|
|
92
|
-
};
|
|
93
|
-
|
|
94
80
|
/**
|
|
95
81
|
* remove from a string all the words that are included in a stopwords list
|
|
96
82
|
*/
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Normalize a string
|
|
3
|
+
* - Remove accent (é => e, à => a)
|
|
4
|
+
* - remove ligatures (æ => ae, , Æ => ae, œ => oe, Œ => oe)
|
|
5
|
+
* - Lowercase
|
|
6
|
+
*/
|
|
7
|
+
export const normalizeStr = (str: string) => {
|
|
8
|
+
return str
|
|
9
|
+
.toLowerCase()
|
|
10
|
+
.replaceAll('œ', 'oe')
|
|
11
|
+
.replaceAll('æ', 'ae')
|
|
12
|
+
.normalize('NFD')
|
|
13
|
+
.replace(/[\u0300-\u036f]/g, '');
|
|
14
|
+
};
|