@easyops-cn/docusaurus-search-local 0.44.6 → 0.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,19 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4
4
 
5
+ ## [0.45.0](https://github.com/easyops-cn/docusaurus-search-local/compare/v0.44.6...v0.45.0) (2024-10-09)
6
+
7
+
8
+ ### Features
9
+
10
+ * fix a perf issue when searching with a large number of Chinese words ([abe720c](https://github.com/easyops-cn/docusaurus-search-local/commit/abe720cd3d521122e3aaa8b2d8ba7d0967fe046d)), closes [#312](https://github.com/easyops-cn/docusaurus-search-local/issues/312)
11
+
12
+
13
+ ### Bug Fixes
14
+
15
+ * fix a perf issue where zh words were cut repeatedly and unnecessarily ([870dc88](https://github.com/easyops-cn/docusaurus-search-local/commit/870dc887f85ded4cbf000c2480e9e92c6fa0cad6))
16
+ * limit the number of terms, closes [#312](https://github.com/easyops-cn/docusaurus-search-local/issues/312) ([c889047](https://github.com/easyops-cn/docusaurus-search-local/commit/c8890479914e89f7dea075f5f191bbc76e99d249))
17
+
5
18
  ## [0.44.6](https://github.com/easyops-cn/docusaurus-search-local/compare/v0.44.5...v0.44.6) (2024-10-08)
6
19
 
7
20
 
@@ -1,4 +1,6 @@
1
1
  import { cutZhWords } from "./cutZhWords";
2
+ const MAX_TERMS = 12;
3
+ const HALF_MAX_TERMS = MAX_TERMS / 2;
2
4
  /**
3
5
  * Get all possible terms for a list of tokens consisting of mixed Chinese and non-Chinese words,
4
6
  * by a Chinese words dictionary.
@@ -9,27 +11,57 @@ import { cutZhWords } from "./cutZhWords";
9
11
  * @returns A smart term list.
10
12
  */
11
13
  export function smartTerms(tokens, zhDictionary) {
12
- const terms = [];
13
- function cutMixedWords(subTokens, carry) {
14
- if (subTokens.length === 0) {
15
- terms.push(carry);
16
- return;
17
- }
18
- const token = subTokens[0];
14
+ const tokenTerms = tokens
15
+ .map((token) => {
19
16
  if (/\p{Unified_Ideograph}/u.test(token)) {
20
- const terms = cutZhWords(token, zhDictionary);
21
- for (const term of terms) {
22
- const nextCarry = carry.concat(...term);
23
- cutMixedWords(subTokens.slice(1), nextCarry);
24
- }
17
+ return cutZhWords(token, zhDictionary);
25
18
  }
26
19
  else {
27
- const nextCarry = carry.concat({
28
- value: token,
29
- });
30
- cutMixedWords(subTokens.slice(1), nextCarry);
20
+ return [{ value: token }];
21
+ }
22
+ })
23
+ .slice(0, MAX_TERMS);
24
+ const tokenTermsThatAreMultiple = tokenTerms.filter((tokenTerm) => tokenTerm.length > 1);
25
+ let termsProduct = 1;
26
+ let overflowed = false;
27
+ for (const tokenTerm of tokenTermsThatAreMultiple) {
28
+ if (overflowed) {
29
+ tokenTerm.splice(1, tokenTerm.length - 1);
30
+ }
31
+ else {
32
+ if (tokenTerm.length > HALF_MAX_TERMS) {
33
+ tokenTerm.splice(HALF_MAX_TERMS, tokenTerm.length - HALF_MAX_TERMS);
34
+ }
35
+ const product = termsProduct * tokenTerm.length;
36
+ if (product >= MAX_TERMS) {
37
+ if (product > MAX_TERMS) {
38
+ const max = Math.floor(MAX_TERMS / termsProduct);
39
+ tokenTerm.splice(max, tokenTerm.length - max);
40
+ termsProduct = max * termsProduct;
41
+ }
42
+ else {
43
+ termsProduct = product;
44
+ }
45
+ if (termsProduct > HALF_MAX_TERMS) {
46
+ overflowed = true;
47
+ }
48
+ }
49
+ else {
50
+ termsProduct = product;
51
+ }
52
+ }
53
+ }
54
+ // Get all possible combinations of terms.
55
+ const terms = [];
56
+ function combine(index, carry) {
57
+ if (index === tokenTerms.length || carry.length >= MAX_TERMS) {
58
+ terms.push(carry.slice(0, MAX_TERMS));
59
+ return;
60
+ }
61
+ for (const term of tokenTerms[index]) {
62
+ combine(index + 1, carry.concat(term));
31
63
  }
32
64
  }
33
- cutMixedWords(tokens, []);
65
+ combine(0, []);
34
66
  return terms;
35
67
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@easyops-cn/docusaurus-search-local",
3
- "version": "0.44.6",
3
+ "version": "0.45.0",
4
4
  "description": "An offline/local search plugin for Docusaurus v3",
5
5
  "repository": {
6
6
  "type": "git",