axyseo 2.1.8 → 2.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/build/helpers/getLanguageResearcher.js +71 -0
  2. package/build/helpers/getLanguageResearcher.js.map +1 -0
  3. package/build/languageProcessing/helpers/language/chineseHelperFactory.js +162 -0
  4. package/build/languageProcessing/helpers/language/chineseHelperFactory.js.map +1 -0
  5. package/build/languageProcessing/helpers/language/isChineseText.js +17 -0
  6. package/build/languageProcessing/helpers/language/isChineseText.js.map +1 -0
  7. package/build/languageProcessing/helpers/match/matchTextWithWord.js +1 -1
  8. package/build/languageProcessing/helpers/match/matchTextWithWord.js.map +1 -1
  9. package/build/languageProcessing/helpers/word/getWords.js +22 -14
  10. package/build/languageProcessing/helpers/word/getWords.js.map +1 -1
  11. package/build/languageProcessing/languages/zh/Researcher.js +41 -0
  12. package/build/languageProcessing/languages/zh/Researcher.js.map +1 -0
  13. package/build/languageProcessing/languages/zh/config/functionWords.js +40 -0
  14. package/build/languageProcessing/languages/zh/config/functionWords.js.map +1 -0
  15. package/build/languageProcessing/languages/zh/helpers/getSentences.js +42 -0
  16. package/build/languageProcessing/languages/zh/helpers/getSentences.js.map +1 -0
  17. package/build/languageProcessing/languages/zh/helpers/matchTextWithWord.js +35 -0
  18. package/build/languageProcessing/languages/zh/helpers/matchTextWithWord.js.map +1 -0
  19. package/build/languageProcessing/languages/zh/helpers/splitIntoTokensCustom.js +41 -0
  20. package/build/languageProcessing/languages/zh/helpers/splitIntoTokensCustom.js.map +1 -0
  21. package/build/languageProcessing/researches/findKeywordInFirstParagraph.js +23 -1
  22. package/build/languageProcessing/researches/findKeywordInFirstParagraph.js.map +1 -1
  23. package/build/languageProcessing/researches/getAnchorsWithKeyphrase.js +22 -17
  24. package/build/languageProcessing/researches/getAnchorsWithKeyphrase.js.map +1 -1
  25. package/build/languageProcessing/researches/getParagraphs.js +13 -4
  26. package/build/languageProcessing/researches/getParagraphs.js.map +1 -1
  27. package/build/languageProcessing/researches/keywordCount.js +29 -1
  28. package/build/languageProcessing/researches/keywordCount.js.map +1 -1
  29. package/build/languageProcessing/researches/keywordCountInUrl.js +150 -5
  30. package/build/languageProcessing/researches/keywordCountInUrl.js.map +1 -1
  31. package/build/languageProcessing/researches/metaDescriptionKeyword.js +16 -4
  32. package/build/languageProcessing/researches/metaDescriptionKeyword.js.map +1 -1
  33. package/build/scoring/assessments/seo/IntroductionKeywordAssessment.js +5 -1
  34. package/build/scoring/assessments/seo/IntroductionKeywordAssessment.js.map +1 -1
  35. package/build/scoring/assessments/seo/KeywordDensityAssessment.js.map +1 -1
  36. package/build/scoring/assessments/seo/UrlKeywordAssessment.js +5 -1
  37. package/build/scoring/assessments/seo/UrlKeywordAssessment.js.map +1 -1
  38. package/package.json +1 -1
@@ -0,0 +1,41 @@
1
/**
 * Custom tokenizer for Chinese language.
 * Chinese doesn't use spaces between words, so we split by characters
 * while preserving punctuation and spaces as separate tokens.
 *
 * @param {string} text The text to tokenize.
 *
 * @returns {string[]} An array of tokens.
 */
export default function splitIntoTokensCustom(text) {
  if (!text) {
    return [];
  }

  const tokens = [];
  // CJK Unified Ideographs (the common Han range).
  // NOTE(review): Extension A (U+3400–U+4DBF) is not covered — confirm whether rare Han characters matter here.
  const chineseCharRegex = /[\u4e00-\u9fff]/;
  const whitespaceRegex = /\s/;
  // ASCII punctuation plus the common full-width/CJK punctuation marks
  // (、。！？，；：（）《》「」【】), so Chinese punctuation is emitted as its own
  // token instead of being glued onto an adjacent Latin/number run.
  const punctuationRegex = /[.,;:!?()[\]{}'"\u3001\u3002\uff01\uff1f\uff0c\uff1b\uff1a\uff08\uff09\u300a\u300b\u300c\u300d\u3010\u3011]/;

  for (let i = 0; i < text.length; i++) {
    const char = text[i];

    if (chineseCharRegex.test(char)) {
      // Chinese character - add as individual token.
      tokens.push(char);
    } else if (whitespaceRegex.test(char)) {
      // Whitespace - not a token.
      continue;
    } else if (punctuationRegex.test(char)) {
      // Punctuation - add as individual token.
      tokens.push(char);
    } else {
      // Other characters (like English letters, numbers) - group together
      // until the next Chinese character, whitespace, or punctuation mark.
      let word = char;
      while (
        i + 1 < text.length &&
        !chineseCharRegex.test(text[i + 1]) &&
        !whitespaceRegex.test(text[i + 1]) &&
        !punctuationRegex.test(text[i + 1])
      ) {
        i++;
        word += text[i];
      }
      tokens.push(word);
    }
  }

  // Defensive: drop any token that is purely whitespace.
  return tokens.filter(token => token.trim() !== '');
}
41
+ //# sourceMappingURL=splitIntoTokensCustom.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"splitIntoTokensCustom.js","names":["splitIntoTokensCustom","text","tokens","chineseCharRegex","whitespaceRegex","punctuationRegex","i","length","char","test","push","word","filter","token","trim"],"sources":["../../../../../src/languageProcessing/languages/zh/helpers/splitIntoTokensCustom.js"],"sourcesContent":["/**\n * Custom tokenizer for Chinese language.\n * Chinese doesn't use spaces between words, so we split by characters\n * while preserving punctuation and spaces as separate tokens.\n *\n * @param {string} text The text to tokenize.\n *\n * @returns {string[]} An array of tokens.\n */\nexport default function splitIntoTokensCustom(text) {\n if (!text) {\n return [];\n }\n\n const tokens = [];\n const chineseCharRegex = /[\\u4e00-\\u9fff]/;\n const whitespaceRegex = /\\s/;\n const punctuationRegex = /[.,;:!?()[\\]{}'\"]/;\n\n for (let i = 0; i < text.length; i++) {\n const char = text[i];\n\n if (chineseCharRegex.test(char)) {\n // Chinese character - add as individual token\n tokens.push(char);\n } else if (whitespaceRegex.test(char)) {\n // Whitespace - skip\n continue;\n } else if (punctuationRegex.test(char)) {\n // Punctuation - add as individual token\n tokens.push(char);\n } else {\n // Other characters (like English letters, numbers) - group together\n let word = char;\n while (\n i + 1 < text.length &&\n !chineseCharRegex.test(text[i + 1]) &&\n !whitespaceRegex.test(text[i + 1]) &&\n !punctuationRegex.test(text[i + 1])\n ) {\n i++;\n word += text[i];\n }\n tokens.push(word);\n }\n }\n\n return tokens.filter(token => token.trim() !== 
'');\n}\n"],"mappings":"AAAA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,SAASA,qBAAqBA,CAACC,IAAI,EAAE;EAClD,IAAI,CAACA,IAAI,EAAE;IACT,OAAO,EAAE;EACX;EAEA,MAAMC,MAAM,GAAG,EAAE;EACjB,MAAMC,gBAAgB,GAAG,iBAAiB;EAC1C,MAAMC,eAAe,GAAG,IAAI;EAC5B,MAAMC,gBAAgB,GAAG,mBAAmB;EAE5C,KAAK,IAAIC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGL,IAAI,CAACM,MAAM,EAAED,CAAC,EAAE,EAAE;IACpC,MAAME,IAAI,GAAGP,IAAI,CAACK,CAAC,CAAC;IAEpB,IAAIH,gBAAgB,CAACM,IAAI,CAACD,IAAI,CAAC,EAAE;MAC/B;MACAN,MAAM,CAACQ,IAAI,CAACF,IAAI,CAAC;IACnB,CAAC,MAAM,IAAIJ,eAAe,CAACK,IAAI,CAACD,IAAI,CAAC,EAAE;MACrC;MACA;IACF,CAAC,MAAM,IAAIH,gBAAgB,CAACI,IAAI,CAACD,IAAI,CAAC,EAAE;MACtC;MACAN,MAAM,CAACQ,IAAI,CAACF,IAAI,CAAC;IACnB,CAAC,MAAM;MACL;MACA,IAAIG,IAAI,GAAGH,IAAI;MACf,OACEF,CAAC,GAAG,CAAC,GAAGL,IAAI,CAACM,MAAM,IACnB,CAACJ,gBAAgB,CAACM,IAAI,CAACR,IAAI,CAACK,CAAC,GAAG,CAAC,CAAC,CAAC,IACnC,CAACF,eAAe,CAACK,IAAI,CAACR,IAAI,CAACK,CAAC,GAAG,CAAC,CAAC,CAAC,IAClC,CAACD,gBAAgB,CAACI,IAAI,CAACR,IAAI,CAACK,CAAC,GAAG,CAAC,CAAC,CAAC,EACnC;QACAA,CAAC,EAAE;QACHK,IAAI,IAAIV,IAAI,CAACK,CAAC,CAAC;MACjB;MACAJ,MAAM,CAACQ,IAAI,CAACC,IAAI,CAAC;IACnB;EACF;EAEA,OAAOT,MAAM,CAACU,MAAM,CAACC,KAAK,IAAIA,KAAK,CAACC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;AACpD","ignoreList":[]}
@@ -3,6 +3,7 @@ import { inRange, isEmpty } from 'lodash';
3
3
  import { findTopicFormsInString } from "../helpers/match/findKeywordFormsInString.js";
4
4
  import { getParentNode } from "../helpers/sentence/getSentencesFromTree";
5
5
  import { createShortcodeTagsRegex } from "../helpers";
6
+ import isChineseText from "../helpers/language/isChineseText";
6
7
 
7
8
  /**
8
9
  * Checks if the introductory paragraph contains keyphrase or synonyms.
@@ -36,8 +37,29 @@ export default function (paper, researcher) {
36
37
  });
37
38
  const firstParagraph = paragraphs[0];
38
39
  const topicForms = researcher.getResearch('morphology');
39
- const matchWordCustomHelper = researcher.getHelper('matchWordCustomHelper');
40
+ let matchWordCustomHelper = researcher.getHelper('matchWordCustomHelper');
40
41
  const locale = paper.getLocale();
42
+
43
+ // Auto-detect Chinese and use Chinese helper if not already available
44
+ if (!matchWordCustomHelper && isChineseText(paper.getKeyword() + ' ' + (firstParagraph ? firstParagraph.innerText() : ''))) {
45
+ // Use Chinese word matching for Chinese text
46
+ matchWordCustomHelper = function (text, word) {
47
+ const textToSearch = typeof text === 'string' ? text : text.text || text;
48
+ const matches = [];
49
+ if (!textToSearch || !word) {
50
+ return matches;
51
+ }
52
+ const lowerText = textToSearch.toLowerCase();
53
+ const lowerWord = word.toLowerCase();
54
+ let startIndex = 0;
55
+ let index;
56
+ while ((index = lowerText.indexOf(lowerWord, startIndex)) !== -1) {
57
+ matches.push(word);
58
+ startIndex = index + lowerWord.length;
59
+ }
60
+ return matches;
61
+ };
62
+ }
41
63
  const startOffset = firstParagraph && firstParagraph.sourceCodeLocation.startOffset;
42
64
  const mappedBlocks = paper._attributes.wpBlocks;
43
65
  const filteredIntroductionBlock = mappedBlocks && mappedBlocks.filter(block => inRange(startOffset, block.startOffset, block.endOffset))[0];
@@ -1 +1 @@
1
- {"version":3,"file":"findKeywordInFirstParagraph.js","names":["inRange","isEmpty","findTopicFormsInString","getParentNode","createShortcodeTagsRegex","paper","researcher","paragraphs","getResearch","filter","paragraph","parentNode","isImplicit","name","childNodes","test","value","firstParagraph","topicForms","matchWordCustomHelper","getHelper","locale","getLocale","startOffset","sourceCodeLocation","mappedBlocks","_attributes","wpBlocks","filteredIntroductionBlock","block","endOffset","result","foundInOneSentence","foundInParagraph","keyphraseOrSynonym","introduction","parentBlock","sentences","map","sentence","text","useSynonyms","firstResultSentence","find","resultSentence","percentWordMatches","resultParagraph","innerText"],"sources":["../../../src/languageProcessing/researches/findKeywordInFirstParagraph.js"],"sourcesContent":["/** @module analyses/findKeywordInFirstParagraph */\nimport {inRange, isEmpty} from 'lodash';\n\nimport {findTopicFormsInString} from '../helpers/match/findKeywordFormsInString.js';\nimport {getParentNode} from '../helpers/sentence/getSentencesFromTree';\nimport {createShortcodeTagsRegex} from '../helpers';\n\n/**\n * Checks if the introductory paragraph contains keyphrase or synonyms.\n * First splits the first paragraph by sentences. 
Finds the first paragraph which contains sentences e.g., not an image).\n * (1) Tries to find all (content) words from the keyphrase or a synonym phrase within one sentence.\n * If found all words within one sentence, returns an object with foundInOneSentence = true and keyphraseOrSynonym = \"keyphrase\"\n * or \"synonym\".\n * If it did not find all words within one sentence, goes ahead with matching the keyphrase with the entire first paragraph.\n * (2) Tries to find all (content) words from the keyphrase or a synonym phrase within the paragraph.\n * If found all words within the paragraph, returns an object with foundInOneSentence = false, foundInParagraph = true,\n * and keyphraseOrSynonym = \"keyphrase\" or \"synonym\".\n * If found not all words within the paragraph of nothing at all, returns an object with foundInOneSentence = false,\n * foundInParagraph = false, and keyphraseOrSynonym = \"\".\n *\n * @param {Paper} paper The text to check for paragraphs.\n * @param {Researcher} researcher The researcher to use for analysis.\n *\n * @returns {Object} Whether the keyphrase words were found in one sentence, whether the keyphrase words were found in\n * the paragraph, whether a keyphrase or a synonym phrase was matched.\n */\nexport default function(paper, researcher) {\n let paragraphs = researcher.getResearch('getParagraphs');\n // Filter captions from non-Classic editors.\n paragraphs = paragraphs.filter(paragraph => {\n const parentNode = getParentNode(paper, paragraph);\n return !(paragraph.isImplicit && parentNode && parentNode.name === 'figcaption');\n });\n // Filter captions from Classic editor and from classic block inside Block editor.\n paragraphs = paragraphs.filter(paragraph => {\n return !(\n paragraph.childNodes &&\n paragraph.childNodes[0] &&\n createShortcodeTagsRegex(['caption']).test(paragraph.childNodes[0].value)\n );\n });\n const firstParagraph = paragraphs[0];\n\n const topicForms = researcher.getResearch('morphology');\n const 
matchWordCustomHelper = researcher.getHelper('matchWordCustomHelper');\n const locale = paper.getLocale();\n const startOffset = firstParagraph && firstParagraph.sourceCodeLocation.startOffset;\n\n const mappedBlocks = paper._attributes.wpBlocks;\n const filteredIntroductionBlock =\n mappedBlocks &&\n mappedBlocks.filter(block => inRange(startOffset, block.startOffset, block.endOffset))[0];\n const result = {\n foundInOneSentence: false,\n foundInParagraph: false,\n keyphraseOrSynonym: '',\n introduction: firstParagraph,\n parentBlock: filteredIntroductionBlock || null\n };\n\n if (isEmpty(firstParagraph)) {\n return result;\n }\n\n const sentences = firstParagraph.sentences.map(sentence => sentence.text);\n // Use both keyphrase and synonyms to match topic words in the first paragraph.\n const useSynonyms = true;\n\n if (!isEmpty(sentences)) {\n const firstResultSentence = sentences\n .map(sentence =>\n findTopicFormsInString(topicForms, sentence, useSynonyms, locale, matchWordCustomHelper)\n )\n .find(resultSentence => resultSentence.percentWordMatches === 100);\n\n if (firstResultSentence) {\n result.foundInOneSentence = true;\n result.foundInParagraph = true;\n result.keyphraseOrSynonym = firstResultSentence.keyphraseOrSynonym;\n return result;\n }\n\n const resultParagraph = findTopicFormsInString(\n topicForms,\n firstParagraph.innerText(),\n useSynonyms,\n locale,\n matchWordCustomHelper\n );\n if (resultParagraph.percentWordMatches === 100) {\n result.foundInParagraph = true;\n result.keyphraseOrSynonym = resultParagraph.keyphraseOrSynonym;\n return result;\n }\n }\n\n return 
result;\n}\n"],"mappings":"AAAA;AACA,SAAQA,OAAO,EAAEC,OAAO,QAAO,QAAQ;AAEvC,SAAQC,sBAAsB;AAC9B,SAAQC,aAAa;AACrB,SAAQC,wBAAwB;;AAEhC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAASC,KAAK,EAAEC,UAAU,EAAE;EACzC,IAAIC,UAAU,GAAGD,UAAU,CAACE,WAAW,CAAC,eAAe,CAAC;EACxD;EACAD,UAAU,GAAGA,UAAU,CAACE,MAAM,CAACC,SAAS,IAAI;IAC1C,MAAMC,UAAU,GAAGR,aAAa,CAACE,KAAK,EAAEK,SAAS,CAAC;IAClD,OAAO,EAAEA,SAAS,CAACE,UAAU,IAAID,UAAU,IAAIA,UAAU,CAACE,IAAI,KAAK,YAAY,CAAC;EAClF,CAAC,CAAC;EACF;EACAN,UAAU,GAAGA,UAAU,CAACE,MAAM,CAACC,SAAS,IAAI;IAC1C,OAAO,EACLA,SAAS,CAACI,UAAU,IACpBJ,SAAS,CAACI,UAAU,CAAC,CAAC,CAAC,IACvBV,wBAAwB,CAAC,CAAC,SAAS,CAAC,CAAC,CAACW,IAAI,CAACL,SAAS,CAACI,UAAU,CAAC,CAAC,CAAC,CAACE,KAAK,CAAC,CAC1E;EACH,CAAC,CAAC;EACF,MAAMC,cAAc,GAAGV,UAAU,CAAC,CAAC,CAAC;EAEpC,MAAMW,UAAU,GAAGZ,UAAU,CAACE,WAAW,CAAC,YAAY,CAAC;EACvD,MAAMW,qBAAqB,GAAGb,UAAU,CAACc,SAAS,CAAC,uBAAuB,CAAC;EAC3E,MAAMC,MAAM,GAAGhB,KAAK,CAACiB,SAAS,CAAC,CAAC;EAChC,MAAMC,WAAW,GAAGN,cAAc,IAAIA,cAAc,CAACO,kBAAkB,CAACD,WAAW;EAEnF,MAAME,YAAY,GAAGpB,KAAK,CAACqB,WAAW,CAACC,QAAQ;EAC/C,MAAMC,yBAAyB,GAC7BH,YAAY,IACZA,YAAY,CAAChB,MAAM,CAACoB,KAAK,IAAI7B,OAAO,CAACuB,WAAW,EAAEM,KAAK,CAACN,WAAW,EAAEM,KAAK,CAACC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;EAC3F,MAAMC,MAAM,GAAG;IACbC,kBAAkB,EAAE,KAAK;IACzBC,gBAAgB,EAAE,KAAK;IACvBC,kBAAkB,EAAE,EAAE;IACtBC,YAAY,EAAElB,cAAc;IAC5BmB,WAAW,EAAER,yBAAyB,IAAI;EAC5C,CAAC;EAED,IAAI3B,OAAO,CAACgB,cAAc,CAAC,EAAE;IAC3B,OAAOc,MAAM;EACf;EAEA,MAAMM,SAAS,GAAGpB,cAAc,CAACoB,SAAS,CAACC,GAAG,CAACC,QAAQ,IAAIA,QAAQ,CAACC,IAAI,CAAC;EACzE;EACA,MAAMC,WAAW,GAAG,IAAI;EAExB,IAAI,CAACxC,OAAO,CAACoC,SAAS,CAAC,EAAE;IACvB,MAAMK,mBAAmB,GAAGL,SAAS,CAClCC,GAAG,CAACC,QAAQ,IACXrC,sBAAsB,CAACgB,UAAU,EAAEqB,QAAQ,EAAEE,WAAW,EAAEpB,MAAM,EAAEF,qBAAqB,CACzF,CAAC,CACAwB,IAAI,CAACC,cAAc,IAAIA,cAAc,CAACC,kBAAkB,KAAK,GAAG,CAAC;IAEpE,IAAIH,mBAAmB,EAAE;MACvBX,MAAM,CAACC,kBAAkB,GAAG,IAAI;MAChCD,MAAM,CAACE,gBAAgB,GAAG,IAAI;MAC9BF,MAAM,CAACG,kBAAkB,GAAGQ,mBAAmB,CAACR,kBAAkB;MAClE,OAAOH,MAAM;IACf;
IAEA,MAAMe,eAAe,GAAG5C,sBAAsB,CAC5CgB,UAAU,EACVD,cAAc,CAAC8B,SAAS,CAAC,CAAC,EAC1BN,WAAW,EACXpB,MAAM,EACNF,qBACF,CAAC;IACD,IAAI2B,eAAe,CAACD,kBAAkB,KAAK,GAAG,EAAE;MAC9Cd,MAAM,CAACE,gBAAgB,GAAG,IAAI;MAC9BF,MAAM,CAACG,kBAAkB,GAAGY,eAAe,CAACZ,kBAAkB;MAC9D,OAAOH,MAAM;IACf;EACF;EAEA,OAAOA,MAAM;AACf","ignoreList":[]}
1
+ {"version":3,"file":"findKeywordInFirstParagraph.js","names":["inRange","isEmpty","findTopicFormsInString","getParentNode","createShortcodeTagsRegex","isChineseText","paper","researcher","paragraphs","getResearch","filter","paragraph","parentNode","isImplicit","name","childNodes","test","value","firstParagraph","topicForms","matchWordCustomHelper","getHelper","locale","getLocale","getKeyword","innerText","text","word","textToSearch","matches","lowerText","toLowerCase","lowerWord","startIndex","index","indexOf","push","length","startOffset","sourceCodeLocation","mappedBlocks","_attributes","wpBlocks","filteredIntroductionBlock","block","endOffset","result","foundInOneSentence","foundInParagraph","keyphraseOrSynonym","introduction","parentBlock","sentences","map","sentence","useSynonyms","firstResultSentence","find","resultSentence","percentWordMatches","resultParagraph"],"sources":["../../../src/languageProcessing/researches/findKeywordInFirstParagraph.js"],"sourcesContent":["/** @module analyses/findKeywordInFirstParagraph */\nimport {inRange, isEmpty} from 'lodash';\n\nimport {findTopicFormsInString} from '../helpers/match/findKeywordFormsInString.js';\nimport {getParentNode} from '../helpers/sentence/getSentencesFromTree';\nimport {createShortcodeTagsRegex} from '../helpers';\nimport isChineseText from '../helpers/language/isChineseText';\n\n/**\n * Checks if the introductory paragraph contains keyphrase or synonyms.\n * First splits the first paragraph by sentences. 
Finds the first paragraph which contains sentences e.g., not an image).\n * (1) Tries to find all (content) words from the keyphrase or a synonym phrase within one sentence.\n * If found all words within one sentence, returns an object with foundInOneSentence = true and keyphraseOrSynonym = \"keyphrase\"\n * or \"synonym\".\n * If it did not find all words within one sentence, goes ahead with matching the keyphrase with the entire first paragraph.\n * (2) Tries to find all (content) words from the keyphrase or a synonym phrase within the paragraph.\n * If found all words within the paragraph, returns an object with foundInOneSentence = false, foundInParagraph = true,\n * and keyphraseOrSynonym = \"keyphrase\" or \"synonym\".\n * If found not all words within the paragraph of nothing at all, returns an object with foundInOneSentence = false,\n * foundInParagraph = false, and keyphraseOrSynonym = \"\".\n *\n * @param {Paper} paper The text to check for paragraphs.\n * @param {Researcher} researcher The researcher to use for analysis.\n *\n * @returns {Object} Whether the keyphrase words were found in one sentence, whether the keyphrase words were found in\n * the paragraph, whether a keyphrase or a synonym phrase was matched.\n */\nexport default function(paper, researcher) {\n let paragraphs = researcher.getResearch('getParagraphs');\n // Filter captions from non-Classic editors.\n paragraphs = paragraphs.filter(paragraph => {\n const parentNode = getParentNode(paper, paragraph);\n return !(paragraph.isImplicit && parentNode && parentNode.name === 'figcaption');\n });\n // Filter captions from Classic editor and from classic block inside Block editor.\n paragraphs = paragraphs.filter(paragraph => {\n return !(\n paragraph.childNodes &&\n paragraph.childNodes[0] &&\n createShortcodeTagsRegex(['caption']).test(paragraph.childNodes[0].value)\n );\n });\n const firstParagraph = paragraphs[0];\n\n const topicForms = researcher.getResearch('morphology');\n let 
matchWordCustomHelper = researcher.getHelper('matchWordCustomHelper');\n const locale = paper.getLocale();\n\n // Auto-detect Chinese and use Chinese helper if not already available\n if (\n !matchWordCustomHelper &&\n isChineseText(paper.getKeyword() + ' ' + (firstParagraph ? firstParagraph.innerText() : ''))\n ) {\n // Use Chinese word matching for Chinese text\n matchWordCustomHelper = function(text, word) {\n const textToSearch = typeof text === 'string' ? text : text.text || text;\n const matches = [];\n\n if (!textToSearch || !word) {\n return matches;\n }\n\n const lowerText = textToSearch.toLowerCase();\n const lowerWord = word.toLowerCase();\n\n let startIndex = 0;\n let index;\n\n while ((index = lowerText.indexOf(lowerWord, startIndex)) !== -1) {\n matches.push(word);\n startIndex = index + lowerWord.length;\n }\n\n return matches;\n };\n }\n const startOffset = firstParagraph && firstParagraph.sourceCodeLocation.startOffset;\n\n const mappedBlocks = paper._attributes.wpBlocks;\n const filteredIntroductionBlock =\n mappedBlocks &&\n mappedBlocks.filter(block => inRange(startOffset, block.startOffset, block.endOffset))[0];\n const result = {\n foundInOneSentence: false,\n foundInParagraph: false,\n keyphraseOrSynonym: '',\n introduction: firstParagraph,\n parentBlock: filteredIntroductionBlock || null\n };\n\n if (isEmpty(firstParagraph)) {\n return result;\n }\n\n const sentences = firstParagraph.sentences.map(sentence => sentence.text);\n // Use both keyphrase and synonyms to match topic words in the first paragraph.\n const useSynonyms = true;\n\n if (!isEmpty(sentences)) {\n const firstResultSentence = sentences\n .map(sentence =>\n findTopicFormsInString(topicForms, sentence, useSynonyms, locale, matchWordCustomHelper)\n )\n .find(resultSentence => resultSentence.percentWordMatches === 100);\n\n if (firstResultSentence) {\n result.foundInOneSentence = true;\n result.foundInParagraph = true;\n result.keyphraseOrSynonym = 
firstResultSentence.keyphraseOrSynonym;\n return result;\n }\n\n const resultParagraph = findTopicFormsInString(\n topicForms,\n firstParagraph.innerText(),\n useSynonyms,\n locale,\n matchWordCustomHelper\n );\n if (resultParagraph.percentWordMatches === 100) {\n result.foundInParagraph = true;\n result.keyphraseOrSynonym = resultParagraph.keyphraseOrSynonym;\n return result;\n }\n }\n\n return result;\n}\n"],"mappings":"AAAA;AACA,SAAQA,OAAO,EAAEC,OAAO,QAAO,QAAQ;AAEvC,SAAQC,sBAAsB;AAC9B,SAAQC,aAAa;AACrB,SAAQC,wBAAwB;AAChC,OAAOC,aAAa;;AAEpB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAASC,KAAK,EAAEC,UAAU,EAAE;EACzC,IAAIC,UAAU,GAAGD,UAAU,CAACE,WAAW,CAAC,eAAe,CAAC;EACxD;EACAD,UAAU,GAAGA,UAAU,CAACE,MAAM,CAACC,SAAS,IAAI;IAC1C,MAAMC,UAAU,GAAGT,aAAa,CAACG,KAAK,EAAEK,SAAS,CAAC;IAClD,OAAO,EAAEA,SAAS,CAACE,UAAU,IAAID,UAAU,IAAIA,UAAU,CAACE,IAAI,KAAK,YAAY,CAAC;EAClF,CAAC,CAAC;EACF;EACAN,UAAU,GAAGA,UAAU,CAACE,MAAM,CAACC,SAAS,IAAI;IAC1C,OAAO,EACLA,SAAS,CAACI,UAAU,IACpBJ,SAAS,CAACI,UAAU,CAAC,CAAC,CAAC,IACvBX,wBAAwB,CAAC,CAAC,SAAS,CAAC,CAAC,CAACY,IAAI,CAACL,SAAS,CAACI,UAAU,CAAC,CAAC,CAAC,CAACE,KAAK,CAAC,CAC1E;EACH,CAAC,CAAC;EACF,MAAMC,cAAc,GAAGV,UAAU,CAAC,CAAC,CAAC;EAEpC,MAAMW,UAAU,GAAGZ,UAAU,CAACE,WAAW,CAAC,YAAY,CAAC;EACvD,IAAIW,qBAAqB,GAAGb,UAAU,CAACc,SAAS,CAAC,uBAAuB,CAAC;EACzE,MAAMC,MAAM,GAAGhB,KAAK,CAACiB,SAAS,CAAC,CAAC;;EAEhC;EACA,IACE,CAACH,qBAAqB,IACtBf,aAAa,CAACC,KAAK,CAACkB,UAAU,CAAC,CAAC,GAAG,GAAG,IAAIN,cAAc,GAAGA,cAAc,CAACO,SAAS,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAC5F;IACA;IACAL,qBAAqB,GAAG,SAAAA,CAASM,IAAI,EAAEC,IAAI,EAAE;MAC3C,MAAMC,YAAY,GAAG,OAAOF,IAAI,KAAK,QAAQ,GAAGA,IAAI,GAAGA,IAAI,CAACA,IAAI,IAAIA,IAAI;MACxE,MAAMG,OAAO,GAAG,EAAE;MAElB,IAAI,CAACD,YAAY,IAAI,CAACD,IAAI,EAAE;QAC1B,OAAOE,OAAO;MAChB;MAEA,MAAMC,SAAS,GAAGF,YAAY,CAACG,WAAW,CAAC,CAAC;MAC5C,MAAMC,SAAS,GAAGL,IAAI,CAACI,WAAW,CAAC,CAAC;MAEpC,IAAIE,UAAU,GAAG,CAAC;MAClB,IAAIC,KAAK;MAET,OAAO,CAACA,KAAK,GAAGJ,SAAS,CAACK,OAAO,CAACH,SAAS,EAAEC,UAAU,CAAC,MAAM,CAAC,
CAAC,EAAE;QAChEJ,OAAO,CAACO,IAAI,CAACT,IAAI,CAAC;QAClBM,UAAU,GAAGC,KAAK,GAAGF,SAAS,CAACK,MAAM;MACvC;MAEA,OAAOR,OAAO;IAChB,CAAC;EACH;EACA,MAAMS,WAAW,GAAGpB,cAAc,IAAIA,cAAc,CAACqB,kBAAkB,CAACD,WAAW;EAEnF,MAAME,YAAY,GAAGlC,KAAK,CAACmC,WAAW,CAACC,QAAQ;EAC/C,MAAMC,yBAAyB,GAC7BH,YAAY,IACZA,YAAY,CAAC9B,MAAM,CAACkC,KAAK,IAAI5C,OAAO,CAACsC,WAAW,EAAEM,KAAK,CAACN,WAAW,EAAEM,KAAK,CAACC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;EAC3F,MAAMC,MAAM,GAAG;IACbC,kBAAkB,EAAE,KAAK;IACzBC,gBAAgB,EAAE,KAAK;IACvBC,kBAAkB,EAAE,EAAE;IACtBC,YAAY,EAAEhC,cAAc;IAC5BiC,WAAW,EAAER,yBAAyB,IAAI;EAC5C,CAAC;EAED,IAAI1C,OAAO,CAACiB,cAAc,CAAC,EAAE;IAC3B,OAAO4B,MAAM;EACf;EAEA,MAAMM,SAAS,GAAGlC,cAAc,CAACkC,SAAS,CAACC,GAAG,CAACC,QAAQ,IAAIA,QAAQ,CAAC5B,IAAI,CAAC;EACzE;EACA,MAAM6B,WAAW,GAAG,IAAI;EAExB,IAAI,CAACtD,OAAO,CAACmD,SAAS,CAAC,EAAE;IACvB,MAAMI,mBAAmB,GAAGJ,SAAS,CAClCC,GAAG,CAACC,QAAQ,IACXpD,sBAAsB,CAACiB,UAAU,EAAEmC,QAAQ,EAAEC,WAAW,EAAEjC,MAAM,EAAEF,qBAAqB,CACzF,CAAC,CACAqC,IAAI,CAACC,cAAc,IAAIA,cAAc,CAACC,kBAAkB,KAAK,GAAG,CAAC;IAEpE,IAAIH,mBAAmB,EAAE;MACvBV,MAAM,CAACC,kBAAkB,GAAG,IAAI;MAChCD,MAAM,CAACE,gBAAgB,GAAG,IAAI;MAC9BF,MAAM,CAACG,kBAAkB,GAAGO,mBAAmB,CAACP,kBAAkB;MAClE,OAAOH,MAAM;IACf;IAEA,MAAMc,eAAe,GAAG1D,sBAAsB,CAC5CiB,UAAU,EACVD,cAAc,CAACO,SAAS,CAAC,CAAC,EAC1B8B,WAAW,EACXjC,MAAM,EACNF,qBACF,CAAC;IACD,IAAIwC,eAAe,CAACD,kBAAkB,KAAK,GAAG,EAAE;MAC9Cb,MAAM,CAACE,gBAAgB,GAAG,IAAI;MAC9BF,MAAM,CAACG,kBAAkB,GAAGW,eAAe,CAACX,kBAAkB;MAC9D,OAAOH,MAAM;IACf;EACF;EAEA,OAAOA,MAAM;AACf","ignoreList":[]}
@@ -1,4 +1,4 @@
1
- import { flatten, uniq } from "lodash";
1
+ import { flatten, uniq } from 'lodash';
2
2
  import filterWordsFromArray from "../helpers/word/filterWordsFromArray";
3
3
  import { findTopicFormsInString } from "../helpers/match/findKeywordFormsInString";
4
4
  import getWords from "../helpers/word/getWords";
@@ -7,6 +7,7 @@ import parseSynonyms from "../helpers/sanitize/parseSynonyms";
7
7
  import processExactMatchRequest from "../helpers/match/processExactMatchRequest";
8
8
  import urlHelper from "../helpers/url/url.js";
9
9
  import { WORD_BOUNDARY_WITH_HYPHEN, WORD_BOUNDARY_WITHOUT_HYPHEN } from "../../config/wordBoundariesWithoutPunctuation";
10
+ import { getMatchHelper } from "../helpers/language/chineseHelperFactory.js";
10
11
  let functionWords = [];
11
12
 
12
13
  /**
@@ -86,14 +87,14 @@ function getAnchorsWithSameTextAsTopic(anchors, topicForms, locale, customHelper
86
87
  const currentAnchorText = currentAnchor.innerText();
87
88
 
88
89
  /*
89
- * For keyphrase matching, we want to split words on hyphens and en-dashes, except for languages where hyphens shouldn't
90
- * be treated as word boundaries. Currently, the latter only applies to Indonesian, where hyphens are used to create plural forms of nouns,
91
- * such as "buku-buku" being a plural form of "buku". We want to treat forms like "buku-buku" as one word, so we shouldn't
92
- * split words on hyphens in Indonesian.
93
- * For languages where hyphens are treated as word boundaries we pass a custom word boundary regex string to the getWords helper
94
- * that includes whitespaces, hyphens (u002d), and en-dashes (u2013). Otherwise, we pass a word boundary regex that only includes
95
- * whitespaces and en-dashes.
96
- */
90
+ * For keyphrase matching, we want to split words on hyphens and en-dashes, except for languages where hyphens shouldn't
91
+ * be treated as word boundaries. Currently, the latter only applies to Indonesian, where hyphens are used to create plural forms of nouns,
92
+ * such as "buku-buku" being a plural form of "buku". We want to treat forms like "buku-buku" as one word, so we shouldn't
93
+ * split words on hyphens in Indonesian.
94
+ * For languages where hyphens are treated as word boundaries we pass a custom word boundary regex string to the getWords helper
95
+ * that includes whitespaces, hyphens (u002d), and en-dashes (u2013). Otherwise, we pass a word boundary regex that only includes
96
+ * whitespaces and en-dashes.
97
+ */
97
98
  let anchorWords;
98
99
  if (getWordsCustomHelper) {
99
100
  anchorWords = uniq(getWordsCustomHelper(currentAnchorText));
@@ -143,15 +144,15 @@ function getAnchorsWithSameTextAsTopic(anchors, topicForms, locale, customHelper
143
144
  * @returns {Object} The amount of anchor texts whose content words are the same as the keyphrase or synonyms' content words.
144
145
  */
145
146
  export default function (paper, researcher) {
146
- functionWords = researcher.getConfig("functionWords");
147
- const areHyphensWordBoundaries = researcher.getConfig("areHyphensWordBoundaries");
147
+ functionWords = researcher.getConfig('functionWords');
148
+ const areHyphensWordBoundaries = researcher.getConfig('areHyphensWordBoundaries');
148
149
  const result = {
149
150
  anchorsWithKeyphrase: [],
150
151
  anchorsWithKeyphraseCount: 0
151
152
  };
152
153
  // STEP 1.
153
154
  // If the paper's text is empty, return empty result.
154
- if (paper.getText() === "") {
155
+ if (paper.getText() === '') {
155
156
  return result;
156
157
  }
157
158
 
@@ -162,7 +163,7 @@ export default function (paper, researcher) {
162
163
  * This is a conscious decision where we won't assess the paper if the keyphrase is not set.
163
164
  * This includes a case where only the synonym is set but not the keyphrase.
164
165
  */
165
- if (keyphrase === "") {
166
+ if (keyphrase === '') {
166
167
  return result;
167
168
  }
168
169
  /*
@@ -173,7 +174,7 @@ export default function (paper, researcher) {
173
174
  originalTopics.push(keyphrase);
174
175
 
175
176
  // Retrieve the anchors.
176
- let anchors = paper.getTree().findAll(treeNode => treeNode.name === "a");
177
+ let anchors = paper.getTree().findAll(treeNode => treeNode.name === 'a');
177
178
  /*
178
179
  * We get the site's URL (e.g., https://yoast.com) or domain (e.g., yoast.com) from the paper.
179
180
  * In case of WordPress, the variable is a URL. In case of Shopify, it is a domain.
@@ -188,10 +189,14 @@ export default function (paper, researcher) {
188
189
  return result;
189
190
  }
190
191
  const locale = paper.getLocale();
191
- const topicForms = researcher.getResearch("morphology");
192
+ const topicForms = researcher.getResearch('morphology');
193
+
194
+ // Auto-detect Chinese and enhance helper if needed
195
+ const text = paper.getText();
196
+ const enhancedMatchHelper = getMatchHelper(text, keyphrase, researcher.getHelper('matchWordCustomHelper'));
192
197
  const customHelpers = {
193
- matchWordCustomHelper: researcher.getHelper("matchWordCustomHelper"),
194
- getWordsCustomHelper: researcher.getHelper("getWordsCustomHelper")
198
+ matchWordCustomHelper: enhancedMatchHelper,
199
+ getWordsCustomHelper: researcher.getHelper('getWordsCustomHelper')
195
200
  };
196
201
 
197
202
  // STEP 4.
@@ -1 +1 @@
1
- {"version":3,"file":"getAnchorsWithKeyphrase.js","names":["flatten","uniq","filterWordsFromArray","findTopicFormsInString","getWords","matchTextWithArray","parseSynonyms","processExactMatchRequest","urlHelper","WORD_BOUNDARY_WITH_HYPHEN","WORD_BOUNDARY_WITHOUT_HYPHEN","functionWords","isLinkingToSelf","anchorLink","siteUrlOrDomain","Boolean","areEqual","isRelativeFragmentURL","getAnchorsLinkingToSelf","anchors","anchorsLinkingToSelf","map","anchor","attributes","href","filter","index","getAnchorsContainingTopic","topicForms","locale","matchWordCustomHelper","anchorsContainingTopic","anchorText","innerText","percentWordMatches","getAnchorsWithSameTextAsTopic","customHelpers","exactMatchRequest","areHyphensWordBoundaries","getWordsCustomHelper","keyphraseAndSynonymsWords","keyphraseForms","synonymsForms","forEach","form","push","anchorsContainedInTopic","currentAnchor","currentAnchorText","anchorWords","filteredAnchorWords","length","request","exactMatchRequested","every","anchorWord","keyphrase","includes","i","topicForm","count","paper","researcher","getConfig","result","anchorsWithKeyphrase","anchorsWithKeyphraseCount","getText","getKeyword","originalTopics","getSynonyms","getTree","findAll","treeNode","name","getPermalink","getLocale","getResearch","getHelper","isExactMatchRequested","originalTopic"],"sources":["../../../src/languageProcessing/researches/getAnchorsWithKeyphrase.js"],"sourcesContent":["import { flatten, uniq } from \"lodash\";\nimport filterWordsFromArray from \"../helpers/word/filterWordsFromArray\";\nimport { findTopicFormsInString } from \"../helpers/match/findKeywordFormsInString\";\nimport getWords from \"../helpers/word/getWords\";\nimport matchTextWithArray from \"../helpers/match/matchTextWithArray\";\nimport parseSynonyms from \"../helpers/sanitize/parseSynonyms\";\nimport processExactMatchRequest from \"../helpers/match/processExactMatchRequest\";\nimport urlHelper from \"../helpers/url/url.js\";\nimport { WORD_BOUNDARY_WITH_HYPHEN, 
WORD_BOUNDARY_WITHOUT_HYPHEN } from \"../../config/wordBoundariesWithoutPunctuation\";\n\nlet functionWords = [];\n\n/**\n * Checks whether the anchor's link is a relative fragment or the same as the site url/domain.\n * Relative fragment links always point to the page itself.\n *\n * @param {String} anchorLink The link anchor.\n * @param {String} siteUrlOrDomain The site URL or domain of the paper.\n *\n * @returns {boolean} Whether the anchor's link is a relative fragment or the same as the site url/domain.\n */\nfunction isLinkingToSelf( anchorLink, siteUrlOrDomain ) {\n\treturn Boolean( urlHelper.areEqual( anchorLink, siteUrlOrDomain ) || urlHelper.isRelativeFragmentURL( anchorLink ) );\n}\n\n/**\n * Gets the anchors whose url is not linking at the current site url/domain.\n *\n * @param {Array} anchors An array with all anchors from the paper.\n * @param {String} siteUrlOrDomain The site URL or domain of the paper.\n *\n * @returns {Array} The array of all anchors whose url is not linking at the current site url/domain.\n */\nfunction getAnchorsLinkingToSelf( anchors, siteUrlOrDomain ) {\n\tconst anchorsLinkingToSelf = anchors.map( function( anchor ) {\n\t\tconst anchorLink = anchor.attributes.href;\n\t\t// Return false if there is no href attribute.\n\t\treturn anchorLink ? isLinkingToSelf( anchorLink, siteUrlOrDomain ) : false;\n\t} );\n\n\treturn anchors.filter( ( anchor, index ) => ! anchorsLinkingToSelf[ index ] );\n}\n\n/**\n * Gets the anchors with text that contains all content words of the topic (i.e. keyphrase or synonyms).\n *\n * @param {Array} anchors An array with all anchors from the paper\n * @param {Object} topicForms The object with topicForms. 
It contains all forms of the keyphrase and synonyms.\n * @param {String} locale The locale of the paper.\n * @param {Function} matchWordCustomHelper The helper function to match word in text.\n *\n * @returns {String[]} The array of all anchors with text that contains all content words of the keyphrase or synonyms.\n */\nfunction getAnchorsContainingTopic( anchors, topicForms, locale, matchWordCustomHelper ) {\n\tconst anchorsContainingTopic = anchors.map( function( anchor ) {\n\t\t// Only retrieve the anchor's text. This is because we only use the anchor text for the following check.\n\t\tconst anchorText = anchor.innerText();\n\t\treturn findTopicFormsInString( topicForms, anchorText, true, locale, matchWordCustomHelper ).percentWordMatches === 100;\n\t} );\n\n\treturn anchors.filter( ( anchor, index ) => anchorsContainingTopic[ index ] );\n}\n\n/**\n * Gets the anchors with text that has the same content words as the keyphrase or synonyms.\n *\n * @param {Array} anchors \t\tAn array with all anchors from the paper.\n * @param {Object} topicForms \t\tThe object with topicForms. 
It contains all forms of the keyphrase and synonyms.\n * @param {string} locale \t\tThe locale of the paper.\n * @param {Object} customHelpers \t\tAn object containing custom helpers.\n * @param {Object[]} exactMatchRequest \t\tAn array of objects containing the keyphrase and information on whether\n * \t\t\t\t\t\t\t\t\t\t\t\t\tthe exact match has been requested.\n * @param {boolean}\t \tareHyphensWordBoundaries\tWhether hyphens should be treated as word boundaries.\n *\n * @returns {Array} The array of all anchors with text that has the same content words as the keyphrase/synonyms.\n */\nfunction getAnchorsWithSameTextAsTopic( anchors, topicForms, locale, customHelpers, exactMatchRequest, areHyphensWordBoundaries ) {\n\tconst matchWordCustomHelper = customHelpers.matchWordCustomHelper;\n\tconst getWordsCustomHelper = customHelpers.getWordsCustomHelper;\n\n\t// Prepare keyphrase and synonym forms for comparison with anchors.\n\tconst keyphraseAndSynonymsWords = [ flatten( topicForms.keyphraseForms ) ];\n\tconst synonymsForms = topicForms.synonymsForms;\n\tsynonymsForms.forEach( form => keyphraseAndSynonymsWords.push( flatten( form ) ) );\n\n\t// The variable that will save all the anchors with text that has the same content words as the keyphrase/synonyms.\n\tconst anchorsContainedInTopic = [];\n\n\tanchors.forEach( function( currentAnchor ) {\n\t\tconst currentAnchorText = currentAnchor.innerText();\n\n\t\t/*\n * For keyphrase matching, we want to split words on hyphens and en-dashes, except for languages where hyphens shouldn't\n * be treated as word boundaries. Currently, the latter only applies to Indonesian, where hyphens are used to create plural forms of nouns,\n * such as \"buku-buku\" being a plural form of \"buku\". 
We want to treat forms like \"buku-buku\" as one word, so we shouldn't\n * split words on hyphens in Indonesian.\n * For languages where hyphens are treated as word boundaries we pass a custom word boundary regex string to the getWords helper\n * that includes whitespaces, hyphens (u002d), and en-dashes (u2013). Otherwise, we pass a word boundary regex that only includes\n * whitespaces and en-dashes.\n */\n\t\tlet anchorWords;\n\t\tif ( getWordsCustomHelper ) {\n\t\t\tanchorWords = uniq( getWordsCustomHelper( currentAnchorText ) );\n\t\t} else if ( areHyphensWordBoundaries ) {\n\t\t\tanchorWords = uniq( getWords( currentAnchorText, WORD_BOUNDARY_WITH_HYPHEN ) );\n\t\t} else {\n\t\t\tanchorWords = uniq( getWords( currentAnchorText, WORD_BOUNDARY_WITHOUT_HYPHEN ) );\n\t\t}\n\n\t\t/*\n\t\t * Filter function words out of the anchor text.\n\t\t * If the anchor text contains only function words, we keep them.\n\t\t */\n\t\tconst filteredAnchorWords = filterWordsFromArray( anchorWords, functionWords );\n\t\tif ( filteredAnchorWords.length > 0 ) {\n\t\t\tanchorWords = filteredAnchorWords;\n\t\t}\n\n\t\texactMatchRequest.forEach( request => {\n\t\t\t/*\n\t\t\t * Check a) if the exact match is requested for the keyphrase, and\n\t\t\t * b) if every content word in the anchor text is included in the keyphrase or synonym.\n\t\t\t */\n\t\t\tif ( request.exactMatchRequested &&\n\t\t\t\tanchorWords.every( anchorWord => request.keyphrase.includes( anchorWord ) ) ) {\n\t\t\t\tanchorsContainedInTopic.push( true );\n\t\t\t}\n\t\t} );\n\n\t\t// Check if every word in the anchor text is also present in the keyphrase/synonym.\n\t\tfor ( let i = 0; i < keyphraseAndSynonymsWords.length; i++ ) {\n\t\t\tconst topicForm = keyphraseAndSynonymsWords[ i ];\n\n\t\t\tif ( anchorWords.every( anchorWord => matchTextWithArray( anchorWord, topicForm, locale, matchWordCustomHelper ).count > 0 ) ) {\n\t\t\t\tanchorsContainedInTopic.push( true );\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t} );\n\n\treturn 
anchors.filter( ( anchor, index ) => anchorsContainedInTopic[ index ] );\n}\n\n/**\n * Checks whether the content words of the anchor text are the same as the content words of the keyphrase or synonym.\n * Also includes different word forms if the morphology is available.\n *\n * @param {Paper} paper The paper to research.\n * @param {Researcher} researcher The researcher to use.\n *\n * @returns {Object} The amount of anchor texts whose content words are the same as the keyphrase or synonyms' content words.\n */\nexport default function( paper, researcher ) {\n\tfunctionWords = researcher.getConfig( \"functionWords\" );\n\n\tconst areHyphensWordBoundaries = researcher.getConfig( \"areHyphensWordBoundaries\" );\n\n\tconst result = {\n\t\tanchorsWithKeyphrase: [],\n\t\tanchorsWithKeyphraseCount: 0,\n\t};\n\t// STEP 1.\n\t// If the paper's text is empty, return empty result.\n\tif ( paper.getText() === \"\" ) {\n\t\treturn result;\n\t}\n\n\t// STEP 2.\n\tconst keyphrase = paper.getKeyword();\n\t/*\n\t * If no keyphrase is set, return empty result.\n\t * This is a conscious decision where we won't assess the paper if the keyphrase is not set.\n\t * This includes a case where only the synonym is set but not the keyphrase.\n\t */\n\tif ( keyphrase === \"\" ) {\n\t\treturn result;\n\t}\n\t/*\n\t * When the keyphrase is set, also retrieve the synonyms and save them in \"topics\" array.\n\t * Eventually, the term topics here refers to either keyphrase or synonyms.\n\t */\n\tconst originalTopics = parseSynonyms( paper.getSynonyms() );\n\toriginalTopics.push( keyphrase );\n\n\t// Retrieve the anchors.\n\tlet anchors = paper.getTree().findAll( treeNode => treeNode.name === \"a\" );\n\t/*\n\t * We get the site's URL (e.g., https://yoast.com) or domain (e.g., yoast.com) from the paper.\n\t * In case of WordPress, the variable is a URL. 
In case of Shopify, it is a domain.\n\t */\n\tconst siteUrlOrDomain = paper.getPermalink();\n\n\t// STEP 3.\n\t// Get the anchors with urls that are not linking to the current site url/domain.\n\tanchors = getAnchorsLinkingToSelf( anchors, siteUrlOrDomain );\n\t// If all anchor urls are linking to the current site url/domain, return empty result.\n\tif ( anchors.length === 0 ) {\n\t\treturn result;\n\t}\n\n\tconst locale = paper.getLocale();\n\tconst topicForms = researcher.getResearch( \"morphology\" );\n\tconst customHelpers = {\n\t\tmatchWordCustomHelper: researcher.getHelper( \"matchWordCustomHelper\" ),\n\t\tgetWordsCustomHelper: researcher.getHelper( \"getWordsCustomHelper\" ),\n\t};\n\n\t// STEP 4.\n\t// Get the anchors with text that contains the keyphrase/synonyms' content words.\n\tanchors = getAnchorsContainingTopic( anchors, topicForms, locale, customHelpers.matchWordCustomHelper );\n\t// If all anchor texts do not contain the keyphrase/synonyms' content words, return empty result.\n\tif ( anchors.length === 0 ) {\n\t\treturn result;\n\t}\n\n\t// STEP 5.\n\t// Check if exact match is requested for every topic (keyphrase or synonym).\n\tconst isExactMatchRequested = originalTopics.map( originalTopic => processExactMatchRequest( originalTopic ) );\n\t// Get the anchors with text that has the same content words as the keyphrase/synonyms.\n\tanchors = getAnchorsWithSameTextAsTopic( anchors, topicForms, locale, customHelpers, isExactMatchRequested, areHyphensWordBoundaries );\n\n\treturn {\n\t\tanchorsWithKeyphrase: anchors,\n\t\tanchorsWithKeyphraseCount: 
anchors.length,\n\t};\n}\n\n"],"mappings":"AAAA,SAASA,OAAO,EAAEC,IAAI,QAAQ,QAAQ;AACtC,OAAOC,oBAAoB;AAC3B,SAASC,sBAAsB;AAC/B,OAAOC,QAAQ;AACf,OAAOC,kBAAkB;AACzB,OAAOC,aAAa;AACpB,OAAOC,wBAAwB;AAC/B,OAAOC,SAAS;AAChB,SAASC,yBAAyB,EAAEC,4BAA4B;AAEhE,IAAIC,aAAa,GAAG,EAAE;;AAEtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASC,eAAeA,CAAEC,UAAU,EAAEC,eAAe,EAAG;EACvD,OAAOC,OAAO,CAAEP,SAAS,CAACQ,QAAQ,CAAEH,UAAU,EAAEC,eAAgB,CAAC,IAAIN,SAAS,CAACS,qBAAqB,CAAEJ,UAAW,CAAE,CAAC;AACrH;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASK,uBAAuBA,CAAEC,OAAO,EAAEL,eAAe,EAAG;EAC5D,MAAMM,oBAAoB,GAAGD,OAAO,CAACE,GAAG,CAAE,UAAUC,MAAM,EAAG;IAC5D,MAAMT,UAAU,GAAGS,MAAM,CAACC,UAAU,CAACC,IAAI;IACzC;IACA,OAAOX,UAAU,GAAGD,eAAe,CAAEC,UAAU,EAAEC,eAAgB,CAAC,GAAG,KAAK;EAC3E,CAAE,CAAC;EAEH,OAAQK,OAAO,CAACM,MAAM,CAAE,CAAEH,MAAM,EAAEI,KAAK,KAAM,CAAEN,oBAAoB,CAAEM,KAAK,CAAG,CAAC;AAC/E;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASC,yBAAyBA,CAAER,OAAO,EAAES,UAAU,EAAEC,MAAM,EAAEC,qBAAqB,EAAG;EACxF,MAAMC,sBAAsB,GAAGZ,OAAO,CAACE,GAAG,CAAE,UAAUC,MAAM,EAAG;IAC9D;IACA,MAAMU,UAAU,GAAGV,MAAM,CAACW,SAAS,CAAC,CAAC;IACrC,OAAO9B,sBAAsB,CAAEyB,UAAU,EAAEI,UAAU,EAAE,IAAI,EAAEH,MAAM,EAAEC,qBAAuB,CAAC,CAACI,kBAAkB,KAAK,GAAG;EACzH,CAAE,CAAC;EAEH,OAAOf,OAAO,CAACM,MAAM,CAAE,CAAEH,MAAM,EAAEI,KAAK,KAAMK,sBAAsB,CAAEL,KAAK,CAAG,CAAC;AAC9E;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASS,6BAA6BA,CAAEhB,OAAO,EAAES,UAAU,EAAEC,MAAM,EAAEO,aAAa,EAAEC,iBAAiB,EAAEC,wBAAwB,EAAI;EAClI,MAAMR,qBAAqB,GAAGM,aAAa,CAACN,qBAAqB;EACjE,MAAMS,oBAAoB,GAAGH,aAAa,CAACG,oBAAoB;;EAE/D;EACA,MAAMC,yBAAyB,GAAG,CAAExC,OAAO,CAAE4B,UAAU,CAACa,cAAe,CAAC,CAAE;EAC1E,MAAMC,aAAa,GAAGd,UAAU,CAACc,aAAa;EAC9CA,aAAa,CAACC,OAAO,CAAEC,IAAI,IAAIJ,yBAAyB,CAACK,IAAI,CAAE7C,OAAO,CAAE4C,IAAK,CAAE,CAAE,CAAC;;EAElF;EACA,MAAME,uBAAuB,GAAG,EAAE;EAElC3B,OAAO,CAACwB,OAAO,CAAE,UAAUI,aAAa,EAAG;IAC1C,MAAMC,iBAAiB,GAAGD,aAAa,CAACd,SAAS,CAAC,CAAC;;IAEnD;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;IACE,IAAIgB,WAAW;IACf,IAAKV,oBAAoB,EAAG;MAC3BU,WAAW,
GAAGhD,IAAI,CAAEsC,oBAAoB,CAAES,iBAAkB,CAAE,CAAC;IAChE,CAAC,MAAM,IAAKV,wBAAwB,EAAG;MACtCW,WAAW,GAAGhD,IAAI,CAAEG,QAAQ,CAAE4C,iBAAiB,EAAEvC,yBAA0B,CAAE,CAAC;IAC/E,CAAC,MAAM;MACNwC,WAAW,GAAGhD,IAAI,CAAEG,QAAQ,CAAE4C,iBAAiB,EAAEtC,4BAA6B,CAAE,CAAC;IAClF;;IAEA;AACF;AACA;AACA;IACE,MAAMwC,mBAAmB,GAAGhD,oBAAoB,CAAE+C,WAAW,EAAEtC,aAAc,CAAC;IAC9E,IAAKuC,mBAAmB,CAACC,MAAM,GAAG,CAAC,EAAG;MACrCF,WAAW,GAAGC,mBAAmB;IAClC;IAEAb,iBAAiB,CAACM,OAAO,CAAES,OAAO,IAAI;MACrC;AACH;AACA;AACA;MACG,IAAKA,OAAO,CAACC,mBAAmB,IAC/BJ,WAAW,CAACK,KAAK,CAAEC,UAAU,IAAIH,OAAO,CAACI,SAAS,CAACC,QAAQ,CAAEF,UAAW,CAAE,CAAC,EAAG;QAC9ET,uBAAuB,CAACD,IAAI,CAAE,IAAK,CAAC;MACrC;IACD,CAAE,CAAC;;IAEH;IACA,KAAM,IAAIa,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGlB,yBAAyB,CAACW,MAAM,EAAEO,CAAC,EAAE,EAAG;MAC5D,MAAMC,SAAS,GAAInB,yBAAyB,CAAEkB,CAAC,CAAE;MAEjD,IAAKT,WAAW,CAACK,KAAK,CAAEC,UAAU,IAAIlD,kBAAkB,CAAEkD,UAAU,EAAEI,SAAS,EAAE9B,MAAM,EAAEC,qBAAsB,CAAC,CAAC8B,KAAK,GAAG,CAAE,CAAC,EAAG;QAC9Hd,uBAAuB,CAACD,IAAI,CAAE,IAAK,CAAC;QACpC;MACD;IACD;EACD,CAAE,CAAC;EAEH,OAAO1B,OAAO,CAACM,MAAM,CAAE,CAAEH,MAAM,EAAEI,KAAK,KAAMoB,uBAAuB,CAAEpB,KAAK,CAAG,CAAC;AAC/E;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAAUmC,KAAK,EAAEC,UAAU,EAAG;EAC5CnD,aAAa,GAAGmD,UAAU,CAACC,SAAS,CAAE,eAAgB,CAAC;EAEvD,MAAMzB,wBAAwB,GAAGwB,UAAU,CAACC,SAAS,CAAE,0BAA2B,CAAC;EAEnF,MAAMC,MAAM,GAAG;IACdC,oBAAoB,EAAE,EAAE;IACxBC,yBAAyB,EAAE;EAC5B,CAAC;EACD;EACA;EACA,IAAKL,KAAK,CAACM,OAAO,CAAC,CAAC,KAAK,EAAE,EAAG;IAC7B,OAAOH,MAAM;EACd;;EAEA;EACA,MAAMR,SAAS,GAAGK,KAAK,CAACO,UAAU,CAAC,CAAC;EACpC;AACD;AACA;AACA;AACA;EACC,IAAKZ,SAAS,KAAK,EAAE,EAAG;IACvB,OAAOQ,MAAM;EACd;EACA;AACD;AACA;AACA;EACC,MAAMK,cAAc,GAAG/D,aAAa,CAAEuD,KAAK,CAACS,WAAW,CAAC,CAAE,CAAC;EAC3DD,cAAc,CAACxB,IAAI,CAAEW,SAAU,CAAC;;EAEhC;EACA,IAAIrC,OAAO,GAAG0C,KAAK,CAACU,OAAO,CAAC,CAAC,CAACC,OAAO,CAAEC,QAAQ,IAAIA,QAAQ,CAACC,IAAI,KAAK,GAAI,CAAC;EAC1E;AACD;AACA;AACA;EACC,MAAM5D,eAAe,GAAG+C,KAAK,CAACc,YAAY,CAAC,CAAC;;EAE5C;EACA;EACAxD,OAAO,GAAGD,uBAAuB,CAAEC,OAAO,EAAEL,eAAgB,CAAC;EAC7D;EACA,IAAKK,OAAO,CAACgC,MA
AM,KAAK,CAAC,EAAG;IAC3B,OAAOa,MAAM;EACd;EAEA,MAAMnC,MAAM,GAAGgC,KAAK,CAACe,SAAS,CAAC,CAAC;EAChC,MAAMhD,UAAU,GAAGkC,UAAU,CAACe,WAAW,CAAE,YAAa,CAAC;EACzD,MAAMzC,aAAa,GAAG;IACrBN,qBAAqB,EAAEgC,UAAU,CAACgB,SAAS,CAAE,uBAAwB,CAAC;IACtEvC,oBAAoB,EAAEuB,UAAU,CAACgB,SAAS,CAAE,sBAAuB;EACpE,CAAC;;EAED;EACA;EACA3D,OAAO,GAAGQ,yBAAyB,CAAER,OAAO,EAAES,UAAU,EAAEC,MAAM,EAAEO,aAAa,CAACN,qBAAsB,CAAC;EACvG;EACA,IAAKX,OAAO,CAACgC,MAAM,KAAK,CAAC,EAAG;IAC3B,OAAOa,MAAM;EACd;;EAEA;EACA;EACA,MAAMe,qBAAqB,GAAGV,cAAc,CAAChD,GAAG,CAAE2D,aAAa,IAAIzE,wBAAwB,CAAEyE,aAAc,CAAE,CAAC;EAC9G;EACA7D,OAAO,GAAGgB,6BAA6B,CAAEhB,OAAO,EAAES,UAAU,EAAEC,MAAM,EAAEO,aAAa,EAAE2C,qBAAqB,EAAEzC,wBAAyB,CAAC;EAEtI,OAAO;IACN2B,oBAAoB,EAAE9C,OAAO;IAC7B+C,yBAAyB,EAAE/C,OAAO,CAACgC;EACpC,CAAC;AACF","ignoreList":[]}
1
+ {"version":3,"file":"getAnchorsWithKeyphrase.js","names":["flatten","uniq","filterWordsFromArray","findTopicFormsInString","getWords","matchTextWithArray","parseSynonyms","processExactMatchRequest","urlHelper","WORD_BOUNDARY_WITH_HYPHEN","WORD_BOUNDARY_WITHOUT_HYPHEN","getMatchHelper","functionWords","isLinkingToSelf","anchorLink","siteUrlOrDomain","Boolean","areEqual","isRelativeFragmentURL","getAnchorsLinkingToSelf","anchors","anchorsLinkingToSelf","map","anchor","attributes","href","filter","index","getAnchorsContainingTopic","topicForms","locale","matchWordCustomHelper","anchorsContainingTopic","anchorText","innerText","percentWordMatches","getAnchorsWithSameTextAsTopic","customHelpers","exactMatchRequest","areHyphensWordBoundaries","getWordsCustomHelper","keyphraseAndSynonymsWords","keyphraseForms","synonymsForms","forEach","form","push","anchorsContainedInTopic","currentAnchor","currentAnchorText","anchorWords","filteredAnchorWords","length","request","exactMatchRequested","every","anchorWord","keyphrase","includes","i","topicForm","count","paper","researcher","getConfig","result","anchorsWithKeyphrase","anchorsWithKeyphraseCount","getText","getKeyword","originalTopics","getSynonyms","getTree","findAll","treeNode","name","getPermalink","getLocale","getResearch","text","enhancedMatchHelper","getHelper","isExactMatchRequested","originalTopic"],"sources":["../../../src/languageProcessing/researches/getAnchorsWithKeyphrase.js"],"sourcesContent":["import {flatten, uniq} from 'lodash';\nimport filterWordsFromArray from '../helpers/word/filterWordsFromArray';\nimport {findTopicFormsInString} from '../helpers/match/findKeywordFormsInString';\nimport getWords from '../helpers/word/getWords';\nimport matchTextWithArray from '../helpers/match/matchTextWithArray';\nimport parseSynonyms from '../helpers/sanitize/parseSynonyms';\nimport processExactMatchRequest from '../helpers/match/processExactMatchRequest';\nimport urlHelper from '../helpers/url/url.js';\nimport {\n 
WORD_BOUNDARY_WITH_HYPHEN,\n WORD_BOUNDARY_WITHOUT_HYPHEN\n} from '../../config/wordBoundariesWithoutPunctuation';\nimport {getMatchHelper} from '../helpers/language/chineseHelperFactory.js';\n\nlet functionWords = [];\n\n/**\n * Checks whether the anchor's link is a relative fragment or the same as the site url/domain.\n * Relative fragment links always point to the page itself.\n *\n * @param {String} anchorLink The link anchor.\n * @param {String} siteUrlOrDomain The site URL or domain of the paper.\n *\n * @returns {boolean} Whether the anchor's link is a relative fragment or the same as the site url/domain.\n */\nfunction isLinkingToSelf(anchorLink, siteUrlOrDomain) {\n return Boolean(\n urlHelper.areEqual(anchorLink, siteUrlOrDomain) || urlHelper.isRelativeFragmentURL(anchorLink)\n );\n}\n\n/**\n * Gets the anchors whose url is not linking at the current site url/domain.\n *\n * @param {Array} anchors An array with all anchors from the paper.\n * @param {String} siteUrlOrDomain The site URL or domain of the paper.\n *\n * @returns {Array} The array of all anchors whose url is not linking at the current site url/domain.\n */\nfunction getAnchorsLinkingToSelf(anchors, siteUrlOrDomain) {\n const anchorsLinkingToSelf = anchors.map(function(anchor) {\n const anchorLink = anchor.attributes.href;\n // Return false if there is no href attribute.\n return anchorLink ? isLinkingToSelf(anchorLink, siteUrlOrDomain) : false;\n });\n\n return anchors.filter((anchor, index) => !anchorsLinkingToSelf[index]);\n}\n\n/**\n * Gets the anchors with text that contains all content words of the topic (i.e. keyphrase or synonyms).\n *\n * @param {Array} anchors An array with all anchors from the paper\n * @param {Object} topicForms The object with topicForms. 
It contains all forms of the keyphrase and synonyms.\n * @param {String} locale The locale of the paper.\n * @param {Function} matchWordCustomHelper The helper function to match word in text.\n *\n * @returns {String[]} The array of all anchors with text that contains all content words of the keyphrase or synonyms.\n */\nfunction getAnchorsContainingTopic(anchors, topicForms, locale, matchWordCustomHelper) {\n const anchorsContainingTopic = anchors.map(function(anchor) {\n // Only retrieve the anchor's text. This is because we only use the anchor text for the following check.\n const anchorText = anchor.innerText();\n return (\n findTopicFormsInString(topicForms, anchorText, true, locale, matchWordCustomHelper)\n .percentWordMatches === 100\n );\n });\n\n return anchors.filter((anchor, index) => anchorsContainingTopic[index]);\n}\n\n/**\n * Gets the anchors with text that has the same content words as the keyphrase or synonyms.\n *\n * @param {Array} anchors \t\tAn array with all anchors from the paper.\n * @param {Object} topicForms \t\tThe object with topicForms. 
It contains all forms of the keyphrase and synonyms.\n * @param {string} locale \t\tThe locale of the paper.\n * @param {Object} customHelpers \t\tAn object containing custom helpers.\n * @param {Object[]} exactMatchRequest \t\tAn array of objects containing the keyphrase and information on whether\n * \t\t\t\t\t\t\t\t\t\t\t\t\tthe exact match has been requested.\n * @param {boolean}\t \tareHyphensWordBoundaries\tWhether hyphens should be treated as word boundaries.\n *\n * @returns {Array} The array of all anchors with text that has the same content words as the keyphrase/synonyms.\n */\nfunction getAnchorsWithSameTextAsTopic(\n anchors,\n topicForms,\n locale,\n customHelpers,\n exactMatchRequest,\n areHyphensWordBoundaries\n) {\n const matchWordCustomHelper = customHelpers.matchWordCustomHelper;\n const getWordsCustomHelper = customHelpers.getWordsCustomHelper;\n\n // Prepare keyphrase and synonym forms for comparison with anchors.\n const keyphraseAndSynonymsWords = [flatten(topicForms.keyphraseForms)];\n const synonymsForms = topicForms.synonymsForms;\n synonymsForms.forEach(form => keyphraseAndSynonymsWords.push(flatten(form)));\n\n // The variable that will save all the anchors with text that has the same content words as the keyphrase/synonyms.\n const anchorsContainedInTopic = [];\n\n anchors.forEach(function(currentAnchor) {\n const currentAnchorText = currentAnchor.innerText();\n\n /*\n * For keyphrase matching, we want to split words on hyphens and en-dashes, except for languages where hyphens shouldn't\n * be treated as word boundaries. Currently, the latter only applies to Indonesian, where hyphens are used to create plural forms of nouns,\n * such as \"buku-buku\" being a plural form of \"buku\". 
We want to treat forms like \"buku-buku\" as one word, so we shouldn't\n * split words on hyphens in Indonesian.\n * For languages where hyphens are treated as word boundaries we pass a custom word boundary regex string to the getWords helper\n * that includes whitespaces, hyphens (u002d), and en-dashes (u2013). Otherwise, we pass a word boundary regex that only includes\n * whitespaces and en-dashes.\n */\n let anchorWords;\n if (getWordsCustomHelper) {\n anchorWords = uniq(getWordsCustomHelper(currentAnchorText));\n } else if (areHyphensWordBoundaries) {\n anchorWords = uniq(getWords(currentAnchorText, WORD_BOUNDARY_WITH_HYPHEN));\n } else {\n anchorWords = uniq(getWords(currentAnchorText, WORD_BOUNDARY_WITHOUT_HYPHEN));\n }\n\n /*\n * Filter function words out of the anchor text.\n * If the anchor text contains only function words, we keep them.\n */\n const filteredAnchorWords = filterWordsFromArray(anchorWords, functionWords);\n if (filteredAnchorWords.length > 0) {\n anchorWords = filteredAnchorWords;\n }\n\n exactMatchRequest.forEach(request => {\n /*\n * Check a) if the exact match is requested for the keyphrase, and\n * b) if every content word in the anchor text is included in the keyphrase or synonym.\n */\n if (\n request.exactMatchRequested &&\n anchorWords.every(anchorWord => request.keyphrase.includes(anchorWord))\n ) {\n anchorsContainedInTopic.push(true);\n }\n });\n\n // Check if every word in the anchor text is also present in the keyphrase/synonym.\n for (let i = 0; i < keyphraseAndSynonymsWords.length; i++) {\n const topicForm = keyphraseAndSynonymsWords[i];\n\n if (\n anchorWords.every(\n anchorWord =>\n matchTextWithArray(anchorWord, topicForm, locale, matchWordCustomHelper).count > 0\n )\n ) {\n anchorsContainedInTopic.push(true);\n break;\n }\n }\n });\n\n return anchors.filter((anchor, index) => anchorsContainedInTopic[index]);\n}\n\n/**\n * Checks whether the content words of the anchor text are the same as the content words of the 
keyphrase or synonym.\n * Also includes different word forms if the morphology is available.\n *\n * @param {Paper} paper The paper to research.\n * @param {Researcher} researcher The researcher to use.\n *\n * @returns {Object} The amount of anchor texts whose content words are the same as the keyphrase or synonyms' content words.\n */\nexport default function(paper, researcher) {\n functionWords = researcher.getConfig('functionWords');\n\n const areHyphensWordBoundaries = researcher.getConfig('areHyphensWordBoundaries');\n\n const result = {\n anchorsWithKeyphrase: [],\n anchorsWithKeyphraseCount: 0\n };\n // STEP 1.\n // If the paper's text is empty, return empty result.\n if (paper.getText() === '') {\n return result;\n }\n\n // STEP 2.\n const keyphrase = paper.getKeyword();\n /*\n * If no keyphrase is set, return empty result.\n * This is a conscious decision where we won't assess the paper if the keyphrase is not set.\n * This includes a case where only the synonym is set but not the keyphrase.\n */\n if (keyphrase === '') {\n return result;\n }\n /*\n * When the keyphrase is set, also retrieve the synonyms and save them in \"topics\" array.\n * Eventually, the term topics here refers to either keyphrase or synonyms.\n */\n const originalTopics = parseSynonyms(paper.getSynonyms());\n originalTopics.push(keyphrase);\n\n // Retrieve the anchors.\n let anchors = paper.getTree().findAll(treeNode => treeNode.name === 'a');\n /*\n * We get the site's URL (e.g., https://yoast.com) or domain (e.g., yoast.com) from the paper.\n * In case of WordPress, the variable is a URL. 
In case of Shopify, it is a domain.\n */\n const siteUrlOrDomain = paper.getPermalink();\n\n // STEP 3.\n // Get the anchors with urls that are not linking to the current site url/domain.\n anchors = getAnchorsLinkingToSelf(anchors, siteUrlOrDomain);\n // If all anchor urls are linking to the current site url/domain, return empty result.\n if (anchors.length === 0) {\n return result;\n }\n\n const locale = paper.getLocale();\n const topicForms = researcher.getResearch('morphology');\n\n // Auto-detect Chinese and enhance helper if needed\n const text = paper.getText();\n const enhancedMatchHelper = getMatchHelper(\n text,\n keyphrase,\n researcher.getHelper('matchWordCustomHelper')\n );\n\n const customHelpers = {\n matchWordCustomHelper: enhancedMatchHelper,\n getWordsCustomHelper: researcher.getHelper('getWordsCustomHelper')\n };\n\n // STEP 4.\n // Get the anchors with text that contains the keyphrase/synonyms' content words.\n anchors = getAnchorsContainingTopic(\n anchors,\n topicForms,\n locale,\n customHelpers.matchWordCustomHelper\n );\n // If all anchor texts do not contain the keyphrase/synonyms' content words, return empty result.\n if (anchors.length === 0) {\n return result;\n }\n\n // STEP 5.\n // Check if exact match is requested for every topic (keyphrase or synonym).\n const isExactMatchRequested = originalTopics.map(originalTopic =>\n processExactMatchRequest(originalTopic)\n );\n // Get the anchors with text that has the same content words as the keyphrase/synonyms.\n anchors = getAnchorsWithSameTextAsTopic(\n anchors,\n topicForms,\n locale,\n customHelpers,\n isExactMatchRequested,\n areHyphensWordBoundaries\n );\n\n return {\n anchorsWithKeyphrase: anchors,\n anchorsWithKeyphraseCount: anchors.length\n 
};\n}\n"],"mappings":"AAAA,SAAQA,OAAO,EAAEC,IAAI,QAAO,QAAQ;AACpC,OAAOC,oBAAoB;AAC3B,SAAQC,sBAAsB;AAC9B,OAAOC,QAAQ;AACf,OAAOC,kBAAkB;AACzB,OAAOC,aAAa;AACpB,OAAOC,wBAAwB;AAC/B,OAAOC,SAAS;AAChB,SACEC,yBAAyB,EACzBC,4BAA4B;AAE9B,SAAQC,cAAc;AAEtB,IAAIC,aAAa,GAAG,EAAE;;AAEtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASC,eAAeA,CAACC,UAAU,EAAEC,eAAe,EAAE;EACpD,OAAOC,OAAO,CACZR,SAAS,CAACS,QAAQ,CAACH,UAAU,EAAEC,eAAe,CAAC,IAAIP,SAAS,CAACU,qBAAqB,CAACJ,UAAU,CAC/F,CAAC;AACH;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASK,uBAAuBA,CAACC,OAAO,EAAEL,eAAe,EAAE;EACzD,MAAMM,oBAAoB,GAAGD,OAAO,CAACE,GAAG,CAAC,UAASC,MAAM,EAAE;IACxD,MAAMT,UAAU,GAAGS,MAAM,CAACC,UAAU,CAACC,IAAI;IACzC;IACA,OAAOX,UAAU,GAAGD,eAAe,CAACC,UAAU,EAAEC,eAAe,CAAC,GAAG,KAAK;EAC1E,CAAC,CAAC;EAEF,OAAOK,OAAO,CAACM,MAAM,CAAC,CAACH,MAAM,EAAEI,KAAK,KAAK,CAACN,oBAAoB,CAACM,KAAK,CAAC,CAAC;AACxE;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASC,yBAAyBA,CAACR,OAAO,EAAES,UAAU,EAAEC,MAAM,EAAEC,qBAAqB,EAAE;EACrF,MAAMC,sBAAsB,GAAGZ,OAAO,CAACE,GAAG,CAAC,UAASC,MAAM,EAAE;IAC1D;IACA,MAAMU,UAAU,GAAGV,MAAM,CAACW,SAAS,CAAC,CAAC;IACrC,OACE/B,sBAAsB,CAAC0B,UAAU,EAAEI,UAAU,EAAE,IAAI,EAAEH,MAAM,EAAEC,qBAAqB,CAAC,CAChFI,kBAAkB,KAAK,GAAG;EAEjC,CAAC,CAAC;EAEF,OAAOf,OAAO,CAACM,MAAM,CAAC,CAACH,MAAM,EAAEI,KAAK,KAAKK,sBAAsB,CAACL,KAAK,CAAC,CAAC;AACzE;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASS,6BAA6BA,CACpChB,OAAO,EACPS,UAAU,EACVC,MAAM,EACNO,aAAa,EACbC,iBAAiB,EACjBC,wBAAwB,EACxB;EACA,MAAMR,qBAAqB,GAAGM,aAAa,CAACN,qBAAqB;EACjE,MAAMS,oBAAoB,GAAGH,aAAa,CAACG,oBAAoB;;EAE/D;EACA,MAAMC,yBAAyB,GAAG,CAACzC,OAAO,CAAC6B,UAAU,CAACa,cAAc,CAAC,CAAC;EACtE,MAAMC,aAAa,GAAGd,UAAU,CAACc,aAAa;EAC9CA,aAAa,CAACC,OAAO,CAACC,IAAI,IAAIJ,yBAAyB,CAACK,IAAI,CAAC9C,OAAO,CAAC6C,IAAI,CAAC,CAAC,CAAC;;EAE5E;EACA,MAAME,uBAAuB,GAAG,EAAE;EAElC3B,OAAO,CAACwB,OAAO,CAAC,UAASI,aAAa,EAAE;IACtC,MAAMC,iBAAiB,GAAGD,aAAa,CAACd,SAAS,CAAC,CAAC;;IAEnD;AACJ;AACA;AACA;AACA;AACA;AACA;AACA;AACA;IACI,IAAIgB,WAAW;IACf,IAAIV,oBAAoB,EAAE;MACxBU,WAAW,GA
AGjD,IAAI,CAACuC,oBAAoB,CAACS,iBAAiB,CAAC,CAAC;IAC7D,CAAC,MAAM,IAAIV,wBAAwB,EAAE;MACnCW,WAAW,GAAGjD,IAAI,CAACG,QAAQ,CAAC6C,iBAAiB,EAAExC,yBAAyB,CAAC,CAAC;IAC5E,CAAC,MAAM;MACLyC,WAAW,GAAGjD,IAAI,CAACG,QAAQ,CAAC6C,iBAAiB,EAAEvC,4BAA4B,CAAC,CAAC;IAC/E;;IAEA;AACJ;AACA;AACA;IACI,MAAMyC,mBAAmB,GAAGjD,oBAAoB,CAACgD,WAAW,EAAEtC,aAAa,CAAC;IAC5E,IAAIuC,mBAAmB,CAACC,MAAM,GAAG,CAAC,EAAE;MAClCF,WAAW,GAAGC,mBAAmB;IACnC;IAEAb,iBAAiB,CAACM,OAAO,CAACS,OAAO,IAAI;MACnC;AACN;AACA;AACA;MACM,IACEA,OAAO,CAACC,mBAAmB,IAC3BJ,WAAW,CAACK,KAAK,CAACC,UAAU,IAAIH,OAAO,CAACI,SAAS,CAACC,QAAQ,CAACF,UAAU,CAAC,CAAC,EACvE;QACAT,uBAAuB,CAACD,IAAI,CAAC,IAAI,CAAC;MACpC;IACF,CAAC,CAAC;;IAEF;IACA,KAAK,IAAIa,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGlB,yBAAyB,CAACW,MAAM,EAAEO,CAAC,EAAE,EAAE;MACzD,MAAMC,SAAS,GAAGnB,yBAAyB,CAACkB,CAAC,CAAC;MAE9C,IACET,WAAW,CAACK,KAAK,CACfC,UAAU,IACRnD,kBAAkB,CAACmD,UAAU,EAAEI,SAAS,EAAE9B,MAAM,EAAEC,qBAAqB,CAAC,CAAC8B,KAAK,GAAG,CACrF,CAAC,EACD;QACAd,uBAAuB,CAACD,IAAI,CAAC,IAAI,CAAC;QAClC;MACF;IACF;EACF,CAAC,CAAC;EAEF,OAAO1B,OAAO,CAACM,MAAM,CAAC,CAACH,MAAM,EAAEI,KAAK,KAAKoB,uBAAuB,CAACpB,KAAK,CAAC,CAAC;AAC1E;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAASmC,KAAK,EAAEC,UAAU,EAAE;EACzCnD,aAAa,GAAGmD,UAAU,CAACC,SAAS,CAAC,eAAe,CAAC;EAErD,MAAMzB,wBAAwB,GAAGwB,UAAU,CAACC,SAAS,CAAC,0BAA0B,CAAC;EAEjF,MAAMC,MAAM,GAAG;IACbC,oBAAoB,EAAE,EAAE;IACxBC,yBAAyB,EAAE;EAC7B,CAAC;EACD;EACA;EACA,IAAIL,KAAK,CAACM,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;IAC1B,OAAOH,MAAM;EACf;;EAEA;EACA,MAAMR,SAAS,GAAGK,KAAK,CAACO,UAAU,CAAC,CAAC;EACpC;AACF;AACA;AACA;AACA;EACE,IAAIZ,SAAS,KAAK,EAAE,EAAE;IACpB,OAAOQ,MAAM;EACf;EACA;AACF;AACA;AACA;EACE,MAAMK,cAAc,GAAGhE,aAAa,CAACwD,KAAK,CAACS,WAAW,CAAC,CAAC,CAAC;EACzDD,cAAc,CAACxB,IAAI,CAACW,SAAS,CAAC;;EAE9B;EACA,IAAIrC,OAAO,GAAG0C,KAAK,CAACU,OAAO,CAAC,CAAC,CAACC,OAAO,CAACC,QAAQ,IAAIA,QAAQ,CAACC,IAAI,KAAK,GAAG,CAAC;EACxE;AACF;AACA;AACA;EACE,MAAM5D,eAAe,GAAG+C,KAAK,CAACc,YAAY,CAAC,CAAC;;EAE5C;EACA;EACAxD,OAAO,GAAGD,uBAAuB,CAACC,OAAO,EAAEL,eAAe,CAAC;EAC3D;EACA,IAAIK,OAAO,CAACgC,MAAM,K
AAK,CAAC,EAAE;IACxB,OAAOa,MAAM;EACf;EAEA,MAAMnC,MAAM,GAAGgC,KAAK,CAACe,SAAS,CAAC,CAAC;EAChC,MAAMhD,UAAU,GAAGkC,UAAU,CAACe,WAAW,CAAC,YAAY,CAAC;;EAEvD;EACA,MAAMC,IAAI,GAAGjB,KAAK,CAACM,OAAO,CAAC,CAAC;EAC5B,MAAMY,mBAAmB,GAAGrE,cAAc,CACxCoE,IAAI,EACJtB,SAAS,EACTM,UAAU,CAACkB,SAAS,CAAC,uBAAuB,CAC9C,CAAC;EAED,MAAM5C,aAAa,GAAG;IACpBN,qBAAqB,EAAEiD,mBAAmB;IAC1CxC,oBAAoB,EAAEuB,UAAU,CAACkB,SAAS,CAAC,sBAAsB;EACnE,CAAC;;EAED;EACA;EACA7D,OAAO,GAAGQ,yBAAyB,CACjCR,OAAO,EACPS,UAAU,EACVC,MAAM,EACNO,aAAa,CAACN,qBAChB,CAAC;EACD;EACA,IAAIX,OAAO,CAACgC,MAAM,KAAK,CAAC,EAAE;IACxB,OAAOa,MAAM;EACf;;EAEA;EACA;EACA,MAAMiB,qBAAqB,GAAGZ,cAAc,CAAChD,GAAG,CAAC6D,aAAa,IAC5D5E,wBAAwB,CAAC4E,aAAa,CACxC,CAAC;EACD;EACA/D,OAAO,GAAGgB,6BAA6B,CACrChB,OAAO,EACPS,UAAU,EACVC,MAAM,EACNO,aAAa,EACb6C,qBAAqB,EACrB3C,wBACF,CAAC;EAED,OAAO;IACL2B,oBAAoB,EAAE9C,OAAO;IAC7B+C,yBAAyB,EAAE/C,OAAO,CAACgC;EACrC,CAAC;AACH","ignoreList":[]}
@@ -1,4 +1,4 @@
1
- import { reject } from "lodash";
1
+ import { reject } from 'lodash';
2
2
 
3
3
  /**
4
4
  * Returns all paragraphs in a given Paper.
@@ -8,11 +8,20 @@ import { reject } from "lodash";
8
8
  * @returns {Paragraph[]} All paragraphs in the paper.
9
9
  */
10
10
  export default function (paper) {
11
- let paragraphs = paper.getTree().findAll(node => node.name === "p");
11
+ let paragraphs = paper.getTree().findAll(node => node.name === 'p');
12
12
 
13
- // Remove empty paragraphs without sentences and paragraphs only consisting of links.
13
+ // Remove empty paragraphs without sentences and paragraphs only consisting of empty links.
14
14
  paragraphs = reject(paragraphs, paragraph => paragraph.sentences.length === 0);
15
- paragraphs = reject(paragraphs, paragraph => paragraph.childNodes.every(node => node.name === "a"));
15
+ // Don't remove paragraphs with links that contain meaningful text
16
+ paragraphs = reject(paragraphs, paragraph => {
17
+ // Only reject if ALL children are links AND those links are empty/meaningless
18
+ const allLinks = paragraph.childNodes.every(node => node.name === 'a');
19
+ if (!allLinks) return false;
20
+
21
+ // Check if the paragraph has meaningful text content
22
+ const textContent = paragraph.innerText().trim();
23
+ return textContent.length === 0; // Only reject if completely empty
24
+ });
16
25
  return paragraphs;
17
26
  }
18
27
  //# sourceMappingURL=getParagraphs.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"getParagraphs.js","names":["reject","paper","paragraphs","getTree","findAll","node","name","paragraph","sentences","length","childNodes","every"],"sources":["../../../src/languageProcessing/researches/getParagraphs.js"],"sourcesContent":["import { reject } from \"lodash\";\n\n/**\n * Returns all paragraphs in a given Paper.\n * Remove paragraphs that do not contain sentences or only consist of links.\n *\n * @param {Paper} paper The current paper.\n * @returns {Paragraph[]} All paragraphs in the paper.\n */\nexport default function( paper ) {\n\tlet paragraphs = paper.getTree().findAll( node => node.name === \"p\" );\n\n\t// Remove empty paragraphs without sentences and paragraphs only consisting of links.\n\tparagraphs = reject( paragraphs, paragraph => paragraph.sentences.length === 0 );\n\tparagraphs = reject( paragraphs, paragraph => paragraph.childNodes.every( node => node.name === \"a\" ) );\n\n\treturn paragraphs;\n}\n"],"mappings":"AAAA,SAASA,MAAM,QAAQ,QAAQ;;AAE/B;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAAUC,KAAK,EAAG;EAChC,IAAIC,UAAU,GAAGD,KAAK,CAACE,OAAO,CAAC,CAAC,CAACC,OAAO,CAAEC,IAAI,IAAIA,IAAI,CAACC,IAAI,KAAK,GAAI,CAAC;;EAErE;EACAJ,UAAU,GAAGF,MAAM,CAAEE,UAAU,EAAEK,SAAS,IAAIA,SAAS,CAACC,SAAS,CAACC,MAAM,KAAK,CAAE,CAAC;EAChFP,UAAU,GAAGF,MAAM,CAAEE,UAAU,EAAEK,SAAS,IAAIA,SAAS,CAACG,UAAU,CAACC,KAAK,CAAEN,IAAI,IAAIA,IAAI,CAACC,IAAI,KAAK,GAAI,CAAE,CAAC;EAEvG,OAAOJ,UAAU;AAClB","ignoreList":[]}
1
+ {"version":3,"file":"getParagraphs.js","names":["reject","paper","paragraphs","getTree","findAll","node","name","paragraph","sentences","length","allLinks","childNodes","every","textContent","innerText","trim"],"sources":["../../../src/languageProcessing/researches/getParagraphs.js"],"sourcesContent":["import {reject} from 'lodash';\n\n/**\n * Returns all paragraphs in a given Paper.\n * Remove paragraphs that do not contain sentences or only consist of links.\n *\n * @param {Paper} paper The current paper.\n * @returns {Paragraph[]} All paragraphs in the paper.\n */\nexport default function(paper) {\n let paragraphs = paper.getTree().findAll(node => node.name === 'p');\n\n // Remove empty paragraphs without sentences and paragraphs only consisting of empty links.\n paragraphs = reject(paragraphs, paragraph => paragraph.sentences.length === 0);\n // Don't remove paragraphs with links that contain meaningful text\n paragraphs = reject(paragraphs, paragraph => {\n // Only reject if ALL children are links AND those links are empty/meaningless\n const allLinks = paragraph.childNodes.every(node => node.name === 'a');\n if (!allLinks) return false;\n\n // Check if the paragraph has meaningful text content\n const textContent = paragraph.innerText().trim();\n return textContent.length === 0; // Only reject if completely empty\n });\n\n return 
paragraphs;\n}\n"],"mappings":"AAAA,SAAQA,MAAM,QAAO,QAAQ;;AAE7B;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAASC,KAAK,EAAE;EAC7B,IAAIC,UAAU,GAAGD,KAAK,CAACE,OAAO,CAAC,CAAC,CAACC,OAAO,CAACC,IAAI,IAAIA,IAAI,CAACC,IAAI,KAAK,GAAG,CAAC;;EAEnE;EACAJ,UAAU,GAAGF,MAAM,CAACE,UAAU,EAAEK,SAAS,IAAIA,SAAS,CAACC,SAAS,CAACC,MAAM,KAAK,CAAC,CAAC;EAC9E;EACAP,UAAU,GAAGF,MAAM,CAACE,UAAU,EAAEK,SAAS,IAAI;IAC3C;IACA,MAAMG,QAAQ,GAAGH,SAAS,CAACI,UAAU,CAACC,KAAK,CAACP,IAAI,IAAIA,IAAI,CAACC,IAAI,KAAK,GAAG,CAAC;IACtE,IAAI,CAACI,QAAQ,EAAE,OAAO,KAAK;;IAE3B;IACA,MAAMG,WAAW,GAAGN,SAAS,CAACO,SAAS,CAAC,CAAC,CAACC,IAAI,CAAC,CAAC;IAChD,OAAOF,WAAW,CAACJ,MAAM,KAAK,CAAC,CAAC,CAAC;EACnC,CAAC,CAAC;EAEF,OAAOP,UAAU;AACnB","ignoreList":[]}
@@ -7,6 +7,7 @@ import isDoubleQuoted from "../helpers/match/isDoubleQuoted";
7
7
  import { markWordsInASentence } from "../helpers/word/markWordsInSentences";
8
8
  import getSentences from "../helpers/sentence/getSentences";
9
9
  import { filterShortcodesFromHTML } from "../helpers";
10
+ import isChineseText from "../helpers/language/isChineseText";
10
11
 
11
12
  /**
12
13
  * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches.
@@ -86,12 +87,39 @@ export default function getKeyphraseCount(paper, researcher) {
86
87
  if (keyphraseLength === 0) {
87
88
  return result;
88
89
  }
89
- const matchWordCustomHelper = researcher.getHelper('matchWordCustomHelper');
90
+ let matchWordCustomHelper = researcher.getHelper('matchWordCustomHelper');
90
91
  const customSentenceTokenizer = researcher.getHelper('memoizedTokenizer');
91
92
  const customSplitIntoTokensHelper = researcher.getHelper('splitIntoTokensCustom');
92
93
  const locale = paper.getLocale();
93
94
  const text = matchWordCustomHelper ? filterShortcodesFromHTML(paper.getText(), paper._attributes && paper._attributes.shortcodes) : paper.getText();
94
95
 
96
+ // Auto-detect Chinese and use Chinese helper if not already available
97
+ if (!matchWordCustomHelper && isChineseText(paper.getKeyword() + ' ' + text)) {
98
+ // Import Chinese helper for Chinese text
99
+ try {
100
+ // Use simple Chinese word matching for Chinese text
101
+ matchWordCustomHelper = function (sentence, word) {
102
+ const text = typeof sentence === 'string' ? sentence : sentence.text || sentence;
103
+ const matches = [];
104
+ if (!text || !word) {
105
+ return matches;
106
+ }
107
+ const lowerText = text.toLowerCase();
108
+ const lowerWord = word.toLowerCase();
109
+ let startIndex = 0;
110
+ let index;
111
+ while ((index = lowerText.indexOf(lowerWord, startIndex)) !== -1) {
112
+ matches.push(word);
113
+ startIndex = index + lowerWord.length;
114
+ }
115
+ return matches;
116
+ };
117
+ } catch (error) {
118
+ // If there's any issue, continue with default behavior
119
+ console.warn('Error setting up Chinese matching, using default:', error.message);
120
+ }
121
+ }
122
+
95
123
  // When the custom helper is available, we're using the sentences retrieved from the text for the analysis.
96
124
  const sentences = matchWordCustomHelper ? getSentences(text, customSentenceTokenizer) : getSentencesFromTree(paper);
97
125
  // Exact matching is requested when the keyphrase is enclosed in double quotes.
@@ -1 +1 @@
1
- {"version":3,"file":"keywordCount.js","names":["flatten","flattenDeep","getSentencesFromTree","normalizeSingle","getMarkingsInSentence","matchWordFormsWithSentence","isDoubleQuoted","markWordsInASentence","getSentences","filterShortcodesFromHTML","countKeyphraseInText","sentences","keyphraseForms","locale","matchWordCustomHelper","isExactMatchRequested","customSplitIntoTokensHelper","result","count","markings","forEach","sentence","matchesInSentence","map","wordForms","isEachWordFound","every","counts","match","totalMatchCount","Math","min","foundWords","matches","push","getKeyphraseCount","paper","researcher","keyphraseLength","topicForms","getResearch","length","word","form","getHelper","customSentenceTokenizer","getLocale","text","getText","_attributes","shortcodes","getKeyword","keyphraseFound","keywordCount","console","warn"],"sources":["../../../src/languageProcessing/researches/keywordCount.js"],"sourcesContent":["import {flatten, flattenDeep} from 'lodash';\nimport getSentencesFromTree from '../helpers/sentence/getSentencesFromTree';\nimport {normalizeSingle} from '../helpers/sanitize/quotes';\nimport getMarkingsInSentence from '../helpers/highlighting/getMarkingsInSentence';\nimport matchWordFormsWithSentence from '../helpers/match/matchWordFormsWithSentence';\nimport isDoubleQuoted from '../helpers/match/isDoubleQuoted';\nimport {markWordsInASentence} from '../helpers/word/markWordsInSentences';\nimport getSentences from '../helpers/sentence/getSentences';\nimport {filterShortcodesFromHTML} from '../helpers';\n\n/**\n * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches.\n *\n * @param {(Sentence|string)[]}\tsentences\t\t\tThe sentences to check.\n * @param {Array}\t\tkeyphraseForms\t\t\t\tThe keyphrase forms.\n * @param {string}\t\tlocale\t\t\t\t\t\tThe locale used in the analysis.\n * @param {function}\tmatchWordCustomHelper\t\tA custom helper to match words with a text.\n * @param 
{boolean}\t\tisExactMatchRequested\t\tWhether the exact matching is requested.\n * @param {function}\tcustomSplitIntoTokensHelper\tA custom helper to split sentences into tokens.\n *\n * @returns {{markings: Mark[], count: number}} The number of keyphrase occurrences in the text and the Mark objects of the matches.\n */\nexport function countKeyphraseInText(\n sentences,\n keyphraseForms,\n locale,\n matchWordCustomHelper,\n isExactMatchRequested,\n customSplitIntoTokensHelper\n) {\n const result = {count: 0, markings: []};\n\n sentences.forEach(sentence => {\n const matchesInSentence = keyphraseForms.map(wordForms =>\n matchWordFormsWithSentence(\n sentence,\n wordForms,\n locale,\n matchWordCustomHelper,\n isExactMatchRequested,\n customSplitIntoTokensHelper\n )\n );\n // A sentence has at least one full-match of the keyphrase if each word occurs at least once.\n const isEachWordFound = matchesInSentence.every(wordForms => wordForms.count > 0);\n\n if (isEachWordFound) {\n /*\n * Retrieve all the occurrences' count of each word of the keyphrase and save it in an array.\n * matches: [ [ { matches: [\"red\"], count: 1 } ], [ { matches: [\"pandas\"], count: 2 } ] ]\n * counts: [ 1, 2 ]\n */\n const counts = matchesInSentence.map(match => match.count);\n /*\n * The number of the full-match count is the lowest count of the occurrences.\n * counts: [ 1, 2 ]\n * totalMatchCount: 1\n *\n * From the example above, the full-match is 1, because one of the \"pandas\" occurrences is not accompanied by \"red\"\n * to be counted as a full-match.\n */\n const totalMatchCount = Math.min(...counts);\n const foundWords = flattenDeep(matchesInSentence.map(match => match.matches));\n\n let markings = [];\n\n if (matchWordCustomHelper) {\n // Currently, this check is only applicable for Japanese.\n markings = markWordsInASentence(sentence, foundWords, matchWordCustomHelper);\n } else {\n markings = getMarkingsInSentence(sentence, foundWords);\n }\n\n result.count += totalMatchCount;\n 
result.markings.push(markings);\n }\n });\n\n return result;\n}\n\n/**\n * Calculates the keyphrase count, takes morphology into account.\n *\n * @param {Paper} paper The paper containing keyphrase and text.\n * @param {Researcher} researcher The researcher.\n *\n * @returns {{count: number, markings: Mark[], keyphraseLength: number}} An object containing the keyphrase count, markings and the kephrase length.\n */\nexport default function getKeyphraseCount(paper, researcher) {\n const result = {count: 0, markings: [], keyphraseLength: 0};\n const topicForms = researcher.getResearch('morphology');\n let keyphraseForms = topicForms.keyphraseForms;\n const keyphraseLength = keyphraseForms.length;\n\n /*\n * Normalize single quotes so that word form with different type of single quotes can still be matched.\n * For example, \"key‛word\" should match \"key'word\".\n */\n keyphraseForms = keyphraseForms.map(word => word.map(form => normalizeSingle(form)));\n\n if (keyphraseLength === 0) {\n return result;\n }\n\n const matchWordCustomHelper = researcher.getHelper('matchWordCustomHelper');\n const customSentenceTokenizer = researcher.getHelper('memoizedTokenizer');\n const customSplitIntoTokensHelper = researcher.getHelper('splitIntoTokensCustom');\n const locale = paper.getLocale();\n const text = matchWordCustomHelper\n ? filterShortcodesFromHTML(paper.getText(), paper._attributes && paper._attributes.shortcodes)\n : paper.getText();\n\n // When the custom helper is available, we're using the sentences retrieved from the text for the analysis.\n const sentences = matchWordCustomHelper\n ? 
getSentences(text, customSentenceTokenizer)\n : getSentencesFromTree(paper);\n // Exact matching is requested when the keyphrase is enclosed in double quotes.\n const isExactMatchRequested = isDoubleQuoted(paper.getKeyword());\n\n /*\n * Count the amount of keyphrase occurrences in the sentences.\n * An occurrence is counted when all words of the keyphrase are contained within the sentence. Each sentence can contain multiple keyphrases.\n * (e.g. \"The apple potato is an apple and a potato.\" has two occurrences of the keyphrase \"apple potato\").\n */\n const keyphraseFound = countKeyphraseInText(\n sentences,\n keyphraseForms,\n locale,\n matchWordCustomHelper,\n isExactMatchRequested,\n customSplitIntoTokensHelper\n );\n\n result.count = keyphraseFound.count;\n result.markings = flatten(keyphraseFound.markings);\n result.keyphraseLength = keyphraseLength;\n\n return result;\n}\n\n/**\n * Calculates the keyphrase count, takes morphology into account.\n *\n * @deprecated Use getKeyphraseCount instead.\n *\n * @param {Paper} paper The paper containing keyphrase and text.\n * @param {Researcher} researcher The researcher.\n *\n * @returns {Object} An array of all the matches, markings and the keyphrase count.\n */\nexport function keywordCount(paper, researcher) {\n console.warn('This function is deprecated, use getKeyphraseCount instead.');\n return getKeyphraseCount(paper, 
researcher);\n}\n"],"mappings":"AAAA,SAAQA,OAAO,EAAEC,WAAW,QAAO,QAAQ;AAC3C,OAAOC,oBAAoB;AAC3B,SAAQC,eAAe;AACvB,OAAOC,qBAAqB;AAC5B,OAAOC,0BAA0B;AACjC,OAAOC,cAAc;AACrB,SAAQC,oBAAoB;AAC5B,OAAOC,YAAY;AACnB,SAAQC,wBAAwB;;AAEhC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,OAAO,SAASC,oBAAoBA,CAClCC,SAAS,EACTC,cAAc,EACdC,MAAM,EACNC,qBAAqB,EACrBC,qBAAqB,EACrBC,2BAA2B,EAC3B;EACA,MAAMC,MAAM,GAAG;IAACC,KAAK,EAAE,CAAC;IAAEC,QAAQ,EAAE;EAAE,CAAC;EAEvCR,SAAS,CAACS,OAAO,CAACC,QAAQ,IAAI;IAC5B,MAAMC,iBAAiB,GAAGV,cAAc,CAACW,GAAG,CAACC,SAAS,IACpDnB,0BAA0B,CACxBgB,QAAQ,EACRG,SAAS,EACTX,MAAM,EACNC,qBAAqB,EACrBC,qBAAqB,EACrBC,2BACF,CACF,CAAC;IACD;IACA,MAAMS,eAAe,GAAGH,iBAAiB,CAACI,KAAK,CAACF,SAAS,IAAIA,SAAS,CAACN,KAAK,GAAG,CAAC,CAAC;IAEjF,IAAIO,eAAe,EAAE;MACnB;AACN;AACA;AACA;AACA;MACM,MAAME,MAAM,GAAGL,iBAAiB,CAACC,GAAG,CAACK,KAAK,IAAIA,KAAK,CAACV,KAAK,CAAC;MAC1D;AACN;AACA;AACA;AACA;AACA;AACA;AACA;MACM,MAAMW,eAAe,GAAGC,IAAI,CAACC,GAAG,CAAC,GAAGJ,MAAM,CAAC;MAC3C,MAAMK,UAAU,GAAG/B,WAAW,CAACqB,iBAAiB,CAACC,GAAG,CAACK,KAAK,IAAIA,KAAK,CAACK,OAAO,CAAC,CAAC;MAE7E,IAAId,QAAQ,GAAG,EAAE;MAEjB,IAAIL,qBAAqB,EAAE;QACzB;QACAK,QAAQ,GAAGZ,oBAAoB,CAACc,QAAQ,EAAEW,UAAU,EAAElB,qBAAqB,CAAC;MAC9E,CAAC,MAAM;QACLK,QAAQ,GAAGf,qBAAqB,CAACiB,QAAQ,EAAEW,UAAU,CAAC;MACxD;MAEAf,MAAM,CAACC,KAAK,IAAIW,eAAe;MAC/BZ,MAAM,CAACE,QAAQ,CAACe,IAAI,CAACf,QAAQ,CAAC;IAChC;EACF,CAAC,CAAC;EAEF,OAAOF,MAAM;AACf;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,SAASkB,iBAAiBA,CAACC,KAAK,EAAEC,UAAU,EAAE;EAC3D,MAAMpB,MAAM,GAAG;IAACC,KAAK,EAAE,CAAC;IAAEC,QAAQ,EAAE,EAAE;IAAEmB,eAAe,EAAE;EAAC,CAAC;EAC3D,MAAMC,UAAU,GAAGF,UAAU,CAACG,WAAW,CAAC,YAAY,CAAC;EACvD,IAAI5B,cAAc,GAAG2B,UAAU,CAAC3B,cAAc;EAC9C,MAAM0B,eAAe,GAAG1B,cAAc,CAAC6B,MAAM;;EAE7C;AACF;AACA;AACA;EACE7B,cAAc,GAAGA,cAAc,CAACW,GAAG,CAACmB,IAAI,IAAIA,IAAI,CAACnB,GAAG,CAACoB,IAAI,IAAIxC,eAAe,CAACwC,IAAI,CAAC,CAAC,CAAC;EAEpF,IAAIL,eAAe,KAAK,CAAC,EAAE;IACzB,OAAOrB,MAAM;EACf;EAEA,MAAMH,qBAAqB,GAAGuB,UAAU,CAACO,SAAS,CAAC,uBAAuB,CAAC;EAC3E,MAAMC,uBAAuB,GAAGR,UAAU,CAACO,SAAS,CAA
C,mBAAmB,CAAC;EACzE,MAAM5B,2BAA2B,GAAGqB,UAAU,CAACO,SAAS,CAAC,uBAAuB,CAAC;EACjF,MAAM/B,MAAM,GAAGuB,KAAK,CAACU,SAAS,CAAC,CAAC;EAChC,MAAMC,IAAI,GAAGjC,qBAAqB,GAC9BL,wBAAwB,CAAC2B,KAAK,CAACY,OAAO,CAAC,CAAC,EAAEZ,KAAK,CAACa,WAAW,IAAIb,KAAK,CAACa,WAAW,CAACC,UAAU,CAAC,GAC5Fd,KAAK,CAACY,OAAO,CAAC,CAAC;;EAEnB;EACA,MAAMrC,SAAS,GAAGG,qBAAqB,GACnCN,YAAY,CAACuC,IAAI,EAAEF,uBAAuB,CAAC,GAC3C3C,oBAAoB,CAACkC,KAAK,CAAC;EAC/B;EACA,MAAMrB,qBAAqB,GAAGT,cAAc,CAAC8B,KAAK,CAACe,UAAU,CAAC,CAAC,CAAC;;EAEhE;AACF;AACA;AACA;AACA;EACE,MAAMC,cAAc,GAAG1C,oBAAoB,CACzCC,SAAS,EACTC,cAAc,EACdC,MAAM,EACNC,qBAAqB,EACrBC,qBAAqB,EACrBC,2BACF,CAAC;EAEDC,MAAM,CAACC,KAAK,GAAGkC,cAAc,CAAClC,KAAK;EACnCD,MAAM,CAACE,QAAQ,GAAGnB,OAAO,CAACoD,cAAc,CAACjC,QAAQ,CAAC;EAClDF,MAAM,CAACqB,eAAe,GAAGA,eAAe;EAExC,OAAOrB,MAAM;AACf;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,OAAO,SAASoC,YAAYA,CAACjB,KAAK,EAAEC,UAAU,EAAE;EAC9CiB,OAAO,CAACC,IAAI,CAAC,6DAA6D,CAAC;EAC3E,OAAOpB,iBAAiB,CAACC,KAAK,EAAEC,UAAU,CAAC;AAC7C","ignoreList":[]}
1
+ {"version":3,"file":"keywordCount.js","names":["flatten","flattenDeep","getSentencesFromTree","normalizeSingle","getMarkingsInSentence","matchWordFormsWithSentence","isDoubleQuoted","markWordsInASentence","getSentences","filterShortcodesFromHTML","isChineseText","countKeyphraseInText","sentences","keyphraseForms","locale","matchWordCustomHelper","isExactMatchRequested","customSplitIntoTokensHelper","result","count","markings","forEach","sentence","matchesInSentence","map","wordForms","isEachWordFound","every","counts","match","totalMatchCount","Math","min","foundWords","matches","push","getKeyphraseCount","paper","researcher","keyphraseLength","topicForms","getResearch","length","word","form","getHelper","customSentenceTokenizer","getLocale","text","getText","_attributes","shortcodes","getKeyword","lowerText","toLowerCase","lowerWord","startIndex","index","indexOf","error","console","warn","message","keyphraseFound","keywordCount"],"sources":["../../../src/languageProcessing/researches/keywordCount.js"],"sourcesContent":["import {flatten, flattenDeep} from 'lodash';\nimport getSentencesFromTree from '../helpers/sentence/getSentencesFromTree';\nimport {normalizeSingle} from '../helpers/sanitize/quotes';\nimport getMarkingsInSentence from '../helpers/highlighting/getMarkingsInSentence';\nimport matchWordFormsWithSentence from '../helpers/match/matchWordFormsWithSentence';\nimport isDoubleQuoted from '../helpers/match/isDoubleQuoted';\nimport {markWordsInASentence} from '../helpers/word/markWordsInSentences';\nimport getSentences from '../helpers/sentence/getSentences';\nimport {filterShortcodesFromHTML} from '../helpers';\nimport isChineseText from '../helpers/language/isChineseText';\n\n/**\n * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches.\n *\n * @param {(Sentence|string)[]}\tsentences\t\t\tThe sentences to check.\n * @param {Array}\t\tkeyphraseForms\t\t\t\tThe keyphrase forms.\n * @param 
{string}\t\tlocale\t\t\t\t\t\tThe locale used in the analysis.\n * @param {function}\tmatchWordCustomHelper\t\tA custom helper to match words with a text.\n * @param {boolean}\t\tisExactMatchRequested\t\tWhether the exact matching is requested.\n * @param {function}\tcustomSplitIntoTokensHelper\tA custom helper to split sentences into tokens.\n *\n * @returns {{markings: Mark[], count: number}} The number of keyphrase occurrences in the text and the Mark objects of the matches.\n */\nexport function countKeyphraseInText(\n sentences,\n keyphraseForms,\n locale,\n matchWordCustomHelper,\n isExactMatchRequested,\n customSplitIntoTokensHelper\n) {\n const result = {count: 0, markings: []};\n\n sentences.forEach(sentence => {\n const matchesInSentence = keyphraseForms.map(wordForms =>\n matchWordFormsWithSentence(\n sentence,\n wordForms,\n locale,\n matchWordCustomHelper,\n isExactMatchRequested,\n customSplitIntoTokensHelper\n )\n );\n // A sentence has at least one full-match of the keyphrase if each word occurs at least once.\n const isEachWordFound = matchesInSentence.every(wordForms => wordForms.count > 0);\n\n if (isEachWordFound) {\n /*\n * Retrieve all the occurrences' count of each word of the keyphrase and save it in an array.\n * matches: [ [ { matches: [\"red\"], count: 1 } ], [ { matches: [\"pandas\"], count: 2 } ] ]\n * counts: [ 1, 2 ]\n */\n const counts = matchesInSentence.map(match => match.count);\n /*\n * The number of the full-match count is the lowest count of the occurrences.\n * counts: [ 1, 2 ]\n * totalMatchCount: 1\n *\n * From the example above, the full-match is 1, because one of the \"pandas\" occurrences is not accompanied by \"red\"\n * to be counted as a full-match.\n */\n const totalMatchCount = Math.min(...counts);\n const foundWords = flattenDeep(matchesInSentence.map(match => match.matches));\n\n let markings = [];\n\n if (matchWordCustomHelper) {\n // Currently, this check is only applicable for Japanese.\n markings = 
markWordsInASentence(sentence, foundWords, matchWordCustomHelper);\n } else {\n markings = getMarkingsInSentence(sentence, foundWords);\n }\n\n result.count += totalMatchCount;\n result.markings.push(markings);\n }\n });\n\n return result;\n}\n\n/**\n * Calculates the keyphrase count, takes morphology into account.\n *\n * @param {Paper} paper The paper containing keyphrase and text.\n * @param {Researcher} researcher The researcher.\n *\n * @returns {{count: number, markings: Mark[], keyphraseLength: number}} An object containing the keyphrase count, markings and the kephrase length.\n */\nexport default function getKeyphraseCount(paper, researcher) {\n const result = {count: 0, markings: [], keyphraseLength: 0};\n const topicForms = researcher.getResearch('morphology');\n let keyphraseForms = topicForms.keyphraseForms;\n const keyphraseLength = keyphraseForms.length;\n\n /*\n * Normalize single quotes so that word form with different type of single quotes can still be matched.\n * For example, \"key‛word\" should match \"key'word\".\n */\n keyphraseForms = keyphraseForms.map(word => word.map(form => normalizeSingle(form)));\n\n if (keyphraseLength === 0) {\n return result;\n }\n\n let matchWordCustomHelper = researcher.getHelper('matchWordCustomHelper');\n const customSentenceTokenizer = researcher.getHelper('memoizedTokenizer');\n const customSplitIntoTokensHelper = researcher.getHelper('splitIntoTokensCustom');\n const locale = paper.getLocale();\n const text = matchWordCustomHelper\n ? filterShortcodesFromHTML(paper.getText(), paper._attributes && paper._attributes.shortcodes)\n : paper.getText();\n\n // Auto-detect Chinese and use Chinese helper if not already available\n if (!matchWordCustomHelper && isChineseText(paper.getKeyword() + ' ' + text)) {\n // Import Chinese helper for Chinese text\n try {\n // Use simple Chinese word matching for Chinese text\n matchWordCustomHelper = function(sentence, word) {\n const text = typeof sentence === 'string' ? 
sentence : sentence.text || sentence;\n const matches = [];\n\n if (!text || !word) {\n return matches;\n }\n\n const lowerText = text.toLowerCase();\n const lowerWord = word.toLowerCase();\n\n let startIndex = 0;\n let index;\n\n while ((index = lowerText.indexOf(lowerWord, startIndex)) !== -1) {\n matches.push(word);\n startIndex = index + lowerWord.length;\n }\n\n return matches;\n };\n } catch (error) {\n // If there's any issue, continue with default behavior\n console.warn('Error setting up Chinese matching, using default:', error.message);\n }\n }\n\n // When the custom helper is available, we're using the sentences retrieved from the text for the analysis.\n const sentences = matchWordCustomHelper\n ? getSentences(text, customSentenceTokenizer)\n : getSentencesFromTree(paper);\n // Exact matching is requested when the keyphrase is enclosed in double quotes.\n const isExactMatchRequested = isDoubleQuoted(paper.getKeyword());\n\n /*\n * Count the amount of keyphrase occurrences in the sentences.\n * An occurrence is counted when all words of the keyphrase are contained within the sentence. Each sentence can contain multiple keyphrases.\n * (e.g. 
\"The apple potato is an apple and a potato.\" has two occurrences of the keyphrase \"apple potato\").\n */\n const keyphraseFound = countKeyphraseInText(\n sentences,\n keyphraseForms,\n locale,\n matchWordCustomHelper,\n isExactMatchRequested,\n customSplitIntoTokensHelper\n );\n\n result.count = keyphraseFound.count;\n result.markings = flatten(keyphraseFound.markings);\n result.keyphraseLength = keyphraseLength;\n\n return result;\n}\n\n/**\n * Calculates the keyphrase count, takes morphology into account.\n *\n * @deprecated Use getKeyphraseCount instead.\n *\n * @param {Paper} paper The paper containing keyphrase and text.\n * @param {Researcher} researcher The researcher.\n *\n * @returns {Object} An array of all the matches, markings and the keyphrase count.\n */\nexport function keywordCount(paper, researcher) {\n console.warn('This function is deprecated, use getKeyphraseCount instead.');\n return getKeyphraseCount(paper, researcher);\n}\n"],"mappings":"AAAA,SAAQA,OAAO,EAAEC,WAAW,QAAO,QAAQ;AAC3C,OAAOC,oBAAoB;AAC3B,SAAQC,eAAe;AACvB,OAAOC,qBAAqB;AAC5B,OAAOC,0BAA0B;AACjC,OAAOC,cAAc;AACrB,SAAQC,oBAAoB;AAC5B,OAAOC,YAAY;AACnB,SAAQC,wBAAwB;AAChC,OAAOC,aAAa;;AAEpB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,OAAO,SAASC,oBAAoBA,CAClCC,SAAS,EACTC,cAAc,EACdC,MAAM,EACNC,qBAAqB,EACrBC,qBAAqB,EACrBC,2BAA2B,EAC3B;EACA,MAAMC,MAAM,GAAG;IAACC,KAAK,EAAE,CAAC;IAAEC,QAAQ,EAAE;EAAE,CAAC;EAEvCR,SAAS,CAACS,OAAO,CAACC,QAAQ,IAAI;IAC5B,MAAMC,iBAAiB,GAAGV,cAAc,CAACW,GAAG,CAACC,SAAS,IACpDpB,0BAA0B,CACxBiB,QAAQ,EACRG,SAAS,EACTX,MAAM,EACNC,qBAAqB,EACrBC,qBAAqB,EACrBC,2BACF,CACF,CAAC;IACD;IACA,MAAMS,eAAe,GAAGH,iBAAiB,CAACI,KAAK,CAACF,SAAS,IAAIA,SAAS,CAACN,KAAK,GAAG,CAAC,CAAC;IAEjF,IAAIO,eAAe,EAAE;MACnB;AACN;AACA;AACA;AACA;MACM,MAAME,MAAM,GAAGL,iBAAiB,CAACC,GAAG,CAACK,KAAK,IAAIA,KAAK,CAACV,KAAK,CAAC;MAC1D;AACN;AACA;AACA;AACA;AACA;AACA;AACA;MACM,MAAMW,eAAe,GAAGC,IAAI,CAACC,GAAG,CAAC,GAAGJ,MAAM,CAAC;MAC3C,MAAMK,UAAU,GAAGhC,WAAW,CAACsB,iBAAiB,CAACC,GAAG,CAACK,KAAK,IAAIA,KAAK,C
AACK,OAAO,CAAC,CAAC;MAE7E,IAAId,QAAQ,GAAG,EAAE;MAEjB,IAAIL,qBAAqB,EAAE;QACzB;QACAK,QAAQ,GAAGb,oBAAoB,CAACe,QAAQ,EAAEW,UAAU,EAAElB,qBAAqB,CAAC;MAC9E,CAAC,MAAM;QACLK,QAAQ,GAAGhB,qBAAqB,CAACkB,QAAQ,EAAEW,UAAU,CAAC;MACxD;MAEAf,MAAM,CAACC,KAAK,IAAIW,eAAe;MAC/BZ,MAAM,CAACE,QAAQ,CAACe,IAAI,CAACf,QAAQ,CAAC;IAChC;EACF,CAAC,CAAC;EAEF,OAAOF,MAAM;AACf;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,SAASkB,iBAAiBA,CAACC,KAAK,EAAEC,UAAU,EAAE;EAC3D,MAAMpB,MAAM,GAAG;IAACC,KAAK,EAAE,CAAC;IAAEC,QAAQ,EAAE,EAAE;IAAEmB,eAAe,EAAE;EAAC,CAAC;EAC3D,MAAMC,UAAU,GAAGF,UAAU,CAACG,WAAW,CAAC,YAAY,CAAC;EACvD,IAAI5B,cAAc,GAAG2B,UAAU,CAAC3B,cAAc;EAC9C,MAAM0B,eAAe,GAAG1B,cAAc,CAAC6B,MAAM;;EAE7C;AACF;AACA;AACA;EACE7B,cAAc,GAAGA,cAAc,CAACW,GAAG,CAACmB,IAAI,IAAIA,IAAI,CAACnB,GAAG,CAACoB,IAAI,IAAIzC,eAAe,CAACyC,IAAI,CAAC,CAAC,CAAC;EAEpF,IAAIL,eAAe,KAAK,CAAC,EAAE;IACzB,OAAOrB,MAAM;EACf;EAEA,IAAIH,qBAAqB,GAAGuB,UAAU,CAACO,SAAS,CAAC,uBAAuB,CAAC;EACzE,MAAMC,uBAAuB,GAAGR,UAAU,CAACO,SAAS,CAAC,mBAAmB,CAAC;EACzE,MAAM5B,2BAA2B,GAAGqB,UAAU,CAACO,SAAS,CAAC,uBAAuB,CAAC;EACjF,MAAM/B,MAAM,GAAGuB,KAAK,CAACU,SAAS,CAAC,CAAC;EAChC,MAAMC,IAAI,GAAGjC,qBAAqB,GAC9BN,wBAAwB,CAAC4B,KAAK,CAACY,OAAO,CAAC,CAAC,EAAEZ,KAAK,CAACa,WAAW,IAAIb,KAAK,CAACa,WAAW,CAACC,UAAU,CAAC,GAC5Fd,KAAK,CAACY,OAAO,CAAC,CAAC;;EAEnB;EACA,IAAI,CAAClC,qBAAqB,IAAIL,aAAa,CAAC2B,KAAK,CAACe,UAAU,CAAC,CAAC,GAAG,GAAG,GAAGJ,IAAI,CAAC,EAAE;IAC5E;IACA,IAAI;MACF;MACAjC,qBAAqB,GAAG,SAAAA,CAASO,QAAQ,EAAEqB,IAAI,EAAE;QAC/C,MAAMK,IAAI,GAAG,OAAO1B,QAAQ,KAAK,QAAQ,GAAGA,QAAQ,GAAGA,QAAQ,CAAC0B,IAAI,IAAI1B,QAAQ;QAChF,MAAMY,OAAO,GAAG,EAAE;QAElB,IAAI,CAACc,IAAI,IAAI,CAACL,IAAI,EAAE;UAClB,OAAOT,OAAO;QAChB;QAEA,MAAMmB,SAAS,GAAGL,IAAI,CAACM,WAAW,CAAC,CAAC;QACpC,MAAMC,SAAS,GAAGZ,IAAI,CAACW,WAAW,CAAC,CAAC;QAEpC,IAAIE,UAAU,GAAG,CAAC;QAClB,IAAIC,KAAK;QAET,OAAO,CAACA,KAAK,GAAGJ,SAAS,CAACK,OAAO,CAACH,SAAS,EAAEC,UAAU,CAAC,MAAM,CAAC,CAAC,EAAE;UAChEtB,OAAO,CAACC,IAAI,CAACQ,IAAI,CAAC;UAClBa,UAAU,GAAGC,KAAK,GAAGF,SAAS,CAACb,MAAM;QACvC;QAEA,OAAOR,OAAO;MAChB,CAAC;IACH,CAAC,CAAC,OAAOyB,KAAK,
EAAE;MACd;MACAC,OAAO,CAACC,IAAI,CAAC,mDAAmD,EAAEF,KAAK,CAACG,OAAO,CAAC;IAClF;EACF;;EAEA;EACA,MAAMlD,SAAS,GAAGG,qBAAqB,GACnCP,YAAY,CAACwC,IAAI,EAAEF,uBAAuB,CAAC,GAC3C5C,oBAAoB,CAACmC,KAAK,CAAC;EAC/B;EACA,MAAMrB,qBAAqB,GAAGV,cAAc,CAAC+B,KAAK,CAACe,UAAU,CAAC,CAAC,CAAC;;EAEhE;AACF;AACA;AACA;AACA;EACE,MAAMW,cAAc,GAAGpD,oBAAoB,CACzCC,SAAS,EACTC,cAAc,EACdC,MAAM,EACNC,qBAAqB,EACrBC,qBAAqB,EACrBC,2BACF,CAAC;EAEDC,MAAM,CAACC,KAAK,GAAG4C,cAAc,CAAC5C,KAAK;EACnCD,MAAM,CAACE,QAAQ,GAAGpB,OAAO,CAAC+D,cAAc,CAAC3C,QAAQ,CAAC;EAClDF,MAAM,CAACqB,eAAe,GAAGA,eAAe;EAExC,OAAOrB,MAAM;AACf;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,OAAO,SAAS8C,YAAYA,CAAC3B,KAAK,EAAEC,UAAU,EAAE;EAC9CsB,OAAO,CAACC,IAAI,CAAC,6DAA6D,CAAC;EAC3E,OAAOzB,iBAAiB,CAACC,KAAK,EAAEC,UAAU,CAAC;AAC7C","ignoreList":[]}