axyseo 2.1.8 → 2.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,8 @@
  /** @module stringProcessing/countWords */
  import sanitizeString from "../sanitize/sanitizeString";
- import { filter, flatMap } from "lodash";
+ import { filter, flatMap } from 'lodash';
  import removePunctuation, { punctuationRegexString } from "../sanitize/removePunctuation.js";
- const punctuationRegex = new RegExp(`([${punctuationRegexString}])`, "g");
+ const punctuationRegex = new RegExp(`([${punctuationRegexString}])`, 'g');

  /**
  * Returns an array with words used in the text.
@@ -13,25 +13,33 @@ const punctuationRegex = new RegExp(`([${punctuationRegexString}])`, "g");
  *
  * @returns {Array} The array with all words.
  */
- export default function (text, wordBoundaryRegexString = "\\s", shouldRemovePunctuation = true) {
+ export default function (text, wordBoundaryRegexString = '\\s', shouldRemovePunctuation = true) {
  // Unify whitespaces and non-breaking spaces, remove table of content and strip the tags and multiple spaces.
  text = sanitizeString(text);
- if (text === "") {
+ if (text === '') {
  return [];
  }
- const wordBoundaryRegex = new RegExp(wordBoundaryRegexString, "g");
- let words = text.split(wordBoundaryRegex);
- if (shouldRemovePunctuation) {
- words = words.map(removePunctuation);
+ const chineseCharRegex = /[\u4e00-\u9fff]/;
+ const hasChinese = chineseCharRegex.test(text);
+ const hasNoSpaces = !/\s/.test(text);
+ let words = [];
+ if (hasChinese && hasNoSpaces) {
+ words = Array.from(text).filter(char => chineseCharRegex.test(char));
  } else {
- // If punctuation is not removed, punctuation marks are tokenized as if they were words.
- words = flatMap(words, word => {
- const newWord = word.replace(punctuationRegex, " $1 ");
- return newWord.split(" ");
- });
+ const wordBoundaryRegex = new RegExp(wordBoundaryRegexString, 'g');
+ words = text.split(wordBoundaryRegex);
+ if (shouldRemovePunctuation) {
+ words = words.map(removePunctuation);
+ } else {
+ // If punctuation is not removed, punctuation marks are tokenized as if they were words.
+ words = flatMap(words, word => {
+ const newWord = word.replace(punctuationRegex, ' $1 ');
+ return newWord.split(' ');
+ });
+ }
  }
  return filter(words, function (word) {
- return word.trim() !== "";
+ return word.trim() !== '';
  });
  }
  //# sourceMappingURL=getWords.js.map
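
The getWords change above adds a branch for Chinese input: when the sanitized text contains CJK Unified Ideographs (U+4E00–U+9FFF) and no whitespace, the new build splits the text into individual characters instead of splitting on the word-boundary regex; other input keeps the previous split-and-punctuation handling. A rough standalone sketch of that behaviour follows (illustrative only; getWordsSketch and its simplified filtering are not part of the package, which still routes non-Chinese text through removePunctuation and flatMap):

// Sketch of the new Chinese branch in getWords; not the packaged code.
const chineseCharRegex = /[\u4e00-\u9fff]/;

function getWordsSketch(text, wordBoundaryRegexString = "\\s") {
  if (text === "") {
    return [];
  }
  // Added behaviour: Chinese text without spaces is split into single characters.
  if (chineseCharRegex.test(text) && !/\s/.test(text)) {
    return Array.from(text).filter(char => chineseCharRegex.test(char));
  }
  // Existing behaviour: split on the word-boundary regex and drop empty entries.
  const words = text.split(new RegExp(wordBoundaryRegexString, "g"));
  return words.filter(word => word.trim() !== "");
}

// getWordsSketch("这是一个测试");  // ["这", "是", "一", "个", "测", "试"]
// getWordsSketch("hello world"); // ["hello", "world"]
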
@@ -1 +1 @@
- {"version":3,"file":"getWords.js","names":["sanitizeString","filter","flatMap","removePunctuation","punctuationRegexString","punctuationRegex","RegExp","text","wordBoundaryRegexString","shouldRemovePunctuation","wordBoundaryRegex","words","split","map","word","newWord","replace","trim"],"sources":["../../../../src/languageProcessing/helpers/word/getWords.js"],"sourcesContent":["/** @module stringProcessing/countWords */\nimport sanitizeString from \"../sanitize/sanitizeString\";\nimport { filter, flatMap } from \"lodash\";\nimport removePunctuation, { punctuationRegexString } from \"../sanitize/removePunctuation.js\";\n\nconst punctuationRegex = new RegExp( `([${punctuationRegexString}])`, \"g\" );\n\n/**\n * Returns an array with words used in the text.\n *\n * @param {string} text The text to be counted.\n * @param {string} [wordBoundaryRegexString=\\\\s] The regex string for the word boundary that should be used to split the text into words.\n * @param {boolean} [shouldRemovePunctuation=true] If punctuation should be removed. Defaults to `true`.\n *\n * @returns {Array} The array with all words.\n */\nexport default function( text, wordBoundaryRegexString = \"\\\\s\", shouldRemovePunctuation = true ) {\n\t// Unify whitespaces and non-breaking spaces, remove table of content and strip the tags and multiple spaces.\n\ttext = sanitizeString( text );\n\n\tif ( text === \"\" ) {\n\t\treturn [];\n\t}\n\n\tconst wordBoundaryRegex = new RegExp( wordBoundaryRegexString, \"g\" );\n\n\tlet words = text.split( wordBoundaryRegex );\n\n\tif ( shouldRemovePunctuation ) {\n\t\twords = words.map( removePunctuation );\n\t} else {\n\t\t// If punctuation is not removed, punctuation marks are tokenized as if they were words.\n\t\twords = flatMap( words, ( word ) => {\n\t\t\tconst newWord = word.replace( punctuationRegex, \" $1 \" );\n\t\t\treturn newWord.split( \" \" );\n\t\t} );\n\t}\n\n\treturn filter( words, function( word ) {\n\t\treturn word.trim() !== \"\";\n\t} );\n}\n\n"],"mappings":"AAAA;AACA,OAAOA,cAAc;AACrB,SAASC,MAAM,EAAEC,OAAO,QAAQ,QAAQ;AACxC,OAAOC,iBAAiB,IAAIC,sBAAsB;AAElD,MAAMC,gBAAgB,GAAG,IAAIC,MAAM,CAAE,KAAKF,sBAAsB,IAAI,EAAE,GAAI,CAAC;;AAE3E;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAAUG,IAAI,EAAEC,uBAAuB,GAAG,KAAK,EAAEC,uBAAuB,GAAG,IAAI,EAAG;EAChG;EACAF,IAAI,GAAGP,cAAc,CAAEO,IAAK,CAAC;EAE7B,IAAKA,IAAI,KAAK,EAAE,EAAG;IAClB,OAAO,EAAE;EACV;EAEA,MAAMG,iBAAiB,GAAG,IAAIJ,MAAM,CAAEE,uBAAuB,EAAE,GAAI,CAAC;EAEpE,IAAIG,KAAK,GAAGJ,IAAI,CAACK,KAAK,CAAEF,iBAAkB,CAAC;EAE3C,IAAKD,uBAAuB,EAAG;IAC9BE,KAAK,GAAGA,KAAK,CAACE,GAAG,CAAEV,iBAAkB,CAAC;EACvC,CAAC,MAAM;IACN;IACAQ,KAAK,GAAGT,OAAO,CAAES,KAAK,EAAIG,IAAI,IAAM;MACnC,MAAMC,OAAO,GAAGD,IAAI,CAACE,OAAO,CAAEX,gBAAgB,EAAE,MAAO,CAAC;MACxD,OAAOU,OAAO,CAACH,KAAK,CAAE,GAAI,CAAC;IAC5B,CAAE,CAAC;EACJ;EAEA,OAAOX,MAAM,CAAEU,KAAK,EAAE,UAAUG,IAAI,EAAG;IACtC,OAAOA,IAAI,CAACG,IAAI,CAAC,CAAC,KAAK,EAAE;EAC1B,CAAE,CAAC;AACJ","ignoreList":[]}
+ {"version":3,"file":"getWords.js","names":["sanitizeString","filter","flatMap","removePunctuation","punctuationRegexString","punctuationRegex","RegExp","text","wordBoundaryRegexString","shouldRemovePunctuation","chineseCharRegex","hasChinese","test","hasNoSpaces","words","Array","from","char","wordBoundaryRegex","split","map","word","newWord","replace","trim"],"sources":["../../../../src/languageProcessing/helpers/word/getWords.js"],"sourcesContent":["/** @module stringProcessing/countWords */\nimport sanitizeString from '../sanitize/sanitizeString';\nimport {filter, flatMap} from 'lodash';\nimport removePunctuation, {punctuationRegexString} from '../sanitize/removePunctuation.js';\n\nconst punctuationRegex = new RegExp(`([${punctuationRegexString}])`, 'g');\n\n/**\n * Returns an array with words used in the text.\n *\n * @param {string} text The text to be counted.\n * @param {string} [wordBoundaryRegexString=\\\\s] The regex string for the word boundary that should be used to split the text into words.\n * @param {boolean} [shouldRemovePunctuation=true] If punctuation should be removed. Defaults to `true`.\n *\n * @returns {Array} The array with all words.\n */\nexport default function(text, wordBoundaryRegexString = '\\\\s', shouldRemovePunctuation = true) {\n // Unify whitespaces and non-breaking spaces, remove table of content and strip the tags and multiple spaces.\n text = sanitizeString(text);\n\n if (text === '') {\n return [];\n }\n\n const chineseCharRegex = /[\\u4e00-\\u9fff]/;\n const hasChinese = chineseCharRegex.test(text);\n const hasNoSpaces = !/\\s/.test(text);\n\n let words = [];\n\n if (hasChinese && hasNoSpaces) {\n words = Array.from(text).filter(char => chineseCharRegex.test(char));\n } else {\n const wordBoundaryRegex = new RegExp(wordBoundaryRegexString, 'g');\n\n words = text.split(wordBoundaryRegex);\n\n if (shouldRemovePunctuation) {\n words = words.map(removePunctuation);\n } else {\n // If punctuation is not removed, punctuation marks are tokenized as if they were words.\n words = flatMap(words, word => {\n const newWord = word.replace(punctuationRegex, ' $1 ');\n return newWord.split(' ');\n });\n }\n }\n\n return filter(words, function(word) {\n return word.trim() !== '';\n });\n}\n"],"mappings":"AAAA;AACA,OAAOA,cAAc;AACrB,SAAQC,MAAM,EAAEC,OAAO,QAAO,QAAQ;AACtC,OAAOC,iBAAiB,IAAGC,sBAAsB;AAEjD,MAAMC,gBAAgB,GAAG,IAAIC,MAAM,CAAC,KAAKF,sBAAsB,IAAI,EAAE,GAAG,CAAC;;AAEzE;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAASG,IAAI,EAAEC,uBAAuB,GAAG,KAAK,EAAEC,uBAAuB,GAAG,IAAI,EAAE;EAC7F;EACAF,IAAI,GAAGP,cAAc,CAACO,IAAI,CAAC;EAE3B,IAAIA,IAAI,KAAK,EAAE,EAAE;IACf,OAAO,EAAE;EACX;EAEA,MAAMG,gBAAgB,GAAG,iBAAiB;EAC1C,MAAMC,UAAU,GAAGD,gBAAgB,CAACE,IAAI,CAACL,IAAI,CAAC;EAC9C,MAAMM,WAAW,GAAG,CAAC,IAAI,CAACD,IAAI,CAACL,IAAI,CAAC;EAEpC,IAAIO,KAAK,GAAG,EAAE;EAEd,IAAIH,UAAU,IAAIE,WAAW,EAAE;IAC7BC,KAAK,GAAGC,KAAK,CAACC,IAAI,CAACT,IAAI,CAAC,CAACN,MAAM,CAACgB,IAAI,IAAIP,gBAAgB,CAACE,IAAI,CAACK,IAAI,CAAC,CAAC;EACtE,CAAC,MAAM;IACL,MAAMC,iBAAiB,GAAG,IAAIZ,MAAM,CAACE,uBAAuB,EAAE,GAAG,CAAC;IAElEM,KAAK,GAAGP,IAAI,CAACY,KAAK,CAACD,iBAAiB,CAAC;IAErC,IAAIT,uBAAuB,EAAE;MAC3BK,KAAK,GAAGA,KAAK,CAACM,GAAG,CAACjB,iBAAiB,CAAC;IACtC,CAAC,MAAM;MACL;MACAW,KAAK,GAAGZ,OAAO,CAACY,KAAK,EAAEO,IAAI,IAAI;QAC7B,MAAMC,OAAO,GAAGD,IAAI,CAACE,OAAO,CAAClB,gBAAgB,EAAE,MAAM,CAAC;QACtD,OAAOiB,OAAO,CAACH,KAAK,CAAC,GAAG,CAAC;MAC3B,CAAC,CAAC;IACJ;EACF;EAEA,OAAOlB,MAAM,CAACa,KAAK,EAAE,UAASO,IAAI,EAAE;IAClC,OAAOA,IAAI,CAACG,IAAI,CAAC,CAAC,KAAK,EAAE;EAC3B,CAAC,CAAC;AACJ","ignoreList":[]}
@@ -13,7 +13,7 @@ import getSentences from "../helpers/sentence/getSentences";
  const replaceFoundKeywordForms = function (description, matchedKeywordForms, maxToRemove) {
  // Replace matches so we do not match them for synonyms.
  matchedKeywordForms.forEach(keywordForm => keywordForm.matches.slice(0, maxToRemove).forEach(match => {
- description = description.replace(match, "");
+ description = description.replace(match, '');
  }));
  return description;
  };
@@ -62,9 +62,21 @@ const matchPerSentence = function (sentence, topicForms, locale, matchWordCustom
  export default function (paper, researcher) {
  const description = paper.getDescription();
  const locale = paper.getLocale();
- const topicForms = researcher.getResearch("morphology");
- const matchWordCustomHelper = researcher.getHelper("matchWordCustomHelper");
- const memoizedTokenizer = researcher.getHelper("memoizedTokenizer");
+ const topicForms = researcher.getResearch('morphology');
+ const matchWordCustomHelper = researcher.getHelper('matchWordCustomHelper');
+ const memoizedTokenizer = researcher.getHelper('memoizedTokenizer');
+ const keyphrase = paper.getKeyword && paper.getKeyword();
+ const isChinese = keyphrase && /[\u4e00-\u9fff]/.test(keyphrase);
+ if (isChinese) {
+ if (!keyphrase) return 0;
+ let count = 0;
+ let pos = description.indexOf(keyphrase);
+ while (pos !== -1) {
+ count++;
+ pos = description.indexOf(keyphrase, pos + keyphrase.length);
+ }
+ return count;
+ }
  const sentences = getSentences(description, memoizedTokenizer);
  const sentenceMatches = sentences.map(sentence => matchPerSentence(sentence, topicForms, locale, matchWordCustomHelper));
  return sentenceMatches.reduce((sum, count) => sum + count, 0);
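
The metaDescriptionKeyword change above short-circuits the sentence-by-sentence morphology matching when the focus keyphrase contains Chinese characters: in that case the research simply counts literal, non-overlapping occurrences of the keyphrase in the description via indexOf. A standalone sketch of that counting loop (illustrative only; countChineseKeyphrase is a hypothetical name, not an export of the package):

// Sketch of the added Chinese keyphrase counting; not the packaged module.
function countChineseKeyphrase(description, keyphrase) {
  if (!keyphrase) {
    return 0;
  }
  let count = 0;
  let pos = description.indexOf(keyphrase);
  // Advance past each match so occurrences are counted without overlap.
  while (pos !== -1) {
    count++;
    pos = description.indexOf(keyphrase, pos + keyphrase.length);
  }
  return count;
}

// countChineseKeyphrase("关键词在前，关键词在后。", "关键词"); // 2
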
@@ -1 +1 @@
- {"version":3,"file":"metaDescriptionKeyword.js","names":["matchWords","getSentences","replaceFoundKeywordForms","description","matchedKeywordForms","maxToRemove","forEach","keywordForm","matches","slice","match","replace","matchPerSentence","sentence","topicForms","locale","matchWordCustomHelper","matchesKeyphrase","keyphraseForms","map","keywordForms","fullKeyphraseMatches","Math","min","count","fullSynonymsMatches","synonymsForms","synonymForms","fullSynonymMatches","reduce","sum","paper","researcher","getDescription","getLocale","getResearch","getHelper","memoizedTokenizer","sentences","sentenceMatches"],"sources":["../../../src/languageProcessing/researches/metaDescriptionKeyword.js"],"sourcesContent":["import matchWords from \"../helpers/match/matchTextWithArray\";\nimport getSentences from \"../helpers/sentence/getSentences\";\n\n/**\n * Replaces found keyword forms in the given description.\n *\n * @param {string} description The description to remove the matched keyword forms from.\n * @param {Object[]} matchedKeywordForms The matched keyword forms to remove from the description.\n * @param {Number} maxToRemove The maximum amount of matches of each individual keyword to remove.\n *\n * @returns {string} The description with the keywords removed.\n */\nconst replaceFoundKeywordForms = function( description, matchedKeywordForms, maxToRemove ) {\n\t// Replace matches so we do not match them for synonyms.\n\tmatchedKeywordForms.forEach( keywordForm =>\n\t\tkeywordForm.matches.slice( 0, maxToRemove ).forEach(\n\t\t\tmatch => {\n\t\t\t\tdescription = description.replace( match, \"\" );\n\t\t\t}\n\t\t)\n\t);\n\treturn description;\n};\n\n/**\n * Counts the number of full keyphrase matches in the given sentence. Takes synonyms into account.\n * A full keyphrase is when all keywords in the keyphrase match.\n *\n * @param {string} sentence The sentence that needs to be analyzed.\n * @param {Object} topicForms The keyphrase (and its optional synonyms') word forms.\n * @param {string} locale The current locale.\n * @param {function} matchWordCustomHelper The language-specific helper function to match word in text.\n *\n * @returns {Number} The number of matched keyphrases in the sentence.\n */\nconst matchPerSentence = function( sentence, topicForms, locale, matchWordCustomHelper ) {\n\t// Focus keyphrase matches.\n\tconst matchesKeyphrase = topicForms.keyphraseForms.map( keywordForms => matchWords( sentence, keywordForms, locale, matchWordCustomHelper ) );\n\t// Count the number of matches that contain every word in the entire keyphrase.\n\tconst fullKeyphraseMatches = Math.min( ...matchesKeyphrase.map( match => match.count ) );\n\n\t// Replace all full keyphrase matches so we do not match them for synonyms.\n\tsentence = replaceFoundKeywordForms( sentence, matchesKeyphrase, fullKeyphraseMatches );\n\n\t// Keyphrase synonyms matches.\n\tconst fullSynonymsMatches = topicForms.synonymsForms.map(\n\t\tsynonymForms => {\n\t\t\t// Synonym keyphrase matches.\n\t\t\tconst matches = synonymForms.map( keywordForms => matchWords( sentence, keywordForms, locale, matchWordCustomHelper ) );\n\t\t\t// Count the number of matches that contain every word in the entire synonym keyphrase.\n\t\t\tconst fullSynonymMatches = Math.min( ...matches.map( match => match.count ) );\n\t\t\t// Replace all full matches so we do not match them for other synonyms.\n\t\t\tsentence = replaceFoundKeywordForms( sentence, matchesKeyphrase, fullSynonymMatches );\n\t\t\treturn fullSynonymMatches;\n\t\t}\n\t);\n\n\treturn [ 
fullKeyphraseMatches, ...fullSynonymsMatches ].reduce( ( sum, count ) => sum + count, 0 );\n};\n\n/**\n * Counts the number of full keyphrase matches in the description.\n *\n * @param {Paper} paper The paper object containing the description.\n * @param {Researcher} researcher The researcher object to gather researchers from.\n *\n * @returns {Number} The number of keyphrase matches for the entire description.\n */\nexport default function( paper, researcher ) {\n\tconst description = paper.getDescription();\n\tconst locale = paper.getLocale();\n\n\tconst topicForms = researcher.getResearch( \"morphology\" );\n\tconst matchWordCustomHelper = researcher.getHelper( \"matchWordCustomHelper\" );\n\tconst memoizedTokenizer = researcher.getHelper( \"memoizedTokenizer\" );\n\n\tconst sentences = getSentences( description, memoizedTokenizer );\n\n\tconst sentenceMatches = sentences.map(\n\t\tsentence => matchPerSentence( sentence, topicForms, locale, matchWordCustomHelper )\n\t);\n\n\treturn sentenceMatches.reduce( ( sum, count ) => sum + count, 0 );\n}\n\n"],"mappings":"AAAA,OAAOA,UAAU;AACjB,OAAOC,YAAY;;AAEnB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMC,wBAAwB,GAAG,SAAAA,CAAUC,WAAW,EAAEC,mBAAmB,EAAEC,WAAW,EAAG;EAC1F;EACAD,mBAAmB,CAACE,OAAO,CAAEC,WAAW,IACvCA,WAAW,CAACC,OAAO,CAACC,KAAK,CAAE,CAAC,EAAEJ,WAAY,CAAC,CAACC,OAAO,CAClDI,KAAK,IAAI;IACRP,WAAW,GAAGA,WAAW,CAACQ,OAAO,CAAED,KAAK,EAAE,EAAG,CAAC;EAC/C,CACD,CACD,CAAC;EACD,OAAOP,WAAW;AACnB,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMS,gBAAgB,GAAG,SAAAA,CAAUC,QAAQ,EAAEC,UAAU,EAAEC,MAAM,EAAEC,qBAAqB,EAAG;EACxF;EACA,MAAMC,gBAAgB,GAAGH,UAAU,CAACI,cAAc,CAACC,GAAG,CAAEC,YAAY,IAAIpB,UAAU,CAAEa,QAAQ,EAAEO,YAAY,EAAEL,MAAM,EAAEC,qBAAsB,CAAE,CAAC;EAC7I;EACA,MAAMK,oBAAoB,GAAGC,IAAI,CAACC,GAAG,CAAE,GAAGN,gBAAgB,CAACE,GAAG,CAAET,KAAK,IAAIA,KAAK,CAACc,KAAM,CAAE,CAAC;;EAExF;EACAX,QAAQ,GAAGX,wBAAwB,CAAEW,QAAQ,EAAEI,gBAAgB,EAAEI,oBAAqB,CAAC;;EAEvF;EACA,MAAMI,mBAAmB,GAAGX,UAAU,CAACY,aAAa,CAACP,GAAG,CACvDQ,YAAY,IAAI;IACf;IACA,MAAMnB,OAAO,GAAGmB,YAAY,CAACR,GAAG,CAAEC,YAAY,IAAIpB,UAAU,CAAEa,QAAQ,EAAEO,YAAY,EAAEL,MAAM,EAAEC,qBAAsB,CAAE,CAAC;IACvH;IACA,MAAMY,kBAAkB,GAAGN,IAAI,CAACC,GAAG,CAAE,GAAGf,OAAO,CAACW,GAAG,CAAET,KAAK,IAAIA,KAAK,CAACc,KAAM,CAAE,CAAC;IAC7E;IACAX,QAAQ,GAAGX,wBAAwB,CAAEW,QAAQ,EAAEI,gBAAgB,EAAEW,kBAAmB,CAAC;IACrF,OAAOA,kBAAkB;EAC1B,CACD,CAAC;EAED,OAAO,CAAEP,oBAAoB,EAAE,GAAGI,mBAAmB,CAAE,CAACI,MAAM,CAAE,CAAEC,GAAG,EAAEN,KAAK,KAAMM,GAAG,GAAGN,KAAK,EAAE,CAAE,CAAC;AACnG,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAAUO,KAAK,EAAEC,UAAU,EAAG;EAC5C,MAAM7B,WAAW,GAAG4B,KAAK,CAACE,cAAc,CAAC,CAAC;EAC1C,MAAMlB,MAAM,GAAGgB,KAAK,CAACG,SAAS,CAAC,CAAC;EAEhC,MAAMpB,UAAU,GAAGkB,UAAU,CAACG,WAAW,CAAE,YAAa,CAAC;EACzD,MAAMnB,qBAAqB,GAAGgB,UAAU,CAACI,SAAS,CAAE,uBAAwB,CAAC;EAC7E,MAAMC,iBAAiB,GAAGL,UAAU,CAACI,SAAS,CAAE,mBAAoB,CAAC;EAErE,MAAME,SAAS,GAAGrC,YAAY,CAAEE,WAAW,EAAEkC,iBAAkB,CAAC;EAEhE,MAAME,eAAe,GAAGD,SAAS,CAACnB,GAAG,CACpCN,QAAQ,IAAID,gBAAgB,CAAEC,QAAQ,EAAEC,UAAU,EAAEC,MAAM,EAAEC,qBAAsB,CACnF,CAAC;EAED,OAAOuB,eAAe,CAACV,MAAM,CAAE,CAAEC,GAAG,EAAEN,KAAK,KAAMM,GAAG,GAAGN,KAAK,EAAE,CAAE,CAAC;AAClE","ignoreList":[]}
+ {"version":3,"file":"metaDescriptionKeyword.js","names":["matchWords","getSentences","replaceFoundKeywordForms","description","matchedKeywordForms","maxToRemove","forEach","keywordForm","matches","slice","match","replace","matchPerSentence","sentence","topicForms","locale","matchWordCustomHelper","matchesKeyphrase","keyphraseForms","map","keywordForms","fullKeyphraseMatches","Math","min","count","fullSynonymsMatches","synonymsForms","synonymForms","fullSynonymMatches","reduce","sum","paper","researcher","getDescription","getLocale","getResearch","getHelper","memoizedTokenizer","keyphrase","getKeyword","isChinese","test","pos","indexOf","length","sentences","sentenceMatches"],"sources":["../../../src/languageProcessing/researches/metaDescriptionKeyword.js"],"sourcesContent":["import matchWords from '../helpers/match/matchTextWithArray';\nimport getSentences from '../helpers/sentence/getSentences';\n\n/**\n * Replaces found keyword forms in the given description.\n *\n * @param {string} description The description to remove the matched keyword forms from.\n * @param {Object[]} matchedKeywordForms The matched keyword forms to remove from the description.\n * @param {Number} maxToRemove The maximum amount of matches of each individual keyword to remove.\n *\n * @returns {string} The description with the keywords removed.\n */\nconst replaceFoundKeywordForms = function(description, matchedKeywordForms, maxToRemove) {\n // Replace matches so we do not match them for synonyms.\n matchedKeywordForms.forEach(keywordForm =>\n keywordForm.matches.slice(0, maxToRemove).forEach(match => {\n description = description.replace(match, '');\n })\n );\n return description;\n};\n\n/**\n * Counts the number of full keyphrase matches in the given sentence. Takes synonyms into account.\n * A full keyphrase is when all keywords in the keyphrase match.\n *\n * @param {string} sentence The sentence that needs to be analyzed.\n * @param {Object} topicForms The keyphrase (and its optional synonyms') word forms.\n * @param {string} locale The current locale.\n * @param {function} matchWordCustomHelper The language-specific helper function to match word in text.\n *\n * @returns {Number} The number of matched keyphrases in the sentence.\n */\nconst matchPerSentence = function(sentence, topicForms, locale, matchWordCustomHelper) {\n // Focus keyphrase matches.\n const matchesKeyphrase = topicForms.keyphraseForms.map(keywordForms =>\n matchWords(sentence, keywordForms, locale, matchWordCustomHelper)\n );\n // Count the number of matches that contain every word in the entire keyphrase.\n const fullKeyphraseMatches = Math.min(...matchesKeyphrase.map(match => match.count));\n\n // Replace all full keyphrase matches so we do not match them for synonyms.\n sentence = replaceFoundKeywordForms(sentence, matchesKeyphrase, fullKeyphraseMatches);\n\n // Keyphrase synonyms matches.\n const fullSynonymsMatches = topicForms.synonymsForms.map(synonymForms => {\n // Synonym keyphrase matches.\n const matches = synonymForms.map(keywordForms =>\n matchWords(sentence, keywordForms, locale, matchWordCustomHelper)\n );\n // Count the number of matches that contain every word in the entire synonym keyphrase.\n const fullSynonymMatches = Math.min(...matches.map(match => match.count));\n // Replace all full matches so we do not match them for other synonyms.\n sentence = replaceFoundKeywordForms(sentence, matchesKeyphrase, fullSynonymMatches);\n return fullSynonymMatches;\n });\n\n return [fullKeyphraseMatches, 
...fullSynonymsMatches].reduce((sum, count) => sum + count, 0);\n};\n\n/**\n * Counts the number of full keyphrase matches in the description.\n *\n * @param {Paper} paper The paper object containing the description.\n * @param {Researcher} researcher The researcher object to gather researchers from.\n *\n * @returns {Number} The number of keyphrase matches for the entire description.\n */\nexport default function(paper, researcher) {\n const description = paper.getDescription();\n const locale = paper.getLocale();\n\n const topicForms = researcher.getResearch('morphology');\n const matchWordCustomHelper = researcher.getHelper('matchWordCustomHelper');\n const memoizedTokenizer = researcher.getHelper('memoizedTokenizer');\n\n const keyphrase = paper.getKeyword && paper.getKeyword();\n const isChinese = keyphrase && /[\\u4e00-\\u9fff]/.test(keyphrase);\n\n if (isChinese) {\n if (!keyphrase) return 0;\n let count = 0;\n let pos = description.indexOf(keyphrase);\n while (pos !== -1) {\n count++;\n pos = description.indexOf(keyphrase, pos + keyphrase.length);\n }\n return count;\n }\n\n const sentences = getSentences(description, memoizedTokenizer);\n\n const sentenceMatches = sentences.map(sentence =>\n matchPerSentence(sentence, topicForms, locale, matchWordCustomHelper)\n );\n\n return sentenceMatches.reduce((sum, count) => sum + count, 0);\n}\n"],"mappings":"AAAA,OAAOA,UAAU;AACjB,OAAOC,YAAY;;AAEnB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMC,wBAAwB,GAAG,SAAAA,CAASC,WAAW,EAAEC,mBAAmB,EAAEC,WAAW,EAAE;EACvF;EACAD,mBAAmB,CAACE,OAAO,CAACC,WAAW,IACrCA,WAAW,CAACC,OAAO,CAACC,KAAK,CAAC,CAAC,EAAEJ,WAAW,CAAC,CAACC,OAAO,CAACI,KAAK,IAAI;IACzDP,WAAW,GAAGA,WAAW,CAACQ,OAAO,CAACD,KAAK,EAAE,EAAE,CAAC;EAC9C,CAAC,CACH,CAAC;EACD,OAAOP,WAAW;AACpB,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMS,gBAAgB,GAAG,SAAAA,CAASC,QAAQ,EAAEC,UAAU,EAAEC,MAAM,EAAEC,qBAAqB,EAAE;EACrF;EACA,MAAMC,gBAAgB,GAAGH,UAAU,CAACI,cAAc,CAACC,GAAG,CAACC,YAAY,IACjEpB,UAAU,CAACa,QAAQ,EAAEO,YAAY,EAAEL,MAAM,EAAEC,qBAAqB,CAClE,CAAC;EACD;EACA,MAAMK,oBAAoB,GAAGC,IAAI,CAACC,GAAG,CAAC,GAAGN,gBAAgB,CAACE,GAAG,CAACT,KAAK,IAAIA,KAAK,CAACc,KAAK,CAAC,CAAC;;EAEpF;EACAX,QAAQ,GAAGX,wBAAwB,CAACW,QAAQ,EAAEI,gBAAgB,EAAEI,oBAAoB,CAAC;;EAErF;EACA,MAAMI,mBAAmB,GAAGX,UAAU,CAACY,aAAa,CAACP,GAAG,CAACQ,YAAY,IAAI;IACvE;IACA,MAAMnB,OAAO,GAAGmB,YAAY,CAACR,GAAG,CAACC,YAAY,IAC3CpB,UAAU,CAACa,QAAQ,EAAEO,YAAY,EAAEL,MAAM,EAAEC,qBAAqB,CAClE,CAAC;IACD;IACA,MAAMY,kBAAkB,GAAGN,IAAI,CAACC,GAAG,CAAC,GAAGf,OAAO,CAACW,GAAG,CAACT,KAAK,IAAIA,KAAK,CAACc,KAAK,CAAC,CAAC;IACzE;IACAX,QAAQ,GAAGX,wBAAwB,CAACW,QAAQ,EAAEI,gBAAgB,EAAEW,kBAAkB,CAAC;IACnF,OAAOA,kBAAkB;EAC3B,CAAC,CAAC;EAEF,OAAO,CAACP,oBAAoB,EAAE,GAAGI,mBAAmB,CAAC,CAACI,MAAM,CAAC,CAACC,GAAG,EAAEN,KAAK,KAAKM,GAAG,GAAGN,KAAK,EAAE,CAAC,CAAC;AAC9F,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAASO,KAAK,EAAEC,UAAU,EAAE;EACzC,MAAM7B,WAAW,GAAG4B,KAAK,CAACE,cAAc,CAAC,CAAC;EAC1C,MAAMlB,MAAM,GAAGgB,KAAK,CAACG,SAAS,CAAC,CAAC;EAEhC,MAAMpB,UAAU,GAAGkB,UAAU,CAACG,WAAW,CAAC,YAAY,CAAC;EACvD,MAAMnB,qBAAqB,GAAGgB,UAAU,CAACI,SAAS,CAAC,uBAAuB,CAAC;EAC3E,MAAMC,iBAAiB,GAAGL,UAAU,CAACI,SAAS,CAAC,mBAAmB,CAAC;EAEnE,MAAME,SAAS,GAAGP,KAAK,CAACQ,UAAU,IAAIR,KAAK,CAACQ,UAAU,CAAC,CAAC;EACxD,MAAMC,SAAS,GAAGF,SAAS,IAAI,iBAAiB,CAACG,IAAI,CAACH,SAAS,CAAC;EAEhE,IAAIE,SAAS,EAAE;IACb,IAAI,CAACF,SAAS,EAAE,OAAO,CAAC;IACxB,IAAId,KAAK,GAAG,CAAC;IACb,IAAIkB,GAAG,GAAGvC,WAAW,CAACwC,OAAO,CAACL,SAAS,CAAC;IACxC,OAAOI,GAAG,KAAK,CAAC,CAAC,EAAE;MACjBlB,KAAK,EAAE;MACPkB,GAAG,GAAGvC,WAAW,CAACwC,OAAO,CAACL,SAAS,EAAEI,GAAG,GAAGJ,SAAS,CAACM,MAAM,C
AAC;IAC9D;IACA,OAAOpB,KAAK;EACd;EAEA,MAAMqB,SAAS,GAAG5C,YAAY,CAACE,WAAW,EAAEkC,iBAAiB,CAAC;EAE9D,MAAMS,eAAe,GAAGD,SAAS,CAAC1B,GAAG,CAACN,QAAQ,IAC5CD,gBAAgB,CAACC,QAAQ,EAAEC,UAAU,EAAEC,MAAM,EAAEC,qBAAqB,CACtE,CAAC;EAED,OAAO8B,eAAe,CAACjB,MAAM,CAAC,CAACC,GAAG,EAAEN,KAAK,KAAKM,GAAG,GAAGN,KAAK,EAAE,CAAC,CAAC;AAC/D","ignoreList":[]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "axyseo",
- "version": "2.1.8",
+ "version": "2.1.9",
  "main": "build/index.js",
  "scripts": {
  "prepublishOnly": "npm run build ",