npm - axyseo - Versions diffs - 2025.1.0-blog.3 → 2025.1.0-blog.5 - Mend

axyseo 2025.1.0-blog.3 → 2025.1.0-blog.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/build/cjs/languageProcessing/researches/getSentenceBeginnings.js CHANGED Viewed

@@ -61,7 +61,7 @@ const compareFirstWords = function (sentenceBeginnings, sentences) {
  * @param {string}  sentence                The sentence to retrieve the first word from.
  * @param {Array}   firstWordExceptions     First word exceptions to match against.
  * @param {Array}   secondWordExceptions    Second word exceptions to match against.
- * @param {function}	getWordsCustomHelper   The language-specific helper function to retrieve words from text.
+ * @param {function}  getWordsCustomHelper   The language-specific helper function to retrieve words from text.
  *
  * @returns {string} The first word of the sentence.
  */
@@ -108,15 +108,22 @@ function _default(paper, researcher) {
   // Exclude text inside tables.
   text = text.replace(/<figure class='wp-block-table'>.*<\/figure>/gs, '');
   let sentences = (0, _getSentences.default)(text, memoizedTokenizer);
-  let sentenceBeginnings = sentences.map(function (sentence) {
-    return getSentenceBeginning(sentence, firstWordExceptions, secondWordExceptions, getWordsCustomHelper);
+  const sentenceData = sentences.map(function (sentence) {
+    const beginning = getSentenceBeginning(sentence, firstWordExceptions, secondWordExceptions, getWordsCustomHelper);
+    return {
+      sentence,
+      beginning
+    };
   });
-  sentences = sentences.filter(function (sentence) {
-    const stripped = (0, _stripSpaces.default)(sentence);
-    const words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : (0, _getWords.default)(stripped);
-    return words.length > 0;
+  const filteredSentenceData = sentenceData.filter(function (item) {
+    if (!item.beginning) return false;
+    const stripped = (0, _stripHTMLTags.stripFullTags)((0, _stripSpaces.default)(item.sentence));
+    const strippedNoSpaces = stripped.replace(/\s+/g, '');
+    const isDigitsOnly = strippedNoSpaces.length > 0 && /^[0-9]+$/.test(strippedNoSpaces);
+    return !isDigitsOnly;
   });
-  sentenceBeginnings = (0, _lodash.filter)(sentenceBeginnings);
-  return compareFirstWords(sentenceBeginnings, sentences);
+  const filteredSentences = filteredSentenceData.map(item => item.sentence);
+  const sentenceBeginnings = filteredSentenceData.map(item => item.beginning);
+  return compareFirstWords(sentenceBeginnings, filteredSentences);
 }
 //# sourceMappingURL=getSentenceBeginnings.js.map

package/build/cjs/languageProcessing/researches/getSentenceBeginnings.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"getSentenceBeginnings.js","names":["_getWords","_interopRequireDefault","require","_getSentences","_stripSpaces","_stripHTMLTags","_lodash","_htmlParser","_helpers","_stripNonTextTags","e","__esModule","default","startsWithSameWord","currentSentenceBeginning","nextSentenceBeginning","isEmpty","compareFirstWords","sentenceBeginnings","sentences","consecutiveFirstWords","foundSentences","sameBeginnings","forEach","beginning","i","push","word","count","getSentenceBeginning","sentence","firstWordExceptions","secondWordExceptions","getWordsCustomHelper","stripped","stripTags","stripSpaces","words","getWords","filter","test","length","firstWord","toLocaleLowerCase","indexOf","includes","_default","paper","researcher","getConfig","getHelper","memoizedTokenizer","text","getText","removeHtmlBlocks","stripNonTextTags","filterShortcodesFromHTML","_attributes","shortcodes","replace","getSentences","map"],"sources":["../../../../src/languageProcessing/researches/getSentenceBeginnings.js"],"sourcesContent":["import getWords from '../helpers/word/getWords.js';\nimport getSentences from '../helpers/sentence/getSentences';\nimport stripSpaces from '../helpers/sanitize/stripSpaces.js';\nimport {stripFullTags as stripTags} from '../helpers/sanitize/stripHTMLTags.js';\n\nimport {filter, forEach, isEmpty} from 'lodash';\nimport removeHtmlBlocks from '../helpers/html/htmlParser';\nimport {filterShortcodesFromHTML} from '../helpers';\nimport stripNonTextTags from '@axyseo/languageProcessing/helpers/sanitize/stripNonTextTags';\n\n/*\n Compares the first word of each sentence with the first word of the following sentence.\n \n @param {string} currentSentenceBeginning The first word of the current sentence.\n * @param {string} nextSentenceBeginning The first word of the next sentence.\n * @returns {boolean} Returns true if sentence beginnings match.\n /\nconst startsWithSameWord = function(currentSentenceBeginning, nextSentenceBeginning) {\n return !isEmpty(currentSentenceBeginning) && currentSentenceBeginning === nextSentenceBeginning;\n};\n\n/\n Counts the number of similar sentence beginnings.\n \n @param {Array} sentenceBeginnings The array containing the first word of each sentence.\n * @param {Array} sentences The array containing all sentences.\n * @returns {Array} The array containing the objects containing the first words and the corresponding counts.\n /\nconst compareFirstWords = function(sentenceBeginnings, sentences) {\n const consecutiveFirstWords = [];\n let foundSentences = [];\n let sameBeginnings = 1;\n\n forEach(sentenceBeginnings, function(beginning, i) {\n const currentSentenceBeginning = beginning;\n const nextSentenceBeginning = sentenceBeginnings[i + 1];\n foundSentences.push(sentences[i]);\n\n if (startsWithSameWord(currentSentenceBeginning, nextSentenceBeginning)) {\n sameBeginnings++;\n } else {\n consecutiveFirstWords.push({\n word: currentSentenceBeginning,\n count: sameBeginnings,\n sentences: foundSentences\n });\n sameBeginnings = 1;\n foundSentences = [];\n }\n });\n\n return consecutiveFirstWords;\n};\n\n/\n Retrieves the first word from the sentence. If the first or second word is on an exception list of words that should not be considered as sentence\n * beginnings, the following word is also retrieved.\n \n @param {string} sentence The sentence to retrieve the first word from.\n * @param {Array} firstWordExceptions First word exceptions to match against.\n * @param {Array} secondWordExceptions Second word exceptions to match against.\n * @param {function}\tgetWordsCustomHelper The language-specific helper function to retrieve words from text.\n \n @returns {string} The first word of the sentence.\n /\nfunction getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n) {\n const stripped = stripTags(stripSpaces(sentence));\n let words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : getWords(stripped);\n\n words = words.filter(word => /^\\p{L}/u.test(word));\n\n if (words.length === 0) {\n return '';\n }\n\n let firstWord = words[0].toLocaleLowerCase();\n\n if (firstWordExceptions.indexOf(firstWord) > -1 && words.length > 1) {\n firstWord = firstWord + ' ' + words[1];\n if (secondWordExceptions) {\n if (secondWordExceptions.includes(words[1])) {\n firstWord = firstWord + ' ' + words[2];\n }\n }\n }\n\n return firstWord;\n}\n\n/\n Gets the first word of each sentence from the text, and returns an object containing the first word of each sentence and the corresponding counts.\n \n @param {Paper} paper The Paper object to get the text from.\n * @param {Researcher} researcher The researcher this research is a part of.\n \n @returns {Object} The object containing the first word of each sentence and the corresponding counts.\n /\nexport default function(paper, researcher) {\n const firstWordExceptions = researcher.getConfig('firstWordExceptions');\n const secondWordExceptions = researcher.getConfig('secondWordExceptions');\n const getWordsCustomHelper = researcher.getHelper('getWordsCustomHelper');\n const memoizedTokenizer = researcher.getHelper('memoizedTokenizer');\n\n let text = paper.getText();\n text = removeHtmlBlocks(text);\n text = stripNonTextTags(text);\n text = filterShortcodesFromHTML(text, paper._attributes && paper._attributes.shortcodes);\n\n // Remove any HTML whitespace padding and replace it with a single whitespace.\n text = text.replace(/[\\s\\n]+/g, ' ');\n\n // Exclude text inside tables.\n text = text.replace(/<figure class='wp-block-table'>.<\\/figure>/gs, '');\n\n let sentences = getSentences(text, memoizedTokenizer);\n\n let sentenceBeginnings = sentences.map(function(sentence) {\n return getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n );\n });\n\n sentences = sentences.filter(function(sentence) {\n const stripped = stripSpaces(sentence);\n const words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : getWords(stripped);\n return words.length > 0;\n });\n sentenceBeginnings = filter(sentenceBeginnings);\n\n return compareFirstWords(sentenceBeginnings, sentences);\n}\n"],"mappings":";;;;;;AAAA,IAAAA,SAAA,GAAAC,sBAAA,CAAAC,OAAA;AACA,IAAAC,aAAA,GAAAF,sBAAA,CAAAC,OAAA;AACA,IAAAE,YAAA,GAAAH,sBAAA,CAAAC,OAAA;AACA,IAAAG,cAAA,GAAAH,OAAA;AAEA,IAAAI,OAAA,GAAAJ,OAAA;AACA,IAAAK,WAAA,GAAAN,sBAAA,CAAAC,OAAA;AACA,IAAAM,QAAA,GAAAN,OAAA;AACA,IAAAO,iBAAA,GAAAR,sBAAA,CAAAC,OAAA;AAA4F,SAAAD,uBAAAS,CAAA,WAAAA,CAAA,IAAAA,CAAA,CAAAC,UAAA,GAAAD,CAAA,KAAAE,OAAA,EAAAF,CAAA;AAE5F;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMG,kBAAkB,GAAG,SAAAA,CAASC,wBAAwB,EAAEC,qBAAqB,EAAE;EACnF,OAAO,CAAC,IAAAC,eAAO,EAACF,wBAAwB,CAAC,IAAIA,wBAAwB,KAAKC,qBAAqB;AACjG,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAME,iBAAiB,GAAG,SAAAA,CAASC,kBAAkB,EAAEC,SAAS,EAAE;EAChE,MAAMC,qBAAqB,GAAG,EAAE;EAChC,IAAIC,cAAc,GAAG,EAAE;EACvB,IAAIC,cAAc,GAAG,CAAC;EAEtB,IAAAC,eAAO,EAACL,kBAAkB,EAAE,UAASM,SAAS,EAAEC,CAAC,EAAE;IACjD,MAAMX,wBAAwB,GAAGU,SAAS;IAC1C,MAAMT,qBAAqB,GAAGG,kBAAkB,CAACO,CAAC,GAAG,CAAC,CAAC;IACvDJ,cAAc,CAACK,IAAI,CAACP,SAAS,CAACM,CAAC,CAAC,CAAC;IAEjC,IAAIZ,kBAAkB,CAACC,wBAAwB,EAAEC,qBAAqB,CAAC,EAAE;MACvEO,cAAc,EAAE;IAClB,CAAC,MAAM;MACLF,qBAAqB,CAACM,IAAI,CAAC;QACzBC,IAAI,EAAEb,wBAAwB;QAC9Bc,KAAK,EAAEN,cAAc;QACrBH,SAAS,EAAEE;MACb,CAAC,CAAC;MACFC,cAAc,GAAG,CAAC;MAClBD,cAAc,GAAG,EAAE;IACrB;EACF,CAAC,CAAC;EAEF,OAAOD,qBAAqB;AAC9B,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASS,oBAAoBA,CAC3BC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBAAoB,EACpB;EACA,MAAMC,QAAQ,GAAG,IAAAC,4BAAS,EAAC,IAAAC,oBAAW,EAACN,QAAQ,CAAC,CAAC;EACjD,IAAIO,KAAK,GAAGJ,oBAAoB,GAAGA,oBAAoB,CAACC,QAAQ,CAAC,GAAG,IAAAI,iBAAQ,EAACJ,QAAQ,CAAC;EAEtFG,KAAK,GAAGA,KAAK,CAACE,MAAM,CAACZ,IAAI,IAAI,SAAS,CAACa,IAAI,CAACb,IAAI,CAAC,CAAC;EAElD,IAAIU,KAAK,CAACI,MAAM,KAAK,CAAC,EAAE;IACtB,OAAO,EAAE;EACX;EAEA,IAAIC,SAAS,GAAGL,KAAK,CAAC,CAAC,CAAC,CAACM,iBAAiB,CAAC,CAAC;EAE5C,IAAIZ,mBAAmB,CAACa,OAAO,CAACF,SAAS,CAAC,GAAG,CAAC,CAAC,IAAIL,KAAK,CAACI,MAAM,GAAG,CAAC,EAAE;IACnEC,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGL,KAAK,CAAC,CAAC,CAAC;IACtC,IAAIL,oBAAoB,EAAE;MACxB,IAAIA,oBAAoB,CAACa,QAAQ,CAACR,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE;QAC3CK,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGL,KAAK,CAAC,CAAC,CAAC;MACxC;IACF;EACF;EAEA,OAAOK,SAAS;AAClB;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACe,SAAAI,SAASC,KAAK,EAAEC,UAAU,EAAE;EACzC,MAAMjB,mBAAmB,GAAGiB,UAAU,CAACC,SAAS,CAAC,qBAAqB,CAAC;EACvE,MAAMjB,oBAAoB,GAAGgB,UAAU,CAACC,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMhB,oBAAoB,GAAGe,UAAU,CAACE,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMC,iBAAiB,GAAGH,UAAU,CAACE,SAAS,CAAC,mBAAmB,CAAC;EAEnE,IAAIE,IAAI,GAAGL,KAAK,CAACM,OAAO,CAAC,CAAC;EAC1BD,IAAI,GAAG,IAAAE,mBAAgB,EAACF,IAAI,CAAC;EAC7BA,IAAI,GAAG,IAAAG,yBAAgB,EAACH,IAAI,CAAC;EAC7BA,IAAI,GAAG,IAAAI,iCAAwB,EAACJ,IAAI,EAAEL,KAAK,CAACU,WAAW,IAAIV,KAAK,CAACU,WAAW,CAACC,UAAU,CAAC;;EAExF;EACAN,IAAI,GAAGA,IAAI,CAACO,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;;EAEpC;EACAP,IAAI,GAAGA,IAAI,CAACO,OAAO,CAAC,+CAA+C,EAAE,EAAE,CAAC;EAExE,IAAIxC,SAAS,GAAG,IAAAyC,qBAAY,EAACR,IAAI,EAAED,iBAAiB,CAAC;EAErD,IAAIjC,kBAAkB,GAAGC,SAAS,CAAC0C,GAAG,CAAC,UAAS/B,QAAQ,EAAE;IACxD,OAAOD,oBAAoB,CACzBC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBACF,CAAC;EACH,CAAC,CAAC;EAEFd,SAAS,GAAGA,SAAS,CAACoB,MAAM,CAAC,UAAST,QAAQ,EAAE;IAC9C,MAAMI,QAAQ,GAAG,IAAAE,oBAAW,EAACN,QAAQ,CAAC;IACtC,MAAMO,KAAK,GAAGJ,oBAAoB,GAAGA,oBAAoB,CAACC,QAAQ,CAAC,GAAG,IAAAI,iBAAQ,EAACJ,QAAQ,CAAC;IACxF,OAAOG,KAAK,CAACI,MAAM,GAAG,CAAC;EACzB,CAAC,CAAC;EACFvB,kBAAkB,GAAG,IAAAqB,cAAM,EAACrB,kBAAkB,CAAC;EAE/C,OAAOD,iBAAiB,CAACC,kBAAkB,EAAEC,SAAS,CAAC;AACzD","ignoreList":[]}
1	+ {"version":3,"file":"getSentenceBeginnings.js","names":["_getWords","_interopRequireDefault","require","_getSentences","_stripSpaces","_stripHTMLTags","_lodash","_htmlParser","_helpers","_stripNonTextTags","e","__esModule","default","startsWithSameWord","currentSentenceBeginning","nextSentenceBeginning","isEmpty","compareFirstWords","sentenceBeginnings","sentences","consecutiveFirstWords","foundSentences","sameBeginnings","forEach","beginning","i","push","word","count","getSentenceBeginning","sentence","firstWordExceptions","secondWordExceptions","getWordsCustomHelper","stripped","stripTags","stripSpaces","words","getWords","filter","test","length","firstWord","toLocaleLowerCase","indexOf","includes","_default","paper","researcher","getConfig","getHelper","memoizedTokenizer","text","getText","removeHtmlBlocks","stripNonTextTags","filterShortcodesFromHTML","_attributes","shortcodes","replace","getSentences","sentenceData","map","filteredSentenceData","item","strippedNoSpaces","isDigitsOnly","filteredSentences"],"sources":["../../../../src/languageProcessing/researches/getSentenceBeginnings.js"],"sourcesContent":["import getWords from '../helpers/word/getWords.js';\nimport getSentences from '../helpers/sentence/getSentences';\nimport stripSpaces from '../helpers/sanitize/stripSpaces.js';\nimport {stripFullTags as stripTags} from '../helpers/sanitize/stripHTMLTags.js';\n\nimport {filter, forEach, isEmpty} from 'lodash';\nimport removeHtmlBlocks from '../helpers/html/htmlParser';\nimport {filterShortcodesFromHTML} from '../helpers';\nimport stripNonTextTags from '@axyseo/languageProcessing/helpers/sanitize/stripNonTextTags';\n\n/*\n Compares the first word of each sentence with the first word of the following sentence.\n \n @param {string} currentSentenceBeginning The first word of the current sentence.\n * @param {string} nextSentenceBeginning The first word of the next sentence.\n * @returns {boolean} Returns true if sentence beginnings match.\n /\nconst startsWithSameWord = function(currentSentenceBeginning, nextSentenceBeginning) {\n return !isEmpty(currentSentenceBeginning) && currentSentenceBeginning === nextSentenceBeginning;\n};\n\n/\n Counts the number of similar sentence beginnings.\n \n @param {Array} sentenceBeginnings The array containing the first word of each sentence.\n * @param {Array} sentences The array containing all sentences.\n * @returns {Array} The array containing the objects containing the first words and the corresponding counts.\n /\nconst compareFirstWords = function(sentenceBeginnings, sentences) {\n const consecutiveFirstWords = [];\n let foundSentences = [];\n let sameBeginnings = 1;\n\n forEach(sentenceBeginnings, function(beginning, i) {\n const currentSentenceBeginning = beginning;\n const nextSentenceBeginning = sentenceBeginnings[i + 1];\n foundSentences.push(sentences[i]);\n\n if (startsWithSameWord(currentSentenceBeginning, nextSentenceBeginning)) {\n sameBeginnings++;\n } else {\n consecutiveFirstWords.push({\n word: currentSentenceBeginning,\n count: sameBeginnings,\n sentences: foundSentences\n });\n sameBeginnings = 1;\n foundSentences = [];\n }\n });\n\n return consecutiveFirstWords;\n};\n\n/\n Retrieves the first word from the sentence. If the first or second word is on an exception list of words that should not be considered as sentence\n * beginnings, the following word is also retrieved.\n \n @param {string} sentence The sentence to retrieve the first word from.\n * @param {Array} firstWordExceptions First word exceptions to match against.\n * @param {Array} secondWordExceptions Second word exceptions to match against.\n * @param {function} getWordsCustomHelper The language-specific helper function to retrieve words from text.\n \n @returns {string} The first word of the sentence.\n /\nfunction getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n) {\n const stripped = stripTags(stripSpaces(sentence));\n let words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : getWords(stripped);\n\n words = words.filter(word => /^\\p{L}/u.test(word));\n\n if (words.length === 0) {\n return '';\n }\n\n let firstWord = words[0].toLocaleLowerCase();\n\n if (firstWordExceptions.indexOf(firstWord) > -1 && words.length > 1) {\n firstWord = firstWord + ' ' + words[1];\n if (secondWordExceptions) {\n if (secondWordExceptions.includes(words[1])) {\n firstWord = firstWord + ' ' + words[2];\n }\n }\n }\n\n return firstWord;\n}\n\n/\n Gets the first word of each sentence from the text, and returns an object containing the first word of each sentence and the corresponding counts.\n \n @param {Paper} paper The Paper object to get the text from.\n * @param {Researcher} researcher The researcher this research is a part of.\n \n @returns {Object} The object containing the first word of each sentence and the corresponding counts.\n /\nexport default function(paper, researcher) {\n const firstWordExceptions = researcher.getConfig('firstWordExceptions');\n const secondWordExceptions = researcher.getConfig('secondWordExceptions');\n const getWordsCustomHelper = researcher.getHelper('getWordsCustomHelper');\n const memoizedTokenizer = researcher.getHelper('memoizedTokenizer');\n\n let text = paper.getText();\n text = removeHtmlBlocks(text);\n text = stripNonTextTags(text);\n text = filterShortcodesFromHTML(text, paper._attributes && paper._attributes.shortcodes);\n\n // Remove any HTML whitespace padding and replace it with a single whitespace.\n text = text.replace(/[\\s\\n]+/g, ' ');\n\n // Exclude text inside tables.\n text = text.replace(/<figure class='wp-block-table'>.<\\/figure>/gs, '');\n\n let sentences = getSentences(text, memoizedTokenizer);\n\n const sentenceData = sentences.map(function(sentence) {\n const beginning = getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n );\n\n return {sentence, beginning};\n });\n\n const filteredSentenceData = sentenceData.filter(function(item) {\n if (!item.beginning) return false;\n\n const stripped = stripTags(stripSpaces(item.sentence));\n const strippedNoSpaces = stripped.replace(/\\s+/g, '');\n const isDigitsOnly = strippedNoSpaces.length > 0 && /^[0-9]+$/.test(strippedNoSpaces);\n\n return !isDigitsOnly;\n });\n\n const filteredSentences = filteredSentenceData.map(item => item.sentence);\n const sentenceBeginnings = filteredSentenceData.map(item => item.beginning);\n\n return compareFirstWords(sentenceBeginnings, filteredSentences);\n}\n"],"mappings":";;;;;;AAAA,IAAAA,SAAA,GAAAC,sBAAA,CAAAC,OAAA;AACA,IAAAC,aAAA,GAAAF,sBAAA,CAAAC,OAAA;AACA,IAAAE,YAAA,GAAAH,sBAAA,CAAAC,OAAA;AACA,IAAAG,cAAA,GAAAH,OAAA;AAEA,IAAAI,OAAA,GAAAJ,OAAA;AACA,IAAAK,WAAA,GAAAN,sBAAA,CAAAC,OAAA;AACA,IAAAM,QAAA,GAAAN,OAAA;AACA,IAAAO,iBAAA,GAAAR,sBAAA,CAAAC,OAAA;AAA4F,SAAAD,uBAAAS,CAAA,WAAAA,CAAA,IAAAA,CAAA,CAAAC,UAAA,GAAAD,CAAA,KAAAE,OAAA,EAAAF,CAAA;AAE5F;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMG,kBAAkB,GAAG,SAAAA,CAASC,wBAAwB,EAAEC,qBAAqB,EAAE;EACnF,OAAO,CAAC,IAAAC,eAAO,EAACF,wBAAwB,CAAC,IAAIA,wBAAwB,KAAKC,qBAAqB;AACjG,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAME,iBAAiB,GAAG,SAAAA,CAASC,kBAAkB,EAAEC,SAAS,EAAE;EAChE,MAAMC,qBAAqB,GAAG,EAAE;EAChC,IAAIC,cAAc,GAAG,EAAE;EACvB,IAAIC,cAAc,GAAG,CAAC;EAEtB,IAAAC,eAAO,EAACL,kBAAkB,EAAE,UAASM,SAAS,EAAEC,CAAC,EAAE;IACjD,MAAMX,wBAAwB,GAAGU,SAAS;IAC1C,MAAMT,qBAAqB,GAAGG,kBAAkB,CAACO,CAAC,GAAG,CAAC,CAAC;IACvDJ,cAAc,CAACK,IAAI,CAACP,SAAS,CAACM,CAAC,CAAC,CAAC;IAEjC,IAAIZ,kBAAkB,CAACC,wBAAwB,EAAEC,qBAAqB,CAAC,EAAE;MACvEO,cAAc,EAAE;IAClB,CAAC,MAAM;MACLF,qBAAqB,CAACM,IAAI,CAAC;QACzBC,IAAI,EAAEb,wBAAwB;QAC9Bc,KAAK,EAAEN,cAAc;QACrBH,SAAS,EAAEE;MACb,CAAC,CAAC;MACFC,cAAc,GAAG,CAAC;MAClBD,cAAc,GAAG,EAAE;IACrB;EACF,CAAC,CAAC;EAEF,OAAOD,qBAAqB;AAC9B,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASS,oBAAoBA,CAC3BC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBAAoB,EACpB;EACA,MAAMC,QAAQ,GAAG,IAAAC,4BAAS,EAAC,IAAAC,oBAAW,EAACN,QAAQ,CAAC,CAAC;EACjD,IAAIO,KAAK,GAAGJ,oBAAoB,GAAGA,oBAAoB,CAACC,QAAQ,CAAC,GAAG,IAAAI,iBAAQ,EAACJ,QAAQ,CAAC;EAEtFG,KAAK,GAAGA,KAAK,CAACE,MAAM,CAACZ,IAAI,IAAI,SAAS,CAACa,IAAI,CAACb,IAAI,CAAC,CAAC;EAElD,IAAIU,KAAK,CAACI,MAAM,KAAK,CAAC,EAAE;IACtB,OAAO,EAAE;EACX;EAEA,IAAIC,SAAS,GAAGL,KAAK,CAAC,CAAC,CAAC,CAACM,iBAAiB,CAAC,CAAC;EAE5C,IAAIZ,mBAAmB,CAACa,OAAO,CAACF,SAAS,CAAC,GAAG,CAAC,CAAC,IAAIL,KAAK,CAACI,MAAM,GAAG,CAAC,EAAE;IACnEC,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGL,KAAK,CAAC,CAAC,CAAC;IACtC,IAAIL,oBAAoB,EAAE;MACxB,IAAIA,oBAAoB,CAACa,QAAQ,CAACR,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE;QAC3CK,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGL,KAAK,CAAC,CAAC,CAAC;MACxC;IACF;EACF;EAEA,OAAOK,SAAS;AAClB;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACe,SAAAI,SAASC,KAAK,EAAEC,UAAU,EAAE;EACzC,MAAMjB,mBAAmB,GAAGiB,UAAU,CAACC,SAAS,CAAC,qBAAqB,CAAC;EACvE,MAAMjB,oBAAoB,GAAGgB,UAAU,CAACC,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMhB,oBAAoB,GAAGe,UAAU,CAACE,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMC,iBAAiB,GAAGH,UAAU,CAACE,SAAS,CAAC,mBAAmB,CAAC;EAEnE,IAAIE,IAAI,GAAGL,KAAK,CAACM,OAAO,CAAC,CAAC;EAC1BD,IAAI,GAAG,IAAAE,mBAAgB,EAACF,IAAI,CAAC;EAC7BA,IAAI,GAAG,IAAAG,yBAAgB,EAACH,IAAI,CAAC;EAC7BA,IAAI,GAAG,IAAAI,iCAAwB,EAACJ,IAAI,EAAEL,KAAK,CAACU,WAAW,IAAIV,KAAK,CAACU,WAAW,CAACC,UAAU,CAAC;;EAExF;EACAN,IAAI,GAAGA,IAAI,CAACO,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;;EAEpC;EACAP,IAAI,GAAGA,IAAI,CAACO,OAAO,CAAC,+CAA+C,EAAE,EAAE,CAAC;EAExE,IAAIxC,SAAS,GAAG,IAAAyC,qBAAY,EAACR,IAAI,EAAED,iBAAiB,CAAC;EAErD,MAAMU,YAAY,GAAG1C,SAAS,CAAC2C,GAAG,CAAC,UAAShC,QAAQ,EAAE;IACpD,MAAMN,SAAS,GAAGK,oBAAoB,CACpCC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBACF,CAAC;IAED,OAAO;MAACH,QAAQ;MAAEN;IAAS,CAAC;EAC9B,CAAC,CAAC;EAEF,MAAMuC,oBAAoB,GAAGF,YAAY,CAACtB,MAAM,CAAC,UAASyB,IAAI,EAAE;IAC9D,IAAI,CAACA,IAAI,CAACxC,SAAS,EAAE,OAAO,KAAK;IAEjC,MAAMU,QAAQ,GAAG,IAAAC,4BAAS,EAAC,IAAAC,oBAAW,EAAC4B,IAAI,CAAClC,QAAQ,CAAC,CAAC;IACtD,MAAMmC,gBAAgB,GAAG/B,QAAQ,CAACyB,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IACrD,MAAMO,YAAY,GAAGD,gBAAgB,CAACxB,MAAM,GAAG,CAAC,IAAI,UAAU,CAACD,IAAI,CAACyB,gBAAgB,CAAC;IAErF,OAAO,CAACC,YAAY;EACtB,CAAC,CAAC;EAEF,MAAMC,iBAAiB,GAAGJ,oBAAoB,CAACD,GAAG,CAACE,IAAI,IAAIA,IAAI,CAAClC,QAAQ,CAAC;EACzE,MAAMZ,kBAAkB,GAAG6C,oBAAoB,CAACD,GAAG,CAACE,IAAI,IAAIA,IAAI,CAACxC,SAAS,CAAC;EAE3E,OAAOP,iBAAiB,CAACC,kBAAkB,EAAEiD,iBAAiB,CAAC;AACjE","ignoreList":[]}

package/build/esm/languageProcessing/researches/getSentenceBeginnings.js CHANGED Viewed

@@ -55,7 +55,7 @@ const compareFirstWords = function (sentenceBeginnings, sentences) {
  * @param {string}  sentence                The sentence to retrieve the first word from.
  * @param {Array}   firstWordExceptions     First word exceptions to match against.
  * @param {Array}   secondWordExceptions    Second word exceptions to match against.
- * @param {function}	getWordsCustomHelper   The language-specific helper function to retrieve words from text.
+ * @param {function}  getWordsCustomHelper   The language-specific helper function to retrieve words from text.
  *
  * @returns {string} The first word of the sentence.
  */
@@ -102,15 +102,22 @@ export default function (paper, researcher) {
   // Exclude text inside tables.
   text = text.replace(/<figure class='wp-block-table'>.*<\/figure>/gs, '');
   let sentences = getSentences(text, memoizedTokenizer);
-  let sentenceBeginnings = sentences.map(function (sentence) {
-    return getSentenceBeginning(sentence, firstWordExceptions, secondWordExceptions, getWordsCustomHelper);
+  const sentenceData = sentences.map(function (sentence) {
+    const beginning = getSentenceBeginning(sentence, firstWordExceptions, secondWordExceptions, getWordsCustomHelper);
+    return {
+      sentence,
+      beginning
+    };
   });
-  sentences = sentences.filter(function (sentence) {
-    const stripped = stripSpaces(sentence);
-    const words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : getWords(stripped);
-    return words.length > 0;
+  const filteredSentenceData = sentenceData.filter(function (item) {
+    if (!item.beginning) return false;
+    const stripped = stripTags(stripSpaces(item.sentence));
+    const strippedNoSpaces = stripped.replace(/\s+/g, '');
+    const isDigitsOnly = strippedNoSpaces.length > 0 && /^[0-9]+$/.test(strippedNoSpaces);
+    return !isDigitsOnly;
   });
-  sentenceBeginnings = filter(sentenceBeginnings);
-  return compareFirstWords(sentenceBeginnings, sentences);
+  const filteredSentences = filteredSentenceData.map(item => item.sentence);
+  const sentenceBeginnings = filteredSentenceData.map(item => item.beginning);
+  return compareFirstWords(sentenceBeginnings, filteredSentences);
 }
 //# sourceMappingURL=getSentenceBeginnings.js.map

package/build/esm/languageProcessing/researches/getSentenceBeginnings.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"getSentenceBeginnings.js","names":["getWords","getSentences","stripSpaces","stripFullTags","stripTags","filter","forEach","isEmpty","removeHtmlBlocks","filterShortcodesFromHTML","stripNonTextTags","startsWithSameWord","currentSentenceBeginning","nextSentenceBeginning","compareFirstWords","sentenceBeginnings","sentences","consecutiveFirstWords","foundSentences","sameBeginnings","beginning","i","push","word","count","getSentenceBeginning","sentence","firstWordExceptions","secondWordExceptions","getWordsCustomHelper","stripped","words","test","length","firstWord","toLocaleLowerCase","indexOf","includes","paper","researcher","getConfig","getHelper","memoizedTokenizer","text","getText","_attributes","shortcodes","replace","map"],"sources":["../../../../src/languageProcessing/researches/getSentenceBeginnings.js"],"sourcesContent":["import getWords from '../helpers/word/getWords.js';\nimport getSentences from '../helpers/sentence/getSentences';\nimport stripSpaces from '../helpers/sanitize/stripSpaces.js';\nimport {stripFullTags as stripTags} from '../helpers/sanitize/stripHTMLTags.js';\n\nimport {filter, forEach, isEmpty} from 'lodash';\nimport removeHtmlBlocks from '../helpers/html/htmlParser';\nimport {filterShortcodesFromHTML} from '../helpers';\nimport stripNonTextTags from '@axyseo/languageProcessing/helpers/sanitize/stripNonTextTags';\n\n/*\n Compares the first word of each sentence with the first word of the following sentence.\n \n @param {string} currentSentenceBeginning The first word of the current sentence.\n * @param {string} nextSentenceBeginning The first word of the next sentence.\n * @returns {boolean} Returns true if sentence beginnings match.\n /\nconst startsWithSameWord = function(currentSentenceBeginning, nextSentenceBeginning) {\n return !isEmpty(currentSentenceBeginning) && currentSentenceBeginning === nextSentenceBeginning;\n};\n\n/\n Counts the number of similar sentence beginnings.\n \n @param {Array} sentenceBeginnings The array containing the first word of each sentence.\n * @param {Array} sentences The array containing all sentences.\n * @returns {Array} The array containing the objects containing the first words and the corresponding counts.\n /\nconst compareFirstWords = function(sentenceBeginnings, sentences) {\n const consecutiveFirstWords = [];\n let foundSentences = [];\n let sameBeginnings = 1;\n\n forEach(sentenceBeginnings, function(beginning, i) {\n const currentSentenceBeginning = beginning;\n const nextSentenceBeginning = sentenceBeginnings[i + 1];\n foundSentences.push(sentences[i]);\n\n if (startsWithSameWord(currentSentenceBeginning, nextSentenceBeginning)) {\n sameBeginnings++;\n } else {\n consecutiveFirstWords.push({\n word: currentSentenceBeginning,\n count: sameBeginnings,\n sentences: foundSentences\n });\n sameBeginnings = 1;\n foundSentences = [];\n }\n });\n\n return consecutiveFirstWords;\n};\n\n/\n Retrieves the first word from the sentence. If the first or second word is on an exception list of words that should not be considered as sentence\n * beginnings, the following word is also retrieved.\n \n @param {string} sentence The sentence to retrieve the first word from.\n * @param {Array} firstWordExceptions First word exceptions to match against.\n * @param {Array} secondWordExceptions Second word exceptions to match against.\n * @param {function}\tgetWordsCustomHelper The language-specific helper function to retrieve words from text.\n \n @returns {string} The first word of the sentence.\n /\nfunction getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n) {\n const stripped = stripTags(stripSpaces(sentence));\n let words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : getWords(stripped);\n\n words = words.filter(word => /^\\p{L}/u.test(word));\n\n if (words.length === 0) {\n return '';\n }\n\n let firstWord = words[0].toLocaleLowerCase();\n\n if (firstWordExceptions.indexOf(firstWord) > -1 && words.length > 1) {\n firstWord = firstWord + ' ' + words[1];\n if (secondWordExceptions) {\n if (secondWordExceptions.includes(words[1])) {\n firstWord = firstWord + ' ' + words[2];\n }\n }\n }\n\n return firstWord;\n}\n\n/\n Gets the first word of each sentence from the text, and returns an object containing the first word of each sentence and the corresponding counts.\n \n @param {Paper} paper The Paper object to get the text from.\n * @param {Researcher} researcher The researcher this research is a part of.\n \n @returns {Object} The object containing the first word of each sentence and the corresponding counts.\n /\nexport default function(paper, researcher) {\n const firstWordExceptions = researcher.getConfig('firstWordExceptions');\n const secondWordExceptions = researcher.getConfig('secondWordExceptions');\n const getWordsCustomHelper = researcher.getHelper('getWordsCustomHelper');\n const memoizedTokenizer = researcher.getHelper('memoizedTokenizer');\n\n let text = paper.getText();\n text = removeHtmlBlocks(text);\n text = stripNonTextTags(text);\n text = filterShortcodesFromHTML(text, paper._attributes && paper._attributes.shortcodes);\n\n // Remove any HTML whitespace padding and replace it with a single whitespace.\n text = text.replace(/[\\s\\n]+/g, ' ');\n\n // Exclude text inside tables.\n text = text.replace(/<figure class='wp-block-table'>.<\\/figure>/gs, '');\n\n let sentences = getSentences(text, memoizedTokenizer);\n\n let sentenceBeginnings = sentences.map(function(sentence) {\n return getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n );\n });\n\n sentences = sentences.filter(function(sentence) {\n const stripped = stripSpaces(sentence);\n const words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : getWords(stripped);\n return words.length > 0;\n });\n sentenceBeginnings = filter(sentenceBeginnings);\n\n return compareFirstWords(sentenceBeginnings, sentences);\n}\n"],"mappings":"AAAA,OAAOA,QAAQ;AACf,OAAOC,YAAY;AACnB,OAAOC,WAAW;AAClB,SAAQC,aAAa,IAAIC,SAAS;AAElC,SAAQC,MAAM,EAAEC,OAAO,EAAEC,OAAO,QAAO,QAAQ;AAC/C,OAAOC,gBAAgB;AACvB,SAAQC,wBAAwB;AAChC,OAAOC,gBAAgB;;AAEvB;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMC,kBAAkB,GAAG,SAAAA,CAASC,wBAAwB,EAAEC,qBAAqB,EAAE;EACnF,OAAO,CAACN,OAAO,CAACK,wBAAwB,CAAC,IAAIA,wBAAwB,KAAKC,qBAAqB;AACjG,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMC,iBAAiB,GAAG,SAAAA,CAASC,kBAAkB,EAAEC,SAAS,EAAE;EAChE,MAAMC,qBAAqB,GAAG,EAAE;EAChC,IAAIC,cAAc,GAAG,EAAE;EACvB,IAAIC,cAAc,GAAG,CAAC;EAEtBb,OAAO,CAACS,kBAAkB,EAAE,UAASK,SAAS,EAAEC,CAAC,EAAE;IACjD,MAAMT,wBAAwB,GAAGQ,SAAS;IAC1C,MAAMP,qBAAqB,GAAGE,kBAAkB,CAACM,CAAC,GAAG,CAAC,CAAC;IACvDH,cAAc,CAACI,IAAI,CAACN,SAAS,CAACK,CAAC,CAAC,CAAC;IAEjC,IAAIV,kBAAkB,CAACC,wBAAwB,EAAEC,qBAAqB,CAAC,EAAE;MACvEM,cAAc,EAAE;IAClB,CAAC,MAAM;MACLF,qBAAqB,CAACK,IAAI,CAAC;QACzBC,IAAI,EAAEX,wBAAwB;QAC9BY,KAAK,EAAEL,cAAc;QACrBH,SAAS,EAAEE;MACb,CAAC,CAAC;MACFC,cAAc,GAAG,CAAC;MAClBD,cAAc,GAAG,EAAE;IACrB;EACF,CAAC,CAAC;EAEF,OAAOD,qBAAqB;AAC9B,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASQ,oBAAoBA,CAC3BC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBAAoB,EACpB;EACA,MAAMC,QAAQ,GAAG1B,SAAS,CAACF,WAAW,CAACwB,QAAQ,CAAC,CAAC;EACjD,IAAIK,KAAK,GAAGF,oBAAoB,GAAGA,oBAAoB,CAACC,QAAQ,CAAC,GAAG9B,QAAQ,CAAC8B,QAAQ,CAAC;EAEtFC,KAAK,GAAGA,KAAK,CAAC1B,MAAM,CAACkB,IAAI,IAAI,SAAS,CAACS,IAAI,CAACT,IAAI,CAAC,CAAC;EAElD,IAAIQ,KAAK,CAACE,MAAM,KAAK,CAAC,EAAE;IACtB,OAAO,EAAE;EACX;EAEA,IAAIC,SAAS,GAAGH,KAAK,CAAC,CAAC,CAAC,CAACI,iBAAiB,CAAC,CAAC;EAE5C,IAAIR,mBAAmB,CAACS,OAAO,CAACF,SAAS,CAAC,GAAG,CAAC,CAAC,IAAIH,KAAK,CAACE,MAAM,GAAG,CAAC,EAAE;IACnEC,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGH,KAAK,CAAC,CAAC,CAAC;IACtC,IAAIH,oBAAoB,EAAE;MACxB,IAAIA,oBAAoB,CAACS,QAAQ,CAACN,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE;QAC3CG,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGH,KAAK,CAAC,CAAC,CAAC;MACxC;IACF;EACF;EAEA,OAAOG,SAAS;AAClB;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAASI,KAAK,EAAEC,UAAU,EAAE;EACzC,MAAMZ,mBAAmB,GAAGY,UAAU,CAACC,SAAS,CAAC,qBAAqB,CAAC;EACvE,MAAMZ,oBAAoB,GAAGW,UAAU,CAACC,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMX,oBAAoB,GAAGU,UAAU,CAACE,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMC,iBAAiB,GAAGH,UAAU,CAACE,SAAS,CAAC,mBAAmB,CAAC;EAEnE,IAAIE,IAAI,GAAGL,KAAK,CAACM,OAAO,CAAC,CAAC;EAC1BD,IAAI,GAAGnC,gBAAgB,CAACmC,IAAI,CAAC;EAC7BA,IAAI,GAAGjC,gBAAgB,CAACiC,IAAI,CAAC;EAC7BA,IAAI,GAAGlC,wBAAwB,CAACkC,IAAI,EAAEL,KAAK,CAACO,WAAW,IAAIP,KAAK,CAACO,WAAW,CAACC,UAAU,CAAC;;EAExF;EACAH,IAAI,GAAGA,IAAI,CAACI,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;;EAEpC;EACAJ,IAAI,GAAGA,IAAI,CAACI,OAAO,CAAC,+CAA+C,EAAE,EAAE,CAAC;EAExE,IAAI/B,SAAS,GAAGf,YAAY,CAAC0C,IAAI,EAAED,iBAAiB,CAAC;EAErD,IAAI3B,kBAAkB,GAAGC,SAAS,CAACgC,GAAG,CAAC,UAAStB,QAAQ,EAAE;IACxD,OAAOD,oBAAoB,CACzBC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBACF,CAAC;EACH,CAAC,CAAC;EAEFb,SAAS,GAAGA,SAAS,CAACX,MAAM,CAAC,UAASqB,QAAQ,EAAE;IAC9C,MAAMI,QAAQ,GAAG5B,WAAW,CAACwB,QAAQ,CAAC;IACtC,MAAMK,KAAK,GAAGF,oBAAoB,GAAGA,oBAAoB,CAACC,QAAQ,CAAC,GAAG9B,QAAQ,CAAC8B,QAAQ,CAAC;IACxF,OAAOC,KAAK,CAACE,MAAM,GAAG,CAAC;EACzB,CAAC,CAAC;EACFlB,kBAAkB,GAAGV,MAAM,CAACU,kBAAkB,CAAC;EAE/C,OAAOD,iBAAiB,CAACC,kBAAkB,EAAEC,SAAS,CAAC;AACzD","ignoreList":[]}
1	+ {"version":3,"file":"getSentenceBeginnings.js","names":["getWords","getSentences","stripSpaces","stripFullTags","stripTags","filter","forEach","isEmpty","removeHtmlBlocks","filterShortcodesFromHTML","stripNonTextTags","startsWithSameWord","currentSentenceBeginning","nextSentenceBeginning","compareFirstWords","sentenceBeginnings","sentences","consecutiveFirstWords","foundSentences","sameBeginnings","beginning","i","push","word","count","getSentenceBeginning","sentence","firstWordExceptions","secondWordExceptions","getWordsCustomHelper","stripped","words","test","length","firstWord","toLocaleLowerCase","indexOf","includes","paper","researcher","getConfig","getHelper","memoizedTokenizer","text","getText","_attributes","shortcodes","replace","sentenceData","map","filteredSentenceData","item","strippedNoSpaces","isDigitsOnly","filteredSentences"],"sources":["../../../../src/languageProcessing/researches/getSentenceBeginnings.js"],"sourcesContent":["import getWords from '../helpers/word/getWords.js';\nimport getSentences from '../helpers/sentence/getSentences';\nimport stripSpaces from '../helpers/sanitize/stripSpaces.js';\nimport {stripFullTags as stripTags} from '../helpers/sanitize/stripHTMLTags.js';\n\nimport {filter, forEach, isEmpty} from 'lodash';\nimport removeHtmlBlocks from '../helpers/html/htmlParser';\nimport {filterShortcodesFromHTML} from '../helpers';\nimport stripNonTextTags from '@axyseo/languageProcessing/helpers/sanitize/stripNonTextTags';\n\n/*\n Compares the first word of each sentence with the first word of the following sentence.\n \n @param {string} currentSentenceBeginning The first word of the current sentence.\n * @param {string} nextSentenceBeginning The first word of the next sentence.\n * @returns {boolean} Returns true if sentence beginnings match.\n /\nconst startsWithSameWord = function(currentSentenceBeginning, nextSentenceBeginning) {\n return !isEmpty(currentSentenceBeginning) && currentSentenceBeginning === nextSentenceBeginning;\n};\n\n/\n Counts the number of similar sentence beginnings.\n \n @param {Array} sentenceBeginnings The array containing the first word of each sentence.\n * @param {Array} sentences The array containing all sentences.\n * @returns {Array} The array containing the objects containing the first words and the corresponding counts.\n /\nconst compareFirstWords = function(sentenceBeginnings, sentences) {\n const consecutiveFirstWords = [];\n let foundSentences = [];\n let sameBeginnings = 1;\n\n forEach(sentenceBeginnings, function(beginning, i) {\n const currentSentenceBeginning = beginning;\n const nextSentenceBeginning = sentenceBeginnings[i + 1];\n foundSentences.push(sentences[i]);\n\n if (startsWithSameWord(currentSentenceBeginning, nextSentenceBeginning)) {\n sameBeginnings++;\n } else {\n consecutiveFirstWords.push({\n word: currentSentenceBeginning,\n count: sameBeginnings,\n sentences: foundSentences\n });\n sameBeginnings = 1;\n foundSentences = [];\n }\n });\n\n return consecutiveFirstWords;\n};\n\n/\n Retrieves the first word from the sentence. If the first or second word is on an exception list of words that should not be considered as sentence\n * beginnings, the following word is also retrieved.\n \n @param {string} sentence The sentence to retrieve the first word from.\n * @param {Array} firstWordExceptions First word exceptions to match against.\n * @param {Array} secondWordExceptions Second word exceptions to match against.\n * @param {function} getWordsCustomHelper The language-specific helper function to retrieve words from text.\n \n @returns {string} The first word of the sentence.\n /\nfunction getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n) {\n const stripped = stripTags(stripSpaces(sentence));\n let words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : getWords(stripped);\n\n words = words.filter(word => /^\\p{L}/u.test(word));\n\n if (words.length === 0) {\n return '';\n }\n\n let firstWord = words[0].toLocaleLowerCase();\n\n if (firstWordExceptions.indexOf(firstWord) > -1 && words.length > 1) {\n firstWord = firstWord + ' ' + words[1];\n if (secondWordExceptions) {\n if (secondWordExceptions.includes(words[1])) {\n firstWord = firstWord + ' ' + words[2];\n }\n }\n }\n\n return firstWord;\n}\n\n/\n Gets the first word of each sentence from the text, and returns an object containing the first word of each sentence and the corresponding counts.\n \n @param {Paper} paper The Paper object to get the text from.\n * @param {Researcher} researcher The researcher this research is a part of.\n \n @returns {Object} The object containing the first word of each sentence and the corresponding counts.\n /\nexport default function(paper, researcher) {\n const firstWordExceptions = researcher.getConfig('firstWordExceptions');\n const secondWordExceptions = researcher.getConfig('secondWordExceptions');\n const getWordsCustomHelper = researcher.getHelper('getWordsCustomHelper');\n const memoizedTokenizer = researcher.getHelper('memoizedTokenizer');\n\n let text = paper.getText();\n text = removeHtmlBlocks(text);\n text = stripNonTextTags(text);\n text = filterShortcodesFromHTML(text, paper._attributes && paper._attributes.shortcodes);\n\n // Remove any HTML whitespace padding and replace it with a single whitespace.\n text = text.replace(/[\\s\\n]+/g, ' ');\n\n // Exclude text inside tables.\n text = text.replace(/<figure class='wp-block-table'>.<\\/figure>/gs, '');\n\n let sentences = getSentences(text, memoizedTokenizer);\n\n const sentenceData = sentences.map(function(sentence) {\n const beginning = getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n );\n\n return {sentence, beginning};\n });\n\n const filteredSentenceData = sentenceData.filter(function(item) {\n if (!item.beginning) return false;\n\n const stripped = stripTags(stripSpaces(item.sentence));\n const strippedNoSpaces = stripped.replace(/\\s+/g, '');\n const isDigitsOnly = strippedNoSpaces.length > 0 && /^[0-9]+$/.test(strippedNoSpaces);\n\n return !isDigitsOnly;\n });\n\n const filteredSentences = filteredSentenceData.map(item => item.sentence);\n const sentenceBeginnings = filteredSentenceData.map(item => item.beginning);\n\n return compareFirstWords(sentenceBeginnings, filteredSentences);\n}\n"],"mappings":"AAAA,OAAOA,QAAQ;AACf,OAAOC,YAAY;AACnB,OAAOC,WAAW;AAClB,SAAQC,aAAa,IAAIC,SAAS;AAElC,SAAQC,MAAM,EAAEC,OAAO,EAAEC,OAAO,QAAO,QAAQ;AAC/C,OAAOC,gBAAgB;AACvB,SAAQC,wBAAwB;AAChC,OAAOC,gBAAgB;;AAEvB;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMC,kBAAkB,GAAG,SAAAA,CAASC,wBAAwB,EAAEC,qBAAqB,EAAE;EACnF,OAAO,CAACN,OAAO,CAACK,wBAAwB,CAAC,IAAIA,wBAAwB,KAAKC,qBAAqB;AACjG,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMC,iBAAiB,GAAG,SAAAA,CAASC,kBAAkB,EAAEC,SAAS,EAAE;EAChE,MAAMC,qBAAqB,GAAG,EAAE;EAChC,IAAIC,cAAc,GAAG,EAAE;EACvB,IAAIC,cAAc,GAAG,CAAC;EAEtBb,OAAO,CAACS,kBAAkB,EAAE,UAASK,SAAS,EAAEC,CAAC,EAAE;IACjD,MAAMT,wBAAwB,GAAGQ,SAAS;IAC1C,MAAMP,qBAAqB,GAAGE,kBAAkB,CAACM,CAAC,GAAG,CAAC,CAAC;IACvDH,cAAc,CAACI,IAAI,CAACN,SAAS,CAACK,CAAC,CAAC,CAAC;IAEjC,IAAIV,kBAAkB,CAACC,wBAAwB,EAAEC,qBAAqB,CAAC,EAAE;MACvEM,cAAc,EAAE;IAClB,CAAC,MAAM;MACLF,qBAAqB,CAACK,IAAI,CAAC;QACzBC,IAAI,EAAEX,wBAAwB;QAC9BY,KAAK,EAAEL,cAAc;QACrBH,SAAS,EAAEE;MACb,CAAC,CAAC;MACFC,cAAc,GAAG,CAAC;MAClBD,cAAc,GAAG,EAAE;IACrB;EACF,CAAC,CAAC;EAEF,OAAOD,qBAAqB;AAC9B,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASQ,oBAAoBA,CAC3BC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBAAoB,EACpB;EACA,MAAMC,QAAQ,GAAG1B,SAAS,CAACF,WAAW,CAACwB,QAAQ,CAAC,CAAC;EACjD,IAAIK,KAAK,GAAGF,oBAAoB,GAAGA,oBAAoB,CAACC,QAAQ,CAAC,GAAG9B,QAAQ,CAAC8B,QAAQ,CAAC;EAEtFC,KAAK,GAAGA,KAAK,CAAC1B,MAAM,CAACkB,IAAI,IAAI,SAAS,CAACS,IAAI,CAACT,IAAI,CAAC,CAAC;EAElD,IAAIQ,KAAK,CAACE,MAAM,KAAK,CAAC,EAAE;IACtB,OAAO,EAAE;EACX;EAEA,IAAIC,SAAS,GAAGH,KAAK,CAAC,CAAC,CAAC,CAACI,iBAAiB,CAAC,CAAC;EAE5C,IAAIR,mBAAmB,CAACS,OAAO,CAACF,SAAS,CAAC,GAAG,CAAC,CAAC,IAAIH,KAAK,CAACE,MAAM,GAAG,CAAC,EAAE;IACnEC,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGH,KAAK,CAAC,CAAC,CAAC;IACtC,IAAIH,oBAAoB,EAAE;MACxB,IAAIA,oBAAoB,CAACS,QAAQ,CAACN,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE;QAC3CG,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGH,KAAK,CAAC,CAAC,CAAC;MACxC;IACF;EACF;EAEA,OAAOG,SAAS;AAClB;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAASI,KAAK,EAAEC,UAAU,EAAE;EACzC,MAAMZ,mBAAmB,GAAGY,UAAU,CAACC,SAAS,CAAC,qBAAqB,CAAC;EACvE,MAAMZ,oBAAoB,GAAGW,UAAU,CAACC,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMX,oBAAoB,GAAGU,UAAU,CAACE,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMC,iBAAiB,GAAGH,UAAU,CAACE,SAAS,CAAC,mBAAmB,CAAC;EAEnE,IAAIE,IAAI,GAAGL,KAAK,CAACM,OAAO,CAAC,CAAC;EAC1BD,IAAI,GAAGnC,gBAAgB,CAACmC,IAAI,CAAC;EAC7BA,IAAI,GAAGjC,gBAAgB,CAACiC,IAAI,CAAC;EAC7BA,IAAI,GAAGlC,wBAAwB,CAACkC,IAAI,EAAEL,KAAK,CAACO,WAAW,IAAIP,KAAK,CAACO,WAAW,CAACC,UAAU,CAAC;;EAExF;EACAH,IAAI,GAAGA,IAAI,CAACI,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;;EAEpC;EACAJ,IAAI,GAAGA,IAAI,CAACI,OAAO,CAAC,+CAA+C,EAAE,EAAE,CAAC;EAExE,IAAI/B,SAAS,GAAGf,YAAY,CAAC0C,IAAI,EAAED,iBAAiB,CAAC;EAErD,MAAMM,YAAY,GAAGhC,SAAS,CAACiC,GAAG,CAAC,UAASvB,QAAQ,EAAE;IACpD,MAAMN,SAAS,GAAGK,oBAAoB,CACpCC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBACF,CAAC;IAED,OAAO;MAACH,QAAQ;MAAEN;IAAS,CAAC;EAC9B,CAAC,CAAC;EAEF,MAAM8B,oBAAoB,GAAGF,YAAY,CAAC3C,MAAM,CAAC,UAAS8C,IAAI,EAAE;IAC9D,IAAI,CAACA,IAAI,CAAC/B,SAAS,EAAE,OAAO,KAAK;IAEjC,MAAMU,QAAQ,GAAG1B,SAAS,CAACF,WAAW,CAACiD,IAAI,CAACzB,QAAQ,CAAC,CAAC;IACtD,MAAM0B,gBAAgB,GAAGtB,QAAQ,CAACiB,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IACrD,MAAMM,YAAY,GAAGD,gBAAgB,CAACnB,MAAM,GAAG,CAAC,IAAI,UAAU,CAACD,IAAI,CAACoB,gBAAgB,CAAC;IAErF,OAAO,CAACC,YAAY;EACtB,CAAC,CAAC;EAEF,MAAMC,iBAAiB,GAAGJ,oBAAoB,CAACD,GAAG,CAACE,IAAI,IAAIA,IAAI,CAACzB,QAAQ,CAAC;EACzE,MAAMX,kBAAkB,GAAGmC,oBAAoB,CAACD,GAAG,CAACE,IAAI,IAAIA,IAAI,CAAC/B,SAAS,CAAC;EAE3E,OAAON,iBAAiB,CAACC,kBAAkB,EAAEuC,iBAAiB,CAAC;AACjE","ignoreList":[]}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "axyseo",
-  "version": "2025.1.0-blog.3",
+  "version": "2025.1.0-blog.5",
   "main": "build/cjs/index.js",
   "module": "build/esm/index.js",
   "exports": {