axyseo 2025.1.0-blog.3 → 2025.1.0-blog.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -61,7 +61,7 @@ const compareFirstWords = function (sentenceBeginnings, sentences) {
61
61
  * @param {string} sentence The sentence to retrieve the first word from.
62
62
  * @param {Array} firstWordExceptions First word exceptions to match against.
63
63
  * @param {Array} secondWordExceptions Second word exceptions to match against.
64
- * @param {function} getWordsCustomHelper The language-specific helper function to retrieve words from text.
64
+ * @param {function} getWordsCustomHelper The language-specific helper function to retrieve words from text.
65
65
  *
66
66
  * @returns {string} The first word of the sentence.
67
67
  */
@@ -108,15 +108,22 @@ function _default(paper, researcher) {
108
108
  // Exclude text inside tables.
109
109
  text = text.replace(/<figure class='wp-block-table'>.*<\/figure>/gs, '');
110
110
  let sentences = (0, _getSentences.default)(text, memoizedTokenizer);
111
- let sentenceBeginnings = sentences.map(function (sentence) {
112
- return getSentenceBeginning(sentence, firstWordExceptions, secondWordExceptions, getWordsCustomHelper);
111
+ const sentenceData = sentences.map(function (sentence) {
112
+ const beginning = getSentenceBeginning(sentence, firstWordExceptions, secondWordExceptions, getWordsCustomHelper);
113
+ return {
114
+ sentence,
115
+ beginning
116
+ };
113
117
  });
114
- sentences = sentences.filter(function (sentence) {
115
- const stripped = (0, _stripSpaces.default)(sentence);
116
- const words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : (0, _getWords.default)(stripped);
117
- return words.length > 0;
118
+ const filteredSentenceData = sentenceData.filter(function (item) {
119
+ if (!item.beginning) return false;
120
+ const stripped = (0, _stripHTMLTags.stripFullTags)((0, _stripSpaces.default)(item.sentence));
121
+ const strippedNoSpaces = stripped.replace(/\s+/g, '');
122
+ const isDigitsOnly = strippedNoSpaces.length > 0 && /^[0-9]+$/.test(strippedNoSpaces);
123
+ return !isDigitsOnly;
118
124
  });
119
- sentenceBeginnings = (0, _lodash.filter)(sentenceBeginnings);
120
- return compareFirstWords(sentenceBeginnings, sentences);
125
+ const filteredSentences = filteredSentenceData.map(item => item.sentence);
126
+ const sentenceBeginnings = filteredSentenceData.map(item => item.beginning);
127
+ return compareFirstWords(sentenceBeginnings, filteredSentences);
121
128
  }
122
129
  //# sourceMappingURL=getSentenceBeginnings.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"getSentenceBeginnings.js","names":["_getWords","_interopRequireDefault","require","_getSentences","_stripSpaces","_stripHTMLTags","_lodash","_htmlParser","_helpers","_stripNonTextTags","e","__esModule","default","startsWithSameWord","currentSentenceBeginning","nextSentenceBeginning","isEmpty","compareFirstWords","sentenceBeginnings","sentences","consecutiveFirstWords","foundSentences","sameBeginnings","forEach","beginning","i","push","word","count","getSentenceBeginning","sentence","firstWordExceptions","secondWordExceptions","getWordsCustomHelper","stripped","stripTags","stripSpaces","words","getWords","filter","test","length","firstWord","toLocaleLowerCase","indexOf","includes","_default","paper","researcher","getConfig","getHelper","memoizedTokenizer","text","getText","removeHtmlBlocks","stripNonTextTags","filterShortcodesFromHTML","_attributes","shortcodes","replace","getSentences","map"],"sources":["../../../../src/languageProcessing/researches/getSentenceBeginnings.js"],"sourcesContent":["import getWords from '../helpers/word/getWords.js';\nimport getSentences from '../helpers/sentence/getSentences';\nimport stripSpaces from '../helpers/sanitize/stripSpaces.js';\nimport {stripFullTags as stripTags} from '../helpers/sanitize/stripHTMLTags.js';\n\nimport {filter, forEach, isEmpty} from 'lodash';\nimport removeHtmlBlocks from '../helpers/html/htmlParser';\nimport {filterShortcodesFromHTML} from '../helpers';\nimport stripNonTextTags from '@axyseo/languageProcessing/helpers/sanitize/stripNonTextTags';\n\n/**\n * Compares the first word of each sentence with the first word of the following sentence.\n *\n * @param {string} currentSentenceBeginning The first word of the current sentence.\n * @param {string} nextSentenceBeginning The first word of the next sentence.\n * @returns {boolean} Returns true if sentence beginnings match.\n */\nconst startsWithSameWord = function(currentSentenceBeginning, nextSentenceBeginning) {\n return !isEmpty(currentSentenceBeginning) && currentSentenceBeginning === nextSentenceBeginning;\n};\n\n/**\n * Counts the number of similar sentence beginnings.\n *\n * @param {Array} sentenceBeginnings The array containing the first word of each sentence.\n * @param {Array} sentences The array containing all sentences.\n * @returns {Array} The array containing the objects containing the first words and the corresponding counts.\n */\nconst compareFirstWords = function(sentenceBeginnings, sentences) {\n const consecutiveFirstWords = [];\n let foundSentences = [];\n let sameBeginnings = 1;\n\n forEach(sentenceBeginnings, function(beginning, i) {\n const currentSentenceBeginning = beginning;\n const nextSentenceBeginning = sentenceBeginnings[i + 1];\n foundSentences.push(sentences[i]);\n\n if (startsWithSameWord(currentSentenceBeginning, nextSentenceBeginning)) {\n sameBeginnings++;\n } else {\n consecutiveFirstWords.push({\n word: currentSentenceBeginning,\n count: sameBeginnings,\n sentences: foundSentences\n });\n sameBeginnings = 1;\n foundSentences = [];\n }\n });\n\n return consecutiveFirstWords;\n};\n\n/**\n * Retrieves the first word from the sentence. If the first or second word is on an exception list of words that should not be considered as sentence\n * beginnings, the following word is also retrieved.\n *\n * @param {string} sentence The sentence to retrieve the first word from.\n * @param {Array} firstWordExceptions First word exceptions to match against.\n * @param {Array} secondWordExceptions Second word exceptions to match against.\n * @param {function}\tgetWordsCustomHelper The language-specific helper function to retrieve words from text.\n *\n * @returns {string} The first word of the sentence.\n */\nfunction getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n) {\n const stripped = stripTags(stripSpaces(sentence));\n let words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : getWords(stripped);\n\n words = words.filter(word => /^\\p{L}/u.test(word));\n\n if (words.length === 0) {\n return '';\n }\n\n let firstWord = words[0].toLocaleLowerCase();\n\n if (firstWordExceptions.indexOf(firstWord) > -1 && words.length > 1) {\n firstWord = firstWord + ' ' + words[1];\n if (secondWordExceptions) {\n if (secondWordExceptions.includes(words[1])) {\n firstWord = firstWord + ' ' + words[2];\n }\n }\n }\n\n return firstWord;\n}\n\n/**\n * Gets the first word of each sentence from the text, and returns an object containing the first word of each sentence and the corresponding counts.\n *\n * @param {Paper} paper The Paper object to get the text from.\n * @param {Researcher} researcher The researcher this research is a part of.\n *\n * @returns {Object} The object containing the first word of each sentence and the corresponding counts.\n */\nexport default function(paper, researcher) {\n const firstWordExceptions = researcher.getConfig('firstWordExceptions');\n const secondWordExceptions = researcher.getConfig('secondWordExceptions');\n const getWordsCustomHelper = researcher.getHelper('getWordsCustomHelper');\n const memoizedTokenizer = researcher.getHelper('memoizedTokenizer');\n\n let text = paper.getText();\n text = removeHtmlBlocks(text);\n text = stripNonTextTags(text);\n text = filterShortcodesFromHTML(text, paper._attributes && paper._attributes.shortcodes);\n\n // Remove any HTML whitespace padding and replace it with a single whitespace.\n text = text.replace(/[\\s\\n]+/g, ' ');\n\n // Exclude text inside tables.\n text = text.replace(/<figure class='wp-block-table'>.*<\\/figure>/gs, '');\n\n let sentences = getSentences(text, memoizedTokenizer);\n\n let sentenceBeginnings = sentences.map(function(sentence) {\n return getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n );\n });\n\n sentences = sentences.filter(function(sentence) {\n const stripped = stripSpaces(sentence);\n const words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : getWords(stripped);\n return words.length > 0;\n });\n sentenceBeginnings = filter(sentenceBeginnings);\n\n return compareFirstWords(sentenceBeginnings, sentences);\n}\n"],"mappings":";;;;;;AAAA,IAAAA,SAAA,GAAAC,sBAAA,CAAAC,OAAA;AACA,IAAAC,aAAA,GAAAF,sBAAA,CAAAC,OAAA;AACA,IAAAE,YAAA,GAAAH,sBAAA,CAAAC,OAAA;AACA,IAAAG,cAAA,GAAAH,OAAA;AAEA,IAAAI,OAAA,GAAAJ,OAAA;AACA,IAAAK,WAAA,GAAAN,sBAAA,CAAAC,OAAA;AACA,IAAAM,QAAA,GAAAN,OAAA;AACA,IAAAO,iBAAA,GAAAR,sBAAA,CAAAC,OAAA;AAA4F,SAAAD,uBAAAS,CAAA,WAAAA,CAAA,IAAAA,CAAA,CAAAC,UAAA,GAAAD,CAAA,KAAAE,OAAA,EAAAF,CAAA;AAE5F;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMG,kBAAkB,GAAG,SAAAA,CAASC,wBAAwB,EAAEC,qBAAqB,EAAE;EACnF,OAAO,CAAC,IAAAC,eAAO,EAACF,wBAAwB,CAAC,IAAIA,wBAAwB,KAAKC,qBAAqB;AACjG,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAME,iBAAiB,GAAG,SAAAA,CAASC,kBAAkB,EAAEC,SAAS,EAAE;EAChE,MAAMC,qBAAqB,GAAG,EAAE;EAChC,IAAIC,cAAc,GAAG,EAAE;EACvB,IAAIC,cAAc,GAAG,CAAC;EAEtB,IAAAC,eAAO,EAACL,kBAAkB,EAAE,UAASM,SAAS,EAAEC,CAAC,EAAE;IACjD,MAAMX,wBAAwB,GAAGU,SAAS;IAC1C,MAAMT,qBAAqB,GAAGG,kBAAkB,CAACO,CAAC,GAAG,CAAC,CAAC;IACvDJ,cAAc,CAACK,IAAI,CAACP,SAAS,CAACM,CAAC,CAAC,CAAC;IAEjC,IAAIZ,kBAAkB,CAACC,wBAAwB,EAAEC,qBAAqB,CAAC,EAAE;MACvEO,cAAc,EAAE;IAClB,CAAC,MAAM;MACLF,qBAAqB,CAACM,IAAI,CAAC;QACzBC,IAAI,EAAEb,wBAAwB;QAC9Bc,KAAK,EAAEN,cAAc;QACrBH,SAAS,EAAEE;MACb,CAAC,CAAC;MACFC,cAAc,GAAG,CAAC;MAClBD,cAAc,GAAG,EAAE;IACrB;EACF,CAAC,CAAC;EAEF,OAAOD,qBAAqB;AAC9B,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASS,oBAAoBA,CAC3BC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBAAoB,EACpB;EACA,MAAMC,QAAQ,GAAG,IAAAC,4BAAS,EAAC,IAAAC,oBAAW,EAACN,QAAQ,CAAC,CAAC;EACjD,IAAIO,KAAK,GAAGJ,oBAAoB,GAAGA,oBAAoB,CAACC,QAAQ,CAAC,GAAG,IAAAI,iBAAQ,EAACJ,QAAQ,CAAC;EAEtFG,KAAK,GAAGA,KAAK,CAACE,MAAM,CAACZ,IAAI,IAAI,SAAS,CAACa,IAAI,CAACb,IAAI,CAAC,CAAC;EAElD,IAAIU,KAAK,CAACI,MAAM,KAAK,CAAC,EAAE;IACtB,OAAO,EAAE;EACX;EAEA,IAAIC,SAAS,GAAGL,KAAK,CAAC,CAAC,CAAC,CAACM,iBAAiB,CAAC,CAAC;EAE5C,IAAIZ,mBAAmB,CAACa,OAAO,CAACF,SAAS,CAAC,GAAG,CAAC,CAAC,IAAIL,KAAK,CAACI,MAAM,GAAG,CAAC,EAAE;IACnEC,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGL,KAAK,CAAC,CAAC,CAAC;IACtC,IAAIL,oBAAoB,EAAE;MACxB,IAAIA,oBAAoB,CAACa,QAAQ,CAACR,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE;QAC3CK,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGL,KAAK,CAAC,CAAC,CAAC;MACxC;IACF;EACF;EAEA,OAAOK,SAAS;AAClB;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACe,SAAAI,SAASC,KAAK,EAAEC,UAAU,EAAE;EACzC,MAAMjB,mBAAmB,GAAGiB,UAAU,CAACC,SAAS,CAAC,qBAAqB,CAAC;EACvE,MAAMjB,oBAAoB,GAAGgB,UAAU,CAACC,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMhB,oBAAoB,GAAGe,UAAU,CAACE,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMC,iBAAiB,GAAGH,UAAU,CAACE,SAAS,CAAC,mBAAmB,CAAC;EAEnE,IAAIE,IAAI,GAAGL,KAAK,CAACM,OAAO,CAAC,CAAC;EAC1BD,IAAI,GAAG,IAAAE,mBAAgB,EAACF,IAAI,CAAC;EAC7BA,IAAI,GAAG,IAAAG,yBAAgB,EAACH,IAAI,CAAC;EAC7BA,IAAI,GAAG,IAAAI,iCAAwB,EAACJ,IAAI,EAAEL,KAAK,CAACU,WAAW,IAAIV,KAAK,CAACU,WAAW,CAACC,UAAU,CAAC;;EAExF;EACAN,IAAI,GAAGA,IAAI,CAACO,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;;EAEpC;EACAP,IAAI,GAAGA,IAAI,CAACO,OAAO,CAAC,+CAA+C,EAAE,EAAE,CAAC;EAExE,IAAIxC,SAAS,GAAG,IAAAyC,qBAAY,EAACR,IAAI,EAAED,iBAAiB,CAAC;EAErD,IAAIjC,kBAAkB,GAAGC,SAAS,CAAC0C,GAAG,CAAC,UAAS/B,QAAQ,EAAE;IACxD,OAAOD,oBAAoB,CACzBC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBACF,CAAC;EACH,CAAC,CAAC;EAEFd,SAAS,GAAGA,SAAS,CAACoB,MAAM,CAAC,UAAST,QAAQ,EAAE;IAC9C,MAAMI,QAAQ,GAAG,IAAAE,oBAAW,EAACN,QAAQ,CAAC;IACtC,MAAMO,KAAK,GAAGJ,oBAAoB,GAAGA,oBAAoB,CAACC,QAAQ,CAAC,GAAG,IAAAI,iBAAQ,EAACJ,QAAQ,CAAC;IACxF,OAAOG,KAAK,CAACI,MAAM,GAAG,CAAC;EACzB,CAAC,CAAC;EACFvB,kBAAkB,GAAG,IAAAqB,cAAM,EAACrB,kBAAkB,CAAC;EAE/C,OAAOD,iBAAiB,CAACC,kBAAkB,EAAEC,SAAS,CAAC;AACzD","ignoreList":[]}
1
+ {"version":3,"file":"getSentenceBeginnings.js","names":["_getWords","_interopRequireDefault","require","_getSentences","_stripSpaces","_stripHTMLTags","_lodash","_htmlParser","_helpers","_stripNonTextTags","e","__esModule","default","startsWithSameWord","currentSentenceBeginning","nextSentenceBeginning","isEmpty","compareFirstWords","sentenceBeginnings","sentences","consecutiveFirstWords","foundSentences","sameBeginnings","forEach","beginning","i","push","word","count","getSentenceBeginning","sentence","firstWordExceptions","secondWordExceptions","getWordsCustomHelper","stripped","stripTags","stripSpaces","words","getWords","filter","test","length","firstWord","toLocaleLowerCase","indexOf","includes","_default","paper","researcher","getConfig","getHelper","memoizedTokenizer","text","getText","removeHtmlBlocks","stripNonTextTags","filterShortcodesFromHTML","_attributes","shortcodes","replace","getSentences","sentenceData","map","filteredSentenceData","item","strippedNoSpaces","isDigitsOnly","filteredSentences"],"sources":["../../../../src/languageProcessing/researches/getSentenceBeginnings.js"],"sourcesContent":["import getWords from '../helpers/word/getWords.js';\nimport getSentences from '../helpers/sentence/getSentences';\nimport stripSpaces from '../helpers/sanitize/stripSpaces.js';\nimport {stripFullTags as stripTags} from '../helpers/sanitize/stripHTMLTags.js';\n\nimport {filter, forEach, isEmpty} from 'lodash';\nimport removeHtmlBlocks from '../helpers/html/htmlParser';\nimport {filterShortcodesFromHTML} from '../helpers';\nimport stripNonTextTags from '@axyseo/languageProcessing/helpers/sanitize/stripNonTextTags';\n\n/**\n * Compares the first word of each sentence with the first word of the following sentence.\n *\n * @param {string} currentSentenceBeginning The first word of the current sentence.\n * @param {string} nextSentenceBeginning The first word of the next sentence.\n * @returns {boolean} Returns true if sentence beginnings match.\n */\nconst startsWithSameWord = function(currentSentenceBeginning, nextSentenceBeginning) {\n return !isEmpty(currentSentenceBeginning) && currentSentenceBeginning === nextSentenceBeginning;\n};\n\n/**\n * Counts the number of similar sentence beginnings.\n *\n * @param {Array} sentenceBeginnings The array containing the first word of each sentence.\n * @param {Array} sentences The array containing all sentences.\n * @returns {Array} The array containing the objects containing the first words and the corresponding counts.\n */\nconst compareFirstWords = function(sentenceBeginnings, sentences) {\n const consecutiveFirstWords = [];\n let foundSentences = [];\n let sameBeginnings = 1;\n\n forEach(sentenceBeginnings, function(beginning, i) {\n const currentSentenceBeginning = beginning;\n const nextSentenceBeginning = sentenceBeginnings[i + 1];\n foundSentences.push(sentences[i]);\n\n if (startsWithSameWord(currentSentenceBeginning, nextSentenceBeginning)) {\n sameBeginnings++;\n } else {\n consecutiveFirstWords.push({\n word: currentSentenceBeginning,\n count: sameBeginnings,\n sentences: foundSentences\n });\n sameBeginnings = 1;\n foundSentences = [];\n }\n });\n\n return consecutiveFirstWords;\n};\n\n/**\n * Retrieves the first word from the sentence. If the first or second word is on an exception list of words that should not be considered as sentence\n * beginnings, the following word is also retrieved.\n *\n * @param {string} sentence The sentence to retrieve the first word from.\n * @param {Array} firstWordExceptions First word exceptions to match against.\n * @param {Array} secondWordExceptions Second word exceptions to match against.\n * @param {function} getWordsCustomHelper The language-specific helper function to retrieve words from text.\n *\n * @returns {string} The first word of the sentence.\n */\nfunction getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n) {\n const stripped = stripTags(stripSpaces(sentence));\n let words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : getWords(stripped);\n\n words = words.filter(word => /^\\p{L}/u.test(word));\n\n if (words.length === 0) {\n return '';\n }\n\n let firstWord = words[0].toLocaleLowerCase();\n\n if (firstWordExceptions.indexOf(firstWord) > -1 && words.length > 1) {\n firstWord = firstWord + ' ' + words[1];\n if (secondWordExceptions) {\n if (secondWordExceptions.includes(words[1])) {\n firstWord = firstWord + ' ' + words[2];\n }\n }\n }\n\n return firstWord;\n}\n\n/**\n * Gets the first word of each sentence from the text, and returns an object containing the first word of each sentence and the corresponding counts.\n *\n * @param {Paper} paper The Paper object to get the text from.\n * @param {Researcher} researcher The researcher this research is a part of.\n *\n * @returns {Object} The object containing the first word of each sentence and the corresponding counts.\n */\nexport default function(paper, researcher) {\n const firstWordExceptions = researcher.getConfig('firstWordExceptions');\n const secondWordExceptions = researcher.getConfig('secondWordExceptions');\n const getWordsCustomHelper = researcher.getHelper('getWordsCustomHelper');\n const memoizedTokenizer = researcher.getHelper('memoizedTokenizer');\n\n let text = paper.getText();\n text = removeHtmlBlocks(text);\n text = stripNonTextTags(text);\n text = filterShortcodesFromHTML(text, paper._attributes && paper._attributes.shortcodes);\n\n // Remove any HTML whitespace padding and replace it with a single whitespace.\n text = text.replace(/[\\s\\n]+/g, ' ');\n\n // Exclude text inside tables.\n text = text.replace(/<figure class='wp-block-table'>.*<\\/figure>/gs, '');\n\n let sentences = getSentences(text, memoizedTokenizer);\n\n const sentenceData = sentences.map(function(sentence) {\n const beginning = getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n );\n\n return {sentence, beginning};\n });\n\n const filteredSentenceData = sentenceData.filter(function(item) {\n if (!item.beginning) return false;\n\n const stripped = stripTags(stripSpaces(item.sentence));\n const strippedNoSpaces = stripped.replace(/\\s+/g, '');\n const isDigitsOnly = strippedNoSpaces.length > 0 && /^[0-9]+$/.test(strippedNoSpaces);\n\n return !isDigitsOnly;\n });\n\n const filteredSentences = filteredSentenceData.map(item => item.sentence);\n const sentenceBeginnings = filteredSentenceData.map(item => item.beginning);\n\n return compareFirstWords(sentenceBeginnings, filteredSentences);\n}\n"],"mappings":";;;;;;AAAA,IAAAA,SAAA,GAAAC,sBAAA,CAAAC,OAAA;AACA,IAAAC,aAAA,GAAAF,sBAAA,CAAAC,OAAA;AACA,IAAAE,YAAA,GAAAH,sBAAA,CAAAC,OAAA;AACA,IAAAG,cAAA,GAAAH,OAAA;AAEA,IAAAI,OAAA,GAAAJ,OAAA;AACA,IAAAK,WAAA,GAAAN,sBAAA,CAAAC,OAAA;AACA,IAAAM,QAAA,GAAAN,OAAA;AACA,IAAAO,iBAAA,GAAAR,sBAAA,CAAAC,OAAA;AAA4F,SAAAD,uBAAAS,CAAA,WAAAA,CAAA,IAAAA,CAAA,CAAAC,UAAA,GAAAD,CAAA,KAAAE,OAAA,EAAAF,CAAA;AAE5F;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMG,kBAAkB,GAAG,SAAAA,CAASC,wBAAwB,EAAEC,qBAAqB,EAAE;EACnF,OAAO,CAAC,IAAAC,eAAO,EAACF,wBAAwB,CAAC,IAAIA,wBAAwB,KAAKC,qBAAqB;AACjG,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAME,iBAAiB,GAAG,SAAAA,CAASC,kBAAkB,EAAEC,SAAS,EAAE;EAChE,MAAMC,qBAAqB,GAAG,EAAE;EAChC,IAAIC,cAAc,GAAG,EAAE;EACvB,IAAIC,cAAc,GAAG,CAAC;EAEtB,IAAAC,eAAO,EAACL,kBAAkB,EAAE,UAASM,SAAS,EAAEC,CAAC,EAAE;IACjD,MAAMX,wBAAwB,GAAGU,SAAS;IAC1C,MAAMT,qBAAqB,GAAGG,kBAAkB,CAACO,CAAC,GAAG,CAAC,CAAC;IACvDJ,cAAc,CAACK,IAAI,CAACP,SAAS,CAACM,CAAC,CAAC,CAAC;IAEjC,IAAIZ,kBAAkB,CAACC,wBAAwB,EAAEC,qBAAqB,CAAC,EAAE;MACvEO,cAAc,EAAE;IAClB,CAAC,MAAM;MACLF,qBAAqB,CAACM,IAAI,CAAC;QACzBC,IAAI,EAAEb,wBAAwB;QAC9Bc,KAAK,EAAEN,cAAc;QACrBH,SAAS,EAAEE;MACb,CAAC,CAAC;MACFC,cAAc,GAAG,CAAC;MAClBD,cAAc,GAAG,EAAE;IACrB;EACF,CAAC,CAAC;EAEF,OAAOD,qBAAqB;AAC9B,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASS,oBAAoBA,CAC3BC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBAAoB,EACpB;EACA,MAAMC,QAAQ,GAAG,IAAAC,4BAAS,EAAC,IAAAC,oBAAW,EAACN,QAAQ,CAAC,CAAC;EACjD,IAAIO,KAAK,GAAGJ,oBAAoB,GAAGA,oBAAoB,CAACC,QAAQ,CAAC,GAAG,IAAAI,iBAAQ,EAACJ,QAAQ,CAAC;EAEtFG,KAAK,GAAGA,KAAK,CAACE,MAAM,CAACZ,IAAI,IAAI,SAAS,CAACa,IAAI,CAACb,IAAI,CAAC,CAAC;EAElD,IAAIU,KAAK,CAACI,MAAM,KAAK,CAAC,EAAE;IACtB,OAAO,EAAE;EACX;EAEA,IAAIC,SAAS,GAAGL,KAAK,CAAC,CAAC,CAAC,CAACM,iBAAiB,CAAC,CAAC;EAE5C,IAAIZ,mBAAmB,CAACa,OAAO,CAACF,SAAS,CAAC,GAAG,CAAC,CAAC,IAAIL,KAAK,CAACI,MAAM,GAAG,CAAC,EAAE;IACnEC,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGL,KAAK,CAAC,CAAC,CAAC;IACtC,IAAIL,oBAAoB,EAAE;MACxB,IAAIA,oBAAoB,CAACa,QAAQ,CAACR,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE;QAC3CK,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGL,KAAK,CAAC,CAAC,CAAC;MACxC;IACF;EACF;EAEA,OAAOK,SAAS;AAClB;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACe,SAAAI,SAASC,KAAK,EAAEC,UAAU,EAAE;EACzC,MAAMjB,mBAAmB,GAAGiB,UAAU,CAACC,SAAS,CAAC,qBAAqB,CAAC;EACvE,MAAMjB,oBAAoB,GAAGgB,UAAU,CAACC,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMhB,oBAAoB,GAAGe,UAAU,CAACE,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMC,iBAAiB,GAAGH,UAAU,CAACE,SAAS,CAAC,mBAAmB,CAAC;EAEnE,IAAIE,IAAI,GAAGL,KAAK,CAACM,OAAO,CAAC,CAAC;EAC1BD,IAAI,GAAG,IAAAE,mBAAgB,EAACF,IAAI,CAAC;EAC7BA,IAAI,GAAG,IAAAG,yBAAgB,EAACH,IAAI,CAAC;EAC7BA,IAAI,GAAG,IAAAI,iCAAwB,EAACJ,IAAI,EAAEL,KAAK,CAACU,WAAW,IAAIV,KAAK,CAACU,WAAW,CAACC,UAAU,CAAC;;EAExF;EACAN,IAAI,GAAGA,IAAI,CAACO,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;;EAEpC;EACAP,IAAI,GAAGA,IAAI,CAACO,OAAO,CAAC,+CAA+C,EAAE,EAAE,CAAC;EAExE,IAAIxC,SAAS,GAAG,IAAAyC,qBAAY,EAACR,IAAI,EAAED,iBAAiB,CAAC;EAErD,MAAMU,YAAY,GAAG1C,SAAS,CAAC2C,GAAG,CAAC,UAAShC,QAAQ,EAAE;IACpD,MAAMN,SAAS,GAAGK,oBAAoB,CACpCC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBACF,CAAC;IAED,OAAO;MAACH,QAAQ;MAAEN;IAAS,CAAC;EAC9B,CAAC,CAAC;EAEF,MAAMuC,oBAAoB,GAAGF,YAAY,CAACtB,MAAM,CAAC,UAASyB,IAAI,EAAE;IAC9D,IAAI,CAACA,IAAI,CAACxC,SAAS,EAAE,OAAO,KAAK;IAEjC,MAAMU,QAAQ,GAAG,IAAAC,4BAAS,EAAC,IAAAC,oBAAW,EAAC4B,IAAI,CAAClC,QAAQ,CAAC,CAAC;IACtD,MAAMmC,gBAAgB,GAAG/B,QAAQ,CAACyB,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IACrD,MAAMO,YAAY,GAAGD,gBAAgB,CAACxB,MAAM,GAAG,CAAC,IAAI,UAAU,CAACD,IAAI,CAACyB,gBAAgB,CAAC;IAErF,OAAO,CAACC,YAAY;EACtB,CAAC,CAAC;EAEF,MAAMC,iBAAiB,GAAGJ,oBAAoB,CAACD,GAAG,CAACE,IAAI,IAAIA,IAAI,CAAClC,QAAQ,CAAC;EACzE,MAAMZ,kBAAkB,GAAG6C,oBAAoB,CAACD,GAAG,CAACE,IAAI,IAAIA,IAAI,CAACxC,SAAS,CAAC;EAE3E,OAAOP,iBAAiB,CAACC,kBAAkB,EAAEiD,iBAAiB,CAAC;AACjE","ignoreList":[]}
@@ -55,7 +55,7 @@ const compareFirstWords = function (sentenceBeginnings, sentences) {
55
55
  * @param {string} sentence The sentence to retrieve the first word from.
56
56
  * @param {Array} firstWordExceptions First word exceptions to match against.
57
57
  * @param {Array} secondWordExceptions Second word exceptions to match against.
58
- * @param {function} getWordsCustomHelper The language-specific helper function to retrieve words from text.
58
+ * @param {function} getWordsCustomHelper The language-specific helper function to retrieve words from text.
59
59
  *
60
60
  * @returns {string} The first word of the sentence.
61
61
  */
@@ -102,15 +102,22 @@ export default function (paper, researcher) {
102
102
  // Exclude text inside tables.
103
103
  text = text.replace(/<figure class='wp-block-table'>.*<\/figure>/gs, '');
104
104
  let sentences = getSentences(text, memoizedTokenizer);
105
- let sentenceBeginnings = sentences.map(function (sentence) {
106
- return getSentenceBeginning(sentence, firstWordExceptions, secondWordExceptions, getWordsCustomHelper);
105
+ const sentenceData = sentences.map(function (sentence) {
106
+ const beginning = getSentenceBeginning(sentence, firstWordExceptions, secondWordExceptions, getWordsCustomHelper);
107
+ return {
108
+ sentence,
109
+ beginning
110
+ };
107
111
  });
108
- sentences = sentences.filter(function (sentence) {
109
- const stripped = stripSpaces(sentence);
110
- const words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : getWords(stripped);
111
- return words.length > 0;
112
+ const filteredSentenceData = sentenceData.filter(function (item) {
113
+ if (!item.beginning) return false;
114
+ const stripped = stripTags(stripSpaces(item.sentence));
115
+ const strippedNoSpaces = stripped.replace(/\s+/g, '');
116
+ const isDigitsOnly = strippedNoSpaces.length > 0 && /^[0-9]+$/.test(strippedNoSpaces);
117
+ return !isDigitsOnly;
112
118
  });
113
- sentenceBeginnings = filter(sentenceBeginnings);
114
- return compareFirstWords(sentenceBeginnings, sentences);
119
+ const filteredSentences = filteredSentenceData.map(item => item.sentence);
120
+ const sentenceBeginnings = filteredSentenceData.map(item => item.beginning);
121
+ return compareFirstWords(sentenceBeginnings, filteredSentences);
115
122
  }
116
123
  //# sourceMappingURL=getSentenceBeginnings.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"getSentenceBeginnings.js","names":["getWords","getSentences","stripSpaces","stripFullTags","stripTags","filter","forEach","isEmpty","removeHtmlBlocks","filterShortcodesFromHTML","stripNonTextTags","startsWithSameWord","currentSentenceBeginning","nextSentenceBeginning","compareFirstWords","sentenceBeginnings","sentences","consecutiveFirstWords","foundSentences","sameBeginnings","beginning","i","push","word","count","getSentenceBeginning","sentence","firstWordExceptions","secondWordExceptions","getWordsCustomHelper","stripped","words","test","length","firstWord","toLocaleLowerCase","indexOf","includes","paper","researcher","getConfig","getHelper","memoizedTokenizer","text","getText","_attributes","shortcodes","replace","map"],"sources":["../../../../src/languageProcessing/researches/getSentenceBeginnings.js"],"sourcesContent":["import getWords from '../helpers/word/getWords.js';\nimport getSentences from '../helpers/sentence/getSentences';\nimport stripSpaces from '../helpers/sanitize/stripSpaces.js';\nimport {stripFullTags as stripTags} from '../helpers/sanitize/stripHTMLTags.js';\n\nimport {filter, forEach, isEmpty} from 'lodash';\nimport removeHtmlBlocks from '../helpers/html/htmlParser';\nimport {filterShortcodesFromHTML} from '../helpers';\nimport stripNonTextTags from '@axyseo/languageProcessing/helpers/sanitize/stripNonTextTags';\n\n/**\n * Compares the first word of each sentence with the first word of the following sentence.\n *\n * @param {string} currentSentenceBeginning The first word of the current sentence.\n * @param {string} nextSentenceBeginning The first word of the next sentence.\n * @returns {boolean} Returns true if sentence beginnings match.\n */\nconst startsWithSameWord = function(currentSentenceBeginning, nextSentenceBeginning) {\n return !isEmpty(currentSentenceBeginning) && currentSentenceBeginning === nextSentenceBeginning;\n};\n\n/**\n * Counts the number of similar sentence beginnings.\n *\n * @param {Array} sentenceBeginnings The array containing the first word of each sentence.\n * @param {Array} sentences The array containing all sentences.\n * @returns {Array} The array containing the objects containing the first words and the corresponding counts.\n */\nconst compareFirstWords = function(sentenceBeginnings, sentences) {\n const consecutiveFirstWords = [];\n let foundSentences = [];\n let sameBeginnings = 1;\n\n forEach(sentenceBeginnings, function(beginning, i) {\n const currentSentenceBeginning = beginning;\n const nextSentenceBeginning = sentenceBeginnings[i + 1];\n foundSentences.push(sentences[i]);\n\n if (startsWithSameWord(currentSentenceBeginning, nextSentenceBeginning)) {\n sameBeginnings++;\n } else {\n consecutiveFirstWords.push({\n word: currentSentenceBeginning,\n count: sameBeginnings,\n sentences: foundSentences\n });\n sameBeginnings = 1;\n foundSentences = [];\n }\n });\n\n return consecutiveFirstWords;\n};\n\n/**\n * Retrieves the first word from the sentence. If the first or second word is on an exception list of words that should not be considered as sentence\n * beginnings, the following word is also retrieved.\n *\n * @param {string} sentence The sentence to retrieve the first word from.\n * @param {Array} firstWordExceptions First word exceptions to match against.\n * @param {Array} secondWordExceptions Second word exceptions to match against.\n * @param {function}\tgetWordsCustomHelper The language-specific helper function to retrieve words from text.\n *\n * @returns {string} The first word of the sentence.\n */\nfunction getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n) {\n const stripped = stripTags(stripSpaces(sentence));\n let words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : getWords(stripped);\n\n words = words.filter(word => /^\\p{L}/u.test(word));\n\n if (words.length === 0) {\n return '';\n }\n\n let firstWord = words[0].toLocaleLowerCase();\n\n if (firstWordExceptions.indexOf(firstWord) > -1 && words.length > 1) {\n firstWord = firstWord + ' ' + words[1];\n if (secondWordExceptions) {\n if (secondWordExceptions.includes(words[1])) {\n firstWord = firstWord + ' ' + words[2];\n }\n }\n }\n\n return firstWord;\n}\n\n/**\n * Gets the first word of each sentence from the text, and returns an object containing the first word of each sentence and the corresponding counts.\n *\n * @param {Paper} paper The Paper object to get the text from.\n * @param {Researcher} researcher The researcher this research is a part of.\n *\n * @returns {Object} The object containing the first word of each sentence and the corresponding counts.\n */\nexport default function(paper, researcher) {\n const firstWordExceptions = researcher.getConfig('firstWordExceptions');\n const secondWordExceptions = researcher.getConfig('secondWordExceptions');\n const getWordsCustomHelper = researcher.getHelper('getWordsCustomHelper');\n const memoizedTokenizer = researcher.getHelper('memoizedTokenizer');\n\n let text = paper.getText();\n text = removeHtmlBlocks(text);\n text = stripNonTextTags(text);\n text = filterShortcodesFromHTML(text, paper._attributes && paper._attributes.shortcodes);\n\n // Remove any HTML whitespace padding and replace it with a single whitespace.\n text = text.replace(/[\\s\\n]+/g, ' ');\n\n // Exclude text inside tables.\n text = text.replace(/<figure class='wp-block-table'>.*<\\/figure>/gs, '');\n\n let sentences = getSentences(text, memoizedTokenizer);\n\n let sentenceBeginnings = sentences.map(function(sentence) {\n return getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n );\n });\n\n sentences = sentences.filter(function(sentence) {\n const stripped = stripSpaces(sentence);\n const words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : getWords(stripped);\n return words.length > 0;\n });\n sentenceBeginnings = filter(sentenceBeginnings);\n\n return compareFirstWords(sentenceBeginnings, sentences);\n}\n"],"mappings":"AAAA,OAAOA,QAAQ;AACf,OAAOC,YAAY;AACnB,OAAOC,WAAW;AAClB,SAAQC,aAAa,IAAIC,SAAS;AAElC,SAAQC,MAAM,EAAEC,OAAO,EAAEC,OAAO,QAAO,QAAQ;AAC/C,OAAOC,gBAAgB;AACvB,SAAQC,wBAAwB;AAChC,OAAOC,gBAAgB;;AAEvB;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMC,kBAAkB,GAAG,SAAAA,CAASC,wBAAwB,EAAEC,qBAAqB,EAAE;EACnF,OAAO,CAACN,OAAO,CAACK,wBAAwB,CAAC,IAAIA,wBAAwB,KAAKC,qBAAqB;AACjG,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMC,iBAAiB,GAAG,SAAAA,CAASC,kBAAkB,EAAEC,SAAS,EAAE;EAChE,MAAMC,qBAAqB,GAAG,EAAE;EAChC,IAAIC,cAAc,GAAG,EAAE;EACvB,IAAIC,cAAc,GAAG,CAAC;EAEtBb,OAAO,CAACS,kBAAkB,EAAE,UAASK,SAAS,EAAEC,CAAC,EAAE;IACjD,MAAMT,wBAAwB,GAAGQ,SAAS;IAC1C,MAAMP,qBAAqB,GAAGE,kBAAkB,CAACM,CAAC,GAAG,CAAC,CAAC;IACvDH,cAAc,CAACI,IAAI,CAACN,SAAS,CAACK,CAAC,CAAC,CAAC;IAEjC,IAAIV,kBAAkB,CAACC,wBAAwB,EAAEC,qBAAqB,CAAC,EAAE;MACvEM,cAAc,EAAE;IAClB,CAAC,MAAM;MACLF,qBAAqB,CAACK,IAAI,CAAC;QACzBC,IAAI,EAAEX,wBAAwB;QAC9BY,KAAK,EAAEL,cAAc;QACrBH,SAAS,EAAEE;MACb,CAAC,CAAC;MACFC,cAAc,GAAG,CAAC;MAClBD,cAAc,GAAG,EAAE;IACrB;EACF,CAAC,CAAC;EAEF,OAAOD,qBAAqB;AAC9B,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASQ,oBAAoBA,CAC3BC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBAAoB,EACpB;EACA,MAAMC,QAAQ,GAAG1B,SAAS,CAACF,WAAW,CAACwB,QAAQ,CAAC,CAAC;EACjD,IAAIK,KAAK,GAAGF,oBAAoB,GAAGA,oBAAoB,CAACC,QAAQ,CAAC,GAAG9B,QAAQ,CAAC8B,QAAQ,CAAC;EAEtFC,KAAK,GAAGA,KAAK,CAAC1B,MAAM,CAACkB,IAAI,IAAI,SAAS,CAACS,IAAI,CAACT,IAAI,CAAC,CAAC;EAElD,IAAIQ,KAAK,CAACE,MAAM,KAAK,CAAC,EAAE;IACtB,OAAO,EAAE;EACX;EAEA,IAAIC,SAAS,GAAGH,KAAK,CAAC,CAAC,CAAC,CAACI,iBAAiB,CAAC,CAAC;EAE5C,IAAIR,mBAAmB,CAACS,OAAO,CAACF,SAAS,CAAC,GAAG,CAAC,CAAC,IAAIH,KAAK,CAACE,MAAM,GAAG,CAAC,EAAE;IACnEC,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGH,KAAK,CAAC,CAAC,CAAC;IACtC,IAAIH,oBAAoB,EAAE;MACxB,IAAIA,oBAAoB,CAACS,QAAQ,CAACN,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE;QAC3CG,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGH,KAAK,CAAC,CAAC,CAAC;MACxC;IACF;EACF;EAEA,OAAOG,SAAS;AAClB;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAASI,KAAK,EAAEC,UAAU,EAAE;EACzC,MAAMZ,mBAAmB,GAAGY,UAAU,CAACC,SAAS,CAAC,qBAAqB,CAAC;EACvE,MAAMZ,oBAAoB,GAAGW,UAAU,CAACC,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMX,oBAAoB,GAAGU,UAAU,CAACE,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMC,iBAAiB,GAAGH,UAAU,CAACE,SAAS,CAAC,mBAAmB,CAAC;EAEnE,IAAIE,IAAI,GAAGL,KAAK,CAACM,OAAO,CAAC,CAAC;EAC1BD,IAAI,GAAGnC,gBAAgB,CAACmC,IAAI,CAAC;EAC7BA,IAAI,GAAGjC,gBAAgB,CAACiC,IAAI,CAAC;EAC7BA,IAAI,GAAGlC,wBAAwB,CAACkC,IAAI,EAAEL,KAAK,CAACO,WAAW,IAAIP,KAAK,CAACO,WAAW,CAACC,UAAU,CAAC;;EAExF;EACAH,IAAI,GAAGA,IAAI,CAACI,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;;EAEpC;EACAJ,IAAI,GAAGA,IAAI,CAACI,OAAO,CAAC,+CAA+C,EAAE,EAAE,CAAC;EAExE,IAAI/B,SAAS,GAAGf,YAAY,CAAC0C,IAAI,EAAED,iBAAiB,CAAC;EAErD,IAAI3B,kBAAkB,GAAGC,SAAS,CAACgC,GAAG,CAAC,UAAStB,QAAQ,EAAE;IACxD,OAAOD,oBAAoB,CACzBC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBACF,CAAC;EACH,CAAC,CAAC;EAEFb,SAAS,GAAGA,SAAS,CAACX,MAAM,CAAC,UAASqB,QAAQ,EAAE;IAC9C,MAAMI,QAAQ,GAAG5B,WAAW,CAACwB,QAAQ,CAAC;IACtC,MAAMK,KAAK,GAAGF,oBAAoB,GAAGA,oBAAoB,CAACC,QAAQ,CAAC,GAAG9B,QAAQ,CAAC8B,QAAQ,CAAC;IACxF,OAAOC,KAAK,CAACE,MAAM,GAAG,CAAC;EACzB,CAAC,CAAC;EACFlB,kBAAkB,GAAGV,MAAM,CAACU,kBAAkB,CAAC;EAE/C,OAAOD,iBAAiB,CAACC,kBAAkB,EAAEC,SAAS,CAAC;AACzD","ignoreList":[]}
1
+ {"version":3,"file":"getSentenceBeginnings.js","names":["getWords","getSentences","stripSpaces","stripFullTags","stripTags","filter","forEach","isEmpty","removeHtmlBlocks","filterShortcodesFromHTML","stripNonTextTags","startsWithSameWord","currentSentenceBeginning","nextSentenceBeginning","compareFirstWords","sentenceBeginnings","sentences","consecutiveFirstWords","foundSentences","sameBeginnings","beginning","i","push","word","count","getSentenceBeginning","sentence","firstWordExceptions","secondWordExceptions","getWordsCustomHelper","stripped","words","test","length","firstWord","toLocaleLowerCase","indexOf","includes","paper","researcher","getConfig","getHelper","memoizedTokenizer","text","getText","_attributes","shortcodes","replace","sentenceData","map","filteredSentenceData","item","strippedNoSpaces","isDigitsOnly","filteredSentences"],"sources":["../../../../src/languageProcessing/researches/getSentenceBeginnings.js"],"sourcesContent":["import getWords from '../helpers/word/getWords.js';\nimport getSentences from '../helpers/sentence/getSentences';\nimport stripSpaces from '../helpers/sanitize/stripSpaces.js';\nimport {stripFullTags as stripTags} from '../helpers/sanitize/stripHTMLTags.js';\n\nimport {filter, forEach, isEmpty} from 'lodash';\nimport removeHtmlBlocks from '../helpers/html/htmlParser';\nimport {filterShortcodesFromHTML} from '../helpers';\nimport stripNonTextTags from '@axyseo/languageProcessing/helpers/sanitize/stripNonTextTags';\n\n/**\n * Compares the first word of each sentence with the first word of the following sentence.\n *\n * @param {string} currentSentenceBeginning The first word of the current sentence.\n * @param {string} nextSentenceBeginning The first word of the next sentence.\n * @returns {boolean} Returns true if sentence beginnings match.\n */\nconst startsWithSameWord = function(currentSentenceBeginning, nextSentenceBeginning) {\n return !isEmpty(currentSentenceBeginning) && currentSentenceBeginning === nextSentenceBeginning;\n};\n\n/**\n * Counts the number of similar sentence beginnings.\n *\n * @param {Array} sentenceBeginnings The array containing the first word of each sentence.\n * @param {Array} sentences The array containing all sentences.\n * @returns {Array} The array containing the objects containing the first words and the corresponding counts.\n */\nconst compareFirstWords = function(sentenceBeginnings, sentences) {\n const consecutiveFirstWords = [];\n let foundSentences = [];\n let sameBeginnings = 1;\n\n forEach(sentenceBeginnings, function(beginning, i) {\n const currentSentenceBeginning = beginning;\n const nextSentenceBeginning = sentenceBeginnings[i + 1];\n foundSentences.push(sentences[i]);\n\n if (startsWithSameWord(currentSentenceBeginning, nextSentenceBeginning)) {\n sameBeginnings++;\n } else {\n consecutiveFirstWords.push({\n word: currentSentenceBeginning,\n count: sameBeginnings,\n sentences: foundSentences\n });\n sameBeginnings = 1;\n foundSentences = [];\n }\n });\n\n return consecutiveFirstWords;\n};\n\n/**\n * Retrieves the first word from the sentence. If the first or second word is on an exception list of words that should not be considered as sentence\n * beginnings, the following word is also retrieved.\n *\n * @param {string} sentence The sentence to retrieve the first word from.\n * @param {Array} firstWordExceptions First word exceptions to match against.\n * @param {Array} secondWordExceptions Second word exceptions to match against.\n * @param {function} getWordsCustomHelper The language-specific helper function to retrieve words from text.\n *\n * @returns {string} The first word of the sentence.\n */\nfunction getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n) {\n const stripped = stripTags(stripSpaces(sentence));\n let words = getWordsCustomHelper ? getWordsCustomHelper(stripped) : getWords(stripped);\n\n words = words.filter(word => /^\\p{L}/u.test(word));\n\n if (words.length === 0) {\n return '';\n }\n\n let firstWord = words[0].toLocaleLowerCase();\n\n if (firstWordExceptions.indexOf(firstWord) > -1 && words.length > 1) {\n firstWord = firstWord + ' ' + words[1];\n if (secondWordExceptions) {\n if (secondWordExceptions.includes(words[1])) {\n firstWord = firstWord + ' ' + words[2];\n }\n }\n }\n\n return firstWord;\n}\n\n/**\n * Gets the first word of each sentence from the text, and returns an object containing the first word of each sentence and the corresponding counts.\n *\n * @param {Paper} paper The Paper object to get the text from.\n * @param {Researcher} researcher The researcher this research is a part of.\n *\n * @returns {Object} The object containing the first word of each sentence and the corresponding counts.\n */\nexport default function(paper, researcher) {\n const firstWordExceptions = researcher.getConfig('firstWordExceptions');\n const secondWordExceptions = researcher.getConfig('secondWordExceptions');\n const getWordsCustomHelper = researcher.getHelper('getWordsCustomHelper');\n const memoizedTokenizer = researcher.getHelper('memoizedTokenizer');\n\n let text = paper.getText();\n text = removeHtmlBlocks(text);\n text = stripNonTextTags(text);\n text = filterShortcodesFromHTML(text, paper._attributes && paper._attributes.shortcodes);\n\n // Remove any HTML whitespace padding and replace it with a single whitespace.\n text = text.replace(/[\\s\\n]+/g, ' ');\n\n // Exclude text inside tables.\n text = text.replace(/<figure class='wp-block-table'>.*<\\/figure>/gs, '');\n\n let sentences = getSentences(text, memoizedTokenizer);\n\n const sentenceData = sentences.map(function(sentence) {\n const beginning = getSentenceBeginning(\n sentence,\n firstWordExceptions,\n secondWordExceptions,\n getWordsCustomHelper\n );\n\n return {sentence, beginning};\n });\n\n const filteredSentenceData = sentenceData.filter(function(item) {\n if (!item.beginning) return false;\n\n const stripped = stripTags(stripSpaces(item.sentence));\n const strippedNoSpaces = stripped.replace(/\\s+/g, '');\n const isDigitsOnly = strippedNoSpaces.length > 0 && /^[0-9]+$/.test(strippedNoSpaces);\n\n return !isDigitsOnly;\n });\n\n const filteredSentences = filteredSentenceData.map(item => item.sentence);\n const sentenceBeginnings = filteredSentenceData.map(item => item.beginning);\n\n return compareFirstWords(sentenceBeginnings, filteredSentences);\n}\n"],"mappings":"AAAA,OAAOA,QAAQ;AACf,OAAOC,YAAY;AACnB,OAAOC,WAAW;AAClB,SAAQC,aAAa,IAAIC,SAAS;AAElC,SAAQC,MAAM,EAAEC,OAAO,EAAEC,OAAO,QAAO,QAAQ;AAC/C,OAAOC,gBAAgB;AACvB,SAAQC,wBAAwB;AAChC,OAAOC,gBAAgB;;AAEvB;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMC,kBAAkB,GAAG,SAAAA,CAASC,wBAAwB,EAAEC,qBAAqB,EAAE;EACnF,OAAO,CAACN,OAAO,CAACK,wBAAwB,CAAC,IAAIA,wBAAwB,KAAKC,qBAAqB;AACjG,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA,MAAMC,iBAAiB,GAAG,SAAAA,CAASC,kBAAkB,EAAEC,SAAS,EAAE;EAChE,MAAMC,qBAAqB,GAAG,EAAE;EAChC,IAAIC,cAAc,GAAG,EAAE;EACvB,IAAIC,cAAc,GAAG,CAAC;EAEtBb,OAAO,CAACS,kBAAkB,EAAE,UAASK,SAAS,EAAEC,CAAC,EAAE;IACjD,MAAMT,wBAAwB,GAAGQ,SAAS;IAC1C,MAAMP,qBAAqB,GAAGE,kBAAkB,CAACM,CAAC,GAAG,CAAC,CAAC;IACvDH,cAAc,CAACI,IAAI,CAACN,SAAS,CAACK,CAAC,CAAC,CAAC;IAEjC,IAAIV,kBAAkB,CAACC,wBAAwB,EAAEC,qBAAqB,CAAC,EAAE;MACvEM,cAAc,EAAE;IAClB,CAAC,MAAM;MACLF,qBAAqB,CAACK,IAAI,CAAC;QACzBC,IAAI,EAAEX,wBAAwB;QAC9BY,KAAK,EAAEL,cAAc;QACrBH,SAAS,EAAEE;MACb,CAAC,CAAC;MACFC,cAAc,GAAG,CAAC;MAClBD,cAAc,GAAG,EAAE;IACrB;EACF,CAAC,CAAC;EAEF,OAAOD,qBAAqB;AAC9B,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASQ,oBAAoBA,CAC3BC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBAAoB,EACpB;EACA,MAAMC,QAAQ,GAAG1B,SAAS,CAACF,WAAW,CAACwB,QAAQ,CAAC,CAAC;EACjD,IAAIK,KAAK,GAAGF,oBAAoB,GAAGA,oBAAoB,CAACC,QAAQ,CAAC,GAAG9B,QAAQ,CAAC8B,QAAQ,CAAC;EAEtFC,KAAK,GAAGA,KAAK,CAAC1B,MAAM,CAACkB,IAAI,IAAI,SAAS,CAACS,IAAI,CAACT,IAAI,CAAC,CAAC;EAElD,IAAIQ,KAAK,CAACE,MAAM,KAAK,CAAC,EAAE;IACtB,OAAO,EAAE;EACX;EAEA,IAAIC,SAAS,GAAGH,KAAK,CAAC,CAAC,CAAC,CAACI,iBAAiB,CAAC,CAAC;EAE5C,IAAIR,mBAAmB,CAACS,OAAO,CAACF,SAAS,CAAC,GAAG,CAAC,CAAC,IAAIH,KAAK,CAACE,MAAM,GAAG,CAAC,EAAE;IACnEC,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGH,KAAK,CAAC,CAAC,CAAC;IACtC,IAAIH,oBAAoB,EAAE;MACxB,IAAIA,oBAAoB,CAACS,QAAQ,CAACN,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE;QAC3CG,SAAS,GAAGA,SAAS,GAAG,GAAG,GAAGH,KAAK,CAAC,CAAC,CAAC;MACxC;IACF;EACF;EAEA,OAAOG,SAAS;AAClB;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAASI,KAAK,EAAEC,UAAU,EAAE;EACzC,MAAMZ,mBAAmB,GAAGY,UAAU,CAACC,SAAS,CAAC,qBAAqB,CAAC;EACvE,MAAMZ,oBAAoB,GAAGW,UAAU,CAACC,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMX,oBAAoB,GAAGU,UAAU,CAACE,SAAS,CAAC,sBAAsB,CAAC;EACzE,MAAMC,iBAAiB,GAAGH,UAAU,CAACE,SAAS,CAAC,mBAAmB,CAAC;EAEnE,IAAIE,IAAI,GAAGL,KAAK,CAACM,OAAO,CAAC,CAAC;EAC1BD,IAAI,GAAGnC,gBAAgB,CAACmC,IAAI,CAAC;EAC7BA,IAAI,GAAGjC,gBAAgB,CAACiC,IAAI,CAAC;EAC7BA,IAAI,GAAGlC,wBAAwB,CAACkC,IAAI,EAAEL,KAAK,CAACO,WAAW,IAAIP,KAAK,CAACO,WAAW,CAACC,UAAU,CAAC;;EAExF;EACAH,IAAI,GAAGA,IAAI,CAACI,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;;EAEpC;EACAJ,IAAI,GAAGA,IAAI,CAACI,OAAO,CAAC,+CAA+C,EAAE,EAAE,CAAC;EAExE,IAAI/B,SAAS,GAAGf,YAAY,CAAC0C,IAAI,EAAED,iBAAiB,CAAC;EAErD,MAAMM,YAAY,GAAGhC,SAAS,CAACiC,GAAG,CAAC,UAASvB,QAAQ,EAAE;IACpD,MAAMN,SAAS,GAAGK,oBAAoB,CACpCC,QAAQ,EACRC,mBAAmB,EACnBC,oBAAoB,EACpBC,oBACF,CAAC;IAED,OAAO;MAACH,QAAQ;MAAEN;IAAS,CAAC;EAC9B,CAAC,CAAC;EAEF,MAAM8B,oBAAoB,GAAGF,YAAY,CAAC3C,MAAM,CAAC,UAAS8C,IAAI,EAAE;IAC9D,IAAI,CAACA,IAAI,CAAC/B,SAAS,EAAE,OAAO,KAAK;IAEjC,MAAMU,QAAQ,GAAG1B,SAAS,CAACF,WAAW,CAACiD,IAAI,CAACzB,QAAQ,CAAC,CAAC;IACtD,MAAM0B,gBAAgB,GAAGtB,QAAQ,CAACiB,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IACrD,MAAMM,YAAY,GAAGD,gBAAgB,CAACnB,MAAM,GAAG,CAAC,IAAI,UAAU,CAACD,IAAI,CAACoB,gBAAgB,CAAC;IAErF,OAAO,CAACC,YAAY;EACtB,CAAC,CAAC;EAEF,MAAMC,iBAAiB,GAAGJ,oBAAoB,CAACD,GAAG,CAACE,IAAI,IAAIA,IAAI,CAACzB,QAAQ,CAAC;EACzE,MAAMX,kBAAkB,GAAGmC,oBAAoB,CAACD,GAAG,CAACE,IAAI,IAAIA,IAAI,CAAC/B,SAAS,CAAC;EAE3E,OAAON,iBAAiB,CAACC,kBAAkB,EAAEuC,iBAAiB,CAAC;AACjE","ignoreList":[]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "axyseo",
3
- "version": "2025.1.0-blog.3",
3
+ "version": "2025.1.0-blog.5",
4
4
  "main": "build/cjs/index.js",
5
5
  "module": "build/esm/index.js",
6
6
  "exports": {