axyseo 2.0.0-alpha.0.0.4 → 2.0.0-alpha.0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (637) hide show
  1. package/package.json +5 -2
  2. package/.browserslistrc +0 -1
  3. package/.gitattributes +0 -1
  4. package/babel.config.js +0 -3
  5. package/eslint.config.mjs +0 -119
  6. package/src/bundledPlugins/index.js +0 -5
  7. package/src/bundledPlugins/previouslyUsedKeywords.js +0 -192
  8. package/src/config/diacritics.js +0 -106
  9. package/src/config/getTransliterations.js +0 -1447
  10. package/src/config/transliterationsWPstyle.js +0 -774
  11. package/src/config/wordBoundaries.js +0 -23
  12. package/src/config/wordBoundariesWithoutPunctuation.js +0 -9
  13. package/src/const/analysis.js +0 -41
  14. package/src/errors/invalidType.js +0 -14
  15. package/src/errors/missingArgument.js +0 -14
  16. package/src/helpers/createMeasurementElement.js +0 -40
  17. package/src/helpers/domManipulation.js +0 -65
  18. package/src/helpers/errors.js +0 -26
  19. package/src/helpers/factory.js +0 -219
  20. package/src/helpers/formatNumber.js +0 -12
  21. package/src/helpers/formatString.js +0 -33
  22. package/src/helpers/getLanguagesWithWordComplexity.js +0 -8
  23. package/src/helpers/getLanguagesWithWordFormSupport.js +0 -11
  24. package/src/helpers/getWordComplexityConfig.js +0 -20
  25. package/src/helpers/getWordComplexityHelper.js +0 -20
  26. package/src/helpers/htmlEntities.js +0 -41
  27. package/src/helpers/includesAny.js +0 -19
  28. package/src/helpers/index.js +0 -127
  29. package/src/helpers/shortlinker/Shortlinker.js +0 -75
  30. package/src/helpers/shortlinker/index.js +0 -1
  31. package/src/helpers/shortlinker/singleton.js +0 -68
  32. package/src/helpers/types.js +0 -34
  33. package/src/index.js +0 -60
  34. package/src/languageProcessing/AbstractResearcher.js +0 -366
  35. package/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +0 -125
  36. package/src/languageProcessing/helpers/html/getFieldsToMark.js +0 -29
  37. package/src/languageProcessing/helpers/html/getSubheadingTexts.js +0 -47
  38. package/src/languageProcessing/helpers/html/getSubheadings.js +0 -95
  39. package/src/languageProcessing/helpers/html/html.js +0 -176
  40. package/src/languageProcessing/helpers/html/htmlParser.js +0 -145
  41. package/src/languageProcessing/helpers/html/matchParagraphs.js +0 -62
  42. package/src/languageProcessing/helpers/html/normalizeHTML.js +0 -16
  43. package/src/languageProcessing/helpers/image/getAltAttribute.js +0 -20
  44. package/src/languageProcessing/helpers/image/getImagesInTree.js +0 -16
  45. package/src/languageProcessing/helpers/image/imageInText.js +0 -19
  46. package/src/languageProcessing/helpers/index.js +0 -12
  47. package/src/languageProcessing/helpers/language/getLanguage.js +0 -9
  48. package/src/languageProcessing/helpers/link/checkNofollow.js +0 -38
  49. package/src/languageProcessing/helpers/link/getAnchorsFromText.js +0 -32
  50. package/src/languageProcessing/helpers/link/getLinkType.js +0 -32
  51. package/src/languageProcessing/helpers/match/findKeywordFormsInString.js +0 -101
  52. package/src/languageProcessing/helpers/match/isDoubleQuoted.js +0 -13
  53. package/src/languageProcessing/helpers/match/matchTextWithArray.js +0 -36
  54. package/src/languageProcessing/helpers/match/matchTextWithTransliteration.js +0 -58
  55. package/src/languageProcessing/helpers/match/matchTextWithWord.js +0 -45
  56. package/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +0 -164
  57. package/src/languageProcessing/helpers/match/processExactMatchRequest.js +0 -20
  58. package/src/languageProcessing/helpers/morphology/baseStemmer.js +0 -11
  59. package/src/languageProcessing/helpers/morphology/buildFormRule.js +0 -19
  60. package/src/languageProcessing/helpers/morphology/buildTopicStems.js +0 -169
  61. package/src/languageProcessing/helpers/morphology/createRulesFromArrays.js +0 -45
  62. package/src/languageProcessing/helpers/morphology/exceptionListHelpers.js +0 -65
  63. package/src/languageProcessing/helpers/morphology/findMatchingEndingInArray.js +0 -24
  64. package/src/languageProcessing/helpers/morphology/flattenSortLength.js +0 -14
  65. package/src/languageProcessing/helpers/morphology/getAllWordsFromPaper.js +0 -39
  66. package/src/languageProcessing/helpers/morphology/regexHelpers.js +0 -44
  67. package/src/languageProcessing/helpers/morphology/stemHelpers.js +0 -38
  68. package/src/languageProcessing/helpers/morphology/stemPrefixedFunctionWords.js +0 -31
  69. package/src/languageProcessing/helpers/passiveVoice/periphrastic/directPrecedenceException.js +0 -36
  70. package/src/languageProcessing/helpers/passiveVoice/periphrastic/freeAuxiliaryParticipleOrder/getClausesSplitOnStopWords.js +0 -113
  71. package/src/languageProcessing/helpers/passiveVoice/periphrastic/freeAuxiliaryParticipleOrder/nonDirectParticiplePrecedenceException.js +0 -45
  72. package/src/languageProcessing/helpers/passiveVoice/periphrastic/getClauses.js +0 -231
  73. package/src/languageProcessing/helpers/passiveVoice/periphrastic/getIndicesWithRegex.js +0 -20
  74. package/src/languageProcessing/helpers/passiveVoice/periphrastic/matchRegularParticiples.js +0 -23
  75. package/src/languageProcessing/helpers/passiveVoice/periphrastic/precedenceException.js +0 -40
  76. package/src/languageProcessing/helpers/prominentWords/determineProminentWords.js +0 -238
  77. package/src/languageProcessing/helpers/regex/createRegexFromArray.js +0 -35
  78. package/src/languageProcessing/helpers/regex/createRegexFromDoubleArray.js +0 -34
  79. package/src/languageProcessing/helpers/regex/createWordRegex.js +0 -30
  80. package/src/languageProcessing/helpers/regex/matchStringWithRegex.js +0 -19
  81. package/src/languageProcessing/helpers/regex/searchAndReplaceWithOneRegex.js +0 -14
  82. package/src/languageProcessing/helpers/sanitize/doubleQuotes.js +0 -12
  83. package/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js +0 -131
  84. package/src/languageProcessing/helpers/sanitize/mergeListItems.js +0 -24
  85. package/src/languageProcessing/helpers/sanitize/parseSynonyms.js +0 -20
  86. package/src/languageProcessing/helpers/sanitize/quotes.js +0 -46
  87. package/src/languageProcessing/helpers/sanitize/removeEmailAddresses.js +0 -12
  88. package/src/languageProcessing/helpers/sanitize/removePunctuation.js +0 -64
  89. package/src/languageProcessing/helpers/sanitize/removePunctuationExceptQuotes.js +0 -18
  90. package/src/languageProcessing/helpers/sanitize/removeSentenceTerminators.js +0 -13
  91. package/src/languageProcessing/helpers/sanitize/removeURLs.js +0 -13
  92. package/src/languageProcessing/helpers/sanitize/sanitizeLineBreakTag.js +0 -11
  93. package/src/languageProcessing/helpers/sanitize/sanitizeString.js +0 -18
  94. package/src/languageProcessing/helpers/sanitize/stripHTMLTags.js +0 -57
  95. package/src/languageProcessing/helpers/sanitize/stripNonTextTags.js +0 -15
  96. package/src/languageProcessing/helpers/sanitize/stripNumbers.js +0 -21
  97. package/src/languageProcessing/helpers/sanitize/stripSpaces.js +0 -23
  98. package/src/languageProcessing/helpers/sanitize/stripWordBoundaries.js +0 -65
  99. package/src/languageProcessing/helpers/sanitize/unifyWhitespace.js +0 -61
  100. package/src/languageProcessing/helpers/sentence/SentenceTokenizer.js +0 -640
  101. package/src/languageProcessing/helpers/sentence/countSentences.js +0 -20
  102. package/src/languageProcessing/helpers/sentence/getSentences.js +0 -65
  103. package/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +0 -55
  104. package/src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer.js +0 -28
  105. package/src/languageProcessing/helpers/sentence/sentencesLength.js +0 -31
  106. package/src/languageProcessing/helpers/syllables/DeviationFragment.js +0 -112
  107. package/src/languageProcessing/helpers/syllables/countSyllables.js +0 -182
  108. package/src/languageProcessing/helpers/syllables/syllableCountIterator.js +0 -56
  109. package/src/languageProcessing/helpers/syllables/syllableCountStep.js +0 -68
  110. package/src/languageProcessing/helpers/transform/transformWordsWithHyphens.js +0 -17
  111. package/src/languageProcessing/helpers/transliterate/replaceDiacritics.js +0 -22
  112. package/src/languageProcessing/helpers/transliterate/specialCharacterMappings.js +0 -214
  113. package/src/languageProcessing/helpers/transliterate/transliterate.js +0 -20
  114. package/src/languageProcessing/helpers/transliterate/transliterateWPstyle.js +0 -21
  115. package/src/languageProcessing/helpers/url/parseSlug.js +0 -10
  116. package/src/languageProcessing/helpers/url/url.js +0 -172
  117. package/src/languageProcessing/helpers/word/addWordboundary.js +0 -37
  118. package/src/languageProcessing/helpers/word/areWordsInSentence.js +0 -16
  119. package/src/languageProcessing/helpers/word/countMetaDescriptionLength.js +0 -18
  120. package/src/languageProcessing/helpers/word/countWords.js +0 -14
  121. package/src/languageProcessing/helpers/word/createPunctuationTokens.js +0 -42
  122. package/src/languageProcessing/helpers/word/filterWordsFromArray.js +0 -15
  123. package/src/languageProcessing/helpers/word/followsIndex.js +0 -25
  124. package/src/languageProcessing/helpers/word/getAllWordsFromTree.js +0 -23
  125. package/src/languageProcessing/helpers/word/getWords.js +0 -43
  126. package/src/languageProcessing/helpers/word/includesIndex.js +0 -30
  127. package/src/languageProcessing/helpers/word/indices.js +0 -146
  128. package/src/languageProcessing/helpers/word/markWordsInSentences.js +0 -173
  129. package/src/languageProcessing/helpers/word/matchWordInSentence.js +0 -61
  130. package/src/languageProcessing/helpers/word/splitIntoTokens.js +0 -46
  131. package/src/languageProcessing/index.js +0 -91
  132. package/src/languageProcessing/languages/_default/Researcher.js +0 -34
  133. package/src/languageProcessing/languages/_default/helpers/getStemmer.js +0 -11
  134. package/src/languageProcessing/languages/ar/Researcher.js +0 -46
  135. package/src/languageProcessing/languages/ar/config/firstWordExceptions.js +0 -14
  136. package/src/languageProcessing/languages/ar/config/functionWords.js +0 -329
  137. package/src/languageProcessing/languages/ar/config/internal/passiveVerbsWithLongVowel.js +0 -570
  138. package/src/languageProcessing/languages/ar/config/prefixedFunctionWords.js +0 -5
  139. package/src/languageProcessing/languages/ar/config/transitionWords.js +0 -19
  140. package/src/languageProcessing/languages/ar/config/twoPartTransitionWords.js +0 -7
  141. package/src/languageProcessing/languages/ar/helpers/createBasicWordForms.js +0 -32
  142. package/src/languageProcessing/languages/ar/helpers/getStemmer.js +0 -22
  143. package/src/languageProcessing/languages/ar/helpers/internal/stem.js +0 -632
  144. package/src/languageProcessing/languages/ar/helpers/isPassiveSentence.js +0 -33
  145. package/src/languageProcessing/languages/ca/Researcher.js +0 -43
  146. package/src/languageProcessing/languages/ca/config/sentenceLength.js +0 -3
  147. package/src/languageProcessing/languages/ca/config/transitionWords.js +0 -31
  148. package/src/languageProcessing/languages/ca/config/twoPartTransitionWords.js +0 -7
  149. package/src/languageProcessing/languages/ca/helpers/getStemmer.js +0 -11
  150. package/src/languageProcessing/languages/cs/Researcher.js +0 -44
  151. package/src/languageProcessing/languages/cs/config/firstWordExceptions.js +0 -15
  152. package/src/languageProcessing/languages/cs/config/functionWords.js +0 -121
  153. package/src/languageProcessing/languages/cs/config/internal/passiveVoiceAuxiliaries.js +0 -38
  154. package/src/languageProcessing/languages/cs/config/internal/passiveVoiceEndings.js +0 -54
  155. package/src/languageProcessing/languages/cs/config/stopWords.js +0 -42
  156. package/src/languageProcessing/languages/cs/config/transitionWords.js +0 -26
  157. package/src/languageProcessing/languages/cs/config/twoPartTransitionWords.js +0 -8
  158. package/src/languageProcessing/languages/cs/helpers/getClauses.js +0 -26
  159. package/src/languageProcessing/languages/cs/helpers/getStemmer.js +0 -22
  160. package/src/languageProcessing/languages/cs/helpers/internal/getParticiples.js +0 -16
  161. package/src/languageProcessing/languages/cs/helpers/internal/stem.js +0 -499
  162. package/src/languageProcessing/languages/cs/values/Clause.js +0 -34
  163. package/src/languageProcessing/languages/de/Researcher.js +0 -52
  164. package/src/languageProcessing/languages/de/config/firstWordExceptions.js +0 -17
  165. package/src/languageProcessing/languages/de/config/functionWords.js +0 -303
  166. package/src/languageProcessing/languages/de/config/internal/exceptionsParticiplesActive.js +0 -2231
  167. package/src/languageProcessing/languages/de/config/internal/passiveVoiceAuxiliaries.js +0 -96
  168. package/src/languageProcessing/languages/de/config/internal/passiveVoiceIrregulars.js +0 -368
  169. package/src/languageProcessing/languages/de/config/internal/passiveVoiceRegex.js +0 -72
  170. package/src/languageProcessing/languages/de/config/keyphraseLength.js +0 -11
  171. package/src/languageProcessing/languages/de/config/stopWords.js +0 -67
  172. package/src/languageProcessing/languages/de/config/syllables.json +0 -460
  173. package/src/languageProcessing/languages/de/config/transitionWords.js +0 -31
  174. package/src/languageProcessing/languages/de/config/twoPartTransitionWords.js +0 -12
  175. package/src/languageProcessing/languages/de/config/wordComplexity.js +0 -4
  176. package/src/languageProcessing/languages/de/helpers/calculateFleschReadingScore.js +0 -18
  177. package/src/languageProcessing/languages/de/helpers/checkIfWordIsComplex.js +0 -40
  178. package/src/languageProcessing/languages/de/helpers/checkIfWordIsFunction.js +0 -15
  179. package/src/languageProcessing/languages/de/helpers/getClauses.js +0 -25
  180. package/src/languageProcessing/languages/de/helpers/getStemmer.js +0 -22
  181. package/src/languageProcessing/languages/de/helpers/internal/SentenceTokenizer.js +0 -31
  182. package/src/languageProcessing/languages/de/helpers/internal/detectAndStemRegularParticiple.js +0 -128
  183. package/src/languageProcessing/languages/de/helpers/internal/determineStem.js +0 -128
  184. package/src/languageProcessing/languages/de/helpers/internal/getParticiples.js +0 -40
  185. package/src/languageProcessing/languages/de/helpers/internal/stem.js +0 -215
  186. package/src/languageProcessing/languages/de/helpers/memoizedSentenceTokenizer.js +0 -28
  187. package/src/languageProcessing/languages/de/values/Clause.js +0 -85
  188. package/src/languageProcessing/languages/el/Researcher.js +0 -46
  189. package/src/languageProcessing/languages/el/config/firstWordExceptions.js +0 -47
  190. package/src/languageProcessing/languages/el/config/functionWords.js +0 -116
  191. package/src/languageProcessing/languages/el/config/internal/auxiliaries.js +0 -19
  192. package/src/languageProcessing/languages/el/config/internal/morphologicalPassiveSuffixes.js +0 -87
  193. package/src/languageProcessing/languages/el/config/internal/nonPassiveVerbStems.js +0 -138
  194. package/src/languageProcessing/languages/el/config/stopWords.js +0 -854
  195. package/src/languageProcessing/languages/el/config/transitionWords.js +0 -26
  196. package/src/languageProcessing/languages/el/config/twoPartTransitionWords.js +0 -10
  197. package/src/languageProcessing/languages/el/helpers/getClauses.js +0 -25
  198. package/src/languageProcessing/languages/el/helpers/getStemmer.js +0 -21
  199. package/src/languageProcessing/languages/el/helpers/internal/getParticiples.js +0 -20
  200. package/src/languageProcessing/languages/el/helpers/internal/stem.js +0 -368
  201. package/src/languageProcessing/languages/el/helpers/isPassiveSentence.js +0 -38
  202. package/src/languageProcessing/languages/el/values/Clause.js +0 -37
  203. package/src/languageProcessing/languages/en/Researcher.js +0 -46
  204. package/src/languageProcessing/languages/en/config/abbreviations.js +0 -55
  205. package/src/languageProcessing/languages/en/config/firstWordExceptions.js +0 -14
  206. package/src/languageProcessing/languages/en/config/functionWords.js +0 -186
  207. package/src/languageProcessing/languages/en/config/internal/passiveVoiceAuxiliaries.js +0 -44
  208. package/src/languageProcessing/languages/en/config/internal/passiveVoiceIrregulars.js +0 -354
  209. package/src/languageProcessing/languages/en/config/internal/passiveVoiceNonVerbEndingEd.js +0 -3047
  210. package/src/languageProcessing/languages/en/config/regularParticiplesRegex.js +0 -5
  211. package/src/languageProcessing/languages/en/config/stopWords.js +0 -52
  212. package/src/languageProcessing/languages/en/config/syllables.json +0 -86
  213. package/src/languageProcessing/languages/en/config/transitionWords.js +0 -48
  214. package/src/languageProcessing/languages/en/config/twoPartTransitionWords.js +0 -7
  215. package/src/languageProcessing/languages/en/config/wordComplexity.js +0 -5
  216. package/src/languageProcessing/languages/en/helpers/calculateFleschReadingScore.js +0 -18
  217. package/src/languageProcessing/languages/en/helpers/checkIfWordIsComplex.js +0 -43
  218. package/src/languageProcessing/languages/en/helpers/getClauses.js +0 -49
  219. package/src/languageProcessing/languages/en/helpers/getStemmer.js +0 -22
  220. package/src/languageProcessing/languages/en/helpers/internal/determineStem.js +0 -178
  221. package/src/languageProcessing/languages/en/helpers/internal/getAdjectiveStem.js +0 -162
  222. package/src/languageProcessing/languages/en/helpers/internal/getParticiples.js +0 -25
  223. package/src/languageProcessing/languages/en/helpers/internal/getVerbStem.js +0 -237
  224. package/src/languageProcessing/languages/en/values/Clause.js +0 -68
  225. package/src/languageProcessing/languages/es/Researcher.js +0 -48
  226. package/src/languageProcessing/languages/es/config/firstWordExceptions.js +0 -16
  227. package/src/languageProcessing/languages/es/config/functionWords.js +0 -321
  228. package/src/languageProcessing/languages/es/config/internal/passiveVoiceAuxiliaries.js +0 -60
  229. package/src/languageProcessing/languages/es/config/internal/passiveVoiceParticiples.js +0 -7327
  230. package/src/languageProcessing/languages/es/config/sentenceLength.js +0 -3
  231. package/src/languageProcessing/languages/es/config/stopWords.js +0 -33
  232. package/src/languageProcessing/languages/es/config/syllables.json +0 -176
  233. package/src/languageProcessing/languages/es/config/transitionWords.js +0 -40
  234. package/src/languageProcessing/languages/es/config/twoPartTransitionWords.js +0 -10
  235. package/src/languageProcessing/languages/es/config/wordComplexity.js +0 -4
  236. package/src/languageProcessing/languages/es/helpers/calculateFleschReadingScore.js +0 -18
  237. package/src/languageProcessing/languages/es/helpers/checkIfWordIsComplex.js +0 -56
  238. package/src/languageProcessing/languages/es/helpers/getClauses.js +0 -29
  239. package/src/languageProcessing/languages/es/helpers/getStemmer.js +0 -22
  240. package/src/languageProcessing/languages/es/helpers/internal/checkVerbStemModifications.js +0 -41
  241. package/src/languageProcessing/languages/es/helpers/internal/getParticiples.js +0 -35
  242. package/src/languageProcessing/languages/es/helpers/internal/stem.js +0 -793
  243. package/src/languageProcessing/languages/es/values/Clause.js +0 -47
  244. package/src/languageProcessing/languages/fa/Researcher.js +0 -47
  245. package/src/languageProcessing/languages/fa/config/firstWordExceptions.js +0 -12
  246. package/src/languageProcessing/languages/fa/config/functionWords.js +0 -122
  247. package/src/languageProcessing/languages/fa/config/internal/participles.js +0 -1429
  248. package/src/languageProcessing/languages/fa/config/sentenceLength.js +0 -3
  249. package/src/languageProcessing/languages/fa/config/transitionWords.js +0 -20
  250. package/src/languageProcessing/languages/fa/config/twoPartTransitionWords.js +0 -9
  251. package/src/languageProcessing/languages/fa/helpers/createBasicWordForms.js +0 -97
  252. package/src/languageProcessing/languages/fa/helpers/getStemmer.js +0 -13
  253. package/src/languageProcessing/languages/fa/helpers/isPassiveSentence.js +0 -14
  254. package/src/languageProcessing/languages/fr/Researcher.js +0 -46
  255. package/src/languageProcessing/languages/fr/config/firstWordExceptions.js +0 -16
  256. package/src/languageProcessing/languages/fr/config/functionWords.js +0 -281
  257. package/src/languageProcessing/languages/fr/config/internal/exceptionsParticiplesActive.js +0 -1510
  258. package/src/languageProcessing/languages/fr/config/internal/passiveVoiceAuxiliaries.js +0 -108
  259. package/src/languageProcessing/languages/fr/config/internal/passiveVoiceIrregulars.js +0 -565
  260. package/src/languageProcessing/languages/fr/config/stopWords.js +0 -119
  261. package/src/languageProcessing/languages/fr/config/syllables.json +0 -1426
  262. package/src/languageProcessing/languages/fr/config/transitionWords.js +0 -59
  263. package/src/languageProcessing/languages/fr/config/twoPartTransitionWords.js +0 -15
  264. package/src/languageProcessing/languages/fr/config/wordComplexity.js +0 -4
  265. package/src/languageProcessing/languages/fr/helpers/calculateFleschReadingScore.js +0 -18
  266. package/src/languageProcessing/languages/fr/helpers/checkIfWordIsComplex.js +0 -67
  267. package/src/languageProcessing/languages/fr/helpers/getClauses.js +0 -34
  268. package/src/languageProcessing/languages/fr/helpers/getStemmer.js +0 -22
  269. package/src/languageProcessing/languages/fr/helpers/internal/getParticiples.js +0 -72
  270. package/src/languageProcessing/languages/fr/helpers/internal/stem.js +0 -633
  271. package/src/languageProcessing/languages/fr/values/Clause.js +0 -96
  272. package/src/languageProcessing/languages/he/Researcher.js +0 -50
  273. package/src/languageProcessing/languages/he/config/firstWordExceptions.js +0 -13
  274. package/src/languageProcessing/languages/he/config/functionWords.js +0 -564
  275. package/src/languageProcessing/languages/he/config/internal/regularRootsHufal.js +0 -186
  276. package/src/languageProcessing/languages/he/config/internal/regularRootsNifal.js +0 -195
  277. package/src/languageProcessing/languages/he/config/internal/regularRootsPual.js +0 -168
  278. package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsHufal.js +0 -188
  279. package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsNifal.js +0 -197
  280. package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsPual.js +0 -170
  281. package/src/languageProcessing/languages/he/config/prefixedFunctionWords.js +0 -2
  282. package/src/languageProcessing/languages/he/config/sentenceLength.js +0 -3
  283. package/src/languageProcessing/languages/he/config/transitionWords.js +0 -28
  284. package/src/languageProcessing/languages/he/config/twoPartTransitionWords.js +0 -8
  285. package/src/languageProcessing/languages/he/helpers/createBasicWordForms.js +0 -33
  286. package/src/languageProcessing/languages/he/helpers/getStemmer.js +0 -22
  287. package/src/languageProcessing/languages/he/helpers/internal/stem.js +0 -52
  288. package/src/languageProcessing/languages/he/helpers/isPassiveSentence.js +0 -96
  289. package/src/languageProcessing/languages/he/helpers/stem.js +0 -52
  290. package/src/languageProcessing/languages/hu/Researcher.js +0 -48
  291. package/src/languageProcessing/languages/hu/config/firstWordExceptions.js +0 -31
  292. package/src/languageProcessing/languages/hu/config/functionWords.js +0 -284
  293. package/src/languageProcessing/languages/hu/config/internal/auxiliaries.js +0 -97
  294. package/src/languageProcessing/languages/hu/config/internal/morphologicalPassiveAffixes.js +0 -125
  295. package/src/languageProcessing/languages/hu/config/internal/nonPassivesInVaAndVe.js +0 -265
  296. package/src/languageProcessing/languages/hu/config/internal/odikVerbs.js +0 -273
  297. package/src/languageProcessing/languages/hu/config/internal/participles.js +0 -412
  298. package/src/languageProcessing/languages/hu/config/stopWords.js +0 -213
  299. package/src/languageProcessing/languages/hu/config/transitionWords.js +0 -42
  300. package/src/languageProcessing/languages/hu/config/twoPartTransitionWords.js +0 -34
  301. package/src/languageProcessing/languages/hu/helpers/getClauses.js +0 -25
  302. package/src/languageProcessing/languages/hu/helpers/getStemmer.js +0 -22
  303. package/src/languageProcessing/languages/hu/helpers/internal/getParticiples.js +0 -21
  304. package/src/languageProcessing/languages/hu/helpers/internal/stem.js +0 -389
  305. package/src/languageProcessing/languages/hu/helpers/isPassiveSentence.js +0 -54
  306. package/src/languageProcessing/languages/hu/values/Clause.js +0 -41
  307. package/src/languageProcessing/languages/id/Researcher.js +0 -46
  308. package/src/languageProcessing/languages/id/config/firstWordExceptions.js +0 -13
  309. package/src/languageProcessing/languages/id/config/functionWords.js +0 -202
  310. package/src/languageProcessing/languages/id/config/internal/nonPassiveVerbsStartingDi.js +0 -215
  311. package/src/languageProcessing/languages/id/config/transitionWords.js +0 -62
  312. package/src/languageProcessing/languages/id/config/twoPartTransitionWords.js +0 -13
  313. package/src/languageProcessing/languages/id/helpers/getStemmer.js +0 -22
  314. package/src/languageProcessing/languages/id/helpers/internal/stem.js +0 -462
  315. package/src/languageProcessing/languages/id/helpers/internal/stemHelpers.js +0 -78
  316. package/src/languageProcessing/languages/id/helpers/isPassiveSentence.js +0 -39
  317. package/src/languageProcessing/languages/id/helpers/splitIntoTokensCustom.js +0 -47
  318. package/src/languageProcessing/languages/it/Researcher.js +0 -48
  319. package/src/languageProcessing/languages/it/config/firstWordExceptions.js +0 -17
  320. package/src/languageProcessing/languages/it/config/functionWords.js +0 -277
  321. package/src/languageProcessing/languages/it/config/internal/passiveVoiceAuxiliaries.js +0 -98
  322. package/src/languageProcessing/languages/it/config/internal/passiveVoiceParticiples.js +0 -7197
  323. package/src/languageProcessing/languages/it/config/sentenceLength.js +0 -3
  324. package/src/languageProcessing/languages/it/config/stopWords.js +0 -57
  325. package/src/languageProcessing/languages/it/config/syllables.json +0 -573
  326. package/src/languageProcessing/languages/it/config/transitionWords.js +0 -104
  327. package/src/languageProcessing/languages/it/config/twoPartTransitionWords.js +0 -9
  328. package/src/languageProcessing/languages/it/helpers/calculateFleschReadingScore.js +0 -15
  329. package/src/languageProcessing/languages/it/helpers/getClauses.js +0 -32
  330. package/src/languageProcessing/languages/it/helpers/getStemmer.js +0 -22
  331. package/src/languageProcessing/languages/it/helpers/internal/getParticiples.js +0 -34
  332. package/src/languageProcessing/languages/it/helpers/internal/stem.js +0 -436
  333. package/src/languageProcessing/languages/it/values/Clause.js +0 -47
  334. package/src/languageProcessing/languages/ja/Researcher.js +0 -86
  335. package/src/languageProcessing/languages/ja/config/assessmentApplicabilityCharacterCount.js +0 -4
  336. package/src/languageProcessing/languages/ja/config/firstWordExceptions.js +0 -8
  337. package/src/languageProcessing/languages/ja/config/functionWords.js +0 -563
  338. package/src/languageProcessing/languages/ja/config/keyphraseLength.js +0 -16
  339. package/src/languageProcessing/languages/ja/config/metaDescriptionLength.js +0 -4
  340. package/src/languageProcessing/languages/ja/config/paragraphLength.js +0 -10
  341. package/src/languageProcessing/languages/ja/config/sentenceLength.js +0 -4
  342. package/src/languageProcessing/languages/ja/config/subheadingsTooLong.js +0 -18
  343. package/src/languageProcessing/languages/ja/config/textLength.js +0 -47
  344. package/src/languageProcessing/languages/ja/config/topicLength.js +0 -5
  345. package/src/languageProcessing/languages/ja/config/transitionWords.js +0 -354
  346. package/src/languageProcessing/languages/ja/customResearches/findKeyphraseInSEOTitle.js +0 -98
  347. package/src/languageProcessing/languages/ja/customResearches/getKeyphraseLength.js +0 -19
  348. package/src/languageProcessing/languages/ja/customResearches/getWordForms.js +0 -50
  349. package/src/languageProcessing/languages/ja/customResearches/textLength.js +0 -24
  350. package/src/languageProcessing/languages/ja/helpers/countCharacters.js +0 -19
  351. package/src/languageProcessing/languages/ja/helpers/customGetStemmer.js +0 -21
  352. package/src/languageProcessing/languages/ja/helpers/getContentWords.js +0 -21
  353. package/src/languageProcessing/languages/ja/helpers/getWords.js +0 -31
  354. package/src/languageProcessing/languages/ja/helpers/internal/SentenceTokenizer.js +0 -102
  355. package/src/languageProcessing/languages/ja/helpers/internal/createWordForms.js +0 -68
  356. package/src/languageProcessing/languages/ja/helpers/internal/determineStem.js +0 -17
  357. package/src/languageProcessing/languages/ja/helpers/matchTextWithWord.js +0 -53
  358. package/src/languageProcessing/languages/ja/helpers/matchTransitionWords.js +0 -25
  359. package/src/languageProcessing/languages/ja/helpers/memoizedSentenceTokenizer.js +0 -28
  360. package/src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom.js +0 -20
  361. package/src/languageProcessing/languages/ja/helpers/wordsCharacterCount.js +0 -13
  362. package/src/languageProcessing/languages/nb/Researcher.js +0 -45
  363. package/src/languageProcessing/languages/nb/config/firstWordExceptions.js +0 -12
  364. package/src/languageProcessing/languages/nb/config/functionWords.js +0 -106
  365. package/src/languageProcessing/languages/nb/config/internal/participles.js +0 -3127
  366. package/src/languageProcessing/languages/nb/config/internal/passiveVoiceAuxiliaries.js +0 -15
  367. package/src/languageProcessing/languages/nb/config/stopWords.js +0 -39
  368. package/src/languageProcessing/languages/nb/config/transitionWords.js +0 -21
  369. package/src/languageProcessing/languages/nb/config/twoPartTransitionWords.js +0 -10
  370. package/src/languageProcessing/languages/nb/helpers/getClauses.js +0 -28
  371. package/src/languageProcessing/languages/nb/helpers/getStemmer.js +0 -22
  372. package/src/languageProcessing/languages/nb/helpers/internal/getParticiples.js +0 -24
  373. package/src/languageProcessing/languages/nb/helpers/internal/stem.js +0 -133
  374. package/src/languageProcessing/languages/nb/values/Clause.js +0 -43
  375. package/src/languageProcessing/languages/nl/Researcher.js +0 -48
  376. package/src/languageProcessing/languages/nl/config/firstWordExceptions.js +0 -15
  377. package/src/languageProcessing/languages/nl/config/functionWords.js +0 -233
  378. package/src/languageProcessing/languages/nl/config/internal/nonParticiples.js +0 -2515
  379. package/src/languageProcessing/languages/nl/config/internal/passiveVoiceAuxiliaries.js +0 -13
  380. package/src/languageProcessing/languages/nl/config/internal/passiveVoiceIrregulars.js +0 -474
  381. package/src/languageProcessing/languages/nl/config/keyphraseLength.js +0 -10
  382. package/src/languageProcessing/languages/nl/config/stopWords.js +0 -35
  383. package/src/languageProcessing/languages/nl/config/syllables.json +0 -343
  384. package/src/languageProcessing/languages/nl/config/transitionWords.js +0 -22
  385. package/src/languageProcessing/languages/nl/config/twoPartTransitionWords.js +0 -8
  386. package/src/languageProcessing/languages/nl/helpers/calculateFleschReadingScore.js +0 -15
  387. package/src/languageProcessing/languages/nl/helpers/getClauses.js +0 -25
  388. package/src/languageProcessing/languages/nl/helpers/getStemmer.js +0 -22
  389. package/src/languageProcessing/languages/nl/helpers/internal/checkExceptionsWithFullForms.js +0 -128
  390. package/src/languageProcessing/languages/nl/helpers/internal/detectAndStemRegularParticiple.js +0 -324
  391. package/src/languageProcessing/languages/nl/helpers/internal/detectAndStemSuffixes.js +0 -164
  392. package/src/languageProcessing/languages/nl/helpers/internal/determineStem.js +0 -133
  393. package/src/languageProcessing/languages/nl/helpers/internal/getParticiples.js +0 -25
  394. package/src/languageProcessing/languages/nl/helpers/internal/getStemWordsWithTAndDEnding.js +0 -183
  395. package/src/languageProcessing/languages/nl/helpers/internal/stem.js +0 -146
  396. package/src/languageProcessing/languages/nl/helpers/internal/stemModificationHelpers.js +0 -109
  397. package/src/languageProcessing/languages/nl/helpers/internal/stemTOrDFromEndOfWord.js +0 -65
  398. package/src/languageProcessing/languages/nl/values/Clause.js +0 -62
  399. package/src/languageProcessing/languages/pl/Researcher.js +0 -47
  400. package/src/languageProcessing/languages/pl/config/firstWordExceptions.js +0 -12
  401. package/src/languageProcessing/languages/pl/config/functionWords.js +0 -421
  402. package/src/languageProcessing/languages/pl/config/internal/auxiliaries.js +0 -85
  403. package/src/languageProcessing/languages/pl/config/internal/participles.js +0 -26433
  404. package/src/languageProcessing/languages/pl/config/sentenceLength.js +0 -10
  405. package/src/languageProcessing/languages/pl/config/stopWords.js +0 -36
  406. package/src/languageProcessing/languages/pl/config/transitionWords.js +0 -42
  407. package/src/languageProcessing/languages/pl/config/twoPartTransitionWords.js +0 -8
  408. package/src/languageProcessing/languages/pl/helpers/getClauses.js +0 -25
  409. package/src/languageProcessing/languages/pl/helpers/getStemmer.js +0 -22
  410. package/src/languageProcessing/languages/pl/helpers/internal/getParticiples.js +0 -18
  411. package/src/languageProcessing/languages/pl/helpers/internal/stem.js +0 -161
  412. package/src/languageProcessing/languages/pl/values/Clause.js +0 -53
  413. package/src/languageProcessing/languages/pt/Researcher.js +0 -48
  414. package/src/languageProcessing/languages/pt/config/firstWordExceptions.js +0 -15
  415. package/src/languageProcessing/languages/pt/config/functionWords.js +0 -226
  416. package/src/languageProcessing/languages/pt/config/internal/passiveVoiceAuxiliaries.js +0 -66
  417. package/src/languageProcessing/languages/pt/config/internal/passiveVoiceParticiples.js +0 -4088
  418. package/src/languageProcessing/languages/pt/config/sentenceLength.js +0 -3
  419. package/src/languageProcessing/languages/pt/config/stopWords.js +0 -50
  420. package/src/languageProcessing/languages/pt/config/syllables.json +0 -38
  421. package/src/languageProcessing/languages/pt/config/transitionWords.js +0 -34
  422. package/src/languageProcessing/languages/pt/config/twoPartTransitionWords.js +0 -9
  423. package/src/languageProcessing/languages/pt/helpers/calculateFleschReadingScore.js +0 -15
  424. package/src/languageProcessing/languages/pt/helpers/getClauses.js +0 -29
  425. package/src/languageProcessing/languages/pt/helpers/getStemmer.js +0 -22
  426. package/src/languageProcessing/languages/pt/helpers/internal/getParticiples.js +0 -35
  427. package/src/languageProcessing/languages/pt/helpers/internal/stem.js +0 -319
  428. package/src/languageProcessing/languages/pt/values/Clause.js +0 -43
  429. package/src/languageProcessing/languages/ru/Researcher.js +0 -48
  430. package/src/languageProcessing/languages/ru/config/firstWordExceptions.js +0 -14
  431. package/src/languageProcessing/languages/ru/config/fleschReadingEaseScores.js +0 -20
  432. package/src/languageProcessing/languages/ru/config/functionWords.js +0 -519
  433. package/src/languageProcessing/languages/ru/config/internal/participlesShortenedList.js +0 -2914
  434. package/src/languageProcessing/languages/ru/config/internal/passiveVoiceParticiples.js +0 -6295
  435. package/src/languageProcessing/languages/ru/config/sentenceLength.js +0 -3
  436. package/src/languageProcessing/languages/ru/config/syllables.json +0 -19
  437. package/src/languageProcessing/languages/ru/config/transitionWords.js +0 -62
  438. package/src/languageProcessing/languages/ru/config/twoPartTransitionWords.js +0 -14
  439. package/src/languageProcessing/languages/ru/helpers/calculateFleschReadingScore.js +0 -16
  440. package/src/languageProcessing/languages/ru/helpers/getStemmer.js +0 -22
  441. package/src/languageProcessing/languages/ru/helpers/internal/stem.js +0 -288
  442. package/src/languageProcessing/languages/ru/helpers/isPassiveSentence.js +0 -14
  443. package/src/languageProcessing/languages/sk/Researcher.js +0 -46
  444. package/src/languageProcessing/languages/sk/config/firstWordExceptions.js +0 -14
  445. package/src/languageProcessing/languages/sk/config/functionWords.js +0 -855
  446. package/src/languageProcessing/languages/sk/config/internal/nonPassives.js +0 -1074
  447. package/src/languageProcessing/languages/sk/config/internal/passiveVoiceAuxiliaries.js +0 -22
  448. package/src/languageProcessing/languages/sk/config/stopWords.js +0 -34
  449. package/src/languageProcessing/languages/sk/config/transitionWords.js +0 -23
  450. package/src/languageProcessing/languages/sk/config/twoPartTransitionWords.js +0 -10
  451. package/src/languageProcessing/languages/sk/helpers/getClauses.js +0 -26
  452. package/src/languageProcessing/languages/sk/helpers/getStemmer.js +0 -22
  453. package/src/languageProcessing/languages/sk/helpers/internal/getParticiples.js +0 -16
  454. package/src/languageProcessing/languages/sk/helpers/internal/stem.js +0 -319
  455. package/src/languageProcessing/languages/sk/values/Clause.js +0 -39
  456. package/src/languageProcessing/languages/sv/Researcher.js +0 -45
  457. package/src/languageProcessing/languages/sv/config/firstWordExceptions.js +0 -15
  458. package/src/languageProcessing/languages/sv/config/functionWords.js +0 -176
  459. package/src/languageProcessing/languages/sv/config/internal/passiveVerbs.js +0 -10400
  460. package/src/languageProcessing/languages/sv/config/keyphraseLength.js +0 -11
  461. package/src/languageProcessing/languages/sv/config/transitionWords.js +0 -35
  462. package/src/languageProcessing/languages/sv/config/twoPartTransitionWords.js +0 -8
  463. package/src/languageProcessing/languages/sv/helpers/getStemmer.js +0 -22
  464. package/src/languageProcessing/languages/sv/helpers/internal/stem.js +0 -152
  465. package/src/languageProcessing/languages/sv/helpers/isPassiveSentence.js +0 -14
  466. package/src/languageProcessing/languages/tr/Researcher.js +0 -44
  467. package/src/languageProcessing/languages/tr/config/firstWordExceptions.js +0 -13
  468. package/src/languageProcessing/languages/tr/config/functionWords.js +0 -116
  469. package/src/languageProcessing/languages/tr/config/internal/nonPassiveExceptions.js +0 -574
  470. package/src/languageProcessing/languages/tr/config/internal/passiveEndings.js +0 -151
  471. package/src/languageProcessing/languages/tr/config/sentenceLength.js +0 -7
  472. package/src/languageProcessing/languages/tr/config/transitionWords.js +0 -42
  473. package/src/languageProcessing/languages/tr/config/twoPartTransitionWords.js +0 -7
  474. package/src/languageProcessing/languages/tr/helpers/getStemmer.js +0 -22
  475. package/src/languageProcessing/languages/tr/helpers/internal/stem.js +0 -20
  476. package/src/languageProcessing/languages/tr/helpers/isPassiveSentence.js +0 -43
  477. package/src/languageProcessing/researches/altTagCount.js +0 -70
  478. package/src/languageProcessing/researches/countSentencesFromText.js +0 -19
  479. package/src/languageProcessing/researches/findKeyphraseInSEOTitle.js +0 -257
  480. package/src/languageProcessing/researches/findKeywordInFirstParagraph.js +0 -86
  481. package/src/languageProcessing/researches/findTransitionWords.js +0 -123
  482. package/src/languageProcessing/researches/functionWordsInKeyphrase.js +0 -44
  483. package/src/languageProcessing/researches/getAnchorsWithKeyphrase.js +0 -227
  484. package/src/languageProcessing/researches/getFleschReadingScore.js +0 -150
  485. package/src/languageProcessing/researches/getKeywordDensity.js +0 -44
  486. package/src/languageProcessing/researches/getLinkStatistics.js +0 -54
  487. package/src/languageProcessing/researches/getLinks.js +0 -18
  488. package/src/languageProcessing/researches/getLongCenterAlignedTexts.js +0 -37
  489. package/src/languageProcessing/researches/getParagraphLength.js +0 -44
  490. package/src/languageProcessing/researches/getParagraphs.js +0 -18
  491. package/src/languageProcessing/researches/getPassiveVoiceResult.js +0 -129
  492. package/src/languageProcessing/researches/getProminentWordsForInsights.js +0 -48
  493. package/src/languageProcessing/researches/getProminentWordsForInternalLinking.js +0 -119
  494. package/src/languageProcessing/researches/getSentenceBeginnings.js +0 -124
  495. package/src/languageProcessing/researches/getSubheadingTextLengths.js +0 -59
  496. package/src/languageProcessing/researches/getWordForms.js +0 -204
  497. package/src/languageProcessing/researches/h1s.js +0 -10
  498. package/src/languageProcessing/researches/imageCount.js +0 -16
  499. package/src/languageProcessing/researches/index.js +0 -5
  500. package/src/languageProcessing/researches/keyphraseDistribution.js +0 -249
  501. package/src/languageProcessing/researches/keyphraseLength.js +0 -17
  502. package/src/languageProcessing/researches/keywordCount.js +0 -134
  503. package/src/languageProcessing/researches/keywordCountInUrl.js +0 -57
  504. package/src/languageProcessing/researches/matchKeywordInSubheadings.js +0 -62
  505. package/src/languageProcessing/researches/metaDescriptionKeyword.js +0 -85
  506. package/src/languageProcessing/researches/metaDescriptionLength.js +0 -12
  507. package/src/languageProcessing/researches/pageTitleWidth.js +0 -11
  508. package/src/languageProcessing/researches/readingTime.js +0 -82
  509. package/src/languageProcessing/researches/sentences.js +0 -20
  510. package/src/languageProcessing/researches/videoCount.js +0 -32
  511. package/src/languageProcessing/researches/wordComplexity.js +0 -129
  512. package/src/languageProcessing/researches/wordCountInText.js +0 -29
  513. package/src/languageProcessing/values/Clause.js +0 -108
  514. package/src/languageProcessing/values/ProminentWord.js +0 -95
  515. package/src/languageProcessing/values/Sentence.js +0 -111
  516. package/src/languageProcessing/values/index.js +0 -9
  517. package/src/markers/addMark.js +0 -9
  518. package/src/markers/addMarkSingleWord.js +0 -32
  519. package/src/markers/index.js +0 -7
  520. package/src/markers/removeDuplicateMarks.js +0 -27
  521. package/src/markers/removeMarks.js +0 -11
  522. package/src/parse/build/build.js +0 -52
  523. package/src/parse/build/index.js +0 -10
  524. package/src/parse/build/private/adapt.js +0 -113
  525. package/src/parse/build/private/adaptAttributes.js +0 -36
  526. package/src/parse/build/private/alwaysFilterElements.js +0 -75
  527. package/src/parse/build/private/combineIntoImplicitParagraphs.js +0 -130
  528. package/src/parse/build/private/filterBeforeTokenizing.js +0 -32
  529. package/src/parse/build/private/filterHelpers.js +0 -44
  530. package/src/parse/build/private/filterTree.js +0 -42
  531. package/src/parse/build/private/getTextElementPositions.js +0 -184
  532. package/src/parse/build/private/helpers/parseClassAttribute.js +0 -9
  533. package/src/parse/build/private/isPhrasingContent.js +0 -28
  534. package/src/parse/build/private/parseBlocks.js +0 -151
  535. package/src/parse/build/private/tokenize.js +0 -74
  536. package/src/parse/language/LanguageProcessor.js +0 -74
  537. package/src/parse/structure/Heading.js +0 -26
  538. package/src/parse/structure/Node.js +0 -69
  539. package/src/parse/structure/Paragraph.js +0 -48
  540. package/src/parse/structure/Sentence.js +0 -30
  541. package/src/parse/structure/SourceCodeLocation.js +0 -41
  542. package/src/parse/structure/Text.js +0 -27
  543. package/src/parse/structure/Token.js +0 -24
  544. package/src/parse/structure/index.js +0 -16
  545. package/src/parse/traverse/findAllInTree.js +0 -58
  546. package/src/parse/traverse/index.js +0 -12
  547. package/src/parse/traverse/innerText.js +0 -26
  548. package/src/parsedPaper/ParsedPaper.js +0 -92
  549. package/src/parsedPaper/assess/TreeAssessor.js +0 -184
  550. package/src/parsedPaper/assess/assessmentListFactories.js +0 -73
  551. package/src/parsedPaper/assess/assessments/Assessment.js +0 -79
  552. package/src/parsedPaper/assess/assessments/index.js +0 -6
  553. package/src/parsedPaper/assess/assessorFactories.js +0 -104
  554. package/src/parsedPaper/assess/cornerstone/assessmentListFactories.js +0 -47
  555. package/src/parsedPaper/assess/cornerstone/index.js +0 -5
  556. package/src/parsedPaper/assess/index.js +0 -20
  557. package/src/parsedPaper/build/PaperParser.js +0 -105
  558. package/src/parsedPaper/build/linguisticParsing/Sentence.js +0 -89
  559. package/src/parsedPaper/build/linguisticParsing/SentenceTokenizer.js +0 -323
  560. package/src/parsedPaper/build/linguisticParsing/parseText.js +0 -20
  561. package/src/parsedPaper/build/tree/TreeBuilder.js +0 -75
  562. package/src/parsedPaper/build/tree/cleanup/calculateTextIndices.js +0 -190
  563. package/src/parsedPaper/build/tree/cleanup/getElementContent.js +0 -21
  564. package/src/parsedPaper/build/tree/cleanup/postParsing.js +0 -37
  565. package/src/parsedPaper/build/tree/html/HTMLTreeConverter.js +0 -230
  566. package/src/parsedPaper/build/tree/html/buildTree.js +0 -31
  567. package/src/parsedPaper/build/tree/html/htmlConstants.js +0 -37
  568. package/src/parsedPaper/build/tree/index.js +0 -14
  569. package/src/parsedPaper/build/tree/metadata/buildTree.js +0 -32
  570. package/src/parsedPaper/research/TreeResearcher.js +0 -134
  571. package/src/parsedPaper/research/index.js +0 -13
  572. package/src/parsedPaper/research/researches/Headings.js +0 -20
  573. package/src/parsedPaper/research/researches/LinkStatistics.js +0 -128
  574. package/src/parsedPaper/research/researches/Research.js +0 -50
  575. package/src/parsedPaper/research/researches/index.js +0 -1
  576. package/src/parsedPaper/structure/tree/FormattingElement.js +0 -67
  577. package/src/parsedPaper/structure/tree/SourceCodeLocation.js +0 -31
  578. package/src/parsedPaper/structure/tree/TextContainer.js +0 -85
  579. package/src/parsedPaper/structure/tree/index.js +0 -22
  580. package/src/parsedPaper/structure/tree/nodes/Heading.js +0 -26
  581. package/src/parsedPaper/structure/tree/nodes/LeafNode.js +0 -75
  582. package/src/parsedPaper/structure/tree/nodes/List.js +0 -47
  583. package/src/parsedPaper/structure/tree/nodes/ListItem.js +0 -26
  584. package/src/parsedPaper/structure/tree/nodes/MetadataMiscellaneous.js +0 -46
  585. package/src/parsedPaper/structure/tree/nodes/MetadataText.js +0 -26
  586. package/src/parsedPaper/structure/tree/nodes/Node.js +0 -154
  587. package/src/parsedPaper/structure/tree/nodes/Paragraph.js +0 -24
  588. package/src/parsedPaper/structure/tree/nodes/StructuredNode.js +0 -52
  589. package/src/parsedPaper/structure/tree/nodes/index.js +0 -21
  590. package/src/scoring/assessments/assessment.js +0 -63
  591. package/src/scoring/assessments/index.js +0 -58
  592. package/src/scoring/assessments/readability/ParagraphTooLongAssessment.js +0 -173
  593. package/src/scoring/assessments/readability/SentenceBeginningsAssessment.js +0 -132
  594. package/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js +0 -186
  595. package/src/scoring/assessments/readability/TransitionWordsAssessment.js +0 -168
  596. package/src/scoring/assessments/seo/ImageCountAssessment.js +0 -112
  597. package/src/scoring/assessments/seo/InternalLinksAssessment.js +0 -114
  598. package/src/scoring/assessments/seo/IntroductionKeywordAssessment.js +0 -110
  599. package/src/scoring/assessments/seo/KeyphraseAssessment.js +0 -104
  600. package/src/scoring/assessments/seo/KeyphraseLengthAssessment.js +0 -110
  601. package/src/scoring/assessments/seo/KeywordDensityAssessment.js +0 -116
  602. package/src/scoring/assessments/seo/MetaDescriptionKeywordAssessment.js +0 -114
  603. package/src/scoring/assessments/seo/MetaDescriptionLengthAssessment.js +0 -112
  604. package/src/scoring/assessments/seo/MetaTitleKeywordAssessment.js +0 -111
  605. package/src/scoring/assessments/seo/NumberInMetaTitleAssessment.js +0 -107
  606. package/src/scoring/assessments/seo/OutboundLinksAssessment.js +0 -111
  607. package/src/scoring/assessments/seo/PageTitleWidthAssessment.js +0 -104
  608. package/src/scoring/assessments/seo/SingleH1Assessment.js +0 -118
  609. package/src/scoring/assessments/seo/SingleTitleAssessment.js +0 -108
  610. package/src/scoring/assessments/seo/SubHeadingsKeywordAssessment.js +0 -107
  611. package/src/scoring/assessments/seo/TextImagesAssessment.js +0 -144
  612. package/src/scoring/assessments/seo/TextLengthAssessment.js +0 -100
  613. package/src/scoring/assessments/seo/UrlKeywordAssessment.js +0 -111
  614. package/src/scoring/assessments/seo/UrlLengthAssessment.js +0 -103
  615. package/src/scoring/assessors/assessor.js +0 -269
  616. package/src/scoring/assessors/avadaAssessor.js +0 -67
  617. package/src/scoring/assessors/contentAssessor.js +0 -159
  618. package/src/scoring/assessors/index.js +0 -4
  619. package/src/scoring/assessors/seoAssessor.js +0 -57
  620. package/src/scoring/helpers/assessments/checkForTooLongSentences.js +0 -13
  621. package/src/scoring/helpers/assessments/inRange.js +0 -49
  622. package/src/scoring/helpers/assessments/keyphraseLengthFactor.js +0 -10
  623. package/src/scoring/helpers/assessments/recommendedKeywordCount.js +0 -43
  624. package/src/scoring/helpers/index.js +0 -74
  625. package/src/scoring/interpreters/index.js +0 -5
  626. package/src/scoring/interpreters/scoreToRating.js +0 -31
  627. package/src/scoring/renderers/AssessorPresenter.js +0 -360
  628. package/src/scoring/scoreAggregators/ReadabilityScoreAggregator.js +0 -203
  629. package/src/scoring/scoreAggregators/SEOScoreAggregator.js +0 -54
  630. package/src/scoring/scoreAggregators/ScoreAggregator.js +0 -23
  631. package/src/scoring/scoreAggregators/index.js +0 -3
  632. package/src/values/AssessmentResult.js +0 -496
  633. package/src/values/Mark.js +0 -271
  634. package/src/values/Paper.js +0 -425
  635. package/src/values/index.js +0 -9
  636. package/src/vendor/turkishStemmer.js +0 -3435
  637. package/tsconfig.json +0 -15
@@ -1,640 +0,0 @@
1
- import { isNaN, isUndefined, map } from "lodash";
2
-
3
- import core from "tokenizer2/core";
4
-
5
- import { normalize as normalizeQuotes } from "../sanitize/quotes.js";
6
-
7
- import abbreviations from "../../languages/en/config/abbreviations";
8
-
9
- import createRegexFromArray from "../regex/createRegexFromArray";
10
- import wordBoundaries from "../../../config/wordBoundaries";
11
-
12
- // All characters that indicate a sentence delimiter.
13
- const fullStop = ".";
14
-
15
- const fullStopRegex = new RegExp( "^[" + fullStop + "]$" );
16
- const smallerThanContentRegex = /^<[^><]*$/;
17
- const htmlStartRegex = /^<([^>\s/]+)[^>]*>$/mi;
18
- const htmlEndRegex = /^<\/([^>\s]+)[^>]*>$/mi;
19
-
20
- const blockStartRegex = /^\s*[[({]\s*$/;
21
- const blockEndRegex = /^\s*[\])}]\s*$/;
22
-
23
- const abbreviationsPreparedForRegex = abbreviations.map( ( abbreviation ) => abbreviation.replace( ".", "\\." ) );
24
- const abbreviationsRegex = createRegexFromArray( abbreviationsPreparedForRegex );
25
-
26
- const wordBoundariesForRegex = "(^|$|[" + wordBoundaries().map( ( boundary ) => "\\" + boundary ).join( "" ) + "])";
27
- const lastCharacterPartOfInitialsRegex = new RegExp( wordBoundariesForRegex + "[A-Za-z]$" );
28
-
29
- // Constants to be used in isValidTagPair.
30
- // A regex to get the tag type.
31
- const tagTypeRegex = /<\/?([^\s]+?)(\s|>)/;
32
- // Semantic tags (as opposed to style tags) are tags that are used to structure the text.
33
- const semanticTags = [ "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "span", "li", "main" ];
34
-
35
- /**
36
- * Class for tokenizing a (html) text into sentences.
37
- */
38
- export default class SentenceTokenizer {
39
- /**
40
- * Constructor
41
- * @constructor
42
- */
43
- constructor() {
44
- /*
45
- * \u2026 - ellipsis.
46
- * \u06D4 - Urdu full stop.
47
- * \u061f - Arabic question mark.
48
- */
49
- this.sentenceDelimiters = "”〞〟„』›»’‛`\"?!\u2026\u06d4\u061f";
50
- }
51
-
52
- /**
53
- * Gets the sentence delimiters.
54
- *
55
- * @returns {string} The sentence delimiters.
56
- */
57
- getSentenceDelimiters() {
58
- return this.sentenceDelimiters;
59
- }
60
-
61
- /**
62
- * Returns whether or not a certain character is a number.
63
- *
64
- * @param {string} character The character to check.
65
- * @returns {boolean} Whether or not the character is a capital letter.
66
- */
67
- isNumber( character ) {
68
- return ! isNaN( parseInt( character, 10 ) );
69
- }
70
-
71
- /**
72
- * Returns whether or not a given HTML tag is a break tag.
73
- *
74
- * @param {string} htmlTag The HTML tag to check.
75
- * @returns {boolean} Whether or not the given HTML tag is a break tag.
76
- */
77
- isBreakTag( htmlTag ) {
78
- return /<\/?br/.test( htmlTag );
79
- }
80
-
81
- /**
82
- * Returns whether or not a given character is quotation mark.
83
- *
84
- * @param {string} character The character to check.
85
- * @returns {boolean} Whether or not the given character is a quotation mark.
86
- */
87
- isQuotation( character ) {
88
- character = normalizeQuotes( character );
89
-
90
- return "'" === character ||
91
- "\"" === character;
92
- }
93
-
94
- /**
95
- * A mock definition of this function. This function is only used in extensions for languages that use an ordinal dot.
96
- *
97
- * @returns {boolean} Always returns false as it is a language specific implementation if a language has an ordinal dot.
98
- */
99
- endsWithOrdinalDot() {
100
- return false;
101
- }
102
-
103
- /**
104
- * Returns whether or not a given character is a punctuation mark that can be at the beginning
105
- * of a sentence, like ¿ and ¡ used in Spanish.
106
- *
107
- * @param {string} character The character to check.
108
- * @returns {boolean} Whether or not the given character is a punctuation mark.
109
- */
110
- isPunctuation( character ) {
111
- return "¿" === character ||
112
- "¡" === character;
113
- }
114
-
115
- /**
116
- * Removes duplicate whitespace from a given text.
117
- *
118
- * @param {string} text The text with duplicate whitespace.
119
- * @returns {string} The text without duplicate whitespace.
120
- */
121
- removeDuplicateWhitespace( text ) {
122
- return text.replace( /\s+/, " " );
123
- }
124
-
125
- /**
126
- * Returns whether or not a certain character is a capital letter.
127
- *
128
- * @param {string} character The character to check.
129
- * @returns {boolean} Whether or not the character is a capital letter.
130
- */
131
- isCapitalLetter( character ) {
132
- return character !== character.toLocaleLowerCase();
133
- }
134
-
135
- /**
136
- * Checks whether the given character is a smaller than sign.
137
- *
138
- * This function is used to make sure that tokenizing the content after
139
- * the smaller than sign works as expected.
140
- * E.g. 'A sentence. < Hello world!' = ['A sentence.', '< Hello world!'].
141
- *
142
- * @param {string} character The character to check.
143
- * @returns {boolean} Whether the character is a smaller than sign ('<') or not.
144
- */
145
- isSmallerThanSign( character ) {
146
- return character === "<";
147
- }
148
-
149
- /**
150
- * Retrieves the next two characters from an array with the two next tokens.
151
- *
152
- * @param {Array} nextTokens The two next tokens. Might be undefined.
153
- * @returns {string} The next two characters.
154
- */
155
- getNextTwoCharacters( nextTokens ) {
156
- let next = "";
157
-
158
- if ( ! isUndefined( nextTokens[ 0 ] ) ) {
159
- next += nextTokens[ 0 ].src;
160
- }
161
-
162
- if ( ! isUndefined( nextTokens[ 1 ] ) ) {
163
- next += nextTokens[ 1 ].src;
164
- }
165
-
166
- next = this.removeDuplicateWhitespace( next );
167
-
168
- return next;
169
- }
170
-
171
- /**
172
- * Checks whether a character is from a language that's written from right to left.
173
- * These languages don't have capital letter forms. Therefore any letter from these languages is a
174
- * potential sentence beginning.
175
- *
176
- * @param {string} letter The letter to check.
177
- *
178
- * @returns {boolean} Whether the letter is from an LTR language.
179
- */
180
- isLetterFromSpecificLanguage( letter ) {
181
- const ltrLetterRanges = [
182
- // Hebrew characters.
183
- /^[\u0590-\u05fe]+$/i,
184
- // Arabic characters (used for Arabic, Farsi, Urdu).
185
- /^[\u0600-\u06FF]+$/i,
186
- // Additional Farsi characters.
187
- /^[\uFB8A\u067E\u0686\u06AF]+$/i,
188
- ];
189
-
190
- return (
191
- ltrLetterRanges.some( ltrLetterRange => ltrLetterRange.test( letter ) )
192
- );
193
- }
194
-
195
- /**
196
- * Checks if the sentenceBeginning beginning is a valid beginning.
197
- *
198
- * @param {string} sentenceBeginning The beginning of the sentence to validate.
199
- * @returns {boolean} Returns true if it is a valid beginning, false if it is not.
200
- */
201
- isValidSentenceBeginning( sentenceBeginning ) {
202
- return ( this.isCapitalLetter( sentenceBeginning ) ||
203
- this.isLetterFromSpecificLanguage( sentenceBeginning ) ||
204
- this.isNumber( sentenceBeginning ) ||
205
- this.isQuotation( sentenceBeginning ) ||
206
- this.isPunctuation( sentenceBeginning ) ||
207
- this.isSmallerThanSign( sentenceBeginning ) );
208
- }
209
-
210
- /**
211
- * Checks if the token is a valid sentence start.
212
- *
213
- * @param {Object} token The token to validate.
214
- * @returns {boolean} Returns true if the token is valid sentence start, false if it is not.
215
- */
216
- isSentenceStart( token ) {
217
- return ( ! isUndefined( token ) && (
218
- "html-start" === token.type ||
219
- "html-end" === token.type ||
220
- "block-start" === token.type
221
- ) );
222
- }
223
-
224
- /**
225
- * Checks if the token is a valid sentence ending. A valid sentence ending is either a full stop or another
226
- * delimiter such as "?", "!", etc.
227
- *
228
- * @param {Object} token The token to validate.
229
- * @returns {boolean} Returns true if the token is valid sentence ending, false if it is not.
230
- */
231
- isSentenceEnding( token ) {
232
- return (
233
- ! isUndefined( token ) &&
234
- ( token.type === "full-stop" || token.type === "sentence-delimiter" )
235
- );
236
- }
237
-
238
- /**
239
- * Checks if a full stop is part of a person's initials.
240
- *
241
- * Tests if tokens exist. Then tests if the tokens are of the right type.
242
- * For previous token, it checks if the sentence ends with a single letter.
243
- * For nextToken it checks if it is a single letter.
244
- * Checks if next token is followed by a full stop.
245
- *
246
- * @param {object} token The current token (must be a full stop).
247
- * @param {object} previousToken The token before the full stop.
248
- * @param {object} nextToken The token following the full stop.
249
- * @param {object} secondToNextToken The second token after the full stop.
250
- * @returns {boolean} True if a full stop is part of a person's initials, False if the full stop is not part of a person's initials.
251
- */
252
- isPartOfPersonInitial( token, previousToken, nextToken, secondToNextToken ) {
253
- return ( ! isUndefined( token ) &&
254
- ! isUndefined( nextToken ) &&
255
- ! isUndefined( secondToNextToken ) &&
256
- ! isUndefined( previousToken ) &&
257
- token.type === "full-stop" &&
258
- previousToken.type === "sentence" &&
259
- lastCharacterPartOfInitialsRegex.test( previousToken.src ) &&
260
- nextToken.type === "sentence" &&
261
- nextToken.src.trim().length === 1 &&
262
- secondToNextToken.type === "full-stop"
263
- );
264
- }
265
-
266
- /**
267
- * Tokens that represent a '<', followed by content until it enters another '<' or '>'
268
- * gets another pass by the tokenizer.
269
- *
270
- * @param {Object} token A token of type 'smaller-than-sign-content'.
271
- * @param {string[]} tokenSentences The current array of found sentences. Sentences may get added by this method.
272
- * @param {string} currentSentence The current sentence. Sentence parts may get appended by this method.
273
- * @returns {{tokenSentences, currentSentence}} The found sentences and the current sentence, appended when necessary.
274
- */
275
- tokenizeSmallerThanContent( token, tokenSentences, currentSentence ) {
276
- /*
277
- Remove the '<' from the text, to avoid matching this rule
278
- recursively again and again.
279
- We add it again later on.
280
- */
281
- const localText = token.src.substring( 1 );
282
-
283
- // Tokenize the current smaller-than-content token without the first '<'.
284
- const tokenizerResult = this.createTokenizer();
285
- this.tokenize( tokenizerResult.tokenizer, localText );
286
- const localSentences = this.getSentencesFromTokens( tokenizerResult.tokens, false );
287
-
288
- localSentences[ 0 ] = isUndefined( localSentences[ 0 ] ) ? "<" : "<" + localSentences[ 0 ];
289
-
290
- /*
291
- * When the first sentence has a valid sentence beginning.
292
- * Add the currently build sentence to the sentences.
293
- * Start building the next sentence.
294
- */
295
- if ( this.isValidSentenceBeginning( localSentences[ 0 ] ) ) {
296
- tokenSentences.push( currentSentence );
297
- currentSentence = "";
298
- }
299
- currentSentence += localSentences[ 0 ];
300
-
301
- if ( localSentences.length > 1 ) {
302
- /*
303
- There is a new sentence after the first,
304
- add and reset the current sentence.
305
- */
306
- tokenSentences.push( currentSentence );
307
- currentSentence = "";
308
-
309
- // Remove the first sentence (we do not need to add it again).
310
- localSentences.shift();
311
- // Last sentence gets special treatment.
312
- const lastSentence = localSentences.pop();
313
-
314
- // Add the remaining found sentences.
315
- localSentences.forEach( sentence => {
316
- tokenSentences.push( sentence );
317
- } );
318
-
319
- const sentenceEndRegex = new RegExp( "[" + fullStop + this.getSentenceDelimiters() + "]$" );
320
-
321
- // Check if the last sentence has a valid sentence ending.
322
- if ( lastSentence.match( sentenceEndRegex ) ) {
323
- // If so, add it as a sentence.
324
- tokenSentences.push( lastSentence );
325
- } else {
326
- // If not, start making a new one.
327
- currentSentence = lastSentence;
328
- }
329
- }
330
- return {
331
- tokenSentences,
332
- currentSentence,
333
- };
334
- }
335
-
336
- /**
337
- * Creates a tokenizer.
338
- *
339
- * @returns {Object} The tokenizer and the tokens.
340
- */
341
- createTokenizer() {
342
- const sentenceDelimiterRegex = new RegExp( "^[" + this.getSentenceDelimiters() + "]$" );
343
- const sentenceRegex = new RegExp( "^[^" + fullStop + this.getSentenceDelimiters() + "<\\(\\)\\[\\]]+$" );
344
-
345
- const tokens = [];
346
- const tokenizer = core( function( token ) {
347
- tokens.push( token );
348
- } );
349
-
350
- tokenizer.addRule( fullStopRegex, "full-stop" );
351
- tokenizer.addRule( smallerThanContentRegex, "smaller-than-sign-content" );
352
- tokenizer.addRule( htmlStartRegex, "html-start" );
353
- tokenizer.addRule( htmlEndRegex, "html-end" );
354
- tokenizer.addRule( blockStartRegex, "block-start" );
355
- tokenizer.addRule( blockEndRegex, "block-end" );
356
- tokenizer.addRule( sentenceDelimiterRegex, "sentence-delimiter" );
357
- tokenizer.addRule( sentenceRegex, "sentence" );
358
-
359
- return {
360
- tokenizer,
361
- tokens,
362
- };
363
- }
364
-
365
- /**
366
- * Tokenizes the given text using the given tokenizer.
367
- *
368
- * @param {Object} tokenizer The tokenizer to use.
369
- * @param {string} text The text to tokenize.
370
- * @returns {void}
371
- */
372
- tokenize( tokenizer, text ) {
373
- tokenizer.onText( text );
374
-
375
- try {
376
- tokenizer.end();
377
- } catch ( e ) {
378
- console.error( "Tokenizer end error:", e, e.tokenizer2 );
379
- }
380
- }
381
-
382
- /**
383
- * Checks if a string ends with an abbreviation.
384
- * @param {string} currentSentence A (part of) a sentence.
385
- * @returns {boolean} True if the string ends with an abbreviation that is in abbreviations.js. Otherwise, False.
386
- */
387
- endsWithAbbreviation( currentSentence ) {
388
- const matchedAbbreviations = currentSentence.match( abbreviationsRegex );
389
-
390
- if ( ! matchedAbbreviations ) {
391
- return false;
392
- }
393
-
394
- const lastAbbreviation = matchedAbbreviations.pop();
395
- return currentSentence.endsWith( lastAbbreviation );
396
- }
397
-
398
- /**
399
- * Checks whether the given tokens are a valid html tag pair.
400
- * Note that this method is not a full html tag validator. It should be replaced with a better solution once the html parser is implemented.
401
- *
402
- * @param {object} firstToken The first token to check. It is asserted that this token contains/is an opening html tag.
403
- * @param {object} lastToken The last token to check. It is asserted that this token contains/is a closing html tag.
404
- *
405
- * @returns {boolean} True if the tokens are a valid html tag pair. Otherwise, False.
406
- */
407
- isValidTagPair( firstToken, lastToken ) {
408
- const firstTokenText = firstToken.src;
409
- const lastTokenText = lastToken.src;
410
-
411
- // Get the tag types.
412
- const firstTagType = firstTokenText.match( tagTypeRegex )[ 1 ];
413
- const lastTagType = lastTokenText.match( tagTypeRegex )[ 1 ];
414
-
415
-
416
- // Check if the tags are the same and if they are a semantic tag (p, div, h1, h2, h3, h4, h5, h6, span).
417
- return firstTagType === lastTagType && semanticTags.includes( firstTagType );
418
- }
419
-
420
- /**
421
- * Returns an array of sentences for a given array of tokens, assumes that the text has already been split into blocks.
422
- *
423
- * @param {Object[]} tokenArray The tokens from the sentence tokenizer.
424
- * @param {boolean} [trimSentences=true] Whether to trim the sentences at the end or not.
425
- *
426
- * @returns {string[]} A list of sentences.
427
- */
428
- getSentencesFromTokens( tokenArray, trimSentences = true ) {
429
- let tokenSentences = [], currentSentence = "", nextSentenceStart, sliced;
430
-
431
- // Drop the first and last HTML tag if both are present.
432
- do {
433
- sliced = false;
434
- const firstToken = tokenArray[ 0 ];
435
- const lastToken = tokenArray[ tokenArray.length - 1 ];
436
-
437
- if ( firstToken && lastToken && firstToken.type === "html-start" &&
438
- lastToken.type === "html-end" && this.isValidTagPair( firstToken, lastToken ) ) {
439
- tokenArray = tokenArray.slice( 1, tokenArray.length - 1 );
440
-
441
- sliced = true;
442
- }
443
- } while ( sliced && tokenArray.length > 1 );
444
-
445
- tokenArray.forEach( ( token, i ) => {
446
- let hasNextSentence, nextCharacters, tokenizeResults;
447
- const nextToken = tokenArray[ i + 1 ];
448
- const previousToken = tokenArray[ i - 1 ];
449
- const secondToNextToken = tokenArray[ i + 2 ];
450
- nextCharacters = this.getNextTwoCharacters( [ nextToken, secondToNextToken ] );
451
-
452
- // For a new sentence we need to check the next two characters.
453
- hasNextSentence = nextCharacters.length >= 2;
454
- nextSentenceStart = hasNextSentence ? nextCharacters[ 1 ] : "";
455
-
456
- switch ( token.type ) {
457
- case "html-start":
458
- case "html-end":
459
- if ( this.isBreakTag( token.src ) ) {
460
- tokenSentences.push( currentSentence );
461
- currentSentence = "";
462
- } else {
463
- currentSentence += token.src;
464
- }
465
- break;
466
-
467
- case "smaller-than-sign-content":
468
- tokenizeResults = this.tokenizeSmallerThanContent( token, tokenSentences, currentSentence );
469
- tokenSentences = tokenizeResults.tokenSentences;
470
- currentSentence = tokenizeResults.currentSentence;
471
- break;
472
- case "sentence":
473
- currentSentence += token.src;
474
- break;
475
- case "sentence-delimiter":
476
- currentSentence += token.src;
477
-
478
- /*
479
- * Only split text into sentences if:
480
- * the next token is defined, AND
481
- * the next token type is neither "block-end" nor "sentence-delimiter", AND
482
- * the next token first character is a white space
483
- */
484
- if ( ! isUndefined( nextToken ) &&
485
- "block-end" !== nextToken.type &&
486
- "sentence-delimiter" !== nextToken.type &&
487
- this.isCharacterASpace( nextToken.src[ 0 ] ) ) {
488
- // Don't split on quotation marks unless they're preceded by a full stop.
489
- if ( this.isQuotation( token.src ) && previousToken && previousToken.src !== "." ) {
490
- break;
491
- }
492
- /*
493
- * Only split on ellipsis or quotation marks when:
494
- * a) There is a next sentence, and the next character is a valid sentence beginning preceded by a white space, OR
495
- * b) The next token is a sentence start
496
- */
497
- if ( this.isQuotation( token.src ) || token.src === "…" ) {
498
- currentSentence = this.getValidSentence( hasNextSentence,
499
- nextSentenceStart,
500
- nextCharacters,
501
- nextToken,
502
- tokenSentences,
503
- currentSentence );
504
- } else {
505
- tokenSentences.push( currentSentence );
506
- currentSentence = "";
507
- }
508
- }
509
- break;
510
-
511
- case "full-stop":
512
- currentSentence += token.src;
513
- nextCharacters = this.getNextTwoCharacters( [ nextToken, secondToNextToken ] );
514
-
515
- // For a new sentence we need to check the next two characters.
516
- hasNextSentence = nextCharacters.length >= 2;
517
- nextSentenceStart = hasNextSentence ? nextCharacters[ 1 ] : "";
518
-
519
- // If the current sentence ends with an abbreviation, the full stop does not split the sentence.
520
- if ( this.endsWithAbbreviation( currentSentence ) ) {
521
- break;
522
- }
523
-
524
- // It should not split the text if the first character of the potential next sentence is a number.
525
- if ( hasNextSentence && this.isNumber( nextCharacters[ 0 ] ) ) {
526
- break;
527
- }
528
-
529
- // If the full stop is part of a person's initials, don't split sentence.
530
- if ( this.isPartOfPersonInitial( token, previousToken, nextToken, secondToNextToken ) ) {
531
- break;
532
- }
533
-
534
- // If the full stop is an ordinal dot (in German), then don't break the sentence.
535
- // This check should be done after hasNextSentence && this.isNumber( nextCharacters[ 0 ] ) (above).
536
- // Because otherwise it could break before that test.
537
- if ( this.endsWithOrdinalDot( currentSentence ) ) {
538
- break;
539
- }
540
-
541
- /*
542
- * Only split on full stop when:
543
- * a) There is a next sentence, and the next character is a valid sentence beginning preceded by a white space, OR
544
- * b) The next token is a sentence start
545
- */
546
- currentSentence = this.getValidSentence( hasNextSentence,
547
- nextSentenceStart,
548
- nextCharacters,
549
- nextToken,
550
- tokenSentences,
551
- currentSentence );
552
-
553
- break;
554
-
555
- case "block-start":
556
- currentSentence += token.src;
557
- break;
558
-
559
- case "block-end":
560
- currentSentence += token.src;
561
-
562
- nextCharacters = this.getNextTwoCharacters( [ nextToken, secondToNextToken ] );
563
-
564
- // For a new sentence we need to check the next two characters.
565
- hasNextSentence = nextCharacters.length >= 2;
566
- nextSentenceStart = hasNextSentence ? nextCharacters[ 0 ] : "";
567
-
568
- /* Don't split if:
569
- * - The next character is a number. For example: IPv4-numbers.
570
- * - The block end is preceded by a valid sentence ending, but not followed by a valid sentence beginning.
571
- */
572
- if (
573
- hasNextSentence && this.isNumber( nextCharacters[ 0 ] ) ||
574
- ( this.isSentenceEnding( previousToken ) &&
575
- ( ! ( this.isValidSentenceBeginning( nextSentenceStart ) || this.isSentenceStart( nextToken ) ) ) )
576
- ) {
577
- break;
578
- }
579
-
580
- /*
581
- * Split if:
582
- * - The block end is preceded by a sentence ending and followed by a valid sentence beginning.
583
- */
584
- if (
585
- this.isSentenceEnding( previousToken ) &&
586
- ( this.isSentenceStart( nextToken ) || this.isValidSentenceBeginning( nextSentenceStart ) )
587
- ) {
588
- tokenSentences.push( currentSentence );
589
- currentSentence = "";
590
- }
591
- break;
592
- }
593
- } );
594
-
595
- if ( "" !== currentSentence ) {
596
- tokenSentences.push( currentSentence );
597
- }
598
-
599
- if ( trimSentences ) {
600
- tokenSentences = map( tokenSentences, function( sentence ) {
601
- return sentence.trim();
602
- } );
603
- }
604
-
605
- return tokenSentences;
606
- }
607
-
608
- /**
609
- * Gets the current sentence when:
610
- * a) There is a next sentence, and the next character is a valid sentence beginning preceded by a white space, OR
611
- * b) The next token is a sentence start
612
- *
613
- * @param {boolean} hasNextSentence Whether the next characters are more than two.
614
- * @param {string} nextSentenceStart The second character of the next characters.
615
- * @param {string} nextCharacters The string values of the next two tokens.
616
- * @param {object} nextToken The next token object.
617
- * @param {array} tokenSentences The array of pushed valid sentences.
618
- * @param {string} currentSentence The current sentence.
619
- *
620
- * @returns {string} The current sentence.
621
- */
622
- getValidSentence( hasNextSentence, nextSentenceStart, nextCharacters, nextToken, tokenSentences, currentSentence ) {
623
- if ( ( hasNextSentence && this.isValidSentenceBeginning( nextSentenceStart ) && this.isCharacterASpace( nextCharacters[ 0 ] ) ) ||
624
- this.isSentenceStart( nextToken ) ) {
625
- tokenSentences.push( currentSentence );
626
- currentSentence = "";
627
- }
628
- return currentSentence;
629
- }
630
-
631
- /**
632
- * Checks if the character is a whitespace.
633
- *
634
- * @param {string} character The character to check.
635
- * @returns {boolean} Whether the character is a whitespace.
636
- */
637
- isCharacterASpace( character ) {
638
- return /\s/.test( character );
639
- }
640
- }
@@ -1,20 +0,0 @@
1
- /** @module stringProcessing/countSentences */
2
-
3
- import getSentences from "./getSentences.js";
4
-
5
- /**
6
- * Counts the number of sentences in a given string.
7
- *
8
- * @param {string} text The text used to count sentences.
9
- * @param {function} memoizedTokenizer The memoized sentence tokenizer.
10
- *
11
- * @returns {number} The number of sentences in the text.
12
- */
13
- export default function( text, memoizedTokenizer ) {
14
- const sentences = getSentences( text, memoizedTokenizer );
15
- let sentenceCount = 0;
16
- for ( let i = 0; i < sentences.length; i++ ) {
17
- sentenceCount++;
18
- }
19
- return sentenceCount;
20
- }