axyseo 2.0.0-alpha.0.0.4 → 2.0.0-alpha.0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (637)
  1. package/package.json +5 -2
  2. package/.browserslistrc +0 -1
  3. package/.gitattributes +0 -1
  4. package/babel.config.js +0 -3
  5. package/eslint.config.mjs +0 -119
  6. package/src/bundledPlugins/index.js +0 -5
  7. package/src/bundledPlugins/previouslyUsedKeywords.js +0 -192
  8. package/src/config/diacritics.js +0 -106
  9. package/src/config/getTransliterations.js +0 -1447
  10. package/src/config/transliterationsWPstyle.js +0 -774
  11. package/src/config/wordBoundaries.js +0 -23
  12. package/src/config/wordBoundariesWithoutPunctuation.js +0 -9
  13. package/src/const/analysis.js +0 -41
  14. package/src/errors/invalidType.js +0 -14
  15. package/src/errors/missingArgument.js +0 -14
  16. package/src/helpers/createMeasurementElement.js +0 -40
  17. package/src/helpers/domManipulation.js +0 -65
  18. package/src/helpers/errors.js +0 -26
  19. package/src/helpers/factory.js +0 -219
  20. package/src/helpers/formatNumber.js +0 -12
  21. package/src/helpers/formatString.js +0 -33
  22. package/src/helpers/getLanguagesWithWordComplexity.js +0 -8
  23. package/src/helpers/getLanguagesWithWordFormSupport.js +0 -11
  24. package/src/helpers/getWordComplexityConfig.js +0 -20
  25. package/src/helpers/getWordComplexityHelper.js +0 -20
  26. package/src/helpers/htmlEntities.js +0 -41
  27. package/src/helpers/includesAny.js +0 -19
  28. package/src/helpers/index.js +0 -127
  29. package/src/helpers/shortlinker/Shortlinker.js +0 -75
  30. package/src/helpers/shortlinker/index.js +0 -1
  31. package/src/helpers/shortlinker/singleton.js +0 -68
  32. package/src/helpers/types.js +0 -34
  33. package/src/index.js +0 -60
  34. package/src/languageProcessing/AbstractResearcher.js +0 -366
  35. package/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +0 -125
  36. package/src/languageProcessing/helpers/html/getFieldsToMark.js +0 -29
  37. package/src/languageProcessing/helpers/html/getSubheadingTexts.js +0 -47
  38. package/src/languageProcessing/helpers/html/getSubheadings.js +0 -95
  39. package/src/languageProcessing/helpers/html/html.js +0 -176
  40. package/src/languageProcessing/helpers/html/htmlParser.js +0 -145
  41. package/src/languageProcessing/helpers/html/matchParagraphs.js +0 -62
  42. package/src/languageProcessing/helpers/html/normalizeHTML.js +0 -16
  43. package/src/languageProcessing/helpers/image/getAltAttribute.js +0 -20
  44. package/src/languageProcessing/helpers/image/getImagesInTree.js +0 -16
  45. package/src/languageProcessing/helpers/image/imageInText.js +0 -19
  46. package/src/languageProcessing/helpers/index.js +0 -12
  47. package/src/languageProcessing/helpers/language/getLanguage.js +0 -9
  48. package/src/languageProcessing/helpers/link/checkNofollow.js +0 -38
  49. package/src/languageProcessing/helpers/link/getAnchorsFromText.js +0 -32
  50. package/src/languageProcessing/helpers/link/getLinkType.js +0 -32
  51. package/src/languageProcessing/helpers/match/findKeywordFormsInString.js +0 -101
  52. package/src/languageProcessing/helpers/match/isDoubleQuoted.js +0 -13
  53. package/src/languageProcessing/helpers/match/matchTextWithArray.js +0 -36
  54. package/src/languageProcessing/helpers/match/matchTextWithTransliteration.js +0 -58
  55. package/src/languageProcessing/helpers/match/matchTextWithWord.js +0 -45
  56. package/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +0 -164
  57. package/src/languageProcessing/helpers/match/processExactMatchRequest.js +0 -20
  58. package/src/languageProcessing/helpers/morphology/baseStemmer.js +0 -11
  59. package/src/languageProcessing/helpers/morphology/buildFormRule.js +0 -19
  60. package/src/languageProcessing/helpers/morphology/buildTopicStems.js +0 -169
  61. package/src/languageProcessing/helpers/morphology/createRulesFromArrays.js +0 -45
  62. package/src/languageProcessing/helpers/morphology/exceptionListHelpers.js +0 -65
  63. package/src/languageProcessing/helpers/morphology/findMatchingEndingInArray.js +0 -24
  64. package/src/languageProcessing/helpers/morphology/flattenSortLength.js +0 -14
  65. package/src/languageProcessing/helpers/morphology/getAllWordsFromPaper.js +0 -39
  66. package/src/languageProcessing/helpers/morphology/regexHelpers.js +0 -44
  67. package/src/languageProcessing/helpers/morphology/stemHelpers.js +0 -38
  68. package/src/languageProcessing/helpers/morphology/stemPrefixedFunctionWords.js +0 -31
  69. package/src/languageProcessing/helpers/passiveVoice/periphrastic/directPrecedenceException.js +0 -36
  70. package/src/languageProcessing/helpers/passiveVoice/periphrastic/freeAuxiliaryParticipleOrder/getClausesSplitOnStopWords.js +0 -113
  71. package/src/languageProcessing/helpers/passiveVoice/periphrastic/freeAuxiliaryParticipleOrder/nonDirectParticiplePrecedenceException.js +0 -45
  72. package/src/languageProcessing/helpers/passiveVoice/periphrastic/getClauses.js +0 -231
  73. package/src/languageProcessing/helpers/passiveVoice/periphrastic/getIndicesWithRegex.js +0 -20
  74. package/src/languageProcessing/helpers/passiveVoice/periphrastic/matchRegularParticiples.js +0 -23
  75. package/src/languageProcessing/helpers/passiveVoice/periphrastic/precedenceException.js +0 -40
  76. package/src/languageProcessing/helpers/prominentWords/determineProminentWords.js +0 -238
  77. package/src/languageProcessing/helpers/regex/createRegexFromArray.js +0 -35
  78. package/src/languageProcessing/helpers/regex/createRegexFromDoubleArray.js +0 -34
  79. package/src/languageProcessing/helpers/regex/createWordRegex.js +0 -30
  80. package/src/languageProcessing/helpers/regex/matchStringWithRegex.js +0 -19
  81. package/src/languageProcessing/helpers/regex/searchAndReplaceWithOneRegex.js +0 -14
  82. package/src/languageProcessing/helpers/sanitize/doubleQuotes.js +0 -12
  83. package/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js +0 -131
  84. package/src/languageProcessing/helpers/sanitize/mergeListItems.js +0 -24
  85. package/src/languageProcessing/helpers/sanitize/parseSynonyms.js +0 -20
  86. package/src/languageProcessing/helpers/sanitize/quotes.js +0 -46
  87. package/src/languageProcessing/helpers/sanitize/removeEmailAddresses.js +0 -12
  88. package/src/languageProcessing/helpers/sanitize/removePunctuation.js +0 -64
  89. package/src/languageProcessing/helpers/sanitize/removePunctuationExceptQuotes.js +0 -18
  90. package/src/languageProcessing/helpers/sanitize/removeSentenceTerminators.js +0 -13
  91. package/src/languageProcessing/helpers/sanitize/removeURLs.js +0 -13
  92. package/src/languageProcessing/helpers/sanitize/sanitizeLineBreakTag.js +0 -11
  93. package/src/languageProcessing/helpers/sanitize/sanitizeString.js +0 -18
  94. package/src/languageProcessing/helpers/sanitize/stripHTMLTags.js +0 -57
  95. package/src/languageProcessing/helpers/sanitize/stripNonTextTags.js +0 -15
  96. package/src/languageProcessing/helpers/sanitize/stripNumbers.js +0 -21
  97. package/src/languageProcessing/helpers/sanitize/stripSpaces.js +0 -23
  98. package/src/languageProcessing/helpers/sanitize/stripWordBoundaries.js +0 -65
  99. package/src/languageProcessing/helpers/sanitize/unifyWhitespace.js +0 -61
  100. package/src/languageProcessing/helpers/sentence/SentenceTokenizer.js +0 -640
  101. package/src/languageProcessing/helpers/sentence/countSentences.js +0 -20
  102. package/src/languageProcessing/helpers/sentence/getSentences.js +0 -65
  103. package/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +0 -55
  104. package/src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer.js +0 -28
  105. package/src/languageProcessing/helpers/sentence/sentencesLength.js +0 -31
  106. package/src/languageProcessing/helpers/syllables/DeviationFragment.js +0 -112
  107. package/src/languageProcessing/helpers/syllables/countSyllables.js +0 -182
  108. package/src/languageProcessing/helpers/syllables/syllableCountIterator.js +0 -56
  109. package/src/languageProcessing/helpers/syllables/syllableCountStep.js +0 -68
  110. package/src/languageProcessing/helpers/transform/transformWordsWithHyphens.js +0 -17
  111. package/src/languageProcessing/helpers/transliterate/replaceDiacritics.js +0 -22
  112. package/src/languageProcessing/helpers/transliterate/specialCharacterMappings.js +0 -214
  113. package/src/languageProcessing/helpers/transliterate/transliterate.js +0 -20
  114. package/src/languageProcessing/helpers/transliterate/transliterateWPstyle.js +0 -21
  115. package/src/languageProcessing/helpers/url/parseSlug.js +0 -10
  116. package/src/languageProcessing/helpers/url/url.js +0 -172
  117. package/src/languageProcessing/helpers/word/addWordboundary.js +0 -37
  118. package/src/languageProcessing/helpers/word/areWordsInSentence.js +0 -16
  119. package/src/languageProcessing/helpers/word/countMetaDescriptionLength.js +0 -18
  120. package/src/languageProcessing/helpers/word/countWords.js +0 -14
  121. package/src/languageProcessing/helpers/word/createPunctuationTokens.js +0 -42
  122. package/src/languageProcessing/helpers/word/filterWordsFromArray.js +0 -15
  123. package/src/languageProcessing/helpers/word/followsIndex.js +0 -25
  124. package/src/languageProcessing/helpers/word/getAllWordsFromTree.js +0 -23
  125. package/src/languageProcessing/helpers/word/getWords.js +0 -43
  126. package/src/languageProcessing/helpers/word/includesIndex.js +0 -30
  127. package/src/languageProcessing/helpers/word/indices.js +0 -146
  128. package/src/languageProcessing/helpers/word/markWordsInSentences.js +0 -173
  129. package/src/languageProcessing/helpers/word/matchWordInSentence.js +0 -61
  130. package/src/languageProcessing/helpers/word/splitIntoTokens.js +0 -46
  131. package/src/languageProcessing/index.js +0 -91
  132. package/src/languageProcessing/languages/_default/Researcher.js +0 -34
  133. package/src/languageProcessing/languages/_default/helpers/getStemmer.js +0 -11
  134. package/src/languageProcessing/languages/ar/Researcher.js +0 -46
  135. package/src/languageProcessing/languages/ar/config/firstWordExceptions.js +0 -14
  136. package/src/languageProcessing/languages/ar/config/functionWords.js +0 -329
  137. package/src/languageProcessing/languages/ar/config/internal/passiveVerbsWithLongVowel.js +0 -570
  138. package/src/languageProcessing/languages/ar/config/prefixedFunctionWords.js +0 -5
  139. package/src/languageProcessing/languages/ar/config/transitionWords.js +0 -19
  140. package/src/languageProcessing/languages/ar/config/twoPartTransitionWords.js +0 -7
  141. package/src/languageProcessing/languages/ar/helpers/createBasicWordForms.js +0 -32
  142. package/src/languageProcessing/languages/ar/helpers/getStemmer.js +0 -22
  143. package/src/languageProcessing/languages/ar/helpers/internal/stem.js +0 -632
  144. package/src/languageProcessing/languages/ar/helpers/isPassiveSentence.js +0 -33
  145. package/src/languageProcessing/languages/ca/Researcher.js +0 -43
  146. package/src/languageProcessing/languages/ca/config/sentenceLength.js +0 -3
  147. package/src/languageProcessing/languages/ca/config/transitionWords.js +0 -31
  148. package/src/languageProcessing/languages/ca/config/twoPartTransitionWords.js +0 -7
  149. package/src/languageProcessing/languages/ca/helpers/getStemmer.js +0 -11
  150. package/src/languageProcessing/languages/cs/Researcher.js +0 -44
  151. package/src/languageProcessing/languages/cs/config/firstWordExceptions.js +0 -15
  152. package/src/languageProcessing/languages/cs/config/functionWords.js +0 -121
  153. package/src/languageProcessing/languages/cs/config/internal/passiveVoiceAuxiliaries.js +0 -38
  154. package/src/languageProcessing/languages/cs/config/internal/passiveVoiceEndings.js +0 -54
  155. package/src/languageProcessing/languages/cs/config/stopWords.js +0 -42
  156. package/src/languageProcessing/languages/cs/config/transitionWords.js +0 -26
  157. package/src/languageProcessing/languages/cs/config/twoPartTransitionWords.js +0 -8
  158. package/src/languageProcessing/languages/cs/helpers/getClauses.js +0 -26
  159. package/src/languageProcessing/languages/cs/helpers/getStemmer.js +0 -22
  160. package/src/languageProcessing/languages/cs/helpers/internal/getParticiples.js +0 -16
  161. package/src/languageProcessing/languages/cs/helpers/internal/stem.js +0 -499
  162. package/src/languageProcessing/languages/cs/values/Clause.js +0 -34
  163. package/src/languageProcessing/languages/de/Researcher.js +0 -52
  164. package/src/languageProcessing/languages/de/config/firstWordExceptions.js +0 -17
  165. package/src/languageProcessing/languages/de/config/functionWords.js +0 -303
  166. package/src/languageProcessing/languages/de/config/internal/exceptionsParticiplesActive.js +0 -2231
  167. package/src/languageProcessing/languages/de/config/internal/passiveVoiceAuxiliaries.js +0 -96
  168. package/src/languageProcessing/languages/de/config/internal/passiveVoiceIrregulars.js +0 -368
  169. package/src/languageProcessing/languages/de/config/internal/passiveVoiceRegex.js +0 -72
  170. package/src/languageProcessing/languages/de/config/keyphraseLength.js +0 -11
  171. package/src/languageProcessing/languages/de/config/stopWords.js +0 -67
  172. package/src/languageProcessing/languages/de/config/syllables.json +0 -460
  173. package/src/languageProcessing/languages/de/config/transitionWords.js +0 -31
  174. package/src/languageProcessing/languages/de/config/twoPartTransitionWords.js +0 -12
  175. package/src/languageProcessing/languages/de/config/wordComplexity.js +0 -4
  176. package/src/languageProcessing/languages/de/helpers/calculateFleschReadingScore.js +0 -18
  177. package/src/languageProcessing/languages/de/helpers/checkIfWordIsComplex.js +0 -40
  178. package/src/languageProcessing/languages/de/helpers/checkIfWordIsFunction.js +0 -15
  179. package/src/languageProcessing/languages/de/helpers/getClauses.js +0 -25
  180. package/src/languageProcessing/languages/de/helpers/getStemmer.js +0 -22
  181. package/src/languageProcessing/languages/de/helpers/internal/SentenceTokenizer.js +0 -31
  182. package/src/languageProcessing/languages/de/helpers/internal/detectAndStemRegularParticiple.js +0 -128
  183. package/src/languageProcessing/languages/de/helpers/internal/determineStem.js +0 -128
  184. package/src/languageProcessing/languages/de/helpers/internal/getParticiples.js +0 -40
  185. package/src/languageProcessing/languages/de/helpers/internal/stem.js +0 -215
  186. package/src/languageProcessing/languages/de/helpers/memoizedSentenceTokenizer.js +0 -28
  187. package/src/languageProcessing/languages/de/values/Clause.js +0 -85
  188. package/src/languageProcessing/languages/el/Researcher.js +0 -46
  189. package/src/languageProcessing/languages/el/config/firstWordExceptions.js +0 -47
  190. package/src/languageProcessing/languages/el/config/functionWords.js +0 -116
  191. package/src/languageProcessing/languages/el/config/internal/auxiliaries.js +0 -19
  192. package/src/languageProcessing/languages/el/config/internal/morphologicalPassiveSuffixes.js +0 -87
  193. package/src/languageProcessing/languages/el/config/internal/nonPassiveVerbStems.js +0 -138
  194. package/src/languageProcessing/languages/el/config/stopWords.js +0 -854
  195. package/src/languageProcessing/languages/el/config/transitionWords.js +0 -26
  196. package/src/languageProcessing/languages/el/config/twoPartTransitionWords.js +0 -10
  197. package/src/languageProcessing/languages/el/helpers/getClauses.js +0 -25
  198. package/src/languageProcessing/languages/el/helpers/getStemmer.js +0 -21
  199. package/src/languageProcessing/languages/el/helpers/internal/getParticiples.js +0 -20
  200. package/src/languageProcessing/languages/el/helpers/internal/stem.js +0 -368
  201. package/src/languageProcessing/languages/el/helpers/isPassiveSentence.js +0 -38
  202. package/src/languageProcessing/languages/el/values/Clause.js +0 -37
  203. package/src/languageProcessing/languages/en/Researcher.js +0 -46
  204. package/src/languageProcessing/languages/en/config/abbreviations.js +0 -55
  205. package/src/languageProcessing/languages/en/config/firstWordExceptions.js +0 -14
  206. package/src/languageProcessing/languages/en/config/functionWords.js +0 -186
  207. package/src/languageProcessing/languages/en/config/internal/passiveVoiceAuxiliaries.js +0 -44
  208. package/src/languageProcessing/languages/en/config/internal/passiveVoiceIrregulars.js +0 -354
  209. package/src/languageProcessing/languages/en/config/internal/passiveVoiceNonVerbEndingEd.js +0 -3047
  210. package/src/languageProcessing/languages/en/config/regularParticiplesRegex.js +0 -5
  211. package/src/languageProcessing/languages/en/config/stopWords.js +0 -52
  212. package/src/languageProcessing/languages/en/config/syllables.json +0 -86
  213. package/src/languageProcessing/languages/en/config/transitionWords.js +0 -48
  214. package/src/languageProcessing/languages/en/config/twoPartTransitionWords.js +0 -7
  215. package/src/languageProcessing/languages/en/config/wordComplexity.js +0 -5
  216. package/src/languageProcessing/languages/en/helpers/calculateFleschReadingScore.js +0 -18
  217. package/src/languageProcessing/languages/en/helpers/checkIfWordIsComplex.js +0 -43
  218. package/src/languageProcessing/languages/en/helpers/getClauses.js +0 -49
  219. package/src/languageProcessing/languages/en/helpers/getStemmer.js +0 -22
  220. package/src/languageProcessing/languages/en/helpers/internal/determineStem.js +0 -178
  221. package/src/languageProcessing/languages/en/helpers/internal/getAdjectiveStem.js +0 -162
  222. package/src/languageProcessing/languages/en/helpers/internal/getParticiples.js +0 -25
  223. package/src/languageProcessing/languages/en/helpers/internal/getVerbStem.js +0 -237
  224. package/src/languageProcessing/languages/en/values/Clause.js +0 -68
  225. package/src/languageProcessing/languages/es/Researcher.js +0 -48
  226. package/src/languageProcessing/languages/es/config/firstWordExceptions.js +0 -16
  227. package/src/languageProcessing/languages/es/config/functionWords.js +0 -321
  228. package/src/languageProcessing/languages/es/config/internal/passiveVoiceAuxiliaries.js +0 -60
  229. package/src/languageProcessing/languages/es/config/internal/passiveVoiceParticiples.js +0 -7327
  230. package/src/languageProcessing/languages/es/config/sentenceLength.js +0 -3
  231. package/src/languageProcessing/languages/es/config/stopWords.js +0 -33
  232. package/src/languageProcessing/languages/es/config/syllables.json +0 -176
  233. package/src/languageProcessing/languages/es/config/transitionWords.js +0 -40
  234. package/src/languageProcessing/languages/es/config/twoPartTransitionWords.js +0 -10
  235. package/src/languageProcessing/languages/es/config/wordComplexity.js +0 -4
  236. package/src/languageProcessing/languages/es/helpers/calculateFleschReadingScore.js +0 -18
  237. package/src/languageProcessing/languages/es/helpers/checkIfWordIsComplex.js +0 -56
  238. package/src/languageProcessing/languages/es/helpers/getClauses.js +0 -29
  239. package/src/languageProcessing/languages/es/helpers/getStemmer.js +0 -22
  240. package/src/languageProcessing/languages/es/helpers/internal/checkVerbStemModifications.js +0 -41
  241. package/src/languageProcessing/languages/es/helpers/internal/getParticiples.js +0 -35
  242. package/src/languageProcessing/languages/es/helpers/internal/stem.js +0 -793
  243. package/src/languageProcessing/languages/es/values/Clause.js +0 -47
  244. package/src/languageProcessing/languages/fa/Researcher.js +0 -47
  245. package/src/languageProcessing/languages/fa/config/firstWordExceptions.js +0 -12
  246. package/src/languageProcessing/languages/fa/config/functionWords.js +0 -122
  247. package/src/languageProcessing/languages/fa/config/internal/participles.js +0 -1429
  248. package/src/languageProcessing/languages/fa/config/sentenceLength.js +0 -3
  249. package/src/languageProcessing/languages/fa/config/transitionWords.js +0 -20
  250. package/src/languageProcessing/languages/fa/config/twoPartTransitionWords.js +0 -9
  251. package/src/languageProcessing/languages/fa/helpers/createBasicWordForms.js +0 -97
  252. package/src/languageProcessing/languages/fa/helpers/getStemmer.js +0 -13
  253. package/src/languageProcessing/languages/fa/helpers/isPassiveSentence.js +0 -14
  254. package/src/languageProcessing/languages/fr/Researcher.js +0 -46
  255. package/src/languageProcessing/languages/fr/config/firstWordExceptions.js +0 -16
  256. package/src/languageProcessing/languages/fr/config/functionWords.js +0 -281
  257. package/src/languageProcessing/languages/fr/config/internal/exceptionsParticiplesActive.js +0 -1510
  258. package/src/languageProcessing/languages/fr/config/internal/passiveVoiceAuxiliaries.js +0 -108
  259. package/src/languageProcessing/languages/fr/config/internal/passiveVoiceIrregulars.js +0 -565
  260. package/src/languageProcessing/languages/fr/config/stopWords.js +0 -119
  261. package/src/languageProcessing/languages/fr/config/syllables.json +0 -1426
  262. package/src/languageProcessing/languages/fr/config/transitionWords.js +0 -59
  263. package/src/languageProcessing/languages/fr/config/twoPartTransitionWords.js +0 -15
  264. package/src/languageProcessing/languages/fr/config/wordComplexity.js +0 -4
  265. package/src/languageProcessing/languages/fr/helpers/calculateFleschReadingScore.js +0 -18
  266. package/src/languageProcessing/languages/fr/helpers/checkIfWordIsComplex.js +0 -67
  267. package/src/languageProcessing/languages/fr/helpers/getClauses.js +0 -34
  268. package/src/languageProcessing/languages/fr/helpers/getStemmer.js +0 -22
  269. package/src/languageProcessing/languages/fr/helpers/internal/getParticiples.js +0 -72
  270. package/src/languageProcessing/languages/fr/helpers/internal/stem.js +0 -633
  271. package/src/languageProcessing/languages/fr/values/Clause.js +0 -96
  272. package/src/languageProcessing/languages/he/Researcher.js +0 -50
  273. package/src/languageProcessing/languages/he/config/firstWordExceptions.js +0 -13
  274. package/src/languageProcessing/languages/he/config/functionWords.js +0 -564
  275. package/src/languageProcessing/languages/he/config/internal/regularRootsHufal.js +0 -186
  276. package/src/languageProcessing/languages/he/config/internal/regularRootsNifal.js +0 -195
  277. package/src/languageProcessing/languages/he/config/internal/regularRootsPual.js +0 -168
  278. package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsHufal.js +0 -188
  279. package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsNifal.js +0 -197
  280. package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsPual.js +0 -170
  281. package/src/languageProcessing/languages/he/config/prefixedFunctionWords.js +0 -2
  282. package/src/languageProcessing/languages/he/config/sentenceLength.js +0 -3
  283. package/src/languageProcessing/languages/he/config/transitionWords.js +0 -28
  284. package/src/languageProcessing/languages/he/config/twoPartTransitionWords.js +0 -8
  285. package/src/languageProcessing/languages/he/helpers/createBasicWordForms.js +0 -33
  286. package/src/languageProcessing/languages/he/helpers/getStemmer.js +0 -22
  287. package/src/languageProcessing/languages/he/helpers/internal/stem.js +0 -52
  288. package/src/languageProcessing/languages/he/helpers/isPassiveSentence.js +0 -96
  289. package/src/languageProcessing/languages/he/helpers/stem.js +0 -52
  290. package/src/languageProcessing/languages/hu/Researcher.js +0 -48
  291. package/src/languageProcessing/languages/hu/config/firstWordExceptions.js +0 -31
  292. package/src/languageProcessing/languages/hu/config/functionWords.js +0 -284
  293. package/src/languageProcessing/languages/hu/config/internal/auxiliaries.js +0 -97
  294. package/src/languageProcessing/languages/hu/config/internal/morphologicalPassiveAffixes.js +0 -125
  295. package/src/languageProcessing/languages/hu/config/internal/nonPassivesInVaAndVe.js +0 -265
  296. package/src/languageProcessing/languages/hu/config/internal/odikVerbs.js +0 -273
  297. package/src/languageProcessing/languages/hu/config/internal/participles.js +0 -412
  298. package/src/languageProcessing/languages/hu/config/stopWords.js +0 -213
  299. package/src/languageProcessing/languages/hu/config/transitionWords.js +0 -42
  300. package/src/languageProcessing/languages/hu/config/twoPartTransitionWords.js +0 -34
  301. package/src/languageProcessing/languages/hu/helpers/getClauses.js +0 -25
  302. package/src/languageProcessing/languages/hu/helpers/getStemmer.js +0 -22
  303. package/src/languageProcessing/languages/hu/helpers/internal/getParticiples.js +0 -21
  304. package/src/languageProcessing/languages/hu/helpers/internal/stem.js +0 -389
  305. package/src/languageProcessing/languages/hu/helpers/isPassiveSentence.js +0 -54
  306. package/src/languageProcessing/languages/hu/values/Clause.js +0 -41
  307. package/src/languageProcessing/languages/id/Researcher.js +0 -46
  308. package/src/languageProcessing/languages/id/config/firstWordExceptions.js +0 -13
  309. package/src/languageProcessing/languages/id/config/functionWords.js +0 -202
  310. package/src/languageProcessing/languages/id/config/internal/nonPassiveVerbsStartingDi.js +0 -215
  311. package/src/languageProcessing/languages/id/config/transitionWords.js +0 -62
  312. package/src/languageProcessing/languages/id/config/twoPartTransitionWords.js +0 -13
  313. package/src/languageProcessing/languages/id/helpers/getStemmer.js +0 -22
  314. package/src/languageProcessing/languages/id/helpers/internal/stem.js +0 -462
  315. package/src/languageProcessing/languages/id/helpers/internal/stemHelpers.js +0 -78
  316. package/src/languageProcessing/languages/id/helpers/isPassiveSentence.js +0 -39
  317. package/src/languageProcessing/languages/id/helpers/splitIntoTokensCustom.js +0 -47
  318. package/src/languageProcessing/languages/it/Researcher.js +0 -48
  319. package/src/languageProcessing/languages/it/config/firstWordExceptions.js +0 -17
  320. package/src/languageProcessing/languages/it/config/functionWords.js +0 -277
  321. package/src/languageProcessing/languages/it/config/internal/passiveVoiceAuxiliaries.js +0 -98
  322. package/src/languageProcessing/languages/it/config/internal/passiveVoiceParticiples.js +0 -7197
  323. package/src/languageProcessing/languages/it/config/sentenceLength.js +0 -3
  324. package/src/languageProcessing/languages/it/config/stopWords.js +0 -57
  325. package/src/languageProcessing/languages/it/config/syllables.json +0 -573
  326. package/src/languageProcessing/languages/it/config/transitionWords.js +0 -104
  327. package/src/languageProcessing/languages/it/config/twoPartTransitionWords.js +0 -9
  328. package/src/languageProcessing/languages/it/helpers/calculateFleschReadingScore.js +0 -15
  329. package/src/languageProcessing/languages/it/helpers/getClauses.js +0 -32
  330. package/src/languageProcessing/languages/it/helpers/getStemmer.js +0 -22
  331. package/src/languageProcessing/languages/it/helpers/internal/getParticiples.js +0 -34
  332. package/src/languageProcessing/languages/it/helpers/internal/stem.js +0 -436
  333. package/src/languageProcessing/languages/it/values/Clause.js +0 -47
  334. package/src/languageProcessing/languages/ja/Researcher.js +0 -86
  335. package/src/languageProcessing/languages/ja/config/assessmentApplicabilityCharacterCount.js +0 -4
  336. package/src/languageProcessing/languages/ja/config/firstWordExceptions.js +0 -8
  337. package/src/languageProcessing/languages/ja/config/functionWords.js +0 -563
  338. package/src/languageProcessing/languages/ja/config/keyphraseLength.js +0 -16
  339. package/src/languageProcessing/languages/ja/config/metaDescriptionLength.js +0 -4
  340. package/src/languageProcessing/languages/ja/config/paragraphLength.js +0 -10
  341. package/src/languageProcessing/languages/ja/config/sentenceLength.js +0 -4
  342. package/src/languageProcessing/languages/ja/config/subheadingsTooLong.js +0 -18
  343. package/src/languageProcessing/languages/ja/config/textLength.js +0 -47
  344. package/src/languageProcessing/languages/ja/config/topicLength.js +0 -5
  345. package/src/languageProcessing/languages/ja/config/transitionWords.js +0 -354
  346. package/src/languageProcessing/languages/ja/customResearches/findKeyphraseInSEOTitle.js +0 -98
  347. package/src/languageProcessing/languages/ja/customResearches/getKeyphraseLength.js +0 -19
  348. package/src/languageProcessing/languages/ja/customResearches/getWordForms.js +0 -50
  349. package/src/languageProcessing/languages/ja/customResearches/textLength.js +0 -24
  350. package/src/languageProcessing/languages/ja/helpers/countCharacters.js +0 -19
  351. package/src/languageProcessing/languages/ja/helpers/customGetStemmer.js +0 -21
  352. package/src/languageProcessing/languages/ja/helpers/getContentWords.js +0 -21
  353. package/src/languageProcessing/languages/ja/helpers/getWords.js +0 -31
  354. package/src/languageProcessing/languages/ja/helpers/internal/SentenceTokenizer.js +0 -102
  355. package/src/languageProcessing/languages/ja/helpers/internal/createWordForms.js +0 -68
  356. package/src/languageProcessing/languages/ja/helpers/internal/determineStem.js +0 -17
  357. package/src/languageProcessing/languages/ja/helpers/matchTextWithWord.js +0 -53
  358. package/src/languageProcessing/languages/ja/helpers/matchTransitionWords.js +0 -25
  359. package/src/languageProcessing/languages/ja/helpers/memoizedSentenceTokenizer.js +0 -28
  360. package/src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom.js +0 -20
  361. package/src/languageProcessing/languages/ja/helpers/wordsCharacterCount.js +0 -13
  362. package/src/languageProcessing/languages/nb/Researcher.js +0 -45
  363. package/src/languageProcessing/languages/nb/config/firstWordExceptions.js +0 -12
  364. package/src/languageProcessing/languages/nb/config/functionWords.js +0 -106
  365. package/src/languageProcessing/languages/nb/config/internal/participles.js +0 -3127
  366. package/src/languageProcessing/languages/nb/config/internal/passiveVoiceAuxiliaries.js +0 -15
  367. package/src/languageProcessing/languages/nb/config/stopWords.js +0 -39
  368. package/src/languageProcessing/languages/nb/config/transitionWords.js +0 -21
  369. package/src/languageProcessing/languages/nb/config/twoPartTransitionWords.js +0 -10
  370. package/src/languageProcessing/languages/nb/helpers/getClauses.js +0 -28
  371. package/src/languageProcessing/languages/nb/helpers/getStemmer.js +0 -22
  372. package/src/languageProcessing/languages/nb/helpers/internal/getParticiples.js +0 -24
  373. package/src/languageProcessing/languages/nb/helpers/internal/stem.js +0 -133
  374. package/src/languageProcessing/languages/nb/values/Clause.js +0 -43
  375. package/src/languageProcessing/languages/nl/Researcher.js +0 -48
  376. package/src/languageProcessing/languages/nl/config/firstWordExceptions.js +0 -15
  377. package/src/languageProcessing/languages/nl/config/functionWords.js +0 -233
  378. package/src/languageProcessing/languages/nl/config/internal/nonParticiples.js +0 -2515
  379. package/src/languageProcessing/languages/nl/config/internal/passiveVoiceAuxiliaries.js +0 -13
  380. package/src/languageProcessing/languages/nl/config/internal/passiveVoiceIrregulars.js +0 -474
  381. package/src/languageProcessing/languages/nl/config/keyphraseLength.js +0 -10
  382. package/src/languageProcessing/languages/nl/config/stopWords.js +0 -35
  383. package/src/languageProcessing/languages/nl/config/syllables.json +0 -343
  384. package/src/languageProcessing/languages/nl/config/transitionWords.js +0 -22
  385. package/src/languageProcessing/languages/nl/config/twoPartTransitionWords.js +0 -8
  386. package/src/languageProcessing/languages/nl/helpers/calculateFleschReadingScore.js +0 -15
  387. package/src/languageProcessing/languages/nl/helpers/getClauses.js +0 -25
  388. package/src/languageProcessing/languages/nl/helpers/getStemmer.js +0 -22
  389. package/src/languageProcessing/languages/nl/helpers/internal/checkExceptionsWithFullForms.js +0 -128
  390. package/src/languageProcessing/languages/nl/helpers/internal/detectAndStemRegularParticiple.js +0 -324
  391. package/src/languageProcessing/languages/nl/helpers/internal/detectAndStemSuffixes.js +0 -164
  392. package/src/languageProcessing/languages/nl/helpers/internal/determineStem.js +0 -133
  393. package/src/languageProcessing/languages/nl/helpers/internal/getParticiples.js +0 -25
  394. package/src/languageProcessing/languages/nl/helpers/internal/getStemWordsWithTAndDEnding.js +0 -183
  395. package/src/languageProcessing/languages/nl/helpers/internal/stem.js +0 -146
  396. package/src/languageProcessing/languages/nl/helpers/internal/stemModificationHelpers.js +0 -109
  397. package/src/languageProcessing/languages/nl/helpers/internal/stemTOrDFromEndOfWord.js +0 -65
  398. package/src/languageProcessing/languages/nl/values/Clause.js +0 -62
  399. package/src/languageProcessing/languages/pl/Researcher.js +0 -47
  400. package/src/languageProcessing/languages/pl/config/firstWordExceptions.js +0 -12
  401. package/src/languageProcessing/languages/pl/config/functionWords.js +0 -421
  402. package/src/languageProcessing/languages/pl/config/internal/auxiliaries.js +0 -85
  403. package/src/languageProcessing/languages/pl/config/internal/participles.js +0 -26433
  404. package/src/languageProcessing/languages/pl/config/sentenceLength.js +0 -10
  405. package/src/languageProcessing/languages/pl/config/stopWords.js +0 -36
  406. package/src/languageProcessing/languages/pl/config/transitionWords.js +0 -42
  407. package/src/languageProcessing/languages/pl/config/twoPartTransitionWords.js +0 -8
  408. package/src/languageProcessing/languages/pl/helpers/getClauses.js +0 -25
  409. package/src/languageProcessing/languages/pl/helpers/getStemmer.js +0 -22
  410. package/src/languageProcessing/languages/pl/helpers/internal/getParticiples.js +0 -18
  411. package/src/languageProcessing/languages/pl/helpers/internal/stem.js +0 -161
  412. package/src/languageProcessing/languages/pl/values/Clause.js +0 -53
  413. package/src/languageProcessing/languages/pt/Researcher.js +0 -48
  414. package/src/languageProcessing/languages/pt/config/firstWordExceptions.js +0 -15
  415. package/src/languageProcessing/languages/pt/config/functionWords.js +0 -226
  416. package/src/languageProcessing/languages/pt/config/internal/passiveVoiceAuxiliaries.js +0 -66
  417. package/src/languageProcessing/languages/pt/config/internal/passiveVoiceParticiples.js +0 -4088
  418. package/src/languageProcessing/languages/pt/config/sentenceLength.js +0 -3
  419. package/src/languageProcessing/languages/pt/config/stopWords.js +0 -50
  420. package/src/languageProcessing/languages/pt/config/syllables.json +0 -38
  421. package/src/languageProcessing/languages/pt/config/transitionWords.js +0 -34
  422. package/src/languageProcessing/languages/pt/config/twoPartTransitionWords.js +0 -9
  423. package/src/languageProcessing/languages/pt/helpers/calculateFleschReadingScore.js +0 -15
  424. package/src/languageProcessing/languages/pt/helpers/getClauses.js +0 -29
  425. package/src/languageProcessing/languages/pt/helpers/getStemmer.js +0 -22
  426. package/src/languageProcessing/languages/pt/helpers/internal/getParticiples.js +0 -35
  427. package/src/languageProcessing/languages/pt/helpers/internal/stem.js +0 -319
  428. package/src/languageProcessing/languages/pt/values/Clause.js +0 -43
  429. package/src/languageProcessing/languages/ru/Researcher.js +0 -48
  430. package/src/languageProcessing/languages/ru/config/firstWordExceptions.js +0 -14
  431. package/src/languageProcessing/languages/ru/config/fleschReadingEaseScores.js +0 -20
  432. package/src/languageProcessing/languages/ru/config/functionWords.js +0 -519
  433. package/src/languageProcessing/languages/ru/config/internal/participlesShortenedList.js +0 -2914
  434. package/src/languageProcessing/languages/ru/config/internal/passiveVoiceParticiples.js +0 -6295
  435. package/src/languageProcessing/languages/ru/config/sentenceLength.js +0 -3
  436. package/src/languageProcessing/languages/ru/config/syllables.json +0 -19
  437. package/src/languageProcessing/languages/ru/config/transitionWords.js +0 -62
  438. package/src/languageProcessing/languages/ru/config/twoPartTransitionWords.js +0 -14
  439. package/src/languageProcessing/languages/ru/helpers/calculateFleschReadingScore.js +0 -16
  440. package/src/languageProcessing/languages/ru/helpers/getStemmer.js +0 -22
  441. package/src/languageProcessing/languages/ru/helpers/internal/stem.js +0 -288
  442. package/src/languageProcessing/languages/ru/helpers/isPassiveSentence.js +0 -14
  443. package/src/languageProcessing/languages/sk/Researcher.js +0 -46
  444. package/src/languageProcessing/languages/sk/config/firstWordExceptions.js +0 -14
  445. package/src/languageProcessing/languages/sk/config/functionWords.js +0 -855
  446. package/src/languageProcessing/languages/sk/config/internal/nonPassives.js +0 -1074
  447. package/src/languageProcessing/languages/sk/config/internal/passiveVoiceAuxiliaries.js +0 -22
  448. package/src/languageProcessing/languages/sk/config/stopWords.js +0 -34
  449. package/src/languageProcessing/languages/sk/config/transitionWords.js +0 -23
  450. package/src/languageProcessing/languages/sk/config/twoPartTransitionWords.js +0 -10
  451. package/src/languageProcessing/languages/sk/helpers/getClauses.js +0 -26
  452. package/src/languageProcessing/languages/sk/helpers/getStemmer.js +0 -22
  453. package/src/languageProcessing/languages/sk/helpers/internal/getParticiples.js +0 -16
  454. package/src/languageProcessing/languages/sk/helpers/internal/stem.js +0 -319
  455. package/src/languageProcessing/languages/sk/values/Clause.js +0 -39
  456. package/src/languageProcessing/languages/sv/Researcher.js +0 -45
  457. package/src/languageProcessing/languages/sv/config/firstWordExceptions.js +0 -15
  458. package/src/languageProcessing/languages/sv/config/functionWords.js +0 -176
  459. package/src/languageProcessing/languages/sv/config/internal/passiveVerbs.js +0 -10400
  460. package/src/languageProcessing/languages/sv/config/keyphraseLength.js +0 -11
  461. package/src/languageProcessing/languages/sv/config/transitionWords.js +0 -35
  462. package/src/languageProcessing/languages/sv/config/twoPartTransitionWords.js +0 -8
  463. package/src/languageProcessing/languages/sv/helpers/getStemmer.js +0 -22
  464. package/src/languageProcessing/languages/sv/helpers/internal/stem.js +0 -152
  465. package/src/languageProcessing/languages/sv/helpers/isPassiveSentence.js +0 -14
  466. package/src/languageProcessing/languages/tr/Researcher.js +0 -44
  467. package/src/languageProcessing/languages/tr/config/firstWordExceptions.js +0 -13
  468. package/src/languageProcessing/languages/tr/config/functionWords.js +0 -116
  469. package/src/languageProcessing/languages/tr/config/internal/nonPassiveExceptions.js +0 -574
  470. package/src/languageProcessing/languages/tr/config/internal/passiveEndings.js +0 -151
  471. package/src/languageProcessing/languages/tr/config/sentenceLength.js +0 -7
  472. package/src/languageProcessing/languages/tr/config/transitionWords.js +0 -42
  473. package/src/languageProcessing/languages/tr/config/twoPartTransitionWords.js +0 -7
  474. package/src/languageProcessing/languages/tr/helpers/getStemmer.js +0 -22
  475. package/src/languageProcessing/languages/tr/helpers/internal/stem.js +0 -20
  476. package/src/languageProcessing/languages/tr/helpers/isPassiveSentence.js +0 -43
  477. package/src/languageProcessing/researches/altTagCount.js +0 -70
  478. package/src/languageProcessing/researches/countSentencesFromText.js +0 -19
  479. package/src/languageProcessing/researches/findKeyphraseInSEOTitle.js +0 -257
  480. package/src/languageProcessing/researches/findKeywordInFirstParagraph.js +0 -86
  481. package/src/languageProcessing/researches/findTransitionWords.js +0 -123
  482. package/src/languageProcessing/researches/functionWordsInKeyphrase.js +0 -44
  483. package/src/languageProcessing/researches/getAnchorsWithKeyphrase.js +0 -227
  484. package/src/languageProcessing/researches/getFleschReadingScore.js +0 -150
  485. package/src/languageProcessing/researches/getKeywordDensity.js +0 -44
  486. package/src/languageProcessing/researches/getLinkStatistics.js +0 -54
  487. package/src/languageProcessing/researches/getLinks.js +0 -18
  488. package/src/languageProcessing/researches/getLongCenterAlignedTexts.js +0 -37
  489. package/src/languageProcessing/researches/getParagraphLength.js +0 -44
  490. package/src/languageProcessing/researches/getParagraphs.js +0 -18
  491. package/src/languageProcessing/researches/getPassiveVoiceResult.js +0 -129
  492. package/src/languageProcessing/researches/getProminentWordsForInsights.js +0 -48
  493. package/src/languageProcessing/researches/getProminentWordsForInternalLinking.js +0 -119
  494. package/src/languageProcessing/researches/getSentenceBeginnings.js +0 -124
  495. package/src/languageProcessing/researches/getSubheadingTextLengths.js +0 -59
  496. package/src/languageProcessing/researches/getWordForms.js +0 -204
  497. package/src/languageProcessing/researches/h1s.js +0 -10
  498. package/src/languageProcessing/researches/imageCount.js +0 -16
  499. package/src/languageProcessing/researches/index.js +0 -5
  500. package/src/languageProcessing/researches/keyphraseDistribution.js +0 -249
  501. package/src/languageProcessing/researches/keyphraseLength.js +0 -17
  502. package/src/languageProcessing/researches/keywordCount.js +0 -134
  503. package/src/languageProcessing/researches/keywordCountInUrl.js +0 -57
  504. package/src/languageProcessing/researches/matchKeywordInSubheadings.js +0 -62
  505. package/src/languageProcessing/researches/metaDescriptionKeyword.js +0 -85
  506. package/src/languageProcessing/researches/metaDescriptionLength.js +0 -12
  507. package/src/languageProcessing/researches/pageTitleWidth.js +0 -11
  508. package/src/languageProcessing/researches/readingTime.js +0 -82
  509. package/src/languageProcessing/researches/sentences.js +0 -20
  510. package/src/languageProcessing/researches/videoCount.js +0 -32
  511. package/src/languageProcessing/researches/wordComplexity.js +0 -129
  512. package/src/languageProcessing/researches/wordCountInText.js +0 -29
  513. package/src/languageProcessing/values/Clause.js +0 -108
  514. package/src/languageProcessing/values/ProminentWord.js +0 -95
  515. package/src/languageProcessing/values/Sentence.js +0 -111
  516. package/src/languageProcessing/values/index.js +0 -9
  517. package/src/markers/addMark.js +0 -9
  518. package/src/markers/addMarkSingleWord.js +0 -32
  519. package/src/markers/index.js +0 -7
  520. package/src/markers/removeDuplicateMarks.js +0 -27
  521. package/src/markers/removeMarks.js +0 -11
  522. package/src/parse/build/build.js +0 -52
  523. package/src/parse/build/index.js +0 -10
  524. package/src/parse/build/private/adapt.js +0 -113
  525. package/src/parse/build/private/adaptAttributes.js +0 -36
  526. package/src/parse/build/private/alwaysFilterElements.js +0 -75
  527. package/src/parse/build/private/combineIntoImplicitParagraphs.js +0 -130
  528. package/src/parse/build/private/filterBeforeTokenizing.js +0 -32
  529. package/src/parse/build/private/filterHelpers.js +0 -44
  530. package/src/parse/build/private/filterTree.js +0 -42
  531. package/src/parse/build/private/getTextElementPositions.js +0 -184
  532. package/src/parse/build/private/helpers/parseClassAttribute.js +0 -9
  533. package/src/parse/build/private/isPhrasingContent.js +0 -28
  534. package/src/parse/build/private/parseBlocks.js +0 -151
  535. package/src/parse/build/private/tokenize.js +0 -74
  536. package/src/parse/language/LanguageProcessor.js +0 -74
  537. package/src/parse/structure/Heading.js +0 -26
  538. package/src/parse/structure/Node.js +0 -69
  539. package/src/parse/structure/Paragraph.js +0 -48
  540. package/src/parse/structure/Sentence.js +0 -30
  541. package/src/parse/structure/SourceCodeLocation.js +0 -41
  542. package/src/parse/structure/Text.js +0 -27
  543. package/src/parse/structure/Token.js +0 -24
  544. package/src/parse/structure/index.js +0 -16
  545. package/src/parse/traverse/findAllInTree.js +0 -58
  546. package/src/parse/traverse/index.js +0 -12
  547. package/src/parse/traverse/innerText.js +0 -26
  548. package/src/parsedPaper/ParsedPaper.js +0 -92
  549. package/src/parsedPaper/assess/TreeAssessor.js +0 -184
  550. package/src/parsedPaper/assess/assessmentListFactories.js +0 -73
  551. package/src/parsedPaper/assess/assessments/Assessment.js +0 -79
  552. package/src/parsedPaper/assess/assessments/index.js +0 -6
  553. package/src/parsedPaper/assess/assessorFactories.js +0 -104
  554. package/src/parsedPaper/assess/cornerstone/assessmentListFactories.js +0 -47
  555. package/src/parsedPaper/assess/cornerstone/index.js +0 -5
  556. package/src/parsedPaper/assess/index.js +0 -20
  557. package/src/parsedPaper/build/PaperParser.js +0 -105
  558. package/src/parsedPaper/build/linguisticParsing/Sentence.js +0 -89
  559. package/src/parsedPaper/build/linguisticParsing/SentenceTokenizer.js +0 -323
  560. package/src/parsedPaper/build/linguisticParsing/parseText.js +0 -20
  561. package/src/parsedPaper/build/tree/TreeBuilder.js +0 -75
  562. package/src/parsedPaper/build/tree/cleanup/calculateTextIndices.js +0 -190
  563. package/src/parsedPaper/build/tree/cleanup/getElementContent.js +0 -21
  564. package/src/parsedPaper/build/tree/cleanup/postParsing.js +0 -37
  565. package/src/parsedPaper/build/tree/html/HTMLTreeConverter.js +0 -230
  566. package/src/parsedPaper/build/tree/html/buildTree.js +0 -31
  567. package/src/parsedPaper/build/tree/html/htmlConstants.js +0 -37
  568. package/src/parsedPaper/build/tree/index.js +0 -14
  569. package/src/parsedPaper/build/tree/metadata/buildTree.js +0 -32
  570. package/src/parsedPaper/research/TreeResearcher.js +0 -134
  571. package/src/parsedPaper/research/index.js +0 -13
  572. package/src/parsedPaper/research/researches/Headings.js +0 -20
  573. package/src/parsedPaper/research/researches/LinkStatistics.js +0 -128
  574. package/src/parsedPaper/research/researches/Research.js +0 -50
  575. package/src/parsedPaper/research/researches/index.js +0 -1
  576. package/src/parsedPaper/structure/tree/FormattingElement.js +0 -67
  577. package/src/parsedPaper/structure/tree/SourceCodeLocation.js +0 -31
  578. package/src/parsedPaper/structure/tree/TextContainer.js +0 -85
  579. package/src/parsedPaper/structure/tree/index.js +0 -22
  580. package/src/parsedPaper/structure/tree/nodes/Heading.js +0 -26
  581. package/src/parsedPaper/structure/tree/nodes/LeafNode.js +0 -75
  582. package/src/parsedPaper/structure/tree/nodes/List.js +0 -47
  583. package/src/parsedPaper/structure/tree/nodes/ListItem.js +0 -26
  584. package/src/parsedPaper/structure/tree/nodes/MetadataMiscellaneous.js +0 -46
  585. package/src/parsedPaper/structure/tree/nodes/MetadataText.js +0 -26
  586. package/src/parsedPaper/structure/tree/nodes/Node.js +0 -154
  587. package/src/parsedPaper/structure/tree/nodes/Paragraph.js +0 -24
  588. package/src/parsedPaper/structure/tree/nodes/StructuredNode.js +0 -52
  589. package/src/parsedPaper/structure/tree/nodes/index.js +0 -21
  590. package/src/scoring/assessments/assessment.js +0 -63
  591. package/src/scoring/assessments/index.js +0 -58
  592. package/src/scoring/assessments/readability/ParagraphTooLongAssessment.js +0 -173
  593. package/src/scoring/assessments/readability/SentenceBeginningsAssessment.js +0 -132
  594. package/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js +0 -186
  595. package/src/scoring/assessments/readability/TransitionWordsAssessment.js +0 -168
  596. package/src/scoring/assessments/seo/ImageCountAssessment.js +0 -112
  597. package/src/scoring/assessments/seo/InternalLinksAssessment.js +0 -114
  598. package/src/scoring/assessments/seo/IntroductionKeywordAssessment.js +0 -110
  599. package/src/scoring/assessments/seo/KeyphraseAssessment.js +0 -104
  600. package/src/scoring/assessments/seo/KeyphraseLengthAssessment.js +0 -110
  601. package/src/scoring/assessments/seo/KeywordDensityAssessment.js +0 -116
  602. package/src/scoring/assessments/seo/MetaDescriptionKeywordAssessment.js +0 -114
  603. package/src/scoring/assessments/seo/MetaDescriptionLengthAssessment.js +0 -112
  604. package/src/scoring/assessments/seo/MetaTitleKeywordAssessment.js +0 -111
  605. package/src/scoring/assessments/seo/NumberInMetaTitleAssessment.js +0 -107
  606. package/src/scoring/assessments/seo/OutboundLinksAssessment.js +0 -111
  607. package/src/scoring/assessments/seo/PageTitleWidthAssessment.js +0 -104
  608. package/src/scoring/assessments/seo/SingleH1Assessment.js +0 -118
  609. package/src/scoring/assessments/seo/SingleTitleAssessment.js +0 -108
  610. package/src/scoring/assessments/seo/SubHeadingsKeywordAssessment.js +0 -107
  611. package/src/scoring/assessments/seo/TextImagesAssessment.js +0 -144
  612. package/src/scoring/assessments/seo/TextLengthAssessment.js +0 -100
  613. package/src/scoring/assessments/seo/UrlKeywordAssessment.js +0 -111
  614. package/src/scoring/assessments/seo/UrlLengthAssessment.js +0 -103
  615. package/src/scoring/assessors/assessor.js +0 -269
  616. package/src/scoring/assessors/avadaAssessor.js +0 -67
  617. package/src/scoring/assessors/contentAssessor.js +0 -159
  618. package/src/scoring/assessors/index.js +0 -4
  619. package/src/scoring/assessors/seoAssessor.js +0 -57
  620. package/src/scoring/helpers/assessments/checkForTooLongSentences.js +0 -13
  621. package/src/scoring/helpers/assessments/inRange.js +0 -49
  622. package/src/scoring/helpers/assessments/keyphraseLengthFactor.js +0 -10
  623. package/src/scoring/helpers/assessments/recommendedKeywordCount.js +0 -43
  624. package/src/scoring/helpers/index.js +0 -74
  625. package/src/scoring/interpreters/index.js +0 -5
  626. package/src/scoring/interpreters/scoreToRating.js +0 -31
  627. package/src/scoring/renderers/AssessorPresenter.js +0 -360
  628. package/src/scoring/scoreAggregators/ReadabilityScoreAggregator.js +0 -203
  629. package/src/scoring/scoreAggregators/SEOScoreAggregator.js +0 -54
  630. package/src/scoring/scoreAggregators/ScoreAggregator.js +0 -23
  631. package/src/scoring/scoreAggregators/index.js +0 -3
  632. package/src/values/AssessmentResult.js +0 -496
  633. package/src/values/Mark.js +0 -271
  634. package/src/values/Paper.js +0 -425
  635. package/src/values/index.js +0 -9
  636. package/src/vendor/turkishStemmer.js +0 -3435
  637. package/tsconfig.json +0 -15
@@ -1,324 +0,0 @@
1
- import {languageProcessing} from '../../../index';
2
- const {
3
- regexHelpers: {doesWordMatchRegex}
4
- } = languageProcessing;
5
-
6
- import nonParticiples from '../../config/internal/nonParticiples.js';
7
- import {modifyStem} from './stemModificationHelpers';
8
-
9
- /**
10
- * Looks the word up (exact match) on the given exception list. If it is found on the list, only the last letter
11
- * (the suffix) is removed. Used with the list of participles whose ge- is part of the stem and with the list of inseparable compound verbs.
12
- *
13
- * @param {array} dataExceptionListToCheck The list of the exception words.
14
- * @param {string} word The (unstemmed) word to check.
15
- *
16
- * @returns {null|string} The word without its last letter, or null if the word was not found on the exception list.
17
- */
18
- const checkAndStemIfExceptionWithoutGePrefix = function(dataExceptionListToCheck, word) {
19
- if (dataExceptionListToCheck.includes(word)) {
20
- return word.slice(0, -1);
21
- }
22
- return null;
23
- };
24
-
25
- /**
26
- * Decides whether the final letter (-t or -d) of a word that was detected as a participle is a suffix that should be removed,
27
- * or part of the stem that must be kept. For example, in the participle 'geantwoord', the -d belongs to the stem so it should not be removed.
28
- * The checks are conducted on the word without the prefix, so 'antwoord' in the case of 'geantwoord'.
29
- *
30
- * For words ending in -t, three checks are run in this order (the first one that applies decides the result):
31
- * 1) An exception list (exceptions to a rule) containing words where -t SHOULD be stemmed,
32
- * 2) The rule, defined using a regex with word endings where -t is part of the stem,
33
- * 3) A list of verbs with stem ending in -t, to cover cases that were not possible to find using a regex.
34
- *
35
- * Every other word is looked up on a list of verbs with stem ending in -d. NOTE(review): the -d ending itself is not verified here; presumably the callers' regexes guarantee it - confirm.
36
- *
37
- * @param {string} wordWithoutPrefix The word without prefix(es).
38
- * @param {Object} morphologyDataNL The Dutch morphology data.
39
- *
40
- * @returns {boolean} True if the final letter should be stemmed (the default when no check matches), false if it belongs to the stem.
41
- */
42
- const shouldSuffixBeStemmed = function(wordWithoutPrefix, morphologyDataNL) {
43
- if (wordWithoutPrefix.endsWith('t')) {
44
- // Return true (suffix should be stemmed) if word was found on the exception list of verbs which should have the final -t stemmed.
45
- const exceptionsTShouldBeStemmed = morphologyDataNL.ambiguousTAndDEndings.wordsTShouldBeStemmed;
46
- if (exceptionsTShouldBeStemmed.includes(wordWithoutPrefix)) {
47
- return true;
48
- }
49
- // Return false (suffix should not be stemmed) if word matches the regex for stems ending in -t.
50
- if (
51
- doesWordMatchRegex(
52
- wordWithoutPrefix,
53
- morphologyDataNL.ambiguousTAndDEndings.tOrDArePartOfStem.tEnding
54
- )
55
- ) {
56
- return false;
57
- }
58
- /*
59
- * Return false (suffix should not be stemmed) if the word was found on the list of verbs with stem ending in -t (e.g. haast)
60
- * Otherwise, return true (if no checks are matched, the default condition is for -t to be stemmed).
61
- */
62
- const exceptionsTShouldNotBeStemmed =
63
- morphologyDataNL.stemExceptions.wordsNotToBeStemmedExceptions.verbs;
64
- return !exceptionsTShouldNotBeStemmed.includes(wordWithoutPrefix);
65
- }
66
- const exceptionsDShouldNotBeStemmed = morphologyDataNL.pastParticipleStemmer.doNotStemD;
67
- return !exceptionsDShouldNotBeStemmed.includes(wordWithoutPrefix);
68
- };
69
-
70
- /**
71
- * Detects whether a word is a participle of a regular verb without prefixes other than ge-. Only the regex of the first participle
72
- * stemming class is used, anchored to the start of the word. If it matches, checks whether the word is an exception that should
73
- * not have the prefix or the suffix stemmed. Then stems the word accordingly (remove prefix, suffix, or both).
74
- *
75
- * @param {Object} morphologyDataNL The Dutch morphology data.
76
- * @param {string} word The word (not stemmed) to check.
77
- *
78
- * @returns {string|null} The stem or null if no participle was matched.
79
- */
80
- const detectAndStemParticiplesWithoutPrefixes = function(morphologyDataNL, word) {
81
- const geStemTParticipleRegex = new RegExp(
82
- '^' + morphologyDataNL.pastParticipleStemmer.participleStemmingClasses[0].regex
83
- );
84
-
85
- // Check if it's a ge + stem + t/d participle.
86
- if (geStemTParticipleRegex.test(word)) {
87
- // Check if the ge- is actually part of the stem. If yes, stem only the suffix.
88
- const exception = checkAndStemIfExceptionWithoutGePrefix(
89
- morphologyDataNL.pastParticipleStemmer.doNotStemGe,
90
- word
91
- );
92
- if (exception) {
93
- return exception;
94
- }
95
-
96
- // Remove the two-letter ge- prefix.
97
- let wordWithoutPrefix = word.slice(2);
98
-
99
- // Check if stem starts with ë. If yes, replace ë with e.
100
- if (wordWithoutPrefix.startsWith('ë')) {
101
- wordWithoutPrefix = 'e' + wordWithoutPrefix.slice(1);
102
- }
103
- // Check whether the suffix should be stemmed. If yes, remove it and return the stem.
104
- if (shouldSuffixBeStemmed(wordWithoutPrefix, morphologyDataNL)) {
105
- return wordWithoutPrefix.slice(0, -1);
106
- }
107
-
108
- return wordWithoutPrefix;
109
- }
110
-
111
- return null;
112
- };
113
-
114
- /**
115
- * Determines whether a given participle pattern combined with prefixes from a given class (separable or inseparable)
116
- * applies to a given word and if so, returns the stem with the matched prefix re-attached. Prefixes are tried in list order; the first match decides.
117
- *
118
- * @param {Object} morphologyDataNL The Dutch morphology data.
119
- * @param {string} word The word (not stemmed) to check.
120
- * @param {boolean} separable Whether the prefix is separable or not.
121
- * @param {string[]} prefixes The prefixes of a certain prefix class.
122
- * @param {string} regexPart The regex part for a given class (completed to a full regex within the function).
123
- *
124
- * @returns {string|null} The stem (including the prefix) or null if no prefixed participle was matched.
125
- */
126
- const detectAndStemParticiplePerPrefixClass = function(
127
- morphologyDataNL,
128
- word,
129
- separable,
130
- prefixes,
131
- regexPart
132
- ) {
133
- for (const currentPrefix of prefixes) {
134
- const participleRegex = new RegExp('^' + currentPrefix + regexPart);
135
-
136
- if (participleRegex.test(word)) {
137
- let wordWithoutPrefix = word.slice(currentPrefix.length - word.length); // Negative start index: keeps the part of the word that follows the prefix.
138
- /*
139
- * After removing a separable prefix, check whether the ge- belongs to the stem (e.g. the -ge- in opgebruikt).
140
- * If it does, stem only the suffix.
141
- */
142
- if (separable) {
143
- const exception = checkAndStemIfExceptionWithoutGePrefix(
144
- morphologyDataNL.pastParticipleStemmer.doNotStemGe,
145
- wordWithoutPrefix
146
- );
147
- if (exception) {
148
- return currentPrefix + exception;
149
- }
150
- wordWithoutPrefix = wordWithoutPrefix.slice(2); // Drop the two-letter ge- that follows the separable prefix.
151
- }
152
- // Check whether stem starts with ë. If yes, replace ë with e.
153
- if (wordWithoutPrefix.startsWith('ë')) {
154
- wordWithoutPrefix = 'e' + wordWithoutPrefix.slice(1);
155
- }
156
-
157
- if (shouldSuffixBeStemmed(wordWithoutPrefix, morphologyDataNL)) {
158
- return currentPrefix + wordWithoutPrefix.slice(0, -1);
159
- }
160
- return currentPrefix + wordWithoutPrefix;
161
- }
162
- }
163
-
164
- return null;
165
- };
166
-
167
- /**
168
- * Detects whether a word is a regular participle of a compound verb. A compound verb has a prefix in addition to, or instead of, ge-.
169
- * For example, afgemaakt has the separable prefix af-, and beantwoord has the inseparable prefix be-. If a participle
170
- * of a compound verb is detected, it is stemmed by removing the ge- (in case of a verb with a separable prefix) and the suffix -t or -d.
171
- *
172
- * @param {Object} morphologyDataNL The Dutch morphology data.
173
- * @param {string} word The word (not stemmed) to check.
174
- *
175
- * @returns {string|null} The stem returned by the first participle class that matches, or null if no participle with prefix was matched.
176
- */
177
- const detectAndStemParticiplesWithPrefixes = function(morphologyDataNL, word) {
178
- /*
179
- * It's important to preserve order here, since the ge + stem ending in -t regex is more specific than
180
- * the stem + t regex, and therefore must be checked first.
181
- */
182
- for (const participleClass of morphologyDataNL.pastParticipleStemmer.participleStemmingClasses) {
183
- const regex = participleClass.regex;
184
- const separable = participleClass.separable;
185
-
186
- const prefixes = separable // Each class is combined only with the prefix list of its own kind.
187
- ? morphologyDataNL.pastParticipleStemmer.compoundVerbsPrefixes.separable
188
- : morphologyDataNL.pastParticipleStemmer.compoundVerbsPrefixes.inseparable;
189
-
190
- const stem = detectAndStemParticiplePerPrefixClass(
191
- morphologyDataNL,
192
- word,
193
- separable,
194
- prefixes,
195
- regex
196
- );
197
-
198
- if (stem) {
199
- return stem;
200
- }
201
- }
202
-
203
- return null;
204
- };
205
-
206
- /**
207
- * Checks whether the word is on the list of participles that do not need to be stemmed, because the participle form
208
- * is the same as the stem. The lookup is an exact match against the list entries.
209
- *
210
- * @param {string[]} dataParticiplesSameAsStem The list of exceptions whose stem is the same as the participle.
211
- * @param {string} word The word to check.
212
- * @returns {boolean} Whether the word is found on the exception list.
213
- */
214
- const checkIfParticipleIsSameAsStem = function(dataParticiplesSameAsStem, word) {
215
- return dataParticiplesSameAsStem.includes(word);
216
- };
217
-
218
- /**
219
- * Check whether the word is on an exception list of past participles with inseparable prefixes and ending in -end.
220
- * If not, stem the word that starts with an inseparable verb prefix and ends in -end as a present participle: remove -end and apply the final stem modifications.
221
- *
222
- * @param {array} inseparablePrefixes The list of inseparable prefixes.
223
- * @param {array} dataExceptionListToCheck The list of the exception words.
224
- * @param {array} finalChangesRules The array of regex-based rules to be applied to the stem.
225
- * @param {string} word The (unstemmed) word to check.
226
- *
227
- * @returns {null|string} The stemmed word, or null if the word is on the exception list, does not start with an inseparable prefix, or does not end in -end.
228
- */
229
- const checkAndStemIfInseparablePrefixWithEndEnding = function(
230
- inseparablePrefixes,
231
- dataExceptionListToCheck,
232
- finalChangesRules,
233
- word
234
- ) {
235
- const startsWithInseparablePrefix = inseparablePrefixes
236
- .map(prefix => word.startsWith(prefix))
237
- .some(value => value === true);
238
-
239
- if (
240
- startsWithInseparablePrefix &&
241
- word.endsWith('end') &&
242
- !dataExceptionListToCheck.includes(word)
243
- ) {
244
- return modifyStem(word.slice(0, -3), finalChangesRules); // Strip the three-letter -end ending before modifying the stem.
245
- }
246
- return null;
247
- };
248
-
249
- /**
250
- * Detects whether a word is a regular participle and if so, returns the stem. The checks run in a fixed order and the first one that yields a result decides.
251
- *
252
- * @param {Object} morphologyDataNL The Dutch morphology data.
253
- * @param {string} word The word (not stemmed) to check.
254
- *
255
- * @returns {string|null} The participle stem, an empty string if the word is ruled out as a participle, or null if no regular participle was matched.
256
- */
257
- export function detectAndStemRegularParticiple(morphologyDataNL, word) {
258
- // Rule out words that are not participles (ending in -heid, -teit or -tijd, or listed as non-participles). For those, return empty string.
259
- if (
260
- word.endsWith('heid') ||
261
- word.endsWith('teit') ||
262
- word.endsWith('tijd') ||
263
- nonParticiples.includes(word)
264
- ) {
265
- return '';
266
- }
267
-
268
- /**
269
- * Check whether the word is on an exception list of verbs whose participle is the same as the stem. If the word is found
270
- * on the list, return the stem.
271
- */
272
- if (
273
- checkIfParticipleIsSameAsStem(
274
- morphologyDataNL.pastParticipleStemmer.inseparableCompoundVerbsNotToBeStemmed,
275
- word
276
- )
277
- ) {
278
- return word;
279
- }
280
-
281
- // Check and stem if the word is a participle without any separable or inseparable prefix
282
- let stem = detectAndStemParticiplesWithoutPrefixes(morphologyDataNL, word);
283
-
284
- if (stem) {
285
- return stem;
286
- }
287
-
288
- /**
289
- * Check whether the word is on an exception list of inseparable compound verbs with a prefix that is usually separable.
290
- * If it is, remove just the suffix and return the stem.
291
- */
292
- stem = checkAndStemIfExceptionWithoutGePrefix(
293
- morphologyDataNL.pastParticipleStemmer.inseparableCompoundVerbs,
294
- word
295
- );
296
-
297
- if (stem) {
298
- return stem;
299
- }
300
-
301
- /**
302
- * Check whether the word is on an exception list of past participles with inseparable prefixes and ending in -end.
303
- * If not, stem the word that starts with an inseparable verb prefix and ends in -end as a present participle.
304
- */
305
- stem = checkAndStemIfInseparablePrefixWithEndEnding(
306
- morphologyDataNL.pastParticipleStemmer.compoundVerbsPrefixes.inseparable,
307
- morphologyDataNL.pastParticipleStemmer.pastParticiplesEndingOnEnd,
308
- morphologyDataNL.regularStemmer.stemModifications.finalChanges,
309
- word
310
- );
311
-
312
- if (stem) {
313
- return stem;
314
- }
315
-
316
- // Check and stem if the word is a participle with a separable or inseparable prefix
317
- stem = detectAndStemParticiplesWithPrefixes(morphologyDataNL, word);
318
-
319
- if (stem) {
320
- return stem;
321
- }
322
-
323
- return null;
324
- }
@@ -1,164 +0,0 @@
1
- /**
2
- * @file Dutch stemming algorithm. Adapted from:
3
- * @author:
4
- * @copyright
5
- * All rights reserved.
6
- * Implementation of the stemming algorithm from http://snowball.tartarus.org/algorithms/dutch/stemmer.html
7
- * Copyright of the algorithm is: Copyright (c) 2001, Dr Martin Porter and can be found at http://snowball.tartarus.org/license.php
8
- *
9
- * Redistribution and use in source and binary forms, with or without modification, is covered by the standard BSD license.
10
- */
11
-
12
- import { isVowelDoublingAllowed, modifyStem } from "./stemModificationHelpers";
13
-
14
/**
 * Determines the start index of the R1 region.
 * R1 starts right after the first non-vowel that follows a vowel. The index is raised to 3 when it would
 * otherwise be lower, so that the part of the word preceding R1 spans at least 3 letters.
 *
 * @param {string} word The word for which to determine the R1 region.
 * @returns {number} The start index of the R1 region, or -1 if the word has no vowel followed by a non-vowel.
 */
const determineR1 = function( word ) {
	// Position of the first vowel that is directly followed by a non-vowel.
	const vowelConsonantIndex = word.search( /[aeiouyèäüëïöáéíóú][^aeiouyèäüëïöáéíóú]/ );

	// Without such a pair there is no R1 region at all.
	if ( vowelConsonantIndex === -1 ) {
		return -1;
	}

	// R1 begins after the matched pair (+2), but never earlier than index 3.
	return Math.max( vowelConsonantIndex + 2, 3 );
};
36
-
37
/**
 * Searches for a suffix in a word.
 *
 * The suffix classes of the step are tried in order. Within a class, only the first pattern that matches the
 * word is considered; the text of its last capture group is taken as the suffix. If that suffix does not lie
 * inside the R1 region, the search moves on to the next class.
 *
 * @param {string} word       The word in which to look for suffixes.
 * @param {Object} suffixStep One of the steps of finding suffixes: an object whose values each hold a
 *                            `suffixes` list of patterns and a `stemModification` label.
 * @param {number} r1Index    The index of the R1 region (-1 if the word has no R1 region).
 *
 * @returns {Object|undefined} The index of the suffix and the stem modification that belongs to it, or
 *                             undefined if no suffix was found within R1.
 */
const findSuffix = function( word, suffixStep, r1Index ) {
	for ( const { suffixes, stemModification } of Object.values( suffixStep ) ) {
		// Run each pattern once and keep the match of the first pattern that hits.
		let matched = null;
		for ( const suffixRegex of suffixes ) {
			matched = new RegExp( suffixRegex ).exec( word );
			if ( matched ) {
				break;
			}
		}

		if ( ! matched ) {
			continue;
		}

		// The last capture group holds the suffix itself; locate its final occurrence in the word.
		const suffix = matched[ matched.length - 1 ];
		const suffixIndex = word.lastIndexOf( suffix );

		// Only a suffix that starts inside the R1 region may be removed.
		if ( r1Index !== -1 && suffixIndex >= r1Index ) {
			return { suffixIndex, stemModification };
		}
	}

	// Callers test for `undefined` to detect that nothing was found.
	return undefined;
};
67
-
68
/**
 * Removes a previously located suffix from a word and applies the stem modification that goes with it.
 *
 * @param {string} word             The word from which to delete the suffix.
 * @param {Object} suffixStep       The suffix step the suffix was found in (not used by this function).
 * @param {number} suffixIndex      The index at which the found suffix starts.
 * @param {string} stemModification The type of stem modification that needs to be done.
 * @param {Object} morphologyDataNL The Dutch morphology data file.
 * @returns {string} The stemmed and modified word.
 */
const deleteSuffixAndModifyStem = function( word, suffixStep, suffixIndex, stemModification, morphologyDataNL ) {
	// The hedenToHeid rules are applied to the complete word, before anything is cut off.
	if ( stemModification === "hedenToHeid" ) {
		return modifyStem( word, morphologyDataNL.regularStemmer.stemModifications.hedenToHeid );
	}

	const stem = word.substring( 0, suffixIndex );

	switch ( stemModification ) {
		case "changeIedtoId":
			return modifyStem( stem, morphologyDataNL.regularStemmer.stemModifications.iedToId );

		case "changeInktoIng":
			// Only stems that actually end in -ink are rewritten.
			if ( stem.endsWith( "ink" ) ) {
				return modifyStem( stem, morphologyDataNL.regularStemmer.stemModifications.inkToIng );
			}
			break;

		case "vowelDoubling": {
			const vowelDoublingAllowed = isVowelDoublingAllowed(
				stem,
				morphologyDataNL.regularStemmer.stemModifications.exceptionsStemModifications,
				morphologyDataNL.pastParticipleStemmer.compoundVerbsPrefixes
			);
			if ( vowelDoublingAllowed ) {
				return modifyStem( stem, morphologyDataNL.regularStemmer.stemModifications.doubleVowel );
			}
			break;
		}

		default:
			break;
	}

	// No modification applies: return the word with just the suffix removed.
	return stem;
};
99
-
100
/**
 * Looks for a suffix belonging to one particular step and, if there is one, deletes it and modifies the stem.
 *
 * @param {string} word             The word for which to find and delete a suffix.
 * @param {Object} suffixStep       One of the suffix steps.
 * @param {number} r1Index          The index of the R1 region.
 * @param {Object} morphologyDataNL The Dutch morphology data file.
 * @returns {string} The word with the suffix deleted, or the unchanged word if no suffix was found.
 */
const findAndDeleteSuffix = function( word, suffixStep, r1Index, morphologyDataNL ) {
	const suffixMatch = findSuffix( word, suffixStep, r1Index );

	// Nothing to remove in this step.
	if ( typeof suffixMatch === "undefined" ) {
		return word;
	}

	const { suffixIndex, stemModification } = suffixMatch;
	return deleteSuffixAndModifyStem( word, suffixStep, suffixIndex, stemModification, morphologyDataNL );
};
117
-
118
/**
 * Runs the word through every suffix step in turn. Each step checks whether there is a valid suffix within the
 * R1 region, deletes it, and applies the suffix-specific stem modification if needed. The output of one step is
 * the input of the next.
 *
 * @param {string} word             The word for which to find and delete suffixes.
 * @param {Object} suffixSteps      All of the suffix steps.
 * @param {number} r1Index          The index of the R1 region.
 * @param {Object} morphologyDataNL The Dutch morphology data file.
 * @returns {string} The word with the suffixes deleted.
 */
const findAndDeleteSuffixes = function( word, suffixSteps, r1Index, morphologyDataNL ) {
	return Object.values( suffixSteps ).reduce(
		( currentWord, suffixStep ) => findAndDeleteSuffix( currentWord, suffixStep, r1Index, morphologyDataNL ),
		word
	);
};
137
-
138
/**
 * Searches for suffixes in a word, removes them if found, and modifies the stem if needed.
 *
 * @param {string} word             The word to stem.
 * @param {Object} morphologyDataNL The Dutch morphology data file.
 *
 * @returns {string} The stemmed word.
 */
export default function detectAndStemSuffixes( word, morphologyDataNL ) {
	const regularStemmer = morphologyDataNL.regularStemmer;

	/*
	 * Apply the IAndYToUppercase rules first: i and y in between vowels, initial y, and y after a vowel are put
	 * into upper case. They should be treated as consonants, so they need to be distinguishable from the other
	 * i's and y's when the suffix regexes are matched.
	 */
	const preparedWord = modifyStem( word, regularStemmer.stemModifications.IAndYToUppercase );

	// The R1 region is determined once, on the prepared word, and shared by all suffix steps.
	const r1Index = determineR1( preparedWord );

	// Run through all steps of possible de-suffixation.
	const withoutSuffixes = findAndDeleteSuffixes( preparedWord, regularStemmer.suffixes, r1Index, morphologyDataNL );

	// Do the final modifications to the stem.
	return modifyStem( withoutSuffixes, regularStemmer.stemModifications.finalChanges );
}
@@ -1,133 +0,0 @@
1
- import {flatten} from 'lodash';
2
- import {languageProcessing} from '../../../index';
3
- const {
4
- flattenSortLength,
5
- exceptionListHelpers: {checkExceptionListWithTwoStems}
6
- } = languageProcessing;
7
-
8
- import stem from './stem';
9
- import {stemTOrDFromEndOfWord} from './stemTOrDFromEndOfWord';
10
-
11
/**
 * Looks the word up in the strong verb exception lists. When one of the stem sets contains the word, the first
 * stem of that set is returned.
 * E.g. stems: help, hielp, geholp -> the stem returned would be "help".
 *
 * @param {Object} strongVerbsLists The exception lists of strong verbs: an object whose values are lists of stem sets.
 * @param {string} stemmedWord      The word to check.
 * @returns {string|undefined} The unique stem, or undefined if the word is in none of the stem sets.
 */
const checkStrongVerbExceptionList = function(strongVerbsLists, stemmedWord) {
  for (const stemSets of Object.values(strongVerbsLists)) {
    for (const stemSet of stemSets) {
      // A stem set maps to single stems and/or lists of stems; flatten them into one list.
      const allStems = flatten(Object.values(stemSet));
      if (allStems.includes(stemmedWord)) {
        return allStems[0];
      }
    }
  }

  return undefined;
};
29
-
30
/**
 * Checks if the word is in the lists of strong verb exceptions. Before checking, a compound verb prefix is
 * removed from the word if it has one. If the remainder is found in a verb exception list, the first stem of
 * the matching stem set is returned with the prefix attached back.
 * E.g. word to check: verhielp, stem set: help, hielp, geholp -> the stem returned would be "verhelp".
 *
 * @param {Object} morphologyDataNL The Dutch morphology data file.
 * @param {string} stemmedWord      The word to check.
 *
 * @returns {string|undefined} The unique stem, or undefined if the word is not on any strong verb exception list.
 */
const findStemOnVerbExceptionList = function(morphologyDataNL, stemmedWord) {
  // All compound verb prefixes from the data, as returned by flattenSortLength.
  const prefixes = flattenSortLength(morphologyDataNL.pastParticipleStemmer.compoundVerbsPrefixes);
  let foundPrefix = prefixes.find(prefix => stemmedWord.startsWith(prefix));

  const strongAndIrregularVerbs =
    morphologyDataNL.stemExceptions.stemmingExceptionsWithMultipleStems.strongAndIrregularVerbs;
  const doNotStemPrefixException = strongAndIrregularVerbs.doNotStemPrefix.find(exception =>
    stemmedWord.endsWith(exception)
  );

  // The form that is looked up in the exception lists: the word itself, or the word minus its prefix.
  let wordToLookUp = stemmedWord;

  if (doNotStemPrefixException) {
    // The word is one of the strong verbs starting with be-, ont- or ver- whose prefix must stay in place.
    // Reset foundPrefix so that it won't be attached a second time when the stem is found.
    foundPrefix = null;
  } else if (foundPrefix) {
    const stemmedWordWithoutPrefix = stemmedWord.slice(foundPrefix.length);
    // At least 3 characters must be left after prefix deletion so that e.g. "be" is not found in the stem "berg".
    if (stemmedWordWithoutPrefix.length > 2) {
      wordToLookUp = stemmedWordWithoutPrefix;
    } else {
      // Too little is left: treat the word as having no prefix.
      foundPrefix = null;
    }
  }

  const strongVerbExceptions = strongAndIrregularVerbs.strongVerbStems;
  const strongVerbsExceptionLists = [
    strongVerbExceptions.irregularStrongVerbs,
    strongVerbExceptions.regularStrongVerbs,
    strongVerbExceptions.bothRegularAndIrregularStrongVerbs
  ];

  for (const exceptionList of strongVerbsExceptionLists) {
    // Look the word up once per list and reuse the result.
    const canonicalStem = checkStrongVerbExceptionList(exceptionList, wordToLookUp);
    if (canonicalStem) {
      // If the word had a prefix that was removed for the lookup, attach it back.
      return foundPrefix ? foundPrefix + canonicalStem : canonicalStem;
    }
  }

  return undefined;
};
95
-
96
/**
 * Returns the unique stem for a given Dutch input word.
 *
 * The word is stemmed first. The result is then looked up on the exception list of words with two stems and on
 * the strong verb exception lists; a hit on either returns the canonical stem. Otherwise, a stem with an
 * ambiguous -t/-d ending is checked for further stemming. If none of this applies, the stemmed word is returned.
 *
 * @param {string} word             The word to be checked.
 * @param {Object} morphologyDataNL The Dutch data file.
 *
 * @returns {string} The unique stem.
 */
export default function determineStem(word, morphologyDataNL) {
  const stemmedWord = stem(word, morphologyDataNL);

  // Words with multiple stems: prefer the two-stem exception list, then the strong verb lists.
  const {stemmingExceptionsWithTwoStems} =
    morphologyDataNL.stemExceptions.stemmingExceptionsWithMultipleStems;
  const canonicalStem =
    checkExceptionListWithTwoStems(stemmingExceptionsWithTwoStems, stemmedWord) ||
    findStemOnVerbExceptionList(morphologyDataNL, stemmedWord);
  if (canonicalStem) {
    return canonicalStem;
  }

  // If the stemmed word ends in an ambiguous -t or -d ending, check whether it should be stemmed further.
  for (const ending of morphologyDataNL.ambiguousTAndDEndings.tAndDEndings) {
    if (!stemmedWord.endsWith(ending)) {
      continue;
    }
    const stemmedWordWithoutTOrD = stemTOrDFromEndOfWord(morphologyDataNL, stemmedWord, word);
    if (stemmedWordWithoutTOrD) {
      return stemmedWordWithoutTOrD;
    }
  }

  return stemmedWord;
}
@@ -1,25 +0,0 @@
1
- import {includes} from 'lodash';
2
- import {languageProcessing} from '../../../index';
3
- const {getWords, matchRegularParticiples} = languageProcessing;
4
-
5
- import irregularParticiples from '../../config/internal/passiveVoiceIrregulars';
6
-
7
/**
 * Collects the participles that occur in a clause.
 *
 * A word counts as a participle when it matches one of the regular participle patterns or when it is on the
 * list of irregular participles.
 *
 * @param {string} clauseText The clause text to find participles in.
 *
 * @returns {Array} The list of participles found in the clause.
 */
export default function getParticiples(clauseText) {
  const clauseWords = getWords(clauseText);

  // NOTE(review): the patterns carry the `g` flag and are created per call; confirm how
  // matchRegularParticiples applies them before hoisting them out of the function.
  const regularParticiplePatterns = [
    // ge-/be-/ont-/ver-/her-/er- at the start, ending in -d or -t.
    /^(ge|be|ont|ver|her|er)\S+([dt])($|[ \n\r\t.,'()"+\-;!?:/»«‹›<>])/gi,
    // One of the listed prefixes followed by -ge-, ending in -d, -t or -n.
    /^(aan|af|bij|binnen|los|mee|na|neer|om|onder|samen|terug|tegen|toe|uit|vast)(ge)\S+([dtn])($|[ \n\r\t.,'()"+\-;!?:/»«‹›<>])/gi
  ];

  const isParticiple = candidate => {
    if (matchRegularParticiples(candidate, regularParticiplePatterns).length !== 0) {
      return true;
    }
    return includes(irregularParticiples, candidate);
  };

  return clauseWords.filter(isParticiple);
}