axyseo 2.0.0-alpha.0.0.4 → 2.0.0-alpha.0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -2
- package/.browserslistrc +0 -1
- package/.gitattributes +0 -1
- package/babel.config.js +0 -3
- package/eslint.config.mjs +0 -119
- package/src/bundledPlugins/index.js +0 -5
- package/src/bundledPlugins/previouslyUsedKeywords.js +0 -192
- package/src/config/diacritics.js +0 -106
- package/src/config/getTransliterations.js +0 -1447
- package/src/config/transliterationsWPstyle.js +0 -774
- package/src/config/wordBoundaries.js +0 -23
- package/src/config/wordBoundariesWithoutPunctuation.js +0 -9
- package/src/const/analysis.js +0 -41
- package/src/errors/invalidType.js +0 -14
- package/src/errors/missingArgument.js +0 -14
- package/src/helpers/createMeasurementElement.js +0 -40
- package/src/helpers/domManipulation.js +0 -65
- package/src/helpers/errors.js +0 -26
- package/src/helpers/factory.js +0 -219
- package/src/helpers/formatNumber.js +0 -12
- package/src/helpers/formatString.js +0 -33
- package/src/helpers/getLanguagesWithWordComplexity.js +0 -8
- package/src/helpers/getLanguagesWithWordFormSupport.js +0 -11
- package/src/helpers/getWordComplexityConfig.js +0 -20
- package/src/helpers/getWordComplexityHelper.js +0 -20
- package/src/helpers/htmlEntities.js +0 -41
- package/src/helpers/includesAny.js +0 -19
- package/src/helpers/index.js +0 -127
- package/src/helpers/shortlinker/Shortlinker.js +0 -75
- package/src/helpers/shortlinker/index.js +0 -1
- package/src/helpers/shortlinker/singleton.js +0 -68
- package/src/helpers/types.js +0 -34
- package/src/index.js +0 -60
- package/src/languageProcessing/AbstractResearcher.js +0 -366
- package/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +0 -125
- package/src/languageProcessing/helpers/html/getFieldsToMark.js +0 -29
- package/src/languageProcessing/helpers/html/getSubheadingTexts.js +0 -47
- package/src/languageProcessing/helpers/html/getSubheadings.js +0 -95
- package/src/languageProcessing/helpers/html/html.js +0 -176
- package/src/languageProcessing/helpers/html/htmlParser.js +0 -145
- package/src/languageProcessing/helpers/html/matchParagraphs.js +0 -62
- package/src/languageProcessing/helpers/html/normalizeHTML.js +0 -16
- package/src/languageProcessing/helpers/image/getAltAttribute.js +0 -20
- package/src/languageProcessing/helpers/image/getImagesInTree.js +0 -16
- package/src/languageProcessing/helpers/image/imageInText.js +0 -19
- package/src/languageProcessing/helpers/index.js +0 -12
- package/src/languageProcessing/helpers/language/getLanguage.js +0 -9
- package/src/languageProcessing/helpers/link/checkNofollow.js +0 -38
- package/src/languageProcessing/helpers/link/getAnchorsFromText.js +0 -32
- package/src/languageProcessing/helpers/link/getLinkType.js +0 -32
- package/src/languageProcessing/helpers/match/findKeywordFormsInString.js +0 -101
- package/src/languageProcessing/helpers/match/isDoubleQuoted.js +0 -13
- package/src/languageProcessing/helpers/match/matchTextWithArray.js +0 -36
- package/src/languageProcessing/helpers/match/matchTextWithTransliteration.js +0 -58
- package/src/languageProcessing/helpers/match/matchTextWithWord.js +0 -45
- package/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +0 -164
- package/src/languageProcessing/helpers/match/processExactMatchRequest.js +0 -20
- package/src/languageProcessing/helpers/morphology/baseStemmer.js +0 -11
- package/src/languageProcessing/helpers/morphology/buildFormRule.js +0 -19
- package/src/languageProcessing/helpers/morphology/buildTopicStems.js +0 -169
- package/src/languageProcessing/helpers/morphology/createRulesFromArrays.js +0 -45
- package/src/languageProcessing/helpers/morphology/exceptionListHelpers.js +0 -65
- package/src/languageProcessing/helpers/morphology/findMatchingEndingInArray.js +0 -24
- package/src/languageProcessing/helpers/morphology/flattenSortLength.js +0 -14
- package/src/languageProcessing/helpers/morphology/getAllWordsFromPaper.js +0 -39
- package/src/languageProcessing/helpers/morphology/regexHelpers.js +0 -44
- package/src/languageProcessing/helpers/morphology/stemHelpers.js +0 -38
- package/src/languageProcessing/helpers/morphology/stemPrefixedFunctionWords.js +0 -31
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/directPrecedenceException.js +0 -36
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/freeAuxiliaryParticipleOrder/getClausesSplitOnStopWords.js +0 -113
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/freeAuxiliaryParticipleOrder/nonDirectParticiplePrecedenceException.js +0 -45
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/getClauses.js +0 -231
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/getIndicesWithRegex.js +0 -20
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/matchRegularParticiples.js +0 -23
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/precedenceException.js +0 -40
- package/src/languageProcessing/helpers/prominentWords/determineProminentWords.js +0 -238
- package/src/languageProcessing/helpers/regex/createRegexFromArray.js +0 -35
- package/src/languageProcessing/helpers/regex/createRegexFromDoubleArray.js +0 -34
- package/src/languageProcessing/helpers/regex/createWordRegex.js +0 -30
- package/src/languageProcessing/helpers/regex/matchStringWithRegex.js +0 -19
- package/src/languageProcessing/helpers/regex/searchAndReplaceWithOneRegex.js +0 -14
- package/src/languageProcessing/helpers/sanitize/doubleQuotes.js +0 -12
- package/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js +0 -131
- package/src/languageProcessing/helpers/sanitize/mergeListItems.js +0 -24
- package/src/languageProcessing/helpers/sanitize/parseSynonyms.js +0 -20
- package/src/languageProcessing/helpers/sanitize/quotes.js +0 -46
- package/src/languageProcessing/helpers/sanitize/removeEmailAddresses.js +0 -12
- package/src/languageProcessing/helpers/sanitize/removePunctuation.js +0 -64
- package/src/languageProcessing/helpers/sanitize/removePunctuationExceptQuotes.js +0 -18
- package/src/languageProcessing/helpers/sanitize/removeSentenceTerminators.js +0 -13
- package/src/languageProcessing/helpers/sanitize/removeURLs.js +0 -13
- package/src/languageProcessing/helpers/sanitize/sanitizeLineBreakTag.js +0 -11
- package/src/languageProcessing/helpers/sanitize/sanitizeString.js +0 -18
- package/src/languageProcessing/helpers/sanitize/stripHTMLTags.js +0 -57
- package/src/languageProcessing/helpers/sanitize/stripNonTextTags.js +0 -15
- package/src/languageProcessing/helpers/sanitize/stripNumbers.js +0 -21
- package/src/languageProcessing/helpers/sanitize/stripSpaces.js +0 -23
- package/src/languageProcessing/helpers/sanitize/stripWordBoundaries.js +0 -65
- package/src/languageProcessing/helpers/sanitize/unifyWhitespace.js +0 -61
- package/src/languageProcessing/helpers/sentence/SentenceTokenizer.js +0 -640
- package/src/languageProcessing/helpers/sentence/countSentences.js +0 -20
- package/src/languageProcessing/helpers/sentence/getSentences.js +0 -65
- package/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +0 -55
- package/src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer.js +0 -28
- package/src/languageProcessing/helpers/sentence/sentencesLength.js +0 -31
- package/src/languageProcessing/helpers/syllables/DeviationFragment.js +0 -112
- package/src/languageProcessing/helpers/syllables/countSyllables.js +0 -182
- package/src/languageProcessing/helpers/syllables/syllableCountIterator.js +0 -56
- package/src/languageProcessing/helpers/syllables/syllableCountStep.js +0 -68
- package/src/languageProcessing/helpers/transform/transformWordsWithHyphens.js +0 -17
- package/src/languageProcessing/helpers/transliterate/replaceDiacritics.js +0 -22
- package/src/languageProcessing/helpers/transliterate/specialCharacterMappings.js +0 -214
- package/src/languageProcessing/helpers/transliterate/transliterate.js +0 -20
- package/src/languageProcessing/helpers/transliterate/transliterateWPstyle.js +0 -21
- package/src/languageProcessing/helpers/url/parseSlug.js +0 -10
- package/src/languageProcessing/helpers/url/url.js +0 -172
- package/src/languageProcessing/helpers/word/addWordboundary.js +0 -37
- package/src/languageProcessing/helpers/word/areWordsInSentence.js +0 -16
- package/src/languageProcessing/helpers/word/countMetaDescriptionLength.js +0 -18
- package/src/languageProcessing/helpers/word/countWords.js +0 -14
- package/src/languageProcessing/helpers/word/createPunctuationTokens.js +0 -42
- package/src/languageProcessing/helpers/word/filterWordsFromArray.js +0 -15
- package/src/languageProcessing/helpers/word/followsIndex.js +0 -25
- package/src/languageProcessing/helpers/word/getAllWordsFromTree.js +0 -23
- package/src/languageProcessing/helpers/word/getWords.js +0 -43
- package/src/languageProcessing/helpers/word/includesIndex.js +0 -30
- package/src/languageProcessing/helpers/word/indices.js +0 -146
- package/src/languageProcessing/helpers/word/markWordsInSentences.js +0 -173
- package/src/languageProcessing/helpers/word/matchWordInSentence.js +0 -61
- package/src/languageProcessing/helpers/word/splitIntoTokens.js +0 -46
- package/src/languageProcessing/index.js +0 -91
- package/src/languageProcessing/languages/_default/Researcher.js +0 -34
- package/src/languageProcessing/languages/_default/helpers/getStemmer.js +0 -11
- package/src/languageProcessing/languages/ar/Researcher.js +0 -46
- package/src/languageProcessing/languages/ar/config/firstWordExceptions.js +0 -14
- package/src/languageProcessing/languages/ar/config/functionWords.js +0 -329
- package/src/languageProcessing/languages/ar/config/internal/passiveVerbsWithLongVowel.js +0 -570
- package/src/languageProcessing/languages/ar/config/prefixedFunctionWords.js +0 -5
- package/src/languageProcessing/languages/ar/config/transitionWords.js +0 -19
- package/src/languageProcessing/languages/ar/config/twoPartTransitionWords.js +0 -7
- package/src/languageProcessing/languages/ar/helpers/createBasicWordForms.js +0 -32
- package/src/languageProcessing/languages/ar/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/ar/helpers/internal/stem.js +0 -632
- package/src/languageProcessing/languages/ar/helpers/isPassiveSentence.js +0 -33
- package/src/languageProcessing/languages/ca/Researcher.js +0 -43
- package/src/languageProcessing/languages/ca/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/ca/config/transitionWords.js +0 -31
- package/src/languageProcessing/languages/ca/config/twoPartTransitionWords.js +0 -7
- package/src/languageProcessing/languages/ca/helpers/getStemmer.js +0 -11
- package/src/languageProcessing/languages/cs/Researcher.js +0 -44
- package/src/languageProcessing/languages/cs/config/firstWordExceptions.js +0 -15
- package/src/languageProcessing/languages/cs/config/functionWords.js +0 -121
- package/src/languageProcessing/languages/cs/config/internal/passiveVoiceAuxiliaries.js +0 -38
- package/src/languageProcessing/languages/cs/config/internal/passiveVoiceEndings.js +0 -54
- package/src/languageProcessing/languages/cs/config/stopWords.js +0 -42
- package/src/languageProcessing/languages/cs/config/transitionWords.js +0 -26
- package/src/languageProcessing/languages/cs/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/cs/helpers/getClauses.js +0 -26
- package/src/languageProcessing/languages/cs/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/cs/helpers/internal/getParticiples.js +0 -16
- package/src/languageProcessing/languages/cs/helpers/internal/stem.js +0 -499
- package/src/languageProcessing/languages/cs/values/Clause.js +0 -34
- package/src/languageProcessing/languages/de/Researcher.js +0 -52
- package/src/languageProcessing/languages/de/config/firstWordExceptions.js +0 -17
- package/src/languageProcessing/languages/de/config/functionWords.js +0 -303
- package/src/languageProcessing/languages/de/config/internal/exceptionsParticiplesActive.js +0 -2231
- package/src/languageProcessing/languages/de/config/internal/passiveVoiceAuxiliaries.js +0 -96
- package/src/languageProcessing/languages/de/config/internal/passiveVoiceIrregulars.js +0 -368
- package/src/languageProcessing/languages/de/config/internal/passiveVoiceRegex.js +0 -72
- package/src/languageProcessing/languages/de/config/keyphraseLength.js +0 -11
- package/src/languageProcessing/languages/de/config/stopWords.js +0 -67
- package/src/languageProcessing/languages/de/config/syllables.json +0 -460
- package/src/languageProcessing/languages/de/config/transitionWords.js +0 -31
- package/src/languageProcessing/languages/de/config/twoPartTransitionWords.js +0 -12
- package/src/languageProcessing/languages/de/config/wordComplexity.js +0 -4
- package/src/languageProcessing/languages/de/helpers/calculateFleschReadingScore.js +0 -18
- package/src/languageProcessing/languages/de/helpers/checkIfWordIsComplex.js +0 -40
- package/src/languageProcessing/languages/de/helpers/checkIfWordIsFunction.js +0 -15
- package/src/languageProcessing/languages/de/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/de/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/de/helpers/internal/SentenceTokenizer.js +0 -31
- package/src/languageProcessing/languages/de/helpers/internal/detectAndStemRegularParticiple.js +0 -128
- package/src/languageProcessing/languages/de/helpers/internal/determineStem.js +0 -128
- package/src/languageProcessing/languages/de/helpers/internal/getParticiples.js +0 -40
- package/src/languageProcessing/languages/de/helpers/internal/stem.js +0 -215
- package/src/languageProcessing/languages/de/helpers/memoizedSentenceTokenizer.js +0 -28
- package/src/languageProcessing/languages/de/values/Clause.js +0 -85
- package/src/languageProcessing/languages/el/Researcher.js +0 -46
- package/src/languageProcessing/languages/el/config/firstWordExceptions.js +0 -47
- package/src/languageProcessing/languages/el/config/functionWords.js +0 -116
- package/src/languageProcessing/languages/el/config/internal/auxiliaries.js +0 -19
- package/src/languageProcessing/languages/el/config/internal/morphologicalPassiveSuffixes.js +0 -87
- package/src/languageProcessing/languages/el/config/internal/nonPassiveVerbStems.js +0 -138
- package/src/languageProcessing/languages/el/config/stopWords.js +0 -854
- package/src/languageProcessing/languages/el/config/transitionWords.js +0 -26
- package/src/languageProcessing/languages/el/config/twoPartTransitionWords.js +0 -10
- package/src/languageProcessing/languages/el/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/el/helpers/getStemmer.js +0 -21
- package/src/languageProcessing/languages/el/helpers/internal/getParticiples.js +0 -20
- package/src/languageProcessing/languages/el/helpers/internal/stem.js +0 -368
- package/src/languageProcessing/languages/el/helpers/isPassiveSentence.js +0 -38
- package/src/languageProcessing/languages/el/values/Clause.js +0 -37
- package/src/languageProcessing/languages/en/Researcher.js +0 -46
- package/src/languageProcessing/languages/en/config/abbreviations.js +0 -55
- package/src/languageProcessing/languages/en/config/firstWordExceptions.js +0 -14
- package/src/languageProcessing/languages/en/config/functionWords.js +0 -186
- package/src/languageProcessing/languages/en/config/internal/passiveVoiceAuxiliaries.js +0 -44
- package/src/languageProcessing/languages/en/config/internal/passiveVoiceIrregulars.js +0 -354
- package/src/languageProcessing/languages/en/config/internal/passiveVoiceNonVerbEndingEd.js +0 -3047
- package/src/languageProcessing/languages/en/config/regularParticiplesRegex.js +0 -5
- package/src/languageProcessing/languages/en/config/stopWords.js +0 -52
- package/src/languageProcessing/languages/en/config/syllables.json +0 -86
- package/src/languageProcessing/languages/en/config/transitionWords.js +0 -48
- package/src/languageProcessing/languages/en/config/twoPartTransitionWords.js +0 -7
- package/src/languageProcessing/languages/en/config/wordComplexity.js +0 -5
- package/src/languageProcessing/languages/en/helpers/calculateFleschReadingScore.js +0 -18
- package/src/languageProcessing/languages/en/helpers/checkIfWordIsComplex.js +0 -43
- package/src/languageProcessing/languages/en/helpers/getClauses.js +0 -49
- package/src/languageProcessing/languages/en/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/en/helpers/internal/determineStem.js +0 -178
- package/src/languageProcessing/languages/en/helpers/internal/getAdjectiveStem.js +0 -162
- package/src/languageProcessing/languages/en/helpers/internal/getParticiples.js +0 -25
- package/src/languageProcessing/languages/en/helpers/internal/getVerbStem.js +0 -237
- package/src/languageProcessing/languages/en/values/Clause.js +0 -68
- package/src/languageProcessing/languages/es/Researcher.js +0 -48
- package/src/languageProcessing/languages/es/config/firstWordExceptions.js +0 -16
- package/src/languageProcessing/languages/es/config/functionWords.js +0 -321
- package/src/languageProcessing/languages/es/config/internal/passiveVoiceAuxiliaries.js +0 -60
- package/src/languageProcessing/languages/es/config/internal/passiveVoiceParticiples.js +0 -7327
- package/src/languageProcessing/languages/es/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/es/config/stopWords.js +0 -33
- package/src/languageProcessing/languages/es/config/syllables.json +0 -176
- package/src/languageProcessing/languages/es/config/transitionWords.js +0 -40
- package/src/languageProcessing/languages/es/config/twoPartTransitionWords.js +0 -10
- package/src/languageProcessing/languages/es/config/wordComplexity.js +0 -4
- package/src/languageProcessing/languages/es/helpers/calculateFleschReadingScore.js +0 -18
- package/src/languageProcessing/languages/es/helpers/checkIfWordIsComplex.js +0 -56
- package/src/languageProcessing/languages/es/helpers/getClauses.js +0 -29
- package/src/languageProcessing/languages/es/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/es/helpers/internal/checkVerbStemModifications.js +0 -41
- package/src/languageProcessing/languages/es/helpers/internal/getParticiples.js +0 -35
- package/src/languageProcessing/languages/es/helpers/internal/stem.js +0 -793
- package/src/languageProcessing/languages/es/values/Clause.js +0 -47
- package/src/languageProcessing/languages/fa/Researcher.js +0 -47
- package/src/languageProcessing/languages/fa/config/firstWordExceptions.js +0 -12
- package/src/languageProcessing/languages/fa/config/functionWords.js +0 -122
- package/src/languageProcessing/languages/fa/config/internal/participles.js +0 -1429
- package/src/languageProcessing/languages/fa/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/fa/config/transitionWords.js +0 -20
- package/src/languageProcessing/languages/fa/config/twoPartTransitionWords.js +0 -9
- package/src/languageProcessing/languages/fa/helpers/createBasicWordForms.js +0 -97
- package/src/languageProcessing/languages/fa/helpers/getStemmer.js +0 -13
- package/src/languageProcessing/languages/fa/helpers/isPassiveSentence.js +0 -14
- package/src/languageProcessing/languages/fr/Researcher.js +0 -46
- package/src/languageProcessing/languages/fr/config/firstWordExceptions.js +0 -16
- package/src/languageProcessing/languages/fr/config/functionWords.js +0 -281
- package/src/languageProcessing/languages/fr/config/internal/exceptionsParticiplesActive.js +0 -1510
- package/src/languageProcessing/languages/fr/config/internal/passiveVoiceAuxiliaries.js +0 -108
- package/src/languageProcessing/languages/fr/config/internal/passiveVoiceIrregulars.js +0 -565
- package/src/languageProcessing/languages/fr/config/stopWords.js +0 -119
- package/src/languageProcessing/languages/fr/config/syllables.json +0 -1426
- package/src/languageProcessing/languages/fr/config/transitionWords.js +0 -59
- package/src/languageProcessing/languages/fr/config/twoPartTransitionWords.js +0 -15
- package/src/languageProcessing/languages/fr/config/wordComplexity.js +0 -4
- package/src/languageProcessing/languages/fr/helpers/calculateFleschReadingScore.js +0 -18
- package/src/languageProcessing/languages/fr/helpers/checkIfWordIsComplex.js +0 -67
- package/src/languageProcessing/languages/fr/helpers/getClauses.js +0 -34
- package/src/languageProcessing/languages/fr/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/fr/helpers/internal/getParticiples.js +0 -72
- package/src/languageProcessing/languages/fr/helpers/internal/stem.js +0 -633
- package/src/languageProcessing/languages/fr/values/Clause.js +0 -96
- package/src/languageProcessing/languages/he/Researcher.js +0 -50
- package/src/languageProcessing/languages/he/config/firstWordExceptions.js +0 -13
- package/src/languageProcessing/languages/he/config/functionWords.js +0 -564
- package/src/languageProcessing/languages/he/config/internal/regularRootsHufal.js +0 -186
- package/src/languageProcessing/languages/he/config/internal/regularRootsNifal.js +0 -195
- package/src/languageProcessing/languages/he/config/internal/regularRootsPual.js +0 -168
- package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsHufal.js +0 -188
- package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsNifal.js +0 -197
- package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsPual.js +0 -170
- package/src/languageProcessing/languages/he/config/prefixedFunctionWords.js +0 -2
- package/src/languageProcessing/languages/he/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/he/config/transitionWords.js +0 -28
- package/src/languageProcessing/languages/he/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/he/helpers/createBasicWordForms.js +0 -33
- package/src/languageProcessing/languages/he/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/he/helpers/internal/stem.js +0 -52
- package/src/languageProcessing/languages/he/helpers/isPassiveSentence.js +0 -96
- package/src/languageProcessing/languages/he/helpers/stem.js +0 -52
- package/src/languageProcessing/languages/hu/Researcher.js +0 -48
- package/src/languageProcessing/languages/hu/config/firstWordExceptions.js +0 -31
- package/src/languageProcessing/languages/hu/config/functionWords.js +0 -284
- package/src/languageProcessing/languages/hu/config/internal/auxiliaries.js +0 -97
- package/src/languageProcessing/languages/hu/config/internal/morphologicalPassiveAffixes.js +0 -125
- package/src/languageProcessing/languages/hu/config/internal/nonPassivesInVaAndVe.js +0 -265
- package/src/languageProcessing/languages/hu/config/internal/odikVerbs.js +0 -273
- package/src/languageProcessing/languages/hu/config/internal/participles.js +0 -412
- package/src/languageProcessing/languages/hu/config/stopWords.js +0 -213
- package/src/languageProcessing/languages/hu/config/transitionWords.js +0 -42
- package/src/languageProcessing/languages/hu/config/twoPartTransitionWords.js +0 -34
- package/src/languageProcessing/languages/hu/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/hu/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/hu/helpers/internal/getParticiples.js +0 -21
- package/src/languageProcessing/languages/hu/helpers/internal/stem.js +0 -389
- package/src/languageProcessing/languages/hu/helpers/isPassiveSentence.js +0 -54
- package/src/languageProcessing/languages/hu/values/Clause.js +0 -41
- package/src/languageProcessing/languages/id/Researcher.js +0 -46
- package/src/languageProcessing/languages/id/config/firstWordExceptions.js +0 -13
- package/src/languageProcessing/languages/id/config/functionWords.js +0 -202
- package/src/languageProcessing/languages/id/config/internal/nonPassiveVerbsStartingDi.js +0 -215
- package/src/languageProcessing/languages/id/config/transitionWords.js +0 -62
- package/src/languageProcessing/languages/id/config/twoPartTransitionWords.js +0 -13
- package/src/languageProcessing/languages/id/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/id/helpers/internal/stem.js +0 -462
- package/src/languageProcessing/languages/id/helpers/internal/stemHelpers.js +0 -78
- package/src/languageProcessing/languages/id/helpers/isPassiveSentence.js +0 -39
- package/src/languageProcessing/languages/id/helpers/splitIntoTokensCustom.js +0 -47
- package/src/languageProcessing/languages/it/Researcher.js +0 -48
- package/src/languageProcessing/languages/it/config/firstWordExceptions.js +0 -17
- package/src/languageProcessing/languages/it/config/functionWords.js +0 -277
- package/src/languageProcessing/languages/it/config/internal/passiveVoiceAuxiliaries.js +0 -98
- package/src/languageProcessing/languages/it/config/internal/passiveVoiceParticiples.js +0 -7197
- package/src/languageProcessing/languages/it/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/it/config/stopWords.js +0 -57
- package/src/languageProcessing/languages/it/config/syllables.json +0 -573
- package/src/languageProcessing/languages/it/config/transitionWords.js +0 -104
- package/src/languageProcessing/languages/it/config/twoPartTransitionWords.js +0 -9
- package/src/languageProcessing/languages/it/helpers/calculateFleschReadingScore.js +0 -15
- package/src/languageProcessing/languages/it/helpers/getClauses.js +0 -32
- package/src/languageProcessing/languages/it/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/it/helpers/internal/getParticiples.js +0 -34
- package/src/languageProcessing/languages/it/helpers/internal/stem.js +0 -436
- package/src/languageProcessing/languages/it/values/Clause.js +0 -47
- package/src/languageProcessing/languages/ja/Researcher.js +0 -86
- package/src/languageProcessing/languages/ja/config/assessmentApplicabilityCharacterCount.js +0 -4
- package/src/languageProcessing/languages/ja/config/firstWordExceptions.js +0 -8
- package/src/languageProcessing/languages/ja/config/functionWords.js +0 -563
- package/src/languageProcessing/languages/ja/config/keyphraseLength.js +0 -16
- package/src/languageProcessing/languages/ja/config/metaDescriptionLength.js +0 -4
- package/src/languageProcessing/languages/ja/config/paragraphLength.js +0 -10
- package/src/languageProcessing/languages/ja/config/sentenceLength.js +0 -4
- package/src/languageProcessing/languages/ja/config/subheadingsTooLong.js +0 -18
- package/src/languageProcessing/languages/ja/config/textLength.js +0 -47
- package/src/languageProcessing/languages/ja/config/topicLength.js +0 -5
- package/src/languageProcessing/languages/ja/config/transitionWords.js +0 -354
- package/src/languageProcessing/languages/ja/customResearches/findKeyphraseInSEOTitle.js +0 -98
- package/src/languageProcessing/languages/ja/customResearches/getKeyphraseLength.js +0 -19
- package/src/languageProcessing/languages/ja/customResearches/getWordForms.js +0 -50
- package/src/languageProcessing/languages/ja/customResearches/textLength.js +0 -24
- package/src/languageProcessing/languages/ja/helpers/countCharacters.js +0 -19
- package/src/languageProcessing/languages/ja/helpers/customGetStemmer.js +0 -21
- package/src/languageProcessing/languages/ja/helpers/getContentWords.js +0 -21
- package/src/languageProcessing/languages/ja/helpers/getWords.js +0 -31
- package/src/languageProcessing/languages/ja/helpers/internal/SentenceTokenizer.js +0 -102
- package/src/languageProcessing/languages/ja/helpers/internal/createWordForms.js +0 -68
- package/src/languageProcessing/languages/ja/helpers/internal/determineStem.js +0 -17
- package/src/languageProcessing/languages/ja/helpers/matchTextWithWord.js +0 -53
- package/src/languageProcessing/languages/ja/helpers/matchTransitionWords.js +0 -25
- package/src/languageProcessing/languages/ja/helpers/memoizedSentenceTokenizer.js +0 -28
- package/src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom.js +0 -20
- package/src/languageProcessing/languages/ja/helpers/wordsCharacterCount.js +0 -13
- package/src/languageProcessing/languages/nb/Researcher.js +0 -45
- package/src/languageProcessing/languages/nb/config/firstWordExceptions.js +0 -12
- package/src/languageProcessing/languages/nb/config/functionWords.js +0 -106
- package/src/languageProcessing/languages/nb/config/internal/participles.js +0 -3127
- package/src/languageProcessing/languages/nb/config/internal/passiveVoiceAuxiliaries.js +0 -15
- package/src/languageProcessing/languages/nb/config/stopWords.js +0 -39
- package/src/languageProcessing/languages/nb/config/transitionWords.js +0 -21
- package/src/languageProcessing/languages/nb/config/twoPartTransitionWords.js +0 -10
- package/src/languageProcessing/languages/nb/helpers/getClauses.js +0 -28
- package/src/languageProcessing/languages/nb/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/nb/helpers/internal/getParticiples.js +0 -24
- package/src/languageProcessing/languages/nb/helpers/internal/stem.js +0 -133
- package/src/languageProcessing/languages/nb/values/Clause.js +0 -43
- package/src/languageProcessing/languages/nl/Researcher.js +0 -48
- package/src/languageProcessing/languages/nl/config/firstWordExceptions.js +0 -15
- package/src/languageProcessing/languages/nl/config/functionWords.js +0 -233
- package/src/languageProcessing/languages/nl/config/internal/nonParticiples.js +0 -2515
- package/src/languageProcessing/languages/nl/config/internal/passiveVoiceAuxiliaries.js +0 -13
- package/src/languageProcessing/languages/nl/config/internal/passiveVoiceIrregulars.js +0 -474
- package/src/languageProcessing/languages/nl/config/keyphraseLength.js +0 -10
- package/src/languageProcessing/languages/nl/config/stopWords.js +0 -35
- package/src/languageProcessing/languages/nl/config/syllables.json +0 -343
- package/src/languageProcessing/languages/nl/config/transitionWords.js +0 -22
- package/src/languageProcessing/languages/nl/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/nl/helpers/calculateFleschReadingScore.js +0 -15
- package/src/languageProcessing/languages/nl/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/nl/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/nl/helpers/internal/checkExceptionsWithFullForms.js +0 -128
- package/src/languageProcessing/languages/nl/helpers/internal/detectAndStemRegularParticiple.js +0 -324
- package/src/languageProcessing/languages/nl/helpers/internal/detectAndStemSuffixes.js +0 -164
- package/src/languageProcessing/languages/nl/helpers/internal/determineStem.js +0 -133
- package/src/languageProcessing/languages/nl/helpers/internal/getParticiples.js +0 -25
- package/src/languageProcessing/languages/nl/helpers/internal/getStemWordsWithTAndDEnding.js +0 -183
- package/src/languageProcessing/languages/nl/helpers/internal/stem.js +0 -146
- package/src/languageProcessing/languages/nl/helpers/internal/stemModificationHelpers.js +0 -109
- package/src/languageProcessing/languages/nl/helpers/internal/stemTOrDFromEndOfWord.js +0 -65
- package/src/languageProcessing/languages/nl/values/Clause.js +0 -62
- package/src/languageProcessing/languages/pl/Researcher.js +0 -47
- package/src/languageProcessing/languages/pl/config/firstWordExceptions.js +0 -12
- package/src/languageProcessing/languages/pl/config/functionWords.js +0 -421
- package/src/languageProcessing/languages/pl/config/internal/auxiliaries.js +0 -85
- package/src/languageProcessing/languages/pl/config/internal/participles.js +0 -26433
- package/src/languageProcessing/languages/pl/config/sentenceLength.js +0 -10
- package/src/languageProcessing/languages/pl/config/stopWords.js +0 -36
- package/src/languageProcessing/languages/pl/config/transitionWords.js +0 -42
- package/src/languageProcessing/languages/pl/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/pl/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/pl/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/pl/helpers/internal/getParticiples.js +0 -18
- package/src/languageProcessing/languages/pl/helpers/internal/stem.js +0 -161
- package/src/languageProcessing/languages/pl/values/Clause.js +0 -53
- package/src/languageProcessing/languages/pt/Researcher.js +0 -48
- package/src/languageProcessing/languages/pt/config/firstWordExceptions.js +0 -15
- package/src/languageProcessing/languages/pt/config/functionWords.js +0 -226
- package/src/languageProcessing/languages/pt/config/internal/passiveVoiceAuxiliaries.js +0 -66
- package/src/languageProcessing/languages/pt/config/internal/passiveVoiceParticiples.js +0 -4088
- package/src/languageProcessing/languages/pt/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/pt/config/stopWords.js +0 -50
- package/src/languageProcessing/languages/pt/config/syllables.json +0 -38
- package/src/languageProcessing/languages/pt/config/transitionWords.js +0 -34
- package/src/languageProcessing/languages/pt/config/twoPartTransitionWords.js +0 -9
- package/src/languageProcessing/languages/pt/helpers/calculateFleschReadingScore.js +0 -15
- package/src/languageProcessing/languages/pt/helpers/getClauses.js +0 -29
- package/src/languageProcessing/languages/pt/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/pt/helpers/internal/getParticiples.js +0 -35
- package/src/languageProcessing/languages/pt/helpers/internal/stem.js +0 -319
- package/src/languageProcessing/languages/pt/values/Clause.js +0 -43
- package/src/languageProcessing/languages/ru/Researcher.js +0 -48
- package/src/languageProcessing/languages/ru/config/firstWordExceptions.js +0 -14
- package/src/languageProcessing/languages/ru/config/fleschReadingEaseScores.js +0 -20
- package/src/languageProcessing/languages/ru/config/functionWords.js +0 -519
- package/src/languageProcessing/languages/ru/config/internal/participlesShortenedList.js +0 -2914
- package/src/languageProcessing/languages/ru/config/internal/passiveVoiceParticiples.js +0 -6295
- package/src/languageProcessing/languages/ru/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/ru/config/syllables.json +0 -19
- package/src/languageProcessing/languages/ru/config/transitionWords.js +0 -62
- package/src/languageProcessing/languages/ru/config/twoPartTransitionWords.js +0 -14
- package/src/languageProcessing/languages/ru/helpers/calculateFleschReadingScore.js +0 -16
- package/src/languageProcessing/languages/ru/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/ru/helpers/internal/stem.js +0 -288
- package/src/languageProcessing/languages/ru/helpers/isPassiveSentence.js +0 -14
- package/src/languageProcessing/languages/sk/Researcher.js +0 -46
- package/src/languageProcessing/languages/sk/config/firstWordExceptions.js +0 -14
- package/src/languageProcessing/languages/sk/config/functionWords.js +0 -855
- package/src/languageProcessing/languages/sk/config/internal/nonPassives.js +0 -1074
- package/src/languageProcessing/languages/sk/config/internal/passiveVoiceAuxiliaries.js +0 -22
- package/src/languageProcessing/languages/sk/config/stopWords.js +0 -34
- package/src/languageProcessing/languages/sk/config/transitionWords.js +0 -23
- package/src/languageProcessing/languages/sk/config/twoPartTransitionWords.js +0 -10
- package/src/languageProcessing/languages/sk/helpers/getClauses.js +0 -26
- package/src/languageProcessing/languages/sk/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/sk/helpers/internal/getParticiples.js +0 -16
- package/src/languageProcessing/languages/sk/helpers/internal/stem.js +0 -319
- package/src/languageProcessing/languages/sk/values/Clause.js +0 -39
- package/src/languageProcessing/languages/sv/Researcher.js +0 -45
- package/src/languageProcessing/languages/sv/config/firstWordExceptions.js +0 -15
- package/src/languageProcessing/languages/sv/config/functionWords.js +0 -176
- package/src/languageProcessing/languages/sv/config/internal/passiveVerbs.js +0 -10400
- package/src/languageProcessing/languages/sv/config/keyphraseLength.js +0 -11
- package/src/languageProcessing/languages/sv/config/transitionWords.js +0 -35
- package/src/languageProcessing/languages/sv/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/sv/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/sv/helpers/internal/stem.js +0 -152
- package/src/languageProcessing/languages/sv/helpers/isPassiveSentence.js +0 -14
- package/src/languageProcessing/languages/tr/Researcher.js +0 -44
- package/src/languageProcessing/languages/tr/config/firstWordExceptions.js +0 -13
- package/src/languageProcessing/languages/tr/config/functionWords.js +0 -116
- package/src/languageProcessing/languages/tr/config/internal/nonPassiveExceptions.js +0 -574
- package/src/languageProcessing/languages/tr/config/internal/passiveEndings.js +0 -151
- package/src/languageProcessing/languages/tr/config/sentenceLength.js +0 -7
- package/src/languageProcessing/languages/tr/config/transitionWords.js +0 -42
- package/src/languageProcessing/languages/tr/config/twoPartTransitionWords.js +0 -7
- package/src/languageProcessing/languages/tr/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/tr/helpers/internal/stem.js +0 -20
- package/src/languageProcessing/languages/tr/helpers/isPassiveSentence.js +0 -43
- package/src/languageProcessing/researches/altTagCount.js +0 -70
- package/src/languageProcessing/researches/countSentencesFromText.js +0 -19
- package/src/languageProcessing/researches/findKeyphraseInSEOTitle.js +0 -257
- package/src/languageProcessing/researches/findKeywordInFirstParagraph.js +0 -86
- package/src/languageProcessing/researches/findTransitionWords.js +0 -123
- package/src/languageProcessing/researches/functionWordsInKeyphrase.js +0 -44
- package/src/languageProcessing/researches/getAnchorsWithKeyphrase.js +0 -227
- package/src/languageProcessing/researches/getFleschReadingScore.js +0 -150
- package/src/languageProcessing/researches/getKeywordDensity.js +0 -44
- package/src/languageProcessing/researches/getLinkStatistics.js +0 -54
- package/src/languageProcessing/researches/getLinks.js +0 -18
- package/src/languageProcessing/researches/getLongCenterAlignedTexts.js +0 -37
- package/src/languageProcessing/researches/getParagraphLength.js +0 -44
- package/src/languageProcessing/researches/getParagraphs.js +0 -18
- package/src/languageProcessing/researches/getPassiveVoiceResult.js +0 -129
- package/src/languageProcessing/researches/getProminentWordsForInsights.js +0 -48
- package/src/languageProcessing/researches/getProminentWordsForInternalLinking.js +0 -119
- package/src/languageProcessing/researches/getSentenceBeginnings.js +0 -124
- package/src/languageProcessing/researches/getSubheadingTextLengths.js +0 -59
- package/src/languageProcessing/researches/getWordForms.js +0 -204
- package/src/languageProcessing/researches/h1s.js +0 -10
- package/src/languageProcessing/researches/imageCount.js +0 -16
- package/src/languageProcessing/researches/index.js +0 -5
- package/src/languageProcessing/researches/keyphraseDistribution.js +0 -249
- package/src/languageProcessing/researches/keyphraseLength.js +0 -17
- package/src/languageProcessing/researches/keywordCount.js +0 -134
- package/src/languageProcessing/researches/keywordCountInUrl.js +0 -57
- package/src/languageProcessing/researches/matchKeywordInSubheadings.js +0 -62
- package/src/languageProcessing/researches/metaDescriptionKeyword.js +0 -85
- package/src/languageProcessing/researches/metaDescriptionLength.js +0 -12
- package/src/languageProcessing/researches/pageTitleWidth.js +0 -11
- package/src/languageProcessing/researches/readingTime.js +0 -82
- package/src/languageProcessing/researches/sentences.js +0 -20
- package/src/languageProcessing/researches/videoCount.js +0 -32
- package/src/languageProcessing/researches/wordComplexity.js +0 -129
- package/src/languageProcessing/researches/wordCountInText.js +0 -29
- package/src/languageProcessing/values/Clause.js +0 -108
- package/src/languageProcessing/values/ProminentWord.js +0 -95
- package/src/languageProcessing/values/Sentence.js +0 -111
- package/src/languageProcessing/values/index.js +0 -9
- package/src/markers/addMark.js +0 -9
- package/src/markers/addMarkSingleWord.js +0 -32
- package/src/markers/index.js +0 -7
- package/src/markers/removeDuplicateMarks.js +0 -27
- package/src/markers/removeMarks.js +0 -11
- package/src/parse/build/build.js +0 -52
- package/src/parse/build/index.js +0 -10
- package/src/parse/build/private/adapt.js +0 -113
- package/src/parse/build/private/adaptAttributes.js +0 -36
- package/src/parse/build/private/alwaysFilterElements.js +0 -75
- package/src/parse/build/private/combineIntoImplicitParagraphs.js +0 -130
- package/src/parse/build/private/filterBeforeTokenizing.js +0 -32
- package/src/parse/build/private/filterHelpers.js +0 -44
- package/src/parse/build/private/filterTree.js +0 -42
- package/src/parse/build/private/getTextElementPositions.js +0 -184
- package/src/parse/build/private/helpers/parseClassAttribute.js +0 -9
- package/src/parse/build/private/isPhrasingContent.js +0 -28
- package/src/parse/build/private/parseBlocks.js +0 -151
- package/src/parse/build/private/tokenize.js +0 -74
- package/src/parse/language/LanguageProcessor.js +0 -74
- package/src/parse/structure/Heading.js +0 -26
- package/src/parse/structure/Node.js +0 -69
- package/src/parse/structure/Paragraph.js +0 -48
- package/src/parse/structure/Sentence.js +0 -30
- package/src/parse/structure/SourceCodeLocation.js +0 -41
- package/src/parse/structure/Text.js +0 -27
- package/src/parse/structure/Token.js +0 -24
- package/src/parse/structure/index.js +0 -16
- package/src/parse/traverse/findAllInTree.js +0 -58
- package/src/parse/traverse/index.js +0 -12
- package/src/parse/traverse/innerText.js +0 -26
- package/src/parsedPaper/ParsedPaper.js +0 -92
- package/src/parsedPaper/assess/TreeAssessor.js +0 -184
- package/src/parsedPaper/assess/assessmentListFactories.js +0 -73
- package/src/parsedPaper/assess/assessments/Assessment.js +0 -79
- package/src/parsedPaper/assess/assessments/index.js +0 -6
- package/src/parsedPaper/assess/assessorFactories.js +0 -104
- package/src/parsedPaper/assess/cornerstone/assessmentListFactories.js +0 -47
- package/src/parsedPaper/assess/cornerstone/index.js +0 -5
- package/src/parsedPaper/assess/index.js +0 -20
- package/src/parsedPaper/build/PaperParser.js +0 -105
- package/src/parsedPaper/build/linguisticParsing/Sentence.js +0 -89
- package/src/parsedPaper/build/linguisticParsing/SentenceTokenizer.js +0 -323
- package/src/parsedPaper/build/linguisticParsing/parseText.js +0 -20
- package/src/parsedPaper/build/tree/TreeBuilder.js +0 -75
- package/src/parsedPaper/build/tree/cleanup/calculateTextIndices.js +0 -190
- package/src/parsedPaper/build/tree/cleanup/getElementContent.js +0 -21
- package/src/parsedPaper/build/tree/cleanup/postParsing.js +0 -37
- package/src/parsedPaper/build/tree/html/HTMLTreeConverter.js +0 -230
- package/src/parsedPaper/build/tree/html/buildTree.js +0 -31
- package/src/parsedPaper/build/tree/html/htmlConstants.js +0 -37
- package/src/parsedPaper/build/tree/index.js +0 -14
- package/src/parsedPaper/build/tree/metadata/buildTree.js +0 -32
- package/src/parsedPaper/research/TreeResearcher.js +0 -134
- package/src/parsedPaper/research/index.js +0 -13
- package/src/parsedPaper/research/researches/Headings.js +0 -20
- package/src/parsedPaper/research/researches/LinkStatistics.js +0 -128
- package/src/parsedPaper/research/researches/Research.js +0 -50
- package/src/parsedPaper/research/researches/index.js +0 -1
- package/src/parsedPaper/structure/tree/FormattingElement.js +0 -67
- package/src/parsedPaper/structure/tree/SourceCodeLocation.js +0 -31
- package/src/parsedPaper/structure/tree/TextContainer.js +0 -85
- package/src/parsedPaper/structure/tree/index.js +0 -22
- package/src/parsedPaper/structure/tree/nodes/Heading.js +0 -26
- package/src/parsedPaper/structure/tree/nodes/LeafNode.js +0 -75
- package/src/parsedPaper/structure/tree/nodes/List.js +0 -47
- package/src/parsedPaper/structure/tree/nodes/ListItem.js +0 -26
- package/src/parsedPaper/structure/tree/nodes/MetadataMiscellaneous.js +0 -46
- package/src/parsedPaper/structure/tree/nodes/MetadataText.js +0 -26
- package/src/parsedPaper/structure/tree/nodes/Node.js +0 -154
- package/src/parsedPaper/structure/tree/nodes/Paragraph.js +0 -24
- package/src/parsedPaper/structure/tree/nodes/StructuredNode.js +0 -52
- package/src/parsedPaper/structure/tree/nodes/index.js +0 -21
- package/src/scoring/assessments/assessment.js +0 -63
- package/src/scoring/assessments/index.js +0 -58
- package/src/scoring/assessments/readability/ParagraphTooLongAssessment.js +0 -173
- package/src/scoring/assessments/readability/SentenceBeginningsAssessment.js +0 -132
- package/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js +0 -186
- package/src/scoring/assessments/readability/TransitionWordsAssessment.js +0 -168
- package/src/scoring/assessments/seo/ImageCountAssessment.js +0 -112
- package/src/scoring/assessments/seo/InternalLinksAssessment.js +0 -114
- package/src/scoring/assessments/seo/IntroductionKeywordAssessment.js +0 -110
- package/src/scoring/assessments/seo/KeyphraseAssessment.js +0 -104
- package/src/scoring/assessments/seo/KeyphraseLengthAssessment.js +0 -110
- package/src/scoring/assessments/seo/KeywordDensityAssessment.js +0 -116
- package/src/scoring/assessments/seo/MetaDescriptionKeywordAssessment.js +0 -114
- package/src/scoring/assessments/seo/MetaDescriptionLengthAssessment.js +0 -112
- package/src/scoring/assessments/seo/MetaTitleKeywordAssessment.js +0 -111
- package/src/scoring/assessments/seo/NumberInMetaTitleAssessment.js +0 -107
- package/src/scoring/assessments/seo/OutboundLinksAssessment.js +0 -111
- package/src/scoring/assessments/seo/PageTitleWidthAssessment.js +0 -104
- package/src/scoring/assessments/seo/SingleH1Assessment.js +0 -118
- package/src/scoring/assessments/seo/SingleTitleAssessment.js +0 -108
- package/src/scoring/assessments/seo/SubHeadingsKeywordAssessment.js +0 -107
- package/src/scoring/assessments/seo/TextImagesAssessment.js +0 -144
- package/src/scoring/assessments/seo/TextLengthAssessment.js +0 -100
- package/src/scoring/assessments/seo/UrlKeywordAssessment.js +0 -111
- package/src/scoring/assessments/seo/UrlLengthAssessment.js +0 -103
- package/src/scoring/assessors/assessor.js +0 -269
- package/src/scoring/assessors/avadaAssessor.js +0 -67
- package/src/scoring/assessors/contentAssessor.js +0 -159
- package/src/scoring/assessors/index.js +0 -4
- package/src/scoring/assessors/seoAssessor.js +0 -57
- package/src/scoring/helpers/assessments/checkForTooLongSentences.js +0 -13
- package/src/scoring/helpers/assessments/inRange.js +0 -49
- package/src/scoring/helpers/assessments/keyphraseLengthFactor.js +0 -10
- package/src/scoring/helpers/assessments/recommendedKeywordCount.js +0 -43
- package/src/scoring/helpers/index.js +0 -74
- package/src/scoring/interpreters/index.js +0 -5
- package/src/scoring/interpreters/scoreToRating.js +0 -31
- package/src/scoring/renderers/AssessorPresenter.js +0 -360
- package/src/scoring/scoreAggregators/ReadabilityScoreAggregator.js +0 -203
- package/src/scoring/scoreAggregators/SEOScoreAggregator.js +0 -54
- package/src/scoring/scoreAggregators/ScoreAggregator.js +0 -23
- package/src/scoring/scoreAggregators/index.js +0 -3
- package/src/values/AssessmentResult.js +0 -496
- package/src/values/Mark.js +0 -271
- package/src/values/Paper.js +0 -425
- package/src/values/index.js +0 -9
- package/src/vendor/turkishStemmer.js +0 -3435
- package/tsconfig.json +0 -15
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import {get} from 'lodash';
|
|
2
|
-
import {languageProcessing} from '../../../index';
|
|
3
|
-
const {baseStemmer} = languageProcessing;
|
|
4
|
-
|
|
5
|
-
import determineStem from './internal/determineStem';
|
|
6
|
-
|
|
7
|
-
/**
 * Picks the stemming function to use for a researcher.
 *
 * Reads the `de` entry of the researcher's `morphology` data. When that entry is available, the returned
 * function stems a word through `determineStem` using that data; otherwise `baseStemmer` is returned.
 *
 * @param {Researcher} researcher The researcher to read the morphology data from.
 *
 * @returns {Function} The stemmer.
 */
export default function getStemmer(researcher) {
  // `false` is the fallback value when no `de` morphology data is present.
  const germanMorphology = get(researcher.getData('morphology'), 'de', false);

  return germanMorphology ? word => determineStem(word, germanMorphology) : baseStemmer;
}
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
import SentenceTokenizer from "../../../../helpers/sentence/SentenceTokenizer";
|
|
2
|
-
import wordBoundaries from "../../../../../config/wordBoundaries";
|
|
3
|
-
|
|
4
|
-
// Every word boundary from the wordBoundaries config, each prefixed with a backslash so it is taken literally.
const escapedWordBoundaries = wordBoundaries().map( ( boundary ) => "\\" + boundary ).join( "" );
// Either the beginning of a string (^) or one of the word boundaries.
const wordBoundariesForRegex = "(^|[" + escapedWordBoundaries + "])";
// A word boundary, then one to three digits, then a full stop at the very end of the string.
const ordinalDotRegex = new RegExp( wordBoundariesForRegex + "\\d{1,3}\\.$" );
|
|
7
|
-
|
|
8
|
-
/**
 * Class for tokenizing a German (html) text into sentences.
 *
 * Adds a check for the ordinal dot on top of the generic SentenceTokenizer. No explicit constructor is
 * declared: a constructor that only forwards to `super()` is what the language supplies by default.
 */
export default class GermanSentenceTokenizer extends SentenceTokenizer {
	/**
	 * Checks whether a full stop is an ordinal dot instead of a sentence splitter.
	 * See: https://en.wikipedia.org/wiki/Ordinal_indicator#Ordinal_dot
	 *
	 * The sentence is trimmed first, then tested for ending in one to three digits plus a full stop
	 * that are preceded by a word boundary or the start of the string.
	 *
	 * @param {string} currentSentence A string ending with a full stop.
	 *
	 * @returns {boolean} Returns true if the full stop is an ordinal dot, false otherwise.
	 */
	endsWithOrdinalDot( currentSentence ) {
		return ordinalDotRegex.test( currentSentence.trim() );
	}
}
|
|
31
|
-
|
package/src/languageProcessing/languages/de/helpers/internal/detectAndStemRegularParticiple.js
DELETED
|
@@ -1,128 +0,0 @@
|
|
|
1
|
-
import exceptionsParticiplesActive from "../../config/internal/exceptionsParticiplesActive";
|
|
2
|
-
import exceptions from "../../config/internal/passiveVoiceRegex";
|
|
3
|
-
|
|
4
|
-
/**
 * Stems a regular participle that carries no verb prefix.
 *
 * Two patterns from the morphology data are tried, each anchored at the start of the word: first the one at
 * index 0 of `participleStemmingClasses` (ge + stem ending in d/t + et), then the one at index 1
 * (ge + stem + t). The first pattern is the more specific one, so it has to win when both apply.
 *
 * @param {Object} morphologyDataVerbs The German morphology data for verbs.
 * @param {string} word                The word (not stemmed) to check.
 *
 * @returns {string|null} The stem or null if no participle was matched.
 */
const detectAndStemParticiplesWithoutPrefixes = function( morphologyDataVerbs, word ) {
	const stemmingClasses = morphologyDataVerbs.participleStemmingClasses;
	const stemPlusTPattern = new RegExp( "^" + stemmingClasses[ 1 ].regex );
	const stemPlusEtPattern = new RegExp( "^" + stemmingClasses[ 0 ].regex );

	// Number of characters to drop from the end: "et" (2), "t" (1), or 0 when nothing matched.
	let suffixLength = 0;
	if ( stemPlusEtPattern.test( word ) ) {
		suffixLength = 2;
	} else if ( stemPlusTPattern.test( word ) ) {
		suffixLength = 1;
	}

	if ( suffixLength === 0 ) {
		return null;
	}

	// Drop the two-letter prefix together with the matched suffix.
	return word.slice( 2, word.length - suffixLength );
};
|
|
33
|
-
|
|
34
|
-
/**
 * Tries every prefix of one prefix class in combination with a participle pattern and, on the first prefix
 * for which the word matches, returns the prefix followed by the stem.
 *
 * @param {string}   word      The word (not stemmed) to check.
 * @param {string[]} prefixes  The prefixes of a certain prefix class.
 * @param {string}   regexPart The regex part for a given class (the prefix and a start anchor are put in front of it).
 * @param {number}   startStem Where to start cutting off the de-prefixed word.
 * @param {number}   endStem   Where to end cutting off the de-prefixed word (from the end index).
 *
 * @returns {string|null} The stem or null if no prefixed participle was matched.
 */
const detectAndStemParticiplePerPrefixClass = function( word, prefixes, regexPart, startStem, endStem ) {
	for ( const prefix of prefixes ) {
		const prefixedParticiplePattern = new RegExp( "^" + prefix + regexPart );

		if ( ! prefixedParticiplePattern.test( word ) ) {
			continue;
		}

		// A negative start index keeps everything that follows the prefix.
		const remainder = word.slice( prefix.length - word.length );
		const bareStem = remainder.slice( startStem, remainder.length - endStem );

		return prefix + bareStem;
	}

	return null;
};
|
|
60
|
-
|
|
61
|
-
/**
 * Stems a regular participle that starts with a verb prefix.
 *
 * For each participle stemming class, the word is checked first against the prefixes that belong to the class
 * (separable or inseparable, depending on the class) and then against the prefixes that can be either.
 *
 * @param {Object} morphologyDataVerbs The German morphology data for verbs.
 * @param {string} word                The word (not stemmed) to check.
 *
 * @returns {string|null} The stem or null if no participle with prefix was matched.
 */
const detectAndStemParticiplesWithPrefixes = function( morphologyDataVerbs, word ) {
	const ambiguousPrefixes = morphologyDataVerbs.prefixes.separableOrInseparable;

	/*
	 * The classes are walked in their given order on purpose: the ge + stem ending in d/t + et pattern is more
	 * specific than the ge + stem + t pattern and therefore has to be tried first.
	 */
	for ( const { regex, startStem, endStem, separable } of morphologyDataVerbs.participleStemmingClasses ) {
		const classPrefixes = separable
			? morphologyDataVerbs.prefixes.separable
			: morphologyDataVerbs.prefixes.inseparable;

		for ( const prefixList of [ classPrefixes, ambiguousPrefixes ] ) {
			const stem = detectAndStemParticiplePerPrefixClass( word, prefixList, regex, startStem, endStem );

			if ( stem ) {
				return stem;
			}
		}
	}

	return null;
};
|
|
101
|
-
|
|
102
|
-
/**
 * Stems a word if it is a regular participle, with or without a verb prefix.
 *
 * Words that are flagged as exceptions yield an empty string. Otherwise the un-prefixed patterns are tried
 * before the prefixed ones.
 *
 * @param {Object} morphologyDataVerbs The German morphology data for verbs.
 * @param {string} word                The word (not stemmed) to check.
 *
 * @returns {string|null} The participle stem, an empty string for an exception, or null if no regular
 *                        participle was matched.
 */
export function detectAndStemRegularParticiple( morphologyDataVerbs, word ) {
	/*
	 * NOTE(review): `exceptions` is the default export of the passiveVoiceRegex config. If that export is a
	 * factory returning an object of matcher functions, `exceptions( word ).length` is undefined and the first
	 * half of this check can never be true - confirm against the config module.
	 */
	const isException = exceptions( word ).length > 0 || exceptionsParticiplesActive.includes( word );

	if ( isException ) {
		return "";
	}

	return detectAndStemParticiplesWithoutPrefixes( morphologyDataVerbs, word ) ||
		detectAndStemParticiplesWithPrefixes( morphologyDataVerbs, word ) ||
		null;
}
|
|
@@ -1,128 +0,0 @@
|
|
|
1
|
-
import {flatten} from 'lodash';
|
|
2
|
-
import {languageProcessing} from '../../../index';
|
|
3
|
-
const {flattenSortLength} = languageProcessing;
|
|
4
|
-
|
|
5
|
-
import {detectAndStemRegularParticiple} from './detectAndStemRegularParticiple';
|
|
6
|
-
|
|
7
|
-
import stem from './stem';
|
|
8
|
-
|
|
9
|
-
/**
 * Looks the stem up on the noun exception lists.
 *
 * Each exception set is searched for an entry the stem ends with. On a hit, that ending is replaced by the
 * first entry of the set, and whatever preceded the ending is kept in front of it.
 *
 * @param {Object} morphologyDataNouns The German morphology data for nouns.
 * @param {string} stemmedWord         The stem to check.
 *
 * @returns {string|null} The stemmed word or null if none was found.
 */
const findStemOnNounExceptionList = function(morphologyDataNouns, stemmedWord) {
  for (const stemSet of morphologyDataNouns.exceptionStems) {
    const matchedEnding = stemSet.find(candidate => stemmedWord.endsWith(candidate));

    if (!matchedEnding) {
      continue;
    }

    const cutoff = stemmedWord.length - matchedEnding.length;

    return stemmedWord.slice(0, cutoff) + stemSet[0];
  }

  return null;
};
|
|
35
|
-
|
|
36
|
-
/**
 * Looks the stem up on the adjective exception lists.
 *
 * Every exception class is scanned for a stem set that contains the stem exactly; the first entry of the
 * first such set is returned.
 *
 * @param {Object} morphologyDataAdjectives The German morphology data for adjectives.
 * @param {string} stemmedWord              The stem to check.
 *
 * @returns {string|null} The stemmed word or null if none was found.
 */
const findStemOnAdjectiveExceptionList = function(morphologyDataAdjectives, stemmedWord) {
  for (const stemSets of Object.values(morphologyDataAdjectives.exceptions)) {
    const matchingSet = stemSets.find(stemSet => stemSet.includes(stemmedWord));

    if (matchingSet !== undefined) {
      return matchingSet[0];
    }
  }

  return null;
};
|
|
59
|
-
|
|
60
|
-
/**
 * Looks the stem up among the stems of the strong and irregular verbs.
 *
 * A verb prefix at the start of the stem is split off before the lookup and, on a hit, put back in front of
 * the present tense stem of the matched verb.
 *
 * @param {Object} morphologyDataVerbs The German morphology data for verbs.
 * @param {string} stemmedWord         The stem to check.
 *
 * @returns {string|null} The stemmed word or null if none was found.
 */
const findStemOnVerbExceptionList = function(morphologyDataVerbs, stemmedWord) {
  const verbParadigms = morphologyDataVerbs.strongAndIrregularVerbs.stems;
  const knownPrefixes = flattenSortLength(morphologyDataVerbs.prefixes);

  let keptPrefix = knownPrefixes.find(prefix => stemmedWord.startsWith(prefix));
  let candidate = stemmedWord;

  if (keptPrefix) {
    const remainder = stemmedWord.slice(keptPrefix.length, stemmedWord.length);

    /*
     * Only accept the split when at least 3 characters remain, so that e.g. "be" is not found in the stem
     * "berg". 3 is a safe minimum, since 2-letter verb stems are highly unlikely to impossible.
     */
    if (remainder.length > 2) {
      candidate = remainder;
    } else {
      keptPrefix = null;
    }
  }

  for (const paradigm of verbParadigms) {
    const allStems = flatten(Object.values(paradigm.stems));

    if (!allStems.includes(candidate)) {
      continue;
    }

    // The present tense stem serves as the default stem.
    return keptPrefix ? keptPrefix + paradigm.stems.present : paradigm.stems.present;
  }

  return null;
};
|
|
104
|
-
|
|
105
|
-
/**
 * Determines the stem of a German word.
 *
 * The word is stemmed programmatically first. The exception lookups are then consulted one after another -
 * nouns, adjectives, verbs, regular participles - and the first stem any of them yields is returned. When none
 * of them yields a stem, the programmatically created stem is the result.
 *
 * @param {string} word                 The word to get the stem for.
 * @param {Object} morphologyDataGerman The German morphology data.
 *
 * @returns {string} Stemmed form of the word.
 */
export default function determineStem(word, morphologyDataGerman) {
  const verbData = morphologyDataGerman.verbs;
  const stemmedWord = stem(verbData, word);

  // Wrapped in functions so that a lookup only runs when all lookups before it came up empty.
  const lookups = [
    () => findStemOnNounExceptionList(morphologyDataGerman.nouns, stemmedWord),
    () => findStemOnAdjectiveExceptionList(morphologyDataGerman.adjectives, stemmedWord),
    () => findStemOnVerbExceptionList(verbData, stemmedWord),
    // The participle check works on the original word, not on the stem.
    () => detectAndStemRegularParticiple(verbData, word),
  ];

  for (const lookup of lookups) {
    const exceptionStem = lookup();

    if (exceptionStem) {
      return exceptionStem;
    }
  }

  return stemmedWord;
}
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
import {forEach} from 'lodash';
|
|
2
|
-
import {languageProcessing} from '../../../index';
|
|
3
|
-
const {getWords} = languageProcessing;
|
|
4
|
-
|
|
5
|
-
import regexFunctionFactory from '../../config/internal/passiveVoiceRegex.js';
|
|
6
|
-
const regexFunction = regexFunctionFactory();
|
|
7
|
-
import irregularParticiples from '../../config/internal/passiveVoiceIrregulars.js';
|
|
8
|
-
|
|
9
|
-
// The individual participle matchers provided by the passive voice regex config.
const {
  verbsBeginningWithErVerEntBeZerHerUber,
  verbsBeginningWithGe,
  verbsWithGeInMiddle,
  verbsWithErVerEntBeZerHerUberInMiddle,
  verbsEndingWithIert,
} = regexFunction;
|
|
14
|
-
|
|
15
|
-
/**
 * Collects the German participles that occur in a clause.
 *
 * A word counts as a participle when one of the participle matchers returns a non-empty result for it, or
 * when it is on the list of irregular participles.
 *
 * @param {string} clauseText The clause to find participles in.
 *
 * @returns {Array} The array with the German participles found.
 */
export default function(clauseText) {
  // Kept in the order in which the matchers are to be consulted.
  const participleMatchers = [
    verbsBeginningWithGe,
    verbsWithGeInMiddle,
    verbsBeginningWithErVerEntBeZerHerUber,
    verbsWithErVerEntBeZerHerUberInMiddle,
    verbsEndingWithIert,
  ];
  const participles = [];

  forEach(getWords(clauseText), word => {
    const matchesRegularPattern = participleMatchers.some(matcher => matcher(word).length !== 0);

    if (matchesRegularPattern || irregularParticiples.includes(word)) {
      participles.push(word);
    }
  });

  return participles;
}
|
|
@@ -1,215 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @file German stemming algorithm. Adapted from:
|
|
3
|
-
* @author: Joder Illi
|
|
4
|
-
* @copyright (c) 2010, FormBlitz AG
|
|
5
|
-
* All rights reserved.
|
|
6
|
-
* Implementation of the stemming algorithm from http://snowball.tartarus.org/algorithms/german/stemmer.html
|
|
7
|
-
* Copyright of the algorithm is: Copyright (c) 2001, Dr Martin Porter and can be found at http://snowball.tartarus.org/license.php
|
|
8
|
-
*
|
|
9
|
-
* Redistribution and use in source and binary forms, with or without modification, is covered by the standard BSD license.
|
|
10
|
-
*/
|
|
11
|
-
/**
 * Finds where the R1 region of a word starts.
 * R1 is the part of the word after the first non-vowel that follows a vowel,
 * moved forward when necessary so that at least 3 letters precede it.
 *
 * @param {string} word The word for which to determine the R1 region.
 * @returns {number} The start index of the R1 region, or -1 when the word contains no vowel followed by a non-vowel.
 */
const determineR1 = function( word ) {
	const vowelNonVowelIndex = word.search( /[aeiouyäöü][^aeiouyäöü]/ );

	// Without a vowel + non-vowel pair there is no R1 region.
	if ( vowelNonVowelIndex === -1 ) {
		return -1;
	}

	// R1 starts right after the matched pair (hence + 2), but never before index 3.
	return Math.max( vowelNonVowelIndex + 2, 3 );
};
|
|
33
|
-
|
|
34
|
-
/**
 * Looks for a step 1 suffix at the end of the word. The suffix groups are tried in this order:
 * (a) em ern er
 * (b) e en es
 * (c) s (preceded by a valid s-ending)
 * A valid s-ending is one of b, d, f, g, h, k, l, m, n, r or t.
 *
 * @param {string} word The word to check for the suffix.
 * @returns {{index1: number, optionUsed1: string}} The start index of the suffix and the group ("a", "b" or "c") it belongs to.
 *                                                  When no suffix is found, index1 is 10000 and optionUsed1 is an empty string.
 */
const findSuffixStep1 = function( word ) {
	const groupAStart = word.search( /(em|ern|er)$/g );
	if ( groupAStart !== -1 ) {
		return { index1: groupAStart, optionUsed1: "a" };
	}

	const groupBStart = word.search( /(e|en|es)$/g );
	if ( groupBStart !== -1 ) {
		return { index1: groupBStart, optionUsed1: "b" };
	}

	const sEndingStart = word.search( /([bdfghklmnrt]s)$/g );
	if ( sEndingStart !== -1 ) {
		// The match starts at the valid s-ending letter; the suffix itself is only the "s" after it.
		return { index1: sEndingStart + 1, optionUsed1: "c" };
	}

	// 10000 is the sentinel for "no step 1 suffix found".
	return { index1: 10000, optionUsed1: "" };
};
|
|
73
|
-
|
|
74
|
-
/**
 * Looks for a step 2 suffix at the end of the word. The suffix groups are tried in this order:
 * (a) en er est
 * (b) st (preceded by a valid st-ending, itself preceded by at least 3 letters)
 * A valid st-ending is one of b, d, f, g, h, k, l, m, n or t.
 *
 * @param {string} word The word to check for the suffix.
 * @returns {number} The start index of the suffix, or 10000 when no suffix is found.
 */
const findSuffixStep2 = function( word ) {
	const groupAStart = word.search( /(en|er|est)$/g );
	if ( groupAStart !== -1 ) {
		return groupAStart;
	}

	const stContextStart = word.search( /(.{3}[bdfghklmnt]st)$/g );
	if ( stContextStart !== -1 ) {
		// Skip the 3 leading letters and the valid st-ending letter to land on the "st" itself.
		return stContextStart + 4;
	}

	// 10000 is the sentinel for "no step 2 suffix found".
	return 10000;
};
|
|
99
|
-
|
|
100
|
-
/**
 * Removes the step 1 suffix from the word when that suffix starts inside R1.
 * (The letter of the valid s-ending is not necessarily in R1.)
 * When a group (b) suffix was removed and the remainder ends in niss, the final s is removed as well.
 * (For example, äckern -> äck, ackers -> acker, armes -> arm, bedürfnissen -> bedürfnis).
 *
 * @param {string} word        The word for which to delete the suffix.
 * @param {number} index1      The index of the suffix found in step 1 (10000 when none was found).
 * @param {string} optionUsed1 The type of the suffix found in step 1.
 * @param {number} r1Index     The R1 index (-1 when the word has no R1 region).
 *
 * @returns {string} The word with the deleted suffix, or the unchanged word when nothing is deleted.
 */
const deleteSuffix1 = function( word, index1, optionUsed1, r1Index ) {
	const suffixIsInR1 = index1 !== 10000 && r1Index !== -1 && index1 >= r1Index;
	if ( ! suffixIsInR1 ) {
		return word;
	}

	const withoutSuffix = word.substring( 0, index1 );

	// Only group (b) endings trigger the niss -> nis reduction.
	if ( optionUsed1 === "b" && withoutSuffix.search( /niss$/ ) !== -1 ) {
		return withoutSuffix.substring( 0, withoutSuffix.length - 1 );
	}

	return withoutSuffix;
};
|
|
125
|
-
|
|
126
|
-
/**
 * Removes the step 2 suffix from the word when that suffix starts inside R1.
 * (For example, derbsten -> derbst by step 1, and derbst -> derb by step 2,
 * since b is a valid st-ending, and is preceded by just 3 letters).
 *
 * @param {string} word    The word for which to delete the suffix.
 * @param {number} index2  The index of the suffix found in step 2 (10000 when none was found).
 * @param {number} r1Index The R1 index (-1 when the word has no R1 region).
 *
 * @returns {string} The word with the deleted suffix, or the unchanged word when nothing is deleted.
 */
const deleteSuffix2 = function( word, index2, r1Index ) {
	const suffixIsInR1 = index2 !== 10000 && r1Index !== -1 && index2 >= r1Index;

	return suffixIsInR1 ? word.substring( 0, index2 ) : word;
};
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
/**
 * Looks the word up in the list of very irregular verbs and returns the stem of its paradigm.
 *
 * @param {Object} morphologyDataVerbs The German morphology data for verbs.
 * @param {string} word                The word to stem.
 *
 * @returns {string|null} The stem of the paradigm whose forms include the word, or null when there is no such paradigm.
 */
const stemIrregularVerbs = function( morphologyDataVerbs, word ) {
	const { veryIrregularVerbs } = morphologyDataVerbs;
	const paradigmWithWord = veryIrregularVerbs.find( ( { forms } ) => forms.includes( word ) );

	return paradigmWithWord ? paradigmWithWord.stem : null;
};
|
|
169
|
-
|
|
170
|
-
/**
 * Stems German words.
 *
 * Very irregular verbs are resolved through the morphology data. Every other word goes through
 * step 1 and step 2 suffix removal, both restricted to the R1 region of the word.
 *
 * @param {Object} morphologyDataVerbs The German morphology data for verbs.
 * @param {string} word                The word to stem.
 *
 * @returns {string} The stemmed word.
 */
export default function stem( morphologyDataVerbs, word ) {
	// Check if word is a very irregular verb, and if so, return its stem.
	const veryIrregularVerbStem = stemIrregularVerbs( morphologyDataVerbs, word );

	if ( veryIrregularVerbStem ) {
		return veryIrregularVerbStem;
	}

	/*
	 * Put u, y, i and e between vowels into upper case.
	 * Upper-case letters fall outside the lower-case vowel and suffix patterns used in the steps below.
	 */
	let stemmedWord = word
		.replace( /([aeiouyäöü])u([aeiouyäöü])/g, "$1U$2" )
		.replace( /([aeiouyäöü])y([aeiouyäöü])/g, "$1Y$2" )
		.replace( /([aeiouyäöü])i([aeiouyäöü])/g, "$1I$2" )
		.replace( /([aeiouyäöü])e([aeiouyäöü])/g, "$1E$2" );

	// Find the start index of the R1 region. It is determined once and used for both steps.
	const r1Index = determineR1( stemmedWord );

	// Find the suffix as defined in step 1 (a single lookup yields both the index and the suffix group).
	const { index1, optionUsed1 } = findSuffixStep1( stemmedWord );

	// Delete the suffix found in step 1.
	stemmedWord = deleteSuffix1( stemmedWord, index1, optionUsed1, r1Index );

	// Find the suffix as defined in step 2, on the result of step 1.
	const index2 = findSuffixStep2( stemmedWord );

	// Delete the suffix found in step 2.
	stemmedWord = deleteSuffix2( stemmedWord, index2, r1Index );

	// Turn U, Y, I and E back into lower case.
	return stemmedWord
		.replace( /U/g, "u" )
		.replace( /Y/g, "y" )
		.replace( /I/g, "i" )
		.replace( /E/g, "e" );
}
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import { memoize } from "lodash";
|
|
2
|
-
import SentenceTokenizer from "./internal/SentenceTokenizer";
|
|
3
|
-
|
|
4
|
-
/**
 * Splits a text into sentences with the help of a SentenceTokenizer.
 *
 * @param {string}  text                The text to retrieve sentences from.
 * @param {boolean} [trimSentences=true] Whether to trim whitespace from the beginning and end of the sentences or not.
 *
 * @returns {Array<string>} The list of sentences in the text; an empty list when tokenizing yields no tokens.
 */
function getSentenceTokenizer( text, trimSentences = true ) {
	const sentenceTokenizer = new SentenceTokenizer();
	const tokenizerParts = sentenceTokenizer.createTokenizer();
	const activeTokenizer = tokenizerParts.tokenizer;
	// The tokens collection is read before tokenizing and inspected afterwards.
	const collectedTokens = tokenizerParts.tokens;

	sentenceTokenizer.tokenize( activeTokenizer, text );

	if ( collectedTokens.length === 0 ) {
		return [];
	}

	return sentenceTokenizer.getSentencesFromTokens( collectedTokens, trimSentences );
}
|
|
19
|
-
|
|
20
|
-
/*
 * Resolver function for memoize: builds the cache key from all arguments of a call.
 * By default, memoize only uses the first argument of a function as the map cache key,
 * so a call in which only the second argument differs would be answered from the cache.
 * Serializing the full argument list makes getSentenceTokenizer re-run in that case as well,
 * which prevents cache collisions.
 * @see https://lodash.com/docs/4.17.15#memoize
 */
const resolveCacheKey = ( ...args ) => JSON.stringify( args );

export default memoize( getSentenceTokenizer, resolveCacheKey );
|
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
import {includes, map} from 'lodash';
|
|
2
|
-
import {languageProcessing} from '../../../index';
|
|
3
|
-
const {indices, values} = languageProcessing;
|
|
4
|
-
const {getIndicesByWord, getIndicesByWordList} = indices;
|
|
5
|
-
const {Clause} = values;
|
|
6
|
-
|
|
7
|
-
import getParticiples from '../helpers/internal/getParticiples';
|
|
8
|
-
import exceptionsParticiplesActive from '../config/internal/exceptionsParticiplesActive.js';
|
|
9
|
-
import {participleLike as participleLikeAuxiliaries} from '../config/internal/passiveVoiceAuxiliaries.js';
|
|
10
|
-
|
|
11
|
-
/*
 * Matches a word that ends in one of the listed noun suffixes, followed by the end of the string
 * or by a whitespace/punctuation character.
 * The hyphen in the character class is escaped so that it is a literal hyphen; unescaped, "+-;" forms
 * a range (U+002B to U+003B) that also matches digits.
 */
const exceptionsRegex = /\S+(apparat|arbeit|dienst|haft|halt|keit|kraft|not|pflicht|schaft|schrift|tät|wert|zeit)($|[ \n\r\t.,'()"+\-;!?:/»«‹›<>])/gi;
|
|
12
|
-
|
|
13
|
-
/**
 * A Clause for the German language that determines its own passiveness from the participles it contains.
 */
class GermanClause extends Clause {
  /**
   * Creates the clause, collects its participles and determines whether it is passive.
   *
   * @param {string} clauseText  The text of the clause.
   * @param {Array}  auxiliaries The auxiliaries.
   *
   * @constructor
   */
  constructor(clauseText, auxiliaries) {
    super(clauseText, auxiliaries);
    this._participles = getParticiples(this.getClauseText());
    this.checkParticiples();
  }

  /**
   * Marks the clause as passive when at least one of its participles is not covered by an exception.
   * A participle is covered by an exception when it ends in a noun suffix, is on the list of active
   * participle exceptions, is directly followed by 'haben' or 'sein', or is a participle-like auxiliary.
   *
   * @returns {void}
   */
  checkParticiples() {
    const isException = candidate =>
      this.hasNounSuffix(candidate) ||
      includes(exceptionsParticiplesActive, candidate) ||
      this.hasHabenSeinException(candidate) ||
      includes(participleLikeAuxiliaries, candidate);

    const passiveParticiples = this.getParticiples().filter(candidate => !isException(candidate));

    this.setPassive(passiveParticiples.length > 0);
  }

  /**
   * Checks whether a found participle ends in a noun suffix.
   * A word that matches the exceptionsRegex isn't considered a participle.
   *
   * @param {string} participle The participle to check.
   *
   * @returns {boolean} True if the participle matches the noun suffix regex, false otherwise.
   */
  hasNounSuffix(participle) {
    const suffixMatches = participle.match(exceptionsRegex);

    return suffixMatches !== null;
  }

  /**
   * Checks whether a participle is followed by 'haben' or 'sein'.
   * If a participle is followed by one of these, the clause is not passive.
   * Only the first entry that getIndicesByWord returns for the participle is inspected.
   *
   * @param {string} participle The participle to check.
   *
   * @returns {boolean} True if it is an exception, false otherwise.
   */
  hasHabenSeinException(participle) {
    const participleOccurrences = getIndicesByWord(participle, this.getClauseText());
    const habenSeinOccurrences = getIndicesByWordList(['haben', 'sein'], this.getClauseText());

    // Nothing to compare when either the participle or haben/sein is absent from the clause.
    if (participleOccurrences.length === 0 || habenSeinOccurrences.length === 0) {
      return false;
    }

    const habenSeinPositions = map(habenSeinOccurrences, 'index');
    const firstOccurrence = participleOccurrences[0];
    // The + 1 skips a single character after the participle (presumably the separating space).
    const expectedAuxiliaryIndex = firstOccurrence.index + firstOccurrence.match.length + 1;

    return includes(habenSeinPositions, expectedAuxiliaryIndex);
  }
}

export default GermanClause;
|