axyseo 2.0.0-alpha.0.0.4 → 2.0.0-alpha.0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -2
- package/.browserslistrc +0 -1
- package/.gitattributes +0 -1
- package/babel.config.js +0 -3
- package/eslint.config.mjs +0 -119
- package/src/bundledPlugins/index.js +0 -5
- package/src/bundledPlugins/previouslyUsedKeywords.js +0 -192
- package/src/config/diacritics.js +0 -106
- package/src/config/getTransliterations.js +0 -1447
- package/src/config/transliterationsWPstyle.js +0 -774
- package/src/config/wordBoundaries.js +0 -23
- package/src/config/wordBoundariesWithoutPunctuation.js +0 -9
- package/src/const/analysis.js +0 -41
- package/src/errors/invalidType.js +0 -14
- package/src/errors/missingArgument.js +0 -14
- package/src/helpers/createMeasurementElement.js +0 -40
- package/src/helpers/domManipulation.js +0 -65
- package/src/helpers/errors.js +0 -26
- package/src/helpers/factory.js +0 -219
- package/src/helpers/formatNumber.js +0 -12
- package/src/helpers/formatString.js +0 -33
- package/src/helpers/getLanguagesWithWordComplexity.js +0 -8
- package/src/helpers/getLanguagesWithWordFormSupport.js +0 -11
- package/src/helpers/getWordComplexityConfig.js +0 -20
- package/src/helpers/getWordComplexityHelper.js +0 -20
- package/src/helpers/htmlEntities.js +0 -41
- package/src/helpers/includesAny.js +0 -19
- package/src/helpers/index.js +0 -127
- package/src/helpers/shortlinker/Shortlinker.js +0 -75
- package/src/helpers/shortlinker/index.js +0 -1
- package/src/helpers/shortlinker/singleton.js +0 -68
- package/src/helpers/types.js +0 -34
- package/src/index.js +0 -60
- package/src/languageProcessing/AbstractResearcher.js +0 -366
- package/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +0 -125
- package/src/languageProcessing/helpers/html/getFieldsToMark.js +0 -29
- package/src/languageProcessing/helpers/html/getSubheadingTexts.js +0 -47
- package/src/languageProcessing/helpers/html/getSubheadings.js +0 -95
- package/src/languageProcessing/helpers/html/html.js +0 -176
- package/src/languageProcessing/helpers/html/htmlParser.js +0 -145
- package/src/languageProcessing/helpers/html/matchParagraphs.js +0 -62
- package/src/languageProcessing/helpers/html/normalizeHTML.js +0 -16
- package/src/languageProcessing/helpers/image/getAltAttribute.js +0 -20
- package/src/languageProcessing/helpers/image/getImagesInTree.js +0 -16
- package/src/languageProcessing/helpers/image/imageInText.js +0 -19
- package/src/languageProcessing/helpers/index.js +0 -12
- package/src/languageProcessing/helpers/language/getLanguage.js +0 -9
- package/src/languageProcessing/helpers/link/checkNofollow.js +0 -38
- package/src/languageProcessing/helpers/link/getAnchorsFromText.js +0 -32
- package/src/languageProcessing/helpers/link/getLinkType.js +0 -32
- package/src/languageProcessing/helpers/match/findKeywordFormsInString.js +0 -101
- package/src/languageProcessing/helpers/match/isDoubleQuoted.js +0 -13
- package/src/languageProcessing/helpers/match/matchTextWithArray.js +0 -36
- package/src/languageProcessing/helpers/match/matchTextWithTransliteration.js +0 -58
- package/src/languageProcessing/helpers/match/matchTextWithWord.js +0 -45
- package/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +0 -164
- package/src/languageProcessing/helpers/match/processExactMatchRequest.js +0 -20
- package/src/languageProcessing/helpers/morphology/baseStemmer.js +0 -11
- package/src/languageProcessing/helpers/morphology/buildFormRule.js +0 -19
- package/src/languageProcessing/helpers/morphology/buildTopicStems.js +0 -169
- package/src/languageProcessing/helpers/morphology/createRulesFromArrays.js +0 -45
- package/src/languageProcessing/helpers/morphology/exceptionListHelpers.js +0 -65
- package/src/languageProcessing/helpers/morphology/findMatchingEndingInArray.js +0 -24
- package/src/languageProcessing/helpers/morphology/flattenSortLength.js +0 -14
- package/src/languageProcessing/helpers/morphology/getAllWordsFromPaper.js +0 -39
- package/src/languageProcessing/helpers/morphology/regexHelpers.js +0 -44
- package/src/languageProcessing/helpers/morphology/stemHelpers.js +0 -38
- package/src/languageProcessing/helpers/morphology/stemPrefixedFunctionWords.js +0 -31
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/directPrecedenceException.js +0 -36
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/freeAuxiliaryParticipleOrder/getClausesSplitOnStopWords.js +0 -113
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/freeAuxiliaryParticipleOrder/nonDirectParticiplePrecedenceException.js +0 -45
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/getClauses.js +0 -231
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/getIndicesWithRegex.js +0 -20
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/matchRegularParticiples.js +0 -23
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/precedenceException.js +0 -40
- package/src/languageProcessing/helpers/prominentWords/determineProminentWords.js +0 -238
- package/src/languageProcessing/helpers/regex/createRegexFromArray.js +0 -35
- package/src/languageProcessing/helpers/regex/createRegexFromDoubleArray.js +0 -34
- package/src/languageProcessing/helpers/regex/createWordRegex.js +0 -30
- package/src/languageProcessing/helpers/regex/matchStringWithRegex.js +0 -19
- package/src/languageProcessing/helpers/regex/searchAndReplaceWithOneRegex.js +0 -14
- package/src/languageProcessing/helpers/sanitize/doubleQuotes.js +0 -12
- package/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js +0 -131
- package/src/languageProcessing/helpers/sanitize/mergeListItems.js +0 -24
- package/src/languageProcessing/helpers/sanitize/parseSynonyms.js +0 -20
- package/src/languageProcessing/helpers/sanitize/quotes.js +0 -46
- package/src/languageProcessing/helpers/sanitize/removeEmailAddresses.js +0 -12
- package/src/languageProcessing/helpers/sanitize/removePunctuation.js +0 -64
- package/src/languageProcessing/helpers/sanitize/removePunctuationExceptQuotes.js +0 -18
- package/src/languageProcessing/helpers/sanitize/removeSentenceTerminators.js +0 -13
- package/src/languageProcessing/helpers/sanitize/removeURLs.js +0 -13
- package/src/languageProcessing/helpers/sanitize/sanitizeLineBreakTag.js +0 -11
- package/src/languageProcessing/helpers/sanitize/sanitizeString.js +0 -18
- package/src/languageProcessing/helpers/sanitize/stripHTMLTags.js +0 -57
- package/src/languageProcessing/helpers/sanitize/stripNonTextTags.js +0 -15
- package/src/languageProcessing/helpers/sanitize/stripNumbers.js +0 -21
- package/src/languageProcessing/helpers/sanitize/stripSpaces.js +0 -23
- package/src/languageProcessing/helpers/sanitize/stripWordBoundaries.js +0 -65
- package/src/languageProcessing/helpers/sanitize/unifyWhitespace.js +0 -61
- package/src/languageProcessing/helpers/sentence/SentenceTokenizer.js +0 -640
- package/src/languageProcessing/helpers/sentence/countSentences.js +0 -20
- package/src/languageProcessing/helpers/sentence/getSentences.js +0 -65
- package/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +0 -55
- package/src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer.js +0 -28
- package/src/languageProcessing/helpers/sentence/sentencesLength.js +0 -31
- package/src/languageProcessing/helpers/syllables/DeviationFragment.js +0 -112
- package/src/languageProcessing/helpers/syllables/countSyllables.js +0 -182
- package/src/languageProcessing/helpers/syllables/syllableCountIterator.js +0 -56
- package/src/languageProcessing/helpers/syllables/syllableCountStep.js +0 -68
- package/src/languageProcessing/helpers/transform/transformWordsWithHyphens.js +0 -17
- package/src/languageProcessing/helpers/transliterate/replaceDiacritics.js +0 -22
- package/src/languageProcessing/helpers/transliterate/specialCharacterMappings.js +0 -214
- package/src/languageProcessing/helpers/transliterate/transliterate.js +0 -20
- package/src/languageProcessing/helpers/transliterate/transliterateWPstyle.js +0 -21
- package/src/languageProcessing/helpers/url/parseSlug.js +0 -10
- package/src/languageProcessing/helpers/url/url.js +0 -172
- package/src/languageProcessing/helpers/word/addWordboundary.js +0 -37
- package/src/languageProcessing/helpers/word/areWordsInSentence.js +0 -16
- package/src/languageProcessing/helpers/word/countMetaDescriptionLength.js +0 -18
- package/src/languageProcessing/helpers/word/countWords.js +0 -14
- package/src/languageProcessing/helpers/word/createPunctuationTokens.js +0 -42
- package/src/languageProcessing/helpers/word/filterWordsFromArray.js +0 -15
- package/src/languageProcessing/helpers/word/followsIndex.js +0 -25
- package/src/languageProcessing/helpers/word/getAllWordsFromTree.js +0 -23
- package/src/languageProcessing/helpers/word/getWords.js +0 -43
- package/src/languageProcessing/helpers/word/includesIndex.js +0 -30
- package/src/languageProcessing/helpers/word/indices.js +0 -146
- package/src/languageProcessing/helpers/word/markWordsInSentences.js +0 -173
- package/src/languageProcessing/helpers/word/matchWordInSentence.js +0 -61
- package/src/languageProcessing/helpers/word/splitIntoTokens.js +0 -46
- package/src/languageProcessing/index.js +0 -91
- package/src/languageProcessing/languages/_default/Researcher.js +0 -34
- package/src/languageProcessing/languages/_default/helpers/getStemmer.js +0 -11
- package/src/languageProcessing/languages/ar/Researcher.js +0 -46
- package/src/languageProcessing/languages/ar/config/firstWordExceptions.js +0 -14
- package/src/languageProcessing/languages/ar/config/functionWords.js +0 -329
- package/src/languageProcessing/languages/ar/config/internal/passiveVerbsWithLongVowel.js +0 -570
- package/src/languageProcessing/languages/ar/config/prefixedFunctionWords.js +0 -5
- package/src/languageProcessing/languages/ar/config/transitionWords.js +0 -19
- package/src/languageProcessing/languages/ar/config/twoPartTransitionWords.js +0 -7
- package/src/languageProcessing/languages/ar/helpers/createBasicWordForms.js +0 -32
- package/src/languageProcessing/languages/ar/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/ar/helpers/internal/stem.js +0 -632
- package/src/languageProcessing/languages/ar/helpers/isPassiveSentence.js +0 -33
- package/src/languageProcessing/languages/ca/Researcher.js +0 -43
- package/src/languageProcessing/languages/ca/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/ca/config/transitionWords.js +0 -31
- package/src/languageProcessing/languages/ca/config/twoPartTransitionWords.js +0 -7
- package/src/languageProcessing/languages/ca/helpers/getStemmer.js +0 -11
- package/src/languageProcessing/languages/cs/Researcher.js +0 -44
- package/src/languageProcessing/languages/cs/config/firstWordExceptions.js +0 -15
- package/src/languageProcessing/languages/cs/config/functionWords.js +0 -121
- package/src/languageProcessing/languages/cs/config/internal/passiveVoiceAuxiliaries.js +0 -38
- package/src/languageProcessing/languages/cs/config/internal/passiveVoiceEndings.js +0 -54
- package/src/languageProcessing/languages/cs/config/stopWords.js +0 -42
- package/src/languageProcessing/languages/cs/config/transitionWords.js +0 -26
- package/src/languageProcessing/languages/cs/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/cs/helpers/getClauses.js +0 -26
- package/src/languageProcessing/languages/cs/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/cs/helpers/internal/getParticiples.js +0 -16
- package/src/languageProcessing/languages/cs/helpers/internal/stem.js +0 -499
- package/src/languageProcessing/languages/cs/values/Clause.js +0 -34
- package/src/languageProcessing/languages/de/Researcher.js +0 -52
- package/src/languageProcessing/languages/de/config/firstWordExceptions.js +0 -17
- package/src/languageProcessing/languages/de/config/functionWords.js +0 -303
- package/src/languageProcessing/languages/de/config/internal/exceptionsParticiplesActive.js +0 -2231
- package/src/languageProcessing/languages/de/config/internal/passiveVoiceAuxiliaries.js +0 -96
- package/src/languageProcessing/languages/de/config/internal/passiveVoiceIrregulars.js +0 -368
- package/src/languageProcessing/languages/de/config/internal/passiveVoiceRegex.js +0 -72
- package/src/languageProcessing/languages/de/config/keyphraseLength.js +0 -11
- package/src/languageProcessing/languages/de/config/stopWords.js +0 -67
- package/src/languageProcessing/languages/de/config/syllables.json +0 -460
- package/src/languageProcessing/languages/de/config/transitionWords.js +0 -31
- package/src/languageProcessing/languages/de/config/twoPartTransitionWords.js +0 -12
- package/src/languageProcessing/languages/de/config/wordComplexity.js +0 -4
- package/src/languageProcessing/languages/de/helpers/calculateFleschReadingScore.js +0 -18
- package/src/languageProcessing/languages/de/helpers/checkIfWordIsComplex.js +0 -40
- package/src/languageProcessing/languages/de/helpers/checkIfWordIsFunction.js +0 -15
- package/src/languageProcessing/languages/de/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/de/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/de/helpers/internal/SentenceTokenizer.js +0 -31
- package/src/languageProcessing/languages/de/helpers/internal/detectAndStemRegularParticiple.js +0 -128
- package/src/languageProcessing/languages/de/helpers/internal/determineStem.js +0 -128
- package/src/languageProcessing/languages/de/helpers/internal/getParticiples.js +0 -40
- package/src/languageProcessing/languages/de/helpers/internal/stem.js +0 -215
- package/src/languageProcessing/languages/de/helpers/memoizedSentenceTokenizer.js +0 -28
- package/src/languageProcessing/languages/de/values/Clause.js +0 -85
- package/src/languageProcessing/languages/el/Researcher.js +0 -46
- package/src/languageProcessing/languages/el/config/firstWordExceptions.js +0 -47
- package/src/languageProcessing/languages/el/config/functionWords.js +0 -116
- package/src/languageProcessing/languages/el/config/internal/auxiliaries.js +0 -19
- package/src/languageProcessing/languages/el/config/internal/morphologicalPassiveSuffixes.js +0 -87
- package/src/languageProcessing/languages/el/config/internal/nonPassiveVerbStems.js +0 -138
- package/src/languageProcessing/languages/el/config/stopWords.js +0 -854
- package/src/languageProcessing/languages/el/config/transitionWords.js +0 -26
- package/src/languageProcessing/languages/el/config/twoPartTransitionWords.js +0 -10
- package/src/languageProcessing/languages/el/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/el/helpers/getStemmer.js +0 -21
- package/src/languageProcessing/languages/el/helpers/internal/getParticiples.js +0 -20
- package/src/languageProcessing/languages/el/helpers/internal/stem.js +0 -368
- package/src/languageProcessing/languages/el/helpers/isPassiveSentence.js +0 -38
- package/src/languageProcessing/languages/el/values/Clause.js +0 -37
- package/src/languageProcessing/languages/en/Researcher.js +0 -46
- package/src/languageProcessing/languages/en/config/abbreviations.js +0 -55
- package/src/languageProcessing/languages/en/config/firstWordExceptions.js +0 -14
- package/src/languageProcessing/languages/en/config/functionWords.js +0 -186
- package/src/languageProcessing/languages/en/config/internal/passiveVoiceAuxiliaries.js +0 -44
- package/src/languageProcessing/languages/en/config/internal/passiveVoiceIrregulars.js +0 -354
- package/src/languageProcessing/languages/en/config/internal/passiveVoiceNonVerbEndingEd.js +0 -3047
- package/src/languageProcessing/languages/en/config/regularParticiplesRegex.js +0 -5
- package/src/languageProcessing/languages/en/config/stopWords.js +0 -52
- package/src/languageProcessing/languages/en/config/syllables.json +0 -86
- package/src/languageProcessing/languages/en/config/transitionWords.js +0 -48
- package/src/languageProcessing/languages/en/config/twoPartTransitionWords.js +0 -7
- package/src/languageProcessing/languages/en/config/wordComplexity.js +0 -5
- package/src/languageProcessing/languages/en/helpers/calculateFleschReadingScore.js +0 -18
- package/src/languageProcessing/languages/en/helpers/checkIfWordIsComplex.js +0 -43
- package/src/languageProcessing/languages/en/helpers/getClauses.js +0 -49
- package/src/languageProcessing/languages/en/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/en/helpers/internal/determineStem.js +0 -178
- package/src/languageProcessing/languages/en/helpers/internal/getAdjectiveStem.js +0 -162
- package/src/languageProcessing/languages/en/helpers/internal/getParticiples.js +0 -25
- package/src/languageProcessing/languages/en/helpers/internal/getVerbStem.js +0 -237
- package/src/languageProcessing/languages/en/values/Clause.js +0 -68
- package/src/languageProcessing/languages/es/Researcher.js +0 -48
- package/src/languageProcessing/languages/es/config/firstWordExceptions.js +0 -16
- package/src/languageProcessing/languages/es/config/functionWords.js +0 -321
- package/src/languageProcessing/languages/es/config/internal/passiveVoiceAuxiliaries.js +0 -60
- package/src/languageProcessing/languages/es/config/internal/passiveVoiceParticiples.js +0 -7327
- package/src/languageProcessing/languages/es/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/es/config/stopWords.js +0 -33
- package/src/languageProcessing/languages/es/config/syllables.json +0 -176
- package/src/languageProcessing/languages/es/config/transitionWords.js +0 -40
- package/src/languageProcessing/languages/es/config/twoPartTransitionWords.js +0 -10
- package/src/languageProcessing/languages/es/config/wordComplexity.js +0 -4
- package/src/languageProcessing/languages/es/helpers/calculateFleschReadingScore.js +0 -18
- package/src/languageProcessing/languages/es/helpers/checkIfWordIsComplex.js +0 -56
- package/src/languageProcessing/languages/es/helpers/getClauses.js +0 -29
- package/src/languageProcessing/languages/es/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/es/helpers/internal/checkVerbStemModifications.js +0 -41
- package/src/languageProcessing/languages/es/helpers/internal/getParticiples.js +0 -35
- package/src/languageProcessing/languages/es/helpers/internal/stem.js +0 -793
- package/src/languageProcessing/languages/es/values/Clause.js +0 -47
- package/src/languageProcessing/languages/fa/Researcher.js +0 -47
- package/src/languageProcessing/languages/fa/config/firstWordExceptions.js +0 -12
- package/src/languageProcessing/languages/fa/config/functionWords.js +0 -122
- package/src/languageProcessing/languages/fa/config/internal/participles.js +0 -1429
- package/src/languageProcessing/languages/fa/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/fa/config/transitionWords.js +0 -20
- package/src/languageProcessing/languages/fa/config/twoPartTransitionWords.js +0 -9
- package/src/languageProcessing/languages/fa/helpers/createBasicWordForms.js +0 -97
- package/src/languageProcessing/languages/fa/helpers/getStemmer.js +0 -13
- package/src/languageProcessing/languages/fa/helpers/isPassiveSentence.js +0 -14
- package/src/languageProcessing/languages/fr/Researcher.js +0 -46
- package/src/languageProcessing/languages/fr/config/firstWordExceptions.js +0 -16
- package/src/languageProcessing/languages/fr/config/functionWords.js +0 -281
- package/src/languageProcessing/languages/fr/config/internal/exceptionsParticiplesActive.js +0 -1510
- package/src/languageProcessing/languages/fr/config/internal/passiveVoiceAuxiliaries.js +0 -108
- package/src/languageProcessing/languages/fr/config/internal/passiveVoiceIrregulars.js +0 -565
- package/src/languageProcessing/languages/fr/config/stopWords.js +0 -119
- package/src/languageProcessing/languages/fr/config/syllables.json +0 -1426
- package/src/languageProcessing/languages/fr/config/transitionWords.js +0 -59
- package/src/languageProcessing/languages/fr/config/twoPartTransitionWords.js +0 -15
- package/src/languageProcessing/languages/fr/config/wordComplexity.js +0 -4
- package/src/languageProcessing/languages/fr/helpers/calculateFleschReadingScore.js +0 -18
- package/src/languageProcessing/languages/fr/helpers/checkIfWordIsComplex.js +0 -67
- package/src/languageProcessing/languages/fr/helpers/getClauses.js +0 -34
- package/src/languageProcessing/languages/fr/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/fr/helpers/internal/getParticiples.js +0 -72
- package/src/languageProcessing/languages/fr/helpers/internal/stem.js +0 -633
- package/src/languageProcessing/languages/fr/values/Clause.js +0 -96
- package/src/languageProcessing/languages/he/Researcher.js +0 -50
- package/src/languageProcessing/languages/he/config/firstWordExceptions.js +0 -13
- package/src/languageProcessing/languages/he/config/functionWords.js +0 -564
- package/src/languageProcessing/languages/he/config/internal/regularRootsHufal.js +0 -186
- package/src/languageProcessing/languages/he/config/internal/regularRootsNifal.js +0 -195
- package/src/languageProcessing/languages/he/config/internal/regularRootsPual.js +0 -168
- package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsHufal.js +0 -188
- package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsNifal.js +0 -197
- package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsPual.js +0 -170
- package/src/languageProcessing/languages/he/config/prefixedFunctionWords.js +0 -2
- package/src/languageProcessing/languages/he/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/he/config/transitionWords.js +0 -28
- package/src/languageProcessing/languages/he/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/he/helpers/createBasicWordForms.js +0 -33
- package/src/languageProcessing/languages/he/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/he/helpers/internal/stem.js +0 -52
- package/src/languageProcessing/languages/he/helpers/isPassiveSentence.js +0 -96
- package/src/languageProcessing/languages/he/helpers/stem.js +0 -52
- package/src/languageProcessing/languages/hu/Researcher.js +0 -48
- package/src/languageProcessing/languages/hu/config/firstWordExceptions.js +0 -31
- package/src/languageProcessing/languages/hu/config/functionWords.js +0 -284
- package/src/languageProcessing/languages/hu/config/internal/auxiliaries.js +0 -97
- package/src/languageProcessing/languages/hu/config/internal/morphologicalPassiveAffixes.js +0 -125
- package/src/languageProcessing/languages/hu/config/internal/nonPassivesInVaAndVe.js +0 -265
- package/src/languageProcessing/languages/hu/config/internal/odikVerbs.js +0 -273
- package/src/languageProcessing/languages/hu/config/internal/participles.js +0 -412
- package/src/languageProcessing/languages/hu/config/stopWords.js +0 -213
- package/src/languageProcessing/languages/hu/config/transitionWords.js +0 -42
- package/src/languageProcessing/languages/hu/config/twoPartTransitionWords.js +0 -34
- package/src/languageProcessing/languages/hu/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/hu/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/hu/helpers/internal/getParticiples.js +0 -21
- package/src/languageProcessing/languages/hu/helpers/internal/stem.js +0 -389
- package/src/languageProcessing/languages/hu/helpers/isPassiveSentence.js +0 -54
- package/src/languageProcessing/languages/hu/values/Clause.js +0 -41
- package/src/languageProcessing/languages/id/Researcher.js +0 -46
- package/src/languageProcessing/languages/id/config/firstWordExceptions.js +0 -13
- package/src/languageProcessing/languages/id/config/functionWords.js +0 -202
- package/src/languageProcessing/languages/id/config/internal/nonPassiveVerbsStartingDi.js +0 -215
- package/src/languageProcessing/languages/id/config/transitionWords.js +0 -62
- package/src/languageProcessing/languages/id/config/twoPartTransitionWords.js +0 -13
- package/src/languageProcessing/languages/id/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/id/helpers/internal/stem.js +0 -462
- package/src/languageProcessing/languages/id/helpers/internal/stemHelpers.js +0 -78
- package/src/languageProcessing/languages/id/helpers/isPassiveSentence.js +0 -39
- package/src/languageProcessing/languages/id/helpers/splitIntoTokensCustom.js +0 -47
- package/src/languageProcessing/languages/it/Researcher.js +0 -48
- package/src/languageProcessing/languages/it/config/firstWordExceptions.js +0 -17
- package/src/languageProcessing/languages/it/config/functionWords.js +0 -277
- package/src/languageProcessing/languages/it/config/internal/passiveVoiceAuxiliaries.js +0 -98
- package/src/languageProcessing/languages/it/config/internal/passiveVoiceParticiples.js +0 -7197
- package/src/languageProcessing/languages/it/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/it/config/stopWords.js +0 -57
- package/src/languageProcessing/languages/it/config/syllables.json +0 -573
- package/src/languageProcessing/languages/it/config/transitionWords.js +0 -104
- package/src/languageProcessing/languages/it/config/twoPartTransitionWords.js +0 -9
- package/src/languageProcessing/languages/it/helpers/calculateFleschReadingScore.js +0 -15
- package/src/languageProcessing/languages/it/helpers/getClauses.js +0 -32
- package/src/languageProcessing/languages/it/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/it/helpers/internal/getParticiples.js +0 -34
- package/src/languageProcessing/languages/it/helpers/internal/stem.js +0 -436
- package/src/languageProcessing/languages/it/values/Clause.js +0 -47
- package/src/languageProcessing/languages/ja/Researcher.js +0 -86
- package/src/languageProcessing/languages/ja/config/assessmentApplicabilityCharacterCount.js +0 -4
- package/src/languageProcessing/languages/ja/config/firstWordExceptions.js +0 -8
- package/src/languageProcessing/languages/ja/config/functionWords.js +0 -563
- package/src/languageProcessing/languages/ja/config/keyphraseLength.js +0 -16
- package/src/languageProcessing/languages/ja/config/metaDescriptionLength.js +0 -4
- package/src/languageProcessing/languages/ja/config/paragraphLength.js +0 -10
- package/src/languageProcessing/languages/ja/config/sentenceLength.js +0 -4
- package/src/languageProcessing/languages/ja/config/subheadingsTooLong.js +0 -18
- package/src/languageProcessing/languages/ja/config/textLength.js +0 -47
- package/src/languageProcessing/languages/ja/config/topicLength.js +0 -5
- package/src/languageProcessing/languages/ja/config/transitionWords.js +0 -354
- package/src/languageProcessing/languages/ja/customResearches/findKeyphraseInSEOTitle.js +0 -98
- package/src/languageProcessing/languages/ja/customResearches/getKeyphraseLength.js +0 -19
- package/src/languageProcessing/languages/ja/customResearches/getWordForms.js +0 -50
- package/src/languageProcessing/languages/ja/customResearches/textLength.js +0 -24
- package/src/languageProcessing/languages/ja/helpers/countCharacters.js +0 -19
- package/src/languageProcessing/languages/ja/helpers/customGetStemmer.js +0 -21
- package/src/languageProcessing/languages/ja/helpers/getContentWords.js +0 -21
- package/src/languageProcessing/languages/ja/helpers/getWords.js +0 -31
- package/src/languageProcessing/languages/ja/helpers/internal/SentenceTokenizer.js +0 -102
- package/src/languageProcessing/languages/ja/helpers/internal/createWordForms.js +0 -68
- package/src/languageProcessing/languages/ja/helpers/internal/determineStem.js +0 -17
- package/src/languageProcessing/languages/ja/helpers/matchTextWithWord.js +0 -53
- package/src/languageProcessing/languages/ja/helpers/matchTransitionWords.js +0 -25
- package/src/languageProcessing/languages/ja/helpers/memoizedSentenceTokenizer.js +0 -28
- package/src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom.js +0 -20
- package/src/languageProcessing/languages/ja/helpers/wordsCharacterCount.js +0 -13
- package/src/languageProcessing/languages/nb/Researcher.js +0 -45
- package/src/languageProcessing/languages/nb/config/firstWordExceptions.js +0 -12
- package/src/languageProcessing/languages/nb/config/functionWords.js +0 -106
- package/src/languageProcessing/languages/nb/config/internal/participles.js +0 -3127
- package/src/languageProcessing/languages/nb/config/internal/passiveVoiceAuxiliaries.js +0 -15
- package/src/languageProcessing/languages/nb/config/stopWords.js +0 -39
- package/src/languageProcessing/languages/nb/config/transitionWords.js +0 -21
- package/src/languageProcessing/languages/nb/config/twoPartTransitionWords.js +0 -10
- package/src/languageProcessing/languages/nb/helpers/getClauses.js +0 -28
- package/src/languageProcessing/languages/nb/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/nb/helpers/internal/getParticiples.js +0 -24
- package/src/languageProcessing/languages/nb/helpers/internal/stem.js +0 -133
- package/src/languageProcessing/languages/nb/values/Clause.js +0 -43
- package/src/languageProcessing/languages/nl/Researcher.js +0 -48
- package/src/languageProcessing/languages/nl/config/firstWordExceptions.js +0 -15
- package/src/languageProcessing/languages/nl/config/functionWords.js +0 -233
- package/src/languageProcessing/languages/nl/config/internal/nonParticiples.js +0 -2515
- package/src/languageProcessing/languages/nl/config/internal/passiveVoiceAuxiliaries.js +0 -13
- package/src/languageProcessing/languages/nl/config/internal/passiveVoiceIrregulars.js +0 -474
- package/src/languageProcessing/languages/nl/config/keyphraseLength.js +0 -10
- package/src/languageProcessing/languages/nl/config/stopWords.js +0 -35
- package/src/languageProcessing/languages/nl/config/syllables.json +0 -343
- package/src/languageProcessing/languages/nl/config/transitionWords.js +0 -22
- package/src/languageProcessing/languages/nl/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/nl/helpers/calculateFleschReadingScore.js +0 -15
- package/src/languageProcessing/languages/nl/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/nl/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/nl/helpers/internal/checkExceptionsWithFullForms.js +0 -128
- package/src/languageProcessing/languages/nl/helpers/internal/detectAndStemRegularParticiple.js +0 -324
- package/src/languageProcessing/languages/nl/helpers/internal/detectAndStemSuffixes.js +0 -164
- package/src/languageProcessing/languages/nl/helpers/internal/determineStem.js +0 -133
- package/src/languageProcessing/languages/nl/helpers/internal/getParticiples.js +0 -25
- package/src/languageProcessing/languages/nl/helpers/internal/getStemWordsWithTAndDEnding.js +0 -183
- package/src/languageProcessing/languages/nl/helpers/internal/stem.js +0 -146
- package/src/languageProcessing/languages/nl/helpers/internal/stemModificationHelpers.js +0 -109
- package/src/languageProcessing/languages/nl/helpers/internal/stemTOrDFromEndOfWord.js +0 -65
- package/src/languageProcessing/languages/nl/values/Clause.js +0 -62
- package/src/languageProcessing/languages/pl/Researcher.js +0 -47
- package/src/languageProcessing/languages/pl/config/firstWordExceptions.js +0 -12
- package/src/languageProcessing/languages/pl/config/functionWords.js +0 -421
- package/src/languageProcessing/languages/pl/config/internal/auxiliaries.js +0 -85
- package/src/languageProcessing/languages/pl/config/internal/participles.js +0 -26433
- package/src/languageProcessing/languages/pl/config/sentenceLength.js +0 -10
- package/src/languageProcessing/languages/pl/config/stopWords.js +0 -36
- package/src/languageProcessing/languages/pl/config/transitionWords.js +0 -42
- package/src/languageProcessing/languages/pl/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/pl/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/pl/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/pl/helpers/internal/getParticiples.js +0 -18
- package/src/languageProcessing/languages/pl/helpers/internal/stem.js +0 -161
- package/src/languageProcessing/languages/pl/values/Clause.js +0 -53
- package/src/languageProcessing/languages/pt/Researcher.js +0 -48
- package/src/languageProcessing/languages/pt/config/firstWordExceptions.js +0 -15
- package/src/languageProcessing/languages/pt/config/functionWords.js +0 -226
- package/src/languageProcessing/languages/pt/config/internal/passiveVoiceAuxiliaries.js +0 -66
- package/src/languageProcessing/languages/pt/config/internal/passiveVoiceParticiples.js +0 -4088
- package/src/languageProcessing/languages/pt/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/pt/config/stopWords.js +0 -50
- package/src/languageProcessing/languages/pt/config/syllables.json +0 -38
- package/src/languageProcessing/languages/pt/config/transitionWords.js +0 -34
- package/src/languageProcessing/languages/pt/config/twoPartTransitionWords.js +0 -9
- package/src/languageProcessing/languages/pt/helpers/calculateFleschReadingScore.js +0 -15
- package/src/languageProcessing/languages/pt/helpers/getClauses.js +0 -29
- package/src/languageProcessing/languages/pt/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/pt/helpers/internal/getParticiples.js +0 -35
- package/src/languageProcessing/languages/pt/helpers/internal/stem.js +0 -319
- package/src/languageProcessing/languages/pt/values/Clause.js +0 -43
- package/src/languageProcessing/languages/ru/Researcher.js +0 -48
- package/src/languageProcessing/languages/ru/config/firstWordExceptions.js +0 -14
- package/src/languageProcessing/languages/ru/config/fleschReadingEaseScores.js +0 -20
- package/src/languageProcessing/languages/ru/config/functionWords.js +0 -519
- package/src/languageProcessing/languages/ru/config/internal/participlesShortenedList.js +0 -2914
- package/src/languageProcessing/languages/ru/config/internal/passiveVoiceParticiples.js +0 -6295
- package/src/languageProcessing/languages/ru/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/ru/config/syllables.json +0 -19
- package/src/languageProcessing/languages/ru/config/transitionWords.js +0 -62
- package/src/languageProcessing/languages/ru/config/twoPartTransitionWords.js +0 -14
- package/src/languageProcessing/languages/ru/helpers/calculateFleschReadingScore.js +0 -16
- package/src/languageProcessing/languages/ru/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/ru/helpers/internal/stem.js +0 -288
- package/src/languageProcessing/languages/ru/helpers/isPassiveSentence.js +0 -14
- package/src/languageProcessing/languages/sk/Researcher.js +0 -46
- package/src/languageProcessing/languages/sk/config/firstWordExceptions.js +0 -14
- package/src/languageProcessing/languages/sk/config/functionWords.js +0 -855
- package/src/languageProcessing/languages/sk/config/internal/nonPassives.js +0 -1074
- package/src/languageProcessing/languages/sk/config/internal/passiveVoiceAuxiliaries.js +0 -22
- package/src/languageProcessing/languages/sk/config/stopWords.js +0 -34
- package/src/languageProcessing/languages/sk/config/transitionWords.js +0 -23
- package/src/languageProcessing/languages/sk/config/twoPartTransitionWords.js +0 -10
- package/src/languageProcessing/languages/sk/helpers/getClauses.js +0 -26
- package/src/languageProcessing/languages/sk/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/sk/helpers/internal/getParticiples.js +0 -16
- package/src/languageProcessing/languages/sk/helpers/internal/stem.js +0 -319
- package/src/languageProcessing/languages/sk/values/Clause.js +0 -39
- package/src/languageProcessing/languages/sv/Researcher.js +0 -45
- package/src/languageProcessing/languages/sv/config/firstWordExceptions.js +0 -15
- package/src/languageProcessing/languages/sv/config/functionWords.js +0 -176
- package/src/languageProcessing/languages/sv/config/internal/passiveVerbs.js +0 -10400
- package/src/languageProcessing/languages/sv/config/keyphraseLength.js +0 -11
- package/src/languageProcessing/languages/sv/config/transitionWords.js +0 -35
- package/src/languageProcessing/languages/sv/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/sv/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/sv/helpers/internal/stem.js +0 -152
- package/src/languageProcessing/languages/sv/helpers/isPassiveSentence.js +0 -14
- package/src/languageProcessing/languages/tr/Researcher.js +0 -44
- package/src/languageProcessing/languages/tr/config/firstWordExceptions.js +0 -13
- package/src/languageProcessing/languages/tr/config/functionWords.js +0 -116
- package/src/languageProcessing/languages/tr/config/internal/nonPassiveExceptions.js +0 -574
- package/src/languageProcessing/languages/tr/config/internal/passiveEndings.js +0 -151
- package/src/languageProcessing/languages/tr/config/sentenceLength.js +0 -7
- package/src/languageProcessing/languages/tr/config/transitionWords.js +0 -42
- package/src/languageProcessing/languages/tr/config/twoPartTransitionWords.js +0 -7
- package/src/languageProcessing/languages/tr/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/tr/helpers/internal/stem.js +0 -20
- package/src/languageProcessing/languages/tr/helpers/isPassiveSentence.js +0 -43
- package/src/languageProcessing/researches/altTagCount.js +0 -70
- package/src/languageProcessing/researches/countSentencesFromText.js +0 -19
- package/src/languageProcessing/researches/findKeyphraseInSEOTitle.js +0 -257
- package/src/languageProcessing/researches/findKeywordInFirstParagraph.js +0 -86
- package/src/languageProcessing/researches/findTransitionWords.js +0 -123
- package/src/languageProcessing/researches/functionWordsInKeyphrase.js +0 -44
- package/src/languageProcessing/researches/getAnchorsWithKeyphrase.js +0 -227
- package/src/languageProcessing/researches/getFleschReadingScore.js +0 -150
- package/src/languageProcessing/researches/getKeywordDensity.js +0 -44
- package/src/languageProcessing/researches/getLinkStatistics.js +0 -54
- package/src/languageProcessing/researches/getLinks.js +0 -18
- package/src/languageProcessing/researches/getLongCenterAlignedTexts.js +0 -37
- package/src/languageProcessing/researches/getParagraphLength.js +0 -44
- package/src/languageProcessing/researches/getParagraphs.js +0 -18
- package/src/languageProcessing/researches/getPassiveVoiceResult.js +0 -129
- package/src/languageProcessing/researches/getProminentWordsForInsights.js +0 -48
- package/src/languageProcessing/researches/getProminentWordsForInternalLinking.js +0 -119
- package/src/languageProcessing/researches/getSentenceBeginnings.js +0 -124
- package/src/languageProcessing/researches/getSubheadingTextLengths.js +0 -59
- package/src/languageProcessing/researches/getWordForms.js +0 -204
- package/src/languageProcessing/researches/h1s.js +0 -10
- package/src/languageProcessing/researches/imageCount.js +0 -16
- package/src/languageProcessing/researches/index.js +0 -5
- package/src/languageProcessing/researches/keyphraseDistribution.js +0 -249
- package/src/languageProcessing/researches/keyphraseLength.js +0 -17
- package/src/languageProcessing/researches/keywordCount.js +0 -134
- package/src/languageProcessing/researches/keywordCountInUrl.js +0 -57
- package/src/languageProcessing/researches/matchKeywordInSubheadings.js +0 -62
- package/src/languageProcessing/researches/metaDescriptionKeyword.js +0 -85
- package/src/languageProcessing/researches/metaDescriptionLength.js +0 -12
- package/src/languageProcessing/researches/pageTitleWidth.js +0 -11
- package/src/languageProcessing/researches/readingTime.js +0 -82
- package/src/languageProcessing/researches/sentences.js +0 -20
- package/src/languageProcessing/researches/videoCount.js +0 -32
- package/src/languageProcessing/researches/wordComplexity.js +0 -129
- package/src/languageProcessing/researches/wordCountInText.js +0 -29
- package/src/languageProcessing/values/Clause.js +0 -108
- package/src/languageProcessing/values/ProminentWord.js +0 -95
- package/src/languageProcessing/values/Sentence.js +0 -111
- package/src/languageProcessing/values/index.js +0 -9
- package/src/markers/addMark.js +0 -9
- package/src/markers/addMarkSingleWord.js +0 -32
- package/src/markers/index.js +0 -7
- package/src/markers/removeDuplicateMarks.js +0 -27
- package/src/markers/removeMarks.js +0 -11
- package/src/parse/build/build.js +0 -52
- package/src/parse/build/index.js +0 -10
- package/src/parse/build/private/adapt.js +0 -113
- package/src/parse/build/private/adaptAttributes.js +0 -36
- package/src/parse/build/private/alwaysFilterElements.js +0 -75
- package/src/parse/build/private/combineIntoImplicitParagraphs.js +0 -130
- package/src/parse/build/private/filterBeforeTokenizing.js +0 -32
- package/src/parse/build/private/filterHelpers.js +0 -44
- package/src/parse/build/private/filterTree.js +0 -42
- package/src/parse/build/private/getTextElementPositions.js +0 -184
- package/src/parse/build/private/helpers/parseClassAttribute.js +0 -9
- package/src/parse/build/private/isPhrasingContent.js +0 -28
- package/src/parse/build/private/parseBlocks.js +0 -151
- package/src/parse/build/private/tokenize.js +0 -74
- package/src/parse/language/LanguageProcessor.js +0 -74
- package/src/parse/structure/Heading.js +0 -26
- package/src/parse/structure/Node.js +0 -69
- package/src/parse/structure/Paragraph.js +0 -48
- package/src/parse/structure/Sentence.js +0 -30
- package/src/parse/structure/SourceCodeLocation.js +0 -41
- package/src/parse/structure/Text.js +0 -27
- package/src/parse/structure/Token.js +0 -24
- package/src/parse/structure/index.js +0 -16
- package/src/parse/traverse/findAllInTree.js +0 -58
- package/src/parse/traverse/index.js +0 -12
- package/src/parse/traverse/innerText.js +0 -26
- package/src/parsedPaper/ParsedPaper.js +0 -92
- package/src/parsedPaper/assess/TreeAssessor.js +0 -184
- package/src/parsedPaper/assess/assessmentListFactories.js +0 -73
- package/src/parsedPaper/assess/assessments/Assessment.js +0 -79
- package/src/parsedPaper/assess/assessments/index.js +0 -6
- package/src/parsedPaper/assess/assessorFactories.js +0 -104
- package/src/parsedPaper/assess/cornerstone/assessmentListFactories.js +0 -47
- package/src/parsedPaper/assess/cornerstone/index.js +0 -5
- package/src/parsedPaper/assess/index.js +0 -20
- package/src/parsedPaper/build/PaperParser.js +0 -105
- package/src/parsedPaper/build/linguisticParsing/Sentence.js +0 -89
- package/src/parsedPaper/build/linguisticParsing/SentenceTokenizer.js +0 -323
- package/src/parsedPaper/build/linguisticParsing/parseText.js +0 -20
- package/src/parsedPaper/build/tree/TreeBuilder.js +0 -75
- package/src/parsedPaper/build/tree/cleanup/calculateTextIndices.js +0 -190
- package/src/parsedPaper/build/tree/cleanup/getElementContent.js +0 -21
- package/src/parsedPaper/build/tree/cleanup/postParsing.js +0 -37
- package/src/parsedPaper/build/tree/html/HTMLTreeConverter.js +0 -230
- package/src/parsedPaper/build/tree/html/buildTree.js +0 -31
- package/src/parsedPaper/build/tree/html/htmlConstants.js +0 -37
- package/src/parsedPaper/build/tree/index.js +0 -14
- package/src/parsedPaper/build/tree/metadata/buildTree.js +0 -32
- package/src/parsedPaper/research/TreeResearcher.js +0 -134
- package/src/parsedPaper/research/index.js +0 -13
- package/src/parsedPaper/research/researches/Headings.js +0 -20
- package/src/parsedPaper/research/researches/LinkStatistics.js +0 -128
- package/src/parsedPaper/research/researches/Research.js +0 -50
- package/src/parsedPaper/research/researches/index.js +0 -1
- package/src/parsedPaper/structure/tree/FormattingElement.js +0 -67
- package/src/parsedPaper/structure/tree/SourceCodeLocation.js +0 -31
- package/src/parsedPaper/structure/tree/TextContainer.js +0 -85
- package/src/parsedPaper/structure/tree/index.js +0 -22
- package/src/parsedPaper/structure/tree/nodes/Heading.js +0 -26
- package/src/parsedPaper/structure/tree/nodes/LeafNode.js +0 -75
- package/src/parsedPaper/structure/tree/nodes/List.js +0 -47
- package/src/parsedPaper/structure/tree/nodes/ListItem.js +0 -26
- package/src/parsedPaper/structure/tree/nodes/MetadataMiscellaneous.js +0 -46
- package/src/parsedPaper/structure/tree/nodes/MetadataText.js +0 -26
- package/src/parsedPaper/structure/tree/nodes/Node.js +0 -154
- package/src/parsedPaper/structure/tree/nodes/Paragraph.js +0 -24
- package/src/parsedPaper/structure/tree/nodes/StructuredNode.js +0 -52
- package/src/parsedPaper/structure/tree/nodes/index.js +0 -21
- package/src/scoring/assessments/assessment.js +0 -63
- package/src/scoring/assessments/index.js +0 -58
- package/src/scoring/assessments/readability/ParagraphTooLongAssessment.js +0 -173
- package/src/scoring/assessments/readability/SentenceBeginningsAssessment.js +0 -132
- package/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js +0 -186
- package/src/scoring/assessments/readability/TransitionWordsAssessment.js +0 -168
- package/src/scoring/assessments/seo/ImageCountAssessment.js +0 -112
- package/src/scoring/assessments/seo/InternalLinksAssessment.js +0 -114
- package/src/scoring/assessments/seo/IntroductionKeywordAssessment.js +0 -110
- package/src/scoring/assessments/seo/KeyphraseAssessment.js +0 -104
- package/src/scoring/assessments/seo/KeyphraseLengthAssessment.js +0 -110
- package/src/scoring/assessments/seo/KeywordDensityAssessment.js +0 -116
- package/src/scoring/assessments/seo/MetaDescriptionKeywordAssessment.js +0 -114
- package/src/scoring/assessments/seo/MetaDescriptionLengthAssessment.js +0 -112
- package/src/scoring/assessments/seo/MetaTitleKeywordAssessment.js +0 -111
- package/src/scoring/assessments/seo/NumberInMetaTitleAssessment.js +0 -107
- package/src/scoring/assessments/seo/OutboundLinksAssessment.js +0 -111
- package/src/scoring/assessments/seo/PageTitleWidthAssessment.js +0 -104
- package/src/scoring/assessments/seo/SingleH1Assessment.js +0 -118
- package/src/scoring/assessments/seo/SingleTitleAssessment.js +0 -108
- package/src/scoring/assessments/seo/SubHeadingsKeywordAssessment.js +0 -107
- package/src/scoring/assessments/seo/TextImagesAssessment.js +0 -144
- package/src/scoring/assessments/seo/TextLengthAssessment.js +0 -100
- package/src/scoring/assessments/seo/UrlKeywordAssessment.js +0 -111
- package/src/scoring/assessments/seo/UrlLengthAssessment.js +0 -103
- package/src/scoring/assessors/assessor.js +0 -269
- package/src/scoring/assessors/avadaAssessor.js +0 -67
- package/src/scoring/assessors/contentAssessor.js +0 -159
- package/src/scoring/assessors/index.js +0 -4
- package/src/scoring/assessors/seoAssessor.js +0 -57
- package/src/scoring/helpers/assessments/checkForTooLongSentences.js +0 -13
- package/src/scoring/helpers/assessments/inRange.js +0 -49
- package/src/scoring/helpers/assessments/keyphraseLengthFactor.js +0 -10
- package/src/scoring/helpers/assessments/recommendedKeywordCount.js +0 -43
- package/src/scoring/helpers/index.js +0 -74
- package/src/scoring/interpreters/index.js +0 -5
- package/src/scoring/interpreters/scoreToRating.js +0 -31
- package/src/scoring/renderers/AssessorPresenter.js +0 -360
- package/src/scoring/scoreAggregators/ReadabilityScoreAggregator.js +0 -203
- package/src/scoring/scoreAggregators/SEOScoreAggregator.js +0 -54
- package/src/scoring/scoreAggregators/ScoreAggregator.js +0 -23
- package/src/scoring/scoreAggregators/index.js +0 -3
- package/src/values/AssessmentResult.js +0 -496
- package/src/values/Mark.js +0 -271
- package/src/values/Paper.js +0 -425
- package/src/values/index.js +0 -9
- package/src/vendor/turkishStemmer.js +0 -3435
- package/tsconfig.json +0 -15
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
/**
 * The maximum allowed length of a center-aligned paragraph or heading in characters.
 * @type {number}
 */
const MAX_CENTER_ALIGNED_LENGTH = 50;

/**
 * A regular expression to match heading tags (`h1` through `h6`).
 * @type {RegExp}
 */
const HEADING_TAGS_REGEX = /^h[1-6]$/;

/**
 * Selects the nodes that are center-aligned and whose inner text is longer than the allowed maximum.
 *
 * A node counts as center-aligned when its `class` attribute is a Set that contains "has-text-align-center".
 * Nodes that are not center-aligned, or whose text is not longer than the maximum, are left out of the result.
 *
 * @param {Node[]} nodes An array of nodes.
 * @param {number} [maxLength] The length in characters above which a node is considered too long.
 *                             Defaults to `MAX_CENTER_ALIGNED_LENGTH`.
 *
 * @returns {Node[]} An array containing all center-aligned nodes that are longer than the maximum length.
 */
function getLongCenterAlignedElements( nodes, maxLength = MAX_CENTER_ALIGNED_LENGTH ) {
	return nodes.filter( node => {
		const classes = node.attributes.class;
		// The class attribute is only inspected when it is a Set; any other value means "not center-aligned".
		const isCenterAligned = classes instanceof Set && classes.has( "has-text-align-center" );
		return isCenterAligned && node.innerText().length > maxLength;
	} );
}
|
|
24
|
-
|
|
25
|
-
/**
 * Collects the paragraphs and headings of a paper that are center-aligned and too long.
 *
 * All `p` nodes are gathered from the paper's tree first, followed by all heading nodes; the combined
 * list is then narrowed down by `getLongCenterAlignedElements`.
 *
 * @param {Paper} paper The paper to analyze.
 *
 * @returns {Node[]} An array of nodes containing too long center-aligned paragraphs/headings.
 */
export default function( paper ) {
	const isParagraph = node => node.name === "p";
	const isHeading = node => HEADING_TAGS_REGEX.test( node.name );

	const tree = paper.getTree();
	const paragraphNodes = tree.findAll( isParagraph );
	const headingNodes = tree.findAll( isHeading );

	// Paragraphs come first in the result, headings after them.
	const candidates = paragraphNodes.concat( headingNodes );

	return getLongCenterAlignedElements( candidates );
}
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
import { imageRegex } from "../helpers/image/imageInText";
|
|
2
|
-
import sanitizeLineBreakTag from "../helpers/sanitize/sanitizeLineBreakTag";
|
|
3
|
-
import countWords from "../helpers/word/countWords.js";
|
|
4
|
-
import matchParagraphs from "../helpers/html/matchParagraphs.js";
|
|
5
|
-
import { filter } from "lodash";
|
|
6
|
-
import removeHtmlBlocks from "../helpers/html/htmlParser";
|
|
7
|
-
import { filterShortcodesFromHTML } from "../helpers";
|
|
8
|
-
|
|
9
|
-
/**
 * Gets all paragraphs and their word counts or character counts from the text.
 *
 * The paper text is cleaned first: HTML blocks and shortcodes are removed, images are stripped, and
 * line break tags are sanitized. The remaining text is split into paragraphs, each paragraph is measured,
 * and paragraphs with a length of zero are dropped from the result.
 *
 * @param {Paper} paper The paper object to get the text from.
 * @param {Researcher} researcher The researcher to use for analysis.
 *
 * @returns {Array} An array of objects, each holding the paragraph length (`countLength`) and the paragraph text (`text`).
 */
export default function( paper, researcher ) {
	let text = paper.getText();
	text = removeHtmlBlocks( text );
	text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );

	// Remove images from text before retrieving the paragraphs.
	// This step is done here so that applying highlight in captions is possible for ParagraphTooLongAssessment.
	text = text.replace( imageRegex, "" );

	// Replace line break tags containing attribute(s) with paragraph tag.
	text = sanitizeLineBreakTag( text );
	const paragraphs = matchParagraphs( text );

	// An optional custom helper to count length to use instead of countWords.
	const customCountLength = researcher.getHelper( "customCountLength" );

	// Build the result with the value returned by `map` instead of pushing into an outer array from its callback.
	const paragraphsLength = paragraphs.map( paragraph => ( {
		countLength: customCountLength ? customCountLength( paragraph ) : countWords( paragraph ),
		text: paragraph,
	} ) );

	// Only paragraphs that actually contain something to count are reported.
	return filter( paragraphsLength, paragraphLength => paragraphLength.countLength > 0 );
}
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
import { reject } from "lodash";
|
|
2
|
-
|
|
3
|
-
/**
 * Returns the paragraphs of a given Paper that are worth analyzing.
 *
 * Every `p` node of the paper's tree is collected, after which two kinds of paragraphs are discarded:
 * paragraphs without any sentences, and paragraphs whose child nodes are all `a` nodes.
 *
 * @param {Paper} paper The current paper.
 *
 * @returns {Paragraph[]} The remaining paragraphs in the paper.
 */
export default function( paper ) {
	const allParagraphs = paper.getTree().findAll( node => node.name === "p" );

	const hasNoSentences = paragraph => paragraph.sentences.length === 0;
	const hasOnlyLinks = paragraph => paragraph.childNodes.every( child => child.name === "a" );

	// The link check is only evaluated for paragraphs that do contain sentences.
	return reject( allParagraphs, paragraph => hasNoSentences( paragraph ) || hasOnlyLinks( paragraph ) );
}
|
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
import getSentences from "../helpers/sentence/getSentences.js";
|
|
2
|
-
import { stripFullTags as stripHTMLTags } from "../helpers/sanitize/stripHTMLTags.js";
|
|
3
|
-
import Sentence from "../../languageProcessing/values/Sentence.js";
|
|
4
|
-
|
|
5
|
-
import { forEach } from "lodash";
|
|
6
|
-
import removeHtmlBlocks from "../helpers/html/htmlParser";
|
|
7
|
-
import { filterShortcodesFromHTML } from "../helpers";
|
|
8
|
-
|
|
9
|
-
/**
 * Looks for morphological passive voice.
 * Supported morphological languages: "ru", "sv", "id", "ar", "he", "tr", "fa".
 * Farsi is implemented as morphological because the periphrastic passives are used as compound verbs (among other compound passives).
 *
 * The paper text is stripped of HTML blocks and shortcodes and split into sentences. Each sentence is
 * lower-cased and stripped of HTML tags before it is handed to the researcher's `isPassiveSentence` helper.
 *
 * @param {Paper} paper The paper object.
 * @param {Researcher} researcher The researcher.
 *
 * @returns {Object} An object with the total number of sentences (`total`) and the texts of the passive sentences (`passives`).
 */
export const getMorphologicalPassives = function( paper, researcher ) {
	const isPassiveSentence = researcher.getHelper( "isPassiveSentence" );

	const textWithoutBlocks = removeHtmlBlocks( paper.getText() );
	const cleanText = filterShortcodesFromHTML( textWithoutBlocks, paper._attributes && paper._attributes.shortcodes );

	const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" );
	const sentences = getSentences( cleanText, memoizedTokenizer ).map( sentenceText => new Sentence( sentenceText ) );

	const passives = [];

	forEach( sentences, sentence => {
		const normalizedSentence = stripHTMLTags( sentence.getSentenceText() ).toLocaleLowerCase();

		sentence.setPassive( isPassiveSentence( normalizedSentence ) );

		// Only a strict `true` marks the sentence as passive.
		if ( sentence.isPassive() !== true ) {
			return;
		}
		// The original (not normalized) sentence text is reported.
		passives.push( sentence.getSentenceText() );
	} );

	return {
		total: sentences.length,
		passives: passives,
	};
};
|
|
46
|
-
|
|
47
|
-
/**
 * Looks for periphrastic passive voice.
 * Supported periphrastic languages: "en", "de", "nl", "fr", "es", "it", "pt", "pl", "sk".
 *
 * The paper text is stripped of HTML blocks and shortcodes and split into sentences. Each sentence is
 * lower-cased and stripped of HTML tags, divided into clauses with the researcher's `getClauses` helper,
 * and the resulting clauses are stored on the sentence.
 *
 * @param {Paper} paper The paper object.
 * @param {Researcher} researcher The researcher.
 *
 * @returns {Object} An object with the total number of sentences (`total`) and the texts of the passive sentences (`passives`).
 */
export const getPeriphrasticPassives = function( paper, researcher ) {
	const getClauses = researcher.getHelper( "getClauses" );

	const textWithoutBlocks = removeHtmlBlocks( paper.getText() );
	const cleanText = filterShortcodesFromHTML( textWithoutBlocks, paper._attributes && paper._attributes.shortcodes );

	const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" );
	const sentences = getSentences( cleanText, memoizedTokenizer ).map( sentenceText => new Sentence( sentenceText ) );

	const passives = [];

	forEach( sentences, sentence => {
		const normalizedSentence = stripHTMLTags( sentence.getSentenceText() ).toLocaleLowerCase();

		// The functionality based on sentencePart objects should be rewritten using array indices of stopwords and auxiliaries.

		// Divide a sentence into clauses and return an array of clause objects that have been checked for passiveness.
		sentence.setClauses( getClauses( normalizedSentence ) );

		// NOTE(review): the sentence presumably derives its passiveness from the clauses set above - confirm in Sentence.
		if ( sentence.isPassive() ) {
			// The original (not normalized) sentence text is reported.
			passives.push( sentence.getSentenceText() );
		}
	} );

	return {
		total: sentences.length,
		passives: passives,
	};
};
|
|
89
|
-
|
|
90
|
-
/**
 * Looks for both morphological and periphrastic passive voice.
 * Supported languages with both morphological and periphrastic passives: "hu", "nb".
 * Due to technical difficulties "nb" is only implemented as periphrastic at the moment. Languages that have not been implemented yet: "da".
 *
 * Both detections are run on the same paper. The sentence total is taken from the morphological run, and the
 * passive sentences of both runs are combined without de-duplication, periphrastic results first.
 *
 * @param {Paper} paper The paper object.
 * @param {Researcher} researcher The researcher.
 *
 * @returns {Object} An object with the total number of sentences (`total`) and the texts of the passive sentences (`passives`).
 */
const getMorphologicalAndPeriphrasticPassive = function( paper, researcher ) {
	const { total, passives: morphological } = getMorphologicalPassives( paper, researcher );
	const { passives: periphrastic } = getPeriphrasticPassives( paper, researcher );

	return {
		total,
		passives: periphrastic.concat( morphological ),
	};
};
|
|
109
|
-
|
|
110
|
-
/**
 * Looks for passive voice, using the detection that matches the researcher's `passiveConstructionType` config.
 *
 * "periphrastic" and "morphological" each select their dedicated detection; any other value runs both
 * detections and combines their results.
 *
 * @param {Paper} paper The paper object.
 * @param {Researcher} researcher The researcher.
 *
 * @returns {Object} The found passive sentences.
 */
export default function getPassiveVoice( paper, researcher ) {
	switch ( researcher.getConfig( "passiveConstructionType" ) ) {
		case "periphrastic":
			return getPeriphrasticPassives( paper, researcher );
		case "morphological":
			return getMorphologicalPassives( paper, researcher );
		default:
			return getMorphologicalAndPeriphrasticPassive( paper, researcher );
	}
}
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
import { take } from "lodash";
|
|
2
|
-
import {
|
|
3
|
-
collapseProminentWordsOnStem,
|
|
4
|
-
filterProminentWords,
|
|
5
|
-
getProminentWords,
|
|
6
|
-
retrieveAbbreviations,
|
|
7
|
-
sortProminentWords,
|
|
8
|
-
} from "../helpers/prominentWords/determineProminentWords";
|
|
9
|
-
import removeURLs from "../helpers/sanitize/removeURLs.js";
|
|
10
|
-
import removeEmailAddresses from "../helpers/sanitize/removeEmailAddresses";
|
|
11
|
-
|
|
12
|
-
/**
 * Retrieves the prominent words from the given paper.
 *
 * URLs and email addresses are removed from the paper text before the words are determined. The words are then
 * collapsed on their stems, sorted, filtered with a threshold of 5, and at most the first 20 are returned.
 *
 * @param {Paper} paper The paper to determine the prominent words of.
 * @param {Researcher} researcher The researcher to use for analysis.
 *
 * @returns {ProminentWord[]} Prominent words for this paper, filtered and sorted.
 */
function getProminentWordsForInsights( paper, researcher ) {
	const functionWords = researcher.getConfig( "functionWords" );

	// A language may provide its own stemmer factory; otherwise the regular "getStemmer" helper is used.
	const createStemmer = researcher.getHelper( "customGetStemmer" ) || researcher.getHelper( "getStemmer" );
	const stemmer = createStemmer( researcher );

	// An optional custom helper to get words from the text.
	const getWordsCustomHelper = researcher.getHelper( "getWordsCustomHelper" );

	// We don't want to include URLs or email addresses in prominent words.
	const text = removeEmailAddresses( removeURLs( paper.getText() ) );

	// If the language has a custom helper to get words from the text, we don't retrieve the abbreviations.
	let abbreviations = [];
	if ( ! getWordsCustomHelper ) {
		abbreviations = retrieveAbbreviations( text );
	}

	const wordsFromText = getProminentWords( text, abbreviations, stemmer, functionWords, getWordsCustomHelper );

	// Collapse the list on stems and sort it; the return value of the sort call is not used.
	const collapsedWords = collapseProminentWordsOnStem( wordsFromText );
	sortProminentWords( collapsedWords );

	// Filter with a minimum of 5 occurrences and keep the 20 top items of what remains.
	const frequentWords = filterProminentWords( collapsedWords, 5 );
	return take( frequentWords, 20 );
}
|
|
47
|
-
|
|
48
|
-
export default getProminentWordsForInsights;
|
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
import { take } from "lodash";
|
|
2
|
-
import countWords from "../helpers/word/countWords";
|
|
3
|
-
import {
|
|
4
|
-
collapseProminentWordsOnStem,
|
|
5
|
-
filterProminentWords,
|
|
6
|
-
getProminentWords,
|
|
7
|
-
getProminentWordsFromPaperAttributes,
|
|
8
|
-
retrieveAbbreviations,
|
|
9
|
-
sortProminentWords,
|
|
10
|
-
} from "../helpers/prominentWords/determineProminentWords";
|
|
11
|
-
import { getSubheadingsTopLevel, removeSubheadingsTopLevel } from "../helpers/html/getSubheadings";
|
|
12
|
-
import baseStemmer from "../helpers/morphology/baseStemmer";
|
|
13
|
-
import removeURLs from "../helpers/sanitize/removeURLs.js";
|
|
14
|
-
import removeEmailAddresses from "../helpers/sanitize/removeEmailAddresses";
|
|
15
|
-
|
|
16
|
-
/**
 * Strips URLs and email addresses from a text, in that order.
 *
 * @param {string} text The text to sanitize.
 *
 * @returns {string} The text without URLs and email addresses.
 */
const sanitizeText = function( text ) {
	const textWithoutURLs = removeURLs( text );
	return removeEmailAddresses( textWithoutURLs );
};
|
|
27
|
-
|
|
28
|
-
/**
 * Retrieves the prominent words from the given paper.
 *
 * The text, meta description and title are first sanitized with `sanitizeText`. Texts below the
 * length threshold yield an empty list of prominent words.
 *
 * @param {Paper}       paper       The paper to determine the prominent words of.
 * @param {Researcher}  researcher  The researcher to use for analysis.
 *
 * @returns {Object} A compound result object with three properties:
 *                   `prominentWords` (ProminentWord[]): prominent words for this paper, filtered and sorted;
 *                   `hasMetaDescription` (boolean): whether the sanitized meta description is non-empty;
 *                   `hasTitle` (boolean): whether the sanitized title is non-empty.
 */
function getProminentWordsForInternalLinking( paper, researcher ) {
	const functionWords = researcher.getConfig( "functionWords" );
	// An optional custom helper that returns a function to get the stem of a word.
	const customStemmer = researcher.getHelper( "customGetStemmer" );
	const stemmer = customStemmer ? customStemmer( researcher ) : researcher.getHelper( "getStemmer" )( researcher );
	// An optional custom helper to get words from the text.
	const getWordsCustomHelper = researcher.getHelper( "getWordsCustomHelper" );
	// An optional custom helper to count length to use instead of countWords.
	const customCountLength = researcher.getHelper( "customCountLength" );

	const text = sanitizeText( paper.getText() );
	const metadescription = sanitizeText( paper.getDescription() );
	const title = sanitizeText( paper.getTitle() );

	const result = {
		hasMetaDescription: metadescription !== "",
		hasTitle: title !== "",
		prominentWords: [],
	};

	/*
	 * We only want to return suggestions (and spend time calculating prominent words) if the text is at least 100 words.
	 * When a customCountLength helper is available, the threshold is a measured length of at least 200 instead.
	 */
	const isTextTooShort = customCountLength ? customCountLength( text ) < 200 : countWords( text ) < 100;
	if ( isTextTooShort ) {
		return result;
	}

	const subheadings = getSubheadingsTopLevel( text ).map( subheading => subheading[ 2 ] );
	const attributes = [
		paper.getKeyword(),
		paper.getSynonyms(),
		title,
		metadescription,
		subheadings.join( " " ),
	];

	/*
	 * If the language has a custom helper to get words from the text, we don't retrieve the abbreviations.
	 * The text and the attributes are joined with a space: without the separator the last word of the text
	 * and the first word of the keyphrase would be fused into a single token.
	 */
	const abbreviations = getWordsCustomHelper ? [] : retrieveAbbreviations( [ text, ...attributes ].join( " " ) );

	const removedSubheadingText = removeSubheadingsTopLevel( text );
	const prominentWordsFromText = getProminentWords( removedSubheadingText, abbreviations, stemmer, functionWords, getWordsCustomHelper );

	const prominentWordsFromPaperAttributes = getProminentWordsFromPaperAttributes(
		attributes, abbreviations, stemmer, functionWords, getWordsCustomHelper );

	/*
	 * Words used in any of the attributes get their number of occurrences multiplied by 3,
	 * which makes it easier for them to pass the minimum-occurrences filter below.
	 */
	prominentWordsFromPaperAttributes.forEach( relevantWord => relevantWord.setOccurrences( relevantWord.getOccurrences() * 3 ) );

	const collapsedWords = collapseProminentWordsOnStem( prominentWordsFromPaperAttributes.concat( prominentWordsFromText ) );
	sortProminentWords( collapsedWords );

	/*
	 * By default a word needs at least 4 occurrences to be considered prominent. This minimum was chosen to avoid
	 * premature suggestions of words from the paper attributes: these get a times-3 boost and would otherwise
	 * be prominent with just 1 occurrence.
	 *
	 * When the stemmer is the base stemmer (no morphology data, so word forms are not recognized),
	 * the minimum is lowered to 2.
	 */
	const minimumNumberOfOccurrences = stemmer === baseStemmer ? 2 : 4;

	/*
	 * Return the 100 top items from the collapsed and sorted list. The number is picked deliberately to prevent larger
	 * articles from getting too long of lists.
	 */
	result.prominentWords = take( filterProminentWords( collapsedWords, minimumNumberOfOccurrences ), 100 );

	return result;
}
|
|
118
|
-
|
|
119
|
-
export default getProminentWordsForInternalLinking;
|
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
import getWords from "../helpers/word/getWords.js";
|
|
2
|
-
import getSentences from "../helpers/sentence/getSentences";
|
|
3
|
-
import stripSpaces from "../helpers/sanitize/stripSpaces.js";
|
|
4
|
-
import { stripFullTags as stripTags } from "../helpers/sanitize/stripHTMLTags.js";
|
|
5
|
-
|
|
6
|
-
import { filter, forEach, isEmpty } from "lodash";
|
|
7
|
-
import removeHtmlBlocks from "../helpers/html/htmlParser";
|
|
8
|
-
import { filterShortcodesFromHTML } from "../helpers";
|
|
9
|
-
|
|
10
|
-
/**
 * Tells whether two consecutive sentences open with the same beginning.
 * An empty current beginning never counts as a match.
 *
 * @param {string} currentSentenceBeginning The first word of the current sentence.
 * @param {string} nextSentenceBeginning    The first word of the next sentence.
 * @returns {boolean} Returns true if sentence beginnings match.
 */
const startsWithSameWord = function( currentSentenceBeginning, nextSentenceBeginning ) {
	if ( isEmpty( currentSentenceBeginning ) ) {
		return false;
	}

	return currentSentenceBeginning === nextSentenceBeginning;
};
|
|
20
|
-
|
|
21
|
-
/**
 * Groups consecutive sentences that share the same beginning and counts the size of each group.
 *
 * @param {Array} sentenceBeginnings The array containing the first word of each sentence.
 * @param {Array} sentences          The array containing all sentences.
 * @returns {Array} One object per run of equal beginnings, holding the beginning (`word`),
 *                  the length of the run (`count`) and the sentences in the run (`sentences`).
 */
const compareFirstWords = function( sentenceBeginnings, sentences ) {
	const groups = [];
	let runSentences = [];
	let runLength = 1;

	for ( let index = 0; index < sentenceBeginnings.length; index++ ) {
		const current = sentenceBeginnings[ index ];
		runSentences.push( sentences[ index ] );

		// The run continues as long as the next sentence opens with the same beginning.
		if ( startsWithSameWord( current, sentenceBeginnings[ index + 1 ] ) ) {
			runLength += 1;
			continue;
		}

		// The run ends here: record it and start a fresh one.
		groups.push( { word: current, count: runLength, sentences: runSentences } );
		runLength = 1;
		runSentences = [];
	}

	return groups;
};
|
|
49
|
-
|
|
50
|
-
/**
 * Retrieves the first word from the sentence, lowercased. If the first word is on the list of first-word
 * exceptions, the second word is appended as well; if that second word is on the list of second-word
 * exceptions, the third word is appended too. A following word is only appended when the sentence actually has one.
 *
 * @param {string}   sentence             The sentence to retrieve the first word from.
 * @param {Array}    firstWordExceptions  First word exceptions to match against.
 * @param {Array}    secondWordExceptions Second word exceptions to match against.
 * @param {function} getWordsCustomHelper The language-specific helper function to retrieve words from text.
 *
 * @returns {string} The beginning of the sentence, or an empty string when the sentence contains no words.
 */
function getSentenceBeginning( sentence, firstWordExceptions, secondWordExceptions, getWordsCustomHelper ) {
	const stripped = stripTags( stripSpaces( sentence ) );
	const words = getWordsCustomHelper ? getWordsCustomHelper( stripped ) : getWords( stripped );

	if ( words.length === 0 ) {
		return "";
	}

	const firstWord = words[ 0 ].toLocaleLowerCase();

	// A missing exception list is treated the same as an empty one, as is already done for the second-word list.
	const isFirstWordException = Boolean( firstWordExceptions ) && firstWordExceptions.includes( firstWord );
	if ( ! isFirstWordException || words.length < 2 ) {
		return firstWord;
	}

	const firstTwoWords = `${ firstWord } ${ words[ 1 ] }`;

	// Only look at a third word when the sentence has one; otherwise "undefined" would end up in the result.
	const isSecondWordException = Boolean( secondWordExceptions ) && secondWordExceptions.includes( words[ 1 ] );
	if ( isSecondWordException && words.length > 2 ) {
		return `${ firstTwoWords } ${ words[ 2 ] }`;
	}

	return firstTwoWords;
}
|
|
82
|
-
|
|
83
|
-
/**
 * Gets the beginning of each sentence from the text, and returns the runs of consecutive sentences
 * that share the same beginning together with their counts.
 *
 * Text inside table figures and list items is left out of the analysis.
 *
 * @param {Paper}       paper       The Paper object to get the text from.
 * @param {Researcher}  researcher  The researcher this research is a part of.
 *
 * @returns {Array} The result of `compareFirstWords`: one object per run of equal sentence beginnings,
 *                  containing the beginning, the count and the sentences in the run.
 */
export default function( paper, researcher ) {
	const firstWordExceptions = researcher.getConfig( "firstWordExceptions" );
	const secondWordExceptions = researcher.getConfig( "secondWordExceptions" );
	const getWordsCustomHelper = researcher.getHelper( "getWordsCustomHelper" );
	const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" );

	let text = paper.getText();
	text = removeHtmlBlocks( text );
	text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );

	// Remove any HTML whitespace padding and replace it with a single whitespace.
	text = text.replace( /\s+/g, " " );

	/*
	 * Exclude text inside tables. The match is lazy so that each table figure is removed on its own;
	 * a greedy match would also delete all regular text between the first table and the last closing figure tag.
	 */
	text = text.replace( /<figure class='wp-block-table'>.*?<\/figure>/sg, "" );

	// Exclude text inside list items. The word boundary keeps other tags that merely start with "li" (e.g. link) out.
	text = text.replace( /<li\b[^>]*>.*?<\/li>/ig, "" );

	/*
	 * Keep every sentence together with its beginning and drop the pairs without a beginning in one go.
	 * Filtering the two lists separately could leave them with different lengths, which would attach
	 * sentences to the wrong beginnings.
	 */
	const sentencesWithBeginnings = getSentences( text, memoizedTokenizer )
		.map( sentence => ( {
			sentence,
			beginning: getSentenceBeginning( sentence, firstWordExceptions, secondWordExceptions, getWordsCustomHelper ),
		} ) )
		.filter( pair => Boolean( pair.beginning ) );

	const sentenceBeginnings = sentencesWithBeginnings.map( pair => pair.beginning );
	const sentences = sentencesWithBeginnings.map( pair => pair.sentence );

	return compareFirstWords( sentenceBeginnings, sentences );
}
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
import getSubheadingTexts from "../helpers/html/getSubheadingTexts";
|
|
2
|
-
import countWords from "../helpers/word/countWords";
|
|
3
|
-
import { forEach } from "lodash";
|
|
4
|
-
import removeHtmlBlocks from "../helpers/html/htmlParser";
|
|
5
|
-
import { filterShortcodesFromHTML } from "../helpers";
|
|
6
|
-
|
|
7
|
-
/**
 * Collects the subheadings of the paper's text together with the length of the text that follows each of them.
 * When there is text before the first subheading, an entry for that text (with an empty subheading
 * and without an index) is placed at the front of the list.
 *
 * @param {Paper}       paper       The Paper object to get the text from.
 * @param {Researcher}  researcher  The researcher to use for analysis.
 *
 * @returns {Object[]} The found subheadings, each with its `subheading`, `text`, `countLength` and `index`.
 */
export default function( paper, researcher ) {
	const withoutHtmlBlocks = removeHtmlBlocks( paper.getText() );
	const text = filterShortcodesFromHTML( withoutHtmlBlocks, paper._attributes && paper._attributes.shortcodes );
	const matches = getSubheadingTexts( text );

	// Use the optional custom length helper when the researcher provides one, and count words otherwise.
	const customCountLength = researcher.getHelper( "customCountLength" );
	const measureLength = customCountLength ? customCountLength : countWords;

	const foundSubheadings = [];
	forEach( matches, ( match ) => {
		const { subheading, text: subheadingText, index } = match;
		foundSubheadings.push( {
			subheading: subheading,
			text: subheadingText,
			countLength: measureLength( subheadingText ),
			index: index,
		} );
	} );

	// Without any subheading there is no "text before the first subheading" to report.
	if ( foundSubheadings.length === 0 ) {
		return foundSubheadings;
	}

	// Everything up to the position of the first subheading is the introductory text.
	const textBeforeFirstSubheading = text.slice( 0, foundSubheadings[ 0 ].index );
	const textBeforeFirstSubheadingLength = measureLength( textBeforeFirstSubheading );

	if ( textBeforeFirstSubheadingLength > 0 && textBeforeFirstSubheading !== "" ) {
		// The introductory text gets an empty string as its subheading.
		foundSubheadings.unshift( {
			subheading: "",
			text: textBeforeFirstSubheading,
			countLength: textBeforeFirstSubheadingLength,
		} );
	}

	return foundSubheadings;
}
|