axyseo 2.0.0-alpha.0.0.4 → 2.0.0-alpha.0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -2
- package/.browserslistrc +0 -1
- package/.gitattributes +0 -1
- package/babel.config.js +0 -3
- package/eslint.config.mjs +0 -119
- package/src/bundledPlugins/index.js +0 -5
- package/src/bundledPlugins/previouslyUsedKeywords.js +0 -192
- package/src/config/diacritics.js +0 -106
- package/src/config/getTransliterations.js +0 -1447
- package/src/config/transliterationsWPstyle.js +0 -774
- package/src/config/wordBoundaries.js +0 -23
- package/src/config/wordBoundariesWithoutPunctuation.js +0 -9
- package/src/const/analysis.js +0 -41
- package/src/errors/invalidType.js +0 -14
- package/src/errors/missingArgument.js +0 -14
- package/src/helpers/createMeasurementElement.js +0 -40
- package/src/helpers/domManipulation.js +0 -65
- package/src/helpers/errors.js +0 -26
- package/src/helpers/factory.js +0 -219
- package/src/helpers/formatNumber.js +0 -12
- package/src/helpers/formatString.js +0 -33
- package/src/helpers/getLanguagesWithWordComplexity.js +0 -8
- package/src/helpers/getLanguagesWithWordFormSupport.js +0 -11
- package/src/helpers/getWordComplexityConfig.js +0 -20
- package/src/helpers/getWordComplexityHelper.js +0 -20
- package/src/helpers/htmlEntities.js +0 -41
- package/src/helpers/includesAny.js +0 -19
- package/src/helpers/index.js +0 -127
- package/src/helpers/shortlinker/Shortlinker.js +0 -75
- package/src/helpers/shortlinker/index.js +0 -1
- package/src/helpers/shortlinker/singleton.js +0 -68
- package/src/helpers/types.js +0 -34
- package/src/index.js +0 -60
- package/src/languageProcessing/AbstractResearcher.js +0 -366
- package/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +0 -125
- package/src/languageProcessing/helpers/html/getFieldsToMark.js +0 -29
- package/src/languageProcessing/helpers/html/getSubheadingTexts.js +0 -47
- package/src/languageProcessing/helpers/html/getSubheadings.js +0 -95
- package/src/languageProcessing/helpers/html/html.js +0 -176
- package/src/languageProcessing/helpers/html/htmlParser.js +0 -145
- package/src/languageProcessing/helpers/html/matchParagraphs.js +0 -62
- package/src/languageProcessing/helpers/html/normalizeHTML.js +0 -16
- package/src/languageProcessing/helpers/image/getAltAttribute.js +0 -20
- package/src/languageProcessing/helpers/image/getImagesInTree.js +0 -16
- package/src/languageProcessing/helpers/image/imageInText.js +0 -19
- package/src/languageProcessing/helpers/index.js +0 -12
- package/src/languageProcessing/helpers/language/getLanguage.js +0 -9
- package/src/languageProcessing/helpers/link/checkNofollow.js +0 -38
- package/src/languageProcessing/helpers/link/getAnchorsFromText.js +0 -32
- package/src/languageProcessing/helpers/link/getLinkType.js +0 -32
- package/src/languageProcessing/helpers/match/findKeywordFormsInString.js +0 -101
- package/src/languageProcessing/helpers/match/isDoubleQuoted.js +0 -13
- package/src/languageProcessing/helpers/match/matchTextWithArray.js +0 -36
- package/src/languageProcessing/helpers/match/matchTextWithTransliteration.js +0 -58
- package/src/languageProcessing/helpers/match/matchTextWithWord.js +0 -45
- package/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +0 -164
- package/src/languageProcessing/helpers/match/processExactMatchRequest.js +0 -20
- package/src/languageProcessing/helpers/morphology/baseStemmer.js +0 -11
- package/src/languageProcessing/helpers/morphology/buildFormRule.js +0 -19
- package/src/languageProcessing/helpers/morphology/buildTopicStems.js +0 -169
- package/src/languageProcessing/helpers/morphology/createRulesFromArrays.js +0 -45
- package/src/languageProcessing/helpers/morphology/exceptionListHelpers.js +0 -65
- package/src/languageProcessing/helpers/morphology/findMatchingEndingInArray.js +0 -24
- package/src/languageProcessing/helpers/morphology/flattenSortLength.js +0 -14
- package/src/languageProcessing/helpers/morphology/getAllWordsFromPaper.js +0 -39
- package/src/languageProcessing/helpers/morphology/regexHelpers.js +0 -44
- package/src/languageProcessing/helpers/morphology/stemHelpers.js +0 -38
- package/src/languageProcessing/helpers/morphology/stemPrefixedFunctionWords.js +0 -31
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/directPrecedenceException.js +0 -36
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/freeAuxiliaryParticipleOrder/getClausesSplitOnStopWords.js +0 -113
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/freeAuxiliaryParticipleOrder/nonDirectParticiplePrecedenceException.js +0 -45
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/getClauses.js +0 -231
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/getIndicesWithRegex.js +0 -20
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/matchRegularParticiples.js +0 -23
- package/src/languageProcessing/helpers/passiveVoice/periphrastic/precedenceException.js +0 -40
- package/src/languageProcessing/helpers/prominentWords/determineProminentWords.js +0 -238
- package/src/languageProcessing/helpers/regex/createRegexFromArray.js +0 -35
- package/src/languageProcessing/helpers/regex/createRegexFromDoubleArray.js +0 -34
- package/src/languageProcessing/helpers/regex/createWordRegex.js +0 -30
- package/src/languageProcessing/helpers/regex/matchStringWithRegex.js +0 -19
- package/src/languageProcessing/helpers/regex/searchAndReplaceWithOneRegex.js +0 -14
- package/src/languageProcessing/helpers/sanitize/doubleQuotes.js +0 -12
- package/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js +0 -131
- package/src/languageProcessing/helpers/sanitize/mergeListItems.js +0 -24
- package/src/languageProcessing/helpers/sanitize/parseSynonyms.js +0 -20
- package/src/languageProcessing/helpers/sanitize/quotes.js +0 -46
- package/src/languageProcessing/helpers/sanitize/removeEmailAddresses.js +0 -12
- package/src/languageProcessing/helpers/sanitize/removePunctuation.js +0 -64
- package/src/languageProcessing/helpers/sanitize/removePunctuationExceptQuotes.js +0 -18
- package/src/languageProcessing/helpers/sanitize/removeSentenceTerminators.js +0 -13
- package/src/languageProcessing/helpers/sanitize/removeURLs.js +0 -13
- package/src/languageProcessing/helpers/sanitize/sanitizeLineBreakTag.js +0 -11
- package/src/languageProcessing/helpers/sanitize/sanitizeString.js +0 -18
- package/src/languageProcessing/helpers/sanitize/stripHTMLTags.js +0 -57
- package/src/languageProcessing/helpers/sanitize/stripNonTextTags.js +0 -15
- package/src/languageProcessing/helpers/sanitize/stripNumbers.js +0 -21
- package/src/languageProcessing/helpers/sanitize/stripSpaces.js +0 -23
- package/src/languageProcessing/helpers/sanitize/stripWordBoundaries.js +0 -65
- package/src/languageProcessing/helpers/sanitize/unifyWhitespace.js +0 -61
- package/src/languageProcessing/helpers/sentence/SentenceTokenizer.js +0 -640
- package/src/languageProcessing/helpers/sentence/countSentences.js +0 -20
- package/src/languageProcessing/helpers/sentence/getSentences.js +0 -65
- package/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +0 -55
- package/src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer.js +0 -28
- package/src/languageProcessing/helpers/sentence/sentencesLength.js +0 -31
- package/src/languageProcessing/helpers/syllables/DeviationFragment.js +0 -112
- package/src/languageProcessing/helpers/syllables/countSyllables.js +0 -182
- package/src/languageProcessing/helpers/syllables/syllableCountIterator.js +0 -56
- package/src/languageProcessing/helpers/syllables/syllableCountStep.js +0 -68
- package/src/languageProcessing/helpers/transform/transformWordsWithHyphens.js +0 -17
- package/src/languageProcessing/helpers/transliterate/replaceDiacritics.js +0 -22
- package/src/languageProcessing/helpers/transliterate/specialCharacterMappings.js +0 -214
- package/src/languageProcessing/helpers/transliterate/transliterate.js +0 -20
- package/src/languageProcessing/helpers/transliterate/transliterateWPstyle.js +0 -21
- package/src/languageProcessing/helpers/url/parseSlug.js +0 -10
- package/src/languageProcessing/helpers/url/url.js +0 -172
- package/src/languageProcessing/helpers/word/addWordboundary.js +0 -37
- package/src/languageProcessing/helpers/word/areWordsInSentence.js +0 -16
- package/src/languageProcessing/helpers/word/countMetaDescriptionLength.js +0 -18
- package/src/languageProcessing/helpers/word/countWords.js +0 -14
- package/src/languageProcessing/helpers/word/createPunctuationTokens.js +0 -42
- package/src/languageProcessing/helpers/word/filterWordsFromArray.js +0 -15
- package/src/languageProcessing/helpers/word/followsIndex.js +0 -25
- package/src/languageProcessing/helpers/word/getAllWordsFromTree.js +0 -23
- package/src/languageProcessing/helpers/word/getWords.js +0 -43
- package/src/languageProcessing/helpers/word/includesIndex.js +0 -30
- package/src/languageProcessing/helpers/word/indices.js +0 -146
- package/src/languageProcessing/helpers/word/markWordsInSentences.js +0 -173
- package/src/languageProcessing/helpers/word/matchWordInSentence.js +0 -61
- package/src/languageProcessing/helpers/word/splitIntoTokens.js +0 -46
- package/src/languageProcessing/index.js +0 -91
- package/src/languageProcessing/languages/_default/Researcher.js +0 -34
- package/src/languageProcessing/languages/_default/helpers/getStemmer.js +0 -11
- package/src/languageProcessing/languages/ar/Researcher.js +0 -46
- package/src/languageProcessing/languages/ar/config/firstWordExceptions.js +0 -14
- package/src/languageProcessing/languages/ar/config/functionWords.js +0 -329
- package/src/languageProcessing/languages/ar/config/internal/passiveVerbsWithLongVowel.js +0 -570
- package/src/languageProcessing/languages/ar/config/prefixedFunctionWords.js +0 -5
- package/src/languageProcessing/languages/ar/config/transitionWords.js +0 -19
- package/src/languageProcessing/languages/ar/config/twoPartTransitionWords.js +0 -7
- package/src/languageProcessing/languages/ar/helpers/createBasicWordForms.js +0 -32
- package/src/languageProcessing/languages/ar/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/ar/helpers/internal/stem.js +0 -632
- package/src/languageProcessing/languages/ar/helpers/isPassiveSentence.js +0 -33
- package/src/languageProcessing/languages/ca/Researcher.js +0 -43
- package/src/languageProcessing/languages/ca/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/ca/config/transitionWords.js +0 -31
- package/src/languageProcessing/languages/ca/config/twoPartTransitionWords.js +0 -7
- package/src/languageProcessing/languages/ca/helpers/getStemmer.js +0 -11
- package/src/languageProcessing/languages/cs/Researcher.js +0 -44
- package/src/languageProcessing/languages/cs/config/firstWordExceptions.js +0 -15
- package/src/languageProcessing/languages/cs/config/functionWords.js +0 -121
- package/src/languageProcessing/languages/cs/config/internal/passiveVoiceAuxiliaries.js +0 -38
- package/src/languageProcessing/languages/cs/config/internal/passiveVoiceEndings.js +0 -54
- package/src/languageProcessing/languages/cs/config/stopWords.js +0 -42
- package/src/languageProcessing/languages/cs/config/transitionWords.js +0 -26
- package/src/languageProcessing/languages/cs/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/cs/helpers/getClauses.js +0 -26
- package/src/languageProcessing/languages/cs/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/cs/helpers/internal/getParticiples.js +0 -16
- package/src/languageProcessing/languages/cs/helpers/internal/stem.js +0 -499
- package/src/languageProcessing/languages/cs/values/Clause.js +0 -34
- package/src/languageProcessing/languages/de/Researcher.js +0 -52
- package/src/languageProcessing/languages/de/config/firstWordExceptions.js +0 -17
- package/src/languageProcessing/languages/de/config/functionWords.js +0 -303
- package/src/languageProcessing/languages/de/config/internal/exceptionsParticiplesActive.js +0 -2231
- package/src/languageProcessing/languages/de/config/internal/passiveVoiceAuxiliaries.js +0 -96
- package/src/languageProcessing/languages/de/config/internal/passiveVoiceIrregulars.js +0 -368
- package/src/languageProcessing/languages/de/config/internal/passiveVoiceRegex.js +0 -72
- package/src/languageProcessing/languages/de/config/keyphraseLength.js +0 -11
- package/src/languageProcessing/languages/de/config/stopWords.js +0 -67
- package/src/languageProcessing/languages/de/config/syllables.json +0 -460
- package/src/languageProcessing/languages/de/config/transitionWords.js +0 -31
- package/src/languageProcessing/languages/de/config/twoPartTransitionWords.js +0 -12
- package/src/languageProcessing/languages/de/config/wordComplexity.js +0 -4
- package/src/languageProcessing/languages/de/helpers/calculateFleschReadingScore.js +0 -18
- package/src/languageProcessing/languages/de/helpers/checkIfWordIsComplex.js +0 -40
- package/src/languageProcessing/languages/de/helpers/checkIfWordIsFunction.js +0 -15
- package/src/languageProcessing/languages/de/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/de/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/de/helpers/internal/SentenceTokenizer.js +0 -31
- package/src/languageProcessing/languages/de/helpers/internal/detectAndStemRegularParticiple.js +0 -128
- package/src/languageProcessing/languages/de/helpers/internal/determineStem.js +0 -128
- package/src/languageProcessing/languages/de/helpers/internal/getParticiples.js +0 -40
- package/src/languageProcessing/languages/de/helpers/internal/stem.js +0 -215
- package/src/languageProcessing/languages/de/helpers/memoizedSentenceTokenizer.js +0 -28
- package/src/languageProcessing/languages/de/values/Clause.js +0 -85
- package/src/languageProcessing/languages/el/Researcher.js +0 -46
- package/src/languageProcessing/languages/el/config/firstWordExceptions.js +0 -47
- package/src/languageProcessing/languages/el/config/functionWords.js +0 -116
- package/src/languageProcessing/languages/el/config/internal/auxiliaries.js +0 -19
- package/src/languageProcessing/languages/el/config/internal/morphologicalPassiveSuffixes.js +0 -87
- package/src/languageProcessing/languages/el/config/internal/nonPassiveVerbStems.js +0 -138
- package/src/languageProcessing/languages/el/config/stopWords.js +0 -854
- package/src/languageProcessing/languages/el/config/transitionWords.js +0 -26
- package/src/languageProcessing/languages/el/config/twoPartTransitionWords.js +0 -10
- package/src/languageProcessing/languages/el/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/el/helpers/getStemmer.js +0 -21
- package/src/languageProcessing/languages/el/helpers/internal/getParticiples.js +0 -20
- package/src/languageProcessing/languages/el/helpers/internal/stem.js +0 -368
- package/src/languageProcessing/languages/el/helpers/isPassiveSentence.js +0 -38
- package/src/languageProcessing/languages/el/values/Clause.js +0 -37
- package/src/languageProcessing/languages/en/Researcher.js +0 -46
- package/src/languageProcessing/languages/en/config/abbreviations.js +0 -55
- package/src/languageProcessing/languages/en/config/firstWordExceptions.js +0 -14
- package/src/languageProcessing/languages/en/config/functionWords.js +0 -186
- package/src/languageProcessing/languages/en/config/internal/passiveVoiceAuxiliaries.js +0 -44
- package/src/languageProcessing/languages/en/config/internal/passiveVoiceIrregulars.js +0 -354
- package/src/languageProcessing/languages/en/config/internal/passiveVoiceNonVerbEndingEd.js +0 -3047
- package/src/languageProcessing/languages/en/config/regularParticiplesRegex.js +0 -5
- package/src/languageProcessing/languages/en/config/stopWords.js +0 -52
- package/src/languageProcessing/languages/en/config/syllables.json +0 -86
- package/src/languageProcessing/languages/en/config/transitionWords.js +0 -48
- package/src/languageProcessing/languages/en/config/twoPartTransitionWords.js +0 -7
- package/src/languageProcessing/languages/en/config/wordComplexity.js +0 -5
- package/src/languageProcessing/languages/en/helpers/calculateFleschReadingScore.js +0 -18
- package/src/languageProcessing/languages/en/helpers/checkIfWordIsComplex.js +0 -43
- package/src/languageProcessing/languages/en/helpers/getClauses.js +0 -49
- package/src/languageProcessing/languages/en/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/en/helpers/internal/determineStem.js +0 -178
- package/src/languageProcessing/languages/en/helpers/internal/getAdjectiveStem.js +0 -162
- package/src/languageProcessing/languages/en/helpers/internal/getParticiples.js +0 -25
- package/src/languageProcessing/languages/en/helpers/internal/getVerbStem.js +0 -237
- package/src/languageProcessing/languages/en/values/Clause.js +0 -68
- package/src/languageProcessing/languages/es/Researcher.js +0 -48
- package/src/languageProcessing/languages/es/config/firstWordExceptions.js +0 -16
- package/src/languageProcessing/languages/es/config/functionWords.js +0 -321
- package/src/languageProcessing/languages/es/config/internal/passiveVoiceAuxiliaries.js +0 -60
- package/src/languageProcessing/languages/es/config/internal/passiveVoiceParticiples.js +0 -7327
- package/src/languageProcessing/languages/es/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/es/config/stopWords.js +0 -33
- package/src/languageProcessing/languages/es/config/syllables.json +0 -176
- package/src/languageProcessing/languages/es/config/transitionWords.js +0 -40
- package/src/languageProcessing/languages/es/config/twoPartTransitionWords.js +0 -10
- package/src/languageProcessing/languages/es/config/wordComplexity.js +0 -4
- package/src/languageProcessing/languages/es/helpers/calculateFleschReadingScore.js +0 -18
- package/src/languageProcessing/languages/es/helpers/checkIfWordIsComplex.js +0 -56
- package/src/languageProcessing/languages/es/helpers/getClauses.js +0 -29
- package/src/languageProcessing/languages/es/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/es/helpers/internal/checkVerbStemModifications.js +0 -41
- package/src/languageProcessing/languages/es/helpers/internal/getParticiples.js +0 -35
- package/src/languageProcessing/languages/es/helpers/internal/stem.js +0 -793
- package/src/languageProcessing/languages/es/values/Clause.js +0 -47
- package/src/languageProcessing/languages/fa/Researcher.js +0 -47
- package/src/languageProcessing/languages/fa/config/firstWordExceptions.js +0 -12
- package/src/languageProcessing/languages/fa/config/functionWords.js +0 -122
- package/src/languageProcessing/languages/fa/config/internal/participles.js +0 -1429
- package/src/languageProcessing/languages/fa/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/fa/config/transitionWords.js +0 -20
- package/src/languageProcessing/languages/fa/config/twoPartTransitionWords.js +0 -9
- package/src/languageProcessing/languages/fa/helpers/createBasicWordForms.js +0 -97
- package/src/languageProcessing/languages/fa/helpers/getStemmer.js +0 -13
- package/src/languageProcessing/languages/fa/helpers/isPassiveSentence.js +0 -14
- package/src/languageProcessing/languages/fr/Researcher.js +0 -46
- package/src/languageProcessing/languages/fr/config/firstWordExceptions.js +0 -16
- package/src/languageProcessing/languages/fr/config/functionWords.js +0 -281
- package/src/languageProcessing/languages/fr/config/internal/exceptionsParticiplesActive.js +0 -1510
- package/src/languageProcessing/languages/fr/config/internal/passiveVoiceAuxiliaries.js +0 -108
- package/src/languageProcessing/languages/fr/config/internal/passiveVoiceIrregulars.js +0 -565
- package/src/languageProcessing/languages/fr/config/stopWords.js +0 -119
- package/src/languageProcessing/languages/fr/config/syllables.json +0 -1426
- package/src/languageProcessing/languages/fr/config/transitionWords.js +0 -59
- package/src/languageProcessing/languages/fr/config/twoPartTransitionWords.js +0 -15
- package/src/languageProcessing/languages/fr/config/wordComplexity.js +0 -4
- package/src/languageProcessing/languages/fr/helpers/calculateFleschReadingScore.js +0 -18
- package/src/languageProcessing/languages/fr/helpers/checkIfWordIsComplex.js +0 -67
- package/src/languageProcessing/languages/fr/helpers/getClauses.js +0 -34
- package/src/languageProcessing/languages/fr/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/fr/helpers/internal/getParticiples.js +0 -72
- package/src/languageProcessing/languages/fr/helpers/internal/stem.js +0 -633
- package/src/languageProcessing/languages/fr/values/Clause.js +0 -96
- package/src/languageProcessing/languages/he/Researcher.js +0 -50
- package/src/languageProcessing/languages/he/config/firstWordExceptions.js +0 -13
- package/src/languageProcessing/languages/he/config/functionWords.js +0 -564
- package/src/languageProcessing/languages/he/config/internal/regularRootsHufal.js +0 -186
- package/src/languageProcessing/languages/he/config/internal/regularRootsNifal.js +0 -195
- package/src/languageProcessing/languages/he/config/internal/regularRootsPual.js +0 -168
- package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsHufal.js +0 -188
- package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsNifal.js +0 -197
- package/src/languageProcessing/languages/he/config/passiveVoice/regularRootsPual.js +0 -170
- package/src/languageProcessing/languages/he/config/prefixedFunctionWords.js +0 -2
- package/src/languageProcessing/languages/he/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/he/config/transitionWords.js +0 -28
- package/src/languageProcessing/languages/he/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/he/helpers/createBasicWordForms.js +0 -33
- package/src/languageProcessing/languages/he/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/he/helpers/internal/stem.js +0 -52
- package/src/languageProcessing/languages/he/helpers/isPassiveSentence.js +0 -96
- package/src/languageProcessing/languages/he/helpers/stem.js +0 -52
- package/src/languageProcessing/languages/hu/Researcher.js +0 -48
- package/src/languageProcessing/languages/hu/config/firstWordExceptions.js +0 -31
- package/src/languageProcessing/languages/hu/config/functionWords.js +0 -284
- package/src/languageProcessing/languages/hu/config/internal/auxiliaries.js +0 -97
- package/src/languageProcessing/languages/hu/config/internal/morphologicalPassiveAffixes.js +0 -125
- package/src/languageProcessing/languages/hu/config/internal/nonPassivesInVaAndVe.js +0 -265
- package/src/languageProcessing/languages/hu/config/internal/odikVerbs.js +0 -273
- package/src/languageProcessing/languages/hu/config/internal/participles.js +0 -412
- package/src/languageProcessing/languages/hu/config/stopWords.js +0 -213
- package/src/languageProcessing/languages/hu/config/transitionWords.js +0 -42
- package/src/languageProcessing/languages/hu/config/twoPartTransitionWords.js +0 -34
- package/src/languageProcessing/languages/hu/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/hu/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/hu/helpers/internal/getParticiples.js +0 -21
- package/src/languageProcessing/languages/hu/helpers/internal/stem.js +0 -389
- package/src/languageProcessing/languages/hu/helpers/isPassiveSentence.js +0 -54
- package/src/languageProcessing/languages/hu/values/Clause.js +0 -41
- package/src/languageProcessing/languages/id/Researcher.js +0 -46
- package/src/languageProcessing/languages/id/config/firstWordExceptions.js +0 -13
- package/src/languageProcessing/languages/id/config/functionWords.js +0 -202
- package/src/languageProcessing/languages/id/config/internal/nonPassiveVerbsStartingDi.js +0 -215
- package/src/languageProcessing/languages/id/config/transitionWords.js +0 -62
- package/src/languageProcessing/languages/id/config/twoPartTransitionWords.js +0 -13
- package/src/languageProcessing/languages/id/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/id/helpers/internal/stem.js +0 -462
- package/src/languageProcessing/languages/id/helpers/internal/stemHelpers.js +0 -78
- package/src/languageProcessing/languages/id/helpers/isPassiveSentence.js +0 -39
- package/src/languageProcessing/languages/id/helpers/splitIntoTokensCustom.js +0 -47
- package/src/languageProcessing/languages/it/Researcher.js +0 -48
- package/src/languageProcessing/languages/it/config/firstWordExceptions.js +0 -17
- package/src/languageProcessing/languages/it/config/functionWords.js +0 -277
- package/src/languageProcessing/languages/it/config/internal/passiveVoiceAuxiliaries.js +0 -98
- package/src/languageProcessing/languages/it/config/internal/passiveVoiceParticiples.js +0 -7197
- package/src/languageProcessing/languages/it/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/it/config/stopWords.js +0 -57
- package/src/languageProcessing/languages/it/config/syllables.json +0 -573
- package/src/languageProcessing/languages/it/config/transitionWords.js +0 -104
- package/src/languageProcessing/languages/it/config/twoPartTransitionWords.js +0 -9
- package/src/languageProcessing/languages/it/helpers/calculateFleschReadingScore.js +0 -15
- package/src/languageProcessing/languages/it/helpers/getClauses.js +0 -32
- package/src/languageProcessing/languages/it/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/it/helpers/internal/getParticiples.js +0 -34
- package/src/languageProcessing/languages/it/helpers/internal/stem.js +0 -436
- package/src/languageProcessing/languages/it/values/Clause.js +0 -47
- package/src/languageProcessing/languages/ja/Researcher.js +0 -86
- package/src/languageProcessing/languages/ja/config/assessmentApplicabilityCharacterCount.js +0 -4
- package/src/languageProcessing/languages/ja/config/firstWordExceptions.js +0 -8
- package/src/languageProcessing/languages/ja/config/functionWords.js +0 -563
- package/src/languageProcessing/languages/ja/config/keyphraseLength.js +0 -16
- package/src/languageProcessing/languages/ja/config/metaDescriptionLength.js +0 -4
- package/src/languageProcessing/languages/ja/config/paragraphLength.js +0 -10
- package/src/languageProcessing/languages/ja/config/sentenceLength.js +0 -4
- package/src/languageProcessing/languages/ja/config/subheadingsTooLong.js +0 -18
- package/src/languageProcessing/languages/ja/config/textLength.js +0 -47
- package/src/languageProcessing/languages/ja/config/topicLength.js +0 -5
- package/src/languageProcessing/languages/ja/config/transitionWords.js +0 -354
- package/src/languageProcessing/languages/ja/customResearches/findKeyphraseInSEOTitle.js +0 -98
- package/src/languageProcessing/languages/ja/customResearches/getKeyphraseLength.js +0 -19
- package/src/languageProcessing/languages/ja/customResearches/getWordForms.js +0 -50
- package/src/languageProcessing/languages/ja/customResearches/textLength.js +0 -24
- package/src/languageProcessing/languages/ja/helpers/countCharacters.js +0 -19
- package/src/languageProcessing/languages/ja/helpers/customGetStemmer.js +0 -21
- package/src/languageProcessing/languages/ja/helpers/getContentWords.js +0 -21
- package/src/languageProcessing/languages/ja/helpers/getWords.js +0 -31
- package/src/languageProcessing/languages/ja/helpers/internal/SentenceTokenizer.js +0 -102
- package/src/languageProcessing/languages/ja/helpers/internal/createWordForms.js +0 -68
- package/src/languageProcessing/languages/ja/helpers/internal/determineStem.js +0 -17
- package/src/languageProcessing/languages/ja/helpers/matchTextWithWord.js +0 -53
- package/src/languageProcessing/languages/ja/helpers/matchTransitionWords.js +0 -25
- package/src/languageProcessing/languages/ja/helpers/memoizedSentenceTokenizer.js +0 -28
- package/src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom.js +0 -20
- package/src/languageProcessing/languages/ja/helpers/wordsCharacterCount.js +0 -13
- package/src/languageProcessing/languages/nb/Researcher.js +0 -45
- package/src/languageProcessing/languages/nb/config/firstWordExceptions.js +0 -12
- package/src/languageProcessing/languages/nb/config/functionWords.js +0 -106
- package/src/languageProcessing/languages/nb/config/internal/participles.js +0 -3127
- package/src/languageProcessing/languages/nb/config/internal/passiveVoiceAuxiliaries.js +0 -15
- package/src/languageProcessing/languages/nb/config/stopWords.js +0 -39
- package/src/languageProcessing/languages/nb/config/transitionWords.js +0 -21
- package/src/languageProcessing/languages/nb/config/twoPartTransitionWords.js +0 -10
- package/src/languageProcessing/languages/nb/helpers/getClauses.js +0 -28
- package/src/languageProcessing/languages/nb/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/nb/helpers/internal/getParticiples.js +0 -24
- package/src/languageProcessing/languages/nb/helpers/internal/stem.js +0 -133
- package/src/languageProcessing/languages/nb/values/Clause.js +0 -43
- package/src/languageProcessing/languages/nl/Researcher.js +0 -48
- package/src/languageProcessing/languages/nl/config/firstWordExceptions.js +0 -15
- package/src/languageProcessing/languages/nl/config/functionWords.js +0 -233
- package/src/languageProcessing/languages/nl/config/internal/nonParticiples.js +0 -2515
- package/src/languageProcessing/languages/nl/config/internal/passiveVoiceAuxiliaries.js +0 -13
- package/src/languageProcessing/languages/nl/config/internal/passiveVoiceIrregulars.js +0 -474
- package/src/languageProcessing/languages/nl/config/keyphraseLength.js +0 -10
- package/src/languageProcessing/languages/nl/config/stopWords.js +0 -35
- package/src/languageProcessing/languages/nl/config/syllables.json +0 -343
- package/src/languageProcessing/languages/nl/config/transitionWords.js +0 -22
- package/src/languageProcessing/languages/nl/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/nl/helpers/calculateFleschReadingScore.js +0 -15
- package/src/languageProcessing/languages/nl/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/nl/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/nl/helpers/internal/checkExceptionsWithFullForms.js +0 -128
- package/src/languageProcessing/languages/nl/helpers/internal/detectAndStemRegularParticiple.js +0 -324
- package/src/languageProcessing/languages/nl/helpers/internal/detectAndStemSuffixes.js +0 -164
- package/src/languageProcessing/languages/nl/helpers/internal/determineStem.js +0 -133
- package/src/languageProcessing/languages/nl/helpers/internal/getParticiples.js +0 -25
- package/src/languageProcessing/languages/nl/helpers/internal/getStemWordsWithTAndDEnding.js +0 -183
- package/src/languageProcessing/languages/nl/helpers/internal/stem.js +0 -146
- package/src/languageProcessing/languages/nl/helpers/internal/stemModificationHelpers.js +0 -109
- package/src/languageProcessing/languages/nl/helpers/internal/stemTOrDFromEndOfWord.js +0 -65
- package/src/languageProcessing/languages/nl/values/Clause.js +0 -62
- package/src/languageProcessing/languages/pl/Researcher.js +0 -47
- package/src/languageProcessing/languages/pl/config/firstWordExceptions.js +0 -12
- package/src/languageProcessing/languages/pl/config/functionWords.js +0 -421
- package/src/languageProcessing/languages/pl/config/internal/auxiliaries.js +0 -85
- package/src/languageProcessing/languages/pl/config/internal/participles.js +0 -26433
- package/src/languageProcessing/languages/pl/config/sentenceLength.js +0 -10
- package/src/languageProcessing/languages/pl/config/stopWords.js +0 -36
- package/src/languageProcessing/languages/pl/config/transitionWords.js +0 -42
- package/src/languageProcessing/languages/pl/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/pl/helpers/getClauses.js +0 -25
- package/src/languageProcessing/languages/pl/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/pl/helpers/internal/getParticiples.js +0 -18
- package/src/languageProcessing/languages/pl/helpers/internal/stem.js +0 -161
- package/src/languageProcessing/languages/pl/values/Clause.js +0 -53
- package/src/languageProcessing/languages/pt/Researcher.js +0 -48
- package/src/languageProcessing/languages/pt/config/firstWordExceptions.js +0 -15
- package/src/languageProcessing/languages/pt/config/functionWords.js +0 -226
- package/src/languageProcessing/languages/pt/config/internal/passiveVoiceAuxiliaries.js +0 -66
- package/src/languageProcessing/languages/pt/config/internal/passiveVoiceParticiples.js +0 -4088
- package/src/languageProcessing/languages/pt/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/pt/config/stopWords.js +0 -50
- package/src/languageProcessing/languages/pt/config/syllables.json +0 -38
- package/src/languageProcessing/languages/pt/config/transitionWords.js +0 -34
- package/src/languageProcessing/languages/pt/config/twoPartTransitionWords.js +0 -9
- package/src/languageProcessing/languages/pt/helpers/calculateFleschReadingScore.js +0 -15
- package/src/languageProcessing/languages/pt/helpers/getClauses.js +0 -29
- package/src/languageProcessing/languages/pt/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/pt/helpers/internal/getParticiples.js +0 -35
- package/src/languageProcessing/languages/pt/helpers/internal/stem.js +0 -319
- package/src/languageProcessing/languages/pt/values/Clause.js +0 -43
- package/src/languageProcessing/languages/ru/Researcher.js +0 -48
- package/src/languageProcessing/languages/ru/config/firstWordExceptions.js +0 -14
- package/src/languageProcessing/languages/ru/config/fleschReadingEaseScores.js +0 -20
- package/src/languageProcessing/languages/ru/config/functionWords.js +0 -519
- package/src/languageProcessing/languages/ru/config/internal/participlesShortenedList.js +0 -2914
- package/src/languageProcessing/languages/ru/config/internal/passiveVoiceParticiples.js +0 -6295
- package/src/languageProcessing/languages/ru/config/sentenceLength.js +0 -3
- package/src/languageProcessing/languages/ru/config/syllables.json +0 -19
- package/src/languageProcessing/languages/ru/config/transitionWords.js +0 -62
- package/src/languageProcessing/languages/ru/config/twoPartTransitionWords.js +0 -14
- package/src/languageProcessing/languages/ru/helpers/calculateFleschReadingScore.js +0 -16
- package/src/languageProcessing/languages/ru/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/ru/helpers/internal/stem.js +0 -288
- package/src/languageProcessing/languages/ru/helpers/isPassiveSentence.js +0 -14
- package/src/languageProcessing/languages/sk/Researcher.js +0 -46
- package/src/languageProcessing/languages/sk/config/firstWordExceptions.js +0 -14
- package/src/languageProcessing/languages/sk/config/functionWords.js +0 -855
- package/src/languageProcessing/languages/sk/config/internal/nonPassives.js +0 -1074
- package/src/languageProcessing/languages/sk/config/internal/passiveVoiceAuxiliaries.js +0 -22
- package/src/languageProcessing/languages/sk/config/stopWords.js +0 -34
- package/src/languageProcessing/languages/sk/config/transitionWords.js +0 -23
- package/src/languageProcessing/languages/sk/config/twoPartTransitionWords.js +0 -10
- package/src/languageProcessing/languages/sk/helpers/getClauses.js +0 -26
- package/src/languageProcessing/languages/sk/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/sk/helpers/internal/getParticiples.js +0 -16
- package/src/languageProcessing/languages/sk/helpers/internal/stem.js +0 -319
- package/src/languageProcessing/languages/sk/values/Clause.js +0 -39
- package/src/languageProcessing/languages/sv/Researcher.js +0 -45
- package/src/languageProcessing/languages/sv/config/firstWordExceptions.js +0 -15
- package/src/languageProcessing/languages/sv/config/functionWords.js +0 -176
- package/src/languageProcessing/languages/sv/config/internal/passiveVerbs.js +0 -10400
- package/src/languageProcessing/languages/sv/config/keyphraseLength.js +0 -11
- package/src/languageProcessing/languages/sv/config/transitionWords.js +0 -35
- package/src/languageProcessing/languages/sv/config/twoPartTransitionWords.js +0 -8
- package/src/languageProcessing/languages/sv/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/sv/helpers/internal/stem.js +0 -152
- package/src/languageProcessing/languages/sv/helpers/isPassiveSentence.js +0 -14
- package/src/languageProcessing/languages/tr/Researcher.js +0 -44
- package/src/languageProcessing/languages/tr/config/firstWordExceptions.js +0 -13
- package/src/languageProcessing/languages/tr/config/functionWords.js +0 -116
- package/src/languageProcessing/languages/tr/config/internal/nonPassiveExceptions.js +0 -574
- package/src/languageProcessing/languages/tr/config/internal/passiveEndings.js +0 -151
- package/src/languageProcessing/languages/tr/config/sentenceLength.js +0 -7
- package/src/languageProcessing/languages/tr/config/transitionWords.js +0 -42
- package/src/languageProcessing/languages/tr/config/twoPartTransitionWords.js +0 -7
- package/src/languageProcessing/languages/tr/helpers/getStemmer.js +0 -22
- package/src/languageProcessing/languages/tr/helpers/internal/stem.js +0 -20
- package/src/languageProcessing/languages/tr/helpers/isPassiveSentence.js +0 -43
- package/src/languageProcessing/researches/altTagCount.js +0 -70
- package/src/languageProcessing/researches/countSentencesFromText.js +0 -19
- package/src/languageProcessing/researches/findKeyphraseInSEOTitle.js +0 -257
- package/src/languageProcessing/researches/findKeywordInFirstParagraph.js +0 -86
- package/src/languageProcessing/researches/findTransitionWords.js +0 -123
- package/src/languageProcessing/researches/functionWordsInKeyphrase.js +0 -44
- package/src/languageProcessing/researches/getAnchorsWithKeyphrase.js +0 -227
- package/src/languageProcessing/researches/getFleschReadingScore.js +0 -150
- package/src/languageProcessing/researches/getKeywordDensity.js +0 -44
- package/src/languageProcessing/researches/getLinkStatistics.js +0 -54
- package/src/languageProcessing/researches/getLinks.js +0 -18
- package/src/languageProcessing/researches/getLongCenterAlignedTexts.js +0 -37
- package/src/languageProcessing/researches/getParagraphLength.js +0 -44
- package/src/languageProcessing/researches/getParagraphs.js +0 -18
- package/src/languageProcessing/researches/getPassiveVoiceResult.js +0 -129
- package/src/languageProcessing/researches/getProminentWordsForInsights.js +0 -48
- package/src/languageProcessing/researches/getProminentWordsForInternalLinking.js +0 -119
- package/src/languageProcessing/researches/getSentenceBeginnings.js +0 -124
- package/src/languageProcessing/researches/getSubheadingTextLengths.js +0 -59
- package/src/languageProcessing/researches/getWordForms.js +0 -204
- package/src/languageProcessing/researches/h1s.js +0 -10
- package/src/languageProcessing/researches/imageCount.js +0 -16
- package/src/languageProcessing/researches/index.js +0 -5
- package/src/languageProcessing/researches/keyphraseDistribution.js +0 -249
- package/src/languageProcessing/researches/keyphraseLength.js +0 -17
- package/src/languageProcessing/researches/keywordCount.js +0 -134
- package/src/languageProcessing/researches/keywordCountInUrl.js +0 -57
- package/src/languageProcessing/researches/matchKeywordInSubheadings.js +0 -62
- package/src/languageProcessing/researches/metaDescriptionKeyword.js +0 -85
- package/src/languageProcessing/researches/metaDescriptionLength.js +0 -12
- package/src/languageProcessing/researches/pageTitleWidth.js +0 -11
- package/src/languageProcessing/researches/readingTime.js +0 -82
- package/src/languageProcessing/researches/sentences.js +0 -20
- package/src/languageProcessing/researches/videoCount.js +0 -32
- package/src/languageProcessing/researches/wordComplexity.js +0 -129
- package/src/languageProcessing/researches/wordCountInText.js +0 -29
- package/src/languageProcessing/values/Clause.js +0 -108
- package/src/languageProcessing/values/ProminentWord.js +0 -95
- package/src/languageProcessing/values/Sentence.js +0 -111
- package/src/languageProcessing/values/index.js +0 -9
- package/src/markers/addMark.js +0 -9
- package/src/markers/addMarkSingleWord.js +0 -32
- package/src/markers/index.js +0 -7
- package/src/markers/removeDuplicateMarks.js +0 -27
- package/src/markers/removeMarks.js +0 -11
- package/src/parse/build/build.js +0 -52
- package/src/parse/build/index.js +0 -10
- package/src/parse/build/private/adapt.js +0 -113
- package/src/parse/build/private/adaptAttributes.js +0 -36
- package/src/parse/build/private/alwaysFilterElements.js +0 -75
- package/src/parse/build/private/combineIntoImplicitParagraphs.js +0 -130
- package/src/parse/build/private/filterBeforeTokenizing.js +0 -32
- package/src/parse/build/private/filterHelpers.js +0 -44
- package/src/parse/build/private/filterTree.js +0 -42
- package/src/parse/build/private/getTextElementPositions.js +0 -184
- package/src/parse/build/private/helpers/parseClassAttribute.js +0 -9
- package/src/parse/build/private/isPhrasingContent.js +0 -28
- package/src/parse/build/private/parseBlocks.js +0 -151
- package/src/parse/build/private/tokenize.js +0 -74
- package/src/parse/language/LanguageProcessor.js +0 -74
- package/src/parse/structure/Heading.js +0 -26
- package/src/parse/structure/Node.js +0 -69
- package/src/parse/structure/Paragraph.js +0 -48
- package/src/parse/structure/Sentence.js +0 -30
- package/src/parse/structure/SourceCodeLocation.js +0 -41
- package/src/parse/structure/Text.js +0 -27
- package/src/parse/structure/Token.js +0 -24
- package/src/parse/structure/index.js +0 -16
- package/src/parse/traverse/findAllInTree.js +0 -58
- package/src/parse/traverse/index.js +0 -12
- package/src/parse/traverse/innerText.js +0 -26
- package/src/parsedPaper/ParsedPaper.js +0 -92
- package/src/parsedPaper/assess/TreeAssessor.js +0 -184
- package/src/parsedPaper/assess/assessmentListFactories.js +0 -73
- package/src/parsedPaper/assess/assessments/Assessment.js +0 -79
- package/src/parsedPaper/assess/assessments/index.js +0 -6
- package/src/parsedPaper/assess/assessorFactories.js +0 -104
- package/src/parsedPaper/assess/cornerstone/assessmentListFactories.js +0 -47
- package/src/parsedPaper/assess/cornerstone/index.js +0 -5
- package/src/parsedPaper/assess/index.js +0 -20
- package/src/parsedPaper/build/PaperParser.js +0 -105
- package/src/parsedPaper/build/linguisticParsing/Sentence.js +0 -89
- package/src/parsedPaper/build/linguisticParsing/SentenceTokenizer.js +0 -323
- package/src/parsedPaper/build/linguisticParsing/parseText.js +0 -20
- package/src/parsedPaper/build/tree/TreeBuilder.js +0 -75
- package/src/parsedPaper/build/tree/cleanup/calculateTextIndices.js +0 -190
- package/src/parsedPaper/build/tree/cleanup/getElementContent.js +0 -21
- package/src/parsedPaper/build/tree/cleanup/postParsing.js +0 -37
- package/src/parsedPaper/build/tree/html/HTMLTreeConverter.js +0 -230
- package/src/parsedPaper/build/tree/html/buildTree.js +0 -31
- package/src/parsedPaper/build/tree/html/htmlConstants.js +0 -37
- package/src/parsedPaper/build/tree/index.js +0 -14
- package/src/parsedPaper/build/tree/metadata/buildTree.js +0 -32
- package/src/parsedPaper/research/TreeResearcher.js +0 -134
- package/src/parsedPaper/research/index.js +0 -13
- package/src/parsedPaper/research/researches/Headings.js +0 -20
- package/src/parsedPaper/research/researches/LinkStatistics.js +0 -128
- package/src/parsedPaper/research/researches/Research.js +0 -50
- package/src/parsedPaper/research/researches/index.js +0 -1
- package/src/parsedPaper/structure/tree/FormattingElement.js +0 -67
- package/src/parsedPaper/structure/tree/SourceCodeLocation.js +0 -31
- package/src/parsedPaper/structure/tree/TextContainer.js +0 -85
- package/src/parsedPaper/structure/tree/index.js +0 -22
- package/src/parsedPaper/structure/tree/nodes/Heading.js +0 -26
- package/src/parsedPaper/structure/tree/nodes/LeafNode.js +0 -75
- package/src/parsedPaper/structure/tree/nodes/List.js +0 -47
- package/src/parsedPaper/structure/tree/nodes/ListItem.js +0 -26
- package/src/parsedPaper/structure/tree/nodes/MetadataMiscellaneous.js +0 -46
- package/src/parsedPaper/structure/tree/nodes/MetadataText.js +0 -26
- package/src/parsedPaper/structure/tree/nodes/Node.js +0 -154
- package/src/parsedPaper/structure/tree/nodes/Paragraph.js +0 -24
- package/src/parsedPaper/structure/tree/nodes/StructuredNode.js +0 -52
- package/src/parsedPaper/structure/tree/nodes/index.js +0 -21
- package/src/scoring/assessments/assessment.js +0 -63
- package/src/scoring/assessments/index.js +0 -58
- package/src/scoring/assessments/readability/ParagraphTooLongAssessment.js +0 -173
- package/src/scoring/assessments/readability/SentenceBeginningsAssessment.js +0 -132
- package/src/scoring/assessments/readability/SentenceLengthInTextAssessment.js +0 -186
- package/src/scoring/assessments/readability/TransitionWordsAssessment.js +0 -168
- package/src/scoring/assessments/seo/ImageCountAssessment.js +0 -112
- package/src/scoring/assessments/seo/InternalLinksAssessment.js +0 -114
- package/src/scoring/assessments/seo/IntroductionKeywordAssessment.js +0 -110
- package/src/scoring/assessments/seo/KeyphraseAssessment.js +0 -104
- package/src/scoring/assessments/seo/KeyphraseLengthAssessment.js +0 -110
- package/src/scoring/assessments/seo/KeywordDensityAssessment.js +0 -116
- package/src/scoring/assessments/seo/MetaDescriptionKeywordAssessment.js +0 -114
- package/src/scoring/assessments/seo/MetaDescriptionLengthAssessment.js +0 -112
- package/src/scoring/assessments/seo/MetaTitleKeywordAssessment.js +0 -111
- package/src/scoring/assessments/seo/NumberInMetaTitleAssessment.js +0 -107
- package/src/scoring/assessments/seo/OutboundLinksAssessment.js +0 -111
- package/src/scoring/assessments/seo/PageTitleWidthAssessment.js +0 -104
- package/src/scoring/assessments/seo/SingleH1Assessment.js +0 -118
- package/src/scoring/assessments/seo/SingleTitleAssessment.js +0 -108
- package/src/scoring/assessments/seo/SubHeadingsKeywordAssessment.js +0 -107
- package/src/scoring/assessments/seo/TextImagesAssessment.js +0 -144
- package/src/scoring/assessments/seo/TextLengthAssessment.js +0 -100
- package/src/scoring/assessments/seo/UrlKeywordAssessment.js +0 -111
- package/src/scoring/assessments/seo/UrlLengthAssessment.js +0 -103
- package/src/scoring/assessors/assessor.js +0 -269
- package/src/scoring/assessors/avadaAssessor.js +0 -67
- package/src/scoring/assessors/contentAssessor.js +0 -159
- package/src/scoring/assessors/index.js +0 -4
- package/src/scoring/assessors/seoAssessor.js +0 -57
- package/src/scoring/helpers/assessments/checkForTooLongSentences.js +0 -13
- package/src/scoring/helpers/assessments/inRange.js +0 -49
- package/src/scoring/helpers/assessments/keyphraseLengthFactor.js +0 -10
- package/src/scoring/helpers/assessments/recommendedKeywordCount.js +0 -43
- package/src/scoring/helpers/index.js +0 -74
- package/src/scoring/interpreters/index.js +0 -5
- package/src/scoring/interpreters/scoreToRating.js +0 -31
- package/src/scoring/renderers/AssessorPresenter.js +0 -360
- package/src/scoring/scoreAggregators/ReadabilityScoreAggregator.js +0 -203
- package/src/scoring/scoreAggregators/SEOScoreAggregator.js +0 -54
- package/src/scoring/scoreAggregators/ScoreAggregator.js +0 -23
- package/src/scoring/scoreAggregators/index.js +0 -3
- package/src/values/AssessmentResult.js +0 -496
- package/src/values/Mark.js +0 -271
- package/src/values/Paper.js +0 -425
- package/src/values/index.js +0 -9
- package/src/vendor/turkishStemmer.js +0 -3435
- package/tsconfig.json +0 -15
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
/**
 * A single sentence of a text, together with the position it occupies in that text.
 */
class Sentence {
	/**
	 * Creates a sentence with the given text and position. The list of words starts out empty.
	 *
	 * @param {string} text       The text of this sentence.
	 * @param {number} startIndex The index at which this sentence starts (defaults to 0).
	 * @param {number} endIndex   The index at which this sentence ends (defaults to 0).
	 */
	constructor( text, startIndex = 0, endIndex = 0 ) {
		// Properties are assigned in the order: text, words, startIndex, endIndex.
		Object.assign( this, {
			text,
			words: [],
			startIndex,
			endIndex,
		} );
	}

	/**
	 * Retrieves the text of this sentence.
	 *
	 * @returns {string} The text of this sentence.
	 */
	getText() {
		return this.text;
	}

	/**
	 * Replaces the text of this sentence.
	 *
	 * @param {string} text The new text of the sentence.
	 *
	 * @returns {void}
	 */
	setText( text ) {
		this.text = text;
	}

	/**
	 * Adds text to the end of the current text of this sentence.
	 *
	 * @param {string} text The text to add after the current text.
	 *
	 * @returns {void}
	 */
	appendText( text ) {
		this.text = this.text + text;
	}

	/**
	 * Retrieves the start index of this sentence.
	 *
	 * @returns {number} The start index of this sentence.
	 */
	getStartIndex() {
		return this.startIndex;
	}

	/**
	 * Stores the start index of this sentence.
	 *
	 * @param {number} startIndex The new start index.
	 *
	 * @returns {void}
	 */
	setStartIndex( startIndex ) {
		this.startIndex = startIndex;
	}

	/**
	 * Retrieves the end index of this sentence.
	 *
	 * @returns {number} The end index of this sentence.
	 */
	getEndIndex() {
		return this.endIndex;
	}

	/**
	 * Stores the end index of this sentence.
	 *
	 * @param {number} endIndex The new end index.
	 *
	 * @returns {void}
	 */
	setEndIndex( endIndex ) {
		this.endIndex = endIndex;
	}
}

export default Sentence;
|
|
@@ -1,323 +0,0 @@
|
|
|
1
|
-
import { isNaN, isUndefined } from "lodash";
|
|
2
|
-
|
|
3
|
-
import core from "tokenizer2/core";
|
|
4
|
-
import { normalize as normalizeQuotes } from "../../../languageProcessing/helpers/sanitize/quotes";
|
|
5
|
-
|
|
6
|
-
import Sentence from "./Sentence";
|
|
7
|
-
|
|
8
|
-
// Characters that can end a sentence. The full stop is kept apart from the other delimiters.
const fullStop = ".";
// Besides "?", "!" and ";" this contains \u2026, the ellipsis character.
const sentenceDelimiters = "?!;\u2026";

// Matches a string consisting of exactly one full stop.
const fullStopRegex = new RegExp( `^[${ fullStop }]$` );
// Matches a string consisting of exactly one of the other sentence delimiters.
const sentenceDelimiterRegex = new RegExp( `^[${ sentenceDelimiters }]$` );
// Matches a non-empty string without any full stop, sentence delimiter, parenthesis or square bracket.
const sentenceRegex = new RegExp( `^[^${ fullStop }${ sentenceDelimiters }\\(\\)\\[\\]]+$` );

// Match a single opening / closing bracket, optionally surrounded by whitespace.
const blockStartRegex = /^\s*[[({]\s*$/;
const blockEndRegex = /^\s*[\])}]\s*$/;

// Match the (possibly empty) run of whitespace at the start / end of a string.
const whiteSpaceStartRegex = /^\s*/;
const whiteSpaceEndRegex = /\s*$/;
|
|
22
|
-
|
|
23
|
-
/**
 * Class for tokenizing a (html) text into sentences.
 */
export default class SentenceTokenizer {
	/**
	 * Returns whether or not a certain character is a number.
	 *
	 * @param {string} character The character to check.
	 *
	 * @returns {boolean} Whether or not the character is a number.
	 */
	isNumber( character ) {
		return ! isNaN( parseInt( character, 10 ) );
	}

	/**
	 * Returns whether or not a given character is quotation mark.
	 *
	 * The character is passed through `normalizeQuotes` first and then compared with the
	 * straight single and double quote.
	 *
	 * @param {string} character The character to check.
	 *
	 * @returns {boolean} Whether or not the given character is a quotation mark.
	 */
	isQuotation( character ) {
		// NOTE(review): presumably maps typographic quotes onto their straight equivalents - confirm in the quotes helper.
		character = normalizeQuotes( character );

		return "'" === character ||
			"\"" === character;
	}

	/**
	 * Returns whether or not a given character is a punctuation mark that can be at the beginning
	 * of a sentence, like ¿ and ¡ used in Spanish.
	 *
	 * @param {string} character The character to check.
	 *
	 * @returns {boolean} Whether or not the given character is a punctuation mark.
	 */
	isPunctuation( character ) {
		return "¿" === character ||
			"¡" === character;
	}

	/**
	 * Removes duplicate whitespace from a given text.
	 *
	 * Every run of one or more whitespace characters is replaced by a single space.
	 *
	 * @param {string} text The text with duplicate whitespace.
	 *
	 * @returns {string} The text without duplicate whitespace.
	 */
	removeDuplicateWhitespace( text ) {
		// The global flag is required: without it only the first run of whitespace is collapsed.
		return text.replace( /\s+/g, " " );
	}

	/**
	 * Returns whether or not a certain character is a capital letter.
	 *
	 * A character counts as a capital letter when (locale-aware) lower-casing changes it.
	 *
	 * @param {string} character The character to check.
	 *
	 * @returns {boolean} Whether or not the character is a capital letter.
	 */
	isCapitalLetter( character ) {
		return character !== character.toLocaleLowerCase();
	}

	/**
	 * Retrieves the text of the next two tokens, with duplicate whitespace removed.
	 *
	 * Callers only look at the first two characters of the returned string.
	 *
	 * @param {Array} nextTokens The two next tokens. Either entry might be undefined.
	 *
	 * @returns {string} The concatenated source of the defined tokens, with whitespace collapsed.
	 */
	getNextTwoCharacters( nextTokens ) {
		let next = "";

		if ( ! isUndefined( nextTokens[ 0 ] ) ) {
			next += nextTokens[ 0 ].src;
		}

		if ( ! isUndefined( nextTokens[ 1 ] ) ) {
			next += nextTokens[ 1 ].src;
		}

		next = this.removeDuplicateWhitespace( next );

		return next;
	}

	/**
	 * Checks if the sentenceBeginning beginning is a valid beginning.
	 *
	 * A beginning is valid when it is a capital letter, a number, a quotation mark or
	 * a sentence-initial punctuation mark (¿ or ¡).
	 *
	 * @param {string} sentenceBeginning The beginning of the sentence to validate.
	 *
	 * @returns {boolean} Returns true if it is a valid beginning, false if it is not.
	 */
	isValidSentenceBeginning( sentenceBeginning ) {
		return (
			this.isCapitalLetter( sentenceBeginning ) ||
			this.isNumber( sentenceBeginning ) ||
			this.isQuotation( sentenceBeginning ) ||
			this.isPunctuation( sentenceBeginning )
		);
	}

	/**
	 * Checks if the token marks the start of a new sentence.
	 *
	 * @param {Object} token The token to validate. Might be undefined.
	 *
	 * @returns {boolean} Returns true if the token is defined and has the type "html-start",
	 *                    "html-end" or "block-start", false otherwise.
	 */
	isSentenceStart( token ) {
		return ( ! isUndefined( token ) && (
			"html-start" === token.type ||
			"html-end" === token.type ||
			"block-start" === token.type
		) );
	}

	/**
	 * Creates a tokenizer.
	 *
	 * The tokenizer pushes every token it emits onto the returned `tokens` array.
	 *
	 * @returns {Object} The tokenizer and the tokens.
	 */
	createTokenizer() {
		const tokens = [];
		const tokenizer = core( function( token ) {
			tokens.push( token );
		} );

		// NOTE(review): the order of registration may be significant to tokenizer2 - keep it as is.
		tokenizer.addRule( fullStopRegex, "full-stop" );
		tokenizer.addRule( blockStartRegex, "block-start" );
		tokenizer.addRule( blockEndRegex, "block-end" );
		tokenizer.addRule( sentenceDelimiterRegex, "sentence-delimiter" );
		tokenizer.addRule( sentenceRegex, "sentence" );

		return {
			tokenizer,
			tokens,
		};
	}

	/**
	 * Tokenizes the given text using the given tokenizer.
	 *
	 * An error thrown while ending the tokenizer is logged to the console and not rethrown.
	 *
	 * @param {Tokenizer} tokenizer The tokenizer to use.
	 * @param {string}    text      The text to tokenize.
	 *
	 * @returns {void}
	 */
	tokenize( tokenizer, text ) {
		tokenizer.onText( text );

		try {
			tokenizer.end();
		} catch ( e ) {
			console.error( "Tokenizer end error:", e, e.tokenizer2 );
		}
	}

	/**
	 * Determines the start and end indices of a set of sentences from a text.
	 *
	 * The sentences are treated as contiguous: the first one starts at index 0 and every
	 * following sentence starts directly after the end index of the previous one.
	 *
	 * @param {Sentence[]} sentences A set of sentences for which to determine indices.
	 *
	 * @returns {void}
	 */
	determineIndices( sentences ) {
		let currentIndex = 0;

		for ( const sentence of sentences ) {
			const startIndex = currentIndex;
			sentence.setStartIndex( currentIndex );
			// The end index is inclusive: it points at the last character of the sentence.
			const endIndex = startIndex + sentence.text.length - 1;
			sentence.setEndIndex( endIndex );
			currentIndex = endIndex + 1;
		}
	}

	/**
	 * Trims the white space from the beginning of a sentence and adjusts the sentence start index accordingly.
	 *
	 * @param {Sentence} sentence The sentence for which to trim the white space at the start.
	 *
	 * @returns {void}
	 */
	trimWhiteSpaceAtStart( sentence ) {
		const whiteSpaceLength = sentence.text.match( whiteSpaceStartRegex )[ 0 ].length;
		sentence.setText( sentence.getText().slice( whiteSpaceLength ) );
		sentence.setStartIndex( sentence.getStartIndex() + whiteSpaceLength );
	}

	/**
	 * Trims the white space from the end of a sentence and adjusts the sentence end index accordingly.
	 *
	 * @param {Sentence} sentence The sentence for which to trim the white space at the end.
	 *
	 * @returns {void}
	 */
	trimWhiteSpaceAtEnd( sentence ) {
		const whiteSpaceLength = sentence.text.match( whiteSpaceEndRegex )[ 0 ].length;
		sentence.setText( sentence.getText().slice( 0, sentence.getText().length - whiteSpaceLength ) );
		sentence.setEndIndex( sentence.getEndIndex() - whiteSpaceLength );
	}

	/**
	 * Trims white space from the beginning and end of sentences and adjusts the indices
	 * of the sentence beginnings and ends accordingly.
	 *
	 * @param {Sentence[]} sentences The sentences for which to trim the whitespace.
	 *
	 * @returns {void}
	 */
	trimWhiteSpaces( sentences ) {
		for ( const sentence of sentences ) {
			this.trimWhiteSpaceAtStart( sentence );
			this.trimWhiteSpaceAtEnd( sentence );
		}
	}

	/**
	 * Returns an array of sentence objects for a given array of tokens; assumes that the text has already been split into blocks.
	 *
	 * The source of every recognized token is appended to the current sentence. Depending on the
	 * token type and on what follows it, the current sentence is closed and a new one is started.
	 * Afterwards the indices of the sentences are determined and surrounding whitespace is trimmed.
	 *
	 * @param {Object[]} tokenArray The tokens from the sentence tokenizer.
	 *
	 * @returns {Sentence[]} An array of sentence objects.
	 */
	getSentencesFromTokens( tokenArray ) {
		const tokenSentences = [];
		let currentSentence = new Sentence( "", 0, 0 ),
			nextSentenceStart;

		tokenArray.forEach( ( token, i ) => {
			let hasNextSentence, nextCharacters;
			const nextToken = tokenArray[ i + 1 ];
			const secondToNextToken = tokenArray[ i + 2 ];

			switch ( token.type ) {
				case "sentence":
					currentSentence.appendText( token.src );
					break;

				case "sentence-delimiter":
					currentSentence.appendText( token.src );
					// Do not split at the very last token, before a closing bracket, or between consecutive delimiters (e.g. "?!").
					if ( ! isUndefined( nextToken ) && "block-end" !== nextToken.type && "sentence-delimiter" !== nextToken.type ) {
						tokenSentences.push( currentSentence );
						currentSentence = new Sentence( "" );
					}
					break;

				case "full-stop":
					currentSentence.appendText( token.src );

					nextCharacters = this.getNextTwoCharacters( [ nextToken, secondToNextToken ] );

					// For a new sentence we need to check the next two characters.
					hasNextSentence = nextCharacters.length >= 2;
					// NOTE(review): the second character is inspected, presumably because the first is the whitespace after the full stop.
					nextSentenceStart = hasNextSentence ? nextCharacters[ 1 ] : "";
					// If the next character is a number, never split. For example: IPv4-numbers.
					if ( hasNextSentence && this.isNumber( nextCharacters[ 0 ] ) ) {
						break;
					}
					// Only split on sentence delimiters when the next sentence looks like the start of a sentence.
					if ( ( hasNextSentence && this.isValidSentenceBeginning( nextSentenceStart ) ) || this.isSentenceStart( nextToken ) ) {
						tokenSentences.push( currentSentence );
						currentSentence = new Sentence( "" );
					}
					break;

				case "block-start":
					currentSentence.appendText( token.src );
					break;

				case "block-end":
					currentSentence.appendText( token.src );
					nextCharacters = this.getNextTwoCharacters( [ nextToken, secondToNextToken ] );

					// For a new sentence we need to check the next two characters.
					hasNextSentence = nextCharacters.length >= 2;
					// Unlike the full-stop case, the first of the next characters is inspected here.
					nextSentenceStart = hasNextSentence ? nextCharacters[ 0 ] : "";
					// If the next character is a number, never split. For example: IPv4-numbers.
					if ( hasNextSentence && this.isNumber( nextCharacters[ 0 ] ) ) {
						break;
					}

					if ( ( hasNextSentence && this.isValidSentenceBeginning( nextSentenceStart ) ) || this.isSentenceStart( nextToken ) ) {
						tokenSentences.push( currentSentence );
						currentSentence = new Sentence( "" );
					}
					break;
			}
		} );

		// Keep whatever text is left after the last split as the final sentence.
		if ( currentSentence.getText() !== "" ) {
			tokenSentences.push( currentSentence );
		}

		// Indices must be determined before trimming, since trimming adjusts them.
		this.determineIndices( tokenSentences );

		this.trimWhiteSpaces( tokenSentences );

		return tokenSentences;
	}
}
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
import SentenceTokenizer from "./SentenceTokenizer";
|
|
2
|
-
|
|
3
|
-
/**
 * Splits a text into sentences.
 *
 * A fresh SentenceTokenizer is created for every call: the text is tokenized first and
 * the resulting tokens are then combined into sentences.
 *
 * @param {string} text The text to split.
 *
 * @returns {Sentence[]} The sentence objects, as returned by `SentenceTokenizer.getSentencesFromTokens`.
 */
function parseTextIntoSentences( text ) {
	const splitter = new SentenceTokenizer();
	const setup = splitter.createTokenizer();

	// Tokenizing fills `setup.tokens`.
	splitter.tokenize( setup.tokenizer, text );

	return splitter.getSentencesFromTokens( setup.tokens );
}

export {
	parseTextIntoSentences,
};
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
import buildTree from "./html/buildTree";
|
|
2
|
-
|
|
3
|
-
/**
 * Builds a tree representation of a source text.
 *
 * Currently supported languages:
 *  - HTML (`"html"`)
 *
 * Support for other formatting languages can be added through `register`.
 *
 * @memberOf module:parsedPaper/builder
 */
class TreeBuilder {
	/**
	 * Creates a new TreeBuilder.
	 */
	constructor() {
		/**
		 * A registry holding the parse function to be called
		 * for each supported formatting language.
		 *
		 * @type {Object<string,Function>}
		 * @private
		 */
		this._buildFunctions = {
			html: buildTree,
		};

		// Bind the public methods, so they keep working when passed around detached from the instance.
		this.build = this.build.bind( this );
		this.register = this.register.bind( this );
	}

	/**
	 * Parses the given source text to a tree representation.
	 *
	 * Chooses the right parser depending on the given formatting language in which the text has been written.
	 * The language defaults to HTML, both when no options are given and when the options do not specify a language.
	 *
	 * @param {string} sourceText                The source text that needs to be parsed to a tree representation.
	 * @param {Object} [options]                 Parse options.
	 * @param {string} [options.language="html"] The formatting language in which the source text has been written, e.g. `"html"` for HTML.
	 *
	 * @returns {module:parsedPaper/structure.Node} The tree representation as parsed from the source text.
	 *
	 * @throws {TypeError} When no build function has been registered for the requested language.
	 */
	build( sourceText, options = {} ) {
		// Default the language itself, so an options object without a `language` still falls back to HTML.
		const { language = "html" } = options;

		// Only accept own entries of the registry, so inherited keys like "constructor" are not mistaken for builders.
		const isRegistered = Object.prototype.hasOwnProperty.call( this._buildFunctions, language );
		if ( ! isRegistered || typeof this._buildFunctions[ language ] !== "function" ) {
			throw new TypeError( `No build function has been registered for the formatting language "${ language }".` );
		}

		return this._buildFunctions[ language ]( sourceText );
	}

	/**
	 * Registers a build function for the given formatting language.
	 *
	 * Call this method if you want to add support for another formatting language.
	 * Registering a language that already exists replaces its build function.
	 *
	 * @example
	 * // Create a new build function.
	 * const myBuilder = sourceText => {
	 *   const node = new Paragraph();
	 *   node.textContainer.appendText( sourceText );
	 *   return node;
	 * };
	 *
	 * // Register the build function for the language "my-language".
	 * treeBuilder.register( "my-language", myBuilder );
	 *
	 * // Build the tree using the registered builder.
	 * const tree = treeBuilder.build( "some input", { language: "my-language" } );
	 *
	 * @param {string}                                                            language      The language to register.
	 * @param {function( sourceText: string ): module:parsedPaper/structure.Node} buildFunction The build function for the given language.
	 *
	 * @returns {void}
	 */
	register( language, buildFunction ) {
		this._buildFunctions[ language ] = buildFunction;
	}
}
|
|
74
|
-
|
|
75
|
-
export default TreeBuilder;
|
|
@@ -1,190 +0,0 @@
|
|
|
1
|
-
import { pullAll } from "lodash";
|
|
2
|
-
|
|
3
|
-
import { ignoredHtmlElements } from "../html/htmlConstants";
|
|
4
|
-
|
|
5
|
-
/**
 * Collects the open elements whose end tag ends at or before the position where the current element starts.
 *
 * Those are the elements that were opened earlier, but that do not contain the current element.
 * E.g. in
 * ```html
 * <strong>Hello</strong><em>World<b>!!!</b></em>
 * ```
 * with `<b>!!!</b>` as the current element, the `<strong>` can be closed, but the `<em>` can **not**.
 *
 * @param {module:parsedPaper/structure.FormattingElement}   currentElement The current element.
 * @param {module:parsedPaper/structure.FormattingElement[]} openElements   The elements that are currently open.
 *
 * @returns {module:parsedPaper/structure.FormattingElement[]} A new array with the elements that can be closed.
 */
const elementsThatCanBeClosed = function( currentElement, openElements ) {
	// An element is closable when its end tag does not extend past the start of the current element.
	const endsBeforeCurrentElement = ( { sourceCodeLocation } ) =>
		sourceCodeLocation.endTag.endOffset <= currentElement.sourceCodeLocation.startOffset;

	return openElements.filter( endsBeforeCurrentElement );
};
|
|
28
|
-
|
|
29
|
-
/**
 * Closes the given elements, in the order in which their end tags occur in the source code.
 *
 * For every element:
 *  1. Its text end index is set to the start of its end tag, minus the offset accumulated so far.
 *  2. The length of its end tag is added to the offset, so positions computed afterwards
 *     do not count that end tag either.
 *
 * Note that the given array is sorted in place.
 *
 * @param {module:parsedPaper/structure.FormattingElement[]} elementsToClose The list of open elements that need to be closed.
 * @param {number}                                           currentOffset   The current offset when parsing the formatting elements.
 *
 * @returns {number} The updated current offset.
 *
 * @private
 */
const closeElements = function( elementsToClose, currentOffset ) {
	const byEndTagPosition = ( first, second ) =>
		first.sourceCodeLocation.endTag.endOffset - second.sourceCodeLocation.endTag.endOffset;

	let offset = currentOffset;

	// Earlier end tags have to be processed first, since each one shifts the positions of the ones after it.
	for ( const element of elementsToClose.sort( byEndTagPosition ) ) {
		const { startOffset, endOffset } = element.sourceCodeLocation.endTag;

		// The end position as seen in the text.
		element.textEndIndex = startOffset - offset;

		// The end tag itself is not part of the text.
		offset += endOffset - startOffset;
	}

	return offset;
};
|
|
61
|
-
|
|
62
|
-
/**
 * Marks the given element as having no length in the text and counts the length of its content
 * (the part between its tags) towards the current offset.
 *
 * When the element has no start or end tag location, the start or end of the element itself is used instead.
 *
 * @param {module:parsedPaper/structure.FormattingElement} element       The element of which to add the content length.
 * @param {number}                                         currentOffset The current offset to which to add the length.
 *
 * @returns {number} The updated current offset.
 */
const handleIgnoredContent = function( element, currentOffset ) {
	// Ignored content takes up no room in the text, so the element ends where it starts.
	element.textEndIndex = element.textStartIndex;

	const location = element.sourceCodeLocation;
	const contentEnd = location.endTag ? location.endTag.startOffset : location.endOffset;
	const contentStart = location.startTag ? location.startTag.endOffset : location.startOffset;

	return currentOffset + ( contentEnd - contentStart );
};
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
/**
 * Sets the start and end text positions of a comment.
 *
 * A comment gets the same start and end position, so it has a length of zero in the text.
 *
 * @param {module:parsedPaper/structure.FormattingElement} element       The comment to assign start and end text positions to.
 * @param {int}                                            currentOffset A sum of all characters in the source code that don't get rendered
 *                                                                       (e.g., tags, comments).
 *
 * @returns {number} The length of the comment in the source code.
 *
 * @private
 */
const computeCommentStartEndTextIndices = function( element, currentOffset ) {
	const { startOffset, endOffset } = element.sourceCodeLocation;
	const positionInText = startOffset - currentOffset;

	element.textStartIndex = positionInText;
	element.textEndIndex = positionInText;

	return endOffset - startOffset;
};
|
|
102
|
-
|
|
103
|
-
/**
 * Sets the start text position of one formatting element, and also its end text position when it has no end tag.
 *
 * @param {module:parsedPaper/structure.FormattingElement} element       The formatting element to assign the text position(s) to.
 * @param {int}                                            currentOffset A sum of all characters in the source code that don't get rendered
 *                                                                       (e.g., tags, comments).
 *
 * @returns {int} The updated currentOffset, which includes the length of the element's start tag.
 *
 * @private
 */
const computeElementStartTextIndex = function( element, currentOffset ) {
	const { startTag, endTag } = element.sourceCodeLocation;

	// The start tag (for example: "<strong>") does not end up in the text, so count its length towards the offset.
	const offsetAfterStartTag = currentOffset + ( startTag.endOffset - startTag.startOffset );

	// The element's text starts right after its start tag.
	element.textStartIndex = startTag.endOffset - offsetAfterStartTag;

	// Elements without an end tag (e.g., void elements like <img/>) are closed right away, with a text length of 0.
	if ( ! endTag ) {
		element.textEndIndex = element.textStartIndex;
	}

	return offsetAfterStartTag;
};
|
|
135
|
-
|
|
136
|
-
/**
 * Sets the start and end positions in the text for all formatting elements of the given node.
 *
 * The formatting elements are processed in the order in which they are stored on the node's text container.
 * Does nothing when the text container holds no formatting elements.
 *
 * @param {module:parsedPaper/structure.LeafNode} node The node containing a TextContainer.
 *
 * @returns {void}
 *
 * @private
 */
const calculateTextIndices = function( node ) {
	const formattingElements = node.textContainer.formatting;
	if ( ! formattingElements || formattingElements.length === 0 ) {
		return;
	}

	// The elements of which the end tag has not been handled yet.
	const openElements = [];

	/*
	  The total size of everything in the source code that is not part of the text up to this point:
	  the node's own start tag (or everything before the node), start and end tags, comments and ignored content.
	  This should not be counted towards the start and end position of the elements in the text.
	 */
	const nodeLocation = node.sourceCodeLocation;
	let currentOffset = nodeLocation.startTag ? nodeLocation.startTag.endOffset : nodeLocation.startOffset;

	for ( const element of formattingElements ) {
		// First close whatever ended before this element, and drop it from the list of open elements.
		const closableElements = elementsThatCanBeClosed( element, openElements );
		currentOffset = closeElements( closableElements, currentOffset );
		pullAll( openElements, closableElements );

		// Comments are never rendered: they only add their full length to the offset.
		if ( element.type === "#comment" ) {
			currentOffset += computeCommentStartEndTextIndices( element, currentOffset );
			continue;
		}

		currentOffset = computeElementStartTextIndex( element, currentOffset );

		// An element with an end tag gets closed in a later iteration, or after the loop.
		if ( element.sourceCodeLocation.endTag ) {
			openElements.push( element );
		}

		// The content of an ignored element is not in the text, so it has to be counted towards the offset as well.
		if ( ignoredHtmlElements.includes( element.type ) ) {
			currentOffset = handleIgnoredContent( element, currentOffset );
		}
	}

	// Whatever is still open at this point ends after the last formatting element started.
	closeElements( openElements, currentOffset );
};
|
|
189
|
-
|
|
190
|
-
export default calculateTextIndices;
|