npm - axyseo - Versions diffs - 2.0.0-alpha.2 → 2.1.0 - Mend

axyseo 2.0.0-alpha.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1255) hide show

package/build/languageProcessing/helpers/sentence/SentenceTokenizer.js CHANGED Viewed

@@ -1,16 +1,10 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", {
-  value: true
-});
-exports.default = void 0;
-var _lodash = require("lodash");
-var _core = _interopRequireDefault(require("tokenizer2/core"));
-var _quotes = require("../sanitize/quotes.js");
-var _abbreviations = _interopRequireDefault(require("../../languages/en/config/abbreviations"));
-var _createRegexFromArray = _interopRequireDefault(require("../regex/createRegexFromArray"));
-var _wordBoundaries = _interopRequireDefault(require("../../../config/wordBoundaries"));
-function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
+import { isNaN, isUndefined, map } from "lodash";
+import core from "tokenizer2/core";
+import { normalize as normalizeQuotes } from "../sanitize/quotes.js";
+import abbreviations from "../../languages/en/config/abbreviations";
+import createRegexFromArray from "../regex/createRegexFromArray";
+import wordBoundaries from "../../../config/wordBoundaries";
 // All characters that indicate a sentence delimiter.
 const fullStop = ".";
 const fullStopRegex = new RegExp("^[" + fullStop + "]$");
@@ -19,9 +13,9 @@ const htmlStartRegex = /^<([^>\s/]+)[^>]*>$/mi;
 const htmlEndRegex = /^<\/([^>\s]+)[^>]*>$/mi;
 const blockStartRegex = /^\s*[[({]\s*$/;
 const blockEndRegex = /^\s*[\])}]\s*$/;
-const abbreviationsPreparedForRegex = _abbreviations.default.map(abbreviation => abbreviation.replace(".", "\\."));
-const abbreviationsRegex = (0, _createRegexFromArray.default)(abbreviationsPreparedForRegex);
-const wordBoundariesForRegex = "(^|$|[" + (0, _wordBoundaries.default)().map(boundary => "\\" + boundary).join("") + "])";
+const abbreviationsPreparedForRegex = abbreviations.map(abbreviation => abbreviation.replace(".", "\\."));
+const abbreviationsRegex = createRegexFromArray(abbreviationsPreparedForRegex);
+const wordBoundariesForRegex = "(^|$|[" + wordBoundaries().map(boundary => "\\" + boundary).join("") + "])";
 const lastCharacterPartOfInitialsRegex = new RegExp(wordBoundariesForRegex + "[A-Za-z]$");
 // Constants to be used in isValidTagPair.
@@ -33,7 +27,7 @@ const semanticTags = ["p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "span", "l
 /**
  * Class for tokenizing a (html) text into sentences.
  */
-class SentenceTokenizer {
+export default class SentenceTokenizer {
   /**
    * Constructor
    * @constructor
@@ -63,7 +57,7 @@ class SentenceTokenizer {
    * @returns {boolean} Whether or not the character is a capital letter.
    */
   isNumber(character) {
-    return !(0, _lodash.isNaN)(parseInt(character, 10));
+    return !isNaN(parseInt(character, 10));
   }
   /**
@@ -83,7 +77,7 @@ class SentenceTokenizer {
    * @returns {boolean} Whether or not the given character is a quotation mark.
    */
   isQuotation(character) {
-    character = (0, _quotes.normalize)(character);
+    character = normalizeQuotes(character);
     return "'" === character || "\"" === character;
   }
@@ -149,10 +143,10 @@ class SentenceTokenizer {
    */
   getNextTwoCharacters(nextTokens) {
     let next = "";
-    if (!(0, _lodash.isUndefined)(nextTokens[0])) {
+    if (!isUndefined(nextTokens[0])) {
       next += nextTokens[0].src;
     }
-    if (!(0, _lodash.isUndefined)(nextTokens[1])) {
+    if (!isUndefined(nextTokens[1])) {
       next += nextTokens[1].src;
     }
     next = this.removeDuplicateWhitespace(next);
@@ -196,7 +190,7 @@ class SentenceTokenizer {
    * @returns {boolean} Returns true if the token is valid sentence start, false if it is not.
    */
   isSentenceStart(token) {
-    return !(0, _lodash.isUndefined)(token) && ("html-start" === token.type || "html-end" === token.type || "block-start" === token.type);
+    return !isUndefined(token) && ("html-start" === token.type || "html-end" === token.type || "block-start" === token.type);
   }
   /**
@@ -207,7 +201,7 @@ class SentenceTokenizer {
    * @returns {boolean} Returns true if the token is valid sentence ending, false if it is not.
    */
   isSentenceEnding(token) {
-    return !(0, _lodash.isUndefined)(token) && (token.type === "full-stop" || token.type === "sentence-delimiter");
+    return !isUndefined(token) && (token.type === "full-stop" || token.type === "sentence-delimiter");
   }
   /**
@@ -225,7 +219,7 @@ class SentenceTokenizer {
    * @returns {boolean} True if a full stop is part of a person's initials, False if the full stop is not part of a person's initials.
    */
   isPartOfPersonInitial(token, previousToken, nextToken, secondToNextToken) {
-    return !(0, _lodash.isUndefined)(token) && !(0, _lodash.isUndefined)(nextToken) && !(0, _lodash.isUndefined)(secondToNextToken) && !(0, _lodash.isUndefined)(previousToken) && token.type === "full-stop" && previousToken.type === "sentence" && lastCharacterPartOfInitialsRegex.test(previousToken.src) && nextToken.type === "sentence" && nextToken.src.trim().length === 1 && secondToNextToken.type === "full-stop";
+    return !isUndefined(token) && !isUndefined(nextToken) && !isUndefined(secondToNextToken) && !isUndefined(previousToken) && token.type === "full-stop" && previousToken.type === "sentence" && lastCharacterPartOfInitialsRegex.test(previousToken.src) && nextToken.type === "sentence" && nextToken.src.trim().length === 1 && secondToNextToken.type === "full-stop";
   }
   /**
@@ -249,7 +243,7 @@ class SentenceTokenizer {
     const tokenizerResult = this.createTokenizer();
     this.tokenize(tokenizerResult.tokenizer, localText);
     const localSentences = this.getSentencesFromTokens(tokenizerResult.tokens, false);
-    localSentences[0] = (0, _lodash.isUndefined)(localSentences[0]) ? "<" : "<" + localSentences[0];
+    localSentences[0] = isUndefined(localSentences[0]) ? "<" : "<" + localSentences[0];
     /*
      * When the first sentence has a valid sentence beginning.
@@ -304,7 +298,7 @@ class SentenceTokenizer {
     const sentenceDelimiterRegex = new RegExp("^[" + this.getSentenceDelimiters() + "]$");
     const sentenceRegex = new RegExp("^[^" + fullStop + this.getSentenceDelimiters() + "<\\(\\)\\[\\]]+$");
     const tokens = [];
-    const tokenizer = (0, _core.default)(function (token) {
+    const tokenizer = core(function (token) {
       tokens.push(token);
     });
     tokenizer.addRule(fullStopRegex, "full-stop");
@@ -433,7 +427,7 @@ class SentenceTokenizer {
               * the next token type is neither "block-end" nor "sentence-delimiter", AND
               * the next token first character is a white space
              */
-          if (!(0, _lodash.isUndefined)(nextToken) && "block-end" !== nextToken.type && "sentence-delimiter" !== nextToken.type && this.isCharacterASpace(nextToken.src[0])) {
+          if (!isUndefined(nextToken) && "block-end" !== nextToken.type && "sentence-delimiter" !== nextToken.type && this.isCharacterASpace(nextToken.src[0])) {
             // Don't split on quotation marks unless they're preceded by a full stop.
             if (this.isQuotation(token.src) && previousToken && previousToken.src !== ".") {
               break;
@@ -522,7 +516,7 @@ class SentenceTokenizer {
       tokenSentences.push(currentSentence);
     }
     if (trimSentences) {
-      tokenSentences = (0, _lodash.map)(tokenSentences, function (sentence) {
+      tokenSentences = map(tokenSentences, function (sentence) {
         return sentence.trim();
       });
     }
@@ -561,5 +555,4 @@ class SentenceTokenizer {
     return /\s/.test(character);
   }
 }
-exports.default = SentenceTokenizer;
 //# sourceMappingURL=SentenceTokenizer.js.map

package/build/languageProcessing/helpers/sentence/SentenceTokenizer.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"SentenceTokenizer.js","names":["_lodash","require","_core","_interopRequireDefault","_quotes","_abbreviations","_createRegexFromArray","_wordBoundaries","e","__esModule","default","fullStop","fullStopRegex","RegExp","smallerThanContentRegex","htmlStartRegex","htmlEndRegex","blockStartRegex","blockEndRegex","abbreviationsPreparedForRegex","abbreviations","map","abbreviation","replace","abbreviationsRegex","createRegexFromArray","wordBoundariesForRegex","wordBoundaries","boundary","join","lastCharacterPartOfInitialsRegex","tagTypeRegex","semanticTags","SentenceTokenizer","constructor","sentenceDelimiters","getSentenceDelimiters","isNumber","character","isNaN","parseInt","isBreakTag","htmlTag","test","isQuotation","normalizeQuotes","endsWithOrdinalDot","isPunctuation","removeDuplicateWhitespace","text","isCapitalLetter","toLocaleLowerCase","isSmallerThanSign","getNextTwoCharacters","nextTokens","next","isUndefined","src","isLetterFromSpecificLanguage","letter","ltrLetterRanges","some","ltrLetterRange","isValidSentenceBeginning","sentenceBeginning","isSentenceStart","token","type","isSentenceEnding","isPartOfPersonInitial","previousToken","nextToken","secondToNextToken","trim","length","tokenizeSmallerThanContent","tokenSentences","currentSentence","localText","substring","tokenizerResult","createTokenizer","tokenize","tokenizer","localSentences","getSentencesFromTokens","tokens","push","shift","lastSentence","pop","forEach","sentence","sentenceEndRegex","match","sentenceDelimiterRegex","sentenceRegex","core","addRule","onText","end","console","error","tokenizer2","endsWithAbbreviation","matchedAbbreviations","lastAbbreviation","endsWith","isValidTagPair","firstToken","lastToken","firstTokenText","lastTokenText","firstTagType","lastTagType","includes","tokenArray","trimSentences","nextSentenceStart","sliced","slice","i","hasNextSentence","nextCharacters","tokenizeResults","isCharacterASpace","getValidSentence","exports"],"sources":["../../../../src/languageProcessing/helpers/sentence/SentenceTokenizer.js"],"sourcesContent":["import { isNaN, isUndefined, map } from \"lodash\";\n\nimport core from \"tokenizer2/core\";\n\nimport { normalize as normalizeQuotes } from \"../sanitize/quotes.js\";\n\nimport abbreviations from \"../../languages/en/config/abbreviations\";\n\nimport createRegexFromArray from \"../regex/createRegexFromArray\";\nimport wordBoundaries from \"../../../config/wordBoundaries\";\n\n// All characters that indicate a sentence delimiter.\nconst fullStop = \".\";\n\nconst fullStopRegex = new RegExp( \"^[\" + fullStop + \"]$\" );\nconst smallerThanContentRegex = /^<[^><]$/;\nconst htmlStartRegex = /^<([^>\\s/]+)[^>]>$/mi;\nconst htmlEndRegex = /^<\\/([^>\\s]+)[^>]>$/mi;\n\nconst blockStartRegex = /^\\s[[({]\\s$/;\nconst blockEndRegex = /^\\s[\\])}]\\s$/;\n\nconst abbreviationsPreparedForRegex = abbreviations.map( ( abbreviation ) => abbreviation.replace( \".\", \"\\\\.\" ) );\nconst abbreviationsRegex = createRegexFromArray( abbreviationsPreparedForRegex );\n\nconst wordBoundariesForRegex = \"(^\|$\|[\" + wordBoundaries().map( ( boundary ) => \"\\\\\" + boundary ).join( \"\" ) + \"])\";\nconst lastCharacterPartOfInitialsRegex = new RegExp( wordBoundariesForRegex + \"[A-Za-z]$\" );\n\n// Constants to be used in isValidTagPair.\n// A regex to get the tag type.\nconst tagTypeRegex = /<\\/?([^\\s]+?)(\\s\|>)/;\n// Semantic tags (as opposed to style tags) are tags that are used to structure the text.\nconst semanticTags = [ \"p\", \"div\", \"h1\", \"h2\", \"h3\", \"h4\", \"h5\", \"h6\", \"span\", \"li\", \"main\" ];\n\n/\n Class for tokenizing a (html) text into sentences.\n /\nexport default class SentenceTokenizer {\n\t/\n\t Constructor\n\t * @constructor\n\t /\n\tconstructor() {\n\t\t/\n * \\u2026 - ellipsis.\n * \\u06D4 - Urdu full stop.\n * \\u061f - Arabic question mark.\n /\n\t\tthis.sentenceDelimiters = \"”〞〟„』›»’‛`\\\"?!\\u2026\\u06d4\\u061f\";\n\t}\n\n\t/\n\t Gets the sentence delimiters.\n\t \n\t @returns {string} The sentence delimiters.\n\t /\n\tgetSentenceDelimiters() {\n\t\treturn this.sentenceDelimiters;\n\t}\n\n\t/\n\t Returns whether or not a certain character is a number.\n\t \n\t @param {string} character The character to check.\n\t * @returns {boolean} Whether or not the character is a capital letter.\n\t /\n\tisNumber( character ) {\n\t\treturn ! isNaN( parseInt( character, 10 ) );\n\t}\n\n\t/\n\t Returns whether or not a given HTML tag is a break tag.\n\t \n\t @param {string} htmlTag The HTML tag to check.\n\t * @returns {boolean} Whether or not the given HTML tag is a break tag.\n\t /\n\tisBreakTag( htmlTag ) {\n\t\treturn /<\\/?br/.test( htmlTag );\n\t}\n\n\t/\n\t Returns whether or not a given character is quotation mark.\n\t \n\t @param {string} character The character to check.\n\t * @returns {boolean} Whether or not the given character is a quotation mark.\n\t /\n\tisQuotation( character ) {\n\t\tcharacter = normalizeQuotes( character );\n\n\t\treturn \"'\" === character \|\|\n\t\t\t\"\\\"\" === character;\n\t}\n\n\t/\n\t A mock definition of this function. This function is only used in extensions for languages that use an ordinal dot.\n\t \n\t @returns {boolean} Always returns false as it is a language specific implementation if a language has an ordinal dot.\n\t /\n\tendsWithOrdinalDot() {\n\t\treturn false;\n\t}\n\n\t/\n\t Returns whether or not a given character is a punctuation mark that can be at the beginning\n\t * of a sentence, like ¿ and ¡ used in Spanish.\n\t \n\t @param {string} character The character to check.\n\t * @returns {boolean} Whether or not the given character is a punctuation mark.\n\t /\n\tisPunctuation( character ) {\n\t\treturn \"¿\" === character \|\|\n\t\t\t\"¡\" === character;\n\t}\n\n\t/\n\t Removes duplicate whitespace from a given text.\n\t \n\t @param {string} text The text with duplicate whitespace.\n\t * @returns {string} The text without duplicate whitespace.\n\t /\n\tremoveDuplicateWhitespace( text ) {\n\t\treturn text.replace( /\\s+/, \" \" );\n\t}\n\n\t/\n\t Returns whether or not a certain character is a capital letter.\n\t \n\t @param {string} character The character to check.\n\t * @returns {boolean} Whether or not the character is a capital letter.\n\t /\n\tisCapitalLetter( character ) {\n\t\treturn character !== character.toLocaleLowerCase();\n\t}\n\n\t/\n\t Checks whether the given character is a smaller than sign.\n\t \n\t This function is used to make sure that tokenizing the content after\n\t * the smaller than sign works as expected.\n\t * E.g. 'A sentence. < Hello world!' = ['A sentence.', '< Hello world!'].\n\t \n\t @param {string} character The character to check.\n\t * @returns {boolean} Whether the character is a smaller than sign ('<') or not.\n\t /\n\tisSmallerThanSign( character ) {\n\t\treturn character === \"<\";\n\t}\n\n\t/\n\t Retrieves the next two characters from an array with the two next tokens.\n\t \n\t @param {Array} nextTokens The two next tokens. Might be undefined.\n\t * @returns {string} The next two characters.\n\t /\n\tgetNextTwoCharacters( nextTokens ) {\n\t\tlet next = \"\";\n\n\t\tif ( ! isUndefined( nextTokens[ 0 ] ) ) {\n\t\t\tnext += nextTokens[ 0 ].src;\n\t\t}\n\n\t\tif ( ! isUndefined( nextTokens[ 1 ] ) ) {\n\t\t\tnext += nextTokens[ 1 ].src;\n\t\t}\n\n\t\tnext = this.removeDuplicateWhitespace( next );\n\n\t\treturn next;\n\t}\n\n\t/\n\t Checks whether a character is from a language that's written from right to left.\n\t * These languages don't have capital letter forms. Therefore any letter from these languages is a\n\t * potential sentence beginning.\n\t \n\t @param {string} letter The letter to check.\n\t \n\t @returns {boolean} Whether the letter is from an LTR language.\n\t /\n\tisLetterFromSpecificLanguage( letter ) {\n\t\tconst ltrLetterRanges = [\n\t\t\t// Hebrew characters.\n\t\t\t/^[\\u0590-\\u05fe]+$/i,\n\t\t\t// Arabic characters (used for Arabic, Farsi, Urdu).\n\t\t\t/^[\\u0600-\\u06FF]+$/i,\n\t\t\t// Additional Farsi characters.\n\t\t\t/^[\\uFB8A\\u067E\\u0686\\u06AF]+$/i,\n\t\t];\n\n\t\treturn (\n\t\t\tltrLetterRanges.some( ltrLetterRange => ltrLetterRange.test( letter ) )\n\t\t);\n\t}\n\n\t/\n\t Checks if the sentenceBeginning beginning is a valid beginning.\n\t \n\t @param {string} sentenceBeginning The beginning of the sentence to validate.\n\t * @returns {boolean} Returns true if it is a valid beginning, false if it is not.\n\t /\n\tisValidSentenceBeginning( sentenceBeginning ) {\n\t\treturn ( this.isCapitalLetter( sentenceBeginning ) \|\|\n\t\t\t\tthis.isLetterFromSpecificLanguage( sentenceBeginning ) \|\|\n\t\t\t\tthis.isNumber( sentenceBeginning ) \|\|\n\t\t\t\tthis.isQuotation( sentenceBeginning ) \|\|\n\t\t\t\tthis.isPunctuation( sentenceBeginning ) \|\|\n\t\t\t\tthis.isSmallerThanSign( sentenceBeginning ) );\n\t}\n\n\t/\n\t Checks if the token is a valid sentence start.\n\t \n\t @param {Object} token The token to validate.\n\t * @returns {boolean} Returns true if the token is valid sentence start, false if it is not.\n\t /\n\tisSentenceStart( token ) {\n\t\treturn ( ! isUndefined( token ) && (\n\t\t\t\"html-start\" === token.type \|\|\n\t\t\t\"html-end\" === token.type \|\|\n\t\t\t\"block-start\" === token.type\n\t\t) );\n\t}\n\n\t/\n\t Checks if the token is a valid sentence ending. A valid sentence ending is either a full stop or another\n\t * delimiter such as \"?\", \"!\", etc.\n\t \n\t @param {Object} token The token to validate.\n\t * @returns {boolean} Returns true if the token is valid sentence ending, false if it is not.\n\t /\n\tisSentenceEnding( token ) {\n\t\treturn (\n\t\t\t! isUndefined( token ) &&\n\t\t\t( token.type === \"full-stop\" \|\| token.type === \"sentence-delimiter\" )\n\t\t);\n\t}\n\n\t/\n\t Checks if a full stop is part of a person's initials.\n\t \n\t Tests if tokens exist. Then tests if the tokens are of the right type.\n\t * For previous token, it checks if the sentence ends with a single letter.\n\t * For nextToken it checks if it is a single letter.\n\t * Checks if next token is followed by a full stop.\n\t \n\t @param {object} token The current token (must be a full stop).\n\t * @param {object} previousToken The token before the full stop.\n\t * @param {object} nextToken The token following the full stop.\n\t * @param {object} secondToNextToken The second token after the full stop.\n\t * @returns {boolean} True if a full stop is part of a person's initials, False if the full stop is not part of a person's initials.\n\t /\n\tisPartOfPersonInitial( token, previousToken, nextToken, secondToNextToken ) {\n\t\treturn ( ! isUndefined( token ) &&\n\t\t\t! isUndefined( nextToken ) &&\n\t\t\t! isUndefined( secondToNextToken ) &&\n\t\t\t! isUndefined( previousToken ) &&\n\t\t\ttoken.type === \"full-stop\" &&\n\t\t\tpreviousToken.type === \"sentence\" &&\n\t\t\tlastCharacterPartOfInitialsRegex.test( previousToken.src ) &&\n\t\t\tnextToken.type === \"sentence\" &&\n\t\t\tnextToken.src.trim().length === 1 &&\n\t\t\tsecondToNextToken.type === \"full-stop\"\n\t\t);\n\t}\n\n\t/\n\t Tokens that represent a '<', followed by content until it enters another '<' or '>'\n\t * gets another pass by the tokenizer.\n\t \n\t @param {Object} token A token of type 'smaller-than-sign-content'.\n\t * @param {string[]} tokenSentences The current array of found sentences. Sentences may get added by this method.\n\t * @param {string} currentSentence The current sentence. Sentence parts may get appended by this method.\n\t * @returns {{tokenSentences, currentSentence}} The found sentences and the current sentence, appended when necessary.\n\t /\n\ttokenizeSmallerThanContent( token, tokenSentences, currentSentence ) {\n\t\t/\n\t\t\tRemove the '<' from the text, to avoid matching this rule\n\t\t\trecursively again and again.\n\t\t\tWe add it again later on.\n\t\t/\n\t\tconst localText = token.src.substring( 1 );\n\n\t\t// Tokenize the current smaller-than-content token without the first '<'.\n\t\tconst tokenizerResult = this.createTokenizer();\n\t\tthis.tokenize( tokenizerResult.tokenizer, localText );\n\t\tconst localSentences = this.getSentencesFromTokens( tokenizerResult.tokens, false );\n\n\t\tlocalSentences[ 0 ] = isUndefined( localSentences[ 0 ] ) ? \"<\" : \"<\" + localSentences[ 0 ];\n\n\t\t/\n\t\t * When the first sentence has a valid sentence beginning.\n\t\t * Add the currently build sentence to the sentences.\n\t\t * Start building the next sentence.\n\t\t /\n\t\tif ( this.isValidSentenceBeginning( localSentences[ 0 ] ) ) {\n\t\t\ttokenSentences.push( currentSentence );\n\t\t\tcurrentSentence = \"\";\n\t\t}\n\t\tcurrentSentence += localSentences[ 0 ];\n\n\t\tif ( localSentences.length > 1 ) {\n\t\t\t/\n\t\t\t\tThere is a new sentence after the first,\n\t\t\t\tadd and reset the current sentence.\n\t\t\t /\n\t\t\ttokenSentences.push( currentSentence );\n\t\t\tcurrentSentence = \"\";\n\n\t\t\t// Remove the first sentence (we do not need to add it again).\n\t\t\tlocalSentences.shift();\n\t\t\t// Last sentence gets special treatment.\n\t\t\tconst lastSentence = localSentences.pop();\n\n\t\t\t// Add the remaining found sentences.\n\t\t\tlocalSentences.forEach( sentence => {\n\t\t\t\ttokenSentences.push( sentence );\n\t\t\t} );\n\n\t\t\tconst sentenceEndRegex = new RegExp( \"[\" + fullStop + this.getSentenceDelimiters() + \"]$\" );\n\n\t\t\t// Check if the last sentence has a valid sentence ending.\n\t\t\tif ( lastSentence.match( sentenceEndRegex ) ) {\n\t\t\t\t// If so, add it as a sentence.\n\t\t\t\ttokenSentences.push( lastSentence );\n\t\t\t} else {\n\t\t\t\t// If not, start making a new one.\n\t\t\t\tcurrentSentence = lastSentence;\n\t\t\t}\n\t\t}\n\t\treturn {\n\t\t\ttokenSentences,\n\t\t\tcurrentSentence,\n\t\t};\n\t}\n\n\t/\n\t Creates a tokenizer.\n\t \n\t @returns {Object} The tokenizer and the tokens.\n\t /\n\tcreateTokenizer() {\n\t\tconst sentenceDelimiterRegex = new RegExp( \"^[\" + this.getSentenceDelimiters() + \"]$\" );\n\t\tconst sentenceRegex = new RegExp( \"^[^\" + fullStop + this.getSentenceDelimiters() + \"<\\\$\\\$\\\\[\\\\]]+$\" );\n\n\t\tconst tokens = [];\n\t\tconst tokenizer = core( function( token ) {\n\t\t\ttokens.push( token );\n\t\t} );\n\n\t\ttokenizer.addRule( fullStopRegex, \"full-stop\" );\n\t\ttokenizer.addRule( smallerThanContentRegex, \"smaller-than-sign-content\" );\n\t\ttokenizer.addRule( htmlStartRegex, \"html-start\" );\n\t\ttokenizer.addRule( htmlEndRegex, \"html-end\" );\n\t\ttokenizer.addRule( blockStartRegex, \"block-start\" );\n\t\ttokenizer.addRule( blockEndRegex, \"block-end\" );\n\t\ttokenizer.addRule( sentenceDelimiterRegex, \"sentence-delimiter\" );\n\t\ttokenizer.addRule( sentenceRegex, \"sentence\" );\n\n\t\treturn {\n\t\t\ttokenizer,\n\t\t\ttokens,\n\t\t};\n\t}\n\n\t/\n\t Tokenizes the given text using the given tokenizer.\n\t \n\t @param {Object} tokenizer The tokenizer to use.\n\t * @param {string} text The text to tokenize.\n\t * @returns {void}\n\t /\n\ttokenize( tokenizer, text ) {\n\t\ttokenizer.onText( text );\n\n\t\ttry {\n\t\t\ttokenizer.end();\n\t\t} catch ( e ) {\n\t\t\tconsole.error( \"Tokenizer end error:\", e, e.tokenizer2 );\n\t\t}\n\t}\n\n\t/\n\t Checks if a string ends with an abbreviation.\n\t * @param {string} currentSentence A (part of) a sentence.\n\t * @returns {boolean} True if the string ends with an abbreviation that is in abbreviations.js. Otherwise, False.\n\t /\n\tendsWithAbbreviation( currentSentence ) {\n\t\tconst matchedAbbreviations = currentSentence.match( abbreviationsRegex );\n\n\t\tif ( ! matchedAbbreviations ) {\n\t\t\treturn false;\n\t\t}\n\n\t\tconst lastAbbreviation = matchedAbbreviations.pop();\n\t\treturn currentSentence.endsWith( lastAbbreviation );\n\t}\n\n\t/\n\t Checks whether the given tokens are a valid html tag pair.\n\t * Note that this method is not a full html tag validator. It should be replaced with a better solution once the html parser is implemented.\n\t \n\t @param {object} firstToken The first token to check. It is asserted that this token contains/is an opening html tag.\n\t * @param {object} lastToken The last token to check. It is asserted that this token contains/is a closing html tag.\n\t \n\t @returns {boolean} True if the tokens are a valid html tag pair. Otherwise, False.\n\t /\n\tisValidTagPair( firstToken, lastToken ) {\n\t\tconst firstTokenText = firstToken.src;\n\t\tconst lastTokenText = lastToken.src;\n\n\t\t// Get the tag types.\n\t\tconst firstTagType = firstTokenText.match( tagTypeRegex )[ 1 ];\n\t\tconst lastTagType = lastTokenText.match( tagTypeRegex )[ 1 ];\n\n\n\t\t// Check if the tags are the same and if they are a semantic tag (p, div, h1, h2, h3, h4, h5, h6, span).\n\t\treturn firstTagType === lastTagType && semanticTags.includes( firstTagType );\n\t}\n\n\t/\n\t Returns an array of sentences for a given array of tokens, assumes that the text has already been split into blocks.\n\t \n\t @param {Object[]} tokenArray The tokens from the sentence tokenizer.\n\t * @param {boolean} [trimSentences=true] Whether to trim the sentences at the end or not.\n\t \n\t @returns {string[]} A list of sentences.\n\t /\n\tgetSentencesFromTokens( tokenArray, trimSentences = true ) {\n\t\tlet tokenSentences = [], currentSentence = \"\", nextSentenceStart, sliced;\n\n\t\t// Drop the first and last HTML tag if both are present.\n\t\tdo {\n\t\t\tsliced = false;\n\t\t\tconst firstToken = tokenArray[ 0 ];\n\t\t\tconst lastToken = tokenArray[ tokenArray.length - 1 ];\n\n\t\t\tif ( firstToken && lastToken && firstToken.type === \"html-start\" &&\n\t\t\t\tlastToken.type === \"html-end\" && this.isValidTagPair( firstToken, lastToken ) ) {\n\t\t\t\ttokenArray = tokenArray.slice( 1, tokenArray.length - 1 );\n\n\t\t\t\tsliced = true;\n\t\t\t}\n\t\t} while ( sliced && tokenArray.length > 1 );\n\n\t\ttokenArray.forEach( ( token, i ) => {\n\t\t\tlet hasNextSentence, nextCharacters, tokenizeResults;\n\t\t\tconst nextToken = tokenArray[ i + 1 ];\n\t\t\tconst previousToken = tokenArray[ i - 1 ];\n\t\t\tconst secondToNextToken = tokenArray[ i + 2 ];\n\t\t\tnextCharacters = this.getNextTwoCharacters( [ nextToken, secondToNextToken ] );\n\n\t\t\t// For a new sentence we need to check the next two characters.\n\t\t\thasNextSentence = nextCharacters.length >= 2;\n\t\t\tnextSentenceStart = hasNextSentence ? nextCharacters[ 1 ] : \"\";\n\n\t\t\tswitch ( token.type ) {\n\t\t\t\tcase \"html-start\":\n\t\t\t\tcase \"html-end\":\n\t\t\t\t\tif ( this.isBreakTag( token.src ) ) {\n\t\t\t\t\t\ttokenSentences.push( currentSentence );\n\t\t\t\t\t\tcurrentSentence = \"\";\n\t\t\t\t\t} else {\n\t\t\t\t\t\tcurrentSentence += token.src;\n\t\t\t\t\t}\n\t\t\t\t\tbreak;\n\n\t\t\t\tcase \"smaller-than-sign-content\":\n\t\t\t\t\ttokenizeResults = this.tokenizeSmallerThanContent( token, tokenSentences, currentSentence );\n\t\t\t\t\ttokenSentences = tokenizeResults.tokenSentences;\n\t\t\t\t\tcurrentSentence = tokenizeResults.currentSentence;\n\t\t\t\t\tbreak;\n\t\t\t\tcase \"sentence\":\n\t\t\t\t\tcurrentSentence += token.src;\n\t\t\t\t\tbreak;\n\t\t\t\tcase \"sentence-delimiter\":\n\t\t\t\t\tcurrentSentence += token.src;\n\n\t\t\t\t\t/\n\t\t\t\t * Only split text into sentences if:\n\t\t\t\t * the next token is defined, AND\n\t\t\t\t * the next token type is neither \"block-end\" nor \"sentence-delimiter\", AND\n\t\t\t\t * the next token first character is a white space\n\t\t\t\t /\n\t\t\t\t\tif ( ! isUndefined( nextToken ) &&\n\t\t\t\t\t\t\"block-end\" !== nextToken.type &&\n\t\t\t\t\t\t\"sentence-delimiter\" !== nextToken.type &&\n\t\t\t\t\t\tthis.isCharacterASpace( nextToken.src[ 0 ] ) ) {\n\t\t\t\t\t\t// Don't split on quotation marks unless they're preceded by a full stop.\n\t\t\t\t\t\tif ( this.isQuotation( token.src ) && previousToken && previousToken.src !== \".\" ) {\n\t\t\t\t\t\t\tbreak;\n\t\t\t\t\t\t}\n\t\t\t\t\t\t/\n\t\t\t\t * Only split on ellipsis or quotation marks when:\n\t\t\t\t\t * a) There is a next sentence, and the next character is a valid sentence beginning preceded by a white space, OR\n\t\t\t\t\t * b) The next token is a sentence start\n\t\t\t\t\t /\n\t\t\t\t\t\tif ( this.isQuotation( token.src ) \|\| token.src === \"…\" ) {\n\t\t\t\t\t\t\tcurrentSentence = this.getValidSentence( hasNextSentence,\n\t\t\t\t\t\t\t\tnextSentenceStart,\n\t\t\t\t\t\t\t\tnextCharacters,\n\t\t\t\t\t\t\t\tnextToken,\n\t\t\t\t\t\t\t\ttokenSentences,\n\t\t\t\t\t\t\t\tcurrentSentence );\n\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\ttokenSentences.push( currentSentence );\n\t\t\t\t\t\t\tcurrentSentence = \"\";\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t\tbreak;\n\n\t\t\t\tcase \"full-stop\":\n\t\t\t\t\tcurrentSentence += token.src;\n\t\t\t\t\tnextCharacters = this.getNextTwoCharacters( [ nextToken, secondToNextToken ] );\n\n\t\t\t\t\t// For a new sentence we need to check the next two characters.\n\t\t\t\t\thasNextSentence = nextCharacters.length >= 2;\n\t\t\t\t\tnextSentenceStart = hasNextSentence ? nextCharacters[ 1 ] : \"\";\n\n\t\t\t\t\t// If the current sentence ends with an abbreviation, the full stop does not split the sentence.\n\t\t\t\t\tif ( this.endsWithAbbreviation( currentSentence ) ) {\n\t\t\t\t\t\tbreak;\n\t\t\t\t\t}\n\n\t\t\t\t\t// It should not split the text if the first character of the potential next sentence is a number.\n\t\t\t\t\tif ( hasNextSentence && this.isNumber( nextCharacters[ 0 ] ) ) {\n\t\t\t\t\t\tbreak;\n\t\t\t\t\t}\n\n\t\t\t\t\t// If the full stop is part of a person's initials, don't split sentence.\n\t\t\t\t\tif ( this.isPartOfPersonInitial( token, previousToken, nextToken, secondToNextToken ) ) {\n\t\t\t\t\t\tbreak;\n\t\t\t\t\t}\n\n\t\t\t\t\t// If the full stop is an ordinal dot (in German), then don't break the sentence.\n\t\t\t\t\t// This check should be done after hasNextSentence && this.isNumber( nextCharacters[ 0 ] ) (above).\n\t\t\t\t\t// Because otherwise it could break before that test.\n\t\t\t\t\tif ( this.endsWithOrdinalDot( currentSentence ) ) {\n\t\t\t\t\t\tbreak;\n\t\t\t\t\t}\n\n\t\t\t\t\t/\n\t\t\t\t\t * Only split on full stop when:\n\t\t\t\t\t * a) There is a next sentence, and the next character is a valid sentence beginning preceded by a white space, OR\n\t\t\t\t\t * b) The next token is a sentence start\n\t\t\t\t\t /\n\t\t\t\t\tcurrentSentence = this.getValidSentence( hasNextSentence,\n\t\t\t\t\t\tnextSentenceStart,\n\t\t\t\t\t\tnextCharacters,\n\t\t\t\t\t\tnextToken,\n\t\t\t\t\t\ttokenSentences,\n\t\t\t\t\t\tcurrentSentence );\n\n\t\t\t\t\tbreak;\n\n\t\t\t\tcase \"block-start\":\n\t\t\t\t\tcurrentSentence += token.src;\n\t\t\t\t\tbreak;\n\n\t\t\t\tcase \"block-end\":\n\t\t\t\t\tcurrentSentence += token.src;\n\n\t\t\t\t\tnextCharacters = this.getNextTwoCharacters( [ nextToken, secondToNextToken ] );\n\n\t\t\t\t\t// For a new sentence we need to check the next two characters.\n\t\t\t\t\thasNextSentence = nextCharacters.length >= 2;\n\t\t\t\t\tnextSentenceStart = hasNextSentence ? nextCharacters[ 0 ] : \"\";\n\n\t\t\t\t\t/ Don't split if:\n\t\t\t\t\t * - The next character is a number. For example: IPv4-numbers.\n\t\t\t\t\t * - The block end is preceded by a valid sentence ending, but not followed by a valid sentence beginning.\n\t\t\t\t\t /\n\t\t\t\t\tif (\n\t\t\t\t\t\thasNextSentence && this.isNumber( nextCharacters[ 0 ] ) \|\|\n\t\t\t\t\t\t( this.isSentenceEnding( previousToken ) &&\n\t\t\t\t\t\t\t( ! ( this.isValidSentenceBeginning( nextSentenceStart ) \|\| this.isSentenceStart( nextToken ) ) ) )\n\t\t\t\t\t) {\n\t\t\t\t\t\tbreak;\n\t\t\t\t\t}\n\n\t\t\t\t\t/\n\t\t\t\t\t * Split if:\n\t\t\t\t\t * - The block end is preceded by a sentence ending and followed by a valid sentence beginning.\n\t\t\t\t\t /\n\t\t\t\t\tif (\n\t\t\t\t\t\tthis.isSentenceEnding( previousToken ) &&\n\t\t\t\t\t\t( this.isSentenceStart( nextToken ) \|\| this.isValidSentenceBeginning( nextSentenceStart ) )\n\t\t\t\t\t) {\n\t\t\t\t\t\ttokenSentences.push( currentSentence );\n\t\t\t\t\t\tcurrentSentence = \"\";\n\t\t\t\t\t}\n\t\t\t\t\tbreak;\n\t\t\t}\n\t\t} );\n\n\t\tif ( \"\" !== currentSentence ) {\n\t\t\ttokenSentences.push( currentSentence );\n\t\t}\n\n\t\tif ( trimSentences ) {\n\t\t\ttokenSentences = map( tokenSentences, function( sentence ) {\n\t\t\t\treturn sentence.trim();\n\t\t\t} );\n\t\t}\n\n\t\treturn tokenSentences;\n\t}\n\n\t/\n\t Gets the current sentence when:\n\t * a) There is a next sentence, and the next character is a valid sentence beginning preceded by a white space, OR\n\t * b) The next token is a sentence start\n\t \n\t @param {boolean} hasNextSentence Whether the next characters are more than two.\n\t * @param {string} nextSentenceStart The second character of the next characters.\n\t * @param {string} nextCharacters The string values of the next two tokens.\n\t * @param {object} nextToken The next token object.\n\t * @param {array} tokenSentences The array of pushed valid sentences.\n\t * @param {string} currentSentence The current sentence.\n\t \n\t @returns {string} The current sentence.\n\t /\n\tgetValidSentence( hasNextSentence, nextSentenceStart, nextCharacters, nextToken, tokenSentences, currentSentence ) {\n\t\tif ( ( hasNextSentence && this.isValidSentenceBeginning( nextSentenceStart ) && this.isCharacterASpace( nextCharacters[ 0 ] ) ) \|\|\n\t\t\tthis.isSentenceStart( nextToken ) ) {\n\t\t\ttokenSentences.push( currentSentence );\n\t\t\tcurrentSentence = \"\";\n\t\t}\n\t\treturn currentSentence;\n\t}\n\n\t/\n\t Checks if the character is a whitespace.\n\t \n\t @param {string} character The character to check.\n\t * @returns {boolean} Whether the character is a whitespace.\n\t */\n\tisCharacterASpace( character ) {\n\t\treturn /\\s/.test( character );\n\t}\n}\n"],"mappings":";;;;;;AAAA,IAAAA,OAAA,GAAAC,OAAA;AAEA,IAAAC,KAAA,GAAAC,sBAAA,CAAAF,OAAA;AAEA,IAAAG,OAAA,GAAAH,OAAA;AAEA,IAAAI,cAAA,GAAAF,sBAAA,CAAAF,OAAA;AAEA,IAAAK,qBAAA,GAAAH,sBAAA,CAAAF,OAAA;AACA,IAAAM,eAAA,GAAAJ,sBAAA,CAAAF,OAAA;AAA4D,SAAAE,uBAAAK,CAAA,WAAAA,CAAA,IAAAA,CAAA,CAAAC,UAAA,GAAAD,CAAA,KAAAE,OAAA,EAAAF,CAAA;AAE5D;AACA,MAAMG,QAAQ,GAAG,GAAG;AAEpB,MAAMC,aAAa,GAAG,IAAIC,MAAM,CAAE,IAAI,GAAGF,QAAQ,GAAG,IAAK,CAAC;AAC1D,MAAMG,uBAAuB,GAAG,WAAW;AAC3C,MAAMC,cAAc,GAAG,uBAAuB;AAC9C,MAAMC,YAAY,GAAG,wBAAwB;AAE7C,MAAMC,eAAe,GAAG,eAAe;AACvC,MAAMC,aAAa,GAAG,gBAAgB;AAEtC,MAAMC,6BAA6B,GAAGC,sBAAa,CAACC,GAAG,CAAIC,YAAY,IAAMA,YAAY,CAACC,OAAO,CAAE,GAAG,EAAE,KAAM,CAAE,CAAC;AACjH,MAAMC,kBAAkB,GAAG,IAAAC,6BAAoB,EAAEN,6BAA8B,CAAC;AAEhF,MAAMO,sBAAsB,GAAG,QAAQ,GAAG,IAAAC,uBAAc,EAAC,CAAC,CAACN,GAAG,CAAIO,QAAQ,IAAM,IAAI,GAAGA,QAAS,CAAC,CAACC,IAAI,CAAE,EAAG,CAAC,GAAG,IAAI;AACnH,MAAMC,gCAAgC,GAAG,IAAIjB,MAAM,CAAEa,sBAAsB,GAAG,WAAY,CAAC;;AAE3F;AACA;AACA,MAAMK,YAAY,GAAG,qBAAqB;AAC1C;AACA,MAAMC,YAAY,GAAG,CAAE,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAE;;AAE7F;AACA;AACA;AACe,MAAMC,iBAAiB,CAAC;EACtC;AACD;AACA;AACA;EACCC,WAAWA,CAAA,EAAG;IACb;AACF;AACA;AACA;AACA;IACE,IAAI,CAACC,kBAAkB,GAAG,kCAAkC;EAC7D;;EAEA;AACD;AACA;AACA;AACA;EACCC,qBAAqBA,CAAA,EAAG;IACvB,OAAO,IAAI,CAACD,kBAAkB;EAC/B;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCE,QAAQA,CAAEC,SAAS,EAAG;IACrB,OAAO,CAAE,IAAAC,aAAK,EAAEC,QAAQ,CAAEF,SAAS,EAAE,EAAG,CAAE,CAAC;EAC5C;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCG,UAAUA,CAAEC,OAAO,EAAG;IACrB,OAAO,QAAQ,CAACC,IAAI,CAAED,OAAQ,CAAC;EAChC;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCE,WAAWA,CAAEN,SAAS,EAAG;IACxBA,SAAS,GAAG,IAAAO,iBAAe,EAAEP,SAAU,CAAC;IAExC,OAAO,GAAG,KAAKA,SAAS,IACvB,IAAI,KAAKA,SAAS;EACpB;;EAEA;AACD;AACA;AACA;AACA;EACCQ,kBAAkBA,CAAA,EAAG;IACpB,OAAO,KAAK;EACb;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;EACCC,aAAaA,CAAET,SAAS,EAAG;IAC1B,OAAO,GAAG,KAAKA,SAAS,IACvB,GAAG,KAAKA,SAAS;EACnB;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCU,yBAAyBA,CAAEC,IAAI,EAAG;IACjC,OAAOA,IAAI,CAAC1B,OAAO,CAAE,KAAK,EAAE,GAAI,CAAC;EAClC;;EAEA;AACD;AACA;AACA;AACA;AACA;EACC2B,eAAeA,CAAEZ,SAAS,EAAG;IAC5B,OAAOA,SAAS,KAAKA,SAAS,CAACa,iBAAiB,CAAC,CAAC;EACnD;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACCC,iBAAiBA,CAAEd,SAAS,EAAG;IAC9B,OAAOA,SAAS,KAAK,GAAG;EACzB;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCe,oBAAoBA,CAAEC,UAAU,EAAG;IAClC,IAAIC,IAAI,GAAG,EAAE;IAEb,IAAK,CAAE,IAAAC,mBAAW,EAAEF,UAAU,CAAE,CAAC,CAAG,CAAC,EAAG;MACvCC,IAAI,IAAID,UAAU,CAAE,CAAC,CAAE,CAACG,GAAG;IAC5B;IAEA,IAAK,CAAE,IAAAD,mBAAW,EAAEF,UAAU,CAAE,CAAC,CAAG,CAAC,EAAG;MACvCC,IAAI,IAAID,UAAU,CAAE,CAAC,CAAE,CAACG,GAAG;IAC5B;IAEAF,IAAI,GAAG,IAAI,CAACP,yBAAyB,CAAEO,IAAK,CAAC;IAE7C,OAAOA,IAAI;EACZ;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACCG,4BAA4BA,CAAEC,MAAM,EAAG;IACtC,MAAMC,eAAe,GAAG;IACvB;IACA,qBAAqB;IACrB;IACA,qBAAqB;IACrB;IACA,gCAAgC,CAChC;IAED,OACCA,eAAe,CAACC,IAAI,CAAEC,cAAc,IAAIA,cAAc,CAACnB,IAAI,CAAEgB,MAAO,CAAE,CAAC;EAEzE;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCI,wBAAwBA,CAAEC,iBAAiB,EAAG;IAC7C,OAAS,IAAI,CAACd,eAAe,CAAEc,iBAAkB,CAAC,IAChD,IAAI,CAACN,4BAA4B,CAAEM,iBAAkB,CAAC,IACtD,IAAI,CAAC3B,QAAQ,CAAE2B,iBAAkB,CAAC,IAClC,IAAI,CAACpB,WAAW,CAAEoB,iBAAkB,CAAC,IACrC,IAAI,CAACjB,aAAa,CAAEiB,iBAAkB,CAAC,IACvC,IAAI,CAACZ,iBAAiB,CAAEY,iBAAkB,CAAC;EAC9C;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCC,eAAeA,CAAEC,KAAK,EAAG;IACxB,OAAS,CAAE,IAAAV,mBAAW,EAAEU,KAAM,CAAC,KAC9B,YAAY,KAAKA,KAAK,CAACC,IAAI,IAC3B,UAAU,KAAKD,KAAK,CAACC,IAAI,IACzB,aAAa,KAAKD,KAAK,CAACC,IAAI,CAC5B;EACF;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;EACCC,gBAAgBA,CAAEF,KAAK,EAAG;IACzB,OACC,CAAE,IAAAV,mBAAW,EAAEU,KAAM,CAAC,KACpBA,KAAK,CAACC,IAAI,KAAK,WAAW,IAAID,KAAK,CAACC,IAAI,KAAK,oBAAoB,CAAE;EAEvE;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACCE,qBAAqBA,CAAEH,KAAK,EAAEI,aAAa,EAAEC,SAAS,EAAEC,iBAAiB,EAAG;IAC3E,OAAS,CAAE,IAAAhB,mBAAW,EAAEU,KAAM,CAAC,IAC9B,CAAE,IAAAV,mBAAW,EAAEe,SAAU,CAAC,IAC1B,CAAE,IAAAf,mBAAW,EAAEgB,iBAAkB,CAAC,IAClC,CAAE,IAAAhB,mBAAW,EAAEc,aAAc,CAAC,IAC9BJ,KAAK,CAACC,IAAI,KAAK,WAAW,IAC1BG,aAAa,CAACH,IAAI,KAAK,UAAU,IACjCrC,gCAAgC,CAACa,IAAI,CAAE2B,aAAa,CAACb,GAAI,CAAC,IAC1Dc,SAAS,CAACJ,IAAI,KAAK,UAAU,IAC7BI,SAAS,CAACd,GAAG,CAACgB,IAAI,CAAC,CAAC,CAACC,MAAM,KAAK,CAAC,IACjCF,iBAAiB,CAACL,IAAI,KAAK,WAAW;EAExC;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACCQ,0BAA0BA,CAAET,KAAK,EAAEU,cAAc,EAAEC,eAAe,EAAG;IACpE;AACF;AACA;AACA;AACA;IACE,MAAMC,SAAS,GAAGZ,KAAK,CAACT,GAAG,CAACsB,SAAS,CAAE,CAAE,CAAC;;IAE1C;IACA,MAAMC,eAAe,GAAG,IAAI,CAACC,eAAe,CAAC,CAAC;IAC9C,IAAI,CAACC,QAAQ,CAAEF,eAAe,CAACG,SAAS,EAAEL,SAAU,CAAC;IACrD,MAAMM,cAAc,GAAG,IAAI,CAACC,sBAAsB,CAAEL,eAAe,CAACM,MAAM,EAAE,KAAM,CAAC;IAEnFF,cAAc,CAAE,CAAC,CAAE,GAAG,IAAA5B,mBAAW,EAAE4B,cAAc,CAAE,CAAC,CAAG,CAAC,GAAG,GAAG,GAAG,GAAG,GAAGA,cAAc,CAAE,CAAC,CAAE;;IAE1F;AACF;AACA;AACA;AACA;IACE,IAAK,IAAI,CAACrB,wBAAwB,CAAEqB,cAAc,CAAE,CAAC,CAAG,CAAC,EAAG;MAC3DR,cAAc,CAACW,IAAI,CAAEV,eAAgB,CAAC;MACtCA,eAAe,GAAG,EAAE;IACrB;IACAA,eAAe,IAAIO,cAAc,CAAE,CAAC,CAAE;IAEtC,IAAKA,cAAc,CAACV,MAAM,GAAG,CAAC,EAAG;MAChC;AACH;AACA;AACA;MACGE,cAAc,CAACW,IAAI,CAAEV,eAAgB,CAAC;MACtCA,eAAe,GAAG,EAAE;;MAEpB;MACAO,cAAc,CAACI,KAAK,CAAC,CAAC;MACtB;MACA,MAAMC,YAAY,GAAGL,cAAc,CAACM,GAAG,CAAC,CAAC;;MAEzC;MACAN,cAAc,CAACO,OAAO,CAAEC,QAAQ,IAAI;QACnChB,cAAc,CAACW,IAAI,CAAEK,QAAS,CAAC;MAChC,CAAE,CAAC;MAEH,MAAMC,gBAAgB,GAAG,IAAIhF,MAAM,CAAE,GAAG,GAAGF,QAAQ,GAAG,IAAI,CAACyB,qBAAqB,CAAC,CAAC,GAAG,IAAK,CAAC;;MAE3F;MACA,IAAKqD,YAAY,CAACK,KAAK,CAAED,gBAAiB,CAAC,EAAG;QAC7C;QACAjB,cAAc,CAACW,IAAI,CAAEE,YAAa,CAAC;MACpC,CAAC,MAAM;QACN;QACAZ,eAAe,GAAGY,YAAY;MAC/B;IACD;IACA,OAAO;MACNb,cAAc;MACdC;IACD,CAAC;EACF;;EAEA;AACD;AACA;AACA;AACA;EACCI,eAAeA,CAAA,EAAG;IACjB,MAAMc,sBAAsB,GAAG,IAAIlF,MAAM,CAAE,IAAI,GAAG,IAAI,CAACuB,qBAAqB,CAAC,CAAC,GAAG,IAAK,CAAC;IACvF,MAAM4D,aAAa,GAAG,IAAInF,MAAM,CAAE,KAAK,GAAGF,QAAQ,GAAG,IAAI,CAACyB,qBAAqB,CAAC,CAAC,GAAG,kBAAmB,CAAC;IAExG,MAAMkD,MAAM,GAAG,EAAE;IACjB,MAAMH,SAAS,GAAG,IAAAc,aAAI,EAAE,UAAU/B,KAAK,EAAG;MACzCoB,MAAM,CAACC,IAAI,CAAErB,KAAM,CAAC;IACrB,CAAE,CAAC;IAEHiB,SAAS,CAACe,OAAO,CAAEtF,aAAa,EAAE,WAAY,CAAC;IAC/CuE,SAAS,CAACe,OAAO,CAAEpF,uBAAuB,EAAE,2BAA4B,CAAC;IACzEqE,SAAS,CAACe,OAAO,CAAEnF,cAAc,EAAE,YAAa,CAAC;IACjDoE,SAAS,CAACe,OAAO,CAAElF,YAAY,EAAE,UAAW,CAAC;IAC7CmE,SAAS,CAACe,OAAO,CAAEjF,eAAe,EAAE,aAAc,CAAC;IACnDkE,SAAS,CAACe,OAAO,CAAEhF,aAAa,EAAE,WAAY,CAAC;IAC/CiE,SAAS,CAACe,OAAO,CAAEH,sBAAsB,EAAE,oBAAqB,CAAC;IACjEZ,SAAS,CAACe,OAAO,CAAEF,aAAa,EAAE,UAAW,CAAC;IAE9C,OAAO;MACNb,SAAS;MACTG;IACD,CAAC;EACF;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;EACCJ,QAAQA,CAAEC,SAAS,EAAElC,IAAI,EAAG;IAC3BkC,SAAS,CAACgB,MAAM,CAAElD,IAAK,CAAC;IAExB,IAAI;MACHkC,SAAS,CAACiB,GAAG,CAAC,CAAC;IAChB,CAAC,CAAC,OAAQ5F,CAAC,EAAG;MACb6F,OAAO,CAACC,KAAK,CAAE,sBAAsB,EAAE9F,CAAC,EAAEA,CAAC,CAAC+F,UAAW,CAAC;IACzD;EACD;;EAEA;AACD;AACA;AACA;AACA;EACCC,oBAAoBA,CAAE3B,eAAe,EAAG;IACvC,MAAM4B,oBAAoB,GAAG5B,eAAe,CAACiB,KAAK,CAAEtE,kBAAmB,CAAC;IAExE,IAAK,CAAEiF,oBAAoB,EAAG;MAC7B,OAAO,KAAK;IACb;IAEA,MAAMC,gBAAgB,GAAGD,oBAAoB,CAACf,GAAG,CAAC,CAAC;IACnD,OAAOb,eAAe,CAAC8B,QAAQ,CAAED,gBAAiB,CAAC;EACpD;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACCE,cAAcA,CAAEC,UAAU,EAAEC,SAAS,EAAG;IACvC,MAAMC,cAAc,GAAGF,UAAU,CAACpD,GAAG;IACrC,MAAMuD,aAAa,GAAGF,SAAS,CAACrD,GAAG;;IAEnC;IACA,MAAMwD,YAAY,GAAGF,cAAc,CAACjB,KAAK,CAAE/D,YAAa,CAAC,CAAE,CAAC,CAAE;IAC9D,MAAMmF,WAAW,GAAIF,aAAa,CAAClB,KAAK,CAAE/D,YAAa,CAAC,CAAE,CAAC,CAAE;;IAG7D;IACA,OAAOkF,YAAY,KAAKC,WAAW,IAAIlF,YAAY,CAACmF,QAAQ,CAAEF,YAAa,CAAC;EAC7E;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;AACA;EACC5B,sBAAsBA,CAAE+B,UAAU,EAAEC,aAAa,GAAG,IAAI,EAAG;IAC1D,IAAIzC,cAAc,GAAG,EAAE;MAAEC,eAAe,GAAG,EAAE;MAAEyC,iBAAiB;MAAEC,MAAM;;IAExE;IACA,GAAG;MACFA,MAAM,GAAG,KAAK;MACd,MAAMV,UAAU,GAAGO,UAAU,CAAE,CAAC,CAAE;MAClC,MAAMN,SAAS,GAAGM,UAAU,CAAEA,UAAU,CAAC1C,MAAM,GAAG,CAAC,CAAE;MAErD,IAAKmC,UAAU,IAAIC,SAAS,IAAID,UAAU,CAAC1C,IAAI,KAAK,YAAY,IAC/D2C,SAAS,CAAC3C,IAAI,KAAK,UAAU,IAAI,IAAI,CAACyC,cAAc,CAAEC,UAAU,EAAEC,SAAU,CAAC,EAAG;QAChFM,UAAU,GAAGA,UAAU,CAACI,KAAK,CAAE,CAAC,EAAEJ,UAAU,CAAC1C,MAAM,GAAG,CAAE,CAAC;QAEzD6C,MAAM,GAAG,IAAI;MACd;IACD,CAAC,QAASA,MAAM,IAAIH,UAAU,CAAC1C,MAAM,GAAG,CAAC;IAEzC0C,UAAU,CAACzB,OAAO,CAAE,CAAEzB,KAAK,EAAEuD,CAAC,KAAM;MACnC,IAAIC,eAAe,EAAEC,cAAc,EAAEC,eAAe;MACpD,MAAMrD,SAAS,GAAG6C,UAAU,CAAEK,CAAC,GAAG,CAAC,CAAE;MACrC,MAAMnD,aAAa,GAAG8C,UAAU,CAAEK,CAAC,GAAG,CAAC,CAAE;MACzC,MAAMjD,iBAAiB,GAAG4C,UAAU,CAAEK,CAAC,GAAG,CAAC,CAAE;MAC7CE,cAAc,GAAG,IAAI,CAACtE,oBAAoB,CAAE,CAAEkB,SAAS,EAAEC,iBAAiB,CAAG,CAAC;;MAE9E;MACAkD,eAAe,GAAGC,cAAc,CAACjD,MAAM,IAAI,CAAC;MAC5C4C,iBAAiB,GAAGI,eAAe,GAAGC,cAAc,CAAE,CAAC,CAAE,GAAG,EAAE;MAE9D,QAASzD,KAAK,CAACC,IAAI;QAClB,KAAK,YAAY;QACjB,KAAK,UAAU;UACd,IAAK,IAAI,CAAC1B,UAAU,CAAEyB,KAAK,CAACT,GAAI,CAAC,EAAG;YACnCmB,cAAc,CAACW,IAAI,CAAEV,eAAgB,CAAC;YACtCA,eAAe,GAAG,EAAE;UACrB,CAAC,MAAM;YACNA,eAAe,IAAIX,KAAK,CAACT,GAAG;UAC7B;UACA;QAED,KAAK,2BAA2B;UAC/BmE,eAAe,GAAG,IAAI,CAACjD,0BAA0B,CAAET,KAAK,EAAEU,cAAc,EAAEC,eAAgB,CAAC;UAC3FD,cAAc,GAAGgD,eAAe,CAAChD,cAAc;UAC/CC,eAAe,GAAG+C,eAAe,CAAC/C,eAAe;UACjD;QACD,KAAK,UAAU;UACdA,eAAe,IAAIX,KAAK,CAACT,GAAG;UAC5B;QACD,KAAK,oBAAoB;UACxBoB,eAAe,IAAIX,KAAK,CAACT,GAAG;;UAE5B;AACL;AACA;AACA;AACA;AACA;UACK,IAAK,CAAE,IAAAD,mBAAW,EAAEe,SAAU,CAAC,IAC9B,WAAW,KAAKA,SAAS,CAACJ,IAAI,IAC9B,oBAAoB,KAAKI,SAAS,CAACJ,IAAI,IACvC,IAAI,CAAC0D,iBAAiB,CAAEtD,SAAS,CAACd,GAAG,CAAE,CAAC,CAAG,CAAC,EAAG;YAC/C;YACA,IAAK,IAAI,CAACb,WAAW,CAAEsB,KAAK,CAACT,GAAI,CAAC,IAAIa,aAAa,IAAIA,aAAa,CAACb,GAAG,KAAK,GAAG,EAAG;cAClF;YACD;YACA;AACN;AACA;AACA;AACA;YACM,IAAK,IAAI,CAACb,WAAW,CAAEsB,KAAK,CAACT,GAAI,CAAC,IAAIS,KAAK,CAACT,GAAG,KAAK,GAAG,EAAG;cACzDoB,eAAe,GAAG,IAAI,CAACiD,gBAAgB,CAAEJ,eAAe,EACvDJ,iBAAiB,EACjBK,cAAc,EACdpD,SAAS,EACTK,cAAc,EACdC,eAAgB,CAAC;YACnB,CAAC,MAAM;cACND,cAAc,CAACW,IAAI,CAAEV,eAAgB,CAAC;cACtCA,eAAe,GAAG,EAAE;YACrB;UACD;UACA;QAED,KAAK,WAAW;UACfA,eAAe,IAAIX,KAAK,CAACT,GAAG;UAC5BkE,cAAc,GAAG,IAAI,CAACtE,oBAAoB,CAAE,CAAEkB,SAAS,EAAEC,iBAAiB,CAAG,CAAC;;UAE9E;UACAkD,eAAe,GAAGC,cAAc,CAACjD,MAAM,IAAI,CAAC;UAC5C4C,iBAAiB,GAAGI,eAAe,GAAGC,cAAc,CAAE,CAAC,CAAE,GAAG,EAAE;;UAE9D;UACA,IAAK,IAAI,CAACnB,oBAAoB,CAAE3B,eAAgB,CAAC,EAAG;YACnD;UACD;;UAEA;UACA,IAAK6C,eAAe,IAAI,IAAI,CAACrF,QAAQ,CAAEsF,cAAc,CAAE,CAAC,CAAG,CAAC,EAAG;YAC9D;UACD;;UAEA;UACA,IAAK,IAAI,CAACtD,qBAAqB,CAAEH,KAAK,EAAEI,aAAa,EAAEC,SAAS,EAAEC,iBAAkB,CAAC,EAAG;YACvF;UACD;;UAEA;UACA;UACA;UACA,IAAK,IAAI,CAAC1B,kBAAkB,CAAE+B,eAAgB,CAAC,EAAG;YACjD;UACD;;UAEA;AACL;AACA;AACA;AACA;UACKA,eAAe,GAAG,IAAI,CAACiD,gBAAgB,CAAEJ,eAAe,EACvDJ,iBAAiB,EACjBK,cAAc,EACdpD,SAAS,EACTK,cAAc,EACdC,eAAgB,CAAC;UAElB;QAED,KAAK,aAAa;UACjBA,eAAe,IAAIX,KAAK,CAACT,GAAG;UAC5B;QAED,KAAK,WAAW;UACfoB,eAAe,IAAIX,KAAK,CAACT,GAAG;UAE5BkE,cAAc,GAAG,IAAI,CAACtE,oBAAoB,CAAE,CAAEkB,SAAS,EAAEC,iBAAiB,CAAG,CAAC;;UAE9E;UACAkD,eAAe,GAAGC,cAAc,CAACjD,MAAM,IAAI,CAAC;UAC5C4C,iBAAiB,GAAGI,eAAe,GAAGC,cAAc,CAAE,CAAC,CAAE,GAAG,EAAE;;UAE9D;AACL;AACA;AACA;UACK,IACCD,eAAe,IAAI,IAAI,CAACrF,QAAQ,CAAEsF,cAAc,CAAE,CAAC,CAAG,CAAC,IACrD,IAAI,CAACvD,gBAAgB,CAAEE,aAAc,CAAC,IACrC,EAAI,IAAI,CAACP,wBAAwB,CAAEuD,iBAAkB,CAAC,IAAI,IAAI,CAACrD,eAAe,CAAEM,SAAU,CAAC,CAAM,EACnG;YACD;UACD;;UAEA;AACL;AACA;AACA;UACK,IACC,IAAI,CAACH,gBAAgB,CAAEE,aAAc,CAAC,KACpC,IAAI,CAACL,eAAe,CAAEM,SAAU,CAAC,IAAI,IAAI,CAACR,wBAAwB,CAAEuD,iBAAkB,CAAC,CAAE,EAC1F;YACD1C,cAAc,CAACW,IAAI,CAAEV,eAAgB,CAAC;YACtCA,eAAe,GAAG,EAAE;UACrB;UACA;MACF;IACD,CAAE,CAAC;IAEH,IAAK,EAAE,KAAKA,eAAe,EAAG;MAC7BD,cAAc,CAACW,IAAI,CAAEV,eAAgB,CAAC;IACvC;IAEA,IAAKwC,aAAa,EAAG;MACpBzC,cAAc,GAAG,IAAAvD,WAAG,EAAEuD,cAAc,EAAE,UAAUgB,QAAQ,EAAG;QAC1D,OAAOA,QAAQ,CAACnB,IAAI,CAAC,CAAC;MACvB,CAAE,CAAC;IACJ;IAEA,OAAOG,cAAc;EACtB;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACCkD,gBAAgBA,CAAEJ,eAAe,EAAEJ,iBAAiB,EAAEK,cAAc,EAAEpD,SAAS,EAAEK,cAAc,EAAEC,eAAe,EAAG;IAClH,IAAO6C,eAAe,IAAI,IAAI,CAAC3D,wBAAwB,CAAEuD,iBAAkB,CAAC,IAAI,IAAI,CAACO,iBAAiB,CAAEF,cAAc,CAAE,CAAC,CAAG,CAAC,IAC5H,IAAI,CAAC1D,eAAe,CAAEM,SAAU,CAAC,EAAG;MACpCK,cAAc,CAACW,IAAI,CAAEV,eAAgB,CAAC;MACtCA,eAAe,GAAG,EAAE;IACrB;IACA,OAAOA,eAAe;EACvB;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCgD,iBAAiBA,CAAEvF,SAAS,EAAG;IAC9B,OAAO,IAAI,CAACK,IAAI,CAAEL,SAAU,CAAC;EAC9B;AACD;AAACyF,OAAA,CAAArH,OAAA,GAAAuB,iBAAA","ignoreList":[]}
1	+ {"version":3,"file":"SentenceTokenizer.js","names":["isNaN","isUndefined","map","core","normalize","normalizeQuotes","abbreviations","createRegexFromArray","wordBoundaries","fullStop","fullStopRegex","RegExp","smallerThanContentRegex","htmlStartRegex","htmlEndRegex","blockStartRegex","blockEndRegex","abbreviationsPreparedForRegex","abbreviation","replace","abbreviationsRegex","wordBoundariesForRegex","boundary","join","lastCharacterPartOfInitialsRegex","tagTypeRegex","semanticTags","SentenceTokenizer","constructor","sentenceDelimiters","getSentenceDelimiters","isNumber","character","parseInt","isBreakTag","htmlTag","test","isQuotation","endsWithOrdinalDot","isPunctuation","removeDuplicateWhitespace","text","isCapitalLetter","toLocaleLowerCase","isSmallerThanSign","getNextTwoCharacters","nextTokens","next","src","isLetterFromSpecificLanguage","letter","ltrLetterRanges","some","ltrLetterRange","isValidSentenceBeginning","sentenceBeginning","isSentenceStart","token","type","isSentenceEnding","isPartOfPersonInitial","previousToken","nextToken","secondToNextToken","trim","length","tokenizeSmallerThanContent","tokenSentences","currentSentence","localText","substring","tokenizerResult","createTokenizer","tokenize","tokenizer","localSentences","getSentencesFromTokens","tokens","push","shift","lastSentence","pop","forEach","sentence","sentenceEndRegex","match","sentenceDelimiterRegex","sentenceRegex","addRule","onText","end","e","console","error","tokenizer2","endsWithAbbreviation","matchedAbbreviations","lastAbbreviation","endsWith","isValidTagPair","firstToken","lastToken","firstTokenText","lastTokenText","firstTagType","lastTagType","includes","tokenArray","trimSentences","nextSentenceStart","sliced","slice","i","hasNextSentence","nextCharacters","tokenizeResults","isCharacterASpace","getValidSentence"],"sources":["../../../../src/languageProcessing/helpers/sentence/SentenceTokenizer.js"],"sourcesContent":["import { isNaN, isUndefined, map } from \"lodash\";\n\nimport core from \"tokenizer2/core\";\n\nimport { normalize as normalizeQuotes } from \"../sanitize/quotes.js\";\n\nimport abbreviations from \"../../languages/en/config/abbreviations\";\n\nimport createRegexFromArray from \"../regex/createRegexFromArray\";\nimport wordBoundaries from \"../../../config/wordBoundaries\";\n\n// All characters that indicate a sentence delimiter.\nconst fullStop = \".\";\n\nconst fullStopRegex = new RegExp( \"^[\" + fullStop + \"]$\" );\nconst smallerThanContentRegex = /^<[^><]$/;\nconst htmlStartRegex = /^<([^>\\s/]+)[^>]>$/mi;\nconst htmlEndRegex = /^<\\/([^>\\s]+)[^>]>$/mi;\n\nconst blockStartRegex = /^\\s[[({]\\s$/;\nconst blockEndRegex = /^\\s[\\])}]\\s$/;\n\nconst abbreviationsPreparedForRegex = abbreviations.map( ( abbreviation ) => abbreviation.replace( \".\", \"\\\\.\" ) );\nconst abbreviationsRegex = createRegexFromArray( abbreviationsPreparedForRegex );\n\nconst wordBoundariesForRegex = \"(^\|$\|[\" + wordBoundaries().map( ( boundary ) => \"\\\\\" + boundary ).join( \"\" ) + \"])\";\nconst lastCharacterPartOfInitialsRegex = new RegExp( wordBoundariesForRegex + \"[A-Za-z]$\" );\n\n// Constants to be used in isValidTagPair.\n// A regex to get the tag type.\nconst tagTypeRegex = /<\\/?([^\\s]+?)(\\s\|>)/;\n// Semantic tags (as opposed to style tags) are tags that are used to structure the text.\nconst semanticTags = [ \"p\", \"div\", \"h1\", \"h2\", \"h3\", \"h4\", \"h5\", \"h6\", \"span\", \"li\", \"main\" ];\n\n/\n Class for tokenizing a (html) text into sentences.\n /\nexport default class SentenceTokenizer {\n\t/\n\t Constructor\n\t * @constructor\n\t /\n\tconstructor() {\n\t\t/\n * \\u2026 - ellipsis.\n * \\u06D4 - Urdu full stop.\n * \\u061f - Arabic question mark.\n /\n\t\tthis.sentenceDelimiters = \"”〞〟„』›»’‛`\\\"?!\\u2026\\u06d4\\u061f\";\n\t}\n\n\t/\n\t Gets the sentence delimiters.\n\t \n\t @returns {string} The sentence delimiters.\n\t /\n\tgetSentenceDelimiters() {\n\t\treturn this.sentenceDelimiters;\n\t}\n\n\t/\n\t Returns whether or not a certain character is a number.\n\t \n\t @param {string} character The character to check.\n\t * @returns {boolean} Whether or not the character is a capital letter.\n\t /\n\tisNumber( character ) {\n\t\treturn ! isNaN( parseInt( character, 10 ) );\n\t}\n\n\t/\n\t Returns whether or not a given HTML tag is a break tag.\n\t \n\t @param {string} htmlTag The HTML tag to check.\n\t * @returns {boolean} Whether or not the given HTML tag is a break tag.\n\t /\n\tisBreakTag( htmlTag ) {\n\t\treturn /<\\/?br/.test( htmlTag );\n\t}\n\n\t/\n\t Returns whether or not a given character is quotation mark.\n\t \n\t @param {string} character The character to check.\n\t * @returns {boolean} Whether or not the given character is a quotation mark.\n\t /\n\tisQuotation( character ) {\n\t\tcharacter = normalizeQuotes( character );\n\n\t\treturn \"'\" === character \|\|\n\t\t\t\"\\\"\" === character;\n\t}\n\n\t/\n\t A mock definition of this function. This function is only used in extensions for languages that use an ordinal dot.\n\t \n\t @returns {boolean} Always returns false as it is a language specific implementation if a language has an ordinal dot.\n\t /\n\tendsWithOrdinalDot() {\n\t\treturn false;\n\t}\n\n\t/\n\t Returns whether or not a given character is a punctuation mark that can be at the beginning\n\t * of a sentence, like ¿ and ¡ used in Spanish.\n\t \n\t @param {string} character The character to check.\n\t * @returns {boolean} Whether or not the given character is a punctuation mark.\n\t /\n\tisPunctuation( character ) {\n\t\treturn \"¿\" === character \|\|\n\t\t\t\"¡\" === character;\n\t}\n\n\t/\n\t Removes duplicate whitespace from a given text.\n\t \n\t @param {string} text The text with duplicate whitespace.\n\t * @returns {string} The text without duplicate whitespace.\n\t /\n\tremoveDuplicateWhitespace( text ) {\n\t\treturn text.replace( /\\s+/, \" \" );\n\t}\n\n\t/\n\t Returns whether or not a certain character is a capital letter.\n\t \n\t @param {string} character The character to check.\n\t * @returns {boolean} Whether or not the character is a capital letter.\n\t /\n\tisCapitalLetter( character ) {\n\t\treturn character !== character.toLocaleLowerCase();\n\t}\n\n\t/\n\t Checks whether the given character is a smaller than sign.\n\t \n\t This function is used to make sure that tokenizing the content after\n\t * the smaller than sign works as expected.\n\t * E.g. 'A sentence. < Hello world!' = ['A sentence.', '< Hello world!'].\n\t \n\t @param {string} character The character to check.\n\t * @returns {boolean} Whether the character is a smaller than sign ('<') or not.\n\t /\n\tisSmallerThanSign( character ) {\n\t\treturn character === \"<\";\n\t}\n\n\t/\n\t Retrieves the next two characters from an array with the two next tokens.\n\t \n\t @param {Array} nextTokens The two next tokens. Might be undefined.\n\t * @returns {string} The next two characters.\n\t /\n\tgetNextTwoCharacters( nextTokens ) {\n\t\tlet next = \"\";\n\n\t\tif ( ! isUndefined( nextTokens[ 0 ] ) ) {\n\t\t\tnext += nextTokens[ 0 ].src;\n\t\t}\n\n\t\tif ( ! isUndefined( nextTokens[ 1 ] ) ) {\n\t\t\tnext += nextTokens[ 1 ].src;\n\t\t}\n\n\t\tnext = this.removeDuplicateWhitespace( next );\n\n\t\treturn next;\n\t}\n\n\t/\n\t Checks whether a character is from a language that's written from right to left.\n\t * These languages don't have capital letter forms. Therefore any letter from these languages is a\n\t * potential sentence beginning.\n\t \n\t @param {string} letter The letter to check.\n\t \n\t @returns {boolean} Whether the letter is from an LTR language.\n\t /\n\tisLetterFromSpecificLanguage( letter ) {\n\t\tconst ltrLetterRanges = [\n\t\t\t// Hebrew characters.\n\t\t\t/^[\\u0590-\\u05fe]+$/i,\n\t\t\t// Arabic characters (used for Arabic, Farsi, Urdu).\n\t\t\t/^[\\u0600-\\u06FF]+$/i,\n\t\t\t// Additional Farsi characters.\n\t\t\t/^[\\uFB8A\\u067E\\u0686\\u06AF]+$/i,\n\t\t];\n\n\t\treturn (\n\t\t\tltrLetterRanges.some( ltrLetterRange => ltrLetterRange.test( letter ) )\n\t\t);\n\t}\n\n\t/\n\t Checks if the sentenceBeginning beginning is a valid beginning.\n\t \n\t @param {string} sentenceBeginning The beginning of the sentence to validate.\n\t * @returns {boolean} Returns true if it is a valid beginning, false if it is not.\n\t /\n\tisValidSentenceBeginning( sentenceBeginning ) {\n\t\treturn ( this.isCapitalLetter( sentenceBeginning ) \|\|\n\t\t\t\tthis.isLetterFromSpecificLanguage( sentenceBeginning ) \|\|\n\t\t\t\tthis.isNumber( sentenceBeginning ) \|\|\n\t\t\t\tthis.isQuotation( sentenceBeginning ) \|\|\n\t\t\t\tthis.isPunctuation( sentenceBeginning ) \|\|\n\t\t\t\tthis.isSmallerThanSign( sentenceBeginning ) );\n\t}\n\n\t/\n\t Checks if the token is a valid sentence start.\n\t \n\t @param {Object} token The token to validate.\n\t * @returns {boolean} Returns true if the token is valid sentence start, false if it is not.\n\t /\n\tisSentenceStart( token ) {\n\t\treturn ( ! isUndefined( token ) && (\n\t\t\t\"html-start\" === token.type \|\|\n\t\t\t\"html-end\" === token.type \|\|\n\t\t\t\"block-start\" === token.type\n\t\t) );\n\t}\n\n\t/\n\t Checks if the token is a valid sentence ending. A valid sentence ending is either a full stop or another\n\t * delimiter such as \"?\", \"!\", etc.\n\t \n\t @param {Object} token The token to validate.\n\t * @returns {boolean} Returns true if the token is valid sentence ending, false if it is not.\n\t /\n\tisSentenceEnding( token ) {\n\t\treturn (\n\t\t\t! isUndefined( token ) &&\n\t\t\t( token.type === \"full-stop\" \|\| token.type === \"sentence-delimiter\" )\n\t\t);\n\t}\n\n\t/\n\t Checks if a full stop is part of a person's initials.\n\t \n\t Tests if tokens exist. Then tests if the tokens are of the right type.\n\t * For previous token, it checks if the sentence ends with a single letter.\n\t * For nextToken it checks if it is a single letter.\n\t * Checks if next token is followed by a full stop.\n\t \n\t @param {object} token The current token (must be a full stop).\n\t * @param {object} previousToken The token before the full stop.\n\t * @param {object} nextToken The token following the full stop.\n\t * @param {object} secondToNextToken The second token after the full stop.\n\t * @returns {boolean} True if a full stop is part of a person's initials, False if the full stop is not part of a person's initials.\n\t /\n\tisPartOfPersonInitial( token, previousToken, nextToken, secondToNextToken ) {\n\t\treturn ( ! isUndefined( token ) &&\n\t\t\t! isUndefined( nextToken ) &&\n\t\t\t! isUndefined( secondToNextToken ) &&\n\t\t\t! isUndefined( previousToken ) &&\n\t\t\ttoken.type === \"full-stop\" &&\n\t\t\tpreviousToken.type === \"sentence\" &&\n\t\t\tlastCharacterPartOfInitialsRegex.test( previousToken.src ) &&\n\t\t\tnextToken.type === \"sentence\" &&\n\t\t\tnextToken.src.trim().length === 1 &&\n\t\t\tsecondToNextToken.type === \"full-stop\"\n\t\t);\n\t}\n\n\t/\n\t Tokens that represent a '<', followed by content until it enters another '<' or '>'\n\t * gets another pass by the tokenizer.\n\t \n\t @param {Object} token A token of type 'smaller-than-sign-content'.\n\t * @param {string[]} tokenSentences The current array of found sentences. Sentences may get added by this method.\n\t * @param {string} currentSentence The current sentence. Sentence parts may get appended by this method.\n\t * @returns {{tokenSentences, currentSentence}} The found sentences and the current sentence, appended when necessary.\n\t /\n\ttokenizeSmallerThanContent( token, tokenSentences, currentSentence ) {\n\t\t/\n\t\t\tRemove the '<' from the text, to avoid matching this rule\n\t\t\trecursively again and again.\n\t\t\tWe add it again later on.\n\t\t/\n\t\tconst localText = token.src.substring( 1 );\n\n\t\t// Tokenize the current smaller-than-content token without the first '<'.\n\t\tconst tokenizerResult = this.createTokenizer();\n\t\tthis.tokenize( tokenizerResult.tokenizer, localText );\n\t\tconst localSentences = this.getSentencesFromTokens( tokenizerResult.tokens, false );\n\n\t\tlocalSentences[ 0 ] = isUndefined( localSentences[ 0 ] ) ? \"<\" : \"<\" + localSentences[ 0 ];\n\n\t\t/\n\t\t * When the first sentence has a valid sentence beginning.\n\t\t * Add the currently build sentence to the sentences.\n\t\t * Start building the next sentence.\n\t\t /\n\t\tif ( this.isValidSentenceBeginning( localSentences[ 0 ] ) ) {\n\t\t\ttokenSentences.push( currentSentence );\n\t\t\tcurrentSentence = \"\";\n\t\t}\n\t\tcurrentSentence += localSentences[ 0 ];\n\n\t\tif ( localSentences.length > 1 ) {\n\t\t\t/\n\t\t\t\tThere is a new sentence after the first,\n\t\t\t\tadd and reset the current sentence.\n\t\t\t /\n\t\t\ttokenSentences.push( currentSentence );\n\t\t\tcurrentSentence = \"\";\n\n\t\t\t// Remove the first sentence (we do not need to add it again).\n\t\t\tlocalSentences.shift();\n\t\t\t// Last sentence gets special treatment.\n\t\t\tconst lastSentence = localSentences.pop();\n\n\t\t\t// Add the remaining found sentences.\n\t\t\tlocalSentences.forEach( sentence => {\n\t\t\t\ttokenSentences.push( sentence );\n\t\t\t} );\n\n\t\t\tconst sentenceEndRegex = new RegExp( \"[\" + fullStop + this.getSentenceDelimiters() + \"]$\" );\n\n\t\t\t// Check if the last sentence has a valid sentence ending.\n\t\t\tif ( lastSentence.match( sentenceEndRegex ) ) {\n\t\t\t\t// If so, add it as a sentence.\n\t\t\t\ttokenSentences.push( lastSentence );\n\t\t\t} else {\n\t\t\t\t// If not, start making a new one.\n\t\t\t\tcurrentSentence = lastSentence;\n\t\t\t}\n\t\t}\n\t\treturn {\n\t\t\ttokenSentences,\n\t\t\tcurrentSentence,\n\t\t};\n\t}\n\n\t/\n\t Creates a tokenizer.\n\t \n\t @returns {Object} The tokenizer and the tokens.\n\t /\n\tcreateTokenizer() {\n\t\tconst sentenceDelimiterRegex = new RegExp( \"^[\" + this.getSentenceDelimiters() + \"]$\" );\n\t\tconst sentenceRegex = new RegExp( \"^[^\" + fullStop + this.getSentenceDelimiters() + \"<\\\$\\\$\\\\[\\\\]]+$\" );\n\n\t\tconst tokens = [];\n\t\tconst tokenizer = core( function( token ) {\n\t\t\ttokens.push( token );\n\t\t} );\n\n\t\ttokenizer.addRule( fullStopRegex, \"full-stop\" );\n\t\ttokenizer.addRule( smallerThanContentRegex, \"smaller-than-sign-content\" );\n\t\ttokenizer.addRule( htmlStartRegex, \"html-start\" );\n\t\ttokenizer.addRule( htmlEndRegex, \"html-end\" );\n\t\ttokenizer.addRule( blockStartRegex, \"block-start\" );\n\t\ttokenizer.addRule( blockEndRegex, \"block-end\" );\n\t\ttokenizer.addRule( sentenceDelimiterRegex, \"sentence-delimiter\" );\n\t\ttokenizer.addRule( sentenceRegex, \"sentence\" );\n\n\t\treturn {\n\t\t\ttokenizer,\n\t\t\ttokens,\n\t\t};\n\t}\n\n\t/\n\t Tokenizes the given text using the given tokenizer.\n\t \n\t @param {Object} tokenizer The tokenizer to use.\n\t * @param {string} text The text to tokenize.\n\t * @returns {void}\n\t /\n\ttokenize( tokenizer, text ) {\n\t\ttokenizer.onText( text );\n\n\t\ttry {\n\t\t\ttokenizer.end();\n\t\t} catch ( e ) {\n\t\t\tconsole.error( \"Tokenizer end error:\", e, e.tokenizer2 );\n\t\t}\n\t}\n\n\t/\n\t Checks if a string ends with an abbreviation.\n\t * @param {string} currentSentence A (part of) a sentence.\n\t * @returns {boolean} True if the string ends with an abbreviation that is in abbreviations.js. Otherwise, False.\n\t /\n\tendsWithAbbreviation( currentSentence ) {\n\t\tconst matchedAbbreviations = currentSentence.match( abbreviationsRegex );\n\n\t\tif ( ! matchedAbbreviations ) {\n\t\t\treturn false;\n\t\t}\n\n\t\tconst lastAbbreviation = matchedAbbreviations.pop();\n\t\treturn currentSentence.endsWith( lastAbbreviation );\n\t}\n\n\t/\n\t Checks whether the given tokens are a valid html tag pair.\n\t * Note that this method is not a full html tag validator. It should be replaced with a better solution once the html parser is implemented.\n\t \n\t @param {object} firstToken The first token to check. It is asserted that this token contains/is an opening html tag.\n\t * @param {object} lastToken The last token to check. It is asserted that this token contains/is a closing html tag.\n\t \n\t @returns {boolean} True if the tokens are a valid html tag pair. Otherwise, False.\n\t /\n\tisValidTagPair( firstToken, lastToken ) {\n\t\tconst firstTokenText = firstToken.src;\n\t\tconst lastTokenText = lastToken.src;\n\n\t\t// Get the tag types.\n\t\tconst firstTagType = firstTokenText.match( tagTypeRegex )[ 1 ];\n\t\tconst lastTagType = lastTokenText.match( tagTypeRegex )[ 1 ];\n\n\n\t\t// Check if the tags are the same and if they are a semantic tag (p, div, h1, h2, h3, h4, h5, h6, span).\n\t\treturn firstTagType === lastTagType && semanticTags.includes( firstTagType );\n\t}\n\n\t/\n\t Returns an array of sentences for a given array of tokens, assumes that the text has already been split into blocks.\n\t \n\t @param {Object[]} tokenArray The tokens from the sentence tokenizer.\n\t * @param {boolean} [trimSentences=true] Whether to trim the sentences at the end or not.\n\t \n\t @returns {string[]} A list of sentences.\n\t /\n\tgetSentencesFromTokens( tokenArray, trimSentences = true ) {\n\t\tlet tokenSentences = [], currentSentence = \"\", nextSentenceStart, sliced;\n\n\t\t// Drop the first and last HTML tag if both are present.\n\t\tdo {\n\t\t\tsliced = false;\n\t\t\tconst firstToken = tokenArray[ 0 ];\n\t\t\tconst lastToken = tokenArray[ tokenArray.length - 1 ];\n\n\t\t\tif ( firstToken && lastToken && firstToken.type === \"html-start\" &&\n\t\t\t\tlastToken.type === \"html-end\" && this.isValidTagPair( firstToken, lastToken ) ) {\n\t\t\t\ttokenArray = tokenArray.slice( 1, tokenArray.length - 1 );\n\n\t\t\t\tsliced = true;\n\t\t\t}\n\t\t} while ( sliced && tokenArray.length > 1 );\n\n\t\ttokenArray.forEach( ( token, i ) => {\n\t\t\tlet hasNextSentence, nextCharacters, tokenizeResults;\n\t\t\tconst nextToken = tokenArray[ i + 1 ];\n\t\t\tconst previousToken = tokenArray[ i - 1 ];\n\t\t\tconst secondToNextToken = tokenArray[ i + 2 ];\n\t\t\tnextCharacters = this.getNextTwoCharacters( [ nextToken, secondToNextToken ] );\n\n\t\t\t// For a new sentence we need to check the next two characters.\n\t\t\thasNextSentence = nextCharacters.length >= 2;\n\t\t\tnextSentenceStart = hasNextSentence ? nextCharacters[ 1 ] : \"\";\n\n\t\t\tswitch ( token.type ) {\n\t\t\t\tcase \"html-start\":\n\t\t\t\tcase \"html-end\":\n\t\t\t\t\tif ( this.isBreakTag( token.src ) ) {\n\t\t\t\t\t\ttokenSentences.push( currentSentence );\n\t\t\t\t\t\tcurrentSentence = \"\";\n\t\t\t\t\t} else {\n\t\t\t\t\t\tcurrentSentence += token.src;\n\t\t\t\t\t}\n\t\t\t\t\tbreak;\n\n\t\t\t\tcase \"smaller-than-sign-content\":\n\t\t\t\t\ttokenizeResults = this.tokenizeSmallerThanContent( token, tokenSentences, currentSentence );\n\t\t\t\t\ttokenSentences = tokenizeResults.tokenSentences;\n\t\t\t\t\tcurrentSentence = tokenizeResults.currentSentence;\n\t\t\t\t\tbreak;\n\t\t\t\tcase \"sentence\":\n\t\t\t\t\tcurrentSentence += token.src;\n\t\t\t\t\tbreak;\n\t\t\t\tcase \"sentence-delimiter\":\n\t\t\t\t\tcurrentSentence += token.src;\n\n\t\t\t\t\t/\n\t\t\t\t * Only split text into sentences if:\n\t\t\t\t * the next token is defined, AND\n\t\t\t\t * the next token type is neither \"block-end\" nor \"sentence-delimiter\", AND\n\t\t\t\t * the next token first character is a white space\n\t\t\t\t /\n\t\t\t\t\tif ( ! isUndefined( nextToken ) &&\n\t\t\t\t\t\t\"block-end\" !== nextToken.type &&\n\t\t\t\t\t\t\"sentence-delimiter\" !== nextToken.type &&\n\t\t\t\t\t\tthis.isCharacterASpace( nextToken.src[ 0 ] ) ) {\n\t\t\t\t\t\t// Don't split on quotation marks unless they're preceded by a full stop.\n\t\t\t\t\t\tif ( this.isQuotation( token.src ) && previousToken && previousToken.src !== \".\" ) {\n\t\t\t\t\t\t\tbreak;\n\t\t\t\t\t\t}\n\t\t\t\t\t\t/\n\t\t\t\t * Only split on ellipsis or quotation marks when:\n\t\t\t\t\t * a) There is a next sentence, and the next character is a valid sentence beginning preceded by a white space, OR\n\t\t\t\t\t * b) The next token is a sentence start\n\t\t\t\t\t /\n\t\t\t\t\t\tif ( this.isQuotation( token.src ) \|\| token.src === \"…\" ) {\n\t\t\t\t\t\t\tcurrentSentence = this.getValidSentence( hasNextSentence,\n\t\t\t\t\t\t\t\tnextSentenceStart,\n\t\t\t\t\t\t\t\tnextCharacters,\n\t\t\t\t\t\t\t\tnextToken,\n\t\t\t\t\t\t\t\ttokenSentences,\n\t\t\t\t\t\t\t\tcurrentSentence );\n\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\ttokenSentences.push( currentSentence );\n\t\t\t\t\t\t\tcurrentSentence = \"\";\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t\tbreak;\n\n\t\t\t\tcase \"full-stop\":\n\t\t\t\t\tcurrentSentence += token.src;\n\t\t\t\t\tnextCharacters = this.getNextTwoCharacters( [ nextToken, secondToNextToken ] );\n\n\t\t\t\t\t// For a new sentence we need to check the next two characters.\n\t\t\t\t\thasNextSentence = nextCharacters.length >= 2;\n\t\t\t\t\tnextSentenceStart = hasNextSentence ? nextCharacters[ 1 ] : \"\";\n\n\t\t\t\t\t// If the current sentence ends with an abbreviation, the full stop does not split the sentence.\n\t\t\t\t\tif ( this.endsWithAbbreviation( currentSentence ) ) {\n\t\t\t\t\t\tbreak;\n\t\t\t\t\t}\n\n\t\t\t\t\t// It should not split the text if the first character of the potential next sentence is a number.\n\t\t\t\t\tif ( hasNextSentence && this.isNumber( nextCharacters[ 0 ] ) ) {\n\t\t\t\t\t\tbreak;\n\t\t\t\t\t}\n\n\t\t\t\t\t// If the full stop is part of a person's initials, don't split sentence.\n\t\t\t\t\tif ( this.isPartOfPersonInitial( token, previousToken, nextToken, secondToNextToken ) ) {\n\t\t\t\t\t\tbreak;\n\t\t\t\t\t}\n\n\t\t\t\t\t// If the full stop is an ordinal dot (in German), then don't break the sentence.\n\t\t\t\t\t// This check should be done after hasNextSentence && this.isNumber( nextCharacters[ 0 ] ) (above).\n\t\t\t\t\t// Because otherwise it could break before that test.\n\t\t\t\t\tif ( this.endsWithOrdinalDot( currentSentence ) ) {\n\t\t\t\t\t\tbreak;\n\t\t\t\t\t}\n\n\t\t\t\t\t/\n\t\t\t\t\t * Only split on full stop when:\n\t\t\t\t\t * a) There is a next sentence, and the next character is a valid sentence beginning preceded by a white space, OR\n\t\t\t\t\t * b) The next token is a sentence start\n\t\t\t\t\t /\n\t\t\t\t\tcurrentSentence = this.getValidSentence( hasNextSentence,\n\t\t\t\t\t\tnextSentenceStart,\n\t\t\t\t\t\tnextCharacters,\n\t\t\t\t\t\tnextToken,\n\t\t\t\t\t\ttokenSentences,\n\t\t\t\t\t\tcurrentSentence );\n\n\t\t\t\t\tbreak;\n\n\t\t\t\tcase \"block-start\":\n\t\t\t\t\tcurrentSentence += token.src;\n\t\t\t\t\tbreak;\n\n\t\t\t\tcase \"block-end\":\n\t\t\t\t\tcurrentSentence += token.src;\n\n\t\t\t\t\tnextCharacters = this.getNextTwoCharacters( [ nextToken, secondToNextToken ] );\n\n\t\t\t\t\t// For a new sentence we need to check the next two characters.\n\t\t\t\t\thasNextSentence = nextCharacters.length >= 2;\n\t\t\t\t\tnextSentenceStart = hasNextSentence ? nextCharacters[ 0 ] : \"\";\n\n\t\t\t\t\t/ Don't split if:\n\t\t\t\t\t * - The next character is a number. For example: IPv4-numbers.\n\t\t\t\t\t * - The block end is preceded by a valid sentence ending, but not followed by a valid sentence beginning.\n\t\t\t\t\t /\n\t\t\t\t\tif (\n\t\t\t\t\t\thasNextSentence && this.isNumber( nextCharacters[ 0 ] ) \|\|\n\t\t\t\t\t\t( this.isSentenceEnding( previousToken ) &&\n\t\t\t\t\t\t\t( ! ( this.isValidSentenceBeginning( nextSentenceStart ) \|\| this.isSentenceStart( nextToken ) ) ) )\n\t\t\t\t\t) {\n\t\t\t\t\t\tbreak;\n\t\t\t\t\t}\n\n\t\t\t\t\t/\n\t\t\t\t\t * Split if:\n\t\t\t\t\t * - The block end is preceded by a sentence ending and followed by a valid sentence beginning.\n\t\t\t\t\t /\n\t\t\t\t\tif (\n\t\t\t\t\t\tthis.isSentenceEnding( previousToken ) &&\n\t\t\t\t\t\t( this.isSentenceStart( nextToken ) \|\| this.isValidSentenceBeginning( nextSentenceStart ) )\n\t\t\t\t\t) {\n\t\t\t\t\t\ttokenSentences.push( currentSentence );\n\t\t\t\t\t\tcurrentSentence = \"\";\n\t\t\t\t\t}\n\t\t\t\t\tbreak;\n\t\t\t}\n\t\t} );\n\n\t\tif ( \"\" !== currentSentence ) {\n\t\t\ttokenSentences.push( currentSentence );\n\t\t}\n\n\t\tif ( trimSentences ) {\n\t\t\ttokenSentences = map( tokenSentences, function( sentence ) {\n\t\t\t\treturn sentence.trim();\n\t\t\t} );\n\t\t}\n\n\t\treturn tokenSentences;\n\t}\n\n\t/\n\t Gets the current sentence when:\n\t * a) There is a next sentence, and the next character is a valid sentence beginning preceded by a white space, OR\n\t * b) The next token is a sentence start\n\t \n\t @param {boolean} hasNextSentence Whether the next characters are more than two.\n\t * @param {string} nextSentenceStart The second character of the next characters.\n\t * @param {string} nextCharacters The string values of the next two tokens.\n\t * @param {object} nextToken The next token object.\n\t * @param {array} tokenSentences The array of pushed valid sentences.\n\t * @param {string} currentSentence The current sentence.\n\t \n\t @returns {string} The current sentence.\n\t /\n\tgetValidSentence( hasNextSentence, nextSentenceStart, nextCharacters, nextToken, tokenSentences, currentSentence ) {\n\t\tif ( ( hasNextSentence && this.isValidSentenceBeginning( nextSentenceStart ) && this.isCharacterASpace( nextCharacters[ 0 ] ) ) \|\|\n\t\t\tthis.isSentenceStart( nextToken ) ) {\n\t\t\ttokenSentences.push( currentSentence );\n\t\t\tcurrentSentence = \"\";\n\t\t}\n\t\treturn currentSentence;\n\t}\n\n\t/\n\t Checks if the character is a whitespace.\n\t \n\t @param {string} character The character to check.\n\t * @returns {boolean} Whether the character is a whitespace.\n\t */\n\tisCharacterASpace( character ) {\n\t\treturn /\\s/.test( character );\n\t}\n}\n"],"mappings":"AAAA,SAASA,KAAK,EAAEC,WAAW,EAAEC,GAAG,QAAQ,QAAQ;AAEhD,OAAOC,IAAI,MAAM,iBAAiB;AAElC,SAASC,SAAS,IAAIC,eAAe;AAErC,OAAOC,aAAa;AAEpB,OAAOC,oBAAoB;AAC3B,OAAOC,cAAc;;AAErB;AACA,MAAMC,QAAQ,GAAG,GAAG;AAEpB,MAAMC,aAAa,GAAG,IAAIC,MAAM,CAAE,IAAI,GAAGF,QAAQ,GAAG,IAAK,CAAC;AAC1D,MAAMG,uBAAuB,GAAG,WAAW;AAC3C,MAAMC,cAAc,GAAG,uBAAuB;AAC9C,MAAMC,YAAY,GAAG,wBAAwB;AAE7C,MAAMC,eAAe,GAAG,eAAe;AACvC,MAAMC,aAAa,GAAG,gBAAgB;AAEtC,MAAMC,6BAA6B,GAAGX,aAAa,CAACJ,GAAG,CAAIgB,YAAY,IAAMA,YAAY,CAACC,OAAO,CAAE,GAAG,EAAE,KAAM,CAAE,CAAC;AACjH,MAAMC,kBAAkB,GAAGb,oBAAoB,CAAEU,6BAA8B,CAAC;AAEhF,MAAMI,sBAAsB,GAAG,QAAQ,GAAGb,cAAc,CAAC,CAAC,CAACN,GAAG,CAAIoB,QAAQ,IAAM,IAAI,GAAGA,QAAS,CAAC,CAACC,IAAI,CAAE,EAAG,CAAC,GAAG,IAAI;AACnH,MAAMC,gCAAgC,GAAG,IAAIb,MAAM,CAAEU,sBAAsB,GAAG,WAAY,CAAC;;AAE3F;AACA;AACA,MAAMI,YAAY,GAAG,qBAAqB;AAC1C;AACA,MAAMC,YAAY,GAAG,CAAE,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAE;;AAE7F;AACA;AACA;AACA,eAAe,MAAMC,iBAAiB,CAAC;EACtC;AACD;AACA;AACA;EACCC,WAAWA,CAAA,EAAG;IACb;AACF;AACA;AACA;AACA;IACE,IAAI,CAACC,kBAAkB,GAAG,kCAAkC;EAC7D;;EAEA;AACD;AACA;AACA;AACA;EACCC,qBAAqBA,CAAA,EAAG;IACvB,OAAO,IAAI,CAACD,kBAAkB;EAC/B;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCE,QAAQA,CAAEC,SAAS,EAAG;IACrB,OAAO,CAAEhC,KAAK,CAAEiC,QAAQ,CAAED,SAAS,EAAE,EAAG,CAAE,CAAC;EAC5C;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCE,UAAUA,CAAEC,OAAO,EAAG;IACrB,OAAO,QAAQ,CAACC,IAAI,CAAED,OAAQ,CAAC;EAChC;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCE,WAAWA,CAAEL,SAAS,EAAG;IACxBA,SAAS,GAAG3B,eAAe,CAAE2B,SAAU,CAAC;IAExC,OAAO,GAAG,KAAKA,SAAS,IACvB,IAAI,KAAKA,SAAS;EACpB;;EAEA;AACD;AACA;AACA;AACA;EACCM,kBAAkBA,CAAA,EAAG;IACpB,OAAO,KAAK;EACb;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;EACCC,aAAaA,CAAEP,SAAS,EAAG;IAC1B,OAAO,GAAG,KAAKA,SAAS,IACvB,GAAG,KAAKA,SAAS;EACnB;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCQ,yBAAyBA,CAAEC,IAAI,EAAG;IACjC,OAAOA,IAAI,CAACtB,OAAO,CAAE,KAAK,EAAE,GAAI,CAAC;EAClC;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCuB,eAAeA,CAAEV,SAAS,EAAG;IAC5B,OAAOA,SAAS,KAAKA,SAAS,CAACW,iBAAiB,CAAC,CAAC;EACnD;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACCC,iBAAiBA,CAAEZ,SAAS,EAAG;IAC9B,OAAOA,SAAS,KAAK,GAAG;EACzB;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCa,oBAAoBA,CAAEC,UAAU,EAAG;IAClC,IAAIC,IAAI,GAAG,EAAE;IAEb,IAAK,CAAE9C,WAAW,CAAE6C,UAAU,CAAE,CAAC,CAAG,CAAC,EAAG;MACvCC,IAAI,IAAID,UAAU,CAAE,CAAC,CAAE,CAACE,GAAG;IAC5B;IAEA,IAAK,CAAE/C,WAAW,CAAE6C,UAAU,CAAE,CAAC,CAAG,CAAC,EAAG;MACvCC,IAAI,IAAID,UAAU,CAAE,CAAC,CAAE,CAACE,GAAG;IAC5B;IAEAD,IAAI,GAAG,IAAI,CAACP,yBAAyB,CAAEO,IAAK,CAAC;IAE7C,OAAOA,IAAI;EACZ;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACCE,4BAA4BA,CAAEC,MAAM,EAAG;IACtC,MAAMC,eAAe,GAAG;IACvB;IACA,qBAAqB;IACrB;IACA,qBAAqB;IACrB;IACA,gCAAgC,CAChC;IAED,OACCA,eAAe,CAACC,IAAI,CAAEC,cAAc,IAAIA,cAAc,CAACjB,IAAI,CAAEc,MAAO,CAAE,CAAC;EAEzE;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCI,wBAAwBA,CAAEC,iBAAiB,EAAG;IAC7C,OAAS,IAAI,CAACb,eAAe,CAAEa,iBAAkB,CAAC,IAChD,IAAI,CAACN,4BAA4B,CAAEM,iBAAkB,CAAC,IACtD,IAAI,CAACxB,QAAQ,CAAEwB,iBAAkB,CAAC,IAClC,IAAI,CAAClB,WAAW,CAAEkB,iBAAkB,CAAC,IACrC,IAAI,CAAChB,aAAa,CAAEgB,iBAAkB,CAAC,IACvC,IAAI,CAACX,iBAAiB,CAAEW,iBAAkB,CAAC;EAC9C;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCC,eAAeA,CAAEC,KAAK,EAAG;IACxB,OAAS,CAAExD,WAAW,CAAEwD,KAAM,CAAC,KAC9B,YAAY,KAAKA,KAAK,CAACC,IAAI,IAC3B,UAAU,KAAKD,KAAK,CAACC,IAAI,IACzB,aAAa,KAAKD,KAAK,CAACC,IAAI,CAC5B;EACF;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;EACCC,gBAAgBA,CAAEF,KAAK,EAAG;IACzB,OACC,CAAExD,WAAW,CAAEwD,KAAM,CAAC,KACpBA,KAAK,CAACC,IAAI,KAAK,WAAW,IAAID,KAAK,CAACC,IAAI,KAAK,oBAAoB,CAAE;EAEvE;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACCE,qBAAqBA,CAAEH,KAAK,EAAEI,aAAa,EAAEC,SAAS,EAAEC,iBAAiB,EAAG;IAC3E,OAAS,CAAE9D,WAAW,CAAEwD,KAAM,CAAC,IAC9B,CAAExD,WAAW,CAAE6D,SAAU,CAAC,IAC1B,CAAE7D,WAAW,CAAE8D,iBAAkB,CAAC,IAClC,CAAE9D,WAAW,CAAE4D,aAAc,CAAC,IAC9BJ,KAAK,CAACC,IAAI,KAAK,WAAW,IAC1BG,aAAa,CAACH,IAAI,KAAK,UAAU,IACjClC,gCAAgC,CAACY,IAAI,CAAEyB,aAAa,CAACb,GAAI,CAAC,IAC1Dc,SAAS,CAACJ,IAAI,KAAK,UAAU,IAC7BI,SAAS,CAACd,GAAG,CAACgB,IAAI,CAAC,CAAC,CAACC,MAAM,KAAK,CAAC,IACjCF,iBAAiB,CAACL,IAAI,KAAK,WAAW;EAExC;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACCQ,0BAA0BA,CAAET,KAAK,EAAEU,cAAc,EAAEC,eAAe,EAAG;IACpE;AACF;AACA;AACA;AACA;IACE,MAAMC,SAAS,GAAGZ,KAAK,CAACT,GAAG,CAACsB,SAAS,CAAE,CAAE,CAAC;;IAE1C;IACA,MAAMC,eAAe,GAAG,IAAI,CAACC,eAAe,CAAC,CAAC;IAC9C,IAAI,CAACC,QAAQ,CAAEF,eAAe,CAACG,SAAS,EAAEL,SAAU,CAAC;IACrD,MAAMM,cAAc,GAAG,IAAI,CAACC,sBAAsB,CAAEL,eAAe,CAACM,MAAM,EAAE,KAAM,CAAC;IAEnFF,cAAc,CAAE,CAAC,CAAE,GAAG1E,WAAW,CAAE0E,cAAc,CAAE,CAAC,CAAG,CAAC,GAAG,GAAG,GAAG,GAAG,GAAGA,cAAc,CAAE,CAAC,CAAE;;IAE1F;AACF;AACA;AACA;AACA;IACE,IAAK,IAAI,CAACrB,wBAAwB,CAAEqB,cAAc,CAAE,CAAC,CAAG,CAAC,EAAG;MAC3DR,cAAc,CAACW,IAAI,CAAEV,eAAgB,CAAC;MACtCA,eAAe,GAAG,EAAE;IACrB;IACAA,eAAe,IAAIO,cAAc,CAAE,CAAC,CAAE;IAEtC,IAAKA,cAAc,CAACV,MAAM,GAAG,CAAC,EAAG;MAChC;AACH;AACA;AACA;MACGE,cAAc,CAACW,IAAI,CAAEV,eAAgB,CAAC;MACtCA,eAAe,GAAG,EAAE;;MAEpB;MACAO,cAAc,CAACI,KAAK,CAAC,CAAC;MACtB;MACA,MAAMC,YAAY,GAAGL,cAAc,CAACM,GAAG,CAAC,CAAC;;MAEzC;MACAN,cAAc,CAACO,OAAO,CAAEC,QAAQ,IAAI;QACnChB,cAAc,CAACW,IAAI,CAAEK,QAAS,CAAC;MAChC,CAAE,CAAC;MAEH,MAAMC,gBAAgB,GAAG,IAAIzE,MAAM,CAAE,GAAG,GAAGF,QAAQ,GAAG,IAAI,CAACqB,qBAAqB,CAAC,CAAC,GAAG,IAAK,CAAC;;MAE3F;MACA,IAAKkD,YAAY,CAACK,KAAK,CAAED,gBAAiB,CAAC,EAAG;QAC7C;QACAjB,cAAc,CAACW,IAAI,CAAEE,YAAa,CAAC;MACpC,CAAC,MAAM;QACN;QACAZ,eAAe,GAAGY,YAAY;MAC/B;IACD;IACA,OAAO;MACNb,cAAc;MACdC;IACD,CAAC;EACF;;EAEA;AACD;AACA;AACA;AACA;EACCI,eAAeA,CAAA,EAAG;IACjB,MAAMc,sBAAsB,GAAG,IAAI3E,MAAM,CAAE,IAAI,GAAG,IAAI,CAACmB,qBAAqB,CAAC,CAAC,GAAG,IAAK,CAAC;IACvF,MAAMyD,aAAa,GAAG,IAAI5E,MAAM,CAAE,KAAK,GAAGF,QAAQ,GAAG,IAAI,CAACqB,qBAAqB,CAAC,CAAC,GAAG,kBAAmB,CAAC;IAExG,MAAM+C,MAAM,GAAG,EAAE;IACjB,MAAMH,SAAS,GAAGvE,IAAI,CAAE,UAAUsD,KAAK,EAAG;MACzCoB,MAAM,CAACC,IAAI,CAAErB,KAAM,CAAC;IACrB,CAAE,CAAC;IAEHiB,SAAS,CAACc,OAAO,CAAE9E,aAAa,EAAE,WAAY,CAAC;IAC/CgE,SAAS,CAACc,OAAO,CAAE5E,uBAAuB,EAAE,2BAA4B,CAAC;IACzE8D,SAAS,CAACc,OAAO,CAAE3E,cAAc,EAAE,YAAa,CAAC;IACjD6D,SAAS,CAACc,OAAO,CAAE1E,YAAY,EAAE,UAAW,CAAC;IAC7C4D,SAAS,CAACc,OAAO,CAAEzE,eAAe,EAAE,aAAc,CAAC;IACnD2D,SAAS,CAACc,OAAO,CAAExE,aAAa,EAAE,WAAY,CAAC;IAC/C0D,SAAS,CAACc,OAAO,CAAEF,sBAAsB,EAAE,oBAAqB,CAAC;IACjEZ,SAAS,CAACc,OAAO,CAAED,aAAa,EAAE,UAAW,CAAC;IAE9C,OAAO;MACNb,SAAS;MACTG;IACD,CAAC;EACF;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;EACCJ,QAAQA,CAAEC,SAAS,EAAEjC,IAAI,EAAG;IAC3BiC,SAAS,CAACe,MAAM,CAAEhD,IAAK,CAAC;IAExB,IAAI;MACHiC,SAAS,CAACgB,GAAG,CAAC,CAAC;IAChB,CAAC,CAAC,OAAQC,CAAC,EAAG;MACbC,OAAO,CAACC,KAAK,CAAE,sBAAsB,EAAEF,CAAC,EAAEA,CAAC,CAACG,UAAW,CAAC;IACzD;EACD;;EAEA;AACD;AACA;AACA;AACA;EACCC,oBAAoBA,CAAE3B,eAAe,EAAG;IACvC,MAAM4B,oBAAoB,GAAG5B,eAAe,CAACiB,KAAK,CAAEjE,kBAAmB,CAAC;IAExE,IAAK,CAAE4E,oBAAoB,EAAG;MAC7B,OAAO,KAAK;IACb;IAEA,MAAMC,gBAAgB,GAAGD,oBAAoB,CAACf,GAAG,CAAC,CAAC;IACnD,OAAOb,eAAe,CAAC8B,QAAQ,CAAED,gBAAiB,CAAC;EACpD;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACCE,cAAcA,CAAEC,UAAU,EAAEC,SAAS,EAAG;IACvC,MAAMC,cAAc,GAAGF,UAAU,CAACpD,GAAG;IACrC,MAAMuD,aAAa,GAAGF,SAAS,CAACrD,GAAG;;IAEnC;IACA,MAAMwD,YAAY,GAAGF,cAAc,CAACjB,KAAK,CAAE5D,YAAa,CAAC,CAAE,CAAC,CAAE;IAC9D,MAAMgF,WAAW,GAAIF,aAAa,CAAClB,KAAK,CAAE5D,YAAa,CAAC,CAAE,CAAC,CAAE;;IAG7D;IACA,OAAO+E,YAAY,KAAKC,WAAW,IAAI/E,YAAY,CAACgF,QAAQ,CAAEF,YAAa,CAAC;EAC7E;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;AACA;EACC5B,sBAAsBA,CAAE+B,UAAU,EAAEC,aAAa,GAAG,IAAI,EAAG;IAC1D,IAAIzC,cAAc,GAAG,EAAE;MAAEC,eAAe,GAAG,EAAE;MAAEyC,iBAAiB;MAAEC,MAAM;;IAExE;IACA,GAAG;MACFA,MAAM,GAAG,KAAK;MACd,MAAMV,UAAU,GAAGO,UAAU,CAAE,CAAC,CAAE;MAClC,MAAMN,SAAS,GAAGM,UAAU,CAAEA,UAAU,CAAC1C,MAAM,GAAG,CAAC,CAAE;MAErD,IAAKmC,UAAU,IAAIC,SAAS,IAAID,UAAU,CAAC1C,IAAI,KAAK,YAAY,IAC/D2C,SAAS,CAAC3C,IAAI,KAAK,UAAU,IAAI,IAAI,CAACyC,cAAc,CAAEC,UAAU,EAAEC,SAAU,CAAC,EAAG;QAChFM,UAAU,GAAGA,UAAU,CAACI,KAAK,CAAE,CAAC,EAAEJ,UAAU,CAAC1C,MAAM,GAAG,CAAE,CAAC;QAEzD6C,MAAM,GAAG,IAAI;MACd;IACD,CAAC,QAASA,MAAM,IAAIH,UAAU,CAAC1C,MAAM,GAAG,CAAC;IAEzC0C,UAAU,CAACzB,OAAO,CAAE,CAAEzB,KAAK,EAAEuD,CAAC,KAAM;MACnC,IAAIC,eAAe,EAAEC,cAAc,EAAEC,eAAe;MACpD,MAAMrD,SAAS,GAAG6C,UAAU,CAAEK,CAAC,GAAG,CAAC,CAAE;MACrC,MAAMnD,aAAa,GAAG8C,UAAU,CAAEK,CAAC,GAAG,CAAC,CAAE;MACzC,MAAMjD,iBAAiB,GAAG4C,UAAU,CAAEK,CAAC,GAAG,CAAC,CAAE;MAC7CE,cAAc,GAAG,IAAI,CAACrE,oBAAoB,CAAE,CAAEiB,SAAS,EAAEC,iBAAiB,CAAG,CAAC;;MAE9E;MACAkD,eAAe,GAAGC,cAAc,CAACjD,MAAM,IAAI,CAAC;MAC5C4C,iBAAiB,GAAGI,eAAe,GAAGC,cAAc,CAAE,CAAC,CAAE,GAAG,EAAE;MAE9D,QAASzD,KAAK,CAACC,IAAI;QAClB,KAAK,YAAY;QACjB,KAAK,UAAU;UACd,IAAK,IAAI,CAACxB,UAAU,CAAEuB,KAAK,CAACT,GAAI,CAAC,EAAG;YACnCmB,cAAc,CAACW,IAAI,CAAEV,eAAgB,CAAC;YACtCA,eAAe,GAAG,EAAE;UACrB,CAAC,MAAM;YACNA,eAAe,IAAIX,KAAK,CAACT,GAAG;UAC7B;UACA;QAED,KAAK,2BAA2B;UAC/BmE,eAAe,GAAG,IAAI,CAACjD,0BAA0B,CAAET,KAAK,EAAEU,cAAc,EAAEC,eAAgB,CAAC;UAC3FD,cAAc,GAAGgD,eAAe,CAAChD,cAAc;UAC/CC,eAAe,GAAG+C,eAAe,CAAC/C,eAAe;UACjD;QACD,KAAK,UAAU;UACdA,eAAe,IAAIX,KAAK,CAACT,GAAG;UAC5B;QACD,KAAK,oBAAoB;UACxBoB,eAAe,IAAIX,KAAK,CAACT,GAAG;;UAE5B;AACL;AACA;AACA;AACA;AACA;UACK,IAAK,CAAE/C,WAAW,CAAE6D,SAAU,CAAC,IAC9B,WAAW,KAAKA,SAAS,CAACJ,IAAI,IAC9B,oBAAoB,KAAKI,SAAS,CAACJ,IAAI,IACvC,IAAI,CAAC0D,iBAAiB,CAAEtD,SAAS,CAACd,GAAG,CAAE,CAAC,CAAG,CAAC,EAAG;YAC/C;YACA,IAAK,IAAI,CAACX,WAAW,CAAEoB,KAAK,CAACT,GAAI,CAAC,IAAIa,aAAa,IAAIA,aAAa,CAACb,GAAG,KAAK,GAAG,EAAG;cAClF;YACD;YACA;AACN;AACA;AACA;AACA;YACM,IAAK,IAAI,CAACX,WAAW,CAAEoB,KAAK,CAACT,GAAI,CAAC,IAAIS,KAAK,CAACT,GAAG,KAAK,GAAG,EAAG;cACzDoB,eAAe,GAAG,IAAI,CAACiD,gBAAgB,CAAEJ,eAAe,EACvDJ,iBAAiB,EACjBK,cAAc,EACdpD,SAAS,EACTK,cAAc,EACdC,eAAgB,CAAC;YACnB,CAAC,MAAM;cACND,cAAc,CAACW,IAAI,CAAEV,eAAgB,CAAC;cACtCA,eAAe,GAAG,EAAE;YACrB;UACD;UACA;QAED,KAAK,WAAW;UACfA,eAAe,IAAIX,KAAK,CAACT,GAAG;UAC5BkE,cAAc,GAAG,IAAI,CAACrE,oBAAoB,CAAE,CAAEiB,SAAS,EAAEC,iBAAiB,CAAG,CAAC;;UAE9E;UACAkD,eAAe,GAAGC,cAAc,CAACjD,MAAM,IAAI,CAAC;UAC5C4C,iBAAiB,GAAGI,eAAe,GAAGC,cAAc,CAAE,CAAC,CAAE,GAAG,EAAE;;UAE9D;UACA,IAAK,IAAI,CAACnB,oBAAoB,CAAE3B,eAAgB,CAAC,EAAG;YACnD;UACD;;UAEA;UACA,IAAK6C,eAAe,IAAI,IAAI,CAAClF,QAAQ,CAAEmF,cAAc,CAAE,CAAC,CAAG,CAAC,EAAG;YAC9D;UACD;;UAEA;UACA,IAAK,IAAI,CAACtD,qBAAqB,CAAEH,KAAK,EAAEI,aAAa,EAAEC,SAAS,EAAEC,iBAAkB,CAAC,EAAG;YACvF;UACD;;UAEA;UACA;UACA;UACA,IAAK,IAAI,CAACzB,kBAAkB,CAAE8B,eAAgB,CAAC,EAAG;YACjD;UACD;;UAEA;AACL;AACA;AACA;AACA;UACKA,eAAe,GAAG,IAAI,CAACiD,gBAAgB,CAAEJ,eAAe,EACvDJ,iBAAiB,EACjBK,cAAc,EACdpD,SAAS,EACTK,cAAc,EACdC,eAAgB,CAAC;UAElB;QAED,KAAK,aAAa;UACjBA,eAAe,IAAIX,KAAK,CAACT,GAAG;UAC5B;QAED,KAAK,WAAW;UACfoB,eAAe,IAAIX,KAAK,CAACT,GAAG;UAE5BkE,cAAc,GAAG,IAAI,CAACrE,oBAAoB,CAAE,CAAEiB,SAAS,EAAEC,iBAAiB,CAAG,CAAC;;UAE9E;UACAkD,eAAe,GAAGC,cAAc,CAACjD,MAAM,IAAI,CAAC;UAC5C4C,iBAAiB,GAAGI,eAAe,GAAGC,cAAc,CAAE,CAAC,CAAE,GAAG,EAAE;;UAE9D;AACL;AACA;AACA;UACK,IACCD,eAAe,IAAI,IAAI,CAAClF,QAAQ,CAAEmF,cAAc,CAAE,CAAC,CAAG,CAAC,IACrD,IAAI,CAACvD,gBAAgB,CAAEE,aAAc,CAAC,IACrC,EAAI,IAAI,CAACP,wBAAwB,CAAEuD,iBAAkB,CAAC,IAAI,IAAI,CAACrD,eAAe,CAAEM,SAAU,CAAC,CAAM,EACnG;YACD;UACD;;UAEA;AACL;AACA;AACA;UACK,IACC,IAAI,CAACH,gBAAgB,CAAEE,aAAc,CAAC,KACpC,IAAI,CAACL,eAAe,CAAEM,SAAU,CAAC,IAAI,IAAI,CAACR,wBAAwB,CAAEuD,iBAAkB,CAAC,CAAE,EAC1F;YACD1C,cAAc,CAACW,IAAI,CAAEV,eAAgB,CAAC;YACtCA,eAAe,GAAG,EAAE;UACrB;UACA;MACF;IACD,CAAE,CAAC;IAEH,IAAK,EAAE,KAAKA,eAAe,EAAG;MAC7BD,cAAc,CAACW,IAAI,CAAEV,eAAgB,CAAC;IACvC;IAEA,IAAKwC,aAAa,EAAG;MACpBzC,cAAc,GAAGjE,GAAG,CAAEiE,cAAc,EAAE,UAAUgB,QAAQ,EAAG;QAC1D,OAAOA,QAAQ,CAACnB,IAAI,CAAC,CAAC;MACvB,CAAE,CAAC;IACJ;IAEA,OAAOG,cAAc;EACtB;;EAEA;AACD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACCkD,gBAAgBA,CAAEJ,eAAe,EAAEJ,iBAAiB,EAAEK,cAAc,EAAEpD,SAAS,EAAEK,cAAc,EAAEC,eAAe,EAAG;IAClH,IAAO6C,eAAe,IAAI,IAAI,CAAC3D,wBAAwB,CAAEuD,iBAAkB,CAAC,IAAI,IAAI,CAACO,iBAAiB,CAAEF,cAAc,CAAE,CAAC,CAAG,CAAC,IAC5H,IAAI,CAAC1D,eAAe,CAAEM,SAAU,CAAC,EAAG;MACpCK,cAAc,CAACW,IAAI,CAAEV,eAAgB,CAAC;MACtCA,eAAe,GAAG,EAAE;IACrB;IACA,OAAOA,eAAe;EACvB;;EAEA;AACD;AACA;AACA;AACA;AACA;EACCgD,iBAAiBA,CAAEpF,SAAS,EAAG;IAC9B,OAAO,IAAI,CAACI,IAAI,CAAEJ,SAAU,CAAC;EAC9B;AACD","ignoreList":[]}

package/build/languageProcessing/helpers/sentence/countSentences.js CHANGED Viewed

@@ -1,13 +1,7 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", {
-  value: true
-});
-exports.default = _default;
-var _getSentences = _interopRequireDefault(require("./getSentences.js"));
-function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
 /** @module stringProcessing/countSentences */
+import getSentences from "./getSentences.js";
 /**
  * Counts the number of sentences in a given string.
  *
@@ -16,8 +10,8 @@ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e
  *
  * @returns {number} The number of sentences in the text.
  */
-function _default(text, memoizedTokenizer) {
-  const sentences = (0, _getSentences.default)(text, memoizedTokenizer);
+export default function (text, memoizedTokenizer) {
+  const sentences = getSentences(text, memoizedTokenizer);
   let sentenceCount = 0;
   for (let i = 0; i < sentences.length; i++) {
     sentenceCount++;

package/build/languageProcessing/helpers/sentence/countSentences.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"countSentences.js","names":["~~_getSentences~~","~~_interopRequireDefault","require","e","__esModule","default","_default","~~text","memoizedTokenizer","sentences","~~getSentences","~~sentenceCount","i","length"],"sources":["../../../../src/languageProcessing/helpers/sentence/countSentences.js"],"sourcesContent":["/** @module stringProcessing/countSentences /\n\nimport getSentences from \"./getSentences.js\";\n\n/\n Counts the number of sentences in a given string.\n \n @param {string} text The text used to count sentences.\n * @param {function} memoizedTokenizer The memoized sentence tokenizer.\n \n @returns {number} The number of sentences in the text.\n */\nexport default function( text, memoizedTokenizer ) {\n\tconst sentences = getSentences( text, memoizedTokenizer );\n\tlet sentenceCount = 0;\n\tfor ( let i = 0; i < sentences.length; i++ ) {\n\t\tsentenceCount++;\n\t}\n\treturn sentenceCount;\n}\n"],"mappings":"~~;;;;;;~~AAEA,~~IAAAA~~,~~aAAA,GAAAC,sBAAA,CAAAC,OAAA;AAA6C,SAAAD,uBAAAE,CAAA,WAAAA,CAAA,IAAAA,CAAA,CAAAC,UAAA,GAAAD,CAAA,KAAAE,OAAA,EAAAF,CAAA;AAF7C~~;;~~AAIA~~;AACA;AACA;AACA;AACA;AACA;AACA;AACA;~~AACe~~,~~SAAAG~~,~~SAAUC~~,IAAI,EAAEC,iBAAiB,EAAG;EAClD,MAAMC,SAAS,~~GAAG~~,~~IAAAC~~,~~qBAAY~~,~~EAAEH,~~IAAI,EAAEC,iBAAkB,CAAC;EACzD,~~IAAIG~~,aAAa,GAAG,CAAC;EACrB,KAAM,IAAIC,CAAC,GAAG,CAAC,EAAEA,CAAC,~~GAAGH~~,SAAS,~~CAACI~~,MAAM,EAAED,CAAC,EAAE,EAAG;IAC5CD,aAAa,EAAE;EAChB;EACA,OAAOA,aAAa;AACrB","ignoreList":[]}
1	+ {"version":3,"file":"countSentences.js","names":["getSentences","text","memoizedTokenizer","sentences","sentenceCount","i","length"],"sources":["../../../../src/languageProcessing/helpers/sentence/countSentences.js"],"sourcesContent":["/** @module stringProcessing/countSentences /\n\nimport getSentences from \"./getSentences.js\";\n\n/\n Counts the number of sentences in a given string.\n \n @param {string} text The text used to count sentences.\n * @param {function} memoizedTokenizer The memoized sentence tokenizer.\n \n @returns {number} The number of sentences in the text.\n */\nexport default function( text, memoizedTokenizer ) {\n\tconst sentences = getSentences( text, memoizedTokenizer );\n\tlet sentenceCount = 0;\n\tfor ( let i = 0; i < sentences.length; i++ ) {\n\t\tsentenceCount++;\n\t}\n\treturn sentenceCount;\n}\n"],"mappings":"AAAA;;AAEA,OAAOA,YAAY;;AAEnB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAAUC,IAAI,EAAEC,iBAAiB,EAAG;EAClD,MAAMC,SAAS,GAAGH,YAAY,CAAEC,IAAI,EAAEC,iBAAkB,CAAC;EACzD,IAAIE,aAAa,GAAG,CAAC;EACrB,KAAM,IAAIC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGF,SAAS,CAACG,MAAM,EAAED,CAAC,EAAE,EAAG;IAC5CD,aAAa,EAAE;EAChB;EACA,OAAOA,aAAa;AACrB","ignoreList":[]}

package/build/languageProcessing/helpers/sentence/getSentences.js CHANGED Viewed

@@ -1,19 +1,12 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", {
-  value: true
-});
-exports.default = _default;
-var _lodash = require("lodash");
-var _html = require("../html/html.js");
-var _imageInText = require("../image/imageInText");
-var _stripHTMLTags = require("../sanitize/stripHTMLTags");
-var _unifyWhitespace = require("../sanitize/unifyWhitespace");
-var _memoizedSentenceTokenizer = _interopRequireDefault(require("./memoizedSentenceTokenizer"));
-function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
 // Lodash imports.
+import { filter, flatMap, isEmpty, negate } from "lodash";
 // Internal dependencies.
+import { getBlocks } from "../html/html.js";
+import { imageRegex } from "../image/imageInText";
+import { stripBlockTagsAtStartEnd } from "../sanitize/stripHTMLTags";
+import { unifyNonBreakingSpace } from "../sanitize/unifyWhitespace";
+import defaultSentenceTokenizer from "./memoizedSentenceTokenizer";
 // Character classes.
 const newLines = "\n\r|\n|\r";
@@ -30,20 +23,20 @@ const paragraphTagsRegex = new RegExp("^(<p>|</p>)$");
  *
  * @returns {Array} Sentences found in the text.
  */
-function _default(text, memoizedTokenizer = _memoizedSentenceTokenizer.default) {
+export default function (text, memoizedTokenizer = defaultSentenceTokenizer) {
   // We don't remove the other HTML tags here since removing them might lead to incorrect results when running the sentence tokenizer.
   // Unify only non-breaking spaces and not the other whitespaces since a whitespace could signify a sentence break or a new line.
-  text = (0, _unifyWhitespace.unifyNonBreakingSpace)(text);
+  text = unifyNonBreakingSpace(text);
   /*
    * Remove images from text before tokenizing it into sentences.
    * This is necessary since the highlighting feature doesn't work if the yoastmark tags are enclosing a sentence starting with an image.
    * This step is done here so that applying highlight in captions is possible for all assessments that use this helper.
    */
-  text = text.replace(_imageInText.imageRegex, "");
-  let blocks = (0, _html.getBlocks)(text);
+  text = text.replace(imageRegex, "");
+  let blocks = getBlocks(text);
   // Split each block on newlines.
-  blocks = (0, _lodash.flatMap)(blocks, function (block) {
+  blocks = flatMap(blocks, function (block) {
     return block.split(newLineRegex);
   });
@@ -65,7 +58,7 @@ function _default(text, memoizedTokenizer = _memoizedSentenceTokenizer.default)
    * After tokenized, sometimes there are still block tags present in the beginning/end of a sentence.
    * Unstripped, these tags could potentially break the highlighting functionality.
    */
-  sentences = sentences.map(sentence => (0, _stripHTMLTags.stripBlockTagsAtStartEnd)(sentence).trim());
-  return (0, _lodash.filter)(sentences, (0, _lodash.negate)(_lodash.isEmpty));
+  sentences = sentences.map(sentence => stripBlockTagsAtStartEnd(sentence).trim());
+  return filter(sentences, negate(isEmpty));
 }
 //# sourceMappingURL=getSentences.js.map

package/build/languageProcessing/helpers/sentence/getSentences.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"getSentences.js","names":["~~_lodash~~","~~require~~","~~_html~~","~~_imageInText~~","~~_stripHTMLTags~~","~~_unifyWhitespace~~","~~_memoizedSentenceTokenizer~~","~~_interopRequireDefault~~","e","~~__esModule","default","~~newLines","newLineRegex","RegExp","paragraphTagsRegex","~~_default","~~text","memoizedTokenizer","~~defaultSentenceTokenizer","unifyNonBreakingSpace","~~replace","~~imageRegex","~~blocks","~~getBlocks","flatMap","~~block","split","~~filter","~~test","sentences","map","flat","sentence","~~stripBlockTagsAtStartEnd","~~trim"~~,"negate","isEmpty"~~],"sources":["../../../../src/languageProcessing/helpers/sentence/getSentences.js"],"sourcesContent":["// Lodash imports.\nimport { filter, flatMap, isEmpty, negate } from \"lodash\";\n\n// Internal dependencies.\nimport { getBlocks } from \"../html/html.js\";\nimport { imageRegex } from \"../image/imageInText\";\nimport { stripBlockTagsAtStartEnd } from \"../sanitize/stripHTMLTags\";\nimport { unifyNonBreakingSpace } from \"../sanitize/unifyWhitespace\";\nimport defaultSentenceTokenizer from \"./memoizedSentenceTokenizer\";\n\n// Character classes.\nconst newLines = \"\\n\\r\|\\n\|\\r\";\n\n// Regular expressions.\nconst newLineRegex = new RegExp( newLines );\nconst paragraphTagsRegex = new RegExp( \"^(<p>\|</p>)$\" );\n\n/*\n Returns sentences in a string.\n \n @param {String} text The string to count sentences in.\n * @param {function} memoizedTokenizer The memoized sentence tokenizer.\n \n @returns {Array} Sentences found in the text.\n /\nexport default function( text, memoizedTokenizer = defaultSentenceTokenizer ) {\n\t// We don't remove the other HTML tags here since removing them might lead to incorrect results when running the sentence tokenizer.\n\t// Unify only non-breaking spaces and not the other whitespaces since a whitespace could signify a sentence break or a new line.\n\ttext = unifyNonBreakingSpace( text );\n\t/\n\t * Remove images from text before tokenizing it into sentences.\n\t * This is necessary since the highlighting feature doesn't work if the yoastmark tags are enclosing a sentence starting with an image.\n\t * This step is done here so that applying highlight in captions is possible for all assessments that use this helper.\n\t /\n\ttext = text.replace( imageRegex, \"\" );\n\n\tlet blocks = getBlocks( text );\n\n\t// Split each block on newlines.\n\tblocks = flatMap( blocks, function( block ) {\n\t\treturn block.split( newLineRegex );\n\t} );\n\n\t/\n\t * Filter blocks that contain only paragraph tags. This step is necessary\n\t * since switching between editors might add extra paragraph tags with a new line tag in the end\n\t * that are incorrectly converted into separate blocks.\n\t /\n\tblocks = blocks.filter( block => ! paragraphTagsRegex.test( block ) );\n\n\t/\n\t * We use the `map` method followed by `flat` instead of `flatMap` because `flatMap` would override the second\n\t * argument of the memoizedTokenizer with the index of the iteratee.\n\t /\n\tlet sentences = blocks.map( block => memoizedTokenizer( block ) ).flat();\n\n\t/\n\t * Strip block tags from the start and/or the end of each sentence and whitespaces if present.\n\t * After tokenized, sometimes there are still block tags present in the beginning/end of a sentence.\n\t * Unstripped, these tags could potentially break the highlighting functionality.\n\t */\n\tsentences = sentences.map( sentence => stripBlockTagsAtStartEnd( sentence ).trim() );\n\n\treturn filter( sentences, negate( isEmpty ) );\n}\n"],"mappings":"~~;;;;;;~~AACA,~~IAAAA~~,~~OAAA~~,~~GAAAC~~,~~OAAA;AAGA~~,~~IAAAC~~,~~KAAA~~,~~GAAAD~~,~~OAAA;AACA~~,~~IAAAE~~,~~YAAA,GAAAF,OAAA~~;AACA,~~IAAAG~~,~~cAAA~~,~~GAAAH~~,~~OAAA~~;~~AACA~~,~~IAAAI~~,~~gBAAA,GAAAJ,OAAA~~;~~AACA~~,~~IAAAK~~,~~0BAAA,GAAAC,sBAAA,CAAAN,OAAA~~;~~AAAmE~~,~~SAAAM~~,~~uBAAAC,CAAA,WAAAA,CAAA,IAAAA,CAAA,CAAAC,UAAA,GAAAD,CAAA,KAAAE,OAAA,EAAAF,CAAA;AARnE~~;;~~AAGA;;AAOA~~;AACA,~~MAAMG~~,QAAQ,GAAG,YAAY;;AAE7B;AACA,MAAMC,YAAY,GAAG,IAAIC,MAAM,CAAEF,QAAS,CAAC;AAC3C,MAAMG,kBAAkB,GAAG,IAAID,MAAM,CAAE,cAAe,CAAC;;AAEvD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;~~AACe~~,~~SAAAE~~,~~SAAUC~~,IAAI,EAAEC,iBAAiB,~~GAAGC~~,~~kCAAwB~~,EAAG;EAC7E;EACA;~~EACAF~~,IAAI,~~GAAG~~,~~IAAAG~~,~~sCAAqB~~,~~EAAEH,~~IAAK,CAAC;EACpC;AACD;AACA;AACA;AACA;EACCA,IAAI,GAAGA,IAAI,~~CAACI~~,OAAO,~~CAAEC~~,~~uBAAU~~,EAAE,EAAG,CAAC;EAErC,~~IAAIC~~,MAAM,~~GAAG~~,~~IAAAC~~,~~eAAS~~,~~EAAEP,~~IAAK,CAAC;;EAE9B;~~EACAM~~,MAAM,~~GAAG~~,~~IAAAE~~,~~eAAO~~,~~EAAEF,~~MAAM,EAAE,~~UAAUG~~,KAAK,EAAG;IAC3C,OAAOA,KAAK,CAACC,KAAK,~~CAAEd~~,YAAa,CAAC;EACnC,CAAE,CAAC;;EAEH;AACD;AACA;AACA;AACA;~~EACCU~~,MAAM,GAAGA,MAAM,~~CAACK~~,MAAM,~~CAAEF~~,KAAK,IAAI,~~CAAEX~~,kBAAkB,~~CAACc~~,IAAI,~~CAAEH~~,KAAM,CAAE,CAAC;;EAErE;AACD;AACA;AACA;EACC,~~IAAII~~,SAAS,~~GAAGP~~,MAAM,~~CAACQ~~,GAAG,~~CAAEL~~,KAAK,~~IAAIR~~,iBAAiB,~~CAAEQ~~,KAAM,CAAE,CAAC,~~CAACM~~,IAAI,CAAC,CAAC;;EAExE;AACD;AACA;AACA;AACA;EACCF,SAAS,GAAGA,SAAS,CAACC,GAAG,CAAEE,QAAQ,~~IAAI~~,~~IAAAC~~,~~uCAAwB~~,~~EAAED,~~QAAS,CAAC,~~CAACE~~,IAAI,CAAC,CAAE,CAAC;EAEpF,~~OAAO~~,~~IAAAP~~,~~cAAM~~,~~EAAEE,~~SAAS,~~EAAE~~,~~IAAAM~~,~~cAAM~~,~~EAAEC~~,~~eAAQ,~~CAAE,CAAC;AAC9C","ignoreList":[]}
1	+ {"version":3,"file":"getSentences.js","names":["filter","flatMap","isEmpty","negate","getBlocks","imageRegex","stripBlockTagsAtStartEnd","unifyNonBreakingSpace","defaultSentenceTokenizer","newLines","newLineRegex","RegExp","paragraphTagsRegex","text","memoizedTokenizer","replace","blocks","block","split","test","sentences","map","flat","sentence","trim"],"sources":["../../../../src/languageProcessing/helpers/sentence/getSentences.js"],"sourcesContent":["// Lodash imports.\nimport { filter, flatMap, isEmpty, negate } from \"lodash\";\n\n// Internal dependencies.\nimport { getBlocks } from \"../html/html.js\";\nimport { imageRegex } from \"../image/imageInText\";\nimport { stripBlockTagsAtStartEnd } from \"../sanitize/stripHTMLTags\";\nimport { unifyNonBreakingSpace } from \"../sanitize/unifyWhitespace\";\nimport defaultSentenceTokenizer from \"./memoizedSentenceTokenizer\";\n\n// Character classes.\nconst newLines = \"\\n\\r\|\\n\|\\r\";\n\n// Regular expressions.\nconst newLineRegex = new RegExp( newLines );\nconst paragraphTagsRegex = new RegExp( \"^(<p>\|</p>)$\" );\n\n/*\n Returns sentences in a string.\n \n @param {String} text The string to count sentences in.\n * @param {function} memoizedTokenizer The memoized sentence tokenizer.\n \n @returns {Array} Sentences found in the text.\n /\nexport default function( text, memoizedTokenizer = defaultSentenceTokenizer ) {\n\t// We don't remove the other HTML tags here since removing them might lead to incorrect results when running the sentence tokenizer.\n\t// Unify only non-breaking spaces and not the other whitespaces since a whitespace could signify a sentence break or a new line.\n\ttext = unifyNonBreakingSpace( text );\n\t/\n\t * Remove images from text before tokenizing it into sentences.\n\t * This is necessary since the highlighting feature doesn't work if the yoastmark tags are enclosing a sentence starting with an image.\n\t * This step is done here so that applying highlight in captions is possible for all assessments that use this helper.\n\t /\n\ttext = text.replace( imageRegex, \"\" );\n\n\tlet blocks = getBlocks( text );\n\n\t// Split each block on newlines.\n\tblocks = flatMap( blocks, function( block ) {\n\t\treturn block.split( newLineRegex );\n\t} );\n\n\t/\n\t * Filter blocks that contain only paragraph tags. This step is necessary\n\t * since switching between editors might add extra paragraph tags with a new line tag in the end\n\t * that are incorrectly converted into separate blocks.\n\t /\n\tblocks = blocks.filter( block => ! paragraphTagsRegex.test( block ) );\n\n\t/\n\t * We use the `map` method followed by `flat` instead of `flatMap` because `flatMap` would override the second\n\t * argument of the memoizedTokenizer with the index of the iteratee.\n\t /\n\tlet sentences = blocks.map( block => memoizedTokenizer( block ) ).flat();\n\n\t/\n\t * Strip block tags from the start and/or the end of each sentence and whitespaces if present.\n\t * After tokenized, sometimes there are still block tags present in the beginning/end of a sentence.\n\t * Unstripped, these tags could potentially break the highlighting functionality.\n\t */\n\tsentences = sentences.map( sentence => stripBlockTagsAtStartEnd( sentence ).trim() );\n\n\treturn filter( sentences, negate( isEmpty ) );\n}\n"],"mappings":"AAAA;AACA,SAASA,MAAM,EAAEC,OAAO,EAAEC,OAAO,EAAEC,MAAM,QAAQ,QAAQ;;AAEzD;AACA,SAASC,SAAS;AAClB,SAASC,UAAU;AACnB,SAASC,wBAAwB;AACjC,SAASC,qBAAqB;AAC9B,OAAOC,wBAAwB;;AAE/B;AACA,MAAMC,QAAQ,GAAG,YAAY;;AAE7B;AACA,MAAMC,YAAY,GAAG,IAAIC,MAAM,CAAEF,QAAS,CAAC;AAC3C,MAAMG,kBAAkB,GAAG,IAAID,MAAM,CAAE,cAAe,CAAC;;AAEvD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAAUE,IAAI,EAAEC,iBAAiB,GAAGN,wBAAwB,EAAG;EAC7E;EACA;EACAK,IAAI,GAAGN,qBAAqB,CAAEM,IAAK,CAAC;EACpC;AACD;AACA;AACA;AACA;EACCA,IAAI,GAAGA,IAAI,CAACE,OAAO,CAAEV,UAAU,EAAE,EAAG,CAAC;EAErC,IAAIW,MAAM,GAAGZ,SAAS,CAAES,IAAK,CAAC;;EAE9B;EACAG,MAAM,GAAGf,OAAO,CAAEe,MAAM,EAAE,UAAUC,KAAK,EAAG;IAC3C,OAAOA,KAAK,CAACC,KAAK,CAAER,YAAa,CAAC;EACnC,CAAE,CAAC;;EAEH;AACD;AACA;AACA;AACA;EACCM,MAAM,GAAGA,MAAM,CAAChB,MAAM,CAAEiB,KAAK,IAAI,CAAEL,kBAAkB,CAACO,IAAI,CAAEF,KAAM,CAAE,CAAC;;EAErE;AACD;AACA;AACA;EACC,IAAIG,SAAS,GAAGJ,MAAM,CAACK,GAAG,CAAEJ,KAAK,IAAIH,iBAAiB,CAAEG,KAAM,CAAE,CAAC,CAACK,IAAI,CAAC,CAAC;;EAExE;AACD;AACA;AACA;AACA;EACCF,SAAS,GAAGA,SAAS,CAACC,GAAG,CAAEE,QAAQ,IAAIjB,wBAAwB,CAAEiB,QAAS,CAAC,CAACC,IAAI,CAAC,CAAE,CAAC;EAEpF,OAAOxB,MAAM,CAAEoB,SAAS,EAAEjB,MAAM,CAAED,OAAQ,CAAE,CAAC;AAC9C","ignoreList":[]}

package/build/languageProcessing/helpers/sentence/getSentencesFromTree.js CHANGED Viewed

@@ -1,10 +1,3 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", {
-  value: true
-});
-exports.default = _default;
-exports.getParentNode = getParentNode;
 /**
  * Retrieves the start offset for a given node.
  * @param {Node} node The current node.
@@ -20,7 +13,7 @@ function getStartOffset(node) {
  * @param {Node} 	node 	The current node.
  * @returns {Node} The parent node.
  */
-function getParentNode(paper, node) {
+export function getParentNode(paper, node) {
   // Includes a fallback so that if a parent node cannot be found for an implicit paragraph, we use the current node as the parent node.
   return paper.getTree().findAll(treeNode => treeNode.childNodes && treeNode.childNodes.includes(node))[0] || node;
 }
@@ -33,7 +26,7 @@ function getParentNode(paper, node) {
  *
  * @returns {Sentence[]} The array of sentences retrieved from paragraph and heading nodes plus sourceCodeLocation of the parent node.
  */
-function _default(paper) {
+export default function (paper) {
   // Get all nodes that have a sentence property which is not an empty array.
   const tree = paper.getTree().findAll(treeNode => !!treeNode.sentences);
   return tree.flatMap(node => node.sentences.map(sentence => {

package/build/languageProcessing/helpers/sentence/getSentencesFromTree.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"getSentencesFromTree.js","names":["getStartOffset","node","sourceCodeLocation","startTag","endOffset","startOffset","getParentNode","paper","getTree","findAll","treeNode","childNodes","includes","~~_default","~~tree","sentences","flatMap","map","sentence","parentNode","isImplicit","parentStartOffset","parentClientId","clientId","parentAttributeId","attributeId","isParentFirstSectionOfBlock","isFirstSection"],"sources":["../../../../src/languageProcessing/helpers/sentence/getSentencesFromTree.js"],"sourcesContent":["/*\n Retrieves the start offset for a given node.\n * @param {Node} node The current node.\n * @returns {number} The start offset.\n /\nfunction getStartOffset( node ) {\n\treturn node.sourceCodeLocation &&\n\t\t( ( node.sourceCodeLocation.startTag && node.sourceCodeLocation.startTag.endOffset ) \|\| node.sourceCodeLocation.startOffset ) \|\| 0;\n}\n\n/\n Retrieves the parent node for a given node.\n * @param {Paper} \tpaper \tThe current paper.\n * @param {Node} \tnode \tThe current node.\n * @returns {Node} The parent node.\n /\nexport function getParentNode( paper, node ) {\n\t// Includes a fallback so that if a parent node cannot be found for an implicit paragraph, we use the current node as the parent node.\n\treturn paper.getTree().findAll( treeNode => treeNode.childNodes && treeNode.childNodes.includes( node ) )[ 0 ] \|\| node;\n}\n\n/\n Gets all the sentences from paragraph and heading nodes.\n * These two node types are the nodes that should contain sentences for the analysis.\n \n @param {Paper} paper The paper to get the sentences from.\n \n @returns {Sentence[]} The array of sentences retrieved from paragraph and heading nodes plus sourceCodeLocation of the parent node.\n */\nexport default function( paper ) {\n\t// Get all nodes that have a sentence property which is not an empty array.\n\tconst tree = paper.getTree().findAll( treeNode => !! treeNode.sentences );\n\n\treturn tree.flatMap( node => node.sentences.map( sentence => {\n\t\tlet parentNode = node;\n\n\t\t// For implicit paragraphs, base the details on the parent of this node.\n\t\tif ( node.isImplicit ) {\n\t\t\tparentNode = getParentNode( paper, node );\n\t\t}\n\n\t\treturn {\n\t\t\t...sentence,\n\t\t\t// The parent node's start offset is the start offset of the parent node if it doesn't have a `startTag` property.\n\t\t\tparentStartOffset: getStartOffset( parentNode ),\n\t\t\t// The block client id of the parent node.\n\t\t\tparentClientId: parentNode.clientId \|\| \"\",\n\t\t\t// The attribute id of the parent node, if available, otherwise an empty string.\n\t\t\t// Only used for position-based highlighting in sub-blocks of Yoast blocks.\n\t\t\tparentAttributeId: node.attributeId \|\| \"\",\n\t\t\t// Whether the parent node is the first section of Yoast sub-blocks. Only used for position-based highlighting.\n\t\t\tisParentFirstSectionOfBlock: node.isFirstSection \|\| false,\n\t\t};\n\t} ) );\n}\n"],"mappings":"~~;;;;;;;~~AAAA;AACA;AACA;AACA;AACA;AACA,SAASA,cAAcA,CAAEC,IAAI,EAAG;EAC/B,OAAOA,IAAI,CAACC,kBAAkB,KACzBD,IAAI,CAACC,kBAAkB,CAACC,QAAQ,IAAIF,IAAI,CAACC,kBAAkB,CAACC,QAAQ,CAACC,SAAS,IAAMH,IAAI,CAACC,kBAAkB,CAACG,WAAW,CAAE,IAAI,CAAC;AACpI;;AAEA;AACA;AACA;AACA;AACA;AACA;~~AACO~~,SAASC,aAAaA,CAAEC,KAAK,EAAEN,IAAI,EAAG;EAC5C;EACA,OAAOM,KAAK,CAACC,OAAO,CAAC,CAAC,CAACC,OAAO,CAAEC,QAAQ,IAAIA,QAAQ,CAACC,UAAU,IAAID,QAAQ,CAACC,UAAU,CAACC,QAAQ,CAAEX,IAAK,CAAE,CAAC,CAAE,CAAC,CAAE,IAAIA,IAAI;AACvH;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;~~AACe~~,~~SAAAY~~,~~SAAUN~~,KAAK,EAAG;EAChC;EACA,~~MAAMO~~,IAAI,~~GAAGP~~,KAAK,CAACC,OAAO,CAAC,CAAC,CAACC,OAAO,CAAEC,QAAQ,IAAI,CAAC,CAAEA,QAAQ,~~CAACK~~,SAAU,CAAC;EAEzE,OAAOD,IAAI,CAACE,OAAO,~~CAAEf~~,IAAI,IAAIA,IAAI,~~CAACc~~,SAAS,CAACE,GAAG,CAAEC,QAAQ,IAAI;IAC5D,IAAIC,UAAU,~~GAAGlB~~,IAAI;;IAErB;IACA,IAAKA,IAAI,~~CAACmB~~,UAAU,EAAG;MACtBD,UAAU,~~GAAGb~~,aAAa,CAAEC,KAAK,EAAEN,IAAK,CAAC;IAC1C;IAEA,OAAO;MACN,~~GAAGiB~~,QAAQ;MACX;MACAG,iBAAiB,~~EAAErB~~,cAAc,~~CAAEmB~~,UAAW,CAAC;MAC/C;MACAG,cAAc,EAAEH,UAAU,CAACI,QAAQ,IAAI,EAAE;MACzC;MACA;MACAC,iBAAiB,~~EAAEvB~~,IAAI,~~CAACwB~~,WAAW,IAAI,EAAE;MACzC;MACAC,2BAA2B,~~EAAEzB~~,IAAI,~~CAAC0B~~,cAAc,IAAI;IACrD,CAAC;EACF,CAAE,CAAE,CAAC;AACN","ignoreList":[]}
1	+ {"version":3,"file":"getSentencesFromTree.js","names":["getStartOffset","node","sourceCodeLocation","startTag","endOffset","startOffset","getParentNode","paper","getTree","findAll","treeNode","childNodes","includes","tree","sentences","flatMap","map","sentence","parentNode","isImplicit","parentStartOffset","parentClientId","clientId","parentAttributeId","attributeId","isParentFirstSectionOfBlock","isFirstSection"],"sources":["../../../../src/languageProcessing/helpers/sentence/getSentencesFromTree.js"],"sourcesContent":["/*\n Retrieves the start offset for a given node.\n * @param {Node} node The current node.\n * @returns {number} The start offset.\n /\nfunction getStartOffset( node ) {\n\treturn node.sourceCodeLocation &&\n\t\t( ( node.sourceCodeLocation.startTag && node.sourceCodeLocation.startTag.endOffset ) \|\| node.sourceCodeLocation.startOffset ) \|\| 0;\n}\n\n/\n Retrieves the parent node for a given node.\n * @param {Paper} \tpaper \tThe current paper.\n * @param {Node} \tnode \tThe current node.\n * @returns {Node} The parent node.\n /\nexport function getParentNode( paper, node ) {\n\t// Includes a fallback so that if a parent node cannot be found for an implicit paragraph, we use the current node as the parent node.\n\treturn paper.getTree().findAll( treeNode => treeNode.childNodes && treeNode.childNodes.includes( node ) )[ 0 ] \|\| node;\n}\n\n/\n Gets all the sentences from paragraph and heading nodes.\n * These two node types are the nodes that should contain sentences for the analysis.\n \n @param {Paper} paper The paper to get the sentences from.\n \n @returns {Sentence[]} The array of sentences retrieved from paragraph and heading nodes plus sourceCodeLocation of the parent node.\n */\nexport default function( paper ) {\n\t// Get all nodes that have a sentence property which is not an empty array.\n\tconst tree = paper.getTree().findAll( treeNode => !! treeNode.sentences );\n\n\treturn tree.flatMap( node => node.sentences.map( sentence => {\n\t\tlet parentNode = node;\n\n\t\t// For implicit paragraphs, base the details on the parent of this node.\n\t\tif ( node.isImplicit ) {\n\t\t\tparentNode = getParentNode( paper, node );\n\t\t}\n\n\t\treturn {\n\t\t\t...sentence,\n\t\t\t// The parent node's start offset is the start offset of the parent node if it doesn't have a `startTag` property.\n\t\t\tparentStartOffset: getStartOffset( parentNode ),\n\t\t\t// The block client id of the parent node.\n\t\t\tparentClientId: parentNode.clientId \|\| \"\",\n\t\t\t// The attribute id of the parent node, if available, otherwise an empty string.\n\t\t\t// Only used for position-based highlighting in sub-blocks of Yoast blocks.\n\t\t\tparentAttributeId: node.attributeId \|\| \"\",\n\t\t\t// Whether the parent node is the first section of Yoast sub-blocks. Only used for position-based highlighting.\n\t\t\tisParentFirstSectionOfBlock: node.isFirstSection \|\| false,\n\t\t};\n\t} ) );\n}\n"],"mappings":"AAAA;AACA;AACA;AACA;AACA;AACA,SAASA,cAAcA,CAAEC,IAAI,EAAG;EAC/B,OAAOA,IAAI,CAACC,kBAAkB,KACzBD,IAAI,CAACC,kBAAkB,CAACC,QAAQ,IAAIF,IAAI,CAACC,kBAAkB,CAACC,QAAQ,CAACC,SAAS,IAAMH,IAAI,CAACC,kBAAkB,CAACG,WAAW,CAAE,IAAI,CAAC;AACpI;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA,OAAO,SAASC,aAAaA,CAAEC,KAAK,EAAEN,IAAI,EAAG;EAC5C;EACA,OAAOM,KAAK,CAACC,OAAO,CAAC,CAAC,CAACC,OAAO,CAAEC,QAAQ,IAAIA,QAAQ,CAACC,UAAU,IAAID,QAAQ,CAACC,UAAU,CAACC,QAAQ,CAAEX,IAAK,CAAE,CAAC,CAAE,CAAC,CAAE,IAAIA,IAAI;AACvH;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe,UAAUM,KAAK,EAAG;EAChC;EACA,MAAMM,IAAI,GAAGN,KAAK,CAACC,OAAO,CAAC,CAAC,CAACC,OAAO,CAAEC,QAAQ,IAAI,CAAC,CAAEA,QAAQ,CAACI,SAAU,CAAC;EAEzE,OAAOD,IAAI,CAACE,OAAO,CAAEd,IAAI,IAAIA,IAAI,CAACa,SAAS,CAACE,GAAG,CAAEC,QAAQ,IAAI;IAC5D,IAAIC,UAAU,GAAGjB,IAAI;;IAErB;IACA,IAAKA,IAAI,CAACkB,UAAU,EAAG;MACtBD,UAAU,GAAGZ,aAAa,CAAEC,KAAK,EAAEN,IAAK,CAAC;IAC1C;IAEA,OAAO;MACN,GAAGgB,QAAQ;MACX;MACAG,iBAAiB,EAAEpB,cAAc,CAAEkB,UAAW,CAAC;MAC/C;MACAG,cAAc,EAAEH,UAAU,CAACI,QAAQ,IAAI,EAAE;MACzC;MACA;MACAC,iBAAiB,EAAEtB,IAAI,CAACuB,WAAW,IAAI,EAAE;MACzC;MACAC,2BAA2B,EAAExB,IAAI,CAACyB,cAAc,IAAI;IACrD,CAAC;EACF,CAAE,CAAE,CAAC;AACN","ignoreList":[]}

package/build/languageProcessing/helpers/sentence/memoizedSentenceTokenizer.js CHANGED Viewed

@@ -1,12 +1,6 @@
-"use strict";
+import SentenceTokenizer from "./SentenceTokenizer";
+import { memoize } from "lodash";
-Object.defineProperty(exports, "__esModule", {
-  value: true
-});
-exports.default = void 0;
-var _SentenceTokenizer = _interopRequireDefault(require("./SentenceTokenizer"));
-var _lodash = require("lodash");
-function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
 /**
  * Returns the sentences from a certain text.
  *
@@ -16,7 +10,7 @@ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e
  * @returns {Array<string>} The list of sentences in the text.
  */
 function getSentenceTokenizer(text, trimSentences = true) {
-  const sentenceTokenizer = new _SentenceTokenizer.default();
+  const sentenceTokenizer = new SentenceTokenizer();
   const {
     tokenizer,
     tokens
@@ -33,5 +27,5 @@ function getSentenceTokenizer(text, trimSentences = true) {
  * We want to re-run the getSentenceTokenizer function also when only the second argument changes to prevent cache collisions.
  * @see https://lodash.com/docs/4.17.15#memoize
  */
-var _default = exports.default = (0, _lodash.memoize)(getSentenceTokenizer, (...args) => JSON.stringify(args));
+export default memoize(getSentenceTokenizer, (...args) => JSON.stringify(args));
 //# sourceMappingURL=memoizedSentenceTokenizer.js.map

package/build/languageProcessing/helpers/sentence/memoizedSentenceTokenizer.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"memoizedSentenceTokenizer.js","names":["~~_SentenceTokenizer~~","~~_interopRequireDefault~~","~~require","_lodash","e","__esModule","default","~~getSentenceTokenizer","text","trimSentences","sentenceTokenizer","~~SentenceTokenizer","~~tokenizer","tokens","createTokenizer","tokenize","length","getSentencesFromTokens","~~_default","exports","memoize","~~args","JSON","stringify"],"sources":["../../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer.js"],"sourcesContent":["import SentenceTokenizer from \"./SentenceTokenizer\";\nimport { memoize } from \"lodash\";\n\n/*\n Returns the sentences from a certain text.\n \n @param {string} text \t\t\t\t\tThe text to retrieve sentences from.\n * @param {boolean} [trimSentences=true] \tWhether to trim whitespace from the beginning and end of the sentences or not.\n \n @returns {Array<string>} The list of sentences in the text.\n /\nfunction getSentenceTokenizer( text, trimSentences = true ) {\n\tconst sentenceTokenizer = new SentenceTokenizer();\n\tconst { tokenizer, tokens } = sentenceTokenizer.createTokenizer();\n\tsentenceTokenizer.tokenize( tokenizer, text );\n\n\treturn ( tokens.length === 0 ? [] : sentenceTokenizer.getSentencesFromTokens( tokens, trimSentences ) );\n}\n\n/\n * The second argument to the memoize function is a so-called resolver function.\n * It creates a cache key consisting of a combination of all arguments to a function.\n * This is needed because by default, only the first argument to a function is used as the map cache key by the memoize function.\n * This means that a function is only re-run if the value of the first argument changes.\n * We want to re-run the getSentenceTokenizer function also when only the second argument changes to prevent cache collisions.\n * @see https://lodash.com/docs/4.17.15#memoize\n */\nexport default memoize( getSentenceTokenizer, ( ...args ) => JSON.stringify( args ) );\n"],"mappings":"~~;;;;;;~~AAAA,~~IAAAA~~,~~kBAAA,GAAAC,sBAAA,CAAAC,OAAA~~;~~AACA~~,~~IAAAC~~,~~OAAA~~,~~GAAAD~~,~~OAAA~~;~~AAAiC,SAAAD,uBAAAG,CAAA,WAAAA,CAAA,IAAAA,CAAA,CAAAC,UAAA,GAAAD,CAAA,KAAAE,OAAA,EAAAF,CAAA;AAEjC;~~AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,~~SAASG~~,oBAAoBA,CAAEC,IAAI,EAAEC,aAAa,GAAG,IAAI,EAAG;EAC3D,MAAMC,iBAAiB,GAAG,~~IAAIC~~,~~0BAAiB~~,CAAC,CAAC;EACjD,MAAM;~~IAAEC~~,SAAS;IAAEC;EAAO,CAAC,~~GAAGH~~,iBAAiB,~~CAACI~~,eAAe,CAAC,CAAC;~~EACjEJ~~,iBAAiB,~~CAACK~~,QAAQ,CAAEH,SAAS,~~EAAEJ~~,IAAK,CAAC;EAE7C,~~OAASK~~,MAAM,CAACG,MAAM,KAAK,CAAC,GAAG,EAAE,~~GAAGN~~,iBAAiB,~~CAACO~~,sBAAsB,CAAEJ,MAAM,~~EAAEJ~~,aAAc,CAAC;AACtG;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;~~AAPA~~,~~IAAAS~~,~~QAAA~~,~~GAAAC~~,~~OAAA,CAAAb,OAAA,GAQe,IAAAc,eAAO,EAAEb,~~oBAAoB,EAAE,CAAE,~~GAAGc~~,IAAI,KAAMC,IAAI,CAACC,SAAS,CAAEF,IAAK,CAAE,CAAC","ignoreList":[]}
1	+ {"version":3,"file":"memoizedSentenceTokenizer.js","names":["SentenceTokenizer","memoize","getSentenceTokenizer","text","trimSentences","sentenceTokenizer","tokenizer","tokens","createTokenizer","tokenize","length","getSentencesFromTokens","args","JSON","stringify"],"sources":["../../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer.js"],"sourcesContent":["import SentenceTokenizer from \"./SentenceTokenizer\";\nimport { memoize } from \"lodash\";\n\n/*\n Returns the sentences from a certain text.\n \n @param {string} text \t\t\t\t\tThe text to retrieve sentences from.\n * @param {boolean} [trimSentences=true] \tWhether to trim whitespace from the beginning and end of the sentences or not.\n \n @returns {Array<string>} The list of sentences in the text.\n /\nfunction getSentenceTokenizer( text, trimSentences = true ) {\n\tconst sentenceTokenizer = new SentenceTokenizer();\n\tconst { tokenizer, tokens } = sentenceTokenizer.createTokenizer();\n\tsentenceTokenizer.tokenize( tokenizer, text );\n\n\treturn ( tokens.length === 0 ? [] : sentenceTokenizer.getSentencesFromTokens( tokens, trimSentences ) );\n}\n\n/\n * The second argument to the memoize function is a so-called resolver function.\n * It creates a cache key consisting of a combination of all arguments to a function.\n * This is needed because by default, only the first argument to a function is used as the map cache key by the memoize function.\n * This means that a function is only re-run if the value of the first argument changes.\n * We want to re-run the getSentenceTokenizer function also when only the second argument changes to prevent cache collisions.\n * @see https://lodash.com/docs/4.17.15#memoize\n */\nexport default memoize( getSentenceTokenizer, ( ...args ) => JSON.stringify( args ) );\n"],"mappings":"AAAA,OAAOA,iBAAiB;AACxB,SAASC,OAAO,QAAQ,QAAQ;;AAEhC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASC,oBAAoBA,CAAEC,IAAI,EAAEC,aAAa,GAAG,IAAI,EAAG;EAC3D,MAAMC,iBAAiB,GAAG,IAAIL,iBAAiB,CAAC,CAAC;EACjD,MAAM;IAAEM,SAAS;IAAEC;EAAO,CAAC,GAAGF,iBAAiB,CAACG,eAAe,CAAC,CAAC;EACjEH,iBAAiB,CAACI,QAAQ,CAAEH,SAAS,EAAEH,IAAK,CAAC;EAE7C,OAASI,MAAM,CAACG,MAAM,KAAK,CAAC,GAAG,EAAE,GAAGL,iBAAiB,CAACM,sBAAsB,CAAEJ,MAAM,EAAEH,aAAc,CAAC;AACtG;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAeH,OAAO,CAAEC,oBAAoB,EAAE,CAAE,GAAGU,IAAI,KAAMC,IAAI,CAACC,SAAS,CAAEF,IAAK,CAAE,CAAC","ignoreList":[]}

package/build/languageProcessing/helpers/sentence/sentencesLength.js CHANGED Viewed

@@ -1,13 +1,7 @@
-"use strict";
+import wordCount from "../word/countWords.js";
+import { forEach } from "lodash";
+import { stripFullTags as stripHTMLTags } from "../sanitize/stripHTMLTags.js";
-Object.defineProperty(exports, "__esModule", {
-  value: true
-});
-exports.default = _default;
-var _countWords = _interopRequireDefault(require("../word/countWords.js"));
-var _lodash = require("lodash");
-var _stripHTMLTags = require("../sanitize/stripHTMLTags.js");
-function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
 /**
  * Returns an array with the length of each sentence.
  *
@@ -16,14 +10,14 @@ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e
  *
  * @returns {Array} Array with the length of each sentence.
  */
-function _default(sentences, researcher) {
+export default function (sentences, researcher) {
   const sentencesWordCount = [];
-  (0, _lodash.forEach)(sentences, function (sentence) {
+  forEach(sentences, function (sentence) {
     // For counting words we want to omit the HTMLtags.
-    const strippedSentence = (0, _stripHTMLTags.stripFullTags)(sentence);
+    const strippedSentence = stripHTMLTags(sentence);
     // A helper to count characters for languages that don't count number of words for text length.
     const countCharacters = researcher.getHelper("customCountLength");
-    const length = countCharacters ? countCharacters(strippedSentence) : (0, _countWords.default)(strippedSentence);
+    const length = countCharacters ? countCharacters(strippedSentence) : wordCount(strippedSentence);
     if (length <= 0) {
       return;
     }