rita 3.0.22 → 3.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/rita.js CHANGED
@@ -845,32 +845,32 @@ var Tokenizer = class {
845
845
  * Returns an array containing all unique alphabetical words (tokens) in the text.
846
846
  * Punctuation and case are ignored unless specified otherwise.
847
847
  * @param {string} text - The text from which to extract the tokens
848
- * @param {object} [opts] - The options
849
- * @param {boolean} opts.caseSensitive=false - Whether to pay attention to case
850
- * @param {boolean} opts.ignoreStopWords=false - Whether to ignore words like 'the', 'and', 'a', 'of', etc, as specified in RiTa.STOP_WORDS
851
- * @param {boolean} opts.splitContractions=false - Whether to convert contractions (e.g., "I'd" or "she'll") into multiple individual tokens
852
- * @param {boolean} opts.includePunct=false - Whether to include punctuation in the results
853
- * @param {boolean} opts.sort=false - Whether to sort the tokens before returning them
848
+ * @param {object} [options] - The options
849
+ * @param {boolean} [options.caseSensitive=false] - Whether to pay attention to case
850
+ * @param {boolean} [options.ignoreStopWords=false] - Whether to ignore words like 'the', 'and', 'a', 'of', etc, as specified in RiTa.STOP_WORDS
851
+ * @param {boolean} [options.splitContractions=false] - Whether to convert contractions (e.g., "I'd" or "she'll") into multiple individual tokens
852
+ * @param {boolean} [options.includePunct=false] - Whether to include punctuation in the results
853
+ * @param {boolean} [options.sort=false] - Whether to sort the tokens before returning them
854
854
  * @returns {string[]} Array of tokens
855
855
  */
856
- tokens(text, opts = {
856
+ tokens(text, options = {
857
857
  caseSensitive: false,
858
858
  ignoreStopWords: false,
859
859
  splitContractions: false,
860
860
  includePunct: false,
861
861
  sort: false
862
862
  }) {
863
- let words = this.tokenize(text, opts), map = {};
863
+ let words = this.tokenize(text, options), map = {};
864
864
  words.forEach((w) => {
865
- if (!opts.caseSensitive)
865
+ if (!options.caseSensitive)
866
866
  w = w.toLowerCase();
867
- if (opts.includePunct || ALPHA_RE.test(w))
867
+ if (options.includePunct || ALPHA_RE.test(w))
868
868
  map[w] = 1;
869
869
  });
870
870
  let tokens = Object.keys(map);
871
- if (opts.ignoreStopWords)
871
+ if (options.ignoreStopWords)
872
872
  tokens = tokens.filter((t) => !this.RiTa.isStopWord(t));
873
- return opts.sort ? tokens.sort() : tokens;
873
+ return options.sort ? tokens.sort() : tokens;
874
874
  }
875
875
  tokenize(input, opts = {
876
876
  // regex: null,
@@ -41591,8 +41591,7 @@ var Analyzer = class {
41591
41591
  return features;
41592
41592
  }
41593
41593
  computePhones(word, opts) {
41594
- if (!this.lts)
41595
- this.lts = new rita_lts_default(this.RiTa);
41594
+ this.lts = this.lts || new rita_lts_default(this.RiTa);
41596
41595
  return this.lts.buildPhones(word, opts);
41597
41596
  }
41598
41597
  phonesToStress(phones) {
@@ -41609,8 +41608,7 @@ var Analyzer = class {
41609
41608
  return stress;
41610
41609
  }
41611
41610
  analyzeWord(word, opts = {}) {
41612
- let RiTa2 = this.RiTa;
41613
- let result = RiTa2.CACHING && this.cache[word];
41611
+ let result = this.RiTa.CACHING && this.cache[word];
41614
41612
  if (typeof result === "undefined") {
41615
41613
  let slash = "/", delim = "-";
41616
41614
  let lex = this.RiTa.lexicon;
@@ -41639,7 +41637,7 @@ var Analyzer = class {
41639
41637
  }
41640
41638
  result = { phones, stresses, syllables };
41641
41639
  Object.keys(result).forEach((k) => result[k] = result[k].trim());
41642
- if (RiTa2.CACHING)
41640
+ if (this.RiTa.CACHING)
41643
41641
  this.cache[word] = result;
41644
41642
  }
41645
41643
  return result;
@@ -41926,15 +41924,29 @@ var randgen_default = SeededRandom;
41926
41924
  import { parse, stringify } from "@ungap/structured-clone/json";
41927
41925
  var _RiMarkov = class _RiMarkov {
41928
41926
  // RiTa
41929
- constructor(n, opts = {}) {
41927
+ /**
41928
+ * Creates a new RiMarkov object with functions for text-generation and other probabilistic functions,
41929
+ * via Markov chains (or n-grams) with options to process words or tokens split by arbitrary regular expressions.
41930
+ * @param {number} [n] - the n-gram size (an integer >= 2)
41931
+ * @param {object} [options={}] - options for the model
41932
+ * @param {string|string[]} [options.text] - a text string, or array of sentences, to add to the model (same as via model.addText())
41933
+ * @param {boolean} [options.trace] - output trace info to the console
41934
+ * @param {number} [options.maxLengthMatch] - # of words allowed in result to match a sequence in the input, default=∞
41935
+ * @param {number} [options.maxAttempts=999] - max attempts to complete one or more generations before erroring, default=999
41936
+ * @param {function} [options.tokenize] - custom tokenizer with tokenize() method, defaults to RiTa.tokenize()
41937
+ * @param {function} [options.untokenize] - custom untokenizer with untokenize() method, defaults to RiTa.untokenize()
41938
+ * @param {boolean} [options.disableInputChecks=false] - if true, allow result to be present in the input, default=false
41939
+ * @memberof RiMarkov
41940
+ */
41941
+ constructor(n, options = {}) {
41930
41942
  this.n = n;
41931
41943
  this.root = new Node(null, "ROOT");
41932
- this.trace = opts.trace;
41933
- this.mlm = opts.maxLengthMatch;
41934
- this.maxAttempts = opts.maxAttempts || 999;
41935
- this.tokenize = opts.tokenize || _RiMarkov.parent.tokenize;
41936
- this.untokenize = opts.untokenize || _RiMarkov.parent.untokenize;
41937
- this.disableInputChecks = opts.disableInputChecks;
41944
+ this.trace = options.trace;
41945
+ this.mlm = options.maxLengthMatch;
41946
+ this.maxAttempts = options.maxAttempts || 999;
41947
+ this.tokenize = options.tokenize || _RiMarkov.parent.tokenize;
41948
+ this.untokenize = options.untokenize || _RiMarkov.parent.untokenize;
41949
+ this.disableInputChecks = options.disableInputChecks;
41938
41950
  this.sentenceStarts = [];
41939
41951
  this.sentenceEnds = /* @__PURE__ */ new Set();
41940
41952
  if (this.n < 2)
@@ -41943,9 +41955,16 @@ var _RiMarkov = class _RiMarkov {
41943
41955
  throw Error("maxLengthMatch must be >= N");
41944
41956
  if (!this.disableInputChecks || this.mlm)
41945
41957
  this.input = [];
41946
- if (opts.text)
41947
- this.addText(opts.text);
41958
+ if (options.text)
41959
+ this.addText(options.text);
41948
41960
  }
41961
+ /**
41962
+ * Loads text into the model. If a raw string is provided, it will be split into sentences
41963
+ * via RiTa.sentences(). If an array is provided, each string will be treated as an individual sentence.
41964
+ * @param {string|string[]} text - a text string, or array of sentences, to add to the model
41965
+ * @param {number} [multiplier=1] - number of times to add the text to the model
41966
+ * @return {RiMarkov} - the RiMarkov instance
41967
+ */
41949
41968
  addText(text, multiplier = 1) {
41950
41969
  let sents = Array.isArray(text) ? text : _RiMarkov.parent.sentences(text);
41951
41970
  let wrap, allWords = [];
@@ -41963,19 +41982,32 @@ var _RiMarkov = class _RiMarkov {
41963
41982
  this.input.push(allWords[i]);
41964
41983
  }
41965
41984
  }
41985
+ return this;
41966
41986
  }
41967
- generate(count, opts = {}) {
41987
+ /**
41988
+ * Generates `count` joined sentences from the model.
41989
+ * @param {number} [count=1] - the number of sentences to generate (default=1)
41990
+ * @param {object} [options={}] - options for the generation
41991
+ * @param {number} [options.minLength=5] - minimum length of each sentence
41992
+ * @param {number} [options.maxLength=35] - maximum length of each sentence
41993
+ * @param {number} [options.temperature=1] - temperature acts as a knob to adjust the probability that input elements will be selected for the output. At higher values, infrequent words are more likely to be chosen, while at lower values the most frequent inputs are more likely to be output. If no value is provided, then tokens are chosen according to their relative frequency in the input.
41994
+ * @param {boolean} [options.allowDuplicates=false] - if true, allow duplicate sentences in the output
41995
+ * @param {string|string[]} [options.seed] - a seed string or array of tokens to start the generation
41996
+ * @param {boolean} [options.trace] - output trace info to the console
41997
+ * @return {string[]} - the generated sentences
41998
+ */
41999
+ generate(count, options = {}) {
41968
42000
  if (arguments.length === 1 && typeof count === "object") {
41969
- opts = count;
42001
+ options = count;
41970
42002
  count = 1;
41971
42003
  }
41972
42004
  const num = count || 1;
41973
- const minLength = opts.minLength || 5;
41974
- const maxLength = opts.maxLength || 35;
41975
- if (typeof opts.temperature !== "undefined" && opts.temperature <= 0) {
42005
+ const minLength = options.minLength || 5;
42006
+ const maxLength = options.maxLength || 35;
42007
+ if (typeof options.temperature !== "undefined" && options.temperature <= 0) {
41976
42008
  throw Error("Temperature option must be greater than 0");
41977
42009
  }
41978
- let tries = 0, tokens = [], usedStarts = [];
42010
+ let tries = 0, tokens = [];
41979
42011
  let minIdx = 0, sentenceIdxs = [];
41980
42012
  let markedNodes = [];
41981
42013
  const unmarkNodes = () => {
@@ -42014,7 +42046,7 @@ var _RiMarkov = class _RiMarkov {
42014
42046
  return false;
42015
42047
  }
42016
42048
  let flatSent = this.untokenize(sentence);
42017
- if (!opts.allowDuplicates && isSubArray(sentence, tokens.slice(0, sentIdx))) {
42049
+ if (!options.allowDuplicates && isSubArray(sentence, tokens.slice(0, sentIdx))) {
42018
42050
  fail("duplicate (pop: " + next.token + ")");
42019
42051
  return false;
42020
42052
  }
@@ -42101,7 +42133,7 @@ var _RiMarkov = class _RiMarkov {
42101
42133
  return len ? sentenceIdxs[len - 1] : 0;
42102
42134
  };
42103
42135
  const selectStart = () => {
42104
- let seed = opts.seed;
42136
+ let seed = options.seed;
42105
42137
  if (seed && seed.length) {
42106
42138
  if (typeof seed === "string")
42107
42139
  seed = this.tokenize(seed);
@@ -42131,7 +42163,7 @@ var _RiMarkov = class _RiMarkov {
42131
42163
  continue;
42132
42164
  }
42133
42165
  let parent = this._pathTo(tokens);
42134
- let next = this._selectNext(parent, opts.temperature, tokens, notMarked);
42166
+ let next = this._selectNext(parent, options.temperature, tokens, notMarked);
42135
42167
  if (!next) {
42136
42168
  fail("mlm-fail(" + this.mlm + ")", this._flatten(tokens), true);
42137
42169
  continue;
@@ -42152,11 +42184,20 @@ var _RiMarkov = class _RiMarkov {
42152
42184
  let str = this.untokenize(tokens.map((t) => t.token)).trim();
42153
42185
  return num > 1 ? this._splitEnds(str) : str;
42154
42186
  }
42187
+ /**
42188
+ * Converts the model to a JSON-formatted string for storage or serialization
42189
+ * @return {string} - the JSON string
42190
+ */
42155
42191
  toJSON() {
42156
42192
  let data = Object.keys(this).reduce((acc, k) => Object.assign(acc, { [k]: this[k] }), {});
42157
42193
  data.sentenceEnds = [...data.sentenceEnds];
42158
42194
  return stringify(data);
42159
42195
  }
42196
+ /**
42197
+ * Creates a new model from one previously saved as JSON
42198
+ * @param {string} json - the JSON string to load
42199
+ * @return {RiMarkov} - the RiMarkov instance
42200
+ */
42160
42201
  static fromJSON(json) {
42161
42202
  let parsed = parse(json);
42162
42203
  let rm = Object.assign(new _RiMarkov(), parsed);
@@ -42167,7 +42208,13 @@ var _RiMarkov = class _RiMarkov {
42167
42208
  populate(rm.root = new Node(null, "ROOT"), jsonRoot);
42168
42209
  return rm;
42169
42210
  }
42170
- /* returns array of possible tokens after pre and (optionally) before post */
42211
+ /**
42212
+ * Returns array of possible tokens after pre and (optionally) before post. If only one array parameter is provided, this function returns all possible next words, ordered by probability, for the given array.
42213
+ * If two arrays are provided, it returns an unordered list of possible words w that complete the n-gram consisting of: pre[0]...pre[k], w, post[k+1]...post[n].
42214
+ * @param {string[]} pre - the list of tokens preceding the completion
42215
+ * @param {string[]} [post] - the (optional) list of tokens following the completion
42216
+ * @return {string[]} - an unordered list of possible next tokens
42217
+ */
42171
42218
  completions(pre, post) {
42172
42219
  let tn, result = [];
42173
42220
  if (post) {
@@ -42191,8 +42238,14 @@ var _RiMarkov = class _RiMarkov {
42191
42238
  }
42192
42239
  return result;
42193
42240
  }
42194
- /* return an object mapping {string -> prob} */
42195
- probabilities(path, temp) {
42241
+ /**
42242
+ * Returns the full set of possible next tokens as an object, mapping tokens to probabilities,
42243
+ * given an array of tokens representing the path down the tree (with length less than `n`).
42244
+ * @param {string|string[]} path - the path to the node as a string or an array of tokens
42245
+ * @param {number} [temperature=1] - temperature acts as a knob to adjust the probability that input elements will be selected for the output. At higher values, infrequent words are more likely to be chosen, while at lower values the most frequent inputs are more likely to be output. If no value is provided, then tokens are chosen according to their relative frequency in the input.
42246
+ * @return {object} - a map of tokens to probabilities
42247
+ */
42248
+ probabilities(path, temperature) {
42196
42249
  if (!Array.isArray(path))
42197
42250
  path = this.tokenize(path);
42198
42251
  const probs = {};
@@ -42200,11 +42253,17 @@ var _RiMarkov = class _RiMarkov {
42200
42253
  if (parent) {
42201
42254
  const children = parent.childNodes();
42202
42255
  const weights = children.map((n) => n.count);
42203
- const pdist = _RiMarkov.parent.randomizer.ndist(weights, temp);
42256
+ const pdist = _RiMarkov.parent.randomizer.ndist(weights, temperature);
42204
42257
  children.forEach((c, i) => probs[c.token] = pdist[i]);
42205
42258
  }
42206
42259
  return probs;
42207
42260
  }
42261
+ /**
42262
+ * Returns either the raw (unigram) probability for a single token in the model (0 if it does not exist), OR
42263
+ * the probability of a sequence of K tokens where K is less than `n` (0 if the sequence does not exist).
42264
+ * @param {string|string[]} data - the token or array of tokens to check
42265
+ * @return {number} - the probability of the token or sequence
42266
+ */
42208
42267
  probability(data) {
42209
42268
  let p = 0;
42210
42269
  if (data && data.length) {
@@ -42214,10 +42273,20 @@ var _RiMarkov = class _RiMarkov {
42214
42273
  }
42215
42274
  return p;
42216
42275
  }
42276
+ /**
42277
+ * Returns a string representation of the model or a subtree of the model, optionally ordered by probability.
42278
+ * @param {object} root - the root node of the subtree to print
42279
+ * @param {boolean} sort - if true, sort the nodes by probability
42280
+ * @return {string} - the string representation of the model
42281
+ */
42217
42282
  toString(root, sort) {
42218
42283
  root = root || this.root;
42219
42284
  return root.asTree(sort).replace(/{}/g, "");
42220
42285
  }
42286
+ /**
42287
+ * Returns the number of tokens currently in the model.
42288
+ * @return {number} - number of tokens
42289
+ */
42221
42290
  size() {
42222
42291
  return this.root.childCount(true);
42223
42292
  }
@@ -42490,14 +42559,13 @@ var MULTI_SP_RE = / +/g;
42490
42559
  var markov_default = RiMarkov;
42491
42560
 
42492
42561
  // src/rita.js
42493
- import { RiScript } from "riscript";
42494
- var { Grammar: RiGrammar } = RiScript;
42562
+ import { RiScript, RiGrammar } from "riscript";
42495
42563
  var RiTa = class _RiTa {
42496
42564
  /**
42497
42565
  * Create a RiTa grammar instance
42498
- * @param {object} rules - the rules of the grammar
42499
- * @param {object} context - the context of the grammar
42500
- * @returns {object} - a new RiGrammar instance // TODO: fix return type -> RiGrammar
42566
+ * @param {object} [rules] - the rules of the grammar
42567
+ * @param {object} [context] - the context of the grammar
42568
+ * @returns {RiGrammar} - a new RiGrammar instance
42501
42569
  */
42502
42570
  static grammar(rules, context) {
42503
42571
  return new RiGrammar(rules, context);
@@ -42535,9 +42603,9 @@ var RiTa = class _RiTa {
42535
42603
  /**
42536
42604
  * Evaluates the input script via the RiScript parser
42537
42605
  * @param {string} script - the script to evaluate
42538
- * @param {object} context - the context to evaluate the script ing
42606
+ * @param {object} [context] - the context in which to evaluate the script
42539
42607
  * @param {object} [options] - options for the evaluation
42540
- * @param {boolean} options.trace - whether to trace the evaluation
42608
+ * @param {boolean} [options.trace] - whether to trace the evaluation
42541
42609
  * @returns {string} the result of the evaluation
42542
42610
  */
42543
42611
  static evaluate(script, context, options) {
@@ -42547,6 +42615,13 @@ var RiTa = class _RiTa {
42547
42615
  * Creates a new RiMarkov object
42548
42616
  * @param {number} n - an int representing the n-factor of the markov chain
42549
42617
  * @param {object} [options] - options for the markov chain
42618
+ * @param {string|string[]} [options.text] - a text string, or array of sentences, to add to the model (same as via model.addText())
42619
+ * @param {number} [options.maxLengthMatch] - # of words allowed in result to match a sequence in the input, default=∞
42620
+ * @param {number} [options.maxAttempts=999] - max attempts to complete one or more generations before erroring, default=999
42621
+ * @param {function} [options.tokenize] - custom tokenizer with tokenize() method, defaults to RiTa.tokenize()
42622
+ * @param {function} [options.untokenize] - custom untokenizer with untokenize() method, defaults to RiTa.untokenize()
42623
+ * @param {boolean} [options.disableInputChecks=false] - if true, allow result to be present in the input, default=false
42624
+ * @param {boolean} [options.trace] - output trace info to the console
42550
42625
  * @returns {RiMarkov}
42551
42626
  */
42552
42627
  static markov(n, options) {
@@ -42557,9 +42632,9 @@ var RiTa = class _RiTa {
42557
42632
  * @overload
42558
42633
  * @param {string} keyword
42559
42634
  * @param {object} [options]
42560
- * @param {number} options.numWords - the number of words to include in the context
42561
- * @param {string} options.text - the text as input for the KWIC model
42562
- * @param {string[]} options.words - the array of words to be used as input for the KWIC model
42635
+ * @param {number} [options.numWords] - the number of words to include in the context
42636
+ * @param {string} [options.text] - the text as input for the KWIC model
42637
+ * @param {string[]} [options.words] - the array of words to be used as input for the KWIC model
42563
42638
  * @returns {string[]} all the occurrences of the keyword in the model, each with no more
42564
42639
  * than 'numWords' words of context on either side
42565
42640
  * @overload
@@ -42575,11 +42650,11 @@ var RiTa = class _RiTa {
42575
42650
  * Creates a concordance, a list of words with their frequency of occurrence, from the given text and options.
42576
42651
  * @param {string} text - the text from which to create the concordance
42577
42652
  * @param {object} [options] - options for the concordance
42578
- * @param {boolean} options.ignoreCase=false - whether to ignore case when creating the concordance
42579
- * @param {boolean} options.ignoreStopWords=false - whether to ignore stop words like
42653
+ * @param {boolean} [options.ignoreCase=false] - whether to ignore case when creating the concordance
42654
+ * @param {boolean} [options.ignoreStopWords=false] - whether to ignore stop words like
42580
42655
  * 'the', 'and', 'a', 'of', etc, as specified in RiTa.STOP_WORDS
42581
- * @param {boolean} options.ignorePunctuation=false - whether to ignore punctuation when creating the concordance
42582
- * @param {string[]} options.wordsToIgnore=null - words to ignore when creating the concordance (alternate stop-words)
42656
+ * @param {boolean} [options.ignorePunctuation=false] - whether to ignore punctuation when creating the concordance
42657
+ * @param {string[]} [options.wordsToIgnore=null] - words to ignore when creating the concordance (alternate stop-words)
42583
42658
  * @returns {object} the concordance, an object with words as keys and frequencies as values
42584
42659
  */
42585
42660
  static concordance(text, options) {
@@ -42641,14 +42716,14 @@ var RiTa = class _RiTa {
42641
42716
  * (length, syllable-count, phonemic pattern, stress pattern, part-of-speech, etc.).
42642
42717
  * @param {(string|RegExp)} [pattern] - the pattern to match
42643
42718
  * @param {object} [options]
42644
- * @param {number} options.minLength=4 - the minimum length of the word
42645
- * @param {number} options.maxLength=-1 - the maximum length of the word
42646
- * @param {number} options.numSyllables=null - the number of syllables in the word
42647
- * @param {number} options.limit=10 - the maximum number of results to retur
42648
- * @param {string} options.pos=null - the part-of-speech of the word to return,
42719
+ * @param {number} [options.minLength=4] - the minimum length of the word
42720
+ * @param {number} [options.maxLength=-1] - the maximum length of the word
42721
+ * @param {number} [options.numSyllables=null] - the number of syllables in the word
42722
+ * @param {number} [options.limit=10] - the maximum number of results to return
42723
+ * @param {string} [options.pos=null] - the part-of-speech of the word to return,
42649
42724
  * either from the Penn tag set or the simplified tag set [a, r, v, n]
42650
- * @param {RegExp} options.pattern=null - the spelling or phonemic pattern to match
42651
- * @param {string} options.type=null - the type of regex or string pattern to match,
42725
+ * @param {RegExp} [options.pattern=null] - the spelling or phonemic pattern to match
42726
+ * @param {string} [options.type=null] - the type of regex or string pattern to match,
42652
42727
  * options are 'stresses' or 'phones' or 'letters' (the default)
42653
42728
  * @returns {string} a random word matching the criteria in the options object
42654
42729
  */
@@ -42660,12 +42735,12 @@ var RiTa = class _RiTa {
42660
42735
  * their final stressed vowel and all following phonemes are identical.
42661
42736
  * @param {string} word
42662
42737
  * @param {object} [options]
42663
- * @param {number} options.minLength=4 - the minimum length of the words
42664
- * @param {number} options.maxLength - the maximum length of the words
42665
- * @param {number} options.numSyllables - the number of syllables in the words
42666
- * @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
42667
- * @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
42668
- * @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
42738
+ * @param {number} [options.minLength=4] - the minimum length of the words
42739
+ * @param {number} [options.maxLength] - the maximum length of the words
42740
+ * @param {number} [options.numSyllables] - the number of syllables in the words
42741
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
42742
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
42743
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
42669
42744
  * or the simplified tag set [a, r, v, n]
42670
42745
  * @returns {Promise<string[]>} an array of rhymes that match criteria in the options object
42671
42746
  */
@@ -42687,12 +42762,12 @@ var RiTa = class _RiTa {
42687
42762
  * of each word in the lexicon via a minimum-edit-distance metric.
42688
42763
  * @param {string} word
42689
42764
  * @param {object} [options]
42690
- * @param {number} options.minLength=4 - the minimum length of the words
42691
- * @param {number} options.maxLength - the maximum length of the words
42692
- * @param {number} options.numSyllables - the number of syllables in the words
42693
- * @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
42694
- * @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
42695
- * @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
42765
+ * @param {number} [options.minLength=4] - the minimum length of the words
42766
+ * @param {number} [options.maxLength] - the maximum length of the words
42767
+ * @param {number} [options.numSyllables] - the number of syllables in the words
42768
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
42769
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
42770
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
42696
42771
  * or the simplified tag set [a, r, v, n]
42697
42772
  * @returns {Promise<string[]>} an array of alliterations matching criteria in the options object
42698
42773
  */
@@ -42703,7 +42778,7 @@ var RiTa = class _RiTa {
42703
42778
  * Returns true if the word is in the lexicon, else false
42704
42779
  * @param {string} word - the word to check
42705
42780
  * @param {object} [options] - options for the search
42706
- * @param {boolean} options.noDerivations=false - whether to ignore derivations and only search for raw words
42781
+ * @param {boolean} [options.noDerivations=false] - whether to ignore derivations and only search for raw words
42707
42782
  * @returns {boolean} true if the word is in the lexicon, else false
42708
42783
  */
42709
42784
  static hasWord(word, options) {
@@ -42713,7 +42788,7 @@ var RiTa = class _RiTa {
42713
42788
  * Returns true if the word is an abbreviation, else false
42714
42789
  * @param {string} input - the word to check
42715
42790
  * @param {object} [options] - options for the search
42716
- * @param {boolean} options.caseSensitive=false - whether to ignore case when checking for abbreviations
42791
+ * @param {boolean} [options.caseSensitive=false] - whether to ignore case when checking for abbreviations
42717
42792
  * @returns {boolean} true if the word is an abbreviation, else false
42718
42793
  */
42719
42794
  static isAbbrev(input, options) {
@@ -42739,12 +42814,12 @@ var RiTa = class _RiTa {
42739
42814
  * to each word in the lexicon, returning the set of closest matches that also match the criteria in the options object.
42740
42815
  * @param {string} word - the word to match
42741
42816
  * @param {object} [options] - options for the search
42742
- * @param {number} options.minLength=4 - the minimum length of the words
42743
- * @param {number} options.maxLength - the maximum length of the words
42744
- * @param {number} options.numSyllables - the number of syllables in the words
42745
- * @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
42746
- * @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
42747
- * @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set or the simplified tag set [a, r, v, n]
42817
+ * @param {number} [options.minLength=4] - the minimum length of the words
42818
+ * @param {number} [options.maxLength] - the maximum length of the words
42819
+ * @param {number} [options.numSyllables] - the number of syllables in the words
42820
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
42821
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
42822
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set or the simplified tag set [a, r, v, n]
42748
42823
  * @returns {Promise<string[]>} an array of words matching the spelling pattern and criteria in the options object
42749
42824
  */
42750
42825
  static async spellsLike(word, options) {
@@ -42755,13 +42830,13 @@ var RiTa = class _RiTa {
42755
42830
  * to each word in the lexicon, returning the set of closest matches that also match the criteria in the options object.
42756
42831
  * @param {string} word - the word to match
42757
42832
  * @param {object} [options] - options for the search
42758
- * @param {number} options.minLength=4 - the minimum length of the words
42759
- * @param {number} options.maxLength - the maximum length of the words
42760
- * @param {number} options.numSyllables - the number of syllables in the words
42761
- * @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
42762
- * @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
42763
- * @param {boolean} options.matchSpelling=false, if true will also attempt to match spelling by returning an intersection with RiTa.spellsLike()
42764
- * @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
42833
+ * @param {number} [options.minLength=4] - the minimum length of the words
42834
+ * @param {number} [options.maxLength] - the maximum length of the words
42835
+ * @param {number} [options.numSyllables] - the number of syllables in the words
42836
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
42837
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
42838
+ * @param {boolean} [options.matchSpelling=false] - if true, will also attempt to match spelling by returning an intersection with RiTa.spellsLike()
42839
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
42765
42840
  * or the simplified tag set [a, r, v, n]
42766
42841
  * @returns {Promise<string[]>} an array of words matching the phonemic pattern and criteria in the options object
42767
42842
  */
@@ -42773,7 +42848,7 @@ var RiTa = class _RiTa {
42773
42848
  * from the Penn tag set or the simplified tag set [a, r, v, n].
42774
42849
  * @param {(string|string[])} word - the word or words to tag
42775
42850
  * @param {object} [options] - options for the tagging
42776
- * @param {boolean} options.simple - use simple tags (noun=n,verb=v,adverb=a,adjective=r)
42851
+ * @param {boolean} [options.simple] - use simple tags (noun=n,verb=v,adjective=a,adverb=r)
42777
42852
  * @returns {string|string[]} - an array of part-of-speech tags for each word in the input
42778
42853
  */
42779
42854
  static pos(word, options) {
@@ -42830,7 +42905,7 @@ var RiTa = class _RiTa {
42830
42905
  * Tags the input string with part-of-speech tags, either from the Penn tag set or the simplified tag set [a, r, v, n].
42831
42906
  * @param {string} sentence - the sentence to tag
42832
42907
  * @param {object} [options] - options for the tagging
42833
- * @param {boolean} options.simple=false - use the simplified tag set [a, r, v, n]
42908
+ * @param {boolean} [options.simple=false] - use the simplified tag set [a, r, v, n]
42834
42909
  * @returns {string} the tagged sentence
42835
42910
  */
42836
42911
  static posInline(sentence, options) {
@@ -42857,14 +42932,14 @@ var RiTa = class _RiTa {
42857
42932
  * spelling, phonemes, stresses, part-of-speech, etc. If no regex or options are supplied, the full set of words is returned.
42858
42933
  * @param {(string|RegExp)} [pattern] - the pattern to match
42859
42934
  * @param {object} [options] - options for the search
42860
- * @param {number} options.minLength=4 - the minimum length of the words
42861
- * @param {number} options.maxLength - the maximum length of the words
42862
- * @param {number} options.numSyllables - the number of syllables in the words
42863
- * @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
42864
- * @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
42865
- * @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
42935
+ * @param {number} [options.minLength=4] - the minimum length of the words
42936
+ * @param {number} [options.maxLength] - the maximum length of the words
42937
+ * @param {number} [options.numSyllables] - the number of syllables in the words
42938
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
42939
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
42940
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
42866
42941
  * or the simplified tag set [a, r, v, n]
42867
- * @param {string} options.type - the type of regex or string pattern to match, options are 'stresses'
42942
+ * @param {string} [options.type] - the type of regex or string pattern to match, options are 'stresses'
42868
42943
  * or 'phones' or 'letters' (the default)
42869
42944
  * @returns {Promise<string[]>} an array of words matching the criteria in both the pattern and the options object
42870
42945
  */
@@ -42876,13 +42951,13 @@ var RiTa = class _RiTa {
42876
42951
  * Punctuation and case are ignored unless specified otherwise.
42877
42952
  * @param {string} text - The text from which to extract the tokens
42878
42953
  * @param {object} [options] - The options
42879
- * @param {boolean} options.caseSensitive=false - Whether to pay attention to case
42880
- * @param {boolean} options.ignoreStopWords=false - Whether to ignore words such as 'the', 'and', 'a', 'of', etc,
42954
+ * @param {boolean} [options.caseSensitive=false] - Whether to pay attention to case
42955
+ * @param {boolean} [options.ignoreStopWords=false] - Whether to ignore words such as 'the', 'and', 'a', 'of', etc,
42881
42956
  * as specified in RiTa.STOP_WORDS
42882
- * @param {boolean} options.splitContractions=false - Whether to convert contractions
42957
+ * @param {boolean} [options.splitContractions=false] - Whether to convert contractions
42883
42958
  * (e.g., "I'd" or "she'll") into multiple individual tokens
42884
- * @param {boolean} options.includePunct=false - Whether to include punctuation in the results
42885
- * @param {boolean} options.sort=false - Whether to sort the tokens before returning them
42959
+ * @param {boolean} [options.includePunct=false] - Whether to include punctuation in the results
42960
+ * @param {boolean} [options.sort=false] - Whether to sort the tokens before returning them
42886
42961
  * @returns {string[]} Array of tokens
42887
42962
  */
42888
42963
  static tokens(text, options = {
@@ -42898,10 +42973,10 @@ var RiTa = class _RiTa {
42898
42973
  * Tokenizes an input string into words, according to the Penn Treebank conventions
42899
42974
  * @param {string} input - The text to tokenize
42900
42975
  * @param {object} [options] - The options
42901
- * @param {RegExp} options.regex=null - An optional custom regex to split on
42902
- * @param {boolean} options.splitHyphens=false - Whether to split hyphenated words
42976
+ * @param {RegExp} [options.regex=null] - An optional custom regex to split on
42977
+ * @param {boolean} [options.splitHyphens=false] - Whether to split hyphenated words
42903
42978
  * (e.g., "mother-in-law") into multiple individual tokens
42904
- * @param {boolean} options.splitContractions=false - Whether to split contractions
42979
+ * @param {boolean} [options.splitContractions=false] - Whether to split contractions
42905
42980
  * (e.g., "I'd" or "she'll") into multiple individual tokens
42906
42981
  * @returns {string[]} Array of tokens
42907
42982
  */
@@ -42912,7 +42987,7 @@ var RiTa = class _RiTa {
42912
42987
  * Joins an array (of words and punctuation) into a sentence, according to
42913
42988
  * the Penn Treebank conventions. The inverse of RiTa.tokenize().
42914
42989
  * @param {string[]} input - The array of words to join
42915
- * @param {string} delim=' ' - The delimiter to use between words, or a space by default
42990
+ * @param {string} [delim=' '] - The delimiter to use between words, or a space by default
42916
42991
  * @returns {string} The joined sentence
42917
42992
  */
42918
42993
  static untokenize(input, delim = " ") {
@@ -42964,14 +43039,14 @@ var RiTa = class _RiTa {
42964
43039
  * Conjugates the 'verb' according to the specified options (tense, person, number, etc.)
42965
43040
  * @param {string} verbWord
42966
43041
  * @param {object} [options]
42967
- * @param {number} options.tense - the tense of the verb, either RiTa.PAST, RiTa.PRESENT, or RiTa.FUTURE
42968
- * @param {number} options.person - the person of the verb, either RiTa.FIRST, RiTa.SECOND, or RiTa.THIRD
42969
- * @param {number} options.number - the number of the verb, either RiTa.SINGULAR or RiTa.PLURAL
42970
- * @param {number} options.form - the form of the verb, either RiTa.INFINITIVE or RiTa.GERUND
42971
- * @param {boolean} options.passive - whether the verb should be passive
42972
- * @param {boolean} options.progressive - whether the verb should be progressive
42973
- * @param {boolean} options.perfect - whether the verb should be perfect
42974
- * @param {boolean} options.interrogative - whether the verb should be in interrogative form
43042
+ * @param {number} [options.tense] - the tense of the verb, either RiTa.PAST, RiTa.PRESENT, or RiTa.FUTURE
43043
+ * @param {number} [options.person] - the person of the verb, either RiTa.FIRST, RiTa.SECOND, or RiTa.THIRD
43044
+ * @param {number} [options.number] - the number of the verb, either RiTa.SINGULAR or RiTa.PLURAL
43045
+ * @param {number} [options.form] - the form of the verb, either RiTa.INFINITIVE or RiTa.GERUND
43046
+ * @param {boolean} [options.passive] - whether the verb should be passive
43047
+ * @param {boolean} [options.progressive] - whether the verb should be progressive
43048
+ * @param {boolean} [options.perfect] - whether the verb should be perfect
43049
+ * @param {boolean} [options.interrogative] - whether the verb should be in interrogative form
42975
43050
  * @returns {string} the conjugated verb
42976
43051
  */
42977
43052
  static conjugate(verbWord, options) {
@@ -42980,6 +43055,7 @@ var RiTa = class _RiTa {
42980
43055
  /**
42981
43056
  * Analyzes the input and returns a new string containing the stresses for each syllable of the input text.
42982
43057
  * @param {string} input - the text to analyze
43058
+ * @param {object} [options] - options for the analysis
42983
43059
  * @returns {string} a string containing the stresses for each syllable of the input text
42984
43060
  */
42985
43061
  static stresses(input, options) {
@@ -42988,6 +43064,7 @@ var RiTa = class _RiTa {
42988
43064
  /**
42989
43065
  * Analyzes the input and returns a new string containing the syllables of the input text.
42990
43066
  * @param {string} input - the text to analyze
43067
+ * @param {object} [options] - options for the analysis
42991
43068
  * @returns {string} a string containing the syllables of the input text
42992
43069
  */
42993
43070
  static syllables(input, options) {
@@ -42996,6 +43073,7 @@ var RiTa = class _RiTa {
42996
43073
  /**
42997
43074
  * Analyzes the input and returns a new string containing the phonemes of the input text.
42998
43075
  * @param {string} input - the text to analyze
43076
+ * @param {object} [options] - options for the analysis
42999
43077
  * @returns {string} a string containing the phones of the input text
43000
43078
  */
43001
43079
  static phones(input, options) {
@@ -43006,7 +43084,7 @@ var RiTa = class _RiTa {
43006
43084
  * including phonemes, syllables, stresses, and part-of-speech tags.
43007
43085
  * @param {string} input - the text to analyze
43008
43086
  * @param {object} [options] - options for the analysis
43009
- * @param {boolean} options.simple=false - whether to use the simplified tag set [a, r, v, n]
43087
+ * @param {boolean} [options.simple=false] - whether to use the simplified tag set [a, r, v, n]
43010
43088
  * @returns {object} an object containing the features of the input text (phones, syllables, stresses, pos), or the features inline
43011
43089
  */
43012
43090
  static analyze(input, options) {
@@ -43019,13 +43097,13 @@ var RiTa = class _RiTa {
43019
43097
  * returning the set of closest matches that also match the criteria in the options object.
43020
43098
  * @param {string} word - the word to match
43021
43099
  * @param {object} [options] - options for the search
43022
- * @param {number} options.minLength=4 - the minimum length of the words
43023
- * @param {number} options.maxLength - the maximum length of the words
43024
- * @param {number} options.numSyllables - the number of syllables in the words
43025
- * @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
43026
- * @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
43100
+ * @param {number} [options.minLength=4] - the minimum length of the words
43101
+ * @param {number} [options.maxLength] - the maximum length of the words
43102
+ * @param {number} [options.numSyllables] - the number of syllables in the words
43103
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
43104
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
43027
43105
  * or the simplified tag set [a, r, v, n]
43028
- * @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
43106
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
43029
43107
  * @return {string[]} an array of words matching the spelling pattern and criteria in the options object
43030
43108
  */
43031
43109
  static spellsLikeSync(word, options) {
@@ -43036,13 +43114,13 @@ var RiTa = class _RiTa {
43036
43114
  * to each word in the lexicon, returning the set of closest matches that also match the criteria in the options object.
43037
43115
  * @param {string} word - the word to match
43038
43116
  * @param {object} [options] - options for the search
43039
- * @param {number} options.minLength=4 - the minimum length of the words
43040
- * @param {number} options.maxLength - the maximum length of the words
43041
- * @param {number} options.numSyllables - the number of syllables in the words
43042
- * @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
43043
- * @param {boolean} options.matchSpelling=false, if true will also attempt to match spelling by returning an intersection with RiTa.spellsLike()
43044
- * @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
43045
- * @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
43117
+ * @param {number} [options.minLength=4] - the minimum length of the words
43118
+ * @param {number} [options.maxLength] - the maximum length of the words
43119
+ * @param {number} [options.numSyllables] - the number of syllables in the words
43120
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
43121
+ * @param {boolean} [options.matchSpelling=false] - if true, will also attempt to match spelling by returning an intersection with RiTa.spellsLike()
43122
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
43123
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
43046
43124
  * or the simplified tag set [a, r, v, n]
43047
43125
  * @return {string[]} an array of words matching the phonemic pattern and criteria in the options object
43048
43126
  */
@@ -43054,12 +43132,12 @@ var RiTa = class _RiTa {
43054
43132
  * Two words are considered as rhyming if their final stressed vowel and all following phonemes are identical.
43055
43133
  * @param {string} word - the word to match
43056
43134
  * @param {object} [options] - options for the search
43057
- * @param {number} options.minLength=4 - the minimum length of the words
43058
- * @param {number} options.maxLength - the maximum length of the words
43059
- * @param {number} options.numSyllables - the number of syllables in the words
43060
- * @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
43061
- * @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
43062
- * @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
43135
+ * @param {number} [options.minLength=4] - the minimum length of the words
43136
+ * @param {number} [options.maxLength] - the maximum length of the words
43137
+ * @param {number} [options.numSyllables] - the number of syllables in the words
43138
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
43139
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
43140
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
43063
43141
  * or the simplified tag set [a, r, v, n]
43064
43142
  * @return {string[]} an array of rhymes that match criteria in the options object
43065
43143
  */
@@ -43072,14 +43150,14 @@ var RiTa = class _RiTa {
43072
43150
  * part-of-speech, etc.
43073
43151
  * @param {(string|RegExp)} [pattern] - the pattern to match
43074
43152
  * @param {object} [options] - options for the search
43075
- * @param {number} options.minLength=4 - the minimum length of the words
43076
- * @param {number} options.maxLength - the maximum length of the words
43077
- * @param {number} options.numSyllables - the number of syllables in the words
43078
- * @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
43079
- * @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
43080
- * @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
43153
+ * @param {number} [options.minLength=4] - the minimum length of the words
43154
+ * @param {number} [options.maxLength] - the maximum length of the words
43155
+ * @param {number} [options.numSyllables] - the number of syllables in the words
43156
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
43157
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
43158
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
43081
43159
  * or the simplified tag set [a, r, v, n]
43082
- * @param {string} options.type - the type of regex or string pattern to match, options are 'stresses' or 'phones' or 'letters' (the default)
43160
+ * @param {string} [options.type] - the type of regex or string pattern to match, options are 'stresses' or 'phones' or 'letters' (the default)
43083
43161
  * @return {string[]} an array of words matching the criteria in both the pattern and the options object
43084
43162
  */
43085
43163
  static searchSync(pattern, options) {
@@ -43090,12 +43168,12 @@ var RiTa = class _RiTa {
43090
43168
  * of the input string to those of each word in the lexicon via a minimum-edit-distance metric.
43091
43169
  * @param {string} word - the word to match
43092
43170
  * @param {object} [options] - options for the search
43093
- * @param {number} options.minLength=4 - the minimum length of the words
43094
- * @param {number} options.maxLength - the maximum length of the words
43095
- * @param {number} options.numSyllables - the number of syllables in the words
43096
- * @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
43097
- * @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
43098
- * @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
43171
+ * @param {number} [options.minLength=4] - the minimum length of the words
43172
+ * @param {number} [options.maxLength] - the maximum length of the words
43173
+ * @param {number} [options.numSyllables] - the number of syllables in the words
43174
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
43175
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
43176
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
43099
43177
  * or the simplified tag set [a, r, v, n]
43100
43178
  * @return {string[]} an array of alliterations matching criteria in the options object
43101
43179
  */
@@ -43143,7 +43221,7 @@ markov_default.parent = RiTa;
43143
43221
  stemmer_default.tokenizer = RiTa.tokenizer;
43144
43222
  RiTa.SILENT = false;
43145
43223
  RiTa.SILENCE_LTS = false;
43146
- RiTa.VERSION = "3.0.22";
43224
+ RiTa.VERSION = "3.0.23";
43147
43225
  RiTa.FIRST = 1;
43148
43226
  RiTa.SECOND = 2;
43149
43227
  RiTa.THIRD = 3;
@@ -43170,9 +43248,9 @@ RiTa.GERUND = 2;
43170
43248
  RiTa.SPLIT_CONTRACTIONS = false;
43171
43249
  var ONLY_PUNCT = /^[\p{P}|\+|-|<|>|\^|\$|\ufffd|`]*$/u;
43172
43250
  var IS_LETTER = /^[a-z\u00C0-\u00ff]+$/;
43173
- RiTa.RiScript = RiScript;
43174
43251
  RiTa.riscript = new RiScript({ RiTa });
43175
43252
  export {
43253
+ markov_default as RiMarkov,
43176
43254
  RiTa
43177
43255
  };
43178
43256
  //# sourceMappingURL=rita.js.map