rita 3.0.21 → 3.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/rita.cjs CHANGED
@@ -796,9 +796,9 @@ var _Stemmer = class _Stemmer {
796
796
  if (!input.includes(" ")) {
797
797
  return _Stemmer.stemEnglish(input);
798
798
  }
799
- const words = _Stemmer.parent.tokenize(input);
799
+ const words = _Stemmer.tokenizer.tokenize(input);
800
800
  const stems = _Stemmer.stemAll(words);
801
- return _Stemmer.parent.untokenize(stems);
801
+ return _Stemmer.tokenizer.untokenize(stems);
802
802
  }
803
803
  static stemAll(input) {
804
804
  return input.map((i) => _Stemmer.stemEnglish(i));
@@ -830,6 +830,7 @@ var _Stemmer = class _Stemmer {
830
830
  return _Stemmer.impl.getCurrent();
831
831
  }
832
832
  };
833
+ __publicField(_Stemmer, "tokenizer");
833
834
  __publicField(_Stemmer, "impl", new SnowballStemmer());
834
835
  var Stemmer = _Stemmer;
835
836
  var stemmer_default = Stemmer;
@@ -840,57 +841,51 @@ var Tokenizer = class {
840
841
  this.RiTa = parent;
841
842
  this.splitter = /(\S.+?[.!?]["\u201D]?)(?=\s+|$)/g;
842
843
  }
843
- tokens(text, opts = {}) {
844
- let words = this.tokenize(text, opts), map = {};
844
+ /**
845
+ * Returns an array containing all unique alphabetical words (tokens) in the text.
846
+ * Punctuation and case are ignored unless specified otherwise.
847
+ * @param {string} text - The text from which to extract the tokens
848
+ * @param {object} [options] - The options
849
+ * @param {boolean} [options.caseSensitive=false] - Whether to pay attention to case
850
+ * @param {boolean} [options.ignoreStopWords=false] - Whether to ignore words like 'the', 'and', 'a', 'of', etc, as specified in RiTa.STOP_WORDS
851
+ * @param {boolean} [options.splitContractions=false] - Whether to convert contractions (e.g., "I'd" or "she'll") into multiple individual tokens
852
+ * @param {boolean} [options.includePunct=false] - Whether to include punctuation in the results
853
+ * @param {boolean} [options.sort=false] - Whether to sort the tokens before returning them
854
+ * @returns {string[]} Array of tokens
855
+ */
856
+ tokens(text, options = {
857
+ caseSensitive: false,
858
+ ignoreStopWords: false,
859
+ splitContractions: false,
860
+ includePunct: false,
861
+ sort: false
862
+ }) {
863
+ let words = this.tokenize(text, options), map = {};
845
864
  words.forEach((w) => {
846
- if (!opts.caseSensitive)
865
+ if (!options.caseSensitive)
847
866
  w = w.toLowerCase();
848
- if (opts.includePunct || ALPHA_RE.test(w))
867
+ if (options.includePunct || ALPHA_RE.test(w))
849
868
  map[w] = 1;
850
869
  });
851
870
  let tokens = Object.keys(map);
852
- if (opts.ignoreStopWords)
871
+ if (options.ignoreStopWords)
853
872
  tokens = tokens.filter((t) => !this.RiTa.isStopWord(t));
854
- return opts.sort ? tokens.sort() : tokens;
855
- }
856
- sentences(text, regex2) {
857
- if (!text || !text.length)
858
- return [text];
859
- let clean = text.replace(NL_RE, " ");
860
- let delim = "___";
861
- let re = new RegExp(delim, "g");
862
- let pattern = regex2 || this.splitter;
863
- let unescapeAbbrevs = (arr2) => {
864
- for (let i = 0; i < arr2.length; i++) {
865
- arr2[i] = arr2[i].replace(re, ".");
866
- }
867
- return arr2;
868
- };
869
- let escapeAbbrevs = (text2) => {
870
- let abbrevs = this.RiTa.ABRV;
871
- for (let i = 0; i < abbrevs.length; i++) {
872
- let abv = abbrevs[i];
873
- let idx = text2.indexOf(abv);
874
- while (idx > -1) {
875
- text2 = text2.replace(abv, abv.replace(".", delim));
876
- idx = text2.indexOf(abv);
877
- }
878
- }
879
- return text2;
880
- };
881
- let arr = escapeAbbrevs(clean).match(pattern);
882
- return arr && arr.length ? unescapeAbbrevs(arr) : [text];
873
+ return options.sort ? tokens.sort() : tokens;
883
874
  }
884
- tokenize(input, opts = {}) {
875
+ tokenize(input, opts = {
876
+ // regex: null,
877
+ // splitHyphens: false,
878
+ // splitContractions: false
879
+ }) {
885
880
  if (typeof input !== "string")
886
881
  return [];
887
882
  if (opts.regex)
888
- return input.split(regex);
883
+ return input.split(opts.regex);
889
884
  let { tags, text } = this.pushTags(input.trim());
890
885
  for (let i = 0; i < TOKENIZE_RE.length; i += 2) {
891
886
  text = text.replace(TOKENIZE_RE[i], TOKENIZE_RE[i + 1]);
892
887
  }
893
- if (opts.splitHyphen) {
888
+ if (opts.splitHyphens) {
894
889
  text = text.replace(/([a-zA-Z]+)-([a-zA-Z]+)/g, "$1 - $2");
895
890
  }
896
891
  if (this.RiTa.SPLIT_CONTRACTIONS || opts.splitContractions) {
@@ -901,11 +896,10 @@ var Tokenizer = class {
901
896
  let result = this.popTags(text.trim().split(WS_RE), tags);
902
897
  return result;
903
898
  }
904
- untokenize(arr, delim) {
899
+ untokenize(arr, delim = " ") {
905
900
  if (!arr || !Array.isArray(arr))
906
901
  return "";
907
902
  arr = this.preProcessTags(arr);
908
- delim = delim || " ";
909
903
  let nextNoSpace = false, afterQuote = false, midSentence = false;
910
904
  let withinQuote = arr.length && QUOTE_RE.test(arr[0]);
911
905
  let result = arr[0] || "";
@@ -966,6 +960,40 @@ var Tokenizer = class {
966
960
  }
967
961
  return result.trim();
968
962
  }
963
+ /**
964
+ * Split the input text into sentences according to the options
965
+ * @param {string} text - The text to split
966
+ * @param {(string|RegExp)} [regex] - An optional custom regex to split on
967
+ * @returns {string[]} An array of sentences
968
+ */
969
+ sentences(text, regex) {
970
+ if (!text || !text.length)
971
+ return [text];
972
+ let clean = text.replace(NL_RE, " ");
973
+ let delim = "___";
974
+ let re = new RegExp(delim, "g");
975
+ let pattern = regex || this.splitter;
976
+ let unescapeAbbrevs = (arr2) => {
977
+ for (let i = 0; i < arr2.length; i++) {
978
+ arr2[i] = arr2[i].replace(re, ".");
979
+ }
980
+ return arr2;
981
+ };
982
+ let escapeAbbrevs = (text2) => {
983
+ let abbrevs = this.RiTa.ABRV;
984
+ for (let i = 0; i < abbrevs.length; i++) {
985
+ let abv = abbrevs[i];
986
+ let idx = text2.indexOf(abv);
987
+ while (idx > -1) {
988
+ text2 = text2.replace(abv, abv.replace(".", delim));
989
+ idx = text2.indexOf(abv);
990
+ }
991
+ }
992
+ return text2;
993
+ };
994
+ let arr = escapeAbbrevs(clean).match(pattern);
995
+ return _optionalChain([arr, 'optionalAccess', _2 => _2.length]) ? unescapeAbbrevs(arr) : [text];
996
+ }
969
997
  pushTags(text) {
970
998
  let tags = [], tagIdx = 0;
971
999
  while (TAG_RE.test(text)) {
@@ -1335,6 +1363,7 @@ var TOKENIZE_RE = [
1335
1363
  // RS
1336
1364
  ];
1337
1365
  var CONTRACTS_RE = [
1366
+ // TODO: 'She'd have wanted' -> 'She would have wanted'
1338
1367
  /([Cc])an['\u2019]t/g,
1339
1368
  "$1an not",
1340
1369
  /([Dd])idn['\u2019]t/g,
@@ -1348,7 +1377,8 @@ var CONTRACTS_RE = [
1348
1377
  /([tT]hat)['\u2019]s/g,
1349
1378
  "$1 is",
1350
1379
  /(she|he|you|they|i)['\u2019]d/gi,
1351
- "$1 would",
1380
+ "$1 had",
1381
+ // changed from would, 12/8/23
1352
1382
  /(she|he|you|they|i)['\u2019]ll/gi,
1353
1383
  "$1 will",
1354
1384
  /n['\u2019]t /g,
@@ -1364,9 +1394,9 @@ var tokenizer_default = Tokenizer;
1364
1394
 
1365
1395
  // src/conjugator.js
1366
1396
  var RegularExpression = class {
1367
- constructor(regex2, offset, suffix) {
1368
- this.raw = regex2;
1369
- this.regex = new RegExp(regex2);
1397
+ constructor(regex, offset, suffix) {
1398
+ this.raw = regex;
1399
+ this.regex = new RegExp(regex);
1370
1400
  this.offset = offset;
1371
1401
  this.suffix = suffix || "";
1372
1402
  }
@@ -1383,7 +1413,7 @@ var RegularExpression = class {
1383
1413
  return "/" + this.raw + "/";
1384
1414
  }
1385
1415
  };
1386
- var RE = (a, b, c) => new RegularExpression(a, b, c);
1416
+ var RE = (a, b, c, _) => new RegularExpression(a, b, c);
1387
1417
  var Conjugator = class {
1388
1418
  constructor(parent) {
1389
1419
  __publicField(this, "_handleStem", function(word) {
@@ -1432,11 +1462,11 @@ var Conjugator = class {
1432
1462
  }
1433
1463
  args = this._parseArgs(args);
1434
1464
  let frontVG = TO_BE.includes(verb) ? "be" : this._handleStem(verb);
1435
- let actualModal, verbForm, conjs = [], RiTa3 = this.RiTa;
1436
- if (this.form === RiTa3.INFINITIVE) {
1465
+ let actualModal, verbForm, conjs = [], RiTa2 = this.RiTa;
1466
+ if (this.form === RiTa2.INFINITIVE) {
1437
1467
  actualModal = "to";
1438
1468
  }
1439
- if (this.tense === RiTa3.FUTURE) {
1469
+ if (this.tense === RiTa2.FUTURE) {
1440
1470
  actualModal = "will";
1441
1471
  }
1442
1472
  if (this.passive) {
@@ -1456,7 +1486,7 @@ var Conjugator = class {
1456
1486
  frontVG = null;
1457
1487
  }
1458
1488
  if (frontVG) {
1459
- if (this.form === RiTa3.GERUND) {
1489
+ if (this.form === RiTa2.GERUND) {
1460
1490
  conjs.push(this.presentPart(frontVG));
1461
1491
  } else if (this.interrogative && frontVG != "be" && conjs.length < 1) {
1462
1492
  conjs.push(frontVG);
@@ -1575,19 +1605,19 @@ var Conjugator = class {
1575
1605
  }
1576
1606
  _parseArgs(args) {
1577
1607
  this._reset();
1578
- const RiTa3 = this.RiTa;
1608
+ const RiTa2 = this.RiTa;
1579
1609
  if (typeof args === "string") {
1580
1610
  if (/^[123][SP](Pr|Pa|Fu)$/.test(args)) {
1581
1611
  let opts = {};
1582
1612
  opts.person = parseInt(args[0]);
1583
- opts.number = args[1] === "S" ? RiTa3.SINGULAR : RiTa3.PLURAL;
1613
+ opts.number = args[1] === "S" ? RiTa2.SINGULAR : RiTa2.PLURAL;
1584
1614
  let tense = args.substr(2);
1585
1615
  if (tense === "Pr")
1586
- opts.tense = RiTa3.PRESENT;
1616
+ opts.tense = RiTa2.PRESENT;
1587
1617
  if (tense === "Fu")
1588
- opts.tense = RiTa3.FUTURE;
1618
+ opts.tense = RiTa2.FUTURE;
1589
1619
  if (tense === "Pa")
1590
- opts.tense = RiTa3.PAST;
1620
+ opts.tense = RiTa2.PAST;
1591
1621
  args = opts;
1592
1622
  } else {
1593
1623
  throw Error("Invalid args: " + args);
@@ -1678,20 +1708,20 @@ var Conjugator = class {
1678
1708
  return false;
1679
1709
  }
1680
1710
  _pastTense(theVerb, pers, numb) {
1681
- const RiTa3 = this.RiTa;
1711
+ const RiTa2 = this.RiTa;
1682
1712
  if (theVerb.toLowerCase() === "be") {
1683
1713
  switch (numb) {
1684
- case RiTa3.SINGULAR:
1714
+ case RiTa2.SINGULAR:
1685
1715
  switch (pers) {
1686
- case RiTa3.FIRST:
1716
+ case RiTa2.FIRST:
1687
1717
  break;
1688
- case RiTa3.THIRD:
1718
+ case RiTa2.THIRD:
1689
1719
  return "was";
1690
- case RiTa3.SECOND:
1720
+ case RiTa2.SECOND:
1691
1721
  return "were";
1692
1722
  }
1693
1723
  break;
1694
- case RiTa3.PLURAL:
1724
+ case RiTa2.PLURAL:
1695
1725
  return "were";
1696
1726
  }
1697
1727
  }
@@ -1700,17 +1730,17 @@ var Conjugator = class {
1700
1730
  _presentTense(theVerb, person, number) {
1701
1731
  person = person || this.person;
1702
1732
  number = number || this.number;
1703
- const RiTa3 = this.RiTa;
1704
- if (person === RiTa3.THIRD && number === RiTa3.SINGULAR) {
1733
+ const RiTa2 = this.RiTa;
1734
+ if (person === RiTa2.THIRD && number === RiTa2.SINGULAR) {
1705
1735
  return this._checkRules(PRESENT_RULESET, theVerb);
1706
1736
  } else if (theVerb === "be") {
1707
- if (number === RiTa3.SINGULAR) {
1737
+ if (number === RiTa2.SINGULAR) {
1708
1738
  switch (person) {
1709
- case RiTa3.FIRST:
1739
+ case RiTa2.FIRST:
1710
1740
  return "am";
1711
- case RiTa3.SECOND:
1741
+ case RiTa2.SECOND:
1712
1742
  return "are";
1713
- case RiTa3.THIRD:
1743
+ case RiTa2.THIRD:
1714
1744
  return "is";
1715
1745
  }
1716
1746
  } else {
@@ -4787,13 +4817,13 @@ var Util = class _Util {
4787
4817
  return !isNaN(parseFloat(n)) && isFinite(n);
4788
4818
  }
4789
4819
  static numOpt(opts, name, def = 0) {
4790
- return _Util.isNum(_optionalChain([opts, 'optionalAccess', _ => _[name]])) ? opts[name] : def;
4820
+ return _Util.isNum(_optionalChain([opts, 'optionalAccess', _3 => _3[name]])) ? opts[name] : def;
4791
4821
  }
4792
4822
  };
4793
4823
  var RE2 = class {
4794
- constructor(regex2, offset, suffix) {
4795
- this.raw = regex2;
4796
- this.regex = new RegExp(regex2);
4824
+ constructor(regex, offset, suffix) {
4825
+ this.raw = regex;
4826
+ this.regex = new RegExp(regex);
4797
4827
  this.offset = offset;
4798
4828
  this.suffix = suffix || "";
4799
4829
  }
@@ -27069,20 +27099,25 @@ var Lexicon = class {
27069
27099
  async soundsLike(word, options = {}) {
27070
27100
  return this._promise(this.soundsLikeSync, [word, options]);
27071
27101
  }
27102
+ /**
27103
+ * A synchronous version of RiTa.lexicon.soundsLike().
27104
+ * @param {string} word
27105
+ * @param {object} [opts]
27106
+ * @returns {string[]} An array of words that sound like the input word
27107
+ */
27072
27108
  soundsLikeSync(word, opts = {}) {
27073
27109
  if (!word || !word.length)
27074
27110
  return [];
27075
- opts.type = "sound";
27076
- return opts.matchSpelling ? this._bySoundAndLetter(word, opts) : this._byTypeSync(word, opts);
27111
+ return opts.matchSpelling ? this._bySoundAndLetterSync(word, opts) : this._byTypeSync(word, { ...opts, type: "sound" });
27077
27112
  }
27078
- randomWord(regex2, opts) {
27079
- if (!regex2 && !opts) {
27113
+ randomWord(pattern, opts) {
27114
+ if (!pattern && !opts) {
27080
27115
  return this.RiTa.random(Object.keys(this.data));
27081
27116
  }
27082
- if (!(regex2 instanceof RegExp)) {
27083
- if (typeof regex2 === "object" && !opts) {
27084
- opts = regex2;
27085
- regex2 = void 0;
27117
+ if (!(pattern instanceof RegExp)) {
27118
+ if (typeof pattern === "object" && !opts) {
27119
+ opts = pattern;
27120
+ pattern = void 0;
27086
27121
  }
27087
27122
  }
27088
27123
  opts = opts || {};
@@ -27090,14 +27125,14 @@ var Lexicon = class {
27090
27125
  opts.shuffle = true;
27091
27126
  opts.strictPos = true;
27092
27127
  opts.minLength = util_default.numOpt(opts, "minLength", 4);
27093
- let result = this.searchSync(regex2, opts);
27128
+ let result = this.searchSync(pattern, opts);
27094
27129
  if (result.length < 1 && opts.hasOwnProperty("pos")) {
27095
27130
  opts.strictPos = false;
27096
- result = this.searchSync(regex2, opts);
27131
+ result = this.searchSync(pattern, opts);
27097
27132
  }
27098
27133
  if (result.length < 1) {
27099
27134
  ["strictPos", "shuffle", "targetPos"].forEach((k) => delete opts[k]);
27100
- throw Error("No words matching constraints:\n" + JSON.stringify(opts, 0, 2));
27135
+ throw Error("No words matching constraints:\n" + JSON.stringify(opts, void 0, 2));
27101
27136
  }
27102
27137
  return result[0];
27103
27138
  }
@@ -27108,7 +27143,7 @@ var Lexicon = class {
27108
27143
  let words = Object.keys(this.data);
27109
27144
  if (!pattern && !options)
27110
27145
  return words;
27111
- let { regex: regex2, opts } = this._parseRegex(pattern, options);
27146
+ let { regex, opts } = this._parseRegex(pattern, options);
27112
27147
  this._parseArgs(opts);
27113
27148
  if (opts.shuffle)
27114
27149
  words = this.RiTa.randomizer.shuffle(words);
@@ -27124,7 +27159,7 @@ var Lexicon = class {
27124
27159
  if (word !== words[i])
27125
27160
  data = this.data[word];
27126
27161
  }
27127
- if (!regex2 || this._regexMatch(word, data, regex2, opts.type)) {
27162
+ if (!regex || this._regexMatch(word, data, regex, opts.type)) {
27128
27163
  result.push(word);
27129
27164
  if (result.length === opts.limit)
27130
27165
  break;
@@ -27260,30 +27295,37 @@ var Lexicon = class {
27260
27295
  opts.targetPos = tpos;
27261
27296
  }
27262
27297
  _reconjugate(word, pos) {
27263
- const RiTa3 = this.RiTa;
27298
+ const RiTa2 = this.RiTa;
27264
27299
  switch (pos) {
27265
27300
  case "vbd":
27266
- return RiTa3.conjugate(word, {
27267
- number: RiTa3.SINGULAR,
27268
- person: RiTa3.FIRST,
27269
- tense: RiTa3.PAST
27301
+ return RiTa2.conjugate(word, {
27302
+ number: RiTa2.SINGULAR,
27303
+ person: RiTa2.FIRST,
27304
+ tense: RiTa2.PAST
27270
27305
  });
27271
27306
  case "vbg":
27272
- return RiTa3.presentPart(word);
27307
+ return RiTa2.presentPart(word);
27273
27308
  case "vbn":
27274
- return RiTa3.pastPart(word);
27309
+ return RiTa2.pastPart(word);
27275
27310
  case "vbp":
27276
27311
  return word;
27277
27312
  case "vbz":
27278
- return RiTa3.conjugate(word, {
27279
- number: RiTa3.SINGULAR,
27280
- person: RiTa3.THIRD,
27281
- tense: RiTa3.PRESENT
27313
+ return RiTa2.conjugate(word, {
27314
+ number: RiTa2.SINGULAR,
27315
+ person: RiTa2.THIRD,
27316
+ tense: RiTa2.PRESENT
27282
27317
  });
27283
27318
  default:
27284
27319
  throw Error("Unexpected pos: " + pos);
27285
27320
  }
27286
27321
  }
27322
+ _bySoundAndLetterSync(word, opts) {
27323
+ let bySound = this._byTypeSync(word, { ...opts, type: "sound" });
27324
+ let byLetter = this._byTypeSync(word, { ...opts, type: "letter" });
27325
+ if (bySound.length < 1 || byLetter.length < 1)
27326
+ return [];
27327
+ return this._intersect(bySound, byLetter).slice(0, opts.limit);
27328
+ }
27287
27329
  async _bySoundAndLetter(word, opts) {
27288
27330
  let types = ["sound", "letter"];
27289
27331
  let promises = types.map((type) => this._promise(this._byTypeSync, [word, { ...opts, type }]));
@@ -27307,7 +27349,8 @@ var Lexicon = class {
27307
27349
  // med for 2 strings (or 2 arrays)
27308
27350
  minEditDist(source, target) {
27309
27351
  let cost;
27310
- let i, j, matrix = [];
27352
+ let i, j;
27353
+ let matrix = [];
27311
27354
  let sI;
27312
27355
  let tJ;
27313
27356
  for (i = 0; i <= source.length; i++) {
@@ -27331,8 +27374,8 @@ var Lexicon = class {
27331
27374
  }
27332
27375
  return matrix[source.length][target.length];
27333
27376
  }
27334
- isMassNoun(w, pos) {
27335
- return w.endsWith("ness") || w.endsWith("ism") || pos.indexOf("vbg") > 0 || util_default.MASS_NOUNS.includes(w);
27377
+ isMassNoun(w) {
27378
+ return w.endsWith("ness") || w.endsWith("ism") || this.RiTa.MASS_NOUNS.includes(w);
27336
27379
  }
27337
27380
  // helpers ---------------------------------------------------------------
27338
27381
  _promise(fun, args) {
@@ -27344,44 +27387,44 @@ var Lexicon = class {
27344
27387
  }
27345
27388
  });
27346
27389
  }
27347
- _parseRegex(regex2, opts) {
27348
- if (typeof regex2 === "string") {
27390
+ _parseRegex(regex, opts) {
27391
+ if (typeof regex === "string") {
27349
27392
  if (opts && opts.type === "stresses") {
27350
- if (/^\^?[01]+\$?$/.test(regex2)) {
27351
- regex2 = regex2.replace(/([01])(?=([01]))/g, "$1/");
27393
+ if (/^\^?[01]+\$?$/.test(regex)) {
27394
+ regex = regex.replace(/([01])(?=([01]))/g, "$1/");
27352
27395
  }
27353
27396
  }
27354
- regex2 = new RegExp(regex2);
27355
- } else if (regex2 instanceof RegExp) {
27356
- } else if (typeof regex2 === "object" || regex2 === void 0 && typeof opts === "object") {
27397
+ regex = new RegExp(regex);
27398
+ } else if (regex instanceof RegExp) {
27399
+ } else if (typeof regex === "object" || regex === void 0 && typeof opts === "object") {
27357
27400
  if (!opts) {
27358
- opts = regex2;
27401
+ opts = regex;
27359
27402
  }
27360
- regex2 = opts.regex;
27361
- if (typeof regex2 === "string") {
27403
+ regex = opts.regex;
27404
+ if (typeof regex === "string") {
27362
27405
  if (opts && opts.type === "stresses") {
27363
- if (/^\^?[01]+\$?$/.test(regex2)) {
27364
- regex2 = regex2.replace(/([01])(?=([01]))/g, "$1/");
27406
+ if (/^\^?[01]+\$?$/.test(regex)) {
27407
+ regex = regex.replace(/([01])(?=([01]))/g, "$1/");
27365
27408
  }
27366
27409
  }
27367
- regex2 = new RegExp(regex2);
27410
+ regex = new RegExp(regex);
27368
27411
  }
27369
27412
  }
27370
- return { regex: regex2, opts: opts || {} };
27413
+ return { regex, opts: opts || {} };
27371
27414
  }
27372
- _regexMatch(word, data, regex2, type) {
27415
+ _regexMatch(word, data, regex, type) {
27373
27416
  if (type === "stresses") {
27374
27417
  let phones = data ? data[0] : this.rawPhones(word);
27375
27418
  let stresses = this.analyzer.phonesToStress(phones);
27376
- if (regex2.test(stresses))
27419
+ if (regex.test(stresses))
27377
27420
  return true;
27378
27421
  } else if (type === "phones") {
27379
27422
  let phones = data ? data[0] : this.rawPhones(word);
27380
27423
  phones = phones.replace(/1/g, "").replace(/ /g, "-");
27381
- if (regex2.test(phones))
27424
+ if (regex.test(phones))
27382
27425
  return true;
27383
27426
  } else {
27384
- if (regex2.test(word))
27427
+ if (regex.test(word))
27385
27428
  return true;
27386
27429
  }
27387
27430
  }
@@ -27471,6 +27514,10 @@ var lexicon_default = Lexicon;
27471
27514
 
27472
27515
  // src/tagger.js
27473
27516
  var Tagger = class {
27517
+ /**
27518
+ * Create a Tagger.
27519
+ * @param {any} parent - RiTa parent class.
27520
+ */
27474
27521
  constructor(parent) {
27475
27522
  this.RiTa = parent;
27476
27523
  }
@@ -27535,18 +27582,30 @@ var Tagger = class {
27535
27582
  }
27536
27583
  return [];
27537
27584
  }
27538
- tag(words, opts) {
27539
- let simple = opts && opts.simple;
27540
- let inline = opts && opts.inline;
27541
- let dbug = 0, result = [], choices2d = [];
27542
- if (opts && opts.dbug)
27543
- dbug = 1;
27544
- if (!words || !words.length)
27545
- return inline ? "" : [];
27546
- if (!Array.isArray(words)) {
27547
- if (!words.trim().length)
27548
- return inline ? "" : [];
27549
- words = this.RiTa.tokenizer.tokenize(words);
27585
+ /**
27586
+ * Tags an array of words with their part-of-speech
27587
+ * @param {(string|string[])} input - The input containing a word or words
27588
+ * @param {object} [opts] - options for the tagging {inline, simple}
27589
+ * @param {boolean} [opts.inline] - tags are returned inline with words
27590
+ * @param {boolean} [opts.simple] - use simple tags (noun=n,verb=v,adverb=a,adjective=r)
27591
+ * @returns {any} the pos tag(s) or string with tags inline
27592
+ */
27593
+ tag(input, opts = {
27594
+ inline: false,
27595
+ simple: false
27596
+ }) {
27597
+ let result = [], choices2d = [];
27598
+ let dbug = _optionalChain([opts, 'optionalAccess', _4 => _4.dbug]) || false;
27599
+ if (!input || !input.length)
27600
+ return opts.inline ? "" : [];
27601
+ let words;
27602
+ if (!Array.isArray(input)) {
27603
+ if (!input.trim().length) {
27604
+ return opts.inline ? "" : [];
27605
+ }
27606
+ words = this.RiTa.tokenizer.tokenize(input);
27607
+ } else {
27608
+ words = input;
27550
27609
  }
27551
27610
  for (let i = 0, l = words.length; i < l; i++) {
27552
27611
  let word = words[i];
@@ -27563,7 +27622,7 @@ var Tagger = class {
27563
27622
  }
27564
27623
  }
27565
27624
  let tags = this._applyContext(words, result, choices2d, dbug);
27566
- if (simple) {
27625
+ if (opts.simple) {
27567
27626
  for (let i = 0; i < tags.length; i++) {
27568
27627
  if (NOUNS.includes(tags[i]))
27569
27628
  tags[i] = "n";
@@ -27577,7 +27636,7 @@ var Tagger = class {
27577
27636
  tags[i] = "-";
27578
27637
  }
27579
27638
  }
27580
- return inline ? this.inlineTags(words, tags) : tags;
27639
+ return opts.inline ? this.inlineTags(words, tags) : tags;
27581
27640
  }
27582
27641
  //////////////////////////////////////////////////////////////////
27583
27642
  _isNoLexIrregularVerb(stem) {
@@ -27719,7 +27778,14 @@ var Tagger = class {
27719
27778
  console.log("\n Custom(" + i + ") tagged '" + frm + "' -> '" + to + "'\n\n");
27720
27779
  }
27721
27780
  // debug only: not available in built version since 'dbug' in tag() is 0
27722
- // Applies a customized subset of the Brill transformations
27781
+ /**
27782
+ * Applies a customized subset of the Brill transformations
27783
+ * @param {string[]} words
27784
+ * @param {string[]} result
27785
+ * @param {string[]} choices
27786
+ * @param {boolean} dbug
27787
+ * @returns
27788
+ */
27723
27789
  _applyContext(words, result, choices, dbug) {
27724
27790
  for (let i = 0, l = words.length; i < l; i++) {
27725
27791
  let word = words[i], tag = result[i];
@@ -27802,7 +27868,7 @@ var Tagger = class {
27802
27868
  let idx = result.slice(i + 1).indexOf("nn");
27803
27869
  let allJJ = true;
27804
27870
  for (let k = 0; k < idx; k++) {
27805
- if (!result[i + 1 + k] === "jj") {
27871
+ if (result[i + 1 + k] !== "jj") {
27806
27872
  allJJ = false;
27807
27873
  break;
27808
27874
  }
@@ -27948,7 +28014,7 @@ var Tagger = class {
27948
28014
  for (let j = 0; j < tags.length; j++) {
27949
28015
  if (pos === tags[j])
27950
28016
  return true;
27951
- if (pos === "n" && NOUNS.includes(tags[j]) || pos === "v" && VERBS.includes(tags[j]) || pos === "r" && ADVS.includes(tags[j]) || pos === "a" && ADJS.includes.isAdjTag(tags[j])) {
28017
+ if (pos === "n" && NOUNS.includes(tags[j]) || pos === "v" && VERBS.includes(tags[j]) || pos === "r" && ADVS.includes(tags[j]) || pos === "a" && ADJS.includes(tags[j])) {
27952
28018
  return true;
27953
28019
  }
27954
28020
  }
@@ -27996,7 +28062,7 @@ var Inflector = class {
27996
28062
  if (!word.length)
27997
28063
  return "";
27998
28064
  let check = word.toLowerCase();
27999
- if (this.RiTa.MASS_NOUNS.includes(check)) {
28065
+ if (this.RiTa.lexicon.isMassNoun(check)) {
28000
28066
  dbug && console.log(word + " hit MASS_NOUNS");
28001
28067
  return word;
28002
28068
  }
@@ -28011,11 +28077,6 @@ var Inflector = class {
28011
28077
  return word;
28012
28078
  }
28013
28079
  singularize(word, opts) {
28014
- if (this.isSingular(word, opts)) {
28015
- if (opts && opts.debug)
28016
- console.log("pluralize returning via isPlural()");
28017
- return word;
28018
- }
28019
28080
  return this.adjustNumber(word, SING, opts && opts.dbug);
28020
28081
  }
28021
28082
  pluralize(word, opts) {
@@ -28026,30 +28087,30 @@ var Inflector = class {
28026
28087
  }
28027
28088
  return this.adjustNumber(word, PLUR, opts && opts.dbug);
28028
28089
  }
28029
- isSingular(word, opts) {
28030
- return false;
28031
- if (word && typeof word !== "string") {
28032
- throw Error(`isSingular() takes string`);
28033
- }
28034
- if (!word || !word.length)
28035
- return false;
28036
- let dbug = opts && opts.dbug;
28037
- word = word.toLowerCase();
28038
- if (this.RiTa.MASS_NOUNS.includes(word)) {
28039
- dbug && console.log(word + " is mass noun");
28040
- return true;
28041
- }
28042
- return NN_ENDS_IN_S.includes(word);
28043
- }
28090
+ /*isSingular(word, opts) {
28091
+ // return false;
28092
+ // if (word && typeof word !== 'string') {
28093
+ // throw Error(`isSingular() takes string`);
28094
+ // }
28095
+ if (!word || !word.length) return false;
28096
+
28097
+ let dbug = opts && opts.dbug;
28098
+
28099
+ word = word.toLowerCase();
28100
+
28101
+ if (this.RiTa.MASS_NOUNS.includes(word)) {
28102
+ dbug && console.log(word + " is mass noun");
28103
+ return true;
28104
+ }
28105
+
28106
+ return NOUNS_ENDING_IN_S.includes(word);
28107
+ }*/
28044
28108
  isPlural(word, opts) {
28045
- if (word && typeof word !== "string") {
28046
- throw Error(`isPlural() takes string`);
28047
- }
28048
28109
  if (!word || !word.length)
28049
28110
  return false;
28050
28111
  let dbug = opts && opts.dbug;
28051
28112
  word = word.toLowerCase();
28052
- if (this.RiTa.MASS_NOUNS.includes(word)) {
28113
+ if (this.RiTa.lexicon.isMassNoun(word)) {
28053
28114
  dbug && console.log(word + " is mass noun");
28054
28115
  return true;
28055
28116
  }
@@ -28075,13 +28136,9 @@ var Inflector = class {
28075
28136
  dbug && console.log(word + ": latin rule -a to -ae");
28076
28137
  return true;
28077
28138
  }
28078
- if (this.RiTa.HAS_LEXICON) {
28079
- let tags = this.RiTa.tagger.allTags(sing, { noGuessing: true });
28080
- if (tags.includes("nn")) {
28081
- dbug && console.log(word + "'s singular form " + sing + " is nn");
28082
- return true;
28083
- }
28084
- } else {
28139
+ let tags = this.RiTa.tagger.allTags(sing, { noGuessing: true });
28140
+ if (tags.includes("nn")) {
28141
+ dbug && console.log(word + "'s singular form " + sing + " is nn");
28085
28142
  return true;
28086
28143
  }
28087
28144
  }
@@ -28159,7 +28216,7 @@ var SING_RULES = [
28159
28216
  RE3("(sh|ch|o|ss|x|z|us)es$", 2),
28160
28217
  RE3("ses$", 2, "is"),
28161
28218
  // catharses, prognoses
28162
- // singulars ending in s, TODO: replace with NN_ENDS_IN_S list
28219
+ // singulars ending in s, TODO: replace with NOUNS_ENDING_IN_S list
28163
28220
  RE3("([vs]is|gas|[im]nus|genus|[ptbl]us|[ai]ss|[dr]ess)$", 0),
28164
28221
  // octopus, thesis, alumnus, gas, bus (singulars)
28165
28222
  DEFAULT_SING
@@ -28242,7 +28299,10 @@ var LetterToSound = class _LetterToSound {
28242
28299
  this.tokenizer.tokenize(line, " ");
28243
28300
  let type = this.tokenizer.nextToken();
28244
28301
  if (type === "S" || type === "P") {
28245
- this.stateMachine[this.numStates++] = this.createState(type, this.tokenizer);
28302
+ this.stateMachine[this.numStates++] = this.createState(
28303
+ type
28304
+ /*, this.tokenizer*/
28305
+ );
28246
28306
  } else if (type === "I") {
28247
28307
  let index = parseInt(this.tokenizer.nextToken());
28248
28308
  if (index != this.numStates) {
@@ -28256,12 +28316,12 @@ var LetterToSound = class _LetterToSound {
28256
28316
  }
28257
28317
  }
28258
28318
  buildPhones(word, opts) {
28259
- const RiTa3 = this.RiTa;
28260
- if (!word || !word.length || RiTa3.isPunct(word))
28319
+ const RiTa2 = this.RiTa;
28320
+ if (!word || !word.length || RiTa2.isPunct(word))
28261
28321
  return;
28262
28322
  let phoneList = [], windowSize = 4;
28263
28323
  let fullBuff, tmp, currentState, startIndex, stateIndex, c;
28264
- let silent = RiTa3.SILENT || RiTa3.SILENCE_LTS || opts && opts.silent;
28324
+ let silent = RiTa2.SILENT || RiTa2.SILENCE_LTS || opts && opts.silent;
28265
28325
  if (!_LetterToSound.RULES) {
28266
28326
  if (!this.warnedForNoLTS) {
28267
28327
  this.warnedForNoLTS = true;
@@ -28273,16 +28333,14 @@ var LetterToSound = class _LetterToSound {
28273
28333
  word = word.toLowerCase();
28274
28334
  if (util_default.isNum(word)) {
28275
28335
  if (/^[0-9]+$/.test(word)) {
28276
- if (RiTa3.HAS_LEXICON) {
28277
- word = word.length > 1 ? word.split("") : [word];
28278
- for (let k = 0; k < word.length; k++) {
28279
- let asWord = util_default.Numbers.toWords[parseInt(word[k])];
28280
- let phs = RiTa3.lexicon.rawPhones(asWord, { noLts: true });
28281
- phs = phs.replace(/1/g, "").replace(/ /g, "-");
28282
- phoneList.push(...phs.split("-"));
28283
- }
28284
- return phoneList;
28336
+ word = word.length > 1 ? word.split("") : [word];
28337
+ for (let k = 0; k < word.length; k++) {
28338
+ let asWord = util_default.Numbers.toWords[parseInt(word[k])];
28339
+ let phs = RiTa2.lexicon.rawPhones(asWord, { noLts: true });
28340
+ phs = phs.replace(/1/g, "").replace(/ /g, "-");
28341
+ phoneList.push(...phs.split("-"));
28285
28342
  }
28343
+ return phoneList;
28286
28344
  }
28287
28345
  }
28288
28346
  tmp = "000#" + word.trim() + "#000", fullBuff = tmp.split("");
@@ -28297,7 +28355,7 @@ var LetterToSound = class _LetterToSound {
28297
28355
  startIndex = this.letterIndex[c];
28298
28356
  if (isNaN(parseFloat(startIndex)) || !isFinite(startIndex)) {
28299
28357
  if (!silent) {
28300
- console.warn("Unable to generate LTS for '" + word + "', no index for '" + c + "', isDigit=" + util_default.isNum(c) + ", isPunct=" + RiTa3.isPunct(c));
28358
+ console.warn("Unable to generate LTS for '" + word + "', no index for '" + c + "', isDigit=" + util_default.isNum(c) + ", isPunct=" + RiTa2.isPunct(c));
28301
28359
  }
28302
28360
  return;
28303
28361
  }
@@ -28322,7 +28380,7 @@ var LetterToSound = class _LetterToSound {
28322
28380
  return state;
28323
28381
  } else {
28324
28382
  this.tokenizer.tokenize(i);
28325
- return this.getState(this.tokenizer.nextToken(), this.tokenizer);
28383
+ return this.getState(this.tokenizer.nextToken());
28326
28384
  }
28327
28385
  }
28328
28386
  };
@@ -41533,8 +41591,7 @@ var Analyzer = class {
41533
41591
  return features;
41534
41592
  }
41535
41593
  computePhones(word, opts) {
41536
- if (!this.lts)
41537
- this.lts = new rita_lts_default(this.RiTa);
41594
+ this.lts = this.lts || new rita_lts_default(this.RiTa);
41538
41595
  return this.lts.buildPhones(word, opts);
41539
41596
  }
41540
41597
  phonesToStress(phones) {
@@ -41551,8 +41608,7 @@ var Analyzer = class {
41551
41608
  return stress;
41552
41609
  }
41553
41610
  analyzeWord(word, opts = {}) {
41554
- let RiTa3 = this.RiTa;
41555
- let result = RiTa3.CACHING && this.cache[word];
41611
+ let result = this.RiTa.CACHING && this.cache[word];
41556
41612
  if (typeof result === "undefined") {
41557
41613
  let slash = "/", delim = "-";
41558
41614
  let lex = this.RiTa.lexicon;
@@ -41581,7 +41637,7 @@ var Analyzer = class {
41581
41637
  }
41582
41638
  result = { phones, stresses, syllables };
41583
41639
  Object.keys(result).forEach((k) => result[k] = result[k].trim());
41584
- if (RiTa3.CACHING)
41640
+ if (this.RiTa.CACHING)
41585
41641
  this.cache[word] = result;
41586
41642
  }
41587
41643
  return result;
@@ -41601,15 +41657,15 @@ var Analyzer = class {
41601
41657
  }
41602
41658
  //#HWF this part is unchanged but move to a separated function
41603
41659
  _computePhonesWord(word, lex, opts, isPart) {
41604
- let rawPhones, RiTa3 = this.RiTa;
41660
+ let rawPhones, RiTa2 = this.RiTa;
41605
41661
  if (isPart)
41606
41662
  rawPhones = lex.rawPhones(word, { noLts: true });
41607
41663
  if (!rawPhones && word.endsWith("s")) {
41608
- let sing = RiTa3.singularize(word);
41664
+ let sing = RiTa2.singularize(word);
41609
41665
  rawPhones = lex.rawPhones(sing, { noLts: true });
41610
41666
  rawPhones && (rawPhones += "-z");
41611
41667
  }
41612
- let silent = RiTa3.SILENT || RiTa3.SILENCE_LTS || opts && opts.silent;
41668
+ let silent = RiTa2.SILENT || RiTa2.SILENCE_LTS || opts && opts.silent;
41613
41669
  if (!rawPhones) {
41614
41670
  let ltsPhones = this.computePhones(word, opts);
41615
41671
  if (ltsPhones && ltsPhones.length) {
@@ -41740,7 +41796,7 @@ var SeededRandom = class {
41740
41796
  if (!(Array.isArray(arg) || util_default.isNum(arg)))
41741
41797
  throw Error("Expects [] or int");
41742
41798
  let o = Array.isArray(arg) ? arg : Array.from(Array(arg).keys());
41743
- for (let j, x, i = o.length; i; j = parseInt(this.random() * i), x = o[--i], o[i] = o[j], o[j] = x) {
41799
+ for (let j, x, i = o.length; i; j = Math.floor(this.random() * i), x = o[--i], o[i] = o[j], o[j] = x) {
41744
41800
  }
41745
41801
  return o;
41746
41802
  }
@@ -41866,16 +41922,31 @@ var randgen_default = SeededRandom;
41866
41922
 
41867
41923
  // src/markov.js
41868
41924
  var _json = require('@ungap/structured-clone/json');
41869
- var RiMarkov = class _RiMarkov {
41870
- constructor(n, opts = {}) {
41925
+ var _RiMarkov = class _RiMarkov {
41926
+ // RiTa
41927
+ /**
41928
+ * Creates a new RiMarkov object with functions for text-generation and other probabilistic functions,
41929
+ * via Markov chains (or n-grams) with options to process words or tokens split by arbitrary regular expressions.
41930
+ * @param {number} [n] - the n-gram size (an integer >= 2)
41931
+ * @param {object} [options={}] - options for the model
41932
+ * @param {string|string[]} [options.text] - a text string, or array of sentences, to add to the model (same as via model.addText()
41933
+ * @param {boolean} [options.trace] - output trace info to the console
41934
+ * @param {number} [options.maxLengthMatch] - # of words allowed in result to match a sequence in the input, default=∞
41935
+ * @param {number} [options.maxAttempts=999] - max attempts before to complete one ore more generations before erroring, default=999
41936
+ * @param {function} [options.tokenize] - custom tokenizer with tokenize() method, defaults to RiTa.tokenize()
41937
+ * @param {function} [options.untokenize] - custom untokenizer with untokenize() method, defaults to RiTa.untokenize()
41938
+ * @param {boolean} [options.disableInputChecks=false] - if true, allow result to be present in the input, default
41939
+ * @memberof RiMarkov
41940
+ */
41941
+ constructor(n, options = {}) {
41871
41942
  this.n = n;
41872
41943
  this.root = new Node(null, "ROOT");
41873
- this.trace = opts.trace;
41874
- this.mlm = opts.maxLengthMatch;
41875
- this.maxAttempts = opts.maxAttempts || 999;
41876
- this.tokenize = opts.tokenize || RiTa().tokenize;
41877
- this.untokenize = opts.untokenize || RiTa().untokenize;
41878
- this.disableInputChecks = opts.disableInputChecks;
41944
+ this.trace = options.trace;
41945
+ this.mlm = options.maxLengthMatch;
41946
+ this.maxAttempts = options.maxAttempts || 999;
41947
+ this.tokenize = options.tokenize || _RiMarkov.parent.tokenize;
41948
+ this.untokenize = options.untokenize || _RiMarkov.parent.untokenize;
41949
+ this.disableInputChecks = options.disableInputChecks;
41879
41950
  this.sentenceStarts = [];
41880
41951
  this.sentenceEnds = /* @__PURE__ */ new Set();
41881
41952
  if (this.n < 2)
@@ -41884,11 +41955,18 @@ var RiMarkov = class _RiMarkov {
41884
41955
  throw Error("maxLengthMatch must be >= N");
41885
41956
  if (!this.disableInputChecks || this.mlm)
41886
41957
  this.input = [];
41887
- if (opts.text)
41888
- this.addText(opts.text);
41889
- }
41958
+ if (options.text)
41959
+ this.addText(options.text);
41960
+ }
41961
+ /**
41962
+ * Loads text into the model. If a raw string is provided, it will be split into sentences
41963
+ * via RiTa.sentences(). If an array is provided, each string will be treated as an individual sentence.
41964
+ * @param {string|string[]} text - a text string, or array of sentences, to add to the model
41965
+ * @param {number} [multiplier=1] - number of times to add the text to the model
41966
+ * @return {RiMarkov} - the RiMarkov instance
41967
+ */
41890
41968
  addText(text, multiplier = 1) {
41891
- let sents = Array.isArray(text) ? text : RiTa().sentences(text);
41969
+ let sents = Array.isArray(text) ? text : _RiMarkov.parent.sentences(text);
41892
41970
  let wrap, allWords = [];
41893
41971
  for (let k = 0; k < multiplier; k++) {
41894
41972
  for (let i = 0; i < sents.length; i++) {
@@ -41904,19 +41982,32 @@ var RiMarkov = class _RiMarkov {
41904
41982
  this.input.push(allWords[i]);
41905
41983
  }
41906
41984
  }
41907
- }
41908
- generate(count, opts = {}) {
41985
+ return this;
41986
+ }
41987
+ /**
41988
+ * Generates `count` joined sentences from the model.
41989
+ * @param {number} [count=1] - the number of sentences to generate (default=1)
41990
+ * @param {object} [options={}] - options for the generation
41991
+ * @param {number} [options.minLength=5] - minimum length of each sentence
41992
+ * @param {number} [options.maxLength=35] - maximum length of each sentence
41993
+ * @param {number} [options.temperature=1] - temperature acts as a knob to adjust the probability that input elements will be selected for the output. At higher values, infrequent words are more likely to be chosen, while at lower values the most frequent inputs are more likely to be output. If no value is provided, then tokens are chosen according to their relative frequency in the input.
41994
+ * @param {boolean} [options.allowDuplicates=false] - if true, allow duplicate sentences in the output
41995
+ * @param {string|string[]} [options.seed] - a seed string or array of tokens to start the generation
41996
+ * @param {boolean} [options.trace] - output trace info to the console
41997
+ * @return {string[]} - the generated sentences
41998
+ */
41999
+ generate(count, options = {}) {
41909
42000
  if (arguments.length === 1 && typeof count === "object") {
41910
- opts = count;
42001
+ options = count;
41911
42002
  count = 1;
41912
42003
  }
41913
42004
  const num = count || 1;
41914
- const minLength = opts.minLength || 5;
41915
- const maxLength = opts.maxLength || 35;
41916
- if (typeof opts.temperature !== "undefined" && opts.temperature <= 0) {
42005
+ const minLength = options.minLength || 5;
42006
+ const maxLength = options.maxLength || 35;
42007
+ if (typeof options.temperature !== "undefined" && options.temperature <= 0) {
41917
42008
  throw Error("Temperature option must be greater than 0");
41918
42009
  }
41919
- let tries = 0, tokens = [], usedStarts = [];
42010
+ let tries = 0, tokens = [];
41920
42011
  let minIdx = 0, sentenceIdxs = [];
41921
42012
  let markedNodes = [];
41922
42013
  const unmarkNodes = () => {
@@ -41955,7 +42046,7 @@ var RiMarkov = class _RiMarkov {
41955
42046
  return false;
41956
42047
  }
41957
42048
  let flatSent = this.untokenize(sentence);
41958
- if (!opts.allowDuplicates && isSubArray(sentence, tokens.slice(0, sentIdx))) {
42049
+ if (!options.allowDuplicates && isSubArray(sentence, tokens.slice(0, sentIdx))) {
41959
42050
  fail("duplicate (pop: " + next.token + ")");
41960
42051
  return false;
41961
42052
  }
@@ -42042,7 +42133,7 @@ var RiMarkov = class _RiMarkov {
42042
42133
  return len ? sentenceIdxs[len - 1] : 0;
42043
42134
  };
42044
42135
  const selectStart = () => {
42045
- let seed = opts.seed;
42136
+ let seed = options.seed;
42046
42137
  if (seed && seed.length) {
42047
42138
  if (typeof seed === "string")
42048
42139
  seed = this.tokenize(seed);
@@ -42055,7 +42146,7 @@ var RiMarkov = class _RiMarkov {
42055
42146
  let usableStarts = this.sentenceStarts.filter((ss) => notMarked(this.root.child(ss)));
42056
42147
  if (!usableStarts.length)
42057
42148
  throw Error("No valid sentence-starts remaining");
42058
- let start = RiTa().random(usableStarts);
42149
+ let start = _RiMarkov.parent.random(usableStarts);
42059
42150
  let startTok = this.root.child(start);
42060
42151
  markNode(startTok);
42061
42152
  usableStarts = this.sentenceStarts.filter((ss) => notMarked(this.root.child(ss)));
@@ -42072,7 +42163,7 @@ var RiMarkov = class _RiMarkov {
42072
42163
  continue;
42073
42164
  }
42074
42165
  let parent = this._pathTo(tokens);
42075
- let next = this._selectNext(parent, opts.temperature, tokens, notMarked);
42166
+ let next = this._selectNext(parent, options.temperature, tokens, notMarked);
42076
42167
  if (!next) {
42077
42168
  fail("mlm-fail(" + this.mlm + ")", this._flatten(tokens), true);
42078
42169
  continue;
@@ -42093,11 +42184,20 @@ var RiMarkov = class _RiMarkov {
42093
42184
  let str = this.untokenize(tokens.map((t) => t.token)).trim();
42094
42185
  return num > 1 ? this._splitEnds(str) : str;
42095
42186
  }
42187
+ /**
42188
+ * Converts the model to a JSON-formatted string for storage or serialization
42189
+ * @return {string} - the JSON string
42190
+ */
42096
42191
  toJSON() {
42097
42192
  let data = Object.keys(this).reduce((acc, k) => Object.assign(acc, { [k]: this[k] }), {});
42098
42193
  data.sentenceEnds = [...data.sentenceEnds];
42099
42194
  return _json.stringify.call(void 0, data);
42100
42195
  }
42196
+ /**
42197
+ * Creates a new model from one previously saved as JSON
42198
+ * @param {string} json - the JSON string to load
42199
+ * @return {RiMarkov} - the RiMarkov instance
42200
+ */
42101
42201
  static fromJSON(json) {
42102
42202
  let parsed = _json.parse.call(void 0, json);
42103
42203
  let rm = Object.assign(new _RiMarkov(), parsed);
@@ -42108,14 +42208,20 @@ var RiMarkov = class _RiMarkov {
42108
42208
  populate(rm.root = new Node(null, "ROOT"), jsonRoot);
42109
42209
  return rm;
42110
42210
  }
42111
- /* returns array of possible tokens after pre and (optionally) before post */
42211
+ /**
42212
+ * Returns array of possible tokens after pre and (optionally) before post. If only one array parameter is provided, this function returns all possible next words, ordered by probability, for the given array.
42213
+ * If two arrays are provided, it returns an unordered list of possible words w that complete the n-gram consisting of: pre[0]...pre[k], w, post[k+1]...post[n].
42214
+ * @param {string[]} pre - the list of tokens preceding the completion
42215
+ * @param {string[]} [post] - the (optional) list of tokens following the completion
42216
+ * @return {string[]} - an unordered list of possible next tokens
42217
+ */
42112
42218
  completions(pre, post) {
42113
42219
  let tn, result = [];
42114
42220
  if (post) {
42115
42221
  if (pre.length + post.length > this.n)
42116
42222
  throw Error("Sum of pre.length && post.length must be <= N, was " + (pre.length + post.length));
42117
42223
  if (!(tn = this._pathTo(pre))) {
42118
- if (!RiTa().SILENT)
42224
+ if (!_RiMarkov.parent.SILENT)
42119
42225
  console.warn("Unable to find nodes in pre: " + pre);
42120
42226
  return;
42121
42227
  }
@@ -42132,8 +42238,14 @@ var RiMarkov = class _RiMarkov {
42132
42238
  }
42133
42239
  return result;
42134
42240
  }
42135
- /* return an object mapping {string -> prob} */
42136
- probabilities(path, temp) {
42241
+ /**
42242
+ * Returns the full set of possible next tokens as a object, mapping tokens to probabilities,
42243
+ * given an array of tokens representing the path down the tree (with length less than `n`).
42244
+ * @param {string|string[]} path - the path to the node as a string or an array of tokens
42245
+ * @param {number} [temperature=1] - temperature acts as a knob to adjust the probability that input elements will be selected for the output. At higher values, infrequent words are more likely to be chosen, while at lower values the most frequent inputs are more likely to be output. If no value is provided, then tokens are chosen according to their relative frequency in the input.
42246
+ * @return {object} - a map of tokens to probabilities
42247
+ */
42248
+ probabilities(path, temperature) {
42137
42249
  if (!Array.isArray(path))
42138
42250
  path = this.tokenize(path);
42139
42251
  const probs = {};
@@ -42141,11 +42253,17 @@ var RiMarkov = class _RiMarkov {
42141
42253
  if (parent) {
42142
42254
  const children = parent.childNodes();
42143
42255
  const weights = children.map((n) => n.count);
42144
- const pdist = _RiMarkov.parent.randomizer.ndist(weights, temp);
42256
+ const pdist = _RiMarkov.parent.randomizer.ndist(weights, temperature);
42145
42257
  children.forEach((c, i) => probs[c.token] = pdist[i]);
42146
42258
  }
42147
42259
  return probs;
42148
42260
  }
42261
+ /**
42262
+ * Returns either the raw (unigram) probability for a single token in the model (0 if it does not exist), OR
42263
+ * the probability of a sequence of K tokens where K is less than `n` (0 if the sequence does not exist).
42264
+ * @param {string|string[]} data - the token or array of tokens to check
42265
+ * @return {number} - the probability of the token or sequence
42266
+ */
42149
42267
  probability(data) {
42150
42268
  let p = 0;
42151
42269
  if (data && data.length) {
@@ -42155,10 +42273,20 @@ var RiMarkov = class _RiMarkov {
42155
42273
  }
42156
42274
  return p;
42157
42275
  }
42276
+ /**
42277
+ * Returns a string representation of the model or a subtree of the model, optionally ordered by probability.
42278
+ * @param {object} root - the root node of the subtree to print
42279
+ * @param {boolean} sort - if true, sort the nodes by probability
42280
+ * @return {string} - the string representation of the model
42281
+ */
42158
42282
  toString(root, sort) {
42159
42283
  root = root || this.root;
42160
42284
  return root.asTree(sort).replace(/{}/g, "");
42161
42285
  }
42286
+ /**
42287
+ * Returns the number of tokens currently in the model.
42288
+ * @return {number} - number of tokens
42289
+ */
42162
42290
  size() {
42163
42291
  return this.root.childCount(true);
42164
42292
  }
@@ -42281,6 +42409,8 @@ var RiMarkov = class _RiMarkov {
42281
42409
  return sent.replace(MULTI_SP_RE, " ");
42282
42410
  }
42283
42411
  };
42412
+ __publicField(_RiMarkov, "parent");
42413
+ var RiMarkov = _RiMarkov;
42284
42414
  var Node = class _Node {
42285
42415
  constructor(parent, word, count) {
42286
42416
  this.children = {};
@@ -42289,6 +42419,7 @@ var Node = class _Node {
42289
42419
  this.count = count || 0;
42290
42420
  this.numChildren = -1;
42291
42421
  this.marked = false;
42422
+ this.hidden = false;
42292
42423
  }
42293
42424
  // Find a (direct) child node with matching token, given a word or node
42294
42425
  child(word) {
@@ -42407,9 +42538,6 @@ function populate(objNode, jsonNode) {
42407
42538
  populate(newNode, child);
42408
42539
  }
42409
42540
  }
42410
- function RiTa() {
42411
- return RiMarkov.parent;
42412
- }
42413
42541
  function throwError(tries, oks) {
42414
42542
  throw Error("Failed after " + tries + " tries" + (oks ? " and " + oks + " successes" : "") + ", you may need to adjust options or add more text");
42415
42543
  }
@@ -42432,228 +42560,697 @@ var markov_default = RiMarkov;
42432
42560
 
42433
42561
  // src/rita.js
42434
42562
  var _riscript = require('riscript');
42435
- var { Grammar: RiGrammar } = _riscript.RiScript;
42436
- var RiTa2 = class _RiTa {
42563
+ var RiTa = class _RiTa {
42564
+ /**
42565
+ * Create a RiTa grammar instance
42566
+ * @param {object} [rules] - the rules of the grammar
42567
+ * @param {object} [context] - the context of the grammar
42568
+ * @returns {RiGrammar} - a new RiGrammar instance
42569
+ */
42437
42570
  static grammar(rules, context) {
42438
- return new RiGrammar(...arguments);
42571
+ return new (0, _riscript.RiGrammar)(rules, context);
42439
42572
  }
42573
+ /**
42574
+ * Add a transform function to the RiScript parser
42575
+ * @param {string} name - the name of the transform
42576
+ * @param {function} definition - the transform function
42577
+ */
42440
42578
  static addTransform(name, definition) {
42441
- return _RiTa.riscript.addTransform(...arguments);
42579
+ _RiTa.riscript.addTransform(name, definition);
42442
42580
  }
42581
+ /**
42582
+ * Remove a transform function from the RiScript parser
42583
+ * @param {string} name - the name of the transform to remove
42584
+ */
42443
42585
  static removeTransform(name) {
42444
- return _RiTa.riscript.removeTransform(...arguments);
42586
+ _RiTa.riscript.removeTransform(name);
42445
42587
  }
42588
+ /**
42589
+ * Returns the names of all current transform functions
42590
+ * @returns {string[]} the names of all transforms
42591
+ */
42446
42592
  static getTransforms() {
42447
42593
  return _RiTa.riscript.getTransforms();
42448
42594
  }
42595
+ /**
42596
+ * Adds the appropriate article ('a' or 'an') to the word, according to its phonemes (useful as a transform function)
42597
+ * @param {string} word - the word to transform
42598
+ * @returns {string} - the word with an article, e.g., 'honor' -> 'an honor'
42599
+ */
42449
42600
  static articlize(word) {
42450
- return _riscript.RiScript.articlize(...arguments);
42451
- }
42452
- static evaluate(script, context, opts) {
42453
- return _RiTa.riscript.evaluate(...arguments);
42454
- }
42455
- static markov(n, opts) {
42456
- return new markov_default(...arguments);
42457
- }
42458
- static kwic(word, opts) {
42459
- return _RiTa.concorder.kwic(...arguments);
42460
- }
42461
- static concordance(string, opts) {
42462
- return _RiTa.concorder.concordance(...arguments);
42463
- }
42601
+ return _riscript.RiScript.articlize(word, _RiTa);
42602
+ }
42603
+ /**
42604
+ * Evaluates the input script via the RiScript parser
42605
+ * @param {string} script - the script to evaluate
42606
+ * @param {object} [context] - the context to evaluate the script ing
42607
+ * @param {object} [options] - options for the evaluation
42608
+ * @param {boolean} [options.trace] - whether to trace the evaluation
42609
+ * @returns {string} the result of the evaluation
42610
+ */
42611
+ static evaluate(script, context, options) {
42612
+ return _RiTa.riscript.evaluate(script, context, options);
42613
+ }
42614
+ /**
42615
+ * Creates a new RiMarkov object
42616
+ * @param {number} n - an int representing the n-factor of the markov chain
42617
+ * @param {object} [options] - options for the markov chain
42618
+ * @param {string|string[]} [options.text] - a text string, or array of sentences, to add to the model (same as via model.addText()
42619
+ * @param {number} [options.maxLengthMatch] - # of words allowed in result to match a sequence in the input, default=∞
42620
+ * @param {number} [options.maxAttempts=999] - max attempts before to complete one ore more generations before erroring, default=999
42621
+ * @param {function} [options.tokenize] - custom tokenizer with tokenize() method, defaults to RiTa.tokenize()
42622
+ * @param {function} [options.untokenize] - custom untokenizer with untokenize() method, defaults to RiTa.untokenize()
42623
+ * @param {boolean} [options.disableInputChecks=false] - if true, allow result to be present in the input, default
42624
+ * @param {boolean} [options.trace] - output trace info to the console
42625
+ * @returns {RiMarkov}
42626
+ */
42627
+ static markov(n, options) {
42628
+ return new markov_default(n, options);
42629
+ }
42630
+ /**
42631
+ * Return a list of occurrences of the key word in the Key-Word-In-Context (KWIC) model.
42632
+ * @overload
42633
+ * @param {string} keyword
42634
+ * @param {object} [options]
42635
+ * @param {number} [options.numWords] - the number of words to include in the context
42636
+ * @param {string} [options.text] - the text as input for the KWIC model
42637
+ * @param {string[]} [options.words] - the array of words to be used as input for the KWIC model
42638
+ * @returns {string[]} all the occurrences of the keyword in the model, each with no more
42639
+ * than 'numWords' words of context on either side
42640
+ * @overload
42641
+ * @param {string} keyword
42642
+ * @param {number} text - the number of words to include in the context
42643
+ * @returns {string[]} all the occurrences of the keyword in the model, each with no more
42644
+ * than 'numWords' words of context on either side
42645
+ */
42646
+ static kwic(keyword, options) {
42647
+ return _RiTa.concorder.kwic(keyword, options);
42648
+ }
42649
+ /**
42650
+ * Creates a concordance, a list of words with their frequency of occurence, from the given text and options.
42651
+ * @param {string} text - the text from which to create the concordance
42652
+ * @param {object} [options] - options for the concordance
42653
+ * @param {boolean} [options.ignoreCase=false] - whether to ignore case when creating the concordance
42654
+ * @param {boolean} [options.ignoreStopWords=false] - whether to ignore stop words like
42655
+ * 'the', 'and', 'a', 'of', etc, as specified in RiTa.STOP_WORDS
42656
+ * @param {boolean} [options.ignorePunctuation=false] - whether to ignore punctuation when creating the concordance
42657
+ * @param {string[]} [options.wordsToIgnore=null] - words to ignore when creating the concordance (alternate stop-words)
42658
+ * @returns {object} the concordance, an object with words as keys and frequencies as values
42659
+ */
42660
+ static concordance(text, options) {
42661
+ return _RiTa.concorder.concordance(text, options);
42662
+ }
42663
+ /**
42664
+ * Returns a random ordering of the input array or a random ordering of integers from 1 to k
42665
+ * @overload
42666
+ * @param {object[]} array - the array to shuffle
42667
+ * @returns {object[]} the input array in a random order
42668
+ * @overload
42669
+ * @param {number} k - the number of integers to return
42670
+ * @returns {number[]} an array of arrays of integers from 1 to k in random order
42671
+ */
42464
42672
  static randomOrdering(arrayOrInt) {
42465
- return _RiTa.randomizer.randomOrdering(...arguments);
42673
+ return _RiTa.randomizer.randomOrdering(arrayOrInt);
42466
42674
  }
42467
- static randomSeed(number) {
42468
- return _RiTa.randomizer.seed(number);
42675
+ /**
42676
+ * Sets the seed for the RiTa random number generator
42677
+ * @param {number} seed - the seed to set
42678
+ */
42679
+ static randomSeed(seed) {
42680
+ _RiTa.randomizer.seed(seed);
42469
42681
  }
42682
+ /**
42683
+ * Returns true if the sentence is a question, else false
42684
+ * @param {string} sentence
42685
+ * @returns {boolean} - true if the sentence is a question, else false
42686
+ */
42470
42687
  static isQuestion(sentence) {
42471
42688
  return _RiTa.QUESTIONS.includes(_RiTa.tokenize(sentence)[0].toLowerCase());
42472
42689
  }
42690
+ /**
42691
+ * Returns true if the character is a vowel, else false
42692
+ * @param {string} char
42693
+ * @returns {boolean} - true if the character is a vowel, else false
42694
+ */
42473
42695
  static isVowel(char) {
42474
42696
  return char && char.length === 1 && _RiTa.VOWELS.includes(char);
42475
42697
  }
42698
+ /**
42699
+ * Returns true if the character is a consonant, else false
42700
+ * @param {string} char
42701
+ * @returns {boolean} - true if the character is a consonant, else false
42702
+ */
42476
42703
  static isConsonant(char) {
42477
42704
  return char && char.length === 1 && !_RiTa.VOWELS.includes(char) && IS_LETTER.test(char);
42478
42705
  }
42706
+ /**
42707
+ * Capitalizes the first letter of the input string, leaving others unchanged
42708
+ * @param {string} string - the string to capitalize
42709
+ * @returns {string} the capitalized string
42710
+ */
42479
42711
  static capitalize(string) {
42480
42712
  return string ? string[0].toUpperCase() + string.substring(1) : "";
42481
42713
  }
42482
- static randomWord(opts) {
42483
- return _RiTa.lexicon.randomWord(...arguments);
42484
- }
42485
- static async rhymes(word, opts) {
42486
- return await _RiTa.lexicon.rhymes(...arguments);
42487
- }
42714
+ /**
42715
+ * Return a random word from the lexicon matching the specified criteria
42716
+ * (length, syllable-count, phonemic pattern, stress pattern, part-of-speech, etc.).
42717
+ * @param {(string|RegExp)} [pattern] - the pattern to match
42718
+ * @param {object} [options]
42719
+ * @param {number} [options.minLength=4] - the minimum length of the word
42720
+ * @param {number} [options.maxLength=-1] - the maximum length of the word
42721
+ * @param {number} [options.numSyllables=null] - the number of syllables in the word
42722
+ * @param {number} [options.limit=10] - the maximum number of results to retur
42723
+ * @param {string} [options.pos=null] - the part-of-speech of the word to return,
42724
+ * either from the Penn tag set or the simplified tag set [a, r, v, n]
42725
+ * @param {RegExp} [options.pattern=null] - the spelling or phonemic pattern to match
42726
+ * @param {string} [options.type=null] - the type of regex or string pattern to match,
42727
+ * options are 'stresses' or 'phones' or 'letters' (the default)
42728
+ * @returns {string} a random word matching the criteria in the options object
42729
+ */
42730
+ static randomWord(pattern, options) {
42731
+ return _RiTa.lexicon.randomWord(pattern, options);
42732
+ }
42733
+ /**
42734
+ * Returns words that rhyme with the given word. Two words are considered as rhyming if
42735
+ * their final stressed vowel and all following phonemes are identical.
42736
+ * @param {string} word
42737
+ * @param {object} [options]
42738
+ * @param {number} [options.minLength=4] - the minimum length of the words
42739
+ * @param {number} [options.maxLength] - the maximum length of the words
42740
+ * @param {number} [options.numSyllables] - the number of syllables in the words
42741
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
42742
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
42743
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
42744
+ * or the simplified tag set [a, r, v, n]
42745
+ * @returns {Promise<string[]>} an array of rhymes that match criteria in the options object
42746
+ */
42747
+ static async rhymes(word, options) {
42748
+ return await _RiTa.lexicon.rhymes(word, options);
42749
+ }
42750
+ /**
42751
+ * Returns words that rhyme with the given word. Two words are considered as rhyming if
42752
+ * their final stressed vowel and all following phonemes are identical.
42753
+ * @param {string} word1 - the first word to compare
42754
+ * @param {string} word2 - the second word to compare
42755
+ * @returns {boolean} true if the two words rhyme, else false
42756
+ */
42488
42757
  static isRhyme(word1, word2) {
42489
- return _RiTa.lexicon.isRhyme(...arguments);
42490
- }
42491
- static async alliterations(word, opts) {
42492
- return await _RiTa.lexicon.alliterations(...arguments);
42493
- }
42494
- static hasWord(word) {
42495
- return _RiTa.lexicon.hasWord(...arguments);
42496
- }
42497
- static isAbbrev(input, { caseSensitive = false } = {}) {
42758
+ return _RiTa.lexicon.isRhyme(word1, word2);
42759
+ }
42760
+ /**
42761
+ * Finds alliterations by comparing the phonemes of the input string to those
42762
+ * of each word in the lexicon via a minimum-edit-distance metric.
42763
+ * @param {string} word
42764
+ * @param {object} [options]
42765
+ * @param {number} [options.minLength=4] - the minimum length of the words
42766
+ * @param {number} [options.maxLength] - the maximum length of the words
42767
+ * @param {number} [options.numSyllables] - the number of syllables in the words
42768
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
42769
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
42770
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
42771
+ * or the simplified tag set [a, r, v, n]
42772
+ * @returns {Promise<string[]>} an array of alliterations matching criteria in the options object
42773
+ */
42774
+ static async alliterations(word, options) {
42775
+ return await _RiTa.lexicon.alliterations(word, options);
42776
+ }
42777
+ /**
42778
+ * Returns true if the word is in the lexicon, else false
42779
+ * @param {string} word - the word to check
42780
+ * @param {object} [options] - options for the search
42781
+ * @param {boolean} [options.noDerivations=false] - whether to ignore derivations and only search for raw words
42782
+ * @returns {boolean} true if the word is in the lexicon, else false
42783
+ */
42784
+ static hasWord(word, options) {
42785
+ return _RiTa.lexicon.hasWord(word, options);
42786
+ }
42787
+ /**
42788
+ * Returns true if the word is an abbreviation, else false
42789
+ * @param {string} input - the word to check
42790
+ * @param {object} [options] - options for the search
42791
+ * @param {boolean} [options.caseSensitive=false] - whether to ignore case when checking for abbreviations
42792
+ * @returns {boolean} true if the word is an abbreviation, else false
42793
+ */
42794
+ static isAbbrev(input, options) {
42498
42795
  if (typeof input === "string") {
42499
- if (caseSensitive)
42796
+ if (_optionalChain([options, 'optionalAccess', _5 => _5.caseSensitive]))
42500
42797
  return _RiTa.ABRV.includes(input.trim());
42501
42798
  let check = input.trim().toLowerCase();
42502
42799
  return _RiTa.ABRV.some((a) => a.toLowerCase() === check);
42503
42800
  }
42504
42801
  }
42802
+ /**
42803
+ * Returns true if the two words are an alliteration (if their first stressed consonants match).
42804
+ * Note: returns true if wordA.equals(wordB) and false if either (or both) are null.
42805
+ * @param {string} word1 - the first word to compare
42806
+ * @param {string} word2 - the second word to compare
42807
+ * @returns {boolean} true if the two words are an alliteration, else false
42808
+ */
42505
42809
  static isAlliteration(word1, word2) {
42506
- return _RiTa.lexicon.isAlliteration(...arguments);
42507
- }
42508
- static async spellsLike(word, opts) {
42509
- return await _RiTa.lexicon.spellsLike(...arguments);
42510
- }
42511
- static async soundsLike(word, opts) {
42512
- return await _RiTa.lexicon.soundsLike(...arguments);
42513
- }
42514
- static pos(word) {
42515
- return _RiTa.tagger.tag(...arguments);
42810
+ return _RiTa.lexicon.isAlliteration(word1, word2);
42811
+ }
42812
+ /**
42813
+ * Compares the letters of the input word (using a version of the Levenstein min-edit distance algorithm)
42814
+ * to each word in the lexicon, returning the set of closest matches that also match the criteria in the options object.
42815
+ * @param {string} word - the word to match
42816
+ * @param {object} [options] - options for the search
42817
+ * @param {number} [options.minLength=4] - the minimum length of the words
42818
+ * @param {number} [options.maxLength] - the maximum length of the words
42819
+ * @param {number} [options.numSyllables] - the number of syllables in the words
42820
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
42821
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
42822
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set or the simplified tag set [a, r, v, n]
42823
+ * @returns {Promise<string[]>} an array of words matching the spelling pattern and criteria in the options object
42824
+ */
42825
+ static async spellsLike(word, options) {
42826
+ return await _RiTa.lexicon.spellsLike(word, options);
42827
+ }
42828
+ /**
42829
+ * Compares the phonemes of the input pattern (using a version of the Levenshtein min-edit distance algorithm)
42830
+ * to each word in the lexicon, returning the set of closest matches that also match the criteria in the options object.
42831
+ * @param {string} word - the word to match
42832
+ * @param {object} [options] - options for the search
42833
+ * @param {number} [options.minLength=4] - the minimum length of the words
42834
+ * @param {number} [options.maxLength] - the maximum length of the words
42835
+ * @param {number} [options.numSyllables] - the number of syllables in the words
42836
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
42837
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
42838
+ * @param {boolean} [options.matchSpelling=false] - if true, will also attempt to match spelling by returning an intersection with RiTa.spellsLike()
42839
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
42840
+ * or the simplified tag set [a, r, v, n]
42841
+ * @returns {Promise<string[]>} an array of words matching the phonemic pattern and criteria in the options object
42842
+ */
42843
+ static async soundsLike(word, options) {
42844
+ return await _RiTa.lexicon.soundsLike(word, options);
42845
+ }
42846
+ /**
42847
+ * Generates part-of-speech tags for each word in the input with tags
42848
+ * from the Penn tag set or the simplified tag set [a, r, v, n].
42849
+ * @param {(string|string[])} word - the word or words to tag
42850
+ * @param {object} [options] - options for the tagging
42851
+ * @param {boolean} [options.simple] - use simple tags (noun=n,verb=v,adverb=a,adjective=r)
42852
+ * @returns {string|string[]} - an array of part-of-speech tags for each word in the input
42853
+ */
42854
+ static pos(word, options) {
42855
+ if (options && "inline" in options) {
42856
+ throw Error("Use RiTa.posInline() instead");
42857
+ }
42858
+ return _RiTa.tagger.tag(word, options);
42516
42859
  }
42860
+ /**
42861
+ * Returns true if the word has a noun form. That is, if any of its possible
42862
+ * parts of speech are any variant of a noun in the Penn tag set(e.g. nn, nns, nnp, nnps).
42863
+ * @param {string} word - the word to check
42864
+ * @returns {boolean} - true if the word is a noun, else false
42865
+ */
42517
42866
  static isNoun(word) {
42518
42867
  return _RiTa.tagger.isNoun(word);
42519
42868
  }
42869
+ /**
42870
+ * Returns true if word has an adjective form. That is, if any of its possible parts of speech
42871
+ * are any variant of an adjective in the Penn tag set (e.g. jj, jjr, jjs).
42872
+ * @param {string} word - the word to check
42873
+ * @returns {boolean} - true if the word is an adjective, else false
42874
+ */
42520
42875
  static isAdjective(word) {
42521
42876
  return _RiTa.tagger.isAdjective(word);
42522
42877
  }
42878
+ /**
42879
+ * Returns true if the word has an adverb form. That is, if any of its possible parts of speech
42880
+ * are any variant of an adverb in the Penn tag set (e.g. rb, rbr, rbs).
42881
+ * @param {string} word - the word to check
42882
+ * @returns {boolean} - true if the word is an adverb, else false
42883
+ */
42523
42884
  static isAdverb(word) {
42524
42885
  return _RiTa.tagger.isAdverb(word);
42525
42886
  }
42526
- static isPunct(text) {
42527
- return text && text.length && ONLY_PUNCT.test(text);
42528
- }
42887
+ /**
42888
+ * Returns true if the word has a verb form. That is, if any of its possible
42889
+ * parts of speech are any variant of a verb in the Penn tag set (e.g. vb, vbg, vbd, vbp, vbz).
42890
+ * @param {string} word - the word to check
42891
+ * @returns {boolean} - true if the word is a verb, else false
42892
+ */
42529
42893
  static isVerb(word) {
42530
42894
  return _RiTa.tagger.isVerb(word);
42531
42895
  }
42532
- static posInline(words, opts = {}) {
42533
- opts.inline = true;
42534
- return _RiTa.tagger.tag(words, opts);
42896
+ /**
42897
+ * Returns true if every character of 'text' is a punctuation character.
42898
+ * @param {string} text
42899
+ * @returns {boolean} true if every character of 'text' is punctuation, else false
42900
+ */
42901
+ static isPunct(text) {
42902
+ return text && text.length && ONLY_PUNCT.test(text);
42903
+ }
42904
+ /**
42905
+ * Tags the input string with part-of-speech tags, either from the Penn tag set or the simplified tag set [a, r, v, n].
42906
+ * @param {string} sentence - the sentence to tag
42907
+ * @param {object} [options] - options for the tagging
42908
+ * @param {boolean} [options.simple=false] - use the simplified tag set [a, r, v, n]
42909
+ * @returns {string} the tagged sentence
42910
+ */
42911
+ static posInline(sentence, options) {
42912
+ return _RiTa.tagger.tag(sentence, { ...options, inline: true });
42535
42913
  }
42914
+ /**
42915
+ * Return the singular form of the input word
42916
+ * @param {string} word - the word to singularize
42917
+ * @returns {string} the singular form of the input word
42918
+ */
42536
42919
  static singularize(word) {
42537
- return _RiTa.inflector.singularize(...arguments);
42920
+ return _RiTa.inflector.singularize(word);
42538
42921
  }
42922
+ /**
42923
+ * Return the plural form of the input word
42924
+ * @param {string} word - the word to pluralize
42925
+ * @returns {string} the plural form of the input word
42926
+ */
42539
42927
  static pluralize(word) {
42540
- return _RiTa.inflector.pluralize(...arguments);
42541
- }
42542
- static async search(pattern, opts) {
42543
- return await _RiTa.lexicon.search(...arguments);
42544
- }
42545
- static tokens(string, opts) {
42546
- return _RiTa.tokenizer.tokens(...arguments);
42547
- }
42548
- static tokenize(string, opts) {
42549
- return _RiTa.tokenizer.tokenize(...arguments);
42550
- }
42551
- static untokenize(stringArray, delim) {
42552
- return _RiTa.tokenizer.untokenize(...arguments);
42553
- }
42554
- static sentences(string) {
42555
- return _RiTa.tokenizer.sentences(...arguments);
42556
- }
42557
- static isStopWord(w) {
42558
- return _RiTa.STOP_WORDS.includes(w.toLowerCase());
42928
+ return _RiTa.inflector.pluralize(word);
42929
+ }
42930
+ /**
42931
+ * Searches for words in the lexicon matching the given criteria, either by length, syllable-count,
42932
+ * spelling, phonemes, stresses, part-of-speech, etc. If no regex or options are supplied, the full set of words is returned.
42933
+ * @param {(string|RegExp)} [pattern] - the pattern to match
42934
+ * @param {object} [options] - options for the search
42935
+ * @param {number} [options.minLength=4] - the minimum length of the words
42936
+ * @param {number} [options.maxLength] - the maximum length of the words
42937
+ * @param {number} [options.numSyllables] - the number of syllables in the words
42938
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
42939
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
42940
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
42941
+ * or the simplified tag set [a, r, v, n]
42942
+ * @param {string} [options.type] - the type of regex or string pattern to match, options are 'stresses'
42943
+ * or 'phones' or 'letters' (the default)
42944
+ * @returns {Promise<string[]>} an array of words matching the criteria in both the pattern and the options object
42945
+ */
42946
+ static async search(pattern, options) {
42947
+ return await _RiTa.lexicon.search(pattern, options);
42948
+ }
42949
+ /**
42950
+ * Returns an array containing all unique alphabetical words (tokens) in the text.
42951
+ * Punctuation and case are ignored unless specified otherwise.
42952
+ * @param {string} text - The text from which to extract the tokens
42953
+ * @param {object} [options] - The options
42954
+ * @param {boolean} [options.caseSensitive=false] - Whether to pay attention to case
42955
+ * @param {boolean} [options.ignoreStopWords=false] - Whether to ignore words such as 'the', 'and', 'a', 'of', etc,
42956
+ * as specified in RiTa.STOP_WORDS
42957
+ * @param {boolean} [options.splitContractions=false] - Whether to convert contractions
42958
+ * (e.g., "I'd" or "she'll") into multiple individual tokens
42959
+ * @param {boolean} [options.includePunct=false] - Whether to include punctuation in the results
42960
+ * @param {boolean} [options.sort=false] - Whether to sort the tokens before returning them
42961
+ * @returns {string[]} Array of tokens
42962
+ */
42963
+ static tokens(text, options = {
42964
+ caseSensitive: false,
42965
+ ignoreStopWords: false,
42966
+ splitContractions: false,
42967
+ includePunct: false,
42968
+ sort: false
42969
+ }) {
42970
+ return _RiTa.tokenizer.tokens(text, options);
42971
+ }
42972
+ /**
42973
+ * Tokenizes an input string into words, according to the Penn Treebank conventions
42974
+ * @param {string} input - The text to tokenize
42975
+ * @param {object} [options] - The options
42976
+ * @param {RegExp} [options.regex=null] - An optional custom regex to split on
42977
+ * @param {boolean} [options.splitHyphens=false] - Whether to split hyphenated words
42978
+ * (e.g., "mother-in-law") into multiple individual tokens
42979
+ * @param {boolean} [options.splitContractions=false] - Whether to split contractions
42980
+ * (e.g., "I'd" or "she'll") into multiple individual tokens
42981
+ * @returns {string[]} Array of tokens
42982
+ */
42983
+ static tokenize(input, options) {
42984
+ return _RiTa.tokenizer.tokenize(input, options);
42985
+ }
42986
+ /**
42987
+ * Joins an array (of words and punctuation) into a sentence, according to
42988
+ * the Penn Treebank conventions. The inverse of RiTa.tokenize().
42989
+ * @param {string[]} input - The array of words to join
42990
+ * @param {string} [delim=' '] - The delimiter to use between words, or a space by default
42991
+ * @returns {string} The joined sentence
42992
+ */
42993
+ static untokenize(input, delim = " ") {
42994
+ return _RiTa.tokenizer.untokenize(input, delim);
42995
+ }
42996
+ /**
42997
+ * Splits the input text into sentences using Penn Treebank conventions and the specified options.
42998
+ * @param {string} text - The text to split
42999
+ * @param {(string|RegExp)} [pattern] - An optional custom regex to split on
43000
+ * @returns {string[]} An array of sentences
43001
+ */
43002
+ static sentences(text, pattern) {
43003
+ return _RiTa.tokenizer.sentences(text, pattern);
43004
+ }
43005
+ /**
43006
+ * Returns true if the word is a 'stop word', a commonly used word that is often ignored in text processing.
43007
+ * To use your own list, set RiTa.STOP_WORDS to a new array of (lowercase) words.
43008
+ * @param {string} word - the word to check
43009
+ * @returns {boolean} true if the word is a stop word, else false
43010
+ */
43011
+ static isStopWord(word) {
43012
+ return _RiTa.STOP_WORDS.includes(word.toLowerCase());
42559
43013
  }
42560
- static stem(string) {
42561
- return stemmer_default.stem(...arguments);
43014
+ /**
43015
+ * Extracts base roots from a word according to the Pling stemming algorithm.
43016
+ * @param {string} word - the word to stem
43017
+ * @returns {string} the base root of the word
43018
+ */
43019
+ static stem(word) {
43020
+ return stemmer_default.stem(word);
42562
43021
  }
43022
+ /**
43023
+ * Returns the present participle of the input word (e.g., "walking" for "walk").
43024
+ * @param {string} verbWord - the word to get the present participle of
43025
+ * @returns {string} the present participle of the input word
43026
+ */
42563
43027
  static presentPart(verbWord) {
42564
- return _RiTa.conjugator.presentPart(...arguments);
43028
+ return _RiTa.conjugator.presentPart(verbWord);
42565
43029
  }
43030
+ /**
43031
+ * Returns the past participle of the input word (e.g., "walked" for "walk").
43032
+ * @param {string} verbWord
43033
+ * @returns {string} the past participle of the input word
43034
+ */
42566
43035
  static pastPart(verbWord) {
42567
- return _RiTa.conjugator.pastPart(...arguments);
42568
- }
42569
- static conjugate(verbWord, opts) {
42570
- return _RiTa.conjugator.conjugate(...arguments);
42571
- }
42572
- static stresses(string) {
42573
- return _RiTa.analyzer.analyze(...arguments).stresses;
42574
- }
42575
- static syllables(string) {
42576
- return _RiTa.analyzer.analyze(...arguments).syllables;
42577
- }
42578
- static phones(string) {
42579
- return _RiTa.analyzer.analyze(...arguments).phones;
42580
- }
42581
- static analyze(string) {
42582
- return _RiTa.analyzer.analyze(...arguments);
43036
+ return _RiTa.conjugator.pastPart(verbWord);
43037
+ }
43038
+ /**
43039
+ * Conjugates the 'verb' according to the specified options (tense, person, number, etc.)
43040
+ * @param {string} verbWord
43041
+ * @param {object} [options]
43042
+ * @param {number} [options.tense] - the tense of the verb, either RiTa.PAST, RiTa.PRESENT, or RiTa.FUTURE
43043
+ * @param {number} [options.person] - the person of the verb, either RiTa.FIRST, RiTa.SECOND, or RiTa.THIRD
43044
+ * @param {number} [options.number] - the number of the verb, either RiTa.SINGULAR or RiTa.PLURAL
43045
+ * @param {number} [options.form] - the form of the verb, either RiTa.INFINITIVE or RiTa.GERUND
43046
+ * @param {boolean} [options.passive] - whether the verb should be passive
43047
+ * @param {boolean} [options.progressive] - whether the verb should be progressive
43048
+ * @param {boolean} [options.perfect] - whether the verb should be perfect
43049
+ * @param {boolean} [options.interrogative] - whether the verb should be in interrogative form
43050
+ * @returns {string} the conjugated verb
43051
+ */
43052
+ static conjugate(verbWord, options) {
43053
+ return _RiTa.conjugator.conjugate(verbWord, options);
43054
+ }
43055
+ /**
43056
+ * Analyzes the input and returns a new string containing the stresses for each syllable of the input text.
43057
+ * @param {string} input - the text to analyze
43058
+ * @param {object} [options] - options for the analysis
43059
+ * @returns {string} a string containing the stresses for each syllable of the input text
43060
+ */
43061
+ static stresses(input, options) {
43062
+ return _RiTa.analyzer.analyze(input, options).stresses;
43063
+ }
43064
+ /**
43065
+ * Analyzes the input and returns a new string containing the syllables of the input text.
43066
+ * @param {string} input - the text to analyze
43067
+ * @param {object} [options] - options for the analysis
43068
+ * @returns {string} a string containing the syllables of the input text
43069
+ */
43070
+ static syllables(input, options) {
43071
+ return _RiTa.analyzer.analyze(input, options).syllables;
43072
+ }
43073
+ /**
43074
+ * Analyzes the input and returns a new string containing the phonemes of the input text.
43075
+ * @param {string} input - the text to analyze
43076
+ * @param {object} [options] - options for the analysis
43077
+ * @returns {string} a string containing the phones of the input text
43078
+ */
43079
+ static phones(input, options) {
43080
+ return _RiTa.analyzer.analyze(input, options).phones;
43081
+ }
43082
+ /**
43083
+ * Analyzes the input to compute a set of features for the input,
43084
+ * including phonemes, syllables, stresses, and part-of-speech tags.
43085
+ * @param {string} input - the text to analyze
43086
+ * @param {object} [options] - options for the analysis
43087
+ * @param {boolean} [options.simple=false] - whether to use the simplified tag set [a, r, v, n]
43088
+ * @returns {object} an object containing the features of the input text (phones, syllables, stresses, pos), or the features inline
43089
+ */
43090
+ static analyze(input, options) {
43091
+ return _RiTa.analyzer.analyze(input, options);
42583
43092
  }
42584
43093
  ////////////////////////////// lex-sync ////////////////////////////
42585
- static spellsLikeSync(word, opts) {
42586
- return _RiTa.lexicon.spellsLikeSync(...arguments);
42587
- }
42588
- static sountsLikeSync(word, opts) {
42589
- return _RiTa.lexicon.sountsLikeSync(...arguments);
42590
- }
42591
- static rhymesSync(word, opts) {
42592
- return _RiTa.lexicon.rhymesSync(...arguments);
42593
- }
42594
- // TODO: all need tests
42595
- static searchSync(word, opts) {
42596
- return _RiTa.lexicon.rhymesSync(...arguments);
42597
- }
42598
- static alliterationsSync(word, opts) {
42599
- return _RiTa.lexicon.alliterationsSync(...arguments);
43094
+ /**
43095
+ * A synchronous version of RiTa.spellsLike(). It compares the letters of the input word
43096
+ * (using a version of the Levenshtein min-edit distance algorithm) to each word in the lexicon,
43097
+ * returning the set of closest matches that also match the criteria in the options object.
43098
+ * @param {string} word - the word to match
43099
+ * @param {object} [options] - options for the search
43100
+ * @param {number} [options.minLength=4] - the minimum length of the words
43101
+ * @param {number} [options.maxLength] - the maximum length of the words
43102
+ * @param {number} [options.numSyllables] - the number of syllables in the words
43103
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
43104
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
43105
+ * or the simplified tag set [a, r, v, n]
43106
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
43107
+ * @return {string[]} an array of words matching the spelling pattern and criteria in the options object
43108
+ */
43109
+ static spellsLikeSync(word, options) {
43110
+ return _RiTa.lexicon.spellsLikeSync(word, options);
43111
+ }
43112
+ /**
43113
+ * A synchronous version of RiTa.soundsLike(). It compares the phonemes of the input pattern (using a version of the Levenshtein min-edit distance algorithm)
43114
+ * to each word in the lexicon, returning the set of closest matches that also match the criteria in the options object.
43115
+ * @param {string} word - the word to match
43116
+ * @param {object} [options] - options for the search
43117
+ * @param {number} [options.minLength=4] - the minimum length of the words
43118
+ * @param {number} [options.maxLength] - the maximum length of the words
43119
+ * @param {number} [options.numSyllables] - the number of syllables in the words
43120
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
43121
+ * @param {boolean} [options.matchSpelling=false] - if true, will also attempt to match spelling by returning an intersection with RiTa.spellsLike()
43122
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
43123
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
43124
+ * or the simplified tag set [a, r, v, n]
43125
+ * @return {string[]} an array of words matching the phonemic pattern and criteria in the options object
43126
+ */
43127
+ static soundsLikeSync(word, options) {
43128
+ return _RiTa.lexicon.soundsLikeSync(word, options);
43129
+ }
43130
+ /**
43131
+ * Synchronous version of RiTa.rhymes(). Returns words that rhyme with the given word.
43132
+ * Two words are considered as rhyming if their final stressed vowel and all following phonemes are identical.
43133
+ * @param {string} word - the word to match
43134
+ * @param {object} [options] - options for the search
43135
+ * @param {number} [options.minLength=4] - the minimum length of the words
43136
+ * @param {number} [options.maxLength] - the maximum length of the words
43137
+ * @param {number} [options.numSyllables] - the number of syllables in the words
43138
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
43139
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
43140
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
43141
+ * or the simplified tag set [a, r, v, n]
43142
+ * @return {string[]} an array of rhymes that match criteria in the options object
43143
+ */
43144
+ static rhymesSync(word, options) {
43145
+ return _RiTa.lexicon.rhymesSync(word, options);
43146
+ }
43147
+ /**
43148
+ * A synchronous version of RiTa.search(). Searches for words in the lexicon matching
43149
+ * the given criteria, either by length, syllable-count, spelling, phonemes, stresses,
43150
+ * part-of-speech, etc.
43151
+ * @param {(string|RegExp)} [pattern] - the pattern to match
43152
+ * @param {object} [options] - options for the search
43153
+ * @param {number} [options.minLength=4] - the minimum length of the words
43154
+ * @param {number} [options.maxLength] - the maximum length of the words
43155
+ * @param {number} [options.numSyllables] - the number of syllables in the words
43156
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
43157
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
43158
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
43159
+ * or the simplified tag set [a, r, v, n]
43160
+ * @param {string} [options.type] - the type of regex or string pattern to match, options are 'stresses' or 'phones' or 'letters' (the default)
43161
+ * @return {string[]} an array of words matching the criteria in both the pattern and the options object
43162
+ */
43163
+ static searchSync(pattern, options) {
43164
+ return _RiTa.lexicon.searchSync(pattern, options);
43165
+ }
43166
+ /**
43167
+ * A synchronous version of RiTa.alliterations(). Finds alliterations by comparing the phonemes
43168
+ * of the input string to those of each word in the lexicon via a minimum-edit-distance metric.
43169
+ * @param {string} word - the word to match
43170
+ * @param {object} [options] - options for the search
43171
+ * @param {number} [options.minLength=4] - the minimum length of the words
43172
+ * @param {number} [options.maxLength] - the maximum length of the words
43173
+ * @param {number} [options.numSyllables] - the number of syllables in the words
43174
+ * @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
43175
+ * @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
43176
+ * @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
43177
+ * or the simplified tag set [a, r, v, n]
43178
+ * @return {string[]} an array of alliterations matching criteria in the options object
43179
+ */
43180
+ static alliterationsSync(word, options) {
43181
+ return _RiTa.lexicon.alliterationsSync(word, options);
42600
43182
  }
42601
43183
  ////////////////////////////// niapa /////////////////////////////
42602
- static randi(opts) {
42603
- return Math.floor(_RiTa.randomizer.random(...arguments));
42604
- }
42605
- static random(opts) {
43184
+ /**
43185
+ * Returns a random integer from a range
43186
+ * The version of randi() with one parameter returns a random integer from 0 up to but not including the number.
43187
+ * The version of randi() with two parameters returns a random integer from the first number up to but not including the second.
43188
+ * @param {number} param1 - the first parameter
43189
+ * @param {number} [param2] - the second optional parameter
43190
+ * @returns {number} a random integer from the range
43191
+ */
43192
+ static randi(param1, param2) {
43193
+ return Math.floor(_RiTa.random(...arguments));
43194
+ }
43195
+ /**
43196
+ * Returns a random number or a random element from an array.
43197
+ * The version of random() with no parameters returns a random number from 0 up to but not including 1.
43198
+ * The version of random() with one parameter works one of two ways. If the argument passed is a number, random() returns a random number from 0 up to but not including the number.
43199
+ * If the argument passed is an array, random() returns a random element from that array.
43200
+ * The version of random() with two parameters returns a random number from the first number up to but not including the second.
43201
+ * @param {number|object[]} [param1] - the minimum value of the random number, or an array of values to choose from
43202
+ * @param {number} [param2] - the maximum value of the random number
43203
+ * @returns {number|object} a random number or a random element from the array
43204
+ */
43205
+ static random(param1, param2) {
42606
43206
  return _RiTa.randomizer.random(...arguments);
42607
43207
  }
42608
43208
  };
42609
- markov_default.parent = RiTa2;
42610
- stemmer_default.parent = RiTa2;
42611
- RiTa2.RiGrammar = RiGrammar;
42612
- RiTa2.RiMarkov = markov_default;
42613
- RiTa2.Stemmer = stemmer_default;
42614
- RiTa2.tagger = new tagger_default(RiTa2);
42615
- RiTa2.analyzer = new analyzer_default(RiTa2);
42616
- RiTa2.concorder = new concorder_default(RiTa2);
42617
- RiTa2.tokenizer = new tokenizer_default(RiTa2);
42618
- RiTa2.inflector = new inflector_default(RiTa2);
42619
- RiTa2.randomizer = new randgen_default(RiTa2);
42620
- RiTa2.lexicon = new lexicon_default(RiTa2);
42621
- RiTa2.conjugator = new conjugator_default(RiTa2);
42622
- RiTa2.SILENT = false;
42623
- RiTa2.SILENCE_LTS = false;
42624
- RiTa2.PHONES = ["aa", "ae", "ah", "ao", "aw", "ay", "b", "ch", "d", "dh", "eh", "er", "ey", "f", "g", "hh", "ih", "iy", "jh", "k", "l", "m", "n", "ng", "ow", "oy", "p", "r", "s", "sh", "t", "th", "uh", "uw", "v", "w", "y", "z", "zh"];
42625
- RiTa2.VERSION = "3.0.21";
42626
- RiTa2.HAS_LEXICON = typeof __NOLEX__ === "undefined";
42627
- RiTa2.FIRST = 1;
42628
- RiTa2.SECOND = 2;
42629
- RiTa2.THIRD = 3;
42630
- RiTa2.PAST = 4;
42631
- RiTa2.PRESENT = 5;
42632
- RiTa2.FUTURE = 6;
42633
- RiTa2.SINGULAR = 7;
42634
- RiTa2.PLURAL = 8;
42635
- RiTa2.NORMAL = 9;
42636
- RiTa2.STRESS = "1";
42637
- RiTa2.NOSTRESS = "0";
42638
- RiTa2.PHONE_BOUNDARY = "-";
42639
- RiTa2.WORD_BOUNDARY = " ";
42640
- RiTa2.SYLLABLE_BOUNDARY = "/";
42641
- RiTa2.SENTENCE_BOUNDARY = "|";
42642
- RiTa2.VOWELS = "aeiou";
42643
- RiTa2.MODAL_EXCEPTIONS = ["hardness", "shortness"];
42644
- RiTa2.ABRV = ["Adm.", "Capt.", "Cmdr.", "Col.", "Dr.", "Gen.", "Gov.", "Lt.", "Maj.", "Messrs.", "Mr.", "Mrs.", "Ms.", "Prof.", "Rep.", "Reps.", "Rev.", "Sen.", "Sens.", "Sgt.", "Sr.", "St.", "A.k.a.", "C.f.", "I.e.", "E.g.", "Vs.", "V.", "Jan.", "Feb.", "Mar.", "Apr.", "Mar.", "Jun.", "Jul.", "Aug.", "Sept.", "Oct.", "Nov.", "Dec."];
42645
- RiTa2.QUESTIONS = ["was", "what", "when", "where", "which", "why", "who", "will", "would", "who", "how", "if", "is", "could", "might", "does", "are", "have"];
42646
- RiTa2.STOP_WORDS = ["and", "a", "of", "in", "i", "you", "is", "to", "that", "it", "for", "on", "have", "with", "this", "be", "not", "are", "as", "was", "but", "or", "from", "my", "at", "if", "they", "your", "all", "he", "by", "one", "me", "what", "so", "can", "will", "do", "an", "about", "we", "just", "would", "there", "no", "like", "out", "his", "has", "up", "more", "who", "when", "don't", "some", "had", "them", "any", "their", "it's", "only", "which", "i'm", "been", "other", "were", "how", "then", "now", "her", "than", "she", "well", "also", "us", "very", "because", "am", "here", "could", "even", "him", "into", "our", "much", "too", "did", "should", "over", "want", "these", "may", "where", "most", "many", "those", "does", "why", "please", "off", "going", "its", "i've", "down", "that's", "can't", "you're", "didn't", "another", "around", "must", "few", "doesn't", "the", "every", "yes", "each", "maybe", "i'll", "away", "doing", "oh", "else", "isn't", "he's", "there's", "hi", "won't", "ok", "they're", "yeah", "mine", "we're", "what's", "shall", "she's", "hello", "okay", "here's", "less", "didn't", "said", "over", "this", "that", "just", "then", "under", "some"];
42647
- RiTa2.MASS_NOUNS = ["abalone", "asbestos", "barracks", "bathos", "breeches", "beef", "britches", "chaos", "chinese", "cognoscenti", "clippers", "corps", "cosmos", "crossroads", "diabetes", "ethos", "gallows", "graffiti", "herpes", "innings", "lens", "means", "measles", "mews", "mumps", "news", "pasta", "pathos", "pincers", "pliers", "proceedings", "rabies", "rhinoceros", "sassafras", "scissors", "series", "shears", "species", "tuna", "acoustics", "aesthetics", "aquatics", "basics", "ceramics", "classics", "cosmetics", "dialectics", "deer", "dynamics", "ethics", "harmonics", "heroics", "mechanics", "metrics", "ooze", "optics", "physics", "polemics", "pyrotechnics", "statistics", "tactics", "tropics", "bengalese", "bengali", "bonsai", "booze", "cellulose", "mess", "moose", "burmese", "chinese", "colossus", "congolese", "discus", "electrolysis", "emphasis", "expertise", "flu", "fructose", "gauze", "glucose", "grease", "guyanese", "haze", "incense", "japanese", "lebanese", "malaise", "mayonnaise", "maltese", "music", "money", "menopause", "merchandise", "olympics", "overuse", "paradise", "poise", "potash", "portuguese", "prose", "recompense", "remorse", "repose", "senegalese", "siamese", "singhalese", "sleaze", "sioux", "sudanese", "suspense", "swiss", "taiwanese", "vietnamese", "unease", "aircraft", "anise", "antifreeze", "applause", "archdiocese", "apparatus", "asparagus", "bellows", "bison", "bluefish", "bourgeois", "bream", "brill", "butterfingers", "cargo", "carp", "catfish", "chassis", "clone", "clones", "clothes", "chub", "cod", "codfish", "coley", "contretemps", "crawfish", "crayfish", "cuttlefish", "dice", "dogfish", "doings", "dory", "downstairs", "eldest", "earnings", "economics", "electronics", "firstborn", "fish", "flatfish", "flounder", "fowl", "fry", "fries", "works", "goldfish", "golf", "grand", "grief", "haddock", "hake", "halibut", "headquarters", "herring", "hertz", "honey", "horsepower", "goods", "hovercraft", "ironworks", "kilohertz", "ling", 
"shrimp", "swine", "lungfish", "mackerel", "macaroni", "megahertz", "moorfowl", "moorgame", "mullet", "nepalese", "offspring", "pants", "patois", "pekinese", "perch", "pickerel", "pike", "potpourri", "precis", "quid", "rand", "rendezvous", "roach", "salmon", "samurai", "seychelles", "shad", "sheep", "shellfish", "smelt", "spaghetti", "spacecraft", "starfish", "stockfish", "sunfish", "superficies", "sweepstakes", "smallpox", "swordfish", "tennis", "tobacco", "triceps", "trout", "tunafish", "turbot", "trousers", "turf", "dibs", "undersigned", "waterfowl", "waterworks", "waxworks", "wildfowl", "woodworm", "yen", "aries", "pisces", "forceps", "jeans", "mathematics", "odds", "politics", "remains", "aids", "wildlife", "shall", "would", "may", "might", "ought", "should", "acne", "admiration", "advice", "air", "anger", "anticipation", "assistance", "awareness", "bacon", "baggage", "blood", "bravery", "chess", "clay", "clothing", "coal", "compliance", "comprehension", "confusion", "consciousness", "cream", "darkness", "diligence", "dust", "education", "empathy", "enthusiasm", "envy", "equality", "equipment", "evidence", "feedback", "fitness", "flattery", "foliage", "fun", "furniture", "garbage", "gold", "gossip", "grammar", "gratitude", "gravel", "guilt", "happiness", "hardware", "hate", "hay", "health", "heat", "help", "hesitation", "homework", "honesty", "honor", "honour", "hospitality", "hostility", "humanity", "humility", "ice", "immortality", "independence", "information", "integrity", "intimidation", "jargon", "jealousy", "jewelry", "justice", "knowledge", "literacy", "logic", "luck", "lumber", "luggage", "mail", "management", "milk", "morale", "mud", "nonsense", "oppression", "optimism", "oxygen", "participation", "pay", "peace", "perseverance", "pessimism", "pneumonia", "poetry", "police", "pride", "privacy", "propaganda", "public", "punctuation", "recovery", "rice", "rust", "satisfaction", "schnapps", "shame", "slang", "software", "stamina", "starvation", "steam", 
"steel", "stuff", "support", "sweat", "thunder", "timber", "toil", "traffic", "tongs", "training", "trash", "valor", "vehemence", "violence", "warmth", "waste", "weather", "wheat", "wisdom", "work", "accommodation", "advertising", "aid", "art", "bread", "business", "butter", "calm", "cash", "cheese", "childhood", "clothing ", "coffee", "content", "corruption", "courage", "currency", "damage", "danger", "determination", "electricity", "employment", "energy", "entertainment", "failure", "fame", "fire", "flour", "food", "freedom", "friendship", "fuel", "genetics", "hair", "harm", "hospitality ", "housework", "humour", "imagination", "importance", "innocence", "intelligence", "juice", "kindness", "labour", "lack", "laughter", "leisure", "literature", "litter", "love", "magic", "metal", "motherhood", "motivation", "nature", "nutrition", "obesity", "oil", "old age", "paper", "patience", "permission", "pollution", "poverty", "power", "production", "progress", "pronunciation", "publicity", "quality", "quantity", "racism", "rain", "relaxation", "research", "respect", "room (space)", "rubbish", "safety", "salt", "sand", "seafood", "shopping", "silence", "smoke", "snow", "soup", "speed", "spelling", "stress ", "sugar", "sunshine", "tea", "time", "tolerance", "trade", "transportation", "travel", "trust", "understanding", "unemployment", "usage", "vision", "water", "wealth", "weight", "welfare", "width", "wood", "yoga", "youth", "homecare", "childcare", "fanfare", "healthcare", "medicare"];
42648
- RiTa2.INFINITIVE = 1;
42649
- RiTa2.GERUND = 2;
42650
- RiTa2.SPLIT_CONTRACTIONS = false;
43209
+ RiTa.RiGrammar = _riscript.RiGrammar;
43210
+ RiTa.RiMarkov = markov_default;
43211
+ RiTa.Stemmer = stemmer_default;
43212
+ RiTa.randomizer = new randgen_default();
43213
+ RiTa.tagger = new tagger_default(RiTa);
43214
+ RiTa.analyzer = new analyzer_default(RiTa);
43215
+ RiTa.concorder = new concorder_default(RiTa);
43216
+ RiTa.tokenizer = new tokenizer_default(RiTa);
43217
+ RiTa.inflector = new inflector_default(RiTa);
43218
+ RiTa.lexicon = new lexicon_default(RiTa);
43219
+ RiTa.conjugator = new conjugator_default(RiTa);
43220
+ markov_default.parent = RiTa;
43221
+ stemmer_default.tokenizer = RiTa.tokenizer;
43222
+ RiTa.SILENT = false;
43223
+ RiTa.SILENCE_LTS = false;
43224
+ RiTa.VERSION = "3.0.23";
43225
+ RiTa.FIRST = 1;
43226
+ RiTa.SECOND = 2;
43227
+ RiTa.THIRD = 3;
43228
+ RiTa.PAST = 4;
43229
+ RiTa.PRESENT = 5;
43230
+ RiTa.FUTURE = 6;
43231
+ RiTa.SINGULAR = 7;
43232
+ RiTa.PLURAL = 8;
43233
+ RiTa.NORMAL = 9;
43234
+ RiTa.STRESS = "1";
43235
+ RiTa.NOSTRESS = "0";
43236
+ RiTa.PHONE_BOUNDARY = "-";
43237
+ RiTa.WORD_BOUNDARY = " ";
43238
+ RiTa.SYLLABLE_BOUNDARY = "/";
43239
+ RiTa.SENTENCE_BOUNDARY = "|";
43240
+ RiTa.VOWELS = "aeiou";
43241
+ RiTa.PHONES = ["aa", "ae", "ah", "ao", "aw", "ay", "b", "ch", "d", "dh", "eh", "er", "ey", "f", "g", "hh", "ih", "iy", "jh", "k", "l", "m", "n", "ng", "ow", "oy", "p", "r", "s", "sh", "t", "th", "uh", "uw", "v", "w", "y", "z", "zh"];
43242
+ RiTa.ABRV = ["Adm.", "Capt.", "Cmdr.", "Col.", "Dr.", "Gen.", "Gov.", "Lt.", "Maj.", "Messrs.", "Mr.", "Mrs.", "Ms.", "Prof.", "Rep.", "Reps.", "Rev.", "Sen.", "Sens.", "Sgt.", "Sr.", "St.", "A.k.a.", "C.f.", "I.e.", "E.g.", "Vs.", "V.", "Jan.", "Feb.", "Mar.", "Apr.", "Mar.", "Jun.", "Jul.", "Aug.", "Sept.", "Oct.", "Nov.", "Dec."];
43243
+ RiTa.QUESTIONS = ["was", "what", "when", "where", "which", "why", "who", "will", "would", "who", "how", "if", "is", "could", "might", "does", "are", "have"];
43244
+ RiTa.STOP_WORDS = ["and", "a", "of", "in", "i", "you", "is", "to", "that", "it", "for", "on", "have", "with", "this", "be", "not", "are", "as", "was", "but", "or", "from", "my", "at", "if", "they", "your", "all", "he", "by", "one", "me", "what", "so", "can", "will", "do", "an", "about", "we", "just", "would", "there", "no", "like", "out", "his", "has", "up", "more", "who", "when", "don't", "some", "had", "them", "any", "their", "it's", "only", "which", "i'm", "been", "other", "were", "how", "then", "now", "her", "than", "she", "well", "also", "us", "very", "because", "am", "here", "could", "even", "him", "into", "our", "much", "too", "did", "should", "over", "want", "these", "may", "where", "most", "many", "those", "does", "why", "please", "off", "going", "its", "i've", "down", "that's", "can't", "you're", "didn't", "another", "around", "must", "few", "doesn't", "the", "every", "yes", "each", "maybe", "i'll", "away", "doing", "oh", "else", "isn't", "he's", "there's", "hi", "won't", "ok", "they're", "yeah", "mine", "we're", "what's", "shall", "she's", "hello", "okay", "here's", "less", "didn't", "said", "over", "this", "that", "just", "then", "under", "some"];
43245
+ RiTa.MASS_NOUNS = ["abalone", "asbestos", "barracks", "bathos", "breeches", "beef", "britches", "chaos", "chinese", "cognoscenti", "clippers", "corps", "cosmos", "crossroads", "diabetes", "ethos", "gallows", "graffiti", "herpes", "innings", "lens", "means", "measles", "mews", "mumps", "news", "pasta", "pathos", "pincers", "pliers", "proceedings", "rabies", "rhinoceros", "sassafras", "scissors", "series", "shears", "species", "tuna", "acoustics", "aesthetics", "aquatics", "basics", "ceramics", "classics", "cosmetics", "dialectics", "deer", "dynamics", "ethics", "harmonics", "heroics", "mechanics", "metrics", "ooze", "optics", "physics", "polemics", "pyrotechnics", "statistics", "tactics", "tropics", "bengalese", "bengali", "bonsai", "booze", "cellulose", "mess", "moose", "burmese", "chinese", "colossus", "congolese", "discus", "electrolysis", "emphasis", "expertise", "flu", "fructose", "gauze", "glucose", "grease", "guyanese", "haze", "incense", "japanese", "lebanese", "malaise", "mayonnaise", "maltese", "music", "money", "menopause", "merchandise", "olympics", "overuse", "paradise", "poise", "potash", "portuguese", "prose", "recompense", "remorse", "repose", "senegalese", "siamese", "singhalese", "sleaze", "sioux", "sudanese", "suspense", "swiss", "taiwanese", "vietnamese", "unease", "aircraft", "anise", "antifreeze", "applause", "archdiocese", "apparatus", "asparagus", "bellows", "bison", "bluefish", "bourgeois", "bream", "brill", "butterfingers", "cargo", "carp", "catfish", "chassis", "clone", "clones", "clothes", "chub", "cod", "codfish", "coley", "contretemps", "crawfish", "crayfish", "cuttlefish", "dice", "dogfish", "doings", "dory", "downstairs", "eldest", "earnings", "economics", "electronics", "firstborn", "fish", "flatfish", "flounder", "fowl", "fry", "fries", "works", "goldfish", "golf", "grand", "grief", "haddock", "hake", "halibut", "headquarters", "herring", "hertz", "honey", "horsepower", "goods", "hovercraft", "ironworks", "kilohertz", "ling", 
"shrimp", "swine", "lungfish", "mackerel", "macaroni", "megahertz", "moorfowl", "moorgame", "mullet", "nepalese", "offspring", "pants", "patois", "pekinese", "perch", "pickerel", "pike", "potpourri", "precis", "quid", "rand", "rendezvous", "roach", "salmon", "samurai", "seychelles", "shad", "sheep", "shellfish", "smelt", "spaghetti", "spacecraft", "starfish", "stockfish", "sunfish", "superficies", "sweepstakes", "smallpox", "swordfish", "tennis", "tobacco", "triceps", "trout", "tunafish", "turbot", "trousers", "turf", "dibs", "undersigned", "waterfowl", "waterworks", "waxworks", "wildfowl", "woodworm", "yen", "aries", "pisces", "forceps", "jeans", "mathematics", "odds", "politics", "remains", "aids", "wildlife", "shall", "would", "may", "might", "ought", "should", "acne", "admiration", "advice", "air", "anger", "anticipation", "assistance", "awareness", "bacon", "baggage", "blood", "bravery", "chess", "clay", "clothing", "coal", "compliance", "comprehension", "confusion", "consciousness", "cream", "darkness", "diligence", "dust", "education", "empathy", "enthusiasm", "envy", "equality", "equipment", "evidence", "feedback", "fitness", "flattery", "foliage", "fun", "furniture", "garbage", "gold", "gossip", "grammar", "gratitude", "gravel", "guilt", "happiness", "hardware", "hate", "hay", "health", "heat", "help", "hesitation", "homework", "honesty", "honor", "honour", "hospitality", "hostility", "humanity", "humility", "ice", "immortality", "independence", "information", "integrity", "intimidation", "jargon", "jealousy", "jewelry", "justice", "knowledge", "literacy", "logic", "luck", "lumber", "luggage", "mail", "management", "milk", "morale", "mud", "nonsense", "oppression", "optimism", "oxygen", "participation", "pay", "peace", "perseverance", "pessimism", "pneumonia", "poetry", "police", "pride", "privacy", "propaganda", "public", "punctuation", "recovery", "rice", "rust", "satisfaction", "schnapps", "shame", "slang", "software", "stamina", "starvation", "steam", 
"steel", "stuff", "support", "sweat", "thunder", "timber", "toil", "traffic", "tongs", "training", "trash", "valor", "vehemence", "violence", "warmth", "waste", "weather", "wheat", "wisdom", "work", "accommodation", "advertising", "aid", "art", "bread", "business", "butter", "calm", "cash", "cheese", "childhood", "clothing ", "coffee", "content", "corruption", "courage", "currency", "damage", "danger", "determination", "electricity", "employment", "energy", "entertainment", "failure", "fame", "fire", "flour", "food", "freedom", "friendship", "fuel", "genetics", "hair", "harm", "hospitality ", "housework", "humour", "imagination", "importance", "innocence", "intelligence", "juice", "kindness", "labour", "lack", "laughter", "leisure", "literature", "litter", "love", "magic", "metal", "motherhood", "motivation", "nature", "nutrition", "obesity", "oil", "old age", "paper", "patience", "permission", "pollution", "poverty", "power", "production", "progress", "pronunciation", "publicity", "quality", "quantity", "racism", "rain", "relaxation", "research", "respect", "room (space)", "rubbish", "safety", "salt", "sand", "seafood", "shopping", "silence", "smoke", "snow", "soup", "speed", "spelling", "stress ", "sugar", "sunshine", "tea", "time", "tolerance", "trade", "transportation", "travel", "trust", "understanding", "unemployment", "usage", "vision", "water", "wealth", "weight", "welfare", "width", "wood", "yoga", "youth", "homecare", "childcare", "fanfare", "healthcare", "medicare"];
43246
+ RiTa.INFINITIVE = 1;
43247
+ RiTa.GERUND = 2;
43248
+ RiTa.SPLIT_CONTRACTIONS = false;
42651
43249
  var ONLY_PUNCT = /^[\p{P}|\+|-|<|>|\^|\$|\ufffd|`]*$/u;
42652
43250
  var IS_LETTER = /^[a-z\u00C0-\u00ff]+$/;
42653
- RiTa2.RiScript = _riscript.RiScript;
42654
- _riscript.RiScript.RiTa = RiTa2;
42655
- RiTa2.riscript = new (0, _riscript.RiScript)({ RiTa: RiTa2 });
43251
+ RiTa.riscript = new (0, _riscript.RiScript)({ RiTa });
43252
+
42656
43253
 
42657
43254
 
42658
- exports.RiTa = RiTa2;
43255
+ exports.RiMarkov = markov_default; exports.RiTa = RiTa;
42659
43256
  //# sourceMappingURL=rita.cjs.map