rita 3.0.21 → 3.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/rita.cjs +976 -379
- package/dist/rita.cjs.map +1 -1
- package/dist/rita.js +976 -379
- package/dist/rita.js.map +1 -1
- package/dist/rita.min.js +89 -89
- package/dist/rita.min.js.map +1 -1
- package/package.json +4 -3
package/dist/rita.cjs
CHANGED
|
@@ -796,9 +796,9 @@ var _Stemmer = class _Stemmer {
|
|
|
796
796
|
if (!input.includes(" ")) {
|
|
797
797
|
return _Stemmer.stemEnglish(input);
|
|
798
798
|
}
|
|
799
|
-
const words = _Stemmer.
|
|
799
|
+
const words = _Stemmer.tokenizer.tokenize(input);
|
|
800
800
|
const stems = _Stemmer.stemAll(words);
|
|
801
|
-
return _Stemmer.
|
|
801
|
+
return _Stemmer.tokenizer.untokenize(stems);
|
|
802
802
|
}
|
|
803
803
|
static stemAll(input) {
|
|
804
804
|
return input.map((i) => _Stemmer.stemEnglish(i));
|
|
@@ -830,6 +830,7 @@ var _Stemmer = class _Stemmer {
|
|
|
830
830
|
return _Stemmer.impl.getCurrent();
|
|
831
831
|
}
|
|
832
832
|
};
|
|
833
|
+
__publicField(_Stemmer, "tokenizer");
|
|
833
834
|
__publicField(_Stemmer, "impl", new SnowballStemmer());
|
|
834
835
|
var Stemmer = _Stemmer;
|
|
835
836
|
var stemmer_default = Stemmer;
|
|
@@ -840,57 +841,51 @@ var Tokenizer = class {
|
|
|
840
841
|
this.RiTa = parent;
|
|
841
842
|
this.splitter = /(\S.+?[.!?]["\u201D]?)(?=\s+|$)/g;
|
|
842
843
|
}
|
|
843
|
-
|
|
844
|
-
|
|
844
|
+
/**
|
|
845
|
+
* Returns an array containing all unique alphabetical words (tokens) in the text.
|
|
846
|
+
* Punctuation and case are ignored unless specified otherwise.
|
|
847
|
+
* @param {string} text - The text from which to extract the tokens
|
|
848
|
+
* @param {object} [options] - The options
|
|
849
|
+
* @param {boolean} [options.caseSensitive=false] - Whether to pay attention to case
|
|
850
|
+
* @param {boolean} [options.ignoreStopWords=false] - Whether to ignore words like 'the', 'and', 'a', 'of', etc, as specified in RiTa.STOP_WORDS
|
|
851
|
+
* @param {boolean} [options.splitContractions=false] - Whether to convert contractions (e.g., "I'd" or "she'll") into multiple individual tokens
|
|
852
|
+
* @param {boolean} [options.includePunct=false] - Whether to include punctuation in the results
|
|
853
|
+
* @param {boolean} [options.sort=false] - Whether to sort the tokens before returning them
|
|
854
|
+
* @returns {string[]} Array of tokens
|
|
855
|
+
*/
|
|
856
|
+
tokens(text, options = {
|
|
857
|
+
caseSensitive: false,
|
|
858
|
+
ignoreStopWords: false,
|
|
859
|
+
splitContractions: false,
|
|
860
|
+
includePunct: false,
|
|
861
|
+
sort: false
|
|
862
|
+
}) {
|
|
863
|
+
let words = this.tokenize(text, options), map = {};
|
|
845
864
|
words.forEach((w) => {
|
|
846
|
-
if (!
|
|
865
|
+
if (!options.caseSensitive)
|
|
847
866
|
w = w.toLowerCase();
|
|
848
|
-
if (
|
|
867
|
+
if (options.includePunct || ALPHA_RE.test(w))
|
|
849
868
|
map[w] = 1;
|
|
850
869
|
});
|
|
851
870
|
let tokens = Object.keys(map);
|
|
852
|
-
if (
|
|
871
|
+
if (options.ignoreStopWords)
|
|
853
872
|
tokens = tokens.filter((t) => !this.RiTa.isStopWord(t));
|
|
854
|
-
return
|
|
855
|
-
}
|
|
856
|
-
sentences(text, regex2) {
|
|
857
|
-
if (!text || !text.length)
|
|
858
|
-
return [text];
|
|
859
|
-
let clean = text.replace(NL_RE, " ");
|
|
860
|
-
let delim = "___";
|
|
861
|
-
let re = new RegExp(delim, "g");
|
|
862
|
-
let pattern = regex2 || this.splitter;
|
|
863
|
-
let unescapeAbbrevs = (arr2) => {
|
|
864
|
-
for (let i = 0; i < arr2.length; i++) {
|
|
865
|
-
arr2[i] = arr2[i].replace(re, ".");
|
|
866
|
-
}
|
|
867
|
-
return arr2;
|
|
868
|
-
};
|
|
869
|
-
let escapeAbbrevs = (text2) => {
|
|
870
|
-
let abbrevs = this.RiTa.ABRV;
|
|
871
|
-
for (let i = 0; i < abbrevs.length; i++) {
|
|
872
|
-
let abv = abbrevs[i];
|
|
873
|
-
let idx = text2.indexOf(abv);
|
|
874
|
-
while (idx > -1) {
|
|
875
|
-
text2 = text2.replace(abv, abv.replace(".", delim));
|
|
876
|
-
idx = text2.indexOf(abv);
|
|
877
|
-
}
|
|
878
|
-
}
|
|
879
|
-
return text2;
|
|
880
|
-
};
|
|
881
|
-
let arr = escapeAbbrevs(clean).match(pattern);
|
|
882
|
-
return arr && arr.length ? unescapeAbbrevs(arr) : [text];
|
|
873
|
+
return options.sort ? tokens.sort() : tokens;
|
|
883
874
|
}
|
|
884
|
-
tokenize(input, opts = {
|
|
875
|
+
tokenize(input, opts = {
|
|
876
|
+
// regex: null,
|
|
877
|
+
// splitHyphens: false,
|
|
878
|
+
// splitContractions: false
|
|
879
|
+
}) {
|
|
885
880
|
if (typeof input !== "string")
|
|
886
881
|
return [];
|
|
887
882
|
if (opts.regex)
|
|
888
|
-
return input.split(regex);
|
|
883
|
+
return input.split(opts.regex);
|
|
889
884
|
let { tags, text } = this.pushTags(input.trim());
|
|
890
885
|
for (let i = 0; i < TOKENIZE_RE.length; i += 2) {
|
|
891
886
|
text = text.replace(TOKENIZE_RE[i], TOKENIZE_RE[i + 1]);
|
|
892
887
|
}
|
|
893
|
-
if (opts.
|
|
888
|
+
if (opts.splitHyphens) {
|
|
894
889
|
text = text.replace(/([a-zA-Z]+)-([a-zA-Z]+)/g, "$1 - $2");
|
|
895
890
|
}
|
|
896
891
|
if (this.RiTa.SPLIT_CONTRACTIONS || opts.splitContractions) {
|
|
@@ -901,11 +896,10 @@ var Tokenizer = class {
|
|
|
901
896
|
let result = this.popTags(text.trim().split(WS_RE), tags);
|
|
902
897
|
return result;
|
|
903
898
|
}
|
|
904
|
-
untokenize(arr, delim) {
|
|
899
|
+
untokenize(arr, delim = " ") {
|
|
905
900
|
if (!arr || !Array.isArray(arr))
|
|
906
901
|
return "";
|
|
907
902
|
arr = this.preProcessTags(arr);
|
|
908
|
-
delim = delim || " ";
|
|
909
903
|
let nextNoSpace = false, afterQuote = false, midSentence = false;
|
|
910
904
|
let withinQuote = arr.length && QUOTE_RE.test(arr[0]);
|
|
911
905
|
let result = arr[0] || "";
|
|
@@ -966,6 +960,40 @@ var Tokenizer = class {
|
|
|
966
960
|
}
|
|
967
961
|
return result.trim();
|
|
968
962
|
}
|
|
963
|
+
/**
|
|
964
|
+
* Split the input text into sentences according to the options
|
|
965
|
+
* @param {string} text - The text to split
|
|
966
|
+
* @param {(string|RegExp)} [regex] - An optional custom regex to split on
|
|
967
|
+
* @returns {string[]} An array of sentences
|
|
968
|
+
*/
|
|
969
|
+
sentences(text, regex) {
|
|
970
|
+
if (!text || !text.length)
|
|
971
|
+
return [text];
|
|
972
|
+
let clean = text.replace(NL_RE, " ");
|
|
973
|
+
let delim = "___";
|
|
974
|
+
let re = new RegExp(delim, "g");
|
|
975
|
+
let pattern = regex || this.splitter;
|
|
976
|
+
let unescapeAbbrevs = (arr2) => {
|
|
977
|
+
for (let i = 0; i < arr2.length; i++) {
|
|
978
|
+
arr2[i] = arr2[i].replace(re, ".");
|
|
979
|
+
}
|
|
980
|
+
return arr2;
|
|
981
|
+
};
|
|
982
|
+
let escapeAbbrevs = (text2) => {
|
|
983
|
+
let abbrevs = this.RiTa.ABRV;
|
|
984
|
+
for (let i = 0; i < abbrevs.length; i++) {
|
|
985
|
+
let abv = abbrevs[i];
|
|
986
|
+
let idx = text2.indexOf(abv);
|
|
987
|
+
while (idx > -1) {
|
|
988
|
+
text2 = text2.replace(abv, abv.replace(".", delim));
|
|
989
|
+
idx = text2.indexOf(abv);
|
|
990
|
+
}
|
|
991
|
+
}
|
|
992
|
+
return text2;
|
|
993
|
+
};
|
|
994
|
+
let arr = escapeAbbrevs(clean).match(pattern);
|
|
995
|
+
return _optionalChain([arr, 'optionalAccess', _2 => _2.length]) ? unescapeAbbrevs(arr) : [text];
|
|
996
|
+
}
|
|
969
997
|
pushTags(text) {
|
|
970
998
|
let tags = [], tagIdx = 0;
|
|
971
999
|
while (TAG_RE.test(text)) {
|
|
@@ -1335,6 +1363,7 @@ var TOKENIZE_RE = [
|
|
|
1335
1363
|
// RS
|
|
1336
1364
|
];
|
|
1337
1365
|
var CONTRACTS_RE = [
|
|
1366
|
+
// TODO: 'She'd have wanted' -> 'She would have wanted'
|
|
1338
1367
|
/([Cc])an['\u2019]t/g,
|
|
1339
1368
|
"$1an not",
|
|
1340
1369
|
/([Dd])idn['\u2019]t/g,
|
|
@@ -1348,7 +1377,8 @@ var CONTRACTS_RE = [
|
|
|
1348
1377
|
/([tT]hat)['\u2019]s/g,
|
|
1349
1378
|
"$1 is",
|
|
1350
1379
|
/(she|he|you|they|i)['\u2019]d/gi,
|
|
1351
|
-
"$1
|
|
1380
|
+
"$1 had",
|
|
1381
|
+
// changed from would, 12/8/23
|
|
1352
1382
|
/(she|he|you|they|i)['\u2019]ll/gi,
|
|
1353
1383
|
"$1 will",
|
|
1354
1384
|
/n['\u2019]t /g,
|
|
@@ -1364,9 +1394,9 @@ var tokenizer_default = Tokenizer;
|
|
|
1364
1394
|
|
|
1365
1395
|
// src/conjugator.js
|
|
1366
1396
|
var RegularExpression = class {
|
|
1367
|
-
constructor(
|
|
1368
|
-
this.raw =
|
|
1369
|
-
this.regex = new RegExp(
|
|
1397
|
+
constructor(regex, offset, suffix) {
|
|
1398
|
+
this.raw = regex;
|
|
1399
|
+
this.regex = new RegExp(regex);
|
|
1370
1400
|
this.offset = offset;
|
|
1371
1401
|
this.suffix = suffix || "";
|
|
1372
1402
|
}
|
|
@@ -1383,7 +1413,7 @@ var RegularExpression = class {
|
|
|
1383
1413
|
return "/" + this.raw + "/";
|
|
1384
1414
|
}
|
|
1385
1415
|
};
|
|
1386
|
-
var RE = (a, b, c) => new RegularExpression(a, b, c);
|
|
1416
|
+
var RE = (a, b, c, _) => new RegularExpression(a, b, c);
|
|
1387
1417
|
var Conjugator = class {
|
|
1388
1418
|
constructor(parent) {
|
|
1389
1419
|
__publicField(this, "_handleStem", function(word) {
|
|
@@ -1432,11 +1462,11 @@ var Conjugator = class {
|
|
|
1432
1462
|
}
|
|
1433
1463
|
args = this._parseArgs(args);
|
|
1434
1464
|
let frontVG = TO_BE.includes(verb) ? "be" : this._handleStem(verb);
|
|
1435
|
-
let actualModal, verbForm, conjs = [],
|
|
1436
|
-
if (this.form ===
|
|
1465
|
+
let actualModal, verbForm, conjs = [], RiTa2 = this.RiTa;
|
|
1466
|
+
if (this.form === RiTa2.INFINITIVE) {
|
|
1437
1467
|
actualModal = "to";
|
|
1438
1468
|
}
|
|
1439
|
-
if (this.tense ===
|
|
1469
|
+
if (this.tense === RiTa2.FUTURE) {
|
|
1440
1470
|
actualModal = "will";
|
|
1441
1471
|
}
|
|
1442
1472
|
if (this.passive) {
|
|
@@ -1456,7 +1486,7 @@ var Conjugator = class {
|
|
|
1456
1486
|
frontVG = null;
|
|
1457
1487
|
}
|
|
1458
1488
|
if (frontVG) {
|
|
1459
|
-
if (this.form ===
|
|
1489
|
+
if (this.form === RiTa2.GERUND) {
|
|
1460
1490
|
conjs.push(this.presentPart(frontVG));
|
|
1461
1491
|
} else if (this.interrogative && frontVG != "be" && conjs.length < 1) {
|
|
1462
1492
|
conjs.push(frontVG);
|
|
@@ -1575,19 +1605,19 @@ var Conjugator = class {
|
|
|
1575
1605
|
}
|
|
1576
1606
|
_parseArgs(args) {
|
|
1577
1607
|
this._reset();
|
|
1578
|
-
const
|
|
1608
|
+
const RiTa2 = this.RiTa;
|
|
1579
1609
|
if (typeof args === "string") {
|
|
1580
1610
|
if (/^[123][SP](Pr|Pa|Fu)$/.test(args)) {
|
|
1581
1611
|
let opts = {};
|
|
1582
1612
|
opts.person = parseInt(args[0]);
|
|
1583
|
-
opts.number = args[1] === "S" ?
|
|
1613
|
+
opts.number = args[1] === "S" ? RiTa2.SINGULAR : RiTa2.PLURAL;
|
|
1584
1614
|
let tense = args.substr(2);
|
|
1585
1615
|
if (tense === "Pr")
|
|
1586
|
-
opts.tense =
|
|
1616
|
+
opts.tense = RiTa2.PRESENT;
|
|
1587
1617
|
if (tense === "Fu")
|
|
1588
|
-
opts.tense =
|
|
1618
|
+
opts.tense = RiTa2.FUTURE;
|
|
1589
1619
|
if (tense === "Pa")
|
|
1590
|
-
opts.tense =
|
|
1620
|
+
opts.tense = RiTa2.PAST;
|
|
1591
1621
|
args = opts;
|
|
1592
1622
|
} else {
|
|
1593
1623
|
throw Error("Invalid args: " + args);
|
|
@@ -1678,20 +1708,20 @@ var Conjugator = class {
|
|
|
1678
1708
|
return false;
|
|
1679
1709
|
}
|
|
1680
1710
|
_pastTense(theVerb, pers, numb) {
|
|
1681
|
-
const
|
|
1711
|
+
const RiTa2 = this.RiTa;
|
|
1682
1712
|
if (theVerb.toLowerCase() === "be") {
|
|
1683
1713
|
switch (numb) {
|
|
1684
|
-
case
|
|
1714
|
+
case RiTa2.SINGULAR:
|
|
1685
1715
|
switch (pers) {
|
|
1686
|
-
case
|
|
1716
|
+
case RiTa2.FIRST:
|
|
1687
1717
|
break;
|
|
1688
|
-
case
|
|
1718
|
+
case RiTa2.THIRD:
|
|
1689
1719
|
return "was";
|
|
1690
|
-
case
|
|
1720
|
+
case RiTa2.SECOND:
|
|
1691
1721
|
return "were";
|
|
1692
1722
|
}
|
|
1693
1723
|
break;
|
|
1694
|
-
case
|
|
1724
|
+
case RiTa2.PLURAL:
|
|
1695
1725
|
return "were";
|
|
1696
1726
|
}
|
|
1697
1727
|
}
|
|
@@ -1700,17 +1730,17 @@ var Conjugator = class {
|
|
|
1700
1730
|
_presentTense(theVerb, person, number) {
|
|
1701
1731
|
person = person || this.person;
|
|
1702
1732
|
number = number || this.number;
|
|
1703
|
-
const
|
|
1704
|
-
if (person ===
|
|
1733
|
+
const RiTa2 = this.RiTa;
|
|
1734
|
+
if (person === RiTa2.THIRD && number === RiTa2.SINGULAR) {
|
|
1705
1735
|
return this._checkRules(PRESENT_RULESET, theVerb);
|
|
1706
1736
|
} else if (theVerb === "be") {
|
|
1707
|
-
if (number ===
|
|
1737
|
+
if (number === RiTa2.SINGULAR) {
|
|
1708
1738
|
switch (person) {
|
|
1709
|
-
case
|
|
1739
|
+
case RiTa2.FIRST:
|
|
1710
1740
|
return "am";
|
|
1711
|
-
case
|
|
1741
|
+
case RiTa2.SECOND:
|
|
1712
1742
|
return "are";
|
|
1713
|
-
case
|
|
1743
|
+
case RiTa2.THIRD:
|
|
1714
1744
|
return "is";
|
|
1715
1745
|
}
|
|
1716
1746
|
} else {
|
|
@@ -4787,13 +4817,13 @@ var Util = class _Util {
|
|
|
4787
4817
|
return !isNaN(parseFloat(n)) && isFinite(n);
|
|
4788
4818
|
}
|
|
4789
4819
|
static numOpt(opts, name, def = 0) {
|
|
4790
|
-
return _Util.isNum(_optionalChain([opts, 'optionalAccess',
|
|
4820
|
+
return _Util.isNum(_optionalChain([opts, 'optionalAccess', _3 => _3[name]])) ? opts[name] : def;
|
|
4791
4821
|
}
|
|
4792
4822
|
};
|
|
4793
4823
|
var RE2 = class {
|
|
4794
|
-
constructor(
|
|
4795
|
-
this.raw =
|
|
4796
|
-
this.regex = new RegExp(
|
|
4824
|
+
constructor(regex, offset, suffix) {
|
|
4825
|
+
this.raw = regex;
|
|
4826
|
+
this.regex = new RegExp(regex);
|
|
4797
4827
|
this.offset = offset;
|
|
4798
4828
|
this.suffix = suffix || "";
|
|
4799
4829
|
}
|
|
@@ -27069,20 +27099,25 @@ var Lexicon = class {
|
|
|
27069
27099
|
async soundsLike(word, options = {}) {
|
|
27070
27100
|
return this._promise(this.soundsLikeSync, [word, options]);
|
|
27071
27101
|
}
|
|
27102
|
+
/**
|
|
27103
|
+
* A synchronous version of RiTa.lexicon.soundsLike().
|
|
27104
|
+
* @param {string} word
|
|
27105
|
+
* @param {object} [opts]
|
|
27106
|
+
* @returns {string[]} An array of words that sound like the input word
|
|
27107
|
+
*/
|
|
27072
27108
|
soundsLikeSync(word, opts = {}) {
|
|
27073
27109
|
if (!word || !word.length)
|
|
27074
27110
|
return [];
|
|
27075
|
-
opts.type
|
|
27076
|
-
return opts.matchSpelling ? this._bySoundAndLetter(word, opts) : this._byTypeSync(word, opts);
|
|
27111
|
+
return opts.matchSpelling ? this._bySoundAndLetterSync(word, opts) : this._byTypeSync(word, { ...opts, type: "sound" });
|
|
27077
27112
|
}
|
|
27078
|
-
randomWord(
|
|
27079
|
-
if (!
|
|
27113
|
+
randomWord(pattern, opts) {
|
|
27114
|
+
if (!pattern && !opts) {
|
|
27080
27115
|
return this.RiTa.random(Object.keys(this.data));
|
|
27081
27116
|
}
|
|
27082
|
-
if (!(
|
|
27083
|
-
if (typeof
|
|
27084
|
-
opts =
|
|
27085
|
-
|
|
27117
|
+
if (!(pattern instanceof RegExp)) {
|
|
27118
|
+
if (typeof pattern === "object" && !opts) {
|
|
27119
|
+
opts = pattern;
|
|
27120
|
+
pattern = void 0;
|
|
27086
27121
|
}
|
|
27087
27122
|
}
|
|
27088
27123
|
opts = opts || {};
|
|
@@ -27090,14 +27125,14 @@ var Lexicon = class {
|
|
|
27090
27125
|
opts.shuffle = true;
|
|
27091
27126
|
opts.strictPos = true;
|
|
27092
27127
|
opts.minLength = util_default.numOpt(opts, "minLength", 4);
|
|
27093
|
-
let result = this.searchSync(
|
|
27128
|
+
let result = this.searchSync(pattern, opts);
|
|
27094
27129
|
if (result.length < 1 && opts.hasOwnProperty("pos")) {
|
|
27095
27130
|
opts.strictPos = false;
|
|
27096
|
-
result = this.searchSync(
|
|
27131
|
+
result = this.searchSync(pattern, opts);
|
|
27097
27132
|
}
|
|
27098
27133
|
if (result.length < 1) {
|
|
27099
27134
|
["strictPos", "shuffle", "targetPos"].forEach((k) => delete opts[k]);
|
|
27100
|
-
throw Error("No words matching constraints:\n" + JSON.stringify(opts, 0, 2));
|
|
27135
|
+
throw Error("No words matching constraints:\n" + JSON.stringify(opts, void 0, 2));
|
|
27101
27136
|
}
|
|
27102
27137
|
return result[0];
|
|
27103
27138
|
}
|
|
@@ -27108,7 +27143,7 @@ var Lexicon = class {
|
|
|
27108
27143
|
let words = Object.keys(this.data);
|
|
27109
27144
|
if (!pattern && !options)
|
|
27110
27145
|
return words;
|
|
27111
|
-
let { regex
|
|
27146
|
+
let { regex, opts } = this._parseRegex(pattern, options);
|
|
27112
27147
|
this._parseArgs(opts);
|
|
27113
27148
|
if (opts.shuffle)
|
|
27114
27149
|
words = this.RiTa.randomizer.shuffle(words);
|
|
@@ -27124,7 +27159,7 @@ var Lexicon = class {
|
|
|
27124
27159
|
if (word !== words[i])
|
|
27125
27160
|
data = this.data[word];
|
|
27126
27161
|
}
|
|
27127
|
-
if (!
|
|
27162
|
+
if (!regex || this._regexMatch(word, data, regex, opts.type)) {
|
|
27128
27163
|
result.push(word);
|
|
27129
27164
|
if (result.length === opts.limit)
|
|
27130
27165
|
break;
|
|
@@ -27260,30 +27295,37 @@ var Lexicon = class {
|
|
|
27260
27295
|
opts.targetPos = tpos;
|
|
27261
27296
|
}
|
|
27262
27297
|
_reconjugate(word, pos) {
|
|
27263
|
-
const
|
|
27298
|
+
const RiTa2 = this.RiTa;
|
|
27264
27299
|
switch (pos) {
|
|
27265
27300
|
case "vbd":
|
|
27266
|
-
return
|
|
27267
|
-
number:
|
|
27268
|
-
person:
|
|
27269
|
-
tense:
|
|
27301
|
+
return RiTa2.conjugate(word, {
|
|
27302
|
+
number: RiTa2.SINGULAR,
|
|
27303
|
+
person: RiTa2.FIRST,
|
|
27304
|
+
tense: RiTa2.PAST
|
|
27270
27305
|
});
|
|
27271
27306
|
case "vbg":
|
|
27272
|
-
return
|
|
27307
|
+
return RiTa2.presentPart(word);
|
|
27273
27308
|
case "vbn":
|
|
27274
|
-
return
|
|
27309
|
+
return RiTa2.pastPart(word);
|
|
27275
27310
|
case "vbp":
|
|
27276
27311
|
return word;
|
|
27277
27312
|
case "vbz":
|
|
27278
|
-
return
|
|
27279
|
-
number:
|
|
27280
|
-
person:
|
|
27281
|
-
tense:
|
|
27313
|
+
return RiTa2.conjugate(word, {
|
|
27314
|
+
number: RiTa2.SINGULAR,
|
|
27315
|
+
person: RiTa2.THIRD,
|
|
27316
|
+
tense: RiTa2.PRESENT
|
|
27282
27317
|
});
|
|
27283
27318
|
default:
|
|
27284
27319
|
throw Error("Unexpected pos: " + pos);
|
|
27285
27320
|
}
|
|
27286
27321
|
}
|
|
27322
|
+
_bySoundAndLetterSync(word, opts) {
|
|
27323
|
+
let bySound = this._byTypeSync(word, { ...opts, type: "sound" });
|
|
27324
|
+
let byLetter = this._byTypeSync(word, { ...opts, type: "letter" });
|
|
27325
|
+
if (bySound.length < 1 || byLetter.length < 1)
|
|
27326
|
+
return [];
|
|
27327
|
+
return this._intersect(bySound, byLetter).slice(0, opts.limit);
|
|
27328
|
+
}
|
|
27287
27329
|
async _bySoundAndLetter(word, opts) {
|
|
27288
27330
|
let types = ["sound", "letter"];
|
|
27289
27331
|
let promises = types.map((type) => this._promise(this._byTypeSync, [word, { ...opts, type }]));
|
|
@@ -27307,7 +27349,8 @@ var Lexicon = class {
|
|
|
27307
27349
|
// med for 2 strings (or 2 arrays)
|
|
27308
27350
|
minEditDist(source, target) {
|
|
27309
27351
|
let cost;
|
|
27310
|
-
let i, j
|
|
27352
|
+
let i, j;
|
|
27353
|
+
let matrix = [];
|
|
27311
27354
|
let sI;
|
|
27312
27355
|
let tJ;
|
|
27313
27356
|
for (i = 0; i <= source.length; i++) {
|
|
@@ -27331,8 +27374,8 @@ var Lexicon = class {
|
|
|
27331
27374
|
}
|
|
27332
27375
|
return matrix[source.length][target.length];
|
|
27333
27376
|
}
|
|
27334
|
-
isMassNoun(w
|
|
27335
|
-
return w.endsWith("ness") || w.endsWith("ism") ||
|
|
27377
|
+
isMassNoun(w) {
|
|
27378
|
+
return w.endsWith("ness") || w.endsWith("ism") || this.RiTa.MASS_NOUNS.includes(w);
|
|
27336
27379
|
}
|
|
27337
27380
|
// helpers ---------------------------------------------------------------
|
|
27338
27381
|
_promise(fun, args) {
|
|
@@ -27344,44 +27387,44 @@ var Lexicon = class {
|
|
|
27344
27387
|
}
|
|
27345
27388
|
});
|
|
27346
27389
|
}
|
|
27347
|
-
_parseRegex(
|
|
27348
|
-
if (typeof
|
|
27390
|
+
_parseRegex(regex, opts) {
|
|
27391
|
+
if (typeof regex === "string") {
|
|
27349
27392
|
if (opts && opts.type === "stresses") {
|
|
27350
|
-
if (/^\^?[01]+\$?$/.test(
|
|
27351
|
-
|
|
27393
|
+
if (/^\^?[01]+\$?$/.test(regex)) {
|
|
27394
|
+
regex = regex.replace(/([01])(?=([01]))/g, "$1/");
|
|
27352
27395
|
}
|
|
27353
27396
|
}
|
|
27354
|
-
|
|
27355
|
-
} else if (
|
|
27356
|
-
} else if (typeof
|
|
27397
|
+
regex = new RegExp(regex);
|
|
27398
|
+
} else if (regex instanceof RegExp) {
|
|
27399
|
+
} else if (typeof regex === "object" || regex === void 0 && typeof opts === "object") {
|
|
27357
27400
|
if (!opts) {
|
|
27358
|
-
opts =
|
|
27401
|
+
opts = regex;
|
|
27359
27402
|
}
|
|
27360
|
-
|
|
27361
|
-
if (typeof
|
|
27403
|
+
regex = opts.regex;
|
|
27404
|
+
if (typeof regex === "string") {
|
|
27362
27405
|
if (opts && opts.type === "stresses") {
|
|
27363
|
-
if (/^\^?[01]+\$?$/.test(
|
|
27364
|
-
|
|
27406
|
+
if (/^\^?[01]+\$?$/.test(regex)) {
|
|
27407
|
+
regex = regex.replace(/([01])(?=([01]))/g, "$1/");
|
|
27365
27408
|
}
|
|
27366
27409
|
}
|
|
27367
|
-
|
|
27410
|
+
regex = new RegExp(regex);
|
|
27368
27411
|
}
|
|
27369
27412
|
}
|
|
27370
|
-
return { regex
|
|
27413
|
+
return { regex, opts: opts || {} };
|
|
27371
27414
|
}
|
|
27372
|
-
_regexMatch(word, data,
|
|
27415
|
+
_regexMatch(word, data, regex, type) {
|
|
27373
27416
|
if (type === "stresses") {
|
|
27374
27417
|
let phones = data ? data[0] : this.rawPhones(word);
|
|
27375
27418
|
let stresses = this.analyzer.phonesToStress(phones);
|
|
27376
|
-
if (
|
|
27419
|
+
if (regex.test(stresses))
|
|
27377
27420
|
return true;
|
|
27378
27421
|
} else if (type === "phones") {
|
|
27379
27422
|
let phones = data ? data[0] : this.rawPhones(word);
|
|
27380
27423
|
phones = phones.replace(/1/g, "").replace(/ /g, "-");
|
|
27381
|
-
if (
|
|
27424
|
+
if (regex.test(phones))
|
|
27382
27425
|
return true;
|
|
27383
27426
|
} else {
|
|
27384
|
-
if (
|
|
27427
|
+
if (regex.test(word))
|
|
27385
27428
|
return true;
|
|
27386
27429
|
}
|
|
27387
27430
|
}
|
|
@@ -27471,6 +27514,10 @@ var lexicon_default = Lexicon;
|
|
|
27471
27514
|
|
|
27472
27515
|
// src/tagger.js
|
|
27473
27516
|
var Tagger = class {
|
|
27517
|
+
/**
|
|
27518
|
+
* Create a Tagger.
|
|
27519
|
+
* @param {any} parent - RiTa parent class.
|
|
27520
|
+
*/
|
|
27474
27521
|
constructor(parent) {
|
|
27475
27522
|
this.RiTa = parent;
|
|
27476
27523
|
}
|
|
@@ -27535,18 +27582,30 @@ var Tagger = class {
|
|
|
27535
27582
|
}
|
|
27536
27583
|
return [];
|
|
27537
27584
|
}
|
|
27538
|
-
|
|
27539
|
-
|
|
27540
|
-
|
|
27541
|
-
|
|
27542
|
-
|
|
27543
|
-
|
|
27544
|
-
|
|
27545
|
-
|
|
27546
|
-
|
|
27547
|
-
|
|
27548
|
-
|
|
27549
|
-
|
|
27585
|
+
/**
|
|
27586
|
+
* Tags a word, a string of words, or an array of words with their parts of speech
|
|
27587
|
+
* @param {(string|string[])} input - The input containing a word or words
|
|
27588
|
+
* @param {object} [opts] - options for the tagging {inline, simple}
|
|
27589
|
+
* @param {boolean} [opts.inline] - tags are returned inline with words
|
|
27590
|
+
* @param {boolean} [opts.simple] - use simple tags (noun=n,verb=v,adverb=r,adjective=a)
|
|
27591
|
+
* @returns {any} the pos tag(s) or string with tags inline
|
|
27592
|
+
*/
|
|
27593
|
+
tag(input, opts = {
|
|
27594
|
+
inline: false,
|
|
27595
|
+
simple: false
|
|
27596
|
+
}) {
|
|
27597
|
+
let result = [], choices2d = [];
|
|
27598
|
+
let dbug = _optionalChain([opts, 'optionalAccess', _4 => _4.dbug]) || false;
|
|
27599
|
+
if (!input || !input.length)
|
|
27600
|
+
return opts.inline ? "" : [];
|
|
27601
|
+
let words;
|
|
27602
|
+
if (!Array.isArray(input)) {
|
|
27603
|
+
if (!input.trim().length) {
|
|
27604
|
+
return opts.inline ? "" : [];
|
|
27605
|
+
}
|
|
27606
|
+
words = this.RiTa.tokenizer.tokenize(input);
|
|
27607
|
+
} else {
|
|
27608
|
+
words = input;
|
|
27550
27609
|
}
|
|
27551
27610
|
for (let i = 0, l = words.length; i < l; i++) {
|
|
27552
27611
|
let word = words[i];
|
|
@@ -27563,7 +27622,7 @@ var Tagger = class {
|
|
|
27563
27622
|
}
|
|
27564
27623
|
}
|
|
27565
27624
|
let tags = this._applyContext(words, result, choices2d, dbug);
|
|
27566
|
-
if (simple) {
|
|
27625
|
+
if (opts.simple) {
|
|
27567
27626
|
for (let i = 0; i < tags.length; i++) {
|
|
27568
27627
|
if (NOUNS.includes(tags[i]))
|
|
27569
27628
|
tags[i] = "n";
|
|
@@ -27577,7 +27636,7 @@ var Tagger = class {
|
|
|
27577
27636
|
tags[i] = "-";
|
|
27578
27637
|
}
|
|
27579
27638
|
}
|
|
27580
|
-
return inline ? this.inlineTags(words, tags) : tags;
|
|
27639
|
+
return opts.inline ? this.inlineTags(words, tags) : tags;
|
|
27581
27640
|
}
|
|
27582
27641
|
//////////////////////////////////////////////////////////////////
|
|
27583
27642
|
_isNoLexIrregularVerb(stem) {
|
|
@@ -27719,7 +27778,14 @@ var Tagger = class {
|
|
|
27719
27778
|
console.log("\n Custom(" + i + ") tagged '" + frm + "' -> '" + to + "'\n\n");
|
|
27720
27779
|
}
|
|
27721
27780
|
// debug only: not available in built version since 'dbug' in tag() is 0
|
|
27722
|
-
|
|
27781
|
+
/**
|
|
27782
|
+
* Applies a customized subset of the Brill transformations
|
|
27783
|
+
* @param {string[]} words
|
|
27784
|
+
* @param {string[]} result
|
|
27785
|
+
* @param {string[]} choices
|
|
27786
|
+
* @param {boolean} dbug
|
|
27787
|
+
* @returns
|
|
27788
|
+
*/
|
|
27723
27789
|
_applyContext(words, result, choices, dbug) {
|
|
27724
27790
|
for (let i = 0, l = words.length; i < l; i++) {
|
|
27725
27791
|
let word = words[i], tag = result[i];
|
|
@@ -27802,7 +27868,7 @@ var Tagger = class {
|
|
|
27802
27868
|
let idx = result.slice(i + 1).indexOf("nn");
|
|
27803
27869
|
let allJJ = true;
|
|
27804
27870
|
for (let k = 0; k < idx; k++) {
|
|
27805
|
-
if (
|
|
27871
|
+
if (result[i + 1 + k] !== "jj") {
|
|
27806
27872
|
allJJ = false;
|
|
27807
27873
|
break;
|
|
27808
27874
|
}
|
|
@@ -27948,7 +28014,7 @@ var Tagger = class {
|
|
|
27948
28014
|
for (let j = 0; j < tags.length; j++) {
|
|
27949
28015
|
if (pos === tags[j])
|
|
27950
28016
|
return true;
|
|
27951
|
-
if (pos === "n" && NOUNS.includes(tags[j]) || pos === "v" && VERBS.includes(tags[j]) || pos === "r" && ADVS.includes(tags[j]) || pos === "a" && ADJS.includes
|
|
28017
|
+
if (pos === "n" && NOUNS.includes(tags[j]) || pos === "v" && VERBS.includes(tags[j]) || pos === "r" && ADVS.includes(tags[j]) || pos === "a" && ADJS.includes(tags[j])) {
|
|
27952
28018
|
return true;
|
|
27953
28019
|
}
|
|
27954
28020
|
}
|
|
@@ -27996,7 +28062,7 @@ var Inflector = class {
|
|
|
27996
28062
|
if (!word.length)
|
|
27997
28063
|
return "";
|
|
27998
28064
|
let check = word.toLowerCase();
|
|
27999
|
-
if (this.RiTa.
|
|
28065
|
+
if (this.RiTa.lexicon.isMassNoun(check)) {
|
|
28000
28066
|
dbug && console.log(word + " hit MASS_NOUNS");
|
|
28001
28067
|
return word;
|
|
28002
28068
|
}
|
|
@@ -28011,11 +28077,6 @@ var Inflector = class {
|
|
|
28011
28077
|
return word;
|
|
28012
28078
|
}
|
|
28013
28079
|
singularize(word, opts) {
|
|
28014
|
-
if (this.isSingular(word, opts)) {
|
|
28015
|
-
if (opts && opts.debug)
|
|
28016
|
-
console.log("pluralize returning via isPlural()");
|
|
28017
|
-
return word;
|
|
28018
|
-
}
|
|
28019
28080
|
return this.adjustNumber(word, SING, opts && opts.dbug);
|
|
28020
28081
|
}
|
|
28021
28082
|
pluralize(word, opts) {
|
|
@@ -28026,30 +28087,30 @@ var Inflector = class {
|
|
|
28026
28087
|
}
|
|
28027
28088
|
return this.adjustNumber(word, PLUR, opts && opts.dbug);
|
|
28028
28089
|
}
|
|
28029
|
-
isSingular(word, opts) {
|
|
28030
|
-
|
|
28031
|
-
|
|
28032
|
-
throw Error(`isSingular() takes string`);
|
|
28033
|
-
|
|
28034
|
-
|
|
28035
|
-
|
|
28036
|
-
|
|
28037
|
-
|
|
28038
|
-
|
|
28039
|
-
|
|
28040
|
-
|
|
28041
|
-
|
|
28042
|
-
|
|
28043
|
-
|
|
28090
|
+
/*isSingular(word, opts) {
|
|
28091
|
+
// return false;
|
|
28092
|
+
// if (word && typeof word !== 'string') {
|
|
28093
|
+
// throw Error(`isSingular() takes string`);
|
|
28094
|
+
// }
|
|
28095
|
+
if (!word || !word.length) return false;
|
|
28096
|
+
|
|
28097
|
+
let dbug = opts && opts.dbug;
|
|
28098
|
+
|
|
28099
|
+
word = word.toLowerCase();
|
|
28100
|
+
|
|
28101
|
+
if (this.RiTa.MASS_NOUNS.includes(word)) {
|
|
28102
|
+
dbug && console.log(word + " is mass noun");
|
|
28103
|
+
return true;
|
|
28104
|
+
}
|
|
28105
|
+
|
|
28106
|
+
return NOUNS_ENDING_IN_S.includes(word);
|
|
28107
|
+
}*/
|
|
28044
28108
|
isPlural(word, opts) {
|
|
28045
|
-
if (word && typeof word !== "string") {
|
|
28046
|
-
throw Error(`isPlural() takes string`);
|
|
28047
|
-
}
|
|
28048
28109
|
if (!word || !word.length)
|
|
28049
28110
|
return false;
|
|
28050
28111
|
let dbug = opts && opts.dbug;
|
|
28051
28112
|
word = word.toLowerCase();
|
|
28052
|
-
if (this.RiTa.
|
|
28113
|
+
if (this.RiTa.lexicon.isMassNoun(word)) {
|
|
28053
28114
|
dbug && console.log(word + " is mass noun");
|
|
28054
28115
|
return true;
|
|
28055
28116
|
}
|
|
@@ -28075,13 +28136,9 @@ var Inflector = class {
|
|
|
28075
28136
|
dbug && console.log(word + ": latin rule -a to -ae");
|
|
28076
28137
|
return true;
|
|
28077
28138
|
}
|
|
28078
|
-
|
|
28079
|
-
|
|
28080
|
-
|
|
28081
|
-
dbug && console.log(word + "'s singular form " + sing + " is nn");
|
|
28082
|
-
return true;
|
|
28083
|
-
}
|
|
28084
|
-
} else {
|
|
28139
|
+
let tags = this.RiTa.tagger.allTags(sing, { noGuessing: true });
|
|
28140
|
+
if (tags.includes("nn")) {
|
|
28141
|
+
dbug && console.log(word + "'s singular form " + sing + " is nn");
|
|
28085
28142
|
return true;
|
|
28086
28143
|
}
|
|
28087
28144
|
}
|
|
@@ -28159,7 +28216,7 @@ var SING_RULES = [
|
|
|
28159
28216
|
RE3("(sh|ch|o|ss|x|z|us)es$", 2),
|
|
28160
28217
|
RE3("ses$", 2, "is"),
|
|
28161
28218
|
// catharses, prognoses
|
|
28162
|
-
// singulars ending in s, TODO: replace with
|
|
28219
|
+
// singulars ending in s, TODO: replace with NOUNS_ENDING_IN_S list
|
|
28163
28220
|
RE3("([vs]is|gas|[im]nus|genus|[ptbl]us|[ai]ss|[dr]ess)$", 0),
|
|
28164
28221
|
// octopus, thesis, alumnus, gas, bus (singulars)
|
|
28165
28222
|
DEFAULT_SING
|
|
@@ -28242,7 +28299,10 @@ var LetterToSound = class _LetterToSound {
|
|
|
28242
28299
|
this.tokenizer.tokenize(line, " ");
|
|
28243
28300
|
let type = this.tokenizer.nextToken();
|
|
28244
28301
|
if (type === "S" || type === "P") {
|
|
28245
|
-
this.stateMachine[this.numStates++] = this.createState(
|
|
28302
|
+
this.stateMachine[this.numStates++] = this.createState(
|
|
28303
|
+
type
|
|
28304
|
+
/*, this.tokenizer*/
|
|
28305
|
+
);
|
|
28246
28306
|
} else if (type === "I") {
|
|
28247
28307
|
let index = parseInt(this.tokenizer.nextToken());
|
|
28248
28308
|
if (index != this.numStates) {
|
|
@@ -28256,12 +28316,12 @@ var LetterToSound = class _LetterToSound {
|
|
|
28256
28316
|
}
|
|
28257
28317
|
}
|
|
28258
28318
|
buildPhones(word, opts) {
|
|
28259
|
-
const
|
|
28260
|
-
if (!word || !word.length ||
|
|
28319
|
+
const RiTa2 = this.RiTa;
|
|
28320
|
+
if (!word || !word.length || RiTa2.isPunct(word))
|
|
28261
28321
|
return;
|
|
28262
28322
|
let phoneList = [], windowSize = 4;
|
|
28263
28323
|
let fullBuff, tmp, currentState, startIndex, stateIndex, c;
|
|
28264
|
-
let silent =
|
|
28324
|
+
let silent = RiTa2.SILENT || RiTa2.SILENCE_LTS || opts && opts.silent;
|
|
28265
28325
|
if (!_LetterToSound.RULES) {
|
|
28266
28326
|
if (!this.warnedForNoLTS) {
|
|
28267
28327
|
this.warnedForNoLTS = true;
|
|
@@ -28273,16 +28333,14 @@ var LetterToSound = class _LetterToSound {
|
|
|
28273
28333
|
word = word.toLowerCase();
|
|
28274
28334
|
if (util_default.isNum(word)) {
|
|
28275
28335
|
if (/^[0-9]+$/.test(word)) {
|
|
28276
|
-
|
|
28277
|
-
|
|
28278
|
-
|
|
28279
|
-
|
|
28280
|
-
|
|
28281
|
-
|
|
28282
|
-
phoneList.push(...phs.split("-"));
|
|
28283
|
-
}
|
|
28284
|
-
return phoneList;
|
|
28336
|
+
word = word.length > 1 ? word.split("") : [word];
|
|
28337
|
+
for (let k = 0; k < word.length; k++) {
|
|
28338
|
+
let asWord = util_default.Numbers.toWords[parseInt(word[k])];
|
|
28339
|
+
let phs = RiTa2.lexicon.rawPhones(asWord, { noLts: true });
|
|
28340
|
+
phs = phs.replace(/1/g, "").replace(/ /g, "-");
|
|
28341
|
+
phoneList.push(...phs.split("-"));
|
|
28285
28342
|
}
|
|
28343
|
+
return phoneList;
|
|
28286
28344
|
}
|
|
28287
28345
|
}
|
|
28288
28346
|
tmp = "000#" + word.trim() + "#000", fullBuff = tmp.split("");
|
|
@@ -28297,7 +28355,7 @@ var LetterToSound = class _LetterToSound {
|
|
|
28297
28355
|
startIndex = this.letterIndex[c];
|
|
28298
28356
|
if (isNaN(parseFloat(startIndex)) || !isFinite(startIndex)) {
|
|
28299
28357
|
if (!silent) {
|
|
28300
|
-
console.warn("Unable to generate LTS for '" + word + "', no index for '" + c + "', isDigit=" + util_default.isNum(c) + ", isPunct=" +
|
|
28358
|
+
console.warn("Unable to generate LTS for '" + word + "', no index for '" + c + "', isDigit=" + util_default.isNum(c) + ", isPunct=" + RiTa2.isPunct(c));
|
|
28301
28359
|
}
|
|
28302
28360
|
return;
|
|
28303
28361
|
}
|
|
@@ -28322,7 +28380,7 @@ var LetterToSound = class _LetterToSound {
|
|
|
28322
28380
|
return state;
|
|
28323
28381
|
} else {
|
|
28324
28382
|
this.tokenizer.tokenize(i);
|
|
28325
|
-
return this.getState(this.tokenizer.nextToken()
|
|
28383
|
+
return this.getState(this.tokenizer.nextToken());
|
|
28326
28384
|
}
|
|
28327
28385
|
}
|
|
28328
28386
|
};
|
|
@@ -41533,8 +41591,7 @@ var Analyzer = class {
|
|
|
41533
41591
|
return features;
|
|
41534
41592
|
}
|
|
41535
41593
|
computePhones(word, opts) {
|
|
41536
|
-
|
|
41537
|
-
this.lts = new rita_lts_default(this.RiTa);
|
|
41594
|
+
this.lts = this.lts || new rita_lts_default(this.RiTa);
|
|
41538
41595
|
return this.lts.buildPhones(word, opts);
|
|
41539
41596
|
}
|
|
41540
41597
|
phonesToStress(phones) {
|
|
@@ -41551,8 +41608,7 @@ var Analyzer = class {
|
|
|
41551
41608
|
return stress;
|
|
41552
41609
|
}
|
|
41553
41610
|
analyzeWord(word, opts = {}) {
|
|
41554
|
-
let
|
|
41555
|
-
let result = RiTa3.CACHING && this.cache[word];
|
|
41611
|
+
let result = this.RiTa.CACHING && this.cache[word];
|
|
41556
41612
|
if (typeof result === "undefined") {
|
|
41557
41613
|
let slash = "/", delim = "-";
|
|
41558
41614
|
let lex = this.RiTa.lexicon;
|
|
@@ -41581,7 +41637,7 @@ var Analyzer = class {
|
|
|
41581
41637
|
}
|
|
41582
41638
|
result = { phones, stresses, syllables };
|
|
41583
41639
|
Object.keys(result).forEach((k) => result[k] = result[k].trim());
|
|
41584
|
-
if (
|
|
41640
|
+
if (this.RiTa.CACHING)
|
|
41585
41641
|
this.cache[word] = result;
|
|
41586
41642
|
}
|
|
41587
41643
|
return result;
|
|
@@ -41601,15 +41657,15 @@ var Analyzer = class {
|
|
|
41601
41657
|
}
|
|
41602
41658
|
//#HWF this part is unchanged but moved to a separate function
|
|
41603
41659
|
_computePhonesWord(word, lex, opts, isPart) {
|
|
41604
|
-
let rawPhones,
|
|
41660
|
+
let rawPhones, RiTa2 = this.RiTa;
|
|
41605
41661
|
if (isPart)
|
|
41606
41662
|
rawPhones = lex.rawPhones(word, { noLts: true });
|
|
41607
41663
|
if (!rawPhones && word.endsWith("s")) {
|
|
41608
|
-
let sing =
|
|
41664
|
+
let sing = RiTa2.singularize(word);
|
|
41609
41665
|
rawPhones = lex.rawPhones(sing, { noLts: true });
|
|
41610
41666
|
rawPhones && (rawPhones += "-z");
|
|
41611
41667
|
}
|
|
41612
|
-
let silent =
|
|
41668
|
+
let silent = RiTa2.SILENT || RiTa2.SILENCE_LTS || opts && opts.silent;
|
|
41613
41669
|
if (!rawPhones) {
|
|
41614
41670
|
let ltsPhones = this.computePhones(word, opts);
|
|
41615
41671
|
if (ltsPhones && ltsPhones.length) {
|
|
@@ -41740,7 +41796,7 @@ var SeededRandom = class {
|
|
|
41740
41796
|
if (!(Array.isArray(arg) || util_default.isNum(arg)))
|
|
41741
41797
|
throw Error("Expects [] or int");
|
|
41742
41798
|
let o = Array.isArray(arg) ? arg : Array.from(Array(arg).keys());
|
|
41743
|
-
for (let j, x, i = o.length; i; j =
|
|
41799
|
+
for (let j, x, i = o.length; i; j = Math.floor(this.random() * i), x = o[--i], o[i] = o[j], o[j] = x) {
|
|
41744
41800
|
}
|
|
41745
41801
|
return o;
|
|
41746
41802
|
}
|
|
@@ -41866,16 +41922,31 @@ var randgen_default = SeededRandom;
|
|
|
41866
41922
|
|
|
41867
41923
|
// src/markov.js
|
|
41868
41924
|
var _json = require('@ungap/structured-clone/json');
|
|
41869
|
-
var
|
|
41870
|
-
|
|
41925
|
+
var _RiMarkov = class _RiMarkov {
|
|
41926
|
+
// RiTa
|
|
41927
|
+
/**
|
|
41928
|
+
* Creates a new RiMarkov object with functions for text-generation and other probabilistic functions,
|
|
41929
|
+
* via Markov chains (or n-grams) with options to process words or tokens split by arbitrary regular expressions.
|
|
41930
|
+
* @param {number} [n] - the n-gram size (an integer >= 2)
|
|
41931
|
+
* @param {object} [options={}] - options for the model
|
|
41932
|
+
* @param {string|string[]} [options.text] - a text string, or array of sentences, to add to the model (same as via model.addText())
|
|
41933
|
+
* @param {boolean} [options.trace] - output trace info to the console
|
|
41934
|
+
* @param {number} [options.maxLengthMatch] - # of words allowed in result to match a sequence in the input, default=∞
|
|
41935
|
+
* @param {number} [options.maxAttempts=999] - max attempts to complete one or more generations before erroring, default=999
|
|
41936
|
+
* @param {function} [options.tokenize] - custom tokenizer with tokenize() method, defaults to RiTa.tokenize()
|
|
41937
|
+
* @param {function} [options.untokenize] - custom untokenizer with untokenize() method, defaults to RiTa.untokenize()
|
|
41938
|
+
* @param {boolean} [options.disableInputChecks=false] - if true, allow result to be present in the input, default=false
|
|
41939
|
+
* @memberof RiMarkov
|
|
41940
|
+
*/
|
|
41941
|
+
constructor(n, options = {}) {
|
|
41871
41942
|
this.n = n;
|
|
41872
41943
|
this.root = new Node(null, "ROOT");
|
|
41873
|
-
this.trace =
|
|
41874
|
-
this.mlm =
|
|
41875
|
-
this.maxAttempts =
|
|
41876
|
-
this.tokenize =
|
|
41877
|
-
this.untokenize =
|
|
41878
|
-
this.disableInputChecks =
|
|
41944
|
+
this.trace = options.trace;
|
|
41945
|
+
this.mlm = options.maxLengthMatch;
|
|
41946
|
+
this.maxAttempts = options.maxAttempts || 999;
|
|
41947
|
+
this.tokenize = options.tokenize || _RiMarkov.parent.tokenize;
|
|
41948
|
+
this.untokenize = options.untokenize || _RiMarkov.parent.untokenize;
|
|
41949
|
+
this.disableInputChecks = options.disableInputChecks;
|
|
41879
41950
|
this.sentenceStarts = [];
|
|
41880
41951
|
this.sentenceEnds = /* @__PURE__ */ new Set();
|
|
41881
41952
|
if (this.n < 2)
|
|
@@ -41884,11 +41955,18 @@ var RiMarkov = class _RiMarkov {
|
|
|
41884
41955
|
throw Error("maxLengthMatch must be >= N");
|
|
41885
41956
|
if (!this.disableInputChecks || this.mlm)
|
|
41886
41957
|
this.input = [];
|
|
41887
|
-
if (
|
|
41888
|
-
this.addText(
|
|
41889
|
-
}
|
|
41958
|
+
if (options.text)
|
|
41959
|
+
this.addText(options.text);
|
|
41960
|
+
}
|
|
41961
|
+
/**
|
|
41962
|
+
* Loads text into the model. If a raw string is provided, it will be split into sentences
|
|
41963
|
+
* via RiTa.sentences(). If an array is provided, each string will be treated as an individual sentence.
|
|
41964
|
+
* @param {string|string[]} text - a text string, or array of sentences, to add to the model
|
|
41965
|
+
* @param {number} [multiplier=1] - number of times to add the text to the model
|
|
41966
|
+
* @return {RiMarkov} - the RiMarkov instance
|
|
41967
|
+
*/
|
|
41890
41968
|
addText(text, multiplier = 1) {
|
|
41891
|
-
let sents = Array.isArray(text) ? text :
|
|
41969
|
+
let sents = Array.isArray(text) ? text : _RiMarkov.parent.sentences(text);
|
|
41892
41970
|
let wrap, allWords = [];
|
|
41893
41971
|
for (let k = 0; k < multiplier; k++) {
|
|
41894
41972
|
for (let i = 0; i < sents.length; i++) {
|
|
@@ -41904,19 +41982,32 @@ var RiMarkov = class _RiMarkov {
|
|
|
41904
41982
|
this.input.push(allWords[i]);
|
|
41905
41983
|
}
|
|
41906
41984
|
}
|
|
41907
|
-
|
|
41908
|
-
|
|
41985
|
+
return this;
|
|
41986
|
+
}
|
|
41987
|
+
/**
|
|
41988
|
+
* Generates `count` joined sentences from the model.
|
|
41989
|
+
* @param {number} [count=1] - the number of sentences to generate (default=1)
|
|
41990
|
+
* @param {object} [options={}] - options for the generation
|
|
41991
|
+
* @param {number} [options.minLength=5] - minimum length of each sentence
|
|
41992
|
+
* @param {number} [options.maxLength=35] - maximum length of each sentence
|
|
41993
|
+
* @param {number} [options.temperature=1] - temperature acts as a knob to adjust the probability that input elements will be selected for the output. At higher values, infrequent words are more likely to be chosen, while at lower values the most frequent inputs are more likely to be output. If no value is provided, then tokens are chosen according to their relative frequency in the input.
|
|
41994
|
+
* @param {boolean} [options.allowDuplicates=false] - if true, allow duplicate sentences in the output
|
|
41995
|
+
* @param {string|string[]} [options.seed] - a seed string or array of tokens to start the generation
|
|
41996
|
+
* @param {boolean} [options.trace] - output trace info to the console
|
|
41997
|
+
* @return {string[]} - the generated sentences
|
|
41998
|
+
*/
|
|
41999
|
+
generate(count, options = {}) {
|
|
41909
42000
|
if (arguments.length === 1 && typeof count === "object") {
|
|
41910
|
-
|
|
42001
|
+
options = count;
|
|
41911
42002
|
count = 1;
|
|
41912
42003
|
}
|
|
41913
42004
|
const num = count || 1;
|
|
41914
|
-
const minLength =
|
|
41915
|
-
const maxLength =
|
|
41916
|
-
if (typeof
|
|
42005
|
+
const minLength = options.minLength || 5;
|
|
42006
|
+
const maxLength = options.maxLength || 35;
|
|
42007
|
+
if (typeof options.temperature !== "undefined" && options.temperature <= 0) {
|
|
41917
42008
|
throw Error("Temperature option must be greater than 0");
|
|
41918
42009
|
}
|
|
41919
|
-
let tries = 0, tokens = []
|
|
42010
|
+
let tries = 0, tokens = [];
|
|
41920
42011
|
let minIdx = 0, sentenceIdxs = [];
|
|
41921
42012
|
let markedNodes = [];
|
|
41922
42013
|
const unmarkNodes = () => {
|
|
@@ -41955,7 +42046,7 @@ var RiMarkov = class _RiMarkov {
|
|
|
41955
42046
|
return false;
|
|
41956
42047
|
}
|
|
41957
42048
|
let flatSent = this.untokenize(sentence);
|
|
41958
|
-
if (!
|
|
42049
|
+
if (!options.allowDuplicates && isSubArray(sentence, tokens.slice(0, sentIdx))) {
|
|
41959
42050
|
fail("duplicate (pop: " + next.token + ")");
|
|
41960
42051
|
return false;
|
|
41961
42052
|
}
|
|
@@ -42042,7 +42133,7 @@ var RiMarkov = class _RiMarkov {
|
|
|
42042
42133
|
return len ? sentenceIdxs[len - 1] : 0;
|
|
42043
42134
|
};
|
|
42044
42135
|
const selectStart = () => {
|
|
42045
|
-
let seed =
|
|
42136
|
+
let seed = options.seed;
|
|
42046
42137
|
if (seed && seed.length) {
|
|
42047
42138
|
if (typeof seed === "string")
|
|
42048
42139
|
seed = this.tokenize(seed);
|
|
@@ -42055,7 +42146,7 @@ var RiMarkov = class _RiMarkov {
|
|
|
42055
42146
|
let usableStarts = this.sentenceStarts.filter((ss) => notMarked(this.root.child(ss)));
|
|
42056
42147
|
if (!usableStarts.length)
|
|
42057
42148
|
throw Error("No valid sentence-starts remaining");
|
|
42058
|
-
let start =
|
|
42149
|
+
let start = _RiMarkov.parent.random(usableStarts);
|
|
42059
42150
|
let startTok = this.root.child(start);
|
|
42060
42151
|
markNode(startTok);
|
|
42061
42152
|
usableStarts = this.sentenceStarts.filter((ss) => notMarked(this.root.child(ss)));
|
|
@@ -42072,7 +42163,7 @@ var RiMarkov = class _RiMarkov {
|
|
|
42072
42163
|
continue;
|
|
42073
42164
|
}
|
|
42074
42165
|
let parent = this._pathTo(tokens);
|
|
42075
|
-
let next = this._selectNext(parent,
|
|
42166
|
+
let next = this._selectNext(parent, options.temperature, tokens, notMarked);
|
|
42076
42167
|
if (!next) {
|
|
42077
42168
|
fail("mlm-fail(" + this.mlm + ")", this._flatten(tokens), true);
|
|
42078
42169
|
continue;
|
|
@@ -42093,11 +42184,20 @@ var RiMarkov = class _RiMarkov {
|
|
|
42093
42184
|
let str = this.untokenize(tokens.map((t) => t.token)).trim();
|
|
42094
42185
|
return num > 1 ? this._splitEnds(str) : str;
|
|
42095
42186
|
}
|
|
42187
|
+
/**
|
|
42188
|
+
* Converts the model to a JSON-formatted string for storage or serialization
|
|
42189
|
+
* @return {string} - the JSON string
|
|
42190
|
+
*/
|
|
42096
42191
|
toJSON() {
|
|
42097
42192
|
let data = Object.keys(this).reduce((acc, k) => Object.assign(acc, { [k]: this[k] }), {});
|
|
42098
42193
|
data.sentenceEnds = [...data.sentenceEnds];
|
|
42099
42194
|
return _json.stringify.call(void 0, data);
|
|
42100
42195
|
}
|
|
42196
|
+
/**
|
|
42197
|
+
* Creates a new model from one previously saved as JSON
|
|
42198
|
+
* @param {string} json - the JSON string to load
|
|
42199
|
+
* @return {RiMarkov} - the RiMarkov instance
|
|
42200
|
+
*/
|
|
42101
42201
|
static fromJSON(json) {
|
|
42102
42202
|
let parsed = _json.parse.call(void 0, json);
|
|
42103
42203
|
let rm = Object.assign(new _RiMarkov(), parsed);
|
|
@@ -42108,14 +42208,20 @@ var RiMarkov = class _RiMarkov {
|
|
|
42108
42208
|
populate(rm.root = new Node(null, "ROOT"), jsonRoot);
|
|
42109
42209
|
return rm;
|
|
42110
42210
|
}
|
|
42111
|
-
|
|
42211
|
+
/**
|
|
42212
|
+
* Returns array of possible tokens after pre and (optionally) before post. If only one array parameter is provided, this function returns all possible next words, ordered by probability, for the given array.
|
|
42213
|
+
* If two arrays are provided, it returns an unordered list of possible words w that complete the n-gram consisting of: pre[0]...pre[k], w, post[k+1]...post[n].
|
|
42214
|
+
* @param {string[]} pre - the list of tokens preceding the completion
|
|
42215
|
+
* @param {string[]} [post] - the (optional) list of tokens following the completion
|
|
42216
|
+
* @return {string[]} - an unordered list of possible next tokens
|
|
42217
|
+
*/
|
|
42112
42218
|
completions(pre, post) {
|
|
42113
42219
|
let tn, result = [];
|
|
42114
42220
|
if (post) {
|
|
42115
42221
|
if (pre.length + post.length > this.n)
|
|
42116
42222
|
throw Error("Sum of pre.length && post.length must be <= N, was " + (pre.length + post.length));
|
|
42117
42223
|
if (!(tn = this._pathTo(pre))) {
|
|
42118
|
-
if (!
|
|
42224
|
+
if (!_RiMarkov.parent.SILENT)
|
|
42119
42225
|
console.warn("Unable to find nodes in pre: " + pre);
|
|
42120
42226
|
return;
|
|
42121
42227
|
}
|
|
@@ -42132,8 +42238,14 @@ var RiMarkov = class _RiMarkov {
|
|
|
42132
42238
|
}
|
|
42133
42239
|
return result;
|
|
42134
42240
|
}
|
|
42135
|
-
|
|
42136
|
-
|
|
42241
|
+
/**
|
|
42242
|
+
* Returns the full set of possible next tokens as an object, mapping tokens to probabilities,
|
|
42243
|
+
* given an array of tokens representing the path down the tree (with length less than `n`).
|
|
42244
|
+
* @param {string|string[]} path - the path to the node as a string or an array of tokens
|
|
42245
|
+
* @param {number} [temperature=1] - temperature acts as a knob to adjust the probability that input elements will be selected for the output. At higher values, infrequent words are more likely to be chosen, while at lower values the most frequent inputs are more likely to be output. If no value is provided, then tokens are chosen according to their relative frequency in the input.
|
|
42246
|
+
* @return {object} - a map of tokens to probabilities
|
|
42247
|
+
*/
|
|
42248
|
+
probabilities(path, temperature) {
|
|
42137
42249
|
if (!Array.isArray(path))
|
|
42138
42250
|
path = this.tokenize(path);
|
|
42139
42251
|
const probs = {};
|
|
@@ -42141,11 +42253,17 @@ var RiMarkov = class _RiMarkov {
|
|
|
42141
42253
|
if (parent) {
|
|
42142
42254
|
const children = parent.childNodes();
|
|
42143
42255
|
const weights = children.map((n) => n.count);
|
|
42144
|
-
const pdist = _RiMarkov.parent.randomizer.ndist(weights,
|
|
42256
|
+
const pdist = _RiMarkov.parent.randomizer.ndist(weights, temperature);
|
|
42145
42257
|
children.forEach((c, i) => probs[c.token] = pdist[i]);
|
|
42146
42258
|
}
|
|
42147
42259
|
return probs;
|
|
42148
42260
|
}
|
|
42261
|
+
/**
|
|
42262
|
+
* Returns either the raw (unigram) probability for a single token in the model (0 if it does not exist), OR
|
|
42263
|
+
* the probability of a sequence of K tokens where K is less than `n` (0 if the sequence does not exist).
|
|
42264
|
+
* @param {string|string[]} data - the token or array of tokens to check
|
|
42265
|
+
* @return {number} - the probability of the token or sequence
|
|
42266
|
+
*/
|
|
42149
42267
|
probability(data) {
|
|
42150
42268
|
let p = 0;
|
|
42151
42269
|
if (data && data.length) {
|
|
@@ -42155,10 +42273,20 @@ var RiMarkov = class _RiMarkov {
|
|
|
42155
42273
|
}
|
|
42156
42274
|
return p;
|
|
42157
42275
|
}
|
|
42276
|
+
/**
|
|
42277
|
+
* Returns a string representation of the model or a subtree of the model, optionally ordered by probability.
|
|
42278
|
+
* @param {object} root - the root node of the subtree to print
|
|
42279
|
+
* @param {boolean} sort - if true, sort the nodes by probability
|
|
42280
|
+
* @return {string} - the string representation of the model
|
|
42281
|
+
*/
|
|
42158
42282
|
toString(root, sort) {
|
|
42159
42283
|
root = root || this.root;
|
|
42160
42284
|
return root.asTree(sort).replace(/{}/g, "");
|
|
42161
42285
|
}
|
|
42286
|
+
/**
|
|
42287
|
+
* Returns the number of tokens currently in the model.
|
|
42288
|
+
* @return {number} - number of tokens
|
|
42289
|
+
*/
|
|
42162
42290
|
size() {
|
|
42163
42291
|
return this.root.childCount(true);
|
|
42164
42292
|
}
|
|
@@ -42281,6 +42409,8 @@ var RiMarkov = class _RiMarkov {
|
|
|
42281
42409
|
return sent.replace(MULTI_SP_RE, " ");
|
|
42282
42410
|
}
|
|
42283
42411
|
};
|
|
42412
|
+
__publicField(_RiMarkov, "parent");
|
|
42413
|
+
var RiMarkov = _RiMarkov;
|
|
42284
42414
|
var Node = class _Node {
|
|
42285
42415
|
constructor(parent, word, count) {
|
|
42286
42416
|
this.children = {};
|
|
@@ -42289,6 +42419,7 @@ var Node = class _Node {
|
|
|
42289
42419
|
this.count = count || 0;
|
|
42290
42420
|
this.numChildren = -1;
|
|
42291
42421
|
this.marked = false;
|
|
42422
|
+
this.hidden = false;
|
|
42292
42423
|
}
|
|
42293
42424
|
// Find a (direct) child node with matching token, given a word or node
|
|
42294
42425
|
child(word) {
|
|
@@ -42407,9 +42538,6 @@ function populate(objNode, jsonNode) {
|
|
|
42407
42538
|
populate(newNode, child);
|
|
42408
42539
|
}
|
|
42409
42540
|
}
|
|
42410
|
-
function RiTa() {
|
|
42411
|
-
return RiMarkov.parent;
|
|
42412
|
-
}
|
|
42413
42541
|
function throwError(tries, oks) {
|
|
42414
42542
|
throw Error("Failed after " + tries + " tries" + (oks ? " and " + oks + " successes" : "") + ", you may need to adjust options or add more text");
|
|
42415
42543
|
}
|
|
@@ -42432,228 +42560,697 @@ var markov_default = RiMarkov;
|
|
|
42432
42560
|
|
|
42433
42561
|
// src/rita.js
|
|
42434
42562
|
var _riscript = require('riscript');
|
|
42435
|
-
var
|
|
42436
|
-
|
|
42563
|
+
var RiTa = class _RiTa {
|
|
42564
|
+
/**
|
|
42565
|
+
* Create a RiTa grammar instance
|
|
42566
|
+
* @param {object} [rules] - the rules of the grammar
|
|
42567
|
+
* @param {object} [context] - the context of the grammar
|
|
42568
|
+
* @returns {RiGrammar} - a new RiGrammar instance
|
|
42569
|
+
*/
|
|
42437
42570
|
static grammar(rules, context) {
|
|
42438
|
-
return new RiGrammar(
|
|
42571
|
+
return new (0, _riscript.RiGrammar)(rules, context);
|
|
42439
42572
|
}
|
|
42573
|
+
/**
|
|
42574
|
+
* Add a transform function to the RiScript parser
|
|
42575
|
+
* @param {string} name - the name of the transform
|
|
42576
|
+
* @param {function} definition - the transform function
|
|
42577
|
+
*/
|
|
42440
42578
|
static addTransform(name, definition) {
|
|
42441
|
-
|
|
42579
|
+
_RiTa.riscript.addTransform(name, definition);
|
|
42442
42580
|
}
|
|
42581
|
+
/**
|
|
42582
|
+
* Remove a transform function from the RiScript parser
|
|
42583
|
+
* @param {string} name - the name of the transform to remove
|
|
42584
|
+
*/
|
|
42443
42585
|
static removeTransform(name) {
|
|
42444
|
-
|
|
42586
|
+
_RiTa.riscript.removeTransform(name);
|
|
42445
42587
|
}
|
|
42588
|
+
/**
|
|
42589
|
+
* Returns the names of all current transform functions
|
|
42590
|
+
* @returns {string[]} the names of all transforms
|
|
42591
|
+
*/
|
|
42446
42592
|
static getTransforms() {
|
|
42447
42593
|
return _RiTa.riscript.getTransforms();
|
|
42448
42594
|
}
|
|
42595
|
+
/**
|
|
42596
|
+
* Adds the appropriate article ('a' or 'an') to the word, according to its phonemes (useful as a transform function)
|
|
42597
|
+
* @param {string} word - the word to transform
|
|
42598
|
+
* @returns {string} - the word with an article, e.g., 'honor' -> 'an honor'
|
|
42599
|
+
*/
|
|
42449
42600
|
static articlize(word) {
|
|
42450
|
-
return _riscript.RiScript.articlize(
|
|
42451
|
-
}
|
|
42452
|
-
|
|
42453
|
-
|
|
42454
|
-
|
|
42455
|
-
|
|
42456
|
-
|
|
42457
|
-
|
|
42458
|
-
|
|
42459
|
-
|
|
42460
|
-
|
|
42461
|
-
|
|
42462
|
-
|
|
42463
|
-
|
|
42601
|
+
return _riscript.RiScript.articlize(word, _RiTa);
|
|
42602
|
+
}
|
|
42603
|
+
/**
|
|
42604
|
+
* Evaluates the input script via the RiScript parser
|
|
42605
|
+
* @param {string} script - the script to evaluate
|
|
42606
|
+
* @param {object} [context] - the context in which to evaluate the script
|
|
42607
|
+
* @param {object} [options] - options for the evaluation
|
|
42608
|
+
* @param {boolean} [options.trace] - whether to trace the evaluation
|
|
42609
|
+
* @returns {string} the result of the evaluation
|
|
42610
|
+
*/
|
|
42611
|
+
static evaluate(script, context, options) {
|
|
42612
|
+
return _RiTa.riscript.evaluate(script, context, options);
|
|
42613
|
+
}
|
|
42614
|
+
/**
|
|
42615
|
+
* Creates a new RiMarkov object
|
|
42616
|
+
* @param {number} n - an int representing the n-factor of the markov chain
|
|
42617
|
+
* @param {object} [options] - options for the markov chain
|
|
42618
|
+
* @param {string|string[]} [options.text] - a text string, or array of sentences, to add to the model (same as via model.addText())
|
|
42619
|
+
* @param {number} [options.maxLengthMatch] - # of words allowed in result to match a sequence in the input, default=∞
|
|
42620
|
+
* @param {number} [options.maxAttempts=999] - max attempts to complete one or more generations before erroring, default=999
|
|
42621
|
+
* @param {function} [options.tokenize] - custom tokenizer with tokenize() method, defaults to RiTa.tokenize()
|
|
42622
|
+
* @param {function} [options.untokenize] - custom untokenizer with untokenize() method, defaults to RiTa.untokenize()
|
|
42623
|
+
* @param {boolean} [options.disableInputChecks=false] - if true, allow result to be present in the input, default=false
|
|
42624
|
+
* @param {boolean} [options.trace] - output trace info to the console
|
|
42625
|
+
* @returns {RiMarkov}
|
|
42626
|
+
*/
|
|
42627
|
+
static markov(n, options) {
|
|
42628
|
+
return new markov_default(n, options);
|
|
42629
|
+
}
|
|
42630
|
+
/**
|
|
42631
|
+
* Return a list of occurrences of the key word in the Key-Word-In-Context (KWIC) model.
|
|
42632
|
+
* @overload
|
|
42633
|
+
* @param {string} keyword
|
|
42634
|
+
* @param {object} [options]
|
|
42635
|
+
* @param {number} [options.numWords] - the number of words to include in the context
|
|
42636
|
+
* @param {string} [options.text] - the text as input for the KWIC model
|
|
42637
|
+
* @param {string[]} [options.words] - the array of words to be used as input for the KWIC model
|
|
42638
|
+
* @returns {string[]} all the occurrences of the keyword in the model, each with no more
|
|
42639
|
+
* than 'numWords' words of context on either side
|
|
42640
|
+
* @overload
|
|
42641
|
+
* @param {string} keyword
|
|
42642
|
+
* @param {number} text - the number of words to include in the context
|
|
42643
|
+
* @returns {string[]} all the occurrences of the keyword in the model, each with no more
|
|
42644
|
+
* than 'numWords' words of context on either side
|
|
42645
|
+
*/
|
|
42646
|
+
static kwic(keyword, options) {
|
|
42647
|
+
return _RiTa.concorder.kwic(keyword, options);
|
|
42648
|
+
}
|
|
42649
|
+
/**
|
|
42650
|
+
* Creates a concordance, a list of words with their frequency of occurrence, from the given text and options.
|
|
42651
|
+
* @param {string} text - the text from which to create the concordance
|
|
42652
|
+
* @param {object} [options] - options for the concordance
|
|
42653
|
+
* @param {boolean} [options.ignoreCase=false] - whether to ignore case when creating the concordance
|
|
42654
|
+
* @param {boolean} [options.ignoreStopWords=false] - whether to ignore stop words like
|
|
42655
|
+
* 'the', 'and', 'a', 'of', etc, as specified in RiTa.STOP_WORDS
|
|
42656
|
+
* @param {boolean} [options.ignorePunctuation=false] - whether to ignore punctuation when creating the concordance
|
|
42657
|
+
* @param {string[]} [options.wordsToIgnore=null] - words to ignore when creating the concordance (alternate stop-words)
|
|
42658
|
+
* @returns {object} the concordance, an object with words as keys and frequencies as values
|
|
42659
|
+
*/
|
|
42660
|
+
static concordance(text, options) {
|
|
42661
|
+
return _RiTa.concorder.concordance(text, options);
|
|
42662
|
+
}
|
|
42663
|
+
/**
|
|
42664
|
+
* Returns a random ordering of the input array or a random ordering of integers from 0 to k-1
|
|
42665
|
+
* @overload
|
|
42666
|
+
* @param {object[]} array - the array to shuffle
|
|
42667
|
+
* @returns {object[]} the input array in a random order
|
|
42668
|
+
* @overload
|
|
42669
|
+
* @param {number} k - the number of integers to return
|
|
42670
|
+
* @returns {number[]} an array of the integers from 0 to k-1 in random order
|
|
42671
|
+
*/
|
|
42464
42672
|
static randomOrdering(arrayOrInt) {
|
|
42465
|
-
return _RiTa.randomizer.randomOrdering(
|
|
42673
|
+
return _RiTa.randomizer.randomOrdering(arrayOrInt);
|
|
42466
42674
|
}
|
|
42467
|
-
|
|
42468
|
-
|
|
42675
|
+
/**
|
|
42676
|
+
* Sets the seed for the RiTa random number generator
|
|
42677
|
+
* @param {number} seed - the seed to set
|
|
42678
|
+
*/
|
|
42679
|
+
static randomSeed(seed) {
|
|
42680
|
+
_RiTa.randomizer.seed(seed);
|
|
42469
42681
|
}
|
|
42682
|
+
/**
|
|
42683
|
+
* Returns true if the sentence is a question, else false
|
|
42684
|
+
* @param {string} sentence
|
|
42685
|
+
* @returns {boolean} - true if the sentence is a question, else false
|
|
42686
|
+
*/
|
|
42470
42687
|
static isQuestion(sentence) {
|
|
42471
42688
|
return _RiTa.QUESTIONS.includes(_RiTa.tokenize(sentence)[0].toLowerCase());
|
|
42472
42689
|
}
|
|
42690
|
+
/**
|
|
42691
|
+
* Returns true if the character is a vowel, else false
|
|
42692
|
+
* @param {string} char
|
|
42693
|
+
* @returns {boolean} - true if the character is a vowel, else false
|
|
42694
|
+
*/
|
|
42473
42695
|
static isVowel(char) {
|
|
42474
42696
|
return char && char.length === 1 && _RiTa.VOWELS.includes(char);
|
|
42475
42697
|
}
|
|
42698
|
+
/**
|
|
42699
|
+
* Returns true if the character is a consonant, else false
|
|
42700
|
+
* @param {string} char
|
|
42701
|
+
* @returns {boolean} - true if the character is a consonant, else false
|
|
42702
|
+
*/
|
|
42476
42703
|
static isConsonant(char) {
|
|
42477
42704
|
return char && char.length === 1 && !_RiTa.VOWELS.includes(char) && IS_LETTER.test(char);
|
|
42478
42705
|
}
|
|
42706
|
+
/**
|
|
42707
|
+
* Capitalizes the first letter of the input string, leaving others unchanged
|
|
42708
|
+
* @param {string} string - the string to capitalize
|
|
42709
|
+
* @returns {string} the capitalized string
|
|
42710
|
+
*/
|
|
42479
42711
|
static capitalize(string) {
|
|
42480
42712
|
return string ? string[0].toUpperCase() + string.substring(1) : "";
|
|
42481
42713
|
}
|
|
42482
|
-
|
|
42483
|
-
|
|
42484
|
-
|
|
42485
|
-
|
|
42486
|
-
|
|
42487
|
-
|
|
42714
|
+
/**
|
|
42715
|
+
* Return a random word from the lexicon matching the specified criteria
|
|
42716
|
+
* (length, syllable-count, phonemic pattern, stress pattern, part-of-speech, etc.).
|
|
42717
|
+
* @param {(string|RegExp)} [pattern] - the pattern to match
|
|
42718
|
+
* @param {object} [options]
|
|
42719
|
+
* @param {number} [options.minLength=4] - the minimum length of the word
|
|
42720
|
+
* @param {number} [options.maxLength=-1] - the maximum length of the word
|
|
42721
|
+
* @param {number} [options.numSyllables=null] - the number of syllables in the word
|
|
42722
|
+
* @param {number} [options.limit=10] - the maximum number of results to return
|
|
42723
|
+
* @param {string} [options.pos=null] - the part-of-speech of the word to return,
|
|
42724
|
+
* either from the Penn tag set or the simplified tag set [a, r, v, n]
|
|
42725
|
+
* @param {RegExp} [options.pattern=null] - the spelling or phonemic pattern to match
|
|
42726
|
+
* @param {string} [options.type=null] - the type of regex or string pattern to match,
|
|
42727
|
+
* options are 'stresses' or 'phones' or 'letters' (the default)
|
|
42728
|
+
* @returns {string} a random word matching the criteria in the options object
|
|
42729
|
+
*/
|
|
42730
|
+
static randomWord(pattern, options) {
|
|
42731
|
+
return _RiTa.lexicon.randomWord(pattern, options);
|
|
42732
|
+
}
|
|
42733
|
+
/**
|
|
42734
|
+
* Returns words that rhyme with the given word. Two words are considered as rhyming if
|
|
42735
|
+
* their final stressed vowel and all following phonemes are identical.
|
|
42736
|
+
* @param {string} word
|
|
42737
|
+
* @param {object} [options]
|
|
42738
|
+
* @param {number} [options.minLength=4] - the minimum length of the words
|
|
42739
|
+
* @param {number} [options.maxLength] - the maximum length of the words
|
|
42740
|
+
* @param {number} [options.numSyllables] - the number of syllables in the words
|
|
42741
|
+
* @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
|
|
42742
|
+
* @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
|
|
42743
|
+
* @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
|
|
42744
|
+
* or the simplified tag set [a, r, v, n]
|
|
42745
|
+
* @returns {Promise<string[]>} an array of rhymes that match criteria in the options object
|
|
42746
|
+
*/
|
|
42747
|
+
static async rhymes(word, options) {
|
|
42748
|
+
return await _RiTa.lexicon.rhymes(word, options);
|
|
42749
|
+
}
|
|
42750
|
+
/**
|
|
42751
|
+
* Returns true if the two words rhyme, else false. Two words are considered as rhyming if
|
|
42752
|
+
* their final stressed vowel and all following phonemes are identical.
|
|
42753
|
+
* @param {string} word1 - the first word to compare
|
|
42754
|
+
* @param {string} word2 - the second word to compare
|
|
42755
|
+
* @returns {boolean} true if the two words rhyme, else false
|
|
42756
|
+
*/
|
|
42488
42757
|
static isRhyme(word1, word2) {
|
|
42489
|
-
return _RiTa.lexicon.isRhyme(
|
|
42490
|
-
}
|
|
42491
|
-
|
|
42492
|
-
|
|
42493
|
-
|
|
42494
|
-
|
|
42495
|
-
|
|
42496
|
-
|
|
42497
|
-
|
|
42758
|
+
return _RiTa.lexicon.isRhyme(word1, word2);
|
|
42759
|
+
}
|
|
42760
|
+
/**
|
|
42761
|
+
* Finds alliterations by comparing the phonemes of the input string to those
|
|
42762
|
+
* of each word in the lexicon via a minimum-edit-distance metric.
|
|
42763
|
+
* @param {string} word
|
|
42764
|
+
* @param {object} [options]
|
|
42765
|
+
* @param {number} [options.minLength=4] - the minimum length of the words
|
|
42766
|
+
* @param {number} [options.maxLength] - the maximum length of the words
|
|
42767
|
+
* @param {number} [options.numSyllables] - the number of syllables in the words
|
|
42768
|
+
* @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
|
|
42769
|
+
* @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
|
|
42770
|
+
* @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
|
|
42771
|
+
* or the simplified tag set [a, r, v, n]
|
|
42772
|
+
* @returns {Promise<string[]>} an array of alliterations matching criteria in the options object
|
|
42773
|
+
*/
|
|
42774
|
+
static async alliterations(word, options) {
|
|
42775
|
+
return await _RiTa.lexicon.alliterations(word, options);
|
|
42776
|
+
}
|
|
42777
|
+
/**
|
|
42778
|
+
* Returns true if the word is in the lexicon, else false
|
|
42779
|
+
* @param {string} word - the word to check
|
|
42780
|
+
* @param {object} [options] - options for the search
|
|
42781
|
+
* @param {boolean} [options.noDerivations=false] - whether to ignore derivations and only search for raw words
|
|
42782
|
+
* @returns {boolean} true if the word is in the lexicon, else false
|
|
42783
|
+
*/
|
|
42784
|
+
static hasWord(word, options) {
|
|
42785
|
+
return _RiTa.lexicon.hasWord(word, options);
|
|
42786
|
+
}
|
|
42787
|
+
/**
|
|
42788
|
+
* Returns true if the word is an abbreviation, else false
|
|
42789
|
+
* @param {string} input - the word to check
|
|
42790
|
+
* @param {object} [options] - options for the search
|
|
42791
|
+
* @param {boolean} [options.caseSensitive=false] - whether to respect case when checking for abbreviations
|
|
42792
|
+
* @returns {boolean} true if the word is an abbreviation, else false
|
|
42793
|
+
*/
|
|
42794
|
+
static isAbbrev(input, options) {
|
|
42498
42795
|
if (typeof input === "string") {
|
|
42499
|
-
if (caseSensitive)
|
|
42796
|
+
if (_optionalChain([options, 'optionalAccess', _5 => _5.caseSensitive]))
|
|
42500
42797
|
return _RiTa.ABRV.includes(input.trim());
|
|
42501
42798
|
let check = input.trim().toLowerCase();
|
|
42502
42799
|
return _RiTa.ABRV.some((a) => a.toLowerCase() === check);
|
|
42503
42800
|
}
|
|
42504
42801
|
}
|
|
42802
|
+
/**
|
|
42803
|
+
* Returns true if the two words are an alliteration (if their first stressed consonants match).
|
|
42804
|
+
* Note: returns true if word1 equals word2, and false if either (or both) are null.
|
|
42805
|
+
* @param {string} word1 - the first word to compare
|
|
42806
|
+
* @param {string} word2 - the second word to compare
|
|
42807
|
+
* @returns {boolean} true if the two words are an alliteration, else false
|
|
42808
|
+
*/
|
|
42505
42809
|
static isAlliteration(word1, word2) {
|
|
42506
|
-
return _RiTa.lexicon.isAlliteration(
|
|
42507
|
-
}
|
|
42508
|
-
|
|
42509
|
-
|
|
42510
|
-
|
|
42511
|
-
|
|
42512
|
-
|
|
42513
|
-
|
|
42514
|
-
|
|
42515
|
-
|
|
42810
|
+
return _RiTa.lexicon.isAlliteration(word1, word2);
|
|
42811
|
+
}
|
|
42812
|
+
/**
|
|
42813
|
+
* Compares the letters of the input word (using a version of the Levenshtein min-edit distance algorithm)
|
|
42814
|
+
* to each word in the lexicon, returning the set of closest matches that also match the criteria in the options object.
|
|
42815
|
+
* @param {string} word - the word to match
|
|
42816
|
+
* @param {object} [options] - options for the search
|
|
42817
|
+
* @param {number} [options.minLength=4] - the minimum length of the words
|
|
42818
|
+
* @param {number} [options.maxLength] - the maximum length of the words
|
|
42819
|
+
* @param {number} [options.numSyllables] - the number of syllables in the words
|
|
42820
|
+
* @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
|
|
42821
|
+
* @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
|
|
42822
|
+
* @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set or the simplified tag set [a, r, v, n]
|
|
42823
|
+
* @returns {Promise<string[]>} an array of words matching the spelling pattern and criteria in the options object
|
|
42824
|
+
*/
|
|
42825
|
+
static async spellsLike(word, options) {
|
|
42826
|
+
return await _RiTa.lexicon.spellsLike(word, options);
|
|
42827
|
+
}
|
|
42828
|
+
/**
|
|
42829
|
+
* Compares the phonemes of the input pattern (using a version of the Levenshtein min-edit distance algorithm)
|
|
42830
|
+
* to each word in the lexicon, returning the set of closest matches that also match the criteria in the options object.
|
|
42831
|
+
* @param {string} word - the word to match
|
|
42832
|
+
* @param {object} [options] - options for the search
|
|
42833
|
+
* @param {number} [options.minLength=4] - the minimum length of the words
|
|
42834
|
+
* @param {number} [options.maxLength] - the maximum length of the words
|
|
42835
|
+
* @param {number} [options.numSyllables] - the number of syllables in the words
|
|
42836
|
+
* @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
|
|
42837
|
+
* @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
|
|
42838
|
+
* @param {boolean} [options.matchSpelling=false] - if true, will also attempt to match spelling by returning an intersection with RiTa.spellsLike()
|
|
42839
|
+
* @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
|
|
42840
|
+
* or the simplified tag set [a, r, v, n]
|
|
42841
|
+
* @returns {Promise<string[]>} an array of words matching the phonemic pattern and criteria in the options object
|
|
42842
|
+
*/
|
|
42843
|
+
static async soundsLike(word, options) {
|
|
42844
|
+
return await _RiTa.lexicon.soundsLike(word, options);
|
|
42845
|
+
}
|
|
42846
|
+
/**
|
|
42847
|
+
* Generates part-of-speech tags for each word in the input with tags
|
|
42848
|
+
* from the Penn tag set or the simplified tag set [a, r, v, n].
|
|
42849
|
+
* @param {(string|string[])} word - the word or words to tag
|
|
42850
|
+
* @param {object} [options] - options for the tagging
|
|
42851
|
+
* @param {boolean} [options.simple] - use simple tags (noun=n, verb=v, adjective=a, adverb=r)
|
|
42852
|
+
* @returns {string|string[]} - an array of part-of-speech tags for each word in the input
|
|
42853
|
+
*/
|
|
42854
|
+
static pos(word, options) {
|
|
42855
|
+
if (options && "inline" in options) {
|
|
42856
|
+
throw Error("Use RiTa.posInline() instead");
|
|
42857
|
+
}
|
|
42858
|
+
return _RiTa.tagger.tag(word, options);
|
|
42516
42859
|
}
|
|
42860
|
+
/**
|
|
42861
|
+
* Returns true if the word has a noun form. That is, if any of its possible
|
|
42862
|
+
* parts of speech are any variant of a noun in the Penn tag set (e.g. nn, nns, nnp, nnps).
|
|
42863
|
+
* @param {string} word - the word to check
|
|
42864
|
+
* @returns {boolean} - true if the word is a noun, else false
|
|
42865
|
+
*/
|
|
42517
42866
|
static isNoun(word) {
|
|
42518
42867
|
return _RiTa.tagger.isNoun(word);
|
|
42519
42868
|
}
|
|
42869
|
+
/**
|
|
42870
|
+
* Returns true if word has an adjective form. That is, if any of its possible parts of speech
|
|
42871
|
+
* are any variant of an adjective in the Penn tag set (e.g. jj, jjr, jjs).
|
|
42872
|
+
* @param {string} word - the word to check
|
|
42873
|
+
* @returns {boolean} - true if the word is an adjective, else false
|
|
42874
|
+
*/
|
|
42520
42875
|
static isAdjective(word) {
|
|
42521
42876
|
return _RiTa.tagger.isAdjective(word);
|
|
42522
42877
|
}
|
|
42878
|
+
/**
|
|
42879
|
+
* Returns true if the word has an adverb form. That is, if any of its possible parts of speech
|
|
42880
|
+
* are any variant of an adverb in the Penn tag set (e.g. rb, rbr, rbs).
|
|
42881
|
+
* @param {string} word - the word to check
|
|
42882
|
+
* @returns {boolean} - true if the word is an adverb, else false
|
|
42883
|
+
*/
|
|
42523
42884
|
static isAdverb(word) {
|
|
42524
42885
|
return _RiTa.tagger.isAdverb(word);
|
|
42525
42886
|
}
|
|
42526
|
-
|
|
42527
|
-
|
|
42528
|
-
|
|
42887
|
+
/**
|
|
42888
|
+
* Returns true if the word has a verb form. That is, if any of its possible
|
|
42889
|
+
* parts of speech are any variant of a verb in the Penn tag set (e.g. vb, vbg, vbd, vbp, vbz).
|
|
42890
|
+
* @param {string} word - the word to check
|
|
42891
|
+
* @returns {boolean} - true if the word is a verb, else false
|
|
42892
|
+
*/
|
|
42529
42893
|
static isVerb(word) {
|
|
42530
42894
|
return _RiTa.tagger.isVerb(word);
|
|
42531
42895
|
}
|
|
42532
|
-
|
|
42533
|
-
|
|
42534
|
-
|
|
42896
|
+
/**
|
|
42897
|
+
* Returns true if every character of 'text' is a punctuation character.
|
|
42898
|
+
* @param {string} text
|
|
42899
|
+
* @returns {boolean} true if every character of 'text' is punctuation, else false
|
|
42900
|
+
*/
|
|
42901
|
+
static isPunct(text) {
|
|
42902
|
+
return text && text.length && ONLY_PUNCT.test(text);
|
|
42903
|
+
}
|
|
42904
|
+
/**
|
|
42905
|
+
* Tags the input string with part-of-speech tags, either from the Penn tag set or the simplified tag set [a, r, v, n].
|
|
42906
|
+
* @param {string} sentence - the sentence to tag
|
|
42907
|
+
* @param {object} [options] - options for the tagging
|
|
42908
|
+
* @param {boolean} [options.simple=false] - use the simplified tag set [a, r, v, n]
|
|
42909
|
+
* @returns {string} the tagged sentence
|
|
42910
|
+
*/
|
|
42911
|
+
static posInline(sentence, options) {
|
|
42912
|
+
return _RiTa.tagger.tag(sentence, { ...options, inline: true });
|
|
42535
42913
|
}
|
|
42914
|
+
/**
|
|
42915
|
+
* Return the singular form of the input word
|
|
42916
|
+
* @param {string} word - the word to singularize
|
|
42917
|
+
* @returns {string} the singular form of the input word
|
|
42918
|
+
*/
|
|
42536
42919
|
static singularize(word) {
|
|
42537
|
-
return _RiTa.inflector.singularize(
|
|
42920
|
+
return _RiTa.inflector.singularize(word);
|
|
42538
42921
|
}
|
|
42922
|
+
/**
|
|
42923
|
+
* Return the plural form of the input word
|
|
42924
|
+
* @param {string} word - the word to pluralize
|
|
42925
|
+
* @returns {string} the plural form of the input word
|
|
42926
|
+
*/
|
|
42539
42927
|
static pluralize(word) {
|
|
42540
|
-
return _RiTa.inflector.pluralize(
|
|
42541
|
-
}
|
|
42542
|
-
|
|
42543
|
-
|
|
42544
|
-
|
|
42545
|
-
|
|
42546
|
-
|
|
42547
|
-
|
|
42548
|
-
|
|
42549
|
-
|
|
42550
|
-
|
|
42551
|
-
|
|
42552
|
-
|
|
42553
|
-
|
|
42554
|
-
|
|
42555
|
-
|
|
42556
|
-
|
|
42557
|
-
|
|
42558
|
-
|
|
42928
|
+
return _RiTa.inflector.pluralize(word);
|
|
42929
|
+
}
|
|
42930
|
+
/**
|
|
42931
|
+
* Searches for words in the lexicon matching the given criteria, either by length, syllable-count,
|
|
42932
|
+
* spelling, phonemes, stresses, part-of-speech, etc. If no regex or options are supplied, the full set of words is returned.
|
|
42933
|
+
* @param {(string|RegExp)} [pattern] - the pattern to match
|
|
42934
|
+
* @param {object} [options] - options for the search
|
|
42935
|
+
* @param {number} [options.minLength=4] - the minimum length of the words
|
|
42936
|
+
* @param {number} [options.maxLength] - the maximum length of the words
|
|
42937
|
+
* @param {number} [options.numSyllables] - the number of syllables in the words
|
|
42938
|
+
* @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
|
|
42939
|
+
* @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
|
|
42940
|
+
* @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
|
|
42941
|
+
* or the simplified tag set [a, r, v, n]
|
|
42942
|
+
* @param {string} [options.type] - the type of regex or string pattern to match, options are 'stresses'
|
|
42943
|
+
* or 'phones' or 'letters' (the default)
|
|
42944
|
+
* @returns {Promise<string[]>} an array of words matching the criteria in both the pattern and the options object
|
|
42945
|
+
*/
|
|
42946
|
+
static async search(pattern, options) {
|
|
42947
|
+
return await _RiTa.lexicon.search(pattern, options);
|
|
42948
|
+
}
|
|
42949
|
+
/**
|
|
42950
|
+
* Returns an array containing all unique alphabetical words (tokens) in the text.
|
|
42951
|
+
* Punctuation and case are ignored unless specified otherwise.
|
|
42952
|
+
* @param {string} text - The text from which to extract the tokens
|
|
42953
|
+
* @param {object} [options] - The options
|
|
42954
|
+
* @param {boolean} [options.caseSensitive=false] - Whether to pay attention to case
|
|
42955
|
+
* @param {boolean} [options.ignoreStopWords=false] - Whether to ignore words such as 'the', 'and', 'a', 'of', etc,
|
|
42956
|
+
* as specified in RiTa.STOP_WORDS
|
|
42957
|
+
* @param {boolean} [options.splitContractions=false] - Whether to convert contractions
|
|
42958
|
+
* (e.g., "I'd" or "she'll") into multiple individual tokens
|
|
42959
|
+
* @param {boolean} [options.includePunct=false] - Whether to include punctuation in the results
|
|
42960
|
+
* @param {boolean} [options.sort=false] - Whether to sort the tokens before returning them
|
|
42961
|
+
* @returns {string[]} Array of tokens
|
|
42962
|
+
*/
|
|
42963
|
+
static tokens(text, options = {
|
|
42964
|
+
caseSensitive: false,
|
|
42965
|
+
ignoreStopWords: false,
|
|
42966
|
+
splitContractions: false,
|
|
42967
|
+
includePunct: false,
|
|
42968
|
+
sort: false
|
|
42969
|
+
}) {
|
|
42970
|
+
return _RiTa.tokenizer.tokens(text, options);
|
|
42971
|
+
}
|
|
42972
|
+
/**
|
|
42973
|
+
* Tokenizes an input string into words, according to the Penn Treebank conventions
|
|
42974
|
+
* @param {string} input - The text to tokenize
|
|
42975
|
+
* @param {object} [options] - The options
|
|
42976
|
+
* @param {RegExp} [options.regex=null] - An optional custom regex to split on
|
|
42977
|
+
* @param {boolean} [options.splitHyphens=false] - Whether to split hyphenated words
|
|
42978
|
+
* (e.g., "mother-in-law") into multiple individual tokens
|
|
42979
|
+
* @param {boolean} [options.splitContractions=false] - Whether to split contractions
|
|
42980
|
+
* (e.g., "I'd" or "she'll") into multiple individual tokens
|
|
42981
|
+
* @returns {string[]} Array of tokens
|
|
42982
|
+
*/
|
|
42983
|
+
static tokenize(input, options) {
|
|
42984
|
+
return _RiTa.tokenizer.tokenize(input, options);
|
|
42985
|
+
}
|
|
42986
|
+
/**
|
|
42987
|
+
* Joins an array (of words and punctuation) into a sentence, according to
|
|
42988
|
+
* the Penn Treebank conventions. The inverse of RiTa.tokenize().
|
|
42989
|
+
* @param {string[]} input - The array of words to join
|
|
42990
|
+
* @param {string} [delim=' '] - The delimiter to use between words, or a space by default
|
|
42991
|
+
* @returns {string} The joined sentence
|
|
42992
|
+
*/
|
|
42993
|
+
static untokenize(input, delim = " ") {
|
|
42994
|
+
return _RiTa.tokenizer.untokenize(input, delim);
|
|
42995
|
+
}
|
|
42996
|
+
/**
|
|
42997
|
+
* Splits the input text into sentences, following Penn Treebank conventions and the specified options.
|
|
42998
|
+
* @param {string} text - The text to split
|
|
42999
|
+
* @param {(string|RegExp)} [pattern] - An optional custom regex to split on
|
|
43000
|
+
* @returns {string[]} An array of sentences
|
|
43001
|
+
*/
|
|
43002
|
+
static sentences(text, pattern) {
|
|
43003
|
+
return _RiTa.tokenizer.sentences(text, pattern);
|
|
43004
|
+
}
|
|
43005
|
+
/**
|
|
43006
|
+
* Returns true if the word is a 'stop word', a commonly used word that is often ignored in text processing.
|
|
43007
|
+
* To use your own list, set RiTa.STOP_WORDS to a new array of (lowercase) words.
|
|
43008
|
+
* @param {string} word - the word to check
|
|
43009
|
+
* @returns {boolean} true if the word is a stop word, else false
|
|
43010
|
+
*/
|
|
43011
|
+
static isStopWord(word) {
|
|
43012
|
+
return _RiTa.STOP_WORDS.includes(word.toLowerCase());
|
|
42559
43013
|
}
|
|
42560
|
-
|
|
42561
|
-
|
|
43014
|
+
/**
|
|
43015
|
+
* Extracts base roots from a word (or from each word of a phrase) using the Snowball English stemmer.
|
|
43016
|
+
* @param {string} word - the word to stem
|
|
43017
|
+
* @returns {string} the base root of the word
|
|
43018
|
+
*/
|
|
43019
|
+
static stem(word) {
|
|
43020
|
+
return stemmer_default.stem(word);
|
|
42562
43021
|
}
|
|
43022
|
+
/**
|
|
43023
|
+
* Returns the present participle of the input word (e.g., "walking" for "walk").
|
|
43024
|
+
* @param {string} verbWord - the word to get the present participle of
|
|
43025
|
+
* @returns {string} the present participle of the input word
|
|
43026
|
+
*/
|
|
42563
43027
|
static presentPart(verbWord) {
|
|
42564
|
-
return _RiTa.conjugator.presentPart(
|
|
43028
|
+
return _RiTa.conjugator.presentPart(verbWord);
|
|
42565
43029
|
}
|
|
43030
|
+
/**
|
|
43031
|
+
* Returns the past participle of the input word (e.g., "walked" for "walk").
|
|
43032
|
+
* @param {string} verbWord
|
|
43033
|
+
* @returns {string} the past participle of the input word
|
|
43034
|
+
*/
|
|
42566
43035
|
static pastPart(verbWord) {
|
|
42567
|
-
return _RiTa.conjugator.pastPart(
|
|
42568
|
-
}
|
|
42569
|
-
|
|
42570
|
-
|
|
42571
|
-
|
|
42572
|
-
|
|
42573
|
-
|
|
42574
|
-
|
|
42575
|
-
|
|
42576
|
-
|
|
42577
|
-
|
|
42578
|
-
|
|
42579
|
-
|
|
42580
|
-
|
|
42581
|
-
|
|
42582
|
-
|
|
43036
|
+
return _RiTa.conjugator.pastPart(verbWord);
|
|
43037
|
+
}
|
|
43038
|
+
/**
|
|
43039
|
+
* Conjugates the 'verb' according to the specified options (tense, person, number, etc.)
|
|
43040
|
+
* @param {string} verbWord
|
|
43041
|
+
* @param {object} [options]
|
|
43042
|
+
* @param {number} [options.tense] - the tense of the verb, either RiTa.PAST, RiTa.PRESENT, or RiTa.FUTURE
|
|
43043
|
+
* @param {number} [options.person] - the person of the verb, either RiTa.FIRST, RiTa.SECOND, or RiTa.THIRD
|
|
43044
|
+
* @param {number} [options.number] - the number of the verb, either RiTa.SINGULAR or RiTa.PLURAL
|
|
43045
|
+
* @param {number} [options.form] - the form of the verb, either RiTa.INFINITIVE or RiTa.GERUND
|
|
43046
|
+
* @param {boolean} [options.passive] - whether the verb should be passive
|
|
43047
|
+
* @param {boolean} [options.progressive] - whether the verb should be progressive
|
|
43048
|
+
* @param {boolean} [options.perfect] - whether the verb should be perfect
|
|
43049
|
+
* @param {boolean} [options.interrogative] - whether the verb should be in interrogative form
|
|
43050
|
+
* @returns {string} the conjugated verb
|
|
43051
|
+
*/
|
|
43052
|
+
static conjugate(verbWord, options) {
|
|
43053
|
+
return _RiTa.conjugator.conjugate(verbWord, options);
|
|
43054
|
+
}
|
|
43055
|
+
/**
|
|
43056
|
+
* Analyzes the input and returns a new string containing the stresses for each syllable of the input text.
|
|
43057
|
+
* @param {string} input - the text to analyze
|
|
43058
|
+
* @param {object} [options] - options for the analysis
|
|
43059
|
+
* @returns {string} a string containing the stresses for each syllable of the input text
|
|
43060
|
+
*/
|
|
43061
|
+
static stresses(input, options) {
|
|
43062
|
+
return _RiTa.analyzer.analyze(input, options).stresses;
|
|
43063
|
+
}
|
|
43064
|
+
/**
|
|
43065
|
+
* Analyzes the input and returns a new string containing the syllables of the input text.
|
|
43066
|
+
* @param {string} input - the text to analyze
|
|
43067
|
+
* @param {object} [options] - options for the analysis
|
|
43068
|
+
* @returns {string} a string containing the syllables of the input text
|
|
43069
|
+
*/
|
|
43070
|
+
static syllables(input, options) {
|
|
43071
|
+
return _RiTa.analyzer.analyze(input, options).syllables;
|
|
43072
|
+
}
|
|
43073
|
+
/**
|
|
43074
|
+
* Analyzes the input and returns a new string containing the phonemes of the input text.
|
|
43075
|
+
* @param {string} input - the text to analyze
|
|
43076
|
+
* @param {object} [options] - options for the analysis
|
|
43077
|
+
* @returns {string} a string containing the phones of the input text
|
|
43078
|
+
*/
|
|
43079
|
+
static phones(input, options) {
|
|
43080
|
+
return _RiTa.analyzer.analyze(input, options).phones;
|
|
43081
|
+
}
|
|
43082
|
+
/**
|
|
43083
|
+
* Analyzes the input to compute a set of features for the input,
|
|
43084
|
+
* including phonemes, syllables, stresses, and part-of-speech tags.
|
|
43085
|
+
* @param {string} input - the text to analyze
|
|
43086
|
+
* @param {object} [options] - options for the analysis
|
|
43087
|
+
* @param {boolean} [options.simple=false] - whether to use the simplified tag set [a, r, v, n]
|
|
43088
|
+
* @returns {object} an object containing the features of the input text (phones, syllables, stresses, pos), or the features inline
|
|
43089
|
+
*/
|
|
43090
|
+
static analyze(input, options) {
|
|
43091
|
+
return _RiTa.analyzer.analyze(input, options);
|
|
42583
43092
|
}
|
|
42584
43093
|
////////////////////////////// lex-sync ////////////////////////////
|
|
42585
|
-
|
|
42586
|
-
|
|
42587
|
-
|
|
42588
|
-
|
|
42589
|
-
|
|
42590
|
-
|
|
42591
|
-
|
|
42592
|
-
|
|
42593
|
-
|
|
42594
|
-
|
|
42595
|
-
|
|
42596
|
-
|
|
42597
|
-
|
|
42598
|
-
|
|
42599
|
-
|
|
43094
|
+
/**
|
|
43095
|
+
* A synchronous version of RiTa.spellsLike(). It compares the letters of the input word
|
|
43096
|
+
* (using a version of the Levenshtein min-edit distance algorithm) to each word in the lexicon,
|
|
43097
|
+
* returning the set of closest matches that also match the criteria in the options object.
|
|
43098
|
+
* @param {string} word - the word to match
|
|
43099
|
+
* @param {object} [options] - options for the search
|
|
43100
|
+
* @param {number} [options.minLength=4] - the minimum length of the words
|
|
43101
|
+
* @param {number} [options.maxLength] - the maximum length of the words
|
|
43102
|
+
* @param {number} [options.numSyllables] - the number of syllables in the words
|
|
43103
|
+
* @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
|
|
43104
|
+
* @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
|
|
43105
|
+
* or the simplified tag set [a, r, v, n]
|
|
43106
|
+
* @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
|
|
43107
|
+
* @return {string[]} an array of words matching the spelling pattern and criteria in the options object
|
|
43108
|
+
*/
|
|
43109
|
+
static spellsLikeSync(word, options) {
|
|
43110
|
+
return _RiTa.lexicon.spellsLikeSync(word, options);
|
|
43111
|
+
}
|
|
43112
|
+
/**
|
|
43113
|
+
* A synchronous version of RiTa.soundsLike(). It compares the phonemes of the input pattern (using a version of the Levenshtein min-edit distance algorithm)
|
|
43114
|
+
* to each word in the lexicon, returning the set of closest matches that also match the criteria in the options object.
|
|
43115
|
+
* @param {string} word - the word to match
|
|
43116
|
+
* @param {object} [options] - options for the search
|
|
43117
|
+
* @param {number} [options.minLength=4] - the minimum length of the words
|
|
43118
|
+
* @param {number} [options.maxLength] - the maximum length of the words
|
|
43119
|
+
* @param {number} [options.numSyllables] - the number of syllables in the words
|
|
43120
|
+
* @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
|
|
43121
|
+
* @param {boolean} [options.matchSpelling=false] - if true, will also attempt to match spelling by returning an intersection with RiTa.spellsLike()
|
|
43122
|
+
* @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
|
|
43123
|
+
* @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
|
|
43124
|
+
* or the simplified tag set [a, r, v, n]
|
|
43125
|
+
* @return {string[]} an array of words matching the phonemic pattern and criteria in the options object
|
|
43126
|
+
*/
|
|
43127
|
+
static soundsLikeSync(word, options) {
|
|
43128
|
+
return _RiTa.lexicon.soundsLikeSync(word, options);
|
|
43129
|
+
}
|
|
43130
|
+
/**
|
|
43131
|
+
* Synchronous version of RiTa.rhymes(). Returns words that rhyme with the given word.
|
|
43132
|
+
* Two words are considered as rhyming if their final stressed vowel and all following phonemes are identical.
|
|
43133
|
+
* @param {string} word - the word to match
|
|
43134
|
+
* @param {object} [options] - options for the search
|
|
43135
|
+
* @param {number} [options.minLength=4] - the minimum length of the words
|
|
43136
|
+
* @param {number} [options.maxLength] - the maximum length of the words
|
|
43137
|
+
* @param {number} [options.numSyllables] - the number of syllables in the words
|
|
43138
|
+
* @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
|
|
43139
|
+
* @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
|
|
43140
|
+
* @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
|
|
43141
|
+
* or the simplified tag set [a, r, v, n]
|
|
43142
|
+
* @return {string[]} an array of rhymes that match criteria in the options object
|
|
43143
|
+
*/
|
|
43144
|
+
static rhymesSync(word, options) {
|
|
43145
|
+
return _RiTa.lexicon.rhymesSync(word, options);
|
|
43146
|
+
}
|
|
43147
|
+
/**
|
|
43148
|
+
* A synchronous version of RiTa.search(). Searches for words in the lexicon matching
|
|
43149
|
+
* the given criteria, either by length, syllable-count, spelling, phonemes, stresses,
|
|
43150
|
+
* part-of-speech, etc.
|
|
43151
|
+
* @param {(string|RegExp)} [pattern] - the pattern to match
|
|
43152
|
+
* @param {object} [options] - options for the search
|
|
43153
|
+
* @param {number} [options.minLength=4] - the minimum length of the words
|
|
43154
|
+
* @param {number} [options.maxLength] - the maximum length of the words
|
|
43155
|
+
* @param {number} [options.numSyllables] - the number of syllables in the words
|
|
43156
|
+
* @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
|
|
43157
|
+
* @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
|
|
43158
|
+
* @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
|
|
43159
|
+
* or the simplified tag set [a, r, v, n]
|
|
43160
|
+
* @param {string} [options.type] - the type of regex or string pattern to match, options are 'stresses' or 'phones' or 'letters' (the default)
|
|
43161
|
+
* @return {string[]} an array of words matching the criteria in both the pattern and the options object
|
|
43162
|
+
*/
|
|
43163
|
+
static searchSync(pattern, options) {
|
|
43164
|
+
return _RiTa.lexicon.searchSync(pattern, options);
|
|
43165
|
+
}
|
|
43166
|
+
/**
|
|
43167
|
+
* A synchronous version of RiTa.alliterations(). Finds alliterations by comparing the phonemes
|
|
43168
|
+
* of the input string to those of each word in the lexicon via a minimum-edit-distance metric.
|
|
43169
|
+
* @param {string} word - the word to match
|
|
43170
|
+
* @param {object} [options] - options for the search
|
|
43171
|
+
* @param {number} [options.minLength=4] - the minimum length of the words
|
|
43172
|
+
* @param {number} [options.maxLength] - the maximum length of the words
|
|
43173
|
+
* @param {number} [options.numSyllables] - the number of syllables in the words
|
|
43174
|
+
* @param {number} [options.limit=10] - the maximum number of results to return (pass -1 to return all matches)
|
|
43175
|
+
* @param {boolean} [options.shuffle=false] - whether to shuffle the results before returning them
|
|
43176
|
+
* @param {string} [options.pos] - the part-of-speech of the words to return, either from the Penn tag set
|
|
43177
|
+
* or the simplified tag set [a, r, v, n]
|
|
43178
|
+
* @return {string[]} an array of alliterations matching criteria in the options object
|
|
43179
|
+
*/
|
|
43180
|
+
static alliterationsSync(word, options) {
|
|
43181
|
+
return _RiTa.lexicon.alliterationsSync(word, options);
|
|
42600
43182
|
}
|
|
42601
43183
|
////////////////////////////// niapa /////////////////////////////
|
|
42602
|
-
|
|
42603
|
-
|
|
42604
|
-
|
|
42605
|
-
|
|
43184
|
+
/**
|
|
43185
|
+
* Returns a random integer from a range
|
|
43186
|
+
* The version of randi() with one parameter returns a random integer from 0 up to but not including the number.
|
|
43187
|
+
* The version of randi() with two parameters returns a random integer from the first number up to but not including the second.
|
|
43188
|
+
* @param {number} param1 - the first parameter
|
|
43189
|
+
* @param {number} [param2] - the second optional parameter
|
|
43190
|
+
* @returns {number} a random integer from the range
|
|
43191
|
+
*/
|
|
43192
|
+
static randi(param1, param2) {
|
|
43193
|
+
return Math.floor(_RiTa.random(...arguments));
|
|
43194
|
+
}
|
|
43195
|
+
/**
|
|
43196
|
+
* Returns a random number or a random element from an array.
|
|
43197
|
+
* The version of random() with no parameters returns a random number from 0 up to but not including 1.
|
|
43198
|
+
* The version of random() with one parameter works one of two ways. If the argument passed is a number, random() returns a random number from 0 up to but not including the number.
|
|
43199
|
+
* If the argument passed is an array, random() returns a random element from that array.
|
|
43200
|
+
* The version of random() with two parameters returns a random number from the first number up to but not including the second.
|
|
43201
|
+
* @param {number|object[]} [param1] - the minimum value of the random number, or an array of values to choose from
|
|
43202
|
+
* @param {number} [param2] - the maximum value of the random number
|
|
43203
|
+
* @returns {number|object} a random number or a random element from the array
|
|
43204
|
+
*/
|
|
43205
|
+
static random(param1, param2) {
|
|
42606
43206
|
return _RiTa.randomizer.random(...arguments);
|
|
42607
43207
|
}
|
|
42608
43208
|
};
|
|
42609
|
-
|
|
42610
|
-
|
|
42611
|
-
|
|
42612
|
-
|
|
42613
|
-
|
|
42614
|
-
|
|
42615
|
-
|
|
42616
|
-
|
|
42617
|
-
|
|
42618
|
-
|
|
42619
|
-
|
|
42620
|
-
|
|
42621
|
-
|
|
42622
|
-
|
|
42623
|
-
|
|
42624
|
-
|
|
42625
|
-
|
|
42626
|
-
|
|
42627
|
-
|
|
42628
|
-
|
|
42629
|
-
|
|
42630
|
-
|
|
42631
|
-
|
|
42632
|
-
|
|
42633
|
-
|
|
42634
|
-
|
|
42635
|
-
|
|
42636
|
-
|
|
42637
|
-
|
|
42638
|
-
|
|
42639
|
-
|
|
42640
|
-
|
|
42641
|
-
|
|
42642
|
-
|
|
42643
|
-
|
|
42644
|
-
|
|
42645
|
-
|
|
42646
|
-
|
|
42647
|
-
|
|
42648
|
-
|
|
42649
|
-
RiTa2.GERUND = 2;
|
|
42650
|
-
RiTa2.SPLIT_CONTRACTIONS = false;
|
|
43209
|
+
RiTa.RiGrammar = _riscript.RiGrammar;
|
|
43210
|
+
RiTa.RiMarkov = markov_default;
|
|
43211
|
+
RiTa.Stemmer = stemmer_default;
|
|
43212
|
+
RiTa.randomizer = new randgen_default();
|
|
43213
|
+
RiTa.tagger = new tagger_default(RiTa);
|
|
43214
|
+
RiTa.analyzer = new analyzer_default(RiTa);
|
|
43215
|
+
RiTa.concorder = new concorder_default(RiTa);
|
|
43216
|
+
RiTa.tokenizer = new tokenizer_default(RiTa);
|
|
43217
|
+
RiTa.inflector = new inflector_default(RiTa);
|
|
43218
|
+
RiTa.lexicon = new lexicon_default(RiTa);
|
|
43219
|
+
RiTa.conjugator = new conjugator_default(RiTa);
|
|
43220
|
+
markov_default.parent = RiTa;
|
|
43221
|
+
stemmer_default.tokenizer = RiTa.tokenizer;
|
|
43222
|
+
RiTa.SILENT = false;
|
|
43223
|
+
RiTa.SILENCE_LTS = false;
|
|
43224
|
+
RiTa.VERSION = "3.0.23";
|
|
43225
|
+
RiTa.FIRST = 1;
|
|
43226
|
+
RiTa.SECOND = 2;
|
|
43227
|
+
RiTa.THIRD = 3;
|
|
43228
|
+
RiTa.PAST = 4;
|
|
43229
|
+
RiTa.PRESENT = 5;
|
|
43230
|
+
RiTa.FUTURE = 6;
|
|
43231
|
+
RiTa.SINGULAR = 7;
|
|
43232
|
+
RiTa.PLURAL = 8;
|
|
43233
|
+
RiTa.NORMAL = 9;
|
|
43234
|
+
RiTa.STRESS = "1";
|
|
43235
|
+
RiTa.NOSTRESS = "0";
|
|
43236
|
+
RiTa.PHONE_BOUNDARY = "-";
|
|
43237
|
+
RiTa.WORD_BOUNDARY = " ";
|
|
43238
|
+
RiTa.SYLLABLE_BOUNDARY = "/";
|
|
43239
|
+
RiTa.SENTENCE_BOUNDARY = "|";
|
|
43240
|
+
RiTa.VOWELS = "aeiou";
|
|
43241
|
+
RiTa.PHONES = ["aa", "ae", "ah", "ao", "aw", "ay", "b", "ch", "d", "dh", "eh", "er", "ey", "f", "g", "hh", "ih", "iy", "jh", "k", "l", "m", "n", "ng", "ow", "oy", "p", "r", "s", "sh", "t", "th", "uh", "uw", "v", "w", "y", "z", "zh"];
|
|
43242
|
+
RiTa.ABRV = ["Adm.", "Capt.", "Cmdr.", "Col.", "Dr.", "Gen.", "Gov.", "Lt.", "Maj.", "Messrs.", "Mr.", "Mrs.", "Ms.", "Prof.", "Rep.", "Reps.", "Rev.", "Sen.", "Sens.", "Sgt.", "Sr.", "St.", "A.k.a.", "C.f.", "I.e.", "E.g.", "Vs.", "V.", "Jan.", "Feb.", "Mar.", "Apr.", "Mar.", "Jun.", "Jul.", "Aug.", "Sept.", "Oct.", "Nov.", "Dec."];
|
|
43243
|
+
// Lowercase words listed as question words.
// NOTE(review): presumably matched against a sentence's first word — confirm.
// "who" was listed twice; the duplicate is removed, so the set of words is unchanged.
RiTa.QUESTIONS = ["was", "what", "when", "where", "which", "why", "who", "will", "would", "how", "if", "is", "could", "might", "does", "are", "have"];
|
|
43244
|
+
// Lowercase stop words (function words and common contractions).
// The tail of the list repeated "didn't", "over", "this", "that", "just", "then" and "some";
// those repeats are removed, so the set of words is unchanged and first-occurrence order is kept.
RiTa.STOP_WORDS = ["and", "a", "of", "in", "i", "you", "is", "to", "that", "it", "for", "on", "have", "with", "this", "be", "not", "are", "as", "was", "but", "or", "from", "my", "at", "if", "they", "your", "all", "he", "by", "one", "me", "what", "so", "can", "will", "do", "an", "about", "we", "just", "would", "there", "no", "like", "out", "his", "has", "up", "more", "who", "when", "don't", "some", "had", "them", "any", "their", "it's", "only", "which", "i'm", "been", "other", "were", "how", "then", "now", "her", "than", "she", "well", "also", "us", "very", "because", "am", "here", "could", "even", "him", "into", "our", "much", "too", "did", "should", "over", "want", "these", "may", "where", "most", "many", "those", "does", "why", "please", "off", "going", "its", "i've", "down", "that's", "can't", "you're", "didn't", "another", "around", "must", "few", "doesn't", "the", "every", "yes", "each", "maybe", "i'll", "away", "doing", "oh", "else", "isn't", "he's", "there's", "hi", "won't", "ok", "they're", "yeah", "mine", "we're", "what's", "shall", "she's", "hello", "okay", "here's", "less", "said", "under"];
|
|
43245
|
+
// Lowercase nouns listed as mass/uncountable or number-invariant.
// NOTE(review): presumably matched by exact string against a lower-cased word — confirm.
// Fixes: the second "chinese" is removed; "clothing " and "hospitality " (trailing space)
// are removed because the correctly spelled entries already exist; "stress " is trimmed
// to "stress", since the padded form could never equal a token.
// NOTE(review): "room (space)" and "old age" are kept verbatim; neither can match a single
// word token — decide separately whether they should stay.
RiTa.MASS_NOUNS = ["abalone", "asbestos", "barracks", "bathos", "breeches", "beef", "britches", "chaos", "chinese", "cognoscenti", "clippers", "corps", "cosmos", "crossroads", "diabetes", "ethos", "gallows", "graffiti", "herpes", "innings", "lens", "means", "measles", "mews", "mumps", "news", "pasta", "pathos", "pincers", "pliers", "proceedings", "rabies", "rhinoceros", "sassafras", "scissors", "series", "shears", "species", "tuna", "acoustics", "aesthetics", "aquatics", "basics", "ceramics", "classics", "cosmetics", "dialectics", "deer", "dynamics", "ethics", "harmonics", "heroics", "mechanics", "metrics", "ooze", "optics", "physics", "polemics", "pyrotechnics", "statistics", "tactics", "tropics", "bengalese", "bengali", "bonsai", "booze", "cellulose", "mess", "moose", "burmese", "colossus", "congolese", "discus", "electrolysis", "emphasis", "expertise", "flu", "fructose", "gauze", "glucose", "grease", "guyanese", "haze", "incense", "japanese", "lebanese", "malaise", "mayonnaise", "maltese", "music", "money", "menopause", "merchandise", "olympics", "overuse", "paradise", "poise", "potash", "portuguese", "prose", "recompense", "remorse", "repose", "senegalese", "siamese", "singhalese", "sleaze", "sioux", "sudanese", "suspense", "swiss", "taiwanese", "vietnamese", "unease", "aircraft", "anise", "antifreeze", "applause", "archdiocese", "apparatus", "asparagus", "bellows", "bison", "bluefish", "bourgeois", "bream", "brill", "butterfingers", "cargo", "carp", "catfish", "chassis", "clone", "clones", "clothes", "chub", "cod", "codfish", "coley", "contretemps", "crawfish", "crayfish", "cuttlefish", "dice", "dogfish", "doings", "dory", "downstairs", "eldest", "earnings", "economics", "electronics", "firstborn", "fish", "flatfish", "flounder", "fowl", "fry", "fries", "works", "goldfish", "golf", "grand", "grief", "haddock", "hake", "halibut", "headquarters", "herring", "hertz", "honey", "horsepower", "goods", "hovercraft", "ironworks", "kilohertz", "ling", 
"shrimp", "swine", "lungfish", "mackerel", "macaroni", "megahertz", "moorfowl", "moorgame", "mullet", "nepalese", "offspring", "pants", "patois", "pekinese", "perch", "pickerel", "pike", "potpourri", "precis", "quid", "rand", "rendezvous", "roach", "salmon", "samurai", "seychelles", "shad", "sheep", "shellfish", "smelt", "spaghetti", "spacecraft", "starfish", "stockfish", "sunfish", "superficies", "sweepstakes", "smallpox", "swordfish", "tennis", "tobacco", "triceps", "trout", "tunafish", "turbot", "trousers", "turf", "dibs", "undersigned", "waterfowl", "waterworks", "waxworks", "wildfowl", "woodworm", "yen", "aries", "pisces", "forceps", "jeans", "mathematics", "odds", "politics", "remains", "aids", "wildlife", "shall", "would", "may", "might", "ought", "should", "acne", "admiration", "advice", "air", "anger", "anticipation", "assistance", "awareness", "bacon", "baggage", "blood", "bravery", "chess", "clay", "clothing", "coal", "compliance", "comprehension", "confusion", "consciousness", "cream", "darkness", "diligence", "dust", "education", "empathy", "enthusiasm", "envy", "equality", "equipment", "evidence", "feedback", "fitness", "flattery", "foliage", "fun", "furniture", "garbage", "gold", "gossip", "grammar", "gratitude", "gravel", "guilt", "happiness", "hardware", "hate", "hay", "health", "heat", "help", "hesitation", "homework", "honesty", "honor", "honour", "hospitality", "hostility", "humanity", "humility", "ice", "immortality", "independence", "information", "integrity", "intimidation", "jargon", "jealousy", "jewelry", "justice", "knowledge", "literacy", "logic", "luck", "lumber", "luggage", "mail", "management", "milk", "morale", "mud", "nonsense", "oppression", "optimism", "oxygen", "participation", "pay", "peace", "perseverance", "pessimism", "pneumonia", "poetry", "police", "pride", "privacy", "propaganda", "public", "punctuation", "recovery", "rice", "rust", "satisfaction", "schnapps", "shame", "slang", "software", "stamina", "starvation", "steam", 
"steel", "stuff", "support", "sweat", "thunder", "timber", "toil", "traffic", "tongs", "training", "trash", "valor", "vehemence", "violence", "warmth", "waste", "weather", "wheat", "wisdom", "work", "accommodation", "advertising", "aid", "art", "bread", "business", "butter", "calm", "cash", "cheese", "childhood", "coffee", "content", "corruption", "courage", "currency", "damage", "danger", "determination", "electricity", "employment", "energy", "entertainment", "failure", "fame", "fire", "flour", "food", "freedom", "friendship", "fuel", "genetics", "hair", "harm", "housework", "humour", "imagination", "importance", "innocence", "intelligence", "juice", "kindness", "labour", "lack", "laughter", "leisure", "literature", "litter", "love", "magic", "metal", "motherhood", "motivation", "nature", "nutrition", "obesity", "oil", "old age", "paper", "patience", "permission", "pollution", "poverty", "power", "production", "progress", "pronunciation", "publicity", "quality", "quantity", "racism", "rain", "relaxation", "research", "respect", "room (space)", "rubbish", "safety", "salt", "sand", "seafood", "shopping", "silence", "smoke", "snow", "soup", "speed", "spelling", "stress", "sugar", "sunshine", "tea", "time", "tolerance", "trade", "transportation", "travel", "trust", "understanding", "unemployment", "usage", "vision", "water", "wealth", "weight", "welfare", "width", "wood", "yoga", "youth", "homecare", "childcare", "fanfare", "healthcare", "medicare"];
|
|
43246
|
+
// Numeric option constant (1). Shares its value with RiTa.FIRST, so the two can only be
// told apart by the option they are passed under.
// NOTE(review): presumably selects the infinitive verb form — confirm.
RiTa.INFINITIVE = 1;
|
|
43247
|
+
// Numeric option constant (2). Shares its value with RiTa.SECOND, so the two can only be
// told apart by the option they are passed under.
// NOTE(review): presumably selects the gerund verb form — confirm.
RiTa.GERUND = 2;
|
|
43248
|
+
// Boolean flag, off by default.
// NOTE(review): presumably makes the tokenizer split contractions into separate tokens — confirm.
RiTa.SPLIT_CONTRACTIONS = false;
|
|
42651
43249
|
// Matches a string made up solely of Unicode punctuation (\p{P}) and the extra
// symbols | + < > ^ $ U+FFFD and backtick. The empty string matches too (note the `*`).
var ONLY_PUNCT = /^[\p{P}|+<>^$\ufffd`]*$/u;
|
|
42652
43250
|
// Matches a non-empty string of lowercase ASCII letters and Latin-1 accented letters
// (U+00C0–U+00FF). That block also holds the multiplication sign (U+00D7) and the
// division sign (U+00F7), which are not letters, so the range is split around them.
// Uppercase ASCII is still rejected, as before.
var IS_LETTER = /^[a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF]+$/;
|
|
42653
|
-
|
|
42654
|
-
|
|
42655
|
-
RiTa2.riscript = new (0, _riscript.RiScript)({ RiTa: RiTa2 });
|
|
43251
|
+
// Create the RiScript engine, passing the RiTa namespace in through the `RiTa` option.
// `(0, _riscript.RiScript)` is a comma expression that evaluates to the constructor itself.
RiTa.riscript = new (0, _riscript.RiScript)({ RiTa });
|
|
43252
|
+
|
|
42656
43253
|
|
|
42657
43254
|
|
|
42658
|
-
exports.RiTa =
|
|
43255
|
+
// CommonJS named exports: RiMarkov (the markov module's default export) and the RiTa namespace.
exports.RiMarkov = markov_default; exports.RiTa = RiTa;
|
|
42659
43256
|
//# sourceMappingURL=rita.cjs.map
|