rita 3.0.21 → 3.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/rita.cjs +865 -346
- package/dist/rita.cjs.map +1 -1
- package/dist/rita.js +864 -345
- package/dist/rita.js.map +1 -1
- package/dist/rita.min.js +86 -86
- package/dist/rita.min.js.map +1 -1
- package/package.json +4 -3
package/dist/rita.cjs
CHANGED
|
@@ -796,9 +796,9 @@ var _Stemmer = class _Stemmer {
|
|
|
796
796
|
if (!input.includes(" ")) {
|
|
797
797
|
return _Stemmer.stemEnglish(input);
|
|
798
798
|
}
|
|
799
|
-
const words = _Stemmer.
|
|
799
|
+
const words = _Stemmer.tokenizer.tokenize(input);
|
|
800
800
|
const stems = _Stemmer.stemAll(words);
|
|
801
|
-
return _Stemmer.
|
|
801
|
+
return _Stemmer.tokenizer.untokenize(stems);
|
|
802
802
|
}
|
|
803
803
|
static stemAll(input) {
|
|
804
804
|
return input.map((i) => _Stemmer.stemEnglish(i));
|
|
@@ -830,6 +830,7 @@ var _Stemmer = class _Stemmer {
|
|
|
830
830
|
return _Stemmer.impl.getCurrent();
|
|
831
831
|
}
|
|
832
832
|
};
|
|
833
|
+
__publicField(_Stemmer, "tokenizer");
|
|
833
834
|
__publicField(_Stemmer, "impl", new SnowballStemmer());
|
|
834
835
|
var Stemmer = _Stemmer;
|
|
835
836
|
var stemmer_default = Stemmer;
|
|
@@ -840,7 +841,25 @@ var Tokenizer = class {
|
|
|
840
841
|
this.RiTa = parent;
|
|
841
842
|
this.splitter = /(\S.+?[.!?]["\u201D]?)(?=\s+|$)/g;
|
|
842
843
|
}
|
|
843
|
-
|
|
844
|
+
/**
|
|
845
|
+
* Returns an array containing all unique alphabetical words (tokens) in the text.
|
|
846
|
+
* Punctuation and case are ignored unless specified otherwise.
|
|
847
|
+
* @param {string} text - The text from which to extract the tokens
|
|
848
|
+
* @param {object} [opts] - The options
|
|
849
|
+
* @param {boolean} opts.caseSensitive=false - Whether to pay attention to case
|
|
850
|
+
* @param {boolean} opts.ignoreStopWords=false - Whether to ignore words like 'the', 'and', 'a', 'of', etc, as specified in RiTa.STOP_WORDS
|
|
851
|
+
* @param {boolean} opts.splitContractions=false - Whether to convert contractions (e.g., "I'd" or "she'll") into multiple individual tokens
|
|
852
|
+
* @param {boolean} opts.includePunct=false - Whether to include punctuation in the results
|
|
853
|
+
* @param {boolean} opts.sort=false - Whether to sort the tokens before returning them
|
|
854
|
+
* @returns {string[]} Array of tokens
|
|
855
|
+
*/
|
|
856
|
+
tokens(text, opts = {
|
|
857
|
+
caseSensitive: false,
|
|
858
|
+
ignoreStopWords: false,
|
|
859
|
+
splitContractions: false,
|
|
860
|
+
includePunct: false,
|
|
861
|
+
sort: false
|
|
862
|
+
}) {
|
|
844
863
|
let words = this.tokenize(text, opts), map = {};
|
|
845
864
|
words.forEach((w) => {
|
|
846
865
|
if (!opts.caseSensitive)
|
|
@@ -853,44 +872,20 @@ var Tokenizer = class {
|
|
|
853
872
|
tokens = tokens.filter((t) => !this.RiTa.isStopWord(t));
|
|
854
873
|
return opts.sort ? tokens.sort() : tokens;
|
|
855
874
|
}
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
let re = new RegExp(delim, "g");
|
|
862
|
-
let pattern = regex2 || this.splitter;
|
|
863
|
-
let unescapeAbbrevs = (arr2) => {
|
|
864
|
-
for (let i = 0; i < arr2.length; i++) {
|
|
865
|
-
arr2[i] = arr2[i].replace(re, ".");
|
|
866
|
-
}
|
|
867
|
-
return arr2;
|
|
868
|
-
};
|
|
869
|
-
let escapeAbbrevs = (text2) => {
|
|
870
|
-
let abbrevs = this.RiTa.ABRV;
|
|
871
|
-
for (let i = 0; i < abbrevs.length; i++) {
|
|
872
|
-
let abv = abbrevs[i];
|
|
873
|
-
let idx = text2.indexOf(abv);
|
|
874
|
-
while (idx > -1) {
|
|
875
|
-
text2 = text2.replace(abv, abv.replace(".", delim));
|
|
876
|
-
idx = text2.indexOf(abv);
|
|
877
|
-
}
|
|
878
|
-
}
|
|
879
|
-
return text2;
|
|
880
|
-
};
|
|
881
|
-
let arr = escapeAbbrevs(clean).match(pattern);
|
|
882
|
-
return arr && arr.length ? unescapeAbbrevs(arr) : [text];
|
|
883
|
-
}
|
|
884
|
-
tokenize(input, opts = {}) {
|
|
875
|
+
tokenize(input, opts = {
|
|
876
|
+
// regex: null,
|
|
877
|
+
// splitHyphens: false,
|
|
878
|
+
// splitContractions: false
|
|
879
|
+
}) {
|
|
885
880
|
if (typeof input !== "string")
|
|
886
881
|
return [];
|
|
887
882
|
if (opts.regex)
|
|
888
|
-
return input.split(regex);
|
|
883
|
+
return input.split(opts.regex);
|
|
889
884
|
let { tags, text } = this.pushTags(input.trim());
|
|
890
885
|
for (let i = 0; i < TOKENIZE_RE.length; i += 2) {
|
|
891
886
|
text = text.replace(TOKENIZE_RE[i], TOKENIZE_RE[i + 1]);
|
|
892
887
|
}
|
|
893
|
-
if (opts.
|
|
888
|
+
if (opts.splitHyphens) {
|
|
894
889
|
text = text.replace(/([a-zA-Z]+)-([a-zA-Z]+)/g, "$1 - $2");
|
|
895
890
|
}
|
|
896
891
|
if (this.RiTa.SPLIT_CONTRACTIONS || opts.splitContractions) {
|
|
@@ -901,11 +896,10 @@ var Tokenizer = class {
|
|
|
901
896
|
let result = this.popTags(text.trim().split(WS_RE), tags);
|
|
902
897
|
return result;
|
|
903
898
|
}
|
|
904
|
-
untokenize(arr, delim) {
|
|
899
|
+
untokenize(arr, delim = " ") {
|
|
905
900
|
if (!arr || !Array.isArray(arr))
|
|
906
901
|
return "";
|
|
907
902
|
arr = this.preProcessTags(arr);
|
|
908
|
-
delim = delim || " ";
|
|
909
903
|
let nextNoSpace = false, afterQuote = false, midSentence = false;
|
|
910
904
|
let withinQuote = arr.length && QUOTE_RE.test(arr[0]);
|
|
911
905
|
let result = arr[0] || "";
|
|
@@ -966,6 +960,40 @@ var Tokenizer = class {
|
|
|
966
960
|
}
|
|
967
961
|
return result.trim();
|
|
968
962
|
}
|
|
963
|
+
/**
|
|
964
|
+
* Split the input text into sentences according to the options
|
|
965
|
+
* @param {string} text - The text to split
|
|
966
|
+
* @param {(string|RegExp)} [regex] - An optional custom regex to split on
|
|
967
|
+
* @returns {string[]} An array of sentences
|
|
968
|
+
*/
|
|
969
|
+
sentences(text, regex) {
|
|
970
|
+
if (!text || !text.length)
|
|
971
|
+
return [text];
|
|
972
|
+
let clean = text.replace(NL_RE, " ");
|
|
973
|
+
let delim = "___";
|
|
974
|
+
let re = new RegExp(delim, "g");
|
|
975
|
+
let pattern = regex || this.splitter;
|
|
976
|
+
let unescapeAbbrevs = (arr2) => {
|
|
977
|
+
for (let i = 0; i < arr2.length; i++) {
|
|
978
|
+
arr2[i] = arr2[i].replace(re, ".");
|
|
979
|
+
}
|
|
980
|
+
return arr2;
|
|
981
|
+
};
|
|
982
|
+
let escapeAbbrevs = (text2) => {
|
|
983
|
+
let abbrevs = this.RiTa.ABRV;
|
|
984
|
+
for (let i = 0; i < abbrevs.length; i++) {
|
|
985
|
+
let abv = abbrevs[i];
|
|
986
|
+
let idx = text2.indexOf(abv);
|
|
987
|
+
while (idx > -1) {
|
|
988
|
+
text2 = text2.replace(abv, abv.replace(".", delim));
|
|
989
|
+
idx = text2.indexOf(abv);
|
|
990
|
+
}
|
|
991
|
+
}
|
|
992
|
+
return text2;
|
|
993
|
+
};
|
|
994
|
+
let arr = escapeAbbrevs(clean).match(pattern);
|
|
995
|
+
return _optionalChain([arr, 'optionalAccess', _2 => _2.length]) ? unescapeAbbrevs(arr) : [text];
|
|
996
|
+
}
|
|
969
997
|
pushTags(text) {
|
|
970
998
|
let tags = [], tagIdx = 0;
|
|
971
999
|
while (TAG_RE.test(text)) {
|
|
@@ -1335,6 +1363,7 @@ var TOKENIZE_RE = [
|
|
|
1335
1363
|
// RS
|
|
1336
1364
|
];
|
|
1337
1365
|
var CONTRACTS_RE = [
|
|
1366
|
+
// TODO: 'She'd have wanted' -> 'She would have wanted'
|
|
1338
1367
|
/([Cc])an['\u2019]t/g,
|
|
1339
1368
|
"$1an not",
|
|
1340
1369
|
/([Dd])idn['\u2019]t/g,
|
|
@@ -1348,7 +1377,8 @@ var CONTRACTS_RE = [
|
|
|
1348
1377
|
/([tT]hat)['\u2019]s/g,
|
|
1349
1378
|
"$1 is",
|
|
1350
1379
|
/(she|he|you|they|i)['\u2019]d/gi,
|
|
1351
|
-
"$1
|
|
1380
|
+
"$1 had",
|
|
1381
|
+
// changed from would, 12/8/23
|
|
1352
1382
|
/(she|he|you|they|i)['\u2019]ll/gi,
|
|
1353
1383
|
"$1 will",
|
|
1354
1384
|
/n['\u2019]t /g,
|
|
@@ -1364,9 +1394,9 @@ var tokenizer_default = Tokenizer;
|
|
|
1364
1394
|
|
|
1365
1395
|
// src/conjugator.js
|
|
1366
1396
|
var RegularExpression = class {
|
|
1367
|
-
constructor(
|
|
1368
|
-
this.raw =
|
|
1369
|
-
this.regex = new RegExp(
|
|
1397
|
+
constructor(regex, offset, suffix) {
|
|
1398
|
+
this.raw = regex;
|
|
1399
|
+
this.regex = new RegExp(regex);
|
|
1370
1400
|
this.offset = offset;
|
|
1371
1401
|
this.suffix = suffix || "";
|
|
1372
1402
|
}
|
|
@@ -1383,7 +1413,7 @@ var RegularExpression = class {
|
|
|
1383
1413
|
return "/" + this.raw + "/";
|
|
1384
1414
|
}
|
|
1385
1415
|
};
|
|
1386
|
-
var RE = (a, b, c) => new RegularExpression(a, b, c);
|
|
1416
|
+
var RE = (a, b, c, _) => new RegularExpression(a, b, c);
|
|
1387
1417
|
var Conjugator = class {
|
|
1388
1418
|
constructor(parent) {
|
|
1389
1419
|
__publicField(this, "_handleStem", function(word) {
|
|
@@ -1432,11 +1462,11 @@ var Conjugator = class {
|
|
|
1432
1462
|
}
|
|
1433
1463
|
args = this._parseArgs(args);
|
|
1434
1464
|
let frontVG = TO_BE.includes(verb) ? "be" : this._handleStem(verb);
|
|
1435
|
-
let actualModal, verbForm, conjs = [],
|
|
1436
|
-
if (this.form ===
|
|
1465
|
+
let actualModal, verbForm, conjs = [], RiTa2 = this.RiTa;
|
|
1466
|
+
if (this.form === RiTa2.INFINITIVE) {
|
|
1437
1467
|
actualModal = "to";
|
|
1438
1468
|
}
|
|
1439
|
-
if (this.tense ===
|
|
1469
|
+
if (this.tense === RiTa2.FUTURE) {
|
|
1440
1470
|
actualModal = "will";
|
|
1441
1471
|
}
|
|
1442
1472
|
if (this.passive) {
|
|
@@ -1456,7 +1486,7 @@ var Conjugator = class {
|
|
|
1456
1486
|
frontVG = null;
|
|
1457
1487
|
}
|
|
1458
1488
|
if (frontVG) {
|
|
1459
|
-
if (this.form ===
|
|
1489
|
+
if (this.form === RiTa2.GERUND) {
|
|
1460
1490
|
conjs.push(this.presentPart(frontVG));
|
|
1461
1491
|
} else if (this.interrogative && frontVG != "be" && conjs.length < 1) {
|
|
1462
1492
|
conjs.push(frontVG);
|
|
@@ -1575,19 +1605,19 @@ var Conjugator = class {
|
|
|
1575
1605
|
}
|
|
1576
1606
|
_parseArgs(args) {
|
|
1577
1607
|
this._reset();
|
|
1578
|
-
const
|
|
1608
|
+
const RiTa2 = this.RiTa;
|
|
1579
1609
|
if (typeof args === "string") {
|
|
1580
1610
|
if (/^[123][SP](Pr|Pa|Fu)$/.test(args)) {
|
|
1581
1611
|
let opts = {};
|
|
1582
1612
|
opts.person = parseInt(args[0]);
|
|
1583
|
-
opts.number = args[1] === "S" ?
|
|
1613
|
+
opts.number = args[1] === "S" ? RiTa2.SINGULAR : RiTa2.PLURAL;
|
|
1584
1614
|
let tense = args.substr(2);
|
|
1585
1615
|
if (tense === "Pr")
|
|
1586
|
-
opts.tense =
|
|
1616
|
+
opts.tense = RiTa2.PRESENT;
|
|
1587
1617
|
if (tense === "Fu")
|
|
1588
|
-
opts.tense =
|
|
1618
|
+
opts.tense = RiTa2.FUTURE;
|
|
1589
1619
|
if (tense === "Pa")
|
|
1590
|
-
opts.tense =
|
|
1620
|
+
opts.tense = RiTa2.PAST;
|
|
1591
1621
|
args = opts;
|
|
1592
1622
|
} else {
|
|
1593
1623
|
throw Error("Invalid args: " + args);
|
|
@@ -1678,20 +1708,20 @@ var Conjugator = class {
|
|
|
1678
1708
|
return false;
|
|
1679
1709
|
}
|
|
1680
1710
|
_pastTense(theVerb, pers, numb) {
|
|
1681
|
-
const
|
|
1711
|
+
const RiTa2 = this.RiTa;
|
|
1682
1712
|
if (theVerb.toLowerCase() === "be") {
|
|
1683
1713
|
switch (numb) {
|
|
1684
|
-
case
|
|
1714
|
+
case RiTa2.SINGULAR:
|
|
1685
1715
|
switch (pers) {
|
|
1686
|
-
case
|
|
1716
|
+
case RiTa2.FIRST:
|
|
1687
1717
|
break;
|
|
1688
|
-
case
|
|
1718
|
+
case RiTa2.THIRD:
|
|
1689
1719
|
return "was";
|
|
1690
|
-
case
|
|
1720
|
+
case RiTa2.SECOND:
|
|
1691
1721
|
return "were";
|
|
1692
1722
|
}
|
|
1693
1723
|
break;
|
|
1694
|
-
case
|
|
1724
|
+
case RiTa2.PLURAL:
|
|
1695
1725
|
return "were";
|
|
1696
1726
|
}
|
|
1697
1727
|
}
|
|
@@ -1700,17 +1730,17 @@ var Conjugator = class {
|
|
|
1700
1730
|
_presentTense(theVerb, person, number) {
|
|
1701
1731
|
person = person || this.person;
|
|
1702
1732
|
number = number || this.number;
|
|
1703
|
-
const
|
|
1704
|
-
if (person ===
|
|
1733
|
+
const RiTa2 = this.RiTa;
|
|
1734
|
+
if (person === RiTa2.THIRD && number === RiTa2.SINGULAR) {
|
|
1705
1735
|
return this._checkRules(PRESENT_RULESET, theVerb);
|
|
1706
1736
|
} else if (theVerb === "be") {
|
|
1707
|
-
if (number ===
|
|
1737
|
+
if (number === RiTa2.SINGULAR) {
|
|
1708
1738
|
switch (person) {
|
|
1709
|
-
case
|
|
1739
|
+
case RiTa2.FIRST:
|
|
1710
1740
|
return "am";
|
|
1711
|
-
case
|
|
1741
|
+
case RiTa2.SECOND:
|
|
1712
1742
|
return "are";
|
|
1713
|
-
case
|
|
1743
|
+
case RiTa2.THIRD:
|
|
1714
1744
|
return "is";
|
|
1715
1745
|
}
|
|
1716
1746
|
} else {
|
|
@@ -4787,13 +4817,13 @@ var Util = class _Util {
|
|
|
4787
4817
|
return !isNaN(parseFloat(n)) && isFinite(n);
|
|
4788
4818
|
}
|
|
4789
4819
|
static numOpt(opts, name, def = 0) {
|
|
4790
|
-
return _Util.isNum(_optionalChain([opts, 'optionalAccess',
|
|
4820
|
+
return _Util.isNum(_optionalChain([opts, 'optionalAccess', _3 => _3[name]])) ? opts[name] : def;
|
|
4791
4821
|
}
|
|
4792
4822
|
};
|
|
4793
4823
|
var RE2 = class {
|
|
4794
|
-
constructor(
|
|
4795
|
-
this.raw =
|
|
4796
|
-
this.regex = new RegExp(
|
|
4824
|
+
constructor(regex, offset, suffix) {
|
|
4825
|
+
this.raw = regex;
|
|
4826
|
+
this.regex = new RegExp(regex);
|
|
4797
4827
|
this.offset = offset;
|
|
4798
4828
|
this.suffix = suffix || "";
|
|
4799
4829
|
}
|
|
@@ -27069,20 +27099,25 @@ var Lexicon = class {
|
|
|
27069
27099
|
async soundsLike(word, options = {}) {
|
|
27070
27100
|
return this._promise(this.soundsLikeSync, [word, options]);
|
|
27071
27101
|
}
|
|
27102
|
+
/**
|
|
27103
|
+
* A synchronous version of RiTa.lexicon.soundsLike().
|
|
27104
|
+
* @param {string} word
|
|
27105
|
+
* @param {object} [opts]
|
|
27106
|
+
* @returns {string[]} An array of words that sound like the input word
|
|
27107
|
+
*/
|
|
27072
27108
|
soundsLikeSync(word, opts = {}) {
|
|
27073
27109
|
if (!word || !word.length)
|
|
27074
27110
|
return [];
|
|
27075
|
-
opts.type
|
|
27076
|
-
return opts.matchSpelling ? this._bySoundAndLetter(word, opts) : this._byTypeSync(word, opts);
|
|
27111
|
+
return opts.matchSpelling ? this._bySoundAndLetterSync(word, opts) : this._byTypeSync(word, { ...opts, type: "sound" });
|
|
27077
27112
|
}
|
|
27078
|
-
randomWord(
|
|
27079
|
-
if (!
|
|
27113
|
+
randomWord(pattern, opts) {
|
|
27114
|
+
if (!pattern && !opts) {
|
|
27080
27115
|
return this.RiTa.random(Object.keys(this.data));
|
|
27081
27116
|
}
|
|
27082
|
-
if (!(
|
|
27083
|
-
if (typeof
|
|
27084
|
-
opts =
|
|
27085
|
-
|
|
27117
|
+
if (!(pattern instanceof RegExp)) {
|
|
27118
|
+
if (typeof pattern === "object" && !opts) {
|
|
27119
|
+
opts = pattern;
|
|
27120
|
+
pattern = void 0;
|
|
27086
27121
|
}
|
|
27087
27122
|
}
|
|
27088
27123
|
opts = opts || {};
|
|
@@ -27090,14 +27125,14 @@ var Lexicon = class {
|
|
|
27090
27125
|
opts.shuffle = true;
|
|
27091
27126
|
opts.strictPos = true;
|
|
27092
27127
|
opts.minLength = util_default.numOpt(opts, "minLength", 4);
|
|
27093
|
-
let result = this.searchSync(
|
|
27128
|
+
let result = this.searchSync(pattern, opts);
|
|
27094
27129
|
if (result.length < 1 && opts.hasOwnProperty("pos")) {
|
|
27095
27130
|
opts.strictPos = false;
|
|
27096
|
-
result = this.searchSync(
|
|
27131
|
+
result = this.searchSync(pattern, opts);
|
|
27097
27132
|
}
|
|
27098
27133
|
if (result.length < 1) {
|
|
27099
27134
|
["strictPos", "shuffle", "targetPos"].forEach((k) => delete opts[k]);
|
|
27100
|
-
throw Error("No words matching constraints:\n" + JSON.stringify(opts, 0, 2));
|
|
27135
|
+
throw Error("No words matching constraints:\n" + JSON.stringify(opts, void 0, 2));
|
|
27101
27136
|
}
|
|
27102
27137
|
return result[0];
|
|
27103
27138
|
}
|
|
@@ -27108,7 +27143,7 @@ var Lexicon = class {
|
|
|
27108
27143
|
let words = Object.keys(this.data);
|
|
27109
27144
|
if (!pattern && !options)
|
|
27110
27145
|
return words;
|
|
27111
|
-
let { regex
|
|
27146
|
+
let { regex, opts } = this._parseRegex(pattern, options);
|
|
27112
27147
|
this._parseArgs(opts);
|
|
27113
27148
|
if (opts.shuffle)
|
|
27114
27149
|
words = this.RiTa.randomizer.shuffle(words);
|
|
@@ -27124,7 +27159,7 @@ var Lexicon = class {
|
|
|
27124
27159
|
if (word !== words[i])
|
|
27125
27160
|
data = this.data[word];
|
|
27126
27161
|
}
|
|
27127
|
-
if (!
|
|
27162
|
+
if (!regex || this._regexMatch(word, data, regex, opts.type)) {
|
|
27128
27163
|
result.push(word);
|
|
27129
27164
|
if (result.length === opts.limit)
|
|
27130
27165
|
break;
|
|
@@ -27260,30 +27295,37 @@ var Lexicon = class {
|
|
|
27260
27295
|
opts.targetPos = tpos;
|
|
27261
27296
|
}
|
|
27262
27297
|
_reconjugate(word, pos) {
|
|
27263
|
-
const
|
|
27298
|
+
const RiTa2 = this.RiTa;
|
|
27264
27299
|
switch (pos) {
|
|
27265
27300
|
case "vbd":
|
|
27266
|
-
return
|
|
27267
|
-
number:
|
|
27268
|
-
person:
|
|
27269
|
-
tense:
|
|
27301
|
+
return RiTa2.conjugate(word, {
|
|
27302
|
+
number: RiTa2.SINGULAR,
|
|
27303
|
+
person: RiTa2.FIRST,
|
|
27304
|
+
tense: RiTa2.PAST
|
|
27270
27305
|
});
|
|
27271
27306
|
case "vbg":
|
|
27272
|
-
return
|
|
27307
|
+
return RiTa2.presentPart(word);
|
|
27273
27308
|
case "vbn":
|
|
27274
|
-
return
|
|
27309
|
+
return RiTa2.pastPart(word);
|
|
27275
27310
|
case "vbp":
|
|
27276
27311
|
return word;
|
|
27277
27312
|
case "vbz":
|
|
27278
|
-
return
|
|
27279
|
-
number:
|
|
27280
|
-
person:
|
|
27281
|
-
tense:
|
|
27313
|
+
return RiTa2.conjugate(word, {
|
|
27314
|
+
number: RiTa2.SINGULAR,
|
|
27315
|
+
person: RiTa2.THIRD,
|
|
27316
|
+
tense: RiTa2.PRESENT
|
|
27282
27317
|
});
|
|
27283
27318
|
default:
|
|
27284
27319
|
throw Error("Unexpected pos: " + pos);
|
|
27285
27320
|
}
|
|
27286
27321
|
}
|
|
27322
|
+
_bySoundAndLetterSync(word, opts) {
|
|
27323
|
+
let bySound = this._byTypeSync(word, { ...opts, type: "sound" });
|
|
27324
|
+
let byLetter = this._byTypeSync(word, { ...opts, type: "letter" });
|
|
27325
|
+
if (bySound.length < 1 || byLetter.length < 1)
|
|
27326
|
+
return [];
|
|
27327
|
+
return this._intersect(bySound, byLetter).slice(0, opts.limit);
|
|
27328
|
+
}
|
|
27287
27329
|
async _bySoundAndLetter(word, opts) {
|
|
27288
27330
|
let types = ["sound", "letter"];
|
|
27289
27331
|
let promises = types.map((type) => this._promise(this._byTypeSync, [word, { ...opts, type }]));
|
|
@@ -27307,7 +27349,8 @@ var Lexicon = class {
|
|
|
27307
27349
|
// med for 2 strings (or 2 arrays)
|
|
27308
27350
|
minEditDist(source, target) {
|
|
27309
27351
|
let cost;
|
|
27310
|
-
let i, j
|
|
27352
|
+
let i, j;
|
|
27353
|
+
let matrix = [];
|
|
27311
27354
|
let sI;
|
|
27312
27355
|
let tJ;
|
|
27313
27356
|
for (i = 0; i <= source.length; i++) {
|
|
@@ -27331,8 +27374,8 @@ var Lexicon = class {
|
|
|
27331
27374
|
}
|
|
27332
27375
|
return matrix[source.length][target.length];
|
|
27333
27376
|
}
|
|
27334
|
-
isMassNoun(w
|
|
27335
|
-
return w.endsWith("ness") || w.endsWith("ism") ||
|
|
27377
|
+
isMassNoun(w) {
|
|
27378
|
+
return w.endsWith("ness") || w.endsWith("ism") || this.RiTa.MASS_NOUNS.includes(w);
|
|
27336
27379
|
}
|
|
27337
27380
|
// helpers ---------------------------------------------------------------
|
|
27338
27381
|
_promise(fun, args) {
|
|
@@ -27344,44 +27387,44 @@ var Lexicon = class {
|
|
|
27344
27387
|
}
|
|
27345
27388
|
});
|
|
27346
27389
|
}
|
|
27347
|
-
_parseRegex(
|
|
27348
|
-
if (typeof
|
|
27390
|
+
_parseRegex(regex, opts) {
|
|
27391
|
+
if (typeof regex === "string") {
|
|
27349
27392
|
if (opts && opts.type === "stresses") {
|
|
27350
|
-
if (/^\^?[01]+\$?$/.test(
|
|
27351
|
-
|
|
27393
|
+
if (/^\^?[01]+\$?$/.test(regex)) {
|
|
27394
|
+
regex = regex.replace(/([01])(?=([01]))/g, "$1/");
|
|
27352
27395
|
}
|
|
27353
27396
|
}
|
|
27354
|
-
|
|
27355
|
-
} else if (
|
|
27356
|
-
} else if (typeof
|
|
27397
|
+
regex = new RegExp(regex);
|
|
27398
|
+
} else if (regex instanceof RegExp) {
|
|
27399
|
+
} else if (typeof regex === "object" || regex === void 0 && typeof opts === "object") {
|
|
27357
27400
|
if (!opts) {
|
|
27358
|
-
opts =
|
|
27401
|
+
opts = regex;
|
|
27359
27402
|
}
|
|
27360
|
-
|
|
27361
|
-
if (typeof
|
|
27403
|
+
regex = opts.regex;
|
|
27404
|
+
if (typeof regex === "string") {
|
|
27362
27405
|
if (opts && opts.type === "stresses") {
|
|
27363
|
-
if (/^\^?[01]+\$?$/.test(
|
|
27364
|
-
|
|
27406
|
+
if (/^\^?[01]+\$?$/.test(regex)) {
|
|
27407
|
+
regex = regex.replace(/([01])(?=([01]))/g, "$1/");
|
|
27365
27408
|
}
|
|
27366
27409
|
}
|
|
27367
|
-
|
|
27410
|
+
regex = new RegExp(regex);
|
|
27368
27411
|
}
|
|
27369
27412
|
}
|
|
27370
|
-
return { regex
|
|
27413
|
+
return { regex, opts: opts || {} };
|
|
27371
27414
|
}
|
|
27372
|
-
_regexMatch(word, data,
|
|
27415
|
+
_regexMatch(word, data, regex, type) {
|
|
27373
27416
|
if (type === "stresses") {
|
|
27374
27417
|
let phones = data ? data[0] : this.rawPhones(word);
|
|
27375
27418
|
let stresses = this.analyzer.phonesToStress(phones);
|
|
27376
|
-
if (
|
|
27419
|
+
if (regex.test(stresses))
|
|
27377
27420
|
return true;
|
|
27378
27421
|
} else if (type === "phones") {
|
|
27379
27422
|
let phones = data ? data[0] : this.rawPhones(word);
|
|
27380
27423
|
phones = phones.replace(/1/g, "").replace(/ /g, "-");
|
|
27381
|
-
if (
|
|
27424
|
+
if (regex.test(phones))
|
|
27382
27425
|
return true;
|
|
27383
27426
|
} else {
|
|
27384
|
-
if (
|
|
27427
|
+
if (regex.test(word))
|
|
27385
27428
|
return true;
|
|
27386
27429
|
}
|
|
27387
27430
|
}
|
|
@@ -27471,6 +27514,10 @@ var lexicon_default = Lexicon;
|
|
|
27471
27514
|
|
|
27472
27515
|
// src/tagger.js
|
|
27473
27516
|
var Tagger = class {
|
|
27517
|
+
/**
|
|
27518
|
+
* Create a Tagger.
|
|
27519
|
+
* @param {any} parent - RiTa parent class.
|
|
27520
|
+
*/
|
|
27474
27521
|
constructor(parent) {
|
|
27475
27522
|
this.RiTa = parent;
|
|
27476
27523
|
}
|
|
@@ -27535,18 +27582,30 @@ var Tagger = class {
|
|
|
27535
27582
|
}
|
|
27536
27583
|
return [];
|
|
27537
27584
|
}
|
|
27538
|
-
|
|
27539
|
-
|
|
27540
|
-
|
|
27541
|
-
|
|
27542
|
-
|
|
27543
|
-
|
|
27544
|
-
|
|
27545
|
-
|
|
27546
|
-
|
|
27547
|
-
|
|
27548
|
-
|
|
27549
|
-
|
|
27585
|
+
/**
|
|
27586
|
+
* Tags an array of words with their part-of-speech
|
|
27587
|
+
* @param {(string|string[])} input - The input containing a word or words
|
|
27588
|
+
* @param {object} [opts] - options for the tagging {inline, simple}
|
|
27589
|
+
* @param {boolean} [opts.inline] - tags are returned inline with words
|
|
27590
|
+
* @param {boolean} [opts.simple] - use simple tags (noun=n, verb=v, adjective=a, adverb=r)
|
|
27591
|
+
* @returns {any} the pos tag(s) or string with tags inline
|
|
27592
|
+
*/
|
|
27593
|
+
tag(input, opts = {
|
|
27594
|
+
inline: false,
|
|
27595
|
+
simple: false
|
|
27596
|
+
}) {
|
|
27597
|
+
let result = [], choices2d = [];
|
|
27598
|
+
let dbug = _optionalChain([opts, 'optionalAccess', _4 => _4.dbug]) || false;
|
|
27599
|
+
if (!input || !input.length)
|
|
27600
|
+
return opts.inline ? "" : [];
|
|
27601
|
+
let words;
|
|
27602
|
+
if (!Array.isArray(input)) {
|
|
27603
|
+
if (!input.trim().length) {
|
|
27604
|
+
return opts.inline ? "" : [];
|
|
27605
|
+
}
|
|
27606
|
+
words = this.RiTa.tokenizer.tokenize(input);
|
|
27607
|
+
} else {
|
|
27608
|
+
words = input;
|
|
27550
27609
|
}
|
|
27551
27610
|
for (let i = 0, l = words.length; i < l; i++) {
|
|
27552
27611
|
let word = words[i];
|
|
@@ -27563,7 +27622,7 @@ var Tagger = class {
|
|
|
27563
27622
|
}
|
|
27564
27623
|
}
|
|
27565
27624
|
let tags = this._applyContext(words, result, choices2d, dbug);
|
|
27566
|
-
if (simple) {
|
|
27625
|
+
if (opts.simple) {
|
|
27567
27626
|
for (let i = 0; i < tags.length; i++) {
|
|
27568
27627
|
if (NOUNS.includes(tags[i]))
|
|
27569
27628
|
tags[i] = "n";
|
|
@@ -27577,7 +27636,7 @@ var Tagger = class {
|
|
|
27577
27636
|
tags[i] = "-";
|
|
27578
27637
|
}
|
|
27579
27638
|
}
|
|
27580
|
-
return inline ? this.inlineTags(words, tags) : tags;
|
|
27639
|
+
return opts.inline ? this.inlineTags(words, tags) : tags;
|
|
27581
27640
|
}
|
|
27582
27641
|
//////////////////////////////////////////////////////////////////
|
|
27583
27642
|
_isNoLexIrregularVerb(stem) {
|
|
@@ -27719,7 +27778,14 @@ var Tagger = class {
|
|
|
27719
27778
|
console.log("\n Custom(" + i + ") tagged '" + frm + "' -> '" + to + "'\n\n");
|
|
27720
27779
|
}
|
|
27721
27780
|
// debug only: not available in built version since 'dbug' in tag() is 0
|
|
27722
|
-
|
|
27781
|
+
/**
|
|
27782
|
+
* Applies a customized subset of the Brill transformations
|
|
27783
|
+
* @param {string[]} words
|
|
27784
|
+
* @param {string[]} result
|
|
27785
|
+
* @param {string[]} choices
|
|
27786
|
+
* @param {boolean} dbug
|
|
27787
|
+
* @returns
|
|
27788
|
+
*/
|
|
27723
27789
|
_applyContext(words, result, choices, dbug) {
|
|
27724
27790
|
for (let i = 0, l = words.length; i < l; i++) {
|
|
27725
27791
|
let word = words[i], tag = result[i];
|
|
@@ -27802,7 +27868,7 @@ var Tagger = class {
|
|
|
27802
27868
|
let idx = result.slice(i + 1).indexOf("nn");
|
|
27803
27869
|
let allJJ = true;
|
|
27804
27870
|
for (let k = 0; k < idx; k++) {
|
|
27805
|
-
if (
|
|
27871
|
+
if (result[i + 1 + k] !== "jj") {
|
|
27806
27872
|
allJJ = false;
|
|
27807
27873
|
break;
|
|
27808
27874
|
}
|
|
@@ -27948,7 +28014,7 @@ var Tagger = class {
|
|
|
27948
28014
|
for (let j = 0; j < tags.length; j++) {
|
|
27949
28015
|
if (pos === tags[j])
|
|
27950
28016
|
return true;
|
|
27951
|
-
if (pos === "n" && NOUNS.includes(tags[j]) || pos === "v" && VERBS.includes(tags[j]) || pos === "r" && ADVS.includes(tags[j]) || pos === "a" && ADJS.includes
|
|
28017
|
+
if (pos === "n" && NOUNS.includes(tags[j]) || pos === "v" && VERBS.includes(tags[j]) || pos === "r" && ADVS.includes(tags[j]) || pos === "a" && ADJS.includes(tags[j])) {
|
|
27952
28018
|
return true;
|
|
27953
28019
|
}
|
|
27954
28020
|
}
|
|
@@ -27996,7 +28062,7 @@ var Inflector = class {
|
|
|
27996
28062
|
if (!word.length)
|
|
27997
28063
|
return "";
|
|
27998
28064
|
let check = word.toLowerCase();
|
|
27999
|
-
if (this.RiTa.
|
|
28065
|
+
if (this.RiTa.lexicon.isMassNoun(check)) {
|
|
28000
28066
|
dbug && console.log(word + " hit MASS_NOUNS");
|
|
28001
28067
|
return word;
|
|
28002
28068
|
}
|
|
@@ -28011,11 +28077,6 @@ var Inflector = class {
|
|
|
28011
28077
|
return word;
|
|
28012
28078
|
}
|
|
28013
28079
|
singularize(word, opts) {
|
|
28014
|
-
if (this.isSingular(word, opts)) {
|
|
28015
|
-
if (opts && opts.debug)
|
|
28016
|
-
console.log("pluralize returning via isPlural()");
|
|
28017
|
-
return word;
|
|
28018
|
-
}
|
|
28019
28080
|
return this.adjustNumber(word, SING, opts && opts.dbug);
|
|
28020
28081
|
}
|
|
28021
28082
|
pluralize(word, opts) {
|
|
@@ -28026,30 +28087,30 @@ var Inflector = class {
|
|
|
28026
28087
|
}
|
|
28027
28088
|
return this.adjustNumber(word, PLUR, opts && opts.dbug);
|
|
28028
28089
|
}
|
|
28029
|
-
isSingular(word, opts) {
|
|
28030
|
-
|
|
28031
|
-
|
|
28032
|
-
throw Error(`isSingular() takes string`);
|
|
28033
|
-
|
|
28034
|
-
|
|
28035
|
-
|
|
28036
|
-
|
|
28037
|
-
|
|
28038
|
-
|
|
28039
|
-
|
|
28040
|
-
|
|
28041
|
-
|
|
28042
|
-
|
|
28043
|
-
|
|
28090
|
+
/*isSingular(word, opts) {
|
|
28091
|
+
// return false;
|
|
28092
|
+
// if (word && typeof word !== 'string') {
|
|
28093
|
+
// throw Error(`isSingular() takes string`);
|
|
28094
|
+
// }
|
|
28095
|
+
if (!word || !word.length) return false;
|
|
28096
|
+
|
|
28097
|
+
let dbug = opts && opts.dbug;
|
|
28098
|
+
|
|
28099
|
+
word = word.toLowerCase();
|
|
28100
|
+
|
|
28101
|
+
if (this.RiTa.MASS_NOUNS.includes(word)) {
|
|
28102
|
+
dbug && console.log(word + " is mass noun");
|
|
28103
|
+
return true;
|
|
28104
|
+
}
|
|
28105
|
+
|
|
28106
|
+
return NOUNS_ENDING_IN_S.includes(word);
|
|
28107
|
+
}*/
|
|
28044
28108
|
isPlural(word, opts) {
|
|
28045
|
-
if (word && typeof word !== "string") {
|
|
28046
|
-
throw Error(`isPlural() takes string`);
|
|
28047
|
-
}
|
|
28048
28109
|
if (!word || !word.length)
|
|
28049
28110
|
return false;
|
|
28050
28111
|
let dbug = opts && opts.dbug;
|
|
28051
28112
|
word = word.toLowerCase();
|
|
28052
|
-
if (this.RiTa.
|
|
28113
|
+
if (this.RiTa.lexicon.isMassNoun(word)) {
|
|
28053
28114
|
dbug && console.log(word + " is mass noun");
|
|
28054
28115
|
return true;
|
|
28055
28116
|
}
|
|
@@ -28075,13 +28136,9 @@ var Inflector = class {
|
|
|
28075
28136
|
dbug && console.log(word + ": latin rule -a to -ae");
|
|
28076
28137
|
return true;
|
|
28077
28138
|
}
|
|
28078
|
-
|
|
28079
|
-
|
|
28080
|
-
|
|
28081
|
-
dbug && console.log(word + "'s singular form " + sing + " is nn");
|
|
28082
|
-
return true;
|
|
28083
|
-
}
|
|
28084
|
-
} else {
|
|
28139
|
+
let tags = this.RiTa.tagger.allTags(sing, { noGuessing: true });
|
|
28140
|
+
if (tags.includes("nn")) {
|
|
28141
|
+
dbug && console.log(word + "'s singular form " + sing + " is nn");
|
|
28085
28142
|
return true;
|
|
28086
28143
|
}
|
|
28087
28144
|
}
|
|
@@ -28159,7 +28216,7 @@ var SING_RULES = [
|
|
|
28159
28216
|
RE3("(sh|ch|o|ss|x|z|us)es$", 2),
|
|
28160
28217
|
RE3("ses$", 2, "is"),
|
|
28161
28218
|
// catharses, prognoses
|
|
28162
|
-
// singulars ending in s, TODO: replace with
|
|
28219
|
+
// singulars ending in s, TODO: replace with NOUNS_ENDING_IN_S list
|
|
28163
28220
|
RE3("([vs]is|gas|[im]nus|genus|[ptbl]us|[ai]ss|[dr]ess)$", 0),
|
|
28164
28221
|
// octopus, thesis, alumnus, gas, bus (singulars)
|
|
28165
28222
|
DEFAULT_SING
|
|
@@ -28242,7 +28299,10 @@ var LetterToSound = class _LetterToSound {
|
|
|
28242
28299
|
this.tokenizer.tokenize(line, " ");
|
|
28243
28300
|
let type = this.tokenizer.nextToken();
|
|
28244
28301
|
if (type === "S" || type === "P") {
|
|
28245
|
-
this.stateMachine[this.numStates++] = this.createState(
|
|
28302
|
+
this.stateMachine[this.numStates++] = this.createState(
|
|
28303
|
+
type
|
|
28304
|
+
/*, this.tokenizer*/
|
|
28305
|
+
);
|
|
28246
28306
|
} else if (type === "I") {
|
|
28247
28307
|
let index = parseInt(this.tokenizer.nextToken());
|
|
28248
28308
|
if (index != this.numStates) {
|
|
@@ -28256,12 +28316,12 @@ var LetterToSound = class _LetterToSound {
|
|
|
28256
28316
|
}
|
|
28257
28317
|
}
|
|
28258
28318
|
buildPhones(word, opts) {
|
|
28259
|
-
const
|
|
28260
|
-
if (!word || !word.length ||
|
|
28319
|
+
const RiTa2 = this.RiTa;
|
|
28320
|
+
if (!word || !word.length || RiTa2.isPunct(word))
|
|
28261
28321
|
return;
|
|
28262
28322
|
let phoneList = [], windowSize = 4;
|
|
28263
28323
|
let fullBuff, tmp, currentState, startIndex, stateIndex, c;
|
|
28264
|
-
let silent =
|
|
28324
|
+
let silent = RiTa2.SILENT || RiTa2.SILENCE_LTS || opts && opts.silent;
|
|
28265
28325
|
if (!_LetterToSound.RULES) {
|
|
28266
28326
|
if (!this.warnedForNoLTS) {
|
|
28267
28327
|
this.warnedForNoLTS = true;
|
|
@@ -28273,16 +28333,14 @@ var LetterToSound = class _LetterToSound {
|
|
|
28273
28333
|
word = word.toLowerCase();
|
|
28274
28334
|
if (util_default.isNum(word)) {
|
|
28275
28335
|
if (/^[0-9]+$/.test(word)) {
|
|
28276
|
-
|
|
28277
|
-
|
|
28278
|
-
|
|
28279
|
-
|
|
28280
|
-
|
|
28281
|
-
|
|
28282
|
-
phoneList.push(...phs.split("-"));
|
|
28283
|
-
}
|
|
28284
|
-
return phoneList;
|
|
28336
|
+
word = word.length > 1 ? word.split("") : [word];
|
|
28337
|
+
for (let k = 0; k < word.length; k++) {
|
|
28338
|
+
let asWord = util_default.Numbers.toWords[parseInt(word[k])];
|
|
28339
|
+
let phs = RiTa2.lexicon.rawPhones(asWord, { noLts: true });
|
|
28340
|
+
phs = phs.replace(/1/g, "").replace(/ /g, "-");
|
|
28341
|
+
phoneList.push(...phs.split("-"));
|
|
28285
28342
|
}
|
|
28343
|
+
return phoneList;
|
|
28286
28344
|
}
|
|
28287
28345
|
}
|
|
28288
28346
|
tmp = "000#" + word.trim() + "#000", fullBuff = tmp.split("");
|
|
@@ -28297,7 +28355,7 @@ var LetterToSound = class _LetterToSound {
|
|
|
28297
28355
|
startIndex = this.letterIndex[c];
|
|
28298
28356
|
if (isNaN(parseFloat(startIndex)) || !isFinite(startIndex)) {
|
|
28299
28357
|
if (!silent) {
|
|
28300
|
-
console.warn("Unable to generate LTS for '" + word + "', no index for '" + c + "', isDigit=" + util_default.isNum(c) + ", isPunct=" +
|
|
28358
|
+
console.warn("Unable to generate LTS for '" + word + "', no index for '" + c + "', isDigit=" + util_default.isNum(c) + ", isPunct=" + RiTa2.isPunct(c));
|
|
28301
28359
|
}
|
|
28302
28360
|
return;
|
|
28303
28361
|
}
|
|
@@ -28322,7 +28380,7 @@ var LetterToSound = class _LetterToSound {
|
|
|
28322
28380
|
return state;
|
|
28323
28381
|
} else {
|
|
28324
28382
|
this.tokenizer.tokenize(i);
|
|
28325
|
-
return this.getState(this.tokenizer.nextToken()
|
|
28383
|
+
return this.getState(this.tokenizer.nextToken());
|
|
28326
28384
|
}
|
|
28327
28385
|
}
|
|
28328
28386
|
};
|
|
@@ -41551,8 +41609,8 @@ var Analyzer = class {
|
|
|
41551
41609
|
return stress;
|
|
41552
41610
|
}
|
|
41553
41611
|
analyzeWord(word, opts = {}) {
|
|
41554
|
-
let
|
|
41555
|
-
let result =
|
|
41612
|
+
let RiTa2 = this.RiTa;
|
|
41613
|
+
let result = RiTa2.CACHING && this.cache[word];
|
|
41556
41614
|
if (typeof result === "undefined") {
|
|
41557
41615
|
let slash = "/", delim = "-";
|
|
41558
41616
|
let lex = this.RiTa.lexicon;
|
|
@@ -41581,7 +41639,7 @@ var Analyzer = class {
|
|
|
41581
41639
|
}
|
|
41582
41640
|
result = { phones, stresses, syllables };
|
|
41583
41641
|
Object.keys(result).forEach((k) => result[k] = result[k].trim());
|
|
41584
|
-
if (
|
|
41642
|
+
if (RiTa2.CACHING)
|
|
41585
41643
|
this.cache[word] = result;
|
|
41586
41644
|
}
|
|
41587
41645
|
return result;
|
|
@@ -41601,15 +41659,15 @@ var Analyzer = class {
|
|
|
41601
41659
|
}
|
|
41602
41660
|
//#HWF this part is unchanged but moved to a separate function
|
|
41603
41661
|
_computePhonesWord(word, lex, opts, isPart) {
|
|
41604
|
-
let rawPhones,
|
|
41662
|
+
let rawPhones, RiTa2 = this.RiTa;
|
|
41605
41663
|
if (isPart)
|
|
41606
41664
|
rawPhones = lex.rawPhones(word, { noLts: true });
|
|
41607
41665
|
if (!rawPhones && word.endsWith("s")) {
|
|
41608
|
-
let sing =
|
|
41666
|
+
let sing = RiTa2.singularize(word);
|
|
41609
41667
|
rawPhones = lex.rawPhones(sing, { noLts: true });
|
|
41610
41668
|
rawPhones && (rawPhones += "-z");
|
|
41611
41669
|
}
|
|
41612
|
-
let silent =
|
|
41670
|
+
let silent = RiTa2.SILENT || RiTa2.SILENCE_LTS || opts && opts.silent;
|
|
41613
41671
|
if (!rawPhones) {
|
|
41614
41672
|
let ltsPhones = this.computePhones(word, opts);
|
|
41615
41673
|
if (ltsPhones && ltsPhones.length) {
|
|
@@ -41740,7 +41798,7 @@ var SeededRandom = class {
|
|
|
41740
41798
|
if (!(Array.isArray(arg) || util_default.isNum(arg)))
|
|
41741
41799
|
throw Error("Expects [] or int");
|
|
41742
41800
|
let o = Array.isArray(arg) ? arg : Array.from(Array(arg).keys());
|
|
41743
|
-
for (let j, x, i = o.length; i; j =
|
|
41801
|
+
for (let j, x, i = o.length; i; j = Math.floor(this.random() * i), x = o[--i], o[i] = o[j], o[j] = x) {
|
|
41744
41802
|
}
|
|
41745
41803
|
return o;
|
|
41746
41804
|
}
|
|
@@ -41866,15 +41924,16 @@ var randgen_default = SeededRandom;
|
|
|
41866
41924
|
|
|
41867
41925
|
// src/markov.js
|
|
41868
41926
|
var _json = require('@ungap/structured-clone/json');
|
|
41869
|
-
var
|
|
41927
|
+
var _RiMarkov = class _RiMarkov {
|
|
41928
|
+
// RiTa
|
|
41870
41929
|
constructor(n, opts = {}) {
|
|
41871
41930
|
this.n = n;
|
|
41872
41931
|
this.root = new Node(null, "ROOT");
|
|
41873
41932
|
this.trace = opts.trace;
|
|
41874
41933
|
this.mlm = opts.maxLengthMatch;
|
|
41875
41934
|
this.maxAttempts = opts.maxAttempts || 999;
|
|
41876
|
-
this.tokenize = opts.tokenize ||
|
|
41877
|
-
this.untokenize = opts.untokenize ||
|
|
41935
|
+
this.tokenize = opts.tokenize || _RiMarkov.parent.tokenize;
|
|
41936
|
+
this.untokenize = opts.untokenize || _RiMarkov.parent.untokenize;
|
|
41878
41937
|
this.disableInputChecks = opts.disableInputChecks;
|
|
41879
41938
|
this.sentenceStarts = [];
|
|
41880
41939
|
this.sentenceEnds = /* @__PURE__ */ new Set();
|
|
@@ -41888,7 +41947,7 @@ var RiMarkov = class _RiMarkov {
|
|
|
41888
41947
|
this.addText(opts.text);
|
|
41889
41948
|
}
|
|
41890
41949
|
addText(text, multiplier = 1) {
|
|
41891
|
-
let sents = Array.isArray(text) ? text :
|
|
41950
|
+
let sents = Array.isArray(text) ? text : _RiMarkov.parent.sentences(text);
|
|
41892
41951
|
let wrap, allWords = [];
|
|
41893
41952
|
for (let k = 0; k < multiplier; k++) {
|
|
41894
41953
|
for (let i = 0; i < sents.length; i++) {
|
|
@@ -42055,7 +42114,7 @@ var RiMarkov = class _RiMarkov {
|
|
|
42055
42114
|
let usableStarts = this.sentenceStarts.filter((ss) => notMarked(this.root.child(ss)));
|
|
42056
42115
|
if (!usableStarts.length)
|
|
42057
42116
|
throw Error("No valid sentence-starts remaining");
|
|
42058
|
-
let start =
|
|
42117
|
+
let start = _RiMarkov.parent.random(usableStarts);
|
|
42059
42118
|
let startTok = this.root.child(start);
|
|
42060
42119
|
markNode(startTok);
|
|
42061
42120
|
usableStarts = this.sentenceStarts.filter((ss) => notMarked(this.root.child(ss)));
|
|
@@ -42115,7 +42174,7 @@ var RiMarkov = class _RiMarkov {
|
|
|
42115
42174
|
if (pre.length + post.length > this.n)
|
|
42116
42175
|
throw Error("Sum of pre.length && post.length must be <= N, was " + (pre.length + post.length));
|
|
42117
42176
|
if (!(tn = this._pathTo(pre))) {
|
|
42118
|
-
if (!
|
|
42177
|
+
if (!_RiMarkov.parent.SILENT)
|
|
42119
42178
|
console.warn("Unable to find nodes in pre: " + pre);
|
|
42120
42179
|
return;
|
|
42121
42180
|
}
|
|
@@ -42281,6 +42340,8 @@ var RiMarkov = class _RiMarkov {
|
|
|
42281
42340
|
return sent.replace(MULTI_SP_RE, " ");
|
|
42282
42341
|
}
|
|
42283
42342
|
};
|
|
42343
|
+
__publicField(_RiMarkov, "parent");
|
|
42344
|
+
var RiMarkov = _RiMarkov;
|
|
42284
42345
|
var Node = class _Node {
|
|
42285
42346
|
constructor(parent, word, count) {
|
|
42286
42347
|
this.children = {};
|
|
@@ -42289,6 +42350,7 @@ var Node = class _Node {
|
|
|
42289
42350
|
this.count = count || 0;
|
|
42290
42351
|
this.numChildren = -1;
|
|
42291
42352
|
this.marked = false;
|
|
42353
|
+
this.hidden = false;
|
|
42292
42354
|
}
|
|
42293
42355
|
// Find a (direct) child node with matching token, given a word or node
|
|
42294
42356
|
child(word) {
|
|
@@ -42407,9 +42469,6 @@ function populate(objNode, jsonNode) {
|
|
|
42407
42469
|
populate(newNode, child);
|
|
42408
42470
|
}
|
|
42409
42471
|
}
|
|
42410
|
-
function RiTa() {
|
|
42411
|
-
return RiMarkov.parent;
|
|
42412
|
-
}
|
|
42413
42472
|
function throwError(tries, oks) {
|
|
42414
42473
|
throw Error("Failed after " + tries + " tries" + (oks ? " and " + oks + " successes" : "") + ", you may need to adjust options or add more text");
|
|
42415
42474
|
}
|
|
@@ -42433,227 +42492,687 @@ var markov_default = RiMarkov;
|
|
|
42433
42492
|
// src/rita.js
|
|
42434
42493
|
var _riscript = require('riscript');
|
|
42435
42494
|
var { Grammar: RiGrammar } = _riscript.RiScript;
|
|
42436
|
-
var
|
|
42495
|
+
var RiTa = class _RiTa {
|
|
42496
|
+
/**
|
|
42497
|
+
* Create a RiTa grammar instance
|
|
42498
|
+
* @param {object} rules - the rules of the grammar
|
|
42499
|
+
* @param {object} context - the context of the grammar
|
|
42500
|
+
* @returns {object} - a new RiGrammar instance // TODO: fix return type -> RiGrammar
|
|
42501
|
+
*/
|
|
42437
42502
|
static grammar(rules, context) {
|
|
42438
|
-
return new RiGrammar(
|
|
42503
|
+
return new RiGrammar(rules, context);
|
|
42439
42504
|
}
|
|
42505
|
+
/**
|
|
42506
|
+
* Add a transform function to the RiScript parser
|
|
42507
|
+
* @param {string} name - the name of the transform
|
|
42508
|
+
* @param {function} definition - the transform function
|
|
42509
|
+
*/
|
|
42440
42510
|
static addTransform(name, definition) {
|
|
42441
|
-
|
|
42511
|
+
_RiTa.riscript.addTransform(name, definition);
|
|
42442
42512
|
}
|
|
42513
|
+
/**
|
|
42514
|
+
* Remove a transform function from the RiScript parser
|
|
42515
|
+
* @param {string} name - the name of the transform to remove
|
|
42516
|
+
*/
|
|
42443
42517
|
static removeTransform(name) {
|
|
42444
|
-
|
|
42518
|
+
_RiTa.riscript.removeTransform(name);
|
|
42445
42519
|
}
|
|
42520
|
+
/**
|
|
42521
|
+
* Returns the names of all current transform functions
|
|
42522
|
+
* @returns {string[]} the names of all transforms
|
|
42523
|
+
*/
|
|
42446
42524
|
static getTransforms() {
|
|
42447
42525
|
return _RiTa.riscript.getTransforms();
|
|
42448
42526
|
}
|
|
42527
|
+
/**
|
|
42528
|
+
* Adds the appropriate article ('a' or 'an') to the word, according to its phonemes (useful as a transform function)
|
|
42529
|
+
* @param {string} word - the word to transform
|
|
42530
|
+
* @returns {string} - the word with an article, e.g., 'honor' -> 'an honor'
|
|
42531
|
+
*/
|
|
42449
42532
|
static articlize(word) {
|
|
42450
|
-
return _riscript.RiScript.articlize(
|
|
42451
|
-
}
|
|
42452
|
-
|
|
42453
|
-
|
|
42454
|
-
|
|
42455
|
-
|
|
42456
|
-
|
|
42457
|
-
|
|
42458
|
-
|
|
42459
|
-
|
|
42460
|
-
|
|
42461
|
-
|
|
42462
|
-
|
|
42463
|
-
|
|
42533
|
+
return _riscript.RiScript.articlize(word, _RiTa);
|
|
42534
|
+
}
|
|
42535
|
+
/**
|
|
42536
|
+
* Evaluates the input script via the RiScript parser
|
|
42537
|
+
* @param {string} script - the script to evaluate
|
|
42538
|
+
* @param {object} context - the context to evaluate the script in
|
|
42539
|
+
* @param {object} [options] - options for the evaluation
|
|
42540
|
+
* @param {boolean} options.trace - whether to trace the evaluation
|
|
42541
|
+
* @returns {string} the result of the evaluation
|
|
42542
|
+
*/
|
|
42543
|
+
static evaluate(script, context, options) {
|
|
42544
|
+
return _RiTa.riscript.evaluate(script, context, options);
|
|
42545
|
+
}
|
|
42546
|
+
/**
|
|
42547
|
+
* Creates a new RiMarkov object
|
|
42548
|
+
* @param {number} n - an int representing the n-factor of the markov chain
|
|
42549
|
+
* @param {object} [options] - options for the markov chain
|
|
42550
|
+
* @returns {RiMarkov}
|
|
42551
|
+
*/
|
|
42552
|
+
static markov(n, options) {
|
|
42553
|
+
return new markov_default(n, options);
|
|
42554
|
+
}
|
|
42555
|
+
/**
|
|
42556
|
+
* Return a list of occurrences of the key word in the Key-Word-In-Context (KWIC) model.
|
|
42557
|
+
* @overload
|
|
42558
|
+
* @param {string} keyword
|
|
42559
|
+
* @param {object} [options]
|
|
42560
|
+
* @param {number} options.numWords - the number of words to include in the context
|
|
42561
|
+
* @param {string} options.text - the text as input for the KWIC model
|
|
42562
|
+
* @param {string[]} options.words - the array of words to be used as input for the KWIC model
|
|
42563
|
+
* @returns {string[]} all the occurrences of the keyword in the model, each with no more
|
|
42564
|
+
* than 'numWords' words of context on either side
|
|
42565
|
+
* @overload
|
|
42566
|
+
* @param {string} keyword
|
|
42567
|
+
* @param {number} text - the number of words to include in the context
|
|
42568
|
+
* @returns {string[]} all the occurrences of the keyword in the model, each with no more
|
|
42569
|
+
* than 'numWords' words of context on either side
|
|
42570
|
+
*/
|
|
42571
|
+
static kwic(keyword, options) {
|
|
42572
|
+
return _RiTa.concorder.kwic(keyword, options);
|
|
42573
|
+
}
|
|
42574
|
+
/**
|
|
42575
|
+
* Creates a concordance, a list of words with their frequency of occurrence, from the given text and options.
|
|
42576
|
+
* @param {string} text - the text from which to create the concordance
|
|
42577
|
+
* @param {object} [options] - options for the concordance
|
|
42578
|
+
* @param {boolean} options.ignoreCase=false - whether to ignore case when creating the concordance
|
|
42579
|
+
* @param {boolean} options.ignoreStopWords=false - whether to ignore stop words like
|
|
42580
|
+
* 'the', 'and', 'a', 'of', etc, as specified in RiTa.STOP_WORDS
|
|
42581
|
+
* @param {boolean} options.ignorePunctuation=false - whether to ignore punctuation when creating the concordance
|
|
42582
|
+
* @param {string[]} options.wordsToIgnore=null - words to ignore when creating the concordance (alternate stop-words)
|
|
42583
|
+
* @returns {object} the concordance, an object with words as keys and frequencies as values
|
|
42584
|
+
*/
|
|
42585
|
+
static concordance(text, options) {
|
|
42586
|
+
return _RiTa.concorder.concordance(text, options);
|
|
42587
|
+
}
|
|
42588
|
+
/**
|
|
42589
|
+
* Returns a random ordering of the input array or a random ordering of integers from 0 to k-1
|
|
42590
|
+
* @overload
|
|
42591
|
+
* @param {object[]} array - the array to shuffle
|
|
42592
|
+
* @returns {object[]} the input array in a random order
|
|
42593
|
+
* @overload
|
|
42594
|
+
* @param {number} k - the number of integers to return
|
|
42595
|
+
* @returns {number[]} an array of the integers from 0 to k-1 in random order
|
|
42596
|
+
*/
|
|
42464
42597
|
static randomOrdering(arrayOrInt) {
|
|
42465
|
-
return _RiTa.randomizer.randomOrdering(
|
|
42598
|
+
return _RiTa.randomizer.randomOrdering(arrayOrInt);
|
|
42466
42599
|
}
|
|
42467
|
-
|
|
42468
|
-
|
|
42600
|
+
/**
|
|
42601
|
+
* Sets the seed for the RiTa random number generator
|
|
42602
|
+
* @param {number} seed - the seed to set
|
|
42603
|
+
*/
|
|
42604
|
+
static randomSeed(seed) {
|
|
42605
|
+
_RiTa.randomizer.seed(seed);
|
|
42469
42606
|
}
|
|
42607
|
+
/**
|
|
42608
|
+
* Returns true if the sentence is a question, else false
|
|
42609
|
+
* @param {string} sentence
|
|
42610
|
+
* @returns {boolean} - true if the sentence is a question, else false
|
|
42611
|
+
*/
|
|
42470
42612
|
static isQuestion(sentence) {
|
|
42471
42613
|
return _RiTa.QUESTIONS.includes(_RiTa.tokenize(sentence)[0].toLowerCase());
|
|
42472
42614
|
}
|
|
42615
|
+
/**
|
|
42616
|
+
* Returns true if the character is a vowel, else false
|
|
42617
|
+
* @param {string} char
|
|
42618
|
+
* @returns {boolean} - true if the character is a vowel, else false
|
|
42619
|
+
*/
|
|
42473
42620
|
static isVowel(char) {
|
|
42474
42621
|
return char && char.length === 1 && _RiTa.VOWELS.includes(char);
|
|
42475
42622
|
}
|
|
42623
|
+
/**
|
|
42624
|
+
* Returns true if the character is a consonant, else false
|
|
42625
|
+
* @param {string} char
|
|
42626
|
+
* @returns {boolean} - true if the character is a consonant, else false
|
|
42627
|
+
*/
|
|
42476
42628
|
static isConsonant(char) {
|
|
42477
42629
|
return char && char.length === 1 && !_RiTa.VOWELS.includes(char) && IS_LETTER.test(char);
|
|
42478
42630
|
}
|
|
42631
|
+
/**
|
|
42632
|
+
* Capitalizes the first letter of the input string, leaving others unchanged
|
|
42633
|
+
* @param {string} string - the string to capitalize
|
|
42634
|
+
* @returns {string} the capitalized string
|
|
42635
|
+
*/
|
|
42479
42636
|
static capitalize(string) {
|
|
42480
42637
|
return string ? string[0].toUpperCase() + string.substring(1) : "";
|
|
42481
42638
|
}
|
|
42482
|
-
|
|
42483
|
-
|
|
42484
|
-
|
|
42485
|
-
|
|
42486
|
-
|
|
42487
|
-
|
|
42639
|
+
/**
|
|
42640
|
+
* Return a random word from the lexicon matching the specified criteria
|
|
42641
|
+
* (length, syllable-count, phonemic pattern, stress pattern, part-of-speech, etc.).
|
|
42642
|
+
* @param {(string|RegExp)} [pattern] - the pattern to match
|
|
42643
|
+
* @param {object} [options]
|
|
42644
|
+
* @param {number} options.minLength=4 - the minimum length of the word
|
|
42645
|
+
* @param {number} options.maxLength=-1 - the maximum length of the word
|
|
42646
|
+
* @param {number} options.numSyllables=null - the number of syllables in the word
|
|
42647
|
+
* @param {number} options.limit=10 - the maximum number of results to return
|
|
42648
|
+
* @param {string} options.pos=null - the part-of-speech of the word to return,
|
|
42649
|
+
* either from the Penn tag set or the simplified tag set [a, r, v, n]
|
|
42650
|
+
* @param {RegExp} options.pattern=null - the spelling or phonemic pattern to match
|
|
42651
|
+
* @param {string} options.type=null - the type of regex or string pattern to match,
|
|
42652
|
+
* options are 'stresses' or 'phones' or 'letters' (the default)
|
|
42653
|
+
* @returns {string} a random word matching the criteria in the options object
|
|
42654
|
+
*/
|
|
42655
|
+
static randomWord(pattern, options) {
|
|
42656
|
+
return _RiTa.lexicon.randomWord(pattern, options);
|
|
42657
|
+
}
|
|
42658
|
+
/**
|
|
42659
|
+
* Returns words that rhyme with the given word. Two words are considered as rhyming if
|
|
42660
|
+
* their final stressed vowel and all following phonemes are identical.
|
|
42661
|
+
* @param {string} word
|
|
42662
|
+
* @param {object} [options]
|
|
42663
|
+
* @param {number} options.minLength=4 - the minimum length of the words
|
|
42664
|
+
* @param {number} options.maxLength - the maximum length of the words
|
|
42665
|
+
* @param {number} options.numSyllables - the number of syllables in the words
|
|
42666
|
+
* @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
|
|
42667
|
+
* @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
|
|
42668
|
+
* @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
|
|
42669
|
+
* or the simplified tag set [a, r, v, n]
|
|
42670
|
+
* @returns {Promise<string[]>} an array of rhymes that match criteria in the options object
|
|
42671
|
+
*/
|
|
42672
|
+
static async rhymes(word, options) {
|
|
42673
|
+
return await _RiTa.lexicon.rhymes(word, options);
|
|
42674
|
+
}
|
|
42675
|
+
/**
|
|
42676
|
+
* Returns true if the two words rhyme, else false. Two words are considered as rhyming if
|
|
42677
|
+
* their final stressed vowel and all following phonemes are identical.
|
|
42678
|
+
* @param {string} word1 - the first word to compare
|
|
42679
|
+
* @param {string} word2 - the second word to compare
|
|
42680
|
+
* @returns {boolean} true if the two words rhyme, else false
|
|
42681
|
+
*/
|
|
42488
42682
|
static isRhyme(word1, word2) {
|
|
42489
|
-
return _RiTa.lexicon.isRhyme(
|
|
42490
|
-
}
|
|
42491
|
-
|
|
42492
|
-
|
|
42493
|
-
|
|
42494
|
-
|
|
42495
|
-
|
|
42496
|
-
|
|
42497
|
-
|
|
42683
|
+
return _RiTa.lexicon.isRhyme(word1, word2);
|
|
42684
|
+
}
|
|
42685
|
+
/**
|
|
42686
|
+
* Finds alliterations by comparing the phonemes of the input string to those
|
|
42687
|
+
* of each word in the lexicon via a minimum-edit-distance metric.
|
|
42688
|
+
* @param {string} word
|
|
42689
|
+
* @param {object} [options]
|
|
42690
|
+
* @param {number} options.minLength=4 - the minimum length of the words
|
|
42691
|
+
* @param {number} options.maxLength - the maximum length of the words
|
|
42692
|
+
* @param {number} options.numSyllables - the number of syllables in the words
|
|
42693
|
+
* @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
|
|
42694
|
+
* @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
|
|
42695
|
+
* @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
|
|
42696
|
+
* or the simplified tag set [a, r, v, n]
|
|
42697
|
+
* @returns {Promise<string[]>} an array of alliterations matching criteria in the options object
|
|
42698
|
+
*/
|
|
42699
|
+
static async alliterations(word, options) {
|
|
42700
|
+
return await _RiTa.lexicon.alliterations(word, options);
|
|
42701
|
+
}
|
|
42702
|
+
/**
|
|
42703
|
+
* Returns true if the word is in the lexicon, else false
|
|
42704
|
+
* @param {string} word - the word to check
|
|
42705
|
+
* @param {object} [options] - options for the search
|
|
42706
|
+
* @param {boolean} options.noDerivations=false - whether to ignore derivations and only search for raw words
|
|
42707
|
+
* @returns {boolean} true if the word is in the lexicon, else false
|
|
42708
|
+
*/
|
|
42709
|
+
static hasWord(word, options) {
|
|
42710
|
+
return _RiTa.lexicon.hasWord(word, options);
|
|
42711
|
+
}
|
|
42712
|
+
/**
|
|
42713
|
+
* Returns true if the word is an abbreviation, else false
|
|
42714
|
+
* @param {string} input - the word to check
|
|
42715
|
+
* @param {object} [options] - options for the search
|
|
42716
|
+
* @param {boolean} options.caseSensitive=false - whether to ignore case when checking for abbreviations
|
|
42717
|
+
* @returns {boolean} true if the word is an abbreviation, else false
|
|
42718
|
+
*/
|
|
42719
|
+
static isAbbrev(input, options) {
|
|
42498
42720
|
if (typeof input === "string") {
|
|
42499
|
-
if (caseSensitive)
|
|
42721
|
+
if (_optionalChain([options, 'optionalAccess', _5 => _5.caseSensitive]))
|
|
42500
42722
|
return _RiTa.ABRV.includes(input.trim());
|
|
42501
42723
|
let check = input.trim().toLowerCase();
|
|
42502
42724
|
return _RiTa.ABRV.some((a) => a.toLowerCase() === check);
|
|
42503
42725
|
}
|
|
42504
42726
|
}
|
|
42727
|
+
/**
|
|
42728
|
+
* Returns true if the two words are an alliteration (if their first stressed consonants match).
|
|
42729
|
+
* Note: returns true if word1 equals word2, and false if either (or both) are null.
|
|
42730
|
+
* @param {string} word1 - the first word to compare
|
|
42731
|
+
* @param {string} word2 - the second word to compare
|
|
42732
|
+
* @returns {boolean} true if the two words are an alliteration, else false
|
|
42733
|
+
*/
|
|
42505
42734
|
static isAlliteration(word1, word2) {
|
|
42506
|
-
return _RiTa.lexicon.isAlliteration(
|
|
42507
|
-
}
|
|
42508
|
-
|
|
42509
|
-
|
|
42510
|
-
|
|
42511
|
-
|
|
42512
|
-
|
|
42513
|
-
|
|
42514
|
-
|
|
42515
|
-
|
|
42735
|
+
return _RiTa.lexicon.isAlliteration(word1, word2);
|
|
42736
|
+
}
|
|
42737
|
+
/**
|
|
42738
|
+
* Compares the letters of the input word (using a version of the Levenshtein min-edit distance algorithm)
|
|
42739
|
+
* to each word in the lexicon, returning the set of closest matches that also match the criteria in the options object.
|
|
42740
|
+
* @param {string} word - the word to match
|
|
42741
|
+
* @param {object} [options] - options for the search
|
|
42742
|
+
* @param {number} options.minLength=4 - the minimum length of the words
|
|
42743
|
+
* @param {number} options.maxLength - the maximum length of the words
|
|
42744
|
+
* @param {number} options.numSyllables - the number of syllables in the words
|
|
42745
|
+
* @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
|
|
42746
|
+
* @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
|
|
42747
|
+
* @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set or the simplified tag set [a, r, v, n]
|
|
42748
|
+
* @returns {Promise<string[]>} an array of words matching the spelling pattern and criteria in the options object
|
|
42749
|
+
*/
|
|
42750
|
+
static async spellsLike(word, options) {
|
|
42751
|
+
return await _RiTa.lexicon.spellsLike(word, options);
|
|
42752
|
+
}
|
|
42753
|
+
/**
|
|
42754
|
+
* Compares the phonemes of the input pattern (using a version of the Levenshtein min-edit distance algorithm)
|
|
42755
|
+
* to each word in the lexicon, returning the set of closest matches that also match the criteria in the options object.
|
|
42756
|
+
* @param {string} word - the word to match
|
|
42757
|
+
* @param {object} [options] - options for the search
|
|
42758
|
+
* @param {number} options.minLength=4 - the minimum length of the words
|
|
42759
|
+
* @param {number} options.maxLength - the maximum length of the words
|
|
42760
|
+
* @param {number} options.numSyllables - the number of syllables in the words
|
|
42761
|
+
* @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
|
|
42762
|
+
* @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
|
|
42763
|
+
* @param {boolean} options.matchSpelling=false - if true, will also attempt to match spelling by returning an intersection with RiTa.spellsLike()
|
|
42764
|
+
* @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
|
|
42765
|
+
* or the simplified tag set [a, r, v, n]
|
|
42766
|
+
* @returns {Promise<string[]>} an array of words matching the phonemic pattern and criteria in the options object
|
|
42767
|
+
*/
|
|
42768
|
+
static async soundsLike(word, options) {
|
|
42769
|
+
return await _RiTa.lexicon.soundsLike(word, options);
|
|
42770
|
+
}
|
|
42771
|
+
/**
|
|
42772
|
+
* Generates part-of-speech tags for each word in the input with tags
|
|
42773
|
+
* from the Penn tag set or the simplified tag set [a, r, v, n].
|
|
42774
|
+
* @param {(string|string[])} word - the word or words to tag
|
|
42775
|
+
* @param {object} [options] - options for the tagging
|
|
42776
|
+
* @param {boolean} options.simple - use simple tags (noun=n, verb=v, adjective=a, adverb=r)
|
|
42777
|
+
* @returns {string|string[]} - an array of part-of-speech tags for each word in the input
|
|
42778
|
+
*/
|
|
42779
|
+
static pos(word, options) {
|
|
42780
|
+
if (options && "inline" in options) {
|
|
42781
|
+
throw Error("Use RiTa.posInline() instead");
|
|
42782
|
+
}
|
|
42783
|
+
return _RiTa.tagger.tag(word, options);
|
|
42516
42784
|
}
|
|
42785
|
+
/**
|
|
42786
|
+
* Returns true if the word has a noun form. That is, if any of its possible
|
|
42787
|
+
* parts of speech are any variant of a noun in the Penn tag set (e.g. nn, nns, nnp, nnps).
|
|
42788
|
+
* @param {string} word - the word to check
|
|
42789
|
+
* @returns {boolean} - true if the word is a noun, else false
|
|
42790
|
+
*/
|
|
42517
42791
|
static isNoun(word) {
|
|
42518
42792
|
return _RiTa.tagger.isNoun(word);
|
|
42519
42793
|
}
|
|
42794
|
+
/**
|
|
42795
|
+
* Returns true if word has an adjective form. That is, if any of its possible parts of speech
|
|
42796
|
+
* are any variant of an adjective in the Penn tag set (e.g. jj, jjr, jjs).
|
|
42797
|
+
* @param {string} word - the word to check
|
|
42798
|
+
* @returns {boolean} - true if the word is an adjective, else false
|
|
42799
|
+
*/
|
|
42520
42800
|
static isAdjective(word) {
|
|
42521
42801
|
return _RiTa.tagger.isAdjective(word);
|
|
42522
42802
|
}
|
|
42803
|
+
/**
|
|
42804
|
+
* Returns true if the word has an adverb form. That is, if any of its possible parts of speech
|
|
42805
|
+
* are any variant of an adverb in the Penn tag set (e.g. rb, rbr, rbs).
|
|
42806
|
+
* @param {string} word - the word to check
|
|
42807
|
+
* @returns {boolean} - true if the word is an adverb, else false
|
|
42808
|
+
*/
|
|
42523
42809
|
static isAdverb(word) {
|
|
42524
42810
|
return _RiTa.tagger.isAdverb(word);
|
|
42525
42811
|
}
|
|
42526
|
-
|
|
42527
|
-
|
|
42528
|
-
|
|
42812
|
+
/**
|
|
42813
|
+
* Returns true if the word has a verb form. That is, if any of its possible
|
|
42814
|
+
* parts of speech are any variant of a verb in the Penn tag set (e.g. vb, vbg, vbd, vbp, vbz).
|
|
42815
|
+
* @param {string} word - the word to check
|
|
42816
|
+
* @returns {boolean} - true if the word is a verb, else false
|
|
42817
|
+
*/
|
|
42529
42818
|
static isVerb(word) {
|
|
42530
42819
|
return _RiTa.tagger.isVerb(word);
|
|
42531
42820
|
}
|
|
42532
|
-
|
|
42533
|
-
|
|
42534
|
-
|
|
42821
|
+
/**
|
|
42822
|
+
* Returns true if every character of 'text' is a punctuation character.
|
|
42823
|
+
* @param {string} text
|
|
42824
|
+
* @returns {boolean} true if every character of 'text' is punctuation, else false
|
|
42825
|
+
*/
|
|
42826
|
+
static isPunct(text) {
|
|
42827
|
+
return text && text.length && ONLY_PUNCT.test(text);
|
|
42535
42828
|
}
|
|
42829
|
+
/**
|
|
42830
|
+
* Tags the input string with part-of-speech tags, either from the Penn tag set or the simplified tag set [a, r, v, n].
|
|
42831
|
+
* @param {string} sentence - the sentence to tag
|
|
42832
|
+
* @param {object} [options] - options for the tagging
|
|
42833
|
+
* @param {boolean} options.simple=false - use the simplified tag set [a, r, v, n]
|
|
42834
|
+
* @returns {string} the tagged sentence
|
|
42835
|
+
*/
|
|
42836
|
+
static posInline(sentence, options) {
|
|
42837
|
+
return _RiTa.tagger.tag(sentence, { ...options, inline: true });
|
|
42838
|
+
}
|
|
42839
|
+
/**
|
|
42840
|
+
* Return the singular form of the input word
|
|
42841
|
+
* @param {string} word - the word to singularize
|
|
42842
|
+
* @returns {string} the singular form of the input word
|
|
42843
|
+
*/
|
|
42536
42844
|
static singularize(word) {
|
|
42537
|
-
return _RiTa.inflector.singularize(
|
|
42845
|
+
return _RiTa.inflector.singularize(word);
|
|
42538
42846
|
}
|
|
42847
|
+
/**
|
|
42848
|
+
* Return the plural form of the input word
|
|
42849
|
+
* @param {string} word - the word to pluralize
|
|
42850
|
+
* @returns {string} the plural form of the input word
|
|
42851
|
+
*/
|
|
42539
42852
|
static pluralize(word) {
|
|
42540
|
-
return _RiTa.inflector.pluralize(
|
|
42541
|
-
}
|
|
42542
|
-
|
|
42543
|
-
|
|
42544
|
-
|
|
42545
|
-
|
|
42546
|
-
|
|
42547
|
-
|
|
42548
|
-
|
|
42549
|
-
|
|
42550
|
-
|
|
42551
|
-
|
|
42552
|
-
|
|
42553
|
-
|
|
42554
|
-
|
|
42555
|
-
|
|
42556
|
-
|
|
42557
|
-
|
|
42558
|
-
|
|
42853
|
+
return _RiTa.inflector.pluralize(word);
|
|
42854
|
+
}
|
|
42855
|
+
/**
|
|
42856
|
+
* Searches for words in the lexicon matching the given criteria, either by length, syllable-count,
|
|
42857
|
+
* spelling, phonemes, stresses, part-of-speech, etc. If no regex or options are supplied, the full set of words is returned.
|
|
42858
|
+
* @param {(string|RegExp)} [pattern] - the pattern to match
|
|
42859
|
+
* @param {object} [options] - options for the search
|
|
42860
|
+
* @param {number} options.minLength=4 - the minimum length of the words
|
|
42861
|
+
* @param {number} options.maxLength - the maximum length of the words
|
|
42862
|
+
* @param {number} options.numSyllables - the number of syllables in the words
|
|
42863
|
+
* @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
|
|
42864
|
+
* @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
|
|
42865
|
+
* @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
|
|
42866
|
+
* or the simplified tag set [a, r, v, n]
|
|
42867
|
+
* @param {string} options.type - the type of regex or string pattern to match, options are 'stresses'
|
|
42868
|
+
* or 'phones' or 'letters' (the default)
|
|
42869
|
+
* @returns {Promise<string[]>} an array of words matching the criteria in both the pattern and the options object
|
|
42870
|
+
*/
|
|
42871
|
+
static async search(pattern, options) {
|
|
42872
|
+
return await _RiTa.lexicon.search(pattern, options);
|
|
42873
|
+
}
|
|
42874
|
+
/**
|
|
42875
|
+
* Returns an array containing all unique alphabetical words (tokens) in the text.
|
|
42876
|
+
* Punctuation and case are ignored unless specified otherwise.
|
|
42877
|
+
* @param {string} text - The text from which to extract the tokens
|
|
42878
|
+
* @param {object} [options] - The options
|
|
42879
|
+
* @param {boolean} options.caseSensitive=false - Whether to pay attention to case
|
|
42880
|
+
* @param {boolean} options.ignoreStopWords=false - Whether to ignore words such as 'the', 'and', 'a', 'of', etc,
|
|
42881
|
+
* as specified in RiTa.STOP_WORDS
|
|
42882
|
+
* @param {boolean} options.splitContractions=false - Whether to convert contractions
|
|
42883
|
+
* (e.g., "I'd" or "she'll") into multiple individual tokens
|
|
42884
|
+
* @param {boolean} options.includePunct=false - Whether to include punctuation in the results
|
|
42885
|
+
* @param {boolean} options.sort=false - Whether to sort the tokens before returning them
|
|
42886
|
+
* @returns {string[]} Array of tokens
|
|
42887
|
+
*/
|
|
42888
|
+
static tokens(text, options = {
|
|
42889
|
+
caseSensitive: false,
|
|
42890
|
+
ignoreStopWords: false,
|
|
42891
|
+
splitContractions: false,
|
|
42892
|
+
includePunct: false,
|
|
42893
|
+
sort: false
|
|
42894
|
+
}) {
|
|
42895
|
+
return _RiTa.tokenizer.tokens(text, options);
|
|
42896
|
+
}
|
|
42897
|
+
/**
|
|
42898
|
+
* Tokenizes an input string into words, according to the Penn Treebank conventions
|
|
42899
|
+
* @param {string} input - The text to tokenize
|
|
42900
|
+
* @param {object} [options] - The options
|
|
42901
|
+
* @param {RegExp} options.regex=null - An optional custom regex to split on
|
|
42902
|
+
* @param {boolean} options.splitHyphens=false - Whether to split hyphenated words
|
|
42903
|
+
* (e.g., "mother-in-law") into multiple individual tokens
|
|
42904
|
+
* @param {boolean} options.splitContractions=false - Whether to split contractions
|
|
42905
|
+
* (e.g., "I'd" or "she'll") into multiple individual tokens
|
|
42906
|
+
* @returns {string[]} Array of tokens
|
|
42907
|
+
*/
|
|
42908
|
+
static tokenize(input, options) {
|
|
42909
|
+
return _RiTa.tokenizer.tokenize(input, options);
|
|
42910
|
+
}
|
|
42911
|
+
/**
|
|
42912
|
+
* Joins an array (of words and punctuation) into a sentence, according to
|
|
42913
|
+
* the Penn Treebank conventions. The inverse of RiTa.tokenize().
|
|
42914
|
+
* @param {string[]} input - The array of words to join
|
|
42915
|
+
* @param {string} delim=' ' - The delimiter to use between words, or a space by default
|
|
42916
|
+
* @returns {string} The joined sentence
|
|
42917
|
+
*/
|
|
42918
|
+
static untokenize(input, delim = " ") {
|
|
42919
|
+
return _RiTa.tokenizer.untokenize(input, delim);
|
|
42920
|
+
}
|
|
42921
|
+
/**
|
|
42922
|
+
* Splits the input text into sentences using Penn Treebank conventions, optionally splitting on a custom pattern.
|
|
42923
|
+
* @param {string} text - The text to split
|
|
42924
|
+
* @param {(string|RegExp)} [pattern] - An optional custom regex to split on
|
|
42925
|
+
* @returns {string[]} An array of sentences
|
|
42926
|
+
*/
|
|
42927
|
+
static sentences(text, pattern) {
|
|
42928
|
+
return _RiTa.tokenizer.sentences(text, pattern);
|
|
42929
|
+
}
|
|
42930
|
+
/**
|
|
42931
|
+
* Returns true if the word is a 'stop word', a commonly used word that is often ignored in text processing.
|
|
42932
|
+
* To use your own list, set RiTa.STOP_WORDS to a new array of (lowercase) words.
|
|
42933
|
+
* @param {string} word - the word to check
|
|
42934
|
+
* @returns {boolean} true if the word is a stop word, else false
|
|
42935
|
+
*/
|
|
42936
|
+
static isStopWord(word) {
|
|
42937
|
+
return _RiTa.STOP_WORDS.includes(word.toLowerCase());
|
|
42559
42938
|
}
|
|
42560
|
-
|
|
42561
|
-
|
|
42939
|
+
/**
|
|
42940
|
+
* Extracts base roots from a word (or from each word of a space-separated phrase) using the library's Snowball-based English stemmer.
|
|
42941
|
+
* @param {string} word - the word to stem
|
|
42942
|
+
* @returns {string} the base root of the word
|
|
42943
|
+
*/
|
|
42944
|
+
static stem(word) {
|
|
42945
|
+
return stemmer_default.stem(word);
|
|
42562
42946
|
}
|
|
42947
|
+
/**
|
|
42948
|
+
* Returns the present participle of the input word (e.g., "walking" for "walk").
|
|
42949
|
+
* @param {string} verbWord - the word to get the present participle of
|
|
42950
|
+
* @returns {string} the present participle of the input word
|
|
42951
|
+
*/
|
|
42563
42952
|
static presentPart(verbWord) {
|
|
42564
|
-
return _RiTa.conjugator.presentPart(
|
|
42953
|
+
return _RiTa.conjugator.presentPart(verbWord);
|
|
42565
42954
|
}
|
|
42955
|
+
/**
|
|
42956
|
+
* Returns the past participle of the input word (e.g., "walked" for "walk").
|
|
42957
|
+
* @param {string} verbWord - the word to get the past participle of
|
|
42958
|
+
* @returns {string} the past participle of the input word
|
|
42959
|
+
*/
|
|
42566
42960
|
static pastPart(verbWord) {
|
|
42567
|
-
return _RiTa.conjugator.pastPart(
|
|
42568
|
-
}
|
|
42569
|
-
|
|
42570
|
-
|
|
42571
|
-
|
|
42572
|
-
|
|
42573
|
-
|
|
42961
|
+
return _RiTa.conjugator.pastPart(verbWord);
|
|
42962
|
+
}
|
|
42963
|
+
/**
|
|
42964
|
+
* Conjugates the 'verb' according to the specified options (tense, person, number, etc.)
|
|
42965
|
+
* @param {string} verbWord - the verb to conjugate
|
|
42966
|
+
* @param {object} [options]
|
|
42967
|
+
* @param {number} options.tense - the tense of the verb, either RiTa.PAST, RiTa.PRESENT, or RiTa.FUTURE
|
|
42968
|
+
* @param {number} options.person - the person of the verb, either RiTa.FIRST, RiTa.SECOND, or RiTa.THIRD
|
|
42969
|
+
* @param {number} options.number - the number of the verb, either RiTa.SINGULAR or RiTa.PLURAL
|
|
42970
|
+
* @param {number} options.form - the form of the verb, either RiTa.INFINITIVE or RiTa.GERUND
|
|
42971
|
+
* @param {boolean} options.passive - whether the verb should be passive
|
|
42972
|
+
* @param {boolean} options.progressive - whether the verb should be progressive
|
|
42973
|
+
* @param {boolean} options.perfect - whether the verb should be perfect
|
|
42974
|
+
* @param {boolean} options.interrogative - whether the verb should be in interrogative form
|
|
42975
|
+
* @returns {string} the conjugated verb
|
|
42976
|
+
*/
|
|
42977
|
+
static conjugate(verbWord, options) {
|
|
42978
|
+
return _RiTa.conjugator.conjugate(verbWord, options);
|
|
42574
42979
|
}
|
|
42575
|
-
|
|
42576
|
-
|
|
42980
|
+
/**
|
|
42981
|
+
* Analyzes the input and returns a new string containing the stresses for each syllable of the input text.
|
|
42982
|
+
* @param {string} input - the text to analyze
|
|
42983
|
+
* @returns {string} a string containing the stresses for each syllable of the input text
|
|
42984
|
+
*/
|
|
42985
|
+
static stresses(input, options) {
|
|
42986
|
+
return _RiTa.analyzer.analyze(input, options).stresses;
|
|
42577
42987
|
}
|
|
42578
|
-
|
|
42579
|
-
|
|
42988
|
+
/**
|
|
42989
|
+
* Analyzes the input and returns a new string containing the syllables of the input text.
|
|
42990
|
+
* @param {string} input - the text to analyze
|
|
42991
|
+
* @returns {string} a string containing the syllables of the input text
|
|
42992
|
+
*/
|
|
42993
|
+
static syllables(input, options) {
|
|
42994
|
+
return _RiTa.analyzer.analyze(input, options).syllables;
|
|
42580
42995
|
}
|
|
42581
|
-
|
|
42582
|
-
|
|
42996
|
+
/**
|
|
42997
|
+
* Analyzes the input and returns a new string containing the phonemes of the input text.
|
|
42998
|
+
* @param {string} input - the text to analyze
|
|
42999
|
+
* @returns {string} a string containing the phones of the input text
|
|
43000
|
+
*/
|
|
43001
|
+
static phones(input, options) {
|
|
43002
|
+
return _RiTa.analyzer.analyze(input, options).phones;
|
|
43003
|
+
}
|
|
43004
|
+
/**
|
|
43005
|
+
* Analyzes the input to compute a set of features for the input,
|
|
43006
|
+
* including phonemes, syllables, stresses, and part-of-speech tags.
|
|
43007
|
+
* @param {string} input - the text to analyze
|
|
43008
|
+
* @param {object} [options] - options for the analysis
|
|
43009
|
+
* @param {boolean} options.simple=false - whether to use the simplified tag set [a, r, v, n]
|
|
43010
|
+
* @returns {object} an object containing the features of the input text (phones, syllables, stresses, pos), or the features inline
|
|
43011
|
+
*/
|
|
43012
|
+
static analyze(input, options) {
|
|
43013
|
+
return _RiTa.analyzer.analyze(input, options);
|
|
42583
43014
|
}
|
|
42584
43015
|
////////////////////////////// lex-sync ////////////////////////////
|
|
42585
|
-
|
|
42586
|
-
|
|
42587
|
-
|
|
42588
|
-
|
|
42589
|
-
|
|
42590
|
-
|
|
42591
|
-
|
|
42592
|
-
|
|
42593
|
-
|
|
42594
|
-
|
|
42595
|
-
|
|
42596
|
-
|
|
42597
|
-
|
|
42598
|
-
|
|
42599
|
-
|
|
43016
|
+
/**
|
|
43017
|
+
* A synchronous version of RiTa.spellsLike(). It compares the letters of the input word
|
|
43018
|
+
* (using a version of the Levenshtein min-edit distance algorithm) to each word in the lexicon,
|
|
43019
|
+
* returning the set of closest matches that also match the criteria in the options object.
|
|
43020
|
+
* @param {string} word - the word to match
|
|
43021
|
+
* @param {object} [options] - options for the search
|
|
43022
|
+
* @param {number} options.minLength=4 - the minimum length of the words
|
|
43023
|
+
* @param {number} options.maxLength - the maximum length of the words
|
|
43024
|
+
* @param {number} options.numSyllables - the number of syllables in the words
|
|
43025
|
+
* @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
|
|
43026
|
+
* @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
|
|
43027
|
+
* or the simplified tag set [a, r, v, n]
|
|
43028
|
+
* @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
|
|
43029
|
+
* @return {string[]} an array of words matching the spelling pattern and criteria in the options object
|
|
43030
|
+
*/
|
|
43031
|
+
static spellsLikeSync(word, options) {
|
|
43032
|
+
return _RiTa.lexicon.spellsLikeSync(word, options);
|
|
43033
|
+
}
|
|
43034
|
+
/**
|
|
43035
|
+
* A synchronous version of RiTa.lexicon.soundsLike(). It compares the phonemes of the input pattern (using a version of the Levenshtein min-edit distance algorithm)
|
|
43036
|
+
* to each word in the lexicon, returning the set of closest matches that also match the criteria in the options object.
|
|
43037
|
+
* @param {string} word - the word to match
|
|
43038
|
+
* @param {object} [options] - options for the search
|
|
43039
|
+
* @param {number} options.minLength=4 - the minimum length of the words
|
|
43040
|
+
* @param {number} options.maxLength - the maximum length of the words
|
|
43041
|
+
* @param {number} options.numSyllables - the number of syllables in the words
|
|
43042
|
+
* @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
|
|
43043
|
+
* @param {boolean} options.matchSpelling=false - if true, will also attempt to match spelling by returning an intersection with RiTa.spellsLike()
|
|
43044
|
+
* @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
|
|
43045
|
+
* @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
|
|
43046
|
+
* or the simplified tag set [a, r, v, n]
|
|
43047
|
+
* @return {string[]} an array of words matching the phonemic pattern and criteria in the options object
|
|
43048
|
+
*/
|
|
43049
|
+
static soundsLikeSync(word, options) {
|
|
43050
|
+
return _RiTa.lexicon.soundsLikeSync(word, options);
|
|
43051
|
+
}
|
|
43052
|
+
/**
|
|
43053
|
+
* Synchronous version of RiTa.rhymes(). Returns words that rhyme with the given word.
|
|
43054
|
+
* Two words are considered as rhyming if their final stressed vowel and all following phonemes are identical.
|
|
43055
|
+
* @param {string} word - the word to match
|
|
43056
|
+
* @param {object} [options] - options for the search
|
|
43057
|
+
* @param {number} options.minLength=4 - the minimum length of the words
|
|
43058
|
+
* @param {number} options.maxLength - the maximum length of the words
|
|
43059
|
+
* @param {number} options.numSyllables - the number of syllables in the words
|
|
43060
|
+
* @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
|
|
43061
|
+
* @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
|
|
43062
|
+
* @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
|
|
43063
|
+
* or the simplified tag set [a, r, v, n]
|
|
43064
|
+
* @return {string[]} an array of rhymes that match criteria in the options object
|
|
43065
|
+
*/
|
|
43066
|
+
static rhymesSync(word, options) {
|
|
43067
|
+
return _RiTa.lexicon.rhymesSync(word, options);
|
|
43068
|
+
}
|
|
43069
|
+
/**
|
|
43070
|
+
* A synchronous version of RiTa.search(). Searches for words in the lexicon matching
|
|
43071
|
+
* the given criteria, either by length, syllable-count, spelling, phonemes, stresses,
|
|
43072
|
+
* part-of-speech, etc.
|
|
43073
|
+
* @param {(string|RegExp)} [pattern] - the pattern to match
|
|
43074
|
+
* @param {object} [options] - options for the search
|
|
43075
|
+
* @param {number} options.minLength=4 - the minimum length of the words
|
|
43076
|
+
* @param {number} options.maxLength - the maximum length of the words
|
|
43077
|
+
* @param {number} options.numSyllables - the number of syllables in the words
|
|
43078
|
+
* @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
|
|
43079
|
+
* @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
|
|
43080
|
+
* @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
|
|
43081
|
+
* or the simplified tag set [a, r, v, n]
|
|
43082
|
+
* @param {string} options.type - the type of regex or string pattern to match, options are 'stresses' or 'phones' or 'letters' (the default)
|
|
43083
|
+
* @return {string[]} an array of words matching the criteria in both the pattern and the options object
|
|
43084
|
+
*/
|
|
43085
|
+
static searchSync(pattern, options) {
|
|
43086
|
+
return _RiTa.lexicon.searchSync(pattern, options);
|
|
43087
|
+
}
|
|
43088
|
+
/**
|
|
43089
|
+
* A synchronous version of RiTa.alliterations(). Finds alliterations by comparing the phonemes
|
|
43090
|
+
* of the input string to those of each word in the lexicon via a minimum-edit-distance metric.
|
|
43091
|
+
* @param {string} word - the word to match
|
|
43092
|
+
* @param {object} [options] - options for the search
|
|
43093
|
+
* @param {number} options.minLength=4 - the minimum length of the words
|
|
43094
|
+
* @param {number} options.maxLength - the maximum length of the words
|
|
43095
|
+
* @param {number} options.numSyllables - the number of syllables in the words
|
|
43096
|
+
* @param {number} options.limit=10 - the maximum number of results to return (pass -1 to return all matches)
|
|
43097
|
+
* @param {boolean} options.shuffle=false - whether to shuffle the results before returning them
|
|
43098
|
+
* @param {string} options.pos - the part-of-speech of the words to return, either from the Penn tag set
|
|
43099
|
+
* or the simplified tag set [a, r, v, n]
|
|
43100
|
+
* @return {string[]} an array of alliterations matching criteria in the options object
|
|
43101
|
+
*/
|
|
43102
|
+
static alliterationsSync(word, options) {
|
|
43103
|
+
return _RiTa.lexicon.alliterationsSync(word, options);
|
|
42600
43104
|
}
|
|
42601
43105
|
////////////////////////////// niapa /////////////////////////////
|
|
42602
|
-
|
|
42603
|
-
|
|
42604
|
-
|
|
42605
|
-
|
|
43106
|
+
/**
|
|
43107
|
+
* Returns a random integer from a range.
|
|
43108
|
+
* The version of randi() with one parameter returns a random integer from 0 up to but not including the number.
|
|
43109
|
+
* The version of randi() with two parameters returns a random integer from the first number up to but not including the second.
|
|
43110
|
+
* @param {number} param1 - the first parameter
|
|
43111
|
+
* @param {number} [param2] - the second optional parameter
|
|
43112
|
+
* @returns {number} a random integer from the range
|
|
43113
|
+
*/
|
|
43114
|
+
static randi(param1, param2) {
|
|
43115
|
+
// Spreading `arguments` (rather than passing param1, param2) forwards exactly the arguments
// the caller supplied to _RiTa.random(); the value it returns is then floored with Math.floor.
return Math.floor(_RiTa.random(...arguments));
|
|
43116
|
+
}
|
|
43117
|
+
/**
|
|
43118
|
+
* Returns a random number or a random element from an array.
|
|
43119
|
+
* The version of random() with no parameters returns a random number from 0 up to but not including 1.
|
|
43120
|
+
* The version of random() with one parameter works one of two ways. If the argument passed is a number, random() returns a random number from 0 up to but not including the number.
|
|
43121
|
+
* If the argument passed is an array, random() returns a random element from that array.
|
|
43122
|
+
* The version of random() with two parameters returns a random number from the first number up to but not including the second.
|
|
43123
|
+
* @param {number|object[]} [param1] - the minimum value of the random number, or an array of values to choose from
|
|
43124
|
+
* @param {number} [param2] - the maximum value of the random number
|
|
43125
|
+
* @returns {number|object} a random number or a random element from the array
|
|
43126
|
+
*/
|
|
43127
|
+
static random(param1, param2) {
|
|
42606
43128
|
// Forwards the caller's actual argument list (via `arguments`) to the shared randomizer,
// so the callee sees exactly as many arguments as were passed here.
return _RiTa.randomizer.random(...arguments);
|
|
42607
43129
|
}
|
|
42608
43130
|
};
|
|
42609
|
-
|
|
42610
|
-
|
|
42611
|
-
|
|
42612
|
-
|
|
42613
|
-
|
|
42614
|
-
|
|
42615
|
-
|
|
42616
|
-
|
|
42617
|
-
|
|
42618
|
-
|
|
42619
|
-
|
|
42620
|
-
|
|
42621
|
-
|
|
42622
|
-
|
|
42623
|
-
|
|
42624
|
-
|
|
42625
|
-
|
|
42626
|
-
|
|
42627
|
-
|
|
42628
|
-
|
|
42629
|
-
|
|
42630
|
-
|
|
42631
|
-
|
|
42632
|
-
|
|
42633
|
-
|
|
42634
|
-
|
|
42635
|
-
|
|
42636
|
-
|
|
42637
|
-
|
|
42638
|
-
|
|
42639
|
-
|
|
42640
|
-
|
|
42641
|
-
|
|
42642
|
-
|
|
42643
|
-
|
|
42644
|
-
|
|
42645
|
-
|
|
42646
|
-
|
|
42647
|
-
|
|
42648
|
-
|
|
42649
|
-
RiTa2.GERUND = 2;
|
|
42650
|
-
RiTa2.SPLIT_CONTRACTIONS = false;
|
|
43131
|
+
RiTa.RiGrammar = RiGrammar;
|
|
43132
|
+
RiTa.RiMarkov = markov_default;
|
|
43133
|
+
RiTa.Stemmer = stemmer_default;
|
|
43134
|
+
RiTa.randomizer = new randgen_default();
|
|
43135
|
+
RiTa.tagger = new tagger_default(RiTa);
|
|
43136
|
+
RiTa.analyzer = new analyzer_default(RiTa);
|
|
43137
|
+
RiTa.concorder = new concorder_default(RiTa);
|
|
43138
|
+
RiTa.tokenizer = new tokenizer_default(RiTa);
|
|
43139
|
+
RiTa.inflector = new inflector_default(RiTa);
|
|
43140
|
+
RiTa.lexicon = new lexicon_default(RiTa);
|
|
43141
|
+
RiTa.conjugator = new conjugator_default(RiTa);
|
|
43142
|
+
markov_default.parent = RiTa;
|
|
43143
|
+
stemmer_default.tokenizer = RiTa.tokenizer;
|
|
43144
|
+
RiTa.SILENT = false;
|
|
43145
|
+
RiTa.SILENCE_LTS = false;
|
|
43146
|
+
RiTa.VERSION = "3.0.22";
|
|
43147
|
+
RiTa.FIRST = 1;
|
|
43148
|
+
RiTa.SECOND = 2;
|
|
43149
|
+
RiTa.THIRD = 3;
|
|
43150
|
+
RiTa.PAST = 4;
|
|
43151
|
+
RiTa.PRESENT = 5;
|
|
43152
|
+
RiTa.FUTURE = 6;
|
|
43153
|
+
RiTa.SINGULAR = 7;
|
|
43154
|
+
RiTa.PLURAL = 8;
|
|
43155
|
+
RiTa.NORMAL = 9;
|
|
43156
|
+
RiTa.STRESS = "1";
|
|
43157
|
+
RiTa.NOSTRESS = "0";
|
|
43158
|
+
RiTa.PHONE_BOUNDARY = "-";
|
|
43159
|
+
RiTa.WORD_BOUNDARY = " ";
|
|
43160
|
+
RiTa.SYLLABLE_BOUNDARY = "/";
|
|
43161
|
+
RiTa.SENTENCE_BOUNDARY = "|";
|
|
43162
|
+
RiTa.VOWELS = "aeiou";
|
|
43163
|
+
RiTa.PHONES = ["aa", "ae", "ah", "ao", "aw", "ay", "b", "ch", "d", "dh", "eh", "er", "ey", "f", "g", "hh", "ih", "iy", "jh", "k", "l", "m", "n", "ng", "ow", "oy", "p", "r", "s", "sh", "t", "th", "uh", "uw", "v", "w", "y", "z", "zh"];
|
|
43164
|
+
RiTa.ABRV = ["Adm.", "Capt.", "Cmdr.", "Col.", "Dr.", "Gen.", "Gov.", "Lt.", "Maj.", "Messrs.", "Mr.", "Mrs.", "Ms.", "Prof.", "Rep.", "Reps.", "Rev.", "Sen.", "Sens.", "Sgt.", "Sr.", "St.", "A.k.a.", "C.f.", "I.e.", "E.g.", "Vs.", "V.", "Jan.", "Feb.", "Mar.", "Apr.", "Mar.", "Jun.", "Jul.", "Aug.", "Sept.", "Oct.", "Nov.", "Dec."];
|
|
43165
|
+
RiTa.QUESTIONS = ["was", "what", "when", "where", "which", "why", "who", "will", "would", "who", "how", "if", "is", "could", "might", "does", "are", "have"];
|
|
43166
|
+
RiTa.STOP_WORDS = ["and", "a", "of", "in", "i", "you", "is", "to", "that", "it", "for", "on", "have", "with", "this", "be", "not", "are", "as", "was", "but", "or", "from", "my", "at", "if", "they", "your", "all", "he", "by", "one", "me", "what", "so", "can", "will", "do", "an", "about", "we", "just", "would", "there", "no", "like", "out", "his", "has", "up", "more", "who", "when", "don't", "some", "had", "them", "any", "their", "it's", "only", "which", "i'm", "been", "other", "were", "how", "then", "now", "her", "than", "she", "well", "also", "us", "very", "because", "am", "here", "could", "even", "him", "into", "our", "much", "too", "did", "should", "over", "want", "these", "may", "where", "most", "many", "those", "does", "why", "please", "off", "going", "its", "i've", "down", "that's", "can't", "you're", "didn't", "another", "around", "must", "few", "doesn't", "the", "every", "yes", "each", "maybe", "i'll", "away", "doing", "oh", "else", "isn't", "he's", "there's", "hi", "won't", "ok", "they're", "yeah", "mine", "we're", "what's", "shall", "she's", "hello", "okay", "here's", "less", "didn't", "said", "over", "this", "that", "just", "then", "under", "some"];
|
|
43167
|
+
RiTa.MASS_NOUNS = ["abalone", "asbestos", "barracks", "bathos", "breeches", "beef", "britches", "chaos", "chinese", "cognoscenti", "clippers", "corps", "cosmos", "crossroads", "diabetes", "ethos", "gallows", "graffiti", "herpes", "innings", "lens", "means", "measles", "mews", "mumps", "news", "pasta", "pathos", "pincers", "pliers", "proceedings", "rabies", "rhinoceros", "sassafras", "scissors", "series", "shears", "species", "tuna", "acoustics", "aesthetics", "aquatics", "basics", "ceramics", "classics", "cosmetics", "dialectics", "deer", "dynamics", "ethics", "harmonics", "heroics", "mechanics", "metrics", "ooze", "optics", "physics", "polemics", "pyrotechnics", "statistics", "tactics", "tropics", "bengalese", "bengali", "bonsai", "booze", "cellulose", "mess", "moose", "burmese", "chinese", "colossus", "congolese", "discus", "electrolysis", "emphasis", "expertise", "flu", "fructose", "gauze", "glucose", "grease", "guyanese", "haze", "incense", "japanese", "lebanese", "malaise", "mayonnaise", "maltese", "music", "money", "menopause", "merchandise", "olympics", "overuse", "paradise", "poise", "potash", "portuguese", "prose", "recompense", "remorse", "repose", "senegalese", "siamese", "singhalese", "sleaze", "sioux", "sudanese", "suspense", "swiss", "taiwanese", "vietnamese", "unease", "aircraft", "anise", "antifreeze", "applause", "archdiocese", "apparatus", "asparagus", "bellows", "bison", "bluefish", "bourgeois", "bream", "brill", "butterfingers", "cargo", "carp", "catfish", "chassis", "clone", "clones", "clothes", "chub", "cod", "codfish", "coley", "contretemps", "crawfish", "crayfish", "cuttlefish", "dice", "dogfish", "doings", "dory", "downstairs", "eldest", "earnings", "economics", "electronics", "firstborn", "fish", "flatfish", "flounder", "fowl", "fry", "fries", "works", "goldfish", "golf", "grand", "grief", "haddock", "hake", "halibut", "headquarters", "herring", "hertz", "honey", "horsepower", "goods", "hovercraft", "ironworks", "kilohertz", "ling", 
"shrimp", "swine", "lungfish", "mackerel", "macaroni", "megahertz", "moorfowl", "moorgame", "mullet", "nepalese", "offspring", "pants", "patois", "pekinese", "perch", "pickerel", "pike", "potpourri", "precis", "quid", "rand", "rendezvous", "roach", "salmon", "samurai", "seychelles", "shad", "sheep", "shellfish", "smelt", "spaghetti", "spacecraft", "starfish", "stockfish", "sunfish", "superficies", "sweepstakes", "smallpox", "swordfish", "tennis", "tobacco", "triceps", "trout", "tunafish", "turbot", "trousers", "turf", "dibs", "undersigned", "waterfowl", "waterworks", "waxworks", "wildfowl", "woodworm", "yen", "aries", "pisces", "forceps", "jeans", "mathematics", "odds", "politics", "remains", "aids", "wildlife", "shall", "would", "may", "might", "ought", "should", "acne", "admiration", "advice", "air", "anger", "anticipation", "assistance", "awareness", "bacon", "baggage", "blood", "bravery", "chess", "clay", "clothing", "coal", "compliance", "comprehension", "confusion", "consciousness", "cream", "darkness", "diligence", "dust", "education", "empathy", "enthusiasm", "envy", "equality", "equipment", "evidence", "feedback", "fitness", "flattery", "foliage", "fun", "furniture", "garbage", "gold", "gossip", "grammar", "gratitude", "gravel", "guilt", "happiness", "hardware", "hate", "hay", "health", "heat", "help", "hesitation", "homework", "honesty", "honor", "honour", "hospitality", "hostility", "humanity", "humility", "ice", "immortality", "independence", "information", "integrity", "intimidation", "jargon", "jealousy", "jewelry", "justice", "knowledge", "literacy", "logic", "luck", "lumber", "luggage", "mail", "management", "milk", "morale", "mud", "nonsense", "oppression", "optimism", "oxygen", "participation", "pay", "peace", "perseverance", "pessimism", "pneumonia", "poetry", "police", "pride", "privacy", "propaganda", "public", "punctuation", "recovery", "rice", "rust", "satisfaction", "schnapps", "shame", "slang", "software", "stamina", "starvation", "steam", 
"steel", "stuff", "support", "sweat", "thunder", "timber", "toil", "traffic", "tongs", "training", "trash", "valor", "vehemence", "violence", "warmth", "waste", "weather", "wheat", "wisdom", "work", "accommodation", "advertising", "aid", "art", "bread", "business", "butter", "calm", "cash", "cheese", "childhood", "clothing ", "coffee", "content", "corruption", "courage", "currency", "damage", "danger", "determination", "electricity", "employment", "energy", "entertainment", "failure", "fame", "fire", "flour", "food", "freedom", "friendship", "fuel", "genetics", "hair", "harm", "hospitality ", "housework", "humour", "imagination", "importance", "innocence", "intelligence", "juice", "kindness", "labour", "lack", "laughter", "leisure", "literature", "litter", "love", "magic", "metal", "motherhood", "motivation", "nature", "nutrition", "obesity", "oil", "old age", "paper", "patience", "permission", "pollution", "poverty", "power", "production", "progress", "pronunciation", "publicity", "quality", "quantity", "racism", "rain", "relaxation", "research", "respect", "room (space)", "rubbish", "safety", "salt", "sand", "seafood", "shopping", "silence", "smoke", "snow", "soup", "speed", "spelling", "stress ", "sugar", "sunshine", "tea", "time", "tolerance", "trade", "transportation", "travel", "trust", "understanding", "unemployment", "usage", "vision", "water", "wealth", "weight", "welfare", "width", "wood", "yoga", "youth", "homecare", "childcare", "fanfare", "healthcare", "medicare"];
|
|
43168
|
+
RiTa.INFINITIVE = 1;
|
|
43169
|
+
RiTa.GERUND = 2;
|
|
43170
|
+
RiTa.SPLIT_CONTRACTIONS = false;
|
|
42651
43171
|
var ONLY_PUNCT = /^[\p{P}|\+|-|<|>|\^|\$|\ufffd|`]*$/u;
|
|
42652
43172
|
var IS_LETTER = /^[a-z\u00C0-\u00ff]+$/;
|
|
42653
|
-
|
|
42654
|
-
_riscript.RiScript
|
|
42655
|
-
RiTa2.riscript = new (0, _riscript.RiScript)({ RiTa: RiTa2 });
|
|
43173
|
+
RiTa.RiScript = _riscript.RiScript;
|
|
43174
|
+
RiTa.riscript = new (0, _riscript.RiScript)({ RiTa });
|
|
42656
43175
|
|
|
42657
43176
|
|
|
42658
|
-
exports.RiTa =
|
|
43177
|
+
exports.RiTa = RiTa;
|
|
42659
43178
|
//# sourceMappingURL=rita.cjs.map
|