npm - rita - Versions diffs - 3.1.1 → 3.1.3 - Mend

rita 3.1.1 → 3.1.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md CHANGED Viewed

@@ -1,3 +1,6 @@
+![ritajs](pen.jpg)
 <a href="https://github.com/dhowe/rjs3/actions"><img src="https://github.com/dhowe/rjs3/actions/workflows/node.js.yml/badge.svg" alt="ci tests"></a> <a href="https://www.npmjs.com/package/rita"><img src="https://img.shields.io/npm/v/rita.svg" alt="npm version"></a> <a href="https://www.gnu.org/licenses/gpl-3.0.en.html"><img src="https://img.shields.io/badge/license-GPL-orange.svg" alt="license"></a>
@@ -22,6 +25,7 @@ Note: version 3.0 contains breaking changes -- please check the [release notes](
 * For [node](#with-nodejs-and-npm): `$ npm install rita`
    ```let { RiTa }  = require('rita');```
 * For [browsers](#a-simple-browser-sketch): ```<script src="https://unpkg.com/rita"></script>```
+* For [p5.js](#with-p5js): ```<script src="https://unpkg.com/rita"></script>```
 * For [developers](#developing)

package/dist/rita.cjs CHANGED Viewed

@@ -874,6 +874,7 @@ var Tokenizer = class {
   }
   tokenize(input, opts = {
     // regex: null,
+    // debug: false,
     // splitHyphens: false,
     // splitContractions: false
   }) {
@@ -883,7 +884,11 @@ var Tokenizer = class {
       return input.split(opts.regex);
     let { tags, text } = this.pushTags(input.trim());
     for (let i = 0; i < TOKENIZE_RE.length; i += 2) {
+      if (opts.debug)
+        var pre = text;
       text = text.replace(TOKENIZE_RE[i], TOKENIZE_RE[i + 1]);
+      if (opts.debug && text !== pre)
+        console.log("HIT" + i, pre + " -> " + text, TOKENIZE_RE[i], TOKENIZE_RE[i + 1]);
     }
     if (opts.splitHyphens) {
       text = text.replace(/([a-zA-Z]+)-([a-zA-Z]+)/g, "$1 - $2");
@@ -1105,85 +1110,85 @@ var APOS_RE = /^[\u2019']+$/;
 var NL_RE = /(\r?\n)+/g;
 var WWW_RE = /^(www[0-9]?|WWW[0-9]?)$/;
 var NOSP_BF_PUNCT_RE = /^[,\.\;\:\?\!\)""\u201c\u201d\u2019\u2018`'%\u2026\u2103\^\*\u00b0\/\u2044\u2012\u2013\u2014\-@]+$/;
-var LINEBREAK_RE = /[\n\r\036]/;
+var LINEBREAK_RE = /\r?\n/;
 var URL_RE = /((http[s]?):(\/\/))?([-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b)([-a-zA-Z0-9()@:%_\+.~#?&\/\/=]*)/;
 var EMAIL_RE = /^\w+([\.-]?\w+)*@\w+([\.-]?\w+)*(\.\w{2,3})+$/;
 var TOKENIZE_RE = [
   // save  --------
-  /([Ee])[.]([Gg])[.]/g,
+  /\b([Ee])[.]([Gg])[.]/g,
   "_$1$2_",
   //E.g
-  /([Ii])[.]([Ee])[.]/g,
+  /\b([Ii])[.]([Ee])[.]/g,
   "_$1$2_",
   //i.e
-  /([Aa])[.]([Mm])[.]/g,
+  /\b([Aa])[.]([Mm])[.]/g,
   "_$1$2_",
   //a.m.
-  /([Pp])[.]([Mm])[.]/g,
+  /\b([Pp])[.]([Mm])[.]/g,
   "_$1$2_",
   //p.m.
-  /(Cap)[\.]/g,
+  /\b(Cap)[\.]/g,
   "_Cap_",
   //Cap.
-  /([Cc])[\.]/g,
+  /\b([Cc])[\.]/g,
   "_$1_",
   //c.
-  /([Ee][Tt])[\s]([Aa][Ll])[\.]/,
+  /\b([Ee][Tt])[\s]([Aa][Ll])[\.]/,
   "_$1zzz$2_",
   // et al.
-  /(etc|ETC)[\.]/g,
+  /\b(etc|ETC)[\.]/g,
   "_$1_",
   //etc.
-  /([Pp])[\.]([Ss])[\.]/g,
+  /\b([Pp])[\.]([Ss])[\.]/g,
   "_$1$2dot_",
   // p.s.
-  /([Pp])[\.]([Ss])/g,
+  /\b([Pp])[\.]([Ss])/g,
   "_$1$2_",
   // p.s
-  /([Pp])([Hh])[\.]([Dd])/g,
+  /\b([Pp])([Hh])[\.]([Dd])/g,
   "_$1$2$3_",
   // Ph.D
-  /([Rr])[\.]([Ii])[\.]([Pp])/g,
+  /\b([Rr])[\.]([Ii])[\.]([Pp])/g,
   "_$1$2$3_",
   // R.I.P
-  /([Vv])([Ss]?)[\.]/g,
+  /\b([Vv])([Ss]?)[\.]/g,
   "_$1$2_",
   // vs. and v.
-  /([Mm])([Rr]|[Ss]|[Xx])[\.]/g,
+  /\b([Mm])([Rr]|[Ss]|[Xx])\./g,
   "_$1$2_",
   // Mr. Ms. and Mx.
-  /([Dd])([Rr])[\.]/g,
+  /\b([Dd])([Rr])[\.]/g,
   "_$1$2_",
   // Dr.
-  /([Pp])([Ff])[\.]/g,
+  /\b([Pp])([Ff])[\.]/g,
   "_$1$2_",
   // Pf.
-  /([Ii])([Nn])([Dd]|[Cc])[\.]/g,
+  /\b([Ii])([Nn])([Dd]|[Cc])[\.]/g,
   "_$1$2$3_",
   // Ind. and Inc.
-  /([Cc])([Oo])[\.][\,][\s]([Ll])([Tt])([Dd])[\.]/g,
+  /\b([Cc])([Oo])[\.][\,][\s]([Ll])([Tt])([Dd])[\.]/g,
   "_$1$2dcs$3$4$5_",
   // co., ltd.
-  /([Cc])([Oo])[\.][\s]([Ll])([Tt])([Dd])[\.]/g,
+  /\b([Cc])([Oo])[\.][\s]([Ll])([Tt])([Dd])[\.]/g,
   "_$1$2ds$3$4$5_",
   // co. ltd.
-  /([Cc])([Oo])[\.][\,]([Ll])([Tt])([Dd])[\.]/g,
+  /\b([Cc])([Oo])[\.][\,]([Ll])([Tt])([Dd])[\.]/g,
   "_$1$2dc$3$4$5_",
   // co.,ltd.
-  /([Cc])([Oo])([Rr]?)([Pp]?)[\.]/g,
+  /\b([Cc])([Oo])([Rr]?)([Pp]?)[\.]/g,
   "_$1$2$3$4_",
   // Corp. and Co.
-  /([Ll])([Tt])([Dd])[\.]/g,
+  /\b([Ll])([Tt])([Dd])[\.]/g,
   "_$1$2$3_",
   // ltd.
-  /(prof|Prof|PROF)[\.]/g,
+  /\b(prof|Prof|PROF)[\.]/g,
   "_$1_",
   //Prof.
   //   /(\w+([\.-_]?\w+)*)@(\w+([\.-_]?\w+)*)\.(\w{2,3})/g, "$1__AT__$3.$5", //email addresses
   // /^\w+([\.-]?\w+)+@\w+([\.:]?\w+)+(\.[a-zA-Z0-9]{2,3})+$/g, "$1__AT__$2", //email addresses
-  /([\w.]+)@(\w+\.\w+)/g,
+  /\b([\w.]+)@(\w+\.\w+)/g,
   "$1__AT__$2",
-  /((http[s]?):(\/\/))([-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b)([-a-zA-Z0-9()@:%_\+.~#?&\/\/=]*)/g,
+  /\b((http[s]?):(\/\/))([-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b)([-a-zA-Z0-9()@:%_\+.~#?&\/\/=]*)/g,
   "$2COLON$3$4$5",
   //urls with http(s)
   //decimal #
@@ -1212,12 +1217,10 @@ var TOKENIZE_RE = [
   /\r/g,
   " _CARRIAGERETURN_ ",
   // CR
-  /\036/g,
-  " _RECORDSEPARATOR_ ",
-  // RS
+  ///\036/g, " _RECORDSEPARATOR_ ", // RS
   //--------------------------
   /\.\.\.\s/g,
-  "_elipsisDDD_ ",
+  "_elipsis_ ",
   /([\?!\"\u201C\.,;:@#$%&])/g,
   " $1 ",
   /\u2026/g,
@@ -1259,7 +1262,7 @@ var TOKENIZE_RE = [
   " ^ ",
   /\u00b0/g,
   " \xB0 ",
-  /_elipsisDDD_/g,
+  /_elipsis_/g,
   " ... ",
   //pop ------------------
   /_([Ee])([Gg])_/g,
@@ -1356,30 +1359,29 @@ var TOKENIZE_RE = [
   "\r\n",
   // CR LF
   /_LINEFEEDCARRIAGERETURN_/g,
-  "\n\r",
+  "\n\r"
   // LF CR
-  /_RECORDSEPARATOR_/g,
-  "\\036"
-  // RS
+  ///_RECORDSEPARATOR_/g, "\\036", // RS
 ];
 var CONTRACTS_RE = [
   // TODO: 'She'd have wanted' -> 'She would have wanted'
-  /([Cc])an['\u2019]t/g,
+  // WORKING HERE: add word boundaries \b to these
+  /\b([Cc])an['\u2019]t/g,
   "$1an not",
-  /([Dd])idn['\u2019]t/g,
+  /\b([Dd])idn['\u2019]t/g,
   "$1id not",
-  /([CcWw])ouldn['\u2019]t/g,
+  /\b([CcWw])ouldn['\u2019]t/g,
   "$1ould not",
-  /([Ss])houldn['\u2019]t/g,
+  /\b([Ss])houldn['\u2019]t/g,
   "$1hould not",
-  /([Ii])t['\u2019]s/g,
+  /\b([Ii])t['\u2019]s/g,
   "$1t is",
-  /([tT]hat)['\u2019]s/g,
+  /\b([tT]hat)['\u2019]s/g,
   "$1 is",
-  /(she|he|you|they|i)['\u2019]d/gi,
+  /\b(she|he|you|they|i)['\u2019]d/gi,
   "$1 had",
   // changed from would, 12/8/23
-  /(she|he|you|they|i)['\u2019]ll/gi,
+  /\b(she|he|you|they|i)['\u2019]ll/gi,
   "$1 will",
   /n['\u2019]t /g,
   " not ",
@@ -1444,11 +1446,10 @@ var Conjugator = class {
     });
     this.RiTa = parent;
     this._reset();
-    this.RiTa.search({ pos: "v", limit: -1, minLength: -1 }).then((res) => {
-      this.allVerbs = res;
-      this.verbsEndingInE = res.filter((v) => v.endsWith("e"));
-      this.verbsEndingInDouble = res.filter((v) => /([^])\1$/.test(v));
-    });
+    let data = this.RiTa.lexicon.data;
+    this.allVerbs = Object.keys(data).filter((word) => data[word][1].split(" ").includes("vb"));
+    this.verbsEndingInE = this.allVerbs.filter((v) => v.endsWith("e"));
+    this.verbsEndingInDouble = this.allVerbs.filter((v) => /([^])\1$/.test(v));
   }
   // TODO: add handling of past tense modals.
   conjugate(verb, args) {
@@ -27402,7 +27403,7 @@ var Lexicon = class {
       }
       regex = opts.regex;
       if (typeof regex === "string") {
-        if (opts && opts.type === "stresses") {
+        if (opts && /^stress(es)?$/.test(opts.type)) {
           if (/^\^?[01]+\$?$/.test(regex)) {
             regex = regex.replace(/([01])(?=([01]))/g, "$1/");
           }
@@ -43238,7 +43239,7 @@ markov_default.parent = RiTa;
 stemmer_default.tokenizer = RiTa.tokenizer;
 RiTa.SILENT = false;
 RiTa.SILENCE_LTS = false;
-RiTa.VERSION = "3.1.1";
+RiTa.VERSION = "3.1.3";
 RiTa.FIRST = 1;
 RiTa.SECOND = 2;
 RiTa.THIRD = 3;