npm - eyeling - Versions diffs - 1.25.0 → 1.25.2 - Mend

eyeling 1.25.0 → 1.25.2

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.

Files changed (8)

package/dist/browser/eyeling.browser.js +333 -110
package/eyeling.js +333 -110
package/lib/cli.js +1 -1
package/lib/engine.js +131 -41
package/lib/lexer.js +143 -49
package/lib/parser.js +24 -12
package/lib/prelude.js +34 -7
package/package.json +1 -1

package/lib/lexer.js CHANGED Viewed

@@ -30,7 +30,26 @@ class N3SyntaxError extends SyntaxError {
 }
 function isWs(c) {
-  return /\s/.test(c);
+  if (c === null || c === undefined) return false;
+  const code = c.charCodeAt(0);
+  // Fast path for the whitespace used by N3/Turtle inputs.
+  return code === 0x20 || code === 0x09 || code === 0x0a || code === 0x0d || code === 0x0c;
+}
+function isAsciiAlphaCode(code) {
+  return (code >= 65 && code <= 90) || (code >= 97 && code <= 122);
+}
+function isAsciiDigitCode(code) {
+  return code >= 48 && code <= 57;
+}
+function isAsciiAlpha(c) {
+  return c !== null && c !== undefined && isAsciiAlphaCode(c.charCodeAt(0));
+}
+function isAsciiDigit(c) {
+  return c !== null && c !== undefined && isAsciiDigitCode(c.charCodeAt(0));
 }
 // Turtle/N3 prefixed names (PNAME_*) allow many Unicode letters and certain
@@ -43,13 +62,18 @@ function isWs(c) {
 //
 // We implement a grammar-aligned matcher for PN_CHARS* and PLX fragments.
 function isHexDigit(c) {
-  return c !== null && /^[0-9A-Fa-f]$/.test(c);
+  if (c === null || c === undefined) return false;
+  const code = c.charCodeAt(0);
+  return (code >= 48 && code <= 57) || (code >= 65 && code <= 70) || (code >= 97 && code <= 102);
 }
 function isPnCharsBase(c) {
   // Approximation of PN_CHARS_BASE from the N3 grammar using Unicode properties.
   // Covers most letters used in practice (including ñ) and common scripts.
-  return c !== null && /[A-Za-z]|\p{L}|\p{Nl}/u.test(c);
+  if (c === null || c === undefined) return false;
+  const code = c.charCodeAt(0);
+  if (isAsciiAlphaCode(code)) return true;
+  return /\p{L}|\p{Nl}/u.test(c);
 }
 function isPnCharsU(c) {
@@ -59,9 +83,11 @@ function isPnCharsU(c) {
 function isPnChars(c) {
   // PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | U+00B7 | [U+0300-U+036F] | [U+203F-U+2040]
-  if (c === null) return false;
+  if (c === null || c === undefined) return false;
+  const code = c.charCodeAt(0);
+  if (isAsciiAlphaCode(code) || isAsciiDigitCode(code) || code === 95 || code === 45) return true;
   if (isPnCharsU(c)) return true;
-  if (c === '-' || /[0-9]/.test(c) || c === '\u00B7') return true;
+  if (c === '\u00B7') return true;
   const cp = c.codePointAt(0);
   return (cp >= 0x0300 && cp <= 0x036f) || (cp >= 0x203f && cp <= 0x2040);
 }
@@ -1163,10 +1189,23 @@ function normalizeRdfCompatibility(inputText) {
   return text;
 }
+function isNumericLikeIdentifier(word) {
+  if (typeof word !== 'string' || word.length === 0) return false;
+  for (let j = 0; j < word.length; j++) {
+    const code = word.charCodeAt(j);
+    if (!((code >= 48 && code <= 57) || code === 46 || code === 45)) return false;
+  }
+  return true;
+}
 function lex(inputText, opts = {}) {
   const rdf = !!(opts && opts.rdf);
   if (rdf) inputText = normalizeRdfCompatibility(inputText);
-  const chars = Array.from(inputText);
+  // Avoid copying large ASCII/BMP inputs into an Array.  Array.from() is
+  // only needed when the text contains surrogate pairs and we want the old
+  // code-point iteration behavior for non-BMP characters.
+  const chars = /[\uD800-\uDFFF]/.test(inputText) ? Array.from(inputText) : inputText;
   const n = chars.length;
   let i = 0;
   const tokens = [];
@@ -1182,19 +1221,29 @@ function lex(inputText, opts = {}) {
   // - Accepts percent escapes (%HH) as PLX fragments.
   // - Accepts PN_LOCAL_ESC backslash escapes and decodes them ("\\." -> ".").
   // - Accepts '.' inside a name only when it is not terminal.
+  function sliceChars(start, end) {
+    return typeof chars === 'string' ? chars.slice(start, end) : chars.slice(start, end).join('');
+  }
   function readIdentText(startOffsetForErrors) {
-    const out = [];
+    const start = i;
+    let out = null;
+    function appendRawUntilHere() {
+      if (out === null) out = [sliceChars(start, i)];
+    }
     while (i < n) {
-      const cc = peek();
-      if (cc === null || isWs(cc)) break;
+      const cc = chars[i];
+      if (cc === null || cc === undefined || isWs(cc)) break;
       // Hard stops: delimiters cannot appear unescaped inside PNAME tokens.
-      if ('{}()[];,'.includes(cc)) break;
+      if (cc === '{' || cc === '}' || cc === '(' || cc === ')' || cc === '[' || cc === ']' || cc === ';' || cc === ',') break;
       // Dot is allowed inside PN_LOCAL, but not at the end.
       if (cc === '.') {
         if (!canContinueAfterDot(peek(1))) break;
-        out.push('.');
+        if (out !== null) out.push('.');
         i++;
         continue;
       }
@@ -1209,6 +1258,7 @@ function lex(inputText, opts = {}) {
             typeof startOffsetForErrors === 'number' ? startOffsetForErrors : i,
           );
         }
+        appendRawUntilHere();
         out.push('%', h1, h2);
         i += 3;
         continue;
@@ -1218,6 +1268,7 @@ function lex(inputText, opts = {}) {
       if (cc === '\\') {
         const esc = peek(1);
         if (esc !== null && PN_LOCAL_ESC_SET.has(esc)) {
+          appendRawUntilHere();
           out.push(esc); // decoded form
           i += 2;
           continue;
@@ -1229,14 +1280,14 @@ function lex(inputText, opts = {}) {
       }
       if (isIdentChar(cc)) {
-        out.push(cc);
+        if (out !== null) out.push(cc);
         i++;
         continue;
       }
       break;
     }
-    return out.join('');
+    return out === null ? sliceChars(start, i) : out.join('');
   }
   while (i < n) {
@@ -1315,22 +1366,47 @@ function lex(inputText, opts = {}) {
       continue;
     }
-    // 5) Single-character punctuation
-    if ('{}()[];,.'.includes(c)) {
-      const mapping = {
-        '{': 'LBrace',
-        '}': 'RBrace',
-        '(': 'LParen',
-        ')': 'RParen',
-        '[': 'LBracket',
-        ']': 'RBracket',
-        ';': 'Semicolon',
-        ',': 'Comma',
-        '.': 'Dot',
-      };
-      tokens.push(new Token(mapping[c], null, i));
-      i++;
-      continue;
+    // 5) Single-character punctuation.  Use a switch rather than allocating a
+    // mapping object for every punctuation token in large inputs.
+    switch (c) {
+      case '{':
+        tokens.push(new Token('LBrace', null, i));
+        i++;
+        continue;
+      case '}':
+        tokens.push(new Token('RBrace', null, i));
+        i++;
+        continue;
+      case '(':
+        tokens.push(new Token('LParen', null, i));
+        i++;
+        continue;
+      case ')':
+        tokens.push(new Token('RParen', null, i));
+        i++;
+        continue;
+      case '[':
+        tokens.push(new Token('LBracket', null, i));
+        i++;
+        continue;
+      case ']':
+        tokens.push(new Token('RBracket', null, i));
+        i++;
+        continue;
+      case ';':
+        tokens.push(new Token('Semicolon', null, i));
+        i++;
+        continue;
+      case ',':
+        tokens.push(new Token('Comma', null, i));
+        i++;
+        continue;
+      case '.':
+        tokens.push(new Token('Dot', null, i));
+        i++;
+        continue;
+      default:
+        break;
     }
     // String literal: short "..." or long """..."""
@@ -1389,26 +1465,36 @@ function lex(inputText, opts = {}) {
         continue;
       }
-      // Short string literal " ... "
+      // Short string literal " ... ".  Most data files contain plain
+      // unescaped labels; keep that path slice-based and avoid building an
+      // intermediate character array + raw quoted string.
       i++; // consume opening "
-      const sChars = [];
+      const contentStart = i;
+      let sChars = null;
+      let closed = false;
       while (i < n) {
         const cc = chars[i];
         i++;
         if (cc === '\\') {
+          if (sChars === null) sChars = [sliceChars(contentStart, i - 1)];
           if (i < n) {
             const esc = chars[i];
             i++;
             sChars.push('\\');
             sChars.push(esc);
+          } else {
+            sChars.push('\\');
           }
           continue;
         }
-        if (cc === '"') break;
-        sChars.push(cc);
+        if (cc === '"') {
+          closed = true;
+          break;
+        }
+        if (sChars !== null) sChars.push(cc);
       }
-      const raw = '"' + sChars.join('') + '"';
-      const decoded = decodeN3StringEscapes(stripQuotes(raw), start);
+      const rawContent = sChars === null ? sliceChars(contentStart, closed ? i - 1 : i) : sChars.join('');
+      const decoded = sChars === null ? rawContent : decodeN3StringEscapes(rawContent, start);
       assertValidStringLiteralValue(decoded, start);
       const s = JSON.stringify(decoded); // canonical short quoted form
       tokens.push(new Token('Literal', s, start));
@@ -1473,24 +1559,32 @@ function lex(inputText, opts = {}) {
       // Short string literal ' ... '
       i++; // consume opening '
-      const sChars = [];
+      const contentStart = i;
+      let sChars = null;
+      let closed = false;
       while (i < n) {
         const cc = chars[i];
         i++;
         if (cc === '\\') {
+          if (sChars === null) sChars = [sliceChars(contentStart, i - 1)];
           if (i < n) {
             const esc = chars[i];
             i++;
             sChars.push('\\');
             sChars.push(esc);
+          } else {
+            sChars.push('\\');
           }
           continue;
         }
-        if (cc === "'") break;
-        sChars.push(cc);
+        if (cc === "'") {
+          closed = true;
+          break;
+        }
+        if (sChars !== null) sChars.push(cc);
       }
-      const raw = "'" + sChars.join('') + "'";
-      const decoded = decodeN3StringEscapes(stripQuotes(raw), start);
+      const rawContent = sChars === null ? sliceChars(contentStart, closed ? i - 1 : i) : sChars.join('');
+      const decoded = sChars === null ? rawContent : decodeN3StringEscapes(rawContent, start);
       assertValidStringLiteralValue(decoded, start);
       const s = JSON.stringify(decoded); // canonical short quoted form
       tokens.push(new Token('Literal', s, start));
@@ -1523,10 +1617,10 @@ function lex(inputText, opts = {}) {
         //   "@" [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
         const tagChars = [];
         let cc = peek();
-        if (cc === null || !/[A-Za-z]/.test(cc)) {
+        if (cc === null || !isAsciiAlpha(cc)) {
           throw new N3SyntaxError("Invalid language tag (expected [A-Za-z] after '@')", start);
         }
-        while ((cc = peek()) !== null && /[A-Za-z]/.test(cc)) {
+        while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
           tagChars.push(cc);
           i++;
         }
@@ -1550,7 +1644,7 @@ function lex(inputText, opts = {}) {
       // Otherwise, treat as a directive (@prefix, @base)
       const wordChars = [];
       let cc;
-      while ((cc = peek()) !== null && /[A-Za-z]/.test(cc)) {
+      while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
         wordChars.push(cc);
         i++;
       }
@@ -1562,19 +1656,19 @@ function lex(inputText, opts = {}) {
     }
     // 6) Numeric literal (integer or float)
-    if (/[0-9]/.test(c) || (c === '-' && peek(1) !== null && /[0-9]/.test(peek(1)))) {
+    if (isAsciiDigit(c) || (c === '-' && peek(1) !== null && isAsciiDigit(peek(1)))) {
       const start = i;
       const numChars = [c];
       i++;
       while (i < n) {
         const cc = chars[i];
-        if (/[0-9]/.test(cc)) {
+        if (isAsciiDigit(cc)) {
           numChars.push(cc);
           i++;
           continue;
         }
         if (cc === '.') {
-          if (i + 1 < n && /[0-9]/.test(chars[i + 1])) {
+          if (i + 1 < n && isAsciiDigit(chars[i + 1])) {
             numChars.push('.');
             i++;
             continue;
@@ -1589,14 +1683,14 @@ function lex(inputText, opts = {}) {
       if (i < n && (chars[i] === 'e' || chars[i] === 'E')) {
         let j = i + 1;
         if (j < n && (chars[j] === '+' || chars[j] === '-')) j++;
-        if (j < n && /[0-9]/.test(chars[j])) {
+        if (j < n && isAsciiDigit(chars[j])) {
           numChars.push(chars[i]); // e/E
           i++;
           if (i < n && (chars[i] === '+' || chars[i] === '-')) {
             numChars.push(chars[i]);
             i++;
           }
-          while (i < n && /[0-9]/.test(chars[i])) {
+          while (i < n && isAsciiDigit(chars[i])) {
             numChars.push(chars[i]);
             i++;
           }
@@ -1615,7 +1709,7 @@ function lex(inputText, opts = {}) {
     }
     if (word === 'true' || word === 'false') {
       tokens.push(new Token('Literal', word, start));
-    } else if ([...word].every((ch) => /[0-9.-]/.test(ch))) {
+    } else if (isNumericLikeIdentifier(word)) {
       tokens.push(new Token('Literal', word, start));
     } else {
       tokens.push(new Token('Ident', word, start));

package/lib/parser.js CHANGED Viewed

@@ -86,7 +86,15 @@ class Parser {
   }
   isIdentKeyword(tok, keyword) {
-    return tok && tok.typ === 'Ident' && typeof tok.value === 'string' && tok.value.toLowerCase() === keyword;
+    if (!tok || tok.typ !== 'Ident' || typeof tok.value !== 'string') return false;
+    const v = tok.value;
+    if (v.length !== keyword.length) return false;
+    for (let i = 0; i < keyword.length; i++) {
+      const code = v.charCodeAt(i);
+      const lower = code >= 65 && code <= 90 ? code + 32 : code;
+      if (lower !== keyword.charCodeAt(i)) return false;
+    }
+    return true;
   }
   canStartSparqlPrefixDirective() {
@@ -233,7 +241,7 @@ class Parser {
     } else if (tok2.typ === 'Ident') {
       const qn = tok2.value || '';
       if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok2, qn);
-      assertValidQNamePrefix(qn.split(':', 1)[0], this.fail.bind(this), tok2, '@prefix directive IRI');
+      assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok2, '@prefix directive IRI');
       iri = this.prefixes.expandQName(qn);
     } else {
       this.fail(`Expected IRI after @prefix, got ${tok2.toString()}`, tok2);
@@ -250,7 +258,7 @@ class Parser {
     } else if (tok.typ === 'Ident') {
       const qn = tok.value || '';
       if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok, qn);
-      assertValidQNamePrefix(qn.split(':', 1)[0], this.fail.bind(this), tok, '@base directive IRI');
+      assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok, '@base directive IRI');
       iri = this.prefixes.expandQName(qn);
     } else {
       this.fail(`Expected IRI after @base, got ${tok.toString()}`, tok);
@@ -279,7 +287,7 @@ class Parser {
     } else if (tok2.typ === 'Ident') {
       const qn = tok2.value || '';
       if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok2, qn);
-      assertValidQNamePrefix(qn.split(':', 1)[0], this.fail.bind(this), tok2, '@prefix directive IRI');
+      assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok2, '@prefix directive IRI');
       iri = this.prefixes.expandQName(qn);
     } else {
       this.fail(`Expected IRI after PREFIX, got ${tok2.toString()}`, tok2);
@@ -300,7 +308,7 @@ class Parser {
     } else if (tok.typ === 'Ident') {
       const qn = tok.value || '';
       if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok, qn);
-      assertValidQNamePrefix(qn.split(':', 1)[0], this.fail.bind(this), tok, 'BASE directive IRI');
+      assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok, 'BASE directive IRI');
       iri = this.prefixes.expandQName(qn);
     } else {
       this.fail(`Expected IRI after BASE, got ${tok.toString()}`, tok);
@@ -347,14 +355,18 @@ class Parser {
       const name = val || '';
       if (name === 'a') {
         return internIri(RDF_NS + 'type');
-      } else if (name.startsWith('_:')) {
+      }
+      const sep = name.indexOf(':');
+      if (sep === 1 && name.charCodeAt(0) === 95) {
         return new Blank(name);
-      } else if (name.includes(':')) {
-        assertValidQNamePrefix(name.split(':', 1)[0], this.fail.bind(this), tok);
-        return internIri(this.prefixes.expandQName(name));
-      } else {
-        failInvalidKeywordLikeIdent(this.fail.bind(this), tok, name);
       }
+      if (sep >= 0) {
+        const prefixName = name.slice(0, sep);
+        assertValidQNamePrefix(prefixName, this.fail.bind(this), tok);
+        const base = this.prefixes.map[prefixName] || '';
+        return internIri(base ? base + name.slice(sep + 1) : name);
+      }
+      failInvalidKeywordLikeIdent(this.fail.bind(this), tok, name);
     }
     if (typ === 'Literal') {
@@ -385,7 +397,7 @@ class Parser {
         } else if (dtTok.typ === 'Ident') {
           const qn = dtTok.value || '';
           if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), dtTok, qn);
-          assertValidQNamePrefix(qn.split(':', 1)[0], this.fail.bind(this), dtTok, 'datatype prefixed name');
+          assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), dtTok, 'datatype prefixed name');
           dtIri = this.prefixes.expandQName(qn);
         } else {
           this.fail(`Expected datatype after ^^, got ${dtTok.toString()}`, dtTok);

package/lib/prelude.js CHANGED Viewed

@@ -250,21 +250,40 @@ function literalParts(lit) {
 // equality fast-paths than repeated string key construction.
 let __nextTid = 1;
-const __tidIntern = new Map(); // string key -> number
+const __tidIntern = new Map(); // legacy generic key -> number
+const __iriTidIntern = new Map(); // IRI value -> number
+const __blankTidIntern = new Map(); // blank label -> number
+const __literalTidIntern = new Map(); // normalized literal lexical form -> number
 // Avoid storing extremely large literal keys in the global term-id intern map.
 // For huge literals we still assign a unique __tid, but we do not intern the key.
 const MAX_LITERAL_TID_LEN = 1024;
-function __getTid(key) {
-  let id = __tidIntern.get(key);
+function __getTidFromMap(map, key) {
+  let id = map.get(key);
   if (!id) {
     id = __nextTid++;
-    __tidIntern.set(key, id);
+    map.set(key, id);
   }
   return id;
 }
+function __getTid(key) {
+  return __getTidFromMap(__tidIntern, key);
+}
+function __getIriTid(value) {
+  return __getTidFromMap(__iriTidIntern, value);
+}
+function __getBlankTid(label) {
+  return __getTidFromMap(__blankTidIntern, label);
+}
+function __getLiteralTid(norm) {
+  return __getTidFromMap(__literalTidIntern, norm);
+}
 function __isQuotedLexical(lit) {
   if (typeof lit !== 'string') return false;
   if (lit.length >= 6) {
@@ -310,6 +329,14 @@ function __isPlainStringLiteralValue(lit) {
 function normalizeLiteralForTid(lit) {
   // Canonicalize so that plain string and explicit xsd:string share the same id.
   if (typeof lit !== 'string') return lit;
+  // Fast path for the overwhelmingly common lexer output for plain string
+  // literals: a canonical JSON-style quoted lexical form with no suffix.
+  // This avoids literalParts()/language-tag parsing for large fact tables.
+  if (lit.length >= 2 && lit.charCodeAt(0) === 34 && lit.charCodeAt(lit.length - 1) === 34 && lit.indexOf('^^') < 0) {
+    return `${lit}^^<${XSD_NS}string>`;
+  }
   const [lex, dt] = literalParts(lit);
   if (dt === XSD_NS + 'string') return `${lex}^^<${XSD_NS}string>`;
   if (dt === null && __isPlainStringLiteralValue(lit)) return `${lex}^^<${XSD_NS}string>`;
@@ -327,7 +354,7 @@ class Iri extends Term {
     super();
     this.value = value;
     Object.defineProperty(this, '__tid', {
-      value: __getTid('I:' + value),
+      value: __getIriTid(value),
       enumerable: false,
     });
   }
@@ -339,7 +366,7 @@ class Literal extends Term {
     this.value = value; // raw lexical form, e.g. "foo", 12, true, or "\"1944-08-21\"^^..."
     const norm = normalizeLiteralForTid(value);
     const useIntern = typeof norm === 'string' && norm.length <= MAX_LITERAL_TID_LEN;
-    const tid = useIntern ? __getTid('L:' + norm) : __nextTid++;
+    const tid = useIntern ? __getLiteralTid(norm) : __nextTid++;
     Object.defineProperty(this, '__tid', {
       value: tid,
       enumerable: false,
@@ -359,7 +386,7 @@ class Blank extends Term {
     super();
     this.label = label; // _:b1, etc.
     Object.defineProperty(this, '__tid', {
-      value: __getTid('B:' + label),
+      value: __getBlankTid(label),
       enumerable: false,
     });
   }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "eyeling",
-  "version": "1.25.0",
+  "version": "1.25.2",
   "description": "A minimal Notation3 (N3) reasoner in JavaScript.",
   "main": "./index.js",
   "keywords": [