eyeling 1.25.0 → 1.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4825,7 +4825,7 @@ function main() {
4825
4825
  parseN3Text(text, {
4826
4826
  baseIri: __sourceLabelToBaseIri(sourceLabel),
4827
4827
  label: sourceLabel,
4828
- collectUsedPrefixes: true,
4828
+ collectUsedPrefixes: streamMode,
4829
4829
  keepSourceArtifacts: false,
4830
4830
  rdf: rdfMode,
4831
4831
  }),
@@ -6536,11 +6536,13 @@ function termFastKey(t) {
6536
6536
  if (t instanceof Iri || t instanceof Blank) return t.__tid;
6537
6537
 
6538
6538
  if (t instanceof Literal) {
6539
- // Very large literals intentionally skip global interning in prelude.js to
6540
- // avoid retaining huge strings forever. Their per-object __tid is therefore
6541
- // not value-stable, so using it here breaks duplicate detection for facts
6542
- // such as long log:outputString blocks that are re-derived during forward
6543
- // chaining. Fall back to a value-based key in that case.
6539
+ // Literal construction already computed a value-stable __tid for ordinary
6540
+ // short literals. Avoid re-running literalParts()/datatype normalization
6541
+ // while building fact indexes; on data-heavy inputs this is a hot path.
6542
+ // Only the rare over-sized literal needs the value-based fallback because
6543
+ // prelude intentionally gives such literals per-object ids to avoid
6544
+ // retaining huge strings in the global interner.
6545
+ if (typeof t.value !== 'string' || t.value.length + 64 <= MAX_LITERAL_TID_LEN) return t.__tid;
6544
6546
  const norm = normalizeLiteralForTid(t.value);
6545
6547
  if (typeof norm === 'string' && norm.length > MAX_LITERAL_TID_LEN) return 'L:' + norm;
6546
6548
  return t.__tid;
@@ -6627,17 +6629,28 @@ function ensureFactIndexes(facts) {
6627
6629
  enumerable: false,
6628
6630
  writable: true,
6629
6631
  });
6632
+ Object.defineProperty(facts, '__keySetComplete', {
6633
+ value: false,
6634
+ enumerable: false,
6635
+ writable: true,
6636
+ });
6630
6637
 
6631
- for (let i = 0; i < facts.length; i++) indexFact(facts, facts[i], i);
6638
+ // Build lookup indexes eagerly, but do not populate the duplicate-detection
6639
+ // string Set for every input fact. The predicate/subject/object indexes are
6640
+ // enough to verify duplicates when needed; avoiding 100k+ joined string keys
6641
+ // saves substantial time and GC on data-heavy query workloads.
6642
+ for (let i = 0; i < facts.length; i++) indexFact(facts, facts[i], i, false);
6632
6643
  }
6633
6644
 
6634
- function indexFact(facts, tr, idx) {
6645
+ function indexFact(facts, tr, idx, addKeySet = true) {
6635
6646
  const sk = termFastKey(tr.s);
6636
6647
  const ok = termFastKey(tr.o);
6648
+ let pkForKey = null;
6637
6649
 
6638
6650
  if (tr.p instanceof Iri) {
6639
6651
  // Use predicate term id as the primary key to avoid hashing long IRI strings.
6640
6652
  const pk = tr.p.__tid;
6653
+ pkForKey = pk;
6641
6654
 
6642
6655
  let pb = facts.__byPred.get(pk);
6643
6656
  if (!pb) {
@@ -6695,8 +6708,10 @@ function indexFact(facts, tr, idx) {
6695
6708
  }
6696
6709
  }
6697
6710
 
6698
- const key = tripleFastKey(tr);
6699
- if (key !== null) facts.__keySet.add(key);
6711
+ if (addKeySet && sk !== null && ok !== null) {
6712
+ if (pkForKey === null) pkForKey = termFastKey(tr.p);
6713
+ if (pkForKey !== null) facts.__keySet.add(sk + '\t' + pkForKey + '\t' + ok);
6714
+ }
6700
6715
  }
6701
6716
 
6702
6717
  function candidateFacts(facts, goal) {
@@ -6758,7 +6773,10 @@ function hasFactIndexed(facts, tr) {
6758
6773
  ensureFactIndexes(facts);
6759
6774
 
6760
6775
  const key = tripleFastKey(tr);
6761
- if (key !== null) return facts.__keySet.has(key);
6776
+ if (key !== null) {
6777
+ if (facts.__keySet.has(key)) return true;
6778
+ if (facts.__keySetComplete) return false;
6779
+ }
6762
6780
 
6763
6781
  if (tr.p instanceof Iri) {
6764
6782
  const pk = tr.p.__tid;
@@ -6788,7 +6806,7 @@ function pushFactIndexed(facts, tr) {
6788
6806
  ensureFactIndexes(facts);
6789
6807
  const idx = facts.length;
6790
6808
  facts.push(tr);
6791
- indexFact(facts, tr, idx);
6809
+ indexFact(facts, tr, idx, true);
6792
6810
  }
6793
6811
 
6794
6812
  function makeDerivedRecord(fact, rule, premises, subst, captureExplanations) {
@@ -8325,11 +8343,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8325
8343
  const varGen = [0];
8326
8344
  const skCounter = [0];
8327
8345
 
8328
- // Speed up dynamic rule promotion by maintaining O(1) membership sets.
8329
- // (Some workloads derive many rule-producing triples.)
8330
-
8331
- __ensureRuleKeySet(forwardRules);
8332
- __ensureRuleKeySet(backRules);
8346
+ // Rule-key sets are only needed if a program actually derives rule-producing
8347
+ // triples. Building them eagerly is expensive on large static rule sets, so
8348
+ // dynamic-promotion sites create them lazily before duplicate checks.
8333
8349
 
8334
8350
  // Cache head blank-node skolemization per (rule firing, head blank label).
8335
8351
  // This prevents repeatedly generating fresh _:sk_N blanks for the *same*
@@ -8508,8 +8524,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8508
8524
  newRule.conclusion,
8509
8525
  newRule.__dynamicConclusionTerm || null,
8510
8526
  );
8511
- if (!forwardRules.__ruleKeySet.has(key)) {
8512
- forwardRules.__ruleKeySet.add(key);
8527
+ const forwardRuleKeySet = __ensureRuleKeySet(forwardRules);
8528
+ if (!forwardRuleKeySet.has(key)) {
8529
+ forwardRuleKeySet.add(key);
8513
8530
  forwardRules.push(newRule);
8514
8531
  rulesChanged = true;
8515
8532
  }
@@ -8523,8 +8540,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8523
8540
  newRule.conclusion,
8524
8541
  newRule.__dynamicConclusionTerm || null,
8525
8542
  );
8526
- if (!backRules.__ruleKeySet.has(key)) {
8527
- backRules.__ruleKeySet.add(key);
8543
+ const backRuleKeySet = __ensureRuleKeySet(backRules);
8544
+ if (!backRuleKeySet.has(key)) {
8545
+ backRuleKeySet.add(key);
8528
8546
  backRules.push(newRule);
8529
8547
  indexBackRule(backRules, newRule);
8530
8548
  rulesChanged = true;
@@ -9462,7 +9480,26 @@ class N3SyntaxError extends SyntaxError {
9462
9480
  }
9463
9481
 
9464
9482
  function isWs(c) {
9465
- return /\s/.test(c);
9483
+ if (c === null || c === undefined) return false;
9484
+ const code = c.charCodeAt(0);
9485
+ // Fast path for the whitespace used by N3/Turtle inputs.
9486
+ return code === 0x20 || code === 0x09 || code === 0x0a || code === 0x0d || code === 0x0c;
9487
+ }
9488
+
9489
+ function isAsciiAlphaCode(code) {
9490
+ return (code >= 65 && code <= 90) || (code >= 97 && code <= 122);
9491
+ }
9492
+
9493
+ function isAsciiDigitCode(code) {
9494
+ return code >= 48 && code <= 57;
9495
+ }
9496
+
9497
+ function isAsciiAlpha(c) {
9498
+ return c !== null && c !== undefined && isAsciiAlphaCode(c.charCodeAt(0));
9499
+ }
9500
+
9501
+ function isAsciiDigit(c) {
9502
+ return c !== null && c !== undefined && isAsciiDigitCode(c.charCodeAt(0));
9466
9503
  }
9467
9504
 
9468
9505
  // Turtle/N3 prefixed names (PNAME_*) allow many Unicode letters and certain
@@ -9475,13 +9512,18 @@ function isWs(c) {
9475
9512
  //
9476
9513
  // We implement a grammar-aligned matcher for PN_CHARS* and PLX fragments.
9477
9514
  function isHexDigit(c) {
9478
- return c !== null && /^[0-9A-Fa-f]$/.test(c);
9515
+ if (c === null || c === undefined) return false;
9516
+ const code = c.charCodeAt(0);
9517
+ return (code >= 48 && code <= 57) || (code >= 65 && code <= 70) || (code >= 97 && code <= 102);
9479
9518
  }
9480
9519
 
9481
9520
  function isPnCharsBase(c) {
9482
9521
  // Approximation of PN_CHARS_BASE from the N3 grammar using Unicode properties.
9483
9522
  // Covers most letters used in practice (including ñ) and common scripts.
9484
- return c !== null && /[A-Za-z]|\p{L}|\p{Nl}/u.test(c);
9523
+ if (c === null || c === undefined) return false;
9524
+ const code = c.charCodeAt(0);
9525
+ if (isAsciiAlphaCode(code)) return true;
9526
+ return /\p{L}|\p{Nl}/u.test(c);
9485
9527
  }
9486
9528
 
9487
9529
  function isPnCharsU(c) {
@@ -9491,9 +9533,11 @@ function isPnCharsU(c) {
9491
9533
 
9492
9534
  function isPnChars(c) {
9493
9535
  // PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | U+00B7 | [U+0300-U+036F] | [U+203F-U+2040]
9494
- if (c === null) return false;
9536
+ if (c === null || c === undefined) return false;
9537
+ const code = c.charCodeAt(0);
9538
+ if (isAsciiAlphaCode(code) || isAsciiDigitCode(code) || code === 95 || code === 45) return true;
9495
9539
  if (isPnCharsU(c)) return true;
9496
- if (c === '-' || /[0-9]/.test(c) || c === '\u00B7') return true;
9540
+ if (c === '\u00B7') return true;
9497
9541
  const cp = c.codePointAt(0);
9498
9542
  return (cp >= 0x0300 && cp <= 0x036f) || (cp >= 0x203f && cp <= 0x2040);
9499
9543
  }
@@ -10598,7 +10642,10 @@ function normalizeRdfCompatibility(inputText) {
10598
10642
  function lex(inputText, opts = {}) {
10599
10643
  const rdf = !!(opts && opts.rdf);
10600
10644
  if (rdf) inputText = normalizeRdfCompatibility(inputText);
10601
- const chars = Array.from(inputText);
10645
+ // Avoid copying large ASCII/BMP inputs into an Array. Array.from() is
10646
+ // only needed when the text contains surrogate pairs and we want the old
10647
+ // code-point iteration behavior for non-BMP characters.
10648
+ const chars = /[\uD800-\uDFFF]/.test(inputText) ? Array.from(inputText) : inputText;
10602
10649
  const n = chars.length;
10603
10650
  let i = 0;
10604
10651
  const tokens = [];
@@ -10614,19 +10661,29 @@ function lex(inputText, opts = {}) {
10614
10661
  // - Accepts percent escapes (%HH) as PLX fragments.
10615
10662
  // - Accepts PN_LOCAL_ESC backslash escapes and decodes them ("\\." -> ".").
10616
10663
  // - Accepts '.' inside a name only when it is not terminal.
10664
+ function sliceChars(start, end) {
10665
+ return typeof chars === 'string' ? chars.slice(start, end) : chars.slice(start, end).join('');
10666
+ }
10667
+
10617
10668
  function readIdentText(startOffsetForErrors) {
10618
- const out = [];
10669
+ const start = i;
10670
+ let out = null;
10671
+
10672
+ function appendRawUntilHere() {
10673
+ if (out === null) out = [sliceChars(start, i)];
10674
+ }
10675
+
10619
10676
  while (i < n) {
10620
- const cc = peek();
10621
- if (cc === null || isWs(cc)) break;
10677
+ const cc = chars[i];
10678
+ if (cc === null || cc === undefined || isWs(cc)) break;
10622
10679
 
10623
10680
  // Hard stops: delimiters cannot appear unescaped inside PNAME tokens.
10624
- if ('{}()[];,'.includes(cc)) break;
10681
+ if (cc === '{' || cc === '}' || cc === '(' || cc === ')' || cc === '[' || cc === ']' || cc === ';' || cc === ',') break;
10625
10682
 
10626
10683
  // Dot is allowed inside PN_LOCAL, but not at the end.
10627
10684
  if (cc === '.') {
10628
10685
  if (!canContinueAfterDot(peek(1))) break;
10629
- out.push('.');
10686
+ if (out !== null) out.push('.');
10630
10687
  i++;
10631
10688
  continue;
10632
10689
  }
@@ -10641,6 +10698,7 @@ function lex(inputText, opts = {}) {
10641
10698
  typeof startOffsetForErrors === 'number' ? startOffsetForErrors : i,
10642
10699
  );
10643
10700
  }
10701
+ appendRawUntilHere();
10644
10702
  out.push('%', h1, h2);
10645
10703
  i += 3;
10646
10704
  continue;
@@ -10650,6 +10708,7 @@ function lex(inputText, opts = {}) {
10650
10708
  if (cc === '\\') {
10651
10709
  const esc = peek(1);
10652
10710
  if (esc !== null && PN_LOCAL_ESC_SET.has(esc)) {
10711
+ appendRawUntilHere();
10653
10712
  out.push(esc); // decoded form
10654
10713
  i += 2;
10655
10714
  continue;
@@ -10661,14 +10720,14 @@ function lex(inputText, opts = {}) {
10661
10720
  }
10662
10721
 
10663
10722
  if (isIdentChar(cc)) {
10664
- out.push(cc);
10723
+ if (out !== null) out.push(cc);
10665
10724
  i++;
10666
10725
  continue;
10667
10726
  }
10668
10727
 
10669
10728
  break;
10670
10729
  }
10671
- return out.join('');
10730
+ return out === null ? sliceChars(start, i) : out.join('');
10672
10731
  }
10673
10732
 
10674
10733
  while (i < n) {
@@ -10955,10 +11014,10 @@ function lex(inputText, opts = {}) {
10955
11014
  // "@" [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
10956
11015
  const tagChars = [];
10957
11016
  let cc = peek();
10958
- if (cc === null || !/[A-Za-z]/.test(cc)) {
11017
+ if (cc === null || !isAsciiAlpha(cc)) {
10959
11018
  throw new N3SyntaxError("Invalid language tag (expected [A-Za-z] after '@')", start);
10960
11019
  }
10961
- while ((cc = peek()) !== null && /[A-Za-z]/.test(cc)) {
11020
+ while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
10962
11021
  tagChars.push(cc);
10963
11022
  i++;
10964
11023
  }
@@ -10982,7 +11041,7 @@ function lex(inputText, opts = {}) {
10982
11041
  // Otherwise, treat as a directive (@prefix, @base)
10983
11042
  const wordChars = [];
10984
11043
  let cc;
10985
- while ((cc = peek()) !== null && /[A-Za-z]/.test(cc)) {
11044
+ while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
10986
11045
  wordChars.push(cc);
10987
11046
  i++;
10988
11047
  }
@@ -10994,19 +11053,19 @@ function lex(inputText, opts = {}) {
10994
11053
  }
10995
11054
 
10996
11055
  // 6) Numeric literal (integer or float)
10997
- if (/[0-9]/.test(c) || (c === '-' && peek(1) !== null && /[0-9]/.test(peek(1)))) {
11056
+ if (isAsciiDigit(c) || (c === '-' && peek(1) !== null && isAsciiDigit(peek(1)))) {
10998
11057
  const start = i;
10999
11058
  const numChars = [c];
11000
11059
  i++;
11001
11060
  while (i < n) {
11002
11061
  const cc = chars[i];
11003
- if (/[0-9]/.test(cc)) {
11062
+ if (isAsciiDigit(cc)) {
11004
11063
  numChars.push(cc);
11005
11064
  i++;
11006
11065
  continue;
11007
11066
  }
11008
11067
  if (cc === '.') {
11009
- if (i + 1 < n && /[0-9]/.test(chars[i + 1])) {
11068
+ if (i + 1 < n && isAsciiDigit(chars[i + 1])) {
11010
11069
  numChars.push('.');
11011
11070
  i++;
11012
11071
  continue;
@@ -11021,14 +11080,14 @@ function lex(inputText, opts = {}) {
11021
11080
  if (i < n && (chars[i] === 'e' || chars[i] === 'E')) {
11022
11081
  let j = i + 1;
11023
11082
  if (j < n && (chars[j] === '+' || chars[j] === '-')) j++;
11024
- if (j < n && /[0-9]/.test(chars[j])) {
11083
+ if (j < n && isAsciiDigit(chars[j])) {
11025
11084
  numChars.push(chars[i]); // e/E
11026
11085
  i++;
11027
11086
  if (i < n && (chars[i] === '+' || chars[i] === '-')) {
11028
11087
  numChars.push(chars[i]);
11029
11088
  i++;
11030
11089
  }
11031
- while (i < n && /[0-9]/.test(chars[i])) {
11090
+ while (i < n && isAsciiDigit(chars[i])) {
11032
11091
  numChars.push(chars[i]);
11033
11092
  i++;
11034
11093
  }
@@ -11477,7 +11536,15 @@ class Parser {
11477
11536
  }
11478
11537
 
11479
11538
  isIdentKeyword(tok, keyword) {
11480
- return tok && tok.typ === 'Ident' && typeof tok.value === 'string' && tok.value.toLowerCase() === keyword;
11539
+ if (!tok || tok.typ !== 'Ident' || typeof tok.value !== 'string') return false;
11540
+ const v = tok.value;
11541
+ if (v.length !== keyword.length) return false;
11542
+ for (let i = 0; i < keyword.length; i++) {
11543
+ const code = v.charCodeAt(i);
11544
+ const lower = code >= 65 && code <= 90 ? code + 32 : code;
11545
+ if (lower !== keyword.charCodeAt(i)) return false;
11546
+ }
11547
+ return true;
11481
11548
  }
11482
11549
 
11483
11550
  canStartSparqlPrefixDirective() {
@@ -12400,21 +12467,40 @@ function literalParts(lit) {
12400
12467
  // equality fast-paths than repeated string key construction.
12401
12468
 
12402
12469
  let __nextTid = 1;
12403
- const __tidIntern = new Map(); // string key -> number
12470
+ const __tidIntern = new Map(); // legacy generic key -> number
12471
+ const __iriTidIntern = new Map(); // IRI value -> number
12472
+ const __blankTidIntern = new Map(); // blank label -> number
12473
+ const __literalTidIntern = new Map(); // normalized literal lexical form -> number
12404
12474
 
12405
12475
  // Avoid storing extremely large literal keys in the global term-id intern map.
12406
12476
  // For huge literals we still assign a unique __tid, but we do not intern the key.
12407
12477
  const MAX_LITERAL_TID_LEN = 1024;
12408
12478
 
12409
- function __getTid(key) {
12410
- let id = __tidIntern.get(key);
12479
+ function __getTidFromMap(map, key) {
12480
+ let id = map.get(key);
12411
12481
  if (!id) {
12412
12482
  id = __nextTid++;
12413
- __tidIntern.set(key, id);
12483
+ map.set(key, id);
12414
12484
  }
12415
12485
  return id;
12416
12486
  }
12417
12487
 
12488
+ function __getTid(key) {
12489
+ return __getTidFromMap(__tidIntern, key);
12490
+ }
12491
+
12492
+ function __getIriTid(value) {
12493
+ return __getTidFromMap(__iriTidIntern, value);
12494
+ }
12495
+
12496
+ function __getBlankTid(label) {
12497
+ return __getTidFromMap(__blankTidIntern, label);
12498
+ }
12499
+
12500
+ function __getLiteralTid(norm) {
12501
+ return __getTidFromMap(__literalTidIntern, norm);
12502
+ }
12503
+
12418
12504
  function __isQuotedLexical(lit) {
12419
12505
  if (typeof lit !== 'string') return false;
12420
12506
  if (lit.length >= 6) {
@@ -12460,6 +12546,14 @@ function __isPlainStringLiteralValue(lit) {
12460
12546
  function normalizeLiteralForTid(lit) {
12461
12547
  // Canonicalize so that plain string and explicit xsd:string share the same id.
12462
12548
  if (typeof lit !== 'string') return lit;
12549
+
12550
+ // Fast path for the overwhelmingly common lexer output for plain string
12551
+ // literals: a canonical JSON-style quoted lexical form with no suffix.
12552
+ // This avoids literalParts()/language-tag parsing for large fact tables.
12553
+ if (lit.length >= 2 && lit.charCodeAt(0) === 34 && lit.charCodeAt(lit.length - 1) === 34 && lit.indexOf('^^') < 0) {
12554
+ return `${lit}^^<${XSD_NS}string>`;
12555
+ }
12556
+
12463
12557
  const [lex, dt] = literalParts(lit);
12464
12558
  if (dt === XSD_NS + 'string') return `${lex}^^<${XSD_NS}string>`;
12465
12559
  if (dt === null && __isPlainStringLiteralValue(lit)) return `${lex}^^<${XSD_NS}string>`;
@@ -12477,7 +12571,7 @@ class Iri extends Term {
12477
12571
  super();
12478
12572
  this.value = value;
12479
12573
  Object.defineProperty(this, '__tid', {
12480
- value: __getTid('I:' + value),
12574
+ value: __getIriTid(value),
12481
12575
  enumerable: false,
12482
12576
  });
12483
12577
  }
@@ -12489,7 +12583,7 @@ class Literal extends Term {
12489
12583
  this.value = value; // raw lexical form, e.g. "foo", 12, true, or "\"1944-08-21\"^^..."
12490
12584
  const norm = normalizeLiteralForTid(value);
12491
12585
  const useIntern = typeof norm === 'string' && norm.length <= MAX_LITERAL_TID_LEN;
12492
- const tid = useIntern ? __getTid('L:' + norm) : __nextTid++;
12586
+ const tid = useIntern ? __getLiteralTid(norm) : __nextTid++;
12493
12587
  Object.defineProperty(this, '__tid', {
12494
12588
  value: tid,
12495
12589
  enumerable: false,
@@ -12509,7 +12603,7 @@ class Blank extends Term {
12509
12603
  super();
12510
12604
  this.label = label; // _:b1, etc.
12511
12605
  Object.defineProperty(this, '__tid', {
12512
- value: __getTid('B:' + label),
12606
+ value: __getBlankTid(label),
12513
12607
  enumerable: false,
12514
12608
  });
12515
12609
  }
package/eyeling.js CHANGED
@@ -4825,7 +4825,7 @@ function main() {
4825
4825
  parseN3Text(text, {
4826
4826
  baseIri: __sourceLabelToBaseIri(sourceLabel),
4827
4827
  label: sourceLabel,
4828
- collectUsedPrefixes: true,
4828
+ collectUsedPrefixes: streamMode,
4829
4829
  keepSourceArtifacts: false,
4830
4830
  rdf: rdfMode,
4831
4831
  }),
@@ -6536,11 +6536,13 @@ function termFastKey(t) {
6536
6536
  if (t instanceof Iri || t instanceof Blank) return t.__tid;
6537
6537
 
6538
6538
  if (t instanceof Literal) {
6539
- // Very large literals intentionally skip global interning in prelude.js to
6540
- // avoid retaining huge strings forever. Their per-object __tid is therefore
6541
- // not value-stable, so using it here breaks duplicate detection for facts
6542
- // such as long log:outputString blocks that are re-derived during forward
6543
- // chaining. Fall back to a value-based key in that case.
6539
+ // Literal construction already computed a value-stable __tid for ordinary
6540
+ // short literals. Avoid re-running literalParts()/datatype normalization
6541
+ // while building fact indexes; on data-heavy inputs this is a hot path.
6542
+ // Only the rare over-sized literal needs the value-based fallback because
6543
+ // prelude intentionally gives such literals per-object ids to avoid
6544
+ // retaining huge strings in the global interner.
6545
+ if (typeof t.value !== 'string' || t.value.length + 64 <= MAX_LITERAL_TID_LEN) return t.__tid;
6544
6546
  const norm = normalizeLiteralForTid(t.value);
6545
6547
  if (typeof norm === 'string' && norm.length > MAX_LITERAL_TID_LEN) return 'L:' + norm;
6546
6548
  return t.__tid;
@@ -6627,17 +6629,28 @@ function ensureFactIndexes(facts) {
6627
6629
  enumerable: false,
6628
6630
  writable: true,
6629
6631
  });
6632
+ Object.defineProperty(facts, '__keySetComplete', {
6633
+ value: false,
6634
+ enumerable: false,
6635
+ writable: true,
6636
+ });
6630
6637
 
6631
- for (let i = 0; i < facts.length; i++) indexFact(facts, facts[i], i);
6638
+ // Build lookup indexes eagerly, but do not populate the duplicate-detection
6639
+ // string Set for every input fact. The predicate/subject/object indexes are
6640
+ // enough to verify duplicates when needed; avoiding 100k+ joined string keys
6641
+ // saves substantial time and GC on data-heavy query workloads.
6642
+ for (let i = 0; i < facts.length; i++) indexFact(facts, facts[i], i, false);
6632
6643
  }
6633
6644
 
6634
- function indexFact(facts, tr, idx) {
6645
+ function indexFact(facts, tr, idx, addKeySet = true) {
6635
6646
  const sk = termFastKey(tr.s);
6636
6647
  const ok = termFastKey(tr.o);
6648
+ let pkForKey = null;
6637
6649
 
6638
6650
  if (tr.p instanceof Iri) {
6639
6651
  // Use predicate term id as the primary key to avoid hashing long IRI strings.
6640
6652
  const pk = tr.p.__tid;
6653
+ pkForKey = pk;
6641
6654
 
6642
6655
  let pb = facts.__byPred.get(pk);
6643
6656
  if (!pb) {
@@ -6695,8 +6708,10 @@ function indexFact(facts, tr, idx) {
6695
6708
  }
6696
6709
  }
6697
6710
 
6698
- const key = tripleFastKey(tr);
6699
- if (key !== null) facts.__keySet.add(key);
6711
+ if (addKeySet && sk !== null && ok !== null) {
6712
+ if (pkForKey === null) pkForKey = termFastKey(tr.p);
6713
+ if (pkForKey !== null) facts.__keySet.add(sk + '\t' + pkForKey + '\t' + ok);
6714
+ }
6700
6715
  }
6701
6716
 
6702
6717
  function candidateFacts(facts, goal) {
@@ -6758,7 +6773,10 @@ function hasFactIndexed(facts, tr) {
6758
6773
  ensureFactIndexes(facts);
6759
6774
 
6760
6775
  const key = tripleFastKey(tr);
6761
- if (key !== null) return facts.__keySet.has(key);
6776
+ if (key !== null) {
6777
+ if (facts.__keySet.has(key)) return true;
6778
+ if (facts.__keySetComplete) return false;
6779
+ }
6762
6780
 
6763
6781
  if (tr.p instanceof Iri) {
6764
6782
  const pk = tr.p.__tid;
@@ -6788,7 +6806,7 @@ function pushFactIndexed(facts, tr) {
6788
6806
  ensureFactIndexes(facts);
6789
6807
  const idx = facts.length;
6790
6808
  facts.push(tr);
6791
- indexFact(facts, tr, idx);
6809
+ indexFact(facts, tr, idx, true);
6792
6810
  }
6793
6811
 
6794
6812
  function makeDerivedRecord(fact, rule, premises, subst, captureExplanations) {
@@ -8325,11 +8343,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8325
8343
  const varGen = [0];
8326
8344
  const skCounter = [0];
8327
8345
 
8328
- // Speed up dynamic rule promotion by maintaining O(1) membership sets.
8329
- // (Some workloads derive many rule-producing triples.)
8330
-
8331
- __ensureRuleKeySet(forwardRules);
8332
- __ensureRuleKeySet(backRules);
8346
+ // Rule-key sets are only needed if a program actually derives rule-producing
8347
+ // triples. Building them eagerly is expensive on large static rule sets, so
8348
+ // dynamic-promotion sites create them lazily before duplicate checks.
8333
8349
 
8334
8350
  // Cache head blank-node skolemization per (rule firing, head blank label).
8335
8351
  // This prevents repeatedly generating fresh _:sk_N blanks for the *same*
@@ -8508,8 +8524,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8508
8524
  newRule.conclusion,
8509
8525
  newRule.__dynamicConclusionTerm || null,
8510
8526
  );
8511
- if (!forwardRules.__ruleKeySet.has(key)) {
8512
- forwardRules.__ruleKeySet.add(key);
8527
+ const forwardRuleKeySet = __ensureRuleKeySet(forwardRules);
8528
+ if (!forwardRuleKeySet.has(key)) {
8529
+ forwardRuleKeySet.add(key);
8513
8530
  forwardRules.push(newRule);
8514
8531
  rulesChanged = true;
8515
8532
  }
@@ -8523,8 +8540,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8523
8540
  newRule.conclusion,
8524
8541
  newRule.__dynamicConclusionTerm || null,
8525
8542
  );
8526
- if (!backRules.__ruleKeySet.has(key)) {
8527
- backRules.__ruleKeySet.add(key);
8543
+ const backRuleKeySet = __ensureRuleKeySet(backRules);
8544
+ if (!backRuleKeySet.has(key)) {
8545
+ backRuleKeySet.add(key);
8528
8546
  backRules.push(newRule);
8529
8547
  indexBackRule(backRules, newRule);
8530
8548
  rulesChanged = true;
@@ -9462,7 +9480,26 @@ class N3SyntaxError extends SyntaxError {
9462
9480
  }
9463
9481
 
9464
9482
  function isWs(c) {
9465
- return /\s/.test(c);
9483
+ if (c === null || c === undefined) return false;
9484
+ const code = c.charCodeAt(0);
9485
+ // Fast path for the whitespace used by N3/Turtle inputs.
9486
+ return code === 0x20 || code === 0x09 || code === 0x0a || code === 0x0d || code === 0x0c;
9487
+ }
9488
+
9489
+ function isAsciiAlphaCode(code) {
9490
+ return (code >= 65 && code <= 90) || (code >= 97 && code <= 122);
9491
+ }
9492
+
9493
+ function isAsciiDigitCode(code) {
9494
+ return code >= 48 && code <= 57;
9495
+ }
9496
+
9497
+ function isAsciiAlpha(c) {
9498
+ return c !== null && c !== undefined && isAsciiAlphaCode(c.charCodeAt(0));
9499
+ }
9500
+
9501
+ function isAsciiDigit(c) {
9502
+ return c !== null && c !== undefined && isAsciiDigitCode(c.charCodeAt(0));
9466
9503
  }
9467
9504
 
9468
9505
  // Turtle/N3 prefixed names (PNAME_*) allow many Unicode letters and certain
@@ -9475,13 +9512,18 @@ function isWs(c) {
9475
9512
  //
9476
9513
  // We implement a grammar-aligned matcher for PN_CHARS* and PLX fragments.
9477
9514
  function isHexDigit(c) {
9478
- return c !== null && /^[0-9A-Fa-f]$/.test(c);
9515
+ if (c === null || c === undefined) return false;
9516
+ const code = c.charCodeAt(0);
9517
+ return (code >= 48 && code <= 57) || (code >= 65 && code <= 70) || (code >= 97 && code <= 102);
9479
9518
  }
9480
9519
 
9481
9520
  function isPnCharsBase(c) {
9482
9521
  // Approximation of PN_CHARS_BASE from the N3 grammar using Unicode properties.
9483
9522
  // Covers most letters used in practice (including ñ) and common scripts.
9484
- return c !== null && /[A-Za-z]|\p{L}|\p{Nl}/u.test(c);
9523
+ if (c === null || c === undefined) return false;
9524
+ const code = c.charCodeAt(0);
9525
+ if (isAsciiAlphaCode(code)) return true;
9526
+ return /\p{L}|\p{Nl}/u.test(c);
9485
9527
  }
9486
9528
 
9487
9529
  function isPnCharsU(c) {
@@ -9491,9 +9533,11 @@ function isPnCharsU(c) {
9491
9533
 
9492
9534
  function isPnChars(c) {
9493
9535
  // PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | U+00B7 | [U+0300-U+036F] | [U+203F-U+2040]
9494
- if (c === null) return false;
9536
+ if (c === null || c === undefined) return false;
9537
+ const code = c.charCodeAt(0);
9538
+ if (isAsciiAlphaCode(code) || isAsciiDigitCode(code) || code === 95 || code === 45) return true;
9495
9539
  if (isPnCharsU(c)) return true;
9496
- if (c === '-' || /[0-9]/.test(c) || c === '\u00B7') return true;
9540
+ if (c === '\u00B7') return true;
9497
9541
  const cp = c.codePointAt(0);
9498
9542
  return (cp >= 0x0300 && cp <= 0x036f) || (cp >= 0x203f && cp <= 0x2040);
9499
9543
  }
@@ -10598,7 +10642,10 @@ function normalizeRdfCompatibility(inputText) {
10598
10642
  function lex(inputText, opts = {}) {
10599
10643
  const rdf = !!(opts && opts.rdf);
10600
10644
  if (rdf) inputText = normalizeRdfCompatibility(inputText);
10601
- const chars = Array.from(inputText);
10645
+ // Avoid copying large ASCII/BMP inputs into an Array. Array.from() is
10646
+ // only needed when the text contains surrogate pairs and we want the old
10647
+ // code-point iteration behavior for non-BMP characters.
10648
+ const chars = /[\uD800-\uDFFF]/.test(inputText) ? Array.from(inputText) : inputText;
10602
10649
  const n = chars.length;
10603
10650
  let i = 0;
10604
10651
  const tokens = [];
@@ -10614,19 +10661,29 @@ function lex(inputText, opts = {}) {
10614
10661
  // - Accepts percent escapes (%HH) as PLX fragments.
10615
10662
  // - Accepts PN_LOCAL_ESC backslash escapes and decodes them ("\\." -> ".").
10616
10663
  // - Accepts '.' inside a name only when it is not terminal.
10664
+ function sliceChars(start, end) {
10665
+ return typeof chars === 'string' ? chars.slice(start, end) : chars.slice(start, end).join('');
10666
+ }
10667
+
10617
10668
  function readIdentText(startOffsetForErrors) {
10618
- const out = [];
10669
+ const start = i;
10670
+ let out = null;
10671
+
10672
+ function appendRawUntilHere() {
10673
+ if (out === null) out = [sliceChars(start, i)];
10674
+ }
10675
+
10619
10676
  while (i < n) {
10620
- const cc = peek();
10621
- if (cc === null || isWs(cc)) break;
10677
+ const cc = chars[i];
10678
+ if (cc === null || cc === undefined || isWs(cc)) break;
10622
10679
 
10623
10680
  // Hard stops: delimiters cannot appear unescaped inside PNAME tokens.
10624
- if ('{}()[];,'.includes(cc)) break;
10681
+ if (cc === '{' || cc === '}' || cc === '(' || cc === ')' || cc === '[' || cc === ']' || cc === ';' || cc === ',') break;
10625
10682
 
10626
10683
  // Dot is allowed inside PN_LOCAL, but not at the end.
10627
10684
  if (cc === '.') {
10628
10685
  if (!canContinueAfterDot(peek(1))) break;
10629
- out.push('.');
10686
+ if (out !== null) out.push('.');
10630
10687
  i++;
10631
10688
  continue;
10632
10689
  }
@@ -10641,6 +10698,7 @@ function lex(inputText, opts = {}) {
10641
10698
  typeof startOffsetForErrors === 'number' ? startOffsetForErrors : i,
10642
10699
  );
10643
10700
  }
10701
+ appendRawUntilHere();
10644
10702
  out.push('%', h1, h2);
10645
10703
  i += 3;
10646
10704
  continue;
@@ -10650,6 +10708,7 @@ function lex(inputText, opts = {}) {
10650
10708
  if (cc === '\\') {
10651
10709
  const esc = peek(1);
10652
10710
  if (esc !== null && PN_LOCAL_ESC_SET.has(esc)) {
10711
+ appendRawUntilHere();
10653
10712
  out.push(esc); // decoded form
10654
10713
  i += 2;
10655
10714
  continue;
@@ -10661,14 +10720,14 @@ function lex(inputText, opts = {}) {
10661
10720
  }
10662
10721
 
10663
10722
  if (isIdentChar(cc)) {
10664
- out.push(cc);
10723
+ if (out !== null) out.push(cc);
10665
10724
  i++;
10666
10725
  continue;
10667
10726
  }
10668
10727
 
10669
10728
  break;
10670
10729
  }
10671
- return out.join('');
10730
+ return out === null ? sliceChars(start, i) : out.join('');
10672
10731
  }
10673
10732
 
10674
10733
  while (i < n) {
@@ -10955,10 +11014,10 @@ function lex(inputText, opts = {}) {
10955
11014
  // "@" [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
10956
11015
  const tagChars = [];
10957
11016
  let cc = peek();
10958
- if (cc === null || !/[A-Za-z]/.test(cc)) {
11017
+ if (cc === null || !isAsciiAlpha(cc)) {
10959
11018
  throw new N3SyntaxError("Invalid language tag (expected [A-Za-z] after '@')", start);
10960
11019
  }
10961
- while ((cc = peek()) !== null && /[A-Za-z]/.test(cc)) {
11020
+ while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
10962
11021
  tagChars.push(cc);
10963
11022
  i++;
10964
11023
  }
@@ -10982,7 +11041,7 @@ function lex(inputText, opts = {}) {
10982
11041
  // Otherwise, treat as a directive (@prefix, @base)
10983
11042
  const wordChars = [];
10984
11043
  let cc;
10985
- while ((cc = peek()) !== null && /[A-Za-z]/.test(cc)) {
11044
+ while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
10986
11045
  wordChars.push(cc);
10987
11046
  i++;
10988
11047
  }
@@ -10994,19 +11053,19 @@ function lex(inputText, opts = {}) {
10994
11053
  }
10995
11054
 
10996
11055
  // 6) Numeric literal (integer or float)
10997
- if (/[0-9]/.test(c) || (c === '-' && peek(1) !== null && /[0-9]/.test(peek(1)))) {
11056
+ if (isAsciiDigit(c) || (c === '-' && peek(1) !== null && isAsciiDigit(peek(1)))) {
10998
11057
  const start = i;
10999
11058
  const numChars = [c];
11000
11059
  i++;
11001
11060
  while (i < n) {
11002
11061
  const cc = chars[i];
11003
- if (/[0-9]/.test(cc)) {
11062
+ if (isAsciiDigit(cc)) {
11004
11063
  numChars.push(cc);
11005
11064
  i++;
11006
11065
  continue;
11007
11066
  }
11008
11067
  if (cc === '.') {
11009
- if (i + 1 < n && /[0-9]/.test(chars[i + 1])) {
11068
+ if (i + 1 < n && isAsciiDigit(chars[i + 1])) {
11010
11069
  numChars.push('.');
11011
11070
  i++;
11012
11071
  continue;
@@ -11021,14 +11080,14 @@ function lex(inputText, opts = {}) {
11021
11080
  if (i < n && (chars[i] === 'e' || chars[i] === 'E')) {
11022
11081
  let j = i + 1;
11023
11082
  if (j < n && (chars[j] === '+' || chars[j] === '-')) j++;
11024
- if (j < n && /[0-9]/.test(chars[j])) {
11083
+ if (j < n && isAsciiDigit(chars[j])) {
11025
11084
  numChars.push(chars[i]); // e/E
11026
11085
  i++;
11027
11086
  if (i < n && (chars[i] === '+' || chars[i] === '-')) {
11028
11087
  numChars.push(chars[i]);
11029
11088
  i++;
11030
11089
  }
11031
- while (i < n && /[0-9]/.test(chars[i])) {
11090
+ while (i < n && isAsciiDigit(chars[i])) {
11032
11091
  numChars.push(chars[i]);
11033
11092
  i++;
11034
11093
  }
@@ -11477,7 +11536,15 @@ class Parser {
11477
11536
  }
11478
11537
 
11479
11538
  isIdentKeyword(tok, keyword) {
11480
- return tok && tok.typ === 'Ident' && typeof tok.value === 'string' && tok.value.toLowerCase() === keyword;
11539
+ if (!tok || tok.typ !== 'Ident' || typeof tok.value !== 'string') return false;
11540
+ const v = tok.value;
11541
+ if (v.length !== keyword.length) return false;
11542
+ for (let i = 0; i < keyword.length; i++) {
11543
+ const code = v.charCodeAt(i);
11544
+ const lower = code >= 65 && code <= 90 ? code + 32 : code;
11545
+ if (lower !== keyword.charCodeAt(i)) return false;
11546
+ }
11547
+ return true;
11481
11548
  }
11482
11549
 
11483
11550
  canStartSparqlPrefixDirective() {
@@ -12400,21 +12467,40 @@ function literalParts(lit) {
12400
12467
  // equality fast-paths than repeated string key construction.
12401
12468
 
12402
12469
  let __nextTid = 1;
12403
- const __tidIntern = new Map(); // string key -> number
12470
+ const __tidIntern = new Map(); // legacy generic key -> number
12471
+ const __iriTidIntern = new Map(); // IRI value -> number
12472
+ const __blankTidIntern = new Map(); // blank label -> number
12473
+ const __literalTidIntern = new Map(); // normalized literal lexical form -> number
12404
12474
 
12405
12475
  // Avoid storing extremely large literal keys in the global term-id intern map.
12406
12476
  // For huge literals we still assign a unique __tid, but we do not intern the key.
12407
12477
  const MAX_LITERAL_TID_LEN = 1024;
12408
12478
 
12409
- function __getTid(key) {
12410
- let id = __tidIntern.get(key);
12479
+ function __getTidFromMap(map, key) {
12480
+ let id = map.get(key);
12411
12481
  if (!id) {
12412
12482
  id = __nextTid++;
12413
- __tidIntern.set(key, id);
12483
+ map.set(key, id);
12414
12484
  }
12415
12485
  return id;
12416
12486
  }
12417
12487
 
12488
+ function __getTid(key) {
12489
+ return __getTidFromMap(__tidIntern, key);
12490
+ }
12491
+
12492
+ function __getIriTid(value) {
12493
+ return __getTidFromMap(__iriTidIntern, value);
12494
+ }
12495
+
12496
+ function __getBlankTid(label) {
12497
+ return __getTidFromMap(__blankTidIntern, label);
12498
+ }
12499
+
12500
+ function __getLiteralTid(norm) {
12501
+ return __getTidFromMap(__literalTidIntern, norm);
12502
+ }
12503
+
12418
12504
  function __isQuotedLexical(lit) {
12419
12505
  if (typeof lit !== 'string') return false;
12420
12506
  if (lit.length >= 6) {
@@ -12460,6 +12546,14 @@ function __isPlainStringLiteralValue(lit) {
12460
12546
  function normalizeLiteralForTid(lit) {
12461
12547
  // Canonicalize so that plain string and explicit xsd:string share the same id.
12462
12548
  if (typeof lit !== 'string') return lit;
12549
+
12550
+ // Fast path for the overwhelmingly common lexer output for plain string
12551
+ // literals: a canonical JSON-style quoted lexical form with no suffix.
12552
+ // This avoids literalParts()/language-tag parsing for large fact tables.
12553
+ if (lit.length >= 2 && lit.charCodeAt(0) === 34 && lit.charCodeAt(lit.length - 1) === 34 && lit.indexOf('^^') < 0) {
12554
+ return `${lit}^^<${XSD_NS}string>`;
12555
+ }
12556
+
12463
12557
  const [lex, dt] = literalParts(lit);
12464
12558
  if (dt === XSD_NS + 'string') return `${lex}^^<${XSD_NS}string>`;
12465
12559
  if (dt === null && __isPlainStringLiteralValue(lit)) return `${lex}^^<${XSD_NS}string>`;
@@ -12477,7 +12571,7 @@ class Iri extends Term {
12477
12571
  super();
12478
12572
  this.value = value;
12479
12573
  Object.defineProperty(this, '__tid', {
12480
- value: __getTid('I:' + value),
12574
+ value: __getIriTid(value),
12481
12575
  enumerable: false,
12482
12576
  });
12483
12577
  }
@@ -12489,7 +12583,7 @@ class Literal extends Term {
12489
12583
  this.value = value; // raw lexical form, e.g. "foo", 12, true, or "\"1944-08-21\"^^..."
12490
12584
  const norm = normalizeLiteralForTid(value);
12491
12585
  const useIntern = typeof norm === 'string' && norm.length <= MAX_LITERAL_TID_LEN;
12492
- const tid = useIntern ? __getTid('L:' + norm) : __nextTid++;
12586
+ const tid = useIntern ? __getLiteralTid(norm) : __nextTid++;
12493
12587
  Object.defineProperty(this, '__tid', {
12494
12588
  value: tid,
12495
12589
  enumerable: false,
@@ -12509,7 +12603,7 @@ class Blank extends Term {
12509
12603
  super();
12510
12604
  this.label = label; // _:b1, etc.
12511
12605
  Object.defineProperty(this, '__tid', {
12512
- value: __getTid('B:' + label),
12606
+ value: __getBlankTid(label),
12513
12607
  enumerable: false,
12514
12608
  });
12515
12609
  }
package/lib/cli.js CHANGED
@@ -210,7 +210,7 @@ function main() {
210
210
  parseN3Text(text, {
211
211
  baseIri: __sourceLabelToBaseIri(sourceLabel),
212
212
  label: sourceLabel,
213
- collectUsedPrefixes: true,
213
+ collectUsedPrefixes: streamMode,
214
214
  keepSourceArtifacts: false,
215
215
  rdf: rdfMode,
216
216
  }),
package/lib/engine.js CHANGED
@@ -1049,11 +1049,13 @@ function termFastKey(t) {
1049
1049
  if (t instanceof Iri || t instanceof Blank) return t.__tid;
1050
1050
 
1051
1051
  if (t instanceof Literal) {
1052
- // Very large literals intentionally skip global interning in prelude.js to
1053
- // avoid retaining huge strings forever. Their per-object __tid is therefore
1054
- // not value-stable, so using it here breaks duplicate detection for facts
1055
- // such as long log:outputString blocks that are re-derived during forward
1056
- // chaining. Fall back to a value-based key in that case.
1052
+ // Literal construction already computed a value-stable __tid for ordinary
1053
+ // short literals. Avoid re-running literalParts()/datatype normalization
1054
+ // while building fact indexes; on data-heavy inputs this is a hot path.
1055
+ // Only the rare over-sized literal needs the value-based fallback because
1056
+ // prelude intentionally gives such literals per-object ids to avoid
1057
+ // retaining huge strings in the global interner.
1058
+ if (typeof t.value !== 'string' || t.value.length + 64 <= MAX_LITERAL_TID_LEN) return t.__tid;
1057
1059
  const norm = normalizeLiteralForTid(t.value);
1058
1060
  if (typeof norm === 'string' && norm.length > MAX_LITERAL_TID_LEN) return 'L:' + norm;
1059
1061
  return t.__tid;
@@ -1140,17 +1142,28 @@ function ensureFactIndexes(facts) {
1140
1142
  enumerable: false,
1141
1143
  writable: true,
1142
1144
  });
1145
+ Object.defineProperty(facts, '__keySetComplete', {
1146
+ value: false,
1147
+ enumerable: false,
1148
+ writable: true,
1149
+ });
1143
1150
 
1144
- for (let i = 0; i < facts.length; i++) indexFact(facts, facts[i], i);
1151
+ // Build lookup indexes eagerly, but do not populate the duplicate-detection
1152
+ // string Set for every input fact. The predicate/subject/object indexes are
1153
+ // enough to verify duplicates when needed; avoiding 100k+ joined string keys
1154
+ // saves substantial time and GC on data-heavy query workloads.
1155
+ for (let i = 0; i < facts.length; i++) indexFact(facts, facts[i], i, false);
1145
1156
  }
1146
1157
 
1147
- function indexFact(facts, tr, idx) {
1158
+ function indexFact(facts, tr, idx, addKeySet = true) {
1148
1159
  const sk = termFastKey(tr.s);
1149
1160
  const ok = termFastKey(tr.o);
1161
+ let pkForKey = null;
1150
1162
 
1151
1163
  if (tr.p instanceof Iri) {
1152
1164
  // Use predicate term id as the primary key to avoid hashing long IRI strings.
1153
1165
  const pk = tr.p.__tid;
1166
+ pkForKey = pk;
1154
1167
 
1155
1168
  let pb = facts.__byPred.get(pk);
1156
1169
  if (!pb) {
@@ -1208,8 +1221,10 @@ function indexFact(facts, tr, idx) {
1208
1221
  }
1209
1222
  }
1210
1223
 
1211
- const key = tripleFastKey(tr);
1212
- if (key !== null) facts.__keySet.add(key);
1224
+ if (addKeySet && sk !== null && ok !== null) {
1225
+ if (pkForKey === null) pkForKey = termFastKey(tr.p);
1226
+ if (pkForKey !== null) facts.__keySet.add(sk + '\t' + pkForKey + '\t' + ok);
1227
+ }
1213
1228
  }
1214
1229
 
1215
1230
  function candidateFacts(facts, goal) {
@@ -1271,7 +1286,10 @@ function hasFactIndexed(facts, tr) {
1271
1286
  ensureFactIndexes(facts);
1272
1287
 
1273
1288
  const key = tripleFastKey(tr);
1274
- if (key !== null) return facts.__keySet.has(key);
1289
+ if (key !== null) {
1290
+ if (facts.__keySet.has(key)) return true;
1291
+ if (facts.__keySetComplete) return false;
1292
+ }
1275
1293
 
1276
1294
  if (tr.p instanceof Iri) {
1277
1295
  const pk = tr.p.__tid;
@@ -1301,7 +1319,7 @@ function pushFactIndexed(facts, tr) {
1301
1319
  ensureFactIndexes(facts);
1302
1320
  const idx = facts.length;
1303
1321
  facts.push(tr);
1304
- indexFact(facts, tr, idx);
1322
+ indexFact(facts, tr, idx, true);
1305
1323
  }
1306
1324
 
1307
1325
  function makeDerivedRecord(fact, rule, premises, subst, captureExplanations) {
@@ -2838,11 +2856,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
2838
2856
  const varGen = [0];
2839
2857
  const skCounter = [0];
2840
2858
 
2841
- // Speed up dynamic rule promotion by maintaining O(1) membership sets.
2842
- // (Some workloads derive many rule-producing triples.)
2843
-
2844
- __ensureRuleKeySet(forwardRules);
2845
- __ensureRuleKeySet(backRules);
2859
+ // Rule-key sets are only needed if a program actually derives rule-producing
2860
+ // triples. Building them eagerly is expensive on large static rule sets, so
2861
+ // dynamic-promotion sites create them lazily before duplicate checks.
2846
2862
 
2847
2863
  // Cache head blank-node skolemization per (rule firing, head blank label).
2848
2864
  // This prevents repeatedly generating fresh _:sk_N blanks for the *same*
@@ -3021,8 +3037,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
3021
3037
  newRule.conclusion,
3022
3038
  newRule.__dynamicConclusionTerm || null,
3023
3039
  );
3024
- if (!forwardRules.__ruleKeySet.has(key)) {
3025
- forwardRules.__ruleKeySet.add(key);
3040
+ const forwardRuleKeySet = __ensureRuleKeySet(forwardRules);
3041
+ if (!forwardRuleKeySet.has(key)) {
3042
+ forwardRuleKeySet.add(key);
3026
3043
  forwardRules.push(newRule);
3027
3044
  rulesChanged = true;
3028
3045
  }
@@ -3036,8 +3053,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
3036
3053
  newRule.conclusion,
3037
3054
  newRule.__dynamicConclusionTerm || null,
3038
3055
  );
3039
- if (!backRules.__ruleKeySet.has(key)) {
3040
- backRules.__ruleKeySet.add(key);
3056
+ const backRuleKeySet = __ensureRuleKeySet(backRules);
3057
+ if (!backRuleKeySet.has(key)) {
3058
+ backRuleKeySet.add(key);
3041
3059
  backRules.push(newRule);
3042
3060
  indexBackRule(backRules, newRule);
3043
3061
  rulesChanged = true;
package/lib/lexer.js CHANGED
@@ -30,7 +30,26 @@ class N3SyntaxError extends SyntaxError {
30
30
  }
31
31
 
32
32
  function isWs(c) {
33
- return /\s/.test(c);
33
+ if (c === null || c === undefined) return false;
34
+ const code = c.charCodeAt(0);
35
+ // Fast path for the whitespace used by N3/Turtle inputs.
36
+ return code === 0x20 || code === 0x09 || code === 0x0a || code === 0x0d || code === 0x0c;
37
+ }
38
+
39
+ function isAsciiAlphaCode(code) {
40
+ return (code >= 65 && code <= 90) || (code >= 97 && code <= 122);
41
+ }
42
+
43
+ function isAsciiDigitCode(code) {
44
+ return code >= 48 && code <= 57;
45
+ }
46
+
47
+ function isAsciiAlpha(c) {
48
+ return c !== null && c !== undefined && isAsciiAlphaCode(c.charCodeAt(0));
49
+ }
50
+
51
+ function isAsciiDigit(c) {
52
+ return c !== null && c !== undefined && isAsciiDigitCode(c.charCodeAt(0));
34
53
  }
35
54
 
36
55
  // Turtle/N3 prefixed names (PNAME_*) allow many Unicode letters and certain
@@ -43,13 +62,18 @@ function isWs(c) {
43
62
  //
44
63
  // We implement a grammar-aligned matcher for PN_CHARS* and PLX fragments.
45
64
  function isHexDigit(c) {
46
- return c !== null && /^[0-9A-Fa-f]$/.test(c);
65
+ if (c === null || c === undefined) return false;
66
+ const code = c.charCodeAt(0);
67
+ return (code >= 48 && code <= 57) || (code >= 65 && code <= 70) || (code >= 97 && code <= 102);
47
68
  }
48
69
 
49
70
  function isPnCharsBase(c) {
50
71
  // Approximation of PN_CHARS_BASE from the N3 grammar using Unicode properties.
51
72
  // Covers most letters used in practice (including ñ) and common scripts.
52
- return c !== null && /[A-Za-z]|\p{L}|\p{Nl}/u.test(c);
73
+ if (c === null || c === undefined) return false;
74
+ const code = c.charCodeAt(0);
75
+ if (isAsciiAlphaCode(code)) return true;
76
+ return /\p{L}|\p{Nl}/u.test(c);
53
77
  }
54
78
 
55
79
  function isPnCharsU(c) {
@@ -59,9 +83,11 @@ function isPnCharsU(c) {
59
83
 
60
84
  function isPnChars(c) {
61
85
  // PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | U+00B7 | [U+0300-U+036F] | [U+203F-U+2040]
62
- if (c === null) return false;
86
+ if (c === null || c === undefined) return false;
87
+ const code = c.charCodeAt(0);
88
+ if (isAsciiAlphaCode(code) || isAsciiDigitCode(code) || code === 95 || code === 45) return true;
63
89
  if (isPnCharsU(c)) return true;
64
- if (c === '-' || /[0-9]/.test(c) || c === '\u00B7') return true;
90
+ if (c === '\u00B7') return true;
65
91
  const cp = c.codePointAt(0);
66
92
  return (cp >= 0x0300 && cp <= 0x036f) || (cp >= 0x203f && cp <= 0x2040);
67
93
  }
@@ -1166,7 +1192,10 @@ function normalizeRdfCompatibility(inputText) {
1166
1192
  function lex(inputText, opts = {}) {
1167
1193
  const rdf = !!(opts && opts.rdf);
1168
1194
  if (rdf) inputText = normalizeRdfCompatibility(inputText);
1169
- const chars = Array.from(inputText);
1195
+ // Avoid copying large ASCII/BMP inputs into an Array. Array.from() is
1196
+ // only needed when the text contains surrogate pairs and we want the old
1197
+ // code-point iteration behavior for non-BMP characters.
1198
+ const chars = /[\uD800-\uDFFF]/.test(inputText) ? Array.from(inputText) : inputText;
1170
1199
  const n = chars.length;
1171
1200
  let i = 0;
1172
1201
  const tokens = [];
@@ -1182,19 +1211,29 @@ function lex(inputText, opts = {}) {
1182
1211
  // - Accepts percent escapes (%HH) as PLX fragments.
1183
1212
  // - Accepts PN_LOCAL_ESC backslash escapes and decodes them ("\\." -> ".").
1184
1213
  // - Accepts '.' inside a name only when it is not terminal.
1214
+ function sliceChars(start, end) {
1215
+ return typeof chars === 'string' ? chars.slice(start, end) : chars.slice(start, end).join('');
1216
+ }
1217
+
1185
1218
  function readIdentText(startOffsetForErrors) {
1186
- const out = [];
1219
+ const start = i;
1220
+ let out = null;
1221
+
1222
+ function appendRawUntilHere() {
1223
+ if (out === null) out = [sliceChars(start, i)];
1224
+ }
1225
+
1187
1226
  while (i < n) {
1188
- const cc = peek();
1189
- if (cc === null || isWs(cc)) break;
1227
+ const cc = chars[i];
1228
+ if (cc === null || cc === undefined || isWs(cc)) break;
1190
1229
 
1191
1230
  // Hard stops: delimiters cannot appear unescaped inside PNAME tokens.
1192
- if ('{}()[];,'.includes(cc)) break;
1231
+ if (cc === '{' || cc === '}' || cc === '(' || cc === ')' || cc === '[' || cc === ']' || cc === ';' || cc === ',') break;
1193
1232
 
1194
1233
  // Dot is allowed inside PN_LOCAL, but not at the end.
1195
1234
  if (cc === '.') {
1196
1235
  if (!canContinueAfterDot(peek(1))) break;
1197
- out.push('.');
1236
+ if (out !== null) out.push('.');
1198
1237
  i++;
1199
1238
  continue;
1200
1239
  }
@@ -1209,6 +1248,7 @@ function lex(inputText, opts = {}) {
1209
1248
  typeof startOffsetForErrors === 'number' ? startOffsetForErrors : i,
1210
1249
  );
1211
1250
  }
1251
+ appendRawUntilHere();
1212
1252
  out.push('%', h1, h2);
1213
1253
  i += 3;
1214
1254
  continue;
@@ -1218,6 +1258,7 @@ function lex(inputText, opts = {}) {
1218
1258
  if (cc === '\\') {
1219
1259
  const esc = peek(1);
1220
1260
  if (esc !== null && PN_LOCAL_ESC_SET.has(esc)) {
1261
+ appendRawUntilHere();
1221
1262
  out.push(esc); // decoded form
1222
1263
  i += 2;
1223
1264
  continue;
@@ -1229,14 +1270,14 @@ function lex(inputText, opts = {}) {
1229
1270
  }
1230
1271
 
1231
1272
  if (isIdentChar(cc)) {
1232
- out.push(cc);
1273
+ if (out !== null) out.push(cc);
1233
1274
  i++;
1234
1275
  continue;
1235
1276
  }
1236
1277
 
1237
1278
  break;
1238
1279
  }
1239
- return out.join('');
1280
+ return out === null ? sliceChars(start, i) : out.join('');
1240
1281
  }
1241
1282
 
1242
1283
  while (i < n) {
@@ -1523,10 +1564,10 @@ function lex(inputText, opts = {}) {
1523
1564
  // "@" [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
1524
1565
  const tagChars = [];
1525
1566
  let cc = peek();
1526
- if (cc === null || !/[A-Za-z]/.test(cc)) {
1567
+ if (cc === null || !isAsciiAlpha(cc)) {
1527
1568
  throw new N3SyntaxError("Invalid language tag (expected [A-Za-z] after '@')", start);
1528
1569
  }
1529
- while ((cc = peek()) !== null && /[A-Za-z]/.test(cc)) {
1570
+ while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
1530
1571
  tagChars.push(cc);
1531
1572
  i++;
1532
1573
  }
@@ -1550,7 +1591,7 @@ function lex(inputText, opts = {}) {
1550
1591
  // Otherwise, treat as a directive (@prefix, @base)
1551
1592
  const wordChars = [];
1552
1593
  let cc;
1553
- while ((cc = peek()) !== null && /[A-Za-z]/.test(cc)) {
1594
+ while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
1554
1595
  wordChars.push(cc);
1555
1596
  i++;
1556
1597
  }
@@ -1562,19 +1603,19 @@ function lex(inputText, opts = {}) {
1562
1603
  }
1563
1604
 
1564
1605
  // 6) Numeric literal (integer or float)
1565
- if (/[0-9]/.test(c) || (c === '-' && peek(1) !== null && /[0-9]/.test(peek(1)))) {
1606
+ if (isAsciiDigit(c) || (c === '-' && peek(1) !== null && isAsciiDigit(peek(1)))) {
1566
1607
  const start = i;
1567
1608
  const numChars = [c];
1568
1609
  i++;
1569
1610
  while (i < n) {
1570
1611
  const cc = chars[i];
1571
- if (/[0-9]/.test(cc)) {
1612
+ if (isAsciiDigit(cc)) {
1572
1613
  numChars.push(cc);
1573
1614
  i++;
1574
1615
  continue;
1575
1616
  }
1576
1617
  if (cc === '.') {
1577
- if (i + 1 < n && /[0-9]/.test(chars[i + 1])) {
1618
+ if (i + 1 < n && isAsciiDigit(chars[i + 1])) {
1578
1619
  numChars.push('.');
1579
1620
  i++;
1580
1621
  continue;
@@ -1589,14 +1630,14 @@ function lex(inputText, opts = {}) {
1589
1630
  if (i < n && (chars[i] === 'e' || chars[i] === 'E')) {
1590
1631
  let j = i + 1;
1591
1632
  if (j < n && (chars[j] === '+' || chars[j] === '-')) j++;
1592
- if (j < n && /[0-9]/.test(chars[j])) {
1633
+ if (j < n && isAsciiDigit(chars[j])) {
1593
1634
  numChars.push(chars[i]); // e/E
1594
1635
  i++;
1595
1636
  if (i < n && (chars[i] === '+' || chars[i] === '-')) {
1596
1637
  numChars.push(chars[i]);
1597
1638
  i++;
1598
1639
  }
1599
- while (i < n && /[0-9]/.test(chars[i])) {
1640
+ while (i < n && isAsciiDigit(chars[i])) {
1600
1641
  numChars.push(chars[i]);
1601
1642
  i++;
1602
1643
  }
package/lib/parser.js CHANGED
@@ -86,7 +86,15 @@ class Parser {
86
86
  }
87
87
 
88
88
  isIdentKeyword(tok, keyword) {
89
- return tok && tok.typ === 'Ident' && typeof tok.value === 'string' && tok.value.toLowerCase() === keyword;
89
+ if (!tok || tok.typ !== 'Ident' || typeof tok.value !== 'string') return false;
90
+ const v = tok.value;
91
+ if (v.length !== keyword.length) return false;
92
+ for (let i = 0; i < keyword.length; i++) {
93
+ const code = v.charCodeAt(i);
94
+ const lower = code >= 65 && code <= 90 ? code + 32 : code;
95
+ if (lower !== keyword.charCodeAt(i)) return false;
96
+ }
97
+ return true;
90
98
  }
91
99
 
92
100
  canStartSparqlPrefixDirective() {
package/lib/prelude.js CHANGED
@@ -250,21 +250,40 @@ function literalParts(lit) {
250
250
  // equality fast-paths than repeated string key construction.
251
251
 
252
252
  let __nextTid = 1;
253
- const __tidIntern = new Map(); // string key -> number
253
+ const __tidIntern = new Map(); // legacy generic key -> number
254
+ const __iriTidIntern = new Map(); // IRI value -> number
255
+ const __blankTidIntern = new Map(); // blank label -> number
256
+ const __literalTidIntern = new Map(); // normalized literal lexical form -> number
254
257
 
255
258
  // Avoid storing extremely large literal keys in the global term-id intern map.
256
259
  // For huge literals we still assign a unique __tid, but we do not intern the key.
257
260
  const MAX_LITERAL_TID_LEN = 1024;
258
261
 
259
- function __getTid(key) {
260
- let id = __tidIntern.get(key);
262
+ function __getTidFromMap(map, key) {
263
+ let id = map.get(key);
261
264
  if (!id) {
262
265
  id = __nextTid++;
263
- __tidIntern.set(key, id);
266
+ map.set(key, id);
264
267
  }
265
268
  return id;
266
269
  }
267
270
 
271
+ function __getTid(key) {
272
+ return __getTidFromMap(__tidIntern, key);
273
+ }
274
+
275
+ function __getIriTid(value) {
276
+ return __getTidFromMap(__iriTidIntern, value);
277
+ }
278
+
279
+ function __getBlankTid(label) {
280
+ return __getTidFromMap(__blankTidIntern, label);
281
+ }
282
+
283
+ function __getLiteralTid(norm) {
284
+ return __getTidFromMap(__literalTidIntern, norm);
285
+ }
286
+
268
287
  function __isQuotedLexical(lit) {
269
288
  if (typeof lit !== 'string') return false;
270
289
  if (lit.length >= 6) {
@@ -310,6 +329,14 @@ function __isPlainStringLiteralValue(lit) {
310
329
  function normalizeLiteralForTid(lit) {
311
330
  // Canonicalize so that plain string and explicit xsd:string share the same id.
312
331
  if (typeof lit !== 'string') return lit;
332
+
333
+ // Fast path for the overwhelmingly common lexer output for plain string
334
+ // literals: a canonical JSON-style quoted lexical form with no suffix.
335
+ // This avoids literalParts()/language-tag parsing for large fact tables.
336
+ if (lit.length >= 2 && lit.charCodeAt(0) === 34 && lit.charCodeAt(lit.length - 1) === 34 && lit.indexOf('^^') < 0) {
337
+ return `${lit}^^<${XSD_NS}string>`;
338
+ }
339
+
313
340
  const [lex, dt] = literalParts(lit);
314
341
  if (dt === XSD_NS + 'string') return `${lex}^^<${XSD_NS}string>`;
315
342
  if (dt === null && __isPlainStringLiteralValue(lit)) return `${lex}^^<${XSD_NS}string>`;
@@ -327,7 +354,7 @@ class Iri extends Term {
327
354
  super();
328
355
  this.value = value;
329
356
  Object.defineProperty(this, '__tid', {
330
- value: __getTid('I:' + value),
357
+ value: __getIriTid(value),
331
358
  enumerable: false,
332
359
  });
333
360
  }
@@ -339,7 +366,7 @@ class Literal extends Term {
339
366
  this.value = value; // raw lexical form, e.g. "foo", 12, true, or "\"1944-08-21\"^^..."
340
367
  const norm = normalizeLiteralForTid(value);
341
368
  const useIntern = typeof norm === 'string' && norm.length <= MAX_LITERAL_TID_LEN;
342
- const tid = useIntern ? __getTid('L:' + norm) : __nextTid++;
369
+ const tid = useIntern ? __getLiteralTid(norm) : __nextTid++;
343
370
  Object.defineProperty(this, '__tid', {
344
371
  value: tid,
345
372
  enumerable: false,
@@ -359,7 +386,7 @@ class Blank extends Term {
359
386
  super();
360
387
  this.label = label; // _:b1, etc.
361
388
  Object.defineProperty(this, '__tid', {
362
- value: __getTid('B:' + label),
389
+ value: __getBlankTid(label),
363
390
  enumerable: false,
364
391
  });
365
392
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "eyeling",
3
- "version": "1.25.0",
3
+ "version": "1.25.1",
4
4
  "description": "A minimal Notation3 (N3) reasoner in JavaScript.",
5
5
  "main": "./index.js",
6
6
  "keywords": [