eyeling 1.25.0 → 1.25.2

This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -4825,7 +4825,7 @@ function main() {
4825
4825
  parseN3Text(text, {
4826
4826
  baseIri: __sourceLabelToBaseIri(sourceLabel),
4827
4827
  label: sourceLabel,
4828
- collectUsedPrefixes: true,
4828
+ collectUsedPrefixes: streamMode,
4829
4829
  keepSourceArtifacts: false,
4830
4830
  rdf: rdfMode,
4831
4831
  }),
@@ -5849,6 +5849,14 @@ function __prepareForwardRule(r) {
5849
5849
  configurable: true,
5850
5850
  });
5851
5851
  }
5852
+ if (!hasOwn.call(r, '__needsForwardSkipCheck')) {
5853
+ Object.defineProperty(r, '__needsForwardSkipCheck', {
5854
+ value: !!(r.__headIsStrictGround || (r.__scopedSkipInfo && r.__scopedSkipInfo.needsSnap)),
5855
+ enumerable: false,
5856
+ writable: false,
5857
+ configurable: true,
5858
+ });
5859
+ }
5852
5860
  }
5853
5861
 
5854
5862
  function __graphTriplesOrTrue(term) {
@@ -6167,6 +6175,11 @@ function skolemizeTermForHeadBlanks(t, headBlankLabels, mapping, skCounter, firi
6167
6175
  }
6168
6176
 
6169
6177
  function skolemizeTripleForHeadBlanks(tr, headBlankLabels, mapping, skCounter, firingKey, globalMap) {
6178
+ // Fast path: the common case has no explicit head blanks. Do not allocate a
6179
+ // replacement Triple or compute a firing key when skolemization cannot change
6180
+ // anything. This matters for long single-premise chains such as
6181
+ // deep-taxonomy-100000, where every derived head triple is otherwise copied.
6182
+ if (!headBlankLabels || headBlankLabels.size === 0) return tr;
6170
6183
  return new Triple(
6171
6184
  skolemizeTermForHeadBlanks(tr.s, headBlankLabels, mapping, skCounter, firingKey, globalMap),
6172
6185
  skolemizeTermForHeadBlanks(tr.p, headBlankLabels, mapping, skCounter, firingKey, globalMap),
@@ -6536,11 +6549,13 @@ function termFastKey(t) {
6536
6549
  if (t instanceof Iri || t instanceof Blank) return t.__tid;
6537
6550
 
6538
6551
  if (t instanceof Literal) {
6539
- // Very large literals intentionally skip global interning in prelude.js to
6540
- // avoid retaining huge strings forever. Their per-object __tid is therefore
6541
- // not value-stable, so using it here breaks duplicate detection for facts
6542
- // such as long log:outputString blocks that are re-derived during forward
6543
- // chaining. Fall back to a value-based key in that case.
6552
+ // Literal construction already computed a value-stable __tid for ordinary
6553
+ // short literals. Avoid re-running literalParts()/datatype normalization
6554
+ // while building fact indexes; on data-heavy inputs this is a hot path.
6555
+ // Only the rare over-sized literal needs the value-based fallback because
6556
+ // prelude intentionally gives such literals per-object ids to avoid
6557
+ // retaining huge strings in the global interner.
6558
+ if (typeof t.value !== 'string' || t.value.length + 64 <= MAX_LITERAL_TID_LEN) return t.__tid;
6544
6559
  const norm = normalizeLiteralForTid(t.value);
6545
6560
  if (typeof norm === 'string' && norm.length > MAX_LITERAL_TID_LEN) return 'L:' + norm;
6546
6561
  return t.__tid;
@@ -6627,17 +6642,57 @@ function ensureFactIndexes(facts) {
6627
6642
  enumerable: false,
6628
6643
  writable: true,
6629
6644
  });
6645
+ Object.defineProperty(facts, '__keySetComplete', {
6646
+ value: false,
6647
+ enumerable: false,
6648
+ writable: true,
6649
+ });
6630
6650
 
6631
- for (let i = 0; i < facts.length; i++) indexFact(facts, facts[i], i);
6651
+ // Build lookup indexes eagerly, but do not populate the duplicate-detection
6652
+ // string Set for every input fact. The predicate/subject/object indexes are
6653
+ // enough to verify duplicates when needed; avoiding 100k+ joined string keys
6654
+ // saves substantial time and GC on data-heavy query workloads.
6655
+ for (let i = 0; i < facts.length; i++) indexFact(facts, facts[i], i, false);
6632
6656
  }
6633
6657
 
6634
- function indexFact(facts, tr, idx) {
6658
+ function cloneFactIndexesForSnapshot(src, dest) {
6659
+ ensureFactIndexes(src);
6660
+
6661
+ function cloneArrayMap(map) {
6662
+ const out = new Map();
6663
+ for (const [k, arr] of map) out.set(k, arr.slice());
6664
+ return out;
6665
+ }
6666
+
6667
+ function cloneNestedArrayMap(map) {
6668
+ const out = new Map();
6669
+ for (const [k, inner] of map) {
6670
+ const innerOut = new Map();
6671
+ for (const [k2, arr] of inner) innerOut.set(k2, arr.slice());
6672
+ out.set(k, innerOut);
6673
+ }
6674
+ return out;
6675
+ }
6676
+
6677
+ Object.defineProperty(dest, '__byPred', { value: cloneArrayMap(src.__byPred), enumerable: false, writable: true });
6678
+ Object.defineProperty(dest, '__byPS', { value: cloneNestedArrayMap(src.__byPS), enumerable: false, writable: true });
6679
+ Object.defineProperty(dest, '__byPO', { value: cloneNestedArrayMap(src.__byPO), enumerable: false, writable: true });
6680
+ Object.defineProperty(dest, '__wildPred', { value: src.__wildPred.slice(), enumerable: false, writable: true });
6681
+ Object.defineProperty(dest, '__wildPS', { value: cloneArrayMap(src.__wildPS), enumerable: false, writable: true });
6682
+ Object.defineProperty(dest, '__wildPO', { value: cloneArrayMap(src.__wildPO), enumerable: false, writable: true });
6683
+ Object.defineProperty(dest, '__keySet', { value: new Set(src.__keySet), enumerable: false, writable: true });
6684
+ Object.defineProperty(dest, '__keySetComplete', { value: !!src.__keySetComplete, enumerable: false, writable: true });
6685
+ }
6686
+
6687
+ function indexFact(facts, tr, idx, addKeySet = true) {
6635
6688
  const sk = termFastKey(tr.s);
6636
6689
  const ok = termFastKey(tr.o);
6690
+ let pkForKey = null;
6637
6691
 
6638
6692
  if (tr.p instanceof Iri) {
6639
6693
  // Use predicate term id as the primary key to avoid hashing long IRI strings.
6640
6694
  const pk = tr.p.__tid;
6695
+ pkForKey = pk;
6641
6696
 
6642
6697
  let pb = facts.__byPred.get(pk);
6643
6698
  if (!pb) {
@@ -6695,8 +6750,10 @@ function indexFact(facts, tr, idx) {
6695
6750
  }
6696
6751
  }
6697
6752
 
6698
- const key = tripleFastKey(tr);
6699
- if (key !== null) facts.__keySet.add(key);
6753
+ if (addKeySet && sk !== null && ok !== null) {
6754
+ if (pkForKey === null) pkForKey = termFastKey(tr.p);
6755
+ if (pkForKey !== null) facts.__keySet.add(sk + '\t' + pkForKey + '\t' + ok);
6756
+ }
6700
6757
  }
6701
6758
 
6702
6759
  function candidateFacts(facts, goal) {
@@ -6758,7 +6815,10 @@ function hasFactIndexed(facts, tr) {
6758
6815
  ensureFactIndexes(facts);
6759
6816
 
6760
6817
  const key = tripleFastKey(tr);
6761
- if (key !== null) return facts.__keySet.has(key);
6818
+ if (key !== null) {
6819
+ if (facts.__keySet.has(key)) return true;
6820
+ if (facts.__keySetComplete) return false;
6821
+ }
6762
6822
 
6763
6823
  if (tr.p instanceof Iri) {
6764
6824
  const pk = tr.p.__tid;
@@ -6788,7 +6848,7 @@ function pushFactIndexed(facts, tr) {
6788
6848
  ensureFactIndexes(facts);
6789
6849
  const idx = facts.length;
6790
6850
  facts.push(tr);
6791
- indexFact(facts, tr, idx);
6851
+ indexFact(facts, tr, idx, true);
6792
6852
  }
6793
6853
 
6794
6854
  function makeDerivedRecord(fact, rule, premises, subst, captureExplanations) {
@@ -6910,13 +6970,20 @@ function makeSinglePremiseAgendaIndex(forwardRules, backRules) {
6910
6970
  if (!isSinglePremiseAgendaRuleSafe(r, backRules)) continue;
6911
6971
 
6912
6972
  const goal = r.premise[0];
6973
+ const goalSKey = termFastKey(goal.s);
6974
+ const goalOKey = termFastKey(goal.o);
6975
+ const fastSubjectVar = goal.p instanceof Iri && goal.s instanceof Var && goalOKey !== null ? goal.s.name : null;
6976
+ const fastObjectVar = goal.p instanceof Iri && goal.o instanceof Var && goalSKey !== null ? goal.o.name : null;
6913
6977
  const entry = {
6914
6978
  rule: r,
6915
6979
  ruleIndex: i,
6916
6980
  goal,
6917
6981
  goalPredTid: goal.p instanceof Iri ? goal.p.__tid : null,
6918
- goalSKey: termFastKey(goal.s),
6919
- goalOKey: termFastKey(goal.o),
6982
+ goalSKey,
6983
+ goalOKey,
6984
+ needsSkipCheck: !!r.__needsForwardSkipCheck,
6985
+ fastSubjectVar,
6986
+ fastObjectVar,
6920
6987
  };
6921
6988
 
6922
6989
  index.indexed.add(r);
@@ -8325,11 +8392,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8325
8392
  const varGen = [0];
8326
8393
  const skCounter = [0];
8327
8394
 
8328
- // Speed up dynamic rule promotion by maintaining O(1) membership sets.
8329
- // (Some workloads derive many rule-producing triples.)
8330
-
8331
- __ensureRuleKeySet(forwardRules);
8332
- __ensureRuleKeySet(backRules);
8395
+ // Rule-key sets are only needed if a program actually derives rule-producing
8396
+ // triples. Building them eagerly is expensive on large static rule sets, so
8397
+ // dynamic-promotion sites create them lazily before duplicate checks.
8333
8398
 
8334
8399
  // Cache head blank-node skolemization per (rule firing, head blank label).
8335
8400
  // This prevents repeatedly generating fresh _:sk_N blanks for the *same*
@@ -8379,7 +8444,7 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8379
8444
 
8380
8445
  function makeScopedSnapshot() {
8381
8446
  const snap = facts.slice();
8382
- ensureFactIndexes(snap);
8447
+ cloneFactIndexesForSnapshot(facts, snap);
8383
8448
  Object.defineProperty(snap, '__scopedSnapshot', {
8384
8449
  value: snap,
8385
8450
  enumerable: false,
@@ -8433,10 +8498,21 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8433
8498
  let changedHere = false;
8434
8499
  let rulesChanged = false;
8435
8500
 
8436
- // IMPORTANT: one skolem map per *rule firing*
8501
+ // IMPORTANT: one skolem map per *rule firing*. Instantiate premise
8502
+ // triples and build the firing key lazily: normal CLI runs do not capture
8503
+ // proof records, and most rules have no explicit head blanks, so the eager
8504
+ // work was pure allocation on large forward chains.
8437
8505
  const skMap = {};
8438
- const instantiatedPremises = r.premise.map((b) => applySubstTriple(b, s));
8439
- const fireKey = __firingKey(ruleIndex, instantiatedPremises);
8506
+ let instantiatedPremises = null;
8507
+ let fireKey = null;
8508
+ function getInstantiatedPremises() {
8509
+ if (instantiatedPremises === null) instantiatedPremises = r.premise.map((b) => applySubstTriple(b, s));
8510
+ return instantiatedPremises;
8511
+ }
8512
+ function getFireKey() {
8513
+ if (fireKey === null) fireKey = __firingKey(ruleIndex, getInstantiatedPremises());
8514
+ return fireKey;
8515
+ }
8440
8516
 
8441
8517
  // Support "dynamic" rule heads where the consequent is a term that
8442
8518
  // (after substitution) evaluates to a quoted formula.
@@ -8489,7 +8565,7 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8489
8565
  if (isFwRuleTriple || isBwRuleTriple) {
8490
8566
  if (!hasFactIndexed(facts, instantiated)) {
8491
8567
  pushFactIndexed(facts, instantiated);
8492
- const df = makeDerivedRecord(instantiated, r, instantiatedPremises, s, captureExplanations);
8568
+ const df = makeDerivedRecord(instantiated, r, getInstantiatedPremises(), s, captureExplanations);
8493
8569
  derivedForward.push(df);
8494
8570
  if (typeof onDerived === 'function') onDerived(df);
8495
8571
  changedHere = true;
@@ -8508,8 +8584,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8508
8584
  newRule.conclusion,
8509
8585
  newRule.__dynamicConclusionTerm || null,
8510
8586
  );
8511
- if (!forwardRules.__ruleKeySet.has(key)) {
8512
- forwardRules.__ruleKeySet.add(key);
8587
+ const forwardRuleKeySet = __ensureRuleKeySet(forwardRules);
8588
+ if (!forwardRuleKeySet.has(key)) {
8589
+ forwardRuleKeySet.add(key);
8513
8590
  forwardRules.push(newRule);
8514
8591
  rulesChanged = true;
8515
8592
  }
@@ -8523,8 +8600,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8523
8600
  newRule.conclusion,
8524
8601
  newRule.__dynamicConclusionTerm || null,
8525
8602
  );
8526
- if (!backRules.__ruleKeySet.has(key)) {
8527
- backRules.__ruleKeySet.add(key);
8603
+ const backRuleKeySet = __ensureRuleKeySet(backRules);
8604
+ if (!backRuleKeySet.has(key)) {
8605
+ backRuleKeySet.add(key);
8528
8606
  backRules.push(newRule);
8529
8607
  indexBackRule(backRules, newRule);
8530
8608
  rulesChanged = true;
@@ -8535,20 +8613,23 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8535
8613
  }
8536
8614
 
8537
8615
  // Only skolemize blank nodes that occur explicitly in the rule head
8538
- const inst = skolemizeTripleForHeadBlanks(
8539
- instantiated,
8540
- headBlankLabelsHere,
8541
- skMap,
8542
- skCounter,
8543
- fireKey,
8544
- headSkolemCache,
8545
- );
8616
+ const inst =
8617
+ headBlankLabelsHere && headBlankLabelsHere.size
8618
+ ? skolemizeTripleForHeadBlanks(
8619
+ instantiated,
8620
+ headBlankLabelsHere,
8621
+ skMap,
8622
+ skCounter,
8623
+ getFireKey(),
8624
+ headSkolemCache,
8625
+ )
8626
+ : instantiated;
8546
8627
 
8547
8628
  if (!isGroundTriple(inst)) continue;
8548
8629
  if (hasFactIndexed(facts, inst)) continue;
8549
8630
 
8550
8631
  pushFactIndexed(facts, inst);
8551
- const df = makeDerivedRecord(inst, r, instantiatedPremises, s, captureExplanations);
8632
+ const df = makeDerivedRecord(inst, r, getInstantiatedPremises(), s, captureExplanations);
8552
8633
  derivedForward.push(df);
8553
8634
  if (typeof onDerived === 'function') onDerived(df);
8554
8635
 
@@ -8575,10 +8656,19 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8575
8656
  for (let ci = 0; ci < total; ci++) {
8576
8657
  const entry = ci < candidates.exactLen ? candidates.exact[ci] : candidates.wild[ci - candidates.exactLen];
8577
8658
  const r = entry.rule;
8578
- if (__skipForwardRuleNow(r)) continue;
8579
-
8580
- const s = unifyTriple(entry.goal, fact, __emptySubst());
8581
- if (s === null) continue;
8659
+ if (entry.needsSkipCheck && __skipForwardRuleNow(r)) continue;
8660
+
8661
+ let s;
8662
+ if (entry.fastSubjectVar !== null) {
8663
+ s = __emptySubst();
8664
+ s[entry.fastSubjectVar] = fact.s;
8665
+ } else if (entry.fastObjectVar !== null) {
8666
+ s = __emptySubst();
8667
+ s[entry.fastObjectVar] = fact.o;
8668
+ } else {
8669
+ s = unifyTriple(entry.goal, fact, __emptySubst());
8670
+ if (s === null) continue;
8671
+ }
8582
8672
 
8583
8673
  const outcome = __emitForwardRuleSolution(r, entry.ruleIndex, s);
8584
8674
  if (outcome.rulesChanged) {
@@ -8595,7 +8685,7 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
8595
8685
  for (let i = 0; i < forwardRules.length; i++) {
8596
8686
  const r = forwardRules[i];
8597
8687
  if (agendaIndex.indexed.has(r)) continue;
8598
- if (__skipForwardRuleNow(r)) continue;
8688
+ if (r.__needsForwardSkipCheck && __skipForwardRuleNow(r)) continue;
8599
8689
 
8600
8690
  const headIsStrictGround = r.__headIsStrictGround;
8601
8691
  const maxSols = r.isFuse || headIsStrictGround ? 1 : undefined;
@@ -9462,7 +9552,26 @@ class N3SyntaxError extends SyntaxError {
9462
9552
  }
9463
9553
 
9464
9554
  function isWs(c) {
9465
- return /\s/.test(c);
9555
+ if (c === null || c === undefined) return false;
9556
+ const code = c.charCodeAt(0);
9557
+ // Fast path for the whitespace used by N3/Turtle inputs.
9558
+ return code === 0x20 || code === 0x09 || code === 0x0a || code === 0x0d || code === 0x0c;
9559
+ }
9560
+
9561
+ function isAsciiAlphaCode(code) {
9562
+ return (code >= 65 && code <= 90) || (code >= 97 && code <= 122);
9563
+ }
9564
+
9565
+ function isAsciiDigitCode(code) {
9566
+ return code >= 48 && code <= 57;
9567
+ }
9568
+
9569
+ function isAsciiAlpha(c) {
9570
+ return c !== null && c !== undefined && isAsciiAlphaCode(c.charCodeAt(0));
9571
+ }
9572
+
9573
+ function isAsciiDigit(c) {
9574
+ return c !== null && c !== undefined && isAsciiDigitCode(c.charCodeAt(0));
9466
9575
  }
9467
9576
 
9468
9577
  // Turtle/N3 prefixed names (PNAME_*) allow many Unicode letters and certain
@@ -9475,13 +9584,18 @@ function isWs(c) {
9475
9584
  //
9476
9585
  // We implement a grammar-aligned matcher for PN_CHARS* and PLX fragments.
9477
9586
  function isHexDigit(c) {
9478
- return c !== null && /^[0-9A-Fa-f]$/.test(c);
9587
+ if (c === null || c === undefined) return false;
9588
+ const code = c.charCodeAt(0);
9589
+ return (code >= 48 && code <= 57) || (code >= 65 && code <= 70) || (code >= 97 && code <= 102);
9479
9590
  }
9480
9591
 
9481
9592
  function isPnCharsBase(c) {
9482
9593
  // Approximation of PN_CHARS_BASE from the N3 grammar using Unicode properties.
9483
9594
  // Covers most letters used in practice (including ñ) and common scripts.
9484
- return c !== null && /[A-Za-z]|\p{L}|\p{Nl}/u.test(c);
9595
+ if (c === null || c === undefined) return false;
9596
+ const code = c.charCodeAt(0);
9597
+ if (isAsciiAlphaCode(code)) return true;
9598
+ return /\p{L}|\p{Nl}/u.test(c);
9485
9599
  }
9486
9600
 
9487
9601
  function isPnCharsU(c) {
@@ -9491,9 +9605,11 @@ function isPnCharsU(c) {
9491
9605
 
9492
9606
  function isPnChars(c) {
9493
9607
  // PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | U+00B7 | [U+0300-U+036F] | [U+203F-U+2040]
9494
- if (c === null) return false;
9608
+ if (c === null || c === undefined) return false;
9609
+ const code = c.charCodeAt(0);
9610
+ if (isAsciiAlphaCode(code) || isAsciiDigitCode(code) || code === 95 || code === 45) return true;
9495
9611
  if (isPnCharsU(c)) return true;
9496
- if (c === '-' || /[0-9]/.test(c) || c === '\u00B7') return true;
9612
+ if (c === '\u00B7') return true;
9497
9613
  const cp = c.codePointAt(0);
9498
9614
  return (cp >= 0x0300 && cp <= 0x036f) || (cp >= 0x203f && cp <= 0x2040);
9499
9615
  }
@@ -10595,10 +10711,23 @@ function normalizeRdfCompatibility(inputText) {
10595
10711
  return text;
10596
10712
  }
10597
10713
 
10714
+
10715
+ function isNumericLikeIdentifier(word) {
10716
+ if (typeof word !== 'string' || word.length === 0) return false;
10717
+ for (let j = 0; j < word.length; j++) {
10718
+ const code = word.charCodeAt(j);
10719
+ if (!((code >= 48 && code <= 57) || code === 46 || code === 45)) return false;
10720
+ }
10721
+ return true;
10722
+ }
10723
+
10598
10724
  function lex(inputText, opts = {}) {
10599
10725
  const rdf = !!(opts && opts.rdf);
10600
10726
  if (rdf) inputText = normalizeRdfCompatibility(inputText);
10601
- const chars = Array.from(inputText);
10727
+ // Avoid copying large ASCII/BMP inputs into an Array. Array.from() is
10728
+ // only needed when the text contains surrogate pairs and we want the old
10729
+ // code-point iteration behavior for non-BMP characters.
10730
+ const chars = /[\uD800-\uDFFF]/.test(inputText) ? Array.from(inputText) : inputText;
10602
10731
  const n = chars.length;
10603
10732
  let i = 0;
10604
10733
  const tokens = [];
@@ -10614,19 +10743,29 @@ function lex(inputText, opts = {}) {
10614
10743
  // - Accepts percent escapes (%HH) as PLX fragments.
10615
10744
  // - Accepts PN_LOCAL_ESC backslash escapes and decodes them ("\\." -> ".").
10616
10745
  // - Accepts '.' inside a name only when it is not terminal.
10746
+ function sliceChars(start, end) {
10747
+ return typeof chars === 'string' ? chars.slice(start, end) : chars.slice(start, end).join('');
10748
+ }
10749
+
10617
10750
  function readIdentText(startOffsetForErrors) {
10618
- const out = [];
10751
+ const start = i;
10752
+ let out = null;
10753
+
10754
+ function appendRawUntilHere() {
10755
+ if (out === null) out = [sliceChars(start, i)];
10756
+ }
10757
+
10619
10758
  while (i < n) {
10620
- const cc = peek();
10621
- if (cc === null || isWs(cc)) break;
10759
+ const cc = chars[i];
10760
+ if (cc === null || cc === undefined || isWs(cc)) break;
10622
10761
 
10623
10762
  // Hard stops: delimiters cannot appear unescaped inside PNAME tokens.
10624
- if ('{}()[];,'.includes(cc)) break;
10763
+ if (cc === '{' || cc === '}' || cc === '(' || cc === ')' || cc === '[' || cc === ']' || cc === ';' || cc === ',') break;
10625
10764
 
10626
10765
  // Dot is allowed inside PN_LOCAL, but not at the end.
10627
10766
  if (cc === '.') {
10628
10767
  if (!canContinueAfterDot(peek(1))) break;
10629
- out.push('.');
10768
+ if (out !== null) out.push('.');
10630
10769
  i++;
10631
10770
  continue;
10632
10771
  }
@@ -10641,6 +10780,7 @@ function lex(inputText, opts = {}) {
10641
10780
  typeof startOffsetForErrors === 'number' ? startOffsetForErrors : i,
10642
10781
  );
10643
10782
  }
10783
+ appendRawUntilHere();
10644
10784
  out.push('%', h1, h2);
10645
10785
  i += 3;
10646
10786
  continue;
@@ -10650,6 +10790,7 @@ function lex(inputText, opts = {}) {
10650
10790
  if (cc === '\\') {
10651
10791
  const esc = peek(1);
10652
10792
  if (esc !== null && PN_LOCAL_ESC_SET.has(esc)) {
10793
+ appendRawUntilHere();
10653
10794
  out.push(esc); // decoded form
10654
10795
  i += 2;
10655
10796
  continue;
@@ -10661,14 +10802,14 @@ function lex(inputText, opts = {}) {
10661
10802
  }
10662
10803
 
10663
10804
  if (isIdentChar(cc)) {
10664
- out.push(cc);
10805
+ if (out !== null) out.push(cc);
10665
10806
  i++;
10666
10807
  continue;
10667
10808
  }
10668
10809
 
10669
10810
  break;
10670
10811
  }
10671
- return out.join('');
10812
+ return out === null ? sliceChars(start, i) : out.join('');
10672
10813
  }
10673
10814
 
10674
10815
  while (i < n) {
@@ -10747,22 +10888,47 @@ function lex(inputText, opts = {}) {
10747
10888
  continue;
10748
10889
  }
10749
10890
 
10750
- // 5) Single-character punctuation
10751
- if ('{}()[];,.'.includes(c)) {
10752
- const mapping = {
10753
- '{': 'LBrace',
10754
- '}': 'RBrace',
10755
- '(': 'LParen',
10756
- ')': 'RParen',
10757
- '[': 'LBracket',
10758
- ']': 'RBracket',
10759
- ';': 'Semicolon',
10760
- ',': 'Comma',
10761
- '.': 'Dot',
10762
- };
10763
- tokens.push(new Token(mapping[c], null, i));
10764
- i++;
10765
- continue;
10891
+ // 5) Single-character punctuation. Use a switch rather than allocating a
10892
+ // mapping object for every punctuation token in large inputs.
10893
+ switch (c) {
10894
+ case '{':
10895
+ tokens.push(new Token('LBrace', null, i));
10896
+ i++;
10897
+ continue;
10898
+ case '}':
10899
+ tokens.push(new Token('RBrace', null, i));
10900
+ i++;
10901
+ continue;
10902
+ case '(':
10903
+ tokens.push(new Token('LParen', null, i));
10904
+ i++;
10905
+ continue;
10906
+ case ')':
10907
+ tokens.push(new Token('RParen', null, i));
10908
+ i++;
10909
+ continue;
10910
+ case '[':
10911
+ tokens.push(new Token('LBracket', null, i));
10912
+ i++;
10913
+ continue;
10914
+ case ']':
10915
+ tokens.push(new Token('RBracket', null, i));
10916
+ i++;
10917
+ continue;
10918
+ case ';':
10919
+ tokens.push(new Token('Semicolon', null, i));
10920
+ i++;
10921
+ continue;
10922
+ case ',':
10923
+ tokens.push(new Token('Comma', null, i));
10924
+ i++;
10925
+ continue;
10926
+ case '.':
10927
+ tokens.push(new Token('Dot', null, i));
10928
+ i++;
10929
+ continue;
10930
+ default:
10931
+ break;
10766
10932
  }
10767
10933
 
10768
10934
  // String literal: short "..." or long """..."""
@@ -10821,26 +10987,36 @@ function lex(inputText, opts = {}) {
10821
10987
  continue;
10822
10988
  }
10823
10989
 
10824
- // Short string literal " ... "
10990
+ // Short string literal " ... ". Most data files contain plain
10991
+ // unescaped labels; keep that path slice-based and avoid building an
10992
+ // intermediate character array + raw quoted string.
10825
10993
  i++; // consume opening "
10826
- const sChars = [];
10994
+ const contentStart = i;
10995
+ let sChars = null;
10996
+ let closed = false;
10827
10997
  while (i < n) {
10828
10998
  const cc = chars[i];
10829
10999
  i++;
10830
11000
  if (cc === '\\') {
11001
+ if (sChars === null) sChars = [sliceChars(contentStart, i - 1)];
10831
11002
  if (i < n) {
10832
11003
  const esc = chars[i];
10833
11004
  i++;
10834
11005
  sChars.push('\\');
10835
11006
  sChars.push(esc);
11007
+ } else {
11008
+ sChars.push('\\');
10836
11009
  }
10837
11010
  continue;
10838
11011
  }
10839
- if (cc === '"') break;
10840
- sChars.push(cc);
11012
+ if (cc === '"') {
11013
+ closed = true;
11014
+ break;
11015
+ }
11016
+ if (sChars !== null) sChars.push(cc);
10841
11017
  }
10842
- const raw = '"' + sChars.join('') + '"';
10843
- const decoded = decodeN3StringEscapes(stripQuotes(raw), start);
11018
+ const rawContent = sChars === null ? sliceChars(contentStart, closed ? i - 1 : i) : sChars.join('');
11019
+ const decoded = sChars === null ? rawContent : decodeN3StringEscapes(rawContent, start);
10844
11020
  assertValidStringLiteralValue(decoded, start);
10845
11021
  const s = JSON.stringify(decoded); // canonical short quoted form
10846
11022
  tokens.push(new Token('Literal', s, start));
@@ -10905,24 +11081,32 @@ function lex(inputText, opts = {}) {
10905
11081
 
10906
11082
  // Short string literal ' ... '
10907
11083
  i++; // consume opening '
10908
- const sChars = [];
11084
+ const contentStart = i;
11085
+ let sChars = null;
11086
+ let closed = false;
10909
11087
  while (i < n) {
10910
11088
  const cc = chars[i];
10911
11089
  i++;
10912
11090
  if (cc === '\\') {
11091
+ if (sChars === null) sChars = [sliceChars(contentStart, i - 1)];
10913
11092
  if (i < n) {
10914
11093
  const esc = chars[i];
10915
11094
  i++;
10916
11095
  sChars.push('\\');
10917
11096
  sChars.push(esc);
11097
+ } else {
11098
+ sChars.push('\\');
10918
11099
  }
10919
11100
  continue;
10920
11101
  }
10921
- if (cc === "'") break;
10922
- sChars.push(cc);
11102
+ if (cc === "'") {
11103
+ closed = true;
11104
+ break;
11105
+ }
11106
+ if (sChars !== null) sChars.push(cc);
10923
11107
  }
10924
- const raw = "'" + sChars.join('') + "'";
10925
- const decoded = decodeN3StringEscapes(stripQuotes(raw), start);
11108
+ const rawContent = sChars === null ? sliceChars(contentStart, closed ? i - 1 : i) : sChars.join('');
11109
+ const decoded = sChars === null ? rawContent : decodeN3StringEscapes(rawContent, start);
10926
11110
  assertValidStringLiteralValue(decoded, start);
10927
11111
  const s = JSON.stringify(decoded); // canonical short quoted form
10928
11112
  tokens.push(new Token('Literal', s, start));
@@ -10955,10 +11139,10 @@ function lex(inputText, opts = {}) {
10955
11139
  // "@" [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
10956
11140
  const tagChars = [];
10957
11141
  let cc = peek();
10958
- if (cc === null || !/[A-Za-z]/.test(cc)) {
11142
+ if (cc === null || !isAsciiAlpha(cc)) {
10959
11143
  throw new N3SyntaxError("Invalid language tag (expected [A-Za-z] after '@')", start);
10960
11144
  }
10961
- while ((cc = peek()) !== null && /[A-Za-z]/.test(cc)) {
11145
+ while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
10962
11146
  tagChars.push(cc);
10963
11147
  i++;
10964
11148
  }
@@ -10982,7 +11166,7 @@ function lex(inputText, opts = {}) {
10982
11166
  // Otherwise, treat as a directive (@prefix, @base)
10983
11167
  const wordChars = [];
10984
11168
  let cc;
10985
- while ((cc = peek()) !== null && /[A-Za-z]/.test(cc)) {
11169
+ while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
10986
11170
  wordChars.push(cc);
10987
11171
  i++;
10988
11172
  }
@@ -10994,19 +11178,19 @@ function lex(inputText, opts = {}) {
10994
11178
  }
10995
11179
 
10996
11180
  // 6) Numeric literal (integer or float)
10997
- if (/[0-9]/.test(c) || (c === '-' && peek(1) !== null && /[0-9]/.test(peek(1)))) {
11181
+ if (isAsciiDigit(c) || (c === '-' && peek(1) !== null && isAsciiDigit(peek(1)))) {
10998
11182
  const start = i;
10999
11183
  const numChars = [c];
11000
11184
  i++;
11001
11185
  while (i < n) {
11002
11186
  const cc = chars[i];
11003
- if (/[0-9]/.test(cc)) {
11187
+ if (isAsciiDigit(cc)) {
11004
11188
  numChars.push(cc);
11005
11189
  i++;
11006
11190
  continue;
11007
11191
  }
11008
11192
  if (cc === '.') {
11009
- if (i + 1 < n && /[0-9]/.test(chars[i + 1])) {
11193
+ if (i + 1 < n && isAsciiDigit(chars[i + 1])) {
11010
11194
  numChars.push('.');
11011
11195
  i++;
11012
11196
  continue;
@@ -11021,14 +11205,14 @@ function lex(inputText, opts = {}) {
11021
11205
  if (i < n && (chars[i] === 'e' || chars[i] === 'E')) {
11022
11206
  let j = i + 1;
11023
11207
  if (j < n && (chars[j] === '+' || chars[j] === '-')) j++;
11024
- if (j < n && /[0-9]/.test(chars[j])) {
11208
+ if (j < n && isAsciiDigit(chars[j])) {
11025
11209
  numChars.push(chars[i]); // e/E
11026
11210
  i++;
11027
11211
  if (i < n && (chars[i] === '+' || chars[i] === '-')) {
11028
11212
  numChars.push(chars[i]);
11029
11213
  i++;
11030
11214
  }
11031
- while (i < n && /[0-9]/.test(chars[i])) {
11215
+ while (i < n && isAsciiDigit(chars[i])) {
11032
11216
  numChars.push(chars[i]);
11033
11217
  i++;
11034
11218
  }
@@ -11047,7 +11231,7 @@ function lex(inputText, opts = {}) {
11047
11231
  }
11048
11232
  if (word === 'true' || word === 'false') {
11049
11233
  tokens.push(new Token('Literal', word, start));
11050
- } else if ([...word].every((ch) => /[0-9.-]/.test(ch))) {
11234
+ } else if (isNumericLikeIdentifier(word)) {
11051
11235
  tokens.push(new Token('Literal', word, start));
11052
11236
  } else {
11053
11237
  tokens.push(new Token('Ident', word, start));
@@ -11477,7 +11661,15 @@ class Parser {
11477
11661
  }
11478
11662
 
11479
11663
  isIdentKeyword(tok, keyword) {
11480
- return tok && tok.typ === 'Ident' && typeof tok.value === 'string' && tok.value.toLowerCase() === keyword;
11664
+ if (!tok || tok.typ !== 'Ident' || typeof tok.value !== 'string') return false;
11665
+ const v = tok.value;
11666
+ if (v.length !== keyword.length) return false;
11667
+ for (let i = 0; i < keyword.length; i++) {
11668
+ const code = v.charCodeAt(i);
11669
+ const lower = code >= 65 && code <= 90 ? code + 32 : code;
11670
+ if (lower !== keyword.charCodeAt(i)) return false;
11671
+ }
11672
+ return true;
11481
11673
  }
11482
11674
 
11483
11675
  canStartSparqlPrefixDirective() {
@@ -11624,7 +11816,7 @@ class Parser {
11624
11816
  } else if (tok2.typ === 'Ident') {
11625
11817
  const qn = tok2.value || '';
11626
11818
  if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok2, qn);
11627
- assertValidQNamePrefix(qn.split(':', 1)[0], this.fail.bind(this), tok2, '@prefix directive IRI');
11819
+ assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok2, '@prefix directive IRI');
11628
11820
  iri = this.prefixes.expandQName(qn);
11629
11821
  } else {
11630
11822
  this.fail(`Expected IRI after @prefix, got ${tok2.toString()}`, tok2);
@@ -11641,7 +11833,7 @@ class Parser {
11641
11833
  } else if (tok.typ === 'Ident') {
11642
11834
  const qn = tok.value || '';
11643
11835
  if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok, qn);
11644
- assertValidQNamePrefix(qn.split(':', 1)[0], this.fail.bind(this), tok, '@base directive IRI');
11836
+ assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok, '@base directive IRI');
11645
11837
  iri = this.prefixes.expandQName(qn);
11646
11838
  } else {
11647
11839
  this.fail(`Expected IRI after @base, got ${tok.toString()}`, tok);
@@ -11670,7 +11862,7 @@ class Parser {
11670
11862
  } else if (tok2.typ === 'Ident') {
11671
11863
  const qn = tok2.value || '';
11672
11864
  if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok2, qn);
11673
- assertValidQNamePrefix(qn.split(':', 1)[0], this.fail.bind(this), tok2, '@prefix directive IRI');
11865
+ assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok2, '@prefix directive IRI');
11674
11866
  iri = this.prefixes.expandQName(qn);
11675
11867
  } else {
11676
11868
  this.fail(`Expected IRI after PREFIX, got ${tok2.toString()}`, tok2);
@@ -11691,7 +11883,7 @@ class Parser {
11691
11883
  } else if (tok.typ === 'Ident') {
11692
11884
  const qn = tok.value || '';
11693
11885
  if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok, qn);
11694
- assertValidQNamePrefix(qn.split(':', 1)[0], this.fail.bind(this), tok, 'BASE directive IRI');
11886
+ assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok, 'BASE directive IRI');
11695
11887
  iri = this.prefixes.expandQName(qn);
11696
11888
  } else {
11697
11889
  this.fail(`Expected IRI after BASE, got ${tok.toString()}`, tok);
@@ -11738,14 +11930,18 @@ class Parser {
11738
11930
  const name = val || '';
11739
11931
  if (name === 'a') {
11740
11932
  return internIri(RDF_NS + 'type');
11741
- } else if (name.startsWith('_:')) {
11933
+ }
11934
+ const sep = name.indexOf(':');
11935
+ if (sep === 1 && name.charCodeAt(0) === 95) {
11742
11936
  return new Blank(name);
11743
- } else if (name.includes(':')) {
11744
- assertValidQNamePrefix(name.split(':', 1)[0], this.fail.bind(this), tok);
11745
- return internIri(this.prefixes.expandQName(name));
11746
- } else {
11747
- failInvalidKeywordLikeIdent(this.fail.bind(this), tok, name);
11748
11937
  }
11938
+ if (sep >= 0) {
11939
+ const prefixName = name.slice(0, sep);
11940
+ assertValidQNamePrefix(prefixName, this.fail.bind(this), tok);
11941
+ const base = this.prefixes.map[prefixName] || '';
11942
+ return internIri(base ? base + name.slice(sep + 1) : name);
11943
+ }
11944
+ failInvalidKeywordLikeIdent(this.fail.bind(this), tok, name);
11749
11945
  }
11750
11946
 
11751
11947
  if (typ === 'Literal') {
@@ -11776,7 +11972,7 @@ class Parser {
11776
11972
  } else if (dtTok.typ === 'Ident') {
11777
11973
  const qn = dtTok.value || '';
11778
11974
  if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), dtTok, qn);
11779
- assertValidQNamePrefix(qn.split(':', 1)[0], this.fail.bind(this), dtTok, 'datatype prefixed name');
11975
+ assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), dtTok, 'datatype prefixed name');
11780
11976
  dtIri = this.prefixes.expandQName(qn);
11781
11977
  } else {
11782
11978
  this.fail(`Expected datatype after ^^, got ${dtTok.toString()}`, dtTok);
@@ -12400,21 +12596,40 @@ function literalParts(lit) {
12400
12596
  // equality fast-paths than repeated string key construction.
12401
12597
 
12402
12598
  let __nextTid = 1;
12403
- const __tidIntern = new Map(); // string key -> number
12599
+ const __tidIntern = new Map(); // legacy generic key -> number
12600
+ const __iriTidIntern = new Map(); // IRI value -> number
12601
+ const __blankTidIntern = new Map(); // blank label -> number
12602
+ const __literalTidIntern = new Map(); // normalized literal lexical form -> number
12404
12603
 
12405
12604
  // Avoid storing extremely large literal keys in the global term-id intern map.
12406
12605
  // For huge literals we still assign a unique __tid, but we do not intern the key.
12407
12606
  const MAX_LITERAL_TID_LEN = 1024;
12408
12607
 
12409
- function __getTid(key) {
12410
- let id = __tidIntern.get(key);
12608
+ function __getTidFromMap(map, key) {
12609
+ let id = map.get(key);
12411
12610
  if (!id) {
12412
12611
  id = __nextTid++;
12413
- __tidIntern.set(key, id);
12612
+ map.set(key, id);
12414
12613
  }
12415
12614
  return id;
12416
12615
  }
12417
12616
 
12617
+ function __getTid(key) {
12618
+ return __getTidFromMap(__tidIntern, key);
12619
+ }
12620
+
12621
+ function __getIriTid(value) {
12622
+ return __getTidFromMap(__iriTidIntern, value);
12623
+ }
12624
+
12625
+ function __getBlankTid(label) {
12626
+ return __getTidFromMap(__blankTidIntern, label);
12627
+ }
12628
+
12629
+ function __getLiteralTid(norm) {
12630
+ return __getTidFromMap(__literalTidIntern, norm);
12631
+ }
12632
+
12418
12633
  function __isQuotedLexical(lit) {
12419
12634
  if (typeof lit !== 'string') return false;
12420
12635
  if (lit.length >= 6) {
@@ -12460,6 +12675,14 @@ function __isPlainStringLiteralValue(lit) {
12460
12675
  function normalizeLiteralForTid(lit) {
12461
12676
  // Canonicalize so that plain string and explicit xsd:string share the same id.
12462
12677
  if (typeof lit !== 'string') return lit;
12678
+
12679
+ // Fast path for the overwhelmingly common lexer output for plain string
12680
+ // literals: a canonical JSON-style quoted lexical form with no suffix.
12681
+ // This avoids literalParts()/language-tag parsing for large fact tables.
12682
+ if (lit.length >= 2 && lit.charCodeAt(0) === 34 && lit.charCodeAt(lit.length - 1) === 34 && lit.indexOf('^^') < 0) {
12683
+ return `${lit}^^<${XSD_NS}string>`;
12684
+ }
12685
+
12463
12686
  const [lex, dt] = literalParts(lit);
12464
12687
  if (dt === XSD_NS + 'string') return `${lex}^^<${XSD_NS}string>`;
12465
12688
  if (dt === null && __isPlainStringLiteralValue(lit)) return `${lex}^^<${XSD_NS}string>`;
@@ -12477,7 +12700,7 @@ class Iri extends Term {
12477
12700
  super();
12478
12701
  this.value = value;
12479
12702
  Object.defineProperty(this, '__tid', {
12480
- value: __getTid('I:' + value),
12703
+ value: __getIriTid(value),
12481
12704
  enumerable: false,
12482
12705
  });
12483
12706
  }
@@ -12489,7 +12712,7 @@ class Literal extends Term {
12489
12712
  this.value = value; // raw lexical form, e.g. "foo", 12, true, or "\"1944-08-21\"^^..."
12490
12713
  const norm = normalizeLiteralForTid(value);
12491
12714
  const useIntern = typeof norm === 'string' && norm.length <= MAX_LITERAL_TID_LEN;
12492
- const tid = useIntern ? __getTid('L:' + norm) : __nextTid++;
12715
+ const tid = useIntern ? __getLiteralTid(norm) : __nextTid++;
12493
12716
  Object.defineProperty(this, '__tid', {
12494
12717
  value: tid,
12495
12718
  enumerable: false,
@@ -12509,7 +12732,7 @@ class Blank extends Term {
12509
12732
  super();
12510
12733
  this.label = label; // _:b1, etc.
12511
12734
  Object.defineProperty(this, '__tid', {
12512
- value: __getTid('B:' + label),
12735
+ value: __getBlankTid(label),
12513
12736
  enumerable: false,
12514
12737
  });
12515
12738
  }