eyeling 1.25.0 → 1.25.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/eyeling.browser.js +333 -110
- package/eyeling.js +333 -110
- package/lib/cli.js +1 -1
- package/lib/engine.js +131 -41
- package/lib/lexer.js +143 -49
- package/lib/parser.js +24 -12
- package/lib/prelude.js +34 -7
- package/package.json +1 -1
|
@@ -4825,7 +4825,7 @@ function main() {
|
|
|
4825
4825
|
parseN3Text(text, {
|
|
4826
4826
|
baseIri: __sourceLabelToBaseIri(sourceLabel),
|
|
4827
4827
|
label: sourceLabel,
|
|
4828
|
-
collectUsedPrefixes:
|
|
4828
|
+
collectUsedPrefixes: streamMode,
|
|
4829
4829
|
keepSourceArtifacts: false,
|
|
4830
4830
|
rdf: rdfMode,
|
|
4831
4831
|
}),
|
|
@@ -5849,6 +5849,14 @@ function __prepareForwardRule(r) {
|
|
|
5849
5849
|
configurable: true,
|
|
5850
5850
|
});
|
|
5851
5851
|
}
|
|
5852
|
+
if (!hasOwn.call(r, '__needsForwardSkipCheck')) {
|
|
5853
|
+
Object.defineProperty(r, '__needsForwardSkipCheck', {
|
|
5854
|
+
value: !!(r.__headIsStrictGround || (r.__scopedSkipInfo && r.__scopedSkipInfo.needsSnap)),
|
|
5855
|
+
enumerable: false,
|
|
5856
|
+
writable: false,
|
|
5857
|
+
configurable: true,
|
|
5858
|
+
});
|
|
5859
|
+
}
|
|
5852
5860
|
}
|
|
5853
5861
|
|
|
5854
5862
|
function __graphTriplesOrTrue(term) {
|
|
@@ -6167,6 +6175,11 @@ function skolemizeTermForHeadBlanks(t, headBlankLabels, mapping, skCounter, firi
|
|
|
6167
6175
|
}
|
|
6168
6176
|
|
|
6169
6177
|
function skolemizeTripleForHeadBlanks(tr, headBlankLabels, mapping, skCounter, firingKey, globalMap) {
|
|
6178
|
+
// Fast path: the common case has no explicit head blanks. Do not allocate a
|
|
6179
|
+
// replacement Triple or compute a firing key when skolemization cannot change
|
|
6180
|
+
// anything. This matters for long single-premise chains such as
|
|
6181
|
+
// deep-taxonomy-100000, where every derived head triple is otherwise copied.
|
|
6182
|
+
if (!headBlankLabels || headBlankLabels.size === 0) return tr;
|
|
6170
6183
|
return new Triple(
|
|
6171
6184
|
skolemizeTermForHeadBlanks(tr.s, headBlankLabels, mapping, skCounter, firingKey, globalMap),
|
|
6172
6185
|
skolemizeTermForHeadBlanks(tr.p, headBlankLabels, mapping, skCounter, firingKey, globalMap),
|
|
@@ -6536,11 +6549,13 @@ function termFastKey(t) {
|
|
|
6536
6549
|
if (t instanceof Iri || t instanceof Blank) return t.__tid;
|
|
6537
6550
|
|
|
6538
6551
|
if (t instanceof Literal) {
|
|
6539
|
-
//
|
|
6540
|
-
//
|
|
6541
|
-
//
|
|
6542
|
-
//
|
|
6543
|
-
//
|
|
6552
|
+
// Literal construction already computed a value-stable __tid for ordinary
|
|
6553
|
+
// short literals. Avoid re-running literalParts()/datatype normalization
|
|
6554
|
+
// while building fact indexes; on data-heavy inputs this is a hot path.
|
|
6555
|
+
// Only the rare over-sized literal needs the value-based fallback because
|
|
6556
|
+
// prelude intentionally gives such literals per-object ids to avoid
|
|
6557
|
+
// retaining huge strings in the global interner.
|
|
6558
|
+
if (typeof t.value !== 'string' || t.value.length + 64 <= MAX_LITERAL_TID_LEN) return t.__tid;
|
|
6544
6559
|
const norm = normalizeLiteralForTid(t.value);
|
|
6545
6560
|
if (typeof norm === 'string' && norm.length > MAX_LITERAL_TID_LEN) return 'L:' + norm;
|
|
6546
6561
|
return t.__tid;
|
|
@@ -6627,17 +6642,57 @@ function ensureFactIndexes(facts) {
|
|
|
6627
6642
|
enumerable: false,
|
|
6628
6643
|
writable: true,
|
|
6629
6644
|
});
|
|
6645
|
+
Object.defineProperty(facts, '__keySetComplete', {
|
|
6646
|
+
value: false,
|
|
6647
|
+
enumerable: false,
|
|
6648
|
+
writable: true,
|
|
6649
|
+
});
|
|
6630
6650
|
|
|
6631
|
-
|
|
6651
|
+
// Build lookup indexes eagerly, but do not populate the duplicate-detection
|
|
6652
|
+
// string Set for every input fact. The predicate/subject/object indexes are
|
|
6653
|
+
// enough to verify duplicates when needed; avoiding 100k+ joined string keys
|
|
6654
|
+
// saves substantial time and GC on data-heavy query workloads.
|
|
6655
|
+
for (let i = 0; i < facts.length; i++) indexFact(facts, facts[i], i, false);
|
|
6632
6656
|
}
|
|
6633
6657
|
|
|
6634
|
-
function
|
|
6658
|
+
function cloneFactIndexesForSnapshot(src, dest) {
|
|
6659
|
+
ensureFactIndexes(src);
|
|
6660
|
+
|
|
6661
|
+
function cloneArrayMap(map) {
|
|
6662
|
+
const out = new Map();
|
|
6663
|
+
for (const [k, arr] of map) out.set(k, arr.slice());
|
|
6664
|
+
return out;
|
|
6665
|
+
}
|
|
6666
|
+
|
|
6667
|
+
function cloneNestedArrayMap(map) {
|
|
6668
|
+
const out = new Map();
|
|
6669
|
+
for (const [k, inner] of map) {
|
|
6670
|
+
const innerOut = new Map();
|
|
6671
|
+
for (const [k2, arr] of inner) innerOut.set(k2, arr.slice());
|
|
6672
|
+
out.set(k, innerOut);
|
|
6673
|
+
}
|
|
6674
|
+
return out;
|
|
6675
|
+
}
|
|
6676
|
+
|
|
6677
|
+
Object.defineProperty(dest, '__byPred', { value: cloneArrayMap(src.__byPred), enumerable: false, writable: true });
|
|
6678
|
+
Object.defineProperty(dest, '__byPS', { value: cloneNestedArrayMap(src.__byPS), enumerable: false, writable: true });
|
|
6679
|
+
Object.defineProperty(dest, '__byPO', { value: cloneNestedArrayMap(src.__byPO), enumerable: false, writable: true });
|
|
6680
|
+
Object.defineProperty(dest, '__wildPred', { value: src.__wildPred.slice(), enumerable: false, writable: true });
|
|
6681
|
+
Object.defineProperty(dest, '__wildPS', { value: cloneArrayMap(src.__wildPS), enumerable: false, writable: true });
|
|
6682
|
+
Object.defineProperty(dest, '__wildPO', { value: cloneArrayMap(src.__wildPO), enumerable: false, writable: true });
|
|
6683
|
+
Object.defineProperty(dest, '__keySet', { value: new Set(src.__keySet), enumerable: false, writable: true });
|
|
6684
|
+
Object.defineProperty(dest, '__keySetComplete', { value: !!src.__keySetComplete, enumerable: false, writable: true });
|
|
6685
|
+
}
|
|
6686
|
+
|
|
6687
|
+
function indexFact(facts, tr, idx, addKeySet = true) {
|
|
6635
6688
|
const sk = termFastKey(tr.s);
|
|
6636
6689
|
const ok = termFastKey(tr.o);
|
|
6690
|
+
let pkForKey = null;
|
|
6637
6691
|
|
|
6638
6692
|
if (tr.p instanceof Iri) {
|
|
6639
6693
|
// Use predicate term id as the primary key to avoid hashing long IRI strings.
|
|
6640
6694
|
const pk = tr.p.__tid;
|
|
6695
|
+
pkForKey = pk;
|
|
6641
6696
|
|
|
6642
6697
|
let pb = facts.__byPred.get(pk);
|
|
6643
6698
|
if (!pb) {
|
|
@@ -6695,8 +6750,10 @@ function indexFact(facts, tr, idx) {
|
|
|
6695
6750
|
}
|
|
6696
6751
|
}
|
|
6697
6752
|
|
|
6698
|
-
|
|
6699
|
-
|
|
6753
|
+
if (addKeySet && sk !== null && ok !== null) {
|
|
6754
|
+
if (pkForKey === null) pkForKey = termFastKey(tr.p);
|
|
6755
|
+
if (pkForKey !== null) facts.__keySet.add(sk + '\t' + pkForKey + '\t' + ok);
|
|
6756
|
+
}
|
|
6700
6757
|
}
|
|
6701
6758
|
|
|
6702
6759
|
function candidateFacts(facts, goal) {
|
|
@@ -6758,7 +6815,10 @@ function hasFactIndexed(facts, tr) {
|
|
|
6758
6815
|
ensureFactIndexes(facts);
|
|
6759
6816
|
|
|
6760
6817
|
const key = tripleFastKey(tr);
|
|
6761
|
-
if (key !== null)
|
|
6818
|
+
if (key !== null) {
|
|
6819
|
+
if (facts.__keySet.has(key)) return true;
|
|
6820
|
+
if (facts.__keySetComplete) return false;
|
|
6821
|
+
}
|
|
6762
6822
|
|
|
6763
6823
|
if (tr.p instanceof Iri) {
|
|
6764
6824
|
const pk = tr.p.__tid;
|
|
@@ -6788,7 +6848,7 @@ function pushFactIndexed(facts, tr) {
|
|
|
6788
6848
|
ensureFactIndexes(facts);
|
|
6789
6849
|
const idx = facts.length;
|
|
6790
6850
|
facts.push(tr);
|
|
6791
|
-
indexFact(facts, tr, idx);
|
|
6851
|
+
indexFact(facts, tr, idx, true);
|
|
6792
6852
|
}
|
|
6793
6853
|
|
|
6794
6854
|
function makeDerivedRecord(fact, rule, premises, subst, captureExplanations) {
|
|
@@ -6910,13 +6970,20 @@ function makeSinglePremiseAgendaIndex(forwardRules, backRules) {
|
|
|
6910
6970
|
if (!isSinglePremiseAgendaRuleSafe(r, backRules)) continue;
|
|
6911
6971
|
|
|
6912
6972
|
const goal = r.premise[0];
|
|
6973
|
+
const goalSKey = termFastKey(goal.s);
|
|
6974
|
+
const goalOKey = termFastKey(goal.o);
|
|
6975
|
+
const fastSubjectVar = goal.p instanceof Iri && goal.s instanceof Var && goalOKey !== null ? goal.s.name : null;
|
|
6976
|
+
const fastObjectVar = goal.p instanceof Iri && goal.o instanceof Var && goalSKey !== null ? goal.o.name : null;
|
|
6913
6977
|
const entry = {
|
|
6914
6978
|
rule: r,
|
|
6915
6979
|
ruleIndex: i,
|
|
6916
6980
|
goal,
|
|
6917
6981
|
goalPredTid: goal.p instanceof Iri ? goal.p.__tid : null,
|
|
6918
|
-
goalSKey
|
|
6919
|
-
goalOKey
|
|
6982
|
+
goalSKey,
|
|
6983
|
+
goalOKey,
|
|
6984
|
+
needsSkipCheck: !!r.__needsForwardSkipCheck,
|
|
6985
|
+
fastSubjectVar,
|
|
6986
|
+
fastObjectVar,
|
|
6920
6987
|
};
|
|
6921
6988
|
|
|
6922
6989
|
index.indexed.add(r);
|
|
@@ -8325,11 +8392,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8325
8392
|
const varGen = [0];
|
|
8326
8393
|
const skCounter = [0];
|
|
8327
8394
|
|
|
8328
|
-
//
|
|
8329
|
-
//
|
|
8330
|
-
|
|
8331
|
-
__ensureRuleKeySet(forwardRules);
|
|
8332
|
-
__ensureRuleKeySet(backRules);
|
|
8395
|
+
// Rule-key sets are only needed if a program actually derives rule-producing
|
|
8396
|
+
// triples. Building them eagerly is expensive on large static rule sets, so
|
|
8397
|
+
// dynamic-promotion sites create them lazily before duplicate checks.
|
|
8333
8398
|
|
|
8334
8399
|
// Cache head blank-node skolemization per (rule firing, head blank label).
|
|
8335
8400
|
// This prevents repeatedly generating fresh _:sk_N blanks for the *same*
|
|
@@ -8379,7 +8444,7 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8379
8444
|
|
|
8380
8445
|
function makeScopedSnapshot() {
|
|
8381
8446
|
const snap = facts.slice();
|
|
8382
|
-
|
|
8447
|
+
cloneFactIndexesForSnapshot(facts, snap);
|
|
8383
8448
|
Object.defineProperty(snap, '__scopedSnapshot', {
|
|
8384
8449
|
value: snap,
|
|
8385
8450
|
enumerable: false,
|
|
@@ -8433,10 +8498,21 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8433
8498
|
let changedHere = false;
|
|
8434
8499
|
let rulesChanged = false;
|
|
8435
8500
|
|
|
8436
|
-
// IMPORTANT: one skolem map per *rule firing
|
|
8501
|
+
// IMPORTANT: one skolem map per *rule firing*. Instantiate premise
|
|
8502
|
+
// triples and build the firing key lazily: normal CLI runs do not capture
|
|
8503
|
+
// proof records, and most rules have no explicit head blanks, so the eager
|
|
8504
|
+
// work was pure allocation on large forward chains.
|
|
8437
8505
|
const skMap = {};
|
|
8438
|
-
|
|
8439
|
-
|
|
8506
|
+
let instantiatedPremises = null;
|
|
8507
|
+
let fireKey = null;
|
|
8508
|
+
function getInstantiatedPremises() {
|
|
8509
|
+
if (instantiatedPremises === null) instantiatedPremises = r.premise.map((b) => applySubstTriple(b, s));
|
|
8510
|
+
return instantiatedPremises;
|
|
8511
|
+
}
|
|
8512
|
+
function getFireKey() {
|
|
8513
|
+
if (fireKey === null) fireKey = __firingKey(ruleIndex, getInstantiatedPremises());
|
|
8514
|
+
return fireKey;
|
|
8515
|
+
}
|
|
8440
8516
|
|
|
8441
8517
|
// Support "dynamic" rule heads where the consequent is a term that
|
|
8442
8518
|
// (after substitution) evaluates to a quoted formula.
|
|
@@ -8489,7 +8565,7 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8489
8565
|
if (isFwRuleTriple || isBwRuleTriple) {
|
|
8490
8566
|
if (!hasFactIndexed(facts, instantiated)) {
|
|
8491
8567
|
pushFactIndexed(facts, instantiated);
|
|
8492
|
-
const df = makeDerivedRecord(instantiated, r,
|
|
8568
|
+
const df = makeDerivedRecord(instantiated, r, getInstantiatedPremises(), s, captureExplanations);
|
|
8493
8569
|
derivedForward.push(df);
|
|
8494
8570
|
if (typeof onDerived === 'function') onDerived(df);
|
|
8495
8571
|
changedHere = true;
|
|
@@ -8508,8 +8584,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8508
8584
|
newRule.conclusion,
|
|
8509
8585
|
newRule.__dynamicConclusionTerm || null,
|
|
8510
8586
|
);
|
|
8511
|
-
|
|
8512
|
-
|
|
8587
|
+
const forwardRuleKeySet = __ensureRuleKeySet(forwardRules);
|
|
8588
|
+
if (!forwardRuleKeySet.has(key)) {
|
|
8589
|
+
forwardRuleKeySet.add(key);
|
|
8513
8590
|
forwardRules.push(newRule);
|
|
8514
8591
|
rulesChanged = true;
|
|
8515
8592
|
}
|
|
@@ -8523,8 +8600,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8523
8600
|
newRule.conclusion,
|
|
8524
8601
|
newRule.__dynamicConclusionTerm || null,
|
|
8525
8602
|
);
|
|
8526
|
-
|
|
8527
|
-
|
|
8603
|
+
const backRuleKeySet = __ensureRuleKeySet(backRules);
|
|
8604
|
+
if (!backRuleKeySet.has(key)) {
|
|
8605
|
+
backRuleKeySet.add(key);
|
|
8528
8606
|
backRules.push(newRule);
|
|
8529
8607
|
indexBackRule(backRules, newRule);
|
|
8530
8608
|
rulesChanged = true;
|
|
@@ -8535,20 +8613,23 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8535
8613
|
}
|
|
8536
8614
|
|
|
8537
8615
|
// Only skolemize blank nodes that occur explicitly in the rule head
|
|
8538
|
-
const inst =
|
|
8539
|
-
|
|
8540
|
-
|
|
8541
|
-
|
|
8542
|
-
|
|
8543
|
-
|
|
8544
|
-
|
|
8545
|
-
|
|
8616
|
+
const inst =
|
|
8617
|
+
headBlankLabelsHere && headBlankLabelsHere.size
|
|
8618
|
+
? skolemizeTripleForHeadBlanks(
|
|
8619
|
+
instantiated,
|
|
8620
|
+
headBlankLabelsHere,
|
|
8621
|
+
skMap,
|
|
8622
|
+
skCounter,
|
|
8623
|
+
getFireKey(),
|
|
8624
|
+
headSkolemCache,
|
|
8625
|
+
)
|
|
8626
|
+
: instantiated;
|
|
8546
8627
|
|
|
8547
8628
|
if (!isGroundTriple(inst)) continue;
|
|
8548
8629
|
if (hasFactIndexed(facts, inst)) continue;
|
|
8549
8630
|
|
|
8550
8631
|
pushFactIndexed(facts, inst);
|
|
8551
|
-
const df = makeDerivedRecord(inst, r,
|
|
8632
|
+
const df = makeDerivedRecord(inst, r, getInstantiatedPremises(), s, captureExplanations);
|
|
8552
8633
|
derivedForward.push(df);
|
|
8553
8634
|
if (typeof onDerived === 'function') onDerived(df);
|
|
8554
8635
|
|
|
@@ -8575,10 +8656,19 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8575
8656
|
for (let ci = 0; ci < total; ci++) {
|
|
8576
8657
|
const entry = ci < candidates.exactLen ? candidates.exact[ci] : candidates.wild[ci - candidates.exactLen];
|
|
8577
8658
|
const r = entry.rule;
|
|
8578
|
-
if (__skipForwardRuleNow(r)) continue;
|
|
8579
|
-
|
|
8580
|
-
|
|
8581
|
-
if (
|
|
8659
|
+
if (entry.needsSkipCheck && __skipForwardRuleNow(r)) continue;
|
|
8660
|
+
|
|
8661
|
+
let s;
|
|
8662
|
+
if (entry.fastSubjectVar !== null) {
|
|
8663
|
+
s = __emptySubst();
|
|
8664
|
+
s[entry.fastSubjectVar] = fact.s;
|
|
8665
|
+
} else if (entry.fastObjectVar !== null) {
|
|
8666
|
+
s = __emptySubst();
|
|
8667
|
+
s[entry.fastObjectVar] = fact.o;
|
|
8668
|
+
} else {
|
|
8669
|
+
s = unifyTriple(entry.goal, fact, __emptySubst());
|
|
8670
|
+
if (s === null) continue;
|
|
8671
|
+
}
|
|
8582
8672
|
|
|
8583
8673
|
const outcome = __emitForwardRuleSolution(r, entry.ruleIndex, s);
|
|
8584
8674
|
if (outcome.rulesChanged) {
|
|
@@ -8595,7 +8685,7 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8595
8685
|
for (let i = 0; i < forwardRules.length; i++) {
|
|
8596
8686
|
const r = forwardRules[i];
|
|
8597
8687
|
if (agendaIndex.indexed.has(r)) continue;
|
|
8598
|
-
if (__skipForwardRuleNow(r)) continue;
|
|
8688
|
+
if (r.__needsForwardSkipCheck && __skipForwardRuleNow(r)) continue;
|
|
8599
8689
|
|
|
8600
8690
|
const headIsStrictGround = r.__headIsStrictGround;
|
|
8601
8691
|
const maxSols = r.isFuse || headIsStrictGround ? 1 : undefined;
|
|
@@ -9462,7 +9552,26 @@ class N3SyntaxError extends SyntaxError {
|
|
|
9462
9552
|
}
|
|
9463
9553
|
|
|
9464
9554
|
function isWs(c) {
|
|
9465
|
-
|
|
9555
|
+
if (c === null || c === undefined) return false;
|
|
9556
|
+
const code = c.charCodeAt(0);
|
|
9557
|
+
// Fast path for the whitespace used by N3/Turtle inputs.
|
|
9558
|
+
return code === 0x20 || code === 0x09 || code === 0x0a || code === 0x0d || code === 0x0c;
|
|
9559
|
+
}
|
|
9560
|
+
|
|
9561
|
+
function isAsciiAlphaCode(code) {
|
|
9562
|
+
return (code >= 65 && code <= 90) || (code >= 97 && code <= 122);
|
|
9563
|
+
}
|
|
9564
|
+
|
|
9565
|
+
function isAsciiDigitCode(code) {
|
|
9566
|
+
return code >= 48 && code <= 57;
|
|
9567
|
+
}
|
|
9568
|
+
|
|
9569
|
+
function isAsciiAlpha(c) {
|
|
9570
|
+
return c !== null && c !== undefined && isAsciiAlphaCode(c.charCodeAt(0));
|
|
9571
|
+
}
|
|
9572
|
+
|
|
9573
|
+
function isAsciiDigit(c) {
|
|
9574
|
+
return c !== null && c !== undefined && isAsciiDigitCode(c.charCodeAt(0));
|
|
9466
9575
|
}
|
|
9467
9576
|
|
|
9468
9577
|
// Turtle/N3 prefixed names (PNAME_*) allow many Unicode letters and certain
|
|
@@ -9475,13 +9584,18 @@ function isWs(c) {
|
|
|
9475
9584
|
//
|
|
9476
9585
|
// We implement a grammar-aligned matcher for PN_CHARS* and PLX fragments.
|
|
9477
9586
|
function isHexDigit(c) {
|
|
9478
|
-
|
|
9587
|
+
if (c === null || c === undefined) return false;
|
|
9588
|
+
const code = c.charCodeAt(0);
|
|
9589
|
+
return (code >= 48 && code <= 57) || (code >= 65 && code <= 70) || (code >= 97 && code <= 102);
|
|
9479
9590
|
}
|
|
9480
9591
|
|
|
9481
9592
|
function isPnCharsBase(c) {
|
|
9482
9593
|
// Approximation of PN_CHARS_BASE from the N3 grammar using Unicode properties.
|
|
9483
9594
|
// Covers most letters used in practice (including ñ) and common scripts.
|
|
9484
|
-
|
|
9595
|
+
if (c === null || c === undefined) return false;
|
|
9596
|
+
const code = c.charCodeAt(0);
|
|
9597
|
+
if (isAsciiAlphaCode(code)) return true;
|
|
9598
|
+
return /\p{L}|\p{Nl}/u.test(c);
|
|
9485
9599
|
}
|
|
9486
9600
|
|
|
9487
9601
|
function isPnCharsU(c) {
|
|
@@ -9491,9 +9605,11 @@ function isPnCharsU(c) {
|
|
|
9491
9605
|
|
|
9492
9606
|
function isPnChars(c) {
|
|
9493
9607
|
// PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | U+00B7 | [U+0300-U+036F] | [U+203F-U+2040]
|
|
9494
|
-
if (c === null) return false;
|
|
9608
|
+
if (c === null || c === undefined) return false;
|
|
9609
|
+
const code = c.charCodeAt(0);
|
|
9610
|
+
if (isAsciiAlphaCode(code) || isAsciiDigitCode(code) || code === 95 || code === 45) return true;
|
|
9495
9611
|
if (isPnCharsU(c)) return true;
|
|
9496
|
-
if (c === '
|
|
9612
|
+
if (c === '\u00B7') return true;
|
|
9497
9613
|
const cp = c.codePointAt(0);
|
|
9498
9614
|
return (cp >= 0x0300 && cp <= 0x036f) || (cp >= 0x203f && cp <= 0x2040);
|
|
9499
9615
|
}
|
|
@@ -10595,10 +10711,23 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
10595
10711
|
return text;
|
|
10596
10712
|
}
|
|
10597
10713
|
|
|
10714
|
+
|
|
10715
|
+
function isNumericLikeIdentifier(word) {
|
|
10716
|
+
if (typeof word !== 'string' || word.length === 0) return false;
|
|
10717
|
+
for (let j = 0; j < word.length; j++) {
|
|
10718
|
+
const code = word.charCodeAt(j);
|
|
10719
|
+
if (!((code >= 48 && code <= 57) || code === 46 || code === 45)) return false;
|
|
10720
|
+
}
|
|
10721
|
+
return true;
|
|
10722
|
+
}
|
|
10723
|
+
|
|
10598
10724
|
function lex(inputText, opts = {}) {
|
|
10599
10725
|
const rdf = !!(opts && opts.rdf);
|
|
10600
10726
|
if (rdf) inputText = normalizeRdfCompatibility(inputText);
|
|
10601
|
-
|
|
10727
|
+
// Avoid copying large ASCII/BMP inputs into an Array. Array.from() is
|
|
10728
|
+
// only needed when the text contains surrogate pairs and we want the old
|
|
10729
|
+
// code-point iteration behavior for non-BMP characters.
|
|
10730
|
+
const chars = /[\uD800-\uDFFF]/.test(inputText) ? Array.from(inputText) : inputText;
|
|
10602
10731
|
const n = chars.length;
|
|
10603
10732
|
let i = 0;
|
|
10604
10733
|
const tokens = [];
|
|
@@ -10614,19 +10743,29 @@ function lex(inputText, opts = {}) {
|
|
|
10614
10743
|
// - Accepts percent escapes (%HH) as PLX fragments.
|
|
10615
10744
|
// - Accepts PN_LOCAL_ESC backslash escapes and decodes them ("\\." -> ".").
|
|
10616
10745
|
// - Accepts '.' inside a name only when it is not terminal.
|
|
10746
|
+
function sliceChars(start, end) {
|
|
10747
|
+
return typeof chars === 'string' ? chars.slice(start, end) : chars.slice(start, end).join('');
|
|
10748
|
+
}
|
|
10749
|
+
|
|
10617
10750
|
function readIdentText(startOffsetForErrors) {
|
|
10618
|
-
const
|
|
10751
|
+
const start = i;
|
|
10752
|
+
let out = null;
|
|
10753
|
+
|
|
10754
|
+
function appendRawUntilHere() {
|
|
10755
|
+
if (out === null) out = [sliceChars(start, i)];
|
|
10756
|
+
}
|
|
10757
|
+
|
|
10619
10758
|
while (i < n) {
|
|
10620
|
-
const cc =
|
|
10621
|
-
if (cc === null || isWs(cc)) break;
|
|
10759
|
+
const cc = chars[i];
|
|
10760
|
+
if (cc === null || cc === undefined || isWs(cc)) break;
|
|
10622
10761
|
|
|
10623
10762
|
// Hard stops: delimiters cannot appear unescaped inside PNAME tokens.
|
|
10624
|
-
if ('{}()[]
|
|
10763
|
+
if (cc === '{' || cc === '}' || cc === '(' || cc === ')' || cc === '[' || cc === ']' || cc === ';' || cc === ',') break;
|
|
10625
10764
|
|
|
10626
10765
|
// Dot is allowed inside PN_LOCAL, but not at the end.
|
|
10627
10766
|
if (cc === '.') {
|
|
10628
10767
|
if (!canContinueAfterDot(peek(1))) break;
|
|
10629
|
-
out.push('.');
|
|
10768
|
+
if (out !== null) out.push('.');
|
|
10630
10769
|
i++;
|
|
10631
10770
|
continue;
|
|
10632
10771
|
}
|
|
@@ -10641,6 +10780,7 @@ function lex(inputText, opts = {}) {
|
|
|
10641
10780
|
typeof startOffsetForErrors === 'number' ? startOffsetForErrors : i,
|
|
10642
10781
|
);
|
|
10643
10782
|
}
|
|
10783
|
+
appendRawUntilHere();
|
|
10644
10784
|
out.push('%', h1, h2);
|
|
10645
10785
|
i += 3;
|
|
10646
10786
|
continue;
|
|
@@ -10650,6 +10790,7 @@ function lex(inputText, opts = {}) {
|
|
|
10650
10790
|
if (cc === '\\') {
|
|
10651
10791
|
const esc = peek(1);
|
|
10652
10792
|
if (esc !== null && PN_LOCAL_ESC_SET.has(esc)) {
|
|
10793
|
+
appendRawUntilHere();
|
|
10653
10794
|
out.push(esc); // decoded form
|
|
10654
10795
|
i += 2;
|
|
10655
10796
|
continue;
|
|
@@ -10661,14 +10802,14 @@ function lex(inputText, opts = {}) {
|
|
|
10661
10802
|
}
|
|
10662
10803
|
|
|
10663
10804
|
if (isIdentChar(cc)) {
|
|
10664
|
-
out.push(cc);
|
|
10805
|
+
if (out !== null) out.push(cc);
|
|
10665
10806
|
i++;
|
|
10666
10807
|
continue;
|
|
10667
10808
|
}
|
|
10668
10809
|
|
|
10669
10810
|
break;
|
|
10670
10811
|
}
|
|
10671
|
-
return out.join('');
|
|
10812
|
+
return out === null ? sliceChars(start, i) : out.join('');
|
|
10672
10813
|
}
|
|
10673
10814
|
|
|
10674
10815
|
while (i < n) {
|
|
@@ -10747,22 +10888,47 @@ function lex(inputText, opts = {}) {
|
|
|
10747
10888
|
continue;
|
|
10748
10889
|
}
|
|
10749
10890
|
|
|
10750
|
-
// 5) Single-character punctuation
|
|
10751
|
-
|
|
10752
|
-
|
|
10753
|
-
|
|
10754
|
-
|
|
10755
|
-
|
|
10756
|
-
|
|
10757
|
-
|
|
10758
|
-
|
|
10759
|
-
|
|
10760
|
-
|
|
10761
|
-
|
|
10762
|
-
|
|
10763
|
-
|
|
10764
|
-
|
|
10765
|
-
|
|
10891
|
+
// 5) Single-character punctuation. Use a switch rather than allocating a
|
|
10892
|
+
// mapping object for every punctuation token in large inputs.
|
|
10893
|
+
switch (c) {
|
|
10894
|
+
case '{':
|
|
10895
|
+
tokens.push(new Token('LBrace', null, i));
|
|
10896
|
+
i++;
|
|
10897
|
+
continue;
|
|
10898
|
+
case '}':
|
|
10899
|
+
tokens.push(new Token('RBrace', null, i));
|
|
10900
|
+
i++;
|
|
10901
|
+
continue;
|
|
10902
|
+
case '(':
|
|
10903
|
+
tokens.push(new Token('LParen', null, i));
|
|
10904
|
+
i++;
|
|
10905
|
+
continue;
|
|
10906
|
+
case ')':
|
|
10907
|
+
tokens.push(new Token('RParen', null, i));
|
|
10908
|
+
i++;
|
|
10909
|
+
continue;
|
|
10910
|
+
case '[':
|
|
10911
|
+
tokens.push(new Token('LBracket', null, i));
|
|
10912
|
+
i++;
|
|
10913
|
+
continue;
|
|
10914
|
+
case ']':
|
|
10915
|
+
tokens.push(new Token('RBracket', null, i));
|
|
10916
|
+
i++;
|
|
10917
|
+
continue;
|
|
10918
|
+
case ';':
|
|
10919
|
+
tokens.push(new Token('Semicolon', null, i));
|
|
10920
|
+
i++;
|
|
10921
|
+
continue;
|
|
10922
|
+
case ',':
|
|
10923
|
+
tokens.push(new Token('Comma', null, i));
|
|
10924
|
+
i++;
|
|
10925
|
+
continue;
|
|
10926
|
+
case '.':
|
|
10927
|
+
tokens.push(new Token('Dot', null, i));
|
|
10928
|
+
i++;
|
|
10929
|
+
continue;
|
|
10930
|
+
default:
|
|
10931
|
+
break;
|
|
10766
10932
|
}
|
|
10767
10933
|
|
|
10768
10934
|
// String literal: short "..." or long """..."""
|
|
@@ -10821,26 +10987,36 @@ function lex(inputText, opts = {}) {
|
|
|
10821
10987
|
continue;
|
|
10822
10988
|
}
|
|
10823
10989
|
|
|
10824
|
-
// Short string literal " ... "
|
|
10990
|
+
// Short string literal " ... ". Most data files contain plain
|
|
10991
|
+
// unescaped labels; keep that path slice-based and avoid building an
|
|
10992
|
+
// intermediate character array + raw quoted string.
|
|
10825
10993
|
i++; // consume opening "
|
|
10826
|
-
const
|
|
10994
|
+
const contentStart = i;
|
|
10995
|
+
let sChars = null;
|
|
10996
|
+
let closed = false;
|
|
10827
10997
|
while (i < n) {
|
|
10828
10998
|
const cc = chars[i];
|
|
10829
10999
|
i++;
|
|
10830
11000
|
if (cc === '\\') {
|
|
11001
|
+
if (sChars === null) sChars = [sliceChars(contentStart, i - 1)];
|
|
10831
11002
|
if (i < n) {
|
|
10832
11003
|
const esc = chars[i];
|
|
10833
11004
|
i++;
|
|
10834
11005
|
sChars.push('\\');
|
|
10835
11006
|
sChars.push(esc);
|
|
11007
|
+
} else {
|
|
11008
|
+
sChars.push('\\');
|
|
10836
11009
|
}
|
|
10837
11010
|
continue;
|
|
10838
11011
|
}
|
|
10839
|
-
if (cc === '"')
|
|
10840
|
-
|
|
11012
|
+
if (cc === '"') {
|
|
11013
|
+
closed = true;
|
|
11014
|
+
break;
|
|
11015
|
+
}
|
|
11016
|
+
if (sChars !== null) sChars.push(cc);
|
|
10841
11017
|
}
|
|
10842
|
-
const
|
|
10843
|
-
const decoded = decodeN3StringEscapes(
|
|
11018
|
+
const rawContent = sChars === null ? sliceChars(contentStart, closed ? i - 1 : i) : sChars.join('');
|
|
11019
|
+
const decoded = sChars === null ? rawContent : decodeN3StringEscapes(rawContent, start);
|
|
10844
11020
|
assertValidStringLiteralValue(decoded, start);
|
|
10845
11021
|
const s = JSON.stringify(decoded); // canonical short quoted form
|
|
10846
11022
|
tokens.push(new Token('Literal', s, start));
|
|
@@ -10905,24 +11081,32 @@ function lex(inputText, opts = {}) {
|
|
|
10905
11081
|
|
|
10906
11082
|
// Short string literal ' ... '
|
|
10907
11083
|
i++; // consume opening '
|
|
10908
|
-
const
|
|
11084
|
+
const contentStart = i;
|
|
11085
|
+
let sChars = null;
|
|
11086
|
+
let closed = false;
|
|
10909
11087
|
while (i < n) {
|
|
10910
11088
|
const cc = chars[i];
|
|
10911
11089
|
i++;
|
|
10912
11090
|
if (cc === '\\') {
|
|
11091
|
+
if (sChars === null) sChars = [sliceChars(contentStart, i - 1)];
|
|
10913
11092
|
if (i < n) {
|
|
10914
11093
|
const esc = chars[i];
|
|
10915
11094
|
i++;
|
|
10916
11095
|
sChars.push('\\');
|
|
10917
11096
|
sChars.push(esc);
|
|
11097
|
+
} else {
|
|
11098
|
+
sChars.push('\\');
|
|
10918
11099
|
}
|
|
10919
11100
|
continue;
|
|
10920
11101
|
}
|
|
10921
|
-
if (cc === "'")
|
|
10922
|
-
|
|
11102
|
+
if (cc === "'") {
|
|
11103
|
+
closed = true;
|
|
11104
|
+
break;
|
|
11105
|
+
}
|
|
11106
|
+
if (sChars !== null) sChars.push(cc);
|
|
10923
11107
|
}
|
|
10924
|
-
const
|
|
10925
|
-
const decoded = decodeN3StringEscapes(
|
|
11108
|
+
const rawContent = sChars === null ? sliceChars(contentStart, closed ? i - 1 : i) : sChars.join('');
|
|
11109
|
+
const decoded = sChars === null ? rawContent : decodeN3StringEscapes(rawContent, start);
|
|
10926
11110
|
assertValidStringLiteralValue(decoded, start);
|
|
10927
11111
|
const s = JSON.stringify(decoded); // canonical short quoted form
|
|
10928
11112
|
tokens.push(new Token('Literal', s, start));
|
|
@@ -10955,10 +11139,10 @@ function lex(inputText, opts = {}) {
|
|
|
10955
11139
|
// "@" [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
|
|
10956
11140
|
const tagChars = [];
|
|
10957
11141
|
let cc = peek();
|
|
10958
|
-
if (cc === null ||
|
|
11142
|
+
if (cc === null || !isAsciiAlpha(cc)) {
|
|
10959
11143
|
throw new N3SyntaxError("Invalid language tag (expected [A-Za-z] after '@')", start);
|
|
10960
11144
|
}
|
|
10961
|
-
while ((cc = peek()) !== null &&
|
|
11145
|
+
while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
|
|
10962
11146
|
tagChars.push(cc);
|
|
10963
11147
|
i++;
|
|
10964
11148
|
}
|
|
@@ -10982,7 +11166,7 @@ function lex(inputText, opts = {}) {
|
|
|
10982
11166
|
// Otherwise, treat as a directive (@prefix, @base)
|
|
10983
11167
|
const wordChars = [];
|
|
10984
11168
|
let cc;
|
|
10985
|
-
while ((cc = peek()) !== null &&
|
|
11169
|
+
while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
|
|
10986
11170
|
wordChars.push(cc);
|
|
10987
11171
|
i++;
|
|
10988
11172
|
}
|
|
@@ -10994,19 +11178,19 @@ function lex(inputText, opts = {}) {
|
|
|
10994
11178
|
}
|
|
10995
11179
|
|
|
10996
11180
|
// 6) Numeric literal (integer or float)
|
|
10997
|
-
if (
|
|
11181
|
+
if (isAsciiDigit(c) || (c === '-' && peek(1) !== null && isAsciiDigit(peek(1)))) {
|
|
10998
11182
|
const start = i;
|
|
10999
11183
|
const numChars = [c];
|
|
11000
11184
|
i++;
|
|
11001
11185
|
while (i < n) {
|
|
11002
11186
|
const cc = chars[i];
|
|
11003
|
-
if (
|
|
11187
|
+
if (isAsciiDigit(cc)) {
|
|
11004
11188
|
numChars.push(cc);
|
|
11005
11189
|
i++;
|
|
11006
11190
|
continue;
|
|
11007
11191
|
}
|
|
11008
11192
|
if (cc === '.') {
|
|
11009
|
-
if (i + 1 < n &&
|
|
11193
|
+
if (i + 1 < n && isAsciiDigit(chars[i + 1])) {
|
|
11010
11194
|
numChars.push('.');
|
|
11011
11195
|
i++;
|
|
11012
11196
|
continue;
|
|
@@ -11021,14 +11205,14 @@ function lex(inputText, opts = {}) {
|
|
|
11021
11205
|
if (i < n && (chars[i] === 'e' || chars[i] === 'E')) {
|
|
11022
11206
|
let j = i + 1;
|
|
11023
11207
|
if (j < n && (chars[j] === '+' || chars[j] === '-')) j++;
|
|
11024
|
-
if (j < n &&
|
|
11208
|
+
if (j < n && isAsciiDigit(chars[j])) {
|
|
11025
11209
|
numChars.push(chars[i]); // e/E
|
|
11026
11210
|
i++;
|
|
11027
11211
|
if (i < n && (chars[i] === '+' || chars[i] === '-')) {
|
|
11028
11212
|
numChars.push(chars[i]);
|
|
11029
11213
|
i++;
|
|
11030
11214
|
}
|
|
11031
|
-
while (i < n &&
|
|
11215
|
+
while (i < n && isAsciiDigit(chars[i])) {
|
|
11032
11216
|
numChars.push(chars[i]);
|
|
11033
11217
|
i++;
|
|
11034
11218
|
}
|
|
@@ -11047,7 +11231,7 @@ function lex(inputText, opts = {}) {
|
|
|
11047
11231
|
}
|
|
11048
11232
|
if (word === 'true' || word === 'false') {
|
|
11049
11233
|
tokens.push(new Token('Literal', word, start));
|
|
11050
|
-
} else if (
|
|
11234
|
+
} else if (isNumericLikeIdentifier(word)) {
|
|
11051
11235
|
tokens.push(new Token('Literal', word, start));
|
|
11052
11236
|
} else {
|
|
11053
11237
|
tokens.push(new Token('Ident', word, start));
|
|
@@ -11477,7 +11661,15 @@ class Parser {
|
|
|
11477
11661
|
}
|
|
11478
11662
|
|
|
11479
11663
|
isIdentKeyword(tok, keyword) {
|
|
11480
|
-
|
|
11664
|
+
if (!tok || tok.typ !== 'Ident' || typeof tok.value !== 'string') return false;
|
|
11665
|
+
const v = tok.value;
|
|
11666
|
+
if (v.length !== keyword.length) return false;
|
|
11667
|
+
for (let i = 0; i < keyword.length; i++) {
|
|
11668
|
+
const code = v.charCodeAt(i);
|
|
11669
|
+
const lower = code >= 65 && code <= 90 ? code + 32 : code;
|
|
11670
|
+
if (lower !== keyword.charCodeAt(i)) return false;
|
|
11671
|
+
}
|
|
11672
|
+
return true;
|
|
11481
11673
|
}
|
|
11482
11674
|
|
|
11483
11675
|
canStartSparqlPrefixDirective() {
|
|
@@ -11624,7 +11816,7 @@ class Parser {
|
|
|
11624
11816
|
} else if (tok2.typ === 'Ident') {
|
|
11625
11817
|
const qn = tok2.value || '';
|
|
11626
11818
|
if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok2, qn);
|
|
11627
|
-
assertValidQNamePrefix(qn.
|
|
11819
|
+
assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok2, '@prefix directive IRI');
|
|
11628
11820
|
iri = this.prefixes.expandQName(qn);
|
|
11629
11821
|
} else {
|
|
11630
11822
|
this.fail(`Expected IRI after @prefix, got ${tok2.toString()}`, tok2);
|
|
@@ -11641,7 +11833,7 @@ class Parser {
|
|
|
11641
11833
|
} else if (tok.typ === 'Ident') {
|
|
11642
11834
|
const qn = tok.value || '';
|
|
11643
11835
|
if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok, qn);
|
|
11644
|
-
assertValidQNamePrefix(qn.
|
|
11836
|
+
assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok, '@base directive IRI');
|
|
11645
11837
|
iri = this.prefixes.expandQName(qn);
|
|
11646
11838
|
} else {
|
|
11647
11839
|
this.fail(`Expected IRI after @base, got ${tok.toString()}`, tok);
|
|
@@ -11670,7 +11862,7 @@ class Parser {
|
|
|
11670
11862
|
} else if (tok2.typ === 'Ident') {
|
|
11671
11863
|
const qn = tok2.value || '';
|
|
11672
11864
|
if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok2, qn);
|
|
11673
|
-
assertValidQNamePrefix(qn.
|
|
11865
|
+
assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok2, '@prefix directive IRI');
|
|
11674
11866
|
iri = this.prefixes.expandQName(qn);
|
|
11675
11867
|
} else {
|
|
11676
11868
|
this.fail(`Expected IRI after PREFIX, got ${tok2.toString()}`, tok2);
|
|
@@ -11691,7 +11883,7 @@ class Parser {
|
|
|
11691
11883
|
} else if (tok.typ === 'Ident') {
|
|
11692
11884
|
const qn = tok.value || '';
|
|
11693
11885
|
if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok, qn);
|
|
11694
|
-
assertValidQNamePrefix(qn.
|
|
11886
|
+
assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok, 'BASE directive IRI');
|
|
11695
11887
|
iri = this.prefixes.expandQName(qn);
|
|
11696
11888
|
} else {
|
|
11697
11889
|
this.fail(`Expected IRI after BASE, got ${tok.toString()}`, tok);
|
|
@@ -11738,14 +11930,18 @@ class Parser {
|
|
|
11738
11930
|
const name = val || '';
|
|
11739
11931
|
if (name === 'a') {
|
|
11740
11932
|
return internIri(RDF_NS + 'type');
|
|
11741
|
-
}
|
|
11933
|
+
}
|
|
11934
|
+
const sep = name.indexOf(':');
|
|
11935
|
+
if (sep === 1 && name.charCodeAt(0) === 95) {
|
|
11742
11936
|
return new Blank(name);
|
|
11743
|
-
} else if (name.includes(':')) {
|
|
11744
|
-
assertValidQNamePrefix(name.split(':', 1)[0], this.fail.bind(this), tok);
|
|
11745
|
-
return internIri(this.prefixes.expandQName(name));
|
|
11746
|
-
} else {
|
|
11747
|
-
failInvalidKeywordLikeIdent(this.fail.bind(this), tok, name);
|
|
11748
11937
|
}
|
|
11938
|
+
if (sep >= 0) {
|
|
11939
|
+
const prefixName = name.slice(0, sep);
|
|
11940
|
+
assertValidQNamePrefix(prefixName, this.fail.bind(this), tok);
|
|
11941
|
+
const base = this.prefixes.map[prefixName] || '';
|
|
11942
|
+
return internIri(base ? base + name.slice(sep + 1) : name);
|
|
11943
|
+
}
|
|
11944
|
+
failInvalidKeywordLikeIdent(this.fail.bind(this), tok, name);
|
|
11749
11945
|
}
|
|
11750
11946
|
|
|
11751
11947
|
if (typ === 'Literal') {
|
|
@@ -11776,7 +11972,7 @@ class Parser {
|
|
|
11776
11972
|
} else if (dtTok.typ === 'Ident') {
|
|
11777
11973
|
const qn = dtTok.value || '';
|
|
11778
11974
|
if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), dtTok, qn);
|
|
11779
|
-
assertValidQNamePrefix(qn.
|
|
11975
|
+
assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), dtTok, 'datatype prefixed name');
|
|
11780
11976
|
dtIri = this.prefixes.expandQName(qn);
|
|
11781
11977
|
} else {
|
|
11782
11978
|
this.fail(`Expected datatype after ^^, got ${dtTok.toString()}`, dtTok);
|
|
@@ -12400,21 +12596,40 @@ function literalParts(lit) {
|
|
|
12400
12596
|
// equality fast-paths than repeated string key construction.
|
|
12401
12597
|
|
|
12402
12598
|
let __nextTid = 1;
|
|
12403
|
-
const __tidIntern = new Map(); //
|
|
12599
|
+
const __tidIntern = new Map(); // legacy generic key -> number
|
|
12600
|
+
const __iriTidIntern = new Map(); // IRI value -> number
|
|
12601
|
+
const __blankTidIntern = new Map(); // blank label -> number
|
|
12602
|
+
const __literalTidIntern = new Map(); // normalized literal lexical form -> number
|
|
12404
12603
|
|
|
12405
12604
|
// Avoid storing extremely large literal keys in the global term-id intern map.
|
|
12406
12605
|
// For huge literals we still assign a unique __tid, but we do not intern the key.
|
|
12407
12606
|
const MAX_LITERAL_TID_LEN = 1024;
|
|
12408
12607
|
|
|
12409
|
-
function
|
|
12410
|
-
let id =
|
|
12608
|
+
function __getTidFromMap(map, key) {
|
|
12609
|
+
let id = map.get(key);
|
|
12411
12610
|
if (!id) {
|
|
12412
12611
|
id = __nextTid++;
|
|
12413
|
-
|
|
12612
|
+
map.set(key, id);
|
|
12414
12613
|
}
|
|
12415
12614
|
return id;
|
|
12416
12615
|
}
|
|
12417
12616
|
|
|
12617
|
+
function __getTid(key) {
|
|
12618
|
+
return __getTidFromMap(__tidIntern, key);
|
|
12619
|
+
}
|
|
12620
|
+
|
|
12621
|
+
function __getIriTid(value) {
|
|
12622
|
+
return __getTidFromMap(__iriTidIntern, value);
|
|
12623
|
+
}
|
|
12624
|
+
|
|
12625
|
+
function __getBlankTid(label) {
|
|
12626
|
+
return __getTidFromMap(__blankTidIntern, label);
|
|
12627
|
+
}
|
|
12628
|
+
|
|
12629
|
+
function __getLiteralTid(norm) {
|
|
12630
|
+
return __getTidFromMap(__literalTidIntern, norm);
|
|
12631
|
+
}
|
|
12632
|
+
|
|
12418
12633
|
function __isQuotedLexical(lit) {
|
|
12419
12634
|
if (typeof lit !== 'string') return false;
|
|
12420
12635
|
if (lit.length >= 6) {
|
|
@@ -12460,6 +12675,14 @@ function __isPlainStringLiteralValue(lit) {
|
|
|
12460
12675
|
function normalizeLiteralForTid(lit) {
|
|
12461
12676
|
// Canonicalize so that plain string and explicit xsd:string share the same id.
|
|
12462
12677
|
if (typeof lit !== 'string') return lit;
|
|
12678
|
+
|
|
12679
|
+
// Fast path for the overwhelmingly common lexer output for plain string
|
|
12680
|
+
// literals: a canonical JSON-style quoted lexical form with no suffix.
|
|
12681
|
+
// This avoids literalParts()/language-tag parsing for large fact tables.
|
|
12682
|
+
if (lit.length >= 2 && lit.charCodeAt(0) === 34 && lit.charCodeAt(lit.length - 1) === 34 && lit.indexOf('^^') < 0) {
|
|
12683
|
+
return `${lit}^^<${XSD_NS}string>`;
|
|
12684
|
+
}
|
|
12685
|
+
|
|
12463
12686
|
const [lex, dt] = literalParts(lit);
|
|
12464
12687
|
if (dt === XSD_NS + 'string') return `${lex}^^<${XSD_NS}string>`;
|
|
12465
12688
|
if (dt === null && __isPlainStringLiteralValue(lit)) return `${lex}^^<${XSD_NS}string>`;
|
|
@@ -12477,7 +12700,7 @@ class Iri extends Term {
|
|
|
12477
12700
|
super();
|
|
12478
12701
|
this.value = value;
|
|
12479
12702
|
Object.defineProperty(this, '__tid', {
|
|
12480
|
-
value:
|
|
12703
|
+
value: __getIriTid(value),
|
|
12481
12704
|
enumerable: false,
|
|
12482
12705
|
});
|
|
12483
12706
|
}
|
|
@@ -12489,7 +12712,7 @@ class Literal extends Term {
|
|
|
12489
12712
|
this.value = value; // raw lexical form, e.g. "foo", 12, true, or "\"1944-08-21\"^^..."
|
|
12490
12713
|
const norm = normalizeLiteralForTid(value);
|
|
12491
12714
|
const useIntern = typeof norm === 'string' && norm.length <= MAX_LITERAL_TID_LEN;
|
|
12492
|
-
const tid = useIntern ?
|
|
12715
|
+
const tid = useIntern ? __getLiteralTid(norm) : __nextTid++;
|
|
12493
12716
|
Object.defineProperty(this, '__tid', {
|
|
12494
12717
|
value: tid,
|
|
12495
12718
|
enumerable: false,
|
|
@@ -12509,7 +12732,7 @@ class Blank extends Term {
|
|
|
12509
12732
|
super();
|
|
12510
12733
|
this.label = label; // _:b1, etc.
|
|
12511
12734
|
Object.defineProperty(this, '__tid', {
|
|
12512
|
-
value:
|
|
12735
|
+
value: __getBlankTid(label),
|
|
12513
12736
|
enumerable: false,
|
|
12514
12737
|
});
|
|
12515
12738
|
}
|