eyeling 1.25.0 → 1.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/eyeling.browser.js +144 -50
- package/eyeling.js +144 -50
- package/lib/cli.js +1 -1
- package/lib/engine.js +38 -20
- package/lib/lexer.js +62 -21
- package/lib/parser.js +9 -1
- package/lib/prelude.js +34 -7
- package/package.json +1 -1
|
@@ -4825,7 +4825,7 @@ function main() {
|
|
|
4825
4825
|
parseN3Text(text, {
|
|
4826
4826
|
baseIri: __sourceLabelToBaseIri(sourceLabel),
|
|
4827
4827
|
label: sourceLabel,
|
|
4828
|
-
collectUsedPrefixes:
|
|
4828
|
+
collectUsedPrefixes: streamMode,
|
|
4829
4829
|
keepSourceArtifacts: false,
|
|
4830
4830
|
rdf: rdfMode,
|
|
4831
4831
|
}),
|
|
@@ -6536,11 +6536,13 @@ function termFastKey(t) {
|
|
|
6536
6536
|
if (t instanceof Iri || t instanceof Blank) return t.__tid;
|
|
6537
6537
|
|
|
6538
6538
|
if (t instanceof Literal) {
|
|
6539
|
-
//
|
|
6540
|
-
//
|
|
6541
|
-
//
|
|
6542
|
-
//
|
|
6543
|
-
//
|
|
6539
|
+
// Literal construction already computed a value-stable __tid for ordinary
|
|
6540
|
+
// short literals. Avoid re-running literalParts()/datatype normalization
|
|
6541
|
+
// while building fact indexes; on data-heavy inputs this is a hot path.
|
|
6542
|
+
// Only the rare over-sized literal needs the value-based fallback because
|
|
6543
|
+
// prelude intentionally gives such literals per-object ids to avoid
|
|
6544
|
+
// retaining huge strings in the global interner.
|
|
6545
|
+
if (typeof t.value !== 'string' || t.value.length + 64 <= MAX_LITERAL_TID_LEN) return t.__tid;
|
|
6544
6546
|
const norm = normalizeLiteralForTid(t.value);
|
|
6545
6547
|
if (typeof norm === 'string' && norm.length > MAX_LITERAL_TID_LEN) return 'L:' + norm;
|
|
6546
6548
|
return t.__tid;
|
|
@@ -6627,17 +6629,28 @@ function ensureFactIndexes(facts) {
|
|
|
6627
6629
|
enumerable: false,
|
|
6628
6630
|
writable: true,
|
|
6629
6631
|
});
|
|
6632
|
+
Object.defineProperty(facts, '__keySetComplete', {
|
|
6633
|
+
value: false,
|
|
6634
|
+
enumerable: false,
|
|
6635
|
+
writable: true,
|
|
6636
|
+
});
|
|
6630
6637
|
|
|
6631
|
-
|
|
6638
|
+
// Build lookup indexes eagerly, but do not populate the duplicate-detection
|
|
6639
|
+
// string Set for every input fact. The predicate/subject/object indexes are
|
|
6640
|
+
// enough to verify duplicates when needed; avoiding 100k+ joined string keys
|
|
6641
|
+
// saves substantial time and GC on data-heavy query workloads.
|
|
6642
|
+
for (let i = 0; i < facts.length; i++) indexFact(facts, facts[i], i, false);
|
|
6632
6643
|
}
|
|
6633
6644
|
|
|
6634
|
-
function indexFact(facts, tr, idx) {
|
|
6645
|
+
function indexFact(facts, tr, idx, addKeySet = true) {
|
|
6635
6646
|
const sk = termFastKey(tr.s);
|
|
6636
6647
|
const ok = termFastKey(tr.o);
|
|
6648
|
+
let pkForKey = null;
|
|
6637
6649
|
|
|
6638
6650
|
if (tr.p instanceof Iri) {
|
|
6639
6651
|
// Use predicate term id as the primary key to avoid hashing long IRI strings.
|
|
6640
6652
|
const pk = tr.p.__tid;
|
|
6653
|
+
pkForKey = pk;
|
|
6641
6654
|
|
|
6642
6655
|
let pb = facts.__byPred.get(pk);
|
|
6643
6656
|
if (!pb) {
|
|
@@ -6695,8 +6708,10 @@ function indexFact(facts, tr, idx) {
|
|
|
6695
6708
|
}
|
|
6696
6709
|
}
|
|
6697
6710
|
|
|
6698
|
-
|
|
6699
|
-
|
|
6711
|
+
if (addKeySet && sk !== null && ok !== null) {
|
|
6712
|
+
if (pkForKey === null) pkForKey = termFastKey(tr.p);
|
|
6713
|
+
if (pkForKey !== null) facts.__keySet.add(sk + '\t' + pkForKey + '\t' + ok);
|
|
6714
|
+
}
|
|
6700
6715
|
}
|
|
6701
6716
|
|
|
6702
6717
|
function candidateFacts(facts, goal) {
|
|
@@ -6758,7 +6773,10 @@ function hasFactIndexed(facts, tr) {
|
|
|
6758
6773
|
ensureFactIndexes(facts);
|
|
6759
6774
|
|
|
6760
6775
|
const key = tripleFastKey(tr);
|
|
6761
|
-
if (key !== null)
|
|
6776
|
+
if (key !== null) {
|
|
6777
|
+
if (facts.__keySet.has(key)) return true;
|
|
6778
|
+
if (facts.__keySetComplete) return false;
|
|
6779
|
+
}
|
|
6762
6780
|
|
|
6763
6781
|
if (tr.p instanceof Iri) {
|
|
6764
6782
|
const pk = tr.p.__tid;
|
|
@@ -6788,7 +6806,7 @@ function pushFactIndexed(facts, tr) {
|
|
|
6788
6806
|
ensureFactIndexes(facts);
|
|
6789
6807
|
const idx = facts.length;
|
|
6790
6808
|
facts.push(tr);
|
|
6791
|
-
indexFact(facts, tr, idx);
|
|
6809
|
+
indexFact(facts, tr, idx, true);
|
|
6792
6810
|
}
|
|
6793
6811
|
|
|
6794
6812
|
function makeDerivedRecord(fact, rule, premises, subst, captureExplanations) {
|
|
@@ -8325,11 +8343,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8325
8343
|
const varGen = [0];
|
|
8326
8344
|
const skCounter = [0];
|
|
8327
8345
|
|
|
8328
|
-
//
|
|
8329
|
-
//
|
|
8330
|
-
|
|
8331
|
-
__ensureRuleKeySet(forwardRules);
|
|
8332
|
-
__ensureRuleKeySet(backRules);
|
|
8346
|
+
// Rule-key sets are only needed if a program actually derives rule-producing
|
|
8347
|
+
// triples. Building them eagerly is expensive on large static rule sets, so
|
|
8348
|
+
// dynamic-promotion sites create them lazily before duplicate checks.
|
|
8333
8349
|
|
|
8334
8350
|
// Cache head blank-node skolemization per (rule firing, head blank label).
|
|
8335
8351
|
// This prevents repeatedly generating fresh _:sk_N blanks for the *same*
|
|
@@ -8508,8 +8524,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8508
8524
|
newRule.conclusion,
|
|
8509
8525
|
newRule.__dynamicConclusionTerm || null,
|
|
8510
8526
|
);
|
|
8511
|
-
|
|
8512
|
-
|
|
8527
|
+
const forwardRuleKeySet = __ensureRuleKeySet(forwardRules);
|
|
8528
|
+
if (!forwardRuleKeySet.has(key)) {
|
|
8529
|
+
forwardRuleKeySet.add(key);
|
|
8513
8530
|
forwardRules.push(newRule);
|
|
8514
8531
|
rulesChanged = true;
|
|
8515
8532
|
}
|
|
@@ -8523,8 +8540,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8523
8540
|
newRule.conclusion,
|
|
8524
8541
|
newRule.__dynamicConclusionTerm || null,
|
|
8525
8542
|
);
|
|
8526
|
-
|
|
8527
|
-
|
|
8543
|
+
const backRuleKeySet = __ensureRuleKeySet(backRules);
|
|
8544
|
+
if (!backRuleKeySet.has(key)) {
|
|
8545
|
+
backRuleKeySet.add(key);
|
|
8528
8546
|
backRules.push(newRule);
|
|
8529
8547
|
indexBackRule(backRules, newRule);
|
|
8530
8548
|
rulesChanged = true;
|
|
@@ -9462,7 +9480,26 @@ class N3SyntaxError extends SyntaxError {
|
|
|
9462
9480
|
}
|
|
9463
9481
|
|
|
9464
9482
|
function isWs(c) {
|
|
9465
|
-
|
|
9483
|
+
if (c === null || c === undefined) return false;
|
|
9484
|
+
const code = c.charCodeAt(0);
|
|
9485
|
+
// Fast path for the whitespace used by N3/Turtle inputs.
|
|
9486
|
+
return code === 0x20 || code === 0x09 || code === 0x0a || code === 0x0d || code === 0x0c;
|
|
9487
|
+
}
|
|
9488
|
+
|
|
9489
|
+
function isAsciiAlphaCode(code) {
|
|
9490
|
+
return (code >= 65 && code <= 90) || (code >= 97 && code <= 122);
|
|
9491
|
+
}
|
|
9492
|
+
|
|
9493
|
+
function isAsciiDigitCode(code) {
|
|
9494
|
+
return code >= 48 && code <= 57;
|
|
9495
|
+
}
|
|
9496
|
+
|
|
9497
|
+
function isAsciiAlpha(c) {
|
|
9498
|
+
return c !== null && c !== undefined && isAsciiAlphaCode(c.charCodeAt(0));
|
|
9499
|
+
}
|
|
9500
|
+
|
|
9501
|
+
function isAsciiDigit(c) {
|
|
9502
|
+
return c !== null && c !== undefined && isAsciiDigitCode(c.charCodeAt(0));
|
|
9466
9503
|
}
|
|
9467
9504
|
|
|
9468
9505
|
// Turtle/N3 prefixed names (PNAME_*) allow many Unicode letters and certain
|
|
@@ -9475,13 +9512,18 @@ function isWs(c) {
|
|
|
9475
9512
|
//
|
|
9476
9513
|
// We implement a grammar-aligned matcher for PN_CHARS* and PLX fragments.
|
|
9477
9514
|
function isHexDigit(c) {
|
|
9478
|
-
|
|
9515
|
+
if (c === null || c === undefined) return false;
|
|
9516
|
+
const code = c.charCodeAt(0);
|
|
9517
|
+
return (code >= 48 && code <= 57) || (code >= 65 && code <= 70) || (code >= 97 && code <= 102);
|
|
9479
9518
|
}
|
|
9480
9519
|
|
|
9481
9520
|
function isPnCharsBase(c) {
|
|
9482
9521
|
// Approximation of PN_CHARS_BASE from the N3 grammar using Unicode properties.
|
|
9483
9522
|
// Covers most letters used in practice (including ñ) and common scripts.
|
|
9484
|
-
|
|
9523
|
+
if (c === null || c === undefined) return false;
|
|
9524
|
+
const code = c.charCodeAt(0);
|
|
9525
|
+
if (isAsciiAlphaCode(code)) return true;
|
|
9526
|
+
return /\p{L}|\p{Nl}/u.test(c);
|
|
9485
9527
|
}
|
|
9486
9528
|
|
|
9487
9529
|
function isPnCharsU(c) {
|
|
@@ -9491,9 +9533,11 @@ function isPnCharsU(c) {
|
|
|
9491
9533
|
|
|
9492
9534
|
function isPnChars(c) {
|
|
9493
9535
|
// PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | U+00B7 | [U+0300-U+036F] | [U+203F-U+2040]
|
|
9494
|
-
if (c === null) return false;
|
|
9536
|
+
if (c === null || c === undefined) return false;
|
|
9537
|
+
const code = c.charCodeAt(0);
|
|
9538
|
+
if (isAsciiAlphaCode(code) || isAsciiDigitCode(code) || code === 95 || code === 45) return true;
|
|
9495
9539
|
if (isPnCharsU(c)) return true;
|
|
9496
|
-
if (c === '
|
|
9540
|
+
if (c === '\u00B7') return true;
|
|
9497
9541
|
const cp = c.codePointAt(0);
|
|
9498
9542
|
return (cp >= 0x0300 && cp <= 0x036f) || (cp >= 0x203f && cp <= 0x2040);
|
|
9499
9543
|
}
|
|
@@ -10598,7 +10642,10 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
10598
10642
|
function lex(inputText, opts = {}) {
|
|
10599
10643
|
const rdf = !!(opts && opts.rdf);
|
|
10600
10644
|
if (rdf) inputText = normalizeRdfCompatibility(inputText);
|
|
10601
|
-
|
|
10645
|
+
// Avoid copying large ASCII/BMP inputs into an Array. Array.from() is
|
|
10646
|
+
// only needed when the text contains surrogate pairs and we want the old
|
|
10647
|
+
// code-point iteration behavior for non-BMP characters.
|
|
10648
|
+
const chars = /[\uD800-\uDFFF]/.test(inputText) ? Array.from(inputText) : inputText;
|
|
10602
10649
|
const n = chars.length;
|
|
10603
10650
|
let i = 0;
|
|
10604
10651
|
const tokens = [];
|
|
@@ -10614,19 +10661,29 @@ function lex(inputText, opts = {}) {
|
|
|
10614
10661
|
// - Accepts percent escapes (%HH) as PLX fragments.
|
|
10615
10662
|
// - Accepts PN_LOCAL_ESC backslash escapes and decodes them ("\\." -> ".").
|
|
10616
10663
|
// - Accepts '.' inside a name only when it is not terminal.
|
|
10664
|
+
function sliceChars(start, end) {
|
|
10665
|
+
return typeof chars === 'string' ? chars.slice(start, end) : chars.slice(start, end).join('');
|
|
10666
|
+
}
|
|
10667
|
+
|
|
10617
10668
|
function readIdentText(startOffsetForErrors) {
|
|
10618
|
-
const
|
|
10669
|
+
const start = i;
|
|
10670
|
+
let out = null;
|
|
10671
|
+
|
|
10672
|
+
function appendRawUntilHere() {
|
|
10673
|
+
if (out === null) out = [sliceChars(start, i)];
|
|
10674
|
+
}
|
|
10675
|
+
|
|
10619
10676
|
while (i < n) {
|
|
10620
|
-
const cc =
|
|
10621
|
-
if (cc === null || isWs(cc)) break;
|
|
10677
|
+
const cc = chars[i];
|
|
10678
|
+
if (cc === null || cc === undefined || isWs(cc)) break;
|
|
10622
10679
|
|
|
10623
10680
|
// Hard stops: delimiters cannot appear unescaped inside PNAME tokens.
|
|
10624
|
-
if ('{}()[]
|
|
10681
|
+
if (cc === '{' || cc === '}' || cc === '(' || cc === ')' || cc === '[' || cc === ']' || cc === ';' || cc === ',') break;
|
|
10625
10682
|
|
|
10626
10683
|
// Dot is allowed inside PN_LOCAL, but not at the end.
|
|
10627
10684
|
if (cc === '.') {
|
|
10628
10685
|
if (!canContinueAfterDot(peek(1))) break;
|
|
10629
|
-
out.push('.');
|
|
10686
|
+
if (out !== null) out.push('.');
|
|
10630
10687
|
i++;
|
|
10631
10688
|
continue;
|
|
10632
10689
|
}
|
|
@@ -10641,6 +10698,7 @@ function lex(inputText, opts = {}) {
|
|
|
10641
10698
|
typeof startOffsetForErrors === 'number' ? startOffsetForErrors : i,
|
|
10642
10699
|
);
|
|
10643
10700
|
}
|
|
10701
|
+
appendRawUntilHere();
|
|
10644
10702
|
out.push('%', h1, h2);
|
|
10645
10703
|
i += 3;
|
|
10646
10704
|
continue;
|
|
@@ -10650,6 +10708,7 @@ function lex(inputText, opts = {}) {
|
|
|
10650
10708
|
if (cc === '\\') {
|
|
10651
10709
|
const esc = peek(1);
|
|
10652
10710
|
if (esc !== null && PN_LOCAL_ESC_SET.has(esc)) {
|
|
10711
|
+
appendRawUntilHere();
|
|
10653
10712
|
out.push(esc); // decoded form
|
|
10654
10713
|
i += 2;
|
|
10655
10714
|
continue;
|
|
@@ -10661,14 +10720,14 @@ function lex(inputText, opts = {}) {
|
|
|
10661
10720
|
}
|
|
10662
10721
|
|
|
10663
10722
|
if (isIdentChar(cc)) {
|
|
10664
|
-
out.push(cc);
|
|
10723
|
+
if (out !== null) out.push(cc);
|
|
10665
10724
|
i++;
|
|
10666
10725
|
continue;
|
|
10667
10726
|
}
|
|
10668
10727
|
|
|
10669
10728
|
break;
|
|
10670
10729
|
}
|
|
10671
|
-
return out.join('');
|
|
10730
|
+
return out === null ? sliceChars(start, i) : out.join('');
|
|
10672
10731
|
}
|
|
10673
10732
|
|
|
10674
10733
|
while (i < n) {
|
|
@@ -10955,10 +11014,10 @@ function lex(inputText, opts = {}) {
|
|
|
10955
11014
|
// "@" [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
|
|
10956
11015
|
const tagChars = [];
|
|
10957
11016
|
let cc = peek();
|
|
10958
|
-
if (cc === null ||
|
|
11017
|
+
if (cc === null || !isAsciiAlpha(cc)) {
|
|
10959
11018
|
throw new N3SyntaxError("Invalid language tag (expected [A-Za-z] after '@')", start);
|
|
10960
11019
|
}
|
|
10961
|
-
while ((cc = peek()) !== null &&
|
|
11020
|
+
while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
|
|
10962
11021
|
tagChars.push(cc);
|
|
10963
11022
|
i++;
|
|
10964
11023
|
}
|
|
@@ -10982,7 +11041,7 @@ function lex(inputText, opts = {}) {
|
|
|
10982
11041
|
// Otherwise, treat as a directive (@prefix, @base)
|
|
10983
11042
|
const wordChars = [];
|
|
10984
11043
|
let cc;
|
|
10985
|
-
while ((cc = peek()) !== null &&
|
|
11044
|
+
while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
|
|
10986
11045
|
wordChars.push(cc);
|
|
10987
11046
|
i++;
|
|
10988
11047
|
}
|
|
@@ -10994,19 +11053,19 @@ function lex(inputText, opts = {}) {
|
|
|
10994
11053
|
}
|
|
10995
11054
|
|
|
10996
11055
|
// 6) Numeric literal (integer or float)
|
|
10997
|
-
if (
|
|
11056
|
+
if (isAsciiDigit(c) || (c === '-' && peek(1) !== null && isAsciiDigit(peek(1)))) {
|
|
10998
11057
|
const start = i;
|
|
10999
11058
|
const numChars = [c];
|
|
11000
11059
|
i++;
|
|
11001
11060
|
while (i < n) {
|
|
11002
11061
|
const cc = chars[i];
|
|
11003
|
-
if (
|
|
11062
|
+
if (isAsciiDigit(cc)) {
|
|
11004
11063
|
numChars.push(cc);
|
|
11005
11064
|
i++;
|
|
11006
11065
|
continue;
|
|
11007
11066
|
}
|
|
11008
11067
|
if (cc === '.') {
|
|
11009
|
-
if (i + 1 < n &&
|
|
11068
|
+
if (i + 1 < n && isAsciiDigit(chars[i + 1])) {
|
|
11010
11069
|
numChars.push('.');
|
|
11011
11070
|
i++;
|
|
11012
11071
|
continue;
|
|
@@ -11021,14 +11080,14 @@ function lex(inputText, opts = {}) {
|
|
|
11021
11080
|
if (i < n && (chars[i] === 'e' || chars[i] === 'E')) {
|
|
11022
11081
|
let j = i + 1;
|
|
11023
11082
|
if (j < n && (chars[j] === '+' || chars[j] === '-')) j++;
|
|
11024
|
-
if (j < n &&
|
|
11083
|
+
if (j < n && isAsciiDigit(chars[j])) {
|
|
11025
11084
|
numChars.push(chars[i]); // e/E
|
|
11026
11085
|
i++;
|
|
11027
11086
|
if (i < n && (chars[i] === '+' || chars[i] === '-')) {
|
|
11028
11087
|
numChars.push(chars[i]);
|
|
11029
11088
|
i++;
|
|
11030
11089
|
}
|
|
11031
|
-
while (i < n &&
|
|
11090
|
+
while (i < n && isAsciiDigit(chars[i])) {
|
|
11032
11091
|
numChars.push(chars[i]);
|
|
11033
11092
|
i++;
|
|
11034
11093
|
}
|
|
@@ -11477,7 +11536,15 @@ class Parser {
|
|
|
11477
11536
|
}
|
|
11478
11537
|
|
|
11479
11538
|
isIdentKeyword(tok, keyword) {
|
|
11480
|
-
|
|
11539
|
+
if (!tok || tok.typ !== 'Ident' || typeof tok.value !== 'string') return false;
|
|
11540
|
+
const v = tok.value;
|
|
11541
|
+
if (v.length !== keyword.length) return false;
|
|
11542
|
+
for (let i = 0; i < keyword.length; i++) {
|
|
11543
|
+
const code = v.charCodeAt(i);
|
|
11544
|
+
const lower = code >= 65 && code <= 90 ? code + 32 : code;
|
|
11545
|
+
if (lower !== keyword.charCodeAt(i)) return false;
|
|
11546
|
+
}
|
|
11547
|
+
return true;
|
|
11481
11548
|
}
|
|
11482
11549
|
|
|
11483
11550
|
canStartSparqlPrefixDirective() {
|
|
@@ -12400,21 +12467,40 @@ function literalParts(lit) {
|
|
|
12400
12467
|
// equality fast-paths than repeated string key construction.
|
|
12401
12468
|
|
|
12402
12469
|
let __nextTid = 1;
|
|
12403
|
-
const __tidIntern = new Map(); //
|
|
12470
|
+
const __tidIntern = new Map(); // legacy generic key -> number
|
|
12471
|
+
const __iriTidIntern = new Map(); // IRI value -> number
|
|
12472
|
+
const __blankTidIntern = new Map(); // blank label -> number
|
|
12473
|
+
const __literalTidIntern = new Map(); // normalized literal lexical form -> number
|
|
12404
12474
|
|
|
12405
12475
|
// Avoid storing extremely large literal keys in the global term-id intern map.
|
|
12406
12476
|
// For huge literals we still assign a unique __tid, but we do not intern the key.
|
|
12407
12477
|
const MAX_LITERAL_TID_LEN = 1024;
|
|
12408
12478
|
|
|
12409
|
-
function
|
|
12410
|
-
let id =
|
|
12479
|
+
function __getTidFromMap(map, key) {
|
|
12480
|
+
let id = map.get(key);
|
|
12411
12481
|
if (!id) {
|
|
12412
12482
|
id = __nextTid++;
|
|
12413
|
-
|
|
12483
|
+
map.set(key, id);
|
|
12414
12484
|
}
|
|
12415
12485
|
return id;
|
|
12416
12486
|
}
|
|
12417
12487
|
|
|
12488
|
+
function __getTid(key) {
|
|
12489
|
+
return __getTidFromMap(__tidIntern, key);
|
|
12490
|
+
}
|
|
12491
|
+
|
|
12492
|
+
function __getIriTid(value) {
|
|
12493
|
+
return __getTidFromMap(__iriTidIntern, value);
|
|
12494
|
+
}
|
|
12495
|
+
|
|
12496
|
+
function __getBlankTid(label) {
|
|
12497
|
+
return __getTidFromMap(__blankTidIntern, label);
|
|
12498
|
+
}
|
|
12499
|
+
|
|
12500
|
+
function __getLiteralTid(norm) {
|
|
12501
|
+
return __getTidFromMap(__literalTidIntern, norm);
|
|
12502
|
+
}
|
|
12503
|
+
|
|
12418
12504
|
function __isQuotedLexical(lit) {
|
|
12419
12505
|
if (typeof lit !== 'string') return false;
|
|
12420
12506
|
if (lit.length >= 6) {
|
|
@@ -12460,6 +12546,14 @@ function __isPlainStringLiteralValue(lit) {
|
|
|
12460
12546
|
function normalizeLiteralForTid(lit) {
|
|
12461
12547
|
// Canonicalize so that plain string and explicit xsd:string share the same id.
|
|
12462
12548
|
if (typeof lit !== 'string') return lit;
|
|
12549
|
+
|
|
12550
|
+
// Fast path for the overwhelmingly common lexer output for plain string
|
|
12551
|
+
// literals: a canonical JSON-style quoted lexical form with no suffix.
|
|
12552
|
+
// This avoids literalParts()/language-tag parsing for large fact tables.
|
|
12553
|
+
if (lit.length >= 2 && lit.charCodeAt(0) === 34 && lit.charCodeAt(lit.length - 1) === 34 && lit.indexOf('^^') < 0) {
|
|
12554
|
+
return `${lit}^^<${XSD_NS}string>`;
|
|
12555
|
+
}
|
|
12556
|
+
|
|
12463
12557
|
const [lex, dt] = literalParts(lit);
|
|
12464
12558
|
if (dt === XSD_NS + 'string') return `${lex}^^<${XSD_NS}string>`;
|
|
12465
12559
|
if (dt === null && __isPlainStringLiteralValue(lit)) return `${lex}^^<${XSD_NS}string>`;
|
|
@@ -12477,7 +12571,7 @@ class Iri extends Term {
|
|
|
12477
12571
|
super();
|
|
12478
12572
|
this.value = value;
|
|
12479
12573
|
Object.defineProperty(this, '__tid', {
|
|
12480
|
-
value:
|
|
12574
|
+
value: __getIriTid(value),
|
|
12481
12575
|
enumerable: false,
|
|
12482
12576
|
});
|
|
12483
12577
|
}
|
|
@@ -12489,7 +12583,7 @@ class Literal extends Term {
|
|
|
12489
12583
|
this.value = value; // raw lexical form, e.g. "foo", 12, true, or "\"1944-08-21\"^^..."
|
|
12490
12584
|
const norm = normalizeLiteralForTid(value);
|
|
12491
12585
|
const useIntern = typeof norm === 'string' && norm.length <= MAX_LITERAL_TID_LEN;
|
|
12492
|
-
const tid = useIntern ?
|
|
12586
|
+
const tid = useIntern ? __getLiteralTid(norm) : __nextTid++;
|
|
12493
12587
|
Object.defineProperty(this, '__tid', {
|
|
12494
12588
|
value: tid,
|
|
12495
12589
|
enumerable: false,
|
|
@@ -12509,7 +12603,7 @@ class Blank extends Term {
|
|
|
12509
12603
|
super();
|
|
12510
12604
|
this.label = label; // _:b1, etc.
|
|
12511
12605
|
Object.defineProperty(this, '__tid', {
|
|
12512
|
-
value:
|
|
12606
|
+
value: __getBlankTid(label),
|
|
12513
12607
|
enumerable: false,
|
|
12514
12608
|
});
|
|
12515
12609
|
}
|
package/eyeling.js
CHANGED
|
@@ -4825,7 +4825,7 @@ function main() {
|
|
|
4825
4825
|
parseN3Text(text, {
|
|
4826
4826
|
baseIri: __sourceLabelToBaseIri(sourceLabel),
|
|
4827
4827
|
label: sourceLabel,
|
|
4828
|
-
collectUsedPrefixes:
|
|
4828
|
+
collectUsedPrefixes: streamMode,
|
|
4829
4829
|
keepSourceArtifacts: false,
|
|
4830
4830
|
rdf: rdfMode,
|
|
4831
4831
|
}),
|
|
@@ -6536,11 +6536,13 @@ function termFastKey(t) {
|
|
|
6536
6536
|
if (t instanceof Iri || t instanceof Blank) return t.__tid;
|
|
6537
6537
|
|
|
6538
6538
|
if (t instanceof Literal) {
|
|
6539
|
-
//
|
|
6540
|
-
//
|
|
6541
|
-
//
|
|
6542
|
-
//
|
|
6543
|
-
//
|
|
6539
|
+
// Literal construction already computed a value-stable __tid for ordinary
|
|
6540
|
+
// short literals. Avoid re-running literalParts()/datatype normalization
|
|
6541
|
+
// while building fact indexes; on data-heavy inputs this is a hot path.
|
|
6542
|
+
// Only the rare over-sized literal needs the value-based fallback because
|
|
6543
|
+
// prelude intentionally gives such literals per-object ids to avoid
|
|
6544
|
+
// retaining huge strings in the global interner.
|
|
6545
|
+
if (typeof t.value !== 'string' || t.value.length + 64 <= MAX_LITERAL_TID_LEN) return t.__tid;
|
|
6544
6546
|
const norm = normalizeLiteralForTid(t.value);
|
|
6545
6547
|
if (typeof norm === 'string' && norm.length > MAX_LITERAL_TID_LEN) return 'L:' + norm;
|
|
6546
6548
|
return t.__tid;
|
|
@@ -6627,17 +6629,28 @@ function ensureFactIndexes(facts) {
|
|
|
6627
6629
|
enumerable: false,
|
|
6628
6630
|
writable: true,
|
|
6629
6631
|
});
|
|
6632
|
+
Object.defineProperty(facts, '__keySetComplete', {
|
|
6633
|
+
value: false,
|
|
6634
|
+
enumerable: false,
|
|
6635
|
+
writable: true,
|
|
6636
|
+
});
|
|
6630
6637
|
|
|
6631
|
-
|
|
6638
|
+
// Build lookup indexes eagerly, but do not populate the duplicate-detection
|
|
6639
|
+
// string Set for every input fact. The predicate/subject/object indexes are
|
|
6640
|
+
// enough to verify duplicates when needed; avoiding 100k+ joined string keys
|
|
6641
|
+
// saves substantial time and GC on data-heavy query workloads.
|
|
6642
|
+
for (let i = 0; i < facts.length; i++) indexFact(facts, facts[i], i, false);
|
|
6632
6643
|
}
|
|
6633
6644
|
|
|
6634
|
-
function indexFact(facts, tr, idx) {
|
|
6645
|
+
function indexFact(facts, tr, idx, addKeySet = true) {
|
|
6635
6646
|
const sk = termFastKey(tr.s);
|
|
6636
6647
|
const ok = termFastKey(tr.o);
|
|
6648
|
+
let pkForKey = null;
|
|
6637
6649
|
|
|
6638
6650
|
if (tr.p instanceof Iri) {
|
|
6639
6651
|
// Use predicate term id as the primary key to avoid hashing long IRI strings.
|
|
6640
6652
|
const pk = tr.p.__tid;
|
|
6653
|
+
pkForKey = pk;
|
|
6641
6654
|
|
|
6642
6655
|
let pb = facts.__byPred.get(pk);
|
|
6643
6656
|
if (!pb) {
|
|
@@ -6695,8 +6708,10 @@ function indexFact(facts, tr, idx) {
|
|
|
6695
6708
|
}
|
|
6696
6709
|
}
|
|
6697
6710
|
|
|
6698
|
-
|
|
6699
|
-
|
|
6711
|
+
if (addKeySet && sk !== null && ok !== null) {
|
|
6712
|
+
if (pkForKey === null) pkForKey = termFastKey(tr.p);
|
|
6713
|
+
if (pkForKey !== null) facts.__keySet.add(sk + '\t' + pkForKey + '\t' + ok);
|
|
6714
|
+
}
|
|
6700
6715
|
}
|
|
6701
6716
|
|
|
6702
6717
|
function candidateFacts(facts, goal) {
|
|
@@ -6758,7 +6773,10 @@ function hasFactIndexed(facts, tr) {
|
|
|
6758
6773
|
ensureFactIndexes(facts);
|
|
6759
6774
|
|
|
6760
6775
|
const key = tripleFastKey(tr);
|
|
6761
|
-
if (key !== null)
|
|
6776
|
+
if (key !== null) {
|
|
6777
|
+
if (facts.__keySet.has(key)) return true;
|
|
6778
|
+
if (facts.__keySetComplete) return false;
|
|
6779
|
+
}
|
|
6762
6780
|
|
|
6763
6781
|
if (tr.p instanceof Iri) {
|
|
6764
6782
|
const pk = tr.p.__tid;
|
|
@@ -6788,7 +6806,7 @@ function pushFactIndexed(facts, tr) {
|
|
|
6788
6806
|
ensureFactIndexes(facts);
|
|
6789
6807
|
const idx = facts.length;
|
|
6790
6808
|
facts.push(tr);
|
|
6791
|
-
indexFact(facts, tr, idx);
|
|
6809
|
+
indexFact(facts, tr, idx, true);
|
|
6792
6810
|
}
|
|
6793
6811
|
|
|
6794
6812
|
function makeDerivedRecord(fact, rule, premises, subst, captureExplanations) {
|
|
@@ -8325,11 +8343,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8325
8343
|
const varGen = [0];
|
|
8326
8344
|
const skCounter = [0];
|
|
8327
8345
|
|
|
8328
|
-
//
|
|
8329
|
-
//
|
|
8330
|
-
|
|
8331
|
-
__ensureRuleKeySet(forwardRules);
|
|
8332
|
-
__ensureRuleKeySet(backRules);
|
|
8346
|
+
// Rule-key sets are only needed if a program actually derives rule-producing
|
|
8347
|
+
// triples. Building them eagerly is expensive on large static rule sets, so
|
|
8348
|
+
// dynamic-promotion sites create them lazily before duplicate checks.
|
|
8333
8349
|
|
|
8334
8350
|
// Cache head blank-node skolemization per (rule firing, head blank label).
|
|
8335
8351
|
// This prevents repeatedly generating fresh _:sk_N blanks for the *same*
|
|
@@ -8508,8 +8524,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8508
8524
|
newRule.conclusion,
|
|
8509
8525
|
newRule.__dynamicConclusionTerm || null,
|
|
8510
8526
|
);
|
|
8511
|
-
|
|
8512
|
-
|
|
8527
|
+
const forwardRuleKeySet = __ensureRuleKeySet(forwardRules);
|
|
8528
|
+
if (!forwardRuleKeySet.has(key)) {
|
|
8529
|
+
forwardRuleKeySet.add(key);
|
|
8513
8530
|
forwardRules.push(newRule);
|
|
8514
8531
|
rulesChanged = true;
|
|
8515
8532
|
}
|
|
@@ -8523,8 +8540,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
8523
8540
|
newRule.conclusion,
|
|
8524
8541
|
newRule.__dynamicConclusionTerm || null,
|
|
8525
8542
|
);
|
|
8526
|
-
|
|
8527
|
-
|
|
8543
|
+
const backRuleKeySet = __ensureRuleKeySet(backRules);
|
|
8544
|
+
if (!backRuleKeySet.has(key)) {
|
|
8545
|
+
backRuleKeySet.add(key);
|
|
8528
8546
|
backRules.push(newRule);
|
|
8529
8547
|
indexBackRule(backRules, newRule);
|
|
8530
8548
|
rulesChanged = true;
|
|
@@ -9462,7 +9480,26 @@ class N3SyntaxError extends SyntaxError {
|
|
|
9462
9480
|
}
|
|
9463
9481
|
|
|
9464
9482
|
function isWs(c) {
|
|
9465
|
-
|
|
9483
|
+
if (c === null || c === undefined) return false;
|
|
9484
|
+
const code = c.charCodeAt(0);
|
|
9485
|
+
// Fast path for the whitespace used by N3/Turtle inputs.
|
|
9486
|
+
return code === 0x20 || code === 0x09 || code === 0x0a || code === 0x0d || code === 0x0c;
|
|
9487
|
+
}
|
|
9488
|
+
|
|
9489
|
+
function isAsciiAlphaCode(code) {
|
|
9490
|
+
return (code >= 65 && code <= 90) || (code >= 97 && code <= 122);
|
|
9491
|
+
}
|
|
9492
|
+
|
|
9493
|
+
function isAsciiDigitCode(code) {
|
|
9494
|
+
return code >= 48 && code <= 57;
|
|
9495
|
+
}
|
|
9496
|
+
|
|
9497
|
+
function isAsciiAlpha(c) {
|
|
9498
|
+
return c !== null && c !== undefined && isAsciiAlphaCode(c.charCodeAt(0));
|
|
9499
|
+
}
|
|
9500
|
+
|
|
9501
|
+
function isAsciiDigit(c) {
|
|
9502
|
+
return c !== null && c !== undefined && isAsciiDigitCode(c.charCodeAt(0));
|
|
9466
9503
|
}
|
|
9467
9504
|
|
|
9468
9505
|
// Turtle/N3 prefixed names (PNAME_*) allow many Unicode letters and certain
|
|
@@ -9475,13 +9512,18 @@ function isWs(c) {
|
|
|
9475
9512
|
//
|
|
9476
9513
|
// We implement a grammar-aligned matcher for PN_CHARS* and PLX fragments.
|
|
9477
9514
|
function isHexDigit(c) {
|
|
9478
|
-
|
|
9515
|
+
if (c === null || c === undefined) return false;
|
|
9516
|
+
const code = c.charCodeAt(0);
|
|
9517
|
+
return (code >= 48 && code <= 57) || (code >= 65 && code <= 70) || (code >= 97 && code <= 102);
|
|
9479
9518
|
}
|
|
9480
9519
|
|
|
9481
9520
|
function isPnCharsBase(c) {
|
|
9482
9521
|
// Approximation of PN_CHARS_BASE from the N3 grammar using Unicode properties.
|
|
9483
9522
|
// Covers most letters used in practice (including ñ) and common scripts.
|
|
9484
|
-
|
|
9523
|
+
if (c === null || c === undefined) return false;
|
|
9524
|
+
const code = c.charCodeAt(0);
|
|
9525
|
+
if (isAsciiAlphaCode(code)) return true;
|
|
9526
|
+
return /\p{L}|\p{Nl}/u.test(c);
|
|
9485
9527
|
}
|
|
9486
9528
|
|
|
9487
9529
|
function isPnCharsU(c) {
|
|
@@ -9491,9 +9533,11 @@ function isPnCharsU(c) {
|
|
|
9491
9533
|
|
|
9492
9534
|
function isPnChars(c) {
|
|
9493
9535
|
// PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | U+00B7 | [U+0300-U+036F] | [U+203F-U+2040]
|
|
9494
|
-
if (c === null) return false;
|
|
9536
|
+
if (c === null || c === undefined) return false;
|
|
9537
|
+
const code = c.charCodeAt(0);
|
|
9538
|
+
if (isAsciiAlphaCode(code) || isAsciiDigitCode(code) || code === 95 || code === 45) return true;
|
|
9495
9539
|
if (isPnCharsU(c)) return true;
|
|
9496
|
-
if (c === '
|
|
9540
|
+
if (c === '\u00B7') return true;
|
|
9497
9541
|
const cp = c.codePointAt(0);
|
|
9498
9542
|
return (cp >= 0x0300 && cp <= 0x036f) || (cp >= 0x203f && cp <= 0x2040);
|
|
9499
9543
|
}
|
|
@@ -10598,7 +10642,10 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
10598
10642
|
function lex(inputText, opts = {}) {
|
|
10599
10643
|
const rdf = !!(opts && opts.rdf);
|
|
10600
10644
|
if (rdf) inputText = normalizeRdfCompatibility(inputText);
|
|
10601
|
-
|
|
10645
|
+
// Avoid copying large ASCII/BMP inputs into an Array. Array.from() is
|
|
10646
|
+
// only needed when the text contains surrogate pairs and we want the old
|
|
10647
|
+
// code-point iteration behavior for non-BMP characters.
|
|
10648
|
+
const chars = /[\uD800-\uDFFF]/.test(inputText) ? Array.from(inputText) : inputText;
|
|
10602
10649
|
const n = chars.length;
|
|
10603
10650
|
let i = 0;
|
|
10604
10651
|
const tokens = [];
|
|
@@ -10614,19 +10661,29 @@ function lex(inputText, opts = {}) {
|
|
|
10614
10661
|
// - Accepts percent escapes (%HH) as PLX fragments.
|
|
10615
10662
|
// - Accepts PN_LOCAL_ESC backslash escapes and decodes them ("\\." -> ".").
|
|
10616
10663
|
// - Accepts '.' inside a name only when it is not terminal.
|
|
10664
|
+
function sliceChars(start, end) {
|
|
10665
|
+
return typeof chars === 'string' ? chars.slice(start, end) : chars.slice(start, end).join('');
|
|
10666
|
+
}
|
|
10667
|
+
|
|
10617
10668
|
function readIdentText(startOffsetForErrors) {
|
|
10618
|
-
const
|
|
10669
|
+
const start = i;
|
|
10670
|
+
let out = null;
|
|
10671
|
+
|
|
10672
|
+
function appendRawUntilHere() {
|
|
10673
|
+
if (out === null) out = [sliceChars(start, i)];
|
|
10674
|
+
}
|
|
10675
|
+
|
|
10619
10676
|
while (i < n) {
|
|
10620
|
-
const cc =
|
|
10621
|
-
if (cc === null || isWs(cc)) break;
|
|
10677
|
+
const cc = chars[i];
|
|
10678
|
+
if (cc === null || cc === undefined || isWs(cc)) break;
|
|
10622
10679
|
|
|
10623
10680
|
// Hard stops: delimiters cannot appear unescaped inside PNAME tokens.
|
|
10624
|
-
if ('{}()[]
|
|
10681
|
+
if (cc === '{' || cc === '}' || cc === '(' || cc === ')' || cc === '[' || cc === ']' || cc === ';' || cc === ',') break;
|
|
10625
10682
|
|
|
10626
10683
|
// Dot is allowed inside PN_LOCAL, but not at the end.
|
|
10627
10684
|
if (cc === '.') {
|
|
10628
10685
|
if (!canContinueAfterDot(peek(1))) break;
|
|
10629
|
-
out.push('.');
|
|
10686
|
+
if (out !== null) out.push('.');
|
|
10630
10687
|
i++;
|
|
10631
10688
|
continue;
|
|
10632
10689
|
}
|
|
@@ -10641,6 +10698,7 @@ function lex(inputText, opts = {}) {
|
|
|
10641
10698
|
typeof startOffsetForErrors === 'number' ? startOffsetForErrors : i,
|
|
10642
10699
|
);
|
|
10643
10700
|
}
|
|
10701
|
+
appendRawUntilHere();
|
|
10644
10702
|
out.push('%', h1, h2);
|
|
10645
10703
|
i += 3;
|
|
10646
10704
|
continue;
|
|
@@ -10650,6 +10708,7 @@ function lex(inputText, opts = {}) {
|
|
|
10650
10708
|
if (cc === '\\') {
|
|
10651
10709
|
const esc = peek(1);
|
|
10652
10710
|
if (esc !== null && PN_LOCAL_ESC_SET.has(esc)) {
|
|
10711
|
+
appendRawUntilHere();
|
|
10653
10712
|
out.push(esc); // decoded form
|
|
10654
10713
|
i += 2;
|
|
10655
10714
|
continue;
|
|
@@ -10661,14 +10720,14 @@ function lex(inputText, opts = {}) {
|
|
|
10661
10720
|
}
|
|
10662
10721
|
|
|
10663
10722
|
if (isIdentChar(cc)) {
|
|
10664
|
-
out.push(cc);
|
|
10723
|
+
if (out !== null) out.push(cc);
|
|
10665
10724
|
i++;
|
|
10666
10725
|
continue;
|
|
10667
10726
|
}
|
|
10668
10727
|
|
|
10669
10728
|
break;
|
|
10670
10729
|
}
|
|
10671
|
-
return out.join('');
|
|
10730
|
+
return out === null ? sliceChars(start, i) : out.join('');
|
|
10672
10731
|
}
|
|
10673
10732
|
|
|
10674
10733
|
while (i < n) {
|
|
@@ -10955,10 +11014,10 @@ function lex(inputText, opts = {}) {
|
|
|
10955
11014
|
// "@" [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
|
|
10956
11015
|
const tagChars = [];
|
|
10957
11016
|
let cc = peek();
|
|
10958
|
-
if (cc === null ||
|
|
11017
|
+
if (cc === null || !isAsciiAlpha(cc)) {
|
|
10959
11018
|
throw new N3SyntaxError("Invalid language tag (expected [A-Za-z] after '@')", start);
|
|
10960
11019
|
}
|
|
10961
|
-
while ((cc = peek()) !== null &&
|
|
11020
|
+
while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
|
|
10962
11021
|
tagChars.push(cc);
|
|
10963
11022
|
i++;
|
|
10964
11023
|
}
|
|
@@ -10982,7 +11041,7 @@ function lex(inputText, opts = {}) {
|
|
|
10982
11041
|
// Otherwise, treat as a directive (@prefix, @base)
|
|
10983
11042
|
const wordChars = [];
|
|
10984
11043
|
let cc;
|
|
10985
|
-
while ((cc = peek()) !== null &&
|
|
11044
|
+
while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
|
|
10986
11045
|
wordChars.push(cc);
|
|
10987
11046
|
i++;
|
|
10988
11047
|
}
|
|
@@ -10994,19 +11053,19 @@ function lex(inputText, opts = {}) {
|
|
|
10994
11053
|
}
|
|
10995
11054
|
|
|
10996
11055
|
// 6) Numeric literal (integer or float)
|
|
10997
|
-
if (
|
|
11056
|
+
if (isAsciiDigit(c) || (c === '-' && peek(1) !== null && isAsciiDigit(peek(1)))) {
|
|
10998
11057
|
const start = i;
|
|
10999
11058
|
const numChars = [c];
|
|
11000
11059
|
i++;
|
|
11001
11060
|
while (i < n) {
|
|
11002
11061
|
const cc = chars[i];
|
|
11003
|
-
if (
|
|
11062
|
+
if (isAsciiDigit(cc)) {
|
|
11004
11063
|
numChars.push(cc);
|
|
11005
11064
|
i++;
|
|
11006
11065
|
continue;
|
|
11007
11066
|
}
|
|
11008
11067
|
if (cc === '.') {
|
|
11009
|
-
if (i + 1 < n &&
|
|
11068
|
+
if (i + 1 < n && isAsciiDigit(chars[i + 1])) {
|
|
11010
11069
|
numChars.push('.');
|
|
11011
11070
|
i++;
|
|
11012
11071
|
continue;
|
|
@@ -11021,14 +11080,14 @@ function lex(inputText, opts = {}) {
|
|
|
11021
11080
|
if (i < n && (chars[i] === 'e' || chars[i] === 'E')) {
|
|
11022
11081
|
let j = i + 1;
|
|
11023
11082
|
if (j < n && (chars[j] === '+' || chars[j] === '-')) j++;
|
|
11024
|
-
if (j < n &&
|
|
11083
|
+
if (j < n && isAsciiDigit(chars[j])) {
|
|
11025
11084
|
numChars.push(chars[i]); // e/E
|
|
11026
11085
|
i++;
|
|
11027
11086
|
if (i < n && (chars[i] === '+' || chars[i] === '-')) {
|
|
11028
11087
|
numChars.push(chars[i]);
|
|
11029
11088
|
i++;
|
|
11030
11089
|
}
|
|
11031
|
-
while (i < n &&
|
|
11090
|
+
while (i < n && isAsciiDigit(chars[i])) {
|
|
11032
11091
|
numChars.push(chars[i]);
|
|
11033
11092
|
i++;
|
|
11034
11093
|
}
|
|
@@ -11477,7 +11536,15 @@ class Parser {
|
|
|
11477
11536
|
}
|
|
11478
11537
|
|
|
11479
11538
|
isIdentKeyword(tok, keyword) {
|
|
11480
|
-
|
|
11539
|
+
if (!tok || tok.typ !== 'Ident' || typeof tok.value !== 'string') return false;
|
|
11540
|
+
const v = tok.value;
|
|
11541
|
+
if (v.length !== keyword.length) return false;
|
|
11542
|
+
for (let i = 0; i < keyword.length; i++) {
|
|
11543
|
+
const code = v.charCodeAt(i);
|
|
11544
|
+
const lower = code >= 65 && code <= 90 ? code + 32 : code;
|
|
11545
|
+
if (lower !== keyword.charCodeAt(i)) return false;
|
|
11546
|
+
}
|
|
11547
|
+
return true;
|
|
11481
11548
|
}
|
|
11482
11549
|
|
|
11483
11550
|
canStartSparqlPrefixDirective() {
|
|
@@ -12400,21 +12467,40 @@ function literalParts(lit) {
|
|
|
12400
12467
|
// equality fast-paths than repeated string key construction.
|
|
12401
12468
|
|
|
12402
12469
|
let __nextTid = 1;
|
|
12403
|
-
const __tidIntern = new Map(); //
|
|
12470
|
+
const __tidIntern = new Map(); // legacy generic key -> number
|
|
12471
|
+
const __iriTidIntern = new Map(); // IRI value -> number
|
|
12472
|
+
const __blankTidIntern = new Map(); // blank label -> number
|
|
12473
|
+
const __literalTidIntern = new Map(); // normalized literal lexical form -> number
|
|
12404
12474
|
|
|
12405
12475
|
// Avoid storing extremely large literal keys in the global term-id intern map.
|
|
12406
12476
|
// For huge literals we still assign a unique __tid, but we do not intern the key.
|
|
12407
12477
|
const MAX_LITERAL_TID_LEN = 1024;
|
|
12408
12478
|
|
|
12409
|
-
function
|
|
12410
|
-
let id =
|
|
12479
|
+
function __getTidFromMap(map, key) {
|
|
12480
|
+
let id = map.get(key);
|
|
12411
12481
|
if (!id) {
|
|
12412
12482
|
id = __nextTid++;
|
|
12413
|
-
|
|
12483
|
+
map.set(key, id);
|
|
12414
12484
|
}
|
|
12415
12485
|
return id;
|
|
12416
12486
|
}
|
|
12417
12487
|
|
|
12488
|
+
function __getTid(key) {
|
|
12489
|
+
return __getTidFromMap(__tidIntern, key);
|
|
12490
|
+
}
|
|
12491
|
+
|
|
12492
|
+
function __getIriTid(value) {
|
|
12493
|
+
return __getTidFromMap(__iriTidIntern, value);
|
|
12494
|
+
}
|
|
12495
|
+
|
|
12496
|
+
function __getBlankTid(label) {
|
|
12497
|
+
return __getTidFromMap(__blankTidIntern, label);
|
|
12498
|
+
}
|
|
12499
|
+
|
|
12500
|
+
function __getLiteralTid(norm) {
|
|
12501
|
+
return __getTidFromMap(__literalTidIntern, norm);
|
|
12502
|
+
}
|
|
12503
|
+
|
|
12418
12504
|
function __isQuotedLexical(lit) {
|
|
12419
12505
|
if (typeof lit !== 'string') return false;
|
|
12420
12506
|
if (lit.length >= 6) {
|
|
@@ -12460,6 +12546,14 @@ function __isPlainStringLiteralValue(lit) {
|
|
|
12460
12546
|
function normalizeLiteralForTid(lit) {
|
|
12461
12547
|
// Canonicalize so that plain string and explicit xsd:string share the same id.
|
|
12462
12548
|
if (typeof lit !== 'string') return lit;
|
|
12549
|
+
|
|
12550
|
+
// Fast path for the overwhelmingly common lexer output for plain string
|
|
12551
|
+
// literals: a canonical JSON-style quoted lexical form with no suffix.
|
|
12552
|
+
// This avoids literalParts()/language-tag parsing for large fact tables.
|
|
12553
|
+
if (lit.length >= 2 && lit.charCodeAt(0) === 34 && lit.charCodeAt(lit.length - 1) === 34 && lit.indexOf('^^') < 0) {
|
|
12554
|
+
return `${lit}^^<${XSD_NS}string>`;
|
|
12555
|
+
}
|
|
12556
|
+
|
|
12463
12557
|
const [lex, dt] = literalParts(lit);
|
|
12464
12558
|
if (dt === XSD_NS + 'string') return `${lex}^^<${XSD_NS}string>`;
|
|
12465
12559
|
if (dt === null && __isPlainStringLiteralValue(lit)) return `${lex}^^<${XSD_NS}string>`;
|
|
@@ -12477,7 +12571,7 @@ class Iri extends Term {
|
|
|
12477
12571
|
super();
|
|
12478
12572
|
this.value = value;
|
|
12479
12573
|
Object.defineProperty(this, '__tid', {
|
|
12480
|
-
value:
|
|
12574
|
+
value: __getIriTid(value),
|
|
12481
12575
|
enumerable: false,
|
|
12482
12576
|
});
|
|
12483
12577
|
}
|
|
@@ -12489,7 +12583,7 @@ class Literal extends Term {
|
|
|
12489
12583
|
this.value = value; // raw lexical form, e.g. "foo", 12, true, or "\"1944-08-21\"^^..."
|
|
12490
12584
|
const norm = normalizeLiteralForTid(value);
|
|
12491
12585
|
const useIntern = typeof norm === 'string' && norm.length <= MAX_LITERAL_TID_LEN;
|
|
12492
|
-
const tid = useIntern ?
|
|
12586
|
+
const tid = useIntern ? __getLiteralTid(norm) : __nextTid++;
|
|
12493
12587
|
Object.defineProperty(this, '__tid', {
|
|
12494
12588
|
value: tid,
|
|
12495
12589
|
enumerable: false,
|
|
@@ -12509,7 +12603,7 @@ class Blank extends Term {
|
|
|
12509
12603
|
super();
|
|
12510
12604
|
this.label = label; // _:b1, etc.
|
|
12511
12605
|
Object.defineProperty(this, '__tid', {
|
|
12512
|
-
value:
|
|
12606
|
+
value: __getBlankTid(label),
|
|
12513
12607
|
enumerable: false,
|
|
12514
12608
|
});
|
|
12515
12609
|
}
|
package/lib/cli.js
CHANGED
package/lib/engine.js
CHANGED
|
@@ -1049,11 +1049,13 @@ function termFastKey(t) {
|
|
|
1049
1049
|
if (t instanceof Iri || t instanceof Blank) return t.__tid;
|
|
1050
1050
|
|
|
1051
1051
|
if (t instanceof Literal) {
|
|
1052
|
-
//
|
|
1053
|
-
//
|
|
1054
|
-
//
|
|
1055
|
-
//
|
|
1056
|
-
//
|
|
1052
|
+
// Literal construction already computed a value-stable __tid for ordinary
|
|
1053
|
+
// short literals. Avoid re-running literalParts()/datatype normalization
|
|
1054
|
+
// while building fact indexes; on data-heavy inputs this is a hot path.
|
|
1055
|
+
// Only the rare over-sized literal needs the value-based fallback because
|
|
1056
|
+
// prelude intentionally gives such literals per-object ids to avoid
|
|
1057
|
+
// retaining huge strings in the global interner.
|
|
1058
|
+
if (typeof t.value !== 'string' || t.value.length + 64 <= MAX_LITERAL_TID_LEN) return t.__tid;
|
|
1057
1059
|
const norm = normalizeLiteralForTid(t.value);
|
|
1058
1060
|
if (typeof norm === 'string' && norm.length > MAX_LITERAL_TID_LEN) return 'L:' + norm;
|
|
1059
1061
|
return t.__tid;
|
|
@@ -1140,17 +1142,28 @@ function ensureFactIndexes(facts) {
|
|
|
1140
1142
|
enumerable: false,
|
|
1141
1143
|
writable: true,
|
|
1142
1144
|
});
|
|
1145
|
+
Object.defineProperty(facts, '__keySetComplete', {
|
|
1146
|
+
value: false,
|
|
1147
|
+
enumerable: false,
|
|
1148
|
+
writable: true,
|
|
1149
|
+
});
|
|
1143
1150
|
|
|
1144
|
-
|
|
1151
|
+
// Build lookup indexes eagerly, but do not populate the duplicate-detection
|
|
1152
|
+
// string Set for every input fact. The predicate/subject/object indexes are
|
|
1153
|
+
// enough to verify duplicates when needed; avoiding 100k+ joined string keys
|
|
1154
|
+
// saves substantial time and GC on data-heavy query workloads.
|
|
1155
|
+
for (let i = 0; i < facts.length; i++) indexFact(facts, facts[i], i, false);
|
|
1145
1156
|
}
|
|
1146
1157
|
|
|
1147
|
-
function indexFact(facts, tr, idx) {
|
|
1158
|
+
function indexFact(facts, tr, idx, addKeySet = true) {
|
|
1148
1159
|
const sk = termFastKey(tr.s);
|
|
1149
1160
|
const ok = termFastKey(tr.o);
|
|
1161
|
+
let pkForKey = null;
|
|
1150
1162
|
|
|
1151
1163
|
if (tr.p instanceof Iri) {
|
|
1152
1164
|
// Use predicate term id as the primary key to avoid hashing long IRI strings.
|
|
1153
1165
|
const pk = tr.p.__tid;
|
|
1166
|
+
pkForKey = pk;
|
|
1154
1167
|
|
|
1155
1168
|
let pb = facts.__byPred.get(pk);
|
|
1156
1169
|
if (!pb) {
|
|
@@ -1208,8 +1221,10 @@ function indexFact(facts, tr, idx) {
|
|
|
1208
1221
|
}
|
|
1209
1222
|
}
|
|
1210
1223
|
|
|
1211
|
-
|
|
1212
|
-
|
|
1224
|
+
if (addKeySet && sk !== null && ok !== null) {
|
|
1225
|
+
if (pkForKey === null) pkForKey = termFastKey(tr.p);
|
|
1226
|
+
if (pkForKey !== null) facts.__keySet.add(sk + '\t' + pkForKey + '\t' + ok);
|
|
1227
|
+
}
|
|
1213
1228
|
}
|
|
1214
1229
|
|
|
1215
1230
|
function candidateFacts(facts, goal) {
|
|
@@ -1271,7 +1286,10 @@ function hasFactIndexed(facts, tr) {
|
|
|
1271
1286
|
ensureFactIndexes(facts);
|
|
1272
1287
|
|
|
1273
1288
|
const key = tripleFastKey(tr);
|
|
1274
|
-
if (key !== null)
|
|
1289
|
+
if (key !== null) {
|
|
1290
|
+
if (facts.__keySet.has(key)) return true;
|
|
1291
|
+
if (facts.__keySetComplete) return false;
|
|
1292
|
+
}
|
|
1275
1293
|
|
|
1276
1294
|
if (tr.p instanceof Iri) {
|
|
1277
1295
|
const pk = tr.p.__tid;
|
|
@@ -1301,7 +1319,7 @@ function pushFactIndexed(facts, tr) {
|
|
|
1301
1319
|
ensureFactIndexes(facts);
|
|
1302
1320
|
const idx = facts.length;
|
|
1303
1321
|
facts.push(tr);
|
|
1304
|
-
indexFact(facts, tr, idx);
|
|
1322
|
+
indexFact(facts, tr, idx, true);
|
|
1305
1323
|
}
|
|
1306
1324
|
|
|
1307
1325
|
function makeDerivedRecord(fact, rule, premises, subst, captureExplanations) {
|
|
@@ -2838,11 +2856,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
2838
2856
|
const varGen = [0];
|
|
2839
2857
|
const skCounter = [0];
|
|
2840
2858
|
|
|
2841
|
-
//
|
|
2842
|
-
//
|
|
2843
|
-
|
|
2844
|
-
__ensureRuleKeySet(forwardRules);
|
|
2845
|
-
__ensureRuleKeySet(backRules);
|
|
2859
|
+
// Rule-key sets are only needed if a program actually derives rule-producing
|
|
2860
|
+
// triples. Building them eagerly is expensive on large static rule sets, so
|
|
2861
|
+
// dynamic-promotion sites create them lazily before duplicate checks.
|
|
2846
2862
|
|
|
2847
2863
|
// Cache head blank-node skolemization per (rule firing, head blank label).
|
|
2848
2864
|
// This prevents repeatedly generating fresh _:sk_N blanks for the *same*
|
|
@@ -3021,8 +3037,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
3021
3037
|
newRule.conclusion,
|
|
3022
3038
|
newRule.__dynamicConclusionTerm || null,
|
|
3023
3039
|
);
|
|
3024
|
-
|
|
3025
|
-
|
|
3040
|
+
const forwardRuleKeySet = __ensureRuleKeySet(forwardRules);
|
|
3041
|
+
if (!forwardRuleKeySet.has(key)) {
|
|
3042
|
+
forwardRuleKeySet.add(key);
|
|
3026
3043
|
forwardRules.push(newRule);
|
|
3027
3044
|
rulesChanged = true;
|
|
3028
3045
|
}
|
|
@@ -3036,8 +3053,9 @@ function forwardChain(facts, forwardRules, backRules, onDerived /* optional */,
|
|
|
3036
3053
|
newRule.conclusion,
|
|
3037
3054
|
newRule.__dynamicConclusionTerm || null,
|
|
3038
3055
|
);
|
|
3039
|
-
|
|
3040
|
-
|
|
3056
|
+
const backRuleKeySet = __ensureRuleKeySet(backRules);
|
|
3057
|
+
if (!backRuleKeySet.has(key)) {
|
|
3058
|
+
backRuleKeySet.add(key);
|
|
3041
3059
|
backRules.push(newRule);
|
|
3042
3060
|
indexBackRule(backRules, newRule);
|
|
3043
3061
|
rulesChanged = true;
|
package/lib/lexer.js
CHANGED
|
@@ -30,7 +30,26 @@ class N3SyntaxError extends SyntaxError {
|
|
|
30
30
|
}
|
|
31
31
|
|
|
32
32
|
function isWs(c) {
|
|
33
|
-
|
|
33
|
+
if (c === null || c === undefined) return false;
|
|
34
|
+
const code = c.charCodeAt(0);
|
|
35
|
+
// Fast path for the whitespace used by N3/Turtle inputs.
|
|
36
|
+
return code === 0x20 || code === 0x09 || code === 0x0a || code === 0x0d || code === 0x0c;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
function isAsciiAlphaCode(code) {
|
|
40
|
+
return (code >= 65 && code <= 90) || (code >= 97 && code <= 122);
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
function isAsciiDigitCode(code) {
|
|
44
|
+
return code >= 48 && code <= 57;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
function isAsciiAlpha(c) {
|
|
48
|
+
return c !== null && c !== undefined && isAsciiAlphaCode(c.charCodeAt(0));
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
function isAsciiDigit(c) {
|
|
52
|
+
return c !== null && c !== undefined && isAsciiDigitCode(c.charCodeAt(0));
|
|
34
53
|
}
|
|
35
54
|
|
|
36
55
|
// Turtle/N3 prefixed names (PNAME_*) allow many Unicode letters and certain
|
|
@@ -43,13 +62,18 @@ function isWs(c) {
|
|
|
43
62
|
//
|
|
44
63
|
// We implement a grammar-aligned matcher for PN_CHARS* and PLX fragments.
|
|
45
64
|
function isHexDigit(c) {
|
|
46
|
-
|
|
65
|
+
if (c === null || c === undefined) return false;
|
|
66
|
+
const code = c.charCodeAt(0);
|
|
67
|
+
return (code >= 48 && code <= 57) || (code >= 65 && code <= 70) || (code >= 97 && code <= 102);
|
|
47
68
|
}
|
|
48
69
|
|
|
49
70
|
function isPnCharsBase(c) {
|
|
50
71
|
// Approximation of PN_CHARS_BASE from the N3 grammar using Unicode properties.
|
|
51
72
|
// Covers most letters used in practice (including ñ) and common scripts.
|
|
52
|
-
|
|
73
|
+
if (c === null || c === undefined) return false;
|
|
74
|
+
const code = c.charCodeAt(0);
|
|
75
|
+
if (isAsciiAlphaCode(code)) return true;
|
|
76
|
+
return /\p{L}|\p{Nl}/u.test(c);
|
|
53
77
|
}
|
|
54
78
|
|
|
55
79
|
function isPnCharsU(c) {
|
|
@@ -59,9 +83,11 @@ function isPnCharsU(c) {
|
|
|
59
83
|
|
|
60
84
|
function isPnChars(c) {
|
|
61
85
|
// PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | U+00B7 | [U+0300-U+036F] | [U+203F-U+2040]
|
|
62
|
-
if (c === null) return false;
|
|
86
|
+
if (c === null || c === undefined) return false;
|
|
87
|
+
const code = c.charCodeAt(0);
|
|
88
|
+
if (isAsciiAlphaCode(code) || isAsciiDigitCode(code) || code === 95 || code === 45) return true;
|
|
63
89
|
if (isPnCharsU(c)) return true;
|
|
64
|
-
if (c === '
|
|
90
|
+
if (c === '\u00B7') return true;
|
|
65
91
|
const cp = c.codePointAt(0);
|
|
66
92
|
return (cp >= 0x0300 && cp <= 0x036f) || (cp >= 0x203f && cp <= 0x2040);
|
|
67
93
|
}
|
|
@@ -1166,7 +1192,10 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
1166
1192
|
function lex(inputText, opts = {}) {
|
|
1167
1193
|
const rdf = !!(opts && opts.rdf);
|
|
1168
1194
|
if (rdf) inputText = normalizeRdfCompatibility(inputText);
|
|
1169
|
-
|
|
1195
|
+
// Avoid copying large ASCII/BMP inputs into an Array. Array.from() is
|
|
1196
|
+
// only needed when the text contains surrogate pairs and we want the old
|
|
1197
|
+
// code-point iteration behavior for non-BMP characters.
|
|
1198
|
+
const chars = /[\uD800-\uDFFF]/.test(inputText) ? Array.from(inputText) : inputText;
|
|
1170
1199
|
const n = chars.length;
|
|
1171
1200
|
let i = 0;
|
|
1172
1201
|
const tokens = [];
|
|
@@ -1182,19 +1211,29 @@ function lex(inputText, opts = {}) {
|
|
|
1182
1211
|
// - Accepts percent escapes (%HH) as PLX fragments.
|
|
1183
1212
|
// - Accepts PN_LOCAL_ESC backslash escapes and decodes them ("\\." -> ".").
|
|
1184
1213
|
// - Accepts '.' inside a name only when it is not terminal.
|
|
1214
|
+
function sliceChars(start, end) {
|
|
1215
|
+
return typeof chars === 'string' ? chars.slice(start, end) : chars.slice(start, end).join('');
|
|
1216
|
+
}
|
|
1217
|
+
|
|
1185
1218
|
function readIdentText(startOffsetForErrors) {
|
|
1186
|
-
const
|
|
1219
|
+
const start = i;
|
|
1220
|
+
let out = null;
|
|
1221
|
+
|
|
1222
|
+
function appendRawUntilHere() {
|
|
1223
|
+
if (out === null) out = [sliceChars(start, i)];
|
|
1224
|
+
}
|
|
1225
|
+
|
|
1187
1226
|
while (i < n) {
|
|
1188
|
-
const cc =
|
|
1189
|
-
if (cc === null || isWs(cc)) break;
|
|
1227
|
+
const cc = chars[i];
|
|
1228
|
+
if (cc === null || cc === undefined || isWs(cc)) break;
|
|
1190
1229
|
|
|
1191
1230
|
// Hard stops: delimiters cannot appear unescaped inside PNAME tokens.
|
|
1192
|
-
if ('{}()[]
|
|
1231
|
+
if (cc === '{' || cc === '}' || cc === '(' || cc === ')' || cc === '[' || cc === ']' || cc === ';' || cc === ',') break;
|
|
1193
1232
|
|
|
1194
1233
|
// Dot is allowed inside PN_LOCAL, but not at the end.
|
|
1195
1234
|
if (cc === '.') {
|
|
1196
1235
|
if (!canContinueAfterDot(peek(1))) break;
|
|
1197
|
-
out.push('.');
|
|
1236
|
+
if (out !== null) out.push('.');
|
|
1198
1237
|
i++;
|
|
1199
1238
|
continue;
|
|
1200
1239
|
}
|
|
@@ -1209,6 +1248,7 @@ function lex(inputText, opts = {}) {
|
|
|
1209
1248
|
typeof startOffsetForErrors === 'number' ? startOffsetForErrors : i,
|
|
1210
1249
|
);
|
|
1211
1250
|
}
|
|
1251
|
+
appendRawUntilHere();
|
|
1212
1252
|
out.push('%', h1, h2);
|
|
1213
1253
|
i += 3;
|
|
1214
1254
|
continue;
|
|
@@ -1218,6 +1258,7 @@ function lex(inputText, opts = {}) {
|
|
|
1218
1258
|
if (cc === '\\') {
|
|
1219
1259
|
const esc = peek(1);
|
|
1220
1260
|
if (esc !== null && PN_LOCAL_ESC_SET.has(esc)) {
|
|
1261
|
+
appendRawUntilHere();
|
|
1221
1262
|
out.push(esc); // decoded form
|
|
1222
1263
|
i += 2;
|
|
1223
1264
|
continue;
|
|
@@ -1229,14 +1270,14 @@ function lex(inputText, opts = {}) {
|
|
|
1229
1270
|
}
|
|
1230
1271
|
|
|
1231
1272
|
if (isIdentChar(cc)) {
|
|
1232
|
-
out.push(cc);
|
|
1273
|
+
if (out !== null) out.push(cc);
|
|
1233
1274
|
i++;
|
|
1234
1275
|
continue;
|
|
1235
1276
|
}
|
|
1236
1277
|
|
|
1237
1278
|
break;
|
|
1238
1279
|
}
|
|
1239
|
-
return out.join('');
|
|
1280
|
+
return out === null ? sliceChars(start, i) : out.join('');
|
|
1240
1281
|
}
|
|
1241
1282
|
|
|
1242
1283
|
while (i < n) {
|
|
@@ -1523,10 +1564,10 @@ function lex(inputText, opts = {}) {
|
|
|
1523
1564
|
// "@" [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
|
|
1524
1565
|
const tagChars = [];
|
|
1525
1566
|
let cc = peek();
|
|
1526
|
-
if (cc === null ||
|
|
1567
|
+
if (cc === null || !isAsciiAlpha(cc)) {
|
|
1527
1568
|
throw new N3SyntaxError("Invalid language tag (expected [A-Za-z] after '@')", start);
|
|
1528
1569
|
}
|
|
1529
|
-
while ((cc = peek()) !== null &&
|
|
1570
|
+
while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
|
|
1530
1571
|
tagChars.push(cc);
|
|
1531
1572
|
i++;
|
|
1532
1573
|
}
|
|
@@ -1550,7 +1591,7 @@ function lex(inputText, opts = {}) {
|
|
|
1550
1591
|
// Otherwise, treat as a directive (@prefix, @base)
|
|
1551
1592
|
const wordChars = [];
|
|
1552
1593
|
let cc;
|
|
1553
|
-
while ((cc = peek()) !== null &&
|
|
1594
|
+
while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
|
|
1554
1595
|
wordChars.push(cc);
|
|
1555
1596
|
i++;
|
|
1556
1597
|
}
|
|
@@ -1562,19 +1603,19 @@ function lex(inputText, opts = {}) {
|
|
|
1562
1603
|
}
|
|
1563
1604
|
|
|
1564
1605
|
// 6) Numeric literal (integer or float)
|
|
1565
|
-
if (
|
|
1606
|
+
if (isAsciiDigit(c) || (c === '-' && peek(1) !== null && isAsciiDigit(peek(1)))) {
|
|
1566
1607
|
const start = i;
|
|
1567
1608
|
const numChars = [c];
|
|
1568
1609
|
i++;
|
|
1569
1610
|
while (i < n) {
|
|
1570
1611
|
const cc = chars[i];
|
|
1571
|
-
if (
|
|
1612
|
+
if (isAsciiDigit(cc)) {
|
|
1572
1613
|
numChars.push(cc);
|
|
1573
1614
|
i++;
|
|
1574
1615
|
continue;
|
|
1575
1616
|
}
|
|
1576
1617
|
if (cc === '.') {
|
|
1577
|
-
if (i + 1 < n &&
|
|
1618
|
+
if (i + 1 < n && isAsciiDigit(chars[i + 1])) {
|
|
1578
1619
|
numChars.push('.');
|
|
1579
1620
|
i++;
|
|
1580
1621
|
continue;
|
|
@@ -1589,14 +1630,14 @@ function lex(inputText, opts = {}) {
|
|
|
1589
1630
|
if (i < n && (chars[i] === 'e' || chars[i] === 'E')) {
|
|
1590
1631
|
let j = i + 1;
|
|
1591
1632
|
if (j < n && (chars[j] === '+' || chars[j] === '-')) j++;
|
|
1592
|
-
if (j < n &&
|
|
1633
|
+
if (j < n && isAsciiDigit(chars[j])) {
|
|
1593
1634
|
numChars.push(chars[i]); // e/E
|
|
1594
1635
|
i++;
|
|
1595
1636
|
if (i < n && (chars[i] === '+' || chars[i] === '-')) {
|
|
1596
1637
|
numChars.push(chars[i]);
|
|
1597
1638
|
i++;
|
|
1598
1639
|
}
|
|
1599
|
-
while (i < n &&
|
|
1640
|
+
while (i < n && isAsciiDigit(chars[i])) {
|
|
1600
1641
|
numChars.push(chars[i]);
|
|
1601
1642
|
i++;
|
|
1602
1643
|
}
|
package/lib/parser.js
CHANGED
|
@@ -86,7 +86,15 @@ class Parser {
|
|
|
86
86
|
}
|
|
87
87
|
|
|
88
88
|
isIdentKeyword(tok, keyword) {
|
|
89
|
-
|
|
89
|
+
if (!tok || tok.typ !== 'Ident' || typeof tok.value !== 'string') return false;
|
|
90
|
+
const v = tok.value;
|
|
91
|
+
if (v.length !== keyword.length) return false;
|
|
92
|
+
for (let i = 0; i < keyword.length; i++) {
|
|
93
|
+
const code = v.charCodeAt(i);
|
|
94
|
+
const lower = code >= 65 && code <= 90 ? code + 32 : code;
|
|
95
|
+
if (lower !== keyword.charCodeAt(i)) return false;
|
|
96
|
+
}
|
|
97
|
+
return true;
|
|
90
98
|
}
|
|
91
99
|
|
|
92
100
|
canStartSparqlPrefixDirective() {
|
package/lib/prelude.js
CHANGED
|
@@ -250,21 +250,40 @@ function literalParts(lit) {
|
|
|
250
250
|
// equality fast-paths than repeated string key construction.
|
|
251
251
|
|
|
252
252
|
let __nextTid = 1;
|
|
253
|
-
const __tidIntern = new Map(); //
|
|
253
|
+
const __tidIntern = new Map(); // legacy generic key -> number
|
|
254
|
+
const __iriTidIntern = new Map(); // IRI value -> number
|
|
255
|
+
const __blankTidIntern = new Map(); // blank label -> number
|
|
256
|
+
const __literalTidIntern = new Map(); // normalized literal lexical form -> number
|
|
254
257
|
|
|
255
258
|
// Avoid storing extremely large literal keys in the global term-id intern map.
|
|
256
259
|
// For huge literals we still assign a unique __tid, but we do not intern the key.
|
|
257
260
|
const MAX_LITERAL_TID_LEN = 1024;
|
|
258
261
|
|
|
259
|
-
function
|
|
260
|
-
let id =
|
|
262
|
+
function __getTidFromMap(map, key) {
|
|
263
|
+
let id = map.get(key);
|
|
261
264
|
if (!id) {
|
|
262
265
|
id = __nextTid++;
|
|
263
|
-
|
|
266
|
+
map.set(key, id);
|
|
264
267
|
}
|
|
265
268
|
return id;
|
|
266
269
|
}
|
|
267
270
|
|
|
271
|
+
function __getTid(key) {
|
|
272
|
+
return __getTidFromMap(__tidIntern, key);
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
function __getIriTid(value) {
|
|
276
|
+
return __getTidFromMap(__iriTidIntern, value);
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
function __getBlankTid(label) {
|
|
280
|
+
return __getTidFromMap(__blankTidIntern, label);
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
function __getLiteralTid(norm) {
|
|
284
|
+
return __getTidFromMap(__literalTidIntern, norm);
|
|
285
|
+
}
|
|
286
|
+
|
|
268
287
|
function __isQuotedLexical(lit) {
|
|
269
288
|
if (typeof lit !== 'string') return false;
|
|
270
289
|
if (lit.length >= 6) {
|
|
@@ -310,6 +329,14 @@ function __isPlainStringLiteralValue(lit) {
|
|
|
310
329
|
function normalizeLiteralForTid(lit) {
|
|
311
330
|
// Canonicalize so that plain string and explicit xsd:string share the same id.
|
|
312
331
|
if (typeof lit !== 'string') return lit;
|
|
332
|
+
|
|
333
|
+
// Fast path for the overwhelmingly common lexer output for plain string
|
|
334
|
+
// literals: a canonical JSON-style quoted lexical form with no suffix.
|
|
335
|
+
// This avoids literalParts()/language-tag parsing for large fact tables.
|
|
336
|
+
if (lit.length >= 2 && lit.charCodeAt(0) === 34 && lit.charCodeAt(lit.length - 1) === 34 && lit.indexOf('^^') < 0) {
|
|
337
|
+
return `${lit}^^<${XSD_NS}string>`;
|
|
338
|
+
}
|
|
339
|
+
|
|
313
340
|
const [lex, dt] = literalParts(lit);
|
|
314
341
|
if (dt === XSD_NS + 'string') return `${lex}^^<${XSD_NS}string>`;
|
|
315
342
|
if (dt === null && __isPlainStringLiteralValue(lit)) return `${lex}^^<${XSD_NS}string>`;
|
|
@@ -327,7 +354,7 @@ class Iri extends Term {
|
|
|
327
354
|
super();
|
|
328
355
|
this.value = value;
|
|
329
356
|
Object.defineProperty(this, '__tid', {
|
|
330
|
-
value:
|
|
357
|
+
value: __getIriTid(value),
|
|
331
358
|
enumerable: false,
|
|
332
359
|
});
|
|
333
360
|
}
|
|
@@ -339,7 +366,7 @@ class Literal extends Term {
|
|
|
339
366
|
this.value = value; // raw lexical form, e.g. "foo", 12, true, or "\"1944-08-21\"^^..."
|
|
340
367
|
const norm = normalizeLiteralForTid(value);
|
|
341
368
|
const useIntern = typeof norm === 'string' && norm.length <= MAX_LITERAL_TID_LEN;
|
|
342
|
-
const tid = useIntern ?
|
|
369
|
+
const tid = useIntern ? __getLiteralTid(norm) : __nextTid++;
|
|
343
370
|
Object.defineProperty(this, '__tid', {
|
|
344
371
|
value: tid,
|
|
345
372
|
enumerable: false,
|
|
@@ -359,7 +386,7 @@ class Blank extends Term {
|
|
|
359
386
|
super();
|
|
360
387
|
this.label = label; // _:b1, etc.
|
|
361
388
|
Object.defineProperty(this, '__tid', {
|
|
362
|
-
value:
|
|
389
|
+
value: __getBlankTid(label),
|
|
363
390
|
enumerable: false,
|
|
364
391
|
});
|
|
365
392
|
}
|