eyeling 1.25.0 → 1.25.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/eyeling.browser.js +333 -110
- package/eyeling.js +333 -110
- package/lib/cli.js +1 -1
- package/lib/engine.js +131 -41
- package/lib/lexer.js +143 -49
- package/lib/parser.js +24 -12
- package/lib/prelude.js +34 -7
- package/package.json +1 -1
package/lib/lexer.js
CHANGED
|
@@ -30,7 +30,26 @@ class N3SyntaxError extends SyntaxError {
|
|
|
30
30
|
}
|
|
31
31
|
|
|
32
32
|
function isWs(c) {
|
|
33
|
-
|
|
33
|
+
if (c === null || c === undefined) return false;
|
|
34
|
+
const code = c.charCodeAt(0);
|
|
35
|
+
// Fast path for the whitespace used by N3/Turtle inputs.
|
|
36
|
+
return code === 0x20 || code === 0x09 || code === 0x0a || code === 0x0d || code === 0x0c;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
function isAsciiAlphaCode(code) {
|
|
40
|
+
return (code >= 65 && code <= 90) || (code >= 97 && code <= 122);
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
function isAsciiDigitCode(code) {
|
|
44
|
+
return code >= 48 && code <= 57;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
function isAsciiAlpha(c) {
|
|
48
|
+
return c !== null && c !== undefined && isAsciiAlphaCode(c.charCodeAt(0));
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
function isAsciiDigit(c) {
|
|
52
|
+
return c !== null && c !== undefined && isAsciiDigitCode(c.charCodeAt(0));
|
|
34
53
|
}
|
|
35
54
|
|
|
36
55
|
// Turtle/N3 prefixed names (PNAME_*) allow many Unicode letters and certain
|
|
@@ -43,13 +62,18 @@ function isWs(c) {
|
|
|
43
62
|
//
|
|
44
63
|
// We implement a grammar-aligned matcher for PN_CHARS* and PLX fragments.
|
|
45
64
|
function isHexDigit(c) {
|
|
46
|
-
|
|
65
|
+
if (c === null || c === undefined) return false;
|
|
66
|
+
const code = c.charCodeAt(0);
|
|
67
|
+
return (code >= 48 && code <= 57) || (code >= 65 && code <= 70) || (code >= 97 && code <= 102);
|
|
47
68
|
}
|
|
48
69
|
|
|
49
70
|
function isPnCharsBase(c) {
|
|
50
71
|
// Approximation of PN_CHARS_BASE from the N3 grammar using Unicode properties.
|
|
51
72
|
// Covers most letters used in practice (including ñ) and common scripts.
|
|
52
|
-
|
|
73
|
+
if (c === null || c === undefined) return false;
|
|
74
|
+
const code = c.charCodeAt(0);
|
|
75
|
+
if (isAsciiAlphaCode(code)) return true;
|
|
76
|
+
return /\p{L}|\p{Nl}/u.test(c);
|
|
53
77
|
}
|
|
54
78
|
|
|
55
79
|
function isPnCharsU(c) {
|
|
@@ -59,9 +83,11 @@ function isPnCharsU(c) {
|
|
|
59
83
|
|
|
60
84
|
function isPnChars(c) {
|
|
61
85
|
// PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | U+00B7 | [U+0300-U+036F] | [U+203F-U+2040]
|
|
62
|
-
if (c === null) return false;
|
|
86
|
+
if (c === null || c === undefined) return false;
|
|
87
|
+
const code = c.charCodeAt(0);
|
|
88
|
+
if (isAsciiAlphaCode(code) || isAsciiDigitCode(code) || code === 95 || code === 45) return true;
|
|
63
89
|
if (isPnCharsU(c)) return true;
|
|
64
|
-
if (c === '
|
|
90
|
+
if (c === '\u00B7') return true;
|
|
65
91
|
const cp = c.codePointAt(0);
|
|
66
92
|
return (cp >= 0x0300 && cp <= 0x036f) || (cp >= 0x203f && cp <= 0x2040);
|
|
67
93
|
}
|
|
@@ -1163,10 +1189,23 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
1163
1189
|
return text;
|
|
1164
1190
|
}
|
|
1165
1191
|
|
|
1192
|
+
|
|
1193
|
+
function isNumericLikeIdentifier(word) {
|
|
1194
|
+
if (typeof word !== 'string' || word.length === 0) return false;
|
|
1195
|
+
for (let j = 0; j < word.length; j++) {
|
|
1196
|
+
const code = word.charCodeAt(j);
|
|
1197
|
+
if (!((code >= 48 && code <= 57) || code === 46 || code === 45)) return false;
|
|
1198
|
+
}
|
|
1199
|
+
return true;
|
|
1200
|
+
}
|
|
1201
|
+
|
|
1166
1202
|
function lex(inputText, opts = {}) {
|
|
1167
1203
|
const rdf = !!(opts && opts.rdf);
|
|
1168
1204
|
if (rdf) inputText = normalizeRdfCompatibility(inputText);
|
|
1169
|
-
|
|
1205
|
+
// Avoid copying large ASCII/BMP inputs into an Array. Array.from() is
|
|
1206
|
+
// only needed when the text contains surrogate pairs and we want the old
|
|
1207
|
+
// code-point iteration behavior for non-BMP characters.
|
|
1208
|
+
const chars = /[\uD800-\uDFFF]/.test(inputText) ? Array.from(inputText) : inputText;
|
|
1170
1209
|
const n = chars.length;
|
|
1171
1210
|
let i = 0;
|
|
1172
1211
|
const tokens = [];
|
|
@@ -1182,19 +1221,29 @@ function lex(inputText, opts = {}) {
|
|
|
1182
1221
|
// - Accepts percent escapes (%HH) as PLX fragments.
|
|
1183
1222
|
// - Accepts PN_LOCAL_ESC backslash escapes and decodes them ("\\." -> ".").
|
|
1184
1223
|
// - Accepts '.' inside a name only when it is not terminal.
|
|
1224
|
+
function sliceChars(start, end) {
|
|
1225
|
+
return typeof chars === 'string' ? chars.slice(start, end) : chars.slice(start, end).join('');
|
|
1226
|
+
}
|
|
1227
|
+
|
|
1185
1228
|
function readIdentText(startOffsetForErrors) {
|
|
1186
|
-
const
|
|
1229
|
+
const start = i;
|
|
1230
|
+
let out = null;
|
|
1231
|
+
|
|
1232
|
+
function appendRawUntilHere() {
|
|
1233
|
+
if (out === null) out = [sliceChars(start, i)];
|
|
1234
|
+
}
|
|
1235
|
+
|
|
1187
1236
|
while (i < n) {
|
|
1188
|
-
const cc =
|
|
1189
|
-
if (cc === null || isWs(cc)) break;
|
|
1237
|
+
const cc = chars[i];
|
|
1238
|
+
if (cc === null || cc === undefined || isWs(cc)) break;
|
|
1190
1239
|
|
|
1191
1240
|
// Hard stops: delimiters cannot appear unescaped inside PNAME tokens.
|
|
1192
|
-
if ('{}()[]
|
|
1241
|
+
if (cc === '{' || cc === '}' || cc === '(' || cc === ')' || cc === '[' || cc === ']' || cc === ';' || cc === ',') break;
|
|
1193
1242
|
|
|
1194
1243
|
// Dot is allowed inside PN_LOCAL, but not at the end.
|
|
1195
1244
|
if (cc === '.') {
|
|
1196
1245
|
if (!canContinueAfterDot(peek(1))) break;
|
|
1197
|
-
out.push('.');
|
|
1246
|
+
if (out !== null) out.push('.');
|
|
1198
1247
|
i++;
|
|
1199
1248
|
continue;
|
|
1200
1249
|
}
|
|
@@ -1209,6 +1258,7 @@ function lex(inputText, opts = {}) {
|
|
|
1209
1258
|
typeof startOffsetForErrors === 'number' ? startOffsetForErrors : i,
|
|
1210
1259
|
);
|
|
1211
1260
|
}
|
|
1261
|
+
appendRawUntilHere();
|
|
1212
1262
|
out.push('%', h1, h2);
|
|
1213
1263
|
i += 3;
|
|
1214
1264
|
continue;
|
|
@@ -1218,6 +1268,7 @@ function lex(inputText, opts = {}) {
|
|
|
1218
1268
|
if (cc === '\\') {
|
|
1219
1269
|
const esc = peek(1);
|
|
1220
1270
|
if (esc !== null && PN_LOCAL_ESC_SET.has(esc)) {
|
|
1271
|
+
appendRawUntilHere();
|
|
1221
1272
|
out.push(esc); // decoded form
|
|
1222
1273
|
i += 2;
|
|
1223
1274
|
continue;
|
|
@@ -1229,14 +1280,14 @@ function lex(inputText, opts = {}) {
|
|
|
1229
1280
|
}
|
|
1230
1281
|
|
|
1231
1282
|
if (isIdentChar(cc)) {
|
|
1232
|
-
out.push(cc);
|
|
1283
|
+
if (out !== null) out.push(cc);
|
|
1233
1284
|
i++;
|
|
1234
1285
|
continue;
|
|
1235
1286
|
}
|
|
1236
1287
|
|
|
1237
1288
|
break;
|
|
1238
1289
|
}
|
|
1239
|
-
return out.join('');
|
|
1290
|
+
return out === null ? sliceChars(start, i) : out.join('');
|
|
1240
1291
|
}
|
|
1241
1292
|
|
|
1242
1293
|
while (i < n) {
|
|
@@ -1315,22 +1366,47 @@ function lex(inputText, opts = {}) {
|
|
|
1315
1366
|
continue;
|
|
1316
1367
|
}
|
|
1317
1368
|
|
|
1318
|
-
// 5) Single-character punctuation
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1369
|
+
// 5) Single-character punctuation. Use a switch rather than allocating a
|
|
1370
|
+
// mapping object for every punctuation token in large inputs.
|
|
1371
|
+
switch (c) {
|
|
1372
|
+
case '{':
|
|
1373
|
+
tokens.push(new Token('LBrace', null, i));
|
|
1374
|
+
i++;
|
|
1375
|
+
continue;
|
|
1376
|
+
case '}':
|
|
1377
|
+
tokens.push(new Token('RBrace', null, i));
|
|
1378
|
+
i++;
|
|
1379
|
+
continue;
|
|
1380
|
+
case '(':
|
|
1381
|
+
tokens.push(new Token('LParen', null, i));
|
|
1382
|
+
i++;
|
|
1383
|
+
continue;
|
|
1384
|
+
case ')':
|
|
1385
|
+
tokens.push(new Token('RParen', null, i));
|
|
1386
|
+
i++;
|
|
1387
|
+
continue;
|
|
1388
|
+
case '[':
|
|
1389
|
+
tokens.push(new Token('LBracket', null, i));
|
|
1390
|
+
i++;
|
|
1391
|
+
continue;
|
|
1392
|
+
case ']':
|
|
1393
|
+
tokens.push(new Token('RBracket', null, i));
|
|
1394
|
+
i++;
|
|
1395
|
+
continue;
|
|
1396
|
+
case ';':
|
|
1397
|
+
tokens.push(new Token('Semicolon', null, i));
|
|
1398
|
+
i++;
|
|
1399
|
+
continue;
|
|
1400
|
+
case ',':
|
|
1401
|
+
tokens.push(new Token('Comma', null, i));
|
|
1402
|
+
i++;
|
|
1403
|
+
continue;
|
|
1404
|
+
case '.':
|
|
1405
|
+
tokens.push(new Token('Dot', null, i));
|
|
1406
|
+
i++;
|
|
1407
|
+
continue;
|
|
1408
|
+
default:
|
|
1409
|
+
break;
|
|
1334
1410
|
}
|
|
1335
1411
|
|
|
1336
1412
|
// String literal: short "..." or long """..."""
|
|
@@ -1389,26 +1465,36 @@ function lex(inputText, opts = {}) {
|
|
|
1389
1465
|
continue;
|
|
1390
1466
|
}
|
|
1391
1467
|
|
|
1392
|
-
// Short string literal " ... "
|
|
1468
|
+
// Short string literal " ... ". Most data files contain plain
|
|
1469
|
+
// unescaped labels; keep that path slice-based and avoid building an
|
|
1470
|
+
// intermediate character array + raw quoted string.
|
|
1393
1471
|
i++; // consume opening "
|
|
1394
|
-
const
|
|
1472
|
+
const contentStart = i;
|
|
1473
|
+
let sChars = null;
|
|
1474
|
+
let closed = false;
|
|
1395
1475
|
while (i < n) {
|
|
1396
1476
|
const cc = chars[i];
|
|
1397
1477
|
i++;
|
|
1398
1478
|
if (cc === '\\') {
|
|
1479
|
+
if (sChars === null) sChars = [sliceChars(contentStart, i - 1)];
|
|
1399
1480
|
if (i < n) {
|
|
1400
1481
|
const esc = chars[i];
|
|
1401
1482
|
i++;
|
|
1402
1483
|
sChars.push('\\');
|
|
1403
1484
|
sChars.push(esc);
|
|
1485
|
+
} else {
|
|
1486
|
+
sChars.push('\\');
|
|
1404
1487
|
}
|
|
1405
1488
|
continue;
|
|
1406
1489
|
}
|
|
1407
|
-
if (cc === '"')
|
|
1408
|
-
|
|
1490
|
+
if (cc === '"') {
|
|
1491
|
+
closed = true;
|
|
1492
|
+
break;
|
|
1493
|
+
}
|
|
1494
|
+
if (sChars !== null) sChars.push(cc);
|
|
1409
1495
|
}
|
|
1410
|
-
const
|
|
1411
|
-
const decoded = decodeN3StringEscapes(
|
|
1496
|
+
const rawContent = sChars === null ? sliceChars(contentStart, closed ? i - 1 : i) : sChars.join('');
|
|
1497
|
+
const decoded = sChars === null ? rawContent : decodeN3StringEscapes(rawContent, start);
|
|
1412
1498
|
assertValidStringLiteralValue(decoded, start);
|
|
1413
1499
|
const s = JSON.stringify(decoded); // canonical short quoted form
|
|
1414
1500
|
tokens.push(new Token('Literal', s, start));
|
|
@@ -1473,24 +1559,32 @@ function lex(inputText, opts = {}) {
|
|
|
1473
1559
|
|
|
1474
1560
|
// Short string literal ' ... '
|
|
1475
1561
|
i++; // consume opening '
|
|
1476
|
-
const
|
|
1562
|
+
const contentStart = i;
|
|
1563
|
+
let sChars = null;
|
|
1564
|
+
let closed = false;
|
|
1477
1565
|
while (i < n) {
|
|
1478
1566
|
const cc = chars[i];
|
|
1479
1567
|
i++;
|
|
1480
1568
|
if (cc === '\\') {
|
|
1569
|
+
if (sChars === null) sChars = [sliceChars(contentStart, i - 1)];
|
|
1481
1570
|
if (i < n) {
|
|
1482
1571
|
const esc = chars[i];
|
|
1483
1572
|
i++;
|
|
1484
1573
|
sChars.push('\\');
|
|
1485
1574
|
sChars.push(esc);
|
|
1575
|
+
} else {
|
|
1576
|
+
sChars.push('\\');
|
|
1486
1577
|
}
|
|
1487
1578
|
continue;
|
|
1488
1579
|
}
|
|
1489
|
-
if (cc === "'")
|
|
1490
|
-
|
|
1580
|
+
if (cc === "'") {
|
|
1581
|
+
closed = true;
|
|
1582
|
+
break;
|
|
1583
|
+
}
|
|
1584
|
+
if (sChars !== null) sChars.push(cc);
|
|
1491
1585
|
}
|
|
1492
|
-
const
|
|
1493
|
-
const decoded = decodeN3StringEscapes(
|
|
1586
|
+
const rawContent = sChars === null ? sliceChars(contentStart, closed ? i - 1 : i) : sChars.join('');
|
|
1587
|
+
const decoded = sChars === null ? rawContent : decodeN3StringEscapes(rawContent, start);
|
|
1494
1588
|
assertValidStringLiteralValue(decoded, start);
|
|
1495
1589
|
const s = JSON.stringify(decoded); // canonical short quoted form
|
|
1496
1590
|
tokens.push(new Token('Literal', s, start));
|
|
@@ -1523,10 +1617,10 @@ function lex(inputText, opts = {}) {
|
|
|
1523
1617
|
// "@" [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
|
|
1524
1618
|
const tagChars = [];
|
|
1525
1619
|
let cc = peek();
|
|
1526
|
-
if (cc === null ||
|
|
1620
|
+
if (cc === null || !isAsciiAlpha(cc)) {
|
|
1527
1621
|
throw new N3SyntaxError("Invalid language tag (expected [A-Za-z] after '@')", start);
|
|
1528
1622
|
}
|
|
1529
|
-
while ((cc = peek()) !== null &&
|
|
1623
|
+
while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
|
|
1530
1624
|
tagChars.push(cc);
|
|
1531
1625
|
i++;
|
|
1532
1626
|
}
|
|
@@ -1550,7 +1644,7 @@ function lex(inputText, opts = {}) {
|
|
|
1550
1644
|
// Otherwise, treat as a directive (@prefix, @base)
|
|
1551
1645
|
const wordChars = [];
|
|
1552
1646
|
let cc;
|
|
1553
|
-
while ((cc = peek()) !== null &&
|
|
1647
|
+
while ((cc = peek()) !== null && isAsciiAlpha(cc)) {
|
|
1554
1648
|
wordChars.push(cc);
|
|
1555
1649
|
i++;
|
|
1556
1650
|
}
|
|
@@ -1562,19 +1656,19 @@ function lex(inputText, opts = {}) {
|
|
|
1562
1656
|
}
|
|
1563
1657
|
|
|
1564
1658
|
// 6) Numeric literal (integer or float)
|
|
1565
|
-
if (
|
|
1659
|
+
if (isAsciiDigit(c) || (c === '-' && peek(1) !== null && isAsciiDigit(peek(1)))) {
|
|
1566
1660
|
const start = i;
|
|
1567
1661
|
const numChars = [c];
|
|
1568
1662
|
i++;
|
|
1569
1663
|
while (i < n) {
|
|
1570
1664
|
const cc = chars[i];
|
|
1571
|
-
if (
|
|
1665
|
+
if (isAsciiDigit(cc)) {
|
|
1572
1666
|
numChars.push(cc);
|
|
1573
1667
|
i++;
|
|
1574
1668
|
continue;
|
|
1575
1669
|
}
|
|
1576
1670
|
if (cc === '.') {
|
|
1577
|
-
if (i + 1 < n &&
|
|
1671
|
+
if (i + 1 < n && isAsciiDigit(chars[i + 1])) {
|
|
1578
1672
|
numChars.push('.');
|
|
1579
1673
|
i++;
|
|
1580
1674
|
continue;
|
|
@@ -1589,14 +1683,14 @@ function lex(inputText, opts = {}) {
|
|
|
1589
1683
|
if (i < n && (chars[i] === 'e' || chars[i] === 'E')) {
|
|
1590
1684
|
let j = i + 1;
|
|
1591
1685
|
if (j < n && (chars[j] === '+' || chars[j] === '-')) j++;
|
|
1592
|
-
if (j < n &&
|
|
1686
|
+
if (j < n && isAsciiDigit(chars[j])) {
|
|
1593
1687
|
numChars.push(chars[i]); // e/E
|
|
1594
1688
|
i++;
|
|
1595
1689
|
if (i < n && (chars[i] === '+' || chars[i] === '-')) {
|
|
1596
1690
|
numChars.push(chars[i]);
|
|
1597
1691
|
i++;
|
|
1598
1692
|
}
|
|
1599
|
-
while (i < n &&
|
|
1693
|
+
while (i < n && isAsciiDigit(chars[i])) {
|
|
1600
1694
|
numChars.push(chars[i]);
|
|
1601
1695
|
i++;
|
|
1602
1696
|
}
|
|
@@ -1615,7 +1709,7 @@ function lex(inputText, opts = {}) {
|
|
|
1615
1709
|
}
|
|
1616
1710
|
if (word === 'true' || word === 'false') {
|
|
1617
1711
|
tokens.push(new Token('Literal', word, start));
|
|
1618
|
-
} else if (
|
|
1712
|
+
} else if (isNumericLikeIdentifier(word)) {
|
|
1619
1713
|
tokens.push(new Token('Literal', word, start));
|
|
1620
1714
|
} else {
|
|
1621
1715
|
tokens.push(new Token('Ident', word, start));
|
package/lib/parser.js
CHANGED
|
@@ -86,7 +86,15 @@ class Parser {
|
|
|
86
86
|
}
|
|
87
87
|
|
|
88
88
|
isIdentKeyword(tok, keyword) {
|
|
89
|
-
|
|
89
|
+
if (!tok || tok.typ !== 'Ident' || typeof tok.value !== 'string') return false;
|
|
90
|
+
const v = tok.value;
|
|
91
|
+
if (v.length !== keyword.length) return false;
|
|
92
|
+
for (let i = 0; i < keyword.length; i++) {
|
|
93
|
+
const code = v.charCodeAt(i);
|
|
94
|
+
const lower = code >= 65 && code <= 90 ? code + 32 : code;
|
|
95
|
+
if (lower !== keyword.charCodeAt(i)) return false;
|
|
96
|
+
}
|
|
97
|
+
return true;
|
|
90
98
|
}
|
|
91
99
|
|
|
92
100
|
canStartSparqlPrefixDirective() {
|
|
@@ -233,7 +241,7 @@ class Parser {
|
|
|
233
241
|
} else if (tok2.typ === 'Ident') {
|
|
234
242
|
const qn = tok2.value || '';
|
|
235
243
|
if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok2, qn);
|
|
236
|
-
assertValidQNamePrefix(qn.
|
|
244
|
+
assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok2, '@prefix directive IRI');
|
|
237
245
|
iri = this.prefixes.expandQName(qn);
|
|
238
246
|
} else {
|
|
239
247
|
this.fail(`Expected IRI after @prefix, got ${tok2.toString()}`, tok2);
|
|
@@ -250,7 +258,7 @@ class Parser {
|
|
|
250
258
|
} else if (tok.typ === 'Ident') {
|
|
251
259
|
const qn = tok.value || '';
|
|
252
260
|
if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok, qn);
|
|
253
|
-
assertValidQNamePrefix(qn.
|
|
261
|
+
assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok, '@base directive IRI');
|
|
254
262
|
iri = this.prefixes.expandQName(qn);
|
|
255
263
|
} else {
|
|
256
264
|
this.fail(`Expected IRI after @base, got ${tok.toString()}`, tok);
|
|
@@ -279,7 +287,7 @@ class Parser {
|
|
|
279
287
|
} else if (tok2.typ === 'Ident') {
|
|
280
288
|
const qn = tok2.value || '';
|
|
281
289
|
if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok2, qn);
|
|
282
|
-
assertValidQNamePrefix(qn.
|
|
290
|
+
assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok2, '@prefix directive IRI');
|
|
283
291
|
iri = this.prefixes.expandQName(qn);
|
|
284
292
|
} else {
|
|
285
293
|
this.fail(`Expected IRI after PREFIX, got ${tok2.toString()}`, tok2);
|
|
@@ -300,7 +308,7 @@ class Parser {
|
|
|
300
308
|
} else if (tok.typ === 'Ident') {
|
|
301
309
|
const qn = tok.value || '';
|
|
302
310
|
if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), tok, qn);
|
|
303
|
-
assertValidQNamePrefix(qn.
|
|
311
|
+
assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), tok, 'BASE directive IRI');
|
|
304
312
|
iri = this.prefixes.expandQName(qn);
|
|
305
313
|
} else {
|
|
306
314
|
this.fail(`Expected IRI after BASE, got ${tok.toString()}`, tok);
|
|
@@ -347,14 +355,18 @@ class Parser {
|
|
|
347
355
|
const name = val || '';
|
|
348
356
|
if (name === 'a') {
|
|
349
357
|
return internIri(RDF_NS + 'type');
|
|
350
|
-
}
|
|
358
|
+
}
|
|
359
|
+
const sep = name.indexOf(':');
|
|
360
|
+
if (sep === 1 && name.charCodeAt(0) === 95) {
|
|
351
361
|
return new Blank(name);
|
|
352
|
-
} else if (name.includes(':')) {
|
|
353
|
-
assertValidQNamePrefix(name.split(':', 1)[0], this.fail.bind(this), tok);
|
|
354
|
-
return internIri(this.prefixes.expandQName(name));
|
|
355
|
-
} else {
|
|
356
|
-
failInvalidKeywordLikeIdent(this.fail.bind(this), tok, name);
|
|
357
362
|
}
|
|
363
|
+
if (sep >= 0) {
|
|
364
|
+
const prefixName = name.slice(0, sep);
|
|
365
|
+
assertValidQNamePrefix(prefixName, this.fail.bind(this), tok);
|
|
366
|
+
const base = this.prefixes.map[prefixName] || '';
|
|
367
|
+
return internIri(base ? base + name.slice(sep + 1) : name);
|
|
368
|
+
}
|
|
369
|
+
failInvalidKeywordLikeIdent(this.fail.bind(this), tok, name);
|
|
358
370
|
}
|
|
359
371
|
|
|
360
372
|
if (typ === 'Literal') {
|
|
@@ -385,7 +397,7 @@ class Parser {
|
|
|
385
397
|
} else if (dtTok.typ === 'Ident') {
|
|
386
398
|
const qn = dtTok.value || '';
|
|
387
399
|
if (!qn.includes(':')) failInvalidKeywordLikeIdent(this.fail.bind(this), dtTok, qn);
|
|
388
|
-
assertValidQNamePrefix(qn.
|
|
400
|
+
assertValidQNamePrefix(qn.slice(0, qn.indexOf(':')), this.fail.bind(this), dtTok, 'datatype prefixed name');
|
|
389
401
|
dtIri = this.prefixes.expandQName(qn);
|
|
390
402
|
} else {
|
|
391
403
|
this.fail(`Expected datatype after ^^, got ${dtTok.toString()}`, dtTok);
|
package/lib/prelude.js
CHANGED
|
@@ -250,21 +250,40 @@ function literalParts(lit) {
|
|
|
250
250
|
// equality fast-paths than repeated string key construction.
|
|
251
251
|
|
|
252
252
|
let __nextTid = 1;
|
|
253
|
-
const __tidIntern = new Map(); //
|
|
253
|
+
const __tidIntern = new Map(); // legacy generic key -> number
|
|
254
|
+
const __iriTidIntern = new Map(); // IRI value -> number
|
|
255
|
+
const __blankTidIntern = new Map(); // blank label -> number
|
|
256
|
+
const __literalTidIntern = new Map(); // normalized literal lexical form -> number
|
|
254
257
|
|
|
255
258
|
// Avoid storing extremely large literal keys in the global term-id intern map.
|
|
256
259
|
// For huge literals we still assign a unique __tid, but we do not intern the key.
|
|
257
260
|
const MAX_LITERAL_TID_LEN = 1024;
|
|
258
261
|
|
|
259
|
-
function
|
|
260
|
-
let id =
|
|
262
|
+
function __getTidFromMap(map, key) {
|
|
263
|
+
let id = map.get(key);
|
|
261
264
|
if (!id) {
|
|
262
265
|
id = __nextTid++;
|
|
263
|
-
|
|
266
|
+
map.set(key, id);
|
|
264
267
|
}
|
|
265
268
|
return id;
|
|
266
269
|
}
|
|
267
270
|
|
|
271
|
+
function __getTid(key) {
|
|
272
|
+
return __getTidFromMap(__tidIntern, key);
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
function __getIriTid(value) {
|
|
276
|
+
return __getTidFromMap(__iriTidIntern, value);
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
function __getBlankTid(label) {
|
|
280
|
+
return __getTidFromMap(__blankTidIntern, label);
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
function __getLiteralTid(norm) {
|
|
284
|
+
return __getTidFromMap(__literalTidIntern, norm);
|
|
285
|
+
}
|
|
286
|
+
|
|
268
287
|
function __isQuotedLexical(lit) {
|
|
269
288
|
if (typeof lit !== 'string') return false;
|
|
270
289
|
if (lit.length >= 6) {
|
|
@@ -310,6 +329,14 @@ function __isPlainStringLiteralValue(lit) {
|
|
|
310
329
|
function normalizeLiteralForTid(lit) {
|
|
311
330
|
// Canonicalize so that plain string and explicit xsd:string share the same id.
|
|
312
331
|
if (typeof lit !== 'string') return lit;
|
|
332
|
+
|
|
333
|
+
// Fast path for the overwhelmingly common lexer output for plain string
|
|
334
|
+
// literals: a canonical JSON-style quoted lexical form with no suffix.
|
|
335
|
+
// This avoids literalParts()/language-tag parsing for large fact tables.
|
|
336
|
+
if (lit.length >= 2 && lit.charCodeAt(0) === 34 && lit.charCodeAt(lit.length - 1) === 34 && lit.indexOf('^^') < 0) {
|
|
337
|
+
return `${lit}^^<${XSD_NS}string>`;
|
|
338
|
+
}
|
|
339
|
+
|
|
313
340
|
const [lex, dt] = literalParts(lit);
|
|
314
341
|
if (dt === XSD_NS + 'string') return `${lex}^^<${XSD_NS}string>`;
|
|
315
342
|
if (dt === null && __isPlainStringLiteralValue(lit)) return `${lex}^^<${XSD_NS}string>`;
|
|
@@ -327,7 +354,7 @@ class Iri extends Term {
|
|
|
327
354
|
super();
|
|
328
355
|
this.value = value;
|
|
329
356
|
Object.defineProperty(this, '__tid', {
|
|
330
|
-
value:
|
|
357
|
+
value: __getIriTid(value),
|
|
331
358
|
enumerable: false,
|
|
332
359
|
});
|
|
333
360
|
}
|
|
@@ -339,7 +366,7 @@ class Literal extends Term {
|
|
|
339
366
|
this.value = value; // raw lexical form, e.g. "foo", 12, true, or "\"1944-08-21\"^^..."
|
|
340
367
|
const norm = normalizeLiteralForTid(value);
|
|
341
368
|
const useIntern = typeof norm === 'string' && norm.length <= MAX_LITERAL_TID_LEN;
|
|
342
|
-
const tid = useIntern ?
|
|
369
|
+
const tid = useIntern ? __getLiteralTid(norm) : __nextTid++;
|
|
343
370
|
Object.defineProperty(this, '__tid', {
|
|
344
371
|
value: tid,
|
|
345
372
|
enumerable: false,
|
|
@@ -359,7 +386,7 @@ class Blank extends Term {
|
|
|
359
386
|
super();
|
|
360
387
|
this.label = label; // _:b1, etc.
|
|
361
388
|
Object.defineProperty(this, '__tid', {
|
|
362
|
-
value:
|
|
389
|
+
value: __getBlankTid(label),
|
|
363
390
|
enumerable: false,
|
|
364
391
|
});
|
|
365
392
|
}
|