eyeling 1.5.25 → 1.5.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/eyeling.js +79 -6
- package/package.json +1 -1
package/eyeling.js
CHANGED
|
@@ -318,9 +318,47 @@ function lex(inputText) {
|
|
|
318
318
|
continue;
|
|
319
319
|
}
|
|
320
320
|
|
|
321
|
-
// Directives: @prefix, @base
|
|
321
|
+
// Directives: @prefix, @base (and language tags after string literals)
|
|
322
322
|
if (c === "@") {
|
|
323
|
-
|
|
323
|
+
const prevTok = tokens.length ? tokens[tokens.length - 1] : null;
|
|
324
|
+
const prevWasQuotedLiteral =
|
|
325
|
+
prevTok &&
|
|
326
|
+
prevTok.typ === "Literal" &&
|
|
327
|
+
typeof prevTok.value === "string" &&
|
|
328
|
+
prevTok.value.startsWith('"');
|
|
329
|
+
|
|
330
|
+
i++; // consume '@'
|
|
331
|
+
|
|
332
|
+
if (prevWasQuotedLiteral) {
|
|
333
|
+
// N3 grammar production LANGTAG:
|
|
334
|
+
// "@" [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
|
|
335
|
+
const tagChars = [];
|
|
336
|
+
let cc = peek();
|
|
337
|
+
if (cc === null || !/[A-Za-z]/.test(cc)) {
|
|
338
|
+
throw new Error("Invalid language tag (expected [A-Za-z] after '@')");
|
|
339
|
+
}
|
|
340
|
+
while ((cc = peek()) !== null && /[A-Za-z]/.test(cc)) {
|
|
341
|
+
tagChars.push(cc);
|
|
342
|
+
i++;
|
|
343
|
+
}
|
|
344
|
+
while (peek() === "-") {
|
|
345
|
+
tagChars.push("-");
|
|
346
|
+
i++; // consume '-'
|
|
347
|
+
const segChars = [];
|
|
348
|
+
while ((cc = peek()) !== null && /[A-Za-z0-9]/.test(cc)) {
|
|
349
|
+
segChars.push(cc);
|
|
350
|
+
i++;
|
|
351
|
+
}
|
|
352
|
+
if (!segChars.length) {
|
|
353
|
+
throw new Error("Invalid language tag (expected [A-Za-z0-9]+ after '-')");
|
|
354
|
+
}
|
|
355
|
+
tagChars.push(...segChars);
|
|
356
|
+
}
|
|
357
|
+
tokens.push(new Token("LangTag", tagChars.join("")));
|
|
358
|
+
continue;
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
// Otherwise, treat as a directive (@prefix, @base)
|
|
324
362
|
const wordChars = [];
|
|
325
363
|
let cc;
|
|
326
364
|
while ((cc = peek()) !== null && /[A-Za-z]/.test(cc)) {
|
|
@@ -684,6 +722,23 @@ class Parser {
|
|
|
684
722
|
|
|
685
723
|
if (typ === "Literal") {
|
|
686
724
|
let s = val || "";
|
|
725
|
+
|
|
726
|
+
// Optional language tag: "..."@en, per N3 LANGTAG production.
|
|
727
|
+
if (this.peek().typ === "LangTag") {
|
|
728
|
+
// Only quoted string literals can carry a language tag.
|
|
729
|
+
if (!(s.startsWith('"') && s.endsWith('"'))) {
|
|
730
|
+
throw new Error("Language tag is only allowed on quoted string literals");
|
|
731
|
+
}
|
|
732
|
+
const langTok = this.next();
|
|
733
|
+
const lang = langTok.value || "";
|
|
734
|
+
s = `${s}@${lang}`;
|
|
735
|
+
|
|
736
|
+
// N3/Turtle: language tags and datatypes are mutually exclusive.
|
|
737
|
+
if (this.peek().typ === "HatHat") {
|
|
738
|
+
throw new Error("A literal cannot have both a language tag (@...) and a datatype (^^...)");
|
|
739
|
+
}
|
|
740
|
+
}
|
|
741
|
+
|
|
687
742
|
if (this.peek().typ === "HatHat") {
|
|
688
743
|
this.next();
|
|
689
744
|
const dtTok = this.next();
|
|
@@ -1594,16 +1649,34 @@ function composeSubst(outer, delta) {
|
|
|
1594
1649
|
// ============================================================================
|
|
1595
1650
|
|
|
1596
1651
|
/**
 * Split a literal into its lexical form and datatype (if any).
 *
 * An optional language tag is stripped from the lexical form:
 *   "\"hello\"@en"            -> ["\"hello\"", null]
 *   "\"5\"^^<http://ex/int>"  -> ["\"5\"", "http://ex/int"]
 *
 * @param {string} lit - Literal text, e.g. `"abc"`, `"abc"@en`, `"1"^^<iri>`, `42`.
 * @returns {[string, (string|null)]} `[lex, dt]`, where `lex` is the lexical
 *   form without any language tag and `dt` is the trimmed datatype with
 *   surrounding angle brackets removed, or `null` when there is no `^^` marker.
 */
function literalParts(lit) {
  let lex = lit;
  let dt = null;

  // For a quoted literal the datatype marker can only follow the closing
  // quote, so start searching there. Searching from offset 0 would wrongly
  // split a string whose own text contains "^^" (e.g. "a^^b").
  // When there is no closing quote (lastIndexOf yields 0) the search starts
  // at 0, which is the same as scanning the whole string.
  const searchFrom = lit.startsWith('"') ? lit.lastIndexOf('"') : 0;
  const idx = lit.indexOf("^^", searchFrom);

  if (idx >= 0) {
    lex = lit.slice(0, idx);
    dt = lit.slice(idx + 2).trim();
    if (dt.startsWith("<") && dt.endsWith(">")) {
      dt = dt.slice(1, -1);
    }
  }

  // Strip LANGTAG from the lexical form when present.
  if (lex.length >= 2 && lex[0] === '"') {
    const lastQuote = lex.lastIndexOf('"');
    if (lastQuote > 0 && lastQuote < lex.length - 1 && lex[lastQuote + 1] === "@") {
      const lang = lex.slice(lastQuote + 2);
      // Only drop the suffix when it is a well-formed tag:
      // "@" [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
      if (/^[A-Za-z]+(?:-[A-Za-z0-9]+)*$/.test(lang)) {
        lex = lex.slice(0, lastQuote + 1);
      }
    }
  }

  return [lex, dt];
}
|
|
1608
1681
|
|
|
1609
1682
|
function stripQuotes(lex) {
|