eyeling 1.5.25 → 1.5.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/eyeling.js +116 -7
- package/package.json +1 -1
- package/test/api.test.js +15 -0
package/eyeling.js
CHANGED
|
@@ -280,8 +280,41 @@ function lex(inputText) {
|
|
|
280
280
|
continue;
|
|
281
281
|
}
|
|
282
282
|
|
|
283
|
-
// String literal
|
|
283
|
+
// String literal: short "..." or long """..."""
|
|
284
284
|
if (c === '"') {
|
|
285
|
+
// Long string literal """ ... """
|
|
286
|
+
if (peek(1) === '"' && peek(2) === '"') {
|
|
287
|
+
i += 3; // consume opening """
|
|
288
|
+
const sChars = [];
|
|
289
|
+
let closed = false;
|
|
290
|
+
while (i < n) {
|
|
291
|
+
// closing delimiter?
|
|
292
|
+
if (peek() === '"' && peek(1) === '"' && peek(2) === '"') {
|
|
293
|
+
i += 3; // consume closing """
|
|
294
|
+
closed = true;
|
|
295
|
+
break;
|
|
296
|
+
}
|
|
297
|
+
let cc = chars[i];
|
|
298
|
+
i++;
|
|
299
|
+
if (cc === "\\") {
|
|
300
|
+
// Preserve escapes verbatim (same behavior as short strings)
|
|
301
|
+
if (i < n) {
|
|
302
|
+
const esc = chars[i];
|
|
303
|
+
i++;
|
|
304
|
+
sChars.push("\\");
|
|
305
|
+
sChars.push(esc);
|
|
306
|
+
}
|
|
307
|
+
continue;
|
|
308
|
+
}
|
|
309
|
+
sChars.push(cc);
|
|
310
|
+
}
|
|
311
|
+
if (!closed) throw new Error('Unterminated long string literal """..."""');
|
|
312
|
+
const s = '"""' + sChars.join("") + '"""';
|
|
313
|
+
tokens.push(new Token("Literal", s));
|
|
314
|
+
continue;
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
// Short string literal " ... "
|
|
285
318
|
i++; // consume opening "
|
|
286
319
|
const sChars = [];
|
|
287
320
|
while (i < n) {
|
|
@@ -318,9 +351,47 @@ function lex(inputText) {
|
|
|
318
351
|
continue;
|
|
319
352
|
}
|
|
320
353
|
|
|
321
|
-
// Directives: @prefix, @base
|
|
354
|
+
// Directives: @prefix, @base (and language tags after string literals)
|
|
322
355
|
if (c === "@") {
|
|
323
|
-
|
|
356
|
+
const prevTok = tokens.length ? tokens[tokens.length - 1] : null;
|
|
357
|
+
const prevWasQuotedLiteral =
|
|
358
|
+
prevTok &&
|
|
359
|
+
prevTok.typ === "Literal" &&
|
|
360
|
+
typeof prevTok.value === "string" &&
|
|
361
|
+
prevTok.value.startsWith('"');
|
|
362
|
+
|
|
363
|
+
i++; // consume '@'
|
|
364
|
+
|
|
365
|
+
if (prevWasQuotedLiteral) {
|
|
366
|
+
// N3 grammar production LANGTAG:
|
|
367
|
+
// "@" [a-zA-Z]+ ("-" [a-zA-Z0-9]+)*
|
|
368
|
+
const tagChars = [];
|
|
369
|
+
let cc = peek();
|
|
370
|
+
if (cc === null || !/[A-Za-z]/.test(cc)) {
|
|
371
|
+
throw new Error("Invalid language tag (expected [A-Za-z] after '@')");
|
|
372
|
+
}
|
|
373
|
+
while ((cc = peek()) !== null && /[A-Za-z]/.test(cc)) {
|
|
374
|
+
tagChars.push(cc);
|
|
375
|
+
i++;
|
|
376
|
+
}
|
|
377
|
+
while (peek() === "-") {
|
|
378
|
+
tagChars.push("-");
|
|
379
|
+
i++; // consume '-'
|
|
380
|
+
const segChars = [];
|
|
381
|
+
while ((cc = peek()) !== null && /[A-Za-z0-9]/.test(cc)) {
|
|
382
|
+
segChars.push(cc);
|
|
383
|
+
i++;
|
|
384
|
+
}
|
|
385
|
+
if (!segChars.length) {
|
|
386
|
+
throw new Error("Invalid language tag (expected [A-Za-z0-9]+ after '-')");
|
|
387
|
+
}
|
|
388
|
+
tagChars.push(...segChars);
|
|
389
|
+
}
|
|
390
|
+
tokens.push(new Token("LangTag", tagChars.join("")));
|
|
391
|
+
continue;
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
// Otherwise, treat as a directive (@prefix, @base)
|
|
324
395
|
const wordChars = [];
|
|
325
396
|
let cc;
|
|
326
397
|
while ((cc = peek()) !== null && /[A-Za-z]/.test(cc)) {
|
|
@@ -684,6 +755,23 @@ class Parser {
|
|
|
684
755
|
|
|
685
756
|
if (typ === "Literal") {
|
|
686
757
|
let s = val || "";
|
|
758
|
+
|
|
759
|
+
// Optional language tag: "..."@en, per N3 LANGTAG production.
|
|
760
|
+
if (this.peek().typ === "LangTag") {
|
|
761
|
+
// Only quoted string literals can carry a language tag.
|
|
762
|
+
if (!(s.startsWith('"') && s.endsWith('"'))) {
|
|
763
|
+
throw new Error("Language tag is only allowed on quoted string literals");
|
|
764
|
+
}
|
|
765
|
+
const langTok = this.next();
|
|
766
|
+
const lang = langTok.value || "";
|
|
767
|
+
s = `${s}@${lang}`;
|
|
768
|
+
|
|
769
|
+
// N3/Turtle: language tags and datatypes are mutually exclusive.
|
|
770
|
+
if (this.peek().typ === "HatHat") {
|
|
771
|
+
throw new Error("A literal cannot have both a language tag (@...) and a datatype (^^...)");
|
|
772
|
+
}
|
|
773
|
+
}
|
|
774
|
+
|
|
687
775
|
if (this.peek().typ === "HatHat") {
|
|
688
776
|
this.next();
|
|
689
777
|
const dtTok = this.next();
|
|
@@ -1594,19 +1682,40 @@ function composeSubst(outer, delta) {
|
|
|
1594
1682
|
// ============================================================================
|
|
1595
1683
|
|
|
1596
1684
|
function literalParts(lit) {
|
|
1685
|
+
// Split a literal into lexical form and datatype IRI (if any).
|
|
1686
|
+
// Also strip an optional language tag from the lexical form:
|
|
1687
|
+
// "\"hello\"@en" -> "\"hello\""
|
|
1688
|
+
// "\"hello\"@en^^<...>" is rejected earlier in the parser.
|
|
1597
1689
|
const idx = lit.indexOf("^^");
|
|
1690
|
+
let lex = lit;
|
|
1691
|
+
let dt = null;
|
|
1692
|
+
|
|
1598
1693
|
if (idx >= 0) {
|
|
1599
|
-
|
|
1600
|
-
|
|
1694
|
+
lex = lit.slice(0, idx);
|
|
1695
|
+
dt = lit.slice(idx + 2).trim();
|
|
1601
1696
|
if (dt.startsWith("<") && dt.endsWith(">")) {
|
|
1602
1697
|
dt = dt.slice(1, -1);
|
|
1603
1698
|
}
|
|
1604
|
-
return [lex, dt];
|
|
1605
1699
|
}
|
|
1606
|
-
|
|
1700
|
+
|
|
1701
|
+
// Strip LANGTAG from the lexical form when present.
|
|
1702
|
+
if (lex.length >= 2 && lex[0] === '"') {
|
|
1703
|
+
const lastQuote = lex.lastIndexOf('"');
|
|
1704
|
+
if (lastQuote > 0 && lastQuote < lex.length - 1 && lex[lastQuote + 1] === "@") {
|
|
1705
|
+
const lang = lex.slice(lastQuote + 2);
|
|
1706
|
+
if (/^[A-Za-z]+(?:-[A-Za-z0-9]+)*$/.test(lang)) {
|
|
1707
|
+
lex = lex.slice(0, lastQuote + 1);
|
|
1708
|
+
}
|
|
1709
|
+
}
|
|
1710
|
+
}
|
|
1711
|
+
|
|
1712
|
+
return [lex, dt];
|
|
1607
1713
|
}
|
|
1608
1714
|
|
|
1609
1715
|
function stripQuotes(lex) {
|
|
1716
|
+
if (lex.length >= 6 && lex.startsWith('"""') && lex.endsWith('"""')) {
|
|
1717
|
+
return lex.slice(3, -3);
|
|
1718
|
+
}
|
|
1610
1719
|
if (lex.length >= 2 && lex[0] === '"' && lex[lex.length - 1] === '"') {
|
|
1611
1720
|
return lex.slice(1, -1);
|
|
1612
1721
|
}
|
package/package.json
CHANGED
package/test/api.test.js
CHANGED
|
@@ -621,6 +621,21 @@ ${U('s')} ${U('p')} ${U('o')}. # another trailing comment
|
|
|
621
621
|
mustOccurExactly(out, reD, 1, 'diamond subclass should not duplicate x type D');
|
|
622
622
|
},
|
|
623
623
|
},
|
|
624
|
+
|
|
625
|
+
{
|
|
626
|
+
name: '42 literals: language tags are accepted and preserved',
|
|
627
|
+
opt: { proofComments: false },
|
|
628
|
+
input: ` { ?s ${U('p')} ?o } => { ?s ${U('q')} ?o }. ${U('s')} ${U('p')} "colour"@en-GB.`,
|
|
629
|
+
expect: [new RegExp(`${EX}s>\\s+<${EX}q>\\s+"colour"@en-GB\\s*\\.`)],
|
|
630
|
+
},
|
|
631
|
+
|
|
632
|
+
{
|
|
633
|
+
name: '43 literals: long """...""" strings are accepted (with lang tag)',
|
|
634
|
+
opt: { proofComments: false },
|
|
635
|
+
input: ` { ?s ${U('p')} ?o } => { ?s ${U('q')} ?o }. ${U('s')} ${U('p')} """Hello
|
|
636
|
+
world"""@en.`,
|
|
637
|
+
expect: [new RegExp(`${EX}s>\\s+<${EX}q>\\s+(?:"""Hello[\\s\\S]*?world"""@en|"Hello\\\\nworld"@en)\\s*\\.`)],
|
|
638
|
+
},
|
|
624
639
|
];
|
|
625
640
|
|
|
626
641
|
let passed = 0;
|