eyeling 1.24.3 → 1.24.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/tools/n3gen.js DELETED
@@ -1,2166 +0,0 @@
1
- #!/usr/bin/env node
2
- 'use strict';
3
-
4
- /*
5
- * n3gen.js — Convert Turtle (.ttl) or TriG (.trig) to N3.
6
- *
7
- * This tool always emits N3 to stdout. The input syntax is selected by the file
8
- * extension:
9
- * - .ttl (RDF 1.2 Turtle)
10
- * - .trig (RDF 1.2 TriG)
11
- *
12
- * TriG → N3 mapping (named graphs)
13
- * TriG: <graphName> { ...triples... }
14
- * N3: <graphName> log:nameOf { ...triples... } .
15
- *
16
- *
17
- * RDF 1.2 Turtle-star / TriG-star
18
- * - triple terms: log:nameOf <<( s p o )>>
19
- * - sugar form: << s p o >> :is true .
20
- * triple terms are emitted as singleton graph terms in N3:
21
- * log:nameOf { s p o . } .
22
- *
23
- * ----------------------------------------------------------------------------
24
- * Usage
25
- * n3gen file.ttl > file.n3
26
- * n3gen file.trig > file.n3
27
- */
28
-
29
- const fs = require('node:fs/promises');
30
- const path = require('node:path');
31
- const process = require('node:process');
32
-
33
- const crypto = require('node:crypto');
34
-
35
/**
 * Unwrap an IRIREF written as `<...>`.
 *
 * @param {*} s - Candidate IRI text; anything that is not a string yields ''.
 * @returns {string} The trimmed text, without the surrounding angle brackets
 *   when both are present.
 */
function stripIriRef(s) {
  if (typeof s !== 'string') return '';
  const trimmed = s.trim();
  const bracketed = trimmed.startsWith('<') && trimmed.endsWith('>');
  return bracketed ? trimmed.slice(1, -1) : trimmed;
}
42
-
43
/**
 * Normalize a Skolem root IRI.
 *
 * The value is unwrapped with stripIriRef() and, when non-empty, is guaranteed
 * to end with '/'. No particular path (such as '/.well-known/genid/') is
 * enforced here.
 *
 * @param {*} root - Root IRI, optionally written as `<...>`.
 * @returns {string} The normalized root, or '' when nothing usable was given.
 */
function normalizeSkolemRoot(root) {
  const bare = stripIriRef(root);
  if (bare === '') return '';
  return bare.endsWith('/') ? bare : bare + '/';
}
50
-
51
- // Skolemization (Option C)
52
- //
53
- // We mint recognizable Skolem IRIs using a stable, per-input UUID:
54
- //
55
- // @prefix skolem: <https://eyereasoner.github.io/.well-known/genid/UUID#>.
56
- //
57
- // and then replace cross-scope blank nodes with IRIs like: skolem:e38
58
- //
59
- // The UUID is deterministic from the *input file content* (SHA-256 based).
60
- const SKOLEM_PREFIX = 'skolem'; // prefix label; presumably emitted as `@prefix skolem:` by the writer (not visible here)
61
- const DEFAULT_SKOLEM_ROOT = 'https://eyereasoner.github.io/.well-known/genid/'; // used when the environment gives no root
62
- const SKOLEM_ROOT = normalizeSkolemRoot(process.env.SKOLEM_ROOT) || DEFAULT_SKOLEM_ROOT; // env override, normalized to end with '/'
63
-
64
- let SKOLEM_UUID = null; // e.g., '3f2504e0-4f89-5d3a-9a0c-0305e82c3301' (assigned by initSkolemForInput)
65
- let SKOLEM_PREFIX_IRI = null; // e.g., 'https://.../.well-known/genid/<UUID>#' (assigned by initSkolemForInput)
66
-
67
/**
 * Derive a UUID-shaped identifier from text, deterministically.
 *
 * The first 16 bytes of the SHA-256 digest of the UTF-8 text are used; the
 * version nibble is forced to 5 and the variant bits to the RFC 4122 pattern
 * so the result looks like a regular UUID.
 *
 * @param {string} inputText - Text to hash.
 * @returns {string} Lowercase UUID string in 8-4-4-4-12 layout.
 */
function deterministicUuidFromText(inputText) {
  const digest = crypto.createHash('sha256').update(inputText, 'utf8').digest();
  const bytes = Buffer.from(digest.subarray(0, 16));

  bytes[6] = (bytes[6] & 0x0f) | 0x50; // version nibble -> 5
  bytes[8] = (bytes[8] & 0x3f) | 0x80; // variant bits -> 10xxxxxx

  const hex = bytes.toString('hex');
  const groups = [hex.slice(0, 8), hex.slice(8, 12), hex.slice(12, 16), hex.slice(16, 20), hex.slice(20)];
  return groups.join('-');
}
78
-
79
/**
 * Set the module-level Skolem state for one input document.
 *
 * Stores the content-derived UUID in SKOLEM_UUID and the matching namespace
 * IRI (`<root><uuid>#`) in SKOLEM_PREFIX_IRI.
 *
 * @param {string} inputText - Full text of the input being converted.
 * @returns {void}
 */
function initSkolemForInput(inputText) {
  const uuid = deterministicUuidFromText(inputText);
  SKOLEM_UUID = uuid;
  SKOLEM_PREFIX_IRI = SKOLEM_ROOT + uuid + '#';
}
83
-
84
/**
 * Encode arbitrary text so it can be used as the local part of a prefixed name.
 *
 * Turtle PN_LOCAL allows percent escapes (PLX), so everything that
 * encodeURIComponent leaves alone but PN_LOCAL does not accept is
 * percent-escaped as well:
 *   - `!`, `'`, `(`, `)`, `*` and `~` anywhere (`~` would otherwise be lexed as
 *     the reifier operator),
 *   - a leading `-` or `.` (PN_LOCAL cannot start with either),
 *   - a trailing `.` (it would be read as the statement terminator).
 *
 * @param {string} s - Raw text.
 * @returns {string} Text consisting only of PN_LOCAL-safe characters and %XX escapes.
 */
function pnLocalSafe(s) {
  const percentEscape = (c) => `%${c.charCodeAt(0).toString(16).toUpperCase()}`;
  return encodeURIComponent(s)
    .replace(/[!'()*~]/g, percentEscape)
    .replace(/^[-.]/, percentEscape)
    .replace(/\.$/, percentEscape);
}
89
-
90
- // ---------------------------------------------------------------------------
91
- // Mapping namespace
92
- // ---------------------------------------------------------------------------
93
-
94
- // Use the W3C log: vocabulary to represent:
95
- // - TriG named graphs as N3 graph terms:
96
- // <g> log:nameOf { ... } .
97
- // - RDF 1.2 Turtle-star / TriG-star reified triples:
98
- // <reifier> log:nameOf { <s> <p> <o> . } .
99
- const LOG_NS = 'http://www.w3.org/2000/10/swap/log#'; // namespace IRI of the log: vocabulary
100
- const log = { // absolute IRIs of the log: terms used by this tool
101
- nameOf: `${LOG_NS}nameOf`, // predicate linking a graph name / reifier to its graph term
102
- };
103
-
104
- // ---------------------------------------------------------------------------
105
- // Minimal Turtle/N3 model + lexer + parser
106
- // ---------------------------------------------------------------------------
107
-
108
- const RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; // the parser expands the keyword `a` to RDF_NS + 'type'
109
- const XSD_NS = 'http://www.w3.org/2001/XMLSchema#'; // NOTE(review): no use visible in this part of the file
110
- const OWL_NS = 'http://www.w3.org/2002/07/owl#'; // the parser expands `=` to OWL_NS + 'sameAs'
111
-
112
- // Avoid literal triple-quote sequences in this source (helps embedding in tools).
113
- const DQ3 = '"'.repeat(3); // three double quotes: long-string delimiter
114
- const SQ3 = "'".repeat(3); // three single quotes: long-string delimiter
115
-
116
- // RDF 1.2: language tags follow BCP47 and may be followed by an initial direction suffix ("--ltr" / "--rtl").
117
- // We validate in the lexer so downstream code can treat it as an opaque tag string.
118
- const LANGTAG_WITH_DIR_REGEX = /^[A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*(?:--(?:ltr|rtl))?$/i; // case-insensitive; the whole tag must match
119
-
120
/**
 * Resolve an IRI reference against a base IRI (via the WHATWG URL parser).
 *
 * The reference is returned untouched when there is no base or when it already
 * starts with a scheme. Otherwise resolution is delegated to `new URL`, so a
 * malformed reference or base fails fast instead of yielding a broken IRI.
 *
 * @param {string} ref - IRI reference, relative or absolute.
 * @param {string} base - Base IRI; a falsy value disables resolution.
 * @returns {string} The resolved IRI.
 * @throws {TypeError} When `new URL(ref, base)` rejects its input.
 */
function resolveIriRef(ref, base) {
  const hasScheme = /^[A-Za-z][A-Za-z0-9+.-]*:/.test(ref);
  if (!base || hasScheme) return ref;
  return new URL(ref, base).href;
}
128
-
129
/** Common base class of every term kind in the minimal Turtle/N3 model. */
class Term {}

/** IRI term. */
class Iri extends Term {
  /** @param {string} value - IRI text. */
  constructor(value) {
    super();
    this.value = value;
  }
}

/** Literal term; `value` is the raw lexical form, e.g. "foo", 12, or "\"x\"^^<dt>". */
class Literal extends Term {
  /** @param {string} value - Raw lexical form. */
  constructor(value) {
    super();
    this.value = value;
  }
}

/** Blank node; `label` carries the full label such as `_:b1`. */
class Blank extends Term {
  /** @param {string} label - Blank node label. */
  constructor(label) {
    super();
    this.label = label;
  }
}

/** Variable; `name` is stored without the leading '?'. */
class Var extends Term {
  /** @param {string} name - Variable name. */
  constructor(name) {
    super();
    this.name = name;
  }
}

/** Closed list of terms. */
class ListTerm extends Term {
  /** @param {Term[]} elems - List members, in order. */
  constructor(elems) {
    super();
    this.elems = elems;
  }
}

/** List with a known prefix and a variable standing for the remainder. */
class OpenListTerm extends Term {
  /**
   * @param {Term[]} prefix - Known leading members.
   * @param {string} tailVar - Name of the variable for the tail.
   */
  constructor(prefix, tailVar) {
    super();
    this.prefix = prefix;
    this.tailVar = tailVar;
  }
}

/** Quoted graph: a term that wraps a list of triples. */
class GraphTerm extends Term {
  /** @param {Triple[]} triples - Triples contained in the graph. */
  constructor(triples) {
    super();
    this.triples = triples;
  }
}

/** Subject / predicate / object statement (not itself a Term). */
class Triple {
  /**
   * @param {Term} s - Subject.
   * @param {Term} p - Predicate.
   * @param {Term} o - Object.
   */
  constructor(s, p, o) {
    this.s = s;
    this.p = p;
    this.o = o;
  }
}
180
-
181
// Intern tables: one shared Iri / Literal instance per distinct value.
const __iriIntern = new Map();
const __literalIntern = new Map();

/**
 * Return the shared Iri instance for a value, creating it on first use.
 *
 * @param {string} value - IRI text.
 * @returns {Iri} The interned term.
 */
function internIri(value) {
  if (!__iriIntern.has(value)) __iriIntern.set(value, new Iri(value));
  return __iriIntern.get(value);
}

/**
 * Return the shared Literal instance for a lexical form, creating it on first use.
 *
 * @param {string} value - Raw lexical form.
 * @returns {Literal} The interned term.
 */
function internLiteral(value) {
  if (!__literalIntern.has(value)) __literalIntern.set(value, new Literal(value));
  return __literalIntern.get(value);
}
199
-
200
/**
 * Prefix table plus base IRI for one document.
 *
 * `map` goes from prefix label to namespace IRI (the empty label '' stands for
 * `@prefix :`); `baseIri` is the current base, '' when none is set.
 */
class PrefixEnv {
  /**
   * @param {Object<string, string>} [map] - Initial prefix -> IRI table.
   * @param {string} [baseIri] - Initial base IRI.
   */
  constructor(map, baseIri) {
    this.map = map || {};
    this.baseIri = baseIri || '';
  }

  /** @returns {PrefixEnv} An environment with no prefixes and no base. */
  static newDefault() {
    return new PrefixEnv({}, '');
  }

  /**
   * Declare or overwrite a prefix.
   * @param {string} pfx - Prefix label without the trailing ':'.
   * @param {string} iri - Namespace IRI.
   */
  setPrefix(pfx, iri) {
    this.map[pfx] = iri;
  }

  /** @param {string} iri - New base IRI. */
  setBase(iri) {
    this.baseIri = iri;
  }

  /**
   * Expand `pfx:local` using the declared prefixes.
   *
   * @param {string} qn - Prefixed name.
   * @returns {string} The expanded IRI, or `qn` unchanged when it has no ':'
   *   or its prefix is not declared.
   */
  expandQName(qn) {
    const idx = qn.indexOf(':');
    if (idx < 0) return qn;
    const pfx = qn.slice(0, idx);
    // Own-property check so inherited keys such as "constructor" are not treated as prefixes.
    if (!Object.prototype.hasOwnProperty.call(this.map, pfx)) return qn;
    const ns = this.map[pfx];
    if (ns == null) return qn;
    return ns + qn.slice(idx + 1);
  }

  /**
   * Best-effort compaction of an IRI to a prefixed name for writing.
   *
   * The longest matching namespace wins. The remaining local part must be a
   * conservative subset of Turtle PN_LOCAL: ASCII letters, digits, '_', '-'
   * and '.', not starting with '-' or '.', and not ending with '.'. Those
   * shapes are rejected because the emitted name would not lex back to the
   * same IRI (a trailing '.' is read as a statement terminator).
   *
   * @param {string} iri - Absolute IRI.
   * @returns {?string} `pfx:local` (or `:local`), or null when no safe form exists.
   */
  shrinkIri(iri) {
    let bestPfx = null;
    let bestBase = '';
    for (const [pfx, base] of Object.entries(this.map)) {
      if (!base) continue;
      if (iri.startsWith(base) && base.length > bestBase.length) {
        bestPfx = pfx;
        bestBase = base;
      }
    }
    if (bestPfx == null) return null;

    const local = iri.slice(bestBase.length);
    if (!local) return null;
    if (!/^[A-Za-z0-9_](?:[A-Za-z0-9_.-]*[A-Za-z0-9_-])?$/.test(local)) return null;

    return bestPfx === '' ? `:${local}` : `${bestPfx}:${local}`;
  }
}
251
-
252
- // -------------------- LEXER ------------------------------
253
-
254
/** Lexer token: a type tag plus an optional payload. */
class Token {
  /**
   * @param {string} typ - Token type, e.g. 'Ident', 'Literal', 'Dot'.
   * @param {*} [value=null] - Payload for tokens that carry text.
   */
  constructor(typ, value = null) {
    this.typ = typ;
    this.value = value;
  }

  /** @returns {string} Debug form: `Token(typ)` or `Token(typ, <JSON value>)`. */
  toString() {
    const { typ, value } = this;
    return value == null ? `Token(${typ})` : `Token(${typ}, ${JSON.stringify(value)})`;
  }
}
264
-
265
/**
 * Tell whether a character is whitespace.
 *
 * @param {string} c - Text to test (the lexer passes one character).
 * @returns {boolean} True when the text contains a `\s` character.
 */
function isWs(c) {
  const whitespace = /\s/;
  return whitespace.test(c);
}
268
/**
 * Tell whether a character may appear inside a name token.
 *
 * Accepted: any Unicode letter or number, plus '_', '-', ':' and '%'.
 *
 * @param {string} c - Text to test (the lexer passes one character).
 * @returns {boolean} True when the text contains such a character.
 */
function isNameChar(c) {
  const nameChar = /[\p{L}\p{N}_\-:%]/u;
  return nameChar.test(c);
}
271
-
272
/**
 * Remove one layer of string delimiters.
 *
 * Triple-quote delimiters (double, then single) are tried first, then the
 * one-character quotes; text without matching delimiters is returned as is.
 *
 * @param {string} s - Quoted lexical form.
 * @returns {string} The text between the delimiters.
 */
function stripQuotes(s) {
  const wrappedIn = (fence) => s.startsWith(fence) && s.endsWith(fence);
  for (const fence of [DQ3, SQ3]) {
    if (wrappedIn(fence)) return s.slice(3, -3);
  }
  for (const quote of ['"', "'"]) {
    if (wrappedIn(quote)) return s.slice(1, -1);
  }
  return s;
}
278
-
279
/**
 * Decode backslash escapes in the body of a string literal.
 *
 * Handles the single-character escapes (\t \n \r \b \f \" \' \\), \uXXXX and
 * \UXXXXXXXX. Anything that is not a well-formed escape is copied through
 * unchanged: an unknown escape keeps its backslash, malformed \u / \U keep the
 * two-character marker, and a \U value above U+10FFFF is kept verbatim.
 *
 * @param {string} s - String body without its surrounding quotes.
 * @returns {string} The decoded text.
 */
function decodeN3StringEscapes(s) {
  const SIMPLE = new Map([
    ['t', '\t'],
    ['n', '\n'],
    ['r', '\r'],
    ['b', '\b'],
    ['f', '\f'],
    ['"', '"'],
    ["'", "'"],
    ['\\', '\\'],
  ]);

  const pieces = [];
  let pos = 0;
  while (pos < s.length) {
    const ch = s[pos++];
    if (ch !== '\\') {
      pieces.push(ch);
      continue;
    }
    // A backslash that ends the input is kept literally.
    if (pos >= s.length) {
      pieces.push('\\');
      continue;
    }

    const code = s[pos++];
    if (SIMPLE.has(code)) {
      pieces.push(SIMPLE.get(code));
    } else if (code === 'u') {
      const hex = s.slice(pos, pos + 4);
      if (/^[0-9A-Fa-f]{4}$/.test(hex)) {
        pieces.push(String.fromCharCode(parseInt(hex, 16)));
        pos += 4;
      } else {
        pieces.push('\\u');
      }
    } else if (code === 'U') {
      const hex = s.slice(pos, pos + 8);
      if (/^[0-9A-Fa-f]{8}$/.test(hex)) {
        const cp = parseInt(hex, 16);
        pieces.push(cp <= 0x10ffff ? String.fromCodePoint(cp) : '\\U' + hex);
        pos += 8;
      } else {
        pieces.push('\\U');
      }
    } else {
      pieces.push('\\' + code);
    }
  }
  return pieces.join('');
}
341
-
342
/**
 * Tokenize Turtle / TriG text (plus a few N3 operators) into a flat token list.
 *
 * Token types produced: OpImplies, Equals, StarOpen, StarClose, Tilde, AnnOpen,
 * AnnClose, OpImpliedBy, OpPredInvert, IriRef, OpPathFwd, OpPathRev, HatHat,
 * LBrace, RBrace, LParen, RParen, LBracket, RBracket, Semicolon, Comma, Dot,
 * Literal, LangTag, AtPrefix, AtBase, Var, Ident and a final EOF.
 *
 * String literals are decoded and stored in canonical JSON double-quoted form;
 * numeric and boolean literals keep their source spelling.
 *
 * @param {string} inputText - Document text.
 * @returns {Token[]} Tokens in source order, always terminated by an EOF token.
 * @throws {Error} On unterminated IRIs or strings (short and long), invalid
 *   language tags, unknown `@` directives, empty variable names, or any
 *   character that starts no token.
 */
function lex(inputText) {
  // Work on code points so an astral character occupies a single position.
  const chars = Array.from(inputText);
  const n = chars.length;
  const tokens = [];
  let i = 0;

  // Second character after '<' that turns it into an operator instead of an IRI.
  const ANGLE_OPS = new Map([
    ['<', 'StarOpen'],
    ['=', 'OpImpliedBy'],
    ['-', 'OpPredInvert'],
  ]);
  // '~' is absent on purpose: it is tokenized before punctuation is considered.
  const PUNCTUATION = new Map([
    ['{', 'LBrace'],
    ['}', 'RBrace'],
    ['(', 'LParen'],
    [')', 'RParen'],
    ['[', 'LBracket'],
    [']', 'RBracket'],
    [';', 'Semicolon'],
    [',', 'Comma'],
    ['.', 'Dot'],
  ]);
  // Numeric shapes, tried from most to least specific.
  const NUMERIC_FORMS = [
    /^[+-]?(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)[eE][+-]?[0-9]+/, // double
    /^[+-]?[0-9]*\.[0-9]+/, // decimal (allows .5)
    /^[+-]?[0-9]+/, // integer
  ];

  const peek = (offset = 0) => {
    const j = i + offset;
    return j >= 0 && j < n ? chars[j] : null;
  };
  const isDigit = (ch) => ch !== null && /[0-9]/.test(ch);
  const isAlpha = (ch) => ch !== null && /[A-Za-z]/.test(ch);
  const isAlnum = (ch) => ch !== null && /[A-Za-z0-9]/.test(ch);
  const emit = (typ, width, value = null) => {
    tokens.push(new Token(typ, value));
    i += width;
  };

  // Read `<...>` starting at '<'; returns the text between the brackets.
  function readIriRef() {
    i++; // consume '<'
    const start = i;
    while (i < n && chars[i] !== '>') i++;
    if (i >= n) throw new Error('Unterminated IRI <...>');
    const value = chars.slice(start, i).join('');
    i++; // consume '>'
    return value;
  }

  // Read a short or long string opened by `quote`; returns its canonical JSON form.
  function readString(quote) {
    const isLong = peek(1) === quote && peek(2) === quote;
    i += isLong ? 3 : 1;

    const body = [];
    let closed = false;
    while (i < n && !closed) {
      const cc = chars[i];
      if (cc === '\\') {
        // Keep the escape pair verbatim; it is decoded once, below.
        body.push(cc);
        i++;
        if (i < n) body.push(chars[i++]);
      } else if (cc !== quote) {
        body.push(cc);
        i++;
      } else if (!isLong) {
        i++;
        closed = true;
      } else {
        // Long string: three or more quotes in a row close it, and any surplus
        // quotes in that run are kept as content.
        let run = 0;
        while (i + run < n && chars[i + run] === quote) run++;
        closed = run >= 3;
        const kept = closed ? run - 3 : run;
        for (let k = 0; k < kept; k++) body.push(quote);
        i += run;
      }
    }
    if (!closed) {
      throw new Error(isLong ? 'Unterminated long string literal' : 'Unterminated string literal');
    }
    return JSON.stringify(decodeN3StringEscapes(body.join('')));
  }

  // Read a language tag (the '@' is already consumed), including an optional --ltr / --rtl suffix.
  function readLangTag() {
    if (!isAlpha(peek())) throw new Error("Invalid language tag (expected [A-Za-z] after '@')");

    const tag = [];
    // Primary language subtag: 1..8 letters.
    while (isAlpha(peek())) {
      tag.push(chars[i++]);
      if (tag.length > 8) throw new Error('Invalid language tag (primary subtag too long; max 8)');
    }

    // Further subtags: -[A-Za-z0-9]{1,8}; "--" starts the direction suffix instead.
    while (peek() === '-' && peek(1) !== '-') {
      tag.push('-');
      i++;
      if (!isAlnum(peek())) throw new Error("Invalid language tag (expected [A-Za-z0-9]+ after '-')");
      const segment = [];
      while (isAlnum(peek())) {
        segment.push(chars[i++]);
        if (segment.length > 8) throw new Error('Invalid language tag subtag too long; max 8');
      }
      tag.push(...segment);
    }

    if (peek() === '-' && peek(1) === '-') {
      i += 2;
      const dirChars = [];
      while (isAlpha(peek())) {
        dirChars.push(chars[i++]);
        if (dirChars.length > 3) break; // already too long to be ltr / rtl
      }
      const dir = dirChars.join('').toLowerCase();
      if (dir !== 'ltr' && dir !== 'rtl') {
        throw new Error('Invalid language direction (expected --ltr or --rtl)');
      }
      tag.push('-', '-', dir);
    }

    const lang = tag.join('');
    if (!LANGTAG_WITH_DIR_REGEX.test(lang)) {
      throw new Error(`Invalid BCP47 language tag: ${lang}`);
    }
    return lang;
  }

  // Read the keyword after '@' (already consumed) and return the directive token type.
  function readDirective() {
    const start = i;
    while (isAlpha(peek())) i++;
    const word = chars.slice(start, i).join('');
    if (word === 'prefix') return 'AtPrefix';
    if (word === 'base') return 'AtBase';
    throw new Error(`Unknown directive @${word}`);
  }

  // Does a numeric literal start at `c` (the character at position i)?
  const startsNumber = (c) =>
    isDigit(c) ||
    (c === '.' && isDigit(peek(1))) ||
    ((c === '-' || c === '+') && (isDigit(peek(1)) || (peek(1) === '.' && isDigit(peek(2)))));

  while (i < n) {
    const c = chars[i];

    // 1) whitespace
    if (isWs(c)) {
      i++;
      continue;
    }

    // 2) # comments run to the end of the line
    if (c === '#') {
      while (i < n && chars[i] !== '\n' && chars[i] !== '\r') i++;
      continue;
    }

    // 3) '=>' and a single '=' (owl:sameAs)
    if (c === '=') {
      if (peek(1) === '>') emit('OpImplies', 2);
      else emit('Equals', 1);
      continue;
    }

    // RDF 1.2 Turtle-star / TriG-star closers, reifier marker and annotation blocks
    if (c === '>' && peek(1) === '>') {
      emit('StarClose', 2);
      continue;
    }
    if (c === '~') {
      emit('Tilde', 1);
      continue;
    }
    if (c === '{' && peek(1) === '|') {
      emit('AnnOpen', 2);
      continue;
    }
    if (c === '|' && peek(1) === '}') {
      emit('AnnClose', 2);
      continue;
    }

    // '<<', '<=', '<-' or an IRI reference
    if (c === '<') {
      const op = ANGLE_OPS.get(peek(1));
      if (op !== undefined) emit(op, 2);
      else tokens.push(new Token('IriRef', readIriRef()));
      continue;
    }

    // 4) path operators: !, ^, ^^
    if (c === '!') {
      emit('OpPathFwd', 1);
      continue;
    }
    if (c === '^') {
      if (peek(1) === '^') emit('HatHat', 2);
      else emit('OpPathRev', 1);
      continue;
    }

    // 5) punctuation; a '.' directly followed by a digit starts a number instead
    const punct = PUNCTUATION.get(c);
    if (punct !== undefined && !(c === '.' && isDigit(peek(1)))) {
      emit(punct, 1);
      continue;
    }

    // 6) string literals: short or long, double or single quoted
    if (c === '"' || c === "'") {
      tokens.push(new Token('Literal', readString(c)));
      continue;
    }

    // 7) '@': a language tag right after a quoted literal, otherwise a directive
    if (c === '@') {
      const prevTok = tokens.length ? tokens[tokens.length - 1] : null;
      const prevWasQuotedLiteral =
        prevTok && prevTok.typ === 'Literal' && typeof prevTok.value === 'string' && prevTok.value.startsWith('"');
      i++; // consume '@'
      if (prevWasQuotedLiteral) tokens.push(new Token('LangTag', readLangTag()));
      else tokens.push(new Token(readDirective()));
      continue;
    }

    // 8) numeric literals (integer / decimal / double)
    if (startsNumber(c)) {
      // Only the run of characters a number can contain is examined, so the
      // cost per token does not grow with the size of the remaining input.
      let end = i;
      while (end < n && /[0-9eE+.\-]/.test(chars[end])) end++;
      const candidate = chars.slice(i, end).join('');

      let matched = null;
      for (const form of NUMERIC_FORMS) {
        matched = form.exec(candidate);
        if (matched) break;
      }
      if (!matched) {
        throw new Error(`Invalid numeric literal near: ${chars.slice(i, i + 32).join('')}`);
      }
      emit('Literal', matched[0].length, matched[0]);
      continue;
    }

    // 9) variables: ?x or $x
    if (c === '?' || c === '$') {
      i++;
      const start = i;
      while (i < n && isNameChar(chars[i])) i++;
      if (i === start) throw new Error(`Expected variable name after '${c}'`);
      tokens.push(new Token('Var', chars.slice(start, i).join('')));
      continue;
    }

    // 10) identifier / qname / keywords
    if (isNameChar(c) || c === '_') {
      const start = i++;
      while (i < n && (isNameChar(chars[i]) || chars[i] === '_' || chars[i] === '.')) i++;
      const word = chars.slice(start, i).join('');

      // Trailing dots are statement terminators (':s :p :o.'), not part of the
      // name. The first character is never '.', so the stem is never empty.
      const stem = word.replace(/\.+$/, '');
      const isBoolean = stem === 'true' || stem === 'false';
      tokens.push(new Token(isBoolean ? 'Literal' : 'Ident', stem));
      for (let d = stem.length; d < word.length; d++) tokens.push(new Token('Dot'));
      continue;
    }

    throw new Error(`Unexpected character in input: ${JSON.stringify(c)}`);
  }

  tokens.push(new Token('EOF'));
  return tokens;
}
778
-
779
- // -------------------- PARSER (Turtle + N3-graphs; TriG extension separately) --------------------
780
-
781
- class TurtleParser {
782
- constructor(tokens) {
783
- this.toks = tokens;
784
- this.pos = 0;
785
- this.prefixes = PrefixEnv.newDefault();
786
- this.blankCounter = 0;
787
- this.pendingTriples = [];
788
- this.reifierCounter = 0;
789
- this.reifiesEmitted = new Set();
790
- }
791
-
792
- peek() {
793
- return this.toks[this.pos];
794
- }
795
-
796
- next() {
797
- const tok = this.toks[this.pos];
798
- this.pos += 1;
799
- return tok;
800
- }
801
-
802
- expect(typ) {
803
- const tok = this.next();
804
- if (tok.typ !== typ) throw new Error(`Expected ${typ}, got ${tok.toString()}`);
805
- return tok;
806
- }
807
-
808
- // Generate a fresh blank node used for RDF 1.2 reifiedTriple sugar (<< s p o >>)
809
- freshReifier() {
810
- this.reifierCounter += 1;
811
- return new Blank(`_:n3r${this.reifierCounter}`);
812
- }
813
-
814
- termKey(t) {
815
- if (t == null) return '[]';
816
- if (t instanceof Iri) return `I:${t.value}`;
817
- if (t instanceof Blank) return `B:${t.label}`;
818
- if (t instanceof Literal) return `L:${t.value}`;
819
- if (t instanceof Var) return `V:${t.name}`;
820
- if (t instanceof ListTerm) return `T:(` + t.elems.map((x) => this.termKey(x)).join(' ') + `)`;
821
- if (t instanceof GraphTerm) {
822
- const inner = t.triples
823
- .map((tr) => `${this.termKey(tr.s)} ${this.termKey(tr.p)} ${this.termKey(tr.o)}`)
824
- .join(' | ');
825
- return `G:{${inner}}`;
826
- }
827
- return `X:${String(t)}`;
828
- }
829
-
830
- // Emit the implicit (or explicit) reifier triple required by RDF 1.2 reifiedTriple sugar:
831
- // reifier log:nameOf tripleTerm .
832
- // We represent tripleTerm in N3 as a quoted graph term: { s p o . }
833
- emitReifies(reifier, tripleGraph) {
834
- const key = `${this.termKey(reifier)}|${this.termKey(tripleGraph)}`;
835
- if (this.reifiesEmitted.has(key)) return;
836
- this.reifiesEmitted.add(key);
837
- this.pendingTriples.push(new Triple(reifier, internIri(LOG_NS + 'nameOf'), tripleGraph));
838
- }
839
-
840
- // Accept '.' OR (when inside {...}) accept '}' as implicit terminator for last triple
841
- expectDotOrRBrace() {
842
- const tok = this.peek();
843
- if (tok.typ === 'Dot') {
844
- this.next();
845
- return;
846
- }
847
- if (tok.typ === 'RBrace') return;
848
- throw new Error(`Expected '.' (or '}'), got ${tok.toString()}`);
849
- }
850
-
851
- parsePrefixDirective() {
852
- // @prefix pfx: <iri> .
853
- const pfxTok = this.next();
854
- if (pfxTok.typ !== 'Ident') throw new Error(`Expected prefix label after @prefix, got ${pfxTok.toString()}`);
855
- const label = (pfxTok.value || '').replace(/:$/, '');
856
- const iriTok = this.next();
857
- let iri;
858
- if (iriTok.typ === 'IriRef') iri = iriTok.value || '';
859
- else if (iriTok.typ === 'Ident') iri = iriTok.value || '';
860
- else throw new Error(`Expected IRI after @prefix, got ${iriTok.toString()}`);
861
- this.expect('Dot');
862
- this.prefixes.setPrefix(label, iri);
863
- }
864
-
865
- parseSparqlPrefixDirective() {
866
- // PREFIX pfx: <iri> (no trailing '.')
867
- const pfxTok = this.next();
868
- if (pfxTok.typ !== 'Ident') throw new Error(`Expected prefix label after PREFIX, got ${pfxTok.toString()}`);
869
- const label = (pfxTok.value || '').replace(/:$/, '');
870
- const iriTok = this.next();
871
- let iri;
872
- if (iriTok.typ === 'IriRef') iri = iriTok.value || '';
873
- else if (iriTok.typ === 'Ident') iri = iriTok.value || '';
874
- else throw new Error(`Expected IRI after PREFIX, got ${iriTok.toString()}`);
875
- if (this.peek().typ === 'Dot') this.next(); // permissive
876
- this.prefixes.setPrefix(label, iri);
877
- }
878
-
879
- parseBaseDirective() {
880
- // @base <iri> .
881
- const iriTok = this.next();
882
- let iri;
883
- if (iriTok.typ === 'IriRef') iri = iriTok.value || '';
884
- else if (iriTok.typ === 'Ident') iri = iriTok.value || '';
885
- else throw new Error(`Expected IRI after @base, got ${iriTok.toString()}`);
886
- this.expect('Dot');
887
- this.prefixes.setBase(iri);
888
- }
889
-
890
- parseSparqlBaseDirective() {
891
- // BASE <iri>
892
- const iriTok = this.next();
893
- if (iriTok.typ !== 'IriRef') throw new Error(`Expected <IRI> after BASE, got ${iriTok.toString()}`);
894
- const iri = iriTok.value || '';
895
- if (this.peek().typ === 'Dot') this.next(); // permissive
896
- this.prefixes.setBase(iri);
897
- }
898
-
899
- parseTurtleDocument() {
900
- const triples = [];
901
- while (this.peek().typ !== 'EOF') {
902
- // RDF 1.2: VERSION announcement (e.g., VERSION "1.2")
903
- if (
904
- this.peek().typ === 'Ident' &&
905
- typeof this.peek().value === 'string' &&
906
- this.peek().value.toLowerCase() === 'version'
907
- ) {
908
- this.next(); // VERSION
909
- const vTok = this.next();
910
- if (vTok.typ !== 'Literal') throw new Error(`Expected a literal after VERSION, got ${vTok.toString()}`);
911
- if (this.peek().typ === 'Dot') this.next(); // permissive
912
- continue;
913
- }
914
-
915
- if (this.peek().typ === 'AtPrefix') {
916
- this.next();
917
- this.parsePrefixDirective();
918
- continue;
919
- }
920
- if (this.peek().typ === 'AtBase') {
921
- this.next();
922
- this.parseBaseDirective();
923
- continue;
924
- }
925
- // SPARQL-style directives
926
- if (
927
- this.peek().typ === 'Ident' &&
928
- typeof this.peek().value === 'string' &&
929
- this.peek().value.toLowerCase() === 'prefix' &&
930
- this.toks[this.pos + 1] &&
931
- this.toks[this.pos + 1].typ === 'Ident' &&
932
- typeof this.toks[this.pos + 1].value === 'string' &&
933
- this.toks[this.pos + 1].value.endsWith(':')
934
- ) {
935
- this.next(); // PREFIX
936
- this.parseSparqlPrefixDirective();
937
- continue;
938
- }
939
- if (
940
- this.peek().typ === 'Ident' &&
941
- typeof this.peek().value === 'string' &&
942
- this.peek().value.toLowerCase() === 'base' &&
943
- this.toks[this.pos + 1] &&
944
- this.toks[this.pos + 1].typ === 'IriRef'
945
- ) {
946
- this.next(); // BASE
947
- this.parseSparqlBaseDirective();
948
- continue;
949
- }
950
-
951
- const subj = this.parseTerm();
952
-
953
- let more;
954
- if (this.peek().typ === 'Dot') {
955
- more = [];
956
- if (this.pendingTriples.length > 0) {
957
- more = this.pendingTriples;
958
- this.pendingTriples = [];
959
- }
960
- this.next();
961
- } else {
962
- more = this.parsePredicateObjectList(subj);
963
- this.expect('Dot');
964
- }
965
- triples.push(...more);
966
- }
967
- return { triples, prefixes: this.prefixes };
968
- }
969
-
970
- parseTerm() {
971
- let t = this.parsePathItem();
972
- while (this.peek().typ === 'OpPathFwd' || this.peek().typ === 'OpPathRev') {
973
- const dir = this.next().typ;
974
- const pred = this.parsePathItem();
975
-
976
- this.blankCounter += 1;
977
- const bn = new Blank(`_:b${this.blankCounter}`);
978
- this.pendingTriples.push(dir === 'OpPathFwd' ? new Triple(t, pred, bn) : new Triple(bn, pred, t));
979
- t = bn;
980
- }
981
- return t;
982
- }
983
-
984
- parsePathItem() {
985
- const tok = this.next();
986
- const typ = tok.typ;
987
- const val = tok.value;
988
-
989
- if (typ === 'Equals') return internIri(OWL_NS + 'sameAs');
990
-
991
- if (typ === 'IriRef') {
992
- const base = this.prefixes.baseIri || '';
993
- return internIri(resolveIriRef(val || '', base));
994
- }
995
-
996
- if (typ === 'Ident') {
997
- const name = val || '';
998
- if (name === 'a') return internIri(RDF_NS + 'type');
999
- if (name.startsWith('_:')) return new Blank(name);
1000
- if (name.includes(':')) return internIri(this.prefixes.expandQName(name));
1001
- return internIri(name);
1002
- }
1003
-
1004
- if (typ === 'Literal') {
1005
- let s = val || '';
1006
-
1007
- // Optional language tag: "... "@en
1008
- if (this.peek().typ === 'LangTag') {
1009
- if (!(s.startsWith('"') && s.endsWith('"')))
1010
- throw new Error('Language tag is only allowed on quoted string literals');
1011
- const langTok = this.next();
1012
- s = `${s}@${langTok.value || ''}`;
1013
- if (this.peek().typ === 'HatHat') throw new Error('A literal cannot have both a language tag and a datatype');
1014
- }
1015
-
1016
- // Optional datatype: ^^ <...> or ^^ qname
1017
- if (this.peek().typ === 'HatHat') {
1018
- this.next();
1019
- const dtTok = this.next();
1020
- let dtIri;
1021
- if (dtTok.typ === 'IriRef') dtIri = dtTok.value || '';
1022
- else if (dtTok.typ === 'Ident') {
1023
- const qn = dtTok.value || '';
1024
- dtIri = qn.includes(':') ? this.prefixes.expandQName(qn) : qn;
1025
- } else throw new Error(`Expected datatype after ^^, got ${dtTok.toString()}`);
1026
- s = `${s}^^<${dtIri}>`;
1027
- }
1028
-
1029
- return internLiteral(s);
1030
- }
1031
-
1032
- if (typ === 'Var') return new Var(val || '');
1033
- if (typ === 'LParen') return this.parseList();
1034
- if (typ === 'LBracket') return this.parseBlank();
1035
- if (typ === 'LBrace') throw new Error('N3 graph terms { ... } are not supported in Turtle/TriG input');
1036
- if (typ === 'StarOpen') return this.parseStarTerm();
1037
-
1038
- throw new Error(`Unexpected term token: ${tok.toString()}`);
1039
- }
1040
-
1041
- parseStarTerm() {
1042
- // RDF 1.2 Turtle-star / TriG-star:
1043
- // - tripleTerm: <<( s p o )>>
1044
- // - reifiedTriple (syntactic sugar): << s p o [~ reifier] >>
1045
- if (this.peek().typ === 'LParen') {
1046
- // tripleTerm
1047
- this.next(); // '('
1048
- const s = this.parseTerm();
1049
- const p = this.parseTerm();
1050
- const o = this.parseTerm();
1051
- this.expect('RParen');
1052
- this.expect('StarClose');
1053
- return new GraphTerm([new Triple(s, p, o)]);
1054
- }
1055
-
1056
- // reifiedTriple sugar -> expand to a reifier node that log:nameOf a tripleTerm
1057
- const s = this.parseTerm();
1058
- const p = this.parseTerm();
1059
- const o = this.parseTerm();
1060
-
1061
- let reifier;
1062
- if (this.peek().typ === 'Tilde') {
1063
- this.next();
1064
- reifier = this.parseTerm();
1065
- } else {
1066
- reifier = this.freshReifier();
1067
- }
1068
-
1069
- this.expect('StarClose');
1070
-
1071
- const tripleTerm = new GraphTerm([new Triple(s, p, o)]);
1072
- this.emitReifies(reifier, tripleTerm);
1073
- return reifier;
1074
- }
1075
-
1076
- parseList() {
1077
- const elems = [];
1078
- while (this.peek().typ !== 'RParen') {
1079
- // Be permissive: allow commas inside lists (even though Turtle lists are whitespace-separated).
1080
- if (this.peek().typ === 'Comma') {
1081
- this.next();
1082
- continue;
1083
- }
1084
- elems.push(this.parseTerm());
1085
- if (this.peek().typ === 'EOF') throw new Error("Unterminated list '(' ... ')'");
1086
- }
1087
- this.next(); // ')'
1088
- return new ListTerm(elems);
1089
- }
1090
-
1091
- parseBlank() {
1092
- // [] or [ ... ] property list
1093
- if (this.peek().typ === 'RBracket') {
1094
- this.next();
1095
- this.blankCounter += 1;
1096
- return new Blank(`_:b${this.blankCounter}`);
1097
- }
1098
-
1099
- let id = null;
1100
- if (this.peek().typ === 'Ident' && (this.peek().value || '').startsWith('_:')) id = this.next().value;
1101
- else {
1102
- this.blankCounter += 1;
1103
- id = `_:b${this.blankCounter}`;
1104
- }
1105
-
1106
- const subj = new Blank(id);
1107
- if (this.peek().typ !== 'RBracket') {
1108
- const more = this.parsePredicateObjectList(subj);
1109
- // Keep the triples produced by the property list so they are emitted with the surrounding statement.
1110
- this.pendingTriples.push(...more);
1111
- }
1112
-
1113
- this.expect('RBracket');
1114
- return new Blank(id);
1115
- }
1116
-
1117
- // Parses inside "{ ... }" AFTER the '{' has been consumed.
1118
- // We accept both "s p o ." and "s p o" before '}' as last triple (permissive).
1119
- parseGraph() {
1120
- const triples = [];
1121
- while (this.peek().typ !== 'RBrace') {
1122
- const subj = this.parseTerm();
1123
-
1124
- let more;
1125
- if (this.peek().typ === 'Dot') {
1126
- more = [];
1127
- if (this.pendingTriples.length > 0) {
1128
- more = this.pendingTriples;
1129
- this.pendingTriples = [];
1130
- }
1131
- this.next();
1132
- } else {
1133
- more = this.parsePredicateObjectList(subj);
1134
- this.expectDotOrRBrace();
1135
- if (this.peek().typ === 'Dot') this.next();
1136
- }
1137
-
1138
- triples.push(...more);
1139
- }
1140
- this.next(); // consume '}'
1141
- return new GraphTerm(triples);
1142
- }
1143
-
1144
- parsePredicateObjectList(subject) {
1145
- const out = [];
1146
-
1147
- if (this.pendingTriples.length > 0) {
1148
- out.push(...this.pendingTriples);
1149
- this.pendingTriples = [];
1150
- }
1151
-
1152
- while (true) {
1153
- let verb;
1154
- let invert = false;
1155
-
1156
- if (this.peek().typ === 'Ident' && (this.peek().value || '') === 'a') {
1157
- this.next();
1158
- verb = internIri(RDF_NS + 'type');
1159
- } else if (this.peek().typ === 'Ident' && (this.peek().value || '') === 'has') {
1160
- this.next();
1161
- invert = true;
1162
- verb = this.parseTerm();
1163
- } else {
1164
- if (this.peek().typ === 'OpPredInvert') {
1165
- invert = true;
1166
- this.next();
1167
- }
1168
- verb = this.parseTerm();
1169
- }
1170
-
1171
- out.push(...this.parseAnnotatedObjectList(subject, verb, invert));
1172
-
1173
- if (this.peek().typ === 'Semicolon') {
1174
- this.next();
1175
- if (
1176
- this.peek().typ === 'Dot' ||
1177
- this.peek().typ === 'RBrace' ||
1178
- this.peek().typ === 'RBracket' ||
1179
- this.peek().typ === 'AnnClose'
1180
- )
1181
- break;
1182
- continue;
1183
- }
1184
- break;
1185
- }
1186
-
1187
- // Include any triples generated by nested blank node property lists / reifiers
1188
- // that were encountered while parsing this predicate-object list.
1189
- if (this.pendingTriples.length > 0) {
1190
- out.push(...this.pendingTriples);
1191
- this.pendingTriples = [];
1192
- }
1193
-
1194
- return out;
1195
- }
1196
-
1197
- parseObjectList() {
1198
- const objs = [this.parseTerm()];
1199
- while (this.peek().typ === 'Comma') {
1200
- this.next();
1201
- objs.push(this.parseTerm());
1202
- }
1203
- return objs;
1204
- }
1205
-
1206
- // RDF 1.2 Turtle/TriG: triple annotations and reifiers
1207
- // After an object, Turtle 1.2 allows optional:
1208
- // ~ <reifier>
1209
- // {| <predicateObjectList> |}
1210
- // We convert these into eyeling-friendly N3 by emitting:
1211
- // <reifier> log:nameOf { <s> <p> <o> . } .
1212
- // <reifier> <annP> <annO> .
1213
-
1214
- parseAnnotationBlock(reifier) {
1215
- this.expect('AnnOpen');
1216
- const out = [];
1217
- if (this.peek().typ !== 'AnnClose') {
1218
- out.push(...this.parsePredicateObjectList(reifier));
1219
- }
1220
- this.expect('AnnClose');
1221
- return out;
1222
- }
1223
-
1224
- parseAnnotatedObjectList(subject, verb, invert) {
1225
- const out = [];
1226
- out.push(...this.parseAnnotatedObjectTriples(subject, verb, invert));
1227
- while (this.peek().typ === 'Comma') {
1228
- this.next();
1229
- out.push(...this.parseAnnotatedObjectTriples(subject, verb, invert));
1230
- }
1231
- return out;
1232
- }
1233
-
1234
- parseAnnotatedObjectTriples(subject, verb, invert) {
1235
- const out = [];
1236
-
1237
- const obj = this.parseTerm();
1238
- const s = invert ? obj : subject;
1239
- const o = invert ? subject : obj;
1240
-
1241
- // asserted triple
1242
- // Special-case RDF 1.2 explicit triple reification:
1243
- // _:r rdf:reifies <<( s p o )>> .
1244
- // Emit as:
1245
- // _:r log:nameOf { s p o . } .
1246
- // This matches the mapping we already use for reifiedTriple sugar and annotations.
1247
- let assertedVerb = verb;
1248
- let assertedObj = o;
1249
- if (!invert && verb instanceof Iri && verb.value === RDF_NS + 'reifies' && obj instanceof GraphTerm) {
1250
- assertedVerb = internIri(log.nameOf);
1251
- assertedObj = obj;
1252
- }
1253
- out.push(new Triple(s, assertedVerb, assertedObj));
1254
-
1255
- // optional reifier and/or annotation blocks
1256
- let reifier = null;
1257
-
1258
- if (this.peek().typ === 'Tilde') {
1259
- this.next();
1260
- // Allow empty reifier: ~ {| ... |} (fresh blank node)
1261
- if (this.peek().typ === 'AnnOpen') reifier = this.freshReifier();
1262
- else reifier = this.parseTerm();
1263
- }
1264
-
1265
- // If there is an annotation block without an explicit reifier, allocate one
1266
- if (!reifier && this.peek().typ === 'AnnOpen') {
1267
- reifier = this.freshReifier();
1268
- }
1269
-
1270
- if (reifier) {
1271
- const tripleTerm = new GraphTerm([new Triple(s, assertedVerb, assertedObj)]);
1272
- this.emitReifies(reifier, tripleTerm);
1273
- if (this.pendingTriples.length) {
1274
- out.push(...this.pendingTriples);
1275
- this.pendingTriples = [];
1276
- }
1277
-
1278
- // zero or more annotation blocks
1279
- while (this.peek().typ === 'AnnOpen') {
1280
- out.push(...this.parseAnnotationBlock(reifier));
1281
- }
1282
- }
1283
-
1284
- return out;
1285
- }
1286
- }
1287
-
1288
- // TriG: Turtle + graph blocks (graphName { ... })
1289
class TriGParser extends TurtleParser {
  /**
   * Parse the token stream as a TriG document.
   *
   * Recognised top-level forms: VERSION announcements, @prefix / @base and
   * their SPARQL-style spellings, default graph blocks `{ ... }`, named graph
   * blocks (`GRAPH name { ... }` and `name { ... }`), and plain triple
   * statements.
   *
   * @returns {{quads: Array, prefixes: *}} quads as `{ s, p, o, g }` records,
   *   where `g` is null for the default graph, plus the prefix environment
   */
  parseTrigDocument() {
    const quads = [];

    // True when the next token is the given case-insensitive keyword.
    const atKeyword = (word) => {
      const tok = this.peek();
      return tok.typ === 'Ident' && typeof tok.value === 'string' && tok.value.toLowerCase() === word;
    };
    const tokenAfterNext = () => this.toks[this.pos + 1];
    const skipOptionalDot = () => {
      if (this.peek().typ === 'Dot') this.next();
    };
    const emit = (triples, graph) => {
      for (const { s, p, o } of triples) quads.push({ s, p, o, g: graph });
    };
    // Reads a `{ ... }` body (the '{' is already consumed) into the given graph.
    const readBlockInto = (graph) => {
      const body = this.parseGraph();
      skipOptionalDot(); // accept optional '.'
      emit(body.triples, graph);
    };

    while (this.peek().typ !== 'EOF') {
      // RDF 1.2: VERSION announcement (e.g., VERSION "1.2")
      if (atKeyword('version')) {
        this.next(); // VERSION
        const versionTok = this.next();
        if (versionTok.typ !== 'Literal') {
          throw new Error(`Expected a literal after VERSION, got ${versionTok.toString()}`);
        }
        skipOptionalDot(); // permissive
        continue;
      }

      // Directives
      if (this.peek().typ === 'AtPrefix') {
        this.next();
        this.parsePrefixDirective();
        continue;
      }
      if (this.peek().typ === 'AtBase') {
        this.next();
        this.parseBaseDirective();
        continue;
      }
      if (atKeyword('prefix')) {
        // Only a directive when followed by a "name:" identifier.
        const after = tokenAfterNext();
        if (after && after.typ === 'Ident' && typeof after.value === 'string' && after.value.endsWith(':')) {
          this.next();
          this.parseSparqlPrefixDirective();
          continue;
        }
      }
      if (atKeyword('base')) {
        // Only a directive when followed by an IRI reference.
        const after = tokenAfterNext();
        if (after && after.typ === 'IriRef') {
          this.next();
          this.parseSparqlBaseDirective();
          continue;
        }
      }

      // Default graph block: { ... }
      if (this.peek().typ === 'LBrace') {
        this.next(); // '{'
        readBlockInto(null);
        continue;
      }

      // SPARQL-style named graph block: GRAPH <g> { ... }
      if (atKeyword('graph')) {
        this.next(); // GRAPH
        const graphName = this.parseTerm();
        this.expect('LBrace');
        readBlockInto(graphName);
        continue;
      }

      // Either a named graph block `name { ... }` or a triple in the default graph.
      const first = this.parseTerm();

      if (this.peek().typ === 'LBrace') {
        this.next(); // '{'
        readBlockInto(first);
        continue;
      }

      let statementTriples;
      if (this.peek().typ !== 'Dot') {
        statementTriples = this.parsePredicateObjectList(first);
        this.expect('Dot');
      } else {
        // Bare term followed by '.': emit only what parsing the term queued.
        if (this.pendingTriples.length > 0) {
          statementTriples = this.pendingTriples;
          this.pendingTriples = [];
        } else {
          statementTriples = [];
        }
        this.next();
      }
      emit(statementTriples, null);
    }

    return { quads, prefixes: this.prefixes };
  }
}
1397
-
1398
- // ---------------------------------------------------------------------------
1399
- // Serializers (Turtle-ish / TriG-ish / N3-ish)
1400
- // ---------------------------------------------------------------------------
1401
-
1402
/**
 * Render a Turtle/N3 literal token string, shrinking a trailing datatype
 * IRIREF (^^<...>) to a prefixed name when a matching prefix is in scope,
 * e.g. "2021-07-07"^^<http://www.w3.org/2001/XMLSchema#date> -> "2021-07-07"^^xsd:date
 *
 * Only the datatype suffix at the very end of the token is considered, so
 * text that merely looks like `^^<...>` inside the lexical form is never
 * touched. The lexical spelling is kept as-is.
 *
 * @param {*} raw literal token; anything that is not a non-empty string is
 *   returned as String(raw)
 * @param {{shrinkIri: function(string): (string|null)}|null} prefixes prefix
 *   environment; when falsy the literal is returned unchanged
 * @returns {string} the literal text to write
 */
function literalToText(raw, prefixes) {
  if (!raw || typeof raw !== 'string') return String(raw);

  // The datatype, if any, is the final `^^<iri>` of the token.
  const m = /\^\^<([^>]+)>$/.exec(raw);
  if (!m) return raw;

  const qn = prefixes ? prefixes.shrinkIri(m[1]) : null;
  if (!qn) return raw;

  // Keep everything before the datatype marker byte-for-byte.
  return `${raw.slice(0, m.index)}^^${qn}`;
}
1425
-
1426
/**
 * Serialize a term to its N3 text form.
 *
 * @param {*} t term to write; null/undefined is written as `[]`
 * @param {*} prefixes prefix environment used to shrink IRIs (may be falsy)
 * @param {Map<string, string>} [skolemMap] replacement text for blank node labels
 * @returns {string} the textual form of the term
 */
function termToText(t, prefixes, skolemMap) {
  if (t == null) return '[]';

  const render = (inner) => termToText(inner, prefixes, skolemMap);

  if (t instanceof Iri) {
    if (t.value === RDF_NS + 'type') return 'a';
    const shortened = prefixes ? prefixes.shrinkIri(t.value) : null;
    return shortened || `<${t.value}>`;
  }

  if (t instanceof Blank) {
    // A label with a skolem replacement is written as that replacement.
    const isSkolemized = skolemMap && skolemMap.has(t.label);
    return isSkolemized ? skolemMap.get(t.label) : t.label;
  }

  if (t instanceof Literal) return literalToText(t.value, prefixes);
  if (t instanceof Var) return `?${t.name}`;

  if (t instanceof ListTerm) {
    const items = t.elems.map((x) => render(x));
    return `(${items.join(' ')})`;
  }

  if (t instanceof OpenListTerm) {
    const leading = t.prefix.map((x) => render(x));
    return `(${leading.join(' ')} ... ?${t.tailVar})`;
  }

  if (t instanceof GraphTerm) {
    const statements = t.triples.map((tr) => `${render(tr.s)} ${render(tr.p)} ${render(tr.o)} .`);
    return `{ ${statements.join(' ')} }`;
  }

  return String(t);
}
1453
-
1454
- // ---------------------------------------------------------------------------
1455
- // Skolemize blank nodes that would otherwise "split" across quoted graph terms.
1456
- //
1457
- // In N3, blank nodes inside { ... } are existentially scoped to that formula,
1458
- // so reusing the same _:id outside does NOT imply coreference.
1459
- // For RDF 1.2 triple terms we serialize as { s p o . }, we optionally replace
1460
- // any blank node that appears both inside a quoted graph term AND outside it
1461
- // with a stable IRI constant (<urn:skolem:...>) to preserve identity.
1462
- // ---------------------------------------------------------------------------
1463
-
1464
/**
 * Find blank node labels that occur in more than one N3 scope and map each of
 * them to a prefixed name in the skolem namespace.
 *
 * Scopes:
 *   - OUT: outside any GraphTerm
 *   - Gk:  inside the k-th GraphTerm encountered (every GraphTerm gets its own)
 *
 * Blank nodes inside a quoted graph term do not corefer with equally labelled
 * blank nodes elsewhere in N3, so labels seen in two or more scopes need a
 * stable name to keep their identity.
 *
 * @param {Iterable} triples triples with `s`, `p`, `o` terms
 * @returns {Map<string, string>} label -> `${SKOLEM_PREFIX}:local`
 */
function buildSkolemMapForBnodesThatCrossScopes(triples) {
  const scopesOfLabel = new Map(); // label -> Set of scope names
  let nextGraphScope = 0;

  const walk = (term, scope) => {
    if (!term) return;

    if (term instanceof Blank) {
      const known = scopesOfLabel.get(term.label);
      if (known) known.add(scope);
      else scopesOfLabel.set(term.label, new Set([scope]));
    } else if (term instanceof ListTerm) {
      for (const item of term.elems) walk(item, scope);
    } else if (term instanceof OpenListTerm) {
      for (const item of term.prefix) walk(item, scope);
    } else if (term instanceof GraphTerm) {
      // Every graph term opens a scope of its own.
      const inner = `G${nextGraphScope++}`;
      for (const tr of term.triples) {
        walk(tr.s, inner);
        walk(tr.p, inner);
        walk(tr.o, inner);
      }
    }
  };

  for (const tr of triples) {
    walk(tr.s, 'OUT');
    walk(tr.p, 'OUT');
    walk(tr.o, 'OUT');
  }

  const skolemMap = new Map();
  for (const [label, scopes] of scopesOfLabel) {
    if (scopes.size > 1) {
      const bare = label.startsWith('_:') ? label.slice(2) : label;
      skolemMap.set(label, `${SKOLEM_PREFIX}:${pnLocalSafe(bare)}`);
    }
  }
  return skolemMap;
}
1527
-
1528
- // ---------------------------------------------------------------------------
1529
- // RDF list (rdf:first/rest) folding
1530
- //
1531
- // Some producers expand Turtle/N3 list syntax into explicit RDF collection
1532
- // triples. When writing N3/Turtle, it is useful to fold those back into
1533
- // ListTerm so the output matches common Turtle/N3 expectations.
1534
- //
1535
- // We fold only “plain” lists where each list node has exactly one rdf:first and
1536
- // one rdf:rest triple, and no other outgoing triples. Intermediate nodes must
1537
- // not be referenced from outside the list chain. This keeps the transformation
1538
- // semantics-preserving.
1539
- // ---------------------------------------------------------------------------
1540
-
1541
/**
 * Build a string key for a term, suitable for use in Map/Set lookups.
 * The key starts with a tag for the term kind, followed by its content.
 *
 * @param {*} t term (may be null/undefined)
 * @returns {string} the key
 */
function termKey(t) {
  if (t == null) return 'N:null';
  if (t instanceof Iri) return `I:${t.value}`;
  if (t instanceof Blank) return `B:${t.label}`;
  if (t instanceof Literal) return `L:${t.value}`;
  if (t instanceof Var) return `V:${t.name}`;

  if (t instanceof ListTerm) {
    const items = t.elems.map(termKey).join(' ');
    return `T:(${items})`;
  }
  if (t instanceof OpenListTerm) {
    const leading = t.prefix.map(termKey).join(' ');
    return `T:(${leading} ... ?${t.tailVar})`;
  }
  if (t instanceof GraphTerm) {
    const statements = t.triples.map((tr) => [termKey(tr.s), termKey(tr.p), termKey(tr.o)].join(' '));
    return `G:{${statements.join(' ; ')}}`;
  }

  return `X:${String(t)}`;
}
1553
-
1554
/**
 * Fold explicit rdf:first / rdf:rest chains back into ListTerm values.
 *
 * A chain is folded only when that keeps the data equivalent:
 *   - the head is a blank node with exactly one rdf:first and one rdf:rest;
 *   - the head is used as an object at most once;
 *   - a head with extra outgoing triples has at most one of them and is not
 *     used as an object anywhere;
 *   - every later node has exactly its rdf:first and rdf:rest triples and is
 *     referenced exactly once, through rdf:rest;
 *   - the chain ends in rdf:nil and contains no cycle;
 *   - the head is mentioned somewhere besides its own rdf:first/rdf:rest
 *     triples. A list that nothing refers to is left untouched, because
 *     removing its triples would leave no place to write the folded list.
 *
 * @param {Array} triples input triples (not modified)
 * @returns {Array} the input array when nothing was folded, otherwise a new
 *   array with the list triples removed and list heads replaced by ListTerm
 */
function foldRdfLists(triples) {
  const rdfFirst = RDF_NS + 'first';
  const rdfRest = RDF_NS + 'rest';
  const rdfNil = RDF_NS + 'nil';

  const outBySubj = new Map(); // key -> { term, idxs: number[] }
  const incoming = new Map(); // key -> total incoming as object
  const incomingRest = new Map(); // key -> incoming via rdf:rest
  const nestedRefs = new Set(); // keys of blank nodes mentioned inside list / graph terms

  function addIncoming(objKey, viaRest) {
    incoming.set(objKey, (incoming.get(objKey) || 0) + 1);
    if (viaRest) incomingRest.set(objKey, (incomingRest.get(objKey) || 0) + 1);
  }

  // Record blank nodes that occur below the top level of a term. Such
  // mentions are rewritten by replaceTerm() but are not counted in `incoming`.
  function noteNested(t, depth) {
    if (t == null) return;
    if (t instanceof Blank) {
      if (depth > 0) nestedRefs.add(termKey(t));
      return;
    }
    if (t instanceof ListTerm) {
      for (const e of t.elems) noteNested(e, depth + 1);
      return;
    }
    if (t instanceof OpenListTerm) {
      for (const e of t.prefix) noteNested(e, depth + 1);
      return;
    }
    if (t instanceof GraphTerm) {
      for (const tr of t.triples) {
        noteNested(tr.s, depth + 1);
        noteNested(tr.p, depth + 1);
        noteNested(tr.o, depth + 1);
      }
    }
  }

  for (let i = 0; i < triples.length; i++) {
    const tr = triples[i];
    const sKey = termKey(tr.s);
    if (!outBySubj.has(sKey)) outBySubj.set(sKey, { term: tr.s, idxs: [] });
    outBySubj.get(sKey).idxs.push(i);

    const oKey = termKey(tr.o);
    const viaRest = isIri(tr.p, rdfRest);
    addIncoming(oKey, viaRest);

    noteNested(tr.s, 0);
    noteNested(tr.p, 0);
    noteNested(tr.o, 0);
  }

  function outgoingTriplesOf(key) {
    const rec = outBySubj.get(key);
    if (!rec) return [];
    return rec.idxs.map((idx) => ({ idx, tr: triples[idx] }));
  }

  // Identify candidate list heads: blank nodes with exactly one rdf:first and one rdf:rest.
  //
  // NOTE: the surrounding comments in this file state that the converter writes
  // one triple per line. Repeating a collection term ( ... ) across several
  // triples would mint a fresh list each time, so "annotated" heads (heads with
  // extra outgoing predicates) are folded only when:
  //   - the head is not referenced as an object elsewhere, and
  //   - there is at most one extra outgoing triple.
  const listMap = new Map(); // headKey -> { listTerm, removeIdxs:Set<number>, chainKeys:string[] }

  for (const [sKey, rec] of outBySubj.entries()) {
    if (!(rec.term instanceof Blank)) continue;

    const outs = outgoingTriplesOf(sKey);
    const firsts = outs.filter((x) => isIri(x.tr.p, rdfFirst));
    const rests = outs.filter((x) => isIri(x.tr.p, rdfRest));
    if (firsts.length !== 1 || rests.length !== 1) continue;

    const extras = outs.filter((x) => !(isIri(x.tr.p, rdfFirst) || isIri(x.tr.p, rdfRest)));
    const incHead = incoming.get(sKey) || 0;
    const incHeadRest = incomingRest.get(sKey) || 0;

    // Head sharing safety: if the head node is referenced multiple times,
    // folding would duplicate the list (not semantics-preserving).
    if (incHead > 1) continue;

    if (extras.length > 0) {
      if (incHead !== 0 || incHeadRest !== 0) continue;
      if (extras.length > 1) continue;
    }

    // Orphan safety: with no reference at all, folding would delete the
    // rdf:first/rdf:rest triples and write the list nowhere.
    if (incHead === 0 && extras.length === 0 && !nestedRefs.has(sKey)) continue;

    // Walk the rdf:rest chain.
    const elems = [];
    const removeIdxs = new Set();
    const chainKeys = [];
    const seen = new Set();
    const headKey = sKey;
    let curKey = sKey;
    let isOk = true;

    while (true) {
      if (seen.has(curKey)) {
        // Cycle in the rdf:rest chain.
        isOk = false;
        break;
      }
      seen.add(curKey);
      chainKeys.push(curKey);

      const outs2 = outgoingTriplesOf(curKey);
      const f2 = outs2.filter((x) => isIri(x.tr.p, rdfFirst));
      const r2 = outs2.filter((x) => isIri(x.tr.p, rdfRest));
      if (f2.length !== 1 || r2.length !== 1) {
        isOk = false;
        break;
      }

      // Only the head is allowed to have extra outgoing predicates.
      if (curKey !== headKey && outs2.length !== 2) {
        isOk = false;
        break;
      }

      elems.push(f2[0].tr.o);
      removeIdxs.add(f2[0].idx);
      removeIdxs.add(r2[0].idx);

      const next = r2[0].tr.o;
      if (next instanceof Iri && next.value === rdfNil) break;
      if (!(next instanceof Blank)) {
        isOk = false;
        break;
      }

      const nextKey = termKey(next);

      // Intermediate node safety: only referenced via rdf:rest and exactly once.
      const inc = incoming.get(nextKey) || 0;
      const incR = incomingRest.get(nextKey) || 0;
      if (inc !== incR || incR !== 1) {
        isOk = false;
        break;
      }

      curKey = nextKey;
    }

    if (!isOk) continue;

    listMap.set(headKey, { listTerm: new ListTerm(elems), removeIdxs, chainKeys });
  }

  if (listMap.size === 0) return triples;

  // Prevent double folding: intermediate nodes in a folded chain should not also be heads.
  const intermediate = new Set();
  for (const v of listMap.values()) {
    for (let i = 1; i < v.chainKeys.length; i++) intermediate.add(v.chainKeys[i]);
  }
  for (const k of intermediate) {
    if (listMap.has(k)) listMap.delete(k);
  }
  if (listMap.size === 0) return triples;

  // Build set of triple indices to remove (rdf:first/rest only).
  const removeAll = new Set();
  for (const v of listMap.values()) for (const idx of v.removeIdxs) removeAll.add(idx);

  // Replace list-head blank nodes with ListTerm *recursively* so nested collections fold too.
  function replaceTerm(t) {
    if (t == null) return t;

    if (t instanceof Blank) {
      const m = listMap.get(termKey(t));
      if (m) return replaceTerm(m.listTerm);
      return t;
    }
    if (t instanceof ListTerm) {
      return new ListTerm(t.elems.map((x) => replaceTerm(x)));
    }
    if (t instanceof OpenListTerm) {
      return new OpenListTerm(
        t.prefix.map((x) => replaceTerm(x)),
        t.tailVar,
      );
    }
    if (t instanceof GraphTerm) {
      const inner = t.triples.map((tr) => new Triple(replaceTerm(tr.s), replaceTerm(tr.p), replaceTerm(tr.o)));
      return new GraphTerm(inner);
    }
    return t;
  }

  const newTriples = [];
  for (let i = 0; i < triples.length; i++) {
    if (removeAll.has(i)) continue;
    const tr = triples[i];
    newTriples.push(new Triple(replaceTerm(tr.s), replaceTerm(tr.p), replaceTerm(tr.o)));
  }

  return newTriples;
}
1726
-
1727
/**
 * Build a prefix environment that keeps only the prefixes the given triples
 * actually use.
 *
 * A prefix counts as used when an IRI shrinks to a name with that prefix
 * (rdf:type is skipped because it is written as `a`), or when a literal token
 * contains a `^^prefix:local` datatype. List and graph terms are searched
 * recursively.
 *
 * @param {*} prefixes prefix environment; returned as-is when falsy or without a map
 * @param {Iterable} triples triples with `s`, `p`, `o` terms
 * @returns {*} a new PrefixEnv with the same base IRI and the used prefixes only
 */
function pruneUnusedPrefixes(prefixes, triples) {
  if (!prefixes || !prefixes.map) return prefixes;

  const referenced = new Set();

  const scan = (term) => {
    if (!term) return;

    if (term instanceof Iri) {
      if (term.value === RDF_NS + 'type') return; // written as 'a'
      const qname = prefixes.shrinkIri(term.value);
      // Everything before the first ':' is the prefix ('' for the default prefix).
      if (qname) referenced.add(qname.slice(0, qname.indexOf(':')));
    } else if (term instanceof Literal) {
      // The literal stores its full lexical token, so scan conservatively for ^^prefix:local.
      const datatypeQName = /\^\^([A-Za-z_][A-Za-z0-9_.-]*|):[A-Za-z_][A-Za-z0-9_.-]*/g;
      for (const hit of term.value.matchAll(datatypeQName)) {
        referenced.add(hit[1] || '');
      }
    } else if (term instanceof ListTerm) {
      for (const item of term.elems) scan(item);
    } else if (term instanceof GraphTerm) {
      for (const inner of term.triples) {
        scan(inner.s);
        scan(inner.p);
        scan(inner.o);
      }
    }
  };

  for (const tr of triples) {
    scan(tr.s);
    scan(tr.p);
    scan(tr.o);
  }

  const keptMap = {};
  for (const name of referenced) {
    const isDeclared = Object.prototype.hasOwnProperty.call(prefixes.map, name);
    if (isDeclared) keptMap[name] = prefixes.map[name];
  }

  return new PrefixEnv(keptMap, prefixes.baseIri);
}
1781
-
1782
/**
 * Tell whether a term is an Iri with exactly the given value.
 *
 * @param {*} t term to test
 * @param {string} iri expected IRI string
 * @returns {boolean} true only for an Iri whose value equals `iri`
 */
function isIri(t, iri) {
  if (!(t instanceof Iri)) return false;
  return t.value === iri;
}
1785
-
1786
/**
 * Render the `@base` / `@prefix` header lines for a prefix environment.
 *
 * @param {{baseIri?: string, map?: Object<string, string>}|null} prefixes
 * @returns {string} the directives joined by newlines ('' when there are
 *   none); prefixes whose IRI is empty are left out
 */
function renderPrefixPrologue(prefixes) {
  if (!prefixes) return '';

  const lines = prefixes.baseIri ? [`@base <${prefixes.baseIri}> .`] : [];

  const declarations = prefixes.map ? Object.entries(prefixes.map) : [];
  for (const [name, iri] of declarations) {
    // The default prefix has the empty name, so `${name}:` is just ':' for it.
    if (iri) lines.push(`@prefix ${name}: <${iri}> .`);
  }

  return lines.join('\n');
}
1800
-
1801
/**
 * Return a prefix environment that declares the skolem prefix, when any blank
 * node has been skolemized.
 *
 * Side effect: sets the module-level SKOLEM_PREFIX_IRI.
 *   - With SKOLEM_UUID set, SKOLEM_PREFIX_IRI is derived from it unless it is
 *     already set.
 *   - Without SKOLEM_UUID, a UUID is derived through deterministicUuidFromText()
 *     from SKOLEM_ROOT, the base IRI and the sorted skolemized labels, and
 *     SKOLEM_PREFIX_IRI is overwritten with it.
 *
 * @param {*} prefixes current prefix environment (may be falsy)
 * @param {Map<string, string>} skolemMap skolemized blank node labels
 * @returns {*} `prefixes` unchanged when skolemMap is empty or missing,
 *   otherwise a new PrefixEnv including the skolem prefix
 */
function ensureSkolemPrefix(prefixes, skolemMap) {
  if (!skolemMap || skolemMap.size === 0) return prefixes;

  if (SKOLEM_UUID) {
    if (!SKOLEM_PREFIX_IRI) SKOLEM_PREFIX_IRI = `${SKOLEM_ROOT}${SKOLEM_UUID}#`;
  } else {
    // Fallback: seed the UUID with the labels that need skolemization (plus @base if any).
    const base = prefixes ? prefixes.baseIri || '' : '';
    const sortedLabels = [...skolemMap.keys()].sort().join('\n');
    const seed = ['n3gen-skolem', SKOLEM_ROOT, base, sortedLabels, ''].join('\n');
    SKOLEM_PREFIX_IRI = `${SKOLEM_ROOT}${deterministicUuidFromText(seed)}#`;
  }

  const declared = (prefixes && prefixes.map) || {};
  const withSkolem = { ...declared, [SKOLEM_PREFIX]: SKOLEM_PREFIX_IRI };
  return new PrefixEnv(withSkolem, prefixes ? prefixes.baseIri : '');
}
1827
-
1828
- // Ensure log: prefix is available whenever we emit log:nameOf (or any other log:* IRI).
1829
/**
 * Tell whether any term in the triples refers to the log: namespace.
 *
 * An Iri counts when its value starts with LOG_NS; a Literal counts when its
 * token contains `^^log:` or `^^<LOG_NS...`. ListTerm and GraphTerm contents
 * are searched recursively.
 *
 * @param {Iterable|null|undefined} triples Triple instances or plain `{s,p,o}` objects
 * @returns {boolean} true when the log: namespace is used
 */
function usesLogNamespace(triples) {
  const termUsesLog = (term) => {
    if (!term) return false;
    if (term instanceof Iri) return term.value.startsWith(LOG_NS);
    if (term instanceof Literal) {
      return term.value.includes('^^log:') || term.value.includes(`^^<${LOG_NS}`);
    }
    if (term instanceof ListTerm) {
      for (const item of term.elems) {
        if (termUsesLog(item)) return true;
      }
      return false;
    }
    if (term instanceof GraphTerm) {
      for (const inner of term.triples) {
        if (tripleUsesLog(inner)) return true;
      }
      return false;
    }
    return false;
  };

  const tripleUsesLog = (tr) => termUsesLog(tr.s) || termUsesLog(tr.p) || termUsesLog(tr.o);

  for (const tr of triples || []) {
    if (tripleUsesLog(tr)) return true;
  }
  return false;
}
1869
-
1870
/**
 * Declare `log:` as LOG_NS when the triples use the log: namespace.
 * An existing `log` mapping is overwritten.
 *
 * @param {*} prefixes current prefix environment (may be falsy)
 * @param {Iterable} triples triples to inspect
 * @returns {*} `prefixes` unchanged when log: is unused, otherwise a new PrefixEnv
 */
function ensureLogPrefixIfUsed(prefixes, triples) {
  if (!usesLogNamespace(triples)) return prefixes;

  const declared = (prefixes && prefixes.map) || {};
  // Spread first so that the fixed log: mapping wins over any existing one.
  const withLog = { ...declared, log: LOG_NS };
  return new PrefixEnv(withLog, prefixes ? prefixes.baseIri : '');
}
1878
-
1879
/**
 * Tell whether any term in the triples needs the rdf: prefix.
 *
 * An Iri counts when it starts with RDF_NS and is not rdf:type (which is
 * rendered as `a`); a Literal counts when its token contains `^^rdf:` or
 * `^^<RDF_NS...`. ListTerm and GraphTerm contents are searched as well.
 *
 * @param {Iterable|null|undefined} triples triples with `s`, `p`, `o` terms
 * @returns {boolean} true when the rdf: namespace is used
 */
function usesRdfNamespace(triples) {
  const rdfType = RDF_NS + 'type';

  for (const tr of triples || []) {
    // Explicit work list instead of recursion; visiting order does not affect the answer.
    const todo = [tr.s, tr.p, tr.o];
    while (todo.length > 0) {
      const term = todo.pop();
      if (!term) continue;

      if (term instanceof Iri) {
        if (term.value.startsWith(RDF_NS) && term.value !== rdfType) return true;
      } else if (term instanceof Literal) {
        if (term.value.includes('^^rdf:') || term.value.includes(`^^<${RDF_NS}`)) return true;
      } else if (term instanceof ListTerm) {
        for (const item of term.elems) todo.push(item);
      } else if (term instanceof GraphTerm) {
        for (const inner of term.triples) todo.push(inner.s, inner.p, inner.o);
      }
    }
  }

  return false;
}
1919
-
1920
/**
 * Declare `rdf:` as RDF_NS when the triples use the rdf: namespace and no
 * `rdf` prefix is declared yet. An existing declaration is kept as-is.
 *
 * @param {*} prefixes current prefix environment (may be falsy)
 * @param {Iterable} triples triples to inspect
 * @returns {*} `prefixes` unchanged, or a new PrefixEnv with rdf: added
 */
function ensureRdfPrefixIfUsed(prefixes, triples) {
  if (!usesRdfNamespace(triples)) return prefixes;

  const declared = (prefixes && prefixes.map) || {};
  const alreadyDeclared = Object.prototype.hasOwnProperty.call(declared, 'rdf');
  if (alreadyDeclared) return prefixes;

  return new PrefixEnv({ ...declared, rdf: RDF_NS }, prefixes ? prefixes.baseIri : '');
}
1931
-
1932
/**
 * Tell whether any term in the triples needs the xsd: prefix.
 *
 * An Iri counts when its value starts with XSD_NS; a Literal counts when its
 * token contains `^^xsd:` or `^^<XSD_NS...`. ListTerm and GraphTerm contents
 * are searched recursively.
 *
 * @param {Iterable|null|undefined} triples triples with `s`, `p`, `o` terms
 * @returns {boolean} true when the XSD namespace is used
 */
function usesXsdPrefix(triples) {
  function needsXsd(term) {
    if (!term) return false;

    if (term instanceof Iri) return term.value.startsWith(XSD_NS);

    if (term instanceof Literal) {
      // Either the short form (^^xsd:date) or an explicit IRI datatype in the XSD namespace.
      return term.value.includes('^^xsd:') || term.value.includes(`^^<${XSD_NS}`);
    }

    if (term instanceof ListTerm) {
      for (const item of term.elems) {
        if (needsXsd(item)) return true;
      }
      return false;
    }

    if (term instanceof GraphTerm) {
      for (const inner of term.triples) {
        if (needsXsd(inner.s) || needsXsd(inner.p) || needsXsd(inner.o)) return true;
      }
      return false;
    }

    return false;
  }

  for (const tr of triples || []) {
    if (needsXsd(tr.s) || needsXsd(tr.p) || needsXsd(tr.o)) return true;
  }
  return false;
}
1974
-
1975
/**
 * Make sure the `xsd:` prefix is declared when the triples need it.
 *
 * @param {object|null|undefined} prefixes - prefix environment; its `map` and
 *   `baseIri` properties are read when present.
 * @param {Iterable<object>|null|undefined} triples - triples handed to
 *   `usesXsdPrefix` to decide whether `xsd:` is needed.
 * @returns {object|null|undefined} `prefixes` unchanged when `xsd:` is not
 *   needed or already declared; otherwise a new `PrefixEnv` that adds
 *   `xsd` -> `XSD_NS` on top of the existing declarations.
 */
function ensureXsdPrefixIfUsed(prefixes, triples) {
  const xsdNeeded = usesXsdPrefix(triples);
  if (!xsdNeeded) return prefixes;

  // An existing xsd: declaration wins, whatever namespace it points at.
  const declared = prefixes?.map || {};
  const alreadyDeclared = Object.prototype.hasOwnProperty.call(declared, 'xsd');
  if (alreadyDeclared) return prefixes;

  return new PrefixEnv({ ...declared, xsd: XSD_NS }, prefixes ? prefixes.baseIri : '');
}
1986
-
1987
/**
 * Bucket quads by their graph term.
 *
 * Keys are strings: `DEFAULT` for a null/undefined graph, `I:<value>` for an
 * `Iri`, `B:<label>` for a `Blank`, and `X:<String(g)>` for anything else.
 *
 * @param {Iterable<object>} quads - objects with `s`, `p`, `o` and `g`.
 * @returns {Map<string, {gTerm: *, triples: Array}>} buckets in first-seen
 *   order; `gTerm` is the graph term of the first quad that hit the key and
 *   `triples` holds a new `Triple(s, p, o)` per quad.
 */
function groupQuadsByGraph(quads) {
  const graphKey = (g) => {
    if (g == null) return 'DEFAULT';
    if (g instanceof Iri) return `I:${g.value}`;
    if (g instanceof Blank) return `B:${g.label}`;
    return `X:${String(g)}`;
  };

  const buckets = new Map();
  for (const quad of quads) {
    const key = graphKey(quad.g);
    let bucket = buckets.get(key);
    if (bucket === undefined) {
      bucket = { gTerm: quad.g, triples: [] };
      buckets.set(key, bucket);
    }
    bucket.triples.push(new Triple(quad.s, quad.p, quad.o));
  }
  return buckets;
}
2002
-
2003
/**
 * Serialize dataset quads as N3 text.
 *
 * Default-graph triples are written at top level; every other graph is written
 * as `<graph term> log:nameOf { ... } .`, with named graphs ordered by their
 * grouping key (`localeCompare`).
 *
 * @param {object} options
 * @param {Iterable<object>} options.datasetQuads - quads with `s`, `p`, `o`, `g`.
 * @param {object} options.prefixes - prefix environment to start from.
 * @returns {string} the N3 document, trimmed and terminated by one newline.
 */
function writeN3LogNameOf({ datasetQuads, prefixes }) {
  const grouped = groupQuadsByGraph(datasetQuads);
  const hasDefaultGraph = grouped.has('DEFAULT');

  // For prefix pruning + Skolemization, build a synthetic triple stream that
  // mirrors the *output* structure: default-graph triples sit outside any
  // GraphTerm, and each named graph appears as `gTerm log:nameOf { ... }`.
  // That lets blank nodes that must corefer across graphs be detected.
  const outputShape = [];
  const nameOfPredicate = new Iri(log.nameOf);

  if (hasDefaultGraph) {
    for (const tr of foldRdfLists(grouped.get('DEFAULT').triples)) outputShape.push(tr);
  }
  grouped.forEach((group, key) => {
    if (key === 'DEFAULT') return;
    const body = new GraphTerm(foldRdfLists(group.triples));
    outputShape.push({ s: group.gTerm, p: nameOfPredicate, o: body });
  });

  // Prefix bookkeeping, innermost step first: prune, then skolem, log, xsd, rdf.
  const pruned = pruneUnusedPrefixes(prefixes, outputShape);
  const skolemMap = buildSkolemMapForBnodesThatCrossScopes(outputShape);
  const withSkolem = ensureSkolemPrefix(pruned, skolemMap);
  const withLog = ensureLogPrefixIfUsed(withSkolem, outputShape);
  const withXsd = ensureXsdPrefixIfUsed(withLog, outputShape);
  const outPrefixes = ensureRdfPrefixIfUsed(withXsd, outputShape);

  const show = (term) => termToText(term, outPrefixes, skolemMap);
  const statement = (tr, indent) => `${indent}${show(tr.s)} ${show(tr.p)} ${show(tr.o)} .`;

  const lines = [];
  const prologue = renderPrefixPrologue(outPrefixes).trim();
  if (prologue) lines.push(prologue, '');

  // Default graph: triples go at top level, with no log:nameOf wrapper.
  if (hasDefaultGraph) {
    for (const tr of foldRdfLists(grouped.get('DEFAULT').triples)) {
      lines.push(statement(tr, ''));
    }
    lines.push('');
  }

  const namedGroups = [...grouped]
    .filter(([key]) => key !== 'DEFAULT')
    .sort((a, b) => a[0].localeCompare(b[0]));

  for (const [, group] of namedGroups) {
    lines.push(`${show(group.gTerm)} log:nameOf {`);
    for (const tr of foldRdfLists(group.triples)) {
      lines.push(statement(tr, ' '));
    }
    lines.push('} .', '');
  }

  return `${lines.join('\n').trim()}\n`;
}
2070
-
2071
- // ---------------------------------------------------------------------------
2072
- // Parsing + N3 output (Turtle/TriG -> N3)
2073
- // ---------------------------------------------------------------------------
2074
-
2075
/**
 * Parse TriG text: lex it, then run `TriGParser#parseTrigDocument`.
 *
 * @param {string} text - TriG source.
 * @returns {*} whatever `parseTrigDocument()` returns.
 */
function parseTriG(text) {
  const tokens = lex(text);
  return new TriGParser(tokens).parseTrigDocument();
}
2079
-
2080
/**
 * Parse Turtle text: lex it, then run `TurtleParser#parseTurtleDocument`.
 *
 * @param {string} text - Turtle source.
 * @returns {*} whatever `parseTurtleDocument()` returns.
 */
function parseTurtle(text) {
  const tokens = lex(text);
  return new TurtleParser(tokens).parseTurtleDocument();
}
2084
-
2085
/**
 * Serialize a flat list of triples as N3 text, one `s p o .` statement per line,
 * preceded by the prefix prologue when there is one.
 *
 * @param {object} options
 * @param {Iterable<object>} options.triples - triples to write.
 * @param {object} options.prefixes - prefix environment to start from.
 * @returns {string} the N3 document, trimmed and terminated by one newline.
 */
function writeN3Triples({ triples, prefixes }) {
  const folded = foldRdfLists(triples);

  // Prefix bookkeeping, innermost step first: prune, then skolem, log, xsd, rdf.
  const pruned = pruneUnusedPrefixes(prefixes, folded);
  const skolemMap = buildSkolemMapForBnodesThatCrossScopes(folded);
  const withSkolem = ensureSkolemPrefix(pruned, skolemMap);
  const withLog = ensureLogPrefixIfUsed(withSkolem, folded);
  const withXsd = ensureXsdPrefixIfUsed(withLog, folded);
  const outPrefixes = ensureRdfPrefixIfUsed(withXsd, folded);

  const show = (term) => termToText(term, outPrefixes, skolemMap);

  const lines = [];
  const prologue = renderPrefixPrologue(outPrefixes).trim();
  if (prologue) lines.push(prologue, '');

  for (const tr of folded) {
    lines.push(`${show(tr.s)} ${show(tr.p)} ${show(tr.o)} .`);
  }

  return `${lines.join('\n').trim()}\n`;
}
2106
-
2107
/**
 * Convert Turtle text to N3 text.
 *
 * @param {string} ttlText - Turtle source.
 * @returns {string} the N3 document produced by `writeN3Triples`.
 */
function turtleToN3(ttlText) {
  // Initialise the Skolem state from the input first, so the per-input Skolem
  // prefix IRI is deterministic even when this is used as a library.
  initSkolemForInput(ttlText);

  const parsed = parseTurtle(ttlText);
  return writeN3Triples({ triples: parsed.triples, prefixes: parsed.prefixes });
}
2113
-
2114
/**
 * Convert TriG text to N3 text.
 *
 * @param {string} trigText - TriG source.
 * @returns {string} the N3 document produced by `writeN3LogNameOf`.
 */
function trigToN3(trigText) {
  // Initialise the Skolem state from the input first, so the per-input Skolem
  // prefix IRI is deterministic even when this is used as a library.
  initSkolemForInput(trigText);

  const parsed = parseTriG(trigText);
  return writeN3LogNameOf({ datasetQuads: parsed.quads, prefixes: parsed.prefixes });
}
2120
-
2121
/**
 * Write the command-line usage text to stdout.
 */
function printHelp() {
  const usageLines = [
    'Usage:',
    'n3gen <file.ttl|file.trig>',
    '',
    'Converts RDF 1.2 Turtle (.ttl) or TriG (.trig) to Notation 3 (.n3) and writes to stdout.',
    '',
    'Examples:',
    'n3gen file.ttl > file.n3',
    'n3gen file.trig > file.n3',
    '',
  ];
  process.stdout.write(usageLines.join('\n'));
}
2132
-
2133
/**
 * CLI entry point: convert the single input file named on the command line
 * to N3 and write the result to stdout.
 *
 * Exit status is communicated through `process.exitCode`:
 *   - left untouched after `--help` / `-h`;
 *   - 2 when the argument count is wrong (usage is printed).
 *
 * @returns {Promise<void>}
 * @throws {Error} when the file extension is neither `.ttl` nor `.trig`
 *   (checked case-insensitively); file-system errors from reading the input
 *   propagate as rejections.
 */
async function main() {
  const args = process.argv.slice(2);
  const wantsHelp = args.includes('--help') || args.includes('-h');

  if (wantsHelp || args.length !== 1) {
    printHelp();
    // Explicitly asking for help is not an error; a wrong argument count is.
    if (!wantsHelp) process.exitCode = 2;
    return;
  }

  const [inputFile] = args;
  const ext = path.extname(inputFile).toLowerCase();

  // Validate the extension before touching the file system, so an unsupported
  // input is rejected with the usage error instead of being read (or failing
  // with an unrelated I/O error) first.
  if (ext !== '.ttl' && ext !== '.trig') {
    throw new Error(`Unsupported file extension "${ext}". Use .ttl or .trig`);
  }

  const text = await fs.readFile(inputFile, 'utf8');
  process.stdout.write(ext === '.ttl' ? turtleToN3(text) : trigToN3(text));
}
2162
-
2163
// Run the CLI; on failure print the stack (or the stringified error when there
// is no stack) to stderr and mark the process as failed.
main().catch((err) => {
  const report = err?.stack || String(err);
  console.error(report);
  process.exitCode = 1;
});