eyeling 1.24.3 → 1.24.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/HANDBOOK.md +2 -2
- package/dist/browser/eyeling.browser.js +1 -1
- package/eyeling.js +1 -1
- package/lib/lexer.js +1 -1
- package/package.json +2 -3
- package/see/README.md +1 -1
- package/see/examples/input/path_discovery.trig +1 -1
- package/see/see.js +1 -1
- package/test/n3gen.test.js +0 -166
- package/tools/n3gen.js +0 -2166
package/tools/n3gen.js
DELETED
|
@@ -1,2166 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
'use strict';
|
|
3
|
-
|
|
4
|
-
/*
|
|
5
|
-
* n3gen.js — Convert Turtle (.ttl) or TriG (.trig) to N3.
|
|
6
|
-
*
|
|
7
|
-
* This tool always emits N3 to stdout. The input syntax is selected by the file
|
|
8
|
-
* extension:
|
|
9
|
-
* - .ttl (RDF 1.2 Turtle)
|
|
10
|
-
* - .trig (RDF 1.2 TriG)
|
|
11
|
-
* *
|
|
12
|
-
* TriG → N3 mapping (named graphs)
|
|
13
|
-
* TriG: <graphName> { ...triples... }
|
|
14
|
-
* N3: <graphName> log:nameOf { ...triples... } .
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
* RDF 1.2 Turtle-star / TriG-star
|
|
18
|
-
* - triple terms: log:nameOf <<( s p o )>>
|
|
19
|
-
* - sugar form: << s p o >> :is true .
|
|
20
|
-
* triple terms are emitted as singleton graph terms in N3:
|
|
21
|
-
* log:nameOf { s p o . } .
|
|
22
|
-
*
|
|
23
|
-
* ----------------------------------------------------------------------------
|
|
24
|
-
* Usage
|
|
25
|
-
* n3gen file.ttl > file.n3
|
|
26
|
-
* n3gen file.trig > file.n3
|
|
27
|
-
*/
|
|
28
|
-
|
|
29
|
-
const fs = require('node:fs/promises');
|
|
30
|
-
const path = require('node:path');
|
|
31
|
-
const process = require('node:process');
|
|
32
|
-
|
|
33
|
-
const crypto = require('node:crypto');
|
|
34
|
-
|
|
35
|
-
/**
 * Normalise a possibly angle-bracketed IRI reference to its bare form.
 *
 * @param {*} s - Candidate value; anything that is not a string yields ''.
 * @returns {string} The trimmed text, without one surrounding `<...>` pair if present.
 */
function stripIriRef(s) {
  if (typeof s !== 'string') return '';
  const trimmed = s.trim();
  const isBracketed = trimmed.startsWith('<') && trimmed.endsWith('>');
  return isBracketed ? trimmed.slice(1, -1) : trimmed;
}
|
|
42
|
-
|
|
43
|
-
/**
 * Clean up a user-supplied Skolem root IRI.
 *
 * @param {*} root - Raw root, optionally wrapped in `<...>`.
 * @returns {string} '' when no usable root was given, otherwise the root with a trailing '/'.
 */
function normalizeSkolemRoot(root) {
  const bare = stripIriRef(root);
  if (!bare) return '';
  // Only a trailing slash is enforced; the '/.well-known/genid/' path itself is not checked.
  return bare.endsWith('/') ? bare : `${bare}/`;
}
|
|
50
|
-
|
|
51
|
-
// Skolemization (Option C)
|
|
52
|
-
//
|
|
53
|
-
// We mint recognizable Skolem IRIs using a stable, per-input UUID:
|
|
54
|
-
//
|
|
55
|
-
// @prefix skolem: <https://eyereasoner.github.io/.well-known/genid/UUID#>.
|
|
56
|
-
//
|
|
57
|
-
// and then replace cross-scope blank nodes with IRIs like: skolem:e38
|
|
58
|
-
//
|
|
59
|
-
// The UUID is deterministic from the *input file content* (SHA-256 based).
|
|
60
|
-
const SKOLEM_PREFIX = 'skolem';
|
|
61
|
-
const DEFAULT_SKOLEM_ROOT = 'https://eyereasoner.github.io/.well-known/genid/';
|
|
62
|
-
const SKOLEM_ROOT = normalizeSkolemRoot(process.env.SKOLEM_ROOT) || DEFAULT_SKOLEM_ROOT;
|
|
63
|
-
|
|
64
|
-
let SKOLEM_UUID = null; // e.g., '3f2504e0-4f89-5d3a-9a0c-0305e82c3301'
|
|
65
|
-
let SKOLEM_PREFIX_IRI = null; // e.g., 'https://.../.well-known/genid/<UUID>#'
|
|
66
|
-
|
|
67
|
-
/**
 * Derive a stable UUID-shaped identifier from the input text.
 *
 * The first 16 bytes of the SHA-256 digest of the UTF-8 text are used, with the
 * version nibble forced to 5 and the variant bits forced to the RFC 4122 pattern.
 *
 * @param {string} inputText - Text to hash.
 * @returns {string} Lower-case hex in 8-4-4-4-12 grouping.
 */
function deterministicUuidFromText(inputText) {
  const digest = crypto.createHash('sha256').update(inputText, 'utf8').digest();
  // Copy so the version/variant tweaks never touch the digest buffer itself.
  const bytes = Buffer.from(digest.subarray(0, 16));

  bytes[6] = (bytes[6] & 0x0f) | 0x50; // version nibble
  bytes[8] = (bytes[8] & 0x3f) | 0x80; // variant bits

  const hex = bytes.toString('hex');
  const bounds = [
    [0, 8],
    [8, 12],
    [12, 16],
    [16, 20],
    [20, hex.length],
  ];
  return bounds.map(([from, to]) => hex.slice(from, to)).join('-');
}
|
|
78
|
-
|
|
79
|
-
/**
 * Set the module-level Skolem state for one input document.
 *
 * Stores the UUID derived from the text in SKOLEM_UUID and the matching
 * prefix IRI (`<root><uuid>#`) in SKOLEM_PREFIX_IRI.
 *
 * @param {string} inputText - Full text of the input being converted.
 */
function initSkolemForInput(inputText) {
  const uuid = deterministicUuidFromText(inputText);
  SKOLEM_UUID = uuid;
  SKOLEM_PREFIX_IRI = `${SKOLEM_ROOT}${uuid}#`;
}
|
|
83
|
-
|
|
84
|
-
/**
 * Encode arbitrary text so it can be used as the local part of a prefixed name.
 *
 * Turtle PN_LOCAL accepts percent escapes (PLX), so everything unsafe is
 * percent-encoded. encodeURIComponent leaves `! ' ( ) * ~ - _ .` untouched;
 * of those, `! ' ( ) * ~` are not legal unescaped anywhere in PN_LOCAL, a
 * leading `-` or `.` is illegal, and a trailing `.` would be read as the
 * statement terminator. All of these are percent-escaped here.
 *
 * @param {string} s - Raw text (e.g. a blank node label).
 * @returns {string} A string matching the PN_LOCAL production ('' for empty input).
 */
function pnLocalSafe(s) {
  const toPercent = (c) => `%${c.charCodeAt(0).toString(16).toUpperCase()}`;
  return encodeURIComponent(s)
    .replace(/[!'()*~]/g, toPercent)
    .replace(/^[-.]/, toPercent) // PN_LOCAL may not start with '-' or '.'
    .replace(/\.$/, toPercent); // ...and may not end with '.'
}
|
|
89
|
-
|
|
90
|
-
// ---------------------------------------------------------------------------
|
|
91
|
-
// Mapping namespace
|
|
92
|
-
// ---------------------------------------------------------------------------
|
|
93
|
-
|
|
94
|
-
// Use the W3C log: vocabulary to represent:
|
|
95
|
-
// - TriG named graphs as N3 graph terms:
|
|
96
|
-
// <g> log:nameOf { ... } .
|
|
97
|
-
// - RDF 1.2 Turtle-star / TriG-star reified triples:
|
|
98
|
-
// <reifier> log:nameOf { <s> <p> <o> . } .
|
|
99
|
-
const LOG_NS = 'http://www.w3.org/2000/10/swap/log#';
|
|
100
|
-
const log = {
|
|
101
|
-
nameOf: `${LOG_NS}nameOf`,
|
|
102
|
-
};
|
|
103
|
-
|
|
104
|
-
// ---------------------------------------------------------------------------
|
|
105
|
-
// Minimal Turtle/N3 model + lexer + parser
|
|
106
|
-
// ---------------------------------------------------------------------------
|
|
107
|
-
|
|
108
|
-
const RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
|
|
109
|
-
const XSD_NS = 'http://www.w3.org/2001/XMLSchema#';
|
|
110
|
-
const OWL_NS = 'http://www.w3.org/2002/07/owl#';
|
|
111
|
-
|
|
112
|
-
// Avoid literal triple-quote sequences in this source (helps embedding in tools).
|
|
113
|
-
const DQ3 = '"'.repeat(3);
|
|
114
|
-
const SQ3 = "'".repeat(3);
|
|
115
|
-
|
|
116
|
-
// RDF 1.2: language tags follow BCP47 and may be followed by an initial direction suffix ("--ltr" / "--rtl").
|
|
117
|
-
// We validate in the lexer so downstream code can treat it as an opaque tag string.
|
|
118
|
-
const LANGTAG_WITH_DIR_REGEX = /^[A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*(?:--(?:ltr|rtl))?$/i;
|
|
119
|
-
|
|
120
|
-
/**
 * Resolve an IRI reference against a base IRI (RFC 3986 resolution via WHATWG URL).
 *
 * @param {string} ref - IRI reference, absolute or relative.
 * @param {string} base - Base IRI; when empty the reference is returned untouched.
 * @returns {string} `ref` itself when there is no base or it already has a scheme,
 *   otherwise the resolved absolute IRI.
 * @throws {TypeError} From the URL constructor when the reference cannot be resolved;
 *   malformed input fails fast instead of yielding a broken IRI.
 */
function resolveIriRef(ref, base) {
  const hasScheme = /^[A-Za-z][A-Za-z0-9+.-]*:/.test(ref);
  if (!base || hasScheme) return ref;
  return new URL(ref, base).href;
}
|
|
128
|
-
|
|
129
|
-
// Empty base class; the term kinds below extend it.
class Term {}
|
|
130
|
-
// IRI term; `value` holds the IRI string passed to the constructor.
class Iri extends Term {
  constructor(value) {
    super();
    this.value = value;
  }
}
|
|
136
|
-
// Literal term, stored in its lexical (already serialised) form.
class Literal extends Term {
  constructor(value) {
    super();
    this.value = value; // raw lexical form, e.g. "foo", 12, or "\"x\"^^<dt>"
  }
}
|
|
142
|
-
// Blank node term identified by its label.
class Blank extends Term {
  constructor(label) {
    super();
    this.label = label; // _:b1 etc
  }
}
|
|
148
|
-
// Variable term; the sigil is not part of the stored name.
class Var extends Term {
  constructor(name) {
    super();
    this.name = name; // no leading '?'
  }
}
|
|
154
|
-
// List term; `elems` is the value passed in by the caller (presumably an array of terms).
class ListTerm extends Term {
  constructor(elems) {
    super();
    this.elems = elems;
  }
}
|
|
160
|
-
// List with a fixed prefix of terms plus a named tail variable.
class OpenListTerm extends Term {
  constructor(prefix, tailVar) {
    super();
    this.prefix = prefix; // Term[]
    this.tailVar = tailVar; // string
  }
}
|
|
167
|
-
// Graph term: a term that wraps a list of triples.
class GraphTerm extends Term {
  constructor(triples) {
    super();
    this.triples = triples; // Triple[]
  }
}
|
|
173
|
-
// One statement: s, p and o are stored exactly as given (subject, predicate, object).
class Triple {
  constructor(s, p, o) {
    this.s = s;
    this.p = p;
    this.o = o;
  }
}
|
|
180
|
-
|
|
181
|
-
const __iriIntern = new Map();
|
|
182
|
-
const __literalIntern = new Map();
|
|
183
|
-
/**
 * Return the shared Iri instance for an IRI string, creating it on first use.
 *
 * @param {string} value - IRI string used as the cache key.
 * @returns {Iri} The same object for every call with the same value.
 */
function internIri(value) {
  const cached = __iriIntern.get(value);
  if (cached) return cached;

  const fresh = new Iri(value);
  __iriIntern.set(value, fresh);
  return fresh;
}
|
|
191
|
-
/**
 * Return the shared Literal instance for a lexical form, creating it on first use.
 *
 * @param {string} value - Raw lexical form used as the cache key.
 * @returns {Literal} The same object for every call with the same value.
 */
function internLiteral(value) {
  const cached = __literalIntern.get(value);
  if (cached) return cached;

  const fresh = new Literal(value);
  __literalIntern.set(value, fresh);
  return fresh;
}
|
|
199
|
-
|
|
200
|
-
/**
 * Prefix/base environment used for expanding prefixed names while reading and
 * for compacting IRIs while writing.
 */
class PrefixEnv {
  /**
   * @param {Object<string, string>} [map] - prefix -> namespace IRI (key "" is the default prefix).
   * @param {string} [baseIri] - Base IRI, '' when none is set.
   */
  constructor(map, baseIri) {
    this.map = map || {}; // prefix -> IRI (including "" for @prefix :)
    this.baseIri = baseIri || ''; // base IRI
  }

  /** @returns {PrefixEnv} An environment with no prefixes and no base. */
  static newDefault() {
    return new PrefixEnv({}, '');
  }

  /**
   * Declare or redeclare a prefix.
   * @param {string} pfx - Prefix label without the trailing ':'.
   * @param {string} iri - Namespace IRI.
   */
  setPrefix(pfx, iri) {
    this.map[pfx] = iri;
  }

  /**
   * Replace the base IRI.
   * @param {string} iri - New base IRI.
   */
  setBase(iri) {
    this.baseIri = iri;
  }

  /**
   * Expand `pfx:local` to a full IRI.
   *
   * @param {string} qn - Prefixed name.
   * @returns {string} The expanded IRI, or `qn` unchanged when it has no ':' or
   *   the prefix is not declared.
   */
  expandQName(qn) {
    const idx = qn.indexOf(':');
    if (idx < 0) return qn;
    const pfx = qn.slice(0, idx);
    const local = qn.slice(idx + 1);
    // Own-property check so inherited keys such as "toString" never act as prefixes.
    const base = Object.prototype.hasOwnProperty.call(this.map, pfx) ? this.map[pfx] : null;
    if (base == null) return qn;
    return base + local;
  }

  /**
   * Best-effort compaction of an IRI to a prefixed name for writing.
   *
   * The longest matching namespace wins. The local part must be plainly valid
   * as PN_LOCAL without escapes: ASCII letters, digits, '_', '-' and '.', not
   * starting with '-' or '.', and not ending with '.' (a trailing '.' would be
   * re-read as a statement terminator). Anything else returns null so the
   * caller can fall back to the `<iri>` form.
   *
   * @param {string} iri - Absolute IRI.
   * @returns {string|null} `pfx:local` / `:local`, or null when no safe compaction exists.
   */
  shrinkIri(iri) {
    let bestPfx = null;
    let bestBase = '';
    for (const [pfx, base] of Object.entries(this.map)) {
      if (!base) continue;
      if (iri.startsWith(base) && base.length > bestBase.length) {
        bestPfx = pfx;
        bestBase = base;
      }
    }
    if (bestPfx == null) return null;

    const local = iri.slice(bestBase.length);

    // Conservative “looks like PN_LOCAL-ish”
    if (!local) return null;
    if (!/^[A-Za-z0-9_](?:[A-Za-z0-9_.-]*[A-Za-z0-9_-])?$/.test(local)) return null;

    if (bestPfx === '') return `:${local}`;
    return `${bestPfx}:${local}`;
  }
}
|
|
251
|
-
|
|
252
|
-
// -------------------- LEXER ------------------------------
|
|
253
|
-
|
|
254
|
-
/**
 * Lexer token: a type tag plus an optional payload.
 */
class Token {
  /**
   * @param {string} typ - Token type, e.g. 'Ident', 'Literal', 'Dot'.
   * @param {*} [value=null] - Payload for tokens that carry one.
   */
  constructor(typ, value = null) {
    this.typ = typ;
    this.value = value;
  }

  /** @returns {string} `Token(typ)` or `Token(typ, <JSON payload>)`, for error messages. */
  toString() {
    const payload = this.value == null ? '' : `, ${JSON.stringify(this.value)}`;
    return `Token(${this.typ}${payload})`;
  }
}
|
|
264
|
-
|
|
265
|
-
/**
 * @param {string} c - A single character.
 * @returns {boolean} True when `c` is whitespace according to the regex class `\s`.
 */
function isWs(c) {
  const whitespace = /\s/;
  return whitespace.test(c);
}
|
|
268
|
-
/**
 * @param {string} c - A single character (code point).
 * @returns {boolean} True for Unicode letters and numbers and for `_`, `-`, `:` and `%`.
 */
function isNameChar(c) {
  const nameChar = /[\p{L}\p{N}_\-:%]/u;
  return nameChar.test(c);
}
|
|
271
|
-
|
|
272
|
-
/**
 * Remove the quote delimiters from a string token.
 *
 * Triple-quoted forms are tried first (double, then single), then the
 * single-character forms.
 *
 * @param {string} s - Quoted string as it appears in the source.
 * @returns {string} The content between the delimiters, or `s` unchanged when it is not quoted.
 */
function stripQuotes(s) {
  const wrappedIn = (delim) => s.startsWith(delim) && s.endsWith(delim);

  if (wrappedIn(DQ3) || wrappedIn(SQ3)) return s.slice(3, -3);
  if (wrappedIn('"') || wrappedIn("'")) return s.slice(1, -1);
  return s;
}
|
|
278
|
-
|
|
279
|
-
/**
 * Decode the backslash escapes of a Turtle/N3 string body.
 *
 * Handles \t \n \r \b \f \" \' \\ plus \uXXXX and \UXXXXXXXX. Anything
 * unrecognised or malformed is kept verbatim, backslash included, and a lone
 * backslash at the end of the input is preserved.
 *
 * @param {string} s - String content without its surrounding quotes.
 * @returns {string} The decoded text.
 */
function decodeN3StringEscapes(s) {
  const simple = new Map([
    ['t', '\t'],
    ['n', '\n'],
    ['r', '\r'],
    ['b', '\b'],
    ['f', '\f'],
    ['"', '"'],
    ["'", "'"],
    ['\\', '\\'],
  ]);
  const pieces = [];
  let pos = 0;

  while (pos < s.length) {
    const ch = s[pos++];
    if (ch !== '\\') {
      pieces.push(ch);
      continue;
    }
    if (pos >= s.length) {
      // Dangling backslash at the very end.
      pieces.push('\\');
      continue;
    }

    const code = s[pos++];
    if (simple.has(code)) {
      pieces.push(simple.get(code));
    } else if (code === 'u') {
      const digits = s.slice(pos, pos + 4);
      if (/^[0-9A-Fa-f]{4}$/.test(digits)) {
        pieces.push(String.fromCharCode(parseInt(digits, 16)));
        pos += 4;
      } else {
        pieces.push('\\u');
      }
    } else if (code === 'U') {
      const digits = s.slice(pos, pos + 8);
      if (/^[0-9A-Fa-f]{8}$/.test(digits)) {
        const cp = parseInt(digits, 16);
        // Values beyond the Unicode range are left as written.
        pieces.push(cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : '\\U' + digits);
        pos += 8;
      } else {
        pieces.push('\\U');
      }
    } else {
      pieces.push('\\' + code);
    }
  }
  return pieces.join('');
}
|
|
341
|
-
|
|
342
|
-
/**
 * Tokenize Turtle / TriG / N3 source text.
 *
 * Token types produced: OpImplies, Equals, OpImpliedBy, OpPredInvert,
 * StarOpen, StarClose, Tilde, AnnOpen, AnnClose, IriRef, OpPathFwd,
 * OpPathRev, HatHat, LBrace, RBrace, LParen, RParen, LBracket, RBracket,
 * Semicolon, Comma, Dot, Literal, LangTag, AtPrefix, AtBase, Var, Ident and a
 * final EOF. Quoted string literals are decoded and stored in canonical
 * JSON-quoted form; numeric and boolean literals are stored as written.
 *
 * @param {string} inputText - Complete source text.
 * @returns {Token[]} Tokens in source order, always terminated by an EOF token.
 * @throws {Error} On an unterminated IRI or string literal, an invalid language
 *   tag or direction, an unknown @directive, a missing variable name, or a
 *   character that starts no token.
 */
function lex(inputText) {
  // Work on code points so astral characters occupy a single position.
  const chars = Array.from(inputText);
  const n = chars.length;
  const tokens = [];
  let i = 0;

  const PUNCTUATION = new Map([
    ['{', 'LBrace'],
    ['}', 'RBrace'],
    ['(', 'LParen'],
    [')', 'RParen'],
    ['[', 'LBracket'],
    [']', 'RBracket'],
    [';', 'Semicolon'],
    [',', 'Comma'],
    ['.', 'Dot'],
  ]);
  const DOUBLE = /^[+-]?(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)[eE][+-]?[0-9]+/;
  const DECIMAL = /^[+-]?[0-9]*\.[0-9]+/;
  const INTEGER = /^[+-]?[0-9]+/;
  const NUMBER_CHAR = /^[0-9+\-.eE]$/;

  function peek(offset = 0) {
    const j = i + offset;
    return j >= 0 && j < n ? chars[j] : null;
  }
  const isDigit = (ch) => ch !== null && /[0-9]/.test(ch);
  const isAlpha = (ch) => ch !== null && /[A-Za-z]/.test(ch);
  const isAlnum = (ch) => ch !== null && /[A-Za-z0-9]/.test(ch);
  const push = (typ, value = null) => tokens.push(new Token(typ, value));

  // Emit a bare word: the booleans are literals, everything else is an identifier.
  const pushWord = (w) => push(w === 'true' || w === 'false' ? 'Literal' : 'Ident', w);

  // Read <...>; `i` is on the opening '<'. Returns the text between the brackets.
  function readIriRef() {
    i++;
    const start = i;
    while (i < n && chars[i] !== '>') i++;
    if (i >= n) throw new Error('Unterminated IRI <...>');
    const iri = chars.slice(start, i).join('');
    i++; // consume '>'
    return iri;
  }

  // Read a triple-quoted string body; `i` is on the first of the three opening quotes.
  function readLongString(quote) {
    i += 3;
    const buf = [];
    while (i < n) {
      const cc = chars[i];
      if (cc === '\\') {
        i++;
        if (i < n) {
          buf.push('\\', chars[i]);
          i++;
        } else buf.push('\\');
        continue;
      }
      if (cc === quote) {
        let run = 0;
        while (i + run < n && chars[i + run] === quote) run++;
        i += run;
        if (run >= 3) {
          // The last three quotes of the run close the literal; any extra ones are content.
          for (let k = 0; k < run - 3; k++) buf.push(quote);
          return buf.join('');
        }
        for (let k = 0; k < run; k++) buf.push(quote);
        continue;
      }
      buf.push(cc);
      i++;
    }
    throw new Error('Unterminated long string literal');
  }

  // Read a single-quoted-delimiter string body; `i` is on the opening quote.
  function readShortString(quote) {
    i++;
    const buf = [];
    while (i < n) {
      const cc = chars[i];
      i++;
      if (cc === '\\') {
        if (i < n) {
          buf.push('\\', chars[i]);
          i++;
        }
        continue;
      }
      if (cc === quote) return buf.join('');
      buf.push(cc);
    }
    // Reaching EOF without the closing quote used to be accepted silently,
    // swallowing the rest of the document into one literal.
    throw new Error('Unterminated string literal');
  }

  // Read a language tag (optionally with --ltr / --rtl); `i` is just past the '@'.
  function readLangTag() {
    // RDF 1.2: language tags follow BCP47 and may be followed by an initial text direction.
    const tagChars = [];
    if (!isAlpha(peek())) throw new Error("Invalid language tag (expected [A-Za-z] after '@')");

    // Primary language subtag (1..8 alpha)
    while (isAlpha(peek())) {
      tagChars.push(peek());
      i++;
      if (tagChars.length > 8) throw new Error('Invalid language tag (primary subtag too long; max 8)');
    }

    // Additional BCP47 subtags: -[A-Za-z0-9]{1,8}
    while (peek() === '-' && peek(1) !== '-') {
      tagChars.push('-');
      i++;
      const segChars = [];
      if (!isAlnum(peek())) throw new Error("Invalid language tag (expected [A-Za-z0-9]+ after '-')");
      while (isAlnum(peek())) {
        segChars.push(peek());
        i++;
        if (segChars.length > 8) throw new Error('Invalid language tag subtag too long; max 8');
      }
      tagChars.push(...segChars);
    }

    // Optional initial direction suffix: --ltr / --rtl
    if (peek() === '-' && peek(1) === '-') {
      i += 2;
      const dirChars = [];
      while (isAlpha(peek())) {
        dirChars.push(peek());
        i++;
        if (dirChars.length > 3) break;
      }
      const dir = dirChars.join('').toLowerCase();
      if (dir !== 'ltr' && dir !== 'rtl') {
        throw new Error('Invalid language direction (expected --ltr or --rtl)');
      }
      tagChars.push('-', '-', dir);
    }

    const lang = tagChars.join('');
    if (!LANGTAG_WITH_DIR_REGEX.test(lang)) {
      throw new Error(`Invalid BCP47 language tag: ${lang}`);
    }
    return lang;
  }

  // Read a numeric literal starting at `i` (double, then decimal, then integer).
  function readNumber() {
    // Only the run of characters that can belong to a number is examined, so
    // the cost is proportional to the literal, not to the remaining input.
    let end = i;
    while (end < n && NUMBER_CHAR.test(chars[end])) end++;
    const candidate = chars.slice(i, end).join('');
    const m = DOUBLE.exec(candidate) || DECIMAL.exec(candidate) || INTEGER.exec(candidate);
    if (!m) throw new Error(`Invalid numeric literal near: ${chars.slice(i, i + 32).join('')}`);
    i += m[0].length; // numeric text is ASCII, so length equals the code point count
    return m[0];
  }

  while (i < n) {
    const c = chars[i];

    // 1) whitespace
    if (isWs(c)) {
      i++;
      continue;
    }

    // 2) # comments
    if (c === '#') {
      while (i < n && chars[i] !== '\n' && chars[i] !== '\r') i++;
      continue;
    }

    // 3) operators: =>, <= ; single '=' as owl:sameAs
    if (c === '=') {
      if (peek(1) === '>') {
        push('OpImplies');
        i += 2;
      } else {
        push('Equals');
        i += 1;
      }
      continue;
    }

    // RDF 1.2 Turtle-star / TriG-star tokens
    if (c === '>' && peek(1) === '>') {
      push('StarClose');
      i += 2;
      continue;
    }
    if (c === '~') {
      push('Tilde');
      i += 1;
      continue;
    }

    // RDF 1.2 Turtle/TriG annotation blocks {| ... |}
    if (c === '{' && peek(1) === '|') {
      push('AnnOpen');
      i += 2;
      continue;
    }
    if (c === '|' && peek(1) === '}') {
      push('AnnClose');
      i += 2;
      continue;
    }

    if (c === '<') {
      const la = peek(1);
      if (la === '<') {
        push('StarOpen');
        i += 2;
      } else if (la === '=') {
        push('OpImpliedBy');
        i += 2;
      } else if (la === '-') {
        push('OpPredInvert');
        i += 2;
      } else {
        push('IriRef', readIriRef());
      }
      continue;
    }

    // 4) path operators: !, ^, ^^
    if (c === '!') {
      push('OpPathFwd');
      i++;
      continue;
    }
    if (c === '^') {
      if (peek(1) === '^') {
        push('HatHat');
        i += 2;
      } else {
        push('OpPathRev');
        i++;
      }
      continue;
    }

    // 5) punctuation
    // RDF 1.2: a '.' directly followed by a digit starts a decimal (e.g. .5)
    // and is left for the numeric branch below.
    if (PUNCTUATION.has(c) && !(c === '.' && isDigit(peek(1)))) {
      push(PUNCTUATION.get(c));
      i++;
      continue;
    }

    // 6) string literals: short or long (double or single)
    if (c === '"' || c === "'") {
      const isLong = peek(1) === c && peek(2) === c;
      const body = isLong ? readLongString(c) : readShortString(c);
      push('Literal', JSON.stringify(decodeN3StringEscapes(body)));
      continue;
    }

    // 7) directives or language tags with '@'
    if (c === '@') {
      const prevTok = tokens.length ? tokens[tokens.length - 1] : null;
      // A language tag can only follow a quoted literal (stored with a leading '"').
      const prevWasQuotedLiteral =
        prevTok && prevTok.typ === 'Literal' && typeof prevTok.value === 'string' && prevTok.value.startsWith('"');

      i++; // consume '@'

      if (prevWasQuotedLiteral) {
        push('LangTag', readLangTag());
        continue;
      }

      const wordChars = [];
      while (isAlpha(peek())) {
        wordChars.push(peek());
        i++;
      }
      const word = wordChars.join('');
      if (word === 'prefix') push('AtPrefix');
      else if (word === 'base') push('AtBase');
      else throw new Error(`Unknown directive @${word}`);
      continue;
    }

    // 8) numeric literals (RDF 1.2 Turtle shorthand: integer / decimal / double)
    //   integer: [+-]?[0-9]+
    //   decimal: [+-]?[0-9]*\.[0-9]+   (allows .5)
    //   double : [+-]?(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)[eE][+-]?[0-9]+
    const startsNumber =
      isDigit(c) ||
      (c === '.' && isDigit(peek(1))) ||
      ((c === '-' || c === '+') && (isDigit(peek(1)) || (peek(1) === '.' && isDigit(peek(2)))));
    if (startsNumber) {
      push('Literal', readNumber());
      continue;
    }

    // 9) var: ?x (SPARQL vars) or $this / $value (SHACL SPARQL vars)
    if (c === '?' || c === '$') {
      const sigil = c;
      i++;
      const nameChars = [];
      let cc;
      while ((cc = peek()) !== null && isNameChar(cc)) {
        nameChars.push(cc);
        i++;
      }
      if (!nameChars.length) throw new Error(`Expected variable name after '${sigil}'`);
      push('Var', nameChars.join(''));
      continue;
    }

    // 10) identifier / qname / keywords
    if (isNameChar(c) || c === '_') {
      const nameChars = [c];
      i++;
      while (i < n && (isNameChar(chars[i]) || chars[i] === '_' || chars[i] === '.')) {
        nameChars.push(chars[i]);
        i++;
      }
      const word = nameChars.join('');

      // Trailing '.' characters are statement terminators, which allows
      // ':s :p :o.' with no whitespace before the dot. '...' stays a single
      // identifier (used by some N3 syntaxes).
      if (word !== '...' && word.endsWith('.') && word.length > 1) {
        let w = word;
        let dots = 0;
        while (w.endsWith('.') && w.length > 0 && w !== '...') {
          w = w.slice(0, -1);
          dots++;
        }
        if (w.length > 0) {
          pushWord(w);
          for (let d = 0; d < dots; d++) push('Dot');
          continue;
        }
      }

      pushWord(word);
      continue;
    }

    throw new Error(`Unexpected character in input: ${JSON.stringify(c)}`);
  }

  push('EOF');
  return tokens;
}
|
|
778
|
-
|
|
779
|
-
// -------------------- PARSER (Turtle + N3-graphs; TriG extension separately) --------------------
|
|
780
|
-
|
|
781
|
-
class TurtleParser {
|
|
782
|
-
constructor(tokens) {
|
|
783
|
-
this.toks = tokens;
|
|
784
|
-
this.pos = 0;
|
|
785
|
-
this.prefixes = PrefixEnv.newDefault();
|
|
786
|
-
this.blankCounter = 0;
|
|
787
|
-
this.pendingTriples = [];
|
|
788
|
-
this.reifierCounter = 0;
|
|
789
|
-
this.reifiesEmitted = new Set();
|
|
790
|
-
}
|
|
791
|
-
|
|
792
|
-
peek() {
|
|
793
|
-
return this.toks[this.pos];
|
|
794
|
-
}
|
|
795
|
-
|
|
796
|
-
next() {
|
|
797
|
-
const tok = this.toks[this.pos];
|
|
798
|
-
this.pos += 1;
|
|
799
|
-
return tok;
|
|
800
|
-
}
|
|
801
|
-
|
|
802
|
-
expect(typ) {
|
|
803
|
-
const tok = this.next();
|
|
804
|
-
if (tok.typ !== typ) throw new Error(`Expected ${typ}, got ${tok.toString()}`);
|
|
805
|
-
return tok;
|
|
806
|
-
}
|
|
807
|
-
|
|
808
|
-
// Generate a fresh blank node used for RDF 1.2 reifiedTriple sugar (<< s p o >>)
|
|
809
|
-
freshReifier() {
|
|
810
|
-
this.reifierCounter += 1;
|
|
811
|
-
return new Blank(`_:n3r${this.reifierCounter}`);
|
|
812
|
-
}
|
|
813
|
-
|
|
814
|
-
termKey(t) {
|
|
815
|
-
if (t == null) return '[]';
|
|
816
|
-
if (t instanceof Iri) return `I:${t.value}`;
|
|
817
|
-
if (t instanceof Blank) return `B:${t.label}`;
|
|
818
|
-
if (t instanceof Literal) return `L:${t.value}`;
|
|
819
|
-
if (t instanceof Var) return `V:${t.name}`;
|
|
820
|
-
if (t instanceof ListTerm) return `T:(` + t.elems.map((x) => this.termKey(x)).join(' ') + `)`;
|
|
821
|
-
if (t instanceof GraphTerm) {
|
|
822
|
-
const inner = t.triples
|
|
823
|
-
.map((tr) => `${this.termKey(tr.s)} ${this.termKey(tr.p)} ${this.termKey(tr.o)}`)
|
|
824
|
-
.join(' | ');
|
|
825
|
-
return `G:{${inner}}`;
|
|
826
|
-
}
|
|
827
|
-
return `X:${String(t)}`;
|
|
828
|
-
}
|
|
829
|
-
|
|
830
|
-
// Emit the implicit (or explicit) reifier triple required by RDF 1.2 reifiedTriple sugar:
|
|
831
|
-
// reifier log:nameOf tripleTerm .
|
|
832
|
-
// We represent tripleTerm in N3 as a quoted graph term: { s p o . }
|
|
833
|
-
emitReifies(reifier, tripleGraph) {
|
|
834
|
-
const key = `${this.termKey(reifier)}|${this.termKey(tripleGraph)}`;
|
|
835
|
-
if (this.reifiesEmitted.has(key)) return;
|
|
836
|
-
this.reifiesEmitted.add(key);
|
|
837
|
-
this.pendingTriples.push(new Triple(reifier, internIri(LOG_NS + 'nameOf'), tripleGraph));
|
|
838
|
-
}
|
|
839
|
-
|
|
840
|
-
// Accept '.' OR (when inside {...}) accept '}' as implicit terminator for last triple
|
|
841
|
-
expectDotOrRBrace() {
|
|
842
|
-
const tok = this.peek();
|
|
843
|
-
if (tok.typ === 'Dot') {
|
|
844
|
-
this.next();
|
|
845
|
-
return;
|
|
846
|
-
}
|
|
847
|
-
if (tok.typ === 'RBrace') return;
|
|
848
|
-
throw new Error(`Expected '.' (or '}'), got ${tok.toString()}`);
|
|
849
|
-
}
|
|
850
|
-
|
|
851
|
-
parsePrefixDirective() {
|
|
852
|
-
// @prefix pfx: <iri> .
|
|
853
|
-
const pfxTok = this.next();
|
|
854
|
-
if (pfxTok.typ !== 'Ident') throw new Error(`Expected prefix label after @prefix, got ${pfxTok.toString()}`);
|
|
855
|
-
const label = (pfxTok.value || '').replace(/:$/, '');
|
|
856
|
-
const iriTok = this.next();
|
|
857
|
-
let iri;
|
|
858
|
-
if (iriTok.typ === 'IriRef') iri = iriTok.value || '';
|
|
859
|
-
else if (iriTok.typ === 'Ident') iri = iriTok.value || '';
|
|
860
|
-
else throw new Error(`Expected IRI after @prefix, got ${iriTok.toString()}`);
|
|
861
|
-
this.expect('Dot');
|
|
862
|
-
this.prefixes.setPrefix(label, iri);
|
|
863
|
-
}
|
|
864
|
-
|
|
865
|
-
parseSparqlPrefixDirective() {
|
|
866
|
-
// PREFIX pfx: <iri> (no trailing '.')
|
|
867
|
-
const pfxTok = this.next();
|
|
868
|
-
if (pfxTok.typ !== 'Ident') throw new Error(`Expected prefix label after PREFIX, got ${pfxTok.toString()}`);
|
|
869
|
-
const label = (pfxTok.value || '').replace(/:$/, '');
|
|
870
|
-
const iriTok = this.next();
|
|
871
|
-
let iri;
|
|
872
|
-
if (iriTok.typ === 'IriRef') iri = iriTok.value || '';
|
|
873
|
-
else if (iriTok.typ === 'Ident') iri = iriTok.value || '';
|
|
874
|
-
else throw new Error(`Expected IRI after PREFIX, got ${iriTok.toString()}`);
|
|
875
|
-
if (this.peek().typ === 'Dot') this.next(); // permissive
|
|
876
|
-
this.prefixes.setPrefix(label, iri);
|
|
877
|
-
}
|
|
878
|
-
|
|
879
|
-
parseBaseDirective() {
|
|
880
|
-
// @base <iri> .
|
|
881
|
-
const iriTok = this.next();
|
|
882
|
-
let iri;
|
|
883
|
-
if (iriTok.typ === 'IriRef') iri = iriTok.value || '';
|
|
884
|
-
else if (iriTok.typ === 'Ident') iri = iriTok.value || '';
|
|
885
|
-
else throw new Error(`Expected IRI after @base, got ${iriTok.toString()}`);
|
|
886
|
-
this.expect('Dot');
|
|
887
|
-
this.prefixes.setBase(iri);
|
|
888
|
-
}
|
|
889
|
-
|
|
890
|
-
parseSparqlBaseDirective() {
|
|
891
|
-
// BASE <iri>
|
|
892
|
-
const iriTok = this.next();
|
|
893
|
-
if (iriTok.typ !== 'IriRef') throw new Error(`Expected <IRI> after BASE, got ${iriTok.toString()}`);
|
|
894
|
-
const iri = iriTok.value || '';
|
|
895
|
-
if (this.peek().typ === 'Dot') this.next(); // permissive
|
|
896
|
-
this.prefixes.setBase(iri);
|
|
897
|
-
}
|
|
898
|
-
|
|
899
|
-
parseTurtleDocument() {
|
|
900
|
-
const triples = [];
|
|
901
|
-
while (this.peek().typ !== 'EOF') {
|
|
902
|
-
// RDF 1.2: VERSION announcement (e.g., VERSION "1.2")
|
|
903
|
-
if (
|
|
904
|
-
this.peek().typ === 'Ident' &&
|
|
905
|
-
typeof this.peek().value === 'string' &&
|
|
906
|
-
this.peek().value.toLowerCase() === 'version'
|
|
907
|
-
) {
|
|
908
|
-
this.next(); // VERSION
|
|
909
|
-
const vTok = this.next();
|
|
910
|
-
if (vTok.typ !== 'Literal') throw new Error(`Expected a literal after VERSION, got ${vTok.toString()}`);
|
|
911
|
-
if (this.peek().typ === 'Dot') this.next(); // permissive
|
|
912
|
-
continue;
|
|
913
|
-
}
|
|
914
|
-
|
|
915
|
-
if (this.peek().typ === 'AtPrefix') {
|
|
916
|
-
this.next();
|
|
917
|
-
this.parsePrefixDirective();
|
|
918
|
-
continue;
|
|
919
|
-
}
|
|
920
|
-
if (this.peek().typ === 'AtBase') {
|
|
921
|
-
this.next();
|
|
922
|
-
this.parseBaseDirective();
|
|
923
|
-
continue;
|
|
924
|
-
}
|
|
925
|
-
// SPARQL-style directives
|
|
926
|
-
if (
|
|
927
|
-
this.peek().typ === 'Ident' &&
|
|
928
|
-
typeof this.peek().value === 'string' &&
|
|
929
|
-
this.peek().value.toLowerCase() === 'prefix' &&
|
|
930
|
-
this.toks[this.pos + 1] &&
|
|
931
|
-
this.toks[this.pos + 1].typ === 'Ident' &&
|
|
932
|
-
typeof this.toks[this.pos + 1].value === 'string' &&
|
|
933
|
-
this.toks[this.pos + 1].value.endsWith(':')
|
|
934
|
-
) {
|
|
935
|
-
this.next(); // PREFIX
|
|
936
|
-
this.parseSparqlPrefixDirective();
|
|
937
|
-
continue;
|
|
938
|
-
}
|
|
939
|
-
if (
|
|
940
|
-
this.peek().typ === 'Ident' &&
|
|
941
|
-
typeof this.peek().value === 'string' &&
|
|
942
|
-
this.peek().value.toLowerCase() === 'base' &&
|
|
943
|
-
this.toks[this.pos + 1] &&
|
|
944
|
-
this.toks[this.pos + 1].typ === 'IriRef'
|
|
945
|
-
) {
|
|
946
|
-
this.next(); // BASE
|
|
947
|
-
this.parseSparqlBaseDirective();
|
|
948
|
-
continue;
|
|
949
|
-
}
|
|
950
|
-
|
|
951
|
-
const subj = this.parseTerm();
|
|
952
|
-
|
|
953
|
-
let more;
|
|
954
|
-
if (this.peek().typ === 'Dot') {
|
|
955
|
-
more = [];
|
|
956
|
-
if (this.pendingTriples.length > 0) {
|
|
957
|
-
more = this.pendingTriples;
|
|
958
|
-
this.pendingTriples = [];
|
|
959
|
-
}
|
|
960
|
-
this.next();
|
|
961
|
-
} else {
|
|
962
|
-
more = this.parsePredicateObjectList(subj);
|
|
963
|
-
this.expect('Dot');
|
|
964
|
-
}
|
|
965
|
-
triples.push(...more);
|
|
966
|
-
}
|
|
967
|
-
return { triples, prefixes: this.prefixes };
|
|
968
|
-
}
|
|
969
|
-
|
|
970
|
-
parseTerm() {
|
|
971
|
-
let t = this.parsePathItem();
|
|
972
|
-
while (this.peek().typ === 'OpPathFwd' || this.peek().typ === 'OpPathRev') {
|
|
973
|
-
const dir = this.next().typ;
|
|
974
|
-
const pred = this.parsePathItem();
|
|
975
|
-
|
|
976
|
-
this.blankCounter += 1;
|
|
977
|
-
const bn = new Blank(`_:b${this.blankCounter}`);
|
|
978
|
-
this.pendingTriples.push(dir === 'OpPathFwd' ? new Triple(t, pred, bn) : new Triple(bn, pred, t));
|
|
979
|
-
t = bn;
|
|
980
|
-
}
|
|
981
|
-
return t;
|
|
982
|
-
}
|
|
983
|
-
|
|
984
|
-
parsePathItem() {
|
|
985
|
-
const tok = this.next();
|
|
986
|
-
const typ = tok.typ;
|
|
987
|
-
const val = tok.value;
|
|
988
|
-
|
|
989
|
-
if (typ === 'Equals') return internIri(OWL_NS + 'sameAs');
|
|
990
|
-
|
|
991
|
-
if (typ === 'IriRef') {
|
|
992
|
-
const base = this.prefixes.baseIri || '';
|
|
993
|
-
return internIri(resolveIriRef(val || '', base));
|
|
994
|
-
}
|
|
995
|
-
|
|
996
|
-
if (typ === 'Ident') {
|
|
997
|
-
const name = val || '';
|
|
998
|
-
if (name === 'a') return internIri(RDF_NS + 'type');
|
|
999
|
-
if (name.startsWith('_:')) return new Blank(name);
|
|
1000
|
-
if (name.includes(':')) return internIri(this.prefixes.expandQName(name));
|
|
1001
|
-
return internIri(name);
|
|
1002
|
-
}
|
|
1003
|
-
|
|
1004
|
-
if (typ === 'Literal') {
|
|
1005
|
-
let s = val || '';
|
|
1006
|
-
|
|
1007
|
-
// Optional language tag: "... "@en
|
|
1008
|
-
if (this.peek().typ === 'LangTag') {
|
|
1009
|
-
if (!(s.startsWith('"') && s.endsWith('"')))
|
|
1010
|
-
throw new Error('Language tag is only allowed on quoted string literals');
|
|
1011
|
-
const langTok = this.next();
|
|
1012
|
-
s = `${s}@${langTok.value || ''}`;
|
|
1013
|
-
if (this.peek().typ === 'HatHat') throw new Error('A literal cannot have both a language tag and a datatype');
|
|
1014
|
-
}
|
|
1015
|
-
|
|
1016
|
-
// Optional datatype: ^^ <...> or ^^ qname
|
|
1017
|
-
if (this.peek().typ === 'HatHat') {
|
|
1018
|
-
this.next();
|
|
1019
|
-
const dtTok = this.next();
|
|
1020
|
-
let dtIri;
|
|
1021
|
-
if (dtTok.typ === 'IriRef') dtIri = dtTok.value || '';
|
|
1022
|
-
else if (dtTok.typ === 'Ident') {
|
|
1023
|
-
const qn = dtTok.value || '';
|
|
1024
|
-
dtIri = qn.includes(':') ? this.prefixes.expandQName(qn) : qn;
|
|
1025
|
-
} else throw new Error(`Expected datatype after ^^, got ${dtTok.toString()}`);
|
|
1026
|
-
s = `${s}^^<${dtIri}>`;
|
|
1027
|
-
}
|
|
1028
|
-
|
|
1029
|
-
return internLiteral(s);
|
|
1030
|
-
}
|
|
1031
|
-
|
|
1032
|
-
if (typ === 'Var') return new Var(val || '');
|
|
1033
|
-
if (typ === 'LParen') return this.parseList();
|
|
1034
|
-
if (typ === 'LBracket') return this.parseBlank();
|
|
1035
|
-
if (typ === 'LBrace') throw new Error('N3 graph terms { ... } are not supported in Turtle/TriG input');
|
|
1036
|
-
if (typ === 'StarOpen') return this.parseStarTerm();
|
|
1037
|
-
|
|
1038
|
-
throw new Error(`Unexpected term token: ${tok.toString()}`);
|
|
1039
|
-
}
|
|
1040
|
-
|
|
1041
|
-
parseStarTerm() {
|
|
1042
|
-
// RDF 1.2 Turtle-star / TriG-star:
|
|
1043
|
-
// - tripleTerm: <<( s p o )>>
|
|
1044
|
-
// - reifiedTriple (syntactic sugar): << s p o [~ reifier] >>
|
|
1045
|
-
if (this.peek().typ === 'LParen') {
|
|
1046
|
-
// tripleTerm
|
|
1047
|
-
this.next(); // '('
|
|
1048
|
-
const s = this.parseTerm();
|
|
1049
|
-
const p = this.parseTerm();
|
|
1050
|
-
const o = this.parseTerm();
|
|
1051
|
-
this.expect('RParen');
|
|
1052
|
-
this.expect('StarClose');
|
|
1053
|
-
return new GraphTerm([new Triple(s, p, o)]);
|
|
1054
|
-
}
|
|
1055
|
-
|
|
1056
|
-
// reifiedTriple sugar -> expand to a reifier node that log:nameOf a tripleTerm
|
|
1057
|
-
const s = this.parseTerm();
|
|
1058
|
-
const p = this.parseTerm();
|
|
1059
|
-
const o = this.parseTerm();
|
|
1060
|
-
|
|
1061
|
-
let reifier;
|
|
1062
|
-
if (this.peek().typ === 'Tilde') {
|
|
1063
|
-
this.next();
|
|
1064
|
-
reifier = this.parseTerm();
|
|
1065
|
-
} else {
|
|
1066
|
-
reifier = this.freshReifier();
|
|
1067
|
-
}
|
|
1068
|
-
|
|
1069
|
-
this.expect('StarClose');
|
|
1070
|
-
|
|
1071
|
-
const tripleTerm = new GraphTerm([new Triple(s, p, o)]);
|
|
1072
|
-
this.emitReifies(reifier, tripleTerm);
|
|
1073
|
-
return reifier;
|
|
1074
|
-
}
|
|
1075
|
-
|
|
1076
|
-
parseList() {
|
|
1077
|
-
const elems = [];
|
|
1078
|
-
while (this.peek().typ !== 'RParen') {
|
|
1079
|
-
// Be permissive: allow commas inside lists (even though Turtle lists are whitespace-separated).
|
|
1080
|
-
if (this.peek().typ === 'Comma') {
|
|
1081
|
-
this.next();
|
|
1082
|
-
continue;
|
|
1083
|
-
}
|
|
1084
|
-
elems.push(this.parseTerm());
|
|
1085
|
-
if (this.peek().typ === 'EOF') throw new Error("Unterminated list '(' ... ')'");
|
|
1086
|
-
}
|
|
1087
|
-
this.next(); // ')'
|
|
1088
|
-
return new ListTerm(elems);
|
|
1089
|
-
}
|
|
1090
|
-
|
|
1091
|
-
parseBlank() {
|
|
1092
|
-
// [] or [ ... ] property list
|
|
1093
|
-
if (this.peek().typ === 'RBracket') {
|
|
1094
|
-
this.next();
|
|
1095
|
-
this.blankCounter += 1;
|
|
1096
|
-
return new Blank(`_:b${this.blankCounter}`);
|
|
1097
|
-
}
|
|
1098
|
-
|
|
1099
|
-
let id = null;
|
|
1100
|
-
if (this.peek().typ === 'Ident' && (this.peek().value || '').startsWith('_:')) id = this.next().value;
|
|
1101
|
-
else {
|
|
1102
|
-
this.blankCounter += 1;
|
|
1103
|
-
id = `_:b${this.blankCounter}`;
|
|
1104
|
-
}
|
|
1105
|
-
|
|
1106
|
-
const subj = new Blank(id);
|
|
1107
|
-
if (this.peek().typ !== 'RBracket') {
|
|
1108
|
-
const more = this.parsePredicateObjectList(subj);
|
|
1109
|
-
// Keep the triples produced by the property list so they are emitted with the surrounding statement.
|
|
1110
|
-
this.pendingTriples.push(...more);
|
|
1111
|
-
}
|
|
1112
|
-
|
|
1113
|
-
this.expect('RBracket');
|
|
1114
|
-
return new Blank(id);
|
|
1115
|
-
}
|
|
1116
|
-
|
|
1117
|
-
// Parses inside "{ ... }" AFTER the '{' has been consumed.
|
|
1118
|
-
// We accept both "s p o ." and "s p o" before '}' as last triple (permissive).
|
|
1119
|
-
parseGraph() {
|
|
1120
|
-
const triples = [];
|
|
1121
|
-
while (this.peek().typ !== 'RBrace') {
|
|
1122
|
-
const subj = this.parseTerm();
|
|
1123
|
-
|
|
1124
|
-
let more;
|
|
1125
|
-
if (this.peek().typ === 'Dot') {
|
|
1126
|
-
more = [];
|
|
1127
|
-
if (this.pendingTriples.length > 0) {
|
|
1128
|
-
more = this.pendingTriples;
|
|
1129
|
-
this.pendingTriples = [];
|
|
1130
|
-
}
|
|
1131
|
-
this.next();
|
|
1132
|
-
} else {
|
|
1133
|
-
more = this.parsePredicateObjectList(subj);
|
|
1134
|
-
this.expectDotOrRBrace();
|
|
1135
|
-
if (this.peek().typ === 'Dot') this.next();
|
|
1136
|
-
}
|
|
1137
|
-
|
|
1138
|
-
triples.push(...more);
|
|
1139
|
-
}
|
|
1140
|
-
this.next(); // consume '}'
|
|
1141
|
-
return new GraphTerm(triples);
|
|
1142
|
-
}
|
|
1143
|
-
|
|
1144
|
-
parsePredicateObjectList(subject) {
|
|
1145
|
-
const out = [];
|
|
1146
|
-
|
|
1147
|
-
if (this.pendingTriples.length > 0) {
|
|
1148
|
-
out.push(...this.pendingTriples);
|
|
1149
|
-
this.pendingTriples = [];
|
|
1150
|
-
}
|
|
1151
|
-
|
|
1152
|
-
while (true) {
|
|
1153
|
-
let verb;
|
|
1154
|
-
let invert = false;
|
|
1155
|
-
|
|
1156
|
-
if (this.peek().typ === 'Ident' && (this.peek().value || '') === 'a') {
|
|
1157
|
-
this.next();
|
|
1158
|
-
verb = internIri(RDF_NS + 'type');
|
|
1159
|
-
} else if (this.peek().typ === 'Ident' && (this.peek().value || '') === 'has') {
|
|
1160
|
-
this.next();
|
|
1161
|
-
invert = true;
|
|
1162
|
-
verb = this.parseTerm();
|
|
1163
|
-
} else {
|
|
1164
|
-
if (this.peek().typ === 'OpPredInvert') {
|
|
1165
|
-
invert = true;
|
|
1166
|
-
this.next();
|
|
1167
|
-
}
|
|
1168
|
-
verb = this.parseTerm();
|
|
1169
|
-
}
|
|
1170
|
-
|
|
1171
|
-
out.push(...this.parseAnnotatedObjectList(subject, verb, invert));
|
|
1172
|
-
|
|
1173
|
-
if (this.peek().typ === 'Semicolon') {
|
|
1174
|
-
this.next();
|
|
1175
|
-
if (
|
|
1176
|
-
this.peek().typ === 'Dot' ||
|
|
1177
|
-
this.peek().typ === 'RBrace' ||
|
|
1178
|
-
this.peek().typ === 'RBracket' ||
|
|
1179
|
-
this.peek().typ === 'AnnClose'
|
|
1180
|
-
)
|
|
1181
|
-
break;
|
|
1182
|
-
continue;
|
|
1183
|
-
}
|
|
1184
|
-
break;
|
|
1185
|
-
}
|
|
1186
|
-
|
|
1187
|
-
// Include any triples generated by nested blank node property lists / reifiers
|
|
1188
|
-
// that were encountered while parsing this predicate-object list.
|
|
1189
|
-
if (this.pendingTriples.length > 0) {
|
|
1190
|
-
out.push(...this.pendingTriples);
|
|
1191
|
-
this.pendingTriples = [];
|
|
1192
|
-
}
|
|
1193
|
-
|
|
1194
|
-
return out;
|
|
1195
|
-
}
|
|
1196
|
-
|
|
1197
|
-
parseObjectList() {
|
|
1198
|
-
const objs = [this.parseTerm()];
|
|
1199
|
-
while (this.peek().typ === 'Comma') {
|
|
1200
|
-
this.next();
|
|
1201
|
-
objs.push(this.parseTerm());
|
|
1202
|
-
}
|
|
1203
|
-
return objs;
|
|
1204
|
-
}
|
|
1205
|
-
|
|
1206
|
-
// RDF 1.2 Turtle/TriG: triple annotations and reifiers
|
|
1207
|
-
// After an object, Turtle 1.2 allows optional:
|
|
1208
|
-
// ~ <reifier>
|
|
1209
|
-
// {| <predicateObjectList> |}
|
|
1210
|
-
// We convert these into eyeling-friendly N3 by emitting:
|
|
1211
|
-
// <reifier> log:nameOf { <s> <p> <o> . } .
|
|
1212
|
-
// <reifier> <annP> <annO> .
|
|
1213
|
-
|
|
1214
|
-
parseAnnotationBlock(reifier) {
|
|
1215
|
-
this.expect('AnnOpen');
|
|
1216
|
-
const out = [];
|
|
1217
|
-
if (this.peek().typ !== 'AnnClose') {
|
|
1218
|
-
out.push(...this.parsePredicateObjectList(reifier));
|
|
1219
|
-
}
|
|
1220
|
-
this.expect('AnnClose');
|
|
1221
|
-
return out;
|
|
1222
|
-
}
|
|
1223
|
-
|
|
1224
|
-
parseAnnotatedObjectList(subject, verb, invert) {
|
|
1225
|
-
const out = [];
|
|
1226
|
-
out.push(...this.parseAnnotatedObjectTriples(subject, verb, invert));
|
|
1227
|
-
while (this.peek().typ === 'Comma') {
|
|
1228
|
-
this.next();
|
|
1229
|
-
out.push(...this.parseAnnotatedObjectTriples(subject, verb, invert));
|
|
1230
|
-
}
|
|
1231
|
-
return out;
|
|
1232
|
-
}
|
|
1233
|
-
|
|
1234
|
-
parseAnnotatedObjectTriples(subject, verb, invert) {
|
|
1235
|
-
const out = [];
|
|
1236
|
-
|
|
1237
|
-
const obj = this.parseTerm();
|
|
1238
|
-
const s = invert ? obj : subject;
|
|
1239
|
-
const o = invert ? subject : obj;
|
|
1240
|
-
|
|
1241
|
-
// asserted triple
|
|
1242
|
-
// Special-case RDF 1.2 explicit triple reification:
|
|
1243
|
-
// _:r rdf:reifies <<( s p o )>> .
|
|
1244
|
-
// Emit as:
|
|
1245
|
-
// _:r log:nameOf { s p o . } .
|
|
1246
|
-
// This matches the mapping we already use for reifiedTriple sugar and annotations.
|
|
1247
|
-
let assertedVerb = verb;
|
|
1248
|
-
let assertedObj = o;
|
|
1249
|
-
if (!invert && verb instanceof Iri && verb.value === RDF_NS + 'reifies' && obj instanceof GraphTerm) {
|
|
1250
|
-
assertedVerb = internIri(log.nameOf);
|
|
1251
|
-
assertedObj = obj;
|
|
1252
|
-
}
|
|
1253
|
-
out.push(new Triple(s, assertedVerb, assertedObj));
|
|
1254
|
-
|
|
1255
|
-
// optional reifier and/or annotation blocks
|
|
1256
|
-
let reifier = null;
|
|
1257
|
-
|
|
1258
|
-
if (this.peek().typ === 'Tilde') {
|
|
1259
|
-
this.next();
|
|
1260
|
-
// Allow empty reifier: ~ {| ... |} (fresh blank node)
|
|
1261
|
-
if (this.peek().typ === 'AnnOpen') reifier = this.freshReifier();
|
|
1262
|
-
else reifier = this.parseTerm();
|
|
1263
|
-
}
|
|
1264
|
-
|
|
1265
|
-
// If there is an annotation block without an explicit reifier, allocate one
|
|
1266
|
-
if (!reifier && this.peek().typ === 'AnnOpen') {
|
|
1267
|
-
reifier = this.freshReifier();
|
|
1268
|
-
}
|
|
1269
|
-
|
|
1270
|
-
if (reifier) {
|
|
1271
|
-
const tripleTerm = new GraphTerm([new Triple(s, assertedVerb, assertedObj)]);
|
|
1272
|
-
this.emitReifies(reifier, tripleTerm);
|
|
1273
|
-
if (this.pendingTriples.length) {
|
|
1274
|
-
out.push(...this.pendingTriples);
|
|
1275
|
-
this.pendingTriples = [];
|
|
1276
|
-
}
|
|
1277
|
-
|
|
1278
|
-
// zero or more annotation blocks
|
|
1279
|
-
while (this.peek().typ === 'AnnOpen') {
|
|
1280
|
-
out.push(...this.parseAnnotationBlock(reifier));
|
|
1281
|
-
}
|
|
1282
|
-
}
|
|
1283
|
-
|
|
1284
|
-
return out;
|
|
1285
|
-
}
|
|
1286
|
-
}
|
|
1287
|
-
|
|
1288
|
-
// TriG: Turtle + graph blocks (graphName { ... })
class TriGParser extends TurtleParser {
  /**
   * Parse a whole TriG document from the token stream.
   *
   * Besides the directives understood by the Turtle parser (VERSION,
   * @prefix/@base, PREFIX/BASE) this accepts:
   *   - a default graph block        `{ ... }`
   *   - a SPARQL-style graph block   `GRAPH <g> { ... }`
   *   - a named graph block          `graphName { ... }`
   *   - plain Turtle statements, which go to the default graph.
   * An optional '.' after any graph block is accepted.
   *
   * @returns {{ quads: Array, prefixes: * }} Quads of shape `{ s, p, o, g }`
   *          where `g` is the graph name term, or null for the default graph.
   */
  parseTrigDocument() {
    const quads = []; // { s,p,o,g } where g is Term|null

    // Case-insensitive test for a bare keyword token.
    const isKeyword = (tok, word) =>
      tok.typ === 'Ident' && typeof tok.value === 'string' && tok.value.toLowerCase() === word;
    // Token after the current one; evaluated lazily, only for keyword candidates.
    const lookahead = () => this.toks[this.pos + 1];
    // Record every triple as a quad in the given graph.
    const addQuads = (triples, graph) => {
      for (const tr of triples) {
        quads.push({ s: tr.s, p: tr.p, o: tr.o, g: graph });
      }
    };
    // Parse "... }" (the '{' is already consumed) plus an optional '.'.
    const readGraphBlock = (graph) => {
      const body = this.parseGraph();
      if (this.peek().typ === 'Dot') this.next(); // accept optional '.'
      addQuads(body.triples, graph);
    };

    for (let tok = this.peek(); tok.typ !== 'EOF'; tok = this.peek()) {
      // RDF 1.2: VERSION announcement (e.g., VERSION "1.2")
      if (isKeyword(tok, 'version')) {
        this.next(); // VERSION
        const versionTok = this.next();
        if (versionTok.typ !== 'Literal') {
          throw new Error(`Expected a literal after VERSION, got ${versionTok.toString()}`);
        }
        if (this.peek().typ === 'Dot') this.next(); // permissive
        continue;
      }

      // directives
      if (tok.typ === 'AtPrefix') {
        this.next();
        this.parsePrefixDirective();
        continue;
      }

      if (tok.typ === 'AtBase') {
        this.next();
        this.parseBaseDirective();
        continue;
      }

      if (
        isKeyword(tok, 'prefix') &&
        lookahead() &&
        lookahead().typ === 'Ident' &&
        typeof lookahead().value === 'string' &&
        lookahead().value.endsWith(':')
      ) {
        this.next();
        this.parseSparqlPrefixDirective();
        continue;
      }

      if (isKeyword(tok, 'base') && lookahead() && lookahead().typ === 'IriRef') {
        this.next();
        this.parseSparqlBaseDirective();
        continue;
      }

      // Default graph block: { ... }
      if (tok.typ === 'LBrace') {
        this.next(); // consume '{'
        readGraphBlock(null);
        continue;
      }

      // SPARQL-style named graph block: GRAPH <g> { ... }
      if (isKeyword(tok, 'graph')) {
        this.next(); // GRAPH
        const graphName = this.parseTerm();
        this.expect('LBrace');
        readGraphBlock(graphName);
        continue;
      }

      // Either a Turtle triple in default graph, or a named graph block: graphName { ... }
      const first = this.parseTerm();

      if (this.peek().typ === 'LBrace') {
        this.next(); // consume '{'
        readGraphBlock(first);
        continue;
      }

      // Plain Turtle triple statement in default graph
      let produced;
      if (this.peek().typ !== 'Dot') {
        produced = this.parsePredicateObjectList(first);
        this.expect('Dot');
      } else {
        const hadPending = this.pendingTriples.length > 0;
        produced = hadPending ? this.pendingTriples : [];
        if (hadPending) this.pendingTriples = [];
        this.next();
      }
      addQuads(produced, null);
    }

    return { quads, prefixes: this.prefixes };
  }
}
|
|
1397
|
-
|
|
1398
|
-
// ---------------------------------------------------------------------------
|
|
1399
|
-
// Serializers (Turtle-ish / TriG-ish / N3-ish)
|
|
1400
|
-
// ---------------------------------------------------------------------------
|
|
1401
|
-
|
|
1402
|
-
/**
 * Render a Turtle/N3 literal token string, shrinking its datatype IRIRef
 * (the trailing ^^<...>) to a prefixed name when a matching prefix is in scope,
 * e.g. "2021-07-07"^^<http://www.w3.org/2001/XMLSchema#date> -> "2021-07-07"^^xsd:date
 *
 * Only a datatype at the very end of the token is rewritten. A `^^<...>`
 * sequence that merely occurs inside the lexical form is left untouched,
 * so the original lexical spelling is always preserved.
 *
 * @param {*} raw Literal token text. Falsy or non-string input is returned
 *        as `String(raw)`.
 * @param {{ shrinkIri: function(string): (string|null|undefined) }|null|undefined} prefixes
 *        Prefix environment; without it nothing is rewritten.
 * @returns {string} The literal text, with the datatype shortened if possible.
 */
function literalToText(raw, prefixes) {
  if (!raw || typeof raw !== 'string') return String(raw);

  // Typed literal with datatype as IRIREF, anchored to the end of the token so
  // that text inside the quotes can never be mistaken for the datatype.
  const m = /\^\^<([^>]+)>$/.exec(raw);
  if (!m) return raw;

  const qn = prefixes ? prefixes.shrinkIri(m[1]) : null;
  if (!qn) return raw;

  // Splice by index instead of String.replace: no search for the first
  // occurrence and no interpretation of '$' patterns in the prefixed name.
  return `${raw.slice(0, m.index)}^^${qn}`;
}
|
|
1425
|
-
|
|
1426
|
-
/**
 * Serialize a term to its textual N3/Turtle form.
 *
 * - null/undefined -> `[]`
 * - Iri            -> `a` for rdf:type, else a prefixed name when the prefix
 *                     environment can shrink it, else `<iri>`
 * - Blank          -> its skolem replacement when present in `skolemMap`,
 *                     else its label
 * - Literal        -> via `literalToText`
 * - Var            -> `?name`
 * - ListTerm / OpenListTerm / GraphTerm -> recursively rendered
 * - anything else  -> `String(t)`
 *
 * @param t         Term to render.
 * @param prefixes  Prefix environment offering `shrinkIri`, or a falsy value.
 * @param skolemMap Map from blank node label to replacement text, or falsy.
 * @returns {string} Textual form of the term.
 */
function termToText(t, prefixes, skolemMap) {
  if (t == null) return '[]';

  // Recursive rendering with the same prefix/skolem context.
  const render = (x) => termToText(x, prefixes, skolemMap);

  if (t instanceof Iri) {
    if (t.value === RDF_NS + 'type') return 'a';
    const shrunk = prefixes ? prefixes.shrinkIri(t.value) : null;
    return shrunk || `<${t.value}>`;
  }

  if (t instanceof Blank) {
    const isSkolemized = skolemMap && skolemMap.has(t.label);
    return isSkolemized ? skolemMap.get(t.label) : t.label;
  }

  if (t instanceof Literal) {
    return literalToText(t.value, prefixes);
  }

  if (t instanceof Var) {
    return `?${t.name}`;
  }

  if (t instanceof ListTerm) {
    const members = t.elems.map(render);
    return `(${members.join(' ')})`;
  }

  if (t instanceof OpenListTerm) {
    const head = t.prefix.map(render);
    return `(${head.join(' ')} ... ?${t.tailVar})`;
  }

  if (t instanceof GraphTerm) {
    const statements = [];
    for (const tr of t.triples) {
      statements.push(`${render(tr.s)} ${render(tr.p)} ${render(tr.o)} .`);
    }
    return `{ ${statements.join(' ')} }`;
  }

  return String(t);
}
|
|
1453
|
-
|
|
1454
|
-
// ---------------------------------------------------------------------------
|
|
1455
|
-
// Skolemize blank nodes that would otherwise "split" across quoted graph terms.
|
|
1456
|
-
//
|
|
1457
|
-
// In N3, blank nodes inside { ... } are existentially scoped to that formula,
|
|
1458
|
-
// so reusing the same _:id outside does NOT imply coreference.
|
|
1459
|
-
// Because RDF 1.2 triple terms are serialized as { s p o . }, we optionally replace
|
|
1460
|
-
// any blank node that appears both inside a quoted graph term AND outside it
|
|
1461
|
-
// with a stable IRI constant (<urn:skolem:...>) to preserve identity.
|
|
1462
|
-
// ---------------------------------------------------------------------------
|
|
1463
|
-
|
|
1464
|
-
/**
 * Compute skolem replacements for blank nodes that occur in more than one
 * scope.
 *
 * In RDF (incl. RDF 1.2 triple terms and TriG datasets) a blank node can be
 * shared between scopes of the concrete syntax. In N3, blank nodes inside a
 * quoted graph term `{ ... }` do NOT corefer with blank nodes outside it or
 * in other quoted graph terms. To preserve coreference, every label seen in
 * more than one of the following scopes gets a replacement:
 *   - OUT: outside any GraphTerm
 *   - Gk:  inside the k-th encountered GraphTerm (each GraphTerm gets its own)
 *
 * The replacement text is `${SKOLEM_PREFIX}:<local>`, where `<local>` is the
 * label without its leading `_:`, passed through `pnLocalSafe`.
 *
 * @param {Array} triples Triples to scan (objects with `s`, `p`, `o`).
 * @returns {Map<string, string>} Blank node label -> replacement text.
 */
function buildSkolemMapForBnodesThatCrossScopes(triples) {
  const scopesByLabel = new Map();
  let nextGraphTermId = 0;

  function record(label, scope) {
    let scopes = scopesByLabel.get(label);
    if (scopes === undefined) {
      scopes = new Set();
      scopesByLabel.set(label, scopes);
    }
    scopes.add(scope);
  }

  function walkTriple(tr, scope) {
    walkTerm(tr.s, scope);
    walkTerm(tr.p, scope);
    walkTerm(tr.o, scope);
  }

  function walkTerm(term, scope) {
    if (!term) return;

    if (term instanceof Blank) {
      record(term.label, scope);
    } else if (term instanceof ListTerm) {
      for (const member of term.elems) walkTerm(member, scope);
    } else if (term instanceof OpenListTerm) {
      for (const member of term.prefix) walkTerm(member, scope);
    } else if (term instanceof GraphTerm) {
      // Every quoted graph term is its own scope.
      const innerScope = `G${nextGraphTermId++}`;
      for (const tr of term.triples) walkTriple(tr, innerScope);
    }
  }

  for (const tr of triples) {
    walkTriple(tr, 'OUT');
  }

  const skolemMap = new Map();
  scopesByLabel.forEach((scopes, label) => {
    if (scopes.size > 1) {
      const id = label.startsWith('_:') ? label.slice(2) : label;
      skolemMap.set(label, `${SKOLEM_PREFIX}:${pnLocalSafe(id)}`);
    }
  });
  return skolemMap;
}
|
|
1527
|
-
|
|
1528
|
-
// ---------------------------------------------------------------------------
|
|
1529
|
-
// RDF list (rdf:first/rest) folding
|
|
1530
|
-
//
|
|
1531
|
-
// Some producers expand Turtle/N3 list syntax into explicit RDF collection
|
|
1532
|
-
// triples. When writing N3/Turtle, it is useful to fold those back into
|
|
1533
|
-
// ListTerm so the output matches common Turtle/N3 expectations.
|
|
1534
|
-
//
|
|
1535
|
-
// We fold only “plain” lists where each list node has exactly one rdf:first and
|
|
1536
|
-
// one rdf:rest triple, and no other outgoing triples. Intermediate nodes must
|
|
1537
|
-
// not be referenced from outside the list chain. This keeps the transformation
|
|
1538
|
-
// semantics-preserving.
|
|
1539
|
-
// ---------------------------------------------------------------------------
|
|
1540
|
-
|
|
1541
|
-
/**
 * Build a string key that identifies a term by kind and content, for use
 * as a Map/Set key.
 *
 * Key prefixes: `N:` null/undefined, `I:` Iri, `B:` Blank, `L:` Literal,
 * `V:` Var, `T:` ListTerm and OpenListTerm, `G:` GraphTerm, `X:` anything
 * else (via `String(t)`). Compound terms are keyed recursively.
 *
 * @param t Term to key.
 * @returns {string} The key.
 */
function termKey(t) {
  if (t == null) {
    return 'N:null';
  }
  if (t instanceof Iri) {
    return `I:${t.value}`;
  }
  if (t instanceof Blank) {
    return `B:${t.label}`;
  }
  if (t instanceof Literal) {
    return `L:${t.value}`;
  }
  if (t instanceof Var) {
    return `V:${t.name}`;
  }
  if (t instanceof ListTerm) {
    const members = t.elems.map((e) => termKey(e));
    return `T:(${members.join(' ')})`;
  }
  if (t instanceof OpenListTerm) {
    const head = t.prefix.map((e) => termKey(e));
    return `T:(${head.join(' ')} ... ?${t.tailVar})`;
  }
  if (t instanceof GraphTerm) {
    const tripleKeys = [];
    for (const tr of t.triples) {
      tripleKeys.push(`${termKey(tr.s)} ${termKey(tr.p)} ${termKey(tr.o)}`);
    }
    return `G:{${tripleKeys.join(' ; ')}}`;
  }
  return `X:${String(t)}`;
}
|
|
1553
|
-
|
|
1554
|
-
function foldRdfLists(triples) {
|
|
1555
|
-
const rdfFirst = RDF_NS + 'first';
|
|
1556
|
-
const rdfRest = RDF_NS + 'rest';
|
|
1557
|
-
const rdfNil = RDF_NS + 'nil';
|
|
1558
|
-
|
|
1559
|
-
const outBySubj = new Map(); // key -> { term, idxs: number[] }
|
|
1560
|
-
const incoming = new Map(); // key -> total incoming as object
|
|
1561
|
-
const incomingRest = new Map(); // key -> incoming via rdf:rest
|
|
1562
|
-
|
|
1563
|
-
function addIncoming(objKey, viaRest) {
|
|
1564
|
-
incoming.set(objKey, (incoming.get(objKey) || 0) + 1);
|
|
1565
|
-
if (viaRest) incomingRest.set(objKey, (incomingRest.get(objKey) || 0) + 1);
|
|
1566
|
-
}
|
|
1567
|
-
|
|
1568
|
-
for (let i = 0; i < triples.length; i++) {
|
|
1569
|
-
const tr = triples[i];
|
|
1570
|
-
const sKey = termKey(tr.s);
|
|
1571
|
-
if (!outBySubj.has(sKey)) outBySubj.set(sKey, { term: tr.s, idxs: [] });
|
|
1572
|
-
outBySubj.get(sKey).idxs.push(i);
|
|
1573
|
-
|
|
1574
|
-
const oKey = termKey(tr.o);
|
|
1575
|
-
const viaRest = isIri(tr.p, rdfRest);
|
|
1576
|
-
addIncoming(oKey, viaRest);
|
|
1577
|
-
}
|
|
1578
|
-
|
|
1579
|
-
function outgoingTriplesOf(key) {
|
|
1580
|
-
const rec = outBySubj.get(key);
|
|
1581
|
-
if (!rec) return [];
|
|
1582
|
-
return rec.idxs.map((idx) => ({ idx, tr: triples[idx] }));
|
|
1583
|
-
}
|
|
1584
|
-
|
|
1585
|
-
// Identify candidate list heads: blank nodes with exactly one rdf:first and one rdf:rest.
|
|
1586
|
-
//
|
|
1587
|
-
// NOTE: This converter currently writes one triple per line (it does not group by subject).
|
|
1588
|
-
// In Turtle/N3, repeating a collection term ( ... ) across multiple triples would mint
|
|
1589
|
-
// a fresh list each time. To remain semantics-preserving, we only fold “annotated” list
|
|
1590
|
-
// heads (i.e., heads with extra outgoing predicates) when:
|
|
1591
|
-
// - the head is not referenced as an object elsewhere, and
|
|
1592
|
-
// - there is at most one extra outgoing triple.
|
|
1593
|
-
const listMap = new Map(); // headKey -> { listTerm, removeIdxs:Set<number>, chainKeys:string[] }
|
|
1594
|
-
|
|
1595
|
-
for (const [sKey, rec] of outBySubj.entries()) {
|
|
1596
|
-
if (!(rec.term instanceof Blank)) continue;
|
|
1597
|
-
|
|
1598
|
-
const outs = outgoingTriplesOf(sKey);
|
|
1599
|
-
const firsts = outs.filter((x) => isIri(x.tr.p, rdfFirst));
|
|
1600
|
-
const rests = outs.filter((x) => isIri(x.tr.p, rdfRest));
|
|
1601
|
-
if (firsts.length !== 1 || rests.length !== 1) continue;
|
|
1602
|
-
|
|
1603
|
-
const extras = outs.filter((x) => !(isIri(x.tr.p, rdfFirst) || isIri(x.tr.p, rdfRest)));
|
|
1604
|
-
const incHead = incoming.get(sKey) || 0;
|
|
1605
|
-
const incHeadRest = incomingRest.get(sKey) || 0;
|
|
1606
|
-
|
|
1607
|
-
// Head sharing safety: if the head node is referenced multiple times,
|
|
1608
|
-
// folding would duplicate the list (not semantics-preserving).
|
|
1609
|
-
if (incHead > 1) continue;
|
|
1610
|
-
|
|
1611
|
-
if (extras.length > 0) {
|
|
1612
|
-
if (incHead !== 0 || incHeadRest !== 0) continue;
|
|
1613
|
-
if (extras.length > 1) continue;
|
|
1614
|
-
}
|
|
1615
|
-
|
|
1616
|
-
// Walk the rdf:rest chain.
|
|
1617
|
-
const elems = [];
|
|
1618
|
-
const removeIdxs = new Set();
|
|
1619
|
-
const chainKeys = [];
|
|
1620
|
-
const seen = new Set();
|
|
1621
|
-
const headKey = sKey;
|
|
1622
|
-
let curKey = sKey;
|
|
1623
|
-
let isOk = true;
|
|
1624
|
-
|
|
1625
|
-
while (true) {
|
|
1626
|
-
if (seen.has(curKey)) {
|
|
1627
|
-
isOk = false;
|
|
1628
|
-
break;
|
|
1629
|
-
}
|
|
1630
|
-
seen.add(curKey);
|
|
1631
|
-
chainKeys.push(curKey);
|
|
1632
|
-
|
|
1633
|
-
const outs2 = outgoingTriplesOf(curKey);
|
|
1634
|
-
const f2 = outs2.filter((x) => isIri(x.tr.p, rdfFirst));
|
|
1635
|
-
const r2 = outs2.filter((x) => isIri(x.tr.p, rdfRest));
|
|
1636
|
-
if (f2.length !== 1 || r2.length !== 1) {
|
|
1637
|
-
isOk = false;
|
|
1638
|
-
break;
|
|
1639
|
-
}
|
|
1640
|
-
|
|
1641
|
-
// Only the head is allowed to have extra outgoing predicates.
|
|
1642
|
-
if (curKey !== headKey && outs2.length !== 2) {
|
|
1643
|
-
isOk = false;
|
|
1644
|
-
break;
|
|
1645
|
-
}
|
|
1646
|
-
|
|
1647
|
-
elems.push(f2[0].tr.o);
|
|
1648
|
-
removeIdxs.add(f2[0].idx);
|
|
1649
|
-
removeIdxs.add(r2[0].idx);
|
|
1650
|
-
|
|
1651
|
-
const next = r2[0].tr.o;
|
|
1652
|
-
if (next instanceof Iri && next.value === rdfNil) break;
|
|
1653
|
-
if (!(next instanceof Blank)) {
|
|
1654
|
-
isOk = false;
|
|
1655
|
-
break;
|
|
1656
|
-
}
|
|
1657
|
-
|
|
1658
|
-
const nextKey = termKey(next);
|
|
1659
|
-
|
|
1660
|
-
// Intermediate node safety: only referenced via rdf:rest and exactly once.
|
|
1661
|
-
const inc = incoming.get(nextKey) || 0;
|
|
1662
|
-
const incR = incomingRest.get(nextKey) || 0;
|
|
1663
|
-
if (inc !== incR || incR !== 1) {
|
|
1664
|
-
isOk = false;
|
|
1665
|
-
break;
|
|
1666
|
-
}
|
|
1667
|
-
|
|
1668
|
-
curKey = nextKey;
|
|
1669
|
-
}
|
|
1670
|
-
|
|
1671
|
-
if (!isOk) continue;
|
|
1672
|
-
|
|
1673
|
-
listMap.set(headKey, { listTerm: new ListTerm(elems), removeIdxs, chainKeys });
|
|
1674
|
-
}
|
|
1675
|
-
|
|
1676
|
-
if (listMap.size === 0) return triples;
|
|
1677
|
-
|
|
1678
|
-
// Prevent double folding: intermediate nodes in a folded chain should not also be heads.
|
|
1679
|
-
const intermediate = new Set();
|
|
1680
|
-
for (const v of listMap.values()) {
|
|
1681
|
-
for (let i = 1; i < v.chainKeys.length; i++) intermediate.add(v.chainKeys[i]);
|
|
1682
|
-
}
|
|
1683
|
-
for (const k of intermediate) {
|
|
1684
|
-
if (listMap.has(k)) listMap.delete(k);
|
|
1685
|
-
}
|
|
1686
|
-
if (listMap.size === 0) return triples;
|
|
1687
|
-
|
|
1688
|
-
// Build set of triple indices to remove (rdf:first/rest only).
|
|
1689
|
-
const removeAll = new Set();
|
|
1690
|
-
for (const v of listMap.values()) for (const idx of v.removeIdxs) removeAll.add(idx);
|
|
1691
|
-
|
|
1692
|
-
// Replace list-head blank nodes with ListTerm *recursively* so nested collections fold too.
|
|
1693
|
-
function replaceTerm(t) {
|
|
1694
|
-
if (t == null) return t;
|
|
1695
|
-
|
|
1696
|
-
if (t instanceof Blank) {
|
|
1697
|
-
const m = listMap.get(termKey(t));
|
|
1698
|
-
if (m) return replaceTerm(m.listTerm);
|
|
1699
|
-
return t;
|
|
1700
|
-
}
|
|
1701
|
-
if (t instanceof ListTerm) {
|
|
1702
|
-
return new ListTerm(t.elems.map((x) => replaceTerm(x)));
|
|
1703
|
-
}
|
|
1704
|
-
if (t instanceof OpenListTerm) {
|
|
1705
|
-
return new OpenListTerm(
|
|
1706
|
-
t.prefix.map((x) => replaceTerm(x)),
|
|
1707
|
-
t.tailVar,
|
|
1708
|
-
);
|
|
1709
|
-
}
|
|
1710
|
-
if (t instanceof GraphTerm) {
|
|
1711
|
-
const inner = t.triples.map((tr) => new Triple(replaceTerm(tr.s), replaceTerm(tr.p), replaceTerm(tr.o)));
|
|
1712
|
-
return new GraphTerm(inner);
|
|
1713
|
-
}
|
|
1714
|
-
return t;
|
|
1715
|
-
}
|
|
1716
|
-
|
|
1717
|
-
const newTriples = [];
|
|
1718
|
-
for (let i = 0; i < triples.length; i++) {
|
|
1719
|
-
if (removeAll.has(i)) continue;
|
|
1720
|
-
const tr = triples[i];
|
|
1721
|
-
newTriples.push(new Triple(replaceTerm(tr.s), replaceTerm(tr.p), replaceTerm(tr.o)));
|
|
1722
|
-
}
|
|
1723
|
-
|
|
1724
|
-
return newTriples;
|
|
1725
|
-
}
|
|
1726
|
-
|
|
1727
|
-
// Build a prefix environment that keeps only the prefixes referenced by `triples`.
//
// prefixes: object exposing `map` (prefix -> namespace IRI), `baseIri` and `shrinkIri(iri)`.
// triples:  iterable of objects with `s`, `p`, `o` terms.
// Returns `prefixes` unchanged when it is missing or has no `map`; otherwise a new
// PrefixEnv whose map holds the referenced prefixes in order of first use.
function pruneUnusedPrefixes(prefixes, triples) {
  if (!prefixes || !prefixes.map) return prefixes;

  const referenced = new Set();

  // Record the prefix that shrinkIri() would use for this IRI, if any.
  const noteIri = (iriTerm) => {
    // rdf:type is skipped here (the original notes it is written as 'a').
    if (iriTerm.value === RDF_NS + 'type') return;
    const qname = prefixes.shrinkIri(iriTerm.value);
    if (!qname) return;
    const colonAt = qname.indexOf(':');
    referenced.add(colonAt === 0 ? '' : qname.slice(0, colonAt));
  };

  // The literal value carries the full lexical token, so scan it for a
  // `^^prefix:local` datatype (the prefix part may be empty).
  const noteLiteral = (literalTerm) => {
    const datatypeQName = /\^\^([A-Za-z_][A-Za-z0-9_.-]*|):[A-Za-z_][A-Za-z0-9_.-]*/g;
    for (const hit of literalTerm.value.matchAll(datatypeQName)) {
      referenced.add(hit[1] || '');
    }
  };

  // Depth-first walk in s, p, o order; lists and graph terms are descended into.
  function collect(term) {
    if (!term) return;
    if (term instanceof Iri) {
      noteIri(term);
      return;
    }
    if (term instanceof Literal) {
      noteLiteral(term);
      return;
    }
    if (term instanceof ListTerm) {
      for (const item of term.elems) collect(item);
      return;
    }
    if (term instanceof GraphTerm) {
      for (const inner of term.triples) collectTriple(inner);
    }
  }

  function collectTriple(triple) {
    collect(triple.s);
    collect(triple.p);
    collect(triple.o);
  }

  for (const triple of triples) collectTriple(triple);

  // Keep only referenced prefixes that were actually declared.
  const kept = {};
  referenced.forEach((name) => {
    if (Object.prototype.hasOwnProperty.call(prefixes.map, name)) kept[name] = prefixes.map[name];
  });

  return new PrefixEnv(kept, prefixes.baseIri);
}
|
|
1781
|
-
|
|
1782
|
-
// True only when `t` is an Iri instance whose value equals the string `iri`.
function isIri(t, iri) {
  if (!(t instanceof Iri)) return false;
  return t.value === iri;
}
|
|
1785
|
-
|
|
1786
|
-
// Render the `@base` / `@prefix` header lines for a prefix environment.
//
// prefixes: may be null/undefined; otherwise read for `baseIri` and `map`.
// Returns the directives joined with '\n' (an empty string when there are none).
// Prefixes mapped to an empty/falsy IRI are skipped; the empty prefix is written as ':'.
function renderPrefixPrologue(prefixes) {
  const baseIri = prefixes && prefixes.baseIri;
  const baseLines = baseIri ? [`@base <${baseIri}> .`] : [];

  const map = prefixes && prefixes.map;
  const prefixLines = map
    ? Object.entries(map)
        .filter(([, iri]) => iri)
        .map(([name, iri]) => {
          const label = name === '' ? ':' : `${name}:`;
          return `@prefix ${label} <${iri}> .`;
        })
    : [];

  return [...baseLines, ...prefixLines].join('\n');
}
|
|
1800
|
-
|
|
1801
|
-
// Add the Skolem prefix declaration when at least one blank node is skolemized.
//
// prefixes:  prefix environment (may be null/undefined).
// skolemMap: Map keyed by blank-node label; when missing or empty, `prefixes`
//            is returned untouched.
// Side effect: assigns the module-level SKOLEM_PREFIX_IRI when it has to be derived.
// Returns a new PrefixEnv containing the existing mappings plus SKOLEM_PREFIX.
function ensureSkolemPrefix(prefixes, skolemMap) {
  if (!skolemMap || skolemMap.size === 0) return prefixes;

  if (SKOLEM_UUID) {
    // A UUID was already fixed elsewhere (the original comment names
    // initSkolemForInput as the preferred source); only fill in the IRI if missing.
    if (!SKOLEM_PREFIX_IRI) SKOLEM_PREFIX_IRI = `${SKOLEM_ROOT}${SKOLEM_UUID}#`;
  } else {
    // Fallback: derive a stable UUID from the sorted skolemized labels plus the
    // base IRI, so the prefix IRI does not depend on randomness.
    const seedBase = prefixes ? prefixes.baseIri || '' : '';
    const sortedLabels = [...skolemMap.keys()].sort().join('\n');
    const seed = ['n3gen-skolem', SKOLEM_ROOT, seedBase, sortedLabels, ''].join('\n');
    SKOLEM_PREFIX_IRI = `${SKOLEM_ROOT}${deterministicUuidFromText(seed)}#`;
  }

  const declared = prefixes && prefixes.map ? prefixes.map : {};
  const withSkolem = { ...declared, [SKOLEM_PREFIX]: SKOLEM_PREFIX_IRI };
  return new PrefixEnv(withSkolem, prefixes ? prefixes.baseIri : '');
}
|
|
1827
|
-
|
|
1828
|
-
// Ensure log: prefix is available whenever we emit log:nameOf (or any other log:* IRI).
|
|
1829
|
-
// Report whether any term in `triples` refers to the log: namespace.
//
// triples: iterable of objects with `s`, `p`, `o` (Triple instances or plain
//          objects); null/undefined is treated as empty.
// A term counts when it is an Iri starting with LOG_NS, or a Literal whose value
// contains `^^log:` or `^^<LOG_NS…`. List elements and graph-term triples are
// searched recursively. The search stops at the first hit.
function usesLogNamespace(triples) {
  function termUsesLog(term) {
    if (!term) return false;
    if (term instanceof Iri) return term.value.startsWith(LOG_NS);
    if (term instanceof Literal) {
      return term.value.includes('^^log:') || term.value.includes(`^^<${LOG_NS}`);
    }
    if (term instanceof ListTerm) {
      for (const item of term.elems) {
        if (termUsesLog(item)) return true;
      }
      return false;
    }
    if (term instanceof GraphTerm) {
      for (const inner of term.triples) {
        if (tripleUsesLog(inner)) return true;
      }
    }
    return false;
  }

  function tripleUsesLog(triple) {
    return termUsesLog(triple.s) || termUsesLog(triple.p) || termUsesLog(triple.o);
  }

  for (const triple of triples || []) {
    if (tripleUsesLog(triple)) return true;
  }
  return false;
}
|
|
1869
|
-
|
|
1870
|
-
// Declare `log:` (bound to LOG_NS) when the triples reference the log namespace.
//
// Returns `prefixes` unchanged when usesLogNamespace(triples) is false; otherwise
// a new PrefixEnv in which `log` maps to LOG_NS, replacing any existing `log` entry.
function ensureLogPrefixIfUsed(prefixes, triples) {
  const needsLog = usesLogNamespace(triples);
  if (!needsLog) return prefixes;

  const declared = prefixes && prefixes.map ? prefixes.map : {};
  // Spread first, then `log`, so an existing log: mapping is overwritten.
  const withLog = { ...declared, log: LOG_NS };
  return new PrefixEnv(withLog, prefixes ? prefixes.baseIri : '');
}
|
|
1878
|
-
|
|
1879
|
-
// Report whether any term in `triples` needs the rdf: prefix.
//
// triples: iterable of objects with `s`, `p`, `o`; null/undefined is treated as empty.
// An Iri counts when it starts with RDF_NS and is not rdf:type (the original notes
// rdf:type is rendered as 'a'). A Literal counts when its value contains `^^rdf:`
// or `^^<RDF_NS…`. Lists and graph terms are searched recursively; the search
// stops at the first hit.
function usesRdfNamespace(triples) {
  const rdfType = RDF_NS + 'type';

  function termUsesRdf(term) {
    if (!term) return false;
    if (term instanceof Iri) {
      return term.value.startsWith(RDF_NS) && term.value !== rdfType;
    }
    if (term instanceof Literal) {
      return term.value.includes('^^rdf:') || term.value.includes(`^^<${RDF_NS}`);
    }
    if (term instanceof ListTerm) {
      for (const item of term.elems) {
        if (termUsesRdf(item)) return true;
      }
      return false;
    }
    if (term instanceof GraphTerm) {
      for (const inner of term.triples) {
        if (tripleUsesRdf(inner)) return true;
      }
    }
    return false;
  }

  function tripleUsesRdf(triple) {
    return termUsesRdf(triple.s) || termUsesRdf(triple.p) || termUsesRdf(triple.o);
  }

  for (const triple of triples || []) {
    if (tripleUsesRdf(triple)) return true;
  }
  return false;
}
|
|
1919
|
-
|
|
1920
|
-
// Declare `rdf:` (bound to RDF_NS) when the triples need it and it is not yet declared.
//
// Returns `prefixes` unchanged when usesRdfNamespace(triples) is false or when the
// map already has its own `rdf` entry (whatever it maps to); otherwise a new
// PrefixEnv with `rdf` added.
function ensureRdfPrefixIfUsed(prefixes, triples) {
  if (!usesRdfNamespace(triples)) return prefixes;

  const declared = prefixes && prefixes.map ? prefixes.map : {};
  const alreadyDeclared = Object.prototype.hasOwnProperty.call(declared, 'rdf');
  if (alreadyDeclared) return prefixes;

  const withRdf = { ...declared, rdf: RDF_NS };
  return new PrefixEnv(withRdf, prefixes ? prefixes.baseIri : '');
}
|
|
1931
|
-
|
|
1932
|
-
// Report whether any term in `triples` needs the xsd: prefix.
//
// triples: iterable of objects with `s`, `p`, `o`; null/undefined is treated as empty.
// An Iri counts when it starts with XSD_NS. A Literal counts when its value
// contains `^^xsd:` or `^^<XSD_NS…`. Lists and graph terms are searched
// recursively; the search stops at the first hit.
function usesXsdPrefix(triples) {
  function termUsesXsd(term) {
    if (!term) return false;
    if (term instanceof Iri) return term.value.startsWith(XSD_NS);
    if (term instanceof Literal) {
      return term.value.includes('^^xsd:') || term.value.includes(`^^<${XSD_NS}`);
    }
    if (term instanceof ListTerm) {
      for (const item of term.elems) {
        if (termUsesXsd(item)) return true;
      }
      return false;
    }
    if (term instanceof GraphTerm) {
      for (const inner of term.triples) {
        if (tripleUsesXsd(inner)) return true;
      }
    }
    return false;
  }

  function tripleUsesXsd(triple) {
    return termUsesXsd(triple.s) || termUsesXsd(triple.p) || termUsesXsd(triple.o);
  }

  for (const triple of triples || []) {
    if (tripleUsesXsd(triple)) return true;
  }
  return false;
}
|
|
1974
|
-
|
|
1975
|
-
// Declare `xsd:` (bound to XSD_NS) when the triples need it and it is not yet declared.
//
// Returns `prefixes` unchanged when usesXsdPrefix(triples) is false or when the
// map already has its own `xsd` entry; otherwise a new PrefixEnv with `xsd` added.
function ensureXsdPrefixIfUsed(prefixes, triples) {
  if (!usesXsdPrefix(triples)) return prefixes;

  const declared = prefixes && prefixes.map ? prefixes.map : {};
  const alreadyDeclared = Object.prototype.hasOwnProperty.call(declared, 'xsd');
  if (alreadyDeclared) return prefixes;

  const withXsd = { ...declared, xsd: XSD_NS };
  return new PrefixEnv(withXsd, prefixes ? prefixes.baseIri : '');
}
|
|
1986
|
-
|
|
1987
|
-
// Bucket quads by their graph term.
//
// quads: iterable of objects with `s`, `p`, `o`, `g`.
// Returns a Map from a string key to `{ gTerm, triples }`, where `gTerm` is the
// graph term of the first quad seen for that key and `triples` are Triple
// instances in input order. Keys: 'DEFAULT' for a null/undefined graph,
// `I:<value>` for Iri, `B:<label>` for Blank, `X:<String(g)>` for anything else.
function groupQuadsByGraph(quads) {
  const graphKey = (g) => {
    if (g == null) return 'DEFAULT';
    if (g instanceof Iri) return `I:${g.value}`;
    return g instanceof Blank ? `B:${g.label}` : `X:${String(g)}`;
  };

  const groups = new Map();
  for (const quad of quads) {
    const key = graphKey(quad.g);
    let bucket = groups.get(key);
    if (bucket === undefined) {
      bucket = { gTerm: quad.g, triples: [] };
      groups.set(key, bucket);
    }
    bucket.triples.push(new Triple(quad.s, quad.p, quad.o));
  }
  return groups;
}
|
|
2002
|
-
|
|
2003
|
-
// Serialize a dataset (quads) as N3 text.
//
// Default-graph triples are written at top level; every named graph is written as
//   <graphTerm> log:nameOf { ...triples... } .
// with named graphs ordered by their grouping key (localeCompare).
//
// Parameters (single object argument):
//   datasetQuads: quads passed to groupQuadsByGraph().
//   prefixes:     prefix environment from the parser.
// Returns the N3 document as a string ending in exactly one '\n'.
//
// Each graph is folded with foldRdfLists() exactly once and the result is reused
// for both the prefix/Skolem analysis and the rendering (previously every graph
// was folded twice).
function writeN3LogNameOf({ datasetQuads, prefixes }) {
  const grouped = groupQuadsByGraph(datasetQuads);
  const hasDefaultGraph = grouped.has('DEFAULT');

  // key -> folded triples of that graph.
  const foldedByKey = new Map();
  for (const [key, { triples }] of grouped.entries()) {
    foldedByKey.set(key, foldRdfLists(triples));
  }

  // Synthetic triple stream that mirrors the *output* structure: default-graph
  // triples are outside any GraphTerm, each named graph is wrapped as
  // `gTerm log:nameOf { ... }`. It feeds prefix pruning and Skolem-map building.
  // An array-literal copy is used rather than push(...big) so a very large
  // default graph cannot exceed the engine's call-argument limit.
  const pseudoTriplesForUse = hasDefaultGraph ? [...foldedByKey.get('DEFAULT')] : [];
  const logNameOfIri = new Iri(log.nameOf);
  for (const [key, { gTerm }] of grouped.entries()) {
    if (key === 'DEFAULT') continue;
    pseudoTriplesForUse.push({ s: gTerm, p: logNameOfIri, o: new GraphTerm(foldedByKey.get(key)) });
  }

  const prunedPrefixes = pruneUnusedPrefixes(prefixes, pseudoTriplesForUse);
  const skolemMap = buildSkolemMapForBnodesThatCrossScopes(pseudoTriplesForUse);
  const outPrefixes = ensureRdfPrefixIfUsed(
    ensureXsdPrefixIfUsed(
      ensureLogPrefixIfUsed(ensureSkolemPrefix(prunedPrefixes, skolemMap), pseudoTriplesForUse),
      pseudoTriplesForUse,
    ),
    pseudoTriplesForUse,
  );

  const renderTerm = (term) => termToText(term, outPrefixes, skolemMap);
  const renderTriple = (tr) => `${renderTerm(tr.s)} ${renderTerm(tr.p)} ${renderTerm(tr.o)} .`;

  const blocks = [];
  const pro = renderPrefixPrologue(outPrefixes).trim();
  if (pro) blocks.push(pro, '');

  // Default graph: triples at top level (no log:nameOf wrapper).
  if (hasDefaultGraph) {
    for (const tr of foldedByKey.get('DEFAULT')) blocks.push(renderTriple(tr));
    blocks.push('');
  }

  // Named graphs, sorted by grouping key.
  const named = [...grouped.entries()].filter(([key]) => key !== 'DEFAULT');
  named.sort((a, b) => a[0].localeCompare(b[0]));
  for (const [key, { gTerm }] of named) {
    blocks.push(`${renderTerm(gTerm)} log:nameOf {`);
    const folded = foldedByKey.get(key);
    if (folded.length) {
      blocks.push(folded.map((tr) => ` ${renderTriple(tr)}`).join('\n'));
    }
    blocks.push('} .', '');
  }

  return blocks.join('\n').trim() + '\n';
}
|
|
2070
|
-
|
|
2071
|
-
// ---------------------------------------------------------------------------
|
|
2072
|
-
// Parsing + N3 output (Turtle/TriG -> N3)
|
|
2073
|
-
// ---------------------------------------------------------------------------
|
|
2074
|
-
|
|
2075
|
-
// Lex `text` and run the TriG parser over the tokens; returns whatever
// TriGParser#parseTrigDocument() returns.
function parseTriG(text) {
  const tokens = lex(text);
  const parser = new TriGParser(tokens);
  return parser.parseTrigDocument();
}
|
|
2079
|
-
|
|
2080
|
-
// Lex `text` and run the Turtle parser over the tokens; returns whatever
// TurtleParser#parseTurtleDocument() returns.
function parseTurtle(text) {
  const tokens = lex(text);
  const parser = new TurtleParser(tokens);
  return parser.parseTurtleDocument();
}
|
|
2084
|
-
|
|
2085
|
-
// Serialize a flat list of triples as N3 text.
//
// Parameters (single object argument):
//   triples:  triples from the parser; folded with foldRdfLists() first.
//   prefixes: prefix environment from the parser.
// Returns the prefix prologue (if any), a blank line, then one `s p o .` line per
// folded triple; the result is trimmed and ends in exactly one '\n'.
function writeN3Triples({ triples, prefixes }) {
  const folded = foldRdfLists(triples);

  // Prefix pipeline: prune, then add skolem / log / xsd / rdf declarations as needed.
  const pruned = pruneUnusedPrefixes(prefixes, folded);
  const skolemMap = buildSkolemMapForBnodesThatCrossScopes(folded);
  const withSkolem = ensureSkolemPrefix(pruned, skolemMap);
  const withLog = ensureLogPrefixIfUsed(withSkolem, folded);
  const withXsd = ensureXsdPrefixIfUsed(withLog, folded);
  const outPrefixes = ensureRdfPrefixIfUsed(withXsd, folded);

  const lines = [];
  const prologue = renderPrefixPrologue(outPrefixes).trim();
  if (prologue) lines.push(prologue, '');

  const show = (term) => termToText(term, outPrefixes, skolemMap);
  for (const triple of folded) {
    lines.push(`${show(triple.s)} ${show(triple.p)} ${show(triple.o)} .`);
  }

  return lines.join('\n').trim() + '\n';
}
|
|
2106
|
-
|
|
2107
|
-
// Convert Turtle text to N3 text.
//
// initSkolemForInput() is called with the input first (the original comment says
// this makes the Skolem prefix IRI deterministic per input, also in library use);
// then the text is parsed and the triples are written with writeN3Triples().
function turtleToN3(ttlText) {
  initSkolemForInput(ttlText);
  const parsed = parseTurtle(ttlText);
  return writeN3Triples({ triples: parsed.triples, prefixes: parsed.prefixes });
}
|
|
2113
|
-
|
|
2114
|
-
// Convert TriG text to N3 text.
//
// initSkolemForInput() is called with the input first (the original comment says
// this makes the Skolem prefix IRI deterministic per input, also in library use);
// then the text is parsed and the quads are written with writeN3LogNameOf().
function trigToN3(trigText) {
  initSkolemForInput(trigText);
  const parsed = parseTriG(trigText);
  return writeN3LogNameOf({ datasetQuads: parsed.quads, prefixes: parsed.prefixes });
}
|
|
2120
|
-
|
|
2121
|
-
// Write the CLI usage text to stdout.
function printHelp() {
  const usageText = `Usage:
n3gen <file.ttl|file.trig>

Converts RDF 1.2 Turtle (.ttl) or TriG (.trig) to Notation 3 (.n3) and writes to stdout.

Examples:
n3gen file.ttl > file.n3
n3gen file.trig > file.n3
`;
  process.stdout.write(usageText);
}
|
|
2132
|
-
|
|
2133
|
-
// CLI entry point: convert one .ttl or .trig file to N3 on stdout.
//
// Behaviour by arguments (process.argv after the script name):
//   - none:                print help, exit code 2.
//   - contains --help/-h:  print help, exit code left unchanged.
//   - more than one:       print help, exit code 2.
//   - exactly one path:    pick the converter from the (case-insensitive) file
//                          extension, read the file as UTF-8, write the N3 output.
// Throws Error('Unsupported file extension ...') for any other extension; file
// read and parse errors propagate to the caller.
//
// The extension is validated *before* the file is read, so an unsupported input
// is rejected without touching the file system and always reports the
// extension problem rather than an unrelated read error.
async function main() {
  const args = process.argv.slice(2);
  const wantsHelp = args.includes('--help') || args.includes('-h');

  if (args.length === 0 || wantsHelp) {
    printHelp();
    if (args.length === 0) process.exitCode = 2;
    return;
  }
  if (args.length !== 1) {
    printHelp();
    process.exitCode = 2;
    return;
  }

  const [inputFile] = args;
  const ext = path.extname(inputFile).toLowerCase();

  // Resolve the converter lazily so only the selected one is referenced.
  let convert;
  if (ext === '.ttl') {
    convert = turtleToN3;
  } else if (ext === '.trig') {
    convert = trigToN3;
  } else {
    throw new Error(`Unsupported file extension "${ext}". Use .ttl or .trig`);
  }

  const text = await fs.readFile(inputFile, 'utf8');
  process.stdout.write(convert(text));
}
|
|
2162
|
-
|
|
2163
|
-
// Run the CLI; on failure print the stack (or the stringified error) to stderr
// and set exit code 1.
main().catch((err) => {
  const details = err?.stack || String(err);
  console.error(details);
  process.exitCode = 1;
});
|