eyeling 1.24.3 → 1.24.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/HANDBOOK.md +4 -4
- package/dist/browser/eyeling.browser.js +251 -5
- package/eyeling.js +251 -5
- package/lib/lexer.js +251 -5
- package/package.json +2 -3
- package/see/README.md +1 -1
- package/see/examples/input/path_discovery.trig +1 -1
- package/see/see.js +1 -1
- package/test/api.test.js +40 -0
- package/test/package.test.js +1 -1
- package/examples/annotation.n3 +0 -8
- package/examples/arcling/README.md +0 -11
- package/examples/collection.n3 +0 -4
- package/examples/context-association.n3 +0 -33
- package/examples/input/annotation.ttl +0 -6
- package/examples/input/collection.ttl +0 -13
- package/examples/input/context-association.trig +0 -35
- package/examples/input/reifies.ttl +0 -10
- package/examples/input/triple-term.ttl +0 -8
- package/examples/output/annotation.n3 +0 -0
- package/examples/output/collection.n3 +0 -0
- package/examples/output/context-association.n3 +0 -0
- package/examples/output/reifies.n3 +0 -0
- package/examples/output/triple-term.n3 +0 -0
- package/examples/reifies.n3 +0 -8
- package/examples/triple-term.n3 +0 -7
- package/test/n3gen.test.js +0 -166
- package/tools/n3gen.js +0 -2166
package/HANDBOOK.md
CHANGED
|
@@ -239,7 +239,7 @@ Parsing becomes dramatically simpler because tokenization already decided where
|
|
|
239
239
|
|
|
240
240
|
By default, Eyeling parses ordinary N3. Selected RDF/TriG surface syntax is accepted only when RDF compatibility is explicitly enabled with `eyeling -r file.trig`, `eyeling --rdf file.trig`, or API option `{ rdf: true }`. In that mode, the lexer normalizes RDF/TriG input syntax to ordinary N3 graph terms before normal parsing, and the printer emits RDF/TriG-compatible output where feasible. Eyeling remains an N3 reasoner; this is syntax compatibility, not a separate RDF dataset reasoning model.
|
|
241
241
|
|
|
242
|
-
In RDF compatibility mode, RDF 1.2 triple terms written as `<<( s p o )
|
|
242
|
+
In RDF compatibility mode, RDF 1.2 triple terms written as `<<( s p o )>>`, plus the reified triple form `<<s p o ~ r>>`, are normalized to Eyeling's existing singleton quoted-formula term `{ s p o }`. A reifier `r` is preserved as `r rdf:reifies { s p o }`. A leading `VERSION "1.2"` or `@version "1.2"` directive is ignored for the same reason. On output, `--rdf` converts a singleton graph term back to `<<( ... )>>` only when its inner triple is valid as an RDF triple term; otherwise it stays in N3 graph-term form. It also prints `log:nameOf` graph-term triples back as TriG named graph blocks. For example:
|
|
243
243
|
|
|
244
244
|
```n3
|
|
245
245
|
:observation rdf:reifies <<( :sensor :reports :overheating )>> .
|
|
@@ -251,7 +251,7 @@ is treated internally like:
|
|
|
251
251
|
:observation rdf:reifies { :sensor :reports :overheating } .
|
|
252
252
|
```
|
|
253
253
|
|
|
254
|
-
RDF/TriG named graph blocks are normalized
|
|
254
|
+
RDF/TriG named graph blocks are normalized to ordinary N3 graph terms:
|
|
255
255
|
|
|
256
256
|
```trig
|
|
257
257
|
:factoryDataset {
|
|
@@ -2594,7 +2594,7 @@ Quoted graphs/formulas use `{ ... }`. Inside a quoted formula, directive scope m
|
|
|
2594
2594
|
|
|
2595
2595
|
- `@prefix/@base` and `PREFIX/BASE` directives may appear at top level **or inside `{ ... }`**, and apply to the formula they occur in (formula-local scoping).
|
|
2596
2596
|
|
|
2597
|
-
With `-r, --rdf` / `{ rdf: true }`, Eyeling also accepts
|
|
2597
|
+
With `-r, --rdf` / `{ rdf: true }`, Eyeling also accepts RDF 1.2 triple-term surface forms such as `<<( s p o )>>` and `<<s p o ~ r>>` as compatibility spellings for a singleton quoted formula `{ s p o }`. In the same mode, feasible singleton graph terms are printed back as RDF 1.2 triple terms, while invalid cases such as a literal subject remain ordinary N3 graph terms. This is useful for inputs that use `rdf:reifies` or other predicates whose objects are RDF 1.2 triple terms, while keeping the default language and the rest of Eyeling on its N3 formula-term model.
|
|
2598
2598
|
|
|
2599
2599
|
For the formal grammar, see the N3 spec grammar:
|
|
2600
2600
|
|
|
@@ -3883,7 +3883,7 @@ In RDF compatibility mode, Eyeling accepts this surface form by translating the
|
|
|
3883
3883
|
:observation rdf:reifies { :sensor :reports :overheating } .
|
|
3884
3884
|
```
|
|
3885
3885
|
|
|
3886
|
-
The dataset example also uses named graph syntax, which RDF compatibility mode normalizes to `log:nameOf` graph terms
|
|
3886
|
+
The dataset example also uses named graph syntax, which RDF compatibility mode normalizes to `log:nameOf` graph terms:
|
|
3887
3887
|
|
|
3888
3888
|
```trig
|
|
3889
3889
|
:factoryDataset {
|
|
@@ -9588,8 +9588,9 @@ function stripQuotes(lex) {
|
|
|
9588
9588
|
// - RDF 1.2 triple terms <<( s p o )>> become singleton graph terms { s p o }.
|
|
9589
9589
|
// - TriG named graph blocks g { ... } become g log:nameOf { ... } .
|
|
9590
9590
|
// - A top-level default graph block { ... } is unwrapped into ordinary triples.
|
|
9591
|
-
// This
|
|
9591
|
+
// This keeps all downstream parsing/reasoning N3-only.
|
|
9592
9592
|
const LOG_NAME_OF_IRI = '<http://www.w3.org/2000/10/swap/log#nameOf>';
|
|
9593
|
+
const RDF_REIFIES_IRI = '<http://www.w3.org/1999/02/22-rdf-syntax-ns#reifies>';
|
|
9593
9594
|
|
|
9594
9595
|
function normalizeRdfCompatibility(inputText) {
|
|
9595
9596
|
let text = String(inputText ?? '');
|
|
@@ -9598,11 +9599,12 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
9598
9599
|
// surface-syntax normalization. Avoid scanning large files character-by-character
|
|
9599
9600
|
// unless they actually contain RDF 1.2 triple terms, VERSION directives, or a
|
|
9600
9601
|
// plausible top-level TriG named graph block, or RDF 1.2 annotation syntax (`~` / `{|`).
|
|
9601
|
-
const hasTripleTerms = text.includes('<<
|
|
9602
|
+
const hasTripleTerms = text.includes('<<');
|
|
9602
9603
|
const hasVersionDirective = /^\s*(?:@version|VERSION)\s+(["'])1\.2\1\s*\.?\s*(?:#.*)?$/im.test(text);
|
|
9603
9604
|
const hasNamedGraphCandidate = /(?:^|[.\r\n])\s*(?:GRAPH\s+)?(?:<[^>\r\n]*>|_:[A-Za-z][A-Za-z0-9_-]*|[A-Za-z][A-Za-z0-9_-]*:[^\s{};,.()[\]]*)\s*\{/m.test(text);
|
|
9605
|
+
const hasAnnotationSyntax = /(?:^|\s)~\s*(?:<|_:[A-Za-z]|[A-Za-z][A-Za-z0-9_-]*:|\{\|)|\{\|/.test(text);
|
|
9604
9606
|
|
|
9605
|
-
if (!hasTripleTerms && !hasVersionDirective && !hasNamedGraphCandidate) return text;
|
|
9607
|
+
if (!hasTripleTerms && !hasVersionDirective && !hasNamedGraphCandidate && !hasAnnotationSyntax) return text;
|
|
9606
9608
|
|
|
9607
9609
|
function isWordChar(ch) {
|
|
9608
9610
|
return ch != null && /[A-Za-z0-9_:-]/.test(ch);
|
|
@@ -9664,11 +9666,67 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
9664
9666
|
|
|
9665
9667
|
function convertTripleTerms(s) {
|
|
9666
9668
|
let i = 0;
|
|
9669
|
+
const reifierTriples = [];
|
|
9667
9670
|
|
|
9668
9671
|
function startsAt(needle, at = i) {
|
|
9669
9672
|
return s.startsWith(needle, at);
|
|
9670
9673
|
}
|
|
9671
9674
|
|
|
9675
|
+
/**
 * Split the body of a reified-triple form (`<< s p o ~ r >>`) at the first
 * top-level `~`.
 *
 * String literals and `<...>` IRIs are skipped as whole tokens, `#` comments
 * are skipped up to the end of the line, and a `~` nested inside `{}`, `[]`
 * or `()` is ignored. A backslash-escaped character (e.g. `\~`, `\#`, `\'` in
 * the local part of a prefixed name) is never treated as a delimiter.
 *
 * @param {string} body Text found between the triple-term delimiters.
 * @returns {{triple: string, reifier: string}} Trimmed text before and after
 *   the `~`; `reifier` is '' when no top-level `~` exists.
 */
function splitTopLevelReifier(body) {
  let depthBrace = 0;
  let depthBracket = 0;
  let depthParen = 0;
  for (let j = 0; j < body.length; j++) {
    const ch = body[j];
    if (ch === '\\') {
      // Escaped character outside a string/IRI: it belongs to a name, so the
      // following `~`, `#`, quote or bracket must not be interpreted.
      j += 1;
      continue;
    }
    if (ch === '"' || ch === "'") {
      const str = readStringAt(body, j);
      j = str.end - 1; // the loop increment moves past the literal
      continue;
    }
    if (ch === '<') {
      const iri = readIriAt(body, j);
      j = iri.end - 1;
      continue;
    }
    if (ch === '#') {
      while (j < body.length && body[j] !== '\n' && body[j] !== '\r') j += 1;
      continue;
    }
    if (ch === '{') depthBrace += 1;
    else if (ch === '}' && depthBrace > 0) depthBrace -= 1;
    else if (ch === '[') depthBracket += 1;
    else if (ch === ']' && depthBracket > 0) depthBracket -= 1;
    else if (ch === '(') depthParen += 1;
    else if (ch === ')' && depthParen > 0) depthParen -= 1;
    else if (ch === '~' && depthBrace === 0 && depthBracket === 0 && depthParen === 0) {
      return { triple: body.slice(0, j).trim(), reifier: body.slice(j + 1).trim() };
    }
  }
  return { triple: body.trim(), reifier: '' };
}
|
|
9707
|
+
|
|
9708
|
+
/**
 * Extract the leading term from a piece of reifier text.
 *
 * Leading whitespace/comments are skipped via `skipWsAndComments`. A term
 * starting with `<` is read with `readIriAt`; any other term runs up to the
 * next whitespace character or one of `{}[](),;.`.
 *
 * @param {string} text Text following a `~` marker.
 * @returns {string} The first term, or '' when nothing but whitespace/comments remains.
 */
function firstTerm(text) {
  const begin = skipWsAndComments(text, 0);
  if (begin >= text.length) return '';
  if (text[begin] === '<') {
    const iri = readIriAt(text, begin);
    return iri.text;
  }
  const tail = text.slice(begin);
  // Same terminator set as a per-character scan: whitespace or a structural delimiter.
  const stop = tail.search(/[\s{}[\](),;.]/);
  return stop === -1 ? tail : tail.slice(0, stop);
}
|
|
9716
|
+
|
|
9717
|
+
/**
 * Turn the text inside a triple-term form into an N3 graph term `{ s p o }`.
 *
 * When the body carries a top-level `~ reifier`, a statement
 * `reifier rdf:reifies { s p o } .` is appended to the enclosing
 * `reifierTriples` list as a side effect.
 *
 * @param {string} rawBody Text between the triple-term delimiters.
 * @param {boolean} parenthesized When true, one surrounding `( ... )` pair is stripped first.
 * @returns {string} The graph term text.
 */
function graphTermFromTripleBody(rawBody, parenthesized) {
  const trimmed = rawBody.trim();
  const wrapped = parenthesized && trimmed.startsWith('(') && trimmed.endsWith(')');
  const body = wrapped ? trimmed.slice(1, -1).trim() : trimmed;

  const parts = splitTopLevelReifier(body);
  const graph = `{ ${parts.triple} }`;
  if (!parts.reifier) return graph;

  const reifier = firstTerm(parts.reifier);
  if (reifier) {
    reifierTriples.push(`${reifier} ${RDF_REIFIES_IRI} ${graph} .`);
  }
  return graph;
}
|
|
9729
|
+
|
|
9672
9730
|
function convertUntil(stopToken) {
|
|
9673
9731
|
let out = '';
|
|
9674
9732
|
while (i < s.length) {
|
|
@@ -9678,7 +9736,12 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
9678
9736
|
}
|
|
9679
9737
|
if (startsAt('<<(')) {
|
|
9680
9738
|
i += 3;
|
|
9681
|
-
out +=
|
|
9739
|
+
out += graphTermFromTripleBody(convertUntil(')>>'), false);
|
|
9740
|
+
continue;
|
|
9741
|
+
}
|
|
9742
|
+
if (startsAt('<<')) {
|
|
9743
|
+
i += 2;
|
|
9744
|
+
out += graphTermFromTripleBody(convertUntil('>>'), false);
|
|
9682
9745
|
continue;
|
|
9683
9746
|
}
|
|
9684
9747
|
const ch = s[i];
|
|
@@ -9709,7 +9772,189 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
9709
9772
|
return out;
|
|
9710
9773
|
}
|
|
9711
9774
|
|
|
9712
|
-
|
|
9775
|
+
const converted = convertUntil(null);
|
|
9776
|
+
if (reifierTriples.length === 0) return converted;
|
|
9777
|
+
return converted + (converted.endsWith('\n') ? '' : '\n') + reifierTriples.join('\n') + '\n';
|
|
9778
|
+
}
|
|
9779
|
+
|
|
9780
|
+
|
|
9781
|
+
/**
 * Expand RDF 1.2 annotation syntax on plain `s p o ... .` statements into
 * ordinary N3 triples.
 *
 * A statement such as `s p o ~ r {| q v |} .` is rewritten to
 *
 *   s p o .
 *   r rdf:reifies { s p o } .
 *   r q v .
 *
 * Every `~` yields its own `rdf:reifies` triple (a bare `~` gets a generated
 * `_:rdfAnnotationN` blank node). Every `{| ... |}` block is attached to the
 * reifier written directly before it, or to a fresh generated blank node when
 * no reifier precedes it. Statements that are not recognized as an annotated
 * triple are copied through unchanged; string literals, IRIs and comments are
 * copied verbatim.
 *
 * @param {string} s Input text.
 * @returns {string} Text with recognized annotated triples expanded.
 * @throws {N3SyntaxError} When a `{| ... |}`, `[ ... ]` or `( ... )` group is never closed.
 */
function convertAnnotations(s) {
  let out = '';
  let i = 0;
  let statementStart = true;
  let generatedBlank = 0;

  // Read a nested open/close delimited group starting at `at`; returns its
  // full text, inner text and end offset, or null if `open` is not at `at`.
  function readBalancedDelimited(s, at, open, close) {
    if (!s.startsWith(open, at)) return null;
    let j = at + open.length;
    let depth = 1;
    while (j < s.length) {
      const ch = s[j];
      if (ch === '"' || ch === "'") {
        j = readStringAt(s, j).end;
        continue;
      }
      if (ch === '<' && !s.startsWith('<<', j)) {
        j = readIriAt(s, j).end;
        continue;
      }
      if (ch === '#') {
        while (j < s.length && s[j] !== '\n' && s[j] !== '\r') j += 1;
        continue;
      }
      if (s.startsWith(open, j)) {
        depth += 1;
        j += open.length;
        continue;
      }
      if (s.startsWith(close, j)) {
        depth -= 1;
        j += close.length;
        if (depth === 0) return { text: s.slice(at, j), inner: s.slice(at + open.length, j - close.length), end: j };
        continue;
      }
      j += 1;
    }
    throw new N3SyntaxError(`Unterminated RDF annotation block, expected ${close}`);
  }

  // Read one subject/predicate/object-like token: IRI, literal (with optional
  // datatype or language tag), `{...}`, `[...]`, `(...)` or a plain term.
  function readTermLikeAt(s, at) {
    const j = skipWsAndComments(s, at);
    if (j >= s.length) return null;
    if (s[j] === '<') return readIriAt(s, j);
    if (s[j] === '"' || s[j] === "'") {
      const str = readStringAt(s, j);
      let end = str.end;
      let text = str.text;
      if (s.startsWith('^^', end)) {
        const dt = readTermAt(s, end + 2);
        if (dt) {
          text += '^^' + dt.text;
          end = dt.end;
        }
      } else if (s[end] === '@') {
        let k = end + 1;
        if (/[A-Za-z]/.test(s[k] || '')) {
          while (k < s.length && /[A-Za-z0-9-]/.test(s[k])) k += 1;
          text += s.slice(end, k);
          end = k;
        }
      }
      return { text, end };
    }
    if (s[j] === '{') return readBalancedBlock(s, j);
    if (s[j] === '[') return readBalancedDelimited(s, j, '[', ']');
    if (s[j] === '(') return readBalancedDelimited(s, j, '(', ')');
    return readTermAt(s, j);
  }

  // Read a `{| ... |}` annotation block at `at`, or return null if none starts there.
  function readAnnotationBlockAt(s, at) {
    if (!s.startsWith('{|', at)) return null;
    return readBalancedDelimited(s, at, '{|', '|}');
  }

  // Allocate a blank-node label for an annotation that names no reifier.
  function freshReifier() {
    generatedBlank += 1;
    return `_:rdfAnnotation${generatedBlank}`;
  }

  // Try to read `s p o (~ r | {| ... |})+ .` at `at`. Returns the replacement
  // text with its source span, or null when the statement has another shape.
  function tryReadAnnotatedTriple(at) {
    const start = skipWsAndComments(s, at);
    if (start >= s.length) return null;
    if (s[start] === '@') return null;
    if (startsWordAt(s, 'PREFIX', start) || startsWordAt(s, 'BASE', start) || startsWordAt(s, 'VERSION', start)) return null;
    if (startsWordAt(s, 'GRAPH', start)) return null;

    const subj = readTermLikeAt(s, start);
    if (!subj) return null;
    let j = skipWsAndComments(s, subj.end);
    const pred = readTermLikeAt(s, j);
    if (!pred) return null;
    j = skipWsAndComments(s, pred.end);
    const obj = readTermLikeAt(s, j);
    if (!obj) return null;
    j = skipWsAndComments(s, obj.end);
    if (s[j] !== '~' && !s.startsWith('{|', j)) return null;

    const baseTriple = `${subj.text} ${pred.text} ${obj.text}`;
    const graph = `{ ${baseTriple} }`;
    const extra = [];
    // Reifier that the next annotation block belongs to; '' when the next
    // block must allocate its own blank node.
    let current = '';

    while (j < s.length) {
      j = skipWsAndComments(s, j);
      if (s[j] === '~') {
        j = skipWsAndComments(s, j + 1);
        const term = readTermAt(s, j);
        if (term) {
          current = term.text;
          j = term.end;
        } else {
          current = freshReifier();
        }
        // Each reifier gets its own rdf:reifies statement so that none is lost
        // when several appear on the same triple.
        extra.push(`${current} ${RDF_REIFIES_IRI} ${graph} .`);
        continue;
      }
      if (s.startsWith('{|', j)) {
        const block = readAnnotationBlockAt(s, j);
        if (!current) {
          current = freshReifier();
          extra.push(`${current} ${RDF_REIFIES_IRI} ${graph} .`);
        }
        const inner = block.inner.trim();
        if (inner) extra.push(`${current} ${inner} .`);
        current = ''; // a following block without its own `~` starts a new reifier
        j = block.end;
        continue;
      }
      break;
    }

    const after = skipWsAndComments(s, j);
    if (s[after] !== '.') return null;
    if (extra.length === 0) return null;

    return {
      start,
      end: after + 1,
      text: `${baseTriple} .${extra.length ? '\n' + extra.join('\n') : ''}`,
    };
  }

  while (i < s.length) {
    if (statementStart) {
      const converted = tryReadAnnotatedTriple(i);
      if (converted) {
        out += s.slice(i, converted.start) + converted.text;
        i = converted.end;
        statementStart = true;
        continue;
      }
    }

    const ch = s[i];
    if (ch === '"' || ch === "'") {
      const str = readStringAt(s, i);
      out += str.text;
      i = str.end;
      continue;
    }
    if (ch === '<' && !s.startsWith('<<', i)) {
      const iri = readIriAt(s, i);
      out += iri.text;
      i = iri.end;
      continue;
    }
    if (ch === '#') {
      // Copy the comment through, including its line terminator.
      while (i < s.length) {
        const c = s[i++];
        out += c;
        if (c === '\n' || c === '\r') break;
      }
      statementStart = true;
      continue;
    }
    out += ch;
    if (ch === '.' || ch === '{' || ch === '}' || ch === '\n' || ch === '\r') statementStart = true;
    else if (!/\s/.test(ch)) statementStart = false;
    i += 1;
  }

  return out;
}
|
|
9714
9959
|
|
|
9715
9960
|
function stripVersionDirectives(s) {
|
|
@@ -9868,6 +10113,7 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
9868
10113
|
}
|
|
9869
10114
|
|
|
9870
10115
|
if (hasTripleTerms) text = convertTripleTerms(text);
|
|
10116
|
+
if (hasAnnotationSyntax) text = convertAnnotations(text);
|
|
9871
10117
|
if (hasVersionDirective) text = stripVersionDirectives(text);
|
|
9872
10118
|
if (hasVersionDirective || hasNamedGraphCandidate) text = normalizeNamedGraphs(text);
|
|
9873
10119
|
return text;
|
package/eyeling.js
CHANGED
|
@@ -9588,8 +9588,9 @@ function stripQuotes(lex) {
|
|
|
9588
9588
|
// - RDF 1.2 triple terms <<( s p o )>> become singleton graph terms { s p o }.
|
|
9589
9589
|
// - TriG named graph blocks g { ... } become g log:nameOf { ... } .
|
|
9590
9590
|
// - A top-level default graph block { ... } is unwrapped into ordinary triples.
|
|
9591
|
-
// This
|
|
9591
|
+
// This keeps all downstream parsing/reasoning N3-only.
|
|
9592
9592
|
const LOG_NAME_OF_IRI = '<http://www.w3.org/2000/10/swap/log#nameOf>';
|
|
9593
|
+
const RDF_REIFIES_IRI = '<http://www.w3.org/1999/02/22-rdf-syntax-ns#reifies>';
|
|
9593
9594
|
|
|
9594
9595
|
function normalizeRdfCompatibility(inputText) {
|
|
9595
9596
|
let text = String(inputText ?? '');
|
|
@@ -9598,11 +9599,12 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
9598
9599
|
// surface-syntax normalization. Avoid scanning large files character-by-character
|
|
9599
9600
|
// unless they actually contain RDF 1.2 triple terms, VERSION directives, or a
|
|
9600
9601
|
// plausible top-level TriG named graph block, or RDF 1.2 annotation syntax (`~` / `{|`).
|
|
9601
|
-
const hasTripleTerms = text.includes('<<
|
|
9602
|
+
const hasTripleTerms = text.includes('<<');
|
|
9602
9603
|
const hasVersionDirective = /^\s*(?:@version|VERSION)\s+(["'])1\.2\1\s*\.?\s*(?:#.*)?$/im.test(text);
|
|
9603
9604
|
const hasNamedGraphCandidate = /(?:^|[.\r\n])\s*(?:GRAPH\s+)?(?:<[^>\r\n]*>|_:[A-Za-z][A-Za-z0-9_-]*|[A-Za-z][A-Za-z0-9_-]*:[^\s{};,.()[\]]*)\s*\{/m.test(text);
|
|
9605
|
+
const hasAnnotationSyntax = /(?:^|\s)~\s*(?:<|_:[A-Za-z]|[A-Za-z][A-Za-z0-9_-]*:|\{\|)|\{\|/.test(text);
|
|
9604
9606
|
|
|
9605
|
-
if (!hasTripleTerms && !hasVersionDirective && !hasNamedGraphCandidate) return text;
|
|
9607
|
+
if (!hasTripleTerms && !hasVersionDirective && !hasNamedGraphCandidate && !hasAnnotationSyntax) return text;
|
|
9606
9608
|
|
|
9607
9609
|
function isWordChar(ch) {
|
|
9608
9610
|
return ch != null && /[A-Za-z0-9_:-]/.test(ch);
|
|
@@ -9664,11 +9666,67 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
9664
9666
|
|
|
9665
9667
|
function convertTripleTerms(s) {
|
|
9666
9668
|
let i = 0;
|
|
9669
|
+
const reifierTriples = [];
|
|
9667
9670
|
|
|
9668
9671
|
function startsAt(needle, at = i) {
|
|
9669
9672
|
return s.startsWith(needle, at);
|
|
9670
9673
|
}
|
|
9671
9674
|
|
|
9675
|
+
/**
 * Split the body of a reified-triple form (`<< s p o ~ r >>`) at the first
 * top-level `~`.
 *
 * String literals and `<...>` IRIs are skipped as whole tokens, `#` comments
 * are skipped up to the end of the line, and a `~` nested inside `{}`, `[]`
 * or `()` is ignored. A backslash-escaped character (e.g. `\~`, `\#`, `\'` in
 * the local part of a prefixed name) is never treated as a delimiter.
 *
 * @param {string} body Text found between the triple-term delimiters.
 * @returns {{triple: string, reifier: string}} Trimmed text before and after
 *   the `~`; `reifier` is '' when no top-level `~` exists.
 */
function splitTopLevelReifier(body) {
  let depthBrace = 0;
  let depthBracket = 0;
  let depthParen = 0;
  for (let j = 0; j < body.length; j++) {
    const ch = body[j];
    if (ch === '\\') {
      // Escaped character outside a string/IRI: it belongs to a name, so the
      // following `~`, `#`, quote or bracket must not be interpreted.
      j += 1;
      continue;
    }
    if (ch === '"' || ch === "'") {
      const str = readStringAt(body, j);
      j = str.end - 1; // the loop increment moves past the literal
      continue;
    }
    if (ch === '<') {
      const iri = readIriAt(body, j);
      j = iri.end - 1;
      continue;
    }
    if (ch === '#') {
      while (j < body.length && body[j] !== '\n' && body[j] !== '\r') j += 1;
      continue;
    }
    if (ch === '{') depthBrace += 1;
    else if (ch === '}' && depthBrace > 0) depthBrace -= 1;
    else if (ch === '[') depthBracket += 1;
    else if (ch === ']' && depthBracket > 0) depthBracket -= 1;
    else if (ch === '(') depthParen += 1;
    else if (ch === ')' && depthParen > 0) depthParen -= 1;
    else if (ch === '~' && depthBrace === 0 && depthBracket === 0 && depthParen === 0) {
      return { triple: body.slice(0, j).trim(), reifier: body.slice(j + 1).trim() };
    }
  }
  return { triple: body.trim(), reifier: '' };
}
|
|
9707
|
+
|
|
9708
|
+
/**
 * Extract the leading term from a piece of reifier text.
 *
 * Leading whitespace/comments are skipped via `skipWsAndComments`. A term
 * starting with `<` is read with `readIriAt`; any other term runs up to the
 * next whitespace character or one of `{}[](),;.`.
 *
 * @param {string} text Text following a `~` marker.
 * @returns {string} The first term, or '' when nothing but whitespace/comments remains.
 */
function firstTerm(text) {
  const begin = skipWsAndComments(text, 0);
  if (begin >= text.length) return '';
  if (text[begin] === '<') {
    const iri = readIriAt(text, begin);
    return iri.text;
  }
  const tail = text.slice(begin);
  // Same terminator set as a per-character scan: whitespace or a structural delimiter.
  const stop = tail.search(/[\s{}[\](),;.]/);
  return stop === -1 ? tail : tail.slice(0, stop);
}
|
|
9716
|
+
|
|
9717
|
+
/**
 * Turn the text inside a triple-term form into an N3 graph term `{ s p o }`.
 *
 * When the body carries a top-level `~ reifier`, a statement
 * `reifier rdf:reifies { s p o } .` is appended to the enclosing
 * `reifierTriples` list as a side effect.
 *
 * @param {string} rawBody Text between the triple-term delimiters.
 * @param {boolean} parenthesized When true, one surrounding `( ... )` pair is stripped first.
 * @returns {string} The graph term text.
 */
function graphTermFromTripleBody(rawBody, parenthesized) {
  const trimmed = rawBody.trim();
  const wrapped = parenthesized && trimmed.startsWith('(') && trimmed.endsWith(')');
  const body = wrapped ? trimmed.slice(1, -1).trim() : trimmed;

  const parts = splitTopLevelReifier(body);
  const graph = `{ ${parts.triple} }`;
  if (!parts.reifier) return graph;

  const reifier = firstTerm(parts.reifier);
  if (reifier) {
    reifierTriples.push(`${reifier} ${RDF_REIFIES_IRI} ${graph} .`);
  }
  return graph;
}
|
|
9729
|
+
|
|
9672
9730
|
function convertUntil(stopToken) {
|
|
9673
9731
|
let out = '';
|
|
9674
9732
|
while (i < s.length) {
|
|
@@ -9678,7 +9736,12 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
9678
9736
|
}
|
|
9679
9737
|
if (startsAt('<<(')) {
|
|
9680
9738
|
i += 3;
|
|
9681
|
-
out +=
|
|
9739
|
+
out += graphTermFromTripleBody(convertUntil(')>>'), false);
|
|
9740
|
+
continue;
|
|
9741
|
+
}
|
|
9742
|
+
if (startsAt('<<')) {
|
|
9743
|
+
i += 2;
|
|
9744
|
+
out += graphTermFromTripleBody(convertUntil('>>'), false);
|
|
9682
9745
|
continue;
|
|
9683
9746
|
}
|
|
9684
9747
|
const ch = s[i];
|
|
@@ -9709,7 +9772,189 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
9709
9772
|
return out;
|
|
9710
9773
|
}
|
|
9711
9774
|
|
|
9712
|
-
|
|
9775
|
+
const converted = convertUntil(null);
|
|
9776
|
+
if (reifierTriples.length === 0) return converted;
|
|
9777
|
+
return converted + (converted.endsWith('\n') ? '' : '\n') + reifierTriples.join('\n') + '\n';
|
|
9778
|
+
}
|
|
9779
|
+
|
|
9780
|
+
|
|
9781
|
+
/**
 * Expand RDF 1.2 annotation syntax on plain `s p o ... .` statements into
 * ordinary N3 triples.
 *
 * A statement such as `s p o ~ r {| q v |} .` is rewritten to
 *
 *   s p o .
 *   r rdf:reifies { s p o } .
 *   r q v .
 *
 * Every `~` yields its own `rdf:reifies` triple (a bare `~` gets a generated
 * `_:rdfAnnotationN` blank node). Every `{| ... |}` block is attached to the
 * reifier written directly before it, or to a fresh generated blank node when
 * no reifier precedes it. Statements that are not recognized as an annotated
 * triple are copied through unchanged; string literals, IRIs and comments are
 * copied verbatim.
 *
 * @param {string} s Input text.
 * @returns {string} Text with recognized annotated triples expanded.
 * @throws {N3SyntaxError} When a `{| ... |}`, `[ ... ]` or `( ... )` group is never closed.
 */
function convertAnnotations(s) {
  let out = '';
  let i = 0;
  let statementStart = true;
  let generatedBlank = 0;

  // Read a nested open/close delimited group starting at `at`; returns its
  // full text, inner text and end offset, or null if `open` is not at `at`.
  function readBalancedDelimited(s, at, open, close) {
    if (!s.startsWith(open, at)) return null;
    let j = at + open.length;
    let depth = 1;
    while (j < s.length) {
      const ch = s[j];
      if (ch === '"' || ch === "'") {
        j = readStringAt(s, j).end;
        continue;
      }
      if (ch === '<' && !s.startsWith('<<', j)) {
        j = readIriAt(s, j).end;
        continue;
      }
      if (ch === '#') {
        while (j < s.length && s[j] !== '\n' && s[j] !== '\r') j += 1;
        continue;
      }
      if (s.startsWith(open, j)) {
        depth += 1;
        j += open.length;
        continue;
      }
      if (s.startsWith(close, j)) {
        depth -= 1;
        j += close.length;
        if (depth === 0) return { text: s.slice(at, j), inner: s.slice(at + open.length, j - close.length), end: j };
        continue;
      }
      j += 1;
    }
    throw new N3SyntaxError(`Unterminated RDF annotation block, expected ${close}`);
  }

  // Read one subject/predicate/object-like token: IRI, literal (with optional
  // datatype or language tag), `{...}`, `[...]`, `(...)` or a plain term.
  function readTermLikeAt(s, at) {
    const j = skipWsAndComments(s, at);
    if (j >= s.length) return null;
    if (s[j] === '<') return readIriAt(s, j);
    if (s[j] === '"' || s[j] === "'") {
      const str = readStringAt(s, j);
      let end = str.end;
      let text = str.text;
      if (s.startsWith('^^', end)) {
        const dt = readTermAt(s, end + 2);
        if (dt) {
          text += '^^' + dt.text;
          end = dt.end;
        }
      } else if (s[end] === '@') {
        let k = end + 1;
        if (/[A-Za-z]/.test(s[k] || '')) {
          while (k < s.length && /[A-Za-z0-9-]/.test(s[k])) k += 1;
          text += s.slice(end, k);
          end = k;
        }
      }
      return { text, end };
    }
    if (s[j] === '{') return readBalancedBlock(s, j);
    if (s[j] === '[') return readBalancedDelimited(s, j, '[', ']');
    if (s[j] === '(') return readBalancedDelimited(s, j, '(', ')');
    return readTermAt(s, j);
  }

  // Read a `{| ... |}` annotation block at `at`, or return null if none starts there.
  function readAnnotationBlockAt(s, at) {
    if (!s.startsWith('{|', at)) return null;
    return readBalancedDelimited(s, at, '{|', '|}');
  }

  // Allocate a blank-node label for an annotation that names no reifier.
  function freshReifier() {
    generatedBlank += 1;
    return `_:rdfAnnotation${generatedBlank}`;
  }

  // Try to read `s p o (~ r | {| ... |})+ .` at `at`. Returns the replacement
  // text with its source span, or null when the statement has another shape.
  function tryReadAnnotatedTriple(at) {
    const start = skipWsAndComments(s, at);
    if (start >= s.length) return null;
    if (s[start] === '@') return null;
    if (startsWordAt(s, 'PREFIX', start) || startsWordAt(s, 'BASE', start) || startsWordAt(s, 'VERSION', start)) return null;
    if (startsWordAt(s, 'GRAPH', start)) return null;

    const subj = readTermLikeAt(s, start);
    if (!subj) return null;
    let j = skipWsAndComments(s, subj.end);
    const pred = readTermLikeAt(s, j);
    if (!pred) return null;
    j = skipWsAndComments(s, pred.end);
    const obj = readTermLikeAt(s, j);
    if (!obj) return null;
    j = skipWsAndComments(s, obj.end);
    if (s[j] !== '~' && !s.startsWith('{|', j)) return null;

    const baseTriple = `${subj.text} ${pred.text} ${obj.text}`;
    const graph = `{ ${baseTriple} }`;
    const extra = [];
    // Reifier that the next annotation block belongs to; '' when the next
    // block must allocate its own blank node.
    let current = '';

    while (j < s.length) {
      j = skipWsAndComments(s, j);
      if (s[j] === '~') {
        j = skipWsAndComments(s, j + 1);
        const term = readTermAt(s, j);
        if (term) {
          current = term.text;
          j = term.end;
        } else {
          current = freshReifier();
        }
        // Each reifier gets its own rdf:reifies statement so that none is lost
        // when several appear on the same triple.
        extra.push(`${current} ${RDF_REIFIES_IRI} ${graph} .`);
        continue;
      }
      if (s.startsWith('{|', j)) {
        const block = readAnnotationBlockAt(s, j);
        if (!current) {
          current = freshReifier();
          extra.push(`${current} ${RDF_REIFIES_IRI} ${graph} .`);
        }
        const inner = block.inner.trim();
        if (inner) extra.push(`${current} ${inner} .`);
        current = ''; // a following block without its own `~` starts a new reifier
        j = block.end;
        continue;
      }
      break;
    }

    const after = skipWsAndComments(s, j);
    if (s[after] !== '.') return null;
    if (extra.length === 0) return null;

    return {
      start,
      end: after + 1,
      text: `${baseTriple} .${extra.length ? '\n' + extra.join('\n') : ''}`,
    };
  }

  while (i < s.length) {
    if (statementStart) {
      const converted = tryReadAnnotatedTriple(i);
      if (converted) {
        out += s.slice(i, converted.start) + converted.text;
        i = converted.end;
        statementStart = true;
        continue;
      }
    }

    const ch = s[i];
    if (ch === '"' || ch === "'") {
      const str = readStringAt(s, i);
      out += str.text;
      i = str.end;
      continue;
    }
    if (ch === '<' && !s.startsWith('<<', i)) {
      const iri = readIriAt(s, i);
      out += iri.text;
      i = iri.end;
      continue;
    }
    if (ch === '#') {
      // Copy the comment through, including its line terminator.
      while (i < s.length) {
        const c = s[i++];
        out += c;
        if (c === '\n' || c === '\r') break;
      }
      statementStart = true;
      continue;
    }
    out += ch;
    if (ch === '.' || ch === '{' || ch === '}' || ch === '\n' || ch === '\r') statementStart = true;
    else if (!/\s/.test(ch)) statementStart = false;
    i += 1;
  }

  return out;
}
|
|
9714
9959
|
|
|
9715
9960
|
function stripVersionDirectives(s) {
|
|
@@ -9868,6 +10113,7 @@ function normalizeRdfCompatibility(inputText) {
|
|
|
9868
10113
|
}
|
|
9869
10114
|
|
|
9870
10115
|
if (hasTripleTerms) text = convertTripleTerms(text);
|
|
10116
|
+
if (hasAnnotationSyntax) text = convertAnnotations(text);
|
|
9871
10117
|
if (hasVersionDirective) text = stripVersionDirectives(text);
|
|
9872
10118
|
if (hasVersionDirective || hasNamedGraphCandidate) text = normalizeNamedGraphs(text);
|
|
9873
10119
|
return text;
|