rdflib 2.3.0-ab7e9999 → 2.3.0-d60f1a34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -0
- package/dist/rdflib.min.js +1 -1
- package/dist/rdflib.min.js.map +1 -1
- package/esm/n3parser.js +20 -11
- package/esm/serialize.js +4 -2
- package/esm/serializer.js +40 -9
- package/lib/n3parser.js +20 -11
- package/lib/serialize.js +4 -2
- package/lib/serializer.d.ts +7 -0
- package/lib/serializer.js +40 -9
- package/package.json +1 -1
- package/src/n3parser.js +20 -11
- package/src/serialize.ts +4 -2
- package/src/serializer.js +40 -9
package/esm/n3parser.js
CHANGED
|
@@ -183,9 +183,19 @@ var ws = new RegExp("^[ \\t]*", 'g');
|
|
|
183
183
|
var signed_integer = new RegExp("^[-+]?[0-9]+", 'g');
|
|
184
184
|
var number_syntax = new RegExp("^([-+]?[0-9]+)(\\.[0-9]+)?([eE][-+]?[0-9]+)?", 'g');
|
|
185
185
|
var datetime_syntax = new RegExp('^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9](T[0-9][0-9]:[0-9][0-9](:[0-9][0-9](\\.[0-9]*)?)?)?Z?');
|
|
186
|
+
|
|
187
|
+
// Reused in tight loops to detect whitespace or comment after a dot
|
|
188
|
+
var wsOrHash = new RegExp("[\\s#]");
|
|
186
189
|
var digitstring = new RegExp("^[0-9]+", 'g');
|
|
187
190
|
var interesting = new RegExp("[\\\\\\r\\n\\\"]", 'g');
|
|
188
191
|
var langcode = new RegExp("^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*", 'g');
|
|
192
|
+
|
|
193
|
+
/**
 * Decide whether the character at index `i` of `str` (expected by callers to be
 * a '.') ends a name instead of being part of it.
 *
 * The dot terminates the name when nothing follows it (end of input) or when
 * the following character matches the module-level `wsOrHash` pattern, i.e. it
 * is whitespace or '#' (start of a comment). The function itself does not
 * verify that `str.charAt(i)` is a dot.
 *
 * @param {string} str - text being scanned
 * @param {number} i - index of the dot within `str`
 * @returns {boolean} true when the dot should be treated as a terminator
 */
function dotTerminatesName(str, i) {
  var follower = str.charAt(i + 1);
  if (follower === '') {
    // charAt() yields '' past the end of the string: the dot is the last character
    return true;
  }
  return wsOrHash.test(follower);
}
|
|
189
199
|
/**
 * Factory wrapper around the SinkParser constructor.
 *
 * All eight arguments are forwarded to `new SinkParser(...)` unchanged and in
 * the same order; the freshly constructed parser is returned.
 */
function createSinkParser(store, openFormula, thisDoc, baseURI, genPrefix, metaURI, flags, why) {
  var parser = new SinkParser(store, openFormula, thisDoc, baseURI, genPrefix, metaURI, flags, why);
  return parser;
}
|
|
@@ -675,9 +685,14 @@ export class SinkParser {
|
|
|
675
685
|
throw BadSyntax(this._thisDoc, this.lines, str, i, "EOF when ']' expected after [ <propertyList>");
|
|
676
686
|
}
|
|
677
687
|
if (str.slice(j, j + 1) == ".") {
|
|
678
|
-
// If a dot is found after a blank node, treat it as statement terminator
|
|
688
|
+
// If a dot is found after a blank node, treat it as a statement terminator.
|
|
689
|
+
// Do NOT consume the '.' here: statement terminators are handled centrally by
|
|
690
|
+
// checkDot() (called by directiveOrStatement after statement()). Consuming the dot
|
|
691
|
+
// locally would bypass that unified logic and could cause inconsistencies.
|
|
692
|
+
// We do consume ']' below because it is a structural closer of the blank node,
|
|
693
|
+
// not a statement terminator.
|
|
679
694
|
res.push(subj);
|
|
680
|
-
return j;
|
|
695
|
+
return j; // leave '.' for checkDot()
|
|
681
696
|
}
|
|
682
697
|
if (str.slice(j, j + 1) != "]") {
|
|
683
698
|
throw BadSyntax(this._thisDoc, this.lines, str, j, "']' expected");
|
|
@@ -1129,10 +1144,8 @@ export class SinkParser {
|
|
|
1129
1144
|
var i = j;
|
|
1130
1145
|
while (i < pyjslib_len(str)) {
|
|
1131
1146
|
var c = str.charAt(i);
|
|
1132
|
-
// Allow dot in names unless it is followed by whitespace/comment/EOF
|
|
1133
1147
|
if (c === '.') {
|
|
1134
|
-
|
|
1135
|
-
if (next === '' || /[\s#]/.test(next)) {
|
|
1148
|
+
if (dotTerminatesName(str, i)) {
|
|
1136
1149
|
break; // treat as statement terminator, not part of name
|
|
1137
1150
|
}
|
|
1138
1151
|
// else: accept '.' as part of name
|
|
@@ -1165,10 +1178,8 @@ export class SinkParser {
|
|
|
1165
1178
|
var i = i + 1;
|
|
1166
1179
|
while (i < pyjslib_len(str)) {
|
|
1167
1180
|
var c = str.charAt(i);
|
|
1168
|
-
// Allow dot unless followed by whitespace/comment/EOF; otherwise break on invalid chars
|
|
1169
1181
|
if (c === '.') {
|
|
1170
|
-
|
|
1171
|
-
if (next === '' || /[\s#]/.test(next)) {
|
|
1182
|
+
if (dotTerminatesName(str, i)) {
|
|
1172
1183
|
break; // dot ends the name here
|
|
1173
1184
|
}
|
|
1174
1185
|
} else if (_notNameChars.indexOf(c) >= 0) {
|
|
@@ -1186,10 +1197,8 @@ export class SinkParser {
|
|
|
1186
1197
|
var ln = "";
|
|
1187
1198
|
while (i < pyjslib_len(str)) {
|
|
1188
1199
|
var c = str.charAt(i);
|
|
1189
|
-
// Allow dot unless followed by whitespace/comment/EOF; otherwise break on invalid chars
|
|
1190
1200
|
if (c === '.') {
|
|
1191
|
-
|
|
1192
|
-
if (next === '' || /[\s#]/.test(next)) {
|
|
1201
|
+
if (dotTerminatesName(str, i)) {
|
|
1193
1202
|
break; // dot ends the name here
|
|
1194
1203
|
}
|
|
1195
1204
|
} else if (_notNameChars.indexOf(c) >= 0) {
|
package/esm/serialize.js
CHANGED
|
@@ -40,7 +40,8 @@ contentType, callback, options) {
|
|
|
40
40
|
return executeCallback(null, documentString);
|
|
41
41
|
case TurtleContentType:
|
|
42
42
|
case TurtleLegacyContentType:
|
|
43
|
-
|
|
43
|
+
// Suppress = for sameAs and => for implies; preserve any user-specified flags (e.g., 'o')
|
|
44
|
+
sz.setFlags('si' + (opts.flags ? ' ' + opts.flags : ''));
|
|
44
45
|
documentString = sz.statementsToN3(newSts);
|
|
45
46
|
return executeCallback(null, documentString);
|
|
46
47
|
case NTriplesContentType:
|
|
@@ -48,7 +49,8 @@ contentType, callback, options) {
|
|
|
48
49
|
documentString = sz.statementsToNTriples(newSts);
|
|
49
50
|
return executeCallback(null, documentString);
|
|
50
51
|
case JSONLDContentType:
|
|
51
|
-
|
|
52
|
+
// turtle + dr (means no default, no relative prefix); preserve user flags
|
|
53
|
+
sz.setFlags('si dr' + (opts.flags ? ' ' + opts.flags : ''));
|
|
52
54
|
documentString = sz.statementsToJsonld(newSts); // convert via turtle
|
|
53
55
|
return executeCallback(null, documentString);
|
|
54
56
|
case NQuadsContentType:
|
package/esm/serializer.js
CHANGED
|
@@ -54,6 +54,13 @@ export class Serializer {
|
|
|
54
54
|
this.base = base;
|
|
55
55
|
return this;
|
|
56
56
|
}
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Set serializer behavior flags. Each flag is a single letter; several flags may be combined in one string, optionally separated by spaces.
|
|
60
|
+
* Examples: 'si', 'deinprstux', 'si dr', 'o'.
|
|
61
|
+
* Notable flags:
|
|
62
|
+
* - 'o': do not abbreviate to a prefixed name when the local part contains a dot
|
|
63
|
+
*/
|
|
57
64
|
setFlags(flags) {
|
|
58
65
|
this.flags = flags || '';
|
|
59
66
|
return this;
|
|
@@ -244,6 +251,28 @@ export class Serializer {
|
|
|
244
251
|
toN3(f) {
|
|
245
252
|
return this.statementsToN3(f.statements);
|
|
246
253
|
}
|
|
254
|
+
// Validate if a string is a valid PN_LOCAL per Turtle 1.1 spec
|
|
255
|
+
// Allows dots inside the local name but not as trailing character
|
|
256
|
+
// Also allows empty local names (for URIs ending in / or #)
|
|
257
|
+
isValidPNLocal(local) {
|
|
258
|
+
// Empty local name is valid (e.g., ex: for http://example.com/)
|
|
259
|
+
if (local.length === 0) return true;
|
|
260
|
+
|
|
261
|
+
// Cannot end with a dot
|
|
262
|
+
if (local[local.length - 1] === '.') return false;
|
|
263
|
+
|
|
264
|
+
// Check each character (allow dots mid-string)
|
|
265
|
+
for (var i = 0; i < local.length; i++) {
|
|
266
|
+
var ch = local[i];
|
|
267
|
+
// Dot is allowed unless it's the last character (checked above)
|
|
268
|
+
if (ch === '.') continue;
|
|
269
|
+
// Other characters must not be in the blacklist
|
|
270
|
+
if (this._notNameChars.indexOf(ch) >= 0) {
|
|
271
|
+
return false;
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
return true;
|
|
275
|
+
}
|
|
247
276
|
explicitURI(uri) {
|
|
248
277
|
if (this.flags.indexOf('r') < 0 && this.base) {
|
|
249
278
|
uri = Uri.refTo(this.base, uri);
|
|
@@ -606,13 +635,17 @@ export class Serializer {
|
|
|
606
635
|
if (j >= 0 && this.flags.indexOf('p') < 0 && (
|
|
607
636
|
// Can split at namespace but only if http[s]: URI or file: or ws[s] (why not others?)
|
|
608
637
|
uri.indexOf('http') === 0 || uri.indexOf('ws') === 0 || uri.indexOf('file') === 0)) {
|
|
609
|
-
var
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
638
|
+
var localid = uri.slice(j + 1);
|
|
639
|
+
var namesp = uri.slice(0, j + 1);
|
|
640
|
+
// Don't split if namespace is just the protocol (e.g., https://)
|
|
641
|
+
// A valid namespace should have content after the protocol
|
|
642
|
+
var minNamespaceLength = uri.indexOf('://') + 4; // e.g., "http://x" minimum
|
|
643
|
+
// Also don't split if namespace is the base directory (would serialize as relative URI)
|
|
644
|
+
var baseDir = this.base ? this.base.slice(0, Math.max(this.base.lastIndexOf('/'), this.base.lastIndexOf('#')) + 1) : null;
|
|
645
|
+
var namespaceIsBaseDir = baseDir && namesp === baseDir;
|
|
646
|
+
// If flag 'o' is present, forbid dots in local part when abbreviating
|
|
647
|
+
var forbidDotLocal = this.flags.indexOf('o') >= 0 && localid.indexOf('.') >= 0;
|
|
648
|
+
var canSplit = !namespaceIsBaseDir && !forbidDotLocal && namesp.length > minNamespaceLength && this.isValidPNLocal(localid);
|
|
616
649
|
/*
|
|
617
650
|
if (uri.slice(0, j + 1) === this.base + '#') { // base-relative
|
|
618
651
|
if (canSplit) {
|
|
@@ -623,8 +656,6 @@ export class Serializer {
|
|
|
623
656
|
}
|
|
624
657
|
*/
|
|
625
658
|
if (canSplit) {
|
|
626
|
-
var localid = uri.slice(j + 1);
|
|
627
|
-
var namesp = uri.slice(0, j + 1);
|
|
628
659
|
if (this.defaultNamespace && this.defaultNamespace === namesp && this.flags.indexOf('d') < 0) {
|
|
629
660
|
// d -> suppress default
|
|
630
661
|
if (this.flags.indexOf('k') >= 0 && this.keyords.indexOf(localid) < 0) {
|
package/lib/n3parser.js
CHANGED
|
@@ -191,9 +191,19 @@ var ws = new RegExp("^[ \\t]*", 'g');
|
|
|
191
191
|
var signed_integer = new RegExp("^[-+]?[0-9]+", 'g');
|
|
192
192
|
var number_syntax = new RegExp("^([-+]?[0-9]+)(\\.[0-9]+)?([eE][-+]?[0-9]+)?", 'g');
|
|
193
193
|
var datetime_syntax = new RegExp('^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9](T[0-9][0-9]:[0-9][0-9](:[0-9][0-9](\\.[0-9]*)?)?)?Z?');
|
|
194
|
+
|
|
195
|
+
// Reused in tight loops to detect whitespace or comment after a dot
|
|
196
|
+
var wsOrHash = new RegExp("[\\s#]");
|
|
194
197
|
var digitstring = new RegExp("^[0-9]+", 'g');
|
|
195
198
|
var interesting = new RegExp("[\\\\\\r\\n\\\"]", 'g');
|
|
196
199
|
var langcode = new RegExp("^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*", 'g');
|
|
200
|
+
|
|
201
|
+
/**
 * Decide whether the character at index `i` of `str` (expected by callers to be
 * a '.') ends a name instead of being part of it.
 *
 * The dot terminates the name when nothing follows it (end of input) or when
 * the following character matches the module-level `wsOrHash` pattern, i.e. it
 * is whitespace or '#' (start of a comment). The function itself does not
 * verify that `str.charAt(i)` is a dot.
 *
 * @param {string} str - text being scanned
 * @param {number} i - index of the dot within `str`
 * @returns {boolean} true when the dot should be treated as a terminator
 */
function dotTerminatesName(str, i) {
  var follower = str.charAt(i + 1);
  if (follower === '') {
    // charAt() yields '' past the end of the string: the dot is the last character
    return true;
  }
  return wsOrHash.test(follower);
}
|
|
197
207
|
/**
 * Factory wrapper around the SinkParser constructor.
 *
 * All eight arguments are forwarded to `new SinkParser(...)` unchanged and in
 * the same order; the freshly constructed parser is returned.
 */
function createSinkParser(store, openFormula, thisDoc, baseURI, genPrefix, metaURI, flags, why) {
  var parser = new SinkParser(store, openFormula, thisDoc, baseURI, genPrefix, metaURI, flags, why);
  return parser;
}
|
|
@@ -683,9 +693,14 @@ class SinkParser {
|
|
|
683
693
|
throw BadSyntax(this._thisDoc, this.lines, str, i, "EOF when ']' expected after [ <propertyList>");
|
|
684
694
|
}
|
|
685
695
|
if (str.slice(j, j + 1) == ".") {
|
|
686
|
-
// If a dot is found after a blank node, treat it as statement terminator
|
|
696
|
+
// If a dot is found after a blank node, treat it as a statement terminator.
|
|
697
|
+
// Do NOT consume the '.' here: statement terminators are handled centrally by
|
|
698
|
+
// checkDot() (called by directiveOrStatement after statement()). Consuming the dot
|
|
699
|
+
// locally would bypass that unified logic and could cause inconsistencies.
|
|
700
|
+
// We do consume ']' below because it is a structural closer of the blank node,
|
|
701
|
+
// not a statement terminator.
|
|
687
702
|
res.push(subj);
|
|
688
|
-
return j;
|
|
703
|
+
return j; // leave '.' for checkDot()
|
|
689
704
|
}
|
|
690
705
|
if (str.slice(j, j + 1) != "]") {
|
|
691
706
|
throw BadSyntax(this._thisDoc, this.lines, str, j, "']' expected");
|
|
@@ -1137,10 +1152,8 @@ class SinkParser {
|
|
|
1137
1152
|
var i = j;
|
|
1138
1153
|
while (i < pyjslib_len(str)) {
|
|
1139
1154
|
var c = str.charAt(i);
|
|
1140
|
-
// Allow dot in names unless it is followed by whitespace/comment/EOF
|
|
1141
1155
|
if (c === '.') {
|
|
1142
|
-
|
|
1143
|
-
if (next === '' || /[\s#]/.test(next)) {
|
|
1156
|
+
if (dotTerminatesName(str, i)) {
|
|
1144
1157
|
break; // treat as statement terminator, not part of name
|
|
1145
1158
|
}
|
|
1146
1159
|
// else: accept '.' as part of name
|
|
@@ -1173,10 +1186,8 @@ class SinkParser {
|
|
|
1173
1186
|
var i = i + 1;
|
|
1174
1187
|
while (i < pyjslib_len(str)) {
|
|
1175
1188
|
var c = str.charAt(i);
|
|
1176
|
-
// Allow dot unless followed by whitespace/comment/EOF; otherwise break on invalid chars
|
|
1177
1189
|
if (c === '.') {
|
|
1178
|
-
|
|
1179
|
-
if (next === '' || /[\s#]/.test(next)) {
|
|
1190
|
+
if (dotTerminatesName(str, i)) {
|
|
1180
1191
|
break; // dot ends the name here
|
|
1181
1192
|
}
|
|
1182
1193
|
} else if (_notNameChars.indexOf(c) >= 0) {
|
|
@@ -1194,10 +1205,8 @@ class SinkParser {
|
|
|
1194
1205
|
var ln = "";
|
|
1195
1206
|
while (i < pyjslib_len(str)) {
|
|
1196
1207
|
var c = str.charAt(i);
|
|
1197
|
-
// Allow dot unless followed by whitespace/comment/EOF; otherwise break on invalid chars
|
|
1198
1208
|
if (c === '.') {
|
|
1199
|
-
|
|
1200
|
-
if (next === '' || /[\s#]/.test(next)) {
|
|
1209
|
+
if (dotTerminatesName(str, i)) {
|
|
1201
1210
|
break; // dot ends the name here
|
|
1202
1211
|
}
|
|
1203
1212
|
} else if (_notNameChars.indexOf(c) >= 0) {
|
package/lib/serialize.js
CHANGED
|
@@ -47,7 +47,8 @@ contentType, callback, options) {
|
|
|
47
47
|
return executeCallback(null, documentString);
|
|
48
48
|
case _types.TurtleContentType:
|
|
49
49
|
case _types.TurtleLegacyContentType:
|
|
50
|
-
|
|
50
|
+
// Suppress = for sameAs and => for implies; preserve any user-specified flags (e.g., 'o')
|
|
51
|
+
sz.setFlags('si' + (opts.flags ? ' ' + opts.flags : ''));
|
|
51
52
|
documentString = sz.statementsToN3(newSts);
|
|
52
53
|
return executeCallback(null, documentString);
|
|
53
54
|
case _types.NTriplesContentType:
|
|
@@ -55,7 +56,8 @@ contentType, callback, options) {
|
|
|
55
56
|
documentString = sz.statementsToNTriples(newSts);
|
|
56
57
|
return executeCallback(null, documentString);
|
|
57
58
|
case _types.JSONLDContentType:
|
|
58
|
-
|
|
59
|
+
// turtle + dr (means no default, no relative prefix); preserve user flags
|
|
60
|
+
sz.setFlags('si dr' + (opts.flags ? ' ' + opts.flags : ''));
|
|
59
61
|
documentString = sz.statementsToJsonld(newSts); // convert via turtle
|
|
60
62
|
return executeCallback(null, documentString);
|
|
61
63
|
case _types.NQuadsContentType:
|
package/lib/serializer.d.ts
CHANGED
|
@@ -22,6 +22,12 @@ export class Serializer {
|
|
|
22
22
|
string: NamedNode;
|
|
23
23
|
};
|
|
24
24
|
setBase(base: any): Serializer;
|
|
25
|
+
/**
|
|
26
|
+
* Set serializer behavior flags. Each flag is a single letter; several flags may be combined in one string, optionally separated by spaces.
|
|
27
|
+
* Examples: 'si', 'deinprstux', 'si dr', 'o'.
|
|
28
|
+
* Notable flags:
|
|
29
|
+
* - 'o': do not abbreviate to a prefixed name when the local part contains a dot
|
|
30
|
+
*/
|
|
25
31
|
setFlags(flags: any): Serializer;
|
|
26
32
|
toStr(x: any): any;
|
|
27
33
|
fromStr(s: any): any;
|
|
@@ -51,6 +57,7 @@ export class Serializer {
|
|
|
51
57
|
toN3(f: any): string;
|
|
52
58
|
_notQNameChars: string;
|
|
53
59
|
_notNameChars: string;
|
|
60
|
+
isValidPNLocal(local: any): boolean;
|
|
54
61
|
explicitURI(uri: any): string;
|
|
55
62
|
statementsToNTriples(sts: any): string;
|
|
56
63
|
statementsToN3(sts: any): string;
|
package/lib/serializer.js
CHANGED
|
@@ -64,6 +64,13 @@ class Serializer {
|
|
|
64
64
|
this.base = base;
|
|
65
65
|
return this;
|
|
66
66
|
}
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* Set serializer behavior flags. Each flag is a single letter; several flags may be combined in one string, optionally separated by spaces.
|
|
70
|
+
* Examples: 'si', 'deinprstux', 'si dr', 'o'.
|
|
71
|
+
* Notable flags:
|
|
72
|
+
* - 'o': do not abbreviate to a prefixed name when the local part contains a dot
|
|
73
|
+
*/
|
|
67
74
|
setFlags(flags) {
|
|
68
75
|
this.flags = flags || '';
|
|
69
76
|
return this;
|
|
@@ -254,6 +261,28 @@ class Serializer {
|
|
|
254
261
|
toN3(f) {
|
|
255
262
|
return this.statementsToN3(f.statements);
|
|
256
263
|
}
|
|
264
|
+
// Validate if a string is a valid PN_LOCAL per Turtle 1.1 spec
|
|
265
|
+
// Allows dots inside the local name but not as trailing character
|
|
266
|
+
// Also allows empty local names (for URIs ending in / or #)
|
|
267
|
+
isValidPNLocal(local) {
|
|
268
|
+
// Empty local name is valid (e.g., ex: for http://example.com/)
|
|
269
|
+
if (local.length === 0) return true;
|
|
270
|
+
|
|
271
|
+
// Cannot end with a dot
|
|
272
|
+
if (local[local.length - 1] === '.') return false;
|
|
273
|
+
|
|
274
|
+
// Check each character (allow dots mid-string)
|
|
275
|
+
for (var i = 0; i < local.length; i++) {
|
|
276
|
+
var ch = local[i];
|
|
277
|
+
// Dot is allowed unless it's the last character (checked above)
|
|
278
|
+
if (ch === '.') continue;
|
|
279
|
+
// Other characters must not be in the blacklist
|
|
280
|
+
if (this._notNameChars.indexOf(ch) >= 0) {
|
|
281
|
+
return false;
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
return true;
|
|
285
|
+
}
|
|
257
286
|
explicitURI(uri) {
|
|
258
287
|
if (this.flags.indexOf('r') < 0 && this.base) {
|
|
259
288
|
uri = Uri.refTo(this.base, uri);
|
|
@@ -616,13 +645,17 @@ class Serializer {
|
|
|
616
645
|
if (j >= 0 && this.flags.indexOf('p') < 0 && (
|
|
617
646
|
// Can split at namespace but only if http[s]: URI or file: or ws[s] (why not others?)
|
|
618
647
|
uri.indexOf('http') === 0 || uri.indexOf('ws') === 0 || uri.indexOf('file') === 0)) {
|
|
619
|
-
var
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
648
|
+
var localid = uri.slice(j + 1);
|
|
649
|
+
var namesp = uri.slice(0, j + 1);
|
|
650
|
+
// Don't split if namespace is just the protocol (e.g., https://)
|
|
651
|
+
// A valid namespace should have content after the protocol
|
|
652
|
+
var minNamespaceLength = uri.indexOf('://') + 4; // e.g., "http://x" minimum
|
|
653
|
+
// Also don't split if namespace is the base directory (would serialize as relative URI)
|
|
654
|
+
var baseDir = this.base ? this.base.slice(0, Math.max(this.base.lastIndexOf('/'), this.base.lastIndexOf('#')) + 1) : null;
|
|
655
|
+
var namespaceIsBaseDir = baseDir && namesp === baseDir;
|
|
656
|
+
// If flag 'o' is present, forbid dots in local part when abbreviating
|
|
657
|
+
var forbidDotLocal = this.flags.indexOf('o') >= 0 && localid.indexOf('.') >= 0;
|
|
658
|
+
var canSplit = !namespaceIsBaseDir && !forbidDotLocal && namesp.length > minNamespaceLength && this.isValidPNLocal(localid);
|
|
626
659
|
/*
|
|
627
660
|
if (uri.slice(0, j + 1) === this.base + '#') { // base-relative
|
|
628
661
|
if (canSplit) {
|
|
@@ -633,8 +666,6 @@ class Serializer {
|
|
|
633
666
|
}
|
|
634
667
|
*/
|
|
635
668
|
if (canSplit) {
|
|
636
|
-
var localid = uri.slice(j + 1);
|
|
637
|
-
var namesp = uri.slice(0, j + 1);
|
|
638
669
|
if (this.defaultNamespace && this.defaultNamespace === namesp && this.flags.indexOf('d') < 0) {
|
|
639
670
|
// d -> suppress default
|
|
640
671
|
if (this.flags.indexOf('k') >= 0 && this.keyords.indexOf(localid) < 0) {
|
package/package.json
CHANGED
package/src/n3parser.js
CHANGED
|
@@ -212,10 +212,20 @@ var signed_integer = new RegExp("^[-+]?[0-9]+", 'g');
|
|
|
212
212
|
var number_syntax = new RegExp("^([-+]?[0-9]+)(\\.[0-9]+)?([eE][-+]?[0-9]+)?", 'g');
|
|
213
213
|
var datetime_syntax = new RegExp('^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9](T[0-9][0-9]:[0-9][0-9](:[0-9][0-9](\\.[0-9]*)?)?)?Z?');
|
|
214
214
|
|
|
215
|
+
// Reused in tight loops to detect whitespace or comment after a dot
|
|
216
|
+
var wsOrHash = new RegExp("[\\s#]");
|
|
217
|
+
|
|
215
218
|
var digitstring = new RegExp("^[0-9]+", 'g');
|
|
216
219
|
var interesting = new RegExp("[\\\\\\r\\n\\\"]", 'g');
|
|
217
220
|
var langcode = new RegExp("^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*", 'g');
|
|
218
221
|
|
|
222
|
+
/**
 * Decide whether the character at index `i` of `str` (expected by callers to be
 * a '.') ends a name instead of being part of it.
 *
 * The dot terminates the name when nothing follows it (end of input) or when
 * the following character matches the module-level `wsOrHash` pattern, i.e. it
 * is whitespace or '#' (start of a comment). The function itself does not
 * verify that `str.charAt(i)` is a dot.
 *
 * @param {string} str - text being scanned
 * @param {number} i - index of the dot within `str`
 * @returns {boolean} true when the dot should be treated as a terminator
 */
function dotTerminatesName(str, i) {
  var follower = str.charAt(i + 1);
  if (follower === '') {
    // charAt() yields '' past the end of the string: the dot is the last character
    return true;
  }
  return wsOrHash.test(follower);
}
|
|
228
|
+
|
|
219
229
|
/**
 * Factory wrapper around the SinkParser constructor.
 *
 * All eight arguments are forwarded to `new SinkParser(...)` unchanged and in
 * the same order; the freshly constructed parser is returned.
 */
function createSinkParser(store, openFormula, thisDoc, baseURI, genPrefix, metaURI, flags, why) {
  var parser = new SinkParser(store, openFormula, thisDoc, baseURI, genPrefix, metaURI, flags, why);
  return parser;
}
|
|
@@ -739,9 +749,14 @@ export class SinkParser {
|
|
|
739
749
|
throw BadSyntax(this._thisDoc, this.lines, str, i, "EOF when ']' expected after [ <propertyList>");
|
|
740
750
|
}
|
|
741
751
|
if ((str.slice( j, ( j + 1 ) ) == ".")) {
|
|
742
|
-
// If a dot is found after a blank node, treat it as statement terminator
|
|
752
|
+
// If a dot is found after a blank node, treat it as a statement terminator.
|
|
753
|
+
// Do NOT consume the '.' here: statement terminators are handled centrally by
|
|
754
|
+
// checkDot() (called by directiveOrStatement after statement()). Consuming the dot
|
|
755
|
+
// locally would bypass that unified logic and could cause inconsistencies.
|
|
756
|
+
// We do consume ']' below because it is a structural closer of the blank node,
|
|
757
|
+
// not a statement terminator.
|
|
743
758
|
res.push(subj);
|
|
744
|
-
return j;
|
|
759
|
+
return j; // leave '.' for checkDot()
|
|
745
760
|
}
|
|
746
761
|
if ((str.slice( j, ( j + 1 ) ) != "]")) {
|
|
747
762
|
throw BadSyntax(this._thisDoc, this.lines, str, j, "']' expected");
|
|
@@ -1210,10 +1225,8 @@ export class SinkParser {
|
|
|
1210
1225
|
var i = j;
|
|
1211
1226
|
while ((i < pyjslib_len(str))) {
|
|
1212
1227
|
var c = str.charAt(i);
|
|
1213
|
-
// Allow dot in names unless it is followed by whitespace/comment/EOF
|
|
1214
1228
|
if (c === '.') {
|
|
1215
|
-
|
|
1216
|
-
if (next === '' || /[\s#]/.test(next)) {
|
|
1229
|
+
if (dotTerminatesName(str, i)) {
|
|
1217
1230
|
break; // treat as statement terminator, not part of name
|
|
1218
1231
|
}
|
|
1219
1232
|
// else: accept '.' as part of name
|
|
@@ -1247,10 +1260,8 @@ export class SinkParser {
|
|
|
1247
1260
|
var i = ( i + 1 ) ;
|
|
1248
1261
|
while ((i < pyjslib_len(str))) {
|
|
1249
1262
|
var c = str.charAt(i);
|
|
1250
|
-
// Allow dot unless followed by whitespace/comment/EOF; otherwise break on invalid chars
|
|
1251
1263
|
if (c === '.') {
|
|
1252
|
-
|
|
1253
|
-
if (next === '' || /[\s#]/.test(next)) {
|
|
1264
|
+
if (dotTerminatesName(str, i)) {
|
|
1254
1265
|
break; // dot ends the name here
|
|
1255
1266
|
}
|
|
1256
1267
|
} else if (_notNameChars.indexOf(c) >= 0) {
|
|
@@ -1269,10 +1280,8 @@ export class SinkParser {
|
|
|
1269
1280
|
var ln = "";
|
|
1270
1281
|
while ((i < pyjslib_len(str))) {
|
|
1271
1282
|
var c = str.charAt(i);
|
|
1272
|
-
// Allow dot unless followed by whitespace/comment/EOF; otherwise break on invalid chars
|
|
1273
1283
|
if (c === '.') {
|
|
1274
|
-
|
|
1275
|
-
if (next === '' || /[\s#]/.test(next)) {
|
|
1284
|
+
if (dotTerminatesName(str, i)) {
|
|
1276
1285
|
break; // dot ends the name here
|
|
1277
1286
|
}
|
|
1278
1287
|
} else if (_notNameChars.indexOf(c) >= 0) {
|
package/src/serialize.ts
CHANGED
|
@@ -72,7 +72,8 @@ export default function serialize (
|
|
|
72
72
|
return executeCallback(null, documentString)
|
|
73
73
|
case TurtleContentType:
|
|
74
74
|
case TurtleLegacyContentType:
|
|
75
|
-
|
|
75
|
+
// Suppress = for sameAs and => for implies; preserve any user-specified flags (e.g., 'o')
|
|
76
|
+
sz.setFlags('si' + (opts.flags ? (' ' + opts.flags) : ''))
|
|
76
77
|
documentString = sz.statementsToN3(newSts)
|
|
77
78
|
return executeCallback(null, documentString)
|
|
78
79
|
case NTriplesContentType:
|
|
@@ -80,7 +81,8 @@ export default function serialize (
|
|
|
80
81
|
documentString = sz.statementsToNTriples(newSts)
|
|
81
82
|
return executeCallback(null, documentString)
|
|
82
83
|
case JSONLDContentType:
|
|
83
|
-
|
|
84
|
+
// turtle + dr (means no default, no relative prefix); preserve user flags
|
|
85
|
+
sz.setFlags('si dr' + (opts.flags ? (' ' + opts.flags) : ''))
|
|
84
86
|
documentString = sz.statementsToJsonld(newSts) // convert via turtle
|
|
85
87
|
return executeCallback(null, documentString)
|
|
86
88
|
case NQuadsContentType:
|
package/src/serializer.js
CHANGED
|
@@ -52,6 +52,12 @@ export class Serializer {
|
|
|
52
52
|
return this
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
+
/**
|
|
56
|
+
* Set serializer behavior flags. Each flag is a single letter; several flags may be combined in one string, optionally separated by spaces.
|
|
57
|
+
* Examples: 'si', 'deinprstux', 'si dr', 'o'.
|
|
58
|
+
* Notable flags:
|
|
59
|
+
* - 'o': do not abbreviate to a prefixed name when the local part contains a dot
|
|
60
|
+
*/
|
|
55
61
|
setFlags(flags) {
|
|
56
62
|
this.flags = flags || '';
|
|
57
63
|
return this
|
|
@@ -255,6 +261,29 @@ export class Serializer {
|
|
|
255
261
|
_notNameChars =
|
|
256
262
|
(this._notQNameChars + ':')
|
|
257
263
|
|
|
264
|
+
// Validate if a string is a valid PN_LOCAL per Turtle 1.1 spec
|
|
265
|
+
// Allows dots inside the local name but not as trailing character
|
|
266
|
+
// Also allows empty local names (for URIs ending in / or #)
|
|
267
|
+
isValidPNLocal(local) {
|
|
268
|
+
// Empty local name is valid (e.g., ex: for http://example.com/)
|
|
269
|
+
if (local.length === 0) return true
|
|
270
|
+
|
|
271
|
+
// Cannot end with a dot
|
|
272
|
+
if (local[local.length - 1] === '.') return false
|
|
273
|
+
|
|
274
|
+
// Check each character (allow dots mid-string)
|
|
275
|
+
for (var i = 0; i < local.length; i++) {
|
|
276
|
+
var ch = local[i]
|
|
277
|
+
// Dot is allowed unless it's the last character (checked above)
|
|
278
|
+
if (ch === '.') continue
|
|
279
|
+
// Other characters must not be in the blacklist
|
|
280
|
+
if (this._notNameChars.indexOf(ch) >= 0) {
|
|
281
|
+
return false
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
return true
|
|
285
|
+
}
|
|
286
|
+
|
|
258
287
|
explicitURI(uri) {
|
|
259
288
|
if (this.flags.indexOf('r') < 0 && this.base) {
|
|
260
289
|
uri = Uri.refTo(this.base, uri)
|
|
@@ -628,13 +657,17 @@ export class Serializer {
|
|
|
628
657
|
if (j >= 0 && this.flags.indexOf('p') < 0 &&
|
|
629
658
|
// Can split at namespace but only if http[s]: URI or file: or ws[s] (why not others?)
|
|
630
659
|
(uri.indexOf('http') === 0 || uri.indexOf('ws') === 0 || uri.indexOf('file') === 0)) {
|
|
631
|
-
var
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
660
|
+
var localid = uri.slice(j + 1)
|
|
661
|
+
var namesp = uri.slice(0, j + 1)
|
|
662
|
+
// Don't split if namespace is just the protocol (e.g., https://)
|
|
663
|
+
// A valid namespace should have content after the protocol
|
|
664
|
+
var minNamespaceLength = uri.indexOf('://') + 4 // e.g., "http://x" minimum
|
|
665
|
+
// Also don't split if namespace is the base directory (would serialize as relative URI)
|
|
666
|
+
var baseDir = this.base ? this.base.slice(0, Math.max(this.base.lastIndexOf('/'), this.base.lastIndexOf('#')) + 1) : null
|
|
667
|
+
var namespaceIsBaseDir = baseDir && namesp === baseDir
|
|
668
|
+
// If flag 'o' is present, forbid dots in local part when abbreviating
|
|
669
|
+
var forbidDotLocal = this.flags.indexOf('o') >= 0 && localid.indexOf('.') >= 0
|
|
670
|
+
var canSplit = !namespaceIsBaseDir && !forbidDotLocal && namesp.length > minNamespaceLength && this.isValidPNLocal(localid)
|
|
638
671
|
/*
|
|
639
672
|
if (uri.slice(0, j + 1) === this.base + '#') { // base-relative
|
|
640
673
|
if (canSplit) {
|
|
@@ -645,8 +678,6 @@ export class Serializer {
|
|
|
645
678
|
}
|
|
646
679
|
*/
|
|
647
680
|
if (canSplit) {
|
|
648
|
-
var localid = uri.slice(j + 1)
|
|
649
|
-
var namesp = uri.slice(0, j + 1)
|
|
650
681
|
if (this.defaultNamespace && this.defaultNamespace === namesp &&
|
|
651
682
|
this.flags.indexOf('d') < 0) { // d -> suppress default
|
|
652
683
|
if (this.flags.indexOf('k') >= 0 &&
|