mdld-parse 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENCE +167 -0
- package/README.md +341 -190
- package/index.js +722 -284
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -25,8 +25,9 @@ function hash(str) {
|
|
|
25
25
|
}
|
|
26
26
|
|
|
27
27
|
function expandIRI(term, ctx) {
|
|
28
|
-
if (
|
|
29
|
-
const
|
|
28
|
+
if (term == null) return null;
|
|
29
|
+
const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
|
|
30
|
+
const t = raw.trim();
|
|
30
31
|
if (t.match(/^https?:/)) return t;
|
|
31
32
|
if (t.includes(':')) {
|
|
32
33
|
const [prefix, ref] = t.split(':', 2);
|
|
@@ -35,56 +36,88 @@ function expandIRI(term, ctx) {
|
|
|
35
36
|
return (ctx['@vocab'] || '') + t;
|
|
36
37
|
}
|
|
37
38
|
|
|
38
|
-
|
|
39
|
-
function parseAnnotation(raw) {
|
|
39
|
+
/**
 * Parses a semantic annotation block such as `{=ex:alice .Person name ^^xsd:date}`.
 *
 * Tokens are whitespace-separated and classified by their leading sigil:
 *   `=`      subject reset (result.subject becomes the marker string 'RESET')
 *   `=iri`   subject declaration
 *   `^^iri`  literal datatype (does not override an already-seen language)
 *   `@lang`  literal language (clears any datatype seen so far)
 *   `.iri`   type
 *   `^?iri`, `^iri`, `?iri`, `iri`   predicates with form '^?', '^', '?' and ''
 *
 * Every token is also appended to `entries` together with `relRange`, its
 * character offsets inside the trimmed block text (an opening `{` is offset 0).
 *
 * @param {*} raw - Block text, with or without the surrounding braces.
 * @returns {{subject: ?string, types: Array, predicates: Array, datatype: ?string, language: ?string, entries: Array}}
 *   Parsed block; an all-empty structure for blank input or on an internal error.
 */
function parseSemanticBlock(raw) {
    const emptyResult = () => ({ subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] });

    try {
        const src = String(raw || '').trim();

        // Locate the text between the braces WITHOUT re-trimming it, so that
        // token offsets stay aligned with `src` even for `{ =iri }`.
        const innerStart = src.startsWith('{') ? 1 : 0;
        const innerEnd = src.endsWith('}') ? src.length - 1 : src.length;
        const inner = src.slice(innerStart, innerEnd);

        const result = emptyResult();

        for (const match of inner.matchAll(/\S+/g)) {
            const token = match[0];
            const relStart = innerStart + match.index;
            const relRange = { start: relStart, end: relStart + token.length };
            const entryIndex = result.entries.length;

            if (token === '=') {
                result.subject = 'RESET';
                result.entries.push({ kind: 'subjectReset', relRange, raw: token });
                continue;
            }

            if (token.startsWith('=')) {
                const iri = token.substring(1);
                result.subject = iri;
                result.entries.push({ kind: 'subject', iri, relRange, raw: token });
                continue;
            }

            // '^^' must be tested before the '^?' and '^' predicate sigils.
            if (token.startsWith('^^')) {
                const datatype = token.substring(2);
                if (!result.language) result.datatype = datatype;
                result.entries.push({ kind: 'datatype', datatype, relRange, raw: token });
                continue;
            }

            if (token.startsWith('@')) {
                const language = token.substring(1);
                result.language = language;
                result.datatype = null;
                result.entries.push({ kind: 'language', language, relRange, raw: token });
                continue;
            }

            if (token.startsWith('.')) {
                const classIRI = token.substring(1);
                result.types.push({ iri: classIRI, entryIndex });
                result.entries.push({ kind: 'type', iri: classIRI, relRange, raw: token });
                continue;
            }

            // Longest predicate sigil first; the empty sigil matches any remaining token.
            const form = ['^?', '^', '?', ''].find((sigil) => token.startsWith(sigil));
            const iri = token.substring(form.length);
            result.predicates.push({ iri, form, entryIndex });
            result.entries.push({ kind: 'property', iri, form, relRange, raw: token });
        }

        return result;
    } catch (error) {
        console.error(`Error parsing semantic block ${raw}:`, error);
        return emptyResult();
    }
}
|
|
86
120
|
|
|
87
|
-
// Token scanning - consolidated helpers
|
|
88
121
|
function scanTokens(text) {
|
|
89
122
|
const tokens = [];
|
|
90
123
|
const lines = text.split('\n');
|
|
@@ -96,24 +129,32 @@ function scanTokens(text) {
|
|
|
96
129
|
const lineStart = pos;
|
|
97
130
|
pos += line.length + 1;
|
|
98
131
|
|
|
99
|
-
// Code blocks
|
|
100
132
|
if (line.startsWith('```')) {
|
|
101
133
|
if (!codeBlock) {
|
|
102
134
|
const fence = line.match(/^(`{3,})(.*)/);
|
|
135
|
+
const attrsText = fence[2].match(/\{[^}]+\}/)?.[0] || null;
|
|
136
|
+
const attrsStartInLine = attrsText ? line.indexOf(attrsText) : -1;
|
|
137
|
+
const contentStart = lineStart + line.length + 1;
|
|
103
138
|
codeBlock = {
|
|
104
139
|
fence: fence[1],
|
|
105
140
|
start: lineStart,
|
|
106
141
|
content: [],
|
|
107
|
-
lang: fence[2].trim().split(
|
|
108
|
-
attrs:
|
|
142
|
+
lang: fence[2].trim().split(/[\s{]/)[0],
|
|
143
|
+
attrs: attrsText,
|
|
144
|
+
attrsRange: attrsText && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrsText.length] : null,
|
|
145
|
+
valueRangeStart: contentStart
|
|
109
146
|
};
|
|
110
147
|
} else if (line.startsWith(codeBlock.fence)) {
|
|
148
|
+
const valueStart = codeBlock.valueRangeStart;
|
|
149
|
+
const valueEnd = Math.max(valueStart, lineStart - 1);
|
|
111
150
|
tokens.push({
|
|
112
151
|
type: 'code',
|
|
113
152
|
range: [codeBlock.start, lineStart],
|
|
114
153
|
text: codeBlock.content.join('\n'),
|
|
115
154
|
lang: codeBlock.lang,
|
|
116
|
-
attrs: codeBlock.attrs
|
|
155
|
+
attrs: codeBlock.attrs,
|
|
156
|
+
attrsRange: codeBlock.attrsRange,
|
|
157
|
+
valueRange: [valueStart, valueEnd]
|
|
117
158
|
});
|
|
118
159
|
codeBlock = null;
|
|
119
160
|
}
|
|
@@ -125,94 +166,110 @@ function scanTokens(text) {
|
|
|
125
166
|
continue;
|
|
126
167
|
}
|
|
127
168
|
|
|
128
|
-
// Prefix declarations
|
|
129
169
|
const prefixMatch = line.match(/^\[([^\]]+)\]\s*\{:\s*([^}]+)\}/);
|
|
130
170
|
if (prefixMatch) {
|
|
131
171
|
tokens.push({ type: 'prefix', prefix: prefixMatch[1], iri: prefixMatch[2].trim() });
|
|
132
172
|
continue;
|
|
133
173
|
}
|
|
134
174
|
|
|
135
|
-
// Headings
|
|
136
175
|
const headingMatch = line.match(/^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
|
|
137
176
|
if (headingMatch) {
|
|
177
|
+
const attrs = headingMatch[3] || null;
|
|
178
|
+
const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
|
|
179
|
+
const afterHashes = headingMatch[1].length;
|
|
180
|
+
const ws = line.substring(afterHashes).match(/^\s+/)?.[0]?.length || 0;
|
|
181
|
+
const valueStartInLine = afterHashes + ws;
|
|
182
|
+
const valueEndInLine = valueStartInLine + headingMatch[2].length;
|
|
138
183
|
tokens.push({
|
|
139
184
|
type: 'heading',
|
|
140
185
|
depth: headingMatch[1].length,
|
|
141
|
-
range: [lineStart, pos],
|
|
186
|
+
range: [lineStart, pos - 1],
|
|
142
187
|
text: headingMatch[2].trim(),
|
|
143
|
-
attrs
|
|
188
|
+
attrs,
|
|
189
|
+
attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
|
|
190
|
+
valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
|
|
144
191
|
});
|
|
145
192
|
continue;
|
|
146
193
|
}
|
|
147
194
|
|
|
148
|
-
// Lists
|
|
149
195
|
const listMatch = line.match(/^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
|
|
150
196
|
if (listMatch) {
|
|
197
|
+
const attrs = listMatch[4] || null;
|
|
198
|
+
const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
|
|
199
|
+
const prefix = listMatch[1].length + listMatch[2].length;
|
|
200
|
+
const ws = line.substring(prefix).match(/^\s+/)?.[0]?.length || 0;
|
|
201
|
+
const valueStartInLine = prefix + ws;
|
|
202
|
+
const valueEndInLine = valueStartInLine + listMatch[3].length;
|
|
151
203
|
tokens.push({
|
|
152
204
|
type: 'list',
|
|
153
205
|
indent: listMatch[1].length,
|
|
154
|
-
range: [lineStart, pos],
|
|
206
|
+
range: [lineStart, pos - 1],
|
|
155
207
|
text: listMatch[3].trim(),
|
|
156
|
-
attrs
|
|
208
|
+
attrs,
|
|
209
|
+
attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
|
|
210
|
+
valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
|
|
157
211
|
});
|
|
158
212
|
continue;
|
|
159
213
|
}
|
|
160
214
|
|
|
161
|
-
// Blockquotes
|
|
162
215
|
const blockquoteMatch = line.match(/^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
|
|
163
216
|
if (blockquoteMatch) {
|
|
217
|
+
const attrs = blockquoteMatch[2] || null;
|
|
218
|
+
const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
|
|
219
|
+
const prefixMatch = line.match(/^>\s+/);
|
|
220
|
+
const valueStartInLine = prefixMatch ? prefixMatch[0].length : 2;
|
|
221
|
+
const valueEndInLine = valueStartInLine + blockquoteMatch[1].length;
|
|
164
222
|
tokens.push({
|
|
165
223
|
type: 'blockquote',
|
|
166
|
-
range: [lineStart, pos],
|
|
224
|
+
range: [lineStart, pos - 1],
|
|
167
225
|
text: blockquoteMatch[1].trim(),
|
|
168
|
-
attrs
|
|
226
|
+
attrs,
|
|
227
|
+
attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
|
|
228
|
+
valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
|
|
169
229
|
});
|
|
170
230
|
continue;
|
|
171
231
|
}
|
|
172
232
|
|
|
173
|
-
// Paragraphs
|
|
174
233
|
if (line.trim()) {
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
attrs: paraMatch[2] || null
|
|
182
|
-
});
|
|
183
|
-
}
|
|
234
|
+
tokens.push({
|
|
235
|
+
type: 'para',
|
|
236
|
+
range: [lineStart, pos - 1],
|
|
237
|
+
text: line.trim(),
|
|
238
|
+
attrs: null
|
|
239
|
+
});
|
|
184
240
|
}
|
|
185
241
|
}
|
|
186
242
|
|
|
187
243
|
return tokens;
|
|
188
244
|
}
|
|
189
245
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
const spans = [];
|
|
246
|
+
function extractInlineCarriers(text, baseOffset = 0) {
|
|
247
|
+
const carriers = [];
|
|
193
248
|
let pos = 0;
|
|
194
249
|
|
|
195
250
|
while (pos < text.length) {
|
|
196
251
|
const bracketStart = text.indexOf('[', pos);
|
|
197
|
-
if (bracketStart === -1)
|
|
198
|
-
if (pos < text.length) spans.push({ type: 'text', text: text.substring(pos) });
|
|
199
|
-
break;
|
|
200
|
-
}
|
|
252
|
+
if (bracketStart === -1) break;
|
|
201
253
|
|
|
202
|
-
|
|
254
|
+
let bracketDepth = 1;
|
|
255
|
+
let bracketEnd = bracketStart + 1;
|
|
203
256
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
257
|
+
while (bracketEnd < text.length && bracketDepth > 0) {
|
|
258
|
+
if (text[bracketEnd] === '[') {
|
|
259
|
+
bracketDepth++;
|
|
260
|
+
} else if (text[bracketEnd] === ']') {
|
|
261
|
+
bracketDepth--;
|
|
262
|
+
}
|
|
263
|
+
bracketEnd++;
|
|
208
264
|
}
|
|
209
265
|
|
|
210
|
-
|
|
211
|
-
|
|
266
|
+
if (bracketDepth > 0) break;
|
|
267
|
+
|
|
268
|
+
const carrierText = text.substring(bracketStart + 1, bracketEnd - 1);
|
|
269
|
+
const valueRange = [baseOffset + bracketStart + 1, baseOffset + bracketEnd - 1];
|
|
270
|
+
let spanEnd = bracketEnd;
|
|
212
271
|
let url = null;
|
|
213
|
-
let attrs = null;
|
|
214
272
|
|
|
215
|
-
// Parse link destination
|
|
216
273
|
if (text[spanEnd] === '(') {
|
|
217
274
|
const parenEnd = text.indexOf(')', spanEnd);
|
|
218
275
|
if (parenEnd !== -1) {
|
|
@@ -221,200 +278,208 @@ function extractInlineValue(text, baseOffset = 0) {
|
|
|
221
278
|
}
|
|
222
279
|
}
|
|
223
280
|
|
|
224
|
-
|
|
281
|
+
let attrs = null;
|
|
282
|
+
let attrsRange = null;
|
|
225
283
|
const attrsMatch = text.substring(spanEnd).match(/^\s*\{([^}]+)\}/);
|
|
226
284
|
if (attrsMatch) {
|
|
227
285
|
attrs = `{${attrsMatch[1]}}`;
|
|
286
|
+
const braceIndex = attrsMatch[0].indexOf('{');
|
|
287
|
+
const absStart = baseOffset + spanEnd + (braceIndex >= 0 ? braceIndex : 0);
|
|
288
|
+
attrsRange = [absStart, absStart + attrs.length];
|
|
228
289
|
spanEnd += attrsMatch[0].length;
|
|
229
290
|
}
|
|
230
291
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
292
|
+
let carrierType = 'span';
|
|
293
|
+
let resourceIRI = null;
|
|
294
|
+
|
|
295
|
+
if (url) {
|
|
296
|
+
if (url.startsWith('=')) {
|
|
297
|
+
pos = spanEnd;
|
|
298
|
+
continue;
|
|
299
|
+
} else {
|
|
300
|
+
carrierType = 'link';
|
|
301
|
+
resourceIRI = url;
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
carriers.push({
|
|
306
|
+
type: carrierType,
|
|
307
|
+
text: carrierText,
|
|
308
|
+
url: resourceIRI,
|
|
235
309
|
attrs: attrs,
|
|
310
|
+
attrsRange,
|
|
311
|
+
valueRange,
|
|
236
312
|
range: [baseOffset + bracketStart, baseOffset + spanEnd]
|
|
237
313
|
});
|
|
238
314
|
|
|
239
315
|
pos = spanEnd;
|
|
240
316
|
}
|
|
241
317
|
|
|
242
|
-
return
|
|
318
|
+
return carriers;
|
|
243
319
|
}
|
|
244
320
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
const blockId = hash([subject, ...expanded.map(e => JSON.stringify(e))].join('|'));
|
|
321
|
+
/**
 * Builds the origin record for one annotated carrier.
 *
 * The block id is a hash of the subject joined with the JSON form of the
 * subject, expanded type IRIs and expanded predicates, so it does not depend
 * on the carrier's position.
 *
 * @param {string} subject - Subject IRI the block is about.
 * @param {Array} types - Type IRIs, as strings or `{ iri }` objects.
 * @param {Array} predicates - `{ iri, form }` predicate descriptors.
 * @param {Array} entries - Parsed annotation entries (defaults to an empty list).
 * @param {Array} range - `[start, end]` of the carrier.
 * @param {?Array} attrsRange - `[start, end]` of the annotation text, if known.
 * @param {?Array} valueRange - `[start, end]` of the carrier value, if known.
 * @param {?string} carrierType - Kind of carrier, if known.
 * @param {Object} ctx - Prefix context; a shallow copy is stored on the block.
 * @returns {Object} The block record.
 */
function createBlock(subject, types, predicates, entries, range, attrsRange, valueRange, carrierType, ctx) {
    const toSpan = (pair) => (pair ? { start: pair[0], end: pair[1] } : null);

    const expandedTypes = types.map((entry) => {
        const iri = typeof entry === 'string' ? entry : entry.iri;
        return expandIRI(iri, ctx);
    });
    const expandedPredicates = predicates.map((entry) => ({
        iri: expandIRI(entry.iri, ctx),
        form: entry.form
    }));

    // Key order (subject, types, predicates) is part of the id and must not change.
    const fingerprint = JSON.stringify({
        subject,
        types: expandedTypes,
        predicates: expandedPredicates
    });

    return {
        id: hash([subject, fingerprint].join('|')),
        range: { start: range[0], end: range[1] },
        attrsRange: toSpan(attrsRange),
        valueRange: toSpan(valueRange),
        carrierType: carrierType || null,
        subject,
        types: expandedTypes,
        predicates: expandedPredicates,
        entries: entries || [],
        context: { ...ctx }
    };
}
|
|
262
341
|
|
|
263
|
-
function
|
|
342
|
+
/**
 * Produces the string key under which a quad is stored in the quad index.
 *
 * Literal objects are keyed by value, language and datatype IRI; every other
 * object term is keyed by its term type and value.
 *
 * @param {{value: string}} subject
 * @param {{value: string}} predicate
 * @param {{termType: string, value: string, language?: string, datatype?: {value: string}}} object
 * @returns {string} JSON-encoded `[subject, predicate, objectKey]` triple.
 */
function quadIndexKey(subject, predicate, object) {
    const descriptor = { t: object.termType, v: object.value };
    if (object.termType === 'Literal') {
        descriptor.lang = object.language || '';
        descriptor.dt = object.datatype?.value || '';
    }
    return JSON.stringify([subject.value, predicate.value, JSON.stringify(descriptor)]);
}
|
|
348
|
+
|
|
349
|
+
/**
 * Creates a quad, appends it to `quads` and records its provenance in `quadIndex`.
 *
 * Nothing happens when any of the three terms is missing.
 *
 * @param {Array} quads - Output list that receives the new quad.
 * @param {Map} quadIndex - Maps a quad key to `{ blockId, ...meta }`.
 * @param {string} blockId - Id of the block that produced the quad.
 * @param {?Object} subject
 * @param {?Object} predicate
 * @param {?Object} object
 * @param {Object} dataFactory - Factory providing `quad(subject, predicate, object)`.
 * @param {?Object} [meta=null] - Extra provenance fields merged into the index entry.
 * @returns {void}
 */
function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory, meta = null) {
    const hasAllTerms = Boolean(subject && predicate && object);
    if (!hasAllTerms) {
        return;
    }

    const quad = dataFactory.quad(subject, predicate, object);
    quads.push(quad);

    // Key on the terms of the created quad rather than on the arguments.
    const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
    const provenance = { blockId, ...(meta || {}) };
    quadIndex.set(key, provenance);
}
|
|
269
355
|
|
|
270
|
-
function
|
|
356
|
+
/**
 * Builds a literal term for `value`.
 *
 * A datatype takes precedence over a language tag; with neither, a plain
 * literal is produced.
 *
 * @param {string} value - Lexical value.
 * @param {?string} datatype - Datatype IRI or prefixed name, expanded against `context`.
 * @param {?string} language - Language tag.
 * @param {Object} context - Prefix context used to expand `datatype`.
 * @param {Object} dataFactory - Factory providing `literal` and `namedNode`.
 * @returns {Object} The literal created by `dataFactory`.
 */
function createLiteral(value, datatype, language, context, dataFactory) {
    if (datatype) {
        const datatypeNode = dataFactory.namedNode(expandIRI(datatype, context));
        return dataFactory.literal(value, datatypeNode);
    }
    return language
        ? dataFactory.literal(value, language)
        : dataFactory.literal(value);
}
|
|
275
361
|
|
|
276
|
-
function processAnnotation(
|
|
277
|
-
if (
|
|
278
|
-
|
|
279
|
-
const ann = parseAnnotation(token.attrs);
|
|
280
|
-
const originalSubject = state.currentSubject;
|
|
281
|
-
|
|
282
|
-
// Handle subject declaration
|
|
283
|
-
if (ann.subject === 'RESET') {
|
|
362
|
+
/**
 * Applies one parsed annotation (`sem`) to its carrier and emits the resulting quads.
 *
 * - A 'RESET' subject clears `state.currentSubject` and emits nothing.
 * - A declared subject becomes the current subject.
 * - Without a current subject nothing is emitted.
 * - Types are asserted on the carrier's linked resource when it has a URL,
 *   otherwise on the current subject.
 * - Predicate forms: '' links the subject to the carrier text as a literal;
 *   '?' links previous subject -> new subject (or subject -> linked resource);
 *   '^?' is the reverse of '?'. Any other form emits nothing here.
 *
 * @param {Object} carrier - `{ type, text, url, range, attrsRange, valueRange }`.
 * @param {Object} sem - Result of `parseSemanticBlock`.
 * @param {Object} state - Parser state (`df`, `ctx`, `quads`, `origin`, `currentSubject`).
 * @returns {void}
 */
function processAnnotation(carrier, sem, state) {
    if (sem.subject === 'RESET') {
        state.currentSubject = null;
        return;
    }

    const previousSubject = state.currentSubject;
    const newSubject = sem.subject ? state.df.namedNode(expandIRI(sem.subject, state.ctx)) : null;
    if (newSubject) {
        state.currentSubject = newSubject;
    }

    const activeSubject = state.currentSubject;
    if (!activeSubject) {
        return;
    }

    const block = createBlock(
        activeSubject.value,
        sem.types,
        sem.predicates,
        sem.entries,
        carrier.range,
        carrier.attrsRange || null,
        carrier.valueRange || null,
        carrier.type || null,
        state.ctx
    );
    state.origin.blocks.set(block.id, block);

    const literalObject = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
    const linkedResource = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;

    // All quads of this annotation share the same sink and block id.
    const emit = (subject, predicate, object, meta) =>
        emitQuad(state.quads, state.origin.quadIndex, block.id, subject, predicate, object, state.df, meta);

    for (const typeEntry of sem.types) {
        const isBareString = typeof typeEntry === 'string';
        const typeIRI = isBareString ? typeEntry : typeEntry.iri;
        const entryIndex = isBareString ? null : typeEntry.entryIndex;
        const expandedType = expandIRI(typeIRI, state.ctx);
        const rdfType = state.df.namedNode(expandIRI('rdf:type', state.ctx));
        emit(
            linkedResource || activeSubject,
            rdfType,
            state.df.namedNode(expandedType),
            { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex }
        );
    }

    for (const pred of sem.predicates) {
        const predicateNode = state.df.namedNode(expandIRI(pred.iri, state.ctx));
        const meta = {
            kind: 'pred',
            token: `${pred.form}${pred.iri}`,
            form: pred.form,
            expandedPredicate: predicateNode.value,
            entryIndex: pred.entryIndex
        };

        switch (pred.form) {
            case '':
                emit(activeSubject, predicateNode, literalObject, meta);
                break;
            case '?':
                if (newSubject) {
                    emit(previousSubject, predicateNode, newSubject, meta);
                } else if (linkedResource) {
                    emit(activeSubject, predicateNode, linkedResource, meta);
                }
                break;
            case '^?':
                if (newSubject) {
                    emit(newSubject, predicateNode, previousSubject, meta);
                } else if (linkedResource) {
                    emit(linkedResource, predicateNode, activeSubject, meta);
                }
                break;
            default:
                break;
        }
    }
}
|
|
343
410
|
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
if (!token.attrs || nextToken?.type !== 'list') return false;
|
|
411
|
+
/**
 * Applies a list-introducing annotation to every list item that declares a
 * subject, then processes the item's own annotations.
 *
 * For each list token the item subject is taken from the token's attrs or,
 * failing that, from the first inline carrier that declares one. Items without
 * a subject are skipped. For items with a subject:
 *   - each context type is asserted on the item subject;
 *   - each context predicate links context -> item, or item -> context for
 *     the reverse forms '^' and '^?';
 *   - the item's attrs and inline carriers are processed with the item subject
 *     as the current subject, which is restored afterwards.
 *
 * @param {Object} contextSem - Parsed annotation of the paragraph introducing the list.
 * @param {Array} listTokens - List tokens that follow the paragraph.
 * @param {Object} state - Parser state (`df`, `ctx`, `quads`, `origin`, `currentSubject`).
 * @param {?Object} [contextSubject=null] - Subject the list belongs to; defaults to
 *   `state.currentSubject` at call time.
 * @returns {void}
 */
function processListContext(contextSem, listTokens, state, contextSubject = null) {
    const anchorSubject = contextSubject || state.currentSubject;
    const declaresSubject = (sem) => Boolean(sem && sem.subject && sem.subject !== 'RESET');
    const emitContextQuad = (subject, predicate, object) =>
        emitQuad(state.quads, state.origin.quadIndex, 'list-context', subject, predicate, object, state.df);

    for (const listToken of listTokens) {
        // `listToken.text` is the item's value, which begins after the indent
        // and list marker; offsets must be based on the value start, not the line start.
        const textOffset = listToken.valueRange ? listToken.valueRange[0] : listToken.range[0];
        const carriers = extractInlineCarriers(listToken.text, textOffset);

        // Parse each annotation once and reuse it for subject detection and processing.
        const tokenSem = listToken.attrs ? parseSemanticBlock(listToken.attrs) : null;
        const annotated = carriers
            .filter((carrier) => carrier.attrs)
            .map((carrier) => ({ carrier, sem: parseSemanticBlock(carrier.attrs) }));

        const subjectSource = declaresSubject(tokenSem)
            ? tokenSem
            : annotated.map((entry) => entry.sem).find(declaresSubject);
        if (!subjectSource) continue;

        const itemSubject = state.df.namedNode(expandIRI(subjectSource.subject, state.ctx));

        for (const t of contextSem.types) {
            const typeIRI = typeof t === 'string' ? t : t.iri;
            emitContextQuad(
                itemSubject,
                state.df.namedNode(expandIRI('rdf:type', state.ctx)),
                state.df.namedNode(expandIRI(typeIRI, state.ctx))
            );
        }

        for (const pred of contextSem.predicates) {
            const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
            const isReverse = pred.form === '^' || pred.form === '^?';
            if (isReverse) {
                emitContextQuad(itemSubject, P, anchorSubject);
            } else {
                emitContextQuad(anchorSubject, P, itemSubject);
            }
        }

        const prevSubject = state.currentSubject;
        state.currentSubject = itemSubject;
        try {
            if (tokenSem) {
                const carrier = {
                    type: 'list',
                    text: listToken.text,
                    range: listToken.range,
                    attrsRange: listToken.attrsRange || null,
                    valueRange: listToken.valueRange || null
                };
                processAnnotation(carrier, tokenSem, state);
            }

            for (const { carrier, sem } of annotated) {
                processAnnotation(carrier, sem, state);
            }
        } finally {
            // Item-level subjects must not leak into the next item or past the list.
            state.currentSubject = prevSubject;
        }
    }
}
|
|
408
475
|
|
|
409
|
-
// Main parsing function
|
|
410
476
|
export function parse(text, options = {}) {
|
|
411
477
|
const state = {
|
|
412
478
|
ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
|
|
413
479
|
df: options.dataFactory || DataFactory,
|
|
414
480
|
quads: [],
|
|
415
481
|
origin: { blocks: new Map(), quadIndex: new Map() },
|
|
416
|
-
currentSubject: null
|
|
417
|
-
listContext: null
|
|
482
|
+
currentSubject: null
|
|
418
483
|
};
|
|
419
484
|
|
|
420
485
|
const tokens = scanTokens(text);
|
|
@@ -422,122 +487,494 @@ export function parse(text, options = {}) {
|
|
|
422
487
|
|
|
423
488
|
for (let i = 0; i < tokens.length; i++) {
|
|
424
489
|
const token = tokens[i];
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
490
|
+
|
|
491
|
+
if (token.type === 'heading' && token.attrs) {
|
|
492
|
+
const sem = parseSemanticBlock(token.attrs);
|
|
493
|
+
const carrier = { type: 'heading', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
|
|
494
|
+
processAnnotation(carrier, sem, state);
|
|
495
|
+
} else if (token.type === 'code' && token.attrs) {
|
|
496
|
+
const sem = parseSemanticBlock(token.attrs);
|
|
497
|
+
const carrier = { type: 'code', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
|
|
498
|
+
processAnnotation(carrier, sem, state);
|
|
499
|
+
} else if (token.type === 'blockquote' && token.attrs) {
|
|
500
|
+
const sem = parseSemanticBlock(token.attrs);
|
|
501
|
+
const carrier = { type: 'blockquote', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
|
|
502
|
+
processAnnotation(carrier, sem, state);
|
|
503
|
+
} else if (token.type === 'para') {
|
|
504
|
+
// Check for standalone subject declarations: {=iri} on its own line
|
|
505
|
+
const standaloneSubjectMatch = token.text.match(/^\s*\{=(.*?)\}\s*$/);
|
|
506
|
+
if (standaloneSubjectMatch) {
|
|
507
|
+
const sem = parseSemanticBlock(`{=${standaloneSubjectMatch[1]}}`);
|
|
508
|
+
const attrsStart = token.range[0] + token.text.indexOf('{=');
|
|
509
|
+
const attrsEnd = attrsStart + (standaloneSubjectMatch[1] ? standaloneSubjectMatch[1].length : 0);
|
|
510
|
+
processAnnotation({ type: 'standalone', text: '', range: token.range, attrsRange: [attrsStart, attrsEnd], valueRange: null }, sem, state);
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
const followingLists = [];
|
|
514
|
+
let j = i + 1;
|
|
515
|
+
while (j < tokens.length && tokens[j].type === 'list') {
|
|
516
|
+
followingLists.push(tokens[j]);
|
|
517
|
+
j++;
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
const contextMatch = token.text.match(/^(.+?)\s*\{([^}]+)\}$/);
|
|
521
|
+
if (contextMatch && followingLists.length > 0) {
|
|
522
|
+
const contextSem = parseSemanticBlock(`{${contextMatch[2]}}`);
|
|
523
|
+
let contextSubject = state.currentSubject;
|
|
524
|
+
|
|
525
|
+
// Always look for the most recent heading subject for context
|
|
526
|
+
for (let k = i - 1; k >= 0; k--) {
|
|
527
|
+
const prevToken = tokens[k];
|
|
528
|
+
if (prevToken.type === 'heading' && prevToken.attrs) {
|
|
529
|
+
const headingSem = parseSemanticBlock(prevToken.attrs);
|
|
530
|
+
if (headingSem.subject) {
|
|
531
|
+
contextSubject = state.df.namedNode(expandIRI(headingSem.subject, state.ctx));
|
|
532
|
+
break;
|
|
533
|
+
}
|
|
453
534
|
}
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
processListContext(contextSem, followingLists, state, contextSubject);
|
|
538
|
+
i = j - 1;
|
|
539
|
+
continue;
|
|
540
|
+
}
|
|
454
541
|
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
542
|
+
const carriers = extractInlineCarriers(token.text, token.range[0]);
|
|
543
|
+
carriers.forEach(carrier => {
|
|
544
|
+
if (carrier.attrs) {
|
|
545
|
+
const sem = parseSemanticBlock(carrier.attrs);
|
|
546
|
+
processAnnotation(carrier, sem, state);
|
|
458
547
|
}
|
|
459
|
-
|
|
460
|
-
case 'list':
|
|
461
|
-
if (state.listContext) processListItem(token, state);
|
|
462
|
-
break;
|
|
463
|
-
case 'blockquote':
|
|
464
|
-
if (state.currentSubject) processAnnotation(token, state, token.text);
|
|
465
|
-
break;
|
|
548
|
+
});
|
|
466
549
|
}
|
|
467
550
|
}
|
|
468
551
|
|
|
469
552
|
return { quads: state.quads, origin: state.origin, context: state.ctx };
|
|
470
553
|
}
|
|
471
554
|
|
|
472
|
-
function shortenIRI(iri, ctx) {
|
|
555
|
+
/**
 * Compacts an absolute IRI using the given prefix context.
 *
 * Resolution order:
 *   1. values that are not strings starting with 'http' are returned unchanged;
 *   2. an IRI inside `ctx['@vocab']` is returned as the bare local name;
 *   3. otherwise the prefix with the longest matching namespace is used,
 *      giving `prefix:local` (the first declared prefix wins a tie);
 *   4. with no match the IRI is returned unchanged.
 *
 * Context values that are not non-empty strings are ignored: an empty
 * namespace would otherwise match every IRI.
 *
 * @param {*} iri - IRI to compact.
 * @param {?Object} ctx - Mapping of prefix (or '@vocab') to namespace IRI.
 * @returns {*} The compacted form, or `iri` itself when it cannot be compacted.
 */
export function shortenIRI(iri, ctx) {
    if (typeof iri !== 'string' || !iri.startsWith('http')) return iri;

    const context = ctx ?? {};
    const isNamespace = (value) => typeof value === 'string' && value.length > 0;

    const vocab = context['@vocab'];
    if (isNamespace(vocab) && iri.startsWith(vocab)) {
        return iri.substring(vocab.length);
    }

    let best = null;
    for (const [prefix, namespace] of Object.entries(context)) {
        if (prefix === '@vocab' || !isNamespace(namespace) || !iri.startsWith(namespace)) continue;
        if (best === null || namespace.length > best.namespace.length) {
            best = { prefix, namespace };
        }
    }

    return best ? `${best.prefix}:${iri.substring(best.namespace.length)}` : iri;
}
|
|
490
565
|
|
|
566
|
+
/**
 * Stateless helpers shared by `serialize`: reading spans recorded on origin
 * blocks, manipulating `{...}` attribute tokens, and normalizing quads so
 * they can be looked up in the origin quad index.
 */
const serializeHelpers = {
    /**
     * Read the `{...}` attribute span of a block out of `text`.
     * @returns {{start: number, end: number, text: string}|null} null when the
     *   block has no `attrsRange` or the range is not a finite, non-empty span.
     */
    readAttrsSpan: (block, text) => {
        if (!block?.attrsRange) return null;
        const { start, end } = block.attrsRange;
        return (Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end > start)
            ? { start, end, text: text.substring(start, end) }
            : null;
    },

    /**
     * Read the carrier value span of a block out of `text`.
     * Unlike `readAttrsSpan`, an empty span (`end === start`) is accepted.
     * @returns {{start: number, end: number, text: string}|null}
     */
    readValueSpan: (block, text) => {
        if (!block?.valueRange) return null;
        const { start, end } = block.valueRange;
        return (Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end >= start)
            ? { start, end, text: text.substring(start, end) }
            : null;
    },

    /** Strip the surrounding braces from an attribute string and split it on whitespace. */
    normalizeAttrsTokens: (attrsText) => {
        const cleaned = String(attrsText || '').replace(/^\s*\{|\}\s*$/g, '').trim();
        return cleaned ? cleaned.split(/\s+/).filter(Boolean) : [];
    },

    /** Raw tokens of a block's recorded entries, or null when the block has none. */
    blockTokensFromEntries: (block) => block?.entries?.length ? block.entries.map(e => e.raw).filter(Boolean) : null,

    /** Copy of `block.entries` without the entry at `entryIndex`; null when the index is invalid. */
    removeEntryAt: (block, entryIndex) => {
        if (!block?.entries || entryIndex == null || entryIndex < 0 || entryIndex >= block.entries.length) return null;
        return [...block.entries.slice(0, entryIndex), ...block.entries.slice(entryIndex + 1)];
    },

    /**
     * Copy of `block.entries` whose language/datatype entries are replaced by
     * the ones implied by literal `lit`. A language tag takes precedence over
     * a datatype, and xsd:string is never written out.
     * @returns {Array<object>|null} null when the block has no entries array.
     */
    replaceLangDatatypeEntries: (block, lit, ctx) => {
        if (!block?.entries) return null;
        const filtered = block.entries.filter(e => e.kind !== 'language' && e.kind !== 'datatype');
        const extras = [];
        if (lit?.language) {
            extras.push({ kind: 'language', language: lit.language, raw: `@${lit.language}`, relRange: { start: 0, end: 0 } });
        } else {
            const dt = lit?.datatype?.value;
            if (dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
                const shortDt = shortenIRI(dt, ctx);
                extras.push({ kind: 'datatype', datatype: shortDt, raw: `^^${shortDt}`, relRange: { start: 0, end: 0 } });
            }
        }
        return [...filtered, ...extras];
    },

    /** Render a token list back into a `{...}` attribute string. */
    writeAttrsTokens: (tokens) => `{${tokens.join(' ').trim()}}`,

    /**
     * Remove the first token accepted by `matchFn`.
     * @returns {{tokens: string[], removed: boolean}} the input array itself
     *   (not a copy) when nothing matched.
     */
    removeOneToken: (tokens, matchFn) => {
        const i = tokens.findIndex(matchFn);
        return i === -1 ? { tokens, removed: false } : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
    },

    /**
     * Return a shallow copy of quad `q` whose literal object always carries a
     * string `language` and a term-shaped `datatype` (`{ termType, value }`).
     *
     * The datatype must stay an object: callers read `object.datatype?.value`,
     * so collapsing it to its IRI string would silently drop the datatype.
     * A bare IRI string is wrapped into a NamedNode-shaped object, and a
     * missing datatype defaults to xsd:string.
     *
     * @returns {object|null} null when `q` is falsy.
     */
    normalizeQuad: (q) => {
        if (!q) return null;
        const { subject, predicate, object } = q;
        if (object?.termType !== 'Literal') return { ...q, subject, predicate, object };

        const language = typeof object.language === 'string' ? object.language : '';
        const rawDatatype = object.datatype;
        let datatype;
        if (typeof rawDatatype === 'string' && rawDatatype) {
            datatype = { termType: 'NamedNode', value: rawDatatype };
        } else if (rawDatatype?.value) {
            datatype = rawDatatype;
        } else {
            datatype = { termType: 'NamedNode', value: 'http://www.w3.org/2001/XMLSchema#string' };
        }
        // termType/value are copied explicitly so they survive the spread even
        // when the term exposes them through prototype getters.
        return { ...q, subject, predicate, object: { ...object, termType: 'Literal', value: object.value, language, datatype } };
    },

    /** Key of a (normalized) quad in the origin quad index, or null for a falsy quad. */
    quadToKeyForOrigin: (q) => {
        const nq = serializeHelpers.normalizeQuad(q);
        return nq ? quadIndexKey(nq.subject, nq.predicate, nq.object) : null;
    },

    /**
     * Decode a quad index key: a JSON array `[s, p, objKey]` whose third
     * element is itself a JSON string.
     * @returns {{s: *, p: *, o: *}|null} null when the key is not valid JSON of that shape.
     */
    parseQuadIndexKey: (key) => {
        try {
            const [s, p, objKey] = JSON.parse(key);
            return { s, p, o: JSON.parse(objKey) };
        } catch {
            // Best effort: an undecodable key is reported as "no match".
            return null;
        }
    },

    /**
     * Make a literal value safe to write into a block's carrier.
     * Code carriers keep their line structure (newlines normalized to `\n`);
     * every other carrier is collapsed to one trimmed line, and span/link
     * carriers additionally have `[` and `]` replaced by spaces.
     */
    sanitizeCarrierValueForBlock: (block, raw) => {
        const s = String(raw ?? '');
        const t = block?.carrierType;
        if (t === 'code') return s.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
        const oneLine = s.replace(/[\n\r]+/g, ' ').trim();
        return (t === 'span' || t === 'link') ? oneLine.replace(/[\[\]]/g, ' ') : oneLine;
    }
};
|
|
647
|
+
|
|
491
648
|
export function serialize({ text, diff, origin, options = {} }) {
|
|
492
|
-
if (!diff || (!diff.add?.length && !diff.delete?.length))
|
|
649
|
+
if (!diff || (!diff.add?.length && !diff.delete?.length)) {
|
|
650
|
+
const reparsed = parse(text, { context: options.context || {} });
|
|
651
|
+
return { text, origin: reparsed.origin };
|
|
652
|
+
}
|
|
493
653
|
|
|
654
|
+
const base = origin || parse(text, { context: options.context || {} }).origin;
|
|
494
655
|
let result = text;
|
|
495
656
|
const edits = [];
|
|
496
657
|
const ctx = options.context || {};
|
|
497
658
|
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
const
|
|
501
|
-
|
|
502
|
-
if (
|
|
659
|
+
// Scan the origin quad index for the first literal quad with the given
// subject, predicate and lexical value, ignoring language and datatype.
// Returns the index entry, or null when nothing matches.
const findOriginEntryForLiteralByValue = (subjectIri, predicateIri, literalValue) => {
    const index = base?.quadIndex || [];
    for (const [indexKey, hit] of index) {
        const decoded = serializeHelpers.parseQuadIndexKey(indexKey);
        const isMatch = Boolean(decoded)
            && decoded.s === subjectIri
            && decoded.p === predicateIri
            && decoded.o?.t === 'Literal'
            && decoded.o?.v === literalValue;
        if (isMatch) return hit;
    }
    return null;
};
|
|
670
|
+
|
|
671
|
+
// Collect every origin block that carries a literal for the given subject and
// predicate. Each hit pairs the block with its index entry and the decoded
// object part of the index key. Index entries whose block cannot be resolved
// are skipped.
const findLiteralCarrierBlocksBySP = (subjectIri, predicateIri) => {
    const found = [];
    for (const [indexKey, entry] of base?.quadIndex || []) {
        const decoded = serializeHelpers.parseQuadIndexKey(indexKey);
        const isLiteralForSP = Boolean(decoded)
            && decoded.s === subjectIri
            && decoded.p === predicateIri
            && decoded.o?.t === 'Literal';
        if (!isLiteralForSP) continue;

        // An index entry is either an object holding `blockId` or the id itself.
        const blockId = entry?.blockId || entry;
        const block = blockId ? base?.blocks?.get(blockId) : null;
        if (block) found.push({ block, entry, obj: decoded.o });
    }
    return found;
};
|
|
684
|
+
|
|
685
|
+
// Build a stable JSON string identifying an object term: literals are
// described by value, language and datatype IRI, every other term by
// termType and value. A falsy term yields the empty string.
const objectSignature = (o) => {
    if (!o) return '';
    const descriptor = o.termType === 'Literal'
        ? { t: 'Literal', v: o.value, lang: o.language || '', dt: o.datatype?.value || '' }
        : { t: o.termType, v: o.value };
    return JSON.stringify(descriptor);
};
|
|
692
|
+
|
|
693
|
+
const anchors = new Map();
|
|
694
|
+
for (const q0 of diff.delete || []) {
|
|
695
|
+
const q = serializeHelpers.normalizeQuad(q0);
|
|
696
|
+
if (!q) continue;
|
|
697
|
+
if (!q?.subject || !q?.object || !q?.predicate) continue;
|
|
698
|
+
const key = JSON.stringify([q.subject.value, objectSignature(q.object)]);
|
|
699
|
+
const qk = serializeHelpers.quadToKeyForOrigin(q);
|
|
700
|
+
const entry = qk ? base?.quadIndex?.get(qk) : null;
|
|
701
|
+
const blockId = entry?.blockId || entry;
|
|
702
|
+
const block = blockId ? base?.blocks?.get(blockId) : null;
|
|
703
|
+
if (!block?.attrsRange) continue;
|
|
704
|
+
anchors.set(key, { block, entry });
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
const addBySP = new Map();
|
|
708
|
+
for (const q0 of diff.add || []) {
|
|
709
|
+
const q = serializeHelpers.normalizeQuad(q0);
|
|
710
|
+
if (!q) continue;
|
|
711
|
+
if (!q?.subject || !q?.predicate || !q?.object) continue;
|
|
712
|
+
const k = JSON.stringify([q.subject.value, q.predicate.value]);
|
|
713
|
+
const list = addBySP.get(k) || [];
|
|
714
|
+
list.push(q);
|
|
715
|
+
addBySP.set(k, list);
|
|
716
|
+
}
|
|
503
717
|
|
|
504
|
-
|
|
505
|
-
|
|
718
|
+
const consumedAdds = new Set();
|
|
719
|
+
const literalUpdates = [];
|
|
720
|
+
for (const dq0 of diff.delete || []) {
|
|
721
|
+
const dq = serializeHelpers.normalizeQuad(dq0);
|
|
722
|
+
if (!dq) continue;
|
|
723
|
+
if (!dq?.subject || !dq?.predicate || !dq?.object) continue;
|
|
724
|
+
if (dq.object.termType !== 'Literal') continue;
|
|
725
|
+
const k = JSON.stringify([dq.subject.value, dq.predicate.value]);
|
|
726
|
+
const candidates = addBySP.get(k) || [];
|
|
727
|
+
const aq = candidates.find(x => x?.object?.termType === 'Literal' && !consumedAdds.has(serializeHelpers.quadToKeyForOrigin(x)));
|
|
728
|
+
if (!aq) continue;
|
|
729
|
+
|
|
730
|
+
const dqk = serializeHelpers.quadToKeyForOrigin(dq);
|
|
731
|
+
let entry = dqk ? base?.quadIndex?.get(dqk) : null;
|
|
732
|
+
if (!entry && dq.object?.termType === 'Literal') {
|
|
733
|
+
entry = findOriginEntryForLiteralByValue(dq.subject.value, dq.predicate.value, dq.object.value);
|
|
734
|
+
}
|
|
735
|
+
const blockId = entry?.blockId || entry;
|
|
736
|
+
const block = blockId ? base?.blocks?.get(blockId) : null;
|
|
737
|
+
if (!block) continue;
|
|
738
|
+
|
|
739
|
+
literalUpdates.push({ deleteQuad: dq, addQuad: aq, entry, block });
|
|
740
|
+
consumedAdds.add(serializeHelpers.quadToKeyForOrigin(aq));
|
|
741
|
+
}
|
|
506
742
|
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
const deleteStart = before === '\n' ? start - 1 : start;
|
|
512
|
-
const deleteEnd = after === '\n' ? end + 1 : end;
|
|
743
|
+
// In-place rewrite of added literals: when an added literal has no matching
// delete but exactly one existing carrier block for the same subject and
// predicate carries the same language, overwrite that carrier's value (and
// its language/datatype tokens) instead of appending a new carrier line.
//
// Blocks whose value span is already scheduled for a rewrite are tracked so
// two edits never target the same span; overlapping edits would corrupt the
// text when they are applied.
const inPlaceRewrittenBlocks = new Set(literalUpdates.map(u => u.block));
for (const q0 of diff.add || []) {
    const quad = serializeHelpers.normalizeQuad(q0);
    if (!quad || !quad.subject || !quad.predicate || quad.object?.termType !== 'Literal') continue;

    const quadKey = serializeHelpers.quadToKeyForOrigin(quad);
    if (consumedAdds.has(quadKey)) continue;

    const matches = findLiteralCarrierBlocksBySP(quad.subject.value, quad.predicate.value);
    if (matches.length === 0) continue;

    const desiredLang = quad.object.language || '';
    const sameLang = matches.filter(m => {
        const entries = m.block?.entries || [];
        const langEntry = entries.find(e => e.kind === 'language');
        const lang = langEntry?.language || '';
        return lang === desiredLang;
    });

    // Only rewrite when the target is unambiguous.
    if (sameLang.length !== 1) continue;
    const target = sameLang[0].block;
    if (inPlaceRewrittenBlocks.has(target)) continue;
    const vSpan = serializeHelpers.readValueSpan(target, text);
    if (!vSpan) continue;

    const newValue = serializeHelpers.sanitizeCarrierValueForBlock(target, quad.object.value);
    edits.push({ start: vSpan.start, end: vSpan.end, text: newValue });

    const aSpan = serializeHelpers.readAttrsSpan(target, text);
    if (aSpan && target?.entries?.length) {
        const nextEntries = serializeHelpers.replaceLangDatatypeEntries(target, quad.object, ctx);
        if (nextEntries) {
            const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
            edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
        }
    }

    inPlaceRewrittenBlocks.add(target);
    // consumedAdds holds index keys, not quad objects: every lookup goes
    // through quadToKeyForOrigin, so the key is what must be recorded here.
    consumedAdds.add(quadKey);
}
|
|
517
778
|
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
779
|
+
for (const u of literalUpdates) {
|
|
780
|
+
const span = serializeHelpers.readValueSpan(u.block, text);
|
|
781
|
+
if (span) {
|
|
782
|
+
const newValue = serializeHelpers.sanitizeCarrierValueForBlock(u.block, u.addQuad.object.value);
|
|
783
|
+
edits.push({ start: span.start, end: span.end, text: newValue });
|
|
784
|
+
}
|
|
521
785
|
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
786
|
+
const aSpan = serializeHelpers.readAttrsSpan(u.block, text);
|
|
787
|
+
if (aSpan) {
|
|
788
|
+
if (u.block?.entries?.length) {
|
|
789
|
+
const nextEntries = serializeHelpers.replaceLangDatatypeEntries(u.block, u.addQuad.object, ctx);
|
|
790
|
+
if (nextEntries) {
|
|
791
|
+
const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
|
|
792
|
+
if (nextTokens.length === 0) {
|
|
793
|
+
edits.push({ start: aSpan.start, end: aSpan.end, text: '{}' });
|
|
794
|
+
} else {
|
|
795
|
+
edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
|
|
796
|
+
}
|
|
797
|
+
}
|
|
798
|
+
} else {
|
|
799
|
+
const tokens = serializeHelpers.normalizeAttrsTokens(aSpan.text);
|
|
800
|
+
const updated = updateAttrsDatatypeLang(tokens, u.addQuad.object);
|
|
801
|
+
if (updated.join(' ') !== tokens.join(' ')) {
|
|
802
|
+
if (updated.length === 0) {
|
|
803
|
+
edits.push({ start: aSpan.start, end: aSpan.end, text: '{}' });
|
|
804
|
+
} else {
|
|
805
|
+
edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(updated) });
|
|
806
|
+
}
|
|
526
807
|
}
|
|
527
808
|
}
|
|
809
|
+
}
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
/**
 * Rebuild an attribute token list so its language/datatype tokens reflect
 * literal `newLit`: existing `@lang` and `^^datatype` tokens are dropped, then
 * the literal's language tag (preferred) or its non-xsd:string datatype is
 * appended.
 *
 * Declared as a hoisted function rather than a `const` arrow: it is called
 * from code that runs earlier in the enclosing function body, where a
 * `const` binding would still be in its temporal dead zone and throw.
 *
 * @param {string[]} tokens - Attribute tokens without the surrounding braces.
 * @param {object} newLit - Literal term supplying `language` / `datatype`.
 * @returns {string[]} A new token array.
 */
function updateAttrsDatatypeLang(tokens, newLit) {
    const predicatesAndTypes = tokens.filter(t => !t.startsWith('@') && !t.startsWith('^^'));
    if (newLit?.language) return [...predicatesAndTypes, `@${newLit.language}`];
    const dt = newLit?.datatype?.value;
    if (dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
        return [...predicatesAndTypes, `^^${shortenIRI(dt, ctx)}`];
    }
    return predicatesAndTypes;
}
|
|
528
821
|
|
|
529
|
-
|
|
530
|
-
|
|
822
|
+
if (diff.delete) {
|
|
823
|
+
diff.delete.forEach(q0 => {
|
|
824
|
+
const quad = serializeHelpers.normalizeQuad(q0);
|
|
825
|
+
if (!quad) return;
|
|
826
|
+
if (!quad?.subject || !quad?.predicate || !quad?.object) return;
|
|
531
827
|
|
|
532
828
|
if (quad.object.termType === 'Literal') {
|
|
533
|
-
|
|
829
|
+
const isUpdated = literalUpdates.some(u =>
|
|
830
|
+
u.deleteQuad.subject.value === quad.subject.value &&
|
|
831
|
+
u.deleteQuad.predicate.value === quad.predicate.value &&
|
|
832
|
+
u.deleteQuad.object.value === quad.object.value
|
|
833
|
+
);
|
|
834
|
+
if (isUpdated) return;
|
|
835
|
+
}
|
|
836
|
+
|
|
837
|
+
const key = serializeHelpers.quadToKeyForOrigin(quad);
|
|
838
|
+
let entry = key ? base?.quadIndex?.get(key) : null;
|
|
839
|
+
if (!entry && quad.object?.termType === 'Literal') {
|
|
840
|
+
entry = findOriginEntryForLiteralByValue(quad.subject.value, quad.predicate.value, quad.object.value);
|
|
841
|
+
}
|
|
842
|
+
const blockId = entry?.blockId || entry;
|
|
843
|
+
if (!blockId) return;
|
|
844
|
+
const block = base?.blocks?.get(blockId);
|
|
845
|
+
const span = serializeHelpers.readAttrsSpan(block, text);
|
|
846
|
+
if (!span) return;
|
|
847
|
+
|
|
848
|
+
if (entry?.entryIndex != null && block?.entries?.length) {
|
|
849
|
+
const nextEntries = serializeHelpers.removeEntryAt(block, entry.entryIndex);
|
|
850
|
+
if (!nextEntries) return;
|
|
851
|
+
const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
|
|
852
|
+
if (nextTokens.length === 0) {
|
|
853
|
+
edits.push({ start: span.start, end: span.end, text: '{}' });
|
|
854
|
+
} else {
|
|
855
|
+
edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
|
|
856
|
+
}
|
|
857
|
+
return;
|
|
858
|
+
}
|
|
859
|
+
|
|
860
|
+
const tokens = serializeHelpers.normalizeAttrsTokens(span.text);
|
|
861
|
+
let updated = tokens;
|
|
862
|
+
let removed = false;
|
|
863
|
+
|
|
864
|
+
if (entry?.kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
|
|
865
|
+
const expectedType = entry.expandedType || quad.object.value;
|
|
866
|
+
({ tokens: updated, removed } = serializeHelpers.removeOneToken(tokens, t => {
|
|
867
|
+
if (!t.startsWith('.')) return false;
|
|
868
|
+
const raw = t.slice(1);
|
|
869
|
+
return expandIRI(raw, ctx) === expectedType;
|
|
870
|
+
}));
|
|
534
871
|
} else {
|
|
535
|
-
|
|
872
|
+
const expectedPred = entry?.expandedPredicate || quad.predicate.value;
|
|
873
|
+
const expectedForm = entry?.form;
|
|
874
|
+
({ tokens: updated, removed } = serializeHelpers.removeOneToken(tokens, t => {
|
|
875
|
+
const m = String(t).match(/^(\^\?|\^|\?|)(.+)$/);
|
|
876
|
+
if (!m) return false;
|
|
877
|
+
const form = m[1] || '';
|
|
878
|
+
const raw = m[2];
|
|
879
|
+
if (expectedForm != null && form !== expectedForm) return false;
|
|
880
|
+
return expandIRI(raw, ctx) === expectedPred;
|
|
881
|
+
}));
|
|
536
882
|
}
|
|
537
883
|
|
|
538
|
-
|
|
884
|
+
if (!removed) return;
|
|
885
|
+
|
|
886
|
+
if (updated.length === 0) {
|
|
887
|
+
edits.push({ start: span.start, end: span.end, text: '{}' });
|
|
888
|
+
return;
|
|
889
|
+
}
|
|
890
|
+
|
|
891
|
+
const newAttrs = serializeHelpers.writeAttrsTokens(updated);
|
|
892
|
+
edits.push({ start: span.start, end: span.end, text: newAttrs });
|
|
893
|
+
});
|
|
894
|
+
}
|
|
895
|
+
|
|
896
|
+
if (diff.add) {
|
|
897
|
+
diff.add.forEach(q0 => {
|
|
898
|
+
const quad = serializeHelpers.normalizeQuad(q0);
|
|
899
|
+
if (!quad) return;
|
|
900
|
+
if (!quad?.subject || !quad?.predicate || !quad?.object) return;
|
|
901
|
+
|
|
902
|
+
if (consumedAdds.has(serializeHelpers.quadToKeyForOrigin(quad))) return;
|
|
903
|
+
|
|
904
|
+
const anchorKey = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
|
|
905
|
+
const anchored = anchors.get(anchorKey) || null;
|
|
906
|
+
let targetBlock = anchored?.block || null;
|
|
907
|
+
|
|
908
|
+
if (!targetBlock) {
|
|
909
|
+
for (const [, block] of base?.blocks || []) {
|
|
910
|
+
if (block.subject === quad.subject.value && block.attrsRange) {
|
|
911
|
+
targetBlock = block;
|
|
912
|
+
break;
|
|
913
|
+
}
|
|
914
|
+
}
|
|
915
|
+
}
|
|
916
|
+
|
|
917
|
+
if (quad.object.termType === 'Literal' || quad.object.termType === 'NamedNode') {
|
|
918
|
+
if (!targetBlock) {
|
|
919
|
+
const predShort = shortenIRI(quad.predicate.value, ctx);
|
|
920
|
+
if (quad.object.termType === 'Literal') {
|
|
921
|
+
const value = String(quad.object.value ?? '');
|
|
922
|
+
let ann = predShort;
|
|
923
|
+
if (quad.object.language) ann += ` @${quad.object.language}`;
|
|
924
|
+
else if (quad.object.datatype?.value && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
|
|
925
|
+
ann += ` ^^${shortenIRI(quad.object.datatype.value, ctx)}`;
|
|
926
|
+
}
|
|
927
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${value}] {${ann}}` });
|
|
928
|
+
} else {
|
|
929
|
+
const full = quad.object.value;
|
|
930
|
+
const label = shortenIRI(full, ctx);
|
|
931
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${label}](${full}) {?${predShort}}` });
|
|
932
|
+
}
|
|
933
|
+
return;
|
|
934
|
+
}
|
|
935
|
+
|
|
936
|
+
const predShort = shortenIRI(quad.predicate.value, ctx);
|
|
937
|
+
if (quad.object.termType === 'Literal') {
|
|
938
|
+
const value = String(quad.object.value ?? '');
|
|
939
|
+
let ann = predShort;
|
|
940
|
+
if (quad.object.language) ann += ` @${quad.object.language}`;
|
|
941
|
+
else if (quad.object.datatype?.value && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
|
|
942
|
+
ann += ` ^^${shortenIRI(quad.object.datatype.value, ctx)}`;
|
|
943
|
+
}
|
|
944
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${value}] {${ann}}` });
|
|
945
|
+
return;
|
|
946
|
+
}
|
|
947
|
+
|
|
948
|
+
if (quad.object.termType === 'NamedNode') {
|
|
949
|
+
const full = quad.object.value;
|
|
950
|
+
const label = shortenIRI(full, ctx);
|
|
951
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${label}](${full}) {?${predShort}}` });
|
|
952
|
+
return;
|
|
953
|
+
}
|
|
954
|
+
}
|
|
955
|
+
|
|
956
|
+
const span = serializeHelpers.readAttrsSpan(targetBlock, text);
|
|
957
|
+
if (!span) return;
|
|
958
|
+
const tokens = serializeHelpers.blockTokensFromEntries(targetBlock) || serializeHelpers.normalizeAttrsTokens(span.text);
|
|
959
|
+
|
|
960
|
+
if (quad.predicate.value.endsWith('rdf-syntax-ns#type') && quad.object?.termType === 'NamedNode') {
|
|
961
|
+
const typeShort = shortenIRI(quad.object.value, ctx);
|
|
962
|
+
const typeToken = typeShort.includes(':') || !typeShort.startsWith('http') ? `.${typeShort}` : null;
|
|
963
|
+
if (!typeToken) return;
|
|
964
|
+
if (tokens.includes(typeToken)) return;
|
|
965
|
+
const updated = [...tokens, typeToken];
|
|
966
|
+
edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(updated) });
|
|
967
|
+
return;
|
|
968
|
+
}
|
|
539
969
|
|
|
540
|
-
|
|
970
|
+
const form = anchored?.entry?.form;
|
|
971
|
+
if (form == null) return;
|
|
972
|
+
const predShort = shortenIRI(quad.predicate.value, ctx);
|
|
973
|
+
const predToken = `${form}${predShort}`;
|
|
974
|
+
if (!predToken) return;
|
|
975
|
+
if (tokens.includes(predToken)) return;
|
|
976
|
+
const updated = [...tokens, predToken];
|
|
977
|
+
edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(updated) });
|
|
541
978
|
});
|
|
542
979
|
}
|
|
543
980
|
|
|
@@ -546,7 +983,8 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
546
983
|
result = result.substring(0, edit.start) + edit.text + result.substring(edit.end);
|
|
547
984
|
});
|
|
548
985
|
|
|
549
|
-
|
|
986
|
+
const reparsed = parse(result, { context: options.context || {} });
|
|
987
|
+
return { text: result, origin: reparsed.origin };
|
|
550
988
|
}
|
|
551
989
|
|
|
552
|
-
export default { parse, serialize,
|
|
990
|
+
export default { parse, serialize, parseSemanticBlock, shortenIRI };
|