mdld-parse 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -27
- package/index.js +775 -201
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -24,9 +24,11 @@ function hash(str) {
|
|
|
24
24
|
return Math.abs(h).toString(16).slice(0, 12);
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
+
// IRI Utilities
|
|
27
28
|
function expandIRI(term, ctx) {
|
|
28
|
-
if (
|
|
29
|
-
const
|
|
29
|
+
if (term == null) return null;
|
|
30
|
+
const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
|
|
31
|
+
const t = raw.trim();
|
|
30
32
|
if (t.match(/^https?:/)) return t;
|
|
31
33
|
if (t.includes(':')) {
|
|
32
34
|
const [prefix, ref] = t.split(':', 2);
|
|
@@ -35,40 +37,100 @@ function expandIRI(term, ctx) {
|
|
|
35
37
|
return (ctx['@vocab'] || '') + t;
|
|
36
38
|
}
|
|
37
39
|
|
|
40
|
+
/**
 * Compacts an absolute IRI using the prefix map in `ctx`.
 *
 * The `@vocab` namespace is tried first and yields a bare local name.
 * Otherwise the first prefix (in `ctx` insertion order) whose namespace the
 * IRI starts with yields `prefix:local`.
 *
 * @param {*} iri - IRI to compact. Anything that is not a string starting
 *   with "http" is returned unchanged.
 * @param {Object<string, string>} [ctx] - Prefix-to-namespace map, optionally
 *   with an `@vocab` entry. A missing map means nothing can be compacted.
 * @returns {*} The compacted form, or `iri` itself when nothing matches.
 */
export function shortenIRI(iri, ctx) {
    if (typeof iri !== 'string' || !iri.startsWith('http')) return iri;
    if (ctx == null) return iri;

    const vocab = ctx['@vocab'];
    if (typeof vocab === 'string' && vocab !== '' && iri.startsWith(vocab)) {
        return iri.substring(vocab.length);
    }

    for (const [prefix, namespace] of Object.entries(ctx)) {
        if (prefix === '@vocab') continue;
        // An empty or non-string namespace would "match" every IRI and produce
        // a bogus `prefix:<full IRI>`, so such entries are skipped.
        if (typeof namespace !== 'string' || namespace === '') continue;
        if (iri.startsWith(namespace)) {
            return `${prefix}:${iri.substring(namespace.length)}`;
        }
    }
    return iri;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Dispatches to `expandIRI` or `shortenIRI`.
 *
 * @param {*} term - Value handed to the selected helper.
 * @param {Object} ctx - Prefix context handed to the selected helper.
 * @param {string} [operation='expand'] - `'expand'` selects `expandIRI`;
 *   any other value selects `shortenIRI`.
 * @returns {*} Whatever the selected helper returns.
 */
function processIRI(term, ctx, operation = 'expand') {
    if (operation === 'expand') {
        return expandIRI(term, ctx);
    }
    return shortenIRI(term, ctx);
}
|
|
54
|
+
|
|
38
55
|
/**
 * Parses the text of a `{...}` annotation into its whitespace-separated tokens.
 *
 * Token forms recognised, in this order of precedence:
 *   `=`        -> subject reset (`subject` becomes the string 'RESET')
 *   `=iri`     -> subject
 *   `^^iri`    -> datatype (ignored for `datatype` if a language is already set)
 *   `@lang`    -> language (clears any datatype)
 *   `.iri`     -> type
 *   `^?iri`, `^iri`, `?iri`, `iri` -> predicate with form '^?', '^', '?' or ''
 *
 * Every token is also recorded in `entries` with its `raw` text and a
 * `relRange` giving its `[start, end)` offsets inside the `raw` argument, so
 * `String(raw).substring(relRange.start, relRange.end) === entry.raw`.
 *
 * @param {*} raw - Annotation text, normally including the surrounding braces.
 * @returns {{subject: (string|null), types: Array, predicates: Array,
 *   datatype: (string|null), language: (string|null), entries: Array}}
 *   Parsed annotation; all-empty for blank input or if parsing throws.
 */
function parseSemanticBlock(raw) {
    const emptyResult = () => ({ subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] });

    try {
        const text = String(raw || '');
        const src = text.trim();
        const cleaned = src.replace(/^\{|\}$/g, '').trim();
        if (!cleaned) return emptyResult();

        // Offset of `cleaned` within `text`: whitespace before the annotation,
        // the opening brace when present, and whitespace after that brace.
        // A fixed offset of 1 is only right for the tight `{token ...}` form.
        const outerLead = text.length - text.trimStart().length;
        const braceWidth = src.startsWith('{') ? 1 : 0;
        const afterBrace = src.slice(braceWidth);
        const innerLead = afterBrace.length - afterBrace.trimStart().length;
        const base = outerLead + braceWidth + innerLead;

        const result = emptyResult();
        const tokenRe = /\S+/g;
        let m;
        while ((m = tokenRe.exec(cleaned)) !== null) {
            const token = m[0];
            const start = base + m.index;
            const relRange = { start, end: start + token.length };
            const entryIndex = result.entries.length;
            const addEntry = (fields) => {
                result.entries.push({ ...fields, relRange, raw: token });
            };

            if (token === '=') {
                result.subject = 'RESET';
                addEntry({ kind: 'subjectReset' });
            } else if (token.startsWith('=')) {
                const iri = token.substring(1);
                result.subject = iri;
                addEntry({ kind: 'subject', iri });
            } else if (token.startsWith('^^')) {
                const datatype = token.substring(2);
                // A language tag wins over a datatype, whichever comes first.
                if (!result.language) result.datatype = datatype;
                addEntry({ kind: 'datatype', datatype });
            } else if (token.startsWith('@')) {
                const language = token.substring(1);
                result.language = language;
                result.datatype = null;
                addEntry({ kind: 'language', language });
            } else if (token.startsWith('.')) {
                const iri = token.substring(1);
                result.types.push({ iri, entryIndex });
                addEntry({ kind: 'type', iri });
            } else {
                // '^?' must be tested before '^' so the longer marker wins.
                const form = ['^?', '^', '?'].find((marker) => token.startsWith(marker)) ?? '';
                const iri = token.substring(form.length);
                result.predicates.push({ iri, form, entryIndex });
                addEntry({ kind: 'property', iri, form });
            }
        }

        return result;
    } catch (error) {
        console.error(`Error parsing semantic block ${raw}:`, error);
        return emptyResult();
    }
}
|
|
74
136
|
|
|
@@ -86,20 +148,29 @@ function scanTokens(text) {
|
|
|
86
148
|
if (line.startsWith('```')) {
|
|
87
149
|
if (!codeBlock) {
|
|
88
150
|
const fence = line.match(/^(`{3,})(.*)/);
|
|
151
|
+
const attrsText = fence[2].match(/\{[^}]+\}/)?.[0] || null;
|
|
152
|
+
const attrsStartInLine = attrsText ? line.indexOf(attrsText) : -1;
|
|
153
|
+
const contentStart = lineStart + line.length + 1;
|
|
89
154
|
codeBlock = {
|
|
90
155
|
fence: fence[1],
|
|
91
156
|
start: lineStart,
|
|
92
157
|
content: [],
|
|
93
158
|
lang: fence[2].trim().split(/[\s{]/)[0],
|
|
94
|
-
attrs:
|
|
159
|
+
attrs: attrsText,
|
|
160
|
+
attrsRange: attrsText && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrsText.length] : null,
|
|
161
|
+
valueRangeStart: contentStart
|
|
95
162
|
};
|
|
96
163
|
} else if (line.startsWith(codeBlock.fence)) {
|
|
164
|
+
const valueStart = codeBlock.valueRangeStart;
|
|
165
|
+
const valueEnd = Math.max(valueStart, lineStart - 1);
|
|
97
166
|
tokens.push({
|
|
98
167
|
type: 'code',
|
|
99
168
|
range: [codeBlock.start, lineStart],
|
|
100
169
|
text: codeBlock.content.join('\n'),
|
|
101
170
|
lang: codeBlock.lang,
|
|
102
|
-
attrs: codeBlock.attrs
|
|
171
|
+
attrs: codeBlock.attrs,
|
|
172
|
+
attrsRange: codeBlock.attrsRange,
|
|
173
|
+
valueRange: [valueStart, valueEnd]
|
|
103
174
|
});
|
|
104
175
|
codeBlock = null;
|
|
105
176
|
}
|
|
@@ -119,35 +190,58 @@ function scanTokens(text) {
|
|
|
119
190
|
|
|
120
191
|
const headingMatch = line.match(/^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
|
|
121
192
|
if (headingMatch) {
|
|
193
|
+
const attrs = headingMatch[3] || null;
|
|
194
|
+
const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
|
|
195
|
+
const afterHashes = headingMatch[1].length;
|
|
196
|
+
const ws = line.substring(afterHashes).match(/^\s+/)?.[0]?.length || 0;
|
|
197
|
+
const valueStartInLine = afterHashes + ws;
|
|
198
|
+
const valueEndInLine = valueStartInLine + headingMatch[2].length;
|
|
122
199
|
tokens.push({
|
|
123
200
|
type: 'heading',
|
|
124
201
|
depth: headingMatch[1].length,
|
|
125
202
|
range: [lineStart, pos - 1],
|
|
126
203
|
text: headingMatch[2].trim(),
|
|
127
|
-
attrs
|
|
204
|
+
attrs,
|
|
205
|
+
attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
|
|
206
|
+
valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
|
|
128
207
|
});
|
|
129
208
|
continue;
|
|
130
209
|
}
|
|
131
210
|
|
|
132
211
|
const listMatch = line.match(/^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
|
|
133
212
|
if (listMatch) {
|
|
213
|
+
const attrs = listMatch[4] || null;
|
|
214
|
+
const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
|
|
215
|
+
const prefix = listMatch[1].length + listMatch[2].length;
|
|
216
|
+
const ws = line.substring(prefix).match(/^\s+/)?.[0]?.length || 0;
|
|
217
|
+
const valueStartInLine = prefix + ws;
|
|
218
|
+
const valueEndInLine = valueStartInLine + listMatch[3].length;
|
|
134
219
|
tokens.push({
|
|
135
220
|
type: 'list',
|
|
136
221
|
indent: listMatch[1].length,
|
|
137
222
|
range: [lineStart, pos - 1],
|
|
138
223
|
text: listMatch[3].trim(),
|
|
139
|
-
attrs
|
|
224
|
+
attrs,
|
|
225
|
+
attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
|
|
226
|
+
valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
|
|
140
227
|
});
|
|
141
228
|
continue;
|
|
142
229
|
}
|
|
143
230
|
|
|
144
231
|
const blockquoteMatch = line.match(/^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
|
|
145
232
|
if (blockquoteMatch) {
|
|
233
|
+
const attrs = blockquoteMatch[2] || null;
|
|
234
|
+
const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
|
|
235
|
+
const prefixMatch = line.match(/^>\s+/);
|
|
236
|
+
const valueStartInLine = prefixMatch ? prefixMatch[0].length : 2;
|
|
237
|
+
const valueEndInLine = valueStartInLine + blockquoteMatch[1].length;
|
|
146
238
|
tokens.push({
|
|
147
239
|
type: 'blockquote',
|
|
148
240
|
range: [lineStart, pos - 1],
|
|
149
241
|
text: blockquoteMatch[1].trim(),
|
|
150
|
-
attrs
|
|
242
|
+
attrs,
|
|
243
|
+
attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
|
|
244
|
+
valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
|
|
151
245
|
});
|
|
152
246
|
continue;
|
|
153
247
|
}
|
|
@@ -165,7 +259,6 @@ function scanTokens(text) {
|
|
|
165
259
|
return tokens;
|
|
166
260
|
}
|
|
167
261
|
|
|
168
|
-
// Extract inline carriers: [text] {attrs}, [text](url) {attrs}, [text](=iri) {attrs}
|
|
169
262
|
function extractInlineCarriers(text, baseOffset = 0) {
|
|
170
263
|
const carriers = [];
|
|
171
264
|
let pos = 0;
|
|
@@ -174,14 +267,25 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
174
267
|
const bracketStart = text.indexOf('[', pos);
|
|
175
268
|
if (bracketStart === -1) break;
|
|
176
269
|
|
|
177
|
-
|
|
178
|
-
|
|
270
|
+
let bracketDepth = 1;
|
|
271
|
+
let bracketEnd = bracketStart + 1;
|
|
179
272
|
|
|
180
|
-
|
|
181
|
-
|
|
273
|
+
while (bracketEnd < text.length && bracketDepth > 0) {
|
|
274
|
+
if (text[bracketEnd] === '[') {
|
|
275
|
+
bracketDepth++;
|
|
276
|
+
} else if (text[bracketEnd] === ']') {
|
|
277
|
+
bracketDepth--;
|
|
278
|
+
}
|
|
279
|
+
bracketEnd++;
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
if (bracketDepth > 0) break;
|
|
283
|
+
|
|
284
|
+
const carrierText = text.substring(bracketStart + 1, bracketEnd - 1);
|
|
285
|
+
const valueRange = [baseOffset + bracketStart + 1, baseOffset + bracketEnd - 1];
|
|
286
|
+
let spanEnd = bracketEnd;
|
|
182
287
|
let url = null;
|
|
183
288
|
|
|
184
|
-
// Check for (url) or (=iri)
|
|
185
289
|
if (text[spanEnd] === '(') {
|
|
186
290
|
const parenEnd = text.indexOf(')', spanEnd);
|
|
187
291
|
if (parenEnd !== -1) {
|
|
@@ -190,22 +294,24 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
190
294
|
}
|
|
191
295
|
}
|
|
192
296
|
|
|
193
|
-
// Check for {attrs}
|
|
194
297
|
let attrs = null;
|
|
298
|
+
let attrsRange = null;
|
|
195
299
|
const attrsMatch = text.substring(spanEnd).match(/^\s*\{([^}]+)\}/);
|
|
196
300
|
if (attrsMatch) {
|
|
197
301
|
attrs = `{${attrsMatch[1]}}`;
|
|
302
|
+
const braceIndex = attrsMatch[0].indexOf('{');
|
|
303
|
+
const absStart = baseOffset + spanEnd + (braceIndex >= 0 ? braceIndex : 0);
|
|
304
|
+
attrsRange = [absStart, absStart + attrs.length];
|
|
198
305
|
spanEnd += attrsMatch[0].length;
|
|
199
306
|
}
|
|
200
307
|
|
|
201
|
-
// Determine type and resource
|
|
202
308
|
let carrierType = 'span';
|
|
203
309
|
let resourceIRI = null;
|
|
204
310
|
|
|
205
311
|
if (url) {
|
|
206
312
|
if (url.startsWith('=')) {
|
|
207
|
-
|
|
208
|
-
|
|
313
|
+
pos = spanEnd;
|
|
314
|
+
continue;
|
|
209
315
|
} else {
|
|
210
316
|
carrierType = 'link';
|
|
211
317
|
resourceIRI = url;
|
|
@@ -217,6 +323,8 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
217
323
|
text: carrierText,
|
|
218
324
|
url: resourceIRI,
|
|
219
325
|
attrs: attrs,
|
|
326
|
+
attrsRange,
|
|
327
|
+
valueRange,
|
|
220
328
|
range: [baseOffset + bracketStart, baseOffset + spanEnd]
|
|
221
329
|
});
|
|
222
330
|
|
|
@@ -226,191 +334,237 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
226
334
|
return carriers;
|
|
227
335
|
}
|
|
228
336
|
|
|
229
|
-
function createBlock(subject, types, predicates, range, ctx) {
|
|
337
|
+
/**
 * Builds the origin record for one annotated carrier.
 *
 * The block id is a hash of the subject plus the JSON form of the expanded
 * subject/types/predicates. Source ranges are not part of the id.
 *
 * @param {string} subject - Subject IRI of the block.
 * @param {Array<string|{iri: string}>} types - Type IRIs, as strings or `{ iri }` objects.
 * @param {Array<{iri: string, form: string}>} predicates - Predicate descriptors.
 * @param {Array} entries - Parsed annotation entries; stored as `[]` when falsy.
 * @param {Array<number>} range - `[start, end]` of the whole carrier.
 * @param {?Array<number>} attrsRange - `[start, end]` of the annotation text, if known.
 * @param {?Array<number>} valueRange - `[start, end]` of the carrier value, if known.
 * @param {?string} carrierType - Carrier kind; stored as `null` when falsy.
 * @param {Object} ctx - Prefix context used for expansion; a shallow copy is stored.
 * @returns {Object} The block record.
 */
function createBlock(subject, types, predicates, entries, range, attrsRange, valueRange, carrierType, ctx) {
    const toSpan = (pair) => (pair ? { start: pair[0], end: pair[1] } : null);

    const expandedTypes = types.map((t) => {
        const typeIRI = typeof t === 'string' ? t : t.iri;
        return expandIRI(typeIRI, ctx);
    });
    const expandedPredicates = predicates.map((p) => ({ iri: expandIRI(p.iri, ctx), form: p.form }));

    const expanded = { subject, types: expandedTypes, predicates: expandedPredicates };
    const blockId = hash([subject, JSON.stringify(expanded)].join('|'));

    return {
        id: blockId,
        range: { start: range[0], end: range[1] },
        attrsRange: toSpan(attrsRange),
        valueRange: toSpan(valueRange),
        carrierType: carrierType || null,
        subject,
        types: expandedTypes,
        predicates: expandedPredicates,
        entries: entries || [],
        context: { ...ctx }
    };
}
|
|
249
357
|
|
|
250
|
-
|
|
358
|
+
// Quad Utilities
|
|
359
|
+
/**
 * Builds the string key under which a quad is stored in the quad index.
 *
 * The key is a JSON array `[subject.value, predicate.value, objectKey]`,
 * where `objectKey` is itself a JSON string describing the object term.
 *
 * @param {{value: string}} subject - Subject term.
 * @param {{value: string}} predicate - Predicate term.
 * @param {{termType: string, value: string, language: (string|undefined),
 *   datatype: (string|{value: string}|undefined)}} object - Object term.
 *   `datatype` may be a term or an already-flattened IRI string.
 * @returns {string} The index key.
 */
function quadIndexKey(subject, predicate, object) {
    let objKey;
    if (object.termType === 'Literal') {
        const { datatype } = object;
        // normalizeQuad() flattens `datatype` to a plain IRI string. Reading
        // `.value` off that string gives undefined, so accept both shapes and
        // let normalized and raw literals share one key.
        const dt = typeof datatype === 'string' ? datatype : (datatype?.value || '');
        objKey = JSON.stringify({ t: 'Literal', v: object.value, lang: object.language || '', dt });
    } else {
        objKey = JSON.stringify({ t: object.termType, v: object.value });
    }
    return JSON.stringify([subject.value, predicate.value, objKey]);
}
|
|
365
|
+
|
|
366
|
+
/**
 * Returns a shallow copy of a quad whose literal object has explicit
 * `language` and `datatype` fields.
 *
 * For literal objects, `language` becomes `''` unless it is already a string,
 * and `datatype` becomes the datatype's IRI string (xsd:string when absent).
 * Non-literal objects are carried over unchanged.
 *
 * @param {?Object} q - Quad-like object with `subject`, `predicate`, `object`.
 * @returns {?Object} The normalized copy, or `null` for a falsy `q`.
 */
function normalizeQuad(q) {
    if (!q) return null;

    const { subject, predicate, object } = q;
    let normalizedObject = object;

    if (object?.termType === 'Literal') {
        const hasLanguage = typeof object.language === 'string';
        normalizedObject = {
            ...object,
            language: hasLanguage ? object.language : '',
            datatype: object.datatype?.value || 'http://www.w3.org/2001/XMLSchema#string'
        };
    }

    return { ...q, subject, predicate, object: normalizedObject };
}
|
|
376
|
+
|
|
377
|
+
/**
 * Serializes an object term into a comparable JSON signature.
 *
 * Literals produce `{ t: 'Literal', v, lang, dt }`; every other term produces
 * `{ t: termType, v }`.
 *
 * @param {?{termType: string, value: string, language: (string|undefined),
 *   datatype: (string|{value: string}|undefined)}} o - Object term.
 *   `datatype` may be a term or an already-flattened IRI string.
 * @returns {string} The signature, or `''` for a falsy term.
 */
function objectSignature(o) {
    if (!o) return '';
    if (o.termType !== 'Literal') {
        return JSON.stringify({ t: o.termType, v: o.value });
    }
    const { datatype } = o;
    // normalizeQuad() stores `datatype` as a plain string. Accept that shape
    // too, so a normalized literal keeps its datatype in the signature
    // instead of collapsing to ''.
    const dt = typeof datatype === 'string' ? datatype : (datatype?.value || '');
    return JSON.stringify({ t: 'Literal', v: o.value, lang: o.language || '', dt });
}
|
|
384
|
+
|
|
385
|
+
/**
 * Computes the quad-index key for a quad after passing it through
 * `normalizeQuad`.
 *
 * @param {?Object} q - Quad-like object.
 * @returns {?string} The index key, or `null` when `normalizeQuad` yields nothing.
 */
function quadToKeyForOrigin(q) {
    const normalized = normalizeQuad(q);
    if (!normalized) return null;
    return quadIndexKey(normalized.subject, normalized.predicate, normalized.object);
}
|
|
389
|
+
|
|
390
|
+
/**
 * Decodes a key of the shape produced by `quadIndexKey`.
 *
 * @param {string} key - JSON array `[subject, predicate, objectKey]`, where
 *   `objectKey` is itself a JSON string.
 * @returns {?{s: *, p: *, o: *}} The decoded parts, or `null` if decoding throws.
 */
function parseQuadIndexKey(key) {
    try {
        const parts = JSON.parse(key);
        const [subjectValue, predicateValue, serializedObject] = parts;
        const decodedObject = JSON.parse(serializedObject);
        return { s: subjectValue, p: predicateValue, o: decodedObject };
    } catch {
        // Malformed keys are reported as "no result" rather than thrown.
        return null;
    }
}
|
|
398
|
+
|
|
399
|
+
// Semantic Slot Utilities
|
|
400
|
+
/**
 * Derives the slot identifier shared by all quads with the same subject and
 * predicate.
 *
 * @param {{value: string}} subject - Subject term.
 * @param {{value: string}} predicate - Predicate term.
 * @returns {*} `hash()` of the two term values joined by `|`.
 */
function createSemanticSlotId(subject, predicate) {
    const { value: subjectValue } = subject;
    const { value: predicateValue } = predicate;
    return hash(`${subjectValue}|${predicateValue}`);
}
|
|
403
|
+
|
|
404
|
+
/**
 * Creates the bookkeeping record stored in the quad index for one emitted quad.
 *
 * @param {*} blockId - Id of the block that produced the quad.
 * @param {*} entryIndex - Index of the annotation entry that produced the quad.
 * @param {Object} [meta={}] - Extra fields merged over the defaults. When it
 *   carries both `subject` and `predicate`, a semantic slot id is derived.
 * @returns {Object} Slot record, occupied (`isVacant: false`) by default.
 */
function createSlotInfo(blockId, entryIndex, meta = {}) {
    let slotId = null;
    if (meta.subject && meta.predicate) {
        slotId = createSemanticSlotId(meta.subject, meta.predicate);
    }

    const defaults = {
        blockId,
        entryIndex,
        slotId,
        isVacant: false,
        lastValue: null,
        vacantSince: null
    };
    // Fields in `meta` win over the defaults above.
    return { ...defaults, ...meta };
}
|
|
416
|
+
|
|
417
|
+
/**
 * Returns a copy of a slot record flagged as vacant.
 *
 * @param {?Object} slotInfo - Slot record; not modified.
 * @param {*} deletedValue - Value remembered as `lastValue`.
 * @returns {?Object} Copy with `isVacant: true` and `vacantSince` set to the
 *   current `Date.now()`, or `null` for a falsy `slotInfo`.
 */
function markSlotAsVacant(slotInfo, deletedValue) {
    if (!slotInfo) return null;

    const vacancy = {
        isVacant: true,
        lastValue: deletedValue,
        vacantSince: Date.now()
    };
    return { ...slotInfo, ...vacancy };
}
|
|
426
|
+
|
|
427
|
+
/**
 * Looks up the first vacant slot record for a subject/predicate pair.
 *
 * @param {Map} quadIndex - Index whose values are slot records.
 * @param {{value: string}} subject - Subject term.
 * @param {{value: string}} predicate - Predicate term.
 * @returns {(Object|undefined)} The first record, in index iteration order,
 *   that has the matching slot id and is vacant; `undefined` if none.
 */
function findVacantSlot(quadIndex, subject, predicate) {
    const wantedSlotId = createSemanticSlotId(subject, predicate);
    for (const slot of Array.from(quadIndex.values())) {
        if (slot.slotId === wantedSlotId && slot.isVacant) {
            return slot;
        }
    }
    return undefined;
}
|
|
432
|
+
|
|
433
|
+
/**
 * Returns a copy of a vacant slot record marked as occupied again.
 *
 * @param {?Object} slotInfo - Slot record; not modified.
 * @param {*} newValue - Value stored as `lastValue`.
 * @returns {?Object} Occupied copy, or `null` when `slotInfo` is falsy or is
 *   not currently vacant.
 */
function occupySlot(slotInfo, newValue) {
    const isUsable = Boolean(slotInfo) && Boolean(slotInfo.isVacant);
    if (!isUsable) return null;

    const occupancy = {
        isVacant: false,
        lastValue: newValue,
        vacantSince: null
    };
    return { ...slotInfo, ...occupancy };
}
|
|
442
|
+
|
|
443
|
+
/**
 * Creates a quad, appends it to `quads`, and records its slot info in
 * `quadIndex`.
 *
 * Nothing happens when any of the three terms is falsy.
 *
 * @param {Array} quads - Output list the new quad is pushed onto.
 * @param {Map} quadIndex - Index from quad key to slot record.
 * @param {*} blockId - Id of the originating block.
 * @param {Object} subject - Subject term.
 * @param {Object} predicate - Predicate term.
 * @param {Object} object - Object term.
 * @param {Object} dataFactory - Factory providing `quad(subject, predicate, object)`.
 * @param {?Object} [meta=null] - Extra slot fields; its `entryIndex` is used as the slot's entry index.
 * @returns {void}
 */
function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory, meta = null) {
    const missingTerm = !subject || !predicate || !object;
    if (missingTerm) return;

    const quad = dataFactory.quad(subject, predicate, object);
    quads.push(quad);

    // The slot record carries the caller's metadata plus the terms themselves,
    // which lets createSlotInfo derive the semantic slot id.
    const slotMeta = { ...meta, subject, predicate, object };
    const slotInfo = createSlotInfo(blockId, meta?.entryIndex, slotMeta);

    const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
    quadIndex.set(key, slotInfo);
}
|
|
261
458
|
|
|
262
459
|
/**
 * Builds a literal term, typed or language-tagged as requested.
 *
 * A datatype takes precedence over a language tag.
 *
 * @param {string} value - Lexical value.
 * @param {?string} datatype - Datatype IRI or prefixed name; expanded with `context`.
 * @param {?string} language - Language tag.
 * @param {Object} context - Prefix context used to expand `datatype`.
 * @param {Object} dataFactory - Factory providing `literal()` and `namedNode()`.
 * @returns {Object} The literal created by `dataFactory`.
 */
function createLiteral(value, datatype, language, context, dataFactory) {
    if (datatype) {
        const datatypeNode = dataFactory.namedNode(expandIRI(datatype, context));
        return dataFactory.literal(value, datatypeNode);
    }
    return language
        ? dataFactory.literal(value, language)
        : dataFactory.literal(value);
}
|
|
271
464
|
|
|
272
|
-
// Core processing: handle subject/type declarations and property emissions
|
|
273
465
|
/**
 * Applies one parsed annotation to the parser state: updates the current
 * subject, records a block, and emits type and predicate quads.
 *
 * Predicate routing by form (S = current subject, L = literal built from the
 * carrier text, O = node built from the carrier URL, if any):
 *   ''   -> S P L
 *   '?'  -> previousSubject P newSubject when the annotation declares a
 *           subject, otherwise S P O when the carrier has a URL
 *   '^?' -> the same as '?' with subject and object swapped
 *   '^'  -> nothing is emitted
 *
 * @param {Object} carrier - Carrier with `text`, `range`, and optionally
 *   `url`, `attrsRange`, `valueRange`, `type`.
 * @param {Object} sem - Result of `parseSemanticBlock`.
 * @param {Object} state - Parser state (`currentSubject`, `df`, `ctx`,
 *   `quads`, `origin`); `currentSubject` and the origin maps are mutated.
 * @returns {void}
 */
function processAnnotation(carrier, sem, state) {
    if (sem.subject === 'RESET') {
        state.currentSubject = null;
        return;
    }

    const { df, ctx, quads, origin } = state;

    const previousSubject = state.currentSubject;
    const newSubject = sem.subject ? df.namedNode(expandIRI(sem.subject, ctx)) : null;
    if (newSubject) state.currentSubject = newSubject;

    const S = state.currentSubject;
    if (!S) return;

    const block = createBlock(
        S.value,
        sem.types,
        sem.predicates,
        sem.entries,
        carrier.range,
        carrier.attrsRange || null,
        carrier.valueRange || null,
        carrier.type || null,
        ctx
    );
    origin.blocks.set(block.id, block);

    const L = createLiteral(carrier.text, sem.datatype, sem.language, ctx, df);
    const O = carrier.url ? df.namedNode(expandIRI(carrier.url, ctx)) : null;

    const emit = (s, p, o, meta) => emitQuad(quads, origin.quadIndex, block.id, s, p, o, df, meta);

    sem.types.forEach((t) => {
        const isPlainString = typeof t === 'string';
        const typeIRI = isPlainString ? t : t.iri;
        const entryIndex = isPlainString ? null : t.entryIndex;
        // Types attach to the linked resource when the carrier has a URL.
        const typeSubject = O || S;
        const expandedType = expandIRI(typeIRI, ctx);
        emit(
            typeSubject,
            df.namedNode(expandIRI('rdf:type', ctx)),
            df.namedNode(expandedType),
            { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex }
        );
    });

    sem.predicates.forEach((pred) => {
        const P = df.namedNode(expandIRI(pred.iri, ctx));
        const token = `${pred.form}${pred.iri}`;
        const meta = { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex };

        switch (pred.form) {
            case '':
                emit(S, P, L, meta);
                break;
            case '?':
                if (newSubject) {
                    emit(previousSubject, P, newSubject, meta);
                } else if (O) {
                    emit(S, P, O, meta);
                }
                break;
            case '^?':
                if (newSubject) {
                    emit(newSubject, P, previousSubject, meta);
                } else if (O) {
                    emit(O, P, S, meta);
                }
                break;
            default:
                // Any other form, including '^', emits no quad.
                break;
        }
    });
}
|
|
339
513
|
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
const contextSubject = state.currentSubject;
|
|
343
|
-
if (!contextSubject) return;
|
|
514
|
+
function processListContext(contextSem, listTokens, state, contextSubject = null) {
|
|
515
|
+
if (!contextSubject) contextSubject = state.currentSubject;
|
|
344
516
|
|
|
345
517
|
listTokens.forEach(listToken => {
|
|
346
|
-
// Extract carriers from list item text
|
|
347
518
|
const carriers = extractInlineCarriers(listToken.text, listToken.range[0]);
|
|
348
|
-
|
|
349
|
-
// Find subject from carriers or list item annotation
|
|
350
519
|
let itemSubject = null;
|
|
351
520
|
let itemSubjectCarrier = null;
|
|
352
521
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
if (
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
itemSubjectCarrier = carrier;
|
|
359
|
-
break;
|
|
522
|
+
if (listToken.attrs) {
|
|
523
|
+
const itemSem = parseSemanticBlock(listToken.attrs);
|
|
524
|
+
if (itemSem.subject && itemSem.subject !== 'RESET') {
|
|
525
|
+
itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
|
|
526
|
+
itemSubjectCarrier = { type: 'list', text: listToken.text, attrs: listToken.attrs, range: listToken.range };
|
|
360
527
|
}
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
if (!itemSubject) {
|
|
531
|
+
for (const carrier of carriers) {
|
|
532
|
+
if (carrier.attrs) {
|
|
533
|
+
const itemSem = parseSemanticBlock(carrier.attrs);
|
|
534
|
+
if (itemSem.subject && itemSem.subject !== 'RESET') {
|
|
535
|
+
itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
|
|
536
|
+
itemSubjectCarrier = carrier;
|
|
537
|
+
break;
|
|
538
|
+
}
|
|
367
539
|
}
|
|
368
540
|
}
|
|
369
541
|
}
|
|
370
542
|
|
|
371
|
-
if (!itemSubject) return;
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
emitQuad(
|
|
376
|
-
state.quads,
|
|
377
|
-
state.origin.quadIndex,
|
|
378
|
-
'list-context',
|
|
379
|
-
itemSubject,
|
|
380
|
-
state.df.namedNode(expandIRI('rdf:type', state.ctx)),
|
|
381
|
-
state.df.namedNode(expandIRI(typeIRI, state.ctx)),
|
|
382
|
-
state.df
|
|
383
|
-
);
|
|
543
|
+
if (!itemSubject) return;
|
|
544
|
+
|
|
545
|
+
contextSem.types.forEach(t => {
|
|
546
|
+
const typeIRI = typeof t === 'string' ? t : t.iri;
|
|
547
|
+
emitQuad(state.quads, state.origin.quadIndex, 'list-context', itemSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)), state.df.namedNode(expandIRI(typeIRI, state.ctx)), state.df);
|
|
384
548
|
});
|
|
385
549
|
|
|
386
|
-
// Emit context relationships
|
|
387
550
|
contextSem.predicates.forEach(pred => {
|
|
388
551
|
const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
|
|
389
|
-
|
|
390
552
|
if (pred.form === '^' || pred.form === '^?') {
|
|
391
|
-
|
|
392
|
-
emitQuad(state.quads, state.origin.quadIndex, 'list-context',
|
|
393
|
-
itemSubject, P, contextSubject, state.df);
|
|
553
|
+
emitQuad(state.quads, state.origin.quadIndex, 'list-context', itemSubject, P, contextSubject, state.df);
|
|
394
554
|
} else {
|
|
395
|
-
|
|
396
|
-
emitQuad(state.quads, state.origin.quadIndex, 'list-context',
|
|
397
|
-
contextSubject, P, itemSubject, state.df);
|
|
555
|
+
emitQuad(state.quads, state.origin.quadIndex, 'list-context', contextSubject, P, itemSubject, state.df);
|
|
398
556
|
}
|
|
399
557
|
});
|
|
400
558
|
|
|
401
|
-
// Process item's own annotations
|
|
402
559
|
const prevSubject = state.currentSubject;
|
|
403
560
|
state.currentSubject = itemSubject;
|
|
404
561
|
|
|
405
|
-
// Process the list token's own attributes
|
|
406
562
|
if (listToken.attrs) {
|
|
407
563
|
const itemSem = parseSemanticBlock(listToken.attrs);
|
|
408
|
-
|
|
409
|
-
const carrier = { type: 'list', text: listToken.text.replace(/\[([^\]]+)\]\([^)]+\)/, '$1'), range: listToken.range };
|
|
564
|
+
const carrier = { type: 'list', text: listToken.text, range: listToken.range, attrsRange: listToken.attrsRange || null, valueRange: listToken.valueRange || null };
|
|
410
565
|
processAnnotation(carrier, itemSem, state);
|
|
411
566
|
}
|
|
412
567
|
|
|
413
|
-
// Process inline carriers' attributes
|
|
414
568
|
carriers.forEach(carrier => {
|
|
415
569
|
if (carrier.attrs) {
|
|
416
570
|
const itemSem = parseSemanticBlock(carrier.attrs);
|
|
@@ -432,8 +586,6 @@ export function parse(text, options = {}) {
|
|
|
432
586
|
};
|
|
433
587
|
|
|
434
588
|
const tokens = scanTokens(text);
|
|
435
|
-
|
|
436
|
-
// Apply prefix declarations
|
|
437
589
|
tokens.filter(t => t.type === 'prefix').forEach(t => state.ctx[t.prefix] = t.iri);
|
|
438
590
|
|
|
439
591
|
for (let i = 0; i < tokens.length; i++) {
|
|
@@ -441,18 +593,26 @@ export function parse(text, options = {}) {
|
|
|
441
593
|
|
|
442
594
|
if (token.type === 'heading' && token.attrs) {
|
|
443
595
|
const sem = parseSemanticBlock(token.attrs);
|
|
444
|
-
const carrier = { type: 'heading', text: token.text, range: token.range };
|
|
596
|
+
const carrier = { type: 'heading', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
|
|
445
597
|
processAnnotation(carrier, sem, state);
|
|
446
598
|
} else if (token.type === 'code' && token.attrs) {
|
|
447
599
|
const sem = parseSemanticBlock(token.attrs);
|
|
448
|
-
const carrier = { type: 'code', text: token.text, range: token.range };
|
|
600
|
+
const carrier = { type: 'code', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
|
|
449
601
|
processAnnotation(carrier, sem, state);
|
|
450
602
|
} else if (token.type === 'blockquote' && token.attrs) {
|
|
451
603
|
const sem = parseSemanticBlock(token.attrs);
|
|
452
|
-
const carrier = { type: 'blockquote', text: token.text, range: token.range };
|
|
604
|
+
const carrier = { type: 'blockquote', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
|
|
453
605
|
processAnnotation(carrier, sem, state);
|
|
454
606
|
} else if (token.type === 'para') {
|
|
455
|
-
// Check for
|
|
607
|
+
// Check for standalone subject declarations: {=iri} on its own line
|
|
608
|
+
const standaloneSubjectMatch = token.text.match(/^\s*\{=(.*?)\}\s*$/);
|
|
609
|
+
if (standaloneSubjectMatch) {
|
|
610
|
+
const sem = parseSemanticBlock(`{=${standaloneSubjectMatch[1]}}`);
|
|
611
|
+
const attrsStart = token.range[0] + token.text.indexOf('{=');
|
|
612
|
+
const attrsEnd = attrsStart + (standaloneSubjectMatch[1] ? standaloneSubjectMatch[1].length : 0);
|
|
613
|
+
processAnnotation({ type: 'standalone', text: '', range: token.range, attrsRange: [attrsStart, attrsEnd], valueRange: null }, sem, state);
|
|
614
|
+
}
|
|
615
|
+
|
|
456
616
|
const followingLists = [];
|
|
457
617
|
let j = i + 1;
|
|
458
618
|
while (j < tokens.length && tokens[j].type === 'list') {
|
|
@@ -460,17 +620,28 @@ export function parse(text, options = {}) {
|
|
|
460
620
|
j++;
|
|
461
621
|
}
|
|
462
622
|
|
|
463
|
-
// Check if this paragraph ends with {attrs} and is followed by lists
|
|
464
623
|
const contextMatch = token.text.match(/^(.+?)\s*\{([^}]+)\}$/);
|
|
465
624
|
if (contextMatch && followingLists.length > 0) {
|
|
466
|
-
// This is a list context annotation
|
|
467
625
|
const contextSem = parseSemanticBlock(`{${contextMatch[2]}}`);
|
|
468
|
-
|
|
626
|
+
let contextSubject = state.currentSubject;
|
|
627
|
+
|
|
628
|
+
// Always look for the most recent heading subject for context
|
|
629
|
+
for (let k = i - 1; k >= 0; k--) {
|
|
630
|
+
const prevToken = tokens[k];
|
|
631
|
+
if (prevToken.type === 'heading' && prevToken.attrs) {
|
|
632
|
+
const headingSem = parseSemanticBlock(prevToken.attrs);
|
|
633
|
+
if (headingSem.subject) {
|
|
634
|
+
contextSubject = state.df.namedNode(expandIRI(headingSem.subject, state.ctx));
|
|
635
|
+
break;
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
processListContext(contextSem, followingLists, state, contextSubject);
|
|
469
641
|
i = j - 1;
|
|
470
642
|
continue;
|
|
471
643
|
}
|
|
472
644
|
|
|
473
|
-
// Process inline carriers
|
|
474
645
|
const carriers = extractInlineCarriers(token.text, token.range[0]);
|
|
475
646
|
carriers.forEach(carrier => {
|
|
476
647
|
if (carrier.attrs) {
|
|
@@ -484,76 +655,442 @@ export function parse(text, options = {}) {
|
|
|
484
655
|
return { quads: state.quads, origin: state.origin, context: state.ctx };
|
|
485
656
|
}
|
|
486
657
|
|
|
487
|
-
function shortenIRI(iri, ctx) {
|
|
488
|
-
if (!iri || !iri.startsWith('http')) return iri;
|
|
489
658
|
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
659
|
+
// Text Processing Utilities
|
|
660
|
+
/**
 * Resolve one of a block's recorded ranges into offsets plus the text they cover.
 *
 * @param {object|null|undefined} block - Block carrying `attrsRange` and/or `valueRange`.
 * @param {string} text - Document text that the range offsets index into.
 * @param {string} [spanType='attrs'] - `'attrs'` selects `attrsRange`; any other value selects `valueRange`.
 * @returns {{start: number, end: number, text: string}|null} The span, or `null`
 *   when the range is missing or its bounds are not finite, non-negative and ordered.
 */
function readSpan(block, text, spanType = 'attrs') {
    const wantsAttrs = spanType === 'attrs';
    const range = wantsAttrs ? block?.attrsRange : block?.valueRange;
    if (!range) {
        return null;
    }

    const start = range.start;
    const end = range.end;
    const boundsAreFinite = Number.isFinite(start) && Number.isFinite(end);
    if (!boundsAreFinite || start < 0 || end < start) {
        return null;
    }

    return { start, end, text: text.substring(start, end) };
}
|
|
493
668
|
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
669
|
+
/**
 * Split the text of an annotation block into its whitespace-separated tokens.
 *
 * A leading `{` and a trailing `}` (with any surrounding whitespace) are
 * stripped before splitting.
 *
 * @param {string|null|undefined} attrsText - Raw annotation text, e.g. `"{.Type name}"`.
 * @returns {string[]} The tokens in order; empty when nothing remains.
 */
function normalizeAttrsTokens(attrsText) {
    const source = String(attrsText || '');
    const inner = source.replace(/^\s*\{|\}\s*$/g, '').trim();
    if (!inner) {
        return [];
    }
    return inner.split(/\s+/).filter(Boolean);
}
|
|
673
|
+
|
|
674
|
+
/**
 * Render a token list back into annotation-block syntax.
 *
 * @param {string[]} tokens - Tokens to place inside the braces.
 * @returns {string} The tokens joined by single spaces, trimmed, wrapped in `{` and `}`.
 */
function writeAttrsTokens(tokens) {
    const body = tokens.join(' ').trim();
    return '{' + body + '}';
}
|
|
677
|
+
|
|
678
|
+
/**
 * Remove the first token accepted by `matchFn`, without mutating the input.
 *
 * @param {string[]} tokens - Token list to search.
 * @param {(token: string, index: number, all: string[]) => boolean} matchFn - Predicate handed to `findIndex`.
 * @returns {{tokens: string[], removed: boolean}} On a match, a new array
 *   without that token and `removed: true`; otherwise the same array instance
 *   and `removed: false`.
 */
function removeOneToken(tokens, matchFn) {
    const hit = tokens.findIndex(matchFn);
    if (hit === -1) {
        return { tokens, removed: false };
    }

    // Work on a copy so the caller's array stays untouched.
    const remaining = [...tokens];
    remaining.splice(hit, 1);
    return { tokens: remaining, removed: true };
}
|
|
682
|
+
|
|
683
|
+
/**
 * Prepare a literal value for insertion into a block's value range.
 *
 * - `code` carriers keep their line structure; only `\r\n` and lone `\r`
 *   are converted to `\n`.
 * - All other carriers are collapsed onto one line and trimmed.
 * - `span` and `link` carriers additionally have `[` and `]` replaced by spaces.
 *
 * @param {object|null|undefined} block - Block whose `carrierType` selects the rule.
 * @param {*} raw - Value to sanitise; `null`/`undefined` become the empty string.
 * @returns {string} The sanitised value.
 */
function sanitizeCarrierValueForBlock(block, raw) {
    const value = String(raw ?? '');
    const carrier = block?.carrierType;

    if (carrier === 'code') {
        return value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
    }

    const flattened = value.replace(/[\n\r]+/g, ' ').trim();
    if (carrier === 'span' || carrier === 'link') {
        return flattened.replace(/[\[\]]/g, ' ');
    }
    return flattened;
}
|
|
690
|
+
|
|
691
|
+
/**
 * Collect the raw token text of each parsed entry of a block.
 *
 * @param {object|null|undefined} block - Block with an optional `entries` array.
 * @returns {string[]|null} The truthy `raw` values in entry order, or `null`
 *   when the block has no entries.
 */
function blockTokensFromEntries(block) {
    const entries = block?.entries;
    if (!entries?.length) {
        return null;
    }
    return entries.map(entry => entry.raw).filter(Boolean);
}
|
|
694
|
+
|
|
695
|
+
/**
 * Return a copy of a block's entries with the entry at `entryIndex` left out.
 *
 * @param {object|null|undefined} block - Block with an `entries` array.
 * @param {number|null|undefined} entryIndex - Position of the entry to drop.
 * @returns {Array|null} A new entries array, or `null` when the block has no
 *   entries or the index is nullish or out of bounds.
 */
function removeEntryAt(block, entryIndex) {
    const entries = block?.entries;
    if (!entries) return null;
    if (entryIndex == null) return null;
    if (entryIndex < 0 || entryIndex >= entries.length) return null;

    const before = entries.slice(0, entryIndex);
    const after = entries.slice(entryIndex + 1);
    return [...before, ...after];
}
|
|
699
|
+
|
|
700
|
+
/**
 * Build a new entry list for a block in which the language/datatype entries
 * reflect the given literal.
 *
 * Existing `language` and `datatype` entries are dropped. A language tag, if
 * present, is appended as `@lang`. Otherwise a datatype other than
 * `xsd:string` is appended as `^^datatype`, shortened against `ctx`.
 *
 * @param {object|null|undefined} block - Block with an `entries` array.
 * @param {object|null|undefined} lit - Literal term; `language` and `datatype.value` are read.
 * @param {object} ctx - Prefix context passed to `shortenIRI`.
 * @returns {Array|null} The new entry list, or `null` when the block has no `entries`.
 */
function replaceLangDatatypeEntries(block, lit, ctx) {
    const entries = block?.entries;
    if (!entries) {
        return null;
    }

    const kept = entries.filter(entry => !(entry.kind === 'language' || entry.kind === 'datatype'));

    // A language tag takes precedence; no datatype entry is written alongside it.
    const language = lit?.language;
    if (language) {
        return [...kept, { kind: 'language', language, raw: `@${language}`, relRange: { start: 0, end: 0 } }];
    }

    const datatypeIri = lit?.datatype?.value;
    if (datatypeIri && datatypeIri !== 'http://www.w3.org/2001/XMLSchema#string') {
        const shortDatatype = shortenIRI(datatypeIri, ctx);
        return [...kept, { kind: 'datatype', datatype: shortDatatype, raw: `^^${shortDatatype}`, relRange: { start: 0, end: 0 } }];
    }

    return [...kept];
}
|
|
499
711
|
|
|
500
|
-
|
|
712
|
+
/**
 * Token-level counterpart of the entry rewrite: replace any `@lang` /
 * `^^datatype` tokens with the ones matching the new literal.
 *
 * @param {string[]} tokens - Current annotation tokens.
 * @param {object|null|undefined} newLit - Literal term; `language` and `datatype.value` are read.
 * @param {object} ctx - Prefix context passed to `shortenIRI`.
 * @returns {string[]} A new token list: the remaining tokens, followed by
 *   `@lang` when the literal has a language, else by `^^datatype` when its
 *   datatype is not `xsd:string`.
 */
function updateAttrsDatatypeLang(tokens, newLit, ctx) {
    const kept = tokens.filter(token => !(token.startsWith('@') || token.startsWith('^^')));

    const language = newLit?.language;
    if (language) {
        return [...kept, `@${language}`];
    }

    const datatypeIri = newLit?.datatype?.value;
    const needsDatatypeToken = Boolean(datatypeIri) && datatypeIri !== 'http://www.w3.org/2001/XMLSchema#string';
    if (needsDatatypeToken) {
        return [...kept, `^^${shortenIRI(datatypeIri, ctx)}`];
    }

    return kept;
}
|
|
502
721
|
|
|
503
722
|
export function serialize({ text, diff, origin, options = {} }) {
|
|
504
|
-
if (!diff || (!diff.add?.length && !diff.delete?.length))
|
|
723
|
+
if (!diff || (!diff.add?.length && !diff.delete?.length)) {
|
|
724
|
+
const reparsed = parse(text, { context: options.context || {} });
|
|
725
|
+
return { text, origin: reparsed.origin };
|
|
726
|
+
}
|
|
505
727
|
|
|
728
|
+
const base = origin || parse(text, { context: options.context || {} }).origin;
|
|
506
729
|
let result = text;
|
|
507
730
|
const edits = [];
|
|
508
731
|
const ctx = options.context || {};
|
|
509
732
|
|
|
733
|
+
// Fallback lookup: scan the origin's quad index for a literal with the given
// subject, predicate and lexical value. The decoded key's `o.t` must be
// 'Literal' and `o.v` must equal `literalValue`; language and datatype are
// not compared. Returns the first matching index entry, or null.
const findOriginEntryForLiteralByValue = (subjectIri, predicateIri, literalValue) => {
    const index = base?.quadIndex || [];
    for (const [indexKey, indexEntry] of index) {
        const decoded = parseQuadIndexKey(indexKey);
        const isCandidate = Boolean(decoded)
            && decoded.s === subjectIri
            && decoded.p === predicateIri
            && decoded.o?.t === 'Literal';
        if (isCandidate && decoded.o?.v === literalValue) {
            return indexEntry;
        }
    }
    return null;
};
|
|
743
|
+
|
|
744
|
+
// Collect every origin block that carries a literal for the given subject and
// predicate. Each hit records the block, its quad-index entry and the decoded
// object part of the index key. Entries whose block cannot be resolved are
// skipped.
const findLiteralCarrierBlocksBySP = (subjectIri, predicateIri) => {
    const found = [];
    for (const [indexKey, entry] of base?.quadIndex || []) {
        const decoded = parseQuadIndexKey(indexKey);
        const matchesSP = Boolean(decoded) && decoded.s === subjectIri && decoded.p === predicateIri;
        if (!matchesSP || decoded.o?.t !== 'Literal') {
            continue;
        }

        // The index value is either an object with `blockId` or the id itself.
        const blockId = entry?.blockId || entry;
        if (!blockId) {
            continue;
        }

        const block = base?.blocks?.get(blockId);
        if (block) {
            found.push({ block, entry, obj: decoded.o });
        }
    }
    return found;
};
|
|
757
|
+
|
|
758
|
+
// For every deleted quad that resolves to an origin block with an attrs
// range, remember that block (and its index entry) under the quad's
// subject + object signature. A later add with the same subject and object
// can then be placed next to the annotation it replaces.
const anchors = new Map();
for (const rawDelete of diff.delete || []) {
    const removedQuad = normalizeQuad(rawDelete);
    if (!removedQuad?.subject || !removedQuad?.object || !removedQuad?.predicate) {
        continue;
    }

    const anchorKey = JSON.stringify([removedQuad.subject.value, objectSignature(removedQuad.object)]);
    const originKey = quadToKeyForOrigin(removedQuad);
    const entry = originKey ? base?.quadIndex?.get(originKey) : null;

    // The index value is either an object with `blockId` or the id itself.
    const blockId = entry?.blockId || entry;
    const block = blockId ? base?.blocks?.get(blockId) : null;
    if (block?.attrsRange) {
        anchors.set(anchorKey, { block, entry });
    }
}
|
|
771
|
+
|
|
772
|
+
// Group the well-formed added quads by subject + predicate so each deleted
// literal can look up its candidate replacements.
const addBySP = new Map();
for (const rawAdd of diff.add || []) {
    const addedQuad = normalizeQuad(rawAdd);
    if (!addedQuad?.subject || !addedQuad?.predicate || !addedQuad?.object) {
        continue;
    }

    const spKey = JSON.stringify([addedQuad.subject.value, addedQuad.predicate.value]);
    if (!addBySP.has(spKey)) {
        addBySP.set(spKey, []);
    }
    addBySP.get(spKey).push(addedQuad);
}
|
|
782
|
+
|
|
783
|
+
// Pair each deleted literal with an unconsumed added literal that has the
// same subject and predicate. Such a pair is treated as an in-place value
// update of the block that carried the deleted literal.
const consumedAdds = new Set();
const literalUpdates = [];
for (const rawDelete of diff.delete || []) {
    const removedQuad = normalizeQuad(rawDelete);
    if (!removedQuad?.subject || !removedQuad?.predicate || !removedQuad?.object) {
        continue;
    }
    if (removedQuad.object.termType !== 'Literal') {
        continue;
    }

    const spKey = JSON.stringify([removedQuad.subject.value, removedQuad.predicate.value]);
    const replacement = (addBySP.get(spKey) || []).find(candidate =>
        candidate?.object?.termType === 'Literal' && !consumedAdds.has(quadToKeyForOrigin(candidate))
    );
    if (!replacement) {
        continue;
    }

    // Exact index lookup first; otherwise fall back to matching by lexical
    // value (the object is known to be a literal at this point).
    const originKey = quadToKeyForOrigin(removedQuad);
    const exactEntry = originKey ? base?.quadIndex?.get(originKey) : null;
    const entry = exactEntry
        || findOriginEntryForLiteralByValue(removedQuad.subject.value, removedQuad.predicate.value, removedQuad.object.value);

    const blockId = entry?.blockId || entry;
    const block = blockId ? base?.blocks?.get(blockId) : null;
    if (!block) {
        continue;
    }

    literalUpdates.push({ deleteQuad: removedQuad, addQuad: replacement, entry, block });
    consumedAdds.add(quadToKeyForOrigin(replacement));
}
|
|
807
|
+
|
|
808
|
+
// Re-use vacated annotation slots: an added literal whose subject/predicate
// matches a vacant slot is written back into that slot's block instead of
// being appended elsewhere.
//
// Changes from the previous revision:
//  - The carrier value now goes through sanitizeCarrierValueForBlock, like
//    every other value edit, so newlines or brackets in the literal cannot
//    break the surrounding markup.
//  - The by-subject/predicate matching that followed the unconditional
//    `continue` could never run and has been removed.
for (const q0 of diff.add || []) {
    const quad = normalizeQuad(q0);
    if (!quad || quad.object?.termType !== 'Literal') continue;
    if (consumedAdds.has(quadToKeyForOrigin(quad))) continue;

    // Only additions that can reuse a vacant slot are handled here.
    const vacantSlot = findVacantSlot(base?.quadIndex, quad.subject, quad.predicate);
    if (!vacantSlot) continue;

    const block = base?.blocks?.get(vacantSlot.blockId);
    if (!block) continue;

    const span = readSpan(block, text, 'attrs');
    if (!span) continue;

    const occupiedSlot = occupySlot(vacantSlot, quad.object);
    if (!occupiedSlot) continue;

    // Replace the carrier text with the new literal value.
    const valueSpan = readSpan(block, text, 'value');
    if (valueSpan) {
        const newValue = sanitizeCarrierValueForBlock(block, quad.object.value);
        edits.push({ start: valueSpan.start, end: valueSpan.end, text: newValue });
    }

    // Restore the predicate token in the annotation block.
    // NOTE(review): no `@lang` / `^^datatype` token is written for the new
    // literal on this path — confirm whether that is intended.
    const tokens = normalizeAttrsTokens(span.text);
    const predToken = `${vacantSlot.form || ''}${shortenIRI(quad.predicate.value, ctx)}`;

    if (tokens.length === 0) {
        // Empty annotation block: replace it entirely.
        edits.push({ start: span.start, end: span.end, text: `{${predToken}}` });
    } else if (!tokens.includes(predToken)) {
        edits.push({ start: span.start, end: span.end, text: writeAttrsTokens([...tokens, predToken]) });
    }

    // Keyed the same way as every other consumedAdds lookup.
    consumedAdds.add(quadToKeyForOrigin(quad));
}
|
|
879
|
+
|
|
880
|
+
// Turn each paired literal update into text edits: one for the carrier value
// and, where needed, one for the annotation block's language/datatype tokens.
for (const update of literalUpdates) {
    const { block, addQuad } = update;
    const literal = addQuad.object;

    const valueSpan = readSpan(block, text, 'value');
    if (valueSpan) {
        const newValue = sanitizeCarrierValueForBlock(block, literal.value);
        edits.push({ start: valueSpan.start, end: valueSpan.end, text: newValue });
    }

    const attrsSpan = readSpan(block, text, 'attrs');
    if (!attrsSpan) {
        continue;
    }

    // `null` means the annotation block needs no rewrite.
    let nextTokens = null;
    if (block?.entries?.length) {
        // Structured path: rebuild from the block's parsed entries.
        const nextEntries = replaceLangDatatypeEntries(block, literal, ctx);
        if (nextEntries) {
            nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
        }
    } else {
        // Textual path: rewrite only when the token list actually changes.
        const currentTokens = normalizeAttrsTokens(attrsSpan.text);
        const revisedTokens = updateAttrsDatatypeLang(currentTokens, literal, ctx);
        if (revisedTokens.join(' ') !== currentTokens.join(' ')) {
            nextTokens = revisedTokens;
        }
    }
    if (nextTokens === null) {
        continue;
    }

    const rewritten = nextTokens.length === 0 ? '{}' : writeAttrsTokens(nextTokens);
    edits.push({ start: attrsSpan.start, end: attrsSpan.end, text: rewritten });
}
|
|
912
|
+
|
|
913
|
+
|
|
510
914
|
// Deletion pass: for each deleted quad, remove the matching token from the
// annotation block that produced it and, when the index entry has a slotId,
// mark that slot as vacant so a later add can reuse it.
if (diff.delete) {
    diff.delete.forEach(q0 => {
        const quad = normalizeQuad(q0);
        if (!quad) return;
        if (!quad?.subject || !quad?.predicate || !quad?.object) return;

        // Literals already paired with an add are handled as in-place value
        // updates (literalUpdates); their annotation must not be removed.
        if (quad.object.termType === 'Literal') {
            const isUpdated = literalUpdates.some(u =>
                u.deleteQuad.subject.value === quad.subject.value &&
                u.deleteQuad.predicate.value === quad.predicate.value &&
                u.deleteQuad.object.value === quad.object.value
            );
            if (isUpdated) return;
        }

        // Exact index lookup first; for literals fall back to matching by
        // subject, predicate and lexical value.
        const key = quadToKeyForOrigin(quad);
        let entry = key ? base?.quadIndex?.get(key) : null;
        if (!entry && quad.object?.termType === 'Literal') {
            entry = findOriginEntryForLiteralByValue(quad.subject.value, quad.predicate.value, quad.object.value);
        }

        // Mark the semantic slot as vacant for future reuse
        if (entry && entry.slotId) {
            // Capture block information before marking as vacant
            const block = base?.blocks?.get(entry.blockId);
            const blockInfo = block ? {
                id: entry.blockId,
                range: block.range,
                attrsRange: block.attrsRange,
                valueRange: block.valueRange,
                carrierType: block.carrierType,
                subject: block.subject,
                context: block.context
            } : null;

            const vacantSlot = markSlotAsVacant(entry, quad.object);
            if (vacantSlot && blockInfo) {
                vacantSlot.blockInfo = blockInfo;
                // NOTE(review): when `entry` came from the by-value fallback,
                // `key` is not the key the entry is stored under (the exact
                // lookup missed), so this adds a second index record rather
                // than replacing the original one — confirm this is intended.
                base.quadIndex.set(key, vacantSlot);
            }
        }

        // The index value is either an object with `blockId` or the id itself.
        const blockId = entry?.blockId || entry;
        if (!blockId) return;

        const block = base?.blocks?.get(blockId);
        if (!block) return;

        const span = readSpan(block, text, 'attrs');
        if (!span) return;

        // Handle entry removal by index
        if (entry?.entryIndex != null && block?.entries?.length) {
            const nextEntries = removeEntryAt(block, entry.entryIndex);
            if (!nextEntries) return;

            const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
            const newText = nextTokens.length === 0 ? '{}' : writeAttrsTokens(nextTokens);
            edits.push({ start: span.start, end: span.end, text: newText });
            return;
        }

        // No usable entry index: locate the token textually instead.
        const tokens = normalizeAttrsTokens(span.text);
        let updated = tokens;
        let removed = false;

        if (entry?.kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
            // Type tokens are written as `.Type`; compare on the expanded IRI.
            const expectedType = entry.expandedType || quad.object.value;
            ({ tokens: updated, removed } = removeOneToken(tokens, t => {
                if (!t.startsWith('.')) return false;
                const raw = t.slice(1);
                return expandIRI(raw, ctx) === expectedType;
            }));
        } else {
            // Predicate tokens may carry a form prefix (`^?`, `^`, `?` or none);
            // the form must match the entry's when the entry records one.
            const expectedPred = entry?.expandedPredicate || quad.predicate.value;
            const expectedForm = entry?.form;
            ({ tokens: updated, removed } = removeOneToken(tokens, t => {
                const m = String(t).match(/^(\^\?|\^|\?|)(.+)$/);
                if (!m) return false;
                const form = m[1] || '';
                const raw = m[2];
                if (expectedForm != null && form !== expectedForm) return false;
                return expandIRI(raw, ctx) === expectedPred;
            }));
        }

        if (!removed) return;

        // Keep an empty `{}` rather than deleting the annotation block.
        if (updated.length === 0) {
            edits.push({ start: span.start, end: span.end, text: '{}' });
            return;
        }

        const newAttrs = writeAttrsTokens(updated);
        edits.push({ start: span.start, end: span.end, text: newAttrs });
    });
}
|
|
534
1011
|
|
|
535
1012
|
// Addition pass: materialise every added quad that the earlier passes did
// not consume (paired literal updates and reused vacant slots).
//
// Changes from the previous revision:
//  - When no target block was found, a NamedNode object was emitted as
//    `[label] {=label) {?pred}` with a mismatched `)`. It now uses the same
//    `[label] {=label ?pred}` form as the other branch.
//  - Literal and NamedNode objects produced the same appended text whether or
//    not a target block existed, so the duplicated branches are merged and
//    the target-block lookup runs only for the remaining term types.
if (diff.add) {
    diff.add.forEach(q0 => {
        const quad = normalizeQuad(q0);
        if (!quad) return;
        if (!quad?.subject || !quad?.predicate || !quad?.object) return;

        if (consumedAdds.has(quadToKeyForOrigin(quad))) return;

        const predShort = shortenIRI(quad.predicate.value, ctx);
        // New carriers are appended at the end of the document.
        const appendAt = result.length;

        if (quad.object.termType === 'Literal') {
            const value = String(quad.object.value ?? '');
            let ann = predShort;
            const datatypeIri = quad.object.datatype?.value;
            if (quad.object.language) {
                ann += ` @${quad.object.language}`;
            } else if (datatypeIri && datatypeIri !== 'http://www.w3.org/2001/XMLSchema#string') {
                ann += ` ^^${shortenIRI(datatypeIri, ctx)}`;
            }
            edits.push({ start: appendAt, end: appendAt, text: `\n[${value}] {${ann}}` });
            return;
        }

        if (quad.object.termType === 'NamedNode') {
            const label = shortenIRI(quad.object.value, ctx);
            edits.push({ start: appendAt, end: appendAt, text: `\n[${label}] {=${label} ?${predShort}}` });
            return;
        }

        // Remaining term types: extend an existing annotation block. Prefer
        // the block a matching delete was anchored to, otherwise the first
        // block for the same subject that has an attrs range.
        const anchorKey = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
        const anchored = anchors.get(anchorKey) || null;
        let targetBlock = anchored?.block || null;

        if (!targetBlock) {
            for (const [, block] of base?.blocks || []) {
                if (block.subject === quad.subject.value && block.attrsRange) {
                    targetBlock = block;
                    break;
                }
            }
        }

        const span = readSpan(targetBlock, text, 'attrs');
        if (!span) return;
        const tokens = blockTokensFromEntries(targetBlock) || normalizeAttrsTokens(span.text);

        // NOTE(review): this rdf:type branch cannot run — NamedNode objects
        // always return above, as they did before this change. If type
        // additions are meant to become `.Type` tokens on the subject's block,
        // this check has to move ahead of the NamedNode append; confirm intent.
        if (quad.predicate.value.endsWith('rdf-syntax-ns#type') && quad.object?.termType === 'NamedNode') {
            const typeShort = shortenIRI(quad.object.value, ctx);
            const typeToken = typeShort.includes(':') || !typeShort.startsWith('http') ? `.${typeShort}` : null;
            if (!typeToken) return;
            if (tokens.includes(typeToken)) return;
            const updated = [...tokens, typeToken];
            edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
            return;
        }

        // A predicate token is written only when the anchored entry records
        // the form to use for it.
        const form = anchored?.entry?.form;
        if (form == null) return;
        const predToken = `${form}${predShort}`;
        if (!predToken) return;
        if (tokens.includes(predToken)) return;
        const updated = [...tokens, predToken];
        edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
    });
}
|
|
559
1096
|
|
|
@@ -562,7 +1099,44 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
562
1099
|
result = result.substring(0, edit.start) + edit.text + result.substring(edit.end);
|
|
563
1100
|
});
|
|
564
1101
|
|
|
565
|
-
|
|
1102
|
+
// Vacant slots exist only in the in-memory index, so a fresh parse of the
// edited text would lose them. Collect them first, reparse, then put them back.
const vacantSlots = new Map();
for (const [indexKey, slot] of base?.quadIndex || []) {
    if (slot.isVacant) {
        vacantSlots.set(indexKey, slot);
    }
}

const reparsed = parse(result, { context: options.context || {} });

for (const [indexKey, vacantSlot] of vacantSlots) {
    const info = vacantSlot.blockInfo;
    const blockIsMissing = !reparsed.origin.blocks.has(vacantSlot.blockId);

    // If the reparse no longer has the slot's block, rebuild an empty
    // placeholder from the information captured when the slot was vacated.
    if (blockIsMissing && info) {
        reparsed.origin.blocks.set(vacantSlot.blockId, {
            id: info.id,
            range: info.range || { start: 0, end: 0 },
            attrsRange: info.attrsRange,
            valueRange: info.valueRange,
            carrierType: info.carrierType || 'span',
            subject: info.subject || '',
            types: [],
            predicates: [],
            entries: [], // Empty entries - just {} annotation
            context: info.context || { ...ctx }
        });
    }

    // The slot itself is always restored into the new index.
    reparsed.origin.quadIndex.set(indexKey, vacantSlot);
}
|
|
1138
|
+
|
|
1139
|
+
return { text: result, origin: reparsed.origin };
|
|
566
1140
|
}
|
|
567
1141
|
|
|
568
|
-
export default { parse, serialize, parseSemanticBlock };
|
|
1142
|
+
export default { parse, serialize, parseSemanticBlock, shortenIRI };
|