mdld-parse 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/LICENCE +167 -0
  2. package/README.md +341 -190
  3. package/index.js +722 -284
  4. package/package.json +1 -1
package/index.js CHANGED
@@ -25,8 +25,9 @@ function hash(str) {
25
25
  }
26
26
 
27
27
  function expandIRI(term, ctx) {
28
- if (!term) return null;
29
- const t = term.trim();
28
+ if (term == null) return null;
29
+ const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
30
+ const t = raw.trim();
30
31
  if (t.match(/^https?:/)) return t;
31
32
  if (t.includes(':')) {
32
33
  const [prefix, ref] = t.split(':', 2);
@@ -35,56 +36,88 @@ function expandIRI(term, ctx) {
35
36
  return (ctx['@vocab'] || '') + t;
36
37
  }
37
38
 
38
- // Annotation parsing - explicit string operations
39
- function parseAnnotation(raw) {
39
+ function parseSemanticBlock(raw) {
40
40
  try {
41
- const cleaned = raw.replace(/^\{|\}$/g, '').trim();
42
- if (!cleaned) return { subject: null, entries: [], datatype: null, language: null };
41
+ const src = String(raw || '').trim();
42
+ const cleaned = src.replace(/^\{|\}$/g, '').trim();
43
+ if (!cleaned) return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
44
+
45
+ const result = { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
46
+ const re = /\S+/g;
47
+ let m;
48
+ while ((m = re.exec(cleaned)) !== null) {
49
+ const token = m[0];
50
+ const relStart = 1 + m.index;
51
+ const relEnd = relStart + token.length;
52
+ const entryIndex = result.entries.length;
53
+
54
+ if (token === '=') {
55
+ result.subject = 'RESET';
56
+ result.entries.push({ kind: 'subjectReset', relRange: { start: relStart, end: relEnd }, raw: token });
57
+ continue;
58
+ }
43
59
 
44
- // Validate quotes
45
- let quoteCount = 0;
46
- for (let i = 0; i < cleaned.length; i++) {
47
- if (cleaned[i] === '"') quoteCount++;
48
- }
49
- if (quoteCount % 2 !== 0) {
50
- console.warn(`Unbalanced quotes in annotation: ${raw}`);
51
- return { subject: null, entries: [], datatype: null, language: null };
52
- }
60
+ if (token.startsWith('=')) {
61
+ const iri = token.substring(1);
62
+ result.subject = iri;
63
+ result.entries.push({ kind: 'subject', iri, relRange: { start: relStart, end: relEnd }, raw: token });
64
+ continue;
65
+ }
53
66
 
54
- const result = { subject: null, entries: [], datatype: null, language: null };
55
- const parts = cleaned.split(/\s+/).filter(p => p);
67
+ if (token.startsWith('^^')) {
68
+ const datatype = token.substring(2);
69
+ if (!result.language) result.datatype = datatype;
70
+ result.entries.push({ kind: 'datatype', datatype, relRange: { start: relStart, end: relEnd }, raw: token });
71
+ continue;
72
+ }
56
73
 
57
- for (const part of parts) {
58
- if (part === '=') {
59
- result.subject = 'RESET';
60
- } else if (part.startsWith('=')) {
61
- result.subject = part.substring(1);
62
- } else if (part.startsWith('@')) {
63
- result.language = part.substring(1);
64
- } else if (part.startsWith('^^')) {
65
- result.datatype = part.substring(2);
66
- } else if (part.startsWith('^')) {
67
- result.entries.push({ kind: 'property', predicate: part.substring(1), direction: 'reverse' });
68
- } else if (part.startsWith('.')) {
69
- result.entries.push({ kind: 'type', classIRI: part.substring(1) });
70
- } else {
71
- result.entries.push({ kind: 'property', predicate: part, direction: 'forward' });
74
+ if (token.startsWith('@')) {
75
+ const language = token.substring(1);
76
+ result.language = language;
77
+ result.datatype = null;
78
+ result.entries.push({ kind: 'language', language, relRange: { start: relStart, end: relEnd }, raw: token });
79
+ continue;
80
+ }
81
+
82
+ if (token.startsWith('.')) {
83
+ const classIRI = token.substring(1);
84
+ result.types.push({ iri: classIRI, entryIndex });
85
+ result.entries.push({ kind: 'type', iri: classIRI, relRange: { start: relStart, end: relEnd }, raw: token });
86
+ continue;
72
87
  }
73
- }
74
88
 
75
- if (result.entries.length === 0 && !result.subject) {
76
- console.warn(`No valid entries found in annotation: ${raw}`);
77
- return { subject: null, entries: [], datatype: null, language: null };
89
+ if (token.startsWith('^?')) {
90
+ const iri = token.substring(2);
91
+ result.predicates.push({ iri, form: '^?', entryIndex });
92
+ result.entries.push({ kind: 'property', iri, form: '^?', relRange: { start: relStart, end: relEnd }, raw: token });
93
+ continue;
94
+ }
95
+
96
+ if (token.startsWith('^')) {
97
+ const iri = token.substring(1);
98
+ result.predicates.push({ iri, form: '^', entryIndex });
99
+ result.entries.push({ kind: 'property', iri, form: '^', relRange: { start: relStart, end: relEnd }, raw: token });
100
+ continue;
101
+ }
102
+
103
+ if (token.startsWith('?')) {
104
+ const iri = token.substring(1);
105
+ result.predicates.push({ iri, form: '?', entryIndex });
106
+ result.entries.push({ kind: 'property', iri, form: '?', relRange: { start: relStart, end: relEnd }, raw: token });
107
+ continue;
108
+ }
109
+
110
+ result.predicates.push({ iri: token, form: '', entryIndex });
111
+ result.entries.push({ kind: 'property', iri: token, form: '', relRange: { start: relStart, end: relEnd }, raw: token });
78
112
  }
79
113
 
80
114
  return result;
81
115
  } catch (error) {
82
- console.error(`Error parsing annotation ${raw}:`, error);
83
- return { subject: null, entries: [], datatype: null, language: null };
116
+ console.error(`Error parsing semantic block ${raw}:`, error);
117
+ return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
84
118
  }
85
119
  }
86
120
 
87
- // Token scanning - consolidated helpers
88
121
  function scanTokens(text) {
89
122
  const tokens = [];
90
123
  const lines = text.split('\n');
@@ -96,24 +129,32 @@ function scanTokens(text) {
96
129
  const lineStart = pos;
97
130
  pos += line.length + 1;
98
131
 
99
- // Code blocks
100
132
  if (line.startsWith('```')) {
101
133
  if (!codeBlock) {
102
134
  const fence = line.match(/^(`{3,})(.*)/);
135
+ const attrsText = fence[2].match(/\{[^}]+\}/)?.[0] || null;
136
+ const attrsStartInLine = attrsText ? line.indexOf(attrsText) : -1;
137
+ const contentStart = lineStart + line.length + 1;
103
138
  codeBlock = {
104
139
  fence: fence[1],
105
140
  start: lineStart,
106
141
  content: [],
107
- lang: fence[2].trim().split('{')[0].trim(),
108
- attrs: fence[2].match(/\{[^}]+\}/)?.[0]
142
+ lang: fence[2].trim().split(/[\s{]/)[0],
143
+ attrs: attrsText,
144
+ attrsRange: attrsText && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrsText.length] : null,
145
+ valueRangeStart: contentStart
109
146
  };
110
147
  } else if (line.startsWith(codeBlock.fence)) {
148
+ const valueStart = codeBlock.valueRangeStart;
149
+ const valueEnd = Math.max(valueStart, lineStart - 1);
111
150
  tokens.push({
112
151
  type: 'code',
113
152
  range: [codeBlock.start, lineStart],
114
153
  text: codeBlock.content.join('\n'),
115
154
  lang: codeBlock.lang,
116
- attrs: codeBlock.attrs
155
+ attrs: codeBlock.attrs,
156
+ attrsRange: codeBlock.attrsRange,
157
+ valueRange: [valueStart, valueEnd]
117
158
  });
118
159
  codeBlock = null;
119
160
  }
@@ -125,94 +166,110 @@ function scanTokens(text) {
125
166
  continue;
126
167
  }
127
168
 
128
- // Prefix declarations
129
169
  const prefixMatch = line.match(/^\[([^\]]+)\]\s*\{:\s*([^}]+)\}/);
130
170
  if (prefixMatch) {
131
171
  tokens.push({ type: 'prefix', prefix: prefixMatch[1], iri: prefixMatch[2].trim() });
132
172
  continue;
133
173
  }
134
174
 
135
- // Headings
136
175
  const headingMatch = line.match(/^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
137
176
  if (headingMatch) {
177
+ const attrs = headingMatch[3] || null;
178
+ const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
179
+ const afterHashes = headingMatch[1].length;
180
+ const ws = line.substring(afterHashes).match(/^\s+/)?.[0]?.length || 0;
181
+ const valueStartInLine = afterHashes + ws;
182
+ const valueEndInLine = valueStartInLine + headingMatch[2].length;
138
183
  tokens.push({
139
184
  type: 'heading',
140
185
  depth: headingMatch[1].length,
141
- range: [lineStart, pos],
186
+ range: [lineStart, pos - 1],
142
187
  text: headingMatch[2].trim(),
143
- attrs: headingMatch[3]
188
+ attrs,
189
+ attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
190
+ valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
144
191
  });
145
192
  continue;
146
193
  }
147
194
 
148
- // Lists
149
195
  const listMatch = line.match(/^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
150
196
  if (listMatch) {
197
+ const attrs = listMatch[4] || null;
198
+ const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
199
+ const prefix = listMatch[1].length + listMatch[2].length;
200
+ const ws = line.substring(prefix).match(/^\s+/)?.[0]?.length || 0;
201
+ const valueStartInLine = prefix + ws;
202
+ const valueEndInLine = valueStartInLine + listMatch[3].length;
151
203
  tokens.push({
152
204
  type: 'list',
153
205
  indent: listMatch[1].length,
154
- range: [lineStart, pos],
206
+ range: [lineStart, pos - 1],
155
207
  text: listMatch[3].trim(),
156
- attrs: listMatch[4]
208
+ attrs,
209
+ attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
210
+ valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
157
211
  });
158
212
  continue;
159
213
  }
160
214
 
161
- // Blockquotes
162
215
  const blockquoteMatch = line.match(/^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
163
216
  if (blockquoteMatch) {
217
+ const attrs = blockquoteMatch[2] || null;
218
+ const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
219
+ const prefixMatch = line.match(/^>\s+/);
220
+ const valueStartInLine = prefixMatch ? prefixMatch[0].length : 2;
221
+ const valueEndInLine = valueStartInLine + blockquoteMatch[1].length;
164
222
  tokens.push({
165
223
  type: 'blockquote',
166
- range: [lineStart, pos],
224
+ range: [lineStart, pos - 1],
167
225
  text: blockquoteMatch[1].trim(),
168
- attrs: blockquoteMatch[2]
226
+ attrs,
227
+ attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
228
+ valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
169
229
  });
170
230
  continue;
171
231
  }
172
232
 
173
- // Paragraphs
174
233
  if (line.trim()) {
175
- const paraMatch = line.match(/^(.+?)(?:\s*(\{[^}]+\}))?$/);
176
- if (paraMatch) {
177
- tokens.push({
178
- type: 'para',
179
- range: [lineStart, pos],
180
- text: paraMatch[1].trim(),
181
- attrs: paraMatch[2] || null
182
- });
183
- }
234
+ tokens.push({
235
+ type: 'para',
236
+ range: [lineStart, pos - 1],
237
+ text: line.trim(),
238
+ attrs: null
239
+ });
184
240
  }
185
241
  }
186
242
 
187
243
  return tokens;
188
244
  }
189
245
 
190
- // Inline value extraction - simplified
191
- function extractInlineValue(text, baseOffset = 0) {
192
- const spans = [];
246
+ function extractInlineCarriers(text, baseOffset = 0) {
247
+ const carriers = [];
193
248
  let pos = 0;
194
249
 
195
250
  while (pos < text.length) {
196
251
  const bracketStart = text.indexOf('[', pos);
197
- if (bracketStart === -1) {
198
- if (pos < text.length) spans.push({ type: 'text', text: text.substring(pos) });
199
- break;
200
- }
252
+ if (bracketStart === -1) break;
201
253
 
202
- if (bracketStart > pos) spans.push({ type: 'text', text: text.substring(pos, bracketStart) });
254
+ let bracketDepth = 1;
255
+ let bracketEnd = bracketStart + 1;
203
256
 
204
- const bracketEnd = text.indexOf(']', bracketStart);
205
- if (bracketEnd === -1) {
206
- spans.push({ type: 'text', text: text.substring(bracketStart) });
207
- break;
257
+ while (bracketEnd < text.length && bracketDepth > 0) {
258
+ if (text[bracketEnd] === '[') {
259
+ bracketDepth++;
260
+ } else if (text[bracketEnd] === ']') {
261
+ bracketDepth--;
262
+ }
263
+ bracketEnd++;
208
264
  }
209
265
 
210
- const spanText = text.substring(bracketStart + 1, bracketEnd);
211
- let spanEnd = bracketEnd + 1;
266
+ if (bracketDepth > 0) break;
267
+
268
+ const carrierText = text.substring(bracketStart + 1, bracketEnd - 1);
269
+ const valueRange = [baseOffset + bracketStart + 1, baseOffset + bracketEnd - 1];
270
+ let spanEnd = bracketEnd;
212
271
  let url = null;
213
- let attrs = null;
214
272
 
215
- // Parse link destination
216
273
  if (text[spanEnd] === '(') {
217
274
  const parenEnd = text.indexOf(')', spanEnd);
218
275
  if (parenEnd !== -1) {
@@ -221,200 +278,208 @@ function extractInlineValue(text, baseOffset = 0) {
221
278
  }
222
279
  }
223
280
 
224
- // Parse attributes
281
+ let attrs = null;
282
+ let attrsRange = null;
225
283
  const attrsMatch = text.substring(spanEnd).match(/^\s*\{([^}]+)\}/);
226
284
  if (attrsMatch) {
227
285
  attrs = `{${attrsMatch[1]}}`;
286
+ const braceIndex = attrsMatch[0].indexOf('{');
287
+ const absStart = baseOffset + spanEnd + (braceIndex >= 0 ? braceIndex : 0);
288
+ attrsRange = [absStart, absStart + attrs.length];
228
289
  spanEnd += attrsMatch[0].length;
229
290
  }
230
291
 
231
- spans.push({
232
- type: url ? 'link' : 'span',
233
- text: spanText,
234
- url: url,
292
+ let carrierType = 'span';
293
+ let resourceIRI = null;
294
+
295
+ if (url) {
296
+ if (url.startsWith('=')) {
297
+ pos = spanEnd;
298
+ continue;
299
+ } else {
300
+ carrierType = 'link';
301
+ resourceIRI = url;
302
+ }
303
+ }
304
+
305
+ carriers.push({
306
+ type: carrierType,
307
+ text: carrierText,
308
+ url: resourceIRI,
235
309
  attrs: attrs,
310
+ attrsRange,
311
+ valueRange,
236
312
  range: [baseOffset + bracketStart, baseOffset + spanEnd]
237
313
  });
238
314
 
239
315
  pos = spanEnd;
240
316
  }
241
317
 
242
- return spans.length ? spans : [{ type: 'text', text: text }];
318
+ return carriers;
243
319
  }
244
320
 
245
- // Core processing functions - consolidated
246
- function createBlock(subject, entries, range, ctx) {
247
- const expanded = entries.map(e => ({
248
- ...e,
249
- predicate: e.predicate ? expandIRI(e.predicate, ctx) : null,
250
- classIRI: e.classIRI ? expandIRI(e.classIRI, ctx) : null
251
- }));
252
-
253
- const blockId = hash([subject, ...expanded.map(e => JSON.stringify(e))].join('|'));
321
+ function createBlock(subject, types, predicates, entries, range, attrsRange, valueRange, carrierType, ctx) {
322
+ const expanded = {
323
+ subject,
324
+ types: types.map(t => expandIRI(typeof t === 'string' ? t : t.iri, ctx)),
325
+ predicates: predicates.map(p => ({ iri: expandIRI(p.iri, ctx), form: p.form }))
326
+ };
327
+ const blockId = hash([subject, JSON.stringify(expanded)].join('|'));
254
328
  return {
255
329
  id: blockId,
256
330
  range: { start: range[0], end: range[1] },
331
+ attrsRange: attrsRange ? { start: attrsRange[0], end: attrsRange[1] } : null,
332
+ valueRange: valueRange ? { start: valueRange[0], end: valueRange[1] } : null,
333
+ carrierType: carrierType || null,
257
334
  subject,
258
- entries: expanded,
335
+ types: expanded.types,
336
+ predicates: expanded.predicates,
337
+ entries: entries || [],
259
338
  context: { ...ctx }
260
339
  };
261
340
  }
262
341
 
263
- function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory) {
342
+ function quadIndexKey(subject, predicate, object) {
343
+ const objKey = object.termType === 'Literal'
344
+ ? JSON.stringify({ t: 'Literal', v: object.value, lang: object.language || '', dt: object.datatype?.value || '' })
345
+ : JSON.stringify({ t: object.termType, v: object.value });
346
+ return JSON.stringify([subject.value, predicate.value, objKey]);
347
+ }
348
+
349
+ function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory, meta = null) {
264
350
  if (!subject || !predicate || !object) return;
265
351
  const quad = dataFactory.quad(subject, predicate, object);
266
352
  quads.push(quad);
267
- quadIndex.set(JSON.stringify([quad.subject.value, quad.predicate.value, quad.object.value]), blockId);
353
+ quadIndex.set(quadIndexKey(quad.subject, quad.predicate, quad.object), meta ? { blockId, ...meta } : { blockId });
268
354
  }
269
355
 
270
- function createLiteralValue(value, datatype, language, context, dataFactory) {
356
+ function createLiteral(value, datatype, language, context, dataFactory) {
271
357
  if (datatype) return dataFactory.literal(value, dataFactory.namedNode(expandIRI(datatype, context)));
272
358
  if (language) return dataFactory.literal(value, language);
273
359
  return dataFactory.literal(value);
274
360
  }
275
361
 
276
- function processAnnotation(token, state, textContent = null) {
277
- if (!token.attrs) return;
278
-
279
- const ann = parseAnnotation(token.attrs);
280
- const originalSubject = state.currentSubject;
281
-
282
- // Handle subject declaration
283
- if (ann.subject === 'RESET') {
362
+ function processAnnotation(carrier, sem, state) {
363
+ if (sem.subject === 'RESET') {
284
364
  state.currentSubject = null;
285
365
  return;
286
366
  }
287
- if (ann.subject) {
288
- state.currentSubject = state.df.namedNode(expandIRI(ann.subject, state.ctx));
289
- }
290
367
 
291
- if (!originalSubject && !ann.subject) return;
368
+ const previousSubject = state.currentSubject;
369
+ let newSubject = sem.subject ? state.df.namedNode(expandIRI(sem.subject, state.ctx)) : null;
370
+ if (newSubject) state.currentSubject = newSubject;
292
371
 
293
- const targetSubject = ann.subject ?
294
- state.df.namedNode(expandIRI(ann.subject, state.ctx)) :
295
- originalSubject;
372
+ const S = state.currentSubject;
373
+ if (!S) return;
296
374
 
297
- const block = createBlock(targetSubject.value, ann.entries, token.range, state.ctx);
375
+ const block = createBlock(S.value, sem.types, sem.predicates, sem.entries, carrier.range, carrier.attrsRange || null, carrier.valueRange || null, carrier.type || null, state.ctx);
298
376
  state.origin.blocks.set(block.id, block);
299
377
 
300
- // Handle list context types
301
- if (token.type === 'list' && state.listContext?.types.length > 0) {
302
- state.listContext.types.forEach(typeIRI => {
303
- emitQuad(state.quads, state.origin.quadIndex, block.id,
304
- targetSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)),
305
- state.df.namedNode(typeIRI), state.df);
306
- });
307
- }
378
+ const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
379
+ const O = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
308
380
 
309
- // Emit triples
310
- ann.entries.forEach(e => {
311
- if (e.kind === 'type') {
312
- const typeSubject = token.url ?
313
- state.df.namedNode(expandIRI(token.url, state.ctx)) : targetSubject;
314
- emitQuad(state.quads, state.origin.quadIndex, block.id,
315
- typeSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)),
316
- state.df.namedNode(expandIRI(e.classIRI, state.ctx)), state.df);
317
- } else if (e.kind === 'property' && e.predicate) {
318
- const predicate = state.df.namedNode(expandIRI(e.predicate, state.ctx));
319
- let object;
320
-
321
- if (token.url) {
322
- object = state.df.namedNode(expandIRI(token.url, state.ctx));
323
- } else if (ann.subject && !token.url) {
324
- if (e.direction === 'reverse') {
325
- object = targetSubject;
326
- } else {
327
- object = token.type === 'code' ?
328
- createLiteralValue(textContent || token.text || '', ann.datatype, ann.language, state.ctx, state.df) :
329
- targetSubject;
330
- }
331
- } else {
332
- object = createLiteralValue(textContent || token.text || '', ann.datatype, ann.language, state.ctx, state.df);
333
- }
334
-
335
- const subject = e.direction === 'reverse' ? object :
336
- (ann.subject && !token.url && token.type !== 'code') ? originalSubject : targetSubject;
337
- const objectRef = e.direction === 'reverse' ? originalSubject : object;
381
+ sem.types.forEach(t => {
382
+ const typeIRI = typeof t === 'string' ? t : t.iri;
383
+ const entryIndex = typeof t === 'string' ? null : t.entryIndex;
384
+ const typeSubject = O || S;
385
+ const expandedType = expandIRI(typeIRI, state.ctx);
386
+ emitQuad(state.quads, state.origin.quadIndex, block.id, typeSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)), state.df.namedNode(expandedType), state.df, { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex });
387
+ });
338
388
 
339
- emitQuad(state.quads, state.origin.quadIndex, block.id, subject, predicate, objectRef, state.df);
389
+ sem.predicates.forEach(pred => {
390
+ const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
391
+ const token = `${pred.form}${pred.iri}`;
392
+
393
+ if (pred.form === '') {
394
+ emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, L, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
395
+ } else if (pred.form === '?') {
396
+ if (newSubject) {
397
+ emitQuad(state.quads, state.origin.quadIndex, block.id, previousSubject, P, newSubject, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
398
+ } else if (O) {
399
+ emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, O, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
400
+ }
401
+ } else if (pred.form === '^?') {
402
+ if (newSubject) {
403
+ emitQuad(state.quads, state.origin.quadIndex, block.id, newSubject, P, previousSubject, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
404
+ } else if (O) {
405
+ emitQuad(state.quads, state.origin.quadIndex, block.id, O, P, S, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
406
+ }
340
407
  }
341
408
  });
342
409
  }
343
410
 
344
- // List processing - simplified
345
- function setupListContext(token, state, nextToken) {
346
- if (!token.attrs || nextToken?.type !== 'list') return false;
411
+ function processListContext(contextSem, listTokens, state, contextSubject = null) {
412
+ if (!contextSubject) contextSubject = state.currentSubject;
347
413
 
348
- const ann = parseAnnotation(token.attrs);
349
- state.listContext = { predicate: null, types: [], reverse: false };
414
+ listTokens.forEach(listToken => {
415
+ const carriers = extractInlineCarriers(listToken.text, listToken.range[0]);
416
+ let itemSubject = null;
417
+ let itemSubjectCarrier = null;
350
418
 
351
- ann.entries.forEach(e => {
352
- if (e.kind === 'property') {
353
- state.listContext.predicate = expandIRI(e.predicate, state.ctx);
354
- state.listContext.reverse = e.direction === 'reverse';
419
+ if (listToken.attrs) {
420
+ const itemSem = parseSemanticBlock(listToken.attrs);
421
+ if (itemSem.subject && itemSem.subject !== 'RESET') {
422
+ itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
423
+ itemSubjectCarrier = { type: 'list', text: listToken.text, attrs: listToken.attrs, range: listToken.range };
424
+ }
355
425
  }
356
- if (e.kind === 'type') {
357
- state.listContext.types.push(expandIRI(e.classIRI, state.ctx));
426
+
427
+ if (!itemSubject) {
428
+ for (const carrier of carriers) {
429
+ if (carrier.attrs) {
430
+ const itemSem = parseSemanticBlock(carrier.attrs);
431
+ if (itemSem.subject && itemSem.subject !== 'RESET') {
432
+ itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
433
+ itemSubjectCarrier = carrier;
434
+ break;
435
+ }
436
+ }
437
+ }
358
438
  }
359
- });
360
- return true;
361
- }
362
439
 
363
- function processListItem(token, state) {
364
- const ann = parseAnnotation(token.attrs);
365
- const originalSubject = state.currentSubject;
440
+ if (!itemSubject) return;
366
441
 
367
- if (ann.subject) {
368
- state.currentSubject = state.df.namedNode(expandIRI(ann.subject, state.ctx));
369
- }
442
+ contextSem.types.forEach(t => {
443
+ const typeIRI = typeof t === 'string' ? t : t.iri;
444
+ emitQuad(state.quads, state.origin.quadIndex, 'list-context', itemSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)), state.df.namedNode(expandIRI(typeIRI, state.ctx)), state.df);
445
+ });
370
446
 
371
- // Process item properties
372
- ann.entries.forEach(e => {
373
- if (e.kind === 'type') {
374
- emitQuad(state.quads, state.origin.quadIndex, 'list-item',
375
- state.currentSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)),
376
- state.df.namedNode(expandIRI(e.classIRI, state.ctx)), state.df);
377
- } else if (e.kind === 'property' && e.predicate) {
378
- const predicate = state.df.namedNode(expandIRI(e.predicate, state.ctx));
379
- const object = createLiteralValue(token.text, ann.datatype, ann.language, state.ctx, state.df);
380
- emitQuad(state.quads, state.origin.quadIndex, 'list-item',
381
- state.currentSubject, predicate, object, state.df);
382
- }
383
- });
447
+ contextSem.predicates.forEach(pred => {
448
+ const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
449
+ if (pred.form === '^' || pred.form === '^?') {
450
+ emitQuad(state.quads, state.origin.quadIndex, 'list-context', itemSubject, P, contextSubject, state.df);
451
+ } else {
452
+ emitQuad(state.quads, state.origin.quadIndex, 'list-context', contextSubject, P, itemSubject, state.df);
453
+ }
454
+ });
384
455
 
385
- // Process list context relationship
386
- if (state.listContext?.predicate && originalSubject) {
387
- const predicate = state.df.namedNode(expandIRI(state.listContext.predicate, state.ctx));
388
- if (state.listContext.reverse) {
389
- emitQuad(state.quads, state.origin.quadIndex, 'list-context',
390
- state.currentSubject, predicate, originalSubject, state.df);
391
- } else {
392
- emitQuad(state.quads, state.origin.quadIndex, 'list-context',
393
- originalSubject, predicate, state.currentSubject, state.df);
456
+ const prevSubject = state.currentSubject;
457
+ state.currentSubject = itemSubject;
458
+
459
+ if (listToken.attrs) {
460
+ const itemSem = parseSemanticBlock(listToken.attrs);
461
+ const carrier = { type: 'list', text: listToken.text, range: listToken.range, attrsRange: listToken.attrsRange || null, valueRange: listToken.valueRange || null };
462
+ processAnnotation(carrier, itemSem, state);
394
463
  }
395
- }
396
464
 
397
- // Apply list context types
398
- if (state.listContext?.types.length > 0 && ann.subject) {
399
- state.listContext.types.forEach(type => {
400
- emitQuad(state.quads, state.origin.quadIndex, 'list-item',
401
- state.currentSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)),
402
- state.df.namedNode(expandIRI(type, state.ctx)), state.df);
465
+ carriers.forEach(carrier => {
466
+ if (carrier.attrs) {
467
+ const itemSem = parseSemanticBlock(carrier.attrs);
468
+ processAnnotation(carrier, itemSem, state);
469
+ }
403
470
  });
404
- }
405
471
 
406
- state.currentSubject = originalSubject;
472
+ state.currentSubject = prevSubject;
473
+ });
407
474
  }
408
475
 
409
- // Main parsing function
410
476
  export function parse(text, options = {}) {
411
477
  const state = {
412
478
  ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
413
479
  df: options.dataFactory || DataFactory,
414
480
  quads: [],
415
481
  origin: { blocks: new Map(), quadIndex: new Map() },
416
- currentSubject: null,
417
- listContext: null
482
+ currentSubject: null
418
483
  };
419
484
 
420
485
  const tokens = scanTokens(text);
@@ -422,122 +487,494 @@ export function parse(text, options = {}) {
422
487
 
423
488
  for (let i = 0; i < tokens.length; i++) {
424
489
  const token = tokens[i];
425
- const nextToken = tokens[i + 1];
426
-
427
- switch (token.type) {
428
- case 'heading':
429
- processAnnotation(token, state, token.text);
430
- break;
431
- case 'code':
432
- processAnnotation(token, state, token.text);
433
- break;
434
- case 'para':
435
- if (setupListContext(token, state, nextToken)) break;
436
- // Regular paragraphs are NOT value carriers per spec
437
- // Only process spans and links within paragraphs
438
- if (state.currentSubject) {
439
- const spans = extractInlineValue(token.text, token.range[0]);
440
-
441
- // Process annotated spans (value carriers)
442
- spans.filter(s => s.type === 'span' && s.attrs)
443
- .forEach(span => processAnnotation(span, state, span.text));
444
-
445
- // Process spans where paragraph has annotation
446
- if (token.attrs) {
447
- spans.filter(s => s.type === 'span')
448
- .forEach(span => {
449
- // Attach paragraph's annotation to the span
450
- const spanWithAttrs = { ...span, attrs: token.attrs };
451
- processAnnotation(spanWithAttrs, state, span.text);
452
- });
490
+
491
+ if (token.type === 'heading' && token.attrs) {
492
+ const sem = parseSemanticBlock(token.attrs);
493
+ const carrier = { type: 'heading', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
494
+ processAnnotation(carrier, sem, state);
495
+ } else if (token.type === 'code' && token.attrs) {
496
+ const sem = parseSemanticBlock(token.attrs);
497
+ const carrier = { type: 'code', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
498
+ processAnnotation(carrier, sem, state);
499
+ } else if (token.type === 'blockquote' && token.attrs) {
500
+ const sem = parseSemanticBlock(token.attrs);
501
+ const carrier = { type: 'blockquote', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
502
+ processAnnotation(carrier, sem, state);
503
+ } else if (token.type === 'para') {
504
+ // Check for standalone subject declarations: {=iri} on its own line
505
+ const standaloneSubjectMatch = token.text.match(/^\s*\{=(.*?)\}\s*$/);
506
+ if (standaloneSubjectMatch) {
507
+ const sem = parseSemanticBlock(`{=${standaloneSubjectMatch[1]}}`);
508
+ const attrsStart = token.range[0] + token.text.indexOf('{=');
509
+ const attrsEnd = attrsStart + (standaloneSubjectMatch[1] ? standaloneSubjectMatch[1].length : 0);
510
+ processAnnotation({ type: 'standalone', text: '', range: token.range, attrsRange: [attrsStart, attrsEnd], valueRange: null }, sem, state);
511
+ }
512
+
513
+ const followingLists = [];
514
+ let j = i + 1;
515
+ while (j < tokens.length && tokens[j].type === 'list') {
516
+ followingLists.push(tokens[j]);
517
+ j++;
518
+ }
519
+
520
+ const contextMatch = token.text.match(/^(.+?)\s*\{([^}]+)\}$/);
521
+ if (contextMatch && followingLists.length > 0) {
522
+ const contextSem = parseSemanticBlock(`{${contextMatch[2]}}`);
523
+ let contextSubject = state.currentSubject;
524
+
525
+ // Always look for the most recent heading subject for context
526
+ for (let k = i - 1; k >= 0; k--) {
527
+ const prevToken = tokens[k];
528
+ if (prevToken.type === 'heading' && prevToken.attrs) {
529
+ const headingSem = parseSemanticBlock(prevToken.attrs);
530
+ if (headingSem.subject) {
531
+ contextSubject = state.df.namedNode(expandIRI(headingSem.subject, state.ctx));
532
+ break;
533
+ }
453
534
  }
535
+ }
536
+
537
+ processListContext(contextSem, followingLists, state, contextSubject);
538
+ i = j - 1;
539
+ continue;
540
+ }
454
541
 
455
- // Process links (value carriers)
456
- spans.filter(s => s.type === 'link')
457
- .forEach(link => processAnnotation(link, state, link.text));
542
+ const carriers = extractInlineCarriers(token.text, token.range[0]);
543
+ carriers.forEach(carrier => {
544
+ if (carrier.attrs) {
545
+ const sem = parseSemanticBlock(carrier.attrs);
546
+ processAnnotation(carrier, sem, state);
458
547
  }
459
- break;
460
- case 'list':
461
- if (state.listContext) processListItem(token, state);
462
- break;
463
- case 'blockquote':
464
- if (state.currentSubject) processAnnotation(token, state, token.text);
465
- break;
548
+ });
466
549
  }
467
550
  }
468
551
 
469
552
  return { quads: state.quads, origin: state.origin, context: state.ctx };
470
553
  }
471
554
 
/**
 * Compacts an absolute http(s) IRI using the given context.
 *
 * Resolution order:
 *   1. If the IRI starts with `ctx['@vocab']`, the vocab part is stripped and
 *      the bare local name is returned.
 *   2. Otherwise the first context entry (in insertion order) whose namespace
 *      is a prefix of the IRI wins and `prefix:local` is returned.
 *   3. Otherwise the IRI is returned unchanged.
 *
 * @param {string} iri - IRI to compact. Falsy values, non-strings and strings
 *   that do not start with `http` are returned as-is.
 * @param {Object} [ctx={}] - Prefix map (`prefix -> namespace`), optionally
 *   with an `@vocab` entry.
 * @returns {*} The compacted form, or the input unchanged when nothing applies.
 */
export function shortenIRI(iri, ctx = {}) {
  // Only absolute http(s) IRIs are candidates; anything else passes through.
  if (!iri || typeof iri !== 'string' || !iri.startsWith('http')) return iri;

  // Tolerate an explicit null context the same way as a missing one.
  const context = ctx ?? {};

  const vocab = context['@vocab'];
  if (typeof vocab === 'string' && vocab && iri.startsWith(vocab)) {
    return iri.substring(vocab.length);
  }

  for (const [prefix, namespace] of Object.entries(context)) {
    if (prefix === '@vocab') continue;
    // An empty namespace is a prefix of every IRI and a non-string one would
    // be coerced by startsWith(); neither can produce a meaningful CURIE.
    if (typeof namespace !== 'string' || namespace === '') continue;
    if (iri.startsWith(namespace)) {
      return `${prefix}:${iri.substring(namespace.length)}`;
    }
  }

  return iri;
}
490
565
 
/**
 * Small, mostly pure helpers used by `serialize` to read spans out of the
 * source text, manipulate annotation token lists and normalise quads before
 * they are looked up in the origin index.
 */
const serializeHelpers = {
  /**
   * Reads the annotation span of an origin block from `text`.
   * Returns `{ start, end, text }`, or null when the block has no
   * `attrsRange` or the range is not a finite, non-empty `start < end` pair.
   */
  readAttrsSpan: (block, text) => {
    if (!block?.attrsRange) return null;
    const { start, end } = block.attrsRange;
    return (Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end > start)
      ? { start, end, text: text.substring(start, end) }
      : null;
  },

  /**
   * Reads the value span of an origin block from `text`.
   * Unlike readAttrsSpan, an empty span (`start === end`) is accepted.
   */
  readValueSpan: (block, text) => {
    if (!block?.valueRange) return null;
    const { start, end } = block.valueRange;
    return (Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end >= start)
      ? { start, end, text: text.substring(start, end) }
      : null;
  },

  /**
   * Strips one leading `{` and one trailing `}` (with surrounding whitespace)
   * and splits the remainder on whitespace. Returns [] for empty input.
   */
  normalizeAttrsTokens: (attrsText) => {
    const cleaned = String(attrsText || '').replace(/^\s*\{|\}\s*$/g, '').trim();
    return cleaned ? cleaned.split(/\s+/).filter(Boolean) : [];
  },

  /** Returns the truthy `raw` tokens of a block's entries, or null when it has no entries. */
  blockTokensFromEntries: (block) => (
    block?.entries?.length ? block.entries.map((e) => e.raw).filter(Boolean) : null
  ),

  /**
   * Returns a copy of `block.entries` without the entry at `entryIndex`,
   * or null when the block has no entries or the index is out of range.
   */
  removeEntryAt: (block, entryIndex) => {
    if (!block?.entries || entryIndex == null || entryIndex < 0 || entryIndex >= block.entries.length) return null;
    return [...block.entries.slice(0, entryIndex), ...block.entries.slice(entryIndex + 1)];
  },

  /**
   * Returns the block's entries with every language/datatype entry removed
   * and a fresh one appended for `lit`: `@lang` when the literal has a
   * language, otherwise `^^datatype` when the datatype is not xsd:string.
   * Returns null when the block has no entries array.
   */
  replaceLangDatatypeEntries: (block, lit, ctx) => {
    if (!block?.entries) return null;
    const filtered = block.entries.filter((e) => e.kind !== 'language' && e.kind !== 'datatype');
    const extras = [];
    if (lit?.language) {
      extras.push({ kind: 'language', language: lit.language, raw: `@${lit.language}`, relRange: { start: 0, end: 0 } });
    }
    const dt = lit?.datatype?.value;
    if (!lit?.language && dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
      const shortDt = shortenIRI(dt, ctx);
      extras.push({ kind: 'datatype', datatype: shortDt, raw: `^^${shortDt}`, relRange: { start: 0, end: 0 } });
    }
    return [...filtered, ...extras];
  },

  /** Joins tokens with single spaces and wraps them in braces. */
  writeAttrsTokens: (tokens) => `{${tokens.join(' ').trim()}}`,

  /**
   * Removes the first token matching `matchFn`.
   * Returns `{ tokens, removed }`; the input array is never mutated.
   */
  removeOneToken: (tokens, matchFn) => {
    const i = tokens.findIndex(matchFn);
    return i === -1
      ? { tokens, removed: false }
      : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
  },

  /**
   * Returns a shallow copy of the quad in which a Literal object always has a
   * string `language` and a datatype *term object* (xsd:string by default).
   * Non-literal objects are passed through. Returns null for a falsy quad.
   */
  normalizeQuad: (q) => {
    if (!q) return null;
    const { subject, predicate, object } = q;
    if (object?.termType === 'Literal') {
      const language = typeof object.language === 'string' ? object.language : '';
      // Keep the datatype as a term: callers read `object.datatype?.value`, so
      // collapsing it to a bare string would hide every non-default datatype.
      const datatype = object.datatype?.value
        ? object.datatype
        : { termType: 'NamedNode', value: 'http://www.w3.org/2001/XMLSchema#string' };
      // Object spread copies only own enumerable properties, so termType and
      // value are read explicitly in case the term exposes them via accessors.
      return {
        ...q,
        subject,
        predicate,
        object: { ...object, termType: object.termType, value: object.value, language, datatype },
      };
    }
    return { ...q, subject, predicate, object };
  },

  /** Builds the origin-index key for a quad via `quadIndexKey`, after normalisation. */
  quadToKeyForOrigin: (q) => {
    const nq = serializeHelpers.normalizeQuad(q);
    return nq ? quadIndexKey(nq.subject, nq.predicate, nq.object) : null;
  },

  /**
   * Decodes an origin-index key: a JSON array `[s, p, objKey]` whose third
   * element is itself a JSON string. Returns `{ s, p, o }`, or null when the
   * key cannot be parsed (callers simply skip such keys).
   */
  parseQuadIndexKey: (key) => {
    try {
      const [s, p, objKey] = JSON.parse(key);
      return { s, p, o: JSON.parse(objKey) };
    } catch {
      return null;
    }
  },

  /**
   * Makes a literal value safe to write into a carrier of the block's type:
   *  - `code`: only line endings are normalised to `\n`;
   *  - everything else: runs of newlines collapse to one space and the result is trimmed;
   *  - `span` / `link`: square brackets are additionally replaced by spaces.
   */
  sanitizeCarrierValueForBlock: (block, raw) => {
    const s = String(raw ?? '');
    const t = block?.carrierType;
    if (t === 'code') return s.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
    const oneLine = s.replace(/[\n\r]+/g, ' ').trim();
    return (t === 'span' || t === 'link') ? oneLine.replace(/[\[\]]/g, ' ') : oneLine;
  },
};
647
+
491
648
  export function serialize({ text, diff, origin, options = {} }) {
492
- if (!diff || (!diff.add?.length && !diff.delete?.length)) return { text, origin };
649
+ if (!diff || (!diff.add?.length && !diff.delete?.length)) {
650
+ const reparsed = parse(text, { context: options.context || {} });
651
+ return { text, origin: reparsed.origin };
652
+ }
493
653
 
654
+ const base = origin || parse(text, { context: options.context || {} }).origin;
494
655
  let result = text;
495
656
  const edits = [];
496
657
  const ctx = options.context || {};
497
658
 
498
- if (diff.delete) {
499
- diff.delete.forEach(quad => {
500
- const key = JSON.stringify([quad.subject.value, quad.predicate.value, quad.object.value]);
501
- const blockId = origin?.quadIndex.get(key);
502
- if (!blockId) return;
659
+ const findOriginEntryForLiteralByValue = (subjectIri, predicateIri, literalValue) => {
660
+ for (const [k, entry] of base?.quadIndex || []) {
661
+ const parsed = serializeHelpers.parseQuadIndexKey(k);
662
+ if (!parsed) continue;
663
+ if (parsed.s !== subjectIri || parsed.p !== predicateIri) continue;
664
+ if (parsed.o?.t !== 'Literal') continue;
665
+ if (parsed.o?.v !== literalValue) continue;
666
+ return entry;
667
+ }
668
+ return null;
669
+ };
670
+
671
+ const findLiteralCarrierBlocksBySP = (subjectIri, predicateIri) => {
672
+ const out = [];
673
+ for (const [k, entry] of base?.quadIndex || []) {
674
+ const parsed = serializeHelpers.parseQuadIndexKey(k);
675
+ if (!parsed) continue;
676
+ if (parsed.s !== subjectIri || parsed.p !== predicateIri) continue;
677
+ if (parsed.o?.t !== 'Literal') continue;
678
+ const blockId = entry?.blockId || entry;
679
+ const block = blockId ? base?.blocks?.get(blockId) : null;
680
+ if (block) out.push({ block, entry, obj: parsed.o });
681
+ }
682
+ return out;
683
+ };
684
+
685
+ const objectSignature = (o) => {
686
+ if (!o) return '';
687
+ if (o.termType === 'Literal') {
688
+ return JSON.stringify({ t: 'Literal', v: o.value, lang: o.language || '', dt: o.datatype?.value || '' });
689
+ }
690
+ return JSON.stringify({ t: o.termType, v: o.value });
691
+ };
692
+
693
+ const anchors = new Map();
694
+ for (const q0 of diff.delete || []) {
695
+ const q = serializeHelpers.normalizeQuad(q0);
696
+ if (!q) continue;
697
+ if (!q?.subject || !q?.object || !q?.predicate) continue;
698
+ const key = JSON.stringify([q.subject.value, objectSignature(q.object)]);
699
+ const qk = serializeHelpers.quadToKeyForOrigin(q);
700
+ const entry = qk ? base?.quadIndex?.get(qk) : null;
701
+ const blockId = entry?.blockId || entry;
702
+ const block = blockId ? base?.blocks?.get(blockId) : null;
703
+ if (!block?.attrsRange) continue;
704
+ anchors.set(key, { block, entry });
705
+ }
706
+
707
+ const addBySP = new Map();
708
+ for (const q0 of diff.add || []) {
709
+ const q = serializeHelpers.normalizeQuad(q0);
710
+ if (!q) continue;
711
+ if (!q?.subject || !q?.predicate || !q?.object) continue;
712
+ const k = JSON.stringify([q.subject.value, q.predicate.value]);
713
+ const list = addBySP.get(k) || [];
714
+ list.push(q);
715
+ addBySP.set(k, list);
716
+ }
503
717
 
504
- const block = origin.blocks.get(blockId);
505
- if (!block) return;
718
+ const consumedAdds = new Set();
719
+ const literalUpdates = [];
720
+ for (const dq0 of diff.delete || []) {
721
+ const dq = serializeHelpers.normalizeQuad(dq0);
722
+ if (!dq) continue;
723
+ if (!dq?.subject || !dq?.predicate || !dq?.object) continue;
724
+ if (dq.object.termType !== 'Literal') continue;
725
+ const k = JSON.stringify([dq.subject.value, dq.predicate.value]);
726
+ const candidates = addBySP.get(k) || [];
727
+ const aq = candidates.find(x => x?.object?.termType === 'Literal' && !consumedAdds.has(serializeHelpers.quadToKeyForOrigin(x)));
728
+ if (!aq) continue;
729
+
730
+ const dqk = serializeHelpers.quadToKeyForOrigin(dq);
731
+ let entry = dqk ? base?.quadIndex?.get(dqk) : null;
732
+ if (!entry && dq.object?.termType === 'Literal') {
733
+ entry = findOriginEntryForLiteralByValue(dq.subject.value, dq.predicate.value, dq.object.value);
734
+ }
735
+ const blockId = entry?.blockId || entry;
736
+ const block = blockId ? base?.blocks?.get(blockId) : null;
737
+ if (!block) continue;
738
+
739
+ literalUpdates.push({ deleteQuad: dq, addQuad: aq, entry, block });
740
+ consumedAdds.add(serializeHelpers.quadToKeyForOrigin(aq));
741
+ }
506
742
 
507
- const start = block.range.start;
508
- const end = block.range.end;
509
- const before = text.substring(Math.max(0, start - 1), start);
510
- const after = text.substring(end, end + 1);
511
- const deleteStart = before === '\n' ? start - 1 : start;
512
- const deleteEnd = after === '\n' ? end + 1 : end;
743
+ for (const q0 of diff.add || []) {
744
+ const quad = serializeHelpers.normalizeQuad(q0);
745
+ if (!quad || quad.object?.termType !== 'Literal') continue;
746
+ if (consumedAdds.has(serializeHelpers.quadToKeyForOrigin(quad))) continue;
513
747
 
514
- edits.push({ start: deleteStart, end: deleteEnd, text: '' });
748
+ const matches = findLiteralCarrierBlocksBySP(quad.subject.value, quad.predicate.value);
749
+ if (matches.length === 0) continue;
750
+
751
+ const desiredLang = quad.object.language || '';
752
+ const sameLang = matches.filter(m => {
753
+ const entries = m.block?.entries || [];
754
+ const langEntry = entries.find(e => e.kind === 'language');
755
+ const lang = langEntry?.language || '';
756
+ return lang === desiredLang;
515
757
  });
758
+
759
+ if (sameLang.length !== 1) continue;
760
+ const target = sameLang[0].block;
761
+ const vSpan = serializeHelpers.readValueSpan(target, text);
762
+ if (!vSpan) continue;
763
+
764
+ const newValue = serializeHelpers.sanitizeCarrierValueForBlock(target, quad.object.value);
765
+ edits.push({ start: vSpan.start, end: vSpan.end, text: newValue });
766
+
767
+ const aSpan = serializeHelpers.readAttrsSpan(target, text);
768
+ if (aSpan && target?.entries?.length) {
769
+ const nextEntries = serializeHelpers.replaceLangDatatypeEntries(target, quad.object, ctx);
770
+ if (nextEntries) {
771
+ const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
772
+ edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
773
+ }
774
+ }
775
+
776
+ consumedAdds.add(quad);
516
777
  }
517
778
 
518
- if (diff.add) {
519
- diff.add.forEach(quad => {
520
- let insertPos = result.length;
779
+ for (const u of literalUpdates) {
780
+ const span = serializeHelpers.readValueSpan(u.block, text);
781
+ if (span) {
782
+ const newValue = serializeHelpers.sanitizeCarrierValueForBlock(u.block, u.addQuad.object.value);
783
+ edits.push({ start: span.start, end: span.end, text: newValue });
784
+ }
521
785
 
522
- for (const [, block] of origin?.blocks || []) {
523
- if (block.subject === quad.subject.value) {
524
- insertPos = block.range.end;
525
- break;
786
+ const aSpan = serializeHelpers.readAttrsSpan(u.block, text);
787
+ if (aSpan) {
788
+ if (u.block?.entries?.length) {
789
+ const nextEntries = serializeHelpers.replaceLangDatatypeEntries(u.block, u.addQuad.object, ctx);
790
+ if (nextEntries) {
791
+ const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
792
+ if (nextTokens.length === 0) {
793
+ edits.push({ start: aSpan.start, end: aSpan.end, text: '{}' });
794
+ } else {
795
+ edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
796
+ }
797
+ }
798
+ } else {
799
+ const tokens = serializeHelpers.normalizeAttrsTokens(aSpan.text);
800
+ const updated = updateAttrsDatatypeLang(tokens, u.addQuad.object);
801
+ if (updated.join(' ') !== tokens.join(' ')) {
802
+ if (updated.length === 0) {
803
+ edits.push({ start: aSpan.start, end: aSpan.end, text: '{}' });
804
+ } else {
805
+ edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(updated) });
806
+ }
526
807
  }
527
808
  }
809
+ }
810
+ }
811
+
812
+ const updateAttrsDatatypeLang = (tokens, newLit) => {
813
+ const predicatesAndTypes = tokens.filter(t => !t.startsWith('@') && !t.startsWith('^^'));
814
+ if (newLit?.language) return [...predicatesAndTypes, `@${newLit.language}`];
815
+ const dt = newLit?.datatype?.value;
816
+ if (dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
817
+ return [...predicatesAndTypes, `^^${shortenIRI(dt, ctx)}`];
818
+ }
819
+ return predicatesAndTypes;
820
+ };
528
821
 
529
- const pred = shortenIRI(quad.predicate.value, ctx);
530
- let objText;
822
+ if (diff.delete) {
823
+ diff.delete.forEach(q0 => {
824
+ const quad = serializeHelpers.normalizeQuad(q0);
825
+ if (!quad) return;
826
+ if (!quad?.subject || !quad?.predicate || !quad?.object) return;
531
827
 
532
828
  if (quad.object.termType === 'Literal') {
533
- objText = quad.object.value;
829
+ const isUpdated = literalUpdates.some(u =>
830
+ u.deleteQuad.subject.value === quad.subject.value &&
831
+ u.deleteQuad.predicate.value === quad.predicate.value &&
832
+ u.deleteQuad.object.value === quad.object.value
833
+ );
834
+ if (isUpdated) return;
835
+ }
836
+
837
+ const key = serializeHelpers.quadToKeyForOrigin(quad);
838
+ let entry = key ? base?.quadIndex?.get(key) : null;
839
+ if (!entry && quad.object?.termType === 'Literal') {
840
+ entry = findOriginEntryForLiteralByValue(quad.subject.value, quad.predicate.value, quad.object.value);
841
+ }
842
+ const blockId = entry?.blockId || entry;
843
+ if (!blockId) return;
844
+ const block = base?.blocks?.get(blockId);
845
+ const span = serializeHelpers.readAttrsSpan(block, text);
846
+ if (!span) return;
847
+
848
+ if (entry?.entryIndex != null && block?.entries?.length) {
849
+ const nextEntries = serializeHelpers.removeEntryAt(block, entry.entryIndex);
850
+ if (!nextEntries) return;
851
+ const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
852
+ if (nextTokens.length === 0) {
853
+ edits.push({ start: span.start, end: span.end, text: '{}' });
854
+ } else {
855
+ edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
856
+ }
857
+ return;
858
+ }
859
+
860
+ const tokens = serializeHelpers.normalizeAttrsTokens(span.text);
861
+ let updated = tokens;
862
+ let removed = false;
863
+
864
+ if (entry?.kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
865
+ const expectedType = entry.expandedType || quad.object.value;
866
+ ({ tokens: updated, removed } = serializeHelpers.removeOneToken(tokens, t => {
867
+ if (!t.startsWith('.')) return false;
868
+ const raw = t.slice(1);
869
+ return expandIRI(raw, ctx) === expectedType;
870
+ }));
534
871
  } else {
535
- objText = shortenIRI(quad.object.value, ctx);
872
+ const expectedPred = entry?.expandedPredicate || quad.predicate.value;
873
+ const expectedForm = entry?.form;
874
+ ({ tokens: updated, removed } = serializeHelpers.removeOneToken(tokens, t => {
875
+ const m = String(t).match(/^(\^\?|\^|\?|)(.+)$/);
876
+ if (!m) return false;
877
+ const form = m[1] || '';
878
+ const raw = m[2];
879
+ if (expectedForm != null && form !== expectedForm) return false;
880
+ return expandIRI(raw, ctx) === expectedPred;
881
+ }));
536
882
  }
537
883
 
538
- const newLine = `\n[${objText}] {${pred}}`;
884
+ if (!removed) return;
885
+
886
+ if (updated.length === 0) {
887
+ edits.push({ start: span.start, end: span.end, text: '{}' });
888
+ return;
889
+ }
890
+
891
+ const newAttrs = serializeHelpers.writeAttrsTokens(updated);
892
+ edits.push({ start: span.start, end: span.end, text: newAttrs });
893
+ });
894
+ }
895
+
896
+ if (diff.add) {
897
+ diff.add.forEach(q0 => {
898
+ const quad = serializeHelpers.normalizeQuad(q0);
899
+ if (!quad) return;
900
+ if (!quad?.subject || !quad?.predicate || !quad?.object) return;
901
+
902
+ if (consumedAdds.has(serializeHelpers.quadToKeyForOrigin(quad))) return;
903
+
904
+ const anchorKey = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
905
+ const anchored = anchors.get(anchorKey) || null;
906
+ let targetBlock = anchored?.block || null;
907
+
908
+ if (!targetBlock) {
909
+ for (const [, block] of base?.blocks || []) {
910
+ if (block.subject === quad.subject.value && block.attrsRange) {
911
+ targetBlock = block;
912
+ break;
913
+ }
914
+ }
915
+ }
916
+
917
+ if (quad.object.termType === 'Literal' || quad.object.termType === 'NamedNode') {
918
+ if (!targetBlock) {
919
+ const predShort = shortenIRI(quad.predicate.value, ctx);
920
+ if (quad.object.termType === 'Literal') {
921
+ const value = String(quad.object.value ?? '');
922
+ let ann = predShort;
923
+ if (quad.object.language) ann += ` @${quad.object.language}`;
924
+ else if (quad.object.datatype?.value && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
925
+ ann += ` ^^${shortenIRI(quad.object.datatype.value, ctx)}`;
926
+ }
927
+ edits.push({ start: result.length, end: result.length, text: `\n[${value}] {${ann}}` });
928
+ } else {
929
+ const full = quad.object.value;
930
+ const label = shortenIRI(full, ctx);
931
+ edits.push({ start: result.length, end: result.length, text: `\n[${label}](${full}) {?${predShort}}` });
932
+ }
933
+ return;
934
+ }
935
+
936
+ const predShort = shortenIRI(quad.predicate.value, ctx);
937
+ if (quad.object.termType === 'Literal') {
938
+ const value = String(quad.object.value ?? '');
939
+ let ann = predShort;
940
+ if (quad.object.language) ann += ` @${quad.object.language}`;
941
+ else if (quad.object.datatype?.value && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
942
+ ann += ` ^^${shortenIRI(quad.object.datatype.value, ctx)}`;
943
+ }
944
+ edits.push({ start: result.length, end: result.length, text: `\n[${value}] {${ann}}` });
945
+ return;
946
+ }
947
+
948
+ if (quad.object.termType === 'NamedNode') {
949
+ const full = quad.object.value;
950
+ const label = shortenIRI(full, ctx);
951
+ edits.push({ start: result.length, end: result.length, text: `\n[${label}](${full}) {?${predShort}}` });
952
+ return;
953
+ }
954
+ }
955
+
956
+ const span = serializeHelpers.readAttrsSpan(targetBlock, text);
957
+ if (!span) return;
958
+ const tokens = serializeHelpers.blockTokensFromEntries(targetBlock) || serializeHelpers.normalizeAttrsTokens(span.text);
959
+
960
+ if (quad.predicate.value.endsWith('rdf-syntax-ns#type') && quad.object?.termType === 'NamedNode') {
961
+ const typeShort = shortenIRI(quad.object.value, ctx);
962
+ const typeToken = typeShort.includes(':') || !typeShort.startsWith('http') ? `.${typeShort}` : null;
963
+ if (!typeToken) return;
964
+ if (tokens.includes(typeToken)) return;
965
+ const updated = [...tokens, typeToken];
966
+ edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(updated) });
967
+ return;
968
+ }
539
969
 
540
- edits.push({ start: insertPos, end: insertPos, text: newLine });
970
+ const form = anchored?.entry?.form;
971
+ if (form == null) return;
972
+ const predShort = shortenIRI(quad.predicate.value, ctx);
973
+ const predToken = `${form}${predShort}`;
974
+ if (!predToken) return;
975
+ if (tokens.includes(predToken)) return;
976
+ const updated = [...tokens, predToken];
977
+ edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(updated) });
541
978
  });
542
979
  }
543
980
 
@@ -546,7 +983,8 @@ export function serialize({ text, diff, origin, options = {} }) {
546
983
  result = result.substring(0, edit.start) + edit.text + result.substring(edit.end);
547
984
  });
548
985
 
549
- return { text: result, origin };
986
+ const reparsed = parse(result, { context: options.context || {} });
987
+ return { text: result, origin: reparsed.origin };
550
988
  }
551
989
 
552
- export default { parse, serialize, parseAnnotation };
990
+ export default { parse, serialize, parseSemanticBlock, shortenIRI };