mdld-parse 0.2.3 → 0.2.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +22 -27
  2. package/index.js +775 -201
  3. package/package.json +1 -1
package/index.js CHANGED
@@ -24,9 +24,11 @@ function hash(str) {
24
24
  return Math.abs(h).toString(16).slice(0, 12);
25
25
  }
26
26
 
27
+ // IRI Utilities
27
28
  function expandIRI(term, ctx) {
28
- if (!term) return null;
29
- const t = term.trim();
29
+ if (term == null) return null;
30
+ const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
31
+ const t = raw.trim();
30
32
  if (t.match(/^https?:/)) return t;
31
33
  if (t.includes(':')) {
32
34
  const [prefix, ref] = t.split(':', 2);
@@ -35,40 +37,100 @@ function expandIRI(term, ctx) {
35
37
  return (ctx['@vocab'] || '') + t;
36
38
  }
37
39
 
40
+ export function shortenIRI(iri, ctx) {
41
+ if (!iri || !iri.startsWith('http')) return iri;
42
+ if (ctx['@vocab'] && iri.startsWith(ctx['@vocab'])) return iri.substring(ctx['@vocab'].length);
43
+ for (const [prefix, namespace] of Object.entries(ctx)) {
44
+ if (prefix !== '@vocab' && iri.startsWith(namespace)) {
45
+ return prefix + ':' + iri.substring(namespace.length);
46
+ }
47
+ }
48
+ return iri;
49
+ }
50
+
51
+ function processIRI(term, ctx, operation = 'expand') {
52
+ return operation === 'expand' ? expandIRI(term, ctx) : shortenIRI(term, ctx);
53
+ }
54
+
38
55
  function parseSemanticBlock(raw) {
39
56
  try {
40
- const cleaned = raw.replace(/^\{|\}$/g, '').trim();
41
- if (!cleaned) return { subject: null, types: [], predicates: [], datatype: null, language: null };
42
-
43
- const result = { subject: null, types: [], predicates: [], datatype: null, language: null };
44
- const tokens = cleaned.split(/\s+/).filter(t => t);
57
+ const src = String(raw || '').trim();
58
+ const cleaned = src.replace(/^\{|\}$/g, '').trim();
59
+ if (!cleaned) return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
60
+
61
+ const result = { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
62
+ const re = /\S+/g;
63
+ let m;
64
+ while ((m = re.exec(cleaned)) !== null) {
65
+ const token = m[0];
66
+ const relStart = 1 + m.index;
67
+ const relEnd = relStart + token.length;
68
+ const entryIndex = result.entries.length;
45
69
 
46
- for (const token of tokens) {
47
70
  if (token === '=') {
48
71
  result.subject = 'RESET';
49
- } else if (token.startsWith('=')) {
50
- result.subject = token.substring(1);
51
- } else if (token.startsWith('^^')) {
52
- result.datatype = token.substring(2);
53
- } else if (token.startsWith('@')) {
54
- result.language = token.substring(1);
55
- } else if (token.startsWith('.')) {
56
- result.types.push(token.substring(1));
57
- } else if (token.startsWith('^?')) {
58
- result.predicates.push({ iri: token.substring(2), form: '^?' });
59
- } else if (token.startsWith('^')) {
60
- result.predicates.push({ iri: token.substring(1), form: '^' });
61
- } else if (token.startsWith('?')) {
62
- result.predicates.push({ iri: token.substring(1), form: '?' });
63
- } else {
64
- result.predicates.push({ iri: token, form: '' });
72
+ result.entries.push({ kind: 'subjectReset', relRange: { start: relStart, end: relEnd }, raw: token });
73
+ continue;
74
+ }
75
+
76
+ if (token.startsWith('=')) {
77
+ const iri = token.substring(1);
78
+ result.subject = iri;
79
+ result.entries.push({ kind: 'subject', iri, relRange: { start: relStart, end: relEnd }, raw: token });
80
+ continue;
81
+ }
82
+
83
+ if (token.startsWith('^^')) {
84
+ const datatype = token.substring(2);
85
+ if (!result.language) result.datatype = datatype;
86
+ result.entries.push({ kind: 'datatype', datatype, relRange: { start: relStart, end: relEnd }, raw: token });
87
+ continue;
88
+ }
89
+
90
+ if (token.startsWith('@')) {
91
+ const language = token.substring(1);
92
+ result.language = language;
93
+ result.datatype = null;
94
+ result.entries.push({ kind: 'language', language, relRange: { start: relStart, end: relEnd }, raw: token });
95
+ continue;
96
+ }
97
+
98
+ if (token.startsWith('.')) {
99
+ const classIRI = token.substring(1);
100
+ result.types.push({ iri: classIRI, entryIndex });
101
+ result.entries.push({ kind: 'type', iri: classIRI, relRange: { start: relStart, end: relEnd }, raw: token });
102
+ continue;
103
+ }
104
+
105
+ if (token.startsWith('^?')) {
106
+ const iri = token.substring(2);
107
+ result.predicates.push({ iri, form: '^?', entryIndex });
108
+ result.entries.push({ kind: 'property', iri, form: '^?', relRange: { start: relStart, end: relEnd }, raw: token });
109
+ continue;
110
+ }
111
+
112
+ if (token.startsWith('^')) {
113
+ const iri = token.substring(1);
114
+ result.predicates.push({ iri, form: '^', entryIndex });
115
+ result.entries.push({ kind: 'property', iri, form: '^', relRange: { start: relStart, end: relEnd }, raw: token });
116
+ continue;
65
117
  }
118
+
119
+ if (token.startsWith('?')) {
120
+ const iri = token.substring(1);
121
+ result.predicates.push({ iri, form: '?', entryIndex });
122
+ result.entries.push({ kind: 'property', iri, form: '?', relRange: { start: relStart, end: relEnd }, raw: token });
123
+ continue;
124
+ }
125
+
126
+ result.predicates.push({ iri: token, form: '', entryIndex });
127
+ result.entries.push({ kind: 'property', iri: token, form: '', relRange: { start: relStart, end: relEnd }, raw: token });
66
128
  }
67
129
 
68
130
  return result;
69
131
  } catch (error) {
70
132
  console.error(`Error parsing semantic block ${raw}:`, error);
71
- return { subject: null, types: [], predicates: [], datatype: null, language: null };
133
+ return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
72
134
  }
73
135
  }
74
136
 
@@ -86,20 +148,29 @@ function scanTokens(text) {
86
148
  if (line.startsWith('```')) {
87
149
  if (!codeBlock) {
88
150
  const fence = line.match(/^(`{3,})(.*)/);
151
+ const attrsText = fence[2].match(/\{[^}]+\}/)?.[0] || null;
152
+ const attrsStartInLine = attrsText ? line.indexOf(attrsText) : -1;
153
+ const contentStart = lineStart + line.length + 1;
89
154
  codeBlock = {
90
155
  fence: fence[1],
91
156
  start: lineStart,
92
157
  content: [],
93
158
  lang: fence[2].trim().split(/[\s{]/)[0],
94
- attrs: fence[2].match(/\{[^}]+\}/)?.[0]
159
+ attrs: attrsText,
160
+ attrsRange: attrsText && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrsText.length] : null,
161
+ valueRangeStart: contentStart
95
162
  };
96
163
  } else if (line.startsWith(codeBlock.fence)) {
164
+ const valueStart = codeBlock.valueRangeStart;
165
+ const valueEnd = Math.max(valueStart, lineStart - 1);
97
166
  tokens.push({
98
167
  type: 'code',
99
168
  range: [codeBlock.start, lineStart],
100
169
  text: codeBlock.content.join('\n'),
101
170
  lang: codeBlock.lang,
102
- attrs: codeBlock.attrs
171
+ attrs: codeBlock.attrs,
172
+ attrsRange: codeBlock.attrsRange,
173
+ valueRange: [valueStart, valueEnd]
103
174
  });
104
175
  codeBlock = null;
105
176
  }
@@ -119,35 +190,58 @@ function scanTokens(text) {
119
190
 
120
191
  const headingMatch = line.match(/^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
121
192
  if (headingMatch) {
193
+ const attrs = headingMatch[3] || null;
194
+ const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
195
+ const afterHashes = headingMatch[1].length;
196
+ const ws = line.substring(afterHashes).match(/^\s+/)?.[0]?.length || 0;
197
+ const valueStartInLine = afterHashes + ws;
198
+ const valueEndInLine = valueStartInLine + headingMatch[2].length;
122
199
  tokens.push({
123
200
  type: 'heading',
124
201
  depth: headingMatch[1].length,
125
202
  range: [lineStart, pos - 1],
126
203
  text: headingMatch[2].trim(),
127
- attrs: headingMatch[3]
204
+ attrs,
205
+ attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
206
+ valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
128
207
  });
129
208
  continue;
130
209
  }
131
210
 
132
211
  const listMatch = line.match(/^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
133
212
  if (listMatch) {
213
+ const attrs = listMatch[4] || null;
214
+ const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
215
+ const prefix = listMatch[1].length + listMatch[2].length;
216
+ const ws = line.substring(prefix).match(/^\s+/)?.[0]?.length || 0;
217
+ const valueStartInLine = prefix + ws;
218
+ const valueEndInLine = valueStartInLine + listMatch[3].length;
134
219
  tokens.push({
135
220
  type: 'list',
136
221
  indent: listMatch[1].length,
137
222
  range: [lineStart, pos - 1],
138
223
  text: listMatch[3].trim(),
139
- attrs: listMatch[4]
224
+ attrs,
225
+ attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
226
+ valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
140
227
  });
141
228
  continue;
142
229
  }
143
230
 
144
231
  const blockquoteMatch = line.match(/^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
145
232
  if (blockquoteMatch) {
233
+ const attrs = blockquoteMatch[2] || null;
234
+ const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
235
+ const prefixMatch = line.match(/^>\s+/);
236
+ const valueStartInLine = prefixMatch ? prefixMatch[0].length : 2;
237
+ const valueEndInLine = valueStartInLine + blockquoteMatch[1].length;
146
238
  tokens.push({
147
239
  type: 'blockquote',
148
240
  range: [lineStart, pos - 1],
149
241
  text: blockquoteMatch[1].trim(),
150
- attrs: blockquoteMatch[2]
242
+ attrs,
243
+ attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
244
+ valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
151
245
  });
152
246
  continue;
153
247
  }
@@ -165,7 +259,6 @@ function scanTokens(text) {
165
259
  return tokens;
166
260
  }
167
261
 
168
- // Extract inline carriers: [text] {attrs}, [text](url) {attrs}, [text](=iri) {attrs}
169
262
  function extractInlineCarriers(text, baseOffset = 0) {
170
263
  const carriers = [];
171
264
  let pos = 0;
@@ -174,14 +267,25 @@ function extractInlineCarriers(text, baseOffset = 0) {
174
267
  const bracketStart = text.indexOf('[', pos);
175
268
  if (bracketStart === -1) break;
176
269
 
177
- const bracketEnd = text.indexOf(']', bracketStart);
178
- if (bracketEnd === -1) break;
270
+ let bracketDepth = 1;
271
+ let bracketEnd = bracketStart + 1;
179
272
 
180
- const carrierText = text.substring(bracketStart + 1, bracketEnd);
181
- let spanEnd = bracketEnd + 1;
273
+ while (bracketEnd < text.length && bracketDepth > 0) {
274
+ if (text[bracketEnd] === '[') {
275
+ bracketDepth++;
276
+ } else if (text[bracketEnd] === ']') {
277
+ bracketDepth--;
278
+ }
279
+ bracketEnd++;
280
+ }
281
+
282
+ if (bracketDepth > 0) break;
283
+
284
+ const carrierText = text.substring(bracketStart + 1, bracketEnd - 1);
285
+ const valueRange = [baseOffset + bracketStart + 1, baseOffset + bracketEnd - 1];
286
+ let spanEnd = bracketEnd;
182
287
  let url = null;
183
288
 
184
- // Check for (url) or (=iri)
185
289
  if (text[spanEnd] === '(') {
186
290
  const parenEnd = text.indexOf(')', spanEnd);
187
291
  if (parenEnd !== -1) {
@@ -190,22 +294,24 @@ function extractInlineCarriers(text, baseOffset = 0) {
190
294
  }
191
295
  }
192
296
 
193
- // Check for {attrs}
194
297
  let attrs = null;
298
+ let attrsRange = null;
195
299
  const attrsMatch = text.substring(spanEnd).match(/^\s*\{([^}]+)\}/);
196
300
  if (attrsMatch) {
197
301
  attrs = `{${attrsMatch[1]}}`;
302
+ const braceIndex = attrsMatch[0].indexOf('{');
303
+ const absStart = baseOffset + spanEnd + (braceIndex >= 0 ? braceIndex : 0);
304
+ attrsRange = [absStart, absStart + attrs.length];
198
305
  spanEnd += attrsMatch[0].length;
199
306
  }
200
307
 
201
- // Determine type and resource
202
308
  let carrierType = 'span';
203
309
  let resourceIRI = null;
204
310
 
205
311
  if (url) {
206
312
  if (url.startsWith('=')) {
207
- carrierType = 'resource';
208
- resourceIRI = url.substring(1);
313
+ pos = spanEnd;
314
+ continue;
209
315
  } else {
210
316
  carrierType = 'link';
211
317
  resourceIRI = url;
@@ -217,6 +323,8 @@ function extractInlineCarriers(text, baseOffset = 0) {
217
323
  text: carrierText,
218
324
  url: resourceIRI,
219
325
  attrs: attrs,
326
+ attrsRange,
327
+ valueRange,
220
328
  range: [baseOffset + bracketStart, baseOffset + spanEnd]
221
329
  });
222
330
 
@@ -226,191 +334,237 @@ function extractInlineCarriers(text, baseOffset = 0) {
226
334
  return carriers;
227
335
  }
228
336
 
229
- function createBlock(subject, types, predicates, range, ctx) {
337
+ function createBlock(subject, types, predicates, entries, range, attrsRange, valueRange, carrierType, ctx) {
230
338
  const expanded = {
231
- subject: subject,
232
- types: types.map(t => expandIRI(t, ctx)),
233
- predicates: predicates.map(p => ({
234
- iri: expandIRI(p.iri, ctx),
235
- form: p.form
236
- }))
339
+ subject,
340
+ types: types.map(t => expandIRI(typeof t === 'string' ? t : t.iri, ctx)),
341
+ predicates: predicates.map(p => ({ iri: expandIRI(p.iri, ctx), form: p.form }))
237
342
  };
238
-
239
343
  const blockId = hash([subject, JSON.stringify(expanded)].join('|'));
240
344
  return {
241
345
  id: blockId,
242
346
  range: { start: range[0], end: range[1] },
347
+ attrsRange: attrsRange ? { start: attrsRange[0], end: attrsRange[1] } : null,
348
+ valueRange: valueRange ? { start: valueRange[0], end: valueRange[1] } : null,
349
+ carrierType: carrierType || null,
243
350
  subject,
244
351
  types: expanded.types,
245
352
  predicates: expanded.predicates,
353
+ entries: entries || [],
246
354
  context: { ...ctx }
247
355
  };
248
356
  }
249
357
 
250
- function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory) {
358
+ // Quad Utilities
359
+ function quadIndexKey(subject, predicate, object) {
360
+ const objKey = object.termType === 'Literal'
361
+ ? JSON.stringify({ t: 'Literal', v: object.value, lang: object.language || '', dt: object.datatype?.value || '' })
362
+ : JSON.stringify({ t: object.termType, v: object.value });
363
+ return JSON.stringify([subject.value, predicate.value, objKey]);
364
+ }
365
+
366
+ function normalizeQuad(q) {
367
+ if (!q) return null;
368
+ const { subject, predicate, object } = q;
369
+ if (object?.termType === 'Literal') {
370
+ const language = typeof object.language === 'string' ? object.language : '';
371
+ const datatype = object.datatype?.value || 'http://www.w3.org/2001/XMLSchema#string';
372
+ return { ...q, subject, predicate, object: { ...object, language, datatype } };
373
+ }
374
+ return { ...q, subject, predicate, object };
375
+ }
376
+
377
+ function objectSignature(o) {
378
+ if (!o) return '';
379
+ if (o.termType === 'Literal') {
380
+ return JSON.stringify({ t: 'Literal', v: o.value, lang: o.language || '', dt: o.datatype?.value || '' });
381
+ }
382
+ return JSON.stringify({ t: o.termType, v: o.value });
383
+ }
384
+
385
+ function quadToKeyForOrigin(q) {
386
+ const nq = normalizeQuad(q);
387
+ return nq ? quadIndexKey(nq.subject, nq.predicate, nq.object) : null;
388
+ }
389
+
390
+ function parseQuadIndexKey(key) {
391
+ try {
392
+ const [s, p, objKey] = JSON.parse(key);
393
+ return { s, p, o: JSON.parse(objKey) };
394
+ } catch {
395
+ return null;
396
+ }
397
+ }
398
+
399
+ // Semantic Slot Utilities
400
+ function createSemanticSlotId(subject, predicate) {
401
+ return hash(`${subject.value}|${predicate.value}`);
402
+ }
403
+
404
+ function createSlotInfo(blockId, entryIndex, meta = {}) {
405
+ const slotId = meta.subject && meta.predicate ? createSemanticSlotId(meta.subject, meta.predicate) : null;
406
+ return {
407
+ blockId,
408
+ entryIndex,
409
+ slotId,
410
+ isVacant: false,
411
+ lastValue: null,
412
+ vacantSince: null,
413
+ ...meta
414
+ };
415
+ }
416
+
417
+ function markSlotAsVacant(slotInfo, deletedValue) {
418
+ if (!slotInfo) return null;
419
+ return {
420
+ ...slotInfo,
421
+ isVacant: true,
422
+ lastValue: deletedValue,
423
+ vacantSince: Date.now()
424
+ };
425
+ }
426
+
427
+ function findVacantSlot(quadIndex, subject, predicate) {
428
+ const targetSlotId = createSemanticSlotId(subject, predicate);
429
+ return Array.from(quadIndex.values())
430
+ .find(slot => slot.slotId === targetSlotId && slot.isVacant);
431
+ }
432
+
433
+ function occupySlot(slotInfo, newValue) {
434
+ if (!slotInfo || !slotInfo.isVacant) return null;
435
+ return {
436
+ ...slotInfo,
437
+ isVacant: false,
438
+ lastValue: newValue,
439
+ vacantSince: null
440
+ };
441
+ }
442
+
443
+ function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory, meta = null) {
251
444
  if (!subject || !predicate || !object) return;
252
445
  const quad = dataFactory.quad(subject, predicate, object);
253
446
  quads.push(quad);
254
- const key = JSON.stringify([
255
- quad.subject.value,
256
- quad.predicate.value,
257
- quad.object.termType === 'Literal' ? quad.object.value : quad.object.value
258
- ]);
259
- quadIndex.set(key, blockId);
447
+
448
+ // Create enhanced slot info with semantic slot tracking
449
+ const slotInfo = createSlotInfo(blockId, meta?.entryIndex, {
450
+ ...meta,
451
+ subject,
452
+ predicate,
453
+ object
454
+ });
455
+
456
+ quadIndex.set(quadIndexKey(quad.subject, quad.predicate, quad.object), slotInfo);
260
457
  }
261
458
 
262
459
  function createLiteral(value, datatype, language, context, dataFactory) {
263
- if (datatype) {
264
- return dataFactory.literal(value, dataFactory.namedNode(expandIRI(datatype, context)));
265
- }
266
- if (language) {
267
- return dataFactory.literal(value, language);
268
- }
460
+ if (datatype) return dataFactory.literal(value, dataFactory.namedNode(expandIRI(datatype, context)));
461
+ if (language) return dataFactory.literal(value, language);
269
462
  return dataFactory.literal(value);
270
463
  }
271
464
 
272
- // Core processing: handle subject/type declarations and property emissions
273
465
  function processAnnotation(carrier, sem, state) {
274
- // §6.1 Subject declaration
275
466
  if (sem.subject === 'RESET') {
276
467
  state.currentSubject = null;
277
468
  return;
278
469
  }
279
470
 
280
- if (sem.subject) {
281
- state.currentSubject = state.df.namedNode(expandIRI(sem.subject, state.ctx));
282
- }
471
+ const previousSubject = state.currentSubject;
472
+ let newSubject = sem.subject ? state.df.namedNode(expandIRI(sem.subject, state.ctx)) : null;
473
+ if (newSubject) state.currentSubject = newSubject;
283
474
 
284
- // Determine the subject for emissions
285
475
  const S = state.currentSubject;
286
- if (!S) return; // Need a subject to emit anything
287
-
288
- // Create origin block
289
- const block = createBlock(
290
- S.value,
291
- sem.types,
292
- sem.predicates,
293
- carrier.range,
294
- state.ctx
295
- );
476
+ if (!S) return;
477
+
478
+ const block = createBlock(S.value, sem.types, sem.predicates, sem.entries, carrier.range, carrier.attrsRange || null, carrier.valueRange || null, carrier.type || null, state.ctx);
296
479
  state.origin.blocks.set(block.id, block);
297
480
 
298
- // Extract L (literal) and O (object IRI)
299
481
  const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
300
482
  const O = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
301
483
 
302
- // §7 Emit type triples
303
- sem.types.forEach(typeIRI => {
484
+ sem.types.forEach(t => {
485
+ const typeIRI = typeof t === 'string' ? t : t.iri;
486
+ const entryIndex = typeof t === 'string' ? null : t.entryIndex;
304
487
  const typeSubject = O || S;
305
- emitQuad(
306
- state.quads,
307
- state.origin.quadIndex,
308
- block.id,
309
- typeSubject,
310
- state.df.namedNode(expandIRI('rdf:type', state.ctx)),
311
- state.df.namedNode(expandIRI(typeIRI, state.ctx)),
312
- state.df
313
- );
488
+ const expandedType = expandIRI(typeIRI, state.ctx);
489
+ emitQuad(state.quads, state.origin.quadIndex, block.id, typeSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)), state.df.namedNode(expandedType), state.df, { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex });
314
490
  });
315
491
 
316
- // §8 Emit predicate triples (routing table)
317
492
  sem.predicates.forEach(pred => {
318
493
  const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
494
+ const token = `${pred.form}${pred.iri}`;
319
495
 
320
496
  if (pred.form === '') {
321
- // p: S L
322
- emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, L, state.df);
497
+ emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, L, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
323
498
  } else if (pred.form === '?') {
324
- // ?p: S → O
325
- if (O) {
326
- emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, O, state.df);
499
+ if (newSubject) {
500
+ emitQuad(state.quads, state.origin.quadIndex, block.id, previousSubject, P, newSubject, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
501
+ } else if (O) {
502
+ emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, O, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
327
503
  }
328
- } else if (pred.form === '^') {
329
- // ^p: reverse literal (L → S impossible, emit nothing per spec)
330
- // Note: Some interpretations might emit S → S or skip
331
504
  } else if (pred.form === '^?') {
332
- // ^?p: O → S
333
- if (O) {
334
- emitQuad(state.quads, state.origin.quadIndex, block.id, O, P, S, state.df);
505
+ if (newSubject) {
506
+ emitQuad(state.quads, state.origin.quadIndex, block.id, newSubject, P, previousSubject, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
507
+ } else if (O) {
508
+ emitQuad(state.quads, state.origin.quadIndex, block.id, O, P, S, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
335
509
  }
336
510
  }
337
511
  });
338
512
  }
339
513
 
340
- // Process list with context annotation
341
- function processListContext(contextSem, listTokens, state) {
342
- const contextSubject = state.currentSubject;
343
- if (!contextSubject) return;
514
+ function processListContext(contextSem, listTokens, state, contextSubject = null) {
515
+ if (!contextSubject) contextSubject = state.currentSubject;
344
516
 
345
517
  listTokens.forEach(listToken => {
346
- // Extract carriers from list item text
347
518
  const carriers = extractInlineCarriers(listToken.text, listToken.range[0]);
348
-
349
- // Find subject from carriers or list item annotation
350
519
  let itemSubject = null;
351
520
  let itemSubjectCarrier = null;
352
521
 
353
- // First check carriers for subject declarations
354
- for (const carrier of carriers) {
355
- if (carrier.url && carrier.type === 'resource') {
356
- // [text](=iri) declares a subject
357
- itemSubject = state.df.namedNode(expandIRI(carrier.url, state.ctx));
358
- itemSubjectCarrier = carrier;
359
- break;
522
+ if (listToken.attrs) {
523
+ const itemSem = parseSemanticBlock(listToken.attrs);
524
+ if (itemSem.subject && itemSem.subject !== 'RESET') {
525
+ itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
526
+ itemSubjectCarrier = { type: 'list', text: listToken.text, attrs: listToken.attrs, range: listToken.range };
360
527
  }
361
- if (carrier.attrs) {
362
- const itemSem = parseSemanticBlock(carrier.attrs);
363
- if (itemSem.subject && itemSem.subject !== 'RESET') {
364
- itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
365
- itemSubjectCarrier = carrier;
366
- break;
528
+ }
529
+
530
+ if (!itemSubject) {
531
+ for (const carrier of carriers) {
532
+ if (carrier.attrs) {
533
+ const itemSem = parseSemanticBlock(carrier.attrs);
534
+ if (itemSem.subject && itemSem.subject !== 'RESET') {
535
+ itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
536
+ itemSubjectCarrier = carrier;
537
+ break;
538
+ }
367
539
  }
368
540
  }
369
541
  }
370
542
 
371
- if (!itemSubject) return; // List items must declare subjects
372
-
373
- // Apply context types to item
374
- contextSem.types.forEach(typeIRI => {
375
- emitQuad(
376
- state.quads,
377
- state.origin.quadIndex,
378
- 'list-context',
379
- itemSubject,
380
- state.df.namedNode(expandIRI('rdf:type', state.ctx)),
381
- state.df.namedNode(expandIRI(typeIRI, state.ctx)),
382
- state.df
383
- );
543
+ if (!itemSubject) return;
544
+
545
+ contextSem.types.forEach(t => {
546
+ const typeIRI = typeof t === 'string' ? t : t.iri;
547
+ emitQuad(state.quads, state.origin.quadIndex, 'list-context', itemSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)), state.df.namedNode(expandIRI(typeIRI, state.ctx)), state.df);
384
548
  });
385
549
 
386
- // Emit context relationships
387
550
  contextSem.predicates.forEach(pred => {
388
551
  const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
389
-
390
552
  if (pred.form === '^' || pred.form === '^?') {
391
- // Reverse: item context
392
- emitQuad(state.quads, state.origin.quadIndex, 'list-context',
393
- itemSubject, P, contextSubject, state.df);
553
+ emitQuad(state.quads, state.origin.quadIndex, 'list-context', itemSubject, P, contextSubject, state.df);
394
554
  } else {
395
- // Forward: context item
396
- emitQuad(state.quads, state.origin.quadIndex, 'list-context',
397
- contextSubject, P, itemSubject, state.df);
555
+ emitQuad(state.quads, state.origin.quadIndex, 'list-context', contextSubject, P, itemSubject, state.df);
398
556
  }
399
557
  });
400
558
 
401
- // Process item's own annotations
402
559
  const prevSubject = state.currentSubject;
403
560
  state.currentSubject = itemSubject;
404
561
 
405
- // Process the list token's own attributes
406
562
  if (listToken.attrs) {
407
563
  const itemSem = parseSemanticBlock(listToken.attrs);
408
- // For list item attributes, the literal is the text content without links
409
- const carrier = { type: 'list', text: listToken.text.replace(/\[([^\]]+)\]\([^)]+\)/, '$1'), range: listToken.range };
564
+ const carrier = { type: 'list', text: listToken.text, range: listToken.range, attrsRange: listToken.attrsRange || null, valueRange: listToken.valueRange || null };
410
565
  processAnnotation(carrier, itemSem, state);
411
566
  }
412
567
 
413
- // Process inline carriers' attributes
414
568
  carriers.forEach(carrier => {
415
569
  if (carrier.attrs) {
416
570
  const itemSem = parseSemanticBlock(carrier.attrs);
@@ -432,8 +586,6 @@ export function parse(text, options = {}) {
432
586
  };
433
587
 
434
588
  const tokens = scanTokens(text);
435
-
436
- // Apply prefix declarations
437
589
  tokens.filter(t => t.type === 'prefix').forEach(t => state.ctx[t.prefix] = t.iri);
438
590
 
439
591
  for (let i = 0; i < tokens.length; i++) {
@@ -441,18 +593,26 @@ export function parse(text, options = {}) {
441
593
 
442
594
  if (token.type === 'heading' && token.attrs) {
443
595
  const sem = parseSemanticBlock(token.attrs);
444
- const carrier = { type: 'heading', text: token.text, range: token.range };
596
+ const carrier = { type: 'heading', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
445
597
  processAnnotation(carrier, sem, state);
446
598
  } else if (token.type === 'code' && token.attrs) {
447
599
  const sem = parseSemanticBlock(token.attrs);
448
- const carrier = { type: 'code', text: token.text, range: token.range };
600
+ const carrier = { type: 'code', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
449
601
  processAnnotation(carrier, sem, state);
450
602
  } else if (token.type === 'blockquote' && token.attrs) {
451
603
  const sem = parseSemanticBlock(token.attrs);
452
- const carrier = { type: 'blockquote', text: token.text, range: token.range };
604
+ const carrier = { type: 'blockquote', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
453
605
  processAnnotation(carrier, sem, state);
454
606
  } else if (token.type === 'para') {
455
- // Check for list context
607
+ // Check for standalone subject declarations: {=iri} on its own line
608
+ const standaloneSubjectMatch = token.text.match(/^\s*\{=(.*?)\}\s*$/);
609
+ if (standaloneSubjectMatch) {
610
+ const sem = parseSemanticBlock(`{=${standaloneSubjectMatch[1]}}`);
611
+ const attrsStart = token.range[0] + token.text.indexOf('{=');
612
+ const attrsEnd = attrsStart + (standaloneSubjectMatch[1] ? standaloneSubjectMatch[1].length : 0);
613
+ processAnnotation({ type: 'standalone', text: '', range: token.range, attrsRange: [attrsStart, attrsEnd], valueRange: null }, sem, state);
614
+ }
615
+
456
616
  const followingLists = [];
457
617
  let j = i + 1;
458
618
  while (j < tokens.length && tokens[j].type === 'list') {
@@ -460,17 +620,28 @@ export function parse(text, options = {}) {
460
620
  j++;
461
621
  }
462
622
 
463
- // Check if this paragraph ends with {attrs} and is followed by lists
464
623
  const contextMatch = token.text.match(/^(.+?)\s*\{([^}]+)\}$/);
465
624
  if (contextMatch && followingLists.length > 0) {
466
- // This is a list context annotation
467
625
  const contextSem = parseSemanticBlock(`{${contextMatch[2]}}`);
468
- processListContext(contextSem, followingLists, state);
626
+ let contextSubject = state.currentSubject;
627
+
628
+ // Always look for the most recent heading subject for context
629
+ for (let k = i - 1; k >= 0; k--) {
630
+ const prevToken = tokens[k];
631
+ if (prevToken.type === 'heading' && prevToken.attrs) {
632
+ const headingSem = parseSemanticBlock(prevToken.attrs);
633
+ if (headingSem.subject) {
634
+ contextSubject = state.df.namedNode(expandIRI(headingSem.subject, state.ctx));
635
+ break;
636
+ }
637
+ }
638
+ }
639
+
640
+ processListContext(contextSem, followingLists, state, contextSubject);
469
641
  i = j - 1;
470
642
  continue;
471
643
  }
472
644
 
473
- // Process inline carriers
474
645
  const carriers = extractInlineCarriers(token.text, token.range[0]);
475
646
  carriers.forEach(carrier => {
476
647
  if (carrier.attrs) {
@@ -484,76 +655,442 @@ export function parse(text, options = {}) {
484
655
  return { quads: state.quads, origin: state.origin, context: state.ctx };
485
656
  }
486
657
 
487
- function shortenIRI(iri, ctx) {
488
- if (!iri || !iri.startsWith('http')) return iri;
489
658
 
490
- if (ctx['@vocab'] && iri.startsWith(ctx['@vocab'])) {
491
- return iri.substring(ctx['@vocab'].length);
492
- }
659
+ // Text Processing Utilities
660
/**
 * Read one of a block's recorded text spans out of `text`.
 *
 * @param {object|null|undefined} block - Block carrying `attrsRange` and/or `valueRange`.
 * @param {string} text - Text the range offsets index into.
 * @param {string} [spanType='attrs'] - `'attrs'` selects `block.attrsRange`; any other value selects `block.valueRange`.
 * @returns {{start: number, end: number, text: string}|null} The span with its substring, or `null`
 *   when the block has no such range or the range is not a valid `start <= end` pair of finite, non-negative numbers.
 */
function readSpan(block, text, spanType = 'attrs') {
  const range = spanType === 'attrs' ? block?.attrsRange : block?.valueRange;
  if (!range) return null;

  // The parser emits `attrsRange` as a `[start, end]` tuple for standalone subject
  // annotations, while this helper historically read `{ start, end }`; accept both shapes.
  const [start, end] = Array.isArray(range) ? range : [range.start, range.end];

  const isValid = Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end >= start;
  if (!isValid) return null;

  return { start, end, text: text.substring(start, end) };
}
493
668
 
494
- for (const [prefix, namespace] of Object.entries(ctx)) {
495
- if (prefix !== '@vocab' && iri.startsWith(namespace)) {
496
- return prefix + ':' + iri.substring(namespace.length);
497
- }
669
/**
 * Split an attribute annotation into its whitespace-separated tokens.
 *
 * A leading `{` (with any whitespace before it) and a trailing `}` (with any
 * whitespace after it) are stripped first. Falsy input is treated as empty.
 *
 * @param {*} attrsText - Annotation text, e.g. `{.Type name @en}`.
 * @returns {string[]} The tokens in order; an empty array when nothing remains.
 */
function normalizeAttrsTokens(attrsText) {
  const source = String(attrsText || '');
  const inner = source.replace(/^\s*\{|\}\s*$/g, '').trim();
  if (!inner) {
    return [];
  }
  return inner.split(/\s+/).filter(Boolean);
}
673
+
674
/**
 * Render a token list as a brace-wrapped annotation.
 *
 * @param {string[]} tokens - Tokens to join with single spaces.
 * @returns {string} `{tok1 tok2 ...}`; `{}` for an empty list.
 */
function writeAttrsTokens(tokens) {
  const inner = tokens.join(' ').trim();
  return `{${inner}}`;
}
677
+
678
/**
 * Remove the first token accepted by `matchFn`, without mutating the input.
 *
 * @param {Array} tokens - Token list to search.
 * @param {Function} matchFn - Predicate passed to `findIndex`.
 * @returns {{tokens: Array, removed: boolean}} The original array and `false` when nothing
 *   matched; otherwise a new array lacking the first match and `true`.
 */
function removeOneToken(tokens, matchFn) {
  const index = tokens.findIndex(matchFn);
  if (index === -1) {
    return { tokens, removed: false };
  }
  const remaining = [...tokens];
  remaining.splice(index, 1);
  return { tokens: remaining, removed: true };
}
682
+
683
/**
 * Make a literal value safe to write into a block's value carrier.
 *
 * - `code` carriers keep their line structure; only line endings are normalized to `\n`.
 * - Every other carrier is flattened to a single trimmed line.
 * - `span` and `link` carriers additionally have `[` and `]` replaced by spaces.
 *
 * @param {object|null|undefined} block - Block whose `carrierType` selects the rules.
 * @param {*} raw - Value to sanitize; `null`/`undefined` become the empty string.
 * @returns {string} The sanitized value.
 */
function sanitizeCarrierValueForBlock(block, raw) {
  const value = String(raw ?? '');
  const carrierType = block?.carrierType;

  if (carrierType === 'code') {
    return value.replace(/\r\n?/g, '\n');
  }

  const flattened = value.replace(/[\n\r]+/g, ' ');
  const isBracketCarrier = carrierType === 'span' || carrierType === 'link';
  const safe = isBracketCarrier ? flattened.replace(/[\[\]]/g, ' ') : flattened;

  // Trim last: replacing a leading or trailing bracket introduces edge whitespace
  // that a trim performed before the replacement would leave behind.
  return safe.trim();
}
690
+
691
/**
 * Collect the raw token text of a block's entries.
 *
 * @param {object|null|undefined} block - Block with an optional `entries` array.
 * @returns {string[]|null} The truthy `raw` values in entry order, or `null` when the
 *   block has no entries.
 */
function blockTokensFromEntries(block) {
  const entries = block?.entries;
  if (!entries?.length) {
    return null;
  }
  return entries.map(entry => entry.raw).filter(Boolean);
}
694
+
695
/**
 * Return a copy of a block's entries with the entry at `entryIndex` left out.
 *
 * @param {object|null|undefined} block - Block with an `entries` array.
 * @param {number|null|undefined} entryIndex - Position of the entry to drop.
 * @returns {Array|null} The new entry list, or `null` when the block has no entries,
 *   the index is nullish, or the index is out of bounds.
 */
function removeEntryAt(block, entryIndex) {
  const entries = block?.entries;
  if (!entries || entryIndex == null) {
    return null;
  }
  if (entryIndex < 0 || entryIndex >= entries.length) {
    return null;
  }
  const before = entries.slice(0, entryIndex);
  const after = entries.slice(entryIndex + 1);
  return [...before, ...after];
}
699
+
700
/**
 * Rebuild a block's entry list so its language/datatype entries describe `lit`.
 *
 * Existing `language` and `datatype` entries are dropped. A language entry is appended
 * when the literal has a language; otherwise a datatype entry is appended when the
 * literal's datatype is set and is not `xsd:string`. Appended entries carry a zero
 * `relRange`.
 *
 * @param {object|null|undefined} block - Block with an `entries` array.
 * @param {object|null|undefined} lit - Literal providing `language` and `datatype.value`.
 * @param {object} ctx - Prefix context handed to `shortenIRI` for the datatype IRI.
 * @returns {Array|null} The new entry list, or `null` when the block has no `entries`.
 */
function replaceLangDatatypeEntries(block, lit, ctx) {
  if (!block?.entries) {
    return null;
  }

  const kept = block.entries.filter(entry => !(entry.kind === 'language' || entry.kind === 'datatype'));
  const zeroRange = () => ({ start: 0, end: 0 });

  const language = lit?.language;
  if (language) {
    return [...kept, { kind: 'language', language, raw: `@${language}`, relRange: zeroRange() }];
  }

  const datatypeIri = lit?.datatype?.value;
  if (datatypeIri && datatypeIri !== 'http://www.w3.org/2001/XMLSchema#string') {
    const shortDatatype = shortenIRI(datatypeIri, ctx);
    return [...kept, { kind: 'datatype', datatype: shortDatatype, raw: `^^${shortDatatype}`, relRange: zeroRange() }];
  }

  return [...kept];
}
499
711
 
500
- return iri;
712
/**
 * Rewrite the language/datatype tokens of an annotation token list to match a literal.
 *
 * Tokens starting with `@` or `^^` are dropped. `@lang` is appended when the literal has
 * a language; otherwise `^^datatype` is appended when the datatype is set and is not
 * `xsd:string`.
 *
 * @param {string[]} tokens - Current annotation tokens.
 * @param {object|null|undefined} newLit - Literal providing `language` and `datatype.value`.
 * @param {object} ctx - Prefix context handed to `shortenIRI` for the datatype IRI.
 * @returns {string[]} A new token list.
 */
function updateAttrsDatatypeLang(tokens, newLit, ctx) {
  const kept = tokens.filter(token => !(token.startsWith('@') || token.startsWith('^^')));

  const language = newLit?.language;
  if (language) {
    kept.push(`@${language}`);
    return kept;
  }

  const datatypeIri = newLit?.datatype?.value;
  const hasExplicitDatatype = datatypeIri && datatypeIri !== 'http://www.w3.org/2001/XMLSchema#string';
  if (hasExplicitDatatype) {
    kept.push(`^^${shortenIRI(datatypeIri, ctx)}`);
  }
  return kept;
}
502
721
 
503
722
  export function serialize({ text, diff, origin, options = {} }) {
504
- if (!diff || (!diff.add?.length && !diff.delete?.length)) return { text, origin };
723
+ if (!diff || (!diff.add?.length && !diff.delete?.length)) {
724
+ const reparsed = parse(text, { context: options.context || {} });
725
+ return { text, origin: reparsed.origin };
726
+ }
505
727
 
728
+ const base = origin || parse(text, { context: options.context || {} }).origin;
506
729
  let result = text;
507
730
  const edits = [];
508
731
  const ctx = options.context || {};
509
732
 
733
+ const findOriginEntryForLiteralByValue = (subjectIri, predicateIri, literalValue) => {
734
+ for (const [k, entry] of base?.quadIndex || []) {
735
+ const parsed = parseQuadIndexKey(k);
736
+ if (!parsed) continue;
737
+ if (parsed.s !== subjectIri || parsed.p !== predicateIri) continue;
738
+ if (parsed.o?.t !== 'Literal') continue;
739
+ if (parsed.o?.v === literalValue) return entry;
740
+ }
741
+ return null;
742
+ };
743
+
744
+ const findLiteralCarrierBlocksBySP = (subjectIri, predicateIri) => {
745
+ const out = [];
746
+ for (const [k, entry] of base?.quadIndex || []) {
747
+ const parsed = parseQuadIndexKey(k);
748
+ if (!parsed) continue;
749
+ if (parsed.s !== subjectIri || parsed.p !== predicateIri) continue;
750
+ if (parsed.o?.t !== 'Literal') continue;
751
+ const blockId = entry?.blockId || entry;
752
+ const block = blockId ? base?.blocks?.get(blockId) : null;
753
+ if (block) out.push({ block, entry, obj: parsed.o });
754
+ }
755
+ return out;
756
+ };
757
+
758
+ const anchors = new Map();
759
+ for (const q0 of diff.delete || []) {
760
+ const q = normalizeQuad(q0);
761
+ if (!q) continue;
762
+ if (!q?.subject || !q?.object || !q?.predicate) continue;
763
+ const key = JSON.stringify([q.subject.value, objectSignature(q.object)]);
764
+ const qk = quadToKeyForOrigin(q);
765
+ const entry = qk ? base?.quadIndex?.get(qk) : null;
766
+ const blockId = entry?.blockId || entry;
767
+ const block = blockId ? base?.blocks?.get(blockId) : null;
768
+ if (!block?.attrsRange) continue;
769
+ anchors.set(key, { block, entry });
770
+ }
771
+
772
+ const addBySP = new Map();
773
+ for (const q0 of diff.add || []) {
774
+ const q = normalizeQuad(q0);
775
+ if (!q) continue;
776
+ if (!q?.subject || !q?.predicate || !q?.object) continue;
777
+ const k = JSON.stringify([q.subject.value, q.predicate.value]);
778
+ const list = addBySP.get(k) || [];
779
+ list.push(q);
780
+ addBySP.set(k, list);
781
+ }
782
+
783
+ const consumedAdds = new Set();
784
+ const literalUpdates = [];
785
+ for (const dq0 of diff.delete || []) {
786
+ const dq = normalizeQuad(dq0);
787
+ if (!dq) continue;
788
+ if (!dq?.subject || !dq?.predicate || !dq?.object) continue;
789
+ if (dq.object.termType !== 'Literal') continue;
790
+ const k = JSON.stringify([dq.subject.value, dq.predicate.value]);
791
+ const candidates = addBySP.get(k) || [];
792
+ const aq = candidates.find(x => x?.object?.termType === 'Literal' && !consumedAdds.has(quadToKeyForOrigin(x)));
793
+ if (!aq) continue;
794
+
795
+ const dqk = quadToKeyForOrigin(dq);
796
+ let entry = dqk ? base?.quadIndex?.get(dqk) : null;
797
+ if (!entry && dq.object?.termType === 'Literal') {
798
+ entry = findOriginEntryForLiteralByValue(dq.subject.value, dq.predicate.value, dq.object.value);
799
+ }
800
+ const blockId = entry?.blockId || entry;
801
+ const block = blockId ? base?.blocks?.get(blockId) : null;
802
+ if (!block) continue;
803
+
804
+ literalUpdates.push({ deleteQuad: dq, addQuad: aq, entry, block });
805
+ consumedAdds.add(quadToKeyForOrigin(aq));
806
+ }
807
+
808
+ for (const q0 of diff.add || []) {
809
+ const quad = normalizeQuad(q0);
810
+ if (!quad || quad.object?.termType !== 'Literal') continue;
811
+ if (consumedAdds.has(quadToKeyForOrigin(quad))) continue;
812
+
813
+ // Check if there's a vacant slot we can reuse
814
+ const vacantSlot = findVacantSlot(base?.quadIndex, quad.subject, quad.predicate);
815
+ if (!vacantSlot) continue;
816
+
817
+ const block = base?.blocks?.get(vacantSlot.blockId);
818
+ if (!block) continue;
819
+
820
+ const span = readSpan(block, text, 'attrs');
821
+ if (!span) continue;
822
+
823
+ // Occupy the vacant slot and update the annotation
824
+ const occupiedSlot = occupySlot(vacantSlot, quad.object);
825
+ if (!occupiedSlot) continue;
826
+
827
+ // Update the carrier value
828
+ const valueSpan = readSpan(block, text, 'value');
829
+ if (valueSpan) {
830
+ edits.push({ start: valueSpan.start, end: valueSpan.end, text: quad.object.value });
831
+ }
832
+
833
+ // Update the annotation block to restore the predicate token
834
+ const tokens = normalizeAttrsTokens(span.text);
835
+ const predToken = `${vacantSlot.form || ''}${shortenIRI(quad.predicate.value, ctx)}`;
836
+
837
+ // For empty annotation blocks, replace entirely; for non-empty, add if missing
838
+ if (tokens.length === 0) {
839
+ edits.push({ start: span.start, end: span.end, text: `{${predToken}}` });
840
+ } else if (!tokens.includes(predToken)) {
841
+ const updated = [...tokens, predToken];
842
+ edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
843
+ }
844
+
845
+ // Mark as consumed and continue
846
+ consumedAdds.add(quadToKeyForOrigin(quad));
847
+ continue;
848
+
849
+ const matches = findLiteralCarrierBlocksBySP(quad.subject.value, quad.predicate.value);
850
+ if (matches.length === 0) continue;
851
+
852
+ const desiredLang = quad.object.language || '';
853
+ const sameLang = matches.filter(m => {
854
+ const entries = m.block?.entries || [];
855
+ const langEntry = entries.find(e => e.kind === 'language');
856
+ const lang = langEntry?.language || '';
857
+ return lang === desiredLang;
858
+ });
859
+
860
+ if (sameLang.length !== 1) continue;
861
+ const target = sameLang[0].block;
862
+ const vSpan = readSpan(target, text, 'value');
863
+ if (!vSpan) continue;
864
+
865
+ const newValue = sanitizeCarrierValueForBlock(target, quad.object.value);
866
+ edits.push({ start: vSpan.start, end: vSpan.end, text: newValue });
867
+
868
+ const aSpan = readSpan(target, text, 'attrs');
869
+ if (aSpan && target?.entries?.length) {
870
+ const nextEntries = replaceLangDatatypeEntries(target, quad.object, ctx);
871
+ if (nextEntries) {
872
+ const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
873
+ edits.push({ start: aSpan.start, end: aSpan.end, text: writeAttrsTokens(nextTokens) });
874
+ }
875
+ }
876
+
877
+ consumedAdds.add(quad);
878
+ }
879
+
880
+ for (const u of literalUpdates) {
881
+ const span = readSpan(u.block, text, 'value');
882
+ if (span) {
883
+ const newValue = sanitizeCarrierValueForBlock(u.block, u.addQuad.object.value);
884
+ edits.push({ start: span.start, end: span.end, text: newValue });
885
+ }
886
+
887
+ const aSpan = readSpan(u.block, text, 'attrs');
888
+ if (aSpan) {
889
+ if (u.block?.entries?.length) {
890
+ const nextEntries = replaceLangDatatypeEntries(u.block, u.addQuad.object, ctx);
891
+ if (nextEntries) {
892
+ const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
893
+ if (nextTokens.length === 0) {
894
+ edits.push({ start: aSpan.start, end: aSpan.end, text: '{}' });
895
+ } else {
896
+ edits.push({ start: aSpan.start, end: aSpan.end, text: writeAttrsTokens(nextTokens) });
897
+ }
898
+ }
899
+ } else {
900
+ const tokens = normalizeAttrsTokens(aSpan.text);
901
+ const updated = updateAttrsDatatypeLang(tokens, u.addQuad.object, ctx);
902
+ if (updated.join(' ') !== tokens.join(' ')) {
903
+ if (updated.length === 0) {
904
+ edits.push({ start: aSpan.start, end: aSpan.end, text: '{}' });
905
+ } else {
906
+ edits.push({ start: aSpan.start, end: aSpan.end, text: writeAttrsTokens(updated) });
907
+ }
908
+ }
909
+ }
910
+ }
911
+ }
912
+
913
+
510
914
  if (diff.delete) {
511
- diff.delete.forEach(quad => {
512
- if (!quad || !quad.subject) return;
513
- const key = JSON.stringify([
514
- quad.subject.value,
515
- quad.predicate.value,
516
- quad.object.termType === 'Literal' ? quad.object.value : quad.object.value
517
- ]);
518
- const blockId = origin?.quadIndex.get(key);
915
+ diff.delete.forEach(q0 => {
916
+ const quad = normalizeQuad(q0);
917
+ if (!quad) return;
918
+ if (!quad?.subject || !quad?.predicate || !quad?.object) return;
919
+
920
+ if (quad.object.termType === 'Literal') {
921
+ const isUpdated = literalUpdates.some(u =>
922
+ u.deleteQuad.subject.value === quad.subject.value &&
923
+ u.deleteQuad.predicate.value === quad.predicate.value &&
924
+ u.deleteQuad.object.value === quad.object.value
925
+ );
926
+ if (isUpdated) return;
927
+ }
928
+
929
+ const key = quadToKeyForOrigin(quad);
930
+ let entry = key ? base?.quadIndex?.get(key) : null;
931
+ if (!entry && quad.object?.termType === 'Literal') {
932
+ entry = findOriginEntryForLiteralByValue(quad.subject.value, quad.predicate.value, quad.object.value);
933
+ }
934
+
935
+ // Mark the semantic slot as vacant for future reuse
936
+ if (entry && entry.slotId) {
937
+ // Capture block information before marking as vacant
938
+ const block = base?.blocks?.get(entry.blockId);
939
+ const blockInfo = block ? {
940
+ id: entry.blockId,
941
+ range: block.range,
942
+ attrsRange: block.attrsRange,
943
+ valueRange: block.valueRange,
944
+ carrierType: block.carrierType,
945
+ subject: block.subject,
946
+ context: block.context
947
+ } : null;
948
+
949
+ const vacantSlot = markSlotAsVacant(entry, quad.object);
950
+ if (vacantSlot && blockInfo) {
951
+ vacantSlot.blockInfo = blockInfo;
952
+ base.quadIndex.set(key, vacantSlot);
953
+ }
954
+ }
955
+
956
+ const blockId = entry?.blockId || entry;
519
957
  if (!blockId) return;
520
958
 
521
- const block = origin.blocks.get(blockId);
959
+ const block = base?.blocks?.get(blockId);
522
960
  if (!block) return;
523
961
 
524
- const start = block.range.start;
525
- const end = block.range.end;
526
- const before = text.substring(Math.max(0, start - 1), start);
527
- const after = text.substring(end, Math.min(end + 1, text.length));
528
- const deleteStart = before === '\n' ? start - 1 : start;
529
- const deleteEnd = after === '\n' ? end + 1 : end;
962
+ const span = readSpan(block, text, 'attrs');
963
+ if (!span) return;
964
+
965
+ // Handle entry removal by index
966
+ if (entry?.entryIndex != null && block?.entries?.length) {
967
+ const nextEntries = removeEntryAt(block, entry.entryIndex);
968
+ if (!nextEntries) return;
969
+
970
+ const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
971
+ const newText = nextTokens.length === 0 ? '{}' : writeAttrsTokens(nextTokens);
972
+ edits.push({ start: span.start, end: span.end, text: newText });
973
+ return;
974
+ }
975
+
976
+ const tokens = normalizeAttrsTokens(span.text);
977
+ let updated = tokens;
978
+ let removed = false;
979
+
980
+ if (entry?.kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
981
+ const expectedType = entry.expandedType || quad.object.value;
982
+ ({ tokens: updated, removed } = removeOneToken(tokens, t => {
983
+ if (!t.startsWith('.')) return false;
984
+ const raw = t.slice(1);
985
+ return expandIRI(raw, ctx) === expectedType;
986
+ }));
987
+ } else {
988
+ const expectedPred = entry?.expandedPredicate || quad.predicate.value;
989
+ const expectedForm = entry?.form;
990
+ ({ tokens: updated, removed } = removeOneToken(tokens, t => {
991
+ const m = String(t).match(/^(\^\?|\^|\?|)(.+)$/);
992
+ if (!m) return false;
993
+ const form = m[1] || '';
994
+ const raw = m[2];
995
+ if (expectedForm != null && form !== expectedForm) return false;
996
+ return expandIRI(raw, ctx) === expectedPred;
997
+ }));
998
+ }
999
+
1000
+ if (!removed) return;
1001
+
1002
+ if (updated.length === 0) {
1003
+ edits.push({ start: span.start, end: span.end, text: '{}' });
1004
+ return;
1005
+ }
530
1006
 
531
- edits.push({ start: deleteStart, end: deleteEnd, text: '' });
1007
+ const newAttrs = writeAttrsTokens(updated);
1008
+ edits.push({ start: span.start, end: span.end, text: newAttrs });
532
1009
  });
533
1010
  }
534
1011
 
535
1012
  if (diff.add) {
536
- diff.add.forEach(quad => {
537
- let insertPos = result.length;
538
-
539
- for (const [, block] of origin?.blocks || []) {
540
- if (block.subject === quad.subject.value) {
541
- insertPos = block.range.end;
542
- break;
1013
+ diff.add.forEach(q0 => {
1014
+ const quad = normalizeQuad(q0);
1015
+ if (!quad) return;
1016
+ if (!quad?.subject || !quad?.predicate || !quad?.object) return;
1017
+
1018
+ if (consumedAdds.has(quadToKeyForOrigin(quad))) return;
1019
+
1020
+ const anchorKey = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
1021
+ const anchored = anchors.get(anchorKey) || null;
1022
+ let targetBlock = anchored?.block || null;
1023
+
1024
+ if (!targetBlock) {
1025
+ for (const [, block] of base?.blocks || []) {
1026
+ if (block.subject === quad.subject.value && block.attrsRange) {
1027
+ targetBlock = block;
1028
+ break;
1029
+ }
543
1030
  }
544
1031
  }
545
1032
 
546
- const pred = shortenIRI(quad.predicate.value, ctx);
547
- let objText;
1033
+ if (quad.object.termType === 'Literal' || quad.object.termType === 'NamedNode') {
1034
+ if (!targetBlock) {
1035
+ const predShort = shortenIRI(quad.predicate.value, ctx);
1036
+ if (quad.object.termType === 'Literal') {
1037
+ const value = String(quad.object.value ?? '');
1038
+ let ann = predShort;
1039
+ if (quad.object.language) ann += ` @${quad.object.language}`;
1040
+ else if (quad.object.datatype?.value && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
1041
+ ann += ` ^^${shortenIRI(quad.object.datatype.value, ctx)}`;
1042
+ }
1043
+ edits.push({ start: result.length, end: result.length, text: `\n[${value}] {${ann}}` });
1044
+ } else {
1045
+ const full = quad.object.value;
1046
+ const label = shortenIRI(full, ctx);
1047
+ edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=${label}) {?${predShort}}` });
1048
+ }
1049
+ return;
1050
+ }
548
1051
 
549
- if (quad.object.termType === 'Literal') {
550
- objText = quad.object.value;
551
- } else {
552
- objText = shortenIRI(quad.object.value, ctx);
1052
+ const predShort = shortenIRI(quad.predicate.value, ctx);
1053
+ if (quad.object.termType === 'Literal') {
1054
+ const value = String(quad.object.value ?? '');
1055
+ let ann = predShort;
1056
+ if (quad.object.language) ann += ` @${quad.object.language}`;
1057
+ else if (quad.object.datatype?.value && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
1058
+ ann += ` ^^${shortenIRI(quad.object.datatype.value, ctx)}`;
1059
+ }
1060
+ edits.push({ start: result.length, end: result.length, text: `\n[${value}] {${ann}}` });
1061
+ return;
1062
+ }
1063
+
1064
+ if (quad.object.termType === 'NamedNode') {
1065
+ const full = quad.object.value;
1066
+ const label = shortenIRI(full, ctx);
1067
+ edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=${shortenIRI(full, ctx)} ?${predShort}}` });
1068
+ return;
1069
+ }
1070
+ }
1071
+
1072
+ const span = readSpan(targetBlock, text, 'attrs');
1073
+ if (!span) return;
1074
+ const tokens = blockTokensFromEntries(targetBlock) || normalizeAttrsTokens(span.text);
1075
+
1076
+ if (quad.predicate.value.endsWith('rdf-syntax-ns#type') && quad.object?.termType === 'NamedNode') {
1077
+ const typeShort = shortenIRI(quad.object.value, ctx);
1078
+ const typeToken = typeShort.includes(':') || !typeShort.startsWith('http') ? `.${typeShort}` : null;
1079
+ if (!typeToken) return;
1080
+ if (tokens.includes(typeToken)) return;
1081
+ const updated = [...tokens, typeToken];
1082
+ edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
1083
+ return;
553
1084
  }
554
1085
 
555
- const newLine = `\n[${objText}] {${pred}}`;
556
- edits.push({ start: insertPos, end: insertPos, text: newLine });
1086
+ const form = anchored?.entry?.form;
1087
+ if (form == null) return;
1088
+ const predShort = shortenIRI(quad.predicate.value, ctx);
1089
+ const predToken = `${form}${predShort}`;
1090
+ if (!predToken) return;
1091
+ if (tokens.includes(predToken)) return;
1092
+ const updated = [...tokens, predToken];
1093
+ edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
557
1094
  });
558
1095
  }
559
1096
 
@@ -562,7 +1099,44 @@ export function serialize({ text, diff, origin, options = {} }) {
562
1099
  result = result.substring(0, edit.start) + edit.text + result.substring(edit.end);
563
1100
  });
564
1101
 
565
- return { text: result, origin };
1102
+ // Extract vacant slots before reparsing to preserve them
1103
+ const vacantSlots = new Map();
1104
+ base?.quadIndex?.forEach((slot, key) => {
1105
+ if (slot.isVacant) {
1106
+ vacantSlots.set(key, slot);
1107
+ }
1108
+ });
1109
+
1110
+ const reparsed = parse(result, { context: options.context || {} });
1111
+
1112
+ // Merge vacant slots back into the new origin
1113
+ vacantSlots.forEach((vacantSlot, key) => {
1114
+ // Check if the block still exists in the new origin
1115
+ if (!reparsed.origin.blocks.has(vacantSlot.blockId)) {
1116
+ // Recreate the empty block for the vacant slot using preserved info
1117
+ const blockInfo = vacantSlot.blockInfo;
1118
+ if (blockInfo) {
1119
+ const emptyBlock = {
1120
+ id: blockInfo.id,
1121
+ range: blockInfo.range || { start: 0, end: 0 },
1122
+ attrsRange: blockInfo.attrsRange,
1123
+ valueRange: blockInfo.valueRange,
1124
+ carrierType: blockInfo.carrierType || 'span',
1125
+ subject: blockInfo.subject || '',
1126
+ types: [],
1127
+ predicates: [],
1128
+ entries: [], // Empty entries - just {} annotation
1129
+ context: blockInfo.context || { ...ctx }
1130
+ };
1131
+ reparsed.origin.blocks.set(vacantSlot.blockId, emptyBlock);
1132
+ }
1133
+ }
1134
+
1135
+ // Merge the vacant slot back
1136
+ reparsed.origin.quadIndex.set(key, vacantSlot);
1137
+ });
1138
+
1139
+ return { text: result, origin: reparsed.origin };
566
1140
  }
567
1141
 
568
- export default { parse, serialize, parseSemanticBlock };
1142
+ export default { parse, serialize, parseSemanticBlock, shortenIRI };