mdld-parse 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +15 -19
  2. package/index.js +621 -199
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -149,12 +149,12 @@ ex:apollo11 schema:organizer ex:nasa .
149
149
 
150
150
  ### Resource Declaration
151
151
 
152
- Declare resources inline with `(=iri)`:
152
+ Declare resources inline with `{=iri}`:
153
153
 
154
154
  ```markdown
155
155
  # Mission {=ex:apollo11}
156
156
 
157
- [Neil Armstrong](=ex:armstrong) {?commander .Person}
157
+ [Neil Armstrong] {=ex:armstrong ?commander .Person}
158
158
  ```
159
159
 
160
160
  ```turtle
@@ -171,8 +171,8 @@ Lists require explicit subjects per item:
171
171
 
172
172
  Ingredients: {?ingredient .Ingredient}
173
173
 
174
- - [Flour](=ex:flour) {name}
175
- - [Water](=ex:water) {name}
174
+ - Flour {=ex:flour name}
175
+ - Water {=ex:water name}
176
176
  ```
177
177
 
178
178
  ```turtle
@@ -219,7 +219,7 @@ Reverse the relationship direction:
219
219
 
220
220
  Part of: {^?hasPart}
221
221
 
222
- - [Book](=ex:book) {}
222
+ - Book {=ex:book}
223
223
  ```
224
224
 
225
225
  ```turtle
@@ -331,21 +331,17 @@ console.log(updated.text);
331
331
  Only specific markdown elements can carry semantic values:
332
332
 
333
333
  **Inline:**
334
- - `[text]` — span with annotation
335
- - `[text](url)` — link to external resource
336
- - `[text](=iri)` — inline resource declaration
334
+ - `[text] {...}` — span with annotation
335
+ - `[text](url) {...}` — link to external resource
336
+ - `[text] {...}` — inline resource declaration
337
+ - `![alt text](image.png) {...}` — embedding with annotation
337
338
 
338
339
  **Block:**
339
340
  - Headings (`# Title`)
340
- - List items (`- item`)
341
+ - List items (`- item`, `1. item`) (single-level)
341
342
  - Blockquotes (`> quote`)
342
343
  - Code blocks (` ```lang `)
343
344
 
344
- **Non-carriers:**
345
- - Plain paragraphs without `[...]`
346
- - Images (future)
347
- - Tables (future)
348
-
349
345
  ## Architecture
350
346
 
351
347
  ### Design Principles
@@ -383,12 +379,12 @@ MD-LD explicitly forbids to ensure deterministic parsing:
383
379
 
384
380
  Attendees: {?attendee}
385
381
 
386
- - [Alice](=urn:person:alice) {name}
387
- - [Bob](=urn:person:bob) {name}
382
+ - Alice {=urn:person:alice name}
383
+ - Bob {=urn:person:bob name}
388
384
 
389
385
  Action items: {?actionItem}
390
386
 
391
- - [Review proposal](=urn:task:1) {name}
387
+ - Review proposal {=urn:task:1 name}
392
388
  ```
393
389
 
394
390
  ### Developer Documentation
@@ -401,7 +397,7 @@ Action items: {?actionItem}
401
397
 
402
398
  Example:
403
399
 
404
- ```bash {=api:/users/:id/example .CodeExample programmingLanguage}
400
+ ```bash {=api:/users/:id#example .CodeExample text}
405
401
  curl https://api.example.com/users/123
406
402
  ```
407
403
  ````
@@ -412,7 +408,7 @@ curl https://api.example.com/users/123
412
408
  # Paper {=doi:10.1234/example .ScholarlyArticle}
413
409
 
414
410
  [Semantic Web] {about}
415
- [Alice Johnson](=orcid:0000-0001-2345-6789) {?author .Person}
411
+ [Alice Johnson] {=orcid:0000-0001-2345-6789 author}
416
412
  [2024-01] {datePublished ^^xsd:gYearMonth}
417
413
 
418
414
  > This paper explores semantic markup in Markdown. {abstract @en}
package/index.js CHANGED
@@ -25,8 +25,9 @@ function hash(str) {
25
25
  }
26
26
 
27
27
  function expandIRI(term, ctx) {
28
- if (!term) return null;
29
- const t = term.trim();
28
+ if (term == null) return null;
29
+ const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
30
+ const t = raw.trim();
30
31
  if (t.match(/^https?:/)) return t;
31
32
  if (t.includes(':')) {
32
33
  const [prefix, ref] = t.split(':', 2);
@@ -37,38 +38,83 @@ function expandIRI(term, ctx) {
37
38
 
38
39
  function parseSemanticBlock(raw) {
39
40
  try {
40
- const cleaned = raw.replace(/^\{|\}$/g, '').trim();
41
- if (!cleaned) return { subject: null, types: [], predicates: [], datatype: null, language: null };
41
+ const src = String(raw || '').trim();
42
+ const cleaned = src.replace(/^\{|\}$/g, '').trim();
43
+ if (!cleaned) return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
44
+
45
+ const result = { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
46
+ const re = /\S+/g;
47
+ let m;
48
+ while ((m = re.exec(cleaned)) !== null) {
49
+ const token = m[0];
50
+ const relStart = 1 + m.index;
51
+ const relEnd = relStart + token.length;
52
+ const entryIndex = result.entries.length;
42
53
 
43
- const result = { subject: null, types: [], predicates: [], datatype: null, language: null };
44
- const tokens = cleaned.split(/\s+/).filter(t => t);
45
-
46
- for (const token of tokens) {
47
54
  if (token === '=') {
48
55
  result.subject = 'RESET';
49
- } else if (token.startsWith('=')) {
50
- result.subject = token.substring(1);
51
- } else if (token.startsWith('^^')) {
52
- result.datatype = token.substring(2);
53
- } else if (token.startsWith('@')) {
54
- result.language = token.substring(1);
55
- } else if (token.startsWith('.')) {
56
- result.types.push(token.substring(1));
57
- } else if (token.startsWith('^?')) {
58
- result.predicates.push({ iri: token.substring(2), form: '^?' });
59
- } else if (token.startsWith('^')) {
60
- result.predicates.push({ iri: token.substring(1), form: '^' });
61
- } else if (token.startsWith('?')) {
62
- result.predicates.push({ iri: token.substring(1), form: '?' });
63
- } else {
64
- result.predicates.push({ iri: token, form: '' });
56
+ result.entries.push({ kind: 'subjectReset', relRange: { start: relStart, end: relEnd }, raw: token });
57
+ continue;
58
+ }
59
+
60
+ if (token.startsWith('=')) {
61
+ const iri = token.substring(1);
62
+ result.subject = iri;
63
+ result.entries.push({ kind: 'subject', iri, relRange: { start: relStart, end: relEnd }, raw: token });
64
+ continue;
65
+ }
66
+
67
+ if (token.startsWith('^^')) {
68
+ const datatype = token.substring(2);
69
+ if (!result.language) result.datatype = datatype;
70
+ result.entries.push({ kind: 'datatype', datatype, relRange: { start: relStart, end: relEnd }, raw: token });
71
+ continue;
72
+ }
73
+
74
+ if (token.startsWith('@')) {
75
+ const language = token.substring(1);
76
+ result.language = language;
77
+ result.datatype = null;
78
+ result.entries.push({ kind: 'language', language, relRange: { start: relStart, end: relEnd }, raw: token });
79
+ continue;
80
+ }
81
+
82
+ if (token.startsWith('.')) {
83
+ const classIRI = token.substring(1);
84
+ result.types.push({ iri: classIRI, entryIndex });
85
+ result.entries.push({ kind: 'type', iri: classIRI, relRange: { start: relStart, end: relEnd }, raw: token });
86
+ continue;
87
+ }
88
+
89
+ if (token.startsWith('^?')) {
90
+ const iri = token.substring(2);
91
+ result.predicates.push({ iri, form: '^?', entryIndex });
92
+ result.entries.push({ kind: 'property', iri, form: '^?', relRange: { start: relStart, end: relEnd }, raw: token });
93
+ continue;
65
94
  }
95
+
96
+ if (token.startsWith('^')) {
97
+ const iri = token.substring(1);
98
+ result.predicates.push({ iri, form: '^', entryIndex });
99
+ result.entries.push({ kind: 'property', iri, form: '^', relRange: { start: relStart, end: relEnd }, raw: token });
100
+ continue;
101
+ }
102
+
103
+ if (token.startsWith('?')) {
104
+ const iri = token.substring(1);
105
+ result.predicates.push({ iri, form: '?', entryIndex });
106
+ result.entries.push({ kind: 'property', iri, form: '?', relRange: { start: relStart, end: relEnd }, raw: token });
107
+ continue;
108
+ }
109
+
110
+ result.predicates.push({ iri: token, form: '', entryIndex });
111
+ result.entries.push({ kind: 'property', iri: token, form: '', relRange: { start: relStart, end: relEnd }, raw: token });
66
112
  }
67
113
 
68
114
  return result;
69
115
  } catch (error) {
70
116
  console.error(`Error parsing semantic block ${raw}:`, error);
71
- return { subject: null, types: [], predicates: [], datatype: null, language: null };
117
+ return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
72
118
  }
73
119
  }
74
120
 
@@ -86,20 +132,29 @@ function scanTokens(text) {
86
132
  if (line.startsWith('```')) {
87
133
  if (!codeBlock) {
88
134
  const fence = line.match(/^(`{3,})(.*)/);
135
+ const attrsText = fence[2].match(/\{[^}]+\}/)?.[0] || null;
136
+ const attrsStartInLine = attrsText ? line.indexOf(attrsText) : -1;
137
+ const contentStart = lineStart + line.length + 1;
89
138
  codeBlock = {
90
139
  fence: fence[1],
91
140
  start: lineStart,
92
141
  content: [],
93
142
  lang: fence[2].trim().split(/[\s{]/)[0],
94
- attrs: fence[2].match(/\{[^}]+\}/)?.[0]
143
+ attrs: attrsText,
144
+ attrsRange: attrsText && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrsText.length] : null,
145
+ valueRangeStart: contentStart
95
146
  };
96
147
  } else if (line.startsWith(codeBlock.fence)) {
148
+ const valueStart = codeBlock.valueRangeStart;
149
+ const valueEnd = Math.max(valueStart, lineStart - 1);
97
150
  tokens.push({
98
151
  type: 'code',
99
152
  range: [codeBlock.start, lineStart],
100
153
  text: codeBlock.content.join('\n'),
101
154
  lang: codeBlock.lang,
102
- attrs: codeBlock.attrs
155
+ attrs: codeBlock.attrs,
156
+ attrsRange: codeBlock.attrsRange,
157
+ valueRange: [valueStart, valueEnd]
103
158
  });
104
159
  codeBlock = null;
105
160
  }
@@ -119,35 +174,58 @@ function scanTokens(text) {
119
174
 
120
175
  const headingMatch = line.match(/^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
121
176
  if (headingMatch) {
177
+ const attrs = headingMatch[3] || null;
178
+ const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
179
+ const afterHashes = headingMatch[1].length;
180
+ const ws = line.substring(afterHashes).match(/^\s+/)?.[0]?.length || 0;
181
+ const valueStartInLine = afterHashes + ws;
182
+ const valueEndInLine = valueStartInLine + headingMatch[2].length;
122
183
  tokens.push({
123
184
  type: 'heading',
124
185
  depth: headingMatch[1].length,
125
186
  range: [lineStart, pos - 1],
126
187
  text: headingMatch[2].trim(),
127
- attrs: headingMatch[3]
188
+ attrs,
189
+ attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
190
+ valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
128
191
  });
129
192
  continue;
130
193
  }
131
194
 
132
195
  const listMatch = line.match(/^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
133
196
  if (listMatch) {
197
+ const attrs = listMatch[4] || null;
198
+ const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
199
+ const prefix = listMatch[1].length + listMatch[2].length;
200
+ const ws = line.substring(prefix).match(/^\s+/)?.[0]?.length || 0;
201
+ const valueStartInLine = prefix + ws;
202
+ const valueEndInLine = valueStartInLine + listMatch[3].length;
134
203
  tokens.push({
135
204
  type: 'list',
136
205
  indent: listMatch[1].length,
137
206
  range: [lineStart, pos - 1],
138
207
  text: listMatch[3].trim(),
139
- attrs: listMatch[4]
208
+ attrs,
209
+ attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
210
+ valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
140
211
  });
141
212
  continue;
142
213
  }
143
214
 
144
215
  const blockquoteMatch = line.match(/^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
145
216
  if (blockquoteMatch) {
217
+ const attrs = blockquoteMatch[2] || null;
218
+ const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
219
+ const prefixMatch = line.match(/^>\s+/);
220
+ const valueStartInLine = prefixMatch ? prefixMatch[0].length : 2;
221
+ const valueEndInLine = valueStartInLine + blockquoteMatch[1].length;
146
222
  tokens.push({
147
223
  type: 'blockquote',
148
224
  range: [lineStart, pos - 1],
149
225
  text: blockquoteMatch[1].trim(),
150
- attrs: blockquoteMatch[2]
226
+ attrs,
227
+ attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
228
+ valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
151
229
  });
152
230
  continue;
153
231
  }
@@ -165,7 +243,6 @@ function scanTokens(text) {
165
243
  return tokens;
166
244
  }
167
245
 
168
- // Extract inline carriers: [text] {attrs}, [text](url) {attrs}, [text](=iri) {attrs}
169
246
  function extractInlineCarriers(text, baseOffset = 0) {
170
247
  const carriers = [];
171
248
  let pos = 0;
@@ -174,14 +251,25 @@ function extractInlineCarriers(text, baseOffset = 0) {
174
251
  const bracketStart = text.indexOf('[', pos);
175
252
  if (bracketStart === -1) break;
176
253
 
177
- const bracketEnd = text.indexOf(']', bracketStart);
178
- if (bracketEnd === -1) break;
254
+ let bracketDepth = 1;
255
+ let bracketEnd = bracketStart + 1;
256
+
257
+ while (bracketEnd < text.length && bracketDepth > 0) {
258
+ if (text[bracketEnd] === '[') {
259
+ bracketDepth++;
260
+ } else if (text[bracketEnd] === ']') {
261
+ bracketDepth--;
262
+ }
263
+ bracketEnd++;
264
+ }
265
+
266
+ if (bracketDepth > 0) break;
179
267
 
180
- const carrierText = text.substring(bracketStart + 1, bracketEnd);
181
- let spanEnd = bracketEnd + 1;
268
+ const carrierText = text.substring(bracketStart + 1, bracketEnd - 1);
269
+ const valueRange = [baseOffset + bracketStart + 1, baseOffset + bracketEnd - 1];
270
+ let spanEnd = bracketEnd;
182
271
  let url = null;
183
272
 
184
- // Check for (url) or (=iri)
185
273
  if (text[spanEnd] === '(') {
186
274
  const parenEnd = text.indexOf(')', spanEnd);
187
275
  if (parenEnd !== -1) {
@@ -190,22 +278,24 @@ function extractInlineCarriers(text, baseOffset = 0) {
190
278
  }
191
279
  }
192
280
 
193
- // Check for {attrs}
194
281
  let attrs = null;
282
+ let attrsRange = null;
195
283
  const attrsMatch = text.substring(spanEnd).match(/^\s*\{([^}]+)\}/);
196
284
  if (attrsMatch) {
197
285
  attrs = `{${attrsMatch[1]}}`;
286
+ const braceIndex = attrsMatch[0].indexOf('{');
287
+ const absStart = baseOffset + spanEnd + (braceIndex >= 0 ? braceIndex : 0);
288
+ attrsRange = [absStart, absStart + attrs.length];
198
289
  spanEnd += attrsMatch[0].length;
199
290
  }
200
291
 
201
- // Determine type and resource
202
292
  let carrierType = 'span';
203
293
  let resourceIRI = null;
204
294
 
205
295
  if (url) {
206
296
  if (url.startsWith('=')) {
207
- carrierType = 'resource';
208
- resourceIRI = url.substring(1);
297
+ pos = spanEnd;
298
+ continue;
209
299
  } else {
210
300
  carrierType = 'link';
211
301
  resourceIRI = url;
@@ -217,6 +307,8 @@ function extractInlineCarriers(text, baseOffset = 0) {
217
307
  text: carrierText,
218
308
  url: resourceIRI,
219
309
  attrs: attrs,
310
+ attrsRange,
311
+ valueRange,
220
312
  range: [baseOffset + bracketStart, baseOffset + spanEnd]
221
313
  });
222
314
 
@@ -226,191 +318,150 @@ function extractInlineCarriers(text, baseOffset = 0) {
226
318
  return carriers;
227
319
  }
228
320
 
229
- function createBlock(subject, types, predicates, range, ctx) {
321
+ function createBlock(subject, types, predicates, entries, range, attrsRange, valueRange, carrierType, ctx) {
230
322
  const expanded = {
231
- subject: subject,
232
- types: types.map(t => expandIRI(t, ctx)),
233
- predicates: predicates.map(p => ({
234
- iri: expandIRI(p.iri, ctx),
235
- form: p.form
236
- }))
323
+ subject,
324
+ types: types.map(t => expandIRI(typeof t === 'string' ? t : t.iri, ctx)),
325
+ predicates: predicates.map(p => ({ iri: expandIRI(p.iri, ctx), form: p.form }))
237
326
  };
238
-
239
327
  const blockId = hash([subject, JSON.stringify(expanded)].join('|'));
240
328
  return {
241
329
  id: blockId,
242
330
  range: { start: range[0], end: range[1] },
331
+ attrsRange: attrsRange ? { start: attrsRange[0], end: attrsRange[1] } : null,
332
+ valueRange: valueRange ? { start: valueRange[0], end: valueRange[1] } : null,
333
+ carrierType: carrierType || null,
243
334
  subject,
244
335
  types: expanded.types,
245
336
  predicates: expanded.predicates,
337
+ entries: entries || [],
246
338
  context: { ...ctx }
247
339
  };
248
340
  }
249
341
 
250
- function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory) {
342
+ function quadIndexKey(subject, predicate, object) {
343
+ const objKey = object.termType === 'Literal'
344
+ ? JSON.stringify({ t: 'Literal', v: object.value, lang: object.language || '', dt: object.datatype?.value || '' })
345
+ : JSON.stringify({ t: object.termType, v: object.value });
346
+ return JSON.stringify([subject.value, predicate.value, objKey]);
347
+ }
348
+
349
+ function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory, meta = null) {
251
350
  if (!subject || !predicate || !object) return;
252
351
  const quad = dataFactory.quad(subject, predicate, object);
253
352
  quads.push(quad);
254
- const key = JSON.stringify([
255
- quad.subject.value,
256
- quad.predicate.value,
257
- quad.object.termType === 'Literal' ? quad.object.value : quad.object.value
258
- ]);
259
- quadIndex.set(key, blockId);
353
+ quadIndex.set(quadIndexKey(quad.subject, quad.predicate, quad.object), meta ? { blockId, ...meta } : { blockId });
260
354
  }
261
355
 
262
356
  function createLiteral(value, datatype, language, context, dataFactory) {
263
- if (datatype) {
264
- return dataFactory.literal(value, dataFactory.namedNode(expandIRI(datatype, context)));
265
- }
266
- if (language) {
267
- return dataFactory.literal(value, language);
268
- }
357
+ if (datatype) return dataFactory.literal(value, dataFactory.namedNode(expandIRI(datatype, context)));
358
+ if (language) return dataFactory.literal(value, language);
269
359
  return dataFactory.literal(value);
270
360
  }
271
361
 
272
- // Core processing: handle subject/type declarations and property emissions
273
362
  function processAnnotation(carrier, sem, state) {
274
- // §6.1 Subject declaration
275
363
  if (sem.subject === 'RESET') {
276
364
  state.currentSubject = null;
277
365
  return;
278
366
  }
279
367
 
280
- if (sem.subject) {
281
- state.currentSubject = state.df.namedNode(expandIRI(sem.subject, state.ctx));
282
- }
368
+ const previousSubject = state.currentSubject;
369
+ let newSubject = sem.subject ? state.df.namedNode(expandIRI(sem.subject, state.ctx)) : null;
370
+ if (newSubject) state.currentSubject = newSubject;
283
371
 
284
- // Determine the subject for emissions
285
372
  const S = state.currentSubject;
286
- if (!S) return; // Need a subject to emit anything
287
-
288
- // Create origin block
289
- const block = createBlock(
290
- S.value,
291
- sem.types,
292
- sem.predicates,
293
- carrier.range,
294
- state.ctx
295
- );
373
+ if (!S) return;
374
+
375
+ const block = createBlock(S.value, sem.types, sem.predicates, sem.entries, carrier.range, carrier.attrsRange || null, carrier.valueRange || null, carrier.type || null, state.ctx);
296
376
  state.origin.blocks.set(block.id, block);
297
377
 
298
- // Extract L (literal) and O (object IRI)
299
378
  const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
300
379
  const O = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
301
380
 
302
- // §7 Emit type triples
303
- sem.types.forEach(typeIRI => {
381
+ sem.types.forEach(t => {
382
+ const typeIRI = typeof t === 'string' ? t : t.iri;
383
+ const entryIndex = typeof t === 'string' ? null : t.entryIndex;
304
384
  const typeSubject = O || S;
305
- emitQuad(
306
- state.quads,
307
- state.origin.quadIndex,
308
- block.id,
309
- typeSubject,
310
- state.df.namedNode(expandIRI('rdf:type', state.ctx)),
311
- state.df.namedNode(expandIRI(typeIRI, state.ctx)),
312
- state.df
313
- );
385
+ const expandedType = expandIRI(typeIRI, state.ctx);
386
+ emitQuad(state.quads, state.origin.quadIndex, block.id, typeSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)), state.df.namedNode(expandedType), state.df, { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex });
314
387
  });
315
388
 
316
- // §8 Emit predicate triples (routing table)
317
389
  sem.predicates.forEach(pred => {
318
390
  const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
391
+ const token = `${pred.form}${pred.iri}`;
319
392
 
320
393
  if (pred.form === '') {
321
- // p: S L
322
- emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, L, state.df);
394
+ emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, L, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
323
395
  } else if (pred.form === '?') {
324
- // ?p: S → O
325
- if (O) {
326
- emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, O, state.df);
396
+ if (newSubject) {
397
+ emitQuad(state.quads, state.origin.quadIndex, block.id, previousSubject, P, newSubject, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
398
+ } else if (O) {
399
+ emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, O, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
327
400
  }
328
- } else if (pred.form === '^') {
329
- // ^p: reverse literal (L → S impossible, emit nothing per spec)
330
- // Note: Some interpretations might emit S → S or skip
331
401
  } else if (pred.form === '^?') {
332
- // ^?p: O → S
333
- if (O) {
334
- emitQuad(state.quads, state.origin.quadIndex, block.id, O, P, S, state.df);
402
+ if (newSubject) {
403
+ emitQuad(state.quads, state.origin.quadIndex, block.id, newSubject, P, previousSubject, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
404
+ } else if (O) {
405
+ emitQuad(state.quads, state.origin.quadIndex, block.id, O, P, S, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
335
406
  }
336
407
  }
337
408
  });
338
409
  }
339
410
 
340
- // Process list with context annotation
341
- function processListContext(contextSem, listTokens, state) {
342
- const contextSubject = state.currentSubject;
343
- if (!contextSubject) return;
411
+ function processListContext(contextSem, listTokens, state, contextSubject = null) {
412
+ if (!contextSubject) contextSubject = state.currentSubject;
344
413
 
345
414
  listTokens.forEach(listToken => {
346
- // Extract carriers from list item text
347
415
  const carriers = extractInlineCarriers(listToken.text, listToken.range[0]);
348
-
349
- // Find subject from carriers or list item annotation
350
416
  let itemSubject = null;
351
417
  let itemSubjectCarrier = null;
352
418
 
353
- // First check carriers for subject declarations
354
- for (const carrier of carriers) {
355
- if (carrier.url && carrier.type === 'resource') {
356
- // [text](=iri) declares a subject
357
- itemSubject = state.df.namedNode(expandIRI(carrier.url, state.ctx));
358
- itemSubjectCarrier = carrier;
359
- break;
419
+ if (listToken.attrs) {
420
+ const itemSem = parseSemanticBlock(listToken.attrs);
421
+ if (itemSem.subject && itemSem.subject !== 'RESET') {
422
+ itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
423
+ itemSubjectCarrier = { type: 'list', text: listToken.text, attrs: listToken.attrs, range: listToken.range };
360
424
  }
361
- if (carrier.attrs) {
362
- const itemSem = parseSemanticBlock(carrier.attrs);
363
- if (itemSem.subject && itemSem.subject !== 'RESET') {
364
- itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
365
- itemSubjectCarrier = carrier;
366
- break;
425
+ }
426
+
427
+ if (!itemSubject) {
428
+ for (const carrier of carriers) {
429
+ if (carrier.attrs) {
430
+ const itemSem = parseSemanticBlock(carrier.attrs);
431
+ if (itemSem.subject && itemSem.subject !== 'RESET') {
432
+ itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
433
+ itemSubjectCarrier = carrier;
434
+ break;
435
+ }
367
436
  }
368
437
  }
369
438
  }
370
439
 
371
- if (!itemSubject) return; // List items must declare subjects
372
-
373
- // Apply context types to item
374
- contextSem.types.forEach(typeIRI => {
375
- emitQuad(
376
- state.quads,
377
- state.origin.quadIndex,
378
- 'list-context',
379
- itemSubject,
380
- state.df.namedNode(expandIRI('rdf:type', state.ctx)),
381
- state.df.namedNode(expandIRI(typeIRI, state.ctx)),
382
- state.df
383
- );
440
+ if (!itemSubject) return;
441
+
442
+ contextSem.types.forEach(t => {
443
+ const typeIRI = typeof t === 'string' ? t : t.iri;
444
+ emitQuad(state.quads, state.origin.quadIndex, 'list-context', itemSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)), state.df.namedNode(expandIRI(typeIRI, state.ctx)), state.df);
384
445
  });
385
446
 
386
- // Emit context relationships
387
447
  contextSem.predicates.forEach(pred => {
388
448
  const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
389
-
390
449
  if (pred.form === '^' || pred.form === '^?') {
391
- // Reverse: item context
392
- emitQuad(state.quads, state.origin.quadIndex, 'list-context',
393
- itemSubject, P, contextSubject, state.df);
450
+ emitQuad(state.quads, state.origin.quadIndex, 'list-context', itemSubject, P, contextSubject, state.df);
394
451
  } else {
395
- // Forward: context item
396
- emitQuad(state.quads, state.origin.quadIndex, 'list-context',
397
- contextSubject, P, itemSubject, state.df);
452
+ emitQuad(state.quads, state.origin.quadIndex, 'list-context', contextSubject, P, itemSubject, state.df);
398
453
  }
399
454
  });
400
455
 
401
- // Process item's own annotations
402
456
  const prevSubject = state.currentSubject;
403
457
  state.currentSubject = itemSubject;
404
458
 
405
- // Process the list token's own attributes
406
459
  if (listToken.attrs) {
407
460
  const itemSem = parseSemanticBlock(listToken.attrs);
408
- // For list item attributes, the literal is the text content without links
409
- const carrier = { type: 'list', text: listToken.text.replace(/\[([^\]]+)\]\([^)]+\)/, '$1'), range: listToken.range };
461
+ const carrier = { type: 'list', text: listToken.text, range: listToken.range, attrsRange: listToken.attrsRange || null, valueRange: listToken.valueRange || null };
410
462
  processAnnotation(carrier, itemSem, state);
411
463
  }
412
464
 
413
- // Process inline carriers' attributes
414
465
  carriers.forEach(carrier => {
415
466
  if (carrier.attrs) {
416
467
  const itemSem = parseSemanticBlock(carrier.attrs);
@@ -432,8 +483,6 @@ export function parse(text, options = {}) {
432
483
  };
433
484
 
434
485
  const tokens = scanTokens(text);
435
-
436
- // Apply prefix declarations
437
486
  tokens.filter(t => t.type === 'prefix').forEach(t => state.ctx[t.prefix] = t.iri);
438
487
 
439
488
  for (let i = 0; i < tokens.length; i++) {
@@ -441,18 +490,26 @@ export function parse(text, options = {}) {
441
490
 
442
491
  if (token.type === 'heading' && token.attrs) {
443
492
  const sem = parseSemanticBlock(token.attrs);
444
- const carrier = { type: 'heading', text: token.text, range: token.range };
493
+ const carrier = { type: 'heading', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
445
494
  processAnnotation(carrier, sem, state);
446
495
  } else if (token.type === 'code' && token.attrs) {
447
496
  const sem = parseSemanticBlock(token.attrs);
448
- const carrier = { type: 'code', text: token.text, range: token.range };
497
+ const carrier = { type: 'code', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
449
498
  processAnnotation(carrier, sem, state);
450
499
  } else if (token.type === 'blockquote' && token.attrs) {
451
500
  const sem = parseSemanticBlock(token.attrs);
452
- const carrier = { type: 'blockquote', text: token.text, range: token.range };
501
+ const carrier = { type: 'blockquote', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
453
502
  processAnnotation(carrier, sem, state);
454
503
  } else if (token.type === 'para') {
455
- // Check for list context
504
+ // Check for standalone subject declarations: {=iri} on its own line
505
+ const standaloneSubjectMatch = token.text.match(/^\s*\{=(.*?)\}\s*$/);
506
+ if (standaloneSubjectMatch) {
507
+ const sem = parseSemanticBlock(`{=${standaloneSubjectMatch[1]}}`);
508
+ const attrsStart = token.range[0] + token.text.indexOf('{=');
509
+ const attrsEnd = attrsStart + (standaloneSubjectMatch[1] ? standaloneSubjectMatch[1].length : 0);
510
+ processAnnotation({ type: 'standalone', text: '', range: token.range, attrsRange: [attrsStart, attrsEnd], valueRange: null }, sem, state);
511
+ }
512
+
456
513
  const followingLists = [];
457
514
  let j = i + 1;
458
515
  while (j < tokens.length && tokens[j].type === 'list') {
@@ -460,17 +517,28 @@ export function parse(text, options = {}) {
460
517
  j++;
461
518
  }
462
519
 
463
- // Check if this paragraph ends with {attrs} and is followed by lists
464
520
  const contextMatch = token.text.match(/^(.+?)\s*\{([^}]+)\}$/);
465
521
  if (contextMatch && followingLists.length > 0) {
466
- // This is a list context annotation
467
522
  const contextSem = parseSemanticBlock(`{${contextMatch[2]}}`);
468
- processListContext(contextSem, followingLists, state);
523
+ let contextSubject = state.currentSubject;
524
+
525
+ // Always look for the most recent heading subject for context
526
+ for (let k = i - 1; k >= 0; k--) {
527
+ const prevToken = tokens[k];
528
+ if (prevToken.type === 'heading' && prevToken.attrs) {
529
+ const headingSem = parseSemanticBlock(prevToken.attrs);
530
+ if (headingSem.subject) {
531
+ contextSubject = state.df.namedNode(expandIRI(headingSem.subject, state.ctx));
532
+ break;
533
+ }
534
+ }
535
+ }
536
+
537
+ processListContext(contextSem, followingLists, state, contextSubject);
469
538
  i = j - 1;
470
539
  continue;
471
540
  }
472
541
 
473
- // Process inline carriers
474
542
  const carriers = extractInlineCarriers(token.text, token.range[0]);
475
543
  carriers.forEach(carrier => {
476
544
  if (carrier.attrs) {
@@ -484,76 +552,429 @@ export function parse(text, options = {}) {
484
552
  return { quads: state.quads, origin: state.origin, context: state.ctx };
485
553
  }
486
554
 
487
/**
 * Compacts an absolute IRI using the prefix context.
 *
 * Resolution order:
 *   1. If the IRI starts with `ctx['@vocab']` and has a non-empty remainder,
 *      the bare remainder is returned.
 *   2. Otherwise the prefix whose namespace is the LONGEST match wins, so that
 *      nested namespaces (`http://example.org/` vs `http://example.org/vocab/`)
 *      compact deterministically regardless of key order in `ctx`.
 *   3. If nothing matches, the IRI is returned unchanged.
 *
 * @param {string} iri - IRI to compact. Values that are not strings, or that do
 *   not start with `http`, are returned unchanged.
 * @param {Object<string, string>} [ctx] - Prefix map; may contain `@vocab`.
 *   Keys starting with `@` are never used as prefixes, and non-string or empty
 *   namespaces are ignored.
 * @returns {string} The compacted form, or the input when it cannot be compacted.
 */
export function shortenIRI(iri, ctx) {
    if (typeof iri !== 'string' || !iri.startsWith('http')) return iri;

    const context = ctx ?? {};
    const vocab = context['@vocab'];
    // Require a non-empty local part: returning '' would lose the IRI entirely.
    if (typeof vocab === 'string' && vocab && iri.length > vocab.length && iri.startsWith(vocab)) {
        return iri.substring(vocab.length);
    }

    let bestPrefix = null;
    let bestNamespace = '';
    for (const [prefix, namespace] of Object.entries(context)) {
        if (prefix.startsWith('@') || typeof namespace !== 'string') continue;
        // Strictly longer only: an empty namespace never matches, and on ties
        // the first declared prefix is kept.
        if (namespace.length > bestNamespace.length && iri.startsWith(namespace)) {
            bestPrefix = prefix;
            bestNamespace = namespace;
        }
    }

    return bestPrefix === null ? iri : `${bestPrefix}:${iri.substring(bestNamespace.length)}`;
}
502
565
 
566
/**
 * Stateless helpers used by `serialize` to read origin blocks, rewrite
 * `{...}` attribute lists, and normalise quads before index lookups.
 */
const serializeHelpers = {
    /**
     * Reads the attribute span of an origin block.
     * @param {{attrsRange?: {start: number, end: number}}} block
     * @param {string} text - Document text the range refers to.
     * @returns {{start: number, end: number, text: string}|null} `null` when the
     *   block has no `attrsRange` or the range is not a finite, non-empty span.
     */
    readAttrsSpan: (block, text) => {
        if (!block?.attrsRange) return null;
        const { start, end } = block.attrsRange;
        return (Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end > start)
            ? { start, end, text: text.substring(start, end) }
            : null;
    },

    /**
     * Reads the value span of an origin block. Unlike `readAttrsSpan`, an
     * empty span (`end === start`) is accepted.
     * @returns {{start: number, end: number, text: string}|null}
     */
    readValueSpan: (block, text) => {
        if (!block?.valueRange) return null;
        const { start, end } = block.valueRange;
        return (Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end >= start)
            ? { start, end, text: text.substring(start, end) }
            : null;
    },

    /**
     * Splits attribute text such as `{name .Person}` into whitespace-separated
     * tokens, dropping the surrounding braces.
     * @param {string} attrsText
     * @returns {string[]} Possibly empty token list.
     */
    normalizeAttrsTokens: (attrsText) => {
        const cleaned = String(attrsText || '').replace(/^\s*\{|\}\s*$/g, '').trim();
        return cleaned ? cleaned.split(/\s+/).filter(Boolean) : [];
    },

    /**
     * Returns the raw token of every entry recorded on the block, or `null`
     * when the block has no entries.
     */
    blockTokensFromEntries: (block) => block?.entries?.length ? block.entries.map(e => e.raw).filter(Boolean) : null,

    /**
     * Returns a copy of `block.entries` without the entry at `entryIndex`.
     * @returns {Array|null} `null` when the block has no entries or the index
     *   is missing or out of bounds.
     */
    removeEntryAt: (block, entryIndex) => {
        if (!block?.entries || entryIndex == null || entryIndex < 0 || entryIndex >= block.entries.length) return null;
        return [...block.entries.slice(0, entryIndex), ...block.entries.slice(entryIndex + 1)];
    },

    /**
     * Returns the block's entries with any language/datatype entries replaced
     * by the ones implied by `lit`. A language wins over a datatype, and
     * xsd:string is treated as the implicit default and never written.
     * @param {{entries?: Array}} block
     * @param {{language?: string, datatype?: {value: string}}} lit
     * @param {Object} ctx - Prefix context passed to `shortenIRI`.
     * @returns {Array|null} `null` when the block has no entries array.
     */
    replaceLangDatatypeEntries: (block, lit, ctx) => {
        if (!block?.entries) return null;
        const filtered = block.entries.filter(e => e.kind !== 'language' && e.kind !== 'datatype');
        const extras = [];
        if (lit?.language) extras.push({ kind: 'language', language: lit.language, raw: `@${lit.language}`, relRange: { start: 0, end: 0 } });
        const dt = lit?.datatype?.value;
        if (!lit?.language && dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
            const shortDt = shortenIRI(dt, ctx);
            extras.push({ kind: 'datatype', datatype: shortDt, raw: `^^${shortDt}`, relRange: { start: 0, end: 0 } });
        }
        return [...filtered, ...extras];
    },

    /** Joins tokens back into a braced attribute list, e.g. `{a b}`. */
    writeAttrsTokens: (tokens) => `{${tokens.join(' ').trim()}}`,

    /**
     * Removes the first token for which `matchFn` returns true.
     * @returns {{tokens: string[], removed: boolean}} The input array itself is
     *   returned (not copied) when nothing matched.
     */
    removeOneToken: (tokens, matchFn) => {
        const i = tokens.findIndex(matchFn);
        return i === -1 ? { tokens, removed: false } : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
    },

    /**
     * Returns a shallow copy of the quad in which a literal object always has
     * a string `language` and a datatype TERM (an object with `.value`).
     * A missing datatype defaults to xsd:string.
     *
     * The datatype is kept as a term, never collapsed to its IRI string:
     * callers read `object.datatype?.value`, which is `undefined` on a string
     * and would silently drop the datatype of typed literals.
     *
     * NOTE(review): the object spread copies own enumerable properties only;
     * confirm the literals passed in expose `value`/`termType` that way.
     *
     * @param {Object|null|undefined} q
     * @returns {Object|null} `null` when `q` is falsy.
     */
    normalizeQuad: (q) => {
        if (!q) return null;
        const { subject, predicate, object } = q;
        if (object?.termType !== 'Literal') return { ...q, subject, predicate, object };

        const language = typeof object.language === 'string' ? object.language : '';
        const datatype = object.datatype?.value
            ? object.datatype
            : { termType: 'NamedNode', value: 'http://www.w3.org/2001/XMLSchema#string' };
        return { ...q, subject, predicate, object: { ...object, language, datatype } };
    },

    /**
     * Normalises the quad and passes its terms to `quadIndexKey` (defined
     * elsewhere in this file) to obtain the origin index key.
     * @returns {*} The key, or `null` when `q` is falsy.
     */
    quadToKeyForOrigin: (q) => {
        const nq = serializeHelpers.normalizeQuad(q);
        return nq ? quadIndexKey(nq.subject, nq.predicate, nq.object) : null;
    },

    /**
     * Decodes an origin index key: a JSON array `[s, p, objKey]` whose third
     * element is itself a JSON-encoded object description.
     * @returns {{s: *, p: *, o: *}|null} `null` when the key cannot be decoded;
     *   this is deliberate so callers can skip foreign or malformed keys.
     */
    parseQuadIndexKey: (key) => {
        try {
            const [s, p, objKey] = JSON.parse(key);
            return { s, p, o: JSON.parse(objKey) };
        } catch {
            return null;
        }
    },

    /**
     * Makes a literal value safe to write back into the block's carrier.
     * - `code` carriers keep line breaks, normalised to `\n`.
     * - every other carrier is flattened to one trimmed line.
     * - `span` and `link` carriers additionally have `[` and `]` replaced by
     *   spaces so the value cannot close the bracketed carrier early.
     * @param {{carrierType?: string}} block
     * @param {*} raw - Value to write; `null`/`undefined` become `''`.
     * @returns {string}
     */
    sanitizeCarrierValueForBlock: (block, raw) => {
        const s = String(raw ?? '');
        const t = block?.carrierType;
        if (t === 'code') return s.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
        const oneLine = s.replace(/[\n\r]+/g, ' ').trim();
        return (t === 'span' || t === 'link') ? oneLine.replace(/[\[\]]/g, ' ') : oneLine;
    }
};
647
+
503
648
  export function serialize({ text, diff, origin, options = {} }) {
504
- if (!diff || (!diff.add?.length && !diff.delete?.length)) return { text, origin };
649
+ if (!diff || (!diff.add?.length && !diff.delete?.length)) {
650
+ const reparsed = parse(text, { context: options.context || {} });
651
+ return { text, origin: reparsed.origin };
652
+ }
505
653
 
654
+ const base = origin || parse(text, { context: options.context || {} }).origin;
506
655
  let result = text;
507
656
  const edits = [];
508
657
  const ctx = options.context || {};
509
658
 
510
- if (diff.delete) {
511
- diff.delete.forEach(quad => {
512
- if (!quad || !quad.subject) return;
513
- const key = JSON.stringify([
514
- quad.subject.value,
515
- quad.predicate.value,
516
- quad.object.termType === 'Literal' ? quad.object.value : quad.object.value
517
- ]);
518
- const blockId = origin?.quadIndex.get(key);
519
- if (!blockId) return;
659
// Fallback lookup: scans the origin quad index for the first literal entry with
// the given subject, predicate and lexical value, ignoring the literal's
// language and datatype. Returns the stored index entry, or null if none match.
const findOriginEntryForLiteralByValue = (subjectIri, predicateIri, literalValue) => {
    const index = base?.quadIndex || [];
    for (const [indexKey, originEntry] of index) {
        const decoded = serializeHelpers.parseQuadIndexKey(indexKey);
        const isMatch = Boolean(decoded)
            && decoded.s === subjectIri
            && decoded.p === predicateIri
            && decoded.o?.t === 'Literal'
            && decoded.o?.v === literalValue;
        if (isMatch) return originEntry;
    }
    return null;
};
670
+
671
// Collects every origin block that carries a literal for the given subject and
// predicate. Each result holds the block, the index entry that led to it, and
// the decoded object description from the index key. Index entries whose block
// cannot be resolved are left out.
const findLiteralCarrierBlocksBySP = (subjectIri, predicateIri) => {
    const found = [];
    for (const [indexKey, originEntry] of base?.quadIndex || []) {
        const decoded = serializeHelpers.parseQuadIndexKey(indexKey);
        const isLiteralForSP = decoded
            && decoded.s === subjectIri
            && decoded.p === predicateIri
            && decoded.o?.t === 'Literal';
        if (isLiteralForSP) {
            // The index value is either an entry object with `blockId` or the id itself.
            const id = originEntry?.blockId || originEntry;
            const carrier = id ? base?.blocks?.get(id) : null;
            if (carrier) found.push({ block: carrier, entry: originEntry, obj: decoded.o });
        }
    }
    return found;
};
684
+
685
// Builds a stable string signature for a quad object so that a deleted quad and
// an added quad carrying the same object can be matched to each other.
// Literals are identified by value, language and datatype; other terms by
// term type and value. The datatype is accepted either as a term (object with
// `.value`) or as a bare IRI string, so typed literals never share a signature
// with plain ones. Returns '' for a missing object.
const objectSignature = (o) => {
    if (!o) return '';
    if (o.termType === 'Literal') {
        const dt = typeof o.datatype === 'string' ? o.datatype : (o.datatype?.value || '');
        return JSON.stringify({ t: 'Literal', v: o.value, lang: o.language || '', dt });
    }
    return JSON.stringify({ t: o.termType, v: o.value });
};
692
+
693
+ const anchors = new Map();
694
+ for (const q0 of diff.delete || []) {
695
+ const q = serializeHelpers.normalizeQuad(q0);
696
+ if (!q) continue;
697
+ if (!q?.subject || !q?.object || !q?.predicate) continue;
698
+ const key = JSON.stringify([q.subject.value, objectSignature(q.object)]);
699
+ const qk = serializeHelpers.quadToKeyForOrigin(q);
700
+ const entry = qk ? base?.quadIndex?.get(qk) : null;
701
+ const blockId = entry?.blockId || entry;
702
+ const block = blockId ? base?.blocks?.get(blockId) : null;
703
+ if (!block?.attrsRange) continue;
704
+ anchors.set(key, { block, entry });
705
+ }
706
+
707
+ const addBySP = new Map();
708
+ for (const q0 of diff.add || []) {
709
+ const q = serializeHelpers.normalizeQuad(q0);
710
+ if (!q) continue;
711
+ if (!q?.subject || !q?.predicate || !q?.object) continue;
712
+ const k = JSON.stringify([q.subject.value, q.predicate.value]);
713
+ const list = addBySP.get(k) || [];
714
+ list.push(q);
715
+ addBySP.set(k, list);
716
+ }
717
+
718
+ const consumedAdds = new Set();
719
+ const literalUpdates = [];
720
+ for (const dq0 of diff.delete || []) {
721
+ const dq = serializeHelpers.normalizeQuad(dq0);
722
+ if (!dq) continue;
723
+ if (!dq?.subject || !dq?.predicate || !dq?.object) continue;
724
+ if (dq.object.termType !== 'Literal') continue;
725
+ const k = JSON.stringify([dq.subject.value, dq.predicate.value]);
726
+ const candidates = addBySP.get(k) || [];
727
+ const aq = candidates.find(x => x?.object?.termType === 'Literal' && !consumedAdds.has(serializeHelpers.quadToKeyForOrigin(x)));
728
+ if (!aq) continue;
729
+
730
+ const dqk = serializeHelpers.quadToKeyForOrigin(dq);
731
+ let entry = dqk ? base?.quadIndex?.get(dqk) : null;
732
+ if (!entry && dq.object?.termType === 'Literal') {
733
+ entry = findOriginEntryForLiteralByValue(dq.subject.value, dq.predicate.value, dq.object.value);
734
+ }
735
+ const blockId = entry?.blockId || entry;
736
+ const block = blockId ? base?.blocks?.get(blockId) : null;
737
+ if (!block) continue;
738
+
739
+ literalUpdates.push({ deleteQuad: dq, addQuad: aq, entry, block });
740
+ consumedAdds.add(serializeHelpers.quadToKeyForOrigin(aq));
741
+ }
520
742
 
521
- const block = origin.blocks.get(blockId);
522
- if (!block) return;
743
+ for (const q0 of diff.add || []) {
744
+ const quad = serializeHelpers.normalizeQuad(q0);
745
+ if (!quad || quad.object?.termType !== 'Literal') continue;
746
+ if (consumedAdds.has(serializeHelpers.quadToKeyForOrigin(quad))) continue;
523
747
 
524
- const start = block.range.start;
525
- const end = block.range.end;
526
- const before = text.substring(Math.max(0, start - 1), start);
527
- const after = text.substring(end, Math.min(end + 1, text.length));
528
- const deleteStart = before === '\n' ? start - 1 : start;
529
- const deleteEnd = after === '\n' ? end + 1 : end;
748
+ const matches = findLiteralCarrierBlocksBySP(quad.subject.value, quad.predicate.value);
749
+ if (matches.length === 0) continue;
530
750
 
531
- edits.push({ start: deleteStart, end: deleteEnd, text: '' });
751
+ const desiredLang = quad.object.language || '';
752
+ const sameLang = matches.filter(m => {
753
+ const entries = m.block?.entries || [];
754
+ const langEntry = entries.find(e => e.kind === 'language');
755
+ const lang = langEntry?.language || '';
756
+ return lang === desiredLang;
532
757
  });
758
+
759
+ if (sameLang.length !== 1) continue;
760
+ const target = sameLang[0].block;
761
+ const vSpan = serializeHelpers.readValueSpan(target, text);
762
+ if (!vSpan) continue;
763
+
764
+ const newValue = serializeHelpers.sanitizeCarrierValueForBlock(target, quad.object.value);
765
+ edits.push({ start: vSpan.start, end: vSpan.end, text: newValue });
766
+
767
+ const aSpan = serializeHelpers.readAttrsSpan(target, text);
768
+ if (aSpan && target?.entries?.length) {
769
+ const nextEntries = serializeHelpers.replaceLangDatatypeEntries(target, quad.object, ctx);
770
+ if (nextEntries) {
771
+ const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
772
+ edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
773
+ }
774
+ }
775
+
776
+ consumedAdds.add(quad);
533
777
  }
534
778
 
535
- if (diff.add) {
536
- diff.add.forEach(quad => {
537
- let insertPos = result.length;
779
+ for (const u of literalUpdates) {
780
+ const span = serializeHelpers.readValueSpan(u.block, text);
781
+ if (span) {
782
+ const newValue = serializeHelpers.sanitizeCarrierValueForBlock(u.block, u.addQuad.object.value);
783
+ edits.push({ start: span.start, end: span.end, text: newValue });
784
+ }
538
785
 
539
- for (const [, block] of origin?.blocks || []) {
540
- if (block.subject === quad.subject.value) {
541
- insertPos = block.range.end;
542
- break;
786
+ const aSpan = serializeHelpers.readAttrsSpan(u.block, text);
787
+ if (aSpan) {
788
+ if (u.block?.entries?.length) {
789
+ const nextEntries = serializeHelpers.replaceLangDatatypeEntries(u.block, u.addQuad.object, ctx);
790
+ if (nextEntries) {
791
+ const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
792
+ if (nextTokens.length === 0) {
793
+ edits.push({ start: aSpan.start, end: aSpan.end, text: '{}' });
794
+ } else {
795
+ edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
796
+ }
797
+ }
798
+ } else {
799
+ const tokens = serializeHelpers.normalizeAttrsTokens(aSpan.text);
800
+ const updated = updateAttrsDatatypeLang(tokens, u.addQuad.object);
801
+ if (updated.join(' ') !== tokens.join(' ')) {
802
+ if (updated.length === 0) {
803
+ edits.push({ start: aSpan.start, end: aSpan.end, text: '{}' });
804
+ } else {
805
+ edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(updated) });
806
+ }
543
807
  }
544
808
  }
809
+ }
810
+ }
811
+
812
// Returns the attribute tokens with any existing `@lang` / `^^datatype` token
// replaced by the one implied by `newLit`. A language wins over a datatype,
// and xsd:string is the implicit default and is never written.
//
// This is a hoisted function declaration on purpose: the literal-update loop
// earlier in the enclosing function calls it, and a `const` binding declared
// at this position would still be in its temporal dead zone at that point,
// throwing a ReferenceError.
function updateAttrsDatatypeLang(tokens, newLit) {
    const predicatesAndTypes = tokens.filter(t => !t.startsWith('@') && !t.startsWith('^^'));
    if (newLit?.language) return [...predicatesAndTypes, `@${newLit.language}`];
    const dt = newLit?.datatype?.value;
    if (dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
        return [...predicatesAndTypes, `^^${shortenIRI(dt, ctx)}`];
    }
    return predicatesAndTypes;
}
545
821
 
546
- const pred = shortenIRI(quad.predicate.value, ctx);
547
- let objText;
822
+ if (diff.delete) {
823
+ diff.delete.forEach(q0 => {
824
+ const quad = serializeHelpers.normalizeQuad(q0);
825
+ if (!quad) return;
826
+ if (!quad?.subject || !quad?.predicate || !quad?.object) return;
548
827
 
549
828
  if (quad.object.termType === 'Literal') {
550
- objText = quad.object.value;
829
+ const isUpdated = literalUpdates.some(u =>
830
+ u.deleteQuad.subject.value === quad.subject.value &&
831
+ u.deleteQuad.predicate.value === quad.predicate.value &&
832
+ u.deleteQuad.object.value === quad.object.value
833
+ );
834
+ if (isUpdated) return;
835
+ }
836
+
837
+ const key = serializeHelpers.quadToKeyForOrigin(quad);
838
+ let entry = key ? base?.quadIndex?.get(key) : null;
839
+ if (!entry && quad.object?.termType === 'Literal') {
840
+ entry = findOriginEntryForLiteralByValue(quad.subject.value, quad.predicate.value, quad.object.value);
841
+ }
842
+ const blockId = entry?.blockId || entry;
843
+ if (!blockId) return;
844
+ const block = base?.blocks?.get(blockId);
845
+ const span = serializeHelpers.readAttrsSpan(block, text);
846
+ if (!span) return;
847
+
848
+ if (entry?.entryIndex != null && block?.entries?.length) {
849
+ const nextEntries = serializeHelpers.removeEntryAt(block, entry.entryIndex);
850
+ if (!nextEntries) return;
851
+ const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
852
+ if (nextTokens.length === 0) {
853
+ edits.push({ start: span.start, end: span.end, text: '{}' });
854
+ } else {
855
+ edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
856
+ }
857
+ return;
858
+ }
859
+
860
+ const tokens = serializeHelpers.normalizeAttrsTokens(span.text);
861
+ let updated = tokens;
862
+ let removed = false;
863
+
864
+ if (entry?.kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
865
+ const expectedType = entry.expandedType || quad.object.value;
866
+ ({ tokens: updated, removed } = serializeHelpers.removeOneToken(tokens, t => {
867
+ if (!t.startsWith('.')) return false;
868
+ const raw = t.slice(1);
869
+ return expandIRI(raw, ctx) === expectedType;
870
+ }));
551
871
  } else {
552
- objText = shortenIRI(quad.object.value, ctx);
872
+ const expectedPred = entry?.expandedPredicate || quad.predicate.value;
873
+ const expectedForm = entry?.form;
874
+ ({ tokens: updated, removed } = serializeHelpers.removeOneToken(tokens, t => {
875
+ const m = String(t).match(/^(\^\?|\^|\?|)(.+)$/);
876
+ if (!m) return false;
877
+ const form = m[1] || '';
878
+ const raw = m[2];
879
+ if (expectedForm != null && form !== expectedForm) return false;
880
+ return expandIRI(raw, ctx) === expectedPred;
881
+ }));
882
+ }
883
+
884
+ if (!removed) return;
885
+
886
+ if (updated.length === 0) {
887
+ edits.push({ start: span.start, end: span.end, text: '{}' });
888
+ return;
889
+ }
890
+
891
+ const newAttrs = serializeHelpers.writeAttrsTokens(updated);
892
+ edits.push({ start: span.start, end: span.end, text: newAttrs });
893
+ });
894
+ }
895
+
896
+ if (diff.add) {
897
+ diff.add.forEach(q0 => {
898
+ const quad = serializeHelpers.normalizeQuad(q0);
899
+ if (!quad) return;
900
+ if (!quad?.subject || !quad?.predicate || !quad?.object) return;
901
+
902
+ if (consumedAdds.has(serializeHelpers.quadToKeyForOrigin(quad))) return;
903
+
904
+ const anchorKey = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
905
+ const anchored = anchors.get(anchorKey) || null;
906
+ let targetBlock = anchored?.block || null;
907
+
908
+ if (!targetBlock) {
909
+ for (const [, block] of base?.blocks || []) {
910
+ if (block.subject === quad.subject.value && block.attrsRange) {
911
+ targetBlock = block;
912
+ break;
913
+ }
914
+ }
915
+ }
916
+
917
+ if (quad.object.termType === 'Literal' || quad.object.termType === 'NamedNode') {
918
+ if (!targetBlock) {
919
+ const predShort = shortenIRI(quad.predicate.value, ctx);
920
+ if (quad.object.termType === 'Literal') {
921
+ const value = String(quad.object.value ?? '');
922
+ let ann = predShort;
923
+ if (quad.object.language) ann += ` @${quad.object.language}`;
924
+ else if (quad.object.datatype?.value && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
925
+ ann += ` ^^${shortenIRI(quad.object.datatype.value, ctx)}`;
926
+ }
927
+ edits.push({ start: result.length, end: result.length, text: `\n[${value}] {${ann}}` });
928
+ } else {
929
+ const full = quad.object.value;
930
+ const label = shortenIRI(full, ctx);
931
+ edits.push({ start: result.length, end: result.length, text: `\n[${label}](${full}) {?${predShort}}` });
932
+ }
933
+ return;
934
+ }
935
+
936
+ const predShort = shortenIRI(quad.predicate.value, ctx);
937
+ if (quad.object.termType === 'Literal') {
938
+ const value = String(quad.object.value ?? '');
939
+ let ann = predShort;
940
+ if (quad.object.language) ann += ` @${quad.object.language}`;
941
+ else if (quad.object.datatype?.value && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
942
+ ann += ` ^^${shortenIRI(quad.object.datatype.value, ctx)}`;
943
+ }
944
+ edits.push({ start: result.length, end: result.length, text: `\n[${value}] {${ann}}` });
945
+ return;
946
+ }
947
+
948
+ if (quad.object.termType === 'NamedNode') {
949
+ const full = quad.object.value;
950
+ const label = shortenIRI(full, ctx);
951
+ edits.push({ start: result.length, end: result.length, text: `\n[${label}](${full}) {?${predShort}}` });
952
+ return;
953
+ }
954
+ }
955
+
956
+ const span = serializeHelpers.readAttrsSpan(targetBlock, text);
957
+ if (!span) return;
958
+ const tokens = serializeHelpers.blockTokensFromEntries(targetBlock) || serializeHelpers.normalizeAttrsTokens(span.text);
959
+
960
+ if (quad.predicate.value.endsWith('rdf-syntax-ns#type') && quad.object?.termType === 'NamedNode') {
961
+ const typeShort = shortenIRI(quad.object.value, ctx);
962
+ const typeToken = typeShort.includes(':') || !typeShort.startsWith('http') ? `.${typeShort}` : null;
963
+ if (!typeToken) return;
964
+ if (tokens.includes(typeToken)) return;
965
+ const updated = [...tokens, typeToken];
966
+ edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(updated) });
967
+ return;
553
968
  }
554
969
 
555
- const newLine = `\n[${objText}] {${pred}}`;
556
- edits.push({ start: insertPos, end: insertPos, text: newLine });
970
+ const form = anchored?.entry?.form;
971
+ if (form == null) return;
972
+ const predShort = shortenIRI(quad.predicate.value, ctx);
973
+ const predToken = `${form}${predShort}`;
974
+ if (!predToken) return;
975
+ if (tokens.includes(predToken)) return;
976
+ const updated = [...tokens, predToken];
977
+ edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(updated) });
557
978
  });
558
979
  }
559
980
 
@@ -562,7 +983,8 @@ export function serialize({ text, diff, origin, options = {} }) {
562
983
  result = result.substring(0, edit.start) + edit.text + result.substring(edit.end);
563
984
  });
564
985
 
565
- return { text: result, origin };
986
+ const reparsed = parse(result, { context: options.context || {} });
987
+ return { text: result, origin: reparsed.origin };
566
988
  }
567
989
 
568
- export default { parse, serialize, parseSemanticBlock };
990
+ export default { parse, serialize, parseSemanticBlock, shortenIRI };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.2.3",
3
+ "version": "0.2.4",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",