mdld-parse 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/LICENCE +167 -0
  2. package/README.md +345 -190
  3. package/index.js +264 -248
  4. package/package.json +1 -1
package/index.js CHANGED
@@ -35,56 +35,43 @@ function expandIRI(term, ctx) {
35
35
  return (ctx['@vocab'] || '') + t;
36
36
  }
37
37
 
38
- // Annotation parsing - explicit string operations
39
- function parseAnnotation(raw) {
38
+ function parseSemanticBlock(raw) {
40
39
  try {
41
40
  const cleaned = raw.replace(/^\{|\}$/g, '').trim();
42
- if (!cleaned) return { subject: null, entries: [], datatype: null, language: null };
41
+ if (!cleaned) return { subject: null, types: [], predicates: [], datatype: null, language: null };
43
42
 
44
- // Validate quotes
45
- let quoteCount = 0;
46
- for (let i = 0; i < cleaned.length; i++) {
47
- if (cleaned[i] === '"') quoteCount++;
48
- }
49
- if (quoteCount % 2 !== 0) {
50
- console.warn(`Unbalanced quotes in annotation: ${raw}`);
51
- return { subject: null, entries: [], datatype: null, language: null };
52
- }
53
-
54
- const result = { subject: null, entries: [], datatype: null, language: null };
55
- const parts = cleaned.split(/\s+/).filter(p => p);
43
+ const result = { subject: null, types: [], predicates: [], datatype: null, language: null };
44
+ const tokens = cleaned.split(/\s+/).filter(t => t);
56
45
 
57
- for (const part of parts) {
58
- if (part === '=') {
46
+ for (const token of tokens) {
47
+ if (token === '=') {
59
48
  result.subject = 'RESET';
60
- } else if (part.startsWith('=')) {
61
- result.subject = part.substring(1);
62
- } else if (part.startsWith('@')) {
63
- result.language = part.substring(1);
64
- } else if (part.startsWith('^^')) {
65
- result.datatype = part.substring(2);
66
- } else if (part.startsWith('^')) {
67
- result.entries.push({ kind: 'property', predicate: part.substring(1), direction: 'reverse' });
68
- } else if (part.startsWith('.')) {
69
- result.entries.push({ kind: 'type', classIRI: part.substring(1) });
49
+ } else if (token.startsWith('=')) {
50
+ result.subject = token.substring(1);
51
+ } else if (token.startsWith('^^')) {
52
+ result.datatype = token.substring(2);
53
+ } else if (token.startsWith('@')) {
54
+ result.language = token.substring(1);
55
+ } else if (token.startsWith('.')) {
56
+ result.types.push(token.substring(1));
57
+ } else if (token.startsWith('^?')) {
58
+ result.predicates.push({ iri: token.substring(2), form: '^?' });
59
+ } else if (token.startsWith('^')) {
60
+ result.predicates.push({ iri: token.substring(1), form: '^' });
61
+ } else if (token.startsWith('?')) {
62
+ result.predicates.push({ iri: token.substring(1), form: '?' });
70
63
  } else {
71
- result.entries.push({ kind: 'property', predicate: part, direction: 'forward' });
64
+ result.predicates.push({ iri: token, form: '' });
72
65
  }
73
66
  }
74
67
 
75
- if (result.entries.length === 0 && !result.subject) {
76
- console.warn(`No valid entries found in annotation: ${raw}`);
77
- return { subject: null, entries: [], datatype: null, language: null };
78
- }
79
-
80
68
  return result;
81
69
  } catch (error) {
82
- console.error(`Error parsing annotation ${raw}:`, error);
83
- return { subject: null, entries: [], datatype: null, language: null };
70
+ console.error(`Error parsing semantic block ${raw}:`, error);
71
+ return { subject: null, types: [], predicates: [], datatype: null, language: null };
84
72
  }
85
73
  }
86
74
 
87
- // Token scanning - consolidated helpers
88
75
  function scanTokens(text) {
89
76
  const tokens = [];
90
77
  const lines = text.split('\n');
@@ -96,7 +83,6 @@ function scanTokens(text) {
96
83
  const lineStart = pos;
97
84
  pos += line.length + 1;
98
85
 
99
- // Code blocks
100
86
  if (line.startsWith('```')) {
101
87
  if (!codeBlock) {
102
88
  const fence = line.match(/^(`{3,})(.*)/);
@@ -104,7 +90,7 @@ function scanTokens(text) {
104
90
  fence: fence[1],
105
91
  start: lineStart,
106
92
  content: [],
107
- lang: fence[2].trim().split('{')[0].trim(),
93
+ lang: fence[2].trim().split(/[\s{]/)[0],
108
94
  attrs: fence[2].match(/\{[^}]+\}/)?.[0]
109
95
  };
110
96
  } else if (line.startsWith(codeBlock.fence)) {
@@ -125,94 +111,77 @@ function scanTokens(text) {
125
111
  continue;
126
112
  }
127
113
 
128
- // Prefix declarations
129
114
  const prefixMatch = line.match(/^\[([^\]]+)\]\s*\{:\s*([^}]+)\}/);
130
115
  if (prefixMatch) {
131
116
  tokens.push({ type: 'prefix', prefix: prefixMatch[1], iri: prefixMatch[2].trim() });
132
117
  continue;
133
118
  }
134
119
 
135
- // Headings
136
120
  const headingMatch = line.match(/^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
137
121
  if (headingMatch) {
138
122
  tokens.push({
139
123
  type: 'heading',
140
124
  depth: headingMatch[1].length,
141
- range: [lineStart, pos],
125
+ range: [lineStart, pos - 1],
142
126
  text: headingMatch[2].trim(),
143
127
  attrs: headingMatch[3]
144
128
  });
145
129
  continue;
146
130
  }
147
131
 
148
- // Lists
149
132
  const listMatch = line.match(/^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
150
133
  if (listMatch) {
151
134
  tokens.push({
152
135
  type: 'list',
153
136
  indent: listMatch[1].length,
154
- range: [lineStart, pos],
137
+ range: [lineStart, pos - 1],
155
138
  text: listMatch[3].trim(),
156
139
  attrs: listMatch[4]
157
140
  });
158
141
  continue;
159
142
  }
160
143
 
161
- // Blockquotes
162
144
  const blockquoteMatch = line.match(/^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
163
145
  if (blockquoteMatch) {
164
146
  tokens.push({
165
147
  type: 'blockquote',
166
- range: [lineStart, pos],
148
+ range: [lineStart, pos - 1],
167
149
  text: blockquoteMatch[1].trim(),
168
150
  attrs: blockquoteMatch[2]
169
151
  });
170
152
  continue;
171
153
  }
172
154
 
173
- // Paragraphs
174
155
  if (line.trim()) {
175
- const paraMatch = line.match(/^(.+?)(?:\s*(\{[^}]+\}))?$/);
176
- if (paraMatch) {
177
- tokens.push({
178
- type: 'para',
179
- range: [lineStart, pos],
180
- text: paraMatch[1].trim(),
181
- attrs: paraMatch[2] || null
182
- });
183
- }
156
+ tokens.push({
157
+ type: 'para',
158
+ range: [lineStart, pos - 1],
159
+ text: line.trim(),
160
+ attrs: null
161
+ });
184
162
  }
185
163
  }
186
164
 
187
165
  return tokens;
188
166
  }
189
167
 
190
- // Inline value extraction - simplified
191
- function extractInlineValue(text, baseOffset = 0) {
192
- const spans = [];
168
+ // Extract inline carriers: [text] {attrs}, [text](url) {attrs}, [text](=iri) {attrs}
169
+ function extractInlineCarriers(text, baseOffset = 0) {
170
+ const carriers = [];
193
171
  let pos = 0;
194
172
 
195
173
  while (pos < text.length) {
196
174
  const bracketStart = text.indexOf('[', pos);
197
- if (bracketStart === -1) {
198
- if (pos < text.length) spans.push({ type: 'text', text: text.substring(pos) });
199
- break;
200
- }
201
-
202
- if (bracketStart > pos) spans.push({ type: 'text', text: text.substring(pos, bracketStart) });
175
+ if (bracketStart === -1) break;
203
176
 
204
177
  const bracketEnd = text.indexOf(']', bracketStart);
205
- if (bracketEnd === -1) {
206
- spans.push({ type: 'text', text: text.substring(bracketStart) });
207
- break;
208
- }
178
+ if (bracketEnd === -1) break;
209
179
 
210
- const spanText = text.substring(bracketStart + 1, bracketEnd);
180
+ const carrierText = text.substring(bracketStart + 1, bracketEnd);
211
181
  let spanEnd = bracketEnd + 1;
212
182
  let url = null;
213
- let attrs = null;
214
183
 
215
- // Parse link destination
184
+ // Check for (url) or (=iri)
216
185
  if (text[spanEnd] === '(') {
217
186
  const parenEnd = text.indexOf(')', spanEnd);
218
187
  if (parenEnd !== -1) {
@@ -221,17 +190,32 @@ function extractInlineValue(text, baseOffset = 0) {
221
190
  }
222
191
  }
223
192
 
224
- // Parse attributes
193
+ // Check for {attrs}
194
+ let attrs = null;
225
195
  const attrsMatch = text.substring(spanEnd).match(/^\s*\{([^}]+)\}/);
226
196
  if (attrsMatch) {
227
197
  attrs = `{${attrsMatch[1]}}`;
228
198
  spanEnd += attrsMatch[0].length;
229
199
  }
230
200
 
231
- spans.push({
232
- type: url ? 'link' : 'span',
233
- text: spanText,
234
- url: url,
201
+ // Determine type and resource
202
+ let carrierType = 'span';
203
+ let resourceIRI = null;
204
+
205
+ if (url) {
206
+ if (url.startsWith('=')) {
207
+ carrierType = 'resource';
208
+ resourceIRI = url.substring(1);
209
+ } else {
210
+ carrierType = 'link';
211
+ resourceIRI = url;
212
+ }
213
+ }
214
+
215
+ carriers.push({
216
+ type: carrierType,
217
+ text: carrierText,
218
+ url: resourceIRI,
235
219
  attrs: attrs,
236
220
  range: [baseOffset + bracketStart, baseOffset + spanEnd]
237
221
  });
@@ -239,23 +223,26 @@ function extractInlineValue(text, baseOffset = 0) {
239
223
  pos = spanEnd;
240
224
  }
241
225
 
242
- return spans.length ? spans : [{ type: 'text', text: text }];
226
+ return carriers;
243
227
  }
244
228
 
245
- // Core processing functions - consolidated
246
- function createBlock(subject, entries, range, ctx) {
247
- const expanded = entries.map(e => ({
248
- ...e,
249
- predicate: e.predicate ? expandIRI(e.predicate, ctx) : null,
250
- classIRI: e.classIRI ? expandIRI(e.classIRI, ctx) : null
251
- }));
229
+ function createBlock(subject, types, predicates, range, ctx) {
230
+ const expanded = {
231
+ subject: subject,
232
+ types: types.map(t => expandIRI(t, ctx)),
233
+ predicates: predicates.map(p => ({
234
+ iri: expandIRI(p.iri, ctx),
235
+ form: p.form
236
+ }))
237
+ };
252
238
 
253
- const blockId = hash([subject, ...expanded.map(e => JSON.stringify(e))].join('|'));
239
+ const blockId = hash([subject, JSON.stringify(expanded)].join('|'));
254
240
  return {
255
241
  id: blockId,
256
242
  range: { start: range[0], end: range[1] },
257
243
  subject,
258
- entries: expanded,
244
+ types: expanded.types,
245
+ predicates: expanded.predicates,
259
246
  context: { ...ctx }
260
247
  };
261
248
  }
@@ -264,205 +251,233 @@ function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFac
264
251
  if (!subject || !predicate || !object) return;
265
252
  const quad = dataFactory.quad(subject, predicate, object);
266
253
  quads.push(quad);
267
- quadIndex.set(JSON.stringify([quad.subject.value, quad.predicate.value, quad.object.value]), blockId);
254
+ const key = JSON.stringify([
255
+ quad.subject.value,
256
+ quad.predicate.value,
257
+ quad.object.termType === 'Literal' ? quad.object.value : quad.object.value
258
+ ]);
259
+ quadIndex.set(key, blockId);
268
260
  }
269
261
 
270
- function createLiteralValue(value, datatype, language, context, dataFactory) {
271
- if (datatype) return dataFactory.literal(value, dataFactory.namedNode(expandIRI(datatype, context)));
272
- if (language) return dataFactory.literal(value, language);
262
+ function createLiteral(value, datatype, language, context, dataFactory) {
263
+ if (datatype) {
264
+ return dataFactory.literal(value, dataFactory.namedNode(expandIRI(datatype, context)));
265
+ }
266
+ if (language) {
267
+ return dataFactory.literal(value, language);
268
+ }
273
269
  return dataFactory.literal(value);
274
270
  }
275
271
 
276
- function processAnnotation(token, state, textContent = null) {
277
- if (!token.attrs) return;
278
-
279
- const ann = parseAnnotation(token.attrs);
280
- const originalSubject = state.currentSubject;
281
-
282
- // Handle subject declaration
283
- if (ann.subject === 'RESET') {
272
+ // Core processing: handle subject/type declarations and property emissions
273
+ function processAnnotation(carrier, sem, state) {
274
+ // §6.1 Subject declaration
275
+ if (sem.subject === 'RESET') {
284
276
  state.currentSubject = null;
285
277
  return;
286
278
  }
287
- if (ann.subject) {
288
- state.currentSubject = state.df.namedNode(expandIRI(ann.subject, state.ctx));
289
- }
290
-
291
- if (!originalSubject && !ann.subject) return;
292
279
 
293
- const targetSubject = ann.subject ?
294
- state.df.namedNode(expandIRI(ann.subject, state.ctx)) :
295
- originalSubject;
280
+ if (sem.subject) {
281
+ state.currentSubject = state.df.namedNode(expandIRI(sem.subject, state.ctx));
282
+ }
296
283
 
297
- const block = createBlock(targetSubject.value, ann.entries, token.range, state.ctx);
284
+ // Determine the subject for emissions
285
+ const S = state.currentSubject;
286
+ if (!S) return; // Need a subject to emit anything
287
+
288
+ // Create origin block
289
+ const block = createBlock(
290
+ S.value,
291
+ sem.types,
292
+ sem.predicates,
293
+ carrier.range,
294
+ state.ctx
295
+ );
298
296
  state.origin.blocks.set(block.id, block);
299
297
 
300
- // Handle list context types
301
- if (token.type === 'list' && state.listContext?.types.length > 0) {
302
- state.listContext.types.forEach(typeIRI => {
303
- emitQuad(state.quads, state.origin.quadIndex, block.id,
304
- targetSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)),
305
- state.df.namedNode(typeIRI), state.df);
306
- });
307
- }
298
+ // Extract L (literal) and O (object IRI)
299
+ const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
300
+ const O = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
301
+
302
+ // §7 Emit type triples
303
+ sem.types.forEach(typeIRI => {
304
+ const typeSubject = O || S;
305
+ emitQuad(
306
+ state.quads,
307
+ state.origin.quadIndex,
308
+ block.id,
309
+ typeSubject,
310
+ state.df.namedNode(expandIRI('rdf:type', state.ctx)),
311
+ state.df.namedNode(expandIRI(typeIRI, state.ctx)),
312
+ state.df
313
+ );
314
+ });
308
315
 
309
- // Emit triples
310
- ann.entries.forEach(e => {
311
- if (e.kind === 'type') {
312
- const typeSubject = token.url ?
313
- state.df.namedNode(expandIRI(token.url, state.ctx)) : targetSubject;
314
- emitQuad(state.quads, state.origin.quadIndex, block.id,
315
- typeSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)),
316
- state.df.namedNode(expandIRI(e.classIRI, state.ctx)), state.df);
317
- } else if (e.kind === 'property' && e.predicate) {
318
- const predicate = state.df.namedNode(expandIRI(e.predicate, state.ctx));
319
- let object;
320
-
321
- if (token.url) {
322
- object = state.df.namedNode(expandIRI(token.url, state.ctx));
323
- } else if (ann.subject && !token.url) {
324
- if (e.direction === 'reverse') {
325
- object = targetSubject;
326
- } else {
327
- object = token.type === 'code' ?
328
- createLiteralValue(textContent || token.text || '', ann.datatype, ann.language, state.ctx, state.df) :
329
- targetSubject;
330
- }
331
- } else {
332
- object = createLiteralValue(textContent || token.text || '', ann.datatype, ann.language, state.ctx, state.df);
316
+ // §8 Emit predicate triples (routing table)
317
+ sem.predicates.forEach(pred => {
318
+ const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
319
+
320
+ if (pred.form === '') {
321
+ // p: S → L
322
+ emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, L, state.df);
323
+ } else if (pred.form === '?') {
324
+ // ?p: S O
325
+ if (O) {
326
+ emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, O, state.df);
327
+ }
328
+ } else if (pred.form === '^') {
329
+ // ^p: reverse literal (L → S impossible, emit nothing per spec)
330
+ // Note: Some interpretations might emit S → S or skip
331
+ } else if (pred.form === '^?') {
332
+ // ^?p: O → S
333
+ if (O) {
334
+ emitQuad(state.quads, state.origin.quadIndex, block.id, O, P, S, state.df);
333
335
  }
334
-
335
- const subject = e.direction === 'reverse' ? object :
336
- (ann.subject && !token.url && token.type !== 'code') ? originalSubject : targetSubject;
337
- const objectRef = e.direction === 'reverse' ? originalSubject : object;
338
-
339
- emitQuad(state.quads, state.origin.quadIndex, block.id, subject, predicate, objectRef, state.df);
340
336
  }
341
337
  });
342
338
  }
343
339
 
344
- // List processing - simplified
345
- function setupListContext(token, state, nextToken) {
346
- if (!token.attrs || nextToken?.type !== 'list') return false;
347
-
348
- const ann = parseAnnotation(token.attrs);
349
- state.listContext = { predicate: null, types: [], reverse: false };
350
-
351
- ann.entries.forEach(e => {
352
- if (e.kind === 'property') {
353
- state.listContext.predicate = expandIRI(e.predicate, state.ctx);
354
- state.listContext.reverse = e.direction === 'reverse';
355
- }
356
- if (e.kind === 'type') {
357
- state.listContext.types.push(expandIRI(e.classIRI, state.ctx));
340
+ // Process list with context annotation
341
+ function processListContext(contextSem, listTokens, state) {
342
+ const contextSubject = state.currentSubject;
343
+ if (!contextSubject) return;
344
+
345
+ listTokens.forEach(listToken => {
346
+ // Extract carriers from list item text
347
+ const carriers = extractInlineCarriers(listToken.text, listToken.range[0]);
348
+
349
+ // Find subject from carriers or list item annotation
350
+ let itemSubject = null;
351
+ let itemSubjectCarrier = null;
352
+
353
+ // First check carriers for subject declarations
354
+ for (const carrier of carriers) {
355
+ if (carrier.url && carrier.type === 'resource') {
356
+ // [text](=iri) declares a subject
357
+ itemSubject = state.df.namedNode(expandIRI(carrier.url, state.ctx));
358
+ itemSubjectCarrier = carrier;
359
+ break;
360
+ }
361
+ if (carrier.attrs) {
362
+ const itemSem = parseSemanticBlock(carrier.attrs);
363
+ if (itemSem.subject && itemSem.subject !== 'RESET') {
364
+ itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
365
+ itemSubjectCarrier = carrier;
366
+ break;
367
+ }
368
+ }
358
369
  }
359
- });
360
- return true;
361
- }
362
370
 
363
- function processListItem(token, state) {
364
- const ann = parseAnnotation(token.attrs);
365
- const originalSubject = state.currentSubject;
371
+ if (!itemSubject) return; // List items must declare subjects
372
+
373
+ // Apply context types to item
374
+ contextSem.types.forEach(typeIRI => {
375
+ emitQuad(
376
+ state.quads,
377
+ state.origin.quadIndex,
378
+ 'list-context',
379
+ itemSubject,
380
+ state.df.namedNode(expandIRI('rdf:type', state.ctx)),
381
+ state.df.namedNode(expandIRI(typeIRI, state.ctx)),
382
+ state.df
383
+ );
384
+ });
366
385
 
367
- if (ann.subject) {
368
- state.currentSubject = state.df.namedNode(expandIRI(ann.subject, state.ctx));
369
- }
386
+ // Emit context relationships
387
+ contextSem.predicates.forEach(pred => {
388
+ const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
370
389
 
371
- // Process item properties
372
- ann.entries.forEach(e => {
373
- if (e.kind === 'type') {
374
- emitQuad(state.quads, state.origin.quadIndex, 'list-item',
375
- state.currentSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)),
376
- state.df.namedNode(expandIRI(e.classIRI, state.ctx)), state.df);
377
- } else if (e.kind === 'property' && e.predicate) {
378
- const predicate = state.df.namedNode(expandIRI(e.predicate, state.ctx));
379
- const object = createLiteralValue(token.text, ann.datatype, ann.language, state.ctx, state.df);
380
- emitQuad(state.quads, state.origin.quadIndex, 'list-item',
381
- state.currentSubject, predicate, object, state.df);
382
- }
383
- });
390
+ if (pred.form === '^' || pred.form === '^?') {
391
+ // Reverse: item → context
392
+ emitQuad(state.quads, state.origin.quadIndex, 'list-context',
393
+ itemSubject, P, contextSubject, state.df);
394
+ } else {
395
+ // Forward: context → item
396
+ emitQuad(state.quads, state.origin.quadIndex, 'list-context',
397
+ contextSubject, P, itemSubject, state.df);
398
+ }
399
+ });
384
400
 
385
- // Process list context relationship
386
- if (state.listContext?.predicate && originalSubject) {
387
- const predicate = state.df.namedNode(expandIRI(state.listContext.predicate, state.ctx));
388
- if (state.listContext.reverse) {
389
- emitQuad(state.quads, state.origin.quadIndex, 'list-context',
390
- state.currentSubject, predicate, originalSubject, state.df);
391
- } else {
392
- emitQuad(state.quads, state.origin.quadIndex, 'list-context',
393
- originalSubject, predicate, state.currentSubject, state.df);
401
+ // Process item's own annotations
402
+ const prevSubject = state.currentSubject;
403
+ state.currentSubject = itemSubject;
404
+
405
+ // Process the list token's own attributes
406
+ if (listToken.attrs) {
407
+ const itemSem = parseSemanticBlock(listToken.attrs);
408
+ // For list item attributes, the literal is the text content without links
409
+ const carrier = { type: 'list', text: listToken.text.replace(/\[([^\]]+)\]\([^)]+\)/, '$1'), range: listToken.range };
410
+ processAnnotation(carrier, itemSem, state);
394
411
  }
395
- }
396
412
 
397
- // Apply list context types
398
- if (state.listContext?.types.length > 0 && ann.subject) {
399
- state.listContext.types.forEach(type => {
400
- emitQuad(state.quads, state.origin.quadIndex, 'list-item',
401
- state.currentSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)),
402
- state.df.namedNode(expandIRI(type, state.ctx)), state.df);
413
+ // Process inline carriers' attributes
414
+ carriers.forEach(carrier => {
415
+ if (carrier.attrs) {
416
+ const itemSem = parseSemanticBlock(carrier.attrs);
417
+ processAnnotation(carrier, itemSem, state);
418
+ }
403
419
  });
404
- }
405
420
 
406
- state.currentSubject = originalSubject;
421
+ state.currentSubject = prevSubject;
422
+ });
407
423
  }
408
424
 
409
- // Main parsing function
410
425
  export function parse(text, options = {}) {
411
426
  const state = {
412
427
  ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
413
428
  df: options.dataFactory || DataFactory,
414
429
  quads: [],
415
430
  origin: { blocks: new Map(), quadIndex: new Map() },
416
- currentSubject: null,
417
- listContext: null
431
+ currentSubject: null
418
432
  };
419
433
 
420
434
  const tokens = scanTokens(text);
435
+
436
+ // Apply prefix declarations
421
437
  tokens.filter(t => t.type === 'prefix').forEach(t => state.ctx[t.prefix] = t.iri);
422
438
 
423
439
  for (let i = 0; i < tokens.length; i++) {
424
440
  const token = tokens[i];
425
- const nextToken = tokens[i + 1];
426
441
 
427
- switch (token.type) {
428
- case 'heading':
429
- processAnnotation(token, state, token.text);
430
- break;
431
- case 'code':
432
- processAnnotation(token, state, token.text);
433
- break;
434
- case 'para':
435
- if (setupListContext(token, state, nextToken)) break;
436
- // Regular paragraphs are NOT value carriers per spec
437
- // Only process spans and links within paragraphs
438
- if (state.currentSubject) {
439
- const spans = extractInlineValue(token.text, token.range[0]);
440
-
441
- // Process annotated spans (value carriers)
442
- spans.filter(s => s.type === 'span' && s.attrs)
443
- .forEach(span => processAnnotation(span, state, span.text));
444
-
445
- // Process spans where paragraph has annotation
446
- if (token.attrs) {
447
- spans.filter(s => s.type === 'span')
448
- .forEach(span => {
449
- // Attach paragraph's annotation to the span
450
- const spanWithAttrs = { ...span, attrs: token.attrs };
451
- processAnnotation(spanWithAttrs, state, span.text);
452
- });
453
- }
454
-
455
- // Process links (value carriers)
456
- spans.filter(s => s.type === 'link')
457
- .forEach(link => processAnnotation(link, state, link.text));
442
+ if (token.type === 'heading' && token.attrs) {
443
+ const sem = parseSemanticBlock(token.attrs);
444
+ const carrier = { type: 'heading', text: token.text, range: token.range };
445
+ processAnnotation(carrier, sem, state);
446
+ } else if (token.type === 'code' && token.attrs) {
447
+ const sem = parseSemanticBlock(token.attrs);
448
+ const carrier = { type: 'code', text: token.text, range: token.range };
449
+ processAnnotation(carrier, sem, state);
450
+ } else if (token.type === 'blockquote' && token.attrs) {
451
+ const sem = parseSemanticBlock(token.attrs);
452
+ const carrier = { type: 'blockquote', text: token.text, range: token.range };
453
+ processAnnotation(carrier, sem, state);
454
+ } else if (token.type === 'para') {
455
+ // Check for list context
456
+ const followingLists = [];
457
+ let j = i + 1;
458
+ while (j < tokens.length && tokens[j].type === 'list') {
459
+ followingLists.push(tokens[j]);
460
+ j++;
461
+ }
462
+
463
+ // Check if this paragraph ends with {attrs} and is followed by lists
464
+ const contextMatch = token.text.match(/^(.+?)\s*\{([^}]+)\}$/);
465
+ if (contextMatch && followingLists.length > 0) {
466
+ // This is a list context annotation
467
+ const contextSem = parseSemanticBlock(`{${contextMatch[2]}}`);
468
+ processListContext(contextSem, followingLists, state);
469
+ i = j - 1;
470
+ continue;
471
+ }
472
+
473
+ // Process inline carriers
474
+ const carriers = extractInlineCarriers(token.text, token.range[0]);
475
+ carriers.forEach(carrier => {
476
+ if (carrier.attrs) {
477
+ const sem = parseSemanticBlock(carrier.attrs);
478
+ processAnnotation(carrier, sem, state);
458
479
  }
459
- break;
460
- case 'list':
461
- if (state.listContext) processListItem(token, state);
462
- break;
463
- case 'blockquote':
464
- if (state.currentSubject) processAnnotation(token, state, token.text);
465
- break;
480
+ });
466
481
  }
467
482
  }
468
483
 
@@ -472,19 +487,16 @@ export function parse(text, options = {}) {
472
487
  function shortenIRI(iri, ctx) {
473
488
  if (!iri || !iri.startsWith('http')) return iri;
474
489
 
475
- // Check @vocab first
476
490
  if (ctx['@vocab'] && iri.startsWith(ctx['@vocab'])) {
477
491
  return iri.substring(ctx['@vocab'].length);
478
492
  }
479
493
 
480
- // Check prefixes
481
494
  for (const [prefix, namespace] of Object.entries(ctx)) {
482
495
  if (prefix !== '@vocab' && iri.startsWith(namespace)) {
483
496
  return prefix + ':' + iri.substring(namespace.length);
484
497
  }
485
498
  }
486
499
 
487
- // No prefix found, return full IRI
488
500
  return iri;
489
501
  }
490
502
 
@@ -497,7 +509,12 @@ export function serialize({ text, diff, origin, options = {} }) {
497
509
 
498
510
  if (diff.delete) {
499
511
  diff.delete.forEach(quad => {
500
- const key = JSON.stringify([quad.subject.value, quad.predicate.value, quad.object.value]);
512
+ if (!quad || !quad.subject) return;
513
+ const key = JSON.stringify([
514
+ quad.subject.value,
515
+ quad.predicate.value,
516
+ quad.object.termType === 'Literal' ? quad.object.value : quad.object.value
517
+ ]);
501
518
  const blockId = origin?.quadIndex.get(key);
502
519
  if (!blockId) return;
503
520
 
@@ -507,7 +524,7 @@ export function serialize({ text, diff, origin, options = {} }) {
507
524
  const start = block.range.start;
508
525
  const end = block.range.end;
509
526
  const before = text.substring(Math.max(0, start - 1), start);
510
- const after = text.substring(end, end + 1);
527
+ const after = text.substring(end, Math.min(end + 1, text.length));
511
528
  const deleteStart = before === '\n' ? start - 1 : start;
512
529
  const deleteEnd = after === '\n' ? end + 1 : end;
513
530
 
@@ -536,7 +553,6 @@ export function serialize({ text, diff, origin, options = {} }) {
536
553
  }
537
554
 
538
555
  const newLine = `\n[${objText}] {${pred}}`;
539
-
540
556
  edits.push({ start: insertPos, end: insertPos, text: newLine });
541
557
  });
542
558
  }
@@ -549,4 +565,4 @@ export function serialize({ text, diff, origin, options = {} }) {
549
565
  return { text: result, origin };
550
566
  }
551
567
 
552
- export default { parse, serialize, parseAnnotation };
568
+ export default { parse, serialize, parseSemanticBlock };