mdld-parse 0.5.5 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -22,10 +22,10 @@ Energy level: [8] {my:energyLevel ^^xsd:integer}
22
22
 
23
23
  Met [Sam] {+my:sam .my:Person ?my:attendee} on my regular walk at [Central Park] {+my:central-park ?my:location .my:Place label @en} and talked about [Sunny] {my:weather} weather.
24
24
 
25
- Activities: {?my:hasActivity .my:Activity label}
25
+ Activities:
26
26
 
27
- - Walking {=#walking}
28
- - Reading {=#reading}
27
+ - **Walking** {+ex:walking ?my:hasActivity .my:Activity label}
28
+ - **Reading** {+ex:reading ?my:hasActivity .my:Activity label}
29
29
 
30
30
  ```
31
31
 
@@ -59,7 +59,7 @@ my:central-park a my:Place;
59
59
 
60
60
  ```
61
61
 
62
- Read the [FULL SPEC](./docs/Spec/Spec.md).
62
+ Read the [FULL SPEC](./spec/Spec.md).
63
63
 
64
64
  ## Core Features
65
65
 
@@ -69,7 +69,6 @@ Read the [FULL SPEC](./docs/Spec/Spec.md).
69
69
  - **Three predicate forms**: `p` (S→L), `?p` (S→O), `!p` (O→S)
70
70
  - **Type declarations**: `.Class` for rdf:type triples
71
71
  - **Datatypes & language**: `^^xsd:date` and `@en` support
72
- - **Lists**: Explicit subject declarations and numbered ordered lists with `rdf:List` support
73
72
  - **Fragments**: Built-in document structuring with `{=#fragment}`
74
73
  - **Round-trip serialization**: Markdown ↔ RDF ↔ Markdown preserves structure
75
74
 
@@ -213,14 +212,15 @@ ex:armstrong a prov:Person .
213
212
 
214
213
  ### Lists
215
214
 
216
- Lists require explicit subjects per item.
215
+ Lists are pure Markdown structure. Each list item requires explicit annotations:
217
216
 
218
217
  ```markdown
219
218
  # Recipe {=ex:recipe}
220
219
 
221
- Ingredients: {?ex:ingredient .ex:Ingredient}
222
- - Flour {=ex:flour label}
223
- - Water {=ex:water label}
220
+ Ingredients:
221
+
222
+ - **Flour** {+ex:flour ?ex:ingredient .ex:Ingredient label}
223
+ - **Water** {+ex:water ?ex:ingredient .ex:Ingredient label}
224
224
  ```
225
225
 
226
226
  ```turtle
@@ -229,6 +229,11 @@ ex:flour a ex:Ingredient ; rdfs:label "Flour" .
229
229
  ex:water a ex:Ingredient ; rdfs:label "Water" .
230
230
  ```
231
231
 
232
+ **Key Rules:**
233
+ - No semantic propagation from list scope
234
+ - Each item must have explicit annotations
235
+ - Use `+IRI` to maintain subject chaining for repeated object properties
236
+
232
237
  ### Code Blocks
233
238
 
234
239
  Code blocks are value carriers:
@@ -503,7 +508,7 @@ Only specific markdown elements can carry semantic values:
503
508
 
504
509
  **Block:**
505
510
  - Headings (`# Title`)
506
- - List items (`- item`, `1. item`) (single-level)
511
+ - List items (`- item`, `1. item`) — pure Markdown structure
507
512
  - Blockquotes (`> quote`)
508
513
  - Code blocks (` ```lang `)
509
514
 
@@ -579,14 +584,14 @@ Therefore, the algebra is **closed**.
579
584
 
580
585
  # Meeting Notes {=alice:meeting-2024-01-15 .alice:Meeting}
581
586
 
582
- Attendees: {?alice:attendee label}
587
+ Attendees:
583
588
 
584
- - Alice {=alice:alice}
585
- - Bob {=alice:bob}
589
+ - **Alice** {+alice:alice ?alice:attendee label}
590
+ - **Bob** {+alice:bob ?alice:attendee label}
586
591
 
587
- Action items: {?alice:actionItem label}
592
+ Action items:
588
593
 
589
- - Review proposal {=alice:task-1}
594
+ - **Review proposal** {+alice:task-1 ?alice:actionItem label}
590
595
  ```
591
596
 
592
597
  ### Developer Documentation
@@ -630,7 +635,7 @@ Tests validate:
630
635
  - Subject declaration and context
631
636
  - All predicate forms (p, ?p, !p)
632
637
  - Datatypes and language tags
633
- - List processing
638
+ - Explicit list item annotations
634
639
  - Code blocks and blockquotes
635
640
  - Round-trip serialization
636
641
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.5.5",
3
+ "version": "0.6.0",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",
package/src/applyDiff.js CHANGED
@@ -15,25 +15,93 @@ import {
15
15
  addSoftFragmentToken,
16
16
  removeSoftFragmentToken,
17
17
  objectSignature,
18
- expandIRI
18
+ expandIRI,
19
+ DataFactory
19
20
  } from './utils.js';
20
21
 
21
22
  function getBlockById(base, blockId) {
22
- return blockId ? base?.blocks?.get(blockId) : null;
23
+ return blockId ? base?.quadMap?.get(blockId) : null;
23
24
  }
24
25
 
25
26
  function getEntryByQuadKey(base, quadKey) {
26
- return quadKey ? base?.quadIndex?.get(quadKey) : null;
27
+ return quadKey ? base?.quadMap?.get(quadKey) : null;
28
+ }
29
+
30
+ // Helper functions for cleaner term type checking
31
+ function isLiteral(term) {
32
+ return term?.termType === 'Literal';
33
+ }
34
+
35
+ function isNamedNode(term) {
36
+ return term?.termType === 'NamedNode';
37
+ }
38
+
39
+ function isRdfType(term) {
40
+ return term?.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
41
+ }
42
+
43
+ function createAnnotationForQuad(quad, ctx) {
44
+ const predShort = shortenIRI(quad.predicate.value, ctx);
45
+ if (isLiteral(quad.object)) {
46
+ const value = String(quad.object.value ?? '');
47
+ const ann = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
48
+ return { text: `[${value}] {${ann}}`, isLiteral: true };
49
+ } else if (isNamedNode(quad.object)) {
50
+ const objectShort = shortenIRI(quad.object.value, ctx);
51
+ const objectAnn = createObjectAnnotation(objectShort, predShort);
52
+ return { text: objectAnn, isLiteral: false };
53
+ }
54
+ return null;
55
+ }
56
+
57
+ function createSubjectBlockForQuad(quad, ctx) {
58
+ const subjectShort = shortenIRI(quad.subject.value, ctx);
59
+ const predShort = shortenIRI(quad.predicate.value, ctx);
60
+ const subjectName = extractLocalName(quad.subject.value);
61
+
62
+ if (isNamedNode(quad.object)) {
63
+ // IRI object: create object reference
64
+ const objectShort = shortenIRI(quad.object.value, ctx);
65
+ return { text: `\n\n# ${subjectName.charAt(0).toUpperCase() + subjectName.slice(1)} {=${subjectShort}}\n[${objectShort}] {${predShort}}\n`, isNewSubject: true };
66
+ } else {
67
+ // Literal object: create property on separate line
68
+ const value = String(quad.object.value ?? '');
69
+ const annotation = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
70
+ return { text: `\n\n# ${subjectName.charAt(0).toUpperCase() + subjectName.slice(1)} {=${subjectShort}}\n[${value}] {${annotation}}\n`, isNewSubject: true };
71
+ }
72
+ }
73
+
74
+ function extractLocalName(iri) {
75
+ return iri.split('/').pop() || iri.split('#').pop() || iri;
27
76
  }
28
77
 
29
78
  function isValidQuad(quad) {
30
79
  return quad && quad.subject && quad.predicate && quad.object;
31
80
  }
32
81
 
82
+ function normalizeDiffQuads(quads, ctx) {
83
+ // Use DataFactory.fromQuad for proper RDF/JS compatibility
84
+ // But first expand any CURIEs in the quads to ensure proper matching
85
+ return quads.map(quad => {
86
+ // Expand CURIEs to full IRIs before normalization
87
+ const expandedQuad = {
88
+ subject: quad.subject.termType === 'NamedNode'
89
+ ? { ...quad.subject, value: expandIRI(quad.subject.value, ctx) }
90
+ : quad.subject,
91
+ predicate: quad.predicate.termType === 'NamedNode'
92
+ ? { ...quad.predicate, value: expandIRI(quad.predicate.value, ctx) }
93
+ : quad.predicate,
94
+ object: quad.object,
95
+ graph: quad.graph
96
+ };
97
+ return DataFactory.fromQuad(expandedQuad);
98
+ }).filter(isValidQuad);
99
+ }
100
+
33
101
  function createLiteralAnnotation(value, predicate, language, datatype, ctx) {
34
102
  let ann = predicate;
35
103
  if (language) ann += ` @${language}`;
36
- else if (datatype?.value && datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
104
+ else if (datatype?.value && datatype.value !== DataFactory.literal('').datatype.value) {
37
105
  ann += ` ^^${shortenIRI(datatype.value, ctx)}`;
38
106
  }
39
107
  return ann;
@@ -126,23 +194,24 @@ function removeTokenFromSlot(entry, tokens, ctx, quad) {
126
194
  }
127
195
 
128
196
  function addTokenToSlot(tokens, ctx, quad) {
129
- if (quad.predicate.value.endsWith('rdf-syntax-ns#type') && quad.object?.termType === 'NamedNode') {
197
+ // Use cleaner helper functions
198
+ if (isRdfType(quad.predicate) && isNamedNode(quad.object)) {
130
199
  const typeShort = shortenIRI(quad.object.value, ctx);
131
200
  const typeToken = typeShort.includes(':') || !typeShort.startsWith('http') ? `.${typeShort}` : null;
132
201
  if (typeToken && !tokens.includes(typeToken)) {
133
202
  return [...tokens, typeToken];
134
203
  }
135
- } else if (quad.object.termType === 'NamedNode') {
204
+ } else if (isNamedNode(quad.object)) {
136
205
  const objectShort = shortenIRI(quad.object.value, ctx);
137
206
  const isSoftFragment = quad.object.value.includes('#');
138
207
  const fragment = isSoftFragment ? quad.object.value.split('#')[1] : null;
139
208
 
140
- if (isSoftFragment) {
141
- return addSoftFragmentToken(tokens, fragment);
209
+ if (fragment) {
210
+ return addSoftFragmentToken(tokens, objectShort, fragment);
142
211
  } else {
143
212
  return addObjectToken(tokens, objectShort);
144
213
  }
145
- } else if (quad.object.termType === 'Literal') {
214
+ } else if (isLiteral(quad.object)) {
146
215
  const predShort = shortenIRI(quad.predicate.value, ctx);
147
216
  if (!tokens.includes(predShort)) {
148
217
  return [...tokens, predShort];
@@ -179,9 +248,9 @@ export function applyDiff({ text, diff, origin, options = {} }) {
179
248
 
180
249
 
181
250
  function planOperations(diff, base, ctx) {
182
- // Normalize quads once
183
- const normAdds = (diff.add || []).map(normalizeQuad).filter(isValidQuad);
184
- const normDeletes = (diff.delete || []).map(normalizeQuad).filter(isValidQuad);
251
+ // Normalize quads using DataFactory for proper RDF/JS compatibility
252
+ const normAdds = normalizeDiffQuads(diff.add || [], ctx);
253
+ const normDeletes = normalizeDiffQuads(diff.delete || [], ctx);
185
254
 
186
255
  const plan = {
187
256
  literalUpdates: [],
@@ -206,8 +275,7 @@ function planOperations(diff, base, ctx) {
206
275
  const key = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
207
276
  const quadKey = quadToKeyForOrigin(quad);
208
277
  const entry = getEntryByQuadKey(base, quadKey);
209
- const blockId = entry?.blockId || entry;
210
- const block = getBlockById(base, blockId);
278
+ const block = entry; // In unified structure, entry is the block
211
279
  if (block?.attrsRange) {
212
280
  anchors.set(key, { block, entry });
213
281
  }
@@ -215,18 +283,18 @@ function planOperations(diff, base, ctx) {
215
283
 
216
284
  // Detect literal updates early
217
285
  for (const deleteQuad of normDeletes) {
218
- if (deleteQuad.object.termType !== 'Literal') continue;
286
+ if (!isLiteral(deleteQuad.object)) continue;
219
287
 
220
288
  const k = JSON.stringify([deleteQuad.subject.value, deleteQuad.predicate.value]);
221
289
  const candidates = addBySP.get(k) || [];
222
290
  const addQuad = candidates.find(x =>
223
- x?.object?.termType === 'Literal' && !plan.consumedAdds.has(quadToKeyForOrigin(x))
291
+ isLiteral(x?.object) && !plan.consumedAdds.has(quadToKeyForOrigin(x))
224
292
  );
225
293
 
226
294
  if (!addQuad) continue;
227
295
 
228
296
  const entry = resolveOriginEntry(deleteQuad, base);
229
- const block = entry ? getBlockById(base, entry.blockId || entry) : null;
297
+ const block = entry; // In unified structure, the entry is the block
230
298
 
231
299
  if (block) {
232
300
  plan.literalUpdates.push({ deleteQuad, addQuad, entry, block });
@@ -236,13 +304,13 @@ function planOperations(diff, base, ctx) {
236
304
 
237
305
  // Find vacant slot occupations
238
306
  for (const quad of normAdds) {
239
- if (quad.object.termType !== 'Literal') continue;
307
+ if (!isLiteral(quad.object)) continue;
240
308
  if (plan.consumedAdds.has(quadToKeyForOrigin(quad))) continue;
241
309
 
242
- const vacantSlot = findVacantSlot(base?.quadIndex, quad.subject, quad.predicate);
310
+ const vacantSlot = findVacantSlot(base?.quadMap, quad.subject, quad.predicate);
243
311
  if (!vacantSlot) continue;
244
312
 
245
- const block = base?.blocks?.get(vacantSlot.blockId);
313
+ const block = vacantSlot; // In unified structure, the slot is the block
246
314
  if (block) {
247
315
  plan.vacantSlotOccupations.push({ quad, vacantSlot, block });
248
316
  plan.consumedAdds.add(quadToKeyForOrigin(quad));
@@ -251,7 +319,7 @@ function planOperations(diff, base, ctx) {
251
319
 
252
320
  // Plan remaining deletes
253
321
  for (const quad of normDeletes) {
254
- if (quad.object.termType === 'Literal') {
322
+ if (isLiteral(quad.object)) {
255
323
  const isUpdated = plan.literalUpdates.some(u =>
256
324
  u.deleteQuad.subject.value === quad.subject.value &&
257
325
  u.deleteQuad.predicate.value === quad.predicate.value &&
@@ -261,7 +329,7 @@ function planOperations(diff, base, ctx) {
261
329
  }
262
330
 
263
331
  const entry = resolveOriginEntry(quad, base);
264
- const block = entry ? getBlockById(base, entry.blockId || entry) : null;
332
+ const block = entry; // In unified structure, entry is the block
265
333
  if (block) {
266
334
  plan.deletes.push({ quad, entry, block });
267
335
  }
@@ -348,7 +416,7 @@ function materializeEdits(plan, text, ctx, base) {
348
416
  };
349
417
  vacantSlot.blockInfo = blockInfo;
350
418
  const key = quadToKeyForOrigin(quad);
351
- if (key) base.quadIndex.set(key, vacantSlot);
419
+ if (key) base.quadMap.set(key, vacantSlot);
352
420
  }
353
421
 
354
422
  const span = readSpan(block, text, 'attrs');
@@ -382,56 +450,45 @@ function materializeEdits(plan, text, ctx, base) {
382
450
  continue;
383
451
  }
384
452
 
385
- if (quad.object.termType === 'Literal' || quad.object.termType === 'NamedNode') {
453
+ if (isLiteral(quad.object) || isNamedNode(quad.object)) {
386
454
  if (!targetBlock) {
387
- const predShort = shortenIRI(quad.predicate.value, ctx);
388
- if (quad.object.termType === 'Literal') {
389
- const value = String(quad.object.value ?? '');
390
- const ann = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
391
- edits.push({ start: text.length, end: text.length, text: `\n[${value}] {${ann}}` });
455
+ // No target block - check if subject already exists in document
456
+ const subjectExists = Array.from(base?.quadMap?.values() || [])
457
+ .some(block => block.subject?.value === quad.subject.value);
458
+
459
+ let annotation;
460
+ if (!subjectExists && isNamedNode(quad.object)) {
461
+ // New subject with IRI object - create subject block
462
+ annotation = createSubjectBlockForQuad(quad, ctx);
463
+ } else if (subjectExists) {
464
+ // Existing subject - create simple annotation
465
+ annotation = createAnnotationForQuad(quad, ctx);
392
466
  } else {
393
- const objectShort = shortenIRI(quad.object.value, ctx);
394
- edits.push({ start: text.length, end: text.length, text: createObjectAnnotation(objectShort, predShort) });
467
+ // New subject with literal - create subject block
468
+ annotation = createSubjectBlockForQuad(quad, ctx);
395
469
  }
396
- continue;
397
- }
398
470
 
399
- const span = readSpan(targetBlock, text, 'attrs');
400
- if (!span) continue;
401
-
402
- // Check if this is a subject-only block (like {=ex:order-123})
403
- const tokens = normalizeAttrsTokens(span.text);
404
- const hasSubjectToken = tokens.some(t => t.startsWith('='));
405
- const hasPredicateTokens = tokens.some(t => !t.startsWith('=') && !t.startsWith('.'));
406
-
407
- if (tokens.length === 1 && tokens[0].startsWith('=')) {
408
- // This is a subject-only block, create new annotation
409
- const predShort = shortenIRI(quad.predicate.value, ctx);
410
- if (quad.object.termType === 'Literal') {
411
- const value = String(quad.object.value ?? '');
412
- const ann = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
413
- edits.push({ start: text.length, end: text.length, text: `\n[${value}] {${ann}}` });
414
- } else {
415
- const objectShort = shortenIRI(quad.object.value, ctx);
416
- edits.push({ start: text.length, end: text.length, text: createObjectAnnotation(objectShort, predShort) });
471
+ if (annotation) {
472
+ edits.push({ start: text.length, end: text.length, text: annotation.text });
417
473
  }
418
474
  continue;
419
475
  }
420
476
 
421
- // Normal annotation block, add tokens
422
- const existingTokens = blockTokensFromEntries(targetBlock) || tokens;
423
- let updated = addTokenToSlot(existingTokens, ctx, quad);
477
+ // Insert annotation after target block's range
478
+ const annotation = createAnnotationForQuad(quad, ctx);
479
+ if (annotation) {
480
+ // Start from the end of the target block's range; the scan below extends it to the end of that line
481
+ const targetBlockEnd = targetBlock.range.end;
482
+ let insertPos = targetBlockEnd;
424
483
 
425
- // For literal predicates with datatypes, we need to add datatype token too
426
- if (quad.object.termType === 'Literal' && quad.object.datatype && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
427
- const datatypeToken = `^^${shortenIRI(quad.object.datatype.value, ctx)}`;
428
- if (!updated.includes(datatypeToken)) {
429
- updated = [...updated, datatypeToken];
484
+ // Skip past the target block's content to find the right insertion point
485
+ while (insertPos < text.length && text[insertPos] !== '\n') {
486
+ insertPos++;
430
487
  }
431
- }
432
488
 
433
- if (updated.length !== existingTokens.length) {
434
- edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
489
+ // Insert after the target block's content
490
+ const finalInsertPos = insertPos < text.length ? insertPos : text.length;
491
+ edits.push({ start: finalInsertPos, end: finalInsertPos, text: `\n${annotation.text}` });
435
492
  }
436
493
  }
437
494
  }
@@ -450,7 +507,7 @@ function applyEdits(text, edits, ctx, base) {
450
507
 
451
508
  // Extract vacant slots before reparsing
452
509
  const vacantSlots = new Map();
453
- base?.quadIndex?.forEach((slot, key) => {
510
+ base?.quadMap?.forEach((slot, key) => {
454
511
  if (slot.isVacant) vacantSlots.set(key, slot);
455
512
  });
456
513
 
@@ -458,7 +515,7 @@ function applyEdits(text, edits, ctx, base) {
458
515
 
459
516
  // Merge vacant slots back
460
517
  vacantSlots.forEach((vacantSlot, key) => {
461
- if (!reparsed.origin.blocks.has(vacantSlot.blockId) && vacantSlot.blockInfo) {
518
+ if (!reparsed.origin.quadMap.has(vacantSlot.id) && vacantSlot.blockInfo) {
462
519
  const { blockInfo } = vacantSlot;
463
520
  const emptyBlock = {
464
521
  id: blockInfo.id,
@@ -469,12 +526,11 @@ function applyEdits(text, edits, ctx, base) {
469
526
  subject: blockInfo.subject || '',
470
527
  types: [],
471
528
  predicates: [],
472
- entries: [],
473
529
  context: blockInfo.context || { ...ctx }
474
530
  };
475
- reparsed.origin.blocks.set(vacantSlot.blockId, emptyBlock);
531
+ reparsed.origin.quadMap.set(vacantSlot.id, emptyBlock);
476
532
  }
477
- reparsed.origin.quadIndex.set(key, vacantSlot);
533
+ reparsed.origin.quadMap.set(key, vacantSlot);
478
534
  });
479
535
 
480
536
  return { text: result, origin: reparsed.origin };
@@ -483,11 +539,11 @@ function applyEdits(text, edits, ctx, base) {
483
539
  // Helper functions for origin lookup
484
540
  function resolveOriginEntry(quad, base) {
485
541
  const key = quadToKeyForOrigin(quad);
486
- let entry = key ? base?.quadIndex?.get(key) : null;
542
+ let entry = key ? base?.quadMap?.get(key) : null;
487
543
 
488
- if (!entry && quad.object?.termType === 'Literal') {
544
+ if (!entry && isLiteral(quad.object)) {
489
545
  // Fallback: search by value
490
- for (const [k, e] of base?.quadIndex || []) {
546
+ for (const [k, e] of base?.quadMap || []) {
491
547
  const parsed = parseQuadIndexKey(k);
492
548
  if (parsed && parsed.s === quad.subject.value &&
493
549
  parsed.p === quad.predicate.value &&
@@ -507,12 +563,21 @@ function findTargetBlock(quad, base, anchors) {
507
563
  const anchored = anchors.get(anchorKey);
508
564
  if (anchored?.block) return anchored.block;
509
565
 
510
- // Block affinity: prefer same block, then same subject
511
- for (const [, block] of base?.blocks || []) {
512
- if (block.subject === quad.subject.value && block.attrsRange) {
513
- return block;
514
- }
566
+ // Find the best position within the subject's section
567
+ // Look for blocks with the same subject and sort by position
568
+ const subjectBlocks = Array.from(base?.quadMap?.values() || [])
569
+ .filter(block => block.subject?.value === quad.subject.value)
570
+ .sort((a, b) => a.range.start - b.range.start);
571
+
572
+ if (subjectBlocks.length === 0) return null;
573
+
574
+ // Strategy: Find the last block with attrsRange to maintain consistency
575
+ // When several blocks for this subject carry attrsRange, prefer the last one in document order
576
+ const blocksWithAttrs = subjectBlocks.filter(block => block.attrsRange);
577
+ if (blocksWithAttrs.length > 0) {
578
+ return blocksWithAttrs[blocksWithAttrs.length - 1]; // Return last matching block
515
579
  }
516
580
 
517
- return null;
581
+ // Fallback: return the last block in the subject's section
582
+ return subjectBlocks[subjectBlocks.length - 1];
518
583
  }