mdld-parse 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +24 -9
  2. package/index.js +342 -150
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -1,15 +1,14 @@
1
1
  # MD-LD Parse v0.2
2
2
 
3
- **Markdown-Linked Data (MD-LD)** — a deterministic, streaming-friendly RDF authoring format that extends Markdown with explicit `{}` annotations.
3
+ **Markdown-Linked Data (MD-LD)** — a deterministic, streaming-friendly RDF authoring format that extends Markdown with explicit `{...}` annotations.
4
4
 
5
5
  [![NPM](https://img.shields.io/npm/v/mdld-parse)](https://www.npmjs.com/package/mdld-parse)
6
- [![License](https://img.shields.io/npm/l/mdld-parse)](https://github.com/mdld-js/mdld-parse)
7
6
 
8
- [Documentation](https://mdld.js.org) | [Specification](https://mdld.js.org/spec) | [Playground](https://mdld.js.org/playground)
7
+ [Documentation](https://mdld.js.org) | [Repository](https://github.com/davay42/mdld-parse) | [Playground](https://mdld.js.org/playground)
9
8
 
10
9
  ## What is MD-LD?
11
10
 
12
- MD-LD allows you to author RDF graphs directly in Markdown using explicit `{}` annotations:
11
+ MD-LD allows you to author RDF graphs directly in Markdown using explicit `{...}` annotations:
13
12
 
14
13
  ```markdown
15
14
  # Apollo 11 {=ex:apollo11 .SpaceMission}
@@ -32,8 +31,8 @@ ex:apollo11 a schema:SpaceMission ;
32
31
 
33
32
  MD-LD v0.2 provides strict semantic guarantees:
34
33
 
35
- 1. **CommonMark-preserving** — Removing `{}` yields valid Markdown
36
- 2. **Explicit semantics** — Every quad originates from explicit `{}`
34
+ 1. **CommonMark-preserving** — Removing `{...}` yields valid Markdown
35
+ 2. **Explicit semantics** — Every quad originates from explicit `{...}`
37
36
  3. **Single-pass parsing** — Streaming-friendly, deterministic
38
37
  4. **No blank nodes** — All subjects are stable IRIs
39
38
  5. **Complete traceability** — Every quad maps to source location
@@ -95,12 +94,28 @@ Each predicate form determines the graph edge:
95
94
 
96
95
  ### Subject Declaration
97
96
 
98
- Set the current subject (emits no quads):
97
+ Set current subject (emits no quads):
99
98
 
100
99
  ```markdown
101
100
  ## Apollo 11 {=ex:apollo11}
102
101
  ```
103
102
 
103
+ #### Fragment Syntax
104
+
105
+ Create fragment IRIs relative to current subject:
106
+
107
+ ```markdown
108
+ # Document {=ex:document}
109
+ {=#summary}
110
+ [Content] {name}
111
+ ```
112
+
113
+ ```turtle
114
+ ex:document#summary schema:name "Content" .
115
+ ```
116
+
117
+ Fragments replace any existing fragment and require a current subject.
118
+
104
119
  Subject remains in scope until reset with `{=}` or new subject declared.
105
120
 
106
121
  ### Type Declaration
@@ -140,7 +155,7 @@ Links create relationships (use `?` prefix):
140
155
  ```markdown
141
156
  # Mission {=ex:apollo11}
142
157
 
143
- [NASA](ex:nasa) {?organizer}
158
+ [NASA] {=ex:nasa ?organizer}
144
159
  ```
145
160
 
146
161
  ```turtle
@@ -294,7 +309,7 @@ Apply RDF changes back to markdown with proper positioning.
294
309
  **Returns:** `{ text, origin }`
295
310
 
296
311
  - `text` — Updated markdown
297
- - `origin` — Updated origin tracking
312
+ - `origin` — Updated origin tracking, including vacant slots
298
313
 
299
314
  **Example:**
300
315
 
package/index.js CHANGED
@@ -24,6 +24,7 @@ function hash(str) {
24
24
  return Math.abs(h).toString(16).slice(0, 12);
25
25
  }
26
26
 
27
+ // IRI Utilities
27
28
  function expandIRI(term, ctx) {
28
29
  if (term == null) return null;
29
30
  const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
@@ -36,6 +37,21 @@ function expandIRI(term, ctx) {
36
37
  return (ctx['@vocab'] || '') + t;
37
38
  }
38
39
 
40
+ export function shortenIRI(iri, ctx) {
41
+ if (!iri || !iri.startsWith('http')) return iri;
42
+ if (ctx['@vocab'] && iri.startsWith(ctx['@vocab'])) return iri.substring(ctx['@vocab'].length);
43
+ for (const [prefix, namespace] of Object.entries(ctx)) {
44
+ if (prefix !== '@vocab' && iri.startsWith(namespace)) {
45
+ return prefix + ':' + iri.substring(namespace.length);
46
+ }
47
+ }
48
+ return iri;
49
+ }
50
+
51
+ function processIRI(term, ctx, operation = 'expand') {
52
+ return operation === 'expand' ? expandIRI(term, ctx) : shortenIRI(term, ctx);
53
+ }
54
+
39
55
  function parseSemanticBlock(raw) {
40
56
  try {
41
57
  const src = String(raw || '').trim();
@@ -57,6 +73,13 @@ function parseSemanticBlock(raw) {
57
73
  continue;
58
74
  }
59
75
 
76
+ if (token.startsWith('=#')) {
77
+ const fragment = token.substring(2);
78
+ result.subject = `=#${fragment}`;
79
+ result.entries.push({ kind: 'fragment', fragment, relRange: { start: relStart, end: relEnd }, raw: token });
80
+ continue;
81
+ }
82
+
60
83
  if (token.startsWith('=')) {
61
84
  const iri = token.substring(1);
62
85
  result.subject = iri;
@@ -339,6 +362,7 @@ function createBlock(subject, types, predicates, entries, range, attrsRange, val
339
362
  };
340
363
  }
341
364
 
365
+ // Quad Utilities
342
366
  function quadIndexKey(subject, predicate, object) {
343
367
  const objKey = object.termType === 'Literal'
344
368
  ? JSON.stringify({ t: 'Literal', v: object.value, lang: object.language || '', dt: object.datatype?.value || '' })
@@ -346,11 +370,97 @@ function quadIndexKey(subject, predicate, object) {
346
370
  return JSON.stringify([subject.value, predicate.value, objKey]);
347
371
  }
348
372
 
373
+ function normalizeQuad(q) {
374
+ if (!q) return null;
375
+ const { subject, predicate, object } = q;
376
+ if (object?.termType === 'Literal') {
377
+ const language = typeof object.language === 'string' ? object.language : '';
378
+ const datatype = object.datatype?.value || 'http://www.w3.org/2001/XMLSchema#string';
379
+ return { ...q, subject, predicate, object: { ...object, language, datatype } };
380
+ }
381
+ return { ...q, subject, predicate, object };
382
+ }
383
+
384
+ function objectSignature(o) {
385
+ if (!o) return '';
386
+ if (o.termType === 'Literal') {
387
+ return JSON.stringify({ t: 'Literal', v: o.value, lang: o.language || '', dt: o.datatype?.value || '' });
388
+ }
389
+ return JSON.stringify({ t: o.termType, v: o.value });
390
+ }
391
+
392
+ function quadToKeyForOrigin(q) {
393
+ const nq = normalizeQuad(q);
394
+ return nq ? quadIndexKey(nq.subject, nq.predicate, nq.object) : null;
395
+ }
396
+
397
+ function parseQuadIndexKey(key) {
398
+ try {
399
+ const [s, p, objKey] = JSON.parse(key);
400
+ return { s, p, o: JSON.parse(objKey) };
401
+ } catch {
402
+ return null;
403
+ }
404
+ }
405
+
406
+ // Semantic Slot Utilities
407
+ function createSemanticSlotId(subject, predicate) {
408
+ return hash(`${subject.value}|${predicate.value}`);
409
+ }
410
+
411
+ function createSlotInfo(blockId, entryIndex, meta = {}) {
412
+ const slotId = meta.subject && meta.predicate ? createSemanticSlotId(meta.subject, meta.predicate) : null;
413
+ return {
414
+ blockId,
415
+ entryIndex,
416
+ slotId,
417
+ isVacant: false,
418
+ lastValue: null,
419
+ vacantSince: null,
420
+ ...meta
421
+ };
422
+ }
423
+
424
+ function markSlotAsVacant(slotInfo, deletedValue) {
425
+ if (!slotInfo) return null;
426
+ return {
427
+ ...slotInfo,
428
+ isVacant: true,
429
+ lastValue: deletedValue,
430
+ vacantSince: Date.now()
431
+ };
432
+ }
433
+
434
+ function findVacantSlot(quadIndex, subject, predicate) {
435
+ const targetSlotId = createSemanticSlotId(subject, predicate);
436
+ return Array.from(quadIndex.values())
437
+ .find(slot => slot.slotId === targetSlotId && slot.isVacant);
438
+ }
439
+
440
+ function occupySlot(slotInfo, newValue) {
441
+ if (!slotInfo || !slotInfo.isVacant) return null;
442
+ return {
443
+ ...slotInfo,
444
+ isVacant: false,
445
+ lastValue: newValue,
446
+ vacantSince: null
447
+ };
448
+ }
449
+
349
450
  function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory, meta = null) {
350
451
  if (!subject || !predicate || !object) return;
351
452
  const quad = dataFactory.quad(subject, predicate, object);
352
453
  quads.push(quad);
353
- quadIndex.set(quadIndexKey(quad.subject, quad.predicate, quad.object), meta ? { blockId, ...meta } : { blockId });
454
+
455
+ // Create enhanced slot info with semantic slot tracking
456
+ const slotInfo = createSlotInfo(blockId, meta?.entryIndex, {
457
+ ...meta,
458
+ subject,
459
+ predicate,
460
+ object
461
+ });
462
+
463
+ quadIndex.set(quadIndexKey(quad.subject, quad.predicate, quad.object), slotInfo);
354
464
  }
355
465
 
356
466
  function createLiteral(value, datatype, language, context, dataFactory) {
@@ -366,7 +476,22 @@ function processAnnotation(carrier, sem, state) {
366
476
  }
367
477
 
368
478
  const previousSubject = state.currentSubject;
369
- let newSubject = sem.subject ? state.df.namedNode(expandIRI(sem.subject, state.ctx)) : null;
479
+ let newSubject = null;
480
+
481
+ if (sem.subject) {
482
+ if (sem.subject.startsWith('=#')) {
483
+ // Handle fragment syntax
484
+ const fragment = sem.subject.substring(2);
485
+ if (state.currentSubject) {
486
+ // Replace any existing fragment in current subject
487
+ const baseIRI = state.currentSubject.value.split('#')[0];
488
+ newSubject = state.df.namedNode(`${baseIRI}#${fragment}`);
489
+ }
490
+ } else {
491
+ // Regular IRI
492
+ newSubject = state.df.namedNode(expandIRI(sem.subject, state.ctx));
493
+ }
494
+ }
370
495
  if (newSubject) state.currentSubject = newSubject;
371
496
 
372
497
  const S = state.currentSubject;
@@ -419,7 +544,16 @@ function processListContext(contextSem, listTokens, state, contextSubject = null
419
544
  if (listToken.attrs) {
420
545
  const itemSem = parseSemanticBlock(listToken.attrs);
421
546
  if (itemSem.subject && itemSem.subject !== 'RESET') {
422
- itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
547
+ if (itemSem.subject.startsWith('=#')) {
548
+ // Handle fragment syntax in list items
549
+ const fragment = itemSem.subject.substring(2);
550
+ if (state.currentSubject) {
551
+ const baseIRI = state.currentSubject.value.split('#')[0];
552
+ itemSubject = state.df.namedNode(`${baseIRI}#${fragment}`);
553
+ }
554
+ } else {
555
+ itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
556
+ }
423
557
  itemSubjectCarrier = { type: 'list', text: listToken.text, attrs: listToken.attrs, range: listToken.range };
424
558
  }
425
559
  }
@@ -429,7 +563,16 @@ function processListContext(contextSem, listTokens, state, contextSubject = null
429
563
  if (carrier.attrs) {
430
564
  const itemSem = parseSemanticBlock(carrier.attrs);
431
565
  if (itemSem.subject && itemSem.subject !== 'RESET') {
432
- itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
566
+ if (itemSem.subject.startsWith('=#')) {
567
+ // Handle fragment syntax in inline carriers
568
+ const fragment = itemSem.subject.substring(2);
569
+ if (state.currentSubject) {
570
+ const baseIRI = state.currentSubject.value.split('#')[0];
571
+ itemSubject = state.df.namedNode(`${baseIRI}#${fragment}`);
572
+ }
573
+ } else {
574
+ itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
575
+ }
433
576
  itemSubjectCarrier = carrier;
434
577
  break;
435
578
  }
@@ -552,98 +695,69 @@ export function parse(text, options = {}) {
552
695
  return { quads: state.quads, origin: state.origin, context: state.ctx };
553
696
  }
554
697
 
555
- export function shortenIRI(iri, ctx) {
556
- if (!iri || !iri.startsWith('http')) return iri;
557
- if (ctx['@vocab'] && iri.startsWith(ctx['@vocab'])) return iri.substring(ctx['@vocab'].length);
558
- for (const [prefix, namespace] of Object.entries(ctx)) {
559
- if (prefix !== '@vocab' && iri.startsWith(namespace)) {
560
- return prefix + ':' + iri.substring(namespace.length);
561
- }
562
- }
563
- return iri;
564
- }
565
-
566
- const serializeHelpers = {
567
- readAttrsSpan: (block, text) => {
568
- if (!block?.attrsRange) return null;
569
- const { start, end } = block.attrsRange;
570
- return (Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end > start)
571
- ? { start, end, text: text.substring(start, end) }
572
- : null;
573
- },
574
-
575
- readValueSpan: (block, text) => {
576
- if (!block?.valueRange) return null;
577
- const { start, end } = block.valueRange;
578
- return (Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end >= start)
579
- ? { start, end, text: text.substring(start, end) }
580
- : null;
581
- },
582
-
583
- normalizeAttrsTokens: (attrsText) => {
584
- const cleaned = String(attrsText || '').replace(/^\s*\{|\}\s*$/g, '').trim();
585
- return cleaned ? cleaned.split(/\s+/).filter(Boolean) : [];
586
- },
587
698
 
588
- blockTokensFromEntries: (block) => block?.entries?.length ? block.entries.map(e => e.raw).filter(Boolean) : null,
699
+ // Text Processing Utilities
700
+ function readSpan(block, text, spanType = 'attrs') {
701
+ const range = spanType === 'attrs' ? block?.attrsRange : block?.valueRange;
702
+ if (!range) return null;
703
+ const { start, end } = range;
704
+ return (Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end >= start)
705
+ ? { start, end, text: text.substring(start, end) }
706
+ : null;
707
+ }
589
708
 
590
- removeEntryAt: (block, entryIndex) => {
591
- if (!block?.entries || entryIndex == null || entryIndex < 0 || entryIndex >= block.entries.length) return null;
592
- return [...block.entries.slice(0, entryIndex), ...block.entries.slice(entryIndex + 1)];
593
- },
709
+ function normalizeAttrsTokens(attrsText) {
710
+ const cleaned = String(attrsText || '').replace(/^\s*\{|\}\s*$/g, '').trim();
711
+ return cleaned ? cleaned.split(/\s+/).filter(Boolean) : [];
712
+ }
594
713
 
595
- replaceLangDatatypeEntries: (block, lit, ctx) => {
596
- if (!block?.entries) return null;
597
- const filtered = block.entries.filter(e => e.kind !== 'language' && e.kind !== 'datatype');
598
- const extras = [];
599
- if (lit?.language) extras.push({ kind: 'language', language: lit.language, raw: `@${lit.language}`, relRange: { start: 0, end: 0 } });
600
- const dt = lit?.datatype?.value;
601
- if (!lit?.language && dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
602
- extras.push({ kind: 'datatype', datatype: shortenIRI(dt, ctx), raw: `^^${shortenIRI(dt, ctx)}`, relRange: { start: 0, end: 0 } });
603
- }
604
- return [...filtered, ...extras];
605
- },
714
+ function writeAttrsTokens(tokens) {
715
+ return `{${tokens.join(' ').trim()}}`;
716
+ }
606
717
 
607
- writeAttrsTokens: (tokens) => `{${tokens.join(' ').trim()}}`,
718
+ function removeOneToken(tokens, matchFn) {
719
+ const i = tokens.findIndex(matchFn);
720
+ return i === -1 ? { tokens, removed: false } : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
721
+ }
608
722
 
609
- removeOneToken: (tokens, matchFn) => {
610
- const i = tokens.findIndex(matchFn);
611
- return i === -1 ? { tokens, removed: false } : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
612
- },
723
+ function sanitizeCarrierValueForBlock(block, raw) {
724
+ const s = String(raw ?? '');
725
+ const t = block?.carrierType;
726
+ if (t === 'code') return s.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
727
+ const oneLine = s.replace(/[\n\r]+/g, ' ').trim();
728
+ return (t === 'span' || t === 'link') ? oneLine.replace(/[\[\]]/g, ' ') : oneLine;
729
+ }
613
730
 
614
- normalizeQuad: (q) => {
615
- if (!q) return null;
616
- const { subject, predicate, object } = q;
617
- if (object?.termType === 'Literal') {
618
- const language = typeof object.language === 'string' ? object.language : '';
619
- const datatype = object.datatype?.value || { termType: 'NamedNode', value: 'http://www.w3.org/2001/XMLSchema#string' };
620
- return { ...q, subject, predicate, object: { ...object, language, datatype } };
621
- }
622
- return { ...q, subject, predicate, object };
623
- },
731
+ function blockTokensFromEntries(block) {
732
+ return block?.entries?.length ? block.entries.map(e => e.raw).filter(Boolean) : null;
733
+ }
624
734
 
625
- quadToKeyForOrigin: (q) => {
626
- const nq = serializeHelpers.normalizeQuad(q);
627
- return nq ? quadIndexKey(nq.subject, nq.predicate, nq.object) : null;
628
- },
735
+ function removeEntryAt(block, entryIndex) {
736
+ if (!block?.entries || entryIndex == null || entryIndex < 0 || entryIndex >= block.entries.length) return null;
737
+ return [...block.entries.slice(0, entryIndex), ...block.entries.slice(entryIndex + 1)];
738
+ }
629
739
 
630
- parseQuadIndexKey: (key) => {
631
- try {
632
- const [s, p, objKey] = JSON.parse(key);
633
- return { s, p, o: JSON.parse(objKey) };
634
- } catch {
635
- return null;
636
- }
637
- },
740
+ function replaceLangDatatypeEntries(block, lit, ctx) {
741
+ if (!block?.entries) return null;
742
+ const filtered = block.entries.filter(e => e.kind !== 'language' && e.kind !== 'datatype');
743
+ const extras = [];
744
+ if (lit?.language) extras.push({ kind: 'language', language: lit.language, raw: `@${lit.language}`, relRange: { start: 0, end: 0 } });
745
+ const dt = lit?.datatype?.value;
746
+ if (!lit?.language && dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
747
+ extras.push({ kind: 'datatype', datatype: shortenIRI(dt, ctx), raw: `^^${shortenIRI(dt, ctx)}`, relRange: { start: 0, end: 0 } });
748
+ }
749
+ return [...filtered, ...extras];
750
+ }
638
751
 
639
- sanitizeCarrierValueForBlock: (block, raw) => {
640
- const s = String(raw ?? '');
641
- const t = block?.carrierType;
642
- if (t === 'code') return s.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
643
- const oneLine = s.replace(/[\n\r]+/g, ' ').trim();
644
- return (t === 'span' || t === 'link') ? oneLine.replace(/[\[\]]/g, ' ') : oneLine;
752
+ function updateAttrsDatatypeLang(tokens, newLit, ctx) {
753
+ const predicatesAndTypes = tokens.filter(t => !t.startsWith('@') && !t.startsWith('^^'));
754
+ if (newLit?.language) return [...predicatesAndTypes, `@${newLit.language}`];
755
+ const dt = newLit?.datatype?.value;
756
+ if (dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
757
+ return [...predicatesAndTypes, `^^${shortenIRI(dt, ctx)}`];
645
758
  }
646
- };
759
+ return predicatesAndTypes;
760
+ }
647
761
 
648
762
  export function serialize({ text, diff, origin, options = {} }) {
649
763
  if (!diff || (!diff.add?.length && !diff.delete?.length)) {
@@ -658,12 +772,11 @@ export function serialize({ text, diff, origin, options = {} }) {
658
772
 
659
773
  const findOriginEntryForLiteralByValue = (subjectIri, predicateIri, literalValue) => {
660
774
  for (const [k, entry] of base?.quadIndex || []) {
661
- const parsed = serializeHelpers.parseQuadIndexKey(k);
775
+ const parsed = parseQuadIndexKey(k);
662
776
  if (!parsed) continue;
663
777
  if (parsed.s !== subjectIri || parsed.p !== predicateIri) continue;
664
778
  if (parsed.o?.t !== 'Literal') continue;
665
- if (parsed.o?.v !== literalValue) continue;
666
- return entry;
779
+ if (parsed.o?.v === literalValue) return entry;
667
780
  }
668
781
  return null;
669
782
  };
@@ -671,7 +784,7 @@ export function serialize({ text, diff, origin, options = {} }) {
671
784
  const findLiteralCarrierBlocksBySP = (subjectIri, predicateIri) => {
672
785
  const out = [];
673
786
  for (const [k, entry] of base?.quadIndex || []) {
674
- const parsed = serializeHelpers.parseQuadIndexKey(k);
787
+ const parsed = parseQuadIndexKey(k);
675
788
  if (!parsed) continue;
676
789
  if (parsed.s !== subjectIri || parsed.p !== predicateIri) continue;
677
790
  if (parsed.o?.t !== 'Literal') continue;
@@ -682,21 +795,13 @@ export function serialize({ text, diff, origin, options = {} }) {
682
795
  return out;
683
796
  };
684
797
 
685
- const objectSignature = (o) => {
686
- if (!o) return '';
687
- if (o.termType === 'Literal') {
688
- return JSON.stringify({ t: 'Literal', v: o.value, lang: o.language || '', dt: o.datatype?.value || '' });
689
- }
690
- return JSON.stringify({ t: o.termType, v: o.value });
691
- };
692
-
693
798
  const anchors = new Map();
694
799
  for (const q0 of diff.delete || []) {
695
- const q = serializeHelpers.normalizeQuad(q0);
800
+ const q = normalizeQuad(q0);
696
801
  if (!q) continue;
697
802
  if (!q?.subject || !q?.object || !q?.predicate) continue;
698
803
  const key = JSON.stringify([q.subject.value, objectSignature(q.object)]);
699
- const qk = serializeHelpers.quadToKeyForOrigin(q);
804
+ const qk = quadToKeyForOrigin(q);
700
805
  const entry = qk ? base?.quadIndex?.get(qk) : null;
701
806
  const blockId = entry?.blockId || entry;
702
807
  const block = blockId ? base?.blocks?.get(blockId) : null;
@@ -706,7 +811,7 @@ export function serialize({ text, diff, origin, options = {} }) {
706
811
 
707
812
  const addBySP = new Map();
708
813
  for (const q0 of diff.add || []) {
709
- const q = serializeHelpers.normalizeQuad(q0);
814
+ const q = normalizeQuad(q0);
710
815
  if (!q) continue;
711
816
  if (!q?.subject || !q?.predicate || !q?.object) continue;
712
817
  const k = JSON.stringify([q.subject.value, q.predicate.value]);
@@ -718,16 +823,16 @@ export function serialize({ text, diff, origin, options = {} }) {
718
823
  const consumedAdds = new Set();
719
824
  const literalUpdates = [];
720
825
  for (const dq0 of diff.delete || []) {
721
- const dq = serializeHelpers.normalizeQuad(dq0);
826
+ const dq = normalizeQuad(dq0);
722
827
  if (!dq) continue;
723
828
  if (!dq?.subject || !dq?.predicate || !dq?.object) continue;
724
829
  if (dq.object.termType !== 'Literal') continue;
725
830
  const k = JSON.stringify([dq.subject.value, dq.predicate.value]);
726
831
  const candidates = addBySP.get(k) || [];
727
- const aq = candidates.find(x => x?.object?.termType === 'Literal' && !consumedAdds.has(serializeHelpers.quadToKeyForOrigin(x)));
832
+ const aq = candidates.find(x => x?.object?.termType === 'Literal' && !consumedAdds.has(quadToKeyForOrigin(x)));
728
833
  if (!aq) continue;
729
834
 
730
- const dqk = serializeHelpers.quadToKeyForOrigin(dq);
835
+ const dqk = quadToKeyForOrigin(dq);
731
836
  let entry = dqk ? base?.quadIndex?.get(dqk) : null;
732
837
  if (!entry && dq.object?.termType === 'Literal') {
733
838
  entry = findOriginEntryForLiteralByValue(dq.subject.value, dq.predicate.value, dq.object.value);
@@ -737,13 +842,49 @@ export function serialize({ text, diff, origin, options = {} }) {
737
842
  if (!block) continue;
738
843
 
739
844
  literalUpdates.push({ deleteQuad: dq, addQuad: aq, entry, block });
740
- consumedAdds.add(serializeHelpers.quadToKeyForOrigin(aq));
845
+ consumedAdds.add(quadToKeyForOrigin(aq));
741
846
  }
742
847
 
743
848
  for (const q0 of diff.add || []) {
744
- const quad = serializeHelpers.normalizeQuad(q0);
849
+ const quad = normalizeQuad(q0);
745
850
  if (!quad || quad.object?.termType !== 'Literal') continue;
746
- if (consumedAdds.has(serializeHelpers.quadToKeyForOrigin(quad))) continue;
851
+ if (consumedAdds.has(quadToKeyForOrigin(quad))) continue;
852
+
853
+ // Check if there's a vacant slot we can reuse
854
+ const vacantSlot = findVacantSlot(base?.quadIndex, quad.subject, quad.predicate);
855
+ if (!vacantSlot) continue;
856
+
857
+ const block = base?.blocks?.get(vacantSlot.blockId);
858
+ if (!block) continue;
859
+
860
+ const span = readSpan(block, text, 'attrs');
861
+ if (!span) continue;
862
+
863
+ // Occupy the vacant slot and update the annotation
864
+ const occupiedSlot = occupySlot(vacantSlot, quad.object);
865
+ if (!occupiedSlot) continue;
866
+
867
+ // Update the carrier value
868
+ const valueSpan = readSpan(block, text, 'value');
869
+ if (valueSpan) {
870
+ edits.push({ start: valueSpan.start, end: valueSpan.end, text: quad.object.value });
871
+ }
872
+
873
+ // Update the annotation block to restore the predicate token
874
+ const tokens = normalizeAttrsTokens(span.text);
875
+ const predToken = `${vacantSlot.form || ''}${shortenIRI(quad.predicate.value, ctx)}`;
876
+
877
+ // For empty annotation blocks, replace entirely; for non-empty, add if missing
878
+ if (tokens.length === 0) {
879
+ edits.push({ start: span.start, end: span.end, text: `{${predToken}}` });
880
+ } else if (!tokens.includes(predToken)) {
881
+ const updated = [...tokens, predToken];
882
+ edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
883
+ }
884
+
885
+ // Mark as consumed and continue
886
+ consumedAdds.add(quadToKeyForOrigin(quad));
887
+ continue;
747
888
 
748
889
  const matches = findLiteralCarrierBlocksBySP(quad.subject.value, quad.predicate.value);
749
890
  if (matches.length === 0) continue;
@@ -758,18 +899,18 @@ export function serialize({ text, diff, origin, options = {} }) {
758
899
 
759
900
  if (sameLang.length !== 1) continue;
760
901
  const target = sameLang[0].block;
761
- const vSpan = serializeHelpers.readValueSpan(target, text);
902
+ const vSpan = readSpan(target, text, 'value');
762
903
  if (!vSpan) continue;
763
904
 
764
- const newValue = serializeHelpers.sanitizeCarrierValueForBlock(target, quad.object.value);
905
+ const newValue = sanitizeCarrierValueForBlock(target, quad.object.value);
765
906
  edits.push({ start: vSpan.start, end: vSpan.end, text: newValue });
766
907
 
767
- const aSpan = serializeHelpers.readAttrsSpan(target, text);
908
+ const aSpan = readSpan(target, text, 'attrs');
768
909
  if (aSpan && target?.entries?.length) {
769
- const nextEntries = serializeHelpers.replaceLangDatatypeEntries(target, quad.object, ctx);
910
+ const nextEntries = replaceLangDatatypeEntries(target, quad.object, ctx);
770
911
  if (nextEntries) {
771
912
  const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
772
- edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
913
+ edits.push({ start: aSpan.start, end: aSpan.end, text: writeAttrsTokens(nextTokens) });
773
914
  }
774
915
  }
775
916
 
@@ -777,51 +918,42 @@ export function serialize({ text, diff, origin, options = {} }) {
777
918
  }
778
919
 
779
920
  for (const u of literalUpdates) {
780
- const span = serializeHelpers.readValueSpan(u.block, text);
921
+ const span = readSpan(u.block, text, 'value');
781
922
  if (span) {
782
- const newValue = serializeHelpers.sanitizeCarrierValueForBlock(u.block, u.addQuad.object.value);
923
+ const newValue = sanitizeCarrierValueForBlock(u.block, u.addQuad.object.value);
783
924
  edits.push({ start: span.start, end: span.end, text: newValue });
784
925
  }
785
926
 
786
- const aSpan = serializeHelpers.readAttrsSpan(u.block, text);
927
+ const aSpan = readSpan(u.block, text, 'attrs');
787
928
  if (aSpan) {
788
929
  if (u.block?.entries?.length) {
789
- const nextEntries = serializeHelpers.replaceLangDatatypeEntries(u.block, u.addQuad.object, ctx);
930
+ const nextEntries = replaceLangDatatypeEntries(u.block, u.addQuad.object, ctx);
790
931
  if (nextEntries) {
791
932
  const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
792
933
  if (nextTokens.length === 0) {
793
934
  edits.push({ start: aSpan.start, end: aSpan.end, text: '{}' });
794
935
  } else {
795
- edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
936
+ edits.push({ start: aSpan.start, end: aSpan.end, text: writeAttrsTokens(nextTokens) });
796
937
  }
797
938
  }
798
939
  } else {
799
- const tokens = serializeHelpers.normalizeAttrsTokens(aSpan.text);
800
- const updated = updateAttrsDatatypeLang(tokens, u.addQuad.object);
940
+ const tokens = normalizeAttrsTokens(aSpan.text);
941
+ const updated = updateAttrsDatatypeLang(tokens, u.addQuad.object, ctx);
801
942
  if (updated.join(' ') !== tokens.join(' ')) {
802
943
  if (updated.length === 0) {
803
944
  edits.push({ start: aSpan.start, end: aSpan.end, text: '{}' });
804
945
  } else {
805
- edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(updated) });
946
+ edits.push({ start: aSpan.start, end: aSpan.end, text: writeAttrsTokens(updated) });
806
947
  }
807
948
  }
808
949
  }
809
950
  }
810
951
  }
811
952
 
812
- const updateAttrsDatatypeLang = (tokens, newLit) => {
813
- const predicatesAndTypes = tokens.filter(t => !t.startsWith('@') && !t.startsWith('^^'));
814
- if (newLit?.language) return [...predicatesAndTypes, `@${newLit.language}`];
815
- const dt = newLit?.datatype?.value;
816
- if (dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
817
- return [...predicatesAndTypes, `^^${shortenIRI(dt, ctx)}`];
818
- }
819
- return predicatesAndTypes;
820
- };
821
953
 
822
954
  if (diff.delete) {
823
955
  diff.delete.forEach(q0 => {
824
- const quad = serializeHelpers.normalizeQuad(q0);
956
+ const quad = normalizeQuad(q0);
825
957
  if (!quad) return;
826
958
  if (!quad?.subject || !quad?.predicate || !quad?.object) return;
827
959
 
@@ -834,36 +966,60 @@ export function serialize({ text, diff, origin, options = {} }) {
834
966
  if (isUpdated) return;
835
967
  }
836
968
 
837
- const key = serializeHelpers.quadToKeyForOrigin(quad);
969
+ const key = quadToKeyForOrigin(quad);
838
970
  let entry = key ? base?.quadIndex?.get(key) : null;
839
971
  if (!entry && quad.object?.termType === 'Literal') {
840
972
  entry = findOriginEntryForLiteralByValue(quad.subject.value, quad.predicate.value, quad.object.value);
841
973
  }
974
+
975
+ // Mark the semantic slot as vacant for future reuse
976
+ if (entry && entry.slotId) {
977
+ // Capture block information before marking as vacant
978
+ const block = base?.blocks?.get(entry.blockId);
979
+ const blockInfo = block ? {
980
+ id: entry.blockId,
981
+ range: block.range,
982
+ attrsRange: block.attrsRange,
983
+ valueRange: block.valueRange,
984
+ carrierType: block.carrierType,
985
+ subject: block.subject,
986
+ context: block.context
987
+ } : null;
988
+
989
+ const vacantSlot = markSlotAsVacant(entry, quad.object);
990
+ if (vacantSlot && blockInfo) {
991
+ vacantSlot.blockInfo = blockInfo;
992
+ base.quadIndex.set(key, vacantSlot);
993
+ }
994
+ }
995
+
842
996
  const blockId = entry?.blockId || entry;
843
997
  if (!blockId) return;
998
+
844
999
  const block = base?.blocks?.get(blockId);
845
- const span = serializeHelpers.readAttrsSpan(block, text);
1000
+ if (!block) return;
1001
+
1002
+ const span = readSpan(block, text, 'attrs');
846
1003
  if (!span) return;
847
1004
 
1005
+ // Handle entry removal by index
848
1006
  if (entry?.entryIndex != null && block?.entries?.length) {
849
- const nextEntries = serializeHelpers.removeEntryAt(block, entry.entryIndex);
1007
+ const nextEntries = removeEntryAt(block, entry.entryIndex);
850
1008
  if (!nextEntries) return;
1009
+
851
1010
  const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
852
- if (nextTokens.length === 0) {
853
- edits.push({ start: span.start, end: span.end, text: '{}' });
854
- } else {
855
- edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
856
- }
1011
+ const newText = nextTokens.length === 0 ? '{}' : writeAttrsTokens(nextTokens);
1012
+ edits.push({ start: span.start, end: span.end, text: newText });
857
1013
  return;
858
1014
  }
859
1015
 
860
- const tokens = serializeHelpers.normalizeAttrsTokens(span.text);
1016
+ const tokens = normalizeAttrsTokens(span.text);
861
1017
  let updated = tokens;
862
1018
  let removed = false;
863
1019
 
864
1020
  if (entry?.kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
865
1021
  const expectedType = entry.expandedType || quad.object.value;
866
- ({ tokens: updated, removed } = serializeHelpers.removeOneToken(tokens, t => {
1022
+ ({ tokens: updated, removed } = removeOneToken(tokens, t => {
867
1023
  if (!t.startsWith('.')) return false;
868
1024
  const raw = t.slice(1);
869
1025
  return expandIRI(raw, ctx) === expectedType;
@@ -871,7 +1027,7 @@ export function serialize({ text, diff, origin, options = {} }) {
871
1027
  } else {
872
1028
  const expectedPred = entry?.expandedPredicate || quad.predicate.value;
873
1029
  const expectedForm = entry?.form;
874
- ({ tokens: updated, removed } = serializeHelpers.removeOneToken(tokens, t => {
1030
+ ({ tokens: updated, removed } = removeOneToken(tokens, t => {
875
1031
  const m = String(t).match(/^(\^\?|\^|\?|)(.+)$/);
876
1032
  if (!m) return false;
877
1033
  const form = m[1] || '';
@@ -888,18 +1044,18 @@ export function serialize({ text, diff, origin, options = {} }) {
888
1044
  return;
889
1045
  }
890
1046
 
891
- const newAttrs = serializeHelpers.writeAttrsTokens(updated);
1047
+ const newAttrs = writeAttrsTokens(updated);
892
1048
  edits.push({ start: span.start, end: span.end, text: newAttrs });
893
1049
  });
894
1050
  }
895
1051
 
896
1052
  if (diff.add) {
897
1053
  diff.add.forEach(q0 => {
898
- const quad = serializeHelpers.normalizeQuad(q0);
1054
+ const quad = normalizeQuad(q0);
899
1055
  if (!quad) return;
900
1056
  if (!quad?.subject || !quad?.predicate || !quad?.object) return;
901
1057
 
902
- if (consumedAdds.has(serializeHelpers.quadToKeyForOrigin(quad))) return;
1058
+ if (consumedAdds.has(quadToKeyForOrigin(quad))) return;
903
1059
 
904
1060
  const anchorKey = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
905
1061
  const anchored = anchors.get(anchorKey) || null;
@@ -928,7 +1084,7 @@ export function serialize({ text, diff, origin, options = {} }) {
928
1084
  } else {
929
1085
  const full = quad.object.value;
930
1086
  const label = shortenIRI(full, ctx);
931
- edits.push({ start: result.length, end: result.length, text: `\n[${label}](${full}) {?${predShort}}` });
1087
+ edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=${label} ?${predShort}}` }); // one balanced {...} block: subject IRI and ?predicate share a single annotation, matching the other NamedNode append branch
932
1088
  }
933
1089
  return;
934
1090
  }
@@ -948,14 +1104,14 @@ export function serialize({ text, diff, origin, options = {} }) {
948
1104
  if (quad.object.termType === 'NamedNode') {
949
1105
  const full = quad.object.value;
950
1106
  const label = shortenIRI(full, ctx);
951
- edits.push({ start: result.length, end: result.length, text: `\n[${label}](${full}) {?${predShort}}` });
1107
+ edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=${shortenIRI(full, ctx)} ?${predShort}}` });
952
1108
  return;
953
1109
  }
954
1110
  }
955
1111
 
956
- const span = serializeHelpers.readAttrsSpan(targetBlock, text);
1112
+ const span = readSpan(targetBlock, text, 'attrs');
957
1113
  if (!span) return;
958
- const tokens = serializeHelpers.blockTokensFromEntries(targetBlock) || serializeHelpers.normalizeAttrsTokens(span.text);
1114
+ const tokens = blockTokensFromEntries(targetBlock) || normalizeAttrsTokens(span.text);
959
1115
 
960
1116
  if (quad.predicate.value.endsWith('rdf-syntax-ns#type') && quad.object?.termType === 'NamedNode') {
961
1117
  const typeShort = shortenIRI(quad.object.value, ctx);
@@ -963,7 +1119,7 @@ export function serialize({ text, diff, origin, options = {} }) {
963
1119
  if (!typeToken) return;
964
1120
  if (tokens.includes(typeToken)) return;
965
1121
  const updated = [...tokens, typeToken];
966
- edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(updated) });
1122
+ edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
967
1123
  return;
968
1124
  }
969
1125
 
@@ -974,7 +1130,7 @@ export function serialize({ text, diff, origin, options = {} }) {
974
1130
  if (!predToken) return;
975
1131
  if (tokens.includes(predToken)) return;
976
1132
  const updated = [...tokens, predToken];
977
- edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(updated) });
1133
+ edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
978
1134
  });
979
1135
  }
980
1136
 
@@ -983,7 +1139,43 @@ export function serialize({ text, diff, origin, options = {} }) {
983
1139
  result = result.substring(0, edit.start) + edit.text + result.substring(edit.end);
984
1140
  });
985
1141
 
1142
+ // Extract vacant slots before reparsing to preserve them
1143
+ const vacantSlots = new Map();
1144
+ base?.quadIndex?.forEach((slot, key) => {
1145
+ if (slot.isVacant) {
1146
+ vacantSlots.set(key, slot);
1147
+ }
1148
+ });
1149
+
986
1150
  const reparsed = parse(result, { context: options.context || {} });
1151
+
1152
+ // Merge vacant slots back into the new origin
1153
+ vacantSlots.forEach((vacantSlot, key) => {
1154
+ // Check if the block still exists in the new origin
1155
+ if (!reparsed.origin.blocks.has(vacantSlot.blockId)) {
1156
+ // Recreate the empty block for the vacant slot using preserved info
1157
+ const blockInfo = vacantSlot.blockInfo;
1158
+ if (blockInfo) {
1159
+ const emptyBlock = {
1160
+ id: blockInfo.id,
1161
+ range: blockInfo.range || { start: 0, end: 0 },
1162
+ attrsRange: blockInfo.attrsRange,
1163
+ valueRange: blockInfo.valueRange,
1164
+ carrierType: blockInfo.carrierType || 'span',
1165
+ subject: blockInfo.subject || '',
1166
+ types: [],
1167
+ predicates: [],
1168
+ entries: [], // Empty entries - just {} annotation
1169
+ context: blockInfo.context || { ...ctx }
1170
+ };
1171
+ reparsed.origin.blocks.set(vacantSlot.blockId, emptyBlock);
1172
+ }
1173
+ }
1174
+
1175
+ // Merge the vacant slot back
1176
+ reparsed.origin.quadIndex.set(key, vacantSlot);
1177
+ });
1178
+
987
1179
  return { text: result, origin: reparsed.origin };
988
1180
  }
989
1181
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.2.4",
3
+ "version": "0.2.6",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",