mdld-parse 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +7 -8
  2. package/index.js +299 -147
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -1,15 +1,14 @@
1
1
  # MD-LD Parse v0.2
2
2
 
3
- **Markdown-Linked Data (MD-LD)** — a deterministic, streaming-friendly RDF authoring format that extends Markdown with explicit `{}` annotations.
3
+ **Markdown-Linked Data (MD-LD)** — a deterministic, streaming-friendly RDF authoring format that extends Markdown with explicit `{...}` annotations.
4
4
 
5
5
  [![NPM](https://img.shields.io/npm/v/mdld-parse)](https://www.npmjs.com/package/mdld-parse)
6
- [![License](https://img.shields.io/npm/l/mdld-parse)](https://github.com/mdld-js/mdld-parse)
7
6
 
8
- [Documentation](https://mdld.js.org) | [Specification](https://mdld.js.org/spec) | [Playground](https://mdld.js.org/playground)
7
+ [Documentation](https://mdld.js.org) | [Repository](https://github.com/davay42/mdld-parse) | [Playground](https://mdld.js.org/playground)
9
8
 
10
9
  ## What is MD-LD?
11
10
 
12
- MD-LD allows you to author RDF graphs directly in Markdown using explicit `{}` annotations:
11
+ MD-LD allows you to author RDF graphs directly in Markdown using explicit `{...}` annotations:
13
12
 
14
13
  ```markdown
15
14
  # Apollo 11 {=ex:apollo11 .SpaceMission}
@@ -32,8 +31,8 @@ ex:apollo11 a schema:SpaceMission ;
32
31
 
33
32
  MD-LD v0.2 provides strict semantic guarantees:
34
33
 
35
- 1. **CommonMark-preserving** — Removing `{}` yields valid Markdown
36
- 2. **Explicit semantics** — Every quad originates from explicit `{}`
34
+ 1. **CommonMark-preserving** — Removing `{...}` yields valid Markdown
35
+ 2. **Explicit semantics** — Every quad originates from explicit `{...}`
37
36
  3. **Single-pass parsing** — Streaming-friendly, deterministic
38
37
  4. **No blank nodes** — All subjects are stable IRIs
39
38
  5. **Complete traceability** — Every quad maps to source location
@@ -140,7 +139,7 @@ Links create relationships (use `?` prefix):
140
139
  ```markdown
141
140
  # Mission {=ex:apollo11}
142
141
 
143
- [NASA](ex:nasa) {?organizer}
142
+ [NASA] {=ex:nasa ?organizer}
144
143
  ```
145
144
 
146
145
  ```turtle
@@ -294,7 +293,7 @@ Apply RDF changes back to markdown with proper positioning.
294
293
  **Returns:** `{ text, origin }`
295
294
 
296
295
  - `text` — Updated markdown
297
- - `origin` — Updated origin tracking
296
+ - `origin` — Updated origin tracking, including vacant slots preserved for reuse
298
297
 
299
298
  **Example:**
300
299
 
package/index.js CHANGED
@@ -24,6 +24,7 @@ function hash(str) {
24
24
  return Math.abs(h).toString(16).slice(0, 12);
25
25
  }
26
26
 
27
+ // IRI Utilities
27
28
  function expandIRI(term, ctx) {
28
29
  if (term == null) return null;
29
30
  const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
@@ -36,6 +37,21 @@ function expandIRI(term, ctx) {
36
37
  return (ctx['@vocab'] || '') + t;
37
38
  }
38
39
 
40
+ export function shortenIRI(iri, ctx) {
41
+ if (!iri || !iri.startsWith('http')) return iri;
42
+ if (ctx['@vocab'] && iri.startsWith(ctx['@vocab'])) return iri.substring(ctx['@vocab'].length);
43
+ for (const [prefix, namespace] of Object.entries(ctx)) {
44
+ if (prefix !== '@vocab' && iri.startsWith(namespace)) {
45
+ return prefix + ':' + iri.substring(namespace.length);
46
+ }
47
+ }
48
+ return iri;
49
+ }
50
+
51
+ function processIRI(term, ctx, operation = 'expand') {
52
+ return operation === 'expand' ? expandIRI(term, ctx) : shortenIRI(term, ctx);
53
+ }
54
+
39
55
  function parseSemanticBlock(raw) {
40
56
  try {
41
57
  const src = String(raw || '').trim();
@@ -339,6 +355,7 @@ function createBlock(subject, types, predicates, entries, range, attrsRange, val
339
355
  };
340
356
  }
341
357
 
358
+ // Quad Utilities
342
359
  function quadIndexKey(subject, predicate, object) {
343
360
  const objKey = object.termType === 'Literal'
344
361
  ? JSON.stringify({ t: 'Literal', v: object.value, lang: object.language || '', dt: object.datatype?.value || '' })
@@ -346,11 +363,97 @@ function quadIndexKey(subject, predicate, object) {
346
363
  return JSON.stringify([subject.value, predicate.value, objKey]);
347
364
  }
348
365
 
366
+ function normalizeQuad(q) {
367
+ if (!q) return null;
368
+ const { subject, predicate, object } = q;
369
+ if (object?.termType === 'Literal') {
370
+ const language = typeof object.language === 'string' ? object.language : '';
371
+ const datatype = object.datatype?.value || 'http://www.w3.org/2001/XMLSchema#string';
372
+ return { ...q, subject, predicate, object: { ...object, language, datatype } };
373
+ }
374
+ return { ...q, subject, predicate, object };
375
+ }
376
+
377
+ function objectSignature(o) {
378
+ if (!o) return '';
379
+ if (o.termType === 'Literal') {
380
+ return JSON.stringify({ t: 'Literal', v: o.value, lang: o.language || '', dt: o.datatype?.value || '' });
381
+ }
382
+ return JSON.stringify({ t: o.termType, v: o.value });
383
+ }
384
+
385
+ function quadToKeyForOrigin(q) {
386
+ const nq = normalizeQuad(q);
387
+ return nq ? quadIndexKey(nq.subject, nq.predicate, nq.object) : null;
388
+ }
389
+
390
+ function parseQuadIndexKey(key) {
391
+ try {
392
+ const [s, p, objKey] = JSON.parse(key);
393
+ return { s, p, o: JSON.parse(objKey) };
394
+ } catch {
395
+ return null;
396
+ }
397
+ }
398
+
399
+ // Semantic Slot Utilities
400
+ function createSemanticSlotId(subject, predicate) {
401
+ return hash(`${subject.value}|${predicate.value}`);
402
+ }
403
+
404
+ function createSlotInfo(blockId, entryIndex, meta = {}) {
405
+ const slotId = meta.subject && meta.predicate ? createSemanticSlotId(meta.subject, meta.predicate) : null;
406
+ return {
407
+ blockId,
408
+ entryIndex,
409
+ slotId,
410
+ isVacant: false,
411
+ lastValue: null,
412
+ vacantSince: null,
413
+ ...meta
414
+ };
415
+ }
416
+
417
+ function markSlotAsVacant(slotInfo, deletedValue) {
418
+ if (!slotInfo) return null;
419
+ return {
420
+ ...slotInfo,
421
+ isVacant: true,
422
+ lastValue: deletedValue,
423
+ vacantSince: Date.now()
424
+ };
425
+ }
426
+
427
+ function findVacantSlot(quadIndex, subject, predicate) {
428
+ const targetSlotId = createSemanticSlotId(subject, predicate);
429
+ return Array.from(quadIndex.values())
430
+ .find(slot => slot.slotId === targetSlotId && slot.isVacant);
431
+ }
432
+
433
+ function occupySlot(slotInfo, newValue) {
434
+ if (!slotInfo || !slotInfo.isVacant) return null;
435
+ return {
436
+ ...slotInfo,
437
+ isVacant: false,
438
+ lastValue: newValue,
439
+ vacantSince: null
440
+ };
441
+ }
442
+
349
443
  function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory, meta = null) {
350
444
  if (!subject || !predicate || !object) return;
351
445
  const quad = dataFactory.quad(subject, predicate, object);
352
446
  quads.push(quad);
353
- quadIndex.set(quadIndexKey(quad.subject, quad.predicate, quad.object), meta ? { blockId, ...meta } : { blockId });
447
+
448
+ // Create enhanced slot info with semantic slot tracking
449
+ const slotInfo = createSlotInfo(blockId, meta?.entryIndex, {
450
+ ...meta,
451
+ subject,
452
+ predicate,
453
+ object
454
+ });
455
+
456
+ quadIndex.set(quadIndexKey(quad.subject, quad.predicate, quad.object), slotInfo);
354
457
  }
355
458
 
356
459
  function createLiteral(value, datatype, language, context, dataFactory) {
@@ -552,98 +655,69 @@ export function parse(text, options = {}) {
552
655
  return { quads: state.quads, origin: state.origin, context: state.ctx };
553
656
  }
554
657
 
555
- export function shortenIRI(iri, ctx) {
556
- if (!iri || !iri.startsWith('http')) return iri;
557
- if (ctx['@vocab'] && iri.startsWith(ctx['@vocab'])) return iri.substring(ctx['@vocab'].length);
558
- for (const [prefix, namespace] of Object.entries(ctx)) {
559
- if (prefix !== '@vocab' && iri.startsWith(namespace)) {
560
- return prefix + ':' + iri.substring(namespace.length);
561
- }
562
- }
563
- return iri;
564
- }
565
-
566
- const serializeHelpers = {
567
- readAttrsSpan: (block, text) => {
568
- if (!block?.attrsRange) return null;
569
- const { start, end } = block.attrsRange;
570
- return (Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end > start)
571
- ? { start, end, text: text.substring(start, end) }
572
- : null;
573
- },
574
658
 
575
- readValueSpan: (block, text) => {
576
- if (!block?.valueRange) return null;
577
- const { start, end } = block.valueRange;
578
- return (Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end >= start)
579
- ? { start, end, text: text.substring(start, end) }
580
- : null;
581
- },
582
-
583
- normalizeAttrsTokens: (attrsText) => {
584
- const cleaned = String(attrsText || '').replace(/^\s*\{|\}\s*$/g, '').trim();
585
- return cleaned ? cleaned.split(/\s+/).filter(Boolean) : [];
586
- },
587
-
588
- blockTokensFromEntries: (block) => block?.entries?.length ? block.entries.map(e => e.raw).filter(Boolean) : null,
659
+ // Text Processing Utilities
660
+ function readSpan(block, text, spanType = 'attrs') {
661
+ const range = spanType === 'attrs' ? block?.attrsRange : block?.valueRange;
662
+ if (!range) return null;
663
+ const { start, end } = range;
664
+ return (Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end >= start)
665
+ ? { start, end, text: text.substring(start, end) }
666
+ : null;
667
+ }
589
668
 
590
- removeEntryAt: (block, entryIndex) => {
591
- if (!block?.entries || entryIndex == null || entryIndex < 0 || entryIndex >= block.entries.length) return null;
592
- return [...block.entries.slice(0, entryIndex), ...block.entries.slice(entryIndex + 1)];
593
- },
669
+ function normalizeAttrsTokens(attrsText) {
670
+ const cleaned = String(attrsText || '').replace(/^\s*\{|\}\s*$/g, '').trim();
671
+ return cleaned ? cleaned.split(/\s+/).filter(Boolean) : [];
672
+ }
594
673
 
595
- replaceLangDatatypeEntries: (block, lit, ctx) => {
596
- if (!block?.entries) return null;
597
- const filtered = block.entries.filter(e => e.kind !== 'language' && e.kind !== 'datatype');
598
- const extras = [];
599
- if (lit?.language) extras.push({ kind: 'language', language: lit.language, raw: `@${lit.language}`, relRange: { start: 0, end: 0 } });
600
- const dt = lit?.datatype?.value;
601
- if (!lit?.language && dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
602
- extras.push({ kind: 'datatype', datatype: shortenIRI(dt, ctx), raw: `^^${shortenIRI(dt, ctx)}`, relRange: { start: 0, end: 0 } });
603
- }
604
- return [...filtered, ...extras];
605
- },
674
+ function writeAttrsTokens(tokens) {
675
+ return `{${tokens.join(' ').trim()}}`;
676
+ }
606
677
 
607
- writeAttrsTokens: (tokens) => `{${tokens.join(' ').trim()}}`,
678
+ function removeOneToken(tokens, matchFn) {
679
+ const i = tokens.findIndex(matchFn);
680
+ return i === -1 ? { tokens, removed: false } : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
681
+ }
608
682
 
609
- removeOneToken: (tokens, matchFn) => {
610
- const i = tokens.findIndex(matchFn);
611
- return i === -1 ? { tokens, removed: false } : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
612
- },
683
+ function sanitizeCarrierValueForBlock(block, raw) {
684
+ const s = String(raw ?? '');
685
+ const t = block?.carrierType;
686
+ if (t === 'code') return s.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
687
+ const oneLine = s.replace(/[\n\r]+/g, ' ').trim();
688
+ return (t === 'span' || t === 'link') ? oneLine.replace(/[\[\]]/g, ' ') : oneLine;
689
+ }
613
690
 
614
- normalizeQuad: (q) => {
615
- if (!q) return null;
616
- const { subject, predicate, object } = q;
617
- if (object?.termType === 'Literal') {
618
- const language = typeof object.language === 'string' ? object.language : '';
619
- const datatype = object.datatype?.value || { termType: 'NamedNode', value: 'http://www.w3.org/2001/XMLSchema#string' };
620
- return { ...q, subject, predicate, object: { ...object, language, datatype } };
621
- }
622
- return { ...q, subject, predicate, object };
623
- },
691
+ function blockTokensFromEntries(block) {
692
+ return block?.entries?.length ? block.entries.map(e => e.raw).filter(Boolean) : null;
693
+ }
624
694
 
625
- quadToKeyForOrigin: (q) => {
626
- const nq = serializeHelpers.normalizeQuad(q);
627
- return nq ? quadIndexKey(nq.subject, nq.predicate, nq.object) : null;
628
- },
695
+ function removeEntryAt(block, entryIndex) {
696
+ if (!block?.entries || entryIndex == null || entryIndex < 0 || entryIndex >= block.entries.length) return null;
697
+ return [...block.entries.slice(0, entryIndex), ...block.entries.slice(entryIndex + 1)];
698
+ }
629
699
 
630
- parseQuadIndexKey: (key) => {
631
- try {
632
- const [s, p, objKey] = JSON.parse(key);
633
- return { s, p, o: JSON.parse(objKey) };
634
- } catch {
635
- return null;
636
- }
637
- },
700
+ function replaceLangDatatypeEntries(block, lit, ctx) {
701
+ if (!block?.entries) return null;
702
+ const filtered = block.entries.filter(e => e.kind !== 'language' && e.kind !== 'datatype');
703
+ const extras = [];
704
+ if (lit?.language) extras.push({ kind: 'language', language: lit.language, raw: `@${lit.language}`, relRange: { start: 0, end: 0 } });
705
+ const dt = lit?.datatype?.value;
706
+ if (!lit?.language && dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
707
+ extras.push({ kind: 'datatype', datatype: shortenIRI(dt, ctx), raw: `^^${shortenIRI(dt, ctx)}`, relRange: { start: 0, end: 0 } });
708
+ }
709
+ return [...filtered, ...extras];
710
+ }
638
711
 
639
- sanitizeCarrierValueForBlock: (block, raw) => {
640
- const s = String(raw ?? '');
641
- const t = block?.carrierType;
642
- if (t === 'code') return s.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
643
- const oneLine = s.replace(/[\n\r]+/g, ' ').trim();
644
- return (t === 'span' || t === 'link') ? oneLine.replace(/[\[\]]/g, ' ') : oneLine;
712
+ function updateAttrsDatatypeLang(tokens, newLit, ctx) {
713
+ const predicatesAndTypes = tokens.filter(t => !t.startsWith('@') && !t.startsWith('^^'));
714
+ if (newLit?.language) return [...predicatesAndTypes, `@${newLit.language}`];
715
+ const dt = newLit?.datatype?.value;
716
+ if (dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
717
+ return [...predicatesAndTypes, `^^${shortenIRI(dt, ctx)}`];
645
718
  }
646
- };
719
+ return predicatesAndTypes;
720
+ }
647
721
 
648
722
  export function serialize({ text, diff, origin, options = {} }) {
649
723
  if (!diff || (!diff.add?.length && !diff.delete?.length)) {
@@ -658,12 +732,11 @@ export function serialize({ text, diff, origin, options = {} }) {
658
732
 
659
733
  const findOriginEntryForLiteralByValue = (subjectIri, predicateIri, literalValue) => {
660
734
  for (const [k, entry] of base?.quadIndex || []) {
661
- const parsed = serializeHelpers.parseQuadIndexKey(k);
735
+ const parsed = parseQuadIndexKey(k);
662
736
  if (!parsed) continue;
663
737
  if (parsed.s !== subjectIri || parsed.p !== predicateIri) continue;
664
738
  if (parsed.o?.t !== 'Literal') continue;
665
- if (parsed.o?.v !== literalValue) continue;
666
- return entry;
739
+ if (parsed.o?.v === literalValue) return entry;
667
740
  }
668
741
  return null;
669
742
  };
@@ -671,7 +744,7 @@ export function serialize({ text, diff, origin, options = {} }) {
671
744
  const findLiteralCarrierBlocksBySP = (subjectIri, predicateIri) => {
672
745
  const out = [];
673
746
  for (const [k, entry] of base?.quadIndex || []) {
674
- const parsed = serializeHelpers.parseQuadIndexKey(k);
747
+ const parsed = parseQuadIndexKey(k);
675
748
  if (!parsed) continue;
676
749
  if (parsed.s !== subjectIri || parsed.p !== predicateIri) continue;
677
750
  if (parsed.o?.t !== 'Literal') continue;
@@ -682,21 +755,13 @@ export function serialize({ text, diff, origin, options = {} }) {
682
755
  return out;
683
756
  };
684
757
 
685
- const objectSignature = (o) => {
686
- if (!o) return '';
687
- if (o.termType === 'Literal') {
688
- return JSON.stringify({ t: 'Literal', v: o.value, lang: o.language || '', dt: o.datatype?.value || '' });
689
- }
690
- return JSON.stringify({ t: o.termType, v: o.value });
691
- };
692
-
693
758
  const anchors = new Map();
694
759
  for (const q0 of diff.delete || []) {
695
- const q = serializeHelpers.normalizeQuad(q0);
760
+ const q = normalizeQuad(q0);
696
761
  if (!q) continue;
697
762
  if (!q?.subject || !q?.object || !q?.predicate) continue;
698
763
  const key = JSON.stringify([q.subject.value, objectSignature(q.object)]);
699
- const qk = serializeHelpers.quadToKeyForOrigin(q);
764
+ const qk = quadToKeyForOrigin(q);
700
765
  const entry = qk ? base?.quadIndex?.get(qk) : null;
701
766
  const blockId = entry?.blockId || entry;
702
767
  const block = blockId ? base?.blocks?.get(blockId) : null;
@@ -706,7 +771,7 @@ export function serialize({ text, diff, origin, options = {} }) {
706
771
 
707
772
  const addBySP = new Map();
708
773
  for (const q0 of diff.add || []) {
709
- const q = serializeHelpers.normalizeQuad(q0);
774
+ const q = normalizeQuad(q0);
710
775
  if (!q) continue;
711
776
  if (!q?.subject || !q?.predicate || !q?.object) continue;
712
777
  const k = JSON.stringify([q.subject.value, q.predicate.value]);
@@ -718,16 +783,16 @@ export function serialize({ text, diff, origin, options = {} }) {
718
783
  const consumedAdds = new Set();
719
784
  const literalUpdates = [];
720
785
  for (const dq0 of diff.delete || []) {
721
- const dq = serializeHelpers.normalizeQuad(dq0);
786
+ const dq = normalizeQuad(dq0);
722
787
  if (!dq) continue;
723
788
  if (!dq?.subject || !dq?.predicate || !dq?.object) continue;
724
789
  if (dq.object.termType !== 'Literal') continue;
725
790
  const k = JSON.stringify([dq.subject.value, dq.predicate.value]);
726
791
  const candidates = addBySP.get(k) || [];
727
- const aq = candidates.find(x => x?.object?.termType === 'Literal' && !consumedAdds.has(serializeHelpers.quadToKeyForOrigin(x)));
792
+ const aq = candidates.find(x => x?.object?.termType === 'Literal' && !consumedAdds.has(quadToKeyForOrigin(x)));
728
793
  if (!aq) continue;
729
794
 
730
- const dqk = serializeHelpers.quadToKeyForOrigin(dq);
795
+ const dqk = quadToKeyForOrigin(dq);
731
796
  let entry = dqk ? base?.quadIndex?.get(dqk) : null;
732
797
  if (!entry && dq.object?.termType === 'Literal') {
733
798
  entry = findOriginEntryForLiteralByValue(dq.subject.value, dq.predicate.value, dq.object.value);
@@ -737,13 +802,49 @@ export function serialize({ text, diff, origin, options = {} }) {
737
802
  if (!block) continue;
738
803
 
739
804
  literalUpdates.push({ deleteQuad: dq, addQuad: aq, entry, block });
740
- consumedAdds.add(serializeHelpers.quadToKeyForOrigin(aq));
805
+ consumedAdds.add(quadToKeyForOrigin(aq));
741
806
  }
742
807
 
743
808
  for (const q0 of diff.add || []) {
744
- const quad = serializeHelpers.normalizeQuad(q0);
809
+ const quad = normalizeQuad(q0);
745
810
  if (!quad || quad.object?.termType !== 'Literal') continue;
746
- if (consumedAdds.has(serializeHelpers.quadToKeyForOrigin(quad))) continue;
811
+ if (consumedAdds.has(quadToKeyForOrigin(quad))) continue;
812
+
813
+ // Check if there's a vacant slot we can reuse
814
+ const vacantSlot = findVacantSlot(base?.quadIndex, quad.subject, quad.predicate);
815
+ if (!vacantSlot) continue;
816
+
817
+ const block = base?.blocks?.get(vacantSlot.blockId);
818
+ if (!block) continue;
819
+
820
+ const span = readSpan(block, text, 'attrs');
821
+ if (!span) continue;
822
+
823
+ // Occupy the vacant slot and update the annotation
824
+ const occupiedSlot = occupySlot(vacantSlot, quad.object);
825
+ if (!occupiedSlot) continue;
826
+
827
+ // Update the carrier value
828
+ const valueSpan = readSpan(block, text, 'value');
829
+ if (valueSpan) {
830
+ edits.push({ start: valueSpan.start, end: valueSpan.end, text: quad.object.value });
831
+ }
832
+
833
+ // Update the annotation block to restore the predicate token
834
+ const tokens = normalizeAttrsTokens(span.text);
835
+ const predToken = `${vacantSlot.form || ''}${shortenIRI(quad.predicate.value, ctx)}`;
836
+
837
+ // For empty annotation blocks, replace entirely; for non-empty, add if missing
838
+ if (tokens.length === 0) {
839
+ edits.push({ start: span.start, end: span.end, text: `{${predToken}}` });
840
+ } else if (!tokens.includes(predToken)) {
841
+ const updated = [...tokens, predToken];
842
+ edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
843
+ }
844
+
845
+ // Mark as consumed and continue
846
+ consumedAdds.add(quadToKeyForOrigin(quad));
847
+ continue;
747
848
 
748
849
  const matches = findLiteralCarrierBlocksBySP(quad.subject.value, quad.predicate.value);
749
850
  if (matches.length === 0) continue;
@@ -758,18 +859,18 @@ export function serialize({ text, diff, origin, options = {} }) {
758
859
 
759
860
  if (sameLang.length !== 1) continue;
760
861
  const target = sameLang[0].block;
761
- const vSpan = serializeHelpers.readValueSpan(target, text);
862
+ const vSpan = readSpan(target, text, 'value');
762
863
  if (!vSpan) continue;
763
864
 
764
- const newValue = serializeHelpers.sanitizeCarrierValueForBlock(target, quad.object.value);
865
+ const newValue = sanitizeCarrierValueForBlock(target, quad.object.value);
765
866
  edits.push({ start: vSpan.start, end: vSpan.end, text: newValue });
766
867
 
767
- const aSpan = serializeHelpers.readAttrsSpan(target, text);
868
+ const aSpan = readSpan(target, text, 'attrs');
768
869
  if (aSpan && target?.entries?.length) {
769
- const nextEntries = serializeHelpers.replaceLangDatatypeEntries(target, quad.object, ctx);
870
+ const nextEntries = replaceLangDatatypeEntries(target, quad.object, ctx);
770
871
  if (nextEntries) {
771
872
  const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
772
- edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
873
+ edits.push({ start: aSpan.start, end: aSpan.end, text: writeAttrsTokens(nextTokens) });
773
874
  }
774
875
  }
775
876
 
@@ -777,51 +878,42 @@ export function serialize({ text, diff, origin, options = {} }) {
777
878
  }
778
879
 
779
880
  for (const u of literalUpdates) {
780
- const span = serializeHelpers.readValueSpan(u.block, text);
881
+ const span = readSpan(u.block, text, 'value');
781
882
  if (span) {
782
- const newValue = serializeHelpers.sanitizeCarrierValueForBlock(u.block, u.addQuad.object.value);
883
+ const newValue = sanitizeCarrierValueForBlock(u.block, u.addQuad.object.value);
783
884
  edits.push({ start: span.start, end: span.end, text: newValue });
784
885
  }
785
886
 
786
- const aSpan = serializeHelpers.readAttrsSpan(u.block, text);
887
+ const aSpan = readSpan(u.block, text, 'attrs');
787
888
  if (aSpan) {
788
889
  if (u.block?.entries?.length) {
789
- const nextEntries = serializeHelpers.replaceLangDatatypeEntries(u.block, u.addQuad.object, ctx);
890
+ const nextEntries = replaceLangDatatypeEntries(u.block, u.addQuad.object, ctx);
790
891
  if (nextEntries) {
791
892
  const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
792
893
  if (nextTokens.length === 0) {
793
894
  edits.push({ start: aSpan.start, end: aSpan.end, text: '{}' });
794
895
  } else {
795
- edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
896
+ edits.push({ start: aSpan.start, end: aSpan.end, text: writeAttrsTokens(nextTokens) });
796
897
  }
797
898
  }
798
899
  } else {
799
- const tokens = serializeHelpers.normalizeAttrsTokens(aSpan.text);
800
- const updated = updateAttrsDatatypeLang(tokens, u.addQuad.object);
900
+ const tokens = normalizeAttrsTokens(aSpan.text);
901
+ const updated = updateAttrsDatatypeLang(tokens, u.addQuad.object, ctx);
801
902
  if (updated.join(' ') !== tokens.join(' ')) {
802
903
  if (updated.length === 0) {
803
904
  edits.push({ start: aSpan.start, end: aSpan.end, text: '{}' });
804
905
  } else {
805
- edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(updated) });
906
+ edits.push({ start: aSpan.start, end: aSpan.end, text: writeAttrsTokens(updated) });
806
907
  }
807
908
  }
808
909
  }
809
910
  }
810
911
  }
811
912
 
812
- const updateAttrsDatatypeLang = (tokens, newLit) => {
813
- const predicatesAndTypes = tokens.filter(t => !t.startsWith('@') && !t.startsWith('^^'));
814
- if (newLit?.language) return [...predicatesAndTypes, `@${newLit.language}`];
815
- const dt = newLit?.datatype?.value;
816
- if (dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
817
- return [...predicatesAndTypes, `^^${shortenIRI(dt, ctx)}`];
818
- }
819
- return predicatesAndTypes;
820
- };
821
913
 
822
914
  if (diff.delete) {
823
915
  diff.delete.forEach(q0 => {
824
- const quad = serializeHelpers.normalizeQuad(q0);
916
+ const quad = normalizeQuad(q0);
825
917
  if (!quad) return;
826
918
  if (!quad?.subject || !quad?.predicate || !quad?.object) return;
827
919
 
@@ -834,36 +926,60 @@ export function serialize({ text, diff, origin, options = {} }) {
834
926
  if (isUpdated) return;
835
927
  }
836
928
 
837
- const key = serializeHelpers.quadToKeyForOrigin(quad);
929
+ const key = quadToKeyForOrigin(quad);
838
930
  let entry = key ? base?.quadIndex?.get(key) : null;
839
931
  if (!entry && quad.object?.termType === 'Literal') {
840
932
  entry = findOriginEntryForLiteralByValue(quad.subject.value, quad.predicate.value, quad.object.value);
841
933
  }
934
+
935
+ // Mark the semantic slot as vacant for future reuse
936
+ if (entry && entry.slotId) {
937
+ // Capture block information before marking as vacant
938
+ const block = base?.blocks?.get(entry.blockId);
939
+ const blockInfo = block ? {
940
+ id: entry.blockId,
941
+ range: block.range,
942
+ attrsRange: block.attrsRange,
943
+ valueRange: block.valueRange,
944
+ carrierType: block.carrierType,
945
+ subject: block.subject,
946
+ context: block.context
947
+ } : null;
948
+
949
+ const vacantSlot = markSlotAsVacant(entry, quad.object);
950
+ if (vacantSlot && blockInfo) {
951
+ vacantSlot.blockInfo = blockInfo;
952
+ base.quadIndex.set(key, vacantSlot);
953
+ }
954
+ }
955
+
842
956
  const blockId = entry?.blockId || entry;
843
957
  if (!blockId) return;
958
+
844
959
  const block = base?.blocks?.get(blockId);
845
- const span = serializeHelpers.readAttrsSpan(block, text);
960
+ if (!block) return;
961
+
962
+ const span = readSpan(block, text, 'attrs');
846
963
  if (!span) return;
847
964
 
965
+ // Handle entry removal by index
848
966
  if (entry?.entryIndex != null && block?.entries?.length) {
849
- const nextEntries = serializeHelpers.removeEntryAt(block, entry.entryIndex);
967
+ const nextEntries = removeEntryAt(block, entry.entryIndex);
850
968
  if (!nextEntries) return;
969
+
851
970
  const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
852
- if (nextTokens.length === 0) {
853
- edits.push({ start: span.start, end: span.end, text: '{}' });
854
- } else {
855
- edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
856
- }
971
+ const newText = nextTokens.length === 0 ? '{}' : writeAttrsTokens(nextTokens);
972
+ edits.push({ start: span.start, end: span.end, text: newText });
857
973
  return;
858
974
  }
859
975
 
860
- const tokens = serializeHelpers.normalizeAttrsTokens(span.text);
976
+ const tokens = normalizeAttrsTokens(span.text);
861
977
  let updated = tokens;
862
978
  let removed = false;
863
979
 
864
980
  if (entry?.kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
865
981
  const expectedType = entry.expandedType || quad.object.value;
866
- ({ tokens: updated, removed } = serializeHelpers.removeOneToken(tokens, t => {
982
+ ({ tokens: updated, removed } = removeOneToken(tokens, t => {
867
983
  if (!t.startsWith('.')) return false;
868
984
  const raw = t.slice(1);
869
985
  return expandIRI(raw, ctx) === expectedType;
@@ -871,7 +987,7 @@ export function serialize({ text, diff, origin, options = {} }) {
871
987
  } else {
872
988
  const expectedPred = entry?.expandedPredicate || quad.predicate.value;
873
989
  const expectedForm = entry?.form;
874
- ({ tokens: updated, removed } = serializeHelpers.removeOneToken(tokens, t => {
990
+ ({ tokens: updated, removed } = removeOneToken(tokens, t => {
875
991
  const m = String(t).match(/^(\^\?|\^|\?|)(.+)$/);
876
992
  if (!m) return false;
877
993
  const form = m[1] || '';
@@ -888,18 +1004,18 @@ export function serialize({ text, diff, origin, options = {} }) {
888
1004
  return;
889
1005
  }
890
1006
 
891
- const newAttrs = serializeHelpers.writeAttrsTokens(updated);
1007
+ const newAttrs = writeAttrsTokens(updated);
892
1008
  edits.push({ start: span.start, end: span.end, text: newAttrs });
893
1009
  });
894
1010
  }
895
1011
 
896
1012
  if (diff.add) {
897
1013
  diff.add.forEach(q0 => {
898
- const quad = serializeHelpers.normalizeQuad(q0);
1014
+ const quad = normalizeQuad(q0);
899
1015
  if (!quad) return;
900
1016
  if (!quad?.subject || !quad?.predicate || !quad?.object) return;
901
1017
 
902
- if (consumedAdds.has(serializeHelpers.quadToKeyForOrigin(quad))) return;
1018
+ if (consumedAdds.has(quadToKeyForOrigin(quad))) return;
903
1019
 
904
1020
  const anchorKey = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
905
1021
  const anchored = anchors.get(anchorKey) || null;
@@ -928,7 +1044,7 @@ export function serialize({ text, diff, origin, options = {} }) {
928
1044
  } else {
929
1045
  const full = quad.object.value;
930
1046
  const label = shortenIRI(full, ctx);
931
- edits.push({ start: result.length, end: result.length, text: `\n[${label}](${full}) {?${predShort}}` });
1047
+ edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=${label}) {?${predShort}}` });
932
1048
  }
933
1049
  return;
934
1050
  }
@@ -948,14 +1064,14 @@ export function serialize({ text, diff, origin, options = {} }) {
948
1064
  if (quad.object.termType === 'NamedNode') {
949
1065
  const full = quad.object.value;
950
1066
  const label = shortenIRI(full, ctx);
951
- edits.push({ start: result.length, end: result.length, text: `\n[${label}](${full}) {?${predShort}}` });
1067
+ edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=${shortenIRI(full, ctx)} ?${predShort}}` });
952
1068
  return;
953
1069
  }
954
1070
  }
955
1071
 
956
- const span = serializeHelpers.readAttrsSpan(targetBlock, text);
1072
+ const span = readSpan(targetBlock, text, 'attrs');
957
1073
  if (!span) return;
958
- const tokens = serializeHelpers.blockTokensFromEntries(targetBlock) || serializeHelpers.normalizeAttrsTokens(span.text);
1074
+ const tokens = blockTokensFromEntries(targetBlock) || normalizeAttrsTokens(span.text);
959
1075
 
960
1076
  if (quad.predicate.value.endsWith('rdf-syntax-ns#type') && quad.object?.termType === 'NamedNode') {
961
1077
  const typeShort = shortenIRI(quad.object.value, ctx);
@@ -963,7 +1079,7 @@ export function serialize({ text, diff, origin, options = {} }) {
963
1079
  if (!typeToken) return;
964
1080
  if (tokens.includes(typeToken)) return;
965
1081
  const updated = [...tokens, typeToken];
966
- edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(updated) });
1082
+ edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
967
1083
  return;
968
1084
  }
969
1085
 
@@ -974,7 +1090,7 @@ export function serialize({ text, diff, origin, options = {} }) {
974
1090
  if (!predToken) return;
975
1091
  if (tokens.includes(predToken)) return;
976
1092
  const updated = [...tokens, predToken];
977
- edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(updated) });
1093
+ edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
978
1094
  });
979
1095
  }
980
1096
 
@@ -983,7 +1099,43 @@ export function serialize({ text, diff, origin, options = {} }) {
983
1099
  result = result.substring(0, edit.start) + edit.text + result.substring(edit.end);
984
1100
  });
985
1101
 
1102
+ // Extract vacant slots before reparsing to preserve them
1103
+ const vacantSlots = new Map();
1104
+ base?.quadIndex?.forEach((slot, key) => {
1105
+ if (slot.isVacant) {
1106
+ vacantSlots.set(key, slot);
1107
+ }
1108
+ });
1109
+
986
1110
  const reparsed = parse(result, { context: options.context || {} });
1111
+
1112
+ // Merge vacant slots back into the new origin
1113
+ vacantSlots.forEach((vacantSlot, key) => {
1114
+ // Check if the block still exists in the new origin
1115
+ if (!reparsed.origin.blocks.has(vacantSlot.blockId)) {
1116
+ // Recreate the empty block for the vacant slot using preserved info
1117
+ const blockInfo = vacantSlot.blockInfo;
1118
+ if (blockInfo) {
1119
+ const emptyBlock = {
1120
+ id: blockInfo.id,
1121
+ range: blockInfo.range || { start: 0, end: 0 },
1122
+ attrsRange: blockInfo.attrsRange,
1123
+ valueRange: blockInfo.valueRange,
1124
+ carrierType: blockInfo.carrierType || 'span',
1125
+ subject: blockInfo.subject || '',
1126
+ types: [],
1127
+ predicates: [],
1128
+ entries: [], // Empty entries - just {} annotation
1129
+ context: blockInfo.context || { ...ctx }
1130
+ };
1131
+ reparsed.origin.blocks.set(vacantSlot.blockId, emptyBlock);
1132
+ }
1133
+ }
1134
+
1135
+ // Merge the vacant slot back
1136
+ reparsed.origin.quadIndex.set(key, vacantSlot);
1137
+ });
1138
+
987
1139
  return { text: result, origin: reparsed.origin };
988
1140
  }
989
1141
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.2.4",
3
+ "version": "0.2.5",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",