mdld-parse 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -8
- package/index.js +299 -147
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
# MD-LD Parse v0.2
|
|
2
2
|
|
|
3
|
-
**Markdown-Linked Data (MD-LD)** — a deterministic, streaming-friendly RDF authoring format that extends Markdown with explicit `{}` annotations.
|
|
3
|
+
**Markdown-Linked Data (MD-LD)** — a deterministic, streaming-friendly RDF authoring format that extends Markdown with explicit `{...}` annotations.
|
|
4
4
|
|
|
5
5
|
[npm package](https://www.npmjs.com/package/mdld-parse)
|
|
6
|
-
[GitHub repository](https://github.com/mdld-js/mdld-parse)
|
|
7
6
|
|
|
8
|
-
[Documentation](https://mdld.js.org) | [
|
|
7
|
+
[Documentation](https://mdld.js.org) | [Repository](https://github.com/davay42/mdld-parse) | [Playground](https://mdld.js.org/playground)
|
|
9
8
|
|
|
10
9
|
## What is MD-LD?
|
|
11
10
|
|
|
12
|
-
MD-LD allows you to author RDF graphs directly in Markdown using explicit `{}` annotations:
|
|
11
|
+
MD-LD allows you to author RDF graphs directly in Markdown using explicit `{...}` annotations:
|
|
13
12
|
|
|
14
13
|
```markdown
|
|
15
14
|
# Apollo 11 {=ex:apollo11 .SpaceMission}
|
|
@@ -32,8 +31,8 @@ ex:apollo11 a schema:SpaceMission ;
|
|
|
32
31
|
|
|
33
32
|
MD-LD v0.2 provides strict semantic guarantees:
|
|
34
33
|
|
|
35
|
-
1. **CommonMark-preserving** — Removing `{}` yields valid Markdown
|
|
36
|
-
2. **Explicit semantics** — Every quad originates from explicit `{}`
|
|
34
|
+
1. **CommonMark-preserving** — Removing `{...}` yields valid Markdown
|
|
35
|
+
2. **Explicit semantics** — Every quad originates from explicit `{...}`
|
|
37
36
|
3. **Single-pass parsing** — Streaming-friendly, deterministic
|
|
38
37
|
4. **No blank nodes** — All subjects are stable IRIs
|
|
39
38
|
5. **Complete traceability** — Every quad maps to source location
|
|
@@ -140,7 +139,7 @@ Links create relationships (use `?` prefix):
|
|
|
140
139
|
```markdown
|
|
141
140
|
# Mission {=ex:apollo11}
|
|
142
141
|
|
|
143
|
-
[NASA]
|
|
142
|
+
[NASA] {=ex:nasa ?organizer}
|
|
144
143
|
```
|
|
145
144
|
|
|
146
145
|
```turtle
|
|
@@ -294,7 +293,7 @@ Apply RDF changes back to markdown with proper positioning.
|
|
|
294
293
|
**Returns:** `{ text, origin }`
|
|
295
294
|
|
|
296
295
|
- `text` — Updated markdown
|
|
297
|
-
- `origin` — Updated origin tracking
|
|
296
|
+
- `origin` — Updated origin tracking, including vacant slots
|
|
298
297
|
|
|
299
298
|
**Example:**
|
|
300
299
|
|
package/index.js
CHANGED
|
@@ -24,6 +24,7 @@ function hash(str) {
|
|
|
24
24
|
return Math.abs(h).toString(16).slice(0, 12);
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
+
// IRI Utilities
|
|
27
28
|
function expandIRI(term, ctx) {
|
|
28
29
|
if (term == null) return null;
|
|
29
30
|
const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
|
|
@@ -36,6 +37,21 @@ function expandIRI(term, ctx) {
|
|
|
36
37
|
return (ctx['@vocab'] || '') + t;
|
|
37
38
|
}
|
|
38
39
|
|
|
40
|
+
export function shortenIRI(iri, ctx) {
|
|
41
|
+
if (!iri || !iri.startsWith('http')) return iri;
|
|
42
|
+
if (ctx['@vocab'] && iri.startsWith(ctx['@vocab'])) return iri.substring(ctx['@vocab'].length);
|
|
43
|
+
for (const [prefix, namespace] of Object.entries(ctx)) {
|
|
44
|
+
if (prefix !== '@vocab' && iri.startsWith(namespace)) {
|
|
45
|
+
return prefix + ':' + iri.substring(namespace.length);
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
return iri;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
function processIRI(term, ctx, operation = 'expand') {
|
|
52
|
+
return operation === 'expand' ? expandIRI(term, ctx) : shortenIRI(term, ctx);
|
|
53
|
+
}
|
|
54
|
+
|
|
39
55
|
function parseSemanticBlock(raw) {
|
|
40
56
|
try {
|
|
41
57
|
const src = String(raw || '').trim();
|
|
@@ -339,6 +355,7 @@ function createBlock(subject, types, predicates, entries, range, attrsRange, val
|
|
|
339
355
|
};
|
|
340
356
|
}
|
|
341
357
|
|
|
358
|
+
// Quad Utilities
|
|
342
359
|
function quadIndexKey(subject, predicate, object) {
|
|
343
360
|
const objKey = object.termType === 'Literal'
|
|
344
361
|
? JSON.stringify({ t: 'Literal', v: object.value, lang: object.language || '', dt: object.datatype?.value || '' })
|
|
@@ -346,11 +363,97 @@ function quadIndexKey(subject, predicate, object) {
|
|
|
346
363
|
return JSON.stringify([subject.value, predicate.value, objKey]);
|
|
347
364
|
}
|
|
348
365
|
|
|
366
|
+
function normalizeQuad(q) {
|
|
367
|
+
if (!q) return null;
|
|
368
|
+
const { subject, predicate, object } = q;
|
|
369
|
+
if (object?.termType === 'Literal') {
|
|
370
|
+
const language = typeof object.language === 'string' ? object.language : '';
|
|
371
|
+
const datatype = object.datatype?.value || 'http://www.w3.org/2001/XMLSchema#string';
|
|
372
|
+
return { ...q, subject, predicate, object: { ...object, language, datatype } };
|
|
373
|
+
}
|
|
374
|
+
return { ...q, subject, predicate, object };
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
function objectSignature(o) {
|
|
378
|
+
if (!o) return '';
|
|
379
|
+
if (o.termType === 'Literal') {
|
|
380
|
+
return JSON.stringify({ t: 'Literal', v: o.value, lang: o.language || '', dt: o.datatype?.value || '' });
|
|
381
|
+
}
|
|
382
|
+
return JSON.stringify({ t: o.termType, v: o.value });
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
function quadToKeyForOrigin(q) {
|
|
386
|
+
const nq = normalizeQuad(q);
|
|
387
|
+
return nq ? quadIndexKey(nq.subject, nq.predicate, nq.object) : null;
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
function parseQuadIndexKey(key) {
|
|
391
|
+
try {
|
|
392
|
+
const [s, p, objKey] = JSON.parse(key);
|
|
393
|
+
return { s, p, o: JSON.parse(objKey) };
|
|
394
|
+
} catch {
|
|
395
|
+
return null;
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
// Semantic Slot Utilities
|
|
400
|
+
function createSemanticSlotId(subject, predicate) {
|
|
401
|
+
return hash(`${subject.value}|${predicate.value}`);
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
function createSlotInfo(blockId, entryIndex, meta = {}) {
|
|
405
|
+
const slotId = meta.subject && meta.predicate ? createSemanticSlotId(meta.subject, meta.predicate) : null;
|
|
406
|
+
return {
|
|
407
|
+
blockId,
|
|
408
|
+
entryIndex,
|
|
409
|
+
slotId,
|
|
410
|
+
isVacant: false,
|
|
411
|
+
lastValue: null,
|
|
412
|
+
vacantSince: null,
|
|
413
|
+
...meta
|
|
414
|
+
};
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
function markSlotAsVacant(slotInfo, deletedValue) {
|
|
418
|
+
if (!slotInfo) return null;
|
|
419
|
+
return {
|
|
420
|
+
...slotInfo,
|
|
421
|
+
isVacant: true,
|
|
422
|
+
lastValue: deletedValue,
|
|
423
|
+
vacantSince: Date.now()
|
|
424
|
+
};
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
function findVacantSlot(quadIndex, subject, predicate) {
|
|
428
|
+
const targetSlotId = createSemanticSlotId(subject, predicate);
|
|
429
|
+
return Array.from(quadIndex.values())
|
|
430
|
+
.find(slot => slot.slotId === targetSlotId && slot.isVacant);
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
function occupySlot(slotInfo, newValue) {
|
|
434
|
+
if (!slotInfo || !slotInfo.isVacant) return null;
|
|
435
|
+
return {
|
|
436
|
+
...slotInfo,
|
|
437
|
+
isVacant: false,
|
|
438
|
+
lastValue: newValue,
|
|
439
|
+
vacantSince: null
|
|
440
|
+
};
|
|
441
|
+
}
|
|
442
|
+
|
|
349
443
|
function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory, meta = null) {
|
|
350
444
|
if (!subject || !predicate || !object) return;
|
|
351
445
|
const quad = dataFactory.quad(subject, predicate, object);
|
|
352
446
|
quads.push(quad);
|
|
353
|
-
|
|
447
|
+
|
|
448
|
+
// Create enhanced slot info with semantic slot tracking
|
|
449
|
+
const slotInfo = createSlotInfo(blockId, meta?.entryIndex, {
|
|
450
|
+
...meta,
|
|
451
|
+
subject,
|
|
452
|
+
predicate,
|
|
453
|
+
object
|
|
454
|
+
});
|
|
455
|
+
|
|
456
|
+
quadIndex.set(quadIndexKey(quad.subject, quad.predicate, quad.object), slotInfo);
|
|
354
457
|
}
|
|
355
458
|
|
|
356
459
|
function createLiteral(value, datatype, language, context, dataFactory) {
|
|
@@ -552,98 +655,69 @@ export function parse(text, options = {}) {
|
|
|
552
655
|
return { quads: state.quads, origin: state.origin, context: state.ctx };
|
|
553
656
|
}
|
|
554
657
|
|
|
555
|
-
export function shortenIRI(iri, ctx) {
|
|
556
|
-
if (!iri || !iri.startsWith('http')) return iri;
|
|
557
|
-
if (ctx['@vocab'] && iri.startsWith(ctx['@vocab'])) return iri.substring(ctx['@vocab'].length);
|
|
558
|
-
for (const [prefix, namespace] of Object.entries(ctx)) {
|
|
559
|
-
if (prefix !== '@vocab' && iri.startsWith(namespace)) {
|
|
560
|
-
return prefix + ':' + iri.substring(namespace.length);
|
|
561
|
-
}
|
|
562
|
-
}
|
|
563
|
-
return iri;
|
|
564
|
-
}
|
|
565
|
-
|
|
566
|
-
const serializeHelpers = {
|
|
567
|
-
readAttrsSpan: (block, text) => {
|
|
568
|
-
if (!block?.attrsRange) return null;
|
|
569
|
-
const { start, end } = block.attrsRange;
|
|
570
|
-
return (Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end > start)
|
|
571
|
-
? { start, end, text: text.substring(start, end) }
|
|
572
|
-
: null;
|
|
573
|
-
},
|
|
574
658
|
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
const cleaned = String(attrsText || '').replace(/^\s*\{|\}\s*$/g, '').trim();
|
|
585
|
-
return cleaned ? cleaned.split(/\s+/).filter(Boolean) : [];
|
|
586
|
-
},
|
|
587
|
-
|
|
588
|
-
blockTokensFromEntries: (block) => block?.entries?.length ? block.entries.map(e => e.raw).filter(Boolean) : null,
|
|
659
|
+
// Text Processing Utilities
|
|
660
|
+
function readSpan(block, text, spanType = 'attrs') {
|
|
661
|
+
const range = spanType === 'attrs' ? block?.attrsRange : block?.valueRange;
|
|
662
|
+
if (!range) return null;
|
|
663
|
+
const { start, end } = range;
|
|
664
|
+
return (Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end >= start)
|
|
665
|
+
? { start, end, text: text.substring(start, end) }
|
|
666
|
+
: null;
|
|
667
|
+
}
|
|
589
668
|
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
669
|
+
function normalizeAttrsTokens(attrsText) {
|
|
670
|
+
const cleaned = String(attrsText || '').replace(/^\s*\{|\}\s*$/g, '').trim();
|
|
671
|
+
return cleaned ? cleaned.split(/\s+/).filter(Boolean) : [];
|
|
672
|
+
}
|
|
594
673
|
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
const extras = [];
|
|
599
|
-
if (lit?.language) extras.push({ kind: 'language', language: lit.language, raw: `@${lit.language}`, relRange: { start: 0, end: 0 } });
|
|
600
|
-
const dt = lit?.datatype?.value;
|
|
601
|
-
if (!lit?.language && dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
|
|
602
|
-
extras.push({ kind: 'datatype', datatype: shortenIRI(dt, ctx), raw: `^^${shortenIRI(dt, ctx)}`, relRange: { start: 0, end: 0 } });
|
|
603
|
-
}
|
|
604
|
-
return [...filtered, ...extras];
|
|
605
|
-
},
|
|
674
|
+
function writeAttrsTokens(tokens) {
|
|
675
|
+
return `{${tokens.join(' ').trim()}}`;
|
|
676
|
+
}
|
|
606
677
|
|
|
607
|
-
|
|
678
|
+
function removeOneToken(tokens, matchFn) {
|
|
679
|
+
const i = tokens.findIndex(matchFn);
|
|
680
|
+
return i === -1 ? { tokens, removed: false } : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
|
|
681
|
+
}
|
|
608
682
|
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
683
|
+
function sanitizeCarrierValueForBlock(block, raw) {
|
|
684
|
+
const s = String(raw ?? '');
|
|
685
|
+
const t = block?.carrierType;
|
|
686
|
+
if (t === 'code') return s.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
|
|
687
|
+
const oneLine = s.replace(/[\n\r]+/g, ' ').trim();
|
|
688
|
+
return (t === 'span' || t === 'link') ? oneLine.replace(/[\[\]]/g, ' ') : oneLine;
|
|
689
|
+
}
|
|
613
690
|
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
if (object?.termType === 'Literal') {
|
|
618
|
-
const language = typeof object.language === 'string' ? object.language : '';
|
|
619
|
-
const datatype = object.datatype?.value || { termType: 'NamedNode', value: 'http://www.w3.org/2001/XMLSchema#string' };
|
|
620
|
-
return { ...q, subject, predicate, object: { ...object, language, datatype } };
|
|
621
|
-
}
|
|
622
|
-
return { ...q, subject, predicate, object };
|
|
623
|
-
},
|
|
691
|
+
function blockTokensFromEntries(block) {
|
|
692
|
+
return block?.entries?.length ? block.entries.map(e => e.raw).filter(Boolean) : null;
|
|
693
|
+
}
|
|
624
694
|
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
695
|
+
function removeEntryAt(block, entryIndex) {
|
|
696
|
+
if (!block?.entries || entryIndex == null || entryIndex < 0 || entryIndex >= block.entries.length) return null;
|
|
697
|
+
return [...block.entries.slice(0, entryIndex), ...block.entries.slice(entryIndex + 1)];
|
|
698
|
+
}
|
|
629
699
|
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
700
|
+
function replaceLangDatatypeEntries(block, lit, ctx) {
|
|
701
|
+
if (!block?.entries) return null;
|
|
702
|
+
const filtered = block.entries.filter(e => e.kind !== 'language' && e.kind !== 'datatype');
|
|
703
|
+
const extras = [];
|
|
704
|
+
if (lit?.language) extras.push({ kind: 'language', language: lit.language, raw: `@${lit.language}`, relRange: { start: 0, end: 0 } });
|
|
705
|
+
const dt = lit?.datatype?.value;
|
|
706
|
+
if (!lit?.language && dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
|
|
707
|
+
extras.push({ kind: 'datatype', datatype: shortenIRI(dt, ctx), raw: `^^${shortenIRI(dt, ctx)}`, relRange: { start: 0, end: 0 } });
|
|
708
|
+
}
|
|
709
|
+
return [...filtered, ...extras];
|
|
710
|
+
}
|
|
638
711
|
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
return
|
|
712
|
+
function updateAttrsDatatypeLang(tokens, newLit, ctx) {
|
|
713
|
+
const predicatesAndTypes = tokens.filter(t => !t.startsWith('@') && !t.startsWith('^^'));
|
|
714
|
+
if (newLit?.language) return [...predicatesAndTypes, `@${newLit.language}`];
|
|
715
|
+
const dt = newLit?.datatype?.value;
|
|
716
|
+
if (dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
|
|
717
|
+
return [...predicatesAndTypes, `^^${shortenIRI(dt, ctx)}`];
|
|
645
718
|
}
|
|
646
|
-
|
|
719
|
+
return predicatesAndTypes;
|
|
720
|
+
}
|
|
647
721
|
|
|
648
722
|
export function serialize({ text, diff, origin, options = {} }) {
|
|
649
723
|
if (!diff || (!diff.add?.length && !diff.delete?.length)) {
|
|
@@ -658,12 +732,11 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
658
732
|
|
|
659
733
|
const findOriginEntryForLiteralByValue = (subjectIri, predicateIri, literalValue) => {
|
|
660
734
|
for (const [k, entry] of base?.quadIndex || []) {
|
|
661
|
-
const parsed =
|
|
735
|
+
const parsed = parseQuadIndexKey(k);
|
|
662
736
|
if (!parsed) continue;
|
|
663
737
|
if (parsed.s !== subjectIri || parsed.p !== predicateIri) continue;
|
|
664
738
|
if (parsed.o?.t !== 'Literal') continue;
|
|
665
|
-
if (parsed.o?.v
|
|
666
|
-
return entry;
|
|
739
|
+
if (parsed.o?.v === literalValue) return entry;
|
|
667
740
|
}
|
|
668
741
|
return null;
|
|
669
742
|
};
|
|
@@ -671,7 +744,7 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
671
744
|
const findLiteralCarrierBlocksBySP = (subjectIri, predicateIri) => {
|
|
672
745
|
const out = [];
|
|
673
746
|
for (const [k, entry] of base?.quadIndex || []) {
|
|
674
|
-
const parsed =
|
|
747
|
+
const parsed = parseQuadIndexKey(k);
|
|
675
748
|
if (!parsed) continue;
|
|
676
749
|
if (parsed.s !== subjectIri || parsed.p !== predicateIri) continue;
|
|
677
750
|
if (parsed.o?.t !== 'Literal') continue;
|
|
@@ -682,21 +755,13 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
682
755
|
return out;
|
|
683
756
|
};
|
|
684
757
|
|
|
685
|
-
const objectSignature = (o) => {
|
|
686
|
-
if (!o) return '';
|
|
687
|
-
if (o.termType === 'Literal') {
|
|
688
|
-
return JSON.stringify({ t: 'Literal', v: o.value, lang: o.language || '', dt: o.datatype?.value || '' });
|
|
689
|
-
}
|
|
690
|
-
return JSON.stringify({ t: o.termType, v: o.value });
|
|
691
|
-
};
|
|
692
|
-
|
|
693
758
|
const anchors = new Map();
|
|
694
759
|
for (const q0 of diff.delete || []) {
|
|
695
|
-
const q =
|
|
760
|
+
const q = normalizeQuad(q0);
|
|
696
761
|
if (!q) continue;
|
|
697
762
|
if (!q?.subject || !q?.object || !q?.predicate) continue;
|
|
698
763
|
const key = JSON.stringify([q.subject.value, objectSignature(q.object)]);
|
|
699
|
-
const qk =
|
|
764
|
+
const qk = quadToKeyForOrigin(q);
|
|
700
765
|
const entry = qk ? base?.quadIndex?.get(qk) : null;
|
|
701
766
|
const blockId = entry?.blockId || entry;
|
|
702
767
|
const block = blockId ? base?.blocks?.get(blockId) : null;
|
|
@@ -706,7 +771,7 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
706
771
|
|
|
707
772
|
const addBySP = new Map();
|
|
708
773
|
for (const q0 of diff.add || []) {
|
|
709
|
-
const q =
|
|
774
|
+
const q = normalizeQuad(q0);
|
|
710
775
|
if (!q) continue;
|
|
711
776
|
if (!q?.subject || !q?.predicate || !q?.object) continue;
|
|
712
777
|
const k = JSON.stringify([q.subject.value, q.predicate.value]);
|
|
@@ -718,16 +783,16 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
718
783
|
const consumedAdds = new Set();
|
|
719
784
|
const literalUpdates = [];
|
|
720
785
|
for (const dq0 of diff.delete || []) {
|
|
721
|
-
const dq =
|
|
786
|
+
const dq = normalizeQuad(dq0);
|
|
722
787
|
if (!dq) continue;
|
|
723
788
|
if (!dq?.subject || !dq?.predicate || !dq?.object) continue;
|
|
724
789
|
if (dq.object.termType !== 'Literal') continue;
|
|
725
790
|
const k = JSON.stringify([dq.subject.value, dq.predicate.value]);
|
|
726
791
|
const candidates = addBySP.get(k) || [];
|
|
727
|
-
const aq = candidates.find(x => x?.object?.termType === 'Literal' && !consumedAdds.has(
|
|
792
|
+
const aq = candidates.find(x => x?.object?.termType === 'Literal' && !consumedAdds.has(quadToKeyForOrigin(x)));
|
|
728
793
|
if (!aq) continue;
|
|
729
794
|
|
|
730
|
-
const dqk =
|
|
795
|
+
const dqk = quadToKeyForOrigin(dq);
|
|
731
796
|
let entry = dqk ? base?.quadIndex?.get(dqk) : null;
|
|
732
797
|
if (!entry && dq.object?.termType === 'Literal') {
|
|
733
798
|
entry = findOriginEntryForLiteralByValue(dq.subject.value, dq.predicate.value, dq.object.value);
|
|
@@ -737,13 +802,49 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
737
802
|
if (!block) continue;
|
|
738
803
|
|
|
739
804
|
literalUpdates.push({ deleteQuad: dq, addQuad: aq, entry, block });
|
|
740
|
-
consumedAdds.add(
|
|
805
|
+
consumedAdds.add(quadToKeyForOrigin(aq));
|
|
741
806
|
}
|
|
742
807
|
|
|
743
808
|
for (const q0 of diff.add || []) {
|
|
744
|
-
const quad =
|
|
809
|
+
const quad = normalizeQuad(q0);
|
|
745
810
|
if (!quad || quad.object?.termType !== 'Literal') continue;
|
|
746
|
-
if (consumedAdds.has(
|
|
811
|
+
if (consumedAdds.has(quadToKeyForOrigin(quad))) continue;
|
|
812
|
+
|
|
813
|
+
// Check if there's a vacant slot we can reuse
|
|
814
|
+
const vacantSlot = findVacantSlot(base?.quadIndex, quad.subject, quad.predicate);
|
|
815
|
+
if (!vacantSlot) continue;
|
|
816
|
+
|
|
817
|
+
const block = base?.blocks?.get(vacantSlot.blockId);
|
|
818
|
+
if (!block) continue;
|
|
819
|
+
|
|
820
|
+
const span = readSpan(block, text, 'attrs');
|
|
821
|
+
if (!span) continue;
|
|
822
|
+
|
|
823
|
+
// Occupy the vacant slot and update the annotation
|
|
824
|
+
const occupiedSlot = occupySlot(vacantSlot, quad.object);
|
|
825
|
+
if (!occupiedSlot) continue;
|
|
826
|
+
|
|
827
|
+
// Update the carrier value
|
|
828
|
+
const valueSpan = readSpan(block, text, 'value');
|
|
829
|
+
if (valueSpan) {
|
|
830
|
+
edits.push({ start: valueSpan.start, end: valueSpan.end, text: quad.object.value });
|
|
831
|
+
}
|
|
832
|
+
|
|
833
|
+
// Update the annotation block to restore the predicate token
|
|
834
|
+
const tokens = normalizeAttrsTokens(span.text);
|
|
835
|
+
const predToken = `${vacantSlot.form || ''}${shortenIRI(quad.predicate.value, ctx)}`;
|
|
836
|
+
|
|
837
|
+
// For empty annotation blocks, replace entirely; for non-empty, add if missing
|
|
838
|
+
if (tokens.length === 0) {
|
|
839
|
+
edits.push({ start: span.start, end: span.end, text: `{${predToken}}` });
|
|
840
|
+
} else if (!tokens.includes(predToken)) {
|
|
841
|
+
const updated = [...tokens, predToken];
|
|
842
|
+
edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
|
|
843
|
+
}
|
|
844
|
+
|
|
845
|
+
// Mark as consumed and continue
|
|
846
|
+
consumedAdds.add(quadToKeyForOrigin(quad));
|
|
847
|
+
continue;
|
|
747
848
|
|
|
748
849
|
const matches = findLiteralCarrierBlocksBySP(quad.subject.value, quad.predicate.value);
|
|
749
850
|
if (matches.length === 0) continue;
|
|
@@ -758,18 +859,18 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
758
859
|
|
|
759
860
|
if (sameLang.length !== 1) continue;
|
|
760
861
|
const target = sameLang[0].block;
|
|
761
|
-
const vSpan =
|
|
862
|
+
const vSpan = readSpan(target, text, 'value');
|
|
762
863
|
if (!vSpan) continue;
|
|
763
864
|
|
|
764
|
-
const newValue =
|
|
865
|
+
const newValue = sanitizeCarrierValueForBlock(target, quad.object.value);
|
|
765
866
|
edits.push({ start: vSpan.start, end: vSpan.end, text: newValue });
|
|
766
867
|
|
|
767
|
-
const aSpan =
|
|
868
|
+
const aSpan = readSpan(target, text, 'attrs');
|
|
768
869
|
if (aSpan && target?.entries?.length) {
|
|
769
|
-
const nextEntries =
|
|
870
|
+
const nextEntries = replaceLangDatatypeEntries(target, quad.object, ctx);
|
|
770
871
|
if (nextEntries) {
|
|
771
872
|
const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
|
|
772
|
-
edits.push({ start: aSpan.start, end: aSpan.end, text:
|
|
873
|
+
edits.push({ start: aSpan.start, end: aSpan.end, text: writeAttrsTokens(nextTokens) });
|
|
773
874
|
}
|
|
774
875
|
}
|
|
775
876
|
|
|
@@ -777,51 +878,42 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
777
878
|
}
|
|
778
879
|
|
|
779
880
|
for (const u of literalUpdates) {
|
|
780
|
-
const span =
|
|
881
|
+
const span = readSpan(u.block, text, 'value');
|
|
781
882
|
if (span) {
|
|
782
|
-
const newValue =
|
|
883
|
+
const newValue = sanitizeCarrierValueForBlock(u.block, u.addQuad.object.value);
|
|
783
884
|
edits.push({ start: span.start, end: span.end, text: newValue });
|
|
784
885
|
}
|
|
785
886
|
|
|
786
|
-
const aSpan =
|
|
887
|
+
const aSpan = readSpan(u.block, text, 'attrs');
|
|
787
888
|
if (aSpan) {
|
|
788
889
|
if (u.block?.entries?.length) {
|
|
789
|
-
const nextEntries =
|
|
890
|
+
const nextEntries = replaceLangDatatypeEntries(u.block, u.addQuad.object, ctx);
|
|
790
891
|
if (nextEntries) {
|
|
791
892
|
const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
|
|
792
893
|
if (nextTokens.length === 0) {
|
|
793
894
|
edits.push({ start: aSpan.start, end: aSpan.end, text: '{}' });
|
|
794
895
|
} else {
|
|
795
|
-
edits.push({ start: aSpan.start, end: aSpan.end, text:
|
|
896
|
+
edits.push({ start: aSpan.start, end: aSpan.end, text: writeAttrsTokens(nextTokens) });
|
|
796
897
|
}
|
|
797
898
|
}
|
|
798
899
|
} else {
|
|
799
|
-
const tokens =
|
|
800
|
-
const updated = updateAttrsDatatypeLang(tokens, u.addQuad.object);
|
|
900
|
+
const tokens = normalizeAttrsTokens(aSpan.text);
|
|
901
|
+
const updated = updateAttrsDatatypeLang(tokens, u.addQuad.object, ctx);
|
|
801
902
|
if (updated.join(' ') !== tokens.join(' ')) {
|
|
802
903
|
if (updated.length === 0) {
|
|
803
904
|
edits.push({ start: aSpan.start, end: aSpan.end, text: '{}' });
|
|
804
905
|
} else {
|
|
805
|
-
edits.push({ start: aSpan.start, end: aSpan.end, text:
|
|
906
|
+
edits.push({ start: aSpan.start, end: aSpan.end, text: writeAttrsTokens(updated) });
|
|
806
907
|
}
|
|
807
908
|
}
|
|
808
909
|
}
|
|
809
910
|
}
|
|
810
911
|
}
|
|
811
912
|
|
|
812
|
-
const updateAttrsDatatypeLang = (tokens, newLit) => {
|
|
813
|
-
const predicatesAndTypes = tokens.filter(t => !t.startsWith('@') && !t.startsWith('^^'));
|
|
814
|
-
if (newLit?.language) return [...predicatesAndTypes, `@${newLit.language}`];
|
|
815
|
-
const dt = newLit?.datatype?.value;
|
|
816
|
-
if (dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
|
|
817
|
-
return [...predicatesAndTypes, `^^${shortenIRI(dt, ctx)}`];
|
|
818
|
-
}
|
|
819
|
-
return predicatesAndTypes;
|
|
820
|
-
};
|
|
821
913
|
|
|
822
914
|
if (diff.delete) {
|
|
823
915
|
diff.delete.forEach(q0 => {
|
|
824
|
-
const quad =
|
|
916
|
+
const quad = normalizeQuad(q0);
|
|
825
917
|
if (!quad) return;
|
|
826
918
|
if (!quad?.subject || !quad?.predicate || !quad?.object) return;
|
|
827
919
|
|
|
@@ -834,36 +926,60 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
834
926
|
if (isUpdated) return;
|
|
835
927
|
}
|
|
836
928
|
|
|
837
|
-
const key =
|
|
929
|
+
const key = quadToKeyForOrigin(quad);
|
|
838
930
|
let entry = key ? base?.quadIndex?.get(key) : null;
|
|
839
931
|
if (!entry && quad.object?.termType === 'Literal') {
|
|
840
932
|
entry = findOriginEntryForLiteralByValue(quad.subject.value, quad.predicate.value, quad.object.value);
|
|
841
933
|
}
|
|
934
|
+
|
|
935
|
+
// Mark the semantic slot as vacant for future reuse
|
|
936
|
+
if (entry && entry.slotId) {
|
|
937
|
+
// Capture block information before marking as vacant
|
|
938
|
+
const block = base?.blocks?.get(entry.blockId);
|
|
939
|
+
const blockInfo = block ? {
|
|
940
|
+
id: entry.blockId,
|
|
941
|
+
range: block.range,
|
|
942
|
+
attrsRange: block.attrsRange,
|
|
943
|
+
valueRange: block.valueRange,
|
|
944
|
+
carrierType: block.carrierType,
|
|
945
|
+
subject: block.subject,
|
|
946
|
+
context: block.context
|
|
947
|
+
} : null;
|
|
948
|
+
|
|
949
|
+
const vacantSlot = markSlotAsVacant(entry, quad.object);
|
|
950
|
+
if (vacantSlot && blockInfo) {
|
|
951
|
+
vacantSlot.blockInfo = blockInfo;
|
|
952
|
+
base.quadIndex.set(key, vacantSlot);
|
|
953
|
+
}
|
|
954
|
+
}
|
|
955
|
+
|
|
842
956
|
const blockId = entry?.blockId || entry;
|
|
843
957
|
if (!blockId) return;
|
|
958
|
+
|
|
844
959
|
const block = base?.blocks?.get(blockId);
|
|
845
|
-
|
|
960
|
+
if (!block) return;
|
|
961
|
+
|
|
962
|
+
const span = readSpan(block, text, 'attrs');
|
|
846
963
|
if (!span) return;
|
|
847
964
|
|
|
965
|
+
// Handle entry removal by index
|
|
848
966
|
if (entry?.entryIndex != null && block?.entries?.length) {
|
|
849
|
-
const nextEntries =
|
|
967
|
+
const nextEntries = removeEntryAt(block, entry.entryIndex);
|
|
850
968
|
if (!nextEntries) return;
|
|
969
|
+
|
|
851
970
|
const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
} else {
|
|
855
|
-
edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
|
|
856
|
-
}
|
|
971
|
+
const newText = nextTokens.length === 0 ? '{}' : writeAttrsTokens(nextTokens);
|
|
972
|
+
edits.push({ start: span.start, end: span.end, text: newText });
|
|
857
973
|
return;
|
|
858
974
|
}
|
|
859
975
|
|
|
860
|
-
const tokens =
|
|
976
|
+
const tokens = normalizeAttrsTokens(span.text);
|
|
861
977
|
let updated = tokens;
|
|
862
978
|
let removed = false;
|
|
863
979
|
|
|
864
980
|
if (entry?.kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
|
|
865
981
|
const expectedType = entry.expandedType || quad.object.value;
|
|
866
|
-
({ tokens: updated, removed } =
|
|
982
|
+
({ tokens: updated, removed } = removeOneToken(tokens, t => {
|
|
867
983
|
if (!t.startsWith('.')) return false;
|
|
868
984
|
const raw = t.slice(1);
|
|
869
985
|
return expandIRI(raw, ctx) === expectedType;
|
|
@@ -871,7 +987,7 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
871
987
|
} else {
|
|
872
988
|
const expectedPred = entry?.expandedPredicate || quad.predicate.value;
|
|
873
989
|
const expectedForm = entry?.form;
|
|
874
|
-
({ tokens: updated, removed } =
|
|
990
|
+
({ tokens: updated, removed } = removeOneToken(tokens, t => {
|
|
875
991
|
const m = String(t).match(/^(\^\?|\^|\?|)(.+)$/);
|
|
876
992
|
if (!m) return false;
|
|
877
993
|
const form = m[1] || '';
|
|
@@ -888,18 +1004,18 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
888
1004
|
return;
|
|
889
1005
|
}
|
|
890
1006
|
|
|
891
|
-
const newAttrs =
|
|
1007
|
+
const newAttrs = writeAttrsTokens(updated);
|
|
892
1008
|
edits.push({ start: span.start, end: span.end, text: newAttrs });
|
|
893
1009
|
});
|
|
894
1010
|
}
|
|
895
1011
|
|
|
896
1012
|
if (diff.add) {
|
|
897
1013
|
diff.add.forEach(q0 => {
|
|
898
|
-
const quad =
|
|
1014
|
+
const quad = normalizeQuad(q0);
|
|
899
1015
|
if (!quad) return;
|
|
900
1016
|
if (!quad?.subject || !quad?.predicate || !quad?.object) return;
|
|
901
1017
|
|
|
902
|
-
if (consumedAdds.has(
|
|
1018
|
+
if (consumedAdds.has(quadToKeyForOrigin(quad))) return;
|
|
903
1019
|
|
|
904
1020
|
const anchorKey = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
|
|
905
1021
|
const anchored = anchors.get(anchorKey) || null;
|
|
@@ -928,7 +1044,7 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
928
1044
|
} else {
|
|
929
1045
|
const full = quad.object.value;
|
|
930
1046
|
const label = shortenIRI(full, ctx);
|
|
931
|
-
edits.push({ start: result.length, end: result.length, text: `\n[${label}]
|
|
1047
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=${label}) {?${predShort}}` });
|
|
932
1048
|
}
|
|
933
1049
|
return;
|
|
934
1050
|
}
|
|
@@ -948,14 +1064,14 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
948
1064
|
if (quad.object.termType === 'NamedNode') {
|
|
949
1065
|
const full = quad.object.value;
|
|
950
1066
|
const label = shortenIRI(full, ctx);
|
|
951
|
-
edits.push({ start: result.length, end: result.length, text: `\n[${label}](
|
|
1067
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=${shortenIRI(full, ctx)} ?${predShort}}` });
|
|
952
1068
|
return;
|
|
953
1069
|
}
|
|
954
1070
|
}
|
|
955
1071
|
|
|
956
|
-
const span =
|
|
1072
|
+
const span = readSpan(targetBlock, text, 'attrs');
|
|
957
1073
|
if (!span) return;
|
|
958
|
-
const tokens =
|
|
1074
|
+
const tokens = blockTokensFromEntries(targetBlock) || normalizeAttrsTokens(span.text);
|
|
959
1075
|
|
|
960
1076
|
if (quad.predicate.value.endsWith('rdf-syntax-ns#type') && quad.object?.termType === 'NamedNode') {
|
|
961
1077
|
const typeShort = shortenIRI(quad.object.value, ctx);
|
|
@@ -963,7 +1079,7 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
963
1079
|
if (!typeToken) return;
|
|
964
1080
|
if (tokens.includes(typeToken)) return;
|
|
965
1081
|
const updated = [...tokens, typeToken];
|
|
966
|
-
edits.push({ start: span.start, end: span.end, text:
|
|
1082
|
+
edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
|
|
967
1083
|
return;
|
|
968
1084
|
}
|
|
969
1085
|
|
|
@@ -974,7 +1090,7 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
974
1090
|
if (!predToken) return;
|
|
975
1091
|
if (tokens.includes(predToken)) return;
|
|
976
1092
|
const updated = [...tokens, predToken];
|
|
977
|
-
edits.push({ start: span.start, end: span.end, text:
|
|
1093
|
+
edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
|
|
978
1094
|
});
|
|
979
1095
|
}
|
|
980
1096
|
|
|
@@ -983,7 +1099,43 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
983
1099
|
result = result.substring(0, edit.start) + edit.text + result.substring(edit.end);
|
|
984
1100
|
});
|
|
985
1101
|
|
|
1102
|
+
// Extract vacant slots before reparsing to preserve them
|
|
1103
|
+
const vacantSlots = new Map();
|
|
1104
|
+
base?.quadIndex?.forEach((slot, key) => {
|
|
1105
|
+
if (slot.isVacant) {
|
|
1106
|
+
vacantSlots.set(key, slot);
|
|
1107
|
+
}
|
|
1108
|
+
});
|
|
1109
|
+
|
|
986
1110
|
const reparsed = parse(result, { context: options.context || {} });
|
|
1111
|
+
|
|
1112
|
+
// Merge vacant slots back into the new origin
|
|
1113
|
+
vacantSlots.forEach((vacantSlot, key) => {
|
|
1114
|
+
// Check if the block still exists in the new origin
|
|
1115
|
+
if (!reparsed.origin.blocks.has(vacantSlot.blockId)) {
|
|
1116
|
+
// Recreate the empty block for the vacant slot using preserved info
|
|
1117
|
+
const blockInfo = vacantSlot.blockInfo;
|
|
1118
|
+
if (blockInfo) {
|
|
1119
|
+
const emptyBlock = {
|
|
1120
|
+
id: blockInfo.id,
|
|
1121
|
+
range: blockInfo.range || { start: 0, end: 0 },
|
|
1122
|
+
attrsRange: blockInfo.attrsRange,
|
|
1123
|
+
valueRange: blockInfo.valueRange,
|
|
1124
|
+
carrierType: blockInfo.carrierType || 'span',
|
|
1125
|
+
subject: blockInfo.subject || '',
|
|
1126
|
+
types: [],
|
|
1127
|
+
predicates: [],
|
|
1128
|
+
entries: [], // Empty entries - just {} annotation
|
|
1129
|
+
context: blockInfo.context || { ...ctx }
|
|
1130
|
+
};
|
|
1131
|
+
reparsed.origin.blocks.set(vacantSlot.blockId, emptyBlock);
|
|
1132
|
+
}
|
|
1133
|
+
}
|
|
1134
|
+
|
|
1135
|
+
// Merge the vacant slot back
|
|
1136
|
+
reparsed.origin.quadIndex.set(key, vacantSlot);
|
|
1137
|
+
});
|
|
1138
|
+
|
|
987
1139
|
return { text: result, origin: reparsed.origin };
|
|
988
1140
|
}
|
|
989
1141
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.2.4",
|
|
3
|
+
"version": "0.2.5",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|