npm - mdld-parse - Versions diffs - 0.5.5 → 0.6.0 - Mend

mdld-parse 0.5.5 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md CHANGED Viewed

@@ -22,10 +22,10 @@ Energy level: [8] {my:energyLevel ^^xsd:integer}
 Met [Sam] {+my:sam .my:Person ?my:attendee} on my regular walk at [Central Park] {+my:central-park ?my:location .my:Place label @en} and talked about [Sunny] {my:weather} weather.
-Activities: {?my:hasActivity .my:Activity label}
+Activities:
-- Walking {=#walking}
-- Reading {=#reading}
+- **Walking** {+ex:walking ?my:hasActivity .my:Activity label}
+- **Reading** {+ex:reading ?my:hasActivity .my:Activity label}
 ```
@@ -59,7 +59,7 @@ my:central-park a my:Place;
 ```
-Read the [FULL SPEC](./docs/Spec/Spec.md).
+Read the [FULL SPEC](./spec/Spec.md).
 ## Core Features
@@ -69,7 +69,6 @@ Read the [FULL SPEC](./docs/Spec/Spec.md).
 - **Three predicate forms**: `p` (S→L), `?p` (S→O), `!p` (O→S)
 - **Type declarations**: `.Class` for rdf:type triples
 - **Datatypes & language**: `^^xsd:date` and `@en` support
-- **Lists**: Explicit subject declarations and numbered ordered lists with `rdf:List` support
 - **Fragments**: Built-in document structuring with `{=#fragment}`
 - **Round-trip serialization**: Markdown ↔ RDF ↔ Markdown preserves structure
@@ -213,14 +212,15 @@ ex:armstrong a prov:Person .
 ### Lists
-Lists require explicit subjects per item.
+Lists are pure Markdown structure. Each list item requires explicit annotations:
 ```markdown
 # Recipe {=ex:recipe}
-Ingredients: {?ex:ingredient .ex:Ingredient}
-- Flour {=ex:flour label}
-- Water {=ex:water label}
+Ingredients:
+- **Flour** {+ex:flour ?ex:ingredient .ex:Ingredient label}
+- **Water** {+ex:water ?ex:ingredient .ex:Ingredient label}
 ```
 ```turtle
@@ -229,6 +229,11 @@ ex:flour a ex:Ingredient ; rdfs:label "Flour" .
 ex:water a ex:Ingredient ; rdfs:label "Water" .
 ```
+**Key Rules:**
+- No semantic propagation from list scope
+- Each item must have explicit annotations
+- Use `+IRI` to maintain subject chaining for repeated object properties
 ### Code Blocks
 Code blocks are value carriers:
@@ -503,7 +508,7 @@ Only specific markdown elements can carry semantic values:
 **Block:**
 - Headings (`# Title`)
-- List items (`- item`, `1. item`) (single-level)
+- List items (`- item`, `1. item`) — pure Markdown structure
 - Blockquotes (`> quote`)
 - Code blocks (` ```lang `)
@@ -579,14 +584,14 @@ Therefore, the algebra is **closed**.
 # Meeting Notes {=alice:meeting-2024-01-15 .alice:Meeting}
-Attendees: {?alice:attendee label}
+Attendees:
-- Alice {=alice:alice}
-- Bob {=alice:bob}
+- **Alice** {+alice:alice ?alice:attendee label}
+- **Bob** {+alice:bob ?alice:attendee label}
-Action items: {?alice:actionItem label}
+Action items:
-- Review proposal {=alice:task-1}
+- **Review proposal** {+alice:task-1 ?alice:actionItem label}
 ```
 ### Developer Documentation
@@ -630,7 +635,7 @@ Tests validate:
 - Subject declaration and context
 - All predicate forms (p, ?p, !p)
 - Datatypes and language tags
-- List processing
+- Explicit list item annotations
 - Code blocks and blockquotes
 - Round-trip serialization

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "mdld-parse",
-	"version": "0.5.5",
+	"version": "0.6.0",
 	"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
 	"type": "module",
 	"main": "index.js",

package/src/applyDiff.js CHANGED Viewed

@@ -15,25 +15,93 @@ import {
     addSoftFragmentToken,
     removeSoftFragmentToken,
     objectSignature,
-    expandIRI
+    expandIRI,
+    DataFactory
 } from './utils.js';
 function getBlockById(base, blockId) {
-    return blockId ? base?.blocks?.get(blockId) : null;
+    return blockId ? base?.quadMap?.get(blockId) : null;
 }
 function getEntryByQuadKey(base, quadKey) {
-    return quadKey ? base?.quadIndex?.get(quadKey) : null;
+    return quadKey ? base?.quadMap?.get(quadKey) : null;
+}
+// Helper functions for cleaner term type checking
+function isLiteral(term) {
+    return term?.termType === 'Literal';
+}
+function isNamedNode(term) {
+    return term?.termType === 'NamedNode';
+}
+function isRdfType(term) {
+    return term?.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
+}
+function createAnnotationForQuad(quad, ctx) {
+    const predShort = shortenIRI(quad.predicate.value, ctx);
+    if (isLiteral(quad.object)) {
+        const value = String(quad.object.value ?? '');
+        const ann = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
+        return { text: `[${value}] {${ann}}`, isLiteral: true };
+    } else if (isNamedNode(quad.object)) {
+        const objectShort = shortenIRI(quad.object.value, ctx);
+        const objectAnn = createObjectAnnotation(objectShort, predShort);
+        return { text: objectAnn, isLiteral: false };
+    }
+    return null;
+}
+function createSubjectBlockForQuad(quad, ctx) {
+    const subjectShort = shortenIRI(quad.subject.value, ctx);
+    const predShort = shortenIRI(quad.predicate.value, ctx);
+    const subjectName = extractLocalName(quad.subject.value);
+    if (isNamedNode(quad.object)) {
+        // IRI object: create object reference
+        const objectShort = shortenIRI(quad.object.value, ctx);
+        return { text: `\n\n# ${subjectName.charAt(0).toUpperCase() + subjectName.slice(1)} {=${subjectShort}}\n[${objectShort}] {${predShort}}\n`, isNewSubject: true };
+    } else {
+        // Literal object: create property on separate line
+        const value = String(quad.object.value ?? '');
+        const annotation = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
+        return { text: `\n\n# ${subjectName.charAt(0).toUpperCase() + subjectName.slice(1)} {=${subjectShort}}\n[${value}] {${annotation}}\n`, isNewSubject: true };
+    }
+}
+function extractLocalName(iri) {
+    return iri.split('/').pop() || iri.split('#').pop() || iri;
 }
 function isValidQuad(quad) {
     return quad && quad.subject && quad.predicate && quad.object;
 }
+function normalizeDiffQuads(quads, ctx) {
+    // Normalize each quad via DataFactory.fromQuad for RDF/JS compatibility.
+    // First expand CURIEs in NamedNode subjects and predicates; object and graph pass through unchanged.
+    return quads.map(quad => {
+        // Expand CURIEs to full IRIs before normalization
+        const expandedQuad = {
+            subject: quad.subject.termType === 'NamedNode'
+                ? { ...quad.subject, value: expandIRI(quad.subject.value, ctx) }
+                : quad.subject,
+            predicate: quad.predicate.termType === 'NamedNode'
+                ? { ...quad.predicate, value: expandIRI(quad.predicate.value, ctx) }
+                : quad.predicate,
+            object: quad.object,
+            graph: quad.graph
+        };
+        return DataFactory.fromQuad(expandedQuad);
+    }).filter(isValidQuad);
+}
 function createLiteralAnnotation(value, predicate, language, datatype, ctx) {
     let ann = predicate;
     if (language) ann += ` @${language}`;
-    else if (datatype?.value && datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
+    else if (datatype?.value && datatype.value !== DataFactory.literal('').datatype.value) {
         ann += ` ^^${shortenIRI(datatype.value, ctx)}`;
     }
     return ann;
@@ -126,23 +194,24 @@ function removeTokenFromSlot(entry, tokens, ctx, quad) {
 }
 function addTokenToSlot(tokens, ctx, quad) {
-    if (quad.predicate.value.endsWith('rdf-syntax-ns#type') && quad.object?.termType === 'NamedNode') {
+    // Use cleaner helper functions
+    if (isRdfType(quad.predicate) && isNamedNode(quad.object)) {
         const typeShort = shortenIRI(quad.object.value, ctx);
         const typeToken = typeShort.includes(':') || !typeShort.startsWith('http') ? `.${typeShort}` : null;
         if (typeToken && !tokens.includes(typeToken)) {
             return [...tokens, typeToken];
         }
-    } else if (quad.object.termType === 'NamedNode') {
+    } else if (isNamedNode(quad.object)) {
         const objectShort = shortenIRI(quad.object.value, ctx);
         const isSoftFragment = quad.object.value.includes('#');
         const fragment = isSoftFragment ? quad.object.value.split('#')[1] : null;
-        if (isSoftFragment) {
-            return addSoftFragmentToken(tokens, fragment);
+        if (fragment) {
+            return addSoftFragmentToken(tokens, objectShort, fragment);
         } else {
             return addObjectToken(tokens, objectShort);
         }
-    } else if (quad.object.termType === 'Literal') {
+    } else if (isLiteral(quad.object)) {
         const predShort = shortenIRI(quad.predicate.value, ctx);
         if (!tokens.includes(predShort)) {
             return [...tokens, predShort];
@@ -179,9 +248,9 @@ export function applyDiff({ text, diff, origin, options = {} }) {
 function planOperations(diff, base, ctx) {
-    // Normalize quads once
-    const normAdds = (diff.add || []).map(normalizeQuad).filter(isValidQuad);
-    const normDeletes = (diff.delete || []).map(normalizeQuad).filter(isValidQuad);
+    // Normalize quads using DataFactory for proper RDF/JS compatibility
+    const normAdds = normalizeDiffQuads(diff.add || [], ctx);
+    const normDeletes = normalizeDiffQuads(diff.delete || [], ctx);
     const plan = {
         literalUpdates: [],
@@ -206,8 +275,7 @@ function planOperations(diff, base, ctx) {
         const key = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
         const quadKey = quadToKeyForOrigin(quad);
         const entry = getEntryByQuadKey(base, quadKey);
-        const blockId = entry?.blockId || entry;
-        const block = getBlockById(base, blockId);
+        const block = entry; // In unified structure, entry is the block
         if (block?.attrsRange) {
             anchors.set(key, { block, entry });
         }
@@ -215,18 +283,18 @@ function planOperations(diff, base, ctx) {
     // Detect literal updates early
     for (const deleteQuad of normDeletes) {
-        if (deleteQuad.object.termType !== 'Literal') continue;
+        if (!isLiteral(deleteQuad.object)) continue;
         const k = JSON.stringify([deleteQuad.subject.value, deleteQuad.predicate.value]);
         const candidates = addBySP.get(k) || [];
         const addQuad = candidates.find(x =>
-            x?.object?.termType === 'Literal' && !plan.consumedAdds.has(quadToKeyForOrigin(x))
+            isLiteral(x?.object) && !plan.consumedAdds.has(quadToKeyForOrigin(x))
         );
         if (!addQuad) continue;
         const entry = resolveOriginEntry(deleteQuad, base);
-        const block = entry ? getBlockById(base, entry.blockId || entry) : null;
+        const block = entry; // In unified structure, the entry is the block
         if (block) {
             plan.literalUpdates.push({ deleteQuad, addQuad, entry, block });
@@ -236,13 +304,13 @@ function planOperations(diff, base, ctx) {
     // Find vacant slot occupations
     for (const quad of normAdds) {
-        if (quad.object.termType !== 'Literal') continue;
+        if (!isLiteral(quad.object)) continue;
         if (plan.consumedAdds.has(quadToKeyForOrigin(quad))) continue;
-        const vacantSlot = findVacantSlot(base?.quadIndex, quad.subject, quad.predicate);
+        const vacantSlot = findVacantSlot(base?.quadMap, quad.subject, quad.predicate);
         if (!vacantSlot) continue;
-        const block = base?.blocks?.get(vacantSlot.blockId);
+        const block = vacantSlot; // In unified structure, the slot is the block
         if (block) {
             plan.vacantSlotOccupations.push({ quad, vacantSlot, block });
             plan.consumedAdds.add(quadToKeyForOrigin(quad));
@@ -251,7 +319,7 @@ function planOperations(diff, base, ctx) {
     // Plan remaining deletes
     for (const quad of normDeletes) {
-        if (quad.object.termType === 'Literal') {
+        if (isLiteral(quad.object)) {
             const isUpdated = plan.literalUpdates.some(u =>
                 u.deleteQuad.subject.value === quad.subject.value &&
                 u.deleteQuad.predicate.value === quad.predicate.value &&
@@ -261,7 +329,7 @@ function planOperations(diff, base, ctx) {
         }
         const entry = resolveOriginEntry(quad, base);
-        const block = entry ? getBlockById(base, entry.blockId || entry) : null;
+        const block = entry; // In unified structure, entry is the block
         if (block) {
             plan.deletes.push({ quad, entry, block });
         }
@@ -348,7 +416,7 @@ function materializeEdits(plan, text, ctx, base) {
             };
             vacantSlot.blockInfo = blockInfo;
             const key = quadToKeyForOrigin(quad);
-            if (key) base.quadIndex.set(key, vacantSlot);
+            if (key) base.quadMap.set(key, vacantSlot);
         }
         const span = readSpan(block, text, 'attrs');
@@ -382,56 +450,45 @@ function materializeEdits(plan, text, ctx, base) {
             continue;
         }
-        if (quad.object.termType === 'Literal' || quad.object.termType === 'NamedNode') {
+        if (isLiteral(quad.object) || isNamedNode(quad.object)) {
             if (!targetBlock) {
-                const predShort = shortenIRI(quad.predicate.value, ctx);
-                if (quad.object.termType === 'Literal') {
-                    const value = String(quad.object.value ?? '');
-                    const ann = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
-                    edits.push({ start: text.length, end: text.length, text: `\n[${value}] {${ann}}` });
+                // No target block - check if subject already exists in document
+                const subjectExists = Array.from(base?.quadMap?.values() || [])
+                    .some(block => block.subject?.value === quad.subject.value);
+                let annotation;
+                if (!subjectExists && isNamedNode(quad.object)) {
+                    // New subject with IRI object - create subject block
+                    annotation = createSubjectBlockForQuad(quad, ctx);
+                } else if (subjectExists) {
+                    // Existing subject - create simple annotation
+                    annotation = createAnnotationForQuad(quad, ctx);
                 } else {
-                    const objectShort = shortenIRI(quad.object.value, ctx);
-                    edits.push({ start: text.length, end: text.length, text: createObjectAnnotation(objectShort, predShort) });
+                    // New subject with literal - create subject block
+                    annotation = createSubjectBlockForQuad(quad, ctx);
                 }
-                continue;
-            }
-            const span = readSpan(targetBlock, text, 'attrs');
-            if (!span) continue;
-            // Check if this is a subject-only block (like {=ex:order-123})
-            const tokens = normalizeAttrsTokens(span.text);
-            const hasSubjectToken = tokens.some(t => t.startsWith('='));
-            const hasPredicateTokens = tokens.some(t => !t.startsWith('=') && !t.startsWith('.'));
-            if (tokens.length === 1 && tokens[0].startsWith('=')) {
-                // This is a subject-only block, create new annotation
-                const predShort = shortenIRI(quad.predicate.value, ctx);
-                if (quad.object.termType === 'Literal') {
-                    const value = String(quad.object.value ?? '');
-                    const ann = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
-                    edits.push({ start: text.length, end: text.length, text: `\n[${value}] {${ann}}` });
-                } else {
-                    const objectShort = shortenIRI(quad.object.value, ctx);
-                    edits.push({ start: text.length, end: text.length, text: createObjectAnnotation(objectShort, predShort) });
+                if (annotation) {
+                    edits.push({ start: text.length, end: text.length, text: annotation.text });
                 }
                 continue;
             }
-            // Normal annotation block, add tokens
-            const existingTokens = blockTokensFromEntries(targetBlock) || tokens;
-            let updated = addTokenToSlot(existingTokens, ctx, quad);
+            // Insert annotation after target block's range
+            const annotation = createAnnotationForQuad(quad, ctx);
+            if (annotation) {
+                // Find the end of the target block's content, not just its range
+                const targetBlockEnd = targetBlock.range.end;
+                let insertPos = targetBlockEnd;
-            // For literal predicates with datatypes, we need to add datatype token too
-            if (quad.object.termType === 'Literal' && quad.object.datatype && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
-                const datatypeToken = `^^${shortenIRI(quad.object.datatype.value, ctx)}`;
-                if (!updated.includes(datatypeToken)) {
-                    updated = [...updated, datatypeToken];
+                // Skip past the target block's content to find the right insertion point
+                while (insertPos < text.length && text[insertPos] !== '\n') {
+                    insertPos++;
                 }
-            }
-            if (updated.length !== existingTokens.length) {
-                edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
+                // Insert after the target block's content
+                const finalInsertPos = insertPos < text.length ? insertPos : text.length;
+                edits.push({ start: finalInsertPos, end: finalInsertPos, text: `\n${annotation.text}` });
             }
         }
     }
@@ -450,7 +507,7 @@ function applyEdits(text, edits, ctx, base) {
     // Extract vacant slots before reparsing
     const vacantSlots = new Map();
-    base?.quadIndex?.forEach((slot, key) => {
+    base?.quadMap?.forEach((slot, key) => {
         if (slot.isVacant) vacantSlots.set(key, slot);
     });
@@ -458,7 +515,7 @@ function applyEdits(text, edits, ctx, base) {
     // Merge vacant slots back
     vacantSlots.forEach((vacantSlot, key) => {
-        if (!reparsed.origin.blocks.has(vacantSlot.blockId) && vacantSlot.blockInfo) {
+        if (!reparsed.origin.quadMap.has(vacantSlot.id) && vacantSlot.blockInfo) {
             const { blockInfo } = vacantSlot;
             const emptyBlock = {
                 id: blockInfo.id,
@@ -469,12 +526,11 @@ function applyEdits(text, edits, ctx, base) {
                 subject: blockInfo.subject || '',
                 types: [],
                 predicates: [],
-                entries: [],
                 context: blockInfo.context || { ...ctx }
             };
-            reparsed.origin.blocks.set(vacantSlot.blockId, emptyBlock);
+            reparsed.origin.quadMap.set(vacantSlot.id, emptyBlock);
         }
-        reparsed.origin.quadIndex.set(key, vacantSlot);
+        reparsed.origin.quadMap.set(key, vacantSlot);
     });
     return { text: result, origin: reparsed.origin };
@@ -483,11 +539,11 @@ function applyEdits(text, edits, ctx, base) {
 // Helper functions for origin lookup
 function resolveOriginEntry(quad, base) {
     const key = quadToKeyForOrigin(quad);
-    let entry = key ? base?.quadIndex?.get(key) : null;
+    let entry = key ? base?.quadMap?.get(key) : null;
-    if (!entry && quad.object?.termType === 'Literal') {
+    if (!entry && isLiteral(quad.object)) {
         // Fallback: search by value
-        for (const [k, e] of base?.quadIndex || []) {
+        for (const [k, e] of base?.quadMap || []) {
             const parsed = parseQuadIndexKey(k);
             if (parsed && parsed.s === quad.subject.value &&
                 parsed.p === quad.predicate.value &&
@@ -507,12 +563,21 @@ function findTargetBlock(quad, base, anchors) {
     const anchored = anchors.get(anchorKey);
     if (anchored?.block) return anchored.block;
-    // Block affinity: prefer same block, then same subject
-    for (const [, block] of base?.blocks || []) {
-        if (block.subject === quad.subject.value && block.attrsRange) {
-            return block;
-        }
+    // Find the best position within the subject's section
+    // Look for blocks with the same subject and sort by position
+    const subjectBlocks = Array.from(base?.quadMap?.values() || [])
+        .filter(block => block.subject?.value === quad.subject.value)
+        .sort((a, b) => a.range.start - b.range.start);
+    if (subjectBlocks.length === 0) return null;
+    // Strategy: prefer the last block that has an attrsRange
+    // (subjectBlocks is sorted by ascending range.start above, so "last" is the highest start offset)
+    const blocksWithAttrs = subjectBlocks.filter(block => block.attrsRange);
+    if (blocksWithAttrs.length > 0) {
+        return blocksWithAttrs[blocksWithAttrs.length - 1]; // Return last matching block
     }
-    return null;
+    // Fallback: return the last block in the subject's section
+    return subjectBlocks[subjectBlocks.length - 1];
 }