npm - mdld-parse - Versions diffs - 0.5.2 → 0.5.3 - Mend

mdld-parse 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md +142 -27
package/package.json +3 -2
package/src/{serialize.js → applyDiff.js} +1 -1
package/src/generate.js +248 -0
package/src/index.js +3 -1
package/src/locate.js +92 -0
package/src/parse.js +6 -1

package/README.md CHANGED Viewed

@@ -11,26 +11,55 @@
 MD-LD allows you to author RDF graphs directly in Markdown using explicit `{...}` annotations:
 ```markdown
-# Apollo 11 {=ex:apollo11 .SpaceMission}
+[my] <tag:alice@example.com,2026:>
-Launch: [1969-07-16] {startDate ^^xsd:date}
-Crew: [Neil Armstrong] {+ex:armstrong ?crewMember name}
-Description: [First crewed Moon landing] {description}
+# 2024-07-18 {=my:journal-2024-07-18 .my:Event my:date ^^xsd:date}
+## A good day {label}
+Mood: [Happy] {my:mood}
+Energy level: [8] {my:energyLevel ^^xsd:integer}
+Met [Sam] {+my:sam .my:Person ?my:attendee} on my regular walk at [Central Park] {+my:central-park ?my:location .my:Place label @en} and talked about [Sunny] {my:weather} weather.
+Activities: {?my:hasActivity .my:Activity label}
+- Walking {=#walking}
+- Reading {=#reading}
-[Section] {+#overview ?hasPart}
-Overview: [Mission summary] {description}
 ```
 Generates valid RDF triples:
 ```turtle
-ex:apollo11 a schema:SpaceMission ;
-  schema:startDate "1969-07-16"^^xsd:date ;
-  schema:crewMember ex:armstrong ;
-  schema:description "First crewed Moon landing" .
-ex:armstrong schema:name "Neil Armstrong" .
-```
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#>.
+@prefix sh: <http://www.w3.org/ns/shacl#>.
+@prefix prov: <http://www.w3.org/ns/prov#>.
+@prefix ex: <http://example.org/>.
+@prefix my: <tag:alice@example.com,2026:>.
+my:journal-2024-07-18 a my:Event;
+    my:date "2024-07-18"^^xsd:date;
+    rdfs:label "A good day";
+    my:mood "Happy";
+    my:energyLevel 8;
+    my:attendee my:sam;
+    my:location my:central-park;
+    my:weather "Sunny";
+    my:hasActivity <tag:alice@example.com,2026:journal-2024-07-18#walking>, <tag:alice@example.com,2026:journal-2024-07-18#reading>.
+my:sam a my:Person.
+my:central-park a my:Place;
+    rdfs:label "Central Park"@en.
+<tag:alice@example.com,2026:journal-2024-07-18#walking> a my:Activity;
+    rdfs:label "Walking".
+<tag:alice@example.com,2026:journal-2024-07-18#reading> a my:Activity;
+    rdfs:label "Reading".
+```
+Read the [FULL SPEC](./docs/Spec/Spec.md).
 ## Core Features
@@ -329,7 +358,7 @@ console.log(result.quads);
 // ]
 ```
-### `serialize({ text, diff, origin, options })`
+### `applyDiff({ text, diff, origin, options })`
 Apply RDF changes back to markdown with proper positioning.
@@ -364,7 +393,7 @@ const newQuad = {
   object: { termType: 'Literal', value: '2024-01-01' }
 };
-const updated = serialize({
+const updated = applyDiff({
   text: original,
   diff: { add: [newQuad] },
   origin: result.origin,
@@ -378,6 +407,92 @@ console.log(updated.text);
 // [2024-01-01] {datePublished}
 ```
+### `generate(quads, context)`
+Generate deterministic MDLD from RDF quads with origin tracking.
+**Parameters:**
+- `quads` (array) — Array of RDF/JS Quads to convert
+- `context` (object, optional) — Prefix mappings (default: `{}`)
+  - Merged with DEFAULT_CONTEXT for proper CURIE shortening
+  - Only user-defined prefixes are rendered in output
+**Returns:** `{ text, origin, context }`
+- `text` — Generated MDLD markdown
+- `origin` — Origin tracking object with:
+  - `blocks` — Map of block IDs to source locations
+  - `quadIndex` — Map of quads to block IDs
+- `context` — Final context used (includes defaults)
+**Example:**
+```javascript
+const quads = [
+  {
+    subject: { termType: 'NamedNode', value: 'http://example.org/article' },
+    predicate: { termType: 'NamedNode', value: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' },
+    object: { termType: 'NamedNode', value: 'http://example.org/Article' }
+  },
+  {
+    subject: { termType: 'NamedNode', value: 'http://example.org/article' },
+    predicate: { termType: 'NamedNode', value: 'http://schema.org/author' },
+    object: { termType: 'NamedNode', value: 'http://example.org/alice' }
+  }
+];
+const result = generate(quads, {
+  ex: 'http://example.org/',
+  schema: 'http://schema.org/'
+});
+console.log(result.text);
+// # Article {=ex:article .ex:Article}
+//
+// > alice {+ex:alice ?schema:author}
+```
+### `locate(quad, origin, text, context)`
+Locate the precise text range of a quad in MDLD text using origin tracking.
+**Parameters:**
+- `quad` (object) — The quad to locate (subject, predicate, object)
+- `origin` (object, optional) — Origin object containing blocks and quadIndex
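+- `text` (string, optional) — MDLD text; parsed to build the origin when `origin` is not provided, and the source from which `content` is extracted (when omitted, `content` is an empty string)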
+- `context` (object, optional) — Context for parsing when text needs to be parsed
+**Returns:** `{ blockId, entryIndex, kind, subject, predicate, object, range, content, blockRange, carrierType, isVacant }` or `null`
+- `blockId` — ID of the containing block
+- `entryIndex` — Position within block entries
+- `kind` — Slot kind recorded for the quad in the origin's quad index
+- `subject`, `predicate`, `object` — Normalized terms of the located quad
+- `range` — Precise character range of the quad content
+- `content` — Actual text content at that range
+- `blockRange` — Full range of the containing block
+- `carrierType` — Type of carrier (heading, blockquote, list, span)
+- `isVacant` — Whether the slot is marked as vacant
+**Example:**
+```javascript
+import { parse, locate } from './src/index.js';
+const result = parse(mdldText, { context: { ex: 'http://example.org/' } });
+const quad = result.quads[0]; // Find a quad to locate
+// Pattern 1: With origin (most efficient)
+const location1 = locate(quad, result.origin, mdldText);
+// Pattern 2: Auto-parse text (convenient)
+const location2 = locate(quad, null, mdldText, { ex: 'http://example.org/' });
+console.log(location1.range); // { start: 38, end: 44 }
+console.log(location1.content); // " Alice"
+console.log(location1.carrierType); // "blockquote"
+```
 ## Value Carriers
 Only specific markdown elements can carry semantic values:
@@ -464,14 +579,14 @@ Therefore, the algebra is **closed**.
 ```markdown
 [alice] <tag:alice@example.com,2026:>
-# Meeting Notes {=alice:meeting-2024-01-15 .Meeting}
+# Meeting Notes {=alice:meeting-2024-01-15 .alice:Meeting}
-Attendees: {?attendee name}
+Attendees: {?alice:attendee label}
 - Alice {=alice:alice}
 - Bob {=alice:bob}
-Action items: {?actionItem name}
+Action items: {?alice:actionItem label}
 - Review proposal {=alice:task-1}
 ```
@@ -479,14 +594,14 @@ Action items: {?actionItem name}
 ### Developer Documentation
 ````markdown
-# API Endpoint {=api:/users/:id .APIEndpoint}
+# API Endpoint {=api:/users/:id .api:Endpoint}
-[GET] {method}
-[/users/:id] {path}
+[GET] {api:method}
+[/users/:id] {api:path}
 Example:
-```bash {=api:/users/:id#example .CodeExample text}
+```bash {=api:/users/:id#example .api:CodeExample api:code}
 curl https://api.example.com/users/123
 ```
 ````
@@ -496,13 +611,13 @@ curl https://api.example.com/users/123
 ```markdown
 [alice] <tag:alice@example.com,2026:>
-# Paper {=alice:paper-semantic-markdown .ScholarlyArticle}
+# Paper {=alice:paper-semantic-markdown .alice:ScholarlyArticle}
-[Semantic Web] {about}
-[Alice Johnson] {=alice:alice-johnson ?author}
-[2024-01] {datePublished ^^xsd:gYearMonth}
+[Semantic Web] {label}
+[Alice Johnson] {=alice:alice-johnson ?alice:author}
+[2024-01] {alice:datePublished ^^xsd:gYearMonth}
-> This paper explores semantic markup in Markdown. {abstract @en}
+> This paper explores semantic markup in Markdown. {comment @en}
 ```
 ## Testing

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "mdld-parse",
-	"version": "0.5.2",
+	"version": "0.5.3",
 	"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
 	"type": "module",
 	"main": "index.js",
@@ -12,7 +12,8 @@
 		"src"
 	],
 	"scripts": {
-		"test": "node tests/index.js"
+		"test": "node tests/index.js",
+		"dev": "pnpx live-server"
 	},
 	"keywords": [
 		"mdld",

package/src/{serialize.js → applyDiff.js} RENAMED Viewed

@@ -158,7 +158,7 @@ function markEntryAsVacant(entry, quad) {
     return null;
 }
-export function serialize({ text, diff, origin, options = {} }) {
+export function applyDiff({ text, diff, origin, options = {} }) {
     if (!diff || (!diff.add?.length && !diff.delete?.length)) {
         const reparsed = parse(text, { context: options.context || {} });
         return { text, origin: reparsed.origin };

package/src/generate.js ADDED Viewed

@@ -0,0 +1,248 @@
+import { shortenIRI, expandIRI, quadIndexKey, createSlotInfo, DEFAULT_CONTEXT } from './utils.js';
/**
 * Derive a short, human-readable label from an IRI.
 *
 * The label is whatever follows the last `#`; failing that, whatever follows
 * the last `/`; failing that, whatever follows the last `:`. Trailing
 * separators are ignored first, so `http://example.org/` yields
 * `example.org` rather than falling through to the scheme colon and
 * producing `//example.org/`.
 *
 * @param {string} iri - IRI (or any string) to shorten.
 * @returns {string} The local name, or the input unchanged when it is empty
 *   or consists solely of separator characters.
 */
function extractLocalName(iri) {
    const separators = ['#', '/', ':'];

    // Ignore separators at the very end: they delimit nothing.
    let end = iri.length;
    while (end > 0 && separators.includes(iri[end - 1])) {
        end -= 1;
    }
    if (end === 0) {
        return iri;
    }

    const trimmed = iri.substring(0, end);
    // Separators are tried in priority order, not by position in the string.
    for (const sep of separators) {
        const lastSep = trimmed.lastIndexOf(sep);
        if (lastSep !== -1) {
            return trimmed.substring(lastSep + 1);
        }
    }
    return trimmed;
}
/**
 * Turn RDF quads into canonical MDLD markdown.
 *
 * The quads are normalized and sorted, bucketed by subject, and rendered
 * one subject at a time. Entries of `context` are layered on top of
 * DEFAULT_CONTEXT, so a caller-supplied prefix wins on a name collision.
 *
 * @param {Array} quads - RDF/JS-style quads ({ subject, predicate, object }).
 * @param {Object} [context={}] - Prefix-to-namespace mappings.
 * @returns {{ text: string, origin: { blocks: Map, quadIndex: Map }, context: Object }}
 *   The generated markdown, the origin bookkeeping produced while rendering,
 *   and the merged context that was used.
 */
export function generate(quads, context = {}) {
    const mergedContext = { ...DEFAULT_CONTEXT, ...context };
    const bySubject = groupQuadsBySubject(normalizeAndSortQuads(quads));
    const rendered = buildDeterministicMDLD(bySubject, mergedContext);
    const origin = { blocks: rendered.blocks, quadIndex: rendered.quadIndex };
    return { text: rendered.text, origin, context: mergedContext };
}
/**
 * Order two strings by UTF-16 code units.
 *
 * Unlike `String.prototype.localeCompare`, the result does not depend on the
 * host's locale or ICU data, so it is identical on every machine.
 *
 * @param {string} left
 * @param {string} right
 * @returns {number} -1, 0 or 1.
 */
function compareCodeUnits(left, right) {
    if (left === right) return 0;
    return left < right ? -1 : 1;
}

/**
 * Copy quads into plain objects and put them in a canonical order.
 *
 * Literal objects keep their value, get `language` normalized to `null` when
 * absent, and default to the xsd:string datatype. Every non-literal object is
 * emitted with termType 'NamedNode'.
 * NOTE(review): that includes blank-node objects, which therefore lose their
 * termType here — confirm whether blank nodes are expected as input.
 *
 * Ordering is subject, predicate, object value, then object termType,
 * language and datatype as tie-breakers, all by code unit. The tie-breakers
 * make the result independent of the order of the input array.
 *
 * @param {Iterable|null|undefined} quads - Quads to normalize; a nullish
 *   value is treated as an empty list.
 * @returns {Array} New array of normalized quads; the input is not modified.
 */
function normalizeAndSortQuads(quads) {
    const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string';

    const normalizeObject = (object) => {
        if (object.termType !== 'Literal') {
            return { termType: 'NamedNode', value: object.value };
        }
        return {
            termType: 'Literal',
            value: object.value,
            language: object.language || null,
            datatype: object.datatype || { termType: 'NamedNode', value: XSD_STRING }
        };
    };

    return Array.from(quads ?? [], (quad) => ({
        subject: { termType: quad.subject.termType, value: quad.subject.value },
        predicate: { termType: quad.predicate.termType, value: quad.predicate.value },
        object: normalizeObject(quad.object)
    })).sort((a, b) =>
        compareCodeUnits(a.subject.value, b.subject.value)
        || compareCodeUnits(a.predicate.value, b.predicate.value)
        || compareCodeUnits(a.object.value, b.object.value)
        || compareCodeUnits(a.object.termType, b.object.termType)
        || compareCodeUnits(a.object.language ?? '', b.object.language ?? '')
        || compareCodeUnits(a.object.datatype?.value ?? '', b.object.datatype?.value ?? ''));
}
/**
 * Bucket quads by the value of their subject.
 *
 * Buckets appear in the Map in the order their subject is first seen, and
 * quads keep their relative order inside each bucket.
 *
 * @param {Iterable} quads - Quads exposing `subject.value`.
 * @returns {Map<string, Array>} Subject value mapped to that subject's quads.
 */
function groupQuadsBySubject(quads) {
    const bySubject = new Map();
    for (const quad of quads) {
        const subjectKey = quad.subject.value;
        const bucket = bySubject.get(subjectKey);
        if (bucket === undefined) {
            bySubject.set(subjectKey, [quad]);
        } else {
            bucket.push(quad);
        }
    }
    return bySubject;
}
/**
 * Render subject-grouped quads as MDLD text and record where each block and
 * quad landed in that text.
 *
 * Layout, in order:
 *   1. One `[prefix] <namespace>` line per context entry that the reader
 *      cannot already assume: `@`-keywords are skipped, and so are entries
 *      identical to DEFAULT_CONTEXT. A caller entry that redefines a default
 *      prefix name with a different namespace IS declared, because the
 *      shortened IRIs below are produced with the caller's namespace.
 *   2. A blank line, only if at least one prefix line was written.
 *   3. Per subject (sorted): a `# name {=subject .type ...}` heading, then one
 *      `> value {predicate ...}` line per literal and one
 *      `> name {+object ?predicate}` line per IRI object, each group ordered
 *      by predicate.
 *
 * All recorded ranges are offsets into the returned `text`. Only trailing
 * whitespace is trimmed from the result, so offsets stay valid.
 *
 * Strings are ordered by UTF-16 code unit so the output does not vary with
 * the host locale.
 *
 * @param {Map<string, Array>} subjectGroups - Subject IRI mapped to its
 *   normalized quads (literal objects are expected to carry `datatype`).
 * @param {Object} context - Merged prefix context used for shortening IRIs.
 * @returns {{ text: string, blocks: Map, quadIndex: Map }}
 */
function buildDeterministicMDLD(subjectGroups, context) {
    const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
    const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string';
    const compare = (a, b) => (a < b ? -1 : a > b ? 1 : 0);
    const byPredicate = (a, b) => compare(a.predicate.value, b.predicate.value);

    let text = '';
    let currentPos = 0;
    const blocks = new Map();
    const quadIndex = new Map();

    // Single place where text and the running offset advance together.
    const append = (chunk) => {
        text += chunk;
        currentPos += chunk.length;
    };

    // --- Prefix declarations -------------------------------------------------
    const sortedPrefixes = Object.entries(context).sort(([a], [b]) => compare(a, b));
    let renderedPrefixCount = 0;
    for (const [prefix, namespace] of sortedPrefixes) {
        const isKeyword = prefix.startsWith('@');
        const isImplicitDefault = DEFAULT_CONTEXT[prefix] === namespace;
        if (isKeyword || isImplicitDefault) {
            continue;
        }
        const prefixDecl = `[${prefix}] <${namespace}>\n`;
        const blockId = generateBlockId();
        blocks.set(blockId, {
            id: blockId,
            range: { start: currentPos, end: currentPos + prefixDecl.length },
            subject: null,
            entries: [{ kind: 'prefix', prefix, namespace, raw: prefixDecl.trim() }],
            carrierType: 'prefix'
        });
        append(prefixDecl);
        renderedPrefixCount += 1;
    }
    // The separator must only exist when something precedes it; a leading
    // newline would have to be trimmed later and would shift every range.
    if (renderedPrefixCount > 0) {
        append('\n');
    }

    // --- Subjects ------------------------------------------------------------
    const sortedSubjects = Array.from(subjectGroups.keys()).sort();
    for (const subjectIRI of sortedSubjects) {
        const subjectQuads = subjectGroups.get(subjectIRI);
        const shortSubject = shortenIRI(subjectIRI, context);

        const types = subjectQuads.filter(q => q.predicate.value === RDF_TYPE);
        const literals = subjectQuads
            .filter(q => q.object.termType === 'Literal' && q.predicate.value !== RDF_TYPE)
            .sort(byPredicate);
        const objects = subjectQuads
            .filter(q => q.object.termType === 'NamedNode' && q.predicate.value !== RDF_TYPE)
            .sort(byPredicate);

        // Heading. Types are shortened with the context (like the subject and
        // the predicates) so their namespace is not lost.
        const typeTokens = types.map(t => '.' + shortenIRI(t.object.value, context));
        const typeAnnotations = typeTokens.length > 0
            ? ' ' + [...typeTokens].sort().join(' ')
            : '';
        const headingText = `# ${extractLocalName(subjectIRI)} {=${shortSubject}${typeAnnotations}}\n\n`;
        const blockId = generateBlockId();
        blocks.set(blockId, {
            id: blockId,
            range: { start: currentPos, end: currentPos + headingText.length },
            subject: subjectIRI,
            entries: [
                { kind: 'subject', raw: `=${shortSubject}`, expandedSubject: subjectIRI },
                ...types.map((t, i) => ({
                    kind: 'type',
                    raw: typeTokens[i],
                    expandedType: t.object.value,
                    entryIndex: i
                }))
            ],
            carrierType: 'heading'
        });
        types.forEach((quad, i) => {
            const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
            quadIndex.set(key, createSlotInfo(blockId, i, {
                kind: 'type',
                subject: quad.subject,
                predicate: quad.predicate,
                object: quad.object
            }));
        });
        append(headingText);

        // Literal-valued properties.
        for (const quad of literals) {
            const predShort = shortenIRI(quad.predicate.value, context);
            let annotation = predShort;
            if (quad.object.language) {
                annotation += ` @${quad.object.language}`;
            } else if (quad.object.datatype.value !== XSD_STRING) {
                annotation += ` ^^${shortenIRI(quad.object.datatype.value, context)}`;
            }
            const value = quad.object.value;
            const literalText = `> ${value} {${annotation}}\n`;
            // Offsets come from the known layout "> " + value + " " + "{...}" + "\n"
            // rather than from searching for braces, which may occur in the value.
            const valueStart = currentPos + 2;
            const attrsStart = valueStart + value.length + 1;
            const attrsEnd = currentPos + literalText.length - 1;
            const literalBlockId = generateBlockId();
            blocks.set(literalBlockId, {
                id: literalBlockId,
                range: { start: currentPos, end: currentPos + literalText.length },
                subject: subjectIRI,
                entries: [{
                    kind: 'property',
                    raw: annotation,
                    expandedPredicate: quad.predicate.value,
                    form: '',
                    entryIndex: 0
                }],
                carrierType: 'span',
                valueRange: { start: valueStart, end: valueStart + value.length },
                attrsRange: { start: attrsStart, end: attrsEnd }
            });
            const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
            quadIndex.set(key, createSlotInfo(literalBlockId, 0, {
                kind: 'pred',
                subject: quad.subject,
                predicate: quad.predicate,
                object: quad.object,
                form: ''
            }));
            append(literalText);
        }

        // IRI-valued properties.
        for (const quad of objects) {
            const predShort = shortenIRI(quad.predicate.value, context);
            const objShort = shortenIRI(quad.object.value, context);
            const localName = extractLocalName(quad.object.value);
            const objectText = `> ${localName} {+${objShort} ?${predShort}}\n`;
            const objectBlockId = generateBlockId();
            blocks.set(objectBlockId, {
                id: objectBlockId,
                range: { start: currentPos, end: currentPos + objectText.length },
                subject: subjectIRI,
                entries: [{
                    kind: 'object',
                    raw: objShort,
                    expandedObject: quad.object.value,
                    entryIndex: 0
                }],
                carrierType: 'span'
            });
            const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
            quadIndex.set(key, createSlotInfo(objectBlockId, 0, {
                kind: 'pred',
                subject: quad.subject,
                predicate: quad.predicate,
                object: quad.object,
                form: '?'
            }));
            append(objectText);
        }

        // Blank line between one subject's property lines and the next heading.
        if (literals.length > 0 || objects.length > 0) {
            append('\n');
        }
    }

    // Nothing is ever written before the first block, so only the tail needs
    // trimming; trimming the head would invalidate the recorded offsets.
    return { text: text.trimEnd(), blocks, quadIndex };
}
/**
 * Produce a random block identifier of exactly eight base-36 characters.
 *
 * The number is drawn from the full 36^8 range and left-padded with zeros.
 * Slicing the decimal expansion of `Math.random()` instead can yield very
 * short or even empty IDs (0.5 -> "i", 0 -> ""), which makes collisions in
 * the block Map far more likely.
 *
 * Not cryptographically secure; intended only as an opaque in-memory key.
 *
 * @returns {string} Eight characters from [0-9a-z].
 */
function generateBlockId() {
    const ID_LENGTH = 8;
    const idSpace = 36 ** ID_LENGTH;
    return Math.floor(Math.random() * idSpace).toString(36).padStart(ID_LENGTH, '0');
}

package/src/index.js CHANGED Viewed

@@ -1,5 +1,7 @@
 export { parse } from './parse.js';
-export { serialize } from './serialize.js';
+export { applyDiff } from './applyDiff.js';
+export { generate } from './generate.js';
+export { locate } from './locate.js';
 export {
     DEFAULT_CONTEXT,
     DataFactory,

package/src/locate.js ADDED Viewed

@@ -0,0 +1,92 @@
+import { parse } from './parse.js';
+import { normalizeQuad, quadIndexKey } from './utils.js';
/**
 * Locate the text range that carries a quad in MDLD text, using origin tracking.
 *
 * The quad is normalized, turned into a quad-index key, and looked up in
 * `origin.quadIndex`; the slot found there names the block in `origin.blocks`.
 * For heading, blockquote, list and span carriers the reported range starts
 * out as the whole block. For the non-heading carriers it is narrowed to the
 * carrier text when the block entry's `raw` contains a `{` and the text in
 * front of it can be found inside the block. Other carrier types report a
 * `null` range and empty content.
 *
 * @param {Object} quad - The quad to locate (subject, predicate, object).
 * @param {Object} [origin] - Origin with `blocks` and `quadIndex` Maps. When
 *   falsy, it is obtained by parsing `text`.
 * @param {string} [text=''] - MDLD text. Used to build the origin when none is
 *   given and as the source of `content`; `null` is treated like ''.
 * @param {Object} [context={}] - Context passed to the parser when `text` has
 *   to be parsed.
 * @returns {Object|null} `{ blockId, entryIndex, kind, subject, predicate,
 *   object, range, content, blockRange, carrierType, isVacant }`, or `null`
 *   when the quad, the origin, the index slot or the block is missing.
 */
export function locate(quad, origin, text = '', context = {}) {
    // Default parameters only cover `undefined`; an explicit null would
    // otherwise crash on `.substring` below.
    const source = text ?? '';

    let resolvedOrigin = origin;
    if (!resolvedOrigin && source) {
        resolvedOrigin = parse(source, { context }).origin;
    }
    if (!quad || !resolvedOrigin || !resolvedOrigin.quadIndex || !resolvedOrigin.blocks) {
        return null;
    }

    // Normalize so the key matches the form stored in quadIndex.
    const normalizedQuad = normalizeQuad(quad);
    if (!normalizedQuad) {
        return null;
    }
    const quadKey = quadIndexKey(normalizedQuad.subject, normalizedQuad.predicate, normalizedQuad.object);

    const slotInfo = resolvedOrigin.quadIndex.get(quadKey);
    if (!slotInfo) {
        return null;
    }
    const block = resolvedOrigin.blocks.get(slotInfo.blockId);
    if (!block) {
        return null;
    }

    let contentRange = null;
    let content = '';
    const rangedCarriers = ['heading', 'blockquote', 'list', 'span'];
    if (rangedCarriers.includes(block.carrierType)) {
        // Start from the whole block; narrow below when possible.
        contentRange = block.range;
        content = source.substring(block.range.start, block.range.end);

        if (block.carrierType !== 'heading') {
            const entry = slotInfo.entryIndex != null && block.entries
                ? block.entries[slotInfo.entryIndex]
                : undefined;
            const annotationStart = entry && entry.raw ? entry.raw.indexOf('{') : -1;
            if (annotationStart !== -1) {
                // Text in front of the annotation is the carrier content.
                const carrierContent = entry.raw.substring(0, annotationStart).trim();
                const contentStart = source.indexOf(carrierContent, block.range.start);
                const contentEnd = contentStart + carrierContent.length;
                // Accept the match only if it lies inside this block; a hit
                // further down the document belongs to some other block.
                if (contentStart !== -1 && contentEnd <= block.range.end) {
                    contentRange = { start: contentStart, end: contentEnd };
                    content = source.substring(contentStart, contentEnd);
                }
            }
        }
    }

    return {
        blockId: slotInfo.blockId,
        entryIndex: slotInfo.entryIndex,
        kind: slotInfo.kind,
        subject: normalizedQuad.subject,
        predicate: normalizedQuad.predicate,
        object: normalizedQuad.object,
        range: contentRange,
        content: content,
        blockRange: block.range,
        carrierType: block.carrierType,
        isVacant: slotInfo.isVacant || false
    };
}

package/src/parse.js CHANGED Viewed

@@ -562,7 +562,12 @@ const manageListStack = (token, state) => {
 const combineSemanticInfo = (token, carriers, listFrame, state, itemSubject) => {
     const combinedSem = { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
-    const addSem = (sem) => { combinedSem.types.push(...sem.types); combinedSem.predicates.push(...sem.predicates); combinedSem.entries.push(...sem.entries); };
+    const addSem = (sem) => {
+        const entryIndex = combinedSem.entries.length;
+        combinedSem.types.push(...sem.types);
+        combinedSem.predicates.push(...sem.predicates);
+        combinedSem.entries.push(...sem.entries.map(entry => ({ ...entry, entryIndex })));
+    };
     if (listFrame?.contextSem) {
         const inheritedSem = processContextSem({ sem: listFrame.contextSem, itemSubject, contextSubject: listFrame.contextSubject, inheritLiterals: true, state });