npm - mdld-parse - Versions diffs - 0.5.3 → 0.5.5 - Mend

mdld-parse 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/README.md CHANGED Viewed

@@ -141,11 +141,11 @@ Create fragment IRIs relative to current subject:
 ```markdown
 # Document {=ex:document}
 {=#summary}
-[Content] {name}
+[Content] {label}
 ```
 ```turtle
-ex:document#summary schema:name "Content" .
+ex:document#summary rdfs:label "Content" .
 ```
 Fragments replace any existing fragment and require a current subject.
@@ -157,11 +157,11 @@ Subject remains in scope until reset with `{=}` or new subject declared.
 Emit `rdf:type` triple:
 ```markdown
-## Apollo 11 {=ex:apollo11 .SpaceMission .Event}
+## Apollo 11 {=ex:apollo11 .ex:SpaceMission .ex:Event}
 ```
 ```turtle
-ex:apollo11 a schema:SpaceMission, schema:Event .
+ex:apollo11 a ex:SpaceMission, ex:Event .
 ```
 ### Literal Properties
@@ -171,15 +171,15 @@ Inline value carriers emit literal properties:
 ```markdown
 # Mission {=ex:apollo11}
-[Neil Armstrong] {commander}
-[1969] {year ^^xsd:gYear}
-[Historic mission] {description @en}
+[Neil Armstrong] {ex:commander}
+[1969] {ex:year ^^xsd:gYear}
+[Historic mission] {ex:description @en}
 ```
 ```turtle
-ex:apollo11 schema:commander "Neil Armstrong" ;
-  schema:year "1969"^^xsd:gYear ;
-  schema:description "Historic mission"@en .
+ex:apollo11 ex:commander "Neil Armstrong" ;
+  ex:year "1969"^^xsd:gYear ;
+  ex:description "Historic mission"@en .
 ```
 ### Object Properties
@@ -189,11 +189,11 @@ Links create relationships (use `?` prefix):
 ```markdown
 # Mission {=ex:apollo11}
-[NASA] {=ex:nasa ?organizer}
+[NASA] {=ex:nasa ?ex:organizer}
 ```
 ```turtle
-ex:apollo11 schema:organizer ex:nasa .
+ex:apollo11 ex:organizer ex:nasa .
 ```
 ### Resource Declaration
@@ -203,12 +203,12 @@ Declare resources inline with `{=iri}`:
 ```markdown
 # Mission {=ex:apollo11}
-[Neil Armstrong] {=ex:armstrong ?commander .Person}
+[Neil Armstrong] {=ex:armstrong ?ex:commander .prov:Person}
 ```
 ```turtle
-ex:apollo11 schema:commander ex:armstrong .
-ex:armstrong a schema:Person .
+ex:apollo11 ex:commander ex:armstrong .
+ex:armstrong a prov:Person .
 ```
 ### Lists
@@ -218,15 +218,15 @@ Lists require explicit subjects per item.
 ```markdown
 # Recipe {=ex:recipe}
-Ingredients: {?ingredient .Ingredient}
-- Flour {=ex:flour name}
-- Water {=ex:water name}
+Ingredients: {?ex:ingredient .ex:Ingredient}
+- Flour {=ex:flour label}
+- Water {=ex:water label}
 ```
 ```turtle
-ex:recipe schema:ingredient ex:flour, ex:water .
-ex:flour a schema:Ingredient ; schema:name "Flour" .
-ex:water a schema:Ingredient ; schema:name "Water" .
+ex:recipe ex:ingredient ex:flour, ex:water .
+ex:flour a ex:Ingredient ; rdfs:label "Flour" .
+ex:water a ex:Ingredient ; rdfs:label "Water" .
 ```
 ### Code Blocks
@@ -236,14 +236,14 @@ Code blocks are value carriers:
 ````markdown
 # Example {=ex:example}
-```javascript {=ex:code .SoftwareSourceCode text}
+```javascript {=ex:code .ex:SoftwareSourceCode ex:text}
 console.log("hello");
 ```
 ````
 ```turtle
-ex:code a schema:SoftwareSourceCode ;
-  schema:text "console.log(\"hello\")" .
+ex:code a ex:SoftwareSourceCode ;
+  ex:text "console.log(\"hello\")" .
 ```
 ### Blockquotes
@@ -251,11 +251,11 @@ ex:code a schema:SoftwareSourceCode ;
 ```markdown
 # Article {=ex:article}
-> MD-LD bridges Markdown and RDF. {abstract}
+> MD-LD bridges Markdown and RDF. {comment}
 ```
 ```turtle
-ex:article schema:abstract "MD-LD bridges Markdown and RDF." .
+ex:article rdfs:comment "MD-LD bridges Markdown and RDF." .
 ```
 ### Reverse Relations
@@ -265,13 +265,13 @@ Reverse the relationship direction:
 ```markdown
 # Part {=ex:part}
-Part of: {!hasPart}
+Part of: {!ex:hasPart}
 - Book {=ex:book}
 ```
 ```turtle
-ex:book schema:hasPart ex:part .
+ex:book ex:hasPart ex:part .
 ```
 ### Prefix Declarations
@@ -279,7 +279,6 @@ ex:book schema:hasPart ex:part .
 ```markdown
 [ex] <http://example.org/>
 [foaf] <http://xmlns.com/foaf/0.1/>
-[@vocab] <http://schema.org/>
 # Person {=ex:alice .foaf:Person}
 ```
@@ -326,7 +325,7 @@ Parse MD-LD markdown and return RDF quads with origin tracking.
 - `markdown` (string) — MD-LD formatted text
 - `options` (object, optional):
-  - `context` (object) — Prefix mappings (default: `{ '@vocab': 'http://www.w3.org/2000/01/rdf-schema#', rdf, rdfs, xsd, schema }`)
+  - `context` (object) — Prefix mappings (default: `{ '@vocab': 'http://www.w3.org/2000/01/rdf-schema#', rdf, rdfs, xsd, sh, prov }`)
   - `dataFactory` (object) — Custom RDF/JS DataFactory
 **Returns:** `{ quads, origin, context }`
@@ -382,14 +381,14 @@ Apply RDF changes back to markdown with proper positioning.
 ```javascript
 const original = `# Article {=ex:article}
-[Alice] {author}`;
+[Alice] {ex:author}`;
 const result = parse(original, { context: { ex: 'http://example.org/' } });
 // Add a new property
 const newQuad = {
   subject: { termType: 'NamedNode', value: 'http://example.org/article' },
-  predicate: { termType: 'NamedNode', value: 'http://schema.org/datePublished' },
+  predicate: { termType: 'NamedNode', value: 'http://example.org/datePublished' },
   object: { termType: 'Literal', value: '2024-01-01' }
 };
@@ -437,20 +436,19 @@ const quads = [
   },
   {
     subject: { termType: 'NamedNode', value: 'http://example.org/article' },
-    predicate: { termType: 'NamedNode', value: 'http://schema.org/author' },
+    predicate: { termType: 'NamedNode', value: 'http://example.org/author' },
     object: { termType: 'NamedNode', value: 'http://example.org/alice' }
   }
 ];
 const result = generate(quads, {
   ex: 'http://example.org/',
-  schema: 'http://schema.org/'
 });
 console.log(result.text);
 // # Article {=ex:article .ex:Article}
 //
-// > alice {+ex:alice ?schema:author}
+// > alice {+ex:alice ?ex:author}
 ```
 ### `locate(quad, origin, text, context)`

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "mdld-parse",
-	"version": "0.5.3",
+	"version": "0.5.5",
 	"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
 	"type": "module",
 	"main": "index.js",

package/src/generate.js CHANGED Viewed

@@ -164,7 +164,7 @@ function buildDeterministicMDLD(subjectGroups, context) {
                 annotation += ` ^^${shortenIRI(quad.object.datatype.value, context)}`;
             }
-            const literalText = `> ${quad.object.value} {${annotation}}\n`;
+            const literalText = `[${quad.object.value}] {${annotation}}\n`;
             const literalBlockId = generateBlockId();
             const literalBlock = {
                 id: literalBlockId,
@@ -204,7 +204,7 @@ function buildDeterministicMDLD(subjectGroups, context) {
             const objShort = shortenIRI(quad.object.value, context);
             const localName = extractLocalName(quad.object.value);
-            const objectText = `> ${localName} {+${objShort} ?${predShort}}\n`;
+            const objectText = `[${localName}] {+${objShort} ?${predShort}}\n`;
             const objectBlockId = generateBlockId();
             const objectBlock = {
                 id: objectBlockId,

package/src/parse.js CHANGED Viewed

@@ -10,7 +10,7 @@ import {
 } from './utils.js';
 const URL_REGEX = /^[a-zA-Z][a-zA-Z0-9+.-]*:/;
-const FENCE_REGEX = /^(`{3,})(.*)/;
+const FENCE_REGEX = /^(`{3,}|~{3,})(.*)/;
 const PREFIX_REGEX = /^\[([^\]]+)\]\s*<([^>]+)>/;
 const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
 const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
@@ -22,6 +22,29 @@ const INLINE_CARRIER_PATTERNS = {
     CODE_SPAN: /``(.+?)``\s*\{([^}]+)\}/y
 };
+// Cache for fence regex patterns to avoid recreation
+const FENCE_CLOSE_PATTERNS = new Map();
+function getFenceClosePattern(fenceChar) {
+    if (!FENCE_CLOSE_PATTERNS.has(fenceChar)) {
+        FENCE_CLOSE_PATTERNS.set(fenceChar, new RegExp(`^(${fenceChar}{3,})`));
+    }
+    return FENCE_CLOSE_PATTERNS.get(fenceChar);
+}
+function parseLangAndAttrs(langAndAttrs) {
+    const spaceIndex = langAndAttrs.indexOf(' ');
+    const braceIndex = langAndAttrs.indexOf('{');
+    const langEnd = Math.min(
+        spaceIndex > -1 ? spaceIndex : Infinity,
+        braceIndex > -1 ? braceIndex : Infinity
+    );
+    return {
+        lang: langAndAttrs.substring(0, langEnd),
+        attrsText: langAndAttrs.substring(langEnd).match(/\{[^{}]*\}/)?.[0] || null
+    };
+}
 const semCache = {};
 const EMPTY_SEM = Object.freeze({ predicates: [], types: [], subject: null });
@@ -79,108 +102,104 @@ function scanTokens(text) {
     let pos = 0;
     let codeBlock = null;
-    const processors = [
-        {
-            test: line => line.startsWith('```'),
-            process: (line, lineStart, pos) => {
-                if (!codeBlock) {
-                    const fenceMatch = line.match(FENCE_REGEX);
-                    const attrsText = fenceMatch[2].match(/\{[^{}]*\}/)?.[0] || null;
-                    const attrsStartInLine = attrsText ? line.indexOf(attrsText) : -1;
-                    const contentStart = lineStart + line.length + 1;
-                    const langAndAttrs = fenceMatch[2];
-                    const langEnd = langAndAttrs.indexOf(' ') > -1 ? langAndAttrs.indexOf(' ') :
-                        langAndAttrs.indexOf('{') > -1 ? langAndAttrs.indexOf('{') : langAndAttrs.length;
-                    codeBlock = {
-                        fence: fenceMatch[1],
-                        start: lineStart,
-                        content: [],
-                        lang: langAndAttrs.substring(0, langEnd),
-                        attrs: attrsText,
-                        attrsRange: attrsText && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrsText.length] : null,
-                        valueRangeStart: contentStart
-                    };
-                } else if (line.startsWith(codeBlock.fence)) {
-                    const valueStart = codeBlock.valueRangeStart;
-                    const valueEnd = Math.max(valueStart, lineStart - 1);
-                    tokens.push({
-                        type: 'code',
-                        range: [codeBlock.start, lineStart],
-                        text: codeBlock.content.join('\n'),
-                        lang: codeBlock.lang,
-                        attrs: codeBlock.attrs,
-                        attrsRange: codeBlock.attrsRange,
-                        valueRange: [valueStart, valueEnd]
-                    });
-                    codeBlock = null;
-                }
-                return true;
-            }
-        },
-        {
-            test: () => codeBlock,
-            process: line => {
-                codeBlock.content.push(line);
-                return true;
-            }
-        },
-        {
-            test: line => PREFIX_REGEX.test(line),
-            process: (line, lineStart, pos) => {
-                const match = PREFIX_REGEX.exec(line);
-                tokens.push({ type: 'prefix', prefix: match[1], iri: match[2].trim() });
-                return true;
-            }
-        },
-        {
-            test: line => HEADING_REGEX.test(line),
-            process: (line, lineStart, pos) => {
-                const match = HEADING_REGEX.exec(line);
-                const attrs = match[3] || null;
-                const afterHashes = match[1].length;
-                const rangeInfo = calcRangeInfo(line, attrs, lineStart, afterHashes, match[2].length);
-                tokens.push(createToken('heading', [lineStart, pos - 1], match[2].trim(), attrs,
-                    rangeInfo.attrsRange, rangeInfo.valueRange, { depth: match[1].length }));
-                return true;
-            }
-        },
-        {
-            test: line => UNORDERED_LIST_REGEX.test(line),
-            process: (line, lineStart, pos) => {
-                const match = UNORDERED_LIST_REGEX.exec(line);
-                tokens.push(createListToken('list', line, lineStart, pos, match, match[1].length));
-                return true;
-            }
-        },
-        {
-            test: line => BLOCKQUOTE_REGEX.test(line),
-            process: (line, lineStart, pos) => {
-                const match = BLOCKQUOTE_REGEX.exec(line);
-                const attrs = match[2] || null;
-                const valueStartInLine = line.startsWith('> ') ? 2 : line.indexOf('>') + 1;
-                const valueEndInLine = valueStartInLine + match[1].length;
-                tokens.push(createToken('blockquote', [lineStart, pos - 1], match[1].trim(), attrs,
-                    calcAttrsRange(line, attrs, lineStart),
-                    [lineStart + valueStartInLine, lineStart + valueEndInLine]));
-                return true;
-            }
-        },
-        {
-            test: line => line.trim(),
-            process: (line, lineStart, pos) => {
-                tokens.push(createToken('para', [lineStart, pos - 1], line.trim()));
-                return true;
+    // Direct lookup instead of linear search
+    const PROCESSORS = [
+        { type: 'fence', test: line => FENCE_REGEX.test(line.trim()), process: handleFence },
+        { type: 'content', test: () => codeBlock, process: line => codeBlock.content.push(line) },
+        { type: 'prefix', test: line => PREFIX_REGEX.test(line), process: handlePrefix },
+        { type: 'heading', test: line => HEADING_REGEX.test(line), process: handleHeading },
+        { type: 'list', test: line => UNORDERED_LIST_REGEX.test(line), process: handleList },
+        { type: 'blockquote', test: line => BLOCKQUOTE_REGEX.test(line), process: handleBlockquote },
+        { type: 'para', test: line => line.trim(), process: handlePara }
+    ];
+    function handleFence(line, lineStart, pos) {
+        const trimmedLine = line.trim();
+        if (!codeBlock) {
+            const fenceMatch = trimmedLine.match(FENCE_REGEX);
+            if (!fenceMatch) return false;
+            const { lang, attrsText } = parseLangAndAttrs(fenceMatch[2]);
+            const attrsStartInLine = attrsText ? line.indexOf(attrsText) : -1;
+            const contentStart = lineStart + line.length + 1;
+            codeBlock = {
+                fence: fenceMatch[1],
+                start: lineStart,
+                content: [],
+                lang,
+                attrs: attrsText,
+                attrsRange: attrsText && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrsText.length] : null,
+                valueRangeStart: contentStart
+            };
+        } else {
+            const fenceChar = codeBlock.fence[0];
+            const expectedFence = fenceChar.repeat(codeBlock.fence.length);
+            const fenceMatch = trimmedLine.match(getFenceClosePattern(fenceChar));
+            if (fenceMatch && fenceMatch[1] === expectedFence) {
+                const valueStart = codeBlock.valueRangeStart;
+                const valueEnd = Math.max(valueStart, lineStart - 1);
+                tokens.push({
+                    type: 'code',
+                    range: [codeBlock.start, lineStart],
+                    text: codeBlock.content.join('\n'),
+                    lang: codeBlock.lang,
+                    attrs: codeBlock.attrs,
+                    attrsRange: codeBlock.attrsRange,
+                    valueRange: [valueStart, valueEnd]
+                });
+                codeBlock = null;
             }
         }
-    ];
+        return true;
+    }
+    function handlePrefix(line, lineStart, pos) {
+        const match = PREFIX_REGEX.exec(line);
+        tokens.push({ type: 'prefix', prefix: match[1], iri: match[2].trim() });
+        return true;
+    }
+    function handleHeading(line, lineStart, pos) {
+        const match = HEADING_REGEX.exec(line);
+        const attrs = match[3] || null;
+        const afterHashes = match[1].length;
+        const rangeInfo = calcRangeInfo(line, attrs, lineStart, afterHashes, match[2].length);
+        tokens.push(createToken('heading', [lineStart, pos - 1], match[2].trim(), attrs,
+            rangeInfo.attrsRange, rangeInfo.valueRange, { depth: match[1].length }));
+        return true;
+    }
+    function handleList(line, lineStart, pos) {
+        const match = UNORDERED_LIST_REGEX.exec(line);
+        tokens.push(createListToken('list', line, lineStart, pos, match, match[1].length));
+        return true;
+    }
+    function handleBlockquote(line, lineStart, pos) {
+        const match = BLOCKQUOTE_REGEX.exec(line);
+        const attrs = match[2] || null;
+        const valueStartInLine = line.startsWith('> ') ? 2 : line.indexOf('>') + 1;
+        const valueEndInLine = valueStartInLine + match[1].length;
+        tokens.push(createToken('blockquote', [lineStart, pos - 1], match[1].trim(), attrs,
+            calcAttrsRange(line, attrs, lineStart),
+            [lineStart + valueStartInLine, lineStart + valueEndInLine]));
+        return true;
+    }
+    function handlePara(line, lineStart, pos) {
+        tokens.push(createToken('para', [lineStart, pos - 1], line.trim()));
+        return true;
+    }
     for (let i = 0; i < lines.length; i++) {
         const line = lines[i];
         const lineStart = pos;
         pos += line.length + 1;
-        // Try each processor until one handles the line
-        for (const processor of processors) {
+        // Direct processor lookup - O(n) instead of O(n*m)
+        for (const processor of PROCESSORS) {
             if (processor.test(line) && processor.process(line, lineStart, pos)) {
                 break;
             }

package/src/utils.js CHANGED Viewed

@@ -25,16 +25,31 @@ export function hash(str) {
     return Math.abs(h).toString(16).slice(0, 12);
 }
+const iriCache = new Map();
 export function expandIRI(term, ctx) {
     if (term == null) return null;
+    const cacheKey = `${term}|${ctx['@vocab'] || ''}|${Object.keys(ctx).filter(k => k !== '@vocab').sort().map(k => `${k}:${ctx[k]}`).join(',')}`;
+    if (iriCache.has(cacheKey)) {
+        return iriCache.get(cacheKey);
+    }
     const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
     const t = raw.trim();
-    if (t.match(/^https?:/)) return t;
-    if (t.includes(':')) {
+    let result;
+    if (t.match(/^https?:/)) {
+        result = t;
+    } else if (t.includes(':')) {
         const [prefix, ref] = t.split(':', 2);
-        return ctx[prefix] ? ctx[prefix] + ref : t;
+        result = ctx[prefix] ? ctx[prefix] + ref : t;
+    } else {
+        result = (ctx['@vocab'] || '') + t;
     }
-    return (ctx['@vocab'] || '') + t;
+    iriCache.set(cacheKey, result);
+    return result;
 }
 export function shortenIRI(iri, ctx) {