npm - mdld-parse - Versions diffs - 0.7.0 → 0.7.2 - Mend

mdld-parse 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md CHANGED Viewed

@@ -56,6 +56,7 @@ console.log(result.quads);
 - **🧩 Fragments** - Document structuring with `{=#fragment}`
 - **⚡ Polarity system** - Sophisticated diff authoring with `+` and `-` prefixes
 - **📍 Origin tracking** - Complete provenance with lean quad-to-source mapping
+- **🎯 Elevated statements** - Automatic rdf:Statement pattern detection for "golden" graph extraction
 ## 🌟 What is MD-LD?
@@ -122,7 +123,24 @@ Each predicate form determines the graph edge:
 | `?p`  | S → O   | `[NASA] {=ex:nasa ?org}`     | object property  |
 | `!p` | O → S    | `[Parent] {=ex:p !hasPart}`  | reverse object   |
-## 🎨 Syntax Quick Reference
+## 🎯 Elevated Statements
+MD-LD automatically detects `rdf:Statement` patterns during parsing and extracts elevated SPO quads for convenient consumption by applications.
+### Pattern Detection
+When the parser encounters a complete `rdf:Statement` pattern with `rdf:subject`, `rdf:predicate`, and `rdf:object`, it automatically adds the corresponding SPO quad to the `statements` array:
+```markdown
+[ex] <http://example.org/>
+## Elevated statement {=ex:stmt1 .rdf:Statement}
+**Alice** {+ex:alice ?rdf:subject} *knows* {+ex:knows ?rdf:predicate} **Bob** {+ex:bob ?rdf:object}
+Direct statement:**Alice** {=ex:alice} knows **Bob** {?ex:knows +ex:bob}
+```
+## 🎨 Syntax Quick Reference
 ### Subject Declaration
 Set current subject (emits no quads):
@@ -180,10 +198,11 @@ Parse MD-LD markdown and return RDF quads with lean origin tracking.
   - `context` (object) — Prefix mappings (default: `{ '@vocab': 'http://www.w3.org/2000/01/rdf-schema#', rdf, rdfs, xsd, sh, prov }`)
   - `dataFactory` (object) — Custom RDF/JS DataFactory
-**Returns:** `{ quads, remove, origin, context }`
+**Returns:** `{ quads, remove, statements, origin, context }`
 - `quads` — Array of RDF/JS Quads (final resolved graph state)
 - `remove` — Array of RDF/JS Quads (external retractions targeting prior state)
+- `statements` — Array of elevated RDF/JS Quads extracted from rdf:Statement patterns
 - `origin` — Lean origin tracking object with quadIndex for UI navigation
 - `context` — Final context used (includes prefixes)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "mdld-parse",
-	"version": "0.7.0",
+	"version": "0.7.2",
 	"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
 	"type": "module",
 	"main": "index.js",
@@ -38,8 +38,5 @@
 	"homepage": "https://mdld.js.org",
 	"bugs": {
 		"url": "https://github.com/davay42/mdld-parse/issues"
-	},
-	"devDependencies": {
-		"n3": "^2.0.1"
 	}
 }

package/src/merge.js CHANGED Viewed

@@ -34,13 +34,14 @@ function normalizeInput(input, options, docContext) {
  * Merges multiple MDLD documents with diff polarity resolution
  * @param {Array<string|ParseResult>} docs
  * @param {Object} options
- * @returns {Object}
+ * @returns {Object} Merge result with quads, remove, statements, origin, and context
  */
 export function merge(docs, options = {}) {
     const sessionBuffer = new Map(); // Use Map instead of Set for proper quad storage
     const sessionRemoveSet = new Set();
     const allDocuments = [];
     const quadIndex = new Map();
+    const allStatements = []; // Collect statements from all documents
     // Process each document in order
     for (let i = 0; i < docs.length; i++) {
@@ -57,10 +58,16 @@ export function merge(docs, options = {}) {
             index: i,
             input: typeof input === 'string' ? 'string' : 'ParseResult',
             origin: doc.origin,
-            context: doc.context
+            context: doc.context,
+            statementsCount: doc.statements?.length || 0 // Track statements count
         };
         allDocuments.push(documentOrigin);
+        // Collect statements from this document
+        if (doc.statements && doc.statements.length > 0) {
+            allStatements.push(...doc.statements);
+        }
         // Fold assertions into session buffer
         for (const quad of doc.quads) {
             const key = quadKey(quad);
@@ -125,6 +132,7 @@ export function merge(docs, options = {}) {
     return {
         quads: filteredQuads,
         remove: filteredRemove,
+        statements: allStatements, // Include all collected statements
         origin: mergeOrigin,
         context: finalContext
     };

package/src/parse.js CHANGED Viewed

@@ -210,6 +210,12 @@ function createCarrier(type, text, attrs, attrsRange, valueRange, range, pos, ex
     return { type, text, attrs, attrsRange, valueRange, range, pos, ...extra };
 }
+// Pre-compiled carrier patterns for better performance
+const CARRIER_PATTERN_ARRAY = [
+    ['EMPHASIS', /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y],
+    ['CODE_SPAN', /``(.+?)``\s*\{([^}]+)\}/y]
+];
 function extractInlineCarriers(text, baseOffset = 0) {
     const carriers = [];
     let pos = 0;
@@ -243,7 +249,8 @@ function extractInlineCarriers(text, baseOffset = 0) {
         const extractor = CARRIER_EXTRACTORS[text[pos]];
         if (extractor) return extractor(text, pos, baseOffset);
-        for (const [type, pattern] of Object.entries(INLINE_CARRIER_PATTERNS)) {
+        // Use pre-compiled patterns instead of Object.entries()
+        for (const [type, pattern] of CARRIER_PATTERN_ARRAY) {
             pattern.lastIndex = pos;
             const match = pattern.exec(text);
             if (match) {
@@ -364,7 +371,7 @@ function createBlock(subject, types, predicates, range, attrsRange, valueRange,
     };
 }
-function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predicate, object, dataFactory, meta = null) {
+function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predicate, object, dataFactory, meta = null, statements = null, statementCandidates = null) {
     if (!subject || !predicate || !object) return;
     const quad = dataFactory.quad(subject, predicate, object);
@@ -397,14 +404,17 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
         quadBuffer.set(quadKey, quad);
         quads.push(quad);
-        // Create lean origin entry
+        // Detect rdf:Statement pattern during single-pass parsing
+        detectStatementPatternSinglePass(quad, dataFactory, meta, statements, statementCandidates);
+        // Create lean origin entry - avoid spread operator for better performance
         const originEntry = {
             blockId: block.id,
             range: block.range,
             carrierType: block.carrierType,
             subject: subject.value,
             predicate: predicate.value,
-            context: { ...block.context },
+            context: block.context, // Direct reference instead of spread
             polarity: meta?.remove ? '-' : '+',
             value: block.text || ''
         };
@@ -413,10 +423,68 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
     }
 }
+// Extract RDF constants once at module level for efficiency
+const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
+const RDF_STATEMENT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement';
+const RDF_SUBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject';
+const RDF_PREDICATE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate';
+const RDF_OBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#object';
+function detectStatementPatternSinglePass(quad, dataFactory, meta, statements = null, statementCandidates = null) {
+    // Skip if not called from parse context (for testing compatibility)
+    if (!statements || !statementCandidates) return;
+    const predicate = quad.predicate.value;
+    // Early filter: only process rdf:Statement related predicates
+    if (predicate !== RDF_TYPE &&
+        predicate !== RDF_SUBJECT &&
+        predicate !== RDF_PREDICATE &&
+        predicate !== RDF_OBJECT) {
+        return;
+    }
+    // Check if this quad starts a new rdf:Statement pattern
+    if (predicate === RDF_TYPE && quad.object.value === RDF_STATEMENT) {
+        statementCandidates.set(quad.subject.value, { spo: {} });
+        return;
+    }
+    // Check if this quad completes part of an existing rdf:Statement pattern
+    const candidate = statementCandidates.get(quad.subject.value);
+    if (!candidate) return;
+    // Direct property assignment instead of switch for better performance
+    if (predicate === RDF_SUBJECT) {
+        candidate.spo.subject = quad.object;
+    } else if (predicate === RDF_PREDICATE) {
+        candidate.spo.predicate = quad.object;
+    } else if (predicate === RDF_OBJECT) {
+        candidate.spo.object = quad.object;
+        // Store the original quad for potential literal extraction
+        candidate.objectQuad = quad;
+    }
+    // Check if pattern is complete and create elevated SPO quad
+    if (candidate.spo.subject && candidate.spo.predicate && candidate.spo.object) {
+        // Use the object directly - literal detection happens at parse time
+        const spoQuad = dataFactory.quad(
+            candidate.spo.subject,
+            candidate.spo.predicate,
+            candidate.spo.object
+        );
+        statements.push(spoQuad);
+        // Clean up candidate to avoid duplicate detection
+        statementCandidates.delete(quad.subject.value);
+    }
+}
 const resolveFragment = (fragment, state) => {
     if (!state.currentSubject) return null;
-    const baseIRI = state.currentSubject.value.split('#')[0];
-    return state.df.namedNode(`${baseIRI}#${fragment}`);
+    const subjectValue = state.currentSubject.value;
+    const hashIndex = subjectValue.indexOf('#');
+    const baseIRI = hashIndex > -1 ? subjectValue.slice(0, hashIndex) : subjectValue;
+    return state.df.namedNode(baseIRI + '#' + fragment);
 };
 function resolveSubject(sem, state) {
@@ -444,7 +512,8 @@ const createTypeQuad = (typeIRI, subject, state, block, entryIndex = null) => {
         state.df.namedNode(expandIRI('rdf:type', state.ctx)),
         state.df.namedNode(expandedType),
         state.df,
-        { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex: typeInfo.entryIndex, remove: typeInfo.remove }
+        { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex: typeInfo.entryIndex, remove: typeInfo.remove },
+        state.statements, state.statementCandidates
     );
 };
@@ -487,7 +556,8 @@ function processPredicateAnnotations(sem, newSubject, previousSubject, localObje
             const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
             emitQuad(state.quads, state.quadBuffer, state.removeSet, state.origin.quadIndex, block,
                 role.subject, P, role.object, state.df,
-                { kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex, remove: pred.remove || false }
+                { kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex, remove: pred.remove || false },
+                state.statements, state.statementCandidates
             );
         }
     });
@@ -592,23 +662,28 @@ export function parse(text, options = {}) {
         origin: { quadIndex: new Map() },
         currentSubject: null,
         tokens: null,
-        currentTokenIndex: -1
+        currentTokenIndex: -1,
+        statements: [],
+        statementCandidates: new Map() // Track incomplete rdf:Statement patterns
     };
     state.tokens = scanTokens(text);
-    state.tokens.filter(t => t.type === 'prefix').forEach(t => {
-        let resolvedIri = t.iri;
-        if (t.iri.includes(':')) {
-            const colonIndex = t.iri.indexOf(':');
-            const potentialPrefix = t.iri.substring(0, colonIndex);
-            const reference = t.iri.substring(colonIndex + 1);
-            if (state.ctx[potentialPrefix] && potentialPrefix !== '@vocab') {
-                resolvedIri = state.ctx[potentialPrefix] + reference;
+    // Single loop instead of filter+forEach for better performance
+    for (const token of state.tokens) {
+        if (token.type === 'prefix') {
+            let resolvedIri = token.iri;
+            if (token.iri.includes(':')) {
+                const colonIndex = token.iri.indexOf(':');
+                const potentialPrefix = token.iri.substring(0, colonIndex);
+                const reference = token.iri.substring(colonIndex + 1);
+                if (state.ctx[potentialPrefix] && potentialPrefix !== '@vocab') {
+                    resolvedIri = state.ctx[potentialPrefix] + reference;
+                }
             }
+            state.ctx[token.prefix] = resolvedIri;
         }
-        state.ctx[t.prefix] = resolvedIri;
-    });
+    }
     for (let i = 0; i < state.tokens.length; i++) {
         const token = state.tokens[i];
@@ -616,18 +691,20 @@ export function parse(text, options = {}) {
         TOKEN_PROCESSORS[token.type]?.(token, state);
     }
-    // Convert removeSet to array and ensure hard invariant: quads ∩ remove = ∅
-    const removeArray = Array.from(state.removeSet);
+    // Optimize array operations - avoid Array.from() and filter()
     const quadKeys = new Set();
-    state.quads.forEach(q => {
-        quadKeys.add(quadIndexKey(q.subject, q.predicate, q.object));
-    });
+    for (const quad of state.quads) {
+        quadKeys.add(quadIndexKey(quad.subject, quad.predicate, quad.object));
+    }
-    // Filter removeArray to ensure no overlap with quads
-    const filteredRemove = removeArray.filter(quad => {
+    // Direct iteration instead of Array.from() + filter()
+    const filteredRemove = [];
+    for (const quad of state.removeSet) {
         const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
-        return !quadKeys.has(key);
-    });
+        if (!quadKeys.has(key)) {
+            filteredRemove.push(quad);
+        }
+    }
-    return { quads: state.quads, remove: filteredRemove, origin: state.origin, context: state.ctx };
+    return { quads: state.quads, remove: filteredRemove, statements: state.statements, origin: state.origin, context: state.ctx };
 }