npm - mdld-parse - Versions diffs - 0.7.3 → 0.7.5 - Mend

mdld-parse 0.7.3 → 0.7.5

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (10)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "mdld-parse",
-	"version": "0.7.3",
+	"version": "0.7.5",
 	"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
 	"type": "module",
 	"main": "index.js",
@@ -38,8 +38,5 @@
 	"homepage": "https://mdld.js.org",
 	"bugs": {
 		"url": "https://github.com/davay42/mdld-parse/issues"
-	},
-	"dependencies": {
-		"rdfa-parse": "^1.0.1"
 	}
 }

package/src/constants.js ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * Shared utilities for MD-LD Parser and Renderer
+ * Ensures DRY code and consistent CommonMark processing
+ */
+export const DEFAULT_CONTEXT = {
+    '@vocab': "http://www.w3.org/2000/01/rdf-schema#",
+    rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
+    rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
+    xsd: 'http://www.w3.org/2001/XMLSchema#',
+    sh: "http://www.w3.org/ns/shacl#",
+    prov: 'http://www.w3.org/ns/prov#'
+};
+// CommonMark patterns - shared between parser and renderer
+export const URL_REGEX = /^(https?|ftp|mailto|tag|nih|urn|uuid|did|web|ipfs|ipns|data|file|urn:uuid):/;
+export const FENCE_REGEX = /^(`{3,}|~{3,})(.*)/;
+export const PREFIX_REGEX = /^\[([^\]]+)\]\s*<([^>]+)>/;
+export const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
+export const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
+export const BLOCKQUOTE_REGEX = /^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
+export const STANDALONE_SUBJECT_REGEX = /^\s*\{=(.*?)\}\s*$/;
+// Pre-compiled carrier patterns for performance
+export const CARRIER_PATTERN_ARRAY = [
+    ['EMPHASIS', /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y],
+    ['CODE_SPAN', /``(.+?)``\s*\{([^}]+)\}/y]
+];

package/src/generate.js CHANGED Viewed

@@ -1,21 +1,26 @@
 import { shortenIRI, expandIRI, DataFactory } from './utils.js';
-import { DEFAULT_CONTEXT } from './shared.js';
-// Helper functions for cleaner term type checking
-function isLiteral(term) {
-    return term?.termType === 'Literal';
-}
-function isNamedNode(term) {
-    return term?.termType === 'NamedNode';
-}
-function isRdfType(term) {
-    return term?.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
-}
+import { DEFAULT_CONTEXT } from './constants.js';
+import {
+    isLiteral,
+    collectUsedPrefixes,
+    sortQuadsByPredicate,
+    generatePrefixDeclaration,
+    generateLiteralText,
+    generateObjectText,
+    filterQuadsByType
+} from './shared.js';
+export function extractLocalName(iri, ctx = {}) {
+    if (!iri) return iri;
+    // Check for exact prefix matches first
+    for (const [prefix, namespace] of Object.entries(ctx)) {
+        if (iri.startsWith(namespace) || iri.startsWith(namespace.slice(0, -1))) {
+            return iri.substring(namespace.length);
+        }
+    }
-function extractLocalName(iri) {
+    // Fallback to original logic for local names
     const separators = ['#', '/', ':'];
     for (const sep of separators) {
         const lastSep = iri.lastIndexOf(sep);
@@ -83,14 +88,14 @@ function groupQuadsBySubject(quads) {
 function buildDeterministicMDLD(subjectGroups, context) {
     let text = '';
+    const usedPrefixes = collectUsedPrefixes(subjectGroups, context);
     // Add prefixes first (deterministic order), but exclude default context prefixes
     const sortedPrefixes = Object.entries(context).sort(([a], [b]) => a.localeCompare(b));
     for (const [prefix, namespace] of sortedPrefixes) {
         // Skip default context prefixes - they're implicit in MDLD
-        if (prefix !== '@vocab' && !prefix.startsWith('@') && !DEFAULT_CONTEXT[prefix]) {
-            const prefixDecl = `[${prefix}] <${namespace}>\n`;
-            text += prefixDecl;
+        if (prefix !== '@vocab' && !prefix.startsWith('@') && !DEFAULT_CONTEXT[prefix] && usedPrefixes.has(prefix)) {
+            text += generatePrefixDeclaration(prefix, namespace);
         }
     }
@@ -105,47 +110,25 @@ function buildDeterministicMDLD(subjectGroups, context) {
         const subjectQuads = subjectGroups.get(subjectIRI);
         const shortSubject = shortenIRI(subjectIRI, context);
-        // Separate types, literals, and objects using helper functions
-        const types = subjectQuads.filter(q => isRdfType(q.predicate));
-        const literals = subjectQuads.filter(q => isLiteral(q.object) && !isRdfType(q.predicate));
-        const objects = subjectQuads.filter(q => isNamedNode(q.object) && !isRdfType(q.predicate));
+        // Separate types, literals, and objects using shared utility
+        const { types, literals, objects } = filterQuadsByType(subjectQuads);
         // Generate heading
-        const localSubjectName = extractLocalName(subjectIRI);
+        const localSubjectName = extractLocalName(subjectIRI, context);
         const typeAnnotations = types.length > 0
-            ? ' ' + types.map(t => '.' + extractLocalName(t.object.value)).sort().join(' ')
+            ? ' ' + types.map(t => '.' + shortenIRI(t.object.value, context)).sort().join(' ')
             : '';
-        const headingText = `# ${localSubjectName} {=${shortSubject}${typeAnnotations}}\n\n`;
-        text += headingText;
-        // Add literals (deterministic order)
-        const sortedLiterals = literals.sort((a, b) => a.predicate.value.localeCompare(b.predicate.value));
-        for (const quad of sortedLiterals) {
-            const predShort = shortenIRI(quad.predicate.value, context);
-            let annotation = predShort;
-            // Use DataFactory XSD constants for datatype comparison
-            const xsdString = 'http://www.w3.org/2001/XMLSchema#string';
-            if (quad.object.language) {
-                annotation += ` @${quad.object.language}`;
-            } else if (quad.object.datatype.value !== xsdString) {
-                annotation += ` ^^${shortenIRI(quad.object.datatype.value, context)}`;
-            }
+        text += `# ${localSubjectName} {=${shortSubject}${typeAnnotations}}\n`;
-            const literalText = `[${quad.object.value}] {${annotation}}\n`;
-            text += literalText;
-        }
+        // Add literals and objects using shared utilities
+        sortQuadsByPredicate(literals).forEach(quad => {
+            text += generateLiteralText(quad, context);
+        });
-        // Add objects (deterministic order)
-        const sortedObjects = objects.sort((a, b) => a.predicate.value.localeCompare(b.predicate.value));
-        for (const quad of sortedObjects) {
-            const objShort = shortenIRI(quad.object.value, context);
-            const predShort = shortenIRI(quad.predicate.value, context);
-            const objectText = `[${objShort}] {+${objShort} ?${predShort}}\n`;
-            text += objectText;
-        }
+        sortQuadsByPredicate(objects).forEach(quad => {
+            text += generateObjectText(quad, context);
+        });
         text += '\n';
     }

package/src/index.js CHANGED Viewed

@@ -3,7 +3,7 @@ export { merge } from './merge.js';
 export { generate } from './generate.js';
 export { locate } from './locate.js';
 export { render } from './render.js';
-export { DEFAULT_CONTEXT } from './shared.js';
+export { DEFAULT_CONTEXT } from './constants.js';
 export {
     DataFactory,
     hash,

package/src/locate.js CHANGED Viewed

@@ -18,21 +18,6 @@ export function locate(quad, origin) {
         return null;
     }
-    // Find the origin entry in quadIndex
-    const entry = origin.quadIndex.get(quadKey);
-    if (!entry) {
-        return null;
-    }
-    // Return the lean origin entry structure
-    return {
-        blockId: entry.blockId,
-        range: entry.range,
-        carrierType: entry.carrierType,
-        subject: entry.subject,
-        predicate: entry.predicate,
-        context: entry.context,
-        value: entry.value,
-        polarity: entry.polarity
-    };
+    // Return the origin entry directly - no need to create new object
+    return origin.quadIndex.get(quadKey) || null;
 }

package/src/merge.js CHANGED Viewed

@@ -1,15 +1,14 @@
 import { parse } from './parse.js';
-import { DEFAULT_CONTEXT } from './shared.js';
+import { quadToKeyForOrigin } from './utils.js';
+import { DEFAULT_CONTEXT } from './constants.js';
 /**
- * Creates a unique key for quad identity matching
+ * Creates a unique key for quad identity matching - using shared utility
  * @param {Quad} quad
  * @returns {string}
  */
 function quadKey(quad) {
-    const datatype = quad.object.datatype?.value || '';
-    const language = quad.object.language || '';
-    return `${quad.subject.value}|${quad.predicate.value}|${quad.object.value}|${datatype}|${language}`;
+    return quadToKeyForOrigin(quad);
 }
 /**
@@ -42,6 +41,7 @@ export function merge(docs, options = {}) {
     const allDocuments = [];
     const quadIndex = new Map();
     const allStatements = []; // Collect statements from all documents
+    const accumulatedContext = new Map(); // Track all unique prefixes across documents
     // Process each document in order
     for (let i = 0; i < docs.length; i++) {
@@ -53,6 +53,16 @@ export function merge(docs, options = {}) {
         // Normalize input to ParseResult
         const doc = normalizeInput(input, options, docContext);
+        // Accumulate context from this document
+        if (doc.context) {
+            for (const [prefix, namespace] of Object.entries(doc.context)) {
+                // Don't override default context entries unless explicitly provided in options
+                if (!accumulatedContext.has(prefix) && !DEFAULT_CONTEXT[prefix]) {
+                    accumulatedContext.set(prefix, namespace);
+                }
+            }
+        }
         // Create document origin
         const documentOrigin = {
             index: i,
@@ -74,14 +84,12 @@ export function merge(docs, options = {}) {
             sessionBuffer.set(key, quad);
             // Create quad origin with document index and polarity
-            const existingOrigin = doc.origin.quadIndex.get(quadKey(quad));
-            if (existingOrigin) {
-                quadIndex.set(quadKey(quad), {
-                    ...existingOrigin,
-                    documentIndex: i,
-                    polarity: '+'
-                });
-            }
+            const existingOrigin = doc.origin.quadIndex.get(key);
+            quadIndex.set(key, {
+                ...(existingOrigin || {}),
+                documentIndex: i,
+                polarity: '+'
+            });
         }
         // Fold retractions
@@ -97,14 +105,12 @@ export function merge(docs, options = {}) {
             }
             // Create quad origin for remove quads
-            const existingOrigin = doc.origin.quadIndex.get(quadKey(quad));
-            if (existingOrigin) {
-                quadIndex.set(quadKey(quad), {
-                    ...existingOrigin,
-                    documentIndex: i,
-                    polarity: '-'
-                });
-            }
+            const existingOrigin = doc.origin.quadIndex.get(key);
+            quadIndex.set(key, {
+                ...(existingOrigin || {}),
+                documentIndex: i,
+                polarity: '-'
+            });
         }
     }
@@ -119,7 +125,11 @@ export function merge(docs, options = {}) {
     };
     // Build final context (union of all contexts)
-    const finalContext = { ...DEFAULT_CONTEXT, ...options.context };
+    const finalContext = {
+        ...DEFAULT_CONTEXT,
+        ...options.context,
+        ...Object.fromEntries(accumulatedContext)
+    };
     // Enforce hard invariant
     const quadKeys = new Set(finalQuads.map(quadKey));

package/src/parse.js CHANGED Viewed

@@ -1,93 +1,128 @@
 import {
     DataFactory,
     expandIRI,
-    parseSemanticBlock,
     quadIndexKey,
     createLiteral,
     hash
 } from './utils.js';
-import { DEFAULT_CONTEXT, URL_REGEX, FENCE_REGEX, PREFIX_REGEX, HEADING_REGEX, UNORDERED_LIST_REGEX, BLOCKQUOTE_REGEX, STANDALONE_SUBJECT_REGEX, INLINE_CARRIER_PATTERNS } from './shared.js';
+import {
+    DEFAULT_CONTEXT,
+    URL_REGEX,
+    FENCE_REGEX,
+    PREFIX_REGEX,
+    HEADING_REGEX,
+    UNORDERED_LIST_REGEX,
+    BLOCKQUOTE_REGEX,
+    STANDALONE_SUBJECT_REGEX,
+    CARRIER_PATTERN_ARRAY,
+} from './constants.js';
+import {
-// Cache for fence regex patterns to avoid recreation
-const FENCE_CLOSE_PATTERNS = new Map();
+    getFenceClosePattern,
+    calcRangeInfo,
+    calcAttrsRange,
+    createToken,
+    createCarrier,
+    createListToken,
+    parseSemCached,
+    parseLangAndAttrs,
+    findMatchingBracket,
+    extractUrlFromBrackets,
+    extractAttributesFromText,
+    determineCarrierType,
+    calcCarrierRanges,
+    extractCleanText,
+    RDF_TYPE,
+    RDF_STATEMENT,
+    RDF_SUBJECT,
+    RDF_PREDICATE,
+    RDF_OBJECT,
+    createLeanOriginEntry,
+    resolveSubject,
+    resolveObject,
+    processTokenWithBlockTracking
+} from './shared.js';
-function getFenceClosePattern(fenceChar) {
-    if (!FENCE_CLOSE_PATTERNS.has(fenceChar)) {
-        FENCE_CLOSE_PATTERNS.set(fenceChar, new RegExp(`^(${fenceChar}{3,})`));
-    }
-    return FENCE_CLOSE_PATTERNS.get(fenceChar);
-}
-function parseLangAndAttrs(langAndAttrs) {
-    const spaceIndex = langAndAttrs.indexOf(' ');
-    const braceIndex = langAndAttrs.indexOf('{');
-    const langEnd = Math.min(
-        spaceIndex > -1 ? spaceIndex : Infinity,
-        braceIndex > -1 ? braceIndex : Infinity
-    );
-    return {
-        lang: langAndAttrs.substring(0, langEnd),
-        attrsText: langAndAttrs.substring(langEnd).match(/\{[^{}]*\}/)?.[0] || null
+export function parse(text, options = {}) {
+    const state = {
+        ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
+        df: options.dataFactory || DataFactory,
+        quads: [],
+        quadBuffer: new Map(),
+        removeSet: new Set(),
+        origin: {
+            quadIndex: new Map(),
+            blocks: new Map(),
+            documentStructure: []
+        },
+        currentSubject: null,
+        tokens: null,
+        currentTokenIndex: -1,
+        statements: [],
+        statementCandidates: new Map(),
+        currentBlock: null,
+        blockStack: []
     };
-}
-const semCache = {};
-const EMPTY_SEM = Object.freeze({ predicates: [], types: [], subject: null });
+    state.tokens = scanTokens(text);
+    // Single-pass processing: resolve prefixes AND process tokens together
+    for (let i = 0; i < state.tokens.length; i++) {
+        const token = state.tokens[i];
+        state.currentTokenIndex = i;
+        // Handle prefix tokens immediately during main pass
+        if (token.type === 'prefix') {
+            let resolvedIri = token.iri;
+            if (token.iri.includes(':')) {
+                const colonIndex = token.iri.indexOf(':');
+                const potentialPrefix = token.iri.substring(0, colonIndex);
+                const reference = token.iri.substring(colonIndex + 1);
+                if (state.ctx[potentialPrefix] && potentialPrefix !== '@vocab') {
+                    resolvedIri = state.ctx[potentialPrefix] + reference;
+                }
+            }
+            state.ctx[token.prefix] = resolvedIri;
+            continue; // Skip token processor for prefixes
+        }
-function parseSemCached(attrs) {
-    if (!attrs) return EMPTY_SEM;
-    let sem = semCache[attrs];
-    if (!sem) {
-        sem = Object.freeze(parseSemanticBlock(attrs));
-        semCache[attrs] = sem;
+        // Process all other tokens
+        TOKEN_PROCESSORS[token.type]?.(token, state);
     }
-    return sem;
-}
-function calcRangeInfo(line, attrs, lineStart, prefixLength, valueLength) {
-    const wsLength = prefixLength < line.length && line[prefixLength] === ' ' ? 1 :
-        line.slice(prefixLength).match(/^\s+/)?.[0]?.length || 0;
-    const valueStartInLine = prefixLength + wsLength;
-    return {
-        valueRange: [lineStart + valueStartInLine, lineStart + valueStartInLine + valueLength],
-        attrsRange: calcAttrsRange(line, attrs, lineStart)
-    };
-}
+    // Optimized quad filtering - use Set.has() instead of array.includes()
+    const quadKeys = new Set();
+    for (const quad of state.quads) {
+        quadKeys.add(quadIndexKey(quad.subject, quad.predicate, quad.object));
+    }
-function calcAttrsRange(line, attrs, lineStart) {
-    if (!attrs) return null;
-    const attrsStartInLine = line.lastIndexOf(attrs);
-    return attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null;
-}
+    // Direct Set iteration - more efficient than filter()
+    const filteredRemove = [];
+    for (const quad of state.removeSet) {
+        const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
+        if (!quadKeys.has(key)) {
+            filteredRemove.push(quad);
+        }
+    }
-function createToken(type, range, text, attrs = null, attrsRange = null, valueRange = null, extra = {}) {
-    const token = { type, range, text, attrs, attrsRange, valueRange, ...extra };
-    Object.defineProperty(token, '_carriers', {
-        enumerable: false, writable: true, value: null
-    });
-    return token;
+    return { quads: state.quads, remove: filteredRemove, statements: state.statements, origin: state.origin, context: state.ctx };
 }
+// Cache for fence regex patterns - using shared utility
 function getCarriers(token) {
     if (token.type === 'code') return [];
     return token._carriers || (token._carriers = extractInlineCarriers(token.text, token.range[0]));
 }
-const createListToken = (type, line, lineStart, pos, match) => {
-    const attrs = match[4] || null;
-    const prefix = match[1].length + (match[2] ? match[2].length : 0);
-    const rangeInfo = calcRangeInfo(line, attrs, lineStart, prefix, match[3].length);
-    return createToken(type, [lineStart, pos - 1], match[3].trim(), attrs,
-        rangeInfo.attrsRange, rangeInfo.valueRange, { indent: match[1].length });
-};
 function scanTokens(text) {
     const tokens = [];
     const lines = text.split('\n');
     let pos = 0;
     let codeBlock = null;
-    // Direct lookup instead of linear search
     const PROCESSORS = [
         { type: 'fence', test: line => FENCE_REGEX.test(line.trim()), process: handleFence },
         { type: 'content', test: () => codeBlock, process: line => codeBlock.content.push(line) },
@@ -194,16 +229,6 @@ function scanTokens(text) {
     return tokens;
 }
-function createCarrier(type, text, attrs, attrsRange, valueRange, range, pos, extra = {}) {
-    return { type, text, attrs, attrsRange, valueRange, range, pos, ...extra };
-}
-// Pre-compiled carrier patterns for better performance
-const CARRIER_PATTERN_ARRAY = [
-    ['EMPHASIS', /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y],
-    ['CODE_SPAN', /``(.+?)``\s*\{([^}]+)\}/y]
-];
 function extractInlineCarriers(text, baseOffset = 0) {
     const carriers = [];
     let pos = 0;
@@ -268,74 +293,6 @@ function extractInlineCarriers(text, baseOffset = 0) {
     return carriers;
 }
-function calcCarrierRanges(match, baseOffset, matchStart) {
-    const valueStart = baseOffset + matchStart + match[0].indexOf(match[1]);
-    const valueEnd = valueStart + match[1].length;
-    const attrsStart = baseOffset + matchStart + match[0].indexOf('{');
-    const attrsEnd = attrsStart + match[2].length + 2; // +2 for { and }
-    return {
-        valueRange: [valueStart, valueEnd],
-        attrsRange: [attrsStart + 1, attrsEnd - 1], // Exclude braces
-        range: [baseOffset + matchStart, attrsEnd],
-        pos: matchStart + match[0].length // pos should be relative to current text, not document
-    };
-}
-function findMatchingBracket(text, bracketStart) {
-    let bracketDepth = 1;
-    let bracketEnd = bracketStart + 1;
-    while (bracketEnd < text.length && bracketDepth > 0) {
-        if (text[bracketEnd] === '[') bracketDepth++;
-        else if (text[bracketEnd] === ']') bracketDepth--;
-        bracketEnd++;
-    }
-    return bracketDepth > 0 ? null : bracketEnd;
-}
-function extractUrlFromBrackets(text, bracketEnd) {
-    let url = null;
-    let spanEnd = bracketEnd;
-    if (text[spanEnd] === '(') {
-        const parenEnd = text.indexOf(')', spanEnd);
-        if (parenEnd !== -1) {
-            url = text.substring(spanEnd + 1, parenEnd);
-            spanEnd = parenEnd + 1;
-        }
-    }
-    return { url, spanEnd };
-}
-function extractAttributesFromText(text, spanEnd, baseOffset) {
-    let attrs = null;
-    let attrsRange = null;
-    const remaining = text.substring(spanEnd);
-    const wsMatch = remaining.match(/^\s+/);
-    const attrsStart = wsMatch ? wsMatch[0].length : 0;
-    if (remaining[attrsStart] === '{') {
-        const braceEnd = remaining.indexOf('}', attrsStart);
-        if (braceEnd !== -1) {
-            attrs = remaining.substring(attrsStart, braceEnd + 1);
-            const absStart = baseOffset + spanEnd + attrsStart;
-            attrsRange = [absStart, absStart + attrs.length];
-            spanEnd += braceEnd + 1;
-        }
-    }
-    return { attrs, attrsRange, finalSpanEnd: spanEnd };
-}
-function determineCarrierType(url) {
-    if (url && !url.startsWith('=')) {
-        return { carrierType: 'link', resourceIRI: url };
-    }
-    return { carrierType: 'span', resourceIRI: null };
-}
 function createBlockEntry(token, state) {
     const blockId = token._blockId || hash(`${token.type}:${token.range?.[0]}:${token.range?.[1]}`);
@@ -364,31 +321,6 @@ function createBlockEntry(token, state) {
     return blockEntry;
 }
-function extractCleanText(token) {
-    if (!token.text) return '';
-    let text = token.text;
-    // Remove semantic annotations
-    if (token.attrsRange) {
-        const beforeAttrs = text.substring(0, token.attrsRange[0] - (token.range?.[0] || 0));
-        const afterAttrs = text.substring(token.attrsRange[1] - (token.range?.[0] || 0));
-        text = beforeAttrs + afterAttrs;
-    }
-    // Clean based on token type
-    switch (token.type) {
-        case 'heading':
-            return text.replace(/^#+\s*/, '').trim();
-        case 'list':
-            return text.replace(/^[-*+]\s*/, '').trim();
-        case 'blockquote':
-            return text.replace(/^>\s*/, '').trim();
-        default:
-            return text.trim();
-    }
-}
 function enrichBlockFromAnnotation(blockEntry, sem, carrier, state) {
     // Update subject if available
     if (sem.subject && sem.subject !== 'RESET') {
@@ -539,17 +471,8 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
         // Detect rdf:Statement pattern during single-pass parsing
         detectStatementPatternSinglePass(quad, dataFactory, meta, statements, statementCandidates);
-        // Create lean origin entry - avoid spread operator for better performance
-        const originEntry = {
-            blockId: block.id,
-            range: block.range,
-            carrierType: block.carrierType,
-            subject: subject.value,
-            predicate: predicate.value,
-            context: block.context, // Direct reference instead of spread
-            polarity: meta?.remove ? '-' : '+',
-            value: block.text || ''
-        };
+        // Create lean origin entry using shared utility
+        const originEntry = createLeanOriginEntry(block, subject, predicate, meta);
         quadIndex.set(quadKey, originEntry);
@@ -563,13 +486,6 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
     }
 }
-// Extract RDF constants once at module level for efficiency
-const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
-const RDF_STATEMENT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement';
-const RDF_SUBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject';
-const RDF_PREDICATE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate';
-const RDF_OBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#object';
 function detectStatementPatternSinglePass(quad, dataFactory, meta, statements = null, statementCandidates = null) {
     // Skip if not called from parse context (for testing compatibility)
     if (!statements || !statementCandidates) return;
@@ -619,30 +535,6 @@ function detectStatementPatternSinglePass(quad, dataFactory, meta, statements =
     }
 }
-const resolveFragment = (fragment, state) => {
-    if (!state.currentSubject) return null;
-    const subjectValue = state.currentSubject.value;
-    const hashIndex = subjectValue.indexOf('#');
-    const baseIRI = hashIndex > -1 ? subjectValue.slice(0, hashIndex) : subjectValue;
-    return state.df.namedNode(baseIRI + '#' + fragment);
-};
-function resolveSubject(sem, state) {
-    if (!sem.subject) return null;
-    if (sem.subject === 'RESET') {
-        state.currentSubject = null;
-        return null;
-    }
-    if (sem.subject.startsWith('=#')) return resolveFragment(sem.subject.substring(2), state);
-    return state.df.namedNode(expandIRI(sem.subject, state.ctx));
-}
-function resolveObject(sem, state) {
-    if (!sem.object) return null;
-    if (sem.object.startsWith('#')) return resolveFragment(sem.object.substring(1), state);
-    return state.df.namedNode(expandIRI(sem.object, state.ctx));
-}
 const createTypeQuad = (typeIRI, subject, state, block, entryIndex = null) => {
     const expandedType = expandIRI(typeIRI, state.ctx);
     const typeInfo = typeof entryIndex === 'object' ? entryIndex : { entryIndex, remove: false };
@@ -748,123 +640,9 @@ function processStandaloneSubject(token, state) {
 }
 const TOKEN_PROCESSORS = {
-    heading: (token, state) => {
-        const blockEntry = createBlockEntry(token, state);
-        state.currentBlock = blockEntry;
-        state.blockStack.push(blockEntry.id);
-        processTokenAnnotations(token, state, token.type);
-        state.blockStack.pop();
-        state.currentBlock = state.blockStack.length > 0 ?
-            state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
-    },
-    code: (token, state) => {
-        const blockEntry = createBlockEntry(token, state);
-        state.currentBlock = blockEntry;
-        state.blockStack.push(blockEntry.id);
-        processTokenAnnotations(token, state, token.type);
-        state.blockStack.pop();
-        state.currentBlock = state.blockStack.length > 0 ?
-            state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
-    },
-    blockquote: (token, state) => {
-        const blockEntry = createBlockEntry(token, state);
-        state.currentBlock = blockEntry;
-        state.blockStack.push(blockEntry.id);
-        processTokenAnnotations(token, state, token.type);
-        state.blockStack.pop();
-        state.currentBlock = state.blockStack.length > 0 ?
-            state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
-    },
-    para: (token, state) => {
-        const blockEntry = createBlockEntry(token, state);
-        state.currentBlock = blockEntry;
-        state.blockStack.push(blockEntry.id);
-        processStandaloneSubject(token, state);
-        processTokenAnnotations(token, state, token.type);
-        state.blockStack.pop();
-        state.currentBlock = state.blockStack.length > 0 ?
-            state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
-    },
-    list: (token, state) => {
-        const blockEntry = createBlockEntry(token, state);
-        state.currentBlock = blockEntry;
-        state.blockStack.push(blockEntry.id);
-        processTokenAnnotations(token, state, token.type);
-        state.blockStack.pop();
-        state.currentBlock = state.blockStack.length > 0 ?
-            state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
-    },
+    heading: (token, state) => processTokenWithBlockTracking(token, state, processTokenAnnotations, createBlockEntry),
+    code: (token, state) => processTokenWithBlockTracking(token, state, processTokenAnnotations, createBlockEntry),
+    blockquote: (token, state) => processTokenWithBlockTracking(token, state, processTokenAnnotations, createBlockEntry),
+    para: (token, state) => processTokenWithBlockTracking(token, state, processTokenAnnotations, createBlockEntry, [processStandaloneSubject]),
+    list: (token, state) => processTokenWithBlockTracking(token, state, processTokenAnnotations, createBlockEntry),
 };
-export function parse(text, options = {}) {
-    const state = {
-        ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
-        df: options.dataFactory || DataFactory,
-        quads: [],
-        quadBuffer: new Map(),
-        removeSet: new Set(),
-        origin: {
-            quadIndex: new Map(),
-            blocks: new Map(),
-            documentStructure: []
-        },
-        currentSubject: null,
-        tokens: null,
-        currentTokenIndex: -1,
-        statements: [],
-        statementCandidates: new Map(), // Track incomplete rdf:Statement patterns
-        currentBlock: null,
-        blockStack: []
-    };
-    state.tokens = scanTokens(text);
-    // Single loop instead of filter+forEach for better performance
-    for (const token of state.tokens) {
-        if (token.type === 'prefix') {
-            let resolvedIri = token.iri;
-            if (token.iri.includes(':')) {
-                const colonIndex = token.iri.indexOf(':');
-                const potentialPrefix = token.iri.substring(0, colonIndex);
-                const reference = token.iri.substring(colonIndex + 1);
-                if (state.ctx[potentialPrefix] && potentialPrefix !== '@vocab') {
-                    resolvedIri = state.ctx[potentialPrefix] + reference;
-                }
-            }
-            state.ctx[token.prefix] = resolvedIri;
-        }
-    }
-    for (let i = 0; i < state.tokens.length; i++) {
-        const token = state.tokens[i];
-        state.currentTokenIndex = i;
-        TOKEN_PROCESSORS[token.type]?.(token, state);
-    }
-    // Optimize array operations - avoid Array.from() and filter()
-    const quadKeys = new Set();
-    for (const quad of state.quads) {
-        quadKeys.add(quadIndexKey(quad.subject, quad.predicate, quad.object));
-    }
-    // Direct iteration instead of Array.from() + filter()
-    const filteredRemove = [];
-    for (const quad of state.removeSet) {
-        const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
-        if (!quadKeys.has(key)) {
-            filteredRemove.push(quad);
-        }
-    }
-    return { quads: state.quads, remove: filteredRemove, statements: state.statements, origin: state.origin, context: state.ctx };
-}

package/src/render.js CHANGED Viewed

@@ -6,7 +6,12 @@ import {
     parseSemanticBlock,
     hash
 } from './utils.js';
-import { DEFAULT_CONTEXT } from './shared.js';
+import {
+    escapeHtml,
+    getIndentLevel,
+    processPredicates
+} from './shared.js';
+import { DEFAULT_CONTEXT } from './constants.js';
 /**
  * Render MD-LD to HTML+RDFa
@@ -302,17 +307,6 @@ function parseMarkdownList(markdownList, blocks, state) {
     return html;
 }
-/**
- * Get indent level from source text
- */
-function getIndentLevel(block, sourceText) {
-    if (!block.range || !sourceText) return 0;
-    const text = sourceText.substring(block.range.start, block.range.end);
-    const indentMatch = text.match(/^(\s*)/);
-    return indentMatch ? indentMatch[1].length : 0;
-}
 /**
  * Render a single block
  */
@@ -408,26 +402,9 @@ function buildRDFaAttrsFromBlock(block, ctx) {
         attrs.push(`typeof="${escapeHtml(types)}"`);
     }
-    // Predicates
+    // Predicates using shared utility
     if (block.predicates && block.predicates.length > 0) {
-        const literalProps = [];
-        const objectProps = [];
-        const reverseProps = [];
-        block.predicates.forEach(pred => {
-            const iri = typeof pred === 'string' ? pred : pred.iri;
-            const expanded = expandIRI(iri, ctx);
-            const shortened = shortenIRI(expanded, ctx);
-            const form = typeof pred === 'string' ? '' : (pred.form || '');
-            if (form === '!') {
-                reverseProps.push(shortened);
-            } else if (form === '?') {
-                objectProps.push(shortened);
-            } else {
-                literalProps.push(shortened);
-            }
-        });
+        const { literalProps, objectProps, reverseProps } = processPredicates(block.predicates, ctx);
         if (literalProps.length > 0) {
             attrs.push(`property="${escapeHtml(literalProps.join(' '))}"`);
@@ -474,17 +451,3 @@ function wrapWithRDFaContext(html, ctx) {
     return `<div${prefixDecl}${vocabDecl}>${html}</div>`;
 }
-/**
- * Escape HTML special characters
- */
-function escapeHtml(text) {
-    const map = {
-        '&': '&amp;',
-        '<': '&lt;',
-        '>': '&gt;',
-        '"': '&quot;',
-        "'": '&#39;'
-    };
-    return String(text || '').replace(/[&<>"']/g, m => map[m]);
-}

package/src/shared.js CHANGED Viewed

@@ -1,37 +1,5 @@
-/**
- * Shared utilities for MD-LD Parser and Renderer
- * Ensures DRY code and consistent CommonMark processing
- */
-export const DEFAULT_CONTEXT = {
-    '@vocab': "http://www.w3.org/2000/01/rdf-schema#",
-    rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
-    rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
-    xsd: 'http://www.w3.org/2001/XMLSchema#',
-    sh: "http://www.w3.org/ns/shacl#",
-    prov: 'http://www.w3.org/ns/prov#'
-};
-// CommonMark patterns - shared between parser and renderer
-export const URL_REGEX = /^(https?|ftp|mailto|tag|nih|urn|uuid|did|web|ipfs|ipns|data|file|urn:uuid):/;
-export const FENCE_REGEX = /^(`{3,}|~{3,})(.*)/;
-export const PREFIX_REGEX = /^\[([^\]]+)\]\s*<([^>]+)>/;
-export const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
-export const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
-export const BLOCKQUOTE_REGEX = /^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
-export const STANDALONE_SUBJECT_REGEX = /^\s*\{=(.*?)\}\s*$/;
-// Inline carrier patterns - shared extraction logic
-export const INLINE_CARRIER_PATTERNS = {
-    EMPHASIS: /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y,
-    CODE_SPAN: /``(.+?)``\s*\{([^}]+)\}/y
-};
-// Pre-compiled carrier patterns for performance
-export const CARRIER_PATTERN_ARRAY = [
-    ['EMPHASIS', /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y],
-    ['CODE_SPAN', /``(.+?)``\s*\{([^}]+)\}/y]
-];
+import { DEFAULT_CONTEXT, STANDALONE_SUBJECT_REGEX, FENCE_REGEX, PREFIX_REGEX, HEADING_REGEX, UNORDERED_LIST_REGEX, BLOCKQUOTE_REGEX } from './constants.js';
+import { parseSemanticBlock, expandIRI, shortenIRI, hash } from './utils.js';
 // Cache for fence regex patterns
 export const FENCE_CLOSE_PATTERNS = new Map();
@@ -156,6 +124,172 @@ export const PROCESSORS = [
     { test: line => true, process: null } // Default: paragraph
 ];
+// Token scanning processors - shared between parser and renderer
+// Each entry pairs a token `type` with a `test(line)` predicate over one source line.
+// `process` is null on every entry; per the inline notes it is overridden in parse.js.
+// NOTE(review): the entries look priority-ordered (first matching test wins) - confirm in parse.js.
+// The 'content' test always returns false, so that type is never selected by its test.
+// The 'para' test returns the trimmed line itself, i.e. truthy for any non-blank line.
+export const TOKEN_PROCESSORS = [
+    { type: 'fence', test: line => FENCE_REGEX.test(line.trim()), process: null }, // Will be overridden in parse.js
+    { type: 'content', test: line => false, process: null }, // Will be overridden in parse.js
+    { type: 'prefix', test: line => PREFIX_REGEX.test(line), process: null }, // Will be overridden in parse.js
+    { type: 'heading', test: line => HEADING_REGEX.test(line), process: null }, // Will be overridden in parse.js
+    { type: 'list', test: line => UNORDERED_LIST_REGEX.test(line), process: null }, // Will be overridden in parse.js
+    { type: 'blockquote', test: line => BLOCKQUOTE_REGEX.test(line), process: null }, // Will be overridden in parse.js
+    { type: 'para', test: line => line.trim(), process: null } // Will be overridden in parse.js
+];
+// Language and attributes parsing
+/**
+ * Split a combined "language + attributes" string into its two parts.
+ * @param {string} langAndAttrs - Text such as `js {attrs}`.
+ * @returns {{lang: string, attrsText: (string|null)}} `lang` is everything before the
+ *   first space or `{`; `attrsText` is the first `{...}` group (braces included, no
+ *   nested braces) found from that point on, or null when there is none.
+ */
+export function parseLangAndAttrs(langAndAttrs) {
+    // Candidate cut points: the first space and the first opening brace, when present
+    const cutPoints = [langAndAttrs.indexOf(' '), langAndAttrs.indexOf('{')]
+        .filter(index => index > -1);
+    // Without a cut point the whole string is the language and nothing is left to scan
+    const langEnd = cutPoints.length > 0 ? Math.min(...cutPoints) : Infinity;
+    const braceGroup = langAndAttrs.substring(langEnd).match(/\{[^{}]*\}/);
+    return {
+        lang: langAndAttrs.substring(0, langEnd),
+        attrsText: braceGroup?.[0] || null
+    };
+}
+// Carrier extraction utilities
+/**
+ * Locate the `]` that closes the `[` at `bracketStart`, honouring nested brackets.
+ * @param {string} text - Text being scanned.
+ * @param {number} bracketStart - Index of the opening `[`; scanning begins right after it.
+ * @returns {number|null} Index just past the matching `]`, or null if the text ends
+ *   before the bracket is closed.
+ */
+export function findMatchingBracket(text, bracketStart) {
+    let depth = 1;
+    for (let index = bracketStart + 1; index < text.length; index++) {
+        const char = text[index];
+        if (char === '[') {
+            depth += 1;
+        } else if (char === ']') {
+            depth -= 1;
+        }
+        // Depth zero means the bracket we started from has just been closed
+        if (depth === 0) return index + 1;
+    }
+    return null;
+}
+/**
+ * Read an optional `(url)` group that starts exactly at `bracketEnd`.
+ * @param {string} text - Text being scanned.
+ * @param {number} bracketEnd - Index where the `(` is expected.
+ * @returns {{url: (string|null), spanEnd: number}} The text between `(` and the first
+ *   following `)`, with `spanEnd` just past that `)`. When there is no `(` at
+ *   `bracketEnd`, or no closing `)`, `url` is null and `spanEnd` equals `bracketEnd`.
+ */
+export function extractUrlFromBrackets(text, bracketEnd) {
+    const untouched = { url: null, spanEnd: bracketEnd };
+    if (text[bracketEnd] !== '(') return untouched;
+    const closeIndex = text.indexOf(')', bracketEnd);
+    if (closeIndex === -1) return untouched;
+    return {
+        url: text.substring(bracketEnd + 1, closeIndex),
+        spanEnd: closeIndex + 1
+    };
+}
+/**
+ * Read an optional `{...}` attribute group that follows position `spanEnd`,
+ * allowing whitespace in between.
+ * @param {string} text - Text being scanned.
+ * @param {number} spanEnd - Index in `text` where the preceding span ended.
+ * @param {number} baseOffset - Offset added to positions in `text` to build `attrsRange`.
+ * @returns {{attrs: (string|null), attrsRange: (number[]|null), finalSpanEnd: number}}
+ *   `attrs` includes both braces and ends at the first `}`; `attrsRange` is
+ *   `[start, end]` shifted by `baseOffset`; `finalSpanEnd` is just past the `}`.
+ *   When no group is found, `attrs` and `attrsRange` are null and `finalSpanEnd`
+ *   equals `spanEnd`.
+ */
+export function extractAttributesFromText(text, spanEnd, baseOffset) {
+    const tail = text.substring(spanEnd);
+    const leadingWhitespace = /^\s+/.exec(tail);
+    const openIndex = leadingWhitespace ? leadingWhitespace[0].length : 0;
+    const closeIndex = tail[openIndex] === '{' ? tail.indexOf('}', openIndex) : -1;
+    if (closeIndex === -1) {
+        return { attrs: null, attrsRange: null, finalSpanEnd: spanEnd };
+    }
+    const attrs = tail.substring(openIndex, closeIndex + 1);
+    const absStart = baseOffset + spanEnd + openIndex;
+    return {
+        attrs,
+        attrsRange: [absStart, absStart + attrs.length],
+        // closeIndex is relative to `tail`, so it already accounts for the skipped whitespace
+        finalSpanEnd: spanEnd + closeIndex + 1
+    };
+}
+/**
+ * Classify a bracketed carrier by the URL that followed it.
+ * @param {string|null} url - Text taken from the `(...)` group, if any.
+ * @returns {{carrierType: string, resourceIRI: (string|null)}} `'link'` with the URL as
+ *   `resourceIRI` when a URL is present and does not start with `=`; otherwise
+ *   `'span'` with a null `resourceIRI`.
+ */
+export function determineCarrierType(url) {
+    const isLink = Boolean(url) && !url.startsWith('=');
+    return isLink
+        ? { carrierType: 'link', resourceIRI: url }
+        : { carrierType: 'span', resourceIRI: null };
+}
+/**
+ * Compute ranges for a carrier regex match.
+ * @param {Array<string>} match - Match whose group 1 is the carrier value and group 2
+ *   the attribute text without braces.
+ * @param {number} baseOffset - Offset added to positions in the scanned text.
+ * @param {number} matchStart - Index of the match within the scanned text.
+ * @returns {{valueRange: number[], attrsRange: number[], range: number[], pos: number}}
+ *   `valueRange`, `attrsRange` (braces excluded) and `range` include `baseOffset`;
+ *   `pos` is relative to the scanned text only.
+ */
+export function calcCarrierRanges(match, baseOffset, matchStart) {
+    const whole = match[0];
+    const value = match[1];
+    const attrsText = match[2];
+    const matchBase = baseOffset + matchStart;
+    // Both positions are the first occurrence inside the matched text
+    const valueStart = matchBase + whole.indexOf(value);
+    const braceStart = matchBase + whole.indexOf('{');
+    const braceEnd = braceStart + attrsText.length + 2; // +2 for { and }
+    return {
+        valueRange: [valueStart, valueStart + value.length],
+        attrsRange: [braceStart + 1, braceEnd - 1], // Exclude braces
+        range: [matchBase, braceEnd],
+        pos: matchStart + whole.length // relative to current text, not document
+    };
+}
+// Clean text extraction utilities
+/**
+ * Return a token's text without its attribute span and without the leading
+ * Markdown marker for its type.
+ * @param {{text?: string, type?: string, range?: number[], attrsRange?: number[]}} token
+ * @returns {string} Trimmed text; empty string when the token has no text.
+ */
+export function extractCleanText(token) {
+    if (!token.text) return '';
+    let text = token.text;
+    // Remove semantic annotations: attrsRange is rebased by the token's own start offset
+    if (token.attrsRange) {
+        const tokenStart = token.range?.[0] || 0;
+        const beforeAttrs = text.substring(0, token.attrsRange[0] - tokenStart);
+        const afterAttrs = text.substring(token.attrsRange[1] - tokenStart);
+        text = beforeAttrs + afterAttrs;
+    }
+    // Clean based on token type
+    switch (token.type) {
+        case 'heading':
+            return text.replace(/^#+\s*/, '').trim();
+        case 'list':
+            // UNORDERED_LIST_REGEX accepts leading indentation and `1.`-style markers too,
+            // so strip those as well as bullets. An ordered marker needs whitespace after
+            // it, so text such as "3.14" is left alone.
+            return text.replace(/^\s*(?:[-*+]\s*|\d+\.\s+)/, '').trim();
+        case 'blockquote':
+            return text.replace(/^>\s*/, '').trim();
+        default:
+            return text.trim();
+    }
+}
+// Quad emission utilities
+// Full IRIs from the RDF syntax namespace: rdf:type, plus the reification
+// terms rdf:Statement, rdf:subject, rdf:predicate and rdf:object.
+export const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
+export const RDF_STATEMENT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement';
+export const RDF_SUBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject';
+export const RDF_PREDICATE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate';
+export const RDF_OBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#object';
+/**
+ * Build a flat origin record linking a block to one subject/predicate pair.
+ * @param {object} block - Block supplying `id`, `range`, `carrierType`, `context` and `text`.
+ * @param {{value: string}} subject - Subject term; only its `value` is stored.
+ * @param {{value: string}} predicate - Predicate term; only its `value` is stored.
+ * @param {?{remove?: boolean}} [meta=null] - When `meta.remove` is truthy the polarity is '-'.
+ * @returns {object} Record with blockId, range, carrierType, subject, predicate,
+ *   context (same object as `block.context`, not a copy), polarity and value.
+ */
+export function createLeanOriginEntry(block, subject, predicate, meta = null) {
+    const { id: blockId, range, carrierType, context, text } = block;
+    const polarity = meta?.remove ? '-' : '+';
+    return {
+        blockId,
+        range,
+        carrierType,
+        subject: subject.value,
+        predicate: predicate.value,
+        context, // shared by reference with the block
+        polarity,
+        value: text || ''
+    };
+}
+// Fragment resolution utilities
+/**
+ * Build a named node for `fragment` relative to the current subject.
+ * @param {string} fragment - Fragment name without the leading `#`.
+ * @param {?{value: string}} currentSubject - Subject whose IRI provides the base.
+ * @param {{namedNode: Function}} dataFactory - Factory used to create the node.
+ * @returns {*} `dataFactory.namedNode(base + '#' + fragment)`, where base is the subject
+ *   IRI up to its first `#`; null when there is no current subject.
+ */
+export function resolveFragment(fragment, currentSubject, dataFactory) {
+    if (!currentSubject) return null;
+    // Everything before the first '#', or the whole IRI when it has none
+    const [baseIRI] = currentSubject.value.split('#');
+    return dataFactory.namedNode(`${baseIRI}#${fragment}`);
+}
+/**
+ * Turn the subject declaration of a parsed semantic block into a term.
+ * @param {{subject?: string}} sem - Parsed semantic block.
+ * @param {object} state - Parser state; reads `currentSubject`, `df` and `ctx`.
+ * @returns {*} A named node, or null when no subject is declared, when the
+ *   declaration is 'RESET' (which also clears `state.currentSubject`), or when a
+ *   `=#fragment` declaration has no current subject to resolve against.
+ */
+export function resolveSubject(sem, state) {
+    const declared = sem.subject;
+    if (!declared) return null;
+    if (declared === 'RESET') {
+        state.currentSubject = null;
+        return null;
+    }
+    return declared.startsWith('=#')
+        ? resolveFragment(declared.substring(2), state.currentSubject, state.df)
+        : state.df.namedNode(expandIRI(declared, state.ctx));
+}
+/**
+ * Turn the object declaration of a parsed semantic block into a term.
+ * @param {{object?: string}} sem - Parsed semantic block.
+ * @param {object} state - Parser state; reads `currentSubject`, `df` and `ctx`.
+ * @returns {*} A named node; null when no object is declared or when a `#fragment`
+ *   declaration has no current subject to resolve against.
+ */
+export function resolveObject(sem, state) {
+    const declared = sem.object;
+    if (!declared) return null;
+    return declared.startsWith('#')
+        ? resolveFragment(declared.substring(1), state.currentSubject, state.df)
+        : state.df.namedNode(expandIRI(declared, state.ctx));
+}
 // HTML escaping - shared utility
 export function escapeHtml(text) {
     if (!text) return '';
@@ -167,6 +301,77 @@ export function escapeHtml(text) {
         .replace(/'/g, '&#x27;');
 }
+// RDF term type checking utilities - shared across modules
+/**
+ * @param {?{termType?: string}} term - Term to inspect; may be null or undefined.
+ * @returns {boolean} True when `term.termType` is 'Literal'.
+ */
+export function isLiteral(term) {
+    const kind = term?.termType;
+    return kind === 'Literal';
+}
+/**
+ * @param {?{termType?: string}} term - Term to inspect; may be null or undefined.
+ * @returns {boolean} True when `term.termType` is 'NamedNode'.
+ */
+export function isNamedNode(term) {
+    const kind = term?.termType;
+    return kind === 'NamedNode';
+}
+/**
+ * @param {?{value?: string}} term - Term to inspect; may be null or undefined.
+ * @returns {boolean} True when the term's value is the rdf:type IRI.
+ */
+export function isRdfType(term) {
+    // Use the module's RDF_TYPE constant rather than a second copy of the IRI literal
+    return term?.value === RDF_TYPE;
+}
+// IRI prefix extraction utility
+/**
+ * Return the part of the shortened IRI that precedes its first colon.
+ * @param {string} iri - IRI to inspect.
+ * @param {object} context - Prefix context passed to `shortenIRI`.
+ * @returns {string|null} Text before the first `:` of `shortenIRI(iri, context)`;
+ *   null when `iri` is falsy or the shortened form has no colon.
+ *   NOTE(review): if `shortenIRI` returns an IRI unchanged, this yields its scheme
+ *   (e.g. 'http') - confirm callers only match the result against known prefixes.
+ */
+export function getPrefixFromIRI(iri, context) {
+    if (!iri) return null;
+    const compact = shortenIRI(iri, context);
+    const colonIndex = compact.indexOf(':');
+    return colonIndex === -1 ? null : compact.slice(0, colonIndex);
+}
+// Prefix collection utility - used by generate.js
+/**
+ * Gather every prefix referenced by the quads of all subject groups.
+ * @param {Map<*, Iterable<object>>} subjectGroups - Quads grouped per subject; only
+ *   the map's values are read.
+ * @param {object} context - Prefix context passed to `getPrefixFromIRI`.
+ * @returns {Set<string>} Prefixes found on subjects, predicates, named-node objects
+ *   and object datatypes.
+ */
+export function collectUsedPrefixes(subjectGroups, context) {
+    const usedPrefixes = new Set();
+    const record = iri => {
+        const prefix = getPrefixFromIRI(iri, context);
+        if (prefix) usedPrefixes.add(prefix);
+    };
+    for (const subjectQuads of subjectGroups.values()) {
+        for (const { subject, predicate, object } of subjectQuads) {
+            record(subject.value);
+            record(predicate.value);
+            // Only named-node objects carry an IRI of their own
+            if (isNamedNode(object)) record(object.value);
+            // The datatype is optional on the object term
+            if (object.datatype && object.datatype.value) record(object.datatype.value);
+        }
+    }
+    return usedPrefixes;
+}
+// Token processing utility - eliminates duplication in TOKEN_PROCESSORS
+/**
+ * Process one token inside its own block scope.
+ * Creates the block entry, makes it current and pushes its id on `state.blockStack`,
+ * runs the extra processors and then `processAnnotations`, and finally pops the
+ * stack and restores the enclosing block (or null) as current.
+ * @param {object} token - Token being processed; `token.type` is passed to `processAnnotations`.
+ * @param {object} state - Parser state; uses `blockStack`, `currentBlock` and `origin.blocks`.
+ * @param {Function} processAnnotations - Called as `(token, state, token.type)`.
+ * @param {Function} createBlockEntry - Called as `(token, state)`; must return an object with `id`.
+ * @param {Function[]} [additionalProcessors=[]] - Each called as `(token, state)` before annotations.
+ */
+export function processTokenWithBlockTracking(token, state, processAnnotations, createBlockEntry, additionalProcessors = []) {
+    const entry = createBlockEntry(token, state);
+    state.currentBlock = entry;
+    state.blockStack.push(entry.id);
+    // Extra processors run before the annotations
+    additionalProcessors.forEach(run => run(token, state));
+    processAnnotations(token, state, token.type);
+    state.blockStack.pop();
+    // Restore whichever block is now on top of the stack, if any
+    if (state.blockStack.length > 0) {
+        const parentId = state.blockStack[state.blockStack.length - 1];
+        state.currentBlock = state.origin.blocks.get(parentId);
+    } else {
+        state.currentBlock = null;
+    }
+}
 // Quad key generation - shared between parser and renderer
 export function quadIndexKey(subject, predicate, object) {
     const datatype = object.datatype?.value || '';
@@ -199,14 +404,126 @@ export function resolveSubjectType(subjectDecl) {
     return 'full-iri';
 }
-// Fragment resolution - shared logic
-export function resolveFragment(fragment, currentSubject) {
-    if (!currentSubject) {
-        throw new Error('Fragment requires current subject');
+// Constants - shared across modules (bundle-size optimized)
+export const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string';
+// Optimized sorting utilities - inline for better minification
+/**
+ * Sort quads by predicate IRI using `localeCompare`.
+ * @param {object[]} quads - Array to sort; it is sorted in place.
+ * @returns {object[]} The same array instance, now sorted.
+ */
+export function sortQuadsByPredicate(quads) {
+    const byPredicate = (left, right) => left.predicate.value.localeCompare(right.predicate.value);
+    return quads.sort(byPredicate);
+}
+// Optimized text generation - template literals for smaller bundle
+/**
+ * Format one prefix declaration line.
+ * @param {string} prefix - Prefix name placed between square brackets.
+ * @param {string} namespace - Namespace IRI placed between angle brackets.
+ * @returns {string} `[prefix] <namespace>` followed by a newline.
+ */
+export const generatePrefixDeclaration = (prefix, namespace) => `[${prefix}] <${namespace}>\n`;
+/**
+ * Render a literal-valued quad as `[value] {predicate ...}` followed by a newline.
+ * @param {object} quad - Quad whose object is a literal term.
+ * @param {object} context - Prefix context passed to `shortenIRI`.
+ * @returns {string} The annotation carries ` @lang` when the literal has a language,
+ *   otherwise ` ^^datatype` when it has a datatype other than xsd:string.
+ */
+export function generateLiteralText(quad, context) {
+    const predShort = shortenIRI(quad.predicate.value, context);
+    let annotation = predShort;
+    // The datatype is optional on literal terms (other helpers here guard it the same
+    // way); a missing datatype is rendered like a plain string instead of throwing.
+    const datatypeIri = quad.object.datatype?.value;
+    if (quad.object.language) {
+        annotation += ` @${quad.object.language}`;
+    } else if (datatypeIri && datatypeIri !== XSD_STRING) {
+        annotation += ` ^^${shortenIRI(datatypeIri, context)}`;
+    }
+    return `[${quad.object.value}] {${annotation}}\n`;
+}
+/**
+ * Render an IRI-valued quad as `[object] {+object ?predicate}` followed by a newline.
+ * @param {object} quad - Quad whose object and predicate values are IRIs.
+ * @param {object} context - Prefix context passed to `shortenIRI`.
+ * @returns {string} The rendered line, with both IRIs shortened.
+ */
+export const generateObjectText = (quad, context) => {
+    const [objShort, predShort] = [quad.object, quad.predicate]
+        .map(term => shortenIRI(term.value, context));
+    return `[${objShort}] {+${objShort} ?${predShort}}\n`;
+};
+// Quad partitioning - one pass over a subject's quads
+/**
+ * Split quads into three groups, testing in this order: rdf:type predicate,
+ * literal object, named-node object. Quads matching none of these are omitted.
+ * @param {Iterable<object>} subjectQuads - Quads to partition.
+ * @returns {{types: object[], literals: object[], objects: object[]}}
+ */
+export function filterQuadsByType(subjectQuads) {
+    const types = [], literals = [], objects = [];
+    for (const q of subjectQuads) {
+        if (isRdfType(q.predicate)) {
+            types.push(q);
+        } else if (isLiteral(q.object)) {
+            literals.push(q);
+        } else if (isNamedNode(q.object)) {
+            objects.push(q);
+        }
     }
-    const fragmentName = fragment.substring(2); // Remove =#
-    const baseIRI = currentSubject.value;
-    const hashIndex = baseIRI.indexOf('#');
-    const base = hashIndex > -1 ? baseIRI.slice(0, hashIndex) : baseIRI;
-    return base + '#' + fragmentName;
+    return { types, literals, objects };
+}
+// Predicate processing utilities - common RDFa patterns
+/**
+ * Sort predicates into three lists by their form marker.
+ * @param {Array<string|{iri: string, form?: string}>} predicates - Plain IRI strings or
+ *   objects carrying `iri` and an optional `form`.
+ * @param {object} ctx - Prefix context passed to `expandIRI` and `shortenIRI`.
+ * @returns {{literalProps: string[], objectProps: string[], reverseProps: string[]}}
+ *   Shortened IRIs: form '!' goes to `reverseProps`, '?' to `objectProps`, anything
+ *   else (including plain strings) to `literalProps`.
+ */
+export function processPredicates(predicates, ctx) {
+    const literalProps = [];
+    const objectProps = [];
+    const reverseProps = [];
+    // A Map avoids accidental hits on inherited object keys when looking up the form
+    const targetByForm = new Map([
+        ['!', reverseProps],
+        ['?', objectProps]
+    ]);
+    predicates.forEach(pred => {
+        const isPlain = typeof pred === 'string';
+        const iri = isPlain ? pred : pred.iri;
+        // Expand first, then shorten, so the result is normalised against ctx
+        const shortened = shortenIRI(expandIRI(iri, ctx), ctx);
+        const form = isPlain ? '' : (pred.form || '');
+        (targetByForm.get(form) ?? literalProps).push(shortened);
+    });
+    return { literalProps, objectProps, reverseProps };
+}
+// Deterministic sorting utilities - ensure consistent output
+/**
+ * Sort an array by a string key with a fixed collation.
+ * @param {Array} array - Array to sort; it is sorted in place.
+ * @param {function(*): string} keyFn - Returns the string key for an element.
+ * @returns {Array} The same array instance, now sorted.
+ */
+export function sortDeterministic(array, keyFn) {
+    // Without an explicit locale, localeCompare follows the host's default locale and
+    // the order can differ between machines; pinning 'en' keeps the output consistent.
+    return array.sort((a, b) => keyFn(a).localeCompare(keyFn(b), 'en'));
+}
+/**
+ * Sort quads by subject, then predicate, then object value, with a fixed collation.
+ * @param {object[]} quads - Array to sort; it is sorted in place.
+ * @returns {object[]} The same array instance, now sorted.
+ */
+export function sortQuadsDeterministically(quads) {
+    // Pin the locale: the default-locale comparison can order strings differently
+    // from one machine to another.
+    const compare = (left, right) => left.localeCompare(right, 'en');
+    // `||` moves on to the next key only when the previous comparison is a tie.
+    // The object is compared by `value` whatever its term type.
+    return quads.sort((a, b) =>
+        compare(a.subject.value, b.subject.value) ||
+        compare(a.predicate.value, b.predicate.value) ||
+        compare(a.object.value, b.object.value));
+}
+// Optimized deterministic prefix generation
+/**
+ * Emit prefix declarations, sorted by prefix name, for every context entry that is
+ * in use and is not a built-in default.
+ * @param {Object<string, string>} context - Prefix-to-namespace map.
+ * @param {Set<string>} usedPrefixes - Prefixes that actually occur in the output.
+ * @returns {string} Concatenated declaration lines; empty string when none qualify.
+ */
+export function generateDeterministicPrefixes(context, usedPrefixes) {
+    // Own-property test so a user prefix named like an inherited Object key
+    // (e.g. 'constructor') is not mistaken for a default.
+    const isDefault = prefix => Object.prototype.hasOwnProperty.call(DEFAULT_CONTEXT, prefix);
+    return Object.keys(context)
+        // '@'-prefixed keys (such as '@vocab') are keywords, not prefixes
+        .filter(prefix => !prefix.startsWith('@') && !isDefault(prefix) && usedPrefixes.has(prefix))
+        // Pin the locale so the order does not depend on the host's default locale
+        .sort((a, b) => a.localeCompare(b, 'en'))
+        .map(prefix => generatePrefixDeclaration(prefix, context[prefix]))
+        .join('');
+}
+// Memory-efficient block creation
+/**
+ * Create a block record for a token and register it in `state.origin.blocks`.
+ * @param {{type: string, range: number[], text?: string}} token - Source token.
+ * @param {object} state - Parser state; reads `currentSubject` and `ctx`, writes `origin.blocks`.
+ * @returns {object} The new block. Its id hashes the token range plus the first 50
+ *   characters of its text; `context` is a shallow copy of `state.ctx`.
+ */
+export function createOptimizedBlockEntry(token, state) {
+    // Other helpers in this module tolerate tokens without text, so an absent text
+    // contributes an empty sample to the id instead of throwing.
+    const textSample = (token.text ?? '').slice(0, 50);
+    const id = hash(`${token.range[0]}-${token.range[1]}-${textSample}`);
+    const block = {
+        id,
+        type: token.type,
+        carrierType: token.type,
+        range: token.range,
+        text: token.text,
+        carriers: [],
+        predicates: [],
+        subject: state.currentSubject,
+        context: { ...state.ctx }
+    };
+    state.origin.blocks.set(id, block);
+    return block;
 }

package/src/utils.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { URL_REGEX, DEFAULT_CONTEXT } from './shared.js';
+import { URL_REGEX, DEFAULT_CONTEXT } from './constants.js';
 // Base Term class for RDF/JS compatibility
 export class Term {