npm - mdld-parse - Versions diffs - 0.1.0 → 0.2.2 - Mend

mdld-parse 0.1.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/index.js CHANGED Viewed

@@ -1,882 +1,552 @@
-/**
- * MD-LD Parser — Markdown-Linked Data to RDF Quads
- *
- * Zero-dependency, streaming-capable parser for MD-LD documents.
- * Outputs RDF/JS compatible quads.
- */
-// ============================================================================
-// RDF/JS Data Factory (Minimal Implementation)
-// ============================================================================
-const DefaultDataFactory = {
-  namedNode: (value) => ({ termType: 'NamedNode', value }),
-  blankNode: (value = `b${Math.random().toString(36).slice(2, 11)}`) => ({
-    termType: 'BlankNode',
-    value
-  }),
-  literal: (value, languageOrDatatype) => {
-    if (typeof languageOrDatatype === 'string') {
-      return {
-        termType: 'Literal',
-        value,
-        language: languageOrDatatype,
-        datatype: { termType: 'NamedNode', value: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#langString' }
-      };
-    }
-    return {
-      termType: 'Literal',
-      value,
-      language: '',
-      datatype: languageOrDatatype || { termType: 'NamedNode', value: 'http://www.w3.org/2001/XMLSchema#string' }
-    };
-  },
-  quad: (subject, predicate, object, graph) => ({
-    subject,
-    predicate,
-    object,
-    graph: graph || DefaultDataFactory.defaultGraph()
-  }),
-  defaultGraph: () => ({ termType: 'DefaultGraph', value: '' })
+const DEFAULT_CONTEXT = {
+    '@vocab': 'http://schema.org/',
+    rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
+    rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
+    xsd: 'http://www.w3.org/2001/XMLSchema#',
+    schema: 'http://schema.org/'
 };
-// ============================================================================
-// YAML-LD Parser (Minimal YAML subset for frontmatter)
-// ============================================================================
-function parseYAMLLD(yamlText) {
-  try {
-    const lines = yamlText.trim().split('\n');
-    const obj = {};
-    let currentKey = null;
-    let indent = 0;
-    let inArray = false;
-    let currentArray = null;
-    for (let line of lines) {
-      const trimmed = line.trim();
-      if (!trimmed || trimmed.startsWith('#')) continue;
-      const leadingSpaces = line.match(/^\s*/)[0].length;
-      // Array item
-      if (trimmed.startsWith('- ')) {
-        if (!inArray) {
-          currentArray = [];
-          inArray = true;
-        }
-        const value = trimmed.substring(2).trim();
-        currentArray.push(parseYAMLValue(value));
-        continue;
-      }
-      // Key-value pair
-      const colonIndex = trimmed.indexOf(':');
-      if (colonIndex > 0) {
-        const key = trimmed.substring(0, colonIndex).trim().replace(/^['"]|['"]$/g, '');
-        let value = trimmed.substring(colonIndex + 1).trim();
-        // Save previous array
-        if (inArray && currentKey && currentArray) {
-          obj[currentKey] = currentArray;
-          inArray = false;
-          currentArray = null;
-        }
-        currentKey = key;
-        if (!value) {
-          // Empty value or nested object/array coming
-          indent = leadingSpaces;
-          continue;
+const DataFactory = {
+    namedNode: (v) => ({ termType: 'NamedNode', value: v }),
+    blankNode: (v = `b${Math.random().toString(36).slice(2, 11)}`) => ({ termType: 'BlankNode', value: v }),
+    literal: (v, lang) => {
+        if (typeof lang === 'string') {
+            return { termType: 'Literal', value: v, language: lang, datatype: DataFactory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#langString') };
         }
+        return { termType: 'Literal', value: v, language: '', datatype: lang || DataFactory.namedNode('http://www.w3.org/2001/XMLSchema#string') };
+    },
+    quad: (s, p, o, g) => ({ subject: s, predicate: p, object: o, graph: g || DataFactory.namedNode('') })
+};
-        obj[key] = parseYAMLValue(value);
-      }
-    }
+function hash(str) {
+    let h = 5381;
+    for (let i = 0; i < str.length; i++) h = ((h << 5) + h) + str.charCodeAt(i);
+    return Math.abs(h).toString(16).slice(0, 12);
+}
-    // Save last array
-    if (inArray && currentKey && currentArray) {
-      obj[currentKey] = currentArray;
+function expandIRI(term, ctx) {
+    if (!term) return null;
+    const t = term.trim();
+    if (t.match(/^https?:/)) return t;
+    if (t.includes(':')) {
+        const [prefix, ref] = t.split(':', 2);
+        return ctx[prefix] ? ctx[prefix] + ref : t;
     }
-    return obj;
-  } catch (e) {
-    console.warn('YAML-LD parse error:', e);
-    return {};
-  }
+    return (ctx['@vocab'] || '') + t;
 }
-function parseYAMLValue(value) {
-  value = value.replace(/^['"]|['"]$/g, '');
-  if (value === 'true') return true;
-  if (value === 'false') return false;
-  if (value === 'null') return null;
-  if (/^-?\d+$/.test(value)) return parseInt(value, 10);
-  if (/^-?\d+\.\d+$/.test(value)) return parseFloat(value);
+// Annotation parsing - explicit string operations
+function parseAnnotation(raw) {
+    try {
+        const cleaned = raw.replace(/^\{|\}$/g, '').trim();
+        if (!cleaned) return { subject: null, entries: [], datatype: null, language: null };
-  return value;
-}
-// ============================================================================
-// Markdown Tokenizer (Minimal - focuses on structure)
-// ============================================================================
-function tokenizeMarkdown(text) {
-  const tokens = [];
-  const lines = text.split('\n');
-  let i = 0;
-  let inCodeBlock = false;
-  let codeFence = null;
-  let codeLang = null;
-  let codeAttrs = {};
-  let codeLines = [];
-  while (i < lines.length) {
-    const line = lines[i];
-    const trimmed = line.trim();
-    // Fenced code block ```lang {attrs}
-    const fenceMatch = line.match(/^(```+)(.*)$/);
-    if (fenceMatch) {
-      const [, fence, rest] = fenceMatch;
-      if (!inCodeBlock) {
-        // Start of code block
-        inCodeBlock = true;
-        codeFence = fence;
-        codeLines = [];
-        codeLang = null;
-        codeAttrs = {};
-        const restTrimmed = rest.trim();
-        if (restTrimmed) {
-          // Extract language (first token that is not an attribute block)
-          const attrIndex = restTrimmed.indexOf('{');
-          const langPart = attrIndex >= 0 ? restTrimmed.substring(0, attrIndex).trim() : restTrimmed;
-          if (langPart) {
-            codeLang = langPart.split(/\s+/)[0];
-          }
-          // Attributes after language: ```lang {#id typeof="..."}
-          const attrMatch = restTrimmed.match(/\{[^}]+\}/);
-          if (attrMatch) {
-            codeAttrs = parseAttributes(attrMatch[0]);
-          }
+        // Validate quotes
+        let quoteCount = 0;
+        for (let i = 0; i < cleaned.length; i++) {
+            if (cleaned[i] === '"') quoteCount++;
+        }
+        if (quoteCount % 2 !== 0) {
+            console.warn(`Unbalanced quotes in annotation: ${raw}`);
+            return { subject: null, entries: [], datatype: null, language: null };
         }
-        i++;
-        continue;
-      }
-      // Closing fence (must match opening fence length)
-      if (inCodeBlock && fence === codeFence) {
-        tokens.push({
-          type: 'code',
-          lang: codeLang,
-          text: codeLines.join('\n'),
-          attrs: codeAttrs
-        });
+        const result = { subject: null, entries: [], datatype: null, language: null };
+        const parts = cleaned.split(/\s+/).filter(p => p);
+        for (const part of parts) {
+            if (part === '=') {
+                result.subject = 'RESET';
+            } else if (part.startsWith('=')) {
+                result.subject = part.substring(1);
+            } else if (part.startsWith('@')) {
+                result.language = part.substring(1);
+            } else if (part.startsWith('^^')) {
+                result.datatype = part.substring(2);
+            } else if (part.startsWith('^')) {
+                result.entries.push({ kind: 'property', predicate: part.substring(1), direction: 'reverse' });
+            } else if (part.startsWith('.')) {
+                result.entries.push({ kind: 'type', classIRI: part.substring(1) });
+            } else {
+                result.entries.push({ kind: 'property', predicate: part, direction: 'forward' });
+            }
+        }
-        inCodeBlock = false;
-        codeFence = null;
-        codeLang = null;
-        codeAttrs = {};
-        codeLines = [];
+        if (result.entries.length === 0 && !result.subject) {
+            console.warn(`No valid entries found in annotation: ${raw}`);
+            return { subject: null, entries: [], datatype: null, language: null };
+        }
-        i++;
-        continue;
-      }
+        return result;
+    } catch (error) {
+        console.error(`Error parsing annotation ${raw}:`, error);
+        return { subject: null, entries: [], datatype: null, language: null };
     }
+}
-    if (inCodeBlock) {
-      codeLines.push(line);
-      i++;
-      continue;
-    }
+// Token scanning - consolidated helpers
+function scanTokens(text) {
+    const tokens = [];
+    const lines = text.split('\n');
+    let pos = 0;
+    let codeBlock = null;
+    for (let i = 0; i < lines.length; i++) {
+        const line = lines[i];
+        const lineStart = pos;
+        pos += line.length + 1;
+        // Code blocks
+        if (line.startsWith('```')) {
+            if (!codeBlock) {
+                const fence = line.match(/^(`{3,})(.*)/);
+                codeBlock = {
+                    fence: fence[1],
+                    start: lineStart,
+                    content: [],
+                    lang: fence[2].trim().split('{')[0].trim(),
+                    attrs: fence[2].match(/\{[^}]+\}/)?.[0]
+                };
+            } else if (line.startsWith(codeBlock.fence)) {
+                tokens.push({
+                    type: 'code',
+                    range: [codeBlock.start, lineStart],
+                    text: codeBlock.content.join('\n'),
+                    lang: codeBlock.lang,
+                    attrs: codeBlock.attrs
+                });
+                codeBlock = null;
+            }
+            continue;
+        }
-    // Heading with potential attributes on next line
-    const headingMatch = line.match(/^(#{1,6})\s+(.+?)(\s*\{[^}]+\})?$/);
-    if (headingMatch) {
-      const [, hashes, text, attrs] = headingMatch;
-      let attributes = attrs ? parseAttributes(attrs) : {};
-      // Check next line for attributes
-      if (!attrs && i + 1 < lines.length) {
-        const nextLine = lines[i + 1].trim();
-        if (nextLine.match(/^\{[^}]+\}$/)) {
-          attributes = parseAttributes(nextLine);
-          i++; // Skip the attribute line
+        if (codeBlock) {
+            codeBlock.content.push(line);
+            continue;
         }
-      }
-      tokens.push({
-        type: 'heading',
-        depth: hashes.length,
-        text: text.trim(),
-        attrs: attributes
-      });
-      i++;
-      continue;
-    }
-    // Task list item
-    const taskMatch = line.match(/^(\s*)([-*+])\s+\[([ xX])\]\s+(.+?)(\s*\{[^}]+\})?$/);
-    if (taskMatch) {
-      const [, indent, marker, checked, text, attrs] = taskMatch;
-      tokens.push({
-        type: 'taskItem',
-        indent: indent.length,
-        checked: checked.toLowerCase() === 'x',
-        text: text.trim(),
-        attrs: attrs ? parseAttributes(attrs) : {}
-      });
-      i++;
-      continue;
-    }
+        // Prefix declarations
+        const prefixMatch = line.match(/^\[([^\]]+)\]\s*\{:\s*([^}]+)\}/);
+        if (prefixMatch) {
+            tokens.push({ type: 'prefix', prefix: prefixMatch[1], iri: prefixMatch[2].trim() });
+            continue;
+        }
-    // Regular list item (must come after task item check)
-    const listMatch = line.match(/^(\s*)([-*+]|\d+\.)\s+(.+?)(\s*\{[^}]+\})?$/);
-    if (listMatch) {
-      const [, indent, marker, text, attrs] = listMatch;
-      // If the list item has trailing attribute syntax (e.g. - [Link](#id){rel="hasPart"})
-      // treat those attributes as part of the inline content so that parseInline
-      // can correctly interpret them on the link/span itself.
-      const combinedText = attrs ? `${text}${attrs.trim()}` : text;
-      tokens.push({
-        type: 'listItem',
-        indent: indent.length,
-        text: combinedText.trim(),
-        attrs: attrs ? parseAttributes(attrs) : {}
-      });
-      i++;
-      continue;
-    }
+        // Headings
+        const headingMatch = line.match(/^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
+        if (headingMatch) {
+            tokens.push({
+                type: 'heading',
+                depth: headingMatch[1].length,
+                range: [lineStart, pos],
+                text: headingMatch[2].trim(),
+                attrs: headingMatch[3]
+            });
+            continue;
+        }
-    // Paragraph
-    if (trimmed && !trimmed.match(/^(---|```)/)) {
-      tokens.push({
-        type: 'paragraph',
-        text: line
-      });
-      i++;
-      continue;
-    }
+        // Lists
+        const listMatch = line.match(/^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
+        if (listMatch) {
+            tokens.push({
+                type: 'list',
+                indent: listMatch[1].length,
+                range: [lineStart, pos],
+                text: listMatch[3].trim(),
+                attrs: listMatch[4]
+            });
+            continue;
+        }
-    // Blank line
-    if (!trimmed) {
-      tokens.push({ type: 'blank' });
-    }
+        // Blockquotes
+        const blockquoteMatch = line.match(/^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
+        if (blockquoteMatch) {
+            tokens.push({
+                type: 'blockquote',
+                range: [lineStart, pos],
+                text: blockquoteMatch[1].trim(),
+                attrs: blockquoteMatch[2]
+            });
+            continue;
+        }
-    i++;
-  }
+        // Paragraphs
+        if (line.trim()) {
+            const paraMatch = line.match(/^(.+?)(?:\s*(\{[^}]+\}))?$/);
+            if (paraMatch) {
+                tokens.push({
+                    type: 'para',
+                    range: [lineStart, pos],
+                    text: paraMatch[1].trim(),
+                    attrs: paraMatch[2] || null
+                });
+            }
+        }
+    }
-  return tokens;
+    return tokens;
 }
-// ============================================================================
-// Attribute Parser {#id .class key="value"}
-// ============================================================================
+// Inline value extraction - simplified
+function extractInlineValue(text, baseOffset = 0) {
+    const spans = [];
+    let pos = 0;
-function parseAttributes(attrString) {
-  const attrs = {};
-  const cleaned = attrString.replace(/^\{|\}$/g, '').trim();
-  // ID: #something
-  const idMatch = cleaned.match(/#([^\s.]+)/);
-  if (idMatch) attrs.id = idMatch[1];
-  // Classes: .class1 .class2
-  const classMatches = cleaned.match(/\.([^\s.#]+)/g);
-  if (classMatches) {
-    attrs.class = classMatches.map(c => c.substring(1)).join(' ');
-  }
+    while (pos < text.length) {
+        const bracketStart = text.indexOf('[', pos);
+        if (bracketStart === -1) {
+            if (pos < text.length) spans.push({ type: 'text', text: text.substring(pos) });
+            break;
+        }
-  // Key-value pairs: key="value" or key='value'
-  const kvRegex = /(\w+)=["']([^"']*)["']/g;
-  let match;
-  while ((match = kvRegex.exec(cleaned)) !== null) {
-    attrs[match[1]] = match[2];
-  }
+        if (bracketStart > pos) spans.push({ type: 'text', text: text.substring(pos, bracketStart) });
-  return attrs;
-}
+        const bracketEnd = text.indexOf(']', bracketStart);
+        if (bracketEnd === -1) {
+            spans.push({ type: 'text', text: text.substring(bracketStart) });
+            break;
+        }
-// ============================================================================
-// Inline Parser (for [text](url){attrs} and [text]{attrs})
-// ============================================================================
-function parseInline(text) {
-  const spans = [];
-  let pos = 0;
-  // Pattern: [text](url){attrs} or [text]{attrs}
-  const inlineRegex = /\[([^\]]+)\](?:\(([^)]+)\))?(?:\{([^}]+)\})?/g;
-  let match;
-  let lastIndex = 0;
-  while ((match = inlineRegex.exec(text)) !== null) {
-    // Text before match
-    if (match.index > lastIndex) {
-      spans.push({
-        type: 'text',
-        value: text.substring(lastIndex, match.index)
-      });
-    }
+        const spanText = text.substring(bracketStart + 1, bracketEnd);
+        let spanEnd = bracketEnd + 1;
+        let url = null;
+        let attrs = null;
+        // Parse link destination
+        if (text[spanEnd] === '(') {
+            const parenEnd = text.indexOf(')', spanEnd);
+            if (parenEnd !== -1) {
+                url = text.substring(spanEnd + 1, parenEnd);
+                spanEnd = parenEnd + 1;
+            }
+        }
-    const [fullMatch, linkText, url, attrs] = match;
-    spans.push({
-      type: url ? 'link' : 'span',
-      text: linkText,
-      url: url || null,
-      attrs: attrs ? parseAttributes(`{${attrs}}`) : {}
-    });
+        // Parse attributes
+        const attrsMatch = text.substring(spanEnd).match(/^\s*\{([^}]+)\}/);
+        if (attrsMatch) {
+            attrs = `{${attrsMatch[1]}}`;
+            spanEnd += attrsMatch[0].length;
+        }
-    lastIndex = match.index + fullMatch.length;
-  }
+        spans.push({
+            type: url ? 'link' : 'span',
+            text: spanText,
+            url: url,
+            attrs: attrs,
+            range: [baseOffset + bracketStart, baseOffset + spanEnd]
+        });
-  // Remaining text
-  if (lastIndex < text.length) {
-    spans.push({
-      type: 'text',
-      value: text.substring(lastIndex)
-    });
-  }
+        pos = spanEnd;
+    }
-  return spans.length > 0 ? spans : [{ type: 'text', value: text }];
+    return spans.length ? spans : [{ type: 'text', text: text }];
 }
-// ============================================================================
-// MD-LD Parser
-// ============================================================================
-export class MDLDParser {
-  constructor(options = {}) {
-    this.options = {
-      baseIRI: options.baseIRI || '',
-      defaultVocab: options.defaultVocab || 'http://schema.org/',
-      dataFactory: options.dataFactory || DefaultDataFactory,
-      ...options
-    };
+// Core processing functions - consolidated
+function createBlock(subject, entries, range, ctx) {
+    const expanded = entries.map(e => ({
+        ...e,
+        predicate: e.predicate ? expandIRI(e.predicate, ctx) : null,
+        classIRI: e.classIRI ? expandIRI(e.classIRI, ctx) : null
+    }));
-    this.df = this.options.dataFactory;
-    this.quads = [];
-    this.context = null;
-    this.rootSubject = null;
-    this.currentSubject = null;
-    this.blankNodeCounter = 0;
-    this.subjectStack = [];
-    this.blankNodeMap = new Map();
-  }
-  hashBlankNode(input) {
-    if (this.blankNodeMap.has(input)) {
-      return this.blankNodeMap.get(input);
-    }
-    let hash = 5381;
-    for (let i = 0; i < input.length; i++) {
-      hash = ((hash << 5) + hash) + input.charCodeAt(i);
-    }
-    const bnId = `b${Math.abs(hash).toString(16).slice(0, 12)}`;
-    this.blankNodeMap.set(input, bnId);
-    return bnId;
-  }
+    const blockId = hash([subject, ...expanded.map(e => JSON.stringify(e))].join('|'));
+    return {
+        id: blockId,
+        range: { start: range[0], end: range[1] },
+        subject,
+        entries: expanded,
+        context: { ...ctx }
+    };
+}
-  parse(markdown) {
-    this.quads = [];
+function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory) {
+    if (!subject || !predicate || !object) return;
+    const quad = dataFactory.quad(subject, predicate, object);
+    quads.push(quad);
+    quadIndex.set(JSON.stringify([quad.subject.value, quad.predicate.value, quad.object.value]), blockId);
+}
-    // Extract frontmatter
-    const { frontmatter, body } = this.extractFrontmatter(markdown);
+function createLiteralValue(value, datatype, language, context, dataFactory) {
+    if (datatype) return dataFactory.literal(value, dataFactory.namedNode(expandIRI(datatype, context)));
+    if (language) return dataFactory.literal(value, language);
+    return dataFactory.literal(value);
+}
-    // Parse YAML-LD frontmatter
-    if (frontmatter) {
-      try {
-        this.context = parseYAMLLD(frontmatter);
+function processAnnotation(token, state, textContent = null) {
+    if (!token.attrs) return;
-        // Check for @base in @context (JSON-LD standard)
-        if (this.context['@context']?.['@base']) {
-          this.options.baseIRI = this.context['@context']['@base'];
-        }
+    const ann = parseAnnotation(token.attrs);
+    const originalSubject = state.currentSubject;
-        this.rootSubject = this.resolveRootSubject(this.context);
-        // Emit root subject type if present
-        if (this.context['@type']) {
-          const types = Array.isArray(this.context['@type'])
-            ? this.context['@type']
-            : [this.context['@type']];
-          types.forEach(type => {
-            const typeNode = this.resolveResource(type);
-            if (typeNode) {
-              this.emitQuad(
-                this.rootSubject,
-                this.df.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
-                typeNode
-              );
-            }
-          });
-        }
-      } catch (e) {
-        console.error('YAML-LD parse error:', e);
-        this.context = {
-          '@context': { '@vocab': this.options.defaultVocab }
-        };
-        this.rootSubject = this.df.namedNode(this.options.baseIRI || '');
-      }
-    } else {
-      // No frontmatter - use base IRI as root
-      this.context = {
-        '@context': { '@vocab': this.options.defaultVocab }
-      };
-      this.rootSubject = this.df.namedNode(this.options.baseIRI || '');
+    // Handle subject declaration
+    if (ann.subject === 'RESET') {
+        state.currentSubject = null;
+        return;
+    }
+    if (ann.subject) {
+        state.currentSubject = state.df.namedNode(expandIRI(ann.subject, state.ctx));
     }
-    this.currentSubject = this.rootSubject;
-    // Tokenize markdown
-    const tokens = tokenizeMarkdown(body);
+    if (!originalSubject && !ann.subject) return;
-    // Process tokens
-    this.processTokens(tokens);
+    const targetSubject = ann.subject ?
+        state.df.namedNode(expandIRI(ann.subject, state.ctx)) :
+        originalSubject;
-    return this.quads;
-  }
+    const block = createBlock(targetSubject.value, ann.entries, token.range, state.ctx);
+    state.origin.blocks.set(block.id, block);
-  extractFrontmatter(markdown) {
-    const match = markdown.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
-    if (match) {
-      return { frontmatter: match[1], body: match[2] };
-    }
-    return { frontmatter: null, body: markdown };
-  }
-  resolveRootSubject(context) {
-    if (context['@id']) {
-      const id = context['@id'];
-      if (id.startsWith('#')) {
-        const fullIRI = (this.options.baseIRI || '') + id;
-        return this.df.namedNode(fullIRI);
-      }
-      if (id.startsWith('_:')) {
-        return this.df.blankNode(id.substring(2));
-      }
-      if (id.includes(':')) {
-        return this.df.namedNode(id);
-      }
-      return this.df.namedNode(this.options.baseIRI + id);
+    // Handle list context types
+    if (token.type === 'list' && state.listContext?.types.length > 0) {
+        state.listContext.types.forEach(typeIRI => {
+            emitQuad(state.quads, state.origin.quadIndex, block.id,
+                targetSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)),
+                state.df.namedNode(typeIRI), state.df);
+        });
     }
-    return this.df.namedNode(this.options.baseIRI || '');
-  }
-  getRootFragment() {
-    const rootValue = this.rootSubject.value;
-    const hashIndex = rootValue.lastIndexOf('#');
-    return hashIndex >= 0 ? rootValue.substring(hashIndex + 1) : '';
-  }
+    // Emit triples
+    ann.entries.forEach(e => {
+        if (e.kind === 'type') {
+            const typeSubject = token.url ?
+                state.df.namedNode(expandIRI(token.url, state.ctx)) : targetSubject;
+            emitQuad(state.quads, state.origin.quadIndex, block.id,
+                typeSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)),
+                state.df.namedNode(expandIRI(e.classIRI, state.ctx)), state.df);
+        } else if (e.kind === 'property' && e.predicate) {
+            const predicate = state.df.namedNode(expandIRI(e.predicate, state.ctx));
+            let object;
-  processTokens(tokens) {
-    let firstParagraph = true;
-    let titleEmitted = false;
+            if (token.url) {
+                object = state.df.namedNode(expandIRI(token.url, state.ctx));
+            } else if (ann.subject && !token.url) {
+                if (e.direction === 'reverse') {
+                    object = targetSubject;
+                } else {
+                    object = token.type === 'code' ?
+                        createLiteralValue(textContent || token.text || '', ann.datatype, ann.language, state.ctx, state.df) :
+                        targetSubject;
+                }
+            } else {
+                object = createLiteralValue(textContent || token.text || '', ann.datatype, ann.language, state.ctx, state.df);
+            }
-    for (let i = 0; i < tokens.length; i++) {
-      const token = tokens[i];
-      if (token.type === 'heading') {
-        // First h1 becomes label (but don't emit if heading has #id attribute)
-        if (token.depth === 1 && !titleEmitted && !token.attrs.id) {
-          this.emitQuad(
-            this.rootSubject,
-            this.df.namedNode('http://www.w3.org/2000/01/rdf-schema#label'),
-            this.df.literal(token.text)
-          );
-          titleEmitted = true;
-        }
+            const subject = e.direction === 'reverse' ? object :
+                (ann.subject && !token.url && token.type !== 'code') ? originalSubject : targetSubject;
+            const objectRef = e.direction === 'reverse' ? originalSubject : object;
-        // Heading with #id becomes new subject
-        if (token.attrs.id) {
-          const rootFragment = this.getRootFragment();
-          let newSubject;
-          if (token.attrs.id === rootFragment) {
-            // Same as root document subject
-            newSubject = this.rootSubject;
-          } else {
-            // Fragment relative to root
-            const baseForFragment = this.rootSubject.value.split('#')[0];
-            newSubject = this.df.namedNode(baseForFragment + '#' + token.attrs.id);
-          }
-          // Type assertion
-          if (token.attrs.typeof) {
-            const types = token.attrs.typeof.trim().split(/\s+/).filter(Boolean);
-            types.forEach(type => {
-              const typeNode = this.resolveResource(type);
-              if (typeNode) {
-                this.emitQuad(
-                  newSubject,
-                  this.df.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
-                  typeNode
-                );
-              }
-            });
-          }
-          // Heading text becomes an rdfs:label of the subject
-          this.emitQuad(
-            newSubject,
-            this.df.namedNode('http://www.w3.org/2000/01/rdf-schema#label'),
-            this.df.literal(token.text.trim())
-          );
-          // Set as current subject
-          this.currentSubject = newSubject;
-          this.subjectStack.push(newSubject);
-        } else if (!titleEmitted) {
-          // Heading without id keeps parent context
-          // but h1 without attributes still sets root as current
-          if (token.depth === 1) {
-            this.currentSubject = this.rootSubject;
-          }
+            emitQuad(state.quads, state.origin.quadIndex, block.id, subject, predicate, objectRef, state.df);
         }
+    });
+}
-        continue;
-      }
-      if (token.type === 'code') {
-        // Code blocks become SoftwareSourceCode-like resources
-        let snippetSubject;
-        if (token.attrs && token.attrs.id) {
-          const rootFragment = this.getRootFragment();
-          if (token.attrs.id === rootFragment) {
-            snippetSubject = this.rootSubject;
-          } else {
-            const baseForFragment = this.rootSubject.value.split('#')[0];
-            snippetSubject = this.df.namedNode(baseForFragment + '#' + token.attrs.id);
-          }
-        } else {
-          snippetSubject = this.df.blankNode(
-            this.hashBlankNode(`code:${token.lang || ''}:${token.text}`)
-          );
-        }
+// List processing - simplified
+function setupListContext(token, state, nextToken) {
+    if (!token.attrs || nextToken?.type !== 'list') return false;
-        // Type assertion: typeof override or default SoftwareSourceCode
-        if (token.attrs && token.attrs.typeof) {
-          const types = token.attrs.typeof.trim().split(/\s+/).filter(Boolean);
-          types.forEach(type => {
-            const typeNode = this.resolveResource(type);
-            if (typeNode) {
-              this.emitQuad(
-                snippetSubject,
-                this.df.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
-                typeNode
-              );
-            }
-          });
-        } else {
-          const defaultType = this.resolveResource('SoftwareSourceCode');
-          if (defaultType) {
-            this.emitQuad(
-              snippetSubject,
-              this.df.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
-              defaultType
-            );
-          }
-        }
+    const ann = parseAnnotation(token.attrs);
+    state.listContext = { predicate: null, types: [], reverse: false };
-        // Programming language from fenced code info string
-        if (token.lang) {
-          const langPred = this.resolveResource('programmingLanguage');
-          if (langPred) {
-            this.emitQuad(
-              snippetSubject,
-              langPred,
-              this.df.literal(token.lang)
-            );
-          }
+    ann.entries.forEach(e => {
+        if (e.kind === 'property') {
+            state.listContext.predicate = expandIRI(e.predicate, state.ctx);
+            state.listContext.reverse = e.direction === 'reverse';
         }
-        // Raw source text
-        const textPred = this.resolveResource('text');
-        if (textPred && token.text) {
-          this.emitQuad(
-            snippetSubject,
-            textPred,
-            this.df.literal(token.text)
-          );
+        if (e.kind === 'type') {
+            state.listContext.types.push(expandIRI(e.classIRI, state.ctx));
         }
+    });
+    return true;
+}
-        // Link from current subject to code snippet
-        const hasPartPred = this.resolveResource('hasPart');
-        if (hasPartPred) {
-          this.emitQuad(
-            this.currentSubject,
-            hasPartPred,
-            snippetSubject
-          );
-        }
+function processListItem(token, state) {
+    const ann = parseAnnotation(token.attrs);
+    const originalSubject = state.currentSubject;
-        continue;
-      }
-      if (token.type === 'paragraph') {
-        // First paragraph after title becomes description
-        if (firstParagraph && titleEmitted) {
-          const text = token.text.trim();
-          if (text && !text.match(/\[.*\]/)) { // Simple text, no links
-            this.emitQuad(
-              this.rootSubject,
-              this.df.namedNode('http://purl.org/dc/terms/description'),
-              this.df.literal(text)
-            );
-          }
-          firstParagraph = false;
-        }
+    if (ann.subject) {
+        state.currentSubject = state.df.namedNode(expandIRI(ann.subject, state.ctx));
+    }
-        // Process inline annotations
-        this.processInline(token.text);
-        continue;
-      }
-      if (token.type === 'listItem') {
-        this.processInline(token.text);
-        continue;
-      }
-      if (token.type === 'taskItem') {
-        // Task items create Action instances
-        let action;
-        if (token.attrs.id) {
-          const rootFragment = this.getRootFragment();
-          if (token.attrs.id === rootFragment) {
-            action = this.rootSubject;
-          } else {
-            const baseForFragment = this.rootSubject.value.split('#')[0];
-            action = this.df.namedNode(baseForFragment + '#' + token.attrs.id);
-          }
-        } else {
-          action = this.df.blankNode(this.hashBlankNode(`task:${token.text}`));
+    // Process item properties
+    ann.entries.forEach(e => {
+        if (e.kind === 'type') {
+            emitQuad(state.quads, state.origin.quadIndex, 'list-item',
+                state.currentSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)),
+                state.df.namedNode(expandIRI(e.classIRI, state.ctx)), state.df);
+        } else if (e.kind === 'property' && e.predicate) {
+            const predicate = state.df.namedNode(expandIRI(e.predicate, state.ctx));
+            const object = createLiteralValue(token.text, ann.datatype, ann.language, state.ctx, state.df);
+            emitQuad(state.quads, state.origin.quadIndex, 'list-item',
+                state.currentSubject, predicate, object, state.df);
         }
+    });
-        // Type declaration (always Action, or overridden by typeof)
-        let actionType = 'http://schema.org/Action';
-        if (token.attrs.typeof) {
-          const types = token.attrs.typeof.trim().split(/\s+/).filter(Boolean);
-          types.forEach(type => {
-            const typeNode = this.resolveResource(type);
-            if (typeNode) {
-              this.emitQuad(
-                action,
-                this.df.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
-                typeNode
-              );
-            }
-          });
+    // Process list context relationship
+    if (state.listContext?.predicate && originalSubject) {
+        const predicate = state.df.namedNode(expandIRI(state.listContext.predicate, state.ctx));
+        if (state.listContext.reverse) {
+            emitQuad(state.quads, state.origin.quadIndex, 'list-context',
+                state.currentSubject, predicate, originalSubject, state.df);
         } else {
-          this.emitQuad(
-            action,
-            this.df.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
-            this.df.namedNode(actionType)
-          );
+            emitQuad(state.quads, state.origin.quadIndex, 'list-context',
+                originalSubject, predicate, state.currentSubject, state.df);
         }
+    }
-        this.emitQuad(
-          action,
-          this.df.namedNode('http://schema.org/name'),
-          this.df.literal(token.text)
-        );
-        const status = token.checked
-          ? 'http://schema.org/CompletedActionStatus'
-          : 'http://schema.org/PotentialActionStatus';
-        this.emitQuad(
-          action,
-          this.df.namedNode('http://schema.org/actionStatus'),
-          this.df.namedNode(status)
-        );
-        // Link to current subject
-        this.emitQuad(
-          this.currentSubject,
-          this.df.namedNode('http://schema.org/potentialAction'),
-          action
-        );
-        continue;
-      }
+    // Apply list context types
+    if (state.listContext?.types.length > 0 && ann.subject) {
+        state.listContext.types.forEach(type => {
+            emitQuad(state.quads, state.origin.quadIndex, 'list-item',
+                state.currentSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)),
+                state.df.namedNode(expandIRI(type, state.ctx)), state.df);
+        });
     }
-  }
-  processInline(text) {
-    const spans = parseInline(text);
-    for (const span of spans) {
-      if (span.type === 'text') {
-        continue;
-      }
-      if (span.type === 'link' || span.type === 'span') {
-        const attrs = span.attrs;
-        // Subject declaration
-        let subject = this.currentSubject;
-        if (attrs.id) {
-          const rootFragment = this.getRootFragment();
-          if (attrs.id === rootFragment) {
-            // Same as root document subject
-            subject = this.rootSubject;
-          } else {
-            // Fragment relative to root
-            const baseForFragment = this.rootSubject.value.split('#')[0];
-            subject = this.df.namedNode(baseForFragment + '#' + attrs.id);
-          }
-          // Type assertion
-          if (attrs.typeof) {
-            const types = attrs.typeof.trim().split(/\s+/).filter(Boolean);
-            types.forEach(type => {
-              const typeNode = this.resolveResource(type);
-              if (typeNode) {
-                this.emitQuad(
-                  subject,
-                  this.df.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
-                  typeNode
-                );
-              }
-            });
-          }
-        }
-        // Property (literal)
-        if (attrs.property) {
-          const properties = attrs.property.trim().split(/\s+/).filter(Boolean);
-          properties.forEach(prop => {
-            const predicate = this.resolveResource(prop);
-            if (!predicate) return;
+    state.currentSubject = originalSubject;
+}
-            let object;
-            if (attrs.datatype) {
-              const datatypeIRI = this.resolveResource(attrs.datatype);
-              if (datatypeIRI && datatypeIRI.value) {
-                object = this.df.literal(span.text, datatypeIRI);
-              } else {
-                object = this.df.literal(span.text);
-              }
-            } else {
-              object = this.df.literal(span.text);
-            }
+// Main parsing function
+export function parse(text, options = {}) {
+    const state = {
+        ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
+        df: options.dataFactory || DataFactory,
+        quads: [],
+        origin: { blocks: new Map(), quadIndex: new Map() },
+        currentSubject: null,
+        listContext: null
+    };
-            this.emitQuad(subject, predicate, object);
-          });
-        }
+    const tokens = scanTokens(text);
+    tokens.filter(t => t.type === 'prefix').forEach(t => state.ctx[t.prefix] = t.iri);
-        // Relationship (object property)
-        if (attrs.rel && span.url) {
-          const rels = attrs.rel.trim().split(/\s+/).filter(Boolean);
-          let objectNode;
-          if (span.url.startsWith('#')) {
-            const baseForFragment = this.rootSubject.value.split('#')[0];
-            objectNode = this.df.namedNode(baseForFragment + span.url);
-          } else {
-            objectNode = this.df.namedNode(span.url);
-          }
-          rels.forEach(rel => {
-            const predicate = this.resolveResource(rel);
-            if (predicate) {
-              this.emitQuad(subject, predicate, objectNode);
-            }
-          });
+    for (let i = 0; i < tokens.length; i++) {
+        const token = tokens[i];
+        const nextToken = tokens[i + 1];
+        switch (token.type) {
+            case 'heading':
+                processAnnotation(token, state, token.text);
+                break;
+            case 'code':
+                processAnnotation(token, state, token.text);
+                break;
+            case 'para':
+                if (setupListContext(token, state, nextToken)) break;
+                // Regular paragraphs are NOT value carriers per spec
+                // Only process spans and links within paragraphs
+                if (state.currentSubject) {
+                    const spans = extractInlineValue(token.text, token.range[0]);
+                    // Process annotated spans (value carriers)
+                    spans.filter(s => s.type === 'span' && s.attrs)
+                        .forEach(span => processAnnotation(span, state, span.text));
+                    // Process spans where paragraph has annotation
+                    if (token.attrs) {
+                        spans.filter(s => s.type === 'span')
+                            .forEach(span => {
+                                // Attach paragraph's annotation to the span
+                                const spanWithAttrs = { ...span, attrs: token.attrs };
+                                processAnnotation(spanWithAttrs, state, span.text);
+                            });
+                    }
+                    // Process links (value carriers)
+                    spans.filter(s => s.type === 'link')
+                        .forEach(link => processAnnotation(link, state, link.text));
+                }
+                break;
+            case 'list':
+                if (state.listContext) processListItem(token, state);
+                break;
+            case 'blockquote':
+                if (state.currentSubject) processAnnotation(token, state, token.text);
+                break;
         }
+    }
-        // typeof without id creates typed blank node
-        if (attrs.typeof && !attrs.id && attrs.rel) {
-          const blankSubject = this.df.blankNode(this.hashBlankNode(`span:${span.text}:${JSON.stringify(attrs)}}`));
-          const types = attrs.typeof.trim().split(/\s+/).filter(Boolean);
-          types.forEach(type => {
-            const typeNode = this.resolveResource(type);
-            if (typeNode) {
-              this.emitQuad(
-                blankSubject,
-                this.df.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
-                typeNode
-              );
-            }
-          });
-          // Link from current subject
-          if (attrs.rel) {
-            const rels = attrs.rel.trim().split(/\s+/).filter(Boolean);
-            rels.forEach(rel => {
-              const predicate = this.resolveResource(rel);
-              if (predicate) {
-                this.emitQuad(subject, predicate, blankSubject);
-              }
-            });
-          }
+    return { quads: state.quads, origin: state.origin, context: state.ctx };
+}
+function shortenIRI(iri, ctx) {
+    if (!iri || !iri.startsWith('http')) return iri;
+    // Check @vocab first
+    if (ctx['@vocab'] && iri.startsWith(ctx['@vocab'])) {
+        return iri.substring(ctx['@vocab'].length);
+    }
+    // Check prefixes
+    for (const [prefix, namespace] of Object.entries(ctx)) {
+        if (prefix !== '@vocab' && iri.startsWith(namespace)) {
+            return prefix + ':' + iri.substring(namespace.length);
         }
-      }
     }
-  }
-  resolveResource(term) {
-    if (!term || typeof term !== 'string') return null;
+    // No prefix found, return full IRI
+    return iri;
+}
+export function serialize({ text, diff, origin, options = {} }) {
+    if (!diff || (!diff.add?.length && !diff.delete?.length)) return { text, origin };
-    const trimmed = term.trim();
-    if (!trimmed) return null;
+    let result = text;
+    const edits = [];
+    const ctx = options.context || {};
-    // Absolute IRI
-    if (trimmed.match(/^https?:/)) {
-      return this.df.namedNode(trimmed);
-    }
+    if (diff.delete) {
+        diff.delete.forEach(quad => {
+            const key = JSON.stringify([quad.subject.value, quad.predicate.value, quad.object.value]);
+            const blockId = origin?.quadIndex.get(key);
+            if (!blockId) return;
-    // CURIE
-    if (trimmed.includes(':')) {
-      const [prefix, reference] = trimmed.split(':', 2);
-      const contextObj = this.context?.['@context'] || {};
+            const block = origin.blocks.get(blockId);
+            if (!block) return;
-      if (contextObj[prefix]) {
-        return this.df.namedNode(contextObj[prefix] + reference);
-      }
+            const start = block.range.start;
+            const end = block.range.end;
+            const before = text.substring(Math.max(0, start - 1), start);
+            const after = text.substring(end, end + 1);
+            const deleteStart = before === '\n' ? start - 1 : start;
+            const deleteEnd = after === '\n' ? end + 1 : end;
-      // Default XSD namespace
-      if (prefix === 'xsd') {
-        return this.df.namedNode('http://www.w3.org/2001/XMLSchema#' + reference);
-      }
+            edits.push({ start: deleteStart, end: deleteEnd, text: '' });
+        });
     }
-    // Default vocab
-    const vocab = this.context?.['@context']?.['@vocab'] || this.options.defaultVocab;
-    return this.df.namedNode(vocab + trimmed);
-  }
+    if (diff.add) {
+        diff.add.forEach(quad => {
+            let insertPos = result.length;
-  emitQuad(subject, predicate, object) {
-    if (!subject || !predicate || !object) return;
+            for (const [, block] of origin?.blocks || []) {
+                if (block.subject === quad.subject.value) {
+                    insertPos = block.range.end;
+                    break;
+                }
+            }
-    const quad = this.df.quad(subject, predicate, object);
-    this.quads.push(quad);
-  }
+            const pred = shortenIRI(quad.predicate.value, ctx);
+            let objText;
-  getQuads() {
-    return this.quads;
-  }
-}
+            if (quad.object.termType === 'Literal') {
+                objText = quad.object.value;
+            } else {
+                objText = shortenIRI(quad.object.value, ctx);
+            }
-// ============================================================================
-// Convenience API
-// ============================================================================
+            const newLine = `\n[${objText}] {${pred}}`;
+            edits.push({ start: insertPos, end: insertPos, text: newLine });
+        });
+    }
+    edits.sort((a, b) => b.start - a.start);
+    edits.forEach(edit => {
+        result = result.substring(0, edit.start) + edit.text + result.substring(edit.end);
+    });
-export function parseMDLD(markdown, options = {}) {
-  const parser = new MDLDParser(options);
-  return parser.parse(markdown);
+    return { text: result, origin };
 }
-export default { MDLDParser, parseMDLD, DefaultDataFactory };
+export default { parse, serialize, parseAnnotation };