mdld-parse 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4):
  1. package/README.md +374 -0
  2. package/index.js +882 -0
  3. package/package.json +39 -0
  4. package/tests.js +409 -0
package/index.js ADDED
@@ -0,0 +1,882 @@
1
+ /**
2
+ * MD-LD Parser — Markdown-Linked Data to RDF Quads
3
+ *
4
+ * Zero-dependency, streaming-capable parser for MD-LD documents.
5
+ * Outputs RDF/JS compatible quads.
6
+ */
7
+
8
+ // ============================================================================
9
+ // RDF/JS Data Factory (Minimal Implementation)
10
+ // ============================================================================
11
+
12
/**
 * Minimal RDF/JS-style data factory used when the caller does not supply one.
 *
 * Terms are plain objects carrying `termType` and `value`; no `equals()`
 * methods are attached.
 */
const DefaultDataFactory = {
  /**
   * @param {string} value - IRI of the node.
   * @returns {{termType: 'NamedNode', value: string}}
   */
  namedNode: (value) => ({ termType: 'NamedNode', value }),

  /**
   * @param {string} [value] - Blank node label; when omitted a random
   *   base-36 label prefixed with `b` is generated.
   * @returns {{termType: 'BlankNode', value: string}}
   */
  blankNode: (value = `b${Math.random().toString(36).slice(2, 11)}`) => ({
    termType: 'BlankNode',
    value
  }),

  /**
   * @param {string} value - Lexical form of the literal.
   * @param {string|object} [languageOrDatatype] - A non-empty string is used
   *   as the language tag; any other truthy value is used as the datatype
   *   term. Omitted, empty or falsy values yield a plain `xsd:string` literal.
   * @returns {{termType: 'Literal', value: string, language: string, datatype: object}}
   */
  literal: (value, languageOrDatatype) => {
    // An empty string is not a language tag: it must not produce an
    // rdf:langString literal with an empty language.
    if (typeof languageOrDatatype === 'string' && languageOrDatatype !== '') {
      return {
        termType: 'Literal',
        value,
        language: languageOrDatatype,
        datatype: { termType: 'NamedNode', value: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#langString' }
      };
    }
    return {
      termType: 'Literal',
      value,
      language: '',
      datatype: languageOrDatatype || { termType: 'NamedNode', value: 'http://www.w3.org/2001/XMLSchema#string' }
    };
  },

  /**
   * @param {object} subject
   * @param {object} predicate
   * @param {object} object
   * @param {object} [graph] - Defaults to the default graph term.
   * @returns {{subject: object, predicate: object, object: object, graph: object}}
   */
  quad: (subject, predicate, object, graph) => ({
    subject,
    predicate,
    object,
    graph: graph || DefaultDataFactory.defaultGraph()
  }),

  /** @returns {{termType: 'DefaultGraph', value: ''}} */
  defaultGraph: () => ({ termType: 'DefaultGraph', value: '' })
};
42
+
43
+ // ============================================================================
44
+ // YAML-LD Parser (Minimal YAML subset for frontmatter)
45
+ // ============================================================================
46
+
47
/**
 * Parses the small YAML subset used for MD-LD frontmatter.
 *
 * Supported: `key: value` pairs, indentation-nested mappings, block
 * sequences of scalars (`- item`) under a key with an empty value, full-line
 * `#` comments, and quoted keys (which may contain `:`).
 * Lines that fit none of these shapes are ignored.
 *
 * A key with an empty value and no nested children is left out of the result.
 * Keys named `__proto__` are skipped so that frontmatter cannot replace the
 * prototype of the returned object.
 *
 * @param {string} yamlText - Frontmatter text without the `---` delimiters.
 * @returns {object} Parsed mapping; `{}` if parsing throws.
 */
function parseYAMLLD(yamlText) {
  try {
    const root = {};
    // Stack of open mappings; `indent` is the column of that mapping's keys.
    const stack = [{ indent: 0, map: root }];
    // Most recent key whose value was empty: it may still receive a nested
    // mapping or a sequence from the lines that follow.
    let openKey = null;
    let openSeq = null;

    for (const line of yamlText.trim().split(/\r?\n/)) {
      const trimmed = line.trim();
      if (!trimmed || trimmed.startsWith('#')) continue;

      const indent = line.match(/^\s*/)[0].length;

      // Sequence item: belongs to the last key that had an empty value.
      if (trimmed.startsWith('- ')) {
        if (openKey) {
          if (!openSeq) {
            openSeq = [];
            openKey.map[openKey.key] = openSeq;
          }
          openSeq.push(parseYAMLValue(trimmed.substring(2).trim()));
        }
        continue;
      }

      // Key-value pair. Quoted keys are matched as a unit so they may hold ':'.
      const entry = trimmed.match(/^(?:"([^"]*)"|'([^']*)'|([^:]+?))\s*:(.*)$/);
      if (!entry) continue;

      const key = entry[1] ?? entry[2] ?? entry[3].trim().replace(/^['"]|['"]$/g, '');
      const value = entry[4].trim();

      let target;
      if (openKey && !openSeq && indent > openKey.indent) {
        // First child of a key with an empty value: open a nested mapping.
        target = {};
        openKey.map[openKey.key] = target;
        stack.push({ indent, map: target });
      } else {
        // Dedent closes every mapping that is indented deeper than this line.
        while (stack.length > 1 && indent < stack[stack.length - 1].indent) {
          stack.pop();
        }
        target = stack[stack.length - 1].map;
      }
      openKey = null;
      openSeq = null;

      if (key === '__proto__') continue;

      if (!value) {
        // Empty value: a nested mapping or sequence may follow.
        openKey = { map: target, key, indent };
        continue;
      }

      target[key] = parseYAMLValue(value);
    }

    return root;
  } catch (e) {
    console.warn('YAML-LD parse error:', e);
    return {};
  }
}

/**
 * Converts a YAML scalar's text into a JavaScript value.
 *
 * A scalar wrapped in a matching pair of quotes is always returned as a
 * string (so `'1.0'` and `"true"` keep their text). Unquoted `true`, `false`,
 * `null`, integers and simple decimals are converted; anything else is
 * returned as a string with a stray leading/trailing quote removed.
 *
 * @param {string} value - Trimmed scalar text.
 * @returns {string|number|boolean|null}
 */
function parseYAMLValue(value) {
  const first = value[0];
  const isQuoted =
    value.length >= 2 &&
    (first === '"' || first === "'") &&
    value[value.length - 1] === first;
  if (isQuoted) return value.slice(1, -1);

  const bare = value.replace(/^['"]|['"]$/g, '');

  if (bare === 'true') return true;
  if (bare === 'false') return false;
  if (bare === 'null') return null;
  if (/^-?\d+$/.test(bare)) return parseInt(bare, 10);
  if (/^-?\d+\.\d+$/.test(bare)) return parseFloat(bare);

  return bare;
}
121
+
122
+ // ============================================================================
123
+ // Markdown Tokenizer (Minimal - focuses on structure)
124
+ // ============================================================================
125
+
126
/**
 * Splits Markdown text into a flat list of line-level tokens.
 *
 * Token shapes produced:
 *   - `{ type: 'code', lang, text, attrs }` for fenced code blocks
 *   - `{ type: 'heading', depth, text, attrs }`
 *   - `{ type: 'taskItem', indent, checked, text, attrs }`
 *   - `{ type: 'listItem', indent, text, attrs }`
 *   - `{ type: 'paragraph', text }` (one token per non-blank line)
 *   - `{ type: 'blank' }`
 *
 * Both LF and CRLF line endings are accepted. A fenced block is closed by a
 * fence of at least the opening length with no info string; a block still
 * open at the end of the input is emitted with the lines collected so far.
 *
 * @param {string} text - Markdown body (frontmatter already removed).
 * @returns {Array<object>} Tokens in document order.
 */
function tokenizeMarkdown(text) {
  const tokens = [];
  const lines = text.split(/\r?\n/);

  // State of the fenced code block currently being collected, if any.
  let openFence = null;
  let codeLang = null;
  let codeAttrs = {};
  let codeLines = [];

  const flushCodeBlock = () => {
    tokens.push({
      type: 'code',
      lang: codeLang,
      text: codeLines.join('\n'),
      attrs: codeAttrs
    });
    openFence = null;
    codeLang = null;
    codeAttrs = {};
    codeLines = [];
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const trimmed = line.trim();

    // Fenced code block: ```lang {attrs}
    const fenceMatch = line.match(/^(```+)(.*)$/);
    if (fenceMatch) {
      const [, fence, rest] = fenceMatch;
      const info = rest.trim();

      if (openFence === null) {
        openFence = fence;
        codeLines = [];
        codeLang = null;
        codeAttrs = {};

        if (info) {
          // Language is the first word before any attribute block.
          const braceIndex = info.indexOf('{');
          const langPart = braceIndex >= 0 ? info.substring(0, braceIndex).trim() : info;
          if (langPart) {
            codeLang = langPart.split(/\s+/)[0];
          }

          // Attributes after language: ```lang {#id typeof="..."}
          const attrMatch = info.match(/\{[^}]+\}/);
          if (attrMatch) {
            codeAttrs = parseAttributes(attrMatch[0]);
          }
        }
        continue;
      }

      // Inside a block, only a bare fence at least as long as the opener
      // closes it; anything else (e.g. a nested ```js line) is content.
      if (fence.length >= openFence.length && info === '') {
        flushCodeBlock();
        continue;
      }
    }

    if (openFence !== null) {
      codeLines.push(line);
      continue;
    }

    // Heading, with attributes either trailing or on the following line.
    const headingMatch = line.match(/^(#{1,6})\s+(.+?)(\s*\{[^}]+\})?$/);
    if (headingMatch) {
      const [, hashes, headingText, trailingAttrs] = headingMatch;
      let attributes = trailingAttrs ? parseAttributes(trailingAttrs) : {};

      if (!trailingAttrs && i + 1 < lines.length) {
        const nextLine = lines[i + 1].trim();
        if (nextLine.match(/^\{[^}]+\}$/)) {
          attributes = parseAttributes(nextLine);
          i++; // Consume the attribute-only line.
        }
      }

      tokens.push({
        type: 'heading',
        depth: hashes.length,
        text: headingText.trim(),
        attrs: attributes
      });
      continue;
    }

    // Task list item: - [ ] text {attrs}
    const taskMatch = line.match(/^(\s*)([-*+])\s+\[([ xX])\]\s+(.+?)(\s*\{[^}]+\})?$/);
    if (taskMatch) {
      const [, indent, , checked, taskText, attrs] = taskMatch;
      tokens.push({
        type: 'taskItem',
        indent: indent.length,
        checked: checked.toLowerCase() === 'x',
        text: taskText.trim(),
        attrs: attrs ? parseAttributes(attrs) : {}
      });
      continue;
    }

    // Regular list item (checked after task items, which it would also match).
    const listMatch = line.match(/^(\s*)([-*+]|\d+\.)\s+(.+?)(\s*\{[^}]+\})?$/);
    if (listMatch) {
      const [, indent, , itemText, attrs] = listMatch;

      // Trailing attributes (e.g. - [Link](#id){rel="hasPart"}) are kept in
      // the text as well, so the inline parser can attach them to the
      // link/span they follow.
      const combinedText = attrs ? `${itemText}${attrs.trim()}` : itemText;

      tokens.push({
        type: 'listItem',
        indent: indent.length,
        text: combinedText.trim(),
        attrs: attrs ? parseAttributes(attrs) : {}
      });
      continue;
    }

    // Paragraph line; lines starting with --- or ``` are dropped.
    if (trimmed && !trimmed.match(/^(---|```)/)) {
      tokens.push({ type: 'paragraph', text: line });
      continue;
    }

    if (!trimmed) {
      tokens.push({ type: 'blank' });
    }
  }

  // An unterminated fence runs to the end of the document instead of
  // silently discarding its content.
  if (openFence !== null) {
    flushCodeBlock();
  }

  return tokens;
}
279
+
280
+ // ============================================================================
281
+ // Attribute Parser {#id .class key="value"}
282
+ // ============================================================================
283
+
284
/**
 * Parses an attribute block such as `{#id .class key="value"}`.
 *
 * Quoted `key="value"` / `key='value'` pairs are extracted first and removed
 * from the text before `#id` and `.class` tokens are looked for, so that `#`
 * and `.` inside a value (IRIs, for instance) are not mistaken for an id or
 * a class. An explicit `id="..."` / `class="..."` pair overrides the
 * shorthand form.
 *
 * @param {string} attrString - Attribute text, with or without the enclosing
 *   braces and surrounding whitespace.
 * @returns {object} Map with optional `id`, `class` (space-joined) and one
 *   entry per key-value pair.
 */
function parseAttributes(attrString) {
  const attrs = {};
  // Trim before stripping braces: callers may pass leading whitespace.
  const cleaned = attrString.trim().replace(/^\{|\}$/g, '').trim();

  // Key-value pairs; the closing quote must match the opening one.
  const keyValues = {};
  const kvRegex = /(\w+)=(?:"([^"]*)"|'([^']*)')/g;
  const shorthand = cleaned.replace(kvRegex, (whole, key, doubleQuoted, singleQuoted) => {
    keyValues[key] = doubleQuoted !== undefined ? doubleQuoted : singleQuoted;
    return ' ';
  });

  // ID: #something
  const idMatch = shorthand.match(/#([^\s.]+)/);
  if (idMatch) attrs.id = idMatch[1];

  // Classes: .class1 .class2
  const classMatches = shorthand.match(/\.([^\s.#]+)/g);
  if (classMatches) {
    attrs.class = classMatches.map((c) => c.substring(1)).join(' ');
  }

  return Object.assign(attrs, keyValues);
}
307
+
308
+ // ============================================================================
309
+ // Inline Parser (for [text](url){attrs} and [text]{attrs})
310
+ // ============================================================================
311
+
312
/**
 * Splits a line of text into plain-text runs and annotated spans.
 *
 * Recognised forms are `[text](url){attrs}`, `[text](url)`, `[text]{attrs}`
 * and bare `[text]`. For links, only the destination is kept as `url`: an
 * optional title (`[t](url "title")`) is dropped and an angle-bracketed
 * destination (`[t](<url>)`) is unwrapped.
 *
 * @param {string} text - Inline Markdown text.
 * @returns {Array<object>} Items of shape `{ type: 'text', value }` or
 *   `{ type: 'link'|'span', text, url, attrs }`; never empty.
 */
function parseInline(text) {
  const spans = [];

  // Reduces the raw "(...)" content to the link destination.
  const toDestination = (raw) => {
    const dest = raw.trim();
    if (dest.startsWith('<')) {
      const close = dest.indexOf('>');
      if (close > 0) return dest.slice(1, close);
    }
    return dest.split(/\s+/)[0];
  };

  // Pattern: [text](url){attrs} or [text]{attrs}
  const inlineRegex = /\[([^\]]+)\](?:\(([^)]+)\))?(?:\{([^}]+)\})?/g;
  let match;
  let lastIndex = 0;

  while ((match = inlineRegex.exec(text)) !== null) {
    // Plain text between the previous match and this one.
    if (match.index > lastIndex) {
      spans.push({
        type: 'text',
        value: text.substring(lastIndex, match.index)
      });
    }

    const [fullMatch, linkText, rawUrl, attrs] = match;
    const url = rawUrl ? toDestination(rawUrl) : '';
    spans.push({
      type: url ? 'link' : 'span',
      text: linkText,
      url: url || null,
      attrs: attrs ? parseAttributes(`{${attrs}}`) : {}
    });

    lastIndex = match.index + fullMatch.length;
  }

  // Trailing plain text.
  if (lastIndex < text.length) {
    spans.push({
      type: 'text',
      value: text.substring(lastIndex)
    });
  }

  return spans.length > 0 ? spans : [{ type: 'text', value: text }];
}
351
+
352
+ // ============================================================================
353
+ // MD-LD Parser
354
+ // ============================================================================
355
+
356
/**
 * MD-LD parser: converts a Markdown document with optional YAML-LD
 * frontmatter and `{...}` attribute annotations into an array of quads built
 * with the configured data factory.
 *
 * State such as `quads`, `context`, `rootSubject` and `currentSubject` lives
 * on the instance and is rewritten by each `parse()` call. Note that an
 * `@base` found in a document's frontmatter overwrites `options.baseIRI` and
 * therefore remains in effect for later `parse()` calls on the same instance.
 */
export class MDLDParser {
  /**
   * @param {object} [options]
   * @param {string} [options.baseIRI=''] - IRI used for the root subject and
   *   for resolving relative / fragment identifiers.
   * @param {string} [options.defaultVocab='http://schema.org/'] - Vocabulary
   *   prepended to bare terms when the document declares no `@vocab`.
   * @param {object} [options.dataFactory] - Object providing `namedNode`,
   *   `blankNode`, `literal` and `quad`; defaults to `DefaultDataFactory`.
   */
  constructor(options = {}) {
    this.options = {
      ...options,
      // Defaults are applied after the spread so that an option passed
      // explicitly as undefined/null cannot wipe them out.
      baseIRI: options.baseIRI ?? '',
      defaultVocab: options.defaultVocab ?? 'http://schema.org/',
      dataFactory: options.dataFactory ?? DefaultDataFactory
    };

    this.df = this.options.dataFactory;
    this.quads = [];
    this.context = null;
    this.rootSubject = null;
    this.currentSubject = null;
    this.blankNodeCounter = 0;
    this.subjectStack = [];
    this.blankNodeMap = new Map();
  }

  /**
   * Returns a deterministic blank node label for `input`, memoised per
   * parser instance. The label is a djb2-style hash (`hash * 33 + charCode`)
   * rendered in hex, so equal inputs always map to the same label.
   *
   * @param {string} input
   * @returns {string} Label of the form `b<hex>`.
   */
  hashBlankNode(input) {
    if (this.blankNodeMap.has(input)) {
      return this.blankNodeMap.get(input);
    }
    let hash = 5381;
    for (let i = 0; i < input.length; i++) {
      hash = ((hash << 5) + hash) + input.charCodeAt(i);
    }
    const bnId = `b${Math.abs(hash).toString(16).slice(0, 12)}`;
    this.blankNodeMap.set(input, bnId);
    return bnId;
  }

  /**
   * Parses a complete MD-LD document.
   *
   * @param {string} markdown - Document text, optionally starting with
   *   `---`-delimited YAML-LD frontmatter.
   * @returns {Array<object>} The quads emitted for this document.
   */
  parse(markdown) {
    this.quads = [];
    // Start each document with an empty subject stack.
    this.subjectStack = [];

    const { frontmatter, body } = this.extractFrontmatter(markdown);

    if (frontmatter) {
      try {
        this.context = parseYAMLLD(frontmatter);

        // @base inside @context overrides the configured base IRI.
        if (this.context['@context']?.['@base']) {
          this.options.baseIRI = this.context['@context']['@base'];
        }

        this.rootSubject = this.resolveRootSubject(this.context);

        // Emit the root subject's type(s) if declared.
        const declaredType = this.context['@type'];
        if (declaredType) {
          this.emitTypes(
            this.rootSubject,
            Array.isArray(declaredType) ? declaredType : [declaredType]
          );
        }
      } catch (e) {
        console.error('YAML-LD parse error:', e);
        this.useDefaultContext();
      }
    } else {
      // No frontmatter: the base IRI is the root subject.
      this.useDefaultContext();
    }

    this.currentSubject = this.rootSubject;
    this.processTokens(tokenizeMarkdown(body));

    return this.quads;
  }

  /**
   * Installs the fallback context (default vocabulary only) and makes the
   * base IRI the root subject.
   */
  useDefaultContext() {
    this.context = {
      '@context': { '@vocab': this.options.defaultVocab }
    };
    this.rootSubject = this.df.namedNode(this.options.baseIRI || '');
  }

  /**
   * Separates leading `---` frontmatter from the Markdown body. Accepts LF
   * and CRLF line endings, trailing spaces/tabs after the closing `---`, and
   * a closing `---` that ends the input.
   *
   * @param {string} markdown
   * @returns {{frontmatter: (string|null), body: string}}
   */
  extractFrontmatter(markdown) {
    const match = markdown.match(/^---\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n([\s\S]*))?$/);
    if (match) {
      return { frontmatter: match[1], body: match[2] ?? '' };
    }
    return { frontmatter: null, body: markdown };
  }

  /**
   * Builds the root subject term from the frontmatter's `@id`:
   * `#frag` is appended to the base IRI, `_:label` becomes a blank node, a
   * value containing `:` is used as-is, anything else is appended to the
   * base IRI. Without `@id` the base IRI itself is used.
   *
   * @param {object} context - Parsed frontmatter.
   * @returns {object} NamedNode or BlankNode term.
   */
  resolveRootSubject(context) {
    const id = context['@id'];
    if (!id) {
      return this.df.namedNode(this.options.baseIRI || '');
    }
    if (id.startsWith('#')) {
      return this.df.namedNode((this.options.baseIRI || '') + id);
    }
    if (id.startsWith('_:')) {
      return this.df.blankNode(id.substring(2));
    }
    if (id.includes(':')) {
      return this.df.namedNode(id);
    }
    return this.df.namedNode(this.options.baseIRI + id);
  }

  /**
   * @returns {string} The part of the root subject's value after its last
   *   `#`, or `''` when it has none.
   */
  getRootFragment() {
    const rootValue = this.rootSubject.value;
    const hashIndex = rootValue.lastIndexOf('#');
    return hashIndex >= 0 ? rootValue.substring(hashIndex + 1) : '';
  }

  /**
   * Resolves an `#id` attribute to a subject: the root subject itself when
   * the id equals the root's fragment, otherwise a NamedNode made from the
   * root subject's value up to its first `#`, plus `#id`.
   *
   * @param {string} id - Fragment identifier without the leading `#`.
   * @returns {object} Subject term.
   */
  resolveFragmentSubject(id) {
    if (id === this.getRootFragment()) {
      return this.rootSubject;
    }
    const baseForFragment = this.rootSubject.value.split('#')[0];
    return this.df.namedNode(baseForFragment + '#' + id);
  }

  /**
   * Splits a whitespace-separated attribute value into its terms.
   *
   * @param {string} value
   * @returns {string[]}
   */
  splitTerms(value) {
    return value.trim().split(/\s+/).filter(Boolean);
  }

  /**
   * Emits one `rdf:type` quad per resolvable entry of `types`.
   *
   * @param {object} subject
   * @param {Array} types - Terms, CURIEs or IRIs; unresolvable entries are skipped.
   */
  emitTypes(subject, types) {
    for (const type of types) {
      const typeNode = this.resolveResource(type);
      if (typeNode) {
        this.emitQuad(
          subject,
          this.df.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
          typeNode
        );
      }
    }
  }

  /**
   * Emits `subject --rel--> objectNode` for every resolvable term in a
   * whitespace-separated `rel` attribute value.
   *
   * @param {object} subject
   * @param {string} relAttr
   * @param {object} objectNode
   */
  emitRelations(subject, relAttr, objectNode) {
    for (const rel of this.splitTerms(relAttr)) {
      const predicate = this.resolveResource(rel);
      if (predicate) {
        this.emitQuad(subject, predicate, objectNode);
      }
    }
  }

  /**
   * Walks the token list and emits quads for headings, code blocks,
   * paragraphs, list items and task items, updating `currentSubject` as
   * headings introduce new subjects.
   *
   * @param {Array<object>} tokens - Output of the Markdown tokenizer.
   */
  processTokens(tokens) {
    let firstParagraph = true;
    let titleEmitted = false;

    for (const token of tokens) {
      if (token.type === 'heading') {
        // The first h1 without an #id labels the root subject.
        if (token.depth === 1 && !titleEmitted && !token.attrs.id) {
          this.emitQuad(
            this.rootSubject,
            this.df.namedNode('http://www.w3.org/2000/01/rdf-schema#label'),
            this.df.literal(token.text)
          );
          titleEmitted = true;
        }

        if (token.attrs.id) {
          // Heading with #id introduces a new current subject.
          const newSubject = this.resolveFragmentSubject(token.attrs.id);

          if (token.attrs.typeof) {
            this.emitTypes(newSubject, this.splitTerms(token.attrs.typeof));
          }

          // Heading text becomes an rdfs:label of the subject.
          this.emitQuad(
            newSubject,
            this.df.namedNode('http://www.w3.org/2000/01/rdf-schema#label'),
            this.df.literal(token.text.trim())
          );

          this.currentSubject = newSubject;
          this.subjectStack.push(newSubject);
        } else if (token.depth === 1) {
          // An h1 without #id returns to the root subject. (This must not
          // depend on titleEmitted, which is already true by this point for
          // any h1 without an id.)
          this.currentSubject = this.rootSubject;
        }

        continue;
      }

      if (token.type === 'code') {
        // Code blocks become SoftwareSourceCode-like resources.
        const snippetSubject = token.attrs?.id
          ? this.resolveFragmentSubject(token.attrs.id)
          : this.df.blankNode(
              this.hashBlankNode(`code:${token.lang || ''}:${token.text}`)
            );

        // typeof overrides the default SoftwareSourceCode type.
        if (token.attrs?.typeof) {
          this.emitTypes(snippetSubject, this.splitTerms(token.attrs.typeof));
        } else {
          this.emitTypes(snippetSubject, ['SoftwareSourceCode']);
        }

        // Programming language from the fence's info string.
        if (token.lang) {
          const langPred = this.resolveResource('programmingLanguage');
          if (langPred) {
            this.emitQuad(snippetSubject, langPred, this.df.literal(token.lang));
          }
        }

        // Raw source text.
        const textPred = this.resolveResource('text');
        if (textPred && token.text) {
          this.emitQuad(snippetSubject, textPred, this.df.literal(token.text));
        }

        // Link from the current subject to the snippet.
        const hasPartPred = this.resolveResource('hasPart');
        if (hasPartPred) {
          this.emitQuad(this.currentSubject, hasPartPred, snippetSubject);
        }

        continue;
      }

      if (token.type === 'paragraph') {
        // The first paragraph seen after the title becomes the description,
        // unless it contains bracketed (link/span) syntax.
        if (firstParagraph && titleEmitted) {
          const text = token.text.trim();
          if (text && !text.match(/\[.*\]/)) {
            this.emitQuad(
              this.rootSubject,
              this.df.namedNode('http://purl.org/dc/terms/description'),
              this.df.literal(text)
            );
          }
          firstParagraph = false;
        }

        this.processInline(token.text);
        continue;
      }

      if (token.type === 'listItem') {
        this.processInline(token.text);
        continue;
      }

      if (token.type === 'taskItem') {
        // Task items create Action instances.
        const action = token.attrs.id
          ? this.resolveFragmentSubject(token.attrs.id)
          : this.df.blankNode(this.hashBlankNode(`task:${token.text}`));

        // Type is schema:Action unless overridden by typeof.
        if (token.attrs.typeof) {
          this.emitTypes(action, this.splitTerms(token.attrs.typeof));
        } else {
          this.emitQuad(
            action,
            this.df.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
            this.df.namedNode('http://schema.org/Action')
          );
        }

        this.emitQuad(
          action,
          this.df.namedNode('http://schema.org/name'),
          this.df.literal(token.text)
        );

        const status = token.checked
          ? 'http://schema.org/CompletedActionStatus'
          : 'http://schema.org/PotentialActionStatus';

        this.emitQuad(
          action,
          this.df.namedNode('http://schema.org/actionStatus'),
          this.df.namedNode(status)
        );

        // Link from the current subject to the action.
        this.emitQuad(
          this.currentSubject,
          this.df.namedNode('http://schema.org/potentialAction'),
          action
        );

        continue;
      }
    }
  }

  /**
   * Emits quads for the annotated links/spans found in one line of text:
   * `#id` selects the subject, `property` emits literals (optionally typed
   * via `datatype`), `rel` on a link emits an object relation, and
   * `typeof` + `rel` without `#id` creates a typed blank node linked from
   * the subject.
   *
   * @param {string} text - Inline Markdown text.
   */
  processInline(text) {
    for (const span of parseInline(text)) {
      if (span.type !== 'link' && span.type !== 'span') {
        continue;
      }

      const attrs = span.attrs;

      // Subject declaration.
      let subject = this.currentSubject;
      if (attrs.id) {
        subject = this.resolveFragmentSubject(attrs.id);
        if (attrs.typeof) {
          this.emitTypes(subject, this.splitTerms(attrs.typeof));
        }
      }

      // Property (literal).
      if (attrs.property) {
        for (const prop of this.splitTerms(attrs.property)) {
          const predicate = this.resolveResource(prop);
          if (!predicate) continue;

          const datatypeIRI = attrs.datatype ? this.resolveResource(attrs.datatype) : null;
          const object = datatypeIRI && datatypeIRI.value
            ? this.df.literal(span.text, datatypeIRI)
            : this.df.literal(span.text);

          this.emitQuad(subject, predicate, object);
        }
      }

      // Relationship (object property) to the link target.
      if (attrs.rel && span.url) {
        let objectNode;
        if (span.url.startsWith('#')) {
          const baseForFragment = this.rootSubject.value.split('#')[0];
          objectNode = this.df.namedNode(baseForFragment + span.url);
        } else {
          objectNode = this.df.namedNode(span.url);
        }
        this.emitRelations(subject, attrs.rel, objectNode);
      }

      // typeof without id creates a typed blank node linked via rel.
      if (attrs.typeof && !attrs.id && attrs.rel) {
        // The hash input (including its trailing brace) is kept as-is so
        // that generated blank node labels stay stable.
        const blankSubject = this.df.blankNode(
          this.hashBlankNode(`span:${span.text}:${JSON.stringify(attrs)}}`)
        );
        this.emitTypes(blankSubject, this.splitTerms(attrs.typeof));
        this.emitRelations(subject, attrs.rel, blankSubject);
      }
    }
  }

  /**
   * Expands a term, CURIE or IRI into a NamedNode.
   *
   * Resolution order: `http(s):` IRIs are used as-is; `prefix:reference` is
   * expanded with the document's `@context` (or the built-in `xsd` prefix);
   * a value whose unknown prefix looks like an IRI scheme (`urn:`,
   * `mailto:`, ...) is treated as an absolute IRI; everything else is
   * appended to `@vocab` / the default vocabulary.
   *
   * @param {*} term
   * @returns {object|null} NamedNode, or null for empty / non-string input.
   */
  resolveResource(term) {
    if (!term || typeof term !== 'string') return null;

    const trimmed = term.trim();
    if (!trimmed) return null;

    // Absolute http(s) IRI.
    if (/^https?:/.test(trimmed)) {
      return this.df.namedNode(trimmed);
    }

    // CURIE: split on the first colon only, so the reference may itself
    // contain colons.
    const colonIndex = trimmed.indexOf(':');
    if (colonIndex >= 0) {
      const prefix = trimmed.slice(0, colonIndex);
      const reference = trimmed.slice(colonIndex + 1);
      const contextObj = this.context?.['@context'] || {};

      // Own string entries only: ignores inherited members such as
      // `constructor` and non-object contexts.
      const namespace = Object.prototype.hasOwnProperty.call(contextObj, prefix)
        ? contextObj[prefix]
        : undefined;
      if (typeof namespace === 'string' && namespace) {
        return this.df.namedNode(namespace + reference);
      }

      // Default XSD namespace.
      if (prefix === 'xsd') {
        return this.df.namedNode('http://www.w3.org/2001/XMLSchema#' + reference);
      }

      // Undeclared prefix shaped like an IRI scheme: already absolute.
      if (/^[A-Za-z][A-Za-z0-9+.-]*$/.test(prefix)) {
        return this.df.namedNode(trimmed);
      }
    }

    // Default vocab.
    const vocab = this.context?.['@context']?.['@vocab'] || this.options.defaultVocab;
    return this.df.namedNode(vocab + trimmed);
  }

  /**
   * Appends a quad to the result list; does nothing if any term is missing.
   *
   * @param {object} subject
   * @param {object} predicate
   * @param {object} object
   */
  emitQuad(subject, predicate, object) {
    if (!subject || !predicate || !object) return;

    this.quads.push(this.df.quad(subject, predicate, object));
  }

  /** @returns {Array<object>} Quads emitted by the most recent `parse()`. */
  getQuads() {
    return this.quads;
  }
}
872
+
873
+ // ============================================================================
874
+ // Convenience API
875
+ // ============================================================================
876
+
877
/**
 * One-shot convenience wrapper: constructs an `MDLDParser` with `options`
 * and returns the result of parsing `markdown` with it.
 *
 * @param {string} markdown - MD-LD document text.
 * @param {object} [options={}] - Passed unchanged to the `MDLDParser` constructor.
 * @returns {Array<object>} Quads returned by `MDLDParser#parse`.
 */
export function parseMDLD(markdown, options = {}) {
  return new MDLDParser(options).parse(markdown);
}
881
+
882
+ export default { MDLDParser, parseMDLD, DefaultDataFactory };