mdld-parse 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/render.js ADDED
@@ -0,0 +1,490 @@
1
+ import { parse } from './parse.js';
2
+ import {
3
+ DEFAULT_CONTEXT,
4
+ DataFactory,
5
+ expandIRI,
6
+ shortenIRI,
7
+ parseSemanticBlock,
8
+ hash
9
+ } from './utils.js';
10
+
11
/**
 * Convert an MD-LD document into HTML annotated with RDFa.
 *
 * The work is delegated in four steps: parse the source, build the render
 * state, render the parsed blocks, and wrap the result in a container that
 * carries the RDFa prefix/vocab declarations.
 *
 * @param {string} mdld - MD-LD source text
 * @param {Object} [options] - Rendering options
 * @param {Object} [options.context] - Extra prefix mappings handed to the parser
 * @param {string} [options.baseIRI] - Base IRI (read by buildRenderState)
 * @param {Object} [options.dataFactory] - Data factory (read by buildRenderState)
 * @returns {{html: string, context: Object, metadata: Object}} The wrapped HTML,
 *   the context used for rendering, and metadata holding the block count, the
 *   quad count and the `Date.now()` timestamp taken when rendering finished
 */
export function render(mdld, options = {}) {
  // The parser always receives a context object, even when none was supplied.
  const parserOptions = { context: options.context || {} };
  const parsed = parse(mdld, parserOptions);

  const state = buildRenderState(parsed, options, mdld);
  const body = renderBlocks(parsed.origin.blocks, state);
  const html = wrapWithRDFaContext(body, state.ctx);

  const metadata = {
    blockCount: parsed.origin.blocks.size,
    quadCount: parsed.quads.length,
    renderTime: Date.now()
  };

  return { html, context: state.ctx, metadata };
}
44
+
45
/**
 * Assemble the mutable state object shared by the rendering functions.
 *
 * @param {Object} parsed - Parser result; its `context` is preferred when present
 * @param {Object} options - Render options (`context`, `dataFactory`, `baseIRI`)
 * @param {string} mdld - Original source text, kept for range-based extraction
 * @returns {Object} Fresh render state with an empty output buffer
 */
function buildRenderState(parsed, options, mdld) {
  // Prefer the parser's context; otherwise merge the defaults with the
  // caller-supplied prefixes (caller entries win).
  let ctx = parsed.context;
  if (!ctx) {
    ctx = { ...DEFAULT_CONTEXT, ...(options.context || {}) };
  }

  const df = options.dataFactory || DataFactory;
  const baseIRI = options.baseIRI || '';

  return {
    ctx,
    df,
    baseIRI,
    sourceText: mdld,
    output: [],
    currentSubject: null,
    documentSubject: null,
    blockStack: [],
    carrierStack: []
  };
}
64
+
65
/**
 * Render every block to HTML with RDFa annotations, in document order.
 *
 * Blocks are sorted by `range.start` (blocks without a range sort as 0).
 * Consecutive list blocks are collected into a run and rendered together
 * through renderListsWithRDFa at the position where they occur, so content
 * that follows a list stays after it instead of every list being appended
 * at the end of the output.
 *
 * @param {Map} blocks - Parsed blocks; only `blocks.values()` is read
 * @param {Object} state - Render state; HTML fragments accumulate in `state.output`
 * @returns {string} Concatenation of everything in `state.output`
 */
function renderBlocks(blocks, state) {
  const ordered = Array.from(blocks.values()).sort(
    (a, b) => (a.range?.start || 0) - (b.range?.start || 0)
  );

  // List blocks seen since the last non-list block.
  let pendingList = [];
  const flushPendingList = () => {
    if (pendingList.length > 0) {
      renderListsWithRDFa(pendingList, state);
      pendingList = [];
    }
  };

  for (const block of ordered) {
    if (block.carrierType === 'list') {
      pendingList.push(block);
    } else {
      // A non-list block ends the current run of list items.
      flushPendingList();
      renderBlock(block, state);
    }
  }
  flushPendingList();

  return state.output.join('');
}
90
+
91
/**
 * Render list blocks: split them into groups, then render each group in turn.
 *
 * @param {Array<Object>} listBlocks - Blocks whose carrier is a list item
 * @param {Object} state - Render state; `sourceText` is used for grouping and
 *   the rendered HTML is appended by renderListGroup
 */
function renderListsWithRDFa(listBlocks, state) {
  const groups = groupListBlocksByContext(listBlocks, state.sourceText);
  for (const group of groups) {
    renderListGroup(group, state);
  }
}
102
+
103
/**
 * Partition list blocks into groups headed by a top-level item.
 *
 * Every block whose indent (per getIndentLevel) is 0 starts a new group;
 * indented blocks join the group opened most recently. If the very first
 * block is indented it opens a group of its own.
 *
 * @param {Array<Object>} listBlocks - List blocks in document order
 * @param {string} sourceText - Original source, used to measure indentation
 * @returns {Array<{contextName: string, blocks: Array<Object>}>} Groups in order
 */
function groupListBlocksByContext(listBlocks, sourceText) {
  const groups = [];

  for (const block of listBlocks) {
    const isTopLevel = getIndentLevel(block, sourceText) === 0;

    if (isTopLevel || groups.length === 0) {
      // Top-level item, or an orphaned nested item with no group to join.
      groups.push({
        contextName: 'Items',
        blocks: [block]
      });
    } else {
      groups[groups.length - 1].blocks.push(block);
    }
  }

  return groups;
}
147
+
148
/**
 * Render one list group as an optional anchor paragraph followed by a <ul>.
 *
 * The anchor text is looked up from the position of the group's first block.
 * The list body is produced by re-reading each block's source range, joining
 * those snippets with newlines, and handing them to parseMarkdownList.
 *
 * @param {{blocks: Array<Object>}} group - Group produced by groupListBlocksByContext;
 *   every block is expected to carry a `range`
 * @param {Object} state - Render state (`sourceText` is read, `output` is appended to)
 */
function renderListGroup(group, state) {
  const { blocks } = group;

  const anchor = extractListAnchorText(blocks[0], state.sourceText);
  if (anchor) {
    state.output.push(`<p>${escapeHtml(anchor)}</p>`);
  }

  // Opening tag goes out first; the items are inserted without any wrapper.
  state.output.push(`<ul>`);

  const snippets = [];
  for (const block of blocks) {
    snippets.push(state.sourceText.substring(block.range.start, block.range.end));
  }
  const markdownList = snippets.join('\n');

  state.output.push(parseMarkdownList(markdownList, blocks, state));
  state.output.push(`</ul>`);
}
175
+
176
/**
 * Find the "list anchor": the nearest line above a list that carries a
 * `{...}` annotation and is not itself a `- ` list item.
 *
 * The search starts at the line directly above the one containing
 * `firstBlock.range.start` and walks upward one line at a time until a
 * candidate is found or the top of the document is reached. Each line is
 * examined on its own, so an anchor that sits immediately above the list
 * (no blank line in between) is no longer merged with the list item's text.
 *
 * @param {Object} firstBlock - First block of a list group; needs `range.start`
 * @param {string} sourceText - Original MD-LD source
 * @returns {string|null} Anchor line with its trailing `{...}` annotation
 *   removed, or null when there is no range, no source, or no anchor line
 */
function extractListAnchorText(firstBlock, sourceText) {
  if (!firstBlock.range || !sourceText) return null;

  const itemStart = firstBlock.range.start;

  // Index of the newline that terminates the line above the list item,
  // or -1 when the item sits on the first line of the document.
  let lineBreak = itemStart > 0 ? sourceText.lastIndexOf('\n', itemStart - 1) : -1;

  while (lineBreak >= 0) {
    const lineStart = lineBreak > 0 ? sourceText.lastIndexOf('\n', lineBreak - 1) + 1 : 0;
    const line = sourceText.substring(lineStart, lineBreak).trim();

    // Annotated, but not a list item: this is the anchor.
    if (line.includes('{') && !/^-\s/.test(line)) {
      // Only the annotation at the end of the line is stripped.
      return line.replace(/\s*\{[^}]+\}\s*$/, '');
    }

    // Step to the newline that precedes this line (-1 once line 0 was examined).
    lineBreak = lineStart - 1;
  }

  return null;
}
222
+
223
/**
 * Turn the Markdown text of a list into nested <li>/<ul> markup, attaching
 * RDFa attributes from the matching block to each item.
 *
 * Only lines whose trimmed text starts with `-` are treated as items; every
 * two leading whitespace characters count as one nesting level. The function
 * emits the items of the outermost level without a surrounding <ul> (the
 * caller is expected to provide it); deeper levels are wrapped in their own
 * <ul>, placed inside the parent <li> when one is open. Every <li> and
 * nested <ul> that is opened is also closed, so the returned fragment is
 * balanced.
 *
 * A block is matched to a line first by exact comparison of the trimmed
 * block source with the trimmed line, then by comparing the text with the
 * leading `-` and trailing `{...}` annotation removed.
 *
 * @param {string} markdownList - Newline-separated list source
 * @param {Array<Object>} blocks - Candidate blocks (those without `range` are ignored)
 * @param {Object} state - Render state; `sourceText` and `ctx` are read
 * @returns {string} HTML fragment for the list items
 */
function parseMarkdownList(markdownList, blocks, state) {
  const trailingAnnotation = /\s*\{[^}]+\}\s*$/;

  // Resolve each block's source text once instead of once per line.
  const candidates = blocks
    .filter((b) => b.range)
    .map((b) => {
      const text = state.sourceText.substring(b.range.start, b.range.end).trim();
      return {
        block: b,
        text,
        clean: text.replace(/^-\s*/, '').replace(trailingAnnotation, '')
      };
    });

  let html = '';
  let depth = 0;
  // itemOpen[d] is true while an <li> at depth d still awaits its </li>.
  const itemOpen = [false];

  // Close the deepest nested list together with its last open item.
  const closeNestedList = () => {
    if (itemOpen[depth]) {
      html += '</li>';
    }
    itemOpen.pop();
    html += '</ul>';
    depth--;
  };

  for (const line of markdownList.split('\n')) {
    const content = line.trim();
    if (!content.startsWith('-')) continue; // blank lines and non-item lines

    const level = Math.floor(line.match(/^(\s*)/)[1].length / 2); // 2 spaces per level
    const cleanContent = content.substring(1).trim().replace(trailingAnnotation, '');

    const match =
      candidates.find((c) => c.text === content) ||
      candidates.find((c) => c.clean === cleanContent);

    // Leave deeper levels, closing their items and lists.
    while (depth > level) {
      closeNestedList();
    }

    // Enter deeper levels; the parent <li> stays open around the nested list.
    while (depth < level) {
      html += '<ul>';
      depth++;
      itemOpen.push(false);
    }

    // Close the previous sibling at this level.
    if (itemOpen[depth]) {
      html += '</li>';
    }

    const attrs = match ? buildRDFaAttrsFromBlock(match.block, state.ctx) : '';
    html += `<li${attrs}>${escapeHtml(cleanContent)}`;
    itemOpen[depth] = true;
  }

  while (depth > 0) {
    closeNestedList();
  }
  if (itemOpen[0]) {
    html += '</li>';
  }

  return html;
}
304
+
305
/**
 * Count the leading whitespace characters of a block's source text.
 *
 * @param {Object} block - Block with an optional `range` ({start, end})
 * @param {string} sourceText - Original source the range refers to
 * @returns {number} Number of leading whitespace characters; 0 when the block
 *   has no range or the source is empty
 */
function getIndentLevel(block, sourceText) {
  const { range } = block;
  if (range && sourceText) {
    const snippet = sourceText.substring(range.start, range.end);
    const leading = /^(\s*)/.exec(snippet);
    return leading === null ? 0 : leading[1].length;
  }
  return 0;
}
315
+
316
/**
 * Render one block as an HTML element carrying its RDFa attributes.
 *
 * The element is chosen from `block.type`, falling back to `block.carrierType`:
 * heading -> <h1>..<h6>, para -> <p>, quote -> <blockquote>,
 * code -> <pre><code>, anything else -> <div>. List blocks produce no output
 * here. Output is appended to `state.output`.
 *
 * @param {Object} block - Block to render
 * @param {Object} state - Render state (`ctx`, `sourceText`, `output`)
 */
function renderBlock(block, state) {
  const attrs = buildRDFaAttrsFromBlock(block, state.ctx);

  // Emit <tag attrs>, the block's text content, then </tag>.
  const renderElement = (tag) => {
    state.output.push(`<${tag}${attrs}>`);
    renderBlockContent(block, state);
    state.output.push(`</${tag}>`);
  };

  switch (block.type || block.carrierType) {
    case 'heading':
      renderElement(`h${resolveHeadingLevel(block, state.sourceText)}`);
      break;

    case 'para':
      renderElement('p');
      break;

    case 'list':
      // List blocks are rendered as a group elsewhere, not one by one.
      break;

    case 'quote':
      renderElement('blockquote');
      break;

    case 'code': {
      const language = block.info || '';
      const languageClass = language ? ` class="language-${escapeHtml(language)}"` : '';
      state.output.push(`<pre><code${attrs}${languageClass}>`);
      state.output.push(escapeHtml(block.text || ''));
      state.output.push(`</code></pre>`);
      break;
    }

    default:
      // Unknown carriers are rendered as a generic container.
      renderElement('div');
  }
}

/**
 * Work out a heading's level (1-6) from its leading `#` markers.
 *
 * `block.text` is consulted first; when it is missing or has no markers, the
 * block's source range is used instead. Falls back to 1 when neither yields
 * markers, and caps the level at 6 because HTML defines no <h7>.
 */
function resolveHeadingLevel(block, sourceText) {
  const candidates = [block.text];
  if (block.range && sourceText) {
    candidates.push(sourceText.substring(block.range.start, block.range.end));
  }

  for (const text of candidates) {
    const markers = typeof text === 'string' ? text.match(/^#+/) : null;
    if (markers) {
      return Math.min(markers[0].length, 6);
    }
  }
  return 1;
}
361
+
362
/**
 * Append a block's visible text, HTML-escaped, to the output buffer.
 *
 * The text is cut from the source by `block.range`; the span covered by
 * `block.attrsRange` (the semantic annotation) is removed when present. For
 * heading carriers the leading `#` markers are stripped as well. Nothing is
 * emitted when the block has no range or the state has no source text.
 *
 * @param {Object} block - Block with `range`, optional `attrsRange`, `carrierType`
 * @param {Object} state - Render state (`sourceText` is read, `output` is appended to)
 */
function renderBlockContent(block, state) {
  const { range, attrsRange } = block;
  if (!range || !state.sourceText) return;

  let raw = state.sourceText.substring(range.start, range.end);

  if (attrsRange) {
    // attrsRange is expressed in document offsets; shift it into `raw`.
    const cutFrom = attrsRange.start - range.start;
    const cutTo = attrsRange.end - range.start;
    raw = raw.substring(0, cutFrom) + raw.substring(cutTo);
  }

  const visible = block.carrierType === 'heading'
    ? raw.replace(/^#+\s*/, '').trim()
    : raw.trim();

  state.output.push(escapeHtml(visible));
}
387
+
388
/**
 * Build the RDFa attribute string for a block.
 *
 * Attributes are emitted in the order `about`, `typeof`, `property`, `rel`,
 * `rev`. Every IRI is expanded and then shortened against the context before
 * being written. Subjects equal to `RESET` or starting with `=#` or `+` do
 * not produce an `about` attribute. A predicate's `form` selects its
 * attribute: `!` -> `rev`, `?` -> `rel`, anything else -> `property`.
 *
 * @param {Object} block - Block with optional `subject`, `types`, `predicates`
 * @param {Object} ctx - Prefix context passed to expandIRI / shortenIRI
 * @returns {string} Attributes prefixed with a single space, or '' when none apply
 */
function buildRDFaAttrsFromBlock(block, ctx) {
  const compact = (iri) => shortenIRI(expandIRI(iri, ctx), ctx);
  const { subject, types, predicates } = block;
  const parts = [];

  const hasRenderableSubject =
    subject &&
    subject !== 'RESET' &&
    !subject.startsWith('=#') &&
    !subject.startsWith('+');
  if (hasRenderableSubject) {
    parts.push(`about="${escapeHtml(compact(subject))}"`);
  }

  if (types && types.length > 0) {
    const typeList = types
      .map((entry) => compact(typeof entry === 'string' ? entry : entry.iri))
      .join(' ');
    parts.push(`typeof="${escapeHtml(typeList)}"`);
  }

  if (predicates && predicates.length > 0) {
    const literal = [];
    const forward = [];
    const reverse = [];

    for (const pred of predicates) {
      const isBareIri = typeof pred === 'string';
      const shortened = compact(isBareIri ? pred : pred.iri);
      const form = isBareIri ? '' : (pred.form || '');

      let bucket = literal;
      if (form === '!') {
        bucket = reverse;
      } else if (form === '?') {
        bucket = forward;
      }
      bucket.push(shortened);
    }

    if (literal.length > 0) {
      parts.push(`property="${escapeHtml(literal.join(' '))}"`);
    }
    if (forward.length > 0) {
      parts.push(`rel="${escapeHtml(forward.join(' '))}"`);
    }
    if (reverse.length > 0) {
      parts.push(`rev="${escapeHtml(reverse.join(' '))}"`);
    }
  }

  if (parts.length === 0) return '';
  return ` ${parts.join(' ')}`;
}
445
+
446
/**
 * Build the RDFa `prefix` attribute from a context object.
 *
 * Every entry except `@vocab` becomes a `prefix: iri` pair; pairs are joined
 * with single spaces. The attribute value is HTML-escaped because context
 * entries can originate from the rendered document, and an unescaped quote
 * or ampersand would corrupt the surrounding markup.
 *
 * @param {Object} ctx - Mapping of prefix name to IRI
 * @returns {string} ` prefix="..."` (with leading space), or '' when the
 *   context holds no prefixes
 */
function generatePrefixDeclarations(ctx) {
  const declarations = Object.entries(ctx)
    .filter(([prefix]) => prefix !== '@vocab')
    .map(([prefix, iri]) => `${prefix}: ${iri}`);

  if (declarations.length === 0) return '';
  return ` prefix="${escapeHtml(declarations.join(' '))}"`;
}
460
+
461
/**
 * Build the RDFa `vocab` attribute from the context's `@vocab` entry.
 *
 * The IRI is HTML-escaped so that a quote or ampersand inside it cannot
 * break out of the attribute value.
 *
 * @param {Object} ctx - Context object; only `@vocab` is read
 * @returns {string} ` vocab="..."` (with leading space), or '' when the
 *   context has no `@vocab`
 */
function generateVocabDeclaration(ctx) {
  const vocab = ctx['@vocab'];
  return vocab ? ` vocab="${escapeHtml(vocab)}"` : '';
}
467
+
468
/**
 * Enclose rendered HTML in a <div> that declares the RDFa prefixes and
 * vocabulary taken from the context.
 *
 * @param {string} html - Already rendered HTML fragment (inserted as-is)
 * @param {Object} ctx - Context used to generate the `prefix` and `vocab` attributes
 * @returns {string} The fragment wrapped in a single <div>
 */
function wrapWithRDFaContext(html, ctx) {
  const openingTag = `<div${generatePrefixDeclarations(ctx)}${generateVocabDeclaration(ctx)}>`;
  return `${openingTag}${html}</div>`;
}
477
+
478
/**
 * Escape the characters that are significant in HTML text and attribute
 * values: `&`, `<`, `>`, `"` and `'`.
 *
 * `null` and `undefined` yield an empty string. Every other value is
 * converted with `String()` first, so falsy values such as `0` are rendered
 * as text instead of being dropped.
 *
 * @param {*} text - Value to escape
 * @returns {string} Escaped string
 */
function escapeHtml(text) {
  if (text === null || text === undefined) return '';

  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  };
  return String(text).replace(/[&<>"']/g, (ch) => entities[ch]);
}