@redpanda-data/docs-extensions-and-macros 4.10.8 → 4.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,389 @@
1
+ const path = require('path')
2
+ const os = require('os')
3
+ const TurndownService = require('turndown')
4
+ const turndownPluginGfm = require('turndown-plugin-gfm')
5
+ const { gfm } = turndownPluginGfm
6
+
7
+ module.exports.register = function () {
8
+ const logger = this.getLogger('convert-to-markdown-extension')
9
+ let playbook
10
+
11
+ // Shared Turndown configuration
12
// Options handed to every TurndownService instance created by this extension.
const baseConfig = {
  // Headings are written with leading '#' characters.
  headingStyle: 'atx',
  // Code blocks are wrapped in fences rather than indented.
  codeBlockStyle: 'fenced',
  // Unordered list items start with '-'.
  bulletListMarker: '-',
  // NOTE(review): Turndown documents this option as applying to
  // reference-style links; no linkStyle is set here - confirm it is needed.
  linkReferenceStyle: 'full',
}
18
+
19
+ // Factory: create a configured Turndown instance
20
/**
 * Create a Turndown converter configured with the shared options, the GFM
 * plugin and a rule that drops page chrome (scripts, navigation, footers,
 * feedback widgets, tracking images, ...).
 *
 * @returns {TurndownService} a fresh converter; callers may add further rules.
 */
function createTurndownBase() {
  // Built once per converter rather than once per visited node.
  const removedTags = ['script', 'style', 'footer', 'nav']
  // Substrings that mark an element for removal when found in its class or id.
  const removedMarkers = [
    'thumbs',
    'back-to-top',
    'contributors-modal',
    'feedback-section',
    'feedback-toast',
    'pagination',
    'footer',
    'nav-expand',
    'banner-container',
    'markdown-dropdown',
  ]

  const td = new TurndownService(baseConfig)
  td.use(gfm)

  // Remove unwanted global elements (footers, modals, feedback, etc.)
  td.addRule('remove-unwanted', {
    filter: (node) => {
      // Text nodes and other non-elements have no attributes to inspect.
      if (!node || !node.getAttribute) return false

      const classAttr = (node.getAttribute('class') || '').toLowerCase()
      const idAttr = (node.getAttribute('id') || '').toLowerCase()
      const tag = node.nodeName.toLowerCase()

      // Remove by tag
      if (removedTags.includes(tag)) return true

      // Remove tracking or hidden images
      if (
        tag === 'img' &&
        (classAttr.includes('tracking') ||
          idAttr.includes('scarf') ||
          node.getAttribute('role') === 'presentation' ||
          node.style?.display === 'none')
      ) {
        return true
      }

      // Remove by class or id (substring match, case-insensitive)
      return removedMarkers.some(
        (marker) => classAttr.includes(marker) || idAttr.includes(marker)
      )
    },
    replacement: () => '',
  })

  // The former `td.keep(['div.openblock.tabs', 'article.doc'])` call was
  // dropped: Turndown compares string filters with tag names, so these CSS
  // selectors never matched any element and the call had no effect.
  return td
}
71
+
72
+ // Factory: create page-specific Turndown converter
73
+ function createTurndownForPage(page) {
74
+ const outerTurndown = createTurndownBase()
75
+ const nestedTurndown = createTurndownBase()
76
+
77
+ // Helper to add custom rules
78
+ function addCustomRules(turndownInstance, isInner = false) {
79
+ // Determine heading depth for tab conversion
80
/**
 * Determine the level (1-6) of the heading that governs `el`, used to pick
 * the heading depth for converted tabs.
 *
 * Lookup order:
 *   1. the closest previous sibling that is itself a heading;
 *   2. walking up the ancestors, the last heading inside the ancestor that
 *      comes before `el` in document order;
 *   3. level 2 when nothing is found.
 *
 * @param {Element} el element whose governing heading is wanted
 * @returns {number} heading level between 1 and 6 (2 by default)
 */
function findNearestHeadingLevel(el) {
  const headingName = /^H[1-6]$/i
  // Numeric value of Node.DOCUMENT_POSITION_PRECEDING; the Node global is not
  // available in every environment this runs in.
  const PRECEDING = 2
  const levelOf = (heading) =>
    Number.parseInt(heading.nodeName.substring(1), 10)

  for (
    let sibling = el.previousElementSibling;
    sibling;
    sibling = sibling.previousElementSibling
  ) {
    if (headingName.test(sibling.nodeName)) return levelOf(sibling)
  }

  // Without compareDocumentPosition we cannot order nodes, so every heading
  // of the ancestor stays a candidate.
  const canCompare = typeof el.compareDocumentPosition === 'function'

  for (
    let ancestor = el.parentElement;
    ancestor;
    ancestor = ancestor.parentElement
  ) {
    const headings = Array.from(
      ancestor.querySelectorAll('h1,h2,h3,h4,h5,h6')
    )
    // Headings that follow `el`, or live inside it, belong to later or nested
    // content and must not decide the depth of this element.
    const candidates = canCompare
      ? headings.filter(
          (heading) => (el.compareDocumentPosition(heading) & PRECEDING) !== 0
        )
      : headings
    if (candidates.length > 0) {
      return levelOf(candidates[candidates.length - 1])
    }
  }

  return 2
}
100
+
101
+ // Asciidoctor tab conversion
102
// Asciidoctor tabs: each tab becomes a heading followed by the Markdown of
// its panel; nested tab groups are rendered one heading level deeper.
turndownInstance.addRule('asciidoctor-tabs', {
  // Matches <div> elements whose class mentions both "openblock" and "tabs".
  filter: (node) => {
    if (node.nodeName !== 'DIV') return false
    const classAttr = node.getAttribute?.('class') || node.className || ''
    return classAttr.includes('openblock') && classAttr.includes('tabs')
  },
  replacement: function (_, node) {
    // Locate a panel by comparing id attributes instead of building a
    // `#id` selector: ids containing characters such as '.' or ':' would be
    // parsed as something else (or rejected) by the selector engine.
    function findPanel(group, panelId) {
      if (!panelId) return null
      const withId = Array.from(group.querySelectorAll('[id]'))
      return (
        withId.find((candidate) => candidate.getAttribute('id') === panelId) ||
        null
      )
    }

    // Render one tab group. `parentHeadingLevel` is the heading level used by
    // the enclosing tab group, or null for a top-level group.
    function processTabGroup(group, parentHeadingLevel = null) {
      const contentDiv = group.querySelector('.content') || group
      const tabList = contentDiv.querySelectorAll('li.tab')
      if (!tabList.length) return ''

      const nearestLevel =
        parentHeadingLevel != null
          ? parentHeadingLevel + 1
          : findNearestHeadingLevel(group) + 1
      // Markdown has no heading deeper than level 6.
      const tabHeadingLevel = Math.min(nearestLevel, 6)
      const headingPrefix = '#'.repeat(tabHeadingLevel)

      let markdown = ''
      tabList.forEach((tab) => {
        const title =
          tab.querySelector('p')?.textContent.trim() ||
          tab.textContent.trim()

        let panelId = tab.getAttribute('aria-controls')
        if (!panelId && tab.id) panelId = tab.id + '--panel'
        const panel = findPanel(group, panelId)
        // Tabs without a panel in this group (including tabs that belong to
        // an already detached nested group) produce no output.
        if (!panel) return

        const nestedTabs = panel.querySelectorAll('.openblock.tabs')
        let nestedMdCombined = ''
        nestedTabs.forEach((nested) => {
          // The list also contains groups nested more deeply. Those were
          // rendered and detached by the recursive call for their own parent,
          // so rendering them again would duplicate their content.
          if (!panel.contains(nested)) return
          nestedMdCombined +=
            '\n' + processTabGroup(nested, tabHeadingLevel) + '\n'
          // Detach so the panel HTML converted below no longer includes it.
          nested.remove()
        })

        const innerHtml = panel.innerHTML || ''
        let md = ''
        try {
          const converter = isInner ? nestedTurndown : turndownInstance
          md = converter.turndown(innerHtml)
        } catch (e) {
          logger.warn(`Turndown failed in nested tab: ${e.message}`)
        }

        markdown += `${headingPrefix} ${title}\n\n${md.trim()}\n${nestedMdCombined.trim()}\n\n`
      })

      return markdown.trim()
    }

    return '\n' + processTabGroup(node, null) + '\n'
  },
})
158
+
159
+ // Admonition block conversion
160
// Admonition blocks: a <table> with an icon cell and a content cell is
// rendered as a blockquote headed by an emoji and the admonition type.
turndownInstance.addRule('admonition', {
  filter: (node) =>
    node.nodeName === 'TABLE' &&
    Boolean(node.querySelector('td.icon')) &&
    Boolean(node.querySelector('td.content')),
  replacement: function (_, node) {
    const iconCell = node.querySelector('td.icon')
    const contentCell = node.querySelector('td.content')
    if (!iconCell || !contentCell) return ''

    // The admonition type is read from the icon's `icon-<type>` class and
    // defaults to NOTE.
    const iconEl = iconCell.querySelector('i')
    const classAttr = iconEl?.className || ''
    const match = classAttr.match(/icon-([a-z]+)/i)
    const type = match ? match[1].toUpperCase() : 'NOTE'

    // Title source: a `.title` element if present, else the icon's `title`
    // attribute (a plain string).
    const titleEl =
      node.querySelector('.title') ||
      contentCell.querySelector('.title') ||
      iconEl?.getAttribute('title')
    const customTitle =
      typeof titleEl === 'string'
        ? titleEl.trim()
        : titleEl?.textContent?.trim() || ''

    const emojiMap = {
      CAUTION: '⚠️',
      WARNING: '⚠️',
      TIP: '💡',
      NOTE: '📝',
      IMPORTANT: '❗',
    }
    const emoji = emojiMap[type] || '📘'

    const innerHtml = contentCell.innerHTML || ''
    let innerMd = ''
    try {
      const converter = isInner ? nestedTurndown : turndownInstance
      innerMd = converter.turndown(innerHtml).trim()
    } catch (e) {
      logger.warn(`Turndown failed in admonition: ${e.message}`)
    }

    // A title that merely repeats the type (e.g. "Note") adds nothing.
    const titleLower = customTitle.toLowerCase()
    const typeLower = type.toLowerCase()
    const header =
      customTitle && titleLower !== typeLower
        ? `${emoji} **${type}: ${customTitle}**`
        : `${emoji} **${type}**`

    // Quote every line, including lines that already start with '>': those
    // are nested blockquotes and need one more level to stay nested instead
    // of being merged into this admonition. Blank lines get a bare '>' so no
    // trailing space is produced.
    const quoted = innerMd
      .split('\n')
      .map((line) => (line === '' ? '>' : `> ${line}`))
      .join('\n')

    return `\n> ${header}\n>\n${quoted}\n`
  },
})
217
+
218
+ // Markdown table conversion
219
// Regular tables: rendered as GFM pipe tables built from each cell's text.
// Admonition tables (icon + content cells) are excluded from this rule.
turndownInstance.addRule('tables', {
  filter: (node) => {
    if (node.nodeName !== 'TABLE') return false
    if (node.querySelector('td.icon') && node.querySelector('td.content'))
      return false
    return true
  },
  replacement: function (content, node) {
    const rows = Array.from(node.querySelectorAll('tr'))
    if (!rows.length) return content

    const tableRows = []
    rows.forEach((row) => {
      const cells = Array.from(row.querySelectorAll('th, td'))
      const cellContents = cells.map((cell) =>
        (cell.textContent || '')
          .trim()
          // A table row must stay on one line.
          .replace(/\s+/g, ' ')
          // An unescaped pipe would be read as a column boundary.
          .replace(/\|/g, '\\|')
      )
      if (!cellContents.length) return

      tableRows.push('| ' + cellContents.join(' | ') + ' |')
      // The delimiter row follows the first row actually emitted, so it is
      // still produced when the first <tr> has no cells.
      if (tableRows.length === 1) {
        tableRows.push(
          '| ' + cellContents.map(() => '---').join(' | ') + ' |'
        )
      }
    })

    return '\n' + tableRows.join('\n') + '\n'
  },
})
247
+ }
248
+
249
+ addCustomRules(outerTurndown, false)
250
+ addCustomRules(nestedTurndown, true)
251
+ return outerTurndown
252
+ }
253
+
254
+ // Add marker attribute before UI rendering so templates can detect markdown availability
255
// Flag every page with the `page-has-markdown` attribute once documents are
// converted, so UI templates can check for a Markdown equivalent.
this.on('documentsConverted', ({ contentCatalog }) => {
  const pageFiles = contentCatalog.findBy({ family: 'page' })
  logger.info(`Marking ${pageFiles.length} pages as having markdown equivalents...`)

  for (const file of pageFiles) {
    // Create the asciidoc/attributes containers on demand.
    const asciidoc = file.asciidoc || (file.asciidoc = {})
    const attributes = asciidoc.attributes || (asciidoc.attributes = {})

    // An empty string is enough: templates only test for the key.
    attributes['page-has-markdown'] = ''
  }
})
268
+
269
+ // Conversion pipeline
270
// Conversion pipeline: turn the <article class="doc"> part of every composed
// page into Markdown and store it on the page as `markdownContents`.
//
// Pages are handled one after another. The earlier "worker pool" offered no
// concurrency because the conversion never awaits, so the first worker drained
// the whole queue synchronously; a plain loop does the same work.
this.on('pagesComposed', async ({ playbook: pb, contentCatalog }) => {
  playbook = pb
  const siteUrl = playbook.site?.url || ''
  const pages = contentCatalog.getPages()
  logger.info(
    `Converting ${pages.length} pages to Markdown${
      siteUrl ? ` (site.url=${siteUrl})` : ''
    }...`
  )

  // Build the absolute URL of a page's Markdown file; '' when site.url or
  // the page's pub.url is missing, or when the URL cannot be built.
  function buildCanonicalUrl(page) {
    try {
      if (!siteUrl || !page.pub?.url) return ''

      const isIndexify = playbook?.urls?.htmlExtensionStyle === 'indexify'
      const target = new URL(page.pub.url, siteUrl)
      const pathname = target.pathname

      // Indexify URLs end in a directory; the Markdown file sits at
      // <dir>/index.md. Everything else swaps the .html extension.
      const looksLikeDir =
        isIndexify &&
        (pathname.endsWith('/') || !path.basename(pathname).includes('.'))
      target.pathname = looksLikeDir
        ? pathname.replace(/\/?$/, '/index.md')
        : pathname.replace(/\.html$/, '.md')

      return target.toString()
    } catch (e) {
      logger.debug(
        `Failed to build canonical URL for ${page.src?.path}: ${e.message}`
      )
      return ''
    }
  }

  // Convert one page; returns the final Markdown, or '' when there is
  // nothing to convert. May throw if Turndown fails.
  function convertPage(page) {
    const html = page.contents.toString().trim()
    if (!html) return ''

    // Extract only the <article class="doc"> portion
    const match = html.match(
      /<article[^>]*class=["'][^"']*\bdoc\b[^"']*["'][^>]*>([\s\S]*?)<\/article>/i
    )
    if (!match || !match[1]) {
      logger.info(`No <article class="doc"> found for ${page.src?.path}`)
      return ''
    }

    const td = createTurndownForPage(page)
    let markdown = td.turndown(match[1]).trim()

    // Prepend Markdown source reference and URL construction hint
    const canonicalUrl = buildCanonicalUrl(page)
    if (canonicalUrl) {
      const urlHint = `<!-- Note for AI: Links in this doc are relative to the current page and use indexify format. Add /index.md to directory-style links for the Markdown version. -->`

      markdown = `<!-- Source: ${canonicalUrl} -->\n${urlHint}\n\n${markdown}`
    }

    if (!markdown) return ''

    return markdown
      // Collapse runs of three or more newlines into one blank line
      .replace(/\n{3,}/g, '\n\n')
      // Remove trailing whitespace from lines
      .replace(/[ \t]+$/gm, '')
      // Remove leading/trailing whitespace from the entire document
      .trim()
  }

  let convertedCount = 0
  for (const page of pages) {
    if (!page?.contents) continue

    try {
      const markdown = convertPage(page)
      if (markdown) {
        page.markdownContents = Buffer.from(markdown, 'utf8')
        convertedCount++
      }
    } catch (err) {
      // A failing page must not abort the conversion of the others.
      logger.error(
        `Error converting ${page.src?.path || 'unknown'}: ${err.message}`
      )
      logger.debug(err.stack)
    }
  }

  logger.info(`Converted ${convertedCount} Markdown files.`)
})
369
+
370
+ // Add Markdown files to site catalog
371
// Publish step: register one .md file in the site catalog for every page
// that carries `markdownContents`, next to the page's HTML output path.
this.on('beforePublish', ({ siteCatalog, contentCatalog }) => {
  const convertedPages = contentCatalog.getPages(
    (candidate) => candidate.markdownContents
  )

  if (convertedPages.length) {
    logger.info(`Adding ${convertedPages.length} Markdown files to site catalog...`)

    convertedPages.forEach((page) => {
      const htmlOut = page.out?.path
      // Pages without an output path are skipped.
      if (!htmlOut) return

      const mdOutPath = htmlOut.replace(/\.html$/, '.md')
      const markdownFile = {
        contents: page.markdownContents,
        out: { path: mdOutPath },
      }
      siteCatalog.addFile(markdownFile)
      logger.debug(`Added Markdown: ${mdOutPath}`)
    })
    return
  }

  logger.info('No Markdown files to publish.')
})
389
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@redpanda-data/docs-extensions-and-macros",
3
- "version": "4.10.8",
3
+ "version": "4.11.1",
4
4
  "description": "Antora extensions and macros developed for Redpanda documentation.",
5
5
  "keywords": [
6
6
  "antora",
@@ -43,6 +43,7 @@
43
43
  "./extensions/process-context-switcher": "./extensions/process-context-switcher.js",
44
44
  "./extensions/archive-attachments": "./extensions/archive-attachments.js",
45
45
  "./extensions/add-pages-to-root": "./extensions/add-pages-to-root.js",
46
+ "./extensions/convert-to-markdown": "./extensions/convert-to-markdown.js",
46
47
  "./extensions/collect-bloblang-samples": "./extensions/collect-bloblang-samples.js",
47
48
  "./extensions/compute-end-of-life": "./extensions/compute-end-of-life.js",
48
49
  "./extensions/generate-rp-connect-categories": "./extensions/generate-rp-connect-categories.js",
@@ -85,8 +86,10 @@
85
86
  "@octokit/core": "^6.1.2",
86
87
  "@octokit/plugin-retry": "^7.1.1",
87
88
  "@octokit/rest": "^21.0.1",
89
+ "@redocly/cli": "^2.2.0",
88
90
  "algoliasearch": "^4.17.0",
89
91
  "chalk": "4.1.2",
92
+ "cheerio": "^1.1.2",
90
93
  "commander": "^14.0.0",
91
94
  "gulp": "^4.0.2",
92
95
  "gulp-connect": "^5.7.0",
@@ -103,9 +106,10 @@
103
106
  "sync-request": "^6.1.0",
104
107
  "tar": "^7.4.3",
105
108
  "tree-sitter": "^0.22.4",
109
+ "turndown": "^7.2.2",
110
+ "turndown-plugin-gfm": "^1.0.2",
106
111
  "yaml": "^2.7.1",
107
- "yargs": "^17.7.2",
108
- "@redocly/cli": "^2.2.0"
112
+ "yargs": "^17.7.2"
109
113
  },
110
114
  "devDependencies": {
111
115
  "@antora/cli": "3.1.4",