@redpanda-data/docs-extensions-and-macros 4.10.8 → 4.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
const path = require('path')
|
|
2
|
+
const os = require('os')
|
|
3
|
+
const TurndownService = require('turndown')
|
|
4
|
+
const turndownPluginGfm = require('turndown-plugin-gfm')
|
|
5
|
+
const { gfm } = turndownPluginGfm
|
|
6
|
+
|
|
7
|
+
module.exports.register = function () {
|
|
8
|
+
const logger = this.getLogger('convert-to-markdown-extension')
|
|
9
|
+
let playbook
|
|
10
|
+
|
|
11
|
+
// Shared Turndown configuration
|
|
12
|
+
// Options handed to every TurndownService instance built by this extension.
const baseConfig = {
  // ATX headings are the "# Heading" form.
  headingStyle: 'atx',
  // Emit fenced code blocks.
  codeBlockStyle: 'fenced',
  // Unordered list items start with a dash.
  bulletListMarker: '-',
  linkReferenceStyle: 'full',
}
|
|
18
|
+
|
|
19
|
+
// Factory: create a configured Turndown instance
|
|
20
|
+
/**
 * Build a TurndownService preconfigured with the shared options, the GFM
 * plugin, and a rule that strips page chrome (scripts, navigation, footers,
 * feedback widgets, tracking images) from the converted output.
 *
 * @returns {TurndownService} a fresh, independent converter instance
 */
function createTurndownBase() {
  const service = new TurndownService(baseConfig)
  service.use(gfm)

  // Tags that never carry document content.
  const strippedTags = ['script', 'style', 'footer', 'nav']

  // Substrings that, when found in a node's class or id, mark it as page chrome.
  const strippedMarkers = [
    'thumbs',
    'back-to-top',
    'contributors-modal',
    'feedback-section',
    'feedback-toast',
    'pagination',
    'footer',
    'nav-expand',
    'banner-container',
    'markdown-dropdown',
  ]

  service.addRule('remove-unwanted', {
    filter: (node) => {
      // Text nodes and similar have no attributes to inspect.
      if (!node || !node.getAttribute) return false

      const classes = (node.getAttribute('class') || '').toLowerCase()
      const id = (node.getAttribute('id') || '').toLowerCase()
      const tagName = node.nodeName.toLowerCase()

      if (strippedTags.includes(tagName)) return true

      // Tracking pixels and hidden/decorative images.
      if (tagName === 'img') {
        const isNoise =
          classes.includes('tracking') ||
          id.includes('scarf') ||
          node.getAttribute('role') === 'presentation' ||
          node.style?.display === 'none'
        if (isNoise) return true
      }

      return strippedMarkers.some(
        (marker) => classes.includes(marker) || id.includes(marker)
      )
    },
    // Matching nodes contribute nothing to the Markdown.
    replacement: () => '',
  })

  // NOTE(review): Turndown appears to compare string filters against tag names
  // only, so these selector-style strings may never match — confirm intent.
  service.keep(['div.openblock.tabs', 'article.doc'])
  return service
}
|
|
71
|
+
|
|
72
|
+
// Factory: create page-specific Turndown converter
|
|
73
|
+
function createTurndownForPage(page) {
|
|
74
|
+
const outerTurndown = createTurndownBase()
|
|
75
|
+
const nestedTurndown = createTurndownBase()
|
|
76
|
+
|
|
77
|
+
// Helper to add custom rules
|
|
78
|
+
function addCustomRules(turndownInstance, isInner = false) {
|
|
79
|
+
// Determine heading depth for tab conversion
|
|
80
|
+
/**
 * Find the level (1-6) of the heading that governs `el`.
 *
 * Looks first at the element's preceding siblings, then walks up the
 * ancestors and, within each one, picks the last heading that comes BEFORE
 * `el` in document order. Headings that follow `el` (for example a later
 * subsection) or that live inside `el` are ignored so they cannot inflate the
 * result.
 *
 * @param {Element} el - element whose governing heading is wanted
 * @returns {number} heading level, or 2 when no heading is found
 */
function findNearestHeadingLevel(el) {
  const HEADING_TAG = /^H[1-6]$/i
  // Value of Node.DOCUMENT_POSITION_PRECEDING in the DOM standard.
  const POSITION_PRECEDING = 2
  const levelOf = (heading) =>
    Number.parseInt(heading.nodeName.substring(1), 10)

  for (
    let sibling = el.previousElementSibling;
    sibling;
    sibling = sibling.previousElementSibling
  ) {
    if (HEADING_TAG.test(sibling.nodeName)) return levelOf(sibling)
  }

  // Without compareDocumentPosition we cannot order nodes, so every heading
  // in the ancestor is considered.
  const canCompare = typeof el.compareDocumentPosition === 'function'

  for (
    let ancestor = el.parentElement;
    ancestor;
    ancestor = ancestor.parentElement
  ) {
    const headings = Array.from(
      ancestor.querySelectorAll('h1,h2,h3,h4,h5,h6')
    )
    const candidates = canCompare
      ? headings.filter(
          (h) => (el.compareDocumentPosition(h) & POSITION_PRECEDING) !== 0
        )
      : headings
    if (candidates.length > 0) {
      return levelOf(candidates[candidates.length - 1])
    }
  }

  return 2
}
|
|
100
|
+
|
|
101
|
+
// Asciidoctor tab conversion
|
|
102
|
+
// Convert an Asciidoctor tabs block into a flat sequence of headings: one
// heading per tab (one level below the nearest heading), followed by the
// tab panel's content and then any tab groups nested inside that panel.
turndownInstance.addRule('asciidoctor-tabs', {
  filter: (node) => {
    if (node.nodeName !== 'DIV') return false
    const classAttr = node.getAttribute?.('class') || node.className || ''
    return classAttr.includes('openblock') && classAttr.includes('tabs')
  },
  replacement: function (_, node) {
    // Look a descendant up by exact id. Comparing attribute values avoids
    // building a `#id` selector, which throws or mismatches when the id holds
    // selector metacharacters and would query "#null" when there is no id.
    function findById(root, id) {
      if (!id) return null
      const match = Array.from(root.querySelectorAll('[id]')).find(
        (el) => el.getAttribute('id') === id
      )
      return match || null
    }

    // Render one tab group; `parentHeadingLevel` is the heading level of the
    // tab that contains this group, or null for a top-level group.
    function processTabGroup(group, parentHeadingLevel = null) {
      const contentDiv = group.querySelector('.content') || group
      const tabList = contentDiv.querySelectorAll('li.tab')
      if (!tabList.length) return ''

      const nearestLevel =
        parentHeadingLevel != null
          ? parentHeadingLevel + 1
          : findNearestHeadingLevel(group) + 1
      // Markdown has no heading deeper than level 6.
      const tabHeadingLevel = Math.min(nearestLevel, 6)
      const headingPrefix = '#'.repeat(tabHeadingLevel)

      let markdown = ''
      tabList.forEach((tab) => {
        const title =
          tab.querySelector('p')?.textContent.trim() ||
          tab.textContent.trim()

        let panelId = tab.getAttribute('aria-controls')
        if (!panelId && tab.id) panelId = tab.id + '--panel'
        const panel = findById(group, panelId)
        if (!panel) return

        const nestedTabs = panel.querySelectorAll('.openblock.tabs')
        let nestedMdCombined = ''
        nestedTabs.forEach((nested) => {
          // The list was captured before any removal. A deeper group that was
          // already rendered (and detached) by the recursive call must not be
          // rendered a second time.
          if (!nested.parentNode) return
          nestedMdCombined +=
            '\n' + processTabGroup(nested, tabHeadingLevel) + '\n'
          // Detach so the nested group is not converted again as part of the
          // panel's own HTML below.
          nested.remove()
        })

        const innerHtml = panel.innerHTML || ''
        let md = ''
        try {
          const converter = isInner ? nestedTurndown : turndownInstance
          md = converter.turndown(innerHtml)
        } catch (e) {
          logger.warn(`Turndown failed in nested tab: ${e.message}`)
        }

        markdown += `${headingPrefix} ${title}\n\n${md.trim()}\n${nestedMdCombined.trim()}\n\n`
      })

      return markdown.trim()
    }

    return '\n' + processTabGroup(node, null) + '\n'
  },
})
|
|
158
|
+
|
|
159
|
+
// Admonition block conversion
|
|
160
|
+
// Render Asciidoctor admonition tables (an icon cell plus a content cell) as
// a Markdown blockquote headed by an emoji and the admonition type.
turndownInstance.addRule('admonition', {
  filter: (node) => {
    if (node.nodeName !== 'TABLE') return false
    return node.querySelector('td.icon') && node.querySelector('td.content')
  },
  replacement: function (_, node) {
    const iconCell = node.querySelector('td.icon')
    const bodyCell = node.querySelector('td.content')
    if (!(iconCell && bodyCell)) return ''

    // The admonition type is read from the icon's "icon-<type>" class.
    const icon = iconCell.querySelector('i')
    const iconMatch = (icon?.className || '').match(/icon-([a-z]+)/i)
    const type = iconMatch ? iconMatch[1].toUpperCase() : 'NOTE'

    // The title is either an element with class "title" or, failing that,
    // the icon's title attribute (a plain string).
    const titleSource =
      node.querySelector('.title') ||
      bodyCell.querySelector('.title') ||
      icon?.getAttribute('title')
    let customTitle
    if (typeof titleSource === 'string') {
      customTitle = titleSource.trim()
    } else {
      customTitle = titleSource?.textContent?.trim() || ''
    }

    const emojiMap = {
      CAUTION: '⚠️',
      WARNING: '⚠️',
      TIP: '💡',
      NOTE: '📝',
      IMPORTANT: '❗',
    }
    const emoji = emojiMap[type] || '📘'

    const bodyHtml = bodyCell.innerHTML || ''
    let bodyMd = ''
    try {
      const converter = isInner ? nestedTurndown : turndownInstance
      bodyMd = converter.turndown(bodyHtml).trim()
    } catch (e) {
      logger.warn(`Turndown failed in admonition: ${e.message}`)
    }

    // Only show the title when it adds something beyond the type name.
    const showTitle =
      customTitle && customTitle.toLowerCase() !== type.toLowerCase()
    const header = showTitle
      ? `${emoji} **${type}: ${customTitle}**`
      : `${emoji} **${type}**`

    // Prefix each body line with "> " unless it already starts with ">".
    const quotedLines = []
    for (const line of bodyMd.split('\n')) {
      quotedLines.push(line.startsWith('>') ? line : `> ${line}`)
    }
    const quoted = quotedLines.join('\n')

    return `\n> ${header}\n>\n${quoted}\n`
  },
})
|
|
217
|
+
|
|
218
|
+
// Markdown table conversion
|
|
219
|
+
// Render ordinary HTML tables (not admonition tables) as pipe tables, using
// the plain text of each cell.
turndownInstance.addRule('tables', {
  filter: (node) => {
    if (node.nodeName !== 'TABLE') return false
    // Tables with an icon cell and a content cell are admonitions.
    const isAdmonition =
      node.querySelector('td.icon') && node.querySelector('td.content')
    return !isAdmonition
  },
  replacement: function (content, node) {
    const rows = Array.from(node.querySelectorAll('tr'))
    if (!rows.length) return content

    // Collapse whitespace to keep each cell on one line, and escape pipes so
    // cell text cannot be mistaken for a column delimiter.
    const toCellText = (cell) =>
      (cell.textContent || '')
        .trim()
        .replace(/\s+/g, ' ')
        .replace(/\|/g, '\\|')

    const tableRows = []
    for (const row of rows) {
      const cellContents = Array.from(row.querySelectorAll('th, td')).map(
        toCellText
      )
      if (!cellContents.length) continue

      tableRows.push('| ' + cellContents.join(' | ') + ' |')

      // The separator follows the first row actually emitted, so a leading
      // <tr> without cells cannot leave the table without a header rule.
      if (tableRows.length === 1) {
        tableRows.push(
          '| ' + cellContents.map(() => '---').join(' | ') + ' |'
        )
      }
    }

    return '\n' + tableRows.join('\n') + '\n'
  },
})
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
addCustomRules(outerTurndown, false)
|
|
250
|
+
addCustomRules(nestedTurndown, true)
|
|
251
|
+
return outerTurndown
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
// Add marker attribute before UI rendering so templates can detect markdown availability
|
|
255
|
+
// On documentsConverted, set an empty `page-has-markdown` AsciiDoc attribute
// on every page in the content catalog.
this.on('documentsConverted', ({ contentCatalog }) => {
  const allPages = contentCatalog.findBy({ family: 'page' })
  logger.info(`Marking ${allPages.length} pages as having markdown equivalents...`)

  for (const doc of allPages) {
    // Create the attribute container if the page does not have one yet.
    if (!doc.asciidoc) doc.asciidoc = {}
    const asciidoc = doc.asciidoc
    if (!asciidoc.attributes) asciidoc.attributes = {}

    // An empty value means "set" for an AsciiDoc attribute.
    asciidoc.attributes['page-has-markdown'] = ''
  }
})
|
|
268
|
+
|
|
269
|
+
// Conversion pipeline
|
|
270
|
+
// Conversion pipeline: for every composed page, extract the
// <article class="doc"> HTML, convert it to Markdown, prepend source
// comments, tidy whitespace, and store the result on `page.markdownContents`.
this.on('pagesComposed', async ({ playbook: pb, contentCatalog }) => {
  playbook = pb
  const siteUrl = playbook.site?.url || ''
  const pages = contentCatalog.getPages()
  logger.info(
    `Converting ${pages.length} pages to Markdown${
      siteUrl ? ` (site.url=${siteUrl})` : ''
    }...`
  )

  // Captures the inner HTML of the first <article> whose class list has "doc".
  const articlePattern =
    /<article[^>]*class=["'][^"']*\bdoc\b[^"']*["'][^>]*>([\s\S]*?)<\/article>/i

  // Build the absolute URL of the page's Markdown twin. Returns '' when the
  // site URL or page URL is missing, or when the URL cannot be parsed.
  const buildCanonicalUrl = (page) => {
    try {
      if (!siteUrl || !page.pub?.url) return ''
      const isIndexify = playbook?.urls?.htmlExtensionStyle === 'indexify'
      const target = new URL(page.pub.url, siteUrl)
      const pathname = target.pathname

      // With indexify, directory-style URLs map to <dir>/index.md.
      const looksLikeDir =
        isIndexify &&
        (pathname.endsWith('/') || !path.basename(pathname).includes('.'))
      target.pathname = looksLikeDir
        ? pathname.replace(/\/?$/, '/index.md')
        : pathname.replace(/\.html$/, '.md')

      return target.toString()
    } catch (e) {
      logger.debug(
        `Failed to build canonical URL for ${page.src?.path}: ${e.message}`
      )
      return ''
    }
  }

  // Collapse runs of 3+ newlines, strip trailing spaces/tabs on each line,
  // and trim the document as a whole.
  const tidyWhitespace = (text) =>
    text
      .replace(/\n{3,}/g, '\n\n')
      .replace(/[ \t]+$/gm, '')
      .trim()

  let convertedCount = 0

  // Conversion is fully synchronous, so pages are processed one at a time.
  for (const page of pages) {
    if (!page?.contents) continue

    try {
      const html = page.contents.toString().trim()
      if (!html) continue

      const match = html.match(articlePattern)
      if (!match || !match[1]) {
        logger.info(`No <article class="doc"> found for ${page.src?.path}`)
        continue
      }

      const td = createTurndownForPage(page)
      let markdown = td.turndown(match[1]).trim()

      // Prepend Markdown source reference and URL construction hint
      const canonicalUrl = buildCanonicalUrl(page)
      if (canonicalUrl) {
        const urlHint = `<!-- Note for AI: Links in this doc are relative to the current page and use indexify format. Add /index.md to directory-style links for the Markdown version. -->`

        markdown = `<!-- Source: ${canonicalUrl} -->\n${urlHint}\n\n${markdown}`
      }

      if (markdown) markdown = tidyWhitespace(markdown)

      if (markdown) {
        page.markdownContents = Buffer.from(markdown, 'utf8')
        convertedCount++
      }
    } catch (err) {
      // A failing page is logged and skipped; the remaining pages still run.
      logger.error(
        `Error converting ${page.src?.path || 'unknown'}: ${err.message}`
      )
      logger.debug(err.stack)
    }
  }

  logger.info(`Converted ${convertedCount} Markdown files.`)
})
|
|
369
|
+
|
|
370
|
+
// Add Markdown files to site catalog
|
|
371
|
+
// Before publishing, add a `.md` file to the site catalog next to each HTML
// page that has `markdownContents`.
this.on('beforePublish', ({ siteCatalog, contentCatalog }) => {
  const pages = contentCatalog.getPages((p) => p.markdownContents)
  if (!pages.length) {
    logger.info('No Markdown files to publish.')
    return
  }
  logger.info(`Adding ${pages.length} Markdown files to site catalog...`)

  const htmlSuffix = /\.html$/
  for (const page of pages) {
    const htmlOut = page.out?.path
    if (!htmlOut) continue

    // If the output path does not end in ".html", the replacement below would
    // leave it unchanged and the Markdown file would be added at the same
    // output path as the HTML page.
    if (!htmlSuffix.test(htmlOut)) {
      logger.debug(`Skipping Markdown for non-HTML output path: ${htmlOut}`)
      continue
    }

    const mdOutPath = htmlOut.replace(htmlSuffix, '.md')
    siteCatalog.addFile({
      contents: page.markdownContents,
      out: { path: mdOutPath },
    })
    logger.debug(`Added Markdown: ${mdOutPath}`)
  }
})
|
|
389
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@redpanda-data/docs-extensions-and-macros",
|
|
3
|
-
"version": "4.10.8",
|
|
3
|
+
"version": "4.11.1",
|
|
4
4
|
"description": "Antora extensions and macros developed for Redpanda documentation.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"antora",
|
|
@@ -43,6 +43,7 @@
|
|
|
43
43
|
"./extensions/process-context-switcher": "./extensions/process-context-switcher.js",
|
|
44
44
|
"./extensions/archive-attachments": "./extensions/archive-attachments.js",
|
|
45
45
|
"./extensions/add-pages-to-root": "./extensions/add-pages-to-root.js",
|
|
46
|
+
"./extensions/convert-to-markdown": "./extensions/convert-to-markdown.js",
|
|
46
47
|
"./extensions/collect-bloblang-samples": "./extensions/collect-bloblang-samples.js",
|
|
47
48
|
"./extensions/compute-end-of-life": "./extensions/compute-end-of-life.js",
|
|
48
49
|
"./extensions/generate-rp-connect-categories": "./extensions/generate-rp-connect-categories.js",
|
|
@@ -85,8 +86,10 @@
|
|
|
85
86
|
"@octokit/core": "^6.1.2",
|
|
86
87
|
"@octokit/plugin-retry": "^7.1.1",
|
|
87
88
|
"@octokit/rest": "^21.0.1",
|
|
89
|
+
"@redocly/cli": "^2.2.0",
|
|
88
90
|
"algoliasearch": "^4.17.0",
|
|
89
91
|
"chalk": "4.1.2",
|
|
92
|
+
"cheerio": "^1.1.2",
|
|
90
93
|
"commander": "^14.0.0",
|
|
91
94
|
"gulp": "^4.0.2",
|
|
92
95
|
"gulp-connect": "^5.7.0",
|
|
@@ -103,9 +106,10 @@
|
|
|
103
106
|
"sync-request": "^6.1.0",
|
|
104
107
|
"tar": "^7.4.3",
|
|
105
108
|
"tree-sitter": "^0.22.4",
|
|
109
|
+
"turndown": "^7.2.2",
|
|
110
|
+
"turndown-plugin-gfm": "^1.0.2",
|
|
106
111
|
"yaml": "^2.7.1",
|
|
107
|
-
"yargs": "^17.7.2"
|
|
108
|
-
"@redocly/cli": "^2.2.0"
|
|
112
|
+
"yargs": "^17.7.2"
|
|
109
113
|
},
|
|
110
114
|
"devDependencies": {
|
|
111
115
|
"@antora/cli": "3.1.4",
|