@redpanda-data/docs-extensions-and-macros 4.13.1 → 4.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/bin/doc-tools-mcp.js +16 -4
  2. package/bin/doc-tools.js +768 -2089
  3. package/bin/mcp-tools/generated-docs-review.js +2 -2
  4. package/bin/mcp-tools/mcp-validation.js +1 -1
  5. package/bin/mcp-tools/openapi.js +2 -2
  6. package/bin/mcp-tools/property-docs.js +18 -0
  7. package/bin/mcp-tools/rpcn-docs.js +28 -3
  8. package/cli-utils/antora-utils.js +53 -2
  9. package/cli-utils/dependencies.js +313 -0
  10. package/cli-utils/diff-utils.js +273 -0
  11. package/cli-utils/doc-tools-utils.js +54 -0
  12. package/extensions/algolia-indexer/generate-index.js +134 -102
  13. package/extensions/algolia-indexer/index.js +70 -38
  14. package/extensions/collect-bloblang-samples.js +2 -1
  15. package/extensions/generate-rp-connect-categories.js +125 -67
  16. package/extensions/generate-rp-connect-info.js +291 -137
  17. package/macros/rp-connect-components.js +34 -5
  18. package/package.json +4 -3
  19. package/tools/add-commercial-names.js +207 -0
  20. package/tools/bundle-openapi.js +1 -1
  21. package/tools/generate-cli-docs.js +6 -2
  22. package/tools/get-console-version.js +5 -0
  23. package/tools/get-redpanda-version.js +5 -0
  24. package/tools/property-extractor/compare-properties.js +3 -3
  25. package/tools/property-extractor/generate-handlebars-docs.js +14 -14
  26. package/tools/property-extractor/generate-pr-summary.js +46 -0
  27. package/tools/property-extractor/pr-summary-formatter.js +375 -0
  28. package/tools/redpanda-connect/README.adoc +403 -38
  29. package/tools/redpanda-connect/connector-binary-analyzer.js +588 -0
  30. package/tools/redpanda-connect/generate-rpcn-connector-docs.js +97 -34
  31. package/tools/redpanda-connect/parse-csv-connectors.js +1 -1
  32. package/tools/redpanda-connect/pr-summary-formatter.js +663 -0
  33. package/tools/redpanda-connect/report-delta.js +70 -2
  34. package/tools/redpanda-connect/rpcn-connector-docs-handler.js +1279 -0
  35. package/tools/redpanda-connect/templates/connector.hbs +38 -0
  36. package/tools/redpanda-connect/templates/intro.hbs +0 -20
  37. package/tools/redpanda-connect/update-nav.js +216 -0
@@ -1,10 +1,8 @@
1
1
  'use strict'
2
2
 
3
- const { parse } = require('node-html-parser')
4
- const { decode } = require('html-entities')
5
- const path = require('path')
6
- const URL = require('url')
7
- const chalk = require('chalk')
3
+ const { parse } = require('node-html-parser')
4
+ const { decode } = require('html-entities')
5
+ const path = require('path')
8
6
 
9
7
  // Create encoder once at module scope for efficiency
10
8
  const textEncoder = new TextEncoder()
@@ -20,16 +18,22 @@ const textEncoder = new TextEncoder()
20
18
  * @param {Object} contentCatalog - The Antora content catalog, with pages and metadata.
21
19
  * @param {Object} [config={}] - Configuration options
22
20
  * @param {Boolean} config.indexLatestOnly - If true, only index the latest version of any given page.
21
+ * @param {Array} config.excludes - CSS selectors for elements to exclude from indexing.
23
22
  * @param {Object} config.logger - Logger to use
24
23
  * @typedef {Object} SearchIndexData
25
24
  * @returns {SearchIndexData} A data object that contains the Algolia index
26
25
  */
27
26
  function generateIndex (playbook, contentCatalog, { indexLatestOnly = false, excludes = [], logger } = {}) {
28
- if (!logger) logger = process.env.NODE_ENV === 'test' ? { info: () => undefined } : console
27
+ // Use provided logger or create a no-op logger for tests
28
+ if (!logger) {
29
+ logger = process.env.NODE_ENV === 'test'
30
+ ? { info: () => {}, warn: () => {}, error: () => {}, debug: () => {} }
31
+ : console
32
+ }
29
33
 
30
34
  const algolia = {}
31
35
 
32
- console.log(chalk.cyan('Indexing...'))
36
+ logger.info('Starting Algolia index generation...')
33
37
  const unixTimestamp = Math.floor(Date.now() / 1000)
34
38
 
35
39
  // Select indexable pages
@@ -37,37 +41,31 @@ function generateIndex (playbook, contentCatalog, { indexLatestOnly = false, exc
37
41
  if (!page.out || page.asciidoc?.attributes?.noindex != null) return
38
42
  return {}
39
43
  })
40
- if (!pages.length) return {}
41
44
 
42
- // Handle the site URL
43
- let siteUrl = playbook.site.url
44
- if (!siteUrl) {
45
- siteUrl = ''
45
+ if (!pages.length) {
46
+ logger.warn('No pages found to index')
47
+ return {}
46
48
  }
47
- if (siteUrl.charAt(siteUrl.length - 1) === '/') {
48
- siteUrl = siteUrl.substr(0, siteUrl.length - 1)
49
+
50
+ // Handle the site URL
51
+ let siteUrl = playbook.site.url || ''
52
+ if (siteUrl.endsWith('/')) {
53
+ siteUrl = siteUrl.slice(0, -1)
49
54
  }
50
55
  const urlPath = extractUrlPath(siteUrl)
51
56
 
52
- var algoliaCount = 0
57
+ let algoliaCount = 0
53
58
 
54
- for (var i = 0; i < pages.length; i++) {
55
- const page = pages[i]
59
+ for (const page of pages) {
56
60
  const root = parse(
57
61
  page.contents,
58
62
  {
59
63
  blockTextElements: {
60
- code: true,
61
- },
64
+ code: true
65
+ }
62
66
  }
63
67
  )
64
68
 
65
- /* Skip pages marked as "noindex" for "robots"
66
- const noindex = root.querySelector('meta[name=robots][content=noindex]')
67
- if (noindex) {
68
- continue
69
- }*/
70
-
71
69
  // Compute a flag identifying if the current page is in the
72
70
  // "current" component version.
73
71
  // When indexLatestOnly is set, we only index the current version.
@@ -78,23 +76,25 @@ function generateIndex (playbook, contentCatalog, { indexLatestOnly = false, exc
78
76
 
79
77
  if (indexLatestOnly && !isCurrent) continue
80
78
 
81
- // capture the component name and version
79
+ // Capture the component name and version
82
80
  const cname = component.name
83
- const version = page.src.origin.descriptor.prerelease ? page.src.origin.descriptor.displayVersion : page.src.version;
84
-
85
- // handle the page keywords
86
- const kw = root.querySelector('meta[name=keywords]')
87
- var keywords = []
88
- if (kw) {
89
- keywords = kw.getAttribute('content')
90
- keywords = keywords ? keywords.split(/,\s*/) : []
81
+ const version = page.src.origin?.descriptor?.prerelease
82
+ ? page.src.origin.descriptor.displayVersion
83
+ : page.src.version
84
+
85
+ // Handle the page keywords
86
+ const kwElement = root.querySelector('meta[name=keywords]')
87
+ let keywords = []
88
+ if (kwElement) {
89
+ const kwContent = kwElement.getAttribute('content')
90
+ keywords = kwContent ? kwContent.split(/,\s*/) : []
91
91
  }
92
92
 
93
- // gather page breadcrumbs
93
+ // Gather page breadcrumbs
94
94
  const breadcrumbs = []
95
95
  root.querySelectorAll('nav.breadcrumbs > ul > li a')
96
96
  .forEach((elem) => {
97
- var url = path.resolve(
97
+ const url = path.resolve(
98
98
  path.join('/', page.out.dirname),
99
99
  elem.getAttribute('href')
100
100
  )
@@ -111,7 +111,7 @@ function generateIndex (playbook, contentCatalog, { indexLatestOnly = false, exc
111
111
  continue
112
112
  }
113
113
 
114
- // handle titles
114
+ // Handle titles
115
115
  const h1 = article.querySelector('h1')
116
116
  if (!h1) {
117
117
  logger.warn(`No H1 in ${page.pub.url}...skipping`)
@@ -122,148 +122,180 @@ function generateIndex (playbook, contentCatalog, { indexLatestOnly = false, exc
122
122
 
123
123
  const titles = []
124
124
  article.querySelectorAll('h2,h3,h4,h5,h6').forEach((title) => {
125
- var id = title.getAttribute('id')
125
+ const id = title.getAttribute('id')
126
126
  if (id) {
127
127
  titles.push({
128
128
  t: title.text,
129
- h: id,
129
+ h: id
130
130
  })
131
131
  }
132
132
  title.remove()
133
133
  })
134
134
 
135
- // exclude elements within the article that should not be indexed
136
- excludes.forEach((excl) => {
137
- if (!excl) return
138
- article.querySelectorAll(excl).map((e) => e.remove())
139
- })
135
+ // Exclude elements within the article that should not be indexed
136
+ for (const excl of excludes) {
137
+ if (!excl) continue
138
+ article.querySelectorAll(excl).forEach((e) => e.remove())
139
+ }
140
140
 
141
- var intro = article.querySelector('p');
142
- // decode any HTML entities
143
- intro = decode(intro.rawText);
141
+ // FIXED: Handle potential null intro element
142
+ const introElement = article.querySelector('p')
143
+ const intro = introElement ? decode(introElement.rawText) : ''
144
144
 
145
- // establish structure in the Algolia index
145
+ // Establish structure in the Algolia index
146
146
  if (!(cname in algolia)) algolia[cname] = {}
147
147
  if (!(version in algolia[cname])) algolia[cname][version] = []
148
148
 
149
149
  // Check if this is a properties reference page (or has many titles)
150
- const isPropertiesPage = page.pub.url.includes('/properties/') || titles.length > 30;
150
+ const isPropertiesPage = page.pub.url.includes('/properties/') || titles.length > 30
151
151
 
152
152
  // Handle the article text
153
- let text = '';
153
+ let text = ''
154
154
 
155
155
  if (!isPropertiesPage) {
156
156
  // For normal pages, index full text content
157
- const contentElements = article.querySelectorAll('p, table, li');
158
- let contentText = '';
159
- let currentSize = 0;
157
+ const contentElements = article.querySelectorAll('p, table, li')
158
+ let contentText = ''
159
+ let currentSize = 0
160
160
  // Maximum size in bytes (Algolia's limit is 100KB, using 50KB for safety)
161
- const MAX_SIZE = 50000;
162
-
161
+ const MAX_SIZE = 50000
162
+
163
163
  for (const element of contentElements) {
164
- let elementText = '';
164
+ let elementText = ''
165
165
  if (element.tagName === 'TABLE') {
166
166
  for (const tr of element.querySelectorAll('tr')) {
167
167
  for (const cell of tr.querySelectorAll('td, th')) {
168
- elementText += cell.textContent + ' ';
168
+ elementText += cell.textContent + ' '
169
169
  }
170
170
  }
171
171
  } else {
172
- elementText = element.textContent;
172
+ elementText = element.textContent
173
173
  }
174
-
175
- const elementSize = textEncoder.encode(elementText).length;
174
+
175
+ const elementSize = textEncoder.encode(elementText).length
176
176
  if (currentSize + elementSize > MAX_SIZE) {
177
- break;
177
+ break
178
178
  }
179
-
180
- contentText += elementText;
181
- currentSize += elementSize;
179
+
180
+ contentText += elementText
181
+ currentSize += elementSize
182
182
  }
183
-
183
+
184
184
  text = contentText.replace(/\n/g, ' ')
185
185
  .replace(/\r/g, ' ')
186
186
  .replace(/\s+/g, ' ')
187
- .trim();
187
+ .trim()
188
188
  } else {
189
189
  // For long pages, only use intro as text (property names are already in titles array)
190
- text = intro;
191
- logger.info(`Skipping full text indexing for long page: ${page.pub.url} (${titles.length} properties)`);
190
+ text = intro
191
+ logger.info(`Skipping full text indexing for long page: ${page.pub.url} (${titles.length} properties)`)
192
192
  }
193
193
 
194
- let tag;
195
- const title = (component.title || '').trim();
194
+ let tag
195
+ const title = (component.title || '').trim()
196
196
  if (title.toLowerCase() === 'home') {
197
- // Collect all unique component titles except 'home', 'shared', 'search' using public API when available
197
+ // Collect all unique component titles except 'home', 'shared', 'search'
198
198
  const componentsList = typeof contentCatalog.getComponents === 'function'
199
199
  ? contentCatalog.getComponents()
200
200
  : Array.isArray(contentCatalog.components)
201
201
  ? contentCatalog.components
202
- : Object.values(contentCatalog.components || contentCatalog._components || {});
202
+ : Object.values(contentCatalog.components || contentCatalog._components || {})
203
+
203
204
  // Find the latest version for Self-Managed (component title: 'Self-Managed')
204
- let latestVersion = undefined;
205
- const selfManaged = componentsList.find(c => (c.title || '').trim().toLowerCase() === 'self-managed');
206
- if (selfManaged && selfManaged.latest && selfManaged.latest.version) {
207
- latestVersion = selfManaged.latest.version;
208
- if (latestVersion && !/^v/.test(latestVersion)) latestVersion = 'v' + latestVersion;
205
+ let selfManagedLatestVersion
206
+ const selfManaged = componentsList.find(c => (c.title || '').trim().toLowerCase() === 'self-managed')
207
+ if (selfManaged?.latest?.version) {
208
+ selfManagedLatestVersion = selfManaged.latest.version
209
+ if (selfManagedLatestVersion && !/^v/.test(selfManagedLatestVersion)) {
210
+ selfManagedLatestVersion = 'v' + selfManagedLatestVersion
211
+ }
209
212
  }
213
+
210
214
  const allComponentTitles = componentsList
211
215
  .map(c => (c.title || '').trim())
212
- .filter(t => t && !['home', 'shared', 'search'].includes(t.toLowerCase()));
216
+ .filter(t => t && !['home', 'shared', 'search'].includes(t.toLowerCase()))
217
+
213
218
  if (!allComponentTitles.length) {
214
- throw new Error('No component titles found for "home" page. Indexing aborted.');
219
+ throw new Error('No component titles found for "home" page. Indexing aborted.')
215
220
  }
216
- tag = [...new Set(allComponentTitles)];
221
+
222
+ tag = [...new Set(allComponentTitles)]
217
223
  // For Self-Managed, append v<latest-version> to the tag
218
- if (latestVersion) {
219
- tag = tag.map(t => t.toLowerCase() === 'self-managed' ? `${t} ${latestVersion}` : t);
224
+ if (selfManagedLatestVersion) {
225
+ tag = tag.map(t => t.toLowerCase() === 'self-managed' ? `${t} ${selfManagedLatestVersion}` : t)
220
226
  }
221
227
  } else {
222
- tag = `${title}${version ? ' v' + version : ''}`;
228
+ tag = `${title}${version ? ' v' + version : ''}`
223
229
  }
224
- var indexItem;
225
- const deployment = page.asciidoc?.attributes['env-kubernetes'] ? 'Kubernetes' : page.asciidoc?.attributes['env-linux'] ? 'Linux' : page.asciidoc?.attributes['env-docker'] ? 'Docker' : page.asciidoc?.attributes['page-cloud'] ? 'Redpanda Cloud' : ''
230
+
231
+ const deployment = page.asciidoc?.attributes['env-kubernetes']
232
+ ? 'Kubernetes'
233
+ : page.asciidoc?.attributes['env-linux']
234
+ ? 'Linux'
235
+ : page.asciidoc?.attributes['env-docker']
236
+ ? 'Docker'
237
+ : page.asciidoc?.attributes['page-cloud']
238
+ ? 'Redpanda Cloud'
239
+ : ''
226
240
 
227
241
  const categories = page.asciidoc?.attributes['page-categories']
228
- ? page.asciidoc.attributes['page-categories'].split(',').map(category => category.trim())
229
- : []
242
+ ? page.asciidoc.attributes['page-categories'].split(',').map(category => category.trim())
243
+ : []
230
244
 
231
- var indexItem = {
245
+ const commercialNames = page.asciidoc?.attributes['page-commercial-names']
246
+ ? page.asciidoc.attributes['page-commercial-names'].split(',').map(name => name.trim())
247
+ : []
248
+
249
+ // FIXED: keywords now included in index item
250
+ const indexItem = {
232
251
  title: documentTitle,
233
252
  version: version,
234
253
  text: text,
235
254
  intro: intro,
236
255
  objectID: urlPath + page.pub.url,
237
256
  titles: titles,
257
+ keywords: keywords,
238
258
  categories: categories,
239
- unixTimestamp: unixTimestamp,
259
+ commercialNames: commercialNames,
260
+ unixTimestamp: unixTimestamp
240
261
  }
241
262
 
242
263
  if (component.name !== 'redpanda-labs') {
243
- indexItem.product = component.title;
244
- indexItem.breadcrumbs = breadcrumbs;
245
- indexItem.type = 'Doc';
246
- indexItem._tags = Array.isArray(tag) ? tag : [tag];
264
+ indexItem.product = component.title
265
+ indexItem.breadcrumbs = breadcrumbs
266
+ indexItem.type = 'Doc'
267
+ indexItem._tags = Array.isArray(tag) ? tag : [tag]
247
268
  } else {
248
- indexItem.deployment = deployment;
249
- indexItem.type = 'Lab';
250
- indexItem.interactive = false;
251
- indexItem._tags = Array.isArray(tag) ? tag : [tag];
269
+ indexItem.deployment = deployment
270
+ indexItem.type = 'Lab'
271
+ indexItem.interactive = false
272
+ indexItem._tags = Array.isArray(tag) ? tag : [tag]
252
273
  }
274
+
253
275
  algolia[cname][version].push(indexItem)
254
276
  algoliaCount++
255
277
  }
278
+
279
+ logger.info(`Indexed ${algoliaCount} pages`)
256
280
  return algolia
257
281
  }
258
282
 
259
- // Extract the path from a URL
283
+ /**
284
+ * Extract the path from a URL
285
+ * @param {string} url - The URL to extract path from
286
+ * @returns {string} The URL path
287
+ */
260
288
  function extractUrlPath (url) {
261
- if (url) {
262
- if (url.charAt() === '/') return url
263
- const urlPath = URL.parse(url).pathname
289
+ if (!url) return ''
290
+ if (url.charAt(0) === '/') return url
291
+
292
+ try {
293
+ // FIXED: Use modern URL API instead of deprecated url.parse()
294
+ const urlPath = new URL(url).pathname
264
295
  return urlPath === '/' ? '' : urlPath
296
+ } catch {
297
+ return ''
265
298
  }
266
- return ''
267
299
  }
268
300
 
269
301
  module.exports = generateIndex
@@ -5,14 +5,16 @@ const algoliasearch = require('algoliasearch')
5
5
  const http = require('http')
6
6
  const https = require('https')
7
7
  const _ = require('lodash')
8
- process.env.UV_THREADPOOL_SIZE=16
8
+
9
+ // Increase thread pool size for better HTTP performance
10
+ process.env.UV_THREADPOOL_SIZE = 16
9
11
 
10
12
  /**
11
13
  * Algolia indexing for an Antora documentation site.
12
14
  *
13
15
  * @module antora-algolia-indexer
14
16
  */
15
- function register({
17
+ function register ({
16
18
  config: {
17
19
  indexLatestOnly,
18
20
  excludes,
@@ -21,29 +23,41 @@ function register({
21
23
  }) {
22
24
  const logger = this.getLogger('algolia-indexer-extension')
23
25
 
24
- if (!process.env.ALGOLIA_ADMIN_API_KEY || !process.env.ALGOLIA_APP_ID || !process.env.ALGOLIA_INDEX_NAME) return
25
-
26
- var client
27
- var index
28
-
29
- const httpAgent = new http.Agent({ keepAlive: true, maxSockets: 100 });
30
- const httpsAgent = new https.Agent({ keepAlive: true, maxSockets: 100 });
26
+ // Validate required environment variables
27
+ const requiredEnvVars = ['ALGOLIA_ADMIN_API_KEY', 'ALGOLIA_APP_ID', 'ALGOLIA_INDEX_NAME']
28
+ const missingVars = requiredEnvVars.filter(v => !process.env[v])
31
29
 
32
- // Connect and authenticate with Algolia using the custom agent
33
- client = algoliasearch(process.env.ALGOLIA_APP_ID, process.env.ALGOLIA_ADMIN_API_KEY, {
34
- httpAgent: httpAgent,
35
- httpsAgent: httpsAgent
36
- })
37
- index = client.initIndex(process.env.ALGOLIA_INDEX_NAME)
30
+ if (missingVars.length > 0) {
31
+ logger.info(`Algolia indexing disabled - missing environment variables: ${missingVars.join(', ')}`)
32
+ return
33
+ }
38
34
 
35
+ // Validate unknown options
39
36
  if (Object.keys(unknownOptions).length) {
40
37
  const keys = Object.keys(unknownOptions)
41
38
  throw new Error(`Unrecognized option${keys.length > 1 ? 's' : ''} specified: ${keys.join(', ')}`)
42
39
  }
43
40
 
44
- this.on('beforePublish', async ({ playbook, siteCatalog, contentCatalog }) => {
41
+ // Create HTTP agents with connection pooling
42
+ const httpAgent = new http.Agent({ keepAlive: true, maxSockets: 100 })
43
+ const httpsAgent = new https.Agent({ keepAlive: true, maxSockets: 100 })
44
+
45
+ // Connect and authenticate with Algolia
46
+ const client = algoliasearch(process.env.ALGOLIA_APP_ID, process.env.ALGOLIA_ADMIN_API_KEY, {
47
+ httpAgent: httpAgent,
48
+ httpsAgent: httpsAgent
49
+ })
50
+ const index = client.initIndex(process.env.ALGOLIA_INDEX_NAME)
51
+
52
+ this.on('beforePublish', async ({ playbook, contentCatalog }) => {
45
53
  const algolia = generateIndex(playbook, contentCatalog, { indexLatestOnly, excludes, logger })
46
- let existingObjectsMap = new Map()
54
+
55
+ if (!algolia || Object.keys(algolia).length === 0) {
56
+ logger.warn('No content to index for Algolia')
57
+ return
58
+ }
59
+
60
+ const existingObjectsMap = new Map()
47
61
 
48
62
  // Save objects in a local cache to query later.
49
63
  // Avoids sending multiple requests.
@@ -58,8 +72,9 @@ function register({
58
72
  }
59
73
  }
60
74
  })
75
+ logger.info(`Loaded ${existingObjectsMap.size} existing objects from Algolia index`)
61
76
  } catch (err) {
62
- logger.error(JSON.stringify(err))
77
+ logger.error(`Error browsing existing Algolia objects: ${JSON.stringify(err)}`)
63
78
  }
64
79
 
65
80
  let totalObjectsToUpdate = 0
@@ -90,46 +105,63 @@ function register({
90
105
  action: 'addObject',
91
106
  indexName: process.env.ALGOLIA_INDEX_NAME,
92
107
  body: object
93
- }));
108
+ }))
94
109
 
95
110
  const updateObjectActions = objectsToUpdate.map(object => ({
96
111
  action: 'updateObject',
97
112
  indexName: process.env.ALGOLIA_INDEX_NAME,
98
113
  body: object
99
- }));
114
+ }))
100
115
 
101
- const batchActions = [...addObjectActions, ...updateObjectActions];
116
+ const batchActions = [...addObjectActions, ...updateObjectActions]
102
117
 
103
- // Upload new records only if the objects have been updated or they are new.
104
- // See https://www.algolia.com/doc/api-reference/api-methods/batch/?client=javascript
105
- await client.multipleBatch(batchActions).then(() => {
106
- logger.info('Batch operations completed successfully');
107
- }).catch(error => {
108
- logger.error(`Error uploading records to Algolia: ${error.message}`);
109
- });
118
+ // FIXED: Only send batch if there are actions to perform
119
+ if (batchActions.length > 0) {
120
+ try {
121
+ await client.multipleBatch(batchActions)
122
+ logger.debug(`Batch completed: ${objectsToAdd.length} added, ${objectsToUpdate.length} updated for ${c}/${v}`)
123
+ } catch (error) {
124
+ logger.error(`Error uploading records to Algolia: ${error.message}`)
125
+ }
126
+ }
110
127
  }
111
128
  }
112
129
 
130
+ // Identify objects to delete (stale content)
113
131
  for (const [objectID, obj] of existingObjectsMap) {
114
- if ((obj.type === 'Doc' && !obj.objectID.includes('/api/')) || (!obj.type) || (obj.type === 'Lab' && !obj.interactive)) {
132
+ // Only delete Doc pages (not API) and Labs that aren't interactive
133
+ const shouldDelete = (obj.type === 'Doc' && !obj.objectID.includes('/api/')) ||
134
+ (!obj.type) ||
135
+ (obj.type === 'Lab' && !obj.interactive)
136
+
137
+ if (shouldDelete) {
115
138
  objectsToDelete.push(objectID)
116
139
  }
117
140
  }
141
+
118
142
  if (objectsToDelete.length > 0) {
119
- console.log(objectsToDelete)
120
- await index.deleteObjects(objectsToDelete).then(() => {
121
- console.log(`Deleted ${objectsToDelete.length} outdated records`);
122
- }).catch(error => {
123
- logger.error(`Error deleting records from Algolia: ${error.message}`);
124
- });
143
+ logger.info(`Deleting ${objectsToDelete.length} outdated records...`)
144
+ logger.debug(`Objects to delete: ${JSON.stringify(objectsToDelete)}`)
145
+
146
+ try {
147
+ await index.deleteObjects(objectsToDelete)
148
+ logger.info(`Successfully deleted ${objectsToDelete.length} outdated records`)
149
+ } catch (error) {
150
+ logger.error(`Error deleting records from Algolia: ${error.message}`)
151
+ }
125
152
  }
126
153
 
127
- logger.info('Updated records:' + totalObjectsToUpdate)
128
- logger.info('New records:' + totalObjectsToAdd)
154
+ // Summary
155
+ logger.info(`Algolia sync complete: ${totalObjectsToAdd} added, ${totalObjectsToUpdate} updated, ${objectsToDelete.length} deleted`)
129
156
 
130
- totalObjectsToAdd === 0 && totalObjectsToUpdate === 0 && logger.info('No new records uploaded or existing records updated')
157
+ if (totalObjectsToAdd === 0 && totalObjectsToUpdate === 0 && objectsToDelete.length === 0) {
158
+ logger.info('Index is up to date - no changes needed')
159
+ }
131
160
  })
132
161
 
162
+ // Cleanup HTTP agents on process exit
163
+ // NOTE: This registers a global handler. In watch mode, agents will persist
164
+ // between builds, which is generally fine for connection reuse.
133
165
  process.on('exit', () => {
134
166
  httpAgent.destroy()
135
167
  httpsAgent.destroy()
@@ -4,12 +4,13 @@ module.exports.register = function () {
4
4
  const logger = this.getLogger('collect-bloblang-samples');
5
5
 
6
6
  this.on('contentClassified', ({ contentCatalog }) => {
7
+
7
8
  const collectExamples = (examples, componentName) => {
8
9
  const bloblangSamples = [];
9
10
  const seenTitles = new Set();
10
11
 
11
12
  examples
12
- .filter((example) => example.src.relative.startsWith('playground/')) // Only include files in the 'bloblang' subdirectory
13
+ .filter((example) => example.src.relative.startsWith('playground/')) // Only include files in the 'playground' subdirectory
13
14
  .forEach((example) => {
14
15
  try {
15
16
  const content = example.contents.toString('utf8');