@stainless-api/docs 0.1.0-beta.98 → 1.0.0-beta.140

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/CHANGELOG.md +404 -0
  2. package/ambient.d.ts +6 -0
  3. package/eslint-suppressions.json +22 -6
  4. package/{eslint.config.js → eslint.config.ts} +1 -7
  5. package/package.json +57 -40
  6. package/plugin/assets/languages/php.svg +4 -0
  7. package/plugin/buildAlgoliaIndex.ts +6 -12
  8. package/plugin/components/SDKSelect.astro +0 -6
  9. package/plugin/components/SnippetCode.tsx +6 -37
  10. package/plugin/components/search/SearchAlgolia.astro +1 -1
  11. package/plugin/components/search/SearchIsland.tsx +19 -13
  12. package/plugin/generateAPIReferenceLink.ts +0 -40
  13. package/plugin/globalJs/ai-dropdown-options.ts +26 -13
  14. package/plugin/globalJs/code-snippets.ts +5 -5
  15. package/plugin/globalJs/copy.ts +20 -91
  16. package/plugin/globalJs/navigation.ts +13 -13
  17. package/plugin/globalJs/summary-selection-tweak.ts +29 -0
  18. package/plugin/index.ts +107 -163
  19. package/plugin/languages.ts +2 -1
  20. package/plugin/loadPluginConfig.ts +50 -153
  21. package/plugin/markdown/highlighter.ts +100 -0
  22. package/plugin/markdown/index.ts +39 -0
  23. package/plugin/middlewareBuilder/stainlessMiddleware.d.ts +2 -0
  24. package/plugin/react/Routing.tsx +10 -244
  25. package/plugin/referencePlaceholderUtils.ts +1 -1
  26. package/plugin/replaceSidebarPlaceholderMiddleware.ts +1 -1
  27. package/plugin/routes/Docs.astro +3 -1
  28. package/plugin/routes/Overview.astro +14 -7
  29. package/plugin/routes/llms.ts +186 -0
  30. package/plugin/routes/markdown.ts +62 -13
  31. package/plugin/sidebar-utils/sidebar-builder.ts +38 -12
  32. package/plugin/specs/defaultSpecLoader.ts +192 -0
  33. package/plugin/specs/fetchSpecSSR.ts +1 -1
  34. package/plugin/specs/utils.ts +86 -0
  35. package/shared/conditionalIntegration.ts +28 -0
  36. package/shared/getProsePages.ts +6 -7
  37. package/shared/virtualModule.ts +1 -26
  38. package/stl-docs/aiChatExamples.ts +31 -0
  39. package/stl-docs/chat/docs-chat-handler.ts +17 -0
  40. package/stl-docs/chat/hook.ts +225 -0
  41. package/stl-docs/chat/schemas.ts +27 -0
  42. package/stl-docs/chat/ui/AiChat.module.css +591 -0
  43. package/stl-docs/chat/ui/AiChat.tsx +175 -0
  44. package/stl-docs/chat/ui/Trigger.tsx +154 -0
  45. package/stl-docs/chat/ui/components/ChatControls.tsx +51 -0
  46. package/stl-docs/chat/ui/components/ChatEmpty.tsx +42 -0
  47. package/stl-docs/chat/ui/components/ChatLog.tsx +93 -0
  48. package/stl-docs/chat/ui/components/ChatMessage.tsx +47 -0
  49. package/stl-docs/chat/ui/components/CodeBlock.tsx +33 -0
  50. package/stl-docs/chat/ui/components/MessageFeedback.tsx +106 -0
  51. package/stl-docs/chat/ui/components/Table.tsx +15 -0
  52. package/stl-docs/chat/ui/components/ToolCall.tsx +34 -0
  53. package/stl-docs/chat/ui/components/hljs-github.css +81 -0
  54. package/stl-docs/chat/ui/scroll-manager.ts +86 -0
  55. package/stl-docs/chat/ui/types.ts +45 -0
  56. package/stl-docs/components/AiChatIsland.tsx +10 -12
  57. package/stl-docs/components/ContentPanel.astro +9 -0
  58. package/stl-docs/components/Footer.astro +89 -0
  59. package/stl-docs/components/Header.astro +0 -5
  60. package/stl-docs/components/PageFrame.astro +23 -8
  61. package/stl-docs/components/PageSidebar.astro +11 -0
  62. package/stl-docs/components/StainlessLogo.svg +4 -0
  63. package/stl-docs/components/TwoColumnContent.astro +2 -0
  64. package/stl-docs/components/headers/DefaultHeader.astro +6 -8
  65. package/stl-docs/components/headers/StackedHeader.astro +5 -53
  66. package/stl-docs/components/mintlify-compat/Accordion.astro +2 -2
  67. package/stl-docs/components/mintlify-compat/AccordionGroup.astro +0 -4
  68. package/stl-docs/components/mintlify-compat/Columns.astro +2 -2
  69. package/stl-docs/components/mintlify-compat/Frame.astro +2 -2
  70. package/stl-docs/components/mintlify-compat/Tab.astro +2 -2
  71. package/stl-docs/components/mintlify-compat/callouts/Callout.astro +2 -2
  72. package/stl-docs/components/mintlify-compat/callouts/Check.astro +0 -4
  73. package/stl-docs/components/mintlify-compat/callouts/Danger.astro +0 -4
  74. package/stl-docs/components/mintlify-compat/callouts/Info.astro +0 -4
  75. package/stl-docs/components/mintlify-compat/callouts/Note.astro +0 -4
  76. package/stl-docs/components/mintlify-compat/callouts/Tip.astro +0 -4
  77. package/stl-docs/components/mintlify-compat/callouts/Warning.astro +0 -4
  78. package/stl-docs/components/nav-tabs/NavDropdown.astro +12 -7
  79. package/stl-docs/components/nav-tabs/NavTabs.astro +5 -3
  80. package/stl-docs/components/nav-tabs/buildNavLinks.ts +2 -0
  81. package/stl-docs/components/pagination/Pagination.astro +4 -2
  82. package/stl-docs/components/pagination/PaginationLinkEmphasized.astro +2 -2
  83. package/stl-docs/components/pagination/PaginationLinkQuiet.astro +2 -2
  84. package/stl-docs/components/pagination/util.ts +3 -3
  85. package/stl-docs/components/sidebars/BaseSidebar.astro +72 -1
  86. package/stl-docs/disableCalloutSyntax.ts +1 -1
  87. package/stl-docs/fonts.ts +5 -5
  88. package/stl-docs/index.ts +76 -53
  89. package/stl-docs/loadStlDocsConfig.ts +38 -8
  90. package/stl-docs/og-image/components/OpenGraphFunctionSignature.tsx +64 -0
  91. package/stl-docs/og-image/components/OpenGraphImage.tsx +126 -0
  92. package/stl-docs/og-image/config.ts +56 -0
  93. package/stl-docs/og-image/image-gen/generate-api-reference-og-image.tsx +188 -0
  94. package/stl-docs/og-image/image-gen/generate-og-image.tsx +119 -0
  95. package/stl-docs/og-image/image-gen/get-logo-url.ts +47 -0
  96. package/stl-docs/og-image/index.ts +135 -0
  97. package/stl-docs/og-image/routes/add-og-image.ts +45 -0
  98. package/stl-docs/og-image/routes/get-api-reference-og-image.ts +36 -0
  99. package/stl-docs/og-image/routes/get-og-image.ts +28 -0
  100. package/stl-docs/og-image/theme.ts +43 -0
  101. package/stl-docs/og-image/utils.ts +14 -0
  102. package/stl-docs/proseDocSync.test.ts +74 -0
  103. package/stl-docs/proseDocSync.ts +344 -0
  104. package/stl-docs/proseMarkdown/proseMarkdownIntegration.ts +4 -12
  105. package/stl-docs/schema-extension.ts +12 -0
  106. package/stl-docs/tabsMiddleware.ts +1 -1
  107. package/styles/overrides.css +2 -14
  108. package/styles/page.css +210 -71
  109. package/styles/sidebar.css +30 -17
  110. package/styles/sl-variables.css +3 -8
  111. package/styles/stldocs-variables.css +2 -2
  112. package/styles/toc.css +8 -0
  113. package/tsconfig.json +1 -1
  114. package/virtual-module.d.ts +35 -11
  115. package/playground-virtual-modules.d.ts +0 -96
  116. package/plugin/globalJs/create-playground.shim.ts +0 -3
  117. package/plugin/globalJs/playground-data.shim.ts +0 -1
  118. package/plugin/globalJs/playground-data.ts +0 -14
  119. package/plugin/specs/FileCache.ts +0 -99
  120. package/plugin/specs/generateSpec.ts +0 -112
  121. package/plugin/specs/index.ts +0 -132
  122. package/plugin/specs/inputResolver.ts +0 -146
  123. package/plugin/specs/worker.ts +0 -199
  124. package/plugin/vendor/preview.worker.docs.js +0 -26108
  125. package/plugin/vendor/templates/cli.md +0 -1
  126. package/plugin/vendor/templates/go.md +0 -316
  127. package/plugin/vendor/templates/java.md +0 -89
  128. package/plugin/vendor/templates/kotlin.md +0 -89
  129. package/plugin/vendor/templates/node.md +0 -235
  130. package/plugin/vendor/templates/python.md +0 -251
  131. package/plugin/vendor/templates/ruby.md +0 -147
  132. package/plugin/vendor/templates/terraform.md +0 -60
  133. package/plugin/vendor/templates/typescript.md +0 -319
  134. package/scripts/vendor_deps.ts +0 -50
  135. package/stl-docs/components/ClientRouterHead.astro +0 -41
  136. package/stl-docs/components/content-panel/ContentPanel.astro +0 -42
  137. package/stl-docs/components/headers/SplashMobileMenuToggle.astro +0 -65
  138. package/stl-docs/proseSearchIndexing.ts +0 -606
@@ -1,606 +0,0 @@
1
- import type { AstroIntegration } from 'astro';
2
- import { readFile } from 'fs/promises';
3
- import { getProsePages } from '../shared/getProsePages';
4
- import { getSharedLogger } from '../shared/getSharedLogger';
5
- import { bold } from '../shared/terminalUtils';
6
- import * as cheerio from 'cheerio';
7
- import { toMarkdown } from './proseMarkdown/toMarkdown';
8
- import { NormalizedStainlessDocsConfig } from './loadStlDocsConfig';
9
- import { buildProseIndex } from '@stainless-api/docs-search/providers/algolia';
10
-
11
/**
 * One parsed unit of a page, discriminated by `type`.
 *
 * Every variant carries the `tag` it is reported under and its raw `text`.
 */
type ContentBlock =
  // A heading; `id` is the anchor the heading is addressed by.
  | { type: 'header'; tag: string; id: string; text: string }
  // Running text found between headings and fenced code blocks.
  | { type: 'content'; tag: string; text: string }
  // A fenced code block; `language` is the fence's info string when one was given.
  | { type: 'code'; tag: string; language?: string; text: string };
15
-
16
/**
 * Tracks the trail of headings that encloses the current position while a
 * document is walked from top to bottom.
 */
class SectionContext {
  /** Open headings, outermost first; levels strictly increase along the array. */
  headers: { level: number; text: string }[] = [];
  /** Anchor id of the most recently seen heading. */
  headerId: string | undefined;
  /** Tag name of the most recently seen heading. */
  headerTag: string | undefined;
  /** Text of the most recently seen heading. */
  headerText: string | undefined;
  /** Reset to `false` by `header()`; callers set it to `true` after emitting content. */
  hasContent = false;

  /**
   * @returns the heading trail joined with ' > ', or `undefined` when no
   *   heading has been recorded yet.
   */
  get(): string | undefined {
    return this.headers.length > 0 ? this.headers.map(({ text }) => text).join(' > ') : undefined;
  }

  /**
   * Records a newly encountered heading.
   *
   * Headings at the same or a deeper level are closed first so the trail only
   * ever contains ancestors of the new heading plus the heading itself. Tags
   * for which `getHeaderLevel` returns 0 leave the trail untouched but still
   * update `headerId` / `headerTag` / `headerText`.
   */
  header({ id, tag, text }: { id: string; tag: string; text: string }) {
    const level = getHeaderLevel(tag);
    if (level > 0) {
      // Walk back from the innermost heading to find how many ancestors survive.
      let depth = this.headers.length;
      while (depth > 0 && this.headers[depth - 1]!.level >= level) {
        depth -= 1;
      }
      this.headers.length = depth;
      this.headers.push({ level, text });
    }
    this.headerId = id;
    this.headerTag = tag;
    this.headerText = text;
    this.hasContent = false;
  }
}
42
-
43
/**
 * Generates a URL-safe ID from header text (e.g., "OpenAPI Config" -> "openapi-config").
 *
 * Letters, combining marks and digits from any script are kept, so headings
 * written in non-Latin scripts still produce a non-empty, distinguishing ID
 * instead of collapsing to an empty string. Output for ASCII-only input is
 * unchanged.
 *
 * @param text - Raw header text.
 * @returns The lowercased slug; may be empty when `text` has no letters or digits.
 */
function slugify(text: string): string {
  return text
    .toLowerCase()
    .replace(/`/g, '') // Remove backticks
    .replace(/[^\p{L}\p{M}\p{N}]+/gu, '-') // Collapse every run of other characters into one hyphen
    .replace(/^-|-$/g, ''); // Trim leading/trailing hyphens
}
51
-
52
/**
 * Checks whether a word ends with a real table cell boundary: a trailing `|`
 * that is not backslash-escaped.
 *
 * The pipe is escaped only when it is preceded by an odd number of
 * backslashes; `\\|` is an escaped backslash followed by a genuine delimiter.
 *
 * @param word - A single whitespace-delimited token.
 * @returns `true` when the token ends a table cell.
 */
function isTableCellBoundary(word: string): boolean {
  if (!word.endsWith('|')) return false;

  let backslashes = 0;
  for (let i = word.length - 2; i >= 0 && word[i] === '\\'; i--) {
    backslashes += 1;
  }
  return backslashes % 2 === 0;
}
56
-
57
/**
 * Extracts the header level from a tag like "h1", "h2", etc.
 *
 * Only the six HTML heading levels are recognised, in either letter case.
 *
 * @param tag - Element tag name.
 * @returns The level 1-6, or 0 when `tag` is not a heading tag.
 */
function getHeaderLevel(tag: string): number {
  const digit = /^h([1-6])$/i.exec(tag)?.[1];
  return digit === undefined ? 0 : Number(digit);
}
64
-
65
// Chunking configuration
// Chunks target 64-256 tokens, estimated at ~1.3 tokens per English word.
const TOKENS_PER_WORD = 1.3;
const MIN_TOKENS = 64;
const MAX_TOKENS = 256;
const MIN_WORDS = Math.floor(MIN_TOKENS / TOKENS_PER_WORD); // 49 words
const MAX_WORDS = Math.floor(MAX_TOKENS / TOKENS_PER_WORD); // 196 words
const LINE_BREAK_WORDS = Math.floor((MAX_TOKENS * 0.75) / TOKENS_PER_WORD); // 147 words
const SENTENCE_BREAK_WORDS = Math.floor((MAX_TOKENS * 0.875) / TOKENS_PER_WORD); // 172 words

/**
 * Splits text into word-based chunks of at most MAX_WORDS words each.
 *
 * Whitespace is normalised: words are rejoined with single spaces. Text that
 * already fits in one chunk is returned whole. Otherwise a chunk is closed at
 * the first sentence-ending word once it holds SENTENCE_BREAK_WORDS words, or
 * unconditionally at MAX_WORDS. A trailing remainder shorter than MIN_WORDS is
 * appended to the previous chunk when the result still fits in MAX_WORDS.
 *
 * @param text - Arbitrary text.
 * @returns The chunks in order; empty when `text` contains no words.
 */
function chunkTextByWords(text: string): string[] {
  const tokens = text.split(/\s+/).filter(Boolean);
  if (tokens.length === 0) return [];
  if (tokens.length <= MAX_WORDS) return [tokens.join(' ')];

  const sentenceEnd = /[.!?]["']?$/;
  const pieces: string[][] = [];
  let pending: string[] = [];

  for (const token of tokens) {
    pending.push(token);
    const isFull = pending.length >= MAX_WORDS;
    const isSentenceBreak = pending.length >= SENTENCE_BREAK_WORDS && sentenceEnd.test(token);
    if (isFull || isSentenceBreak) {
      pieces.push(pending);
      pending = [];
    }
  }

  if (pending.length > 0) {
    const previous = pieces[pieces.length - 1];
    if (
      previous !== undefined &&
      pending.length < MIN_WORDS &&
      previous.length + pending.length <= MAX_WORDS
    ) {
      // Too short to stand alone and small enough to fold into its predecessor.
      previous.push(...pending);
    } else {
      pieces.push(pending);
    }
  }

  return pieces.map((piece) => piece.join(' '));
}
122
-
123
/**
 * A chunk produced by `chunkByWords`, together with the heading metadata that
 * was current when the chunk was emitted.
 */
type ContentBlockChunk = {
  type: 'prose';
  /** The chunk text, words joined by single spaces. */
  content: string;
  /** Anchor id of the enclosing heading, when one has been seen. */
  headerId?: string;
  /** Tag of the enclosing heading; set on text chunks only. */
  headerTag?: string;
  /** Set to 'code' on chunks that come from code blocks. */
  tag?: string;
  /** Language of the originating code block, when known. */
  language?: string;
  /** Heading trail joined with ' > '. */
  sectionContext?: string;
};
132
-
133
/**
 * Chunks content blocks into segments of 64-256 tokens.
 *
 * Chunking strategy:
 * 1. Break at headers to keep sections isolated
 * 2. Prefer breaking at line/table boundaries after LINE_BREAK_WORDS
 * 3. Break at sentence endings after SENTENCE_BREAK_WORDS
 * 4. Force break at MAX_WORDS, preferring table row boundaries if available
 * 5. Section context (header hierarchy) is recorded alongside each chunk for discoverability
 *
 * Code blocks are chunked on their own via `chunkTextByWords` and tagged 'code'.
 *
 * @param blocks - Parsed blocks in document order.
 * @returns The chunks in document order.
 */
function chunkByWords(blocks: ContentBlock[]): ContentBlockChunk[] {
  // Hoisted so the patterns are not rebuilt for every line / word.
  const fenceLine = /^(`{3,}|~{3,})([a-zA-Z0-9+-]*)?\s*$/;
  const sentenceEnd = /[.!?]["']?$/;
  // Each tab counts as two columns when measuring indentation.
  const tabAsSpaces = '  ';

  const chunks: ContentBlockChunk[] = [];
  const ctx = new SectionContext();
  let currentChunk: string[] = [];

  // Flush current chunk to output. If splitAt is provided, keep words after that index for next chunk.
  const flushChunk = (splitAt?: number) => {
    if (currentChunk.length === 0) return;

    const wordsToFlush = splitAt !== undefined ? currentChunk.slice(0, splitAt) : currentChunk;
    const wordsToKeep = splitAt !== undefined ? currentChunk.slice(splitAt) : [];

    if (wordsToFlush.length > 0) {
      chunks.push({
        type: 'prose',
        content: wordsToFlush.join(' ').trim(),
        headerId: ctx.headerId,
        headerTag: ctx.headerTag,
        sectionContext: ctx.get() || undefined,
      });
      ctx.hasContent = true;
    }
    currentChunk = wordsToKeep;
  };

  // Find a table row boundary to break at (between MIN_WORDS and current length).
  // Returns the index to split at, or undefined if no good boundary found.
  const findTableRowBoundary = (): number | undefined => {
    for (let i = currentChunk.length - 1; i >= MIN_WORDS; i--) {
      const word = currentChunk[i]!;
      const nextWord = currentChunk[i + 1];
      // A row boundary is where one cell ends (|) and the next row starts (|)
      if (isTableCellBoundary(word) && nextWord?.startsWith('|')) {
        return i + 1;
      }
    }
    return undefined;
  };

  for (const block of blocks) {
    if (block.type === 'header') {
      flushChunk();
      ctx.header(block);
      continue;
    }

    // Chunk code blocks separately; they tend to be more important.
    if (block.type === 'code') {
      flushChunk();
      const codeText = block.text.trim();
      if (codeText) {
        for (const chunkText of chunkTextByWords(codeText)) {
          chunks.push({
            type: 'prose',
            content: chunkText,
            headerId: ctx.headerId,
            tag: 'code',
            language: block.language,
            sectionContext: ctx.get(),
          });
          ctx.hasContent = true;
        }
      }
      continue;
    }

    if (block.type !== 'content') continue;

    // Split by newlines first to preserve line boundary information
    const lines = block.text.split(/\n/);
    let inCodeBlock = false;

    for (let lineIdx = 0; lineIdx < lines.length; lineIdx++) {
      const line = lines[lineIdx]!;

      // Track code block boundaries (standalone fences only). The toggle happens
      // before the line's words are processed, so an opening fence line already
      // counts as code and a closing fence line does not.
      if (fenceLine.test(line.trim())) {
        inCodeBlock = !inCodeBlock;
      }

      // Indentation level in columns, treating tabs as 2 spaces
      const indentMatch = line.match(/^(\s*)/);
      const indentLevel = indentMatch ? indentMatch[1]!.replace(/\t/g, tabAsSpaces).length : 0;

      const words = line.split(/\s+/).filter((w) => w.length > 0);
      const isLastLine = lineIdx === lines.length - 1;

      for (let wordIdx = 0; wordIdx < words.length; wordIdx++) {
        const word = words[wordIdx]!;
        const isEndOfLine = wordIdx === words.length - 1 && !isLastLine;

        // Make room before adding the word, splitting at a table row if one is available.
        if (currentChunk.length >= MAX_WORDS) {
          flushChunk(findTableRowBoundary());
        }

        currentChunk.push(word);

        // In code blocks, avoid early flushes to keep blocks together
        // - Light indentation (2-3 columns): require more words before flushing
        // - Deep indentation (4+ columns): skip early flushes entirely
        const inShallowCode = inCodeBlock && indentLevel >= 2 && indentLevel < 4;
        const inDeepCode = inCodeBlock && indentLevel >= 4;

        // Flush early at natural break points
        const len = currentChunk.length;
        const atTableBreak = len >= LINE_BREAK_WORDS && isTableCellBoundary(word);
        // Shallow code: only flush at sentence threshold; Deep code: don't flush early
        const lineBreakThreshold = inShallowCode ? SENTENCE_BREAK_WORDS : LINE_BREAK_WORDS;
        const atLineBreak = len >= lineBreakThreshold && isEndOfLine && !inDeepCode;
        const atSentenceBreak = len >= SENTENCE_BREAK_WORDS && sentenceEnd.test(word) && !inDeepCode;
        if (atTableBreak || atLineBreak || atSentenceBreak) {
          flushChunk();
        }
      }
    }
  }

  flushChunk();
  return chunks;
}
267
-
268
/**
 * Parses markdown into content blocks, identifying headers, content sections, and code blocks.
 * Code blocks are extracted separately with language metadata for specialized indexing.
 *
 * - A `title:` entry in leading frontmatter becomes an `h1` header block.
 * - Both LF and CRLF line endings are accepted.
 * - A fenced code block is closed only by a fence of the same character, at
 *   least as long as the opening fence, and without an info string; any other
 *   fence-looking line inside the block is kept as code.
 * - A code block left open at the end of the input is still emitted.
 *
 * @param markdown - Markdown source, optionally starting with frontmatter.
 * @returns Blocks in document order.
 */
function parseMarkdown(markdown: string): ContentBlock[] {
  const blocks: ContentBlock[] = [];

  // Frontmatter: an opening `---` line, optional body, and a closing `---` line.
  // The same match is used for title extraction and for stripping, so the two
  // can never disagree about where the frontmatter ends.
  const frontmatterMatch = markdown.match(/^---[ \t]*\r?\n(?:([\s\S]*?)\r?\n)?---[ \t]*(?=\r?\n|$)/);
  let body = markdown;
  if (frontmatterMatch) {
    const titleMatch = (frontmatterMatch[1] ?? '').match(/^title:\s*(.+)$/m);
    if (titleMatch) {
      const title = titleMatch[1]!.trim().replace(/^["']|["']$/g, ''); // Remove quotes if present
      blocks.push({
        type: 'header',
        tag: 'h1',
        id: slugify(title),
        text: title,
      });
    }
    body = markdown.slice(frontmatterMatch[0].length);
  }

  // Split into lines; tolerate CRLF so header lines do not keep a trailing '\r'.
  const lines = body.trim().split(/\r?\n/);
  let currentContent: string[] = [];
  let openFence: string | undefined; // Marker that opened the current code block, if any
  let codeBlockLanguage: string | undefined;
  let codeBlockContent: string[] = [];

  const flushContent = () => {
    const text = currentContent.join('\n').trim();
    if (text) {
      blocks.push({ type: 'content', tag: 'p', text });
    }
    currentContent = [];
  };

  const flushCodeBlock = () => {
    const code = codeBlockContent.join('\n').trim();
    if (code) {
      blocks.push({
        type: 'code',
        tag: 'code',
        text: code,
        language: codeBlockLanguage || undefined,
      });
    }
    codeBlockContent = [];
    codeBlockLanguage = undefined;
  };

  for (const line of lines) {
    // Track fenced code blocks (``` or ~~~)
    // Only match standalone markers: ```[language] with nothing else on the line
    // This avoids matching inline code blocks in table cells like "``` Then content..."
    const fenceMatch = line.match(/^(`{3,}|~{3,})([a-zA-Z0-9+-]*)?\s*$/);
    if (fenceMatch) {
      const marker = fenceMatch[1]!;
      if (openFence === undefined) {
        flushContent();
        openFence = marker;
        codeBlockLanguage = fenceMatch[2] || undefined;
        continue;
      }
      const closesBlock =
        marker[0] === openFence[0] && marker.length >= openFence.length && !fenceMatch[2];
      if (closesBlock) {
        flushCodeBlock();
        openFence = undefined;
        continue;
      }
      // Not a valid closing fence: fall through and keep the line as code.
    }

    if (openFence !== undefined) {
      codeBlockContent.push(line);
      continue;
    }

    // Only match headers outside of code blocks
    const headerMatch = line.match(/^(#{1,6})\s+(.+)$/);
    if (headerMatch) {
      flushContent();
      const headerText = headerMatch[2]!.trim();
      blocks.push({
        type: 'header',
        tag: `h${headerMatch[1]!.length}`,
        id: slugify(headerText),
        text: headerText,
      });
      continue;
    }

    currentContent.push(line);
  }

  flushCodeBlock();
  flushContent();
  return blocks;
}
370
-
371
/** A single search-index record produced by `indexMarkdown` or `indexHTML`. */
export type IndexEntry = {
  /** Position of this record within its group: group id, zero-based index, group size. */
  chunk: { id: string; index: number; total: number };
  /** Identifier of the record, normally the anchor id of the enclosing heading. */
  id: string;
  /** Tag the content is reported under (e.g. 'p', 'li', 'code', or a heading tag). */
  tag: string;
  /** The chunk text. */
  content: string;
  /** Code language, present only when one was detected. */
  language?: string;
  /** Heading trail joined with ' > ', present only when non-empty. */
  sectionContext?: string;
};
379
-
380
/**
 * Turns a markdown document into search-index entries.
 *
 * The document is parsed into blocks, chunked, and every chunk is yielded with
 * its position inside the document. All entries of one call share a freshly
 * generated random `chunk.id`. `sectionContext` and `language` are included
 * only when they are non-empty.
 *
 * @param markdown - Markdown source of one page.
 */
export function* indexMarkdown(markdown: string): Generator<IndexEntry> {
  const chunks = chunkByWords(parseMarkdown(markdown));
  const documentId = crypto.randomUUID();

  for (let index = 0; index < chunks.length; index++) {
    const { content, headerId, headerTag, tag, sectionContext, language } = chunks[index]!;
    yield {
      id: headerId ?? '',
      tag: tag ?? headerTag ?? '',
      content,
      ...(sectionContext ? { sectionContext } : {}),
      ...(language ? { language } : {}),
      chunk: { id: documentId, index, total: chunks.length },
    };
  }
}
404
-
405
// Default scope and element selection used by `indexHTML`.
const DEFAULT_ROOT = 'main';
const DEFAULT_PATTERN = 'h1, h2, h3, h4, h5, h6, p, li, pre code';

/**
 * Indexes HTML content for search, with section context and code language extraction.
 *
 * Elements matching `pattern` under `root` are visited in the order cheerio
 * returns them:
 * - headings update the running header hierarchy and are not emitted themselves;
 * - `<code>` directly inside `<pre>` is tagged 'code', with the language read
 *   from a `language-*` / `lang-*` class when present;
 * - every other element's text is chunked with `chunkTextByWords`; the chunks
 *   of one element share a random `chunk.id`.
 *
 * @param content - Full HTML document.
 * @param root - Selector of the container to search within.
 * @param pattern - Selector of the elements to index.
 */
export function* indexHTML(
  content: string,
  root = DEFAULT_ROOT,
  pattern = DEFAULT_PATTERN,
): Generator<IndexEntry> {
  const $ = cheerio.load(content);
  const ctx = new SectionContext();

  for (const node of $(root).find(pattern)) {
    const element = $(node);
    const tagName = node.tagName.toLowerCase();
    const rawText = element.text().trim();

    // Headings only move the section context forward.
    if (getHeaderLevel(tagName) > 0) {
      ctx.header({ id: element.attr('id') ?? slugify(rawText), tag: tagName, text: rawText });
      continue;
    }

    const isCode = tagName === 'code' && element.parent().is('pre');
    const language = isCode
      ? /(?:language-|lang-)([a-zA-Z0-9+-]+)/.exec(element.attr('class') || '')?.[1]
      : undefined;

    const sectionContext = ctx.get();
    const pieces = chunkTextByWords(rawText);
    const chunkId = crypto.randomUUID();

    for (let position = 0; position < pieces.length; position++) {
      yield {
        // Prefer the enclosing heading's anchor, then the element's own id.
        id: ctx.headerId ?? element.attr('id') ?? chunkId,
        tag: isCode ? 'code' : tagName,
        content: pieces[position]!,
        ...(sectionContext ? { sectionContext } : {}),
        ...(language ? { language } : {}),
        chunk: { id: chunkId, index: position, total: pieces.length },
      };
      ctx.hasContent = true;
    }
  }
}
466
-
467
/**
 * Astro integration that, once the build is done, indexes the rendered prose
 * pages and sends them to Algolia.
 *
 * Indexing is skipped (with an info log) unless PUBLIC_ALGOLIA_APP_ID,
 * PUBLIC_ALGOLIA_INDEX and PRIVATE_ALGOLIA_WRITE_KEY are all set. Records go
 * to the index named `<PUBLIC_ALGOLIA_INDEX>-prose`. A failing upload is
 * logged as an error and is not rethrown.
 *
 * @param apiReferenceBasePath - Passed through to `getProsePages`.
 */
export function stainlessDocsAlgoliaProseIndexing({
  apiReferenceBasePath,
}: {
  apiReferenceBasePath: string | null;
}): AstroIntegration {
  return {
    name: 'stl-docs-prose-indexing',
    hooks: {
      'astro:build:done': async ({ logger: localLogger, dir }) => {
        const logger = getSharedLogger({ fallback: localLogger });

        const {
          PUBLIC_ALGOLIA_APP_ID: appId,
          PUBLIC_ALGOLIA_INDEX: indexName,
          PRIVATE_ALGOLIA_WRITE_KEY: algoliaWriteKey,
        } = process.env;
        if (!(appId && indexName && algoliaWriteKey)) {
          logger.info('Skipping algolia indexing due to missing environment variables');
          return;
        }

        const outputBasePath = dir.pathname;
        const pagesToRender = await getProsePages({ apiReferenceBasePath, outputBasePath });
        logger.info(bold(`Indexing ${pagesToRender.length} prose pages for algolia search`));

        const objects = [];
        for (const absHtmlPath of pagesToRender) {
          // `source` is the page path with the output directory prefix removed.
          const source = absHtmlPath.slice(outputBasePath.length);
          const html = await readFile(absHtmlPath, 'utf-8');
          for (const entry of indexHTML(html)) {
            objects.push({ ...entry, source });
          }
        }

        try {
          await buildProseIndex(appId, `${indexName}-prose`, algoliaWriteKey, objects);
        } catch (err) {
          logger.error(`Failed to index prose content: ${err}`);
        }
      },
    },
  };
}
513
-
514
/**
 * Astro integration that, once the build is done, converts the rendered prose
 * pages to markdown, chunks them, and uploads the chunks to the Stainless docs
 * index API.
 *
 * Indexing is skipped (with an info log naming what is missing) unless
 * STAINLESS_API_KEY, STAINLESS_DOCS_SITE_ID and STAINLESS_DOCS_REPO_SHA are set
 * and `config.apiReference.stainlessProject` is configured. It is also skipped
 * when no pages or no chunks are found.
 *
 * The upload outcome is reported through the shared logger: a non-2xx response
 * or a network failure is logged as an error and is not rethrown.
 *
 * @param config - Normalized docs config; only `apiReference.stainlessProject` is read.
 * @param apiReferenceBasePath - Passed through to `getProsePages`.
 */
export function stainlessDocsVectorProseIndexing(
  config: NormalizedStainlessDocsConfig,
  apiReferenceBasePath: string | null,
): AstroIntegration {
  return {
    name: 'stl-docs-prose-indexing',
    hooks: {
      'astro:build:done': async ({ logger: localLogger, dir }) => {
        const logger = getSharedLogger({ fallback: localLogger });
        const outputBasePath = dir.pathname;

        const stainlessProjectName = config.apiReference?.stainlessProject;

        const {
          STAINLESS_API_KEY: stainlessApiKey,
          STAINLESS_DOCS_SITE_ID: stainlessDocsSiteId,
          STAINLESS_DOCS_REPO_SHA: stainlessDocsRepoSha,
        } = process.env;

        // Skip indexing if required environment variables are not set
        if (!stainlessApiKey || !stainlessProjectName || !stainlessDocsSiteId || !stainlessDocsRepoSha) {
          logger.info(
            `Skipping vector prose search indexing: required environment/config variables not set, missing: ${[
              !stainlessApiKey && 'STAINLESS_API_KEY',
              !stainlessDocsSiteId && 'STAINLESS_DOCS_SITE_ID',
              !stainlessDocsRepoSha && 'STAINLESS_DOCS_REPO_SHA',
              !stainlessProjectName && 'stainlessProject in apiReference config',
            ]
              .filter(Boolean)
              .join(', ')}`,
          );
          return;
        }

        const pagesToRender = await getProsePages({ apiReferenceBasePath, outputBasePath });

        if (pagesToRender.length === 0) {
          logger.info('No prose pages found to index for vector search');
          return;
        }

        logger.info(bold(`Indexing ${pagesToRender.length} prose pages for vector search`));

        const objects: {
          id: string;
          tag: string;
          content: string;
          language?: string;
          // Already present at runtime via the spread below; declared so the type matches.
          sectionContext?: string;
          kind: 'prose';
          source: string;
        }[] = [];
        for (const absHtmlPath of pagesToRender) {
          const content = await readFile(absHtmlPath, 'utf-8');
          const markdown = await toMarkdown(content);

          if (markdown) {
            // The per-document chunk position is not part of the uploaded record.
            for (const { chunk: _, ...entry } of indexMarkdown(markdown)) {
              objects.push({
                ...entry,
                kind: 'prose',
                source: absHtmlPath.slice(outputBasePath.length),
              });
            }
          }
        }

        if (objects.length === 0) {
          logger.info('No prose content extracted to index for vector search');
          return;
        }

        logger.info(bold(`Uploading ${objects.length} prose content chunks to stainless docs index`));

        // Encode the path segments so unusual project / site ids cannot alter the URL.
        const endpoint = `https://api.stainless.com/api/projects/${encodeURIComponent(
          stainlessProjectName,
        )}/docs-sites/${encodeURIComponent(stainlessDocsSiteId)}/index`;

        try {
          const response = await fetch(endpoint, {
            method: 'POST',
            headers: {
              'Content-Type': 'application/json',
              Authorization: `Bearer ${stainlessApiKey}`,
            },
            body: JSON.stringify({
              docs_repo_sha: stainlessDocsRepoSha,
              index: objects,
            }),
          });

          const summary = `docs index API response code ${response.status}: ${await response.text()}`;
          if (response.ok) {
            logger.info(summary);
          } else {
            logger.error(summary);
          }
        } catch (err) {
          logger.error(
            `Failed to upload prose content to stainless docs index: ${
              err instanceof Error ? err.message : String(err)
            }`,
          );
        }
      },
    },
  };
}