@stainless-api/docs 0.1.0-beta.103 → 0.1.0-beta.104

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -4,15 +4,8 @@ import { getProsePages } from '../shared/getProsePages';
4
4
  import { getSharedLogger } from '../shared/getSharedLogger';
5
5
  import { bold } from '../shared/terminalUtils';
6
6
  import * as cheerio from 'cheerio';
7
- import { toMarkdown } from './proseMarkdown/toMarkdown';
8
- import { NormalizedStainlessDocsConfig } from './loadStlDocsConfig';
9
7
  import { buildProseIndex } from '@stainless-api/docs-search/providers/algolia';
10
8
 
11
- type ContentBlock =
12
- | { type: 'header'; tag: string; id: string; text: string }
13
- | { type: 'content'; tag: string; text: string }
14
- | { type: 'code'; tag: string; language?: string; text: string };
15
-
16
9
  class SectionContext {
17
10
  headers: { level: number; text: string }[] = [];
18
11
  headerId: string | undefined;
@@ -40,23 +33,14 @@ class SectionContext {
40
33
  }
41
34
  }
42
35
 
43
- // Generate a URL-safe ID from header text (e.g., "OpenAPI Config" -> "openapi-config")
44
36
  function slugify(text: string): string {
45
37
  return text
46
38
  .toLowerCase()
47
- .replace(/`/g, '') // Remove backticks
48
- .replace(/[^a-z0-9]+/g, '-') // Replace non-alphanumeric with hyphens
49
- .replace(/^-|-$/g, ''); // Trim leading/trailing hyphens
50
- }
51
-
52
- // Check if a word ends with a real table cell boundary (| but not escaped \|)
53
- function isTableCellBoundary(word: string): boolean {
54
- return word.endsWith('|') && !word.endsWith('\\|');
39
+ .replace(/`/g, '')
40
+ .replace(/[^a-z0-9]+/g, '-')
41
+ .replace(/^-|-$/g, '');
55
42
  }
56
43
 
57
- /**
58
- * Extracts the header level from a tag like "h1", "h2", etc.
59
- */
60
44
  function getHeaderLevel(tag: string): number {
61
45
  const match = tag.match(/^h(\d)$/);
62
46
  return match ? parseInt(match[1]!, 10) : 0;
@@ -69,7 +53,6 @@ const MIN_TOKENS = 64;
69
53
  const MAX_TOKENS = 256;
70
54
  const MIN_WORDS = Math.floor(MIN_TOKENS / TOKENS_PER_WORD); // ~49 words
71
55
  const MAX_WORDS = Math.floor(MAX_TOKENS / TOKENS_PER_WORD); // ~197 words
72
- const LINE_BREAK_WORDS = Math.floor((MAX_TOKENS * 0.75) / TOKENS_PER_WORD); // ~148 words
73
56
  const SENTENCE_BREAK_WORDS = Math.floor((MAX_TOKENS * 0.875) / TOKENS_PER_WORD); // ~172 words
74
57
 
75
58
  /**
@@ -120,254 +103,6 @@ function chunkTextByWords(text: string): string[] {
120
103
  return chunks;
121
104
  }
122
105
 
123
- type ContentBlockChunk = {
124
- type: 'prose';
125
- content: string;
126
- headerId?: string;
127
- headerTag?: string;
128
- tag?: string;
129
- language?: string;
130
- sectionContext?: string;
131
- };
132
-
133
- /**
134
- * Chunks content blocks into segments of 64-256 tokens.
135
- *
136
- * Chunking strategy:
137
- * 1. Break at headers to keep sections isolated
138
- * 2. Prefer breaking at line/table boundaries after LINE_BREAK_WORDS (~148 words / ~192 tokens)
139
- * 3. Break at sentence endings after SENTENCE_BREAK_WORDS (~172 words / ~224 tokens)
140
- * 4. Force break at MAX_WORDS, preferring table row boundaries if available
141
- * 5. Section context (header hierarchy) is recorded alongside each chunk for discoverability
142
- */
143
- function chunkByWords(blocks: ContentBlock[]): ContentBlockChunk[] {
144
- const chunks: ContentBlockChunk[] = [];
145
-
146
- let currentChunk: string[] = [];
147
- const ctx = new SectionContext();
148
-
149
- // Flush current chunk to output. If splitAt is provided, keep words after that index for next chunk.
150
- const flushChunk = (splitAt?: number) => {
151
- if (currentChunk.length === 0) return;
152
-
153
- const wordsToFlush = splitAt !== undefined ? currentChunk.slice(0, splitAt) : currentChunk;
154
- const wordsToKeep = splitAt !== undefined ? currentChunk.slice(splitAt) : [];
155
-
156
- if (wordsToFlush.length > 0) {
157
- const chunkText = wordsToFlush.join(' ').trim();
158
- const sectionContext = ctx.get();
159
-
160
- chunks.push({
161
- type: 'prose',
162
- content: chunkText,
163
- headerId: ctx.headerId,
164
- headerTag: ctx.headerTag,
165
- sectionContext: sectionContext || undefined,
166
- });
167
- ctx.hasContent = true;
168
- }
169
- currentChunk = wordsToKeep;
170
- };
171
-
172
- // Find a table row boundary to break at (between MIN_WORDS and current length)
173
- // Returns the index to split at, or undefined if no good boundary found
174
- const findTableRowBoundary = (): number | undefined => {
175
- for (let i = currentChunk.length - 1; i >= MIN_WORDS; i--) {
176
- const word = currentChunk[i]!;
177
- const nextWord = currentChunk[i + 1];
178
- // A row boundary is where one cell ends (|) and the next row starts (|)
179
- if (isTableCellBoundary(word) && nextWord?.startsWith('|')) {
180
- return i + 1;
181
- }
182
- }
183
- return undefined;
184
- };
185
-
186
- for (const block of blocks) {
187
- if (block.type === 'header') {
188
- flushChunk();
189
- ctx.header(block);
190
- continue;
191
- }
192
-
193
- // Chunk code blocks separately; they tend to be more important.
194
- if (block.type === 'code') {
195
- flushChunk();
196
- const codeText = block.text.trim();
197
- if (codeText) {
198
- for (const chunkText of chunkTextByWords(codeText)) {
199
- chunks.push({
200
- type: 'prose',
201
- content: chunkText,
202
- headerId: ctx.headerId,
203
- tag: 'code',
204
- language: block.language,
205
- sectionContext: ctx.get(),
206
- });
207
- ctx.hasContent = true;
208
- }
209
- }
210
- continue;
211
- }
212
-
213
- if (block.type !== 'content') continue;
214
-
215
- // Split by newlines first to preserve line boundary information
216
- const lines = block.text.split(/\n/);
217
- let inCodeBlock = false;
218
-
219
- for (let lineIdx = 0; lineIdx < lines.length; lineIdx++) {
220
- const line = lines[lineIdx]!;
221
-
222
- // Track code block boundaries (standalone fences only)
223
- if (/^(`{3,}|~{3,})([a-zA-Z0-9+-]*)?\s*$/.test(line.trim())) {
224
- inCodeBlock = !inCodeBlock;
225
- }
226
-
227
- // Calculate indentation level (number of leading spaces, treating tabs as 2 spaces)
228
- const indentMatch = line.match(/^(\s*)/);
229
- const indentLevel = indentMatch ? indentMatch[1]!.replace(/\t/g, ' ').length : 0;
230
-
231
- const words = line.split(/\s+/).filter((w) => w.length > 0);
232
- const isLastLine = lineIdx === lines.length - 1;
233
-
234
- for (let wordIdx = 0; wordIdx < words.length; wordIdx++) {
235
- const word = words[wordIdx]!;
236
- const isEndOfLine = wordIdx === words.length - 1 && !isLastLine;
237
-
238
- if (currentChunk.length >= MAX_WORDS) {
239
- flushChunk(findTableRowBoundary());
240
- }
241
-
242
- currentChunk.push(word);
243
-
244
- // In code blocks, avoid early flushes to keep blocks together
245
- // - Light indentation (2+ spaces): require more words before flushing
246
- // - Deep indentation (4+ spaces): skip early flushes entirely
247
- const inShallowCode = inCodeBlock && indentLevel >= 2 && indentLevel < 4;
248
- const inDeepCode = inCodeBlock && indentLevel >= 4;
249
-
250
- // Flush early at natural break points
251
- const len = currentChunk.length;
252
- const atTableBreak = len >= LINE_BREAK_WORDS && isTableCellBoundary(word);
253
- // Shallow code: only flush at sentence threshold; Deep code: don't flush early
254
- const lineBreakThreshold = inShallowCode ? SENTENCE_BREAK_WORDS : LINE_BREAK_WORDS;
255
- const atLineBreak = len >= lineBreakThreshold && isEndOfLine && !inDeepCode;
256
- const atSentenceBreak = len >= SENTENCE_BREAK_WORDS && /[.!?]["']?$/.test(word) && !inDeepCode;
257
- if (atTableBreak || atLineBreak || atSentenceBreak) {
258
- flushChunk();
259
- }
260
- }
261
- }
262
- }
263
-
264
- flushChunk();
265
- return chunks;
266
- }
267
-
268
- /**
269
- * Parses markdown into content blocks, identifying headers, content sections, and code blocks.
270
- * Code blocks are extracted separately with language metadata for specialized indexing.
271
- */
272
- function parseMarkdown(markdown: string): ContentBlock[] {
273
- const blocks: ContentBlock[] = [];
274
-
275
- // Extract title from frontmatter and treat it as h1
276
- const frontmatterMatch = markdown.match(/^---\r?\n([\s\S]*?)\r?\n---/);
277
- if (frontmatterMatch) {
278
- const frontmatter = frontmatterMatch[1]!;
279
- const titleMatch = frontmatter.match(/^title:\s*(.+)$/m);
280
- if (titleMatch) {
281
- const title = titleMatch[1]!.trim().replace(/^["']|["']$/g, ''); // Remove quotes if present
282
- blocks.push({
283
- type: 'header',
284
- tag: 'h1',
285
- id: slugify(title),
286
- text: title,
287
- });
288
- }
289
- }
290
-
291
- // Remove frontmatter
292
- const content = markdown.replace(/^---[\s\S]*?---\r?\n*/, '').trim();
293
-
294
- // Split into lines and process
295
- const lines = content.split('\n');
296
- let currentContent: string[] = [];
297
- let inCodeBlock = false;
298
- let codeBlockLanguage: string | undefined;
299
- let codeBlockContent: string[] = [];
300
-
301
- const flushContent = () => {
302
- const text = currentContent.join('\n').trim();
303
- if (text) {
304
- blocks.push({ type: 'content', tag: 'p', text });
305
- }
306
- currentContent = [];
307
- };
308
-
309
- const flushCodeBlock = () => {
310
- if (codeBlockContent.length > 0) {
311
- const code = codeBlockContent.join('\n').trim();
312
- if (code) {
313
- blocks.push({
314
- type: 'code',
315
- tag: 'code',
316
- text: code,
317
- language: codeBlockLanguage || undefined,
318
- });
319
- }
320
- }
321
- codeBlockContent = [];
322
- codeBlockLanguage = undefined;
323
- };
324
-
325
- for (const line of lines) {
326
- // Track fenced code blocks (``` or ~~~)
327
- // Only match standalone markers: ```[language] with nothing else on the line
328
- // This avoids matching inline code blocks in table cells like "``` Then content..."
329
- const codeBlockMatch = line.match(/^(`{3,}|~{3,})([a-zA-Z0-9+-]*)?\s*$/);
330
- if (codeBlockMatch) {
331
- if (!inCodeBlock) {
332
- flushContent();
333
- inCodeBlock = true;
334
- codeBlockLanguage = codeBlockMatch[2] || undefined;
335
- } else {
336
- flushCodeBlock();
337
- inCodeBlock = false;
338
- }
339
- continue;
340
- }
341
-
342
- if (inCodeBlock) {
343
- codeBlockContent.push(line);
344
- continue;
345
- }
346
-
347
- // Only match headers outside of code blocks
348
- const headerMatch = line.match(/^(#{1,6})\s+(.+)$/);
349
-
350
- if (headerMatch) {
351
- flushContent();
352
- const level = headerMatch[1]!.length;
353
- const headerText = headerMatch[2]!.trim();
354
- blocks.push({
355
- type: 'header',
356
- tag: `h${level}`,
357
- id: slugify(headerText),
358
- text: headerText,
359
- });
360
- continue;
361
- }
362
-
363
- currentContent.push(line);
364
- }
365
-
366
- flushCodeBlock();
367
- flushContent();
368
- return blocks;
369
- }
370
-
371
106
  export type IndexEntry = {
372
107
  chunk: { id: string; index: number; total: number };
373
108
  id: string;
@@ -377,31 +112,6 @@ export type IndexEntry = {
377
112
  sectionContext?: string;
378
113
  };
379
114
 
380
- /**
381
- * Extracts and chunks markdown content for search indexing.
382
- * Yields prose and code chunks with section context and language metadata.
383
- */
384
- export function* indexMarkdown(markdown: string): Generator<IndexEntry> {
385
- const blocks = parseMarkdown(markdown);
386
- const chunks = chunkByWords(blocks);
387
- const documentId = crypto.randomUUID();
388
-
389
- for (const [index, chunk] of chunks.entries()) {
390
- yield {
391
- id: chunk.headerId ?? '',
392
- tag: chunk.tag ?? chunk.headerTag ?? '',
393
- content: chunk.content,
394
- ...(chunk.sectionContext ? { sectionContext: chunk.sectionContext } : {}),
395
- ...(chunk.language ? { language: chunk.language } : {}),
396
- chunk: {
397
- id: documentId,
398
- index,
399
- total: chunks.length,
400
- },
401
- };
402
- }
403
- }
404
-
405
115
  const DEFAULT_ROOT = 'main';
406
116
  const DEFAULT_PATTERN = 'h1, h2, h3, h4, h5, h6, p, li, pre code';
407
117
 
@@ -510,97 +220,3 @@ export function stainlessDocsAlgoliaProseIndexing({
510
220
  },
511
221
  };
512
222
  }
513
-
514
- export function stainlessDocsVectorProseIndexing(
515
- config: NormalizedStainlessDocsConfig,
516
- apiReferenceBasePath: string | null,
517
- ): AstroIntegration {
518
- return {
519
- name: 'stl-docs-prose-indexing',
520
- hooks: {
521
- 'astro:build:done': async ({ logger: localLogger, dir }) => {
522
- const logger = getSharedLogger({ fallback: localLogger });
523
- const outputBasePath = dir.pathname;
524
-
525
- const stainlessProjectName = config.apiReference?.stainlessProject;
526
-
527
- const {
528
- STAINLESS_API_KEY: stainlessApiKey,
529
- STAINLESS_DOCS_SITE_ID: stainlessDocsSiteId,
530
- STAINLESS_DOCS_REPO_SHA: stainlessDocsRepoSha,
531
- } = process.env;
532
-
533
- // Skip indexing if required environment variables are not set
534
- if (!stainlessApiKey || !stainlessProjectName || !stainlessDocsSiteId || !stainlessDocsRepoSha) {
535
- logger.info(
536
- `Skipping vector prose search indexing: required environment/config variables not set, missing: ${[
537
- !stainlessApiKey && 'STAINLESS_API_KEY',
538
- !stainlessDocsSiteId && 'STAINLESS_DOCS_SITE_ID',
539
- !stainlessDocsRepoSha && 'STAINLESS_DOCS_REPO_SHA',
540
- !stainlessProjectName && 'stainlessProject in apiReference config',
541
- ]
542
- .filter(Boolean)
543
- .join(', ')}`,
544
- );
545
- return;
546
- }
547
-
548
- const pagesToRender = await getProsePages({ apiReferenceBasePath, outputBasePath });
549
-
550
- if (pagesToRender.length === 0) {
551
- logger.info('No prose pages found to index for vector search');
552
- return;
553
- }
554
-
555
- logger.info(bold(`Indexing ${pagesToRender.length} prose pages for vector search`));
556
-
557
- const objects: {
558
- id: string;
559
- tag: string;
560
- content: string;
561
- language?: string;
562
- kind: 'prose';
563
- source: string;
564
- }[] = [];
565
- for (const absHtmlPath of pagesToRender) {
566
- const content = await readFile(absHtmlPath, 'utf-8');
567
- const markdown = await toMarkdown(content);
568
-
569
- if (markdown) {
570
- const idx = indexMarkdown(markdown);
571
- for (const { chunk: _, ...entry } of idx)
572
- objects.push({
573
- ...entry,
574
- kind: 'prose',
575
- source: absHtmlPath.slice(outputBasePath.length),
576
- });
577
- }
578
- }
579
-
580
- if (objects.length === 0) {
581
- logger.info('No prose content extracted to index for vector search');
582
- return;
583
- }
584
-
585
- logger.info(bold(`Uploading ${objects.length} prose content chunks to stainless docs index`));
586
-
587
- const response = await fetch(
588
- `https://api.stainless.com/api/projects/${stainlessProjectName}/docs-sites/${stainlessDocsSiteId}/index`,
589
- {
590
- method: 'POST',
591
- headers: {
592
- 'Content-Type': 'application/json',
593
- Authorization: `Bearer ${stainlessApiKey}`,
594
- },
595
- body: JSON.stringify({
596
- docs_repo_sha: stainlessDocsRepoSha,
597
- index: objects,
598
- }),
599
- },
600
- );
601
-
602
- console.log(`docs index API response code ${response.status}: ${await response.text()}`);
603
- },
604
- },
605
- };
606
- }
@@ -54,18 +54,6 @@ mobile-starlight-toc {
54
54
  }
55
55
  }
56
56
 
57
- @media (min-width: 50rem) {
58
- starlight-menu-button button {
59
- display: none;
60
- }
61
-
62
- mobile-starlight-toc {
63
- nav {
64
- inset-inline-start: calc(var(--sl-content-inline-start, 0));
65
- }
66
-
67
- summary {
68
- padding: 2rem 2rem;
69
- }
70
- }
57
+ starlight-menu-button {
58
+ display: none;
71
59
  }