jamdesk 1.1.37 → 1.1.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/__tests__/integration/init.integration.test.js +44 -0
- package/dist/__tests__/integration/init.integration.test.js.map +1 -1
- package/dist/__tests__/unit/init.test.js +2 -1
- package/dist/__tests__/unit/init.test.js.map +1 -1
- package/package.json +1 -1
- package/templates/api-reference/openapi-example.mdx +55 -0
- package/templates/api-reference/request-response-examples.mdx +210 -0
- package/templates/components/callouts.mdx +56 -0
- package/templates/components/cards.mdx +80 -0
- package/templates/components/steps.mdx +39 -0
- package/templates/components/tabs-and-accordions.mdx +65 -0
- package/templates/docs.json +48 -0
- package/templates/introduction.mdx +40 -10
- package/templates/openapi/example-api.yaml +185 -0
- package/templates/quickstart.mdx +98 -9
- package/templates/writing/code-blocks.mdx +80 -0
- package/templates/writing/components.mdx +78 -0
- package/templates/writing/pages.mdx +59 -0
- package/vendored/app/[[...slug]]/page.tsx +26 -8
- package/vendored/app/api/chat/[project]/route.ts +53 -3
- package/vendored/app/api/docs-search/[project]/search/route.ts +48 -3
- package/vendored/app/layout.tsx +4 -4
- package/vendored/components/mdx/OpenApiEndpoint.tsx +2 -1
- package/vendored/components/navigation/Sidebar.tsx +9 -4
- package/vendored/components/search/SearchModal.tsx +13 -20
- package/vendored/hooks/useChat.ts +22 -4
- package/vendored/lib/chat-prompt.ts +1 -1
- package/vendored/lib/chat-tools.ts +3 -0
- package/vendored/lib/embedding-chunker.ts +18 -2
- package/vendored/lib/language-codes.json +27 -0
- package/vendored/lib/language-utils.ts +80 -5
- package/vendored/lib/link-rewriter.ts +67 -0
- package/vendored/lib/locale-helpers.ts +62 -0
- package/vendored/lib/openapi/code-examples.ts +5 -6
- package/vendored/lib/openapi/derive-auth.ts +46 -0
- package/vendored/lib/openapi/index.ts +7 -0
- package/vendored/lib/openapi/parser.ts +7 -2
- package/vendored/lib/openapi/resolve-server-url.ts +14 -0
- package/vendored/lib/openapi/types.ts +2 -0
- package/vendored/lib/path-safety.ts +96 -0
- package/vendored/lib/search-client.ts +117 -12
- package/vendored/lib/static-artifacts.ts +25 -1
- package/vendored/lib/static-file-route.ts +13 -0
- package/vendored/lib/vector-store.ts +70 -17
- package/vendored/scripts/build-search-index.cjs +91 -24
- package/vendored/themes/base.css +5 -0
- package/vendored/workspace-package-lock.json +6 -6
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
// Client-side search with Orama (BM25 ranking)
|
|
2
2
|
import { create, insertMultiple, search as oramaSearch, type Orama } from '@orama/orama';
|
|
3
|
+
import { LANGUAGE_CODES, resolveLocaleWithLoweredSet } from './language-utils';
|
|
4
|
+
|
|
5
|
+
// Lowercased canonical language codes — used to detect slug-prefix locales when
|
|
6
|
+
// the search-data.json predates the locale field (legacy fallback).
|
|
7
|
+
const KNOWN_LANGUAGE_CODES_LOWERED: ReadonlySet<string> = new Set(
|
|
8
|
+
LANGUAGE_CODES.map((c) => c.toLowerCase()),
|
|
9
|
+
);
|
|
3
10
|
|
|
4
11
|
export interface SearchResult {
|
|
5
12
|
id: string;
|
|
@@ -9,6 +16,8 @@ export interface SearchResult {
|
|
|
9
16
|
slug: string;
|
|
10
17
|
section?: string;
|
|
11
18
|
type?: 'api' | 'component' | 'guide' | 'help' | 'quickstart';
|
|
19
|
+
/** Language of the document. Empty string for default-language pages. */
|
|
20
|
+
locale: string;
|
|
12
21
|
}
|
|
13
22
|
|
|
14
23
|
type OramaDb = Orama<{
|
|
@@ -19,6 +28,7 @@ type OramaDb = Orama<{
|
|
|
19
28
|
slug: 'string';
|
|
20
29
|
section: 'string';
|
|
21
30
|
type: 'string';
|
|
31
|
+
locale: 'enum';
|
|
22
32
|
}>;
|
|
23
33
|
|
|
24
34
|
// Orama database instance
|
|
@@ -33,6 +43,19 @@ let initPromise: Promise<void> | null = null;
|
|
|
33
43
|
let lastEtag = '';
|
|
34
44
|
let lastParsedData: SearchResult[] | null = null;
|
|
35
45
|
|
|
46
|
+
/**
|
|
47
|
+
* Locales the URL-resolver treats as real translations for the current
|
|
48
|
+
* project. Populated from the index's `locale` field when present, falling
|
|
49
|
+
* back to slug-prefix derivation for legacy (pre-feature) search-data.json
|
|
50
|
+
* so French pages don't leak English results while customers rebuild.
|
|
51
|
+
*
|
|
52
|
+
* `indexHasLocaleField` is the separate gate that controls whether the Orama
|
|
53
|
+
* `where` clause is used — set only when the new index format is detected.
|
|
54
|
+
* Slug-prefix fallback runs in `search()` whenever the where clause is off.
|
|
55
|
+
*/
|
|
56
|
+
let projectLocalesLowered: ReadonlySet<string> = new Set();
|
|
57
|
+
let indexHasLocaleField = false;
|
|
58
|
+
|
|
36
59
|
/**
|
|
37
60
|
* Cheap fingerprint: count + first/last IDs + a sample of content lengths.
|
|
38
61
|
* Detects new/removed pages AND content edits (which change content length).
|
|
@@ -60,20 +83,52 @@ async function buildIndex(data: SearchResult[], etag: string): Promise<void> {
|
|
|
60
83
|
slug: 'string',
|
|
61
84
|
section: 'string',
|
|
62
85
|
type: 'string',
|
|
86
|
+
locale: 'enum',
|
|
63
87
|
},
|
|
64
88
|
});
|
|
65
89
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
90
|
+
// Distinguish new-format docs (have a `locale` field, possibly '') from
|
|
91
|
+
// legacy ones (field missing entirely). Once the `?? ''` fallback runs in
|
|
92
|
+
// the map below the two are indistinguishable, so check the raw input first.
|
|
93
|
+
// A single doc with the field is enough — the build script always emits the
|
|
94
|
+
// field for every doc when it emits any.
|
|
95
|
+
indexHasLocaleField = data.some(
|
|
96
|
+
(d) => typeof (d as { locale?: unknown }).locale === 'string',
|
|
97
|
+
);
|
|
98
|
+
|
|
99
|
+
const seenLocales = new Set<string>();
|
|
100
|
+
const slugPrefixLocales = new Set<string>();
|
|
101
|
+
const normalizedData = data.map(item => {
|
|
102
|
+
const raw = item.locale;
|
|
103
|
+
if (typeof raw === 'string' && raw.length > 0) seenLocales.add(raw);
|
|
104
|
+
// Track first-segment slug prefixes that match a known language code,
|
|
105
|
+
// used as the URL-to-locale whitelist when the index lacks the field.
|
|
106
|
+
const firstSeg = item.slug.split('/', 1)[0]?.toLowerCase();
|
|
107
|
+
if (firstSeg && KNOWN_LANGUAGE_CODES_LOWERED.has(firstSeg)) {
|
|
108
|
+
slugPrefixLocales.add(firstSeg);
|
|
109
|
+
}
|
|
110
|
+
return {
|
|
111
|
+
id: item.id,
|
|
112
|
+
title: item.title,
|
|
113
|
+
description: item.description || '',
|
|
114
|
+
content: item.content,
|
|
115
|
+
slug: item.slug,
|
|
116
|
+
section: item.section || '',
|
|
117
|
+
type: item.type || 'guide',
|
|
118
|
+
locale: raw ?? '',
|
|
119
|
+
};
|
|
120
|
+
});
|
|
75
121
|
|
|
76
122
|
await insertMultiple(db, normalizedData);
|
|
123
|
+
|
|
124
|
+
// Trust the index's own locale set when present (handles the dodo case
|
|
125
|
+
// where `de/` is a directory, not a translation — `seenLocales` won't
|
|
126
|
+
// include `de`). Fall back to slug-prefix derivation for legacy indexes so
|
|
127
|
+
// the URL still maps to a sensible locale during the rebuild window.
|
|
128
|
+
projectLocalesLowered = indexHasLocaleField
|
|
129
|
+
? new Set(Array.from(seenLocales, (l) => l.toLowerCase()))
|
|
130
|
+
: slugPrefixLocales;
|
|
131
|
+
|
|
77
132
|
committedFingerprint = buildingFingerprint;
|
|
78
133
|
lastParsedData = data;
|
|
79
134
|
lastEtag = etag;
|
|
@@ -104,7 +159,11 @@ export function getLastData(etag: string | null): SearchResult[] | null {
|
|
|
104
159
|
return lastParsedData;
|
|
105
160
|
}
|
|
106
161
|
|
|
107
|
-
export async function search(
|
|
162
|
+
export async function search(
|
|
163
|
+
query: string,
|
|
164
|
+
limit = 10,
|
|
165
|
+
language?: string,
|
|
166
|
+
): Promise<SearchResult[]> {
|
|
108
167
|
if (!db) {
|
|
109
168
|
console.warn('Search database not initialized');
|
|
110
169
|
return [];
|
|
@@ -114,9 +173,15 @@ export async function search(query: string, limit = 10): Promise<SearchResult[]>
|
|
|
114
173
|
return [];
|
|
115
174
|
}
|
|
116
175
|
|
|
176
|
+
const targetLocale = language ?? '';
|
|
177
|
+
|
|
178
|
+
// Fetch extra when post-filtering by slug prefix so we can still reach
|
|
179
|
+
// `limit` after dropping cross-locale hits.
|
|
180
|
+
const fetchLimit = indexHasLocaleField ? limit : Math.min(limit * 5, 50);
|
|
181
|
+
|
|
117
182
|
const results = await oramaSearch(db, {
|
|
118
183
|
term: query,
|
|
119
|
-
limit,
|
|
184
|
+
limit: fetchLimit,
|
|
120
185
|
tolerance: 1, // Allow 1 typo for fuzzy matching
|
|
121
186
|
boost: {
|
|
122
187
|
title: 2,
|
|
@@ -124,9 +189,32 @@ export async function search(query: string, limit = 10): Promise<SearchResult[]>
|
|
|
124
189
|
description: 1,
|
|
125
190
|
content: 0.5,
|
|
126
191
|
},
|
|
192
|
+
...(indexHasLocaleField ? { where: { locale: { eq: targetLocale } } } : {}),
|
|
127
193
|
});
|
|
128
194
|
|
|
129
|
-
|
|
195
|
+
const docs = results.hits.map(hit => hit.document as unknown as SearchResult);
|
|
196
|
+
|
|
197
|
+
// Slug-prefix fallback for un-rebuilt indexes: the index has no locale
|
|
198
|
+
// field, so the where clause was skipped above. Filter by slug prefix
|
|
199
|
+
// instead so French pages don't see English results during the rebuild
|
|
200
|
+
// window. Inverted on default-language pages: drop slugs whose prefix
|
|
201
|
+
// matches any known project locale.
|
|
202
|
+
if (!indexHasLocaleField) {
|
|
203
|
+
if (targetLocale) {
|
|
204
|
+
const prefix = `${targetLocale}/`;
|
|
205
|
+
return docs.filter(d => d.slug.startsWith(prefix)).slice(0, limit);
|
|
206
|
+
}
|
|
207
|
+
if (projectLocalesLowered.size > 0) {
|
|
208
|
+
return docs
|
|
209
|
+
.filter(d => {
|
|
210
|
+
const firstSeg = d.slug.split('/', 1)[0]?.toLowerCase();
|
|
211
|
+
return !firstSeg || !projectLocalesLowered.has(firstSeg);
|
|
212
|
+
})
|
|
213
|
+
.slice(0, limit);
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
return docs.slice(0, limit);
|
|
130
218
|
}
|
|
131
219
|
|
|
132
220
|
/** @internal Used by tests only */
|
|
@@ -134,3 +222,20 @@ export function isInitialized(): boolean {
|
|
|
134
222
|
return db !== null;
|
|
135
223
|
}
|
|
136
224
|
|
|
225
|
+
/**
|
|
226
|
+
* Resolve the locale that the search filter should target for a given pathname,
|
|
227
|
+
* gated by the locales actually present in the currently-committed index
|
|
228
|
+
* (or, in legacy mode, by slug-prefix derivation).
|
|
229
|
+
*
|
|
230
|
+
* Returns:
|
|
231
|
+
* - `''` when there is no committed index yet, OR the pathname has no
|
|
232
|
+
* language prefix, OR the prefix is not a known project locale.
|
|
233
|
+
* - The canonical language code when the prefix matches.
|
|
234
|
+
*
|
|
235
|
+
* Caller (SearchModal) should pass the result to `search()`. Empty string and
|
|
236
|
+
* undefined are equivalent — both target the default-language doc set.
|
|
237
|
+
*/
|
|
238
|
+
export function resolveActiveLocale(pathname: string): string {
|
|
239
|
+
if (projectLocalesLowered.size === 0) return '';
|
|
240
|
+
return resolveLocaleWithLoweredSet(pathname, projectLocalesLowered);
|
|
241
|
+
}
|
|
@@ -8,6 +8,11 @@
|
|
|
8
8
|
import type { NavigationConfig } from './docs-types.js';
|
|
9
9
|
import { RECURSE_KEYS } from './enhance-navigation.js';
|
|
10
10
|
import { filterVisibility } from './visibility-filter.js';
|
|
11
|
+
import {
|
|
12
|
+
buildLoweredLocaleSet,
|
|
13
|
+
resolveLocaleFromPath,
|
|
14
|
+
resolveLocaleWithLoweredSet,
|
|
15
|
+
} from './language-utils.js';
|
|
11
16
|
|
|
12
17
|
/**
|
|
13
18
|
* Page metadata for artifact generation.
|
|
@@ -585,6 +590,8 @@ export interface SearchDocument {
|
|
|
585
590
|
slug: string;
|
|
586
591
|
section?: string;
|
|
587
592
|
type: string;
|
|
593
|
+
/** Language of the document. Empty string for default-language pages. */
|
|
594
|
+
locale: string;
|
|
588
595
|
}
|
|
589
596
|
|
|
590
597
|
/**
|
|
@@ -602,6 +609,14 @@ export interface SearchPageInfo {
|
|
|
602
609
|
};
|
|
603
610
|
}
|
|
604
611
|
|
|
612
|
+
/** Whitelist-gated alias of `resolveLocaleFromPath`, named for the search-index domain. */
|
|
613
|
+
export function detectLocaleFromSlug(
|
|
614
|
+
slug: string,
|
|
615
|
+
projectLanguages: readonly string[],
|
|
616
|
+
): string {
|
|
617
|
+
return resolveLocaleFromPath(slug, projectLanguages);
|
|
618
|
+
}
|
|
619
|
+
|
|
605
620
|
/**
|
|
606
621
|
* Infer page type from slug path.
|
|
607
622
|
*/
|
|
@@ -668,15 +683,23 @@ export function extractSections(content: string): Array<{ heading: string; conte
|
|
|
668
683
|
* Generate search data from page content.
|
|
669
684
|
*
|
|
670
685
|
* @param pages - Array of page info with content
|
|
686
|
+
* @param projectLanguages - Language codes declared in docs.json.navigation.languages.
|
|
687
|
+
* Used as the locale whitelist; slugs whose prefix is not in this list are
|
|
688
|
+
* tagged as default-language (locale='').
|
|
671
689
|
* @returns JSON string of search documents
|
|
672
690
|
*/
|
|
673
|
-
export function generateSearchData(
|
|
691
|
+
export function generateSearchData(
|
|
692
|
+
pages: SearchPageInfo[],
|
|
693
|
+
projectLanguages: readonly string[],
|
|
694
|
+
): string {
|
|
674
695
|
const documents: SearchDocument[] = [];
|
|
696
|
+
const loweredLanguages = buildLoweredLocaleSet(projectLanguages);
|
|
675
697
|
|
|
676
698
|
for (const page of pages) {
|
|
677
699
|
const pathWithoutExt = page.path.replace(/\.mdx?$/, '');
|
|
678
700
|
const slug = pathWithoutExt.replace(/\\/g, '/');
|
|
679
701
|
const pageType = inferPageType(slug);
|
|
702
|
+
const locale = resolveLocaleWithLoweredSet(slug, loweredLanguages);
|
|
680
703
|
// Filter for="agents" content out of the search index — the site
|
|
681
704
|
// search is a human-facing surface, so agent-only content must not
|
|
682
705
|
// leak into autocomplete.
|
|
@@ -694,6 +717,7 @@ export function generateSearchData(pages: SearchPageInfo[]): string {
|
|
|
694
717
|
slug,
|
|
695
718
|
section: section.heading || undefined,
|
|
696
719
|
type: pageType,
|
|
720
|
+
locale,
|
|
697
721
|
});
|
|
698
722
|
}
|
|
699
723
|
});
|
|
@@ -6,6 +6,9 @@
|
|
|
6
6
|
* duplication across 12 route files (6 at root + 6 at /docs).
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
+
import fs from 'fs';
|
|
10
|
+
import path from 'path';
|
|
11
|
+
|
|
9
12
|
import { NextRequest, NextResponse } from 'next/server';
|
|
10
13
|
|
|
11
14
|
import { log } from '@/lib/logger';
|
|
@@ -55,6 +58,16 @@ export function createStaticFileHandler(
|
|
|
55
58
|
|
|
56
59
|
return async function GET(request: NextRequest): Promise<NextResponse> {
|
|
57
60
|
if (!isIsrMode()) {
|
|
61
|
+
// Dev fallback: `dev-project.cjs` writes static artifacts (search-data.json,
|
|
62
|
+
// sitemap.xml, ...) into `public/`. Next.js prefers route handlers over
|
|
63
|
+
// public/ files when both exist, so without this branch the dev server
|
|
64
|
+
// 404s on these paths and breaks the SearchModal init.
|
|
65
|
+
const localPath = path.join(process.cwd(), 'public', filename);
|
|
66
|
+
if (fs.existsSync(localPath)) {
|
|
67
|
+
return new NextResponse(fs.readFileSync(localPath), {
|
|
68
|
+
headers: { 'Content-Type': contentType, 'Cache-Control': 'no-cache' },
|
|
69
|
+
});
|
|
70
|
+
}
|
|
58
71
|
return new NextResponse('Not found', { status: 404 });
|
|
59
72
|
}
|
|
60
73
|
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
import { Index, FusionAlgorithm, WeightingStrategy, QueryMode } from '@upstash/vector';
|
|
13
13
|
import type { EmbeddingChunk } from './embedding-chunker.js';
|
|
14
|
+
import { logger } from '../shared/logger';
|
|
14
15
|
|
|
15
16
|
export interface ChunkMetadata {
|
|
16
17
|
[key: string]: unknown;
|
|
@@ -18,6 +19,10 @@ export interface ChunkMetadata {
|
|
|
18
19
|
sectionHeading: string;
|
|
19
20
|
pageTitle: string;
|
|
20
21
|
content: string;
|
|
22
|
+
/** Locale code (lowercased, e.g. "en", "es", "pt-br"). Absent for chunks
|
|
23
|
+
* written by single-language projects (no `i18n.languages` config) — the
|
|
24
|
+
* filter is gated per-project so unfiltered queries return all chunks. */
|
|
25
|
+
locale?: string;
|
|
21
26
|
}
|
|
22
27
|
|
|
23
28
|
/** Upstash limit per upsert call */
|
|
@@ -78,20 +83,24 @@ export async function upsertChunks(
|
|
|
78
83
|
for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
|
|
79
84
|
const batch = chunks.slice(i, i + BATCH_SIZE);
|
|
80
85
|
await ns.upsert(
|
|
81
|
-
batch.map(c =>
|
|
82
|
-
|
|
83
|
-
// Prefix + body goes to Upstash for embedding/BM25; metadata.content
|
|
84
|
-
// stays prefix-free so consumers display clean body text.
|
|
85
|
-
data: c.prefix + c.content,
|
|
86
|
-
metadata: {
|
|
86
|
+
batch.map(c => {
|
|
87
|
+
const metadata: ChunkMetadata = {
|
|
87
88
|
pageSlug: c.pageSlug,
|
|
88
89
|
sectionHeading: c.sectionHeading,
|
|
89
90
|
pageTitle: c.pageTitle,
|
|
90
91
|
content: c.content.length > MAX_METADATA_CONTENT_CHARS
|
|
91
92
|
? c.content.slice(0, MAX_METADATA_CONTENT_CHARS) + '...'
|
|
92
93
|
: c.content,
|
|
93
|
-
}
|
|
94
|
-
|
|
94
|
+
};
|
|
95
|
+
if (c.locale) metadata.locale = c.locale; // omit when unset (null, undefined, or empty string)
|
|
96
|
+
return {
|
|
97
|
+
id: c.id,
|
|
98
|
+
// Prefix + body goes to Upstash for embedding/BM25; metadata.content
|
|
99
|
+
// stays prefix-free so consumers display clean body text.
|
|
100
|
+
data: c.prefix + c.content,
|
|
101
|
+
metadata,
|
|
102
|
+
};
|
|
103
|
+
}),
|
|
95
104
|
);
|
|
96
105
|
}
|
|
97
106
|
}
|
|
@@ -130,13 +139,12 @@ export function extractTopicQuery(queryText: string): string | null {
|
|
|
130
139
|
/** Query with hybrid mode, falling back to dense-only if hybrid is not supported */
|
|
131
140
|
async function queryWithFallback(
|
|
132
141
|
ns: ReturnType<typeof getNamespace>,
|
|
133
|
-
params: { data: string; topK: number; includeMetadata: true },
|
|
142
|
+
params: { data: string; topK: number; includeMetadata: true; filter?: string },
|
|
134
143
|
): Promise<Array<{ id: string | number; score: number; metadata?: ChunkMetadata }>> {
|
|
135
144
|
try {
|
|
136
145
|
return await ns.query<ChunkMetadata>({ ...params, ...HYBRID_QUERY_OPTS });
|
|
137
146
|
} catch (err) {
|
|
138
|
-
|
|
139
|
-
console.warn('[vector-store] Hybrid query failed, falling back to dense-only:', String(err));
|
|
147
|
+
logger.warn('vector-store: hybrid query failed, falling back to dense-only', { error: String(err) });
|
|
140
148
|
return await ns.query<ChunkMetadata>({ ...params, queryMode: QueryMode.DENSE });
|
|
141
149
|
}
|
|
142
150
|
}
|
|
@@ -184,6 +192,10 @@ function filterAndMerge(
|
|
|
184
192
|
* request-pattern words (e.g. "give me a javascript example") from diluting
|
|
185
193
|
* the topic signal in the embedding.
|
|
186
194
|
*
|
|
195
|
+
* When `options.locale` is set, a strict Upstash filter expression is applied
|
|
196
|
+
* so only chunks for that locale are returned. No retry-without-filter on 0
|
|
197
|
+
* results — migration safety lives in the per-project gate (Task A5).
|
|
198
|
+
*
|
|
187
199
|
* Returns up to `topK` results with their similarity scores,
|
|
188
200
|
* filtering out any results with missing metadata.
|
|
189
201
|
*/
|
|
@@ -191,25 +203,66 @@ export async function querySimilarChunks(
|
|
|
191
203
|
projectId: string,
|
|
192
204
|
queryText: string,
|
|
193
205
|
topK = 5,
|
|
206
|
+
options: { locale?: string } = {},
|
|
194
207
|
): Promise<Array<ChunkMetadata & { score: number }>> {
|
|
195
208
|
const ns = getNamespace(projectId);
|
|
196
|
-
const
|
|
209
|
+
const locale = options.locale ? normalizeLocaleForFilter(options.locale) : undefined;
|
|
210
|
+
// Defense-in-depth: A5 rejects malformed locales at the API boundary. If a
|
|
211
|
+
// truthy locale here normalizes to empty, A5's guard was bypassed (test or
|
|
212
|
+
// internal caller) — surface it loudly rather than silently dropping the filter.
|
|
213
|
+
if (options.locale && !locale) {
|
|
214
|
+
logger.warn('vector-store: locale normalized to empty — filter skipped', { rawLocale: options.locale });
|
|
215
|
+
}
|
|
216
|
+
const filter = locale ? buildLocaleFilter(locale) : undefined;
|
|
217
|
+
// When filtering, raise effective topK by ~33% so we still get ~topK chunks
|
|
218
|
+
// back from a mixed-language namespace where filtering cuts the candidate set.
|
|
219
|
+
const effectiveTopK = filter ? Math.ceil(topK * 1.33) : topK;
|
|
220
|
+
const queryParams = { topK: effectiveTopK, includeMetadata: true as const, filter };
|
|
197
221
|
|
|
198
222
|
const topicQuery = extractTopicQuery(queryText);
|
|
199
223
|
|
|
200
224
|
// Dual-query: topic query is the PRIMARY source (better topical relevance);
|
|
201
225
|
// the full query fills remaining slots with unique results only.
|
|
226
|
+
let merged: Array<ChunkMetadata & { score: number }>;
|
|
202
227
|
if (topicQuery) {
|
|
203
228
|
const [fullResults, topicResults] = await Promise.all([
|
|
204
229
|
queryWithFallback(ns, { data: queryText, ...queryParams }),
|
|
205
230
|
queryWithFallback(ns, { data: topicQuery, ...queryParams }),
|
|
206
231
|
]);
|
|
232
|
+
merged = filterAndMerge([topicResults, fullResults], topK);
|
|
233
|
+
} else {
|
|
234
|
+
const results = await queryWithFallback(ns, { data: queryText, ...queryParams });
|
|
235
|
+
merged = filterAndMerge([results], topK);
|
|
236
|
+
}
|
|
207
237
|
|
|
208
|
-
|
|
209
|
-
|
|
238
|
+
// Telemetry: a filtered query that returns materially fewer chunks than
|
|
239
|
+
// requested signals the locale filter is hurting recall (project skewed away
|
|
240
|
+
// from the requested locale, or filter syntax matched a near-empty subset).
|
|
241
|
+
// Surface it so we can decide whether to widen the carve-out, increase the
|
|
242
|
+
// 1.33× boost, or rebuild the project's index.
|
|
243
|
+
if (filter && merged.length < Math.ceil(topK / 2)) {
|
|
244
|
+
logger.warn('vector-store: locale filter under-fills topK', {
|
|
245
|
+
projectId, locale, returned: merged.length, requested: topK,
|
|
246
|
+
});
|
|
210
247
|
}
|
|
211
248
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
249
|
+
return merged;
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* Validate + normalize a locale code for use inside an Upstash filter
|
|
254
|
+
* expression. We allow [a-z0-9_-] (covers `en`, `pt-br`, `zh-Hans` after
|
|
255
|
+
* lowercasing) and lowercase to match how chunks were stored. Any other
|
|
256
|
+
* character is dropped — defense against filter injection from a malformed
|
|
257
|
+
* client request. The route layer rejects bad locales at the API boundary
|
|
258
|
+
* (Task A5), so this strip is belt-and-suspenders.
|
|
259
|
+
*/
|
|
260
|
+
function normalizeLocaleForFilter(value: string): string {
|
|
261
|
+
return value.toLowerCase().replace(/[^a-z0-9_-]/g, '');
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
/** Single source of truth for the Upstash filter expression. The smoke-test
|
|
265
|
+
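* script reuses this so production and verification stay in sync. `locale` is interpolated as-is (no escaping), so pass only values already restricted to [a-z0-9_-]. */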
|
|
266
|
+
export function buildLocaleFilter(locale: string): string {
|
|
267
|
+
return `locale = "${locale}"`;
|
|
215
268
|
}
|
|
@@ -25,6 +25,56 @@ const { create, insertMultiple } = require('@orama/orama');
|
|
|
25
25
|
const { persist } = require('@orama/plugin-data-persistence');
|
|
26
26
|
const { filterVisibility } = require('./visibility-filter.cjs');
|
|
27
27
|
|
|
28
|
+
// Shared canonical list of language codes — keep in sync with
|
|
29
|
+
// lib/language-utils.ts. The sync test in
|
|
30
|
+
// __tests__/lib/language-codes-sync.test.ts catches drift.
|
|
31
|
+
const LANGUAGE_CODES = require('../lib/language-codes.json');
|
|
32
|
+
|
|
33
|
+
const LANGUAGE_CODE_BY_LOWER = (() => {
|
|
34
|
+
const m = new Map();
|
|
35
|
+
for (const k of LANGUAGE_CODES) {
|
|
36
|
+
const lower = k.toLowerCase();
|
|
37
|
+
if (!m.has(lower)) m.set(lower, k);
|
|
38
|
+
}
|
|
39
|
+
return m;
|
|
40
|
+
})();
|
|
41
|
+
|
|
42
|
+
/** Mirrors lib/language-utils.ts:extractLanguageFromPath. */
|
|
43
|
+
function extractLanguageFromPath(pathname) {
|
|
44
|
+
if (!pathname) return undefined;
|
|
45
|
+
const parts = pathname.replace(/^\/docs\/?/, '').replace(/^\//, '').split('/').filter(Boolean);
|
|
46
|
+
if (parts.length === 0) return undefined;
|
|
47
|
+
return LANGUAGE_CODE_BY_LOWER.get(parts[0].toLowerCase());
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/** Mirrors lib/language-utils.ts:resolveLocaleFromPath. */
|
|
51
|
+
function resolveLocaleFromPath(pathname, projectLanguages) {
|
|
52
|
+
const candidate = extractLanguageFromPath(pathname);
|
|
53
|
+
if (!candidate) return '';
|
|
54
|
+
const lowered = new Set(projectLanguages.map((l) => l.toLowerCase()));
|
|
55
|
+
return lowered.has(candidate.toLowerCase()) ? candidate : '';
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Read the project's declared languages from its docs.json. Returns [] when
|
|
60
|
+
* the file is missing or malformed — the script proceeds with no whitelist,
|
|
61
|
+
* which means every doc gets locale='' (matches single-language behavior).
|
|
62
|
+
*/
|
|
63
|
+
function readProjectLanguages(contentDir) {
|
|
64
|
+
try {
|
|
65
|
+
const docsJsonPath = path.join(contentDir, 'docs.json');
|
|
66
|
+
if (!fs.existsSync(docsJsonPath)) return [];
|
|
67
|
+
const config = JSON.parse(fs.readFileSync(docsJsonPath, 'utf-8'));
|
|
68
|
+
const langs = config?.navigation?.languages ?? [];
|
|
69
|
+
return langs
|
|
70
|
+
.map((l) => (l && typeof l.language === 'string' ? l.language : null))
|
|
71
|
+
.filter((c) => typeof c === 'string');
|
|
72
|
+
} catch (err) {
|
|
73
|
+
console.warn(`Could not read languages from ${contentDir}/docs.json: ${err.message}`);
|
|
74
|
+
return [];
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
28
78
|
// Concurrency control for parallel file processing
|
|
29
79
|
const CONCURRENCY = parseInt(process.env.SEARCH_INDEX_CONCURRENCY || '') || os.cpus().length * 2;
|
|
30
80
|
|
|
@@ -214,6 +264,11 @@ async function buildSearchIndex() {
|
|
|
214
264
|
process.exit(1);
|
|
215
265
|
}
|
|
216
266
|
|
|
267
|
+
const projectLanguages = readProjectLanguages(contentDir);
|
|
268
|
+
if (projectLanguages.length > 0) {
|
|
269
|
+
console.log(`Project languages: ${projectLanguages.join(', ')}`);
|
|
270
|
+
}
|
|
271
|
+
|
|
217
272
|
// First, collect all MDX file paths
|
|
218
273
|
const mdxFiles = [];
|
|
219
274
|
|
|
@@ -255,30 +310,40 @@ async function buildSearchIndex() {
|
|
|
255
310
|
async function processFile({ filePath, slug }) {
|
|
256
311
|
await semaphore.acquire();
|
|
257
312
|
try {
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
const
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
313
|
+
try {
|
|
314
|
+
const fileContents = await fsPromises.readFile(filePath, 'utf8');
|
|
315
|
+
const { data, content } = parseFrontmatterLenient(fileContents);
|
|
316
|
+
// Filter for="agents" content out of the search index.
|
|
317
|
+
const visibleContent = filterVisibility(content, 'humans');
|
|
318
|
+
const sections = extractSections(visibleContent);
|
|
319
|
+
|
|
320
|
+
const docs = [];
|
|
321
|
+
const normalizedSlug = slug.replace(/\\/g, '/');
|
|
322
|
+
const pageType = inferPageType(normalizedSlug);
|
|
323
|
+
const locale = resolveLocaleFromPath(normalizedSlug, projectLanguages);
|
|
324
|
+
sections.forEach((section, idx) => {
|
|
325
|
+
const cleanContent = stripMarkdown(section.content);
|
|
326
|
+
if (cleanContent.trim()) {
|
|
327
|
+
docs.push({
|
|
328
|
+
id: `${slug}-${idx}`,
|
|
329
|
+
title: data.title || slug.split('/').pop() || '',
|
|
330
|
+
description: data.description,
|
|
331
|
+
content: cleanContent.substring(0, 300),
|
|
332
|
+
slug: normalizedSlug,
|
|
333
|
+
section: section.heading || undefined,
|
|
334
|
+
type: pageType,
|
|
335
|
+
locale,
|
|
336
|
+
});
|
|
337
|
+
}
|
|
338
|
+
});
|
|
339
|
+
return docs;
|
|
340
|
+
} catch (err) {
|
|
341
|
+
// One malformed MDX file (bad YAML, etc.) used to abort the entire index
|
|
342
|
+
// build — leaving the project searchless. Skip the file with a warning
|
|
343
|
+
// so the rest of the docs remain searchable.
|
|
344
|
+
console.warn(`⚠ Skipping ${filePath} in search index: ${err.message}`);
|
|
345
|
+
return [];
|
|
346
|
+
}
|
|
282
347
|
} finally {
|
|
283
348
|
semaphore.release();
|
|
284
349
|
}
|
|
@@ -318,6 +383,7 @@ async function buildOramaIndex(searchData, outputDir) {
|
|
|
318
383
|
slug: 'string',
|
|
319
384
|
section: 'string',
|
|
320
385
|
type: 'string',
|
|
386
|
+
locale: 'enum',
|
|
321
387
|
},
|
|
322
388
|
});
|
|
323
389
|
|
|
@@ -330,6 +396,7 @@ async function buildOramaIndex(searchData, outputDir) {
|
|
|
330
396
|
slug: item.slug,
|
|
331
397
|
section: item.section || '',
|
|
332
398
|
type: item.type || 'guide',
|
|
399
|
+
locale: item.locale || '',
|
|
333
400
|
}));
|
|
334
401
|
|
|
335
402
|
await insertMultiple(db, normalizedData);
|
package/vendored/themes/base.css
CHANGED
|
@@ -378,6 +378,11 @@
|
|
|
378
378
|
text-decoration: none;
|
|
379
379
|
}
|
|
380
380
|
|
|
381
|
+
/* Restore spacing between stacked .not-prose blocks — prose sibling rules skip them. */
|
|
382
|
+
.prose > .not-prose + .not-prose {
|
|
383
|
+
margin-top: 1.5rem;
|
|
384
|
+
}
|
|
385
|
+
|
|
381
386
|
html {
|
|
382
387
|
/* Prevent horizontal overflow on mobile */
|
|
383
388
|
overflow-x: hidden;
|
|
@@ -2152,9 +2152,9 @@
|
|
|
2152
2152
|
}
|
|
2153
2153
|
},
|
|
2154
2154
|
"node_modules/baseline-browser-mapping": {
|
|
2155
|
-
"version": "2.10.
|
|
2156
|
-
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.
|
|
2157
|
-
"integrity": "sha512-
|
|
2155
|
+
"version": "2.10.23",
|
|
2156
|
+
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.23.tgz",
|
|
2157
|
+
"integrity": "sha512-xwVXGqevyKPsiuQdLj+dZMVjidjJV508TBqexND5HrF89cGdCYCJFB3qhcxRHSeMctdCfbR1jrxBajhDy7o29g==",
|
|
2158
2158
|
"license": "Apache-2.0",
|
|
2159
2159
|
"bin": {
|
|
2160
2160
|
"baseline-browser-mapping": "dist/cli.cjs"
|
|
@@ -5586,9 +5586,9 @@
|
|
|
5586
5586
|
}
|
|
5587
5587
|
},
|
|
5588
5588
|
"node_modules/postcss": {
|
|
5589
|
-
"version": "8.5.
|
|
5590
|
-
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.
|
|
5591
|
-
"integrity": "sha512-
|
|
5589
|
+
"version": "8.5.12",
|
|
5590
|
+
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.12.tgz",
|
|
5591
|
+
"integrity": "sha512-W62t/Se6rA0Az3DfCL0AqJwXuKwBeYg6nOaIgzP+xZ7N5BFCI7DYi1qs6ygUYT6rvfi6t9k65UMLJC+PHZpDAA==",
|
|
5592
5592
|
"funding": [
|
|
5593
5593
|
{
|
|
5594
5594
|
"type": "opencollective",
|