@dominikcz/greg 0.9.40 → 0.9.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/greg.js +40 -12
- package/docs/guide/deploying.md +1 -0
- package/docs/guide/markdown/links-and-toc.md +4 -4
- package/docs/reference/markdowndocs.md +3 -3
- package/docs/reference/search.md +71 -8
- package/package.json +1 -1
- package/src/lib/MarkdownDocs/AiChat.svelte +37 -79
- package/src/lib/MarkdownDocs/BackToTop.svelte +1 -1
- package/src/lib/MarkdownDocs/DocsNavigation.svelte +1 -1
- package/src/lib/MarkdownDocs/DocsSiteHeader.svelte +38 -1
- package/src/lib/MarkdownDocs/MarkdownDocs.svelte +87 -1
- package/src/lib/MarkdownDocs/SearchModal.svelte +56 -1
- package/src/lib/MarkdownDocs/TreeViewItem.svelte +1 -1
- package/src/lib/MarkdownDocs/__tests__/vitePluginCopyDocs.test.js +215 -0
- package/src/lib/MarkdownDocs/ai/promptBuilder.js +5 -6
- package/src/lib/MarkdownDocs/ai/promptBuilder.ts +5 -6
- package/src/lib/MarkdownDocs/ai/ragPipeline.js +40 -1
- package/src/lib/MarkdownDocs/ai/ragPipeline.ts +40 -3
- package/src/lib/MarkdownDocs/searchServer.js +209 -10
- package/src/lib/MarkdownDocs/vitePluginCopyDocs.js +9 -2
- package/src/lib/MarkdownDocs/vitePluginSearchIndex.js +167 -9
- package/src/lib/components/Link.svelte +1 -1
- package/types/index.d.ts +9 -0
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
|
|
30
30
|
import { createServer } from 'node:http';
|
|
31
31
|
import { existsSync, readFileSync } from 'node:fs';
|
|
32
|
-
import { resolve } from 'node:path';
|
|
32
|
+
import { resolve, dirname } from 'node:path';
|
|
33
33
|
import { statSync } from 'node:fs';
|
|
34
34
|
import { buildFuseResult } from './searchIndexBuilder.js';
|
|
35
35
|
import { loadGregConfig } from './loadGregConfig.js';
|
|
@@ -45,6 +45,24 @@ function fmtMs(ms) {
|
|
|
45
45
|
return `${ms.toFixed(1)}ms`;
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
+
/**
 * Interpret a loosely-typed flag (CLI argument, environment variable or
 * config value) as a boolean.
 *
 * Matching is case-insensitive and ignores surrounding whitespace.
 *
 * @param {unknown} value - Raw value to interpret.
 * @param {boolean} [fallback=false] - Returned when `value` is nullish,
 *   blank, or not one of the recognised spellings.
 * @returns {boolean} `true` for 1/true/yes/on, `false` for 0/false/no/off,
 *   otherwise `fallback`.
 */
function parseBoolean(value, fallback = false) {
  if (value === null || value === undefined) return fallback;

  switch (String(value).trim().toLowerCase()) {
    case '1':
    case 'true':
    case 'yes':
    case 'on':
      return true;
    case '0':
    case 'false':
    case 'no':
    case 'off':
      return false;
    default:
      // Blank strings and unknown spellings both land here.
      return fallback;
  }
}
|
|
56
|
+
|
|
57
|
+
/**
 * Split a search query into lower-cased terms.
 *
 * Anything that is not a Unicode letter, a Unicode number, whitespace or a
 * hyphen is treated as a separator; the text is then split on whitespace and
 * hyphens, and terms shorter than three UTF-16 code units are dropped.
 *
 * @param {unknown} value - Raw query; falsy values yield an empty list.
 * @returns {string[]} Terms in their original order (duplicates kept).
 */
function tokenizeQuery(value) {
  const lowered = String(value || '').toLowerCase();
  const cleaned = lowered.replace(/[^\p{L}\p{N}\s-]+/gu, ' ');
  const terms = [];
  // Pieces produced by splitting on /[\s-]+/ cannot contain whitespace, so
  // no additional trimming is needed before the length check.
  for (const piece of cleaned.split(/[\s-]+/)) {
    if (piece.length >= 3) terms.push(piece);
  }
  return terms;
}
|
|
65
|
+
|
|
48
66
|
// ── CLI argument parser ───────────────────────────────────────────────────────
|
|
49
67
|
function parseArgs(argv) {
|
|
50
68
|
const args = {};
|
|
@@ -63,10 +81,18 @@ function parseArgs(argv) {
|
|
|
63
81
|
const args = parseArgs(process.argv.slice(2));
|
|
64
82
|
const gregConfig = await loadGregConfig();
|
|
65
83
|
const configuredFuzzy = gregConfig?.search?.fuzzy ?? {};
|
|
84
|
+
const configuredServer = gregConfig?.search?.server ?? {};
|
|
66
85
|
const port = parseInt(args.port ?? process.env.GREG_SEARCH_PORT ?? '3100', 10);
|
|
67
86
|
const host = String(args.host ?? process.env.GREG_SEARCH_HOST ?? 'localhost');
|
|
68
87
|
const url = String(args.url ?? process.env.GREG_SEARCH_URL ?? '/api/search');
|
|
69
88
|
const index = resolve(String(args.index ?? process.env.GREG_SEARCH_INDEX ?? 'dist/search-index.json'));
|
|
89
|
+
const shardsManifest = resolve(
|
|
90
|
+
String(
|
|
91
|
+
args['shards-manifest']
|
|
92
|
+
?? process.env.GREG_SEARCH_SHARDS_MANIFEST
|
|
93
|
+
?? resolve(dirname(index), 'search-index', 'manifest.json'),
|
|
94
|
+
),
|
|
95
|
+
);
|
|
70
96
|
const corsOrigin = String(args['cors-origin'] ?? process.env.GREG_SEARCH_CORS_ORIGIN ?? '*');
|
|
71
97
|
const corsMethods = String(args['cors-methods'] ?? process.env.GREG_SEARCH_CORS_METHODS ?? 'GET, OPTIONS');
|
|
72
98
|
const corsHeaders = String(args['cors-headers'] ?? process.env.GREG_SEARCH_CORS_HEADERS ?? 'Content-Type');
|
|
@@ -89,6 +115,25 @@ const ignoreLocationRaw =
|
|
|
89
115
|
configuredFuzzy.ignoreLocation;
|
|
90
116
|
const ignoreLocation =
|
|
91
117
|
String(ignoreLocationRaw ?? 'true').toLowerCase() !== 'false';
|
|
118
|
+
const preloadShards = parseBoolean(
|
|
119
|
+
args['preload-shards'] ?? process.env.GREG_SEARCH_PRELOAD_SHARDS ?? configuredServer.preloadShards,
|
|
120
|
+
true,
|
|
121
|
+
);
|
|
122
|
+
const maxLoadedShardsRaw = args['max-loaded-shards'] ?? process.env.GREG_SEARCH_MAX_LOADED_SHARDS ?? configuredServer.maxLoadedShards;
|
|
123
|
+
const configuredMaxLoadedShards = (() => {
|
|
124
|
+
if (maxLoadedShardsRaw == null || String(maxLoadedShardsRaw).trim() === '') return null;
|
|
125
|
+
const parsed = Number.parseInt(String(maxLoadedShardsRaw), 10);
|
|
126
|
+
if (!Number.isFinite(parsed) || parsed < 1 || parsed > 512) return null;
|
|
127
|
+
return parsed;
|
|
128
|
+
})();
|
|
129
|
+
const shardCandidatesRaw = args['shard-candidates'] ?? process.env.GREG_SEARCH_SHARD_CANDIDATES ?? configuredServer.shardCandidates;
|
|
130
|
+
const shardCandidates = (() => {
|
|
131
|
+
const fallback = 6;
|
|
132
|
+
if (shardCandidatesRaw == null || String(shardCandidatesRaw).trim() === '') return fallback;
|
|
133
|
+
const parsed = Number.parseInt(String(shardCandidatesRaw), 10);
|
|
134
|
+
if (!Number.isFinite(parsed) || parsed < 1 || parsed > 512) return fallback;
|
|
135
|
+
return parsed;
|
|
136
|
+
})();
|
|
92
137
|
|
|
93
138
|
function getCorsHeaders(req) {
|
|
94
139
|
const reflectedOrigin = req.headers.origin ? String(req.headers.origin) : '*';
|
|
@@ -141,6 +186,17 @@ let sectionCount = 0;
|
|
|
141
186
|
let loadParseMs = 0;
|
|
142
187
|
let fuseBuildMs = 0;
|
|
143
188
|
let indexMtimeMs = -1;
|
|
189
|
+
let shardedMode = false;
|
|
190
|
+
let shardManifest = null;
|
|
191
|
+
let shardManifestDir = '';
|
|
192
|
+
let shardLoadMs = 0;
|
|
193
|
+
let shardBuildMs = 0;
|
|
194
|
+
let loadedShardCount = 0;
|
|
195
|
+
let cacheTick = 0;
|
|
196
|
+
let shardPreloadMs = 0;
|
|
197
|
+
let maxLoadedShards = configuredMaxLoadedShards ?? 8;
|
|
198
|
+
/** @type {Map<string, { fuse: Fuse<any>; lastUsedAt: number }>} */
|
|
199
|
+
const shardCache = new Map();
|
|
144
200
|
|
|
145
201
|
function buildFuse(searchData) {
|
|
146
202
|
return new Fuse(searchData, {
|
|
@@ -157,6 +213,121 @@ function buildFuse(searchData) {
|
|
|
157
213
|
});
|
|
158
214
|
}
|
|
159
215
|
|
|
216
|
+
/**
 * Load and validate the shard manifest located at `shardsManifest`.
 *
 * On success the parsed manifest, its containing directory and the
 * sharded-mode flag are stored in module state (`shardManifest`,
 * `shardManifestDir`, `shardedMode`).
 *
 * @returns {boolean} `true` when a usable manifest was loaded; `false` when
 *   the file does not exist, is not valid JSON, or is not a
 *   `greg-search-shards-v1` manifest with a `files` array.
 */
function loadShardManifest() {
  // A missing manifest is not reported; the caller simply gets `false`.
  if (!existsSync(shardsManifest)) return false;

  let parsed;
  try {
    parsed = JSON.parse(readFileSync(shardsManifest, 'utf-8'));
  } catch (/** @type {any} */ e) {
    console.warn(`[greg-search] Failed to parse shards manifest (${shardsManifest}): ${e.message}`);
    return false;
  }

  if (!parsed || parsed.format !== 'greg-search-shards-v1' || !Array.isArray(parsed.files)) {
    // A manifest that exists but cannot be used is reported, just like a
    // parse failure, so the rejection is visible in the server log.
    console.warn(
      `[greg-search] Ignoring shards manifest (${shardsManifest}): ` +
      'expected format "greg-search-shards-v1" with a "files" array.',
    );
    return false;
  }

  shardManifest = parsed;
  shardManifestDir = dirname(shardsManifest);
  shardedMode = true;
  return true;
}
|
|
233
|
+
|
|
234
|
+
/**
 * Stamp a cached shard with the next value of the `cacheTick` counter so it
 * counts as the most recently used entry. Does nothing when `key` is not in
 * `shardCache`.
 *
 * @param {string} key - Cache key of the shard.
 * @returns {void}
 */
function markShardUsage(key) {
  const slot = shardCache.get(key);
  if (slot) {
    cacheTick += 1;
    slot.lastUsedAt = cacheTick;
  }
}
|
|
239
|
+
|
|
240
|
+
/**
 * Shrink `shardCache` until it holds at most `maxLoadedShards` entries,
 * removing the entry with the smallest `lastUsedAt` on each pass.
 *
 * @returns {void}
 */
function evictShardsIfNeeded() {
  while (shardCache.size > maxLoadedShards) {
    let oldestKey;
    let oldestTick = Number.POSITIVE_INFINITY;
    // Track "found" separately from the key itself: a falsy key ('' / 0 /
    // undefined) is still a valid Map key and must remain evictable.
    let found = false;
    for (const [key, value] of shardCache) {
      if (value.lastUsedAt < oldestTick) {
        oldestTick = value.lastUsedAt;
        oldestKey = key;
        found = true;
      }
    }
    // Nothing comparable left to evict; stop instead of looping forever.
    if (!found) break;
    shardCache.delete(oldestKey);
  }
}
|
|
254
|
+
|
|
255
|
+
/**
 * Return the Fuse index for one manifest entry, loading it on first use.
 *
 * A cache hit only refreshes the entry's usage stamp. A miss reads
 * `entry.file` (resolved against `shardManifestDir`), parses it as JSON,
 * builds a Fuse index via `buildFuse`, stores it in `shardCache`, updates the
 * timing/load counters and then runs eviction.
 *
 * @param {{ key: string, file: string }} entry - Manifest entry of the shard.
 * @returns {Fuse<any>} Fuse index for the shard.
 */
function getShardFuse(entry) {
  const hit = shardCache.get(entry.key);
  if (hit) {
    markShardUsage(entry.key);
    return hit.fuse;
  }

  const shardPath = resolve(shardManifestDir, entry.file);

  // Time the disk read + JSON parse separately from the index build.
  const readStartedAt = process.hrtime.bigint();
  const documents = JSON.parse(readFileSync(shardPath, 'utf-8'));
  shardLoadMs += msSince(readStartedAt);

  const buildStartedAt = process.hrtime.bigint();
  const fuseIndex = buildFuse(documents);
  shardBuildMs += msSince(buildStartedAt);

  cacheTick += 1;
  shardCache.set(entry.key, { fuse: fuseIndex, lastUsedAt: cacheTick });
  loadedShardCount += 1;
  evictShardsIfNeeded();
  return fuseIndex;
}
|
|
279
|
+
|
|
280
|
+
/**
 * Run a query against the sharded index and return the best merged results.
 *
 * Each shard is scored by how many query terms appear in its `hints` list.
 * When at least one shard has a hint match, only the `shardCandidates`
 * highest-scoring shards are searched first; if those produce nothing, the
 * remaining shards are searched too. Without any hint match every shard is
 * searched.
 *
 * @param {string} query - Raw search query.
 * @param {number} limit - Maximum number of results, per shard and overall.
 * @returns {any[]} Merged results ordered by ascending Fuse score and mapped
 *   through `buildFuseResult`.
 */
function searchAcrossShards(query, limit) {
  if (!query || !shardManifest?.files?.length) return [];

  const terms = tokenizeQuery(query);
  const entries = shardManifest.files;

  // Score = number of query terms found among the shard's hints.
  const ranked = entries.map((entry) => {
    const hints = new Set(Array.isArray(entry.hints) ? entry.hints : []);
    const score = terms.reduce((sum, term) => (hints.has(term) ? sum + 1 : sum), 0);
    return { entry, score };
  });
  const hasHintMatch = ranked.some(({ score }) => score > 0);

  let firstPass = entries;
  if (hasHintMatch) {
    ranked.sort((left, right) => right.score - left.score);
    firstPass = ranked
      .slice(0, Math.min(shardCandidates, entries.length))
      .map(({ entry }) => entry);
  }

  const merged = [];
  const searched = new Set();
  const searchShard = (entry) => {
    const hits = getShardFuse(entry).search(query, { limit });
    for (const hit of hits) merged.push(hit);
  };

  for (const entry of firstPass) {
    searched.add(entry.key);
    searchShard(entry);
  }

  // Hints only cover part of each shard's content, so an empty first pass
  // falls back to the shards that were skipped.
  if (hasHintMatch && merged.length === 0) {
    for (const entry of entries) {
      if (!searched.has(entry.key)) searchShard(entry);
    }
  }

  merged.sort((left, right) => (left.score ?? 1) - (right.score ?? 1));
  return merged.slice(0, limit).map(buildFuseResult);
}
|
|
321
|
+
|
|
322
|
+
/**
 * Load every shard listed in the manifest through `getShardFuse` and record
 * the elapsed time in `shardPreloadMs`. Does nothing outside sharded mode or
 * when the manifest lists no files.
 *
 * @returns {void}
 */
function preloadAllShards() {
  const files = shardManifest?.files;
  if (!shardedMode || !files?.length) return;

  const startedAt = process.hrtime.bigint();
  files.forEach((entry) => {
    getShardFuse(entry);
  });
  shardPreloadMs = msSince(startedAt);
}
|
|
330
|
+
|
|
160
331
|
function loadIndex(force = false) {
|
|
161
332
|
let stats;
|
|
162
333
|
try {
|
|
@@ -190,7 +361,16 @@ function loadIndex(force = false) {
|
|
|
190
361
|
return true;
|
|
191
362
|
}
|
|
192
363
|
|
|
193
|
-
|
|
364
|
+
if (loadShardManifest()) {
|
|
365
|
+
const manifest = shardManifest;
|
|
366
|
+
console.log(`[greg-search] Sharded mode enabled (${manifest.files.length} shards).`);
|
|
367
|
+
maxLoadedShards = configuredMaxLoadedShards ?? manifest.files.length;
|
|
368
|
+
if (preloadShards) {
|
|
369
|
+
preloadAllShards();
|
|
370
|
+
}
|
|
371
|
+
} else {
|
|
372
|
+
loadIndex(true);
|
|
373
|
+
}
|
|
194
374
|
|
|
195
375
|
// ── HTTP server ───────────────────────────────────────────────────────────────
|
|
196
376
|
const server = createServer((req, res) => {
|
|
@@ -225,9 +405,15 @@ const server = createServer((req, res) => {
|
|
|
225
405
|
.map(v => v.trim())
|
|
226
406
|
.filter(Boolean);
|
|
227
407
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
408
|
+
let results = [];
|
|
409
|
+
if (q) {
|
|
410
|
+
if (shardedMode) {
|
|
411
|
+
results = searchAcrossShards(q, limit);
|
|
412
|
+
} else {
|
|
413
|
+
loadIndex(false);
|
|
414
|
+
results = fuse.search(q, { limit }).map(buildFuseResult);
|
|
415
|
+
}
|
|
416
|
+
}
|
|
231
417
|
if (localeRoot && baseRoot) {
|
|
232
418
|
results = results.filter((result) =>
|
|
233
419
|
isPathInLocale(result.id, localeRoot, baseRoot, localeRoots),
|
|
@@ -247,11 +433,24 @@ const server = createServer((req, res) => {
|
|
|
247
433
|
server.listen(port, host, () => {
|
|
248
434
|
console.log(`[greg-search] Listening on http://${host}:${port}${url}`);
|
|
249
435
|
const startupMs = msSince(startupT0);
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
436
|
+
if (shardedMode) {
|
|
437
|
+
console.log(
|
|
438
|
+
`[greg-search] Startup summary: mode=sharded, total=${fmtMs(startupMs)}, ` +
|
|
439
|
+
`docs=${shardManifest.totalDocs ?? 'unknown'}, shards=${shardManifest.files.length}, ` +
|
|
440
|
+
`max-loaded-shards=${maxLoadedShards}, preload=${preloadShards}, shard-candidates=${shardCandidates}`,
|
|
441
|
+
);
|
|
442
|
+
console.log(
|
|
443
|
+
`[greg-search] Runtime shard stats: loaded=${loadedShardCount}, ` +
|
|
444
|
+
`cache-size=${shardCache.size}, preload=${fmtMs(shardPreloadMs)}, ` +
|
|
445
|
+
`load+parse=${fmtMs(shardLoadMs)}, fuse-index=${fmtMs(shardBuildMs)}`,
|
|
446
|
+
);
|
|
447
|
+
} else {
|
|
448
|
+
console.log(
|
|
449
|
+
`[greg-search] Startup summary: load+parse=${fmtMs(loadParseMs)}, ` +
|
|
450
|
+
`fuse-index=${fmtMs(fuseBuildMs)}, total=${fmtMs(startupMs)}, ` +
|
|
451
|
+
`docs=${data.length}, sections=${sectionCount}`,
|
|
452
|
+
);
|
|
453
|
+
}
|
|
255
454
|
console.log(
|
|
256
455
|
`[greg-search] CORS: origin=${corsOrigin}, methods="${corsMethods}", ` +
|
|
257
456
|
`headers="${corsHeaders}", max-age=${corsMaxAge}`,
|
|
@@ -114,9 +114,12 @@ export function vitePluginCopyDocs({ docsDir = 'docs', srcDir = '/docs', staticD
|
|
|
114
114
|
? (url === rootPrefix || url.startsWith(rootPrefix + '/'))
|
|
115
115
|
: url.startsWith('/');
|
|
116
116
|
if (isDocsPath) {
|
|
117
|
-
|
|
117
|
+
let rel;
|
|
118
|
+
try { rel = decodeURIComponent(url.slice(rootPrefix.length).replace(/^\//, '')); }
|
|
119
|
+
catch { next(); return; }
|
|
118
120
|
for (const dir of (Array.isArray(docsDir) ? docsDir : [docsDir])) {
|
|
119
121
|
const filePath = path.resolve(root, dir, rel);
|
|
122
|
+
if (!filePath.startsWith(path.resolve(root, dir) + path.sep)) continue;
|
|
120
123
|
if (fs.existsSync(filePath) && fs.statSync(filePath).isFile()) {
|
|
121
124
|
res.setHeader('Content-Type', getContentType(filePath));
|
|
122
125
|
res.end(fs.readFileSync(filePath));
|
|
@@ -128,7 +131,11 @@ export function vitePluginCopyDocs({ docsDir = 'docs', srcDir = '/docs', staticD
|
|
|
128
131
|
// Extra static dirs (snippets etc.)
|
|
129
132
|
for (const dir of staticDirs) {
|
|
130
133
|
if (url.startsWith('/' + dir + '/') || url === '/' + dir) {
|
|
131
|
-
|
|
134
|
+
let rel;
|
|
135
|
+
try { rel = decodeURIComponent(url.slice(1)); }
|
|
136
|
+
catch { next(); return; }
|
|
137
|
+
const filePath = path.resolve(root, rel);
|
|
138
|
+
if (!filePath.startsWith(path.resolve(root, dir) + path.sep)) continue;
|
|
132
139
|
if (fs.existsSync(filePath) && fs.statSync(filePath).isFile()) {
|
|
133
140
|
res.setHeader('Content-Type', 'text/plain; charset=utf-8');
|
|
134
141
|
res.end(fs.readFileSync(filePath, 'utf8'));
|
|
@@ -1,6 +1,161 @@
|
|
|
1
|
-
import
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import { dirname, resolve } from 'node:path';
|
|
3
|
+
import { once } from 'node:events';
|
|
2
4
|
import { buildSearchIndex, invalidateSearchIndexCache } from './searchIndexBuilder.js';
|
|
3
5
|
|
|
6
|
+
/**
 * Write one chunk to `target` and, when `write()` reports `false`, wait for
 * the next `'drain'` event before resolving.
 *
 * @param {{ write(chunk: string): boolean }} target - Object with a
 *   `write()` method that also emits `'drain'`.
 * @param {string} chunk - Data to write.
 * @returns {Promise<void>}
 */
async function writeChunk(target, chunk) {
  const accepted = target.write(chunk);
  if (!accepted) {
    await once(target, 'drain');
  }
}
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
/**
 * Serialise `index` as a JSON array onto `target` one element at a time, so
 * the whole array is never held as a single string.
 *
 * @param {{ write(chunk: string): boolean }} target - Destination passed to
 *   `writeChunk`.
 * @param {any[]} index - Entries to serialise.
 * @returns {Promise<void>}
 */
async function streamIndexJson(target, index) {
  await writeChunk(target, '[');
  let needsComma = false;
  for (let position = 0; position < index.length; position += 1) {
    if (needsComma) await writeChunk(target, ',');
    await writeChunk(target, JSON.stringify(index[position]));
    needsComma = true;
  }
  await writeChunk(target, ']');
}
|
|
20
|
+
|
|
21
|
+
/**
 * Stream `index` as JSON into `filePath`, creating parent directories first.
 * Resolves once the write stream has emitted `'finish'`.
 *
 * @param {string} filePath - Destination file.
 * @param {any[]} index - Entries to serialise.
 * @returns {Promise<void>}
 */
async function writeIndexToFile(filePath, index) {
  const parentDir = dirname(filePath);
  await fs.promises.mkdir(parentDir, { recursive: true });

  const sink = fs.createWriteStream(filePath, { encoding: 'utf8' });
  try {
    await streamIndexJson(sink, index);
  } finally {
    // Always end the stream, even when serialisation failed part-way.
    sink.end();
  }
  await once(sink, 'finish');
}
|
|
31
|
+
|
|
32
|
+
/**
 * Stream `index` as a JSON array into `res`, then end it.
 *
 * @param {{ write(chunk: string): boolean, end(): void }} res - Destination
 *   with `write()` and `end()`; headers are not set here.
 * @param {any[]} index - Entries to serialise.
 * @returns {Promise<void>}
 */
async function writeIndexToResponse(res, index) {
  const body = streamIndexJson(res, index);
  await body;
  res.end();
}
|
|
36
|
+
|
|
37
|
+
/**
 * Compute the absolute build output directory from a config object.
 *
 * @param {{ root: string, build: { outDir: string } }} config - Object
 *   providing `root` and `build.outDir`.
 * @returns {string} `build.outDir` resolved against `root`.
 */
function resolveOutputDir(config) {
  const { root, build } = config;
  return resolve(root, build.outDir);
}
|
|
40
|
+
|
|
41
|
+
/**
 * Read the shard count from the `GREG_SEARCH_SHARDS` environment variable.
 *
 * @returns {number} `0` when the variable is 0/false/off/no
 *   (case-insensitive); the parsed integer when it lies in 2..512; otherwise
 *   the default of 32 (unset, blank, non-numeric or out of range).
 */
function resolveShardCount() {
  const DEFAULT_SHARDS = 32;
  const MIN_SHARDS = 2;
  const MAX_SHARDS = 512;
  const DISABLED = new Set(['0', 'false', 'off', 'no']);

  const raw = process.env.GREG_SEARCH_SHARDS;
  if (raw == null) return DEFAULT_SHARDS;

  const normalized = String(raw).trim().toLowerCase();
  if (normalized === '') return DEFAULT_SHARDS;
  if (DISABLED.has(normalized)) return 0;

  const parsed = Number.parseInt(String(raw), 10);
  // NaN fails both comparisons, so unparsable input falls back to the default.
  const inRange = parsed >= MIN_SHARDS && parsed <= MAX_SHARDS;
  return inRange ? parsed : DEFAULT_SHARDS;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Hash a route id to an unsigned 32-bit integer.
 *
 * Starts from 5381 and, for every UTF-16 code unit, multiplies the running
 * hash by 33 (with 32-bit wrap-around) and XORs in the code unit.
 *
 * @param {unknown} id - Route id; falsy values hash like the empty string.
 * @returns {number} Integer in the range 0..2^32-1.
 */
function hashRouteId(id) {
  const text = String(id || '');
  let acc = 5381;
  for (let pos = 0; pos < text.length; pos += 1) {
    // Math.imul(acc, 33) yields the same 32-bit value as (acc << 5) + acc
    // once the XOR truncates the sum to an int32.
    acc = Math.imul(acc, 33) ^ text.charCodeAt(pos);
  }
  return acc >>> 0;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Left-pad a shard number with zeros.
 *
 * @param {number|string} value - Value to format.
 * @param {number} [width=3] - Minimum length of the result; longer values
 *   are returned unshortened.
 * @returns {string} Zero-padded text form of `value`.
 */
function padShardNumber(value, width = 3) {
  const digits = String(value);
  return digits.padStart(width, '0');
}
|
|
64
|
+
|
|
65
|
+
/**
 * Format a byte count for log output.
 *
 * Values below 1024 are printed as `"<n> B"`; larger values are divided by
 * 1024 until they drop below 1024 or reach TB, and printed with two decimals.
 *
 * @param {unknown} bytes - Byte count; values that convert to NaN or 0 are
 *   shown as `"0 B"`.
 * @returns {string} Human-readable size.
 */
function formatBytes(bytes) {
  const total = Number(bytes) || 0;
  if (total < 1024) return `${total} B`;

  const UNITS = ['KB', 'MB', 'GB', 'TB'];
  const lastUnit = UNITS.length - 1;
  let unit = 0;
  let scaled = total / 1024;
  for (; scaled >= 1024 && unit < lastUnit; unit += 1) {
    scaled /= 1024;
  }
  return `${scaled.toFixed(2)} ${UNITS[unit]}`;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Break a title or heading into lower-cased hint terms.
 *
 * Characters other than Unicode letters, Unicode numbers, whitespace and
 * hyphens act as separators; the text is split on whitespace and hyphens and
 * terms shorter than three UTF-16 code units are discarded.
 *
 * @param {unknown} value - Text to tokenize; falsy values yield no terms.
 * @returns {string[]} Terms in source order (duplicates kept).
 */
function tokenizeHintText(value) {
  const lowered = String(value || '').toLowerCase();
  const cleaned = lowered.replace(/[^\p{L}\p{N}\s-]+/gu, ' ');
  const terms = [];
  // Splitting on /[\s-]+/ leaves no whitespace inside a piece, so trimming
  // each piece would be a no-op.
  for (const piece of cleaned.split(/[\s-]+/)) {
    if (piece.length >= 3) terms.push(piece);
  }
  return terms;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Collect the distinct hint terms of a shard from its documents' titles and
 * section headings.
 *
 * Collection stops as soon as the number of distinct terms reaches
 * `maxTerms`.
 *
 * @param {Iterable<any>} docs - Documents assigned to the shard; `title` and
 *   `sections[].heading` are read.
 * @param {number} [maxTerms=1200] - Cap on the number of distinct terms.
 * @returns {string[]} Distinct terms in first-seen order.
 */
function buildShardHints(docs, maxTerms = 1200) {
  const collected = new Set();

  // Adds the tokens of `text`; returns true once the cap has been reached.
  const absorb = (text) => {
    for (const token of tokenizeHintText(text)) {
      collected.add(token);
      if (collected.size >= maxTerms) return true;
    }
    return false;
  };

  scan: for (const doc of docs) {
    if (absorb(doc?.title)) break scan;
    for (const section of (doc?.sections || [])) {
      if (absorb(section?.heading)) break scan;
    }
  }
  return Array.from(collected);
}
|
|
103
|
+
|
|
104
|
+
/**
 * Split the search index into shard files plus a manifest under
 * `<outDir>/search-index`.
 *
 * Entries are assigned to shards by `hashRouteId(entry.id) % shardCount`.
 * Every shard, including empty ones, is written as `shard-NNN.json`; the
 * manifest (`manifest.json`) records each shard's key, file name, document
 * count, size in bytes and hint terms. Nothing is written when
 * `resolveShardCount()` returns less than 2.
 *
 * @param {string} outDir - Build output directory.
 * @param {any[]} index - Full search index.
 * @param {{ info?: (message: string) => void }} [logger] - Optional logger;
 *   only `info` is used.
 * @returns {Promise<void>}
 */
async function writeShardedIndex(outDir, index, logger) {
  const shardCount = resolveShardCount();
  if (shardCount < 2) {
    if (logger?.info) {
      logger.info('[vite-plugin-search-index] Skipping sharded index generation (GREG_SEARCH_SHARDS disabled).');
    }
    return;
  }

  const shardDir = resolve(outDir, 'search-index');

  // Distribute entries over the buckets by hashed route id.
  const shardBuckets = [];
  for (let slot = 0; slot < shardCount; slot += 1) shardBuckets.push([]);
  for (const entry of index) {
    shardBuckets[hashRouteId(entry?.id) % shardCount].push(entry);
  }

  const files = [];
  let writtenDocs = 0;
  let totalBytes = 0;
  for (const [position, docs] of shardBuckets.entries()) {
    const key = padShardNumber(position);
    const file = `shard-${key}.json`;
    const absFile = resolve(shardDir, file);

    await writeIndexToFile(absFile, docs);
    const { size } = fs.statSync(absFile);

    files.push({ key, file, docs: docs.length, size, hints: buildShardHints(docs) });
    writtenDocs += docs.length;
    totalBytes += size;
  }

  const manifest = {
    format: 'greg-search-shards-v1',
    createdAt: new Date().toISOString(),
    shardCount,
    totalDocs: writtenDocs,
    totalBytes,
    files,
  };

  await fs.promises.mkdir(shardDir, { recursive: true });
  const manifestPath = resolve(shardDir, 'manifest.json');
  await fs.promises.writeFile(manifestPath, JSON.stringify(manifest, null, 2), 'utf8');

  if (logger?.info) {
    logger.info(
      `[vite-plugin-search-index] Wrote sharded search index (${shardCount} shards, ${formatBytes(totalBytes)}).`,
    );
  }
}
|
|
158
|
+
|
|
4
159
|
/**
|
|
5
160
|
* Vite plugin: serves /search-index.json in dev and emits it as a build asset.
|
|
6
161
|
*
|
|
@@ -13,6 +168,7 @@ import { buildSearchIndex, invalidateSearchIndexCache } from './searchIndexBuild
|
|
|
13
168
|
*/
|
|
14
169
|
export function vitePluginSearchIndex({ docsDir = 'docs', srcDir = '/docs' } = {}) {
|
|
15
170
|
let resolvedDocsDirs;
|
|
171
|
+
let resolvedOutDir;
|
|
16
172
|
|
|
17
173
|
return {
|
|
18
174
|
name: 'vite-plugin-search-index',
|
|
@@ -20,6 +176,7 @@ export function vitePluginSearchIndex({ docsDir = 'docs', srcDir = '/docs' } = {
|
|
|
20
176
|
configResolved(config) {
|
|
21
177
|
const dirs = Array.isArray(docsDir) ? docsDir : [docsDir];
|
|
22
178
|
resolvedDocsDirs = dirs.map(d => resolve(config.root, d));
|
|
179
|
+
resolvedOutDir = resolveOutputDir(config);
|
|
23
180
|
},
|
|
24
181
|
|
|
25
182
|
// Dev-server: answer GET /search-index.json with the cached index
|
|
@@ -37,21 +194,22 @@ export function vitePluginSearchIndex({ docsDir = 'docs', srcDir = '/docs' } = {
|
|
|
37
194
|
'Content-Type': 'application/json; charset=utf-8',
|
|
38
195
|
'Cache-Control': 'no-cache',
|
|
39
196
|
});
|
|
40
|
-
res
|
|
197
|
+
await writeIndexToResponse(res, index);
|
|
41
198
|
} catch (err) {
|
|
42
199
|
next(err);
|
|
43
200
|
}
|
|
44
201
|
});
|
|
45
202
|
},
|
|
46
203
|
|
|
47
|
-
// Production build:
|
|
48
|
-
|
|
204
|
+
// Production build: write full search-index.json directly to outDir.
|
|
205
|
+
// Streaming avoids creating one giant JSON string in memory.
|
|
206
|
+
async closeBundle() {
|
|
49
207
|
const index = await buildSearchIndex(resolvedDocsDirs, srcDir);
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
208
|
+
const outFile = resolve(resolvedOutDir, 'search-index.json');
|
|
209
|
+
await writeIndexToFile(outFile, index);
|
|
210
|
+
const fullSize = fs.statSync(outFile).size;
|
|
211
|
+
this.info(`[vite-plugin-search-index] Wrote search-index.json (${formatBytes(fullSize)}).`);
|
|
212
|
+
await writeShardedIndex(resolvedOutDir, index, this);
|
|
55
213
|
},
|
|
56
214
|
};
|
|
57
215
|
}
|
package/types/index.d.ts
CHANGED
|
@@ -445,6 +445,15 @@ export type GregConfig = {
|
|
|
445
445
|
/** Ignore match position in text. Default: true */
|
|
446
446
|
ignoreLocation?: boolean;
|
|
447
447
|
};
|
|
448
|
+
/** Standalone `greg search-server` performance tuning options. */
|
|
449
|
+
server?: {
|
|
450
|
+
/** Preload all shard indexes on startup. Default: true */
|
|
451
|
+
preloadShards?: boolean;
|
|
452
|
+
/** Max number of shard Fuse indexes kept in memory. */
|
|
453
|
+
maxLoadedShards?: number;
|
|
454
|
+
/** Number of candidate shards searched first per query. Default: 6 */
|
|
455
|
+
shardCandidates?: number;
|
|
456
|
+
};
|
|
448
457
|
/** AI knowledge-base (RAG) configuration. */
|
|
449
458
|
ai?: AiConfig;
|
|
450
459
|
};
|