searchsocket 0.5.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +731 -514
- package/dist/cli.js +3335 -492
- package/dist/client.d.cts +1 -1
- package/dist/client.d.ts +1 -1
- package/dist/index.cjs +2378 -475
- package/dist/index.d.cts +113 -40
- package/dist/index.d.ts +113 -40
- package/dist/index.js +2378 -475
- package/dist/{plugin-B_npJSux.d.cts → plugin-C61L-ykY.d.ts} +2 -1
- package/dist/{plugin-M-aW0ev6.d.ts → plugin-DoBW1gkK.d.cts} +2 -1
- package/dist/sveltekit.cjs +2430 -494
- package/dist/sveltekit.d.cts +2 -2
- package/dist/sveltekit.d.ts +2 -2
- package/dist/sveltekit.js +2416 -480
- package/dist/templates/search-dialog/SearchDialog.svelte +175 -0
- package/dist/templates/search-input/SearchInput.svelte +151 -0
- package/dist/templates/search-results/SearchResults.svelte +75 -0
- package/dist/{types-Dk43uz25.d.cts → types-029hl6P2.d.cts} +180 -9
- package/dist/{types-Dk43uz25.d.ts → types-029hl6P2.d.ts} +180 -9
- package/package.json +28 -11
- package/src/svelte/SearchSocket.svelte +35 -0
- package/src/svelte/index.svelte.ts +181 -0
package/dist/cli.js
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/cli.ts
|
|
4
|
-
import
|
|
5
|
-
import
|
|
6
|
-
import
|
|
4
|
+
import fs11 from "fs";
|
|
5
|
+
import fsp2 from "fs/promises";
|
|
6
|
+
import path15 from "path";
|
|
7
7
|
import { execSync as execSync2 } from "child_process";
|
|
8
8
|
import { config as dotenvConfig } from "dotenv";
|
|
9
9
|
import chokidar from "chokidar";
|
|
10
|
-
import { Command } from "commander";
|
|
10
|
+
import { Command, Option } from "commander";
|
|
11
11
|
|
|
12
12
|
// package.json
|
|
13
13
|
var package_default = {
|
|
14
14
|
name: "searchsocket",
|
|
15
|
-
version: "0.
|
|
15
|
+
version: "0.6.1",
|
|
16
16
|
description: "Semantic site search and MCP retrieval for SvelteKit static sites",
|
|
17
17
|
license: "MIT",
|
|
18
18
|
author: "Greg Priday <greg@siteorigin.com>",
|
|
@@ -38,6 +38,7 @@ var package_default = {
|
|
|
38
38
|
files: [
|
|
39
39
|
"dist",
|
|
40
40
|
"!dist/**/*.map",
|
|
41
|
+
"src/svelte",
|
|
41
42
|
"README.md"
|
|
42
43
|
],
|
|
43
44
|
bin: {
|
|
@@ -63,6 +64,19 @@ var package_default = {
|
|
|
63
64
|
types: "./dist/scroll.d.ts",
|
|
64
65
|
import: "./dist/scroll.js",
|
|
65
66
|
require: "./dist/scroll.cjs"
|
|
67
|
+
},
|
|
68
|
+
"./svelte": {
|
|
69
|
+
types: "./src/svelte/index.svelte.ts",
|
|
70
|
+
svelte: "./src/svelte/index.svelte.ts",
|
|
71
|
+
default: "./src/svelte/index.svelte.ts"
|
|
72
|
+
}
|
|
73
|
+
},
|
|
74
|
+
peerDependencies: {
|
|
75
|
+
svelte: "^5.0.0"
|
|
76
|
+
},
|
|
77
|
+
peerDependenciesMeta: {
|
|
78
|
+
svelte: {
|
|
79
|
+
optional: true
|
|
66
80
|
}
|
|
67
81
|
},
|
|
68
82
|
scripts: {
|
|
@@ -78,8 +92,9 @@ var package_default = {
|
|
|
78
92
|
},
|
|
79
93
|
packageManager: "pnpm@10.29.2",
|
|
80
94
|
dependencies: {
|
|
95
|
+
"@clack/prompts": "^1.2.0",
|
|
81
96
|
"@modelcontextprotocol/sdk": "^1.26.0",
|
|
82
|
-
"@upstash/
|
|
97
|
+
"@upstash/vector": "^1.2.3",
|
|
83
98
|
cheerio: "^1.2.0",
|
|
84
99
|
chokidar: "^5.0.0",
|
|
85
100
|
commander: "^14.0.3",
|
|
@@ -88,16 +103,19 @@ var package_default = {
|
|
|
88
103
|
"fast-glob": "^3.3.3",
|
|
89
104
|
"gray-matter": "^4.0.3",
|
|
90
105
|
jiti: "^2.6.1",
|
|
106
|
+
magicast: "^0.5.2",
|
|
91
107
|
"p-limit": "^7.3.0",
|
|
92
108
|
turndown: "^7.2.2",
|
|
93
109
|
"turndown-plugin-gfm": "^1.0.2",
|
|
94
110
|
zod: "^4.3.6"
|
|
95
111
|
},
|
|
96
112
|
devDependencies: {
|
|
113
|
+
"@sveltejs/vite-plugin-svelte": "^6.2.4",
|
|
97
114
|
"@types/express": "^5.0.6",
|
|
98
115
|
"@types/node": "^25.2.2",
|
|
99
116
|
"@types/turndown": "^5.0.6",
|
|
100
117
|
jsdom: "^28.1.0",
|
|
118
|
+
svelte: "^5.55.1",
|
|
101
119
|
tsup: "^8.5.1",
|
|
102
120
|
typescript: "^5.9.3",
|
|
103
121
|
vitest: "^4.0.18"
|
|
@@ -154,6 +172,7 @@ var searchSocketConfigSchema = z.object({
|
|
|
154
172
|
dropSelectors: z.array(z.string()).optional(),
|
|
155
173
|
ignoreAttr: z.string().optional(),
|
|
156
174
|
noindexAttr: z.string().optional(),
|
|
175
|
+
imageDescAttr: z.string().optional(),
|
|
157
176
|
respectRobotsNoindex: z.boolean().optional()
|
|
158
177
|
}).optional(),
|
|
159
178
|
transform: z.object({
|
|
@@ -169,35 +188,48 @@ var searchSocketConfigSchema = z.object({
|
|
|
169
188
|
headingPathDepth: z.number().int().positive().optional(),
|
|
170
189
|
dontSplitInside: z.array(z.enum(["code", "table", "blockquote"])).optional(),
|
|
171
190
|
prependTitle: z.boolean().optional(),
|
|
172
|
-
pageSummaryChunk: z.boolean().optional()
|
|
191
|
+
pageSummaryChunk: z.boolean().optional(),
|
|
192
|
+
weightHeadings: z.boolean().optional()
|
|
173
193
|
}).optional(),
|
|
174
194
|
upstash: z.object({
|
|
175
195
|
url: z.string().url().optional(),
|
|
176
196
|
token: z.string().min(1).optional(),
|
|
177
197
|
urlEnv: z.string().min(1).optional(),
|
|
178
|
-
tokenEnv: z.string().min(1).optional()
|
|
198
|
+
tokenEnv: z.string().min(1).optional(),
|
|
199
|
+
namespaces: z.object({
|
|
200
|
+
pages: z.string().min(1).optional(),
|
|
201
|
+
chunks: z.string().min(1).optional()
|
|
202
|
+
}).optional()
|
|
203
|
+
}).optional(),
|
|
204
|
+
embedding: z.object({
|
|
205
|
+
model: z.string().optional(),
|
|
206
|
+
dimensions: z.number().int().positive().optional(),
|
|
207
|
+
taskType: z.string().optional(),
|
|
208
|
+
batchSize: z.number().int().positive().optional()
|
|
179
209
|
}).optional(),
|
|
180
210
|
search: z.object({
|
|
181
|
-
semanticWeight: z.number().min(0).max(1).optional(),
|
|
182
|
-
inputEnrichment: z.boolean().optional(),
|
|
183
|
-
reranking: z.boolean().optional(),
|
|
184
211
|
dualSearch: z.boolean().optional(),
|
|
185
212
|
pageSearchWeight: z.number().min(0).max(1).optional()
|
|
186
213
|
}).optional(),
|
|
187
214
|
ranking: z.object({
|
|
188
215
|
enableIncomingLinkBoost: z.boolean().optional(),
|
|
189
216
|
enableDepthBoost: z.boolean().optional(),
|
|
217
|
+
enableFreshnessBoost: z.boolean().optional(),
|
|
218
|
+
freshnessDecayRate: z.number().positive().optional(),
|
|
219
|
+
enableAnchorTextBoost: z.boolean().optional(),
|
|
190
220
|
pageWeights: z.record(z.string(), z.number().min(0)).optional(),
|
|
191
221
|
aggregationCap: z.number().int().positive().optional(),
|
|
192
222
|
aggregationDecay: z.number().min(0).max(1).optional(),
|
|
193
223
|
minChunkScoreRatio: z.number().min(0).max(1).optional(),
|
|
194
|
-
|
|
224
|
+
minScoreRatio: z.number().min(0).max(1).optional(),
|
|
195
225
|
scoreGapThreshold: z.number().min(0).max(1).optional(),
|
|
196
226
|
weights: z.object({
|
|
197
227
|
incomingLinks: z.number().optional(),
|
|
198
228
|
depth: z.number().optional(),
|
|
199
229
|
aggregation: z.number().optional(),
|
|
200
|
-
titleMatch: z.number().optional()
|
|
230
|
+
titleMatch: z.number().optional(),
|
|
231
|
+
freshness: z.number().optional(),
|
|
232
|
+
anchorText: z.number().optional()
|
|
201
233
|
}).optional()
|
|
202
234
|
}).optional(),
|
|
203
235
|
api: z.object({
|
|
@@ -212,12 +244,28 @@ var searchSocketConfigSchema = z.object({
|
|
|
212
244
|
}).optional(),
|
|
213
245
|
mcp: z.object({
|
|
214
246
|
enable: z.boolean().optional(),
|
|
247
|
+
access: z.enum(["public", "private"]).optional(),
|
|
215
248
|
transport: z.enum(["stdio", "http"]).optional(),
|
|
216
249
|
http: z.object({
|
|
217
250
|
port: z.number().int().positive().optional(),
|
|
218
|
-
path: z.string().optional()
|
|
251
|
+
path: z.string().optional(),
|
|
252
|
+
apiKey: z.string().min(1).optional(),
|
|
253
|
+
apiKeyEnv: z.string().min(1).optional()
|
|
254
|
+
}).optional(),
|
|
255
|
+
handle: z.object({
|
|
256
|
+
path: z.string().optional(),
|
|
257
|
+
apiKey: z.string().min(1).optional(),
|
|
258
|
+
enableJsonResponse: z.boolean().optional()
|
|
219
259
|
}).optional()
|
|
220
260
|
}).optional(),
|
|
261
|
+
llmsTxt: z.object({
|
|
262
|
+
enable: z.boolean().optional(),
|
|
263
|
+
outputPath: z.string().optional(),
|
|
264
|
+
title: z.string().optional(),
|
|
265
|
+
description: z.string().optional(),
|
|
266
|
+
generateFull: z.boolean().optional(),
|
|
267
|
+
serveMarkdownVariants: z.boolean().optional()
|
|
268
|
+
}).optional(),
|
|
221
269
|
state: z.object({
|
|
222
270
|
dir: z.string().optional()
|
|
223
271
|
}).optional()
|
|
@@ -256,6 +304,7 @@ function createDefaultConfig(projectId) {
|
|
|
256
304
|
dropSelectors: DEFAULT_DROP_SELECTORS,
|
|
257
305
|
ignoreAttr: "data-search-ignore",
|
|
258
306
|
noindexAttr: "data-search-noindex",
|
|
307
|
+
imageDescAttr: "data-search-description",
|
|
259
308
|
respectRobotsNoindex: true
|
|
260
309
|
},
|
|
261
310
|
transform: {
|
|
@@ -265,39 +314,52 @@ function createDefaultConfig(projectId) {
|
|
|
265
314
|
},
|
|
266
315
|
chunking: {
|
|
267
316
|
strategy: "hybrid",
|
|
268
|
-
maxChars:
|
|
317
|
+
maxChars: 1500,
|
|
269
318
|
overlapChars: 200,
|
|
270
319
|
minChars: 250,
|
|
271
320
|
headingPathDepth: 3,
|
|
272
321
|
dontSplitInside: ["code", "table", "blockquote"],
|
|
273
322
|
prependTitle: true,
|
|
274
|
-
pageSummaryChunk: true
|
|
323
|
+
pageSummaryChunk: true,
|
|
324
|
+
weightHeadings: true
|
|
275
325
|
},
|
|
276
326
|
upstash: {
|
|
277
|
-
urlEnv: "
|
|
278
|
-
tokenEnv: "
|
|
327
|
+
urlEnv: "UPSTASH_VECTOR_REST_URL",
|
|
328
|
+
tokenEnv: "UPSTASH_VECTOR_REST_TOKEN",
|
|
329
|
+
namespaces: {
|
|
330
|
+
pages: "pages",
|
|
331
|
+
chunks: "chunks"
|
|
332
|
+
}
|
|
333
|
+
},
|
|
334
|
+
embedding: {
|
|
335
|
+
model: "bge-large-en-v1.5",
|
|
336
|
+
dimensions: 1024,
|
|
337
|
+
taskType: "RETRIEVAL_DOCUMENT",
|
|
338
|
+
batchSize: 100
|
|
279
339
|
},
|
|
280
340
|
search: {
|
|
281
|
-
semanticWeight: 0.75,
|
|
282
|
-
inputEnrichment: true,
|
|
283
|
-
reranking: true,
|
|
284
341
|
dualSearch: true,
|
|
285
342
|
pageSearchWeight: 0.3
|
|
286
343
|
},
|
|
287
344
|
ranking: {
|
|
288
345
|
enableIncomingLinkBoost: true,
|
|
289
346
|
enableDepthBoost: true,
|
|
347
|
+
enableFreshnessBoost: false,
|
|
348
|
+
freshnessDecayRate: 1e-3,
|
|
349
|
+
enableAnchorTextBoost: false,
|
|
290
350
|
pageWeights: {},
|
|
291
351
|
aggregationCap: 5,
|
|
292
352
|
aggregationDecay: 0.5,
|
|
293
353
|
minChunkScoreRatio: 0.5,
|
|
294
|
-
|
|
354
|
+
minScoreRatio: 0.7,
|
|
295
355
|
scoreGapThreshold: 0.4,
|
|
296
356
|
weights: {
|
|
297
357
|
incomingLinks: 0.05,
|
|
298
358
|
depth: 0.03,
|
|
299
359
|
aggregation: 0.1,
|
|
300
|
-
titleMatch: 0.15
|
|
360
|
+
titleMatch: 0.15,
|
|
361
|
+
freshness: 0.1,
|
|
362
|
+
anchorText: 0.1
|
|
301
363
|
}
|
|
302
364
|
},
|
|
303
365
|
api: {
|
|
@@ -308,12 +370,23 @@ function createDefaultConfig(projectId) {
|
|
|
308
370
|
},
|
|
309
371
|
mcp: {
|
|
310
372
|
enable: process.env.NODE_ENV !== "production",
|
|
373
|
+
access: "private",
|
|
311
374
|
transport: "stdio",
|
|
312
375
|
http: {
|
|
313
376
|
port: 3338,
|
|
314
377
|
path: "/mcp"
|
|
378
|
+
},
|
|
379
|
+
handle: {
|
|
380
|
+
path: "/api/mcp",
|
|
381
|
+
enableJsonResponse: true
|
|
315
382
|
}
|
|
316
383
|
},
|
|
384
|
+
llmsTxt: {
|
|
385
|
+
enable: false,
|
|
386
|
+
outputPath: "static/llms.txt",
|
|
387
|
+
generateFull: true,
|
|
388
|
+
serveMarkdownVariants: false
|
|
389
|
+
},
|
|
317
390
|
state: {
|
|
318
391
|
dir: ".searchsocket"
|
|
319
392
|
}
|
|
@@ -425,7 +498,15 @@ ${issues}`
|
|
|
425
498
|
},
|
|
426
499
|
upstash: {
|
|
427
500
|
...defaults.upstash,
|
|
428
|
-
...parsed.upstash
|
|
501
|
+
...parsed.upstash,
|
|
502
|
+
namespaces: {
|
|
503
|
+
...defaults.upstash.namespaces,
|
|
504
|
+
...parsed.upstash?.namespaces
|
|
505
|
+
}
|
|
506
|
+
},
|
|
507
|
+
embedding: {
|
|
508
|
+
...defaults.embedding,
|
|
509
|
+
...parsed.embedding
|
|
429
510
|
},
|
|
430
511
|
search: {
|
|
431
512
|
...defaults.search,
|
|
@@ -462,8 +543,16 @@ ${issues}`
|
|
|
462
543
|
http: {
|
|
463
544
|
...defaults.mcp.http,
|
|
464
545
|
...parsed.mcp?.http
|
|
546
|
+
},
|
|
547
|
+
handle: {
|
|
548
|
+
...defaults.mcp.handle,
|
|
549
|
+
...parsed.mcp?.handle
|
|
465
550
|
}
|
|
466
551
|
},
|
|
552
|
+
llmsTxt: {
|
|
553
|
+
...defaults.llmsTxt,
|
|
554
|
+
...parsed.llmsTxt
|
|
555
|
+
},
|
|
467
556
|
state: {
|
|
468
557
|
...defaults.state,
|
|
469
558
|
...parsed.state
|
|
@@ -483,6 +572,15 @@ ${issues}`
|
|
|
483
572
|
maxDepth: 10
|
|
484
573
|
};
|
|
485
574
|
}
|
|
575
|
+
if (merged.mcp.access === "public") {
|
|
576
|
+
const resolvedKey = merged.mcp.http.apiKey ?? (merged.mcp.http.apiKeyEnv ? process.env[merged.mcp.http.apiKeyEnv] : void 0);
|
|
577
|
+
if (!resolvedKey) {
|
|
578
|
+
throw new SearchSocketError(
|
|
579
|
+
"CONFIG_MISSING",
|
|
580
|
+
'`mcp.access` is "public" but no API key is configured. Set `mcp.http.apiKey` or `mcp.http.apiKeyEnv`.'
|
|
581
|
+
);
|
|
582
|
+
}
|
|
583
|
+
}
|
|
486
584
|
if (merged.source.mode === "crawl" && !merged.source.crawl?.baseUrl) {
|
|
487
585
|
throw new SearchSocketError("CONFIG_MISSING", "`source.crawl.baseUrl` is required when source.mode is crawl.");
|
|
488
586
|
}
|
|
@@ -521,8 +619,8 @@ function writeMinimalConfig(cwd) {
|
|
|
521
619
|
return target;
|
|
522
620
|
}
|
|
523
621
|
const content = `export default {
|
|
524
|
-
// Upstash
|
|
525
|
-
// upstash: { urlEnv: "
|
|
622
|
+
// Upstash Vector credentials (set via env vars or directly here)
|
|
623
|
+
// upstash: { urlEnv: "UPSTASH_VECTOR_REST_URL", tokenEnv: "UPSTASH_VECTOR_REST_TOKEN" }
|
|
526
624
|
};
|
|
527
625
|
`;
|
|
528
626
|
fs.writeFileSync(target, content, "utf8");
|
|
@@ -586,11 +684,11 @@ var Logger = class {
|
|
|
586
684
|
this.writeOut(` [${event}] ${data ? JSON.stringify(data) : ""}
|
|
587
685
|
`);
|
|
588
686
|
}
|
|
589
|
-
writeOut(
|
|
687
|
+
writeOut(text2) {
|
|
590
688
|
if (this.stderrOnly) {
|
|
591
|
-
process.stderr.write(
|
|
689
|
+
process.stderr.write(text2);
|
|
592
690
|
} else {
|
|
593
|
-
process.stdout.write(
|
|
691
|
+
process.stdout.write(text2);
|
|
594
692
|
}
|
|
595
693
|
}
|
|
596
694
|
logJson(event, data) {
|
|
@@ -617,13 +715,84 @@ function normalizeMarkdown(input) {
|
|
|
617
715
|
function sanitizeScopeName(scopeName) {
|
|
618
716
|
return scopeName.toLowerCase().replace(/[^a-z0-9._-]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 80);
|
|
619
717
|
}
|
|
718
|
+
function markdownToPlain(markdown) {
|
|
719
|
+
return markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/[#>*_|\-]/g, " ").replace(/\s+/g, " ").trim();
|
|
720
|
+
}
|
|
620
721
|
function toSnippet(markdown, maxLen = 220) {
|
|
621
|
-
const plain = markdown
|
|
722
|
+
const plain = markdownToPlain(markdown);
|
|
622
723
|
if (plain.length <= maxLen) {
|
|
623
724
|
return plain;
|
|
624
725
|
}
|
|
625
726
|
return `${plain.slice(0, Math.max(0, maxLen - 1)).trim()}\u2026`;
|
|
626
727
|
}
|
|
728
|
+
function queryAwareExcerpt(markdown, query, maxLen = 220) {
|
|
729
|
+
const plain = markdownToPlain(markdown);
|
|
730
|
+
if (plain.length <= maxLen) return plain;
|
|
731
|
+
const tokens = query.toLowerCase().split(/\s+/).filter((t) => t.length >= 2);
|
|
732
|
+
if (tokens.length === 0) return toSnippet(markdown, maxLen);
|
|
733
|
+
const positions = [];
|
|
734
|
+
for (let ti = 0; ti < tokens.length; ti++) {
|
|
735
|
+
const escaped = tokens[ti].replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
736
|
+
const re = new RegExp(escaped, "gi");
|
|
737
|
+
let m;
|
|
738
|
+
while ((m = re.exec(plain)) !== null) {
|
|
739
|
+
positions.push({ start: m.index, end: m.index + m[0].length, tokenIdx: ti });
|
|
740
|
+
}
|
|
741
|
+
}
|
|
742
|
+
if (positions.length === 0) return toSnippet(markdown, maxLen);
|
|
743
|
+
positions.sort((a, b) => a.start - b.start);
|
|
744
|
+
let bestUniqueCount = 0;
|
|
745
|
+
let bestTotalCount = 0;
|
|
746
|
+
let bestLeft = 0;
|
|
747
|
+
let bestRight = 0;
|
|
748
|
+
let left = 0;
|
|
749
|
+
const tokenCounts = /* @__PURE__ */ new Map();
|
|
750
|
+
for (let right = 0; right < positions.length; right++) {
|
|
751
|
+
tokenCounts.set(positions[right].tokenIdx, (tokenCounts.get(positions[right].tokenIdx) ?? 0) + 1);
|
|
752
|
+
while (positions[right].end - positions[left].start > maxLen && left < right) {
|
|
753
|
+
const leftToken = positions[left].tokenIdx;
|
|
754
|
+
const cnt = tokenCounts.get(leftToken) - 1;
|
|
755
|
+
if (cnt === 0) tokenCounts.delete(leftToken);
|
|
756
|
+
else tokenCounts.set(leftToken, cnt);
|
|
757
|
+
left++;
|
|
758
|
+
}
|
|
759
|
+
const uniqueCount = tokenCounts.size;
|
|
760
|
+
const totalCount = right - left + 1;
|
|
761
|
+
if (uniqueCount > bestUniqueCount || uniqueCount === bestUniqueCount && totalCount > bestTotalCount) {
|
|
762
|
+
bestUniqueCount = uniqueCount;
|
|
763
|
+
bestTotalCount = totalCount;
|
|
764
|
+
bestLeft = left;
|
|
765
|
+
bestRight = right;
|
|
766
|
+
}
|
|
767
|
+
}
|
|
768
|
+
const mid = Math.floor((positions[bestLeft].start + positions[bestRight].end) / 2);
|
|
769
|
+
let start = Math.max(0, mid - Math.floor(maxLen / 2));
|
|
770
|
+
let end = Math.min(plain.length, start + maxLen);
|
|
771
|
+
start = Math.max(0, end - maxLen);
|
|
772
|
+
if (start > 0) {
|
|
773
|
+
const spaceIdx = plain.lastIndexOf(" ", start);
|
|
774
|
+
if (spaceIdx > start - 30) {
|
|
775
|
+
start = spaceIdx + 1;
|
|
776
|
+
}
|
|
777
|
+
}
|
|
778
|
+
if (end < plain.length) {
|
|
779
|
+
const spaceIdx = plain.indexOf(" ", end);
|
|
780
|
+
if (spaceIdx !== -1 && spaceIdx < end + 30) {
|
|
781
|
+
end = spaceIdx;
|
|
782
|
+
}
|
|
783
|
+
}
|
|
784
|
+
let excerpt = plain.slice(start, end);
|
|
785
|
+
if (excerpt.length > Math.ceil(maxLen * 1.2)) {
|
|
786
|
+
excerpt = excerpt.slice(0, maxLen);
|
|
787
|
+
const lastSpace = excerpt.lastIndexOf(" ");
|
|
788
|
+
if (lastSpace > maxLen * 0.5) {
|
|
789
|
+
excerpt = excerpt.slice(0, lastSpace);
|
|
790
|
+
}
|
|
791
|
+
}
|
|
792
|
+
const prefix = start > 0 ? "\u2026" : "";
|
|
793
|
+
const suffix = end < plain.length ? "\u2026" : "";
|
|
794
|
+
return `${prefix}${excerpt}${suffix}`;
|
|
795
|
+
}
|
|
627
796
|
function extractFirstParagraph(markdown) {
|
|
628
797
|
const lines = markdown.split("\n");
|
|
629
798
|
let inFence = false;
|
|
@@ -690,163 +859,346 @@ function ensureStateDirs(cwd, stateDir, scope) {
|
|
|
690
859
|
}
|
|
691
860
|
|
|
692
861
|
// src/indexing/pipeline.ts
|
|
693
|
-
import
|
|
862
|
+
import path11 from "path";
|
|
694
863
|
|
|
695
864
|
// src/vector/upstash.ts
|
|
696
|
-
|
|
697
|
-
return `${scope.projectId}--${scope.scopeName}`;
|
|
698
|
-
}
|
|
699
|
-
function pageIndexName(scope) {
|
|
700
|
-
return `${scope.projectId}--${scope.scopeName}--pages`;
|
|
701
|
-
}
|
|
865
|
+
import { QueryMode, FusionAlgorithm } from "@upstash/vector";
|
|
702
866
|
var UpstashSearchStore = class {
|
|
703
|
-
|
|
867
|
+
index;
|
|
868
|
+
pagesNs;
|
|
869
|
+
chunksNs;
|
|
704
870
|
constructor(opts) {
|
|
705
|
-
this.
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
return this.client.index(chunkIndexName(scope));
|
|
709
|
-
}
|
|
710
|
-
pageIndex(scope) {
|
|
711
|
-
return this.client.index(pageIndexName(scope));
|
|
871
|
+
this.index = opts.index;
|
|
872
|
+
this.pagesNs = opts.index.namespace(opts.pagesNamespace);
|
|
873
|
+
this.chunksNs = opts.index.namespace(opts.chunksNamespace);
|
|
712
874
|
}
|
|
713
875
|
async upsertChunks(chunks, scope) {
|
|
714
876
|
if (chunks.length === 0) return;
|
|
715
|
-
const
|
|
716
|
-
const BATCH_SIZE = 100;
|
|
877
|
+
const BATCH_SIZE = 90;
|
|
717
878
|
for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
|
|
718
879
|
const batch = chunks.slice(i, i + BATCH_SIZE);
|
|
719
|
-
await
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
880
|
+
await this.chunksNs.upsert(
|
|
881
|
+
batch.map((c) => ({
|
|
882
|
+
id: c.id,
|
|
883
|
+
data: c.data,
|
|
884
|
+
metadata: {
|
|
885
|
+
...c.metadata,
|
|
886
|
+
projectId: scope.projectId,
|
|
887
|
+
scopeName: scope.scopeName,
|
|
888
|
+
type: c.metadata.type || "chunk"
|
|
889
|
+
}
|
|
890
|
+
}))
|
|
891
|
+
);
|
|
892
|
+
}
|
|
893
|
+
}
|
|
894
|
+
async search(data, opts, scope) {
|
|
895
|
+
const filterParts = [
|
|
896
|
+
`projectId = '${scope.projectId}'`,
|
|
897
|
+
`scopeName = '${scope.scopeName}'`
|
|
898
|
+
];
|
|
899
|
+
if (opts.filter) {
|
|
900
|
+
filterParts.push(opts.filter);
|
|
901
|
+
}
|
|
902
|
+
const results = await this.chunksNs.query({
|
|
903
|
+
data,
|
|
904
|
+
topK: opts.limit,
|
|
905
|
+
includeMetadata: true,
|
|
906
|
+
filter: filterParts.join(" AND "),
|
|
907
|
+
queryMode: QueryMode.HYBRID,
|
|
908
|
+
fusionAlgorithm: FusionAlgorithm.DBSF
|
|
909
|
+
});
|
|
910
|
+
return results.map((doc) => ({
|
|
911
|
+
id: String(doc.id),
|
|
912
|
+
score: doc.score,
|
|
913
|
+
metadata: {
|
|
914
|
+
projectId: doc.metadata?.projectId ?? "",
|
|
915
|
+
scopeName: doc.metadata?.scopeName ?? "",
|
|
916
|
+
url: doc.metadata?.url ?? "",
|
|
917
|
+
path: doc.metadata?.path ?? "",
|
|
918
|
+
title: doc.metadata?.title ?? "",
|
|
919
|
+
sectionTitle: doc.metadata?.sectionTitle ?? "",
|
|
920
|
+
headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
|
|
921
|
+
snippet: doc.metadata?.snippet ?? "",
|
|
922
|
+
chunkText: doc.metadata?.chunkText ?? "",
|
|
923
|
+
ordinal: doc.metadata?.ordinal ?? 0,
|
|
924
|
+
contentHash: doc.metadata?.contentHash ?? "",
|
|
925
|
+
depth: doc.metadata?.depth ?? 0,
|
|
926
|
+
incomingLinks: doc.metadata?.incomingLinks ?? 0,
|
|
927
|
+
routeFile: doc.metadata?.routeFile ?? "",
|
|
928
|
+
tags: doc.metadata?.tags ?? [],
|
|
929
|
+
description: doc.metadata?.description || void 0,
|
|
930
|
+
keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
|
|
931
|
+
publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
|
|
932
|
+
incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
|
|
933
|
+
}
|
|
934
|
+
}));
|
|
935
|
+
}
|
|
936
|
+
async searchChunksByUrl(data, url, opts, scope) {
|
|
937
|
+
const filterParts = [
|
|
938
|
+
`projectId = '${scope.projectId}'`,
|
|
939
|
+
`scopeName = '${scope.scopeName}'`,
|
|
940
|
+
`url = '${url}'`
|
|
941
|
+
];
|
|
942
|
+
if (opts.filter) {
|
|
943
|
+
filterParts.push(opts.filter);
|
|
944
|
+
}
|
|
945
|
+
const results = await this.chunksNs.query({
|
|
946
|
+
data,
|
|
947
|
+
topK: opts.limit,
|
|
948
|
+
includeMetadata: true,
|
|
949
|
+
filter: filterParts.join(" AND "),
|
|
950
|
+
queryMode: QueryMode.HYBRID,
|
|
951
|
+
fusionAlgorithm: FusionAlgorithm.DBSF
|
|
731
952
|
});
|
|
732
953
|
return results.map((doc) => ({
|
|
733
|
-
id: doc.id,
|
|
954
|
+
id: String(doc.id),
|
|
734
955
|
score: doc.score,
|
|
735
956
|
metadata: {
|
|
736
957
|
projectId: doc.metadata?.projectId ?? "",
|
|
737
958
|
scopeName: doc.metadata?.scopeName ?? "",
|
|
738
|
-
url: doc.
|
|
959
|
+
url: doc.metadata?.url ?? "",
|
|
739
960
|
path: doc.metadata?.path ?? "",
|
|
740
|
-
title: doc.
|
|
741
|
-
sectionTitle: doc.
|
|
742
|
-
headingPath: doc.
|
|
961
|
+
title: doc.metadata?.title ?? "",
|
|
962
|
+
sectionTitle: doc.metadata?.sectionTitle ?? "",
|
|
963
|
+
headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
|
|
743
964
|
snippet: doc.metadata?.snippet ?? "",
|
|
744
|
-
chunkText: doc.
|
|
965
|
+
chunkText: doc.metadata?.chunkText ?? "",
|
|
745
966
|
ordinal: doc.metadata?.ordinal ?? 0,
|
|
746
967
|
contentHash: doc.metadata?.contentHash ?? "",
|
|
747
968
|
depth: doc.metadata?.depth ?? 0,
|
|
748
969
|
incomingLinks: doc.metadata?.incomingLinks ?? 0,
|
|
749
970
|
routeFile: doc.metadata?.routeFile ?? "",
|
|
750
|
-
tags: doc.
|
|
971
|
+
tags: doc.metadata?.tags ?? [],
|
|
751
972
|
description: doc.metadata?.description || void 0,
|
|
752
|
-
keywords: doc.metadata?.keywords ? doc.metadata.keywords
|
|
973
|
+
keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
|
|
974
|
+
publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
|
|
975
|
+
incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
|
|
753
976
|
}
|
|
754
977
|
}));
|
|
755
978
|
}
|
|
756
|
-
async
|
|
757
|
-
|
|
979
|
+
async searchPagesByText(data, opts, scope) {
|
|
980
|
+
return this.queryPages({ data }, opts, scope);
|
|
981
|
+
}
|
|
982
|
+
async searchPagesByVector(vector, opts, scope) {
|
|
983
|
+
return this.queryPages({ vector }, opts, scope);
|
|
984
|
+
}
|
|
985
|
+
async queryPages(input, opts, scope) {
|
|
986
|
+
const filterParts = [
|
|
987
|
+
`projectId = '${scope.projectId}'`,
|
|
988
|
+
`scopeName = '${scope.scopeName}'`
|
|
989
|
+
];
|
|
990
|
+
if (opts.filter) {
|
|
991
|
+
filterParts.push(opts.filter);
|
|
992
|
+
}
|
|
758
993
|
let results;
|
|
759
994
|
try {
|
|
760
|
-
results = await
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
995
|
+
results = await this.pagesNs.query({
|
|
996
|
+
...input,
|
|
997
|
+
topK: opts.limit,
|
|
998
|
+
includeMetadata: true,
|
|
999
|
+
filter: filterParts.join(" AND "),
|
|
1000
|
+
queryMode: QueryMode.HYBRID,
|
|
1001
|
+
fusionAlgorithm: FusionAlgorithm.DBSF
|
|
767
1002
|
});
|
|
768
1003
|
} catch {
|
|
769
1004
|
return [];
|
|
770
1005
|
}
|
|
771
1006
|
return results.map((doc) => ({
|
|
772
|
-
id: doc.id,
|
|
1007
|
+
id: String(doc.id),
|
|
773
1008
|
score: doc.score,
|
|
774
|
-
title: doc.
|
|
775
|
-
url: doc.
|
|
776
|
-
description: doc.
|
|
777
|
-
tags: doc.
|
|
1009
|
+
title: doc.metadata?.title ?? "",
|
|
1010
|
+
url: doc.metadata?.url ?? "",
|
|
1011
|
+
description: doc.metadata?.description ?? "",
|
|
1012
|
+
tags: doc.metadata?.tags ?? [],
|
|
778
1013
|
depth: doc.metadata?.depth ?? 0,
|
|
779
1014
|
incomingLinks: doc.metadata?.incomingLinks ?? 0,
|
|
780
|
-
routeFile: doc.metadata?.routeFile ?? ""
|
|
1015
|
+
routeFile: doc.metadata?.routeFile ?? "",
|
|
1016
|
+
publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0
|
|
781
1017
|
}));
|
|
782
1018
|
}
|
|
783
|
-
async deleteByIds(ids,
|
|
1019
|
+
async deleteByIds(ids, _scope) {
|
|
784
1020
|
if (ids.length === 0) return;
|
|
785
|
-
const
|
|
786
|
-
const BATCH_SIZE = 500;
|
|
1021
|
+
const BATCH_SIZE = 90;
|
|
787
1022
|
for (let i = 0; i < ids.length; i += BATCH_SIZE) {
|
|
788
1023
|
const batch = ids.slice(i, i + BATCH_SIZE);
|
|
789
|
-
await
|
|
1024
|
+
await this.chunksNs.delete(batch);
|
|
790
1025
|
}
|
|
791
1026
|
}
|
|
792
1027
|
async deleteScope(scope) {
|
|
793
|
-
|
|
794
|
-
const
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
1028
|
+
for (const ns of [this.chunksNs, this.pagesNs]) {
|
|
1029
|
+
const ids = [];
|
|
1030
|
+
let cursor = "0";
|
|
1031
|
+
try {
|
|
1032
|
+
for (; ; ) {
|
|
1033
|
+
const result = await ns.range({
|
|
1034
|
+
cursor,
|
|
1035
|
+
limit: 100,
|
|
1036
|
+
includeMetadata: true
|
|
1037
|
+
});
|
|
1038
|
+
for (const doc of result.vectors) {
|
|
1039
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
|
|
1040
|
+
ids.push(String(doc.id));
|
|
1041
|
+
}
|
|
1042
|
+
}
|
|
1043
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
1044
|
+
cursor = result.nextCursor;
|
|
1045
|
+
}
|
|
1046
|
+
} catch {
|
|
1047
|
+
}
|
|
1048
|
+
if (ids.length > 0) {
|
|
1049
|
+
const BATCH_SIZE = 90;
|
|
1050
|
+
for (let i = 0; i < ids.length; i += BATCH_SIZE) {
|
|
1051
|
+
const batch = ids.slice(i, i + BATCH_SIZE);
|
|
1052
|
+
await ns.delete(batch);
|
|
1053
|
+
}
|
|
1054
|
+
}
|
|
802
1055
|
}
|
|
803
1056
|
}
|
|
804
1057
|
async listScopes(projectId) {
|
|
805
|
-
const
|
|
806
|
-
const
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
1058
|
+
const scopeMap = /* @__PURE__ */ new Map();
|
|
1059
|
+
for (const ns of [this.chunksNs, this.pagesNs]) {
|
|
1060
|
+
let cursor = "0";
|
|
1061
|
+
try {
|
|
1062
|
+
for (; ; ) {
|
|
1063
|
+
const result = await ns.range({
|
|
1064
|
+
cursor,
|
|
1065
|
+
limit: 100,
|
|
1066
|
+
includeMetadata: true
|
|
1067
|
+
});
|
|
1068
|
+
for (const doc of result.vectors) {
|
|
1069
|
+
if (doc.metadata?.projectId === projectId) {
|
|
1070
|
+
const scopeName = doc.metadata.scopeName ?? "";
|
|
1071
|
+
scopeMap.set(scopeName, (scopeMap.get(scopeName) ?? 0) + 1);
|
|
1072
|
+
}
|
|
1073
|
+
}
|
|
1074
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
1075
|
+
cursor = result.nextCursor;
|
|
1076
|
+
}
|
|
1077
|
+
} catch {
|
|
1078
|
+
}
|
|
1079
|
+
}
|
|
1080
|
+
return [...scopeMap.entries()].map(([scopeName, count]) => ({
|
|
1081
|
+
projectId,
|
|
1082
|
+
scopeName,
|
|
1083
|
+
lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1084
|
+
documentCount: count
|
|
1085
|
+
}));
|
|
1086
|
+
}
|
|
1087
|
+
async getContentHashes(scope) {
|
|
1088
|
+
return this.scanHashes(this.chunksNs, scope);
|
|
1089
|
+
}
|
|
1090
|
+
/**
|
|
1091
|
+
* Fetch content hashes for a specific set of chunk keys using direct fetch()
|
|
1092
|
+
* instead of range(). This avoids potential issues with range() returning
|
|
1093
|
+
* vectors from the wrong namespace on hybrid indexes.
|
|
1094
|
+
*/
|
|
1095
|
+
async fetchContentHashesForKeys(keys, scope) {
|
|
1096
|
+
const map = /* @__PURE__ */ new Map();
|
|
1097
|
+
if (keys.length === 0) return map;
|
|
1098
|
+
const BATCH_SIZE = 90;
|
|
1099
|
+
for (let i = 0; i < keys.length; i += BATCH_SIZE) {
|
|
1100
|
+
const batch = keys.slice(i, i + BATCH_SIZE);
|
|
821
1101
|
try {
|
|
822
|
-
const
|
|
823
|
-
|
|
824
|
-
projectId,
|
|
825
|
-
scopeName,
|
|
826
|
-
lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
827
|
-
documentCount: info.documentCount
|
|
1102
|
+
const results = await this.chunksNs.fetch(batch, {
|
|
1103
|
+
includeMetadata: true
|
|
828
1104
|
});
|
|
1105
|
+
for (const doc of results) {
|
|
1106
|
+
if (doc && doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
|
|
1107
|
+
map.set(String(doc.id), doc.metadata.contentHash);
|
|
1108
|
+
}
|
|
1109
|
+
}
|
|
829
1110
|
} catch {
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
1111
|
+
}
|
|
1112
|
+
}
|
|
1113
|
+
return map;
|
|
1114
|
+
}
|
|
1115
|
+
/**
|
|
1116
|
+
* Scan all IDs in the chunks namespace for this scope.
|
|
1117
|
+
* Used for deletion detection (finding stale chunk keys).
|
|
1118
|
+
*/
|
|
1119
|
+
async scanChunkIds(scope) {
|
|
1120
|
+
const ids = /* @__PURE__ */ new Set();
|
|
1121
|
+
let cursor = "0";
|
|
1122
|
+
try {
|
|
1123
|
+
for (; ; ) {
|
|
1124
|
+
const result = await this.chunksNs.range({
|
|
1125
|
+
cursor,
|
|
1126
|
+
limit: 100,
|
|
1127
|
+
includeMetadata: true
|
|
1128
|
+
});
|
|
1129
|
+
for (const doc of result.vectors) {
|
|
1130
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
|
|
1131
|
+
ids.add(String(doc.id));
|
|
1132
|
+
}
|
|
1133
|
+
}
|
|
1134
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
1135
|
+
cursor = result.nextCursor;
|
|
1136
|
+
}
|
|
1137
|
+
} catch {
|
|
1138
|
+
}
|
|
1139
|
+
return ids;
|
|
1140
|
+
}
|
|
1141
|
+
async scanHashes(ns, scope) {
|
|
1142
|
+
const map = /* @__PURE__ */ new Map();
|
|
1143
|
+
let cursor = "0";
|
|
1144
|
+
try {
|
|
1145
|
+
for (; ; ) {
|
|
1146
|
+
const result = await ns.range({
|
|
1147
|
+
cursor,
|
|
1148
|
+
limit: 100,
|
|
1149
|
+
includeMetadata: true
|
|
835
1150
|
});
|
|
1151
|
+
for (const doc of result.vectors) {
|
|
1152
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
|
|
1153
|
+
map.set(String(doc.id), doc.metadata.contentHash);
|
|
1154
|
+
}
|
|
1155
|
+
}
|
|
1156
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
1157
|
+
cursor = result.nextCursor;
|
|
836
1158
|
}
|
|
1159
|
+
} catch {
|
|
837
1160
|
}
|
|
838
|
-
return
|
|
1161
|
+
return map;
|
|
839
1162
|
}
|
|
840
|
-
async
|
|
1163
|
+
async listPages(scope, opts) {
|
|
1164
|
+
const cursor = opts?.cursor ?? "0";
|
|
1165
|
+
const limit = opts?.limit ?? 50;
|
|
1166
|
+
try {
|
|
1167
|
+
const result = await this.pagesNs.range({
|
|
1168
|
+
cursor,
|
|
1169
|
+
limit,
|
|
1170
|
+
includeMetadata: true
|
|
1171
|
+
});
|
|
1172
|
+
const pages = result.vectors.filter(
|
|
1173
|
+
(doc) => doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && (!opts?.pathPrefix || (doc.metadata?.url ?? "").startsWith(opts.pathPrefix))
|
|
1174
|
+
).map((doc) => ({
|
|
1175
|
+
url: doc.metadata?.url ?? "",
|
|
1176
|
+
title: doc.metadata?.title ?? "",
|
|
1177
|
+
description: doc.metadata?.description ?? "",
|
|
1178
|
+
routeFile: doc.metadata?.routeFile ?? ""
|
|
1179
|
+
}));
|
|
1180
|
+
const response = { pages };
|
|
1181
|
+
if (result.nextCursor && result.nextCursor !== "0") {
|
|
1182
|
+
response.nextCursor = result.nextCursor;
|
|
1183
|
+
}
|
|
1184
|
+
return response;
|
|
1185
|
+
} catch {
|
|
1186
|
+
return { pages: [] };
|
|
1187
|
+
}
|
|
1188
|
+
}
|
|
1189
|
+
async getPageHashes(scope) {
|
|
841
1190
|
const map = /* @__PURE__ */ new Map();
|
|
842
|
-
const index = this.chunkIndex(scope);
|
|
843
1191
|
let cursor = "0";
|
|
844
1192
|
try {
|
|
845
1193
|
for (; ; ) {
|
|
846
|
-
const result = await
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
1194
|
+
const result = await this.pagesNs.range({
|
|
1195
|
+
cursor,
|
|
1196
|
+
limit: 100,
|
|
1197
|
+
includeMetadata: true
|
|
1198
|
+
});
|
|
1199
|
+
for (const doc of result.vectors) {
|
|
1200
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
|
|
1201
|
+
map.set(String(doc.id), doc.metadata.contentHash);
|
|
850
1202
|
}
|
|
851
1203
|
}
|
|
852
1204
|
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
@@ -856,47 +1208,43 @@ var UpstashSearchStore = class {
|
|
|
856
1208
|
}
|
|
857
1209
|
return map;
|
|
858
1210
|
}
|
|
1211
|
+
async deletePagesByIds(ids, _scope) {
|
|
1212
|
+
if (ids.length === 0) return;
|
|
1213
|
+
const BATCH_SIZE = 90;
|
|
1214
|
+
for (let i = 0; i < ids.length; i += BATCH_SIZE) {
|
|
1215
|
+
const batch = ids.slice(i, i + BATCH_SIZE);
|
|
1216
|
+
await this.pagesNs.delete(batch);
|
|
1217
|
+
}
|
|
1218
|
+
}
|
|
859
1219
|
async upsertPages(pages, scope) {
|
|
860
1220
|
if (pages.length === 0) return;
|
|
861
|
-
const
|
|
862
|
-
const BATCH_SIZE = 50;
|
|
1221
|
+
const BATCH_SIZE = 90;
|
|
863
1222
|
for (let i = 0; i < pages.length; i += BATCH_SIZE) {
|
|
864
1223
|
const batch = pages.slice(i, i + BATCH_SIZE);
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
}
|
|
876
|
-
|
|
877
|
-
markdown: p.markdown,
|
|
878
|
-
projectId: p.projectId,
|
|
879
|
-
scopeName: p.scopeName,
|
|
880
|
-
routeFile: p.routeFile,
|
|
881
|
-
routeResolution: p.routeResolution,
|
|
882
|
-
incomingLinks: p.incomingLinks,
|
|
883
|
-
outgoingLinks: p.outgoingLinks,
|
|
884
|
-
depth: p.depth,
|
|
885
|
-
indexedAt: p.indexedAt
|
|
886
|
-
}
|
|
887
|
-
}));
|
|
888
|
-
await index.upsert(docs);
|
|
1224
|
+
await this.pagesNs.upsert(
|
|
1225
|
+
batch.map((p) => ({
|
|
1226
|
+
id: p.id,
|
|
1227
|
+
data: p.data,
|
|
1228
|
+
metadata: {
|
|
1229
|
+
...p.metadata,
|
|
1230
|
+
projectId: scope.projectId,
|
|
1231
|
+
scopeName: scope.scopeName,
|
|
1232
|
+
type: "page"
|
|
1233
|
+
}
|
|
1234
|
+
}))
|
|
1235
|
+
);
|
|
889
1236
|
}
|
|
890
1237
|
}
|
|
891
1238
|
async getPage(url, scope) {
|
|
892
|
-
const index = this.pageIndex(scope);
|
|
893
1239
|
try {
|
|
894
|
-
const results = await
|
|
1240
|
+
const results = await this.pagesNs.fetch([url], {
|
|
1241
|
+
includeMetadata: true
|
|
1242
|
+
});
|
|
895
1243
|
const doc = results[0];
|
|
896
|
-
if (!doc) return null;
|
|
1244
|
+
if (!doc || !doc.metadata) return null;
|
|
897
1245
|
return {
|
|
898
|
-
url: doc.
|
|
899
|
-
title: doc.
|
|
1246
|
+
url: doc.metadata.url,
|
|
1247
|
+
title: doc.metadata.title,
|
|
900
1248
|
markdown: doc.metadata.markdown,
|
|
901
1249
|
projectId: doc.metadata.projectId,
|
|
902
1250
|
scopeName: doc.metadata.scopeName,
|
|
@@ -904,27 +1252,86 @@ var UpstashSearchStore = class {
|
|
|
904
1252
|
routeResolution: doc.metadata.routeResolution,
|
|
905
1253
|
incomingLinks: doc.metadata.incomingLinks,
|
|
906
1254
|
outgoingLinks: doc.metadata.outgoingLinks,
|
|
1255
|
+
outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? void 0,
|
|
907
1256
|
depth: doc.metadata.depth,
|
|
908
|
-
tags: doc.
|
|
1257
|
+
tags: doc.metadata.tags ?? [],
|
|
909
1258
|
indexedAt: doc.metadata.indexedAt,
|
|
910
|
-
summary: doc.
|
|
911
|
-
description: doc.
|
|
912
|
-
keywords: doc.
|
|
1259
|
+
summary: doc.metadata.summary || void 0,
|
|
1260
|
+
description: doc.metadata.description || void 0,
|
|
1261
|
+
keywords: doc.metadata.keywords?.length ? doc.metadata.keywords : void 0,
|
|
1262
|
+
publishedAt: typeof doc.metadata.publishedAt === "number" ? doc.metadata.publishedAt : void 0
|
|
913
1263
|
};
|
|
914
1264
|
} catch {
|
|
915
1265
|
return null;
|
|
916
1266
|
}
|
|
917
1267
|
}
|
|
1268
|
+
async fetchPageWithVector(url, scope) {
|
|
1269
|
+
try {
|
|
1270
|
+
const results = await this.pagesNs.fetch([url], {
|
|
1271
|
+
includeMetadata: true,
|
|
1272
|
+
includeVectors: true
|
|
1273
|
+
});
|
|
1274
|
+
const doc = results[0];
|
|
1275
|
+
if (!doc || !doc.metadata || !doc.vector) return null;
|
|
1276
|
+
if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
|
|
1277
|
+
return null;
|
|
1278
|
+
}
|
|
1279
|
+
return { metadata: doc.metadata, vector: doc.vector };
|
|
1280
|
+
} catch {
|
|
1281
|
+
return null;
|
|
1282
|
+
}
|
|
1283
|
+
}
|
|
1284
|
+
async fetchPagesBatch(urls, scope) {
|
|
1285
|
+
if (urls.length === 0) return [];
|
|
1286
|
+
try {
|
|
1287
|
+
const results = await this.pagesNs.fetch(urls, {
|
|
1288
|
+
includeMetadata: true
|
|
1289
|
+
});
|
|
1290
|
+
const out = [];
|
|
1291
|
+
for (const doc of results) {
|
|
1292
|
+
if (!doc || !doc.metadata) continue;
|
|
1293
|
+
if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
|
|
1294
|
+
continue;
|
|
1295
|
+
}
|
|
1296
|
+
out.push({
|
|
1297
|
+
url: doc.metadata.url,
|
|
1298
|
+
title: doc.metadata.title,
|
|
1299
|
+
routeFile: doc.metadata.routeFile,
|
|
1300
|
+
outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? []
|
|
1301
|
+
});
|
|
1302
|
+
}
|
|
1303
|
+
return out;
|
|
1304
|
+
} catch {
|
|
1305
|
+
return [];
|
|
1306
|
+
}
|
|
1307
|
+
}
|
|
918
1308
|
async deletePages(scope) {
|
|
1309
|
+
const ids = [];
|
|
1310
|
+
let cursor = "0";
|
|
919
1311
|
try {
|
|
920
|
-
|
|
921
|
-
|
|
1312
|
+
for (; ; ) {
|
|
1313
|
+
const result = await this.pagesNs.range({
|
|
1314
|
+
cursor,
|
|
1315
|
+
limit: 100,
|
|
1316
|
+
includeMetadata: true
|
|
1317
|
+
});
|
|
1318
|
+
for (const doc of result.vectors) {
|
|
1319
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
|
|
1320
|
+
ids.push(String(doc.id));
|
|
1321
|
+
}
|
|
1322
|
+
}
|
|
1323
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
1324
|
+
cursor = result.nextCursor;
|
|
1325
|
+
}
|
|
922
1326
|
} catch {
|
|
923
1327
|
}
|
|
1328
|
+
if (ids.length > 0) {
|
|
1329
|
+
await this.deletePagesByIds(ids, scope);
|
|
1330
|
+
}
|
|
924
1331
|
}
|
|
925
1332
|
async health() {
|
|
926
1333
|
try {
|
|
927
|
-
await this.
|
|
1334
|
+
await this.index.info();
|
|
928
1335
|
return { ok: true };
|
|
929
1336
|
} catch (error) {
|
|
930
1337
|
return {
|
|
@@ -934,14 +1341,31 @@ var UpstashSearchStore = class {
|
|
|
934
1341
|
}
|
|
935
1342
|
}
|
|
936
1343
|
async dropAllIndexes(projectId) {
|
|
937
|
-
const
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
const
|
|
943
|
-
|
|
944
|
-
|
|
1344
|
+
for (const ns of [this.chunksNs, this.pagesNs]) {
|
|
1345
|
+
const ids = [];
|
|
1346
|
+
let cursor = "0";
|
|
1347
|
+
try {
|
|
1348
|
+
for (; ; ) {
|
|
1349
|
+
const result = await ns.range({
|
|
1350
|
+
cursor,
|
|
1351
|
+
limit: 100,
|
|
1352
|
+
includeMetadata: true
|
|
1353
|
+
});
|
|
1354
|
+
for (const doc of result.vectors) {
|
|
1355
|
+
if (doc.metadata?.projectId === projectId) {
|
|
1356
|
+
ids.push(String(doc.id));
|
|
1357
|
+
}
|
|
1358
|
+
}
|
|
1359
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
1360
|
+
cursor = result.nextCursor;
|
|
1361
|
+
}
|
|
1362
|
+
} catch {
|
|
1363
|
+
}
|
|
1364
|
+
if (ids.length > 0) {
|
|
1365
|
+
const BATCH_SIZE = 90;
|
|
1366
|
+
for (let i = 0; i < ids.length; i += BATCH_SIZE) {
|
|
1367
|
+
const batch = ids.slice(i, i + BATCH_SIZE);
|
|
1368
|
+
await ns.delete(batch);
|
|
945
1369
|
}
|
|
946
1370
|
}
|
|
947
1371
|
}
|
|
@@ -955,12 +1379,16 @@ async function createUpstashStore(config) {
|
|
|
955
1379
|
if (!url || !token) {
|
|
956
1380
|
throw new SearchSocketError(
|
|
957
1381
|
"VECTOR_BACKEND_UNAVAILABLE",
|
|
958
|
-
`Missing Upstash
|
|
1382
|
+
`Missing Upstash Vector credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
|
|
959
1383
|
);
|
|
960
1384
|
}
|
|
961
|
-
const {
|
|
962
|
-
const
|
|
963
|
-
return new UpstashSearchStore({
|
|
1385
|
+
const { Index } = await import("@upstash/vector");
|
|
1386
|
+
const index = new Index({ url, token });
|
|
1387
|
+
return new UpstashSearchStore({
|
|
1388
|
+
index,
|
|
1389
|
+
pagesNamespace: config.upstash.namespaces.pages,
|
|
1390
|
+
chunksNamespace: config.upstash.namespaces.chunks
|
|
1391
|
+
});
|
|
964
1392
|
}
|
|
965
1393
|
|
|
966
1394
|
// src/utils/hash.ts
|
|
@@ -1034,6 +1462,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
|
|
|
1034
1462
|
if (normalizeText(current.text)) {
|
|
1035
1463
|
sections.push({
|
|
1036
1464
|
sectionTitle: current.sectionTitle,
|
|
1465
|
+
headingLevel: current.headingLevel,
|
|
1037
1466
|
headingPath: current.headingPath,
|
|
1038
1467
|
text: current.text.trim()
|
|
1039
1468
|
});
|
|
@@ -1052,6 +1481,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
|
|
|
1052
1481
|
headingStack.length = level;
|
|
1053
1482
|
current = {
|
|
1054
1483
|
sectionTitle: title,
|
|
1484
|
+
headingLevel: level,
|
|
1055
1485
|
headingPath: headingStack.filter((entry) => Boolean(entry)).slice(0, headingPathDepth),
|
|
1056
1486
|
text: `${line}
|
|
1057
1487
|
`
|
|
@@ -1071,8 +1501,8 @@ function parseHeadingSections(markdown, headingPathDepth) {
|
|
|
1071
1501
|
}
|
|
1072
1502
|
return sections;
|
|
1073
1503
|
}
|
|
1074
|
-
function blockify(
|
|
1075
|
-
const lines =
|
|
1504
|
+
function blockify(text2, config) {
|
|
1505
|
+
const lines = text2.split("\n");
|
|
1076
1506
|
const blocks = [];
|
|
1077
1507
|
let inFence = false;
|
|
1078
1508
|
let current = [];
|
|
@@ -1179,20 +1609,21 @@ function splitOversizedBlock(block, config) {
|
|
|
1179
1609
|
return chunks.length > 0 ? chunks : [trimmed];
|
|
1180
1610
|
}
|
|
1181
1611
|
function splitSection(section, config) {
|
|
1182
|
-
const
|
|
1183
|
-
if (!
|
|
1612
|
+
const text2 = section.text.trim();
|
|
1613
|
+
if (!text2) {
|
|
1184
1614
|
return [];
|
|
1185
1615
|
}
|
|
1186
|
-
if (
|
|
1616
|
+
if (text2.length <= config.maxChars) {
|
|
1187
1617
|
return [
|
|
1188
1618
|
{
|
|
1189
1619
|
sectionTitle: section.sectionTitle,
|
|
1620
|
+
headingLevel: section.headingLevel,
|
|
1190
1621
|
headingPath: section.headingPath,
|
|
1191
|
-
chunkText:
|
|
1622
|
+
chunkText: text2
|
|
1192
1623
|
}
|
|
1193
1624
|
];
|
|
1194
1625
|
}
|
|
1195
|
-
const blocks = blockify(
|
|
1626
|
+
const blocks = blockify(text2, config);
|
|
1196
1627
|
const chunks = [];
|
|
1197
1628
|
let current = "";
|
|
1198
1629
|
for (const block of blocks) {
|
|
@@ -1237,6 +1668,7 @@ ${chunk}`;
|
|
|
1237
1668
|
}
|
|
1238
1669
|
return merged.map((chunkText) => ({
|
|
1239
1670
|
sectionTitle: section.sectionTitle,
|
|
1671
|
+
headingLevel: section.headingLevel,
|
|
1240
1672
|
headingPath: section.headingPath,
|
|
1241
1673
|
chunkText
|
|
1242
1674
|
}));
|
|
@@ -1252,6 +1684,18 @@ function buildSummaryChunkText(page) {
|
|
|
1252
1684
|
}
|
|
1253
1685
|
return parts.join("\n\n");
|
|
1254
1686
|
}
|
|
1687
|
+
/**
 * Build the title string for a chunk: the page title, an em dash, and the
 * heading trail joined with " > ".
 *
 * @param {{ title: string, sectionTitle?: string, headingLevel?: number, headingPath: string[] }} chunk
 * @returns {string | undefined} `undefined` when the chunk has no section
 *   title or no heading level.
 */
function buildEmbeddingTitle(chunk) {
  const { title, sectionTitle, headingLevel, headingPath } = chunk;
  if (!sectionTitle || headingLevel === void 0) return void 0;
  // With at most one heading in the path, the section title alone is the trail.
  if (headingPath.length <= 1) return `${title} \u2014 ${sectionTitle}`;
  // Append the section title only when the path does not already end with it.
  const endsWithSection = headingPath[headingPath.length - 1] === sectionTitle;
  const trail = endsWithSection ? headingPath : [...headingPath, sectionTitle];
  return `${title} \u2014 ${trail.join(" > ")}`;
}
|
|
1255
1699
|
function buildEmbeddingText(chunk, prependTitle) {
|
|
1256
1700
|
if (!prependTitle) return chunk.chunkText;
|
|
1257
1701
|
const prefix = chunk.sectionTitle ? `${chunk.title} \u2014 ${chunk.sectionTitle}` : chunk.title;
|
|
@@ -1282,10 +1726,14 @@ function chunkPage(page, config, scope) {
|
|
|
1282
1726
|
tags: page.tags,
|
|
1283
1727
|
contentHash: "",
|
|
1284
1728
|
description: page.description,
|
|
1285
|
-
keywords: page.keywords
|
|
1729
|
+
keywords: page.keywords,
|
|
1730
|
+
publishedAt: page.publishedAt,
|
|
1731
|
+
incomingAnchorText: page.incomingAnchorText,
|
|
1732
|
+
meta: page.meta
|
|
1286
1733
|
};
|
|
1287
1734
|
const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
|
|
1288
|
-
|
|
1735
|
+
const metaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
|
|
1736
|
+
summaryChunk.contentHash = sha256(normalizeText(embeddingText) + metaSuffix);
|
|
1289
1737
|
chunks.push(summaryChunk);
|
|
1290
1738
|
}
|
|
1291
1739
|
const ordinalOffset = config.chunking.pageSummaryChunk ? 1 : 0;
|
|
@@ -1302,6 +1750,7 @@ function chunkPage(page, config, scope) {
|
|
|
1302
1750
|
path: page.url,
|
|
1303
1751
|
title: page.title,
|
|
1304
1752
|
sectionTitle: entry.sectionTitle,
|
|
1753
|
+
headingLevel: entry.headingLevel,
|
|
1305
1754
|
headingPath: entry.headingPath,
|
|
1306
1755
|
chunkText: entry.chunkText,
|
|
1307
1756
|
snippet: toSnippet(entry.chunkText),
|
|
@@ -1311,10 +1760,16 @@ function chunkPage(page, config, scope) {
|
|
|
1311
1760
|
tags: page.tags,
|
|
1312
1761
|
contentHash: "",
|
|
1313
1762
|
description: page.description,
|
|
1314
|
-
keywords: page.keywords
|
|
1763
|
+
keywords: page.keywords,
|
|
1764
|
+
publishedAt: page.publishedAt,
|
|
1765
|
+
incomingAnchorText: page.incomingAnchorText,
|
|
1766
|
+
meta: page.meta
|
|
1315
1767
|
};
|
|
1316
1768
|
const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
|
|
1317
|
-
|
|
1769
|
+
const embeddingTitle = config.chunking.weightHeadings ? buildEmbeddingTitle(chunk) : void 0;
|
|
1770
|
+
const chunkMetaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
|
|
1771
|
+
const hashInput = embeddingTitle ? `${normalizeText(embeddingText)}|title:${embeddingTitle}` : normalizeText(embeddingText);
|
|
1772
|
+
chunk.contentHash = sha256(hashInput + chunkMetaSuffix);
|
|
1318
1773
|
chunks.push(chunk);
|
|
1319
1774
|
}
|
|
1320
1775
|
return chunks;
|
|
@@ -1325,6 +1780,113 @@ import { load } from "cheerio";
|
|
|
1325
1780
|
import matter from "gray-matter";
|
|
1326
1781
|
import TurndownService from "turndown";
|
|
1327
1782
|
import { gfm, highlightedCodeBlock, strikethrough, tables, taskListItems } from "turndown-plugin-gfm";
|
|
1783
|
+
|
|
1784
|
+
// src/utils/structured-meta.ts
|
|
1785
|
+
// Identifier-shaped keys only: a letter or underscore, then letters, digits or underscores.
var VALID_KEY_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
/**
 * Report whether `key` is usable as a structured-meta key.
 *
 * @param {string} key - candidate key.
 * @returns {boolean} true when the whole key matches VALID_KEY_RE.
 */
function validateMetaKey(key) {
  const isIdentifier = VALID_KEY_RE.test(key);
  return isIdentifier;
}
|
|
1789
|
+
/**
 * Convert the raw `content` string of a meta tag into a typed value.
 *
 * - "number" / "date": a finite number when `content` parses as one;
 *   otherwise the raw string is returned unchanged. Blank content is NOT
 *   treated as 0 (`Number("")` is 0, which would silently invent a value).
 * - "boolean": true only for the exact string "true".
 * - "string[]": comma-separated list, each entry trimmed, empty entries dropped.
 * - anything else: the raw string.
 *
 * @param {string} content - raw attribute text.
 * @param {string} dataType - declared type of the value.
 * @returns {string | number | boolean | string[]}
 */
function parseMetaValue(content, dataType) {
  switch (dataType) {
    case "number":
    case "date": {
      // Guard first: Number("") and Number("   ") both evaluate to 0.
      if (content.trim() === "") return content;
      const parsed = Number(content);
      return Number.isFinite(parsed) ? parsed : content;
    }
    case "boolean":
      return content === "true";
    case "string[]":
      return content ? content.split(",").map((s) => s.trim()).filter(Boolean) : [];
    default:
      return content;
  }
}
|
|
1807
|
+
/**
 * Double every single quote in `s` so the text can sit inside a
 * single-quoted filter literal.
 *
 * @param {string} s - raw value.
 * @returns {string} the value with each ' replaced by ''.
 */
function escapeFilterValue(s) {
  const pieces = s.split("'");
  return pieces.join("''");
}
|
|
1810
|
+
/**
 * Turn a `{ key: value }` filter object into a filter expression over the
 * `meta.*` fields, with clauses joined by " AND ".
 *
 * - Keys that fail validateMetaKey are skipped.
 * - String values become `meta.key CONTAINS '<escaped>'`.
 * - Booleans and finite numbers become `meta.key = <value>`.
 * - Every other value (arrays, objects, null, NaN, Infinity, ...) is skipped:
 *   interpolating it raw would coerce it with String() and could place
 *   arbitrary, unescaped text into the filter expression.
 *
 * @param {Record<string, unknown>} filters - requested meta filters.
 * @returns {string} the combined expression; "" when no clause was produced.
 */
function buildMetaFilterString(filters) {
  const clauses = [];
  for (const [key, value] of Object.entries(filters)) {
    if (!validateMetaKey(key)) continue;
    const field = `meta.${key}`;
    if (typeof value === "string") {
      clauses.push(`${field} CONTAINS '${escapeFilterValue(value)}'`);
    } else if (typeof value === "boolean" || (typeof value === "number" && Number.isFinite(value))) {
      clauses.push(`${field} = ${value}`);
    }
    // Unsupported value types fall through without emitting a clause.
  }
  return clauses.join(" AND ");
}
|
|
1825
|
+
|
|
1826
|
+
// src/indexing/extractor.ts
|
|
1827
|
+
/**
 * Normalise a date-like value to a finite millisecond timestamp.
 *
 * Accepts Date instances, strings (parsed with `new Date(string)`) and
 * numbers (taken as already being a timestamp).
 *
 * @param {unknown} value - candidate date.
 * @returns {number | undefined} the timestamp, or `undefined` for any other
 *   type and for values that do not yield a finite number.
 */
function normalizeDateToMs(value) {
  let millis;
  if (value instanceof Date) {
    millis = value.getTime();
  } else {
    switch (typeof value) {
      case "string":
        millis = new Date(value).getTime();
        break;
      case "number":
        millis = value;
        break;
      default:
        // null, undefined, booleans, plain objects, ...
        return void 0;
    }
  }
  // Invalid dates produce NaN; NaN and +/-Infinity are rejected alike.
  return Number.isFinite(millis) ? millis : void 0;
}
|
|
1842
|
+
// Frontmatter keys consulted for a publication date, in priority order.
var FRONTMATTER_DATE_FIELDS = ["date", "publishedAt", "updatedAt", "published_at", "updated_at"];
/**
 * Pick a publication timestamp from parsed frontmatter.
 *
 * Fields are tried in FRONTMATTER_DATE_FIELDS order; the first one that
 * normalizeDateToMs accepts wins and later fields are not read.
 *
 * @param {Record<string, unknown>} data - parsed frontmatter object.
 * @returns {number | undefined} millisecond timestamp, or `undefined` when no field yields one.
 */
function extractPublishedAtFromFrontmatter(data) {
  let publishedAt;
  // `some` stops at the first field that produces a timestamp.
  FRONTMATTER_DATE_FIELDS.some((field) => {
    publishedAt = normalizeDateToMs(data[field]);
    return publishedAt !== void 0;
  });
  return publishedAt;
}
|
|
1850
|
+
/**
 * Find a publication timestamp in an HTML document.
 *
 * Sources are tried in this order and the first usable value wins:
 *   1. `datePublished` in any JSON-LD script (top-level object, top-level
 *      array, or entries of an object's `@graph` array);
 *   2. `<meta property="article:published_time">`;
 *   3. `<meta itemprop="datePublished">`, then `<time itemprop="datePublished">`;
 *   4. the first `<time datetime>` element in the document.
 *
 * @param {Function} $ - loaded document query function (as produced by `load(html)`).
 * @returns {number | undefined} millisecond timestamp, or `undefined` when nothing parses.
 */
function extractPublishedAtFromHtml($) {
  const jsonLdScripts = $('script[type="application/ld+json"]');
  for (let i = 0; i < jsonLdScripts.length; i++) {
    try {
      const raw = $(jsonLdScripts[i]).html();
      if (!raw) continue;
      const parsed = JSON.parse(raw);
      let candidates = [];
      if (Array.isArray(parsed)) {
        candidates = parsed;
      } else if (parsed && typeof parsed === "object") {
        candidates = Array.isArray(parsed["@graph"]) ? [parsed, ...parsed["@graph"]] : [parsed];
      }
      for (const candidate of candidates) {
        // Optional access: a null entry must not throw and thereby discard
        // the remaining candidates of this script.
        const val = normalizeDateToMs(candidate?.datePublished);
        if (val !== void 0) return val;
      }
    } catch {
      // Malformed JSON-LD: ignore this script and move on to the next one.
    }
  }
  // Attribute-based fallbacks, read lazily in priority order.
  const fallbacks = [
    () => $('meta[property="article:published_time"]').attr("content")?.trim(),
    () => $('meta[itemprop="datePublished"]').attr("content")?.trim() || $('time[itemprop="datePublished"]').attr("datetime")?.trim(),
    () => $("time[datetime]").first().attr("datetime")?.trim()
  ];
  for (const readAttr of fallbacks) {
    const text = readAttr();
    if (!text) continue;
    const val = normalizeDateToMs(text);
    if (val !== void 0) return val;
  }
  return void 0;
}
|
|
1328
1890
|
function hasTopLevelNoindexComment(markdown) {
|
|
1329
1891
|
const lines = markdown.split(/\r?\n/);
|
|
1330
1892
|
let inFence = false;
|
|
@@ -1340,6 +1902,97 @@ function hasTopLevelNoindexComment(markdown) {
|
|
|
1340
1902
|
}
|
|
1341
1903
|
return false;
|
|
1342
1904
|
}
|
|
1905
|
+
// Single-word alt texts that describe nothing about the image itself.
var GARBAGE_ALT_WORDS = /* @__PURE__ */ new Set([
  "image", "photo", "picture", "icon",
  "logo", "banner", "screenshot", "thumbnail",
  "img", "graphic", "illustration", "spacer",
  "pixel", "placeholder", "avatar", "background"
]);
// Alt text that ends in an image file extension (optionally followed by a query string).
var IMAGE_EXT_RE = /\.(jpg|jpeg|png|gif|svg|webp|avif|bmp|ico)(\?.*)?$/i;
/**
 * Decide whether an image's alt text is worth indexing.
 *
 * @param {string} alt - raw alt attribute value.
 * @returns {boolean} false when the trimmed text is shorter than 5
 *   characters, looks like an image file name, or is (case-insensitively)
 *   one of GARBAGE_ALT_WORDS; true otherwise.
 */
function isMeaningfulAlt(alt) {
  const candidate = alt.trim();
  return (
    candidate.length >= 5 &&
    !IMAGE_EXT_RE.test(candidate) &&
    !GARBAGE_ALT_WORDS.has(candidate.toLowerCase())
  );
}
|
|
1931
|
+
/**
 * Choose the text that should stand in for an image.
 *
 * Priority: the image's own description attribute, then the enclosing
 * figure's description attribute, then "alt — caption" when both a
 * meaningful alt and a figcaption exist, then the meaningful alt alone, then
 * the figcaption alone.
 *
 * @param {object} img - wrapped `<img>` element.
 * @param {Function} $ - document query function (not read here).
 * @param {string} imageDescAttr - name of the description attribute.
 * @returns {string | null} replacement text, or `null` when nothing usable exists.
 */
function resolveImageText(img, $, imageDescAttr) {
  const ownDescription = img.attr(imageDescAttr)?.trim();
  if (ownDescription) return ownDescription;
  const figure = img.closest("figure");
  const insideFigure = Boolean(figure.length);
  if (insideFigure) {
    const figureDescription = figure.attr(imageDescAttr)?.trim();
    if (figureDescription) return figureDescription;
  }
  const alt = img.attr("alt")?.trim() ?? "";
  const caption = insideFigure ? figure.find("figcaption").first().text().trim() : "";
  if (!isMeaningfulAlt(alt)) {
    return caption || null;
  }
  return caption ? `${alt} \u2014 ${caption}` : alt;
}
|
|
1952
|
+
// Anchor phrases that say nothing about the link target.
var STOP_ANCHORS = /* @__PURE__ */ new Set([
  "here", "click", "click here", "read more", "link", "this", "more"
]);
/**
 * Normalise link text: collapse whitespace runs, trim, and lower-case.
 *
 * @param {string} raw - text content of the anchor.
 * @returns {string} the normalised text capped at 100 characters, or "" when
 *   it is shorter than 3 characters or is one of STOP_ANCHORS.
 */
function normalizeAnchorText(raw) {
  const collapsed = raw.replace(/\s+/g, " ").trim().toLowerCase();
  const unusable = collapsed.length < 3 || STOP_ANCHORS.has(collapsed);
  return unusable ? "" : collapsed.slice(0, 100);
}
|
|
1968
|
+
/**
 * Escape the characters that are significant in HTML text content so the
 * result can be placed between tags without being parsed as markup.
 *
 * The ampersand is replaced first so the entities produced for "<" and ">"
 * are not escaped a second time.
 *
 * @param {string} text2 - raw text.
 * @returns {string} text with &, < and > replaced by their named entities.
 */
function escapeHtml(text2) {
  // Entities are assembled from their names so that tooling which decodes
  // HTML entities in source text cannot silently turn this into a no-op.
  const entity = (name) => `&${name};`;
  return text2
    .replace(/&/g, entity("amp"))
    .replace(/</g, entity("lt"))
    .replace(/>/g, entity("gt"));
}
|
|
1971
|
+
/**
 * Replace images under `root` with their textual description.
 *
 * `<picture>` elements are handled first (using their first `<img>`), then
 * every remaining `<img>`. An element with resolvable text becomes a
 * `<span>` holding that text, and any figcaption of its enclosing figure is
 * removed; an element without text is removed outright.
 *
 * @param {object} root - wrapped content root to mutate in place.
 * @param {Function} $ - document query function used to wrap raw elements.
 * @param {string} imageDescAttr - name of the image description attribute.
 * @returns {void}
 */
function preprocessImages(root, $, imageDescAttr) {
  // Swap `node` for a text span, or drop it when there is no text.
  const substitute = (node, text2) => {
    if (!text2) {
      node.remove();
      return;
    }
    const parentFigure = node.closest("figure");
    if (parentFigure.length) parentFigure.find("figcaption").remove();
    node.replaceWith(`<span>${escapeHtml(text2)}</span>`);
  };
  root.find("picture").each((_i, el) => {
    const picture = $(el);
    const img = picture.find("img").first();
    substitute(picture, img.length ? resolveImageText(img, $, imageDescAttr) : null);
  });
  root.find("img").each((_i, el) => {
    const img = $(el);
    substitute(img, resolveImageText(img, $, imageDescAttr));
  });
}
|
|
1343
1996
|
function extractFromHtml(url, html, config) {
|
|
1344
1997
|
const $ = load(html);
|
|
1345
1998
|
const normalizedUrl = normalizeUrlPath(url);
|
|
@@ -1365,6 +2018,20 @@ function extractFromHtml(url, html, config) {
|
|
|
1365
2018
|
if (weight === 0) {
|
|
1366
2019
|
return null;
|
|
1367
2020
|
}
|
|
2021
|
+
if ($('meta[name="searchsocket:noindex"]').attr("content") === "true") {
|
|
2022
|
+
return null;
|
|
2023
|
+
}
|
|
2024
|
+
const RESERVED_META_KEYS = /* @__PURE__ */ new Set(["noindex", "tags"]);
|
|
2025
|
+
const meta = {};
|
|
2026
|
+
$('meta[name^="searchsocket:"]').each((_i, el) => {
|
|
2027
|
+
const name = $(el).attr("name") ?? "";
|
|
2028
|
+
const key = name.slice("searchsocket:".length);
|
|
2029
|
+
if (!key || RESERVED_META_KEYS.has(key) || !validateMetaKey(key)) return;
|
|
2030
|
+
const content = $(el).attr("content") ?? "";
|
|
2031
|
+
const dataType = $(el).attr("data-type") ?? "string";
|
|
2032
|
+
meta[key] = parseMetaValue(content, dataType);
|
|
2033
|
+
});
|
|
2034
|
+
const componentTags = $('meta[name="searchsocket:tags"]').attr("content")?.trim();
|
|
1368
2035
|
const description = $("meta[name='description']").attr("content")?.trim() || $("meta[property='og:description']").attr("content")?.trim() || void 0;
|
|
1369
2036
|
const keywordsRaw = $("meta[name='keywords']").attr("content")?.trim();
|
|
1370
2037
|
const keywords = keywordsRaw ? keywordsRaw.split(",").map((k) => k.trim()).filter(Boolean) : void 0;
|
|
@@ -1376,7 +2043,9 @@ function extractFromHtml(url, html, config) {
|
|
|
1376
2043
|
root.find(selector).remove();
|
|
1377
2044
|
}
|
|
1378
2045
|
root.find(`[${config.extract.ignoreAttr}]`).remove();
|
|
2046
|
+
preprocessImages(root, $, config.extract.imageDescAttr);
|
|
1379
2047
|
const outgoingLinks = [];
|
|
2048
|
+
const seenLinkKeys = /* @__PURE__ */ new Set();
|
|
1380
2049
|
root.find("a[href]").each((_index, node) => {
|
|
1381
2050
|
const href = $(node).attr("href");
|
|
1382
2051
|
if (!href || href.startsWith("#") || href.startsWith("mailto:") || href.startsWith("tel:")) {
|
|
@@ -1387,7 +2056,19 @@ function extractFromHtml(url, html, config) {
|
|
|
1387
2056
|
if (!["http:", "https:"].includes(parsed.protocol)) {
|
|
1388
2057
|
return;
|
|
1389
2058
|
}
|
|
1390
|
-
|
|
2059
|
+
const url2 = normalizeUrlPath(parsed.pathname);
|
|
2060
|
+
let anchorText = normalizeAnchorText($(node).text());
|
|
2061
|
+
if (!anchorText) {
|
|
2062
|
+
const imgAlt = $(node).find("img").first().attr("alt") ?? "";
|
|
2063
|
+
if (isMeaningfulAlt(imgAlt)) {
|
|
2064
|
+
anchorText = normalizeAnchorText(imgAlt);
|
|
2065
|
+
}
|
|
2066
|
+
}
|
|
2067
|
+
const key = `${url2}|${anchorText}`;
|
|
2068
|
+
if (!seenLinkKeys.has(key)) {
|
|
2069
|
+
seenLinkKeys.add(key);
|
|
2070
|
+
outgoingLinks.push({ url: url2, anchorText });
|
|
2071
|
+
}
|
|
1391
2072
|
} catch {
|
|
1392
2073
|
}
|
|
1393
2074
|
});
|
|
@@ -1412,16 +2093,25 @@ function extractFromHtml(url, html, config) {
|
|
|
1412
2093
|
return null;
|
|
1413
2094
|
}
|
|
1414
2095
|
const tags = normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1);
|
|
2096
|
+
const publishedAt = extractPublishedAtFromHtml($);
|
|
2097
|
+
if (componentTags) {
|
|
2098
|
+
const extraTags = componentTags.split(",").map((t) => t.trim()).filter(Boolean);
|
|
2099
|
+
for (const t of extraTags) {
|
|
2100
|
+
if (!tags.includes(t)) tags.push(t);
|
|
2101
|
+
}
|
|
2102
|
+
}
|
|
1415
2103
|
return {
|
|
1416
2104
|
url: normalizeUrlPath(url),
|
|
1417
2105
|
title,
|
|
1418
2106
|
markdown,
|
|
1419
|
-
outgoingLinks
|
|
2107
|
+
outgoingLinks,
|
|
1420
2108
|
noindex: false,
|
|
1421
2109
|
tags,
|
|
1422
2110
|
description,
|
|
1423
2111
|
keywords,
|
|
1424
|
-
weight
|
|
2112
|
+
weight,
|
|
2113
|
+
publishedAt,
|
|
2114
|
+
meta: Object.keys(meta).length > 0 ? meta : void 0
|
|
1425
2115
|
};
|
|
1426
2116
|
}
|
|
1427
2117
|
function extractFromMarkdown(url, markdown, title) {
|
|
@@ -1442,6 +2132,24 @@ function extractFromMarkdown(url, markdown, title) {
|
|
|
1442
2132
|
if (mdWeight === 0) {
|
|
1443
2133
|
return null;
|
|
1444
2134
|
}
|
|
2135
|
+
let mdMeta;
|
|
2136
|
+
const rawMeta = searchsocketMeta?.meta;
|
|
2137
|
+
if (rawMeta && typeof rawMeta === "object" && !Array.isArray(rawMeta)) {
|
|
2138
|
+
const metaObj = {};
|
|
2139
|
+
for (const [key, val] of Object.entries(rawMeta)) {
|
|
2140
|
+
if (!validateMetaKey(key)) continue;
|
|
2141
|
+
if (typeof val === "string" || typeof val === "number" || typeof val === "boolean") {
|
|
2142
|
+
metaObj[key] = val;
|
|
2143
|
+
} else if (Array.isArray(val) && val.every((v) => typeof v === "string")) {
|
|
2144
|
+
metaObj[key] = val;
|
|
2145
|
+
} else if (val instanceof Date) {
|
|
2146
|
+
metaObj[key] = val.getTime();
|
|
2147
|
+
}
|
|
2148
|
+
}
|
|
2149
|
+
if (Object.keys(metaObj).length > 0) {
|
|
2150
|
+
mdMeta = metaObj;
|
|
2151
|
+
}
|
|
2152
|
+
}
|
|
1445
2153
|
const content = parsed.content;
|
|
1446
2154
|
const normalized = normalizeMarkdown(content);
|
|
1447
2155
|
if (!normalizeText(normalized)) {
|
|
@@ -1456,6 +2164,7 @@ function extractFromMarkdown(url, markdown, title) {
|
|
|
1456
2164
|
fmKeywords = frontmatter.keywords.split(",").map((k) => k.trim()).filter(Boolean);
|
|
1457
2165
|
}
|
|
1458
2166
|
if (fmKeywords && fmKeywords.length === 0) fmKeywords = void 0;
|
|
2167
|
+
const publishedAt = extractPublishedAtFromFrontmatter(frontmatter);
|
|
1459
2168
|
return {
|
|
1460
2169
|
url: normalizeUrlPath(url),
|
|
1461
2170
|
title: resolvedTitle,
|
|
@@ -1465,7 +2174,9 @@ function extractFromMarkdown(url, markdown, title) {
|
|
|
1465
2174
|
tags: normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1),
|
|
1466
2175
|
description: fmDescription,
|
|
1467
2176
|
keywords: fmKeywords,
|
|
1468
|
-
weight: mdWeight
|
|
2177
|
+
weight: mdWeight,
|
|
2178
|
+
publishedAt,
|
|
2179
|
+
meta: mdMeta
|
|
1469
2180
|
};
|
|
1470
2181
|
}
|
|
1471
2182
|
|
|
@@ -1919,6 +2630,125 @@ function filePathToUrl(filePath, baseDir) {
|
|
|
1919
2630
|
const noExt = relative.replace(/\.md$/i, "").replace(/\/index$/i, "");
|
|
1920
2631
|
return normalizeUrlPath(noExt || "/");
|
|
1921
2632
|
}
|
|
2633
|
+
// Route-level Svelte files: +page, +layout or +error, optionally with an "@target" suffix.
var ROUTE_FILE_RE = /\+(page|layout|error)(@[^.]+)?\.svelte$/;
/**
 * Report whether a path names a Svelte component that is not a route file.
 *
 * @param {string} filePath - path to test.
 * @returns {boolean} true for `*.svelte` paths that do not match ROUTE_FILE_RE.
 */
function isSvelteComponentFile(filePath) {
  return filePath.endsWith(".svelte") && !ROUTE_FILE_RE.test(filePath);
}
|
|
2638
|
+
/**
 * Extract documentation metadata from Svelte component source text.
 *
 * - `description`: the body of the first `<!-- @component ... -->` comment.
 * - `props`: one entry per name destructured from `$props()`, with its
 *   declared type (when the annotation is a named type or an inline object
 *   type that can be resolved) and its default value expression.
 *   Rest elements (`...rest`) are ignored.
 *
 * @param {string} source - component source.
 * @returns {{ description: string | undefined, props: Array<{ name: string, type?: string, default?: string }> }}
 */
function extractSvelteComponentMeta(source) {
  const description = source.match(/<!--\s*@component\s*([\s\S]*?)\s*-->/)?.[1]?.trim() || void 0;
  const props = [];
  const propsMatch = source.match(
    /let\s+\{([\s\S]*?)\}\s*(?::\s*([^=;{][\s\S]*?))?\s*=\s*\$props\(\)/
  );
  if (!propsMatch) {
    return { description, props };
  }
  const [, destructureBlock, rawAnnotation] = propsMatch;
  const typeAnnotation = rawAnnotation?.trim();
  let resolvedTypeMap;
  if (typeAnnotation) {
    if (/^[A-Z]\w*$/.test(typeAnnotation)) {
      // Named type: look its members up in the same source text.
      resolvedTypeMap = resolveTypeReference(source, typeAnnotation);
    } else if (typeAnnotation.startsWith("{")) {
      resolvedTypeMap = parseInlineTypeAnnotation(typeAnnotation);
    }
  }
  for (const entry of splitDestructureBlock(destructureBlock)) {
    const trimmed = entry.trim();
    if (!trimmed || trimmed.startsWith("...")) continue;
    // Try "name: alias [= default]" first, then "name = default".
    const matched = trimmed.match(/^(\w+)\s*:\s*\w+\s*(?:=\s*([\s\S]+))?$/) ?? trimmed.match(/^(\w+)\s*=\s*([\s\S]+)$/);
    const propName = matched ? matched[1] : (trimmed.match(/^(\w+)/)?.[1] ?? trimmed);
    const defaultValue = matched?.[2]?.trim();
    const propType = resolvedTypeMap?.get(propName);
    const prop = { name: propName };
    if (propType) prop.type = propType;
    if (defaultValue) prop.default = defaultValue;
    props.push(prop);
  }
  return { description, props };
}
|
|
2683
|
+
/**
 * Splits the inside of a destructuring pattern into its top-level entries.
 *
 * Commas are treated as separators only when they sit outside every bracket
 * pair and outside every string literal, so defaults such as `[1, 2]`,
 * `fn(a, b)` or `"a, b"` stay in one entry. Entries are returned untrimmed;
 * a trailing whitespace-only entry is dropped.
 *
 * @param {string} block - Text between the braces of `let { ... }`.
 * @returns {string[]} The entries in source order.
 */
function splitDestructureBlock(block) {
  const entries = [];
  let depth = 0;
  let quote = ""; // the active string delimiter, or "" when outside a string
  let current = "";
  for (let i = 0; i < block.length; i++) {
    const ch = block[i];
    if (quote) {
      if (ch === "\\" && i + 1 < block.length) {
        // Keep an escaped character (e.g. \" or \') attached to the string.
        current += ch + block[++i];
        continue;
      }
      // '...' and "..." cannot span lines; ending them at a newline keeps a
      // stray apostrophe (for instance in a comment) from swallowing the rest.
      if (ch === quote || (ch === "\n" && quote !== "`")) quote = "";
      current += ch;
      continue;
    }
    if (ch === '"' || ch === "'" || ch === "`") {
      quote = ch;
    } else if (ch === "{" || ch === "[" || ch === "(") {
      depth++;
    } else if (ch === "}" || ch === "]" || ch === ")") {
      // Clamp so an unbalanced closer cannot disable splitting for the rest.
      depth = Math.max(0, depth - 1);
    } else if (ch === "," && depth === 0) {
      entries.push(current);
      current = "";
      continue;
    }
    current += ch;
  }
  if (current.trim()) entries.push(current);
  return entries;
}
|
|
2704
|
+
/**
 * Finds `interface <typeName> {` or `type <typeName> = {` in the given source
 * and parses the members of its brace-delimited body.
 *
 * @param {string} source - File contents to search.
 * @param {string} typeName - Type identifier; it is interpolated into a RegExp
 *   unescaped, so it should be a plain identifier.
 * @returns {Map<string, string> | undefined} Member name -> type text, or
 *   undefined when no declaration is found or its braces never balance.
 */
function resolveTypeReference(source, typeName) {
  const declaration = new RegExp(
    `(?:interface\\s+${typeName}\\s*|type\\s+${typeName}\\s*=\\s*)\\{`
  ).exec(source);
  if (!declaration) return void 0;

  const bodyStart = declaration.index + declaration[0].length;
  // One brace is already open (the one the pattern consumed).
  let open = 1;
  for (let pos = bodyStart; pos < source.length; pos++) {
    const ch = source[pos];
    if (ch === "{") {
      open += 1;
    } else if (ch === "}") {
      open -= 1;
      if (open === 0) {
        return parseTypeMembers(source.slice(bodyStart, pos));
      }
    }
  }
  return void 0;
}
|
|
2720
|
+
/**
 * Parses an inline object type such as `{ a: string; b?: number }`.
 *
 * @param {string} annotation - Type text; one leading `{` and one trailing `}`
 *   are removed when present.
 * @returns {Map<string, string>} Member name -> type text.
 */
function parseInlineTypeAnnotation(annotation) {
  let inner = annotation.startsWith("{") ? annotation.slice(1) : annotation;
  if (inner.endsWith("}")) {
    inner = inner.slice(0, -1);
  }
  return parseTypeMembers(inner);
}
|
|
2724
|
+
/**
 * Parses the body of an interface / object type into a name -> type map.
 *
 * Members may be separated by `;`, `,` or newlines. Separators are honoured
 * only at the top level, so nested object types, tuples, call signatures and
 * generic argument lists stay attached to their member. `//` and block
 * comments are skipped, and string-literal types are scanned as opaque text.
 * Members that are not `name: type` / `name?: type` are ignored.
 *
 * @param {string} body - Text between the braces of the type.
 * @returns {Map<string, string>} Member name -> type text, with runs of
 *   whitespace collapsed to single spaces.
 */
function parseTypeMembers(body) {
  const map = /* @__PURE__ */ new Map();
  const members = [];
  let depth = 0;
  let quote = ""; // the active string delimiter, or "" when outside a string
  let current = "";
  for (let i = 0; i < body.length; i++) {
    const ch = body[i];
    if (quote) {
      if (ch === "\n" && quote !== "`") {
        // '...' and "..." cannot span lines: close the string and let the
        // newline be handled as a normal separator below.
        quote = "";
      } else {
        if (ch === "\\" && i + 1 < body.length) {
          current += ch + body[++i];
          continue;
        }
        if (ch === quote) quote = "";
        current += ch;
        continue;
      }
    }
    if (ch === "/" && body[i + 1] === "/") {
      // Line comment: jump to just before the newline so it still separates.
      const eol = body.indexOf("\n", i);
      i = (eol === -1 ? body.length : eol) - 1;
      continue;
    }
    if (ch === "/" && body[i + 1] === "*") {
      // Block comment: jump to its closing "*/" (or to the end if unterminated).
      const end = body.indexOf("*/", i + 2);
      i = end === -1 ? body.length : end + 1;
      continue;
    }
    if (ch === '"' || ch === "'" || ch === "`") {
      quote = ch;
    } else if (ch === "{" || ch === "[" || ch === "(" || ch === "<") {
      depth++;
    } else if (ch === "}" || ch === "]" || ch === ")" || (ch === ">" && body[i - 1] !== "=")) {
      // The "=>" of a function type is not a closing angle bracket.
      depth = Math.max(0, depth - 1);
    } else if (depth === 0 && (ch === ";" || ch === "," || ch === "\n")) {
      members.push(current);
      current = "";
      continue;
    }
    current += ch;
  }
  members.push(current);

  for (const raw of members) {
    const memberMatch = raw.trim().match(/^(\w+)\??\s*:\s*([\s\S]+)$/);
    if (memberMatch) {
      map.set(memberMatch[1], memberMatch[2].replace(/\s+/g, " ").trim());
    }
  }
  return map;
}
|
|
2735
|
+
/**
 * Renders component metadata as one line of indexable prose:
 * "<Name> component. <description> Props: a (type) default: x, b."
 *
 * @param {string} componentName - Display name of the component.
 * @param {{ description?: string, props: Array<{ name: string, type?: string, default?: string }> }} meta
 * @returns {string} The sentence(s), or "" when there is neither a
 *   description nor any prop.
 */
function buildComponentMarkdown(componentName, meta) {
  const { description, props } = meta;
  if (!description && props.length === 0) {
    return "";
  }

  const sentences = [`${componentName} component.`];
  if (description) {
    sentences.push(description);
  }
  if (props.length > 0) {
    const rendered = props
      .map(({ name, type, default: fallback }) => {
        const typePart = type ? ` (${type})` : "";
        const defaultPart = fallback ? ` default: ${fallback}` : "";
        return `${name}${typePart}${defaultPart}`;
      })
      .join(", ");
    sentences.push(`Props: ${rendered}.`);
  }
  return sentences.join(" ");
}
|
|
1922
2752
|
/**
 * Reduces Svelte source to its visible text: drops <script> and <style>
 * blocks, replaces tags and `{...}` expressions with spaces, then collapses
 * whitespace.
 *
 * @param {string} source - Raw `.svelte` file contents.
 * @returns {string} Single-line text with no leading or trailing whitespace.
 */
function normalizeSvelteToMarkdown(source) {
  // Applied in order; whole blocks must go before the generic tag pattern.
  const passes = [
    [/<script[\s\S]*?<\/script>/g, ""],
    [/<style[\s\S]*?<\/style>/g, ""],
    [/<[^>]+>/g, " "],
    [/\{[^}]+\}/g, " "],
    [/\s+/g, " "]
  ];
  let text = source;
  for (const [pattern, replacement] of passes) {
    text = text.replace(pattern, replacement);
  }
  return text.trim();
}
|
|
@@ -1938,12 +2768,26 @@ async function loadContentFilesPages(cwd, config, maxPages) {
|
|
|
1938
2768
|
const pages = [];
|
|
1939
2769
|
for (const filePath of selected) {
|
|
1940
2770
|
const raw = await fs5.readFile(filePath, "utf8");
|
|
1941
|
-
|
|
2771
|
+
let markdown;
|
|
2772
|
+
let tags;
|
|
2773
|
+
if (filePath.endsWith(".md")) {
|
|
2774
|
+
markdown = raw;
|
|
2775
|
+
} else if (isSvelteComponentFile(filePath)) {
|
|
2776
|
+
const componentName = path7.basename(filePath, ".svelte");
|
|
2777
|
+
const meta = extractSvelteComponentMeta(raw);
|
|
2778
|
+
const componentMarkdown = buildComponentMarkdown(componentName, meta);
|
|
2779
|
+
const templateContent = normalizeSvelteToMarkdown(raw);
|
|
2780
|
+
markdown = componentMarkdown ? [componentMarkdown, templateContent].filter(Boolean).join("\n\n") : templateContent;
|
|
2781
|
+
tags = ["component"];
|
|
2782
|
+
} else {
|
|
2783
|
+
markdown = normalizeSvelteToMarkdown(raw);
|
|
2784
|
+
}
|
|
1942
2785
|
pages.push({
|
|
1943
2786
|
url: filePathToUrl(filePath, baseDir),
|
|
1944
2787
|
markdown,
|
|
1945
2788
|
sourcePath: path7.relative(cwd, filePath).replace(/\\/g, "/"),
|
|
1946
|
-
outgoingLinks: []
|
|
2789
|
+
outgoingLinks: [],
|
|
2790
|
+
...tags ? { tags } : {}
|
|
1947
2791
|
});
|
|
1948
2792
|
}
|
|
1949
2793
|
return pages;
|
|
@@ -1958,9 +2802,9 @@ function extractLocs(xml) {
|
|
|
1958
2802
|
const $ = cheerioLoad2(xml, { xmlMode: true });
|
|
1959
2803
|
const locs = [];
|
|
1960
2804
|
$("loc").each((_i, el) => {
|
|
1961
|
-
const
|
|
1962
|
-
if (
|
|
1963
|
-
locs.push(
|
|
2805
|
+
const text2 = $(el).text().trim();
|
|
2806
|
+
if (text2) {
|
|
2807
|
+
locs.push(text2);
|
|
1964
2808
|
}
|
|
1965
2809
|
});
|
|
1966
2810
|
return locs;
|
|
@@ -2175,32 +3019,68 @@ function nonNegativeOrZero(value) {
|
|
|
2175
3019
|
}
|
|
2176
3020
|
return Math.max(0, value);
|
|
2177
3021
|
}
|
|
2178
|
-
function normalizeForTitleMatch(
|
|
2179
|
-
return
|
|
3022
|
+
/**
 * Normalizes text for loose title / query comparison: lower-cased, accents
 * folded, punctuation removed, whitespace collapsed and trimmed.
 *
 * Letters and digits of every script are kept, so non-English titles no longer
 * normalize to an empty string. Latin diacritics are folded ("Café" ->
 * "cafe"), so accented and unaccented spellings compare equal. ASCII input
 * produces the same result as a plain `[^a-z0-9\s]` strip.
 *
 * @param {string} text2 - Title or query text.
 * @returns {string} The normalized comparison key.
 */
function normalizeForTitleMatch(text2) {
  return text2
    .normalize("NFKD")
    // Drop only the Latin-range combining marks produced by decomposition;
    // other scripts' marks are kept because they carry meaning there.
    .replace(/[\u0300-\u036f]/g, "")
    .toLowerCase()
    .replace(/[^\p{L}\p{N}\p{M}\s]/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
2181
|
-
function rankHits(hits, config, query) {
|
|
3025
|
+
function rankHits(hits, config, query, debug) {
|
|
2182
3026
|
const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
|
|
2183
3027
|
const titleMatchWeight = config.ranking.weights.titleMatch;
|
|
2184
3028
|
return hits.map((hit) => {
|
|
2185
|
-
|
|
3029
|
+
const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
|
|
3030
|
+
let score = baseScore;
|
|
3031
|
+
let incomingLinkBoostValue = 0;
|
|
2186
3032
|
if (config.ranking.enableIncomingLinkBoost) {
|
|
2187
3033
|
const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.metadata.incomingLinks));
|
|
2188
|
-
|
|
3034
|
+
incomingLinkBoostValue = incomingBoost * config.ranking.weights.incomingLinks;
|
|
3035
|
+
score += incomingLinkBoostValue;
|
|
2189
3036
|
}
|
|
3037
|
+
let depthBoostValue = 0;
|
|
2190
3038
|
if (config.ranking.enableDepthBoost) {
|
|
2191
3039
|
const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
|
|
2192
|
-
|
|
3040
|
+
depthBoostValue = depthBoost * config.ranking.weights.depth;
|
|
3041
|
+
score += depthBoostValue;
|
|
2193
3042
|
}
|
|
3043
|
+
let titleMatchBoostValue = 0;
|
|
2194
3044
|
if (normalizedQuery && titleMatchWeight > 0) {
|
|
2195
3045
|
const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
|
|
2196
3046
|
if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
|
|
2197
|
-
|
|
3047
|
+
titleMatchBoostValue = titleMatchWeight;
|
|
3048
|
+
score += titleMatchBoostValue;
|
|
2198
3049
|
}
|
|
2199
3050
|
}
|
|
2200
|
-
|
|
2201
|
-
|
|
2202
|
-
|
|
2203
|
-
|
|
3051
|
+
let freshnessBoostValue = 0;
|
|
3052
|
+
if (config.ranking.enableFreshnessBoost) {
|
|
3053
|
+
const publishedAt = hit.metadata.publishedAt;
|
|
3054
|
+
if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
|
|
3055
|
+
const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
|
|
3056
|
+
const decay = 1 / (1 + nonNegativeOrZero(daysSince) * config.ranking.freshnessDecayRate);
|
|
3057
|
+
freshnessBoostValue = decay * config.ranking.weights.freshness;
|
|
3058
|
+
score += freshnessBoostValue;
|
|
3059
|
+
}
|
|
3060
|
+
}
|
|
3061
|
+
let anchorTextMatchBoostValue = 0;
|
|
3062
|
+
if (config.ranking.enableAnchorTextBoost && normalizedQuery && config.ranking.weights.anchorText > 0) {
|
|
3063
|
+
const normalizedAnchorText = normalizeForTitleMatch(hit.metadata.incomingAnchorText ?? "");
|
|
3064
|
+
if (normalizedAnchorText.length > 0 && normalizedQuery.length > 0 && (normalizedAnchorText.includes(normalizedQuery) || normalizedQuery.includes(normalizedAnchorText))) {
|
|
3065
|
+
anchorTextMatchBoostValue = config.ranking.weights.anchorText;
|
|
3066
|
+
score += anchorTextMatchBoostValue;
|
|
3067
|
+
}
|
|
3068
|
+
}
|
|
3069
|
+
const result = {
|
|
3070
|
+
hit,
|
|
3071
|
+
finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
|
|
3072
|
+
};
|
|
3073
|
+
if (debug) {
|
|
3074
|
+
result.breakdown = {
|
|
3075
|
+
baseScore,
|
|
3076
|
+
incomingLinkBoost: incomingLinkBoostValue,
|
|
3077
|
+
depthBoost: depthBoostValue,
|
|
3078
|
+
titleMatchBoost: titleMatchBoostValue,
|
|
3079
|
+
freshnessBoost: freshnessBoostValue,
|
|
3080
|
+
anchorTextMatchBoost: anchorTextMatchBoostValue
|
|
3081
|
+
};
|
|
3082
|
+
}
|
|
3083
|
+
return result;
|
|
2204
3084
|
}).sort((a, b) => {
|
|
2205
3085
|
const delta = b.finalScore - a.finalScore;
|
|
2206
3086
|
return Number.isNaN(delta) ? 0 : delta;
|
|
@@ -2209,12 +3089,13 @@ function rankHits(hits, config, query) {
|
|
|
2209
3089
|
function trimByScoreGap(results, config) {
|
|
2210
3090
|
if (results.length === 0) return results;
|
|
2211
3091
|
const threshold = config.ranking.scoreGapThreshold;
|
|
2212
|
-
const
|
|
2213
|
-
if (
|
|
2214
|
-
const
|
|
2215
|
-
|
|
2216
|
-
|
|
2217
|
-
|
|
3092
|
+
const minScoreRatio = config.ranking.minScoreRatio;
|
|
3093
|
+
if (minScoreRatio > 0 && results.length > 0) {
|
|
3094
|
+
const topScore = results[0].pageScore;
|
|
3095
|
+
if (Number.isFinite(topScore) && topScore > 0) {
|
|
3096
|
+
const minThreshold = topScore * minScoreRatio;
|
|
3097
|
+
results = results.filter((r) => r.pageScore >= minThreshold);
|
|
3098
|
+
}
|
|
2218
3099
|
}
|
|
2219
3100
|
if (threshold > 0 && results.length > 1) {
|
|
2220
3101
|
for (let i = 1; i < results.length; i++) {
|
|
@@ -2284,61 +3165,99 @@ function aggregateByPage(ranked, config) {
|
|
|
2284
3165
|
return Number.isNaN(delta) ? 0 : delta;
|
|
2285
3166
|
});
|
|
2286
3167
|
}
|
|
2287
|
-
function
|
|
2288
|
-
|
|
2289
|
-
const
|
|
2290
|
-
|
|
2291
|
-
|
|
2292
|
-
|
|
2293
|
-
|
|
2294
|
-
|
|
2295
|
-
|
|
2296
|
-
|
|
2297
|
-
|
|
2298
|
-
if (pageHit) {
|
|
2299
|
-
pagesWithChunks.add(url);
|
|
2300
|
-
const blended = (1 - w) * ranked.finalScore + w * pageHit.score;
|
|
2301
|
-
return {
|
|
2302
|
-
hit: ranked.hit,
|
|
2303
|
-
finalScore: Number.isFinite(blended) ? blended : ranked.finalScore
|
|
2304
|
-
};
|
|
3168
|
+
/**
 * Scores page-level hits and returns them sorted by descending final score.
 *
 * Final score = base score + optional boosts (incoming links, URL depth,
 * title match, freshness), then multiplied by the page weight returned by
 * findPageWeight. Pages whose weight is exactly 0 are removed.
 *
 * @param {Array<object>} pageHits - Hits carrying score, url, title, depth,
 *   incomingLinks, publishedAt, etc.
 * @param {object} config - Resolved config; only `config.ranking` is read.
 * @param {string} [query] - Search query, used for the title-match boost.
 * @param {boolean} [debug] - When truthy, each result gets a `breakdown`.
 * @returns {Array<object>} Ranked page results.
 */
function rankPageHits(pageHits, config, query, debug) {
  const ranking = config.ranking;
  const queryKey = query ? normalizeForTitleMatch(query) : "";
  const titleWeight = ranking.weights.titleMatch;

  // Each helper returns undefined when its boost does not apply, so the score
  // is only touched by boosts that are actually in effect.
  const incomingLinkBoostOf = (hit) => {
    if (!ranking.enableIncomingLinkBoost) return void 0;
    return Math.log(1 + nonNegativeOrZero(hit.incomingLinks)) * ranking.weights.incomingLinks;
  };
  const depthBoostOf = (hit) => {
    if (!ranking.enableDepthBoost) return void 0;
    return 1 / (1 + nonNegativeOrZero(hit.depth)) * ranking.weights.depth;
  };
  const titleBoostOf = (hit) => {
    if (!queryKey || !(titleWeight > 0)) return void 0;
    const titleKey = normalizeForTitleMatch(hit.title);
    const overlaps = titleKey.length > 0 && (titleKey.includes(queryKey) || queryKey.includes(titleKey));
    return overlaps ? titleWeight : void 0;
  };
  const freshnessBoostOf = (hit) => {
    if (!ranking.enableFreshnessBoost) return void 0;
    const publishedAt = hit.publishedAt;
    if (typeof publishedAt !== "number" || !Number.isFinite(publishedAt)) return void 0;
    // 864e5 ms = one day; future dates count as zero days old.
    const ageDays = Math.max(0, (Date.now() - publishedAt) / 864e5);
    return 1 / (1 + nonNegativeOrZero(ageDays) * ranking.freshnessDecayRate) * ranking.weights.freshness;
  };

  const scored = pageHits.map((hit) => {
    const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
    const linkBoost = incomingLinkBoostOf(hit);
    const depthBoost = depthBoostOf(hit);
    const titleBoost = titleBoostOf(hit);
    const freshnessBoost = freshnessBoostOf(hit);

    let score = baseScore;
    for (const boost of [linkBoost, depthBoost, titleBoost, freshnessBoost]) {
      if (boost !== void 0) score += boost;
    }

    const pageWeight = findPageWeight(hit.url, ranking.pageWeights);
    if (pageWeight !== 1) {
      score *= pageWeight;
    }

    const result = {
      url: hit.url,
      title: hit.title,
      description: hit.description,
      routeFile: hit.routeFile,
      depth: hit.depth,
      incomingLinks: hit.incomingLinks,
      tags: hit.tags,
      baseScore,
      finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY,
      publishedAt: hit.publishedAt
    };
    if (debug) {
      result.breakdown = {
        baseScore,
        pageWeight,
        incomingLinkBoost: linkBoost ?? 0,
        depthBoost: depthBoost ?? 0,
        titleMatchBoost: titleBoost ?? 0,
        freshnessBoost: freshnessBoost ?? 0
      };
    }
    return result;
  });

  const visible = scored.filter(
    (page) => findPageWeight(page.url, ranking.pageWeights) !== 0
  );
  return visible.sort((a, b) => {
    const delta = b.finalScore - a.finalScore;
    // -Infinity minus -Infinity is NaN; treat such pairs as equal.
    return Number.isNaN(delta) ? 0 : delta;
  });
}
|
|
3236
|
+
/**
 * Cuts the low-relevance tail off an already-ranked page list.
 *
 * Two filters run in order:
 *  1. minScoreRatio - drop pages scoring below `topScore * minScoreRatio`
 *     (only when the top score is finite and positive).
 *  2. scoreGapThreshold - truncate at the first page whose relative drop from
 *     its predecessor, `(prev - current) / prev`, reaches the threshold.
 *
 * @param {Array<{ finalScore: number }>} results - Pages sorted by descending score.
 * @param {object} config - Resolved config; reads `config.ranking`.
 * @returns {Array<{ finalScore: number }>} The kept pages; this is the input
 *   array itself when neither filter applies.
 */
function trimPagesByScoreGap(results, config) {
  if (results.length === 0) return results;
  const { scoreGapThreshold, minScoreRatio } = config.ranking;

  let kept = results;
  if (minScoreRatio > 0) {
    const best = kept[0].finalScore;
    if (Number.isFinite(best) && best > 0) {
      const floor = best * minScoreRatio;
      kept = kept.filter((page) => page.finalScore >= floor);
    }
  }

  if (!(scoreGapThreshold > 0) || kept.length < 2) {
    return kept;
  }
  const cut = kept.findIndex((page, idx) => {
    if (idx === 0) return false;
    const previous = kept[idx - 1].finalScore;
    // A non-positive predecessor has no meaningful relative gap.
    return previous > 0 && (previous - page.finalScore) / previous >= scoreGapThreshold;
  });
  return cut === -1 ? kept : kept.slice(0, cut);
}
|
|
2342
3261
|
|
|
2343
3262
|
// src/utils/time.ts
|
|
2344
3263
|
function nowIso() {
|
|
@@ -2348,6 +3267,85 @@ function hrTimeMs(start) {
|
|
|
2348
3267
|
return Number(process.hrtime.bigint() - start) / 1e6;
|
|
2349
3268
|
}
|
|
2350
3269
|
|
|
3270
|
+
// src/indexing/llms-txt.ts
|
|
3271
|
+
import fs8 from "fs/promises";
|
|
3272
|
+
import path10 from "path";
|
|
3273
|
+
/**
 * Resolves a page path against the project's base URL.
 *
 * @param {string} pageUrl - Page URL or path.
 * @param {string} [baseUrl] - Absolute base URL.
 * @returns {string} The absolute href; `pageUrl` unchanged when no base is
 *   given or the pair cannot be parsed as a URL.
 */
function resolvePageUrl(pageUrl, baseUrl) {
  let resolved = pageUrl;
  if (baseUrl) {
    try {
      resolved = new URL(pageUrl, baseUrl).href;
    } catch {
      // Unparseable base: fall back to the path as given.
    }
  }
  return resolved;
}
|
|
3281
|
+
/**
 * Builds the text of llms.txt: a title heading, an optional blockquote
 * description, and a "## Pages" list of markdown links.
 *
 * Pages are ordered by ascending depth, then by descending incoming-link
 * count; the llms.txt / llms-full.txt pages themselves are left out.
 *
 * @param {Array<object>} pages - Indexed pages (url, title, description, depth, incomingLinks).
 * @param {object} config - Resolved config; reads `llmsTxt` and `project`.
 * @returns {string} File contents, ending with a newline.
 */
function generateLlmsTxt(pages, config) {
  const heading = config.llmsTxt.title ?? config.project.id;
  const description = config.llmsTxt.description;
  const baseUrl = config.project.baseUrl;

  const header = description
    ? [`# ${heading}`, "", `> ${description}`]
    : [`# ${heading}`];

  // filter() already yields a fresh array, so sorting it leaves `pages` intact.
  const ordered = pages
    .filter((p) => p.url !== "/llms.txt" && p.url !== "/llms-full.txt")
    .sort((a, b) => a.depth !== b.depth ? a.depth - b.depth : b.incomingLinks - a.incomingLinks);

  const entries = ordered.map((page) => {
    const link = `- [${page.title}](${resolvePageUrl(page.url, baseUrl)})`;
    return page.description ? `${link}: ${page.description}` : link;
  });
  const section = entries.length > 0 ? ["", "## Pages", "", ...entries] : [];

  return [...header, ...section, ""].join("\n");
}
|
|
3310
|
+
/**
 * Builds the text of llms-full.txt: the same header as llms.txt followed by
 * every page's full markdown, each under a `---` rule and a linked heading.
 *
 * Pages are ordered by ascending depth, then by descending incoming-link
 * count; the llms.txt / llms-full.txt pages themselves are left out.
 *
 * @param {Array<object>} pages - Indexed pages (url, title, markdown, depth, incomingLinks).
 * @param {object} config - Resolved config; reads `llmsTxt` and `project`.
 * @returns {string} File contents, ending with a newline.
 */
function generateLlmsFullTxt(pages, config) {
  const heading = config.llmsTxt.title ?? config.project.id;
  const description = config.llmsTxt.description;
  const baseUrl = config.project.baseUrl;

  const header = description
    ? [`# ${heading}`, "", `> ${description}`]
    : [`# ${heading}`];

  // filter() already yields a fresh array, so sorting it leaves `pages` intact.
  const ordered = pages
    .filter((p) => p.url !== "/llms.txt" && p.url !== "/llms-full.txt")
    .sort((a, b) => a.depth !== b.depth ? a.depth - b.depth : b.incomingLinks - a.incomingLinks);

  const sections = ordered.flatMap((page) => [
    "",
    "---",
    "",
    `## [${page.title}](${resolvePageUrl(page.url, baseUrl)})`,
    "",
    page.markdown.trim()
  ]);

  return [...header, ...sections, ""].join("\n");
}
|
|
3333
|
+
/**
 * Writes llms.txt and, when `config.llmsTxt.generateFull` is set, the
 * companion "-full" file next to it.
 *
 * The companion name is derived by inserting "-full" before the extension of
 * the configured output path (llms.txt -> llms-full.txt). Because it is built
 * from the parsed file name rather than a `.txt` suffix match, it always
 * differs from the primary path, so an output path without a lowercase `.txt`
 * extension can no longer cause the full document to overwrite llms.txt.
 *
 * @param {Array<object>} pages - Indexed pages to list.
 * @param {object} config - Resolved config; reads `llmsTxt.outputPath` and `llmsTxt.generateFull`.
 * @param {string} cwd - Directory that `outputPath` is resolved against.
 * @param {{ info(message: string): void }} logger3 - Receives progress messages.
 * @returns {Promise<void>} Rejects if a directory or file cannot be written.
 */
async function writeLlmsTxt(pages, config, cwd, logger3) {
  const outputPath = path10.resolve(cwd, config.llmsTxt.outputPath);
  await fs8.mkdir(path10.dirname(outputPath), { recursive: true });

  await fs8.writeFile(outputPath, generateLlmsTxt(pages, config), "utf8");
  logger3.info(`Generated llms.txt at ${config.llmsTxt.outputPath}`);

  if (!config.llmsTxt.generateFull) return;

  const { dir, name, ext } = path10.parse(outputPath);
  const fullPath = path10.join(dir, `${name}-full${ext}`);
  await fs8.writeFile(fullPath, generateLlmsFullTxt(pages, config), "utf8");
  logger3.info(`Generated llms-full.txt at ${path10.relative(cwd, fullPath)}`);
}
|
|
3348
|
+
|
|
2351
3349
|
// src/indexing/pipeline.ts
|
|
2352
3350
|
function buildPageSummary(page, maxChars = 3500) {
|
|
2353
3351
|
const parts = [page.title];
|
|
@@ -2365,26 +3363,44 @@ function buildPageSummary(page, maxChars = 3500) {
|
|
|
2365
3363
|
if (joined.length <= maxChars) return joined;
|
|
2366
3364
|
return joined.slice(0, maxChars).trim();
|
|
2367
3365
|
}
|
|
3366
|
+
/**
 * Computes the change-detection hash for a page.
 *
 * The hash covers title, description, keywords, tags, markdown, the
 * outgoing-link value, publishedAt, incoming anchor text, outgoing link URLs
 * and meta. List fields are sorted first so ordering alone never changes the
 * hash. The field order and the "|" separator define the stored hash format.
 *
 * @param {object} page - Page carrying the fields listed above; `tags` is required.
 * @returns {string} Result of sha256 over the joined fields.
 */
function buildPageContentHash(page) {
  const joinSorted = (list) => list.slice().sort().join(",");

  // The key array given to JSON.stringify fixes top-level key order. It also
  // acts as an allow-list at every nesting level, so nested objects inside
  // meta only contribute keys whose names also appear at the top level.
  const metaText = page.meta
    ? JSON.stringify(page.meta, Object.keys(page.meta).sort())
    : "";

  const fields = [
    page.title,
    page.description ?? "",
    joinSorted(page.keywords ?? []),
    joinSorted(page.tags),
    page.markdown,
    String(page.outgoingLinks),
    String(page.publishedAt ?? ""),
    page.incomingAnchorText ?? "",
    joinSorted(page.outgoingLinkUrls ?? []),
    metaText
  ];
  return sha256(fields.join("|"));
}
|
|
2368
3381
|
var IndexPipeline = class _IndexPipeline {
|
|
2369
3382
|
cwd;
|
|
2370
3383
|
config;
|
|
2371
3384
|
store;
|
|
2372
3385
|
logger;
|
|
3386
|
+
hooks;
|
|
2373
3387
|
constructor(options) {
|
|
2374
3388
|
this.cwd = options.cwd;
|
|
2375
3389
|
this.config = options.config;
|
|
2376
3390
|
this.store = options.store;
|
|
2377
3391
|
this.logger = options.logger;
|
|
3392
|
+
this.hooks = options.hooks;
|
|
2378
3393
|
}
|
|
2379
3394
|
static async create(options = {}) {
|
|
2380
|
-
const cwd =
|
|
3395
|
+
const cwd = path11.resolve(options.cwd ?? process.cwd());
|
|
2381
3396
|
const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
|
|
2382
3397
|
const store = options.store ?? await createUpstashStore(config);
|
|
2383
3398
|
return new _IndexPipeline({
|
|
2384
3399
|
cwd,
|
|
2385
3400
|
config,
|
|
2386
3401
|
store,
|
|
2387
|
-
logger: options.logger ?? new Logger()
|
|
3402
|
+
logger: options.logger ?? new Logger(),
|
|
3403
|
+
hooks: options.hooks ?? {}
|
|
2388
3404
|
});
|
|
2389
3405
|
}
|
|
2390
3406
|
getConfig() {
|
|
@@ -2405,7 +3421,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2405
3421
|
const scope = resolveScope(this.config, options.scopeOverride);
|
|
2406
3422
|
ensureStateDirs(this.cwd, this.config.state.dir, scope);
|
|
2407
3423
|
const sourceMode = options.sourceOverride ?? this.config.source.mode;
|
|
2408
|
-
this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-
|
|
3424
|
+
this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-vector)`);
|
|
2409
3425
|
if (options.force) {
|
|
2410
3426
|
this.logger.info("Force mode enabled \u2014 full rebuild");
|
|
2411
3427
|
}
|
|
@@ -2413,9 +3429,9 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2413
3429
|
this.logger.info("Dry run \u2014 no writes will be performed");
|
|
2414
3430
|
}
|
|
2415
3431
|
const manifestStart = stageStart();
|
|
2416
|
-
const
|
|
3432
|
+
const existingPageHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getPageHashes(scope);
|
|
2417
3433
|
stageEnd("manifest", manifestStart);
|
|
2418
|
-
this.logger.debug(`Manifest: ${
|
|
3434
|
+
this.logger.debug(`Manifest: ${existingPageHashes.size} existing page hashes loaded`);
|
|
2419
3435
|
const sourceStart = stageStart();
|
|
2420
3436
|
this.logger.info(`Loading pages (source: ${sourceMode})...`);
|
|
2421
3437
|
let sourcePages;
|
|
@@ -2451,11 +3467,11 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2451
3467
|
let robotsRules = null;
|
|
2452
3468
|
if (sourceMode === "static-output") {
|
|
2453
3469
|
robotsRules = await loadRobotsTxtFromDir(
|
|
2454
|
-
|
|
3470
|
+
path11.resolve(this.cwd, this.config.source.staticOutputDir)
|
|
2455
3471
|
);
|
|
2456
3472
|
} else if (sourceMode === "build" && this.config.source.build) {
|
|
2457
3473
|
robotsRules = await loadRobotsTxtFromDir(
|
|
2458
|
-
|
|
3474
|
+
path11.resolve(this.cwd, this.config.source.build.outputDir)
|
|
2459
3475
|
);
|
|
2460
3476
|
} else if (sourceMode === "crawl" && this.config.source.crawl) {
|
|
2461
3477
|
robotsRules = await fetchRobotsTxt(this.config.source.crawl.baseUrl);
|
|
@@ -2492,11 +3508,61 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2492
3508
|
);
|
|
2493
3509
|
continue;
|
|
2494
3510
|
}
|
|
2495
|
-
|
|
3511
|
+
if (sourcePage.tags && sourcePage.tags.length > 0) {
|
|
3512
|
+
extracted.tags = [.../* @__PURE__ */ new Set([...extracted.tags, ...sourcePage.tags])];
|
|
3513
|
+
}
|
|
3514
|
+
let accepted;
|
|
3515
|
+
if (this.hooks.transformPage) {
|
|
3516
|
+
const transformed = await this.hooks.transformPage(extracted);
|
|
3517
|
+
if (transformed === null) {
|
|
3518
|
+
this.logger.debug(`Page ${sourcePage.url} skipped by transformPage hook`);
|
|
3519
|
+
continue;
|
|
3520
|
+
}
|
|
3521
|
+
accepted = transformed;
|
|
3522
|
+
} else {
|
|
3523
|
+
accepted = extracted;
|
|
3524
|
+
}
|
|
3525
|
+
extractedPages.push(accepted);
|
|
2496
3526
|
this.logger.event("page_extracted", {
|
|
2497
|
-
url:
|
|
3527
|
+
url: accepted.url
|
|
2498
3528
|
});
|
|
2499
3529
|
}
|
|
3530
|
+
const customRecords = options.customRecords ?? [];
|
|
3531
|
+
if (customRecords.length > 0) {
|
|
3532
|
+
this.logger.info(`Processing ${customRecords.length} custom record${customRecords.length === 1 ? "" : "s"}...`);
|
|
3533
|
+
for (const record of customRecords) {
|
|
3534
|
+
const normalizedUrl = normalizeUrlPath(record.url);
|
|
3535
|
+
const normalized = normalizeMarkdown(record.content);
|
|
3536
|
+
if (!normalized.trim()) {
|
|
3537
|
+
this.logger.warn(`Custom record ${normalizedUrl} has empty content and was skipped.`);
|
|
3538
|
+
continue;
|
|
3539
|
+
}
|
|
3540
|
+
const urlTags = normalizedUrl.split("/").filter(Boolean).slice(0, 1);
|
|
3541
|
+
const tags = record.tags ? [.../* @__PURE__ */ new Set([...urlTags, ...record.tags])] : urlTags;
|
|
3542
|
+
const extracted = {
|
|
3543
|
+
url: normalizedUrl,
|
|
3544
|
+
title: record.title,
|
|
3545
|
+
markdown: normalized,
|
|
3546
|
+
outgoingLinks: [],
|
|
3547
|
+
noindex: false,
|
|
3548
|
+
tags,
|
|
3549
|
+
weight: record.weight
|
|
3550
|
+
};
|
|
3551
|
+
let accepted;
|
|
3552
|
+
if (this.hooks.transformPage) {
|
|
3553
|
+
const transformed = await this.hooks.transformPage(extracted);
|
|
3554
|
+
if (transformed === null) {
|
|
3555
|
+
this.logger.debug(`Custom record ${normalizedUrl} skipped by transformPage hook`);
|
|
3556
|
+
continue;
|
|
3557
|
+
}
|
|
3558
|
+
accepted = transformed;
|
|
3559
|
+
} else {
|
|
3560
|
+
accepted = extracted;
|
|
3561
|
+
}
|
|
3562
|
+
extractedPages.push(accepted);
|
|
3563
|
+
this.logger.event("page_extracted", { url: accepted.url, custom: true });
|
|
3564
|
+
}
|
|
3565
|
+
}
|
|
2500
3566
|
extractedPages.sort((a, b) => a.url.localeCompare(b.url));
|
|
2501
3567
|
const uniquePages = [];
|
|
2502
3568
|
const seenUrls = /* @__PURE__ */ new Set();
|
|
@@ -2529,15 +3595,28 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2529
3595
|
const linkStart = stageStart();
|
|
2530
3596
|
const pageSet = new Set(indexablePages.map((page) => normalizeUrlPath(page.url)));
|
|
2531
3597
|
const incomingLinkCount = /* @__PURE__ */ new Map();
|
|
3598
|
+
const incomingAnchorTexts = /* @__PURE__ */ new Map();
|
|
2532
3599
|
for (const page of indexablePages) {
|
|
2533
3600
|
incomingLinkCount.set(page.url, incomingLinkCount.get(page.url) ?? 0);
|
|
2534
3601
|
}
|
|
2535
3602
|
for (const page of indexablePages) {
|
|
2536
|
-
|
|
3603
|
+
const seenForCount = /* @__PURE__ */ new Set();
|
|
3604
|
+
const seenForAnchor = /* @__PURE__ */ new Set();
|
|
3605
|
+
for (const { url: outgoing, anchorText } of page.outgoingLinks) {
|
|
2537
3606
|
if (!pageSet.has(outgoing)) {
|
|
2538
3607
|
continue;
|
|
2539
3608
|
}
|
|
2540
|
-
|
|
3609
|
+
if (!seenForCount.has(outgoing)) {
|
|
3610
|
+
seenForCount.add(outgoing);
|
|
3611
|
+
incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
|
|
3612
|
+
}
|
|
3613
|
+
if (anchorText && !seenForAnchor.has(outgoing)) {
|
|
3614
|
+
seenForAnchor.add(outgoing);
|
|
3615
|
+
if (!incomingAnchorTexts.has(outgoing)) {
|
|
3616
|
+
incomingAnchorTexts.set(outgoing, /* @__PURE__ */ new Set());
|
|
3617
|
+
}
|
|
3618
|
+
incomingAnchorTexts.get(outgoing).add(anchorText);
|
|
3619
|
+
}
|
|
2541
3620
|
}
|
|
2542
3621
|
}
|
|
2543
3622
|
stageEnd("links", linkStart);
|
|
@@ -2556,6 +3635,15 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2556
3635
|
});
|
|
2557
3636
|
}
|
|
2558
3637
|
}
|
|
3638
|
+
for (const record of customRecords) {
|
|
3639
|
+
const normalizedUrl = normalizeUrlPath(record.url);
|
|
3640
|
+
if (!precomputedRoutes.has(normalizedUrl)) {
|
|
3641
|
+
precomputedRoutes.set(normalizedUrl, {
|
|
3642
|
+
routeFile: "",
|
|
3643
|
+
routeResolution: "exact"
|
|
3644
|
+
});
|
|
3645
|
+
}
|
|
3646
|
+
}
|
|
2559
3647
|
for (const page of indexablePages) {
|
|
2560
3648
|
const routeMatch = precomputedRoutes.get(normalizeUrlPath(page.url)) ?? mapUrlToRoute(page.url, routePatterns);
|
|
2561
3649
|
if (routeMatch.routeResolution === "best-effort") {
|
|
@@ -2573,6 +3661,17 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2573
3661
|
} else {
|
|
2574
3662
|
routeExact += 1;
|
|
2575
3663
|
}
|
|
3664
|
+
const anchorSet = incomingAnchorTexts.get(page.url);
|
|
3665
|
+
let incomingAnchorText;
|
|
3666
|
+
if (anchorSet && anchorSet.size > 0) {
|
|
3667
|
+
let joined = "";
|
|
3668
|
+
for (const phrase of anchorSet) {
|
|
3669
|
+
const next = joined ? `${joined} ${phrase}` : phrase;
|
|
3670
|
+
if (next.length > 500) break;
|
|
3671
|
+
joined = next;
|
|
3672
|
+
}
|
|
3673
|
+
incomingAnchorText = joined || void 0;
|
|
3674
|
+
}
|
|
2576
3675
|
const indexedPage = {
|
|
2577
3676
|
url: page.url,
|
|
2578
3677
|
title: page.title,
|
|
@@ -2582,40 +3681,113 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2582
3681
|
generatedAt: nowIso(),
|
|
2583
3682
|
incomingLinks: incomingLinkCount.get(page.url) ?? 0,
|
|
2584
3683
|
outgoingLinks: page.outgoingLinks.length,
|
|
3684
|
+
outgoingLinkUrls: page.outgoingLinks.map((l) => typeof l === "string" ? l : l.url),
|
|
2585
3685
|
depth: getUrlDepth(page.url),
|
|
2586
3686
|
tags: page.tags,
|
|
2587
3687
|
markdown: page.markdown,
|
|
2588
3688
|
description: page.description,
|
|
2589
|
-
keywords: page.keywords
|
|
3689
|
+
keywords: page.keywords,
|
|
3690
|
+
publishedAt: page.publishedAt,
|
|
3691
|
+
incomingAnchorText,
|
|
3692
|
+
meta: page.meta
|
|
2590
3693
|
};
|
|
2591
3694
|
pages.push(indexedPage);
|
|
2592
3695
|
this.logger.event("page_indexed", { url: page.url });
|
|
2593
3696
|
}
|
|
3697
|
+
const pageRecords = pages.map((p) => {
|
|
3698
|
+
const summary = buildPageSummary(p);
|
|
3699
|
+
return {
|
|
3700
|
+
url: p.url,
|
|
3701
|
+
title: p.title,
|
|
3702
|
+
markdown: p.markdown,
|
|
3703
|
+
projectId: scope.projectId,
|
|
3704
|
+
scopeName: scope.scopeName,
|
|
3705
|
+
routeFile: p.routeFile,
|
|
3706
|
+
routeResolution: p.routeResolution,
|
|
3707
|
+
incomingLinks: p.incomingLinks,
|
|
3708
|
+
outgoingLinks: p.outgoingLinks,
|
|
3709
|
+
outgoingLinkUrls: p.outgoingLinkUrls,
|
|
3710
|
+
depth: p.depth,
|
|
3711
|
+
tags: p.tags,
|
|
3712
|
+
indexedAt: p.generatedAt,
|
|
3713
|
+
summary,
|
|
3714
|
+
description: p.description,
|
|
3715
|
+
keywords: p.keywords,
|
|
3716
|
+
contentHash: buildPageContentHash(p),
|
|
3717
|
+
publishedAt: p.publishedAt,
|
|
3718
|
+
meta: p.meta
|
|
3719
|
+
};
|
|
3720
|
+
});
|
|
3721
|
+
const currentPageUrls = new Set(pageRecords.map((r) => r.url));
|
|
3722
|
+
const changedPages = pageRecords.filter(
|
|
3723
|
+
(r) => !existingPageHashes.has(r.url) || existingPageHashes.get(r.url) !== r.contentHash
|
|
3724
|
+
);
|
|
3725
|
+
const deletedPageUrls = [...existingPageHashes.keys()].filter((url) => !currentPageUrls.has(url));
|
|
2594
3726
|
if (!options.dryRun) {
|
|
2595
|
-
|
|
2596
|
-
|
|
2597
|
-
|
|
2598
|
-
|
|
2599
|
-
|
|
2600
|
-
|
|
2601
|
-
|
|
2602
|
-
|
|
2603
|
-
|
|
2604
|
-
|
|
2605
|
-
|
|
2606
|
-
|
|
2607
|
-
|
|
2608
|
-
|
|
2609
|
-
|
|
2610
|
-
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
|
|
2615
|
-
|
|
2616
|
-
|
|
3727
|
+
if (options.force) {
|
|
3728
|
+
await this.store.deletePages(scope);
|
|
3729
|
+
this.logger.info(`Upserting ${pageRecords.length} page summaries...`);
|
|
3730
|
+
const pageDocs = pageRecords.map((r) => ({
|
|
3731
|
+
id: r.url,
|
|
3732
|
+
data: r.summary ?? r.title,
|
|
3733
|
+
metadata: {
|
|
3734
|
+
title: r.title,
|
|
3735
|
+
url: r.url,
|
|
3736
|
+
description: r.description ?? "",
|
|
3737
|
+
keywords: r.keywords ?? [],
|
|
3738
|
+
summary: r.summary ?? "",
|
|
3739
|
+
tags: r.tags,
|
|
3740
|
+
markdown: r.markdown,
|
|
3741
|
+
routeFile: r.routeFile,
|
|
3742
|
+
routeResolution: r.routeResolution,
|
|
3743
|
+
incomingLinks: r.incomingLinks,
|
|
3744
|
+
outgoingLinks: r.outgoingLinks,
|
|
3745
|
+
outgoingLinkUrls: r.outgoingLinkUrls ?? [],
|
|
3746
|
+
depth: r.depth,
|
|
3747
|
+
indexedAt: r.indexedAt,
|
|
3748
|
+
contentHash: r.contentHash ?? "",
|
|
3749
|
+
publishedAt: r.publishedAt ?? null,
|
|
3750
|
+
...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
|
|
3751
|
+
}
|
|
3752
|
+
}));
|
|
3753
|
+
await this.store.upsertPages(pageDocs, scope);
|
|
3754
|
+
} else {
|
|
3755
|
+
if (changedPages.length > 0) {
|
|
3756
|
+
this.logger.info(`Upserting ${changedPages.length} changed page summaries...`);
|
|
3757
|
+
const pageDocs = changedPages.map((r) => ({
|
|
3758
|
+
id: r.url,
|
|
3759
|
+
data: r.summary ?? r.title,
|
|
3760
|
+
metadata: {
|
|
3761
|
+
title: r.title,
|
|
3762
|
+
url: r.url,
|
|
3763
|
+
description: r.description ?? "",
|
|
3764
|
+
keywords: r.keywords ?? [],
|
|
3765
|
+
summary: r.summary ?? "",
|
|
3766
|
+
tags: r.tags,
|
|
3767
|
+
markdown: r.markdown,
|
|
3768
|
+
routeFile: r.routeFile,
|
|
3769
|
+
routeResolution: r.routeResolution,
|
|
3770
|
+
incomingLinks: r.incomingLinks,
|
|
3771
|
+
outgoingLinks: r.outgoingLinks,
|
|
3772
|
+
outgoingLinkUrls: r.outgoingLinkUrls ?? [],
|
|
3773
|
+
depth: r.depth,
|
|
3774
|
+
indexedAt: r.indexedAt,
|
|
3775
|
+
contentHash: r.contentHash ?? "",
|
|
3776
|
+
publishedAt: r.publishedAt ?? null,
|
|
3777
|
+
...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
|
|
3778
|
+
}
|
|
3779
|
+
}));
|
|
3780
|
+
await this.store.upsertPages(pageDocs, scope);
|
|
3781
|
+
}
|
|
3782
|
+
if (deletedPageUrls.length > 0) {
|
|
3783
|
+
await this.store.deletePagesByIds(deletedPageUrls, scope);
|
|
3784
|
+
}
|
|
3785
|
+
}
|
|
2617
3786
|
}
|
|
3787
|
+
const pagesChanged = options.force ? pageRecords.length : changedPages.length;
|
|
3788
|
+
const pagesDeleted = deletedPageUrls.length;
|
|
2618
3789
|
stageEnd("pages", pagesStart);
|
|
3790
|
+
this.logger.info(`Page changes: ${pagesChanged} changed/new, ${pagesDeleted} deleted, ${pageRecords.length - changedPages.length} unchanged`);
|
|
2619
3791
|
this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
|
|
2620
3792
|
const chunkStart = stageStart();
|
|
2621
3793
|
this.logger.info("Chunking pages...");
|
|
@@ -2624,6 +3796,18 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2624
3796
|
if (typeof maxChunks === "number") {
|
|
2625
3797
|
chunks = chunks.slice(0, maxChunks);
|
|
2626
3798
|
}
|
|
3799
|
+
if (this.hooks.transformChunk) {
|
|
3800
|
+
const transformed = [];
|
|
3801
|
+
for (const chunk of chunks) {
|
|
3802
|
+
const result = await this.hooks.transformChunk(chunk);
|
|
3803
|
+
if (result === null) {
|
|
3804
|
+
this.logger.debug(`Chunk ${chunk.chunkKey} skipped by transformChunk hook`);
|
|
3805
|
+
continue;
|
|
3806
|
+
}
|
|
3807
|
+
transformed.push(result);
|
|
3808
|
+
}
|
|
3809
|
+
chunks = transformed;
|
|
3810
|
+
}
|
|
2627
3811
|
for (const chunk of chunks) {
|
|
2628
3812
|
this.logger.event("chunked", {
|
|
2629
3813
|
url: chunk.url,
|
|
@@ -2636,7 +3820,12 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2636
3820
|
for (const chunk of chunks) {
|
|
2637
3821
|
currentChunkMap.set(chunk.chunkKey, chunk);
|
|
2638
3822
|
}
|
|
2639
|
-
const
|
|
3823
|
+
const chunkHashStart = stageStart();
|
|
3824
|
+
const currentChunkKeys = chunks.map((c) => c.chunkKey);
|
|
3825
|
+
const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.fetchContentHashesForKeys(currentChunkKeys, scope);
|
|
3826
|
+
stageEnd("chunk_hashes", chunkHashStart);
|
|
3827
|
+
this.logger.debug(`Fetched ${existingHashes.size} existing chunk hashes for ${currentChunkKeys.length} current keys`);
|
|
3828
|
+
let changedChunks = chunks.filter((chunk) => {
|
|
2640
3829
|
if (options.force) {
|
|
2641
3830
|
return true;
|
|
2642
3831
|
}
|
|
@@ -2649,39 +3838,45 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2649
3838
|
}
|
|
2650
3839
|
return existingHash !== chunk.contentHash;
|
|
2651
3840
|
});
|
|
2652
|
-
const
|
|
3841
|
+
const existingChunkIds = options.force ? /* @__PURE__ */ new Set() : await this.store.scanChunkIds(scope);
|
|
3842
|
+
const deletes = [...existingChunkIds].filter((chunkKey) => !currentChunkMap.has(chunkKey));
|
|
3843
|
+
if (this.hooks.beforeIndex) {
|
|
3844
|
+
changedChunks = await this.hooks.beforeIndex(changedChunks);
|
|
3845
|
+
}
|
|
2653
3846
|
this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
|
|
2654
3847
|
const upsertStart = stageStart();
|
|
2655
3848
|
let documentsUpserted = 0;
|
|
2656
3849
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
2657
|
-
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash
|
|
2658
|
-
const UPSTASH_CONTENT_LIMIT = 4096;
|
|
2659
|
-
const FIELD_OVERHEAD = 200;
|
|
2660
|
-
const MAX_TEXT_CHARS = UPSTASH_CONTENT_LIMIT - FIELD_OVERHEAD;
|
|
3850
|
+
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
|
|
2661
3851
|
const docs = changedChunks.map((chunk) => {
|
|
2662
|
-
const
|
|
2663
|
-
|
|
2664
|
-
|
|
2665
|
-
|
|
2666
|
-
|
|
2667
|
-
|
|
2668
|
-
const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
|
|
2669
|
-
const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
|
|
3852
|
+
const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
|
|
3853
|
+
if (embeddingText.length > 2e3) {
|
|
3854
|
+
this.logger.warn(
|
|
3855
|
+
`Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
|
|
3856
|
+
);
|
|
3857
|
+
}
|
|
2670
3858
|
return {
|
|
2671
3859
|
id: chunk.chunkKey,
|
|
2672
|
-
|
|
3860
|
+
data: embeddingText,
|
|
2673
3861
|
metadata: {
|
|
2674
|
-
|
|
2675
|
-
scopeName: scope.scopeName,
|
|
3862
|
+
url: chunk.url,
|
|
2676
3863
|
path: chunk.path,
|
|
3864
|
+
title: chunk.title,
|
|
3865
|
+
sectionTitle: chunk.sectionTitle ?? "",
|
|
3866
|
+
headingPath: chunk.headingPath.join(" > "),
|
|
2677
3867
|
snippet: chunk.snippet,
|
|
3868
|
+
chunkText: embeddingText,
|
|
3869
|
+
tags: chunk.tags,
|
|
2678
3870
|
ordinal: chunk.ordinal,
|
|
2679
3871
|
contentHash: chunk.contentHash,
|
|
2680
3872
|
depth: chunk.depth,
|
|
2681
3873
|
incomingLinks: chunk.incomingLinks,
|
|
2682
3874
|
routeFile: chunk.routeFile,
|
|
2683
3875
|
description: chunk.description ?? "",
|
|
2684
|
-
keywords:
|
|
3876
|
+
keywords: chunk.keywords ?? [],
|
|
3877
|
+
publishedAt: chunk.publishedAt ?? null,
|
|
3878
|
+
incomingAnchorText: chunk.incomingAnchorText ?? "",
|
|
3879
|
+
...chunk.meta && Object.keys(chunk.meta).length > 0 ? { meta: chunk.meta } : {}
|
|
2685
3880
|
}
|
|
2686
3881
|
};
|
|
2687
3882
|
});
|
|
@@ -2699,9 +3894,16 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2699
3894
|
} else {
|
|
2700
3895
|
this.logger.info("No chunks to upsert \u2014 all up to date");
|
|
2701
3896
|
}
|
|
3897
|
+
if (this.config.llmsTxt.enable && !options.dryRun) {
|
|
3898
|
+
const llmsStart = stageStart();
|
|
3899
|
+
await writeLlmsTxt(pages, this.config, this.cwd, this.logger);
|
|
3900
|
+
stageEnd("llms_txt", llmsStart);
|
|
3901
|
+
}
|
|
2702
3902
|
this.logger.info("Done.");
|
|
2703
|
-
|
|
3903
|
+
const stats = {
|
|
2704
3904
|
pagesProcessed: pages.length,
|
|
3905
|
+
pagesChanged,
|
|
3906
|
+
pagesDeleted,
|
|
2705
3907
|
chunksTotal: chunks.length,
|
|
2706
3908
|
chunksChanged: changedChunks.length,
|
|
2707
3909
|
documentsUpserted,
|
|
@@ -2710,10 +3912,15 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2710
3912
|
routeBestEffort,
|
|
2711
3913
|
stageTimingsMs
|
|
2712
3914
|
};
|
|
3915
|
+
if (this.hooks.afterIndex) {
|
|
3916
|
+
await this.hooks.afterIndex(stats);
|
|
3917
|
+
}
|
|
3918
|
+
return stats;
|
|
2713
3919
|
}
|
|
2714
3920
|
};
|
|
2715
3921
|
|
|
2716
3922
|
// src/mcp/server.ts
|
|
3923
|
+
import { createHash as createHash2, timingSafeEqual } from "crypto";
|
|
2717
3924
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2718
3925
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
2719
3926
|
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
@@ -2721,16 +3928,139 @@ import { createMcpExpressApp } from "@modelcontextprotocol/sdk/server/express.js
|
|
|
2721
3928
|
import { z as z3 } from "zod";
|
|
2722
3929
|
|
|
2723
3930
|
// src/search/engine.ts
|
|
2724
|
-
import
|
|
3931
|
+
import path12 from "path";
|
|
2725
3932
|
import { z as z2 } from "zod";
|
|
3933
|
+
|
|
3934
|
+
// src/search/related-pages.ts
|
|
3935
|
+
/**
 * Scores how much of their leading path two URLs have in common.
 *
 * Both URLs are split on "/" (empty segments are ignored) and the number of
 * identical segments is counted from the start until the first mismatch.
 * The result is `2 * shared / (lenA + lenB)`, i.e. a Dice-style coefficient
 * computed over the common path prefix.
 *
 * @param {string} urlA - First URL path.
 * @param {string} urlB - Second URL path.
 * @returns {number} 1 when both paths are empty, 0 when exactly one is empty,
 *   otherwise a value in [0, 1].
 */
function diceScore(urlA, urlB) {
  const partsA = urlA.split("/").filter(Boolean);
  const partsB = urlB.split("/").filter(Boolean);
  const totalSegments = partsA.length + partsB.length;
  if (totalSegments === 0) return 1;
  if (partsA.length === 0 || partsB.length === 0) return 0;
  // Index of the first segment of A that differs from B; when B is a strict
  // prefix of A this is B's length because `partsB[index]` is undefined there.
  const firstMismatch = partsA.findIndex((segment, index) => segment !== partsB[index]);
  const shared = firstMismatch === -1 ? partsA.length : firstMismatch;
  return 2 * shared / totalSegments;
}
|
|
3951
|
+
/**
 * Combines the three relatedness signals into a single score.
 *
 * A direct link contributes a flat 0.5, path similarity is weighted 0.3 and
 * semantic similarity 0.2.
 *
 * @param {boolean} isLinked - Whether the pages link to each other in either direction.
 * @param {number} dice - Path-similarity score.
 * @param {number} semantic - Semantic-similarity score.
 * @returns {number} The weighted sum of the three signals.
 */
function compositeScore(isLinked, dice, semantic) {
  const linkBonus = isLinked ? 0.5 : 0;
  return linkBonus + 0.3 * dice + 0.2 * semantic;
}
|
|
3954
|
+
/**
 * Picks the single label that best describes why a page is related.
 *
 * Precedence: an outgoing link wins over an incoming link, which wins over
 * path proximity (dice strictly above 0.4); anything else is "semantic".
 *
 * @param {boolean} isOutgoing - The source page links to the candidate.
 * @param {boolean} isIncoming - The candidate links to the source page.
 * @param {number} dice - Path-similarity score between the two URLs.
 * @returns {"outgoing_link"|"incoming_link"|"sibling"|"semantic"}
 */
function dominantRelationshipType(isOutgoing, isIncoming, dice) {
  return isOutgoing
    ? "outgoing_link"
    : isIncoming
      ? "incoming_link"
      : dice > 0.4
        ? "sibling"
        : "semantic";
}
|
|
3960
|
+
|
|
3961
|
+
// src/search/engine.ts
|
|
3962
|
+
// Validation schema for optional per-request ranking/search overrides.
// Every field is optional, and so is the object itself. Ratio-like knobs are
// constrained to [0, 1]; weights are unconstrained numbers.
var rankingOverridesSchema = (() => {
  // Small factories so each field still gets its own schema instance.
  const optionalFlag = () => z2.boolean().optional();
  const optionalRatio = () => z2.number().min(0).max(1).optional();
  const optionalWeight = () => z2.number().optional();

  const weights = z2.object({
    incomingLinks: optionalWeight(),
    depth: optionalWeight(),
    aggregation: optionalWeight(),
    titleMatch: optionalWeight()
  }).optional();

  const ranking = z2.object({
    enableIncomingLinkBoost: optionalFlag(),
    enableDepthBoost: optionalFlag(),
    aggregationCap: z2.number().int().positive().optional(),
    aggregationDecay: optionalRatio(),
    minChunkScoreRatio: optionalRatio(),
    minScoreRatio: optionalRatio(),
    scoreGapThreshold: optionalRatio(),
    weights
  }).optional();

  const search = z2.object({
    pageSearchWeight: optionalRatio()
  }).optional();

  return z2.object({ ranking, search }).optional();
})();
|
|
2726
3982
|
// Validation schema for a search request. Only `q` is required (non-empty
// after trimming); everything else is an optional refinement.
var requestSchema = (() => {
  // Metadata filter values are limited to JSON scalars.
  const filterValue = z2.union([z2.string(), z2.number(), z2.boolean()]);
  const shape = {
    q: z2.string().trim().min(1),
    topK: z2.number().int().positive().max(100).optional(),
    scope: z2.string().optional(),
    pathPrefix: z2.string().optional(),
    tags: z2.array(z2.string()).optional(),
    filters: z2.record(z2.string(), filterValue).optional(),
    groupBy: z2.enum(["page", "chunk"]).optional(),
    maxSubResults: z2.number().int().positive().max(20).optional(),
    debug: z2.boolean().optional(),
    rankingOverrides: rankingOverridesSchema
  };
  return z2.object(shape);
})();
|
|
3994
|
+
// Hard upper bound on the number of pages used to build a site-structure tree.
var MAX_SITE_STRUCTURE_PAGES = 2000;
|
|
3995
|
+
/**
 * Creates an empty site-tree node for the given URL.
 *
 * The node starts as a non-indexed placeholder with no title, no route file
 * and no children; callers fill those in once the page is known.
 *
 * @param {string} url - URL path the node represents.
 * @param {number} depth - Number of path segments below the root.
 * @returns {{url: string, title: string, depth: number, routeFile: string,
 *   isIndexed: boolean, childCount: number, children: Array}}
 */
function makeNode(url, depth) {
  const node = {
    url,
    title: "",
    depth,
    routeFile: "",
    isIndexed: false,
    childCount: 0,
    children: []
  };
  return node;
}
|
|
3998
|
+
/**
 * Builds a hierarchical site tree from a flat list of pages.
 *
 * Every page URL is normalized and split into segments; a node is created for
 * each ancestor path (placeholders for paths that are not themselves pages),
 * and the page's own node is marked as indexed. Children are sorted by URL and
 * `childCount` is filled in for every node.
 *
 * Nodes are always keyed by the canonical path rebuilt from the segments
 * ("/" + segments joined by "/"). The page node and the `pathPrefix` subtree
 * are looked up by that same key, so a normalized path that still carries a
 * trailing or doubled slash cannot miss its node.
 *
 * @param {Array<{url: string, title: string, routeFile: string}>} pages - Pages to place in the tree.
 * @param {string} [pathPrefix] - When given, the subtree rooted at this path is
 *   returned instead of the whole tree.
 * @returns {object} The root node, the requested subtree root, or an empty
 *   placeholder node when `pathPrefix` matches no node.
 */
function buildTree(pages, pathPrefix) {
  const toSegments = (urlPath) => urlPath.split("/").filter(Boolean);
  const toKey = (segments) => "/" + segments.join("/");
  const nodeMap = /* @__PURE__ */ new Map();
  const root = makeNode("/", 0);
  nodeMap.set("/", root);
  for (const page of pages) {
    const segments = toSegments(normalizeUrlPath(page.url));
    // Walk down from the root, creating any missing ancestors. With no
    // segments the page is the root itself.
    let node = root;
    for (let i = 1; i <= segments.length; i++) {
      const partialUrl = toKey(segments.slice(0, i));
      node = nodeMap.get(partialUrl);
      if (!node) {
        node = makeNode(partialUrl, i);
        nodeMap.set(partialUrl, node);
      }
    }
    node.title = page.title;
    node.routeFile = page.routeFile;
    node.isIndexed = true;
  }
  // Attach every non-root node to its parent (falling back to the root).
  for (const [url, node] of nodeMap) {
    if (url === "/") continue;
    const segments = toSegments(url);
    const parentUrl = segments.length === 1 ? "/" : toKey(segments.slice(0, -1));
    const parent = nodeMap.get(parentUrl) ?? root;
    parent.children.push(node);
  }
  const sortAndCount = (node) => {
    node.children.sort((a, b) => a.url.localeCompare(b.url));
    node.childCount = node.children.length;
    for (const child of node.children) {
      sortAndCount(child);
    }
  };
  sortAndCount(root);
  if (pathPrefix) {
    const normalizedPrefix = normalizeUrlPath(pathPrefix);
    const prefixSegments = toSegments(normalizedPrefix);
    const subtreeRoot = nodeMap.get(toKey(prefixSegments));
    if (subtreeRoot) {
      return subtreeRoot;
    }
    return makeNode(normalizedPrefix, prefixSegments.length);
  }
  return root;
}
|
|
4047
|
+
/**
 * Returns a copy of `base` with the request's ranking/search overrides applied.
 *
 * `search`, `ranking` and `ranking.weights` are each merged one level deep
 * (override keys win); every other top-level key of `base` is carried over
 * unchanged. Neither argument is mutated.
 *
 * @param {object} base - Full configuration; must contain `search`, `ranking` and `ranking.weights`.
 * @param {object} overrides - Partial overrides with optional `search` and `ranking` sections.
 * @returns {object} A new configuration object.
 */
function mergeRankingOverrides(base, overrides) {
  const mergedWeights = { ...base.ranking.weights, ...overrides.ranking?.weights };
  const mergedRanking = { ...base.ranking, ...overrides.ranking, weights: mergedWeights };
  const mergedSearch = { ...base.search, ...overrides.search };
  return { ...base, search: mergedSearch, ranking: mergedRanking };
}
|
|
2734
4064
|
var SearchEngine = class _SearchEngine {
|
|
2735
4065
|
cwd;
|
|
2736
4066
|
config;
|
|
@@ -2741,7 +4071,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
2741
4071
|
this.store = options.store;
|
|
2742
4072
|
}
|
|
2743
4073
|
static async create(options = {}) {
|
|
2744
|
-
const cwd =
|
|
4074
|
+
const cwd = path12.resolve(options.cwd ?? process.cwd());
|
|
2745
4075
|
const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
|
|
2746
4076
|
const store = options.store ?? await createUpstashStore(config);
|
|
2747
4077
|
return new _SearchEngine({
|
|
@@ -2760,125 +4090,203 @@ var SearchEngine = class _SearchEngine {
|
|
|
2760
4090
|
}
|
|
2761
4091
|
const input = parsed.data;
|
|
2762
4092
|
const totalStart = process.hrtime.bigint();
|
|
4093
|
+
const effectiveConfig = input.debug && input.rankingOverrides ? mergeRankingOverrides(this.config, input.rankingOverrides) : this.config;
|
|
2763
4094
|
const resolvedScope = resolveScope(this.config, input.scope);
|
|
2764
4095
|
const topK = input.topK ?? 10;
|
|
4096
|
+
const maxSubResults = input.maxSubResults ?? 5;
|
|
2765
4097
|
const groupByPage = (input.groupBy ?? "page") === "page";
|
|
2766
|
-
const
|
|
2767
|
-
const
|
|
2768
|
-
|
|
2769
|
-
|
|
2770
|
-
|
|
2771
|
-
|
|
2772
|
-
|
|
2773
|
-
|
|
2774
|
-
|
|
4098
|
+
const queryText = input.q;
|
|
4099
|
+
const pathPrefix = input.pathPrefix ? input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}` : void 0;
|
|
4100
|
+
const filterTags = input.tags && input.tags.length > 0 ? input.tags : void 0;
|
|
4101
|
+
const metaFilterStr = input.filters && Object.keys(input.filters).length > 0 ? buildMetaFilterString(input.filters) : "";
|
|
4102
|
+
const metaFilter = metaFilterStr || void 0;
|
|
4103
|
+
const applyPagePostFilters = (hits) => {
|
|
4104
|
+
let filtered = hits;
|
|
4105
|
+
if (pathPrefix) {
|
|
4106
|
+
filtered = filtered.filter((h) => h.url.startsWith(pathPrefix));
|
|
2775
4107
|
}
|
|
2776
|
-
|
|
2777
|
-
|
|
2778
|
-
|
|
4108
|
+
if (filterTags) {
|
|
4109
|
+
filtered = filtered.filter(
|
|
4110
|
+
(h) => filterTags.every((tag) => h.tags.includes(tag))
|
|
4111
|
+
);
|
|
4112
|
+
}
|
|
4113
|
+
return filtered;
|
|
4114
|
+
};
|
|
4115
|
+
const applyChunkPostFilters = (hits) => {
|
|
4116
|
+
let filtered = hits;
|
|
4117
|
+
if (filterTags) {
|
|
4118
|
+
filtered = filtered.filter(
|
|
4119
|
+
(h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
|
|
4120
|
+
);
|
|
4121
|
+
}
|
|
4122
|
+
return filtered;
|
|
4123
|
+
};
|
|
2779
4124
|
const searchStart = process.hrtime.bigint();
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
const
|
|
2783
|
-
const
|
|
2784
|
-
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
-
|
|
2792
|
-
|
|
2793
|
-
|
|
2794
|
-
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
{
|
|
2798
|
-
limit: chunkLimit,
|
|
2799
|
-
semanticWeight: this.config.search.semanticWeight,
|
|
2800
|
-
inputEnrichment: this.config.search.inputEnrichment,
|
|
2801
|
-
reranking: false,
|
|
2802
|
-
filter
|
|
2803
|
-
},
|
|
4125
|
+
if (groupByPage) {
|
|
4126
|
+
const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
|
|
4127
|
+
const pageLimit = Math.max(topK * 2, 20);
|
|
4128
|
+
const pageHits = await this.store.searchPagesByText(
|
|
4129
|
+
queryText,
|
|
4130
|
+
{ limit: pageLimit * fetchMultiplier, filter: metaFilter },
|
|
4131
|
+
resolvedScope
|
|
4132
|
+
);
|
|
4133
|
+
const filteredPages = applyPagePostFilters(pageHits);
|
|
4134
|
+
let rankedPages = rankPageHits(filteredPages, effectiveConfig, input.q, input.debug);
|
|
4135
|
+
rankedPages = trimPagesByScoreGap(rankedPages, effectiveConfig);
|
|
4136
|
+
const topPages = rankedPages.slice(0, topK);
|
|
4137
|
+
const chunkPromises = topPages.map(
|
|
4138
|
+
(page) => this.store.searchChunksByUrl(
|
|
4139
|
+
queryText,
|
|
4140
|
+
page.url,
|
|
4141
|
+
{ limit: maxSubResults, filter: metaFilter },
|
|
2804
4142
|
resolvedScope
|
|
2805
|
-
)
|
|
2806
|
-
|
|
2807
|
-
const
|
|
2808
|
-
|
|
4143
|
+
).then((chunks) => applyChunkPostFilters(chunks))
|
|
4144
|
+
);
|
|
4145
|
+
const allChunks = await Promise.all(chunkPromises);
|
|
4146
|
+
const searchMs = hrTimeMs(searchStart);
|
|
4147
|
+
const results = this.buildPageFirstResults(topPages, allChunks, input.q, input.debug, maxSubResults);
|
|
4148
|
+
return {
|
|
4149
|
+
q: input.q,
|
|
4150
|
+
scope: resolvedScope.scopeName,
|
|
4151
|
+
results,
|
|
4152
|
+
meta: {
|
|
4153
|
+
timingsMs: {
|
|
4154
|
+
search: Math.round(searchMs),
|
|
4155
|
+
total: Math.round(hrTimeMs(totalStart))
|
|
4156
|
+
}
|
|
4157
|
+
}
|
|
4158
|
+
};
|
|
2809
4159
|
} else {
|
|
4160
|
+
const candidateK = Math.max(50, topK);
|
|
4161
|
+
const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
|
|
2810
4162
|
const hits = await this.store.search(
|
|
2811
|
-
|
|
2812
|
-
{
|
|
2813
|
-
limit: candidateK,
|
|
2814
|
-
semanticWeight: this.config.search.semanticWeight,
|
|
2815
|
-
inputEnrichment: this.config.search.inputEnrichment,
|
|
2816
|
-
reranking: this.config.search.reranking,
|
|
2817
|
-
filter
|
|
2818
|
-
},
|
|
4163
|
+
queryText,
|
|
4164
|
+
{ limit: candidateK * fetchMultiplier, filter: metaFilter },
|
|
2819
4165
|
resolvedScope
|
|
2820
4166
|
);
|
|
2821
|
-
|
|
2822
|
-
|
|
2823
|
-
|
|
2824
|
-
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
2828
|
-
|
|
2829
|
-
|
|
2830
|
-
|
|
2831
|
-
|
|
2832
|
-
|
|
4167
|
+
let filtered = hits;
|
|
4168
|
+
if (pathPrefix) {
|
|
4169
|
+
filtered = filtered.filter((h) => h.metadata.url.startsWith(pathPrefix));
|
|
4170
|
+
}
|
|
4171
|
+
if (filterTags) {
|
|
4172
|
+
filtered = filtered.filter(
|
|
4173
|
+
(h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
|
|
4174
|
+
);
|
|
4175
|
+
}
|
|
4176
|
+
const ranked = rankHits(filtered, effectiveConfig, input.q, input.debug);
|
|
4177
|
+
const searchMs = hrTimeMs(searchStart);
|
|
4178
|
+
const results = this.buildResults(ranked, topK, false, maxSubResults, input.q, input.debug, effectiveConfig);
|
|
4179
|
+
return {
|
|
4180
|
+
q: input.q,
|
|
4181
|
+
scope: resolvedScope.scopeName,
|
|
4182
|
+
results,
|
|
4183
|
+
meta: {
|
|
4184
|
+
timingsMs: {
|
|
4185
|
+
search: Math.round(searchMs),
|
|
4186
|
+
total: Math.round(hrTimeMs(totalStart))
|
|
4187
|
+
}
|
|
2833
4188
|
}
|
|
4189
|
+
};
|
|
4190
|
+
}
|
|
4191
|
+
}
|
|
4192
|
+
buildPageFirstResults(rankedPages, allChunks, query, debug, maxSubResults = 5) {
|
|
4193
|
+
return rankedPages.map((page, i) => {
|
|
4194
|
+
const chunks = allChunks[i] ?? [];
|
|
4195
|
+
const bestChunk = chunks[0];
|
|
4196
|
+
const snippet = bestChunk ? query ? queryAwareExcerpt(bestChunk.metadata.chunkText, query) : toSnippet(bestChunk.metadata.chunkText) : page.description || page.title;
|
|
4197
|
+
const result = {
|
|
4198
|
+
url: page.url,
|
|
4199
|
+
title: page.title,
|
|
4200
|
+
sectionTitle: bestChunk?.metadata.sectionTitle || void 0,
|
|
4201
|
+
snippet,
|
|
4202
|
+
chunkText: bestChunk?.metadata.chunkText || void 0,
|
|
4203
|
+
score: Number(page.finalScore.toFixed(6)),
|
|
4204
|
+
routeFile: page.routeFile,
|
|
4205
|
+
chunks: chunks.length > 0 ? chunks.slice(0, maxSubResults).map((c) => ({
|
|
4206
|
+
sectionTitle: c.metadata.sectionTitle || void 0,
|
|
4207
|
+
snippet: query ? queryAwareExcerpt(c.metadata.chunkText, query) : toSnippet(c.metadata.chunkText),
|
|
4208
|
+
chunkText: c.metadata.chunkText || void 0,
|
|
4209
|
+
headingPath: c.metadata.headingPath,
|
|
4210
|
+
score: Number(c.score.toFixed(6))
|
|
4211
|
+
})) : void 0
|
|
4212
|
+
};
|
|
4213
|
+
if (debug && page.breakdown) {
|
|
4214
|
+
result.breakdown = {
|
|
4215
|
+
baseScore: page.breakdown.baseScore,
|
|
4216
|
+
incomingLinkBoost: page.breakdown.incomingLinkBoost,
|
|
4217
|
+
depthBoost: page.breakdown.depthBoost,
|
|
4218
|
+
titleMatchBoost: page.breakdown.titleMatchBoost,
|
|
4219
|
+
freshnessBoost: page.breakdown.freshnessBoost,
|
|
4220
|
+
anchorTextMatchBoost: 0
|
|
4221
|
+
};
|
|
2834
4222
|
}
|
|
2835
|
-
|
|
4223
|
+
return result;
|
|
4224
|
+
});
|
|
2836
4225
|
}
|
|
2837
|
-
ensureSnippet(hit) {
|
|
4226
|
+
ensureSnippet(hit, query) {
|
|
4227
|
+
const chunkText = hit.hit.metadata.chunkText;
|
|
4228
|
+
if (query && chunkText) return queryAwareExcerpt(chunkText, query);
|
|
2838
4229
|
const snippet = hit.hit.metadata.snippet;
|
|
2839
4230
|
if (snippet && snippet.length >= 30) return snippet;
|
|
2840
|
-
const chunkText = hit.hit.metadata.chunkText;
|
|
2841
4231
|
if (chunkText) return toSnippet(chunkText);
|
|
2842
4232
|
return snippet || "";
|
|
2843
4233
|
}
|
|
2844
|
-
buildResults(ordered, topK, groupByPage,
|
|
4234
|
+
buildResults(ordered, topK, groupByPage, maxSubResults, query, debug, config) {
|
|
4235
|
+
const cfg = config ?? this.config;
|
|
2845
4236
|
if (groupByPage) {
|
|
2846
|
-
let pages = aggregateByPage(ordered,
|
|
2847
|
-
pages = trimByScoreGap(pages,
|
|
2848
|
-
const minRatio =
|
|
4237
|
+
let pages = aggregateByPage(ordered, cfg);
|
|
4238
|
+
pages = trimByScoreGap(pages, cfg);
|
|
4239
|
+
const minRatio = cfg.ranking.minChunkScoreRatio;
|
|
2849
4240
|
return pages.slice(0, topK).map((page) => {
|
|
2850
4241
|
const bestScore = page.bestChunk.finalScore;
|
|
2851
4242
|
const minChunkScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
|
|
2852
|
-
const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0,
|
|
2853
|
-
|
|
4243
|
+
const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, maxSubResults);
|
|
4244
|
+
const result = {
|
|
2854
4245
|
url: page.url,
|
|
2855
4246
|
title: page.title,
|
|
2856
4247
|
sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
|
|
2857
|
-
snippet: this.ensureSnippet(page.bestChunk),
|
|
4248
|
+
snippet: this.ensureSnippet(page.bestChunk, query),
|
|
4249
|
+
chunkText: page.bestChunk.hit.metadata.chunkText || void 0,
|
|
2858
4250
|
score: Number(page.pageScore.toFixed(6)),
|
|
2859
4251
|
routeFile: page.routeFile,
|
|
2860
|
-
chunks: meaningful.length
|
|
4252
|
+
chunks: meaningful.length >= 1 ? meaningful.map((c) => ({
|
|
2861
4253
|
sectionTitle: c.hit.metadata.sectionTitle || void 0,
|
|
2862
|
-
snippet: this.ensureSnippet(c),
|
|
4254
|
+
snippet: this.ensureSnippet(c, query),
|
|
4255
|
+
chunkText: c.hit.metadata.chunkText || void 0,
|
|
2863
4256
|
headingPath: c.hit.metadata.headingPath,
|
|
2864
4257
|
score: Number(c.finalScore.toFixed(6))
|
|
2865
4258
|
})) : void 0
|
|
2866
4259
|
};
|
|
4260
|
+
if (debug && page.bestChunk.breakdown) {
|
|
4261
|
+
result.breakdown = page.bestChunk.breakdown;
|
|
4262
|
+
}
|
|
4263
|
+
return result;
|
|
2867
4264
|
});
|
|
2868
4265
|
} else {
|
|
2869
4266
|
let filtered = ordered;
|
|
2870
|
-
const
|
|
2871
|
-
if (
|
|
2872
|
-
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
|
|
2876
|
-
|
|
2877
|
-
|
|
2878
|
-
|
|
2879
|
-
|
|
2880
|
-
|
|
2881
|
-
|
|
4267
|
+
const minScoreRatio = cfg.ranking.minScoreRatio;
|
|
4268
|
+
if (minScoreRatio > 0 && ordered.length > 0) {
|
|
4269
|
+
const topScore = ordered[0].finalScore;
|
|
4270
|
+
if (Number.isFinite(topScore) && topScore > 0) {
|
|
4271
|
+
const threshold = topScore * minScoreRatio;
|
|
4272
|
+
filtered = ordered.filter((entry) => entry.finalScore >= threshold);
|
|
4273
|
+
}
|
|
4274
|
+
}
|
|
4275
|
+
return filtered.slice(0, topK).map(({ hit, finalScore, breakdown }) => {
|
|
4276
|
+
const result = {
|
|
4277
|
+
url: hit.metadata.url,
|
|
4278
|
+
title: hit.metadata.title,
|
|
4279
|
+
sectionTitle: hit.metadata.sectionTitle || void 0,
|
|
4280
|
+
snippet: this.ensureSnippet({ hit, finalScore }, query),
|
|
4281
|
+
chunkText: hit.metadata.chunkText || void 0,
|
|
4282
|
+
score: Number(finalScore.toFixed(6)),
|
|
4283
|
+
routeFile: hit.metadata.routeFile
|
|
4284
|
+
};
|
|
4285
|
+
if (debug && breakdown) {
|
|
4286
|
+
result.breakdown = breakdown;
|
|
4287
|
+
}
|
|
4288
|
+
return result;
|
|
4289
|
+
});
|
|
2882
4290
|
}
|
|
2883
4291
|
}
|
|
2884
4292
|
async getPage(pathOrUrl, scope) {
|
|
@@ -2904,6 +4312,116 @@ var SearchEngine = class _SearchEngine {
|
|
|
2904
4312
|
markdown: page.markdown
|
|
2905
4313
|
};
|
|
2906
4314
|
}
|
|
4315
|
+
async listPages(opts) {
|
|
4316
|
+
const resolvedScope = resolveScope(this.config, opts?.scope);
|
|
4317
|
+
const pathPrefix = opts?.pathPrefix ? opts.pathPrefix.startsWith("/") ? opts.pathPrefix : `/${opts.pathPrefix}` : void 0;
|
|
4318
|
+
return this.store.listPages(resolvedScope, {
|
|
4319
|
+
cursor: opts?.cursor,
|
|
4320
|
+
limit: opts?.limit,
|
|
4321
|
+
pathPrefix
|
|
4322
|
+
});
|
|
4323
|
+
}
|
|
4324
|
+
async getSiteStructure(opts) {
|
|
4325
|
+
const maxPages = Math.min(opts?.maxPages ?? MAX_SITE_STRUCTURE_PAGES, MAX_SITE_STRUCTURE_PAGES);
|
|
4326
|
+
const allPages = [];
|
|
4327
|
+
let cursor;
|
|
4328
|
+
let truncated = false;
|
|
4329
|
+
do {
|
|
4330
|
+
const result = await this.listPages({
|
|
4331
|
+
pathPrefix: opts?.pathPrefix,
|
|
4332
|
+
scope: opts?.scope,
|
|
4333
|
+
cursor,
|
|
4334
|
+
limit: 200
|
|
4335
|
+
});
|
|
4336
|
+
allPages.push(...result.pages);
|
|
4337
|
+
cursor = result.nextCursor;
|
|
4338
|
+
if (allPages.length >= maxPages) {
|
|
4339
|
+
truncated = allPages.length > maxPages || !!cursor;
|
|
4340
|
+
allPages.length = maxPages;
|
|
4341
|
+
break;
|
|
4342
|
+
}
|
|
4343
|
+
} while (cursor);
|
|
4344
|
+
const root = buildTree(allPages, opts?.pathPrefix);
|
|
4345
|
+
return {
|
|
4346
|
+
root,
|
|
4347
|
+
totalPages: allPages.length,
|
|
4348
|
+
truncated
|
|
4349
|
+
};
|
|
4350
|
+
}
|
|
4351
|
+
async getRelatedPages(pathOrUrl, opts) {
|
|
4352
|
+
const resolvedScope = resolveScope(this.config, opts?.scope);
|
|
4353
|
+
const urlPath = this.resolveInputPath(pathOrUrl);
|
|
4354
|
+
const topK = Math.min(opts?.topK ?? 10, 25);
|
|
4355
|
+
const source = await this.store.fetchPageWithVector(urlPath, resolvedScope);
|
|
4356
|
+
if (!source) {
|
|
4357
|
+
throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
|
|
4358
|
+
}
|
|
4359
|
+
const sourceOutgoing = new Set(source.metadata.outgoingLinkUrls ?? []);
|
|
4360
|
+
const semanticHits = await this.store.searchPagesByVector(
|
|
4361
|
+
source.vector,
|
|
4362
|
+
{ limit: 50 },
|
|
4363
|
+
resolvedScope
|
|
4364
|
+
);
|
|
4365
|
+
const filteredHits = semanticHits.filter((h) => h.url !== urlPath);
|
|
4366
|
+
const semanticScoreMap = /* @__PURE__ */ new Map();
|
|
4367
|
+
for (const hit of filteredHits) {
|
|
4368
|
+
semanticScoreMap.set(hit.url, hit.score);
|
|
4369
|
+
}
|
|
4370
|
+
const candidateUrls = /* @__PURE__ */ new Set();
|
|
4371
|
+
for (const hit of filteredHits) {
|
|
4372
|
+
candidateUrls.add(hit.url);
|
|
4373
|
+
}
|
|
4374
|
+
for (const url of sourceOutgoing) {
|
|
4375
|
+
if (url !== urlPath) candidateUrls.add(url);
|
|
4376
|
+
}
|
|
4377
|
+
const missingUrls = [...sourceOutgoing].filter(
|
|
4378
|
+
(u) => u !== urlPath && !semanticScoreMap.has(u)
|
|
4379
|
+
);
|
|
4380
|
+
const fetchedPages = missingUrls.length > 0 ? await this.store.fetchPagesBatch(missingUrls, resolvedScope) : [];
|
|
4381
|
+
const metaMap = /* @__PURE__ */ new Map();
|
|
4382
|
+
for (const hit of filteredHits) {
|
|
4383
|
+
metaMap.set(hit.url, { title: hit.title, routeFile: hit.routeFile, outgoingLinkUrls: [] });
|
|
4384
|
+
}
|
|
4385
|
+
for (const p of fetchedPages) {
|
|
4386
|
+
metaMap.set(p.url, { title: p.title, routeFile: p.routeFile, outgoingLinkUrls: p.outgoingLinkUrls });
|
|
4387
|
+
}
|
|
4388
|
+
const semanticUrls = filteredHits.map((h) => h.url);
|
|
4389
|
+
if (semanticUrls.length > 0) {
|
|
4390
|
+
const semanticPageData = await this.store.fetchPagesBatch(semanticUrls, resolvedScope);
|
|
4391
|
+
for (const p of semanticPageData) {
|
|
4392
|
+
const existing = metaMap.get(p.url);
|
|
4393
|
+
if (existing) {
|
|
4394
|
+
existing.outgoingLinkUrls = p.outgoingLinkUrls;
|
|
4395
|
+
}
|
|
4396
|
+
}
|
|
4397
|
+
}
|
|
4398
|
+
const candidates = [];
|
|
4399
|
+
for (const url of candidateUrls) {
|
|
4400
|
+
const meta = metaMap.get(url);
|
|
4401
|
+
if (!meta) continue;
|
|
4402
|
+
const isOutgoing = sourceOutgoing.has(url);
|
|
4403
|
+
const isIncoming = meta.outgoingLinkUrls.includes(urlPath);
|
|
4404
|
+
const isLinked = isOutgoing || isIncoming;
|
|
4405
|
+
const dice = diceScore(urlPath, url);
|
|
4406
|
+
const semantic = semanticScoreMap.get(url) ?? 0;
|
|
4407
|
+
const score = compositeScore(isLinked, dice, semantic);
|
|
4408
|
+
const relationshipType = dominantRelationshipType(isOutgoing, isIncoming, dice);
|
|
4409
|
+
candidates.push({
|
|
4410
|
+
url,
|
|
4411
|
+
title: meta.title,
|
|
4412
|
+
score: Number(score.toFixed(6)),
|
|
4413
|
+
relationshipType,
|
|
4414
|
+
routeFile: meta.routeFile
|
|
4415
|
+
});
|
|
4416
|
+
}
|
|
4417
|
+
candidates.sort((a, b) => b.score - a.score);
|
|
4418
|
+
const results = candidates.slice(0, topK);
|
|
4419
|
+
return {
|
|
4420
|
+
sourceUrl: urlPath,
|
|
4421
|
+
scope: resolvedScope.scopeName,
|
|
4422
|
+
relatedPages: results
|
|
4423
|
+
};
|
|
4424
|
+
}
|
|
2907
4425
|
async health() {
|
|
2908
4426
|
return this.store.health();
|
|
2909
4427
|
}
|
|
@@ -2928,14 +4446,40 @@ function createServer(engine) {
|
|
|
2928
4446
|
server.registerTool(
|
|
2929
4447
|
"search",
|
|
2930
4448
|
{
|
|
2931
|
-
description:
|
|
4449
|
+
description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
|
|
2932
4450
|
inputSchema: {
|
|
2933
4451
|
query: z3.string().min(1),
|
|
2934
4452
|
scope: z3.string().optional(),
|
|
2935
4453
|
topK: z3.number().int().positive().max(100).optional(),
|
|
2936
4454
|
pathPrefix: z3.string().optional(),
|
|
2937
4455
|
tags: z3.array(z3.string()).optional(),
|
|
2938
|
-
|
|
4456
|
+
filters: z3.record(z3.string(), z3.union([z3.string(), z3.number(), z3.boolean()])).optional(),
|
|
4457
|
+
groupBy: z3.enum(["page", "chunk"]).optional(),
|
|
4458
|
+
maxSubResults: z3.number().int().positive().max(20).optional()
|
|
4459
|
+
},
|
|
4460
|
+
outputSchema: {
|
|
4461
|
+
q: z3.string(),
|
|
4462
|
+
scope: z3.string(),
|
|
4463
|
+
results: z3.array(z3.object({
|
|
4464
|
+
url: z3.string(),
|
|
4465
|
+
title: z3.string(),
|
|
4466
|
+
sectionTitle: z3.string().optional(),
|
|
4467
|
+
snippet: z3.string(),
|
|
4468
|
+
score: z3.number(),
|
|
4469
|
+
routeFile: z3.string(),
|
|
4470
|
+
chunks: z3.array(z3.object({
|
|
4471
|
+
sectionTitle: z3.string().optional(),
|
|
4472
|
+
snippet: z3.string(),
|
|
4473
|
+
headingPath: z3.array(z3.string()),
|
|
4474
|
+
score: z3.number()
|
|
4475
|
+
})).optional()
|
|
4476
|
+
})),
|
|
4477
|
+
meta: z3.object({
|
|
4478
|
+
timingsMs: z3.object({
|
|
4479
|
+
search: z3.number(),
|
|
4480
|
+
total: z3.number()
|
|
4481
|
+
})
|
|
4482
|
+
})
|
|
2939
4483
|
}
|
|
2940
4484
|
},
|
|
2941
4485
|
async (input) => {
|
|
@@ -2945,7 +4489,9 @@ function createServer(engine) {
|
|
|
2945
4489
|
scope: input.scope,
|
|
2946
4490
|
pathPrefix: input.pathPrefix,
|
|
2947
4491
|
tags: input.tags,
|
|
2948
|
-
|
|
4492
|
+
filters: input.filters,
|
|
4493
|
+
groupBy: input.groupBy,
|
|
4494
|
+
maxSubResults: input.maxSubResults
|
|
2949
4495
|
});
|
|
2950
4496
|
return {
|
|
2951
4497
|
content: [
|
|
@@ -2953,7 +4499,8 @@ function createServer(engine) {
|
|
|
2953
4499
|
type: "text",
|
|
2954
4500
|
text: JSON.stringify(result, null, 2)
|
|
2955
4501
|
}
|
|
2956
|
-
]
|
|
4502
|
+
],
|
|
4503
|
+
structuredContent: result
|
|
2957
4504
|
};
|
|
2958
4505
|
}
|
|
2959
4506
|
);
|
|
@@ -2978,34 +4525,175 @@ function createServer(engine) {
|
|
|
2978
4525
|
};
|
|
2979
4526
|
}
|
|
2980
4527
|
);
|
|
2981
|
-
|
|
2982
|
-
|
|
2983
|
-
|
|
2984
|
-
|
|
2985
|
-
|
|
2986
|
-
|
|
2987
|
-
|
|
2988
|
-
|
|
2989
|
-
|
|
2990
|
-
|
|
2991
|
-
|
|
2992
|
-
|
|
2993
|
-
|
|
2994
|
-
|
|
2995
|
-
|
|
2996
|
-
|
|
2997
|
-
|
|
2998
|
-
|
|
2999
|
-
|
|
3000
|
-
|
|
3001
|
-
|
|
3002
|
-
|
|
3003
|
-
|
|
3004
|
-
|
|
3005
|
-
|
|
3006
|
-
|
|
3007
|
-
|
|
3008
|
-
|
|
4528
|
+
server.registerTool(
|
|
4529
|
+
"list_pages",
|
|
4530
|
+
{
|
|
4531
|
+
description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
|
|
4532
|
+
inputSchema: {
|
|
4533
|
+
pathPrefix: z3.string().optional(),
|
|
4534
|
+
cursor: z3.string().optional(),
|
|
4535
|
+
limit: z3.number().int().positive().max(200).optional(),
|
|
4536
|
+
scope: z3.string().optional()
|
|
4537
|
+
}
|
|
4538
|
+
},
|
|
4539
|
+
async (input) => {
|
|
4540
|
+
const result = await engine.listPages({
|
|
4541
|
+
pathPrefix: input.pathPrefix,
|
|
4542
|
+
cursor: input.cursor,
|
|
4543
|
+
limit: input.limit,
|
|
4544
|
+
scope: input.scope
|
|
4545
|
+
});
|
|
4546
|
+
return {
|
|
4547
|
+
content: [
|
|
4548
|
+
{
|
|
4549
|
+
type: "text",
|
|
4550
|
+
text: JSON.stringify(result, null, 2)
|
|
4551
|
+
}
|
|
4552
|
+
]
|
|
4553
|
+
};
|
|
4554
|
+
}
|
|
4555
|
+
);
|
|
4556
|
+
server.registerTool(
|
|
4557
|
+
"get_site_structure",
|
|
4558
|
+
{
|
|
4559
|
+
description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
|
|
4560
|
+
inputSchema: {
|
|
4561
|
+
pathPrefix: z3.string().optional(),
|
|
4562
|
+
scope: z3.string().optional(),
|
|
4563
|
+
maxPages: z3.number().int().positive().max(2e3).optional()
|
|
4564
|
+
}
|
|
4565
|
+
},
|
|
4566
|
+
async (input) => {
|
|
4567
|
+
const result = await engine.getSiteStructure({
|
|
4568
|
+
pathPrefix: input.pathPrefix,
|
|
4569
|
+
scope: input.scope,
|
|
4570
|
+
maxPages: input.maxPages
|
|
4571
|
+
});
|
|
4572
|
+
return {
|
|
4573
|
+
content: [
|
|
4574
|
+
{
|
|
4575
|
+
type: "text",
|
|
4576
|
+
text: JSON.stringify(result, null, 2)
|
|
4577
|
+
}
|
|
4578
|
+
]
|
|
4579
|
+
};
|
|
4580
|
+
}
|
|
4581
|
+
);
|
|
4582
|
+
server.registerTool(
|
|
4583
|
+
"find_source_file",
|
|
4584
|
+
{
|
|
4585
|
+
description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
|
|
4586
|
+
inputSchema: {
|
|
4587
|
+
query: z3.string().min(1),
|
|
4588
|
+
scope: z3.string().optional()
|
|
4589
|
+
}
|
|
4590
|
+
},
|
|
4591
|
+
async (input) => {
|
|
4592
|
+
const result = await engine.search({
|
|
4593
|
+
q: input.query,
|
|
4594
|
+
topK: 1,
|
|
4595
|
+
scope: input.scope
|
|
4596
|
+
});
|
|
4597
|
+
if (result.results.length === 0) {
|
|
4598
|
+
return {
|
|
4599
|
+
content: [
|
|
4600
|
+
{
|
|
4601
|
+
type: "text",
|
|
4602
|
+
text: JSON.stringify({
|
|
4603
|
+
error: "No matching content found for the given query."
|
|
4604
|
+
})
|
|
4605
|
+
}
|
|
4606
|
+
]
|
|
4607
|
+
};
|
|
4608
|
+
}
|
|
4609
|
+
const match = result.results[0];
|
|
4610
|
+
const { url, routeFile, sectionTitle, snippet } = match;
|
|
4611
|
+
return {
|
|
4612
|
+
content: [
|
|
4613
|
+
{
|
|
4614
|
+
type: "text",
|
|
4615
|
+
text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
|
|
4616
|
+
}
|
|
4617
|
+
]
|
|
4618
|
+
};
|
|
4619
|
+
}
|
|
4620
|
+
);
|
|
4621
|
+
server.registerTool(
|
|
4622
|
+
"get_related_pages",
|
|
4623
|
+
{
|
|
4624
|
+
description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
|
|
4625
|
+
inputSchema: {
|
|
4626
|
+
pathOrUrl: z3.string().min(1),
|
|
4627
|
+
scope: z3.string().optional(),
|
|
4628
|
+
topK: z3.number().int().positive().max(25).optional()
|
|
4629
|
+
}
|
|
4630
|
+
},
|
|
4631
|
+
async (input) => {
|
|
4632
|
+
const result = await engine.getRelatedPages(input.pathOrUrl, {
|
|
4633
|
+
topK: input.topK,
|
|
4634
|
+
scope: input.scope
|
|
4635
|
+
});
|
|
4636
|
+
return {
|
|
4637
|
+
content: [
|
|
4638
|
+
{
|
|
4639
|
+
type: "text",
|
|
4640
|
+
text: JSON.stringify(result, null, 2)
|
|
4641
|
+
}
|
|
4642
|
+
]
|
|
4643
|
+
};
|
|
4644
|
+
}
|
|
4645
|
+
);
|
|
4646
|
+
return server;
|
|
4647
|
+
}
|
|
4648
|
+
/**
 * Resolve the API key configured for the MCP HTTP endpoint.
 *
 * An explicit `config.mcp.http.apiKey` wins whenever it is neither null nor
 * undefined (an empty string is returned as-is). Otherwise, if
 * `config.mcp.http.apiKeyEnv` is truthy, the value of that environment
 * variable is returned.
 *
 * @param config Object exposing `mcp.http.apiKey` and `mcp.http.apiKeyEnv`.
 * @returns The resolved key, or undefined when nothing is configured.
 */
function resolveApiKey(config) {
  const { apiKey, apiKeyEnv } = config.mcp.http;
  if (apiKey !== null && apiKey !== undefined) {
    return apiKey;
  }
  if (!apiKeyEnv) {
    return undefined;
  }
  return process.env[apiKeyEnv];
}
|
|
4651
|
+
/**
 * Compare a caller-supplied API key against the expected key.
 *
 * Both values are hashed with SHA-256 first so that `timingSafeEqual` always
 * receives equal-length buffers (it throws on a length mismatch) and the
 * comparison does not depend on the length of either key.
 *
 * @param provided Key taken from the request (string or binary view).
 * @param expected Configured key (string or binary view).
 * @returns true only when both values are hashable and their digests match.
 */
function verifyApiKey(provided, expected) {
  // Hash.update() throws a TypeError for anything that is not a string or a
  // binary view (e.g. an array-valued or missing header). Fail closed instead
  // of letting a malformed credential crash the request handler.
  const isHashable = (value) => typeof value === "string" || ArrayBuffer.isView(value);
  if (!isHashable(provided) || !isHashable(expected)) {
    return false;
  }
  const providedDigest = createHash2("sha256").update(provided).digest();
  const expectedDigest = createHash2("sha256").update(expected).digest();
  return timingSafeEqual(providedDigest, expectedDigest);
}
|
|
4656
|
+
/**
 * Re-point console methods at stderr, prefixing each line with a level tag.
 *
 * Invoked by runMcpServer before the stdio transport is started. In Node,
 * `console.log`, `console.info` and `console.debug` all write to stdout, so all
 * three are redirected; `console.warn` is replaced as well so its output
 * carries the same tagged format.
 *
 * Arguments are stringified with `String()` and joined with single spaces.
 * The replacement is process-wide and is not undone.
 */
function redirectConsoleToStderr() {
  // Build a writer for one level tag; process.stderr.write is looked up at
  // call time, not captured here.
  const toStderr = (tag) => (...args) => {
    process.stderr.write(`[${tag}] ${args.map(String).join(" ")}\n`);
  };
  console.log = toStderr("LOG");
  console.info = toStderr("INFO");
  console.debug = toStderr("DEBUG");
  console.warn = toStderr("WARN");
}
|
|
4668
|
+
async function startHttpServer(serverFactory, config, opts) {
|
|
4669
|
+
const app = createMcpExpressApp();
|
|
4670
|
+
const port = opts.httpPort ?? config.mcp.http.port;
|
|
4671
|
+
const endpointPath = opts.httpPath ?? config.mcp.http.path;
|
|
4672
|
+
const isPublic = config.mcp.access === "public";
|
|
4673
|
+
const host = isPublic ? "0.0.0.0" : "127.0.0.1";
|
|
4674
|
+
const apiKey = isPublic ? resolveApiKey(config) : void 0;
|
|
4675
|
+
app.post(endpointPath, async (req, res) => {
|
|
4676
|
+
if (isPublic && apiKey) {
|
|
4677
|
+
const authHeader = req.headers["authorization"];
|
|
4678
|
+
const provided = (authHeader?.startsWith("Bearer ") ? authHeader.slice(7) : void 0) ?? req.headers["x-api-key"] ?? "";
|
|
4679
|
+
if (!provided || !verifyApiKey(provided, apiKey)) {
|
|
4680
|
+
res.status(401).json({
|
|
4681
|
+
jsonrpc: "2.0",
|
|
4682
|
+
error: { code: -32001, message: "Unauthorized" },
|
|
4683
|
+
id: null
|
|
4684
|
+
});
|
|
4685
|
+
return;
|
|
4686
|
+
}
|
|
4687
|
+
}
|
|
4688
|
+
const server = serverFactory();
|
|
4689
|
+
const transport = new StreamableHTTPServerTransport({
|
|
4690
|
+
sessionIdGenerator: void 0
|
|
4691
|
+
});
|
|
4692
|
+
try {
|
|
4693
|
+
await server.connect(transport);
|
|
4694
|
+
await transport.handleRequest(req, res, req.body);
|
|
4695
|
+
res.on("close", () => {
|
|
4696
|
+
transport.close();
|
|
3009
4697
|
server.close();
|
|
3010
4698
|
});
|
|
3011
4699
|
} catch (error) {
|
|
@@ -3046,9 +4734,12 @@ async function startHttpServer(serverFactory, config, opts) {
|
|
|
3046
4734
|
);
|
|
3047
4735
|
});
|
|
3048
4736
|
await new Promise((resolve, reject) => {
|
|
3049
|
-
const instance = app.listen(port,
|
|
3050
|
-
process.stderr.write(`SearchSocket MCP HTTP server listening on http
|
|
4737
|
+
const instance = app.listen(port, host, () => {
|
|
4738
|
+
process.stderr.write(`SearchSocket MCP HTTP server listening on http://${host}:${port}${endpointPath}
|
|
3051
4739
|
`);
|
|
4740
|
+
if (isPublic) {
|
|
4741
|
+
process.stderr.write("WARNING: Server is in public mode. Ensure HTTPS is configured via a reverse proxy for production use.\n");
|
|
4742
|
+
}
|
|
3052
4743
|
resolve();
|
|
3053
4744
|
});
|
|
3054
4745
|
instance.once("error", reject);
|
|
@@ -3063,6 +4754,13 @@ async function runMcpServer(options = {}) {
|
|
|
3063
4754
|
cwd: options.cwd,
|
|
3064
4755
|
configPath: options.configPath
|
|
3065
4756
|
});
|
|
4757
|
+
if (options.access) config.mcp.access = options.access;
|
|
4758
|
+
if (options.apiKey) config.mcp.http.apiKey = options.apiKey;
|
|
4759
|
+
if (config.mcp.access === "public" && !resolveApiKey(config)) {
|
|
4760
|
+
throw new Error(
|
|
4761
|
+
'MCP access is "public" but no API key is configured. Pass --api-key or set mcp.http.apiKey / mcp.http.apiKeyEnv in config.'
|
|
4762
|
+
);
|
|
4763
|
+
}
|
|
3066
4764
|
const resolvedTransport = options.transport ?? config.mcp.transport;
|
|
3067
4765
|
if (resolvedTransport === "stdio") {
|
|
3068
4766
|
redirectConsoleToStderr();
|
|
@@ -3081,6 +4779,837 @@ async function runMcpServer(options = {}) {
|
|
|
3081
4779
|
await server.connect(stdioTransport);
|
|
3082
4780
|
}
|
|
3083
4781
|
|
|
4782
|
+
// src/playground/server.ts
|
|
4783
|
+
import express from "express";
|
|
4784
|
+
|
|
4785
|
+
// src/playground/playground.html
|
|
4786
|
+
var playground_default = `<!DOCTYPE html>
|
|
4787
|
+
<html lang="en">
|
|
4788
|
+
<head>
|
|
4789
|
+
<meta charset="utf-8">
|
|
4790
|
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
4791
|
+
<title>SearchSocket Playground</title>
|
|
4792
|
+
<style>
|
|
4793
|
+
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
|
4794
|
+
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #f8f9fa; color: #1a1a2e; padding: 2rem; max-width: 900px; margin: 0 auto; }
|
|
4795
|
+
h1 { font-size: 1.5rem; margin-bottom: 1.5rem; color: #16213e; }
|
|
4796
|
+
h1 span { font-weight: 400; color: #888; }
|
|
4797
|
+
.search-box { display: flex; gap: 0.5rem; margin-bottom: 0.5rem; }
|
|
4798
|
+
.search-box input { flex: 1; padding: 0.75rem 1rem; font-size: 1rem; border: 2px solid #ddd; border-radius: 8px; outline: none; transition: border-color 0.2s; }
|
|
4799
|
+
.search-box input:focus { border-color: #4361ee; }
|
|
4800
|
+
.search-opts { display: flex; gap: 1rem; margin-bottom: 1.5rem; font-size: 0.85rem; color: #555; align-items: center; }
|
|
4801
|
+
.search-opts label { display: flex; align-items: center; gap: 0.3rem; cursor: pointer; }
|
|
4802
|
+
.search-opts select { padding: 0.25rem 0.5rem; border: 1px solid #ccc; border-radius: 4px; font-size: 0.85rem; }
|
|
4803
|
+
.meta { font-size: 0.8rem; color: #888; margin-bottom: 1rem; }
|
|
4804
|
+
.result { background: #fff; border: 1px solid #e0e0e0; border-radius: 8px; padding: 1rem 1.25rem; margin-bottom: 0.75rem; }
|
|
4805
|
+
.result-header { display: flex; justify-content: space-between; align-items: flex-start; gap: 1rem; }
|
|
4806
|
+
.result-title { font-size: 1.05rem; font-weight: 600; color: #16213e; text-decoration: none; }
|
|
4807
|
+
.result-title:hover { color: #4361ee; }
|
|
4808
|
+
.result-score { font-size: 0.8rem; font-weight: 600; color: #4361ee; white-space: nowrap; background: #eef1ff; padding: 0.2rem 0.5rem; border-radius: 4px; }
|
|
4809
|
+
.result-url { font-size: 0.8rem; color: #888; margin-top: 0.2rem; }
|
|
4810
|
+
.result-snippet { font-size: 0.9rem; color: #444; margin-top: 0.5rem; line-height: 1.5; }
|
|
4811
|
+
.result-meta { display: flex; gap: 0.75rem; flex-wrap: wrap; margin-top: 0.5rem; font-size: 0.78rem; color: #777; }
|
|
4812
|
+
.result-meta span { background: #f0f0f0; padding: 0.15rem 0.5rem; border-radius: 4px; }
|
|
4813
|
+
.breakdown { margin-top: 0.5rem; padding: 0.5rem 0.75rem; background: #f8f9fa; border-radius: 6px; font-size: 0.8rem; }
|
|
4814
|
+
.breakdown-row { display: flex; justify-content: space-between; padding: 0.15rem 0; }
|
|
4815
|
+
.breakdown-label { color: #555; }
|
|
4816
|
+
.breakdown-value { font-family: monospace; color: #333; }
|
|
4817
|
+
.chunks-toggle { font-size: 0.8rem; color: #4361ee; cursor: pointer; margin-top: 0.5rem; border: none; background: none; padding: 0; text-decoration: underline; }
|
|
4818
|
+
.chunks { margin-top: 0.5rem; padding-left: 1rem; border-left: 3px solid #e0e0e0; }
|
|
4819
|
+
.chunk { margin-bottom: 0.5rem; font-size: 0.85rem; }
|
|
4820
|
+
.chunk-heading { font-size: 0.78rem; color: #4361ee; margin-bottom: 0.15rem; }
|
|
4821
|
+
.chunk-score { font-size: 0.75rem; color: #999; }
|
|
4822
|
+
.chunk-snippet { color: #555; line-height: 1.4; }
|
|
4823
|
+
.empty { text-align: center; padding: 3rem; color: #999; }
|
|
4824
|
+
.loading { text-align: center; padding: 2rem; color: #999; }
|
|
4825
|
+
.hidden { display: none; }
|
|
4826
|
+
|
|
4827
|
+
/* Ranking Tuner */
|
|
4828
|
+
.tuner { margin-bottom: 1.5rem; border: 1px solid #e0e0e0; border-radius: 8px; background: #fff; }
|
|
4829
|
+
.tuner > summary { padding: 0.75rem 1rem; font-weight: 600; font-size: 0.95rem; cursor: pointer; color: #16213e; user-select: none; }
|
|
4830
|
+
.tuner > summary:hover { color: #4361ee; }
|
|
4831
|
+
.tuner-body { padding: 0.5rem 1rem 1rem; }
|
|
4832
|
+
.tuner-actions { display: flex; gap: 0.5rem; margin-bottom: 0.75rem; }
|
|
4833
|
+
.tuner-actions button { padding: 0.35rem 0.75rem; font-size: 0.8rem; border: 1px solid #ccc; border-radius: 4px; background: #fff; cursor: pointer; color: #555; }
|
|
4834
|
+
.tuner-actions button:hover { border-color: #4361ee; color: #4361ee; }
|
|
4835
|
+
.tuner-group { margin-bottom: 0.5rem; border: 1px solid #eee; border-radius: 6px; }
|
|
4836
|
+
.tuner-group > summary { padding: 0.5rem 0.75rem; font-size: 0.85rem; font-weight: 600; cursor: pointer; color: #444; user-select: none; }
|
|
4837
|
+
.tuner-group[open] { margin-bottom: 0.75rem; }
|
|
4838
|
+
.tuner-group-body { padding: 0.25rem 0.75rem 0.5rem; }
|
|
4839
|
+
.tuner-row { display: grid; grid-template-columns: 140px 1fr 70px 24px; gap: 8px; align-items: center; margin-bottom: 0.35rem; }
|
|
4840
|
+
.tuner-row label { font-size: 0.8rem; color: #555; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
|
|
4841
|
+
.tuner-row label.modified { color: #4361ee; font-weight: 600; }
|
|
4842
|
+
.tuner-row input[type="range"] { width: 100%; height: 6px; cursor: pointer; }
|
|
4843
|
+
.tuner-row input[type="number"] { width: 70px; padding: 0.2rem 0.35rem; font-size: 0.8rem; border: 1px solid #ccc; border-radius: 4px; text-align: right; }
|
|
4844
|
+
.tuner-row input[type="checkbox"] { width: 16px; height: 16px; cursor: pointer; }
|
|
4845
|
+
.tuner-row .reset-btn { width: 20px; height: 20px; border: none; background: none; cursor: pointer; color: #999; font-size: 1rem; padding: 0; line-height: 1; visibility: hidden; }
|
|
4846
|
+
.tuner-row .reset-btn.visible { visibility: visible; }
|
|
4847
|
+
.tuner-row .reset-btn:hover { color: #4361ee; }
|
|
4848
|
+
.tuner-bool-row { display: grid; grid-template-columns: 140px 1fr 24px; gap: 8px; align-items: center; margin-bottom: 0.35rem; }
|
|
4849
|
+
.tuner-bool-row label { font-size: 0.8rem; color: #555; }
|
|
4850
|
+
.tuner-bool-row label.modified { color: #4361ee; font-weight: 600; }
|
|
4851
|
+
.tuner-bool-row .reset-btn { width: 20px; height: 20px; border: none; background: none; cursor: pointer; color: #999; font-size: 1rem; padding: 0; line-height: 1; visibility: hidden; }
|
|
4852
|
+
.tuner-bool-row .reset-btn.visible { visibility: visible; }
|
|
4853
|
+
.tuner-export { margin-top: 0.75rem; }
|
|
4854
|
+
.tuner-export textarea { width: 100%; height: 120px; font-family: monospace; font-size: 0.8rem; padding: 0.5rem; border: 1px solid #ccc; border-radius: 6px; background: #f8f9fa; resize: vertical; }
|
|
4855
|
+
</style>
|
|
4856
|
+
</head>
|
|
4857
|
+
<body>
|
|
4858
|
+
<h1>SearchSocket <span>Playground</span></h1>
|
|
4859
|
+
<div class="search-box">
|
|
4860
|
+
<input type="text" id="q" placeholder="Type a search query..." autocomplete="off" autofocus>
|
|
4861
|
+
</div>
|
|
4862
|
+
<div class="search-opts">
|
|
4863
|
+
<label>
|
|
4864
|
+
Group by:
|
|
4865
|
+
<select id="groupBy">
|
|
4866
|
+
<option value="page">Page</option>
|
|
4867
|
+
<option value="chunk">Chunk</option>
|
|
4868
|
+
</select>
|
|
4869
|
+
</label>
|
|
4870
|
+
<label>
|
|
4871
|
+
Top K:
|
|
4872
|
+
<select id="topK">
|
|
4873
|
+
<option value="5">5</option>
|
|
4874
|
+
<option value="10" selected>10</option>
|
|
4875
|
+
<option value="20">20</option>
|
|
4876
|
+
<option value="50">50</option>
|
|
4877
|
+
</select>
|
|
4878
|
+
</label>
|
|
4879
|
+
</div>
|
|
4880
|
+
|
|
4881
|
+
<details class="tuner" id="tunerPanel">
|
|
4882
|
+
<summary>Ranking Tuner</summary>
|
|
4883
|
+
<div class="tuner-body">
|
|
4884
|
+
<div class="tuner-actions">
|
|
4885
|
+
<button id="resetAll" type="button">Reset All</button>
|
|
4886
|
+
<button id="exportConfig" type="button">Export Config</button>
|
|
4887
|
+
</div>
|
|
4888
|
+
<div id="tunerGroups"></div>
|
|
4889
|
+
<div class="tuner-export hidden" id="exportArea">
|
|
4890
|
+
<textarea id="exportText" readonly></textarea>
|
|
4891
|
+
</div>
|
|
4892
|
+
</div>
|
|
4893
|
+
</details>
|
|
4894
|
+
|
|
4895
|
+
<div id="meta" class="meta"></div>
|
|
4896
|
+
<div id="results"></div>
|
|
4897
|
+
|
|
4898
|
+
<script>
|
|
4899
|
+
(function() {
|
|
4900
|
+
var qInput = document.getElementById('q');
|
|
4901
|
+
var groupBySelect = document.getElementById('groupBy');
|
|
4902
|
+
var topKSelect = document.getElementById('topK');
|
|
4903
|
+
var resultsDiv = document.getElementById('results');
|
|
4904
|
+
var metaDiv = document.getElementById('meta');
|
|
4905
|
+
var tunerGroupsDiv = document.getElementById('tunerGroups');
|
|
4906
|
+
var exportArea = document.getElementById('exportArea');
|
|
4907
|
+
var exportText = document.getElementById('exportText');
|
|
4908
|
+
|
|
4909
|
+
var debounceTimer = null;
|
|
4910
|
+
var requestId = 0;
|
|
4911
|
+
var baselineConfig = null;
|
|
4912
|
+
var tunerParams = [];
|
|
4913
|
+
|
|
4914
|
+
var PARAM_DEFS = [
|
|
4915
|
+
{ group: 'Thresholds', key: 'ranking.minScoreRatio', label: 'minScoreRatio', min: 0, max: 1, step: 0.01 },
|
|
4916
|
+
{ group: 'Thresholds', key: 'ranking.scoreGapThreshold', label: 'scoreGapThreshold', min: 0, max: 1, step: 0.01 },
|
|
4917
|
+
{ group: 'Thresholds', key: 'ranking.minChunkScoreRatio', label: 'minChunkScoreRatio', min: 0, max: 1, step: 0.01 },
|
|
4918
|
+
{ group: 'Boosts', key: 'ranking.enableIncomingLinkBoost', label: 'incomingLinkBoost', type: 'bool' },
|
|
4919
|
+
{ group: 'Boosts', key: 'ranking.enableDepthBoost', label: 'depthBoost', type: 'bool' },
|
|
4920
|
+
{ group: 'Weights', key: 'ranking.weights.incomingLinks', label: 'incomingLinks', min: 0, max: 1, step: 0.01 },
|
|
4921
|
+
{ group: 'Weights', key: 'ranking.weights.depth', label: 'depth', min: 0, max: 1, step: 0.01 },
|
|
4922
|
+
{ group: 'Weights', key: 'ranking.weights.aggregation', label: 'aggregation', min: 0, max: 1, step: 0.01 },
|
|
4923
|
+
{ group: 'Weights', key: 'ranking.weights.titleMatch', label: 'titleMatch', min: 0, max: 1, step: 0.01 },
|
|
4924
|
+
{ group: 'Aggregation', key: 'ranking.aggregationCap', label: 'aggregationCap', min: 1, max: 20, step: 1 },
|
|
4925
|
+
{ group: 'Aggregation', key: 'ranking.aggregationDecay', label: 'aggregationDecay', min: 0, max: 1, step: 0.01 },
|
|
4926
|
+
{ group: 'Search', key: 'search.pageSearchWeight', label: 'pageSearchWeight', min: 0, max: 1, step: 0.01 }
|
|
4927
|
+
];
|
|
4928
|
+
|
|
4929
|
+
function getNestedValue(obj, path) {
|
|
4930
|
+
var parts = path.split('.');
|
|
4931
|
+
var v = obj;
|
|
4932
|
+
for (var i = 0; i < parts.length; i++) {
|
|
4933
|
+
if (v == null) return undefined;
|
|
4934
|
+
v = v[parts[i]];
|
|
4935
|
+
}
|
|
4936
|
+
return v;
|
|
4937
|
+
}
|
|
4938
|
+
|
|
4939
|
+
function setNestedValue(obj, path, value) {
|
|
4940
|
+
var parts = path.split('.');
|
|
4941
|
+
var cur = obj;
|
|
4942
|
+
for (var i = 0; i < parts.length - 1; i++) {
|
|
4943
|
+
if (!cur[parts[i]]) cur[parts[i]] = {};
|
|
4944
|
+
cur = cur[parts[i]];
|
|
4945
|
+
}
|
|
4946
|
+
cur[parts[parts.length - 1]] = value;
|
|
4947
|
+
}
|
|
4948
|
+
|
|
4949
|
+
function initTuner(config) {
|
|
4950
|
+
baselineConfig = config;
|
|
4951
|
+
var groups = {};
|
|
4952
|
+
PARAM_DEFS.forEach(function(def) {
|
|
4953
|
+
if (!groups[def.group]) groups[def.group] = [];
|
|
4954
|
+
groups[def.group].push(def);
|
|
4955
|
+
});
|
|
4956
|
+
|
|
4957
|
+
var html = '';
|
|
4958
|
+
Object.keys(groups).forEach(function(groupName) {
|
|
4959
|
+
html += '<details class="tuner-group" open>';
|
|
4960
|
+
html += '<summary>' + groupName + '</summary>';
|
|
4961
|
+
html += '<div class="tuner-group-body">';
|
|
4962
|
+
groups[groupName].forEach(function(def) {
|
|
4963
|
+
var val = getNestedValue(config, def.key);
|
|
4964
|
+
if (def.type === 'bool') {
|
|
4965
|
+
html += '<div class="tuner-bool-row" data-key="' + def.key + '">';
|
|
4966
|
+
html += '<label>' + def.label + '</label>';
|
|
4967
|
+
html += '<input type="checkbox"' + (val ? ' checked' : '') + ' data-param="' + def.key + '">';
|
|
4968
|
+
html += '<button class="reset-btn" title="Reset" data-reset="' + def.key + '">\\u21BA</button>';
|
|
4969
|
+
html += '</div>';
|
|
4970
|
+
} else {
|
|
4971
|
+
html += '<div class="tuner-row" data-key="' + def.key + '">';
|
|
4972
|
+
html += '<label>' + def.label + '</label>';
|
|
4973
|
+
html += '<input type="range" min="' + def.min + '" max="' + def.max + '" step="' + def.step + '" value="' + val + '" data-param="' + def.key + '">';
|
|
4974
|
+
html += '<input type="number" min="' + def.min + '" max="' + def.max + '" step="' + def.step + '" value="' + val + '" data-num="' + def.key + '">';
|
|
4975
|
+
html += '<button class="reset-btn" title="Reset" data-reset="' + def.key + '">\\u21BA</button>';
|
|
4976
|
+
html += '</div>';
|
|
4977
|
+
}
|
|
4978
|
+
});
|
|
4979
|
+
html += '</div></details>';
|
|
4980
|
+
});
|
|
4981
|
+
tunerGroupsDiv.innerHTML = html;
|
|
4982
|
+
|
|
4983
|
+
// Wire events
|
|
4984
|
+
tunerGroupsDiv.addEventListener('input', function(e) {
|
|
4985
|
+
var param = e.target.getAttribute('data-param');
|
|
4986
|
+
var num = e.target.getAttribute('data-num');
|
|
4987
|
+
if (param) {
|
|
4988
|
+
// Slider or checkbox changed \u2014 sync number input
|
|
4989
|
+
var row = e.target.closest('[data-key]');
|
|
4990
|
+
if (row && e.target.type === 'range') {
|
|
4991
|
+
var numInput = row.querySelector('[data-num]');
|
|
4992
|
+
if (numInput) numInput.value = e.target.value;
|
|
4993
|
+
}
|
|
4994
|
+
updateModifiedState(param);
|
|
4995
|
+
scheduleSearch();
|
|
4996
|
+
} else if (num) {
|
|
4997
|
+
// Number input changed \u2014 sync slider
|
|
4998
|
+
var row = e.target.closest('[data-key]');
|
|
4999
|
+
if (row) {
|
|
5000
|
+
var rangeInput = row.querySelector('[data-param]');
|
|
5001
|
+
if (rangeInput) rangeInput.value = e.target.value;
|
|
5002
|
+
}
|
|
5003
|
+
updateModifiedState(num);
|
|
5004
|
+
scheduleSearch();
|
|
5005
|
+
}
|
|
5006
|
+
});
|
|
5007
|
+
|
|
5008
|
+
tunerGroupsDiv.addEventListener('change', function(e) {
|
|
5009
|
+
var param = e.target.getAttribute('data-param');
|
|
5010
|
+
if (param && e.target.type === 'checkbox') {
|
|
5011
|
+
updateModifiedState(param);
|
|
5012
|
+
scheduleSearch();
|
|
5013
|
+
}
|
|
5014
|
+
});
|
|
5015
|
+
|
|
5016
|
+
tunerGroupsDiv.addEventListener('click', function(e) {
|
|
5017
|
+
var resetKey = e.target.getAttribute('data-reset');
|
|
5018
|
+
if (resetKey) {
|
|
5019
|
+
resetParam(resetKey);
|
|
5020
|
+
scheduleSearch();
|
|
5021
|
+
}
|
|
5022
|
+
});
|
|
5023
|
+
}
|
|
5024
|
+
|
|
5025
|
+
function updateModifiedState(key) {
|
|
5026
|
+
var baseline = getNestedValue(baselineConfig, key);
|
|
5027
|
+
var row = tunerGroupsDiv.querySelector('[data-key="' + key + '"]');
|
|
5028
|
+
if (!row) return;
|
|
5029
|
+
var input = row.querySelector('[data-param="' + key + '"]');
|
|
5030
|
+
if (!input) return;
|
|
5031
|
+
var current = input.type === 'checkbox' ? input.checked : parseFloat(input.value);
|
|
5032
|
+
var isModified = current !== baseline;
|
|
5033
|
+
var label = row.querySelector('label');
|
|
5034
|
+
var resetBtn = row.querySelector('.reset-btn');
|
|
5035
|
+
if (label) label.classList.toggle('modified', isModified);
|
|
5036
|
+
if (resetBtn) resetBtn.classList.toggle('visible', isModified);
|
|
5037
|
+
}
|
|
5038
|
+
|
|
5039
|
+
function resetParam(key) {
|
|
5040
|
+
var baseline = getNestedValue(baselineConfig, key);
|
|
5041
|
+
var row = tunerGroupsDiv.querySelector('[data-key="' + key + '"]');
|
|
5042
|
+
if (!row) return;
|
|
5043
|
+
var input = row.querySelector('[data-param="' + key + '"]');
|
|
5044
|
+
if (!input) return;
|
|
5045
|
+
if (input.type === 'checkbox') {
|
|
5046
|
+
input.checked = baseline;
|
|
5047
|
+
} else {
|
|
5048
|
+
input.value = baseline;
|
|
5049
|
+
var numInput = row.querySelector('[data-num]');
|
|
5050
|
+
if (numInput) numInput.value = baseline;
|
|
5051
|
+
}
|
|
5052
|
+
updateModifiedState(key);
|
|
5053
|
+
}
|
|
5054
|
+
|
|
5055
|
+
function resetAll() {
|
|
5056
|
+
PARAM_DEFS.forEach(function(def) {
|
|
5057
|
+
resetParam(def.key);
|
|
5058
|
+
});
|
|
5059
|
+
exportArea.classList.add('hidden');
|
|
5060
|
+
scheduleSearch();
|
|
5061
|
+
}
|
|
5062
|
+
|
|
5063
|
+
function collectOverrides() {
|
|
5064
|
+
var overrides = {};
|
|
5065
|
+
PARAM_DEFS.forEach(function(def) {
|
|
5066
|
+
var row = tunerGroupsDiv.querySelector('[data-key="' + def.key + '"]');
|
|
5067
|
+
if (!row) return;
|
|
5068
|
+
var input = row.querySelector('[data-param="' + def.key + '"]');
|
|
5069
|
+
if (!input) return;
|
|
5070
|
+
var val = def.type === 'bool' ? input.checked : parseFloat(input.value);
|
|
5071
|
+
setNestedValue(overrides, def.key, val);
|
|
5072
|
+
});
|
|
5073
|
+
return overrides;
|
|
5074
|
+
}
|
|
5075
|
+
|
|
5076
|
+
function collectChangedOverrides() {
|
|
5077
|
+
var overrides = {};
|
|
5078
|
+
var hasChanges = false;
|
|
5079
|
+
PARAM_DEFS.forEach(function(def) {
|
|
5080
|
+
var row = tunerGroupsDiv.querySelector('[data-key="' + def.key + '"]');
|
|
5081
|
+
if (!row) return;
|
|
5082
|
+
var input = row.querySelector('[data-param="' + def.key + '"]');
|
|
5083
|
+
if (!input) return;
|
|
5084
|
+
var current = def.type === 'bool' ? input.checked : parseFloat(input.value);
|
|
5085
|
+
var baseline = getNestedValue(baselineConfig, def.key);
|
|
5086
|
+
if (current !== baseline) {
|
|
5087
|
+
setNestedValue(overrides, def.key, current);
|
|
5088
|
+
hasChanges = true;
|
|
5089
|
+
}
|
|
5090
|
+
});
|
|
5091
|
+
return hasChanges ? overrides : null;
|
|
5092
|
+
}
|
|
5093
|
+
|
|
5094
|
+
function exportConfig() {
|
|
5095
|
+
var changed = collectChangedOverrides();
|
|
5096
|
+
if (!changed) {
|
|
5097
|
+
exportArea.classList.remove('hidden');
|
|
5098
|
+
exportText.value = '// No parameters have been changed from defaults.';
|
|
5099
|
+
return;
|
|
5100
|
+
}
|
|
5101
|
+
|
|
5102
|
+
var lines = [];
|
|
5103
|
+
if (changed.ranking) {
|
|
5104
|
+
lines.push('ranking: {');
|
|
5105
|
+
var r = changed.ranking;
|
|
5106
|
+
var simpleKeys = ['enableIncomingLinkBoost', 'enableDepthBoost', 'aggregationCap', 'aggregationDecay', 'minChunkScoreRatio', 'minScoreRatio', 'scoreGapThreshold'];
|
|
5107
|
+
simpleKeys.forEach(function(k) {
|
|
5108
|
+
if (r[k] !== undefined) lines.push(' ' + k + ': ' + JSON.stringify(r[k]) + ',');
|
|
5109
|
+
});
|
|
5110
|
+
if (r.weights) {
|
|
5111
|
+
lines.push(' weights: {');
|
|
5112
|
+
Object.keys(r.weights).forEach(function(wk) {
|
|
5113
|
+
lines.push(' ' + wk + ': ' + r.weights[wk] + ',');
|
|
5114
|
+
});
|
|
5115
|
+
lines.push(' },');
|
|
5116
|
+
}
|
|
5117
|
+
lines.push('},');
|
|
5118
|
+
}
|
|
5119
|
+
if (changed.search) {
|
|
5120
|
+
lines.push('search: {');
|
|
5121
|
+
Object.keys(changed.search).forEach(function(sk) {
|
|
5122
|
+
lines.push(' ' + sk + ': ' + changed.search[sk] + ',');
|
|
5123
|
+
});
|
|
5124
|
+
lines.push('},');
|
|
5125
|
+
}
|
|
5126
|
+
|
|
5127
|
+
exportArea.classList.remove('hidden');
|
|
5128
|
+
exportText.value = lines.join('\\n');
|
|
5129
|
+
}
|
|
5130
|
+
|
|
5131
|
+
// Seed the form controls from the query string (only non-empty values apply).
var params = new URLSearchParams(window.location.search);
[['q', qInput], ['groupBy', groupBySelect], ['topK', topKSelect]].forEach(function(pair) {
  var value = params.get(pair[0]);
  if (value) pair[1].value = value;
});
|
|
5136
|
+
|
|
5137
|
+
function updateUrl() {
  // Mirror the current form state into the address bar without adding a
  // history entry. Default values (groupBy=page, topK=10) are left out.
  var state = new URLSearchParams();
  var queryText = qInput.value;
  var groupBy = groupBySelect.value;
  var topK = topKSelect.value;

  if (queryText) state.set('q', queryText);
  if (groupBy !== 'page') state.set('groupBy', groupBy);
  if (topK !== '10') state.set('topK', topK);

  var serialized = state.toString();
  var target;
  if (serialized) {
    target = '?' + serialized;
  } else {
    target = window.location.pathname;
  }
  history.replaceState(null, '', target);
}
|
|
5145
|
+
|
|
5146
|
+
function doSearch() {
  // Run the current query against the playground search endpoint and render
  // the outcome. Every call takes a fresh request id; any response that
  // arrives after a newer call has started is dropped, so stale output can
  // never overwrite newer output.
  var query = qInput.value.trim();
  updateUrl();
  if (!query) {
    resultsDiv.innerHTML = '<div class="empty">Enter a query to search</div>';
    metaDiv.textContent = '';
    return;
  }

  resultsDiv.innerHTML = '<div class="loading">Searching...</div>';

  var thisRequestId = ++requestId;
  var body = {
    q: query,
    topK: parseInt(topKSelect.value, 10),
    groupBy: groupBySelect.value,
    debug: true
  };

  // Overrides are only sent once the tuner has a baseline to compare with.
  if (baselineConfig) {
    body.rankingOverrides = collectOverrides();
  }

  fetch('/_searchsocket/search', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body)
  }).then(function(res) {
    if (thisRequestId !== requestId) return;
    if (!res.ok) {
      return res.text().then(function(err) {
        // Reading the error body is asynchronous: a newer search may have
        // started in the meantime, so re-check before touching the DOM.
        if (thisRequestId !== requestId) return;
        resultsDiv.innerHTML = '<div class="empty">Error: ' + escapeHtml(err) + '</div>';
      });
    }
    return res.json().then(function(data) {
      if (thisRequestId !== requestId) return;
      renderResults(data);
    });
  }).catch(function(err) {
    if (thisRequestId !== requestId) return;
    resultsDiv.innerHTML = '<div class="empty">Network error: ' + escapeHtml(err.message) + '</div>';
  });
}
|
|
5189
|
+
|
|
5190
|
+
function escapeHtml(str) {
  // Let the browser do the escaping: assign the raw value as text to a
  // detached element, then read back its serialized markup.
  var scratch = document.createElement('div');
  scratch.textContent = str;
  var escaped = scratch.innerHTML;
  return escaped;
}
|
|
5195
|
+
|
|
5196
|
+
function renderResults(data) {
  // Render the search response: a one-line summary in the meta area, then
  // one card per result with optional snippet, score breakdown and a
  // collapsible chunk list.
  metaDiv.textContent = data.results.length + ' results in ' + data.meta.timingsMs.total + 'ms (search: ' + data.meta.timingsMs.search + 'ms) \\u2014 scope: ' + data.scope;

  if (data.results.length === 0) {
    resultsDiv.innerHTML = '<div class="empty">No results found</div>';
    return;
  }

  // One labelled row of the score breakdown table.
  function breakdownRow(label, amount) {
    return '<div class="breakdown-row"><span class="breakdown-label">' + label + '</span><span class="breakdown-value">' + amount.toFixed(6) + '</span></div>';
  }

  // Markup for a single chunk inside the collapsible chunk list.
  function chunkMarkup(chunk) {
    var pieces = ['<div class="chunk">'];
    if (chunk.headingPath && chunk.headingPath.length > 0) {
      pieces.push('<div class="chunk-heading">' + escapeHtml(chunk.headingPath.join(' > ')) + '</div>');
    }
    if (chunk.sectionTitle) {
      pieces.push('<div class="chunk-heading">' + escapeHtml(chunk.sectionTitle) + '</div>');
    }
    pieces.push('<div class="chunk-snippet">' + escapeHtml(chunk.snippet) + '</div>');
    pieces.push('<div class="chunk-score">score: ' + chunk.score.toFixed(4) + '</div>');
    pieces.push('</div>');
    return pieces.join('');
  }

  // Markup for one result card; index links the toggle button to its chunks.
  function resultMarkup(result, index) {
    var pieces = [];
    pieces.push('<div class="result">');
    pieces.push('<div class="result-header">');
    pieces.push('<div><div class="result-title">' + escapeHtml(result.title) + '</div>');
    pieces.push('<div class="result-url">' + escapeHtml(result.url) + '</div></div>');
    pieces.push('<div class="result-score">' + result.score.toFixed(4) + '</div>');
    pieces.push('</div>');

    if (result.snippet) {
      pieces.push('<div class="result-snippet">' + escapeHtml(result.snippet) + '</div>');
    }

    pieces.push('<div class="result-meta">');
    pieces.push('<span>route: ' + escapeHtml(result.routeFile) + '</span>');
    if (result.sectionTitle) {
      pieces.push('<span>section: ' + escapeHtml(result.sectionTitle) + '</span>');
    }
    pieces.push('</div>');

    var breakdown = result.breakdown;
    if (breakdown) {
      pieces.push('<div class="breakdown">');
      pieces.push(breakdownRow('Base score', breakdown.baseScore));
      pieces.push(breakdownRow('Incoming link boost', breakdown.incomingLinkBoost));
      pieces.push(breakdownRow('Depth boost', breakdown.depthBoost));
      pieces.push(breakdownRow('Title match boost', breakdown.titleMatchBoost));
      // The anchor text boost is the only component rendered as 0 when absent.
      pieces.push(breakdownRow('Anchor text boost', breakdown.anchorTextMatchBoost || 0));
      pieces.push('</div>');
    }

    if (result.chunks && result.chunks.length > 0) {
      pieces.push('<button class="chunks-toggle" data-idx="' + index + '">Show ' + result.chunks.length + ' chunks</button>');
      pieces.push('<div class="chunks hidden" id="chunks-' + index + '">');
      for (var c = 0; c < result.chunks.length; c++) {
        pieces.push(chunkMarkup(result.chunks[c]));
      }
      pieces.push('</div>');
    }

    pieces.push('</div>');
    return pieces.join('');
  }

  resultsDiv.innerHTML = data.results.map(resultMarkup).join('');
}
|
|
5253
|
+
|
|
5254
|
+
// Delegated handler: one listener on the results container serves every
// "Show N chunks" button, including buttons rendered by later searches.
resultsDiv.addEventListener('click', function(event) {
  var button = event.target;
  if (!button.classList.contains('chunks-toggle')) return;

  var panel = document.getElementById('chunks-' + button.getAttribute('data-idx'));
  if (!panel) return;

  panel.classList.toggle('hidden');
  if (panel.classList.contains('hidden')) {
    button.textContent = 'Show ' + panel.children.length + ' chunks';
  } else {
    button.textContent = 'Hide chunks';
  }
});
|
|
5266
|
+
|
|
5267
|
+
function scheduleSearch() {
  // Debounce: drop any pending search and run a new one after the quiet period.
  var quietPeriodMs = 300;
  clearTimeout(debounceTimer);
  debounceTimer = setTimeout(doSearch, quietPeriodMs);
}
|
|
5271
|
+
|
|
5272
|
+
// Any change to the query or to the result options re-runs the search.
qInput.addEventListener('input', scheduleSearch);
[groupBySelect, topKSelect].forEach(function(select) {
  select.addEventListener('change', scheduleSearch);
});

document.getElementById('resetAll').addEventListener('click', resetAll);
document.getElementById('exportConfig').addEventListener('click', exportConfig);

// Fetch config and initialize tuner
fetch('/_searchsocket/config')
  .then(function(response) {
    return response.ok ? response.json() : null;
  })
  .then(function(loadedConfig) {
    if (loadedConfig) initTuner(loadedConfig);
  })
  .catch(function() {
    // Config endpoint not available; the tuner simply stays empty.
  });

// Trigger initial search if query is present
if (qInput.value.trim()) doSearch();
|
|
5291
|
+
})();
|
|
5292
|
+
</script>
|
|
5293
|
+
</body>
|
|
5294
|
+
</html>
|
|
5295
|
+
`;
|
|
5296
|
+
|
|
5297
|
+
// src/playground/server.ts
|
|
5298
|
+
/**
 * Start the local playground HTTP server.
 *
 * Serves the playground page at `/_searchsocket`, the tunable ranking/search
 * settings at `/_searchsocket/config`, and a JSON search endpoint at
 * `/_searchsocket/search`. The server binds to 127.0.0.1 only.
 *
 * @param {object} options
 * @param {string} [options.cwd] Working directory passed to config loading and engine creation.
 * @param {string} [options.configPath] Config path passed to config loading and engine creation.
 * @param {object} [options.config] Already-resolved config; loaded via `loadConfig` when omitted.
 * @param {number} [options.port] Preferred port (default 3337). If it is in use, an OS-assigned port is used instead.
 * @returns {Promise<{ port: number, close: () => Promise<void> }>} The bound port and a shutdown function.
 */
async function runPlaygroundServer(options) {
  const config = options.config ?? await loadConfig({
    cwd: options.cwd,
    configPath: options.configPath
  });

  // The engine is created lazily on the first search and then shared.
  let enginePromise = null;
  function getEngine() {
    if (!enginePromise) {
      const pending = Promise.resolve(SearchEngine.create({
        cwd: options.cwd,
        configPath: options.configPath,
        config
      }));
      // Do not cache a failed creation: drop it so the next request retries
      // instead of replaying the same rejection until the server restarts.
      pending.catch(() => {
        if (enginePromise === pending) {
          enginePromise = null;
        }
      });
      enginePromise = pending;
    }
    return enginePromise;
  }

  const app = express();
  app.use(express.json());

  app.get("/_searchsocket", (_req, res) => {
    res.type("html").send(playground_default);
  });

  app.get("/_searchsocket/config", (_req, res) => {
    const { ranking, search } = config;
    res.json({
      ranking: {
        enableIncomingLinkBoost: ranking.enableIncomingLinkBoost,
        enableDepthBoost: ranking.enableDepthBoost,
        aggregationCap: ranking.aggregationCap,
        aggregationDecay: ranking.aggregationDecay,
        minChunkScoreRatio: ranking.minChunkScoreRatio,
        minScoreRatio: ranking.minScoreRatio,
        scoreGapThreshold: ranking.scoreGapThreshold,
        weights: { ...ranking.weights }
      },
      search: {
        pageSearchWeight: search.pageSearchWeight
      }
    });
  });

  app.post("/_searchsocket/search", async (req, res) => {
    try {
      const searchEngine = await getEngine();
      const body = req.body;
      if (!body || typeof body.q !== "string" || body.q.trim().length === 0) {
        res.status(400).json({ error: "Missing or empty 'q' field" });
        return;
      }
      // Fields of the wrong type are dropped rather than rejected.
      const result = await searchEngine.search({
        q: body.q,
        topK: typeof body.topK === "number" ? body.topK : void 0,
        scope: typeof body.scope === "string" ? body.scope : void 0,
        pathPrefix: typeof body.pathPrefix === "string" ? body.pathPrefix : void 0,
        tags: Array.isArray(body.tags) ? body.tags : void 0,
        groupBy: body.groupBy === "page" || body.groupBy === "chunk" ? body.groupBy : void 0,
        debug: body.debug === true,
        rankingOverrides: body.rankingOverrides && typeof body.rankingOverrides === "object" ? body.rankingOverrides : void 0
      });
      res.json(result);
    } catch (error) {
      const message = error instanceof Error ? error.message : "Internal server error";
      // The thrown value may be null/undefined or carry a non-HTTP status;
      // only a 4xx/5xx integer is forwarded, anything else becomes 500.
      const candidate = error?.statusCode;
      const status = Number.isInteger(candidate) && candidate >= 400 && candidate <= 599 ? candidate : 500;
      res.status(status).json({ error: message });
    }
  });

  const preferredPort = options.port ?? 3337;

  // Listen on `port`; on EADDRINUSE retry once with port 0 (OS-assigned).
  function startServer(port) {
    return new Promise((resolve, reject) => {
      let httpServer;
      const onListening = () => {
        const addr = httpServer.address();
        resolve({
          port: addr.port,
          close: () => new Promise((r) => httpServer.close(() => r()))
        });
      };
      httpServer = app.listen(port, "127.0.0.1", onListening);
      httpServer.once("error", (err) => {
        if (err.code === "EADDRINUSE" && port !== 0) {
          startServer(0).then(resolve, reject);
        } else {
          reject(err);
        }
      });
    });
  }

  return startServer(preferredPort);
}
|
|
5384
|
+
|
|
5385
|
+
// src/search/quality-metrics.ts
|
|
5386
|
+
/**
 * Reciprocal rank of the first relevant result.
 *
 * @param {Array<{ url: string }>} results Ranked results, best first.
 * @param {Iterable<string>} relevant URLs considered relevant.
 * @returns {number} 1 / (1-based position of the first relevant result), or 0 when none is relevant.
 */
function reciprocalRank(results, relevant) {
  const relevantUrls = new Set(relevant);
  const firstHit = results.findIndex((result) => relevantUrls.has(result.url));
  return firstHit === -1 ? 0 : 1 / (firstHit + 1);
}
|
|
5395
|
+
/**
 * Mean reciprocal rank over a set of evaluated queries.
 *
 * @param {Array<{ results: Array<{ url: string }>, relevant: string[] }>} queries
 * @returns {number} Average of the per-query reciprocal ranks; 0 for an empty list.
 */
function mrr(queries) {
  if (queries.length === 0) {
    return 0;
  }
  let total = 0;
  for (const query of queries) {
    total += reciprocalRank(query.results, query.relevant);
  }
  return total / queries.length;
}
|
|
5400
|
+
|
|
5401
|
+
// src/cli/test-schemas.ts
|
|
5402
|
+
import { z as z4 } from "zod";
|
|
5403
|
+
// One search-quality test case: a query plus at least one expectation.
var testCaseSchema = z4.object({
  query: z4.string().min(1),
  expect: z4
    .object({
      topResult: z4.string().optional(),
      inTop5: z4.array(z4.string()).min(1).optional(),
      maxResults: z4.number().int().nonnegative().optional()
    })
    .refine(
      // An `expect` object with no expectation at all would assert nothing.
      (expectation) => ["topResult", "inTop5", "maxResults"].some((key) => expectation[key] !== void 0),
      { message: "expect must contain at least one of topResult, inTop5, or maxResults" }
    )
});
// A test file is a non-empty array of test cases.
var testFileSchema = z4
  .array(testCaseSchema)
  .min(1, "test file must contain at least one test case");
|
|
5415
|
+
|
|
5416
|
+
// src/cli.ts
|
|
5417
|
+
import * as clack from "@clack/prompts";
|
|
5418
|
+
|
|
5419
|
+
// src/init-helpers.ts
|
|
5420
|
+
import fs9 from "fs";
|
|
5421
|
+
import path13 from "path";
|
|
5422
|
+
import { parseModule, generateCode, builders } from "magicast";
|
|
5423
|
+
/**
 * Make sure `<cwd>/.mcp.json` registers the `searchsocket` MCP server.
 *
 * Creates the file when missing, otherwise merges the entry into the existing
 * `mcpServers` map and keeps every other key. The file is left untouched
 * (with a warning on stderr) when it cannot be parsed or when its root is not
 * a JSON object, and no write happens when the entry is already identical.
 *
 * @param {string} cwd Project directory that holds `.mcp.json`.
 * @returns {void}
 */
function ensureMcpJson(cwd) {
  const mcpPath = path13.join(cwd, ".mcp.json");
  const entry = {
    command: "npx",
    args: ["searchsocket", "mcp"],
    env: {
      // Placeholders are written literally; they are not expanded here.
      UPSTASH_VECTOR_REST_URL: "${UPSTASH_VECTOR_REST_URL}",
      UPSTASH_VECTOR_REST_TOKEN: "${UPSTASH_VECTOR_REST_TOKEN}"
    }
  };
  let existing = {};
  if (fs9.existsSync(mcpPath)) {
    let parsed;
    try {
      parsed = JSON.parse(fs9.readFileSync(mcpPath, "utf8"));
    } catch {
      process.stderr.write("warning: .mcp.json exists but could not be parsed \u2014 skipping\n");
      return;
    }
    // Valid JSON is not necessarily an object: `null`, strings, numbers and
    // arrays cannot carry an `mcpServers` key, so leave such files alone
    // instead of crashing or silently rewriting them.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      process.stderr.write("warning: .mcp.json does not contain a JSON object \u2014 skipping\n");
      return;
    }
    existing = parsed;
  }
  const rawServers = existing.mcpServers ?? {};
  const servers = typeof rawServers === "object" && !Array.isArray(rawServers) ? rawServers : {};
  if (JSON.stringify(servers["searchsocket"]) === JSON.stringify(entry)) {
    return;
  }
  existing.mcpServers = { ...servers, searchsocket: entry };
  fs9.writeFileSync(mcpPath, JSON.stringify(existing, null, 2) + "\n", "utf8");
}
|
|
5451
|
+
var HOOKS_SNIPPET = `import { searchsocketHandle } from "searchsocket/sveltekit";
|
|
5452
|
+
|
|
5453
|
+
export const handle = searchsocketHandle();`;
|
|
5454
|
+
var VITE_PLUGIN_SNIPPET = `import { searchsocketVitePlugin } from "searchsocket/sveltekit";
|
|
5455
|
+
|
|
5456
|
+
// Add to your Vite config plugins array:
|
|
5457
|
+
// plugins: [sveltekit(), searchsocketVitePlugin()]`;
|
|
5458
|
+
/**
 * Wire `searchsocketHandle` into the project's SvelteKit server hooks.
 *
 * @param {string} cwd Project root; hooks are looked up in `<cwd>/src`.
 * @returns {"created"|"already-present"|"composed"|"injected"|"fallback"}
 *   "created" when a new hooks.server.ts was written, "already-present" when
 *   the file already mentions searchsocketHandle, "composed" when an existing
 *   `handle` export was wrapped with sequence(), "injected" when a `handle`
 *   export was added, and "fallback" when the file could not be rewritten.
 */
function injectHooksServerTs(cwd) {
  const srcDir = path13.join(cwd, "src");
  // The TypeScript file wins over the JavaScript one when both exist.
  const candidates = ["hooks.server.ts", "hooks.server.js"].map((file) => path13.join(srcDir, file));
  const hooksPath = candidates.find((candidate) => fs9.existsSync(candidate)) ?? null;

  if (hooksPath === null) {
    fs9.mkdirSync(srcDir, { recursive: true });
    fs9.writeFileSync(candidates[0], HOOKS_SNIPPET + "\n", "utf8");
    return "created";
  }

  const source = fs9.readFileSync(hooksPath, "utf8");
  if (source.includes("searchsocketHandle")) {
    return "already-present";
  }

  try {
    const mod = parseModule(source);
    mod.imports.$append({
      from: "searchsocket/sveltekit",
      imported: "searchsocketHandle"
    });

    let outcome;
    if (mod.exports.handle) {
      // An existing handle is kept and runs after searchsocketHandle.
      mod.imports.$append({
        from: "@sveltejs/kit/hooks",
        imported: "sequence"
      });
      const previousHandle = mod.exports.handle;
      mod.exports.handle = builders.functionCall(
        "sequence",
        builders.functionCall("searchsocketHandle"),
        previousHandle
      );
      outcome = "composed";
    } else {
      mod.exports.handle = builders.functionCall("searchsocketHandle");
      outcome = "injected";
    }

    const generated = generateCode(mod);
    fs9.writeFileSync(hooksPath, generated.code, "utf8");
    return outcome;
  } catch {
    // Any parse or generation failure means the caller should print
    // manual instructions instead.
    return "fallback";
  }
}
|
|
5501
|
+
/**
 * Add `searchsocketVitePlugin()` to the project's Vite config.
 *
 * @param {string} cwd Project root containing vite.config.ts or vite.config.js.
 * @returns {"no-config"|"already-present"|"injected"|"fallback"}
 *   "no-config" when neither config file exists, "already-present" when the
 *   file already mentions searchsocketVitePlugin, "injected" after a
 *   successful rewrite, and "fallback" when the file could not be rewritten.
 */
function injectViteConfig(cwd) {
  // vite.config.ts wins over vite.config.js when both exist.
  const configPath = ["vite.config.ts", "vite.config.js"].map((file) => path13.join(cwd, file)).find((candidate) => fs9.existsSync(candidate)) ?? null;
  if (!configPath) {
    return "no-config";
  }

  const source = fs9.readFileSync(configPath, "utf8");
  if (source.includes("searchsocketVitePlugin")) {
    return "already-present";
  }

  try {
    const mod = parseModule(source);
    mod.imports.$append({
      from: "searchsocket/sveltekit",
      imported: "searchsocketVitePlugin"
    });

    const exported = mod.exports.default;
    if (!exported) {
      return "fallback";
    }
    // For a wrapped export such as defineConfig({...}), edit the first argument.
    const target = exported.$type === "function-call" ? exported.$args[0] : exported;

    if (target.plugins) {
      target.plugins.push(builders.functionCall("searchsocketVitePlugin"));
    } else {
      target.plugins = [builders.functionCall("searchsocketVitePlugin")];
    }

    const generated = generateCode(mod);
    fs9.writeFileSync(configPath, generated.code, "utf8");
    return "injected";
  } catch {
    // Any parse or generation failure means the caller should print
    // manual instructions instead.
    return "fallback";
  }
}
|
|
5537
|
+
/**
 * Append the Upstash credentials to `<cwd>/.env` and make sure `.env` is
 * git-ignored.
 *
 * A variable is only appended when the file has no assignment for it. An
 * assignment is a line that starts with the variable name (optionally behind
 * `export`), so commented-out lines and longer names that merely end with
 * the same text do not count as existing definitions.
 *
 * @param {string} cwd Project directory that holds `.env`.
 * @param {string} url Value for UPSTASH_VECTOR_REST_URL.
 * @param {string} token Value for UPSTASH_VECTOR_REST_TOKEN.
 * @returns {void}
 */
function writeEnvFile(cwd, url, token) {
  const envPath = path13.join(cwd, ".env");
  let content = "";
  if (fs9.existsSync(envPath)) {
    content = fs9.readFileSync(envPath, "utf8");
  }
  const lines = [];
  // Line-anchored checks: a plain substring test would also match
  // "# UPSTASH_VECTOR_REST_URL=..." or "MY_UPSTASH_VECTOR_REST_URL=...".
  if (!/^[ \t]*(?:export[ \t]+)?UPSTASH_VECTOR_REST_URL[ \t]*=/m.test(content)) {
    lines.push(`UPSTASH_VECTOR_REST_URL=${url}`);
  }
  if (!/^[ \t]*(?:export[ \t]+)?UPSTASH_VECTOR_REST_TOKEN[ \t]*=/m.test(content)) {
    lines.push(`UPSTASH_VECTOR_REST_TOKEN=${token}`);
  }
  if (lines.length > 0) {
    // Start on a fresh line when the existing file lacks a trailing newline.
    const suffix = content.length > 0 && !content.endsWith("\n") ? "\n" : "";
    fs9.writeFileSync(envPath, content + suffix + lines.join("\n") + "\n", "utf8");
  }
  ensureGitignoreEntry(cwd, ".env");
}
|
|
5556
|
+
/**
 * Append `entry` to `<cwd>/.gitignore` unless a line already equals it
 * (ignoring surrounding whitespace). Creates the file when it is missing.
 *
 * @param {string} cwd Directory that holds `.gitignore`.
 * @param {string} entry Exact ignore pattern to ensure.
 * @returns {void}
 */
function ensureGitignoreEntry(cwd, entry) {
  const gitignorePath = path13.join(cwd, ".gitignore");
  const current = fs9.existsSync(gitignorePath) ? fs9.readFileSync(gitignorePath, "utf8") : "";

  for (const line of current.split("\n")) {
    if (line.trim() === entry) {
      return;
    }
  }

  // Only insert a separator when existing content does not end with a newline.
  const separator = current.length === 0 || current.endsWith("\n") ? "" : "\n";
  fs9.writeFileSync(gitignorePath, current + separator + entry + "\n", "utf8");
}
|
|
5569
|
+
|
|
5570
|
+
// src/add-helpers.ts
|
|
5571
|
+
import fs10 from "fs";
|
|
5572
|
+
import fsp from "fs/promises";
|
|
5573
|
+
import path14 from "path";
|
|
5574
|
+
import { fileURLToPath } from "url";
|
|
5575
|
+
var __dirname = path14.dirname(fileURLToPath(import.meta.url));
|
|
5576
|
+
var AVAILABLE_COMPONENTS = ["search-dialog", "search-input", "search-results"];
|
|
5577
|
+
/**
 * Absolute path of the `templates` directory that sits next to this module.
 *
 * @returns {string}
 */
function resolveTemplateDir() {
  const templatesPath = path14.resolve(__dirname, "templates");
  return templatesPath;
}
|
|
5580
|
+
/**
 * Names of the component templates that can be added.
 *
 * @returns {string[]} A fresh copy, so callers cannot mutate the shared list.
 */
function listAvailableComponents() {
  return Array.from(AVAILABLE_COMPONENTS);
}
|
|
5583
|
+
/**
 * Whether `name` is one of the known component template names.
 *
 * @param {string} name Candidate component name.
 * @returns {boolean}
 */
function isValidComponent(name) {
  return AVAILABLE_COMPONENTS.some((component) => component === name);
}
|
|
5586
|
+
/**
 * Copy the `.svelte` files of a component template into `targetDir`.
 *
 * @param {string} name Template directory name under the templates root.
 * @param {string} targetDir Destination directory (created when missing).
 * @param {{ overwrite?: boolean }} [options] Existing files are only replaced when `overwrite` is truthy.
 * @returns {Promise<{ written: string[], skipped: string[] }>} Destination paths that were copied and those left untouched.
 * @throws {Error} When the template directory is missing or holds no `.svelte` files.
 */
async function copyComponent(name, targetDir, options = {}) {
  const templateDir = path14.join(resolveTemplateDir(), name);
  if (!fs10.existsSync(templateDir)) {
    throw new Error(
      `Template directory not found: ${templateDir}. Run "pnpm run build" to generate templates.`
    );
  }

  const svelteFiles = (await fsp.readdir(templateDir)).filter((fileName) => fileName.endsWith(".svelte"));
  if (svelteFiles.length === 0) {
    throw new Error(`No .svelte files found in template: ${name}`);
  }

  await fsp.mkdir(targetDir, { recursive: true });

  const outcome = { written: [], skipped: [] };
  for (const fileName of svelteFiles) {
    const destination = path14.join(targetDir, fileName);
    const keepExisting = !options.overwrite && fs10.existsSync(destination);
    if (keepExisting) {
      outcome.skipped.push(destination);
    } else {
      await fsp.copyFile(path14.join(templateDir, fileName), destination);
      outcome.written.push(destination);
    }
  }
  return outcome;
}
|
|
5612
|
+
|
|
3084
5613
|
// src/cli.ts
|
|
3085
5614
|
function parsePositiveInt(value, flag) {
|
|
3086
5615
|
const parsed = Number(value);
|
|
@@ -3117,6 +5646,10 @@ function parseDurationMs(value) {
|
|
|
3117
5646
|
}
|
|
3118
5647
|
function printIndexSummary(stats) {
|
|
3119
5648
|
process.stdout.write(`pages processed: ${stats.pagesProcessed}
|
|
5649
|
+
`);
|
|
5650
|
+
process.stdout.write(`pages changed: ${stats.pagesChanged}
|
|
5651
|
+
`);
|
|
5652
|
+
process.stdout.write(`pages deleted: ${stats.pagesDeleted}
|
|
3120
5653
|
`);
|
|
3121
5654
|
process.stdout.write(`chunks total: ${stats.chunksTotal}
|
|
3122
5655
|
`);
|
|
@@ -3138,7 +5671,7 @@ function collectWatchPaths(config, cwd) {
|
|
|
3138
5671
|
const paths = ["src/routes/**"];
|
|
3139
5672
|
if (config.source.mode === "content-files" && config.source.contentFiles) {
|
|
3140
5673
|
for (const pattern of config.source.contentFiles.globs) {
|
|
3141
|
-
paths.push(
|
|
5674
|
+
paths.push(path15.join(config.source.contentFiles.baseDir, pattern));
|
|
3142
5675
|
}
|
|
3143
5676
|
}
|
|
3144
5677
|
if (config.source.mode === "static-output") {
|
|
@@ -3151,22 +5684,22 @@ function collectWatchPaths(config, cwd) {
|
|
|
3151
5684
|
paths.push("searchsocket.config.ts");
|
|
3152
5685
|
paths.push(config.source.build.outputDir);
|
|
3153
5686
|
}
|
|
3154
|
-
return paths.map((value) =>
|
|
5687
|
+
return paths.map((value) => path15.resolve(cwd, value));
|
|
3155
5688
|
}
|
|
3156
5689
|
/**
 * Create `<cwd>/.searchsocket` (and any missing parents) if needed.
 *
 * @param {string} cwd Project directory.
 * @returns {string} Path of the state directory.
 */
function ensureStateDir(cwd) {
  const stateDir = path15.join(cwd, ".searchsocket");
  fs11.mkdirSync(stateDir, { recursive: true });
  return stateDir;
}
|
|
3161
5694
|
function ensureGitignore(cwd) {
|
|
3162
|
-
const gitignorePath =
|
|
5695
|
+
const gitignorePath = path15.join(cwd, ".gitignore");
|
|
3163
5696
|
const entries = [
|
|
3164
5697
|
".searchsocket/manifest.json",
|
|
3165
5698
|
".searchsocket/registry.json"
|
|
3166
5699
|
];
|
|
3167
5700
|
let content = "";
|
|
3168
|
-
if (
|
|
3169
|
-
content =
|
|
5701
|
+
if (fs11.existsSync(gitignorePath)) {
|
|
5702
|
+
content = fs11.readFileSync(gitignorePath, "utf8");
|
|
3170
5703
|
}
|
|
3171
5704
|
const lines = content.split("\n");
|
|
3172
5705
|
const missing = entries.filter((entry) => !lines.some((line) => line.trim() === entry));
|
|
@@ -3177,10 +5710,10 @@ function ensureGitignore(cwd) {
|
|
|
3177
5710
|
# SearchSocket local state
|
|
3178
5711
|
${missing.join("\n")}
|
|
3179
5712
|
`;
|
|
3180
|
-
|
|
5713
|
+
fs11.writeFileSync(gitignorePath, content.trimEnd() + block, "utf8");
|
|
3181
5714
|
}
|
|
3182
5715
|
function readScopesFromFile(filePath) {
|
|
3183
|
-
const raw =
|
|
5716
|
+
const raw = fs11.readFileSync(filePath, "utf8");
|
|
3184
5717
|
return new Set(
|
|
3185
5718
|
raw.split(/\r?\n/).map((line) => line.trim()).filter(Boolean)
|
|
3186
5719
|
);
|
|
@@ -3204,8 +5737,8 @@ function readRemoteGitBranches(cwd) {
|
|
|
3204
5737
|
}
|
|
3205
5738
|
}
|
|
3206
5739
|
async function loadResolvedConfigForDev(cwd, configPath) {
|
|
3207
|
-
const resolvedConfigPath =
|
|
3208
|
-
if (
|
|
5740
|
+
const resolvedConfigPath = path15.resolve(cwd, configPath ?? "searchsocket.config.ts");
|
|
5741
|
+
if (fs11.existsSync(resolvedConfigPath)) {
|
|
3209
5742
|
return loadConfig({ cwd, configPath });
|
|
3210
5743
|
}
|
|
3211
5744
|
return mergeConfig(cwd, {});
|
|
@@ -3248,31 +5781,157 @@ async function runIndexCommand(opts) {
|
|
|
3248
5781
|
printIndexSummary(stats);
|
|
3249
5782
|
}
|
|
3250
5783
|
}
|
|
3251
|
-
|
|
3252
|
-
|
|
3253
|
-
|
|
3254
|
-
|
|
3255
|
-
const
|
|
5784
|
+
/**
 * Interactive `searchsocket init`: writes the config files, optionally
 * collects Upstash credentials into `.env`, then wires the SvelteKit hook and
 * the Vite plugin, reporting each step through clack prompts.
 *
 * Exits the process with code 0 when the user cancels a prompt.
 *
 * @param {string} cwd Absolute project directory to initialize.
 * @returns {Promise<void>}
 */
async function runInteractiveInit(cwd) {
  clack.intro("searchsocket setup");
  const s = clack.spinner();

  s.start("Creating config files");
  writeMinimalConfig(cwd);
  ensureStateDir(cwd);
  ensureGitignore(cwd);
  ensureMcpJson(cwd);
  s.stop("Config files created");

  const hasUrl = Boolean(process.env.UPSTASH_VECTOR_REST_URL);
  const hasToken = Boolean(process.env.UPSTASH_VECTOR_REST_TOKEN);
  if (!hasUrl || !hasToken) {
    clack.log.warn("Upstash Search credentials not found in environment.");
    const shouldConfigure = await clack.confirm({
      message: "Would you like to configure Upstash credentials now?",
      initialValue: true
    });
    if (clack.isCancel(shouldConfigure)) {
      clack.cancel("Setup cancelled.");
      process.exit(0);
    }
    if (shouldConfigure) {
      // Only prompt for the value that is missing from the environment.
      const url = hasUrl ? process.env.UPSTASH_VECTOR_REST_URL : await clack.text({
        message: "Upstash Search REST URL:",
        placeholder: "https://your-index.upstash.io",
        validate: (v) => !v ? "URL is required" : void 0
      });
      if (clack.isCancel(url)) {
        clack.cancel("Setup cancelled.");
        process.exit(0);
      }
      const token = hasToken ? process.env.UPSTASH_VECTOR_REST_TOKEN : await clack.text({
        message: "Upstash Search REST Token:",
        placeholder: "AX...",
        validate: (v) => !v ? "Token is required" : void 0
      });
      if (clack.isCancel(token)) {
        clack.cancel("Setup cancelled.");
        process.exit(0);
      }
      writeEnvFile(cwd, url, token);
      clack.log.success("Credentials written to .env");
    }
  } else {
    clack.log.success("Upstash credentials found in environment.");
  }

  s.start("Configuring hooks.server.ts");
  const hookResult = injectHooksServerTs(cwd);
  // Do not claim success on the spinner when the user still has to edit the file.
  s.stop(hookResult === "fallback" ? "hooks.server.ts needs manual setup" : "hooks.server.ts configured");
  switch (hookResult) {
    case "created":
      clack.log.success("Created src/hooks.server.ts with searchsocketHandle.");
      break;
    case "injected":
      clack.log.success("Added searchsocketHandle to src/hooks.server.ts.");
      break;
    case "composed":
      clack.log.success("Composed searchsocketHandle with existing handle using sequence().");
      break;
    case "already-present":
      clack.log.info("searchsocketHandle already configured in hooks.server.ts.");
      break;
    case "fallback":
      clack.log.warn("Could not auto-inject hooks.server.ts. Add manually:");
      clack.log.message(HOOKS_SNIPPET);
      break;
  }

  s.start("Configuring Vite plugin");
  const viteResult = injectViteConfig(cwd);
  const viteConfigured = viteResult === "injected" || viteResult === "already-present";
  s.stop(viteConfigured ? "Vite plugin configured" : "Vite plugin needs manual setup");
  switch (viteResult) {
    case "injected":
      clack.log.success("Added searchsocketVitePlugin to Vite config.");
      break;
    case "already-present":
      clack.log.info("searchsocketVitePlugin already in Vite config.");
      break;
    case "no-config":
      clack.log.warn("No vite.config.ts/js found. Add the plugin manually:");
      clack.log.message(VITE_PLUGIN_SNIPPET);
      break;
    case "fallback":
      clack.log.warn("Could not auto-inject Vite config. Add manually:");
      clack.log.message(VITE_PLUGIN_SNIPPET);
      break;
  }

  clack.log.info("Run `searchsocket doctor` to verify your setup.");
  clack.outro("SearchSocket initialized! Run `searchsocket index` to index your site.");
}
|
|
5873
|
+
/**
 * Non-interactive `searchsocket init`: performs the same file setup as the
 * interactive flow but never prompts, and reports every step as plain lines
 * on stdout.
 *
 * @param {string} cwd Absolute project directory to initialize.
 * @returns {Promise<void>}
 */
async function runSilentInit(cwd) {
  const say = (text) => process.stdout.write(text);

  const configPath = writeMinimalConfig(cwd);
  const stateDir = ensureStateDir(cwd);
  ensureGitignore(cwd);
  ensureMcpJson(cwd);
  say(`created/verified config: ${configPath}
`);
  say(`created/verified state dir: ${stateDir}
`);
  say("created/verified .mcp.json (MCP server config for Claude Code)\n\n");

  // Each outcome maps to the chunks written for it, in order; an outcome
  // without an entry prints nothing.
  const hookReports = {
    "created": ["created src/hooks.server.ts with searchsocketHandle\n"],
    "injected": ["added searchsocketHandle to src/hooks.server.ts\n"],
    "composed": ["composed searchsocketHandle with existing handle via sequence()\n"],
    "already-present": ["searchsocketHandle already present in hooks.server.ts\n"],
    "fallback": ["could not auto-inject hooks.server.ts \u2014 add manually:\n\n", HOOKS_SNIPPET + "\n\n"]
  };
  const hookResult = injectHooksServerTs(cwd);
  for (const chunk of hookReports[hookResult] ?? []) {
    say(chunk);
  }

  const viteReports = {
    "injected": ["added searchsocketVitePlugin to Vite config\n"],
    "already-present": ["searchsocketVitePlugin already in Vite config\n"],
    "no-config": ["no vite.config.ts/js found \u2014 add plugin manually:\n\n", VITE_PLUGIN_SNIPPET + "\n\n"],
    "fallback": ["could not auto-inject Vite config \u2014 add manually:\n\n", VITE_PLUGIN_SNIPPET + "\n\n"]
  };
  const viteResult = injectViteConfig(cwd);
  for (const chunk of viteReports[viteResult] ?? []) {
    say(chunk);
  }
}
|
|
5920
|
+
var program = new Command();
|
|
5921
|
+
program.name("searchsocket").description("Semantic site search and MCP retrieval for SvelteKit").version(package_default.version).option("-C, --cwd <path>", "working directory", process.cwd()).option("--config <path>", "config path (defaults to searchsocket.config.ts)");
|
|
5922
|
+
program.command("init").description("Initialize SearchSocket in a SvelteKit project").option("--non-interactive", "skip interactive prompts").action(async (opts, command) => {
|
|
5923
|
+
const root = getRootOptions(command).cwd ?? process.cwd();
|
|
5924
|
+
const cwd = path15.resolve(root);
|
|
5925
|
+
const isInteractive = Boolean(process.stdout.isTTY) && !opts.nonInteractive;
|
|
5926
|
+
if (isInteractive) {
|
|
5927
|
+
await runInteractiveInit(cwd);
|
|
5928
|
+
} else {
|
|
5929
|
+
await runSilentInit(cwd);
|
|
5930
|
+
}
|
|
3272
5931
|
});
|
|
3273
5932
|
program.command("index").description("Index site content into Upstash Search").option("--scope <name>", "scope override").option("--changed-only", "only process changed chunks", true).option("--no-changed-only", "re-index regardless of previous manifest").option("--force", "force full rebuild", false).option("--dry-run", "compute plan, no writes", false).option("--source <mode>", "source mode override: static-output|crawl|content-files|build").option("--max-pages <n>", "limit pages processed").option("--max-chunks <n>", "limit chunks processed").option("--quiet", "suppress all output except errors and warnings", false).option("--verbose", "verbose output", false).option("--json", "emit JSON logs and summary", false).action(async (opts, command) => {
|
|
3274
5933
|
const rootOpts = getRootOptions(command);
|
|
3275
|
-
const cwd =
|
|
5934
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3276
5935
|
await runIndexCommand({
|
|
3277
5936
|
cwd,
|
|
3278
5937
|
configPath: rootOpts?.config,
|
|
@@ -3290,7 +5949,7 @@ program.command("index").description("Index site content into Upstash Search").o
|
|
|
3290
5949
|
});
|
|
3291
5950
|
program.command("status").description("Show scope, indexing state, and backend health").option("--scope <name>", "scope override").action(async (opts, command) => {
|
|
3292
5951
|
const rootOpts = getRootOptions(command);
|
|
3293
|
-
const cwd =
|
|
5952
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3294
5953
|
const config = await loadConfig({ cwd, configPath: rootOpts?.config });
|
|
3295
5954
|
const scope = resolveScope(config, opts.scope);
|
|
3296
5955
|
let store;
|
|
@@ -3329,7 +5988,7 @@ program.command("status").description("Show scope, indexing state, and backend h
|
|
|
3329
5988
|
`);
|
|
3330
5989
|
process.stdout.write(`resolved scope: ${scope.scopeName}
|
|
3331
5990
|
`);
|
|
3332
|
-
process.stdout.write(`backend: upstash-
|
|
5991
|
+
process.stdout.write(`backend: upstash-vector
|
|
3333
5992
|
`);
|
|
3334
5993
|
process.stdout.write(`backend health: ${health.ok ? "ok" : `error (${health.details ?? "n/a"})`}
|
|
3335
5994
|
`);
|
|
@@ -3354,19 +6013,31 @@ program.command("status").description("Show scope, indexing state, and backend h
|
|
|
3354
6013
|
}
|
|
3355
6014
|
}
|
|
3356
6015
|
});
|
|
3357
|
-
program.command("dev").description("Watch content files/routes and incrementally reindex on changes").option("--scope <name>", "scope override").option("--mcp", "start MCP server (http transport) alongside watcher", false).option("--mcp-port <n>", "MCP HTTP port", "3338").option("--mcp-path <path>", "MCP HTTP path", "/mcp").option("--verbose", "verbose logs", false).action(async (opts, command) => {
|
|
6016
|
+
program.command("dev").description("Watch content files/routes and incrementally reindex on changes").option("--scope <name>", "scope override").option("--playground", "serve playground UI at /_searchsocket (default: true)", true).option("--no-playground", "disable playground UI").option("--playground-port <n>", "playground HTTP port", "3337").option("--mcp", "start MCP server (http transport) alongside watcher", false).option("--mcp-port <n>", "MCP HTTP port", "3338").option("--mcp-path <path>", "MCP HTTP path", "/mcp").option("--verbose", "verbose logs", false).action(async (opts, command) => {
|
|
3358
6017
|
const rootOpts = getRootOptions(command);
|
|
3359
|
-
const cwd =
|
|
6018
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3360
6019
|
const config = await loadResolvedConfigForDev(cwd, rootOpts?.config);
|
|
3361
6020
|
const watchPaths = collectWatchPaths(config, cwd);
|
|
3362
6021
|
process.stdout.write("starting searchsocket dev watcher...\n");
|
|
3363
6022
|
process.stdout.write(`watching:
|
|
3364
6023
|
${watchPaths.map((entry) => ` - ${entry}`).join("\n")}
|
|
3365
6024
|
`);
|
|
6025
|
+
const upstashUrl = config.upstash.url ?? process.env[config.upstash.urlEnv];
|
|
6026
|
+
const upstashToken = config.upstash.token ?? process.env[config.upstash.tokenEnv];
|
|
6027
|
+
const backendMissing = !upstashUrl || !upstashToken;
|
|
6028
|
+
if (backendMissing) {
|
|
6029
|
+
process.stdout.write(
|
|
6030
|
+
`Search backend not configured \u2014 set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} to enable indexing. Watching for file changes only.
|
|
6031
|
+
`
|
|
6032
|
+
);
|
|
6033
|
+
}
|
|
3366
6034
|
let running = false;
|
|
3367
6035
|
let pending = false;
|
|
3368
6036
|
let timer = null;
|
|
3369
6037
|
const run = async () => {
|
|
6038
|
+
if (backendMissing) {
|
|
6039
|
+
return;
|
|
6040
|
+
}
|
|
3370
6041
|
if (running) {
|
|
3371
6042
|
pending = true;
|
|
3372
6043
|
return;
|
|
@@ -3417,18 +6088,40 @@ ${watchPaths.map((entry) => ` - ${entry}`).join("\n")}
|
|
|
3417
6088
|
httpPath: opts.mcpPath
|
|
3418
6089
|
});
|
|
3419
6090
|
}
|
|
6091
|
+
let closePlayground;
|
|
6092
|
+
if (opts.playground) {
|
|
6093
|
+
if (backendMissing) {
|
|
6094
|
+
process.stdout.write("playground disabled: search backend not configured\n");
|
|
6095
|
+
} else {
|
|
6096
|
+
void runPlaygroundServer({
|
|
6097
|
+
cwd,
|
|
6098
|
+
configPath: rootOpts?.config,
|
|
6099
|
+
config,
|
|
6100
|
+
port: parsePositiveInt(opts.playgroundPort, "--playground-port")
|
|
6101
|
+
}).then(({ port, close }) => {
|
|
6102
|
+
closePlayground = close;
|
|
6103
|
+
process.stdout.write(`playground available at http://127.0.0.1:${port}/_searchsocket
|
|
6104
|
+
`);
|
|
6105
|
+
}).catch((err) => {
|
|
6106
|
+
process.stderr.write(`playground error: ${err instanceof Error ? err.message : String(err)}
|
|
6107
|
+
`);
|
|
6108
|
+
});
|
|
6109
|
+
}
|
|
6110
|
+
}
|
|
3420
6111
|
await new Promise((resolve) => {
|
|
3421
6112
|
process.on("SIGINT", () => {
|
|
3422
|
-
|
|
6113
|
+
const cleanups = [watcher.close()];
|
|
6114
|
+
if (closePlayground) cleanups.push(closePlayground());
|
|
6115
|
+
void Promise.all(cleanups).then(() => resolve());
|
|
3423
6116
|
});
|
|
3424
6117
|
});
|
|
3425
6118
|
});
|
|
3426
6119
|
program.command("clean").description("Delete local state and optionally delete remote indexes for a scope").option("--scope <name>", "scope override").option("--remote", "delete remote scope indexes", false).action(async (opts, command) => {
|
|
3427
6120
|
const rootOpts = getRootOptions(command);
|
|
3428
|
-
const cwd =
|
|
6121
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3429
6122
|
const config = await loadConfig({ cwd, configPath: rootOpts?.config });
|
|
3430
|
-
const statePath =
|
|
3431
|
-
await
|
|
6123
|
+
const statePath = path15.join(cwd, config.state.dir);
|
|
6124
|
+
await fsp2.rm(statePath, { recursive: true, force: true });
|
|
3432
6125
|
process.stdout.write(`deleted local state directory: ${statePath}
|
|
3433
6126
|
`);
|
|
3434
6127
|
if (opts.remote) {
|
|
@@ -3440,7 +6133,7 @@ program.command("clean").description("Delete local state and optionally delete r
|
|
|
3440
6133
|
});
|
|
3441
6134
|
program.command("prune").description("List/delete stale scopes (dry-run by default)").option("--apply", "apply deletions", false).option("--scopes-file <path>", "file containing active scopes").option("--older-than <duration>", "ttl cutoff like 30d").action(async (opts, command) => {
|
|
3442
6135
|
const rootOpts = getRootOptions(command);
|
|
3443
|
-
const cwd =
|
|
6136
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3444
6137
|
const config = await loadConfig({ cwd, configPath: rootOpts?.config });
|
|
3445
6138
|
const baseScope = resolveScope(config);
|
|
3446
6139
|
let store;
|
|
@@ -3450,17 +6143,17 @@ program.command("prune").description("List/delete stale scopes (dry-run by defau
|
|
|
3450
6143
|
scopes = await store.listScopes(config.project.id);
|
|
3451
6144
|
} catch (error) {
|
|
3452
6145
|
process.stderr.write(
|
|
3453
|
-
`error: failed to access Upstash
|
|
6146
|
+
`error: failed to access Upstash Vector: ${error instanceof Error ? error.message : String(error)}
|
|
3454
6147
|
`
|
|
3455
6148
|
);
|
|
3456
6149
|
process.exitCode = 1;
|
|
3457
6150
|
return;
|
|
3458
6151
|
}
|
|
3459
|
-
process.stdout.write(`using Upstash
|
|
6152
|
+
process.stdout.write(`using Upstash Vector
|
|
3460
6153
|
`);
|
|
3461
6154
|
let keepScopes = /* @__PURE__ */ new Set();
|
|
3462
6155
|
if (opts.scopesFile) {
|
|
3463
|
-
keepScopes = readScopesFromFile(
|
|
6156
|
+
keepScopes = readScopesFromFile(path15.resolve(cwd, opts.scopesFile));
|
|
3464
6157
|
} else {
|
|
3465
6158
|
keepScopes = readRemoteGitBranches(cwd);
|
|
3466
6159
|
}
|
|
@@ -3531,7 +6224,7 @@ program.command("prune").description("List/delete stale scopes (dry-run by defau
|
|
|
3531
6224
|
});
|
|
3532
6225
|
program.command("doctor").description("Validate config, env vars, provider connectivity, and local write access").action(async (_opts, command) => {
|
|
3533
6226
|
const rootOpts = getRootOptions(command);
|
|
3534
|
-
const cwd =
|
|
6227
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3535
6228
|
const checks = [];
|
|
3536
6229
|
let config = null;
|
|
3537
6230
|
try {
|
|
@@ -3558,8 +6251,8 @@ program.command("doctor").description("Validate config, env vars, provider conne
|
|
|
3558
6251
|
details: upstashToken ? void 0 : "missing"
|
|
3559
6252
|
});
|
|
3560
6253
|
if (config.source.mode === "static-output") {
|
|
3561
|
-
const outputDir =
|
|
3562
|
-
const exists =
|
|
6254
|
+
const outputDir = path15.resolve(cwd, config.source.staticOutputDir);
|
|
6255
|
+
const exists = fs11.existsSync(outputDir);
|
|
3563
6256
|
checks.push({
|
|
3564
6257
|
name: "source: static output dir",
|
|
3565
6258
|
ok: exists,
|
|
@@ -3568,15 +6261,15 @@ program.command("doctor").description("Validate config, env vars, provider conne
|
|
|
3568
6261
|
} else if (config.source.mode === "build") {
|
|
3569
6262
|
const buildConfig = config.source.build;
|
|
3570
6263
|
if (buildConfig) {
|
|
3571
|
-
const manifestPath =
|
|
3572
|
-
const manifestExists =
|
|
6264
|
+
const manifestPath = path15.resolve(cwd, buildConfig.outputDir, "server", "manifest-full.js");
|
|
6265
|
+
const manifestExists = fs11.existsSync(manifestPath);
|
|
3573
6266
|
checks.push({
|
|
3574
6267
|
name: "source: build manifest",
|
|
3575
6268
|
ok: manifestExists,
|
|
3576
6269
|
details: manifestExists ? manifestPath : `${manifestPath} not found (run \`vite build\` first)`
|
|
3577
6270
|
});
|
|
3578
|
-
const viteBin =
|
|
3579
|
-
const viteExists =
|
|
6271
|
+
const viteBin = path15.resolve(cwd, "node_modules", ".bin", "vite");
|
|
6272
|
+
const viteExists = fs11.existsSync(viteBin);
|
|
3580
6273
|
checks.push({
|
|
3581
6274
|
name: "source: vite binary",
|
|
3582
6275
|
ok: viteExists,
|
|
@@ -3593,7 +6286,7 @@ program.command("doctor").description("Validate config, env vars, provider conne
|
|
|
3593
6286
|
const contentConfig = config.source.contentFiles;
|
|
3594
6287
|
if (contentConfig) {
|
|
3595
6288
|
const fg4 = await import("fast-glob");
|
|
3596
|
-
const baseDir =
|
|
6289
|
+
const baseDir = path15.resolve(cwd, contentConfig.baseDir);
|
|
3597
6290
|
const files = await fg4.default(contentConfig.globs, { cwd: baseDir, onlyFiles: true });
|
|
3598
6291
|
checks.push({
|
|
3599
6292
|
name: "source: content files",
|
|
@@ -3627,9 +6320,9 @@ program.command("doctor").description("Validate config, env vars, provider conne
|
|
|
3627
6320
|
try {
|
|
3628
6321
|
const scope = resolveScope(config);
|
|
3629
6322
|
const { statePath } = ensureStateDirs(cwd, config.state.dir, scope);
|
|
3630
|
-
const testPath =
|
|
3631
|
-
await
|
|
3632
|
-
await
|
|
6323
|
+
const testPath = path15.join(statePath, ".write-test");
|
|
6324
|
+
await fsp2.writeFile(testPath, "ok\n", "utf8");
|
|
6325
|
+
await fsp2.rm(testPath, { force: true });
|
|
3633
6326
|
checks.push({ name: "state directory writable", ok: true });
|
|
3634
6327
|
} catch (error) {
|
|
3635
6328
|
checks.push({
|
|
@@ -3654,20 +6347,22 @@ program.command("doctor").description("Validate config, env vars, provider conne
|
|
|
3654
6347
|
process.exitCode = 1;
|
|
3655
6348
|
}
|
|
3656
6349
|
});
|
|
3657
|
-
program.command("mcp").description("Run SearchSocket MCP server").option("--transport <transport>", "stdio|http", "stdio").option("--port <n>", "HTTP port", "3338").option("--path <path>", "HTTP path", "/mcp").action(async (opts, command) => {
|
|
6350
|
+
program.command("mcp").description("Run SearchSocket MCP server").option("--transport <transport>", "stdio|http", "stdio").option("--port <n>", "HTTP port", "3338").option("--path <path>", "HTTP path", "/mcp").addOption(new Option("--access <mode>", "access mode").choices(["public", "private"])).option("--api-key <key>", "API key for public access mode").action(async (opts, command) => {
|
|
3658
6351
|
const rootOpts = getRootOptions(command);
|
|
3659
|
-
const cwd =
|
|
6352
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3660
6353
|
await runMcpServer({
|
|
3661
6354
|
cwd,
|
|
3662
6355
|
configPath: rootOpts?.config,
|
|
3663
6356
|
transport: opts.transport,
|
|
3664
6357
|
httpPort: parsePositiveInt(opts.port, "--port"),
|
|
3665
|
-
httpPath: opts.path
|
|
6358
|
+
httpPath: opts.path,
|
|
6359
|
+
access: opts.access,
|
|
6360
|
+
apiKey: opts.apiKey
|
|
3666
6361
|
});
|
|
3667
6362
|
});
|
|
3668
6363
|
program.command("search").description("Quick CLI search against Upstash Search").requiredOption("--q <query>", "search query").option("--scope <name>", "scope override").option("--top-k <n>", "top K results", "10").option("--path-prefix <prefix>", "path prefix filter").action(async (opts, command) => {
|
|
3669
6364
|
const rootOpts = getRootOptions(command);
|
|
3670
|
-
const cwd =
|
|
6365
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3671
6366
|
const engine = await SearchEngine.create({
|
|
3672
6367
|
cwd,
|
|
3673
6368
|
configPath: rootOpts?.config
|
|
@@ -3681,8 +6376,156 @@ program.command("search").description("Quick CLI search against Upstash Search")
|
|
|
3681
6376
|
process.stdout.write(`${JSON.stringify(result, null, 2)}
|
|
3682
6377
|
`);
|
|
3683
6378
|
});
|
|
6379
|
+
program.command("test").description("Run search quality assertions against the live index").option("--file <path>", "path to test file", "searchsocket.test.json").option("--scope <name>", "scope override").option("--top-k <n>", "results per query", "10").action(async (opts, command) => {
|
|
6380
|
+
const rootOpts = getRootOptions(command);
|
|
6381
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
6382
|
+
const topK = parsePositiveInt(opts.topK, "--top-k");
|
|
6383
|
+
const filePath = path15.resolve(cwd, opts.file);
|
|
6384
|
+
let rawContent;
|
|
6385
|
+
try {
|
|
6386
|
+
rawContent = await fsp2.readFile(filePath, "utf8");
|
|
6387
|
+
} catch {
|
|
6388
|
+
process.stderr.write(`error: test file not found: ${filePath}
|
|
6389
|
+
`);
|
|
6390
|
+
process.exitCode = 1;
|
|
6391
|
+
return;
|
|
6392
|
+
}
|
|
6393
|
+
let rawJson;
|
|
6394
|
+
try {
|
|
6395
|
+
rawJson = JSON.parse(rawContent);
|
|
6396
|
+
} catch {
|
|
6397
|
+
process.stderr.write(`error: invalid JSON in ${filePath}
|
|
6398
|
+
`);
|
|
6399
|
+
process.exitCode = 1;
|
|
6400
|
+
return;
|
|
6401
|
+
}
|
|
6402
|
+
const parsed = testFileSchema.safeParse(rawJson);
|
|
6403
|
+
if (!parsed.success) {
|
|
6404
|
+
process.stderr.write(`error: invalid test file: ${parsed.error.issues[0]?.message ?? "unknown error"}
|
|
6405
|
+
`);
|
|
6406
|
+
process.exitCode = 1;
|
|
6407
|
+
return;
|
|
6408
|
+
}
|
|
6409
|
+
const testCases = parsed.data;
|
|
6410
|
+
const engine = await SearchEngine.create({
|
|
6411
|
+
cwd,
|
|
6412
|
+
configPath: rootOpts?.config
|
|
6413
|
+
});
|
|
6414
|
+
let passed = 0;
|
|
6415
|
+
let failed = 0;
|
|
6416
|
+
const mrrData = [];
|
|
6417
|
+
for (const tc of testCases) {
|
|
6418
|
+
let results;
|
|
6419
|
+
try {
|
|
6420
|
+
const response = await engine.search({
|
|
6421
|
+
q: tc.query,
|
|
6422
|
+
topK,
|
|
6423
|
+
scope: opts.scope
|
|
6424
|
+
});
|
|
6425
|
+
results = response.results;
|
|
6426
|
+
} catch (error) {
|
|
6427
|
+
const msg = error instanceof Error ? error.message : String(error);
|
|
6428
|
+
process.stdout.write(`FAIL "${tc.query}" \u2192 search error: ${msg}
|
|
6429
|
+
`);
|
|
6430
|
+
failed++;
|
|
6431
|
+
continue;
|
|
6432
|
+
}
|
|
6433
|
+
if (tc.expect.topResult !== void 0) {
|
|
6434
|
+
const expectedUrl = tc.expect.topResult;
|
|
6435
|
+
const rank = results.findIndex((r) => r.url === expectedUrl) + 1;
|
|
6436
|
+
mrrData.push({ results, relevant: [expectedUrl] });
|
|
6437
|
+
if (rank === 1) {
|
|
6438
|
+
process.stdout.write(`PASS "${tc.query}" \u2192 ${expectedUrl} at rank 1
|
|
6439
|
+
`);
|
|
6440
|
+
passed++;
|
|
6441
|
+
} else {
|
|
6442
|
+
const detail = rank === 0 ? "not found" : `got rank ${rank}`;
|
|
6443
|
+
process.stdout.write(`FAIL "${tc.query}" \u2192 expected ${expectedUrl} at rank 1, ${detail}
|
|
6444
|
+
`);
|
|
6445
|
+
failed++;
|
|
6446
|
+
}
|
|
6447
|
+
}
|
|
6448
|
+
if (tc.expect.inTop5 !== void 0) {
|
|
6449
|
+
const expectedUrls = tc.expect.inTop5;
|
|
6450
|
+
const top5Urls = results.slice(0, 5).map((r) => r.url);
|
|
6451
|
+
const missing = expectedUrls.filter((url) => !top5Urls.includes(url));
|
|
6452
|
+
mrrData.push({ results, relevant: expectedUrls });
|
|
6453
|
+
if (missing.length === 0) {
|
|
6454
|
+
process.stdout.write(`PASS "${tc.query}" \u2192 all expected URLs in top 5
|
|
6455
|
+
`);
|
|
6456
|
+
passed++;
|
|
6457
|
+
} else {
|
|
6458
|
+
const missingDetail = missing.map((url) => {
|
|
6459
|
+
const rank = results.findIndex((r) => r.url === url) + 1;
|
|
6460
|
+
return rank === 0 ? `${url} (not found)` : `${url} (rank ${rank})`;
|
|
6461
|
+
}).join(", ");
|
|
6462
|
+
process.stdout.write(`FAIL "${tc.query}" \u2192 missing from top 5: ${missingDetail}
|
|
6463
|
+
`);
|
|
6464
|
+
failed++;
|
|
6465
|
+
}
|
|
6466
|
+
}
|
|
6467
|
+
if (tc.expect.maxResults !== void 0) {
|
|
6468
|
+
const max = tc.expect.maxResults;
|
|
6469
|
+
const actual = results.length;
|
|
6470
|
+
if (actual <= max) {
|
|
6471
|
+
process.stdout.write(`PASS "${tc.query}" \u2192 ${actual} results (max ${max})
|
|
6472
|
+
`);
|
|
6473
|
+
passed++;
|
|
6474
|
+
} else {
|
|
6475
|
+
process.stdout.write(`FAIL "${tc.query}" \u2192 expected at most ${max} results, got ${actual}
|
|
6476
|
+
`);
|
|
6477
|
+
failed++;
|
|
6478
|
+
}
|
|
6479
|
+
}
|
|
6480
|
+
}
|
|
6481
|
+
const total = passed + failed;
|
|
6482
|
+
process.stdout.write(`
|
|
6483
|
+
results: ${passed} passed, ${failed} failed of ${total} assertions
|
|
6484
|
+
`);
|
|
6485
|
+
if (mrrData.length > 0) {
|
|
6486
|
+
const mrrValue = mrr(mrrData);
|
|
6487
|
+
process.stdout.write(`MRR: ${mrrValue.toFixed(4)}
|
|
6488
|
+
`);
|
|
6489
|
+
}
|
|
6490
|
+
process.stdout.write(`pass rate: ${total > 0 ? (passed / total * 100).toFixed(1) : "0.0"}%
|
|
6491
|
+
`);
|
|
6492
|
+
if (failed > 0) {
|
|
6493
|
+
process.exitCode = 1;
|
|
6494
|
+
}
|
|
6495
|
+
});
|
|
6496
|
+
program.command("add <component>").description("Copy a Svelte 5 search component template into your project").option("--dir <path>", "output directory", "src/lib/components/search").option("--overwrite", "overwrite existing files", false).action(async (component, opts, command) => {
|
|
6497
|
+
const root = getRootOptions(command).cwd ?? process.cwd();
|
|
6498
|
+
const cwd = path15.resolve(root);
|
|
6499
|
+
if (!isValidComponent(component)) {
|
|
6500
|
+
const available = listAvailableComponents();
|
|
6501
|
+
process.stderr.write(`unknown component: ${component}
|
|
6502
|
+
`);
|
|
6503
|
+
process.stderr.write(`available components: ${available.join(", ")}
|
|
6504
|
+
`);
|
|
6505
|
+
process.exit(1);
|
|
6506
|
+
}
|
|
6507
|
+
const targetDir = path15.resolve(cwd, opts.dir);
|
|
6508
|
+
const result = await copyComponent(component, targetDir, { overwrite: opts.overwrite });
|
|
6509
|
+
for (const filePath of result.written) {
|
|
6510
|
+
process.stdout.write(`created: ${path15.relative(cwd, filePath)}
|
|
6511
|
+
`);
|
|
6512
|
+
}
|
|
6513
|
+
for (const filePath of result.skipped) {
|
|
6514
|
+
process.stdout.write(`skipped (exists): ${path15.relative(cwd, filePath)}
|
|
6515
|
+
`);
|
|
6516
|
+
}
|
|
6517
|
+
const firstWritten = result.written[0];
|
|
6518
|
+
if (firstWritten) {
|
|
6519
|
+
process.stdout.write(`
|
|
6520
|
+
Usage:
|
|
6521
|
+
`);
|
|
6522
|
+
const fileName = path15.basename(firstWritten, ".svelte");
|
|
6523
|
+
process.stdout.write(` import ${fileName} from "${path15.relative(cwd, firstWritten).replace(/\\/g, "/")}";
|
|
6524
|
+
`);
|
|
6525
|
+
}
|
|
6526
|
+
});
|
|
3684
6527
|
async function main() {
|
|
3685
|
-
dotenvConfig({ path:
|
|
6528
|
+
dotenvConfig({ path: path15.resolve(process.cwd(), ".env") });
|
|
3686
6529
|
await program.parseAsync(process.argv);
|
|
3687
6530
|
}
|
|
3688
6531
|
main().catch((error) => {
|