searchsocket 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +731 -514
- package/dist/cli.js +3308 -524
- package/dist/client.d.cts +1 -1
- package/dist/client.d.ts +1 -1
- package/dist/index.cjs +2310 -466
- package/dist/index.d.cts +101 -40
- package/dist/index.d.ts +101 -40
- package/dist/index.js +2310 -466
- package/dist/{plugin-B_npJSux.d.cts → plugin-C61L-ykY.d.ts} +2 -1
- package/dist/{plugin-M-aW0ev6.d.ts → plugin-DoBW1gkK.d.cts} +2 -1
- package/dist/sveltekit.cjs +2342 -465
- package/dist/sveltekit.d.cts +2 -2
- package/dist/sveltekit.d.ts +2 -2
- package/dist/sveltekit.js +2344 -467
- package/dist/templates/search-dialog/SearchDialog.svelte +175 -0
- package/dist/templates/search-input/SearchInput.svelte +151 -0
- package/dist/templates/search-results/SearchResults.svelte +75 -0
- package/dist/{types-Dk43uz25.d.cts → types-029hl6P2.d.cts} +180 -9
- package/dist/{types-Dk43uz25.d.ts → types-029hl6P2.d.ts} +180 -9
- package/package.json +20 -2
- package/src/svelte/SearchSocket.svelte +35 -0
- package/src/svelte/index.svelte.ts +181 -0
package/dist/cli.js
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/cli.ts
|
|
4
|
-
import
|
|
5
|
-
import
|
|
6
|
-
import
|
|
4
|
+
import fs11 from "fs";
|
|
5
|
+
import fsp2 from "fs/promises";
|
|
6
|
+
import path15 from "path";
|
|
7
7
|
import { execSync as execSync2 } from "child_process";
|
|
8
8
|
import { config as dotenvConfig } from "dotenv";
|
|
9
9
|
import chokidar from "chokidar";
|
|
10
|
-
import { Command } from "commander";
|
|
10
|
+
import { Command, Option } from "commander";
|
|
11
11
|
|
|
12
12
|
// package.json
|
|
13
13
|
var package_default = {
|
|
14
14
|
name: "searchsocket",
|
|
15
|
-
version: "0.
|
|
15
|
+
version: "0.6.0",
|
|
16
16
|
description: "Semantic site search and MCP retrieval for SvelteKit static sites",
|
|
17
17
|
license: "MIT",
|
|
18
18
|
author: "Greg Priday <greg@siteorigin.com>",
|
|
@@ -38,6 +38,7 @@ var package_default = {
|
|
|
38
38
|
files: [
|
|
39
39
|
"dist",
|
|
40
40
|
"!dist/**/*.map",
|
|
41
|
+
"src/svelte",
|
|
41
42
|
"README.md"
|
|
42
43
|
],
|
|
43
44
|
bin: {
|
|
@@ -63,6 +64,19 @@ var package_default = {
|
|
|
63
64
|
types: "./dist/scroll.d.ts",
|
|
64
65
|
import: "./dist/scroll.js",
|
|
65
66
|
require: "./dist/scroll.cjs"
|
|
67
|
+
},
|
|
68
|
+
"./svelte": {
|
|
69
|
+
types: "./src/svelte/index.svelte.ts",
|
|
70
|
+
svelte: "./src/svelte/index.svelte.ts",
|
|
71
|
+
default: "./src/svelte/index.svelte.ts"
|
|
72
|
+
}
|
|
73
|
+
},
|
|
74
|
+
peerDependencies: {
|
|
75
|
+
svelte: "^5.0.0"
|
|
76
|
+
},
|
|
77
|
+
peerDependenciesMeta: {
|
|
78
|
+
svelte: {
|
|
79
|
+
optional: true
|
|
66
80
|
}
|
|
67
81
|
},
|
|
68
82
|
scripts: {
|
|
@@ -78,8 +92,9 @@ var package_default = {
|
|
|
78
92
|
},
|
|
79
93
|
packageManager: "pnpm@10.29.2",
|
|
80
94
|
dependencies: {
|
|
95
|
+
"@clack/prompts": "^1.2.0",
|
|
81
96
|
"@modelcontextprotocol/sdk": "^1.26.0",
|
|
82
|
-
"@upstash/
|
|
97
|
+
"@upstash/vector": "^1.2.3",
|
|
83
98
|
cheerio: "^1.2.0",
|
|
84
99
|
chokidar: "^5.0.0",
|
|
85
100
|
commander: "^14.0.3",
|
|
@@ -88,16 +103,19 @@ var package_default = {
|
|
|
88
103
|
"fast-glob": "^3.3.3",
|
|
89
104
|
"gray-matter": "^4.0.3",
|
|
90
105
|
jiti: "^2.6.1",
|
|
106
|
+
magicast: "^0.5.2",
|
|
91
107
|
"p-limit": "^7.3.0",
|
|
92
108
|
turndown: "^7.2.2",
|
|
93
109
|
"turndown-plugin-gfm": "^1.0.2",
|
|
94
110
|
zod: "^4.3.6"
|
|
95
111
|
},
|
|
96
112
|
devDependencies: {
|
|
113
|
+
"@sveltejs/vite-plugin-svelte": "^6.2.4",
|
|
97
114
|
"@types/express": "^5.0.6",
|
|
98
115
|
"@types/node": "^25.2.2",
|
|
99
116
|
"@types/turndown": "^5.0.6",
|
|
100
117
|
jsdom: "^28.1.0",
|
|
118
|
+
svelte: "^5.55.1",
|
|
101
119
|
tsup: "^8.5.1",
|
|
102
120
|
typescript: "^5.9.3",
|
|
103
121
|
vitest: "^4.0.18"
|
|
@@ -154,6 +172,7 @@ var searchSocketConfigSchema = z.object({
|
|
|
154
172
|
dropSelectors: z.array(z.string()).optional(),
|
|
155
173
|
ignoreAttr: z.string().optional(),
|
|
156
174
|
noindexAttr: z.string().optional(),
|
|
175
|
+
imageDescAttr: z.string().optional(),
|
|
157
176
|
respectRobotsNoindex: z.boolean().optional()
|
|
158
177
|
}).optional(),
|
|
159
178
|
transform: z.object({
|
|
@@ -169,35 +188,48 @@ var searchSocketConfigSchema = z.object({
|
|
|
169
188
|
headingPathDepth: z.number().int().positive().optional(),
|
|
170
189
|
dontSplitInside: z.array(z.enum(["code", "table", "blockquote"])).optional(),
|
|
171
190
|
prependTitle: z.boolean().optional(),
|
|
172
|
-
pageSummaryChunk: z.boolean().optional()
|
|
191
|
+
pageSummaryChunk: z.boolean().optional(),
|
|
192
|
+
weightHeadings: z.boolean().optional()
|
|
173
193
|
}).optional(),
|
|
174
194
|
upstash: z.object({
|
|
175
195
|
url: z.string().url().optional(),
|
|
176
196
|
token: z.string().min(1).optional(),
|
|
177
197
|
urlEnv: z.string().min(1).optional(),
|
|
178
|
-
tokenEnv: z.string().min(1).optional()
|
|
198
|
+
tokenEnv: z.string().min(1).optional(),
|
|
199
|
+
namespaces: z.object({
|
|
200
|
+
pages: z.string().min(1).optional(),
|
|
201
|
+
chunks: z.string().min(1).optional()
|
|
202
|
+
}).optional()
|
|
203
|
+
}).optional(),
|
|
204
|
+
embedding: z.object({
|
|
205
|
+
model: z.string().optional(),
|
|
206
|
+
dimensions: z.number().int().positive().optional(),
|
|
207
|
+
taskType: z.string().optional(),
|
|
208
|
+
batchSize: z.number().int().positive().optional()
|
|
179
209
|
}).optional(),
|
|
180
210
|
search: z.object({
|
|
181
|
-
semanticWeight: z.number().min(0).max(1).optional(),
|
|
182
|
-
inputEnrichment: z.boolean().optional(),
|
|
183
|
-
reranking: z.boolean().optional(),
|
|
184
211
|
dualSearch: z.boolean().optional(),
|
|
185
212
|
pageSearchWeight: z.number().min(0).max(1).optional()
|
|
186
213
|
}).optional(),
|
|
187
214
|
ranking: z.object({
|
|
188
215
|
enableIncomingLinkBoost: z.boolean().optional(),
|
|
189
216
|
enableDepthBoost: z.boolean().optional(),
|
|
217
|
+
enableFreshnessBoost: z.boolean().optional(),
|
|
218
|
+
freshnessDecayRate: z.number().positive().optional(),
|
|
219
|
+
enableAnchorTextBoost: z.boolean().optional(),
|
|
190
220
|
pageWeights: z.record(z.string(), z.number().min(0)).optional(),
|
|
191
221
|
aggregationCap: z.number().int().positive().optional(),
|
|
192
222
|
aggregationDecay: z.number().min(0).max(1).optional(),
|
|
193
223
|
minChunkScoreRatio: z.number().min(0).max(1).optional(),
|
|
194
|
-
|
|
224
|
+
minScoreRatio: z.number().min(0).max(1).optional(),
|
|
195
225
|
scoreGapThreshold: z.number().min(0).max(1).optional(),
|
|
196
226
|
weights: z.object({
|
|
197
227
|
incomingLinks: z.number().optional(),
|
|
198
228
|
depth: z.number().optional(),
|
|
199
229
|
aggregation: z.number().optional(),
|
|
200
|
-
titleMatch: z.number().optional()
|
|
230
|
+
titleMatch: z.number().optional(),
|
|
231
|
+
freshness: z.number().optional(),
|
|
232
|
+
anchorText: z.number().optional()
|
|
201
233
|
}).optional()
|
|
202
234
|
}).optional(),
|
|
203
235
|
api: z.object({
|
|
@@ -212,12 +244,28 @@ var searchSocketConfigSchema = z.object({
|
|
|
212
244
|
}).optional(),
|
|
213
245
|
mcp: z.object({
|
|
214
246
|
enable: z.boolean().optional(),
|
|
247
|
+
access: z.enum(["public", "private"]).optional(),
|
|
215
248
|
transport: z.enum(["stdio", "http"]).optional(),
|
|
216
249
|
http: z.object({
|
|
217
250
|
port: z.number().int().positive().optional(),
|
|
218
|
-
path: z.string().optional()
|
|
251
|
+
path: z.string().optional(),
|
|
252
|
+
apiKey: z.string().min(1).optional(),
|
|
253
|
+
apiKeyEnv: z.string().min(1).optional()
|
|
254
|
+
}).optional(),
|
|
255
|
+
handle: z.object({
|
|
256
|
+
path: z.string().optional(),
|
|
257
|
+
apiKey: z.string().min(1).optional(),
|
|
258
|
+
enableJsonResponse: z.boolean().optional()
|
|
219
259
|
}).optional()
|
|
220
260
|
}).optional(),
|
|
261
|
+
llmsTxt: z.object({
|
|
262
|
+
enable: z.boolean().optional(),
|
|
263
|
+
outputPath: z.string().optional(),
|
|
264
|
+
title: z.string().optional(),
|
|
265
|
+
description: z.string().optional(),
|
|
266
|
+
generateFull: z.boolean().optional(),
|
|
267
|
+
serveMarkdownVariants: z.boolean().optional()
|
|
268
|
+
}).optional(),
|
|
221
269
|
state: z.object({
|
|
222
270
|
dir: z.string().optional()
|
|
223
271
|
}).optional()
|
|
@@ -256,6 +304,7 @@ function createDefaultConfig(projectId) {
|
|
|
256
304
|
dropSelectors: DEFAULT_DROP_SELECTORS,
|
|
257
305
|
ignoreAttr: "data-search-ignore",
|
|
258
306
|
noindexAttr: "data-search-noindex",
|
|
307
|
+
imageDescAttr: "data-search-description",
|
|
259
308
|
respectRobotsNoindex: true
|
|
260
309
|
},
|
|
261
310
|
transform: {
|
|
@@ -265,39 +314,52 @@ function createDefaultConfig(projectId) {
|
|
|
265
314
|
},
|
|
266
315
|
chunking: {
|
|
267
316
|
strategy: "hybrid",
|
|
268
|
-
maxChars:
|
|
317
|
+
maxChars: 1500,
|
|
269
318
|
overlapChars: 200,
|
|
270
319
|
minChars: 250,
|
|
271
320
|
headingPathDepth: 3,
|
|
272
321
|
dontSplitInside: ["code", "table", "blockquote"],
|
|
273
322
|
prependTitle: true,
|
|
274
|
-
pageSummaryChunk: true
|
|
323
|
+
pageSummaryChunk: true,
|
|
324
|
+
weightHeadings: true
|
|
275
325
|
},
|
|
276
326
|
upstash: {
|
|
277
|
-
urlEnv: "
|
|
278
|
-
tokenEnv: "
|
|
327
|
+
urlEnv: "UPSTASH_VECTOR_REST_URL",
|
|
328
|
+
tokenEnv: "UPSTASH_VECTOR_REST_TOKEN",
|
|
329
|
+
namespaces: {
|
|
330
|
+
pages: "pages",
|
|
331
|
+
chunks: "chunks"
|
|
332
|
+
}
|
|
333
|
+
},
|
|
334
|
+
embedding: {
|
|
335
|
+
model: "bge-large-en-v1.5",
|
|
336
|
+
dimensions: 1024,
|
|
337
|
+
taskType: "RETRIEVAL_DOCUMENT",
|
|
338
|
+
batchSize: 100
|
|
279
339
|
},
|
|
280
340
|
search: {
|
|
281
|
-
semanticWeight: 0.75,
|
|
282
|
-
inputEnrichment: true,
|
|
283
|
-
reranking: true,
|
|
284
341
|
dualSearch: true,
|
|
285
342
|
pageSearchWeight: 0.3
|
|
286
343
|
},
|
|
287
344
|
ranking: {
|
|
288
345
|
enableIncomingLinkBoost: true,
|
|
289
346
|
enableDepthBoost: true,
|
|
347
|
+
enableFreshnessBoost: false,
|
|
348
|
+
freshnessDecayRate: 1e-3,
|
|
349
|
+
enableAnchorTextBoost: false,
|
|
290
350
|
pageWeights: {},
|
|
291
351
|
aggregationCap: 5,
|
|
292
352
|
aggregationDecay: 0.5,
|
|
293
353
|
minChunkScoreRatio: 0.5,
|
|
294
|
-
|
|
354
|
+
minScoreRatio: 0.7,
|
|
295
355
|
scoreGapThreshold: 0.4,
|
|
296
356
|
weights: {
|
|
297
357
|
incomingLinks: 0.05,
|
|
298
358
|
depth: 0.03,
|
|
299
359
|
aggregation: 0.1,
|
|
300
|
-
titleMatch: 0.15
|
|
360
|
+
titleMatch: 0.15,
|
|
361
|
+
freshness: 0.1,
|
|
362
|
+
anchorText: 0.1
|
|
301
363
|
}
|
|
302
364
|
},
|
|
303
365
|
api: {
|
|
@@ -308,12 +370,23 @@ function createDefaultConfig(projectId) {
|
|
|
308
370
|
},
|
|
309
371
|
mcp: {
|
|
310
372
|
enable: process.env.NODE_ENV !== "production",
|
|
373
|
+
access: "private",
|
|
311
374
|
transport: "stdio",
|
|
312
375
|
http: {
|
|
313
376
|
port: 3338,
|
|
314
377
|
path: "/mcp"
|
|
378
|
+
},
|
|
379
|
+
handle: {
|
|
380
|
+
path: "/api/mcp",
|
|
381
|
+
enableJsonResponse: true
|
|
315
382
|
}
|
|
316
383
|
},
|
|
384
|
+
llmsTxt: {
|
|
385
|
+
enable: false,
|
|
386
|
+
outputPath: "static/llms.txt",
|
|
387
|
+
generateFull: true,
|
|
388
|
+
serveMarkdownVariants: false
|
|
389
|
+
},
|
|
317
390
|
state: {
|
|
318
391
|
dir: ".searchsocket"
|
|
319
392
|
}
|
|
@@ -425,7 +498,15 @@ ${issues}`
|
|
|
425
498
|
},
|
|
426
499
|
upstash: {
|
|
427
500
|
...defaults.upstash,
|
|
428
|
-
...parsed.upstash
|
|
501
|
+
...parsed.upstash,
|
|
502
|
+
namespaces: {
|
|
503
|
+
...defaults.upstash.namespaces,
|
|
504
|
+
...parsed.upstash?.namespaces
|
|
505
|
+
}
|
|
506
|
+
},
|
|
507
|
+
embedding: {
|
|
508
|
+
...defaults.embedding,
|
|
509
|
+
...parsed.embedding
|
|
429
510
|
},
|
|
430
511
|
search: {
|
|
431
512
|
...defaults.search,
|
|
@@ -462,8 +543,16 @@ ${issues}`
|
|
|
462
543
|
http: {
|
|
463
544
|
...defaults.mcp.http,
|
|
464
545
|
...parsed.mcp?.http
|
|
546
|
+
},
|
|
547
|
+
handle: {
|
|
548
|
+
...defaults.mcp.handle,
|
|
549
|
+
...parsed.mcp?.handle
|
|
465
550
|
}
|
|
466
551
|
},
|
|
552
|
+
llmsTxt: {
|
|
553
|
+
...defaults.llmsTxt,
|
|
554
|
+
...parsed.llmsTxt
|
|
555
|
+
},
|
|
467
556
|
state: {
|
|
468
557
|
...defaults.state,
|
|
469
558
|
...parsed.state
|
|
@@ -483,6 +572,15 @@ ${issues}`
|
|
|
483
572
|
maxDepth: 10
|
|
484
573
|
};
|
|
485
574
|
}
|
|
575
|
+
if (merged.mcp.access === "public") {
|
|
576
|
+
const resolvedKey = merged.mcp.http.apiKey ?? (merged.mcp.http.apiKeyEnv ? process.env[merged.mcp.http.apiKeyEnv] : void 0);
|
|
577
|
+
if (!resolvedKey) {
|
|
578
|
+
throw new SearchSocketError(
|
|
579
|
+
"CONFIG_MISSING",
|
|
580
|
+
'`mcp.access` is "public" but no API key is configured. Set `mcp.http.apiKey` or `mcp.http.apiKeyEnv`.'
|
|
581
|
+
);
|
|
582
|
+
}
|
|
583
|
+
}
|
|
486
584
|
if (merged.source.mode === "crawl" && !merged.source.crawl?.baseUrl) {
|
|
487
585
|
throw new SearchSocketError("CONFIG_MISSING", "`source.crawl.baseUrl` is required when source.mode is crawl.");
|
|
488
586
|
}
|
|
@@ -521,8 +619,8 @@ function writeMinimalConfig(cwd) {
|
|
|
521
619
|
return target;
|
|
522
620
|
}
|
|
523
621
|
const content = `export default {
|
|
524
|
-
// Upstash
|
|
525
|
-
// upstash: { urlEnv: "
|
|
622
|
+
// Upstash Vector credentials (set via env vars or directly here)
|
|
623
|
+
// upstash: { urlEnv: "UPSTASH_VECTOR_REST_URL", tokenEnv: "UPSTASH_VECTOR_REST_TOKEN" }
|
|
526
624
|
};
|
|
527
625
|
`;
|
|
528
626
|
fs.writeFileSync(target, content, "utf8");
|
|
@@ -586,11 +684,11 @@ var Logger = class {
|
|
|
586
684
|
this.writeOut(` [${event}] ${data ? JSON.stringify(data) : ""}
|
|
587
685
|
`);
|
|
588
686
|
}
|
|
589
|
-
writeOut(
|
|
687
|
+
writeOut(text2) {
|
|
590
688
|
if (this.stderrOnly) {
|
|
591
|
-
process.stderr.write(
|
|
689
|
+
process.stderr.write(text2);
|
|
592
690
|
} else {
|
|
593
|
-
process.stdout.write(
|
|
691
|
+
process.stdout.write(text2);
|
|
594
692
|
}
|
|
595
693
|
}
|
|
596
694
|
logJson(event, data) {
|
|
@@ -617,13 +715,84 @@ function normalizeMarkdown(input) {
|
|
|
617
715
|
/**
 * Normalize a scope name into a lowercase slug.
 *
 * Runs of characters outside `a-z`, `0-9`, `.`, `_` and `-` collapse into a
 * single `-`, leading/trailing dashes are stripped, and the result is capped
 * at 80 characters.
 *
 * Note: the cap is applied after the dash trimming, so a truncated result can
 * still end with `-`.
 *
 * @param {string} scopeName - Raw scope name.
 * @returns {string} The sanitized scope name (at most 80 characters).
 */
function sanitizeScopeName(scopeName) {
  const lowered = scopeName.toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9._-]+/g, "-");
  const trimmed = dashed.replace(/^-+|-+$/g, "");
  return trimmed.slice(0, 80);
}
|
|
718
|
+
/**
 * Reduce markdown to a single line of plain text.
 *
 * Steps, in order: fenced code blocks are replaced by a space, inline code
 * keeps its content without the backticks, the characters `# > * _ | -` are
 * replaced by spaces (including when they occur inside words), and all
 * whitespace runs collapse to one space before trimming.
 *
 * @param {string} markdown - Markdown source text.
 * @returns {string} Whitespace-normalized plain text.
 */
function markdownToPlain(markdown) {
  const withoutFences = markdown.replace(/```[\s\S]*?```/g, " ");
  const withoutInlineTicks = withoutFences.replace(/`([^`]+)`/g, "$1");
  const withoutMarkers = withoutInlineTicks.replace(/[#>*_|\-]/g, " ");
  return withoutMarkers.replace(/\s+/g, " ").trim();
}
|
|
620
721
|
/**
 * Produce a short plain-text snippet from the start of a markdown string.
 *
 * @param {string} markdown - Markdown source text.
 * @param {number} [maxLen=220] - Length budget for the snippet.
 * @returns {string} The full plain text when it fits within `maxLen`;
 *   otherwise the first `maxLen - 1` characters (trimmed) followed by an
 *   ellipsis character.
 */
function toSnippet(markdown, maxLen = 220) {
  const text = markdownToPlain(markdown);
  const fits = text.length <= maxLen;
  if (fits) {
    return text;
  }
  // Reserve one character of the budget for the ellipsis.
  const keep = Math.max(0, maxLen - 1);
  const head = text.slice(0, keep).trim();
  return `${head}\u2026`;
}
|
|
728
|
+
/**
 * Build a plain-text excerpt of `markdown` centred on the window that covers
 * the most query terms.
 *
 * The markdown is flattened with `markdownToPlain`. If the flattened text fits
 * in `maxLen` it is returned whole. Otherwise every occurrence of every query
 * token (whitespace-separated, at least 2 characters, case-insensitive) is
 * located, and a sliding window no wider than `maxLen` picks the span with the
 * most distinct tokens (ties broken by total match count). The excerpt is
 * centred on that span, nudged to nearby word boundaries, and decorated with
 * an ellipsis on each side where text was cut.
 *
 * Falls back to `toSnippet(markdown, maxLen)` when the query has no usable
 * tokens or none of them occur in the text.
 *
 * @param {string} markdown - Markdown source text.
 * @param {string} query - Raw search query.
 * @param {number} [maxLen=220] - Target excerpt length (the result may exceed
 *   it by up to ~20% because of word-boundary snapping, plus ellipses).
 * @returns {string} The excerpt.
 */
function queryAwareExcerpt(markdown, query, maxLen = 220) {
  const plain = markdownToPlain(markdown);
  if (plain.length <= maxLen) return plain;

  const tokens = query.toLowerCase().split(/\s+/).filter((t) => t.length >= 2);
  if (tokens.length === 0) return toSnippet(markdown, maxLen);

  // Collect every match of every token, remembering which token matched.
  const positions = [];
  tokens.forEach((token, tokenIdx) => {
    // Escape regex metacharacters so the token is matched literally.
    const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    for (const m of plain.matchAll(new RegExp(escaped, "gi"))) {
      positions.push({ start: m.index, end: m.index + m[0].length, tokenIdx });
    }
  });
  if (positions.length === 0) return toSnippet(markdown, maxLen);
  positions.sort((a, b) => a.start - b.start);

  // Sliding window over the sorted matches: keep [left, right] within maxLen
  // characters and track how many distinct tokens the window contains.
  let bestUniqueCount = 0;
  let bestTotalCount = 0;
  let bestLeft = 0;
  let bestRight = 0;
  let left = 0;
  const tokenCounts = new Map();
  for (let right = 0; right < positions.length; right++) {
    const rightToken = positions[right].tokenIdx;
    tokenCounts.set(rightToken, (tokenCounts.get(rightToken) ?? 0) + 1);
    while (positions[right].end - positions[left].start > maxLen && left < right) {
      const leftToken = positions[left].tokenIdx;
      const cnt = tokenCounts.get(leftToken) - 1;
      if (cnt === 0) tokenCounts.delete(leftToken);
      else tokenCounts.set(leftToken, cnt);
      left++;
    }
    const uniqueCount = tokenCounts.size;
    const totalCount = right - left + 1;
    if (uniqueCount > bestUniqueCount || (uniqueCount === bestUniqueCount && totalCount > bestTotalCount)) {
      bestUniqueCount = uniqueCount;
      bestTotalCount = totalCount;
      bestLeft = left;
      bestRight = right;
    }
  }

  // Centre a maxLen-wide window on the best span, clamped to the text bounds.
  const mid = Math.floor((positions[bestLeft].start + positions[bestRight].end) / 2);
  let start = Math.max(0, mid - Math.floor(maxLen / 2));
  let end = Math.min(plain.length, start + maxLen);
  start = Math.max(0, end - maxLen);

  // Snap both edges to a word boundary if one lies within 30 characters.
  if (start > 0) {
    const spaceIdx = plain.lastIndexOf(" ", start);
    if (spaceIdx > start - 30) {
      start = spaceIdx + 1;
    }
  }
  if (end < plain.length) {
    const spaceIdx = plain.indexOf(" ", end);
    if (spaceIdx !== -1 && spaceIdx < end + 30) {
      end = spaceIdx;
    }
  }

  let excerpt = plain.slice(start, end);
  if (excerpt.length > Math.ceil(maxLen * 1.2)) {
    // Snapping overshot the budget: cut back to maxLen, preferring a word break.
    excerpt = excerpt.slice(0, maxLen);
    const lastSpace = excerpt.lastIndexOf(" ");
    if (lastSpace > maxLen * 0.5) {
      excerpt = excerpt.slice(0, lastSpace);
    }
    // Keep `end` in sync with what was actually kept so the trailing ellipsis
    // below reflects this truncation as well.
    end = start + excerpt.length;
  }

  const prefix = start > 0 ? "\u2026" : "";
  const suffix = end < plain.length ? "\u2026" : "";
  return `${prefix}${excerpt}${suffix}`;
}
|
|
627
796
|
function extractFirstParagraph(markdown) {
|
|
628
797
|
const lines = markdown.split("\n");
|
|
629
798
|
let inFence = false;
|
|
@@ -690,163 +859,292 @@ function ensureStateDirs(cwd, stateDir, scope) {
|
|
|
690
859
|
}
|
|
691
860
|
|
|
692
861
|
// src/indexing/pipeline.ts
|
|
693
|
-
import
|
|
862
|
+
import path11 from "path";
|
|
694
863
|
|
|
695
864
|
// src/vector/upstash.ts
|
|
696
|
-
|
|
697
|
-
return `${scope.projectId}--${scope.scopeName}`;
|
|
698
|
-
}
|
|
699
|
-
function pageIndexName(scope) {
|
|
700
|
-
return `${scope.projectId}--${scope.scopeName}--pages`;
|
|
701
|
-
}
|
|
865
|
+
import { QueryMode, FusionAlgorithm } from "@upstash/vector";
|
|
702
866
|
var UpstashSearchStore = class {
|
|
703
|
-
|
|
867
|
+
index;
|
|
868
|
+
pagesNs;
|
|
869
|
+
chunksNs;
|
|
704
870
|
constructor(opts) {
|
|
705
|
-
this.
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
return this.client.index(chunkIndexName(scope));
|
|
709
|
-
}
|
|
710
|
-
pageIndex(scope) {
|
|
711
|
-
return this.client.index(pageIndexName(scope));
|
|
871
|
+
this.index = opts.index;
|
|
872
|
+
this.pagesNs = opts.index.namespace(opts.pagesNamespace);
|
|
873
|
+
this.chunksNs = opts.index.namespace(opts.chunksNamespace);
|
|
712
874
|
}
|
|
713
875
|
async upsertChunks(chunks, scope) {
|
|
714
876
|
if (chunks.length === 0) return;
|
|
715
|
-
const index = this.chunkIndex(scope);
|
|
716
877
|
const BATCH_SIZE = 100;
|
|
717
878
|
for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
|
|
718
879
|
const batch = chunks.slice(i, i + BATCH_SIZE);
|
|
719
|
-
await
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
880
|
+
await this.chunksNs.upsert(
|
|
881
|
+
batch.map((c) => ({
|
|
882
|
+
id: c.id,
|
|
883
|
+
data: c.data,
|
|
884
|
+
metadata: {
|
|
885
|
+
...c.metadata,
|
|
886
|
+
projectId: scope.projectId,
|
|
887
|
+
scopeName: scope.scopeName,
|
|
888
|
+
type: c.metadata.type || "chunk"
|
|
889
|
+
}
|
|
890
|
+
}))
|
|
891
|
+
);
|
|
892
|
+
}
|
|
893
|
+
}
|
|
894
|
+
async search(data, opts, scope) {
|
|
895
|
+
const filterParts = [
|
|
896
|
+
`projectId = '${scope.projectId}'`,
|
|
897
|
+
`scopeName = '${scope.scopeName}'`
|
|
898
|
+
];
|
|
899
|
+
if (opts.filter) {
|
|
900
|
+
filterParts.push(opts.filter);
|
|
901
|
+
}
|
|
902
|
+
const results = await this.chunksNs.query({
|
|
903
|
+
data,
|
|
904
|
+
topK: opts.limit,
|
|
905
|
+
includeMetadata: true,
|
|
906
|
+
filter: filterParts.join(" AND "),
|
|
907
|
+
queryMode: QueryMode.HYBRID,
|
|
908
|
+
fusionAlgorithm: FusionAlgorithm.DBSF
|
|
909
|
+
});
|
|
910
|
+
return results.map((doc) => ({
|
|
911
|
+
id: String(doc.id),
|
|
912
|
+
score: doc.score,
|
|
913
|
+
metadata: {
|
|
914
|
+
projectId: doc.metadata?.projectId ?? "",
|
|
915
|
+
scopeName: doc.metadata?.scopeName ?? "",
|
|
916
|
+
url: doc.metadata?.url ?? "",
|
|
917
|
+
path: doc.metadata?.path ?? "",
|
|
918
|
+
title: doc.metadata?.title ?? "",
|
|
919
|
+
sectionTitle: doc.metadata?.sectionTitle ?? "",
|
|
920
|
+
headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
|
|
921
|
+
snippet: doc.metadata?.snippet ?? "",
|
|
922
|
+
chunkText: doc.metadata?.chunkText ?? "",
|
|
923
|
+
ordinal: doc.metadata?.ordinal ?? 0,
|
|
924
|
+
contentHash: doc.metadata?.contentHash ?? "",
|
|
925
|
+
depth: doc.metadata?.depth ?? 0,
|
|
926
|
+
incomingLinks: doc.metadata?.incomingLinks ?? 0,
|
|
927
|
+
routeFile: doc.metadata?.routeFile ?? "",
|
|
928
|
+
tags: doc.metadata?.tags ?? [],
|
|
929
|
+
description: doc.metadata?.description || void 0,
|
|
930
|
+
keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
|
|
931
|
+
publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
|
|
932
|
+
incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
|
|
933
|
+
}
|
|
934
|
+
}));
|
|
935
|
+
}
|
|
936
|
+
async searchChunksByUrl(data, url, opts, scope) {
|
|
937
|
+
const filterParts = [
|
|
938
|
+
`projectId = '${scope.projectId}'`,
|
|
939
|
+
`scopeName = '${scope.scopeName}'`,
|
|
940
|
+
`url = '${url}'`
|
|
941
|
+
];
|
|
942
|
+
if (opts.filter) {
|
|
943
|
+
filterParts.push(opts.filter);
|
|
944
|
+
}
|
|
945
|
+
const results = await this.chunksNs.query({
|
|
946
|
+
data,
|
|
947
|
+
topK: opts.limit,
|
|
948
|
+
includeMetadata: true,
|
|
949
|
+
filter: filterParts.join(" AND "),
|
|
950
|
+
queryMode: QueryMode.HYBRID,
|
|
951
|
+
fusionAlgorithm: FusionAlgorithm.DBSF
|
|
731
952
|
});
|
|
732
953
|
return results.map((doc) => ({
|
|
733
|
-
id: doc.id,
|
|
954
|
+
id: String(doc.id),
|
|
734
955
|
score: doc.score,
|
|
735
956
|
metadata: {
|
|
736
957
|
projectId: doc.metadata?.projectId ?? "",
|
|
737
958
|
scopeName: doc.metadata?.scopeName ?? "",
|
|
738
|
-
url: doc.
|
|
959
|
+
url: doc.metadata?.url ?? "",
|
|
739
960
|
path: doc.metadata?.path ?? "",
|
|
740
|
-
title: doc.
|
|
741
|
-
sectionTitle: doc.
|
|
742
|
-
headingPath: doc.
|
|
961
|
+
title: doc.metadata?.title ?? "",
|
|
962
|
+
sectionTitle: doc.metadata?.sectionTitle ?? "",
|
|
963
|
+
headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
|
|
743
964
|
snippet: doc.metadata?.snippet ?? "",
|
|
744
|
-
chunkText: doc.
|
|
965
|
+
chunkText: doc.metadata?.chunkText ?? "",
|
|
745
966
|
ordinal: doc.metadata?.ordinal ?? 0,
|
|
746
967
|
contentHash: doc.metadata?.contentHash ?? "",
|
|
747
968
|
depth: doc.metadata?.depth ?? 0,
|
|
748
969
|
incomingLinks: doc.metadata?.incomingLinks ?? 0,
|
|
749
970
|
routeFile: doc.metadata?.routeFile ?? "",
|
|
750
|
-
tags: doc.
|
|
971
|
+
tags: doc.metadata?.tags ?? [],
|
|
751
972
|
description: doc.metadata?.description || void 0,
|
|
752
|
-
keywords: doc.metadata?.keywords ? doc.metadata.keywords
|
|
973
|
+
keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
|
|
974
|
+
publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
|
|
975
|
+
incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
|
|
753
976
|
}
|
|
754
977
|
}));
|
|
755
978
|
}
|
|
756
|
-
async
|
|
757
|
-
|
|
979
|
+
async searchPagesByText(data, opts, scope) {
|
|
980
|
+
return this.queryPages({ data }, opts, scope);
|
|
981
|
+
}
|
|
982
|
+
async searchPagesByVector(vector, opts, scope) {
|
|
983
|
+
return this.queryPages({ vector }, opts, scope);
|
|
984
|
+
}
|
|
985
|
+
async queryPages(input, opts, scope) {
|
|
986
|
+
const filterParts = [
|
|
987
|
+
`projectId = '${scope.projectId}'`,
|
|
988
|
+
`scopeName = '${scope.scopeName}'`
|
|
989
|
+
];
|
|
990
|
+
if (opts.filter) {
|
|
991
|
+
filterParts.push(opts.filter);
|
|
992
|
+
}
|
|
758
993
|
let results;
|
|
759
994
|
try {
|
|
760
|
-
results = await
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
995
|
+
results = await this.pagesNs.query({
|
|
996
|
+
...input,
|
|
997
|
+
topK: opts.limit,
|
|
998
|
+
includeMetadata: true,
|
|
999
|
+
filter: filterParts.join(" AND "),
|
|
1000
|
+
queryMode: QueryMode.HYBRID,
|
|
1001
|
+
fusionAlgorithm: FusionAlgorithm.DBSF
|
|
767
1002
|
});
|
|
768
1003
|
} catch {
|
|
769
1004
|
return [];
|
|
770
1005
|
}
|
|
771
1006
|
return results.map((doc) => ({
|
|
772
|
-
id: doc.id,
|
|
1007
|
+
id: String(doc.id),
|
|
773
1008
|
score: doc.score,
|
|
774
|
-
title: doc.
|
|
775
|
-
url: doc.
|
|
776
|
-
description: doc.
|
|
777
|
-
tags: doc.
|
|
1009
|
+
title: doc.metadata?.title ?? "",
|
|
1010
|
+
url: doc.metadata?.url ?? "",
|
|
1011
|
+
description: doc.metadata?.description ?? "",
|
|
1012
|
+
tags: doc.metadata?.tags ?? [],
|
|
778
1013
|
depth: doc.metadata?.depth ?? 0,
|
|
779
1014
|
incomingLinks: doc.metadata?.incomingLinks ?? 0,
|
|
780
|
-
routeFile: doc.metadata?.routeFile ?? ""
|
|
1015
|
+
routeFile: doc.metadata?.routeFile ?? "",
|
|
1016
|
+
publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0
|
|
781
1017
|
}));
|
|
782
1018
|
}
|
|
783
|
-
async deleteByIds(ids,
|
|
1019
|
+
async deleteByIds(ids, _scope) {
|
|
784
1020
|
if (ids.length === 0) return;
|
|
785
|
-
const
|
|
786
|
-
const BATCH_SIZE = 500;
|
|
1021
|
+
const BATCH_SIZE = 100;
|
|
787
1022
|
for (let i = 0; i < ids.length; i += BATCH_SIZE) {
|
|
788
1023
|
const batch = ids.slice(i, i + BATCH_SIZE);
|
|
789
|
-
await
|
|
1024
|
+
await this.chunksNs.delete(batch);
|
|
790
1025
|
}
|
|
791
1026
|
}
|
|
792
1027
|
async deleteScope(scope) {
|
|
793
|
-
|
|
794
|
-
const
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
1028
|
+
for (const ns of [this.chunksNs, this.pagesNs]) {
|
|
1029
|
+
const ids = [];
|
|
1030
|
+
let cursor = "0";
|
|
1031
|
+
try {
|
|
1032
|
+
for (; ; ) {
|
|
1033
|
+
const result = await ns.range({
|
|
1034
|
+
cursor,
|
|
1035
|
+
limit: 100,
|
|
1036
|
+
includeMetadata: true
|
|
1037
|
+
});
|
|
1038
|
+
for (const doc of result.vectors) {
|
|
1039
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
|
|
1040
|
+
ids.push(String(doc.id));
|
|
1041
|
+
}
|
|
1042
|
+
}
|
|
1043
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
1044
|
+
cursor = result.nextCursor;
|
|
1045
|
+
}
|
|
1046
|
+
} catch {
|
|
1047
|
+
}
|
|
1048
|
+
if (ids.length > 0) {
|
|
1049
|
+
const BATCH_SIZE = 100;
|
|
1050
|
+
for (let i = 0; i < ids.length; i += BATCH_SIZE) {
|
|
1051
|
+
const batch = ids.slice(i, i + BATCH_SIZE);
|
|
1052
|
+
await ns.delete(batch);
|
|
1053
|
+
}
|
|
1054
|
+
}
|
|
802
1055
|
}
|
|
803
1056
|
}
|
|
804
1057
|
async listScopes(projectId) {
|
|
805
|
-
const
|
|
806
|
-
const
|
|
807
|
-
|
|
808
|
-
for (const name of allIndexes) {
|
|
809
|
-
if (name.startsWith(prefix) && !name.endsWith("--pages")) {
|
|
810
|
-
const scopeName = name.slice(prefix.length);
|
|
811
|
-
scopeNames.add(scopeName);
|
|
812
|
-
}
|
|
813
|
-
}
|
|
814
|
-
const scopes = [];
|
|
815
|
-
for (const scopeName of scopeNames) {
|
|
816
|
-
const scope = {
|
|
817
|
-
projectId,
|
|
818
|
-
scopeName,
|
|
819
|
-
scopeId: `${projectId}:${scopeName}`
|
|
820
|
-
};
|
|
1058
|
+
const scopeMap = /* @__PURE__ */ new Map();
|
|
1059
|
+
for (const ns of [this.chunksNs, this.pagesNs]) {
|
|
1060
|
+
let cursor = "0";
|
|
821
1061
|
try {
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
1062
|
+
for (; ; ) {
|
|
1063
|
+
const result = await ns.range({
|
|
1064
|
+
cursor,
|
|
1065
|
+
limit: 100,
|
|
1066
|
+
includeMetadata: true
|
|
1067
|
+
});
|
|
1068
|
+
for (const doc of result.vectors) {
|
|
1069
|
+
if (doc.metadata?.projectId === projectId) {
|
|
1070
|
+
const scopeName = doc.metadata.scopeName ?? "";
|
|
1071
|
+
scopeMap.set(scopeName, (scopeMap.get(scopeName) ?? 0) + 1);
|
|
1072
|
+
}
|
|
1073
|
+
}
|
|
1074
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
1075
|
+
cursor = result.nextCursor;
|
|
1076
|
+
}
|
|
829
1077
|
} catch {
|
|
830
|
-
scopes.push({
|
|
831
|
-
projectId,
|
|
832
|
-
scopeName,
|
|
833
|
-
lastIndexedAt: "unknown",
|
|
834
|
-
documentCount: 0
|
|
835
|
-
});
|
|
836
1078
|
}
|
|
837
1079
|
}
|
|
838
|
-
return
|
|
1080
|
+
return [...scopeMap.entries()].map(([scopeName, count]) => ({
|
|
1081
|
+
projectId,
|
|
1082
|
+
scopeName,
|
|
1083
|
+
lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1084
|
+
documentCount: count
|
|
1085
|
+
}));
|
|
839
1086
|
}
|
|
840
1087
|
async getContentHashes(scope) {
|
|
841
1088
|
const map = /* @__PURE__ */ new Map();
|
|
842
|
-
const index = this.chunkIndex(scope);
|
|
843
1089
|
let cursor = "0";
|
|
844
1090
|
try {
|
|
845
1091
|
for (; ; ) {
|
|
846
|
-
const result = await
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
1092
|
+
const result = await this.chunksNs.range({
|
|
1093
|
+
cursor,
|
|
1094
|
+
limit: 100,
|
|
1095
|
+
includeMetadata: true
|
|
1096
|
+
});
|
|
1097
|
+
for (const doc of result.vectors) {
|
|
1098
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
|
|
1099
|
+
map.set(String(doc.id), doc.metadata.contentHash);
|
|
1100
|
+
}
|
|
1101
|
+
}
|
|
1102
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
1103
|
+
cursor = result.nextCursor;
|
|
1104
|
+
}
|
|
1105
|
+
} catch {
|
|
1106
|
+
}
|
|
1107
|
+
return map;
|
|
1108
|
+
}
|
|
1109
|
+
async listPages(scope, opts) {
|
|
1110
|
+
const cursor = opts?.cursor ?? "0";
|
|
1111
|
+
const limit = opts?.limit ?? 50;
|
|
1112
|
+
try {
|
|
1113
|
+
const result = await this.pagesNs.range({
|
|
1114
|
+
cursor,
|
|
1115
|
+
limit,
|
|
1116
|
+
includeMetadata: true
|
|
1117
|
+
});
|
|
1118
|
+
const pages = result.vectors.filter(
|
|
1119
|
+
(doc) => doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && (!opts?.pathPrefix || (doc.metadata?.url ?? "").startsWith(opts.pathPrefix))
|
|
1120
|
+
).map((doc) => ({
|
|
1121
|
+
url: doc.metadata?.url ?? "",
|
|
1122
|
+
title: doc.metadata?.title ?? "",
|
|
1123
|
+
description: doc.metadata?.description ?? "",
|
|
1124
|
+
routeFile: doc.metadata?.routeFile ?? ""
|
|
1125
|
+
}));
|
|
1126
|
+
const response = { pages };
|
|
1127
|
+
if (result.nextCursor && result.nextCursor !== "0") {
|
|
1128
|
+
response.nextCursor = result.nextCursor;
|
|
1129
|
+
}
|
|
1130
|
+
return response;
|
|
1131
|
+
} catch {
|
|
1132
|
+
return { pages: [] };
|
|
1133
|
+
}
|
|
1134
|
+
}
|
|
1135
|
+
async getPageHashes(scope) {
|
|
1136
|
+
const map = /* @__PURE__ */ new Map();
|
|
1137
|
+
let cursor = "0";
|
|
1138
|
+
try {
|
|
1139
|
+
for (; ; ) {
|
|
1140
|
+
const result = await this.pagesNs.range({
|
|
1141
|
+
cursor,
|
|
1142
|
+
limit: 100,
|
|
1143
|
+
includeMetadata: true
|
|
1144
|
+
});
|
|
1145
|
+
for (const doc of result.vectors) {
|
|
1146
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
|
|
1147
|
+
map.set(String(doc.id), doc.metadata.contentHash);
|
|
850
1148
|
}
|
|
851
1149
|
}
|
|
852
1150
|
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
@@ -856,47 +1154,43 @@ var UpstashSearchStore = class {
|
|
|
856
1154
|
}
|
|
857
1155
|
return map;
|
|
858
1156
|
}
|
|
1157
|
+
async deletePagesByIds(ids, _scope) {
|
|
1158
|
+
if (ids.length === 0) return;
|
|
1159
|
+
const BATCH_SIZE = 50;
|
|
1160
|
+
for (let i = 0; i < ids.length; i += BATCH_SIZE) {
|
|
1161
|
+
const batch = ids.slice(i, i + BATCH_SIZE);
|
|
1162
|
+
await this.pagesNs.delete(batch);
|
|
1163
|
+
}
|
|
1164
|
+
}
|
|
859
1165
|
async upsertPages(pages, scope) {
|
|
860
1166
|
if (pages.length === 0) return;
|
|
861
|
-
const index = this.pageIndex(scope);
|
|
862
1167
|
const BATCH_SIZE = 50;
|
|
863
1168
|
for (let i = 0; i < pages.length; i += BATCH_SIZE) {
|
|
864
1169
|
const batch = pages.slice(i, i + BATCH_SIZE);
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
}
|
|
876
|
-
|
|
877
|
-
markdown: p.markdown,
|
|
878
|
-
projectId: p.projectId,
|
|
879
|
-
scopeName: p.scopeName,
|
|
880
|
-
routeFile: p.routeFile,
|
|
881
|
-
routeResolution: p.routeResolution,
|
|
882
|
-
incomingLinks: p.incomingLinks,
|
|
883
|
-
outgoingLinks: p.outgoingLinks,
|
|
884
|
-
depth: p.depth,
|
|
885
|
-
indexedAt: p.indexedAt
|
|
886
|
-
}
|
|
887
|
-
}));
|
|
888
|
-
await index.upsert(docs);
|
|
1170
|
+
await this.pagesNs.upsert(
|
|
1171
|
+
batch.map((p) => ({
|
|
1172
|
+
id: p.id,
|
|
1173
|
+
data: p.data,
|
|
1174
|
+
metadata: {
|
|
1175
|
+
...p.metadata,
|
|
1176
|
+
projectId: scope.projectId,
|
|
1177
|
+
scopeName: scope.scopeName,
|
|
1178
|
+
type: "page"
|
|
1179
|
+
}
|
|
1180
|
+
}))
|
|
1181
|
+
);
|
|
889
1182
|
}
|
|
890
1183
|
}
|
|
891
1184
|
async getPage(url, scope) {
|
|
892
|
-
const index = this.pageIndex(scope);
|
|
893
1185
|
try {
|
|
894
|
-
const results = await
|
|
1186
|
+
const results = await this.pagesNs.fetch([url], {
|
|
1187
|
+
includeMetadata: true
|
|
1188
|
+
});
|
|
895
1189
|
const doc = results[0];
|
|
896
|
-
if (!doc) return null;
|
|
1190
|
+
if (!doc || !doc.metadata) return null;
|
|
897
1191
|
return {
|
|
898
|
-
url: doc.
|
|
899
|
-
title: doc.
|
|
1192
|
+
url: doc.metadata.url,
|
|
1193
|
+
title: doc.metadata.title,
|
|
900
1194
|
markdown: doc.metadata.markdown,
|
|
901
1195
|
projectId: doc.metadata.projectId,
|
|
902
1196
|
scopeName: doc.metadata.scopeName,
|
|
@@ -904,27 +1198,86 @@ var UpstashSearchStore = class {
|
|
|
904
1198
|
routeResolution: doc.metadata.routeResolution,
|
|
905
1199
|
incomingLinks: doc.metadata.incomingLinks,
|
|
906
1200
|
outgoingLinks: doc.metadata.outgoingLinks,
|
|
1201
|
+
outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? void 0,
|
|
907
1202
|
depth: doc.metadata.depth,
|
|
908
|
-
tags: doc.
|
|
1203
|
+
tags: doc.metadata.tags ?? [],
|
|
909
1204
|
indexedAt: doc.metadata.indexedAt,
|
|
910
|
-
summary: doc.
|
|
911
|
-
description: doc.
|
|
912
|
-
keywords: doc.
|
|
1205
|
+
summary: doc.metadata.summary || void 0,
|
|
1206
|
+
description: doc.metadata.description || void 0,
|
|
1207
|
+
keywords: doc.metadata.keywords?.length ? doc.metadata.keywords : void 0,
|
|
1208
|
+
publishedAt: typeof doc.metadata.publishedAt === "number" ? doc.metadata.publishedAt : void 0
|
|
913
1209
|
};
|
|
914
1210
|
} catch {
|
|
915
1211
|
return null;
|
|
916
1212
|
}
|
|
917
1213
|
}
|
|
1214
|
+
async fetchPageWithVector(url, scope) {
|
|
1215
|
+
try {
|
|
1216
|
+
const results = await this.pagesNs.fetch([url], {
|
|
1217
|
+
includeMetadata: true,
|
|
1218
|
+
includeVectors: true
|
|
1219
|
+
});
|
|
1220
|
+
const doc = results[0];
|
|
1221
|
+
if (!doc || !doc.metadata || !doc.vector) return null;
|
|
1222
|
+
if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
|
|
1223
|
+
return null;
|
|
1224
|
+
}
|
|
1225
|
+
return { metadata: doc.metadata, vector: doc.vector };
|
|
1226
|
+
} catch {
|
|
1227
|
+
return null;
|
|
1228
|
+
}
|
|
1229
|
+
}
|
|
1230
|
+
async fetchPagesBatch(urls, scope) {
|
|
1231
|
+
if (urls.length === 0) return [];
|
|
1232
|
+
try {
|
|
1233
|
+
const results = await this.pagesNs.fetch(urls, {
|
|
1234
|
+
includeMetadata: true
|
|
1235
|
+
});
|
|
1236
|
+
const out = [];
|
|
1237
|
+
for (const doc of results) {
|
|
1238
|
+
if (!doc || !doc.metadata) continue;
|
|
1239
|
+
if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
|
|
1240
|
+
continue;
|
|
1241
|
+
}
|
|
1242
|
+
out.push({
|
|
1243
|
+
url: doc.metadata.url,
|
|
1244
|
+
title: doc.metadata.title,
|
|
1245
|
+
routeFile: doc.metadata.routeFile,
|
|
1246
|
+
outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? []
|
|
1247
|
+
});
|
|
1248
|
+
}
|
|
1249
|
+
return out;
|
|
1250
|
+
} catch {
|
|
1251
|
+
return [];
|
|
1252
|
+
}
|
|
1253
|
+
}
|
|
918
1254
|
async deletePages(scope) {
|
|
1255
|
+
const ids = [];
|
|
1256
|
+
let cursor = "0";
|
|
919
1257
|
try {
|
|
920
|
-
|
|
921
|
-
|
|
1258
|
+
for (; ; ) {
|
|
1259
|
+
const result = await this.pagesNs.range({
|
|
1260
|
+
cursor,
|
|
1261
|
+
limit: 100,
|
|
1262
|
+
includeMetadata: true
|
|
1263
|
+
});
|
|
1264
|
+
for (const doc of result.vectors) {
|
|
1265
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
|
|
1266
|
+
ids.push(String(doc.id));
|
|
1267
|
+
}
|
|
1268
|
+
}
|
|
1269
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
1270
|
+
cursor = result.nextCursor;
|
|
1271
|
+
}
|
|
922
1272
|
} catch {
|
|
923
1273
|
}
|
|
1274
|
+
if (ids.length > 0) {
|
|
1275
|
+
await this.deletePagesByIds(ids, scope);
|
|
1276
|
+
}
|
|
924
1277
|
}
|
|
925
1278
|
async health() {
|
|
926
1279
|
try {
|
|
927
|
-
await this.
|
|
1280
|
+
await this.index.info();
|
|
928
1281
|
return { ok: true };
|
|
929
1282
|
} catch (error) {
|
|
930
1283
|
return {
|
|
@@ -934,14 +1287,31 @@ var UpstashSearchStore = class {
|
|
|
934
1287
|
}
|
|
935
1288
|
}
|
|
936
1289
|
async dropAllIndexes(projectId) {
|
|
937
|
-
const
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
const
|
|
943
|
-
|
|
944
|
-
|
|
1290
|
+
for (const ns of [this.chunksNs, this.pagesNs]) {
|
|
1291
|
+
const ids = [];
|
|
1292
|
+
let cursor = "0";
|
|
1293
|
+
try {
|
|
1294
|
+
for (; ; ) {
|
|
1295
|
+
const result = await ns.range({
|
|
1296
|
+
cursor,
|
|
1297
|
+
limit: 100,
|
|
1298
|
+
includeMetadata: true
|
|
1299
|
+
});
|
|
1300
|
+
for (const doc of result.vectors) {
|
|
1301
|
+
if (doc.metadata?.projectId === projectId) {
|
|
1302
|
+
ids.push(String(doc.id));
|
|
1303
|
+
}
|
|
1304
|
+
}
|
|
1305
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
1306
|
+
cursor = result.nextCursor;
|
|
1307
|
+
}
|
|
1308
|
+
} catch {
|
|
1309
|
+
}
|
|
1310
|
+
if (ids.length > 0) {
|
|
1311
|
+
const BATCH_SIZE = 100;
|
|
1312
|
+
for (let i = 0; i < ids.length; i += BATCH_SIZE) {
|
|
1313
|
+
const batch = ids.slice(i, i + BATCH_SIZE);
|
|
1314
|
+
await ns.delete(batch);
|
|
945
1315
|
}
|
|
946
1316
|
}
|
|
947
1317
|
}
|
|
@@ -955,12 +1325,16 @@ async function createUpstashStore(config) {
|
|
|
955
1325
|
if (!url || !token) {
|
|
956
1326
|
throw new SearchSocketError(
|
|
957
1327
|
"VECTOR_BACKEND_UNAVAILABLE",
|
|
958
|
-
`Missing Upstash
|
|
1328
|
+
`Missing Upstash Vector credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
|
|
959
1329
|
);
|
|
960
1330
|
}
|
|
961
|
-
const {
|
|
962
|
-
const
|
|
963
|
-
return new UpstashSearchStore({
|
|
1331
|
+
const { Index } = await import("@upstash/vector");
|
|
1332
|
+
const index = new Index({ url, token });
|
|
1333
|
+
return new UpstashSearchStore({
|
|
1334
|
+
index,
|
|
1335
|
+
pagesNamespace: config.upstash.namespaces.pages,
|
|
1336
|
+
chunksNamespace: config.upstash.namespaces.chunks
|
|
1337
|
+
});
|
|
964
1338
|
}
|
|
965
1339
|
|
|
966
1340
|
// src/utils/hash.ts
|
|
@@ -1034,6 +1408,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
|
|
|
1034
1408
|
if (normalizeText(current.text)) {
|
|
1035
1409
|
sections.push({
|
|
1036
1410
|
sectionTitle: current.sectionTitle,
|
|
1411
|
+
headingLevel: current.headingLevel,
|
|
1037
1412
|
headingPath: current.headingPath,
|
|
1038
1413
|
text: current.text.trim()
|
|
1039
1414
|
});
|
|
@@ -1052,6 +1427,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
|
|
|
1052
1427
|
headingStack.length = level;
|
|
1053
1428
|
current = {
|
|
1054
1429
|
sectionTitle: title,
|
|
1430
|
+
headingLevel: level,
|
|
1055
1431
|
headingPath: headingStack.filter((entry) => Boolean(entry)).slice(0, headingPathDepth),
|
|
1056
1432
|
text: `${line}
|
|
1057
1433
|
`
|
|
@@ -1071,8 +1447,8 @@ function parseHeadingSections(markdown, headingPathDepth) {
|
|
|
1071
1447
|
}
|
|
1072
1448
|
return sections;
|
|
1073
1449
|
}
|
|
1074
|
-
function blockify(
|
|
1075
|
-
const lines =
|
|
1450
|
+
function blockify(text2, config) {
|
|
1451
|
+
const lines = text2.split("\n");
|
|
1076
1452
|
const blocks = [];
|
|
1077
1453
|
let inFence = false;
|
|
1078
1454
|
let current = [];
|
|
@@ -1179,20 +1555,21 @@ function splitOversizedBlock(block, config) {
|
|
|
1179
1555
|
return chunks.length > 0 ? chunks : [trimmed];
|
|
1180
1556
|
}
|
|
1181
1557
|
function splitSection(section, config) {
|
|
1182
|
-
const
|
|
1183
|
-
if (!
|
|
1558
|
+
const text2 = section.text.trim();
|
|
1559
|
+
if (!text2) {
|
|
1184
1560
|
return [];
|
|
1185
1561
|
}
|
|
1186
|
-
if (
|
|
1562
|
+
if (text2.length <= config.maxChars) {
|
|
1187
1563
|
return [
|
|
1188
1564
|
{
|
|
1189
1565
|
sectionTitle: section.sectionTitle,
|
|
1566
|
+
headingLevel: section.headingLevel,
|
|
1190
1567
|
headingPath: section.headingPath,
|
|
1191
|
-
chunkText:
|
|
1568
|
+
chunkText: text2
|
|
1192
1569
|
}
|
|
1193
1570
|
];
|
|
1194
1571
|
}
|
|
1195
|
-
const blocks = blockify(
|
|
1572
|
+
const blocks = blockify(text2, config);
|
|
1196
1573
|
const chunks = [];
|
|
1197
1574
|
let current = "";
|
|
1198
1575
|
for (const block of blocks) {
|
|
@@ -1237,6 +1614,7 @@ ${chunk}`;
|
|
|
1237
1614
|
}
|
|
1238
1615
|
return merged.map((chunkText) => ({
|
|
1239
1616
|
sectionTitle: section.sectionTitle,
|
|
1617
|
+
headingLevel: section.headingLevel,
|
|
1240
1618
|
headingPath: section.headingPath,
|
|
1241
1619
|
chunkText
|
|
1242
1620
|
}));
|
|
@@ -1252,6 +1630,18 @@ function buildSummaryChunkText(page) {
|
|
|
1252
1630
|
}
|
|
1253
1631
|
return parts.join("\n\n");
|
|
1254
1632
|
}
|
|
1633
|
+
/**
 * Build the heading-aware title string for a section chunk.
 *
 * Returns `undefined` when the chunk has no section title or no heading level.
 * Otherwise the result is "<page title> — <heading trail>", where the trail is:
 *   - the section title alone when `headingPath` has at most one entry;
 *   - `headingPath` joined with " > " when it has several entries, with the
 *     section title appended if it is not already the last path entry.
 *
 * @param chunk Object with `title`, `sectionTitle`, `headingLevel`, `headingPath`.
 * @returns The title string, or `undefined`.
 */
function buildEmbeddingTitle(chunk) {
  if (!chunk.sectionTitle || chunk.headingLevel === void 0) return void 0;
  const { title, sectionTitle, headingPath } = chunk;
  if (headingPath.length <= 1) {
    return `${title} \u2014 ${sectionTitle}`;
  }
  const trail = headingPath.join(" > ");
  const deepest = headingPath[headingPath.length - 1];
  return deepest === sectionTitle ? `${title} \u2014 ${trail}` : `${title} \u2014 ${trail} > ${sectionTitle}`;
}
|
|
1255
1645
|
function buildEmbeddingText(chunk, prependTitle) {
|
|
1256
1646
|
if (!prependTitle) return chunk.chunkText;
|
|
1257
1647
|
const prefix = chunk.sectionTitle ? `${chunk.title} \u2014 ${chunk.sectionTitle}` : chunk.title;
|
|
@@ -1282,10 +1672,14 @@ function chunkPage(page, config, scope) {
|
|
|
1282
1672
|
tags: page.tags,
|
|
1283
1673
|
contentHash: "",
|
|
1284
1674
|
description: page.description,
|
|
1285
|
-
keywords: page.keywords
|
|
1675
|
+
keywords: page.keywords,
|
|
1676
|
+
publishedAt: page.publishedAt,
|
|
1677
|
+
incomingAnchorText: page.incomingAnchorText,
|
|
1678
|
+
meta: page.meta
|
|
1286
1679
|
};
|
|
1287
1680
|
const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
|
|
1288
|
-
|
|
1681
|
+
const metaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
|
|
1682
|
+
summaryChunk.contentHash = sha256(normalizeText(embeddingText) + metaSuffix);
|
|
1289
1683
|
chunks.push(summaryChunk);
|
|
1290
1684
|
}
|
|
1291
1685
|
const ordinalOffset = config.chunking.pageSummaryChunk ? 1 : 0;
|
|
@@ -1302,6 +1696,7 @@ function chunkPage(page, config, scope) {
|
|
|
1302
1696
|
path: page.url,
|
|
1303
1697
|
title: page.title,
|
|
1304
1698
|
sectionTitle: entry.sectionTitle,
|
|
1699
|
+
headingLevel: entry.headingLevel,
|
|
1305
1700
|
headingPath: entry.headingPath,
|
|
1306
1701
|
chunkText: entry.chunkText,
|
|
1307
1702
|
snippet: toSnippet(entry.chunkText),
|
|
@@ -1311,10 +1706,16 @@ function chunkPage(page, config, scope) {
|
|
|
1311
1706
|
tags: page.tags,
|
|
1312
1707
|
contentHash: "",
|
|
1313
1708
|
description: page.description,
|
|
1314
|
-
keywords: page.keywords
|
|
1709
|
+
keywords: page.keywords,
|
|
1710
|
+
publishedAt: page.publishedAt,
|
|
1711
|
+
incomingAnchorText: page.incomingAnchorText,
|
|
1712
|
+
meta: page.meta
|
|
1315
1713
|
};
|
|
1316
1714
|
const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
|
|
1317
|
-
|
|
1715
|
+
const embeddingTitle = config.chunking.weightHeadings ? buildEmbeddingTitle(chunk) : void 0;
|
|
1716
|
+
const chunkMetaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
|
|
1717
|
+
const hashInput = embeddingTitle ? `${normalizeText(embeddingText)}|title:${embeddingTitle}` : normalizeText(embeddingText);
|
|
1718
|
+
chunk.contentHash = sha256(hashInput + chunkMetaSuffix);
|
|
1318
1719
|
chunks.push(chunk);
|
|
1319
1720
|
}
|
|
1320
1721
|
return chunks;
|
|
@@ -1325,6 +1726,113 @@ import { load } from "cheerio";
|
|
|
1325
1726
|
import matter from "gray-matter";
|
|
1326
1727
|
import TurndownService from "turndown";
|
|
1327
1728
|
import { gfm, highlightedCodeBlock, strikethrough, tables, taskListItems } from "turndown-plugin-gfm";
|
|
1729
|
+
|
|
1730
|
+
// src/utils/structured-meta.ts
|
|
1731
|
+
// Identifier-shaped keys only: a letter or underscore, then letters, digits or underscores.
var VALID_KEY_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
/**
 * Tell whether `key` is acceptable as a structured-meta key.
 *
 * @param key Candidate key.
 * @returns `true` when `key` matches VALID_KEY_RE.
 */
function validateMetaKey(key) {
  return VALID_KEY_RE.test(key);
}
|
|
1735
|
+
/**
 * Convert the text of a meta value to the type named by `dataType`.
 *
 *   - "number" / "date": a finite number when the text is numeric, otherwise
 *     the text unchanged. Empty or whitespace-only text is returned unchanged
 *     rather than being coerced to 0 (`Number("")` is 0).
 *   - "boolean": `true` only for the exact text "true".
 *   - "string[]": comma-separated entries, trimmed, with empty entries dropped.
 *   - anything else: the text unchanged.
 *
 * @param content Raw string value.
 * @param dataType Declared type name.
 * @returns The converted value.
 */
function parseMetaValue(content, dataType) {
  switch (dataType) {
    case "number":
    case "date": {
      if (content.trim() === "") return content;
      const parsed = Number(content);
      return Number.isFinite(parsed) ? parsed : content;
    }
    case "boolean":
      return content === "true";
    case "string[]":
      return content ? content.split(",").map((s) => s.trim()).filter(Boolean) : [];
    default:
      return content;
  }
}
|
|
1753
|
+
/**
 * Escape a string for use inside a single-quoted filter literal by doubling
 * every single quote.
 *
 * @param s Raw string value.
 * @returns `s` with each `'` replaced by `''`.
 */
function escapeFilterValue(s) {
  return s.replace(/'/g, "''");
}
|
|
1756
|
+
/**
 * Turn a `{ key: value }` map into a filter expression over `meta.<key>` fields.
 *
 * Keys that fail `validateMetaKey` are skipped. String values become
 * `meta.key CONTAINS '<escaped value>'`; booleans and finite numbers become
 * `meta.key = <value>`. Any other value (null, undefined, NaN, Infinity,
 * arrays, objects) is skipped instead of being interpolated raw, because its
 * string form would produce a malformed or attacker-shaped clause.
 *
 * @param filters Map of meta key to string, number or boolean.
 * @returns Clauses joined with " AND ", or "" when nothing qualifies.
 */
function buildMetaFilterString(filters) {
  const clauses = [];
  for (const [key, value] of Object.entries(filters)) {
    if (!validateMetaKey(key)) continue;
    const field = `meta.${key}`;
    if (typeof value === "string") {
      clauses.push(`${field} CONTAINS '${escapeFilterValue(value)}'`);
    } else if (typeof value === "boolean" || typeof value === "number" && Number.isFinite(value)) {
      clauses.push(`${field} = ${value}`);
    }
  }
  return clauses.join(" AND ");
}
|
|
1771
|
+
|
|
1772
|
+
// src/indexing/extractor.ts
|
|
1773
|
+
/**
 * Normalize a date-like value to epoch milliseconds.
 *
 * Accepts `Date` instances, strings (parsed with `new Date(value)`) and
 * numbers (taken as-is). Returns `undefined` for null/undefined, any other
 * type, or whenever the resulting timestamp is not finite (invalid dates,
 * NaN, Infinity).
 *
 * @param value Candidate date value.
 * @returns Epoch milliseconds, or `undefined`.
 */
function normalizeDateToMs(value) {
  if (value == null) return void 0;
  let ts;
  if (value instanceof Date) {
    ts = value.getTime();
  } else if (typeof value === "string") {
    ts = new Date(value).getTime();
  } else if (typeof value === "number") {
    ts = value;
  } else {
    return void 0;
  }
  return Number.isFinite(ts) ? ts : void 0;
}
|
|
1788
|
+
// Frontmatter keys probed for a publication date, in priority order.
var FRONTMATTER_DATE_FIELDS = ["date", "publishedAt", "updatedAt", "published_at", "updated_at"];
/**
 * Find a publication timestamp in a frontmatter object.
 *
 * Probes FRONTMATTER_DATE_FIELDS in order and returns the first value that
 * `normalizeDateToMs` accepts. A missing frontmatter object yields `undefined`
 * instead of throwing on the property lookup.
 *
 * @param data Parsed frontmatter object (may be null/undefined).
 * @returns Epoch milliseconds, or `undefined` when no field holds a usable date.
 */
function extractPublishedAtFromFrontmatter(data) {
  if (data == null) return void 0;
  for (const field of FRONTMATTER_DATE_FIELDS) {
    const val = normalizeDateToMs(data[field]);
    if (val !== void 0) return val;
  }
  return void 0;
}
|
|
1796
|
+
/**
 * Find a publication timestamp in an HTML document.
 *
 * Sources are tried in this order, returning the first usable date:
 *   1. `datePublished` in JSON-LD scripts (top-level object, top-level array
 *      entries, and entries of a top-level object's `@graph`);
 *   2. `<meta property="article:published_time">`;
 *   3. `<meta itemprop="datePublished">`, then `<time itemprop="datePublished">`;
 *   4. the first `<time datetime>` element.
 *
 * @param $ Document query function (called with selectors and with matched
 *   elements; results expose `length`, indexing, `html`, `attr`, `first`).
 * @returns Epoch milliseconds, or `undefined` when nothing usable is found.
 */
function extractPublishedAtFromHtml($) {
  const jsonLdScripts = $('script[type="application/ld+json"]');
  for (let i = 0; i < jsonLdScripts.length; i++) {
    try {
      const raw = $(jsonLdScripts[i]).html();
      if (!raw) continue;
      const parsed = JSON.parse(raw);
      const candidates = [];
      if (Array.isArray(parsed)) {
        candidates.push(...parsed);
      } else if (parsed && typeof parsed === "object") {
        candidates.push(parsed);
        if (Array.isArray(parsed["@graph"])) {
          candidates.push(...parsed["@graph"]);
        }
      }
      for (const candidate of candidates) {
        // `?.` keeps a null entry from throwing and discarding the entries after it.
        const val = normalizeDateToMs(candidate?.datePublished);
        if (val !== void 0) return val;
      }
    } catch {
      // Malformed JSON-LD: ignore this script and try the next source.
    }
  }
  const fromAttr = (raw) => raw ? normalizeDateToMs(raw) : void 0;
  const ogTime = fromAttr($('meta[property="article:published_time"]').attr("content")?.trim());
  if (ogTime !== void 0) return ogTime;
  const itempropDate = fromAttr(
    $('meta[itemprop="datePublished"]').attr("content")?.trim() || $('time[itemprop="datePublished"]').attr("datetime")?.trim()
  );
  if (itempropDate !== void 0) return itempropDate;
  return fromAttr($("time[datetime]").first().attr("datetime")?.trim());
}
|
|
1328
1836
|
function hasTopLevelNoindexComment(markdown) {
|
|
1329
1837
|
const lines = markdown.split(/\r?\n/);
|
|
1330
1838
|
let inFence = false;
|
|
@@ -1340,6 +1848,97 @@ function hasTopLevelNoindexComment(markdown) {
|
|
|
1340
1848
|
}
|
|
1341
1849
|
return false;
|
|
1342
1850
|
}
|
|
1851
|
+
// Alt texts that merely say "this is an image" and carry no searchable content.
var GARBAGE_ALT_WORDS = /* @__PURE__ */ new Set([
  "image",
  "photo",
  "picture",
  "icon",
  "logo",
  "banner",
  "screenshot",
  "thumbnail",
  "img",
  "graphic",
  "illustration",
  "spacer",
  "pixel",
  "placeholder",
  "avatar",
  "background"
]);
// Alt text that is just an image file name (optionally followed by a query string).
var IMAGE_EXT_RE = /\.(jpg|jpeg|png|gif|svg|webp|avif|bmp|ico)(\?.*)?$/i;
/**
 * Decide whether an image alt text is worth indexing.
 *
 * Rejects text shorter than 5 characters after trimming (which covers the
 * empty case), text that ends in an image file extension, and text that is
 * exactly one of GARBAGE_ALT_WORDS (case-insensitive).
 *
 * @param alt Alt attribute value.
 * @returns `true` when the alt text looks descriptive.
 */
function isMeaningfulAlt(alt) {
  const candidate = alt.trim();
  if (candidate.length < 5) return false;
  if (IMAGE_EXT_RE.test(candidate)) return false;
  return !GARBAGE_ALT_WORDS.has(candidate.toLowerCase());
}
|
|
1877
|
+
/**
 * Pick the text that should stand in for an image.
 *
 * Priority:
 *   1. the image's own `imageDescAttr` attribute;
 *   2. the same attribute on the closest enclosing `<figure>`;
 *   3. meaningful alt text joined with the figure's first `<figcaption>` ("alt — caption");
 *   4. meaningful alt text alone;
 *   5. the caption alone.
 *
 * @param img Wrapped `<img>` element (exposes `attr` and `closest`).
 * @param $ Document query function; not used by this function.
 * @param imageDescAttr Name of the attribute holding an explicit description.
 * @returns The chosen text, or `null` when the image has nothing usable.
 */
function resolveImageText(img, $, imageDescAttr) {
  const imgDesc = img.attr(imageDescAttr)?.trim();
  if (imgDesc) return imgDesc;
  const figure = img.closest("figure");
  const hasFigure = Boolean(figure.length);
  if (hasFigure) {
    const figDesc = figure.attr(imageDescAttr)?.trim();
    if (figDesc) return figDesc;
  }
  const alt = img.attr("alt")?.trim() ?? "";
  const caption = hasFigure ? figure.find("figcaption").first().text().trim() : "";
  if (isMeaningfulAlt(alt)) {
    return caption ? `${alt} \u2014 ${caption}` : alt;
  }
  return caption ? caption : null;
}
|
|
1898
|
+
// Link texts that say nothing about the link target.
var STOP_ANCHORS = /* @__PURE__ */ new Set([
  "here",
  "click",
  "click here",
  "read more",
  "link",
  "this",
  "more"
]);
/**
 * Normalize link text for use as anchor-text metadata.
 *
 * Collapses whitespace runs to single spaces, trims and lower-cases. Returns ""
 * for text shorter than 3 characters or listed in STOP_ANCHORS. Longer text is
 * cut to 100 characters; trailing whitespace left by the cut is removed so the
 * result never ends in a space.
 *
 * @param raw Raw link text.
 * @returns The normalized anchor text, or "" when it is not useful.
 */
function normalizeAnchorText(raw) {
  const MAX_LENGTH = 100;
  const normalized = raw.replace(/\s+/g, " ").trim().toLowerCase();
  if (normalized.length < 3 || STOP_ANCHORS.has(normalized)) return "";
  return normalized.length > MAX_LENGTH ? normalized.slice(0, MAX_LENGTH).trimEnd() : normalized;
}
|
|
1914
|
+
/**
 * Escape text for insertion as HTML element content.
 *
 * Replaces `&`, `<` and `>` with their character entities. `&` is handled
 * first so the ampersands introduced by the other two replacements are not
 * escaped a second time.
 *
 * @param text2 Raw text.
 * @returns HTML-safe text.
 */
function escapeHtml(text2) {
  // The entities are assembled from pieces on purpose: written out literally they
  // are easy prey for any entity-decoding step applied to this source, which
  // would silently turn the function into a no-op.
  const AMP = "&";
  return text2.replace(/&/g, `${AMP}amp;`).replace(/</g, `${AMP}lt;`).replace(/>/g, `${AMP}gt;`);
}
|
|
1917
|
+
/**
 * Replace images under `root` with their textual description, in place.
 *
 * `<picture>` elements are processed first (described through their first
 * `<img>`), then every remaining `<img>`. A node with a description from
 * `resolveImageText` is replaced by a `<span>` holding the escaped text, and
 * any `<figcaption>` of its enclosing `<figure>` is removed. A node without a
 * description is removed.
 *
 * @param root Wrapped root element to search in.
 * @param $ Document query function used to wrap matched elements.
 * @param imageDescAttr Attribute name passed through to `resolveImageText`.
 */
function preprocessImages(root, $, imageDescAttr) {
  const substitute = (node, img) => {
    const parentFigure = node.closest("figure");
    const text2 = img.length ? resolveImageText(img, $, imageDescAttr) : null;
    if (!text2) {
      node.remove();
      return;
    }
    if (parentFigure.length) parentFigure.find("figcaption").remove();
    node.replaceWith(`<span>${escapeHtml(text2)}</span>`);
  };
  root.find("picture").each((_i, el) => {
    const picture = $(el);
    substitute(picture, picture.find("img").first());
  });
  // Images that lived inside a <picture> are gone by now; this pass handles the rest.
  root.find("img").each((_i, el) => {
    const img = $(el);
    substitute(img, img);
  });
}
|
|
1343
1942
|
function extractFromHtml(url, html, config) {
|
|
1344
1943
|
const $ = load(html);
|
|
1345
1944
|
const normalizedUrl = normalizeUrlPath(url);
|
|
@@ -1365,6 +1964,20 @@ function extractFromHtml(url, html, config) {
|
|
|
1365
1964
|
if (weight === 0) {
|
|
1366
1965
|
return null;
|
|
1367
1966
|
}
|
|
1967
|
+
if ($('meta[name="searchsocket:noindex"]').attr("content") === "true") {
|
|
1968
|
+
return null;
|
|
1969
|
+
}
|
|
1970
|
+
const RESERVED_META_KEYS = /* @__PURE__ */ new Set(["noindex", "tags"]);
|
|
1971
|
+
const meta = {};
|
|
1972
|
+
$('meta[name^="searchsocket:"]').each((_i, el) => {
|
|
1973
|
+
const name = $(el).attr("name") ?? "";
|
|
1974
|
+
const key = name.slice("searchsocket:".length);
|
|
1975
|
+
if (!key || RESERVED_META_KEYS.has(key) || !validateMetaKey(key)) return;
|
|
1976
|
+
const content = $(el).attr("content") ?? "";
|
|
1977
|
+
const dataType = $(el).attr("data-type") ?? "string";
|
|
1978
|
+
meta[key] = parseMetaValue(content, dataType);
|
|
1979
|
+
});
|
|
1980
|
+
const componentTags = $('meta[name="searchsocket:tags"]').attr("content")?.trim();
|
|
1368
1981
|
const description = $("meta[name='description']").attr("content")?.trim() || $("meta[property='og:description']").attr("content")?.trim() || void 0;
|
|
1369
1982
|
const keywordsRaw = $("meta[name='keywords']").attr("content")?.trim();
|
|
1370
1983
|
const keywords = keywordsRaw ? keywordsRaw.split(",").map((k) => k.trim()).filter(Boolean) : void 0;
|
|
@@ -1376,7 +1989,9 @@ function extractFromHtml(url, html, config) {
|
|
|
1376
1989
|
root.find(selector).remove();
|
|
1377
1990
|
}
|
|
1378
1991
|
root.find(`[${config.extract.ignoreAttr}]`).remove();
|
|
1992
|
+
preprocessImages(root, $, config.extract.imageDescAttr);
|
|
1379
1993
|
const outgoingLinks = [];
|
|
1994
|
+
const seenLinkKeys = /* @__PURE__ */ new Set();
|
|
1380
1995
|
root.find("a[href]").each((_index, node) => {
|
|
1381
1996
|
const href = $(node).attr("href");
|
|
1382
1997
|
if (!href || href.startsWith("#") || href.startsWith("mailto:") || href.startsWith("tel:")) {
|
|
@@ -1387,7 +2002,19 @@ function extractFromHtml(url, html, config) {
|
|
|
1387
2002
|
if (!["http:", "https:"].includes(parsed.protocol)) {
|
|
1388
2003
|
return;
|
|
1389
2004
|
}
|
|
1390
|
-
|
|
2005
|
+
const url2 = normalizeUrlPath(parsed.pathname);
|
|
2006
|
+
let anchorText = normalizeAnchorText($(node).text());
|
|
2007
|
+
if (!anchorText) {
|
|
2008
|
+
const imgAlt = $(node).find("img").first().attr("alt") ?? "";
|
|
2009
|
+
if (isMeaningfulAlt(imgAlt)) {
|
|
2010
|
+
anchorText = normalizeAnchorText(imgAlt);
|
|
2011
|
+
}
|
|
2012
|
+
}
|
|
2013
|
+
const key = `${url2}|${anchorText}`;
|
|
2014
|
+
if (!seenLinkKeys.has(key)) {
|
|
2015
|
+
seenLinkKeys.add(key);
|
|
2016
|
+
outgoingLinks.push({ url: url2, anchorText });
|
|
2017
|
+
}
|
|
1391
2018
|
} catch {
|
|
1392
2019
|
}
|
|
1393
2020
|
});
|
|
@@ -1412,16 +2039,25 @@ function extractFromHtml(url, html, config) {
|
|
|
1412
2039
|
return null;
|
|
1413
2040
|
}
|
|
1414
2041
|
const tags = normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1);
|
|
2042
|
+
const publishedAt = extractPublishedAtFromHtml($);
|
|
2043
|
+
if (componentTags) {
|
|
2044
|
+
const extraTags = componentTags.split(",").map((t) => t.trim()).filter(Boolean);
|
|
2045
|
+
for (const t of extraTags) {
|
|
2046
|
+
if (!tags.includes(t)) tags.push(t);
|
|
2047
|
+
}
|
|
2048
|
+
}
|
|
1415
2049
|
return {
|
|
1416
2050
|
url: normalizeUrlPath(url),
|
|
1417
2051
|
title,
|
|
1418
2052
|
markdown,
|
|
1419
|
-
outgoingLinks
|
|
2053
|
+
outgoingLinks,
|
|
1420
2054
|
noindex: false,
|
|
1421
2055
|
tags,
|
|
1422
2056
|
description,
|
|
1423
2057
|
keywords,
|
|
1424
|
-
weight
|
|
2058
|
+
weight,
|
|
2059
|
+
publishedAt,
|
|
2060
|
+
meta: Object.keys(meta).length > 0 ? meta : void 0
|
|
1425
2061
|
};
|
|
1426
2062
|
}
|
|
1427
2063
|
function extractFromMarkdown(url, markdown, title) {
|
|
@@ -1442,6 +2078,24 @@ function extractFromMarkdown(url, markdown, title) {
|
|
|
1442
2078
|
if (mdWeight === 0) {
|
|
1443
2079
|
return null;
|
|
1444
2080
|
}
|
|
2081
|
+
let mdMeta;
|
|
2082
|
+
const rawMeta = searchsocketMeta?.meta;
|
|
2083
|
+
if (rawMeta && typeof rawMeta === "object" && !Array.isArray(rawMeta)) {
|
|
2084
|
+
const metaObj = {};
|
|
2085
|
+
for (const [key, val] of Object.entries(rawMeta)) {
|
|
2086
|
+
if (!validateMetaKey(key)) continue;
|
|
2087
|
+
if (typeof val === "string" || typeof val === "number" || typeof val === "boolean") {
|
|
2088
|
+
metaObj[key] = val;
|
|
2089
|
+
} else if (Array.isArray(val) && val.every((v) => typeof v === "string")) {
|
|
2090
|
+
metaObj[key] = val;
|
|
2091
|
+
} else if (val instanceof Date) {
|
|
2092
|
+
metaObj[key] = val.getTime();
|
|
2093
|
+
}
|
|
2094
|
+
}
|
|
2095
|
+
if (Object.keys(metaObj).length > 0) {
|
|
2096
|
+
mdMeta = metaObj;
|
|
2097
|
+
}
|
|
2098
|
+
}
|
|
1445
2099
|
const content = parsed.content;
|
|
1446
2100
|
const normalized = normalizeMarkdown(content);
|
|
1447
2101
|
if (!normalizeText(normalized)) {
|
|
@@ -1456,6 +2110,7 @@ function extractFromMarkdown(url, markdown, title) {
|
|
|
1456
2110
|
fmKeywords = frontmatter.keywords.split(",").map((k) => k.trim()).filter(Boolean);
|
|
1457
2111
|
}
|
|
1458
2112
|
if (fmKeywords && fmKeywords.length === 0) fmKeywords = void 0;
|
|
2113
|
+
const publishedAt = extractPublishedAtFromFrontmatter(frontmatter);
|
|
1459
2114
|
return {
|
|
1460
2115
|
url: normalizeUrlPath(url),
|
|
1461
2116
|
title: resolvedTitle,
|
|
@@ -1465,7 +2120,9 @@ function extractFromMarkdown(url, markdown, title) {
|
|
|
1465
2120
|
tags: normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1),
|
|
1466
2121
|
description: fmDescription,
|
|
1467
2122
|
keywords: fmKeywords,
|
|
1468
|
-
weight: mdWeight
|
|
2123
|
+
weight: mdWeight,
|
|
2124
|
+
publishedAt,
|
|
2125
|
+
meta: mdMeta
|
|
1469
2126
|
};
|
|
1470
2127
|
}
|
|
1471
2128
|
|
|
@@ -1919,6 +2576,125 @@ function filePathToUrl(filePath, baseDir) {
|
|
|
1919
2576
|
const noExt = relative.replace(/\.md$/i, "").replace(/\/index$/i, "");
|
|
1920
2577
|
return normalizeUrlPath(noExt || "/");
|
|
1921
2578
|
}
|
|
2579
|
+
// `+page`, `+layout` or `+error` Svelte files, optionally with an `@name` suffix before `.svelte`.
var ROUTE_FILE_RE = /\+(page|layout|error)(@[^.]+)?\.svelte$/;
/**
 * Tell whether a path names a Svelte component file rather than a route file.
 *
 * @param filePath File path to classify.
 * @returns `true` for paths ending in `.svelte` that do not match ROUTE_FILE_RE.
 */
function isSvelteComponentFile(filePath) {
  return filePath.endsWith(".svelte") && !ROUTE_FILE_RE.test(filePath);
}
|
|
2584
|
+
/**
 * Extract a description and prop list from Svelte component source.
 *
 * The description is the text of the first `<!-- @component ... -->` comment.
 * Props come from the first `let { ... } = $props()` destructuring: each entry
 * yields its name, its default expression when one is written, and its type
 * when the destructuring carries a type annotation that can be resolved
 * (a capitalized type name looked up in the same source, or an inline
 * `{ ... }` type). Rest entries (`...rest`) are skipped. For a renamed entry
 * (`class: klass`) the reported name is the original prop name.
 *
 * @param source Full text of the `.svelte` file.
 * @returns `{ description, props }`; `description` is `undefined` when absent,
 *   and each prop is `{ name, type?, default? }`.
 */
function extractSvelteComponentMeta(source) {
  const componentMatch = source.match(/<!--\s*@component\s*([\s\S]*?)\s*-->/);
  const description = componentMatch?.[1]?.trim() || void 0;
  const props = [];
  const propsMatch = source.match(
    /let\s+\{([\s\S]*?)\}\s*(?::\s*([^=;{][\s\S]*?))?\s*=\s*\$props\(\)/
  );
  if (!propsMatch) {
    return { description, props };
  }
  const destructureBlock = propsMatch[1];
  const typeAnnotation = propsMatch[2]?.trim();
  let resolvedTypeMap;
  if (typeAnnotation && /^[A-Z]\w*$/.test(typeAnnotation)) {
    resolvedTypeMap = resolveTypeReference(source, typeAnnotation);
  } else if (typeAnnotation && typeAnnotation.startsWith("{")) {
    resolvedTypeMap = parseInlineTypeAnnotation(typeAnnotation);
  }
  for (const entry of splitDestructureBlock(destructureBlock)) {
    const trimmed = entry.trim();
    if (!trimmed || trimmed.startsWith("...")) continue;
    let propName;
    let defaultValue;
    // `name: alias` or `name: alias = default`
    const renameMatch = trimmed.match(/^(\w+)\s*:\s*\w+\s*(?:=\s*([\s\S]+))?$/);
    // `name = default`
    const defaultMatch = renameMatch ? null : trimmed.match(/^(\w+)\s*=\s*([\s\S]+)$/);
    if (renameMatch) {
      propName = renameMatch[1];
      defaultValue = renameMatch[2]?.trim();
    } else if (defaultMatch) {
      propName = defaultMatch[1];
      defaultValue = defaultMatch[2]?.trim();
    } else {
      // Bare name; fall back to the whole entry if it does not start with a word.
      propName = trimmed.match(/^(\w+)/)?.[1] ?? trimmed;
    }
    const propType = resolvedTypeMap?.get(propName);
    props.push({
      name: propName,
      ...propType ? { type: propType } : {},
      ...defaultValue ? { default: defaultValue } : {}
    });
  }
  return { description, props };
}
|
|
2629
|
+
/**
 * Split the inside of a destructuring pattern into its top-level entries.
 *
 * Splits on commas that are outside any `{}`, `[]` or `()` nesting and outside
 * string literals, so defaults such as `[1, 2]`, `fn(a, b)` or `"x, y"` stay in
 * one entry. Entries are returned untrimmed; a whitespace-only final entry
 * (trailing comma) is dropped.
 *
 * @param block Text between the braces of the destructuring pattern.
 * @returns Array of entry strings.
 */
function splitDestructureBlock(block) {
  const entries = [];
  let depth = 0;
  let quote = "";
  let escaped = false;
  let current = "";
  for (const ch of block) {
    if (quote) {
      current += ch;
      if (escaped) {
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === quote || ch === "\n" && quote !== "`") {
        // Only template literals may span lines; ending '...' and "..." at a newline
        // keeps a stray apostrophe (e.g. in a comment) from swallowing later entries.
        quote = "";
      }
      continue;
    }
    if (ch === "," && depth === 0) {
      entries.push(current);
      current = "";
      continue;
    }
    if (ch === '"' || ch === "'" || ch === "`") {
      quote = ch;
    } else if (ch === "{" || ch === "[" || ch === "(") {
      depth++;
    } else if (ch === "}" || ch === "]" || ch === ")") {
      depth--;
    }
    current += ch;
  }
  if (current.trim()) entries.push(current);
  return entries;
}
|
|
2650
|
+
/**
 * Finds the declaration `interface <typeName> { … }` or
 * `type <typeName> = { … }` in `source` and parses the members of its body.
 *
 * The name is escaped before being placed in the pattern (identifiers may
 * contain `$`, and no caller-supplied text should act as regex syntax), the
 * keyword must start at an identifier boundary, and simple generic parameter
 * lists and `extends A, B` clauses on interfaces are tolerated.
 *
 * @param {string} source - Script source to search.
 * @param {string} typeName - Exact name of the interface or type alias.
 * @returns {*} Whatever `parseTypeMembers` returns for the body text, or
 *   `undefined` when no declaration is found or its braces never balance.
 */
function resolveTypeReference(source, typeName) {
  const escapedName = typeName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const generics = "(?:\\s*<[^>{]*>)?";
  const startRe = new RegExp(
    `(?<![\\w$])(?:interface\\s+${escapedName}${generics}(?:\\s+extends\\s+[\\w$.,\\s]+?)?\\s*|type\\s+${escapedName}${generics}\\s*=\\s*)\\{`
  );
  const startMatch = startRe.exec(source);
  if (startMatch === null) return void 0;
  const bodyStart = startMatch.index + startMatch[0].length;
  // The opening brace was consumed by the pattern, so scanning starts one level deep.
  let depth = 1;
  let cursor = bodyStart;
  for (; cursor < source.length && depth > 0; cursor++) {
    const ch = source[cursor];
    if (ch === "{") depth++;
    else if (ch === "}") depth--;
  }
  if (depth !== 0) return void 0;
  // `cursor` sits just past the matching close brace; exclude that brace from the body.
  return parseTypeMembers(source.slice(bodyStart, cursor - 1));
}
|
|
2666
|
+
/**
 * Parses an inline object type annotation such as `{ a: string; b?: number }`.
 *
 * One leading `{` and one trailing `}` are removed when present, and the
 * remainder is handed to `parseTypeMembers`.
 *
 * @param {string} annotation - The annotation text.
 * @returns {*} The result of `parseTypeMembers` for the inner text.
 */
function parseInlineTypeAnnotation(annotation) {
  let inner = annotation;
  if (inner.startsWith("{")) {
    inner = inner.slice(1);
  }
  if (inner.endsWith("}")) {
    inner = inner.slice(0, -1);
  }
  return parseTypeMembers(inner);
}
|
|
2670
|
+
/**
 * Parses the body of an object type (the text between its braces) into a map
 * of member name to type text.
 *
 * Members are separated by `;`, `,` or a newline, but only when the separator
 * is outside every bracket pair (`{}`, `[]`, `()`, `<>`), outside string
 * literals and outside comments. Nested object types, generics with several
 * arguments and function types therefore stay attached to their member.
 * Optional markers (`?`) and a leading `readonly` are ignored; segments that
 * are not `name: type` (index signatures, methods, …) are skipped.
 *
 * @param {string} body - Text between the braces of the type.
 * @returns {Map<string, string>} Member name to type text; line breaks inside
 *   a type are collapsed to single spaces.
 */
function parseTypeMembers(body) {
  const map = /* @__PURE__ */ new Map();
  const segments = [];
  let current = "";
  let depth = 0;
  let quote = null; // quote character of the string literal we are inside, if any
  for (let i = 0; i < body.length; i++) {
    const ch = body[i];
    if (quote !== null) {
      if (ch === "\n" && quote !== "`") {
        // Quoted strings cannot span lines: recover and treat the newline normally.
        quote = null;
      } else {
        current += ch;
        if (ch === "\\" && i + 1 < body.length) {
          current += body[++i];
        } else if (ch === quote) {
          quote = null;
        }
        continue;
      }
    }
    if (ch === "/" && body[i + 1] === "/") {
      // Line comment: drop it, leaving the newline to act as a separator.
      const eol = body.indexOf("\n", i);
      i = (eol === -1 ? body.length : eol) - 1;
      continue;
    }
    if (ch === "/" && body[i + 1] === "*") {
      // Block comment: drop it entirely.
      const end = body.indexOf("*/", i + 2);
      i = end === -1 ? body.length : end + 1;
      continue;
    }
    if (ch === '"' || ch === "'" || ch === "`") {
      quote = ch;
      current += ch;
      continue;
    }
    if (depth === 0 && (ch === ";" || ch === "," || ch === "\n")) {
      segments.push(current);
      current = "";
      continue;
    }
    if (ch === "{" || ch === "[" || ch === "(" || ch === "<") {
      depth++;
    } else if (ch === "}" || ch === "]" || ch === ")" || (ch === ">" && body[i - 1] !== "=")) {
      // The `>` of an arrow (`=>`) is not a closing angle bracket.
      depth = Math.max(0, depth - 1);
    }
    current += ch;
  }
  segments.push(current);
  for (const segment of segments) {
    const member = segment.trim();
    if (!member) continue;
    const memberMatch = member.match(/^(?:readonly\s+)?(\w+)\??\s*:\s*([\s\S]+)$/);
    if (memberMatch) {
      map.set(memberMatch[1], memberMatch[2].replace(/\s*\n\s*/g, " ").trim());
    }
  }
  return map;
}
|
|
2681
|
+
/**
 * Renders a one-paragraph textual summary of a Svelte component.
 *
 * The summary is "<Name> component.", followed by the description (if any)
 * and a "Props: …." sentence listing each prop with its optional type and
 * default. When the metadata has neither a description nor props there is
 * nothing worth summarising and an empty string is returned.
 *
 * @param {string} componentName - Display name of the component.
 * @param {{description?: string, props: Array<{name: string, type?: string, default?: string}>}} meta
 * @returns {string} The summary sentence(s), or "".
 */
function buildComponentMarkdown(componentName, meta) {
  const { description, props } = meta;
  const hasProps = props.length > 0;
  if (!description && !hasProps) {
    return "";
  }
  let summary = `${componentName} component.`;
  if (description) {
    summary += ` ${description}`;
  }
  if (hasProps) {
    const described = [];
    for (const prop of props) {
      const typePart = prop.type ? ` (${prop.type})` : "";
      const defaultPart = prop.default ? ` default: ${prop.default}` : "";
      described.push(`${prop.name}${typePart}${defaultPart}`);
    }
    summary += ` Props: ${described.join(", ")}.`;
  }
  return summary;
}
|
|
1922
2698
|
/**
 * Reduces Svelte source to its plain visible text.
 *
 * Passes run in order: drop `<script>` blocks, drop `<style>` blocks, replace
 * every remaining tag with a space, replace every `{…}` expression with a
 * space, then collapse whitespace runs and trim.
 *
 * @param {string} source - Raw `.svelte` file content.
 * @returns {string} Single-line text content.
 */
function normalizeSvelteToMarkdown(source) {
  const passes = [
    [/<script[\s\S]*?<\/script>/g, ""],
    [/<style[\s\S]*?<\/style>/g, ""],
    [/<[^>]+>/g, " "],
    [/\{[^}]+\}/g, " "],
    [/\s+/g, " "]
  ];
  let text = source;
  for (const [pattern, replacement] of passes) {
    text = text.replace(pattern, replacement);
  }
  return text.trim();
}
|
|
@@ -1938,12 +2714,26 @@ async function loadContentFilesPages(cwd, config, maxPages) {
|
|
|
1938
2714
|
const pages = [];
|
|
1939
2715
|
for (const filePath of selected) {
|
|
1940
2716
|
const raw = await fs5.readFile(filePath, "utf8");
|
|
1941
|
-
|
|
2717
|
+
let markdown;
|
|
2718
|
+
let tags;
|
|
2719
|
+
if (filePath.endsWith(".md")) {
|
|
2720
|
+
markdown = raw;
|
|
2721
|
+
} else if (isSvelteComponentFile(filePath)) {
|
|
2722
|
+
const componentName = path7.basename(filePath, ".svelte");
|
|
2723
|
+
const meta = extractSvelteComponentMeta(raw);
|
|
2724
|
+
const componentMarkdown = buildComponentMarkdown(componentName, meta);
|
|
2725
|
+
const templateContent = normalizeSvelteToMarkdown(raw);
|
|
2726
|
+
markdown = componentMarkdown ? [componentMarkdown, templateContent].filter(Boolean).join("\n\n") : templateContent;
|
|
2727
|
+
tags = ["component"];
|
|
2728
|
+
} else {
|
|
2729
|
+
markdown = normalizeSvelteToMarkdown(raw);
|
|
2730
|
+
}
|
|
1942
2731
|
pages.push({
|
|
1943
2732
|
url: filePathToUrl(filePath, baseDir),
|
|
1944
2733
|
markdown,
|
|
1945
2734
|
sourcePath: path7.relative(cwd, filePath).replace(/\\/g, "/"),
|
|
1946
|
-
outgoingLinks: []
|
|
2735
|
+
outgoingLinks: [],
|
|
2736
|
+
...tags ? { tags } : {}
|
|
1947
2737
|
});
|
|
1948
2738
|
}
|
|
1949
2739
|
return pages;
|
|
@@ -1958,9 +2748,9 @@ function extractLocs(xml) {
|
|
|
1958
2748
|
const $ = cheerioLoad2(xml, { xmlMode: true });
|
|
1959
2749
|
const locs = [];
|
|
1960
2750
|
$("loc").each((_i, el) => {
|
|
1961
|
-
const
|
|
1962
|
-
if (
|
|
1963
|
-
locs.push(
|
|
2751
|
+
const text2 = $(el).text().trim();
|
|
2752
|
+
if (text2) {
|
|
2753
|
+
locs.push(text2);
|
|
1964
2754
|
}
|
|
1965
2755
|
});
|
|
1966
2756
|
return locs;
|
|
@@ -2175,32 +2965,68 @@ function nonNegativeOrZero(value) {
|
|
|
2175
2965
|
}
|
|
2176
2966
|
return Math.max(0, value);
|
|
2177
2967
|
}
|
|
2178
|
-
function normalizeForTitleMatch(
|
|
2179
|
-
return
|
|
2968
|
+
/**
 * Normalises text for loose title/query comparison.
 *
 * The text is lower-cased and compatibility-decomposed (NFKD), so accented
 * letters become a base letter plus combining marks. Everything that is not a
 * Unicode letter, digit or whitespace is then removed (which drops the marks
 * and all punctuation), and whitespace runs are collapsed. ASCII input gives
 * the same result as an ASCII-only filter, while non-Latin titles no longer
 * normalise to an empty string and "Café" compares equal to "cafe".
 *
 * @param {string} text2 - Title or query text; `null`/`undefined` is treated as "".
 * @returns {string} The normalised text.
 */
function normalizeForTitleMatch(text2) {
  return String(text2 ?? "")
    .toLowerCase()
    .normalize("NFKD")
    .replace(/[^\p{L}\p{N}\s]/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
2181
|
-
function rankHits(hits, config, query) {
|
|
2971
|
+
function rankHits(hits, config, query, debug) {
|
|
2182
2972
|
const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
|
|
2183
2973
|
const titleMatchWeight = config.ranking.weights.titleMatch;
|
|
2184
2974
|
return hits.map((hit) => {
|
|
2185
|
-
|
|
2975
|
+
const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
|
|
2976
|
+
let score = baseScore;
|
|
2977
|
+
let incomingLinkBoostValue = 0;
|
|
2186
2978
|
if (config.ranking.enableIncomingLinkBoost) {
|
|
2187
2979
|
const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.metadata.incomingLinks));
|
|
2188
|
-
|
|
2980
|
+
incomingLinkBoostValue = incomingBoost * config.ranking.weights.incomingLinks;
|
|
2981
|
+
score += incomingLinkBoostValue;
|
|
2189
2982
|
}
|
|
2983
|
+
let depthBoostValue = 0;
|
|
2190
2984
|
if (config.ranking.enableDepthBoost) {
|
|
2191
2985
|
const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
|
|
2192
|
-
|
|
2986
|
+
depthBoostValue = depthBoost * config.ranking.weights.depth;
|
|
2987
|
+
score += depthBoostValue;
|
|
2193
2988
|
}
|
|
2989
|
+
let titleMatchBoostValue = 0;
|
|
2194
2990
|
if (normalizedQuery && titleMatchWeight > 0) {
|
|
2195
2991
|
const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
|
|
2196
2992
|
if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
|
|
2197
|
-
|
|
2993
|
+
titleMatchBoostValue = titleMatchWeight;
|
|
2994
|
+
score += titleMatchBoostValue;
|
|
2198
2995
|
}
|
|
2199
2996
|
}
|
|
2200
|
-
|
|
2997
|
+
let freshnessBoostValue = 0;
|
|
2998
|
+
if (config.ranking.enableFreshnessBoost) {
|
|
2999
|
+
const publishedAt = hit.metadata.publishedAt;
|
|
3000
|
+
if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
|
|
3001
|
+
const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
|
|
3002
|
+
const decay = 1 / (1 + nonNegativeOrZero(daysSince) * config.ranking.freshnessDecayRate);
|
|
3003
|
+
freshnessBoostValue = decay * config.ranking.weights.freshness;
|
|
3004
|
+
score += freshnessBoostValue;
|
|
3005
|
+
}
|
|
3006
|
+
}
|
|
3007
|
+
let anchorTextMatchBoostValue = 0;
|
|
3008
|
+
if (config.ranking.enableAnchorTextBoost && normalizedQuery && config.ranking.weights.anchorText > 0) {
|
|
3009
|
+
const normalizedAnchorText = normalizeForTitleMatch(hit.metadata.incomingAnchorText ?? "");
|
|
3010
|
+
if (normalizedAnchorText.length > 0 && normalizedQuery.length > 0 && (normalizedAnchorText.includes(normalizedQuery) || normalizedQuery.includes(normalizedAnchorText))) {
|
|
3011
|
+
anchorTextMatchBoostValue = config.ranking.weights.anchorText;
|
|
3012
|
+
score += anchorTextMatchBoostValue;
|
|
3013
|
+
}
|
|
3014
|
+
}
|
|
3015
|
+
const result = {
|
|
2201
3016
|
hit,
|
|
2202
3017
|
finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
|
|
2203
3018
|
};
|
|
3019
|
+
if (debug) {
|
|
3020
|
+
result.breakdown = {
|
|
3021
|
+
baseScore,
|
|
3022
|
+
incomingLinkBoost: incomingLinkBoostValue,
|
|
3023
|
+
depthBoost: depthBoostValue,
|
|
3024
|
+
titleMatchBoost: titleMatchBoostValue,
|
|
3025
|
+
freshnessBoost: freshnessBoostValue,
|
|
3026
|
+
anchorTextMatchBoost: anchorTextMatchBoostValue
|
|
3027
|
+
};
|
|
3028
|
+
}
|
|
3029
|
+
return result;
|
|
2204
3030
|
}).sort((a, b) => {
|
|
2205
3031
|
const delta = b.finalScore - a.finalScore;
|
|
2206
3032
|
return Number.isNaN(delta) ? 0 : delta;
|
|
@@ -2209,12 +3035,13 @@ function rankHits(hits, config, query) {
|
|
|
2209
3035
|
function trimByScoreGap(results, config) {
|
|
2210
3036
|
if (results.length === 0) return results;
|
|
2211
3037
|
const threshold = config.ranking.scoreGapThreshold;
|
|
2212
|
-
const
|
|
2213
|
-
if (
|
|
2214
|
-
const
|
|
2215
|
-
|
|
2216
|
-
|
|
2217
|
-
|
|
3038
|
+
const minScoreRatio = config.ranking.minScoreRatio;
|
|
3039
|
+
if (minScoreRatio > 0 && results.length > 0) {
|
|
3040
|
+
const topScore = results[0].pageScore;
|
|
3041
|
+
if (Number.isFinite(topScore) && topScore > 0) {
|
|
3042
|
+
const minThreshold = topScore * minScoreRatio;
|
|
3043
|
+
results = results.filter((r) => r.pageScore >= minThreshold);
|
|
3044
|
+
}
|
|
2218
3045
|
}
|
|
2219
3046
|
if (threshold > 0 && results.length > 1) {
|
|
2220
3047
|
for (let i = 1; i < results.length; i++) {
|
|
@@ -2284,61 +3111,99 @@ function aggregateByPage(ranked, config) {
|
|
|
2284
3111
|
return Number.isNaN(delta) ? 0 : delta;
|
|
2285
3112
|
});
|
|
2286
3113
|
}
|
|
2287
|
-
function
|
|
2288
|
-
|
|
2289
|
-
const
|
|
2290
|
-
|
|
2291
|
-
|
|
2292
|
-
|
|
2293
|
-
|
|
2294
|
-
|
|
2295
|
-
|
|
2296
|
-
|
|
2297
|
-
|
|
2298
|
-
if (pageHit) {
|
|
2299
|
-
pagesWithChunks.add(url);
|
|
2300
|
-
const blended = (1 - w) * ranked.finalScore + w * pageHit.score;
|
|
2301
|
-
return {
|
|
2302
|
-
hit: ranked.hit,
|
|
2303
|
-
finalScore: Number.isFinite(blended) ? blended : ranked.finalScore
|
|
2304
|
-
};
|
|
3114
|
+
/**
 * Scores page-level hits and returns them ordered best-first.
 *
 * Each hit starts from its own `score` (non-finite scores count as
 * -Infinity) and receives the additive boosts enabled in `config.ranking`:
 * incoming links (log-scaled), URL depth (shallower is better), title match
 * against the query, and freshness (decays with days since `publishedAt`).
 * The sum is then multiplied by the page weight found for the hit's URL.
 *
 * Pages whose weight is 0 are excluded. The weight is looked up once per hit
 * and checked before any scoring, so excluded pages cost nothing.
 *
 * @param {Array<object>} pageHits - Hits with `url`, `title`, `score`, `depth`,
 *   `incomingLinks` and optional `description`, `routeFile`, `tags`, `publishedAt`.
 * @param {object} config - Resolved configuration; only `config.ranking` is read.
 * @param {string} [query] - The user's query, used for the title-match boost.
 * @param {boolean} [debug] - When truthy, each result carries a `breakdown`
 *   object with the individual score components.
 * @returns {Array<object>} Ranked results sorted by descending `finalScore`.
 */
function rankPageHits(pageHits, config, query, debug) {
  const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
  const titleMatchWeight = config.ranking.weights.titleMatch;
  const ranked = [];
  for (const hit of pageHits) {
    const pageWeight = findPageWeight(hit.url, config.ranking.pageWeights);
    if (pageWeight === 0) continue;

    const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
    let score = baseScore;

    let incomingLinkBoostValue = 0;
    if (config.ranking.enableIncomingLinkBoost) {
      const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.incomingLinks));
      incomingLinkBoostValue = incomingBoost * config.ranking.weights.incomingLinks;
      score += incomingLinkBoostValue;
    }

    let depthBoostValue = 0;
    if (config.ranking.enableDepthBoost) {
      const depthBoost = 1 / (1 + nonNegativeOrZero(hit.depth));
      depthBoostValue = depthBoost * config.ranking.weights.depth;
      score += depthBoostValue;
    }

    let titleMatchBoostValue = 0;
    if (normalizedQuery && titleMatchWeight > 0) {
      const normalizedTitle = normalizeForTitleMatch(hit.title);
      // Containment in either direction counts: "install" matches "Install guide" and vice versa.
      if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
        titleMatchBoostValue = titleMatchWeight;
        score += titleMatchBoostValue;
      }
    }

    let freshnessBoostValue = 0;
    if (config.ranking.enableFreshnessBoost) {
      const publishedAt = hit.publishedAt;
      if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
        // 864e5 ms = one day; dates in the future are clamped to "today".
        const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
        const decay = 1 / (1 + nonNegativeOrZero(daysSince) * config.ranking.freshnessDecayRate);
        freshnessBoostValue = decay * config.ranking.weights.freshness;
        score += freshnessBoostValue;
      }
    }

    if (pageWeight !== 1) {
      score *= pageWeight;
    }

    const result = {
      url: hit.url,
      title: hit.title,
      description: hit.description,
      routeFile: hit.routeFile,
      depth: hit.depth,
      incomingLinks: hit.incomingLinks,
      tags: hit.tags,
      baseScore,
      finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY,
      publishedAt: hit.publishedAt
    };
    if (debug) {
      result.breakdown = {
        baseScore,
        pageWeight,
        incomingLinkBoost: incomingLinkBoostValue,
        depthBoost: depthBoostValue,
        titleMatchBoost: titleMatchBoostValue,
        freshnessBoost: freshnessBoostValue
      };
    }
    ranked.push(result);
  }
  return ranked.sort((a, b) => {
    // Two -Infinity scores subtract to NaN; treat them as equal.
    const delta = b.finalScore - a.finalScore;
    return Number.isNaN(delta) ? 0 : delta;
  });
}
|
|
3182
|
+
/**
 * Drops the low-quality tail from an already-sorted list of ranked pages.
 *
 * Two independent cut-offs from `config.ranking` are applied in order:
 *  1. `minScoreRatio` — when positive and the top score is a positive finite
 *     number, keep only pages scoring at least `top * minScoreRatio`.
 *  2. `scoreGapThreshold` — when positive, cut the list just before the first
 *     page whose relative drop from its predecessor, `(prev - cur) / prev`,
 *     reaches the threshold (only evaluated when `prev` is positive).
 *
 * @param {Array<{finalScore: number}>} results - Pages sorted best-first.
 * @param {object} config - Resolved configuration; only `config.ranking` is read.
 * @returns {Array<{finalScore: number}>} The input array itself when nothing
 *   is removed, otherwise a new shorter array.
 */
function trimPagesByScoreGap(results, config) {
  if (results.length === 0) return results;
  const { scoreGapThreshold, minScoreRatio } = config.ranking;

  let kept = results;
  if (minScoreRatio > 0) {
    const best = results[0].finalScore;
    if (Number.isFinite(best) && best > 0) {
      const floor = best * minScoreRatio;
      kept = results.filter((page) => page.finalScore >= floor);
    }
  }

  if (!(scoreGapThreshold > 0) || kept.length <= 1) return kept;

  const cut = kept.findIndex((page, index) => {
    if (index === 0) return false;
    const previous = kept[index - 1].finalScore;
    return previous > 0 && (previous - page.finalScore) / previous >= scoreGapThreshold;
  });
  return cut === -1 ? kept : kept.slice(0, cut);
}
|
|
2342
3207
|
|
|
2343
3208
|
// src/utils/time.ts
|
|
2344
3209
|
function nowIso() {
|
|
@@ -2348,6 +3213,85 @@ function hrTimeMs(start) {
|
|
|
2348
3213
|
return Number(process.hrtime.bigint() - start) / 1e6;
|
|
2349
3214
|
}
|
|
2350
3215
|
|
|
3216
|
+
// src/indexing/llms-txt.ts
|
|
3217
|
+
import fs8 from "fs/promises";
|
|
3218
|
+
import path10 from "path";
|
|
3219
|
+
/**
 * Turns a page URL into an absolute URL against the site's base URL.
 *
 * @param {string} pageUrl - Page path or URL.
 * @param {string} [baseUrl] - Site base URL; when falsy the page URL is
 *   returned untouched.
 * @returns {string} The resolved absolute URL, or `pageUrl` unchanged when
 *   there is no base or the pair cannot be parsed as a URL.
 */
function resolvePageUrl(pageUrl, baseUrl) {
  let resolved = pageUrl;
  if (baseUrl) {
    try {
      resolved = new URL(pageUrl, baseUrl).href;
    } catch {
      // Unparseable base/page combination: fall back to the page URL as given.
    }
  }
  return resolved;
}
|
|
3227
|
+
/**
 * Builds the content of `llms.txt`: a heading, an optional block-quoted
 * description, and a "## Pages" list with one markdown link per page.
 *
 * The generated files themselves (`/llms.txt`, `/llms-full.txt`) are left
 * out. Pages are ordered by ascending `depth`, then by descending
 * `incomingLinks`. The input array is not modified.
 *
 * @param {Array<object>} pages - Pages with `url`, `title`, `depth`,
 *   `incomingLinks` and optional `description`.
 * @param {object} config - Reads `llmsTxt.title`, `llmsTxt.description`,
 *   `project.id` (title fallback) and `project.baseUrl`.
 * @returns {string} File content, ending with a newline.
 */
function generateLlmsTxt(pages, config) {
  const heading = config.llmsTxt.title ?? config.project.id;
  const { description } = config.llmsTxt;
  const { baseUrl } = config.project;

  const output = [`# ${heading}`];
  if (description) {
    output.push("", `> ${description}`);
  }

  const listed = pages
    .filter((page) => page.url !== "/llms.txt" && page.url !== "/llms-full.txt")
    .sort((left, right) => left.depth === right.depth ? right.incomingLinks - left.incomingLinks : left.depth - right.depth);

  if (listed.length > 0) {
    output.push("", "## Pages", "");
    for (const page of listed) {
      const link = `- [${page.title}](${resolvePageUrl(page.url, baseUrl)})`;
      output.push(page.description ? `${link}: ${page.description}` : link);
    }
  }

  output.push("");
  return output.join("\n");
}
|
|
3256
|
+
/**
 * Builds the content of `llms-full.txt`: a heading, an optional block-quoted
 * description, then every page's full markdown, each preceded by a `---`
 * rule and a linked `##` heading.
 *
 * The generated files themselves (`/llms.txt`, `/llms-full.txt`) are left
 * out. Pages are ordered by ascending `depth`, then by descending
 * `incomingLinks`. The input array is not modified.
 *
 * @param {Array<object>} pages - Pages with `url`, `title`, `depth`,
 *   `incomingLinks` and `markdown`.
 * @param {object} config - Reads `llmsTxt.title`, `llmsTxt.description`,
 *   `project.id` (title fallback) and `project.baseUrl`.
 * @returns {string} File content, ending with a newline.
 */
function generateLlmsFullTxt(pages, config) {
  const heading = config.llmsTxt.title ?? config.project.id;
  const { description } = config.llmsTxt;
  const { baseUrl } = config.project;

  const isGeneratedFile = (page) => page.url === "/llms.txt" || page.url === "/llms-full.txt";
  const byDepthThenLinks = (left, right) => left.depth === right.depth ? right.incomingLinks - left.incomingLinks : left.depth - right.depth;

  const header = description ? [`# ${heading}`, "", `> ${description}`] : [`# ${heading}`];
  const sections = pages
    .filter((page) => !isGeneratedFile(page))
    .sort(byDepthThenLinks)
    .map((page) => {
      const url = resolvePageUrl(page.url, baseUrl);
      return ["", "---", "", `## [${page.title}](${url})`, "", page.markdown.trim()].join("\n");
    });

  return [...header, ...sections, ""].join("\n");
}
|
|
3279
|
+
/**
 * Writes `llms.txt` and, when `config.llmsTxt.generateFull` is set, the
 * companion full-content file next to it.
 *
 * The companion path is derived from the index path by inserting `-full`
 * before the extension (`llms.txt` -> `llms-full.txt`). When the configured
 * path has no `.txt` suffix the name is still changed (`llms.md` ->
 * `llms-full.md`, `llms` -> `llms-full.txt`), so the full document can never
 * overwrite the index that was just written.
 *
 * @param {Array<object>} pages - Indexed pages to list.
 * @param {object} config - Reads `llmsTxt.outputPath` and `llmsTxt.generateFull`.
 * @param {string} cwd - Directory that `outputPath` is resolved against.
 * @param {{info: function(string): void}} logger3 - Receives one message per file written.
 * @returns {Promise<void>}
 */
async function writeLlmsTxt(pages, config, cwd, logger3) {
  const outputPath = path10.resolve(cwd, config.llmsTxt.outputPath);
  await fs8.mkdir(path10.dirname(outputPath), { recursive: true });
  await fs8.writeFile(outputPath, generateLlmsTxt(pages, config), "utf8");
  logger3.info(`Generated llms.txt at ${config.llmsTxt.outputPath}`);
  if (!config.llmsTxt.generateFull) return;

  let fullPath;
  if (outputPath.endsWith(".txt")) {
    fullPath = `${outputPath.slice(0, -".txt".length)}-full.txt`;
  } else {
    const { dir, name, ext } = path10.parse(outputPath);
    fullPath = path10.join(dir, `${name}-full${ext || ".txt"}`);
  }
  await fs8.writeFile(fullPath, generateLlmsFullTxt(pages, config), "utf8");
  logger3.info(`Generated llms-full.txt at ${path10.relative(cwd, fullPath)}`);
}
|
|
3294
|
+
|
|
2351
3295
|
// src/indexing/pipeline.ts
|
|
2352
3296
|
function buildPageSummary(page, maxChars = 3500) {
|
|
2353
3297
|
const parts = [page.title];
|
|
@@ -2365,26 +3309,44 @@ function buildPageSummary(page, maxChars = 3500) {
|
|
|
2365
3309
|
if (joined.length <= maxChars) return joined;
|
|
2366
3310
|
return joined.slice(0, maxChars).trim();
|
|
2367
3311
|
}
|
|
3312
|
+
/**
 * Computes a fingerprint of everything about a page that is stored in the
 * index, so an unchanged page can be skipped on the next run.
 *
 * List-valued fields are sorted before hashing so their order does not
 * matter. `meta` is serialised with its keys sorted at every nesting level.
 * Passing a key list as the `JSON.stringify` replacer would instead act as a
 * property whitelist for nested objects too, silently erasing nested values
 * from the hash and hiding changes to them.
 *
 * @param {object} page - Indexed page; reads `title`, `description`,
 *   `keywords`, `tags`, `markdown`, `outgoingLinks`, `publishedAt`,
 *   `incomingAnchorText`, `outgoingLinkUrls` and `meta`.
 * @returns {*} The result of `sha256` over the `|`-joined fields.
 */
function buildPageContentHash(page) {
  // Rebuilds plain objects with sorted keys so serialisation is order-independent.
  const canonicalize = (value) => {
    if (Array.isArray(value)) return value.map(canonicalize);
    if (value === null || typeof value !== "object") return value;
    // Leave objects that define their own JSON form (e.g. Date) to JSON.stringify.
    if (typeof value.toJSON === "function") return value;
    const sorted = {};
    for (const key of Object.keys(value).sort()) {
      sorted[key] = canonicalize(value[key]);
    }
    return sorted;
  };
  const parts = [
    page.title,
    page.description ?? "",
    (page.keywords ?? []).slice().sort().join(","),
    page.tags.slice().sort().join(","),
    page.markdown,
    String(page.outgoingLinks),
    String(page.publishedAt ?? ""),
    page.incomingAnchorText ?? "",
    (page.outgoingLinkUrls ?? []).slice().sort().join(","),
    page.meta ? JSON.stringify(canonicalize(page.meta)) : ""
  ];
  return sha256(parts.join("|"));
}
|
|
2368
3327
|
var IndexPipeline = class _IndexPipeline {
|
|
2369
3328
|
cwd;
|
|
2370
3329
|
config;
|
|
2371
3330
|
store;
|
|
2372
3331
|
logger;
|
|
3332
|
+
hooks;
|
|
2373
3333
|
constructor(options) {
|
|
2374
3334
|
this.cwd = options.cwd;
|
|
2375
3335
|
this.config = options.config;
|
|
2376
3336
|
this.store = options.store;
|
|
2377
3337
|
this.logger = options.logger;
|
|
3338
|
+
this.hooks = options.hooks;
|
|
2378
3339
|
}
|
|
2379
3340
|
static async create(options = {}) {
|
|
2380
|
-
const cwd =
|
|
3341
|
+
const cwd = path11.resolve(options.cwd ?? process.cwd());
|
|
2381
3342
|
const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
|
|
2382
3343
|
const store = options.store ?? await createUpstashStore(config);
|
|
2383
3344
|
return new _IndexPipeline({
|
|
2384
3345
|
cwd,
|
|
2385
3346
|
config,
|
|
2386
3347
|
store,
|
|
2387
|
-
logger: options.logger ?? new Logger()
|
|
3348
|
+
logger: options.logger ?? new Logger(),
|
|
3349
|
+
hooks: options.hooks ?? {}
|
|
2388
3350
|
});
|
|
2389
3351
|
}
|
|
2390
3352
|
getConfig() {
|
|
@@ -2405,7 +3367,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2405
3367
|
const scope = resolveScope(this.config, options.scopeOverride);
|
|
2406
3368
|
ensureStateDirs(this.cwd, this.config.state.dir, scope);
|
|
2407
3369
|
const sourceMode = options.sourceOverride ?? this.config.source.mode;
|
|
2408
|
-
this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-
|
|
3370
|
+
this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-vector)`);
|
|
2409
3371
|
if (options.force) {
|
|
2410
3372
|
this.logger.info("Force mode enabled \u2014 full rebuild");
|
|
2411
3373
|
}
|
|
@@ -2414,8 +3376,9 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2414
3376
|
}
|
|
2415
3377
|
const manifestStart = stageStart();
|
|
2416
3378
|
const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
|
|
3379
|
+
const existingPageHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getPageHashes(scope);
|
|
2417
3380
|
stageEnd("manifest", manifestStart);
|
|
2418
|
-
this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
|
|
3381
|
+
this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes, ${existingPageHashes.size} existing page hashes loaded`);
|
|
2419
3382
|
const sourceStart = stageStart();
|
|
2420
3383
|
this.logger.info(`Loading pages (source: ${sourceMode})...`);
|
|
2421
3384
|
let sourcePages;
|
|
@@ -2451,11 +3414,11 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2451
3414
|
let robotsRules = null;
|
|
2452
3415
|
if (sourceMode === "static-output") {
|
|
2453
3416
|
robotsRules = await loadRobotsTxtFromDir(
|
|
2454
|
-
|
|
3417
|
+
path11.resolve(this.cwd, this.config.source.staticOutputDir)
|
|
2455
3418
|
);
|
|
2456
3419
|
} else if (sourceMode === "build" && this.config.source.build) {
|
|
2457
3420
|
robotsRules = await loadRobotsTxtFromDir(
|
|
2458
|
-
|
|
3421
|
+
path11.resolve(this.cwd, this.config.source.build.outputDir)
|
|
2459
3422
|
);
|
|
2460
3423
|
} else if (sourceMode === "crawl" && this.config.source.crawl) {
|
|
2461
3424
|
robotsRules = await fetchRobotsTxt(this.config.source.crawl.baseUrl);
|
|
@@ -2492,11 +3455,61 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2492
3455
|
);
|
|
2493
3456
|
continue;
|
|
2494
3457
|
}
|
|
2495
|
-
|
|
3458
|
+
if (sourcePage.tags && sourcePage.tags.length > 0) {
|
|
3459
|
+
extracted.tags = [.../* @__PURE__ */ new Set([...extracted.tags, ...sourcePage.tags])];
|
|
3460
|
+
}
|
|
3461
|
+
let accepted;
|
|
3462
|
+
if (this.hooks.transformPage) {
|
|
3463
|
+
const transformed = await this.hooks.transformPage(extracted);
|
|
3464
|
+
if (transformed === null) {
|
|
3465
|
+
this.logger.debug(`Page ${sourcePage.url} skipped by transformPage hook`);
|
|
3466
|
+
continue;
|
|
3467
|
+
}
|
|
3468
|
+
accepted = transformed;
|
|
3469
|
+
} else {
|
|
3470
|
+
accepted = extracted;
|
|
3471
|
+
}
|
|
3472
|
+
extractedPages.push(accepted);
|
|
2496
3473
|
this.logger.event("page_extracted", {
|
|
2497
|
-
url:
|
|
3474
|
+
url: accepted.url
|
|
2498
3475
|
});
|
|
2499
3476
|
}
|
|
3477
|
+
const customRecords = options.customRecords ?? [];
|
|
3478
|
+
if (customRecords.length > 0) {
|
|
3479
|
+
this.logger.info(`Processing ${customRecords.length} custom record${customRecords.length === 1 ? "" : "s"}...`);
|
|
3480
|
+
for (const record of customRecords) {
|
|
3481
|
+
const normalizedUrl = normalizeUrlPath(record.url);
|
|
3482
|
+
const normalized = normalizeMarkdown(record.content);
|
|
3483
|
+
if (!normalized.trim()) {
|
|
3484
|
+
this.logger.warn(`Custom record ${normalizedUrl} has empty content and was skipped.`);
|
|
3485
|
+
continue;
|
|
3486
|
+
}
|
|
3487
|
+
const urlTags = normalizedUrl.split("/").filter(Boolean).slice(0, 1);
|
|
3488
|
+
const tags = record.tags ? [.../* @__PURE__ */ new Set([...urlTags, ...record.tags])] : urlTags;
|
|
3489
|
+
const extracted = {
|
|
3490
|
+
url: normalizedUrl,
|
|
3491
|
+
title: record.title,
|
|
3492
|
+
markdown: normalized,
|
|
3493
|
+
outgoingLinks: [],
|
|
3494
|
+
noindex: false,
|
|
3495
|
+
tags,
|
|
3496
|
+
weight: record.weight
|
|
3497
|
+
};
|
|
3498
|
+
let accepted;
|
|
3499
|
+
if (this.hooks.transformPage) {
|
|
3500
|
+
const transformed = await this.hooks.transformPage(extracted);
|
|
3501
|
+
if (transformed === null) {
|
|
3502
|
+
this.logger.debug(`Custom record ${normalizedUrl} skipped by transformPage hook`);
|
|
3503
|
+
continue;
|
|
3504
|
+
}
|
|
3505
|
+
accepted = transformed;
|
|
3506
|
+
} else {
|
|
3507
|
+
accepted = extracted;
|
|
3508
|
+
}
|
|
3509
|
+
extractedPages.push(accepted);
|
|
3510
|
+
this.logger.event("page_extracted", { url: accepted.url, custom: true });
|
|
3511
|
+
}
|
|
3512
|
+
}
|
|
2500
3513
|
extractedPages.sort((a, b) => a.url.localeCompare(b.url));
|
|
2501
3514
|
const uniquePages = [];
|
|
2502
3515
|
const seenUrls = /* @__PURE__ */ new Set();
|
|
@@ -2529,15 +3542,28 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2529
3542
|
const linkStart = stageStart();
|
|
2530
3543
|
const pageSet = new Set(indexablePages.map((page) => normalizeUrlPath(page.url)));
|
|
2531
3544
|
const incomingLinkCount = /* @__PURE__ */ new Map();
|
|
3545
|
+
const incomingAnchorTexts = /* @__PURE__ */ new Map();
|
|
2532
3546
|
for (const page of indexablePages) {
|
|
2533
3547
|
incomingLinkCount.set(page.url, incomingLinkCount.get(page.url) ?? 0);
|
|
2534
3548
|
}
|
|
2535
3549
|
for (const page of indexablePages) {
|
|
2536
|
-
|
|
3550
|
+
const seenForCount = /* @__PURE__ */ new Set();
|
|
3551
|
+
const seenForAnchor = /* @__PURE__ */ new Set();
|
|
3552
|
+
for (const { url: outgoing, anchorText } of page.outgoingLinks) {
|
|
2537
3553
|
if (!pageSet.has(outgoing)) {
|
|
2538
3554
|
continue;
|
|
2539
3555
|
}
|
|
2540
|
-
|
|
3556
|
+
if (!seenForCount.has(outgoing)) {
|
|
3557
|
+
seenForCount.add(outgoing);
|
|
3558
|
+
incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
|
|
3559
|
+
}
|
|
3560
|
+
if (anchorText && !seenForAnchor.has(outgoing)) {
|
|
3561
|
+
seenForAnchor.add(outgoing);
|
|
3562
|
+
if (!incomingAnchorTexts.has(outgoing)) {
|
|
3563
|
+
incomingAnchorTexts.set(outgoing, /* @__PURE__ */ new Set());
|
|
3564
|
+
}
|
|
3565
|
+
incomingAnchorTexts.get(outgoing).add(anchorText);
|
|
3566
|
+
}
|
|
2541
3567
|
}
|
|
2542
3568
|
}
|
|
2543
3569
|
stageEnd("links", linkStart);
|
|
@@ -2556,6 +3582,15 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2556
3582
|
});
|
|
2557
3583
|
}
|
|
2558
3584
|
}
|
|
3585
|
+
for (const record of customRecords) {
|
|
3586
|
+
const normalizedUrl = normalizeUrlPath(record.url);
|
|
3587
|
+
if (!precomputedRoutes.has(normalizedUrl)) {
|
|
3588
|
+
precomputedRoutes.set(normalizedUrl, {
|
|
3589
|
+
routeFile: "",
|
|
3590
|
+
routeResolution: "exact"
|
|
3591
|
+
});
|
|
3592
|
+
}
|
|
3593
|
+
}
|
|
2559
3594
|
for (const page of indexablePages) {
|
|
2560
3595
|
const routeMatch = precomputedRoutes.get(normalizeUrlPath(page.url)) ?? mapUrlToRoute(page.url, routePatterns);
|
|
2561
3596
|
if (routeMatch.routeResolution === "best-effort") {
|
|
@@ -2573,6 +3608,17 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2573
3608
|
} else {
|
|
2574
3609
|
routeExact += 1;
|
|
2575
3610
|
}
|
|
3611
|
+
const anchorSet = incomingAnchorTexts.get(page.url);
|
|
3612
|
+
let incomingAnchorText;
|
|
3613
|
+
if (anchorSet && anchorSet.size > 0) {
|
|
3614
|
+
let joined = "";
|
|
3615
|
+
for (const phrase of anchorSet) {
|
|
3616
|
+
const next = joined ? `${joined} ${phrase}` : phrase;
|
|
3617
|
+
if (next.length > 500) break;
|
|
3618
|
+
joined = next;
|
|
3619
|
+
}
|
|
3620
|
+
incomingAnchorText = joined || void 0;
|
|
3621
|
+
}
|
|
2576
3622
|
const indexedPage = {
|
|
2577
3623
|
url: page.url,
|
|
2578
3624
|
title: page.title,
|
|
@@ -2582,40 +3628,113 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2582
3628
|
generatedAt: nowIso(),
|
|
2583
3629
|
incomingLinks: incomingLinkCount.get(page.url) ?? 0,
|
|
2584
3630
|
outgoingLinks: page.outgoingLinks.length,
|
|
3631
|
+
outgoingLinkUrls: page.outgoingLinks.map((l) => typeof l === "string" ? l : l.url),
|
|
2585
3632
|
depth: getUrlDepth(page.url),
|
|
2586
3633
|
tags: page.tags,
|
|
2587
3634
|
markdown: page.markdown,
|
|
2588
3635
|
description: page.description,
|
|
2589
|
-
keywords: page.keywords
|
|
3636
|
+
keywords: page.keywords,
|
|
3637
|
+
publishedAt: page.publishedAt,
|
|
3638
|
+
incomingAnchorText,
|
|
3639
|
+
meta: page.meta
|
|
2590
3640
|
};
|
|
2591
3641
|
pages.push(indexedPage);
|
|
2592
3642
|
this.logger.event("page_indexed", { url: page.url });
|
|
2593
3643
|
}
|
|
3644
|
+
const pageRecords = pages.map((p) => {
|
|
3645
|
+
const summary = buildPageSummary(p);
|
|
3646
|
+
return {
|
|
3647
|
+
url: p.url,
|
|
3648
|
+
title: p.title,
|
|
3649
|
+
markdown: p.markdown,
|
|
3650
|
+
projectId: scope.projectId,
|
|
3651
|
+
scopeName: scope.scopeName,
|
|
3652
|
+
routeFile: p.routeFile,
|
|
3653
|
+
routeResolution: p.routeResolution,
|
|
3654
|
+
incomingLinks: p.incomingLinks,
|
|
3655
|
+
outgoingLinks: p.outgoingLinks,
|
|
3656
|
+
outgoingLinkUrls: p.outgoingLinkUrls,
|
|
3657
|
+
depth: p.depth,
|
|
3658
|
+
tags: p.tags,
|
|
3659
|
+
indexedAt: p.generatedAt,
|
|
3660
|
+
summary,
|
|
3661
|
+
description: p.description,
|
|
3662
|
+
keywords: p.keywords,
|
|
3663
|
+
contentHash: buildPageContentHash(p),
|
|
3664
|
+
publishedAt: p.publishedAt,
|
|
3665
|
+
meta: p.meta
|
|
3666
|
+
};
|
|
3667
|
+
});
|
|
3668
|
+
const currentPageUrls = new Set(pageRecords.map((r) => r.url));
|
|
3669
|
+
const changedPages = pageRecords.filter(
|
|
3670
|
+
(r) => !existingPageHashes.has(r.url) || existingPageHashes.get(r.url) !== r.contentHash
|
|
3671
|
+
);
|
|
3672
|
+
const deletedPageUrls = [...existingPageHashes.keys()].filter((url) => !currentPageUrls.has(url));
|
|
2594
3673
|
if (!options.dryRun) {
|
|
2595
|
-
|
|
2596
|
-
|
|
2597
|
-
|
|
2598
|
-
|
|
2599
|
-
|
|
2600
|
-
|
|
2601
|
-
|
|
2602
|
-
|
|
2603
|
-
|
|
2604
|
-
|
|
2605
|
-
|
|
2606
|
-
|
|
2607
|
-
|
|
2608
|
-
|
|
2609
|
-
|
|
2610
|
-
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
|
|
2615
|
-
|
|
2616
|
-
|
|
3674
|
+
if (options.force) {
|
|
3675
|
+
await this.store.deletePages(scope);
|
|
3676
|
+
this.logger.info(`Upserting ${pageRecords.length} page summaries...`);
|
|
3677
|
+
const pageDocs = pageRecords.map((r) => ({
|
|
3678
|
+
id: r.url,
|
|
3679
|
+
data: r.summary ?? r.title,
|
|
3680
|
+
metadata: {
|
|
3681
|
+
title: r.title,
|
|
3682
|
+
url: r.url,
|
|
3683
|
+
description: r.description ?? "",
|
|
3684
|
+
keywords: r.keywords ?? [],
|
|
3685
|
+
summary: r.summary ?? "",
|
|
3686
|
+
tags: r.tags,
|
|
3687
|
+
markdown: r.markdown,
|
|
3688
|
+
routeFile: r.routeFile,
|
|
3689
|
+
routeResolution: r.routeResolution,
|
|
3690
|
+
incomingLinks: r.incomingLinks,
|
|
3691
|
+
outgoingLinks: r.outgoingLinks,
|
|
3692
|
+
outgoingLinkUrls: r.outgoingLinkUrls ?? [],
|
|
3693
|
+
depth: r.depth,
|
|
3694
|
+
indexedAt: r.indexedAt,
|
|
3695
|
+
contentHash: r.contentHash ?? "",
|
|
3696
|
+
publishedAt: r.publishedAt ?? null,
|
|
3697
|
+
...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
|
|
3698
|
+
}
|
|
3699
|
+
}));
|
|
3700
|
+
await this.store.upsertPages(pageDocs, scope);
|
|
3701
|
+
} else {
|
|
3702
|
+
if (changedPages.length > 0) {
|
|
3703
|
+
this.logger.info(`Upserting ${changedPages.length} changed page summaries...`);
|
|
3704
|
+
const pageDocs = changedPages.map((r) => ({
|
|
3705
|
+
id: r.url,
|
|
3706
|
+
data: r.summary ?? r.title,
|
|
3707
|
+
metadata: {
|
|
3708
|
+
title: r.title,
|
|
3709
|
+
url: r.url,
|
|
3710
|
+
description: r.description ?? "",
|
|
3711
|
+
keywords: r.keywords ?? [],
|
|
3712
|
+
summary: r.summary ?? "",
|
|
3713
|
+
tags: r.tags,
|
|
3714
|
+
markdown: r.markdown,
|
|
3715
|
+
routeFile: r.routeFile,
|
|
3716
|
+
routeResolution: r.routeResolution,
|
|
3717
|
+
incomingLinks: r.incomingLinks,
|
|
3718
|
+
outgoingLinks: r.outgoingLinks,
|
|
3719
|
+
outgoingLinkUrls: r.outgoingLinkUrls ?? [],
|
|
3720
|
+
depth: r.depth,
|
|
3721
|
+
indexedAt: r.indexedAt,
|
|
3722
|
+
contentHash: r.contentHash ?? "",
|
|
3723
|
+
publishedAt: r.publishedAt ?? null,
|
|
3724
|
+
...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
|
|
3725
|
+
}
|
|
3726
|
+
}));
|
|
3727
|
+
await this.store.upsertPages(pageDocs, scope);
|
|
3728
|
+
}
|
|
3729
|
+
if (deletedPageUrls.length > 0) {
|
|
3730
|
+
await this.store.deletePagesByIds(deletedPageUrls, scope);
|
|
3731
|
+
}
|
|
3732
|
+
}
|
|
2617
3733
|
}
|
|
3734
|
+
const pagesChanged = options.force ? pageRecords.length : changedPages.length;
|
|
3735
|
+
const pagesDeleted = deletedPageUrls.length;
|
|
2618
3736
|
stageEnd("pages", pagesStart);
|
|
3737
|
+
this.logger.info(`Page changes: ${pagesChanged} changed/new, ${pagesDeleted} deleted, ${pageRecords.length - changedPages.length} unchanged`);
|
|
2619
3738
|
this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
|
|
2620
3739
|
const chunkStart = stageStart();
|
|
2621
3740
|
this.logger.info("Chunking pages...");
|
|
@@ -2624,6 +3743,18 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2624
3743
|
if (typeof maxChunks === "number") {
|
|
2625
3744
|
chunks = chunks.slice(0, maxChunks);
|
|
2626
3745
|
}
|
|
3746
|
+
if (this.hooks.transformChunk) {
|
|
3747
|
+
const transformed = [];
|
|
3748
|
+
for (const chunk of chunks) {
|
|
3749
|
+
const result = await this.hooks.transformChunk(chunk);
|
|
3750
|
+
if (result === null) {
|
|
3751
|
+
this.logger.debug(`Chunk ${chunk.chunkKey} skipped by transformChunk hook`);
|
|
3752
|
+
continue;
|
|
3753
|
+
}
|
|
3754
|
+
transformed.push(result);
|
|
3755
|
+
}
|
|
3756
|
+
chunks = transformed;
|
|
3757
|
+
}
|
|
2627
3758
|
for (const chunk of chunks) {
|
|
2628
3759
|
this.logger.event("chunked", {
|
|
2629
3760
|
url: chunk.url,
|
|
@@ -2636,7 +3767,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2636
3767
|
for (const chunk of chunks) {
|
|
2637
3768
|
currentChunkMap.set(chunk.chunkKey, chunk);
|
|
2638
3769
|
}
|
|
2639
|
-
|
|
3770
|
+
let changedChunks = chunks.filter((chunk) => {
|
|
2640
3771
|
if (options.force) {
|
|
2641
3772
|
return true;
|
|
2642
3773
|
}
|
|
@@ -2650,38 +3781,43 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2650
3781
|
return existingHash !== chunk.contentHash;
|
|
2651
3782
|
});
|
|
2652
3783
|
const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
|
|
3784
|
+
if (this.hooks.beforeIndex) {
|
|
3785
|
+
changedChunks = await this.hooks.beforeIndex(changedChunks);
|
|
3786
|
+
}
|
|
2653
3787
|
this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
|
|
2654
3788
|
const upsertStart = stageStart();
|
|
2655
3789
|
let documentsUpserted = 0;
|
|
2656
3790
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
2657
|
-
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash
|
|
2658
|
-
const UPSTASH_CONTENT_LIMIT = 4096;
|
|
2659
|
-
const FIELD_OVERHEAD = 200;
|
|
2660
|
-
const MAX_TEXT_CHARS = UPSTASH_CONTENT_LIMIT - FIELD_OVERHEAD;
|
|
3791
|
+
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
|
|
2661
3792
|
const docs = changedChunks.map((chunk) => {
|
|
2662
|
-
const
|
|
2663
|
-
|
|
2664
|
-
|
|
2665
|
-
|
|
2666
|
-
|
|
2667
|
-
|
|
2668
|
-
const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
|
|
2669
|
-
const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
|
|
3793
|
+
const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
|
|
3794
|
+
if (embeddingText.length > 2e3) {
|
|
3795
|
+
this.logger.warn(
|
|
3796
|
+
`Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
|
|
3797
|
+
);
|
|
3798
|
+
}
|
|
2670
3799
|
return {
|
|
2671
3800
|
id: chunk.chunkKey,
|
|
2672
|
-
|
|
3801
|
+
data: embeddingText,
|
|
2673
3802
|
metadata: {
|
|
2674
|
-
|
|
2675
|
-
scopeName: scope.scopeName,
|
|
3803
|
+
url: chunk.url,
|
|
2676
3804
|
path: chunk.path,
|
|
3805
|
+
title: chunk.title,
|
|
3806
|
+
sectionTitle: chunk.sectionTitle ?? "",
|
|
3807
|
+
headingPath: chunk.headingPath.join(" > "),
|
|
2677
3808
|
snippet: chunk.snippet,
|
|
3809
|
+
chunkText: embeddingText,
|
|
3810
|
+
tags: chunk.tags,
|
|
2678
3811
|
ordinal: chunk.ordinal,
|
|
2679
3812
|
contentHash: chunk.contentHash,
|
|
2680
3813
|
depth: chunk.depth,
|
|
2681
3814
|
incomingLinks: chunk.incomingLinks,
|
|
2682
3815
|
routeFile: chunk.routeFile,
|
|
2683
3816
|
description: chunk.description ?? "",
|
|
2684
|
-
keywords:
|
|
3817
|
+
keywords: chunk.keywords ?? [],
|
|
3818
|
+
publishedAt: chunk.publishedAt ?? null,
|
|
3819
|
+
incomingAnchorText: chunk.incomingAnchorText ?? "",
|
|
3820
|
+
...chunk.meta && Object.keys(chunk.meta).length > 0 ? { meta: chunk.meta } : {}
|
|
2685
3821
|
}
|
|
2686
3822
|
};
|
|
2687
3823
|
});
|
|
@@ -2699,9 +3835,16 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2699
3835
|
} else {
|
|
2700
3836
|
this.logger.info("No chunks to upsert \u2014 all up to date");
|
|
2701
3837
|
}
|
|
3838
|
+
if (this.config.llmsTxt.enable && !options.dryRun) {
|
|
3839
|
+
const llmsStart = stageStart();
|
|
3840
|
+
await writeLlmsTxt(pages, this.config, this.cwd, this.logger);
|
|
3841
|
+
stageEnd("llms_txt", llmsStart);
|
|
3842
|
+
}
|
|
2702
3843
|
this.logger.info("Done.");
|
|
2703
|
-
|
|
3844
|
+
const stats = {
|
|
2704
3845
|
pagesProcessed: pages.length,
|
|
3846
|
+
pagesChanged,
|
|
3847
|
+
pagesDeleted,
|
|
2705
3848
|
chunksTotal: chunks.length,
|
|
2706
3849
|
chunksChanged: changedChunks.length,
|
|
2707
3850
|
documentsUpserted,
|
|
@@ -2710,10 +3853,15 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
2710
3853
|
routeBestEffort,
|
|
2711
3854
|
stageTimingsMs
|
|
2712
3855
|
};
|
|
3856
|
+
if (this.hooks.afterIndex) {
|
|
3857
|
+
await this.hooks.afterIndex(stats);
|
|
3858
|
+
}
|
|
3859
|
+
return stats;
|
|
2713
3860
|
}
|
|
2714
3861
|
};
|
|
2715
3862
|
|
|
2716
3863
|
// src/mcp/server.ts
|
|
3864
|
+
import { createHash as createHash2, timingSafeEqual } from "crypto";
|
|
2717
3865
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2718
3866
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
2719
3867
|
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
@@ -2721,16 +3869,139 @@ import { createMcpExpressApp } from "@modelcontextprotocol/sdk/server/express.js
|
|
|
2721
3869
|
import { z as z3 } from "zod";
|
|
2722
3870
|
|
|
2723
3871
|
// src/search/engine.ts
|
|
2724
|
-
import
|
|
3872
|
+
import path12 from "path";
|
|
2725
3873
|
import { z as z2 } from "zod";
|
|
3874
|
+
|
|
3875
|
+
// src/search/related-pages.ts
|
|
3876
|
+
/**
 * Scores the structural overlap of two URL paths from their shared leading
 * path segments.
 *
 * The result is `2 * shared / (segmentsA + segmentsB)`, where `shared` counts
 * how many segments match position-by-position before the first difference.
 *
 * @param {string} urlA - First URL path (for example "/docs/api/search").
 * @param {string} urlB - Second URL path.
 * @returns {number} A value in [0, 1]; 1 when both paths have no segments,
 *   0 when exactly one of them has no segments.
 */
function diceScore(urlA, urlB) {
  // Empty segments (leading, trailing or doubled slashes) are discarded.
  const toSegments = (url) => url.split("/").filter(Boolean);
  const partsA = toSegments(urlA);
  const partsB = toSegments(urlB);
  const totalSegments = partsA.length + partsB.length;
  if (totalSegments === 0) {
    return 1;
  }
  if (partsA.length === 0 || partsB.length === 0) {
    return 0;
  }
  // Segments are never empty strings, so running past the end of partsB
  // (undefined) always counts as a mismatch.
  const firstMismatch = partsA.findIndex((segment, index) => segment !== partsB[index]);
  const shared = firstMismatch === -1 ? partsA.length : firstMismatch;
  return 2 * shared / totalSegments;
}
|
|
3892
|
+
/**
 * Blends the three relatedness signals into one score.
 *
 * @param {boolean} isLinked - Whether the pages link to each other in either direction.
 * @param {number} dice - URL path overlap score.
 * @param {number} semantic - Semantic similarity score.
 * @returns {number} 0.5 when linked (else 0), plus 0.3 * dice, plus 0.2 * semantic.
 */
function compositeScore(isLinked, dice, semantic) {
  const linkBonus = isLinked ? 0.5 : 0;
  return linkBonus + 0.3 * dice + 0.2 * semantic;
}
|
|
3895
|
+
/**
 * Picks the single label that best describes why a page is related.
 *
 * Precedence: outgoing link, then incoming link, then "sibling" when the URL
 * overlap score is above 0.4, otherwise "semantic".
 *
 * @param {boolean} isOutgoing - The source page links to the candidate.
 * @param {boolean} isIncoming - The candidate links to the source page.
 * @param {number} dice - URL path overlap score.
 * @returns {"outgoing_link" | "incoming_link" | "sibling" | "semantic"}
 */
function dominantRelationshipType(isOutgoing, isIncoming, dice) {
  if (isOutgoing) {
    return "outgoing_link";
  }
  if (isIncoming) {
    return "incoming_link";
  }
  return dice > 0.4 ? "sibling" : "semantic";
}
|
|
3901
|
+
|
|
3902
|
+
// src/search/engine.ts
|
|
3903
|
+
// Optional per-request overrides for ranking and search tuning. Every field is
// optional, and so is the whole object.
var rankingOverridesSchema = (() => {
  // Small factories keep the repeated validator shapes in one place.
  const optionalFlag = () => z2.boolean().optional();
  const optionalNumber = () => z2.number().optional();
  const optionalRatio = () => z2.number().min(0).max(1).optional();

  const weights = z2.object({
    incomingLinks: optionalNumber(),
    depth: optionalNumber(),
    aggregation: optionalNumber(),
    titleMatch: optionalNumber()
  }).optional();

  const ranking = z2.object({
    enableIncomingLinkBoost: optionalFlag(),
    enableDepthBoost: optionalFlag(),
    aggregationCap: z2.number().int().positive().optional(),
    aggregationDecay: optionalRatio(),
    minChunkScoreRatio: optionalRatio(),
    minScoreRatio: optionalRatio(),
    scoreGapThreshold: optionalRatio(),
    weights
  }).optional();

  const search = z2.object({
    pageSearchWeight: optionalRatio()
  }).optional();

  return z2.object({ ranking, search }).optional();
})();
|
|
2726
3923
|
// Validation schema for a search request.
var requestSchema = z2.object({
  // Query text; trimmed, and must contain at least one character.
  q: z2.string().trim().min(1),
  // Number of results to return: a positive integer, at most 100.
  topK: z2.number().int().positive().max(100).optional(),
  scope: z2.string().optional(),
  pathPrefix: z2.string().optional(),
  tags: z2.array(z2.string()).optional(),
  // Metadata filters: keys mapped to string, number or boolean values.
  filters: z2.record(z2.string(), z2.union([z2.string(), z2.number(), z2.boolean()])).optional(),
  // Result granularity: one entry per page or one entry per chunk.
  groupBy: z2.enum(["page", "chunk"]).optional(),
  // Cap on sub-results per result: a positive integer, at most 20.
  maxSubResults: z2.number().int().positive().max(20).optional(),
  debug: z2.boolean().optional(),
  rankingOverrides: rankingOverridesSchema
});
|
|
3935
|
+
var MAX_SITE_STRUCTURE_PAGES = 2e3;
|
|
3936
|
+
/**
 * Creates an empty site-structure tree node.
 *
 * @param {string} url - URL path the node represents.
 * @param {number} depth - Depth of the node in the tree (the root is 0).
 * @returns {object} A node that is not yet marked as indexed and has no
 *   title, route file or children.
 */
function makeNode(url, depth) {
  return {
    url,
    title: "",
    depth,
    routeFile: "",
    isIndexed: false,
    childCount: 0,
    children: []
  };
}
|
|
3939
|
+
/**
 * Builds a URL hierarchy from a flat list of pages.
 *
 * Every ancestor path of every page gets a node, so intermediate paths that
 * are not pages themselves appear as nodes with `isIndexed: false`. Children
 * are sorted by URL and each node's `childCount` is its number of direct
 * children.
 *
 * @param {Array<{url: string, title: string, routeFile: string}>} pages - Pages to place in the tree.
 * @param {string} [pathPrefix] - When given, the node for this path is
 *   returned instead of the site root.
 * @returns {object} The root node, or the node for `pathPrefix`; when no such
 *   node exists, a fresh empty node for that path.
 */
function buildTree(pages, pathPrefix) {
  const root = makeNode("/", 0);
  const nodesByUrl = /* @__PURE__ */ new Map([["/", root]]);
  const markIndexed = (target, page) => {
    target.title = page.title;
    target.routeFile = page.routeFile;
    target.isIndexed = true;
  };

  for (const page of pages) {
    const pageUrl = normalizeUrlPath(page.url);
    const parts = pageUrl.split("/").filter(Boolean);
    if (parts.length === 0) {
      markIndexed(root, page);
      continue;
    }
    // Make sure a node exists for each ancestor path and for the page itself.
    parts.forEach((_, index) => {
      const depth = index + 1;
      const ancestorUrl = "/" + parts.slice(0, depth).join("/");
      if (!nodesByUrl.has(ancestorUrl)) {
        nodesByUrl.set(ancestorUrl, makeNode(ancestorUrl, depth));
      }
    });
    markIndexed(nodesByUrl.get(pageUrl), page);
  }

  // Attach every non-root node to its parent, falling back to the root.
  for (const [url, node] of nodesByUrl) {
    if (url === "/") continue;
    const parts = url.split("/").filter(Boolean);
    const parentUrl = parts.length === 1 ? "/" : "/" + parts.slice(0, -1).join("/");
    const parent = nodesByUrl.get(parentUrl) ?? root;
    parent.children.push(node);
  }

  // Walk the tree with an explicit stack, sorting and counting children.
  const pending = [root];
  while (pending.length > 0) {
    const current = pending.pop();
    current.children.sort((left, right) => left.url.localeCompare(right.url));
    current.childCount = current.children.length;
    pending.push(...current.children);
  }

  if (!pathPrefix) {
    return root;
  }
  const prefixUrl = normalizeUrlPath(pathPrefix);
  return nodesByUrl.get(prefixUrl) ?? makeNode(prefixUrl, prefixUrl.split("/").filter(Boolean).length);
}
|
|
3988
|
+
/**
 * Returns a copy of `base` with the `search`, `ranking` and `ranking.weights`
 * sections overlaid by the matching sections of `overrides`.
 *
 * Override keys whose value is `undefined` are ignored. A caller that passes
 * `{ ranking: { minScoreRatio: undefined } }` therefore keeps the configured
 * value instead of erasing it. Neither argument is mutated.
 *
 * @param {object} base - Resolved configuration; must contain `search`,
 *   `ranking` and `ranking.weights` objects.
 * @param {object} overrides - Partial overrides; `search`, `ranking` and
 *   `ranking.weights` may each be absent.
 * @returns {object} A new configuration object with the overrides applied.
 */
function mergeRankingOverrides(base, overrides) {
  // Keep only the keys that carry a value, so "present but undefined" cannot
  // clobber the base setting when spread.
  const definedOnly = (section) => Object.fromEntries(
    Object.entries(section ?? {}).filter(([, value]) => value !== void 0)
  );
  // Weights are merged separately below, so lift them out of the ranking overlay.
  const { weights: weightOverrides, ...rankingOverrides } = overrides.ranking ?? {};
  return {
    ...base,
    search: {
      ...base.search,
      ...definedOnly(overrides.search)
    },
    ranking: {
      ...base.ranking,
      ...definedOnly(rankingOverrides),
      weights: {
        ...base.ranking.weights,
        ...definedOnly(weightOverrides)
      }
    }
  };
}
|
|
2734
4005
|
var SearchEngine = class _SearchEngine {
|
|
2735
4006
|
cwd;
|
|
2736
4007
|
config;
|
|
@@ -2741,7 +4012,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
2741
4012
|
this.store = options.store;
|
|
2742
4013
|
}
|
|
2743
4014
|
static async create(options = {}) {
|
|
2744
|
-
const cwd =
|
|
4015
|
+
const cwd = path12.resolve(options.cwd ?? process.cwd());
|
|
2745
4016
|
const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
|
|
2746
4017
|
const store = options.store ?? await createUpstashStore(config);
|
|
2747
4018
|
return new _SearchEngine({
|
|
@@ -2760,125 +4031,203 @@ var SearchEngine = class _SearchEngine {
|
|
|
2760
4031
|
}
|
|
2761
4032
|
const input = parsed.data;
|
|
2762
4033
|
const totalStart = process.hrtime.bigint();
|
|
4034
|
+
const effectiveConfig = input.debug && input.rankingOverrides ? mergeRankingOverrides(this.config, input.rankingOverrides) : this.config;
|
|
2763
4035
|
const resolvedScope = resolveScope(this.config, input.scope);
|
|
2764
4036
|
const topK = input.topK ?? 10;
|
|
4037
|
+
const maxSubResults = input.maxSubResults ?? 5;
|
|
2765
4038
|
const groupByPage = (input.groupBy ?? "page") === "page";
|
|
2766
|
-
const
|
|
2767
|
-
const
|
|
2768
|
-
|
|
2769
|
-
|
|
2770
|
-
|
|
2771
|
-
|
|
2772
|
-
|
|
2773
|
-
|
|
2774
|
-
|
|
4039
|
+
const queryText = input.q;
|
|
4040
|
+
const pathPrefix = input.pathPrefix ? input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}` : void 0;
|
|
4041
|
+
const filterTags = input.tags && input.tags.length > 0 ? input.tags : void 0;
|
|
4042
|
+
const metaFilterStr = input.filters && Object.keys(input.filters).length > 0 ? buildMetaFilterString(input.filters) : "";
|
|
4043
|
+
const metaFilter = metaFilterStr || void 0;
|
|
4044
|
+
const applyPagePostFilters = (hits) => {
|
|
4045
|
+
let filtered = hits;
|
|
4046
|
+
if (pathPrefix) {
|
|
4047
|
+
filtered = filtered.filter((h) => h.url.startsWith(pathPrefix));
|
|
2775
4048
|
}
|
|
2776
|
-
|
|
2777
|
-
|
|
2778
|
-
|
|
4049
|
+
if (filterTags) {
|
|
4050
|
+
filtered = filtered.filter(
|
|
4051
|
+
(h) => filterTags.every((tag) => h.tags.includes(tag))
|
|
4052
|
+
);
|
|
4053
|
+
}
|
|
4054
|
+
return filtered;
|
|
4055
|
+
};
|
|
4056
|
+
const applyChunkPostFilters = (hits) => {
|
|
4057
|
+
let filtered = hits;
|
|
4058
|
+
if (filterTags) {
|
|
4059
|
+
filtered = filtered.filter(
|
|
4060
|
+
(h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
|
|
4061
|
+
);
|
|
4062
|
+
}
|
|
4063
|
+
return filtered;
|
|
4064
|
+
};
|
|
2779
4065
|
const searchStart = process.hrtime.bigint();
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
const
|
|
2783
|
-
const
|
|
2784
|
-
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
-
|
|
2792
|
-
|
|
2793
|
-
|
|
2794
|
-
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
{
|
|
2798
|
-
limit: chunkLimit,
|
|
2799
|
-
semanticWeight: this.config.search.semanticWeight,
|
|
2800
|
-
inputEnrichment: this.config.search.inputEnrichment,
|
|
2801
|
-
reranking: false,
|
|
2802
|
-
filter
|
|
2803
|
-
},
|
|
4066
|
+
if (groupByPage) {
|
|
4067
|
+
const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
|
|
4068
|
+
const pageLimit = Math.max(topK * 2, 20);
|
|
4069
|
+
const pageHits = await this.store.searchPagesByText(
|
|
4070
|
+
queryText,
|
|
4071
|
+
{ limit: pageLimit * fetchMultiplier, filter: metaFilter },
|
|
4072
|
+
resolvedScope
|
|
4073
|
+
);
|
|
4074
|
+
const filteredPages = applyPagePostFilters(pageHits);
|
|
4075
|
+
let rankedPages = rankPageHits(filteredPages, effectiveConfig, input.q, input.debug);
|
|
4076
|
+
rankedPages = trimPagesByScoreGap(rankedPages, effectiveConfig);
|
|
4077
|
+
const topPages = rankedPages.slice(0, topK);
|
|
4078
|
+
const chunkPromises = topPages.map(
|
|
4079
|
+
(page) => this.store.searchChunksByUrl(
|
|
4080
|
+
queryText,
|
|
4081
|
+
page.url,
|
|
4082
|
+
{ limit: maxSubResults, filter: metaFilter },
|
|
2804
4083
|
resolvedScope
|
|
2805
|
-
)
|
|
2806
|
-
|
|
2807
|
-
const
|
|
2808
|
-
|
|
4084
|
+
).then((chunks) => applyChunkPostFilters(chunks))
|
|
4085
|
+
);
|
|
4086
|
+
const allChunks = await Promise.all(chunkPromises);
|
|
4087
|
+
const searchMs = hrTimeMs(searchStart);
|
|
4088
|
+
const results = this.buildPageFirstResults(topPages, allChunks, input.q, input.debug, maxSubResults);
|
|
4089
|
+
return {
|
|
4090
|
+
q: input.q,
|
|
4091
|
+
scope: resolvedScope.scopeName,
|
|
4092
|
+
results,
|
|
4093
|
+
meta: {
|
|
4094
|
+
timingsMs: {
|
|
4095
|
+
search: Math.round(searchMs),
|
|
4096
|
+
total: Math.round(hrTimeMs(totalStart))
|
|
4097
|
+
}
|
|
4098
|
+
}
|
|
4099
|
+
};
|
|
2809
4100
|
} else {
|
|
4101
|
+
const candidateK = Math.max(50, topK);
|
|
4102
|
+
const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
|
|
2810
4103
|
const hits = await this.store.search(
|
|
2811
|
-
|
|
2812
|
-
{
|
|
2813
|
-
limit: candidateK,
|
|
2814
|
-
semanticWeight: this.config.search.semanticWeight,
|
|
2815
|
-
inputEnrichment: this.config.search.inputEnrichment,
|
|
2816
|
-
reranking: this.config.search.reranking,
|
|
2817
|
-
filter
|
|
2818
|
-
},
|
|
4104
|
+
queryText,
|
|
4105
|
+
{ limit: candidateK * fetchMultiplier, filter: metaFilter },
|
|
2819
4106
|
resolvedScope
|
|
2820
4107
|
);
|
|
2821
|
-
|
|
2822
|
-
|
|
2823
|
-
|
|
2824
|
-
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
2828
|
-
|
|
2829
|
-
|
|
2830
|
-
|
|
2831
|
-
|
|
2832
|
-
|
|
4108
|
+
let filtered = hits;
|
|
4109
|
+
if (pathPrefix) {
|
|
4110
|
+
filtered = filtered.filter((h) => h.metadata.url.startsWith(pathPrefix));
|
|
4111
|
+
}
|
|
4112
|
+
if (filterTags) {
|
|
4113
|
+
filtered = filtered.filter(
|
|
4114
|
+
(h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
|
|
4115
|
+
);
|
|
4116
|
+
}
|
|
4117
|
+
const ranked = rankHits(filtered, effectiveConfig, input.q, input.debug);
|
|
4118
|
+
const searchMs = hrTimeMs(searchStart);
|
|
4119
|
+
const results = this.buildResults(ranked, topK, false, maxSubResults, input.q, input.debug, effectiveConfig);
|
|
4120
|
+
return {
|
|
4121
|
+
q: input.q,
|
|
4122
|
+
scope: resolvedScope.scopeName,
|
|
4123
|
+
results,
|
|
4124
|
+
meta: {
|
|
4125
|
+
timingsMs: {
|
|
4126
|
+
search: Math.round(searchMs),
|
|
4127
|
+
total: Math.round(hrTimeMs(totalStart))
|
|
4128
|
+
}
|
|
2833
4129
|
}
|
|
4130
|
+
};
|
|
4131
|
+
}
|
|
4132
|
+
}
|
|
4133
|
+
buildPageFirstResults(rankedPages, allChunks, query, debug, maxSubResults = 5) {
|
|
4134
|
+
return rankedPages.map((page, i) => {
|
|
4135
|
+
const chunks = allChunks[i] ?? [];
|
|
4136
|
+
const bestChunk = chunks[0];
|
|
4137
|
+
const snippet = bestChunk ? query ? queryAwareExcerpt(bestChunk.metadata.chunkText, query) : toSnippet(bestChunk.metadata.chunkText) : page.description || page.title;
|
|
4138
|
+
const result = {
|
|
4139
|
+
url: page.url,
|
|
4140
|
+
title: page.title,
|
|
4141
|
+
sectionTitle: bestChunk?.metadata.sectionTitle || void 0,
|
|
4142
|
+
snippet,
|
|
4143
|
+
chunkText: bestChunk?.metadata.chunkText || void 0,
|
|
4144
|
+
score: Number(page.finalScore.toFixed(6)),
|
|
4145
|
+
routeFile: page.routeFile,
|
|
4146
|
+
chunks: chunks.length > 0 ? chunks.slice(0, maxSubResults).map((c) => ({
|
|
4147
|
+
sectionTitle: c.metadata.sectionTitle || void 0,
|
|
4148
|
+
snippet: query ? queryAwareExcerpt(c.metadata.chunkText, query) : toSnippet(c.metadata.chunkText),
|
|
4149
|
+
chunkText: c.metadata.chunkText || void 0,
|
|
4150
|
+
headingPath: c.metadata.headingPath,
|
|
4151
|
+
score: Number(c.score.toFixed(6))
|
|
4152
|
+
})) : void 0
|
|
4153
|
+
};
|
|
4154
|
+
if (debug && page.breakdown) {
|
|
4155
|
+
result.breakdown = {
|
|
4156
|
+
baseScore: page.breakdown.baseScore,
|
|
4157
|
+
incomingLinkBoost: page.breakdown.incomingLinkBoost,
|
|
4158
|
+
depthBoost: page.breakdown.depthBoost,
|
|
4159
|
+
titleMatchBoost: page.breakdown.titleMatchBoost,
|
|
4160
|
+
freshnessBoost: page.breakdown.freshnessBoost,
|
|
4161
|
+
anchorTextMatchBoost: 0
|
|
4162
|
+
};
|
|
2834
4163
|
}
|
|
2835
|
-
|
|
4164
|
+
return result;
|
|
4165
|
+
});
|
|
2836
4166
|
}
|
|
2837
|
-
ensureSnippet(hit) {
|
|
4167
|
+
ensureSnippet(hit, query) {
|
|
4168
|
+
const chunkText = hit.hit.metadata.chunkText;
|
|
4169
|
+
if (query && chunkText) return queryAwareExcerpt(chunkText, query);
|
|
2838
4170
|
const snippet = hit.hit.metadata.snippet;
|
|
2839
4171
|
if (snippet && snippet.length >= 30) return snippet;
|
|
2840
|
-
const chunkText = hit.hit.metadata.chunkText;
|
|
2841
4172
|
if (chunkText) return toSnippet(chunkText);
|
|
2842
4173
|
return snippet || "";
|
|
2843
4174
|
}
|
|
2844
|
-
buildResults(ordered, topK, groupByPage,
|
|
4175
|
+
buildResults(ordered, topK, groupByPage, maxSubResults, query, debug, config) {
|
|
4176
|
+
const cfg = config ?? this.config;
|
|
2845
4177
|
if (groupByPage) {
|
|
2846
|
-
let pages = aggregateByPage(ordered,
|
|
2847
|
-
pages = trimByScoreGap(pages,
|
|
2848
|
-
const minRatio =
|
|
4178
|
+
let pages = aggregateByPage(ordered, cfg);
|
|
4179
|
+
pages = trimByScoreGap(pages, cfg);
|
|
4180
|
+
const minRatio = cfg.ranking.minChunkScoreRatio;
|
|
2849
4181
|
return pages.slice(0, topK).map((page) => {
|
|
2850
4182
|
const bestScore = page.bestChunk.finalScore;
|
|
2851
4183
|
const minChunkScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
|
|
2852
|
-
const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0,
|
|
2853
|
-
|
|
4184
|
+
const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, maxSubResults);
|
|
4185
|
+
const result = {
|
|
2854
4186
|
url: page.url,
|
|
2855
4187
|
title: page.title,
|
|
2856
4188
|
sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
|
|
2857
|
-
snippet: this.ensureSnippet(page.bestChunk),
|
|
4189
|
+
snippet: this.ensureSnippet(page.bestChunk, query),
|
|
4190
|
+
chunkText: page.bestChunk.hit.metadata.chunkText || void 0,
|
|
2858
4191
|
score: Number(page.pageScore.toFixed(6)),
|
|
2859
4192
|
routeFile: page.routeFile,
|
|
2860
|
-
chunks: meaningful.length
|
|
4193
|
+
chunks: meaningful.length >= 1 ? meaningful.map((c) => ({
|
|
2861
4194
|
sectionTitle: c.hit.metadata.sectionTitle || void 0,
|
|
2862
|
-
snippet: this.ensureSnippet(c),
|
|
4195
|
+
snippet: this.ensureSnippet(c, query),
|
|
4196
|
+
chunkText: c.hit.metadata.chunkText || void 0,
|
|
2863
4197
|
headingPath: c.hit.metadata.headingPath,
|
|
2864
4198
|
score: Number(c.finalScore.toFixed(6))
|
|
2865
4199
|
})) : void 0
|
|
2866
4200
|
};
|
|
4201
|
+
if (debug && page.bestChunk.breakdown) {
|
|
4202
|
+
result.breakdown = page.bestChunk.breakdown;
|
|
4203
|
+
}
|
|
4204
|
+
return result;
|
|
2867
4205
|
});
|
|
2868
4206
|
} else {
|
|
2869
4207
|
let filtered = ordered;
|
|
2870
|
-
const
|
|
2871
|
-
if (
|
|
2872
|
-
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
|
|
2876
|
-
|
|
2877
|
-
|
|
2878
|
-
|
|
2879
|
-
|
|
2880
|
-
|
|
2881
|
-
|
|
4208
|
+
const minScoreRatio = cfg.ranking.minScoreRatio;
|
|
4209
|
+
if (minScoreRatio > 0 && ordered.length > 0) {
|
|
4210
|
+
const topScore = ordered[0].finalScore;
|
|
4211
|
+
if (Number.isFinite(topScore) && topScore > 0) {
|
|
4212
|
+
const threshold = topScore * minScoreRatio;
|
|
4213
|
+
filtered = ordered.filter((entry) => entry.finalScore >= threshold);
|
|
4214
|
+
}
|
|
4215
|
+
}
|
|
4216
|
+
return filtered.slice(0, topK).map(({ hit, finalScore, breakdown }) => {
|
|
4217
|
+
const result = {
|
|
4218
|
+
url: hit.metadata.url,
|
|
4219
|
+
title: hit.metadata.title,
|
|
4220
|
+
sectionTitle: hit.metadata.sectionTitle || void 0,
|
|
4221
|
+
snippet: this.ensureSnippet({ hit, finalScore }, query),
|
|
4222
|
+
chunkText: hit.metadata.chunkText || void 0,
|
|
4223
|
+
score: Number(finalScore.toFixed(6)),
|
|
4224
|
+
routeFile: hit.metadata.routeFile
|
|
4225
|
+
};
|
|
4226
|
+
if (debug && breakdown) {
|
|
4227
|
+
result.breakdown = breakdown;
|
|
4228
|
+
}
|
|
4229
|
+
return result;
|
|
4230
|
+
});
|
|
2882
4231
|
}
|
|
2883
4232
|
}
|
|
2884
4233
|
async getPage(pathOrUrl, scope) {
|
|
@@ -2904,6 +4253,116 @@ var SearchEngine = class _SearchEngine {
|
|
|
2904
4253
|
markdown: page.markdown
|
|
2905
4254
|
};
|
|
2906
4255
|
}
|
|
4256
|
+
async listPages(opts) {
|
|
4257
|
+
const resolvedScope = resolveScope(this.config, opts?.scope);
|
|
4258
|
+
const pathPrefix = opts?.pathPrefix ? opts.pathPrefix.startsWith("/") ? opts.pathPrefix : `/${opts.pathPrefix}` : void 0;
|
|
4259
|
+
return this.store.listPages(resolvedScope, {
|
|
4260
|
+
cursor: opts?.cursor,
|
|
4261
|
+
limit: opts?.limit,
|
|
4262
|
+
pathPrefix
|
|
4263
|
+
});
|
|
4264
|
+
}
|
|
4265
|
+
async getSiteStructure(opts) {
|
|
4266
|
+
const maxPages = Math.min(opts?.maxPages ?? MAX_SITE_STRUCTURE_PAGES, MAX_SITE_STRUCTURE_PAGES);
|
|
4267
|
+
const allPages = [];
|
|
4268
|
+
let cursor;
|
|
4269
|
+
let truncated = false;
|
|
4270
|
+
do {
|
|
4271
|
+
const result = await this.listPages({
|
|
4272
|
+
pathPrefix: opts?.pathPrefix,
|
|
4273
|
+
scope: opts?.scope,
|
|
4274
|
+
cursor,
|
|
4275
|
+
limit: 200
|
|
4276
|
+
});
|
|
4277
|
+
allPages.push(...result.pages);
|
|
4278
|
+
cursor = result.nextCursor;
|
|
4279
|
+
if (allPages.length >= maxPages) {
|
|
4280
|
+
truncated = allPages.length > maxPages || !!cursor;
|
|
4281
|
+
allPages.length = maxPages;
|
|
4282
|
+
break;
|
|
4283
|
+
}
|
|
4284
|
+
} while (cursor);
|
|
4285
|
+
const root = buildTree(allPages, opts?.pathPrefix);
|
|
4286
|
+
return {
|
|
4287
|
+
root,
|
|
4288
|
+
totalPages: allPages.length,
|
|
4289
|
+
truncated
|
|
4290
|
+
};
|
|
4291
|
+
}
|
|
4292
|
+
async getRelatedPages(pathOrUrl, opts) {
|
|
4293
|
+
const resolvedScope = resolveScope(this.config, opts?.scope);
|
|
4294
|
+
const urlPath = this.resolveInputPath(pathOrUrl);
|
|
4295
|
+
const topK = Math.min(opts?.topK ?? 10, 25);
|
|
4296
|
+
const source = await this.store.fetchPageWithVector(urlPath, resolvedScope);
|
|
4297
|
+
if (!source) {
|
|
4298
|
+
throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
|
|
4299
|
+
}
|
|
4300
|
+
const sourceOutgoing = new Set(source.metadata.outgoingLinkUrls ?? []);
|
|
4301
|
+
const semanticHits = await this.store.searchPagesByVector(
|
|
4302
|
+
source.vector,
|
|
4303
|
+
{ limit: 50 },
|
|
4304
|
+
resolvedScope
|
|
4305
|
+
);
|
|
4306
|
+
const filteredHits = semanticHits.filter((h) => h.url !== urlPath);
|
|
4307
|
+
const semanticScoreMap = /* @__PURE__ */ new Map();
|
|
4308
|
+
for (const hit of filteredHits) {
|
|
4309
|
+
semanticScoreMap.set(hit.url, hit.score);
|
|
4310
|
+
}
|
|
4311
|
+
const candidateUrls = /* @__PURE__ */ new Set();
|
|
4312
|
+
for (const hit of filteredHits) {
|
|
4313
|
+
candidateUrls.add(hit.url);
|
|
4314
|
+
}
|
|
4315
|
+
for (const url of sourceOutgoing) {
|
|
4316
|
+
if (url !== urlPath) candidateUrls.add(url);
|
|
4317
|
+
}
|
|
4318
|
+
const missingUrls = [...sourceOutgoing].filter(
|
|
4319
|
+
(u) => u !== urlPath && !semanticScoreMap.has(u)
|
|
4320
|
+
);
|
|
4321
|
+
const fetchedPages = missingUrls.length > 0 ? await this.store.fetchPagesBatch(missingUrls, resolvedScope) : [];
|
|
4322
|
+
const metaMap = /* @__PURE__ */ new Map();
|
|
4323
|
+
for (const hit of filteredHits) {
|
|
4324
|
+
metaMap.set(hit.url, { title: hit.title, routeFile: hit.routeFile, outgoingLinkUrls: [] });
|
|
4325
|
+
}
|
|
4326
|
+
for (const p of fetchedPages) {
|
|
4327
|
+
metaMap.set(p.url, { title: p.title, routeFile: p.routeFile, outgoingLinkUrls: p.outgoingLinkUrls });
|
|
4328
|
+
}
|
|
4329
|
+
const semanticUrls = filteredHits.map((h) => h.url);
|
|
4330
|
+
if (semanticUrls.length > 0) {
|
|
4331
|
+
const semanticPageData = await this.store.fetchPagesBatch(semanticUrls, resolvedScope);
|
|
4332
|
+
for (const p of semanticPageData) {
|
|
4333
|
+
const existing = metaMap.get(p.url);
|
|
4334
|
+
if (existing) {
|
|
4335
|
+
existing.outgoingLinkUrls = p.outgoingLinkUrls;
|
|
4336
|
+
}
|
|
4337
|
+
}
|
|
4338
|
+
}
|
|
4339
|
+
const candidates = [];
|
|
4340
|
+
for (const url of candidateUrls) {
|
|
4341
|
+
const meta = metaMap.get(url);
|
|
4342
|
+
if (!meta) continue;
|
|
4343
|
+
const isOutgoing = sourceOutgoing.has(url);
|
|
4344
|
+
const isIncoming = meta.outgoingLinkUrls.includes(urlPath);
|
|
4345
|
+
const isLinked = isOutgoing || isIncoming;
|
|
4346
|
+
const dice = diceScore(urlPath, url);
|
|
4347
|
+
const semantic = semanticScoreMap.get(url) ?? 0;
|
|
4348
|
+
const score = compositeScore(isLinked, dice, semantic);
|
|
4349
|
+
const relationshipType = dominantRelationshipType(isOutgoing, isIncoming, dice);
|
|
4350
|
+
candidates.push({
|
|
4351
|
+
url,
|
|
4352
|
+
title: meta.title,
|
|
4353
|
+
score: Number(score.toFixed(6)),
|
|
4354
|
+
relationshipType,
|
|
4355
|
+
routeFile: meta.routeFile
|
|
4356
|
+
});
|
|
4357
|
+
}
|
|
4358
|
+
candidates.sort((a, b) => b.score - a.score);
|
|
4359
|
+
const results = candidates.slice(0, topK);
|
|
4360
|
+
return {
|
|
4361
|
+
sourceUrl: urlPath,
|
|
4362
|
+
scope: resolvedScope.scopeName,
|
|
4363
|
+
relatedPages: results
|
|
4364
|
+
};
|
|
4365
|
+
}
|
|
2907
4366
|
async health() {
|
|
2908
4367
|
return this.store.health();
|
|
2909
4368
|
}
|
|
@@ -2928,14 +4387,40 @@ function createServer(engine) {
|
|
|
2928
4387
|
server.registerTool(
|
|
2929
4388
|
"search",
|
|
2930
4389
|
{
|
|
2931
|
-
description:
|
|
4390
|
+
description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
|
|
2932
4391
|
inputSchema: {
|
|
2933
4392
|
query: z3.string().min(1),
|
|
2934
4393
|
scope: z3.string().optional(),
|
|
2935
4394
|
topK: z3.number().int().positive().max(100).optional(),
|
|
2936
4395
|
pathPrefix: z3.string().optional(),
|
|
2937
4396
|
tags: z3.array(z3.string()).optional(),
|
|
2938
|
-
|
|
4397
|
+
filters: z3.record(z3.string(), z3.union([z3.string(), z3.number(), z3.boolean()])).optional(),
|
|
4398
|
+
groupBy: z3.enum(["page", "chunk"]).optional(),
|
|
4399
|
+
maxSubResults: z3.number().int().positive().max(20).optional()
|
|
4400
|
+
},
|
|
4401
|
+
outputSchema: {
|
|
4402
|
+
q: z3.string(),
|
|
4403
|
+
scope: z3.string(),
|
|
4404
|
+
results: z3.array(z3.object({
|
|
4405
|
+
url: z3.string(),
|
|
4406
|
+
title: z3.string(),
|
|
4407
|
+
sectionTitle: z3.string().optional(),
|
|
4408
|
+
snippet: z3.string(),
|
|
4409
|
+
score: z3.number(),
|
|
4410
|
+
routeFile: z3.string(),
|
|
4411
|
+
chunks: z3.array(z3.object({
|
|
4412
|
+
sectionTitle: z3.string().optional(),
|
|
4413
|
+
snippet: z3.string(),
|
|
4414
|
+
headingPath: z3.array(z3.string()),
|
|
4415
|
+
score: z3.number()
|
|
4416
|
+
})).optional()
|
|
4417
|
+
})),
|
|
4418
|
+
meta: z3.object({
|
|
4419
|
+
timingsMs: z3.object({
|
|
4420
|
+
search: z3.number(),
|
|
4421
|
+
total: z3.number()
|
|
4422
|
+
})
|
|
4423
|
+
})
|
|
2939
4424
|
}
|
|
2940
4425
|
},
|
|
2941
4426
|
async (input) => {
|
|
@@ -2945,7 +4430,9 @@ function createServer(engine) {
|
|
|
2945
4430
|
scope: input.scope,
|
|
2946
4431
|
pathPrefix: input.pathPrefix,
|
|
2947
4432
|
tags: input.tags,
|
|
2948
|
-
|
|
4433
|
+
filters: input.filters,
|
|
4434
|
+
groupBy: input.groupBy,
|
|
4435
|
+
maxSubResults: input.maxSubResults
|
|
2949
4436
|
});
|
|
2950
4437
|
return {
|
|
2951
4438
|
content: [
|
|
@@ -2953,7 +4440,8 @@ function createServer(engine) {
|
|
|
2953
4440
|
type: "text",
|
|
2954
4441
|
text: JSON.stringify(result, null, 2)
|
|
2955
4442
|
}
|
|
2956
|
-
]
|
|
4443
|
+
],
|
|
4444
|
+
structuredContent: result
|
|
2957
4445
|
};
|
|
2958
4446
|
}
|
|
2959
4447
|
);
|
|
@@ -2978,8 +4466,134 @@ function createServer(engine) {
|
|
|
2978
4466
|
};
|
|
2979
4467
|
}
|
|
2980
4468
|
);
|
|
4469
|
+
// MCP tool "list_pages": forwards the paging/filter arguments to
// engine.listPages and returns the result as pretty-printed JSON text.
server.registerTool(
  "list_pages",
  {
    description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
    inputSchema: {
      pathPrefix: z3.string().optional(),
      cursor: z3.string().optional(),
      // Page size is capped at 200 entries per call.
      limit: z3.number().int().positive().max(200).optional(),
      scope: z3.string().optional()
    }
  },
  async ({ pathPrefix, cursor, limit, scope }) => {
    const pages = await engine.listPages({ pathPrefix, cursor, limit, scope });
    const text = JSON.stringify(pages, null, 2);
    return { content: [{ type: "text", text }] };
  }
);
|
|
4497
|
+
// MCP tool "get_site_structure": forwards the arguments to
// engine.getSiteStructure and returns the result as pretty-printed JSON text.
server.registerTool(
  "get_site_structure",
  {
    description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
    inputSchema: {
      pathPrefix: z3.string().optional(),
      scope: z3.string().optional(),
      // Upper bound matches the 2000-page limit mentioned in the description.
      maxPages: z3.number().int().positive().max(2000).optional()
    }
  },
  async ({ pathPrefix, scope, maxPages }) => {
    const structure = await engine.getSiteStructure({ pathPrefix, scope, maxPages });
    const text = JSON.stringify(structure, null, 2);
    return { content: [{ type: "text", text }] };
  }
);
|
|
4523
|
+
// MCP tool "find_source_file": runs a top-1 search and reports where the best
// match lives (url, routeFile, sectionTitle, snippet), or an error payload
// when the search returns nothing. The payload is compact (un-indented) JSON.
server.registerTool(
  "find_source_file",
  {
    description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
    inputSchema: {
      query: z3.string().min(1),
      scope: z3.string().optional()
    }
  },
  async ({ query, scope }) => {
    const { results } = await engine.search({ q: query, topK: 1, scope });

    let payload;
    if (results.length === 0) {
      payload = { error: "No matching content found for the given query." };
    } else {
      const best = results[0];
      payload = {
        url: best.url,
        routeFile: best.routeFile,
        sectionTitle: best.sectionTitle,
        snippet: best.snippet
      };
    }

    return { content: [{ type: "text", text: JSON.stringify(payload) }] };
  }
);
|
|
4562
|
+
// MCP tool "get_related_pages": forwards to engine.getRelatedPages and
// returns the result as pretty-printed JSON text.
server.registerTool(
  "get_related_pages",
  {
    description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
    inputSchema: {
      pathOrUrl: z3.string().min(1),
      scope: z3.string().optional(),
      // At most 25 related pages may be requested per call.
      topK: z3.number().int().positive().max(25).optional()
    }
  },
  async ({ pathOrUrl, topK, scope }) => {
    const related = await engine.getRelatedPages(pathOrUrl, { topK, scope });
    const text = JSON.stringify(related, null, 2);
    return { content: [{ type: "text", text }] };
  }
);
|
|
2981
4587
|
return server;
|
|
2982
4588
|
}
|
|
4589
|
+
/**
 * Determines the API key for the MCP HTTP endpoint.
 *
 * An explicit mcp.http.apiKey wins whenever it is not null/undefined (an
 * empty string is still returned as-is). Otherwise the key is read from the
 * environment variable named by mcp.http.apiKeyEnv, if one is set.
 *
 * @param {object} config - Loaded config; reads config.mcp.http.
 * @returns {string|undefined} The resolved key, or undefined when none is configured.
 */
function resolveApiKey(config) {
  const { apiKey, apiKeyEnv } = config.mcp.http;
  if (apiKey != null) {
    return apiKey;
  }
  if (!apiKeyEnv) {
    return void 0;
  }
  return process.env[apiKeyEnv];
}
|
|
4592
|
+
/**
 * Checks a client-supplied API key against the expected key.
 *
 * Both values are SHA-256 hashed first so the buffers handed to
 * timingSafeEqual always have the same length (it throws on a length
 * mismatch), and the comparison itself does not short-circuit on the first
 * differing byte.
 *
 * @param {string} provided - Key taken from the incoming request.
 * @param {string} expected - Configured key.
 * @returns {boolean} true only when both are strings with identical content.
 */
function verifyApiKey(provided, expected) {
  // hash.update() throws on non-string/non-buffer input (e.g. undefined or an
  // array); treat anything that is not a string as a failed check instead.
  if (typeof provided !== "string" || typeof expected !== "string") {
    return false;
  }
  const providedDigest = createHash2("sha256").update(provided).digest();
  const expectedDigest = createHash2("sha256").update(expected).digest();
  return timingSafeEqual(providedDigest, expectedDigest);
}
|
|
2983
4597
|
function redirectConsoleToStderr() {
|
|
2984
4598
|
const originalLog = console.log;
|
|
2985
4599
|
console.log = (...args) => {
|
|
@@ -2996,89 +4610,945 @@ async function startHttpServer(serverFactory, config, opts) {
|
|
|
2996
4610
|
const app = createMcpExpressApp();
|
|
2997
4611
|
const port = opts.httpPort ?? config.mcp.http.port;
|
|
2998
4612
|
const endpointPath = opts.httpPath ?? config.mcp.http.path;
|
|
4613
|
+
const isPublic = config.mcp.access === "public";
|
|
4614
|
+
const host = isPublic ? "0.0.0.0" : "127.0.0.1";
|
|
4615
|
+
const apiKey = isPublic ? resolveApiKey(config) : void 0;
|
|
2999
4616
|
app.post(endpointPath, async (req, res) => {
|
|
4617
|
+
if (isPublic && apiKey) {
|
|
4618
|
+
const authHeader = req.headers["authorization"];
|
|
4619
|
+
const provided = (authHeader?.startsWith("Bearer ") ? authHeader.slice(7) : void 0) ?? req.headers["x-api-key"] ?? "";
|
|
4620
|
+
if (!provided || !verifyApiKey(provided, apiKey)) {
|
|
4621
|
+
res.status(401).json({
|
|
4622
|
+
jsonrpc: "2.0",
|
|
4623
|
+
error: { code: -32001, message: "Unauthorized" },
|
|
4624
|
+
id: null
|
|
4625
|
+
});
|
|
4626
|
+
return;
|
|
4627
|
+
}
|
|
4628
|
+
}
|
|
3000
4629
|
const server = serverFactory();
|
|
3001
4630
|
const transport = new StreamableHTTPServerTransport({
|
|
3002
4631
|
sessionIdGenerator: void 0
|
|
3003
4632
|
});
|
|
3004
4633
|
try {
|
|
3005
|
-
await server.connect(transport);
|
|
3006
|
-
await transport.handleRequest(req, res, req.body);
|
|
3007
|
-
res.on("close", () => {
|
|
3008
|
-
transport.close();
|
|
3009
|
-
server.close();
|
|
4634
|
+
await server.connect(transport);
|
|
4635
|
+
await transport.handleRequest(req, res, req.body);
|
|
4636
|
+
res.on("close", () => {
|
|
4637
|
+
transport.close();
|
|
4638
|
+
server.close();
|
|
4639
|
+
});
|
|
4640
|
+
} catch (error) {
|
|
4641
|
+
if (!res.headersSent) {
|
|
4642
|
+
res.status(500).json({
|
|
4643
|
+
jsonrpc: "2.0",
|
|
4644
|
+
error: {
|
|
4645
|
+
code: -32603,
|
|
4646
|
+
message: error instanceof Error ? error.message : "Internal server error"
|
|
4647
|
+
},
|
|
4648
|
+
id: null
|
|
4649
|
+
});
|
|
4650
|
+
}
|
|
4651
|
+
}
|
|
4652
|
+
});
|
|
4653
|
+
app.get(endpointPath, (_req, res) => {
|
|
4654
|
+
res.writeHead(405).end(
|
|
4655
|
+
JSON.stringify({
|
|
4656
|
+
jsonrpc: "2.0",
|
|
4657
|
+
error: {
|
|
4658
|
+
code: -32e3,
|
|
4659
|
+
message: "Method not allowed"
|
|
4660
|
+
},
|
|
4661
|
+
id: null
|
|
4662
|
+
})
|
|
4663
|
+
);
|
|
4664
|
+
});
|
|
4665
|
+
app.delete(endpointPath, (_req, res) => {
|
|
4666
|
+
res.writeHead(405).end(
|
|
4667
|
+
JSON.stringify({
|
|
4668
|
+
jsonrpc: "2.0",
|
|
4669
|
+
error: {
|
|
4670
|
+
code: -32e3,
|
|
4671
|
+
message: "Method not allowed"
|
|
4672
|
+
},
|
|
4673
|
+
id: null
|
|
4674
|
+
})
|
|
4675
|
+
);
|
|
4676
|
+
});
|
|
4677
|
+
await new Promise((resolve, reject) => {
|
|
4678
|
+
const instance = app.listen(port, host, () => {
|
|
4679
|
+
process.stderr.write(`SearchSocket MCP HTTP server listening on http://${host}:${port}${endpointPath}
|
|
4680
|
+
`);
|
|
4681
|
+
if (isPublic) {
|
|
4682
|
+
process.stderr.write("WARNING: Server is in public mode. Ensure HTTPS is configured via a reverse proxy for production use.\n");
|
|
4683
|
+
}
|
|
4684
|
+
resolve();
|
|
4685
|
+
});
|
|
4686
|
+
instance.once("error", reject);
|
|
4687
|
+
process.on("SIGINT", async () => {
|
|
4688
|
+
await new Promise((shutdownResolve) => instance.close(() => shutdownResolve()));
|
|
4689
|
+
process.exit(0);
|
|
4690
|
+
});
|
|
4691
|
+
});
|
|
4692
|
+
}
|
|
4693
|
+
/**
 * Loads configuration, applies caller overrides, and starts the MCP server
 * on the selected transport.
 *
 * @param {object} [options]
 * @param {string} [options.cwd] - Passed to loadConfig and SearchEngine.create.
 * @param {string} [options.configPath] - Passed to loadConfig and SearchEngine.create.
 * @param {string} [options.access] - Overrides config.mcp.access when truthy.
 * @param {string} [options.apiKey] - Overrides config.mcp.http.apiKey when truthy.
 * @param {string} [options.transport] - Overrides config.mcp.transport when not null/undefined.
 * @returns {Promise<void>}
 * @throws {Error} When access is "public" and no API key can be resolved.
 */
async function runMcpServer(options = {}) {
  const { cwd, configPath } = options;
  const config = await loadConfig({ cwd, configPath });

  // Caller-supplied values take precedence over the loaded config.
  if (options.access) {
    config.mcp.access = options.access;
  }
  if (options.apiKey) {
    config.mcp.http.apiKey = options.apiKey;
  }

  // Public access is refused unless an API key is available.
  const isPublic = config.mcp.access === "public";
  if (isPublic && !resolveApiKey(config)) {
    throw new Error(
      'MCP access is "public" but no API key is configured. Pass --api-key or set mcp.http.apiKey / mcp.http.apiKeyEnv in config.'
    );
  }

  const resolvedTransport = options.transport ?? config.mcp.transport;
  if (resolvedTransport === "stdio") {
    // NOTE(review): presumably keeps console output off the stdio transport
    // stream -- confirm against redirectConsoleToStderr.
    redirectConsoleToStderr();
  }

  const engine = await SearchEngine.create({ cwd, configPath, config });

  if (resolvedTransport !== "http") {
    // Any transport other than "http" is served over stdio with one server.
    const server = createServer(engine);
    const stdioTransport = new StdioServerTransport();
    await server.connect(stdioTransport);
    return;
  }

  // HTTP mode hands over a factory so the HTTP layer can create servers itself.
  await startHttpServer(() => createServer(engine), config, options);
}
|
|
4722
|
+
|
|
4723
|
+
// src/playground/server.ts
|
|
4724
|
+
import express from "express";
|
|
4725
|
+
|
|
4726
|
+
// src/playground/playground.html
|
|
4727
|
+
var playground_default = `<!DOCTYPE html>
|
|
4728
|
+
<html lang="en">
|
|
4729
|
+
<head>
|
|
4730
|
+
<meta charset="utf-8">
|
|
4731
|
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
4732
|
+
<title>SearchSocket Playground</title>
|
|
4733
|
+
<style>
|
|
4734
|
+
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
|
4735
|
+
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #f8f9fa; color: #1a1a2e; padding: 2rem; max-width: 900px; margin: 0 auto; }
|
|
4736
|
+
h1 { font-size: 1.5rem; margin-bottom: 1.5rem; color: #16213e; }
|
|
4737
|
+
h1 span { font-weight: 400; color: #888; }
|
|
4738
|
+
.search-box { display: flex; gap: 0.5rem; margin-bottom: 0.5rem; }
|
|
4739
|
+
.search-box input { flex: 1; padding: 0.75rem 1rem; font-size: 1rem; border: 2px solid #ddd; border-radius: 8px; outline: none; transition: border-color 0.2s; }
|
|
4740
|
+
.search-box input:focus { border-color: #4361ee; }
|
|
4741
|
+
.search-opts { display: flex; gap: 1rem; margin-bottom: 1.5rem; font-size: 0.85rem; color: #555; align-items: center; }
|
|
4742
|
+
.search-opts label { display: flex; align-items: center; gap: 0.3rem; cursor: pointer; }
|
|
4743
|
+
.search-opts select { padding: 0.25rem 0.5rem; border: 1px solid #ccc; border-radius: 4px; font-size: 0.85rem; }
|
|
4744
|
+
.meta { font-size: 0.8rem; color: #888; margin-bottom: 1rem; }
|
|
4745
|
+
.result { background: #fff; border: 1px solid #e0e0e0; border-radius: 8px; padding: 1rem 1.25rem; margin-bottom: 0.75rem; }
|
|
4746
|
+
.result-header { display: flex; justify-content: space-between; align-items: flex-start; gap: 1rem; }
|
|
4747
|
+
.result-title { font-size: 1.05rem; font-weight: 600; color: #16213e; text-decoration: none; }
|
|
4748
|
+
.result-title:hover { color: #4361ee; }
|
|
4749
|
+
.result-score { font-size: 0.8rem; font-weight: 600; color: #4361ee; white-space: nowrap; background: #eef1ff; padding: 0.2rem 0.5rem; border-radius: 4px; }
|
|
4750
|
+
.result-url { font-size: 0.8rem; color: #888; margin-top: 0.2rem; }
|
|
4751
|
+
.result-snippet { font-size: 0.9rem; color: #444; margin-top: 0.5rem; line-height: 1.5; }
|
|
4752
|
+
.result-meta { display: flex; gap: 0.75rem; flex-wrap: wrap; margin-top: 0.5rem; font-size: 0.78rem; color: #777; }
|
|
4753
|
+
.result-meta span { background: #f0f0f0; padding: 0.15rem 0.5rem; border-radius: 4px; }
|
|
4754
|
+
.breakdown { margin-top: 0.5rem; padding: 0.5rem 0.75rem; background: #f8f9fa; border-radius: 6px; font-size: 0.8rem; }
|
|
4755
|
+
.breakdown-row { display: flex; justify-content: space-between; padding: 0.15rem 0; }
|
|
4756
|
+
.breakdown-label { color: #555; }
|
|
4757
|
+
.breakdown-value { font-family: monospace; color: #333; }
|
|
4758
|
+
.chunks-toggle { font-size: 0.8rem; color: #4361ee; cursor: pointer; margin-top: 0.5rem; border: none; background: none; padding: 0; text-decoration: underline; }
|
|
4759
|
+
.chunks { margin-top: 0.5rem; padding-left: 1rem; border-left: 3px solid #e0e0e0; }
|
|
4760
|
+
.chunk { margin-bottom: 0.5rem; font-size: 0.85rem; }
|
|
4761
|
+
.chunk-heading { font-size: 0.78rem; color: #4361ee; margin-bottom: 0.15rem; }
|
|
4762
|
+
.chunk-score { font-size: 0.75rem; color: #999; }
|
|
4763
|
+
.chunk-snippet { color: #555; line-height: 1.4; }
|
|
4764
|
+
.empty { text-align: center; padding: 3rem; color: #999; }
|
|
4765
|
+
.loading { text-align: center; padding: 2rem; color: #999; }
|
|
4766
|
+
.hidden { display: none; }
|
|
4767
|
+
|
|
4768
|
+
/* Ranking Tuner */
|
|
4769
|
+
.tuner { margin-bottom: 1.5rem; border: 1px solid #e0e0e0; border-radius: 8px; background: #fff; }
|
|
4770
|
+
.tuner > summary { padding: 0.75rem 1rem; font-weight: 600; font-size: 0.95rem; cursor: pointer; color: #16213e; user-select: none; }
|
|
4771
|
+
.tuner > summary:hover { color: #4361ee; }
|
|
4772
|
+
.tuner-body { padding: 0.5rem 1rem 1rem; }
|
|
4773
|
+
.tuner-actions { display: flex; gap: 0.5rem; margin-bottom: 0.75rem; }
|
|
4774
|
+
.tuner-actions button { padding: 0.35rem 0.75rem; font-size: 0.8rem; border: 1px solid #ccc; border-radius: 4px; background: #fff; cursor: pointer; color: #555; }
|
|
4775
|
+
.tuner-actions button:hover { border-color: #4361ee; color: #4361ee; }
|
|
4776
|
+
.tuner-group { margin-bottom: 0.5rem; border: 1px solid #eee; border-radius: 6px; }
|
|
4777
|
+
.tuner-group > summary { padding: 0.5rem 0.75rem; font-size: 0.85rem; font-weight: 600; cursor: pointer; color: #444; user-select: none; }
|
|
4778
|
+
.tuner-group[open] { margin-bottom: 0.75rem; }
|
|
4779
|
+
.tuner-group-body { padding: 0.25rem 0.75rem 0.5rem; }
|
|
4780
|
+
.tuner-row { display: grid; grid-template-columns: 140px 1fr 70px 24px; gap: 8px; align-items: center; margin-bottom: 0.35rem; }
|
|
4781
|
+
.tuner-row label { font-size: 0.8rem; color: #555; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
|
|
4782
|
+
.tuner-row label.modified { color: #4361ee; font-weight: 600; }
|
|
4783
|
+
.tuner-row input[type="range"] { width: 100%; height: 6px; cursor: pointer; }
|
|
4784
|
+
.tuner-row input[type="number"] { width: 70px; padding: 0.2rem 0.35rem; font-size: 0.8rem; border: 1px solid #ccc; border-radius: 4px; text-align: right; }
|
|
4785
|
+
.tuner-row input[type="checkbox"] { width: 16px; height: 16px; cursor: pointer; }
|
|
4786
|
+
.tuner-row .reset-btn { width: 20px; height: 20px; border: none; background: none; cursor: pointer; color: #999; font-size: 1rem; padding: 0; line-height: 1; visibility: hidden; }
|
|
4787
|
+
.tuner-row .reset-btn.visible { visibility: visible; }
|
|
4788
|
+
.tuner-row .reset-btn:hover { color: #4361ee; }
|
|
4789
|
+
.tuner-bool-row { display: grid; grid-template-columns: 140px 1fr 24px; gap: 8px; align-items: center; margin-bottom: 0.35rem; }
|
|
4790
|
+
.tuner-bool-row label { font-size: 0.8rem; color: #555; }
|
|
4791
|
+
.tuner-bool-row label.modified { color: #4361ee; font-weight: 600; }
|
|
4792
|
+
.tuner-bool-row .reset-btn { width: 20px; height: 20px; border: none; background: none; cursor: pointer; color: #999; font-size: 1rem; padding: 0; line-height: 1; visibility: hidden; }
|
|
4793
|
+
.tuner-bool-row .reset-btn.visible { visibility: visible; }
|
|
4794
|
+
.tuner-export { margin-top: 0.75rem; }
|
|
4795
|
+
.tuner-export textarea { width: 100%; height: 120px; font-family: monospace; font-size: 0.8rem; padding: 0.5rem; border: 1px solid #ccc; border-radius: 6px; background: #f8f9fa; resize: vertical; }
|
|
4796
|
+
</style>
|
|
4797
|
+
</head>
|
|
4798
|
+
<body>
|
|
4799
|
+
<h1>SearchSocket <span>Playground</span></h1>
|
|
4800
|
+
<div class="search-box">
|
|
4801
|
+
<input type="text" id="q" placeholder="Type a search query..." autocomplete="off" autofocus>
|
|
4802
|
+
</div>
|
|
4803
|
+
<div class="search-opts">
|
|
4804
|
+
<label>
|
|
4805
|
+
Group by:
|
|
4806
|
+
<select id="groupBy">
|
|
4807
|
+
<option value="page">Page</option>
|
|
4808
|
+
<option value="chunk">Chunk</option>
|
|
4809
|
+
</select>
|
|
4810
|
+
</label>
|
|
4811
|
+
<label>
|
|
4812
|
+
Top K:
|
|
4813
|
+
<select id="topK">
|
|
4814
|
+
<option value="5">5</option>
|
|
4815
|
+
<option value="10" selected>10</option>
|
|
4816
|
+
<option value="20">20</option>
|
|
4817
|
+
<option value="50">50</option>
|
|
4818
|
+
</select>
|
|
4819
|
+
</label>
|
|
4820
|
+
</div>
|
|
4821
|
+
|
|
4822
|
+
<details class="tuner" id="tunerPanel">
|
|
4823
|
+
<summary>Ranking Tuner</summary>
|
|
4824
|
+
<div class="tuner-body">
|
|
4825
|
+
<div class="tuner-actions">
|
|
4826
|
+
<button id="resetAll" type="button">Reset All</button>
|
|
4827
|
+
<button id="exportConfig" type="button">Export Config</button>
|
|
4828
|
+
</div>
|
|
4829
|
+
<div id="tunerGroups"></div>
|
|
4830
|
+
<div class="tuner-export hidden" id="exportArea">
|
|
4831
|
+
<textarea id="exportText" readonly></textarea>
|
|
4832
|
+
</div>
|
|
4833
|
+
</div>
|
|
4834
|
+
</details>
|
|
4835
|
+
|
|
4836
|
+
<div id="meta" class="meta"></div>
|
|
4837
|
+
<div id="results"></div>
|
|
4838
|
+
|
|
4839
|
+
<script>
|
|
4840
|
+
(function() {
|
|
4841
|
+
var qInput = document.getElementById('q');
|
|
4842
|
+
var groupBySelect = document.getElementById('groupBy');
|
|
4843
|
+
var topKSelect = document.getElementById('topK');
|
|
4844
|
+
var resultsDiv = document.getElementById('results');
|
|
4845
|
+
var metaDiv = document.getElementById('meta');
|
|
4846
|
+
var tunerGroupsDiv = document.getElementById('tunerGroups');
|
|
4847
|
+
var exportArea = document.getElementById('exportArea');
|
|
4848
|
+
var exportText = document.getElementById('exportText');
|
|
4849
|
+
|
|
4850
|
+
var debounceTimer = null;
|
|
4851
|
+
var requestId = 0;
|
|
4852
|
+
var baselineConfig = null;
|
|
4853
|
+
var tunerParams = [];
|
|
4854
|
+
|
|
4855
|
+
var PARAM_DEFS = [
|
|
4856
|
+
{ group: 'Thresholds', key: 'ranking.minScoreRatio', label: 'minScoreRatio', min: 0, max: 1, step: 0.01 },
|
|
4857
|
+
{ group: 'Thresholds', key: 'ranking.scoreGapThreshold', label: 'scoreGapThreshold', min: 0, max: 1, step: 0.01 },
|
|
4858
|
+
{ group: 'Thresholds', key: 'ranking.minChunkScoreRatio', label: 'minChunkScoreRatio', min: 0, max: 1, step: 0.01 },
|
|
4859
|
+
{ group: 'Boosts', key: 'ranking.enableIncomingLinkBoost', label: 'incomingLinkBoost', type: 'bool' },
|
|
4860
|
+
{ group: 'Boosts', key: 'ranking.enableDepthBoost', label: 'depthBoost', type: 'bool' },
|
|
4861
|
+
{ group: 'Weights', key: 'ranking.weights.incomingLinks', label: 'incomingLinks', min: 0, max: 1, step: 0.01 },
|
|
4862
|
+
{ group: 'Weights', key: 'ranking.weights.depth', label: 'depth', min: 0, max: 1, step: 0.01 },
|
|
4863
|
+
{ group: 'Weights', key: 'ranking.weights.aggregation', label: 'aggregation', min: 0, max: 1, step: 0.01 },
|
|
4864
|
+
{ group: 'Weights', key: 'ranking.weights.titleMatch', label: 'titleMatch', min: 0, max: 1, step: 0.01 },
|
|
4865
|
+
{ group: 'Aggregation', key: 'ranking.aggregationCap', label: 'aggregationCap', min: 1, max: 20, step: 1 },
|
|
4866
|
+
{ group: 'Aggregation', key: 'ranking.aggregationDecay', label: 'aggregationDecay', min: 0, max: 1, step: 0.01 },
|
|
4867
|
+
{ group: 'Search', key: 'search.pageSearchWeight', label: 'pageSearchWeight', min: 0, max: 1, step: 0.01 }
|
|
4868
|
+
];
|
|
4869
|
+
|
|
4870
|
+
function getNestedValue(obj, path) {
|
|
4871
|
+
var parts = path.split('.');
|
|
4872
|
+
var v = obj;
|
|
4873
|
+
for (var i = 0; i < parts.length; i++) {
|
|
4874
|
+
if (v == null) return undefined;
|
|
4875
|
+
v = v[parts[i]];
|
|
4876
|
+
}
|
|
4877
|
+
return v;
|
|
4878
|
+
}
|
|
4879
|
+
|
|
4880
|
+
function setNestedValue(obj, path, value) {
|
|
4881
|
+
var parts = path.split('.');
|
|
4882
|
+
var cur = obj;
|
|
4883
|
+
for (var i = 0; i < parts.length - 1; i++) {
|
|
4884
|
+
if (!cur[parts[i]]) cur[parts[i]] = {};
|
|
4885
|
+
cur = cur[parts[i]];
|
|
4886
|
+
}
|
|
4887
|
+
cur[parts[parts.length - 1]] = value;
|
|
4888
|
+
}
|
|
4889
|
+
|
|
4890
|
+
function initTuner(config) {
|
|
4891
|
+
baselineConfig = config;
|
|
4892
|
+
var groups = {};
|
|
4893
|
+
PARAM_DEFS.forEach(function(def) {
|
|
4894
|
+
if (!groups[def.group]) groups[def.group] = [];
|
|
4895
|
+
groups[def.group].push(def);
|
|
4896
|
+
});
|
|
4897
|
+
|
|
4898
|
+
var html = '';
|
|
4899
|
+
Object.keys(groups).forEach(function(groupName) {
|
|
4900
|
+
html += '<details class="tuner-group" open>';
|
|
4901
|
+
html += '<summary>' + groupName + '</summary>';
|
|
4902
|
+
html += '<div class="tuner-group-body">';
|
|
4903
|
+
groups[groupName].forEach(function(def) {
|
|
4904
|
+
var val = getNestedValue(config, def.key);
|
|
4905
|
+
if (def.type === 'bool') {
|
|
4906
|
+
html += '<div class="tuner-bool-row" data-key="' + def.key + '">';
|
|
4907
|
+
html += '<label>' + def.label + '</label>';
|
|
4908
|
+
html += '<input type="checkbox"' + (val ? ' checked' : '') + ' data-param="' + def.key + '">';
|
|
4909
|
+
html += '<button class="reset-btn" title="Reset" data-reset="' + def.key + '">\\u21BA</button>';
|
|
4910
|
+
html += '</div>';
|
|
4911
|
+
} else {
|
|
4912
|
+
html += '<div class="tuner-row" data-key="' + def.key + '">';
|
|
4913
|
+
html += '<label>' + def.label + '</label>';
|
|
4914
|
+
html += '<input type="range" min="' + def.min + '" max="' + def.max + '" step="' + def.step + '" value="' + val + '" data-param="' + def.key + '">';
|
|
4915
|
+
html += '<input type="number" min="' + def.min + '" max="' + def.max + '" step="' + def.step + '" value="' + val + '" data-num="' + def.key + '">';
|
|
4916
|
+
html += '<button class="reset-btn" title="Reset" data-reset="' + def.key + '">\\u21BA</button>';
|
|
4917
|
+
html += '</div>';
|
|
4918
|
+
}
|
|
4919
|
+
});
|
|
4920
|
+
html += '</div></details>';
|
|
4921
|
+
});
|
|
4922
|
+
tunerGroupsDiv.innerHTML = html;
|
|
4923
|
+
|
|
4924
|
+
// Wire events
|
|
4925
|
+
tunerGroupsDiv.addEventListener('input', function(e) {
|
|
4926
|
+
var param = e.target.getAttribute('data-param');
|
|
4927
|
+
var num = e.target.getAttribute('data-num');
|
|
4928
|
+
if (param) {
|
|
4929
|
+
// Slider or checkbox changed \u2014 sync number input
|
|
4930
|
+
var row = e.target.closest('[data-key]');
|
|
4931
|
+
if (row && e.target.type === 'range') {
|
|
4932
|
+
var numInput = row.querySelector('[data-num]');
|
|
4933
|
+
if (numInput) numInput.value = e.target.value;
|
|
4934
|
+
}
|
|
4935
|
+
updateModifiedState(param);
|
|
4936
|
+
scheduleSearch();
|
|
4937
|
+
} else if (num) {
|
|
4938
|
+
// Number input changed \u2014 sync slider
|
|
4939
|
+
var row = e.target.closest('[data-key]');
|
|
4940
|
+
if (row) {
|
|
4941
|
+
var rangeInput = row.querySelector('[data-param]');
|
|
4942
|
+
if (rangeInput) rangeInput.value = e.target.value;
|
|
4943
|
+
}
|
|
4944
|
+
updateModifiedState(num);
|
|
4945
|
+
scheduleSearch();
|
|
4946
|
+
}
|
|
4947
|
+
});
|
|
4948
|
+
|
|
4949
|
+
tunerGroupsDiv.addEventListener('change', function(e) {
|
|
4950
|
+
var param = e.target.getAttribute('data-param');
|
|
4951
|
+
if (param && e.target.type === 'checkbox') {
|
|
4952
|
+
updateModifiedState(param);
|
|
4953
|
+
scheduleSearch();
|
|
4954
|
+
}
|
|
4955
|
+
});
|
|
4956
|
+
|
|
4957
|
+
tunerGroupsDiv.addEventListener('click', function(e) {
|
|
4958
|
+
var resetKey = e.target.getAttribute('data-reset');
|
|
4959
|
+
if (resetKey) {
|
|
4960
|
+
resetParam(resetKey);
|
|
4961
|
+
scheduleSearch();
|
|
4962
|
+
}
|
|
4963
|
+
});
|
|
4964
|
+
}
|
|
4965
|
+
|
|
4966
|
+
function updateModifiedState(key) {
|
|
4967
|
+
var baseline = getNestedValue(baselineConfig, key);
|
|
4968
|
+
var row = tunerGroupsDiv.querySelector('[data-key="' + key + '"]');
|
|
4969
|
+
if (!row) return;
|
|
4970
|
+
var input = row.querySelector('[data-param="' + key + '"]');
|
|
4971
|
+
if (!input) return;
|
|
4972
|
+
var current = input.type === 'checkbox' ? input.checked : parseFloat(input.value);
|
|
4973
|
+
var isModified = current !== baseline;
|
|
4974
|
+
var label = row.querySelector('label');
|
|
4975
|
+
var resetBtn = row.querySelector('.reset-btn');
|
|
4976
|
+
if (label) label.classList.toggle('modified', isModified);
|
|
4977
|
+
if (resetBtn) resetBtn.classList.toggle('visible', isModified);
|
|
4978
|
+
}
|
|
4979
|
+
|
|
4980
|
+
function resetParam(key) {
|
|
4981
|
+
var baseline = getNestedValue(baselineConfig, key);
|
|
4982
|
+
var row = tunerGroupsDiv.querySelector('[data-key="' + key + '"]');
|
|
4983
|
+
if (!row) return;
|
|
4984
|
+
var input = row.querySelector('[data-param="' + key + '"]');
|
|
4985
|
+
if (!input) return;
|
|
4986
|
+
if (input.type === 'checkbox') {
|
|
4987
|
+
input.checked = baseline;
|
|
4988
|
+
} else {
|
|
4989
|
+
input.value = baseline;
|
|
4990
|
+
var numInput = row.querySelector('[data-num]');
|
|
4991
|
+
if (numInput) numInput.value = baseline;
|
|
4992
|
+
}
|
|
4993
|
+
updateModifiedState(key);
|
|
4994
|
+
}
|
|
4995
|
+
|
|
4996
|
+
function resetAll() {
|
|
4997
|
+
PARAM_DEFS.forEach(function(def) {
|
|
4998
|
+
resetParam(def.key);
|
|
4999
|
+
});
|
|
5000
|
+
exportArea.classList.add('hidden');
|
|
5001
|
+
scheduleSearch();
|
|
5002
|
+
}
|
|
5003
|
+
|
|
5004
|
+
function collectOverrides() {
|
|
5005
|
+
var overrides = {};
|
|
5006
|
+
PARAM_DEFS.forEach(function(def) {
|
|
5007
|
+
var row = tunerGroupsDiv.querySelector('[data-key="' + def.key + '"]');
|
|
5008
|
+
if (!row) return;
|
|
5009
|
+
var input = row.querySelector('[data-param="' + def.key + '"]');
|
|
5010
|
+
if (!input) return;
|
|
5011
|
+
var val = def.type === 'bool' ? input.checked : parseFloat(input.value);
|
|
5012
|
+
setNestedValue(overrides, def.key, val);
|
|
5013
|
+
});
|
|
5014
|
+
return overrides;
|
|
5015
|
+
}
|
|
5016
|
+
|
|
5017
|
+
function collectChangedOverrides() {
|
|
5018
|
+
var overrides = {};
|
|
5019
|
+
var hasChanges = false;
|
|
5020
|
+
PARAM_DEFS.forEach(function(def) {
|
|
5021
|
+
var row = tunerGroupsDiv.querySelector('[data-key="' + def.key + '"]');
|
|
5022
|
+
if (!row) return;
|
|
5023
|
+
var input = row.querySelector('[data-param="' + def.key + '"]');
|
|
5024
|
+
if (!input) return;
|
|
5025
|
+
var current = def.type === 'bool' ? input.checked : parseFloat(input.value);
|
|
5026
|
+
var baseline = getNestedValue(baselineConfig, def.key);
|
|
5027
|
+
if (current !== baseline) {
|
|
5028
|
+
setNestedValue(overrides, def.key, current);
|
|
5029
|
+
hasChanges = true;
|
|
5030
|
+
}
|
|
5031
|
+
});
|
|
5032
|
+
return hasChanges ? overrides : null;
|
|
5033
|
+
}
|
|
5034
|
+
|
|
5035
|
+
function exportConfig() {
|
|
5036
|
+
var changed = collectChangedOverrides();
|
|
5037
|
+
if (!changed) {
|
|
5038
|
+
exportArea.classList.remove('hidden');
|
|
5039
|
+
exportText.value = '// No parameters have been changed from defaults.';
|
|
5040
|
+
return;
|
|
5041
|
+
}
|
|
5042
|
+
|
|
5043
|
+
var lines = [];
|
|
5044
|
+
if (changed.ranking) {
|
|
5045
|
+
lines.push('ranking: {');
|
|
5046
|
+
var r = changed.ranking;
|
|
5047
|
+
var simpleKeys = ['enableIncomingLinkBoost', 'enableDepthBoost', 'aggregationCap', 'aggregationDecay', 'minChunkScoreRatio', 'minScoreRatio', 'scoreGapThreshold'];
|
|
5048
|
+
simpleKeys.forEach(function(k) {
|
|
5049
|
+
if (r[k] !== undefined) lines.push(' ' + k + ': ' + JSON.stringify(r[k]) + ',');
|
|
5050
|
+
});
|
|
5051
|
+
if (r.weights) {
|
|
5052
|
+
lines.push(' weights: {');
|
|
5053
|
+
Object.keys(r.weights).forEach(function(wk) {
|
|
5054
|
+
lines.push(' ' + wk + ': ' + r.weights[wk] + ',');
|
|
5055
|
+
});
|
|
5056
|
+
lines.push(' },');
|
|
5057
|
+
}
|
|
5058
|
+
lines.push('},');
|
|
5059
|
+
}
|
|
5060
|
+
if (changed.search) {
|
|
5061
|
+
lines.push('search: {');
|
|
5062
|
+
Object.keys(changed.search).forEach(function(sk) {
|
|
5063
|
+
lines.push(' ' + sk + ': ' + changed.search[sk] + ',');
|
|
5064
|
+
});
|
|
5065
|
+
lines.push('},');
|
|
5066
|
+
}
|
|
5067
|
+
|
|
5068
|
+
exportArea.classList.remove('hidden');
|
|
5069
|
+
exportText.value = lines.join('\\n');
|
|
5070
|
+
}
|
|
5071
|
+
|
|
5072
|
+
// Read initial state from URL
|
|
5073
|
+
var params = new URLSearchParams(window.location.search);
|
|
5074
|
+
if (params.get('q')) qInput.value = params.get('q');
|
|
5075
|
+
if (params.get('groupBy')) groupBySelect.value = params.get('groupBy');
|
|
5076
|
+
if (params.get('topK')) topKSelect.value = params.get('topK');
|
|
5077
|
+
|
|
5078
|
+
function updateUrl() {
|
|
5079
|
+
var p = new URLSearchParams();
|
|
5080
|
+
if (qInput.value) p.set('q', qInput.value);
|
|
5081
|
+
if (groupBySelect.value !== 'page') p.set('groupBy', groupBySelect.value);
|
|
5082
|
+
if (topKSelect.value !== '10') p.set('topK', topKSelect.value);
|
|
5083
|
+
var qs = p.toString();
|
|
5084
|
+
history.replaceState(null, '', qs ? '?' + qs : window.location.pathname);
|
|
5085
|
+
}
|
|
5086
|
+
|
|
5087
|
+
function doSearch() {
|
|
5088
|
+
var query = qInput.value.trim();
|
|
5089
|
+
updateUrl();
|
|
5090
|
+
if (!query) {
|
|
5091
|
+
resultsDiv.innerHTML = '<div class="empty">Enter a query to search</div>';
|
|
5092
|
+
metaDiv.textContent = '';
|
|
5093
|
+
return;
|
|
5094
|
+
}
|
|
5095
|
+
|
|
5096
|
+
resultsDiv.innerHTML = '<div class="loading">Searching...</div>';
|
|
5097
|
+
|
|
5098
|
+
var thisRequestId = ++requestId;
|
|
5099
|
+
var body = {
|
|
5100
|
+
q: query,
|
|
5101
|
+
topK: parseInt(topKSelect.value, 10),
|
|
5102
|
+
groupBy: groupBySelect.value,
|
|
5103
|
+
debug: true
|
|
5104
|
+
};
|
|
5105
|
+
|
|
5106
|
+
if (baselineConfig) {
|
|
5107
|
+
body.rankingOverrides = collectOverrides();
|
|
5108
|
+
}
|
|
5109
|
+
|
|
5110
|
+
fetch('/_searchsocket/search', {
|
|
5111
|
+
method: 'POST',
|
|
5112
|
+
headers: { 'Content-Type': 'application/json' },
|
|
5113
|
+
body: JSON.stringify(body)
|
|
5114
|
+
}).then(function(res) {
|
|
5115
|
+
if (thisRequestId !== requestId) return;
|
|
5116
|
+
if (!res.ok) {
|
|
5117
|
+
return res.text().then(function(err) {
|
|
5118
|
+
resultsDiv.innerHTML = '<div class="empty">Error: ' + escapeHtml(err) + '</div>';
|
|
5119
|
+
});
|
|
5120
|
+
}
|
|
5121
|
+
return res.json().then(function(data) {
|
|
5122
|
+
if (thisRequestId !== requestId) return;
|
|
5123
|
+
renderResults(data);
|
|
5124
|
+
});
|
|
5125
|
+
}).catch(function(err) {
|
|
5126
|
+
if (thisRequestId !== requestId) return;
|
|
5127
|
+
resultsDiv.innerHTML = '<div class="empty">Network error: ' + escapeHtml(err.message) + '</div>';
|
|
5128
|
+
});
|
|
5129
|
+
}
|
|
5130
|
+
|
|
5131
|
+
function escapeHtml(str) {
|
|
5132
|
+
var d = document.createElement('div');
|
|
5133
|
+
d.textContent = str;
|
|
5134
|
+
return d.innerHTML;
|
|
5135
|
+
}
|
|
5136
|
+
|
|
5137
|
+
function renderResults(data) {
|
|
5138
|
+
metaDiv.textContent = data.results.length + ' results in ' + data.meta.timingsMs.total + 'ms (search: ' + data.meta.timingsMs.search + 'ms) \\u2014 scope: ' + data.scope;
|
|
5139
|
+
|
|
5140
|
+
if (data.results.length === 0) {
|
|
5141
|
+
resultsDiv.innerHTML = '<div class="empty">No results found</div>';
|
|
5142
|
+
return;
|
|
5143
|
+
}
|
|
5144
|
+
|
|
5145
|
+
resultsDiv.innerHTML = data.results.map(function(r, i) {
|
|
5146
|
+
var html = '<div class="result">';
|
|
5147
|
+
html += '<div class="result-header">';
|
|
5148
|
+
html += '<div><div class="result-title">' + escapeHtml(r.title) + '</div>';
|
|
5149
|
+
html += '<div class="result-url">' + escapeHtml(r.url) + '</div></div>';
|
|
5150
|
+
html += '<div class="result-score">' + r.score.toFixed(4) + '</div>';
|
|
5151
|
+
html += '</div>';
|
|
5152
|
+
|
|
5153
|
+
if (r.snippet) {
|
|
5154
|
+
html += '<div class="result-snippet">' + escapeHtml(r.snippet) + '</div>';
|
|
5155
|
+
}
|
|
5156
|
+
|
|
5157
|
+
html += '<div class="result-meta">';
|
|
5158
|
+
html += '<span>route: ' + escapeHtml(r.routeFile) + '</span>';
|
|
5159
|
+
if (r.sectionTitle) html += '<span>section: ' + escapeHtml(r.sectionTitle) + '</span>';
|
|
5160
|
+
html += '</div>';
|
|
5161
|
+
|
|
5162
|
+
if (r.breakdown) {
|
|
5163
|
+
html += '<div class="breakdown">';
|
|
5164
|
+
html += '<div class="breakdown-row"><span class="breakdown-label">Base score</span><span class="breakdown-value">' + r.breakdown.baseScore.toFixed(6) + '</span></div>';
|
|
5165
|
+
html += '<div class="breakdown-row"><span class="breakdown-label">Incoming link boost</span><span class="breakdown-value">' + r.breakdown.incomingLinkBoost.toFixed(6) + '</span></div>';
|
|
5166
|
+
html += '<div class="breakdown-row"><span class="breakdown-label">Depth boost</span><span class="breakdown-value">' + r.breakdown.depthBoost.toFixed(6) + '</span></div>';
|
|
5167
|
+
html += '<div class="breakdown-row"><span class="breakdown-label">Title match boost</span><span class="breakdown-value">' + r.breakdown.titleMatchBoost.toFixed(6) + '</span></div>';
|
|
5168
|
+
html += '<div class="breakdown-row"><span class="breakdown-label">Anchor text boost</span><span class="breakdown-value">' + (r.breakdown.anchorTextMatchBoost || 0).toFixed(6) + '</span></div>';
|
|
5169
|
+
html += '</div>';
|
|
5170
|
+
}
|
|
5171
|
+
|
|
5172
|
+
if (r.chunks && r.chunks.length > 0) {
|
|
5173
|
+
html += '<button class="chunks-toggle" data-idx="' + i + '">Show ' + r.chunks.length + ' chunks</button>';
|
|
5174
|
+
html += '<div class="chunks hidden" id="chunks-' + i + '">';
|
|
5175
|
+
r.chunks.forEach(function(c) {
|
|
5176
|
+
html += '<div class="chunk">';
|
|
5177
|
+
if (c.headingPath && c.headingPath.length > 0) {
|
|
5178
|
+
html += '<div class="chunk-heading">' + escapeHtml(c.headingPath.join(' > ')) + '</div>';
|
|
5179
|
+
}
|
|
5180
|
+
if (c.sectionTitle) {
|
|
5181
|
+
html += '<div class="chunk-heading">' + escapeHtml(c.sectionTitle) + '</div>';
|
|
5182
|
+
}
|
|
5183
|
+
html += '<div class="chunk-snippet">' + escapeHtml(c.snippet) + '</div>';
|
|
5184
|
+
html += '<div class="chunk-score">score: ' + c.score.toFixed(4) + '</div>';
|
|
5185
|
+
html += '</div>';
|
|
5186
|
+
});
|
|
5187
|
+
html += '</div>';
|
|
5188
|
+
}
|
|
5189
|
+
|
|
5190
|
+
html += '</div>';
|
|
5191
|
+
return html;
|
|
5192
|
+
}).join('');
|
|
5193
|
+
}
|
|
5194
|
+
|
|
5195
|
+
resultsDiv.addEventListener('click', function(e) {
|
|
5196
|
+
if (e.target.classList.contains('chunks-toggle')) {
|
|
5197
|
+
var idx = e.target.getAttribute('data-idx');
|
|
5198
|
+
var chunksDiv = document.getElementById('chunks-' + idx);
|
|
5199
|
+
if (chunksDiv) {
|
|
5200
|
+
chunksDiv.classList.toggle('hidden');
|
|
5201
|
+
e.target.textContent = chunksDiv.classList.contains('hidden')
|
|
5202
|
+
? 'Show ' + chunksDiv.children.length + ' chunks'
|
|
5203
|
+
: 'Hide chunks';
|
|
5204
|
+
}
|
|
5205
|
+
}
|
|
5206
|
+
});
|
|
5207
|
+
|
|
5208
|
+
function scheduleSearch() {
|
|
5209
|
+
clearTimeout(debounceTimer);
|
|
5210
|
+
debounceTimer = setTimeout(doSearch, 300);
|
|
5211
|
+
}
|
|
5212
|
+
|
|
5213
|
+
qInput.addEventListener('input', scheduleSearch);
|
|
5214
|
+
groupBySelect.addEventListener('change', scheduleSearch);
|
|
5215
|
+
topKSelect.addEventListener('change', scheduleSearch);
|
|
5216
|
+
|
|
5217
|
+
document.getElementById('resetAll').addEventListener('click', resetAll);
|
|
5218
|
+
document.getElementById('exportConfig').addEventListener('click', exportConfig);
|
|
5219
|
+
|
|
5220
|
+
// Fetch config and initialize tuner
|
|
5221
|
+
fetch('/_searchsocket/config').then(function(res) {
|
|
5222
|
+
if (res.ok) return res.json();
|
|
5223
|
+
return null;
|
|
5224
|
+
}).then(function(config) {
|
|
5225
|
+
if (config) initTuner(config);
|
|
5226
|
+
}).catch(function() {
|
|
5227
|
+
// Config endpoint not available \u2014 tuner stays empty
|
|
5228
|
+
});
|
|
5229
|
+
|
|
5230
|
+
// Trigger initial search if query is present
|
|
5231
|
+
if (qInput.value.trim()) doSearch();
|
|
5232
|
+
})();
|
|
5233
|
+
</script>
|
|
5234
|
+
</body>
|
|
5235
|
+
</html>
|
|
5236
|
+
`;
|
|
5237
|
+
|
|
5238
|
+
// src/playground/server.ts
|
|
5239
|
+
/**
 * Start the local playground HTTP server on 127.0.0.1.
 *
 * Routes:
 *   GET  /_searchsocket         - serves the playground HTML page
 *   GET  /_searchsocket/config  - returns the tunable ranking/search settings
 *   POST /_searchsocket/search  - runs a search with the JSON body as request
 *
 * @param options `config` (optional, loaded via `loadConfig` when absent),
 *   `cwd`, `configPath`, and `port` (defaults to 3337).
 * @returns a promise resolving to `{ port, close }`, where `port` is the port
 *   actually bound and `close()` resolves once the HTTP server has closed.
 */
async function runPlaygroundServer(options) {
  const config = options.config ?? await loadConfig({
    cwd: options.cwd,
    configPath: options.configPath
  });
  let enginePromise = null;
  // The engine is created lazily on the first search and then shared.
  function getEngine() {
    if (!enginePromise) {
      enginePromise = Promise.resolve(
        SearchEngine.create({
          cwd: options.cwd,
          configPath: options.configPath,
          config
        })
      ).catch((err) => {
        // Do not cache a failed creation: the next request should retry
        // instead of replaying the same rejection until the process restarts.
        enginePromise = null;
        throw err;
      });
    }
    return enginePromise;
  }
  const app = express();
  app.use(express.json());
  app.get("/_searchsocket", (_req, res) => {
    res.type("html").send(playground_default);
  });
  app.get("/_searchsocket/config", (_req, res) => {
    res.json({
      ranking: {
        enableIncomingLinkBoost: config.ranking.enableIncomingLinkBoost,
        enableDepthBoost: config.ranking.enableDepthBoost,
        aggregationCap: config.ranking.aggregationCap,
        aggregationDecay: config.ranking.aggregationDecay,
        minChunkScoreRatio: config.ranking.minChunkScoreRatio,
        minScoreRatio: config.ranking.minScoreRatio,
        scoreGapThreshold: config.ranking.scoreGapThreshold,
        weights: { ...config.ranking.weights }
      },
      search: {
        pageSearchWeight: config.search.pageSearchWeight
      }
    });
  });
  app.post("/_searchsocket/search", async (req, res) => {
    try {
      const searchEngine = await getEngine();
      const body = req.body;
      if (!body || typeof body.q !== "string" || body.q.trim().length === 0) {
        res.status(400).json({ error: "Missing or empty 'q' field" });
        return;
      }
      // Only forward fields that have the expected primitive shape.
      const result = await searchEngine.search({
        q: body.q,
        topK: typeof body.topK === "number" ? body.topK : void 0,
        scope: typeof body.scope === "string" ? body.scope : void 0,
        pathPrefix: typeof body.pathPrefix === "string" ? body.pathPrefix : void 0,
        tags: Array.isArray(body.tags) ? body.tags : void 0,
        groupBy: body.groupBy === "page" || body.groupBy === "chunk" ? body.groupBy : void 0,
        debug: body.debug === true,
        rankingOverrides: body.rankingOverrides && typeof body.rankingOverrides === "object" ? body.rankingOverrides : void 0
      });
      res.json(result);
    } catch (error) {
      const message = error instanceof Error ? error.message : "Internal server error";
      // `error` may be any thrown value (including null/undefined), so read
      // statusCode defensively and only honour real HTTP error codes.
      const rawStatus = error?.statusCode;
      const status = Number.isInteger(rawStatus) && rawStatus >= 400 && rawStatus <= 599 ? rawStatus : 500;
      res.status(status).json({ error: message });
    }
  });
  const preferredPort = options.port ?? 3337;
  // Bind to `port`; when it is already in use, retry once with port 0 so the
  // operating system picks a free port.
  function startServer(port) {
    return new Promise((resolve, reject) => {
      let httpServer;
      const onListening = () => {
        const addr = httpServer.address();
        resolve({
          port: addr.port,
          close: () => new Promise((r) => httpServer.close(() => r()))
        });
      };
      httpServer = app.listen(port, "127.0.0.1", onListening);
      httpServer.once("error", (err) => {
        if (err.code === "EADDRINUSE" && port !== 0) {
          startServer(0).then(resolve, reject);
        } else {
          reject(err);
        }
      });
    });
  }
  return startServer(preferredPort);
}
|
|
5325
|
+
|
|
5326
|
+
// src/search/quality-metrics.ts
|
|
5327
|
+
/**
 * Reciprocal rank of the first relevant result.
 *
 * @param results ordered results, each exposing a `url` property
 * @param relevant iterable of URLs considered relevant
 * @returns 1 / (1-based position of the first result whose url is relevant),
 *   or 0 when no result matches
 */
function reciprocalRank(results, relevant) {
  const relevantUrls = new Set(relevant);
  const firstHit = results.findIndex((result) => relevantUrls.has(result.url));
  return firstHit === -1 ? 0 : 1 / (firstHit + 1);
}
|
|
5336
|
+
/**
 * Mean reciprocal rank over a set of evaluated queries.
 *
 * @param queries array of `{ results, relevant }` entries; each is scored
 *   with `reciprocalRank`
 * @returns the average reciprocal rank, or 0 for an empty array
 */
function mrr(queries) {
  const count = queries.length;
  if (count === 0) {
    return 0;
  }
  let total = 0;
  queries.forEach((entry) => {
    total += reciprocalRank(entry.results, entry.relevant);
  });
  return total / count;
}
|
|
5341
|
+
|
|
5342
|
+
// src/cli/test-schemas.ts
|
|
5343
|
+
import { z as z4 } from "zod";
|
|
5344
|
+
// One search-quality test case: a non-empty query plus an `expect` object
// that must carry at least one of `topResult`, `inTop5` or `maxResults`.
var testCaseSchema = z4.object({
  query: z4.string().min(1),
  expect: z4.object({
    topResult: z4.string().optional(),
    inTop5: z4.array(z4.string()).min(1).optional(),
    maxResults: z4.number().int().nonnegative().optional()
  }).refine(
    (expectation) => [expectation.topResult, expectation.inTop5, expectation.maxResults].some(
      (value) => value !== void 0
    ),
    { message: "expect must contain at least one of topResult, inTop5, or maxResults" }
  )
});

// A test file is a non-empty array of test cases.
var testFileSchema = z4.array(testCaseSchema).min(
  1,
  "test file must contain at least one test case"
);
|
|
5356
|
+
|
|
5357
|
+
// src/cli.ts
|
|
5358
|
+
import * as clack from "@clack/prompts";
|
|
5359
|
+
|
|
5360
|
+
// src/init-helpers.ts
|
|
5361
|
+
import fs9 from "fs";
|
|
5362
|
+
import path13 from "path";
|
|
5363
|
+
import { parseModule, generateCode, builders } from "magicast";
|
|
5364
|
+
/**
 * Ensure `<cwd>/.mcp.json` registers the `searchsocket` MCP server.
 *
 * Creates the file when it is missing, otherwise merges the entry into the
 * existing `mcpServers` map while keeping every other key. The file is left
 * untouched (with a warning on stderr) when it cannot be parsed or when its
 * top-level value is not a JSON object, and it is not rewritten when the
 * entry is already identical.
 *
 * @param cwd project directory that holds `.mcp.json`
 */
function ensureMcpJson(cwd) {
  const mcpPath = path13.join(cwd, ".mcp.json");
  const entry = {
    command: "npx",
    args: ["searchsocket", "mcp"],
    env: {
      UPSTASH_VECTOR_REST_URL: "${UPSTASH_VECTOR_REST_URL}",
      UPSTASH_VECTOR_REST_TOKEN: "${UPSTASH_VECTOR_REST_TOKEN}"
    }
  };
  let existing = {};
  if (fs9.existsSync(mcpPath)) {
    let parsed;
    try {
      parsed = JSON.parse(fs9.readFileSync(mcpPath, "utf8"));
    } catch {
      process.stderr.write("warning: .mcp.json exists but could not be parsed \u2014 skipping\n");
      return;
    }
    // Valid JSON is not necessarily an object: `null` and primitives would
    // throw on property access below, and an array would silently drop the
    // `mcpServers` key when serialized. Treat all of them as "do not touch".
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      process.stderr.write("warning: .mcp.json exists but is not a JSON object \u2014 skipping\n");
      return;
    }
    existing = parsed;
  }
  const rawServers = existing.mcpServers ?? {};
  // A malformed `mcpServers` value (array/primitive) is replaced by a fresh map.
  const servers = typeof rawServers === "object" && !Array.isArray(rawServers) ? rawServers : {};
  if (JSON.stringify(servers["searchsocket"]) === JSON.stringify(entry)) {
    return;
  }
  existing.mcpServers = { ...servers, searchsocket: entry };
  fs9.writeFileSync(mcpPath, JSON.stringify(existing, null, 2) + "\n", "utf8");
}
|
|
5392
|
+
// Snippet printed when src/hooks.server.ts cannot be patched automatically;
// it is also the content written when the hooks file is created from scratch.
var HOOKS_SNIPPET = [
  'import { searchsocketHandle } from "searchsocket/sveltekit";',
  "",
  "export const handle = searchsocketHandle();"
].join("\n");

// Snippet printed when the Vite config is missing or cannot be patched.
var VITE_PLUGIN_SNIPPET = [
  'import { searchsocketVitePlugin } from "searchsocket/sveltekit";',
  "",
  "// Add to your Vite config plugins array:",
  "// plugins: [sveltekit(), searchsocketVitePlugin()]"
].join("\n");
|
|
5399
|
+
/**
 * Wire `searchsocketHandle` into the project's `src/hooks.server.{ts,js}`.
 *
 * @param cwd project root
 * @returns one of:
 *   "created"         - no hooks file existed; src/hooks.server.ts was written
 *   "already-present" - the file already mentions `searchsocketHandle`
 *   "composed"        - an existing `handle` export was wrapped in `sequence()`
 *   "injected"        - a `handle` export was added
 *   "fallback"        - parsing or rewriting failed; nothing was written
 */
function injectHooksServerTs(cwd) {
  const srcDir = path13.join(cwd, "src");
  const tsPath = path13.join(srcDir, "hooks.server.ts");
  // The TypeScript file wins over the JavaScript one when both exist.
  const hooksPath = [tsPath, path13.join(srcDir, "hooks.server.js")].find(
    (candidate) => fs9.existsSync(candidate)
  );
  if (!hooksPath) {
    fs9.mkdirSync(srcDir, { recursive: true });
    fs9.writeFileSync(tsPath, HOOKS_SNIPPET + "\n", "utf8");
    return "created";
  }

  const source = fs9.readFileSync(hooksPath, "utf8");
  if (source.includes("searchsocketHandle")) {
    return "already-present";
  }

  try {
    const mod = parseModule(source);
    mod.imports.$append({
      from: "searchsocket/sveltekit",
      imported: "searchsocketHandle"
    });
    const composing = Boolean(mod.exports.handle);
    if (composing) {
      // Keep the user's handle by chaining it after ours via sequence().
      mod.imports.$append({
        from: "@sveltejs/kit/hooks",
        imported: "sequence"
      });
      const previousHandle = mod.exports.handle;
      mod.exports.handle = builders.functionCall(
        "sequence",
        builders.functionCall("searchsocketHandle"),
        previousHandle
      );
    } else {
      mod.exports.handle = builders.functionCall("searchsocketHandle");
    }
    fs9.writeFileSync(hooksPath, generateCode(mod).code, "utf8");
    return composing ? "composed" : "injected";
  } catch {
    // Any parse/codegen failure is reported so the caller can print the
    // manual snippet instead.
    return "fallback";
  }
}
|
|
3061
|
-
|
|
3062
|
-
const
|
|
3063
|
-
|
|
3064
|
-
|
|
3065
|
-
|
|
3066
|
-
|
|
3067
|
-
if (resolvedTransport === "stdio") {
|
|
3068
|
-
redirectConsoleToStderr();
|
|
5442
|
+
/**
 * Add `searchsocketVitePlugin()` to the project's `vite.config.{ts,js}`.
 *
 * @param cwd project root
 * @returns one of:
 *   "no-config"       - neither vite.config.ts nor vite.config.js exists
 *   "already-present" - the config already mentions `searchsocketVitePlugin`
 *   "injected"        - the plugin call was added and the file rewritten
 *   "fallback"        - no default export, or parsing/rewriting failed
 */
function injectViteConfig(cwd) {
  // vite.config.ts is preferred over vite.config.js.
  const configPath = ["vite.config.ts", "vite.config.js"]
    .map((fileName) => path13.join(cwd, fileName))
    .find((candidate) => fs9.existsSync(candidate));
  if (!configPath) {
    return "no-config";
  }

  const source = fs9.readFileSync(configPath, "utf8");
  if (source.includes("searchsocketVitePlugin")) {
    return "already-present";
  }

  try {
    const mod = parseModule(source);
    mod.imports.$append({
      from: "searchsocket/sveltekit",
      imported: "searchsocketVitePlugin"
    });
    const exported = mod.exports.default;
    if (!exported) {
      return "fallback";
    }
    // For `export default someCall({...})` the first call argument is used
    // as the config object.
    const target = exported.$type === "function-call" ? exported.$args[0] : exported;
    if (target.plugins) {
      target.plugins.push(builders.functionCall("searchsocketVitePlugin"));
    } else {
      target.plugins = [builders.functionCall("searchsocketVitePlugin")];
    }
    fs9.writeFileSync(configPath, generateCode(mod).code, "utf8");
    return "injected";
  } catch {
    return "fallback";
  }
}
|
|
5478
|
+
/**
 * Append the Upstash credentials to `<cwd>/.env` and make sure `.env` is
 * listed in `.gitignore`.
 *
 * A variable is only appended when the file has no active assignment for it.
 * An assignment counts as active when a line starts (after optional
 * whitespace and an optional `export`) with the exact variable name followed
 * by `=`. Commented-out lines and longer names that merely contain the
 * variable name do not count.
 *
 * @param cwd project root
 * @param url value for UPSTASH_VECTOR_REST_URL
 * @param token value for UPSTASH_VECTOR_REST_TOKEN
 */
function writeEnvFile(cwd, url, token) {
  const envPath = path13.join(cwd, ".env");
  const content = fs9.existsSync(envPath) ? fs9.readFileSync(envPath, "utf8") : "";
  // Line-anchored match: a plain substring test would treat
  // "# UPSTASH_VECTOR_REST_URL=" or "MY_UPSTASH_VECTOR_REST_URL=" as defined.
  const isDefined = (name) => new RegExp(`^[ \\t]*(?:export[ \\t]+)?${name}[ \\t]*=`, "m").test(content);
  const lines = [];
  if (!isDefined("UPSTASH_VECTOR_REST_URL")) {
    lines.push(`UPSTASH_VECTOR_REST_URL=${url}`);
  }
  if (!isDefined("UPSTASH_VECTOR_REST_TOKEN")) {
    lines.push(`UPSTASH_VECTOR_REST_TOKEN=${token}`);
  }
  if (lines.length > 0) {
    // Start on a fresh line when the existing file lacks a trailing newline.
    const suffix = content.length > 0 && !content.endsWith("\n") ? "\n" : "";
    fs9.writeFileSync(envPath, content + suffix + lines.join("\n") + "\n", "utf8");
  }
  ensureGitignoreEntry(cwd, ".env");
}
|
|
5497
|
+
/**
 * Append `entry` to `<cwd>/.gitignore` unless a line already equals it
 * (compared after trimming surrounding whitespace). The file is created when
 * it does not exist.
 *
 * @param cwd directory containing the .gitignore
 * @param entry exact ignore pattern to add
 */
function ensureGitignoreEntry(cwd, entry) {
  const gitignorePath = path13.join(cwd, ".gitignore");
  const current = fs9.existsSync(gitignorePath) ? fs9.readFileSync(gitignorePath, "utf8") : "";

  const alreadyListed = current.split("\n").some((line) => line.trim() === entry);
  if (alreadyListed) {
    return;
  }

  // Make sure the new entry starts on its own line.
  const separator = current.length > 0 && !current.endsWith("\n") ? "\n" : "";
  fs9.writeFileSync(gitignorePath, current + separator + entry + "\n", "utf8");
}
|
|
5510
|
+
|
|
5511
|
+
// src/add-helpers.ts
|
|
5512
|
+
import fs10 from "fs";
|
|
5513
|
+
import fsp from "fs/promises";
|
|
5514
|
+
import path14 from "path";
|
|
5515
|
+
import { fileURLToPath } from "url";
|
|
5516
|
+
// Directory of this module file; template lookups are resolved against it.
var __dirname = path14.dirname(fileURLToPath(import.meta.url));

// Component names accepted by the helpers below.
var AVAILABLE_COMPONENTS = [
  "search-dialog",
  "search-input",
  "search-results"
];

/** Absolute path of the `templates` directory next to this module. */
function resolveTemplateDir() {
  const templatesPath = path14.resolve(__dirname, "templates");
  return templatesPath;
}

/** Fresh copy of the known component names, so callers cannot mutate the list. */
function listAvailableComponents() {
  return Array.from(AVAILABLE_COMPONENTS);
}

/** Whether `name` is one of the known component names. */
function isValidComponent(name) {
  return AVAILABLE_COMPONENTS.some((component) => component === name);
}
|
|
5527
|
+
/**
 * Copy every `.svelte` file of a component template into `targetDir`.
 *
 * @param name template sub-directory name under `resolveTemplateDir()`
 * @param targetDir destination directory (created recursively if needed)
 * @param options `overwrite: true` replaces files that already exist;
 *   otherwise existing files are left alone and reported as skipped
 * @returns `{ written, skipped }` - destination paths that were copied and
 *   destination paths that were left untouched
 * @throws Error when the template directory is missing or has no .svelte files
 */
async function copyComponent(name, targetDir, options = {}) {
  const templateDir = path14.join(resolveTemplateDir(), name);
  if (!fs10.existsSync(templateDir)) {
    throw new Error(
      `Template directory not found: ${templateDir}. Run "pnpm run build" to generate templates.`
    );
  }

  const svelteFiles = (await fsp.readdir(templateDir)).filter((entry) => entry.endsWith(".svelte"));
  if (svelteFiles.length === 0) {
    throw new Error(`No .svelte files found in template: ${name}`);
  }

  await fsp.mkdir(targetDir, { recursive: true });

  const outcome = { written: [], skipped: [] };
  for (const fileName of svelteFiles) {
    const destination = path14.join(targetDir, fileName);
    const keepExisting = fs10.existsSync(destination) && !options.overwrite;
    if (keepExisting) {
      outcome.skipped.push(destination);
    } else {
      await fsp.copyFile(path14.join(templateDir, fileName), destination);
      outcome.written.push(destination);
    }
  }
  return outcome;
}
|
|
3083
5553
|
|
|
3084
5554
|
// src/cli.ts
|
|
@@ -3117,6 +5587,10 @@ function parseDurationMs(value) {
|
|
|
3117
5587
|
}
|
|
3118
5588
|
function printIndexSummary(stats) {
|
|
3119
5589
|
process.stdout.write(`pages processed: ${stats.pagesProcessed}
|
|
5590
|
+
`);
|
|
5591
|
+
process.stdout.write(`pages changed: ${stats.pagesChanged}
|
|
5592
|
+
`);
|
|
5593
|
+
process.stdout.write(`pages deleted: ${stats.pagesDeleted}
|
|
3120
5594
|
`);
|
|
3121
5595
|
process.stdout.write(`chunks total: ${stats.chunksTotal}
|
|
3122
5596
|
`);
|
|
@@ -3138,7 +5612,7 @@ function collectWatchPaths(config, cwd) {
|
|
|
3138
5612
|
const paths = ["src/routes/**"];
|
|
3139
5613
|
if (config.source.mode === "content-files" && config.source.contentFiles) {
|
|
3140
5614
|
for (const pattern of config.source.contentFiles.globs) {
|
|
3141
|
-
paths.push(
|
|
5615
|
+
paths.push(path15.join(config.source.contentFiles.baseDir, pattern));
|
|
3142
5616
|
}
|
|
3143
5617
|
}
|
|
3144
5618
|
if (config.source.mode === "static-output") {
|
|
@@ -3151,22 +5625,22 @@ function collectWatchPaths(config, cwd) {
|
|
|
3151
5625
|
paths.push("searchsocket.config.ts");
|
|
3152
5626
|
paths.push(config.source.build.outputDir);
|
|
3153
5627
|
}
|
|
3154
|
-
return paths.map((value) =>
|
|
5628
|
+
return paths.map((value) => path15.resolve(cwd, value));
|
|
3155
5629
|
}
|
|
3156
5630
|
/**
 * Create `<cwd>/.searchsocket` (including missing parents) if needed.
 *
 * @param cwd project root
 * @returns the path of the state directory
 */
function ensureStateDir(cwd) {
  const stateDir = path15.join(cwd, ".searchsocket");
  // `recursive: true` makes this a no-op when the directory already exists.
  fs11.mkdirSync(stateDir, { recursive: true });
  return stateDir;
}
|
|
3161
5635
|
function ensureGitignore(cwd) {
|
|
3162
|
-
const gitignorePath =
|
|
5636
|
+
const gitignorePath = path15.join(cwd, ".gitignore");
|
|
3163
5637
|
const entries = [
|
|
3164
5638
|
".searchsocket/manifest.json",
|
|
3165
5639
|
".searchsocket/registry.json"
|
|
3166
5640
|
];
|
|
3167
5641
|
let content = "";
|
|
3168
|
-
if (
|
|
3169
|
-
content =
|
|
5642
|
+
if (fs11.existsSync(gitignorePath)) {
|
|
5643
|
+
content = fs11.readFileSync(gitignorePath, "utf8");
|
|
3170
5644
|
}
|
|
3171
5645
|
const lines = content.split("\n");
|
|
3172
5646
|
const missing = entries.filter((entry) => !lines.some((line) => line.trim() === entry));
|
|
@@ -3177,10 +5651,10 @@ function ensureGitignore(cwd) {
|
|
|
3177
5651
|
# SearchSocket local state
|
|
3178
5652
|
${missing.join("\n")}
|
|
3179
5653
|
`;
|
|
3180
|
-
|
|
5654
|
+
fs11.writeFileSync(gitignorePath, content.trimEnd() + block, "utf8");
|
|
3181
5655
|
}
|
|
3182
5656
|
function readScopesFromFile(filePath) {
|
|
3183
|
-
const raw =
|
|
5657
|
+
const raw = fs11.readFileSync(filePath, "utf8");
|
|
3184
5658
|
return new Set(
|
|
3185
5659
|
raw.split(/\r?\n/).map((line) => line.trim()).filter(Boolean)
|
|
3186
5660
|
);
|
|
@@ -3204,8 +5678,8 @@ function readRemoteGitBranches(cwd) {
|
|
|
3204
5678
|
}
|
|
3205
5679
|
}
|
|
3206
5680
|
async function loadResolvedConfigForDev(cwd, configPath) {
|
|
3207
|
-
const resolvedConfigPath =
|
|
3208
|
-
if (
|
|
5681
|
+
const resolvedConfigPath = path15.resolve(cwd, configPath ?? "searchsocket.config.ts");
|
|
5682
|
+
if (fs11.existsSync(resolvedConfigPath)) {
|
|
3209
5683
|
return loadConfig({ cwd, configPath });
|
|
3210
5684
|
}
|
|
3211
5685
|
return mergeConfig(cwd, {});
|
|
@@ -3248,31 +5722,157 @@ async function runIndexCommand(opts) {
|
|
|
3248
5722
|
printIndexSummary(stats);
|
|
3249
5723
|
}
|
|
3250
5724
|
}
|
|
3251
|
-
|
|
3252
|
-
|
|
3253
|
-
|
|
3254
|
-
|
|
3255
|
-
const
|
|
5725
|
+
/**
 * Interactive `searchsocket init` flow driven by @clack/prompts.
 *
 * Steps: write the config/state/.gitignore/.mcp.json files, offer to store
 * missing Upstash credentials in `.env`, then try to patch hooks.server.ts
 * and the Vite config, reporting the outcome of each step. Cancelling any
 * prompt prints "Setup cancelled." and exits the process with code 0.
 *
 * @param cwd project root to initialize
 */
async function runInteractiveInit(cwd) {
  // Shared cancel handling for every prompt result.
  const exitIfCancelled = (value) => {
    if (clack.isCancel(value)) {
      clack.cancel("Setup cancelled.");
      process.exit(0);
    }
    return value;
  };
  // Text prompt that rejects empty input with `missingMessage`.
  const askRequired = async (message, placeholder, missingMessage) => exitIfCancelled(
    await clack.text({
      message,
      placeholder,
      validate: (v) => !v ? missingMessage : void 0
    })
  );

  clack.intro("searchsocket setup");
  const s = clack.spinner();
  s.start("Creating config files");
  writeMinimalConfig(cwd);
  ensureStateDir(cwd);
  ensureGitignore(cwd);
  ensureMcpJson(cwd);
  s.stop("Config files created");

  const envUrl = process.env.UPSTASH_VECTOR_REST_URL;
  const envToken = process.env.UPSTASH_VECTOR_REST_TOKEN;
  if (envUrl && envToken) {
    clack.log.success("Upstash credentials found in environment.");
  } else {
    clack.log.warn("Upstash Search credentials not found in environment.");
    const shouldConfigure = exitIfCancelled(
      await clack.confirm({
        message: "Would you like to configure Upstash credentials now?",
        initialValue: true
      })
    );
    if (shouldConfigure) {
      // Only prompt for the value(s) that are not already in the environment.
      const url = envUrl || await askRequired(
        "Upstash Search REST URL:",
        "https://your-index.upstash.io",
        "URL is required"
      );
      const token = envToken || await askRequired(
        "Upstash Search REST Token:",
        "AX...",
        "Token is required"
      );
      writeEnvFile(cwd, url, token);
      clack.log.success("Credentials written to .env");
    }
  }

  s.start("Configuring hooks.server.ts");
  const hookResult = injectHooksServerTs(cwd);
  // Do not claim success on the spinner when the user still has to act.
  s.stop(hookResult === "fallback" ? "hooks.server.ts needs manual setup" : "hooks.server.ts configured");
  switch (hookResult) {
    case "created":
      clack.log.success("Created src/hooks.server.ts with searchsocketHandle.");
      break;
    case "injected":
      clack.log.success("Added searchsocketHandle to src/hooks.server.ts.");
      break;
    case "composed":
      clack.log.success("Composed searchsocketHandle with existing handle using sequence().");
      break;
    case "already-present":
      clack.log.info("searchsocketHandle already configured in hooks.server.ts.");
      break;
    case "fallback":
      clack.log.warn("Could not auto-inject hooks.server.ts. Add manually:");
      clack.log.message(HOOKS_SNIPPET);
      break;
  }

  s.start("Configuring Vite plugin");
  const viteResult = injectViteConfig(cwd);
  const viteNeedsManualSetup = viteResult === "fallback" || viteResult === "no-config";
  s.stop(viteNeedsManualSetup ? "Vite plugin needs manual setup" : "Vite plugin configured");
  switch (viteResult) {
    case "injected":
      clack.log.success("Added searchsocketVitePlugin to Vite config.");
      break;
    case "already-present":
      clack.log.info("searchsocketVitePlugin already in Vite config.");
      break;
    case "no-config":
      clack.log.warn("No vite.config.ts/js found. Add the plugin manually:");
      clack.log.message(VITE_PLUGIN_SNIPPET);
      break;
    case "fallback":
      clack.log.warn("Could not auto-inject Vite config. Add manually:");
      clack.log.message(VITE_PLUGIN_SNIPPET);
      break;
  }

  clack.log.info("Run `searchsocket doctor` to verify your setup.");
  clack.outro("SearchSocket initialized! Run `searchsocket index` to index your site.");
}
|
|
5814
|
+
/**
 * Non-interactive `searchsocket init`: performs the same file setup as the
 * interactive flow but never prompts, reporting each step as a plain line on
 * stdout.
 *
 * @param cwd project root to initialize
 */
async function runSilentInit(cwd) {
  const emit = (text) => process.stdout.write(text);

  const configPath = writeMinimalConfig(cwd);
  const stateDir = ensureStateDir(cwd);
  ensureGitignore(cwd);
  ensureMcpJson(cwd);
  emit(`created/verified config: ${configPath}
`);
  emit(`created/verified state dir: ${stateDir}
`);
  emit("created/verified .mcp.json (MCP server config for Claude Code)\n\n");

  // Outcome -> chunks written to stdout, in order. Unknown outcomes print nothing.
  const hookMessages = new Map([
    ["created", ["created src/hooks.server.ts with searchsocketHandle\n"]],
    ["injected", ["added searchsocketHandle to src/hooks.server.ts\n"]],
    ["composed", ["composed searchsocketHandle with existing handle via sequence()\n"]],
    ["already-present", ["searchsocketHandle already present in hooks.server.ts\n"]],
    ["fallback", ["could not auto-inject hooks.server.ts \u2014 add manually:\n\n", HOOKS_SNIPPET + "\n\n"]]
  ]);
  (hookMessages.get(injectHooksServerTs(cwd)) ?? []).forEach(emit);

  const viteMessages = new Map([
    ["injected", ["added searchsocketVitePlugin to Vite config\n"]],
    ["already-present", ["searchsocketVitePlugin already in Vite config\n"]],
    ["no-config", ["no vite.config.ts/js found \u2014 add plugin manually:\n\n", VITE_PLUGIN_SNIPPET + "\n\n"]],
    ["fallback", ["could not auto-inject Vite config \u2014 add manually:\n\n", VITE_PLUGIN_SNIPPET + "\n\n"]]
  ]);
  (viteMessages.get(injectViteConfig(cwd)) ?? []).forEach(emit);
}
|
|
5861
|
+
// Root CLI program. The two options declared here (-C/--cwd and --config) are
// global: subcommands read them back through getRootOptions(command).
var program = new Command();
program.name("searchsocket");
program.description("Semantic site search and MCP retrieval for SvelteKit");
program.version(package_default.version);
program.option("-C, --cwd <path>", "working directory", process.cwd());
program.option("--config <path>", "config path (defaults to searchsocket.config.ts)");
|
|
5863
|
+
// `searchsocket init`: runs the interactive wizard only when stdout is a TTY
// and --non-interactive was not passed; otherwise falls back to the silent,
// line-oriented initializer.
program
  .command("init")
  .description("Initialize SearchSocket in a SvelteKit project")
  .option("--non-interactive", "skip interactive prompts")
  .action(async (opts, command) => {
    const cwd = path15.resolve(getRootOptions(command).cwd ?? process.cwd());
    const attachedToTerminal = Boolean(process.stdout.isTTY);
    const wantsPrompts = attachedToTerminal && !opts.nonInteractive;
    const initialize = wantsPrompts ? runInteractiveInit : runSilentInit;
    await initialize(cwd);
  });
|
|
3273
5873
|
program.command("index").description("Index site content into Upstash Search").option("--scope <name>", "scope override").option("--changed-only", "only process changed chunks", true).option("--no-changed-only", "re-index regardless of previous manifest").option("--force", "force full rebuild", false).option("--dry-run", "compute plan, no writes", false).option("--source <mode>", "source mode override: static-output|crawl|content-files|build").option("--max-pages <n>", "limit pages processed").option("--max-chunks <n>", "limit chunks processed").option("--quiet", "suppress all output except errors and warnings", false).option("--verbose", "verbose output", false).option("--json", "emit JSON logs and summary", false).action(async (opts, command) => {
|
|
3274
5874
|
const rootOpts = getRootOptions(command);
|
|
3275
|
-
const cwd =
|
|
5875
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3276
5876
|
await runIndexCommand({
|
|
3277
5877
|
cwd,
|
|
3278
5878
|
configPath: rootOpts?.config,
|
|
@@ -3290,7 +5890,7 @@ program.command("index").description("Index site content into Upstash Search").o
|
|
|
3290
5890
|
});
|
|
3291
5891
|
program.command("status").description("Show scope, indexing state, and backend health").option("--scope <name>", "scope override").action(async (opts, command) => {
|
|
3292
5892
|
const rootOpts = getRootOptions(command);
|
|
3293
|
-
const cwd =
|
|
5893
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3294
5894
|
const config = await loadConfig({ cwd, configPath: rootOpts?.config });
|
|
3295
5895
|
const scope = resolveScope(config, opts.scope);
|
|
3296
5896
|
let store;
|
|
@@ -3329,7 +5929,7 @@ program.command("status").description("Show scope, indexing state, and backend h
|
|
|
3329
5929
|
`);
|
|
3330
5930
|
process.stdout.write(`resolved scope: ${scope.scopeName}
|
|
3331
5931
|
`);
|
|
3332
|
-
process.stdout.write(`backend: upstash-
|
|
5932
|
+
process.stdout.write(`backend: upstash-vector
|
|
3333
5933
|
`);
|
|
3334
5934
|
process.stdout.write(`backend health: ${health.ok ? "ok" : `error (${health.details ?? "n/a"})`}
|
|
3335
5935
|
`);
|
|
@@ -3354,19 +5954,31 @@ program.command("status").description("Show scope, indexing state, and backend h
|
|
|
3354
5954
|
}
|
|
3355
5955
|
}
|
|
3356
5956
|
});
|
|
3357
|
-
program.command("dev").description("Watch content files/routes and incrementally reindex on changes").option("--scope <name>", "scope override").option("--mcp", "start MCP server (http transport) alongside watcher", false).option("--mcp-port <n>", "MCP HTTP port", "3338").option("--mcp-path <path>", "MCP HTTP path", "/mcp").option("--verbose", "verbose logs", false).action(async (opts, command) => {
|
|
5957
|
+
program.command("dev").description("Watch content files/routes and incrementally reindex on changes").option("--scope <name>", "scope override").option("--playground", "serve playground UI at /_searchsocket (default: true)", true).option("--no-playground", "disable playground UI").option("--playground-port <n>", "playground HTTP port", "3337").option("--mcp", "start MCP server (http transport) alongside watcher", false).option("--mcp-port <n>", "MCP HTTP port", "3338").option("--mcp-path <path>", "MCP HTTP path", "/mcp").option("--verbose", "verbose logs", false).action(async (opts, command) => {
|
|
3358
5958
|
const rootOpts = getRootOptions(command);
|
|
3359
|
-
const cwd =
|
|
5959
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3360
5960
|
const config = await loadResolvedConfigForDev(cwd, rootOpts?.config);
|
|
3361
5961
|
const watchPaths = collectWatchPaths(config, cwd);
|
|
3362
5962
|
process.stdout.write("starting searchsocket dev watcher...\n");
|
|
3363
5963
|
process.stdout.write(`watching:
|
|
3364
5964
|
${watchPaths.map((entry) => ` - ${entry}`).join("\n")}
|
|
3365
5965
|
`);
|
|
5966
|
+
const upstashUrl = config.upstash.url ?? process.env[config.upstash.urlEnv];
|
|
5967
|
+
const upstashToken = config.upstash.token ?? process.env[config.upstash.tokenEnv];
|
|
5968
|
+
const backendMissing = !upstashUrl || !upstashToken;
|
|
5969
|
+
if (backendMissing) {
|
|
5970
|
+
process.stdout.write(
|
|
5971
|
+
`Search backend not configured \u2014 set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} to enable indexing. Watching for file changes only.
|
|
5972
|
+
`
|
|
5973
|
+
);
|
|
5974
|
+
}
|
|
3366
5975
|
let running = false;
|
|
3367
5976
|
let pending = false;
|
|
3368
5977
|
let timer = null;
|
|
3369
5978
|
const run = async () => {
|
|
5979
|
+
if (backendMissing) {
|
|
5980
|
+
return;
|
|
5981
|
+
}
|
|
3370
5982
|
if (running) {
|
|
3371
5983
|
pending = true;
|
|
3372
5984
|
return;
|
|
@@ -3417,18 +6029,40 @@ ${watchPaths.map((entry) => ` - ${entry}`).join("\n")}
|
|
|
3417
6029
|
httpPath: opts.mcpPath
|
|
3418
6030
|
});
|
|
3419
6031
|
}
|
|
6032
|
+
let closePlayground;
|
|
6033
|
+
if (opts.playground) {
|
|
6034
|
+
if (backendMissing) {
|
|
6035
|
+
process.stdout.write("playground disabled: search backend not configured\n");
|
|
6036
|
+
} else {
|
|
6037
|
+
void runPlaygroundServer({
|
|
6038
|
+
cwd,
|
|
6039
|
+
configPath: rootOpts?.config,
|
|
6040
|
+
config,
|
|
6041
|
+
port: parsePositiveInt(opts.playgroundPort, "--playground-port")
|
|
6042
|
+
}).then(({ port, close }) => {
|
|
6043
|
+
closePlayground = close;
|
|
6044
|
+
process.stdout.write(`playground available at http://127.0.0.1:${port}/_searchsocket
|
|
6045
|
+
`);
|
|
6046
|
+
}).catch((err) => {
|
|
6047
|
+
process.stderr.write(`playground error: ${err instanceof Error ? err.message : String(err)}
|
|
6048
|
+
`);
|
|
6049
|
+
});
|
|
6050
|
+
}
|
|
6051
|
+
}
|
|
3420
6052
|
await new Promise((resolve) => {
|
|
3421
6053
|
process.on("SIGINT", () => {
|
|
3422
|
-
|
|
6054
|
+
const cleanups = [watcher.close()];
|
|
6055
|
+
if (closePlayground) cleanups.push(closePlayground());
|
|
6056
|
+
void Promise.all(cleanups).then(() => resolve());
|
|
3423
6057
|
});
|
|
3424
6058
|
});
|
|
3425
6059
|
});
|
|
3426
6060
|
program.command("clean").description("Delete local state and optionally delete remote indexes for a scope").option("--scope <name>", "scope override").option("--remote", "delete remote scope indexes", false).action(async (opts, command) => {
|
|
3427
6061
|
const rootOpts = getRootOptions(command);
|
|
3428
|
-
const cwd =
|
|
6062
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3429
6063
|
const config = await loadConfig({ cwd, configPath: rootOpts?.config });
|
|
3430
|
-
const statePath =
|
|
3431
|
-
await
|
|
6064
|
+
const statePath = path15.join(cwd, config.state.dir);
|
|
6065
|
+
await fsp2.rm(statePath, { recursive: true, force: true });
|
|
3432
6066
|
process.stdout.write(`deleted local state directory: ${statePath}
|
|
3433
6067
|
`);
|
|
3434
6068
|
if (opts.remote) {
|
|
@@ -3440,7 +6074,7 @@ program.command("clean").description("Delete local state and optionally delete r
|
|
|
3440
6074
|
});
|
|
3441
6075
|
program.command("prune").description("List/delete stale scopes (dry-run by default)").option("--apply", "apply deletions", false).option("--scopes-file <path>", "file containing active scopes").option("--older-than <duration>", "ttl cutoff like 30d").action(async (opts, command) => {
|
|
3442
6076
|
const rootOpts = getRootOptions(command);
|
|
3443
|
-
const cwd =
|
|
6077
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3444
6078
|
const config = await loadConfig({ cwd, configPath: rootOpts?.config });
|
|
3445
6079
|
const baseScope = resolveScope(config);
|
|
3446
6080
|
let store;
|
|
@@ -3450,17 +6084,17 @@ program.command("prune").description("List/delete stale scopes (dry-run by defau
|
|
|
3450
6084
|
scopes = await store.listScopes(config.project.id);
|
|
3451
6085
|
} catch (error) {
|
|
3452
6086
|
process.stderr.write(
|
|
3453
|
-
`error: failed to access Upstash
|
|
6087
|
+
`error: failed to access Upstash Vector: ${error instanceof Error ? error.message : String(error)}
|
|
3454
6088
|
`
|
|
3455
6089
|
);
|
|
3456
6090
|
process.exitCode = 1;
|
|
3457
6091
|
return;
|
|
3458
6092
|
}
|
|
3459
|
-
process.stdout.write(`using Upstash
|
|
6093
|
+
process.stdout.write(`using Upstash Vector
|
|
3460
6094
|
`);
|
|
3461
6095
|
let keepScopes = /* @__PURE__ */ new Set();
|
|
3462
6096
|
if (opts.scopesFile) {
|
|
3463
|
-
keepScopes = readScopesFromFile(
|
|
6097
|
+
keepScopes = readScopesFromFile(path15.resolve(cwd, opts.scopesFile));
|
|
3464
6098
|
} else {
|
|
3465
6099
|
keepScopes = readRemoteGitBranches(cwd);
|
|
3466
6100
|
}
|
|
@@ -3531,7 +6165,7 @@ program.command("prune").description("List/delete stale scopes (dry-run by defau
|
|
|
3531
6165
|
});
|
|
3532
6166
|
program.command("doctor").description("Validate config, env vars, provider connectivity, and local write access").action(async (_opts, command) => {
|
|
3533
6167
|
const rootOpts = getRootOptions(command);
|
|
3534
|
-
const cwd =
|
|
6168
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3535
6169
|
const checks = [];
|
|
3536
6170
|
let config = null;
|
|
3537
6171
|
try {
|
|
@@ -3558,8 +6192,8 @@ program.command("doctor").description("Validate config, env vars, provider conne
|
|
|
3558
6192
|
details: upstashToken ? void 0 : "missing"
|
|
3559
6193
|
});
|
|
3560
6194
|
if (config.source.mode === "static-output") {
|
|
3561
|
-
const outputDir =
|
|
3562
|
-
const exists =
|
|
6195
|
+
const outputDir = path15.resolve(cwd, config.source.staticOutputDir);
|
|
6196
|
+
const exists = fs11.existsSync(outputDir);
|
|
3563
6197
|
checks.push({
|
|
3564
6198
|
name: "source: static output dir",
|
|
3565
6199
|
ok: exists,
|
|
@@ -3568,15 +6202,15 @@ program.command("doctor").description("Validate config, env vars, provider conne
|
|
|
3568
6202
|
} else if (config.source.mode === "build") {
|
|
3569
6203
|
const buildConfig = config.source.build;
|
|
3570
6204
|
if (buildConfig) {
|
|
3571
|
-
const manifestPath =
|
|
3572
|
-
const manifestExists =
|
|
6205
|
+
const manifestPath = path15.resolve(cwd, buildConfig.outputDir, "server", "manifest-full.js");
|
|
6206
|
+
const manifestExists = fs11.existsSync(manifestPath);
|
|
3573
6207
|
checks.push({
|
|
3574
6208
|
name: "source: build manifest",
|
|
3575
6209
|
ok: manifestExists,
|
|
3576
6210
|
details: manifestExists ? manifestPath : `${manifestPath} not found (run \`vite build\` first)`
|
|
3577
6211
|
});
|
|
3578
|
-
const viteBin =
|
|
3579
|
-
const viteExists =
|
|
6212
|
+
const viteBin = path15.resolve(cwd, "node_modules", ".bin", "vite");
|
|
6213
|
+
const viteExists = fs11.existsSync(viteBin);
|
|
3580
6214
|
checks.push({
|
|
3581
6215
|
name: "source: vite binary",
|
|
3582
6216
|
ok: viteExists,
|
|
@@ -3593,7 +6227,7 @@ program.command("doctor").description("Validate config, env vars, provider conne
|
|
|
3593
6227
|
const contentConfig = config.source.contentFiles;
|
|
3594
6228
|
if (contentConfig) {
|
|
3595
6229
|
const fg4 = await import("fast-glob");
|
|
3596
|
-
const baseDir =
|
|
6230
|
+
const baseDir = path15.resolve(cwd, contentConfig.baseDir);
|
|
3597
6231
|
const files = await fg4.default(contentConfig.globs, { cwd: baseDir, onlyFiles: true });
|
|
3598
6232
|
checks.push({
|
|
3599
6233
|
name: "source: content files",
|
|
@@ -3627,9 +6261,9 @@ program.command("doctor").description("Validate config, env vars, provider conne
|
|
|
3627
6261
|
try {
|
|
3628
6262
|
const scope = resolveScope(config);
|
|
3629
6263
|
const { statePath } = ensureStateDirs(cwd, config.state.dir, scope);
|
|
3630
|
-
const testPath =
|
|
3631
|
-
await
|
|
3632
|
-
await
|
|
6264
|
+
const testPath = path15.join(statePath, ".write-test");
|
|
6265
|
+
await fsp2.writeFile(testPath, "ok\n", "utf8");
|
|
6266
|
+
await fsp2.rm(testPath, { force: true });
|
|
3633
6267
|
checks.push({ name: "state directory writable", ok: true });
|
|
3634
6268
|
} catch (error) {
|
|
3635
6269
|
checks.push({
|
|
@@ -3654,20 +6288,22 @@ program.command("doctor").description("Validate config, env vars, provider conne
|
|
|
3654
6288
|
process.exitCode = 1;
|
|
3655
6289
|
}
|
|
3656
6290
|
});
|
|
3657
|
-
program.command("mcp").description("Run SearchSocket MCP server").option("--transport <transport>", "stdio|http", "stdio").option("--port <n>", "HTTP port", "3338").option("--path <path>", "HTTP path", "/mcp").action(async (opts, command) => {
|
|
6291
|
+
// `searchsocket mcp`: starts the MCP server. Transport, port and path have CLI
// defaults; --access is restricted to "public" | "private" by commander, and
// --api-key is forwarded untouched to runMcpServer.
program
  .command("mcp")
  .description("Run SearchSocket MCP server")
  .option("--transport <transport>", "stdio|http", "stdio")
  .option("--port <n>", "HTTP port", "3338")
  .option("--path <path>", "HTTP path", "/mcp")
  .addOption(new Option("--access <mode>", "access mode").choices(["public", "private"]))
  .option("--api-key <key>", "API key for public access mode")
  .action(async (opts, command) => {
    const rootOpts = getRootOptions(command);
    const { transport, port, path: httpPath, access, apiKey } = opts;
    const serverOptions = {
      cwd: path15.resolve(rootOpts?.cwd ?? process.cwd()),
      configPath: rootOpts?.config,
      transport,
      // Commander hands the port over as a string; validate and convert it here.
      httpPort: parsePositiveInt(port, "--port"),
      httpPath,
      access,
      apiKey
    };
    await runMcpServer(serverOptions);
  });
|
|
3668
6304
|
program.command("search").description("Quick CLI search against Upstash Search").requiredOption("--q <query>", "search query").option("--scope <name>", "scope override").option("--top-k <n>", "top K results", "10").option("--path-prefix <prefix>", "path prefix filter").action(async (opts, command) => {
|
|
3669
6305
|
const rootOpts = getRootOptions(command);
|
|
3670
|
-
const cwd =
|
|
6306
|
+
const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
|
|
3671
6307
|
const engine = await SearchEngine.create({
|
|
3672
6308
|
cwd,
|
|
3673
6309
|
configPath: rootOpts?.config
|
|
@@ -3681,8 +6317,156 @@ program.command("search").description("Quick CLI search against Upstash Search")
|
|
|
3681
6317
|
process.stdout.write(`${JSON.stringify(result, null, 2)}
|
|
3682
6318
|
`);
|
|
3683
6319
|
});
|
|
6320
|
+
// `searchsocket test`: runs search-quality assertions from a JSON file against
// the live index.
//
// The file is read relative to the resolved working directory, parsed as JSON
// and validated with testFileSchema. Each test case supplies a `query` plus an
// `expect` object that may contain any of:
//   - topResult:  URL that must be the first result
//   - inTop5:     URLs that must all appear within the first five results
//   - maxResults: upper bound on the number of results returned
// Every present expectation counts as one assertion and prints a PASS/FAIL
// line. A search that throws counts as a single failure for that case.
// topResult and inTop5 each add an entry to the data handed to mrr(), so a case
// using both contributes twice to the reported MRR value.
//
// process.exitCode is set to 1 when the file cannot be read, parsed or
// validated, or when at least one assertion fails.
program
  .command("test")
  .description("Run search quality assertions against the live index")
  .option("--file <path>", "path to test file", "searchsocket.test.json")
  .option("--scope <name>", "scope override")
  .option("--top-k <n>", "results per query", "10")
  .action(async (opts, command) => {
    const rootOpts = getRootOptions(command);
    const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
    const topK = parsePositiveInt(opts.topK, "--top-k");
    const filePath = path15.resolve(cwd, opts.file);

    let rawContent;
    try {
      rawContent = await fsp2.readFile(filePath, "utf8");
    } catch (error) {
      // Only a missing file is "not found"; permission problems, directories,
      // etc. are reported with their real cause instead of being mislabeled.
      if (error?.code === "ENOENT") {
        process.stderr.write(`error: test file not found: ${filePath}\n`);
      } else {
        const reason = error instanceof Error ? error.message : String(error);
        process.stderr.write(`error: failed to read test file ${filePath}: ${reason}\n`);
      }
      process.exitCode = 1;
      return;
    }

    let rawJson;
    try {
      rawJson = JSON.parse(rawContent);
    } catch (error) {
      // Keep the original message prefix and append the parser's explanation.
      const reason = error instanceof Error ? error.message : String(error);
      process.stderr.write(`error: invalid JSON in ${filePath}: ${reason}\n`);
      process.exitCode = 1;
      return;
    }

    const parsed = testFileSchema.safeParse(rawJson);
    if (!parsed.success) {
      process.stderr.write(
        `error: invalid test file: ${parsed.error.issues[0]?.message ?? "unknown error"}\n`
      );
      process.exitCode = 1;
      return;
    }
    const testCases = parsed.data;

    const engine = await SearchEngine.create({
      cwd,
      configPath: rootOpts?.config
    });

    let passed = 0;
    let failed = 0;
    const mrrData = [];

    for (const tc of testCases) {
      let results;
      try {
        const response = await engine.search({
          q: tc.query,
          topK,
          scope: opts.scope
        });
        results = response.results;
      } catch (error) {
        const msg = error instanceof Error ? error.message : String(error);
        process.stdout.write(`FAIL "${tc.query}" \u2192 search error: ${msg}\n`);
        failed++;
        continue;
      }

      if (tc.expect.topResult !== void 0) {
        const expectedUrl = tc.expect.topResult;
        // findIndex yields -1 when absent, so rank 0 means "not in the results".
        const rank = results.findIndex((r) => r.url === expectedUrl) + 1;
        mrrData.push({ results, relevant: [expectedUrl] });
        if (rank === 1) {
          process.stdout.write(`PASS "${tc.query}" \u2192 ${expectedUrl} at rank 1\n`);
          passed++;
        } else {
          const detail = rank === 0 ? "not found" : `got rank ${rank}`;
          process.stdout.write(
            `FAIL "${tc.query}" \u2192 expected ${expectedUrl} at rank 1, ${detail}\n`
          );
          failed++;
        }
      }

      if (tc.expect.inTop5 !== void 0) {
        const expectedUrls = tc.expect.inTop5;
        const top5Urls = results.slice(0, 5).map((r) => r.url);
        const missing = expectedUrls.filter((url) => !top5Urls.includes(url));
        mrrData.push({ results, relevant: expectedUrls });
        if (missing.length === 0) {
          process.stdout.write(`PASS "${tc.query}" \u2192 all expected URLs in top 5\n`);
          passed++;
        } else {
          // Report where each missing URL actually landed (if anywhere).
          const missingDetail = missing
            .map((url) => {
              const rank = results.findIndex((r) => r.url === url) + 1;
              return rank === 0 ? `${url} (not found)` : `${url} (rank ${rank})`;
            })
            .join(", ");
          process.stdout.write(`FAIL "${tc.query}" \u2192 missing from top 5: ${missingDetail}\n`);
          failed++;
        }
      }

      if (tc.expect.maxResults !== void 0) {
        const max = tc.expect.maxResults;
        const actual = results.length;
        if (actual <= max) {
          process.stdout.write(`PASS "${tc.query}" \u2192 ${actual} results (max ${max})\n`);
          passed++;
        } else {
          process.stdout.write(
            `FAIL "${tc.query}" \u2192 expected at most ${max} results, got ${actual}\n`
          );
          failed++;
        }
      }
    }

    const total = passed + failed;
    process.stdout.write(`\nresults: ${passed} passed, ${failed} failed of ${total} assertions\n`);
    if (mrrData.length > 0) {
      const mrrValue = mrr(mrrData);
      process.stdout.write(`MRR: ${mrrValue.toFixed(4)}\n`);
    }
    // Guard the division so an empty test file prints 0.0% rather than NaN%.
    process.stdout.write(
      `pass rate: ${total > 0 ? (passed / total * 100).toFixed(1) : "0.0"}%\n`
    );
    if (failed > 0) {
      process.exitCode = 1;
    }
  });
|
|
6437
|
+
// `searchsocket add <component>`: copies a Svelte 5 search component template
// into the project (default target: src/lib/components/search).
//
// Unknown component names print the list of available components to stderr and
// fail with exit code 1. Files reported by copyComponent as written or skipped
// are listed relative to the working directory, and an import hint is printed
// for the first file that was written.
program
  .command("add <component>")
  .description("Copy a Svelte 5 search component template into your project")
  .option("--dir <path>", "output directory", "src/lib/components/search")
  .option("--overwrite", "overwrite existing files", false)
  .action(async (component, opts, command) => {
    const root = getRootOptions(command).cwd ?? process.cwd();
    const cwd = path15.resolve(root);

    if (!isValidComponent(component)) {
      const available = listAvailableComponents();
      process.stderr.write(`unknown component: ${component}\n`);
      process.stderr.write(`available components: ${available.join(", ")}\n`);
      // Set the exit code and return instead of calling process.exit(): this
      // lets pending stderr output flush and matches how the other commands
      // in this CLI signal failure.
      process.exitCode = 1;
      return;
    }

    const targetDir = path15.resolve(cwd, opts.dir);
    const result = await copyComponent(component, targetDir, { overwrite: opts.overwrite });

    for (const filePath of result.written) {
      process.stdout.write(`created: ${path15.relative(cwd, filePath)}\n`);
    }
    for (const filePath of result.skipped) {
      process.stdout.write(`skipped (exists): ${path15.relative(cwd, filePath)}\n`);
    }

    const firstWritten = result.written[0];
    if (firstWritten) {
      process.stdout.write(`\nUsage:\n`);
      const fileName = path15.basename(firstWritten, ".svelte");
      // Normalize Windows separators so the printed import path uses "/".
      const importPath = path15.relative(cwd, firstWritten).replace(/\\/g, "/");
      process.stdout.write(` import ${fileName} from "${importPath}";\n`);
    }
  });
|
|
3684
6468
|
/**
 * CLI entry point.
 *
 * Loads environment variables from the `.env` file in the process's current
 * working directory (the directory the CLI was launched from, resolved before
 * any `--cwd` option is parsed), then lets commander parse argv and run the
 * selected command.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const envFile = path15.resolve(process.cwd(), ".env");
  dotenvConfig({ path: envFile });
  await program.parseAsync(process.argv);
}
|
|
3688
6472
|
main().catch((error) => {
|