searchsocket 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,18 +1,18 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/cli.ts
4
- import fs8 from "fs";
5
- import fsp from "fs/promises";
6
- import path12 from "path";
4
+ import fs11 from "fs";
5
+ import fsp2 from "fs/promises";
6
+ import path15 from "path";
7
7
  import { execSync as execSync2 } from "child_process";
8
8
  import { config as dotenvConfig } from "dotenv";
9
9
  import chokidar from "chokidar";
10
- import { Command } from "commander";
10
+ import { Command, Option } from "commander";
11
11
 
12
12
  // package.json
13
13
  var package_default = {
14
14
  name: "searchsocket",
15
- version: "0.5.0",
15
+ version: "0.6.0",
16
16
  description: "Semantic site search and MCP retrieval for SvelteKit static sites",
17
17
  license: "MIT",
18
18
  author: "Greg Priday <greg@siteorigin.com>",
@@ -38,6 +38,7 @@ var package_default = {
38
38
  files: [
39
39
  "dist",
40
40
  "!dist/**/*.map",
41
+ "src/svelte",
41
42
  "README.md"
42
43
  ],
43
44
  bin: {
@@ -63,6 +64,19 @@ var package_default = {
63
64
  types: "./dist/scroll.d.ts",
64
65
  import: "./dist/scroll.js",
65
66
  require: "./dist/scroll.cjs"
67
+ },
68
+ "./svelte": {
69
+ types: "./src/svelte/index.svelte.ts",
70
+ svelte: "./src/svelte/index.svelte.ts",
71
+ default: "./src/svelte/index.svelte.ts"
72
+ }
73
+ },
74
+ peerDependencies: {
75
+ svelte: "^5.0.0"
76
+ },
77
+ peerDependenciesMeta: {
78
+ svelte: {
79
+ optional: true
66
80
  }
67
81
  },
68
82
  scripts: {
@@ -78,8 +92,9 @@ var package_default = {
78
92
  },
79
93
  packageManager: "pnpm@10.29.2",
80
94
  dependencies: {
95
+ "@clack/prompts": "^1.2.0",
81
96
  "@modelcontextprotocol/sdk": "^1.26.0",
82
- "@upstash/search": "^0.1.7",
97
+ "@upstash/vector": "^1.2.3",
83
98
  cheerio: "^1.2.0",
84
99
  chokidar: "^5.0.0",
85
100
  commander: "^14.0.3",
@@ -88,16 +103,19 @@ var package_default = {
88
103
  "fast-glob": "^3.3.3",
89
104
  "gray-matter": "^4.0.3",
90
105
  jiti: "^2.6.1",
106
+ magicast: "^0.5.2",
91
107
  "p-limit": "^7.3.0",
92
108
  turndown: "^7.2.2",
93
109
  "turndown-plugin-gfm": "^1.0.2",
94
110
  zod: "^4.3.6"
95
111
  },
96
112
  devDependencies: {
113
+ "@sveltejs/vite-plugin-svelte": "^6.2.4",
97
114
  "@types/express": "^5.0.6",
98
115
  "@types/node": "^25.2.2",
99
116
  "@types/turndown": "^5.0.6",
100
117
  jsdom: "^28.1.0",
118
+ svelte: "^5.55.1",
101
119
  tsup: "^8.5.1",
102
120
  typescript: "^5.9.3",
103
121
  vitest: "^4.0.18"
@@ -154,6 +172,7 @@ var searchSocketConfigSchema = z.object({
154
172
  dropSelectors: z.array(z.string()).optional(),
155
173
  ignoreAttr: z.string().optional(),
156
174
  noindexAttr: z.string().optional(),
175
+ imageDescAttr: z.string().optional(),
157
176
  respectRobotsNoindex: z.boolean().optional()
158
177
  }).optional(),
159
178
  transform: z.object({
@@ -169,35 +188,48 @@ var searchSocketConfigSchema = z.object({
169
188
  headingPathDepth: z.number().int().positive().optional(),
170
189
  dontSplitInside: z.array(z.enum(["code", "table", "blockquote"])).optional(),
171
190
  prependTitle: z.boolean().optional(),
172
- pageSummaryChunk: z.boolean().optional()
191
+ pageSummaryChunk: z.boolean().optional(),
192
+ weightHeadings: z.boolean().optional()
173
193
  }).optional(),
174
194
  upstash: z.object({
175
195
  url: z.string().url().optional(),
176
196
  token: z.string().min(1).optional(),
177
197
  urlEnv: z.string().min(1).optional(),
178
- tokenEnv: z.string().min(1).optional()
198
+ tokenEnv: z.string().min(1).optional(),
199
+ namespaces: z.object({
200
+ pages: z.string().min(1).optional(),
201
+ chunks: z.string().min(1).optional()
202
+ }).optional()
203
+ }).optional(),
204
+ embedding: z.object({
205
+ model: z.string().optional(),
206
+ dimensions: z.number().int().positive().optional(),
207
+ taskType: z.string().optional(),
208
+ batchSize: z.number().int().positive().optional()
179
209
  }).optional(),
180
210
  search: z.object({
181
- semanticWeight: z.number().min(0).max(1).optional(),
182
- inputEnrichment: z.boolean().optional(),
183
- reranking: z.boolean().optional(),
184
211
  dualSearch: z.boolean().optional(),
185
212
  pageSearchWeight: z.number().min(0).max(1).optional()
186
213
  }).optional(),
187
214
  ranking: z.object({
188
215
  enableIncomingLinkBoost: z.boolean().optional(),
189
216
  enableDepthBoost: z.boolean().optional(),
217
+ enableFreshnessBoost: z.boolean().optional(),
218
+ freshnessDecayRate: z.number().positive().optional(),
219
+ enableAnchorTextBoost: z.boolean().optional(),
190
220
  pageWeights: z.record(z.string(), z.number().min(0)).optional(),
191
221
  aggregationCap: z.number().int().positive().optional(),
192
222
  aggregationDecay: z.number().min(0).max(1).optional(),
193
223
  minChunkScoreRatio: z.number().min(0).max(1).optional(),
194
- minScore: z.number().min(0).max(1).optional(),
224
+ minScoreRatio: z.number().min(0).max(1).optional(),
195
225
  scoreGapThreshold: z.number().min(0).max(1).optional(),
196
226
  weights: z.object({
197
227
  incomingLinks: z.number().optional(),
198
228
  depth: z.number().optional(),
199
229
  aggregation: z.number().optional(),
200
- titleMatch: z.number().optional()
230
+ titleMatch: z.number().optional(),
231
+ freshness: z.number().optional(),
232
+ anchorText: z.number().optional()
201
233
  }).optional()
202
234
  }).optional(),
203
235
  api: z.object({
@@ -212,12 +244,28 @@ var searchSocketConfigSchema = z.object({
212
244
  }).optional(),
213
245
  mcp: z.object({
214
246
  enable: z.boolean().optional(),
247
+ access: z.enum(["public", "private"]).optional(),
215
248
  transport: z.enum(["stdio", "http"]).optional(),
216
249
  http: z.object({
217
250
  port: z.number().int().positive().optional(),
218
- path: z.string().optional()
251
+ path: z.string().optional(),
252
+ apiKey: z.string().min(1).optional(),
253
+ apiKeyEnv: z.string().min(1).optional()
254
+ }).optional(),
255
+ handle: z.object({
256
+ path: z.string().optional(),
257
+ apiKey: z.string().min(1).optional(),
258
+ enableJsonResponse: z.boolean().optional()
219
259
  }).optional()
220
260
  }).optional(),
261
+ llmsTxt: z.object({
262
+ enable: z.boolean().optional(),
263
+ outputPath: z.string().optional(),
264
+ title: z.string().optional(),
265
+ description: z.string().optional(),
266
+ generateFull: z.boolean().optional(),
267
+ serveMarkdownVariants: z.boolean().optional()
268
+ }).optional(),
221
269
  state: z.object({
222
270
  dir: z.string().optional()
223
271
  }).optional()
@@ -256,6 +304,7 @@ function createDefaultConfig(projectId) {
256
304
  dropSelectors: DEFAULT_DROP_SELECTORS,
257
305
  ignoreAttr: "data-search-ignore",
258
306
  noindexAttr: "data-search-noindex",
307
+ imageDescAttr: "data-search-description",
259
308
  respectRobotsNoindex: true
260
309
  },
261
310
  transform: {
@@ -265,39 +314,52 @@ function createDefaultConfig(projectId) {
265
314
  },
266
315
  chunking: {
267
316
  strategy: "hybrid",
268
- maxChars: 2200,
317
+ maxChars: 1500,
269
318
  overlapChars: 200,
270
319
  minChars: 250,
271
320
  headingPathDepth: 3,
272
321
  dontSplitInside: ["code", "table", "blockquote"],
273
322
  prependTitle: true,
274
- pageSummaryChunk: true
323
+ pageSummaryChunk: true,
324
+ weightHeadings: true
275
325
  },
276
326
  upstash: {
277
- urlEnv: "UPSTASH_SEARCH_REST_URL",
278
- tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
327
+ urlEnv: "UPSTASH_VECTOR_REST_URL",
328
+ tokenEnv: "UPSTASH_VECTOR_REST_TOKEN",
329
+ namespaces: {
330
+ pages: "pages",
331
+ chunks: "chunks"
332
+ }
333
+ },
334
+ embedding: {
335
+ model: "bge-large-en-v1.5",
336
+ dimensions: 1024,
337
+ taskType: "RETRIEVAL_DOCUMENT",
338
+ batchSize: 100
279
339
  },
280
340
  search: {
281
- semanticWeight: 0.75,
282
- inputEnrichment: true,
283
- reranking: true,
284
341
  dualSearch: true,
285
342
  pageSearchWeight: 0.3
286
343
  },
287
344
  ranking: {
288
345
  enableIncomingLinkBoost: true,
289
346
  enableDepthBoost: true,
347
+ enableFreshnessBoost: false,
348
+ freshnessDecayRate: 1e-3,
349
+ enableAnchorTextBoost: false,
290
350
  pageWeights: {},
291
351
  aggregationCap: 5,
292
352
  aggregationDecay: 0.5,
293
353
  minChunkScoreRatio: 0.5,
294
- minScore: 0.3,
354
+ minScoreRatio: 0.7,
295
355
  scoreGapThreshold: 0.4,
296
356
  weights: {
297
357
  incomingLinks: 0.05,
298
358
  depth: 0.03,
299
359
  aggregation: 0.1,
300
- titleMatch: 0.15
360
+ titleMatch: 0.15,
361
+ freshness: 0.1,
362
+ anchorText: 0.1
301
363
  }
302
364
  },
303
365
  api: {
@@ -308,12 +370,23 @@ function createDefaultConfig(projectId) {
308
370
  },
309
371
  mcp: {
310
372
  enable: process.env.NODE_ENV !== "production",
373
+ access: "private",
311
374
  transport: "stdio",
312
375
  http: {
313
376
  port: 3338,
314
377
  path: "/mcp"
378
+ },
379
+ handle: {
380
+ path: "/api/mcp",
381
+ enableJsonResponse: true
315
382
  }
316
383
  },
384
+ llmsTxt: {
385
+ enable: false,
386
+ outputPath: "static/llms.txt",
387
+ generateFull: true,
388
+ serveMarkdownVariants: false
389
+ },
317
390
  state: {
318
391
  dir: ".searchsocket"
319
392
  }
@@ -425,7 +498,15 @@ ${issues}`
425
498
  },
426
499
  upstash: {
427
500
  ...defaults.upstash,
428
- ...parsed.upstash
501
+ ...parsed.upstash,
502
+ namespaces: {
503
+ ...defaults.upstash.namespaces,
504
+ ...parsed.upstash?.namespaces
505
+ }
506
+ },
507
+ embedding: {
508
+ ...defaults.embedding,
509
+ ...parsed.embedding
429
510
  },
430
511
  search: {
431
512
  ...defaults.search,
@@ -462,8 +543,16 @@ ${issues}`
462
543
  http: {
463
544
  ...defaults.mcp.http,
464
545
  ...parsed.mcp?.http
546
+ },
547
+ handle: {
548
+ ...defaults.mcp.handle,
549
+ ...parsed.mcp?.handle
465
550
  }
466
551
  },
552
+ llmsTxt: {
553
+ ...defaults.llmsTxt,
554
+ ...parsed.llmsTxt
555
+ },
467
556
  state: {
468
557
  ...defaults.state,
469
558
  ...parsed.state
@@ -483,6 +572,15 @@ ${issues}`
483
572
  maxDepth: 10
484
573
  };
485
574
  }
575
+ if (merged.mcp.access === "public") {
576
+ const resolvedKey = merged.mcp.http.apiKey ?? (merged.mcp.http.apiKeyEnv ? process.env[merged.mcp.http.apiKeyEnv] : void 0);
577
+ if (!resolvedKey) {
578
+ throw new SearchSocketError(
579
+ "CONFIG_MISSING",
580
+ '`mcp.access` is "public" but no API key is configured. Set `mcp.http.apiKey` or `mcp.http.apiKeyEnv`.'
581
+ );
582
+ }
583
+ }
486
584
  if (merged.source.mode === "crawl" && !merged.source.crawl?.baseUrl) {
487
585
  throw new SearchSocketError("CONFIG_MISSING", "`source.crawl.baseUrl` is required when source.mode is crawl.");
488
586
  }
@@ -521,8 +619,8 @@ function writeMinimalConfig(cwd) {
521
619
  return target;
522
620
  }
523
621
  const content = `export default {
524
- // Upstash Search credentials (set via env vars or directly here)
525
- // upstash: { urlEnv: "UPSTASH_SEARCH_REST_URL", tokenEnv: "UPSTASH_SEARCH_REST_TOKEN" }
622
+ // Upstash Vector credentials (set via env vars or directly here)
623
+ // upstash: { urlEnv: "UPSTASH_VECTOR_REST_URL", tokenEnv: "UPSTASH_VECTOR_REST_TOKEN" }
526
624
  };
527
625
  `;
528
626
  fs.writeFileSync(target, content, "utf8");
@@ -586,11 +684,11 @@ var Logger = class {
586
684
  this.writeOut(` [${event}] ${data ? JSON.stringify(data) : ""}
587
685
  `);
588
686
  }
589
- writeOut(text) {
687
+ writeOut(text2) {
590
688
  if (this.stderrOnly) {
591
- process.stderr.write(text);
689
+ process.stderr.write(text2);
592
690
  } else {
593
- process.stdout.write(text);
691
+ process.stdout.write(text2);
594
692
  }
595
693
  }
596
694
  logJson(event, data) {
@@ -617,13 +715,84 @@ function normalizeMarkdown(input) {
617
715
  function sanitizeScopeName(scopeName) {
618
716
  return scopeName.toLowerCase().replace(/[^a-z0-9._-]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 80);
619
717
  }
718
+ function markdownToPlain(markdown) {
719
+ return markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/[#>*_|\-]/g, " ").replace(/\s+/g, " ").trim();
720
+ }
620
721
  function toSnippet(markdown, maxLen = 220) {
621
- const plain = markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/[#>*_|\-]/g, " ").replace(/\s+/g, " ").trim();
722
+ const plain = markdownToPlain(markdown);
622
723
  if (plain.length <= maxLen) {
623
724
  return plain;
624
725
  }
625
726
  return `${plain.slice(0, Math.max(0, maxLen - 1)).trim()}\u2026`;
626
727
  }
728
+ function queryAwareExcerpt(markdown, query, maxLen = 220) {
729
+ const plain = markdownToPlain(markdown);
730
+ if (plain.length <= maxLen) return plain;
731
+ const tokens = query.toLowerCase().split(/\s+/).filter((t) => t.length >= 2);
732
+ if (tokens.length === 0) return toSnippet(markdown, maxLen);
733
+ const positions = [];
734
+ for (let ti = 0; ti < tokens.length; ti++) {
735
+ const escaped = tokens[ti].replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
736
+ const re = new RegExp(escaped, "gi");
737
+ let m;
738
+ while ((m = re.exec(plain)) !== null) {
739
+ positions.push({ start: m.index, end: m.index + m[0].length, tokenIdx: ti });
740
+ }
741
+ }
742
+ if (positions.length === 0) return toSnippet(markdown, maxLen);
743
+ positions.sort((a, b) => a.start - b.start);
744
+ let bestUniqueCount = 0;
745
+ let bestTotalCount = 0;
746
+ let bestLeft = 0;
747
+ let bestRight = 0;
748
+ let left = 0;
749
+ const tokenCounts = /* @__PURE__ */ new Map();
750
+ for (let right = 0; right < positions.length; right++) {
751
+ tokenCounts.set(positions[right].tokenIdx, (tokenCounts.get(positions[right].tokenIdx) ?? 0) + 1);
752
+ while (positions[right].end - positions[left].start > maxLen && left < right) {
753
+ const leftToken = positions[left].tokenIdx;
754
+ const cnt = tokenCounts.get(leftToken) - 1;
755
+ if (cnt === 0) tokenCounts.delete(leftToken);
756
+ else tokenCounts.set(leftToken, cnt);
757
+ left++;
758
+ }
759
+ const uniqueCount = tokenCounts.size;
760
+ const totalCount = right - left + 1;
761
+ if (uniqueCount > bestUniqueCount || uniqueCount === bestUniqueCount && totalCount > bestTotalCount) {
762
+ bestUniqueCount = uniqueCount;
763
+ bestTotalCount = totalCount;
764
+ bestLeft = left;
765
+ bestRight = right;
766
+ }
767
+ }
768
+ const mid = Math.floor((positions[bestLeft].start + positions[bestRight].end) / 2);
769
+ let start = Math.max(0, mid - Math.floor(maxLen / 2));
770
+ let end = Math.min(plain.length, start + maxLen);
771
+ start = Math.max(0, end - maxLen);
772
+ if (start > 0) {
773
+ const spaceIdx = plain.lastIndexOf(" ", start);
774
+ if (spaceIdx > start - 30) {
775
+ start = spaceIdx + 1;
776
+ }
777
+ }
778
+ if (end < plain.length) {
779
+ const spaceIdx = plain.indexOf(" ", end);
780
+ if (spaceIdx !== -1 && spaceIdx < end + 30) {
781
+ end = spaceIdx;
782
+ }
783
+ }
784
+ let excerpt = plain.slice(start, end);
785
+ if (excerpt.length > Math.ceil(maxLen * 1.2)) {
786
+ excerpt = excerpt.slice(0, maxLen);
787
+ const lastSpace = excerpt.lastIndexOf(" ");
788
+ if (lastSpace > maxLen * 0.5) {
789
+ excerpt = excerpt.slice(0, lastSpace);
790
+ }
791
+ }
792
+ const prefix = start > 0 ? "\u2026" : "";
793
+ const suffix = end < plain.length ? "\u2026" : "";
794
+ return `${prefix}${excerpt}${suffix}`;
795
+ }
627
796
  function extractFirstParagraph(markdown) {
628
797
  const lines = markdown.split("\n");
629
798
  let inFence = false;
@@ -690,163 +859,292 @@ function ensureStateDirs(cwd, stateDir, scope) {
690
859
  }
691
860
 
692
861
  // src/indexing/pipeline.ts
693
- import path10 from "path";
862
+ import path11 from "path";
694
863
 
695
864
  // src/vector/upstash.ts
696
- function chunkIndexName(scope) {
697
- return `${scope.projectId}--${scope.scopeName}`;
698
- }
699
- function pageIndexName(scope) {
700
- return `${scope.projectId}--${scope.scopeName}--pages`;
701
- }
865
+ import { QueryMode, FusionAlgorithm } from "@upstash/vector";
702
866
  var UpstashSearchStore = class {
703
- client;
867
+ index;
868
+ pagesNs;
869
+ chunksNs;
704
870
  constructor(opts) {
705
- this.client = opts.client;
706
- }
707
- chunkIndex(scope) {
708
- return this.client.index(chunkIndexName(scope));
709
- }
710
- pageIndex(scope) {
711
- return this.client.index(pageIndexName(scope));
871
+ this.index = opts.index;
872
+ this.pagesNs = opts.index.namespace(opts.pagesNamespace);
873
+ this.chunksNs = opts.index.namespace(opts.chunksNamespace);
712
874
  }
713
875
  async upsertChunks(chunks, scope) {
714
876
  if (chunks.length === 0) return;
715
- const index = this.chunkIndex(scope);
716
877
  const BATCH_SIZE = 100;
717
878
  for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
718
879
  const batch = chunks.slice(i, i + BATCH_SIZE);
719
- await index.upsert(batch);
720
- }
721
- }
722
- async search(query, opts, scope) {
723
- const index = this.chunkIndex(scope);
724
- const results = await index.search({
725
- query,
726
- limit: opts.limit,
727
- semanticWeight: opts.semanticWeight,
728
- inputEnrichment: opts.inputEnrichment,
729
- reranking: opts.reranking,
730
- filter: opts.filter
880
+ await this.chunksNs.upsert(
881
+ batch.map((c) => ({
882
+ id: c.id,
883
+ data: c.data,
884
+ metadata: {
885
+ ...c.metadata,
886
+ projectId: scope.projectId,
887
+ scopeName: scope.scopeName,
888
+ type: c.metadata.type || "chunk"
889
+ }
890
+ }))
891
+ );
892
+ }
893
+ }
894
+ async search(data, opts, scope) {
895
+ const filterParts = [
896
+ `projectId = '${scope.projectId}'`,
897
+ `scopeName = '${scope.scopeName}'`
898
+ ];
899
+ if (opts.filter) {
900
+ filterParts.push(opts.filter);
901
+ }
902
+ const results = await this.chunksNs.query({
903
+ data,
904
+ topK: opts.limit,
905
+ includeMetadata: true,
906
+ filter: filterParts.join(" AND "),
907
+ queryMode: QueryMode.HYBRID,
908
+ fusionAlgorithm: FusionAlgorithm.DBSF
909
+ });
910
+ return results.map((doc) => ({
911
+ id: String(doc.id),
912
+ score: doc.score,
913
+ metadata: {
914
+ projectId: doc.metadata?.projectId ?? "",
915
+ scopeName: doc.metadata?.scopeName ?? "",
916
+ url: doc.metadata?.url ?? "",
917
+ path: doc.metadata?.path ?? "",
918
+ title: doc.metadata?.title ?? "",
919
+ sectionTitle: doc.metadata?.sectionTitle ?? "",
920
+ headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
921
+ snippet: doc.metadata?.snippet ?? "",
922
+ chunkText: doc.metadata?.chunkText ?? "",
923
+ ordinal: doc.metadata?.ordinal ?? 0,
924
+ contentHash: doc.metadata?.contentHash ?? "",
925
+ depth: doc.metadata?.depth ?? 0,
926
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
927
+ routeFile: doc.metadata?.routeFile ?? "",
928
+ tags: doc.metadata?.tags ?? [],
929
+ description: doc.metadata?.description || void 0,
930
+ keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
931
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
932
+ incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
933
+ }
934
+ }));
935
+ }
936
+ async searchChunksByUrl(data, url, opts, scope) {
937
+ const filterParts = [
938
+ `projectId = '${scope.projectId}'`,
939
+ `scopeName = '${scope.scopeName}'`,
940
+ `url = '${url}'`
941
+ ];
942
+ if (opts.filter) {
943
+ filterParts.push(opts.filter);
944
+ }
945
+ const results = await this.chunksNs.query({
946
+ data,
947
+ topK: opts.limit,
948
+ includeMetadata: true,
949
+ filter: filterParts.join(" AND "),
950
+ queryMode: QueryMode.HYBRID,
951
+ fusionAlgorithm: FusionAlgorithm.DBSF
731
952
  });
732
953
  return results.map((doc) => ({
733
- id: doc.id,
954
+ id: String(doc.id),
734
955
  score: doc.score,
735
956
  metadata: {
736
957
  projectId: doc.metadata?.projectId ?? "",
737
958
  scopeName: doc.metadata?.scopeName ?? "",
738
- url: doc.content.url,
959
+ url: doc.metadata?.url ?? "",
739
960
  path: doc.metadata?.path ?? "",
740
- title: doc.content.title,
741
- sectionTitle: doc.content.sectionTitle,
742
- headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
961
+ title: doc.metadata?.title ?? "",
962
+ sectionTitle: doc.metadata?.sectionTitle ?? "",
963
+ headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
743
964
  snippet: doc.metadata?.snippet ?? "",
744
- chunkText: doc.content.text,
965
+ chunkText: doc.metadata?.chunkText ?? "",
745
966
  ordinal: doc.metadata?.ordinal ?? 0,
746
967
  contentHash: doc.metadata?.contentHash ?? "",
747
968
  depth: doc.metadata?.depth ?? 0,
748
969
  incomingLinks: doc.metadata?.incomingLinks ?? 0,
749
970
  routeFile: doc.metadata?.routeFile ?? "",
750
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
971
+ tags: doc.metadata?.tags ?? [],
751
972
  description: doc.metadata?.description || void 0,
752
- keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
973
+ keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
974
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
975
+ incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
753
976
  }
754
977
  }));
755
978
  }
756
- async searchPages(query, opts, scope) {
757
- const index = this.pageIndex(scope);
979
+ async searchPagesByText(data, opts, scope) {
980
+ return this.queryPages({ data }, opts, scope);
981
+ }
982
+ async searchPagesByVector(vector, opts, scope) {
983
+ return this.queryPages({ vector }, opts, scope);
984
+ }
985
+ async queryPages(input, opts, scope) {
986
+ const filterParts = [
987
+ `projectId = '${scope.projectId}'`,
988
+ `scopeName = '${scope.scopeName}'`
989
+ ];
990
+ if (opts.filter) {
991
+ filterParts.push(opts.filter);
992
+ }
758
993
  let results;
759
994
  try {
760
- results = await index.search({
761
- query,
762
- limit: opts.limit,
763
- semanticWeight: opts.semanticWeight,
764
- inputEnrichment: opts.inputEnrichment,
765
- reranking: true,
766
- filter: opts.filter
995
+ results = await this.pagesNs.query({
996
+ ...input,
997
+ topK: opts.limit,
998
+ includeMetadata: true,
999
+ filter: filterParts.join(" AND "),
1000
+ queryMode: QueryMode.HYBRID,
1001
+ fusionAlgorithm: FusionAlgorithm.DBSF
767
1002
  });
768
1003
  } catch {
769
1004
  return [];
770
1005
  }
771
1006
  return results.map((doc) => ({
772
- id: doc.id,
1007
+ id: String(doc.id),
773
1008
  score: doc.score,
774
- title: doc.content.title,
775
- url: doc.content.url,
776
- description: doc.content.description ?? "",
777
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
1009
+ title: doc.metadata?.title ?? "",
1010
+ url: doc.metadata?.url ?? "",
1011
+ description: doc.metadata?.description ?? "",
1012
+ tags: doc.metadata?.tags ?? [],
778
1013
  depth: doc.metadata?.depth ?? 0,
779
1014
  incomingLinks: doc.metadata?.incomingLinks ?? 0,
780
- routeFile: doc.metadata?.routeFile ?? ""
1015
+ routeFile: doc.metadata?.routeFile ?? "",
1016
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0
781
1017
  }));
782
1018
  }
783
- async deleteByIds(ids, scope) {
1019
+ async deleteByIds(ids, _scope) {
784
1020
  if (ids.length === 0) return;
785
- const index = this.chunkIndex(scope);
786
- const BATCH_SIZE = 500;
1021
+ const BATCH_SIZE = 100;
787
1022
  for (let i = 0; i < ids.length; i += BATCH_SIZE) {
788
1023
  const batch = ids.slice(i, i + BATCH_SIZE);
789
- await index.delete(batch);
1024
+ await this.chunksNs.delete(batch);
790
1025
  }
791
1026
  }
792
1027
  async deleteScope(scope) {
793
- try {
794
- const chunkIdx = this.chunkIndex(scope);
795
- await chunkIdx.deleteIndex();
796
- } catch {
797
- }
798
- try {
799
- const pageIdx = this.pageIndex(scope);
800
- await pageIdx.deleteIndex();
801
- } catch {
1028
+ for (const ns of [this.chunksNs, this.pagesNs]) {
1029
+ const ids = [];
1030
+ let cursor = "0";
1031
+ try {
1032
+ for (; ; ) {
1033
+ const result = await ns.range({
1034
+ cursor,
1035
+ limit: 100,
1036
+ includeMetadata: true
1037
+ });
1038
+ for (const doc of result.vectors) {
1039
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
1040
+ ids.push(String(doc.id));
1041
+ }
1042
+ }
1043
+ if (!result.nextCursor || result.nextCursor === "0") break;
1044
+ cursor = result.nextCursor;
1045
+ }
1046
+ } catch {
1047
+ }
1048
+ if (ids.length > 0) {
1049
+ const BATCH_SIZE = 100;
1050
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
1051
+ const batch = ids.slice(i, i + BATCH_SIZE);
1052
+ await ns.delete(batch);
1053
+ }
1054
+ }
802
1055
  }
803
1056
  }
804
1057
  async listScopes(projectId) {
805
- const allIndexes = await this.client.listIndexes();
806
- const prefix = `${projectId}--`;
807
- const scopeNames = /* @__PURE__ */ new Set();
808
- for (const name of allIndexes) {
809
- if (name.startsWith(prefix) && !name.endsWith("--pages")) {
810
- const scopeName = name.slice(prefix.length);
811
- scopeNames.add(scopeName);
812
- }
813
- }
814
- const scopes = [];
815
- for (const scopeName of scopeNames) {
816
- const scope = {
817
- projectId,
818
- scopeName,
819
- scopeId: `${projectId}:${scopeName}`
820
- };
1058
+ const scopeMap = /* @__PURE__ */ new Map();
1059
+ for (const ns of [this.chunksNs, this.pagesNs]) {
1060
+ let cursor = "0";
821
1061
  try {
822
- const info = await this.chunkIndex(scope).info();
823
- scopes.push({
824
- projectId,
825
- scopeName,
826
- lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
827
- documentCount: info.documentCount
828
- });
1062
+ for (; ; ) {
1063
+ const result = await ns.range({
1064
+ cursor,
1065
+ limit: 100,
1066
+ includeMetadata: true
1067
+ });
1068
+ for (const doc of result.vectors) {
1069
+ if (doc.metadata?.projectId === projectId) {
1070
+ const scopeName = doc.metadata.scopeName ?? "";
1071
+ scopeMap.set(scopeName, (scopeMap.get(scopeName) ?? 0) + 1);
1072
+ }
1073
+ }
1074
+ if (!result.nextCursor || result.nextCursor === "0") break;
1075
+ cursor = result.nextCursor;
1076
+ }
829
1077
  } catch {
830
- scopes.push({
831
- projectId,
832
- scopeName,
833
- lastIndexedAt: "unknown",
834
- documentCount: 0
835
- });
836
1078
  }
837
1079
  }
838
- return scopes;
1080
+ return [...scopeMap.entries()].map(([scopeName, count]) => ({
1081
+ projectId,
1082
+ scopeName,
1083
+ lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
1084
+ documentCount: count
1085
+ }));
839
1086
  }
840
1087
  async getContentHashes(scope) {
841
1088
  const map = /* @__PURE__ */ new Map();
842
- const index = this.chunkIndex(scope);
843
1089
  let cursor = "0";
844
1090
  try {
845
1091
  for (; ; ) {
846
- const result = await index.range({ cursor, limit: 100 });
847
- for (const doc of result.documents) {
848
- if (doc.metadata?.contentHash) {
849
- map.set(doc.id, doc.metadata.contentHash);
1092
+ const result = await this.chunksNs.range({
1093
+ cursor,
1094
+ limit: 100,
1095
+ includeMetadata: true
1096
+ });
1097
+ for (const doc of result.vectors) {
1098
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
1099
+ map.set(String(doc.id), doc.metadata.contentHash);
1100
+ }
1101
+ }
1102
+ if (!result.nextCursor || result.nextCursor === "0") break;
1103
+ cursor = result.nextCursor;
1104
+ }
1105
+ } catch {
1106
+ }
1107
+ return map;
1108
+ }
1109
+ async listPages(scope, opts) {
1110
+ const cursor = opts?.cursor ?? "0";
1111
+ const limit = opts?.limit ?? 50;
1112
+ try {
1113
+ const result = await this.pagesNs.range({
1114
+ cursor,
1115
+ limit,
1116
+ includeMetadata: true
1117
+ });
1118
+ const pages = result.vectors.filter(
1119
+ (doc) => doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && (!opts?.pathPrefix || (doc.metadata?.url ?? "").startsWith(opts.pathPrefix))
1120
+ ).map((doc) => ({
1121
+ url: doc.metadata?.url ?? "",
1122
+ title: doc.metadata?.title ?? "",
1123
+ description: doc.metadata?.description ?? "",
1124
+ routeFile: doc.metadata?.routeFile ?? ""
1125
+ }));
1126
+ const response = { pages };
1127
+ if (result.nextCursor && result.nextCursor !== "0") {
1128
+ response.nextCursor = result.nextCursor;
1129
+ }
1130
+ return response;
1131
+ } catch {
1132
+ return { pages: [] };
1133
+ }
1134
+ }
1135
+ async getPageHashes(scope) {
1136
+ const map = /* @__PURE__ */ new Map();
1137
+ let cursor = "0";
1138
+ try {
1139
+ for (; ; ) {
1140
+ const result = await this.pagesNs.range({
1141
+ cursor,
1142
+ limit: 100,
1143
+ includeMetadata: true
1144
+ });
1145
+ for (const doc of result.vectors) {
1146
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
1147
+ map.set(String(doc.id), doc.metadata.contentHash);
850
1148
  }
851
1149
  }
852
1150
  if (!result.nextCursor || result.nextCursor === "0") break;
@@ -856,47 +1154,43 @@ var UpstashSearchStore = class {
856
1154
  }
857
1155
  return map;
858
1156
  }
1157
+ async deletePagesByIds(ids, _scope) {
1158
+ if (ids.length === 0) return;
1159
+ const BATCH_SIZE = 50;
1160
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
1161
+ const batch = ids.slice(i, i + BATCH_SIZE);
1162
+ await this.pagesNs.delete(batch);
1163
+ }
1164
+ }
859
1165
  async upsertPages(pages, scope) {
860
1166
  if (pages.length === 0) return;
861
- const index = this.pageIndex(scope);
862
1167
  const BATCH_SIZE = 50;
863
1168
  for (let i = 0; i < pages.length; i += BATCH_SIZE) {
864
1169
  const batch = pages.slice(i, i + BATCH_SIZE);
865
- const docs = batch.map((p) => ({
866
- id: p.url,
867
- content: {
868
- title: p.title,
869
- url: p.url,
870
- type: "page",
871
- description: p.description ?? "",
872
- keywords: (p.keywords ?? []).join(","),
873
- summary: p.summary ?? "",
874
- tags: p.tags.join(",")
875
- },
876
- metadata: {
877
- markdown: p.markdown,
878
- projectId: p.projectId,
879
- scopeName: p.scopeName,
880
- routeFile: p.routeFile,
881
- routeResolution: p.routeResolution,
882
- incomingLinks: p.incomingLinks,
883
- outgoingLinks: p.outgoingLinks,
884
- depth: p.depth,
885
- indexedAt: p.indexedAt
886
- }
887
- }));
888
- await index.upsert(docs);
1170
+ await this.pagesNs.upsert(
1171
+ batch.map((p) => ({
1172
+ id: p.id,
1173
+ data: p.data,
1174
+ metadata: {
1175
+ ...p.metadata,
1176
+ projectId: scope.projectId,
1177
+ scopeName: scope.scopeName,
1178
+ type: "page"
1179
+ }
1180
+ }))
1181
+ );
889
1182
  }
890
1183
  }
891
1184
  async getPage(url, scope) {
892
- const index = this.pageIndex(scope);
893
1185
  try {
894
- const results = await index.fetch([url]);
1186
+ const results = await this.pagesNs.fetch([url], {
1187
+ includeMetadata: true
1188
+ });
895
1189
  const doc = results[0];
896
- if (!doc) return null;
1190
+ if (!doc || !doc.metadata) return null;
897
1191
  return {
898
- url: doc.content.url,
899
- title: doc.content.title,
1192
+ url: doc.metadata.url,
1193
+ title: doc.metadata.title,
900
1194
  markdown: doc.metadata.markdown,
901
1195
  projectId: doc.metadata.projectId,
902
1196
  scopeName: doc.metadata.scopeName,
@@ -904,27 +1198,86 @@ var UpstashSearchStore = class {
904
1198
  routeResolution: doc.metadata.routeResolution,
905
1199
  incomingLinks: doc.metadata.incomingLinks,
906
1200
  outgoingLinks: doc.metadata.outgoingLinks,
1201
+ outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? void 0,
907
1202
  depth: doc.metadata.depth,
908
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
1203
+ tags: doc.metadata.tags ?? [],
909
1204
  indexedAt: doc.metadata.indexedAt,
910
- summary: doc.content.summary || void 0,
911
- description: doc.content.description || void 0,
912
- keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
1205
+ summary: doc.metadata.summary || void 0,
1206
+ description: doc.metadata.description || void 0,
1207
+ keywords: doc.metadata.keywords?.length ? doc.metadata.keywords : void 0,
1208
+ publishedAt: typeof doc.metadata.publishedAt === "number" ? doc.metadata.publishedAt : void 0
913
1209
  };
914
1210
  } catch {
915
1211
  return null;
916
1212
  }
917
1213
  }
1214
+ async fetchPageWithVector(url, scope) {
1215
+ try {
1216
+ const results = await this.pagesNs.fetch([url], {
1217
+ includeMetadata: true,
1218
+ includeVectors: true
1219
+ });
1220
+ const doc = results[0];
1221
+ if (!doc || !doc.metadata || !doc.vector) return null;
1222
+ if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
1223
+ return null;
1224
+ }
1225
+ return { metadata: doc.metadata, vector: doc.vector };
1226
+ } catch {
1227
+ return null;
1228
+ }
1229
+ }
1230
+ async fetchPagesBatch(urls, scope) {
1231
+ if (urls.length === 0) return [];
1232
+ try {
1233
+ const results = await this.pagesNs.fetch(urls, {
1234
+ includeMetadata: true
1235
+ });
1236
+ const out = [];
1237
+ for (const doc of results) {
1238
+ if (!doc || !doc.metadata) continue;
1239
+ if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
1240
+ continue;
1241
+ }
1242
+ out.push({
1243
+ url: doc.metadata.url,
1244
+ title: doc.metadata.title,
1245
+ routeFile: doc.metadata.routeFile,
1246
+ outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? []
1247
+ });
1248
+ }
1249
+ return out;
1250
+ } catch {
1251
+ return [];
1252
+ }
1253
+ }
918
1254
  async deletePages(scope) {
1255
+ const ids = [];
1256
+ let cursor = "0";
919
1257
  try {
920
- const index = this.pageIndex(scope);
921
- await index.reset();
1258
+ for (; ; ) {
1259
+ const result = await this.pagesNs.range({
1260
+ cursor,
1261
+ limit: 100,
1262
+ includeMetadata: true
1263
+ });
1264
+ for (const doc of result.vectors) {
1265
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
1266
+ ids.push(String(doc.id));
1267
+ }
1268
+ }
1269
+ if (!result.nextCursor || result.nextCursor === "0") break;
1270
+ cursor = result.nextCursor;
1271
+ }
922
1272
  } catch {
923
1273
  }
1274
+ if (ids.length > 0) {
1275
+ await this.deletePagesByIds(ids, scope);
1276
+ }
924
1277
  }
925
1278
  async health() {
926
1279
  try {
927
- await this.client.info();
1280
+ await this.index.info();
928
1281
  return { ok: true };
929
1282
  } catch (error) {
930
1283
  return {
@@ -934,14 +1287,31 @@ var UpstashSearchStore = class {
934
1287
  }
935
1288
  }
936
1289
  async dropAllIndexes(projectId) {
937
- const allIndexes = await this.client.listIndexes();
938
- const prefix = `${projectId}--`;
939
- for (const name of allIndexes) {
940
- if (name.startsWith(prefix)) {
941
- try {
942
- const index = this.client.index(name);
943
- await index.deleteIndex();
944
- } catch {
1290
+ for (const ns of [this.chunksNs, this.pagesNs]) {
1291
+ const ids = [];
1292
+ let cursor = "0";
1293
+ try {
1294
+ for (; ; ) {
1295
+ const result = await ns.range({
1296
+ cursor,
1297
+ limit: 100,
1298
+ includeMetadata: true
1299
+ });
1300
+ for (const doc of result.vectors) {
1301
+ if (doc.metadata?.projectId === projectId) {
1302
+ ids.push(String(doc.id));
1303
+ }
1304
+ }
1305
+ if (!result.nextCursor || result.nextCursor === "0") break;
1306
+ cursor = result.nextCursor;
1307
+ }
1308
+ } catch {
1309
+ }
1310
+ if (ids.length > 0) {
1311
+ const BATCH_SIZE = 100;
1312
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
1313
+ const batch = ids.slice(i, i + BATCH_SIZE);
1314
+ await ns.delete(batch);
945
1315
  }
946
1316
  }
947
1317
  }
@@ -955,12 +1325,16 @@ async function createUpstashStore(config) {
955
1325
  if (!url || !token) {
956
1326
  throw new SearchSocketError(
957
1327
  "VECTOR_BACKEND_UNAVAILABLE",
958
- `Missing Upstash Search credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
1328
+ `Missing Upstash Vector credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
959
1329
  );
960
1330
  }
961
- const { Search } = await import("@upstash/search");
962
- const client = new Search({ url, token });
963
- return new UpstashSearchStore({ client });
1331
+ const { Index } = await import("@upstash/vector");
1332
+ const index = new Index({ url, token });
1333
+ return new UpstashSearchStore({
1334
+ index,
1335
+ pagesNamespace: config.upstash.namespaces.pages,
1336
+ chunksNamespace: config.upstash.namespaces.chunks
1337
+ });
964
1338
  }
965
1339
 
966
1340
  // src/utils/hash.ts
@@ -1034,6 +1408,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
1034
1408
  if (normalizeText(current.text)) {
1035
1409
  sections.push({
1036
1410
  sectionTitle: current.sectionTitle,
1411
+ headingLevel: current.headingLevel,
1037
1412
  headingPath: current.headingPath,
1038
1413
  text: current.text.trim()
1039
1414
  });
@@ -1052,6 +1427,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
1052
1427
  headingStack.length = level;
1053
1428
  current = {
1054
1429
  sectionTitle: title,
1430
+ headingLevel: level,
1055
1431
  headingPath: headingStack.filter((entry) => Boolean(entry)).slice(0, headingPathDepth),
1056
1432
  text: `${line}
1057
1433
  `
@@ -1071,8 +1447,8 @@ function parseHeadingSections(markdown, headingPathDepth) {
1071
1447
  }
1072
1448
  return sections;
1073
1449
  }
1074
- function blockify(text, config) {
1075
- const lines = text.split("\n");
1450
+ function blockify(text2, config) {
1451
+ const lines = text2.split("\n");
1076
1452
  const blocks = [];
1077
1453
  let inFence = false;
1078
1454
  let current = [];
@@ -1179,20 +1555,21 @@ function splitOversizedBlock(block, config) {
1179
1555
  return chunks.length > 0 ? chunks : [trimmed];
1180
1556
  }
1181
1557
  function splitSection(section, config) {
1182
- const text = section.text.trim();
1183
- if (!text) {
1558
+ const text2 = section.text.trim();
1559
+ if (!text2) {
1184
1560
  return [];
1185
1561
  }
1186
- if (text.length <= config.maxChars) {
1562
+ if (text2.length <= config.maxChars) {
1187
1563
  return [
1188
1564
  {
1189
1565
  sectionTitle: section.sectionTitle,
1566
+ headingLevel: section.headingLevel,
1190
1567
  headingPath: section.headingPath,
1191
- chunkText: text
1568
+ chunkText: text2
1192
1569
  }
1193
1570
  ];
1194
1571
  }
1195
- const blocks = blockify(text, config);
1572
+ const blocks = blockify(text2, config);
1196
1573
  const chunks = [];
1197
1574
  let current = "";
1198
1575
  for (const block of blocks) {
@@ -1237,6 +1614,7 @@ ${chunk}`;
1237
1614
  }
1238
1615
  return merged.map((chunkText) => ({
1239
1616
  sectionTitle: section.sectionTitle,
1617
+ headingLevel: section.headingLevel,
1240
1618
  headingPath: section.headingPath,
1241
1619
  chunkText
1242
1620
  }));
@@ -1252,6 +1630,18 @@ function buildSummaryChunkText(page) {
1252
1630
  }
1253
1631
  return parts.join("\n\n");
1254
1632
  }
1633
/**
 * Build a "page title — heading trail" string for a chunk.
 *
 * @param {{ title: string, sectionTitle?: string, headingLevel?: number, headingPath: string[] }} chunk
 * @returns {string | undefined} `undefined` when the chunk has no section title
 *   or no heading level. Otherwise the page title followed by an em dash and:
 *   - the section title alone when the heading path has at most one entry;
 *   - the heading path joined with " > " when it has several entries, with the
 *     section title appended if it is not already the last path entry.
 */
function buildEmbeddingTitle(chunk) {
  const { title, sectionTitle, headingLevel, headingPath } = chunk;
  if (!sectionTitle || headingLevel === undefined) return undefined;
  if (headingPath.length <= 1) {
    return `${title} \u2014 ${sectionTitle}`;
  }
  const trail = headingPath.join(" > ");
  const deepest = headingPath[headingPath.length - 1];
  const suffix = deepest === sectionTitle ? trail : `${trail} > ${sectionTitle}`;
  return `${title} \u2014 ${suffix}`;
}
1255
1645
  function buildEmbeddingText(chunk, prependTitle) {
1256
1646
  if (!prependTitle) return chunk.chunkText;
1257
1647
  const prefix = chunk.sectionTitle ? `${chunk.title} \u2014 ${chunk.sectionTitle}` : chunk.title;
@@ -1282,10 +1672,14 @@ function chunkPage(page, config, scope) {
1282
1672
  tags: page.tags,
1283
1673
  contentHash: "",
1284
1674
  description: page.description,
1285
- keywords: page.keywords
1675
+ keywords: page.keywords,
1676
+ publishedAt: page.publishedAt,
1677
+ incomingAnchorText: page.incomingAnchorText,
1678
+ meta: page.meta
1286
1679
  };
1287
1680
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
1288
- summaryChunk.contentHash = sha256(normalizeText(embeddingText));
1681
+ const metaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
1682
+ summaryChunk.contentHash = sha256(normalizeText(embeddingText) + metaSuffix);
1289
1683
  chunks.push(summaryChunk);
1290
1684
  }
1291
1685
  const ordinalOffset = config.chunking.pageSummaryChunk ? 1 : 0;
@@ -1302,6 +1696,7 @@ function chunkPage(page, config, scope) {
1302
1696
  path: page.url,
1303
1697
  title: page.title,
1304
1698
  sectionTitle: entry.sectionTitle,
1699
+ headingLevel: entry.headingLevel,
1305
1700
  headingPath: entry.headingPath,
1306
1701
  chunkText: entry.chunkText,
1307
1702
  snippet: toSnippet(entry.chunkText),
@@ -1311,10 +1706,16 @@ function chunkPage(page, config, scope) {
1311
1706
  tags: page.tags,
1312
1707
  contentHash: "",
1313
1708
  description: page.description,
1314
- keywords: page.keywords
1709
+ keywords: page.keywords,
1710
+ publishedAt: page.publishedAt,
1711
+ incomingAnchorText: page.incomingAnchorText,
1712
+ meta: page.meta
1315
1713
  };
1316
1714
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
1317
- chunk.contentHash = sha256(normalizeText(embeddingText));
1715
+ const embeddingTitle = config.chunking.weightHeadings ? buildEmbeddingTitle(chunk) : void 0;
1716
+ const chunkMetaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
1717
+ const hashInput = embeddingTitle ? `${normalizeText(embeddingText)}|title:${embeddingTitle}` : normalizeText(embeddingText);
1718
+ chunk.contentHash = sha256(hashInput + chunkMetaSuffix);
1318
1719
  chunks.push(chunk);
1319
1720
  }
1320
1721
  return chunks;
@@ -1325,6 +1726,113 @@ import { load } from "cheerio";
1325
1726
  import matter from "gray-matter";
1326
1727
  import TurndownService from "turndown";
1327
1728
  import { gfm, highlightedCodeBlock, strikethrough, tables, taskListItems } from "turndown-plugin-gfm";
1729
+
1730
+ // src/utils/structured-meta.ts
1731
// Meta keys must look like plain identifiers: a letter or underscore followed
// by letters, digits or underscores.
var VALID_KEY_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;

/**
 * @param {string} key - Candidate meta key.
 * @returns {boolean} Whether `key` matches the identifier pattern above.
 */
function validateMetaKey(key) {
  return VALID_KEY_RE.test(key);
}

/**
 * Convert the raw text of a meta value into a typed value.
 *
 * @param {string} content - Raw text of the value.
 * @param {string} dataType - One of "number", "boolean", "string[]", "date";
 *   any other value leaves `content` untouched.
 * @returns {string | number | boolean | string[]}
 *   - "number" / "date": the finite number parsed from `content`; the raw
 *     `content` when it is blank or not a finite number.
 *   - "boolean": `true` only for the exact text "true".
 *   - "string[]": comma-separated entries, trimmed, with empty entries dropped.
 */
function parseMetaValue(content, dataType) {
  switch (dataType) {
    case "number":
    case "date": {
      // Number("") and Number("  ") are 0, which would silently turn a blank
      // value into 0 (or the Unix epoch for dates); keep blank input as-is.
      if (typeof content === "string" && content.trim() === "") return content;
      const parsed = Number(content);
      return Number.isFinite(parsed) ? parsed : content;
    }
    case "boolean":
      return content === "true";
    case "string[]":
      // Drop empty entries so "a,,b" or a trailing comma do not yield "".
      return content ? content.split(",").map((s) => s.trim()).filter(Boolean) : [];
    default:
      return content;
  }
}

/**
 * Escape a string for use inside a single-quoted filter literal by doubling
 * every single quote.
 *
 * @param {string} s
 * @returns {string}
 */
function escapeFilterValue(s) {
  return s.replace(/'/g, "''");
}

/**
 * Turn a `{ key: value }` object into a filter expression over `meta.<key>`
 * fields, joining the clauses with " AND ".
 *
 * Entries whose key fails `validateMetaKey` are skipped. String values become
 * `meta.key CONTAINS '<escaped>'`; every other value is interpolated as
 * `meta.key = <value>`.
 *
 * @param {Record<string, unknown>} filters
 * @returns {string} The joined expression, or "" when no clause was produced.
 */
function buildMetaFilterString(filters) {
  const clauses = [];
  for (const [key, value] of Object.entries(filters)) {
    if (!validateMetaKey(key)) continue;
    const field = `meta.${key}`;
    clauses.push(
      typeof value === "string" ? `${field} CONTAINS '${escapeFilterValue(value)}'` : `${field} = ${value}`
    );
  }
  return clauses.join(" AND ");
}
1771
+
1772
+ // src/indexing/extractor.ts
1773
/**
 * Normalize a date-like value to epoch milliseconds.
 *
 * @param {unknown} value - A `Date`, a string accepted by `new Date(...)`, or
 *   a number (returned unchanged when finite).
 * @returns {number | undefined} A finite timestamp, or `undefined` for
 *   null/undefined, unparseable strings, invalid dates, non-finite numbers and
 *   any other type.
 */
function normalizeDateToMs(value) {
  let ts;
  if (value instanceof Date) {
    ts = value.getTime();
  } else if (typeof value === "string") {
    ts = new Date(value).getTime();
  } else if (typeof value === "number") {
    ts = value;
  } else {
    return undefined;
  }
  return Number.isFinite(ts) ? ts : undefined;
}

// Frontmatter keys checked for a publish date, in priority order.
var FRONTMATTER_DATE_FIELDS = ["date", "publishedAt", "updatedAt", "published_at", "updated_at"];

/**
 * Return the first usable timestamp among the known frontmatter date fields.
 *
 * @param {Record<string, unknown>} data - Parsed frontmatter.
 * @returns {number | undefined} Epoch milliseconds of the first field that
 *   normalizes to a timestamp, or `undefined` when none does.
 */
function extractPublishedAtFromFrontmatter(data) {
  for (const field of FRONTMATTER_DATE_FIELDS) {
    const val = normalizeDateToMs(data[field]);
    if (val !== undefined) return val;
  }
  return undefined;
}

// Flatten a parsed JSON-LD payload into the object nodes worth inspecting:
// each top-level object (or array item) followed by the objects in its
// "@graph" array. Non-object entries such as null are skipped.
function collectJsonLdNodes(parsed) {
  const isNode = (entry) => Boolean(entry) && typeof entry === "object";
  const roots = Array.isArray(parsed) ? parsed : [parsed];
  const nodes = [];
  for (const root of roots) {
    if (!isNode(root)) continue;
    nodes.push(root);
    if (Array.isArray(root["@graph"])) {
      nodes.push(...root["@graph"].filter(isNode));
    }
  }
  return nodes;
}

/**
 * Look for a publish date in an HTML document.
 *
 * Sources are tried in order and the first one that normalizes to a timestamp
 * wins:
 *   1. `datePublished` on any JSON-LD node in `script[type="application/ld+json"]`
 *   2. `meta[property="article:published_time"]`
 *   3. `meta[itemprop="datePublished"]`, else `time[itemprop="datePublished"]`
 *   4. the first `time[datetime]` element
 *
 * @param {Function} $ - Cheerio-style selector function for the document.
 * @returns {number | undefined} Epoch milliseconds, or `undefined` when no
 *   source yields a valid date.
 */
function extractPublishedAtFromHtml($) {
  const jsonLdScripts = $('script[type="application/ld+json"]');
  for (let i = 0; i < jsonLdScripts.length; i++) {
    let nodes;
    try {
      const raw = $(jsonLdScripts[i]).html();
      if (!raw) continue;
      nodes = collectJsonLdNodes(JSON.parse(raw));
    } catch {
      // Malformed JSON-LD is common in the wild; skip this script and keep looking.
      continue;
    }
    for (const node of nodes) {
      const val = normalizeDateToMs(node.datePublished);
      if (val !== undefined) return val;
    }
  }
  // Attribute-based fallbacks, evaluated lazily in priority order.
  const fallbacks = [
    () => $('meta[property="article:published_time"]').attr("content")?.trim(),
    () => $('meta[itemprop="datePublished"]').attr("content")?.trim() || $('time[itemprop="datePublished"]').attr("datetime")?.trim(),
    () => $("time[datetime]").first().attr("datetime")?.trim()
  ];
  for (const read of fallbacks) {
    const text = read();
    if (!text) continue;
    const val = normalizeDateToMs(text);
    if (val !== undefined) return val;
  }
  return undefined;
}
1328
1836
  function hasTopLevelNoindexComment(markdown) {
1329
1837
  const lines = markdown.split(/\r?\n/);
1330
1838
  let inFence = false;
@@ -1340,6 +1848,97 @@ function hasTopLevelNoindexComment(markdown) {
1340
1848
  }
1341
1849
  return false;
1342
1850
  }
1851
// Alt texts that consist of exactly one of these words (case-insensitive)
// carry no information about the image.
var GARBAGE_ALT_WORDS = /* @__PURE__ */ new Set([
  "image", "photo", "picture", "icon",
  "logo", "banner", "screenshot", "thumbnail",
  "img", "graphic", "illustration", "spacer",
  "pixel", "placeholder", "avatar", "background"
]);

// Matches text ending in an image file extension, optionally followed by a query string.
var IMAGE_EXT_RE = /\.(jpg|jpeg|png|gif|svg|webp|avif|bmp|ico)(\?.*)?$/i;

/**
 * Decide whether an `alt` text is descriptive enough to keep.
 *
 * @param {string} alt - Raw alt attribute value.
 * @returns {boolean} `false` when the trimmed text is shorter than five
 *   characters, ends like an image file name, or is exactly one of the
 *   generic words above; `true` otherwise.
 */
function isMeaningfulAlt(alt) {
  const candidate = alt.trim();
  const longEnough = candidate.length >= 5;
  return longEnough && !IMAGE_EXT_RE.test(candidate) && !GARBAGE_ALT_WORDS.has(candidate.toLowerCase());
}
1877
/**
 * Pick the best textual stand-in for an image.
 *
 * Priority: the image's own description attribute, then the same attribute on
 * the enclosing <figure>, then a meaningful alt combined with the figure's
 * first <figcaption>, then whichever of the two exists.
 *
 * @param {object} img - Cheerio-style wrapper around the <img> element.
 * @param {Function} $ - Selector function (not used here).
 * @param {string} imageDescAttr - Name of the attribute holding an explicit
 *   image description.
 * @returns {string | null} The chosen text, or `null` when nothing usable exists.
 */
function resolveImageText(img, $, imageDescAttr) {
  const ownDescription = img.attr(imageDescAttr)?.trim();
  if (ownDescription) return ownDescription;

  const figure = img.closest("figure");
  const insideFigure = Boolean(figure.length);
  if (insideFigure) {
    const figureDescription = figure.attr(imageDescAttr)?.trim();
    if (figureDescription) return figureDescription;
  }

  const altText = img.attr("alt")?.trim() ?? "";
  const captionText = insideFigure ? figure.find("figcaption").first().text().trim() : "";
  // An alt only counts when it passes the meaningfulness filter.
  const usableAlt = isMeaningfulAlt(altText) ? altText : "";

  if (usableAlt && captionText) {
    return `${usableAlt} \u2014 ${captionText}`;
  }
  return usableAlt || captionText || null;
}
1898
// Link texts that say nothing about the link target.
var STOP_ANCHORS = /* @__PURE__ */ new Set([
  "here", "click", "click here", "read more",
  "link", "this", "more"
]);

/**
 * Normalize link text: collapse whitespace, trim and lower-case.
 *
 * @param {string} raw - Text content of the link.
 * @returns {string} The normalized text capped at 100 characters, or "" when
 *   it is shorter than three characters or is one of the stop anchors.
 */
function normalizeAnchorText(raw) {
  const collapsed = raw.replace(/\s+/g, " ").trim().toLowerCase();
  const rejected = collapsed.length < 3 || STOP_ANCHORS.has(collapsed);
  if (rejected) return "";
  return collapsed.length > 100 ? collapsed.slice(0, 100) : collapsed;
}
1914
/**
 * Escape `&`, `<` and `>` so text can be placed inside an HTML element.
 * Quotes are left untouched.
 *
 * @param {string} text2
 * @returns {string}
 */
function escapeHtml(text2) {
  // "&" must be handled first so the entities added for "<" and ">" are not re-escaped.
  const substitutions = [
    [/&/g, "&amp;"],
    [/</g, "&lt;"],
    [/>/g, "&gt;"]
  ];
  return substitutions.reduce((escaped, [pattern, entity]) => escaped.replace(pattern, entity), text2);
}
1917
/**
 * Replace every <picture> and <img> under `root` with a `<span>` holding its
 * resolved text, or remove it when no text can be resolved.
 *
 * <picture> elements are handled first, using their first <img>; remaining
 * <img> elements are handled afterwards. When an element is replaced and sits
 * inside a <figure>, that figure's <figcaption> elements are removed.
 *
 * @param {object} root - Cheerio-style wrapper around the content root (mutated in place).
 * @param {Function} $ - Selector function used to wrap raw elements.
 * @param {string} imageDescAttr - Attribute name forwarded to `resolveImageText`.
 * @returns {void}
 */
function preprocessImages(root, $, imageDescAttr) {
  const swapForText = (node, description) => {
    if (!description) {
      node.remove();
      return;
    }
    const enclosingFigure = node.closest("figure");
    if (enclosingFigure.length) enclosingFigure.find("figcaption").remove();
    node.replaceWith(`<span>${escapeHtml(description)}</span>`);
  };

  root.find("picture").each((_i, el) => {
    const picture = $(el);
    const innerImg = picture.find("img").first();
    const description = innerImg.length ? resolveImageText(innerImg, $, imageDescAttr) : null;
    swapForText(picture, description);
  });

  root.find("img").each((_i, el) => {
    const img = $(el);
    swapForText(img, resolveImageText(img, $, imageDescAttr));
  });
}
1343
1942
  function extractFromHtml(url, html, config) {
1344
1943
  const $ = load(html);
1345
1944
  const normalizedUrl = normalizeUrlPath(url);
@@ -1365,6 +1964,20 @@ function extractFromHtml(url, html, config) {
1365
1964
  if (weight === 0) {
1366
1965
  return null;
1367
1966
  }
1967
+ if ($('meta[name="searchsocket:noindex"]').attr("content") === "true") {
1968
+ return null;
1969
+ }
1970
+ const RESERVED_META_KEYS = /* @__PURE__ */ new Set(["noindex", "tags"]);
1971
+ const meta = {};
1972
+ $('meta[name^="searchsocket:"]').each((_i, el) => {
1973
+ const name = $(el).attr("name") ?? "";
1974
+ const key = name.slice("searchsocket:".length);
1975
+ if (!key || RESERVED_META_KEYS.has(key) || !validateMetaKey(key)) return;
1976
+ const content = $(el).attr("content") ?? "";
1977
+ const dataType = $(el).attr("data-type") ?? "string";
1978
+ meta[key] = parseMetaValue(content, dataType);
1979
+ });
1980
+ const componentTags = $('meta[name="searchsocket:tags"]').attr("content")?.trim();
1368
1981
  const description = $("meta[name='description']").attr("content")?.trim() || $("meta[property='og:description']").attr("content")?.trim() || void 0;
1369
1982
  const keywordsRaw = $("meta[name='keywords']").attr("content")?.trim();
1370
1983
  const keywords = keywordsRaw ? keywordsRaw.split(",").map((k) => k.trim()).filter(Boolean) : void 0;
@@ -1376,7 +1989,9 @@ function extractFromHtml(url, html, config) {
1376
1989
  root.find(selector).remove();
1377
1990
  }
1378
1991
  root.find(`[${config.extract.ignoreAttr}]`).remove();
1992
+ preprocessImages(root, $, config.extract.imageDescAttr);
1379
1993
  const outgoingLinks = [];
1994
+ const seenLinkKeys = /* @__PURE__ */ new Set();
1380
1995
  root.find("a[href]").each((_index, node) => {
1381
1996
  const href = $(node).attr("href");
1382
1997
  if (!href || href.startsWith("#") || href.startsWith("mailto:") || href.startsWith("tel:")) {
@@ -1387,7 +2002,19 @@ function extractFromHtml(url, html, config) {
1387
2002
  if (!["http:", "https:"].includes(parsed.protocol)) {
1388
2003
  return;
1389
2004
  }
1390
- outgoingLinks.push(normalizeUrlPath(parsed.pathname));
2005
+ const url2 = normalizeUrlPath(parsed.pathname);
2006
+ let anchorText = normalizeAnchorText($(node).text());
2007
+ if (!anchorText) {
2008
+ const imgAlt = $(node).find("img").first().attr("alt") ?? "";
2009
+ if (isMeaningfulAlt(imgAlt)) {
2010
+ anchorText = normalizeAnchorText(imgAlt);
2011
+ }
2012
+ }
2013
+ const key = `${url2}|${anchorText}`;
2014
+ if (!seenLinkKeys.has(key)) {
2015
+ seenLinkKeys.add(key);
2016
+ outgoingLinks.push({ url: url2, anchorText });
2017
+ }
1391
2018
  } catch {
1392
2019
  }
1393
2020
  });
@@ -1412,16 +2039,25 @@ function extractFromHtml(url, html, config) {
1412
2039
  return null;
1413
2040
  }
1414
2041
  const tags = normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1);
2042
+ const publishedAt = extractPublishedAtFromHtml($);
2043
+ if (componentTags) {
2044
+ const extraTags = componentTags.split(",").map((t) => t.trim()).filter(Boolean);
2045
+ for (const t of extraTags) {
2046
+ if (!tags.includes(t)) tags.push(t);
2047
+ }
2048
+ }
1415
2049
  return {
1416
2050
  url: normalizeUrlPath(url),
1417
2051
  title,
1418
2052
  markdown,
1419
- outgoingLinks: [...new Set(outgoingLinks)],
2053
+ outgoingLinks,
1420
2054
  noindex: false,
1421
2055
  tags,
1422
2056
  description,
1423
2057
  keywords,
1424
- weight
2058
+ weight,
2059
+ publishedAt,
2060
+ meta: Object.keys(meta).length > 0 ? meta : void 0
1425
2061
  };
1426
2062
  }
1427
2063
  function extractFromMarkdown(url, markdown, title) {
@@ -1442,6 +2078,24 @@ function extractFromMarkdown(url, markdown, title) {
1442
2078
  if (mdWeight === 0) {
1443
2079
  return null;
1444
2080
  }
2081
+ let mdMeta;
2082
+ const rawMeta = searchsocketMeta?.meta;
2083
+ if (rawMeta && typeof rawMeta === "object" && !Array.isArray(rawMeta)) {
2084
+ const metaObj = {};
2085
+ for (const [key, val] of Object.entries(rawMeta)) {
2086
+ if (!validateMetaKey(key)) continue;
2087
+ if (typeof val === "string" || typeof val === "number" || typeof val === "boolean") {
2088
+ metaObj[key] = val;
2089
+ } else if (Array.isArray(val) && val.every((v) => typeof v === "string")) {
2090
+ metaObj[key] = val;
2091
+ } else if (val instanceof Date) {
2092
+ metaObj[key] = val.getTime();
2093
+ }
2094
+ }
2095
+ if (Object.keys(metaObj).length > 0) {
2096
+ mdMeta = metaObj;
2097
+ }
2098
+ }
1445
2099
  const content = parsed.content;
1446
2100
  const normalized = normalizeMarkdown(content);
1447
2101
  if (!normalizeText(normalized)) {
@@ -1456,6 +2110,7 @@ function extractFromMarkdown(url, markdown, title) {
1456
2110
  fmKeywords = frontmatter.keywords.split(",").map((k) => k.trim()).filter(Boolean);
1457
2111
  }
1458
2112
  if (fmKeywords && fmKeywords.length === 0) fmKeywords = void 0;
2113
+ const publishedAt = extractPublishedAtFromFrontmatter(frontmatter);
1459
2114
  return {
1460
2115
  url: normalizeUrlPath(url),
1461
2116
  title: resolvedTitle,
@@ -1465,7 +2120,9 @@ function extractFromMarkdown(url, markdown, title) {
1465
2120
  tags: normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1),
1466
2121
  description: fmDescription,
1467
2122
  keywords: fmKeywords,
1468
- weight: mdWeight
2123
+ weight: mdWeight,
2124
+ publishedAt,
2125
+ meta: mdMeta
1469
2126
  };
1470
2127
  }
1471
2128
 
@@ -1919,6 +2576,125 @@ function filePathToUrl(filePath, baseDir) {
1919
2576
  const noExt = relative.replace(/\.md$/i, "").replace(/\/index$/i, "");
1920
2577
  return normalizeUrlPath(noExt || "/");
1921
2578
  }
2579
// Matches paths ending in "+page", "+layout" or "+error" (optionally followed
// by an "@name" segment) and the ".svelte" extension.
var ROUTE_FILE_RE = /\+(page|layout|error)(@[^.]+)?\.svelte$/;

/**
 * @param {string} filePath
 * @returns {boolean} `true` for a `.svelte` file whose path does not match
 *   `ROUTE_FILE_RE`; `false` for route files and for any non-`.svelte` path.
 */
function isSvelteComponentFile(filePath) {
  return filePath.endsWith(".svelte") && !ROUTE_FILE_RE.test(filePath);
}
2584
/**
 * Extract lightweight documentation from Svelte component source using
 * regular expressions (no real parsing).
 *
 * - description: body of the first `<!-- @component ... -->` comment.
 * - props: names destructured from `let { ... } = $props()`, each with its
 *   default expression (if any) and its type when the annotation is either a
 *   capitalized type name declared in the same source or an inline `{ ... }`
 *   type literal. Rest elements (`...rest`) are skipped.
 *
 * @param {string} source - Component source text.
 * @returns {{ description: string | undefined, props: Array<{ name: string, type?: string, default?: string }> }}
 */
function extractSvelteComponentMeta(source) {
  const description = source.match(/<!--\s*@component\s*([\s\S]*?)\s*-->/)?.[1]?.trim() || undefined;
  const propsMatch = source.match(
    /let\s+\{([\s\S]*?)\}\s*(?::\s*([^=;{][\s\S]*?))?\s*=\s*\$props\(\)/
  );
  if (!propsMatch) return { description, props: [] };

  const destructureBlock = propsMatch[1];
  const typeAnnotation = propsMatch[2]?.trim();

  // Map of prop name -> type text, when the annotation can be resolved.
  let typesByProp;
  if (typeAnnotation) {
    if (/^[A-Z]\w*$/.test(typeAnnotation)) {
      typesByProp = resolveTypeReference(source, typeAnnotation);
    } else if (typeAnnotation.startsWith("{")) {
      typesByProp = parseInlineTypeAnnotation(typeAnnotation);
    }
  }

  const props = [];
  for (const rawEntry of splitDestructureBlock(destructureBlock)) {
    const entry = rawEntry.trim();
    if (!entry || entry.startsWith("...")) continue;

    // "name: alias" / "name: alias = default" is tried before "name = default".
    const renamed = entry.match(/^(\w+)\s*:\s*\w+\s*(?:=\s*([\s\S]+))?$/);
    const withDefault = renamed ? null : entry.match(/^(\w+)\s*=\s*([\s\S]+)$/);
    const matched = renamed ?? withDefault;

    const name = matched ? matched[1] : entry.match(/^(\w+)/)?.[1] ?? entry;
    const defaultValue = matched?.[2]?.trim();
    const type = typesByProp?.get(name);

    const prop = { name };
    if (type) prop.type = type;
    if (defaultValue) prop.default = defaultValue;
    props.push(prop);
  }
  return { description, props };
}
2629
/**
 * Split the inside of a destructuring pattern into its top-level entries.
 *
 * Commas are treated as separators only at nesting depth 0 and outside string
 * literals, so defaults such as `{ a: 1, b: 2 }`, `(x, y) => x` or
 * `"Hello, world"` stay in one entry. `//` and block comments are dropped so
 * punctuation inside them (for example an apostrophe) cannot affect splitting.
 *
 * @param {string} block - Text between the braces of the pattern.
 * @returns {string[]} Untrimmed entries in source order; a trailing entry that
 *   is only whitespace is omitted.
 */
function splitDestructureBlock(block) {
  const entries = [];
  let depth = 0;
  let current = "";
  let quote = null; // the quote character of the string literal being scanned, if any
  for (let i = 0; i < block.length; i++) {
    const ch = block[i];
    if (quote) {
      current += ch;
      if (ch === "\\" && i + 1 < block.length) {
        // Keep the escaped character so an escaped quote does not end the string.
        current += block[++i];
      } else if (ch === quote) {
        quote = null;
      }
      continue;
    }
    if (ch === "/" && block[i + 1] === "/") {
      // Line comment: skip up to (not including) the newline.
      const eol = block.indexOf("\n", i);
      i = eol === -1 ? block.length : eol - 1;
      continue;
    }
    if (ch === "/" && block[i + 1] === "*") {
      // Block comment: skip through the closing "*/".
      const close = block.indexOf("*/", i + 2);
      i = close === -1 ? block.length : close + 1;
      continue;
    }
    if (ch === '"' || ch === "'" || ch === "`") {
      quote = ch;
      current += ch;
    } else if (ch === "{" || ch === "[" || ch === "(") {
      depth++;
      current += ch;
    } else if (ch === "}" || ch === "]" || ch === ")") {
      depth--;
      current += ch;
    } else if (ch === "," && depth === 0) {
      entries.push(current);
      current = "";
    } else {
      current += ch;
    }
  }
  if (current.trim()) entries.push(current);
  return entries;
}
2650
/**
 * Find `interface <typeName> {` or `type <typeName> = {` in `source` and parse
 * the members of its body.
 *
 * The body is delimited by counting `{` and `}` characters from the opening
 * brace; braces inside strings or comments are counted like any other.
 *
 * @param {string} source - Source text to search.
 * @param {string} typeName - Type name, interpolated into a regular expression
 *   as-is.
 * @returns {Map<string, string> | undefined} Result of `parseTypeMembers` on
 *   the body, or `undefined` when no declaration is found or its braces never
 *   balance.
 */
function resolveTypeReference(source, typeName) {
  const declarationRe = new RegExp(`(?:interface\\s+${typeName}\\s*|type\\s+${typeName}\\s*=\\s*)\\{`);
  const declaration = declarationRe.exec(source);
  if (!declaration) return undefined;

  const bodyStart = declaration.index + declaration[0].length;
  let open = 1;
  let pos = bodyStart;
  for (; pos < source.length && open > 0; pos++) {
    const ch = source[pos];
    if (ch === "{") open++;
    else if (ch === "}") open--;
  }
  // `pos` is one past the closing brace when the braces balanced.
  return open === 0 ? parseTypeMembers(source.slice(bodyStart, pos - 1)) : undefined;
}
2666
/**
 * Parse an inline type literal such as `{ a: string; b?: number }`.
 *
 * @param {string} annotation - Type literal text; one leading `{` and one
 *   trailing `}` are stripped when present.
 * @returns {Map<string, string>} Result of `parseTypeMembers` on the inner text.
 */
function parseInlineTypeAnnotation(annotation) {
  return parseTypeMembers(annotation.replace(/^\{/, "").replace(/\}$/, ""));
}
2670
/**
 * Parse the body of an interface or type literal into a name -> type map.
 *
 * Members may be separated by `;`, `,` or a newline. Separators are honored
 * only at nesting depth 0 (outside `{}`, `[]`, `()`, `<>` and string
 * literals), so nested object types, function signatures and generics stay
 * attached to their member. Comments are removed before splitting. Members
 * that do not look like `name: type` or `name?: type` are ignored.
 *
 * @param {string} body - Text between the braces of the declaration.
 * @returns {Map<string, string>} Member name mapped to its type text; line
 *   breaks inside a multi-line type are collapsed to single spaces.
 */
function parseTypeMembers(body) {
  const map = /* @__PURE__ */ new Map();
  // Strip block comments, then line comments that start a line or follow
  // whitespace (so "http://..." inside a string literal type is left alone).
  const source = body.replace(/\/\*[\s\S]*?\*\//g, " ").replace(/(^|\s)\/\/[^\n]*/g, "$1");

  const members = [];
  let depth = 0;
  let quote = null;
  let current = "";
  for (let i = 0; i < source.length; i++) {
    const ch = source[i];
    if (quote) {
      current += ch;
      if (ch === "\\" && i + 1 < source.length) current += source[++i];
      else if (ch === quote) quote = null;
      continue;
    }
    if (ch === '"' || ch === "'" || ch === "`") {
      quote = ch;
    } else if (ch === "{" || ch === "[" || ch === "(" || ch === "<") {
      depth++;
    } else if (ch === "}" || ch === "]" || ch === ")") {
      depth = Math.max(0, depth - 1);
    } else if (ch === ">" && source[i - 1] !== "=") {
      // The ">" of an arrow "=>" does not close a generic.
      depth = Math.max(0, depth - 1);
    }
    if (depth === 0 && !quote && (ch === ";" || ch === "," || ch === "\n")) {
      members.push(current);
      current = "";
    } else {
      current += ch;
    }
  }
  members.push(current);

  for (const rawMember of members) {
    const memberMatch = rawMember.trim().match(/^(\w+)\??\s*:\s*([\s\S]+)$/);
    if (memberMatch) {
      map.set(memberMatch[1], memberMatch[2].replace(/\s*\n\s*/g, " ").trim());
    }
  }
  return map;
}
2681
/**
 * Render component metadata as a single line of descriptive text.
 *
 * @param {string} componentName - Name used in the leading "<name> component." sentence.
 * @param {{ description?: string, props: Array<{ name: string, type?: string, default?: string }> }} meta
 * @returns {string} "" when there is neither a description nor any prop;
 *   otherwise the leading sentence, the description (if any) and a
 *   "Props: ..." sentence (if any props), joined with single spaces.
 */
function buildComponentMarkdown(componentName, meta) {
  const { description, props } = meta;
  const hasProps = props.length > 0;
  if (!description && !hasProps) return "";

  const sentences = [`${componentName} component.`];
  if (description) sentences.push(description);
  if (hasProps) {
    const rendered = props.map(({ name, type, default: fallback }) => {
      const typePart = type ? ` (${type})` : "";
      const defaultPart = fallback ? ` default: ${fallback}` : "";
      return `${name}${typePart}${defaultPart}`;
    });
    sentences.push(`Props: ${rendered.join(", ")}.`);
  }
  return sentences.join(" ");
}
1922
2698
  function normalizeSvelteToMarkdown(source) {
1923
2699
  return source.replace(/<script[\s\S]*?<\/script>/g, "").replace(/<style[\s\S]*?<\/style>/g, "").replace(/<[^>]+>/g, " ").replace(/\{[^}]+\}/g, " ").replace(/\s+/g, " ").trim();
1924
2700
  }
@@ -1938,12 +2714,26 @@ async function loadContentFilesPages(cwd, config, maxPages) {
1938
2714
  const pages = [];
1939
2715
  for (const filePath of selected) {
1940
2716
  const raw = await fs5.readFile(filePath, "utf8");
1941
- const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
2717
+ let markdown;
2718
+ let tags;
2719
+ if (filePath.endsWith(".md")) {
2720
+ markdown = raw;
2721
+ } else if (isSvelteComponentFile(filePath)) {
2722
+ const componentName = path7.basename(filePath, ".svelte");
2723
+ const meta = extractSvelteComponentMeta(raw);
2724
+ const componentMarkdown = buildComponentMarkdown(componentName, meta);
2725
+ const templateContent = normalizeSvelteToMarkdown(raw);
2726
+ markdown = componentMarkdown ? [componentMarkdown, templateContent].filter(Boolean).join("\n\n") : templateContent;
2727
+ tags = ["component"];
2728
+ } else {
2729
+ markdown = normalizeSvelteToMarkdown(raw);
2730
+ }
1942
2731
  pages.push({
1943
2732
  url: filePathToUrl(filePath, baseDir),
1944
2733
  markdown,
1945
2734
  sourcePath: path7.relative(cwd, filePath).replace(/\\/g, "/"),
1946
- outgoingLinks: []
2735
+ outgoingLinks: [],
2736
+ ...tags ? { tags } : {}
1947
2737
  });
1948
2738
  }
1949
2739
  return pages;
@@ -1958,9 +2748,9 @@ function extractLocs(xml) {
1958
2748
  const $ = cheerioLoad2(xml, { xmlMode: true });
1959
2749
  const locs = [];
1960
2750
  $("loc").each((_i, el) => {
1961
- const text = $(el).text().trim();
1962
- if (text) {
1963
- locs.push(text);
2751
+ const text2 = $(el).text().trim();
2752
+ if (text2) {
2753
+ locs.push(text2);
1964
2754
  }
1965
2755
  });
1966
2756
  return locs;
@@ -2175,32 +2965,68 @@ function nonNegativeOrZero(value) {
2175
2965
  }
2176
2966
  return Math.max(0, value);
2177
2967
  }
2178
/**
 * Normalises free text for loose substring comparison of queries against
 * titles and anchor text.
 *
 * The text is NFKC-normalised (so composed and decomposed forms of the same
 * character compare equal), lower-cased, stripped of everything that is not a
 * letter, combining mark, digit or whitespace, and has whitespace runs
 * collapsed to a single space.
 *
 * Unicode property escapes are used instead of an ASCII-only class so that
 * non-Latin titles do not normalise to an empty string and accented words are
 * not truncated. Pure ASCII input yields the same result as an `[a-z0-9]`
 * filter.
 *
 * @param {string} text2 - Raw title, anchor text or query.
 * @returns {string} Normalised comparison key (may be empty).
 */
function normalizeForTitleMatch(text2) {
  return text2
    .normalize("NFKC")
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}
2181
- function rankHits(hits, config, query) {
2971
+ function rankHits(hits, config, query, debug) {
2182
2972
  const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
2183
2973
  const titleMatchWeight = config.ranking.weights.titleMatch;
2184
2974
  return hits.map((hit) => {
2185
- let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
2975
+ const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
2976
+ let score = baseScore;
2977
+ let incomingLinkBoostValue = 0;
2186
2978
  if (config.ranking.enableIncomingLinkBoost) {
2187
2979
  const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.metadata.incomingLinks));
2188
- score += incomingBoost * config.ranking.weights.incomingLinks;
2980
+ incomingLinkBoostValue = incomingBoost * config.ranking.weights.incomingLinks;
2981
+ score += incomingLinkBoostValue;
2189
2982
  }
2983
+ let depthBoostValue = 0;
2190
2984
  if (config.ranking.enableDepthBoost) {
2191
2985
  const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
2192
- score += depthBoost * config.ranking.weights.depth;
2986
+ depthBoostValue = depthBoost * config.ranking.weights.depth;
2987
+ score += depthBoostValue;
2193
2988
  }
2989
+ let titleMatchBoostValue = 0;
2194
2990
  if (normalizedQuery && titleMatchWeight > 0) {
2195
2991
  const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
2196
2992
  if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
2197
- score += titleMatchWeight;
2993
+ titleMatchBoostValue = titleMatchWeight;
2994
+ score += titleMatchBoostValue;
2198
2995
  }
2199
2996
  }
2200
- return {
2997
+ let freshnessBoostValue = 0;
2998
+ if (config.ranking.enableFreshnessBoost) {
2999
+ const publishedAt = hit.metadata.publishedAt;
3000
+ if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
3001
+ const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
3002
+ const decay = 1 / (1 + nonNegativeOrZero(daysSince) * config.ranking.freshnessDecayRate);
3003
+ freshnessBoostValue = decay * config.ranking.weights.freshness;
3004
+ score += freshnessBoostValue;
3005
+ }
3006
+ }
3007
+ let anchorTextMatchBoostValue = 0;
3008
+ if (config.ranking.enableAnchorTextBoost && normalizedQuery && config.ranking.weights.anchorText > 0) {
3009
+ const normalizedAnchorText = normalizeForTitleMatch(hit.metadata.incomingAnchorText ?? "");
3010
+ if (normalizedAnchorText.length > 0 && normalizedQuery.length > 0 && (normalizedAnchorText.includes(normalizedQuery) || normalizedQuery.includes(normalizedAnchorText))) {
3011
+ anchorTextMatchBoostValue = config.ranking.weights.anchorText;
3012
+ score += anchorTextMatchBoostValue;
3013
+ }
3014
+ }
3015
+ const result = {
2201
3016
  hit,
2202
3017
  finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
2203
3018
  };
3019
+ if (debug) {
3020
+ result.breakdown = {
3021
+ baseScore,
3022
+ incomingLinkBoost: incomingLinkBoostValue,
3023
+ depthBoost: depthBoostValue,
3024
+ titleMatchBoost: titleMatchBoostValue,
3025
+ freshnessBoost: freshnessBoostValue,
3026
+ anchorTextMatchBoost: anchorTextMatchBoostValue
3027
+ };
3028
+ }
3029
+ return result;
2204
3030
  }).sort((a, b) => {
2205
3031
  const delta = b.finalScore - a.finalScore;
2206
3032
  return Number.isNaN(delta) ? 0 : delta;
@@ -2209,12 +3035,13 @@ function rankHits(hits, config, query) {
2209
3035
  function trimByScoreGap(results, config) {
2210
3036
  if (results.length === 0) return results;
2211
3037
  const threshold = config.ranking.scoreGapThreshold;
2212
- const minScore = config.ranking.minScore;
2213
- if (minScore > 0 && results.length > 0) {
2214
- const sortedScores = results.map((r) => r.pageScore).sort((a, b) => a - b);
2215
- const mid = Math.floor(sortedScores.length / 2);
2216
- const median = sortedScores.length % 2 === 0 ? (sortedScores[mid - 1] + sortedScores[mid]) / 2 : sortedScores[mid];
2217
- if (median < minScore) return [];
3038
+ const minScoreRatio = config.ranking.minScoreRatio;
3039
+ if (minScoreRatio > 0 && results.length > 0) {
3040
+ const topScore = results[0].pageScore;
3041
+ if (Number.isFinite(topScore) && topScore > 0) {
3042
+ const minThreshold = topScore * minScoreRatio;
3043
+ results = results.filter((r) => r.pageScore >= minThreshold);
3044
+ }
2218
3045
  }
2219
3046
  if (threshold > 0 && results.length > 1) {
2220
3047
  for (let i = 1; i < results.length; i++) {
@@ -2284,61 +3111,99 @@ function aggregateByPage(ranked, config) {
2284
3111
  return Number.isNaN(delta) ? 0 : delta;
2285
3112
  });
2286
3113
  }
2287
/**
 * Scores page-level search hits and returns them ordered best-first.
 *
 * Each hit starts from its raw `score` (non-finite scores become -Infinity)
 * and, depending on `config.ranking`, receives additive boosts for incoming
 * links, URL depth, title/query overlap and freshness. The sum is then
 * multiplied by the page weight returned by `findPageWeight`. Pages whose
 * weight is exactly 0 are removed from the output.
 *
 * @param {Array<object>} pageHits - Raw page hits (url, title, score, depth, ...).
 * @param {object} config - Resolved configuration; only `config.ranking` is read.
 * @param {string} [query] - User query, used for the title-match boost.
 * @param {boolean} [debug] - When truthy, attaches a `breakdown` to each result.
 * @returns {Array<object>} Ranked page results sorted by `finalScore` descending.
 */
function rankPageHits(pageHits, config, query, debug) {
  const queryKey = query ? normalizeForTitleMatch(query) : "";
  const titleWeight = config.ranking.weights.titleMatch;
  const scored = [];

  for (const hit of pageHits) {
    const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
    let total = baseScore;

    // Incoming links: logarithmic so heavily linked pages do not dominate.
    let linkBoost = 0;
    if (config.ranking.enableIncomingLinkBoost) {
      linkBoost = Math.log(1 + nonNegativeOrZero(hit.incomingLinks)) * config.ranking.weights.incomingLinks;
      total += linkBoost;
    }

    // Depth: shallower URLs get a larger share of the depth weight.
    let depthBoost = 0;
    if (config.ranking.enableDepthBoost) {
      depthBoost = 1 / (1 + nonNegativeOrZero(hit.depth)) * config.ranking.weights.depth;
      total += depthBoost;
    }

    // Title match: flat bonus when either normalised string contains the other.
    let titleBoost = 0;
    if (queryKey && titleWeight > 0) {
      const titleKey = normalizeForTitleMatch(hit.title);
      const bothPresent = queryKey.length > 0 && titleKey.length > 0;
      if (bothPresent && (titleKey.includes(queryKey) || queryKey.includes(titleKey))) {
        titleBoost = titleWeight;
        total += titleBoost;
      }
    }

    // Freshness: decays with days since publication; future dates count as today.
    let freshBoost = 0;
    if (config.ranking.enableFreshnessBoost) {
      const stamp = hit.publishedAt;
      if (typeof stamp === "number" && Number.isFinite(stamp)) {
        const ageDays = Math.max(0, (Date.now() - stamp) / 864e5);
        const falloff = 1 / (1 + nonNegativeOrZero(ageDays) * config.ranking.freshnessDecayRate);
        freshBoost = falloff * config.ranking.weights.freshness;
        total += freshBoost;
      }
    }

    // The page weight is multiplicative and applied after all additive boosts.
    const pageWeight = findPageWeight(hit.url, config.ranking.pageWeights);
    if (pageWeight !== 1) {
      total *= pageWeight;
    }

    const entry = {
      url: hit.url,
      title: hit.title,
      description: hit.description,
      routeFile: hit.routeFile,
      depth: hit.depth,
      incomingLinks: hit.incomingLinks,
      tags: hit.tags,
      baseScore,
      finalScore: Number.isFinite(total) ? total : Number.NEGATIVE_INFINITY,
      publishedAt: hit.publishedAt
    };
    if (debug) {
      entry.breakdown = {
        baseScore,
        pageWeight,
        incomingLinkBoost: linkBoost,
        depthBoost,
        titleMatchBoost: titleBoost,
        freshnessBoost: freshBoost
      };
    }
    scored.push(entry);
  }

  // A weight of exactly 0 means "never show this page".
  const visible = scored.filter((p) => findPageWeight(p.url, config.ranking.pageWeights) !== 0);
  return visible.sort((a, b) => {
    const delta = b.finalScore - a.finalScore;
    // Two -Infinity scores subtract to NaN; treat them as equal.
    return Number.isNaN(delta) ? 0 : delta;
  });
}
3182
/**
 * Trims a best-first list of ranked pages in two passes.
 *
 * 1. If `minScoreRatio` is positive and the top score is a positive finite
 *    number, drop every page scoring below `topScore * minScoreRatio`.
 * 2. If `scoreGapThreshold` is positive, cut the list at the first position
 *    where the relative drop from the previous (positive) score reaches the
 *    threshold.
 *
 * @param {Array<{finalScore: number}>} results - Pages sorted by `finalScore` descending.
 * @param {object} config - Resolved configuration; reads `config.ranking`.
 * @returns {Array<{finalScore: number}>} The trimmed list (the input array itself when nothing is removed).
 */
function trimPagesByScoreGap(results, config) {
  if (results.length === 0) return results;

  const gapLimit = config.ranking.scoreGapThreshold;
  const ratio = config.ranking.minScoreRatio;
  let kept = results;

  if (ratio > 0 && kept.length > 0) {
    const best = kept[0].finalScore;
    if (Number.isFinite(best) && best > 0) {
      const floor = best * ratio;
      kept = kept.filter((entry) => entry.finalScore >= floor);
    }
  }

  if (gapLimit > 0 && kept.length > 1) {
    const cutAt = kept.findIndex((entry, idx) => {
      if (idx === 0) return false;
      const before = kept[idx - 1].finalScore;
      // Relative gaps are only meaningful against a positive predecessor.
      return before > 0 && (before - entry.finalScore) / before >= gapLimit;
    });
    if (cutAt !== -1) {
      return kept.slice(0, cutAt);
    }
  }

  return kept;
}
2342
3207
 
2343
3208
  // src/utils/time.ts
2344
3209
  function nowIso() {
@@ -2348,6 +3213,85 @@ function hrTimeMs(start) {
2348
3213
  return Number(process.hrtime.bigint() - start) / 1e6;
2349
3214
  }
2350
3215
 
3216
+ // src/indexing/llms-txt.ts
3217
+ import fs8 from "fs/promises";
3218
+ import path10 from "path";
3219
/**
 * Resolves a page URL against the project's base URL.
 *
 * @param {string} pageUrl - Page URL or path.
 * @param {string} [baseUrl] - Base URL to resolve against.
 * @returns {string} The absolute href, or `pageUrl` unchanged when no base URL
 *   is given or the pair cannot be parsed as a URL.
 */
function resolvePageUrl(pageUrl, baseUrl) {
  if (baseUrl) {
    try {
      return new URL(pageUrl, baseUrl).href;
    } catch {
      // Unparseable base or path: fall through and return the input as-is.
    }
  }
  return pageUrl;
}
3227
/**
 * Renders the llms.txt index: a heading, an optional block-quoted description
 * and a "Pages" list with one markdown link per page.
 *
 * The generated `/llms.txt` and `/llms-full.txt` pages are excluded. Remaining
 * pages are ordered by ascending depth, then by descending incoming links.
 *
 * @param {Array<object>} pages - Indexed pages (url, title, description, depth, incomingLinks).
 * @param {object} config - Resolved configuration; reads `llmsTxt` and `project`.
 * @returns {string} File contents, terminated by a newline.
 */
function generateLlmsTxt(pages, config) {
  const heading = config.llmsTxt.title ?? config.project.id;
  const blurb = config.llmsTxt.description;
  const baseUrl = config.project.baseUrl;

  const output = [`# ${heading}`];
  if (blurb) {
    output.push("", `> ${blurb}`);
  }

  const ordered = pages
    .filter((p) => !(p.url === "/llms.txt" || p.url === "/llms-full.txt"))
    .sort((a, b) => a.depth !== b.depth ? a.depth - b.depth : b.incomingLinks - a.incomingLinks);

  if (ordered.length > 0) {
    output.push("", "## Pages", "");
    ordered.forEach((entry) => {
      const link = `- [${entry.title}](${resolvePageUrl(entry.url, baseUrl)})`;
      output.push(entry.description ? `${link}: ${entry.description}` : link);
    });
  }

  // Trailing empty element gives the file its final newline.
  output.push("");
  return output.join("\n");
}
3256
/**
 * Renders the llms-full.txt document: a heading, an optional block-quoted
 * description, then every page's trimmed markdown under its own linked
 * heading, with sections separated by `---` rules.
 *
 * The generated `/llms.txt` and `/llms-full.txt` pages are excluded. Remaining
 * pages are ordered by ascending depth, then by descending incoming links.
 *
 * @param {Array<object>} pages - Indexed pages (url, title, markdown, depth, incomingLinks).
 * @param {object} config - Resolved configuration; reads `llmsTxt` and `project`.
 * @returns {string} File contents, terminated by a newline.
 */
function generateLlmsFullTxt(pages, config) {
  const heading = config.llmsTxt.title ?? config.project.id;
  const blurb = config.llmsTxt.description;
  const baseUrl = config.project.baseUrl;

  const header = blurb ? [`# ${heading}`, "", `> ${blurb}`] : [`# ${heading}`];

  const ordered = pages
    .filter((p) => !(p.url === "/llms.txt" || p.url === "/llms-full.txt"))
    .sort((a, b) => a.depth !== b.depth ? a.depth - b.depth : b.incomingLinks - a.incomingLinks);

  const sections = ordered.flatMap((entry) => {
    const href = resolvePageUrl(entry.url, baseUrl);
    return ["", "---", "", `## [${entry.title}](${href})`, "", entry.markdown.trim()];
  });

  // Trailing empty element gives the file its final newline.
  return [...header, ...sections, ""].join("\n");
}
3279
/**
 * Writes the llms.txt index and, when `config.llmsTxt.generateFull` is set,
 * the companion "-full" document next to it.
 *
 * The companion path is derived by inserting `-full` before the file
 * extension. For the usual `*.txt` output this gives `*-full.txt`. For any
 * other name (e.g. `llms.md` or an extension-less `llms`) the suffix is still
 * inserted, so the full dump can never resolve to the same path as the index
 * and overwrite it.
 *
 * @param {Array<object>} pages - Indexed pages to render.
 * @param {object} config - Resolved configuration; reads `config.llmsTxt`.
 * @param {string} cwd - Directory that `config.llmsTxt.outputPath` is resolved against.
 * @param {{info: Function}} logger3 - Logger used for progress messages.
 * @returns {Promise<void>}
 */
async function writeLlmsTxt(pages, config, cwd, logger3) {
  const outputPath = path10.resolve(cwd, config.llmsTxt.outputPath);
  await fs8.mkdir(path10.dirname(outputPath), { recursive: true });

  const content = generateLlmsTxt(pages, config);
  await fs8.writeFile(outputPath, content, "utf8");
  logger3.info(`Generated llms.txt at ${config.llmsTxt.outputPath}`);

  if (!config.llmsTxt.generateFull) {
    return;
  }

  let fullPath;
  if (outputPath.endsWith(".txt")) {
    fullPath = `${outputPath.slice(0, -".txt".length)}-full.txt`;
  } else {
    // A plain /\.txt$/ replace would leave the path unchanged here and the
    // full document would clobber the index written above.
    const { dir, name, ext } = path10.parse(outputPath);
    fullPath = path10.join(dir, `${name}-full${ext}`);
  }

  const fullContent = generateLlmsFullTxt(pages, config);
  await fs8.writeFile(fullPath, fullContent, "utf8");
  const relativeFull = path10.relative(cwd, fullPath);
  logger3.info(`Generated llms-full.txt at ${relativeFull}`);
}
3294
+
2351
3295
  // src/indexing/pipeline.ts
2352
3296
  function buildPageSummary(page, maxChars = 3500) {
2353
3297
  const parts = [page.title];
@@ -2365,26 +3309,44 @@ function buildPageSummary(page, maxChars = 3500) {
2365
3309
  if (joined.length <= maxChars) return joined;
2366
3310
  return joined.slice(0, maxChars).trim();
2367
3311
  }
3312
/**
 * Builds the change-detection hash for an indexed page.
 *
 * The hash input is the `|`-joined list of: title, description, sorted
 * keywords, sorted tags, markdown, outgoing link count, publishedAt, incoming
 * anchor text, sorted outgoing link URLs and a key-order-independent JSON
 * rendering of `meta`.
 *
 * `meta` is serialised with a replacer function that rebuilds every plain
 * object with sorted keys. An array replacer (`Object.keys(meta).sort()`) is
 * not suitable because it acts as a property allow-list at every nesting
 * level, which would drop nested keys and hide changes to nested values. For
 * flat `meta` objects both approaches produce the same string.
 *
 * @param {object} page - Indexed page record.
 * @returns {string} Result of `sha256` over the joined parts.
 */
function buildPageContentHash(page) {
  const sortedList = (values) => (values ?? []).slice().sort().join(",");
  const stableJson = (value) => JSON.stringify(value, (_key, current) => {
    if (current === null || typeof current !== "object" || Array.isArray(current)) {
      return current;
    }
    const ordered = {};
    for (const key of Object.keys(current).sort()) {
      ordered[key] = current[key];
    }
    return ordered;
  });

  const parts = [
    page.title,
    page.description ?? "",
    sortedList(page.keywords),
    page.tags.slice().sort().join(","),
    page.markdown,
    String(page.outgoingLinks),
    String(page.publishedAt ?? ""),
    page.incomingAnchorText ?? "",
    sortedList(page.outgoingLinkUrls),
    page.meta ? stableJson(page.meta) : ""
  ];
  return sha256(parts.join("|"));
}
2368
3327
  var IndexPipeline = class _IndexPipeline {
2369
3328
  cwd;
2370
3329
  config;
2371
3330
  store;
2372
3331
  logger;
3332
+ hooks;
2373
3333
  constructor(options) {
2374
3334
  this.cwd = options.cwd;
2375
3335
  this.config = options.config;
2376
3336
  this.store = options.store;
2377
3337
  this.logger = options.logger;
3338
+ this.hooks = options.hooks;
2378
3339
  }
2379
3340
  static async create(options = {}) {
2380
- const cwd = path10.resolve(options.cwd ?? process.cwd());
3341
+ const cwd = path11.resolve(options.cwd ?? process.cwd());
2381
3342
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
2382
3343
  const store = options.store ?? await createUpstashStore(config);
2383
3344
  return new _IndexPipeline({
2384
3345
  cwd,
2385
3346
  config,
2386
3347
  store,
2387
- logger: options.logger ?? new Logger()
3348
+ logger: options.logger ?? new Logger(),
3349
+ hooks: options.hooks ?? {}
2388
3350
  });
2389
3351
  }
2390
3352
  getConfig() {
@@ -2405,7 +3367,7 @@ var IndexPipeline = class _IndexPipeline {
2405
3367
  const scope = resolveScope(this.config, options.scopeOverride);
2406
3368
  ensureStateDirs(this.cwd, this.config.state.dir, scope);
2407
3369
  const sourceMode = options.sourceOverride ?? this.config.source.mode;
2408
- this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
3370
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-vector)`);
2409
3371
  if (options.force) {
2410
3372
  this.logger.info("Force mode enabled \u2014 full rebuild");
2411
3373
  }
@@ -2414,8 +3376,9 @@ var IndexPipeline = class _IndexPipeline {
2414
3376
  }
2415
3377
  const manifestStart = stageStart();
2416
3378
  const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
3379
+ const existingPageHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getPageHashes(scope);
2417
3380
  stageEnd("manifest", manifestStart);
2418
- this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
3381
+ this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes, ${existingPageHashes.size} existing page hashes loaded`);
2419
3382
  const sourceStart = stageStart();
2420
3383
  this.logger.info(`Loading pages (source: ${sourceMode})...`);
2421
3384
  let sourcePages;
@@ -2451,11 +3414,11 @@ var IndexPipeline = class _IndexPipeline {
2451
3414
  let robotsRules = null;
2452
3415
  if (sourceMode === "static-output") {
2453
3416
  robotsRules = await loadRobotsTxtFromDir(
2454
- path10.resolve(this.cwd, this.config.source.staticOutputDir)
3417
+ path11.resolve(this.cwd, this.config.source.staticOutputDir)
2455
3418
  );
2456
3419
  } else if (sourceMode === "build" && this.config.source.build) {
2457
3420
  robotsRules = await loadRobotsTxtFromDir(
2458
- path10.resolve(this.cwd, this.config.source.build.outputDir)
3421
+ path11.resolve(this.cwd, this.config.source.build.outputDir)
2459
3422
  );
2460
3423
  } else if (sourceMode === "crawl" && this.config.source.crawl) {
2461
3424
  robotsRules = await fetchRobotsTxt(this.config.source.crawl.baseUrl);
@@ -2492,11 +3455,61 @@ var IndexPipeline = class _IndexPipeline {
2492
3455
  );
2493
3456
  continue;
2494
3457
  }
2495
- extractedPages.push(extracted);
3458
+ if (sourcePage.tags && sourcePage.tags.length > 0) {
3459
+ extracted.tags = [.../* @__PURE__ */ new Set([...extracted.tags, ...sourcePage.tags])];
3460
+ }
3461
+ let accepted;
3462
+ if (this.hooks.transformPage) {
3463
+ const transformed = await this.hooks.transformPage(extracted);
3464
+ if (transformed === null) {
3465
+ this.logger.debug(`Page ${sourcePage.url} skipped by transformPage hook`);
3466
+ continue;
3467
+ }
3468
+ accepted = transformed;
3469
+ } else {
3470
+ accepted = extracted;
3471
+ }
3472
+ extractedPages.push(accepted);
2496
3473
  this.logger.event("page_extracted", {
2497
- url: extracted.url
3474
+ url: accepted.url
2498
3475
  });
2499
3476
  }
3477
+ const customRecords = options.customRecords ?? [];
3478
+ if (customRecords.length > 0) {
3479
+ this.logger.info(`Processing ${customRecords.length} custom record${customRecords.length === 1 ? "" : "s"}...`);
3480
+ for (const record of customRecords) {
3481
+ const normalizedUrl = normalizeUrlPath(record.url);
3482
+ const normalized = normalizeMarkdown(record.content);
3483
+ if (!normalized.trim()) {
3484
+ this.logger.warn(`Custom record ${normalizedUrl} has empty content and was skipped.`);
3485
+ continue;
3486
+ }
3487
+ const urlTags = normalizedUrl.split("/").filter(Boolean).slice(0, 1);
3488
+ const tags = record.tags ? [.../* @__PURE__ */ new Set([...urlTags, ...record.tags])] : urlTags;
3489
+ const extracted = {
3490
+ url: normalizedUrl,
3491
+ title: record.title,
3492
+ markdown: normalized,
3493
+ outgoingLinks: [],
3494
+ noindex: false,
3495
+ tags,
3496
+ weight: record.weight
3497
+ };
3498
+ let accepted;
3499
+ if (this.hooks.transformPage) {
3500
+ const transformed = await this.hooks.transformPage(extracted);
3501
+ if (transformed === null) {
3502
+ this.logger.debug(`Custom record ${normalizedUrl} skipped by transformPage hook`);
3503
+ continue;
3504
+ }
3505
+ accepted = transformed;
3506
+ } else {
3507
+ accepted = extracted;
3508
+ }
3509
+ extractedPages.push(accepted);
3510
+ this.logger.event("page_extracted", { url: accepted.url, custom: true });
3511
+ }
3512
+ }
2500
3513
  extractedPages.sort((a, b) => a.url.localeCompare(b.url));
2501
3514
  const uniquePages = [];
2502
3515
  const seenUrls = /* @__PURE__ */ new Set();
@@ -2529,15 +3542,28 @@ var IndexPipeline = class _IndexPipeline {
2529
3542
  const linkStart = stageStart();
2530
3543
  const pageSet = new Set(indexablePages.map((page) => normalizeUrlPath(page.url)));
2531
3544
  const incomingLinkCount = /* @__PURE__ */ new Map();
3545
+ const incomingAnchorTexts = /* @__PURE__ */ new Map();
2532
3546
  for (const page of indexablePages) {
2533
3547
  incomingLinkCount.set(page.url, incomingLinkCount.get(page.url) ?? 0);
2534
3548
  }
2535
3549
  for (const page of indexablePages) {
2536
- for (const outgoing of page.outgoingLinks) {
3550
+ const seenForCount = /* @__PURE__ */ new Set();
3551
+ const seenForAnchor = /* @__PURE__ */ new Set();
3552
+ for (const { url: outgoing, anchorText } of page.outgoingLinks) {
2537
3553
  if (!pageSet.has(outgoing)) {
2538
3554
  continue;
2539
3555
  }
2540
- incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
3556
+ if (!seenForCount.has(outgoing)) {
3557
+ seenForCount.add(outgoing);
3558
+ incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
3559
+ }
3560
+ if (anchorText && !seenForAnchor.has(outgoing)) {
3561
+ seenForAnchor.add(outgoing);
3562
+ if (!incomingAnchorTexts.has(outgoing)) {
3563
+ incomingAnchorTexts.set(outgoing, /* @__PURE__ */ new Set());
3564
+ }
3565
+ incomingAnchorTexts.get(outgoing).add(anchorText);
3566
+ }
2541
3567
  }
2542
3568
  }
2543
3569
  stageEnd("links", linkStart);
@@ -2556,6 +3582,15 @@ var IndexPipeline = class _IndexPipeline {
2556
3582
  });
2557
3583
  }
2558
3584
  }
3585
+ for (const record of customRecords) {
3586
+ const normalizedUrl = normalizeUrlPath(record.url);
3587
+ if (!precomputedRoutes.has(normalizedUrl)) {
3588
+ precomputedRoutes.set(normalizedUrl, {
3589
+ routeFile: "",
3590
+ routeResolution: "exact"
3591
+ });
3592
+ }
3593
+ }
2559
3594
  for (const page of indexablePages) {
2560
3595
  const routeMatch = precomputedRoutes.get(normalizeUrlPath(page.url)) ?? mapUrlToRoute(page.url, routePatterns);
2561
3596
  if (routeMatch.routeResolution === "best-effort") {
@@ -2573,6 +3608,17 @@ var IndexPipeline = class _IndexPipeline {
2573
3608
  } else {
2574
3609
  routeExact += 1;
2575
3610
  }
3611
+ const anchorSet = incomingAnchorTexts.get(page.url);
3612
+ let incomingAnchorText;
3613
+ if (anchorSet && anchorSet.size > 0) {
3614
+ let joined = "";
3615
+ for (const phrase of anchorSet) {
3616
+ const next = joined ? `${joined} ${phrase}` : phrase;
3617
+ if (next.length > 500) break;
3618
+ joined = next;
3619
+ }
3620
+ incomingAnchorText = joined || void 0;
3621
+ }
2576
3622
  const indexedPage = {
2577
3623
  url: page.url,
2578
3624
  title: page.title,
@@ -2582,40 +3628,113 @@ var IndexPipeline = class _IndexPipeline {
2582
3628
  generatedAt: nowIso(),
2583
3629
  incomingLinks: incomingLinkCount.get(page.url) ?? 0,
2584
3630
  outgoingLinks: page.outgoingLinks.length,
3631
+ outgoingLinkUrls: page.outgoingLinks.map((l) => typeof l === "string" ? l : l.url),
2585
3632
  depth: getUrlDepth(page.url),
2586
3633
  tags: page.tags,
2587
3634
  markdown: page.markdown,
2588
3635
  description: page.description,
2589
- keywords: page.keywords
3636
+ keywords: page.keywords,
3637
+ publishedAt: page.publishedAt,
3638
+ incomingAnchorText,
3639
+ meta: page.meta
2590
3640
  };
2591
3641
  pages.push(indexedPage);
2592
3642
  this.logger.event("page_indexed", { url: page.url });
2593
3643
  }
3644
+ const pageRecords = pages.map((p) => {
3645
+ const summary = buildPageSummary(p);
3646
+ return {
3647
+ url: p.url,
3648
+ title: p.title,
3649
+ markdown: p.markdown,
3650
+ projectId: scope.projectId,
3651
+ scopeName: scope.scopeName,
3652
+ routeFile: p.routeFile,
3653
+ routeResolution: p.routeResolution,
3654
+ incomingLinks: p.incomingLinks,
3655
+ outgoingLinks: p.outgoingLinks,
3656
+ outgoingLinkUrls: p.outgoingLinkUrls,
3657
+ depth: p.depth,
3658
+ tags: p.tags,
3659
+ indexedAt: p.generatedAt,
3660
+ summary,
3661
+ description: p.description,
3662
+ keywords: p.keywords,
3663
+ contentHash: buildPageContentHash(p),
3664
+ publishedAt: p.publishedAt,
3665
+ meta: p.meta
3666
+ };
3667
+ });
3668
+ const currentPageUrls = new Set(pageRecords.map((r) => r.url));
3669
+ const changedPages = pageRecords.filter(
3670
+ (r) => !existingPageHashes.has(r.url) || existingPageHashes.get(r.url) !== r.contentHash
3671
+ );
3672
+ const deletedPageUrls = [...existingPageHashes.keys()].filter((url) => !currentPageUrls.has(url));
2594
3673
  if (!options.dryRun) {
2595
- const pageRecords = pages.map((p) => {
2596
- const summary = buildPageSummary(p);
2597
- return {
2598
- url: p.url,
2599
- title: p.title,
2600
- markdown: p.markdown,
2601
- projectId: scope.projectId,
2602
- scopeName: scope.scopeName,
2603
- routeFile: p.routeFile,
2604
- routeResolution: p.routeResolution,
2605
- incomingLinks: p.incomingLinks,
2606
- outgoingLinks: p.outgoingLinks,
2607
- depth: p.depth,
2608
- tags: p.tags,
2609
- indexedAt: p.generatedAt,
2610
- summary,
2611
- description: p.description,
2612
- keywords: p.keywords
2613
- };
2614
- });
2615
- await this.store.deletePages(scope);
2616
- await this.store.upsertPages(pageRecords, scope);
3674
+ if (options.force) {
3675
+ await this.store.deletePages(scope);
3676
+ this.logger.info(`Upserting ${pageRecords.length} page summaries...`);
3677
+ const pageDocs = pageRecords.map((r) => ({
3678
+ id: r.url,
3679
+ data: r.summary ?? r.title,
3680
+ metadata: {
3681
+ title: r.title,
3682
+ url: r.url,
3683
+ description: r.description ?? "",
3684
+ keywords: r.keywords ?? [],
3685
+ summary: r.summary ?? "",
3686
+ tags: r.tags,
3687
+ markdown: r.markdown,
3688
+ routeFile: r.routeFile,
3689
+ routeResolution: r.routeResolution,
3690
+ incomingLinks: r.incomingLinks,
3691
+ outgoingLinks: r.outgoingLinks,
3692
+ outgoingLinkUrls: r.outgoingLinkUrls ?? [],
3693
+ depth: r.depth,
3694
+ indexedAt: r.indexedAt,
3695
+ contentHash: r.contentHash ?? "",
3696
+ publishedAt: r.publishedAt ?? null,
3697
+ ...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
3698
+ }
3699
+ }));
3700
+ await this.store.upsertPages(pageDocs, scope);
3701
+ } else {
3702
+ if (changedPages.length > 0) {
3703
+ this.logger.info(`Upserting ${changedPages.length} changed page summaries...`);
3704
+ const pageDocs = changedPages.map((r) => ({
3705
+ id: r.url,
3706
+ data: r.summary ?? r.title,
3707
+ metadata: {
3708
+ title: r.title,
3709
+ url: r.url,
3710
+ description: r.description ?? "",
3711
+ keywords: r.keywords ?? [],
3712
+ summary: r.summary ?? "",
3713
+ tags: r.tags,
3714
+ markdown: r.markdown,
3715
+ routeFile: r.routeFile,
3716
+ routeResolution: r.routeResolution,
3717
+ incomingLinks: r.incomingLinks,
3718
+ outgoingLinks: r.outgoingLinks,
3719
+ outgoingLinkUrls: r.outgoingLinkUrls ?? [],
3720
+ depth: r.depth,
3721
+ indexedAt: r.indexedAt,
3722
+ contentHash: r.contentHash ?? "",
3723
+ publishedAt: r.publishedAt ?? null,
3724
+ ...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
3725
+ }
3726
+ }));
3727
+ await this.store.upsertPages(pageDocs, scope);
3728
+ }
3729
+ if (deletedPageUrls.length > 0) {
3730
+ await this.store.deletePagesByIds(deletedPageUrls, scope);
3731
+ }
3732
+ }
2617
3733
  }
3734
+ const pagesChanged = options.force ? pageRecords.length : changedPages.length;
3735
+ const pagesDeleted = deletedPageUrls.length;
2618
3736
  stageEnd("pages", pagesStart);
3737
+ this.logger.info(`Page changes: ${pagesChanged} changed/new, ${pagesDeleted} deleted, ${pageRecords.length - changedPages.length} unchanged`);
2619
3738
  this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
2620
3739
  const chunkStart = stageStart();
2621
3740
  this.logger.info("Chunking pages...");
@@ -2624,6 +3743,18 @@ var IndexPipeline = class _IndexPipeline {
2624
3743
  if (typeof maxChunks === "number") {
2625
3744
  chunks = chunks.slice(0, maxChunks);
2626
3745
  }
3746
+ if (this.hooks.transformChunk) {
3747
+ const transformed = [];
3748
+ for (const chunk of chunks) {
3749
+ const result = await this.hooks.transformChunk(chunk);
3750
+ if (result === null) {
3751
+ this.logger.debug(`Chunk ${chunk.chunkKey} skipped by transformChunk hook`);
3752
+ continue;
3753
+ }
3754
+ transformed.push(result);
3755
+ }
3756
+ chunks = transformed;
3757
+ }
2627
3758
  for (const chunk of chunks) {
2628
3759
  this.logger.event("chunked", {
2629
3760
  url: chunk.url,
@@ -2636,7 +3767,7 @@ var IndexPipeline = class _IndexPipeline {
2636
3767
  for (const chunk of chunks) {
2637
3768
  currentChunkMap.set(chunk.chunkKey, chunk);
2638
3769
  }
2639
- const changedChunks = chunks.filter((chunk) => {
3770
+ let changedChunks = chunks.filter((chunk) => {
2640
3771
  if (options.force) {
2641
3772
  return true;
2642
3773
  }
@@ -2650,38 +3781,43 @@ var IndexPipeline = class _IndexPipeline {
2650
3781
  return existingHash !== chunk.contentHash;
2651
3782
  });
2652
3783
  const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
3784
+ if (this.hooks.beforeIndex) {
3785
+ changedChunks = await this.hooks.beforeIndex(changedChunks);
3786
+ }
2653
3787
  this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
2654
3788
  const upsertStart = stageStart();
2655
3789
  let documentsUpserted = 0;
2656
3790
  if (!options.dryRun && changedChunks.length > 0) {
2657
- this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
2658
- const UPSTASH_CONTENT_LIMIT = 4096;
2659
- const FIELD_OVERHEAD = 200;
2660
- const MAX_TEXT_CHARS = UPSTASH_CONTENT_LIMIT - FIELD_OVERHEAD;
3791
+ this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
2661
3792
  const docs = changedChunks.map((chunk) => {
2662
- const title = chunk.title;
2663
- const sectionTitle = chunk.sectionTitle ?? "";
2664
- const url = chunk.url;
2665
- const tags = chunk.tags.join(",");
2666
- const headingPath = chunk.headingPath.join(" > ");
2667
- const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
2668
- const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
2669
- const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
3793
+ const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
3794
+ if (embeddingText.length > 2e3) {
3795
+ this.logger.warn(
3796
+ `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
3797
+ );
3798
+ }
2670
3799
  return {
2671
3800
  id: chunk.chunkKey,
2672
- content: { title, sectionTitle, text, url, tags, headingPath },
3801
+ data: embeddingText,
2673
3802
  metadata: {
2674
- projectId: scope.projectId,
2675
- scopeName: scope.scopeName,
3803
+ url: chunk.url,
2676
3804
  path: chunk.path,
3805
+ title: chunk.title,
3806
+ sectionTitle: chunk.sectionTitle ?? "",
3807
+ headingPath: chunk.headingPath.join(" > "),
2677
3808
  snippet: chunk.snippet,
3809
+ chunkText: embeddingText,
3810
+ tags: chunk.tags,
2678
3811
  ordinal: chunk.ordinal,
2679
3812
  contentHash: chunk.contentHash,
2680
3813
  depth: chunk.depth,
2681
3814
  incomingLinks: chunk.incomingLinks,
2682
3815
  routeFile: chunk.routeFile,
2683
3816
  description: chunk.description ?? "",
2684
- keywords: (chunk.keywords ?? []).join(",")
3817
+ keywords: chunk.keywords ?? [],
3818
+ publishedAt: chunk.publishedAt ?? null,
3819
+ incomingAnchorText: chunk.incomingAnchorText ?? "",
3820
+ ...chunk.meta && Object.keys(chunk.meta).length > 0 ? { meta: chunk.meta } : {}
2685
3821
  }
2686
3822
  };
2687
3823
  });
@@ -2699,9 +3835,16 @@ var IndexPipeline = class _IndexPipeline {
2699
3835
  } else {
2700
3836
  this.logger.info("No chunks to upsert \u2014 all up to date");
2701
3837
  }
3838
+ if (this.config.llmsTxt.enable && !options.dryRun) {
3839
+ const llmsStart = stageStart();
3840
+ await writeLlmsTxt(pages, this.config, this.cwd, this.logger);
3841
+ stageEnd("llms_txt", llmsStart);
3842
+ }
2702
3843
  this.logger.info("Done.");
2703
- return {
3844
+ const stats = {
2704
3845
  pagesProcessed: pages.length,
3846
+ pagesChanged,
3847
+ pagesDeleted,
2705
3848
  chunksTotal: chunks.length,
2706
3849
  chunksChanged: changedChunks.length,
2707
3850
  documentsUpserted,
@@ -2710,10 +3853,15 @@ var IndexPipeline = class _IndexPipeline {
2710
3853
  routeBestEffort,
2711
3854
  stageTimingsMs
2712
3855
  };
3856
+ if (this.hooks.afterIndex) {
3857
+ await this.hooks.afterIndex(stats);
3858
+ }
3859
+ return stats;
2713
3860
  }
2714
3861
  };
2715
3862
 
2716
3863
  // src/mcp/server.ts
3864
+ import { createHash as createHash2, timingSafeEqual } from "crypto";
2717
3865
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2718
3866
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
2719
3867
  import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
@@ -2721,16 +3869,139 @@ import { createMcpExpressApp } from "@modelcontextprotocol/sdk/server/express.js
2721
3869
  import { z as z3 } from "zod";
2722
3870
 
2723
3871
  // src/search/engine.ts
2724
- import path11 from "path";
3872
+ import path12 from "path";
2725
3873
  import { z as z2 } from "zod";
3874
+
3875
+ // src/search/related-pages.ts
3876
+ function diceScore(urlA, urlB) {
3877
+ const segmentsA = urlA.split("/").filter(Boolean);
3878
+ const segmentsB = urlB.split("/").filter(Boolean);
3879
+ if (segmentsA.length === 0 && segmentsB.length === 0) return 1;
3880
+ if (segmentsA.length === 0 || segmentsB.length === 0) return 0;
3881
+ let shared = 0;
3882
+ const minLen = Math.min(segmentsA.length, segmentsB.length);
3883
+ for (let i = 0; i < minLen; i++) {
3884
+ if (segmentsA[i] === segmentsB[i]) {
3885
+ shared++;
3886
+ } else {
3887
+ break;
3888
+ }
3889
+ }
3890
+ return 2 * shared / (segmentsA.length + segmentsB.length);
3891
+ }
3892
+ function compositeScore(isLinked, dice, semantic) {
3893
+ return (isLinked ? 0.5 : 0) + 0.3 * dice + 0.2 * semantic;
3894
+ }
3895
+ function dominantRelationshipType(isOutgoing, isIncoming, dice) {
3896
+ if (isOutgoing) return "outgoing_link";
3897
+ if (isIncoming) return "incoming_link";
3898
+ if (dice > 0.4) return "sibling";
3899
+ return "semantic";
3900
+ }
3901
+
3902
+ // src/search/engine.ts
3903
+ var rankingOverridesSchema = z2.object({
3904
+ ranking: z2.object({
3905
+ enableIncomingLinkBoost: z2.boolean().optional(),
3906
+ enableDepthBoost: z2.boolean().optional(),
3907
+ aggregationCap: z2.number().int().positive().optional(),
3908
+ aggregationDecay: z2.number().min(0).max(1).optional(),
3909
+ minChunkScoreRatio: z2.number().min(0).max(1).optional(),
3910
+ minScoreRatio: z2.number().min(0).max(1).optional(),
3911
+ scoreGapThreshold: z2.number().min(0).max(1).optional(),
3912
+ weights: z2.object({
3913
+ incomingLinks: z2.number().optional(),
3914
+ depth: z2.number().optional(),
3915
+ aggregation: z2.number().optional(),
3916
+ titleMatch: z2.number().optional()
3917
+ }).optional()
3918
+ }).optional(),
3919
+ search: z2.object({
3920
+ pageSearchWeight: z2.number().min(0).max(1).optional()
3921
+ }).optional()
3922
+ }).optional();
2726
3923
  var requestSchema = z2.object({
2727
3924
  q: z2.string().trim().min(1),
2728
3925
  topK: z2.number().int().positive().max(100).optional(),
2729
3926
  scope: z2.string().optional(),
2730
3927
  pathPrefix: z2.string().optional(),
2731
3928
  tags: z2.array(z2.string()).optional(),
2732
- groupBy: z2.enum(["page", "chunk"]).optional()
3929
+ filters: z2.record(z2.string(), z2.union([z2.string(), z2.number(), z2.boolean()])).optional(),
3930
+ groupBy: z2.enum(["page", "chunk"]).optional(),
3931
+ maxSubResults: z2.number().int().positive().max(20).optional(),
3932
+ debug: z2.boolean().optional(),
3933
+ rankingOverrides: rankingOverridesSchema
2733
3934
  });
3935
+ var MAX_SITE_STRUCTURE_PAGES = 2e3;
3936
+ function makeNode(url, depth) {
3937
+ return { url, title: "", depth, routeFile: "", isIndexed: false, childCount: 0, children: [] };
3938
+ }
3939
+ function buildTree(pages, pathPrefix) {
3940
+ const nodeMap = /* @__PURE__ */ new Map();
3941
+ const root = makeNode("/", 0);
3942
+ nodeMap.set("/", root);
3943
+ for (const page of pages) {
3944
+ const normalized = normalizeUrlPath(page.url);
3945
+ const segments = normalized.split("/").filter(Boolean);
3946
+ if (segments.length === 0) {
3947
+ root.title = page.title;
3948
+ root.routeFile = page.routeFile;
3949
+ root.isIndexed = true;
3950
+ continue;
3951
+ }
3952
+ for (let i = 1; i <= segments.length; i++) {
3953
+ const partialUrl = "/" + segments.slice(0, i).join("/");
3954
+ if (!nodeMap.has(partialUrl)) {
3955
+ nodeMap.set(partialUrl, makeNode(partialUrl, i));
3956
+ }
3957
+ }
3958
+ const node = nodeMap.get(normalized);
3959
+ node.title = page.title;
3960
+ node.routeFile = page.routeFile;
3961
+ node.isIndexed = true;
3962
+ }
3963
+ for (const [url, node] of nodeMap) {
3964
+ if (url === "/") continue;
3965
+ const segments = url.split("/").filter(Boolean);
3966
+ const parentUrl = segments.length === 1 ? "/" : "/" + segments.slice(0, -1).join("/");
3967
+ const parent = nodeMap.get(parentUrl) ?? root;
3968
+ parent.children.push(node);
3969
+ }
3970
+ const sortAndCount = (node) => {
3971
+ node.children.sort((a, b) => a.url.localeCompare(b.url));
3972
+ node.childCount = node.children.length;
3973
+ for (const child of node.children) {
3974
+ sortAndCount(child);
3975
+ }
3976
+ };
3977
+ sortAndCount(root);
3978
+ if (pathPrefix) {
3979
+ const normalizedPrefix = normalizeUrlPath(pathPrefix);
3980
+ const subtreeRoot = nodeMap.get(normalizedPrefix);
3981
+ if (subtreeRoot) {
3982
+ return subtreeRoot;
3983
+ }
3984
+ return makeNode(normalizedPrefix, normalizedPrefix.split("/").filter(Boolean).length);
3985
+ }
3986
+ return root;
3987
+ }
3988
+ function mergeRankingOverrides(base, overrides) {
3989
+ return {
3990
+ ...base,
3991
+ search: {
3992
+ ...base.search,
3993
+ ...overrides.search
3994
+ },
3995
+ ranking: {
3996
+ ...base.ranking,
3997
+ ...overrides.ranking,
3998
+ weights: {
3999
+ ...base.ranking.weights,
4000
+ ...overrides.ranking?.weights
4001
+ }
4002
+ }
4003
+ };
4004
+ }
2734
4005
  var SearchEngine = class _SearchEngine {
2735
4006
  cwd;
2736
4007
  config;
@@ -2741,7 +4012,7 @@ var SearchEngine = class _SearchEngine {
2741
4012
  this.store = options.store;
2742
4013
  }
2743
4014
  static async create(options = {}) {
2744
- const cwd = path11.resolve(options.cwd ?? process.cwd());
4015
+ const cwd = path12.resolve(options.cwd ?? process.cwd());
2745
4016
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
2746
4017
  const store = options.store ?? await createUpstashStore(config);
2747
4018
  return new _SearchEngine({
@@ -2760,125 +4031,203 @@ var SearchEngine = class _SearchEngine {
2760
4031
  }
2761
4032
  const input = parsed.data;
2762
4033
  const totalStart = process.hrtime.bigint();
4034
+ const effectiveConfig = input.debug && input.rankingOverrides ? mergeRankingOverrides(this.config, input.rankingOverrides) : this.config;
2763
4035
  const resolvedScope = resolveScope(this.config, input.scope);
2764
4036
  const topK = input.topK ?? 10;
4037
+ const maxSubResults = input.maxSubResults ?? 5;
2765
4038
  const groupByPage = (input.groupBy ?? "page") === "page";
2766
- const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
2767
- const filterParts = [];
2768
- if (input.pathPrefix) {
2769
- const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
2770
- filterParts.push(`url GLOB '${prefix}*'`);
2771
- }
2772
- if (input.tags && input.tags.length > 0) {
2773
- for (const tag of input.tags) {
2774
- filterParts.push(`tags GLOB '*${tag}*'`);
4039
+ const queryText = input.q;
4040
+ const pathPrefix = input.pathPrefix ? input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}` : void 0;
4041
+ const filterTags = input.tags && input.tags.length > 0 ? input.tags : void 0;
4042
+ const metaFilterStr = input.filters && Object.keys(input.filters).length > 0 ? buildMetaFilterString(input.filters) : "";
4043
+ const metaFilter = metaFilterStr || void 0;
4044
+ const applyPagePostFilters = (hits) => {
4045
+ let filtered = hits;
4046
+ if (pathPrefix) {
4047
+ filtered = filtered.filter((h) => h.url.startsWith(pathPrefix));
2775
4048
  }
2776
- }
2777
- const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
2778
- const useDualSearch = this.config.search.dualSearch && groupByPage;
4049
+ if (filterTags) {
4050
+ filtered = filtered.filter(
4051
+ (h) => filterTags.every((tag) => h.tags.includes(tag))
4052
+ );
4053
+ }
4054
+ return filtered;
4055
+ };
4056
+ const applyChunkPostFilters = (hits) => {
4057
+ let filtered = hits;
4058
+ if (filterTags) {
4059
+ filtered = filtered.filter(
4060
+ (h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
4061
+ );
4062
+ }
4063
+ return filtered;
4064
+ };
2779
4065
  const searchStart = process.hrtime.bigint();
2780
- let ranked;
2781
- if (useDualSearch) {
2782
- const chunkLimit = Math.max(topK * 10, 100);
2783
- const pageLimit = 20;
2784
- const [pageHits, chunkHits] = await Promise.all([
2785
- this.store.searchPages(
2786
- input.q,
2787
- {
2788
- limit: pageLimit,
2789
- semanticWeight: this.config.search.semanticWeight,
2790
- inputEnrichment: this.config.search.inputEnrichment,
2791
- filter
2792
- },
2793
- resolvedScope
2794
- ),
2795
- this.store.search(
2796
- input.q,
2797
- {
2798
- limit: chunkLimit,
2799
- semanticWeight: this.config.search.semanticWeight,
2800
- inputEnrichment: this.config.search.inputEnrichment,
2801
- reranking: false,
2802
- filter
2803
- },
4066
+ if (groupByPage) {
4067
+ const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
4068
+ const pageLimit = Math.max(topK * 2, 20);
4069
+ const pageHits = await this.store.searchPagesByText(
4070
+ queryText,
4071
+ { limit: pageLimit * fetchMultiplier, filter: metaFilter },
4072
+ resolvedScope
4073
+ );
4074
+ const filteredPages = applyPagePostFilters(pageHits);
4075
+ let rankedPages = rankPageHits(filteredPages, effectiveConfig, input.q, input.debug);
4076
+ rankedPages = trimPagesByScoreGap(rankedPages, effectiveConfig);
4077
+ const topPages = rankedPages.slice(0, topK);
4078
+ const chunkPromises = topPages.map(
4079
+ (page) => this.store.searchChunksByUrl(
4080
+ queryText,
4081
+ page.url,
4082
+ { limit: maxSubResults, filter: metaFilter },
2804
4083
  resolvedScope
2805
- )
2806
- ]);
2807
- const rankedChunks = rankHits(chunkHits, this.config, input.q);
2808
- ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
4084
+ ).then((chunks) => applyChunkPostFilters(chunks))
4085
+ );
4086
+ const allChunks = await Promise.all(chunkPromises);
4087
+ const searchMs = hrTimeMs(searchStart);
4088
+ const results = this.buildPageFirstResults(topPages, allChunks, input.q, input.debug, maxSubResults);
4089
+ return {
4090
+ q: input.q,
4091
+ scope: resolvedScope.scopeName,
4092
+ results,
4093
+ meta: {
4094
+ timingsMs: {
4095
+ search: Math.round(searchMs),
4096
+ total: Math.round(hrTimeMs(totalStart))
4097
+ }
4098
+ }
4099
+ };
2809
4100
  } else {
4101
+ const candidateK = Math.max(50, topK);
4102
+ const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
2810
4103
  const hits = await this.store.search(
2811
- input.q,
2812
- {
2813
- limit: candidateK,
2814
- semanticWeight: this.config.search.semanticWeight,
2815
- inputEnrichment: this.config.search.inputEnrichment,
2816
- reranking: this.config.search.reranking,
2817
- filter
2818
- },
4104
+ queryText,
4105
+ { limit: candidateK * fetchMultiplier, filter: metaFilter },
2819
4106
  resolvedScope
2820
4107
  );
2821
- ranked = rankHits(hits, this.config, input.q);
2822
- }
2823
- const searchMs = hrTimeMs(searchStart);
2824
- const results = this.buildResults(ranked, topK, groupByPage, input.q);
2825
- return {
2826
- q: input.q,
2827
- scope: resolvedScope.scopeName,
2828
- results,
2829
- meta: {
2830
- timingsMs: {
2831
- search: Math.round(searchMs),
2832
- total: Math.round(hrTimeMs(totalStart))
4108
+ let filtered = hits;
4109
+ if (pathPrefix) {
4110
+ filtered = filtered.filter((h) => h.metadata.url.startsWith(pathPrefix));
4111
+ }
4112
+ if (filterTags) {
4113
+ filtered = filtered.filter(
4114
+ (h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
4115
+ );
4116
+ }
4117
+ const ranked = rankHits(filtered, effectiveConfig, input.q, input.debug);
4118
+ const searchMs = hrTimeMs(searchStart);
4119
+ const results = this.buildResults(ranked, topK, false, maxSubResults, input.q, input.debug, effectiveConfig);
4120
+ return {
4121
+ q: input.q,
4122
+ scope: resolvedScope.scopeName,
4123
+ results,
4124
+ meta: {
4125
+ timingsMs: {
4126
+ search: Math.round(searchMs),
4127
+ total: Math.round(hrTimeMs(totalStart))
4128
+ }
2833
4129
  }
4130
+ };
4131
+ }
4132
+ }
4133
+ buildPageFirstResults(rankedPages, allChunks, query, debug, maxSubResults = 5) {
4134
+ return rankedPages.map((page, i) => {
4135
+ const chunks = allChunks[i] ?? [];
4136
+ const bestChunk = chunks[0];
4137
+ const snippet = bestChunk ? query ? queryAwareExcerpt(bestChunk.metadata.chunkText, query) : toSnippet(bestChunk.metadata.chunkText) : page.description || page.title;
4138
+ const result = {
4139
+ url: page.url,
4140
+ title: page.title,
4141
+ sectionTitle: bestChunk?.metadata.sectionTitle || void 0,
4142
+ snippet,
4143
+ chunkText: bestChunk?.metadata.chunkText || void 0,
4144
+ score: Number(page.finalScore.toFixed(6)),
4145
+ routeFile: page.routeFile,
4146
+ chunks: chunks.length > 0 ? chunks.slice(0, maxSubResults).map((c) => ({
4147
+ sectionTitle: c.metadata.sectionTitle || void 0,
4148
+ snippet: query ? queryAwareExcerpt(c.metadata.chunkText, query) : toSnippet(c.metadata.chunkText),
4149
+ chunkText: c.metadata.chunkText || void 0,
4150
+ headingPath: c.metadata.headingPath,
4151
+ score: Number(c.score.toFixed(6))
4152
+ })) : void 0
4153
+ };
4154
+ if (debug && page.breakdown) {
4155
+ result.breakdown = {
4156
+ baseScore: page.breakdown.baseScore,
4157
+ incomingLinkBoost: page.breakdown.incomingLinkBoost,
4158
+ depthBoost: page.breakdown.depthBoost,
4159
+ titleMatchBoost: page.breakdown.titleMatchBoost,
4160
+ freshnessBoost: page.breakdown.freshnessBoost,
4161
+ anchorTextMatchBoost: 0
4162
+ };
2834
4163
  }
2835
- };
4164
+ return result;
4165
+ });
2836
4166
  }
2837
- ensureSnippet(hit) {
4167
+ ensureSnippet(hit, query) {
4168
+ const chunkText = hit.hit.metadata.chunkText;
4169
+ if (query && chunkText) return queryAwareExcerpt(chunkText, query);
2838
4170
  const snippet = hit.hit.metadata.snippet;
2839
4171
  if (snippet && snippet.length >= 30) return snippet;
2840
- const chunkText = hit.hit.metadata.chunkText;
2841
4172
  if (chunkText) return toSnippet(chunkText);
2842
4173
  return snippet || "";
2843
4174
  }
2844
- buildResults(ordered, topK, groupByPage, _query) {
4175
+ buildResults(ordered, topK, groupByPage, maxSubResults, query, debug, config) {
4176
+ const cfg = config ?? this.config;
2845
4177
  if (groupByPage) {
2846
- let pages = aggregateByPage(ordered, this.config);
2847
- pages = trimByScoreGap(pages, this.config);
2848
- const minRatio = this.config.ranking.minChunkScoreRatio;
4178
+ let pages = aggregateByPage(ordered, cfg);
4179
+ pages = trimByScoreGap(pages, cfg);
4180
+ const minRatio = cfg.ranking.minChunkScoreRatio;
2849
4181
  return pages.slice(0, topK).map((page) => {
2850
4182
  const bestScore = page.bestChunk.finalScore;
2851
4183
  const minChunkScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
2852
- const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, 5);
2853
- return {
4184
+ const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, maxSubResults);
4185
+ const result = {
2854
4186
  url: page.url,
2855
4187
  title: page.title,
2856
4188
  sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
2857
- snippet: this.ensureSnippet(page.bestChunk),
4189
+ snippet: this.ensureSnippet(page.bestChunk, query),
4190
+ chunkText: page.bestChunk.hit.metadata.chunkText || void 0,
2858
4191
  score: Number(page.pageScore.toFixed(6)),
2859
4192
  routeFile: page.routeFile,
2860
- chunks: meaningful.length > 1 ? meaningful.map((c) => ({
4193
+ chunks: meaningful.length >= 1 ? meaningful.map((c) => ({
2861
4194
  sectionTitle: c.hit.metadata.sectionTitle || void 0,
2862
- snippet: this.ensureSnippet(c),
4195
+ snippet: this.ensureSnippet(c, query),
4196
+ chunkText: c.hit.metadata.chunkText || void 0,
2863
4197
  headingPath: c.hit.metadata.headingPath,
2864
4198
  score: Number(c.finalScore.toFixed(6))
2865
4199
  })) : void 0
2866
4200
  };
4201
+ if (debug && page.bestChunk.breakdown) {
4202
+ result.breakdown = page.bestChunk.breakdown;
4203
+ }
4204
+ return result;
2867
4205
  });
2868
4206
  } else {
2869
4207
  let filtered = ordered;
2870
- const minScore = this.config.ranking.minScore;
2871
- if (minScore > 0) {
2872
- filtered = ordered.filter((entry) => entry.finalScore >= minScore);
2873
- }
2874
- return filtered.slice(0, topK).map(({ hit, finalScore }) => ({
2875
- url: hit.metadata.url,
2876
- title: hit.metadata.title,
2877
- sectionTitle: hit.metadata.sectionTitle || void 0,
2878
- snippet: this.ensureSnippet({ hit, finalScore }),
2879
- score: Number(finalScore.toFixed(6)),
2880
- routeFile: hit.metadata.routeFile
2881
- }));
4208
+ const minScoreRatio = cfg.ranking.minScoreRatio;
4209
+ if (minScoreRatio > 0 && ordered.length > 0) {
4210
+ const topScore = ordered[0].finalScore;
4211
+ if (Number.isFinite(topScore) && topScore > 0) {
4212
+ const threshold = topScore * minScoreRatio;
4213
+ filtered = ordered.filter((entry) => entry.finalScore >= threshold);
4214
+ }
4215
+ }
4216
+ return filtered.slice(0, topK).map(({ hit, finalScore, breakdown }) => {
4217
+ const result = {
4218
+ url: hit.metadata.url,
4219
+ title: hit.metadata.title,
4220
+ sectionTitle: hit.metadata.sectionTitle || void 0,
4221
+ snippet: this.ensureSnippet({ hit, finalScore }, query),
4222
+ chunkText: hit.metadata.chunkText || void 0,
4223
+ score: Number(finalScore.toFixed(6)),
4224
+ routeFile: hit.metadata.routeFile
4225
+ };
4226
+ if (debug && breakdown) {
4227
+ result.breakdown = breakdown;
4228
+ }
4229
+ return result;
4230
+ });
2882
4231
  }
2883
4232
  }
2884
4233
  async getPage(pathOrUrl, scope) {
@@ -2904,6 +4253,116 @@ var SearchEngine = class _SearchEngine {
2904
4253
  markdown: page.markdown
2905
4254
  };
2906
4255
  }
4256
+ async listPages(opts) {
4257
+ const resolvedScope = resolveScope(this.config, opts?.scope);
4258
+ const pathPrefix = opts?.pathPrefix ? opts.pathPrefix.startsWith("/") ? opts.pathPrefix : `/${opts.pathPrefix}` : void 0;
4259
+ return this.store.listPages(resolvedScope, {
4260
+ cursor: opts?.cursor,
4261
+ limit: opts?.limit,
4262
+ pathPrefix
4263
+ });
4264
+ }
4265
+ async getSiteStructure(opts) {
4266
+ const maxPages = Math.min(opts?.maxPages ?? MAX_SITE_STRUCTURE_PAGES, MAX_SITE_STRUCTURE_PAGES);
4267
+ const allPages = [];
4268
+ let cursor;
4269
+ let truncated = false;
4270
+ do {
4271
+ const result = await this.listPages({
4272
+ pathPrefix: opts?.pathPrefix,
4273
+ scope: opts?.scope,
4274
+ cursor,
4275
+ limit: 200
4276
+ });
4277
+ allPages.push(...result.pages);
4278
+ cursor = result.nextCursor;
4279
+ if (allPages.length >= maxPages) {
4280
+ truncated = allPages.length > maxPages || !!cursor;
4281
+ allPages.length = maxPages;
4282
+ break;
4283
+ }
4284
+ } while (cursor);
4285
+ const root = buildTree(allPages, opts?.pathPrefix);
4286
+ return {
4287
+ root,
4288
+ totalPages: allPages.length,
4289
+ truncated
4290
+ };
4291
+ }
4292
+ async getRelatedPages(pathOrUrl, opts) {
4293
+ const resolvedScope = resolveScope(this.config, opts?.scope);
4294
+ const urlPath = this.resolveInputPath(pathOrUrl);
4295
+ const topK = Math.min(opts?.topK ?? 10, 25);
4296
+ const source = await this.store.fetchPageWithVector(urlPath, resolvedScope);
4297
+ if (!source) {
4298
+ throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
4299
+ }
4300
+ const sourceOutgoing = new Set(source.metadata.outgoingLinkUrls ?? []);
4301
+ const semanticHits = await this.store.searchPagesByVector(
4302
+ source.vector,
4303
+ { limit: 50 },
4304
+ resolvedScope
4305
+ );
4306
+ const filteredHits = semanticHits.filter((h) => h.url !== urlPath);
4307
+ const semanticScoreMap = /* @__PURE__ */ new Map();
4308
+ for (const hit of filteredHits) {
4309
+ semanticScoreMap.set(hit.url, hit.score);
4310
+ }
4311
+ const candidateUrls = /* @__PURE__ */ new Set();
4312
+ for (const hit of filteredHits) {
4313
+ candidateUrls.add(hit.url);
4314
+ }
4315
+ for (const url of sourceOutgoing) {
4316
+ if (url !== urlPath) candidateUrls.add(url);
4317
+ }
4318
+ const missingUrls = [...sourceOutgoing].filter(
4319
+ (u) => u !== urlPath && !semanticScoreMap.has(u)
4320
+ );
4321
+ const fetchedPages = missingUrls.length > 0 ? await this.store.fetchPagesBatch(missingUrls, resolvedScope) : [];
4322
+ const metaMap = /* @__PURE__ */ new Map();
4323
+ for (const hit of filteredHits) {
4324
+ metaMap.set(hit.url, { title: hit.title, routeFile: hit.routeFile, outgoingLinkUrls: [] });
4325
+ }
4326
+ for (const p of fetchedPages) {
4327
+ metaMap.set(p.url, { title: p.title, routeFile: p.routeFile, outgoingLinkUrls: p.outgoingLinkUrls });
4328
+ }
4329
+ const semanticUrls = filteredHits.map((h) => h.url);
4330
+ if (semanticUrls.length > 0) {
4331
+ const semanticPageData = await this.store.fetchPagesBatch(semanticUrls, resolvedScope);
4332
+ for (const p of semanticPageData) {
4333
+ const existing = metaMap.get(p.url);
4334
+ if (existing) {
4335
+ existing.outgoingLinkUrls = p.outgoingLinkUrls;
4336
+ }
4337
+ }
4338
+ }
4339
+ const candidates = [];
4340
+ for (const url of candidateUrls) {
4341
+ const meta = metaMap.get(url);
4342
+ if (!meta) continue;
4343
+ const isOutgoing = sourceOutgoing.has(url);
4344
+ const isIncoming = meta.outgoingLinkUrls.includes(urlPath);
4345
+ const isLinked = isOutgoing || isIncoming;
4346
+ const dice = diceScore(urlPath, url);
4347
+ const semantic = semanticScoreMap.get(url) ?? 0;
4348
+ const score = compositeScore(isLinked, dice, semantic);
4349
+ const relationshipType = dominantRelationshipType(isOutgoing, isIncoming, dice);
4350
+ candidates.push({
4351
+ url,
4352
+ title: meta.title,
4353
+ score: Number(score.toFixed(6)),
4354
+ relationshipType,
4355
+ routeFile: meta.routeFile
4356
+ });
4357
+ }
4358
+ candidates.sort((a, b) => b.score - a.score);
4359
+ const results = candidates.slice(0, topK);
4360
+ return {
4361
+ sourceUrl: urlPath,
4362
+ scope: resolvedScope.scopeName,
4363
+ relatedPages: results
4364
+ };
4365
+ }
2907
4366
  async health() {
2908
4367
  return this.store.health();
2909
4368
  }
@@ -2928,14 +4387,40 @@ function createServer(engine) {
2928
4387
  server.registerTool(
2929
4388
  "search",
2930
4389
  {
2931
- description: "Semantic site search powered by Upstash Search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and groupBy.",
4390
+ description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
2932
4391
  inputSchema: {
2933
4392
  query: z3.string().min(1),
2934
4393
  scope: z3.string().optional(),
2935
4394
  topK: z3.number().int().positive().max(100).optional(),
2936
4395
  pathPrefix: z3.string().optional(),
2937
4396
  tags: z3.array(z3.string()).optional(),
2938
- groupBy: z3.enum(["page", "chunk"]).optional()
4397
+ filters: z3.record(z3.string(), z3.union([z3.string(), z3.number(), z3.boolean()])).optional(),
4398
+ groupBy: z3.enum(["page", "chunk"]).optional(),
4399
+ maxSubResults: z3.number().int().positive().max(20).optional()
4400
+ },
4401
+ outputSchema: {
4402
+ q: z3.string(),
4403
+ scope: z3.string(),
4404
+ results: z3.array(z3.object({
4405
+ url: z3.string(),
4406
+ title: z3.string(),
4407
+ sectionTitle: z3.string().optional(),
4408
+ snippet: z3.string(),
4409
+ score: z3.number(),
4410
+ routeFile: z3.string(),
4411
+ chunks: z3.array(z3.object({
4412
+ sectionTitle: z3.string().optional(),
4413
+ snippet: z3.string(),
4414
+ headingPath: z3.array(z3.string()),
4415
+ score: z3.number()
4416
+ })).optional()
4417
+ })),
4418
+ meta: z3.object({
4419
+ timingsMs: z3.object({
4420
+ search: z3.number(),
4421
+ total: z3.number()
4422
+ })
4423
+ })
2939
4424
  }
2940
4425
  },
2941
4426
  async (input) => {
@@ -2945,7 +4430,9 @@ function createServer(engine) {
2945
4430
  scope: input.scope,
2946
4431
  pathPrefix: input.pathPrefix,
2947
4432
  tags: input.tags,
2948
- groupBy: input.groupBy
4433
+ filters: input.filters,
4434
+ groupBy: input.groupBy,
4435
+ maxSubResults: input.maxSubResults
2949
4436
  });
2950
4437
  return {
2951
4438
  content: [
@@ -2953,7 +4440,8 @@ function createServer(engine) {
2953
4440
  type: "text",
2954
4441
  text: JSON.stringify(result, null, 2)
2955
4442
  }
2956
- ]
4443
+ ],
4444
+ structuredContent: result
2957
4445
  };
2958
4446
  }
2959
4447
  );
@@ -2978,8 +4466,134 @@ function createServer(engine) {
2978
4466
  };
2979
4467
  }
2980
4468
  );
4469
+ server.registerTool(
4470
+ "list_pages",
4471
+ {
4472
+ description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
4473
+ inputSchema: {
4474
+ pathPrefix: z3.string().optional(),
4475
+ cursor: z3.string().optional(),
4476
+ limit: z3.number().int().positive().max(200).optional(),
4477
+ scope: z3.string().optional()
4478
+ }
4479
+ },
4480
+ async (input) => {
4481
+ const result = await engine.listPages({
4482
+ pathPrefix: input.pathPrefix,
4483
+ cursor: input.cursor,
4484
+ limit: input.limit,
4485
+ scope: input.scope
4486
+ });
4487
+ return {
4488
+ content: [
4489
+ {
4490
+ type: "text",
4491
+ text: JSON.stringify(result, null, 2)
4492
+ }
4493
+ ]
4494
+ };
4495
+ }
4496
+ );
4497
+ server.registerTool(
4498
+ "get_site_structure",
4499
+ {
4500
+ description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
4501
+ inputSchema: {
4502
+ pathPrefix: z3.string().optional(),
4503
+ scope: z3.string().optional(),
4504
+ maxPages: z3.number().int().positive().max(2e3).optional()
4505
+ }
4506
+ },
4507
+ async (input) => {
4508
+ const result = await engine.getSiteStructure({
4509
+ pathPrefix: input.pathPrefix,
4510
+ scope: input.scope,
4511
+ maxPages: input.maxPages
4512
+ });
4513
+ return {
4514
+ content: [
4515
+ {
4516
+ type: "text",
4517
+ text: JSON.stringify(result, null, 2)
4518
+ }
4519
+ ]
4520
+ };
4521
+ }
4522
+ );
4523
+ server.registerTool(
4524
+ "find_source_file",
4525
+ {
4526
+ description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
4527
+ inputSchema: {
4528
+ query: z3.string().min(1),
4529
+ scope: z3.string().optional()
4530
+ }
4531
+ },
4532
+ async (input) => {
4533
+ const result = await engine.search({
4534
+ q: input.query,
4535
+ topK: 1,
4536
+ scope: input.scope
4537
+ });
4538
+ if (result.results.length === 0) {
4539
+ return {
4540
+ content: [
4541
+ {
4542
+ type: "text",
4543
+ text: JSON.stringify({
4544
+ error: "No matching content found for the given query."
4545
+ })
4546
+ }
4547
+ ]
4548
+ };
4549
+ }
4550
+ const match = result.results[0];
4551
+ const { url, routeFile, sectionTitle, snippet } = match;
4552
+ return {
4553
+ content: [
4554
+ {
4555
+ type: "text",
4556
+ text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
4557
+ }
4558
+ ]
4559
+ };
4560
+ }
4561
+ );
4562
+ server.registerTool(
4563
+ "get_related_pages",
4564
+ {
4565
+ description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
4566
+ inputSchema: {
4567
+ pathOrUrl: z3.string().min(1),
4568
+ scope: z3.string().optional(),
4569
+ topK: z3.number().int().positive().max(25).optional()
4570
+ }
4571
+ },
4572
+ async (input) => {
4573
+ const result = await engine.getRelatedPages(input.pathOrUrl, {
4574
+ topK: input.topK,
4575
+ scope: input.scope
4576
+ });
4577
+ return {
4578
+ content: [
4579
+ {
4580
+ type: "text",
4581
+ text: JSON.stringify(result, null, 2)
4582
+ }
4583
+ ]
4584
+ };
4585
+ }
4586
+ );
2981
4587
  return server;
2982
4588
  }
4589
+ function resolveApiKey(config) {
4590
+ return config.mcp.http.apiKey ?? (config.mcp.http.apiKeyEnv ? process.env[config.mcp.http.apiKeyEnv] : void 0);
4591
+ }
4592
+ function verifyApiKey(provided, expected) {
4593
+ const a = createHash2("sha256").update(provided).digest();
4594
+ const b = createHash2("sha256").update(expected).digest();
4595
+ return timingSafeEqual(a, b);
4596
+ }
2983
4597
  function redirectConsoleToStderr() {
2984
4598
  const originalLog = console.log;
2985
4599
  console.log = (...args) => {
@@ -2996,89 +4610,945 @@ async function startHttpServer(serverFactory, config, opts) {
2996
4610
  const app = createMcpExpressApp();
2997
4611
  const port = opts.httpPort ?? config.mcp.http.port;
2998
4612
  const endpointPath = opts.httpPath ?? config.mcp.http.path;
4613
+ const isPublic = config.mcp.access === "public";
4614
+ const host = isPublic ? "0.0.0.0" : "127.0.0.1";
4615
+ const apiKey = isPublic ? resolveApiKey(config) : void 0;
2999
4616
  app.post(endpointPath, async (req, res) => {
4617
+ if (isPublic && apiKey) {
4618
+ const authHeader = req.headers["authorization"];
4619
+ const provided = (authHeader?.startsWith("Bearer ") ? authHeader.slice(7) : void 0) ?? req.headers["x-api-key"] ?? "";
4620
+ if (!provided || !verifyApiKey(provided, apiKey)) {
4621
+ res.status(401).json({
4622
+ jsonrpc: "2.0",
4623
+ error: { code: -32001, message: "Unauthorized" },
4624
+ id: null
4625
+ });
4626
+ return;
4627
+ }
4628
+ }
3000
4629
  const server = serverFactory();
3001
4630
  const transport = new StreamableHTTPServerTransport({
3002
4631
  sessionIdGenerator: void 0
3003
4632
  });
3004
4633
  try {
3005
- await server.connect(transport);
3006
- await transport.handleRequest(req, res, req.body);
3007
- res.on("close", () => {
3008
- transport.close();
3009
- server.close();
4634
+ await server.connect(transport);
4635
+ await transport.handleRequest(req, res, req.body);
4636
+ res.on("close", () => {
4637
+ transport.close();
4638
+ server.close();
4639
+ });
4640
+ } catch (error) {
4641
+ if (!res.headersSent) {
4642
+ res.status(500).json({
4643
+ jsonrpc: "2.0",
4644
+ error: {
4645
+ code: -32603,
4646
+ message: error instanceof Error ? error.message : "Internal server error"
4647
+ },
4648
+ id: null
4649
+ });
4650
+ }
4651
+ }
4652
+ });
4653
+ app.get(endpointPath, (_req, res) => {
4654
+ res.writeHead(405).end(
4655
+ JSON.stringify({
4656
+ jsonrpc: "2.0",
4657
+ error: {
4658
+ code: -32e3,
4659
+ message: "Method not allowed"
4660
+ },
4661
+ id: null
4662
+ })
4663
+ );
4664
+ });
4665
+ app.delete(endpointPath, (_req, res) => {
4666
+ res.writeHead(405).end(
4667
+ JSON.stringify({
4668
+ jsonrpc: "2.0",
4669
+ error: {
4670
+ code: -32e3,
4671
+ message: "Method not allowed"
4672
+ },
4673
+ id: null
4674
+ })
4675
+ );
4676
+ });
4677
+ await new Promise((resolve, reject) => {
4678
+ const instance = app.listen(port, host, () => {
4679
+ process.stderr.write(`SearchSocket MCP HTTP server listening on http://${host}:${port}${endpointPath}
4680
+ `);
4681
+ if (isPublic) {
4682
+ process.stderr.write("WARNING: Server is in public mode. Ensure HTTPS is configured via a reverse proxy for production use.\n");
4683
+ }
4684
+ resolve();
4685
+ });
4686
+ instance.once("error", reject);
4687
+ process.on("SIGINT", async () => {
4688
+ await new Promise((shutdownResolve) => instance.close(() => shutdownResolve()));
4689
+ process.exit(0);
4690
+ });
4691
+ });
4692
+ }
4693
/**
 * Load configuration, apply CLI overrides and start the MCP server on the
 * selected transport.
 *
 * `options.access` and `options.apiKey`, when truthy, overwrite the matching
 * values on the loaded config. Public access without a resolvable API key is
 * rejected before any engine is created. The "http" transport hands off to
 * `startHttpServer`; any other transport value connects a stdio transport.
 *
 * @param {object} [options] - Reads `cwd`, `configPath`, `access`, `apiKey`
 *   and `transport`; the whole object is also forwarded to `startHttpServer`.
 * @returns {Promise<void>}
 * @throws {Error} When access is "public" and no API key can be resolved.
 */
async function runMcpServer(options = {}) {
  const { cwd, configPath } = options;
  const config = await loadConfig({ cwd, configPath });

  if (options.access) {
    config.mcp.access = options.access;
  }
  if (options.apiKey) {
    config.mcp.http.apiKey = options.apiKey;
  }

  const isPublic = config.mcp.access === "public";
  if (isPublic && !resolveApiKey(config)) {
    throw new Error(
      'MCP access is "public" but no API key is configured. Pass --api-key or set mcp.http.apiKey / mcp.http.apiKeyEnv in config.'
    );
  }

  const transportKind = options.transport ?? config.mcp.transport;
  if (transportKind === "stdio") {
    // Runs before the engine is created, so the redirect is already in place
    // for any console output produced during engine start-up.
    redirectConsoleToStderr();
  }

  const engine = await SearchEngine.create({ cwd, configPath, config });

  if (transportKind !== "http") {
    await createServer(engine).connect(new StdioServerTransport());
    return;
  }
  // A fresh server instance is built through the factory, all sharing the
  // one engine created above.
  await startHttpServer(() => createServer(engine), config, options);
}
4722
+
4723
+ // src/playground/server.ts
4724
+ import express from "express";
4725
+
4726
+ // src/playground/playground.html
4727
+ var playground_default = `<!DOCTYPE html>
4728
+ <html lang="en">
4729
+ <head>
4730
+ <meta charset="utf-8">
4731
+ <meta name="viewport" content="width=device-width, initial-scale=1">
4732
+ <title>SearchSocket Playground</title>
4733
+ <style>
4734
+ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
4735
+ body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #f8f9fa; color: #1a1a2e; padding: 2rem; max-width: 900px; margin: 0 auto; }
4736
+ h1 { font-size: 1.5rem; margin-bottom: 1.5rem; color: #16213e; }
4737
+ h1 span { font-weight: 400; color: #888; }
4738
+ .search-box { display: flex; gap: 0.5rem; margin-bottom: 0.5rem; }
4739
+ .search-box input { flex: 1; padding: 0.75rem 1rem; font-size: 1rem; border: 2px solid #ddd; border-radius: 8px; outline: none; transition: border-color 0.2s; }
4740
+ .search-box input:focus { border-color: #4361ee; }
4741
+ .search-opts { display: flex; gap: 1rem; margin-bottom: 1.5rem; font-size: 0.85rem; color: #555; align-items: center; }
4742
+ .search-opts label { display: flex; align-items: center; gap: 0.3rem; cursor: pointer; }
4743
+ .search-opts select { padding: 0.25rem 0.5rem; border: 1px solid #ccc; border-radius: 4px; font-size: 0.85rem; }
4744
+ .meta { font-size: 0.8rem; color: #888; margin-bottom: 1rem; }
4745
+ .result { background: #fff; border: 1px solid #e0e0e0; border-radius: 8px; padding: 1rem 1.25rem; margin-bottom: 0.75rem; }
4746
+ .result-header { display: flex; justify-content: space-between; align-items: flex-start; gap: 1rem; }
4747
+ .result-title { font-size: 1.05rem; font-weight: 600; color: #16213e; text-decoration: none; }
4748
+ .result-title:hover { color: #4361ee; }
4749
+ .result-score { font-size: 0.8rem; font-weight: 600; color: #4361ee; white-space: nowrap; background: #eef1ff; padding: 0.2rem 0.5rem; border-radius: 4px; }
4750
+ .result-url { font-size: 0.8rem; color: #888; margin-top: 0.2rem; }
4751
+ .result-snippet { font-size: 0.9rem; color: #444; margin-top: 0.5rem; line-height: 1.5; }
4752
+ .result-meta { display: flex; gap: 0.75rem; flex-wrap: wrap; margin-top: 0.5rem; font-size: 0.78rem; color: #777; }
4753
+ .result-meta span { background: #f0f0f0; padding: 0.15rem 0.5rem; border-radius: 4px; }
4754
+ .breakdown { margin-top: 0.5rem; padding: 0.5rem 0.75rem; background: #f8f9fa; border-radius: 6px; font-size: 0.8rem; }
4755
+ .breakdown-row { display: flex; justify-content: space-between; padding: 0.15rem 0; }
4756
+ .breakdown-label { color: #555; }
4757
+ .breakdown-value { font-family: monospace; color: #333; }
4758
+ .chunks-toggle { font-size: 0.8rem; color: #4361ee; cursor: pointer; margin-top: 0.5rem; border: none; background: none; padding: 0; text-decoration: underline; }
4759
+ .chunks { margin-top: 0.5rem; padding-left: 1rem; border-left: 3px solid #e0e0e0; }
4760
+ .chunk { margin-bottom: 0.5rem; font-size: 0.85rem; }
4761
+ .chunk-heading { font-size: 0.78rem; color: #4361ee; margin-bottom: 0.15rem; }
4762
+ .chunk-score { font-size: 0.75rem; color: #999; }
4763
+ .chunk-snippet { color: #555; line-height: 1.4; }
4764
+ .empty { text-align: center; padding: 3rem; color: #999; }
4765
+ .loading { text-align: center; padding: 2rem; color: #999; }
4766
+ .hidden { display: none; }
4767
+
4768
+ /* Ranking Tuner */
4769
+ .tuner { margin-bottom: 1.5rem; border: 1px solid #e0e0e0; border-radius: 8px; background: #fff; }
4770
+ .tuner > summary { padding: 0.75rem 1rem; font-weight: 600; font-size: 0.95rem; cursor: pointer; color: #16213e; user-select: none; }
4771
+ .tuner > summary:hover { color: #4361ee; }
4772
+ .tuner-body { padding: 0.5rem 1rem 1rem; }
4773
+ .tuner-actions { display: flex; gap: 0.5rem; margin-bottom: 0.75rem; }
4774
+ .tuner-actions button { padding: 0.35rem 0.75rem; font-size: 0.8rem; border: 1px solid #ccc; border-radius: 4px; background: #fff; cursor: pointer; color: #555; }
4775
+ .tuner-actions button:hover { border-color: #4361ee; color: #4361ee; }
4776
+ .tuner-group { margin-bottom: 0.5rem; border: 1px solid #eee; border-radius: 6px; }
4777
+ .tuner-group > summary { padding: 0.5rem 0.75rem; font-size: 0.85rem; font-weight: 600; cursor: pointer; color: #444; user-select: none; }
4778
+ .tuner-group[open] { margin-bottom: 0.75rem; }
4779
+ .tuner-group-body { padding: 0.25rem 0.75rem 0.5rem; }
4780
+ .tuner-row { display: grid; grid-template-columns: 140px 1fr 70px 24px; gap: 8px; align-items: center; margin-bottom: 0.35rem; }
4781
+ .tuner-row label { font-size: 0.8rem; color: #555; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
4782
+ .tuner-row label.modified { color: #4361ee; font-weight: 600; }
4783
+ .tuner-row input[type="range"] { width: 100%; height: 6px; cursor: pointer; }
4784
+ .tuner-row input[type="number"] { width: 70px; padding: 0.2rem 0.35rem; font-size: 0.8rem; border: 1px solid #ccc; border-radius: 4px; text-align: right; }
4785
+ .tuner-row input[type="checkbox"] { width: 16px; height: 16px; cursor: pointer; }
4786
+ .tuner-row .reset-btn { width: 20px; height: 20px; border: none; background: none; cursor: pointer; color: #999; font-size: 1rem; padding: 0; line-height: 1; visibility: hidden; }
4787
+ .tuner-row .reset-btn.visible { visibility: visible; }
4788
+ .tuner-row .reset-btn:hover { color: #4361ee; }
4789
+ .tuner-bool-row { display: grid; grid-template-columns: 140px 1fr 24px; gap: 8px; align-items: center; margin-bottom: 0.35rem; }
4790
+ .tuner-bool-row label { font-size: 0.8rem; color: #555; }
4791
+ .tuner-bool-row label.modified { color: #4361ee; font-weight: 600; }
4792
+ .tuner-bool-row .reset-btn { width: 20px; height: 20px; border: none; background: none; cursor: pointer; color: #999; font-size: 1rem; padding: 0; line-height: 1; visibility: hidden; }
4793
+ .tuner-bool-row .reset-btn.visible { visibility: visible; }
4794
+ .tuner-export { margin-top: 0.75rem; }
4795
+ .tuner-export textarea { width: 100%; height: 120px; font-family: monospace; font-size: 0.8rem; padding: 0.5rem; border: 1px solid #ccc; border-radius: 6px; background: #f8f9fa; resize: vertical; }
4796
+ </style>
4797
+ </head>
4798
+ <body>
4799
+ <h1>SearchSocket <span>Playground</span></h1>
4800
+ <div class="search-box">
4801
+ <input type="text" id="q" placeholder="Type a search query..." autocomplete="off" autofocus>
4802
+ </div>
4803
+ <div class="search-opts">
4804
+ <label>
4805
+ Group by:
4806
+ <select id="groupBy">
4807
+ <option value="page">Page</option>
4808
+ <option value="chunk">Chunk</option>
4809
+ </select>
4810
+ </label>
4811
+ <label>
4812
+ Top K:
4813
+ <select id="topK">
4814
+ <option value="5">5</option>
4815
+ <option value="10" selected>10</option>
4816
+ <option value="20">20</option>
4817
+ <option value="50">50</option>
4818
+ </select>
4819
+ </label>
4820
+ </div>
4821
+
4822
+ <details class="tuner" id="tunerPanel">
4823
+ <summary>Ranking Tuner</summary>
4824
+ <div class="tuner-body">
4825
+ <div class="tuner-actions">
4826
+ <button id="resetAll" type="button">Reset All</button>
4827
+ <button id="exportConfig" type="button">Export Config</button>
4828
+ </div>
4829
+ <div id="tunerGroups"></div>
4830
+ <div class="tuner-export hidden" id="exportArea">
4831
+ <textarea id="exportText" readonly></textarea>
4832
+ </div>
4833
+ </div>
4834
+ </details>
4835
+
4836
+ <div id="meta" class="meta"></div>
4837
+ <div id="results"></div>
4838
+
4839
+ <script>
4840
+ (function() {
4841
+ var qInput = document.getElementById('q');
4842
+ var groupBySelect = document.getElementById('groupBy');
4843
+ var topKSelect = document.getElementById('topK');
4844
+ var resultsDiv = document.getElementById('results');
4845
+ var metaDiv = document.getElementById('meta');
4846
+ var tunerGroupsDiv = document.getElementById('tunerGroups');
4847
+ var exportArea = document.getElementById('exportArea');
4848
+ var exportText = document.getElementById('exportText');
4849
+
4850
+ var debounceTimer = null;
4851
+ var requestId = 0;
4852
+ var baselineConfig = null;
4853
+ var tunerParams = [];
4854
+
4855
+ var PARAM_DEFS = [
4856
+ { group: 'Thresholds', key: 'ranking.minScoreRatio', label: 'minScoreRatio', min: 0, max: 1, step: 0.01 },
4857
+ { group: 'Thresholds', key: 'ranking.scoreGapThreshold', label: 'scoreGapThreshold', min: 0, max: 1, step: 0.01 },
4858
+ { group: 'Thresholds', key: 'ranking.minChunkScoreRatio', label: 'minChunkScoreRatio', min: 0, max: 1, step: 0.01 },
4859
+ { group: 'Boosts', key: 'ranking.enableIncomingLinkBoost', label: 'incomingLinkBoost', type: 'bool' },
4860
+ { group: 'Boosts', key: 'ranking.enableDepthBoost', label: 'depthBoost', type: 'bool' },
4861
+ { group: 'Weights', key: 'ranking.weights.incomingLinks', label: 'incomingLinks', min: 0, max: 1, step: 0.01 },
4862
+ { group: 'Weights', key: 'ranking.weights.depth', label: 'depth', min: 0, max: 1, step: 0.01 },
4863
+ { group: 'Weights', key: 'ranking.weights.aggregation', label: 'aggregation', min: 0, max: 1, step: 0.01 },
4864
+ { group: 'Weights', key: 'ranking.weights.titleMatch', label: 'titleMatch', min: 0, max: 1, step: 0.01 },
4865
+ { group: 'Aggregation', key: 'ranking.aggregationCap', label: 'aggregationCap', min: 1, max: 20, step: 1 },
4866
+ { group: 'Aggregation', key: 'ranking.aggregationDecay', label: 'aggregationDecay', min: 0, max: 1, step: 0.01 },
4867
+ { group: 'Search', key: 'search.pageSearchWeight', label: 'pageSearchWeight', min: 0, max: 1, step: 0.01 }
4868
+ ];
4869
+
4870
+ function getNestedValue(obj, path) {
4871
+ var parts = path.split('.');
4872
+ var v = obj;
4873
+ for (var i = 0; i < parts.length; i++) {
4874
+ if (v == null) return undefined;
4875
+ v = v[parts[i]];
4876
+ }
4877
+ return v;
4878
+ }
4879
+
4880
+ function setNestedValue(obj, path, value) {
4881
+ var parts = path.split('.');
4882
+ var cur = obj;
4883
+ for (var i = 0; i < parts.length - 1; i++) {
4884
+ if (!cur[parts[i]]) cur[parts[i]] = {};
4885
+ cur = cur[parts[i]];
4886
+ }
4887
+ cur[parts[parts.length - 1]] = value;
4888
+ }
4889
+
4890
+ function initTuner(config) {
4891
+ baselineConfig = config;
4892
+ var groups = {};
4893
+ PARAM_DEFS.forEach(function(def) {
4894
+ if (!groups[def.group]) groups[def.group] = [];
4895
+ groups[def.group].push(def);
4896
+ });
4897
+
4898
+ var html = '';
4899
+ Object.keys(groups).forEach(function(groupName) {
4900
+ html += '<details class="tuner-group" open>';
4901
+ html += '<summary>' + groupName + '</summary>';
4902
+ html += '<div class="tuner-group-body">';
4903
+ groups[groupName].forEach(function(def) {
4904
+ var val = getNestedValue(config, def.key);
4905
+ if (def.type === 'bool') {
4906
+ html += '<div class="tuner-bool-row" data-key="' + def.key + '">';
4907
+ html += '<label>' + def.label + '</label>';
4908
+ html += '<input type="checkbox"' + (val ? ' checked' : '') + ' data-param="' + def.key + '">';
4909
+ html += '<button class="reset-btn" title="Reset" data-reset="' + def.key + '">\\u21BA</button>';
4910
+ html += '</div>';
4911
+ } else {
4912
+ html += '<div class="tuner-row" data-key="' + def.key + '">';
4913
+ html += '<label>' + def.label + '</label>';
4914
+ html += '<input type="range" min="' + def.min + '" max="' + def.max + '" step="' + def.step + '" value="' + val + '" data-param="' + def.key + '">';
4915
+ html += '<input type="number" min="' + def.min + '" max="' + def.max + '" step="' + def.step + '" value="' + val + '" data-num="' + def.key + '">';
4916
+ html += '<button class="reset-btn" title="Reset" data-reset="' + def.key + '">\\u21BA</button>';
4917
+ html += '</div>';
4918
+ }
4919
+ });
4920
+ html += '</div></details>';
4921
+ });
4922
+ tunerGroupsDiv.innerHTML = html;
4923
+
4924
+ // Wire events
4925
+ tunerGroupsDiv.addEventListener('input', function(e) {
4926
+ var param = e.target.getAttribute('data-param');
4927
+ var num = e.target.getAttribute('data-num');
4928
+ if (param) {
4929
+ // Slider or checkbox changed \u2014 sync number input
4930
+ var row = e.target.closest('[data-key]');
4931
+ if (row && e.target.type === 'range') {
4932
+ var numInput = row.querySelector('[data-num]');
4933
+ if (numInput) numInput.value = e.target.value;
4934
+ }
4935
+ updateModifiedState(param);
4936
+ scheduleSearch();
4937
+ } else if (num) {
4938
+ // Number input changed \u2014 sync slider
4939
+ var row = e.target.closest('[data-key]');
4940
+ if (row) {
4941
+ var rangeInput = row.querySelector('[data-param]');
4942
+ if (rangeInput) rangeInput.value = e.target.value;
4943
+ }
4944
+ updateModifiedState(num);
4945
+ scheduleSearch();
4946
+ }
4947
+ });
4948
+
4949
+ tunerGroupsDiv.addEventListener('change', function(e) {
4950
+ var param = e.target.getAttribute('data-param');
4951
+ if (param && e.target.type === 'checkbox') {
4952
+ updateModifiedState(param);
4953
+ scheduleSearch();
4954
+ }
4955
+ });
4956
+
4957
+ tunerGroupsDiv.addEventListener('click', function(e) {
4958
+ var resetKey = e.target.getAttribute('data-reset');
4959
+ if (resetKey) {
4960
+ resetParam(resetKey);
4961
+ scheduleSearch();
4962
+ }
4963
+ });
4964
+ }
4965
+
4966
+ function updateModifiedState(key) {
4967
+ var baseline = getNestedValue(baselineConfig, key);
4968
+ var row = tunerGroupsDiv.querySelector('[data-key="' + key + '"]');
4969
+ if (!row) return;
4970
+ var input = row.querySelector('[data-param="' + key + '"]');
4971
+ if (!input) return;
4972
+ var current = input.type === 'checkbox' ? input.checked : parseFloat(input.value);
4973
+ var isModified = current !== baseline;
4974
+ var label = row.querySelector('label');
4975
+ var resetBtn = row.querySelector('.reset-btn');
4976
+ if (label) label.classList.toggle('modified', isModified);
4977
+ if (resetBtn) resetBtn.classList.toggle('visible', isModified);
4978
+ }
4979
+
4980
+ function resetParam(key) {
4981
+ var baseline = getNestedValue(baselineConfig, key);
4982
+ var row = tunerGroupsDiv.querySelector('[data-key="' + key + '"]');
4983
+ if (!row) return;
4984
+ var input = row.querySelector('[data-param="' + key + '"]');
4985
+ if (!input) return;
4986
+ if (input.type === 'checkbox') {
4987
+ input.checked = baseline;
4988
+ } else {
4989
+ input.value = baseline;
4990
+ var numInput = row.querySelector('[data-num]');
4991
+ if (numInput) numInput.value = baseline;
4992
+ }
4993
+ updateModifiedState(key);
4994
+ }
4995
+
4996
+ function resetAll() {
4997
+ PARAM_DEFS.forEach(function(def) {
4998
+ resetParam(def.key);
4999
+ });
5000
+ exportArea.classList.add('hidden');
5001
+ scheduleSearch();
5002
+ }
5003
+
5004
+ function collectOverrides() {
5005
+ var overrides = {};
5006
+ PARAM_DEFS.forEach(function(def) {
5007
+ var row = tunerGroupsDiv.querySelector('[data-key="' + def.key + '"]');
5008
+ if (!row) return;
5009
+ var input = row.querySelector('[data-param="' + def.key + '"]');
5010
+ if (!input) return;
5011
+ var val = def.type === 'bool' ? input.checked : parseFloat(input.value);
5012
+ setNestedValue(overrides, def.key, val);
5013
+ });
5014
+ return overrides;
5015
+ }
5016
+
5017
+ function collectChangedOverrides() {
5018
+ var overrides = {};
5019
+ var hasChanges = false;
5020
+ PARAM_DEFS.forEach(function(def) {
5021
+ var row = tunerGroupsDiv.querySelector('[data-key="' + def.key + '"]');
5022
+ if (!row) return;
5023
+ var input = row.querySelector('[data-param="' + def.key + '"]');
5024
+ if (!input) return;
5025
+ var current = def.type === 'bool' ? input.checked : parseFloat(input.value);
5026
+ var baseline = getNestedValue(baselineConfig, def.key);
5027
+ if (current !== baseline) {
5028
+ setNestedValue(overrides, def.key, current);
5029
+ hasChanges = true;
5030
+ }
5031
+ });
5032
+ return hasChanges ? overrides : null;
5033
+ }
5034
+
5035
+ function exportConfig() {
5036
+ var changed = collectChangedOverrides();
5037
+ if (!changed) {
5038
+ exportArea.classList.remove('hidden');
5039
+ exportText.value = '// No parameters have been changed from defaults.';
5040
+ return;
5041
+ }
5042
+
5043
+ var lines = [];
5044
+ if (changed.ranking) {
5045
+ lines.push('ranking: {');
5046
+ var r = changed.ranking;
5047
+ var simpleKeys = ['enableIncomingLinkBoost', 'enableDepthBoost', 'aggregationCap', 'aggregationDecay', 'minChunkScoreRatio', 'minScoreRatio', 'scoreGapThreshold'];
5048
+ simpleKeys.forEach(function(k) {
5049
+ if (r[k] !== undefined) lines.push(' ' + k + ': ' + JSON.stringify(r[k]) + ',');
5050
+ });
5051
+ if (r.weights) {
5052
+ lines.push(' weights: {');
5053
+ Object.keys(r.weights).forEach(function(wk) {
5054
+ lines.push(' ' + wk + ': ' + r.weights[wk] + ',');
5055
+ });
5056
+ lines.push(' },');
5057
+ }
5058
+ lines.push('},');
5059
+ }
5060
+ if (changed.search) {
5061
+ lines.push('search: {');
5062
+ Object.keys(changed.search).forEach(function(sk) {
5063
+ lines.push(' ' + sk + ': ' + changed.search[sk] + ',');
5064
+ });
5065
+ lines.push('},');
5066
+ }
5067
+
5068
+ exportArea.classList.remove('hidden');
5069
+ exportText.value = lines.join('\\n');
5070
+ }
5071
+
5072
+ // Read initial state from URL
5073
+ var params = new URLSearchParams(window.location.search);
5074
+ if (params.get('q')) qInput.value = params.get('q');
5075
+ if (params.get('groupBy')) groupBySelect.value = params.get('groupBy');
5076
+ if (params.get('topK')) topKSelect.value = params.get('topK');
5077
+
5078
+ function updateUrl() {
5079
+ var p = new URLSearchParams();
5080
+ if (qInput.value) p.set('q', qInput.value);
5081
+ if (groupBySelect.value !== 'page') p.set('groupBy', groupBySelect.value);
5082
+ if (topKSelect.value !== '10') p.set('topK', topKSelect.value);
5083
+ var qs = p.toString();
5084
+ history.replaceState(null, '', qs ? '?' + qs : window.location.pathname);
5085
+ }
5086
+
5087
+ function doSearch() {
5088
+ var query = qInput.value.trim();
5089
+ updateUrl();
5090
+ if (!query) {
5091
+ resultsDiv.innerHTML = '<div class="empty">Enter a query to search</div>';
5092
+ metaDiv.textContent = '';
5093
+ return;
5094
+ }
5095
+
5096
+ resultsDiv.innerHTML = '<div class="loading">Searching...</div>';
5097
+
5098
+ var thisRequestId = ++requestId;
5099
+ var body = {
5100
+ q: query,
5101
+ topK: parseInt(topKSelect.value, 10),
5102
+ groupBy: groupBySelect.value,
5103
+ debug: true
5104
+ };
5105
+
5106
+ if (baselineConfig) {
5107
+ body.rankingOverrides = collectOverrides();
5108
+ }
5109
+
5110
+ fetch('/_searchsocket/search', {
5111
+ method: 'POST',
5112
+ headers: { 'Content-Type': 'application/json' },
5113
+ body: JSON.stringify(body)
5114
+ }).then(function(res) {
5115
+ if (thisRequestId !== requestId) return;
5116
+ if (!res.ok) {
5117
+ return res.text().then(function(err) {
5118
+ resultsDiv.innerHTML = '<div class="empty">Error: ' + escapeHtml(err) + '</div>';
5119
+ });
5120
+ }
5121
+ return res.json().then(function(data) {
5122
+ if (thisRequestId !== requestId) return;
5123
+ renderResults(data);
5124
+ });
5125
+ }).catch(function(err) {
5126
+ if (thisRequestId !== requestId) return;
5127
+ resultsDiv.innerHTML = '<div class="empty">Network error: ' + escapeHtml(err.message) + '</div>';
5128
+ });
5129
+ }
5130
+
5131
+ function escapeHtml(str) {
5132
+ var d = document.createElement('div');
5133
+ d.textContent = str;
5134
+ return d.innerHTML;
5135
+ }
5136
+
5137
+ function renderResults(data) {
5138
+ metaDiv.textContent = data.results.length + ' results in ' + data.meta.timingsMs.total + 'ms (search: ' + data.meta.timingsMs.search + 'ms) \\u2014 scope: ' + data.scope;
5139
+
5140
+ if (data.results.length === 0) {
5141
+ resultsDiv.innerHTML = '<div class="empty">No results found</div>';
5142
+ return;
5143
+ }
5144
+
5145
+ resultsDiv.innerHTML = data.results.map(function(r, i) {
5146
+ var html = '<div class="result">';
5147
+ html += '<div class="result-header">';
5148
+ html += '<div><div class="result-title">' + escapeHtml(r.title) + '</div>';
5149
+ html += '<div class="result-url">' + escapeHtml(r.url) + '</div></div>';
5150
+ html += '<div class="result-score">' + r.score.toFixed(4) + '</div>';
5151
+ html += '</div>';
5152
+
5153
+ if (r.snippet) {
5154
+ html += '<div class="result-snippet">' + escapeHtml(r.snippet) + '</div>';
5155
+ }
5156
+
5157
+ html += '<div class="result-meta">';
5158
+ html += '<span>route: ' + escapeHtml(r.routeFile) + '</span>';
5159
+ if (r.sectionTitle) html += '<span>section: ' + escapeHtml(r.sectionTitle) + '</span>';
5160
+ html += '</div>';
5161
+
5162
+ if (r.breakdown) {
5163
+ html += '<div class="breakdown">';
5164
+ html += '<div class="breakdown-row"><span class="breakdown-label">Base score</span><span class="breakdown-value">' + r.breakdown.baseScore.toFixed(6) + '</span></div>';
5165
+ html += '<div class="breakdown-row"><span class="breakdown-label">Incoming link boost</span><span class="breakdown-value">' + r.breakdown.incomingLinkBoost.toFixed(6) + '</span></div>';
5166
+ html += '<div class="breakdown-row"><span class="breakdown-label">Depth boost</span><span class="breakdown-value">' + r.breakdown.depthBoost.toFixed(6) + '</span></div>';
5167
+ html += '<div class="breakdown-row"><span class="breakdown-label">Title match boost</span><span class="breakdown-value">' + r.breakdown.titleMatchBoost.toFixed(6) + '</span></div>';
5168
+ html += '<div class="breakdown-row"><span class="breakdown-label">Anchor text boost</span><span class="breakdown-value">' + (r.breakdown.anchorTextMatchBoost || 0).toFixed(6) + '</span></div>';
5169
+ html += '</div>';
5170
+ }
5171
+
5172
+ if (r.chunks && r.chunks.length > 0) {
5173
+ html += '<button class="chunks-toggle" data-idx="' + i + '">Show ' + r.chunks.length + ' chunks</button>';
5174
+ html += '<div class="chunks hidden" id="chunks-' + i + '">';
5175
+ r.chunks.forEach(function(c) {
5176
+ html += '<div class="chunk">';
5177
+ if (c.headingPath && c.headingPath.length > 0) {
5178
+ html += '<div class="chunk-heading">' + escapeHtml(c.headingPath.join(' > ')) + '</div>';
5179
+ }
5180
+ if (c.sectionTitle) {
5181
+ html += '<div class="chunk-heading">' + escapeHtml(c.sectionTitle) + '</div>';
5182
+ }
5183
+ html += '<div class="chunk-snippet">' + escapeHtml(c.snippet) + '</div>';
5184
+ html += '<div class="chunk-score">score: ' + c.score.toFixed(4) + '</div>';
5185
+ html += '</div>';
5186
+ });
5187
+ html += '</div>';
5188
+ }
5189
+
5190
+ html += '</div>';
5191
+ return html;
5192
+ }).join('');
5193
+ }
5194
+
5195
+ resultsDiv.addEventListener('click', function(e) {
5196
+ if (e.target.classList.contains('chunks-toggle')) {
5197
+ var idx = e.target.getAttribute('data-idx');
5198
+ var chunksDiv = document.getElementById('chunks-' + idx);
5199
+ if (chunksDiv) {
5200
+ chunksDiv.classList.toggle('hidden');
5201
+ e.target.textContent = chunksDiv.classList.contains('hidden')
5202
+ ? 'Show ' + chunksDiv.children.length + ' chunks'
5203
+ : 'Hide chunks';
5204
+ }
5205
+ }
5206
+ });
5207
+
5208
+ function scheduleSearch() {
5209
+ clearTimeout(debounceTimer);
5210
+ debounceTimer = setTimeout(doSearch, 300);
5211
+ }
5212
+
5213
+ qInput.addEventListener('input', scheduleSearch);
5214
+ groupBySelect.addEventListener('change', scheduleSearch);
5215
+ topKSelect.addEventListener('change', scheduleSearch);
5216
+
5217
+ document.getElementById('resetAll').addEventListener('click', resetAll);
5218
+ document.getElementById('exportConfig').addEventListener('click', exportConfig);
5219
+
5220
+ // Fetch config and initialize tuner
5221
+ fetch('/_searchsocket/config').then(function(res) {
5222
+ if (res.ok) return res.json();
5223
+ return null;
5224
+ }).then(function(config) {
5225
+ if (config) initTuner(config);
5226
+ }).catch(function() {
5227
+ // Config endpoint not available \u2014 tuner stays empty
5228
+ });
5229
+
5230
+ // Trigger initial search if query is present
5231
+ if (qInput.value.trim()) doSearch();
5232
+ })();
5233
+ </script>
5234
+ </body>
5235
+ </html>
5236
+ `;
5237
+
5238
+ // src/playground/server.ts
5239
/**
 * Start the local SearchSocket playground: a small Express app bound to
 * 127.0.0.1 that serves the playground page, exposes the current ranking
 * configuration, and proxies search requests to a lazily created engine.
 *
 * Routes:
 *   GET  /_searchsocket         - the playground HTML page
 *   GET  /_searchsocket/config  - ranking/search values used as tuner baseline
 *   POST /_searchsocket/search  - runs a search from the JSON request body
 *
 * @param {object} options - Reads `config` (optional preloaded config), `cwd`,
 *   `configPath` and `port` (preferred port, defaults to 3337).
 * @returns {Promise<{ port: number, close: () => Promise<void> }>} The port
 *   actually bound and a function that shuts the HTTP server down.
 */
async function runPlaygroundServer(options) {
  const config = options.config ?? await loadConfig({
    cwd: options.cwd,
    configPath: options.configPath
  });

  let enginePromise = null;
  // The engine is created on the first search and shared by later requests.
  function getEngine() {
    if (!enginePromise) {
      const pending = Promise.resolve(
        SearchEngine.create({
          cwd: options.cwd,
          configPath: options.configPath,
          config
        })
      );
      enginePromise = pending;
      // Do not keep a failed initialisation cached: otherwise a single
      // transient failure would make every later search fail until restart.
      pending.catch(() => {
        if (enginePromise === pending) {
          enginePromise = null;
        }
      });
    }
    return enginePromise;
  }

  const app = express();
  app.use(express.json());

  app.get("/_searchsocket", (_req, res) => {
    res.type("html").send(playground_default);
  });

  app.get("/_searchsocket/config", (_req, res) => {
    res.json({
      ranking: {
        enableIncomingLinkBoost: config.ranking.enableIncomingLinkBoost,
        enableDepthBoost: config.ranking.enableDepthBoost,
        aggregationCap: config.ranking.aggregationCap,
        aggregationDecay: config.ranking.aggregationDecay,
        minChunkScoreRatio: config.ranking.minChunkScoreRatio,
        minScoreRatio: config.ranking.minScoreRatio,
        scoreGapThreshold: config.ranking.scoreGapThreshold,
        weights: { ...config.ranking.weights }
      },
      search: {
        pageSearchWeight: config.search.pageSearchWeight
      }
    });
  });

  app.post("/_searchsocket/search", async (req, res) => {
    try {
      const searchEngine = await getEngine();
      const body = req.body;
      if (!body || typeof body.q !== "string" || body.q.trim().length === 0) {
        res.status(400).json({ error: "Missing or empty 'q' field" });
        return;
      }
      // Only forward fields of the expected type; anything else is dropped so
      // the engine falls back to its own defaults.
      const result = await searchEngine.search({
        q: body.q,
        topK: typeof body.topK === "number" ? body.topK : void 0,
        scope: typeof body.scope === "string" ? body.scope : void 0,
        pathPrefix: typeof body.pathPrefix === "string" ? body.pathPrefix : void 0,
        tags: Array.isArray(body.tags) ? body.tags : void 0,
        groupBy: body.groupBy === "page" || body.groupBy === "chunk" ? body.groupBy : void 0,
        debug: body.debug === true,
        rankingOverrides: body.rankingOverrides && typeof body.rankingOverrides === "object" ? body.rankingOverrides : void 0
      });
      res.json(result);
    } catch (error) {
      // Nothing more can be written once the response has started.
      if (res.headersSent) {
        return;
      }
      const message = error instanceof Error ? error.message : "Internal server error";
      // `error` may be any thrown value (including null/undefined), and a
      // statusCode that is not a valid error status must not reach res.status.
      const candidate = error?.statusCode;
      const status = Number.isInteger(candidate) && candidate >= 400 && candidate <= 599 ? candidate : 500;
      res.status(status).json({ error: message });
    }
  });

  const preferredPort = options.port ?? 3337;

  // Bind to `port`; when it is already in use, retry once with port 0 so the
  // operating system assigns a free one.
  function startServer(port) {
    return new Promise((resolve, reject) => {
      let httpServer;
      const onListening = () => {
        const addr = httpServer.address();
        resolve({
          port: addr.port,
          close: () => new Promise((r) => httpServer.close(() => r()))
        });
      };
      httpServer = app.listen(port, "127.0.0.1", onListening);
      httpServer.once("error", (err) => {
        if (err.code === "EADDRINUSE" && port !== 0) {
          startServer(0).then(resolve, reject);
        } else {
          reject(err);
        }
      });
    });
  }

  return startServer(preferredPort);
}
5325
+
5326
+ // src/search/quality-metrics.ts
5327
+ function reciprocalRank(results, relevant) {
5328
+ const set = new Set(relevant);
5329
+ for (let i = 0; i < results.length; i++) {
5330
+ if (set.has(results[i].url)) {
5331
+ return 1 / (i + 1);
5332
+ }
5333
+ }
5334
+ return 0;
5335
+ }
5336
+ function mrr(queries) {
5337
+ if (queries.length === 0) return 0;
5338
+ const sum = queries.reduce((acc, q) => acc + reciprocalRank(q.results, q.relevant), 0);
5339
+ return sum / queries.length;
5340
+ }
5341
+
5342
+ // src/cli/test-schemas.ts
5343
+ import { z as z4 } from "zod";
5344
+ var testCaseSchema = z4.object({
5345
+ query: z4.string().min(1),
5346
+ expect: z4.object({
5347
+ topResult: z4.string().optional(),
5348
+ inTop5: z4.array(z4.string()).min(1).optional(),
5349
+ maxResults: z4.number().int().nonnegative().optional()
5350
+ }).refine(
5351
+ (e) => e.topResult !== void 0 || e.inTop5 !== void 0 || e.maxResults !== void 0,
5352
+ { message: "expect must contain at least one of topResult, inTop5, or maxResults" }
5353
+ )
5354
+ });
5355
+ var testFileSchema = z4.array(testCaseSchema).min(1, "test file must contain at least one test case");
5356
+
5357
+ // src/cli.ts
5358
+ import * as clack from "@clack/prompts";
5359
+
5360
+ // src/init-helpers.ts
5361
+ import fs9 from "fs";
5362
+ import path13 from "path";
5363
+ import { parseModule, generateCode, builders } from "magicast";
5364
+ function ensureMcpJson(cwd) {
5365
+ const mcpPath = path13.join(cwd, ".mcp.json");
5366
+ const entry = {
5367
+ command: "npx",
5368
+ args: ["searchsocket", "mcp"],
5369
+ env: {
5370
+ UPSTASH_VECTOR_REST_URL: "${UPSTASH_VECTOR_REST_URL}",
5371
+ UPSTASH_VECTOR_REST_TOKEN: "${UPSTASH_VECTOR_REST_TOKEN}"
5372
+ }
5373
+ };
5374
+ let existing = {};
5375
+ if (fs9.existsSync(mcpPath)) {
5376
+ try {
5377
+ const raw = fs9.readFileSync(mcpPath, "utf8");
5378
+ existing = JSON.parse(raw);
5379
+ } catch {
5380
+ process.stderr.write("warning: .mcp.json exists but could not be parsed \u2014 skipping\n");
5381
+ return;
5382
+ }
5383
+ }
5384
+ const raw_servers = existing.mcpServers ?? {};
5385
+ const servers = typeof raw_servers === "object" && !Array.isArray(raw_servers) ? raw_servers : {};
5386
+ if (JSON.stringify(servers["searchsocket"]) === JSON.stringify(entry)) {
5387
+ return;
5388
+ }
5389
+ existing.mcpServers = { ...servers, searchsocket: entry };
5390
+ fs9.writeFileSync(mcpPath, JSON.stringify(existing, null, 2) + "\n", "utf8");
5391
+ }
5392
+ var HOOKS_SNIPPET = `import { searchsocketHandle } from "searchsocket/sveltekit";
5393
+
5394
+ export const handle = searchsocketHandle();`;
5395
+ var VITE_PLUGIN_SNIPPET = `import { searchsocketVitePlugin } from "searchsocket/sveltekit";
5396
+
5397
+ // Add to your Vite config plugins array:
5398
+ // plugins: [sveltekit(), searchsocketVitePlugin()]`;
5399
+ function injectHooksServerTs(cwd) {
5400
+ const hooksDir = path13.join(cwd, "src");
5401
+ const tsPath = path13.join(hooksDir, "hooks.server.ts");
5402
+ const jsPath = path13.join(hooksDir, "hooks.server.js");
5403
+ const hooksPath = fs9.existsSync(tsPath) ? tsPath : fs9.existsSync(jsPath) ? jsPath : null;
5404
+ if (!hooksPath) {
5405
+ fs9.mkdirSync(hooksDir, { recursive: true });
5406
+ fs9.writeFileSync(tsPath, HOOKS_SNIPPET + "\n", "utf8");
5407
+ return "created";
5408
+ }
5409
+ const original = fs9.readFileSync(hooksPath, "utf8");
5410
+ if (original.includes("searchsocketHandle")) {
5411
+ return "already-present";
5412
+ }
5413
+ try {
5414
+ const mod = parseModule(original);
5415
+ mod.imports.$append({
5416
+ from: "searchsocket/sveltekit",
5417
+ imported: "searchsocketHandle"
3058
5418
  });
3059
- });
5419
+ if (mod.exports.handle) {
5420
+ mod.imports.$append({
5421
+ from: "@sveltejs/kit/hooks",
5422
+ imported: "sequence"
5423
+ });
5424
+ const existingHandle = mod.exports.handle;
5425
+ mod.exports.handle = builders.functionCall(
5426
+ "sequence",
5427
+ builders.functionCall("searchsocketHandle"),
5428
+ existingHandle
5429
+ );
5430
+ const { code: code2 } = generateCode(mod);
5431
+ fs9.writeFileSync(hooksPath, code2, "utf8");
5432
+ return "composed";
5433
+ }
5434
+ mod.exports.handle = builders.functionCall("searchsocketHandle");
5435
+ const { code } = generateCode(mod);
5436
+ fs9.writeFileSync(hooksPath, code, "utf8");
5437
+ return "injected";
5438
+ } catch {
5439
+ return "fallback";
5440
+ }
3060
5441
  }
3061
- async function runMcpServer(options = {}) {
3062
- const config = await loadConfig({
3063
- cwd: options.cwd,
3064
- configPath: options.configPath
3065
- });
3066
- const resolvedTransport = options.transport ?? config.mcp.transport;
3067
- if (resolvedTransport === "stdio") {
3068
- redirectConsoleToStderr();
5442
+ function injectViteConfig(cwd) {
5443
+ const tsPath = path13.join(cwd, "vite.config.ts");
5444
+ const jsPath = path13.join(cwd, "vite.config.js");
5445
+ const configPath = fs9.existsSync(tsPath) ? tsPath : fs9.existsSync(jsPath) ? jsPath : null;
5446
+ if (!configPath) {
5447
+ return "no-config";
3069
5448
  }
3070
- const engine = await SearchEngine.create({
3071
- cwd: options.cwd,
3072
- configPath: options.configPath,
3073
- config
3074
- });
3075
- if (resolvedTransport === "http") {
3076
- await startHttpServer(() => createServer(engine), config, options);
5449
+ const original = fs9.readFileSync(configPath, "utf8");
5450
+ if (original.includes("searchsocketVitePlugin")) {
5451
+ return "already-present";
5452
+ }
5453
+ try {
5454
+ const mod = parseModule(original);
5455
+ mod.imports.$append({
5456
+ from: "searchsocket/sveltekit",
5457
+ imported: "searchsocketVitePlugin"
5458
+ });
5459
+ let config = mod.exports.default;
5460
+ if (!config) {
5461
+ return "fallback";
5462
+ }
5463
+ if (config.$type === "function-call") {
5464
+ config = config.$args[0];
5465
+ }
5466
+ if (!config.plugins) {
5467
+ config.plugins = [builders.functionCall("searchsocketVitePlugin")];
5468
+ } else {
5469
+ config.plugins.push(builders.functionCall("searchsocketVitePlugin"));
5470
+ }
5471
+ const { code } = generateCode(mod);
5472
+ fs9.writeFileSync(configPath, code, "utf8");
5473
+ return "injected";
5474
+ } catch {
5475
+ return "fallback";
5476
+ }
5477
+ }
5478
+ function writeEnvFile(cwd, url, token) {
5479
+ const envPath = path13.join(cwd, ".env");
5480
+ let content = "";
5481
+ if (fs9.existsSync(envPath)) {
5482
+ content = fs9.readFileSync(envPath, "utf8");
5483
+ }
5484
+ const lines = [];
5485
+ if (!content.includes("UPSTASH_VECTOR_REST_URL=")) {
5486
+ lines.push(`UPSTASH_VECTOR_REST_URL=${url}`);
5487
+ }
5488
+ if (!content.includes("UPSTASH_VECTOR_REST_TOKEN=")) {
5489
+ lines.push(`UPSTASH_VECTOR_REST_TOKEN=${token}`);
5490
+ }
5491
+ if (lines.length > 0) {
5492
+ const suffix = content.length > 0 && !content.endsWith("\n") ? "\n" : "";
5493
+ fs9.writeFileSync(envPath, content + suffix + lines.join("\n") + "\n", "utf8");
5494
+ }
5495
+ ensureGitignoreEntry(cwd, ".env");
5496
+ }
5497
+ function ensureGitignoreEntry(cwd, entry) {
5498
+ const gitignorePath = path13.join(cwd, ".gitignore");
5499
+ let content = "";
5500
+ if (fs9.existsSync(gitignorePath)) {
5501
+ content = fs9.readFileSync(gitignorePath, "utf8");
5502
+ }
5503
+ const lines = content.split("\n");
5504
+ if (lines.some((line) => line.trim() === entry)) {
3077
5505
  return;
3078
5506
  }
3079
- const server = createServer(engine);
3080
- const stdioTransport = new StdioServerTransport();
3081
- await server.connect(stdioTransport);
5507
+ const suffix = content.length > 0 && !content.endsWith("\n") ? "\n" : "";
5508
+ fs9.writeFileSync(gitignorePath, content + suffix + entry + "\n", "utf8");
5509
+ }
5510
+
5511
+ // src/add-helpers.ts
5512
+ import fs10 from "fs";
5513
+ import fsp from "fs/promises";
5514
+ import path14 from "path";
5515
+ import { fileURLToPath } from "url";
5516
+ var __dirname = path14.dirname(fileURLToPath(import.meta.url));
5517
+ var AVAILABLE_COMPONENTS = ["search-dialog", "search-input", "search-results"];
5518
+ function resolveTemplateDir() {
5519
+ return path14.resolve(__dirname, "templates");
5520
+ }
5521
+ function listAvailableComponents() {
5522
+ return [...AVAILABLE_COMPONENTS];
5523
+ }
5524
+ function isValidComponent(name) {
5525
+ return AVAILABLE_COMPONENTS.includes(name);
5526
+ }
5527
+ async function copyComponent(name, targetDir, options = {}) {
5528
+ const templateDir = path14.join(resolveTemplateDir(), name);
5529
+ if (!fs10.existsSync(templateDir)) {
5530
+ throw new Error(
5531
+ `Template directory not found: ${templateDir}. Run "pnpm run build" to generate templates.`
5532
+ );
5533
+ }
5534
+ const entries = await fsp.readdir(templateDir);
5535
+ const svelteFiles = entries.filter((f) => f.endsWith(".svelte"));
5536
+ if (svelteFiles.length === 0) {
5537
+ throw new Error(`No .svelte files found in template: ${name}`);
5538
+ }
5539
+ await fsp.mkdir(targetDir, { recursive: true });
5540
+ const written = [];
5541
+ const skipped = [];
5542
+ for (const file of svelteFiles) {
5543
+ const dest = path14.join(targetDir, file);
5544
+ if (fs10.existsSync(dest) && !options.overwrite) {
5545
+ skipped.push(dest);
5546
+ continue;
5547
+ }
5548
+ await fsp.copyFile(path14.join(templateDir, file), dest);
5549
+ written.push(dest);
5550
+ }
5551
+ return { written, skipped };
3082
5552
  }
3083
5553
 
3084
5554
  // src/cli.ts
@@ -3117,6 +5587,10 @@ function parseDurationMs(value) {
3117
5587
  }
3118
5588
  function printIndexSummary(stats) {
3119
5589
  process.stdout.write(`pages processed: ${stats.pagesProcessed}
5590
+ `);
5591
+ process.stdout.write(`pages changed: ${stats.pagesChanged}
5592
+ `);
5593
+ process.stdout.write(`pages deleted: ${stats.pagesDeleted}
3120
5594
  `);
3121
5595
  process.stdout.write(`chunks total: ${stats.chunksTotal}
3122
5596
  `);
@@ -3138,7 +5612,7 @@ function collectWatchPaths(config, cwd) {
3138
5612
  const paths = ["src/routes/**"];
3139
5613
  if (config.source.mode === "content-files" && config.source.contentFiles) {
3140
5614
  for (const pattern of config.source.contentFiles.globs) {
3141
- paths.push(path12.join(config.source.contentFiles.baseDir, pattern));
5615
+ paths.push(path15.join(config.source.contentFiles.baseDir, pattern));
3142
5616
  }
3143
5617
  }
3144
5618
  if (config.source.mode === "static-output") {
@@ -3151,22 +5625,22 @@ function collectWatchPaths(config, cwd) {
3151
5625
  paths.push("searchsocket.config.ts");
3152
5626
  paths.push(config.source.build.outputDir);
3153
5627
  }
3154
- return paths.map((value) => path12.resolve(cwd, value));
5628
+ return paths.map((value) => path15.resolve(cwd, value));
3155
5629
  }
3156
5630
  function ensureStateDir(cwd) {
3157
- const target = path12.join(cwd, ".searchsocket");
3158
- fs8.mkdirSync(target, { recursive: true });
5631
+ const target = path15.join(cwd, ".searchsocket");
5632
+ fs11.mkdirSync(target, { recursive: true });
3159
5633
  return target;
3160
5634
  }
3161
5635
  function ensureGitignore(cwd) {
3162
- const gitignorePath = path12.join(cwd, ".gitignore");
5636
+ const gitignorePath = path15.join(cwd, ".gitignore");
3163
5637
  const entries = [
3164
5638
  ".searchsocket/manifest.json",
3165
5639
  ".searchsocket/registry.json"
3166
5640
  ];
3167
5641
  let content = "";
3168
- if (fs8.existsSync(gitignorePath)) {
3169
- content = fs8.readFileSync(gitignorePath, "utf8");
5642
+ if (fs11.existsSync(gitignorePath)) {
5643
+ content = fs11.readFileSync(gitignorePath, "utf8");
3170
5644
  }
3171
5645
  const lines = content.split("\n");
3172
5646
  const missing = entries.filter((entry) => !lines.some((line) => line.trim() === entry));
@@ -3177,10 +5651,10 @@ function ensureGitignore(cwd) {
3177
5651
  # SearchSocket local state
3178
5652
  ${missing.join("\n")}
3179
5653
  `;
3180
- fs8.writeFileSync(gitignorePath, content.trimEnd() + block, "utf8");
5654
+ fs11.writeFileSync(gitignorePath, content.trimEnd() + block, "utf8");
3181
5655
  }
3182
5656
  function readScopesFromFile(filePath) {
3183
- const raw = fs8.readFileSync(filePath, "utf8");
5657
+ const raw = fs11.readFileSync(filePath, "utf8");
3184
5658
  return new Set(
3185
5659
  raw.split(/\r?\n/).map((line) => line.trim()).filter(Boolean)
3186
5660
  );
@@ -3204,8 +5678,8 @@ function readRemoteGitBranches(cwd) {
3204
5678
  }
3205
5679
  }
3206
5680
  async function loadResolvedConfigForDev(cwd, configPath) {
3207
- const resolvedConfigPath = path12.resolve(cwd, configPath ?? "searchsocket.config.ts");
3208
- if (fs8.existsSync(resolvedConfigPath)) {
5681
+ const resolvedConfigPath = path15.resolve(cwd, configPath ?? "searchsocket.config.ts");
5682
+ if (fs11.existsSync(resolvedConfigPath)) {
3209
5683
  return loadConfig({ cwd, configPath });
3210
5684
  }
3211
5685
  return mergeConfig(cwd, {});
@@ -3248,31 +5722,157 @@ async function runIndexCommand(opts) {
3248
5722
  printIndexSummary(stats);
3249
5723
  }
3250
5724
  }
3251
- var program = new Command();
3252
- program.name("searchsocket").description("Semantic site search and MCP retrieval for SvelteKit").version(package_default.version).option("-C, --cwd <path>", "working directory", process.cwd()).option("--config <path>", "config path (defaults to searchsocket.config.ts)");
3253
- program.command("init").description("Create searchsocket.config.ts and .searchsocket state directory").action(async (_opts, command) => {
3254
- const root = getRootOptions(command).cwd ?? process.cwd();
3255
- const cwd = path12.resolve(root);
5725
+ async function runInteractiveInit(cwd) {
5726
+ clack.intro("searchsocket setup");
5727
+ const s = clack.spinner();
5728
+ s.start("Creating config files");
5729
+ const configPath = writeMinimalConfig(cwd);
5730
+ ensureStateDir(cwd);
5731
+ ensureGitignore(cwd);
5732
+ ensureMcpJson(cwd);
5733
+ s.stop("Config files created");
5734
+ const hasUrl = Boolean(process.env.UPSTASH_VECTOR_REST_URL);
5735
+ const hasToken = Boolean(process.env.UPSTASH_VECTOR_REST_TOKEN);
5736
+ if (!hasUrl || !hasToken) {
5737
+ clack.log.warn("Upstash Search credentials not found in environment.");
5738
+ const shouldConfigure = await clack.confirm({
5739
+ message: "Would you like to configure Upstash credentials now?",
5740
+ initialValue: true
5741
+ });
5742
+ if (clack.isCancel(shouldConfigure)) {
5743
+ clack.cancel("Setup cancelled.");
5744
+ process.exit(0);
5745
+ }
5746
+ if (shouldConfigure) {
5747
+ const url = hasUrl ? process.env.UPSTASH_VECTOR_REST_URL : await clack.text({
5748
+ message: "Upstash Search REST URL:",
5749
+ placeholder: "https://your-index.upstash.io",
5750
+ validate: (v) => !v ? "URL is required" : void 0
5751
+ });
5752
+ if (clack.isCancel(url)) {
5753
+ clack.cancel("Setup cancelled.");
5754
+ process.exit(0);
5755
+ }
5756
+ const token = hasToken ? process.env.UPSTASH_VECTOR_REST_TOKEN : await clack.text({
5757
+ message: "Upstash Search REST Token:",
5758
+ placeholder: "AX...",
5759
+ validate: (v) => !v ? "Token is required" : void 0
5760
+ });
5761
+ if (clack.isCancel(token)) {
5762
+ clack.cancel("Setup cancelled.");
5763
+ process.exit(0);
5764
+ }
5765
+ writeEnvFile(cwd, url, token);
5766
+ clack.log.success("Credentials written to .env");
5767
+ }
5768
+ } else {
5769
+ clack.log.success("Upstash credentials found in environment.");
5770
+ }
5771
+ s.start("Configuring hooks.server.ts");
5772
+ const hookResult = injectHooksServerTs(cwd);
5773
+ s.stop("hooks.server.ts configured");
5774
+ switch (hookResult) {
5775
+ case "created":
5776
+ clack.log.success("Created src/hooks.server.ts with searchsocketHandle.");
5777
+ break;
5778
+ case "injected":
5779
+ clack.log.success("Added searchsocketHandle to src/hooks.server.ts.");
5780
+ break;
5781
+ case "composed":
5782
+ clack.log.success("Composed searchsocketHandle with existing handle using sequence().");
5783
+ break;
5784
+ case "already-present":
5785
+ clack.log.info("searchsocketHandle already configured in hooks.server.ts.");
5786
+ break;
5787
+ case "fallback":
5788
+ clack.log.warn("Could not auto-inject hooks.server.ts. Add manually:");
5789
+ clack.log.message(HOOKS_SNIPPET);
5790
+ break;
5791
+ }
5792
+ s.start("Configuring Vite plugin");
5793
+ const viteResult = injectViteConfig(cwd);
5794
+ s.stop("Vite plugin configured");
5795
+ switch (viteResult) {
5796
+ case "injected":
5797
+ clack.log.success("Added searchsocketVitePlugin to Vite config.");
5798
+ break;
5799
+ case "already-present":
5800
+ clack.log.info("searchsocketVitePlugin already in Vite config.");
5801
+ break;
5802
+ case "no-config":
5803
+ clack.log.warn("No vite.config.ts/js found. Add the plugin manually:");
5804
+ clack.log.message(VITE_PLUGIN_SNIPPET);
5805
+ break;
5806
+ case "fallback":
5807
+ clack.log.warn("Could not auto-inject Vite config. Add manually:");
5808
+ clack.log.message(VITE_PLUGIN_SNIPPET);
5809
+ break;
5810
+ }
5811
+ clack.log.info("Run `searchsocket doctor` to verify your setup.");
5812
+ clack.outro("SearchSocket initialized! Run `searchsocket index` to index your site.");
5813
+ }
5814
+ async function runSilentInit(cwd) {
3256
5815
  const configPath = writeMinimalConfig(cwd);
3257
5816
  const stateDir = ensureStateDir(cwd);
3258
5817
  ensureGitignore(cwd);
5818
+ ensureMcpJson(cwd);
3259
5819
  process.stdout.write(`created/verified config: ${configPath}
3260
5820
  `);
3261
5821
  process.stdout.write(`created/verified state dir: ${stateDir}
3262
-
3263
5822
  `);
3264
- process.stdout.write("SvelteKit hook snippet:\n\n");
3265
- process.stdout.write('import { searchsocketHandle } from "searchsocket/sveltekit";\n\n');
3266
- process.stdout.write("export const handle = searchsocketHandle();\n\n");
3267
- process.stdout.write("Optional build-triggered indexing plugin:\n\n");
3268
- process.stdout.write('import { searchsocketVitePlugin } from "searchsocket/sveltekit";\n\n');
3269
- process.stdout.write("// svelte.config.js / vite plugins:\n");
3270
- process.stdout.write("// searchsocketVitePlugin({ enabled: true, changedOnly: true })\n");
3271
- process.stdout.write("// or env-driven: SEARCHSOCKET_AUTO_INDEX=1 pnpm build\n");
5823
+ process.stdout.write("created/verified .mcp.json (MCP server config for Claude Code)\n\n");
5824
+ const hookResult = injectHooksServerTs(cwd);
5825
+ switch (hookResult) {
5826
+ case "created":
5827
+ process.stdout.write("created src/hooks.server.ts with searchsocketHandle\n");
5828
+ break;
5829
+ case "injected":
5830
+ process.stdout.write("added searchsocketHandle to src/hooks.server.ts\n");
5831
+ break;
5832
+ case "composed":
5833
+ process.stdout.write("composed searchsocketHandle with existing handle via sequence()\n");
5834
+ break;
5835
+ case "already-present":
5836
+ process.stdout.write("searchsocketHandle already present in hooks.server.ts\n");
5837
+ break;
5838
+ case "fallback":
5839
+ process.stdout.write("could not auto-inject hooks.server.ts \u2014 add manually:\n\n");
5840
+ process.stdout.write(HOOKS_SNIPPET + "\n\n");
5841
+ break;
5842
+ }
5843
+ const viteResult = injectViteConfig(cwd);
5844
+ switch (viteResult) {
5845
+ case "injected":
5846
+ process.stdout.write("added searchsocketVitePlugin to Vite config\n");
5847
+ break;
5848
+ case "already-present":
5849
+ process.stdout.write("searchsocketVitePlugin already in Vite config\n");
5850
+ break;
5851
+ case "no-config":
5852
+ process.stdout.write("no vite.config.ts/js found \u2014 add plugin manually:\n\n");
5853
+ process.stdout.write(VITE_PLUGIN_SNIPPET + "\n\n");
5854
+ break;
5855
+ case "fallback":
5856
+ process.stdout.write("could not auto-inject Vite config \u2014 add manually:\n\n");
5857
+ process.stdout.write(VITE_PLUGIN_SNIPPET + "\n\n");
5858
+ break;
5859
+ }
5860
+ }
5861
+ var program = new Command();
5862
+ program.name("searchsocket").description("Semantic site search and MCP retrieval for SvelteKit").version(package_default.version).option("-C, --cwd <path>", "working directory", process.cwd()).option("--config <path>", "config path (defaults to searchsocket.config.ts)");
5863
+ program.command("init").description("Initialize SearchSocket in a SvelteKit project").option("--non-interactive", "skip interactive prompts").action(async (opts, command) => {
5864
+ const root = getRootOptions(command).cwd ?? process.cwd();
5865
+ const cwd = path15.resolve(root);
5866
+ const isInteractive = Boolean(process.stdout.isTTY) && !opts.nonInteractive;
5867
+ if (isInteractive) {
5868
+ await runInteractiveInit(cwd);
5869
+ } else {
5870
+ await runSilentInit(cwd);
5871
+ }
3272
5872
  });
3273
5873
  program.command("index").description("Index site content into Upstash Search").option("--scope <name>", "scope override").option("--changed-only", "only process changed chunks", true).option("--no-changed-only", "re-index regardless of previous manifest").option("--force", "force full rebuild", false).option("--dry-run", "compute plan, no writes", false).option("--source <mode>", "source mode override: static-output|crawl|content-files|build").option("--max-pages <n>", "limit pages processed").option("--max-chunks <n>", "limit chunks processed").option("--quiet", "suppress all output except errors and warnings", false).option("--verbose", "verbose output", false).option("--json", "emit JSON logs and summary", false).action(async (opts, command) => {
3274
5874
  const rootOpts = getRootOptions(command);
3275
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
5875
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3276
5876
  await runIndexCommand({
3277
5877
  cwd,
3278
5878
  configPath: rootOpts?.config,
@@ -3290,7 +5890,7 @@ program.command("index").description("Index site content into Upstash Search").o
3290
5890
  });
3291
5891
  program.command("status").description("Show scope, indexing state, and backend health").option("--scope <name>", "scope override").action(async (opts, command) => {
3292
5892
  const rootOpts = getRootOptions(command);
3293
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
5893
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3294
5894
  const config = await loadConfig({ cwd, configPath: rootOpts?.config });
3295
5895
  const scope = resolveScope(config, opts.scope);
3296
5896
  let store;
@@ -3329,7 +5929,7 @@ program.command("status").description("Show scope, indexing state, and backend h
3329
5929
  `);
3330
5930
  process.stdout.write(`resolved scope: ${scope.scopeName}
3331
5931
  `);
3332
- process.stdout.write(`backend: upstash-search
5932
+ process.stdout.write(`backend: upstash-vector
3333
5933
  `);
3334
5934
  process.stdout.write(`backend health: ${health.ok ? "ok" : `error (${health.details ?? "n/a"})`}
3335
5935
  `);
@@ -3354,19 +5954,31 @@ program.command("status").description("Show scope, indexing state, and backend h
3354
5954
  }
3355
5955
  }
3356
5956
  });
3357
- program.command("dev").description("Watch content files/routes and incrementally reindex on changes").option("--scope <name>", "scope override").option("--mcp", "start MCP server (http transport) alongside watcher", false).option("--mcp-port <n>", "MCP HTTP port", "3338").option("--mcp-path <path>", "MCP HTTP path", "/mcp").option("--verbose", "verbose logs", false).action(async (opts, command) => {
5957
+ program.command("dev").description("Watch content files/routes and incrementally reindex on changes").option("--scope <name>", "scope override").option("--playground", "serve playground UI at /_searchsocket (default: true)", true).option("--no-playground", "disable playground UI").option("--playground-port <n>", "playground HTTP port", "3337").option("--mcp", "start MCP server (http transport) alongside watcher", false).option("--mcp-port <n>", "MCP HTTP port", "3338").option("--mcp-path <path>", "MCP HTTP path", "/mcp").option("--verbose", "verbose logs", false).action(async (opts, command) => {
3358
5958
  const rootOpts = getRootOptions(command);
3359
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
5959
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3360
5960
  const config = await loadResolvedConfigForDev(cwd, rootOpts?.config);
3361
5961
  const watchPaths = collectWatchPaths(config, cwd);
3362
5962
  process.stdout.write("starting searchsocket dev watcher...\n");
3363
5963
  process.stdout.write(`watching:
3364
5964
  ${watchPaths.map((entry) => ` - ${entry}`).join("\n")}
3365
5965
  `);
5966
+ const upstashUrl = config.upstash.url ?? process.env[config.upstash.urlEnv];
5967
+ const upstashToken = config.upstash.token ?? process.env[config.upstash.tokenEnv];
5968
+ const backendMissing = !upstashUrl || !upstashToken;
5969
+ if (backendMissing) {
5970
+ process.stdout.write(
5971
+ `Search backend not configured \u2014 set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} to enable indexing. Watching for file changes only.
5972
+ `
5973
+ );
5974
+ }
3366
5975
  let running = false;
3367
5976
  let pending = false;
3368
5977
  let timer = null;
3369
5978
  const run = async () => {
5979
+ if (backendMissing) {
5980
+ return;
5981
+ }
3370
5982
  if (running) {
3371
5983
  pending = true;
3372
5984
  return;
@@ -3417,18 +6029,40 @@ ${watchPaths.map((entry) => ` - ${entry}`).join("\n")}
3417
6029
  httpPath: opts.mcpPath
3418
6030
  });
3419
6031
  }
6032
+ let closePlayground;
6033
+ if (opts.playground) {
6034
+ if (backendMissing) {
6035
+ process.stdout.write("playground disabled: search backend not configured\n");
6036
+ } else {
6037
+ void runPlaygroundServer({
6038
+ cwd,
6039
+ configPath: rootOpts?.config,
6040
+ config,
6041
+ port: parsePositiveInt(opts.playgroundPort, "--playground-port")
6042
+ }).then(({ port, close }) => {
6043
+ closePlayground = close;
6044
+ process.stdout.write(`playground available at http://127.0.0.1:${port}/_searchsocket
6045
+ `);
6046
+ }).catch((err) => {
6047
+ process.stderr.write(`playground error: ${err instanceof Error ? err.message : String(err)}
6048
+ `);
6049
+ });
6050
+ }
6051
+ }
3420
6052
  await new Promise((resolve) => {
3421
6053
  process.on("SIGINT", () => {
3422
- void watcher.close().then(() => resolve());
6054
+ const cleanups = [watcher.close()];
6055
+ if (closePlayground) cleanups.push(closePlayground());
6056
+ void Promise.all(cleanups).then(() => resolve());
3423
6057
  });
3424
6058
  });
3425
6059
  });
3426
6060
  program.command("clean").description("Delete local state and optionally delete remote indexes for a scope").option("--scope <name>", "scope override").option("--remote", "delete remote scope indexes", false).action(async (opts, command) => {
3427
6061
  const rootOpts = getRootOptions(command);
3428
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
6062
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3429
6063
  const config = await loadConfig({ cwd, configPath: rootOpts?.config });
3430
- const statePath = path12.join(cwd, config.state.dir);
3431
- await fsp.rm(statePath, { recursive: true, force: true });
6064
+ const statePath = path15.join(cwd, config.state.dir);
6065
+ await fsp2.rm(statePath, { recursive: true, force: true });
3432
6066
  process.stdout.write(`deleted local state directory: ${statePath}
3433
6067
  `);
3434
6068
  if (opts.remote) {
@@ -3440,7 +6074,7 @@ program.command("clean").description("Delete local state and optionally delete r
3440
6074
  });
3441
6075
  program.command("prune").description("List/delete stale scopes (dry-run by default)").option("--apply", "apply deletions", false).option("--scopes-file <path>", "file containing active scopes").option("--older-than <duration>", "ttl cutoff like 30d").action(async (opts, command) => {
3442
6076
  const rootOpts = getRootOptions(command);
3443
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
6077
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3444
6078
  const config = await loadConfig({ cwd, configPath: rootOpts?.config });
3445
6079
  const baseScope = resolveScope(config);
3446
6080
  let store;
@@ -3450,17 +6084,17 @@ program.command("prune").description("List/delete stale scopes (dry-run by defau
3450
6084
  scopes = await store.listScopes(config.project.id);
3451
6085
  } catch (error) {
3452
6086
  process.stderr.write(
3453
- `error: failed to access Upstash Search: ${error instanceof Error ? error.message : String(error)}
6087
+ `error: failed to access Upstash Vector: ${error instanceof Error ? error.message : String(error)}
3454
6088
  `
3455
6089
  );
3456
6090
  process.exitCode = 1;
3457
6091
  return;
3458
6092
  }
3459
- process.stdout.write(`using Upstash Search
6093
+ process.stdout.write(`using Upstash Vector
3460
6094
  `);
3461
6095
  let keepScopes = /* @__PURE__ */ new Set();
3462
6096
  if (opts.scopesFile) {
3463
- keepScopes = readScopesFromFile(path12.resolve(cwd, opts.scopesFile));
6097
+ keepScopes = readScopesFromFile(path15.resolve(cwd, opts.scopesFile));
3464
6098
  } else {
3465
6099
  keepScopes = readRemoteGitBranches(cwd);
3466
6100
  }
@@ -3531,7 +6165,7 @@ program.command("prune").description("List/delete stale scopes (dry-run by defau
3531
6165
  });
3532
6166
  program.command("doctor").description("Validate config, env vars, provider connectivity, and local write access").action(async (_opts, command) => {
3533
6167
  const rootOpts = getRootOptions(command);
3534
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
6168
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3535
6169
  const checks = [];
3536
6170
  let config = null;
3537
6171
  try {
@@ -3558,8 +6192,8 @@ program.command("doctor").description("Validate config, env vars, provider conne
3558
6192
  details: upstashToken ? void 0 : "missing"
3559
6193
  });
3560
6194
  if (config.source.mode === "static-output") {
3561
- const outputDir = path12.resolve(cwd, config.source.staticOutputDir);
3562
- const exists = fs8.existsSync(outputDir);
6195
+ const outputDir = path15.resolve(cwd, config.source.staticOutputDir);
6196
+ const exists = fs11.existsSync(outputDir);
3563
6197
  checks.push({
3564
6198
  name: "source: static output dir",
3565
6199
  ok: exists,
@@ -3568,15 +6202,15 @@ program.command("doctor").description("Validate config, env vars, provider conne
3568
6202
  } else if (config.source.mode === "build") {
3569
6203
  const buildConfig = config.source.build;
3570
6204
  if (buildConfig) {
3571
- const manifestPath = path12.resolve(cwd, buildConfig.outputDir, "server", "manifest-full.js");
3572
- const manifestExists = fs8.existsSync(manifestPath);
6205
+ const manifestPath = path15.resolve(cwd, buildConfig.outputDir, "server", "manifest-full.js");
6206
+ const manifestExists = fs11.existsSync(manifestPath);
3573
6207
  checks.push({
3574
6208
  name: "source: build manifest",
3575
6209
  ok: manifestExists,
3576
6210
  details: manifestExists ? manifestPath : `${manifestPath} not found (run \`vite build\` first)`
3577
6211
  });
3578
- const viteBin = path12.resolve(cwd, "node_modules", ".bin", "vite");
3579
- const viteExists = fs8.existsSync(viteBin);
6212
+ const viteBin = path15.resolve(cwd, "node_modules", ".bin", "vite");
6213
+ const viteExists = fs11.existsSync(viteBin);
3580
6214
  checks.push({
3581
6215
  name: "source: vite binary",
3582
6216
  ok: viteExists,
@@ -3593,7 +6227,7 @@ program.command("doctor").description("Validate config, env vars, provider conne
3593
6227
  const contentConfig = config.source.contentFiles;
3594
6228
  if (contentConfig) {
3595
6229
  const fg4 = await import("fast-glob");
3596
- const baseDir = path12.resolve(cwd, contentConfig.baseDir);
6230
+ const baseDir = path15.resolve(cwd, contentConfig.baseDir);
3597
6231
  const files = await fg4.default(contentConfig.globs, { cwd: baseDir, onlyFiles: true });
3598
6232
  checks.push({
3599
6233
  name: "source: content files",
@@ -3627,9 +6261,9 @@ program.command("doctor").description("Validate config, env vars, provider conne
3627
6261
  try {
3628
6262
  const scope = resolveScope(config);
3629
6263
  const { statePath } = ensureStateDirs(cwd, config.state.dir, scope);
3630
- const testPath = path12.join(statePath, ".write-test");
3631
- await fsp.writeFile(testPath, "ok\n", "utf8");
3632
- await fsp.rm(testPath, { force: true });
6264
+ const testPath = path15.join(statePath, ".write-test");
6265
+ await fsp2.writeFile(testPath, "ok\n", "utf8");
6266
+ await fsp2.rm(testPath, { force: true });
3633
6267
  checks.push({ name: "state directory writable", ok: true });
3634
6268
  } catch (error) {
3635
6269
  checks.push({
@@ -3654,20 +6288,22 @@ program.command("doctor").description("Validate config, env vars, provider conne
3654
6288
  process.exitCode = 1;
3655
6289
  }
3656
6290
  });
3657
- program.command("mcp").description("Run SearchSocket MCP server").option("--transport <transport>", "stdio|http", "stdio").option("--port <n>", "HTTP port", "3338").option("--path <path>", "HTTP path", "/mcp").action(async (opts, command) => {
6291
+ program.command("mcp").description("Run SearchSocket MCP server").option("--transport <transport>", "stdio|http", "stdio").option("--port <n>", "HTTP port", "3338").option("--path <path>", "HTTP path", "/mcp").addOption(new Option("--access <mode>", "access mode").choices(["public", "private"])).option("--api-key <key>", "API key for public access mode").action(async (opts, command) => {
3658
6292
  const rootOpts = getRootOptions(command);
3659
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
6293
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3660
6294
  await runMcpServer({
3661
6295
  cwd,
3662
6296
  configPath: rootOpts?.config,
3663
6297
  transport: opts.transport,
3664
6298
  httpPort: parsePositiveInt(opts.port, "--port"),
3665
- httpPath: opts.path
6299
+ httpPath: opts.path,
6300
+ access: opts.access,
6301
+ apiKey: opts.apiKey
3666
6302
  });
3667
6303
  });
3668
6304
  program.command("search").description("Quick CLI search against Upstash Search").requiredOption("--q <query>", "search query").option("--scope <name>", "scope override").option("--top-k <n>", "top K results", "10").option("--path-prefix <prefix>", "path prefix filter").action(async (opts, command) => {
3669
6305
  const rootOpts = getRootOptions(command);
3670
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
6306
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3671
6307
  const engine = await SearchEngine.create({
3672
6308
  cwd,
3673
6309
  configPath: rootOpts?.config
@@ -3681,8 +6317,156 @@ program.command("search").description("Quick CLI search against Upstash Search")
3681
6317
  process.stdout.write(`${JSON.stringify(result, null, 2)}
3682
6318
  `);
3683
6319
  });
6320
+ program.command("test").description("Run search quality assertions against the live index").option("--file <path>", "path to test file", "searchsocket.test.json").option("--scope <name>", "scope override").option("--top-k <n>", "results per query", "10").action(async (opts, command) => {
6321
+ const rootOpts = getRootOptions(command);
6322
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
6323
+ const topK = parsePositiveInt(opts.topK, "--top-k");
6324
+ const filePath = path15.resolve(cwd, opts.file);
6325
+ let rawContent;
6326
+ try {
6327
+ rawContent = await fsp2.readFile(filePath, "utf8");
6328
+ } catch {
6329
+ process.stderr.write(`error: test file not found: ${filePath}
6330
+ `);
6331
+ process.exitCode = 1;
6332
+ return;
6333
+ }
6334
+ let rawJson;
6335
+ try {
6336
+ rawJson = JSON.parse(rawContent);
6337
+ } catch {
6338
+ process.stderr.write(`error: invalid JSON in ${filePath}
6339
+ `);
6340
+ process.exitCode = 1;
6341
+ return;
6342
+ }
6343
+ const parsed = testFileSchema.safeParse(rawJson);
6344
+ if (!parsed.success) {
6345
+ process.stderr.write(`error: invalid test file: ${parsed.error.issues[0]?.message ?? "unknown error"}
6346
+ `);
6347
+ process.exitCode = 1;
6348
+ return;
6349
+ }
6350
+ const testCases = parsed.data;
6351
+ const engine = await SearchEngine.create({
6352
+ cwd,
6353
+ configPath: rootOpts?.config
6354
+ });
6355
+ let passed = 0;
6356
+ let failed = 0;
6357
+ const mrrData = [];
6358
+ for (const tc of testCases) {
6359
+ let results;
6360
+ try {
6361
+ const response = await engine.search({
6362
+ q: tc.query,
6363
+ topK,
6364
+ scope: opts.scope
6365
+ });
6366
+ results = response.results;
6367
+ } catch (error) {
6368
+ const msg = error instanceof Error ? error.message : String(error);
6369
+ process.stdout.write(`FAIL "${tc.query}" \u2192 search error: ${msg}
6370
+ `);
6371
+ failed++;
6372
+ continue;
6373
+ }
6374
+ if (tc.expect.topResult !== void 0) {
6375
+ const expectedUrl = tc.expect.topResult;
6376
+ const rank = results.findIndex((r) => r.url === expectedUrl) + 1;
6377
+ mrrData.push({ results, relevant: [expectedUrl] });
6378
+ if (rank === 1) {
6379
+ process.stdout.write(`PASS "${tc.query}" \u2192 ${expectedUrl} at rank 1
6380
+ `);
6381
+ passed++;
6382
+ } else {
6383
+ const detail = rank === 0 ? "not found" : `got rank ${rank}`;
6384
+ process.stdout.write(`FAIL "${tc.query}" \u2192 expected ${expectedUrl} at rank 1, ${detail}
6385
+ `);
6386
+ failed++;
6387
+ }
6388
+ }
6389
+ if (tc.expect.inTop5 !== void 0) {
6390
+ const expectedUrls = tc.expect.inTop5;
6391
+ const top5Urls = results.slice(0, 5).map((r) => r.url);
6392
+ const missing = expectedUrls.filter((url) => !top5Urls.includes(url));
6393
+ mrrData.push({ results, relevant: expectedUrls });
6394
+ if (missing.length === 0) {
6395
+ process.stdout.write(`PASS "${tc.query}" \u2192 all expected URLs in top 5
6396
+ `);
6397
+ passed++;
6398
+ } else {
6399
+ const missingDetail = missing.map((url) => {
6400
+ const rank = results.findIndex((r) => r.url === url) + 1;
6401
+ return rank === 0 ? `${url} (not found)` : `${url} (rank ${rank})`;
6402
+ }).join(", ");
6403
+ process.stdout.write(`FAIL "${tc.query}" \u2192 missing from top 5: ${missingDetail}
6404
+ `);
6405
+ failed++;
6406
+ }
6407
+ }
6408
+ if (tc.expect.maxResults !== void 0) {
6409
+ const max = tc.expect.maxResults;
6410
+ const actual = results.length;
6411
+ if (actual <= max) {
6412
+ process.stdout.write(`PASS "${tc.query}" \u2192 ${actual} results (max ${max})
6413
+ `);
6414
+ passed++;
6415
+ } else {
6416
+ process.stdout.write(`FAIL "${tc.query}" \u2192 expected at most ${max} results, got ${actual}
6417
+ `);
6418
+ failed++;
6419
+ }
6420
+ }
6421
+ }
6422
+ const total = passed + failed;
6423
+ process.stdout.write(`
6424
+ results: ${passed} passed, ${failed} failed of ${total} assertions
6425
+ `);
6426
+ if (mrrData.length > 0) {
6427
+ const mrrValue = mrr(mrrData);
6428
+ process.stdout.write(`MRR: ${mrrValue.toFixed(4)}
6429
+ `);
6430
+ }
6431
+ process.stdout.write(`pass rate: ${total > 0 ? (passed / total * 100).toFixed(1) : "0.0"}%
6432
+ `);
6433
+ if (failed > 0) {
6434
+ process.exitCode = 1;
6435
+ }
6436
+ });
6437
+ program.command("add <component>").description("Copy a Svelte 5 search component template into your project").option("--dir <path>", "output directory", "src/lib/components/search").option("--overwrite", "overwrite existing files", false).action(async (component, opts, command) => {
6438
+ const root = getRootOptions(command).cwd ?? process.cwd();
6439
+ const cwd = path15.resolve(root);
6440
+ if (!isValidComponent(component)) {
6441
+ const available = listAvailableComponents();
6442
+ process.stderr.write(`unknown component: ${component}
6443
+ `);
6444
+ process.stderr.write(`available components: ${available.join(", ")}
6445
+ `);
6446
+ process.exit(1);
6447
+ }
6448
+ const targetDir = path15.resolve(cwd, opts.dir);
6449
+ const result = await copyComponent(component, targetDir, { overwrite: opts.overwrite });
6450
+ for (const filePath of result.written) {
6451
+ process.stdout.write(`created: ${path15.relative(cwd, filePath)}
6452
+ `);
6453
+ }
6454
+ for (const filePath of result.skipped) {
6455
+ process.stdout.write(`skipped (exists): ${path15.relative(cwd, filePath)}
6456
+ `);
6457
+ }
6458
+ const firstWritten = result.written[0];
6459
+ if (firstWritten) {
6460
+ process.stdout.write(`
6461
+ Usage:
6462
+ `);
6463
+ const fileName = path15.basename(firstWritten, ".svelte");
6464
+ process.stdout.write(` import ${fileName} from "${path15.relative(cwd, firstWritten).replace(/\\/g, "/")}";
6465
+ `);
6466
+ }
6467
+ });
3684
6468
  async function main() {
3685
- dotenvConfig({ path: path12.resolve(process.cwd(), ".env") });
6469
+ dotenvConfig({ path: path15.resolve(process.cwd(), ".env") });
3686
6470
  await program.parseAsync(process.argv);
3687
6471
  }
3688
6472
  main().catch((error) => {