searchsocket 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,18 +1,18 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/cli.ts
4
- import fs8 from "fs";
5
- import fsp from "fs/promises";
6
- import path12 from "path";
4
+ import fs11 from "fs";
5
+ import fsp2 from "fs/promises";
6
+ import path15 from "path";
7
7
  import { execSync as execSync2 } from "child_process";
8
8
  import { config as dotenvConfig } from "dotenv";
9
9
  import chokidar from "chokidar";
10
- import { Command } from "commander";
10
+ import { Command, Option } from "commander";
11
11
 
12
12
  // package.json
13
13
  var package_default = {
14
14
  name: "searchsocket",
15
- version: "0.5.0",
15
+ version: "0.6.1",
16
16
  description: "Semantic site search and MCP retrieval for SvelteKit static sites",
17
17
  license: "MIT",
18
18
  author: "Greg Priday <greg@siteorigin.com>",
@@ -38,6 +38,7 @@ var package_default = {
38
38
  files: [
39
39
  "dist",
40
40
  "!dist/**/*.map",
41
+ "src/svelte",
41
42
  "README.md"
42
43
  ],
43
44
  bin: {
@@ -63,6 +64,19 @@ var package_default = {
63
64
  types: "./dist/scroll.d.ts",
64
65
  import: "./dist/scroll.js",
65
66
  require: "./dist/scroll.cjs"
67
+ },
68
+ "./svelte": {
69
+ types: "./src/svelte/index.svelte.ts",
70
+ svelte: "./src/svelte/index.svelte.ts",
71
+ default: "./src/svelte/index.svelte.ts"
72
+ }
73
+ },
74
+ peerDependencies: {
75
+ svelte: "^5.0.0"
76
+ },
77
+ peerDependenciesMeta: {
78
+ svelte: {
79
+ optional: true
66
80
  }
67
81
  },
68
82
  scripts: {
@@ -78,8 +92,9 @@ var package_default = {
78
92
  },
79
93
  packageManager: "pnpm@10.29.2",
80
94
  dependencies: {
95
+ "@clack/prompts": "^1.2.0",
81
96
  "@modelcontextprotocol/sdk": "^1.26.0",
82
- "@upstash/search": "^0.1.7",
97
+ "@upstash/vector": "^1.2.3",
83
98
  cheerio: "^1.2.0",
84
99
  chokidar: "^5.0.0",
85
100
  commander: "^14.0.3",
@@ -88,16 +103,19 @@ var package_default = {
88
103
  "fast-glob": "^3.3.3",
89
104
  "gray-matter": "^4.0.3",
90
105
  jiti: "^2.6.1",
106
+ magicast: "^0.5.2",
91
107
  "p-limit": "^7.3.0",
92
108
  turndown: "^7.2.2",
93
109
  "turndown-plugin-gfm": "^1.0.2",
94
110
  zod: "^4.3.6"
95
111
  },
96
112
  devDependencies: {
113
+ "@sveltejs/vite-plugin-svelte": "^6.2.4",
97
114
  "@types/express": "^5.0.6",
98
115
  "@types/node": "^25.2.2",
99
116
  "@types/turndown": "^5.0.6",
100
117
  jsdom: "^28.1.0",
118
+ svelte: "^5.55.1",
101
119
  tsup: "^8.5.1",
102
120
  typescript: "^5.9.3",
103
121
  vitest: "^4.0.18"
@@ -154,6 +172,7 @@ var searchSocketConfigSchema = z.object({
154
172
  dropSelectors: z.array(z.string()).optional(),
155
173
  ignoreAttr: z.string().optional(),
156
174
  noindexAttr: z.string().optional(),
175
+ imageDescAttr: z.string().optional(),
157
176
  respectRobotsNoindex: z.boolean().optional()
158
177
  }).optional(),
159
178
  transform: z.object({
@@ -169,35 +188,48 @@ var searchSocketConfigSchema = z.object({
169
188
  headingPathDepth: z.number().int().positive().optional(),
170
189
  dontSplitInside: z.array(z.enum(["code", "table", "blockquote"])).optional(),
171
190
  prependTitle: z.boolean().optional(),
172
- pageSummaryChunk: z.boolean().optional()
191
+ pageSummaryChunk: z.boolean().optional(),
192
+ weightHeadings: z.boolean().optional()
173
193
  }).optional(),
174
194
  upstash: z.object({
175
195
  url: z.string().url().optional(),
176
196
  token: z.string().min(1).optional(),
177
197
  urlEnv: z.string().min(1).optional(),
178
- tokenEnv: z.string().min(1).optional()
198
+ tokenEnv: z.string().min(1).optional(),
199
+ namespaces: z.object({
200
+ pages: z.string().min(1).optional(),
201
+ chunks: z.string().min(1).optional()
202
+ }).optional()
203
+ }).optional(),
204
+ embedding: z.object({
205
+ model: z.string().optional(),
206
+ dimensions: z.number().int().positive().optional(),
207
+ taskType: z.string().optional(),
208
+ batchSize: z.number().int().positive().optional()
179
209
  }).optional(),
180
210
  search: z.object({
181
- semanticWeight: z.number().min(0).max(1).optional(),
182
- inputEnrichment: z.boolean().optional(),
183
- reranking: z.boolean().optional(),
184
211
  dualSearch: z.boolean().optional(),
185
212
  pageSearchWeight: z.number().min(0).max(1).optional()
186
213
  }).optional(),
187
214
  ranking: z.object({
188
215
  enableIncomingLinkBoost: z.boolean().optional(),
189
216
  enableDepthBoost: z.boolean().optional(),
217
+ enableFreshnessBoost: z.boolean().optional(),
218
+ freshnessDecayRate: z.number().positive().optional(),
219
+ enableAnchorTextBoost: z.boolean().optional(),
190
220
  pageWeights: z.record(z.string(), z.number().min(0)).optional(),
191
221
  aggregationCap: z.number().int().positive().optional(),
192
222
  aggregationDecay: z.number().min(0).max(1).optional(),
193
223
  minChunkScoreRatio: z.number().min(0).max(1).optional(),
194
- minScore: z.number().min(0).max(1).optional(),
224
+ minScoreRatio: z.number().min(0).max(1).optional(),
195
225
  scoreGapThreshold: z.number().min(0).max(1).optional(),
196
226
  weights: z.object({
197
227
  incomingLinks: z.number().optional(),
198
228
  depth: z.number().optional(),
199
229
  aggregation: z.number().optional(),
200
- titleMatch: z.number().optional()
230
+ titleMatch: z.number().optional(),
231
+ freshness: z.number().optional(),
232
+ anchorText: z.number().optional()
201
233
  }).optional()
202
234
  }).optional(),
203
235
  api: z.object({
@@ -212,12 +244,28 @@ var searchSocketConfigSchema = z.object({
212
244
  }).optional(),
213
245
  mcp: z.object({
214
246
  enable: z.boolean().optional(),
247
+ access: z.enum(["public", "private"]).optional(),
215
248
  transport: z.enum(["stdio", "http"]).optional(),
216
249
  http: z.object({
217
250
  port: z.number().int().positive().optional(),
218
- path: z.string().optional()
251
+ path: z.string().optional(),
252
+ apiKey: z.string().min(1).optional(),
253
+ apiKeyEnv: z.string().min(1).optional()
254
+ }).optional(),
255
+ handle: z.object({
256
+ path: z.string().optional(),
257
+ apiKey: z.string().min(1).optional(),
258
+ enableJsonResponse: z.boolean().optional()
219
259
  }).optional()
220
260
  }).optional(),
261
+ llmsTxt: z.object({
262
+ enable: z.boolean().optional(),
263
+ outputPath: z.string().optional(),
264
+ title: z.string().optional(),
265
+ description: z.string().optional(),
266
+ generateFull: z.boolean().optional(),
267
+ serveMarkdownVariants: z.boolean().optional()
268
+ }).optional(),
221
269
  state: z.object({
222
270
  dir: z.string().optional()
223
271
  }).optional()
@@ -256,6 +304,7 @@ function createDefaultConfig(projectId) {
256
304
  dropSelectors: DEFAULT_DROP_SELECTORS,
257
305
  ignoreAttr: "data-search-ignore",
258
306
  noindexAttr: "data-search-noindex",
307
+ imageDescAttr: "data-search-description",
259
308
  respectRobotsNoindex: true
260
309
  },
261
310
  transform: {
@@ -265,39 +314,52 @@ function createDefaultConfig(projectId) {
265
314
  },
266
315
  chunking: {
267
316
  strategy: "hybrid",
268
- maxChars: 2200,
317
+ maxChars: 1500,
269
318
  overlapChars: 200,
270
319
  minChars: 250,
271
320
  headingPathDepth: 3,
272
321
  dontSplitInside: ["code", "table", "blockquote"],
273
322
  prependTitle: true,
274
- pageSummaryChunk: true
323
+ pageSummaryChunk: true,
324
+ weightHeadings: true
275
325
  },
276
326
  upstash: {
277
- urlEnv: "UPSTASH_SEARCH_REST_URL",
278
- tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
327
+ urlEnv: "UPSTASH_VECTOR_REST_URL",
328
+ tokenEnv: "UPSTASH_VECTOR_REST_TOKEN",
329
+ namespaces: {
330
+ pages: "pages",
331
+ chunks: "chunks"
332
+ }
333
+ },
334
+ embedding: {
335
+ model: "bge-large-en-v1.5",
336
+ dimensions: 1024,
337
+ taskType: "RETRIEVAL_DOCUMENT",
338
+ batchSize: 100
279
339
  },
280
340
  search: {
281
- semanticWeight: 0.75,
282
- inputEnrichment: true,
283
- reranking: true,
284
341
  dualSearch: true,
285
342
  pageSearchWeight: 0.3
286
343
  },
287
344
  ranking: {
288
345
  enableIncomingLinkBoost: true,
289
346
  enableDepthBoost: true,
347
+ enableFreshnessBoost: false,
348
+ freshnessDecayRate: 1e-3,
349
+ enableAnchorTextBoost: false,
290
350
  pageWeights: {},
291
351
  aggregationCap: 5,
292
352
  aggregationDecay: 0.5,
293
353
  minChunkScoreRatio: 0.5,
294
- minScore: 0.3,
354
+ minScoreRatio: 0.7,
295
355
  scoreGapThreshold: 0.4,
296
356
  weights: {
297
357
  incomingLinks: 0.05,
298
358
  depth: 0.03,
299
359
  aggregation: 0.1,
300
- titleMatch: 0.15
360
+ titleMatch: 0.15,
361
+ freshness: 0.1,
362
+ anchorText: 0.1
301
363
  }
302
364
  },
303
365
  api: {
@@ -308,12 +370,23 @@ function createDefaultConfig(projectId) {
308
370
  },
309
371
  mcp: {
310
372
  enable: process.env.NODE_ENV !== "production",
373
+ access: "private",
311
374
  transport: "stdio",
312
375
  http: {
313
376
  port: 3338,
314
377
  path: "/mcp"
378
+ },
379
+ handle: {
380
+ path: "/api/mcp",
381
+ enableJsonResponse: true
315
382
  }
316
383
  },
384
+ llmsTxt: {
385
+ enable: false,
386
+ outputPath: "static/llms.txt",
387
+ generateFull: true,
388
+ serveMarkdownVariants: false
389
+ },
317
390
  state: {
318
391
  dir: ".searchsocket"
319
392
  }
@@ -425,7 +498,15 @@ ${issues}`
425
498
  },
426
499
  upstash: {
427
500
  ...defaults.upstash,
428
- ...parsed.upstash
501
+ ...parsed.upstash,
502
+ namespaces: {
503
+ ...defaults.upstash.namespaces,
504
+ ...parsed.upstash?.namespaces
505
+ }
506
+ },
507
+ embedding: {
508
+ ...defaults.embedding,
509
+ ...parsed.embedding
429
510
  },
430
511
  search: {
431
512
  ...defaults.search,
@@ -462,8 +543,16 @@ ${issues}`
462
543
  http: {
463
544
  ...defaults.mcp.http,
464
545
  ...parsed.mcp?.http
546
+ },
547
+ handle: {
548
+ ...defaults.mcp.handle,
549
+ ...parsed.mcp?.handle
465
550
  }
466
551
  },
552
+ llmsTxt: {
553
+ ...defaults.llmsTxt,
554
+ ...parsed.llmsTxt
555
+ },
467
556
  state: {
468
557
  ...defaults.state,
469
558
  ...parsed.state
@@ -483,6 +572,15 @@ ${issues}`
483
572
  maxDepth: 10
484
573
  };
485
574
  }
575
+ if (merged.mcp.access === "public") {
576
+ const resolvedKey = merged.mcp.http.apiKey ?? (merged.mcp.http.apiKeyEnv ? process.env[merged.mcp.http.apiKeyEnv] : void 0);
577
+ if (!resolvedKey) {
578
+ throw new SearchSocketError(
579
+ "CONFIG_MISSING",
580
+ '`mcp.access` is "public" but no API key is configured. Set `mcp.http.apiKey` or `mcp.http.apiKeyEnv`.'
581
+ );
582
+ }
583
+ }
486
584
  if (merged.source.mode === "crawl" && !merged.source.crawl?.baseUrl) {
487
585
  throw new SearchSocketError("CONFIG_MISSING", "`source.crawl.baseUrl` is required when source.mode is crawl.");
488
586
  }
@@ -521,8 +619,8 @@ function writeMinimalConfig(cwd) {
521
619
  return target;
522
620
  }
523
621
  const content = `export default {
524
- // Upstash Search credentials (set via env vars or directly here)
525
- // upstash: { urlEnv: "UPSTASH_SEARCH_REST_URL", tokenEnv: "UPSTASH_SEARCH_REST_TOKEN" }
622
+ // Upstash Vector credentials (set via env vars or directly here)
623
+ // upstash: { urlEnv: "UPSTASH_VECTOR_REST_URL", tokenEnv: "UPSTASH_VECTOR_REST_TOKEN" }
526
624
  };
527
625
  `;
528
626
  fs.writeFileSync(target, content, "utf8");
@@ -586,11 +684,11 @@ var Logger = class {
586
684
  this.writeOut(` [${event}] ${data ? JSON.stringify(data) : ""}
587
685
  `);
588
686
  }
589
- writeOut(text) {
687
+ writeOut(text2) {
590
688
  if (this.stderrOnly) {
591
- process.stderr.write(text);
689
+ process.stderr.write(text2);
592
690
  } else {
593
- process.stdout.write(text);
691
+ process.stdout.write(text2);
594
692
  }
595
693
  }
596
694
  logJson(event, data) {
@@ -617,13 +715,84 @@ function normalizeMarkdown(input) {
/**
 * Normalise a scope name: lower-case it, collapse every run of characters
 * outside `[a-z0-9._-]` into a single dash, strip leading/trailing dashes,
 * and cap the result at 80 characters.
 */
function sanitizeScopeName(scopeName) {
  const lowered = scopeName.toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9._-]+/g, "-");
  const trimmed = dashed.replace(/^-+|-+$/g, "");
  return trimmed.slice(0, 80);
}
/**
 * Flatten Markdown into one line of plain text.
 *
 * Fenced code blocks are replaced by a space, inline-code backticks are
 * removed (the code text is kept), the characters `# > * _ | -` are turned
 * into spaces, and all whitespace runs are collapsed and trimmed.
 */
function markdownToPlain(markdown) {
  return markdown
    .replace(/```[\s\S]*?```/g, " ")
    .replace(/`([^`]+)`/g, "$1")
    .replace(/[#>*_|\-]/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}

/**
 * Leading snippet of the plain-text form of `markdown`.
 *
 * @param {string} markdown Markdown source.
 * @param {number} [maxLen=220] Maximum snippet length, ellipsis included.
 * @returns {string} The whole plain text when it fits, otherwise its first
 *   `maxLen - 1` characters (trimmed) followed by an ellipsis.
 */
function toSnippet(markdown, maxLen = 220) {
  const plain = markdownToPlain(markdown);
  if (plain.length <= maxLen) {
    return plain;
  }
  return `${plain.slice(0, Math.max(0, maxLen - 1)).trim()}\u2026`;
}

/**
 * Excerpt of `markdown` centred on the densest cluster of query terms.
 *
 * The query is split on whitespace into case-insensitive tokens of at least
 * two characters. A sliding window over all token occurrences picks the span
 * (at most `maxLen` wide) with the most distinct tokens, ties broken by total
 * occurrences. A `maxLen`-wide window centred on that span is then extended to
 * the nearest word boundary on each side (at most 29 characters per side).
 *
 * A boundary extension is only applied while the excerpt stays within
 * `ceil(maxLen * 1.2)` characters. Previously an over-long excerpt was cut
 * back from the tail afterwards, which could remove the matched terms and
 * left the trailing ellipsis out of sync with the text actually returned.
 *
 * @param {string} markdown Markdown source.
 * @param {string} query Search query.
 * @param {number} [maxLen=220] Target excerpt length.
 * @returns {string} Plain-text excerpt, prefixed/suffixed with an ellipsis
 *   where text was cut. Falls back to `toSnippet` when the query has no
 *   usable tokens or none of them occur in the text.
 */
function queryAwareExcerpt(markdown, query, maxLen = 220) {
  const plain = markdownToPlain(markdown);
  if (plain.length <= maxLen) return plain;

  const tokens = query.toLowerCase().split(/\s+/).filter((t) => t.length >= 2);
  if (tokens.length === 0) return toSnippet(markdown, maxLen);

  // Collect every occurrence of every token, remembering which token matched.
  const positions = [];
  for (let ti = 0; ti < tokens.length; ti++) {
    const escaped = tokens[ti].replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const re = new RegExp(escaped, "gi");
    let m;
    while ((m = re.exec(plain)) !== null) {
      positions.push({ start: m.index, end: m.index + m[0].length, tokenIdx: ti });
    }
  }
  if (positions.length === 0) return toSnippet(markdown, maxLen);
  positions.sort((a, b) => a.start - b.start);

  // Sliding window: maximise distinct tokens, then total hits, within maxLen.
  let bestUniqueCount = 0;
  let bestTotalCount = 0;
  let bestLeft = 0;
  let bestRight = 0;
  let left = 0;
  const tokenCounts = new Map();
  for (let right = 0; right < positions.length; right++) {
    const rightToken = positions[right].tokenIdx;
    tokenCounts.set(rightToken, (tokenCounts.get(rightToken) ?? 0) + 1);
    while (positions[right].end - positions[left].start > maxLen && left < right) {
      const leftToken = positions[left].tokenIdx;
      const cnt = tokenCounts.get(leftToken) - 1;
      if (cnt === 0) tokenCounts.delete(leftToken);
      else tokenCounts.set(leftToken, cnt);
      left++;
    }
    const uniqueCount = tokenCounts.size;
    const totalCount = right - left + 1;
    if (uniqueCount > bestUniqueCount || (uniqueCount === bestUniqueCount && totalCount > bestTotalCount)) {
      bestUniqueCount = uniqueCount;
      bestTotalCount = totalCount;
      bestLeft = left;
      bestRight = right;
    }
  }

  // Centre a maxLen-wide window on the best span, clamped to the text.
  const mid = Math.floor((positions[bestLeft].start + positions[bestRight].end) / 2);
  let start = Math.max(0, mid - Math.floor(maxLen / 2));
  let end = Math.min(plain.length, start + maxLen);
  start = Math.max(0, end - maxLen);

  // Snap to word boundaries, but never let snapping exceed the tolerance.
  const limit = Math.ceil(maxLen * 1.2);
  if (start > 0) {
    const spaceIdx = plain.lastIndexOf(" ", start);
    if (spaceIdx > start - 30 && end - (spaceIdx + 1) <= limit) {
      start = spaceIdx + 1;
    }
  }
  if (end < plain.length) {
    const spaceIdx = plain.indexOf(" ", end);
    if (spaceIdx !== -1 && spaceIdx < end + 30 && spaceIdx - start <= limit) {
      end = spaceIdx;
    }
  }

  const excerpt = plain.slice(start, end);
  const prefix = start > 0 ? "\u2026" : "";
  const suffix = end < plain.length ? "\u2026" : "";
  return `${prefix}${excerpt}${suffix}`;
}
627
796
  function extractFirstParagraph(markdown) {
628
797
  const lines = markdown.split("\n");
629
798
  let inFence = false;
@@ -690,163 +859,346 @@ function ensureStateDirs(cwd, stateDir, scope) {
690
859
  }
691
860
 
692
861
  // src/indexing/pipeline.ts
693
- import path10 from "path";
862
+ import path11 from "path";
694
863
 
695
864
  // src/vector/upstash.ts
696
- function chunkIndexName(scope) {
697
- return `${scope.projectId}--${scope.scopeName}`;
698
- }
699
- function pageIndexName(scope) {
700
- return `${scope.projectId}--${scope.scopeName}--pages`;
701
- }
865
+ import { QueryMode, FusionAlgorithm } from "@upstash/vector";
702
866
  var UpstashSearchStore = class {
703
- client;
867
+ index;
868
+ pagesNs;
869
+ chunksNs;
704
870
  constructor(opts) {
705
- this.client = opts.client;
706
- }
707
- chunkIndex(scope) {
708
- return this.client.index(chunkIndexName(scope));
709
- }
710
- pageIndex(scope) {
711
- return this.client.index(pageIndexName(scope));
871
+ this.index = opts.index;
872
+ this.pagesNs = opts.index.namespace(opts.pagesNamespace);
873
+ this.chunksNs = opts.index.namespace(opts.chunksNamespace);
712
874
  }
713
875
  async upsertChunks(chunks, scope) {
714
876
  if (chunks.length === 0) return;
715
- const index = this.chunkIndex(scope);
716
- const BATCH_SIZE = 100;
877
+ const BATCH_SIZE = 90;
717
878
  for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
718
879
  const batch = chunks.slice(i, i + BATCH_SIZE);
719
- await index.upsert(batch);
720
- }
721
- }
722
- async search(query, opts, scope) {
723
- const index = this.chunkIndex(scope);
724
- const results = await index.search({
725
- query,
726
- limit: opts.limit,
727
- semanticWeight: opts.semanticWeight,
728
- inputEnrichment: opts.inputEnrichment,
729
- reranking: opts.reranking,
730
- filter: opts.filter
880
+ await this.chunksNs.upsert(
881
+ batch.map((c) => ({
882
+ id: c.id,
883
+ data: c.data,
884
+ metadata: {
885
+ ...c.metadata,
886
+ projectId: scope.projectId,
887
+ scopeName: scope.scopeName,
888
+ type: c.metadata.type || "chunk"
889
+ }
890
+ }))
891
+ );
892
+ }
893
+ }
894
+ async search(data, opts, scope) {
895
+ const filterParts = [
896
+ `projectId = '${scope.projectId}'`,
897
+ `scopeName = '${scope.scopeName}'`
898
+ ];
899
+ if (opts.filter) {
900
+ filterParts.push(opts.filter);
901
+ }
902
+ const results = await this.chunksNs.query({
903
+ data,
904
+ topK: opts.limit,
905
+ includeMetadata: true,
906
+ filter: filterParts.join(" AND "),
907
+ queryMode: QueryMode.HYBRID,
908
+ fusionAlgorithm: FusionAlgorithm.DBSF
909
+ });
910
+ return results.map((doc) => ({
911
+ id: String(doc.id),
912
+ score: doc.score,
913
+ metadata: {
914
+ projectId: doc.metadata?.projectId ?? "",
915
+ scopeName: doc.metadata?.scopeName ?? "",
916
+ url: doc.metadata?.url ?? "",
917
+ path: doc.metadata?.path ?? "",
918
+ title: doc.metadata?.title ?? "",
919
+ sectionTitle: doc.metadata?.sectionTitle ?? "",
920
+ headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
921
+ snippet: doc.metadata?.snippet ?? "",
922
+ chunkText: doc.metadata?.chunkText ?? "",
923
+ ordinal: doc.metadata?.ordinal ?? 0,
924
+ contentHash: doc.metadata?.contentHash ?? "",
925
+ depth: doc.metadata?.depth ?? 0,
926
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
927
+ routeFile: doc.metadata?.routeFile ?? "",
928
+ tags: doc.metadata?.tags ?? [],
929
+ description: doc.metadata?.description || void 0,
930
+ keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
931
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
932
+ incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
933
+ }
934
+ }));
935
+ }
936
+ async searchChunksByUrl(data, url, opts, scope) {
937
+ const filterParts = [
938
+ `projectId = '${scope.projectId}'`,
939
+ `scopeName = '${scope.scopeName}'`,
940
+ `url = '${url}'`
941
+ ];
942
+ if (opts.filter) {
943
+ filterParts.push(opts.filter);
944
+ }
945
+ const results = await this.chunksNs.query({
946
+ data,
947
+ topK: opts.limit,
948
+ includeMetadata: true,
949
+ filter: filterParts.join(" AND "),
950
+ queryMode: QueryMode.HYBRID,
951
+ fusionAlgorithm: FusionAlgorithm.DBSF
731
952
  });
732
953
  return results.map((doc) => ({
733
- id: doc.id,
954
+ id: String(doc.id),
734
955
  score: doc.score,
735
956
  metadata: {
736
957
  projectId: doc.metadata?.projectId ?? "",
737
958
  scopeName: doc.metadata?.scopeName ?? "",
738
- url: doc.content.url,
959
+ url: doc.metadata?.url ?? "",
739
960
  path: doc.metadata?.path ?? "",
740
- title: doc.content.title,
741
- sectionTitle: doc.content.sectionTitle,
742
- headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
961
+ title: doc.metadata?.title ?? "",
962
+ sectionTitle: doc.metadata?.sectionTitle ?? "",
963
+ headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
743
964
  snippet: doc.metadata?.snippet ?? "",
744
- chunkText: doc.content.text,
965
+ chunkText: doc.metadata?.chunkText ?? "",
745
966
  ordinal: doc.metadata?.ordinal ?? 0,
746
967
  contentHash: doc.metadata?.contentHash ?? "",
747
968
  depth: doc.metadata?.depth ?? 0,
748
969
  incomingLinks: doc.metadata?.incomingLinks ?? 0,
749
970
  routeFile: doc.metadata?.routeFile ?? "",
750
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
971
+ tags: doc.metadata?.tags ?? [],
751
972
  description: doc.metadata?.description || void 0,
752
- keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
973
+ keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
974
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
975
+ incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
753
976
  }
754
977
  }));
755
978
  }
756
- async searchPages(query, opts, scope) {
757
- const index = this.pageIndex(scope);
979
+ async searchPagesByText(data, opts, scope) {
980
+ return this.queryPages({ data }, opts, scope);
981
+ }
982
+ async searchPagesByVector(vector, opts, scope) {
983
+ return this.queryPages({ vector }, opts, scope);
984
+ }
985
+ async queryPages(input, opts, scope) {
986
+ const filterParts = [
987
+ `projectId = '${scope.projectId}'`,
988
+ `scopeName = '${scope.scopeName}'`
989
+ ];
990
+ if (opts.filter) {
991
+ filterParts.push(opts.filter);
992
+ }
758
993
  let results;
759
994
  try {
760
- results = await index.search({
761
- query,
762
- limit: opts.limit,
763
- semanticWeight: opts.semanticWeight,
764
- inputEnrichment: opts.inputEnrichment,
765
- reranking: true,
766
- filter: opts.filter
995
+ results = await this.pagesNs.query({
996
+ ...input,
997
+ topK: opts.limit,
998
+ includeMetadata: true,
999
+ filter: filterParts.join(" AND "),
1000
+ queryMode: QueryMode.HYBRID,
1001
+ fusionAlgorithm: FusionAlgorithm.DBSF
767
1002
  });
768
1003
  } catch {
769
1004
  return [];
770
1005
  }
771
1006
  return results.map((doc) => ({
772
- id: doc.id,
1007
+ id: String(doc.id),
773
1008
  score: doc.score,
774
- title: doc.content.title,
775
- url: doc.content.url,
776
- description: doc.content.description ?? "",
777
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
1009
+ title: doc.metadata?.title ?? "",
1010
+ url: doc.metadata?.url ?? "",
1011
+ description: doc.metadata?.description ?? "",
1012
+ tags: doc.metadata?.tags ?? [],
778
1013
  depth: doc.metadata?.depth ?? 0,
779
1014
  incomingLinks: doc.metadata?.incomingLinks ?? 0,
780
- routeFile: doc.metadata?.routeFile ?? ""
1015
+ routeFile: doc.metadata?.routeFile ?? "",
1016
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0
781
1017
  }));
782
1018
  }
783
- async deleteByIds(ids, scope) {
1019
+ async deleteByIds(ids, _scope) {
784
1020
  if (ids.length === 0) return;
785
- const index = this.chunkIndex(scope);
786
- const BATCH_SIZE = 500;
1021
+ const BATCH_SIZE = 90;
787
1022
  for (let i = 0; i < ids.length; i += BATCH_SIZE) {
788
1023
  const batch = ids.slice(i, i + BATCH_SIZE);
789
- await index.delete(batch);
1024
+ await this.chunksNs.delete(batch);
790
1025
  }
791
1026
  }
792
1027
  async deleteScope(scope) {
793
- try {
794
- const chunkIdx = this.chunkIndex(scope);
795
- await chunkIdx.deleteIndex();
796
- } catch {
797
- }
798
- try {
799
- const pageIdx = this.pageIndex(scope);
800
- await pageIdx.deleteIndex();
801
- } catch {
1028
+ for (const ns of [this.chunksNs, this.pagesNs]) {
1029
+ const ids = [];
1030
+ let cursor = "0";
1031
+ try {
1032
+ for (; ; ) {
1033
+ const result = await ns.range({
1034
+ cursor,
1035
+ limit: 100,
1036
+ includeMetadata: true
1037
+ });
1038
+ for (const doc of result.vectors) {
1039
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
1040
+ ids.push(String(doc.id));
1041
+ }
1042
+ }
1043
+ if (!result.nextCursor || result.nextCursor === "0") break;
1044
+ cursor = result.nextCursor;
1045
+ }
1046
+ } catch {
1047
+ }
1048
+ if (ids.length > 0) {
1049
+ const BATCH_SIZE = 90;
1050
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
1051
+ const batch = ids.slice(i, i + BATCH_SIZE);
1052
+ await ns.delete(batch);
1053
+ }
1054
+ }
802
1055
  }
803
1056
  }
804
1057
  async listScopes(projectId) {
805
- const allIndexes = await this.client.listIndexes();
806
- const prefix = `${projectId}--`;
807
- const scopeNames = /* @__PURE__ */ new Set();
808
- for (const name of allIndexes) {
809
- if (name.startsWith(prefix) && !name.endsWith("--pages")) {
810
- const scopeName = name.slice(prefix.length);
811
- scopeNames.add(scopeName);
812
- }
813
- }
814
- const scopes = [];
815
- for (const scopeName of scopeNames) {
816
- const scope = {
817
- projectId,
818
- scopeName,
819
- scopeId: `${projectId}:${scopeName}`
820
- };
1058
+ const scopeMap = /* @__PURE__ */ new Map();
1059
+ for (const ns of [this.chunksNs, this.pagesNs]) {
1060
+ let cursor = "0";
1061
+ try {
1062
+ for (; ; ) {
1063
+ const result = await ns.range({
1064
+ cursor,
1065
+ limit: 100,
1066
+ includeMetadata: true
1067
+ });
1068
+ for (const doc of result.vectors) {
1069
+ if (doc.metadata?.projectId === projectId) {
1070
+ const scopeName = doc.metadata.scopeName ?? "";
1071
+ scopeMap.set(scopeName, (scopeMap.get(scopeName) ?? 0) + 1);
1072
+ }
1073
+ }
1074
+ if (!result.nextCursor || result.nextCursor === "0") break;
1075
+ cursor = result.nextCursor;
1076
+ }
1077
+ } catch {
1078
+ }
1079
+ }
1080
+ return [...scopeMap.entries()].map(([scopeName, count]) => ({
1081
+ projectId,
1082
+ scopeName,
1083
+ lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
1084
+ documentCount: count
1085
+ }));
1086
+ }
1087
+ async getContentHashes(scope) {
1088
+ return this.scanHashes(this.chunksNs, scope);
1089
+ }
1090
+ /**
1091
+ * Fetch content hashes for a specific set of chunk keys using direct fetch()
1092
+ * instead of range(). This avoids potential issues with range() returning
1093
+ * vectors from the wrong namespace on hybrid indexes.
1094
+ */
1095
+ async fetchContentHashesForKeys(keys, scope) {
1096
+ const map = /* @__PURE__ */ new Map();
1097
+ if (keys.length === 0) return map;
1098
+ const BATCH_SIZE = 90;
1099
+ for (let i = 0; i < keys.length; i += BATCH_SIZE) {
1100
+ const batch = keys.slice(i, i + BATCH_SIZE);
821
1101
  try {
822
- const info = await this.chunkIndex(scope).info();
823
- scopes.push({
824
- projectId,
825
- scopeName,
826
- lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
827
- documentCount: info.documentCount
1102
+ const results = await this.chunksNs.fetch(batch, {
1103
+ includeMetadata: true
828
1104
  });
1105
+ for (const doc of results) {
1106
+ if (doc && doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
1107
+ map.set(String(doc.id), doc.metadata.contentHash);
1108
+ }
1109
+ }
829
1110
  } catch {
830
- scopes.push({
831
- projectId,
832
- scopeName,
833
- lastIndexedAt: "unknown",
834
- documentCount: 0
1111
+ }
1112
+ }
1113
+ return map;
1114
+ }
1115
+ /**
1116
+ * Scan all IDs in the chunks namespace for this scope.
1117
+ * Used for deletion detection (finding stale chunk keys).
1118
+ */
1119
+ async scanChunkIds(scope) {
1120
+ const ids = /* @__PURE__ */ new Set();
1121
+ let cursor = "0";
1122
+ try {
1123
+ for (; ; ) {
1124
+ const result = await this.chunksNs.range({
1125
+ cursor,
1126
+ limit: 100,
1127
+ includeMetadata: true
1128
+ });
1129
+ for (const doc of result.vectors) {
1130
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
1131
+ ids.add(String(doc.id));
1132
+ }
1133
+ }
1134
+ if (!result.nextCursor || result.nextCursor === "0") break;
1135
+ cursor = result.nextCursor;
1136
+ }
1137
+ } catch {
1138
+ }
1139
+ return ids;
1140
+ }
1141
+ async scanHashes(ns, scope) {
1142
+ const map = /* @__PURE__ */ new Map();
1143
+ let cursor = "0";
1144
+ try {
1145
+ for (; ; ) {
1146
+ const result = await ns.range({
1147
+ cursor,
1148
+ limit: 100,
1149
+ includeMetadata: true
835
1150
  });
1151
+ for (const doc of result.vectors) {
1152
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
1153
+ map.set(String(doc.id), doc.metadata.contentHash);
1154
+ }
1155
+ }
1156
+ if (!result.nextCursor || result.nextCursor === "0") break;
1157
+ cursor = result.nextCursor;
836
1158
  }
1159
+ } catch {
837
1160
  }
838
- return scopes;
1161
+ return map;
839
1162
  }
840
- async getContentHashes(scope) {
1163
+ async listPages(scope, opts) {
1164
+ const cursor = opts?.cursor ?? "0";
1165
+ const limit = opts?.limit ?? 50;
1166
+ try {
1167
+ const result = await this.pagesNs.range({
1168
+ cursor,
1169
+ limit,
1170
+ includeMetadata: true
1171
+ });
1172
+ const pages = result.vectors.filter(
1173
+ (doc) => doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && (!opts?.pathPrefix || (doc.metadata?.url ?? "").startsWith(opts.pathPrefix))
1174
+ ).map((doc) => ({
1175
+ url: doc.metadata?.url ?? "",
1176
+ title: doc.metadata?.title ?? "",
1177
+ description: doc.metadata?.description ?? "",
1178
+ routeFile: doc.metadata?.routeFile ?? ""
1179
+ }));
1180
+ const response = { pages };
1181
+ if (result.nextCursor && result.nextCursor !== "0") {
1182
+ response.nextCursor = result.nextCursor;
1183
+ }
1184
+ return response;
1185
+ } catch {
1186
+ return { pages: [] };
1187
+ }
1188
+ }
1189
+ async getPageHashes(scope) {
841
1190
  const map = /* @__PURE__ */ new Map();
842
- const index = this.chunkIndex(scope);
843
1191
  let cursor = "0";
844
1192
  try {
845
1193
  for (; ; ) {
846
- const result = await index.range({ cursor, limit: 100 });
847
- for (const doc of result.documents) {
848
- if (doc.metadata?.contentHash) {
849
- map.set(doc.id, doc.metadata.contentHash);
1194
+ const result = await this.pagesNs.range({
1195
+ cursor,
1196
+ limit: 100,
1197
+ includeMetadata: true
1198
+ });
1199
+ for (const doc of result.vectors) {
1200
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
1201
+ map.set(String(doc.id), doc.metadata.contentHash);
850
1202
  }
851
1203
  }
852
1204
  if (!result.nextCursor || result.nextCursor === "0") break;
@@ -856,47 +1208,43 @@ var UpstashSearchStore = class {
856
1208
  }
857
1209
  return map;
858
1210
  }
1211
+ async deletePagesByIds(ids, _scope) {
1212
+ if (ids.length === 0) return;
1213
+ const BATCH_SIZE = 90;
1214
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
1215
+ const batch = ids.slice(i, i + BATCH_SIZE);
1216
+ await this.pagesNs.delete(batch);
1217
+ }
1218
+ }
859
1219
  async upsertPages(pages, scope) {
860
1220
  if (pages.length === 0) return;
861
- const index = this.pageIndex(scope);
862
- const BATCH_SIZE = 50;
1221
+ const BATCH_SIZE = 90;
863
1222
  for (let i = 0; i < pages.length; i += BATCH_SIZE) {
864
1223
  const batch = pages.slice(i, i + BATCH_SIZE);
865
- const docs = batch.map((p) => ({
866
- id: p.url,
867
- content: {
868
- title: p.title,
869
- url: p.url,
870
- type: "page",
871
- description: p.description ?? "",
872
- keywords: (p.keywords ?? []).join(","),
873
- summary: p.summary ?? "",
874
- tags: p.tags.join(",")
875
- },
876
- metadata: {
877
- markdown: p.markdown,
878
- projectId: p.projectId,
879
- scopeName: p.scopeName,
880
- routeFile: p.routeFile,
881
- routeResolution: p.routeResolution,
882
- incomingLinks: p.incomingLinks,
883
- outgoingLinks: p.outgoingLinks,
884
- depth: p.depth,
885
- indexedAt: p.indexedAt
886
- }
887
- }));
888
- await index.upsert(docs);
1224
+ await this.pagesNs.upsert(
1225
+ batch.map((p) => ({
1226
+ id: p.id,
1227
+ data: p.data,
1228
+ metadata: {
1229
+ ...p.metadata,
1230
+ projectId: scope.projectId,
1231
+ scopeName: scope.scopeName,
1232
+ type: "page"
1233
+ }
1234
+ }))
1235
+ );
889
1236
  }
890
1237
  }
891
1238
  async getPage(url, scope) {
892
- const index = this.pageIndex(scope);
893
1239
  try {
894
- const results = await index.fetch([url]);
1240
+ const results = await this.pagesNs.fetch([url], {
1241
+ includeMetadata: true
1242
+ });
895
1243
  const doc = results[0];
896
- if (!doc) return null;
1244
+ if (!doc || !doc.metadata) return null;
897
1245
  return {
898
- url: doc.content.url,
899
- title: doc.content.title,
1246
+ url: doc.metadata.url,
1247
+ title: doc.metadata.title,
900
1248
  markdown: doc.metadata.markdown,
901
1249
  projectId: doc.metadata.projectId,
902
1250
  scopeName: doc.metadata.scopeName,
@@ -904,27 +1252,86 @@ var UpstashSearchStore = class {
904
1252
  routeResolution: doc.metadata.routeResolution,
905
1253
  incomingLinks: doc.metadata.incomingLinks,
906
1254
  outgoingLinks: doc.metadata.outgoingLinks,
1255
+ outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? void 0,
907
1256
  depth: doc.metadata.depth,
908
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
1257
+ tags: doc.metadata.tags ?? [],
909
1258
  indexedAt: doc.metadata.indexedAt,
910
- summary: doc.content.summary || void 0,
911
- description: doc.content.description || void 0,
912
- keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
1259
+ summary: doc.metadata.summary || void 0,
1260
+ description: doc.metadata.description || void 0,
1261
+ keywords: doc.metadata.keywords?.length ? doc.metadata.keywords : void 0,
1262
+ publishedAt: typeof doc.metadata.publishedAt === "number" ? doc.metadata.publishedAt : void 0
913
1263
  };
914
1264
  } catch {
915
1265
  return null;
916
1266
  }
917
1267
  }
1268
+ async fetchPageWithVector(url, scope) {
1269
+ try {
1270
+ const results = await this.pagesNs.fetch([url], {
1271
+ includeMetadata: true,
1272
+ includeVectors: true
1273
+ });
1274
+ const doc = results[0];
1275
+ if (!doc || !doc.metadata || !doc.vector) return null;
1276
+ if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
1277
+ return null;
1278
+ }
1279
+ return { metadata: doc.metadata, vector: doc.vector };
1280
+ } catch {
1281
+ return null;
1282
+ }
1283
+ }
1284
+ async fetchPagesBatch(urls, scope) {
1285
+ if (urls.length === 0) return [];
1286
+ try {
1287
+ const results = await this.pagesNs.fetch(urls, {
1288
+ includeMetadata: true
1289
+ });
1290
+ const out = [];
1291
+ for (const doc of results) {
1292
+ if (!doc || !doc.metadata) continue;
1293
+ if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
1294
+ continue;
1295
+ }
1296
+ out.push({
1297
+ url: doc.metadata.url,
1298
+ title: doc.metadata.title,
1299
+ routeFile: doc.metadata.routeFile,
1300
+ outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? []
1301
+ });
1302
+ }
1303
+ return out;
1304
+ } catch {
1305
+ return [];
1306
+ }
1307
+ }
918
1308
  async deletePages(scope) {
1309
+ const ids = [];
1310
+ let cursor = "0";
919
1311
  try {
920
- const index = this.pageIndex(scope);
921
- await index.reset();
1312
+ for (; ; ) {
1313
+ const result = await this.pagesNs.range({
1314
+ cursor,
1315
+ limit: 100,
1316
+ includeMetadata: true
1317
+ });
1318
+ for (const doc of result.vectors) {
1319
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
1320
+ ids.push(String(doc.id));
1321
+ }
1322
+ }
1323
+ if (!result.nextCursor || result.nextCursor === "0") break;
1324
+ cursor = result.nextCursor;
1325
+ }
922
1326
  } catch {
923
1327
  }
1328
+ if (ids.length > 0) {
1329
+ await this.deletePagesByIds(ids, scope);
1330
+ }
924
1331
  }
925
1332
  async health() {
926
1333
  try {
927
- await this.client.info();
1334
+ await this.index.info();
928
1335
  return { ok: true };
929
1336
  } catch (error) {
930
1337
  return {
@@ -934,14 +1341,31 @@ var UpstashSearchStore = class {
934
1341
  }
935
1342
  }
936
1343
  async dropAllIndexes(projectId) {
937
- const allIndexes = await this.client.listIndexes();
938
- const prefix = `${projectId}--`;
939
- for (const name of allIndexes) {
940
- if (name.startsWith(prefix)) {
941
- try {
942
- const index = this.client.index(name);
943
- await index.deleteIndex();
944
- } catch {
1344
+ for (const ns of [this.chunksNs, this.pagesNs]) {
1345
+ const ids = [];
1346
+ let cursor = "0";
1347
+ try {
1348
+ for (; ; ) {
1349
+ const result = await ns.range({
1350
+ cursor,
1351
+ limit: 100,
1352
+ includeMetadata: true
1353
+ });
1354
+ for (const doc of result.vectors) {
1355
+ if (doc.metadata?.projectId === projectId) {
1356
+ ids.push(String(doc.id));
1357
+ }
1358
+ }
1359
+ if (!result.nextCursor || result.nextCursor === "0") break;
1360
+ cursor = result.nextCursor;
1361
+ }
1362
+ } catch {
1363
+ }
1364
+ if (ids.length > 0) {
1365
+ const BATCH_SIZE = 90;
1366
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
1367
+ const batch = ids.slice(i, i + BATCH_SIZE);
1368
+ await ns.delete(batch);
945
1369
  }
946
1370
  }
947
1371
  }
@@ -955,12 +1379,16 @@ async function createUpstashStore(config) {
955
1379
  if (!url || !token) {
956
1380
  throw new SearchSocketError(
957
1381
  "VECTOR_BACKEND_UNAVAILABLE",
958
- `Missing Upstash Search credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
1382
+ `Missing Upstash Vector credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
959
1383
  );
960
1384
  }
961
- const { Search } = await import("@upstash/search");
962
- const client = new Search({ url, token });
963
- return new UpstashSearchStore({ client });
1385
+ const { Index } = await import("@upstash/vector");
1386
+ const index = new Index({ url, token });
1387
+ return new UpstashSearchStore({
1388
+ index,
1389
+ pagesNamespace: config.upstash.namespaces.pages,
1390
+ chunksNamespace: config.upstash.namespaces.chunks
1391
+ });
964
1392
  }
965
1393
 
966
1394
  // src/utils/hash.ts
@@ -1034,6 +1462,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
1034
1462
  if (normalizeText(current.text)) {
1035
1463
  sections.push({
1036
1464
  sectionTitle: current.sectionTitle,
1465
+ headingLevel: current.headingLevel,
1037
1466
  headingPath: current.headingPath,
1038
1467
  text: current.text.trim()
1039
1468
  });
@@ -1052,6 +1481,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
1052
1481
  headingStack.length = level;
1053
1482
  current = {
1054
1483
  sectionTitle: title,
1484
+ headingLevel: level,
1055
1485
  headingPath: headingStack.filter((entry) => Boolean(entry)).slice(0, headingPathDepth),
1056
1486
  text: `${line}
1057
1487
  `
@@ -1071,8 +1501,8 @@ function parseHeadingSections(markdown, headingPathDepth) {
1071
1501
  }
1072
1502
  return sections;
1073
1503
  }
1074
- function blockify(text, config) {
1075
- const lines = text.split("\n");
1504
+ function blockify(text2, config) {
1505
+ const lines = text2.split("\n");
1076
1506
  const blocks = [];
1077
1507
  let inFence = false;
1078
1508
  let current = [];
@@ -1179,20 +1609,21 @@ function splitOversizedBlock(block, config) {
1179
1609
  return chunks.length > 0 ? chunks : [trimmed];
1180
1610
  }
1181
1611
  function splitSection(section, config) {
1182
- const text = section.text.trim();
1183
- if (!text) {
1612
+ const text2 = section.text.trim();
1613
+ if (!text2) {
1184
1614
  return [];
1185
1615
  }
1186
- if (text.length <= config.maxChars) {
1616
+ if (text2.length <= config.maxChars) {
1187
1617
  return [
1188
1618
  {
1189
1619
  sectionTitle: section.sectionTitle,
1620
+ headingLevel: section.headingLevel,
1190
1621
  headingPath: section.headingPath,
1191
- chunkText: text
1622
+ chunkText: text2
1192
1623
  }
1193
1624
  ];
1194
1625
  }
1195
- const blocks = blockify(text, config);
1626
+ const blocks = blockify(text2, config);
1196
1627
  const chunks = [];
1197
1628
  let current = "";
1198
1629
  for (const block of blocks) {
@@ -1237,6 +1668,7 @@ ${chunk}`;
1237
1668
  }
1238
1669
  return merged.map((chunkText) => ({
1239
1670
  sectionTitle: section.sectionTitle,
1671
+ headingLevel: section.headingLevel,
1240
1672
  headingPath: section.headingPath,
1241
1673
  chunkText
1242
1674
  }));
@@ -1252,6 +1684,18 @@ function buildSummaryChunkText(page) {
1252
1684
  }
1253
1685
  return parts.join("\n\n");
1254
1686
  }
1687
/**
 * Builds the "page title — heading trail" string for a chunk.
 *
 * @param chunk Object with `title`, `sectionTitle`, `headingLevel` and `headingPath`.
 * @returns `undefined` when the chunk has no section title or no heading level.
 *   Otherwise the page title followed by an em dash and either the section
 *   title alone (heading path of length 0 or 1) or the heading path joined
 *   with " > ", with the section title appended when it is not already the
 *   last path entry.
 */
function buildEmbeddingTitle(chunk) {
  const { title, sectionTitle, headingLevel, headingPath } = chunk;
  if (!sectionTitle || headingLevel === void 0) return void 0;
  let trail = sectionTitle;
  if (headingPath.length > 1) {
    const crumbs = headingPath.join(" > ");
    const endsWithSection = headingPath[headingPath.length - 1] === sectionTitle;
    trail = endsWithSection ? crumbs : `${crumbs} > ${sectionTitle}`;
  }
  return `${title} \u2014 ${trail}`;
}
1255
1699
  function buildEmbeddingText(chunk, prependTitle) {
1256
1700
  if (!prependTitle) return chunk.chunkText;
1257
1701
  const prefix = chunk.sectionTitle ? `${chunk.title} \u2014 ${chunk.sectionTitle}` : chunk.title;
@@ -1282,10 +1726,14 @@ function chunkPage(page, config, scope) {
1282
1726
  tags: page.tags,
1283
1727
  contentHash: "",
1284
1728
  description: page.description,
1285
- keywords: page.keywords
1729
+ keywords: page.keywords,
1730
+ publishedAt: page.publishedAt,
1731
+ incomingAnchorText: page.incomingAnchorText,
1732
+ meta: page.meta
1286
1733
  };
1287
1734
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
1288
- summaryChunk.contentHash = sha256(normalizeText(embeddingText));
1735
+ const metaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
1736
+ summaryChunk.contentHash = sha256(normalizeText(embeddingText) + metaSuffix);
1289
1737
  chunks.push(summaryChunk);
1290
1738
  }
1291
1739
  const ordinalOffset = config.chunking.pageSummaryChunk ? 1 : 0;
@@ -1302,6 +1750,7 @@ function chunkPage(page, config, scope) {
1302
1750
  path: page.url,
1303
1751
  title: page.title,
1304
1752
  sectionTitle: entry.sectionTitle,
1753
+ headingLevel: entry.headingLevel,
1305
1754
  headingPath: entry.headingPath,
1306
1755
  chunkText: entry.chunkText,
1307
1756
  snippet: toSnippet(entry.chunkText),
@@ -1311,10 +1760,16 @@ function chunkPage(page, config, scope) {
1311
1760
  tags: page.tags,
1312
1761
  contentHash: "",
1313
1762
  description: page.description,
1314
- keywords: page.keywords
1763
+ keywords: page.keywords,
1764
+ publishedAt: page.publishedAt,
1765
+ incomingAnchorText: page.incomingAnchorText,
1766
+ meta: page.meta
1315
1767
  };
1316
1768
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
1317
- chunk.contentHash = sha256(normalizeText(embeddingText));
1769
+ const embeddingTitle = config.chunking.weightHeadings ? buildEmbeddingTitle(chunk) : void 0;
1770
+ const chunkMetaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
1771
+ const hashInput = embeddingTitle ? `${normalizeText(embeddingText)}|title:${embeddingTitle}` : normalizeText(embeddingText);
1772
+ chunk.contentHash = sha256(hashInput + chunkMetaSuffix);
1318
1773
  chunks.push(chunk);
1319
1774
  }
1320
1775
  return chunks;
@@ -1325,6 +1780,113 @@ import { load } from "cheerio";
1325
1780
  import matter from "gray-matter";
1326
1781
  import TurndownService from "turndown";
1327
1782
  import { gfm, highlightedCodeBlock, strikethrough, tables, taskListItems } from "turndown-plugin-gfm";
1783
+
1784
+ // src/utils/structured-meta.ts
1785
// Accepted meta keys: a letter or underscore followed by letters, digits or underscores.
var VALID_KEY_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
/**
 * Tells whether `key` is an acceptable structured-meta key.
 *
 * @param key Candidate key.
 * @returns `true` when the whole key matches `VALID_KEY_RE`.
 */
function validateMetaKey(key) {
  const isIdentifierLike = VALID_KEY_RE.test(key);
  return isIdentifierLike;
}
1789
/**
 * Converts the `content` string of a `searchsocket:*` meta tag into a typed value.
 *
 * @param content Raw attribute value.
 * @param dataType One of "number", "boolean", "string[]", "date"; anything
 *   else is treated as a plain string.
 * @returns
 *   - "number" / "date": the finite numeric value, or `content` unchanged when
 *     it is blank or not a finite number;
 *   - "boolean": `true` only for the exact string "true";
 *   - "string[]": comma-separated entries, trimmed, with empty entries dropped;
 *   - otherwise `content` unchanged.
 */
function parseMetaValue(content, dataType) {
  switch (dataType) {
    case "number":
    case "date": {
      // Number("") and Number("   ") are 0, which would turn a missing value
      // into a real-looking 0; keep blank input as-is instead.
      if (typeof content === "string" && content.trim() === "") return content;
      const parsed = Number(content);
      return Number.isFinite(parsed) ? parsed : content;
    }
    case "boolean":
      return content === "true";
    case "string[]":
      // Drop empty entries produced by stray or trailing commas.
      return content ? content.split(",").map((s) => s.trim()).filter(Boolean) : [];
    default:
      return content;
  }
}
1807
/**
 * Doubles every single quote in `s` so the value can sit inside a
 * single-quoted filter literal.
 *
 * @param s String to escape.
 * @returns `s` with each `'` replaced by `''`.
 */
function escapeFilterValue(s) {
  const QUOTE = "'";
  return s.split(QUOTE).join(QUOTE + QUOTE);
}
1810
/**
 * Turns a `{ key: value }` filter object into a filter expression over
 * `meta.<key>` fields, with clauses joined by " AND ".
 *
 * Entries are skipped when the key fails `validateMetaKey` or when the value
 * is not a string, a boolean or a finite number. Skipping the latter keeps
 * arrays, objects, `null`, `undefined` and `NaN` from being interpolated
 * unquoted into the expression.
 *
 * @param filters Object mapping meta keys to the value to match.
 * @returns The filter expression, or "" when no entry produced a clause.
 */
function buildMetaFilterString(filters) {
  const clauses = [];
  for (const [key, value] of Object.entries(filters)) {
    if (!validateMetaKey(key)) continue;
    const field = `meta.${key}`;
    if (typeof value === "string") {
      // NOTE(review): Upstash's filter docs describe CONTAINS as an
      // array-membership operator — confirm scalar string fields are meant to
      // be matched this way.
      clauses.push(`${field} CONTAINS '${escapeFilterValue(value)}'`);
    } else if (typeof value === "boolean" || (typeof value === "number" && Number.isFinite(value))) {
      clauses.push(`${field} = ${value}`);
    }
  }
  return clauses.join(" AND ");
}
1825
+
1826
+ // src/indexing/extractor.ts
1827
/**
 * Normalizes a date-like value to epoch milliseconds.
 *
 * @param value A `Date`, a string accepted by the `Date` constructor, or a number.
 * @returns The finite millisecond timestamp, or `undefined` for `null`,
 *   `undefined`, unsupported types, invalid dates and non-finite numbers.
 */
function normalizeDateToMs(value) {
  if (value === null || value === void 0) return void 0;
  let ms;
  if (value instanceof Date) {
    ms = value.getTime();
  } else if (typeof value === "string") {
    ms = new Date(value).getTime();
  } else if (typeof value === "number") {
    ms = value;
  } else {
    return void 0;
  }
  return Number.isFinite(ms) ? ms : void 0;
}
1842
// Frontmatter keys probed for a date, in priority order.
var FRONTMATTER_DATE_FIELDS = ["date", "publishedAt", "updatedAt", "published_at", "updated_at"];
/**
 * Picks the first frontmatter field that holds a usable date.
 *
 * @param data Parsed frontmatter object.
 * @returns Epoch milliseconds from the first field in
 *   `FRONTMATTER_DATE_FIELDS` that `normalizeDateToMs` accepts, else `undefined`.
 */
function extractPublishedAtFromFrontmatter(data) {
  let found;
  // `some` stops at the first field that yields a timestamp.
  FRONTMATTER_DATE_FIELDS.some((field) => {
    found = normalizeDateToMs(data[field]);
    return found !== void 0;
  });
  return found;
}
1850
/**
 * Looks for a publication date in a loaded HTML document.
 *
 * Sources are tried in this order and the first usable one wins:
 *   1. `datePublished` in any `application/ld+json` script (top-level object,
 *      top-level array entries, or entries of a top-level `@graph` array);
 *   2. `<meta property="article:published_time">`;
 *   3. `<meta itemprop="datePublished">` or `<time itemprop="datePublished">`;
 *   4. the first `<time datetime>` element.
 *
 * @param $ Document query function (called with selectors and with elements).
 * @returns Epoch milliseconds, or `undefined` when nothing parses as a date.
 */
function extractPublishedAtFromHtml($) {
  const jsonLdScripts = $('script[type="application/ld+json"]');
  for (let i = 0; i < jsonLdScripts.length; i++) {
    try {
      const raw = $(jsonLdScripts[i]).html();
      if (!raw) continue;
      const parsed = JSON.parse(raw);
      const candidates = [];
      if (Array.isArray(parsed)) {
        candidates.push(...parsed);
      } else if (parsed && typeof parsed === "object") {
        candidates.push(parsed);
        if (Array.isArray(parsed["@graph"])) {
          candidates.push(...parsed["@graph"]);
        }
      }
      for (const candidate of candidates) {
        // Optional chaining: a `null` entry must not throw, otherwise the
        // catch below would discard the remaining candidates of this script.
        const val = normalizeDateToMs(candidate?.datePublished);
        if (val !== void 0) return val;
      }
    } catch {
      // Malformed JSON-LD is ignored on purpose; fall through to the next source.
    }
  }
  const ogTime = $('meta[property="article:published_time"]').attr("content")?.trim();
  if (ogTime) {
    const val = normalizeDateToMs(ogTime);
    if (val !== void 0) return val;
  }
  const itempropDate = $('meta[itemprop="datePublished"]').attr("content")?.trim() || $('time[itemprop="datePublished"]').attr("datetime")?.trim();
  if (itempropDate) {
    const val = normalizeDateToMs(itempropDate);
    if (val !== void 0) return val;
  }
  const timeEl = $("time[datetime]").first().attr("datetime")?.trim();
  if (timeEl) {
    const val = normalizeDateToMs(timeEl);
    if (val !== void 0) return val;
  }
  return void 0;
}
1328
1890
  function hasTopLevelNoindexComment(markdown) {
1329
1891
  const lines = markdown.split(/\r?\n/);
1330
1892
  let inFence = false;
@@ -1340,6 +1902,97 @@ function hasTopLevelNoindexComment(markdown) {
1340
1902
  }
1341
1903
  return false;
1342
1904
  }
1905
// Alt texts that consist solely of one of these words carry no description.
var GARBAGE_ALT_WORDS = /* @__PURE__ */ new Set([
  "image",
  "photo",
  "picture",
  "icon",
  "logo",
  "banner",
  "screenshot",
  "thumbnail",
  "img",
  "graphic",
  "illustration",
  "spacer",
  "pixel",
  "placeholder",
  "avatar",
  "background"
]);
// Matches text ending in an image file extension, optionally followed by a query string.
var IMAGE_EXT_RE = /\.(jpg|jpeg|png|gif|svg|webp|avif|bmp|ico)(\?.*)?$/i;
/**
 * Decides whether an image `alt` text is worth indexing.
 *
 * @param alt Raw alt attribute value.
 * @returns `false` when the trimmed text is shorter than 5 characters, ends in
 *   an image file extension, or is exactly one of `GARBAGE_ALT_WORDS`
 *   (case-insensitive); `true` otherwise.
 */
function isMeaningfulAlt(alt) {
  const candidate = alt.trim();
  const longEnough = candidate.length >= 5;
  return longEnough && !IMAGE_EXT_RE.test(candidate) && !GARBAGE_ALT_WORDS.has(candidate.toLowerCase());
}
1931
/**
 * Chooses the text that should stand in for an image.
 *
 * Priority: the image's own `imageDescAttr` attribute, then the same attribute
 * on the closest `<figure>`, then "alt — caption", then a meaningful alt on its
 * own, then the figure caption on its own.
 *
 * @param img Wrapped `<img>` element.
 * @param $ Document query function (not used by this function).
 * @param imageDescAttr Name of the attribute holding an explicit description.
 * @returns The chosen text, or `null` when none of the sources yields any.
 */
function resolveImageText(img, $, imageDescAttr) {
  const explicit = img.attr(imageDescAttr)?.trim();
  if (explicit) return explicit;
  const figure = img.closest("figure");
  const insideFigure = Boolean(figure.length);
  if (insideFigure) {
    const figureText = figure.attr(imageDescAttr)?.trim();
    if (figureText) return figureText;
  }
  const alt = img.attr("alt")?.trim() ?? "";
  const caption = insideFigure ? figure.find("figcaption").first().text().trim() : "";
  if (isMeaningfulAlt(alt)) {
    return caption ? `${alt} \u2014 ${caption}` : alt;
  }
  return caption ? caption : null;
}
1952
// Link texts that say nothing about the link target.
var STOP_ANCHORS = /* @__PURE__ */ new Set([
  "here",
  "click",
  "click here",
  "read more",
  "link",
  "this",
  "more"
]);
/**
 * Normalizes link text for use as anchor text.
 *
 * @param raw Text content of a link.
 * @returns The text with whitespace runs collapsed to single spaces, trimmed,
 *   lower-cased and cut to at most 100 characters; "" when the result is
 *   shorter than 3 characters or is one of `STOP_ANCHORS`.
 */
function normalizeAnchorText(raw) {
  const collapsed = raw.replace(/\s+/g, " ").trim().toLowerCase();
  const unusable = collapsed.length < 3 || STOP_ANCHORS.has(collapsed);
  return unusable ? "" : collapsed.slice(0, 100);
}
1968
/**
 * Escapes `&`, `<` and `>` so the text can be placed inside an HTML element.
 *
 * @param text2 Plain text.
 * @returns The text with the three characters replaced by their entities.
 */
function escapeHtml(text2) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };
  return text2.replace(/[&<>]/g, (ch) => entities[ch]);
}
1971
/**
 * Replaces every `<picture>` and `<img>` under `root` with a `<span>` holding
 * its resolved text, or removes the element when no text can be resolved.
 * When text is used and the element sits in a `<figure>`, that figure's
 * `<figcaption>` elements are removed.
 *
 * `<picture>` elements are handled first, then the remaining `<img>` elements.
 *
 * @param root Wrapped root element to process in place.
 * @param $ Document query function.
 * @param imageDescAttr Attribute name passed through to `resolveImageText`.
 */
function preprocessImages(root, $, imageDescAttr) {
  // `node` is the element to swap out, `img` the image whose text describes it.
  const swapForText = (node, img) => {
    const enclosingFigure = node.closest("figure");
    const label = img.length ? resolveImageText(img, $, imageDescAttr) : null;
    if (!label) {
      node.remove();
      return;
    }
    if (enclosingFigure.length) enclosingFigure.find("figcaption").remove();
    node.replaceWith(`<span>${escapeHtml(label)}</span>`);
  };
  root.find("picture").each((_i, el) => {
    const picture = $(el);
    swapForText(picture, picture.find("img").first());
  });
  root.find("img").each((_i, el) => {
    const img = $(el);
    swapForText(img, img);
  });
}
1343
1996
  function extractFromHtml(url, html, config) {
1344
1997
  const $ = load(html);
1345
1998
  const normalizedUrl = normalizeUrlPath(url);
@@ -1365,6 +2018,20 @@ function extractFromHtml(url, html, config) {
1365
2018
  if (weight === 0) {
1366
2019
  return null;
1367
2020
  }
2021
+ if ($('meta[name="searchsocket:noindex"]').attr("content") === "true") {
2022
+ return null;
2023
+ }
2024
+ const RESERVED_META_KEYS = /* @__PURE__ */ new Set(["noindex", "tags"]);
2025
+ const meta = {};
2026
+ $('meta[name^="searchsocket:"]').each((_i, el) => {
2027
+ const name = $(el).attr("name") ?? "";
2028
+ const key = name.slice("searchsocket:".length);
2029
+ if (!key || RESERVED_META_KEYS.has(key) || !validateMetaKey(key)) return;
2030
+ const content = $(el).attr("content") ?? "";
2031
+ const dataType = $(el).attr("data-type") ?? "string";
2032
+ meta[key] = parseMetaValue(content, dataType);
2033
+ });
2034
+ const componentTags = $('meta[name="searchsocket:tags"]').attr("content")?.trim();
1368
2035
  const description = $("meta[name='description']").attr("content")?.trim() || $("meta[property='og:description']").attr("content")?.trim() || void 0;
1369
2036
  const keywordsRaw = $("meta[name='keywords']").attr("content")?.trim();
1370
2037
  const keywords = keywordsRaw ? keywordsRaw.split(",").map((k) => k.trim()).filter(Boolean) : void 0;
@@ -1376,7 +2043,9 @@ function extractFromHtml(url, html, config) {
1376
2043
  root.find(selector).remove();
1377
2044
  }
1378
2045
  root.find(`[${config.extract.ignoreAttr}]`).remove();
2046
+ preprocessImages(root, $, config.extract.imageDescAttr);
1379
2047
  const outgoingLinks = [];
2048
+ const seenLinkKeys = /* @__PURE__ */ new Set();
1380
2049
  root.find("a[href]").each((_index, node) => {
1381
2050
  const href = $(node).attr("href");
1382
2051
  if (!href || href.startsWith("#") || href.startsWith("mailto:") || href.startsWith("tel:")) {
@@ -1387,7 +2056,19 @@ function extractFromHtml(url, html, config) {
1387
2056
  if (!["http:", "https:"].includes(parsed.protocol)) {
1388
2057
  return;
1389
2058
  }
1390
- outgoingLinks.push(normalizeUrlPath(parsed.pathname));
2059
+ const url2 = normalizeUrlPath(parsed.pathname);
2060
+ let anchorText = normalizeAnchorText($(node).text());
2061
+ if (!anchorText) {
2062
+ const imgAlt = $(node).find("img").first().attr("alt") ?? "";
2063
+ if (isMeaningfulAlt(imgAlt)) {
2064
+ anchorText = normalizeAnchorText(imgAlt);
2065
+ }
2066
+ }
2067
+ const key = `${url2}|${anchorText}`;
2068
+ if (!seenLinkKeys.has(key)) {
2069
+ seenLinkKeys.add(key);
2070
+ outgoingLinks.push({ url: url2, anchorText });
2071
+ }
1391
2072
  } catch {
1392
2073
  }
1393
2074
  });
@@ -1412,16 +2093,25 @@ function extractFromHtml(url, html, config) {
1412
2093
  return null;
1413
2094
  }
1414
2095
  const tags = normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1);
2096
+ const publishedAt = extractPublishedAtFromHtml($);
2097
+ if (componentTags) {
2098
+ const extraTags = componentTags.split(",").map((t) => t.trim()).filter(Boolean);
2099
+ for (const t of extraTags) {
2100
+ if (!tags.includes(t)) tags.push(t);
2101
+ }
2102
+ }
1415
2103
  return {
1416
2104
  url: normalizeUrlPath(url),
1417
2105
  title,
1418
2106
  markdown,
1419
- outgoingLinks: [...new Set(outgoingLinks)],
2107
+ outgoingLinks,
1420
2108
  noindex: false,
1421
2109
  tags,
1422
2110
  description,
1423
2111
  keywords,
1424
- weight
2112
+ weight,
2113
+ publishedAt,
2114
+ meta: Object.keys(meta).length > 0 ? meta : void 0
1425
2115
  };
1426
2116
  }
1427
2117
  function extractFromMarkdown(url, markdown, title) {
@@ -1442,6 +2132,24 @@ function extractFromMarkdown(url, markdown, title) {
1442
2132
  if (mdWeight === 0) {
1443
2133
  return null;
1444
2134
  }
2135
+ let mdMeta;
2136
+ const rawMeta = searchsocketMeta?.meta;
2137
+ if (rawMeta && typeof rawMeta === "object" && !Array.isArray(rawMeta)) {
2138
+ const metaObj = {};
2139
+ for (const [key, val] of Object.entries(rawMeta)) {
2140
+ if (!validateMetaKey(key)) continue;
2141
+ if (typeof val === "string" || typeof val === "number" || typeof val === "boolean") {
2142
+ metaObj[key] = val;
2143
+ } else if (Array.isArray(val) && val.every((v) => typeof v === "string")) {
2144
+ metaObj[key] = val;
2145
+ } else if (val instanceof Date) {
2146
+ metaObj[key] = val.getTime();
2147
+ }
2148
+ }
2149
+ if (Object.keys(metaObj).length > 0) {
2150
+ mdMeta = metaObj;
2151
+ }
2152
+ }
1445
2153
  const content = parsed.content;
1446
2154
  const normalized = normalizeMarkdown(content);
1447
2155
  if (!normalizeText(normalized)) {
@@ -1456,6 +2164,7 @@ function extractFromMarkdown(url, markdown, title) {
1456
2164
  fmKeywords = frontmatter.keywords.split(",").map((k) => k.trim()).filter(Boolean);
1457
2165
  }
1458
2166
  if (fmKeywords && fmKeywords.length === 0) fmKeywords = void 0;
2167
+ const publishedAt = extractPublishedAtFromFrontmatter(frontmatter);
1459
2168
  return {
1460
2169
  url: normalizeUrlPath(url),
1461
2170
  title: resolvedTitle,
@@ -1465,7 +2174,9 @@ function extractFromMarkdown(url, markdown, title) {
1465
2174
  tags: normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1),
1466
2175
  description: fmDescription,
1467
2176
  keywords: fmKeywords,
1468
- weight: mdWeight
2177
+ weight: mdWeight,
2178
+ publishedAt,
2179
+ meta: mdMeta
1469
2180
  };
1470
2181
  }
1471
2182
 
@@ -1919,6 +2630,125 @@ function filePathToUrl(filePath, baseDir) {
1919
2630
  const noExt = relative.replace(/\.md$/i, "").replace(/\/index$/i, "");
1920
2631
  return normalizeUrlPath(noExt || "/");
1921
2632
  }
2633
// Matches SvelteKit route file names: +page / +layout / +error, optionally
// followed by a layout reset such as "@(app)", "@item" or a bare "@" (reset to
// the root layout). The "+" must start the file name, so a component called
// "my+page.svelte" is not mistaken for a route file.
var ROUTE_FILE_RE = /(?:^|[\\/])\+(page|layout|error)(@[^.]*)?\.svelte$/;
/**
 * Tells whether a path points at a Svelte component rather than a SvelteKit
 * route file.
 *
 * @param filePath File path or bare file name; "/" and "\" separators are accepted.
 * @returns `true` for `.svelte` files that are not route files, `false` for
 *   route files and for anything that does not end in `.svelte`.
 */
function isSvelteComponentFile(filePath) {
  if (!filePath.endsWith(".svelte")) return false;
  return !ROUTE_FILE_RE.test(filePath);
}
2638
/**
 * Extracts documentation metadata from Svelte component source text.
 *
 * The description comes from a `<!-- @component ... -->` comment. Props come
 * from a `let { ... } = $props()` destructuring; when that destructuring has a
 * type annotation that is either a capitalized type name or an inline object
 * type, prop types are looked up in it.
 *
 * @param source Full text of the `.svelte` file.
 * @returns `{ description, props }` where `description` may be `undefined` and
 *   each prop is `{ name, type?, default? }` (rest elements are skipped).
 */
function extractSvelteComponentMeta(source) {
  const description = source.match(/<!--\s*@component\s*([\s\S]*?)\s*-->/)?.[1]?.trim() || void 0;
  const propsMatch = source.match(
    /let\s+\{([\s\S]*?)\}\s*(?::\s*([^=;{][\s\S]*?))?\s*=\s*\$props\(\)/
  );
  const props = [];
  if (!propsMatch) {
    return { description, props };
  }
  const destructureBlock = propsMatch[1];
  const typeAnnotation = propsMatch[2]?.trim();
  let resolvedTypeMap;
  if (typeAnnotation) {
    if (/^[A-Z]\w*$/.test(typeAnnotation)) {
      // A bare type name: look its declaration up in the same file.
      resolvedTypeMap = resolveTypeReference(source, typeAnnotation);
    } else if (typeAnnotation.startsWith("{")) {
      resolvedTypeMap = parseInlineTypeAnnotation(typeAnnotation);
    }
  }
  for (const entry of splitDestructureBlock(destructureBlock)) {
    const trimmed = entry.trim();
    if (trimmed === "" || trimmed.startsWith("...")) continue;
    // Both patterns capture the prop name in group 1 and an optional default in
    // group 2; the rename form ("name: alias = default") is tried first.
    const structured = trimmed.match(/^(\w+)\s*:\s*\w+\s*(?:=\s*([\s\S]+))?$/) ?? trimmed.match(/^(\w+)\s*=\s*([\s\S]+)$/);
    let propName;
    let defaultValue;
    if (structured) {
      propName = structured[1];
      defaultValue = structured[2]?.trim();
    } else {
      propName = trimmed.match(/^(\w+)/)?.[1] ?? trimmed;
    }
    const prop = { name: propName };
    const propType = resolvedTypeMap?.get(propName);
    if (propType) prop.type = propType;
    if (defaultValue) prop.default = defaultValue;
    props.push(prop);
  }
  return { description, props };
}
2683
/**
 * Splits the inside of a destructuring pattern into its top-level entries.
 *
 * Commas only separate entries when they sit outside every bracket pair
 * (`{}`, `[]`, `()`), outside string/template literals and outside comments,
 * so defaults such as `label = "a, b"` or `open = ")"` stay in one piece.
 *
 * @param block Text between the outer braces of the destructuring.
 * @returns The entries in order, untrimmed; a trailing entry that is empty or
 *   whitespace-only is omitted.
 */
function splitDestructureBlock(block) {
  const entries = [];
  let depth = 0;
  let current = "";
  let i = 0;
  while (i < block.length) {
    const ch = block[i];
    const next = block[i + 1];
    if (ch === "/" && (next === "/" || next === "*")) {
      // Copy a comment through verbatim so quotes or commas in it are inert.
      let stop;
      if (next === "/") {
        const eol = block.indexOf("\n", i + 2);
        stop = eol === -1 ? block.length : eol;
      } else {
        const close = block.indexOf("*/", i + 2);
        stop = close === -1 ? block.length : close + 2;
      }
      current += block.slice(i, stop);
      i = stop;
      continue;
    }
    if (ch === '"' || ch === "'" || ch === "`") {
      // Copy a quoted literal through verbatim, honoring backslash escapes.
      let j = i + 1;
      while (j < block.length && block[j] !== ch) {
        j += block[j] === "\\" ? 2 : 1;
      }
      const stop = Math.min(j + 1, block.length);
      current += block.slice(i, stop);
      i = stop;
      continue;
    }
    if (ch === "," && depth === 0) {
      entries.push(current);
      current = "";
    } else {
      if (ch === "{" || ch === "[" || ch === "(") {
        depth++;
      } else if (ch === "}" || ch === "]" || ch === ")") {
        depth--;
      }
      current += ch;
    }
    i += 1;
  }
  if (current.trim()) entries.push(current);
  return entries;
}
2704
/**
 * Finds `interface <typeName> { ... }` or `type <typeName> = { ... }` in
 * `source` and parses the members of its body.
 *
 * @param source Text to search.
 * @param typeName Name of the interface or type alias (inserted into a regular
 *   expression as-is).
 * @returns Map of member name to type text, or `undefined` when no such
 *   declaration is found or its braces never balance.
 */
function resolveTypeReference(source, typeName) {
  const startRe = new RegExp(`(?:interface\\s+${typeName}\\s*|type\\s+${typeName}\\s*=\\s*)\\{`);
  const opening = source.match(startRe);
  if (!opening || opening.index === void 0) return void 0;
  const bodyStart = opening.index + opening[0].length;
  // Walk forward until the brace opened by the declaration is closed again.
  let closeAt = -1;
  let open = 1;
  for (let pos = bodyStart; pos < source.length; pos += 1) {
    const ch = source[pos];
    if (ch === "{") {
      open += 1;
    } else if (ch === "}") {
      open -= 1;
      if (open === 0) {
        closeAt = pos;
        break;
      }
    }
  }
  if (closeAt === -1) return void 0;
  return parseTypeMembers(source.slice(bodyStart, closeAt));
}
2720
/**
 * Parses an inline object type such as `{ a: string; b?: number }`.
 *
 * @param annotation Type text; one leading `{` and one trailing `}` are stripped when present.
 * @returns Map of member name to type text, as produced by `parseTypeMembers`.
 */
function parseInlineTypeAnnotation(annotation) {
  const from = annotation.startsWith("{") ? 1 : 0;
  const to = annotation.endsWith("}") ? annotation.length - 1 : annotation.length;
  return parseTypeMembers(annotation.slice(from, to));
}
2724
/**
 * Parses the members of an object type body into a name → type map.
 *
 * Members may be separated by `;`, `,` or a newline. Separators only count
 * outside bracket pairs (`{}`, `[]`, `()`, `<>`) and outside quoted literals,
 * so `Record<string, number>`, `(a: string, b: number) => void` and nested
 * object types stay intact. Comments are dropped. Only members of the form
 * `name: type` or `name?: type` are recorded; anything else is ignored.
 *
 * @param body Text between the braces of an interface or object type.
 * @returns Map from member name to its type text (line breaks inside a type
 *   are collapsed to single spaces).
 */
function parseTypeMembers(body) {
  const members = [];
  let current = "";
  let depth = 0;
  let i = 0;
  while (i < body.length) {
    const ch = body[i];
    const next = body[i + 1];
    if (ch === "/" && next === "/") {
      // Line comment: skip to the newline, which is then handled as a separator.
      const eol = body.indexOf("\n", i);
      i = eol === -1 ? body.length : eol;
      continue;
    }
    if (ch === "/" && next === "*") {
      const close = body.indexOf("*/", i + 2);
      i = close === -1 ? body.length : close + 2;
      current += " ";
      continue;
    }
    if (ch === '"' || ch === "'" || ch === "`") {
      // Copy a quoted literal through verbatim, honoring backslash escapes.
      let j = i + 1;
      while (j < body.length && body[j] !== ch) {
        j += body[j] === "\\" ? 2 : 1;
      }
      const stop = Math.min(j + 1, body.length);
      current += body.slice(i, stop);
      i = stop;
      continue;
    }
    if (ch === "{" || ch === "[" || ch === "(" || ch === "<") {
      depth += 1;
    } else if (ch === "}" || ch === "]" || ch === ")" || (ch === ">" && body[i - 1] !== "=")) {
      // The ">" of an arrow "=>" does not close a generic argument list.
      depth = Math.max(0, depth - 1);
    }
    if (depth === 0 && (ch === ";" || ch === "," || ch === "\n")) {
      members.push(current);
      current = "";
    } else {
      current += ch;
    }
    i += 1;
  }
  members.push(current);
  const map = /* @__PURE__ */ new Map();
  for (const raw of members) {
    const memberMatch = raw.trim().match(/^(\w+)\??\s*:\s*([\s\S]+)$/);
    if (memberMatch) {
      map.set(memberMatch[1], memberMatch[2].replace(/\s*\n\s*/g, " ").trim());
    }
  }
  return map;
}
2735
/**
 * Renders component metadata as a single line of descriptive text.
 *
 * @param componentName Name shown at the start of the text.
 * @param meta `{ description?, props }` where each prop is `{ name, type?, default? }`.
 * @returns "" when there is neither a description nor any prop; otherwise
 *   "<name> component." followed by the description (if any) and a
 *   "Props: ..." sentence (if any), joined by single spaces.
 */
function buildComponentMarkdown(componentName, meta) {
  const { description, props } = meta;
  const hasProps = props.length > 0;
  if (!description && !hasProps) return "";
  const describeProp = ({ name, type, default: fallback }) => {
    const typePart = type ? ` (${type})` : "";
    const defaultPart = fallback ? ` default: ${fallback}` : "";
    return `${name}${typePart}${defaultPart}`;
  };
  const sentences = [`${componentName} component.`];
  if (description) sentences.push(description);
  if (hasProps) sentences.push(`Props: ${props.map(describeProp).join(", ")}.`);
  return sentences.join(" ");
}
1922
2752
  function normalizeSvelteToMarkdown(source) {
1923
2753
  return source.replace(/<script[\s\S]*?<\/script>/g, "").replace(/<style[\s\S]*?<\/style>/g, "").replace(/<[^>]+>/g, " ").replace(/\{[^}]+\}/g, " ").replace(/\s+/g, " ").trim();
1924
2754
  }
@@ -1938,12 +2768,26 @@ async function loadContentFilesPages(cwd, config, maxPages) {
1938
2768
  const pages = [];
1939
2769
  for (const filePath of selected) {
1940
2770
  const raw = await fs5.readFile(filePath, "utf8");
1941
- const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
2771
+ let markdown;
2772
+ let tags;
2773
+ if (filePath.endsWith(".md")) {
2774
+ markdown = raw;
2775
+ } else if (isSvelteComponentFile(filePath)) {
2776
+ const componentName = path7.basename(filePath, ".svelte");
2777
+ const meta = extractSvelteComponentMeta(raw);
2778
+ const componentMarkdown = buildComponentMarkdown(componentName, meta);
2779
+ const templateContent = normalizeSvelteToMarkdown(raw);
2780
+ markdown = componentMarkdown ? [componentMarkdown, templateContent].filter(Boolean).join("\n\n") : templateContent;
2781
+ tags = ["component"];
2782
+ } else {
2783
+ markdown = normalizeSvelteToMarkdown(raw);
2784
+ }
1942
2785
  pages.push({
1943
2786
  url: filePathToUrl(filePath, baseDir),
1944
2787
  markdown,
1945
2788
  sourcePath: path7.relative(cwd, filePath).replace(/\\/g, "/"),
1946
- outgoingLinks: []
2789
+ outgoingLinks: [],
2790
+ ...tags ? { tags } : {}
1947
2791
  });
1948
2792
  }
1949
2793
  return pages;
@@ -1958,9 +2802,9 @@ function extractLocs(xml) {
1958
2802
  const $ = cheerioLoad2(xml, { xmlMode: true });
1959
2803
  const locs = [];
1960
2804
  $("loc").each((_i, el) => {
1961
- const text = $(el).text().trim();
1962
- if (text) {
1963
- locs.push(text);
2805
+ const text2 = $(el).text().trim();
2806
+ if (text2) {
2807
+ locs.push(text2);
1964
2808
  }
1965
2809
  });
1966
2810
  return locs;
@@ -2175,32 +3019,68 @@ function nonNegativeOrZero(value) {
2175
3019
  }
2176
3020
  return Math.max(0, value);
2177
3021
  }
2178
/**
 * Normalise text for the substring comparisons used by the title and
 * anchor-text boosts.
 *
 * Lower-cases the input, removes every character that is not a letter,
 * combining mark, number or whitespace, collapses whitespace runs to a single
 * space, and trims. Letters and digits of any script are kept, so non-English
 * titles and queries no longer normalise to an empty string. Output for
 * ASCII-only input is unchanged.
 *
 * @param {string} text2 - Raw title, query or anchor text.
 * @returns {string} Normalised comparison key (may be empty).
 */
function normalizeForTitleMatch(text2) {
  return text2
    .toLowerCase()
    // \p{M} keeps combining marks that are part of words in many scripts.
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}
2181
- function rankHits(hits, config, query) {
3025
+ function rankHits(hits, config, query, debug) {
2182
3026
  const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
2183
3027
  const titleMatchWeight = config.ranking.weights.titleMatch;
2184
3028
  return hits.map((hit) => {
2185
- let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
3029
+ const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
3030
+ let score = baseScore;
3031
+ let incomingLinkBoostValue = 0;
2186
3032
  if (config.ranking.enableIncomingLinkBoost) {
2187
3033
  const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.metadata.incomingLinks));
2188
- score += incomingBoost * config.ranking.weights.incomingLinks;
3034
+ incomingLinkBoostValue = incomingBoost * config.ranking.weights.incomingLinks;
3035
+ score += incomingLinkBoostValue;
2189
3036
  }
3037
+ let depthBoostValue = 0;
2190
3038
  if (config.ranking.enableDepthBoost) {
2191
3039
  const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
2192
- score += depthBoost * config.ranking.weights.depth;
3040
+ depthBoostValue = depthBoost * config.ranking.weights.depth;
3041
+ score += depthBoostValue;
2193
3042
  }
3043
+ let titleMatchBoostValue = 0;
2194
3044
  if (normalizedQuery && titleMatchWeight > 0) {
2195
3045
  const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
2196
3046
  if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
2197
- score += titleMatchWeight;
3047
+ titleMatchBoostValue = titleMatchWeight;
3048
+ score += titleMatchBoostValue;
2198
3049
  }
2199
3050
  }
2200
- return {
2201
- hit,
2202
- finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
2203
- };
3051
+ let freshnessBoostValue = 0;
3052
+ if (config.ranking.enableFreshnessBoost) {
3053
+ const publishedAt = hit.metadata.publishedAt;
3054
+ if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
3055
+ const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
3056
+ const decay = 1 / (1 + nonNegativeOrZero(daysSince) * config.ranking.freshnessDecayRate);
3057
+ freshnessBoostValue = decay * config.ranking.weights.freshness;
3058
+ score += freshnessBoostValue;
3059
+ }
3060
+ }
3061
+ let anchorTextMatchBoostValue = 0;
3062
+ if (config.ranking.enableAnchorTextBoost && normalizedQuery && config.ranking.weights.anchorText > 0) {
3063
+ const normalizedAnchorText = normalizeForTitleMatch(hit.metadata.incomingAnchorText ?? "");
3064
+ if (normalizedAnchorText.length > 0 && normalizedQuery.length > 0 && (normalizedAnchorText.includes(normalizedQuery) || normalizedQuery.includes(normalizedAnchorText))) {
3065
+ anchorTextMatchBoostValue = config.ranking.weights.anchorText;
3066
+ score += anchorTextMatchBoostValue;
3067
+ }
3068
+ }
3069
+ const result = {
3070
+ hit,
3071
+ finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
3072
+ };
3073
+ if (debug) {
3074
+ result.breakdown = {
3075
+ baseScore,
3076
+ incomingLinkBoost: incomingLinkBoostValue,
3077
+ depthBoost: depthBoostValue,
3078
+ titleMatchBoost: titleMatchBoostValue,
3079
+ freshnessBoost: freshnessBoostValue,
3080
+ anchorTextMatchBoost: anchorTextMatchBoostValue
3081
+ };
3082
+ }
3083
+ return result;
2204
3084
  }).sort((a, b) => {
2205
3085
  const delta = b.finalScore - a.finalScore;
2206
3086
  return Number.isNaN(delta) ? 0 : delta;
@@ -2209,12 +3089,13 @@ function rankHits(hits, config, query) {
2209
3089
  function trimByScoreGap(results, config) {
2210
3090
  if (results.length === 0) return results;
2211
3091
  const threshold = config.ranking.scoreGapThreshold;
2212
- const minScore = config.ranking.minScore;
2213
- if (minScore > 0 && results.length > 0) {
2214
- const sortedScores = results.map((r) => r.pageScore).sort((a, b) => a - b);
2215
- const mid = Math.floor(sortedScores.length / 2);
2216
- const median = sortedScores.length % 2 === 0 ? (sortedScores[mid - 1] + sortedScores[mid]) / 2 : sortedScores[mid];
2217
- if (median < minScore) return [];
3092
+ const minScoreRatio = config.ranking.minScoreRatio;
3093
+ if (minScoreRatio > 0 && results.length > 0) {
3094
+ const topScore = results[0].pageScore;
3095
+ if (Number.isFinite(topScore) && topScore > 0) {
3096
+ const minThreshold = topScore * minScoreRatio;
3097
+ results = results.filter((r) => r.pageScore >= minThreshold);
3098
+ }
2218
3099
  }
2219
3100
  if (threshold > 0 && results.length > 1) {
2220
3101
  for (let i = 1; i < results.length; i++) {
@@ -2284,61 +3165,99 @@ function aggregateByPage(ranked, config) {
2284
3165
  return Number.isNaN(delta) ? 0 : delta;
2285
3166
  });
2286
3167
  }
2287
/**
 * Rank page-level search hits.
 *
 * Each hit starts from its raw `score` (non-finite scores become -Infinity).
 * Whichever boosts are enabled in `config.ranking` are added to it: incoming
 * links, URL depth, title match, freshness. The sum is then multiplied by the
 * weight `findPageWeight` returns for the hit's URL. Hits whose weight is
 * exactly 0 are left out. Results are sorted by `finalScore`, highest first.
 *
 * The page weight is looked up once per hit, before any scoring, so excluded
 * pages cost nothing further.
 *
 * @param {Array<object>} pageHits - Page hits; reads `url`, `title`,
 *   `description`, `routeFile`, `depth`, `incomingLinks`, `tags`, `score`
 *   and `publishedAt`.
 * @param {object} config - Resolved config; only `config.ranking` is read.
 * @param {string} [query] - Raw user query, used for the title-match boost.
 * @param {boolean} [debug] - When truthy each result gets a `breakdown`
 *   object listing the individual score components.
 * @returns {Array<object>} Ranked page results.
 */
function rankPageHits(pageHits, config, query, debug) {
  const ranking = config.ranking;
  const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
  const titleMatchWeight = ranking.weights.titleMatch;
  const ranked = [];
  for (const hit of pageHits) {
    const pageWeight = findPageWeight(hit.url, ranking.pageWeights);
    if (pageWeight === 0) {
      continue;
    }
    const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
    let score = baseScore;
    let incomingLinkBoostValue = 0;
    if (ranking.enableIncomingLinkBoost) {
      const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.incomingLinks));
      incomingLinkBoostValue = incomingBoost * ranking.weights.incomingLinks;
      score += incomingLinkBoostValue;
    }
    let depthBoostValue = 0;
    if (ranking.enableDepthBoost) {
      const depthBoost = 1 / (1 + nonNegativeOrZero(hit.depth));
      depthBoostValue = depthBoost * ranking.weights.depth;
      score += depthBoostValue;
    }
    let titleMatchBoostValue = 0;
    if (normalizedQuery && titleMatchWeight > 0) {
      const normalizedTitle = normalizeForTitleMatch(hit.title);
      // Either string containing the other counts as a match.
      const matches = normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle));
      if (matches) {
        titleMatchBoostValue = titleMatchWeight;
        score += titleMatchBoostValue;
      }
    }
    let freshnessBoostValue = 0;
    if (ranking.enableFreshnessBoost) {
      const publishedAt = hit.publishedAt;
      if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
        // 864e5 ms = one day; publish dates in the future count as zero days old.
        const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
        const decay = 1 / (1 + nonNegativeOrZero(daysSince) * ranking.freshnessDecayRate);
        freshnessBoostValue = decay * ranking.weights.freshness;
        score += freshnessBoostValue;
      }
    }
    if (pageWeight !== 1) {
      score *= pageWeight;
    }
    const result = {
      url: hit.url,
      title: hit.title,
      description: hit.description,
      routeFile: hit.routeFile,
      depth: hit.depth,
      incomingLinks: hit.incomingLinks,
      tags: hit.tags,
      baseScore,
      finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY,
      publishedAt: hit.publishedAt
    };
    if (debug) {
      result.breakdown = {
        baseScore,
        pageWeight,
        incomingLinkBoost: incomingLinkBoostValue,
        depthBoost: depthBoostValue,
        titleMatchBoost: titleMatchBoostValue,
        freshnessBoost: freshnessBoostValue
      };
    }
    ranked.push(result);
  }
  return ranked.sort((a, b) => {
    const delta = b.finalScore - a.finalScore;
    // -Infinity minus -Infinity is NaN; treat such pairs as equal.
    return Number.isNaN(delta) ? 0 : delta;
  });
}
3236
/**
 * Trim the low-scoring tail of a ranked page list.
 *
 * Two independent cuts are applied, each only when its setting is > 0:
 *  1. `minScoreRatio`: when the first result's score is finite and positive,
 *     drop every result scoring below `topScore * minScoreRatio`.
 *  2. `scoreGapThreshold`: walk the list and cut at the first position where
 *     the relative drop from the previous (positive) score reaches the
 *     threshold.
 *
 * The input array is never mutated. When nothing is trimmed the same array
 * instance is returned.
 *
 * @param {Array<{finalScore: number}>} results - Pages in ranked order; the
 *   first entry is treated as the top score.
 * @param {object} config - Reads `config.ranking.scoreGapThreshold` and
 *   `config.ranking.minScoreRatio`.
 * @returns {Array<{finalScore: number}>} The retained leading results.
 */
function trimPagesByScoreGap(results, config) {
  if (results.length === 0) return results;
  const threshold = config.ranking.scoreGapThreshold;
  const minScoreRatio = config.ranking.minScoreRatio;
  let kept = results;
  if (minScoreRatio > 0) {
    const topScore = kept[0].finalScore;
    if (Number.isFinite(topScore) && topScore > 0) {
      const minThreshold = topScore * minScoreRatio;
      kept = kept.filter((r) => r.finalScore >= minThreshold);
    }
  }
  if (threshold > 0) {
    for (let i = 1; i < kept.length; i++) {
      const prev = kept[i - 1].finalScore;
      // A relative gap is only meaningful against a positive previous score.
      if (prev > 0 && (prev - kept[i].finalScore) / prev >= threshold) {
        return kept.slice(0, i);
      }
    }
  }
  return kept;
}
2342
3261
 
2343
3262
  // src/utils/time.ts
2344
3263
  function nowIso() {
@@ -2348,6 +3267,85 @@ function hrTimeMs(start) {
2348
3267
  return Number(process.hrtime.bigint() - start) / 1e6;
2349
3268
  }
2350
3269
 
3270
+ // src/indexing/llms-txt.ts
3271
+ import fs8 from "fs/promises";
3272
+ import path10 from "path";
3273
/**
 * Turn a page URL into an absolute URL against the project's base URL.
 *
 * @param {string} pageUrl - Page URL or path, e.g. "/docs/intro".
 * @param {string} [baseUrl] - Base URL to resolve against.
 * @returns {string} The resolved absolute URL, or `pageUrl` unchanged when no
 *   base URL is given or the pair cannot be parsed as a URL.
 */
function resolvePageUrl(pageUrl, baseUrl) {
  if (baseUrl) {
    try {
      const absolute = new URL(pageUrl, baseUrl);
      return absolute.href;
    } catch {
      // Unparseable input: fall through and return the page URL as given.
    }
  }
  return pageUrl;
}
3281
/**
 * Render the llms.txt index: an H1 title, an optional blockquote description,
 * and a "## Pages" list with one `- [title](url): description` line per page.
 *
 * Pages are ordered by ascending `depth`, then by descending `incomingLinks`.
 * The generated `/llms.txt` and `/llms-full.txt` URLs are excluded. Page
 * titles and descriptions are collapsed to a single line so embedded newlines
 * cannot break the one-line list entries.
 *
 * @param {Array<object>} pages - Reads `url`, `title`, `description`, `depth`
 *   and `incomingLinks` of each page. The array is not mutated.
 * @param {object} config - Reads `config.llmsTxt.title`,
 *   `config.llmsTxt.description`, `config.project.id` and
 *   `config.project.baseUrl`.
 * @returns {string} File contents, ending in a newline.
 */
function generateLlmsTxt(pages, config) {
  const toSingleLine = (value) => String(value ?? "").replace(/\s+/g, " ").trim();
  const title = config.llmsTxt.title ?? config.project.id;
  const description = config.llmsTxt.description;
  const baseUrl = config.project.baseUrl;
  const lines = [`# ${title}`];
  if (description) {
    lines.push("", `> ${description}`);
  }
  // filter() returns a fresh array, so sorting it in place leaves `pages` untouched.
  const sorted = pages
    .filter((p) => p.url !== "/llms.txt" && p.url !== "/llms-full.txt")
    .sort((a, b) => {
      if (a.depth !== b.depth) return a.depth - b.depth;
      return b.incomingLinks - a.incomingLinks;
    });
  if (sorted.length > 0) {
    lines.push("", "## Pages", "");
    for (const page of sorted) {
      const url = resolvePageUrl(page.url, baseUrl);
      const label = toSingleLine(page.title);
      const summary = toSingleLine(page.description);
      lines.push(summary ? `- [${label}](${url}): ${summary}` : `- [${label}](${url})`);
    }
  }
  lines.push("");
  return lines.join("\n");
}
3310
/**
 * Render the llms-full.txt document: an H1 title, an optional blockquote
 * description, then one section per page. Each section is a `---` rule, a
 * `## [title](url)` heading and the page's trimmed markdown.
 *
 * Pages are ordered by ascending `depth`, then by descending `incomingLinks`.
 * The generated `/llms.txt` and `/llms-full.txt` URLs are excluded. Page
 * titles are collapsed to one line so an embedded newline cannot split the
 * heading.
 *
 * @param {Array<object>} pages - Reads `url`, `title`, `markdown`, `depth`
 *   and `incomingLinks` of each page. The array is not mutated.
 * @param {object} config - Reads `config.llmsTxt.title`,
 *   `config.llmsTxt.description`, `config.project.id` and
 *   `config.project.baseUrl`.
 * @returns {string} File contents, ending in a newline.
 */
function generateLlmsFullTxt(pages, config) {
  const toSingleLine = (value) => String(value ?? "").replace(/\s+/g, " ").trim();
  const title = config.llmsTxt.title ?? config.project.id;
  const description = config.llmsTxt.description;
  const baseUrl = config.project.baseUrl;
  const lines = [`# ${title}`];
  if (description) {
    lines.push("", `> ${description}`);
  }
  // filter() returns a fresh array, so sorting it in place leaves `pages` untouched.
  const sorted = pages
    .filter((p) => p.url !== "/llms.txt" && p.url !== "/llms-full.txt")
    .sort((a, b) => {
      if (a.depth !== b.depth) return a.depth - b.depth;
      return b.incomingLinks - a.incomingLinks;
    });
  for (const page of sorted) {
    const url = resolvePageUrl(page.url, baseUrl);
    lines.push("", "---", "", `## [${toSingleLine(page.title)}](${url})`, "");
    lines.push(page.markdown.trim());
  }
  lines.push("");
  return lines.join("\n");
}
3333
/**
 * Write the llms.txt index and, when `config.llmsTxt.generateFull` is set,
 * the companion "-full" document next to it.
 *
 * The companion path is built by inserting `-full` before the file extension
 * (`llms.txt` -> `llms-full.txt`, `llms.md` -> `llms-full.md`,
 * `llms` -> `llms-full`). Deriving it only by replacing a trailing `.txt`
 * would yield the index path itself for any other file name, and the full
 * document would then overwrite the index.
 *
 * @param {Array<object>} pages - Pages passed through to the generators.
 * @param {object} config - Reads `config.llmsTxt.outputPath` and
 *   `config.llmsTxt.generateFull`.
 * @param {string} cwd - Directory that `outputPath` is resolved against.
 * @param {{info: Function}} logger3 - Receives one info line per file written.
 * @returns {Promise<void>}
 */
async function writeLlmsTxt(pages, config, cwd, logger3) {
  const outputPath = path10.resolve(cwd, config.llmsTxt.outputPath);
  await fs8.mkdir(path10.dirname(outputPath), { recursive: true });
  await fs8.writeFile(outputPath, generateLlmsTxt(pages, config), "utf8");
  logger3.info(`Generated llms.txt at ${config.llmsTxt.outputPath}`);
  if (!config.llmsTxt.generateFull) {
    return;
  }
  let fullPath;
  if (/\.txt$/.test(outputPath)) {
    // Unchanged behaviour for the common ".txt" case.
    fullPath = outputPath.replace(/\.txt$/, "-full.txt");
  } else {
    const { dir, name, ext } = path10.parse(outputPath);
    fullPath = path10.join(dir, `${name}-full${ext}`);
  }
  await fs8.writeFile(fullPath, generateLlmsFullTxt(pages, config), "utf8");
  const relativeFull = path10.relative(cwd, fullPath);
  logger3.info(`Generated llms-full.txt at ${relativeFull}`);
}
3348
+
2351
3349
  // src/indexing/pipeline.ts
2352
3350
  function buildPageSummary(page, maxChars = 3500) {
2353
3351
  const parts = [page.title];
@@ -2365,26 +3363,44 @@ function buildPageSummary(page, maxChars = 3500) {
2365
3363
  if (joined.length <= maxChars) return joined;
2366
3364
  return joined.slice(0, maxChars).trim();
2367
3365
  }
3366
/**
 * Build the change-detection hash for an indexed page.
 *
 * The hash input is the "|"-joined list of: title, description, sorted
 * keywords, sorted tags, markdown, outgoing link count, publish timestamp,
 * incoming anchor text, sorted outgoing link URLs, and a canonical JSON form
 * of `meta`. The joined string is passed to `sha256`.
 *
 * `meta` is canonicalised by recursively sorting object keys. Passing the
 * sorted top-level keys as a `JSON.stringify` replacer array would act as an
 * allow-list at every nesting level and silently drop nested fields, so
 * changes inside nested `meta` values would not alter the hash. Output for a
 * flat `meta` object is unchanged, apart from integer-like keys, which JSON
 * emits in numeric rather than lexicographic order.
 *
 * @param {object} page - Reads `title`, `description`, `keywords`, `tags`,
 *   `markdown`, `outgoingLinks`, `publishedAt`, `incomingAnchorText`,
 *   `outgoingLinkUrls` and `meta`.
 * @returns {*} Whatever `sha256` returns for the joined string.
 */
function buildPageContentHash(page) {
  const canonicalize = (value) => {
    if (Array.isArray(value)) return value.map(canonicalize);
    if (value === null || typeof value !== "object") return value;
    // Leave objects with their own JSON form (e.g. Date) for JSON.stringify.
    if (typeof value.toJSON === "function") return value;
    return Object.fromEntries(
      Object.keys(value).sort().map((key) => [key, canonicalize(value[key])])
    );
  };
  const parts = [
    page.title,
    page.description ?? "",
    (page.keywords ?? []).slice().sort().join(","),
    page.tags.slice().sort().join(","),
    page.markdown,
    String(page.outgoingLinks),
    String(page.publishedAt ?? ""),
    page.incomingAnchorText ?? "",
    (page.outgoingLinkUrls ?? []).slice().sort().join(","),
    page.meta ? JSON.stringify(canonicalize(page.meta)) : ""
  ];
  return sha256(parts.join("|"));
}
2368
3381
  var IndexPipeline = class _IndexPipeline {
2369
3382
  cwd;
2370
3383
  config;
2371
3384
  store;
2372
3385
  logger;
3386
+ hooks;
2373
3387
  constructor(options) {
2374
3388
  this.cwd = options.cwd;
2375
3389
  this.config = options.config;
2376
3390
  this.store = options.store;
2377
3391
  this.logger = options.logger;
3392
+ this.hooks = options.hooks;
2378
3393
  }
2379
3394
  static async create(options = {}) {
2380
- const cwd = path10.resolve(options.cwd ?? process.cwd());
3395
+ const cwd = path11.resolve(options.cwd ?? process.cwd());
2381
3396
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
2382
3397
  const store = options.store ?? await createUpstashStore(config);
2383
3398
  return new _IndexPipeline({
2384
3399
  cwd,
2385
3400
  config,
2386
3401
  store,
2387
- logger: options.logger ?? new Logger()
3402
+ logger: options.logger ?? new Logger(),
3403
+ hooks: options.hooks ?? {}
2388
3404
  });
2389
3405
  }
2390
3406
  getConfig() {
@@ -2405,7 +3421,7 @@ var IndexPipeline = class _IndexPipeline {
2405
3421
  const scope = resolveScope(this.config, options.scopeOverride);
2406
3422
  ensureStateDirs(this.cwd, this.config.state.dir, scope);
2407
3423
  const sourceMode = options.sourceOverride ?? this.config.source.mode;
2408
- this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
3424
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-vector)`);
2409
3425
  if (options.force) {
2410
3426
  this.logger.info("Force mode enabled \u2014 full rebuild");
2411
3427
  }
@@ -2413,9 +3429,9 @@ var IndexPipeline = class _IndexPipeline {
2413
3429
  this.logger.info("Dry run \u2014 no writes will be performed");
2414
3430
  }
2415
3431
  const manifestStart = stageStart();
2416
- const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
3432
+ const existingPageHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getPageHashes(scope);
2417
3433
  stageEnd("manifest", manifestStart);
2418
- this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
3434
+ this.logger.debug(`Manifest: ${existingPageHashes.size} existing page hashes loaded`);
2419
3435
  const sourceStart = stageStart();
2420
3436
  this.logger.info(`Loading pages (source: ${sourceMode})...`);
2421
3437
  let sourcePages;
@@ -2451,11 +3467,11 @@ var IndexPipeline = class _IndexPipeline {
2451
3467
  let robotsRules = null;
2452
3468
  if (sourceMode === "static-output") {
2453
3469
  robotsRules = await loadRobotsTxtFromDir(
2454
- path10.resolve(this.cwd, this.config.source.staticOutputDir)
3470
+ path11.resolve(this.cwd, this.config.source.staticOutputDir)
2455
3471
  );
2456
3472
  } else if (sourceMode === "build" && this.config.source.build) {
2457
3473
  robotsRules = await loadRobotsTxtFromDir(
2458
- path10.resolve(this.cwd, this.config.source.build.outputDir)
3474
+ path11.resolve(this.cwd, this.config.source.build.outputDir)
2459
3475
  );
2460
3476
  } else if (sourceMode === "crawl" && this.config.source.crawl) {
2461
3477
  robotsRules = await fetchRobotsTxt(this.config.source.crawl.baseUrl);
@@ -2492,11 +3508,61 @@ var IndexPipeline = class _IndexPipeline {
2492
3508
  );
2493
3509
  continue;
2494
3510
  }
2495
- extractedPages.push(extracted);
3511
+ if (sourcePage.tags && sourcePage.tags.length > 0) {
3512
+ extracted.tags = [.../* @__PURE__ */ new Set([...extracted.tags, ...sourcePage.tags])];
3513
+ }
3514
+ let accepted;
3515
+ if (this.hooks.transformPage) {
3516
+ const transformed = await this.hooks.transformPage(extracted);
3517
+ if (transformed === null) {
3518
+ this.logger.debug(`Page ${sourcePage.url} skipped by transformPage hook`);
3519
+ continue;
3520
+ }
3521
+ accepted = transformed;
3522
+ } else {
3523
+ accepted = extracted;
3524
+ }
3525
+ extractedPages.push(accepted);
2496
3526
  this.logger.event("page_extracted", {
2497
- url: extracted.url
3527
+ url: accepted.url
2498
3528
  });
2499
3529
  }
3530
+ const customRecords = options.customRecords ?? [];
3531
+ if (customRecords.length > 0) {
3532
+ this.logger.info(`Processing ${customRecords.length} custom record${customRecords.length === 1 ? "" : "s"}...`);
3533
+ for (const record of customRecords) {
3534
+ const normalizedUrl = normalizeUrlPath(record.url);
3535
+ const normalized = normalizeMarkdown(record.content);
3536
+ if (!normalized.trim()) {
3537
+ this.logger.warn(`Custom record ${normalizedUrl} has empty content and was skipped.`);
3538
+ continue;
3539
+ }
3540
+ const urlTags = normalizedUrl.split("/").filter(Boolean).slice(0, 1);
3541
+ const tags = record.tags ? [.../* @__PURE__ */ new Set([...urlTags, ...record.tags])] : urlTags;
3542
+ const extracted = {
3543
+ url: normalizedUrl,
3544
+ title: record.title,
3545
+ markdown: normalized,
3546
+ outgoingLinks: [],
3547
+ noindex: false,
3548
+ tags,
3549
+ weight: record.weight
3550
+ };
3551
+ let accepted;
3552
+ if (this.hooks.transformPage) {
3553
+ const transformed = await this.hooks.transformPage(extracted);
3554
+ if (transformed === null) {
3555
+ this.logger.debug(`Custom record ${normalizedUrl} skipped by transformPage hook`);
3556
+ continue;
3557
+ }
3558
+ accepted = transformed;
3559
+ } else {
3560
+ accepted = extracted;
3561
+ }
3562
+ extractedPages.push(accepted);
3563
+ this.logger.event("page_extracted", { url: accepted.url, custom: true });
3564
+ }
3565
+ }
2500
3566
  extractedPages.sort((a, b) => a.url.localeCompare(b.url));
2501
3567
  const uniquePages = [];
2502
3568
  const seenUrls = /* @__PURE__ */ new Set();
@@ -2529,15 +3595,28 @@ var IndexPipeline = class _IndexPipeline {
2529
3595
  const linkStart = stageStart();
2530
3596
  const pageSet = new Set(indexablePages.map((page) => normalizeUrlPath(page.url)));
2531
3597
  const incomingLinkCount = /* @__PURE__ */ new Map();
3598
+ const incomingAnchorTexts = /* @__PURE__ */ new Map();
2532
3599
  for (const page of indexablePages) {
2533
3600
  incomingLinkCount.set(page.url, incomingLinkCount.get(page.url) ?? 0);
2534
3601
  }
2535
3602
  for (const page of indexablePages) {
2536
- for (const outgoing of page.outgoingLinks) {
3603
+ const seenForCount = /* @__PURE__ */ new Set();
3604
+ const seenForAnchor = /* @__PURE__ */ new Set();
3605
+ for (const { url: outgoing, anchorText } of page.outgoingLinks) {
2537
3606
  if (!pageSet.has(outgoing)) {
2538
3607
  continue;
2539
3608
  }
2540
- incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
3609
+ if (!seenForCount.has(outgoing)) {
3610
+ seenForCount.add(outgoing);
3611
+ incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
3612
+ }
3613
+ if (anchorText && !seenForAnchor.has(outgoing)) {
3614
+ seenForAnchor.add(outgoing);
3615
+ if (!incomingAnchorTexts.has(outgoing)) {
3616
+ incomingAnchorTexts.set(outgoing, /* @__PURE__ */ new Set());
3617
+ }
3618
+ incomingAnchorTexts.get(outgoing).add(anchorText);
3619
+ }
2541
3620
  }
2542
3621
  }
2543
3622
  stageEnd("links", linkStart);
@@ -2556,6 +3635,15 @@ var IndexPipeline = class _IndexPipeline {
2556
3635
  });
2557
3636
  }
2558
3637
  }
3638
+ for (const record of customRecords) {
3639
+ const normalizedUrl = normalizeUrlPath(record.url);
3640
+ if (!precomputedRoutes.has(normalizedUrl)) {
3641
+ precomputedRoutes.set(normalizedUrl, {
3642
+ routeFile: "",
3643
+ routeResolution: "exact"
3644
+ });
3645
+ }
3646
+ }
2559
3647
  for (const page of indexablePages) {
2560
3648
  const routeMatch = precomputedRoutes.get(normalizeUrlPath(page.url)) ?? mapUrlToRoute(page.url, routePatterns);
2561
3649
  if (routeMatch.routeResolution === "best-effort") {
@@ -2573,6 +3661,17 @@ var IndexPipeline = class _IndexPipeline {
2573
3661
  } else {
2574
3662
  routeExact += 1;
2575
3663
  }
3664
+ const anchorSet = incomingAnchorTexts.get(page.url);
3665
+ let incomingAnchorText;
3666
+ if (anchorSet && anchorSet.size > 0) {
3667
+ let joined = "";
3668
+ for (const phrase of anchorSet) {
3669
+ const next = joined ? `${joined} ${phrase}` : phrase;
3670
+ if (next.length > 500) break;
3671
+ joined = next;
3672
+ }
3673
+ incomingAnchorText = joined || void 0;
3674
+ }
2576
3675
  const indexedPage = {
2577
3676
  url: page.url,
2578
3677
  title: page.title,
@@ -2582,40 +3681,113 @@ var IndexPipeline = class _IndexPipeline {
2582
3681
  generatedAt: nowIso(),
2583
3682
  incomingLinks: incomingLinkCount.get(page.url) ?? 0,
2584
3683
  outgoingLinks: page.outgoingLinks.length,
3684
+ outgoingLinkUrls: page.outgoingLinks.map((l) => typeof l === "string" ? l : l.url),
2585
3685
  depth: getUrlDepth(page.url),
2586
3686
  tags: page.tags,
2587
3687
  markdown: page.markdown,
2588
3688
  description: page.description,
2589
- keywords: page.keywords
3689
+ keywords: page.keywords,
3690
+ publishedAt: page.publishedAt,
3691
+ incomingAnchorText,
3692
+ meta: page.meta
2590
3693
  };
2591
3694
  pages.push(indexedPage);
2592
3695
  this.logger.event("page_indexed", { url: page.url });
2593
3696
  }
3697
+ const pageRecords = pages.map((p) => {
3698
+ const summary = buildPageSummary(p);
3699
+ return {
3700
+ url: p.url,
3701
+ title: p.title,
3702
+ markdown: p.markdown,
3703
+ projectId: scope.projectId,
3704
+ scopeName: scope.scopeName,
3705
+ routeFile: p.routeFile,
3706
+ routeResolution: p.routeResolution,
3707
+ incomingLinks: p.incomingLinks,
3708
+ outgoingLinks: p.outgoingLinks,
3709
+ outgoingLinkUrls: p.outgoingLinkUrls,
3710
+ depth: p.depth,
3711
+ tags: p.tags,
3712
+ indexedAt: p.generatedAt,
3713
+ summary,
3714
+ description: p.description,
3715
+ keywords: p.keywords,
3716
+ contentHash: buildPageContentHash(p),
3717
+ publishedAt: p.publishedAt,
3718
+ meta: p.meta
3719
+ };
3720
+ });
3721
+ const currentPageUrls = new Set(pageRecords.map((r) => r.url));
3722
+ const changedPages = pageRecords.filter(
3723
+ (r) => !existingPageHashes.has(r.url) || existingPageHashes.get(r.url) !== r.contentHash
3724
+ );
3725
+ const deletedPageUrls = [...existingPageHashes.keys()].filter((url) => !currentPageUrls.has(url));
2594
3726
  if (!options.dryRun) {
2595
- const pageRecords = pages.map((p) => {
2596
- const summary = buildPageSummary(p);
2597
- return {
2598
- url: p.url,
2599
- title: p.title,
2600
- markdown: p.markdown,
2601
- projectId: scope.projectId,
2602
- scopeName: scope.scopeName,
2603
- routeFile: p.routeFile,
2604
- routeResolution: p.routeResolution,
2605
- incomingLinks: p.incomingLinks,
2606
- outgoingLinks: p.outgoingLinks,
2607
- depth: p.depth,
2608
- tags: p.tags,
2609
- indexedAt: p.generatedAt,
2610
- summary,
2611
- description: p.description,
2612
- keywords: p.keywords
2613
- };
2614
- });
2615
- await this.store.deletePages(scope);
2616
- await this.store.upsertPages(pageRecords, scope);
3727
+ if (options.force) {
3728
+ await this.store.deletePages(scope);
3729
+ this.logger.info(`Upserting ${pageRecords.length} page summaries...`);
3730
+ const pageDocs = pageRecords.map((r) => ({
3731
+ id: r.url,
3732
+ data: r.summary ?? r.title,
3733
+ metadata: {
3734
+ title: r.title,
3735
+ url: r.url,
3736
+ description: r.description ?? "",
3737
+ keywords: r.keywords ?? [],
3738
+ summary: r.summary ?? "",
3739
+ tags: r.tags,
3740
+ markdown: r.markdown,
3741
+ routeFile: r.routeFile,
3742
+ routeResolution: r.routeResolution,
3743
+ incomingLinks: r.incomingLinks,
3744
+ outgoingLinks: r.outgoingLinks,
3745
+ outgoingLinkUrls: r.outgoingLinkUrls ?? [],
3746
+ depth: r.depth,
3747
+ indexedAt: r.indexedAt,
3748
+ contentHash: r.contentHash ?? "",
3749
+ publishedAt: r.publishedAt ?? null,
3750
+ ...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
3751
+ }
3752
+ }));
3753
+ await this.store.upsertPages(pageDocs, scope);
3754
+ } else {
3755
+ if (changedPages.length > 0) {
3756
+ this.logger.info(`Upserting ${changedPages.length} changed page summaries...`);
3757
+ const pageDocs = changedPages.map((r) => ({
3758
+ id: r.url,
3759
+ data: r.summary ?? r.title,
3760
+ metadata: {
3761
+ title: r.title,
3762
+ url: r.url,
3763
+ description: r.description ?? "",
3764
+ keywords: r.keywords ?? [],
3765
+ summary: r.summary ?? "",
3766
+ tags: r.tags,
3767
+ markdown: r.markdown,
3768
+ routeFile: r.routeFile,
3769
+ routeResolution: r.routeResolution,
3770
+ incomingLinks: r.incomingLinks,
3771
+ outgoingLinks: r.outgoingLinks,
3772
+ outgoingLinkUrls: r.outgoingLinkUrls ?? [],
3773
+ depth: r.depth,
3774
+ indexedAt: r.indexedAt,
3775
+ contentHash: r.contentHash ?? "",
3776
+ publishedAt: r.publishedAt ?? null,
3777
+ ...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
3778
+ }
3779
+ }));
3780
+ await this.store.upsertPages(pageDocs, scope);
3781
+ }
3782
+ if (deletedPageUrls.length > 0) {
3783
+ await this.store.deletePagesByIds(deletedPageUrls, scope);
3784
+ }
3785
+ }
2617
3786
  }
3787
+ const pagesChanged = options.force ? pageRecords.length : changedPages.length;
3788
+ const pagesDeleted = deletedPageUrls.length;
2618
3789
  stageEnd("pages", pagesStart);
3790
+ this.logger.info(`Page changes: ${pagesChanged} changed/new, ${pagesDeleted} deleted, ${pageRecords.length - changedPages.length} unchanged`);
2619
3791
  this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
2620
3792
  const chunkStart = stageStart();
2621
3793
  this.logger.info("Chunking pages...");
@@ -2624,6 +3796,18 @@ var IndexPipeline = class _IndexPipeline {
2624
3796
  if (typeof maxChunks === "number") {
2625
3797
  chunks = chunks.slice(0, maxChunks);
2626
3798
  }
3799
+ if (this.hooks.transformChunk) {
3800
+ const transformed = [];
3801
+ for (const chunk of chunks) {
3802
+ const result = await this.hooks.transformChunk(chunk);
3803
+ if (result === null) {
3804
+ this.logger.debug(`Chunk ${chunk.chunkKey} skipped by transformChunk hook`);
3805
+ continue;
3806
+ }
3807
+ transformed.push(result);
3808
+ }
3809
+ chunks = transformed;
3810
+ }
2627
3811
  for (const chunk of chunks) {
2628
3812
  this.logger.event("chunked", {
2629
3813
  url: chunk.url,
@@ -2636,7 +3820,12 @@ var IndexPipeline = class _IndexPipeline {
2636
3820
  for (const chunk of chunks) {
2637
3821
  currentChunkMap.set(chunk.chunkKey, chunk);
2638
3822
  }
2639
- const changedChunks = chunks.filter((chunk) => {
3823
+ const chunkHashStart = stageStart();
3824
+ const currentChunkKeys = chunks.map((c) => c.chunkKey);
3825
+ const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.fetchContentHashesForKeys(currentChunkKeys, scope);
3826
+ stageEnd("chunk_hashes", chunkHashStart);
3827
+ this.logger.debug(`Fetched ${existingHashes.size} existing chunk hashes for ${currentChunkKeys.length} current keys`);
3828
+ let changedChunks = chunks.filter((chunk) => {
2640
3829
  if (options.force) {
2641
3830
  return true;
2642
3831
  }
@@ -2649,39 +3838,45 @@ var IndexPipeline = class _IndexPipeline {
2649
3838
  }
2650
3839
  return existingHash !== chunk.contentHash;
2651
3840
  });
2652
- const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
3841
+ const existingChunkIds = options.force ? /* @__PURE__ */ new Set() : await this.store.scanChunkIds(scope);
3842
+ const deletes = [...existingChunkIds].filter((chunkKey) => !currentChunkMap.has(chunkKey));
3843
+ if (this.hooks.beforeIndex) {
3844
+ changedChunks = await this.hooks.beforeIndex(changedChunks);
3845
+ }
2653
3846
  this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
2654
3847
  const upsertStart = stageStart();
2655
3848
  let documentsUpserted = 0;
2656
3849
  if (!options.dryRun && changedChunks.length > 0) {
2657
- this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
2658
- const UPSTASH_CONTENT_LIMIT = 4096;
2659
- const FIELD_OVERHEAD = 200;
2660
- const MAX_TEXT_CHARS = UPSTASH_CONTENT_LIMIT - FIELD_OVERHEAD;
3850
+ this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
2661
3851
  const docs = changedChunks.map((chunk) => {
2662
- const title = chunk.title;
2663
- const sectionTitle = chunk.sectionTitle ?? "";
2664
- const url = chunk.url;
2665
- const tags = chunk.tags.join(",");
2666
- const headingPath = chunk.headingPath.join(" > ");
2667
- const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
2668
- const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
2669
- const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
3852
+ const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
3853
+ if (embeddingText.length > 2e3) {
3854
+ this.logger.warn(
3855
+ `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
3856
+ );
3857
+ }
2670
3858
  return {
2671
3859
  id: chunk.chunkKey,
2672
- content: { title, sectionTitle, text, url, tags, headingPath },
3860
+ data: embeddingText,
2673
3861
  metadata: {
2674
- projectId: scope.projectId,
2675
- scopeName: scope.scopeName,
3862
+ url: chunk.url,
2676
3863
  path: chunk.path,
3864
+ title: chunk.title,
3865
+ sectionTitle: chunk.sectionTitle ?? "",
3866
+ headingPath: chunk.headingPath.join(" > "),
2677
3867
  snippet: chunk.snippet,
3868
+ chunkText: embeddingText,
3869
+ tags: chunk.tags,
2678
3870
  ordinal: chunk.ordinal,
2679
3871
  contentHash: chunk.contentHash,
2680
3872
  depth: chunk.depth,
2681
3873
  incomingLinks: chunk.incomingLinks,
2682
3874
  routeFile: chunk.routeFile,
2683
3875
  description: chunk.description ?? "",
2684
- keywords: (chunk.keywords ?? []).join(",")
3876
+ keywords: chunk.keywords ?? [],
3877
+ publishedAt: chunk.publishedAt ?? null,
3878
+ incomingAnchorText: chunk.incomingAnchorText ?? "",
3879
+ ...chunk.meta && Object.keys(chunk.meta).length > 0 ? { meta: chunk.meta } : {}
2685
3880
  }
2686
3881
  };
2687
3882
  });
@@ -2699,9 +3894,16 @@ var IndexPipeline = class _IndexPipeline {
2699
3894
  } else {
2700
3895
  this.logger.info("No chunks to upsert \u2014 all up to date");
2701
3896
  }
3897
+ if (this.config.llmsTxt.enable && !options.dryRun) {
3898
+ const llmsStart = stageStart();
3899
+ await writeLlmsTxt(pages, this.config, this.cwd, this.logger);
3900
+ stageEnd("llms_txt", llmsStart);
3901
+ }
2702
3902
  this.logger.info("Done.");
2703
- return {
3903
+ const stats = {
2704
3904
  pagesProcessed: pages.length,
3905
+ pagesChanged,
3906
+ pagesDeleted,
2705
3907
  chunksTotal: chunks.length,
2706
3908
  chunksChanged: changedChunks.length,
2707
3909
  documentsUpserted,
@@ -2710,10 +3912,15 @@ var IndexPipeline = class _IndexPipeline {
2710
3912
  routeBestEffort,
2711
3913
  stageTimingsMs
2712
3914
  };
3915
+ if (this.hooks.afterIndex) {
3916
+ await this.hooks.afterIndex(stats);
3917
+ }
3918
+ return stats;
2713
3919
  }
2714
3920
  };
2715
3921
 
2716
3922
  // src/mcp/server.ts
3923
+ import { createHash as createHash2, timingSafeEqual } from "crypto";
2717
3924
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2718
3925
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
2719
3926
  import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
@@ -2721,16 +3928,139 @@ import { createMcpExpressApp } from "@modelcontextprotocol/sdk/server/express.js
2721
3928
  import { z as z3 } from "zod";
2722
3929
 
2723
3930
  // src/search/engine.ts
2724
- import path11 from "path";
3931
+ import path12 from "path";
2725
3932
  import { z as z2 } from "zod";
3933
+
3934
+ // src/search/related-pages.ts
/**
 * Dice-style similarity between two URL paths, computed over their shared
 * leading path segments.
 *
 * @param {string} urlA - First URL path (e.g. "/docs/guide").
 * @param {string} urlB - Second URL path.
 * @returns {number} `2 * sharedPrefixSegments / (segmentsA + segmentsB)`;
 *   1 when both paths have no segments, 0 when exactly one has none.
 */
function diceScore(urlA, urlB) {
  const partsA = urlA.split("/").filter(Boolean);
  const partsB = urlB.split("/").filter(Boolean);
  const totalParts = partsA.length + partsB.length;
  // Two root-like paths are treated as identical.
  if (totalParts === 0) return 1;
  if (partsA.length === 0 || partsB.length === 0) return 0;
  // Count matching segments from the left; stop at the first mismatch.
  let prefixLength = 0;
  while (
    prefixLength < partsA.length &&
    prefixLength < partsB.length &&
    partsA[prefixLength] === partsB[prefixLength]
  ) {
    prefixLength += 1;
  }
  return 2 * prefixLength / totalParts;
}
/**
 * Blend the three relatedness signals into a single score.
 *
 * @param {boolean} isLinked - Whether the pages link to each other in either direction.
 * @param {number} dice - URL-path similarity score.
 * @param {number} semantic - Semantic similarity score.
 * @returns {number} 0.5 if linked, plus 0.3 * dice, plus 0.2 * semantic.
 */
function compositeScore(isLinked, dice, semantic) {
  const linkComponent = isLinked ? 0.5 : 0;
  const pathComponent = 0.3 * dice;
  const semanticComponent = 0.2 * semantic;
  // Summed left-to-right so floating-point results match the single-expression form.
  return linkComponent + pathComponent + semanticComponent;
}
/**
 * Pick the single label that best describes how a candidate page relates to
 * the source page. Link relationships win over path similarity, which wins
 * over plain semantic similarity.
 *
 * @param {boolean} isOutgoing - Source page links to the candidate.
 * @param {boolean} isIncoming - Candidate links to the source page.
 * @param {number} dice - URL-path similarity score.
 * @returns {"outgoing_link"|"incoming_link"|"sibling"|"semantic"}
 */
function dominantRelationshipType(isOutgoing, isIncoming, dice) {
  if (isOutgoing) {
    return "outgoing_link";
  }
  if (isIncoming) {
    return "incoming_link";
  }
  return dice > 0.4 ? "sibling" : "semantic";
}
3960
+
3961
+ // src/search/engine.ts
/**
 * Optional per-request overrides for ranking and search tuning. Every field
 * is optional, as is the object itself.
 */
var rankingOverridesSchema = (() => {
  // Each helper returns a fresh schema instance per field.
  const optionalFlag = () => z2.boolean().optional();
  const optionalRatio = () => z2.number().min(0).max(1).optional();
  const optionalWeight = () => z2.number().optional();

  const weights = z2.object({
    incomingLinks: optionalWeight(),
    depth: optionalWeight(),
    aggregation: optionalWeight(),
    titleMatch: optionalWeight()
  }).optional();

  const ranking = z2.object({
    enableIncomingLinkBoost: optionalFlag(),
    enableDepthBoost: optionalFlag(),
    aggregationCap: z2.number().int().positive().optional(),
    aggregationDecay: optionalRatio(),
    minChunkScoreRatio: optionalRatio(),
    minScoreRatio: optionalRatio(),
    scoreGapThreshold: optionalRatio(),
    weights
  }).optional();

  const search = z2.object({
    pageSearchWeight: optionalRatio()
  }).optional();

  return z2.object({ ranking, search }).optional();
})();
/**
 * Validation schema for a search request. Only `q` is required; it is
 * trimmed and must be non-empty after trimming.
 */
var requestSchema = (() => {
  const optionalString = () => z2.string().optional();
  // Metadata filter values are limited to scalar primitives.
  const filterValue = z2.union([z2.string(), z2.number(), z2.boolean()]);
  return z2.object({
    q: z2.string().trim().min(1),
    topK: z2.number().int().positive().max(100).optional(),
    scope: optionalString(),
    pathPrefix: optionalString(),
    tags: z2.array(z2.string()).optional(),
    filters: z2.record(z2.string(), filterValue).optional(),
    groupBy: z2.enum(["page", "chunk"]).optional(),
    maxSubResults: z2.number().int().positive().max(20).optional(),
    debug: z2.boolean().optional(),
    rankingOverrides: rankingOverridesSchema
  });
})();
// Hard upper bound on how many pages a site-structure request may collect.
var MAX_SITE_STRUCTURE_PAGES = 2000;
/**
 * Create an empty site-structure tree node for a URL path.
 *
 * @param {string} url - URL path the node represents.
 * @param {number} depth - Depth recorded on the node (the root is created with 0).
 * @returns {{url: string, title: string, depth: number, routeFile: string,
 *   isIndexed: boolean, childCount: number, children: Array}} A node with no
 *   title, no route file, not marked as indexed, and no children.
 */
function makeNode(url, depth) {
  const node = {
    url,
    title: "",
    depth,
    routeFile: "",
    isIndexed: false,
    childCount: 0,
    children: []
  };
  return node;
}
/**
 * Build a hierarchical site tree from a flat list of indexed pages.
 *
 * Intermediate path segments that have no page of their own still get a
 * placeholder node (isIndexed stays false), so every page has a parent chain
 * up to "/". Children are sorted by URL and each node's childCount is set to
 * its number of direct children.
 *
 * @param {Array<{url: string, title: string, routeFile: string}>} pages - Indexed pages.
 * @param {string} [pathPrefix] - When given, the subtree rooted at this path
 *   is returned instead of the whole tree; an empty placeholder node is
 *   returned when no such node exists.
 * @returns {object} Root node of the tree, or of the requested subtree.
 */
function buildTree(pages, pathPrefix) {
  const splitSegments = (url) => url.split("/").filter(Boolean);
  // Nodes are always keyed by the canonical "/seg/seg" form built from the
  // segments, so lookups never depend on normalizeUrlPath's exact output
  // (a duplicate or trailing slash would otherwise miss the map and crash).
  const keyFor = (segments) => "/" + segments.join("/");

  const nodeMap = /* @__PURE__ */ new Map();
  const root = makeNode("/", 0);
  nodeMap.set("/", root);

  for (const page of pages) {
    const segments = splitSegments(normalizeUrlPath(page.url));
    // Walk down from the root, creating any missing ancestors; the node left
    // in `node` after the walk is the page's own node (the root when the
    // path has no segments).
    let node = root;
    for (let i = 1; i <= segments.length; i++) {
      const partialUrl = keyFor(segments.slice(0, i));
      let existing = nodeMap.get(partialUrl);
      if (!existing) {
        existing = makeNode(partialUrl, i);
        nodeMap.set(partialUrl, existing);
      }
      node = existing;
    }
    node.title = page.title;
    node.routeFile = page.routeFile;
    node.isIndexed = true;
  }

  // Attach every non-root node to its parent.
  for (const [url, node] of nodeMap) {
    if (url === "/") continue;
    const segments = splitSegments(url);
    const parentUrl = segments.length === 1 ? "/" : keyFor(segments.slice(0, -1));
    const parent = nodeMap.get(parentUrl) ?? root;
    parent.children.push(node);
  }

  const sortAndCount = (node) => {
    node.children.sort((a, b) => a.url.localeCompare(b.url));
    node.childCount = node.children.length;
    for (const child of node.children) {
      sortAndCount(child);
    }
  };
  sortAndCount(root);

  if (pathPrefix) {
    const normalizedPrefix = normalizeUrlPath(pathPrefix);
    const prefixSegments = splitSegments(normalizedPrefix);
    const subtreeRoot = nodeMap.get(keyFor(prefixSegments));
    if (subtreeRoot) {
      return subtreeRoot;
    }
    return makeNode(normalizedPrefix, prefixSegments.length);
  }
  return root;
}
/**
 * Return a copy of `base` with the `search`, `ranking` and `ranking.weights`
 * sections overlaid by the matching sections of `overrides`.
 *
 * Override keys whose value is `undefined` are ignored, so an optional field
 * that was passed through explicitly as `undefined` cannot erase the
 * configured base value. `base` is not mutated; untouched sections are
 * shared by reference (the copy is shallow).
 *
 * @param {object} base - Full config; must contain `search`, `ranking` and `ranking.weights`.
 * @param {{search?: object, ranking?: object}} [overrides] - Partial overrides; nullish means "no overrides".
 * @returns {object} New config object with the overrides applied.
 */
function mergeRankingOverrides(base, overrides) {
  // Keep only keys that carry a real value.
  const definedOnly = (section) => Object.fromEntries(
    Object.entries(section ?? {}).filter(([, value]) => value !== void 0)
  );
  const source = overrides ?? {};
  return {
    ...base,
    search: {
      ...base.search,
      ...definedOnly(source.search)
    },
    ranking: {
      ...base.ranking,
      ...definedOnly(source.ranking),
      // Merged last so a partial `weights` override never replaces the whole map.
      weights: {
        ...base.ranking.weights,
        ...definedOnly(source.ranking?.weights)
      }
    }
  };
}
2734
4064
  var SearchEngine = class _SearchEngine {
2735
4065
  cwd;
2736
4066
  config;
@@ -2741,7 +4071,7 @@ var SearchEngine = class _SearchEngine {
2741
4071
  this.store = options.store;
2742
4072
  }
2743
4073
  static async create(options = {}) {
2744
- const cwd = path11.resolve(options.cwd ?? process.cwd());
4074
+ const cwd = path12.resolve(options.cwd ?? process.cwd());
2745
4075
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
2746
4076
  const store = options.store ?? await createUpstashStore(config);
2747
4077
  return new _SearchEngine({
@@ -2760,125 +4090,203 @@ var SearchEngine = class _SearchEngine {
2760
4090
  }
2761
4091
  const input = parsed.data;
2762
4092
  const totalStart = process.hrtime.bigint();
4093
+ const effectiveConfig = input.debug && input.rankingOverrides ? mergeRankingOverrides(this.config, input.rankingOverrides) : this.config;
2763
4094
  const resolvedScope = resolveScope(this.config, input.scope);
2764
4095
  const topK = input.topK ?? 10;
4096
+ const maxSubResults = input.maxSubResults ?? 5;
2765
4097
  const groupByPage = (input.groupBy ?? "page") === "page";
2766
- const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
2767
- const filterParts = [];
2768
- if (input.pathPrefix) {
2769
- const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
2770
- filterParts.push(`url GLOB '${prefix}*'`);
2771
- }
2772
- if (input.tags && input.tags.length > 0) {
2773
- for (const tag of input.tags) {
2774
- filterParts.push(`tags GLOB '*${tag}*'`);
4098
+ const queryText = input.q;
4099
+ const pathPrefix = input.pathPrefix ? input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}` : void 0;
4100
+ const filterTags = input.tags && input.tags.length > 0 ? input.tags : void 0;
4101
+ const metaFilterStr = input.filters && Object.keys(input.filters).length > 0 ? buildMetaFilterString(input.filters) : "";
4102
+ const metaFilter = metaFilterStr || void 0;
4103
+ const applyPagePostFilters = (hits) => {
4104
+ let filtered = hits;
4105
+ if (pathPrefix) {
4106
+ filtered = filtered.filter((h) => h.url.startsWith(pathPrefix));
2775
4107
  }
2776
- }
2777
- const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
2778
- const useDualSearch = this.config.search.dualSearch && groupByPage;
4108
+ if (filterTags) {
4109
+ filtered = filtered.filter(
4110
+ (h) => filterTags.every((tag) => h.tags.includes(tag))
4111
+ );
4112
+ }
4113
+ return filtered;
4114
+ };
4115
+ const applyChunkPostFilters = (hits) => {
4116
+ let filtered = hits;
4117
+ if (filterTags) {
4118
+ filtered = filtered.filter(
4119
+ (h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
4120
+ );
4121
+ }
4122
+ return filtered;
4123
+ };
2779
4124
  const searchStart = process.hrtime.bigint();
2780
- let ranked;
2781
- if (useDualSearch) {
2782
- const chunkLimit = Math.max(topK * 10, 100);
2783
- const pageLimit = 20;
2784
- const [pageHits, chunkHits] = await Promise.all([
2785
- this.store.searchPages(
2786
- input.q,
2787
- {
2788
- limit: pageLimit,
2789
- semanticWeight: this.config.search.semanticWeight,
2790
- inputEnrichment: this.config.search.inputEnrichment,
2791
- filter
2792
- },
2793
- resolvedScope
2794
- ),
2795
- this.store.search(
2796
- input.q,
2797
- {
2798
- limit: chunkLimit,
2799
- semanticWeight: this.config.search.semanticWeight,
2800
- inputEnrichment: this.config.search.inputEnrichment,
2801
- reranking: false,
2802
- filter
2803
- },
4125
+ if (groupByPage) {
4126
+ const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
4127
+ const pageLimit = Math.max(topK * 2, 20);
4128
+ const pageHits = await this.store.searchPagesByText(
4129
+ queryText,
4130
+ { limit: pageLimit * fetchMultiplier, filter: metaFilter },
4131
+ resolvedScope
4132
+ );
4133
+ const filteredPages = applyPagePostFilters(pageHits);
4134
+ let rankedPages = rankPageHits(filteredPages, effectiveConfig, input.q, input.debug);
4135
+ rankedPages = trimPagesByScoreGap(rankedPages, effectiveConfig);
4136
+ const topPages = rankedPages.slice(0, topK);
4137
+ const chunkPromises = topPages.map(
4138
+ (page) => this.store.searchChunksByUrl(
4139
+ queryText,
4140
+ page.url,
4141
+ { limit: maxSubResults, filter: metaFilter },
2804
4142
  resolvedScope
2805
- )
2806
- ]);
2807
- const rankedChunks = rankHits(chunkHits, this.config, input.q);
2808
- ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
4143
+ ).then((chunks) => applyChunkPostFilters(chunks))
4144
+ );
4145
+ const allChunks = await Promise.all(chunkPromises);
4146
+ const searchMs = hrTimeMs(searchStart);
4147
+ const results = this.buildPageFirstResults(topPages, allChunks, input.q, input.debug, maxSubResults);
4148
+ return {
4149
+ q: input.q,
4150
+ scope: resolvedScope.scopeName,
4151
+ results,
4152
+ meta: {
4153
+ timingsMs: {
4154
+ search: Math.round(searchMs),
4155
+ total: Math.round(hrTimeMs(totalStart))
4156
+ }
4157
+ }
4158
+ };
2809
4159
  } else {
4160
+ const candidateK = Math.max(50, topK);
4161
+ const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
2810
4162
  const hits = await this.store.search(
2811
- input.q,
2812
- {
2813
- limit: candidateK,
2814
- semanticWeight: this.config.search.semanticWeight,
2815
- inputEnrichment: this.config.search.inputEnrichment,
2816
- reranking: this.config.search.reranking,
2817
- filter
2818
- },
4163
+ queryText,
4164
+ { limit: candidateK * fetchMultiplier, filter: metaFilter },
2819
4165
  resolvedScope
2820
4166
  );
2821
- ranked = rankHits(hits, this.config, input.q);
2822
- }
2823
- const searchMs = hrTimeMs(searchStart);
2824
- const results = this.buildResults(ranked, topK, groupByPage, input.q);
2825
- return {
2826
- q: input.q,
2827
- scope: resolvedScope.scopeName,
2828
- results,
2829
- meta: {
2830
- timingsMs: {
2831
- search: Math.round(searchMs),
2832
- total: Math.round(hrTimeMs(totalStart))
4167
+ let filtered = hits;
4168
+ if (pathPrefix) {
4169
+ filtered = filtered.filter((h) => h.metadata.url.startsWith(pathPrefix));
4170
+ }
4171
+ if (filterTags) {
4172
+ filtered = filtered.filter(
4173
+ (h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
4174
+ );
4175
+ }
4176
+ const ranked = rankHits(filtered, effectiveConfig, input.q, input.debug);
4177
+ const searchMs = hrTimeMs(searchStart);
4178
+ const results = this.buildResults(ranked, topK, false, maxSubResults, input.q, input.debug, effectiveConfig);
4179
+ return {
4180
+ q: input.q,
4181
+ scope: resolvedScope.scopeName,
4182
+ results,
4183
+ meta: {
4184
+ timingsMs: {
4185
+ search: Math.round(searchMs),
4186
+ total: Math.round(hrTimeMs(totalStart))
4187
+ }
2833
4188
  }
4189
+ };
4190
+ }
4191
+ }
4192
+ buildPageFirstResults(rankedPages, allChunks, query, debug, maxSubResults = 5) {
4193
+ return rankedPages.map((page, i) => {
4194
+ const chunks = allChunks[i] ?? [];
4195
+ const bestChunk = chunks[0];
4196
+ const snippet = bestChunk ? query ? queryAwareExcerpt(bestChunk.metadata.chunkText, query) : toSnippet(bestChunk.metadata.chunkText) : page.description || page.title;
4197
+ const result = {
4198
+ url: page.url,
4199
+ title: page.title,
4200
+ sectionTitle: bestChunk?.metadata.sectionTitle || void 0,
4201
+ snippet,
4202
+ chunkText: bestChunk?.metadata.chunkText || void 0,
4203
+ score: Number(page.finalScore.toFixed(6)),
4204
+ routeFile: page.routeFile,
4205
+ chunks: chunks.length > 0 ? chunks.slice(0, maxSubResults).map((c) => ({
4206
+ sectionTitle: c.metadata.sectionTitle || void 0,
4207
+ snippet: query ? queryAwareExcerpt(c.metadata.chunkText, query) : toSnippet(c.metadata.chunkText),
4208
+ chunkText: c.metadata.chunkText || void 0,
4209
+ headingPath: c.metadata.headingPath,
4210
+ score: Number(c.score.toFixed(6))
4211
+ })) : void 0
4212
+ };
4213
+ if (debug && page.breakdown) {
4214
+ result.breakdown = {
4215
+ baseScore: page.breakdown.baseScore,
4216
+ incomingLinkBoost: page.breakdown.incomingLinkBoost,
4217
+ depthBoost: page.breakdown.depthBoost,
4218
+ titleMatchBoost: page.breakdown.titleMatchBoost,
4219
+ freshnessBoost: page.breakdown.freshnessBoost,
4220
+ anchorTextMatchBoost: 0
4221
+ };
2834
4222
  }
2835
- };
4223
+ return result;
4224
+ });
2836
4225
  }
2837
- ensureSnippet(hit) {
4226
+ ensureSnippet(hit, query) {
4227
+ const chunkText = hit.hit.metadata.chunkText;
4228
+ if (query && chunkText) return queryAwareExcerpt(chunkText, query);
2838
4229
  const snippet = hit.hit.metadata.snippet;
2839
4230
  if (snippet && snippet.length >= 30) return snippet;
2840
- const chunkText = hit.hit.metadata.chunkText;
2841
4231
  if (chunkText) return toSnippet(chunkText);
2842
4232
  return snippet || "";
2843
4233
  }
2844
- buildResults(ordered, topK, groupByPage, _query) {
4234
+ buildResults(ordered, topK, groupByPage, maxSubResults, query, debug, config) {
4235
+ const cfg = config ?? this.config;
2845
4236
  if (groupByPage) {
2846
- let pages = aggregateByPage(ordered, this.config);
2847
- pages = trimByScoreGap(pages, this.config);
2848
- const minRatio = this.config.ranking.minChunkScoreRatio;
4237
+ let pages = aggregateByPage(ordered, cfg);
4238
+ pages = trimByScoreGap(pages, cfg);
4239
+ const minRatio = cfg.ranking.minChunkScoreRatio;
2849
4240
  return pages.slice(0, topK).map((page) => {
2850
4241
  const bestScore = page.bestChunk.finalScore;
2851
4242
  const minChunkScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
2852
- const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, 5);
2853
- return {
4243
+ const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, maxSubResults);
4244
+ const result = {
2854
4245
  url: page.url,
2855
4246
  title: page.title,
2856
4247
  sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
2857
- snippet: this.ensureSnippet(page.bestChunk),
4248
+ snippet: this.ensureSnippet(page.bestChunk, query),
4249
+ chunkText: page.bestChunk.hit.metadata.chunkText || void 0,
2858
4250
  score: Number(page.pageScore.toFixed(6)),
2859
4251
  routeFile: page.routeFile,
2860
- chunks: meaningful.length > 1 ? meaningful.map((c) => ({
4252
+ chunks: meaningful.length >= 1 ? meaningful.map((c) => ({
2861
4253
  sectionTitle: c.hit.metadata.sectionTitle || void 0,
2862
- snippet: this.ensureSnippet(c),
4254
+ snippet: this.ensureSnippet(c, query),
4255
+ chunkText: c.hit.metadata.chunkText || void 0,
2863
4256
  headingPath: c.hit.metadata.headingPath,
2864
4257
  score: Number(c.finalScore.toFixed(6))
2865
4258
  })) : void 0
2866
4259
  };
4260
+ if (debug && page.bestChunk.breakdown) {
4261
+ result.breakdown = page.bestChunk.breakdown;
4262
+ }
4263
+ return result;
2867
4264
  });
2868
4265
  } else {
2869
4266
  let filtered = ordered;
2870
- const minScore = this.config.ranking.minScore;
2871
- if (minScore > 0) {
2872
- filtered = ordered.filter((entry) => entry.finalScore >= minScore);
2873
- }
2874
- return filtered.slice(0, topK).map(({ hit, finalScore }) => ({
2875
- url: hit.metadata.url,
2876
- title: hit.metadata.title,
2877
- sectionTitle: hit.metadata.sectionTitle || void 0,
2878
- snippet: this.ensureSnippet({ hit, finalScore }),
2879
- score: Number(finalScore.toFixed(6)),
2880
- routeFile: hit.metadata.routeFile
2881
- }));
4267
+ const minScoreRatio = cfg.ranking.minScoreRatio;
4268
+ if (minScoreRatio > 0 && ordered.length > 0) {
4269
+ const topScore = ordered[0].finalScore;
4270
+ if (Number.isFinite(topScore) && topScore > 0) {
4271
+ const threshold = topScore * minScoreRatio;
4272
+ filtered = ordered.filter((entry) => entry.finalScore >= threshold);
4273
+ }
4274
+ }
4275
+ return filtered.slice(0, topK).map(({ hit, finalScore, breakdown }) => {
4276
+ const result = {
4277
+ url: hit.metadata.url,
4278
+ title: hit.metadata.title,
4279
+ sectionTitle: hit.metadata.sectionTitle || void 0,
4280
+ snippet: this.ensureSnippet({ hit, finalScore }, query),
4281
+ chunkText: hit.metadata.chunkText || void 0,
4282
+ score: Number(finalScore.toFixed(6)),
4283
+ routeFile: hit.metadata.routeFile
4284
+ };
4285
+ if (debug && breakdown) {
4286
+ result.breakdown = breakdown;
4287
+ }
4288
+ return result;
4289
+ });
2882
4290
  }
2883
4291
  }
2884
4292
  async getPage(pathOrUrl, scope) {
@@ -2904,6 +4312,116 @@ var SearchEngine = class _SearchEngine {
2904
4312
  markdown: page.markdown
2905
4313
  };
2906
4314
  }
4315
+ async listPages(opts) {
4316
+ const resolvedScope = resolveScope(this.config, opts?.scope);
4317
+ const pathPrefix = opts?.pathPrefix ? opts.pathPrefix.startsWith("/") ? opts.pathPrefix : `/${opts.pathPrefix}` : void 0;
4318
+ return this.store.listPages(resolvedScope, {
4319
+ cursor: opts?.cursor,
4320
+ limit: opts?.limit,
4321
+ pathPrefix
4322
+ });
4323
+ }
4324
+ async getSiteStructure(opts) {
4325
+ const maxPages = Math.min(opts?.maxPages ?? MAX_SITE_STRUCTURE_PAGES, MAX_SITE_STRUCTURE_PAGES);
4326
+ const allPages = [];
4327
+ let cursor;
4328
+ let truncated = false;
4329
+ do {
4330
+ const result = await this.listPages({
4331
+ pathPrefix: opts?.pathPrefix,
4332
+ scope: opts?.scope,
4333
+ cursor,
4334
+ limit: 200
4335
+ });
4336
+ allPages.push(...result.pages);
4337
+ cursor = result.nextCursor;
4338
+ if (allPages.length >= maxPages) {
4339
+ truncated = allPages.length > maxPages || !!cursor;
4340
+ allPages.length = maxPages;
4341
+ break;
4342
+ }
4343
+ } while (cursor);
4344
+ const root = buildTree(allPages, opts?.pathPrefix);
4345
+ return {
4346
+ root,
4347
+ totalPages: allPages.length,
4348
+ truncated
4349
+ };
4350
+ }
4351
+ async getRelatedPages(pathOrUrl, opts) {
4352
+ const resolvedScope = resolveScope(this.config, opts?.scope);
4353
+ const urlPath = this.resolveInputPath(pathOrUrl);
4354
+ const topK = Math.min(opts?.topK ?? 10, 25);
4355
+ const source = await this.store.fetchPageWithVector(urlPath, resolvedScope);
4356
+ if (!source) {
4357
+ throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
4358
+ }
4359
+ const sourceOutgoing = new Set(source.metadata.outgoingLinkUrls ?? []);
4360
+ const semanticHits = await this.store.searchPagesByVector(
4361
+ source.vector,
4362
+ { limit: 50 },
4363
+ resolvedScope
4364
+ );
4365
+ const filteredHits = semanticHits.filter((h) => h.url !== urlPath);
4366
+ const semanticScoreMap = /* @__PURE__ */ new Map();
4367
+ for (const hit of filteredHits) {
4368
+ semanticScoreMap.set(hit.url, hit.score);
4369
+ }
4370
+ const candidateUrls = /* @__PURE__ */ new Set();
4371
+ for (const hit of filteredHits) {
4372
+ candidateUrls.add(hit.url);
4373
+ }
4374
+ for (const url of sourceOutgoing) {
4375
+ if (url !== urlPath) candidateUrls.add(url);
4376
+ }
4377
+ const missingUrls = [...sourceOutgoing].filter(
4378
+ (u) => u !== urlPath && !semanticScoreMap.has(u)
4379
+ );
4380
+ const fetchedPages = missingUrls.length > 0 ? await this.store.fetchPagesBatch(missingUrls, resolvedScope) : [];
4381
+ const metaMap = /* @__PURE__ */ new Map();
4382
+ for (const hit of filteredHits) {
4383
+ metaMap.set(hit.url, { title: hit.title, routeFile: hit.routeFile, outgoingLinkUrls: [] });
4384
+ }
4385
+ for (const p of fetchedPages) {
4386
+ metaMap.set(p.url, { title: p.title, routeFile: p.routeFile, outgoingLinkUrls: p.outgoingLinkUrls });
4387
+ }
4388
+ const semanticUrls = filteredHits.map((h) => h.url);
4389
+ if (semanticUrls.length > 0) {
4390
+ const semanticPageData = await this.store.fetchPagesBatch(semanticUrls, resolvedScope);
4391
+ for (const p of semanticPageData) {
4392
+ const existing = metaMap.get(p.url);
4393
+ if (existing) {
4394
+ existing.outgoingLinkUrls = p.outgoingLinkUrls;
4395
+ }
4396
+ }
4397
+ }
4398
+ const candidates = [];
4399
+ for (const url of candidateUrls) {
4400
+ const meta = metaMap.get(url);
4401
+ if (!meta) continue;
4402
+ const isOutgoing = sourceOutgoing.has(url);
4403
+ const isIncoming = meta.outgoingLinkUrls.includes(urlPath);
4404
+ const isLinked = isOutgoing || isIncoming;
4405
+ const dice = diceScore(urlPath, url);
4406
+ const semantic = semanticScoreMap.get(url) ?? 0;
4407
+ const score = compositeScore(isLinked, dice, semantic);
4408
+ const relationshipType = dominantRelationshipType(isOutgoing, isIncoming, dice);
4409
+ candidates.push({
4410
+ url,
4411
+ title: meta.title,
4412
+ score: Number(score.toFixed(6)),
4413
+ relationshipType,
4414
+ routeFile: meta.routeFile
4415
+ });
4416
+ }
4417
+ candidates.sort((a, b) => b.score - a.score);
4418
+ const results = candidates.slice(0, topK);
4419
+ return {
4420
+ sourceUrl: urlPath,
4421
+ scope: resolvedScope.scopeName,
4422
+ relatedPages: results
4423
+ };
4424
+ }
2907
4425
  async health() {
2908
4426
  return this.store.health();
2909
4427
  }
@@ -2928,14 +4446,40 @@ function createServer(engine) {
2928
4446
  server.registerTool(
2929
4447
  "search",
2930
4448
  {
2931
- description: "Semantic site search powered by Upstash Search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and groupBy.",
4449
+ description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
2932
4450
  inputSchema: {
2933
4451
  query: z3.string().min(1),
2934
4452
  scope: z3.string().optional(),
2935
4453
  topK: z3.number().int().positive().max(100).optional(),
2936
4454
  pathPrefix: z3.string().optional(),
2937
4455
  tags: z3.array(z3.string()).optional(),
2938
- groupBy: z3.enum(["page", "chunk"]).optional()
4456
+ filters: z3.record(z3.string(), z3.union([z3.string(), z3.number(), z3.boolean()])).optional(),
4457
+ groupBy: z3.enum(["page", "chunk"]).optional(),
4458
+ maxSubResults: z3.number().int().positive().max(20).optional()
4459
+ },
4460
+ outputSchema: {
4461
+ q: z3.string(),
4462
+ scope: z3.string(),
4463
+ results: z3.array(z3.object({
4464
+ url: z3.string(),
4465
+ title: z3.string(),
4466
+ sectionTitle: z3.string().optional(),
4467
+ snippet: z3.string(),
4468
+ score: z3.number(),
4469
+ routeFile: z3.string(),
4470
+ chunks: z3.array(z3.object({
4471
+ sectionTitle: z3.string().optional(),
4472
+ snippet: z3.string(),
4473
+ headingPath: z3.array(z3.string()),
4474
+ score: z3.number()
4475
+ })).optional()
4476
+ })),
4477
+ meta: z3.object({
4478
+ timingsMs: z3.object({
4479
+ search: z3.number(),
4480
+ total: z3.number()
4481
+ })
4482
+ })
2939
4483
  }
2940
4484
  },
2941
4485
  async (input) => {
@@ -2945,7 +4489,9 @@ function createServer(engine) {
2945
4489
  scope: input.scope,
2946
4490
  pathPrefix: input.pathPrefix,
2947
4491
  tags: input.tags,
2948
- groupBy: input.groupBy
4492
+ filters: input.filters,
4493
+ groupBy: input.groupBy,
4494
+ maxSubResults: input.maxSubResults
2949
4495
  });
2950
4496
  return {
2951
4497
  content: [
@@ -2953,7 +4499,8 @@ function createServer(engine) {
2953
4499
  type: "text",
2954
4500
  text: JSON.stringify(result, null, 2)
2955
4501
  }
2956
- ]
4502
+ ],
4503
+ structuredContent: result
2957
4504
  };
2958
4505
  }
2959
4506
  );
@@ -2978,34 +4525,175 @@ function createServer(engine) {
2978
4525
  };
2979
4526
  }
2980
4527
  );
2981
- return server;
2982
- }
2983
- function redirectConsoleToStderr() {
2984
- const originalLog = console.log;
2985
- console.log = (...args) => {
2986
- process.stderr.write(`[LOG] ${args.map(String).join(" ")}
2987
- `);
2988
- };
2989
- console.warn = (...args) => {
2990
- process.stderr.write(`[WARN] ${args.map(String).join(" ")}
2991
- `);
2992
- };
2993
- void originalLog;
2994
- }
2995
- async function startHttpServer(serverFactory, config, opts) {
2996
- const app = createMcpExpressApp();
2997
- const port = opts.httpPort ?? config.mcp.http.port;
2998
- const endpointPath = opts.httpPath ?? config.mcp.http.path;
2999
- app.post(endpointPath, async (req, res) => {
3000
- const server = serverFactory();
3001
- const transport = new StreamableHTTPServerTransport({
3002
- sessionIdGenerator: void 0
3003
- });
3004
- try {
3005
- await server.connect(transport);
3006
- await transport.handleRequest(req, res, req.body);
3007
- res.on("close", () => {
3008
- transport.close();
4528
+ server.registerTool(
4529
+ "list_pages",
4530
+ {
4531
+ description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
4532
+ inputSchema: {
4533
+ pathPrefix: z3.string().optional(),
4534
+ cursor: z3.string().optional(),
4535
+ limit: z3.number().int().positive().max(200).optional(),
4536
+ scope: z3.string().optional()
4537
+ }
4538
+ },
4539
+ async (input) => {
4540
+ const result = await engine.listPages({
4541
+ pathPrefix: input.pathPrefix,
4542
+ cursor: input.cursor,
4543
+ limit: input.limit,
4544
+ scope: input.scope
4545
+ });
4546
+ return {
4547
+ content: [
4548
+ {
4549
+ type: "text",
4550
+ text: JSON.stringify(result, null, 2)
4551
+ }
4552
+ ]
4553
+ };
4554
+ }
4555
+ );
4556
+ server.registerTool(
4557
+ "get_site_structure",
4558
+ {
4559
+ description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
4560
+ inputSchema: {
4561
+ pathPrefix: z3.string().optional(),
4562
+ scope: z3.string().optional(),
4563
+ maxPages: z3.number().int().positive().max(2e3).optional()
4564
+ }
4565
+ },
4566
+ async (input) => {
4567
+ const result = await engine.getSiteStructure({
4568
+ pathPrefix: input.pathPrefix,
4569
+ scope: input.scope,
4570
+ maxPages: input.maxPages
4571
+ });
4572
+ return {
4573
+ content: [
4574
+ {
4575
+ type: "text",
4576
+ text: JSON.stringify(result, null, 2)
4577
+ }
4578
+ ]
4579
+ };
4580
+ }
4581
+ );
4582
+ server.registerTool(
4583
+ "find_source_file",
4584
+ {
4585
+ description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
4586
+ inputSchema: {
4587
+ query: z3.string().min(1),
4588
+ scope: z3.string().optional()
4589
+ }
4590
+ },
4591
+ async (input) => {
4592
+ const result = await engine.search({
4593
+ q: input.query,
4594
+ topK: 1,
4595
+ scope: input.scope
4596
+ });
4597
+ if (result.results.length === 0) {
4598
+ return {
4599
+ content: [
4600
+ {
4601
+ type: "text",
4602
+ text: JSON.stringify({
4603
+ error: "No matching content found for the given query."
4604
+ })
4605
+ }
4606
+ ]
4607
+ };
4608
+ }
4609
+ const match = result.results[0];
4610
+ const { url, routeFile, sectionTitle, snippet } = match;
4611
+ return {
4612
+ content: [
4613
+ {
4614
+ type: "text",
4615
+ text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
4616
+ }
4617
+ ]
4618
+ };
4619
+ }
4620
+ );
4621
+ server.registerTool(
4622
+ "get_related_pages",
4623
+ {
4624
+ description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
4625
+ inputSchema: {
4626
+ pathOrUrl: z3.string().min(1),
4627
+ scope: z3.string().optional(),
4628
+ topK: z3.number().int().positive().max(25).optional()
4629
+ }
4630
+ },
4631
+ async (input) => {
4632
+ const result = await engine.getRelatedPages(input.pathOrUrl, {
4633
+ topK: input.topK,
4634
+ scope: input.scope
4635
+ });
4636
+ return {
4637
+ content: [
4638
+ {
4639
+ type: "text",
4640
+ text: JSON.stringify(result, null, 2)
4641
+ }
4642
+ ]
4643
+ };
4644
+ }
4645
+ );
4646
+ return server;
4647
+ }
4648
/**
 * Resolve the API key used to protect the MCP HTTP endpoint.
 *
 * Lookup order:
 *   1. the inline `config.mcp.http.apiKey`, when it is non-empty;
 *   2. the environment variable named by `config.mcp.http.apiKeyEnv`, when set.
 *
 * An empty string is treated as "not configured" at both steps, so an empty
 * inline key no longer hides a usable key in the environment, and callers
 * always get either a non-empty key or `undefined`.
 *
 * @param {object} config - Resolved config; only `config.mcp.http.apiKey` and
 *   `config.mcp.http.apiKeyEnv` are read.
 * @returns {string | undefined} The configured key, or `undefined` when none is available.
 */
function resolveApiKey(config) {
  const { apiKey, apiKeyEnv } = config.mcp.http;
  if (apiKey) {
    return apiKey;
  }
  const fromEnv = apiKeyEnv ? process.env[apiKeyEnv] : void 0;
  // Normalise an empty environment value to undefined.
  return fromEnv || void 0;
}
4651
/**
 * Check a client-supplied API key against the expected key.
 *
 * Both values are hashed with SHA-256 before comparison so that
 * `timingSafeEqual` always receives buffers of equal length (it throws on a
 * length mismatch), whatever the lengths of the raw keys.
 *
 * Values that cannot be hashed (anything other than a string or a
 * Buffer/TypedArray/DataView, e.g. `undefined` or an array of header values)
 * are rejected with `false` rather than letting `hash.update()` throw.
 *
 * @param {string | ArrayBufferView} provided - Key presented by the client.
 * @param {string | ArrayBufferView} expected - Key the server was configured with.
 * @returns {boolean} `true` only when both inputs are hashable and their digests match.
 */
function verifyApiKey(provided, expected) {
  const isHashable = (value) => typeof value === "string" || ArrayBuffer.isView(value);
  if (!isHashable(provided) || !isHashable(expected)) {
    return false;
  }
  const providedDigest = createHash2("sha256").update(provided).digest();
  const expectedDigest = createHash2("sha256").update(expected).digest();
  return timingSafeEqual(providedDigest, expectedDigest);
}
4656
/**
 * Re-point the stdout-bound console methods at stderr.
 *
 * Called before the stdio transport is connected — presumably so that stray
 * console output cannot interleave with protocol traffic on stdout (confirm
 * against the transport's requirements). Each redirected call is written to
 * stderr as a single line: `[TAG] arg1 arg2 ...`, with arguments converted
 * via `String()`.
 *
 * `console.info` and `console.debug` are redirected as well as `console.log`,
 * because in Node they also write to stdout. `console.warn` is wrapped too so
 * that it gets the same tagged format.
 *
 * The replacement is permanent for the life of the process; the original
 * methods are not kept.
 *
 * @returns {void}
 */
function redirectConsoleToStderr() {
  const tagsByMethod = {
    log: "LOG",
    info: "INFO",
    debug: "DEBUG",
    warn: "WARN"
  };
  for (const [method, tag] of Object.entries(tagsByMethod)) {
    console[method] = (...args) => {
      process.stderr.write(`[${tag}] ${args.map(String).join(" ")}\n`);
    };
  }
}
4668
+ async function startHttpServer(serverFactory, config, opts) {
4669
+ const app = createMcpExpressApp();
4670
+ const port = opts.httpPort ?? config.mcp.http.port;
4671
+ const endpointPath = opts.httpPath ?? config.mcp.http.path;
4672
+ const isPublic = config.mcp.access === "public";
4673
+ const host = isPublic ? "0.0.0.0" : "127.0.0.1";
4674
+ const apiKey = isPublic ? resolveApiKey(config) : void 0;
4675
+ app.post(endpointPath, async (req, res) => {
4676
+ if (isPublic && apiKey) {
4677
+ const authHeader = req.headers["authorization"];
4678
+ const provided = (authHeader?.startsWith("Bearer ") ? authHeader.slice(7) : void 0) ?? req.headers["x-api-key"] ?? "";
4679
+ if (!provided || !verifyApiKey(provided, apiKey)) {
4680
+ res.status(401).json({
4681
+ jsonrpc: "2.0",
4682
+ error: { code: -32001, message: "Unauthorized" },
4683
+ id: null
4684
+ });
4685
+ return;
4686
+ }
4687
+ }
4688
+ const server = serverFactory();
4689
+ const transport = new StreamableHTTPServerTransport({
4690
+ sessionIdGenerator: void 0
4691
+ });
4692
+ try {
4693
+ await server.connect(transport);
4694
+ await transport.handleRequest(req, res, req.body);
4695
+ res.on("close", () => {
4696
+ transport.close();
3009
4697
  server.close();
3010
4698
  });
3011
4699
  } catch (error) {
@@ -3046,9 +4734,12 @@ async function startHttpServer(serverFactory, config, opts) {
3046
4734
  );
3047
4735
  });
3048
4736
  await new Promise((resolve, reject) => {
3049
- const instance = app.listen(port, "127.0.0.1", () => {
3050
- process.stderr.write(`SearchSocket MCP HTTP server listening on http://127.0.0.1:${port}${endpointPath}
4737
+ const instance = app.listen(port, host, () => {
4738
+ process.stderr.write(`SearchSocket MCP HTTP server listening on http://${host}:${port}${endpointPath}
3051
4739
  `);
4740
+ if (isPublic) {
4741
+ process.stderr.write("WARNING: Server is in public mode. Ensure HTTPS is configured via a reverse proxy for production use.\n");
4742
+ }
3052
4743
  resolve();
3053
4744
  });
3054
4745
  instance.once("error", reject);
@@ -3063,6 +4754,13 @@ async function runMcpServer(options = {}) {
3063
4754
  cwd: options.cwd,
3064
4755
  configPath: options.configPath
3065
4756
  });
4757
+ if (options.access) config.mcp.access = options.access;
4758
+ if (options.apiKey) config.mcp.http.apiKey = options.apiKey;
4759
+ if (config.mcp.access === "public" && !resolveApiKey(config)) {
4760
+ throw new Error(
4761
+ 'MCP access is "public" but no API key is configured. Pass --api-key or set mcp.http.apiKey / mcp.http.apiKeyEnv in config.'
4762
+ );
4763
+ }
3066
4764
  const resolvedTransport = options.transport ?? config.mcp.transport;
3067
4765
  if (resolvedTransport === "stdio") {
3068
4766
  redirectConsoleToStderr();
@@ -3081,6 +4779,837 @@ async function runMcpServer(options = {}) {
3081
4779
  await server.connect(stdioTransport);
3082
4780
  }
3083
4781
 
4782
+ // src/playground/server.ts
4783
+ import express from "express";
4784
+
4785
+ // src/playground/playground.html
4786
+ var playground_default = `<!DOCTYPE html>
4787
+ <html lang="en">
4788
+ <head>
4789
+ <meta charset="utf-8">
4790
+ <meta name="viewport" content="width=device-width, initial-scale=1">
4791
+ <title>SearchSocket Playground</title>
4792
+ <style>
4793
+ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
4794
+ body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #f8f9fa; color: #1a1a2e; padding: 2rem; max-width: 900px; margin: 0 auto; }
4795
+ h1 { font-size: 1.5rem; margin-bottom: 1.5rem; color: #16213e; }
4796
+ h1 span { font-weight: 400; color: #888; }
4797
+ .search-box { display: flex; gap: 0.5rem; margin-bottom: 0.5rem; }
4798
+ .search-box input { flex: 1; padding: 0.75rem 1rem; font-size: 1rem; border: 2px solid #ddd; border-radius: 8px; outline: none; transition: border-color 0.2s; }
4799
+ .search-box input:focus { border-color: #4361ee; }
4800
+ .search-opts { display: flex; gap: 1rem; margin-bottom: 1.5rem; font-size: 0.85rem; color: #555; align-items: center; }
4801
+ .search-opts label { display: flex; align-items: center; gap: 0.3rem; cursor: pointer; }
4802
+ .search-opts select { padding: 0.25rem 0.5rem; border: 1px solid #ccc; border-radius: 4px; font-size: 0.85rem; }
4803
+ .meta { font-size: 0.8rem; color: #888; margin-bottom: 1rem; }
4804
+ .result { background: #fff; border: 1px solid #e0e0e0; border-radius: 8px; padding: 1rem 1.25rem; margin-bottom: 0.75rem; }
4805
+ .result-header { display: flex; justify-content: space-between; align-items: flex-start; gap: 1rem; }
4806
+ .result-title { font-size: 1.05rem; font-weight: 600; color: #16213e; text-decoration: none; }
4807
+ .result-title:hover { color: #4361ee; }
4808
+ .result-score { font-size: 0.8rem; font-weight: 600; color: #4361ee; white-space: nowrap; background: #eef1ff; padding: 0.2rem 0.5rem; border-radius: 4px; }
4809
+ .result-url { font-size: 0.8rem; color: #888; margin-top: 0.2rem; }
4810
+ .result-snippet { font-size: 0.9rem; color: #444; margin-top: 0.5rem; line-height: 1.5; }
4811
+ .result-meta { display: flex; gap: 0.75rem; flex-wrap: wrap; margin-top: 0.5rem; font-size: 0.78rem; color: #777; }
4812
+ .result-meta span { background: #f0f0f0; padding: 0.15rem 0.5rem; border-radius: 4px; }
4813
+ .breakdown { margin-top: 0.5rem; padding: 0.5rem 0.75rem; background: #f8f9fa; border-radius: 6px; font-size: 0.8rem; }
4814
+ .breakdown-row { display: flex; justify-content: space-between; padding: 0.15rem 0; }
4815
+ .breakdown-label { color: #555; }
4816
+ .breakdown-value { font-family: monospace; color: #333; }
4817
+ .chunks-toggle { font-size: 0.8rem; color: #4361ee; cursor: pointer; margin-top: 0.5rem; border: none; background: none; padding: 0; text-decoration: underline; }
4818
+ .chunks { margin-top: 0.5rem; padding-left: 1rem; border-left: 3px solid #e0e0e0; }
4819
+ .chunk { margin-bottom: 0.5rem; font-size: 0.85rem; }
4820
+ .chunk-heading { font-size: 0.78rem; color: #4361ee; margin-bottom: 0.15rem; }
4821
+ .chunk-score { font-size: 0.75rem; color: #999; }
4822
+ .chunk-snippet { color: #555; line-height: 1.4; }
4823
+ .empty { text-align: center; padding: 3rem; color: #999; }
4824
+ .loading { text-align: center; padding: 2rem; color: #999; }
4825
+ .hidden { display: none; }
4826
+
4827
+ /* Ranking Tuner */
4828
+ .tuner { margin-bottom: 1.5rem; border: 1px solid #e0e0e0; border-radius: 8px; background: #fff; }
4829
+ .tuner > summary { padding: 0.75rem 1rem; font-weight: 600; font-size: 0.95rem; cursor: pointer; color: #16213e; user-select: none; }
4830
+ .tuner > summary:hover { color: #4361ee; }
4831
+ .tuner-body { padding: 0.5rem 1rem 1rem; }
4832
+ .tuner-actions { display: flex; gap: 0.5rem; margin-bottom: 0.75rem; }
4833
+ .tuner-actions button { padding: 0.35rem 0.75rem; font-size: 0.8rem; border: 1px solid #ccc; border-radius: 4px; background: #fff; cursor: pointer; color: #555; }
4834
+ .tuner-actions button:hover { border-color: #4361ee; color: #4361ee; }
4835
+ .tuner-group { margin-bottom: 0.5rem; border: 1px solid #eee; border-radius: 6px; }
4836
+ .tuner-group > summary { padding: 0.5rem 0.75rem; font-size: 0.85rem; font-weight: 600; cursor: pointer; color: #444; user-select: none; }
4837
+ .tuner-group[open] { margin-bottom: 0.75rem; }
4838
+ .tuner-group-body { padding: 0.25rem 0.75rem 0.5rem; }
4839
+ .tuner-row { display: grid; grid-template-columns: 140px 1fr 70px 24px; gap: 8px; align-items: center; margin-bottom: 0.35rem; }
4840
+ .tuner-row label { font-size: 0.8rem; color: #555; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
4841
+ .tuner-row label.modified { color: #4361ee; font-weight: 600; }
4842
+ .tuner-row input[type="range"] { width: 100%; height: 6px; cursor: pointer; }
4843
+ .tuner-row input[type="number"] { width: 70px; padding: 0.2rem 0.35rem; font-size: 0.8rem; border: 1px solid #ccc; border-radius: 4px; text-align: right; }
4844
+ .tuner-row input[type="checkbox"] { width: 16px; height: 16px; cursor: pointer; }
4845
+ .tuner-row .reset-btn { width: 20px; height: 20px; border: none; background: none; cursor: pointer; color: #999; font-size: 1rem; padding: 0; line-height: 1; visibility: hidden; }
4846
+ .tuner-row .reset-btn.visible { visibility: visible; }
4847
+ .tuner-row .reset-btn:hover { color: #4361ee; }
4848
+ .tuner-bool-row { display: grid; grid-template-columns: 140px 1fr 24px; gap: 8px; align-items: center; margin-bottom: 0.35rem; }
4849
+ .tuner-bool-row label { font-size: 0.8rem; color: #555; }
4850
+ .tuner-bool-row label.modified { color: #4361ee; font-weight: 600; }
4851
+ .tuner-bool-row .reset-btn { width: 20px; height: 20px; border: none; background: none; cursor: pointer; color: #999; font-size: 1rem; padding: 0; line-height: 1; visibility: hidden; }
4852
+ .tuner-bool-row .reset-btn.visible { visibility: visible; }
4853
+ .tuner-export { margin-top: 0.75rem; }
4854
+ .tuner-export textarea { width: 100%; height: 120px; font-family: monospace; font-size: 0.8rem; padding: 0.5rem; border: 1px solid #ccc; border-radius: 6px; background: #f8f9fa; resize: vertical; }
4855
+ </style>
4856
+ </head>
4857
+ <body>
4858
+ <h1>SearchSocket <span>Playground</span></h1>
4859
+ <div class="search-box">
4860
+ <input type="text" id="q" placeholder="Type a search query..." autocomplete="off" autofocus>
4861
+ </div>
4862
+ <div class="search-opts">
4863
+ <label>
4864
+ Group by:
4865
+ <select id="groupBy">
4866
+ <option value="page">Page</option>
4867
+ <option value="chunk">Chunk</option>
4868
+ </select>
4869
+ </label>
4870
+ <label>
4871
+ Top K:
4872
+ <select id="topK">
4873
+ <option value="5">5</option>
4874
+ <option value="10" selected>10</option>
4875
+ <option value="20">20</option>
4876
+ <option value="50">50</option>
4877
+ </select>
4878
+ </label>
4879
+ </div>
4880
+
4881
+ <details class="tuner" id="tunerPanel">
4882
+ <summary>Ranking Tuner</summary>
4883
+ <div class="tuner-body">
4884
+ <div class="tuner-actions">
4885
+ <button id="resetAll" type="button">Reset All</button>
4886
+ <button id="exportConfig" type="button">Export Config</button>
4887
+ </div>
4888
+ <div id="tunerGroups"></div>
4889
+ <div class="tuner-export hidden" id="exportArea">
4890
+ <textarea id="exportText" readonly></textarea>
4891
+ </div>
4892
+ </div>
4893
+ </details>
4894
+
4895
+ <div id="meta" class="meta"></div>
4896
+ <div id="results"></div>
4897
+
4898
+ <script>
4899
+ (function() {
4900
+ var qInput = document.getElementById('q');
4901
+ var groupBySelect = document.getElementById('groupBy');
4902
+ var topKSelect = document.getElementById('topK');
4903
+ var resultsDiv = document.getElementById('results');
4904
+ var metaDiv = document.getElementById('meta');
4905
+ var tunerGroupsDiv = document.getElementById('tunerGroups');
4906
+ var exportArea = document.getElementById('exportArea');
4907
+ var exportText = document.getElementById('exportText');
4908
+
4909
+ var debounceTimer = null;
4910
+ var requestId = 0;
4911
+ var baselineConfig = null;
4912
+ var tunerParams = [];
4913
+
4914
+ var PARAM_DEFS = [
4915
+ { group: 'Thresholds', key: 'ranking.minScoreRatio', label: 'minScoreRatio', min: 0, max: 1, step: 0.01 },
4916
+ { group: 'Thresholds', key: 'ranking.scoreGapThreshold', label: 'scoreGapThreshold', min: 0, max: 1, step: 0.01 },
4917
+ { group: 'Thresholds', key: 'ranking.minChunkScoreRatio', label: 'minChunkScoreRatio', min: 0, max: 1, step: 0.01 },
4918
+ { group: 'Boosts', key: 'ranking.enableIncomingLinkBoost', label: 'incomingLinkBoost', type: 'bool' },
4919
+ { group: 'Boosts', key: 'ranking.enableDepthBoost', label: 'depthBoost', type: 'bool' },
4920
+ { group: 'Weights', key: 'ranking.weights.incomingLinks', label: 'incomingLinks', min: 0, max: 1, step: 0.01 },
4921
+ { group: 'Weights', key: 'ranking.weights.depth', label: 'depth', min: 0, max: 1, step: 0.01 },
4922
+ { group: 'Weights', key: 'ranking.weights.aggregation', label: 'aggregation', min: 0, max: 1, step: 0.01 },
4923
+ { group: 'Weights', key: 'ranking.weights.titleMatch', label: 'titleMatch', min: 0, max: 1, step: 0.01 },
4924
+ { group: 'Aggregation', key: 'ranking.aggregationCap', label: 'aggregationCap', min: 1, max: 20, step: 1 },
4925
+ { group: 'Aggregation', key: 'ranking.aggregationDecay', label: 'aggregationDecay', min: 0, max: 1, step: 0.01 },
4926
+ { group: 'Search', key: 'search.pageSearchWeight', label: 'pageSearchWeight', min: 0, max: 1, step: 0.01 }
4927
+ ];
4928
+
4929
+ function getNestedValue(obj, path) {
4930
+ var parts = path.split('.');
4931
+ var v = obj;
4932
+ for (var i = 0; i < parts.length; i++) {
4933
+ if (v == null) return undefined;
4934
+ v = v[parts[i]];
4935
+ }
4936
+ return v;
4937
+ }
4938
+
4939
+ function setNestedValue(obj, path, value) {
4940
+ var parts = path.split('.');
4941
+ var cur = obj;
4942
+ for (var i = 0; i < parts.length - 1; i++) {
4943
+ if (!cur[parts[i]]) cur[parts[i]] = {};
4944
+ cur = cur[parts[i]];
4945
+ }
4946
+ cur[parts[parts.length - 1]] = value;
4947
+ }
4948
+
4949
+ function initTuner(config) {
4950
+ baselineConfig = config;
4951
+ var groups = {};
4952
+ PARAM_DEFS.forEach(function(def) {
4953
+ if (!groups[def.group]) groups[def.group] = [];
4954
+ groups[def.group].push(def);
4955
+ });
4956
+
4957
+ var html = '';
4958
+ Object.keys(groups).forEach(function(groupName) {
4959
+ html += '<details class="tuner-group" open>';
4960
+ html += '<summary>' + groupName + '</summary>';
4961
+ html += '<div class="tuner-group-body">';
4962
+ groups[groupName].forEach(function(def) {
4963
+ var val = getNestedValue(config, def.key);
4964
+ if (def.type === 'bool') {
4965
+ html += '<div class="tuner-bool-row" data-key="' + def.key + '">';
4966
+ html += '<label>' + def.label + '</label>';
4967
+ html += '<input type="checkbox"' + (val ? ' checked' : '') + ' data-param="' + def.key + '">';
4968
+ html += '<button class="reset-btn" title="Reset" data-reset="' + def.key + '">\\u21BA</button>';
4969
+ html += '</div>';
4970
+ } else {
4971
+ html += '<div class="tuner-row" data-key="' + def.key + '">';
4972
+ html += '<label>' + def.label + '</label>';
4973
+ html += '<input type="range" min="' + def.min + '" max="' + def.max + '" step="' + def.step + '" value="' + val + '" data-param="' + def.key + '">';
4974
+ html += '<input type="number" min="' + def.min + '" max="' + def.max + '" step="' + def.step + '" value="' + val + '" data-num="' + def.key + '">';
4975
+ html += '<button class="reset-btn" title="Reset" data-reset="' + def.key + '">\\u21BA</button>';
4976
+ html += '</div>';
4977
+ }
4978
+ });
4979
+ html += '</div></details>';
4980
+ });
4981
+ tunerGroupsDiv.innerHTML = html;
4982
+
4983
+ // Wire events
4984
+ tunerGroupsDiv.addEventListener('input', function(e) {
4985
+ var param = e.target.getAttribute('data-param');
4986
+ var num = e.target.getAttribute('data-num');
4987
+ if (param) {
4988
+ // Slider or checkbox changed \u2014 sync number input
4989
+ var row = e.target.closest('[data-key]');
4990
+ if (row && e.target.type === 'range') {
4991
+ var numInput = row.querySelector('[data-num]');
4992
+ if (numInput) numInput.value = e.target.value;
4993
+ }
4994
+ updateModifiedState(param);
4995
+ scheduleSearch();
4996
+ } else if (num) {
4997
+ // Number input changed \u2014 sync slider
4998
+ var row = e.target.closest('[data-key]');
4999
+ if (row) {
5000
+ var rangeInput = row.querySelector('[data-param]');
5001
+ if (rangeInput) rangeInput.value = e.target.value;
5002
+ }
5003
+ updateModifiedState(num);
5004
+ scheduleSearch();
5005
+ }
5006
+ });
5007
+
5008
+ tunerGroupsDiv.addEventListener('change', function(e) {
5009
+ var param = e.target.getAttribute('data-param');
5010
+ if (param && e.target.type === 'checkbox') {
5011
+ updateModifiedState(param);
5012
+ scheduleSearch();
5013
+ }
5014
+ });
5015
+
5016
+ tunerGroupsDiv.addEventListener('click', function(e) {
5017
+ var resetKey = e.target.getAttribute('data-reset');
5018
+ if (resetKey) {
5019
+ resetParam(resetKey);
5020
+ scheduleSearch();
5021
+ }
5022
+ });
5023
+ }
5024
+
5025
+ function updateModifiedState(key) {
5026
+ var baseline = getNestedValue(baselineConfig, key);
5027
+ var row = tunerGroupsDiv.querySelector('[data-key="' + key + '"]');
5028
+ if (!row) return;
5029
+ var input = row.querySelector('[data-param="' + key + '"]');
5030
+ if (!input) return;
5031
+ var current = input.type === 'checkbox' ? input.checked : parseFloat(input.value);
5032
+ var isModified = current !== baseline;
5033
+ var label = row.querySelector('label');
5034
+ var resetBtn = row.querySelector('.reset-btn');
5035
+ if (label) label.classList.toggle('modified', isModified);
5036
+ if (resetBtn) resetBtn.classList.toggle('visible', isModified);
5037
+ }
5038
+
5039
+ function resetParam(key) {
5040
+ var baseline = getNestedValue(baselineConfig, key);
5041
+ var row = tunerGroupsDiv.querySelector('[data-key="' + key + '"]');
5042
+ if (!row) return;
5043
+ var input = row.querySelector('[data-param="' + key + '"]');
5044
+ if (!input) return;
5045
+ if (input.type === 'checkbox') {
5046
+ input.checked = baseline;
5047
+ } else {
5048
+ input.value = baseline;
5049
+ var numInput = row.querySelector('[data-num]');
5050
+ if (numInput) numInput.value = baseline;
5051
+ }
5052
+ updateModifiedState(key);
5053
+ }
5054
+
5055
+ function resetAll() {
5056
+ PARAM_DEFS.forEach(function(def) {
5057
+ resetParam(def.key);
5058
+ });
5059
+ exportArea.classList.add('hidden');
5060
+ scheduleSearch();
5061
+ }
5062
+
5063
+ function collectOverrides() {
5064
+ var overrides = {};
5065
+ PARAM_DEFS.forEach(function(def) {
5066
+ var row = tunerGroupsDiv.querySelector('[data-key="' + def.key + '"]');
5067
+ if (!row) return;
5068
+ var input = row.querySelector('[data-param="' + def.key + '"]');
5069
+ if (!input) return;
5070
+ var val = def.type === 'bool' ? input.checked : parseFloat(input.value);
5071
+ setNestedValue(overrides, def.key, val);
5072
+ });
5073
+ return overrides;
5074
+ }
5075
+
5076
+ function collectChangedOverrides() {
5077
+ var overrides = {};
5078
+ var hasChanges = false;
5079
+ PARAM_DEFS.forEach(function(def) {
5080
+ var row = tunerGroupsDiv.querySelector('[data-key="' + def.key + '"]');
5081
+ if (!row) return;
5082
+ var input = row.querySelector('[data-param="' + def.key + '"]');
5083
+ if (!input) return;
5084
+ var current = def.type === 'bool' ? input.checked : parseFloat(input.value);
5085
+ var baseline = getNestedValue(baselineConfig, def.key);
5086
+ if (current !== baseline) {
5087
+ setNestedValue(overrides, def.key, current);
5088
+ hasChanges = true;
5089
+ }
5090
+ });
5091
+ return hasChanges ? overrides : null;
5092
+ }
5093
+
5094
+ function exportConfig() {
5095
+ var changed = collectChangedOverrides();
5096
+ if (!changed) {
5097
+ exportArea.classList.remove('hidden');
5098
+ exportText.value = '// No parameters have been changed from defaults.';
5099
+ return;
5100
+ }
5101
+
5102
+ var lines = [];
5103
+ if (changed.ranking) {
5104
+ lines.push('ranking: {');
5105
+ var r = changed.ranking;
5106
+ var simpleKeys = ['enableIncomingLinkBoost', 'enableDepthBoost', 'aggregationCap', 'aggregationDecay', 'minChunkScoreRatio', 'minScoreRatio', 'scoreGapThreshold'];
5107
+ simpleKeys.forEach(function(k) {
5108
+ if (r[k] !== undefined) lines.push(' ' + k + ': ' + JSON.stringify(r[k]) + ',');
5109
+ });
5110
+ if (r.weights) {
5111
+ lines.push(' weights: {');
5112
+ Object.keys(r.weights).forEach(function(wk) {
5113
+ lines.push(' ' + wk + ': ' + r.weights[wk] + ',');
5114
+ });
5115
+ lines.push(' },');
5116
+ }
5117
+ lines.push('},');
5118
+ }
5119
+ if (changed.search) {
5120
+ lines.push('search: {');
5121
+ Object.keys(changed.search).forEach(function(sk) {
5122
+ lines.push(' ' + sk + ': ' + changed.search[sk] + ',');
5123
+ });
5124
+ lines.push('},');
5125
+ }
5126
+
5127
+ exportArea.classList.remove('hidden');
5128
+ exportText.value = lines.join('\\n');
5129
+ }
5130
+
5131
+ // Read initial state from URL
5132
+ var params = new URLSearchParams(window.location.search);
5133
+ if (params.get('q')) qInput.value = params.get('q');
5134
+ if (params.get('groupBy')) groupBySelect.value = params.get('groupBy');
5135
+ if (params.get('topK')) topKSelect.value = params.get('topK');
5136
+
5137
+ function updateUrl() {
5138
+ var p = new URLSearchParams();
5139
+ if (qInput.value) p.set('q', qInput.value);
5140
+ if (groupBySelect.value !== 'page') p.set('groupBy', groupBySelect.value);
5141
+ if (topKSelect.value !== '10') p.set('topK', topKSelect.value);
5142
+ var qs = p.toString();
5143
+ history.replaceState(null, '', qs ? '?' + qs : window.location.pathname);
5144
+ }
5145
+
5146
+ function doSearch() {
5147
+ var query = qInput.value.trim();
5148
+ updateUrl();
5149
+ if (!query) {
5150
+ resultsDiv.innerHTML = '<div class="empty">Enter a query to search</div>';
5151
+ metaDiv.textContent = '';
5152
+ return;
5153
+ }
5154
+
5155
+ resultsDiv.innerHTML = '<div class="loading">Searching...</div>';
5156
+
5157
+ var thisRequestId = ++requestId;
5158
+ var body = {
5159
+ q: query,
5160
+ topK: parseInt(topKSelect.value, 10),
5161
+ groupBy: groupBySelect.value,
5162
+ debug: true
5163
+ };
5164
+
5165
+ if (baselineConfig) {
5166
+ body.rankingOverrides = collectOverrides();
5167
+ }
5168
+
5169
+ fetch('/_searchsocket/search', {
5170
+ method: 'POST',
5171
+ headers: { 'Content-Type': 'application/json' },
5172
+ body: JSON.stringify(body)
5173
+ }).then(function(res) {
5174
+ if (thisRequestId !== requestId) return;
5175
+ if (!res.ok) {
5176
+ return res.text().then(function(err) {
5177
+ resultsDiv.innerHTML = '<div class="empty">Error: ' + escapeHtml(err) + '</div>';
5178
+ });
5179
+ }
5180
+ return res.json().then(function(data) {
5181
+ if (thisRequestId !== requestId) return;
5182
+ renderResults(data);
5183
+ });
5184
+ }).catch(function(err) {
5185
+ if (thisRequestId !== requestId) return;
5186
+ resultsDiv.innerHTML = '<div class="empty">Network error: ' + escapeHtml(err.message) + '</div>';
5187
+ });
5188
+ }
5189
+
5190
+ function escapeHtml(str) {
5191
+ var d = document.createElement('div');
5192
+ d.textContent = str;
5193
+ return d.innerHTML;
5194
+ }
5195
+
5196
+ function renderResults(data) {
5197
+ metaDiv.textContent = data.results.length + ' results in ' + data.meta.timingsMs.total + 'ms (search: ' + data.meta.timingsMs.search + 'ms) \\u2014 scope: ' + data.scope;
5198
+
5199
+ if (data.results.length === 0) {
5200
+ resultsDiv.innerHTML = '<div class="empty">No results found</div>';
5201
+ return;
5202
+ }
5203
+
5204
+ resultsDiv.innerHTML = data.results.map(function(r, i) {
5205
+ var html = '<div class="result">';
5206
+ html += '<div class="result-header">';
5207
+ html += '<div><div class="result-title">' + escapeHtml(r.title) + '</div>';
5208
+ html += '<div class="result-url">' + escapeHtml(r.url) + '</div></div>';
5209
+ html += '<div class="result-score">' + r.score.toFixed(4) + '</div>';
5210
+ html += '</div>';
5211
+
5212
+ if (r.snippet) {
5213
+ html += '<div class="result-snippet">' + escapeHtml(r.snippet) + '</div>';
5214
+ }
5215
+
5216
+ html += '<div class="result-meta">';
5217
+ html += '<span>route: ' + escapeHtml(r.routeFile) + '</span>';
5218
+ if (r.sectionTitle) html += '<span>section: ' + escapeHtml(r.sectionTitle) + '</span>';
5219
+ html += '</div>';
5220
+
5221
+ if (r.breakdown) {
5222
+ html += '<div class="breakdown">';
5223
+ html += '<div class="breakdown-row"><span class="breakdown-label">Base score</span><span class="breakdown-value">' + r.breakdown.baseScore.toFixed(6) + '</span></div>';
5224
+ html += '<div class="breakdown-row"><span class="breakdown-label">Incoming link boost</span><span class="breakdown-value">' + r.breakdown.incomingLinkBoost.toFixed(6) + '</span></div>';
5225
+ html += '<div class="breakdown-row"><span class="breakdown-label">Depth boost</span><span class="breakdown-value">' + r.breakdown.depthBoost.toFixed(6) + '</span></div>';
5226
+ html += '<div class="breakdown-row"><span class="breakdown-label">Title match boost</span><span class="breakdown-value">' + r.breakdown.titleMatchBoost.toFixed(6) + '</span></div>';
5227
+ html += '<div class="breakdown-row"><span class="breakdown-label">Anchor text boost</span><span class="breakdown-value">' + (r.breakdown.anchorTextMatchBoost || 0).toFixed(6) + '</span></div>';
5228
+ html += '</div>';
5229
+ }
5230
+
5231
+ if (r.chunks && r.chunks.length > 0) {
5232
+ html += '<button class="chunks-toggle" data-idx="' + i + '">Show ' + r.chunks.length + ' chunks</button>';
5233
+ html += '<div class="chunks hidden" id="chunks-' + i + '">';
5234
+ r.chunks.forEach(function(c) {
5235
+ html += '<div class="chunk">';
5236
+ if (c.headingPath && c.headingPath.length > 0) {
5237
+ html += '<div class="chunk-heading">' + escapeHtml(c.headingPath.join(' > ')) + '</div>';
5238
+ }
5239
+ if (c.sectionTitle) {
5240
+ html += '<div class="chunk-heading">' + escapeHtml(c.sectionTitle) + '</div>';
5241
+ }
5242
+ html += '<div class="chunk-snippet">' + escapeHtml(c.snippet) + '</div>';
5243
+ html += '<div class="chunk-score">score: ' + c.score.toFixed(4) + '</div>';
5244
+ html += '</div>';
5245
+ });
5246
+ html += '</div>';
5247
+ }
5248
+
5249
+ html += '</div>';
5250
+ return html;
5251
+ }).join('');
5252
+ }
5253
+
5254
+ resultsDiv.addEventListener('click', function(e) {
5255
+ if (e.target.classList.contains('chunks-toggle')) {
5256
+ var idx = e.target.getAttribute('data-idx');
5257
+ var chunksDiv = document.getElementById('chunks-' + idx);
5258
+ if (chunksDiv) {
5259
+ chunksDiv.classList.toggle('hidden');
5260
+ e.target.textContent = chunksDiv.classList.contains('hidden')
5261
+ ? 'Show ' + chunksDiv.children.length + ' chunks'
5262
+ : 'Hide chunks';
5263
+ }
5264
+ }
5265
+ });
5266
+
5267
+ function scheduleSearch() {
5268
+ clearTimeout(debounceTimer);
5269
+ debounceTimer = setTimeout(doSearch, 300);
5270
+ }
5271
+
5272
+ qInput.addEventListener('input', scheduleSearch);
5273
+ groupBySelect.addEventListener('change', scheduleSearch);
5274
+ topKSelect.addEventListener('change', scheduleSearch);
5275
+
5276
+ document.getElementById('resetAll').addEventListener('click', resetAll);
5277
+ document.getElementById('exportConfig').addEventListener('click', exportConfig);
5278
+
5279
+ // Fetch config and initialize tuner
5280
+ fetch('/_searchsocket/config').then(function(res) {
5281
+ if (res.ok) return res.json();
5282
+ return null;
5283
+ }).then(function(config) {
5284
+ if (config) initTuner(config);
5285
+ }).catch(function() {
5286
+ // Config endpoint not available \u2014 tuner stays empty
5287
+ });
5288
+
5289
+ // Trigger initial search if query is present
5290
+ if (qInput.value.trim()) doSearch();
5291
+ })();
5292
+ </script>
5293
+ </body>
5294
+ </html>
5295
+ `;
5296
+
5297
+ // src/playground/server.ts
5298
/**
 * Start the local playground HTTP server, bound to 127.0.0.1.
 *
 * Routes:
 *   GET  /_searchsocket         - serves the playground HTML page
 *   GET  /_searchsocket/config  - returns the ranking/search settings from the config
 *   POST /_searchsocket/search  - runs a search using the JSON request body
 *
 * @param {object} options
 * @param {string} [options.cwd] - forwarded to loadConfig / SearchEngine.create
 * @param {string} [options.configPath] - forwarded to loadConfig / SearchEngine.create
 * @param {object} [options.config] - already-resolved config; loaded via loadConfig when omitted
 * @param {number} [options.port] - preferred port (defaults to 3337)
 * @returns {Promise<{ port: number, close: () => Promise<void> }>} the port actually bound
 *   and a function that closes the HTTP server
 */
async function runPlaygroundServer(options) {
  const config = options.config ?? await loadConfig({
    cwd: options.cwd,
    configPath: options.configPath
  });
  // The engine is created lazily on the first search request and then reused.
  let enginePromise = null;
  function getEngine() {
    if (!enginePromise) {
      enginePromise = SearchEngine.create({
        cwd: options.cwd,
        configPath: options.configPath,
        config
      }).catch((err) => {
        // Do not cache a failed creation: let the next request try again.
        enginePromise = null;
        throw err;
      });
    }
    return enginePromise;
  }
  const app = express();
  app.use(express.json());
  app.get("/_searchsocket", (_req, res) => {
    res.type("html").send(playground_default);
  });
  app.get("/_searchsocket/config", (_req, res) => {
    res.json({
      ranking: {
        enableIncomingLinkBoost: config.ranking.enableIncomingLinkBoost,
        enableDepthBoost: config.ranking.enableDepthBoost,
        aggregationCap: config.ranking.aggregationCap,
        aggregationDecay: config.ranking.aggregationDecay,
        minChunkScoreRatio: config.ranking.minChunkScoreRatio,
        minScoreRatio: config.ranking.minScoreRatio,
        scoreGapThreshold: config.ranking.scoreGapThreshold,
        weights: { ...config.ranking.weights }
      },
      search: {
        pageSearchWeight: config.search.pageSearchWeight
      }
    });
  });
  app.post("/_searchsocket/search", async (req, res) => {
    try {
      const searchEngine = await getEngine();
      const body = req.body;
      if (!body || typeof body.q !== "string" || body.q.trim().length === 0) {
        res.status(400).json({ error: "Missing or empty 'q' field" });
        return;
      }
      // Only forward fields that have the expected type; everything else is dropped.
      const result = await searchEngine.search({
        q: body.q,
        topK: typeof body.topK === "number" ? body.topK : void 0,
        scope: typeof body.scope === "string" ? body.scope : void 0,
        pathPrefix: typeof body.pathPrefix === "string" ? body.pathPrefix : void 0,
        tags: Array.isArray(body.tags) ? body.tags : void 0,
        groupBy: body.groupBy === "page" || body.groupBy === "chunk" ? body.groupBy : void 0,
        debug: body.debug === true,
        rankingOverrides: body.rankingOverrides && typeof body.rankingOverrides === "object" ? body.rankingOverrides : void 0
      });
      res.json(result);
    } catch (error) {
      const message = error instanceof Error ? error.message : "Internal server error";
      // Anything can be thrown, so read statusCode defensively and only trust
      // a real HTTP error code; otherwise answer with 500.
      const rawStatus = error?.statusCode;
      const status = Number.isInteger(rawStatus) && rawStatus >= 400 && rawStatus <= 599 ? rawStatus : 500;
      res.status(status).json({ error: message });
    }
  });
  const preferredPort = options.port ?? 3337;
  function startServer(port) {
    return new Promise((resolve, reject) => {
      let httpServer;
      const onListening = (err) => {
        // Express 5 also invokes the listen callback with the error when the
        // server fails to bind. The "error" listener below owns that case;
        // without this guard address() is null here and reading .port throws.
        if (err) {
          return;
        }
        const addr = httpServer.address();
        resolve({
          port: addr.port,
          close: () => new Promise((r) => httpServer.close(() => r()))
        });
      };
      httpServer = app.listen(port, "127.0.0.1", onListening);
      httpServer.once("error", (err) => {
        if (err.code === "EADDRINUSE" && port !== 0) {
          // Preferred port is taken: retry once on port 0 (OS-assigned port).
          startServer(0).then(resolve, reject);
        } else {
          reject(err);
        }
      });
    });
  }
  return startServer(preferredPort);
}
5384
+
5385
+ // src/search/quality-metrics.ts
5386
/**
 * Reciprocal rank of the first relevant result.
 *
 * @param {Array<{ url: string }>} results - ranked results, best first
 * @param {Iterable<string>} relevant - URLs counted as relevant
 * @returns {number} 1 / (1-based position of the first result whose url is
 *   relevant), or 0 when no result is relevant
 */
function reciprocalRank(results, relevant) {
  const relevantUrls = new Set(relevant);
  const firstHit = results.findIndex((result) => relevantUrls.has(result.url));
  return firstHit === -1 ? 0 : 1 / (firstHit + 1);
}
5395
/**
 * Mean reciprocal rank over a set of evaluated queries.
 *
 * @param {Array<{ results: Array<{ url: string }>, relevant: string[] }>} queries
 * @returns {number} average of reciprocalRank over all queries; 0 for an empty list
 */
function mrr(queries) {
  const count = queries.length;
  if (count === 0) {
    return 0;
  }
  let total = 0;
  queries.forEach((query) => {
    total += reciprocalRank(query.results, query.relevant);
  });
  return total / count;
}
5400
+
5401
+ // src/cli/test-schemas.ts
5402
+ import { z as z4 } from "zod";
5403
// Schema for one search-quality test case: a non-empty query plus an `expect`
// object that must define at least one of topResult, inTop5 or maxResults.
var testCaseSchema = z4.object({
  query: z4.string().min(1),
  expect: z4.object({
    topResult: z4.string().optional(),
    inTop5: z4.array(z4.string()).min(1).optional(),
    maxResults: z4.number().int().nonnegative().optional()
  }).refine(
    (expectation) => ["topResult", "inTop5", "maxResults"].some((key) => expectation[key] !== void 0),
    { message: "expect must contain at least one of topResult, inTop5, or maxResults" }
  )
});
// A test file is a non-empty array of test cases.
var testFileSchema = z4.array(testCaseSchema).min(1, "test file must contain at least one test case");
5415
+
5416
+ // src/cli.ts
5417
+ import * as clack from "@clack/prompts";
5418
+
5419
+ // src/init-helpers.ts
5420
+ import fs9 from "fs";
5421
+ import path13 from "path";
5422
+ import { parseModule, generateCode, builders } from "magicast";
5423
/**
 * Create or update `<cwd>/.mcp.json` so that it contains a `searchsocket`
 * entry under `mcpServers`.
 *
 * Other keys and other server entries in an existing file are preserved. The
 * file is left untouched (with a warning on stderr) when it cannot be parsed
 * or when its top-level value is not a JSON object, and nothing is written
 * when the entry is already identical.
 *
 * @param {string} cwd - project directory that holds `.mcp.json`
 * @returns {void}
 */
function ensureMcpJson(cwd) {
  const mcpPath = path13.join(cwd, ".mcp.json");
  // The ${...} placeholders are written literally into the JSON file.
  const entry = {
    command: "npx",
    args: ["searchsocket", "mcp"],
    env: {
      UPSTASH_VECTOR_REST_URL: "${UPSTASH_VECTOR_REST_URL}",
      UPSTASH_VECTOR_REST_TOKEN: "${UPSTASH_VECTOR_REST_TOKEN}"
    }
  };
  let existing = {};
  if (fs9.existsSync(mcpPath)) {
    let parsed;
    try {
      parsed = JSON.parse(fs9.readFileSync(mcpPath, "utf8"));
    } catch {
      process.stderr.write("warning: .mcp.json exists but could not be parsed \u2014 skipping\n");
      return;
    }
    // Valid JSON is not necessarily an object: `null`, a number or an array
    // cannot carry an mcpServers key, so treat them like an unparseable file.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      process.stderr.write("warning: .mcp.json exists but is not a JSON object \u2014 skipping\n");
      return;
    }
    existing = parsed;
  }
  const rawServers = existing.mcpServers ?? {};
  const servers = typeof rawServers === "object" && !Array.isArray(rawServers) ? rawServers : {};
  if (JSON.stringify(servers["searchsocket"]) === JSON.stringify(entry)) {
    return;
  }
  existing.mcpServers = { ...servers, searchsocket: entry };
  fs9.writeFileSync(mcpPath, JSON.stringify(existing, null, 2) + "\n", "utf8");
}
5451
// Hook code written to a new src/hooks.server.ts and shown to the user when
// automatic injection is not possible.
var HOOKS_SNIPPET = [
  'import { searchsocketHandle } from "searchsocket/sveltekit";',
  "",
  "export const handle = searchsocketHandle();"
].join("\n");
// Manual instructions shown when the Vite config cannot be patched automatically.
var VITE_PLUGIN_SNIPPET = [
  'import { searchsocketVitePlugin } from "searchsocket/sveltekit";',
  "",
  "// Add to your Vite config plugins array:",
  "// plugins: [sveltekit(), searchsocketVitePlugin()]"
].join("\n");
5458
/**
 * Wire `searchsocketHandle` into the project's SvelteKit server hooks.
 *
 * Looks for src/hooks.server.ts, then src/hooks.server.js.
 *
 * @param {string} cwd - project root
 * @returns {"created" | "already-present" | "composed" | "injected" | "fallback"}
 *   "created"         - no hooks file existed; a new .ts file was written from HOOKS_SNIPPET
 *   "already-present" - the file already mentions searchsocketHandle; nothing written
 *   "composed"        - an existing `handle` export was wrapped with sequence()
 *   "injected"        - a `handle` export was added
 *   "fallback"        - parsing or code generation threw; nothing written
 */
function injectHooksServerTs(cwd) {
  const srcDir = path13.join(cwd, "src");
  const tsFile = path13.join(srcDir, "hooks.server.ts");
  const jsFile = path13.join(srcDir, "hooks.server.js");
  // The .ts file wins when both exist.
  const target = [tsFile, jsFile].find((candidate) => fs9.existsSync(candidate));
  if (target === undefined) {
    fs9.mkdirSync(srcDir, { recursive: true });
    fs9.writeFileSync(tsFile, HOOKS_SNIPPET + "\n", "utf8");
    return "created";
  }
  const source = fs9.readFileSync(target, "utf8");
  if (source.includes("searchsocketHandle")) {
    return "already-present";
  }
  try {
    const mod = parseModule(source);
    mod.imports.$append({
      from: "searchsocket/sveltekit",
      imported: "searchsocketHandle"
    });
    let outcome;
    if (mod.exports.handle) {
      // Keep the user's handle: export sequence(searchsocketHandle(), <existing>).
      mod.imports.$append({
        from: "@sveltejs/kit/hooks",
        imported: "sequence"
      });
      const priorHandle = mod.exports.handle;
      mod.exports.handle = builders.functionCall(
        "sequence",
        builders.functionCall("searchsocketHandle"),
        priorHandle
      );
      outcome = "composed";
    } else {
      mod.exports.handle = builders.functionCall("searchsocketHandle");
      outcome = "injected";
    }
    const generated = generateCode(mod);
    fs9.writeFileSync(target, generated.code, "utf8");
    return outcome;
  } catch {
    return "fallback";
  }
}
5501
/**
 * Add `searchsocketVitePlugin()` to the project's Vite config.
 *
 * Looks for vite.config.ts, then vite.config.js, in `cwd`.
 *
 * @param {string} cwd - project root
 * @returns {"no-config" | "already-present" | "injected" | "fallback"}
 *   "no-config"       - neither config file exists
 *   "already-present" - the file already mentions searchsocketVitePlugin; nothing written
 *   "injected"        - the plugin call was added to the `plugins` array and the file rewritten
 *   "fallback"        - no default export, or parsing/generation threw; nothing written
 */
function injectViteConfig(cwd) {
  // The .ts file wins when both exist.
  const target = ["vite.config.ts", "vite.config.js"]
    .map((fileName) => path13.join(cwd, fileName))
    .find((candidate) => fs9.existsSync(candidate));
  if (target === undefined) {
    return "no-config";
  }
  const source = fs9.readFileSync(target, "utf8");
  if (source.includes("searchsocketVitePlugin")) {
    return "already-present";
  }
  try {
    const mod = parseModule(source);
    mod.imports.$append({
      from: "searchsocket/sveltekit",
      imported: "searchsocketVitePlugin"
    });
    const exported = mod.exports.default;
    if (!exported) {
      return "fallback";
    }
    // When the default export is a call (e.g. a config wrapper function),
    // operate on its first argument instead of the call itself.
    const viteConfig = exported.$type === "function-call" ? exported.$args[0] : exported;
    if (viteConfig.plugins) {
      viteConfig.plugins.push(builders.functionCall("searchsocketVitePlugin"));
    } else {
      viteConfig.plugins = [builders.functionCall("searchsocketVitePlugin")];
    }
    const generated = generateCode(mod);
    fs9.writeFileSync(target, generated.code, "utf8");
    return "injected";
  } catch {
    return "fallback";
  }
}
5537
/**
 * Append the Upstash credentials to `<cwd>/.env` and make sure `.env` is
 * listed in `.gitignore`.
 *
 * A key is only appended when the file has no assignment line for it, so
 * existing values are never overwritten. Detection is line-anchored: a
 * commented-out line (`# KEY=`) or a different key that merely ends with the
 * same name does not count as an assignment, while `export KEY=` and
 * `KEY = value` do.
 *
 * @param {string} cwd - project root
 * @param {string} url - value for UPSTASH_VECTOR_REST_URL
 * @param {string} token - value for UPSTASH_VECTOR_REST_TOKEN
 * @returns {void}
 */
function writeEnvFile(cwd, url, token) {
  const envPath = path13.join(cwd, ".env");
  let content = "";
  if (fs9.existsSync(envPath)) {
    content = fs9.readFileSync(envPath, "utf8");
  }
  // True when some line assigns `key` (optionally prefixed with `export`).
  const isAssigned = (key) => new RegExp(`^[ \\t]*(?:export[ \\t]+)?${key}[ \\t]*=`, "m").test(content);
  const lines = [];
  if (!isAssigned("UPSTASH_VECTOR_REST_URL")) {
    lines.push(`UPSTASH_VECTOR_REST_URL=${url}`);
  }
  if (!isAssigned("UPSTASH_VECTOR_REST_TOKEN")) {
    lines.push(`UPSTASH_VECTOR_REST_TOKEN=${token}`);
  }
  if (lines.length > 0) {
    // Start on a fresh line when the existing file lacks a trailing newline.
    const suffix = content.length > 0 && !content.endsWith("\n") ? "\n" : "";
    fs9.writeFileSync(envPath, content + suffix + lines.join("\n") + "\n", "utf8");
  }
  ensureGitignoreEntry(cwd, ".env");
}
5556
/**
 * Append `entry` to `<cwd>/.gitignore` unless a line already equals it
 * (compared after trimming whitespace). Creates the file when missing.
 *
 * @param {string} cwd - project root
 * @param {string} entry - ignore pattern to add
 * @returns {void}
 */
function ensureGitignoreEntry(cwd, entry) {
  const ignoreFile = path13.join(cwd, ".gitignore");
  const current = fs9.existsSync(ignoreFile) ? fs9.readFileSync(ignoreFile, "utf8") : "";
  for (const line of current.split("\n")) {
    if (line.trim() === entry) {
      return;
    }
  }
  // Start on a fresh line when the existing file lacks a trailing newline.
  const needsNewline = current.length > 0 && !current.endsWith("\n");
  fs9.writeFileSync(ignoreFile, `${current}${needsNewline ? "\n" : ""}${entry}\n`, "utf8");
}
5569
+
5570
+ // src/add-helpers.ts
5571
+ import fs10 from "fs";
5572
+ import fsp from "fs/promises";
5573
+ import path14 from "path";
5574
+ import { fileURLToPath } from "url";
5575
// Directory containing this module; component templates are resolved relative to it.
var __dirname = path14.dirname(fileURLToPath(import.meta.url));
// Names accepted by the component helpers below.
var AVAILABLE_COMPONENTS = ["search-dialog", "search-input", "search-results"];

/**
 * @returns {string} absolute path of the `templates` directory next to this module
 */
function resolveTemplateDir() {
  const templatesDir = path14.resolve(__dirname, "templates");
  return templatesDir;
}

/**
 * @returns {string[]} a fresh copy of the known component names
 */
function listAvailableComponents() {
  return Array.from(AVAILABLE_COMPONENTS);
}

/**
 * @param {string} name - candidate component name
 * @returns {boolean} whether `name` is one of the known components
 */
function isValidComponent(name) {
  return AVAILABLE_COMPONENTS.some((component) => component === name);
}
5586
/**
 * Copy the `.svelte` files of a component template into `targetDir`.
 *
 * @param {string} name - template sub-directory name under resolveTemplateDir()
 * @param {string} targetDir - destination directory (created when missing)
 * @param {{ overwrite?: boolean }} [options] - existing files are skipped unless
 *   `overwrite` is truthy
 * @returns {Promise<{ written: string[], skipped: string[] }>} destination paths
 *   that were copied and that were left untouched
 * @throws {Error} when the template directory is missing or has no `.svelte` files
 */
async function copyComponent(name, targetDir, options = {}) {
  const sourceDir = path14.join(resolveTemplateDir(), name);
  if (!fs10.existsSync(sourceDir)) {
    throw new Error(
      `Template directory not found: ${sourceDir}. Run "pnpm run build" to generate templates.`
    );
  }
  const allEntries = await fsp.readdir(sourceDir);
  const components = allEntries.filter((entry) => entry.endsWith(".svelte"));
  if (components.length === 0) {
    throw new Error(`No .svelte files found in template: ${name}`);
  }
  await fsp.mkdir(targetDir, { recursive: true });
  const written = [];
  const skipped = [];
  // Copy one file at a time so `written` / `skipped` keep directory-listing order.
  for (const fileName of components) {
    const destination = path14.join(targetDir, fileName);
    const keepExisting = fs10.existsSync(destination) && !options.overwrite;
    if (keepExisting) {
      skipped.push(destination);
    } else {
      await fsp.copyFile(path14.join(sourceDir, fileName), destination);
      written.push(destination);
    }
  }
  return { written, skipped };
}
5612
+
3084
5613
  // src/cli.ts
3085
5614
  function parsePositiveInt(value, flag) {
3086
5615
  const parsed = Number(value);
@@ -3117,6 +5646,10 @@ function parseDurationMs(value) {
3117
5646
  }
3118
5647
  function printIndexSummary(stats) {
3119
5648
  process.stdout.write(`pages processed: ${stats.pagesProcessed}
5649
+ `);
5650
+ process.stdout.write(`pages changed: ${stats.pagesChanged}
5651
+ `);
5652
+ process.stdout.write(`pages deleted: ${stats.pagesDeleted}
3120
5653
  `);
3121
5654
  process.stdout.write(`chunks total: ${stats.chunksTotal}
3122
5655
  `);
@@ -3138,7 +5671,7 @@ function collectWatchPaths(config, cwd) {
3138
5671
  const paths = ["src/routes/**"];
3139
5672
  if (config.source.mode === "content-files" && config.source.contentFiles) {
3140
5673
  for (const pattern of config.source.contentFiles.globs) {
3141
- paths.push(path12.join(config.source.contentFiles.baseDir, pattern));
5674
+ paths.push(path15.join(config.source.contentFiles.baseDir, pattern));
3142
5675
  }
3143
5676
  }
3144
5677
  if (config.source.mode === "static-output") {
@@ -3151,22 +5684,22 @@ function collectWatchPaths(config, cwd) {
3151
5684
  paths.push("searchsocket.config.ts");
3152
5685
  paths.push(config.source.build.outputDir);
3153
5686
  }
3154
- return paths.map((value) => path12.resolve(cwd, value));
5687
+ return paths.map((value) => path15.resolve(cwd, value));
3155
5688
  }
3156
5689
  function ensureStateDir(cwd) {
3157
- const target = path12.join(cwd, ".searchsocket");
3158
- fs8.mkdirSync(target, { recursive: true });
5690
+ const target = path15.join(cwd, ".searchsocket");
5691
+ fs11.mkdirSync(target, { recursive: true });
3159
5692
  return target;
3160
5693
  }
3161
5694
  function ensureGitignore(cwd) {
3162
- const gitignorePath = path12.join(cwd, ".gitignore");
5695
+ const gitignorePath = path15.join(cwd, ".gitignore");
3163
5696
  const entries = [
3164
5697
  ".searchsocket/manifest.json",
3165
5698
  ".searchsocket/registry.json"
3166
5699
  ];
3167
5700
  let content = "";
3168
- if (fs8.existsSync(gitignorePath)) {
3169
- content = fs8.readFileSync(gitignorePath, "utf8");
5701
+ if (fs11.existsSync(gitignorePath)) {
5702
+ content = fs11.readFileSync(gitignorePath, "utf8");
3170
5703
  }
3171
5704
  const lines = content.split("\n");
3172
5705
  const missing = entries.filter((entry) => !lines.some((line) => line.trim() === entry));
@@ -3177,10 +5710,10 @@ function ensureGitignore(cwd) {
3177
5710
  # SearchSocket local state
3178
5711
  ${missing.join("\n")}
3179
5712
  `;
3180
- fs8.writeFileSync(gitignorePath, content.trimEnd() + block, "utf8");
5713
+ fs11.writeFileSync(gitignorePath, content.trimEnd() + block, "utf8");
3181
5714
  }
3182
5715
  function readScopesFromFile(filePath) {
3183
- const raw = fs8.readFileSync(filePath, "utf8");
5716
+ const raw = fs11.readFileSync(filePath, "utf8");
3184
5717
  return new Set(
3185
5718
  raw.split(/\r?\n/).map((line) => line.trim()).filter(Boolean)
3186
5719
  );
@@ -3204,8 +5737,8 @@ function readRemoteGitBranches(cwd) {
3204
5737
  }
3205
5738
  }
3206
5739
  async function loadResolvedConfigForDev(cwd, configPath) {
3207
- const resolvedConfigPath = path12.resolve(cwd, configPath ?? "searchsocket.config.ts");
3208
- if (fs8.existsSync(resolvedConfigPath)) {
5740
+ const resolvedConfigPath = path15.resolve(cwd, configPath ?? "searchsocket.config.ts");
5741
+ if (fs11.existsSync(resolvedConfigPath)) {
3209
5742
  return loadConfig({ cwd, configPath });
3210
5743
  }
3211
5744
  return mergeConfig(cwd, {});
@@ -3248,31 +5781,157 @@ async function runIndexCommand(opts) {
3248
5781
  printIndexSummary(stats);
3249
5782
  }
3250
5783
  }
3251
- var program = new Command();
3252
- program.name("searchsocket").description("Semantic site search and MCP retrieval for SvelteKit").version(package_default.version).option("-C, --cwd <path>", "working directory", process.cwd()).option("--config <path>", "config path (defaults to searchsocket.config.ts)");
3253
- program.command("init").description("Create searchsocket.config.ts and .searchsocket state directory").action(async (_opts, command) => {
3254
- const root = getRootOptions(command).cwd ?? process.cwd();
3255
- const cwd = path12.resolve(root);
5784
/**
 * Interactive `init` flow driven by @clack/prompts.
 *
 * Steps, in order:
 *   1. write the minimal config, state dir, .gitignore entries and .mcp.json
 *   2. if either Upstash credential is missing from the environment, offer to
 *      prompt for the missing value(s) and write them to .env
 *   3. inject searchsocketHandle into the server hooks file
 *   4. inject searchsocketVitePlugin into the Vite config
 *
 * Cancelling any prompt prints "Setup cancelled." and exits the process with
 * code 0.
 *
 * @param {string} cwd - absolute project root
 * @returns {Promise<void>}
 */
async function runInteractiveInit(cwd) {
  // Pass a prompt answer through unless the user cancelled the prompt.
  const exitIfCancelled = (answer) => {
    if (clack.isCancel(answer)) {
      clack.cancel("Setup cancelled.");
      process.exit(0);
    }
    return answer;
  };
  clack.intro("searchsocket setup");
  const s = clack.spinner();
  s.start("Creating config files");
  writeMinimalConfig(cwd);
  ensureStateDir(cwd);
  ensureGitignore(cwd);
  ensureMcpJson(cwd);
  s.stop("Config files created");
  const hasUrl = Boolean(process.env.UPSTASH_VECTOR_REST_URL);
  const hasToken = Boolean(process.env.UPSTASH_VECTOR_REST_TOKEN);
  if (!hasUrl || !hasToken) {
    // The variables are UPSTASH_VECTOR_*, so name the product accordingly.
    clack.log.warn("Upstash Vector credentials not found in environment.");
    const shouldConfigure = exitIfCancelled(await clack.confirm({
      message: "Would you like to configure Upstash credentials now?",
      initialValue: true
    }));
    if (shouldConfigure) {
      // Only prompt for the value(s) that are actually missing.
      const url = hasUrl ? process.env.UPSTASH_VECTOR_REST_URL : exitIfCancelled(await clack.text({
        message: "Upstash Vector REST URL:",
        placeholder: "https://your-index.upstash.io",
        validate: (v) => !v ? "URL is required" : void 0
      }));
      const token = hasToken ? process.env.UPSTASH_VECTOR_REST_TOKEN : exitIfCancelled(await clack.text({
        message: "Upstash Vector REST Token:",
        placeholder: "AX...",
        validate: (v) => !v ? "Token is required" : void 0
      }));
      writeEnvFile(cwd, url, token);
      clack.log.success("Credentials written to .env");
    }
  } else {
    clack.log.success("Upstash credentials found in environment.");
  }
  s.start("Configuring hooks.server.ts");
  const hookResult = injectHooksServerTs(cwd);
  s.stop("hooks.server.ts configured");
  switch (hookResult) {
    case "created":
      clack.log.success("Created src/hooks.server.ts with searchsocketHandle.");
      break;
    case "injected":
      clack.log.success("Added searchsocketHandle to src/hooks.server.ts.");
      break;
    case "composed":
      clack.log.success("Composed searchsocketHandle with existing handle using sequence().");
      break;
    case "already-present":
      clack.log.info("searchsocketHandle already configured in hooks.server.ts.");
      break;
    case "fallback":
      clack.log.warn("Could not auto-inject hooks.server.ts. Add manually:");
      clack.log.message(HOOKS_SNIPPET);
      break;
  }
  s.start("Configuring Vite plugin");
  const viteResult = injectViteConfig(cwd);
  s.stop("Vite plugin configured");
  switch (viteResult) {
    case "injected":
      clack.log.success("Added searchsocketVitePlugin to Vite config.");
      break;
    case "already-present":
      clack.log.info("searchsocketVitePlugin already in Vite config.");
      break;
    case "no-config":
      clack.log.warn("No vite.config.ts/js found. Add the plugin manually:");
      clack.log.message(VITE_PLUGIN_SNIPPET);
      break;
    case "fallback":
      clack.log.warn("Could not auto-inject Vite config. Add manually:");
      clack.log.message(VITE_PLUGIN_SNIPPET);
      break;
  }
  clack.log.info("Run `searchsocket doctor` to verify your setup.");
  clack.outro("SearchSocket initialized! Run `searchsocket index` to index your site.");
}
5873
/**
 * Non-interactive `init` flow: performs the same file setup as the interactive
 * flow but never prompts, reporting each step as a plain line on stdout.
 *
 * @param {string} cwd - absolute project root
 * @returns {Promise<void>}
 */
async function runSilentInit(cwd) {
  const emit = (text) => {
    process.stdout.write(text);
  };
  // Write every chunk registered for `outcome`; unknown outcomes print nothing.
  const report = (table, outcome) => {
    for (const chunk of table.get(outcome) ?? []) {
      emit(chunk);
    }
  };

  const configPath = writeMinimalConfig(cwd);
  const stateDir = ensureStateDir(cwd);
  ensureGitignore(cwd);
  ensureMcpJson(cwd);
  emit(`created/verified config: ${configPath}\n`);
  emit(`created/verified state dir: ${stateDir}\n`);
  emit("created/verified .mcp.json (MCP server config for Claude Code)\n\n");

  const hookMessages = new Map([
    ["created", ["created src/hooks.server.ts with searchsocketHandle\n"]],
    ["injected", ["added searchsocketHandle to src/hooks.server.ts\n"]],
    ["composed", ["composed searchsocketHandle with existing handle via sequence()\n"]],
    ["already-present", ["searchsocketHandle already present in hooks.server.ts\n"]],
    ["fallback", ["could not auto-inject hooks.server.ts \u2014 add manually:\n\n", HOOKS_SNIPPET + "\n\n"]]
  ]);
  report(hookMessages, injectHooksServerTs(cwd));

  const viteMessages = new Map([
    ["injected", ["added searchsocketVitePlugin to Vite config\n"]],
    ["already-present", ["searchsocketVitePlugin already in Vite config\n"]],
    ["no-config", ["no vite.config.ts/js found \u2014 add plugin manually:\n\n", VITE_PLUGIN_SNIPPET + "\n\n"]],
    ["fallback", ["could not auto-inject Vite config \u2014 add manually:\n\n", VITE_PLUGIN_SNIPPET + "\n\n"]]
  ]);
  report(viteMessages, injectViteConfig(cwd));
}
5920
// Root CLI program; -C/--cwd and --config are global options read by every subcommand.
var program = new Command();
program
  .name("searchsocket")
  .description("Semantic site search and MCP retrieval for SvelteKit")
  .version(package_default.version)
  .option("-C, --cwd <path>", "working directory", process.cwd())
  .option("--config <path>", "config path (defaults to searchsocket.config.ts)");
// `init`: prompts only when stdout is a TTY and --non-interactive was not
// passed; otherwise runs the silent flow.
program
  .command("init")
  .description("Initialize SearchSocket in a SvelteKit project")
  .option("--non-interactive", "skip interactive prompts")
  .action(async (opts, command) => {
    const root = getRootOptions(command).cwd ?? process.cwd();
    const cwd = path15.resolve(root);
    const isInteractive = Boolean(process.stdout.isTTY) && !opts.nonInteractive;
    const runInit = isInteractive ? runInteractiveInit : runSilentInit;
    await runInit(cwd);
  });
3273
5932
  program.command("index").description("Index site content into Upstash Search").option("--scope <name>", "scope override").option("--changed-only", "only process changed chunks", true).option("--no-changed-only", "re-index regardless of previous manifest").option("--force", "force full rebuild", false).option("--dry-run", "compute plan, no writes", false).option("--source <mode>", "source mode override: static-output|crawl|content-files|build").option("--max-pages <n>", "limit pages processed").option("--max-chunks <n>", "limit chunks processed").option("--quiet", "suppress all output except errors and warnings", false).option("--verbose", "verbose output", false).option("--json", "emit JSON logs and summary", false).action(async (opts, command) => {
3274
5933
  const rootOpts = getRootOptions(command);
3275
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
5934
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3276
5935
  await runIndexCommand({
3277
5936
  cwd,
3278
5937
  configPath: rootOpts?.config,
@@ -3290,7 +5949,7 @@ program.command("index").description("Index site content into Upstash Search").o
3290
5949
  });
3291
5950
  program.command("status").description("Show scope, indexing state, and backend health").option("--scope <name>", "scope override").action(async (opts, command) => {
3292
5951
  const rootOpts = getRootOptions(command);
3293
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
5952
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3294
5953
  const config = await loadConfig({ cwd, configPath: rootOpts?.config });
3295
5954
  const scope = resolveScope(config, opts.scope);
3296
5955
  let store;
@@ -3329,7 +5988,7 @@ program.command("status").description("Show scope, indexing state, and backend h
3329
5988
  `);
3330
5989
  process.stdout.write(`resolved scope: ${scope.scopeName}
3331
5990
  `);
3332
- process.stdout.write(`backend: upstash-search
5991
+ process.stdout.write(`backend: upstash-vector
3333
5992
  `);
3334
5993
  process.stdout.write(`backend health: ${health.ok ? "ok" : `error (${health.details ?? "n/a"})`}
3335
5994
  `);
@@ -3354,19 +6013,31 @@ program.command("status").description("Show scope, indexing state, and backend h
3354
6013
  }
3355
6014
  }
3356
6015
  });
3357
- program.command("dev").description("Watch content files/routes and incrementally reindex on changes").option("--scope <name>", "scope override").option("--mcp", "start MCP server (http transport) alongside watcher", false).option("--mcp-port <n>", "MCP HTTP port", "3338").option("--mcp-path <path>", "MCP HTTP path", "/mcp").option("--verbose", "verbose logs", false).action(async (opts, command) => {
6016
+ program.command("dev").description("Watch content files/routes and incrementally reindex on changes").option("--scope <name>", "scope override").option("--playground", "serve playground UI at /_searchsocket (default: true)", true).option("--no-playground", "disable playground UI").option("--playground-port <n>", "playground HTTP port", "3337").option("--mcp", "start MCP server (http transport) alongside watcher", false).option("--mcp-port <n>", "MCP HTTP port", "3338").option("--mcp-path <path>", "MCP HTTP path", "/mcp").option("--verbose", "verbose logs", false).action(async (opts, command) => {
3358
6017
  const rootOpts = getRootOptions(command);
3359
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
6018
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3360
6019
  const config = await loadResolvedConfigForDev(cwd, rootOpts?.config);
3361
6020
  const watchPaths = collectWatchPaths(config, cwd);
3362
6021
  process.stdout.write("starting searchsocket dev watcher...\n");
3363
6022
  process.stdout.write(`watching:
3364
6023
  ${watchPaths.map((entry) => ` - ${entry}`).join("\n")}
3365
6024
  `);
6025
+ const upstashUrl = config.upstash.url ?? process.env[config.upstash.urlEnv];
6026
+ const upstashToken = config.upstash.token ?? process.env[config.upstash.tokenEnv];
6027
+ const backendMissing = !upstashUrl || !upstashToken;
6028
+ if (backendMissing) {
6029
+ process.stdout.write(
6030
+ `Search backend not configured \u2014 set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} to enable indexing. Watching for file changes only.
6031
+ `
6032
+ );
6033
+ }
3366
6034
  let running = false;
3367
6035
  let pending = false;
3368
6036
  let timer = null;
3369
6037
  const run = async () => {
6038
+ if (backendMissing) {
6039
+ return;
6040
+ }
3370
6041
  if (running) {
3371
6042
  pending = true;
3372
6043
  return;
@@ -3417,18 +6088,40 @@ ${watchPaths.map((entry) => ` - ${entry}`).join("\n")}
3417
6088
  httpPath: opts.mcpPath
3418
6089
  });
3419
6090
  }
6091
+ let closePlayground;
6092
+ if (opts.playground) {
6093
+ if (backendMissing) {
6094
+ process.stdout.write("playground disabled: search backend not configured\n");
6095
+ } else {
6096
+ void runPlaygroundServer({
6097
+ cwd,
6098
+ configPath: rootOpts?.config,
6099
+ config,
6100
+ port: parsePositiveInt(opts.playgroundPort, "--playground-port")
6101
+ }).then(({ port, close }) => {
6102
+ closePlayground = close;
6103
+ process.stdout.write(`playground available at http://127.0.0.1:${port}/_searchsocket
6104
+ `);
6105
+ }).catch((err) => {
6106
+ process.stderr.write(`playground error: ${err instanceof Error ? err.message : String(err)}
6107
+ `);
6108
+ });
6109
+ }
6110
+ }
3420
6111
  await new Promise((resolve) => {
3421
6112
  process.on("SIGINT", () => {
3422
- void watcher.close().then(() => resolve());
6113
+ const cleanups = [watcher.close()];
6114
+ if (closePlayground) cleanups.push(closePlayground());
6115
+ void Promise.all(cleanups).then(() => resolve());
3423
6116
  });
3424
6117
  });
3425
6118
  });
3426
6119
  program.command("clean").description("Delete local state and optionally delete remote indexes for a scope").option("--scope <name>", "scope override").option("--remote", "delete remote scope indexes", false).action(async (opts, command) => {
3427
6120
  const rootOpts = getRootOptions(command);
3428
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
6121
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3429
6122
  const config = await loadConfig({ cwd, configPath: rootOpts?.config });
3430
- const statePath = path12.join(cwd, config.state.dir);
3431
- await fsp.rm(statePath, { recursive: true, force: true });
6123
+ const statePath = path15.join(cwd, config.state.dir);
6124
+ await fsp2.rm(statePath, { recursive: true, force: true });
3432
6125
  process.stdout.write(`deleted local state directory: ${statePath}
3433
6126
  `);
3434
6127
  if (opts.remote) {
@@ -3440,7 +6133,7 @@ program.command("clean").description("Delete local state and optionally delete r
3440
6133
  });
3441
6134
  program.command("prune").description("List/delete stale scopes (dry-run by default)").option("--apply", "apply deletions", false).option("--scopes-file <path>", "file containing active scopes").option("--older-than <duration>", "ttl cutoff like 30d").action(async (opts, command) => {
3442
6135
  const rootOpts = getRootOptions(command);
3443
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
6136
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3444
6137
  const config = await loadConfig({ cwd, configPath: rootOpts?.config });
3445
6138
  const baseScope = resolveScope(config);
3446
6139
  let store;
@@ -3450,17 +6143,17 @@ program.command("prune").description("List/delete stale scopes (dry-run by defau
3450
6143
  scopes = await store.listScopes(config.project.id);
3451
6144
  } catch (error) {
3452
6145
  process.stderr.write(
3453
- `error: failed to access Upstash Search: ${error instanceof Error ? error.message : String(error)}
6146
+ `error: failed to access Upstash Vector: ${error instanceof Error ? error.message : String(error)}
3454
6147
  `
3455
6148
  );
3456
6149
  process.exitCode = 1;
3457
6150
  return;
3458
6151
  }
3459
- process.stdout.write(`using Upstash Search
6152
+ process.stdout.write(`using Upstash Vector
3460
6153
  `);
3461
6154
  let keepScopes = /* @__PURE__ */ new Set();
3462
6155
  if (opts.scopesFile) {
3463
- keepScopes = readScopesFromFile(path12.resolve(cwd, opts.scopesFile));
6156
+ keepScopes = readScopesFromFile(path15.resolve(cwd, opts.scopesFile));
3464
6157
  } else {
3465
6158
  keepScopes = readRemoteGitBranches(cwd);
3466
6159
  }
@@ -3531,7 +6224,7 @@ program.command("prune").description("List/delete stale scopes (dry-run by defau
3531
6224
  });
3532
6225
  program.command("doctor").description("Validate config, env vars, provider connectivity, and local write access").action(async (_opts, command) => {
3533
6226
  const rootOpts = getRootOptions(command);
3534
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
6227
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3535
6228
  const checks = [];
3536
6229
  let config = null;
3537
6230
  try {
@@ -3558,8 +6251,8 @@ program.command("doctor").description("Validate config, env vars, provider conne
3558
6251
  details: upstashToken ? void 0 : "missing"
3559
6252
  });
3560
6253
  if (config.source.mode === "static-output") {
3561
- const outputDir = path12.resolve(cwd, config.source.staticOutputDir);
3562
- const exists = fs8.existsSync(outputDir);
6254
+ const outputDir = path15.resolve(cwd, config.source.staticOutputDir);
6255
+ const exists = fs11.existsSync(outputDir);
3563
6256
  checks.push({
3564
6257
  name: "source: static output dir",
3565
6258
  ok: exists,
@@ -3568,15 +6261,15 @@ program.command("doctor").description("Validate config, env vars, provider conne
3568
6261
  } else if (config.source.mode === "build") {
3569
6262
  const buildConfig = config.source.build;
3570
6263
  if (buildConfig) {
3571
- const manifestPath = path12.resolve(cwd, buildConfig.outputDir, "server", "manifest-full.js");
3572
- const manifestExists = fs8.existsSync(manifestPath);
6264
+ const manifestPath = path15.resolve(cwd, buildConfig.outputDir, "server", "manifest-full.js");
6265
+ const manifestExists = fs11.existsSync(manifestPath);
3573
6266
  checks.push({
3574
6267
  name: "source: build manifest",
3575
6268
  ok: manifestExists,
3576
6269
  details: manifestExists ? manifestPath : `${manifestPath} not found (run \`vite build\` first)`
3577
6270
  });
3578
- const viteBin = path12.resolve(cwd, "node_modules", ".bin", "vite");
3579
- const viteExists = fs8.existsSync(viteBin);
6271
+ const viteBin = path15.resolve(cwd, "node_modules", ".bin", "vite");
6272
+ const viteExists = fs11.existsSync(viteBin);
3580
6273
  checks.push({
3581
6274
  name: "source: vite binary",
3582
6275
  ok: viteExists,
@@ -3593,7 +6286,7 @@ program.command("doctor").description("Validate config, env vars, provider conne
3593
6286
  const contentConfig = config.source.contentFiles;
3594
6287
  if (contentConfig) {
3595
6288
  const fg4 = await import("fast-glob");
3596
- const baseDir = path12.resolve(cwd, contentConfig.baseDir);
6289
+ const baseDir = path15.resolve(cwd, contentConfig.baseDir);
3597
6290
  const files = await fg4.default(contentConfig.globs, { cwd: baseDir, onlyFiles: true });
3598
6291
  checks.push({
3599
6292
  name: "source: content files",
@@ -3627,9 +6320,9 @@ program.command("doctor").description("Validate config, env vars, provider conne
3627
6320
  try {
3628
6321
  const scope = resolveScope(config);
3629
6322
  const { statePath } = ensureStateDirs(cwd, config.state.dir, scope);
3630
- const testPath = path12.join(statePath, ".write-test");
3631
- await fsp.writeFile(testPath, "ok\n", "utf8");
3632
- await fsp.rm(testPath, { force: true });
6323
+ const testPath = path15.join(statePath, ".write-test");
6324
+ await fsp2.writeFile(testPath, "ok\n", "utf8");
6325
+ await fsp2.rm(testPath, { force: true });
3633
6326
  checks.push({ name: "state directory writable", ok: true });
3634
6327
  } catch (error) {
3635
6328
  checks.push({
@@ -3654,20 +6347,22 @@ program.command("doctor").description("Validate config, env vars, provider conne
3654
6347
  process.exitCode = 1;
3655
6348
  }
3656
6349
  });
3657
// `mcp` subcommand: runs the SearchSocket MCP server.
// Root-level options (--cwd, --config) are read through getRootOptions; --port is
// validated with parsePositiveInt before anything is handed to runMcpServer, and
// --access is limited to "public" | "private" by commander's choices().
program
  .command("mcp")
  .description("Run SearchSocket MCP server")
  .option("--transport <transport>", "stdio|http", "stdio")
  .option("--port <n>", "HTTP port", "3338")
  .option("--path <path>", "HTTP path", "/mcp")
  .addOption(new Option("--access <mode>", "access mode").choices(["public", "private"]))
  .option("--api-key <key>", "API key for public access mode")
  .action(async (flags, cmd) => {
    const globals = getRootOptions(cmd);
    const workingDir = path15.resolve(globals?.cwd ?? process.cwd());
    const { transport, port, path: httpPath, access, apiKey } = flags;
    // Throws on a bad --port value before the server is started.
    const httpPort = parsePositiveInt(port, "--port");
    await runMcpServer({
      cwd: workingDir,
      configPath: globals?.config,
      transport,
      httpPort,
      httpPath,
      access,
      apiKey
    });
  });
3668
6363
  program.command("search").description("Quick CLI search against Upstash Search").requiredOption("--q <query>", "search query").option("--scope <name>", "scope override").option("--top-k <n>", "top K results", "10").option("--path-prefix <prefix>", "path prefix filter").action(async (opts, command) => {
3669
6364
  const rootOpts = getRootOptions(command);
3670
- const cwd = path12.resolve(rootOpts?.cwd ?? process.cwd());
6365
+ const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
3671
6366
  const engine = await SearchEngine.create({
3672
6367
  cwd,
3673
6368
  configPath: rootOpts?.config
@@ -3681,8 +6376,156 @@ program.command("search").description("Quick CLI search against Upstash Search")
3681
6376
  process.stdout.write(`${JSON.stringify(result, null, 2)}
3682
6377
  `);
3683
6378
  });
6379
// `test` subcommand: runs search-quality assertions from a JSON test file against
// the index and prints one PASS/FAIL line per assertion plus a summary.
//
// Flow:
//   1. Read and JSON-parse the file given by --file (resolved against the root --cwd).
//   2. Validate it with testFileSchema; the first schema issue is reported on failure.
//   3. For every test case run engine.search() once, then evaluate whichever of
//      expect.topResult / expect.inTop5 / expect.maxResults are present. Each one
//      counts as a separate assertion.
//   4. Print totals, MRR (only when at least one URL-based assertion ran) and the
//      pass rate. process.exitCode is set to 1 if anything failed.
program
  .command("test")
  .description("Run search quality assertions against the live index")
  .option("--file <path>", "path to test file", "searchsocket.test.json")
  .option("--scope <name>", "scope override")
  .option("--top-k <n>", "results per query", "10")
  .action(async (opts, command) => {
    const rootOpts = getRootOptions(command);
    const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());
    const topK = parsePositiveInt(opts.topK, "--top-k");
    const filePath = path15.resolve(cwd, opts.file);

    // Report a fatal setup problem on stderr and mark the run as failed.
    const fatal = (message) => {
      process.stderr.write(`error: ${message}\n`);
      process.exitCode = 1;
    };
    const describeError = (error) => (error instanceof Error ? error.message : String(error));

    let rawContent;
    try {
      rawContent = await fsp2.readFile(filePath, "utf8");
    } catch (error) {
      // Only a missing file is "not found"; permission errors, directories, etc.
      // get their real cause reported instead of a misleading message.
      if (error?.code === "ENOENT") {
        fatal(`test file not found: ${filePath}`);
      } else {
        fatal(`failed to read test file ${filePath}: ${describeError(error)}`);
      }
      return;
    }

    let rawJson;
    try {
      rawJson = JSON.parse(rawContent);
    } catch (error) {
      // Keep the parser's message so the user can locate the syntax error.
      fatal(`invalid JSON in ${filePath}: ${describeError(error)}`);
      return;
    }

    const parsed = testFileSchema.safeParse(rawJson);
    if (!parsed.success) {
      fatal(`invalid test file: ${parsed.error.issues[0]?.message ?? "unknown error"}`);
      return;
    }
    const testCases = parsed.data;

    const engine = await SearchEngine.create({
      cwd,
      configPath: rootOpts?.config
    });

    let passed = 0;
    let failed = 0;
    // One entry per URL-based assertion; fed to mrr() for the summary line.
    const mrrData = [];

    // Print a single assertion outcome and update the counters.
    const report = (ok, query, detail) => {
      process.stdout.write(`${ok ? "PASS" : "FAIL"} "${query}" \u2192 ${detail}\n`);
      if (ok) {
        passed++;
      } else {
        failed++;
      }
    };

    for (const tc of testCases) {
      let results;
      try {
        const response = await engine.search({
          q: tc.query,
          topK,
          scope: opts.scope
        });
        results = response.results;
      } catch (error) {
        // A failed search counts as one failed assertion for this case.
        report(false, tc.query, `search error: ${describeError(error)}`);
        continue;
      }

      // 1-based rank of a URL in the result list; 0 means "not present".
      const rankOf = (url) => results.findIndex((r) => r.url === url) + 1;

      if (tc.expect.topResult !== void 0) {
        const expectedUrl = tc.expect.topResult;
        const rank = rankOf(expectedUrl);
        mrrData.push({ results, relevant: [expectedUrl] });
        if (rank === 1) {
          report(true, tc.query, `${expectedUrl} at rank 1`);
        } else {
          const detail = rank === 0 ? "not found" : `got rank ${rank}`;
          report(false, tc.query, `expected ${expectedUrl} at rank 1, ${detail}`);
        }
      }

      if (tc.expect.inTop5 !== void 0) {
        const expectedUrls = tc.expect.inTop5;
        const top5Urls = results.slice(0, 5).map((r) => r.url);
        const missing = expectedUrls.filter((url) => !top5Urls.includes(url));
        mrrData.push({ results, relevant: expectedUrls });
        if (missing.length === 0) {
          report(true, tc.query, "all expected URLs in top 5");
        } else {
          const missingDetail = missing
            .map((url) => {
              const rank = rankOf(url);
              return rank === 0 ? `${url} (not found)` : `${url} (rank ${rank})`;
            })
            .join(", ");
          report(false, tc.query, `missing from top 5: ${missingDetail}`);
        }
      }

      if (tc.expect.maxResults !== void 0) {
        const max = tc.expect.maxResults;
        const actual = results.length;
        if (actual <= max) {
          report(true, tc.query, `${actual} results (max ${max})`);
        } else {
          report(false, tc.query, `expected at most ${max} results, got ${actual}`);
        }
      }
    }

    const total = passed + failed;
    process.stdout.write(`\nresults: ${passed} passed, ${failed} failed of ${total} assertions\n`);
    if (mrrData.length > 0) {
      const mrrValue = mrr(mrrData);
      process.stdout.write(`MRR: ${mrrValue.toFixed(4)}\n`);
    }
    // Guard the division so an empty run prints 0.0% instead of NaN%.
    const passRate = total > 0 ? (passed / total * 100).toFixed(1) : "0.0";
    process.stdout.write(`pass rate: ${passRate}%\n`);
    if (failed > 0) {
      process.exitCode = 1;
    }
  });
6496
// `add <component>` subcommand: copies a Svelte 5 search component template into
// the project (default target: src/lib/components/search under the root --cwd).
//
// Prints one "created:" line per written file and one "skipped (exists):" line per
// file copyComponent reports as skipped, then a usage hint for the first written file.
// An unknown component name lists the available ones on stderr and fails the run.
program
  .command("add <component>")
  .description("Copy a Svelte 5 search component template into your project")
  .option("--dir <path>", "output directory", "src/lib/components/search")
  .option("--overwrite", "overwrite existing files", false)
  .action(async (component, opts, command) => {
    // Same root-option handling as the other subcommands: tolerate missing root options.
    const rootOpts = getRootOptions(command);
    const cwd = path15.resolve(rootOpts?.cwd ?? process.cwd());

    if (!isValidComponent(component)) {
      const available = listAvailableComponents();
      process.stderr.write(`unknown component: ${component}\n`);
      process.stderr.write(`available components: ${available.join(", ")}\n`);
      // Set exitCode and return instead of process.exit(): a hard exit can drop the
      // stderr output above when it is piped, and the sibling commands fail this way too.
      process.exitCode = 1;
      return;
    }

    const targetDir = path15.resolve(cwd, opts.dir);
    const result = await copyComponent(component, targetDir, { overwrite: opts.overwrite });

    for (const filePath of result.written) {
      process.stdout.write(`created: ${path15.relative(cwd, filePath)}\n`);
    }
    for (const filePath of result.skipped) {
      process.stdout.write(`skipped (exists): ${path15.relative(cwd, filePath)}\n`);
    }

    const firstWritten = result.written[0];
    if (firstWritten) {
      process.stdout.write("\nUsage:\n");
      const fileName = path15.basename(firstWritten, ".svelte");
      // Normalise Windows separators so the printed import specifier uses "/".
      const importPath = path15.relative(cwd, firstWritten).replace(/\\/g, "/");
      process.stdout.write(`  import ${fileName} from "${importPath}";\n`);
    }
  });
3684
6527
  async function main() {
3685
- dotenvConfig({ path: path12.resolve(process.cwd(), ".env") });
6528
+ dotenvConfig({ path: path15.resolve(process.cwd(), ".env") });
3686
6529
  await program.parseAsync(process.argv);
3687
6530
  }
3688
6531
  main().catch((error) => {