searchsocket 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,9 +1,9 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/cli.ts
4
- import fs9 from "fs";
4
+ import fs10 from "fs";
5
5
  import fsp from "fs/promises";
6
- import path13 from "path";
6
+ import path14 from "path";
7
7
  import { execSync as execSync2 } from "child_process";
8
8
  import { config as dotenvConfig } from "dotenv";
9
9
  import chokidar from "chokidar";
@@ -12,7 +12,7 @@ import { Command } from "commander";
12
12
  // package.json
13
13
  var package_default = {
14
14
  name: "searchsocket",
15
- version: "0.3.2",
15
+ version: "0.4.0",
16
16
  description: "Semantic site search and MCP retrieval for SvelteKit static sites",
17
17
  license: "MIT",
18
18
  author: "Greg Priday <greg@siteorigin.com>",
@@ -115,6 +115,8 @@ var searchSocketConfigSchema = z.object({
115
115
  envVar: z.string().min(1).optional(),
116
116
  sanitize: z.boolean().optional()
117
117
  }).optional(),
118
+ exclude: z.array(z.string()).optional(),
119
+ respectRobotsTxt: z.boolean().optional(),
118
120
  source: z.object({
119
121
  mode: z.enum(["static-output", "crawl", "content-files", "build"]).optional(),
120
122
  staticOutputDir: z.string().min(1).optional(),
@@ -245,6 +247,8 @@ function createDefaultConfig(projectId) {
245
247
  envVar: "SEARCHSOCKET_SCOPE",
246
248
  sanitize: true
247
249
  },
250
+ exclude: [],
251
+ respectRobotsTxt: true,
248
252
  source: {
249
253
  mode: "static-output",
250
254
  staticOutputDir: "build",
@@ -275,7 +279,7 @@ function createDefaultConfig(projectId) {
275
279
  },
276
280
  embeddings: {
277
281
  provider: "jina",
278
- model: "jina-embeddings-v3",
282
+ model: "jina-embeddings-v5-text-small",
279
283
  apiKeyEnv: "JINA_API_KEY",
280
284
  batchSize: 64,
281
285
  concurrency: 4
@@ -288,9 +292,9 @@ function createDefaultConfig(projectId) {
288
292
  }
289
293
  },
290
294
  rerank: {
291
- enabled: false,
295
+ enabled: true,
292
296
  topN: 20,
293
- model: "jina-reranker-v2-base-multilingual"
297
+ model: "jina-reranker-v3"
294
298
  },
295
299
  ranking: {
296
300
  enableIncomingLinkBoost: true,
@@ -393,6 +397,8 @@ ${issues}`
393
397
  ...defaults.scope,
394
398
  ...parsed.scope
395
399
  },
400
+ exclude: parsed.exclude ?? defaults.exclude,
401
+ respectRobotsTxt: parsed.respectRobotsTxt ?? defaults.respectRobotsTxt,
396
402
  source: {
397
403
  ...defaults.source,
398
404
  ...parsed.source,
@@ -829,7 +835,7 @@ function createEmbeddingsProvider(config) {
829
835
  }
830
836
 
831
837
  // src/indexing/pipeline.ts
832
- import path11 from "path";
838
+ import path12 from "path";
833
839
 
834
840
  // src/vector/factory.ts
835
841
  import fs3 from "fs";
@@ -1710,6 +1716,17 @@ function extractFromHtml(url, html, config) {
1710
1716
  if ($(`[${config.extract.noindexAttr}]`).length > 0) {
1711
1717
  return null;
1712
1718
  }
1719
+ const weightRaw = $("meta[name='searchsocket-weight']").attr("content")?.trim();
1720
+ let weight;
1721
+ if (weightRaw !== void 0) {
1722
+ const parsed = Number(weightRaw);
1723
+ if (Number.isFinite(parsed) && parsed >= 0) {
1724
+ weight = parsed;
1725
+ }
1726
+ }
1727
+ if (weight === 0) {
1728
+ return null;
1729
+ }
1713
1730
  const description = $("meta[name='description']").attr("content")?.trim() || $("meta[property='og:description']").attr("content")?.trim() || void 0;
1714
1731
  const keywordsRaw = $("meta[name='keywords']").attr("content")?.trim();
1715
1732
  const keywords = keywordsRaw ? keywordsRaw.split(",").map((k) => k.trim()).filter(Boolean) : void 0;
@@ -1765,7 +1782,8 @@ function extractFromHtml(url, html, config) {
1765
1782
  noindex: false,
1766
1783
  tags,
1767
1784
  description,
1768
- keywords
1785
+ keywords,
1786
+ weight
1769
1787
  };
1770
1788
  }
1771
1789
  function extractFromMarkdown(url, markdown, title) {
@@ -1778,6 +1796,14 @@ function extractFromMarkdown(url, markdown, title) {
1778
1796
  if (frontmatter.noindex === true || searchsocketMeta?.noindex === true) {
1779
1797
  return null;
1780
1798
  }
1799
+ let mdWeight;
1800
+ const rawWeight = searchsocketMeta?.weight ?? frontmatter.searchsocketWeight;
1801
+ if (typeof rawWeight === "number" && Number.isFinite(rawWeight) && rawWeight >= 0) {
1802
+ mdWeight = rawWeight;
1803
+ }
1804
+ if (mdWeight === 0) {
1805
+ return null;
1806
+ }
1781
1807
  const content = parsed.content;
1782
1808
  const normalized = normalizeMarkdown(content);
1783
1809
  if (!normalizeText(normalized)) {
@@ -1800,7 +1826,8 @@ function extractFromMarkdown(url, markdown, title) {
1800
1826
  noindex: false,
1801
1827
  tags: normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1),
1802
1828
  description: fmDescription,
1803
- keywords: fmKeywords
1829
+ keywords: fmKeywords,
1830
+ weight: mdWeight
1804
1831
  };
1805
1832
  }
1806
1833
 
@@ -1937,6 +1964,38 @@ import pLimit2 from "p-limit";
1937
1964
  // src/indexing/sources/build/manifest-parser.ts
1938
1965
  import fs5 from "fs/promises";
1939
1966
  import path7 from "path";
1967
+
1968
+ // src/utils/pattern.ts
1969
+ function matchUrlPattern(url, pattern) {
1970
+ const norm = (p) => p !== "/" && p.endsWith("/") ? p.slice(0, -1) : p;
1971
+ const normalizedUrl = norm(url);
1972
+ const normalizedPattern = norm(pattern);
1973
+ if (normalizedPattern.endsWith("/**")) {
1974
+ const prefix = normalizedPattern.slice(0, -3);
1975
+ if (prefix === "") {
1976
+ return true;
1977
+ }
1978
+ return normalizedUrl === prefix || normalizedUrl.startsWith(prefix + "/");
1979
+ }
1980
+ if (normalizedPattern.endsWith("/*")) {
1981
+ const prefix = normalizedPattern.slice(0, -2);
1982
+ if (prefix === "") {
1983
+ return normalizedUrl !== "/" && !normalizedUrl.slice(1).includes("/");
1984
+ }
1985
+ if (!normalizedUrl.startsWith(prefix + "/")) return false;
1986
+ const rest = normalizedUrl.slice(prefix.length + 1);
1987
+ return rest.length > 0 && !rest.includes("/");
1988
+ }
1989
+ return normalizedUrl === normalizedPattern;
1990
+ }
1991
+ function matchUrlPatterns(url, patterns) {
1992
+ for (const pattern of patterns) {
1993
+ if (matchUrlPattern(url, pattern)) return true;
1994
+ }
1995
+ return false;
1996
+ }
1997
+
1998
+ // src/indexing/sources/build/manifest-parser.ts
1940
1999
  function routeIdToFile(routeId) {
1941
2000
  if (routeId === "/") {
1942
2001
  return "src/routes/+page.svelte";
@@ -2010,15 +2069,7 @@ function expandDynamicUrl(url, value) {
2010
2069
  return url.replace(/\[\[?\.\.\.[^\]]+\]?\]|\[\[[^\]]+\]\]|\[[^\]]+\]/g, value);
2011
2070
  }
2012
2071
  function isExcluded(url, patterns) {
2013
- for (const pattern of patterns) {
2014
- if (pattern.endsWith("/*")) {
2015
- const prefix = pattern.slice(0, -1);
2016
- if (url.startsWith(prefix) || url === prefix.slice(0, -1)) return true;
2017
- } else if (url === pattern) {
2018
- return true;
2019
- }
2020
- }
2021
- return false;
2072
+ return matchUrlPatterns(url, patterns);
2022
2073
  }
2023
2074
 
2024
2075
  // src/indexing/sources/build/preview-server.ts
@@ -2456,6 +2507,162 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
2456
2507
  return pages;
2457
2508
  }
2458
2509
 
2510
+ // src/indexing/robots.ts
2511
+ import fs9 from "fs/promises";
2512
+ import path11 from "path";
2513
/**
 * Parse the text of a robots.txt file and return the rule group that applies
 * to `userAgent`.
 *
 * Grouping rules:
 * - One or more consecutive `User-agent` lines open a group; the `Allow` /
 *   `Disallow` lines that follow belong to every agent named in that group.
 * - A `User-agent` line that appears after at least one rule starts a NEW
 *   group, so later rules do not leak into earlier agents.
 * - Directives other than `User-agent`, `Allow` and `Disallow` (for example
 *   `Sitemap` or `Crawl-delay`) are ignored and do not end the current group.
 * - An empty `Disallow:` / `Allow:` value adds no pattern but still counts as
 *   a rule for the purpose of closing the run of `User-agent` lines.
 *
 * Selection: the group whose agent name equals `userAgent` (case-insensitive)
 * is returned whenever such a group was declared, even if it has no patterns
 * (an explicit "allow everything"). Otherwise the `*` group is used, and if
 * that is missing too an empty rule set is returned.
 *
 * @param {string} content - Raw robots.txt text.
 * @param {string} [userAgent="Searchsocket"] - Agent name to select rules for.
 * @returns {{disallow: string[], allow: string[]}} Path patterns for the agent.
 */
function parseRobotsTxt(content, userAgent = "Searchsocket") {
  const agentGroups = new Map();
  let currentAgents = [];
  // True once the current group has received an allow/disallow line; the next
  // User-agent line then opens a fresh group instead of extending this one.
  let groupHasRules = false;

  for (const rawLine of content.split(/\r?\n/)) {
    // Strip trailing comments, then surrounding whitespace.
    const line = rawLine.replace(/#.*$/, "").trim();
    if (!line) continue;

    const colonIdx = line.indexOf(":");
    if (colonIdx === -1) continue;

    const directive = line.slice(0, colonIdx).trim().toLowerCase();
    const value = line.slice(colonIdx + 1).trim();

    if (directive === "user-agent") {
      if (groupHasRules) {
        currentAgents = [];
        groupHasRules = false;
      }
      const agentName = value.toLowerCase();
      currentAgents.push(agentName);
      if (!agentGroups.has(agentName)) {
        agentGroups.set(agentName, { disallow: [], allow: [] });
      }
    } else if (directive === "disallow" || directive === "allow") {
      groupHasRules = true;
      if (!value) continue;
      for (const agent of currentAgents) {
        agentGroups.get(agent)[directive].push(value);
      }
    }
    // Any other directive is unrelated to grouping and is skipped.
  }

  return (
    agentGroups.get(userAgent.toLowerCase()) ??
    agentGroups.get("*") ??
    { disallow: [], allow: [] }
  );
}
2548
/**
 * Test one robots.txt path pattern against a URL path.
 *
 * Supports the two special characters used in robots.txt patterns: `*`
 * matches any run of characters (including none) and a trailing `$` anchors
 * the pattern to the end of the path. Without `$` the pattern is a prefix
 * match. Matching is done with plain string scans rather than a generated
 * regular expression, so patterns taken from a remote robots.txt cannot cause
 * catastrophic backtracking.
 *
 * @param {string} urlPath - URL path to test.
 * @param {string} pattern - robots.txt allow/disallow pattern.
 * @returns {boolean} Whether the pattern matches the path.
 */
function robotsPatternMatches(urlPath, pattern) {
  const anchored = pattern.endsWith("$");
  const segments = (anchored ? pattern.slice(0, -1) : pattern).split("*");

  const head = segments[0];
  if (!urlPath.startsWith(head)) return false;
  let cursor = head.length;

  if (segments.length === 1) {
    return !anchored || cursor === urlPath.length;
  }

  // Leftmost placement of every middle literal leaves the most room for the
  // remaining ones, so a greedy scan is sufficient.
  for (let i = 1; i < segments.length - 1; i++) {
    const found = urlPath.indexOf(segments[i], cursor);
    if (found === -1) return false;
    cursor = found + segments[i].length;
  }

  const tail = segments[segments.length - 1];
  if (anchored) {
    return urlPath.length - tail.length >= cursor && urlPath.endsWith(tail);
  }
  return urlPath.indexOf(tail, cursor) !== -1;
}

/**
 * Length of the longest pattern in `patterns` that matches `urlPath`
 * (0 when none match).
 *
 * @param {string} urlPath - URL path to test.
 * @param {string[]} patterns - robots.txt patterns.
 * @returns {number} Character length of the longest matching pattern.
 */
function longestRobotsMatch(urlPath, patterns) {
  let longest = 0;
  for (const pattern of patterns) {
    if (pattern.length > longest && robotsPatternMatches(urlPath, pattern)) {
      longest = pattern.length;
    }
  }
  return longest;
}

/**
 * Decide whether `urlPath` is blocked by a parsed robots.txt rule set.
 *
 * The longest matching pattern wins; when the longest allow and disallow
 * patterns have the same length the path is allowed. A path with no matching
 * disallow pattern is never blocked.
 *
 * @param {string} urlPath - URL path to test.
 * @param {{disallow: string[], allow: string[]}} rules - Parsed rule set.
 * @returns {boolean} True when the path must be skipped.
 */
function isBlockedByRobots(urlPath, rules) {
  const disallowLength = longestRobotsMatch(urlPath, rules.disallow);
  if (disallowLength === 0) return false;
  return longestRobotsMatch(urlPath, rules.allow) < disallowLength;
}
2564
/**
 * Read `robots.txt` from `dir` and parse it.
 *
 * Best-effort: any failure while building the path, reading the file or
 * parsing it results in `null` rather than an exception.
 *
 * @param {string} dir - Directory expected to contain robots.txt.
 * @returns {Promise<object|null>} Parsed rules, or null when unavailable.
 */
async function loadRobotsTxtFromDir(dir) {
  let rules = null;
  try {
    const robotsFile = path11.join(dir, "robots.txt");
    const text = await fs9.readFile(robotsFile, "utf8");
    rules = parseRobotsTxt(text);
  } catch {
    rules = null;
  }
  return rules;
}
2572
/**
 * Download `/robots.txt` from the origin of `baseUrl` and parse it.
 *
 * Best-effort: an invalid base URL, a network failure or a non-OK HTTP
 * response all yield `null`.
 *
 * @param {string} baseUrl - Site URL the robots.txt location is resolved against.
 * @returns {Promise<object|null>} Parsed rules, or null when unavailable.
 */
async function fetchRobotsTxt(baseUrl) {
  try {
    const robotsUrl = new URL("/robots.txt", baseUrl);
    const res = await fetch(robotsUrl.href);
    if (res.ok) {
      const body = await res.text();
      return parseRobotsTxt(body);
    }
    return null;
  } catch {
    return null;
  }
}
2583
+
2584
+ // src/search/ranking.ts
2585
/**
 * Clamp a value to the range [0, +inf), mapping anything that is not a finite
 * number (NaN, infinities, non-numbers) to 0.
 *
 * @param {*} value - Candidate number.
 * @returns {number} `value` when it is finite and positive, otherwise 0.
 */
function nonNegativeOrZero(value) {
  return Number.isFinite(value) ? Math.max(0, value) : 0;
}
2591
/**
 * Attach a final ranking score to every vector hit and order the hits from
 * best to worst.
 *
 * The base score is the hit's own score (negative infinity when it is not a
 * finite number). When enabled in `config.ranking`, two boosts are added:
 * `log(1 + incomingLinks) * weights.incomingLinks` and
 * `1 / (1 + depth) * weights.depth`. A non-finite total becomes negative
 * infinity so such hits sort last.
 *
 * @param {Array<object>} hits - Hits with `score` and `metadata`.
 * @param {object} config - Resolved config; `config.ranking` is read.
 * @returns {Array<{hit: object, finalScore: number}>} Scored hits, descending.
 */
function rankHits(hits, config) {
  const byFinalScoreDesc = (left, right) => {
    const diff = right.finalScore - left.finalScore;
    // Comparing two infinities of the same sign yields NaN; treat as a tie.
    return Number.isNaN(diff) ? 0 : diff;
  };

  const scoreHit = (hit) => {
    const ranking = config.ranking;
    let total = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
    if (ranking.enableIncomingLinkBoost) {
      const linkCount = nonNegativeOrZero(hit.metadata.incomingLinks);
      total += Math.log(1 + linkCount) * ranking.weights.incomingLinks;
    }
    if (ranking.enableDepthBoost) {
      const depth = nonNegativeOrZero(hit.metadata.depth);
      total += (1 / (1 + depth)) * ranking.weights.depth;
    }
    const finalScore = Number.isFinite(total) ? total : Number.NEGATIVE_INFINITY;
    return { hit, finalScore };
  };

  return hits.map((hit) => scoreHit(hit)).sort(byFinalScoreDesc);
}
2611
/**
 * Look up the ranking weight configured for a URL.
 *
 * Every key of `pageWeights` is a URL pattern understood by
 * `matchUrlPattern`. Resolution order:
 *   1. A pattern that equals the URL itself (ignoring one trailing slash)
 *      always wins, so an exact entry such as "/docs" is not overridden by a
 *      longer wildcard entry such as "/docs/**" that also matches "/docs".
 *   2. Otherwise the matching pattern with the longest text wins.
 *   3. With no matching pattern the weight is 1.
 *
 * @param {string} url - URL path of the page.
 * @param {Object<string, number>} pageWeights - Pattern-to-weight map.
 * @returns {number} The weight to apply to the page.
 */
function findPageWeight(url, pageWeights) {
  const trimSlash = (p) => (p !== "/" && p.endsWith("/") ? p.slice(0, -1) : p);
  const normalizedUrl = trimSlash(url);
  let bestPattern = "";
  let bestWeight = 1;
  for (const [pattern, weight] of Object.entries(pageWeights)) {
    if (!matchUrlPattern(url, pattern)) continue;
    // An exact entry is the most specific rule possible; stop searching.
    if (trimSlash(pattern) === normalizedUrl) {
      return weight;
    }
    if (pattern.length > bestPattern.length) {
      bestPattern = pattern;
      bestWeight = weight;
    }
  }
  return bestWeight;
}
2622
/**
 * Collapse chunk-level ranked hits into one entry per page URL.
 *
 * For each URL the chunks are sorted by `finalScore` (descending). The page
 * score is the best chunk's score plus a bonus built from the next chunks
 * (up to `ranking.aggregationCap` chunks in total), each multiplied by
 * `aggregationDecay ** position` and the sum scaled by
 * `ranking.weights.aggregation`. The score is then multiplied by the page
 * weight from `ranking.pageWeights`; pages whose weight is exactly 0 are
 * dropped. Non-finite page scores become negative infinity.
 *
 * @param {Array<{hit: object, finalScore: number}>} ranked - Scored chunk hits.
 * @param {object} config - Resolved config; `config.ranking` is read.
 * @returns {Array<object>} Page entries sorted by `pageScore`, descending.
 */
function aggregateByPage(ranked, config) {
  // Builds a descending comparator over a numeric property; NaN deltas
  // (for example -Infinity minus -Infinity) count as ties.
  const descendingBy = (key) => (left, right) => {
    const diff = right[key] - left[key];
    return Number.isNaN(diff) ? 0 : diff;
  };

  const chunksByUrl = new Map();
  for (const entry of ranked) {
    const pageUrl = entry.hit.metadata.url;
    const bucket = chunksByUrl.get(pageUrl);
    if (bucket) {
      bucket.push(entry);
    } else {
      chunksByUrl.set(pageUrl, [entry]);
    }
  }

  const { aggregationCap, aggregationDecay } = config.ranking;
  const pages = [];
  chunksByUrl.forEach((chunks, url) => {
    chunks.sort(descendingBy("finalScore"));
    const best = chunks[0];
    const maxScore = Number.isFinite(best.finalScore) ? best.finalScore : Number.NEGATIVE_INFINITY;

    // Position 0 is the best chunk itself and contributes no bonus.
    const aggregationBonus = chunks.slice(0, aggregationCap).reduce((sum, chunk, position) => {
      if (position === 0) return sum;
      const chunkScore = Number.isFinite(chunk.finalScore) ? chunk.finalScore : 0;
      return sum + chunkScore * Math.pow(aggregationDecay, position);
    }, 0);

    let pageScore = maxScore + aggregationBonus * config.ranking.weights.aggregation;
    const pageWeight = findPageWeight(url, config.ranking.pageWeights);
    if (pageWeight === 0) return;
    if (pageWeight !== 1) {
      pageScore *= pageWeight;
    }

    pages.push({
      url,
      title: best.hit.metadata.title,
      routeFile: best.hit.metadata.routeFile,
      pageScore: Number.isFinite(pageScore) ? pageScore : Number.NEGATIVE_INFINITY,
      bestChunk: best,
      matchingChunks: chunks
    });
  });

  return pages.sort(descendingBy("pageScore"));
}
2665
+
2459
2666
  // src/utils/time.ts
2460
2667
  function nowIso() {
2461
2668
  return (/* @__PURE__ */ new Date()).toISOString();
@@ -2466,9 +2673,10 @@ function hrTimeMs(start) {
2466
2673
 
2467
2674
  // src/indexing/pipeline.ts
2468
2675
  var EMBEDDING_PRICE_PER_1K_TOKENS_USD = {
2469
- "jina-embeddings-v3": 2e-5
2676
+ "jina-embeddings-v3": 2e-5,
2677
+ "jina-embeddings-v5-text-small": 5e-5
2470
2678
  };
2471
- var DEFAULT_EMBEDDING_PRICE_PER_1K = 2e-5;
2679
+ var DEFAULT_EMBEDDING_PRICE_PER_1K = 5e-5;
2472
2680
  var IndexPipeline = class _IndexPipeline {
2473
2681
  cwd;
2474
2682
  config;
@@ -2483,7 +2691,7 @@ var IndexPipeline = class _IndexPipeline {
2483
2691
  this.logger = options.logger;
2484
2692
  }
2485
2693
  static async create(options = {}) {
2486
- const cwd = path11.resolve(options.cwd ?? process.cwd());
2694
+ const cwd = path12.resolve(options.cwd ?? process.cwd());
2487
2695
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
2488
2696
  const embeddings = options.embeddingsProvider ?? createEmbeddingsProvider(config);
2489
2697
  const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
@@ -2546,6 +2754,53 @@ var IndexPipeline = class _IndexPipeline {
2546
2754
  }
2547
2755
  stageEnd("source", sourceStart);
2548
2756
  this.logger.info(`Loaded ${sourcePages.length} page${sourcePages.length === 1 ? "" : "s"} (${stageTimingsMs["source"]}ms)`);
2757
+ const filterStart = stageStart();
2758
+ let filteredSourcePages = sourcePages;
2759
+ if (this.config.exclude.length > 0) {
2760
+ const beforeExclude = filteredSourcePages.length;
2761
+ filteredSourcePages = filteredSourcePages.filter((p) => {
2762
+ const url = normalizeUrlPath(p.url);
2763
+ if (matchUrlPatterns(url, this.config.exclude)) {
2764
+ this.logger.debug(`Excluding ${url} (matched exclude pattern)`);
2765
+ return false;
2766
+ }
2767
+ return true;
2768
+ });
2769
+ const excludedCount = beforeExclude - filteredSourcePages.length;
2770
+ if (excludedCount > 0) {
2771
+ this.logger.info(`Excluded ${excludedCount} page${excludedCount === 1 ? "" : "s"} by config exclude patterns`);
2772
+ }
2773
+ }
2774
+ if (this.config.respectRobotsTxt) {
2775
+ let robotsRules = null;
2776
+ if (sourceMode === "static-output") {
2777
+ robotsRules = await loadRobotsTxtFromDir(
2778
+ path12.resolve(this.cwd, this.config.source.staticOutputDir)
2779
+ );
2780
+ } else if (sourceMode === "build" && this.config.source.build) {
2781
+ robotsRules = await loadRobotsTxtFromDir(
2782
+ path12.resolve(this.cwd, this.config.source.build.outputDir)
2783
+ );
2784
+ } else if (sourceMode === "crawl" && this.config.source.crawl) {
2785
+ robotsRules = await fetchRobotsTxt(this.config.source.crawl.baseUrl);
2786
+ }
2787
+ if (robotsRules) {
2788
+ const beforeRobots = filteredSourcePages.length;
2789
+ filteredSourcePages = filteredSourcePages.filter((p) => {
2790
+ const url = normalizeUrlPath(p.url);
2791
+ if (isBlockedByRobots(url, robotsRules)) {
2792
+ this.logger.debug(`Excluding ${url} (blocked by robots.txt)`);
2793
+ return false;
2794
+ }
2795
+ return true;
2796
+ });
2797
+ const robotsExcluded = beforeRobots - filteredSourcePages.length;
2798
+ if (robotsExcluded > 0) {
2799
+ this.logger.info(`Excluded ${robotsExcluded} page${robotsExcluded === 1 ? "" : "s"} by robots.txt`);
2800
+ }
2801
+ }
2802
+ }
2803
+ stageEnd("filter", filterStart);
2549
2804
  const routeStart = stageStart();
2550
2805
  const routePatterns = await buildRoutePatterns(this.cwd);
2551
2806
  stageEnd("route_map", routeStart);
@@ -2553,7 +2808,7 @@ var IndexPipeline = class _IndexPipeline {
2553
2808
  const extractStart = stageStart();
2554
2809
  this.logger.info("Extracting content...");
2555
2810
  const extractedPages = [];
2556
- for (const sourcePage of sourcePages) {
2811
+ for (const sourcePage of filteredSourcePages) {
2557
2812
  const extracted = sourcePage.html ? extractFromHtml(sourcePage.url, sourcePage.html, this.config) : extractFromMarkdown(sourcePage.url, sourcePage.markdown ?? "", sourcePage.title);
2558
2813
  if (!extracted) {
2559
2814
  this.logger.warn(
@@ -2579,16 +2834,29 @@ var IndexPipeline = class _IndexPipeline {
2579
2834
  seenUrls.add(page.url);
2580
2835
  uniquePages.push(page);
2581
2836
  }
2837
+ const indexablePages = [];
2838
+ for (const page of uniquePages) {
2839
+ const effectiveWeight = page.weight ?? findPageWeight(page.url, this.config.ranking.pageWeights);
2840
+ if (effectiveWeight === 0) {
2841
+ this.logger.debug(`Excluding ${page.url} (zero weight)`);
2842
+ continue;
2843
+ }
2844
+ indexablePages.push(page);
2845
+ }
2846
+ const zeroWeightCount = uniquePages.length - indexablePages.length;
2847
+ if (zeroWeightCount > 0) {
2848
+ this.logger.info(`Excluded ${zeroWeightCount} page${zeroWeightCount === 1 ? "" : "s"} with zero weight`);
2849
+ }
2582
2850
  stageEnd("extract", extractStart);
2583
- const skippedPages = sourcePages.length - uniquePages.length;
2584
- this.logger.info(`Extracted ${uniquePages.length} page${uniquePages.length === 1 ? "" : "s"}${skippedPages > 0 ? ` (${skippedPages} skipped)` : ""} (${stageTimingsMs["extract"]}ms)`);
2851
+ const skippedPages = filteredSourcePages.length - indexablePages.length;
2852
+ this.logger.info(`Extracted ${indexablePages.length} page${indexablePages.length === 1 ? "" : "s"}${skippedPages > 0 ? ` (${skippedPages} skipped)` : ""} (${stageTimingsMs["extract"]}ms)`);
2585
2853
  const linkStart = stageStart();
2586
- const pageSet = new Set(uniquePages.map((page) => normalizeUrlPath(page.url)));
2854
+ const pageSet = new Set(indexablePages.map((page) => normalizeUrlPath(page.url)));
2587
2855
  const incomingLinkCount = /* @__PURE__ */ new Map();
2588
- for (const page of uniquePages) {
2856
+ for (const page of indexablePages) {
2589
2857
  incomingLinkCount.set(page.url, incomingLinkCount.get(page.url) ?? 0);
2590
2858
  }
2591
- for (const page of uniquePages) {
2859
+ for (const page of indexablePages) {
2592
2860
  for (const outgoing of page.outgoingLinks) {
2593
2861
  if (!pageSet.has(outgoing)) {
2594
2862
  continue;
@@ -2612,7 +2880,7 @@ var IndexPipeline = class _IndexPipeline {
2612
2880
  });
2613
2881
  }
2614
2882
  }
2615
- for (const page of uniquePages) {
2883
+ for (const page of indexablePages) {
2616
2884
  const routeMatch = precomputedRoutes.get(normalizeUrlPath(page.url)) ?? mapUrlToRoute(page.url, routePatterns);
2617
2885
  if (routeMatch.routeResolution === "best-effort") {
2618
2886
  if (this.config.source.strictRouteMapping) {
@@ -2838,7 +3106,7 @@ import { createMcpExpressApp } from "@modelcontextprotocol/sdk/server/express.js
2838
3106
  import { z as z3 } from "zod";
2839
3107
 
2840
3108
  // src/search/engine.ts
2841
- import path12 from "path";
3109
+ import path13 from "path";
2842
3110
  import { z as z2 } from "zod";
2843
3111
 
2844
3112
  // src/rerank/jina.ts
@@ -2854,7 +3122,7 @@ var JinaReranker = class {
2854
3122
  constructor(options) {
2855
3123
  this.apiKey = options.apiKey;
2856
3124
  this.model = options.model;
2857
- this.maxRetries = options.maxRetries ?? 4;
3125
+ this.maxRetries = options.maxRetries ?? 2;
2858
3126
  }
2859
3127
  async rerank(query, candidates, topN) {
2860
3128
  if (candidates.length === 0) {
@@ -2864,7 +3132,8 @@ var JinaReranker = class {
2864
3132
  model: this.model,
2865
3133
  query,
2866
3134
  documents: candidates.map((candidate) => candidate.text),
2867
- top_n: topN ?? candidates.length
3135
+ top_n: topN ?? candidates.length,
3136
+ return_documents: false
2868
3137
  };
2869
3138
  let attempt = 0;
2870
3139
  while (attempt <= this.maxRetries) {
@@ -2937,98 +3206,6 @@ function createReranker(config) {
2937
3206
  });
2938
3207
  }
2939
3208
 
2940
- // src/search/ranking.ts
2941
- function nonNegativeOrZero(value) {
2942
- if (!Number.isFinite(value)) {
2943
- return 0;
2944
- }
2945
- return Math.max(0, value);
2946
- }
2947
- function rankHits(hits, config) {
2948
- return hits.map((hit) => {
2949
- let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
2950
- if (config.ranking.enableIncomingLinkBoost) {
2951
- const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.metadata.incomingLinks));
2952
- score += incomingBoost * config.ranking.weights.incomingLinks;
2953
- }
2954
- if (config.ranking.enableDepthBoost) {
2955
- const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
2956
- score += depthBoost * config.ranking.weights.depth;
2957
- }
2958
- return {
2959
- hit,
2960
- finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
2961
- };
2962
- }).sort((a, b) => {
2963
- const delta = b.finalScore - a.finalScore;
2964
- return Number.isNaN(delta) ? 0 : delta;
2965
- });
2966
- }
2967
- function findPageWeight(url, pageWeights) {
2968
- const norm = (p) => p !== "/" && p.endsWith("/") ? p.slice(0, -1) : p;
2969
- const normalizedUrl = norm(url);
2970
- for (const [pattern, weight] of Object.entries(pageWeights)) {
2971
- if (norm(pattern) === normalizedUrl) {
2972
- return weight;
2973
- }
2974
- }
2975
- let bestPrefix = "";
2976
- let bestWeight = 1;
2977
- for (const [pattern, weight] of Object.entries(pageWeights)) {
2978
- const normalizedPattern = norm(pattern);
2979
- if (normalizedPattern === "/") continue;
2980
- const prefix = `${normalizedPattern}/`;
2981
- if (normalizedUrl.startsWith(prefix) && prefix.length > bestPrefix.length) {
2982
- bestPrefix = prefix;
2983
- bestWeight = weight;
2984
- }
2985
- }
2986
- return bestWeight;
2987
- }
2988
- function aggregateByPage(ranked, config) {
2989
- const groups = /* @__PURE__ */ new Map();
2990
- for (const hit of ranked) {
2991
- const url = hit.hit.metadata.url;
2992
- const group = groups.get(url);
2993
- if (group) group.push(hit);
2994
- else groups.set(url, [hit]);
2995
- }
2996
- const { aggregationCap, aggregationDecay } = config.ranking;
2997
- const pages = [];
2998
- for (const [url, chunks] of groups) {
2999
- chunks.sort((a, b) => {
3000
- const delta = b.finalScore - a.finalScore;
3001
- return Number.isNaN(delta) ? 0 : delta;
3002
- });
3003
- const best = chunks[0];
3004
- const maxScore = Number.isFinite(best.finalScore) ? best.finalScore : Number.NEGATIVE_INFINITY;
3005
- const topChunks = chunks.slice(0, aggregationCap);
3006
- let aggregationBonus = 0;
3007
- for (let i = 1; i < topChunks.length; i++) {
3008
- const chunkScore = Number.isFinite(topChunks[i].finalScore) ? topChunks[i].finalScore : 0;
3009
- aggregationBonus += chunkScore * Math.pow(aggregationDecay, i);
3010
- }
3011
- let pageScore = maxScore + aggregationBonus * config.ranking.weights.aggregation;
3012
- const pageWeight = findPageWeight(url, config.ranking.pageWeights);
3013
- if (pageWeight === 0) continue;
3014
- if (pageWeight !== 1) {
3015
- pageScore *= pageWeight;
3016
- }
3017
- pages.push({
3018
- url,
3019
- title: best.hit.metadata.title,
3020
- routeFile: best.hit.metadata.routeFile,
3021
- pageScore: Number.isFinite(pageScore) ? pageScore : Number.NEGATIVE_INFINITY,
3022
- bestChunk: best,
3023
- matchingChunks: chunks
3024
- });
3025
- }
3026
- return pages.sort((a, b) => {
3027
- const delta = b.pageScore - a.pageScore;
3028
- return Number.isNaN(delta) ? 0 : delta;
3029
- });
3030
- }
3031
-
3032
3209
  // src/search/engine.ts
3033
3210
  var requestSchema = z2.object({
3034
3211
  q: z2.string().trim().min(1),
@@ -3037,7 +3214,8 @@ var requestSchema = z2.object({
3037
3214
  pathPrefix: z2.string().optional(),
3038
3215
  tags: z2.array(z2.string()).optional(),
3039
3216
  rerank: z2.boolean().optional(),
3040
- groupBy: z2.enum(["page", "chunk"]).optional()
3217
+ groupBy: z2.enum(["page", "chunk"]).optional(),
3218
+ stream: z2.boolean().optional()
3041
3219
  });
3042
3220
  var SearchEngine = class _SearchEngine {
3043
3221
  cwd;
@@ -3053,7 +3231,7 @@ var SearchEngine = class _SearchEngine {
3053
3231
  this.reranker = options.reranker;
3054
3232
  }
3055
3233
  static async create(options = {}) {
3056
- const cwd = path12.resolve(options.cwd ?? process.cwd());
3234
+ const cwd = path13.resolve(options.cwd ?? process.cwd());
3057
3235
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
3058
3236
  const embeddings = options.embeddingsProvider ?? createEmbeddingsProvider(config);
3059
3237
  const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
@@ -3110,7 +3288,103 @@ var SearchEngine = class _SearchEngine {
3110
3288
  rerankMs = hrTimeMs(rerankStart);
3111
3289
  usedRerank = true;
3112
3290
  }
3113
- let results;
3291
+ const results = this.buildResults(ordered, topK, groupByPage);
3292
+ return {
3293
+ q: input.q,
3294
+ scope: resolvedScope.scopeName,
3295
+ results,
3296
+ meta: {
3297
+ timingsMs: {
3298
+ embed: Math.round(embedMs),
3299
+ vector: Math.round(vectorMs),
3300
+ rerank: Math.round(rerankMs),
3301
+ total: Math.round(hrTimeMs(totalStart))
3302
+ },
3303
+ usedRerank,
3304
+ modelId: this.config.embeddings.model
3305
+ }
3306
+ };
3307
+ }
3308
+ async *searchStreaming(request) {
3309
+ const parsed = requestSchema.safeParse(request);
3310
+ if (!parsed.success) {
3311
+ throw new SearchSocketError("INVALID_REQUEST", parsed.error.issues[0]?.message ?? "Invalid request", 400);
3312
+ }
3313
+ const input = parsed.data;
3314
+ const wantsRerank = Boolean(input.rerank);
3315
+ if (!wantsRerank) {
3316
+ const response = await this.search(request);
3317
+ yield { phase: "initial", data: response };
3318
+ return;
3319
+ }
3320
+ const totalStart = process.hrtime.bigint();
3321
+ const resolvedScope = resolveScope(this.config, input.scope);
3322
+ await this.assertModelCompatibility(resolvedScope);
3323
+ const topK = input.topK ?? 10;
3324
+ const groupByPage = (input.groupBy ?? "page") === "page";
3325
+ const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
3326
+ const embedStart = process.hrtime.bigint();
3327
+ const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
3328
+ const queryVector = queryEmbeddings[0];
3329
+ if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
3330
+ throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
3331
+ }
3332
+ const embedMs = hrTimeMs(embedStart);
3333
+ const vectorStart = process.hrtime.bigint();
3334
+ const hits = await this.vectorStore.query(
3335
+ queryVector,
3336
+ {
3337
+ topK: candidateK,
3338
+ pathPrefix: input.pathPrefix,
3339
+ tags: input.tags
3340
+ },
3341
+ resolvedScope
3342
+ );
3343
+ const vectorMs = hrTimeMs(vectorStart);
3344
+ const ranked = rankHits(hits, this.config);
3345
+ const initialResults = this.buildResults(ranked, topK, groupByPage);
3346
+ yield {
3347
+ phase: "initial",
3348
+ data: {
3349
+ q: input.q,
3350
+ scope: resolvedScope.scopeName,
3351
+ results: initialResults,
3352
+ meta: {
3353
+ timingsMs: {
3354
+ embed: Math.round(embedMs),
3355
+ vector: Math.round(vectorMs),
3356
+ rerank: 0,
3357
+ total: Math.round(hrTimeMs(totalStart))
3358
+ },
3359
+ usedRerank: false,
3360
+ modelId: this.config.embeddings.model
3361
+ }
3362
+ }
3363
+ };
3364
+ const rerankStart = process.hrtime.bigint();
3365
+ const reranked = await this.rerankHits(input.q, ranked, topK);
3366
+ const rerankMs = hrTimeMs(rerankStart);
3367
+ const rerankedResults = this.buildResults(reranked, topK, groupByPage);
3368
+ yield {
3369
+ phase: "reranked",
3370
+ data: {
3371
+ q: input.q,
3372
+ scope: resolvedScope.scopeName,
3373
+ results: rerankedResults,
3374
+ meta: {
3375
+ timingsMs: {
3376
+ embed: Math.round(embedMs),
3377
+ vector: Math.round(vectorMs),
3378
+ rerank: Math.round(rerankMs),
3379
+ total: Math.round(hrTimeMs(totalStart))
3380
+ },
3381
+ usedRerank: true,
3382
+ modelId: this.config.embeddings.model
3383
+ }
3384
+ }
3385
+ };
3386
+ }
3387
+ buildResults(ordered, topK, groupByPage) {
3114
3388
  const minScore = this.config.ranking.minScore;
3115
3389
  if (groupByPage) {
3116
3390
  let pages = aggregateByPage(ordered, this.config);
@@ -3118,10 +3392,10 @@ var SearchEngine = class _SearchEngine {
3118
3392
  pages = pages.filter((p) => p.pageScore >= minScore);
3119
3393
  }
3120
3394
  const minRatio = this.config.ranking.minChunkScoreRatio;
3121
- results = pages.slice(0, topK).map((page) => {
3395
+ return pages.slice(0, topK).map((page) => {
3122
3396
  const bestScore = page.bestChunk.finalScore;
3123
- const minScore2 = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
3124
- const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minScore2).slice(0, 5);
3397
+ const minChunkScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
3398
+ const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, 5);
3125
3399
  return {
3126
3400
  url: page.url,
3127
3401
  title: page.title,
@@ -3138,10 +3412,11 @@ var SearchEngine = class _SearchEngine {
3138
3412
  };
3139
3413
  });
3140
3414
  } else {
3415
+ let filtered = ordered;
3141
3416
  if (minScore > 0) {
3142
- ordered = ordered.filter((entry) => entry.finalScore >= minScore);
3417
+ filtered = ordered.filter((entry) => entry.finalScore >= minScore);
3143
3418
  }
3144
- results = ordered.slice(0, topK).map(({ hit, finalScore }) => ({
3419
+ return filtered.slice(0, topK).map(({ hit, finalScore }) => ({
3145
3420
  url: hit.metadata.url,
3146
3421
  title: hit.metadata.title,
3147
3422
  sectionTitle: hit.metadata.sectionTitle || void 0,
@@ -3150,21 +3425,6 @@ var SearchEngine = class _SearchEngine {
3150
3425
  routeFile: hit.metadata.routeFile
3151
3426
  }));
3152
3427
  }
3153
- return {
3154
- q: input.q,
3155
- scope: resolvedScope.scopeName,
3156
- results,
3157
- meta: {
3158
- timingsMs: {
3159
- embed: Math.round(embedMs),
3160
- vector: Math.round(vectorMs),
3161
- rerank: Math.round(rerankMs),
3162
- total: Math.round(hrTimeMs(totalStart))
3163
- },
3164
- usedRerank,
3165
- modelId: this.config.embeddings.model
3166
- }
3167
- };
3168
3428
  }
3169
3429
  async getPage(pathOrUrl, scope) {
3170
3430
  const resolvedScope = resolveScope(this.config, scope);
@@ -3236,6 +3496,7 @@ var SearchEngine = class _SearchEngine {
3236
3496
  const MAX_CHUNKS_PER_PAGE = 5;
3237
3497
  const MIN_CHUNKS_PER_PAGE = 1;
3238
3498
  const MIN_CHUNK_SCORE_RATIO = 0.5;
3499
+ const MAX_DOC_CHARS = 2e3;
3239
3500
  const pageCandidates = [];
3240
3501
  for (const [url, chunks] of pageGroups) {
3241
3502
  const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
@@ -3255,12 +3516,18 @@ var SearchEngine = class _SearchEngine {
3255
3516
  }
3256
3517
  const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
3257
3518
  parts.push(body);
3258
- pageCandidates.push({ id: url, text: parts.join("\n\n") });
3519
+ let text = parts.join("\n\n");
3520
+ if (text.length > MAX_DOC_CHARS) {
3521
+ text = text.slice(0, MAX_DOC_CHARS);
3522
+ }
3523
+ pageCandidates.push({ id: url, text });
3259
3524
  }
3525
+ const maxCandidates = Math.max(topK, this.config.rerank.topN);
3526
+ const cappedCandidates = pageCandidates.slice(0, maxCandidates);
3260
3527
  const reranked = await this.reranker.rerank(
3261
3528
  query,
3262
- pageCandidates,
3263
- Math.max(topK, this.config.rerank.topN)
3529
+ cappedCandidates,
3530
+ maxCandidates
3264
3531
  );
3265
3532
  const scoreByUrl = new Map(reranked.map((e) => [e.id, e.score]));
3266
3533
  return ranked.map((entry) => {
@@ -3282,7 +3549,7 @@ var SearchEngine = class _SearchEngine {
3282
3549
  };
3283
3550
 
3284
3551
  // src/mcp/server.ts
3285
- function createServer(engine) {
3552
+ function createServer(engine, config) {
3286
3553
  const server = new McpServer({
3287
3554
  name: "searchsocket-mcp",
3288
3555
  version: "0.1.0"
@@ -3290,14 +3557,15 @@ function createServer(engine) {
3290
3557
  server.registerTool(
3291
3558
  "search",
3292
3559
  {
3293
- description: "Semantic site search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, and topK.",
3560
+ description: "Semantic site search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and rerank. Enable rerank for better relevance on natural-language queries.",
3294
3561
  inputSchema: {
3295
3562
  query: z3.string().min(1),
3296
3563
  scope: z3.string().optional(),
3297
3564
  topK: z3.number().int().positive().max(100).optional(),
3298
3565
  pathPrefix: z3.string().optional(),
3299
3566
  tags: z3.array(z3.string()).optional(),
3300
- groupBy: z3.enum(["page", "chunk"]).optional()
3567
+ groupBy: z3.enum(["page", "chunk"]).optional(),
3568
+ rerank: z3.boolean().optional().describe("Enable reranking for better relevance (uses Jina Reranker). Defaults to true when rerank is enabled in config.")
3301
3569
  }
3302
3570
  },
3303
3571
  async (input) => {
@@ -3307,7 +3575,8 @@ function createServer(engine) {
3307
3575
  scope: input.scope,
3308
3576
  pathPrefix: input.pathPrefix,
3309
3577
  tags: input.tags,
3310
- groupBy: input.groupBy
3578
+ groupBy: input.groupBy,
3579
+ rerank: input.rerank ?? config.rerank.enabled
3311
3580
  });
3312
3581
  return {
3313
3582
  content: [
@@ -3435,10 +3704,10 @@ async function runMcpServer(options = {}) {
3435
3704
  config
3436
3705
  });
3437
3706
  if (resolvedTransport === "http") {
3438
- await startHttpServer(() => createServer(engine), config, options);
3707
+ await startHttpServer(() => createServer(engine, config), config, options);
3439
3708
  return;
3440
3709
  }
3441
- const server = createServer(engine);
3710
+ const server = createServer(engine, config);
3442
3711
  const stdioTransport = new StdioServerTransport();
3443
3712
  await server.connect(stdioTransport);
3444
3713
  }
@@ -3507,7 +3776,7 @@ function collectWatchPaths(config, cwd) {
3507
3776
  const paths = ["src/routes/**"];
3508
3777
  if (config.source.mode === "content-files" && config.source.contentFiles) {
3509
3778
  for (const pattern of config.source.contentFiles.globs) {
3510
- paths.push(path13.join(config.source.contentFiles.baseDir, pattern));
3779
+ paths.push(path14.join(config.source.contentFiles.baseDir, pattern));
3511
3780
  }
3512
3781
  }
3513
3782
  if (config.source.mode === "static-output") {
@@ -3520,15 +3789,15 @@ function collectWatchPaths(config, cwd) {
3520
3789
  paths.push("searchsocket.config.ts");
3521
3790
  paths.push(config.source.build.outputDir);
3522
3791
  }
3523
- return paths.map((value) => path13.resolve(cwd, value));
3792
+ return paths.map((value) => path14.resolve(cwd, value));
3524
3793
  }
3525
3794
  function ensureStateDir(cwd) {
3526
- const target = path13.join(cwd, ".searchsocket");
3527
- fs9.mkdirSync(target, { recursive: true });
3795
+ const target = path14.join(cwd, ".searchsocket");
3796
+ fs10.mkdirSync(target, { recursive: true });
3528
3797
  return target;
3529
3798
  }
3530
3799
  function ensureGitignore(cwd) {
3531
- const gitignorePath = path13.join(cwd, ".gitignore");
3800
+ const gitignorePath = path14.join(cwd, ".gitignore");
3532
3801
  const entries = [
3533
3802
  ".searchsocket/vectors.db",
3534
3803
  ".searchsocket/vectors.db-shm",
@@ -3537,8 +3806,8 @@ function ensureGitignore(cwd) {
3537
3806
  ".searchsocket/registry.json"
3538
3807
  ];
3539
3808
  let content = "";
3540
- if (fs9.existsSync(gitignorePath)) {
3541
- content = fs9.readFileSync(gitignorePath, "utf8");
3809
+ if (fs10.existsSync(gitignorePath)) {
3810
+ content = fs10.readFileSync(gitignorePath, "utf8");
3542
3811
  }
3543
3812
  const lines = content.split("\n");
3544
3813
  const missing = entries.filter((entry) => !lines.some((line) => line.trim() === entry));
@@ -3549,10 +3818,10 @@ function ensureGitignore(cwd) {
3549
3818
  # SearchSocket local state
3550
3819
  ${missing.join("\n")}
3551
3820
  `;
3552
- fs9.writeFileSync(gitignorePath, content.trimEnd() + block, "utf8");
3821
+ fs10.writeFileSync(gitignorePath, content.trimEnd() + block, "utf8");
3553
3822
  }
3554
3823
  function readScopesFromFile(filePath) {
3555
- const raw = fs9.readFileSync(filePath, "utf8");
3824
+ const raw = fs10.readFileSync(filePath, "utf8");
3556
3825
  return new Set(
3557
3826
  raw.split(/\r?\n/).map((line) => line.trim()).filter(Boolean)
3558
3827
  );
@@ -3576,8 +3845,8 @@ function readRemoteGitBranches(cwd) {
3576
3845
  }
3577
3846
  }
3578
3847
  async function loadResolvedConfigForDev(cwd, configPath) {
3579
- const resolvedConfigPath = path13.resolve(cwd, configPath ?? "searchsocket.config.ts");
3580
- if (fs9.existsSync(resolvedConfigPath)) {
3848
+ const resolvedConfigPath = path14.resolve(cwd, configPath ?? "searchsocket.config.ts");
3849
+ if (fs10.existsSync(resolvedConfigPath)) {
3581
3850
  return loadConfig({ cwd, configPath });
3582
3851
  }
3583
3852
  return mergeConfig(cwd, {});
@@ -3624,7 +3893,7 @@ var program = new Command();
3624
3893
  program.name("searchsocket").description("Semantic site search and MCP retrieval for SvelteKit").version(package_default.version).option("-C, --cwd <path>", "working directory", process.cwd()).option("--config <path>", "config path (defaults to searchsocket.config.ts)");
3625
3894
  program.command("init").description("Create searchsocket.config.ts and .searchsocket state directory").action(async (_opts, command) => {
3626
3895
  const root = getRootOptions(command).cwd ?? process.cwd();
3627
- const cwd = path13.resolve(root);
3896
+ const cwd = path14.resolve(root);
3628
3897
  const configPath = writeMinimalConfig(cwd);
3629
3898
  const stateDir = ensureStateDir(cwd);
3630
3899
  ensureGitignore(cwd);
@@ -3644,13 +3913,13 @@ program.command("init").description("Create searchsocket.config.ts and .searchso
3644
3913
  });
3645
3914
  program.command("index").description("Index site content into markdown mirror + vector store").option("--scope <name>", "scope override").option("--changed-only", "only process changed chunks", true).option("--no-changed-only", "re-index regardless of previous manifest").option("--force", "force full mirror rebuild and re-upsert", false).option("--dry-run", "compute plan and cost, no API writes", false).option("--source <mode>", "source mode override: static-output|crawl|content-files|build").option("--max-pages <n>", "limit pages processed").option("--max-chunks <n>", "limit chunks processed").option("--quiet", "suppress all output except errors and warnings", false).option("--verbose", "verbose output", false).option("--json", "emit JSON logs and summary", false).action(async (opts, command) => {
3646
3915
  const rootOpts = getRootOptions(command);
3647
- const cwd = path13.resolve(rootOpts?.cwd ?? process.cwd());
3916
+ const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
3648
3917
  await runIndexCommand({
3649
3918
  cwd,
3650
3919
  configPath: rootOpts?.config,
3651
3920
  scope: opts.scope,
3652
3921
  changedOnly: opts.changedOnly,
3653
- force: opts.force,
3922
+ force: opts.force || /^(1|true|yes)$/i.test(process.env.SEARCHSOCKET_FORCE_REINDEX ?? ""),
3654
3923
  dryRun: opts.dryRun,
3655
3924
  source: opts.source,
3656
3925
  maxPages: opts.maxPages ? parsePositiveInt(opts.maxPages, "--max-pages") : void 0,
@@ -3662,7 +3931,7 @@ program.command("index").description("Index site content into markdown mirror +
3662
3931
  });
3663
3932
  program.command("status").description("Show scope, indexing state, backend health, and recent cost estimate").option("--scope <name>", "scope override").action(async (opts, command) => {
3664
3933
  const rootOpts = getRootOptions(command);
3665
- const cwd = path13.resolve(rootOpts?.cwd ?? process.cwd());
3934
+ const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
3666
3935
  const config = await loadConfig({ cwd, configPath: rootOpts?.config });
3667
3936
  const scope = resolveScope(config, opts.scope);
3668
3937
  let vectorStore;
@@ -3740,7 +4009,7 @@ program.command("status").description("Show scope, indexing state, backend healt
3740
4009
  });
3741
4010
  program.command("dev").description("Watch content files/routes and incrementally reindex on changes").option("--scope <name>", "scope override").option("--mcp", "start MCP server (http transport) alongside watcher", false).option("--mcp-port <n>", "MCP HTTP port", "3338").option("--mcp-path <path>", "MCP HTTP path", "/mcp").option("--verbose", "verbose logs", false).action(async (opts, command) => {
3742
4011
  const rootOpts = getRootOptions(command);
3743
- const cwd = path13.resolve(rootOpts?.cwd ?? process.cwd());
4012
+ const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
3744
4013
  const config = await loadResolvedConfigForDev(cwd, rootOpts?.config);
3745
4014
  const watchPaths = collectWatchPaths(config, cwd);
3746
4015
  process.stdout.write("starting searchsocket dev watcher...\n");
@@ -3809,10 +4078,10 @@ ${watchPaths.map((entry) => ` - ${entry}`).join("\n")}
3809
4078
  });
3810
4079
  program.command("clean").description("Delete local state and optionally delete remote vectors for a scope").option("--scope <name>", "scope override").option("--remote", "delete remote scope vectors", false).action(async (opts, command) => {
3811
4080
  const rootOpts = getRootOptions(command);
3812
- const cwd = path13.resolve(rootOpts?.cwd ?? process.cwd());
4081
+ const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
3813
4082
  const config = await loadConfig({ cwd, configPath: rootOpts?.config });
3814
4083
  const scope = resolveScope(config, opts.scope);
3815
- const statePath = path13.join(cwd, config.state.dir);
4084
+ const statePath = path14.join(cwd, config.state.dir);
3816
4085
  await fsp.rm(statePath, { recursive: true, force: true });
3817
4086
  process.stdout.write(`deleted local state directory: ${statePath}
3818
4087
  `);
@@ -3825,7 +4094,7 @@ program.command("clean").description("Delete local state and optionally delete r
3825
4094
  });
3826
4095
  program.command("prune").description("List/delete stale scopes (dry-run by default)").option("--apply", "apply deletions", false).option("--scopes-file <path>", "file containing active scopes").option("--older-than <duration>", "ttl cutoff like 30d").action(async (opts, command) => {
3827
4096
  const rootOpts = getRootOptions(command);
3828
- const cwd = path13.resolve(rootOpts?.cwd ?? process.cwd());
4097
+ const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
3829
4098
  const config = await loadConfig({ cwd, configPath: rootOpts?.config });
3830
4099
  const baseScope = resolveScope(config);
3831
4100
  let vectorStore;
@@ -3845,7 +4114,7 @@ program.command("prune").description("List/delete stale scopes (dry-run by defau
3845
4114
  `);
3846
4115
  let keepScopes = /* @__PURE__ */ new Set();
3847
4116
  if (opts.scopesFile) {
3848
- keepScopes = readScopesFromFile(path13.resolve(cwd, opts.scopesFile));
4117
+ keepScopes = readScopesFromFile(path14.resolve(cwd, opts.scopesFile));
3849
4118
  } else {
3850
4119
  keepScopes = readRemoteGitBranches(cwd);
3851
4120
  }
@@ -3916,7 +4185,7 @@ program.command("prune").description("List/delete stale scopes (dry-run by defau
3916
4185
  });
3917
4186
  program.command("doctor").description("Validate config, env vars, provider connectivity, and local write access").action(async (_opts, command) => {
3918
4187
  const rootOpts = getRootOptions(command);
3919
- const cwd = path13.resolve(rootOpts?.cwd ?? process.cwd());
4188
+ const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
3920
4189
  const checks = [];
3921
4190
  let config = null;
3922
4191
  try {
@@ -3945,8 +4214,8 @@ program.command("doctor").description("Validate config, env vars, provider conne
3945
4214
  });
3946
4215
  }
3947
4216
  if (config.source.mode === "static-output") {
3948
- const outputDir = path13.resolve(cwd, config.source.staticOutputDir);
3949
- const exists = fs9.existsSync(outputDir);
4217
+ const outputDir = path14.resolve(cwd, config.source.staticOutputDir);
4218
+ const exists = fs10.existsSync(outputDir);
3950
4219
  checks.push({
3951
4220
  name: "source: static output dir",
3952
4221
  ok: exists,
@@ -3955,15 +4224,15 @@ program.command("doctor").description("Validate config, env vars, provider conne
3955
4224
  } else if (config.source.mode === "build") {
3956
4225
  const buildConfig = config.source.build;
3957
4226
  if (buildConfig) {
3958
- const manifestPath = path13.resolve(cwd, buildConfig.outputDir, "server", "manifest-full.js");
3959
- const manifestExists = fs9.existsSync(manifestPath);
4227
+ const manifestPath = path14.resolve(cwd, buildConfig.outputDir, "server", "manifest-full.js");
4228
+ const manifestExists = fs10.existsSync(manifestPath);
3960
4229
  checks.push({
3961
4230
  name: "source: build manifest",
3962
4231
  ok: manifestExists,
3963
4232
  details: manifestExists ? manifestPath : `${manifestPath} not found (run \`vite build\` first)`
3964
4233
  });
3965
- const viteBin = path13.resolve(cwd, "node_modules", ".bin", "vite");
3966
- const viteExists = fs9.existsSync(viteBin);
4234
+ const viteBin = path14.resolve(cwd, "node_modules", ".bin", "vite");
4235
+ const viteExists = fs10.existsSync(viteBin);
3967
4236
  checks.push({
3968
4237
  name: "source: vite binary",
3969
4238
  ok: viteExists,
@@ -3980,7 +4249,7 @@ program.command("doctor").description("Validate config, env vars, provider conne
3980
4249
  const contentConfig = config.source.contentFiles;
3981
4250
  if (contentConfig) {
3982
4251
  const fg4 = await import("fast-glob");
3983
- const baseDir = path13.resolve(cwd, contentConfig.baseDir);
4252
+ const baseDir = path14.resolve(cwd, contentConfig.baseDir);
3984
4253
  const files = await fg4.default(contentConfig.globs, { cwd: baseDir, onlyFiles: true });
3985
4254
  checks.push({
3986
4255
  name: "source: content files",
@@ -4049,7 +4318,7 @@ program.command("doctor").description("Validate config, env vars, provider conne
4049
4318
  try {
4050
4319
  const scope = resolveScope(config);
4051
4320
  const { statePath } = ensureStateDirs(cwd, config.state.dir, scope);
4052
- const testPath = path13.join(statePath, ".write-test");
4321
+ const testPath = path14.join(statePath, ".write-test");
4053
4322
  await fsp.writeFile(testPath, "ok\n", "utf8");
4054
4323
  await fsp.rm(testPath, { force: true });
4055
4324
  checks.push({ name: "state directory writable", ok: true });
@@ -4078,7 +4347,7 @@ program.command("doctor").description("Validate config, env vars, provider conne
4078
4347
  });
4079
4348
  program.command("mcp").description("Run SearchSocket MCP server").option("--transport <transport>", "stdio|http", "stdio").option("--port <n>", "HTTP port", "3338").option("--path <path>", "HTTP path", "/mcp").action(async (opts, command) => {
4080
4349
  const rootOpts = getRootOptions(command);
4081
- const cwd = path13.resolve(rootOpts?.cwd ?? process.cwd());
4350
+ const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
4082
4351
  await runMcpServer({
4083
4352
  cwd,
4084
4353
  configPath: rootOpts?.config,
@@ -4089,7 +4358,7 @@ program.command("mcp").description("Run SearchSocket MCP server").option("--tran
4089
4358
  });
4090
4359
  program.command("search").description("Quick local CLI search against indexed vectors").requiredOption("--q <query>", "search query").option("--scope <name>", "scope override").option("--top-k <n>", "top K results", "10").option("--path-prefix <prefix>", "path prefix filter").option("--rerank", "enable configured reranker", false).action(async (opts, command) => {
4091
4360
  const rootOpts = getRootOptions(command);
4092
- const cwd = path13.resolve(rootOpts?.cwd ?? process.cwd());
4361
+ const cwd = path14.resolve(rootOpts?.cwd ?? process.cwd());
4093
4362
  const engine = await SearchEngine.create({
4094
4363
  cwd,
4095
4364
  configPath: rootOpts?.config
@@ -4105,7 +4374,7 @@ program.command("search").description("Quick local CLI search against indexed ve
4105
4374
  `);
4106
4375
  });
4107
4376
  async function main() {
4108
- dotenvConfig({ path: path13.resolve(process.cwd(), ".env") });
4377
+ dotenvConfig({ path: path14.resolve(process.cwd(), ".env") });
4109
4378
  await program.parseAsync(process.argv);
4110
4379
  }
4111
4380
  main().catch((error) => {