searchsocket 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3,18 +3,20 @@ import path from 'path';
3
3
  import { createJiti } from 'jiti';
4
4
  import { z } from 'zod';
5
5
  import { execSync, spawn } from 'child_process';
6
- import { createHash } from 'crypto';
6
+ import { FusionAlgorithm, QueryMode } from '@upstash/vector';
7
+ import { timingSafeEqual, createHash } from 'crypto';
7
8
  import { load } from 'cheerio';
8
9
  import matter from 'gray-matter';
9
10
  import fg from 'fast-glob';
10
11
  import pLimit from 'p-limit';
11
- import fs3 from 'fs/promises';
12
+ import fs8 from 'fs/promises';
12
13
  import net from 'net';
13
14
  import { gunzipSync } from 'zlib';
14
15
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
15
16
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
16
17
  import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
17
18
  import { createMcpExpressApp } from '@modelcontextprotocol/sdk/server/express.js';
19
+ import { WebStandardStreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js';
18
20
 
19
21
  var __getOwnPropNames = Object.getOwnPropertyNames;
20
22
  var __commonJS = (cb, mod) => function __require() {
@@ -5013,32 +5015,32 @@ var require_URL = __commonJS({
5013
5015
  else
5014
5016
  return basepath.substring(0, lastslash + 1) + refpath;
5015
5017
  }
5016
- function remove_dot_segments(path13) {
5017
- if (!path13) return path13;
5018
+ function remove_dot_segments(path14) {
5019
+ if (!path14) return path14;
5018
5020
  var output = "";
5019
- while (path13.length > 0) {
5020
- if (path13 === "." || path13 === "..") {
5021
- path13 = "";
5021
+ while (path14.length > 0) {
5022
+ if (path14 === "." || path14 === "..") {
5023
+ path14 = "";
5022
5024
  break;
5023
5025
  }
5024
- var twochars = path13.substring(0, 2);
5025
- var threechars = path13.substring(0, 3);
5026
- var fourchars = path13.substring(0, 4);
5026
+ var twochars = path14.substring(0, 2);
5027
+ var threechars = path14.substring(0, 3);
5028
+ var fourchars = path14.substring(0, 4);
5027
5029
  if (threechars === "../") {
5028
- path13 = path13.substring(3);
5030
+ path14 = path14.substring(3);
5029
5031
  } else if (twochars === "./") {
5030
- path13 = path13.substring(2);
5032
+ path14 = path14.substring(2);
5031
5033
  } else if (threechars === "/./") {
5032
- path13 = "/" + path13.substring(3);
5033
- } else if (twochars === "/." && path13.length === 2) {
5034
- path13 = "/";
5035
- } else if (fourchars === "/../" || threechars === "/.." && path13.length === 3) {
5036
- path13 = "/" + path13.substring(4);
5034
+ path14 = "/" + path14.substring(3);
5035
+ } else if (twochars === "/." && path14.length === 2) {
5036
+ path14 = "/";
5037
+ } else if (fourchars === "/../" || threechars === "/.." && path14.length === 3) {
5038
+ path14 = "/" + path14.substring(4);
5037
5039
  output = output.replace(/\/?[^\/]*$/, "");
5038
5040
  } else {
5039
- var segment = path13.match(/(\/?([^\/]*))/)[0];
5041
+ var segment = path14.match(/(\/?([^\/]*))/)[0];
5040
5042
  output += segment;
5041
- path13 = path13.substring(segment.length);
5043
+ path14 = path14.substring(segment.length);
5042
5044
  }
5043
5045
  }
5044
5046
  return output;
@@ -16634,6 +16636,7 @@ var searchSocketConfigSchema = z.object({
16634
16636
  dropSelectors: z.array(z.string()).optional(),
16635
16637
  ignoreAttr: z.string().optional(),
16636
16638
  noindexAttr: z.string().optional(),
16639
+ imageDescAttr: z.string().optional(),
16637
16640
  respectRobotsNoindex: z.boolean().optional()
16638
16641
  }).optional(),
16639
16642
  transform: z.object({
@@ -16649,35 +16652,48 @@ var searchSocketConfigSchema = z.object({
16649
16652
  headingPathDepth: z.number().int().positive().optional(),
16650
16653
  dontSplitInside: z.array(z.enum(["code", "table", "blockquote"])).optional(),
16651
16654
  prependTitle: z.boolean().optional(),
16652
- pageSummaryChunk: z.boolean().optional()
16655
+ pageSummaryChunk: z.boolean().optional(),
16656
+ weightHeadings: z.boolean().optional()
16653
16657
  }).optional(),
16654
16658
  upstash: z.object({
16655
16659
  url: z.string().url().optional(),
16656
16660
  token: z.string().min(1).optional(),
16657
16661
  urlEnv: z.string().min(1).optional(),
16658
- tokenEnv: z.string().min(1).optional()
16662
+ tokenEnv: z.string().min(1).optional(),
16663
+ namespaces: z.object({
16664
+ pages: z.string().min(1).optional(),
16665
+ chunks: z.string().min(1).optional()
16666
+ }).optional()
16667
+ }).optional(),
16668
+ embedding: z.object({
16669
+ model: z.string().optional(),
16670
+ dimensions: z.number().int().positive().optional(),
16671
+ taskType: z.string().optional(),
16672
+ batchSize: z.number().int().positive().optional()
16659
16673
  }).optional(),
16660
16674
  search: z.object({
16661
- semanticWeight: z.number().min(0).max(1).optional(),
16662
- inputEnrichment: z.boolean().optional(),
16663
- reranking: z.boolean().optional(),
16664
16675
  dualSearch: z.boolean().optional(),
16665
16676
  pageSearchWeight: z.number().min(0).max(1).optional()
16666
16677
  }).optional(),
16667
16678
  ranking: z.object({
16668
16679
  enableIncomingLinkBoost: z.boolean().optional(),
16669
16680
  enableDepthBoost: z.boolean().optional(),
16681
+ enableFreshnessBoost: z.boolean().optional(),
16682
+ freshnessDecayRate: z.number().positive().optional(),
16683
+ enableAnchorTextBoost: z.boolean().optional(),
16670
16684
  pageWeights: z.record(z.string(), z.number().min(0)).optional(),
16671
16685
  aggregationCap: z.number().int().positive().optional(),
16672
16686
  aggregationDecay: z.number().min(0).max(1).optional(),
16673
16687
  minChunkScoreRatio: z.number().min(0).max(1).optional(),
16674
- minScore: z.number().min(0).max(1).optional(),
16688
+ minScoreRatio: z.number().min(0).max(1).optional(),
16675
16689
  scoreGapThreshold: z.number().min(0).max(1).optional(),
16676
16690
  weights: z.object({
16677
16691
  incomingLinks: z.number().optional(),
16678
16692
  depth: z.number().optional(),
16679
16693
  aggregation: z.number().optional(),
16680
- titleMatch: z.number().optional()
16694
+ titleMatch: z.number().optional(),
16695
+ freshness: z.number().optional(),
16696
+ anchorText: z.number().optional()
16681
16697
  }).optional()
16682
16698
  }).optional(),
16683
16699
  api: z.object({
@@ -16692,12 +16708,28 @@ var searchSocketConfigSchema = z.object({
16692
16708
  }).optional(),
16693
16709
  mcp: z.object({
16694
16710
  enable: z.boolean().optional(),
16711
+ access: z.enum(["public", "private"]).optional(),
16695
16712
  transport: z.enum(["stdio", "http"]).optional(),
16696
16713
  http: z.object({
16697
16714
  port: z.number().int().positive().optional(),
16698
- path: z.string().optional()
16715
+ path: z.string().optional(),
16716
+ apiKey: z.string().min(1).optional(),
16717
+ apiKeyEnv: z.string().min(1).optional()
16718
+ }).optional(),
16719
+ handle: z.object({
16720
+ path: z.string().optional(),
16721
+ apiKey: z.string().min(1).optional(),
16722
+ enableJsonResponse: z.boolean().optional()
16699
16723
  }).optional()
16700
16724
  }).optional(),
16725
+ llmsTxt: z.object({
16726
+ enable: z.boolean().optional(),
16727
+ outputPath: z.string().optional(),
16728
+ title: z.string().optional(),
16729
+ description: z.string().optional(),
16730
+ generateFull: z.boolean().optional(),
16731
+ serveMarkdownVariants: z.boolean().optional()
16732
+ }).optional(),
16701
16733
  state: z.object({
16702
16734
  dir: z.string().optional()
16703
16735
  }).optional()
@@ -16736,6 +16768,7 @@ function createDefaultConfig(projectId) {
16736
16768
  dropSelectors: DEFAULT_DROP_SELECTORS,
16737
16769
  ignoreAttr: "data-search-ignore",
16738
16770
  noindexAttr: "data-search-noindex",
16771
+ imageDescAttr: "data-search-description",
16739
16772
  respectRobotsNoindex: true
16740
16773
  },
16741
16774
  transform: {
@@ -16745,39 +16778,52 @@ function createDefaultConfig(projectId) {
16745
16778
  },
16746
16779
  chunking: {
16747
16780
  strategy: "hybrid",
16748
- maxChars: 2200,
16781
+ maxChars: 1500,
16749
16782
  overlapChars: 200,
16750
16783
  minChars: 250,
16751
16784
  headingPathDepth: 3,
16752
16785
  dontSplitInside: ["code", "table", "blockquote"],
16753
16786
  prependTitle: true,
16754
- pageSummaryChunk: true
16787
+ pageSummaryChunk: true,
16788
+ weightHeadings: true
16755
16789
  },
16756
16790
  upstash: {
16757
- urlEnv: "UPSTASH_SEARCH_REST_URL",
16758
- tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
16791
+ urlEnv: "UPSTASH_VECTOR_REST_URL",
16792
+ tokenEnv: "UPSTASH_VECTOR_REST_TOKEN",
16793
+ namespaces: {
16794
+ pages: "pages",
16795
+ chunks: "chunks"
16796
+ }
16797
+ },
16798
+ embedding: {
16799
+ model: "bge-large-en-v1.5",
16800
+ dimensions: 1024,
16801
+ taskType: "RETRIEVAL_DOCUMENT",
16802
+ batchSize: 100
16759
16803
  },
16760
16804
  search: {
16761
- semanticWeight: 0.75,
16762
- inputEnrichment: true,
16763
- reranking: true,
16764
16805
  dualSearch: true,
16765
16806
  pageSearchWeight: 0.3
16766
16807
  },
16767
16808
  ranking: {
16768
16809
  enableIncomingLinkBoost: true,
16769
16810
  enableDepthBoost: true,
16811
+ enableFreshnessBoost: false,
16812
+ freshnessDecayRate: 1e-3,
16813
+ enableAnchorTextBoost: false,
16770
16814
  pageWeights: {},
16771
16815
  aggregationCap: 5,
16772
16816
  aggregationDecay: 0.5,
16773
16817
  minChunkScoreRatio: 0.5,
16774
- minScore: 0.3,
16818
+ minScoreRatio: 0.7,
16775
16819
  scoreGapThreshold: 0.4,
16776
16820
  weights: {
16777
16821
  incomingLinks: 0.05,
16778
16822
  depth: 0.03,
16779
16823
  aggregation: 0.1,
16780
- titleMatch: 0.15
16824
+ titleMatch: 0.15,
16825
+ freshness: 0.1,
16826
+ anchorText: 0.1
16781
16827
  }
16782
16828
  },
16783
16829
  api: {
@@ -16788,12 +16834,23 @@ function createDefaultConfig(projectId) {
16788
16834
  },
16789
16835
  mcp: {
16790
16836
  enable: process.env.NODE_ENV !== "production",
16837
+ access: "private",
16791
16838
  transport: "stdio",
16792
16839
  http: {
16793
16840
  port: 3338,
16794
16841
  path: "/mcp"
16842
+ },
16843
+ handle: {
16844
+ path: "/api/mcp",
16845
+ enableJsonResponse: true
16795
16846
  }
16796
16847
  },
16848
+ llmsTxt: {
16849
+ enable: false,
16850
+ outputPath: "static/llms.txt",
16851
+ generateFull: true,
16852
+ serveMarkdownVariants: false
16853
+ },
16797
16854
  state: {
16798
16855
  dir: ".searchsocket"
16799
16856
  }
@@ -16921,7 +16978,15 @@ ${issues}`
16921
16978
  },
16922
16979
  upstash: {
16923
16980
  ...defaults.upstash,
16924
- ...parsed.upstash
16981
+ ...parsed.upstash,
16982
+ namespaces: {
16983
+ ...defaults.upstash.namespaces,
16984
+ ...parsed.upstash?.namespaces
16985
+ }
16986
+ },
16987
+ embedding: {
16988
+ ...defaults.embedding,
16989
+ ...parsed.embedding
16925
16990
  },
16926
16991
  search: {
16927
16992
  ...defaults.search,
@@ -16958,8 +17023,16 @@ ${issues}`
16958
17023
  http: {
16959
17024
  ...defaults.mcp.http,
16960
17025
  ...parsed.mcp?.http
17026
+ },
17027
+ handle: {
17028
+ ...defaults.mcp.handle,
17029
+ ...parsed.mcp?.handle
16961
17030
  }
16962
17031
  },
17032
+ llmsTxt: {
17033
+ ...defaults.llmsTxt,
17034
+ ...parsed.llmsTxt
17035
+ },
16963
17036
  state: {
16964
17037
  ...defaults.state,
16965
17038
  ...parsed.state
@@ -16979,6 +17052,15 @@ ${issues}`
16979
17052
  maxDepth: 10
16980
17053
  };
16981
17054
  }
17055
+ if (merged.mcp.access === "public") {
17056
+ const resolvedKey = merged.mcp.http.apiKey ?? (merged.mcp.http.apiKeyEnv ? process.env[merged.mcp.http.apiKeyEnv] : void 0);
17057
+ if (!resolvedKey) {
17058
+ throw new SearchSocketError(
17059
+ "CONFIG_MISSING",
17060
+ '`mcp.access` is "public" but no API key is configured. Set `mcp.http.apiKey` or `mcp.http.apiKeyEnv`.'
17061
+ );
17062
+ }
17063
+ }
16982
17064
  if (merged.source.mode === "crawl" && !merged.source.crawl?.baseUrl) {
16983
17065
  throw new SearchSocketError("CONFIG_MISSING", "`source.crawl.baseUrl` is required when source.mode is crawl.");
16984
17066
  }
@@ -17042,13 +17124,84 @@ function normalizeMarkdown(input) {
17042
17124
  function sanitizeScopeName(scopeName) {
17043
17125
  return scopeName.toLowerCase().replace(/[^a-z0-9._-]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 80);
17044
17126
  }
17127
+ function markdownToPlain(markdown) {
17128
+ return markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/[#>*_|\-]/g, " ").replace(/\s+/g, " ").trim();
17129
+ }
17045
17130
  function toSnippet(markdown, maxLen = 220) {
17046
- const plain = markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/[#>*_|\-]/g, " ").replace(/\s+/g, " ").trim();
17131
+ const plain = markdownToPlain(markdown);
17047
17132
  if (plain.length <= maxLen) {
17048
17133
  return plain;
17049
17134
  }
17050
17135
  return `${plain.slice(0, Math.max(0, maxLen - 1)).trim()}\u2026`;
17051
17136
  }
17137
+ function queryAwareExcerpt(markdown, query, maxLen = 220) {
17138
+ const plain = markdownToPlain(markdown);
17139
+ if (plain.length <= maxLen) return plain;
17140
+ const tokens = query.toLowerCase().split(/\s+/).filter((t) => t.length >= 2);
17141
+ if (tokens.length === 0) return toSnippet(markdown, maxLen);
17142
+ const positions = [];
17143
+ for (let ti = 0; ti < tokens.length; ti++) {
17144
+ const escaped = tokens[ti].replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
17145
+ const re = new RegExp(escaped, "gi");
17146
+ let m;
17147
+ while ((m = re.exec(plain)) !== null) {
17148
+ positions.push({ start: m.index, end: m.index + m[0].length, tokenIdx: ti });
17149
+ }
17150
+ }
17151
+ if (positions.length === 0) return toSnippet(markdown, maxLen);
17152
+ positions.sort((a, b) => a.start - b.start);
17153
+ let bestUniqueCount = 0;
17154
+ let bestTotalCount = 0;
17155
+ let bestLeft = 0;
17156
+ let bestRight = 0;
17157
+ let left = 0;
17158
+ const tokenCounts = /* @__PURE__ */ new Map();
17159
+ for (let right = 0; right < positions.length; right++) {
17160
+ tokenCounts.set(positions[right].tokenIdx, (tokenCounts.get(positions[right].tokenIdx) ?? 0) + 1);
17161
+ while (positions[right].end - positions[left].start > maxLen && left < right) {
17162
+ const leftToken = positions[left].tokenIdx;
17163
+ const cnt = tokenCounts.get(leftToken) - 1;
17164
+ if (cnt === 0) tokenCounts.delete(leftToken);
17165
+ else tokenCounts.set(leftToken, cnt);
17166
+ left++;
17167
+ }
17168
+ const uniqueCount = tokenCounts.size;
17169
+ const totalCount = right - left + 1;
17170
+ if (uniqueCount > bestUniqueCount || uniqueCount === bestUniqueCount && totalCount > bestTotalCount) {
17171
+ bestUniqueCount = uniqueCount;
17172
+ bestTotalCount = totalCount;
17173
+ bestLeft = left;
17174
+ bestRight = right;
17175
+ }
17176
+ }
17177
+ const mid = Math.floor((positions[bestLeft].start + positions[bestRight].end) / 2);
17178
+ let start = Math.max(0, mid - Math.floor(maxLen / 2));
17179
+ let end = Math.min(plain.length, start + maxLen);
17180
+ start = Math.max(0, end - maxLen);
17181
+ if (start > 0) {
17182
+ const spaceIdx = plain.lastIndexOf(" ", start);
17183
+ if (spaceIdx > start - 30) {
17184
+ start = spaceIdx + 1;
17185
+ }
17186
+ }
17187
+ if (end < plain.length) {
17188
+ const spaceIdx = plain.indexOf(" ", end);
17189
+ if (spaceIdx !== -1 && spaceIdx < end + 30) {
17190
+ end = spaceIdx;
17191
+ }
17192
+ }
17193
+ let excerpt = plain.slice(start, end);
17194
+ if (excerpt.length > Math.ceil(maxLen * 1.2)) {
17195
+ excerpt = excerpt.slice(0, maxLen);
17196
+ const lastSpace = excerpt.lastIndexOf(" ");
17197
+ if (lastSpace > maxLen * 0.5) {
17198
+ excerpt = excerpt.slice(0, lastSpace);
17199
+ }
17200
+ }
17201
+ const prefix = start > 0 ? "\u2026" : "";
17202
+ const suffix = end < plain.length ? "\u2026" : "";
17203
+ return `${prefix}${excerpt}${suffix}`;
17204
+ }
17052
17205
  function extractFirstParagraph(markdown) {
17053
17206
  const lines = markdown.split("\n");
17054
17207
  let inFence = false;
@@ -17109,162 +17262,342 @@ function ensureStateDirs(cwd, stateDir, scope) {
17109
17262
  fs.mkdirSync(statePath, { recursive: true });
17110
17263
  return { statePath };
17111
17264
  }
17112
-
17113
- // src/vector/upstash.ts
17114
- function chunkIndexName(scope) {
17115
- return `${scope.projectId}--${scope.scopeName}`;
17116
- }
17117
- function pageIndexName(scope) {
17118
- return `${scope.projectId}--${scope.scopeName}--pages`;
17119
- }
17120
17265
  var UpstashSearchStore = class {
17121
- client;
17266
+ index;
17267
+ pagesNs;
17268
+ chunksNs;
17122
17269
  constructor(opts) {
17123
- this.client = opts.client;
17124
- }
17125
- chunkIndex(scope) {
17126
- return this.client.index(chunkIndexName(scope));
17127
- }
17128
- pageIndex(scope) {
17129
- return this.client.index(pageIndexName(scope));
17270
+ this.index = opts.index;
17271
+ this.pagesNs = opts.index.namespace(opts.pagesNamespace);
17272
+ this.chunksNs = opts.index.namespace(opts.chunksNamespace);
17130
17273
  }
17131
17274
  async upsertChunks(chunks, scope) {
17132
17275
  if (chunks.length === 0) return;
17133
- const index = this.chunkIndex(scope);
17134
- const BATCH_SIZE = 100;
17276
+ const BATCH_SIZE = 90;
17135
17277
  for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
17136
17278
  const batch = chunks.slice(i, i + BATCH_SIZE);
17137
- await index.upsert(batch);
17138
- }
17139
- }
17140
- async search(query, opts, scope) {
17141
- const index = this.chunkIndex(scope);
17142
- const results = await index.search({
17143
- query,
17144
- limit: opts.limit,
17145
- semanticWeight: opts.semanticWeight,
17146
- inputEnrichment: opts.inputEnrichment,
17147
- reranking: opts.reranking,
17148
- filter: opts.filter
17279
+ await this.chunksNs.upsert(
17280
+ batch.map((c) => ({
17281
+ id: c.id,
17282
+ data: c.data,
17283
+ metadata: {
17284
+ ...c.metadata,
17285
+ projectId: scope.projectId,
17286
+ scopeName: scope.scopeName,
17287
+ type: c.metadata.type || "chunk"
17288
+ }
17289
+ }))
17290
+ );
17291
+ }
17292
+ }
17293
+ async search(data, opts, scope) {
17294
+ const filterParts = [
17295
+ `projectId = '${scope.projectId}'`,
17296
+ `scopeName = '${scope.scopeName}'`
17297
+ ];
17298
+ if (opts.filter) {
17299
+ filterParts.push(opts.filter);
17300
+ }
17301
+ const results = await this.chunksNs.query({
17302
+ data,
17303
+ topK: opts.limit,
17304
+ includeMetadata: true,
17305
+ filter: filterParts.join(" AND "),
17306
+ queryMode: QueryMode.HYBRID,
17307
+ fusionAlgorithm: FusionAlgorithm.DBSF
17308
+ });
17309
+ return results.map((doc) => ({
17310
+ id: String(doc.id),
17311
+ score: doc.score,
17312
+ metadata: {
17313
+ projectId: doc.metadata?.projectId ?? "",
17314
+ scopeName: doc.metadata?.scopeName ?? "",
17315
+ url: doc.metadata?.url ?? "",
17316
+ path: doc.metadata?.path ?? "",
17317
+ title: doc.metadata?.title ?? "",
17318
+ sectionTitle: doc.metadata?.sectionTitle ?? "",
17319
+ headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
17320
+ snippet: doc.metadata?.snippet ?? "",
17321
+ chunkText: doc.metadata?.chunkText ?? "",
17322
+ ordinal: doc.metadata?.ordinal ?? 0,
17323
+ contentHash: doc.metadata?.contentHash ?? "",
17324
+ depth: doc.metadata?.depth ?? 0,
17325
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
17326
+ routeFile: doc.metadata?.routeFile ?? "",
17327
+ tags: doc.metadata?.tags ?? [],
17328
+ description: doc.metadata?.description || void 0,
17329
+ keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
17330
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
17331
+ incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
17332
+ }
17333
+ }));
17334
+ }
17335
+ async searchChunksByUrl(data, url, opts, scope) {
17336
+ const filterParts = [
17337
+ `projectId = '${scope.projectId}'`,
17338
+ `scopeName = '${scope.scopeName}'`,
17339
+ `url = '${url}'`
17340
+ ];
17341
+ if (opts.filter) {
17342
+ filterParts.push(opts.filter);
17343
+ }
17344
+ const results = await this.chunksNs.query({
17345
+ data,
17346
+ topK: opts.limit,
17347
+ includeMetadata: true,
17348
+ filter: filterParts.join(" AND "),
17349
+ queryMode: QueryMode.HYBRID,
17350
+ fusionAlgorithm: FusionAlgorithm.DBSF
17149
17351
  });
17150
17352
  return results.map((doc) => ({
17151
- id: doc.id,
17353
+ id: String(doc.id),
17152
17354
  score: doc.score,
17153
17355
  metadata: {
17154
17356
  projectId: doc.metadata?.projectId ?? "",
17155
17357
  scopeName: doc.metadata?.scopeName ?? "",
17156
- url: doc.content.url,
17358
+ url: doc.metadata?.url ?? "",
17157
17359
  path: doc.metadata?.path ?? "",
17158
- title: doc.content.title,
17159
- sectionTitle: doc.content.sectionTitle,
17160
- headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
17360
+ title: doc.metadata?.title ?? "",
17361
+ sectionTitle: doc.metadata?.sectionTitle ?? "",
17362
+ headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
17161
17363
  snippet: doc.metadata?.snippet ?? "",
17162
- chunkText: doc.content.text,
17364
+ chunkText: doc.metadata?.chunkText ?? "",
17163
17365
  ordinal: doc.metadata?.ordinal ?? 0,
17164
17366
  contentHash: doc.metadata?.contentHash ?? "",
17165
17367
  depth: doc.metadata?.depth ?? 0,
17166
17368
  incomingLinks: doc.metadata?.incomingLinks ?? 0,
17167
17369
  routeFile: doc.metadata?.routeFile ?? "",
17168
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17370
+ tags: doc.metadata?.tags ?? [],
17169
17371
  description: doc.metadata?.description || void 0,
17170
- keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
17372
+ keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
17373
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
17374
+ incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
17171
17375
  }
17172
17376
  }));
17173
17377
  }
17174
- async searchPages(query, opts, scope) {
17175
- const index = this.pageIndex(scope);
17378
+ async searchPagesByText(data, opts, scope) {
17379
+ return this.queryPages({ data }, opts, scope);
17380
+ }
17381
+ async searchPagesByVector(vector, opts, scope) {
17382
+ return this.queryPages({ vector }, opts, scope);
17383
+ }
17384
+ async queryPages(input, opts, scope) {
17385
+ const filterParts = [
17386
+ `projectId = '${scope.projectId}'`,
17387
+ `scopeName = '${scope.scopeName}'`
17388
+ ];
17389
+ if (opts.filter) {
17390
+ filterParts.push(opts.filter);
17391
+ }
17176
17392
  let results;
17177
17393
  try {
17178
- results = await index.search({
17179
- query,
17180
- limit: opts.limit,
17181
- semanticWeight: opts.semanticWeight,
17182
- inputEnrichment: opts.inputEnrichment,
17183
- reranking: true,
17184
- filter: opts.filter
17394
+ results = await this.pagesNs.query({
17395
+ ...input,
17396
+ topK: opts.limit,
17397
+ includeMetadata: true,
17398
+ filter: filterParts.join(" AND "),
17399
+ queryMode: QueryMode.HYBRID,
17400
+ fusionAlgorithm: FusionAlgorithm.DBSF
17185
17401
  });
17186
17402
  } catch {
17187
17403
  return [];
17188
17404
  }
17189
17405
  return results.map((doc) => ({
17190
- id: doc.id,
17406
+ id: String(doc.id),
17191
17407
  score: doc.score,
17192
- title: doc.content.title,
17193
- url: doc.content.url,
17194
- description: doc.content.description ?? "",
17195
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17408
+ title: doc.metadata?.title ?? "",
17409
+ url: doc.metadata?.url ?? "",
17410
+ description: doc.metadata?.description ?? "",
17411
+ tags: doc.metadata?.tags ?? [],
17196
17412
  depth: doc.metadata?.depth ?? 0,
17197
17413
  incomingLinks: doc.metadata?.incomingLinks ?? 0,
17198
- routeFile: doc.metadata?.routeFile ?? ""
17414
+ routeFile: doc.metadata?.routeFile ?? "",
17415
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0
17199
17416
  }));
17200
17417
  }
17201
- async deleteByIds(ids, scope) {
17418
+ async deleteByIds(ids, _scope) {
17202
17419
  if (ids.length === 0) return;
17203
- const index = this.chunkIndex(scope);
17204
- const BATCH_SIZE = 500;
17420
+ const BATCH_SIZE = 90;
17205
17421
  for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17206
17422
  const batch = ids.slice(i, i + BATCH_SIZE);
17207
- await index.delete(batch);
17423
+ await this.chunksNs.delete(batch);
17208
17424
  }
17209
17425
  }
17210
17426
  async deleteScope(scope) {
17211
- try {
17212
- const chunkIdx = this.chunkIndex(scope);
17213
- await chunkIdx.deleteIndex();
17214
- } catch {
17215
- }
17216
- try {
17217
- const pageIdx = this.pageIndex(scope);
17218
- await pageIdx.deleteIndex();
17219
- } catch {
17427
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17428
+ const ids = [];
17429
+ let cursor = "0";
17430
+ try {
17431
+ for (; ; ) {
17432
+ const result = await ns.range({
17433
+ cursor,
17434
+ limit: 100,
17435
+ includeMetadata: true
17436
+ });
17437
+ for (const doc of result.vectors) {
17438
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
17439
+ ids.push(String(doc.id));
17440
+ }
17441
+ }
17442
+ if (!result.nextCursor || result.nextCursor === "0") break;
17443
+ cursor = result.nextCursor;
17444
+ }
17445
+ } catch {
17446
+ }
17447
+ if (ids.length > 0) {
17448
+ const BATCH_SIZE = 90;
17449
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17450
+ const batch = ids.slice(i, i + BATCH_SIZE);
17451
+ await ns.delete(batch);
17452
+ }
17453
+ }
17220
17454
  }
17221
17455
  }
17222
17456
  async listScopes(projectId) {
17223
- const allIndexes = await this.client.listIndexes();
17224
- const prefix = `${projectId}--`;
17225
- const scopeNames = /* @__PURE__ */ new Set();
17226
- for (const name of allIndexes) {
17227
- if (name.startsWith(prefix) && !name.endsWith("--pages")) {
17228
- const scopeName = name.slice(prefix.length);
17229
- scopeNames.add(scopeName);
17230
- }
17231
- }
17232
- const scopes = [];
17233
- for (const scopeName of scopeNames) {
17234
- const scope = {
17235
- projectId,
17236
- scopeName,
17237
- scopeId: `${projectId}:${scopeName}`
17238
- };
17457
+ const scopeMap = /* @__PURE__ */ new Map();
17458
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17459
+ let cursor = "0";
17460
+ try {
17461
+ for (; ; ) {
17462
+ const result = await ns.range({
17463
+ cursor,
17464
+ limit: 100,
17465
+ includeMetadata: true
17466
+ });
17467
+ for (const doc of result.vectors) {
17468
+ if (doc.metadata?.projectId === projectId) {
17469
+ const scopeName = doc.metadata.scopeName ?? "";
17470
+ scopeMap.set(scopeName, (scopeMap.get(scopeName) ?? 0) + 1);
17471
+ }
17472
+ }
17473
+ if (!result.nextCursor || result.nextCursor === "0") break;
17474
+ cursor = result.nextCursor;
17475
+ }
17476
+ } catch {
17477
+ }
17478
+ }
17479
+ return [...scopeMap.entries()].map(([scopeName, count]) => ({
17480
+ projectId,
17481
+ scopeName,
17482
+ lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
17483
+ documentCount: count
17484
+ }));
17485
+ }
17486
+ async getContentHashes(scope) {
17487
+ return this.scanHashes(this.chunksNs, scope);
17488
+ }
17489
+ /**
17490
+ * Fetch content hashes for a specific set of chunk keys using direct fetch()
17491
+ * instead of range(). This avoids potential issues with range() returning
17492
+ * vectors from the wrong namespace on hybrid indexes.
17493
+ */
17494
+ async fetchContentHashesForKeys(keys, scope) {
17495
+ const map = /* @__PURE__ */ new Map();
17496
+ if (keys.length === 0) return map;
17497
+ const BATCH_SIZE = 90;
17498
+ for (let i = 0; i < keys.length; i += BATCH_SIZE) {
17499
+ const batch = keys.slice(i, i + BATCH_SIZE);
17239
17500
  try {
17240
- const info = await this.chunkIndex(scope).info();
17241
- scopes.push({
17242
- projectId,
17243
- scopeName,
17244
- lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
17245
- documentCount: info.documentCount
17501
+ const results = await this.chunksNs.fetch(batch, {
17502
+ includeMetadata: true
17246
17503
  });
17504
+ for (const doc of results) {
17505
+ if (doc && doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
17506
+ map.set(String(doc.id), doc.metadata.contentHash);
17507
+ }
17508
+ }
17247
17509
  } catch {
17248
- scopes.push({
17249
- projectId,
17250
- scopeName,
17251
- lastIndexedAt: "unknown",
17252
- documentCount: 0
17510
+ }
17511
+ }
17512
+ return map;
17513
+ }
17514
+ /**
17515
+ * Scan all IDs in the chunks namespace for this scope.
17516
+ * Used for deletion detection (finding stale chunk keys).
17517
+ */
17518
+ async scanChunkIds(scope) {
17519
+ const ids = /* @__PURE__ */ new Set();
17520
+ let cursor = "0";
17521
+ try {
17522
+ for (; ; ) {
17523
+ const result = await this.chunksNs.range({
17524
+ cursor,
17525
+ limit: 100,
17526
+ includeMetadata: true
17253
17527
  });
17528
+ for (const doc of result.vectors) {
17529
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
17530
+ ids.add(String(doc.id));
17531
+ }
17532
+ }
17533
+ if (!result.nextCursor || result.nextCursor === "0") break;
17534
+ cursor = result.nextCursor;
17254
17535
  }
17536
+ } catch {
17255
17537
  }
17256
- return scopes;
17538
+ return ids;
17257
17539
  }
17258
- async getContentHashes(scope) {
17540
+ async scanHashes(ns, scope) {
17541
+ const map = /* @__PURE__ */ new Map();
17542
+ let cursor = "0";
17543
+ try {
17544
+ for (; ; ) {
17545
+ const result = await ns.range({
17546
+ cursor,
17547
+ limit: 100,
17548
+ includeMetadata: true
17549
+ });
17550
+ for (const doc of result.vectors) {
17551
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
17552
+ map.set(String(doc.id), doc.metadata.contentHash);
17553
+ }
17554
+ }
17555
+ if (!result.nextCursor || result.nextCursor === "0") break;
17556
+ cursor = result.nextCursor;
17557
+ }
17558
+ } catch {
17559
+ }
17560
+ return map;
17561
+ }
17562
+ async listPages(scope, opts) {
17563
+ const cursor = opts?.cursor ?? "0";
17564
+ const limit = opts?.limit ?? 50;
17565
+ try {
17566
+ const result = await this.pagesNs.range({
17567
+ cursor,
17568
+ limit,
17569
+ includeMetadata: true
17570
+ });
17571
+ const pages = result.vectors.filter(
17572
+ (doc) => doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && (!opts?.pathPrefix || (doc.metadata?.url ?? "").startsWith(opts.pathPrefix))
17573
+ ).map((doc) => ({
17574
+ url: doc.metadata?.url ?? "",
17575
+ title: doc.metadata?.title ?? "",
17576
+ description: doc.metadata?.description ?? "",
17577
+ routeFile: doc.metadata?.routeFile ?? ""
17578
+ }));
17579
+ const response = { pages };
17580
+ if (result.nextCursor && result.nextCursor !== "0") {
17581
+ response.nextCursor = result.nextCursor;
17582
+ }
17583
+ return response;
17584
+ } catch {
17585
+ return { pages: [] };
17586
+ }
17587
+ }
17588
+ async getPageHashes(scope) {
17259
17589
  const map = /* @__PURE__ */ new Map();
17260
- const index = this.chunkIndex(scope);
17261
17590
  let cursor = "0";
17262
17591
  try {
17263
17592
  for (; ; ) {
17264
- const result = await index.range({ cursor, limit: 100 });
17265
- for (const doc of result.documents) {
17266
- if (doc.metadata?.contentHash) {
17267
- map.set(doc.id, doc.metadata.contentHash);
17593
+ const result = await this.pagesNs.range({
17594
+ cursor,
17595
+ limit: 100,
17596
+ includeMetadata: true
17597
+ });
17598
+ for (const doc of result.vectors) {
17599
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
17600
+ map.set(String(doc.id), doc.metadata.contentHash);
17268
17601
  }
17269
17602
  }
17270
17603
  if (!result.nextCursor || result.nextCursor === "0") break;
@@ -17274,47 +17607,43 @@ var UpstashSearchStore = class {
17274
17607
  }
17275
17608
  return map;
17276
17609
  }
17610
+ async deletePagesByIds(ids, _scope) {
17611
+ if (ids.length === 0) return;
17612
+ const BATCH_SIZE = 90;
17613
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17614
+ const batch = ids.slice(i, i + BATCH_SIZE);
17615
+ await this.pagesNs.delete(batch);
17616
+ }
17617
+ }
17277
17618
  async upsertPages(pages, scope) {
17278
17619
  if (pages.length === 0) return;
17279
- const index = this.pageIndex(scope);
17280
- const BATCH_SIZE = 50;
17620
+ const BATCH_SIZE = 90;
17281
17621
  for (let i = 0; i < pages.length; i += BATCH_SIZE) {
17282
17622
  const batch = pages.slice(i, i + BATCH_SIZE);
17283
- const docs = batch.map((p) => ({
17284
- id: p.url,
17285
- content: {
17286
- title: p.title,
17287
- url: p.url,
17288
- type: "page",
17289
- description: p.description ?? "",
17290
- keywords: (p.keywords ?? []).join(","),
17291
- summary: p.summary ?? "",
17292
- tags: p.tags.join(",")
17293
- },
17294
- metadata: {
17295
- markdown: p.markdown,
17296
- projectId: p.projectId,
17297
- scopeName: p.scopeName,
17298
- routeFile: p.routeFile,
17299
- routeResolution: p.routeResolution,
17300
- incomingLinks: p.incomingLinks,
17301
- outgoingLinks: p.outgoingLinks,
17302
- depth: p.depth,
17303
- indexedAt: p.indexedAt
17304
- }
17305
- }));
17306
- await index.upsert(docs);
17623
+ await this.pagesNs.upsert(
17624
+ batch.map((p) => ({
17625
+ id: p.id,
17626
+ data: p.data,
17627
+ metadata: {
17628
+ ...p.metadata,
17629
+ projectId: scope.projectId,
17630
+ scopeName: scope.scopeName,
17631
+ type: "page"
17632
+ }
17633
+ }))
17634
+ );
17307
17635
  }
17308
17636
  }
17309
17637
  async getPage(url, scope) {
17310
- const index = this.pageIndex(scope);
17311
17638
  try {
17312
- const results = await index.fetch([url]);
17639
+ const results = await this.pagesNs.fetch([url], {
17640
+ includeMetadata: true
17641
+ });
17313
17642
  const doc = results[0];
17314
- if (!doc) return null;
17643
+ if (!doc || !doc.metadata) return null;
17315
17644
  return {
17316
- url: doc.content.url,
17317
- title: doc.content.title,
17645
+ url: doc.metadata.url,
17646
+ title: doc.metadata.title,
17318
17647
  markdown: doc.metadata.markdown,
17319
17648
  projectId: doc.metadata.projectId,
17320
17649
  scopeName: doc.metadata.scopeName,
@@ -17322,27 +17651,86 @@ var UpstashSearchStore = class {
17322
17651
  routeResolution: doc.metadata.routeResolution,
17323
17652
  incomingLinks: doc.metadata.incomingLinks,
17324
17653
  outgoingLinks: doc.metadata.outgoingLinks,
17654
+ outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? void 0,
17325
17655
  depth: doc.metadata.depth,
17326
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17656
+ tags: doc.metadata.tags ?? [],
17327
17657
  indexedAt: doc.metadata.indexedAt,
17328
- summary: doc.content.summary || void 0,
17329
- description: doc.content.description || void 0,
17330
- keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
17658
+ summary: doc.metadata.summary || void 0,
17659
+ description: doc.metadata.description || void 0,
17660
+ keywords: doc.metadata.keywords?.length ? doc.metadata.keywords : void 0,
17661
+ publishedAt: typeof doc.metadata.publishedAt === "number" ? doc.metadata.publishedAt : void 0
17331
17662
  };
17332
17663
  } catch {
17333
17664
  return null;
17334
17665
  }
17335
17666
  }
17667
+ async fetchPageWithVector(url, scope) {
17668
+ try {
17669
+ const results = await this.pagesNs.fetch([url], {
17670
+ includeMetadata: true,
17671
+ includeVectors: true
17672
+ });
17673
+ const doc = results[0];
17674
+ if (!doc || !doc.metadata || !doc.vector) return null;
17675
+ if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
17676
+ return null;
17677
+ }
17678
+ return { metadata: doc.metadata, vector: doc.vector };
17679
+ } catch {
17680
+ return null;
17681
+ }
17682
+ }
17683
+ async fetchPagesBatch(urls, scope) {
17684
+ if (urls.length === 0) return [];
17685
+ try {
17686
+ const results = await this.pagesNs.fetch(urls, {
17687
+ includeMetadata: true
17688
+ });
17689
+ const out = [];
17690
+ for (const doc of results) {
17691
+ if (!doc || !doc.metadata) continue;
17692
+ if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
17693
+ continue;
17694
+ }
17695
+ out.push({
17696
+ url: doc.metadata.url,
17697
+ title: doc.metadata.title,
17698
+ routeFile: doc.metadata.routeFile,
17699
+ outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? []
17700
+ });
17701
+ }
17702
+ return out;
17703
+ } catch {
17704
+ return [];
17705
+ }
17706
+ }
17336
17707
  async deletePages(scope) {
17708
+ const ids = [];
17709
+ let cursor = "0";
17337
17710
  try {
17338
- const index = this.pageIndex(scope);
17339
- await index.reset();
17711
+ for (; ; ) {
17712
+ const result = await this.pagesNs.range({
17713
+ cursor,
17714
+ limit: 100,
17715
+ includeMetadata: true
17716
+ });
17717
+ for (const doc of result.vectors) {
17718
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
17719
+ ids.push(String(doc.id));
17720
+ }
17721
+ }
17722
+ if (!result.nextCursor || result.nextCursor === "0") break;
17723
+ cursor = result.nextCursor;
17724
+ }
17340
17725
  } catch {
17341
17726
  }
17727
+ if (ids.length > 0) {
17728
+ await this.deletePagesByIds(ids, scope);
17729
+ }
17342
17730
  }
17343
17731
  async health() {
17344
17732
  try {
17345
- await this.client.info();
17733
+ await this.index.info();
17346
17734
  return { ok: true };
17347
17735
  } catch (error) {
17348
17736
  return {
@@ -17352,14 +17740,31 @@ var UpstashSearchStore = class {
17352
17740
  }
17353
17741
  }
17354
17742
  async dropAllIndexes(projectId) {
17355
- const allIndexes = await this.client.listIndexes();
17356
- const prefix = `${projectId}--`;
17357
- for (const name of allIndexes) {
17358
- if (name.startsWith(prefix)) {
17359
- try {
17360
- const index = this.client.index(name);
17361
- await index.deleteIndex();
17362
- } catch {
17743
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17744
+ const ids = [];
17745
+ let cursor = "0";
17746
+ try {
17747
+ for (; ; ) {
17748
+ const result = await ns.range({
17749
+ cursor,
17750
+ limit: 100,
17751
+ includeMetadata: true
17752
+ });
17753
+ for (const doc of result.vectors) {
17754
+ if (doc.metadata?.projectId === projectId) {
17755
+ ids.push(String(doc.id));
17756
+ }
17757
+ }
17758
+ if (!result.nextCursor || result.nextCursor === "0") break;
17759
+ cursor = result.nextCursor;
17760
+ }
17761
+ } catch {
17762
+ }
17763
+ if (ids.length > 0) {
17764
+ const BATCH_SIZE = 90;
17765
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17766
+ const batch = ids.slice(i, i + BATCH_SIZE);
17767
+ await ns.delete(batch);
17363
17768
  }
17364
17769
  }
17365
17770
  }
@@ -17373,12 +17778,16 @@ async function createUpstashStore(config) {
17373
17778
  if (!url || !token) {
17374
17779
  throw new SearchSocketError(
17375
17780
  "VECTOR_BACKEND_UNAVAILABLE",
17376
- `Missing Upstash Search credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
17781
+ `Missing Upstash Vector credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
17377
17782
  );
17378
17783
  }
17379
- const { Search } = await import('@upstash/search');
17380
- const client = new Search({ url, token });
17381
- return new UpstashSearchStore({ client });
17784
+ const { Index } = await import('@upstash/vector');
17785
+ const index = new Index({ url, token });
17786
+ return new UpstashSearchStore({
17787
+ index,
17788
+ pagesNamespace: config.upstash.namespaces.pages,
17789
+ chunksNamespace: config.upstash.namespaces.chunks
17790
+ });
17382
17791
  }
17383
17792
  function sha1(input) {
17384
17793
  return createHash("sha1").update(input).digest("hex");
@@ -17446,6 +17855,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
17446
17855
  if (normalizeText(current.text)) {
17447
17856
  sections.push({
17448
17857
  sectionTitle: current.sectionTitle,
17858
+ headingLevel: current.headingLevel,
17449
17859
  headingPath: current.headingPath,
17450
17860
  text: current.text.trim()
17451
17861
  });
@@ -17464,6 +17874,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
17464
17874
  headingStack.length = level;
17465
17875
  current = {
17466
17876
  sectionTitle: title,
17877
+ headingLevel: level,
17467
17878
  headingPath: headingStack.filter((entry) => Boolean(entry)).slice(0, headingPathDepth),
17468
17879
  text: `${line}
17469
17880
  `
@@ -17599,6 +18010,7 @@ function splitSection(section, config) {
17599
18010
  return [
17600
18011
  {
17601
18012
  sectionTitle: section.sectionTitle,
18013
+ headingLevel: section.headingLevel,
17602
18014
  headingPath: section.headingPath,
17603
18015
  chunkText: text
17604
18016
  }
@@ -17649,6 +18061,7 @@ ${chunk}`;
17649
18061
  }
17650
18062
  return merged.map((chunkText) => ({
17651
18063
  sectionTitle: section.sectionTitle,
18064
+ headingLevel: section.headingLevel,
17652
18065
  headingPath: section.headingPath,
17653
18066
  chunkText
17654
18067
  }));
@@ -17664,6 +18077,18 @@ function buildSummaryChunkText(page) {
17664
18077
  }
17665
18078
  return parts.join("\n\n");
17666
18079
  }
18080
+ function buildEmbeddingTitle(chunk) {
18081
+ if (!chunk.sectionTitle || chunk.headingLevel === void 0) return void 0;
18082
+ if (chunk.headingPath.length > 1) {
18083
+ const path14 = chunk.headingPath.join(" > ");
18084
+ const lastInPath = chunk.headingPath[chunk.headingPath.length - 1];
18085
+ if (lastInPath !== chunk.sectionTitle) {
18086
+ return `${chunk.title} \u2014 ${path14} > ${chunk.sectionTitle}`;
18087
+ }
18088
+ return `${chunk.title} \u2014 ${path14}`;
18089
+ }
18090
+ return `${chunk.title} \u2014 ${chunk.sectionTitle}`;
18091
+ }
17667
18092
  function buildEmbeddingText(chunk, prependTitle) {
17668
18093
  if (!prependTitle) return chunk.chunkText;
17669
18094
  const prefix = chunk.sectionTitle ? `${chunk.title} \u2014 ${chunk.sectionTitle}` : chunk.title;
@@ -17694,10 +18119,14 @@ function chunkPage(page, config, scope) {
17694
18119
  tags: page.tags,
17695
18120
  contentHash: "",
17696
18121
  description: page.description,
17697
- keywords: page.keywords
18122
+ keywords: page.keywords,
18123
+ publishedAt: page.publishedAt,
18124
+ incomingAnchorText: page.incomingAnchorText,
18125
+ meta: page.meta
17698
18126
  };
17699
18127
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
17700
- summaryChunk.contentHash = sha256(normalizeText(embeddingText));
18128
+ const metaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
18129
+ summaryChunk.contentHash = sha256(normalizeText(embeddingText) + metaSuffix);
17701
18130
  chunks.push(summaryChunk);
17702
18131
  }
17703
18132
  const ordinalOffset = config.chunking.pageSummaryChunk ? 1 : 0;
@@ -17714,6 +18143,7 @@ function chunkPage(page, config, scope) {
17714
18143
  path: page.url,
17715
18144
  title: page.title,
17716
18145
  sectionTitle: entry.sectionTitle,
18146
+ headingLevel: entry.headingLevel,
17717
18147
  headingPath: entry.headingPath,
17718
18148
  chunkText: entry.chunkText,
17719
18149
  snippet: toSnippet(entry.chunkText),
@@ -17723,10 +18153,16 @@ function chunkPage(page, config, scope) {
17723
18153
  tags: page.tags,
17724
18154
  contentHash: "",
17725
18155
  description: page.description,
17726
- keywords: page.keywords
18156
+ keywords: page.keywords,
18157
+ publishedAt: page.publishedAt,
18158
+ incomingAnchorText: page.incomingAnchorText,
18159
+ meta: page.meta
17727
18160
  };
17728
18161
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
17729
- chunk.contentHash = sha256(normalizeText(embeddingText));
18162
+ const embeddingTitle = config.chunking.weightHeadings ? buildEmbeddingTitle(chunk) : void 0;
18163
+ const chunkMetaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
18164
+ const hashInput = embeddingTitle ? `${normalizeText(embeddingText)}|title:${embeddingTitle}` : normalizeText(embeddingText);
18165
+ chunk.contentHash = sha256(hashInput + chunkMetaSuffix);
17730
18166
  chunks.push(chunk);
17731
18167
  }
17732
18168
  return chunks;
@@ -18558,7 +18994,112 @@ function gfm(turndownService) {
18558
18994
  ]);
18559
18995
  }
18560
18996
 
18997
+ // src/utils/structured-meta.ts
18998
+ var VALID_KEY_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
18999
+ function validateMetaKey(key) {
19000
+ return VALID_KEY_RE.test(key);
19001
+ }
19002
+ function parseMetaValue(content, dataType) {
19003
+ switch (dataType) {
19004
+ case "number": {
19005
+ const n = Number(content);
19006
+ return Number.isFinite(n) ? n : content;
19007
+ }
19008
+ case "boolean":
19009
+ return content === "true";
19010
+ case "string[]":
19011
+ return content ? content.split(",").map((s) => s.trim()) : [];
19012
+ case "date": {
19013
+ const ms = Number(content);
19014
+ return Number.isFinite(ms) ? ms : content;
19015
+ }
19016
+ default:
19017
+ return content;
19018
+ }
19019
+ }
19020
+ function escapeFilterValue(s) {
19021
+ return s.replace(/'/g, "''");
19022
+ }
19023
+ function buildMetaFilterString(filters) {
19024
+ const clauses = [];
19025
+ for (const [key, value] of Object.entries(filters)) {
19026
+ if (!validateMetaKey(key)) continue;
19027
+ const field = `meta.${key}`;
19028
+ if (typeof value === "string") {
19029
+ clauses.push(`${field} CONTAINS '${escapeFilterValue(value)}'`);
19030
+ } else if (typeof value === "boolean") {
19031
+ clauses.push(`${field} = ${value}`);
19032
+ } else {
19033
+ clauses.push(`${field} = ${value}`);
19034
+ }
19035
+ }
19036
+ return clauses.join(" AND ");
19037
+ }
19038
+
18561
19039
  // src/indexing/extractor.ts
19040
+ function normalizeDateToMs(value) {
19041
+ if (value == null) return void 0;
19042
+ if (value instanceof Date) {
19043
+ const ts = value.getTime();
19044
+ return Number.isFinite(ts) ? ts : void 0;
19045
+ }
19046
+ if (typeof value === "string") {
19047
+ const ts = new Date(value).getTime();
19048
+ return Number.isFinite(ts) ? ts : void 0;
19049
+ }
19050
+ if (typeof value === "number") {
19051
+ return Number.isFinite(value) ? value : void 0;
19052
+ }
19053
+ return void 0;
19054
+ }
19055
+ var FRONTMATTER_DATE_FIELDS = ["date", "publishedAt", "updatedAt", "published_at", "updated_at"];
19056
+ function extractPublishedAtFromFrontmatter(data) {
19057
+ for (const field of FRONTMATTER_DATE_FIELDS) {
19058
+ const val = normalizeDateToMs(data[field]);
19059
+ if (val !== void 0) return val;
19060
+ }
19061
+ return void 0;
19062
+ }
19063
+ function extractPublishedAtFromHtml($) {
19064
+ const jsonLdScripts = $('script[type="application/ld+json"]');
19065
+ for (let i = 0; i < jsonLdScripts.length; i++) {
19066
+ try {
19067
+ const raw = $(jsonLdScripts[i]).html();
19068
+ if (!raw) continue;
19069
+ const parsed = JSON.parse(raw);
19070
+ const candidates = [];
19071
+ if (Array.isArray(parsed)) {
19072
+ candidates.push(...parsed);
19073
+ } else if (parsed && typeof parsed === "object") {
19074
+ candidates.push(parsed);
19075
+ if (Array.isArray(parsed["@graph"])) {
19076
+ candidates.push(...parsed["@graph"]);
19077
+ }
19078
+ }
19079
+ for (const candidate of candidates) {
19080
+ const val = normalizeDateToMs(candidate.datePublished);
19081
+ if (val !== void 0) return val;
19082
+ }
19083
+ } catch {
19084
+ }
19085
+ }
19086
+ const ogTime = $('meta[property="article:published_time"]').attr("content")?.trim();
19087
+ if (ogTime) {
19088
+ const val = normalizeDateToMs(ogTime);
19089
+ if (val !== void 0) return val;
19090
+ }
19091
+ const itempropDate = $('meta[itemprop="datePublished"]').attr("content")?.trim() || $('time[itemprop="datePublished"]').attr("datetime")?.trim();
19092
+ if (itempropDate) {
19093
+ const val = normalizeDateToMs(itempropDate);
19094
+ if (val !== void 0) return val;
19095
+ }
19096
+ const timeEl = $("time[datetime]").first().attr("datetime")?.trim();
19097
+ if (timeEl) {
19098
+ const val = normalizeDateToMs(timeEl);
19099
+ if (val !== void 0) return val;
19100
+ }
19101
+ return void 0;
19102
+ }
18562
19103
  function hasTopLevelNoindexComment(markdown) {
18563
19104
  const lines = markdown.split(/\r?\n/);
18564
19105
  let inFence = false;
@@ -18574,6 +19115,97 @@ function hasTopLevelNoindexComment(markdown) {
18574
19115
  }
18575
19116
  return false;
18576
19117
  }
19118
+ var GARBAGE_ALT_WORDS = /* @__PURE__ */ new Set([
19119
+ "image",
19120
+ "photo",
19121
+ "picture",
19122
+ "icon",
19123
+ "logo",
19124
+ "banner",
19125
+ "screenshot",
19126
+ "thumbnail",
19127
+ "img",
19128
+ "graphic",
19129
+ "illustration",
19130
+ "spacer",
19131
+ "pixel",
19132
+ "placeholder",
19133
+ "avatar",
19134
+ "background"
19135
+ ]);
19136
+ var IMAGE_EXT_RE = /\.(jpg|jpeg|png|gif|svg|webp|avif|bmp|ico)(\?.*)?$/i;
19137
+ function isMeaningfulAlt(alt) {
19138
+ const trimmed = alt.trim();
19139
+ if (!trimmed || trimmed.length < 5) return false;
19140
+ if (IMAGE_EXT_RE.test(trimmed)) return false;
19141
+ if (GARBAGE_ALT_WORDS.has(trimmed.toLowerCase())) return false;
19142
+ return true;
19143
+ }
19144
+ function resolveImageText(img, $, imageDescAttr) {
19145
+ const imgDesc = img.attr(imageDescAttr)?.trim();
19146
+ if (imgDesc) return imgDesc;
19147
+ const figure = img.closest("figure");
19148
+ if (figure.length) {
19149
+ const figDesc = figure.attr(imageDescAttr)?.trim();
19150
+ if (figDesc) return figDesc;
19151
+ }
19152
+ const alt = img.attr("alt")?.trim() ?? "";
19153
+ const caption = figure.length ? figure.find("figcaption").first().text().trim() : "";
19154
+ if (isMeaningfulAlt(alt) && caption) {
19155
+ return `${alt} \u2014 ${caption}`;
19156
+ }
19157
+ if (isMeaningfulAlt(alt)) {
19158
+ return alt;
19159
+ }
19160
+ if (caption) {
19161
+ return caption;
19162
+ }
19163
+ return null;
19164
+ }
19165
+ var STOP_ANCHORS = /* @__PURE__ */ new Set([
19166
+ "here",
19167
+ "click",
19168
+ "click here",
19169
+ "read more",
19170
+ "link",
19171
+ "this",
19172
+ "more"
19173
+ ]);
19174
+ function normalizeAnchorText(raw) {
19175
+ const normalized = raw.replace(/\s+/g, " ").trim().toLowerCase();
19176
+ if (normalized.length < 3) return "";
19177
+ if (STOP_ANCHORS.has(normalized)) return "";
19178
+ if (normalized.length > 100) return normalized.slice(0, 100);
19179
+ return normalized;
19180
+ }
19181
+ function escapeHtml(text) {
19182
+ return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
19183
+ }
19184
+ function preprocessImages(root2, $, imageDescAttr) {
19185
+ root2.find("picture").each((_i, el) => {
19186
+ const picture = $(el);
19187
+ const img = picture.find("img").first();
19188
+ const parentFigure = picture.closest("figure");
19189
+ const text = img.length ? resolveImageText(img, $, imageDescAttr) : null;
19190
+ if (text) {
19191
+ if (parentFigure.length) parentFigure.find("figcaption").remove();
19192
+ picture.replaceWith(`<span>${escapeHtml(text)}</span>`);
19193
+ } else {
19194
+ picture.remove();
19195
+ }
19196
+ });
19197
+ root2.find("img").each((_i, el) => {
19198
+ const img = $(el);
19199
+ const parentFigure = img.closest("figure");
19200
+ const text = resolveImageText(img, $, imageDescAttr);
19201
+ if (text) {
19202
+ if (parentFigure.length) parentFigure.find("figcaption").remove();
19203
+ img.replaceWith(`<span>${escapeHtml(text)}</span>`);
19204
+ } else {
19205
+ img.remove();
19206
+ }
19207
+ });
19208
+ }
18577
19209
  function extractFromHtml(url, html, config) {
18578
19210
  const $ = load(html);
18579
19211
  const normalizedUrl = normalizeUrlPath(url);
@@ -18599,6 +19231,20 @@ function extractFromHtml(url, html, config) {
18599
19231
  if (weight === 0) {
18600
19232
  return null;
18601
19233
  }
19234
+ if ($('meta[name="searchsocket:noindex"]').attr("content") === "true") {
19235
+ return null;
19236
+ }
19237
+ const RESERVED_META_KEYS = /* @__PURE__ */ new Set(["noindex", "tags"]);
19238
+ const meta = {};
19239
+ $('meta[name^="searchsocket:"]').each((_i, el) => {
19240
+ const name = $(el).attr("name") ?? "";
19241
+ const key = name.slice("searchsocket:".length);
19242
+ if (!key || RESERVED_META_KEYS.has(key) || !validateMetaKey(key)) return;
19243
+ const content = $(el).attr("content") ?? "";
19244
+ const dataType = $(el).attr("data-type") ?? "string";
19245
+ meta[key] = parseMetaValue(content, dataType);
19246
+ });
19247
+ const componentTags = $('meta[name="searchsocket:tags"]').attr("content")?.trim();
18602
19248
  const description = $("meta[name='description']").attr("content")?.trim() || $("meta[property='og:description']").attr("content")?.trim() || void 0;
18603
19249
  const keywordsRaw = $("meta[name='keywords']").attr("content")?.trim();
18604
19250
  const keywords = keywordsRaw ? keywordsRaw.split(",").map((k) => k.trim()).filter(Boolean) : void 0;
@@ -18610,7 +19256,9 @@ function extractFromHtml(url, html, config) {
18610
19256
  root2.find(selector).remove();
18611
19257
  }
18612
19258
  root2.find(`[${config.extract.ignoreAttr}]`).remove();
19259
+ preprocessImages(root2, $, config.extract.imageDescAttr);
18613
19260
  const outgoingLinks = [];
19261
+ const seenLinkKeys = /* @__PURE__ */ new Set();
18614
19262
  root2.find("a[href]").each((_index, node) => {
18615
19263
  const href = $(node).attr("href");
18616
19264
  if (!href || href.startsWith("#") || href.startsWith("mailto:") || href.startsWith("tel:")) {
@@ -18621,7 +19269,19 @@ function extractFromHtml(url, html, config) {
18621
19269
  if (!["http:", "https:"].includes(parsed.protocol)) {
18622
19270
  return;
18623
19271
  }
18624
- outgoingLinks.push(normalizeUrlPath(parsed.pathname));
19272
+ const url2 = normalizeUrlPath(parsed.pathname);
19273
+ let anchorText = normalizeAnchorText($(node).text());
19274
+ if (!anchorText) {
19275
+ const imgAlt = $(node).find("img").first().attr("alt") ?? "";
19276
+ if (isMeaningfulAlt(imgAlt)) {
19277
+ anchorText = normalizeAnchorText(imgAlt);
19278
+ }
19279
+ }
19280
+ const key = `${url2}|${anchorText}`;
19281
+ if (!seenLinkKeys.has(key)) {
19282
+ seenLinkKeys.add(key);
19283
+ outgoingLinks.push({ url: url2, anchorText });
19284
+ }
18625
19285
  } catch {
18626
19286
  }
18627
19287
  });
@@ -18646,16 +19306,25 @@ function extractFromHtml(url, html, config) {
18646
19306
  return null;
18647
19307
  }
18648
19308
  const tags = normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1);
19309
+ const publishedAt = extractPublishedAtFromHtml($);
19310
+ if (componentTags) {
19311
+ const extraTags = componentTags.split(",").map((t) => t.trim()).filter(Boolean);
19312
+ for (const t of extraTags) {
19313
+ if (!tags.includes(t)) tags.push(t);
19314
+ }
19315
+ }
18649
19316
  return {
18650
19317
  url: normalizeUrlPath(url),
18651
19318
  title,
18652
19319
  markdown,
18653
- outgoingLinks: [...new Set(outgoingLinks)],
19320
+ outgoingLinks,
18654
19321
  noindex: false,
18655
19322
  tags,
18656
19323
  description,
18657
19324
  keywords,
18658
- weight
19325
+ weight,
19326
+ publishedAt,
19327
+ meta: Object.keys(meta).length > 0 ? meta : void 0
18659
19328
  };
18660
19329
  }
18661
19330
  function extractFromMarkdown(url, markdown, title) {
@@ -18676,6 +19345,24 @@ function extractFromMarkdown(url, markdown, title) {
18676
19345
  if (mdWeight === 0) {
18677
19346
  return null;
18678
19347
  }
19348
+ let mdMeta;
19349
+ const rawMeta = searchsocketMeta?.meta;
19350
+ if (rawMeta && typeof rawMeta === "object" && !Array.isArray(rawMeta)) {
19351
+ const metaObj = {};
19352
+ for (const [key, val] of Object.entries(rawMeta)) {
19353
+ if (!validateMetaKey(key)) continue;
19354
+ if (typeof val === "string" || typeof val === "number" || typeof val === "boolean") {
19355
+ metaObj[key] = val;
19356
+ } else if (Array.isArray(val) && val.every((v) => typeof v === "string")) {
19357
+ metaObj[key] = val;
19358
+ } else if (val instanceof Date) {
19359
+ metaObj[key] = val.getTime();
19360
+ }
19361
+ }
19362
+ if (Object.keys(metaObj).length > 0) {
19363
+ mdMeta = metaObj;
19364
+ }
19365
+ }
18679
19366
  const content = parsed.content;
18680
19367
  const normalized = normalizeMarkdown(content);
18681
19368
  if (!normalizeText(normalized)) {
@@ -18690,6 +19377,7 @@ function extractFromMarkdown(url, markdown, title) {
18690
19377
  fmKeywords = frontmatter.keywords.split(",").map((k) => k.trim()).filter(Boolean);
18691
19378
  }
18692
19379
  if (fmKeywords && fmKeywords.length === 0) fmKeywords = void 0;
19380
+ const publishedAt = extractPublishedAtFromFrontmatter(frontmatter);
18693
19381
  return {
18694
19382
  url: normalizeUrlPath(url),
18695
19383
  title: resolvedTitle,
@@ -18699,7 +19387,9 @@ function extractFromMarkdown(url, markdown, title) {
18699
19387
  tags: normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1),
18700
19388
  description: fmDescription,
18701
19389
  keywords: fmKeywords,
18702
- weight: mdWeight
19390
+ weight: mdWeight,
19391
+ publishedAt,
19392
+ meta: mdMeta
18703
19393
  };
18704
19394
  }
18705
19395
  function segmentToRegex(segment) {
@@ -18894,7 +19584,7 @@ async function parseManifest(cwd, outputDir) {
18894
19584
  const manifestPath = path.resolve(cwd, outputDir, "server", "manifest-full.js");
18895
19585
  let content;
18896
19586
  try {
18897
- content = await fs3.readFile(manifestPath, "utf8");
19587
+ content = await fs8.readFile(manifestPath, "utf8");
18898
19588
  } catch {
18899
19589
  throw new SearchSocketError(
18900
19590
  "BUILD_MANIFEST_NOT_FOUND",
@@ -19205,13 +19895,132 @@ function filePathToUrl(filePath, baseDir) {
19205
19895
  const noExt = relative.replace(/\.md$/i, "").replace(/\/index$/i, "");
19206
19896
  return normalizeUrlPath(noExt || "/");
19207
19897
  }
19208
- function normalizeSvelteToMarkdown(source) {
19209
- return source.replace(/<script[\s\S]*?<\/script>/g, "").replace(/<style[\s\S]*?<\/style>/g, "").replace(/<[^>]+>/g, " ").replace(/\{[^}]+\}/g, " ").replace(/\s+/g, " ").trim();
19898
+ var ROUTE_FILE_RE = /\+(page|layout|error)(@[^.]+)?\.svelte$/;
19899
+ function isSvelteComponentFile(filePath) {
19900
+ if (!filePath.endsWith(".svelte")) return false;
19901
+ return !ROUTE_FILE_RE.test(filePath);
19210
19902
  }
19211
- async function loadContentFilesPages(cwd, config, maxPages) {
19212
- const contentConfig = config.source.contentFiles;
19213
- if (!contentConfig) {
19214
- throw new Error("content-files config is missing");
19903
+ function extractSvelteComponentMeta(source) {
19904
+ const componentMatch = source.match(/<!--\s*@component\s*([\s\S]*?)\s*-->/);
19905
+ const description = componentMatch?.[1]?.trim() || void 0;
19906
+ const propsMatch = source.match(
19907
+ /let\s+\{([\s\S]*?)\}\s*(?::\s*([^=;{][\s\S]*?))?\s*=\s*\$props\(\)/
19908
+ );
19909
+ const props = [];
19910
+ if (propsMatch) {
19911
+ const destructureBlock = propsMatch[1];
19912
+ const typeAnnotation = propsMatch[2]?.trim();
19913
+ let resolvedTypeMap;
19914
+ if (typeAnnotation && /^[A-Z]\w*$/.test(typeAnnotation)) {
19915
+ resolvedTypeMap = resolveTypeReference(source, typeAnnotation);
19916
+ } else if (typeAnnotation && typeAnnotation.startsWith("{")) {
19917
+ resolvedTypeMap = parseInlineTypeAnnotation(typeAnnotation);
19918
+ }
19919
+ const propEntries = splitDestructureBlock(destructureBlock);
19920
+ for (const entry of propEntries) {
19921
+ const trimmed = entry.trim();
19922
+ if (!trimmed || trimmed.startsWith("...")) continue;
19923
+ let propName;
19924
+ let defaultValue;
19925
+ const renameMatch = trimmed.match(/^(\w+)\s*:\s*\w+\s*(?:=\s*([\s\S]+))?$/);
19926
+ if (renameMatch) {
19927
+ propName = renameMatch[1];
19928
+ defaultValue = renameMatch[2]?.trim();
19929
+ } else {
19930
+ const defaultMatch = trimmed.match(/^(\w+)\s*=\s*([\s\S]+)$/);
19931
+ if (defaultMatch) {
19932
+ propName = defaultMatch[1];
19933
+ defaultValue = defaultMatch[2]?.trim();
19934
+ } else {
19935
+ propName = trimmed.match(/^(\w+)/)?.[1] ?? trimmed;
19936
+ }
19937
+ }
19938
+ const propType = resolvedTypeMap?.get(propName);
19939
+ props.push({
19940
+ name: propName,
19941
+ ...propType ? { type: propType } : {},
19942
+ ...defaultValue ? { default: defaultValue } : {}
19943
+ });
19944
+ }
19945
+ }
19946
+ return { description, props };
19947
+ }
19948
+ function splitDestructureBlock(block) {
19949
+ const entries = [];
19950
+ let depth = 0;
19951
+ let current = "";
19952
+ for (const ch of block) {
19953
+ if (ch === "{" || ch === "[" || ch === "(") {
19954
+ depth++;
19955
+ current += ch;
19956
+ } else if (ch === "}" || ch === "]" || ch === ")") {
19957
+ depth--;
19958
+ current += ch;
19959
+ } else if (ch === "," && depth === 0) {
19960
+ entries.push(current);
19961
+ current = "";
19962
+ } else {
19963
+ current += ch;
19964
+ }
19965
+ }
19966
+ if (current.trim()) entries.push(current);
19967
+ return entries;
19968
+ }
19969
+ function resolveTypeReference(source, typeName) {
19970
+ const startRe = new RegExp(`(?:interface\\s+${typeName}\\s*|type\\s+${typeName}\\s*=\\s*)\\{`);
19971
+ const startMatch = source.match(startRe);
19972
+ if (!startMatch || startMatch.index === void 0) return void 0;
19973
+ const bodyStart = startMatch.index + startMatch[0].length;
19974
+ let depth = 1;
19975
+ let i = bodyStart;
19976
+ while (i < source.length && depth > 0) {
19977
+ if (source[i] === "{") depth++;
19978
+ else if (source[i] === "}") depth--;
19979
+ i++;
19980
+ }
19981
+ if (depth !== 0) return void 0;
19982
+ const body = source.slice(bodyStart, i - 1);
19983
+ return parseTypeMembers(body);
19984
+ }
19985
+ function parseInlineTypeAnnotation(annotation) {
19986
+ const inner = annotation.replace(/^\{/, "").replace(/\}$/, "");
19987
+ return parseTypeMembers(inner);
19988
+ }
19989
+ function parseTypeMembers(body) {
19990
+ const map = /* @__PURE__ */ new Map();
19991
+ const members = body.split(/[;\n]/).map((m) => m.trim()).filter(Boolean);
19992
+ for (const member of members) {
19993
+ const memberMatch = member.match(/^(\w+)\??\s*:\s*(.+)$/);
19994
+ if (memberMatch) {
19995
+ map.set(memberMatch[1], memberMatch[2].replace(/,\s*$/, "").trim());
19996
+ }
19997
+ }
19998
+ return map;
19999
+ }
20000
+ function buildComponentMarkdown(componentName, meta) {
20001
+ if (!meta.description && meta.props.length === 0) return "";
20002
+ const parts = [`${componentName} component.`];
20003
+ if (meta.description) {
20004
+ parts.push(meta.description);
20005
+ }
20006
+ if (meta.props.length > 0) {
20007
+ const propEntries = meta.props.map((p) => {
20008
+ let entry = p.name;
20009
+ if (p.type) entry += ` (${p.type})`;
20010
+ if (p.default) entry += ` default: ${p.default}`;
20011
+ return entry;
20012
+ });
20013
+ parts.push(`Props: ${propEntries.join(", ")}.`);
20014
+ }
20015
+ return parts.join(" ");
20016
+ }
20017
+ function normalizeSvelteToMarkdown(source) {
20018
+ return source.replace(/<script[\s\S]*?<\/script>/g, "").replace(/<style[\s\S]*?<\/style>/g, "").replace(/<[^>]+>/g, " ").replace(/\{[^}]+\}/g, " ").replace(/\s+/g, " ").trim();
20019
+ }
20020
+ async function loadContentFilesPages(cwd, config, maxPages) {
20021
+ const contentConfig = config.source.contentFiles;
20022
+ if (!contentConfig) {
20023
+ throw new Error("content-files config is missing");
19215
20024
  }
19216
20025
  const baseDir = path.resolve(cwd, contentConfig.baseDir);
19217
20026
  const files = await fg(contentConfig.globs, {
@@ -19223,13 +20032,27 @@ async function loadContentFilesPages(cwd, config, maxPages) {
19223
20032
  const selected = typeof limit === "number" ? files.slice(0, limit) : files;
19224
20033
  const pages = [];
19225
20034
  for (const filePath of selected) {
19226
- const raw = await fs3.readFile(filePath, "utf8");
19227
- const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
20035
+ const raw = await fs8.readFile(filePath, "utf8");
20036
+ let markdown;
20037
+ let tags;
20038
+ if (filePath.endsWith(".md")) {
20039
+ markdown = raw;
20040
+ } else if (isSvelteComponentFile(filePath)) {
20041
+ const componentName = path.basename(filePath, ".svelte");
20042
+ const meta = extractSvelteComponentMeta(raw);
20043
+ const componentMarkdown = buildComponentMarkdown(componentName, meta);
20044
+ const templateContent = normalizeSvelteToMarkdown(raw);
20045
+ markdown = componentMarkdown ? [componentMarkdown, templateContent].filter(Boolean).join("\n\n") : templateContent;
20046
+ tags = ["component"];
20047
+ } else {
20048
+ markdown = normalizeSvelteToMarkdown(raw);
20049
+ }
19228
20050
  pages.push({
19229
20051
  url: filePathToUrl(filePath, baseDir),
19230
20052
  markdown,
19231
20053
  sourcePath: path.relative(cwd, filePath).replace(/\\/g, "/"),
19232
- outgoingLinks: []
20054
+ outgoingLinks: [],
20055
+ ...tags ? { tags } : {}
19233
20056
  });
19234
20057
  }
19235
20058
  return pages;
@@ -19359,7 +20182,7 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
19359
20182
  const selected = typeof limit === "number" ? htmlFiles.slice(0, limit) : htmlFiles;
19360
20183
  const pages = [];
19361
20184
  for (const filePath of selected) {
19362
- const html = await fs3.readFile(filePath, "utf8");
20185
+ const html = await fs8.readFile(filePath, "utf8");
19363
20186
  pages.push({
19364
20187
  url: staticHtmlFileToUrl(filePath, outputDir),
19365
20188
  html,
@@ -19422,7 +20245,7 @@ function isBlockedByRobots(urlPath, rules3) {
19422
20245
  }
19423
20246
  async function loadRobotsTxtFromDir(dir) {
19424
20247
  try {
19425
- const content = await fs3.readFile(path.join(dir, "robots.txt"), "utf8");
20248
+ const content = await fs8.readFile(path.join(dir, "robots.txt"), "utf8");
19426
20249
  return parseRobotsTxt(content);
19427
20250
  } catch {
19428
20251
  return null;
@@ -19450,29 +20273,65 @@ function nonNegativeOrZero(value) {
19450
20273
  function normalizeForTitleMatch(text) {
19451
20274
  return text.toLowerCase().replace(/[^a-z0-9\s]/g, "").replace(/\s+/g, " ").trim();
19452
20275
  }
19453
- function rankHits(hits, config, query) {
20276
+ function rankHits(hits, config, query, debug) {
19454
20277
  const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
19455
20278
  const titleMatchWeight = config.ranking.weights.titleMatch;
19456
20279
  return hits.map((hit) => {
19457
- let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
20280
+ const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
20281
+ let score = baseScore;
20282
+ let incomingLinkBoostValue = 0;
19458
20283
  if (config.ranking.enableIncomingLinkBoost) {
19459
20284
  const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.metadata.incomingLinks));
19460
- score += incomingBoost * config.ranking.weights.incomingLinks;
20285
+ incomingLinkBoostValue = incomingBoost * config.ranking.weights.incomingLinks;
20286
+ score += incomingLinkBoostValue;
19461
20287
  }
20288
+ let depthBoostValue = 0;
19462
20289
  if (config.ranking.enableDepthBoost) {
19463
20290
  const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
19464
- score += depthBoost * config.ranking.weights.depth;
20291
+ depthBoostValue = depthBoost * config.ranking.weights.depth;
20292
+ score += depthBoostValue;
19465
20293
  }
20294
+ let titleMatchBoostValue = 0;
19466
20295
  if (normalizedQuery && titleMatchWeight > 0) {
19467
20296
  const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
19468
20297
  if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
19469
- score += titleMatchWeight;
20298
+ titleMatchBoostValue = titleMatchWeight;
20299
+ score += titleMatchBoostValue;
19470
20300
  }
19471
20301
  }
19472
- return {
20302
+ let freshnessBoostValue = 0;
20303
+ if (config.ranking.enableFreshnessBoost) {
20304
+ const publishedAt = hit.metadata.publishedAt;
20305
+ if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
20306
+ const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
20307
+ const decay = 1 / (1 + nonNegativeOrZero(daysSince) * config.ranking.freshnessDecayRate);
20308
+ freshnessBoostValue = decay * config.ranking.weights.freshness;
20309
+ score += freshnessBoostValue;
20310
+ }
20311
+ }
20312
+ let anchorTextMatchBoostValue = 0;
20313
+ if (config.ranking.enableAnchorTextBoost && normalizedQuery && config.ranking.weights.anchorText > 0) {
20314
+ const normalizedAnchorText = normalizeForTitleMatch(hit.metadata.incomingAnchorText ?? "");
20315
+ if (normalizedAnchorText.length > 0 && normalizedQuery.length > 0 && (normalizedAnchorText.includes(normalizedQuery) || normalizedQuery.includes(normalizedAnchorText))) {
20316
+ anchorTextMatchBoostValue = config.ranking.weights.anchorText;
20317
+ score += anchorTextMatchBoostValue;
20318
+ }
20319
+ }
20320
+ const result = {
19473
20321
  hit,
19474
20322
  finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
19475
20323
  };
20324
+ if (debug) {
20325
+ result.breakdown = {
20326
+ baseScore,
20327
+ incomingLinkBoost: incomingLinkBoostValue,
20328
+ depthBoost: depthBoostValue,
20329
+ titleMatchBoost: titleMatchBoostValue,
20330
+ freshnessBoost: freshnessBoostValue,
20331
+ anchorTextMatchBoost: anchorTextMatchBoostValue
20332
+ };
20333
+ }
20334
+ return result;
19476
20335
  }).sort((a, b) => {
19477
20336
  const delta = b.finalScore - a.finalScore;
19478
20337
  return Number.isNaN(delta) ? 0 : delta;
@@ -19481,12 +20340,13 @@ function rankHits(hits, config, query) {
19481
20340
  function trimByScoreGap(results, config) {
19482
20341
  if (results.length === 0) return results;
19483
20342
  const threshold = config.ranking.scoreGapThreshold;
19484
- const minScore = config.ranking.minScore;
19485
- if (minScore > 0 && results.length > 0) {
19486
- const sortedScores = results.map((r) => r.pageScore).sort((a, b) => a - b);
19487
- const mid = Math.floor(sortedScores.length / 2);
19488
- const median = sortedScores.length % 2 === 0 ? (sortedScores[mid - 1] + sortedScores[mid]) / 2 : sortedScores[mid];
19489
- if (median < minScore) return [];
20343
+ const minScoreRatio = config.ranking.minScoreRatio;
20344
+ if (minScoreRatio > 0 && results.length > 0) {
20345
+ const topScore = results[0].pageScore;
20346
+ if (Number.isFinite(topScore) && topScore > 0) {
20347
+ const minThreshold = topScore * minScoreRatio;
20348
+ results = results.filter((r) => r.pageScore >= minThreshold);
20349
+ }
19490
20350
  }
19491
20351
  if (threshold > 0 && results.length > 1) {
19492
20352
  for (let i = 1; i < results.length; i++) {
@@ -19556,61 +20416,99 @@ function aggregateByPage(ranked, config) {
19556
20416
  return Number.isNaN(delta) ? 0 : delta;
19557
20417
  });
19558
20418
  }
19559
- function mergePageAndChunkResults(pageHits, rankedChunks, config) {
19560
- if (pageHits.length === 0) return rankedChunks;
19561
- const w = config.search.pageSearchWeight;
19562
- const pageScoreMap = /* @__PURE__ */ new Map();
19563
- for (const ph of pageHits) {
19564
- pageScoreMap.set(ph.url, ph);
19565
- }
19566
- const pagesWithChunks = /* @__PURE__ */ new Set();
19567
- const merged = rankedChunks.map((ranked) => {
19568
- const url = ranked.hit.metadata.url;
19569
- const pageHit = pageScoreMap.get(url);
19570
- if (pageHit) {
19571
- pagesWithChunks.add(url);
19572
- const blended = (1 - w) * ranked.finalScore + w * pageHit.score;
19573
- return {
19574
- hit: ranked.hit,
19575
- finalScore: Number.isFinite(blended) ? blended : ranked.finalScore
19576
- };
20419
+ function rankPageHits(pageHits, config, query, debug) {
20420
+ const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
20421
+ const titleMatchWeight = config.ranking.weights.titleMatch;
20422
+ return pageHits.map((hit) => {
20423
+ const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
20424
+ let score = baseScore;
20425
+ let incomingLinkBoostValue = 0;
20426
+ if (config.ranking.enableIncomingLinkBoost) {
20427
+ const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.incomingLinks));
20428
+ incomingLinkBoostValue = incomingBoost * config.ranking.weights.incomingLinks;
20429
+ score += incomingLinkBoostValue;
19577
20430
  }
19578
- return ranked;
19579
- });
19580
- for (const [url, pageHit] of pageScoreMap) {
19581
- if (pagesWithChunks.has(url)) continue;
19582
- const syntheticScore = pageHit.score * w;
19583
- const syntheticHit = {
19584
- id: `page:${url}`,
19585
- score: pageHit.score,
19586
- metadata: {
19587
- projectId: "",
19588
- scopeName: "",
19589
- url: pageHit.url,
19590
- path: pageHit.url,
19591
- title: pageHit.title,
19592
- sectionTitle: "",
19593
- headingPath: [],
19594
- snippet: pageHit.description || pageHit.title,
19595
- chunkText: pageHit.description || pageHit.title,
19596
- ordinal: 0,
19597
- contentHash: "",
19598
- depth: pageHit.depth,
19599
- incomingLinks: pageHit.incomingLinks,
19600
- routeFile: pageHit.routeFile,
19601
- tags: pageHit.tags
20431
+ let depthBoostValue = 0;
20432
+ if (config.ranking.enableDepthBoost) {
20433
+ const depthBoost = 1 / (1 + nonNegativeOrZero(hit.depth));
20434
+ depthBoostValue = depthBoost * config.ranking.weights.depth;
20435
+ score += depthBoostValue;
20436
+ }
20437
+ let titleMatchBoostValue = 0;
20438
+ if (normalizedQuery && titleMatchWeight > 0) {
20439
+ const normalizedTitle = normalizeForTitleMatch(hit.title);
20440
+ if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
20441
+ titleMatchBoostValue = titleMatchWeight;
20442
+ score += titleMatchBoostValue;
20443
+ }
20444
+ }
20445
+ let freshnessBoostValue = 0;
20446
+ if (config.ranking.enableFreshnessBoost) {
20447
+ const publishedAt = hit.publishedAt;
20448
+ if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
20449
+ const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
20450
+ const decay = 1 / (1 + nonNegativeOrZero(daysSince) * config.ranking.freshnessDecayRate);
20451
+ freshnessBoostValue = decay * config.ranking.weights.freshness;
20452
+ score += freshnessBoostValue;
19602
20453
  }
20454
+ }
20455
+ const pageWeight = findPageWeight(hit.url, config.ranking.pageWeights);
20456
+ if (pageWeight !== 1) {
20457
+ score *= pageWeight;
20458
+ }
20459
+ const result = {
20460
+ url: hit.url,
20461
+ title: hit.title,
20462
+ description: hit.description,
20463
+ routeFile: hit.routeFile,
20464
+ depth: hit.depth,
20465
+ incomingLinks: hit.incomingLinks,
20466
+ tags: hit.tags,
20467
+ baseScore,
20468
+ finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY,
20469
+ publishedAt: hit.publishedAt
19603
20470
  };
19604
- merged.push({
19605
- hit: syntheticHit,
19606
- finalScore: Number.isFinite(syntheticScore) ? syntheticScore : 0
19607
- });
19608
- }
19609
- return merged.sort((a, b) => {
20471
+ if (debug) {
20472
+ result.breakdown = {
20473
+ baseScore,
20474
+ pageWeight,
20475
+ incomingLinkBoost: incomingLinkBoostValue,
20476
+ depthBoost: depthBoostValue,
20477
+ titleMatchBoost: titleMatchBoostValue,
20478
+ freshnessBoost: freshnessBoostValue
20479
+ };
20480
+ }
20481
+ return result;
20482
+ }).filter((p) => findPageWeight(p.url, config.ranking.pageWeights) !== 0).sort((a, b) => {
19610
20483
  const delta = b.finalScore - a.finalScore;
19611
20484
  return Number.isNaN(delta) ? 0 : delta;
19612
20485
  });
19613
20486
  }
20487
+ function trimPagesByScoreGap(results, config) {
20488
+ if (results.length === 0) return results;
20489
+ const threshold = config.ranking.scoreGapThreshold;
20490
+ const minScoreRatio = config.ranking.minScoreRatio;
20491
+ if (minScoreRatio > 0 && results.length > 0) {
20492
+ const topScore = results[0].finalScore;
20493
+ if (Number.isFinite(topScore) && topScore > 0) {
20494
+ const minThreshold = topScore * minScoreRatio;
20495
+ results = results.filter((r) => r.finalScore >= minThreshold);
20496
+ }
20497
+ }
20498
+ if (threshold > 0 && results.length > 1) {
20499
+ for (let i = 1; i < results.length; i++) {
20500
+ const prev = results[i - 1].finalScore;
20501
+ const current = results[i].finalScore;
20502
+ if (prev > 0) {
20503
+ const gap = (prev - current) / prev;
20504
+ if (gap >= threshold) {
20505
+ return results.slice(0, i);
20506
+ }
20507
+ }
20508
+ }
20509
+ }
20510
+ return results;
20511
+ }
19614
20512
 
19615
20513
  // src/utils/time.ts
19616
20514
  function nowIso() {
@@ -19619,6 +20517,81 @@ function nowIso() {
19619
20517
  function hrTimeMs(start) {
19620
20518
  return Number(process.hrtime.bigint() - start) / 1e6;
19621
20519
  }
20520
+ function resolvePageUrl(pageUrl, baseUrl) {
20521
+ if (!baseUrl) return pageUrl;
20522
+ try {
20523
+ return new URL(pageUrl, baseUrl).href;
20524
+ } catch {
20525
+ return pageUrl;
20526
+ }
20527
+ }
20528
+ function generateLlmsTxt(pages, config) {
20529
+ const title = config.llmsTxt.title ?? config.project.id;
20530
+ const description = config.llmsTxt.description;
20531
+ const baseUrl = config.project.baseUrl;
20532
+ const lines = [`# ${title}`];
20533
+ if (description) {
20534
+ lines.push("", `> ${description}`);
20535
+ }
20536
+ const filtered = pages.filter(
20537
+ (p) => p.url !== "/llms.txt" && p.url !== "/llms-full.txt"
20538
+ );
20539
+ const sorted = [...filtered].sort((a, b) => {
20540
+ if (a.depth !== b.depth) return a.depth - b.depth;
20541
+ return b.incomingLinks - a.incomingLinks;
20542
+ });
20543
+ if (sorted.length > 0) {
20544
+ lines.push("", "## Pages", "");
20545
+ for (const page of sorted) {
20546
+ const url = resolvePageUrl(page.url, baseUrl);
20547
+ if (page.description) {
20548
+ lines.push(`- [${page.title}](${url}): ${page.description}`);
20549
+ } else {
20550
+ lines.push(`- [${page.title}](${url})`);
20551
+ }
20552
+ }
20553
+ }
20554
+ lines.push("");
20555
+ return lines.join("\n");
20556
+ }
20557
+ function generateLlmsFullTxt(pages, config) {
20558
+ const title = config.llmsTxt.title ?? config.project.id;
20559
+ const description = config.llmsTxt.description;
20560
+ const baseUrl = config.project.baseUrl;
20561
+ const lines = [`# ${title}`];
20562
+ if (description) {
20563
+ lines.push("", `> ${description}`);
20564
+ }
20565
+ const filtered = pages.filter(
20566
+ (p) => p.url !== "/llms.txt" && p.url !== "/llms-full.txt"
20567
+ );
20568
+ const sorted = [...filtered].sort((a, b) => {
20569
+ if (a.depth !== b.depth) return a.depth - b.depth;
20570
+ return b.incomingLinks - a.incomingLinks;
20571
+ });
20572
+ for (const page of sorted) {
20573
+ const url = resolvePageUrl(page.url, baseUrl);
20574
+ lines.push("", "---", "", `## [${page.title}](${url})`, "");
20575
+ lines.push(page.markdown.trim());
20576
+ }
20577
+ lines.push("");
20578
+ return lines.join("\n");
20579
+ }
20580
+ async function writeLlmsTxt(pages, config, cwd, logger3) {
20581
+ const outputPath = path.resolve(cwd, config.llmsTxt.outputPath);
20582
+ const outputDir = path.dirname(outputPath);
20583
+ await fs8.mkdir(outputDir, { recursive: true });
20584
+ const content = generateLlmsTxt(pages, config);
20585
+ await fs8.writeFile(outputPath, content, "utf8");
20586
+ logger3.info(`Generated llms.txt at ${config.llmsTxt.outputPath}`);
20587
+ if (config.llmsTxt.generateFull) {
20588
+ const fullPath = outputPath.replace(/\.txt$/, "-full.txt");
20589
+ const fullContent = generateLlmsFullTxt(pages, config);
20590
+ await fs8.writeFile(fullPath, fullContent, "utf8");
20591
+ const relativeFull = path.relative(cwd, fullPath);
20592
+ logger3.info(`Generated llms-full.txt at ${relativeFull}`);
20593
+ }
20594
+ }
19622
20595
 
19623
20596
  // src/indexing/pipeline.ts
19624
20597
  function buildPageSummary(page, maxChars = 3500) {
@@ -19637,16 +20610,33 @@ function buildPageSummary(page, maxChars = 3500) {
19637
20610
  if (joined.length <= maxChars) return joined;
19638
20611
  return joined.slice(0, maxChars).trim();
19639
20612
  }
20613
+ function buildPageContentHash(page) {
20614
+ const parts = [
20615
+ page.title,
20616
+ page.description ?? "",
20617
+ (page.keywords ?? []).slice().sort().join(","),
20618
+ page.tags.slice().sort().join(","),
20619
+ page.markdown,
20620
+ String(page.outgoingLinks),
20621
+ String(page.publishedAt ?? ""),
20622
+ page.incomingAnchorText ?? "",
20623
+ (page.outgoingLinkUrls ?? []).slice().sort().join(","),
20624
+ page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : ""
20625
+ ];
20626
+ return sha256(parts.join("|"));
20627
+ }
19640
20628
  var IndexPipeline = class _IndexPipeline {
19641
20629
  cwd;
19642
20630
  config;
19643
20631
  store;
19644
20632
  logger;
20633
+ hooks;
19645
20634
  constructor(options) {
19646
20635
  this.cwd = options.cwd;
19647
20636
  this.config = options.config;
19648
20637
  this.store = options.store;
19649
20638
  this.logger = options.logger;
20639
+ this.hooks = options.hooks;
19650
20640
  }
19651
20641
  static async create(options = {}) {
19652
20642
  const cwd = path.resolve(options.cwd ?? process.cwd());
@@ -19656,7 +20646,8 @@ var IndexPipeline = class _IndexPipeline {
19656
20646
  cwd,
19657
20647
  config,
19658
20648
  store,
19659
- logger: options.logger ?? new Logger()
20649
+ logger: options.logger ?? new Logger(),
20650
+ hooks: options.hooks ?? {}
19660
20651
  });
19661
20652
  }
19662
20653
  getConfig() {
@@ -19677,7 +20668,7 @@ var IndexPipeline = class _IndexPipeline {
19677
20668
  const scope = resolveScope(this.config, options.scopeOverride);
19678
20669
  ensureStateDirs(this.cwd, this.config.state.dir);
19679
20670
  const sourceMode = options.sourceOverride ?? this.config.source.mode;
19680
- this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
20671
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-vector)`);
19681
20672
  if (options.force) {
19682
20673
  this.logger.info("Force mode enabled \u2014 full rebuild");
19683
20674
  }
@@ -19685,9 +20676,9 @@ var IndexPipeline = class _IndexPipeline {
19685
20676
  this.logger.info("Dry run \u2014 no writes will be performed");
19686
20677
  }
19687
20678
  const manifestStart = stageStart();
19688
- const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
20679
+ const existingPageHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getPageHashes(scope);
19689
20680
  stageEnd("manifest", manifestStart);
19690
- this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
20681
+ this.logger.debug(`Manifest: ${existingPageHashes.size} existing page hashes loaded`);
19691
20682
  const sourceStart = stageStart();
19692
20683
  this.logger.info(`Loading pages (source: ${sourceMode})...`);
19693
20684
  let sourcePages;
@@ -19764,11 +20755,61 @@ var IndexPipeline = class _IndexPipeline {
19764
20755
  );
19765
20756
  continue;
19766
20757
  }
19767
- extractedPages.push(extracted);
20758
+ if (sourcePage.tags && sourcePage.tags.length > 0) {
20759
+ extracted.tags = [.../* @__PURE__ */ new Set([...extracted.tags, ...sourcePage.tags])];
20760
+ }
20761
+ let accepted;
20762
+ if (this.hooks.transformPage) {
20763
+ const transformed = await this.hooks.transformPage(extracted);
20764
+ if (transformed === null) {
20765
+ this.logger.debug(`Page ${sourcePage.url} skipped by transformPage hook`);
20766
+ continue;
20767
+ }
20768
+ accepted = transformed;
20769
+ } else {
20770
+ accepted = extracted;
20771
+ }
20772
+ extractedPages.push(accepted);
19768
20773
  this.logger.event("page_extracted", {
19769
- url: extracted.url
20774
+ url: accepted.url
19770
20775
  });
19771
20776
  }
20777
+ const customRecords = options.customRecords ?? [];
20778
+ if (customRecords.length > 0) {
20779
+ this.logger.info(`Processing ${customRecords.length} custom record${customRecords.length === 1 ? "" : "s"}...`);
20780
+ for (const record of customRecords) {
20781
+ const normalizedUrl = normalizeUrlPath(record.url);
20782
+ const normalized = normalizeMarkdown(record.content);
20783
+ if (!normalized.trim()) {
20784
+ this.logger.warn(`Custom record ${normalizedUrl} has empty content and was skipped.`);
20785
+ continue;
20786
+ }
20787
+ const urlTags = normalizedUrl.split("/").filter(Boolean).slice(0, 1);
20788
+ const tags = record.tags ? [.../* @__PURE__ */ new Set([...urlTags, ...record.tags])] : urlTags;
20789
+ const extracted = {
20790
+ url: normalizedUrl,
20791
+ title: record.title,
20792
+ markdown: normalized,
20793
+ outgoingLinks: [],
20794
+ noindex: false,
20795
+ tags,
20796
+ weight: record.weight
20797
+ };
20798
+ let accepted;
20799
+ if (this.hooks.transformPage) {
20800
+ const transformed = await this.hooks.transformPage(extracted);
20801
+ if (transformed === null) {
20802
+ this.logger.debug(`Custom record ${normalizedUrl} skipped by transformPage hook`);
20803
+ continue;
20804
+ }
20805
+ accepted = transformed;
20806
+ } else {
20807
+ accepted = extracted;
20808
+ }
20809
+ extractedPages.push(accepted);
20810
+ this.logger.event("page_extracted", { url: accepted.url, custom: true });
20811
+ }
20812
+ }
19772
20813
  extractedPages.sort((a, b) => a.url.localeCompare(b.url));
19773
20814
  const uniquePages = [];
19774
20815
  const seenUrls = /* @__PURE__ */ new Set();
@@ -19801,15 +20842,28 @@ var IndexPipeline = class _IndexPipeline {
19801
20842
  const linkStart = stageStart();
19802
20843
  const pageSet = new Set(indexablePages.map((page) => normalizeUrlPath(page.url)));
19803
20844
  const incomingLinkCount = /* @__PURE__ */ new Map();
20845
+ const incomingAnchorTexts = /* @__PURE__ */ new Map();
19804
20846
  for (const page of indexablePages) {
19805
20847
  incomingLinkCount.set(page.url, incomingLinkCount.get(page.url) ?? 0);
19806
20848
  }
19807
20849
  for (const page of indexablePages) {
19808
- for (const outgoing of page.outgoingLinks) {
20850
+ const seenForCount = /* @__PURE__ */ new Set();
20851
+ const seenForAnchor = /* @__PURE__ */ new Set();
20852
+ for (const { url: outgoing, anchorText } of page.outgoingLinks) {
19809
20853
  if (!pageSet.has(outgoing)) {
19810
20854
  continue;
19811
20855
  }
19812
- incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
20856
+ if (!seenForCount.has(outgoing)) {
20857
+ seenForCount.add(outgoing);
20858
+ incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
20859
+ }
20860
+ if (anchorText && !seenForAnchor.has(outgoing)) {
20861
+ seenForAnchor.add(outgoing);
20862
+ if (!incomingAnchorTexts.has(outgoing)) {
20863
+ incomingAnchorTexts.set(outgoing, /* @__PURE__ */ new Set());
20864
+ }
20865
+ incomingAnchorTexts.get(outgoing).add(anchorText);
20866
+ }
19813
20867
  }
19814
20868
  }
19815
20869
  stageEnd("links", linkStart);
@@ -19828,6 +20882,15 @@ var IndexPipeline = class _IndexPipeline {
19828
20882
  });
19829
20883
  }
19830
20884
  }
20885
+ for (const record of customRecords) {
20886
+ const normalizedUrl = normalizeUrlPath(record.url);
20887
+ if (!precomputedRoutes.has(normalizedUrl)) {
20888
+ precomputedRoutes.set(normalizedUrl, {
20889
+ routeFile: "",
20890
+ routeResolution: "exact"
20891
+ });
20892
+ }
20893
+ }
19831
20894
  for (const page of indexablePages) {
19832
20895
  const routeMatch = precomputedRoutes.get(normalizeUrlPath(page.url)) ?? mapUrlToRoute(page.url, routePatterns);
19833
20896
  if (routeMatch.routeResolution === "best-effort") {
@@ -19845,6 +20908,17 @@ var IndexPipeline = class _IndexPipeline {
19845
20908
  } else {
19846
20909
  routeExact += 1;
19847
20910
  }
20911
+ const anchorSet = incomingAnchorTexts.get(page.url);
20912
+ let incomingAnchorText;
20913
+ if (anchorSet && anchorSet.size > 0) {
20914
+ let joined = "";
20915
+ for (const phrase of anchorSet) {
20916
+ const next2 = joined ? `${joined} ${phrase}` : phrase;
20917
+ if (next2.length > 500) break;
20918
+ joined = next2;
20919
+ }
20920
+ incomingAnchorText = joined || void 0;
20921
+ }
19848
20922
  const indexedPage = {
19849
20923
  url: page.url,
19850
20924
  title: page.title,
@@ -19854,40 +20928,113 @@ var IndexPipeline = class _IndexPipeline {
19854
20928
  generatedAt: nowIso(),
19855
20929
  incomingLinks: incomingLinkCount.get(page.url) ?? 0,
19856
20930
  outgoingLinks: page.outgoingLinks.length,
20931
+ outgoingLinkUrls: page.outgoingLinks.map((l) => typeof l === "string" ? l : l.url),
19857
20932
  depth: getUrlDepth(page.url),
19858
20933
  tags: page.tags,
19859
20934
  markdown: page.markdown,
19860
20935
  description: page.description,
19861
- keywords: page.keywords
20936
+ keywords: page.keywords,
20937
+ publishedAt: page.publishedAt,
20938
+ incomingAnchorText,
20939
+ meta: page.meta
19862
20940
  };
19863
20941
  pages.push(indexedPage);
19864
20942
  this.logger.event("page_indexed", { url: page.url });
19865
20943
  }
20944
+ const pageRecords = pages.map((p) => {
20945
+ const summary = buildPageSummary(p);
20946
+ return {
20947
+ url: p.url,
20948
+ title: p.title,
20949
+ markdown: p.markdown,
20950
+ projectId: scope.projectId,
20951
+ scopeName: scope.scopeName,
20952
+ routeFile: p.routeFile,
20953
+ routeResolution: p.routeResolution,
20954
+ incomingLinks: p.incomingLinks,
20955
+ outgoingLinks: p.outgoingLinks,
20956
+ outgoingLinkUrls: p.outgoingLinkUrls,
20957
+ depth: p.depth,
20958
+ tags: p.tags,
20959
+ indexedAt: p.generatedAt,
20960
+ summary,
20961
+ description: p.description,
20962
+ keywords: p.keywords,
20963
+ contentHash: buildPageContentHash(p),
20964
+ publishedAt: p.publishedAt,
20965
+ meta: p.meta
20966
+ };
20967
+ });
20968
+ const currentPageUrls = new Set(pageRecords.map((r) => r.url));
20969
+ const changedPages = pageRecords.filter(
20970
+ (r) => !existingPageHashes.has(r.url) || existingPageHashes.get(r.url) !== r.contentHash
20971
+ );
20972
+ const deletedPageUrls = [...existingPageHashes.keys()].filter((url) => !currentPageUrls.has(url));
19866
20973
  if (!options.dryRun) {
19867
- const pageRecords = pages.map((p) => {
19868
- const summary = buildPageSummary(p);
19869
- return {
19870
- url: p.url,
19871
- title: p.title,
19872
- markdown: p.markdown,
19873
- projectId: scope.projectId,
19874
- scopeName: scope.scopeName,
19875
- routeFile: p.routeFile,
19876
- routeResolution: p.routeResolution,
19877
- incomingLinks: p.incomingLinks,
19878
- outgoingLinks: p.outgoingLinks,
19879
- depth: p.depth,
19880
- tags: p.tags,
19881
- indexedAt: p.generatedAt,
19882
- summary,
19883
- description: p.description,
19884
- keywords: p.keywords
19885
- };
19886
- });
19887
- await this.store.deletePages(scope);
19888
- await this.store.upsertPages(pageRecords, scope);
20974
+ if (options.force) {
20975
+ await this.store.deletePages(scope);
20976
+ this.logger.info(`Upserting ${pageRecords.length} page summaries...`);
20977
+ const pageDocs = pageRecords.map((r) => ({
20978
+ id: r.url,
20979
+ data: r.summary ?? r.title,
20980
+ metadata: {
20981
+ title: r.title,
20982
+ url: r.url,
20983
+ description: r.description ?? "",
20984
+ keywords: r.keywords ?? [],
20985
+ summary: r.summary ?? "",
20986
+ tags: r.tags,
20987
+ markdown: r.markdown,
20988
+ routeFile: r.routeFile,
20989
+ routeResolution: r.routeResolution,
20990
+ incomingLinks: r.incomingLinks,
20991
+ outgoingLinks: r.outgoingLinks,
20992
+ outgoingLinkUrls: r.outgoingLinkUrls ?? [],
20993
+ depth: r.depth,
20994
+ indexedAt: r.indexedAt,
20995
+ contentHash: r.contentHash ?? "",
20996
+ publishedAt: r.publishedAt ?? null,
20997
+ ...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
20998
+ }
20999
+ }));
21000
+ await this.store.upsertPages(pageDocs, scope);
21001
+ } else {
21002
+ if (changedPages.length > 0) {
21003
+ this.logger.info(`Upserting ${changedPages.length} changed page summaries...`);
21004
+ const pageDocs = changedPages.map((r) => ({
21005
+ id: r.url,
21006
+ data: r.summary ?? r.title,
21007
+ metadata: {
21008
+ title: r.title,
21009
+ url: r.url,
21010
+ description: r.description ?? "",
21011
+ keywords: r.keywords ?? [],
21012
+ summary: r.summary ?? "",
21013
+ tags: r.tags,
21014
+ markdown: r.markdown,
21015
+ routeFile: r.routeFile,
21016
+ routeResolution: r.routeResolution,
21017
+ incomingLinks: r.incomingLinks,
21018
+ outgoingLinks: r.outgoingLinks,
21019
+ outgoingLinkUrls: r.outgoingLinkUrls ?? [],
21020
+ depth: r.depth,
21021
+ indexedAt: r.indexedAt,
21022
+ contentHash: r.contentHash ?? "",
21023
+ publishedAt: r.publishedAt ?? null,
21024
+ ...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
21025
+ }
21026
+ }));
21027
+ await this.store.upsertPages(pageDocs, scope);
21028
+ }
21029
+ if (deletedPageUrls.length > 0) {
21030
+ await this.store.deletePagesByIds(deletedPageUrls, scope);
21031
+ }
21032
+ }
19889
21033
  }
21034
+ const pagesChanged = options.force ? pageRecords.length : changedPages.length;
21035
+ const pagesDeleted = deletedPageUrls.length;
19890
21036
  stageEnd("pages", pagesStart);
21037
+ this.logger.info(`Page changes: ${pagesChanged} changed/new, ${pagesDeleted} deleted, ${pageRecords.length - changedPages.length} unchanged`);
19891
21038
  this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
19892
21039
  const chunkStart = stageStart();
19893
21040
  this.logger.info("Chunking pages...");
@@ -19896,6 +21043,18 @@ var IndexPipeline = class _IndexPipeline {
19896
21043
  if (typeof maxChunks === "number") {
19897
21044
  chunks = chunks.slice(0, maxChunks);
19898
21045
  }
21046
+ if (this.hooks.transformChunk) {
21047
+ const transformed = [];
21048
+ for (const chunk of chunks) {
21049
+ const result = await this.hooks.transformChunk(chunk);
21050
+ if (result === null) {
21051
+ this.logger.debug(`Chunk ${chunk.chunkKey} skipped by transformChunk hook`);
21052
+ continue;
21053
+ }
21054
+ transformed.push(result);
21055
+ }
21056
+ chunks = transformed;
21057
+ }
19899
21058
  for (const chunk of chunks) {
19900
21059
  this.logger.event("chunked", {
19901
21060
  url: chunk.url,
@@ -19908,7 +21067,12 @@ var IndexPipeline = class _IndexPipeline {
19908
21067
  for (const chunk of chunks) {
19909
21068
  currentChunkMap.set(chunk.chunkKey, chunk);
19910
21069
  }
19911
- const changedChunks = chunks.filter((chunk) => {
21070
+ const chunkHashStart = stageStart();
21071
+ const currentChunkKeys = chunks.map((c) => c.chunkKey);
21072
+ const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.fetchContentHashesForKeys(currentChunkKeys, scope);
21073
+ stageEnd("chunk_hashes", chunkHashStart);
21074
+ this.logger.debug(`Fetched ${existingHashes.size} existing chunk hashes for ${currentChunkKeys.length} current keys`);
21075
+ let changedChunks = chunks.filter((chunk) => {
19912
21076
  if (options.force) {
19913
21077
  return true;
19914
21078
  }
@@ -19921,37 +21085,45 @@ var IndexPipeline = class _IndexPipeline {
19921
21085
  }
19922
21086
  return existingHash !== chunk.contentHash;
19923
21087
  });
19924
- const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
21088
+ const existingChunkIds = options.force ? /* @__PURE__ */ new Set() : await this.store.scanChunkIds(scope);
21089
+ const deletes = [...existingChunkIds].filter((chunkKey) => !currentChunkMap.has(chunkKey));
21090
+ if (this.hooks.beforeIndex) {
21091
+ changedChunks = await this.hooks.beforeIndex(changedChunks);
21092
+ }
19925
21093
  this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
19926
21094
  const upsertStart = stageStart();
19927
21095
  let documentsUpserted = 0;
19928
21096
  if (!options.dryRun && changedChunks.length > 0) {
19929
- this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
19930
- const UPSTASH_CONTENT_LIMIT = 4096;
21097
+ this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
19931
21098
  const docs = changedChunks.map((chunk) => {
19932
- const title = chunk.title;
19933
- const sectionTitle = chunk.sectionTitle ?? "";
19934
- const url = chunk.url;
19935
- const tags = chunk.tags.join(",");
19936
- const headingPath = chunk.headingPath.join(" > ");
19937
- const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
19938
- const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
19939
- const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
21099
+ const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
21100
+ if (embeddingText.length > 2e3) {
21101
+ this.logger.warn(
21102
+ `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
21103
+ );
21104
+ }
19940
21105
  return {
19941
21106
  id: chunk.chunkKey,
19942
- content: { title, sectionTitle, text, url, tags, headingPath },
21107
+ data: embeddingText,
19943
21108
  metadata: {
19944
- projectId: scope.projectId,
19945
- scopeName: scope.scopeName,
21109
+ url: chunk.url,
19946
21110
  path: chunk.path,
21111
+ title: chunk.title,
21112
+ sectionTitle: chunk.sectionTitle ?? "",
21113
+ headingPath: chunk.headingPath.join(" > "),
19947
21114
  snippet: chunk.snippet,
21115
+ chunkText: embeddingText,
21116
+ tags: chunk.tags,
19948
21117
  ordinal: chunk.ordinal,
19949
21118
  contentHash: chunk.contentHash,
19950
21119
  depth: chunk.depth,
19951
21120
  incomingLinks: chunk.incomingLinks,
19952
21121
  routeFile: chunk.routeFile,
19953
21122
  description: chunk.description ?? "",
19954
- keywords: (chunk.keywords ?? []).join(",")
21123
+ keywords: chunk.keywords ?? [],
21124
+ publishedAt: chunk.publishedAt ?? null,
21125
+ incomingAnchorText: chunk.incomingAnchorText ?? "",
21126
+ ...chunk.meta && Object.keys(chunk.meta).length > 0 ? { meta: chunk.meta } : {}
19955
21127
  }
19956
21128
  };
19957
21129
  });
@@ -19969,9 +21141,16 @@ var IndexPipeline = class _IndexPipeline {
19969
21141
  } else {
19970
21142
  this.logger.info("No chunks to upsert \u2014 all up to date");
19971
21143
  }
21144
+ if (this.config.llmsTxt.enable && !options.dryRun) {
21145
+ const llmsStart = stageStart();
21146
+ await writeLlmsTxt(pages, this.config, this.cwd, this.logger);
21147
+ stageEnd("llms_txt", llmsStart);
21148
+ }
19972
21149
  this.logger.info("Done.");
19973
- return {
21150
+ const stats = {
19974
21151
  pagesProcessed: pages.length,
21152
+ pagesChanged,
21153
+ pagesDeleted,
19975
21154
  chunksTotal: chunks.length,
19976
21155
  chunksChanged: changedChunks.length,
19977
21156
  documentsUpserted,
@@ -19980,16 +21159,143 @@ var IndexPipeline = class _IndexPipeline {
19980
21159
  routeBestEffort,
19981
21160
  stageTimingsMs
19982
21161
  };
21162
+ if (this.hooks.afterIndex) {
21163
+ await this.hooks.afterIndex(stats);
21164
+ }
21165
+ return stats;
19983
21166
  }
19984
21167
  };
21168
+
21169
+ // src/search/related-pages.ts
21170
// Dice-style structural similarity between two URL paths, based on the
// number of leading path segments they share. Two root paths score 1,
// a root path versus a non-root path scores 0, otherwise the score is
// 2 * sharedPrefixLen / (lenA + lenB).
function diceScore(urlA, urlB) {
  const partsA = urlA.split("/").filter(Boolean);
  const partsB = urlB.split("/").filter(Boolean);
  if (partsA.length === 0) return partsB.length === 0 ? 1 : 0;
  if (partsB.length === 0) return 0;
  let prefixLen = 0;
  const limit = Math.min(partsA.length, partsB.length);
  // Count only the unbroken run of matching segments from the left.
  while (prefixLen < limit && partsA[prefixLen] === partsB[prefixLen]) {
    prefixLen++;
  }
  return (2 * prefixLen) / (partsA.length + partsB.length);
}
21186
// Weighted relatedness score: a direct link contributes a flat 0.5,
// URL-structure similarity (dice) is weighted 0.3, and semantic vector
// similarity is weighted 0.2.
function compositeScore(isLinked, dice, semantic) {
  const linkComponent = isLinked ? 0.5 : 0;
  const structureComponent = dice * 0.3;
  const semanticComponent = semantic * 0.2;
  return linkComponent + structureComponent + semanticComponent;
}
21189
// Picks the single label describing why two pages are related.
// Precedence: outgoing link > incoming link > structural sibling
// (shared-prefix dice score above 0.4) > plain semantic similarity.
function dominantRelationshipType(isOutgoing, isIncoming, dice) {
  return isOutgoing
    ? "outgoing_link"
    : isIncoming
      ? "incoming_link"
      : dice > 0.4
        ? "sibling"
        : "semantic";
}
21195
+
21196
+ // src/search/engine.ts
21197
// Zod schema for per-request, debug-only ranking/search overrides.
// The whole object and both sub-objects are optional; the engine only
// applies these when the request also sets debug: true (see search()).
var rankingOverridesSchema = z.object({
  ranking: z.object({
    enableIncomingLinkBoost: z.boolean().optional(),
    enableDepthBoost: z.boolean().optional(),
    // Cap on how many chunks contribute to page-level aggregation.
    aggregationCap: z.number().int().positive().optional(),
    // Ratio-valued knobs are constrained to [0, 1].
    aggregationDecay: z.number().min(0).max(1).optional(),
    minChunkScoreRatio: z.number().min(0).max(1).optional(),
    minScoreRatio: z.number().min(0).max(1).optional(),
    scoreGapThreshold: z.number().min(0).max(1).optional(),
    // Individual ranking-signal weights; unconstrained numbers.
    weights: z.object({
      incomingLinks: z.number().optional(),
      depth: z.number().optional(),
      aggregation: z.number().optional(),
      titleMatch: z.number().optional()
    }).optional()
  }).optional(),
  search: z.object({
    pageSearchWeight: z.number().min(0).max(1).optional()
  }).optional()
}).optional();
19985
21217
// Zod schema for a search request. `q` is the only required field; the
// rest narrows the candidate set or tunes result shaping.
var requestSchema = z.object({
  // Query text; must be non-empty after trimming.
  q: z.string().trim().min(1),
  topK: z.number().int().positive().max(100).optional(),
  scope: z.string().optional(),
  // Restrict results to URLs under this path prefix (post-filtered).
  pathPrefix: z.string().optional(),
  // A result must carry every listed tag to be kept.
  tags: z.array(z.string()).optional(),
  // Structured metadata filters, e.g. { version: 2, deprecated: false };
  // translated to a store filter string via buildMetaFilterString.
  filters: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional(),
  // "page" (default) groups hits per page; "chunk" returns flat chunks.
  groupBy: z.enum(["page", "chunk"]).optional(),
  // Cap on per-page chunk sub-results when grouping by page.
  maxSubResults: z.number().int().positive().max(20).optional(),
  // Enables score breakdowns and allows rankingOverrides to take effect.
  debug: z.boolean().optional(),
  rankingOverrides: rankingOverridesSchema
});
21229
+ var MAX_SITE_STRUCTURE_PAGES = 2e3;
21230
// Creates an empty site-structure tree node for the given URL and depth.
// Title/routeFile stay blank and isIndexed stays false until an indexed
// page is attached to the node.
function makeNode(url, depth) {
  return {
    url,
    title: "",
    depth,
    routeFile: "",
    isIndexed: false,
    childCount: 0,
    children: []
  };
}
21233
// Builds a hierarchical site tree from a flat page list. Every ancestor
// URL segment is materialized as a node, so intermediate paths with no
// indexed page appear as implicit nodes (isIndexed: false). Children are
// sorted by URL and childCount is filled in. When pathPrefix is given,
// the matching subtree is returned instead of the root (or a fresh empty
// node if no such subtree exists).
function buildTree(pages, pathPrefix) {
  const byUrl = /* @__PURE__ */ new Map();
  const rootNode = makeNode("/", 0);
  byUrl.set("/", rootNode);
  for (const page of pages) {
    const canonical = normalizeUrlPath(page.url);
    const parts = canonical.split("/").filter(Boolean);
    if (parts.length === 0) {
      // The page maps onto the site root itself.
      rootNode.title = page.title;
      rootNode.routeFile = page.routeFile;
      rootNode.isIndexed = true;
      continue;
    }
    // Materialize each ancestor path so the tree has no gaps.
    for (let depth = 1; depth <= parts.length; depth++) {
      const ancestorUrl = "/" + parts.slice(0, depth).join("/");
      if (!byUrl.has(ancestorUrl)) {
        byUrl.set(ancestorUrl, makeNode(ancestorUrl, depth));
      }
    }
    const leaf = byUrl.get(canonical);
    leaf.title = page.title;
    leaf.routeFile = page.routeFile;
    leaf.isIndexed = true;
  }
  // Wire every non-root node onto its parent (root for single segments).
  for (const [url, node] of byUrl) {
    if (url === "/") continue;
    const parts = url.split("/").filter(Boolean);
    const parentUrl = parts.length === 1 ? "/" : "/" + parts.slice(0, -1).join("/");
    const parent = byUrl.get(parentUrl) ?? rootNode;
    parent.children.push(node);
  }
  const finalize = (node) => {
    node.children.sort((a, b) => a.url.localeCompare(b.url));
    node.childCount = node.children.length;
    node.children.forEach(finalize);
  };
  finalize(rootNode);
  if (!pathPrefix) {
    return rootNode;
  }
  const normalizedPrefix = normalizeUrlPath(pathPrefix);
  const subtree = byUrl.get(normalizedPrefix);
  if (subtree) {
    return subtree;
  }
  // Prefix matched nothing: return an empty node at the right depth.
  return makeNode(normalizedPrefix, normalizedPrefix.split("/").filter(Boolean).length);
}
21282
// Layers debug-request ranking/search overrides onto the base config,
// returning a new object. Only `search`, `ranking`, and `ranking.weights`
// are shallow-merged; every other top-level key is copied from base.
function mergeRankingOverrides(base, overrides) {
  const mergedSearch = { ...base.search, ...overrides.search };
  const mergedWeights = { ...base.ranking.weights, ...overrides.ranking?.weights };
  const mergedRanking = { ...base.ranking, ...overrides.ranking, weights: mergedWeights };
  return { ...base, search: mergedSearch, ranking: mergedRanking };
}
19993
21299
  var SearchEngine = class _SearchEngine {
19994
21300
  cwd;
19995
21301
  config;
@@ -20019,125 +21325,203 @@ var SearchEngine = class _SearchEngine {
20019
21325
  }
20020
21326
  const input = parsed.data;
20021
21327
  const totalStart = process.hrtime.bigint();
21328
+ const effectiveConfig = input.debug && input.rankingOverrides ? mergeRankingOverrides(this.config, input.rankingOverrides) : this.config;
20022
21329
  const resolvedScope = resolveScope(this.config, input.scope);
20023
21330
  const topK = input.topK ?? 10;
21331
+ const maxSubResults = input.maxSubResults ?? 5;
20024
21332
  const groupByPage = (input.groupBy ?? "page") === "page";
20025
- const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
20026
- const filterParts = [];
20027
- if (input.pathPrefix) {
20028
- const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
20029
- filterParts.push(`url GLOB '${prefix}*'`);
20030
- }
20031
- if (input.tags && input.tags.length > 0) {
20032
- for (const tag of input.tags) {
20033
- filterParts.push(`tags GLOB '*${tag}*'`);
21333
+ const queryText = input.q;
21334
+ const pathPrefix = input.pathPrefix ? input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}` : void 0;
21335
+ const filterTags = input.tags && input.tags.length > 0 ? input.tags : void 0;
21336
+ const metaFilterStr = input.filters && Object.keys(input.filters).length > 0 ? buildMetaFilterString(input.filters) : "";
21337
+ const metaFilter = metaFilterStr || void 0;
21338
+ const applyPagePostFilters = (hits) => {
21339
+ let filtered = hits;
21340
+ if (pathPrefix) {
21341
+ filtered = filtered.filter((h) => h.url.startsWith(pathPrefix));
21342
+ }
21343
+ if (filterTags) {
21344
+ filtered = filtered.filter(
21345
+ (h) => filterTags.every((tag) => h.tags.includes(tag))
21346
+ );
20034
21347
  }
20035
- }
20036
- const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
20037
- const useDualSearch = this.config.search.dualSearch && groupByPage;
21348
+ return filtered;
21349
+ };
21350
+ const applyChunkPostFilters = (hits) => {
21351
+ let filtered = hits;
21352
+ if (filterTags) {
21353
+ filtered = filtered.filter(
21354
+ (h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
21355
+ );
21356
+ }
21357
+ return filtered;
21358
+ };
20038
21359
  const searchStart = process.hrtime.bigint();
20039
- let ranked;
20040
- if (useDualSearch) {
20041
- const chunkLimit = Math.max(topK * 10, 100);
20042
- const pageLimit = 20;
20043
- const [pageHits, chunkHits] = await Promise.all([
20044
- this.store.searchPages(
20045
- input.q,
20046
- {
20047
- limit: pageLimit,
20048
- semanticWeight: this.config.search.semanticWeight,
20049
- inputEnrichment: this.config.search.inputEnrichment,
20050
- filter
20051
- },
20052
- resolvedScope
20053
- ),
20054
- this.store.search(
20055
- input.q,
20056
- {
20057
- limit: chunkLimit,
20058
- semanticWeight: this.config.search.semanticWeight,
20059
- inputEnrichment: this.config.search.inputEnrichment,
20060
- reranking: false,
20061
- filter
20062
- },
21360
+ if (groupByPage) {
21361
+ const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
21362
+ const pageLimit = Math.max(topK * 2, 20);
21363
+ const pageHits = await this.store.searchPagesByText(
21364
+ queryText,
21365
+ { limit: pageLimit * fetchMultiplier, filter: metaFilter },
21366
+ resolvedScope
21367
+ );
21368
+ const filteredPages = applyPagePostFilters(pageHits);
21369
+ let rankedPages = rankPageHits(filteredPages, effectiveConfig, input.q, input.debug);
21370
+ rankedPages = trimPagesByScoreGap(rankedPages, effectiveConfig);
21371
+ const topPages = rankedPages.slice(0, topK);
21372
+ const chunkPromises = topPages.map(
21373
+ (page) => this.store.searchChunksByUrl(
21374
+ queryText,
21375
+ page.url,
21376
+ { limit: maxSubResults, filter: metaFilter },
20063
21377
  resolvedScope
20064
- )
20065
- ]);
20066
- const rankedChunks = rankHits(chunkHits, this.config, input.q);
20067
- ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
21378
+ ).then((chunks) => applyChunkPostFilters(chunks))
21379
+ );
21380
+ const allChunks = await Promise.all(chunkPromises);
21381
+ const searchMs = hrTimeMs(searchStart);
21382
+ const results = this.buildPageFirstResults(topPages, allChunks, input.q, input.debug, maxSubResults);
21383
+ return {
21384
+ q: input.q,
21385
+ scope: resolvedScope.scopeName,
21386
+ results,
21387
+ meta: {
21388
+ timingsMs: {
21389
+ search: Math.round(searchMs),
21390
+ total: Math.round(hrTimeMs(totalStart))
21391
+ }
21392
+ }
21393
+ };
20068
21394
  } else {
21395
+ const candidateK = Math.max(50, topK);
21396
+ const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
20069
21397
  const hits = await this.store.search(
20070
- input.q,
20071
- {
20072
- limit: candidateK,
20073
- semanticWeight: this.config.search.semanticWeight,
20074
- inputEnrichment: this.config.search.inputEnrichment,
20075
- reranking: this.config.search.reranking,
20076
- filter
20077
- },
21398
+ queryText,
21399
+ { limit: candidateK * fetchMultiplier, filter: metaFilter },
20078
21400
  resolvedScope
20079
21401
  );
20080
- ranked = rankHits(hits, this.config, input.q);
20081
- }
20082
- const searchMs = hrTimeMs(searchStart);
20083
- const results = this.buildResults(ranked, topK, groupByPage, input.q);
20084
- return {
20085
- q: input.q,
20086
- scope: resolvedScope.scopeName,
20087
- results,
20088
- meta: {
20089
- timingsMs: {
20090
- search: Math.round(searchMs),
20091
- total: Math.round(hrTimeMs(totalStart))
21402
+ let filtered = hits;
21403
+ if (pathPrefix) {
21404
+ filtered = filtered.filter((h) => h.metadata.url.startsWith(pathPrefix));
21405
+ }
21406
+ if (filterTags) {
21407
+ filtered = filtered.filter(
21408
+ (h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
21409
+ );
21410
+ }
21411
+ const ranked = rankHits(filtered, effectiveConfig, input.q, input.debug);
21412
+ const searchMs = hrTimeMs(searchStart);
21413
+ const results = this.buildResults(ranked, topK, false, maxSubResults, input.q, input.debug, effectiveConfig);
21414
+ return {
21415
+ q: input.q,
21416
+ scope: resolvedScope.scopeName,
21417
+ results,
21418
+ meta: {
21419
+ timingsMs: {
21420
+ search: Math.round(searchMs),
21421
+ total: Math.round(hrTimeMs(totalStart))
21422
+ }
20092
21423
  }
21424
+ };
21425
+ }
21426
+ }
21427
  // Shapes page-first search output: one result per ranked page, with the
  // page's best chunk supplying the headline snippet/section and up to
  // maxSubResults chunk sub-results. When debug is set and the page carries
  // a score breakdown, it is copied onto the result (anchorTextMatchBoost
  // is hard-coded to 0 here — page-level ranking has no anchor signal).
  buildPageFirstResults(rankedPages, allChunks, query, debug, maxSubResults = 5) {
    return rankedPages.map((page, i) => {
      // allChunks is positionally aligned with rankedPages.
      const chunks = allChunks[i] ?? [];
      const bestChunk = chunks[0];
      // Prefer a query-aware excerpt of the best chunk; fall back to the
      // page description or title when no chunk matched at all.
      const snippet = bestChunk ? query ? queryAwareExcerpt(bestChunk.metadata.chunkText, query) : toSnippet(bestChunk.metadata.chunkText) : page.description || page.title;
      const result = {
        url: page.url,
        title: page.title,
        sectionTitle: bestChunk?.metadata.sectionTitle || void 0,
        snippet,
        chunkText: bestChunk?.metadata.chunkText || void 0,
        score: Number(page.finalScore.toFixed(6)),
        routeFile: page.routeFile,
        chunks: chunks.length > 0 ? chunks.slice(0, maxSubResults).map((c) => ({
          sectionTitle: c.metadata.sectionTitle || void 0,
          snippet: query ? queryAwareExcerpt(c.metadata.chunkText, query) : toSnippet(c.metadata.chunkText),
          chunkText: c.metadata.chunkText || void 0,
          headingPath: c.metadata.headingPath,
          score: Number(c.score.toFixed(6))
        })) : void 0
      };
      if (debug && page.breakdown) {
        result.breakdown = {
          baseScore: page.breakdown.baseScore,
          incomingLinkBoost: page.breakdown.incomingLinkBoost,
          depthBoost: page.breakdown.depthBoost,
          titleMatchBoost: page.breakdown.titleMatchBoost,
          freshnessBoost: page.breakdown.freshnessBoost,
          anchorTextMatchBoost: 0
        };
      }
      return result;
    });
  }
  // Picks the best display snippet for a ranked chunk hit, in order of
  // preference: a query-aware excerpt of the chunk text, the stored
  // snippet when it is substantial (>= 30 chars), a generated snippet
  // from the chunk text, or whatever stored snippet exists (may be "").
  ensureSnippet(hit, query) {
    const chunkText = hit.hit.metadata.chunkText;
    if (query && chunkText) return queryAwareExcerpt(chunkText, query);
    const snippet = hit.hit.metadata.snippet;
    if (snippet && snippet.length >= 30) return snippet;
    if (chunkText) return toSnippet(chunkText);
    return snippet || "";
  }
20103
  // Shapes ranked chunk hits into the public result list.
  //
  // groupByPage=true: aggregates chunks per page, trims trailing pages by
  // score gap, and emits one result per page with up to maxSubResults
  // "meaningful" chunk sub-results (those scoring at least
  // bestChunkScore * minChunkScoreRatio).
  // groupByPage=false: emits flat chunk results, dropping entries below
  // topScore * minScoreRatio — a threshold relative to the best hit, not
  // an absolute cutoff.
  // `config` lets debug ranking overrides substitute for this.config;
  // `debug` attaches score breakdowns where available.
  buildResults(ordered, topK, groupByPage, maxSubResults, query, debug, config) {
    const cfg = config ?? this.config;
    if (groupByPage) {
      let pages = aggregateByPage(ordered, cfg);
      pages = trimByScoreGap(pages, cfg);
      const minRatio = cfg.ranking.minChunkScoreRatio;
      return pages.slice(0, topK).map((page) => {
        const bestScore = page.bestChunk.finalScore;
        // Guard against non-finite best scores: then keep every chunk.
        const minChunkScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
        const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, maxSubResults);
        const result = {
          url: page.url,
          title: page.title,
          sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
          snippet: this.ensureSnippet(page.bestChunk, query),
          chunkText: page.bestChunk.hit.metadata.chunkText || void 0,
          score: Number(page.pageScore.toFixed(6)),
          routeFile: page.routeFile,
          // Even a single meaningful chunk is surfaced as a sub-result.
          chunks: meaningful.length >= 1 ? meaningful.map((c) => ({
            sectionTitle: c.hit.metadata.sectionTitle || void 0,
            snippet: this.ensureSnippet(c, query),
            chunkText: c.hit.metadata.chunkText || void 0,
            headingPath: c.hit.metadata.headingPath,
            score: Number(c.finalScore.toFixed(6))
          })) : void 0
        };
        if (debug && page.bestChunk.breakdown) {
          result.breakdown = page.bestChunk.breakdown;
        }
        return result;
      });
    } else {
      let filtered = ordered;
      const minScoreRatio = cfg.ranking.minScoreRatio;
      if (minScoreRatio > 0 && ordered.length > 0) {
        // Relative cutoff anchored at the top hit's score.
        const topScore = ordered[0].finalScore;
        if (Number.isFinite(topScore) && topScore > 0) {
          const threshold = topScore * minScoreRatio;
          filtered = ordered.filter((entry) => entry.finalScore >= threshold);
        }
      }
      return filtered.slice(0, topK).map(({ hit, finalScore, breakdown }) => {
        const result = {
          url: hit.metadata.url,
          title: hit.metadata.title,
          sectionTitle: hit.metadata.sectionTitle || void 0,
          snippet: this.ensureSnippet({ hit, finalScore }, query),
          chunkText: hit.metadata.chunkText || void 0,
          score: Number(finalScore.toFixed(6)),
          routeFile: hit.metadata.routeFile
        };
        if (debug && breakdown) {
          result.breakdown = breakdown;
        }
        return result;
      });
    }
  }
  async getPage(pathOrUrl, scope) {
@@ -20163,6 +21547,116 @@ var SearchEngine = class _SearchEngine {
20163
21547
  markdown: page.markdown
20164
21548
  };
20165
21549
  }
21550
+ async listPages(opts) {
21551
+ const resolvedScope = resolveScope(this.config, opts?.scope);
21552
+ const pathPrefix = opts?.pathPrefix ? opts.pathPrefix.startsWith("/") ? opts.pathPrefix : `/${opts.pathPrefix}` : void 0;
21553
+ return this.store.listPages(resolvedScope, {
21554
+ cursor: opts?.cursor,
21555
+ limit: opts?.limit,
21556
+ pathPrefix
21557
+ });
21558
+ }
21559
  // Builds the hierarchical site tree by paginating through all indexed
  // pages (200 per store round-trip) up to a hard cap, then assembling
  // them with buildTree. `truncated` reports whether the cap cut off
  // additional pages.
  async getSiteStructure(opts) {
    // Clamp the caller-supplied cap to the global maximum (2000 pages).
    const maxPages = Math.min(opts?.maxPages ?? MAX_SITE_STRUCTURE_PAGES, MAX_SITE_STRUCTURE_PAGES);
    const allPages = [];
    let cursor;
    let truncated = false;
    do {
      const result = await this.listPages({
        pathPrefix: opts?.pathPrefix,
        scope: opts?.scope,
        cursor,
        limit: 200
      });
      allPages.push(...result.pages);
      cursor = result.nextCursor;
      if (allPages.length >= maxPages) {
        // Truncated if we overshot the cap, or the store still had more.
        truncated = allPages.length > maxPages || !!cursor;
        allPages.length = maxPages;
        break;
      }
    } while (cursor);
    const root2 = buildTree(allPages, opts?.pathPrefix);
    return {
      root: root2,
      totalPages: allPages.length,
      truncated
    };
  }
21586
  // Finds pages related to `pathOrUrl` by combining three signals: the
  // link graph (the source's outgoing links and other pages linking back),
  // URL-structure similarity (diceScore over shared path prefixes), and
  // semantic similarity (vector search with the source page's embedding).
  // Candidates are scored with compositeScore, labeled with a dominant
  // relationship type, and the top `topK` (capped at 25) are returned.
  // Throws SearchSocketError with HTTP 404 when the page is not indexed.
  async getRelatedPages(pathOrUrl, opts) {
    const resolvedScope = resolveScope(this.config, opts?.scope);
    const urlPath = this.resolveInputPath(pathOrUrl);
    const topK = Math.min(opts?.topK ?? 10, 25);
    const source = await this.store.fetchPageWithVector(urlPath, resolvedScope);
    if (!source) {
      throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
    }
    const sourceOutgoing = new Set(source.metadata.outgoingLinkUrls ?? []);
    // Semantic neighbors of the source page, by its stored embedding.
    const semanticHits = await this.store.searchPagesByVector(
      source.vector,
      { limit: 50 },
      resolvedScope
    );
    // Never suggest the source page itself.
    const filteredHits = semanticHits.filter((h) => h.url !== urlPath);
    const semanticScoreMap = /* @__PURE__ */ new Map();
    for (const hit of filteredHits) {
      semanticScoreMap.set(hit.url, hit.score);
    }
    // Candidate pool = semantic neighbors + directly linked pages.
    const candidateUrls = /* @__PURE__ */ new Set();
    for (const hit of filteredHits) {
      candidateUrls.add(hit.url);
    }
    for (const url of sourceOutgoing) {
      if (url !== urlPath) candidateUrls.add(url);
    }
    // Linked pages absent from the semantic results still need metadata.
    const missingUrls = [...sourceOutgoing].filter(
      (u) => u !== urlPath && !semanticScoreMap.has(u)
    );
    const fetchedPages = missingUrls.length > 0 ? await this.store.fetchPagesBatch(missingUrls, resolvedScope) : [];
    const metaMap = /* @__PURE__ */ new Map();
    for (const hit of filteredHits) {
      // Outgoing links are unknown here; filled in from the batch below.
      metaMap.set(hit.url, { title: hit.title, routeFile: hit.routeFile, outgoingLinkUrls: [] });
    }
    for (const p of fetchedPages) {
      metaMap.set(p.url, { title: p.title, routeFile: p.routeFile, outgoingLinkUrls: p.outgoingLinkUrls });
    }
    // Backfill outgoing links for semantic hits so incoming-link
    // detection below has real data to work with.
    const semanticUrls = filteredHits.map((h) => h.url);
    if (semanticUrls.length > 0) {
      const semanticPageData = await this.store.fetchPagesBatch(semanticUrls, resolvedScope);
      for (const p of semanticPageData) {
        const existing = metaMap.get(p.url);
        if (existing) {
          existing.outgoingLinkUrls = p.outgoingLinkUrls;
        }
      }
    }
    const candidates = [];
    for (const url of candidateUrls) {
      const meta = metaMap.get(url);
      // Skip candidates whose page record could not be fetched.
      if (!meta) continue;
      const isOutgoing = sourceOutgoing.has(url);
      const isIncoming = meta.outgoingLinkUrls.includes(urlPath);
      const isLinked = isOutgoing || isIncoming;
      const dice = diceScore(urlPath, url);
      // Pages reached only via links get semantic score 0.
      const semantic = semanticScoreMap.get(url) ?? 0;
      const score = compositeScore(isLinked, dice, semantic);
      const relationshipType = dominantRelationshipType(isOutgoing, isIncoming, dice);
      candidates.push({
        url,
        title: meta.title,
        score: Number(score.toFixed(6)),
        relationshipType,
        routeFile: meta.routeFile
      });
    }
    candidates.sort((a, b) => b.score - a.score);
    const results = candidates.slice(0, topK);
    return {
      sourceUrl: urlPath,
      scope: resolvedScope.scopeName,
      relatedPages: results
    };
  }
20166
21660
  // Health probe: delegates directly to the underlying store's check.
  async health() {
    return this.store.health();
  }
@@ -20185,14 +21679,40 @@ function createServer(engine) {
20185
21679
  server.registerTool(
20186
21680
  "search",
20187
21681
  {
20188
- description: "Semantic site search powered by Upstash Search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and groupBy.",
21682
+ description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
20189
21683
  inputSchema: {
20190
21684
  query: z.string().min(1),
20191
21685
  scope: z.string().optional(),
20192
21686
  topK: z.number().int().positive().max(100).optional(),
20193
21687
  pathPrefix: z.string().optional(),
20194
21688
  tags: z.array(z.string()).optional(),
20195
- groupBy: z.enum(["page", "chunk"]).optional()
21689
+ filters: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional(),
21690
+ groupBy: z.enum(["page", "chunk"]).optional(),
21691
+ maxSubResults: z.number().int().positive().max(20).optional()
21692
+ },
21693
+ outputSchema: {
21694
+ q: z.string(),
21695
+ scope: z.string(),
21696
+ results: z.array(z.object({
21697
+ url: z.string(),
21698
+ title: z.string(),
21699
+ sectionTitle: z.string().optional(),
21700
+ snippet: z.string(),
21701
+ score: z.number(),
21702
+ routeFile: z.string(),
21703
+ chunks: z.array(z.object({
21704
+ sectionTitle: z.string().optional(),
21705
+ snippet: z.string(),
21706
+ headingPath: z.array(z.string()),
21707
+ score: z.number()
21708
+ })).optional()
21709
+ })),
21710
+ meta: z.object({
21711
+ timingsMs: z.object({
21712
+ search: z.number(),
21713
+ total: z.number()
21714
+ })
21715
+ })
20196
21716
  }
20197
21717
  },
20198
21718
  async (input) => {
@@ -20202,7 +21722,9 @@ function createServer(engine) {
20202
21722
  scope: input.scope,
20203
21723
  pathPrefix: input.pathPrefix,
20204
21724
  tags: input.tags,
20205
- groupBy: input.groupBy
21725
+ filters: input.filters,
21726
+ groupBy: input.groupBy,
21727
+ maxSubResults: input.maxSubResults
20206
21728
  });
20207
21729
  return {
20208
21730
  content: [
@@ -20210,7 +21732,8 @@ function createServer(engine) {
20210
21732
  type: "text",
20211
21733
  text: JSON.stringify(result, null, 2)
20212
21734
  }
20213
- ]
21735
+ ],
21736
+ structuredContent: result
20214
21737
  };
20215
21738
  }
20216
21739
  );
@@ -20235,8 +21758,134 @@ function createServer(engine) {
20235
21758
  };
20236
21759
  }
20237
21760
  );
21761
+ server.registerTool(
21762
+ "list_pages",
21763
+ {
21764
+ description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
21765
+ inputSchema: {
21766
+ pathPrefix: z.string().optional(),
21767
+ cursor: z.string().optional(),
21768
+ limit: z.number().int().positive().max(200).optional(),
21769
+ scope: z.string().optional()
21770
+ }
21771
+ },
21772
+ async (input) => {
21773
+ const result = await engine.listPages({
21774
+ pathPrefix: input.pathPrefix,
21775
+ cursor: input.cursor,
21776
+ limit: input.limit,
21777
+ scope: input.scope
21778
+ });
21779
+ return {
21780
+ content: [
21781
+ {
21782
+ type: "text",
21783
+ text: JSON.stringify(result, null, 2)
21784
+ }
21785
+ ]
21786
+ };
21787
+ }
21788
+ );
21789
+ server.registerTool(
21790
+ "get_site_structure",
21791
+ {
21792
+ description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
21793
+ inputSchema: {
21794
+ pathPrefix: z.string().optional(),
21795
+ scope: z.string().optional(),
21796
+ maxPages: z.number().int().positive().max(2e3).optional()
21797
+ }
21798
+ },
21799
+ async (input) => {
21800
+ const result = await engine.getSiteStructure({
21801
+ pathPrefix: input.pathPrefix,
21802
+ scope: input.scope,
21803
+ maxPages: input.maxPages
21804
+ });
21805
+ return {
21806
+ content: [
21807
+ {
21808
+ type: "text",
21809
+ text: JSON.stringify(result, null, 2)
21810
+ }
21811
+ ]
21812
+ };
21813
+ }
21814
+ );
21815
+ server.registerTool(
21816
+ "find_source_file",
21817
+ {
21818
+ description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
21819
+ inputSchema: {
21820
+ query: z.string().min(1),
21821
+ scope: z.string().optional()
21822
+ }
21823
+ },
21824
+ async (input) => {
21825
+ const result = await engine.search({
21826
+ q: input.query,
21827
+ topK: 1,
21828
+ scope: input.scope
21829
+ });
21830
+ if (result.results.length === 0) {
21831
+ return {
21832
+ content: [
21833
+ {
21834
+ type: "text",
21835
+ text: JSON.stringify({
21836
+ error: "No matching content found for the given query."
21837
+ })
21838
+ }
21839
+ ]
21840
+ };
21841
+ }
21842
+ const match = result.results[0];
21843
+ const { url, routeFile, sectionTitle, snippet } = match;
21844
+ return {
21845
+ content: [
21846
+ {
21847
+ type: "text",
21848
+ text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
21849
+ }
21850
+ ]
21851
+ };
21852
+ }
21853
+ );
21854
+ server.registerTool(
21855
+ "get_related_pages",
21856
+ {
21857
+ description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
21858
+ inputSchema: {
21859
+ pathOrUrl: z.string().min(1),
21860
+ scope: z.string().optional(),
21861
+ topK: z.number().int().positive().max(25).optional()
21862
+ }
21863
+ },
21864
+ async (input) => {
21865
+ const result = await engine.getRelatedPages(input.pathOrUrl, {
21866
+ topK: input.topK,
21867
+ scope: input.scope
21868
+ });
21869
+ return {
21870
+ content: [
21871
+ {
21872
+ type: "text",
21873
+ text: JSON.stringify(result, null, 2)
21874
+ }
21875
+ ]
21876
+ };
21877
+ }
21878
+ );
20238
21879
  return server;
20239
21880
  }
21881
// Resolves the MCP HTTP API key. An explicit config value wins; otherwise
// the environment variable named by apiKeyEnv is consulted. Returns
// undefined when neither is configured.
function resolveApiKey(config) {
  const { apiKey, apiKeyEnv } = config.mcp.http;
  if (apiKey != null) return apiKey;
  return apiKeyEnv ? process.env[apiKeyEnv] : void 0;
}
21884
// Constant-time API key comparison. Both values are hashed to fixed,
// equal-length SHA-256 digests first so timingSafeEqual never throws on a
// length mismatch and the comparison leaks nothing about the expected key.
function verifyApiKey(provided, expected) {
  const providedDigest = createHash("sha256").update(provided).digest();
  const expectedDigest = createHash("sha256").update(expected).digest();
  return timingSafeEqual(providedDigest, expectedDigest);
}
20240
21889
  function redirectConsoleToStderr() {
20241
21890
  console.log = (...args) => {
20242
21891
  process.stderr.write(`[LOG] ${args.map(String).join(" ")}
@@ -20251,7 +21900,22 @@ async function startHttpServer(serverFactory, config, opts) {
20251
21900
  const app = createMcpExpressApp();
20252
21901
  const port = opts.httpPort ?? config.mcp.http.port;
20253
21902
  const endpointPath = opts.httpPath ?? config.mcp.http.path;
21903
+ const isPublic = config.mcp.access === "public";
21904
+ const host = isPublic ? "0.0.0.0" : "127.0.0.1";
21905
+ const apiKey = isPublic ? resolveApiKey(config) : void 0;
20254
21906
  app.post(endpointPath, async (req, res) => {
21907
+ if (isPublic && apiKey) {
21908
+ const authHeader = req.headers["authorization"];
21909
+ const provided = (authHeader?.startsWith("Bearer ") ? authHeader.slice(7) : void 0) ?? req.headers["x-api-key"] ?? "";
21910
+ if (!provided || !verifyApiKey(provided, apiKey)) {
21911
+ res.status(401).json({
21912
+ jsonrpc: "2.0",
21913
+ error: { code: -32001, message: "Unauthorized" },
21914
+ id: null
21915
+ });
21916
+ return;
21917
+ }
21918
+ }
20255
21919
  const server = serverFactory();
20256
21920
  const transport = new StreamableHTTPServerTransport({
20257
21921
  sessionIdGenerator: void 0
@@ -20301,9 +21965,12 @@ async function startHttpServer(serverFactory, config, opts) {
20301
21965
  );
20302
21966
  });
20303
21967
  await new Promise((resolve, reject) => {
20304
- const instance = app.listen(port, "127.0.0.1", () => {
20305
- process.stderr.write(`SearchSocket MCP HTTP server listening on http://127.0.0.1:${port}${endpointPath}
21968
+ const instance = app.listen(port, host, () => {
21969
+ process.stderr.write(`SearchSocket MCP HTTP server listening on http://${host}:${port}${endpointPath}
20306
21970
  `);
21971
+ if (isPublic) {
21972
+ process.stderr.write("WARNING: Server is in public mode. Ensure HTTPS is configured via a reverse proxy for production use.\n");
21973
+ }
20307
21974
  resolve();
20308
21975
  });
20309
21976
  instance.once("error", reject);
@@ -20318,6 +21985,13 @@ async function runMcpServer(options = {}) {
20318
21985
  cwd: options.cwd,
20319
21986
  configPath: options.configPath
20320
21987
  });
21988
+ if (options.access) config.mcp.access = options.access;
21989
+ if (options.apiKey) config.mcp.http.apiKey = options.apiKey;
21990
+ if (config.mcp.access === "public" && !resolveApiKey(config)) {
21991
+ throw new Error(
21992
+ 'MCP access is "public" but no API key is configured. Pass --api-key or set mcp.http.apiKey / mcp.http.apiKeyEnv in config.'
21993
+ );
21994
+ }
20321
21995
  const resolvedTransport = options.transport ?? config.mcp.transport;
20322
21996
  if (resolvedTransport === "stdio") {
20323
21997
  redirectConsoleToStderr();
@@ -20335,8 +22009,6 @@ async function runMcpServer(options = {}) {
20335
22009
  const stdioTransport = new StdioServerTransport();
20336
22010
  await server.connect(stdioTransport);
20337
22011
  }
20338
-
20339
- // src/sveltekit/handle.ts
20340
22012
  var InMemoryRateLimiter = class {
20341
22013
  constructor(windowMs, max) {
20342
22014
  this.windowMs = windowMs;
@@ -20364,7 +22036,13 @@ function searchsocketHandle(options = {}) {
20364
22036
  let enginePromise = null;
20365
22037
  let configPromise = null;
20366
22038
  let apiPath = options.path;
22039
+ let llmsServePath = null;
22040
+ let serveMarkdownVariants = false;
22041
+ let mcpPath;
22042
+ let mcpApiKey;
22043
+ let mcpEnableJsonResponse = true;
20367
22044
  let rateLimiter = null;
22045
+ let notConfigured = false;
20368
22046
  const getConfig = async () => {
20369
22047
  if (!configPromise) {
20370
22048
  let configP;
@@ -20381,6 +22059,13 @@ function searchsocketHandle(options = {}) {
20381
22059
  }
20382
22060
  configPromise = configP.then((config) => {
20383
22061
  apiPath = apiPath ?? config.api.path;
22062
+ mcpPath = config.mcp.handle.path;
22063
+ mcpApiKey = config.mcp.handle.apiKey;
22064
+ mcpEnableJsonResponse = config.mcp.handle.enableJsonResponse;
22065
+ if (config.llmsTxt.enable) {
22066
+ llmsServePath = "/" + config.llmsTxt.outputPath.replace(/^static\//, "");
22067
+ serveMarkdownVariants = config.llmsTxt.serveMarkdownVariants;
22068
+ }
20384
22069
  if (config.api.rateLimit && !isServerless()) {
20385
22070
  rateLimiter = new InMemoryRateLimiter(config.api.rateLimit.windowMs, config.api.rateLimit.max);
20386
22071
  }
@@ -20390,59 +22075,109 @@ function searchsocketHandle(options = {}) {
20390
22075
  return configPromise;
20391
22076
  };
20392
22077
  const getEngine = async () => {
22078
+ if (notConfigured) {
22079
+ throw new SearchSocketError(
22080
+ "SEARCH_NOT_CONFIGURED",
22081
+ "Search is not configured. Set the required Upstash environment variables to enable search.",
22082
+ 503
22083
+ );
22084
+ }
20393
22085
  if (!enginePromise) {
20394
22086
  const config = await getConfig();
20395
22087
  enginePromise = SearchEngine.create({
20396
22088
  cwd: options.cwd,
20397
22089
  config
22090
+ }).catch((error) => {
22091
+ enginePromise = null;
22092
+ if (error instanceof SearchSocketError && error.code === "VECTOR_BACKEND_UNAVAILABLE") {
22093
+ notConfigured = true;
22094
+ throw new SearchSocketError(
22095
+ "SEARCH_NOT_CONFIGURED",
22096
+ "Search is not configured. Set the required Upstash environment variables to enable search.",
22097
+ 503
22098
+ );
22099
+ }
22100
+ throw error;
20398
22101
  });
20399
22102
  }
20400
22103
  return enginePromise;
20401
22104
  };
20402
22105
  const bodyLimit = options.maxBodyBytes ?? 64 * 1024;
20403
22106
  return async ({ event, resolve }) => {
20404
- if (apiPath && event.url.pathname !== apiPath) {
20405
- return resolve(event);
22107
+ if (apiPath && !isApiPath(event.url.pathname, apiPath) && event.url.pathname !== llmsServePath) {
22108
+ const isMarkdownVariant = event.request.method === "GET" && event.url.pathname.endsWith(".md");
22109
+ if (mcpPath && event.url.pathname === mcpPath) {
22110
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
22111
+ }
22112
+ if (mcpPath) {
22113
+ if (serveMarkdownVariants && isMarkdownVariant) ; else {
22114
+ return resolve(event);
22115
+ }
22116
+ } else {
22117
+ if (configPromise || options.config || options.rawConfig) {
22118
+ await getConfig();
22119
+ if (mcpPath && event.url.pathname === mcpPath) {
22120
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
22121
+ }
22122
+ if (!(serveMarkdownVariants && isMarkdownVariant)) {
22123
+ return resolve(event);
22124
+ }
22125
+ } else {
22126
+ return resolve(event);
22127
+ }
22128
+ }
20406
22129
  }
20407
22130
  const config = await getConfig();
22131
+ if (llmsServePath && event.request.method === "GET" && event.url.pathname === llmsServePath) {
22132
+ const cwd = options.cwd ?? process.cwd();
22133
+ const filePath = path.resolve(cwd, config.llmsTxt.outputPath);
22134
+ try {
22135
+ const content = await fs8.readFile(filePath, "utf8");
22136
+ return new Response(content, {
22137
+ status: 200,
22138
+ headers: { "content-type": "text/plain; charset=utf-8" }
22139
+ });
22140
+ } catch {
22141
+ return resolve(event);
22142
+ }
22143
+ }
22144
+ if (serveMarkdownVariants && event.request.method === "GET" && event.url.pathname.endsWith(".md")) {
22145
+ let rawPath;
22146
+ try {
22147
+ rawPath = decodeURIComponent(event.url.pathname.slice(0, -3));
22148
+ } catch {
22149
+ return resolve(event);
22150
+ }
22151
+ const scope = event.url.searchParams?.get("scope") ?? void 0;
22152
+ try {
22153
+ const engine = await getEngine();
22154
+ const page = await engine.getPage(rawPath, scope);
22155
+ return new Response(page.markdown, {
22156
+ status: 200,
22157
+ headers: { "content-type": "text/markdown; charset=utf-8" }
22158
+ });
22159
+ } catch (error) {
22160
+ if (error instanceof SearchSocketError && error.status === 404) {
22161
+ return resolve(event);
22162
+ }
22163
+ throw error;
22164
+ }
22165
+ }
22166
+ if (mcpPath && event.url.pathname === mcpPath) {
22167
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
22168
+ }
20408
22169
  const targetPath = apiPath ?? config.api.path;
20409
- if (event.url.pathname !== targetPath) {
22170
+ if (!isApiPath(event.url.pathname, targetPath)) {
20410
22171
  return resolve(event);
20411
22172
  }
20412
- if (event.request.method === "OPTIONS") {
22173
+ const subPath = event.url.pathname.slice(targetPath.length);
22174
+ const method = event.request.method;
22175
+ if (method === "OPTIONS") {
20413
22176
  return new Response(null, {
20414
22177
  status: 204,
20415
22178
  headers: buildCorsHeaders(event.request, config)
20416
22179
  });
20417
22180
  }
20418
- if (event.request.method !== "POST") {
20419
- return withCors(
20420
- new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Method not allowed", 405))), {
20421
- status: 405,
20422
- headers: {
20423
- "content-type": "application/json"
20424
- }
20425
- }),
20426
- event.request,
20427
- config
20428
- );
20429
- }
20430
- const contentLength = Number(event.request.headers.get("content-length") ?? 0);
20431
- if (contentLength > bodyLimit) {
20432
- return withCors(
20433
- new Response(
20434
- JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Request body too large", 413))),
20435
- {
20436
- status: 413,
20437
- headers: {
20438
- "content-type": "application/json"
20439
- }
20440
- }
20441
- ),
20442
- event.request,
20443
- config
20444
- );
20445
- }
20446
22181
  if (rateLimiter) {
20447
22182
  const ip = event.getClientAddress?.() ?? event.request.headers.get("x-forwarded-for")?.split(",")[0]?.trim() ?? "unknown";
20448
22183
  if (!rateLimiter.check(ip)) {
@@ -20462,39 +22197,32 @@ function searchsocketHandle(options = {}) {
20462
22197
  }
20463
22198
  }
20464
22199
  try {
20465
- let rawBody;
20466
- if (typeof event.request.text === "function") {
20467
- rawBody = await event.request.text();
20468
- } else {
20469
- let parsedFallback;
20470
- try {
20471
- parsedFallback = await event.request.json();
20472
- } catch (error) {
20473
- if (error instanceof SyntaxError) {
20474
- throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
20475
- }
20476
- throw error;
22200
+ if (method === "GET") {
22201
+ if (subPath === "" || subPath === "/") {
22202
+ return await handleGetSearch(event, config, getEngine);
20477
22203
  }
20478
- rawBody = JSON.stringify(parsedFallback);
22204
+ if (subPath === "/health") {
22205
+ return await handleGetHealth(event, config, getEngine);
22206
+ }
22207
+ if (subPath.startsWith("/pages/")) {
22208
+ return await handleGetPage(event, config, getEngine, subPath);
22209
+ }
22210
+ return withCors(
22211
+ new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Not found", 404))), {
22212
+ status: 404,
22213
+ headers: { "content-type": "application/json" }
22214
+ }),
22215
+ event.request,
22216
+ config
22217
+ );
20479
22218
  }
20480
- if (Buffer.byteLength(rawBody, "utf8") > bodyLimit) {
20481
- throw new SearchSocketError("INVALID_REQUEST", "Request body too large", 413);
22219
+ if (method === "POST" && (subPath === "" || subPath === "/")) {
22220
+ return await handlePostSearch(event, config, getEngine, bodyLimit);
20482
22221
  }
20483
- let body;
20484
- try {
20485
- body = JSON.parse(rawBody);
20486
- } catch {
20487
- throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
20488
- }
20489
- const engine = await getEngine();
20490
- const searchRequest = body;
20491
- const result = await engine.search(searchRequest);
20492
22222
  return withCors(
20493
- new Response(JSON.stringify(result), {
20494
- status: 200,
20495
- headers: {
20496
- "content-type": "application/json"
20497
- }
22223
+ new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Method not allowed", 405))), {
22224
+ status: 405,
22225
+ headers: { "content-type": "application/json" }
20498
22226
  }),
20499
22227
  event.request,
20500
22228
  config
@@ -20515,6 +22243,183 @@ function searchsocketHandle(options = {}) {
20515
22243
  }
20516
22244
  };
20517
22245
  }
22246
/**
 * True when `pathname` is the API root itself or any sub-route beneath it
 * (i.e. the root followed by a "/" segment boundary, so "/api/searchx" does
 * not match the root "/api/search").
 */
function isApiPath(pathname, apiPath) {
  if (pathname === apiPath) return true;
  return pathname.startsWith(`${apiPath}/`);
}
22249
/**
 * Handle GET /<apiPath>?q=... — run a search built from query-string params.
 *
 * Supported parameters: `q` (required, non-blank), `topK` (positive integer),
 * `scope`, `pathPrefix`, `groupBy` ("page" | "chunk"), `maxSubResults`
 * (integer 1-20), and repeatable `tags`.
 *
 * Throws SearchSocketError(INVALID_REQUEST, 400) on invalid input; on success
 * responds 200 with the JSON search result and CORS headers applied.
 */
async function handleGetSearch(event, config, getEngine) {
  const params = event.url.searchParams;
  const q = params.get("q");
  if (!q || q.trim() === "") {
    throw new SearchSocketError("INVALID_REQUEST", "Missing required query parameter: q", 400);
  }
  const searchRequest = { q };
  // Strict decimal-integer parse. Number.parseInt would silently truncate
  // "5.9" to 5 and accept "10abc" as 10, contradicting the error messages
  // below; reject anything that is not purely digits.
  const parseIntStrict = (value) => (/^\d+$/.test(value.trim()) ? Number.parseInt(value.trim(), 10) : Number.NaN);
  const topK = params.get("topK");
  if (topK !== null) {
    const parsed = parseIntStrict(topK);
    if (Number.isNaN(parsed) || parsed < 1) {
      throw new SearchSocketError("INVALID_REQUEST", "topK must be a positive integer", 400);
    }
    searchRequest.topK = parsed;
  }
  const scope = params.get("scope");
  if (scope !== null) searchRequest.scope = scope;
  const pathPrefix = params.get("pathPrefix");
  if (pathPrefix !== null) searchRequest.pathPrefix = pathPrefix;
  const groupBy = params.get("groupBy");
  if (groupBy) {
    if (groupBy !== "page" && groupBy !== "chunk") {
      throw new SearchSocketError("INVALID_REQUEST", 'groupBy must be "page" or "chunk"', 400);
    }
    searchRequest.groupBy = groupBy;
  }
  const maxSubResults = params.get("maxSubResults");
  if (maxSubResults !== null) {
    const parsed = parseIntStrict(maxSubResults);
    if (Number.isNaN(parsed) || parsed < 1 || parsed > 20) {
      throw new SearchSocketError("INVALID_REQUEST", "maxSubResults must be a positive integer between 1 and 20", 400);
    }
    searchRequest.maxSubResults = parsed;
  }
  const tags = params.getAll("tags");
  if (tags.length > 0) searchRequest.tags = tags;
  const engine = await getEngine();
  const result = await engine.search(searchRequest);
  return withCors(
    new Response(JSON.stringify(result), {
      status: 200,
      headers: { "content-type": "application/json" }
    }),
    event.request,
    config
  );
}
22296
/**
 * Handle GET /<apiPath>/health — report the engine's health status as a
 * 200 JSON response with CORS headers applied. Engine initialization or
 * health-check failures propagate to the caller's error handling.
 */
async function handleGetHealth(event, config, getEngine) {
  const engine = await getEngine();
  const payload = JSON.stringify(await engine.health());
  const response = new Response(payload, {
    status: 200,
    headers: { "content-type": "application/json" }
  });
  return withCors(response, event.request, config);
}
22308
/**
 * Handle GET /<apiPath>/pages/<path> — fetch a single indexed page.
 *
 * The page path is the percent-encoded remainder of `subPath` after the
 * "/pages" prefix; an optional `scope` query parameter narrows the lookup.
 * Throws SearchSocketError(400) when the path cannot be percent-decoded.
 */
async function handleGetPage(event, config, getEngine, subPath) {
  const encoded = subPath.slice("/pages".length);
  let pagePath;
  try {
    pagePath = decodeURIComponent(encoded);
  } catch {
    throw new SearchSocketError("INVALID_REQUEST", "Malformed page path", 400);
  }
  const scope = event.url.searchParams?.get("scope") ?? void 0;
  const engine = await getEngine();
  const page = await engine.getPage(pagePath, scope);
  return withCors(
    new Response(JSON.stringify(page), {
      status: 200,
      headers: { "content-type": "application/json" }
    }),
    event.request,
    config
  );
}
22328
/**
 * Handle POST /<apiPath> — run a search from a JSON request body.
 *
 * The body limit is enforced twice: first against the declared
 * content-length header (cheap early reject), then against the actual
 * decoded byte size. Runtimes whose Request lacks `.text()` fall back to
 * `.json()`, re-serialized so the size check still applies.
 *
 * Throws SearchSocketError 413 (body too large) or 400 (malformed JSON);
 * on success responds 200 with the JSON result and CORS headers.
 */
async function handlePostSearch(event, config, getEngine, bodyLimit) {
  const declaredLength = Number(event.request.headers.get("content-length") ?? 0);
  if (declaredLength > bodyLimit) {
    throw new SearchSocketError("INVALID_REQUEST", "Request body too large", 413);
  }
  let rawBody;
  if (typeof event.request.text === "function") {
    rawBody = await event.request.text();
  } else {
    // Fallback for request objects that only expose .json(); normalize back
    // to a string so the byte-size check below still runs.
    let fallbackValue;
    try {
      fallbackValue = await event.request.json();
    } catch (error) {
      if (error instanceof SyntaxError) {
        throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
      }
      throw error;
    }
    rawBody = JSON.stringify(fallbackValue);
  }
  if (Buffer.byteLength(rawBody, "utf8") > bodyLimit) {
    throw new SearchSocketError("INVALID_REQUEST", "Request body too large", 413);
  }
  let searchRequest;
  try {
    searchRequest = JSON.parse(rawBody);
  } catch {
    throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
  }
  const engine = await getEngine();
  const result = await engine.search(searchRequest);
  return withCors(
    new Response(JSON.stringify(result), {
      status: 200,
      headers: { "content-type": "application/json" }
    }),
    event.request,
    config
  );
}
22369
/**
 * Handle an MCP-over-HTTP request inside the SvelteKit handle.
 *
 * When `apiKey` is set, a matching `Authorization: Bearer <key>` header is
 * required. The comparison hashes both sides to fixed-length SHA-256
 * digests before `timingSafeEqual` — consistent with `verifyApiKey`
 * elsewhere in this file, and unlike a raw `Buffer.length` pre-check it
 * does not leak the key's length through an early return.
 *
 * A fresh transport and MCP server are created per request. In JSON-response
 * mode they are torn down once the response is produced; streaming responses
 * keep them open. Failures are reported as JSON-RPC -32603 with HTTP 500.
 */
async function handleMcpRequest(event, apiKey, enableJsonResponse, getEngine) {
  if (apiKey) {
    const authHeader = event.request.headers.get("authorization") ?? "";
    const token = authHeader.startsWith("Bearer ") ? authHeader.slice(7) : "";
    // Hash both values: timingSafeEqual throws on unequal-length buffers, and
    // digests are always the same length, so no length comparison is needed.
    const tokenDigest = createHash("sha256").update(token).digest();
    const keyDigest = createHash("sha256").update(apiKey).digest();
    if (!timingSafeEqual(tokenDigest, keyDigest)) {
      return new Response(
        JSON.stringify({
          jsonrpc: "2.0",
          error: { code: -32001, message: "Unauthorized" },
          id: null
        }),
        { status: 401, headers: { "content-type": "application/json" } }
      );
    }
  }
  const transport = new WebStandardStreamableHTTPServerTransport({
    sessionIdGenerator: void 0,
    enableJsonResponse
  });
  let server;
  try {
    const engine = await getEngine();
    server = createServer(engine);
    await server.connect(transport);
    const response = await transport.handleRequest(event.request);
    if (enableJsonResponse) {
      // JSON mode: the response is fully materialized, so release the
      // per-request transport/server now instead of leaking them.
      await transport.close();
      await server.close();
    }
    return response;
  } catch (error) {
    // Best-effort cleanup; the original failure is what gets reported.
    try {
      await transport.close();
    } catch {
    }
    try {
      await server?.close();
    } catch {
    }
    return new Response(
      JSON.stringify({
        jsonrpc: "2.0",
        error: {
          code: -32603,
          message: error instanceof Error ? error.message : "Internal server error"
        },
        id: null
      }),
      { status: 500, headers: { "content-type": "application/json" } }
    );
  }
}
20518
22423
  function buildCorsHeaders(request, config) {
20519
22424
  const allowOrigins = config.api.cors.allowOrigins;
20520
22425
  if (!allowOrigins || allowOrigins.length === 0) {
@@ -20527,7 +22432,7 @@ function buildCorsHeaders(request, config) {
20527
22432
  }
20528
22433
  return {
20529
22434
  "access-control-allow-origin": allowOrigins.includes("*") ? "*" : origin,
20530
- "access-control-allow-methods": "POST, OPTIONS",
22435
+ "access-control-allow-methods": "GET, POST, OPTIONS",
20531
22436
  "access-control-allow-headers": "content-type"
20532
22437
  };
20533
22438
  }
@@ -20563,9 +22468,6 @@ function shouldRunAutoIndex(options) {
20563
22468
  if (explicit && /^(1|true|yes)$/i.test(explicit)) {
20564
22469
  return true;
20565
22470
  }
20566
- if (process.env.CI && /^(1|true)$/i.test(process.env.CI)) {
20567
- return true;
20568
- }
20569
22471
  return false;
20570
22472
  }
20571
22473
  function searchsocketVitePlugin(options = {}) {
@@ -20590,7 +22492,8 @@ function searchsocketVitePlugin(options = {}) {
20590
22492
  const pipeline = await IndexPipeline.create({
20591
22493
  cwd,
20592
22494
  configPath: options.configPath,
20593
- logger: logger3
22495
+ logger: logger3,
22496
+ hooks: options.hooks
20594
22497
  });
20595
22498
  const stats = await pipeline.run({
20596
22499
  changedOnly: options.changedOnly ?? true,