searchsocket 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -5,18 +5,20 @@ var path = require('path');
5
5
  var jiti = require('jiti');
6
6
  var zod = require('zod');
7
7
  var child_process = require('child_process');
8
+ var vector = require('@upstash/vector');
8
9
  var crypto = require('crypto');
9
10
  var cheerio = require('cheerio');
10
11
  var matter = require('gray-matter');
11
12
  var fg = require('fast-glob');
12
13
  var pLimit = require('p-limit');
13
- var fs3 = require('fs/promises');
14
+ var fs8 = require('fs/promises');
14
15
  var net = require('net');
15
16
  var zlib = require('zlib');
16
17
  var mcp_js = require('@modelcontextprotocol/sdk/server/mcp.js');
17
18
  var stdio_js = require('@modelcontextprotocol/sdk/server/stdio.js');
18
19
  var streamableHttp_js = require('@modelcontextprotocol/sdk/server/streamableHttp.js');
19
20
  var express_js = require('@modelcontextprotocol/sdk/server/express.js');
21
+ var webStandardStreamableHttp_js = require('@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js');
20
22
 
21
23
  function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
22
24
 
@@ -25,7 +27,7 @@ var path__default = /*#__PURE__*/_interopDefault(path);
25
27
  var matter__default = /*#__PURE__*/_interopDefault(matter);
26
28
  var fg__default = /*#__PURE__*/_interopDefault(fg);
27
29
  var pLimit__default = /*#__PURE__*/_interopDefault(pLimit);
28
- var fs3__default = /*#__PURE__*/_interopDefault(fs3);
30
+ var fs8__default = /*#__PURE__*/_interopDefault(fs8);
29
31
  var net__default = /*#__PURE__*/_interopDefault(net);
30
32
 
31
33
  var __getOwnPropNames = Object.getOwnPropertyNames;
@@ -5025,32 +5027,32 @@ var require_URL = __commonJS({
5025
5027
  else
5026
5028
  return basepath.substring(0, lastslash + 1) + refpath;
5027
5029
  }
5028
- function remove_dot_segments(path13) {
5029
- if (!path13) return path13;
5030
+ function remove_dot_segments(path14) {
5031
+ if (!path14) return path14;
5030
5032
  var output = "";
5031
- while (path13.length > 0) {
5032
- if (path13 === "." || path13 === "..") {
5033
- path13 = "";
5033
+ while (path14.length > 0) {
5034
+ if (path14 === "." || path14 === "..") {
5035
+ path14 = "";
5034
5036
  break;
5035
5037
  }
5036
- var twochars = path13.substring(0, 2);
5037
- var threechars = path13.substring(0, 3);
5038
- var fourchars = path13.substring(0, 4);
5038
+ var twochars = path14.substring(0, 2);
5039
+ var threechars = path14.substring(0, 3);
5040
+ var fourchars = path14.substring(0, 4);
5039
5041
  if (threechars === "../") {
5040
- path13 = path13.substring(3);
5042
+ path14 = path14.substring(3);
5041
5043
  } else if (twochars === "./") {
5042
- path13 = path13.substring(2);
5044
+ path14 = path14.substring(2);
5043
5045
  } else if (threechars === "/./") {
5044
- path13 = "/" + path13.substring(3);
5045
- } else if (twochars === "/." && path13.length === 2) {
5046
- path13 = "/";
5047
- } else if (fourchars === "/../" || threechars === "/.." && path13.length === 3) {
5048
- path13 = "/" + path13.substring(4);
5046
+ path14 = "/" + path14.substring(3);
5047
+ } else if (twochars === "/." && path14.length === 2) {
5048
+ path14 = "/";
5049
+ } else if (fourchars === "/../" || threechars === "/.." && path14.length === 3) {
5050
+ path14 = "/" + path14.substring(4);
5049
5051
  output = output.replace(/\/?[^\/]*$/, "");
5050
5052
  } else {
5051
- var segment = path13.match(/(\/?([^\/]*))/)[0];
5053
+ var segment = path14.match(/(\/?([^\/]*))/)[0];
5052
5054
  output += segment;
5053
- path13 = path13.substring(segment.length);
5055
+ path14 = path14.substring(segment.length);
5054
5056
  }
5055
5057
  }
5056
5058
  return output;
@@ -16646,6 +16648,7 @@ var searchSocketConfigSchema = zod.z.object({
16646
16648
  dropSelectors: zod.z.array(zod.z.string()).optional(),
16647
16649
  ignoreAttr: zod.z.string().optional(),
16648
16650
  noindexAttr: zod.z.string().optional(),
16651
+ imageDescAttr: zod.z.string().optional(),
16649
16652
  respectRobotsNoindex: zod.z.boolean().optional()
16650
16653
  }).optional(),
16651
16654
  transform: zod.z.object({
@@ -16661,35 +16664,48 @@ var searchSocketConfigSchema = zod.z.object({
16661
16664
  headingPathDepth: zod.z.number().int().positive().optional(),
16662
16665
  dontSplitInside: zod.z.array(zod.z.enum(["code", "table", "blockquote"])).optional(),
16663
16666
  prependTitle: zod.z.boolean().optional(),
16664
- pageSummaryChunk: zod.z.boolean().optional()
16667
+ pageSummaryChunk: zod.z.boolean().optional(),
16668
+ weightHeadings: zod.z.boolean().optional()
16665
16669
  }).optional(),
16666
16670
  upstash: zod.z.object({
16667
16671
  url: zod.z.string().url().optional(),
16668
16672
  token: zod.z.string().min(1).optional(),
16669
16673
  urlEnv: zod.z.string().min(1).optional(),
16670
- tokenEnv: zod.z.string().min(1).optional()
16674
+ tokenEnv: zod.z.string().min(1).optional(),
16675
+ namespaces: zod.z.object({
16676
+ pages: zod.z.string().min(1).optional(),
16677
+ chunks: zod.z.string().min(1).optional()
16678
+ }).optional()
16679
+ }).optional(),
16680
+ embedding: zod.z.object({
16681
+ model: zod.z.string().optional(),
16682
+ dimensions: zod.z.number().int().positive().optional(),
16683
+ taskType: zod.z.string().optional(),
16684
+ batchSize: zod.z.number().int().positive().optional()
16671
16685
  }).optional(),
16672
16686
  search: zod.z.object({
16673
- semanticWeight: zod.z.number().min(0).max(1).optional(),
16674
- inputEnrichment: zod.z.boolean().optional(),
16675
- reranking: zod.z.boolean().optional(),
16676
16687
  dualSearch: zod.z.boolean().optional(),
16677
16688
  pageSearchWeight: zod.z.number().min(0).max(1).optional()
16678
16689
  }).optional(),
16679
16690
  ranking: zod.z.object({
16680
16691
  enableIncomingLinkBoost: zod.z.boolean().optional(),
16681
16692
  enableDepthBoost: zod.z.boolean().optional(),
16693
+ enableFreshnessBoost: zod.z.boolean().optional(),
16694
+ freshnessDecayRate: zod.z.number().positive().optional(),
16695
+ enableAnchorTextBoost: zod.z.boolean().optional(),
16682
16696
  pageWeights: zod.z.record(zod.z.string(), zod.z.number().min(0)).optional(),
16683
16697
  aggregationCap: zod.z.number().int().positive().optional(),
16684
16698
  aggregationDecay: zod.z.number().min(0).max(1).optional(),
16685
16699
  minChunkScoreRatio: zod.z.number().min(0).max(1).optional(),
16686
- minScore: zod.z.number().min(0).max(1).optional(),
16700
+ minScoreRatio: zod.z.number().min(0).max(1).optional(),
16687
16701
  scoreGapThreshold: zod.z.number().min(0).max(1).optional(),
16688
16702
  weights: zod.z.object({
16689
16703
  incomingLinks: zod.z.number().optional(),
16690
16704
  depth: zod.z.number().optional(),
16691
16705
  aggregation: zod.z.number().optional(),
16692
- titleMatch: zod.z.number().optional()
16706
+ titleMatch: zod.z.number().optional(),
16707
+ freshness: zod.z.number().optional(),
16708
+ anchorText: zod.z.number().optional()
16693
16709
  }).optional()
16694
16710
  }).optional(),
16695
16711
  api: zod.z.object({
@@ -16704,12 +16720,28 @@ var searchSocketConfigSchema = zod.z.object({
16704
16720
  }).optional(),
16705
16721
  mcp: zod.z.object({
16706
16722
  enable: zod.z.boolean().optional(),
16723
+ access: zod.z.enum(["public", "private"]).optional(),
16707
16724
  transport: zod.z.enum(["stdio", "http"]).optional(),
16708
16725
  http: zod.z.object({
16709
16726
  port: zod.z.number().int().positive().optional(),
16710
- path: zod.z.string().optional()
16727
+ path: zod.z.string().optional(),
16728
+ apiKey: zod.z.string().min(1).optional(),
16729
+ apiKeyEnv: zod.z.string().min(1).optional()
16730
+ }).optional(),
16731
+ handle: zod.z.object({
16732
+ path: zod.z.string().optional(),
16733
+ apiKey: zod.z.string().min(1).optional(),
16734
+ enableJsonResponse: zod.z.boolean().optional()
16711
16735
  }).optional()
16712
16736
  }).optional(),
16737
+ llmsTxt: zod.z.object({
16738
+ enable: zod.z.boolean().optional(),
16739
+ outputPath: zod.z.string().optional(),
16740
+ title: zod.z.string().optional(),
16741
+ description: zod.z.string().optional(),
16742
+ generateFull: zod.z.boolean().optional(),
16743
+ serveMarkdownVariants: zod.z.boolean().optional()
16744
+ }).optional(),
16713
16745
  state: zod.z.object({
16714
16746
  dir: zod.z.string().optional()
16715
16747
  }).optional()
@@ -16748,6 +16780,7 @@ function createDefaultConfig(projectId) {
16748
16780
  dropSelectors: DEFAULT_DROP_SELECTORS,
16749
16781
  ignoreAttr: "data-search-ignore",
16750
16782
  noindexAttr: "data-search-noindex",
16783
+ imageDescAttr: "data-search-description",
16751
16784
  respectRobotsNoindex: true
16752
16785
  },
16753
16786
  transform: {
@@ -16757,39 +16790,52 @@ function createDefaultConfig(projectId) {
16757
16790
  },
16758
16791
  chunking: {
16759
16792
  strategy: "hybrid",
16760
- maxChars: 2200,
16793
+ maxChars: 1500,
16761
16794
  overlapChars: 200,
16762
16795
  minChars: 250,
16763
16796
  headingPathDepth: 3,
16764
16797
  dontSplitInside: ["code", "table", "blockquote"],
16765
16798
  prependTitle: true,
16766
- pageSummaryChunk: true
16799
+ pageSummaryChunk: true,
16800
+ weightHeadings: true
16767
16801
  },
16768
16802
  upstash: {
16769
- urlEnv: "UPSTASH_SEARCH_REST_URL",
16770
- tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
16803
+ urlEnv: "UPSTASH_VECTOR_REST_URL",
16804
+ tokenEnv: "UPSTASH_VECTOR_REST_TOKEN",
16805
+ namespaces: {
16806
+ pages: "pages",
16807
+ chunks: "chunks"
16808
+ }
16809
+ },
16810
+ embedding: {
16811
+ model: "bge-large-en-v1.5",
16812
+ dimensions: 1024,
16813
+ taskType: "RETRIEVAL_DOCUMENT",
16814
+ batchSize: 100
16771
16815
  },
16772
16816
  search: {
16773
- semanticWeight: 0.75,
16774
- inputEnrichment: true,
16775
- reranking: true,
16776
16817
  dualSearch: true,
16777
16818
  pageSearchWeight: 0.3
16778
16819
  },
16779
16820
  ranking: {
16780
16821
  enableIncomingLinkBoost: true,
16781
16822
  enableDepthBoost: true,
16823
+ enableFreshnessBoost: false,
16824
+ freshnessDecayRate: 1e-3,
16825
+ enableAnchorTextBoost: false,
16782
16826
  pageWeights: {},
16783
16827
  aggregationCap: 5,
16784
16828
  aggregationDecay: 0.5,
16785
16829
  minChunkScoreRatio: 0.5,
16786
- minScore: 0.3,
16830
+ minScoreRatio: 0.7,
16787
16831
  scoreGapThreshold: 0.4,
16788
16832
  weights: {
16789
16833
  incomingLinks: 0.05,
16790
16834
  depth: 0.03,
16791
16835
  aggregation: 0.1,
16792
- titleMatch: 0.15
16836
+ titleMatch: 0.15,
16837
+ freshness: 0.1,
16838
+ anchorText: 0.1
16793
16839
  }
16794
16840
  },
16795
16841
  api: {
@@ -16800,12 +16846,23 @@ function createDefaultConfig(projectId) {
16800
16846
  },
16801
16847
  mcp: {
16802
16848
  enable: process.env.NODE_ENV !== "production",
16849
+ access: "private",
16803
16850
  transport: "stdio",
16804
16851
  http: {
16805
16852
  port: 3338,
16806
16853
  path: "/mcp"
16854
+ },
16855
+ handle: {
16856
+ path: "/api/mcp",
16857
+ enableJsonResponse: true
16807
16858
  }
16808
16859
  },
16860
+ llmsTxt: {
16861
+ enable: false,
16862
+ outputPath: "static/llms.txt",
16863
+ generateFull: true,
16864
+ serveMarkdownVariants: false
16865
+ },
16809
16866
  state: {
16810
16867
  dir: ".searchsocket"
16811
16868
  }
@@ -16933,7 +16990,15 @@ ${issues}`
16933
16990
  },
16934
16991
  upstash: {
16935
16992
  ...defaults.upstash,
16936
- ...parsed.upstash
16993
+ ...parsed.upstash,
16994
+ namespaces: {
16995
+ ...defaults.upstash.namespaces,
16996
+ ...parsed.upstash?.namespaces
16997
+ }
16998
+ },
16999
+ embedding: {
17000
+ ...defaults.embedding,
17001
+ ...parsed.embedding
16937
17002
  },
16938
17003
  search: {
16939
17004
  ...defaults.search,
@@ -16970,8 +17035,16 @@ ${issues}`
16970
17035
  http: {
16971
17036
  ...defaults.mcp.http,
16972
17037
  ...parsed.mcp?.http
17038
+ },
17039
+ handle: {
17040
+ ...defaults.mcp.handle,
17041
+ ...parsed.mcp?.handle
16973
17042
  }
16974
17043
  },
17044
+ llmsTxt: {
17045
+ ...defaults.llmsTxt,
17046
+ ...parsed.llmsTxt
17047
+ },
16975
17048
  state: {
16976
17049
  ...defaults.state,
16977
17050
  ...parsed.state
@@ -16991,6 +17064,15 @@ ${issues}`
16991
17064
  maxDepth: 10
16992
17065
  };
16993
17066
  }
17067
+ if (merged.mcp.access === "public") {
17068
+ const resolvedKey = merged.mcp.http.apiKey ?? (merged.mcp.http.apiKeyEnv ? process.env[merged.mcp.http.apiKeyEnv] : void 0);
17069
+ if (!resolvedKey) {
17070
+ throw new SearchSocketError(
17071
+ "CONFIG_MISSING",
17072
+ '`mcp.access` is "public" but no API key is configured. Set `mcp.http.apiKey` or `mcp.http.apiKeyEnv`.'
17073
+ );
17074
+ }
17075
+ }
16994
17076
  if (merged.source.mode === "crawl" && !merged.source.crawl?.baseUrl) {
16995
17077
  throw new SearchSocketError("CONFIG_MISSING", "`source.crawl.baseUrl` is required when source.mode is crawl.");
16996
17078
  }
@@ -17054,13 +17136,84 @@ function normalizeMarkdown(input) {
17054
17136
  function sanitizeScopeName(scopeName) {
17055
17137
  return scopeName.toLowerCase().replace(/[^a-z0-9._-]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 80);
17056
17138
  }
17139
/**
 * Flattens markdown into a single line of plain text.
 *
 * Fenced code blocks are replaced by a space, inline code keeps its inner
 * text, the characters `# > * _ | -` are replaced by spaces, and whitespace
 * runs are collapsed before trimming.
 *
 * @param {string} markdown - Markdown source.
 * @returns {string} Plain-text rendering.
 */
function markdownToPlain(markdown) {
  return markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/[#>*_|\-]/g, " ").replace(/\s+/g, " ").trim();
}

/**
 * Returns the leading part of the plain-text form of `markdown`.
 *
 * @param {string} markdown - Markdown source.
 * @param {number} [maxLen=220] - Maximum snippet length, ellipsis included.
 * @returns {string} The whole plain text when it fits, otherwise its first
 *   `maxLen - 1` characters (trimmed) followed by an ellipsis.
 */
function toSnippet(markdown, maxLen = 220) {
  const plain = markdownToPlain(markdown);
  if (plain.length <= maxLen) {
    return plain;
  }
  return `${plain.slice(0, Math.max(0, maxLen - 1)).trim()}\u2026`;
}

/**
 * Builds an excerpt of roughly `maxLen` characters centred on the part of the
 * text that contains the most query terms.
 *
 * Query terms are the whitespace-separated, lower-cased words of `query` that
 * are at least two characters long. Falls back to `toSnippet` when there are
 * no usable terms or none of them occurs in the text.
 *
 * @param {string} markdown - Markdown source.
 * @param {string} query - Search query.
 * @param {number} [maxLen=220] - Target excerpt length (ellipses excluded).
 * @returns {string} The excerpt, prefixed and/or suffixed with an ellipsis
 *   where text was cut off on that side.
 */
function queryAwareExcerpt(markdown, query, maxLen = 220) {
  const plain = markdownToPlain(markdown);
  if (plain.length <= maxLen) return plain;
  // De-duplicate so that a repeated query word is not scored as several
  // distinct terms when windows are compared.
  const tokens = [...new Set(query.toLowerCase().split(/\s+/).filter((t) => t.length >= 2))];
  if (tokens.length === 0) return toSnippet(markdown, maxLen);
  const positions = [];
  for (let ti = 0; ti < tokens.length; ti++) {
    // Escape regex metacharacters so the token is matched literally.
    const escaped = tokens[ti].replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const re = new RegExp(escaped, "gi");
    let m;
    while ((m = re.exec(plain)) !== null) {
      positions.push({ start: m.index, end: m.index + m[0].length, tokenIdx: ti });
    }
  }
  if (positions.length === 0) return toSnippet(markdown, maxLen);
  positions.sort((a, b) => a.start - b.start);
  // Sliding window over the sorted matches: prefer the window (at most maxLen
  // wide) with the most distinct terms, then the most matches overall.
  let bestUniqueCount = 0;
  let bestTotalCount = 0;
  let bestLeft = 0;
  let bestRight = 0;
  let left = 0;
  const tokenCounts = /* @__PURE__ */ new Map();
  for (let right = 0; right < positions.length; right++) {
    tokenCounts.set(positions[right].tokenIdx, (tokenCounts.get(positions[right].tokenIdx) ?? 0) + 1);
    while (positions[right].end - positions[left].start > maxLen && left < right) {
      const leftToken = positions[left].tokenIdx;
      const cnt = tokenCounts.get(leftToken) - 1;
      if (cnt === 0) tokenCounts.delete(leftToken);
      else tokenCounts.set(leftToken, cnt);
      left++;
    }
    const uniqueCount = tokenCounts.size;
    const totalCount = right - left + 1;
    if (uniqueCount > bestUniqueCount || uniqueCount === bestUniqueCount && totalCount > bestTotalCount) {
      bestUniqueCount = uniqueCount;
      bestTotalCount = totalCount;
      bestLeft = left;
      bestRight = right;
    }
  }
  const matchEnd = positions[bestRight].end;
  const mid = Math.floor((positions[bestLeft].start + matchEnd) / 2);
  let start = Math.max(0, mid - Math.floor(maxLen / 2));
  let end = Math.min(plain.length, start + maxLen);
  start = Math.max(0, end - maxLen);
  // Remember the exact maxLen-wide window before snapping to word boundaries.
  const windowStart = start;
  const windowEnd = end;
  if (start > 0) {
    const spaceIdx = plain.lastIndexOf(" ", start);
    if (spaceIdx > start - 30) {
      start = spaceIdx + 1;
    }
  }
  if (end < plain.length) {
    const spaceIdx = plain.indexOf(" ", end);
    if (spaceIdx !== -1 && spaceIdx < end + 30) {
      end = spaceIdx;
    }
  }
  if (end - start > Math.ceil(maxLen * 1.2)) {
    // Snapping made the excerpt too long. Shorten it from the right at a word
    // boundary, but only if that keeps the matched terms in view.
    let head = plain.slice(start, start + maxLen);
    const lastSpace = head.lastIndexOf(" ");
    if (lastSpace > maxLen * 0.5) {
      head = head.slice(0, lastSpace);
    }
    if (start + head.length >= matchEnd) {
      end = start + head.length;
    } else {
      // Shortening would drop the matches; use the unsnapped window instead.
      start = windowStart;
      end = windowEnd;
    }
  }
  const excerpt = plain.slice(start, end);
  // `start`/`end` now describe the final slice, so the ellipses are accurate.
  const prefix = start > 0 ? "\u2026" : "";
  const suffix = end < plain.length ? "\u2026" : "";
  return `${prefix}${excerpt}${suffix}`;
}
17064
17217
  function extractFirstParagraph(markdown) {
17065
17218
  const lines = markdown.split("\n");
17066
17219
  let inFence = false;
@@ -17121,162 +17274,288 @@ function ensureStateDirs(cwd, stateDir, scope) {
17121
17274
  fs__default.default.mkdirSync(statePath, { recursive: true });
17122
17275
  return { statePath };
17123
17276
  }
17124
-
17125
- // src/vector/upstash.ts
17126
- function chunkIndexName(scope) {
17127
- return `${scope.projectId}--${scope.scopeName}`;
17128
- }
17129
- function pageIndexName(scope) {
17130
- return `${scope.projectId}--${scope.scopeName}--pages`;
17131
- }
17132
17277
  var UpstashSearchStore = class {
17133
- client;
17278
+ index;
17279
+ pagesNs;
17280
+ chunksNs;
17134
17281
  constructor(opts) {
17135
- this.client = opts.client;
17136
- }
17137
- chunkIndex(scope) {
17138
- return this.client.index(chunkIndexName(scope));
17139
- }
17140
- pageIndex(scope) {
17141
- return this.client.index(pageIndexName(scope));
17282
+ this.index = opts.index;
17283
+ this.pagesNs = opts.index.namespace(opts.pagesNamespace);
17284
+ this.chunksNs = opts.index.namespace(opts.chunksNamespace);
17142
17285
  }
17143
17286
  async upsertChunks(chunks, scope) {
17144
17287
  if (chunks.length === 0) return;
17145
- const index = this.chunkIndex(scope);
17146
17288
  const BATCH_SIZE = 100;
17147
17289
  for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
17148
17290
  const batch = chunks.slice(i, i + BATCH_SIZE);
17149
- await index.upsert(batch);
17150
- }
17151
- }
17152
- async search(query, opts, scope) {
17153
- const index = this.chunkIndex(scope);
17154
- const results = await index.search({
17155
- query,
17156
- limit: opts.limit,
17157
- semanticWeight: opts.semanticWeight,
17158
- inputEnrichment: opts.inputEnrichment,
17159
- reranking: opts.reranking,
17160
- filter: opts.filter
17291
+ await this.chunksNs.upsert(
17292
+ batch.map((c) => ({
17293
+ id: c.id,
17294
+ data: c.data,
17295
+ metadata: {
17296
+ ...c.metadata,
17297
+ projectId: scope.projectId,
17298
+ scopeName: scope.scopeName,
17299
+ type: c.metadata.type || "chunk"
17300
+ }
17301
+ }))
17302
+ );
17303
+ }
17304
+ }
17305
+ async search(data, opts, scope) {
17306
+ const filterParts = [
17307
+ `projectId = '${scope.projectId}'`,
17308
+ `scopeName = '${scope.scopeName}'`
17309
+ ];
17310
+ if (opts.filter) {
17311
+ filterParts.push(opts.filter);
17312
+ }
17313
+ const results = await this.chunksNs.query({
17314
+ data,
17315
+ topK: opts.limit,
17316
+ includeMetadata: true,
17317
+ filter: filterParts.join(" AND "),
17318
+ queryMode: vector.QueryMode.HYBRID,
17319
+ fusionAlgorithm: vector.FusionAlgorithm.DBSF
17320
+ });
17321
+ return results.map((doc) => ({
17322
+ id: String(doc.id),
17323
+ score: doc.score,
17324
+ metadata: {
17325
+ projectId: doc.metadata?.projectId ?? "",
17326
+ scopeName: doc.metadata?.scopeName ?? "",
17327
+ url: doc.metadata?.url ?? "",
17328
+ path: doc.metadata?.path ?? "",
17329
+ title: doc.metadata?.title ?? "",
17330
+ sectionTitle: doc.metadata?.sectionTitle ?? "",
17331
+ headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
17332
+ snippet: doc.metadata?.snippet ?? "",
17333
+ chunkText: doc.metadata?.chunkText ?? "",
17334
+ ordinal: doc.metadata?.ordinal ?? 0,
17335
+ contentHash: doc.metadata?.contentHash ?? "",
17336
+ depth: doc.metadata?.depth ?? 0,
17337
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
17338
+ routeFile: doc.metadata?.routeFile ?? "",
17339
+ tags: doc.metadata?.tags ?? [],
17340
+ description: doc.metadata?.description || void 0,
17341
+ keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
17342
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
17343
+ incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
17344
+ }
17345
+ }));
17346
+ }
17347
+ async searchChunksByUrl(data, url, opts, scope) {
17348
+ const filterParts = [
17349
+ `projectId = '${scope.projectId}'`,
17350
+ `scopeName = '${scope.scopeName}'`,
17351
+ `url = '${url}'`
17352
+ ];
17353
+ if (opts.filter) {
17354
+ filterParts.push(opts.filter);
17355
+ }
17356
+ const results = await this.chunksNs.query({
17357
+ data,
17358
+ topK: opts.limit,
17359
+ includeMetadata: true,
17360
+ filter: filterParts.join(" AND "),
17361
+ queryMode: vector.QueryMode.HYBRID,
17362
+ fusionAlgorithm: vector.FusionAlgorithm.DBSF
17161
17363
  });
17162
17364
  return results.map((doc) => ({
17163
- id: doc.id,
17365
+ id: String(doc.id),
17164
17366
  score: doc.score,
17165
17367
  metadata: {
17166
17368
  projectId: doc.metadata?.projectId ?? "",
17167
17369
  scopeName: doc.metadata?.scopeName ?? "",
17168
- url: doc.content.url,
17370
+ url: doc.metadata?.url ?? "",
17169
17371
  path: doc.metadata?.path ?? "",
17170
- title: doc.content.title,
17171
- sectionTitle: doc.content.sectionTitle,
17172
- headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
17372
+ title: doc.metadata?.title ?? "",
17373
+ sectionTitle: doc.metadata?.sectionTitle ?? "",
17374
+ headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
17173
17375
  snippet: doc.metadata?.snippet ?? "",
17174
- chunkText: doc.content.text,
17376
+ chunkText: doc.metadata?.chunkText ?? "",
17175
17377
  ordinal: doc.metadata?.ordinal ?? 0,
17176
17378
  contentHash: doc.metadata?.contentHash ?? "",
17177
17379
  depth: doc.metadata?.depth ?? 0,
17178
17380
  incomingLinks: doc.metadata?.incomingLinks ?? 0,
17179
17381
  routeFile: doc.metadata?.routeFile ?? "",
17180
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17382
+ tags: doc.metadata?.tags ?? [],
17181
17383
  description: doc.metadata?.description || void 0,
17182
- keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
17384
+ keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
17385
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
17386
+ incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
17183
17387
  }
17184
17388
  }));
17185
17389
  }
17186
- async searchPages(query, opts, scope) {
17187
- const index = this.pageIndex(scope);
17390
+ async searchPagesByText(data, opts, scope) {
17391
+ return this.queryPages({ data }, opts, scope);
17392
+ }
17393
+ async searchPagesByVector(vector, opts, scope) {
17394
+ return this.queryPages({ vector }, opts, scope);
17395
+ }
17396
+ async queryPages(input, opts, scope) {
17397
+ const filterParts = [
17398
+ `projectId = '${scope.projectId}'`,
17399
+ `scopeName = '${scope.scopeName}'`
17400
+ ];
17401
+ if (opts.filter) {
17402
+ filterParts.push(opts.filter);
17403
+ }
17188
17404
  let results;
17189
17405
  try {
17190
- results = await index.search({
17191
- query,
17192
- limit: opts.limit,
17193
- semanticWeight: opts.semanticWeight,
17194
- inputEnrichment: opts.inputEnrichment,
17195
- reranking: true,
17196
- filter: opts.filter
17406
+ results = await this.pagesNs.query({
17407
+ ...input,
17408
+ topK: opts.limit,
17409
+ includeMetadata: true,
17410
+ filter: filterParts.join(" AND "),
17411
+ queryMode: vector.QueryMode.HYBRID,
17412
+ fusionAlgorithm: vector.FusionAlgorithm.DBSF
17197
17413
  });
17198
17414
  } catch {
17199
17415
  return [];
17200
17416
  }
17201
17417
  return results.map((doc) => ({
17202
- id: doc.id,
17418
+ id: String(doc.id),
17203
17419
  score: doc.score,
17204
- title: doc.content.title,
17205
- url: doc.content.url,
17206
- description: doc.content.description ?? "",
17207
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17420
+ title: doc.metadata?.title ?? "",
17421
+ url: doc.metadata?.url ?? "",
17422
+ description: doc.metadata?.description ?? "",
17423
+ tags: doc.metadata?.tags ?? [],
17208
17424
  depth: doc.metadata?.depth ?? 0,
17209
17425
  incomingLinks: doc.metadata?.incomingLinks ?? 0,
17210
- routeFile: doc.metadata?.routeFile ?? ""
17426
+ routeFile: doc.metadata?.routeFile ?? "",
17427
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0
17211
17428
  }));
17212
17429
  }
17213
- async deleteByIds(ids, scope) {
17430
+ async deleteByIds(ids, _scope) {
17214
17431
  if (ids.length === 0) return;
17215
- const index = this.chunkIndex(scope);
17216
- const BATCH_SIZE = 500;
17432
+ const BATCH_SIZE = 100;
17217
17433
  for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17218
17434
  const batch = ids.slice(i, i + BATCH_SIZE);
17219
- await index.delete(batch);
17435
+ await this.chunksNs.delete(batch);
17220
17436
  }
17221
17437
  }
17222
17438
  async deleteScope(scope) {
17223
- try {
17224
- const chunkIdx = this.chunkIndex(scope);
17225
- await chunkIdx.deleteIndex();
17226
- } catch {
17227
- }
17228
- try {
17229
- const pageIdx = this.pageIndex(scope);
17230
- await pageIdx.deleteIndex();
17231
- } catch {
17439
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17440
+ const ids = [];
17441
+ let cursor = "0";
17442
+ try {
17443
+ for (; ; ) {
17444
+ const result = await ns.range({
17445
+ cursor,
17446
+ limit: 100,
17447
+ includeMetadata: true
17448
+ });
17449
+ for (const doc of result.vectors) {
17450
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
17451
+ ids.push(String(doc.id));
17452
+ }
17453
+ }
17454
+ if (!result.nextCursor || result.nextCursor === "0") break;
17455
+ cursor = result.nextCursor;
17456
+ }
17457
+ } catch {
17458
+ }
17459
+ if (ids.length > 0) {
17460
+ const BATCH_SIZE = 100;
17461
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17462
+ const batch = ids.slice(i, i + BATCH_SIZE);
17463
+ await ns.delete(batch);
17464
+ }
17465
+ }
17232
17466
  }
17233
17467
  }
17234
17468
  async listScopes(projectId) {
17235
- const allIndexes = await this.client.listIndexes();
17236
- const prefix = `${projectId}--`;
17237
- const scopeNames = /* @__PURE__ */ new Set();
17238
- for (const name of allIndexes) {
17239
- if (name.startsWith(prefix) && !name.endsWith("--pages")) {
17240
- const scopeName = name.slice(prefix.length);
17241
- scopeNames.add(scopeName);
17242
- }
17243
- }
17244
- const scopes = [];
17245
- for (const scopeName of scopeNames) {
17246
- const scope = {
17247
- projectId,
17248
- scopeName,
17249
- scopeId: `${projectId}:${scopeName}`
17250
- };
17469
+ const scopeMap = /* @__PURE__ */ new Map();
17470
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17471
+ let cursor = "0";
17251
17472
  try {
17252
- const info = await this.chunkIndex(scope).info();
17253
- scopes.push({
17254
- projectId,
17255
- scopeName,
17256
- lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
17257
- documentCount: info.documentCount
17258
- });
17473
+ for (; ; ) {
17474
+ const result = await ns.range({
17475
+ cursor,
17476
+ limit: 100,
17477
+ includeMetadata: true
17478
+ });
17479
+ for (const doc of result.vectors) {
17480
+ if (doc.metadata?.projectId === projectId) {
17481
+ const scopeName = doc.metadata.scopeName ?? "";
17482
+ scopeMap.set(scopeName, (scopeMap.get(scopeName) ?? 0) + 1);
17483
+ }
17484
+ }
17485
+ if (!result.nextCursor || result.nextCursor === "0") break;
17486
+ cursor = result.nextCursor;
17487
+ }
17259
17488
  } catch {
17260
- scopes.push({
17261
- projectId,
17262
- scopeName,
17263
- lastIndexedAt: "unknown",
17264
- documentCount: 0
17265
- });
17266
17489
  }
17267
17490
  }
17268
- return scopes;
17491
+ return [...scopeMap.entries()].map(([scopeName, count]) => ({
17492
+ projectId,
17493
+ scopeName,
17494
+ lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
17495
+ documentCount: count
17496
+ }));
17269
17497
  }
17270
17498
  async getContentHashes(scope) {
17271
17499
  const map = /* @__PURE__ */ new Map();
17272
- const index = this.chunkIndex(scope);
17273
17500
  let cursor = "0";
17274
17501
  try {
17275
17502
  for (; ; ) {
17276
- const result = await index.range({ cursor, limit: 100 });
17277
- for (const doc of result.documents) {
17278
- if (doc.metadata?.contentHash) {
17279
- map.set(doc.id, doc.metadata.contentHash);
17503
+ const result = await this.chunksNs.range({
17504
+ cursor,
17505
+ limit: 100,
17506
+ includeMetadata: true
17507
+ });
17508
+ for (const doc of result.vectors) {
17509
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
17510
+ map.set(String(doc.id), doc.metadata.contentHash);
17511
+ }
17512
+ }
17513
+ if (!result.nextCursor || result.nextCursor === "0") break;
17514
+ cursor = result.nextCursor;
17515
+ }
17516
+ } catch {
17517
+ }
17518
+ return map;
17519
+ }
17520
+ async listPages(scope, opts) {
17521
+ const cursor = opts?.cursor ?? "0";
17522
+ const limit = opts?.limit ?? 50;
17523
+ try {
17524
+ const result = await this.pagesNs.range({
17525
+ cursor,
17526
+ limit,
17527
+ includeMetadata: true
17528
+ });
17529
+ const pages = result.vectors.filter(
17530
+ (doc) => doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && (!opts?.pathPrefix || (doc.metadata?.url ?? "").startsWith(opts.pathPrefix))
17531
+ ).map((doc) => ({
17532
+ url: doc.metadata?.url ?? "",
17533
+ title: doc.metadata?.title ?? "",
17534
+ description: doc.metadata?.description ?? "",
17535
+ routeFile: doc.metadata?.routeFile ?? ""
17536
+ }));
17537
+ const response = { pages };
17538
+ if (result.nextCursor && result.nextCursor !== "0") {
17539
+ response.nextCursor = result.nextCursor;
17540
+ }
17541
+ return response;
17542
+ } catch {
17543
+ return { pages: [] };
17544
+ }
17545
+ }
17546
+ async getPageHashes(scope) {
17547
+ const map = /* @__PURE__ */ new Map();
17548
+ let cursor = "0";
17549
+ try {
17550
+ for (; ; ) {
17551
+ const result = await this.pagesNs.range({
17552
+ cursor,
17553
+ limit: 100,
17554
+ includeMetadata: true
17555
+ });
17556
+ for (const doc of result.vectors) {
17557
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
17558
+ map.set(String(doc.id), doc.metadata.contentHash);
17280
17559
  }
17281
17560
  }
17282
17561
  if (!result.nextCursor || result.nextCursor === "0") break;
@@ -17286,47 +17565,43 @@ var UpstashSearchStore = class {
17286
17565
  }
17287
17566
  return map;
17288
17567
  }
17568
+ async deletePagesByIds(ids, _scope) {
17569
+ if (ids.length === 0) return;
17570
+ const BATCH_SIZE = 50;
17571
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17572
+ const batch = ids.slice(i, i + BATCH_SIZE);
17573
+ await this.pagesNs.delete(batch);
17574
+ }
17575
+ }
17289
17576
  async upsertPages(pages, scope) {
17290
17577
  if (pages.length === 0) return;
17291
- const index = this.pageIndex(scope);
17292
17578
  const BATCH_SIZE = 50;
17293
17579
  for (let i = 0; i < pages.length; i += BATCH_SIZE) {
17294
17580
  const batch = pages.slice(i, i + BATCH_SIZE);
17295
- const docs = batch.map((p) => ({
17296
- id: p.url,
17297
- content: {
17298
- title: p.title,
17299
- url: p.url,
17300
- type: "page",
17301
- description: p.description ?? "",
17302
- keywords: (p.keywords ?? []).join(","),
17303
- summary: p.summary ?? "",
17304
- tags: p.tags.join(",")
17305
- },
17306
- metadata: {
17307
- markdown: p.markdown,
17308
- projectId: p.projectId,
17309
- scopeName: p.scopeName,
17310
- routeFile: p.routeFile,
17311
- routeResolution: p.routeResolution,
17312
- incomingLinks: p.incomingLinks,
17313
- outgoingLinks: p.outgoingLinks,
17314
- depth: p.depth,
17315
- indexedAt: p.indexedAt
17316
- }
17317
- }));
17318
- await index.upsert(docs);
17581
+ await this.pagesNs.upsert(
17582
+ batch.map((p) => ({
17583
+ id: p.id,
17584
+ data: p.data,
17585
+ metadata: {
17586
+ ...p.metadata,
17587
+ projectId: scope.projectId,
17588
+ scopeName: scope.scopeName,
17589
+ type: "page"
17590
+ }
17591
+ }))
17592
+ );
17319
17593
  }
17320
17594
  }
17321
17595
  async getPage(url, scope) {
17322
- const index = this.pageIndex(scope);
17323
17596
  try {
17324
- const results = await index.fetch([url]);
17597
+ const results = await this.pagesNs.fetch([url], {
17598
+ includeMetadata: true
17599
+ });
17325
17600
  const doc = results[0];
17326
- if (!doc) return null;
17601
+ if (!doc || !doc.metadata) return null;
17327
17602
  return {
17328
- url: doc.content.url,
17329
- title: doc.content.title,
17603
+ url: doc.metadata.url,
17604
+ title: doc.metadata.title,
17330
17605
  markdown: doc.metadata.markdown,
17331
17606
  projectId: doc.metadata.projectId,
17332
17607
  scopeName: doc.metadata.scopeName,
@@ -17334,27 +17609,86 @@ var UpstashSearchStore = class {
17334
17609
  routeResolution: doc.metadata.routeResolution,
17335
17610
  incomingLinks: doc.metadata.incomingLinks,
17336
17611
  outgoingLinks: doc.metadata.outgoingLinks,
17612
+ outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? void 0,
17337
17613
  depth: doc.metadata.depth,
17338
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17614
+ tags: doc.metadata.tags ?? [],
17339
17615
  indexedAt: doc.metadata.indexedAt,
17340
- summary: doc.content.summary || void 0,
17341
- description: doc.content.description || void 0,
17342
- keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
17616
+ summary: doc.metadata.summary || void 0,
17617
+ description: doc.metadata.description || void 0,
17618
+ keywords: doc.metadata.keywords?.length ? doc.metadata.keywords : void 0,
17619
+ publishedAt: typeof doc.metadata.publishedAt === "number" ? doc.metadata.publishedAt : void 0
17343
17620
  };
17344
17621
  } catch {
17345
17622
  return null;
17346
17623
  }
17347
17624
  }
17625
+ async fetchPageWithVector(url, scope) {
17626
+ try {
17627
+ const results = await this.pagesNs.fetch([url], {
17628
+ includeMetadata: true,
17629
+ includeVectors: true
17630
+ });
17631
+ const doc = results[0];
17632
+ if (!doc || !doc.metadata || !doc.vector) return null;
17633
+ if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
17634
+ return null;
17635
+ }
17636
+ return { metadata: doc.metadata, vector: doc.vector };
17637
+ } catch {
17638
+ return null;
17639
+ }
17640
+ }
17641
+ async fetchPagesBatch(urls, scope) {
17642
+ if (urls.length === 0) return [];
17643
+ try {
17644
+ const results = await this.pagesNs.fetch(urls, {
17645
+ includeMetadata: true
17646
+ });
17647
+ const out = [];
17648
+ for (const doc of results) {
17649
+ if (!doc || !doc.metadata) continue;
17650
+ if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
17651
+ continue;
17652
+ }
17653
+ out.push({
17654
+ url: doc.metadata.url,
17655
+ title: doc.metadata.title,
17656
+ routeFile: doc.metadata.routeFile,
17657
+ outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? []
17658
+ });
17659
+ }
17660
+ return out;
17661
+ } catch {
17662
+ return [];
17663
+ }
17664
+ }
17348
17665
  async deletePages(scope) {
17666
+ const ids = [];
17667
+ let cursor = "0";
17349
17668
  try {
17350
- const index = this.pageIndex(scope);
17351
- await index.reset();
17669
+ for (; ; ) {
17670
+ const result = await this.pagesNs.range({
17671
+ cursor,
17672
+ limit: 100,
17673
+ includeMetadata: true
17674
+ });
17675
+ for (const doc of result.vectors) {
17676
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
17677
+ ids.push(String(doc.id));
17678
+ }
17679
+ }
17680
+ if (!result.nextCursor || result.nextCursor === "0") break;
17681
+ cursor = result.nextCursor;
17682
+ }
17352
17683
  } catch {
17353
17684
  }
17685
+ if (ids.length > 0) {
17686
+ await this.deletePagesByIds(ids, scope);
17687
+ }
17354
17688
  }
17355
17689
  async health() {
17356
17690
  try {
17357
- await this.client.info();
17691
+ await this.index.info();
17358
17692
  return { ok: true };
17359
17693
  } catch (error) {
17360
17694
  return {
@@ -17364,14 +17698,31 @@ var UpstashSearchStore = class {
17364
17698
  }
17365
17699
  }
17366
17700
  async dropAllIndexes(projectId) {
17367
- const allIndexes = await this.client.listIndexes();
17368
- const prefix = `${projectId}--`;
17369
- for (const name of allIndexes) {
17370
- if (name.startsWith(prefix)) {
17371
- try {
17372
- const index = this.client.index(name);
17373
- await index.deleteIndex();
17374
- } catch {
17701
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17702
+ const ids = [];
17703
+ let cursor = "0";
17704
+ try {
17705
+ for (; ; ) {
17706
+ const result = await ns.range({
17707
+ cursor,
17708
+ limit: 100,
17709
+ includeMetadata: true
17710
+ });
17711
+ for (const doc of result.vectors) {
17712
+ if (doc.metadata?.projectId === projectId) {
17713
+ ids.push(String(doc.id));
17714
+ }
17715
+ }
17716
+ if (!result.nextCursor || result.nextCursor === "0") break;
17717
+ cursor = result.nextCursor;
17718
+ }
17719
+ } catch {
17720
+ }
17721
+ if (ids.length > 0) {
17722
+ const BATCH_SIZE = 100;
17723
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17724
+ const batch = ids.slice(i, i + BATCH_SIZE);
17725
+ await ns.delete(batch);
17375
17726
  }
17376
17727
  }
17377
17728
  }
@@ -17385,12 +17736,16 @@ async function createUpstashStore(config) {
17385
17736
  if (!url || !token) {
17386
17737
  throw new SearchSocketError(
17387
17738
  "VECTOR_BACKEND_UNAVAILABLE",
17388
- `Missing Upstash Search credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
17739
+ `Missing Upstash Vector credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
17389
17740
  );
17390
17741
  }
17391
- const { Search } = await import('@upstash/search');
17392
- const client = new Search({ url, token });
17393
- return new UpstashSearchStore({ client });
17742
+ const { Index } = await import('@upstash/vector');
17743
+ const index = new Index({ url, token });
17744
+ return new UpstashSearchStore({
17745
+ index,
17746
+ pagesNamespace: config.upstash.namespaces.pages,
17747
+ chunksNamespace: config.upstash.namespaces.chunks
17748
+ });
17394
17749
  }
17395
17750
  function sha1(input) {
17396
17751
  return crypto.createHash("sha1").update(input).digest("hex");
@@ -17458,6 +17813,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
17458
17813
  if (normalizeText(current.text)) {
17459
17814
  sections.push({
17460
17815
  sectionTitle: current.sectionTitle,
17816
+ headingLevel: current.headingLevel,
17461
17817
  headingPath: current.headingPath,
17462
17818
  text: current.text.trim()
17463
17819
  });
@@ -17476,6 +17832,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
17476
17832
  headingStack.length = level;
17477
17833
  current = {
17478
17834
  sectionTitle: title,
17835
+ headingLevel: level,
17479
17836
  headingPath: headingStack.filter((entry) => Boolean(entry)).slice(0, headingPathDepth),
17480
17837
  text: `${line}
17481
17838
  `
@@ -17611,6 +17968,7 @@ function splitSection(section, config) {
17611
17968
  return [
17612
17969
  {
17613
17970
  sectionTitle: section.sectionTitle,
17971
+ headingLevel: section.headingLevel,
17614
17972
  headingPath: section.headingPath,
17615
17973
  chunkText: text
17616
17974
  }
@@ -17661,6 +18019,7 @@ ${chunk}`;
17661
18019
  }
17662
18020
  return merged.map((chunkText) => ({
17663
18021
  sectionTitle: section.sectionTitle,
18022
+ headingLevel: section.headingLevel,
17664
18023
  headingPath: section.headingPath,
17665
18024
  chunkText
17666
18025
  }));
@@ -17676,6 +18035,18 @@ function buildSummaryChunkText(page) {
17676
18035
  }
17677
18036
  return parts.join("\n\n");
17678
18037
  }
18038
/**
 * Builds the heading-aware title string for a chunk.
 *
 * Returns `undefined` when the chunk has no section title or no heading
 * level. Otherwise the result is "<title> — <section>", where <section> is
 * the " > "-joined heading path when that path has more than one entry (with
 * the section title appended if it is not already the last path entry).
 *
 * @param chunk - Object with `title`, `sectionTitle`, `headingLevel`, `headingPath`.
 * @returns The composed title, or `undefined`.
 */
function buildEmbeddingTitle(chunk) {
  const { title, sectionTitle, headingLevel, headingPath } = chunk;
  if (!sectionTitle || headingLevel === undefined) return undefined;
  if (headingPath.length <= 1) {
    return `${title} \u2014 ${sectionTitle}`;
  }
  const crumbs = headingPath.join(" > ");
  const endsWithSection = headingPath[headingPath.length - 1] === sectionTitle;
  const tail = endsWithSection ? "" : ` > ${sectionTitle}`;
  return `${title} \u2014 ${crumbs}${tail}`;
}
17679
18050
  function buildEmbeddingText(chunk, prependTitle) {
17680
18051
  if (!prependTitle) return chunk.chunkText;
17681
18052
  const prefix = chunk.sectionTitle ? `${chunk.title} \u2014 ${chunk.sectionTitle}` : chunk.title;
@@ -17706,10 +18077,14 @@ function chunkPage(page, config, scope) {
17706
18077
  tags: page.tags,
17707
18078
  contentHash: "",
17708
18079
  description: page.description,
17709
- keywords: page.keywords
18080
+ keywords: page.keywords,
18081
+ publishedAt: page.publishedAt,
18082
+ incomingAnchorText: page.incomingAnchorText,
18083
+ meta: page.meta
17710
18084
  };
17711
18085
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
17712
- summaryChunk.contentHash = sha256(normalizeText(embeddingText));
18086
+ const metaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
18087
+ summaryChunk.contentHash = sha256(normalizeText(embeddingText) + metaSuffix);
17713
18088
  chunks.push(summaryChunk);
17714
18089
  }
17715
18090
  const ordinalOffset = config.chunking.pageSummaryChunk ? 1 : 0;
@@ -17726,6 +18101,7 @@ function chunkPage(page, config, scope) {
17726
18101
  path: page.url,
17727
18102
  title: page.title,
17728
18103
  sectionTitle: entry.sectionTitle,
18104
+ headingLevel: entry.headingLevel,
17729
18105
  headingPath: entry.headingPath,
17730
18106
  chunkText: entry.chunkText,
17731
18107
  snippet: toSnippet(entry.chunkText),
@@ -17735,10 +18111,16 @@ function chunkPage(page, config, scope) {
17735
18111
  tags: page.tags,
17736
18112
  contentHash: "",
17737
18113
  description: page.description,
17738
- keywords: page.keywords
18114
+ keywords: page.keywords,
18115
+ publishedAt: page.publishedAt,
18116
+ incomingAnchorText: page.incomingAnchorText,
18117
+ meta: page.meta
17739
18118
  };
17740
18119
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
17741
- chunk.contentHash = sha256(normalizeText(embeddingText));
18120
+ const embeddingTitle = config.chunking.weightHeadings ? buildEmbeddingTitle(chunk) : void 0;
18121
+ const chunkMetaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
18122
+ const hashInput = embeddingTitle ? `${normalizeText(embeddingText)}|title:${embeddingTitle}` : normalizeText(embeddingText);
18123
+ chunk.contentHash = sha256(hashInput + chunkMetaSuffix);
17742
18124
  chunks.push(chunk);
17743
18125
  }
17744
18126
  return chunks;
@@ -18570,7 +18952,112 @@ function gfm(turndownService) {
18570
18952
  ]);
18571
18953
  }
18572
18954
 
18955
+ // src/utils/structured-meta.ts
18956
// Meta keys must look like plain identifiers so they can be embedded in a
// filter expression without quoting.
var VALID_KEY_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;

/**
 * Reports whether `key` is usable as a structured-meta key.
 *
 * Non-string input is rejected outright; without that guard `RegExp.test`
 * would coerce e.g. `undefined` to the valid-looking key "undefined".
 *
 * @param key - Candidate key.
 * @returns `true` only for identifier-like strings.
 */
function validateMetaKey(key) {
  return typeof key === "string" && VALID_KEY_RE.test(key);
}

/**
 * Converts the textual content of a meta tag to the value implied by `dataType`.
 *
 * - "number" / "date": a finite number when the text parses as one, otherwise
 *   the original text. Blank text is returned unchanged (`Number("")` is 0,
 *   which would otherwise be stored as a real value).
 * - "boolean": `true` only for the exact text "true".
 * - "string[]": comma-separated list, trimmed, with empty entries dropped.
 * - anything else: the text unchanged.
 *
 * @param content - Raw attribute text.
 * @param dataType - One of the type names above.
 * @returns The parsed value.
 */
function parseMetaValue(content, dataType) {
  switch (dataType) {
    case "number":
    case "date": {
      if (typeof content === "string" && content.trim() === "") return content;
      const parsed = Number(content);
      return Number.isFinite(parsed) ? parsed : content;
    }
    case "boolean":
      return content === "true";
    case "string[]":
      return content ? content.split(",").map((s) => s.trim()).filter(Boolean) : [];
    default:
      return content;
  }
}

/**
 * Escapes a string for use inside a single-quoted filter literal by doubling
 * every single quote.
 */
function escapeFilterValue(s) {
  return s.replace(/'/g, "''");
}

/**
 * Builds an " AND "-joined filter expression over `meta.<key>` fields.
 *
 * Strings become `CONTAINS '<escaped>'` clauses; booleans and finite numbers
 * become `= <value>` clauses. Entries with an invalid key or any other value
 * type (arrays, objects, null, NaN, ...) are skipped, because interpolating
 * them verbatim would let the value inject arbitrary filter syntax.
 *
 * @param filters - Map of meta key to desired value; nullish is treated as empty.
 * @returns The filter expression, or "" when no clause was produced.
 */
function buildMetaFilterString(filters) {
  const clauses = [];
  for (const [key, value] of Object.entries(filters ?? {})) {
    if (!validateMetaKey(key)) continue;
    const field = `meta.${key}`;
    if (typeof value === "string") {
      clauses.push(`${field} CONTAINS '${escapeFilterValue(value)}'`);
    } else if (typeof value === "boolean" || (typeof value === "number" && Number.isFinite(value))) {
      clauses.push(`${field} = ${value}`);
    }
  }
  return clauses.join(" AND ");
}
18996
+
18573
18997
  // src/indexing/extractor.ts
18998
/**
 * Normalizes a date-like value to a millisecond timestamp.
 *
 * Accepts `Date` instances, strings (parsed with `new Date(value)`) and
 * numbers (used as-is). Anything else, and any non-finite result, yields
 * `undefined`.
 *
 * @param value - Candidate date value.
 * @returns Finite timestamp in milliseconds, or `undefined`.
 */
function normalizeDateToMs(value) {
  let ms;
  if (value instanceof Date) {
    ms = value.getTime();
  } else if (typeof value === "string") {
    ms = new Date(value).getTime();
  } else if (typeof value === "number") {
    ms = value;
  } else {
    return undefined;
  }
  return Number.isFinite(ms) ? ms : undefined;
}
19013
// Frontmatter keys consulted for a publication date, in priority order.
var FRONTMATTER_DATE_FIELDS = ["date", "publishedAt", "updatedAt", "published_at", "updated_at"];

/**
 * Returns the first frontmatter date field that normalizes to a timestamp.
 *
 * Fields are tried in `FRONTMATTER_DATE_FIELDS` order and lookup stops at the
 * first usable value.
 *
 * @param data - Parsed frontmatter object.
 * @returns Timestamp in milliseconds, or `undefined` when no field qualifies.
 */
function extractPublishedAtFromFrontmatter(data) {
  let found;
  FRONTMATTER_DATE_FIELDS.some((field) => {
    found = normalizeDateToMs(data[field]);
    return found !== undefined;
  });
  return found;
}
19021
/**
 * Extracts a publication timestamp from a loaded HTML document.
 *
 * Sources are tried in this order and the first one that normalizes to a
 * timestamp wins:
 *   1. `datePublished` in JSON-LD scripts (top-level objects, array items,
 *      and the `@graph` array of either),
 *   2. `<meta property="article:published_time">`,
 *   3. `<meta itemprop="datePublished">` / `<time itemprop="datePublished">`,
 *   4. the first `<time datetime>` element.
 *
 * @param $ - Cheerio-style selector function for the document.
 * @returns Timestamp in milliseconds, or `undefined` when nothing usable is found.
 */
function extractPublishedAtFromHtml($) {
  const jsonLdScripts = $('script[type="application/ld+json"]');
  for (let i = 0; i < jsonLdScripts.length; i++) {
    let parsed;
    try {
      const raw = $(jsonLdScripts[i]).html();
      if (!raw) continue;
      parsed = JSON.parse(raw);
    } catch {
      // Malformed JSON-LD: skip this script and keep looking.
      continue;
    }
    const roots = Array.isArray(parsed) ? parsed : [parsed];
    for (const root of roots) {
      // Null or primitive entries carry no date and must not abort the scan.
      if (!root || typeof root !== "object") continue;
      const nodes = Array.isArray(root["@graph"]) ? [root, ...root["@graph"]] : [root];
      for (const node of nodes) {
        if (!node || typeof node !== "object") continue;
        const val = normalizeDateToMs(node.datePublished);
        if (val !== undefined) return val;
      }
    }
  }
  // Markup fallbacks, evaluated lazily so later selectors run only when needed.
  const fallbacks = [
    () => $('meta[property="article:published_time"]').attr("content"),
    () => $('meta[itemprop="datePublished"]').attr("content")?.trim() || $('time[itemprop="datePublished"]').attr("datetime"),
    () => $("time[datetime]").first().attr("datetime")
  ];
  for (const read of fallbacks) {
    const text = read()?.trim();
    if (!text) continue;
    const val = normalizeDateToMs(text);
    if (val !== undefined) return val;
  }
  return undefined;
}
18574
19061
  function hasTopLevelNoindexComment(markdown) {
18575
19062
  const lines = markdown.split(/\r?\n/);
18576
19063
  let inFence = false;
@@ -18586,6 +19073,97 @@ function hasTopLevelNoindexComment(markdown) {
18586
19073
  }
18587
19074
  return false;
18588
19075
  }
19076
// Alt texts that consist of exactly one of these words describe nothing.
var GARBAGE_ALT_WORDS = /* @__PURE__ */ new Set([
  "image", "photo", "picture", "icon",
  "logo", "banner", "screenshot", "thumbnail",
  "img", "graphic", "illustration", "spacer",
  "pixel", "placeholder", "avatar", "background"
]);
// Matches text ending in an image file extension, optionally followed by a query string.
var IMAGE_EXT_RE = /\.(jpg|jpeg|png|gif|svg|webp|avif|bmp|ico)(\?.*)?$/i;

/**
 * Decides whether an image `alt` text is descriptive enough to index.
 *
 * Rejected: text shorter than 5 characters after trimming, text that ends in
 * an image file extension, and text that is exactly one of the generic words
 * in `GARBAGE_ALT_WORDS` (case-insensitive).
 *
 * @param alt - Raw alt attribute value.
 * @returns `true` when the text is worth keeping.
 */
function isMeaningfulAlt(alt) {
  const text = alt.trim();
  const longEnough = text.length >= 5;
  return longEnough && !IMAGE_EXT_RE.test(text) && !GARBAGE_ALT_WORDS.has(text.toLowerCase());
}
19102
/**
 * Picks the text that should stand in for an image.
 *
 * Priority: the image's own `imageDescAttr` attribute, then the same
 * attribute on an enclosing `<figure>`, then a meaningful `alt` (joined with
 * the figure caption by " — " when both exist), then the caption alone.
 *
 * @param img - Cheerio-style wrapper around the `<img>` element.
 * @param $ - Selector function (not used here).
 * @param imageDescAttr - Name of the explicit description attribute.
 * @returns The chosen text, or `null` when nothing usable exists.
 */
function resolveImageText(img, $, imageDescAttr) {
  const ownDescription = img.attr(imageDescAttr)?.trim();
  if (ownDescription) return ownDescription;

  const figure = img.closest("figure");
  const insideFigure = figure.length > 0;
  const figureDescription = insideFigure ? figure.attr(imageDescAttr)?.trim() : undefined;
  if (figureDescription) return figureDescription;

  const altText = img.attr("alt")?.trim() ?? "";
  const caption = insideFigure ? figure.find("figcaption").first().text().trim() : "";
  if (isMeaningfulAlt(altText)) {
    return caption ? `${altText} \u2014 ${caption}` : altText;
  }
  return caption || null;
}
19123
// Anchor texts that say nothing about the link target.
var STOP_ANCHORS = /* @__PURE__ */ new Set([
  "here", "click", "click here", "read more", "link", "this", "more"
]);

/**
 * Normalizes link text for use as anchor-text signal.
 *
 * Whitespace runs collapse to one space and the text is trimmed and
 * lower-cased. Text shorter than 3 characters or listed in `STOP_ANCHORS`
 * becomes ""; anything longer than 100 characters is cut to 100.
 *
 * @param raw - Raw text content of the link.
 * @returns Normalized anchor text, possibly "".
 */
function normalizeAnchorText(raw) {
  const text = raw.replace(/\s+/g, " ").trim().toLowerCase();
  const discard = text.length < 3 || STOP_ANCHORS.has(text);
  // slice is a no-op for text that is already within the limit.
  return discard ? "" : text.slice(0, 100);
}
19139
/**
 * Escapes `&`, `<` and `>` so the text can be placed inside HTML element content.
 *
 * @param text - Plain text.
 * @returns Text with the three characters replaced by their entities.
 */
function escapeHtml(text) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };
  return text.replace(/[&<>]/g, (ch) => entities[ch]);
}
19142
/**
 * Replaces images under `root2` with their textual description.
 *
 * `<picture>` elements are handled first (using their first `<img>`), then
 * the remaining `<img>` elements. An element with usable text becomes a
 * `<span>` holding the escaped text, and the `<figcaption>` of an enclosing
 * `<figure>` is removed; an element without usable text is removed.
 *
 * @param root2 - Cheerio-style selection to process in place.
 * @param $ - Selector function used to wrap matched elements.
 * @param imageDescAttr - Attribute name forwarded to `resolveImageText`.
 */
function preprocessImages(root2, $, imageDescAttr) {
  // `target` is the node that gets replaced; `img` is where the text comes from.
  const substitute = (target, img) => {
    const figure = target.closest("figure");
    const text = img.length ? resolveImageText(img, $, imageDescAttr) : null;
    if (!text) {
      target.remove();
      return;
    }
    if (figure.length) figure.find("figcaption").remove();
    target.replaceWith(`<span>${escapeHtml(text)}</span>`);
  };
  root2.find("picture").each((_i, el) => {
    const picture = $(el);
    substitute(picture, picture.find("img").first());
  });
  root2.find("img").each((_i, el) => {
    const img = $(el);
    substitute(img, img);
  });
}
18589
19167
  function extractFromHtml(url, html, config) {
18590
19168
  const $ = cheerio.load(html);
18591
19169
  const normalizedUrl = normalizeUrlPath(url);
@@ -18611,6 +19189,20 @@ function extractFromHtml(url, html, config) {
18611
19189
  if (weight === 0) {
18612
19190
  return null;
18613
19191
  }
19192
+ if ($('meta[name="searchsocket:noindex"]').attr("content") === "true") {
19193
+ return null;
19194
+ }
19195
+ const RESERVED_META_KEYS = /* @__PURE__ */ new Set(["noindex", "tags"]);
19196
+ const meta = {};
19197
+ $('meta[name^="searchsocket:"]').each((_i, el) => {
19198
+ const name = $(el).attr("name") ?? "";
19199
+ const key = name.slice("searchsocket:".length);
19200
+ if (!key || RESERVED_META_KEYS.has(key) || !validateMetaKey(key)) return;
19201
+ const content = $(el).attr("content") ?? "";
19202
+ const dataType = $(el).attr("data-type") ?? "string";
19203
+ meta[key] = parseMetaValue(content, dataType);
19204
+ });
19205
+ const componentTags = $('meta[name="searchsocket:tags"]').attr("content")?.trim();
18614
19206
  const description = $("meta[name='description']").attr("content")?.trim() || $("meta[property='og:description']").attr("content")?.trim() || void 0;
18615
19207
  const keywordsRaw = $("meta[name='keywords']").attr("content")?.trim();
18616
19208
  const keywords = keywordsRaw ? keywordsRaw.split(",").map((k) => k.trim()).filter(Boolean) : void 0;
@@ -18622,7 +19214,9 @@ function extractFromHtml(url, html, config) {
18622
19214
  root2.find(selector).remove();
18623
19215
  }
18624
19216
  root2.find(`[${config.extract.ignoreAttr}]`).remove();
19217
+ preprocessImages(root2, $, config.extract.imageDescAttr);
18625
19218
  const outgoingLinks = [];
19219
+ const seenLinkKeys = /* @__PURE__ */ new Set();
18626
19220
  root2.find("a[href]").each((_index, node) => {
18627
19221
  const href = $(node).attr("href");
18628
19222
  if (!href || href.startsWith("#") || href.startsWith("mailto:") || href.startsWith("tel:")) {
@@ -18633,7 +19227,19 @@ function extractFromHtml(url, html, config) {
18633
19227
  if (!["http:", "https:"].includes(parsed.protocol)) {
18634
19228
  return;
18635
19229
  }
18636
- outgoingLinks.push(normalizeUrlPath(parsed.pathname));
19230
+ const url2 = normalizeUrlPath(parsed.pathname);
19231
+ let anchorText = normalizeAnchorText($(node).text());
19232
+ if (!anchorText) {
19233
+ const imgAlt = $(node).find("img").first().attr("alt") ?? "";
19234
+ if (isMeaningfulAlt(imgAlt)) {
19235
+ anchorText = normalizeAnchorText(imgAlt);
19236
+ }
19237
+ }
19238
+ const key = `${url2}|${anchorText}`;
19239
+ if (!seenLinkKeys.has(key)) {
19240
+ seenLinkKeys.add(key);
19241
+ outgoingLinks.push({ url: url2, anchorText });
19242
+ }
18637
19243
  } catch {
18638
19244
  }
18639
19245
  });
@@ -18658,16 +19264,25 @@ function extractFromHtml(url, html, config) {
18658
19264
  return null;
18659
19265
  }
18660
19266
  const tags = normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1);
19267
+ const publishedAt = extractPublishedAtFromHtml($);
19268
+ if (componentTags) {
19269
+ const extraTags = componentTags.split(",").map((t) => t.trim()).filter(Boolean);
19270
+ for (const t of extraTags) {
19271
+ if (!tags.includes(t)) tags.push(t);
19272
+ }
19273
+ }
18661
19274
  return {
18662
19275
  url: normalizeUrlPath(url),
18663
19276
  title,
18664
19277
  markdown,
18665
- outgoingLinks: [...new Set(outgoingLinks)],
19278
+ outgoingLinks,
18666
19279
  noindex: false,
18667
19280
  tags,
18668
19281
  description,
18669
19282
  keywords,
18670
- weight
19283
+ weight,
19284
+ publishedAt,
19285
+ meta: Object.keys(meta).length > 0 ? meta : void 0
18671
19286
  };
18672
19287
  }
18673
19288
  function extractFromMarkdown(url, markdown, title) {
@@ -18688,6 +19303,24 @@ function extractFromMarkdown(url, markdown, title) {
18688
19303
  if (mdWeight === 0) {
18689
19304
  return null;
18690
19305
  }
19306
+ let mdMeta;
19307
+ const rawMeta = searchsocketMeta?.meta;
19308
+ if (rawMeta && typeof rawMeta === "object" && !Array.isArray(rawMeta)) {
19309
+ const metaObj = {};
19310
+ for (const [key, val] of Object.entries(rawMeta)) {
19311
+ if (!validateMetaKey(key)) continue;
19312
+ if (typeof val === "string" || typeof val === "number" || typeof val === "boolean") {
19313
+ metaObj[key] = val;
19314
+ } else if (Array.isArray(val) && val.every((v) => typeof v === "string")) {
19315
+ metaObj[key] = val;
19316
+ } else if (val instanceof Date) {
19317
+ metaObj[key] = val.getTime();
19318
+ }
19319
+ }
19320
+ if (Object.keys(metaObj).length > 0) {
19321
+ mdMeta = metaObj;
19322
+ }
19323
+ }
18691
19324
  const content = parsed.content;
18692
19325
  const normalized = normalizeMarkdown(content);
18693
19326
  if (!normalizeText(normalized)) {
@@ -18702,6 +19335,7 @@ function extractFromMarkdown(url, markdown, title) {
18702
19335
  fmKeywords = frontmatter.keywords.split(",").map((k) => k.trim()).filter(Boolean);
18703
19336
  }
18704
19337
  if (fmKeywords && fmKeywords.length === 0) fmKeywords = void 0;
19338
+ const publishedAt = extractPublishedAtFromFrontmatter(frontmatter);
18705
19339
  return {
18706
19340
  url: normalizeUrlPath(url),
18707
19341
  title: resolvedTitle,
@@ -18711,7 +19345,9 @@ function extractFromMarkdown(url, markdown, title) {
18711
19345
  tags: normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1),
18712
19346
  description: fmDescription,
18713
19347
  keywords: fmKeywords,
18714
- weight: mdWeight
19348
+ weight: mdWeight,
19349
+ publishedAt,
19350
+ meta: mdMeta
18715
19351
  };
18716
19352
  }
18717
19353
  function segmentToRegex(segment) {
@@ -18906,7 +19542,7 @@ async function parseManifest(cwd, outputDir) {
18906
19542
  const manifestPath = path__default.default.resolve(cwd, outputDir, "server", "manifest-full.js");
18907
19543
  let content;
18908
19544
  try {
18909
- content = await fs3__default.default.readFile(manifestPath, "utf8");
19545
+ content = await fs8__default.default.readFile(manifestPath, "utf8");
18910
19546
  } catch {
18911
19547
  throw new SearchSocketError(
18912
19548
  "BUILD_MANIFEST_NOT_FOUND",
@@ -19217,6 +19853,125 @@ function filePathToUrl(filePath, baseDir) {
19217
19853
  const noExt = relative.replace(/\.md$/i, "").replace(/\/index$/i, "");
19218
19854
  return normalizeUrlPath(noExt || "/");
19219
19855
  }
19856
// Matches SvelteKit route files: a basename of +page/+layout/+error, an
// optional "@<layout>" suffix (which may be empty, as in "+page@.svelte" for
// a reset to the root layout), and the .svelte extension. The match is
// anchored to the start of the basename so "my+page.svelte" is not mistaken
// for a route file.
var ROUTE_FILE_RE = /(?:^|[\\/])\+(page|layout|error)(@[^./\\]*)?\.svelte$/;

/**
 * Reports whether `filePath` is a Svelte component rather than a route file.
 *
 * @param filePath - File path with "/" or "\" separators.
 * @returns `true` for `.svelte` files whose basename is not a SvelteKit
 *   route file; `false` for route files and non-Svelte files.
 */
function isSvelteComponentFile(filePath) {
  if (!filePath.endsWith(".svelte")) return false;
  return !ROUTE_FILE_RE.test(filePath);
}
19861
/**
 * Extracts documentation metadata from Svelte component source.
 *
 * - `description` is the body of the first `<!-- @component ... -->` comment.
 * - `props` comes from the first `let { ... } = $props()` statement: one
 *   entry per destructured name (rest elements are skipped), with the
 *   default value when present and the type when the annotation can be
 *   resolved. The annotation may be a capitalized type name (resolved via
 *   `resolveTypeReference`) or an inline `{ ... }` object type.
 *
 * @param source - Full text of the `.svelte` file.
 * @returns `{ description, props }` where `props` is an array of
 *   `{ name, type?, default? }`.
 */
function extractSvelteComponentMeta(source) {
  const componentMatch = source.match(/<!--\s*@component\s*([\s\S]*?)\s*-->/);
  const description = componentMatch?.[1]?.trim() || undefined;
  // The annotation group may start with "{" so inline object types such as
  // `let { a }: { a: string } = $props()` are captured as the annotation
  // instead of leaking into the destructuring capture.
  const propsMatch = source.match(
    /let\s+\{([\s\S]*?)\}\s*(?::\s*([^=;][\s\S]*?))?\s*=\s*\$props\(\)/
  );
  const props = [];
  if (!propsMatch) {
    return { description, props };
  }
  const destructureBlock = propsMatch[1];
  const typeAnnotation = propsMatch[2]?.trim();
  let resolvedTypeMap;
  if (typeAnnotation && /^[A-Z]\w*$/.test(typeAnnotation)) {
    resolvedTypeMap = resolveTypeReference(source, typeAnnotation);
  } else if (typeAnnotation && typeAnnotation.startsWith("{")) {
    resolvedTypeMap = parseInlineTypeAnnotation(typeAnnotation);
  }
  for (const entry of splitDestructureBlock(destructureBlock)) {
    const trimmed = entry.trim();
    if (!trimmed || trimmed.startsWith("...")) continue;
    let propName;
    let defaultValue;
    // `name: alias` or `name: alias = default` — the prop is the part before the colon.
    const renameMatch = trimmed.match(/^(\w+)\s*:\s*\w+\s*(?:=\s*([\s\S]+))?$/);
    const defaultMatch = renameMatch ? null : trimmed.match(/^(\w+)\s*=\s*([\s\S]+)$/);
    const matched = renameMatch ?? defaultMatch;
    if (matched) {
      propName = matched[1];
      defaultValue = matched[2]?.trim();
    } else {
      propName = trimmed.match(/^(\w+)/)?.[1] ?? trimmed;
    }
    const propType = resolvedTypeMap?.get(propName);
    props.push({
      name: propName,
      ...(propType ? { type: propType } : {}),
      ...(defaultValue ? { default: defaultValue } : {})
    });
  }
  return { description, props };
}
19906
/**
 * Splits the inside of a destructuring pattern on its top-level commas.
 *
 * Commas nested in brackets, braces or parentheses, and commas inside string
 * or template literals, do not split. `//` and block comments are dropped so
 * punctuation inside them cannot split or unbalance an entry. An unmatched
 * closing bracket is kept in the text but never drives the depth negative.
 *
 * @param block - Text between the outer `{` and `}` of the pattern.
 * @returns The raw (untrimmed) entries; a trailing blank entry is omitted.
 */
function splitDestructureBlock(block) {
  const entries = [];
  let depth = 0;
  let quote = null;
  let current = "";
  for (let i = 0; i < block.length; i++) {
    const ch = block[i];
    if (quote) {
      current += ch;
      if (ch === "\\" && i + 1 < block.length) {
        // Keep the escaped character so an escaped quote does not end the literal.
        current += block[++i];
      } else if (ch === quote) {
        quote = null;
      }
      continue;
    }
    if (ch === "/" && block[i + 1] === "/") {
      const eol = block.indexOf("\n", i);
      if (eol === -1) break;
      i = eol - 1;
      continue;
    }
    if (ch === "/" && block[i + 1] === "*") {
      const close = block.indexOf("*/", i + 2);
      if (close === -1) break;
      i = close + 1;
      continue;
    }
    if (ch === '"' || ch === "'" || ch === "`") {
      quote = ch;
      current += ch;
      continue;
    }
    if (ch === "," && depth === 0) {
      entries.push(current);
      current = "";
      continue;
    }
    if (ch === "{" || ch === "[" || ch === "(") {
      depth++;
    } else if ((ch === "}" || ch === "]" || ch === ")") && depth > 0) {
      depth--;
    }
    current += ch;
  }
  if (current.trim()) entries.push(current);
  return entries;
}
19927
/**
 * Finds the declaration of `typeName` in `source` and parses its members.
 *
 * Recognized forms are `interface Name { ... }` (optionally with type
 * parameters and/or an `extends` clause before the brace) and
 * `type Name = { ... }`. The body is delimited by brace counting.
 *
 * @param source - Text to search.
 * @param typeName - Name of the interface or type alias.
 * @returns The result of `parseTypeMembers` on the body, or `undefined` when
 *   no declaration is found or its braces are unbalanced.
 */
function resolveTypeReference(source, typeName) {
  // Escape the name so it is matched literally inside the pattern.
  const name = typeName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const startRe = new RegExp(
    `(?:\\binterface\\s+${name}\\b[^{;]*|\\btype\\s+${name}\\s*=\\s*)\\{`
  );
  const startMatch = source.match(startRe);
  if (!startMatch || startMatch.index === undefined) return undefined;
  const bodyStart = startMatch.index + startMatch[0].length;
  let depth = 1;
  let end = bodyStart;
  for (; end < source.length && depth > 0; end++) {
    if (source[end] === "{") depth++;
    else if (source[end] === "}") depth--;
  }
  if (depth !== 0) return undefined;
  // `end` sits one past the closing brace; exclude that brace from the body.
  return parseTypeMembers(source.slice(bodyStart, end - 1));
}
19943
/**
 * Parses an inline object type annotation such as `{ a: string; b?: number }`.
 *
 * One leading "{" and one trailing "}" are stripped when present and the
 * remainder is handed to `parseTypeMembers`.
 *
 * @param annotation - Annotation text.
 * @returns Whatever `parseTypeMembers` returns for the inner text.
 */
function parseInlineTypeAnnotation(annotation) {
  const from = annotation.startsWith("{") ? 1 : 0;
  const to = annotation.endsWith("}") ? annotation.length - 1 : annotation.length;
  // `to` can fall below `from` only for the one-character input "{" — never, since "{" does not end with "}".
  return parseTypeMembers(annotation.slice(from, Math.max(from, to)));
}
19947
/**
 * Parses the members of an object type body into a name → type-text map.
 *
 * Members may be separated by ";", "," or a line break. Separators nested in
 * braces, brackets, parentheses, angle brackets or string literals do not
 * split, comments are ignored, and a line break does not end a member whose
 * type clearly continues on the next line (for example a multi-line union).
 * Line breaks inside a value are collapsed to single spaces.
 *
 * @param body - Text between the braces of an interface or object type.
 * @returns Map from member name to its type text; members that are not of
 *   the form `name: type` (optionally `readonly` / `?`) are skipped.
 */
function parseTypeMembers(body) {
  const map = /* @__PURE__ */ new Map();
  for (const member of splitTopLevelTypeMembers(body)) {
    const memberMatch = member.match(/^(?:readonly\s+)?(\w+)\??\s*:\s*([\s\S]+)$/);
    if (memberMatch) {
      map.set(memberMatch[1], memberMatch[2].replace(/\s*\n\s*/g, " ").trim());
    }
  }
  return map;
}

// Splits a type body into trimmed member strings on top-level separators.
function splitTopLevelTypeMembers(body) {
  const members = [];
  let current = "";
  let depth = 0;
  let quote = null;
  const flush = () => {
    const text = current.trim();
    if (text) members.push(text);
    current = "";
  };
  for (let i = 0; i < body.length; i++) {
    const ch = body[i];
    if (quote) {
      current += ch;
      if (ch === "\\" && i + 1 < body.length) {
        current += body[++i];
      } else if (ch === quote) {
        quote = null;
      }
      continue;
    }
    if (ch === "/" && body[i + 1] === "/") {
      const eol = body.indexOf("\n", i);
      if (eol === -1) break;
      i = eol - 1;
      continue;
    }
    if (ch === "/" && body[i + 1] === "*") {
      const close = body.indexOf("*/", i + 2);
      if (close === -1) break;
      i = close + 1;
      continue;
    }
    if (ch === '"' || ch === "'" || ch === "`") {
      quote = ch;
      current += ch;
      continue;
    }
    if (depth === 0 && (ch === ";" || ch === ",")) {
      flush();
      continue;
    }
    if (depth === 0 && ch === "\n") {
      // A line break only ends the member when the type is not obviously
      // continuing (dangling ":", "|", "&", "=>" or a leading "|" / "&").
      let next = i + 1;
      while (next < body.length && /\s/.test(body[next])) next++;
      const upcoming = body[next];
      const continues = /(?:[:|&]|=>)$/.test(current.trimEnd()) || upcoming === "|" || upcoming === "&";
      if (continues) current += ch;
      else flush();
      continue;
    }
    if (ch === "{" || ch === "[" || ch === "(" || ch === "<") {
      depth++;
    } else if (ch === "}" || ch === "]" || ch === ")" || (ch === ">" && body[i - 1] !== "=")) {
      // The ">" of an arrow "=>" is not a closing angle bracket.
      depth = Math.max(0, depth - 1);
    }
    current += ch;
  }
  flush();
  return members;
}
19958
/**
 * Renders extracted component metadata as a short one-line summary.
 *
 * The output reads "<Name> component. <description> Props: a (type) default: x, b."
 * with the description and the props sentence included only when present.
 *
 * @param componentName - Display name of the component.
 * @param meta - `{ description?, props }` where each prop has `name` and
 *   optionally `type` and `default`.
 * @returns The summary, or "" when there is neither a description nor any prop.
 */
function buildComponentMarkdown(componentName, meta) {
  const hasProps = meta.props.length > 0;
  if (!meta.description && !hasProps) return "";

  const describeProp = ({ name, type, default: fallback }) => {
    const typePart = type ? ` (${type})` : "";
    const defaultPart = fallback ? ` default: ${fallback}` : "";
    return typePart || defaultPart ? name + typePart + defaultPart : name;
  };

  const sentences = [`${componentName} component.`];
  if (meta.description) sentences.push(meta.description);
  if (hasProps) sentences.push(`Props: ${meta.props.map(describeProp).join(", ")}.`);
  return sentences.join(" ");
}
19220
19975
  function normalizeSvelteToMarkdown(source) {
19221
19976
  return source.replace(/<script[\s\S]*?<\/script>/g, "").replace(/<style[\s\S]*?<\/style>/g, "").replace(/<[^>]+>/g, " ").replace(/\{[^}]+\}/g, " ").replace(/\s+/g, " ").trim();
19222
19977
  }
@@ -19235,13 +19990,27 @@ async function loadContentFilesPages(cwd, config, maxPages) {
19235
19990
  const selected = typeof limit === "number" ? files.slice(0, limit) : files;
19236
19991
  const pages = [];
19237
19992
  for (const filePath of selected) {
19238
- const raw = await fs3__default.default.readFile(filePath, "utf8");
19239
- const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
19993
+ const raw = await fs8__default.default.readFile(filePath, "utf8");
19994
+ let markdown;
19995
+ let tags;
19996
+ if (filePath.endsWith(".md")) {
19997
+ markdown = raw;
19998
+ } else if (isSvelteComponentFile(filePath)) {
19999
+ const componentName = path__default.default.basename(filePath, ".svelte");
20000
+ const meta = extractSvelteComponentMeta(raw);
20001
+ const componentMarkdown = buildComponentMarkdown(componentName, meta);
20002
+ const templateContent = normalizeSvelteToMarkdown(raw);
20003
+ markdown = componentMarkdown ? [componentMarkdown, templateContent].filter(Boolean).join("\n\n") : templateContent;
20004
+ tags = ["component"];
20005
+ } else {
20006
+ markdown = normalizeSvelteToMarkdown(raw);
20007
+ }
19240
20008
  pages.push({
19241
20009
  url: filePathToUrl(filePath, baseDir),
19242
20010
  markdown,
19243
20011
  sourcePath: path__default.default.relative(cwd, filePath).replace(/\\/g, "/"),
19244
- outgoingLinks: []
20012
+ outgoingLinks: [],
20013
+ ...tags ? { tags } : {}
19245
20014
  });
19246
20015
  }
19247
20016
  return pages;
@@ -19371,7 +20140,7 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
19371
20140
  const selected = typeof limit === "number" ? htmlFiles.slice(0, limit) : htmlFiles;
19372
20141
  const pages = [];
19373
20142
  for (const filePath of selected) {
19374
- const html = await fs3__default.default.readFile(filePath, "utf8");
20143
+ const html = await fs8__default.default.readFile(filePath, "utf8");
19375
20144
  pages.push({
19376
20145
  url: staticHtmlFileToUrl(filePath, outputDir),
19377
20146
  html,
@@ -19434,7 +20203,7 @@ function isBlockedByRobots(urlPath, rules3) {
19434
20203
  }
19435
20204
  async function loadRobotsTxtFromDir(dir) {
19436
20205
  try {
19437
- const content = await fs3__default.default.readFile(path__default.default.join(dir, "robots.txt"), "utf8");
20206
+ const content = await fs8__default.default.readFile(path__default.default.join(dir, "robots.txt"), "utf8");
19438
20207
  return parseRobotsTxt(content);
19439
20208
  } catch {
19440
20209
  return null;
@@ -19462,29 +20231,65 @@ function nonNegativeOrZero(value) {
19462
20231
  function normalizeForTitleMatch(text) {
19463
20232
  return text.toLowerCase().replace(/[^a-z0-9\s]/g, "").replace(/\s+/g, " ").trim();
19464
20233
  }
19465
- function rankHits(hits, config, query) {
20234
+ function rankHits(hits, config, query, debug) {
19466
20235
  const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
19467
20236
  const titleMatchWeight = config.ranking.weights.titleMatch;
19468
20237
  return hits.map((hit) => {
19469
- let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
20238
+ const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
20239
+ let score = baseScore;
20240
+ let incomingLinkBoostValue = 0;
19470
20241
  if (config.ranking.enableIncomingLinkBoost) {
19471
20242
  const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.metadata.incomingLinks));
19472
- score += incomingBoost * config.ranking.weights.incomingLinks;
20243
+ incomingLinkBoostValue = incomingBoost * config.ranking.weights.incomingLinks;
20244
+ score += incomingLinkBoostValue;
19473
20245
  }
20246
+ let depthBoostValue = 0;
19474
20247
  if (config.ranking.enableDepthBoost) {
19475
20248
  const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
19476
- score += depthBoost * config.ranking.weights.depth;
20249
+ depthBoostValue = depthBoost * config.ranking.weights.depth;
20250
+ score += depthBoostValue;
19477
20251
  }
20252
+ let titleMatchBoostValue = 0;
19478
20253
  if (normalizedQuery && titleMatchWeight > 0) {
19479
20254
  const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
19480
20255
  if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
19481
- score += titleMatchWeight;
20256
+ titleMatchBoostValue = titleMatchWeight;
20257
+ score += titleMatchBoostValue;
19482
20258
  }
19483
20259
  }
19484
- return {
20260
+ let freshnessBoostValue = 0;
20261
+ if (config.ranking.enableFreshnessBoost) {
20262
+ const publishedAt = hit.metadata.publishedAt;
20263
+ if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
20264
+ const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
20265
+ const decay = 1 / (1 + nonNegativeOrZero(daysSince) * config.ranking.freshnessDecayRate);
20266
+ freshnessBoostValue = decay * config.ranking.weights.freshness;
20267
+ score += freshnessBoostValue;
20268
+ }
20269
+ }
20270
+ let anchorTextMatchBoostValue = 0;
20271
+ if (config.ranking.enableAnchorTextBoost && normalizedQuery && config.ranking.weights.anchorText > 0) {
20272
+ const normalizedAnchorText = normalizeForTitleMatch(hit.metadata.incomingAnchorText ?? "");
20273
+ if (normalizedAnchorText.length > 0 && normalizedQuery.length > 0 && (normalizedAnchorText.includes(normalizedQuery) || normalizedQuery.includes(normalizedAnchorText))) {
20274
+ anchorTextMatchBoostValue = config.ranking.weights.anchorText;
20275
+ score += anchorTextMatchBoostValue;
20276
+ }
20277
+ }
20278
+ const result = {
19485
20279
  hit,
19486
20280
  finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
19487
20281
  };
20282
+ if (debug) {
20283
+ result.breakdown = {
20284
+ baseScore,
20285
+ incomingLinkBoost: incomingLinkBoostValue,
20286
+ depthBoost: depthBoostValue,
20287
+ titleMatchBoost: titleMatchBoostValue,
20288
+ freshnessBoost: freshnessBoostValue,
20289
+ anchorTextMatchBoost: anchorTextMatchBoostValue
20290
+ };
20291
+ }
20292
+ return result;
19488
20293
  }).sort((a, b) => {
19489
20294
  const delta = b.finalScore - a.finalScore;
19490
20295
  return Number.isNaN(delta) ? 0 : delta;
@@ -19493,12 +20298,13 @@ function rankHits(hits, config, query) {
19493
20298
  function trimByScoreGap(results, config) {
19494
20299
  if (results.length === 0) return results;
19495
20300
  const threshold = config.ranking.scoreGapThreshold;
19496
- const minScore = config.ranking.minScore;
19497
- if (minScore > 0 && results.length > 0) {
19498
- const sortedScores = results.map((r) => r.pageScore).sort((a, b) => a - b);
19499
- const mid = Math.floor(sortedScores.length / 2);
19500
- const median = sortedScores.length % 2 === 0 ? (sortedScores[mid - 1] + sortedScores[mid]) / 2 : sortedScores[mid];
19501
- if (median < minScore) return [];
20301
+ const minScoreRatio = config.ranking.minScoreRatio;
20302
+ if (minScoreRatio > 0 && results.length > 0) {
20303
+ const topScore = results[0].pageScore;
20304
+ if (Number.isFinite(topScore) && topScore > 0) {
20305
+ const minThreshold = topScore * minScoreRatio;
20306
+ results = results.filter((r) => r.pageScore >= minThreshold);
20307
+ }
19502
20308
  }
19503
20309
  if (threshold > 0 && results.length > 1) {
19504
20310
  for (let i = 1; i < results.length; i++) {
@@ -19568,61 +20374,99 @@ function aggregateByPage(ranked, config) {
19568
20374
  return Number.isNaN(delta) ? 0 : delta;
19569
20375
  });
19570
20376
  }
19571
- function mergePageAndChunkResults(pageHits, rankedChunks, config) {
19572
- if (pageHits.length === 0) return rankedChunks;
19573
- const w = config.search.pageSearchWeight;
19574
- const pageScoreMap = /* @__PURE__ */ new Map();
19575
- for (const ph of pageHits) {
19576
- pageScoreMap.set(ph.url, ph);
19577
- }
19578
- const pagesWithChunks = /* @__PURE__ */ new Set();
19579
- const merged = rankedChunks.map((ranked) => {
19580
- const url = ranked.hit.metadata.url;
19581
- const pageHit = pageScoreMap.get(url);
19582
- if (pageHit) {
19583
- pagesWithChunks.add(url);
19584
- const blended = (1 - w) * ranked.finalScore + w * pageHit.score;
19585
- return {
19586
- hit: ranked.hit,
19587
- finalScore: Number.isFinite(blended) ? blended : ranked.finalScore
19588
- };
20377
+ function rankPageHits(pageHits, config, query, debug) {
20378
+ const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
20379
+ const titleMatchWeight = config.ranking.weights.titleMatch;
20380
+ return pageHits.map((hit) => {
20381
+ const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
20382
+ let score = baseScore;
20383
+ let incomingLinkBoostValue = 0;
20384
+ if (config.ranking.enableIncomingLinkBoost) {
20385
+ const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.incomingLinks));
20386
+ incomingLinkBoostValue = incomingBoost * config.ranking.weights.incomingLinks;
20387
+ score += incomingLinkBoostValue;
19589
20388
  }
19590
- return ranked;
19591
- });
19592
- for (const [url, pageHit] of pageScoreMap) {
19593
- if (pagesWithChunks.has(url)) continue;
19594
- const syntheticScore = pageHit.score * w;
19595
- const syntheticHit = {
19596
- id: `page:${url}`,
19597
- score: pageHit.score,
19598
- metadata: {
19599
- projectId: "",
19600
- scopeName: "",
19601
- url: pageHit.url,
19602
- path: pageHit.url,
19603
- title: pageHit.title,
19604
- sectionTitle: "",
19605
- headingPath: [],
19606
- snippet: pageHit.description || pageHit.title,
19607
- chunkText: pageHit.description || pageHit.title,
19608
- ordinal: 0,
19609
- contentHash: "",
19610
- depth: pageHit.depth,
19611
- incomingLinks: pageHit.incomingLinks,
19612
- routeFile: pageHit.routeFile,
19613
- tags: pageHit.tags
20389
+ let depthBoostValue = 0;
20390
+ if (config.ranking.enableDepthBoost) {
20391
+ const depthBoost = 1 / (1 + nonNegativeOrZero(hit.depth));
20392
+ depthBoostValue = depthBoost * config.ranking.weights.depth;
20393
+ score += depthBoostValue;
20394
+ }
20395
+ let titleMatchBoostValue = 0;
20396
+ if (normalizedQuery && titleMatchWeight > 0) {
20397
+ const normalizedTitle = normalizeForTitleMatch(hit.title);
20398
+ if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
20399
+ titleMatchBoostValue = titleMatchWeight;
20400
+ score += titleMatchBoostValue;
19614
20401
  }
20402
+ }
20403
+ let freshnessBoostValue = 0;
20404
+ if (config.ranking.enableFreshnessBoost) {
20405
+ const publishedAt = hit.publishedAt;
20406
+ if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
20407
+ const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
20408
+ const decay = 1 / (1 + nonNegativeOrZero(daysSince) * config.ranking.freshnessDecayRate);
20409
+ freshnessBoostValue = decay * config.ranking.weights.freshness;
20410
+ score += freshnessBoostValue;
20411
+ }
20412
+ }
20413
+ const pageWeight = findPageWeight(hit.url, config.ranking.pageWeights);
20414
+ if (pageWeight !== 1) {
20415
+ score *= pageWeight;
20416
+ }
20417
+ const result = {
20418
+ url: hit.url,
20419
+ title: hit.title,
20420
+ description: hit.description,
20421
+ routeFile: hit.routeFile,
20422
+ depth: hit.depth,
20423
+ incomingLinks: hit.incomingLinks,
20424
+ tags: hit.tags,
20425
+ baseScore,
20426
+ finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY,
20427
+ publishedAt: hit.publishedAt
19615
20428
  };
19616
- merged.push({
19617
- hit: syntheticHit,
19618
- finalScore: Number.isFinite(syntheticScore) ? syntheticScore : 0
19619
- });
19620
- }
19621
- return merged.sort((a, b) => {
20429
+ if (debug) {
20430
+ result.breakdown = {
20431
+ baseScore,
20432
+ pageWeight,
20433
+ incomingLinkBoost: incomingLinkBoostValue,
20434
+ depthBoost: depthBoostValue,
20435
+ titleMatchBoost: titleMatchBoostValue,
20436
+ freshnessBoost: freshnessBoostValue
20437
+ };
20438
+ }
20439
+ return result;
20440
+ }).filter((p) => findPageWeight(p.url, config.ranking.pageWeights) !== 0).sort((a, b) => {
19622
20441
  const delta = b.finalScore - a.finalScore;
19623
20442
  return Number.isNaN(delta) ? 0 : delta;
19624
20443
  });
19625
20444
  }
20445
/**
 * Trim a ranked page list using the two ranking cut-offs in `config.ranking`.
 *
 * 1. `minScoreRatio`: when positive and the first result has a finite,
 *    positive score, drop results scoring below `first * minScoreRatio`.
 * 2. `scoreGapThreshold`: when positive, cut the list at the first result
 *    whose relative drop from its predecessor reaches the threshold.
 *
 * The first element is treated as the top score; the list is not re-sorted.
 *
 * @param {Array<{finalScore: number}>} results
 * @param {{ranking: {scoreGapThreshold: number, minScoreRatio: number}}} config
 * @returns {Array<{finalScore: number}>} The input array itself when nothing is trimmed.
 */
function trimPagesByScoreGap(results, config) {
  if (results.length === 0) return results;
  const { scoreGapThreshold, minScoreRatio } = config.ranking;

  let kept = results;
  if (minScoreRatio > 0) {
    const best = kept[0].finalScore;
    if (Number.isFinite(best) && best > 0) {
      const floor = best * minScoreRatio;
      kept = kept.filter((entry) => entry.finalScore >= floor);
    }
  }

  if (!(scoreGapThreshold > 0) || kept.length <= 1) return kept;

  const cutIndex = kept.findIndex((entry, index) => {
    if (index === 0) return false;
    const previous = kept[index - 1].finalScore;
    // Relative gap is only defined against a positive predecessor.
    return previous > 0 && (previous - entry.finalScore) / previous >= scoreGapThreshold;
  });
  return cutIndex === -1 ? kept : kept.slice(0, cutIndex);
}
19626
20470
 
19627
20471
  // src/utils/time.ts
19628
20472
  function nowIso() {
@@ -19631,6 +20475,81 @@ function nowIso() {
19631
20475
  function hrTimeMs(start) {
19632
20476
  return Number(process.hrtime.bigint() - start) / 1e6;
19633
20477
  }
20478
/**
 * Resolve a page URL against an optional base URL.
 *
 * @param {string} pageUrl - Page path or URL.
 * @param {string} [baseUrl] - Base to resolve against; when falsy, `pageUrl` is returned as-is.
 * @returns {string} The absolute href, or `pageUrl` unchanged when there is no
 *   base or the URL constructor rejects the input.
 */
function resolvePageUrl(pageUrl, baseUrl) {
  if (baseUrl) {
    try {
      const absolute = new URL(pageUrl, baseUrl);
      return absolute.href;
    } catch {
      // Unparseable base/page combination: fall through to the raw value.
    }
  }
  return pageUrl;
}
20486
/**
 * Render the llms.txt index: an H1 title, an optional blockquote description,
 * and a "## Pages" list with one markdown link per page.
 *
 * Pages whose url is "/llms.txt" or "/llms-full.txt" are left out. The rest
 * are ordered by ascending `depth`, then by descending `incomingLinks`.
 *
 * @param {Array<{url: string, title: string, description?: string, depth: number, incomingLinks: number}>} pages
 * @param {object} config - Reads `llmsTxt.title`, `llmsTxt.description`, `project.id`, `project.baseUrl`.
 * @returns {string} File contents, ending with a newline.
 */
function generateLlmsTxt(pages, config) {
  const { title: customTitle, description } = config.llmsTxt;
  const { id: projectId, baseUrl } = config.project;

  const output = [`# ${customTitle ?? projectId}`];
  if (description) output.push("", `> ${description}`);

  const isGeneratedFile = (url) => url === "/llms.txt" || url === "/llms-full.txt";
  // filter() already returns a fresh array, so sorting it leaves `pages` untouched.
  const ordered = pages
    .filter((page) => !isGeneratedFile(page.url))
    .sort((a, b) => (a.depth !== b.depth ? a.depth - b.depth : b.incomingLinks - a.incomingLinks));

  if (ordered.length > 0) {
    output.push("", "## Pages", "");
    for (const page of ordered) {
      const link = `- [${page.title}](${resolvePageUrl(page.url, baseUrl)})`;
      output.push(page.description ? `${link}: ${page.description}` : link);
    }
  }
  output.push("");
  return output.join("\n");
}
20515
/**
 * Render the full-content companion file: an H1 title, an optional blockquote
 * description, then for every page a `---` rule, a linked H2 heading and the
 * page's trimmed markdown.
 *
 * Pages whose url is "/llms.txt" or "/llms-full.txt" are left out. The rest
 * are ordered by ascending `depth`, then by descending `incomingLinks`.
 *
 * @param {Array<{url: string, title: string, markdown: string, depth: number, incomingLinks: number}>} pages
 * @param {object} config - Reads `llmsTxt.title`, `llmsTxt.description`, `project.id`, `project.baseUrl`.
 * @returns {string} File contents, ending with a newline.
 */
function generateLlmsFullTxt(pages, config) {
  const { title: customTitle, description } = config.llmsTxt;
  const { id: projectId, baseUrl } = config.project;

  const output = [`# ${customTitle ?? projectId}`];
  if (description) output.push("", `> ${description}`);

  const isGeneratedFile = (url) => url === "/llms.txt" || url === "/llms-full.txt";
  // filter() already returns a fresh array, so sorting it leaves `pages` untouched.
  const ordered = pages
    .filter((page) => !isGeneratedFile(page.url))
    .sort((a, b) => (a.depth !== b.depth ? a.depth - b.depth : b.incomingLinks - a.incomingLinks));

  for (const page of ordered) {
    const heading = `## [${page.title}](${resolvePageUrl(page.url, baseUrl)})`;
    output.push("", "---", "", heading, "", page.markdown.trim());
  }
  output.push("");
  return output.join("\n");
}
20538
/**
 * Write the llms.txt index (and optionally its full-content companion) to disk.
 *
 * The index goes to `config.llmsTxt.outputPath`, resolved against `cwd`; the
 * parent directory is created if needed. When `config.llmsTxt.generateFull`
 * is set, the companion is written next to it with "-full" inserted before
 * the extension (".txt" is used when the configured path has no extension).
 *
 * @param {Array<object>} pages - Pages passed through to the generators.
 * @param {object} config - Reads `llmsTxt.outputPath` and `llmsTxt.generateFull`.
 * @param {string} cwd - Directory that `outputPath` is resolved against.
 * @param {{info: (message: string) => void}} logger3
 * @returns {Promise<void>}
 */
async function writeLlmsTxt(pages, config, cwd, logger3) {
  const outputPath = path__default.default.resolve(cwd, config.llmsTxt.outputPath);
  await fs8__default.default.mkdir(path__default.default.dirname(outputPath), { recursive: true });
  await fs8__default.default.writeFile(outputPath, generateLlmsTxt(pages, config), "utf8");
  logger3.info(`Generated llms.txt at ${config.llmsTxt.outputPath}`);

  if (!config.llmsTxt.generateFull) return;

  let fullPath;
  if (/\.txt$/.test(outputPath)) {
    fullPath = outputPath.replace(/\.txt$/, "-full.txt");
  } else {
    // Without a ".txt" suffix the replace above is a no-op, which would make
    // the full file overwrite the index just written. Derive a distinct
    // sibling name from the path parts instead.
    const { dir, name, ext } = path__default.default.parse(outputPath);
    fullPath = path__default.default.join(dir, `${name}-full${ext || ".txt"}`);
  }
  await fs8__default.default.writeFile(fullPath, generateLlmsFullTxt(pages, config), "utf8");
  const relativeFull = path__default.default.relative(cwd, fullPath);
  logger3.info(`Generated llms-full.txt at ${relativeFull}`);
}
19634
20553
 
19635
20554
  // src/indexing/pipeline.ts
19636
20555
  function buildPageSummary(page, maxChars = 3500) {
@@ -19649,16 +20568,33 @@ function buildPageSummary(page, maxChars = 3500) {
19649
20568
  if (joined.length <= maxChars) return joined;
19650
20569
  return joined.slice(0, maxChars).trim();
19651
20570
  }
20571
/**
 * Compute a content hash for a page from the fields that should trigger a
 * re-index when they change: title, description, keywords, tags, markdown,
 * outgoing link info, publishedAt, incoming anchor text and `meta`.
 *
 * List-valued fields are sorted before hashing so their order does not affect
 * the hash. `meta` is serialized with object keys sorted at every nesting
 * level.
 *
 * @param {object} page - Indexed page record; `tags` must be an array.
 * @returns {string} Result of `sha256` over the "|"-joined field values.
 */
function buildPageContentHash(page) {
  // Key-order-independent JSON serialization. An array passed as the
  // JSON.stringify replacer acts as a property allowlist at *every* depth, so
  // using the sorted top-level keys for that purpose silently drops nested
  // properties; walking the value manually keeps them in the hash.
  const stableStringify = (input) => {
    const value = input && typeof input.toJSON === "function" ? input.toJSON() : input;
    if (value === null || typeof value !== "object") return JSON.stringify(value);
    if (Array.isArray(value)) {
      // As in JSON, unserializable array items become null.
      return `[${Array.from(value, (item) => stableStringify(item) ?? "null").join(",")}]`;
    }
    const entries = [];
    for (const key of Object.keys(value).sort()) {
      const serialized = stableStringify(value[key]);
      // As in JSON, properties without a serializable value are omitted.
      if (serialized !== void 0) entries.push(`${JSON.stringify(key)}:${serialized}`);
    }
    return `{${entries.join(",")}}`;
  };

  const parts = [
    page.title,
    page.description ?? "",
    (page.keywords ?? []).slice().sort().join(","),
    page.tags.slice().sort().join(","),
    page.markdown,
    String(page.outgoingLinks),
    String(page.publishedAt ?? ""),
    page.incomingAnchorText ?? "",
    (page.outgoingLinkUrls ?? []).slice().sort().join(","),
    page.meta ? stableStringify(page.meta) : ""
  ];
  return sha256(parts.join("|"));
}
19652
20586
  var IndexPipeline = class _IndexPipeline {
19653
20587
  cwd;
19654
20588
  config;
19655
20589
  store;
19656
20590
  logger;
20591
+ hooks;
19657
20592
  constructor(options) {
19658
20593
  this.cwd = options.cwd;
19659
20594
  this.config = options.config;
19660
20595
  this.store = options.store;
19661
20596
  this.logger = options.logger;
20597
+ this.hooks = options.hooks;
19662
20598
  }
19663
20599
  static async create(options = {}) {
19664
20600
  const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
@@ -19668,7 +20604,8 @@ var IndexPipeline = class _IndexPipeline {
19668
20604
  cwd,
19669
20605
  config,
19670
20606
  store,
19671
- logger: options.logger ?? new Logger()
20607
+ logger: options.logger ?? new Logger(),
20608
+ hooks: options.hooks ?? {}
19672
20609
  });
19673
20610
  }
19674
20611
  getConfig() {
@@ -19689,7 +20626,7 @@ var IndexPipeline = class _IndexPipeline {
19689
20626
  const scope = resolveScope(this.config, options.scopeOverride);
19690
20627
  ensureStateDirs(this.cwd, this.config.state.dir);
19691
20628
  const sourceMode = options.sourceOverride ?? this.config.source.mode;
19692
- this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
20629
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-vector)`);
19693
20630
  if (options.force) {
19694
20631
  this.logger.info("Force mode enabled \u2014 full rebuild");
19695
20632
  }
@@ -19698,8 +20635,9 @@ var IndexPipeline = class _IndexPipeline {
19698
20635
  }
19699
20636
  const manifestStart = stageStart();
19700
20637
  const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
20638
+ const existingPageHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getPageHashes(scope);
19701
20639
  stageEnd("manifest", manifestStart);
19702
- this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
20640
+ this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes, ${existingPageHashes.size} existing page hashes loaded`);
19703
20641
  const sourceStart = stageStart();
19704
20642
  this.logger.info(`Loading pages (source: ${sourceMode})...`);
19705
20643
  let sourcePages;
@@ -19776,11 +20714,61 @@ var IndexPipeline = class _IndexPipeline {
19776
20714
  );
19777
20715
  continue;
19778
20716
  }
19779
- extractedPages.push(extracted);
20717
+ if (sourcePage.tags && sourcePage.tags.length > 0) {
20718
+ extracted.tags = [.../* @__PURE__ */ new Set([...extracted.tags, ...sourcePage.tags])];
20719
+ }
20720
+ let accepted;
20721
+ if (this.hooks.transformPage) {
20722
+ const transformed = await this.hooks.transformPage(extracted);
20723
+ if (transformed === null) {
20724
+ this.logger.debug(`Page ${sourcePage.url} skipped by transformPage hook`);
20725
+ continue;
20726
+ }
20727
+ accepted = transformed;
20728
+ } else {
20729
+ accepted = extracted;
20730
+ }
20731
+ extractedPages.push(accepted);
19780
20732
  this.logger.event("page_extracted", {
19781
- url: extracted.url
20733
+ url: accepted.url
19782
20734
  });
19783
20735
  }
20736
+ const customRecords = options.customRecords ?? [];
20737
+ if (customRecords.length > 0) {
20738
+ this.logger.info(`Processing ${customRecords.length} custom record${customRecords.length === 1 ? "" : "s"}...`);
20739
+ for (const record of customRecords) {
20740
+ const normalizedUrl = normalizeUrlPath(record.url);
20741
+ const normalized = normalizeMarkdown(record.content);
20742
+ if (!normalized.trim()) {
20743
+ this.logger.warn(`Custom record ${normalizedUrl} has empty content and was skipped.`);
20744
+ continue;
20745
+ }
20746
+ const urlTags = normalizedUrl.split("/").filter(Boolean).slice(0, 1);
20747
+ const tags = record.tags ? [.../* @__PURE__ */ new Set([...urlTags, ...record.tags])] : urlTags;
20748
+ const extracted = {
20749
+ url: normalizedUrl,
20750
+ title: record.title,
20751
+ markdown: normalized,
20752
+ outgoingLinks: [],
20753
+ noindex: false,
20754
+ tags,
20755
+ weight: record.weight
20756
+ };
20757
+ let accepted;
20758
+ if (this.hooks.transformPage) {
20759
+ const transformed = await this.hooks.transformPage(extracted);
20760
+ if (transformed === null) {
20761
+ this.logger.debug(`Custom record ${normalizedUrl} skipped by transformPage hook`);
20762
+ continue;
20763
+ }
20764
+ accepted = transformed;
20765
+ } else {
20766
+ accepted = extracted;
20767
+ }
20768
+ extractedPages.push(accepted);
20769
+ this.logger.event("page_extracted", { url: accepted.url, custom: true });
20770
+ }
20771
+ }
19784
20772
  extractedPages.sort((a, b) => a.url.localeCompare(b.url));
19785
20773
  const uniquePages = [];
19786
20774
  const seenUrls = /* @__PURE__ */ new Set();
@@ -19813,15 +20801,28 @@ var IndexPipeline = class _IndexPipeline {
19813
20801
  const linkStart = stageStart();
19814
20802
  const pageSet = new Set(indexablePages.map((page) => normalizeUrlPath(page.url)));
19815
20803
  const incomingLinkCount = /* @__PURE__ */ new Map();
20804
+ const incomingAnchorTexts = /* @__PURE__ */ new Map();
19816
20805
  for (const page of indexablePages) {
19817
20806
  incomingLinkCount.set(page.url, incomingLinkCount.get(page.url) ?? 0);
19818
20807
  }
19819
20808
  for (const page of indexablePages) {
19820
- for (const outgoing of page.outgoingLinks) {
20809
+ const seenForCount = /* @__PURE__ */ new Set();
20810
+ const seenForAnchor = /* @__PURE__ */ new Set();
20811
+ for (const { url: outgoing, anchorText } of page.outgoingLinks) {
19821
20812
  if (!pageSet.has(outgoing)) {
19822
20813
  continue;
19823
20814
  }
19824
- incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
20815
+ if (!seenForCount.has(outgoing)) {
20816
+ seenForCount.add(outgoing);
20817
+ incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
20818
+ }
20819
+ if (anchorText && !seenForAnchor.has(outgoing)) {
20820
+ seenForAnchor.add(outgoing);
20821
+ if (!incomingAnchorTexts.has(outgoing)) {
20822
+ incomingAnchorTexts.set(outgoing, /* @__PURE__ */ new Set());
20823
+ }
20824
+ incomingAnchorTexts.get(outgoing).add(anchorText);
20825
+ }
19825
20826
  }
19826
20827
  }
19827
20828
  stageEnd("links", linkStart);
@@ -19840,6 +20841,15 @@ var IndexPipeline = class _IndexPipeline {
19840
20841
  });
19841
20842
  }
19842
20843
  }
20844
+ for (const record of customRecords) {
20845
+ const normalizedUrl = normalizeUrlPath(record.url);
20846
+ if (!precomputedRoutes.has(normalizedUrl)) {
20847
+ precomputedRoutes.set(normalizedUrl, {
20848
+ routeFile: "",
20849
+ routeResolution: "exact"
20850
+ });
20851
+ }
20852
+ }
19843
20853
  for (const page of indexablePages) {
19844
20854
  const routeMatch = precomputedRoutes.get(normalizeUrlPath(page.url)) ?? mapUrlToRoute(page.url, routePatterns);
19845
20855
  if (routeMatch.routeResolution === "best-effort") {
@@ -19857,6 +20867,17 @@ var IndexPipeline = class _IndexPipeline {
19857
20867
  } else {
19858
20868
  routeExact += 1;
19859
20869
  }
20870
+ const anchorSet = incomingAnchorTexts.get(page.url);
20871
+ let incomingAnchorText;
20872
+ if (anchorSet && anchorSet.size > 0) {
20873
+ let joined = "";
20874
+ for (const phrase of anchorSet) {
20875
+ const next2 = joined ? `${joined} ${phrase}` : phrase;
20876
+ if (next2.length > 500) break;
20877
+ joined = next2;
20878
+ }
20879
+ incomingAnchorText = joined || void 0;
20880
+ }
19860
20881
  const indexedPage = {
19861
20882
  url: page.url,
19862
20883
  title: page.title,
@@ -19866,40 +20887,113 @@ var IndexPipeline = class _IndexPipeline {
19866
20887
  generatedAt: nowIso(),
19867
20888
  incomingLinks: incomingLinkCount.get(page.url) ?? 0,
19868
20889
  outgoingLinks: page.outgoingLinks.length,
20890
+ outgoingLinkUrls: page.outgoingLinks.map((l) => typeof l === "string" ? l : l.url),
19869
20891
  depth: getUrlDepth(page.url),
19870
20892
  tags: page.tags,
19871
20893
  markdown: page.markdown,
19872
20894
  description: page.description,
19873
- keywords: page.keywords
20895
+ keywords: page.keywords,
20896
+ publishedAt: page.publishedAt,
20897
+ incomingAnchorText,
20898
+ meta: page.meta
19874
20899
  };
19875
20900
  pages.push(indexedPage);
19876
20901
  this.logger.event("page_indexed", { url: page.url });
19877
20902
  }
20903
+ const pageRecords = pages.map((p) => {
20904
+ const summary = buildPageSummary(p);
20905
+ return {
20906
+ url: p.url,
20907
+ title: p.title,
20908
+ markdown: p.markdown,
20909
+ projectId: scope.projectId,
20910
+ scopeName: scope.scopeName,
20911
+ routeFile: p.routeFile,
20912
+ routeResolution: p.routeResolution,
20913
+ incomingLinks: p.incomingLinks,
20914
+ outgoingLinks: p.outgoingLinks,
20915
+ outgoingLinkUrls: p.outgoingLinkUrls,
20916
+ depth: p.depth,
20917
+ tags: p.tags,
20918
+ indexedAt: p.generatedAt,
20919
+ summary,
20920
+ description: p.description,
20921
+ keywords: p.keywords,
20922
+ contentHash: buildPageContentHash(p),
20923
+ publishedAt: p.publishedAt,
20924
+ meta: p.meta
20925
+ };
20926
+ });
20927
+ const currentPageUrls = new Set(pageRecords.map((r) => r.url));
20928
+ const changedPages = pageRecords.filter(
20929
+ (r) => !existingPageHashes.has(r.url) || existingPageHashes.get(r.url) !== r.contentHash
20930
+ );
20931
+ const deletedPageUrls = [...existingPageHashes.keys()].filter((url) => !currentPageUrls.has(url));
19878
20932
  if (!options.dryRun) {
19879
- const pageRecords = pages.map((p) => {
19880
- const summary = buildPageSummary(p);
19881
- return {
19882
- url: p.url,
19883
- title: p.title,
19884
- markdown: p.markdown,
19885
- projectId: scope.projectId,
19886
- scopeName: scope.scopeName,
19887
- routeFile: p.routeFile,
19888
- routeResolution: p.routeResolution,
19889
- incomingLinks: p.incomingLinks,
19890
- outgoingLinks: p.outgoingLinks,
19891
- depth: p.depth,
19892
- tags: p.tags,
19893
- indexedAt: p.generatedAt,
19894
- summary,
19895
- description: p.description,
19896
- keywords: p.keywords
19897
- };
19898
- });
19899
- await this.store.deletePages(scope);
19900
- await this.store.upsertPages(pageRecords, scope);
20933
+ if (options.force) {
20934
+ await this.store.deletePages(scope);
20935
+ this.logger.info(`Upserting ${pageRecords.length} page summaries...`);
20936
+ const pageDocs = pageRecords.map((r) => ({
20937
+ id: r.url,
20938
+ data: r.summary ?? r.title,
20939
+ metadata: {
20940
+ title: r.title,
20941
+ url: r.url,
20942
+ description: r.description ?? "",
20943
+ keywords: r.keywords ?? [],
20944
+ summary: r.summary ?? "",
20945
+ tags: r.tags,
20946
+ markdown: r.markdown,
20947
+ routeFile: r.routeFile,
20948
+ routeResolution: r.routeResolution,
20949
+ incomingLinks: r.incomingLinks,
20950
+ outgoingLinks: r.outgoingLinks,
20951
+ outgoingLinkUrls: r.outgoingLinkUrls ?? [],
20952
+ depth: r.depth,
20953
+ indexedAt: r.indexedAt,
20954
+ contentHash: r.contentHash ?? "",
20955
+ publishedAt: r.publishedAt ?? null,
20956
+ ...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
20957
+ }
20958
+ }));
20959
+ await this.store.upsertPages(pageDocs, scope);
20960
+ } else {
20961
+ if (changedPages.length > 0) {
20962
+ this.logger.info(`Upserting ${changedPages.length} changed page summaries...`);
20963
+ const pageDocs = changedPages.map((r) => ({
20964
+ id: r.url,
20965
+ data: r.summary ?? r.title,
20966
+ metadata: {
20967
+ title: r.title,
20968
+ url: r.url,
20969
+ description: r.description ?? "",
20970
+ keywords: r.keywords ?? [],
20971
+ summary: r.summary ?? "",
20972
+ tags: r.tags,
20973
+ markdown: r.markdown,
20974
+ routeFile: r.routeFile,
20975
+ routeResolution: r.routeResolution,
20976
+ incomingLinks: r.incomingLinks,
20977
+ outgoingLinks: r.outgoingLinks,
20978
+ outgoingLinkUrls: r.outgoingLinkUrls ?? [],
20979
+ depth: r.depth,
20980
+ indexedAt: r.indexedAt,
20981
+ contentHash: r.contentHash ?? "",
20982
+ publishedAt: r.publishedAt ?? null,
20983
+ ...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
20984
+ }
20985
+ }));
20986
+ await this.store.upsertPages(pageDocs, scope);
20987
+ }
20988
+ if (deletedPageUrls.length > 0) {
20989
+ await this.store.deletePagesByIds(deletedPageUrls, scope);
20990
+ }
20991
+ }
19901
20992
  }
20993
+ const pagesChanged = options.force ? pageRecords.length : changedPages.length;
20994
+ const pagesDeleted = deletedPageUrls.length;
19902
20995
  stageEnd("pages", pagesStart);
20996
+ this.logger.info(`Page changes: ${pagesChanged} changed/new, ${pagesDeleted} deleted, ${pageRecords.length - changedPages.length} unchanged`);
19903
20997
  this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
19904
20998
  const chunkStart = stageStart();
19905
20999
  this.logger.info("Chunking pages...");
@@ -19908,6 +21002,18 @@ var IndexPipeline = class _IndexPipeline {
19908
21002
  if (typeof maxChunks === "number") {
19909
21003
  chunks = chunks.slice(0, maxChunks);
19910
21004
  }
21005
+ if (this.hooks.transformChunk) {
21006
+ const transformed = [];
21007
+ for (const chunk of chunks) {
21008
+ const result = await this.hooks.transformChunk(chunk);
21009
+ if (result === null) {
21010
+ this.logger.debug(`Chunk ${chunk.chunkKey} skipped by transformChunk hook`);
21011
+ continue;
21012
+ }
21013
+ transformed.push(result);
21014
+ }
21015
+ chunks = transformed;
21016
+ }
19911
21017
  for (const chunk of chunks) {
19912
21018
  this.logger.event("chunked", {
19913
21019
  url: chunk.url,
@@ -19920,7 +21026,7 @@ var IndexPipeline = class _IndexPipeline {
19920
21026
  for (const chunk of chunks) {
19921
21027
  currentChunkMap.set(chunk.chunkKey, chunk);
19922
21028
  }
19923
- const changedChunks = chunks.filter((chunk) => {
21029
+ let changedChunks = chunks.filter((chunk) => {
19924
21030
  if (options.force) {
19925
21031
  return true;
19926
21032
  }
@@ -19934,36 +21040,43 @@ var IndexPipeline = class _IndexPipeline {
19934
21040
  return existingHash !== chunk.contentHash;
19935
21041
  });
19936
21042
  const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
21043
+ if (this.hooks.beforeIndex) {
21044
+ changedChunks = await this.hooks.beforeIndex(changedChunks);
21045
+ }
19937
21046
  this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
19938
21047
  const upsertStart = stageStart();
19939
21048
  let documentsUpserted = 0;
19940
21049
  if (!options.dryRun && changedChunks.length > 0) {
19941
- this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
19942
- const UPSTASH_CONTENT_LIMIT = 4096;
21050
+ this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
19943
21051
  const docs = changedChunks.map((chunk) => {
19944
- const title = chunk.title;
19945
- const sectionTitle = chunk.sectionTitle ?? "";
19946
- const url = chunk.url;
19947
- const tags = chunk.tags.join(",");
19948
- const headingPath = chunk.headingPath.join(" > ");
19949
- const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
19950
- const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
19951
- const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
21052
+ const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
21053
+ if (embeddingText.length > 2e3) {
21054
+ this.logger.warn(
21055
+ `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
21056
+ );
21057
+ }
19952
21058
  return {
19953
21059
  id: chunk.chunkKey,
19954
- content: { title, sectionTitle, text, url, tags, headingPath },
21060
+ data: embeddingText,
19955
21061
  metadata: {
19956
- projectId: scope.projectId,
19957
- scopeName: scope.scopeName,
21062
+ url: chunk.url,
19958
21063
  path: chunk.path,
21064
+ title: chunk.title,
21065
+ sectionTitle: chunk.sectionTitle ?? "",
21066
+ headingPath: chunk.headingPath.join(" > "),
19959
21067
  snippet: chunk.snippet,
21068
+ chunkText: embeddingText,
21069
+ tags: chunk.tags,
19960
21070
  ordinal: chunk.ordinal,
19961
21071
  contentHash: chunk.contentHash,
19962
21072
  depth: chunk.depth,
19963
21073
  incomingLinks: chunk.incomingLinks,
19964
21074
  routeFile: chunk.routeFile,
19965
21075
  description: chunk.description ?? "",
19966
- keywords: (chunk.keywords ?? []).join(",")
21076
+ keywords: chunk.keywords ?? [],
21077
+ publishedAt: chunk.publishedAt ?? null,
21078
+ incomingAnchorText: chunk.incomingAnchorText ?? "",
21079
+ ...chunk.meta && Object.keys(chunk.meta).length > 0 ? { meta: chunk.meta } : {}
19967
21080
  }
19968
21081
  };
19969
21082
  });
@@ -19981,9 +21094,16 @@ var IndexPipeline = class _IndexPipeline {
19981
21094
  } else {
19982
21095
  this.logger.info("No chunks to upsert \u2014 all up to date");
19983
21096
  }
21097
+ if (this.config.llmsTxt.enable && !options.dryRun) {
21098
+ const llmsStart = stageStart();
21099
+ await writeLlmsTxt(pages, this.config, this.cwd, this.logger);
21100
+ stageEnd("llms_txt", llmsStart);
21101
+ }
19984
21102
  this.logger.info("Done.");
19985
- return {
21103
+ const stats = {
19986
21104
  pagesProcessed: pages.length,
21105
+ pagesChanged,
21106
+ pagesDeleted,
19987
21107
  chunksTotal: chunks.length,
19988
21108
  chunksChanged: changedChunks.length,
19989
21109
  documentsUpserted,
@@ -19992,16 +21112,143 @@ var IndexPipeline = class _IndexPipeline {
19992
21112
  routeBestEffort,
19993
21113
  stageTimingsMs
19994
21114
  };
21115
+ if (this.hooks.afterIndex) {
21116
+ await this.hooks.afterIndex(stats);
21117
+ }
21118
+ return stats;
19995
21119
  }
19996
21120
  };
21121
+
21122
+ // src/search/related-pages.ts
21123
/**
 * Scores how structurally close two URL paths are, based on the number of
 * leading path segments they share.
 *
 * The result is `2 * sharedPrefixSegments / (segmentsA + segmentsB)`, so it
 * ranges from 0 (no common leading segment) to 1 (identical segment lists).
 * Empty segments (leading, trailing, or doubled slashes) are ignored.
 *
 * @param {string} urlA - First URL path.
 * @param {string} urlB - Second URL path.
 * @returns {number} Similarity in the range [0, 1].
 */
function diceScore(urlA, urlB) {
  const partsA = urlA.split("/").filter(Boolean);
  const partsB = urlB.split("/").filter(Boolean);
  const totalSegments = partsA.length + partsB.length;
  // Two root-like paths are treated as identical.
  if (totalSegments === 0) return 1;
  if (partsA.length === 0 || partsB.length === 0) return 0;
  // Only the common *prefix* counts: stop at the first differing position.
  // Running past the end of partsB compares against undefined, which also
  // registers as a mismatch, so the count never exceeds the shorter list.
  const firstMismatch = partsA.findIndex((segment, index) => segment !== partsB[index]);
  const sharedPrefix = firstMismatch === -1 ? partsA.length : firstMismatch;
  return 2 * sharedPrefix / totalSegments;
}
21139
/**
 * Combines the three relatedness signals into a single score: a flat 0.5
 * when the pages are linked, plus 0.3 of the path similarity and 0.2 of the
 * semantic similarity.
 *
 * @param {boolean} isLinked - Whether a link exists between the two pages.
 * @param {number} dice - Path similarity.
 * @param {number} semantic - Semantic similarity score.
 * @returns {number} Weighted sum of the three components.
 */
function compositeScore(isLinked, dice, semantic) {
  const linkComponent = isLinked ? 0.5 : 0;
  const pathComponent = 0.3 * dice;
  const semanticComponent = 0.2 * semantic;
  return linkComponent + pathComponent + semanticComponent;
}
21142
/**
 * Picks the single label that best describes why two pages are related.
 * Precedence: outgoing link, then incoming link, then path sibling
 * (path similarity strictly above 0.4), otherwise "semantic".
 *
 * @param {boolean} isOutgoing - The source page links to the candidate.
 * @param {boolean} isIncoming - The candidate links to the source page.
 * @param {number} dice - Path similarity between the two pages.
 * @returns {"outgoing_link"|"incoming_link"|"sibling"|"semantic"} Relationship label.
 */
function dominantRelationshipType(isOutgoing, isIncoming, dice) {
  let relationship = "semantic";
  if (isOutgoing) {
    relationship = "outgoing_link";
  } else if (isIncoming) {
    relationship = "incoming_link";
  } else if (dice > 0.4) {
    relationship = "sibling";
  }
  return relationship;
}
21148
+
21149
+ // src/search/engine.ts
21150
// Optional per-request overrides for ranking and search tuning. Every field
// is optional; the ratio-like fields are constrained to the range [0, 1].
var rankingOverridesSchema = (() => {
  const { z } = zod;
  // Small factories so each field gets its own validator instance.
  const optionalFlag = () => z.boolean().optional();
  const optionalRatio = () => z.number().min(0).max(1).optional();
  const optionalNumber = () => z.number().optional();

  const weights = z.object({
    incomingLinks: optionalNumber(),
    depth: optionalNumber(),
    aggregation: optionalNumber(),
    titleMatch: optionalNumber()
  }).optional();

  const ranking = z.object({
    enableIncomingLinkBoost: optionalFlag(),
    enableDepthBoost: optionalFlag(),
    aggregationCap: z.number().int().positive().optional(),
    aggregationDecay: optionalRatio(),
    minChunkScoreRatio: optionalRatio(),
    minScoreRatio: optionalRatio(),
    scoreGapThreshold: optionalRatio(),
    weights
  }).optional();

  const search = z.object({
    pageSearchWeight: optionalRatio()
  }).optional();

  return z.object({ ranking, search }).optional();
})();
19997
21170
  var requestSchema = zod.z.object({
19998
21171
  q: zod.z.string().trim().min(1),
19999
21172
  topK: zod.z.number().int().positive().max(100).optional(),
20000
21173
  scope: zod.z.string().optional(),
20001
21174
  pathPrefix: zod.z.string().optional(),
20002
21175
  tags: zod.z.array(zod.z.string()).optional(),
20003
- groupBy: zod.z.enum(["page", "chunk"]).optional()
21176
+ filters: zod.z.record(zod.z.string(), zod.z.union([zod.z.string(), zod.z.number(), zod.z.boolean()])).optional(),
21177
+ groupBy: zod.z.enum(["page", "chunk"]).optional(),
21178
+ maxSubResults: zod.z.number().int().positive().max(20).optional(),
21179
+ debug: zod.z.boolean().optional(),
21180
+ rankingOverrides: rankingOverridesSchema
20004
21181
  });
21182
// Upper bound on the number of pages collected when building the site tree.
var MAX_SITE_STRUCTURE_PAGES = 2e3;

/**
 * Creates an empty site-structure node. Title and route file start blank and
 * the node is marked as not indexed until a real page is attached to it.
 *
 * @param {string} url - URL path the node represents.
 * @param {number} depth - Number of path segments from the root.
 * @returns {{url: string, title: string, depth: number, routeFile: string, isIndexed: boolean, childCount: number, children: Array}} Fresh node with its own children array.
 */
function makeNode(url, depth) {
  const node = {
    url,
    title: "",
    depth,
    routeFile: "",
    isIndexed: false,
    childCount: 0,
    children: []
  };
  return node;
}
21186
/**
 * Builds a hierarchical tree of site pages from their URL paths.
 *
 * Every path segment becomes a node. Nodes that correspond to an entry in
 * `pages` are marked `isIndexed: true` and carry that page's title and route
 * file; intermediate nodes that exist only to hold children stay unindexed.
 * Children are sorted by URL and `childCount` is set on every node.
 *
 * @param {Array<{url: string, title: string, routeFile: string}>} pages - Pages to place in the tree.
 * @param {string} [pathPrefix] - When given, the node for this path is
 *   returned instead of the root; if no such node exists an empty,
 *   unindexed node for the prefix is returned.
 * @returns {object} Root node, or the sub-tree root selected by `pathPrefix`.
 */
function buildTree(pages, pathPrefix) {
  const splitSegments = (urlPath) => urlPath.split("/").filter(Boolean);
  const nodeMap = /* @__PURE__ */ new Map();
  const root2 = makeNode("/", 0);
  nodeMap.set("/", root2);
  for (const page of pages) {
    const segments = splitSegments(normalizeUrlPath(page.url));
    // Walk down from the root, creating missing ancestors on the way. The
    // node reached last is the page's own node (the root when there are no
    // segments). Keys are always rebuilt from segments, so the lookup can
    // never miss even if the normalized URL carries a trailing or doubled
    // slash.
    let node = root2;
    let partialUrl = "";
    segments.forEach((segment, index) => {
      partialUrl += `/${segment}`;
      let next = nodeMap.get(partialUrl);
      if (!next) {
        next = makeNode(partialUrl, index + 1);
        nodeMap.set(partialUrl, next);
        // A node is linked to its parent exactly once, when it is created.
        node.children.push(next);
      }
      node = next;
    });
    node.title = page.title;
    node.routeFile = page.routeFile;
    node.isIndexed = true;
  }
  // Every node lives in nodeMap, so one flat pass orders and counts them all.
  for (const node of nodeMap.values()) {
    node.children.sort((a, b) => a.url.localeCompare(b.url));
    node.childCount = node.children.length;
  }
  if (pathPrefix) {
    const normalizedPrefix = normalizeUrlPath(pathPrefix);
    const prefixSegments = splitSegments(normalizedPrefix);
    const subtreeRoot = nodeMap.get(`/${prefixSegments.join("/")}`);
    if (subtreeRoot) {
      return subtreeRoot;
    }
    // Nothing is indexed under the prefix: return an empty placeholder.
    return makeNode(normalizedPrefix, prefixSegments.length);
  }
  return root2;
}
21235
/**
 * Returns a copy of `base` with per-request ranking and search overrides
 * layered on top. `search`, `ranking`, and `ranking.weights` are merged key
 * by key; everything else in `base` is carried over untouched. Neither
 * argument is mutated.
 *
 * Override keys whose value is `undefined` are ignored, so an explicitly
 * undefined override can never erase a configured base setting.
 *
 * @param {object} base - Full configuration; must contain `ranking.weights`.
 * @param {{search?: object, ranking?: object}} [overrides] - Partial overrides.
 * @returns {object} New configuration object with the overrides applied.
 */
function mergeRankingOverrides(base, overrides) {
  // Keep only keys that actually carry a value.
  const definedOnly = (partial) => Object.fromEntries(
    Object.entries(partial ?? {}).filter(([, value]) => value !== void 0)
  );
  return {
    ...base,
    search: {
      ...base.search,
      ...definedOnly(overrides?.search)
    },
    ranking: {
      ...base.ranking,
      ...definedOnly(overrides?.ranking),
      // Merged last so a partial `weights` override extends the base weights
      // instead of replacing them wholesale.
      weights: {
        ...base.ranking.weights,
        ...definedOnly(overrides?.ranking?.weights)
      }
    }
  };
}
20005
21252
  var SearchEngine = class _SearchEngine {
20006
21253
  cwd;
20007
21254
  config;
@@ -20031,125 +21278,203 @@ var SearchEngine = class _SearchEngine {
20031
21278
  }
20032
21279
  const input = parsed.data;
20033
21280
  const totalStart = process.hrtime.bigint();
21281
+ const effectiveConfig = input.debug && input.rankingOverrides ? mergeRankingOverrides(this.config, input.rankingOverrides) : this.config;
20034
21282
  const resolvedScope = resolveScope(this.config, input.scope);
20035
21283
  const topK = input.topK ?? 10;
21284
+ const maxSubResults = input.maxSubResults ?? 5;
20036
21285
  const groupByPage = (input.groupBy ?? "page") === "page";
20037
- const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
20038
- const filterParts = [];
20039
- if (input.pathPrefix) {
20040
- const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
20041
- filterParts.push(`url GLOB '${prefix}*'`);
20042
- }
20043
- if (input.tags && input.tags.length > 0) {
20044
- for (const tag of input.tags) {
20045
- filterParts.push(`tags GLOB '*${tag}*'`);
21286
+ const queryText = input.q;
21287
+ const pathPrefix = input.pathPrefix ? input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}` : void 0;
21288
+ const filterTags = input.tags && input.tags.length > 0 ? input.tags : void 0;
21289
+ const metaFilterStr = input.filters && Object.keys(input.filters).length > 0 ? buildMetaFilterString(input.filters) : "";
21290
+ const metaFilter = metaFilterStr || void 0;
21291
+ const applyPagePostFilters = (hits) => {
21292
+ let filtered = hits;
21293
+ if (pathPrefix) {
21294
+ filtered = filtered.filter((h) => h.url.startsWith(pathPrefix));
21295
+ }
21296
+ if (filterTags) {
21297
+ filtered = filtered.filter(
21298
+ (h) => filterTags.every((tag) => h.tags.includes(tag))
21299
+ );
20046
21300
  }
20047
- }
20048
- const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
20049
- const useDualSearch = this.config.search.dualSearch && groupByPage;
21301
+ return filtered;
21302
+ };
21303
+ const applyChunkPostFilters = (hits) => {
21304
+ let filtered = hits;
21305
+ if (filterTags) {
21306
+ filtered = filtered.filter(
21307
+ (h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
21308
+ );
21309
+ }
21310
+ return filtered;
21311
+ };
20050
21312
  const searchStart = process.hrtime.bigint();
20051
- let ranked;
20052
- if (useDualSearch) {
20053
- const chunkLimit = Math.max(topK * 10, 100);
20054
- const pageLimit = 20;
20055
- const [pageHits, chunkHits] = await Promise.all([
20056
- this.store.searchPages(
20057
- input.q,
20058
- {
20059
- limit: pageLimit,
20060
- semanticWeight: this.config.search.semanticWeight,
20061
- inputEnrichment: this.config.search.inputEnrichment,
20062
- filter
20063
- },
20064
- resolvedScope
20065
- ),
20066
- this.store.search(
20067
- input.q,
20068
- {
20069
- limit: chunkLimit,
20070
- semanticWeight: this.config.search.semanticWeight,
20071
- inputEnrichment: this.config.search.inputEnrichment,
20072
- reranking: false,
20073
- filter
20074
- },
21313
+ if (groupByPage) {
21314
+ const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
21315
+ const pageLimit = Math.max(topK * 2, 20);
21316
+ const pageHits = await this.store.searchPagesByText(
21317
+ queryText,
21318
+ { limit: pageLimit * fetchMultiplier, filter: metaFilter },
21319
+ resolvedScope
21320
+ );
21321
+ const filteredPages = applyPagePostFilters(pageHits);
21322
+ let rankedPages = rankPageHits(filteredPages, effectiveConfig, input.q, input.debug);
21323
+ rankedPages = trimPagesByScoreGap(rankedPages, effectiveConfig);
21324
+ const topPages = rankedPages.slice(0, topK);
21325
+ const chunkPromises = topPages.map(
21326
+ (page) => this.store.searchChunksByUrl(
21327
+ queryText,
21328
+ page.url,
21329
+ { limit: maxSubResults, filter: metaFilter },
20075
21330
  resolvedScope
20076
- )
20077
- ]);
20078
- const rankedChunks = rankHits(chunkHits, this.config, input.q);
20079
- ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
21331
+ ).then((chunks) => applyChunkPostFilters(chunks))
21332
+ );
21333
+ const allChunks = await Promise.all(chunkPromises);
21334
+ const searchMs = hrTimeMs(searchStart);
21335
+ const results = this.buildPageFirstResults(topPages, allChunks, input.q, input.debug, maxSubResults);
21336
+ return {
21337
+ q: input.q,
21338
+ scope: resolvedScope.scopeName,
21339
+ results,
21340
+ meta: {
21341
+ timingsMs: {
21342
+ search: Math.round(searchMs),
21343
+ total: Math.round(hrTimeMs(totalStart))
21344
+ }
21345
+ }
21346
+ };
20080
21347
  } else {
21348
+ const candidateK = Math.max(50, topK);
21349
+ const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
20081
21350
  const hits = await this.store.search(
20082
- input.q,
20083
- {
20084
- limit: candidateK,
20085
- semanticWeight: this.config.search.semanticWeight,
20086
- inputEnrichment: this.config.search.inputEnrichment,
20087
- reranking: this.config.search.reranking,
20088
- filter
20089
- },
21351
+ queryText,
21352
+ { limit: candidateK * fetchMultiplier, filter: metaFilter },
20090
21353
  resolvedScope
20091
21354
  );
20092
- ranked = rankHits(hits, this.config, input.q);
20093
- }
20094
- const searchMs = hrTimeMs(searchStart);
20095
- const results = this.buildResults(ranked, topK, groupByPage, input.q);
20096
- return {
20097
- q: input.q,
20098
- scope: resolvedScope.scopeName,
20099
- results,
20100
- meta: {
20101
- timingsMs: {
20102
- search: Math.round(searchMs),
20103
- total: Math.round(hrTimeMs(totalStart))
21355
+ let filtered = hits;
21356
+ if (pathPrefix) {
21357
+ filtered = filtered.filter((h) => h.metadata.url.startsWith(pathPrefix));
21358
+ }
21359
+ if (filterTags) {
21360
+ filtered = filtered.filter(
21361
+ (h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
21362
+ );
21363
+ }
21364
+ const ranked = rankHits(filtered, effectiveConfig, input.q, input.debug);
21365
+ const searchMs = hrTimeMs(searchStart);
21366
+ const results = this.buildResults(ranked, topK, false, maxSubResults, input.q, input.debug, effectiveConfig);
21367
+ return {
21368
+ q: input.q,
21369
+ scope: resolvedScope.scopeName,
21370
+ results,
21371
+ meta: {
21372
+ timingsMs: {
21373
+ search: Math.round(searchMs),
21374
+ total: Math.round(hrTimeMs(totalStart))
21375
+ }
20104
21376
  }
21377
+ };
21378
+ }
21379
+ }
21380
+ buildPageFirstResults(rankedPages, allChunks, query, debug, maxSubResults = 5) {
21381
+ return rankedPages.map((page, i) => {
21382
+ const chunks = allChunks[i] ?? [];
21383
+ const bestChunk = chunks[0];
21384
+ const snippet = bestChunk ? query ? queryAwareExcerpt(bestChunk.metadata.chunkText, query) : toSnippet(bestChunk.metadata.chunkText) : page.description || page.title;
21385
+ const result = {
21386
+ url: page.url,
21387
+ title: page.title,
21388
+ sectionTitle: bestChunk?.metadata.sectionTitle || void 0,
21389
+ snippet,
21390
+ chunkText: bestChunk?.metadata.chunkText || void 0,
21391
+ score: Number(page.finalScore.toFixed(6)),
21392
+ routeFile: page.routeFile,
21393
+ chunks: chunks.length > 0 ? chunks.slice(0, maxSubResults).map((c) => ({
21394
+ sectionTitle: c.metadata.sectionTitle || void 0,
21395
+ snippet: query ? queryAwareExcerpt(c.metadata.chunkText, query) : toSnippet(c.metadata.chunkText),
21396
+ chunkText: c.metadata.chunkText || void 0,
21397
+ headingPath: c.metadata.headingPath,
21398
+ score: Number(c.score.toFixed(6))
21399
+ })) : void 0
21400
+ };
21401
+ if (debug && page.breakdown) {
21402
+ result.breakdown = {
21403
+ baseScore: page.breakdown.baseScore,
21404
+ incomingLinkBoost: page.breakdown.incomingLinkBoost,
21405
+ depthBoost: page.breakdown.depthBoost,
21406
+ titleMatchBoost: page.breakdown.titleMatchBoost,
21407
+ freshnessBoost: page.breakdown.freshnessBoost,
21408
+ anchorTextMatchBoost: 0
21409
+ };
20105
21410
  }
20106
- };
21411
+ return result;
21412
+ });
20107
21413
  }
20108
- ensureSnippet(hit) {
21414
+ ensureSnippet(hit, query) {
21415
+ const chunkText = hit.hit.metadata.chunkText;
21416
+ if (query && chunkText) return queryAwareExcerpt(chunkText, query);
20109
21417
  const snippet = hit.hit.metadata.snippet;
20110
21418
  if (snippet && snippet.length >= 30) return snippet;
20111
- const chunkText = hit.hit.metadata.chunkText;
20112
21419
  if (chunkText) return toSnippet(chunkText);
20113
21420
  return snippet || "";
20114
21421
  }
20115
- buildResults(ordered, topK, groupByPage, _query) {
21422
+ buildResults(ordered, topK, groupByPage, maxSubResults, query, debug, config) {
21423
+ const cfg = config ?? this.config;
20116
21424
  if (groupByPage) {
20117
- let pages = aggregateByPage(ordered, this.config);
20118
- pages = trimByScoreGap(pages, this.config);
20119
- const minRatio = this.config.ranking.minChunkScoreRatio;
21425
+ let pages = aggregateByPage(ordered, cfg);
21426
+ pages = trimByScoreGap(pages, cfg);
21427
+ const minRatio = cfg.ranking.minChunkScoreRatio;
20120
21428
  return pages.slice(0, topK).map((page) => {
20121
21429
  const bestScore = page.bestChunk.finalScore;
20122
21430
  const minChunkScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
20123
- const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, 5);
20124
- return {
21431
+ const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, maxSubResults);
21432
+ const result = {
20125
21433
  url: page.url,
20126
21434
  title: page.title,
20127
21435
  sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
20128
- snippet: this.ensureSnippet(page.bestChunk),
21436
+ snippet: this.ensureSnippet(page.bestChunk, query),
21437
+ chunkText: page.bestChunk.hit.metadata.chunkText || void 0,
20129
21438
  score: Number(page.pageScore.toFixed(6)),
20130
21439
  routeFile: page.routeFile,
20131
- chunks: meaningful.length > 1 ? meaningful.map((c) => ({
21440
+ chunks: meaningful.length >= 1 ? meaningful.map((c) => ({
20132
21441
  sectionTitle: c.hit.metadata.sectionTitle || void 0,
20133
- snippet: this.ensureSnippet(c),
21442
+ snippet: this.ensureSnippet(c, query),
21443
+ chunkText: c.hit.metadata.chunkText || void 0,
20134
21444
  headingPath: c.hit.metadata.headingPath,
20135
21445
  score: Number(c.finalScore.toFixed(6))
20136
21446
  })) : void 0
20137
21447
  };
21448
+ if (debug && page.bestChunk.breakdown) {
21449
+ result.breakdown = page.bestChunk.breakdown;
21450
+ }
21451
+ return result;
20138
21452
  });
20139
21453
  } else {
20140
21454
  let filtered = ordered;
20141
- const minScore = this.config.ranking.minScore;
20142
- if (minScore > 0) {
20143
- filtered = ordered.filter((entry) => entry.finalScore >= minScore);
20144
- }
20145
- return filtered.slice(0, topK).map(({ hit, finalScore }) => ({
20146
- url: hit.metadata.url,
20147
- title: hit.metadata.title,
20148
- sectionTitle: hit.metadata.sectionTitle || void 0,
20149
- snippet: this.ensureSnippet({ hit, finalScore }),
20150
- score: Number(finalScore.toFixed(6)),
20151
- routeFile: hit.metadata.routeFile
20152
- }));
21455
+ const minScoreRatio = cfg.ranking.minScoreRatio;
21456
+ if (minScoreRatio > 0 && ordered.length > 0) {
21457
+ const topScore = ordered[0].finalScore;
21458
+ if (Number.isFinite(topScore) && topScore > 0) {
21459
+ const threshold = topScore * minScoreRatio;
21460
+ filtered = ordered.filter((entry) => entry.finalScore >= threshold);
21461
+ }
21462
+ }
21463
+ return filtered.slice(0, topK).map(({ hit, finalScore, breakdown }) => {
21464
+ const result = {
21465
+ url: hit.metadata.url,
21466
+ title: hit.metadata.title,
21467
+ sectionTitle: hit.metadata.sectionTitle || void 0,
21468
+ snippet: this.ensureSnippet({ hit, finalScore }, query),
21469
+ chunkText: hit.metadata.chunkText || void 0,
21470
+ score: Number(finalScore.toFixed(6)),
21471
+ routeFile: hit.metadata.routeFile
21472
+ };
21473
+ if (debug && breakdown) {
21474
+ result.breakdown = breakdown;
21475
+ }
21476
+ return result;
21477
+ });
20153
21478
  }
20154
21479
  }
20155
21480
  async getPage(pathOrUrl, scope) {
@@ -20175,6 +21500,116 @@ var SearchEngine = class _SearchEngine {
20175
21500
  markdown: page.markdown
20176
21501
  };
20177
21502
  }
21503
+ async listPages(opts) {
21504
+ const resolvedScope = resolveScope(this.config, opts?.scope);
21505
+ const pathPrefix = opts?.pathPrefix ? opts.pathPrefix.startsWith("/") ? opts.pathPrefix : `/${opts.pathPrefix}` : void 0;
21506
+ return this.store.listPages(resolvedScope, {
21507
+ cursor: opts?.cursor,
21508
+ limit: opts?.limit,
21509
+ pathPrefix
21510
+ });
21511
+ }
21512
+ async getSiteStructure(opts) {
21513
+ const maxPages = Math.min(opts?.maxPages ?? MAX_SITE_STRUCTURE_PAGES, MAX_SITE_STRUCTURE_PAGES);
21514
+ const allPages = [];
21515
+ let cursor;
21516
+ let truncated = false;
21517
+ do {
21518
+ const result = await this.listPages({
21519
+ pathPrefix: opts?.pathPrefix,
21520
+ scope: opts?.scope,
21521
+ cursor,
21522
+ limit: 200
21523
+ });
21524
+ allPages.push(...result.pages);
21525
+ cursor = result.nextCursor;
21526
+ if (allPages.length >= maxPages) {
21527
+ truncated = allPages.length > maxPages || !!cursor;
21528
+ allPages.length = maxPages;
21529
+ break;
21530
+ }
21531
+ } while (cursor);
21532
+ const root2 = buildTree(allPages, opts?.pathPrefix);
21533
+ return {
21534
+ root: root2,
21535
+ totalPages: allPages.length,
21536
+ truncated
21537
+ };
21538
+ }
21539
+ async getRelatedPages(pathOrUrl, opts) {
21540
+ const resolvedScope = resolveScope(this.config, opts?.scope);
21541
+ const urlPath = this.resolveInputPath(pathOrUrl);
21542
+ const topK = Math.min(opts?.topK ?? 10, 25);
21543
+ const source = await this.store.fetchPageWithVector(urlPath, resolvedScope);
21544
+ if (!source) {
21545
+ throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
21546
+ }
21547
+ const sourceOutgoing = new Set(source.metadata.outgoingLinkUrls ?? []);
21548
+ const semanticHits = await this.store.searchPagesByVector(
21549
+ source.vector,
21550
+ { limit: 50 },
21551
+ resolvedScope
21552
+ );
21553
+ const filteredHits = semanticHits.filter((h) => h.url !== urlPath);
21554
+ const semanticScoreMap = /* @__PURE__ */ new Map();
21555
+ for (const hit of filteredHits) {
21556
+ semanticScoreMap.set(hit.url, hit.score);
21557
+ }
21558
+ const candidateUrls = /* @__PURE__ */ new Set();
21559
+ for (const hit of filteredHits) {
21560
+ candidateUrls.add(hit.url);
21561
+ }
21562
+ for (const url of sourceOutgoing) {
21563
+ if (url !== urlPath) candidateUrls.add(url);
21564
+ }
21565
+ const missingUrls = [...sourceOutgoing].filter(
21566
+ (u) => u !== urlPath && !semanticScoreMap.has(u)
21567
+ );
21568
+ const fetchedPages = missingUrls.length > 0 ? await this.store.fetchPagesBatch(missingUrls, resolvedScope) : [];
21569
+ const metaMap = /* @__PURE__ */ new Map();
21570
+ for (const hit of filteredHits) {
21571
+ metaMap.set(hit.url, { title: hit.title, routeFile: hit.routeFile, outgoingLinkUrls: [] });
21572
+ }
21573
+ for (const p of fetchedPages) {
21574
+ metaMap.set(p.url, { title: p.title, routeFile: p.routeFile, outgoingLinkUrls: p.outgoingLinkUrls });
21575
+ }
21576
+ const semanticUrls = filteredHits.map((h) => h.url);
21577
+ if (semanticUrls.length > 0) {
21578
+ const semanticPageData = await this.store.fetchPagesBatch(semanticUrls, resolvedScope);
21579
+ for (const p of semanticPageData) {
21580
+ const existing = metaMap.get(p.url);
21581
+ if (existing) {
21582
+ existing.outgoingLinkUrls = p.outgoingLinkUrls;
21583
+ }
21584
+ }
21585
+ }
21586
+ const candidates = [];
21587
+ for (const url of candidateUrls) {
21588
+ const meta = metaMap.get(url);
21589
+ if (!meta) continue;
21590
+ const isOutgoing = sourceOutgoing.has(url);
21591
+ const isIncoming = meta.outgoingLinkUrls.includes(urlPath);
21592
+ const isLinked = isOutgoing || isIncoming;
21593
+ const dice = diceScore(urlPath, url);
21594
+ const semantic = semanticScoreMap.get(url) ?? 0;
21595
+ const score = compositeScore(isLinked, dice, semantic);
21596
+ const relationshipType = dominantRelationshipType(isOutgoing, isIncoming, dice);
21597
+ candidates.push({
21598
+ url,
21599
+ title: meta.title,
21600
+ score: Number(score.toFixed(6)),
21601
+ relationshipType,
21602
+ routeFile: meta.routeFile
21603
+ });
21604
+ }
21605
+ candidates.sort((a, b) => b.score - a.score);
21606
+ const results = candidates.slice(0, topK);
21607
+ return {
21608
+ sourceUrl: urlPath,
21609
+ scope: resolvedScope.scopeName,
21610
+ relatedPages: results
21611
+ };
21612
+ }
20178
21613
  async health() {
20179
21614
  return this.store.health();
20180
21615
  }
@@ -20197,14 +21632,40 @@ function createServer(engine) {
20197
21632
  server.registerTool(
20198
21633
  "search",
20199
21634
  {
20200
- description: "Semantic site search powered by Upstash Search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and groupBy.",
21635
+ description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
20201
21636
  inputSchema: {
20202
21637
  query: zod.z.string().min(1),
20203
21638
  scope: zod.z.string().optional(),
20204
21639
  topK: zod.z.number().int().positive().max(100).optional(),
20205
21640
  pathPrefix: zod.z.string().optional(),
20206
21641
  tags: zod.z.array(zod.z.string()).optional(),
20207
- groupBy: zod.z.enum(["page", "chunk"]).optional()
21642
+ filters: zod.z.record(zod.z.string(), zod.z.union([zod.z.string(), zod.z.number(), zod.z.boolean()])).optional(),
21643
+ groupBy: zod.z.enum(["page", "chunk"]).optional(),
21644
+ maxSubResults: zod.z.number().int().positive().max(20).optional()
21645
+ },
21646
+ outputSchema: {
21647
+ q: zod.z.string(),
21648
+ scope: zod.z.string(),
21649
+ results: zod.z.array(zod.z.object({
21650
+ url: zod.z.string(),
21651
+ title: zod.z.string(),
21652
+ sectionTitle: zod.z.string().optional(),
21653
+ snippet: zod.z.string(),
21654
+ score: zod.z.number(),
21655
+ routeFile: zod.z.string(),
21656
+ chunks: zod.z.array(zod.z.object({
21657
+ sectionTitle: zod.z.string().optional(),
21658
+ snippet: zod.z.string(),
21659
+ headingPath: zod.z.array(zod.z.string()),
21660
+ score: zod.z.number()
21661
+ })).optional()
21662
+ })),
21663
+ meta: zod.z.object({
21664
+ timingsMs: zod.z.object({
21665
+ search: zod.z.number(),
21666
+ total: zod.z.number()
21667
+ })
21668
+ })
20208
21669
  }
20209
21670
  },
20210
21671
  async (input) => {
@@ -20214,7 +21675,9 @@ function createServer(engine) {
20214
21675
  scope: input.scope,
20215
21676
  pathPrefix: input.pathPrefix,
20216
21677
  tags: input.tags,
20217
- groupBy: input.groupBy
21678
+ filters: input.filters,
21679
+ groupBy: input.groupBy,
21680
+ maxSubResults: input.maxSubResults
20218
21681
  });
20219
21682
  return {
20220
21683
  content: [
@@ -20222,7 +21685,8 @@ function createServer(engine) {
20222
21685
  type: "text",
20223
21686
  text: JSON.stringify(result, null, 2)
20224
21687
  }
20225
- ]
21688
+ ],
21689
+ structuredContent: result
20226
21690
  };
20227
21691
  }
20228
21692
  );
@@ -20247,8 +21711,134 @@ function createServer(engine) {
20247
21711
  };
20248
21712
  }
20249
21713
  );
21714
+ server.registerTool(
21715
+ "list_pages",
21716
+ {
21717
+ description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
21718
+ inputSchema: {
21719
+ pathPrefix: zod.z.string().optional(),
21720
+ cursor: zod.z.string().optional(),
21721
+ limit: zod.z.number().int().positive().max(200).optional(),
21722
+ scope: zod.z.string().optional()
21723
+ }
21724
+ },
21725
+ async (input) => {
21726
+ const result = await engine.listPages({
21727
+ pathPrefix: input.pathPrefix,
21728
+ cursor: input.cursor,
21729
+ limit: input.limit,
21730
+ scope: input.scope
21731
+ });
21732
+ return {
21733
+ content: [
21734
+ {
21735
+ type: "text",
21736
+ text: JSON.stringify(result, null, 2)
21737
+ }
21738
+ ]
21739
+ };
21740
+ }
21741
+ );
21742
+ server.registerTool(
21743
+ "get_site_structure",
21744
+ {
21745
+ description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
21746
+ inputSchema: {
21747
+ pathPrefix: zod.z.string().optional(),
21748
+ scope: zod.z.string().optional(),
21749
+ maxPages: zod.z.number().int().positive().max(2e3).optional()
21750
+ }
21751
+ },
21752
+ async (input) => {
21753
+ const result = await engine.getSiteStructure({
21754
+ pathPrefix: input.pathPrefix,
21755
+ scope: input.scope,
21756
+ maxPages: input.maxPages
21757
+ });
21758
+ return {
21759
+ content: [
21760
+ {
21761
+ type: "text",
21762
+ text: JSON.stringify(result, null, 2)
21763
+ }
21764
+ ]
21765
+ };
21766
+ }
21767
+ );
21768
+ server.registerTool(
21769
+ "find_source_file",
21770
+ {
21771
+ description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
21772
+ inputSchema: {
21773
+ query: zod.z.string().min(1),
21774
+ scope: zod.z.string().optional()
21775
+ }
21776
+ },
21777
+ async (input) => {
21778
+ const result = await engine.search({
21779
+ q: input.query,
21780
+ topK: 1,
21781
+ scope: input.scope
21782
+ });
21783
+ if (result.results.length === 0) {
21784
+ return {
21785
+ content: [
21786
+ {
21787
+ type: "text",
21788
+ text: JSON.stringify({
21789
+ error: "No matching content found for the given query."
21790
+ })
21791
+ }
21792
+ ]
21793
+ };
21794
+ }
21795
+ const match = result.results[0];
21796
+ const { url, routeFile, sectionTitle, snippet } = match;
21797
+ return {
21798
+ content: [
21799
+ {
21800
+ type: "text",
21801
+ text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
21802
+ }
21803
+ ]
21804
+ };
21805
+ }
21806
+ );
21807
+ server.registerTool(
21808
+ "get_related_pages",
21809
+ {
21810
+ description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
21811
+ inputSchema: {
21812
+ pathOrUrl: zod.z.string().min(1),
21813
+ scope: zod.z.string().optional(),
21814
+ topK: zod.z.number().int().positive().max(25).optional()
21815
+ }
21816
+ },
21817
+ async (input) => {
21818
+ const result = await engine.getRelatedPages(input.pathOrUrl, {
21819
+ topK: input.topK,
21820
+ scope: input.scope
21821
+ });
21822
+ return {
21823
+ content: [
21824
+ {
21825
+ type: "text",
21826
+ text: JSON.stringify(result, null, 2)
21827
+ }
21828
+ ]
21829
+ };
21830
+ }
21831
+ );
20250
21832
  return server;
20251
21833
  }
21834
/**
 * Resolves the API key for the MCP HTTP endpoint.
 *
 * A key set directly in the config wins (anything other than null or
 * undefined, including an empty string). Otherwise, if `apiKeyEnv` names an
 * environment variable, that variable's value is returned.
 *
 * @param {{mcp: {http: {apiKey?: string, apiKeyEnv?: string}}}} config - Resolved configuration.
 * @returns {string|undefined} The API key, or undefined when none is configured.
 */
function resolveApiKey(config) {
  const { apiKey, apiKeyEnv } = config.mcp.http;
  if (apiKey !== null && apiKey !== void 0) {
    return apiKey;
  }
  if (!apiKeyEnv) {
    return void 0;
  }
  return process.env[apiKeyEnv];
}
21837
/**
 * Checks a caller-supplied API key against the expected one.
 *
 * Both values are hashed with SHA-256 first so that the comparison always
 * runs over equal-length buffers, which `crypto.timingSafeEqual` requires.
 * Anything that cannot be hashed (neither a string nor a byte view) is
 * rejected rather than allowed to throw, so a malformed credential results
 * in a plain authentication failure.
 *
 * @param {string|Buffer|ArrayBufferView} provided - Key sent by the client.
 * @param {string|Buffer|ArrayBufferView} expected - Configured key.
 * @returns {boolean} True only when both keys are hashable and identical.
 */
function verifyApiKey(provided, expected) {
  const isHashable = (value) => typeof value === "string" || ArrayBuffer.isView(value);
  if (!isHashable(provided) || !isHashable(expected)) {
    return false;
  }
  const sha256 = (value) => crypto.createHash("sha256").update(value).digest();
  return crypto.timingSafeEqual(sha256(provided), sha256(expected));
}
20252
21842
  function redirectConsoleToStderr() {
20253
21843
  console.log = (...args) => {
20254
21844
  process.stderr.write(`[LOG] ${args.map(String).join(" ")}
@@ -20263,7 +21853,22 @@ async function startHttpServer(serverFactory, config, opts) {
20263
21853
  const app = express_js.createMcpExpressApp();
20264
21854
  const port = opts.httpPort ?? config.mcp.http.port;
20265
21855
  const endpointPath = opts.httpPath ?? config.mcp.http.path;
21856
+ const isPublic = config.mcp.access === "public";
21857
+ const host = isPublic ? "0.0.0.0" : "127.0.0.1";
21858
+ const apiKey = isPublic ? resolveApiKey(config) : void 0;
20266
21859
  app.post(endpointPath, async (req, res) => {
21860
+ if (isPublic && apiKey) {
21861
+ const authHeader = req.headers["authorization"];
21862
+ const provided = (authHeader?.startsWith("Bearer ") ? authHeader.slice(7) : void 0) ?? req.headers["x-api-key"] ?? "";
21863
+ if (!provided || !verifyApiKey(provided, apiKey)) {
21864
+ res.status(401).json({
21865
+ jsonrpc: "2.0",
21866
+ error: { code: -32001, message: "Unauthorized" },
21867
+ id: null
21868
+ });
21869
+ return;
21870
+ }
21871
+ }
20267
21872
  const server = serverFactory();
20268
21873
  const transport = new streamableHttp_js.StreamableHTTPServerTransport({
20269
21874
  sessionIdGenerator: void 0
@@ -20313,9 +21918,12 @@ async function startHttpServer(serverFactory, config, opts) {
20313
21918
  );
20314
21919
  });
20315
21920
  await new Promise((resolve, reject) => {
20316
- const instance = app.listen(port, "127.0.0.1", () => {
20317
- process.stderr.write(`SearchSocket MCP HTTP server listening on http://127.0.0.1:${port}${endpointPath}
21921
+ const instance = app.listen(port, host, () => {
21922
+ process.stderr.write(`SearchSocket MCP HTTP server listening on http://${host}:${port}${endpointPath}
20318
21923
  `);
21924
+ if (isPublic) {
21925
+ process.stderr.write("WARNING: Server is in public mode. Ensure HTTPS is configured via a reverse proxy for production use.\n");
21926
+ }
20319
21927
  resolve();
20320
21928
  });
20321
21929
  instance.once("error", reject);
@@ -20330,6 +21938,13 @@ async function runMcpServer(options = {}) {
20330
21938
  cwd: options.cwd,
20331
21939
  configPath: options.configPath
20332
21940
  });
21941
+ if (options.access) config.mcp.access = options.access;
21942
+ if (options.apiKey) config.mcp.http.apiKey = options.apiKey;
21943
+ if (config.mcp.access === "public" && !resolveApiKey(config)) {
21944
+ throw new Error(
21945
+ 'MCP access is "public" but no API key is configured. Pass --api-key or set mcp.http.apiKey / mcp.http.apiKeyEnv in config.'
21946
+ );
21947
+ }
20333
21948
  const resolvedTransport = options.transport ?? config.mcp.transport;
20334
21949
  if (resolvedTransport === "stdio") {
20335
21950
  redirectConsoleToStderr();
@@ -20347,8 +21962,6 @@ async function runMcpServer(options = {}) {
20347
21962
  const stdioTransport = new stdio_js.StdioServerTransport();
20348
21963
  await server.connect(stdioTransport);
20349
21964
  }
20350
-
20351
- // src/sveltekit/handle.ts
20352
21965
  var InMemoryRateLimiter = class {
20353
21966
  constructor(windowMs, max) {
20354
21967
  this.windowMs = windowMs;
@@ -20376,7 +21989,13 @@ function searchsocketHandle(options = {}) {
20376
21989
  let enginePromise = null;
20377
21990
  let configPromise = null;
20378
21991
  let apiPath = options.path;
21992
+ let llmsServePath = null;
21993
+ let serveMarkdownVariants = false;
21994
+ let mcpPath;
21995
+ let mcpApiKey;
21996
+ let mcpEnableJsonResponse = true;
20379
21997
  let rateLimiter = null;
21998
+ let notConfigured = false;
20380
21999
  const getConfig = async () => {
20381
22000
  if (!configPromise) {
20382
22001
  let configP;
@@ -20393,6 +22012,13 @@ function searchsocketHandle(options = {}) {
20393
22012
  }
20394
22013
  configPromise = configP.then((config) => {
20395
22014
  apiPath = apiPath ?? config.api.path;
22015
+ mcpPath = config.mcp.handle.path;
22016
+ mcpApiKey = config.mcp.handle.apiKey;
22017
+ mcpEnableJsonResponse = config.mcp.handle.enableJsonResponse;
22018
+ if (config.llmsTxt.enable) {
22019
+ llmsServePath = "/" + config.llmsTxt.outputPath.replace(/^static\//, "");
22020
+ serveMarkdownVariants = config.llmsTxt.serveMarkdownVariants;
22021
+ }
20396
22022
  if (config.api.rateLimit && !isServerless()) {
20397
22023
  rateLimiter = new InMemoryRateLimiter(config.api.rateLimit.windowMs, config.api.rateLimit.max);
20398
22024
  }
@@ -20402,59 +22028,109 @@ function searchsocketHandle(options = {}) {
20402
22028
  return configPromise;
20403
22029
  };
20404
22030
  const getEngine = async () => {
22031
+ if (notConfigured) {
22032
+ throw new SearchSocketError(
22033
+ "SEARCH_NOT_CONFIGURED",
22034
+ "Search is not configured. Set the required Upstash environment variables to enable search.",
22035
+ 503
22036
+ );
22037
+ }
20405
22038
  if (!enginePromise) {
20406
22039
  const config = await getConfig();
20407
22040
  enginePromise = SearchEngine.create({
20408
22041
  cwd: options.cwd,
20409
22042
  config
22043
+ }).catch((error) => {
22044
+ enginePromise = null;
22045
+ if (error instanceof SearchSocketError && error.code === "VECTOR_BACKEND_UNAVAILABLE") {
22046
+ notConfigured = true;
22047
+ throw new SearchSocketError(
22048
+ "SEARCH_NOT_CONFIGURED",
22049
+ "Search is not configured. Set the required Upstash environment variables to enable search.",
22050
+ 503
22051
+ );
22052
+ }
22053
+ throw error;
20410
22054
  });
20411
22055
  }
20412
22056
  return enginePromise;
20413
22057
  };
20414
22058
  const bodyLimit = options.maxBodyBytes ?? 64 * 1024;
20415
22059
  return async ({ event, resolve }) => {
20416
- if (apiPath && event.url.pathname !== apiPath) {
20417
- return resolve(event);
22060
+ if (apiPath && !isApiPath(event.url.pathname, apiPath) && event.url.pathname !== llmsServePath) {
22061
+ const isMarkdownVariant = event.request.method === "GET" && event.url.pathname.endsWith(".md");
22062
+ if (mcpPath && event.url.pathname === mcpPath) {
22063
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
22064
+ }
22065
+ if (mcpPath) {
22066
+ if (serveMarkdownVariants && isMarkdownVariant) ; else {
22067
+ return resolve(event);
22068
+ }
22069
+ } else {
22070
+ if (configPromise || options.config || options.rawConfig) {
22071
+ await getConfig();
22072
+ if (mcpPath && event.url.pathname === mcpPath) {
22073
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
22074
+ }
22075
+ if (!(serveMarkdownVariants && isMarkdownVariant)) {
22076
+ return resolve(event);
22077
+ }
22078
+ } else {
22079
+ return resolve(event);
22080
+ }
22081
+ }
20418
22082
  }
20419
22083
  const config = await getConfig();
22084
+ if (llmsServePath && event.request.method === "GET" && event.url.pathname === llmsServePath) {
22085
+ const cwd = options.cwd ?? process.cwd();
22086
+ const filePath = path__default.default.resolve(cwd, config.llmsTxt.outputPath);
22087
+ try {
22088
+ const content = await fs8__default.default.readFile(filePath, "utf8");
22089
+ return new Response(content, {
22090
+ status: 200,
22091
+ headers: { "content-type": "text/plain; charset=utf-8" }
22092
+ });
22093
+ } catch {
22094
+ return resolve(event);
22095
+ }
22096
+ }
22097
+ if (serveMarkdownVariants && event.request.method === "GET" && event.url.pathname.endsWith(".md")) {
22098
+ let rawPath;
22099
+ try {
22100
+ rawPath = decodeURIComponent(event.url.pathname.slice(0, -3));
22101
+ } catch {
22102
+ return resolve(event);
22103
+ }
22104
+ const scope = event.url.searchParams?.get("scope") ?? void 0;
22105
+ try {
22106
+ const engine = await getEngine();
22107
+ const page = await engine.getPage(rawPath, scope);
22108
+ return new Response(page.markdown, {
22109
+ status: 200,
22110
+ headers: { "content-type": "text/markdown; charset=utf-8" }
22111
+ });
22112
+ } catch (error) {
22113
+ if (error instanceof SearchSocketError && error.status === 404) {
22114
+ return resolve(event);
22115
+ }
22116
+ throw error;
22117
+ }
22118
+ }
22119
+ if (mcpPath && event.url.pathname === mcpPath) {
22120
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
22121
+ }
20420
22122
  const targetPath = apiPath ?? config.api.path;
20421
- if (event.url.pathname !== targetPath) {
22123
+ if (!isApiPath(event.url.pathname, targetPath)) {
20422
22124
  return resolve(event);
20423
22125
  }
20424
- if (event.request.method === "OPTIONS") {
22126
+ const subPath = event.url.pathname.slice(targetPath.length);
22127
+ const method = event.request.method;
22128
+ if (method === "OPTIONS") {
20425
22129
  return new Response(null, {
20426
22130
  status: 204,
20427
22131
  headers: buildCorsHeaders(event.request, config)
20428
22132
  });
20429
22133
  }
20430
- if (event.request.method !== "POST") {
20431
- return withCors(
20432
- new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Method not allowed", 405))), {
20433
- status: 405,
20434
- headers: {
20435
- "content-type": "application/json"
20436
- }
20437
- }),
20438
- event.request,
20439
- config
20440
- );
20441
- }
20442
- const contentLength = Number(event.request.headers.get("content-length") ?? 0);
20443
- if (contentLength > bodyLimit) {
20444
- return withCors(
20445
- new Response(
20446
- JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Request body too large", 413))),
20447
- {
20448
- status: 413,
20449
- headers: {
20450
- "content-type": "application/json"
20451
- }
20452
- }
20453
- ),
20454
- event.request,
20455
- config
20456
- );
20457
- }
20458
22134
  if (rateLimiter) {
20459
22135
  const ip = event.getClientAddress?.() ?? event.request.headers.get("x-forwarded-for")?.split(",")[0]?.trim() ?? "unknown";
20460
22136
  if (!rateLimiter.check(ip)) {
@@ -20474,39 +22150,32 @@ function searchsocketHandle(options = {}) {
20474
22150
  }
20475
22151
  }
20476
22152
  try {
20477
- let rawBody;
20478
- if (typeof event.request.text === "function") {
20479
- rawBody = await event.request.text();
20480
- } else {
20481
- let parsedFallback;
20482
- try {
20483
- parsedFallback = await event.request.json();
20484
- } catch (error) {
20485
- if (error instanceof SyntaxError) {
20486
- throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
20487
- }
20488
- throw error;
22153
+ if (method === "GET") {
22154
+ if (subPath === "" || subPath === "/") {
22155
+ return await handleGetSearch(event, config, getEngine);
20489
22156
  }
20490
- rawBody = JSON.stringify(parsedFallback);
20491
- }
20492
- if (Buffer.byteLength(rawBody, "utf8") > bodyLimit) {
20493
- throw new SearchSocketError("INVALID_REQUEST", "Request body too large", 413);
22157
+ if (subPath === "/health") {
22158
+ return await handleGetHealth(event, config, getEngine);
22159
+ }
22160
+ if (subPath.startsWith("/pages/")) {
22161
+ return await handleGetPage(event, config, getEngine, subPath);
22162
+ }
22163
+ return withCors(
22164
+ new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Not found", 404))), {
22165
+ status: 404,
22166
+ headers: { "content-type": "application/json" }
22167
+ }),
22168
+ event.request,
22169
+ config
22170
+ );
20494
22171
  }
20495
- let body;
20496
- try {
20497
- body = JSON.parse(rawBody);
20498
- } catch {
20499
- throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
22172
+ if (method === "POST" && (subPath === "" || subPath === "/")) {
22173
+ return await handlePostSearch(event, config, getEngine, bodyLimit);
20500
22174
  }
20501
- const engine = await getEngine();
20502
- const searchRequest = body;
20503
- const result = await engine.search(searchRequest);
20504
22175
  return withCors(
20505
- new Response(JSON.stringify(result), {
20506
- status: 200,
20507
- headers: {
20508
- "content-type": "application/json"
20509
- }
22176
+ new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Method not allowed", 405))), {
22177
+ status: 405,
22178
+ headers: { "content-type": "application/json" }
20510
22179
  }),
20511
22180
  event.request,
20512
22181
  config
@@ -20527,6 +22196,183 @@ function searchsocketHandle(options = {}) {
20527
22196
  }
20528
22197
  };
20529
22198
  }
22199
/**
 * Tell whether a request pathname belongs to the search API.
 *
 * A pathname matches when it equals `apiPath` exactly or sits below it
 * (i.e. starts with `apiPath` followed by a slash).
 *
 * @param {string} pathname - Pathname of the incoming request.
 * @param {string} apiPath - Configured API base path.
 * @returns {boolean}
 */
function isApiPath(pathname, apiPath) {
  if (pathname === apiPath) {
    return true;
  }
  return pathname.startsWith(`${apiPath}/`);
}
22202
/**
 * Handle `GET <apiPath>?q=...` by translating query-string parameters into a
 * search request and returning the engine's result as JSON.
 *
 * Recognised parameters: `q` (required), `topK`, `scope`, `pathPrefix`,
 * `groupBy` ("page" | "chunk"), `maxSubResults` (1-20) and repeated `tags`.
 *
 * @param {object} event - Request event; reads `event.url.searchParams` and `event.request`.
 * @param {object} config - Resolved configuration, forwarded to `withCors`.
 * @param {() => Promise<object>} getEngine - Lazily resolves the search engine.
 * @returns {Promise<Response>} 200 JSON response wrapped with CORS headers.
 * @throws {SearchSocketError} `INVALID_REQUEST` (400) when a parameter is missing or invalid.
 */
async function handleGetSearch(event, config, getEngine) {
  const params = event.url.searchParams;

  // `Number.parseInt` alone would accept "5abc" (-> 5) or "1.5" (-> 1); the
  // error messages promise an integer, so only plain decimal digits are allowed.
  const parseDecimalInt = (raw) => {
    const text = raw.trim();
    return /^\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  };

  const q = params.get("q");
  if (!q || q.trim() === "") {
    throw new SearchSocketError("INVALID_REQUEST", "Missing required query parameter: q", 400);
  }
  const searchRequest = { q };

  const topK = params.get("topK");
  if (topK !== null) {
    const parsed = parseDecimalInt(topK);
    if (Number.isNaN(parsed) || parsed < 1) {
      throw new SearchSocketError("INVALID_REQUEST", "topK must be a positive integer", 400);
    }
    searchRequest.topK = parsed;
  }

  const scope = params.get("scope");
  if (scope !== null) searchRequest.scope = scope;

  const pathPrefix = params.get("pathPrefix");
  if (pathPrefix !== null) searchRequest.pathPrefix = pathPrefix;

  // An absent or empty `groupBy` is ignored; any other unknown value is rejected.
  const groupBy = params.get("groupBy");
  if (groupBy) {
    if (groupBy !== "page" && groupBy !== "chunk") {
      throw new SearchSocketError("INVALID_REQUEST", 'groupBy must be "page" or "chunk"', 400);
    }
    searchRequest.groupBy = groupBy;
  }

  const maxSubResults = params.get("maxSubResults");
  if (maxSubResults !== null) {
    const parsed = parseDecimalInt(maxSubResults);
    if (Number.isNaN(parsed) || parsed < 1 || parsed > 20) {
      throw new SearchSocketError("INVALID_REQUEST", "maxSubResults must be a positive integer between 1 and 20", 400);
    }
    searchRequest.maxSubResults = parsed;
  }

  const tags = params.getAll("tags");
  if (tags.length > 0) searchRequest.tags = tags;

  // The engine is resolved only after validation so bad requests fail fast.
  const engine = await getEngine();
  const result = await engine.search(searchRequest);
  return withCors(
    new Response(JSON.stringify(result), {
      status: 200,
      headers: { "content-type": "application/json" }
    }),
    event.request,
    config
  );
}
22249
/**
 * Handle `GET <apiPath>/health`: return the engine's health report as JSON.
 *
 * @param {object} event - Request event; `event.request` is forwarded to `withCors`.
 * @param {object} config - Resolved configuration, forwarded to `withCors`.
 * @param {() => Promise<object>} getEngine - Lazily resolves the search engine.
 * @returns {Promise<Response>} 200 JSON response wrapped with CORS headers.
 */
async function handleGetHealth(event, config, getEngine) {
  const searchEngine = await getEngine();
  const report = await searchEngine.health();
  const response = new Response(JSON.stringify(report), {
    status: 200,
    headers: { "content-type": "application/json" }
  });
  return withCors(response, event.request, config);
}
22261
/**
 * Handle `GET <apiPath>/pages/<path>`: look up a single page and return it as JSON.
 *
 * The portion of `subPath` after the `/pages` prefix is URI-decoded and passed
 * to `engine.getPage` together with the optional `scope` query parameter.
 *
 * @param {object} event - Request event; reads `event.url.searchParams` and `event.request`.
 * @param {object} config - Resolved configuration, forwarded to `withCors`.
 * @param {() => Promise<object>} getEngine - Lazily resolves the search engine.
 * @param {string} subPath - Request path relative to the API base, starting with `/pages`.
 * @returns {Promise<Response>} 200 JSON response wrapped with CORS headers.
 * @throws {SearchSocketError} `INVALID_REQUEST` (400) when the path is not valid percent-encoding.
 */
async function handleGetPage(event, config, getEngine, subPath) {
  const encodedPath = subPath.slice("/pages".length);

  let pagePath;
  try {
    pagePath = decodeURIComponent(encodedPath);
  } catch {
    // decodeURIComponent throws on malformed escapes; report it as a client error.
    throw new SearchSocketError("INVALID_REQUEST", "Malformed page path", 400);
  }

  const requestedScope = event.url.searchParams?.get("scope");
  const scope = requestedScope ?? undefined;

  const searchEngine = await getEngine();
  const page = await searchEngine.getPage(pagePath, scope);
  const response = new Response(JSON.stringify(page), {
    status: 200,
    headers: { "content-type": "application/json" }
  });
  return withCors(response, event.request, config);
}
22281
/**
 * Handle `POST <apiPath>`: read a JSON search request from the body, run it
 * through the engine and return the result as JSON.
 *
 * The body size is checked twice: first against the declared `content-length`
 * header, then against the actual UTF-8 byte length of the text that was read.
 *
 * @param {object} event - Request event; reads `event.request`.
 * @param {object} config - Resolved configuration, forwarded to `withCors`.
 * @param {() => Promise<object>} getEngine - Lazily resolves the search engine.
 * @param {number} bodyLimit - Maximum accepted body size in bytes.
 * @returns {Promise<Response>} 200 JSON response wrapped with CORS headers.
 * @throws {SearchSocketError} `INVALID_REQUEST` with 413 (too large) or 400 (malformed JSON).
 */
async function handlePostSearch(event, config, getEngine, bodyLimit) {
  const tooLarge = () => new SearchSocketError("INVALID_REQUEST", "Request body too large", 413);
  const malformed = () => new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);

  const declaredLength = Number(event.request.headers.get("content-length") ?? 0);
  if (declaredLength > bodyLimit) {
    throw tooLarge();
  }

  let bodyText;
  if (typeof event.request.text !== "function") {
    // Request objects without `text()`: parse via `json()` and re-serialise so
    // the size check and parse step below work on the same text.
    let fallbackValue;
    try {
      fallbackValue = await event.request.json();
    } catch (error) {
      if (error instanceof SyntaxError) {
        throw malformed();
      }
      throw error;
    }
    bodyText = JSON.stringify(fallbackValue);
  } else {
    bodyText = await event.request.text();
  }

  if (Buffer.byteLength(bodyText, "utf8") > bodyLimit) {
    throw tooLarge();
  }

  let searchRequest;
  try {
    searchRequest = JSON.parse(bodyText);
  } catch {
    throw malformed();
  }

  const searchEngine = await getEngine();
  const searchResult = await searchEngine.search(searchRequest);
  const response = new Response(JSON.stringify(searchResult), {
    status: 200,
    headers: { "content-type": "application/json" }
  });
  return withCors(response, event.request, config);
}
22322
/**
 * Serve a single MCP request through the web-standard streamable HTTP transport.
 *
 * When `apiKey` is set, the request must carry `Authorization: Bearer <apiKey>`;
 * otherwise a JSON-RPC "Unauthorized" error is returned with HTTP 401. Any
 * failure while resolving the engine, connecting or handling the request is
 * converted into a JSON-RPC internal-error response with HTTP 500.
 *
 * @param {object} event - Request event; reads `event.request`.
 * @param {string | undefined} apiKey - Expected bearer token; falsy disables the check.
 * @param {boolean} enableJsonResponse - Forwarded to the transport; when true the
 *   transport and server are closed as soon as the response has been produced.
 * @param {() => Promise<object>} getEngine - Lazily resolves the search engine.
 * @returns {Promise<Response>}
 */
async function handleMcpRequest(event, apiKey, enableJsonResponse, getEngine) {
  if (apiKey) {
    const authHeader = event.request.headers.get("authorization") ?? "";
    const token = authHeader.startsWith("Bearer ") ? authHeader.slice(7) : "";
    // Compare fixed-size SHA-256 digests instead of the raw strings: this keeps
    // `timingSafeEqual` happy for inputs of different lengths without an early
    // length check that would reveal the key length, and matches `verifyApiKey`.
    const sha256 = (value) => crypto.createHash("sha256").update(value).digest();
    if (!crypto.timingSafeEqual(sha256(token), sha256(apiKey))) {
      return new Response(
        JSON.stringify({
          jsonrpc: "2.0",
          error: { code: -32001, message: "Unauthorized" },
          id: null
        }),
        { status: 401, headers: { "content-type": "application/json" } }
      );
    }
  }
  // A fresh transport (no session id generator) and server are created per request.
  const transport = new webStandardStreamableHttp_js.WebStandardStreamableHTTPServerTransport({
    sessionIdGenerator: void 0,
    enableJsonResponse
  });
  let server;
  try {
    const engine = await getEngine();
    server = createServer(engine);
    await server.connect(transport);
    const response = await transport.handleRequest(event.request);
    if (enableJsonResponse) {
      // The JSON response is complete at this point, so resources can be released.
      // NOTE(review): in the non-JSON mode nothing is closed here, presumably
      // because the response is still streaming; confirm against the SDK.
      await transport.close();
      await server.close();
    }
    return response;
  } catch (error) {
    // Best-effort cleanup: a failure while closing must not mask the original error.
    try {
      await transport.close();
    } catch {
      // Intentionally ignored; see above.
    }
    try {
      await server?.close();
    } catch {
      // Intentionally ignored; see above.
    }
    return new Response(
      JSON.stringify({
        jsonrpc: "2.0",
        error: {
          code: -32603,
          message: error instanceof Error ? error.message : "Internal server error"
        },
        id: null
      }),
      { status: 500, headers: { "content-type": "application/json" } }
    );
  }
}
20530
22376
  function buildCorsHeaders(request, config) {
20531
22377
  const allowOrigins = config.api.cors.allowOrigins;
20532
22378
  if (!allowOrigins || allowOrigins.length === 0) {
@@ -20539,7 +22385,7 @@ function buildCorsHeaders(request, config) {
20539
22385
  }
20540
22386
  return {
20541
22387
  "access-control-allow-origin": allowOrigins.includes("*") ? "*" : origin,
20542
- "access-control-allow-methods": "POST, OPTIONS",
22388
+ "access-control-allow-methods": "GET, POST, OPTIONS",
20543
22389
  "access-control-allow-headers": "content-type"
20544
22390
  };
20545
22391
  }
@@ -20575,9 +22421,6 @@ function shouldRunAutoIndex(options) {
20575
22421
  if (explicit && /^(1|true|yes)$/i.test(explicit)) {
20576
22422
  return true;
20577
22423
  }
20578
- if (process.env.CI && /^(1|true)$/i.test(process.env.CI)) {
20579
- return true;
20580
- }
20581
22424
  return false;
20582
22425
  }
20583
22426
  function searchsocketVitePlugin(options = {}) {
@@ -20602,7 +22445,8 @@ function searchsocketVitePlugin(options = {}) {
20602
22445
  const pipeline = await IndexPipeline.create({
20603
22446
  cwd,
20604
22447
  configPath: options.configPath,
20605
- logger: logger3
22448
+ logger: logger3,
22449
+ hooks: options.hooks
20606
22450
  });
20607
22451
  const stats = await pipeline.run({
20608
22452
  changedOnly: options.changedOnly ?? true,