searchsocket 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -5,18 +5,20 @@ var path = require('path');
5
5
  var jiti = require('jiti');
6
6
  var zod = require('zod');
7
7
  var child_process = require('child_process');
8
+ var vector = require('@upstash/vector');
8
9
  var crypto = require('crypto');
9
10
  var cheerio = require('cheerio');
10
11
  var matter = require('gray-matter');
11
12
  var fg = require('fast-glob');
12
13
  var pLimit = require('p-limit');
13
- var fs3 = require('fs/promises');
14
+ var fs8 = require('fs/promises');
14
15
  var net = require('net');
15
16
  var zlib = require('zlib');
16
17
  var mcp_js = require('@modelcontextprotocol/sdk/server/mcp.js');
17
18
  var stdio_js = require('@modelcontextprotocol/sdk/server/stdio.js');
18
19
  var streamableHttp_js = require('@modelcontextprotocol/sdk/server/streamableHttp.js');
19
20
  var express_js = require('@modelcontextprotocol/sdk/server/express.js');
21
+ var webStandardStreamableHttp_js = require('@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js');
20
22
 
21
23
  function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
22
24
 
@@ -25,7 +27,7 @@ var path__default = /*#__PURE__*/_interopDefault(path);
25
27
  var matter__default = /*#__PURE__*/_interopDefault(matter);
26
28
  var fg__default = /*#__PURE__*/_interopDefault(fg);
27
29
  var pLimit__default = /*#__PURE__*/_interopDefault(pLimit);
28
- var fs3__default = /*#__PURE__*/_interopDefault(fs3);
30
+ var fs8__default = /*#__PURE__*/_interopDefault(fs8);
29
31
  var net__default = /*#__PURE__*/_interopDefault(net);
30
32
 
31
33
  var __getOwnPropNames = Object.getOwnPropertyNames;
@@ -5025,32 +5027,32 @@ var require_URL = __commonJS({
5025
5027
  else
5026
5028
  return basepath.substring(0, lastslash + 1) + refpath;
5027
5029
  }
5028
- function remove_dot_segments(path13) {
5029
- if (!path13) return path13;
5030
+ function remove_dot_segments(path14) {
5031
+ if (!path14) return path14;
5030
5032
  var output = "";
5031
- while (path13.length > 0) {
5032
- if (path13 === "." || path13 === "..") {
5033
- path13 = "";
5033
+ while (path14.length > 0) {
5034
+ if (path14 === "." || path14 === "..") {
5035
+ path14 = "";
5034
5036
  break;
5035
5037
  }
5036
- var twochars = path13.substring(0, 2);
5037
- var threechars = path13.substring(0, 3);
5038
- var fourchars = path13.substring(0, 4);
5038
+ var twochars = path14.substring(0, 2);
5039
+ var threechars = path14.substring(0, 3);
5040
+ var fourchars = path14.substring(0, 4);
5039
5041
  if (threechars === "../") {
5040
- path13 = path13.substring(3);
5042
+ path14 = path14.substring(3);
5041
5043
  } else if (twochars === "./") {
5042
- path13 = path13.substring(2);
5044
+ path14 = path14.substring(2);
5043
5045
  } else if (threechars === "/./") {
5044
- path13 = "/" + path13.substring(3);
5045
- } else if (twochars === "/." && path13.length === 2) {
5046
- path13 = "/";
5047
- } else if (fourchars === "/../" || threechars === "/.." && path13.length === 3) {
5048
- path13 = "/" + path13.substring(4);
5046
+ path14 = "/" + path14.substring(3);
5047
+ } else if (twochars === "/." && path14.length === 2) {
5048
+ path14 = "/";
5049
+ } else if (fourchars === "/../" || threechars === "/.." && path14.length === 3) {
5050
+ path14 = "/" + path14.substring(4);
5049
5051
  output = output.replace(/\/?[^\/]*$/, "");
5050
5052
  } else {
5051
- var segment = path13.match(/(\/?([^\/]*))/)[0];
5053
+ var segment = path14.match(/(\/?([^\/]*))/)[0];
5052
5054
  output += segment;
5053
- path13 = path13.substring(segment.length);
5055
+ path14 = path14.substring(segment.length);
5054
5056
  }
5055
5057
  }
5056
5058
  return output;
@@ -16646,6 +16648,7 @@ var searchSocketConfigSchema = zod.z.object({
16646
16648
  dropSelectors: zod.z.array(zod.z.string()).optional(),
16647
16649
  ignoreAttr: zod.z.string().optional(),
16648
16650
  noindexAttr: zod.z.string().optional(),
16651
+ imageDescAttr: zod.z.string().optional(),
16649
16652
  respectRobotsNoindex: zod.z.boolean().optional()
16650
16653
  }).optional(),
16651
16654
  transform: zod.z.object({
@@ -16661,35 +16664,48 @@ var searchSocketConfigSchema = zod.z.object({
16661
16664
  headingPathDepth: zod.z.number().int().positive().optional(),
16662
16665
  dontSplitInside: zod.z.array(zod.z.enum(["code", "table", "blockquote"])).optional(),
16663
16666
  prependTitle: zod.z.boolean().optional(),
16664
- pageSummaryChunk: zod.z.boolean().optional()
16667
+ pageSummaryChunk: zod.z.boolean().optional(),
16668
+ weightHeadings: zod.z.boolean().optional()
16665
16669
  }).optional(),
16666
16670
  upstash: zod.z.object({
16667
16671
  url: zod.z.string().url().optional(),
16668
16672
  token: zod.z.string().min(1).optional(),
16669
16673
  urlEnv: zod.z.string().min(1).optional(),
16670
- tokenEnv: zod.z.string().min(1).optional()
16674
+ tokenEnv: zod.z.string().min(1).optional(),
16675
+ namespaces: zod.z.object({
16676
+ pages: zod.z.string().min(1).optional(),
16677
+ chunks: zod.z.string().min(1).optional()
16678
+ }).optional()
16679
+ }).optional(),
16680
+ embedding: zod.z.object({
16681
+ model: zod.z.string().optional(),
16682
+ dimensions: zod.z.number().int().positive().optional(),
16683
+ taskType: zod.z.string().optional(),
16684
+ batchSize: zod.z.number().int().positive().optional()
16671
16685
  }).optional(),
16672
16686
  search: zod.z.object({
16673
- semanticWeight: zod.z.number().min(0).max(1).optional(),
16674
- inputEnrichment: zod.z.boolean().optional(),
16675
- reranking: zod.z.boolean().optional(),
16676
16687
  dualSearch: zod.z.boolean().optional(),
16677
16688
  pageSearchWeight: zod.z.number().min(0).max(1).optional()
16678
16689
  }).optional(),
16679
16690
  ranking: zod.z.object({
16680
16691
  enableIncomingLinkBoost: zod.z.boolean().optional(),
16681
16692
  enableDepthBoost: zod.z.boolean().optional(),
16693
+ enableFreshnessBoost: zod.z.boolean().optional(),
16694
+ freshnessDecayRate: zod.z.number().positive().optional(),
16695
+ enableAnchorTextBoost: zod.z.boolean().optional(),
16682
16696
  pageWeights: zod.z.record(zod.z.string(), zod.z.number().min(0)).optional(),
16683
16697
  aggregationCap: zod.z.number().int().positive().optional(),
16684
16698
  aggregationDecay: zod.z.number().min(0).max(1).optional(),
16685
16699
  minChunkScoreRatio: zod.z.number().min(0).max(1).optional(),
16686
- minScore: zod.z.number().min(0).max(1).optional(),
16700
+ minScoreRatio: zod.z.number().min(0).max(1).optional(),
16687
16701
  scoreGapThreshold: zod.z.number().min(0).max(1).optional(),
16688
16702
  weights: zod.z.object({
16689
16703
  incomingLinks: zod.z.number().optional(),
16690
16704
  depth: zod.z.number().optional(),
16691
16705
  aggregation: zod.z.number().optional(),
16692
- titleMatch: zod.z.number().optional()
16706
+ titleMatch: zod.z.number().optional(),
16707
+ freshness: zod.z.number().optional(),
16708
+ anchorText: zod.z.number().optional()
16693
16709
  }).optional()
16694
16710
  }).optional(),
16695
16711
  api: zod.z.object({
@@ -16704,12 +16720,28 @@ var searchSocketConfigSchema = zod.z.object({
16704
16720
  }).optional(),
16705
16721
  mcp: zod.z.object({
16706
16722
  enable: zod.z.boolean().optional(),
16723
+ access: zod.z.enum(["public", "private"]).optional(),
16707
16724
  transport: zod.z.enum(["stdio", "http"]).optional(),
16708
16725
  http: zod.z.object({
16709
16726
  port: zod.z.number().int().positive().optional(),
16710
- path: zod.z.string().optional()
16727
+ path: zod.z.string().optional(),
16728
+ apiKey: zod.z.string().min(1).optional(),
16729
+ apiKeyEnv: zod.z.string().min(1).optional()
16730
+ }).optional(),
16731
+ handle: zod.z.object({
16732
+ path: zod.z.string().optional(),
16733
+ apiKey: zod.z.string().min(1).optional(),
16734
+ enableJsonResponse: zod.z.boolean().optional()
16711
16735
  }).optional()
16712
16736
  }).optional(),
16737
+ llmsTxt: zod.z.object({
16738
+ enable: zod.z.boolean().optional(),
16739
+ outputPath: zod.z.string().optional(),
16740
+ title: zod.z.string().optional(),
16741
+ description: zod.z.string().optional(),
16742
+ generateFull: zod.z.boolean().optional(),
16743
+ serveMarkdownVariants: zod.z.boolean().optional()
16744
+ }).optional(),
16713
16745
  state: zod.z.object({
16714
16746
  dir: zod.z.string().optional()
16715
16747
  }).optional()
@@ -16748,6 +16780,7 @@ function createDefaultConfig(projectId) {
16748
16780
  dropSelectors: DEFAULT_DROP_SELECTORS,
16749
16781
  ignoreAttr: "data-search-ignore",
16750
16782
  noindexAttr: "data-search-noindex",
16783
+ imageDescAttr: "data-search-description",
16751
16784
  respectRobotsNoindex: true
16752
16785
  },
16753
16786
  transform: {
@@ -16757,39 +16790,52 @@ function createDefaultConfig(projectId) {
16757
16790
  },
16758
16791
  chunking: {
16759
16792
  strategy: "hybrid",
16760
- maxChars: 2200,
16793
+ maxChars: 1500,
16761
16794
  overlapChars: 200,
16762
16795
  minChars: 250,
16763
16796
  headingPathDepth: 3,
16764
16797
  dontSplitInside: ["code", "table", "blockquote"],
16765
16798
  prependTitle: true,
16766
- pageSummaryChunk: true
16799
+ pageSummaryChunk: true,
16800
+ weightHeadings: true
16767
16801
  },
16768
16802
  upstash: {
16769
- urlEnv: "UPSTASH_SEARCH_REST_URL",
16770
- tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
16803
+ urlEnv: "UPSTASH_VECTOR_REST_URL",
16804
+ tokenEnv: "UPSTASH_VECTOR_REST_TOKEN",
16805
+ namespaces: {
16806
+ pages: "pages",
16807
+ chunks: "chunks"
16808
+ }
16809
+ },
16810
+ embedding: {
16811
+ model: "bge-large-en-v1.5",
16812
+ dimensions: 1024,
16813
+ taskType: "RETRIEVAL_DOCUMENT",
16814
+ batchSize: 100
16771
16815
  },
16772
16816
  search: {
16773
- semanticWeight: 0.75,
16774
- inputEnrichment: true,
16775
- reranking: true,
16776
16817
  dualSearch: true,
16777
16818
  pageSearchWeight: 0.3
16778
16819
  },
16779
16820
  ranking: {
16780
16821
  enableIncomingLinkBoost: true,
16781
16822
  enableDepthBoost: true,
16823
+ enableFreshnessBoost: false,
16824
+ freshnessDecayRate: 1e-3,
16825
+ enableAnchorTextBoost: false,
16782
16826
  pageWeights: {},
16783
16827
  aggregationCap: 5,
16784
16828
  aggregationDecay: 0.5,
16785
16829
  minChunkScoreRatio: 0.5,
16786
- minScore: 0.3,
16830
+ minScoreRatio: 0.7,
16787
16831
  scoreGapThreshold: 0.4,
16788
16832
  weights: {
16789
16833
  incomingLinks: 0.05,
16790
16834
  depth: 0.03,
16791
16835
  aggregation: 0.1,
16792
- titleMatch: 0.15
16836
+ titleMatch: 0.15,
16837
+ freshness: 0.1,
16838
+ anchorText: 0.1
16793
16839
  }
16794
16840
  },
16795
16841
  api: {
@@ -16800,12 +16846,23 @@ function createDefaultConfig(projectId) {
16800
16846
  },
16801
16847
  mcp: {
16802
16848
  enable: process.env.NODE_ENV !== "production",
16849
+ access: "private",
16803
16850
  transport: "stdio",
16804
16851
  http: {
16805
16852
  port: 3338,
16806
16853
  path: "/mcp"
16854
+ },
16855
+ handle: {
16856
+ path: "/api/mcp",
16857
+ enableJsonResponse: true
16807
16858
  }
16808
16859
  },
16860
+ llmsTxt: {
16861
+ enable: false,
16862
+ outputPath: "static/llms.txt",
16863
+ generateFull: true,
16864
+ serveMarkdownVariants: false
16865
+ },
16809
16866
  state: {
16810
16867
  dir: ".searchsocket"
16811
16868
  }
@@ -16933,7 +16990,15 @@ ${issues}`
16933
16990
  },
16934
16991
  upstash: {
16935
16992
  ...defaults.upstash,
16936
- ...parsed.upstash
16993
+ ...parsed.upstash,
16994
+ namespaces: {
16995
+ ...defaults.upstash.namespaces,
16996
+ ...parsed.upstash?.namespaces
16997
+ }
16998
+ },
16999
+ embedding: {
17000
+ ...defaults.embedding,
17001
+ ...parsed.embedding
16937
17002
  },
16938
17003
  search: {
16939
17004
  ...defaults.search,
@@ -16970,8 +17035,16 @@ ${issues}`
16970
17035
  http: {
16971
17036
  ...defaults.mcp.http,
16972
17037
  ...parsed.mcp?.http
17038
+ },
17039
+ handle: {
17040
+ ...defaults.mcp.handle,
17041
+ ...parsed.mcp?.handle
16973
17042
  }
16974
17043
  },
17044
+ llmsTxt: {
17045
+ ...defaults.llmsTxt,
17046
+ ...parsed.llmsTxt
17047
+ },
16975
17048
  state: {
16976
17049
  ...defaults.state,
16977
17050
  ...parsed.state
@@ -16991,6 +17064,15 @@ ${issues}`
16991
17064
  maxDepth: 10
16992
17065
  };
16993
17066
  }
17067
+ if (merged.mcp.access === "public") {
17068
+ const resolvedKey = merged.mcp.http.apiKey ?? (merged.mcp.http.apiKeyEnv ? process.env[merged.mcp.http.apiKeyEnv] : void 0);
17069
+ if (!resolvedKey) {
17070
+ throw new SearchSocketError(
17071
+ "CONFIG_MISSING",
17072
+ '`mcp.access` is "public" but no API key is configured. Set `mcp.http.apiKey` or `mcp.http.apiKeyEnv`.'
17073
+ );
17074
+ }
17075
+ }
16994
17076
  if (merged.source.mode === "crawl" && !merged.source.crawl?.baseUrl) {
16995
17077
  throw new SearchSocketError("CONFIG_MISSING", "`source.crawl.baseUrl` is required when source.mode is crawl.");
16996
17078
  }
@@ -17054,13 +17136,84 @@ function normalizeMarkdown(input) {
17054
17136
  function sanitizeScopeName(scopeName) {
17055
17137
  return scopeName.toLowerCase().replace(/[^a-z0-9._-]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 80);
17056
17138
  }
17139
/**
 * Flatten markdown into single-line plain text.
 *
 * Fenced code blocks are replaced by a space, inline code keeps its content
 * without the backticks, the characters `# > * _ | -` become spaces, and runs
 * of whitespace collapse to a single space before the result is trimmed.
 *
 * @param {string} markdown
 * @returns {string}
 */
function markdownToPlain(markdown) {
  return markdown
    .replace(/```[\s\S]*?```/g, " ")
    .replace(/`([^`]+)`/g, "$1")
    .replace(/[#>*_|\-]/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}

/**
 * Leading plain-text snippet of a markdown string.
 *
 * @param {string} markdown
 * @param {number} [maxLen=220] Maximum snippet length, including the ellipsis.
 * @returns {string} The whole plain text when it fits, otherwise the first
 *   `maxLen - 1` characters (trimmed) followed by an ellipsis.
 */
function toSnippet(markdown, maxLen = 220) {
  const plain = markdownToPlain(markdown);
  if (plain.length <= maxLen) {
    return plain;
  }
  return `${plain.slice(0, Math.max(0, maxLen - 1)).trim()}\u2026`;
}

/**
 * Plain-text excerpt of a markdown string, positioned around the region that
 * covers the most distinct query terms.
 *
 * Falls back to `toSnippet` when the query yields no usable terms (shorter
 * than two characters) or none of them occur in the text.
 *
 * @param {string} markdown
 * @param {string} query
 * @param {number} [maxLen=220] Target excerpt length; word-boundary snapping
 *   may extend it, capped at roughly 1.2x before it is cut back to `maxLen`.
 * @returns {string} The excerpt, with an ellipsis on each side where text was
 *   omitted.
 */
function queryAwareExcerpt(markdown, query, maxLen = 220) {
  const plain = markdownToPlain(markdown);
  if (plain.length <= maxLen) return plain;

  // Normalize the query the same way as the text: markdownToPlain turns "_"
  // and "-" into spaces, so a raw token like "api_key" could never match.
  // De-duplicate so a repeated word does not count as several distinct terms.
  const tokens = [
    ...new Set(
      markdownToPlain(query)
        .toLowerCase()
        .split(/\s+/)
        .filter((t) => t.length >= 2)
    )
  ];
  if (tokens.length === 0) return toSnippet(markdown, maxLen);

  // Collect every occurrence of every term (case-insensitive, literal match).
  const positions = [];
  tokens.forEach((token, tokenIdx) => {
    const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    for (const m of plain.matchAll(new RegExp(escaped, "gi"))) {
      positions.push({ start: m.index, end: m.index + m[0].length, tokenIdx });
    }
  });
  if (positions.length === 0) return toSnippet(markdown, maxLen);
  positions.sort((a, b) => a.start - b.start);

  // Sliding window over the sorted hits: keep the window no wider than maxLen
  // and remember the one with the most distinct terms (ties: most hits).
  let bestUniqueCount = 0;
  let bestTotalCount = 0;
  let bestLeft = 0;
  let bestRight = 0;
  let left = 0;
  const tokenCounts = new Map();
  for (let right = 0; right < positions.length; right++) {
    const rightToken = positions[right].tokenIdx;
    tokenCounts.set(rightToken, (tokenCounts.get(rightToken) ?? 0) + 1);
    while (positions[right].end - positions[left].start > maxLen && left < right) {
      const leftToken = positions[left].tokenIdx;
      const remaining = tokenCounts.get(leftToken) - 1;
      if (remaining === 0) tokenCounts.delete(leftToken);
      else tokenCounts.set(leftToken, remaining);
      left++;
    }
    const uniqueCount = tokenCounts.size;
    const totalCount = right - left + 1;
    if (
      uniqueCount > bestUniqueCount ||
      (uniqueCount === bestUniqueCount && totalCount > bestTotalCount)
    ) {
      bestUniqueCount = uniqueCount;
      bestTotalCount = totalCount;
      bestLeft = left;
      bestRight = right;
    }
  }

  // Center a maxLen-wide slice on the best window, clamped to the text.
  const mid = Math.floor((positions[bestLeft].start + positions[bestRight].end) / 2);
  let start = Math.max(0, mid - Math.floor(maxLen / 2));
  let end = Math.min(plain.length, start + maxLen);
  start = Math.max(0, end - maxLen);

  // Snap both edges outward to a nearby space (within 30 chars) so the
  // excerpt does not begin or end in the middle of a word.
  if (start > 0) {
    const spaceIdx = plain.lastIndexOf(" ", start);
    if (spaceIdx > start - 30) {
      start = spaceIdx + 1;
    }
  }
  if (end < plain.length) {
    const spaceIdx = plain.indexOf(" ", end);
    if (spaceIdx !== -1 && spaceIdx < end + 30) {
      end = spaceIdx;
    }
  }

  let excerpt = plain.slice(start, end);
  if (excerpt.length > Math.ceil(maxLen * 1.2)) {
    // Snapping overshot: cut back to maxLen, preferring a word boundary.
    excerpt = excerpt.slice(0, maxLen);
    const lastSpace = excerpt.lastIndexOf(" ");
    if (lastSpace > maxLen * 0.5) {
      excerpt = excerpt.slice(0, lastSpace);
    }
    // Keep `end` in sync so the trailing ellipsis reflects the cut text.
    end = start + excerpt.length;
  }

  const prefix = start > 0 ? "\u2026" : "";
  const suffix = end < plain.length ? "\u2026" : "";
  return `${prefix}${excerpt}${suffix}`;
}
17064
17217
  function extractFirstParagraph(markdown) {
17065
17218
  const lines = markdown.split("\n");
17066
17219
  let inFence = false;
@@ -17121,162 +17274,342 @@ function ensureStateDirs(cwd, stateDir, scope) {
17121
17274
  fs__default.default.mkdirSync(statePath, { recursive: true });
17122
17275
  return { statePath };
17123
17276
  }
17124
-
17125
- // src/vector/upstash.ts
17126
- function chunkIndexName(scope) {
17127
- return `${scope.projectId}--${scope.scopeName}`;
17128
- }
17129
- function pageIndexName(scope) {
17130
- return `${scope.projectId}--${scope.scopeName}--pages`;
17131
- }
17132
17277
  var UpstashSearchStore = class {
17133
- client;
17278
+ index;
17279
+ pagesNs;
17280
+ chunksNs;
17134
17281
  constructor(opts) {
17135
- this.client = opts.client;
17136
- }
17137
- chunkIndex(scope) {
17138
- return this.client.index(chunkIndexName(scope));
17139
- }
17140
- pageIndex(scope) {
17141
- return this.client.index(pageIndexName(scope));
17282
+ this.index = opts.index;
17283
+ this.pagesNs = opts.index.namespace(opts.pagesNamespace);
17284
+ this.chunksNs = opts.index.namespace(opts.chunksNamespace);
17142
17285
  }
17143
17286
  async upsertChunks(chunks, scope) {
17144
17287
  if (chunks.length === 0) return;
17145
- const index = this.chunkIndex(scope);
17146
- const BATCH_SIZE = 100;
17288
+ const BATCH_SIZE = 90;
17147
17289
  for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
17148
17290
  const batch = chunks.slice(i, i + BATCH_SIZE);
17149
- await index.upsert(batch);
17150
- }
17151
- }
17152
- async search(query, opts, scope) {
17153
- const index = this.chunkIndex(scope);
17154
- const results = await index.search({
17155
- query,
17156
- limit: opts.limit,
17157
- semanticWeight: opts.semanticWeight,
17158
- inputEnrichment: opts.inputEnrichment,
17159
- reranking: opts.reranking,
17160
- filter: opts.filter
17291
+ await this.chunksNs.upsert(
17292
+ batch.map((c) => ({
17293
+ id: c.id,
17294
+ data: c.data,
17295
+ metadata: {
17296
+ ...c.metadata,
17297
+ projectId: scope.projectId,
17298
+ scopeName: scope.scopeName,
17299
+ type: c.metadata.type || "chunk"
17300
+ }
17301
+ }))
17302
+ );
17303
+ }
17304
+ }
17305
+ async search(data, opts, scope) {
17306
+ const filterParts = [
17307
+ `projectId = '${scope.projectId}'`,
17308
+ `scopeName = '${scope.scopeName}'`
17309
+ ];
17310
+ if (opts.filter) {
17311
+ filterParts.push(opts.filter);
17312
+ }
17313
+ const results = await this.chunksNs.query({
17314
+ data,
17315
+ topK: opts.limit,
17316
+ includeMetadata: true,
17317
+ filter: filterParts.join(" AND "),
17318
+ queryMode: vector.QueryMode.HYBRID,
17319
+ fusionAlgorithm: vector.FusionAlgorithm.DBSF
17320
+ });
17321
+ return results.map((doc) => ({
17322
+ id: String(doc.id),
17323
+ score: doc.score,
17324
+ metadata: {
17325
+ projectId: doc.metadata?.projectId ?? "",
17326
+ scopeName: doc.metadata?.scopeName ?? "",
17327
+ url: doc.metadata?.url ?? "",
17328
+ path: doc.metadata?.path ?? "",
17329
+ title: doc.metadata?.title ?? "",
17330
+ sectionTitle: doc.metadata?.sectionTitle ?? "",
17331
+ headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
17332
+ snippet: doc.metadata?.snippet ?? "",
17333
+ chunkText: doc.metadata?.chunkText ?? "",
17334
+ ordinal: doc.metadata?.ordinal ?? 0,
17335
+ contentHash: doc.metadata?.contentHash ?? "",
17336
+ depth: doc.metadata?.depth ?? 0,
17337
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
17338
+ routeFile: doc.metadata?.routeFile ?? "",
17339
+ tags: doc.metadata?.tags ?? [],
17340
+ description: doc.metadata?.description || void 0,
17341
+ keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
17342
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
17343
+ incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
17344
+ }
17345
+ }));
17346
+ }
17347
+ async searchChunksByUrl(data, url, opts, scope) {
17348
+ const filterParts = [
17349
+ `projectId = '${scope.projectId}'`,
17350
+ `scopeName = '${scope.scopeName}'`,
17351
+ `url = '${url}'`
17352
+ ];
17353
+ if (opts.filter) {
17354
+ filterParts.push(opts.filter);
17355
+ }
17356
+ const results = await this.chunksNs.query({
17357
+ data,
17358
+ topK: opts.limit,
17359
+ includeMetadata: true,
17360
+ filter: filterParts.join(" AND "),
17361
+ queryMode: vector.QueryMode.HYBRID,
17362
+ fusionAlgorithm: vector.FusionAlgorithm.DBSF
17161
17363
  });
17162
17364
  return results.map((doc) => ({
17163
- id: doc.id,
17365
+ id: String(doc.id),
17164
17366
  score: doc.score,
17165
17367
  metadata: {
17166
17368
  projectId: doc.metadata?.projectId ?? "",
17167
17369
  scopeName: doc.metadata?.scopeName ?? "",
17168
- url: doc.content.url,
17370
+ url: doc.metadata?.url ?? "",
17169
17371
  path: doc.metadata?.path ?? "",
17170
- title: doc.content.title,
17171
- sectionTitle: doc.content.sectionTitle,
17172
- headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
17372
+ title: doc.metadata?.title ?? "",
17373
+ sectionTitle: doc.metadata?.sectionTitle ?? "",
17374
+ headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
17173
17375
  snippet: doc.metadata?.snippet ?? "",
17174
- chunkText: doc.content.text,
17376
+ chunkText: doc.metadata?.chunkText ?? "",
17175
17377
  ordinal: doc.metadata?.ordinal ?? 0,
17176
17378
  contentHash: doc.metadata?.contentHash ?? "",
17177
17379
  depth: doc.metadata?.depth ?? 0,
17178
17380
  incomingLinks: doc.metadata?.incomingLinks ?? 0,
17179
17381
  routeFile: doc.metadata?.routeFile ?? "",
17180
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17382
+ tags: doc.metadata?.tags ?? [],
17181
17383
  description: doc.metadata?.description || void 0,
17182
- keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
17384
+ keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
17385
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
17386
+ incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
17183
17387
  }
17184
17388
  }));
17185
17389
  }
17186
- async searchPages(query, opts, scope) {
17187
- const index = this.pageIndex(scope);
17390
+ async searchPagesByText(data, opts, scope) {
17391
+ return this.queryPages({ data }, opts, scope);
17392
+ }
17393
+ async searchPagesByVector(vector, opts, scope) {
17394
+ return this.queryPages({ vector }, opts, scope);
17395
+ }
17396
+ async queryPages(input, opts, scope) {
17397
+ const filterParts = [
17398
+ `projectId = '${scope.projectId}'`,
17399
+ `scopeName = '${scope.scopeName}'`
17400
+ ];
17401
+ if (opts.filter) {
17402
+ filterParts.push(opts.filter);
17403
+ }
17188
17404
  let results;
17189
17405
  try {
17190
- results = await index.search({
17191
- query,
17192
- limit: opts.limit,
17193
- semanticWeight: opts.semanticWeight,
17194
- inputEnrichment: opts.inputEnrichment,
17195
- reranking: true,
17196
- filter: opts.filter
17406
+ results = await this.pagesNs.query({
17407
+ ...input,
17408
+ topK: opts.limit,
17409
+ includeMetadata: true,
17410
+ filter: filterParts.join(" AND "),
17411
+ queryMode: vector.QueryMode.HYBRID,
17412
+ fusionAlgorithm: vector.FusionAlgorithm.DBSF
17197
17413
  });
17198
17414
  } catch {
17199
17415
  return [];
17200
17416
  }
17201
17417
  return results.map((doc) => ({
17202
- id: doc.id,
17418
+ id: String(doc.id),
17203
17419
  score: doc.score,
17204
- title: doc.content.title,
17205
- url: doc.content.url,
17206
- description: doc.content.description ?? "",
17207
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17420
+ title: doc.metadata?.title ?? "",
17421
+ url: doc.metadata?.url ?? "",
17422
+ description: doc.metadata?.description ?? "",
17423
+ tags: doc.metadata?.tags ?? [],
17208
17424
  depth: doc.metadata?.depth ?? 0,
17209
17425
  incomingLinks: doc.metadata?.incomingLinks ?? 0,
17210
- routeFile: doc.metadata?.routeFile ?? ""
17426
+ routeFile: doc.metadata?.routeFile ?? "",
17427
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0
17211
17428
  }));
17212
17429
  }
17213
- async deleteByIds(ids, scope) {
17430
+ async deleteByIds(ids, _scope) {
17214
17431
  if (ids.length === 0) return;
17215
- const index = this.chunkIndex(scope);
17216
- const BATCH_SIZE = 500;
17432
+ const BATCH_SIZE = 90;
17217
17433
  for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17218
17434
  const batch = ids.slice(i, i + BATCH_SIZE);
17219
- await index.delete(batch);
17435
+ await this.chunksNs.delete(batch);
17220
17436
  }
17221
17437
  }
17222
17438
  async deleteScope(scope) {
17223
- try {
17224
- const chunkIdx = this.chunkIndex(scope);
17225
- await chunkIdx.deleteIndex();
17226
- } catch {
17227
- }
17228
- try {
17229
- const pageIdx = this.pageIndex(scope);
17230
- await pageIdx.deleteIndex();
17231
- } catch {
17439
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17440
+ const ids = [];
17441
+ let cursor = "0";
17442
+ try {
17443
+ for (; ; ) {
17444
+ const result = await ns.range({
17445
+ cursor,
17446
+ limit: 100,
17447
+ includeMetadata: true
17448
+ });
17449
+ for (const doc of result.vectors) {
17450
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
17451
+ ids.push(String(doc.id));
17452
+ }
17453
+ }
17454
+ if (!result.nextCursor || result.nextCursor === "0") break;
17455
+ cursor = result.nextCursor;
17456
+ }
17457
+ } catch {
17458
+ }
17459
+ if (ids.length > 0) {
17460
+ const BATCH_SIZE = 90;
17461
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17462
+ const batch = ids.slice(i, i + BATCH_SIZE);
17463
+ await ns.delete(batch);
17464
+ }
17465
+ }
17232
17466
  }
17233
17467
  }
17234
17468
  async listScopes(projectId) {
17235
- const allIndexes = await this.client.listIndexes();
17236
- const prefix = `${projectId}--`;
17237
- const scopeNames = /* @__PURE__ */ new Set();
17238
- for (const name of allIndexes) {
17239
- if (name.startsWith(prefix) && !name.endsWith("--pages")) {
17240
- const scopeName = name.slice(prefix.length);
17241
- scopeNames.add(scopeName);
17242
- }
17243
- }
17244
- const scopes = [];
17245
- for (const scopeName of scopeNames) {
17246
- const scope = {
17247
- projectId,
17248
- scopeName,
17249
- scopeId: `${projectId}:${scopeName}`
17250
- };
17469
+ const scopeMap = /* @__PURE__ */ new Map();
17470
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17471
+ let cursor = "0";
17472
+ try {
17473
+ for (; ; ) {
17474
+ const result = await ns.range({
17475
+ cursor,
17476
+ limit: 100,
17477
+ includeMetadata: true
17478
+ });
17479
+ for (const doc of result.vectors) {
17480
+ if (doc.metadata?.projectId === projectId) {
17481
+ const scopeName = doc.metadata.scopeName ?? "";
17482
+ scopeMap.set(scopeName, (scopeMap.get(scopeName) ?? 0) + 1);
17483
+ }
17484
+ }
17485
+ if (!result.nextCursor || result.nextCursor === "0") break;
17486
+ cursor = result.nextCursor;
17487
+ }
17488
+ } catch {
17489
+ }
17490
+ }
17491
+ return [...scopeMap.entries()].map(([scopeName, count]) => ({
17492
+ projectId,
17493
+ scopeName,
17494
+ lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
17495
+ documentCount: count
17496
+ }));
17497
+ }
17498
+ async getContentHashes(scope) {
17499
+ return this.scanHashes(this.chunksNs, scope);
17500
+ }
17501
+ /**
17502
+ * Fetch content hashes for a specific set of chunk keys using direct fetch()
17503
+ * instead of range(). This avoids potential issues with range() returning
17504
+ * vectors from the wrong namespace on hybrid indexes.
17505
+ */
17506
+ async fetchContentHashesForKeys(keys, scope) {
17507
+ const map = /* @__PURE__ */ new Map();
17508
+ if (keys.length === 0) return map;
17509
+ const BATCH_SIZE = 90;
17510
+ for (let i = 0; i < keys.length; i += BATCH_SIZE) {
17511
+ const batch = keys.slice(i, i + BATCH_SIZE);
17251
17512
  try {
17252
- const info = await this.chunkIndex(scope).info();
17253
- scopes.push({
17254
- projectId,
17255
- scopeName,
17256
- lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
17257
- documentCount: info.documentCount
17513
+ const results = await this.chunksNs.fetch(batch, {
17514
+ includeMetadata: true
17258
17515
  });
17516
+ for (const doc of results) {
17517
+ if (doc && doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
17518
+ map.set(String(doc.id), doc.metadata.contentHash);
17519
+ }
17520
+ }
17259
17521
  } catch {
17260
- scopes.push({
17261
- projectId,
17262
- scopeName,
17263
- lastIndexedAt: "unknown",
17264
- documentCount: 0
17522
+ }
17523
+ }
17524
+ return map;
17525
+ }
17526
+ /**
17527
+ * Scan all IDs in the chunks namespace for this scope.
17528
+ * Used for deletion detection (finding stale chunk keys).
17529
+ */
17530
+ async scanChunkIds(scope) {
17531
+ const ids = /* @__PURE__ */ new Set();
17532
+ let cursor = "0";
17533
+ try {
17534
+ for (; ; ) {
17535
+ const result = await this.chunksNs.range({
17536
+ cursor,
17537
+ limit: 100,
17538
+ includeMetadata: true
17265
17539
  });
17540
+ for (const doc of result.vectors) {
17541
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
17542
+ ids.add(String(doc.id));
17543
+ }
17544
+ }
17545
+ if (!result.nextCursor || result.nextCursor === "0") break;
17546
+ cursor = result.nextCursor;
17266
17547
  }
17548
+ } catch {
17267
17549
  }
17268
- return scopes;
17550
+ return ids;
17269
17551
  }
17270
- async getContentHashes(scope) {
17552
+ async scanHashes(ns, scope) {
17553
+ const map = /* @__PURE__ */ new Map();
17554
+ let cursor = "0";
17555
+ try {
17556
+ for (; ; ) {
17557
+ const result = await ns.range({
17558
+ cursor,
17559
+ limit: 100,
17560
+ includeMetadata: true
17561
+ });
17562
+ for (const doc of result.vectors) {
17563
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
17564
+ map.set(String(doc.id), doc.metadata.contentHash);
17565
+ }
17566
+ }
17567
+ if (!result.nextCursor || result.nextCursor === "0") break;
17568
+ cursor = result.nextCursor;
17569
+ }
17570
+ } catch {
17571
+ }
17572
+ return map;
17573
+ }
17574
+ async listPages(scope, opts) {
17575
+ const cursor = opts?.cursor ?? "0";
17576
+ const limit = opts?.limit ?? 50;
17577
+ try {
17578
+ const result = await this.pagesNs.range({
17579
+ cursor,
17580
+ limit,
17581
+ includeMetadata: true
17582
+ });
17583
+ const pages = result.vectors.filter(
17584
+ (doc) => doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && (!opts?.pathPrefix || (doc.metadata?.url ?? "").startsWith(opts.pathPrefix))
17585
+ ).map((doc) => ({
17586
+ url: doc.metadata?.url ?? "",
17587
+ title: doc.metadata?.title ?? "",
17588
+ description: doc.metadata?.description ?? "",
17589
+ routeFile: doc.metadata?.routeFile ?? ""
17590
+ }));
17591
+ const response = { pages };
17592
+ if (result.nextCursor && result.nextCursor !== "0") {
17593
+ response.nextCursor = result.nextCursor;
17594
+ }
17595
+ return response;
17596
+ } catch {
17597
+ return { pages: [] };
17598
+ }
17599
+ }
17600
+ async getPageHashes(scope) {
17271
17601
  const map = /* @__PURE__ */ new Map();
17272
- const index = this.chunkIndex(scope);
17273
17602
  let cursor = "0";
17274
17603
  try {
17275
17604
  for (; ; ) {
17276
- const result = await index.range({ cursor, limit: 100 });
17277
- for (const doc of result.documents) {
17278
- if (doc.metadata?.contentHash) {
17279
- map.set(doc.id, doc.metadata.contentHash);
17605
+ const result = await this.pagesNs.range({
17606
+ cursor,
17607
+ limit: 100,
17608
+ includeMetadata: true
17609
+ });
17610
+ for (const doc of result.vectors) {
17611
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
17612
+ map.set(String(doc.id), doc.metadata.contentHash);
17280
17613
  }
17281
17614
  }
17282
17615
  if (!result.nextCursor || result.nextCursor === "0") break;
@@ -17286,47 +17619,43 @@ var UpstashSearchStore = class {
17286
17619
  }
17287
17620
  return map;
17288
17621
  }
17622
+ async deletePagesByIds(ids, _scope) {
17623
+ if (ids.length === 0) return;
17624
+ const BATCH_SIZE = 90;
17625
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17626
+ const batch = ids.slice(i, i + BATCH_SIZE);
17627
+ await this.pagesNs.delete(batch);
17628
+ }
17629
+ }
17289
17630
  async upsertPages(pages, scope) {
17290
17631
  if (pages.length === 0) return;
17291
- const index = this.pageIndex(scope);
17292
- const BATCH_SIZE = 50;
17632
+ const BATCH_SIZE = 90;
17293
17633
  for (let i = 0; i < pages.length; i += BATCH_SIZE) {
17294
17634
  const batch = pages.slice(i, i + BATCH_SIZE);
17295
- const docs = batch.map((p) => ({
17296
- id: p.url,
17297
- content: {
17298
- title: p.title,
17299
- url: p.url,
17300
- type: "page",
17301
- description: p.description ?? "",
17302
- keywords: (p.keywords ?? []).join(","),
17303
- summary: p.summary ?? "",
17304
- tags: p.tags.join(",")
17305
- },
17306
- metadata: {
17307
- markdown: p.markdown,
17308
- projectId: p.projectId,
17309
- scopeName: p.scopeName,
17310
- routeFile: p.routeFile,
17311
- routeResolution: p.routeResolution,
17312
- incomingLinks: p.incomingLinks,
17313
- outgoingLinks: p.outgoingLinks,
17314
- depth: p.depth,
17315
- indexedAt: p.indexedAt
17316
- }
17317
- }));
17318
- await index.upsert(docs);
17635
+ await this.pagesNs.upsert(
17636
+ batch.map((p) => ({
17637
+ id: p.id,
17638
+ data: p.data,
17639
+ metadata: {
17640
+ ...p.metadata,
17641
+ projectId: scope.projectId,
17642
+ scopeName: scope.scopeName,
17643
+ type: "page"
17644
+ }
17645
+ }))
17646
+ );
17319
17647
  }
17320
17648
  }
17321
17649
  async getPage(url, scope) {
17322
- const index = this.pageIndex(scope);
17323
17650
  try {
17324
- const results = await index.fetch([url]);
17651
+ const results = await this.pagesNs.fetch([url], {
17652
+ includeMetadata: true
17653
+ });
17325
17654
  const doc = results[0];
17326
- if (!doc) return null;
17655
+ if (!doc || !doc.metadata) return null;
17327
17656
  return {
17328
- url: doc.content.url,
17329
- title: doc.content.title,
17657
+ url: doc.metadata.url,
17658
+ title: doc.metadata.title,
17330
17659
  markdown: doc.metadata.markdown,
17331
17660
  projectId: doc.metadata.projectId,
17332
17661
  scopeName: doc.metadata.scopeName,
@@ -17334,27 +17663,86 @@ var UpstashSearchStore = class {
17334
17663
  routeResolution: doc.metadata.routeResolution,
17335
17664
  incomingLinks: doc.metadata.incomingLinks,
17336
17665
  outgoingLinks: doc.metadata.outgoingLinks,
17666
+ outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? void 0,
17337
17667
  depth: doc.metadata.depth,
17338
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17668
+ tags: doc.metadata.tags ?? [],
17339
17669
  indexedAt: doc.metadata.indexedAt,
17340
- summary: doc.content.summary || void 0,
17341
- description: doc.content.description || void 0,
17342
- keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
17670
+ summary: doc.metadata.summary || void 0,
17671
+ description: doc.metadata.description || void 0,
17672
+ keywords: doc.metadata.keywords?.length ? doc.metadata.keywords : void 0,
17673
+ publishedAt: typeof doc.metadata.publishedAt === "number" ? doc.metadata.publishedAt : void 0
17343
17674
  };
17344
17675
  } catch {
17345
17676
  return null;
17346
17677
  }
17347
17678
  }
17679
+ async fetchPageWithVector(url, scope) {
17680
+ try {
17681
+ const results = await this.pagesNs.fetch([url], {
17682
+ includeMetadata: true,
17683
+ includeVectors: true
17684
+ });
17685
+ const doc = results[0];
17686
+ if (!doc || !doc.metadata || !doc.vector) return null;
17687
+ if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
17688
+ return null;
17689
+ }
17690
+ return { metadata: doc.metadata, vector: doc.vector };
17691
+ } catch {
17692
+ return null;
17693
+ }
17694
+ }
17695
+ async fetchPagesBatch(urls, scope) {
17696
+ if (urls.length === 0) return [];
17697
+ try {
17698
+ const results = await this.pagesNs.fetch(urls, {
17699
+ includeMetadata: true
17700
+ });
17701
+ const out = [];
17702
+ for (const doc of results) {
17703
+ if (!doc || !doc.metadata) continue;
17704
+ if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
17705
+ continue;
17706
+ }
17707
+ out.push({
17708
+ url: doc.metadata.url,
17709
+ title: doc.metadata.title,
17710
+ routeFile: doc.metadata.routeFile,
17711
+ outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? []
17712
+ });
17713
+ }
17714
+ return out;
17715
+ } catch {
17716
+ return [];
17717
+ }
17718
+ }
17348
17719
  async deletePages(scope) {
17720
+ const ids = [];
17721
+ let cursor = "0";
17349
17722
  try {
17350
- const index = this.pageIndex(scope);
17351
- await index.reset();
17723
+ for (; ; ) {
17724
+ const result = await this.pagesNs.range({
17725
+ cursor,
17726
+ limit: 100,
17727
+ includeMetadata: true
17728
+ });
17729
+ for (const doc of result.vectors) {
17730
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
17731
+ ids.push(String(doc.id));
17732
+ }
17733
+ }
17734
+ if (!result.nextCursor || result.nextCursor === "0") break;
17735
+ cursor = result.nextCursor;
17736
+ }
17352
17737
  } catch {
17353
17738
  }
17739
+ if (ids.length > 0) {
17740
+ await this.deletePagesByIds(ids, scope);
17741
+ }
17354
17742
  }
17355
17743
  async health() {
17356
17744
  try {
17357
- await this.client.info();
17745
+ await this.index.info();
17358
17746
  return { ok: true };
17359
17747
  } catch (error) {
17360
17748
  return {
@@ -17364,14 +17752,31 @@ var UpstashSearchStore = class {
17364
17752
  }
17365
17753
  }
17366
17754
  async dropAllIndexes(projectId) {
17367
- const allIndexes = await this.client.listIndexes();
17368
- const prefix = `${projectId}--`;
17369
- for (const name of allIndexes) {
17370
- if (name.startsWith(prefix)) {
17371
- try {
17372
- const index = this.client.index(name);
17373
- await index.deleteIndex();
17374
- } catch {
17755
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17756
+ const ids = [];
17757
+ let cursor = "0";
17758
+ try {
17759
+ for (; ; ) {
17760
+ const result = await ns.range({
17761
+ cursor,
17762
+ limit: 100,
17763
+ includeMetadata: true
17764
+ });
17765
+ for (const doc of result.vectors) {
17766
+ if (doc.metadata?.projectId === projectId) {
17767
+ ids.push(String(doc.id));
17768
+ }
17769
+ }
17770
+ if (!result.nextCursor || result.nextCursor === "0") break;
17771
+ cursor = result.nextCursor;
17772
+ }
17773
+ } catch {
17774
+ }
17775
+ if (ids.length > 0) {
17776
+ const BATCH_SIZE = 90;
17777
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17778
+ const batch = ids.slice(i, i + BATCH_SIZE);
17779
+ await ns.delete(batch);
17375
17780
  }
17376
17781
  }
17377
17782
  }
@@ -17385,12 +17790,16 @@ async function createUpstashStore(config) {
17385
17790
  if (!url || !token) {
17386
17791
  throw new SearchSocketError(
17387
17792
  "VECTOR_BACKEND_UNAVAILABLE",
17388
- `Missing Upstash Search credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
17793
+ `Missing Upstash Vector credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
17389
17794
  );
17390
17795
  }
17391
- const { Search } = await import('@upstash/search');
17392
- const client = new Search({ url, token });
17393
- return new UpstashSearchStore({ client });
17796
+ const { Index } = await import('@upstash/vector');
17797
+ const index = new Index({ url, token });
17798
+ return new UpstashSearchStore({
17799
+ index,
17800
+ pagesNamespace: config.upstash.namespaces.pages,
17801
+ chunksNamespace: config.upstash.namespaces.chunks
17802
+ });
17394
17803
  }
17395
17804
  function sha1(input) {
17396
17805
  return crypto.createHash("sha1").update(input).digest("hex");
@@ -17458,6 +17867,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
17458
17867
  if (normalizeText(current.text)) {
17459
17868
  sections.push({
17460
17869
  sectionTitle: current.sectionTitle,
17870
+ headingLevel: current.headingLevel,
17461
17871
  headingPath: current.headingPath,
17462
17872
  text: current.text.trim()
17463
17873
  });
@@ -17476,6 +17886,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
17476
17886
  headingStack.length = level;
17477
17887
  current = {
17478
17888
  sectionTitle: title,
17889
+ headingLevel: level,
17479
17890
  headingPath: headingStack.filter((entry) => Boolean(entry)).slice(0, headingPathDepth),
17480
17891
  text: `${line}
17481
17892
  `
@@ -17611,6 +18022,7 @@ function splitSection(section, config) {
17611
18022
  return [
17612
18023
  {
17613
18024
  sectionTitle: section.sectionTitle,
18025
+ headingLevel: section.headingLevel,
17614
18026
  headingPath: section.headingPath,
17615
18027
  chunkText: text
17616
18028
  }
@@ -17661,6 +18073,7 @@ ${chunk}`;
17661
18073
  }
17662
18074
  return merged.map((chunkText) => ({
17663
18075
  sectionTitle: section.sectionTitle,
18076
+ headingLevel: section.headingLevel,
17664
18077
  headingPath: section.headingPath,
17665
18078
  chunkText
17666
18079
  }));
@@ -17676,6 +18089,18 @@ function buildSummaryChunkText(page) {
17676
18089
  }
17677
18090
  return parts.join("\n\n");
17678
18091
  }
18092
/**
 * Build the "page title — heading trail" string for a chunk.
 *
 * @param {{ title: string, sectionTitle?: string, headingLevel?: number, headingPath: string[] }} chunk
 * @returns {string | undefined} `undefined` when the chunk has no section title
 *   or no heading level. Otherwise the page title followed by an em dash and
 *   either the section title (heading path of length <= 1) or the joined
 *   heading path, with the section title appended when it is not already the
 *   last path element.
 */
function buildEmbeddingTitle(chunk) {
  const { title, sectionTitle, headingLevel, headingPath } = chunk;
  if (!sectionTitle || headingLevel === void 0) return void 0;
  let trail = sectionTitle;
  if (headingPath.length > 1) {
    const breadcrumb = headingPath.join(" > ");
    const endsWithSection = headingPath[headingPath.length - 1] === sectionTitle;
    trail = endsWithSection ? breadcrumb : `${breadcrumb} > ${sectionTitle}`;
  }
  return `${title} \u2014 ${trail}`;
}
17679
18104
  function buildEmbeddingText(chunk, prependTitle) {
17680
18105
  if (!prependTitle) return chunk.chunkText;
17681
18106
  const prefix = chunk.sectionTitle ? `${chunk.title} \u2014 ${chunk.sectionTitle}` : chunk.title;
@@ -17706,10 +18131,14 @@ function chunkPage(page, config, scope) {
17706
18131
  tags: page.tags,
17707
18132
  contentHash: "",
17708
18133
  description: page.description,
17709
- keywords: page.keywords
18134
+ keywords: page.keywords,
18135
+ publishedAt: page.publishedAt,
18136
+ incomingAnchorText: page.incomingAnchorText,
18137
+ meta: page.meta
17710
18138
  };
17711
18139
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
17712
- summaryChunk.contentHash = sha256(normalizeText(embeddingText));
18140
+ const metaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
18141
+ summaryChunk.contentHash = sha256(normalizeText(embeddingText) + metaSuffix);
17713
18142
  chunks.push(summaryChunk);
17714
18143
  }
17715
18144
  const ordinalOffset = config.chunking.pageSummaryChunk ? 1 : 0;
@@ -17726,6 +18155,7 @@ function chunkPage(page, config, scope) {
17726
18155
  path: page.url,
17727
18156
  title: page.title,
17728
18157
  sectionTitle: entry.sectionTitle,
18158
+ headingLevel: entry.headingLevel,
17729
18159
  headingPath: entry.headingPath,
17730
18160
  chunkText: entry.chunkText,
17731
18161
  snippet: toSnippet(entry.chunkText),
@@ -17735,10 +18165,16 @@ function chunkPage(page, config, scope) {
17735
18165
  tags: page.tags,
17736
18166
  contentHash: "",
17737
18167
  description: page.description,
17738
- keywords: page.keywords
18168
+ keywords: page.keywords,
18169
+ publishedAt: page.publishedAt,
18170
+ incomingAnchorText: page.incomingAnchorText,
18171
+ meta: page.meta
17739
18172
  };
17740
18173
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
17741
- chunk.contentHash = sha256(normalizeText(embeddingText));
18174
+ const embeddingTitle = config.chunking.weightHeadings ? buildEmbeddingTitle(chunk) : void 0;
18175
+ const chunkMetaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
18176
+ const hashInput = embeddingTitle ? `${normalizeText(embeddingText)}|title:${embeddingTitle}` : normalizeText(embeddingText);
18177
+ chunk.contentHash = sha256(hashInput + chunkMetaSuffix);
17742
18178
  chunks.push(chunk);
17743
18179
  }
17744
18180
  return chunks;
@@ -18570,7 +19006,112 @@ function gfm(turndownService) {
18570
19006
  ]);
18571
19007
  }
18572
19008
 
19009
+ // src/utils/structured-meta.ts
19010
// Identifier-shaped keys only: a letter or underscore, then letters, digits or underscores.
var VALID_KEY_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
/**
 * Check whether `key` is an acceptable structured-meta key.
 *
 * @param {string} key - Candidate key.
 * @returns {boolean} `true` when the whole key matches `VALID_KEY_RE`.
 */
function validateMetaKey(key) {
  const matched = VALID_KEY_RE.exec(key);
  return matched !== null;
}
19014
/**
 * Convert the raw `content` string of a meta tag into a typed value.
 *
 * @param {string} content - Raw attribute text.
 * @param {string} dataType - One of "number", "boolean", "string[]", "date";
 *   anything else is treated as plain text.
 * @returns {number | boolean | string[] | string}
 *   - "number" / "date": the finite numeric value, otherwise `content` unchanged.
 *   - "boolean": `true` only for the exact text "true".
 *   - "string[]": comma-separated entries, trimmed, with empty entries dropped.
 *   - default: `content` unchanged.
 */
function parseMetaValue(content, dataType) {
  switch (dataType) {
    case "number":
    case "date": {
      // Number("") and Number("   ") evaluate to 0; blank content must not
      // turn into a real numeric value, so it falls back to the raw text.
      if (typeof content === "string" && content.trim() === "") return content;
      const numeric = Number(content);
      return Number.isFinite(numeric) ? numeric : content;
    }
    case "boolean":
      return content === "true";
    case "string[]":
      // Drop empty entries ("a,,b", trailing commas) like the keyword/tag parsing does.
      return content ? content.split(",").map((s) => s.trim()).filter(Boolean) : [];
    default:
      return content;
  }
}
19032
/**
 * Double every single quote in `s` so the text can sit inside a
 * single-quoted literal of a filter expression.
 *
 * @param {string} s - Raw text.
 * @returns {string} `s` with each `'` replaced by `''`.
 */
function escapeFilterValue(s) {
  const pieces = s.split("'");
  return pieces.join("''");
}
19035
/**
 * Turn a `{ key: value }` map into an AND-joined filter expression over
 * `meta.<key>` fields.
 *
 * - Keys rejected by `validateMetaKey` are skipped.
 * - String values become `meta.key CONTAINS '<escaped>'`.
 * - Booleans and finite numbers become `meta.key = <value>`.
 * - Any other value (null, undefined, NaN, Infinity, objects, arrays) is
 *   skipped, because stringifying it would produce a malformed clause.
 *
 * @param {Record<string, unknown> | null | undefined} filters - Requested filters.
 * @returns {string} Clauses joined with " AND "; "" when nothing qualifies.
 */
function buildMetaFilterString(filters) {
  const clauses = [];
  for (const [key, value] of Object.entries(filters ?? {})) {
    if (!validateMetaKey(key)) continue;
    const field = `meta.${key}`;
    if (typeof value === "string") {
      clauses.push(`${field} CONTAINS '${escapeFilterValue(value)}'`);
    } else if (typeof value === "boolean" || (typeof value === "number" && Number.isFinite(value))) {
      clauses.push(`${field} = ${value}`);
    }
  }
  return clauses.join(" AND ");
}
19050
+
18573
19051
  // src/indexing/extractor.ts
19052
/**
 * Normalize a date-like value to a numeric timestamp.
 *
 * @param {unknown} value - A `Date`, a string accepted by the `Date`
 *   constructor, or a number (returned as-is when finite).
 * @returns {number | undefined} The finite timestamp, or `undefined` for
 *   null/undefined, invalid dates, non-finite numbers and all other types.
 */
function normalizeDateToMs(value) {
  let timestamp;
  if (value instanceof Date) {
    timestamp = value.getTime();
  } else if (typeof value === "string") {
    timestamp = new Date(value).getTime();
  } else if (typeof value === "number") {
    timestamp = value;
  } else {
    return void 0;
  }
  return Number.isFinite(timestamp) ? timestamp : void 0;
}
19067
// Frontmatter keys probed for a publication date, in priority order.
var FRONTMATTER_DATE_FIELDS = ["date", "publishedAt", "updatedAt", "published_at", "updated_at"];
/**
 * Return the first usable timestamp found in frontmatter.
 *
 * @param {Record<string, unknown>} data - Parsed frontmatter.
 * @returns {number | undefined} The value of the first field in
 *   `FRONTMATTER_DATE_FIELDS` that `normalizeDateToMs` accepts, else `undefined`.
 */
function extractPublishedAtFromFrontmatter(data) {
  let found;
  // `some` stops at the first field that yields a timestamp.
  FRONTMATTER_DATE_FIELDS.some((field) => {
    found = normalizeDateToMs(data[field]);
    return found !== void 0;
  });
  return found;
}
19075
/**
 * Find a publication timestamp in an HTML document.
 *
 * Sources are tried in this order, returning the first value that
 * `normalizeDateToMs` accepts:
 *   1. `datePublished` in any `application/ld+json` script (top-level object,
 *      top-level array items, or items of a top-level `@graph` array);
 *   2. `<meta property="article:published_time">`;
 *   3. `<meta itemprop="datePublished">` or `<time itemprop="datePublished">`;
 *   4. the first `<time datetime>` element.
 *
 * @param {Function} $ - Cheerio-style selector function for the document.
 * @returns {number | undefined} Timestamp, or `undefined` when none is found.
 */
function extractPublishedAtFromHtml($) {
  const jsonLdScripts = $('script[type="application/ld+json"]');
  for (let i = 0; i < jsonLdScripts.length; i++) {
    try {
      const raw = $(jsonLdScripts[i]).html();
      if (!raw) continue;
      const parsed = JSON.parse(raw);
      const candidates = [];
      if (Array.isArray(parsed)) {
        candidates.push(...parsed);
      } else if (parsed && typeof parsed === "object") {
        candidates.push(parsed);
        if (Array.isArray(parsed["@graph"])) {
          candidates.push(...parsed["@graph"]);
        }
      }
      for (const candidate of candidates) {
        // Arrays may contain null; without `?.` one null entry would throw
        // and the catch below would discard the remaining valid entries.
        const val = normalizeDateToMs(candidate?.datePublished);
        if (val !== void 0) return val;
      }
    } catch {
      // Malformed JSON-LD: ignore this script and try the next source.
    }
  }
  const ogTime = $('meta[property="article:published_time"]').attr("content")?.trim();
  if (ogTime) {
    const val = normalizeDateToMs(ogTime);
    if (val !== void 0) return val;
  }
  const itempropDate = $('meta[itemprop="datePublished"]').attr("content")?.trim() || $('time[itemprop="datePublished"]').attr("datetime")?.trim();
  if (itempropDate) {
    const val = normalizeDateToMs(itempropDate);
    if (val !== void 0) return val;
  }
  const timeEl = $("time[datetime]").first().attr("datetime")?.trim();
  if (timeEl) {
    const val = normalizeDateToMs(timeEl);
    if (val !== void 0) return val;
  }
  return void 0;
}
18574
19115
  function hasTopLevelNoindexComment(markdown) {
18575
19116
  const lines = markdown.split(/\r?\n/);
18576
19117
  let inFence = false;
@@ -18586,6 +19127,97 @@ function hasTopLevelNoindexComment(markdown) {
18586
19127
  }
18587
19128
  return false;
18588
19129
  }
19130
// Generic one-word alt texts that describe nothing about the image.
var GARBAGE_ALT_WORDS = /* @__PURE__ */ new Set([
  "image",
  "photo",
  "picture",
  "icon",
  "logo",
  "banner",
  "screenshot",
  "thumbnail",
  "img",
  "graphic",
  "illustration",
  "spacer",
  "pixel",
  "placeholder",
  "avatar",
  "background"
]);
// Text that ends in an image file extension (optionally followed by a query string).
var IMAGE_EXT_RE = /\.(jpg|jpeg|png|gif|svg|webp|avif|bmp|ico)(\?.*)?$/i;
/**
 * Decide whether an `alt` text is worth indexing.
 *
 * @param {string} alt - Raw alt attribute value.
 * @returns {boolean} `false` when the trimmed text is shorter than 5
 *   characters, looks like an image file name, or is one of
 *   `GARBAGE_ALT_WORDS` (case-insensitive); `true` otherwise.
 */
function isMeaningfulAlt(alt) {
  const text = alt.trim();
  const longEnough = text.length >= 5;
  return longEnough && !IMAGE_EXT_RE.test(text) && !GARBAGE_ALT_WORDS.has(text.toLowerCase());
}
19156
/**
 * Pick the best textual stand-in for an image.
 *
 * Priority: the image's own description attribute, then the enclosing
 * `<figure>`'s description attribute, then "alt — caption" when the alt is
 * meaningful and a figcaption exists, then the meaningful alt alone, then
 * the figcaption alone.
 *
 * @param {object} img - Cheerio-style wrapper around the `<img>` element.
 * @param {Function} $ - Selector function (not used by this function).
 * @param {string} imageDescAttr - Name of the explicit description attribute.
 * @returns {string | null} Chosen text, or `null` when nothing usable exists.
 */
function resolveImageText(img, $, imageDescAttr) {
  const ownDescription = img.attr(imageDescAttr)?.trim();
  if (ownDescription) return ownDescription;
  const figure = img.closest("figure");
  const insideFigure = Boolean(figure.length);
  if (insideFigure) {
    const figureDescription = figure.attr(imageDescAttr)?.trim();
    if (figureDescription) return figureDescription;
  }
  const alt = img.attr("alt")?.trim() ?? "";
  const caption = insideFigure ? figure.find("figcaption").first().text().trim() : "";
  // A meaningful alt is never empty, so "" doubles as "no usable alt".
  const usableAlt = isMeaningfulAlt(alt) ? alt : "";
  if (usableAlt && caption) {
    return `${usableAlt} \u2014 ${caption}`;
  }
  return usableAlt || caption || null;
}
19177
// Anchor texts that carry no information about the link target.
var STOP_ANCHORS = /* @__PURE__ */ new Set([
  "here",
  "click",
  "click here",
  "read more",
  "link",
  "this",
  "more"
]);
/**
 * Normalize link text for use as an anchor-text signal.
 *
 * @param {string} raw - Text content of the link.
 * @returns {string} Whitespace-collapsed, trimmed, lower-cased text capped at
 *   100 characters; "" when the result is shorter than 3 characters or is one
 *   of `STOP_ANCHORS`.
 */
function normalizeAnchorText(raw) {
  const text = raw.replace(/\s+/g, " ").trim().toLowerCase();
  if (text.length < 3 || STOP_ANCHORS.has(text)) return "";
  return text.slice(0, 100);
}
19193
/**
 * Escape `&`, `<` and `>` so `text` can be placed inside element content.
 * Quotes are left untouched.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text with the three characters replaced by entities.
 */
function escapeHtml(text) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };
  return text.replace(/[&<>]/g, (ch) => entities[ch]);
}
19196
/**
 * Replace images under `root2` with their textual description.
 *
 * `<picture>` elements are handled first (using their first `<img>`), then
 * every remaining `<img>`. When `resolveImageText` yields text, the node is
 * swapped for an escaped `<span>` and any `<figcaption>` of the enclosing
 * `<figure>` is removed; otherwise the node is removed.
 *
 * @param {object} root2 - Cheerio-style selection to process in place.
 * @param {Function} $ - Selector function used to wrap raw elements.
 * @param {string} imageDescAttr - Name of the explicit description attribute.
 * @returns {void}
 */
function preprocessImages(root2, $, imageDescAttr) {
  // Swap `node` for the text derived from `img`, or drop it when there is none.
  const swapForText = (node, img) => {
    const parentFigure = node.closest("figure");
    const text = img.length ? resolveImageText(img, $, imageDescAttr) : null;
    if (!text) {
      node.remove();
      return;
    }
    if (parentFigure.length) parentFigure.find("figcaption").remove();
    node.replaceWith(`<span>${escapeHtml(text)}</span>`);
  };
  root2.find("picture").each((_i, el) => {
    const picture = $(el);
    swapForText(picture, picture.find("img").first());
  });
  root2.find("img").each((_i, el) => {
    const img = $(el);
    swapForText(img, img);
  });
}
18589
19221
  function extractFromHtml(url, html, config) {
18590
19222
  const $ = cheerio.load(html);
18591
19223
  const normalizedUrl = normalizeUrlPath(url);
@@ -18611,6 +19243,20 @@ function extractFromHtml(url, html, config) {
18611
19243
  if (weight === 0) {
18612
19244
  return null;
18613
19245
  }
19246
+ if ($('meta[name="searchsocket:noindex"]').attr("content") === "true") {
19247
+ return null;
19248
+ }
19249
+ const RESERVED_META_KEYS = /* @__PURE__ */ new Set(["noindex", "tags"]);
19250
+ const meta = {};
19251
+ $('meta[name^="searchsocket:"]').each((_i, el) => {
19252
+ const name = $(el).attr("name") ?? "";
19253
+ const key = name.slice("searchsocket:".length);
19254
+ if (!key || RESERVED_META_KEYS.has(key) || !validateMetaKey(key)) return;
19255
+ const content = $(el).attr("content") ?? "";
19256
+ const dataType = $(el).attr("data-type") ?? "string";
19257
+ meta[key] = parseMetaValue(content, dataType);
19258
+ });
19259
+ const componentTags = $('meta[name="searchsocket:tags"]').attr("content")?.trim();
18614
19260
  const description = $("meta[name='description']").attr("content")?.trim() || $("meta[property='og:description']").attr("content")?.trim() || void 0;
18615
19261
  const keywordsRaw = $("meta[name='keywords']").attr("content")?.trim();
18616
19262
  const keywords = keywordsRaw ? keywordsRaw.split(",").map((k) => k.trim()).filter(Boolean) : void 0;
@@ -18622,7 +19268,9 @@ function extractFromHtml(url, html, config) {
18622
19268
  root2.find(selector).remove();
18623
19269
  }
18624
19270
  root2.find(`[${config.extract.ignoreAttr}]`).remove();
19271
+ preprocessImages(root2, $, config.extract.imageDescAttr);
18625
19272
  const outgoingLinks = [];
19273
+ const seenLinkKeys = /* @__PURE__ */ new Set();
18626
19274
  root2.find("a[href]").each((_index, node) => {
18627
19275
  const href = $(node).attr("href");
18628
19276
  if (!href || href.startsWith("#") || href.startsWith("mailto:") || href.startsWith("tel:")) {
@@ -18633,7 +19281,19 @@ function extractFromHtml(url, html, config) {
18633
19281
  if (!["http:", "https:"].includes(parsed.protocol)) {
18634
19282
  return;
18635
19283
  }
18636
- outgoingLinks.push(normalizeUrlPath(parsed.pathname));
19284
+ const url2 = normalizeUrlPath(parsed.pathname);
19285
+ let anchorText = normalizeAnchorText($(node).text());
19286
+ if (!anchorText) {
19287
+ const imgAlt = $(node).find("img").first().attr("alt") ?? "";
19288
+ if (isMeaningfulAlt(imgAlt)) {
19289
+ anchorText = normalizeAnchorText(imgAlt);
19290
+ }
19291
+ }
19292
+ const key = `${url2}|${anchorText}`;
19293
+ if (!seenLinkKeys.has(key)) {
19294
+ seenLinkKeys.add(key);
19295
+ outgoingLinks.push({ url: url2, anchorText });
19296
+ }
18637
19297
  } catch {
18638
19298
  }
18639
19299
  });
@@ -18658,16 +19318,25 @@ function extractFromHtml(url, html, config) {
18658
19318
  return null;
18659
19319
  }
18660
19320
  const tags = normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1);
19321
+ const publishedAt = extractPublishedAtFromHtml($);
19322
+ if (componentTags) {
19323
+ const extraTags = componentTags.split(",").map((t) => t.trim()).filter(Boolean);
19324
+ for (const t of extraTags) {
19325
+ if (!tags.includes(t)) tags.push(t);
19326
+ }
19327
+ }
18661
19328
  return {
18662
19329
  url: normalizeUrlPath(url),
18663
19330
  title,
18664
19331
  markdown,
18665
- outgoingLinks: [...new Set(outgoingLinks)],
19332
+ outgoingLinks,
18666
19333
  noindex: false,
18667
19334
  tags,
18668
19335
  description,
18669
19336
  keywords,
18670
- weight
19337
+ weight,
19338
+ publishedAt,
19339
+ meta: Object.keys(meta).length > 0 ? meta : void 0
18671
19340
  };
18672
19341
  }
18673
19342
  function extractFromMarkdown(url, markdown, title) {
@@ -18688,6 +19357,24 @@ function extractFromMarkdown(url, markdown, title) {
18688
19357
  if (mdWeight === 0) {
18689
19358
  return null;
18690
19359
  }
19360
+ let mdMeta;
19361
+ const rawMeta = searchsocketMeta?.meta;
19362
+ if (rawMeta && typeof rawMeta === "object" && !Array.isArray(rawMeta)) {
19363
+ const metaObj = {};
19364
+ for (const [key, val] of Object.entries(rawMeta)) {
19365
+ if (!validateMetaKey(key)) continue;
19366
+ if (typeof val === "string" || typeof val === "number" || typeof val === "boolean") {
19367
+ metaObj[key] = val;
19368
+ } else if (Array.isArray(val) && val.every((v) => typeof v === "string")) {
19369
+ metaObj[key] = val;
19370
+ } else if (val instanceof Date) {
19371
+ metaObj[key] = val.getTime();
19372
+ }
19373
+ }
19374
+ if (Object.keys(metaObj).length > 0) {
19375
+ mdMeta = metaObj;
19376
+ }
19377
+ }
18691
19378
  const content = parsed.content;
18692
19379
  const normalized = normalizeMarkdown(content);
18693
19380
  if (!normalizeText(normalized)) {
@@ -18702,6 +19389,7 @@ function extractFromMarkdown(url, markdown, title) {
18702
19389
  fmKeywords = frontmatter.keywords.split(",").map((k) => k.trim()).filter(Boolean);
18703
19390
  }
18704
19391
  if (fmKeywords && fmKeywords.length === 0) fmKeywords = void 0;
19392
+ const publishedAt = extractPublishedAtFromFrontmatter(frontmatter);
18705
19393
  return {
18706
19394
  url: normalizeUrlPath(url),
18707
19395
  title: resolvedTitle,
@@ -18711,7 +19399,9 @@ function extractFromMarkdown(url, markdown, title) {
18711
19399
  tags: normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1),
18712
19400
  description: fmDescription,
18713
19401
  keywords: fmKeywords,
18714
- weight: mdWeight
19402
+ weight: mdWeight,
19403
+ publishedAt,
19404
+ meta: mdMeta
18715
19405
  };
18716
19406
  }
18717
19407
  function segmentToRegex(segment) {
@@ -18906,7 +19596,7 @@ async function parseManifest(cwd, outputDir) {
18906
19596
  const manifestPath = path__default.default.resolve(cwd, outputDir, "server", "manifest-full.js");
18907
19597
  let content;
18908
19598
  try {
18909
- content = await fs3__default.default.readFile(manifestPath, "utf8");
19599
+ content = await fs8__default.default.readFile(manifestPath, "utf8");
18910
19600
  } catch {
18911
19601
  throw new SearchSocketError(
18912
19602
  "BUILD_MANIFEST_NOT_FOUND",
@@ -19217,13 +19907,132 @@ function filePathToUrl(filePath, baseDir) {
19217
19907
  const noExt = relative.replace(/\.md$/i, "").replace(/\/index$/i, "");
19218
19908
  return normalizeUrlPath(noExt || "/");
19219
19909
  }
19220
- function normalizeSvelteToMarkdown(source) {
19221
- return source.replace(/<script[\s\S]*?<\/script>/g, "").replace(/<style[\s\S]*?<\/style>/g, "").replace(/<[^>]+>/g, " ").replace(/\{[^}]+\}/g, " ").replace(/\s+/g, " ").trim();
19910
// SvelteKit route files: +page / +layout / +error, optionally with an "@name" suffix.
var ROUTE_FILE_RE = /\+(page|layout|error)(@[^.]+)?\.svelte$/;
/**
 * Tell whether a path points at a plain Svelte component rather than a
 * route file.
 *
 * @param {string} filePath - File path to classify.
 * @returns {boolean} `true` for `.svelte` files that do not match `ROUTE_FILE_RE`.
 */
function isSvelteComponentFile(filePath) {
  return filePath.endsWith(".svelte") && !ROUTE_FILE_RE.test(filePath);
}
19223
- async function loadContentFilesPages(cwd, config, maxPages) {
19224
- const contentConfig = config.source.contentFiles;
19225
- if (!contentConfig) {
19226
- throw new Error("content-files config is missing");
19915
/**
 * Extract the documentation comment and declared props of a Svelte component.
 *
 * The description comes from a `<!-- @component ... -->` comment. Props come
 * from the first `let { ... } = $props()` declaration; their types are
 * resolved from either a named type/interface declared in the same source or
 * an inline `{ ... }` annotation.
 *
 * @param {string} source - Raw `.svelte` file contents.
 * @returns {{ description: string | undefined, props: Array<{ name: string, type?: string, default?: string }> }}
 */
function extractSvelteComponentMeta(source) {
  const componentMatch = source.match(/<!--\s*@component\s*([\s\S]*?)\s*-->/);
  const description = componentMatch?.[1]?.trim() || void 0;
  // The annotation group may start with "{" so inline object types
  // (`let { a }: { a: string } = $props()`) are captured as the annotation
  // instead of being swallowed into the destructure block.
  const propsMatch = source.match(
    /let\s+\{([\s\S]*?)\}\s*(?::\s*([^=;][\s\S]*?))?\s*=\s*\$props\(\)/
  );
  const props = [];
  if (propsMatch) {
    const destructureBlock = propsMatch[1];
    const typeAnnotation = propsMatch[2]?.trim();
    let resolvedTypeMap;
    if (typeAnnotation && /^[A-Z]\w*$/.test(typeAnnotation)) {
      // A bare capitalized identifier: look up its declaration in this file.
      resolvedTypeMap = resolveTypeReference(source, typeAnnotation);
    } else if (typeAnnotation && typeAnnotation.startsWith("{")) {
      resolvedTypeMap = parseInlineTypeAnnotation(typeAnnotation);
    }
    const propEntries = splitDestructureBlock(destructureBlock);
    for (const entry of propEntries) {
      const trimmed = entry.trim();
      // Rest elements (`...rest`) are not individual props.
      if (!trimmed || trimmed.startsWith("...")) continue;
      let propName;
      let defaultValue;
      // `name: alias` or `name: alias = default` — the prop is the left-hand name.
      const renameMatch = trimmed.match(/^(\w+)\s*:\s*\w+\s*(?:=\s*([\s\S]+))?$/);
      if (renameMatch) {
        propName = renameMatch[1];
        defaultValue = renameMatch[2]?.trim();
      } else {
        const defaultMatch = trimmed.match(/^(\w+)\s*=\s*([\s\S]+)$/);
        if (defaultMatch) {
          propName = defaultMatch[1];
          defaultValue = defaultMatch[2]?.trim();
        } else {
          propName = trimmed.match(/^(\w+)/)?.[1] ?? trimmed;
        }
      }
      const propType = resolvedTypeMap?.get(propName);
      props.push({
        name: propName,
        ...propType ? { type: propType } : {},
        ...defaultValue ? { default: defaultValue } : {}
      });
    }
  }
  return { description, props };
}
19960
/**
 * Split the inside of a destructuring pattern into its top-level entries.
 *
 * Commas nested inside `{}`, `[]`, `()` or inside a string/template literal
 * do not split. A `'` or `"` literal is considered closed at the next
 * newline, so a stray apostrophe (for example in a comment) cannot swallow
 * the rest of the block.
 *
 * @param {string} block - Text between the outer braces of the pattern.
 * @returns {string[]} Entries in source order, untrimmed; a trailing
 *   whitespace-only entry is dropped.
 */
function splitDestructureBlock(block) {
  const entries = [];
  let depth = 0;
  let quote = null;
  let escaped = false;
  let current = "";
  for (const ch of block) {
    if (quote !== null) {
      current += ch;
      if (escaped) {
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === quote || (ch === "\n" && quote !== "`")) {
        quote = null;
      }
      continue;
    }
    if (ch === "," && depth === 0) {
      entries.push(current);
      current = "";
      continue;
    }
    if (ch === '"' || ch === "'" || ch === "`") {
      quote = ch;
    } else if (ch === "{" || ch === "[" || ch === "(") {
      depth++;
    } else if (ch === "}" || ch === "]" || ch === ")") {
      depth--;
    }
    current += ch;
  }
  if (current.trim()) entries.push(current);
  return entries;
}
19981
/**
 * Locate `interface <typeName> {` or `type <typeName> = {` in `source` and
 * parse the members of its body.
 *
 * @param {string} source - File contents to search.
 * @param {string} typeName - Type name, interpolated into a regular expression as-is.
 * @returns {Map<string, string> | undefined} Result of `parseTypeMembers` on
 *   the text between the matching braces, or `undefined` when no declaration
 *   is found or its braces never balance.
 */
function resolveTypeReference(source, typeName) {
  const declarationRe = new RegExp(`(?:interface\\s+${typeName}\\s*|type\\s+${typeName}\\s*=\\s*)\\{`);
  const opening = source.match(declarationRe);
  if (!opening || opening.index === void 0) return void 0;
  const bodyStart = opening.index + opening[0].length;
  // The opening brace was consumed by the match, so nesting starts at 1.
  let depth = 1;
  for (let pos = bodyStart; pos < source.length; pos++) {
    const ch = source[pos];
    if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      if (depth === 0) {
        return parseTypeMembers(source.slice(bodyStart, pos));
      }
    }
  }
  return void 0;
}
19997
/**
 * Parse an inline object type such as `{ a: string; b?: number }`.
 *
 * @param {string} annotation - Annotation text including its outer braces.
 * @returns {Map<string, string>} Result of `parseTypeMembers` on the text
 *   with one leading `{` and one trailing `}` removed (when present).
 */
function parseInlineTypeAnnotation(annotation) {
  let inner = annotation;
  if (inner.startsWith("{")) inner = inner.slice(1);
  if (inner.endsWith("}")) inner = inner.slice(0, -1);
  return parseTypeMembers(inner);
}
20001
/**
 * Parse `name: type` members from the body of an object type.
 *
 * Members are separated by `;` or newlines. An optional `?` after the name
 * is ignored and one trailing comma is stripped from the type text. Pieces
 * that do not look like `name: type` are skipped.
 *
 * @param {string} body - Text between the braces of the type.
 * @returns {Map<string, string>} Member name to type text, in source order.
 */
function parseTypeMembers(body) {
  const types = /* @__PURE__ */ new Map();
  for (const piece of body.split(/[;\n]/)) {
    const member = piece.trim();
    if (!member) continue;
    const parsed = member.match(/^(\w+)\??\s*:\s*(.+)$/);
    if (!parsed) continue;
    const [, name, typeText] = parsed;
    types.set(name, typeText.replace(/,\s*$/, "").trim());
  }
  return types;
}
20012
/**
 * Render component metadata as a short, single-line text block.
 *
 * @param {string} componentName - Component name used in the lead sentence.
 * @param {{ description?: string, props: Array<{ name: string, type?: string, default?: string }> }} meta
 * @returns {string} "" when there is neither a description nor any props;
 *   otherwise "<name> component." followed by the description and a
 *   "Props: ..." sentence, joined with single spaces.
 */
function buildComponentMarkdown(componentName, meta) {
  const { description, props } = meta;
  if (!description && props.length === 0) return "";
  const sentences = [`${componentName} component.`];
  if (description) {
    sentences.push(description);
  }
  if (props.length > 0) {
    const described = props.map(({ name, type, default: fallback }) => {
      let label = name;
      if (type) label += ` (${type})`;
      if (fallback) label += ` default: ${fallback}`;
      return label;
    });
    sentences.push(`Props: ${described.join(", ")}.`);
  }
  return sentences.join(" ");
}
20029
/**
 * Reduce Svelte source to its plain text content.
 *
 * Removes `<script>` and `<style>` blocks, replaces remaining tags and
 * `{...}` expressions with a space, then collapses whitespace.
 *
 * @param {string} source - Raw `.svelte` file contents.
 * @returns {string} Trimmed, single-spaced text.
 */
function normalizeSvelteToMarkdown(source) {
  const withoutScripts = source.replace(/<script[\s\S]*?<\/script>/g, "");
  const withoutStyles = withoutScripts.replace(/<style[\s\S]*?<\/style>/g, "");
  const withoutTags = withoutStyles.replace(/<[^>]+>/g, " ");
  const withoutExpressions = withoutTags.replace(/\{[^}]+\}/g, " ");
  return withoutExpressions.replace(/\s+/g, " ").trim();
}
20032
+ async function loadContentFilesPages(cwd, config, maxPages) {
20033
+ const contentConfig = config.source.contentFiles;
20034
+ if (!contentConfig) {
20035
+ throw new Error("content-files config is missing");
19227
20036
  }
19228
20037
  const baseDir = path__default.default.resolve(cwd, contentConfig.baseDir);
19229
20038
  const files = await fg__default.default(contentConfig.globs, {
@@ -19235,13 +20044,27 @@ async function loadContentFilesPages(cwd, config, maxPages) {
19235
20044
  const selected = typeof limit === "number" ? files.slice(0, limit) : files;
19236
20045
  const pages = [];
19237
20046
  for (const filePath of selected) {
19238
- const raw = await fs3__default.default.readFile(filePath, "utf8");
19239
- const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
20047
+ const raw = await fs8__default.default.readFile(filePath, "utf8");
20048
+ let markdown;
20049
+ let tags;
20050
+ if (filePath.endsWith(".md")) {
20051
+ markdown = raw;
20052
+ } else if (isSvelteComponentFile(filePath)) {
20053
+ const componentName = path__default.default.basename(filePath, ".svelte");
20054
+ const meta = extractSvelteComponentMeta(raw);
20055
+ const componentMarkdown = buildComponentMarkdown(componentName, meta);
20056
+ const templateContent = normalizeSvelteToMarkdown(raw);
20057
+ markdown = componentMarkdown ? [componentMarkdown, templateContent].filter(Boolean).join("\n\n") : templateContent;
20058
+ tags = ["component"];
20059
+ } else {
20060
+ markdown = normalizeSvelteToMarkdown(raw);
20061
+ }
19240
20062
  pages.push({
19241
20063
  url: filePathToUrl(filePath, baseDir),
19242
20064
  markdown,
19243
20065
  sourcePath: path__default.default.relative(cwd, filePath).replace(/\\/g, "/"),
19244
- outgoingLinks: []
20066
+ outgoingLinks: [],
20067
+ ...tags ? { tags } : {}
19245
20068
  });
19246
20069
  }
19247
20070
  return pages;
@@ -19371,7 +20194,7 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
19371
20194
  const selected = typeof limit === "number" ? htmlFiles.slice(0, limit) : htmlFiles;
19372
20195
  const pages = [];
19373
20196
  for (const filePath of selected) {
19374
- const html = await fs3__default.default.readFile(filePath, "utf8");
20197
+ const html = await fs8__default.default.readFile(filePath, "utf8");
19375
20198
  pages.push({
19376
20199
  url: staticHtmlFileToUrl(filePath, outputDir),
19377
20200
  html,
@@ -19434,7 +20257,7 @@ function isBlockedByRobots(urlPath, rules3) {
19434
20257
  }
19435
20258
  async function loadRobotsTxtFromDir(dir) {
19436
20259
  try {
19437
- const content = await fs3__default.default.readFile(path__default.default.join(dir, "robots.txt"), "utf8");
20260
+ const content = await fs8__default.default.readFile(path__default.default.join(dir, "robots.txt"), "utf8");
19438
20261
  return parseRobotsTxt(content);
19439
20262
  } catch {
19440
20263
  return null;
@@ -19462,29 +20285,65 @@ function nonNegativeOrZero(value) {
19462
20285
  function normalizeForTitleMatch(text) {
19463
20286
  return text.toLowerCase().replace(/[^a-z0-9\s]/g, "").replace(/\s+/g, " ").trim();
19464
20287
  }
19465
- function rankHits(hits, config, query) {
20288
+ function rankHits(hits, config, query, debug) {
19466
20289
  const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
19467
20290
  const titleMatchWeight = config.ranking.weights.titleMatch;
19468
20291
  return hits.map((hit) => {
19469
- let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
20292
+ const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
20293
+ let score = baseScore;
20294
+ let incomingLinkBoostValue = 0;
19470
20295
  if (config.ranking.enableIncomingLinkBoost) {
19471
20296
  const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.metadata.incomingLinks));
19472
- score += incomingBoost * config.ranking.weights.incomingLinks;
20297
+ incomingLinkBoostValue = incomingBoost * config.ranking.weights.incomingLinks;
20298
+ score += incomingLinkBoostValue;
19473
20299
  }
20300
+ let depthBoostValue = 0;
19474
20301
  if (config.ranking.enableDepthBoost) {
19475
20302
  const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
19476
- score += depthBoost * config.ranking.weights.depth;
20303
+ depthBoostValue = depthBoost * config.ranking.weights.depth;
20304
+ score += depthBoostValue;
19477
20305
  }
20306
+ let titleMatchBoostValue = 0;
19478
20307
  if (normalizedQuery && titleMatchWeight > 0) {
19479
20308
  const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
19480
20309
  if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
19481
- score += titleMatchWeight;
20310
+ titleMatchBoostValue = titleMatchWeight;
20311
+ score += titleMatchBoostValue;
19482
20312
  }
19483
20313
  }
19484
- return {
20314
+ let freshnessBoostValue = 0;
20315
+ if (config.ranking.enableFreshnessBoost) {
20316
+ const publishedAt = hit.metadata.publishedAt;
20317
+ if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
20318
+ const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
20319
+ const decay = 1 / (1 + nonNegativeOrZero(daysSince) * config.ranking.freshnessDecayRate);
20320
+ freshnessBoostValue = decay * config.ranking.weights.freshness;
20321
+ score += freshnessBoostValue;
20322
+ }
20323
+ }
20324
+ let anchorTextMatchBoostValue = 0;
20325
+ if (config.ranking.enableAnchorTextBoost && normalizedQuery && config.ranking.weights.anchorText > 0) {
20326
+ const normalizedAnchorText = normalizeForTitleMatch(hit.metadata.incomingAnchorText ?? "");
20327
+ if (normalizedAnchorText.length > 0 && normalizedQuery.length > 0 && (normalizedAnchorText.includes(normalizedQuery) || normalizedQuery.includes(normalizedAnchorText))) {
20328
+ anchorTextMatchBoostValue = config.ranking.weights.anchorText;
20329
+ score += anchorTextMatchBoostValue;
20330
+ }
20331
+ }
20332
+ const result = {
19485
20333
  hit,
19486
20334
  finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
19487
20335
  };
20336
+ if (debug) {
20337
+ result.breakdown = {
20338
+ baseScore,
20339
+ incomingLinkBoost: incomingLinkBoostValue,
20340
+ depthBoost: depthBoostValue,
20341
+ titleMatchBoost: titleMatchBoostValue,
20342
+ freshnessBoost: freshnessBoostValue,
20343
+ anchorTextMatchBoost: anchorTextMatchBoostValue
20344
+ };
20345
+ }
20346
+ return result;
19488
20347
  }).sort((a, b) => {
19489
20348
  const delta = b.finalScore - a.finalScore;
19490
20349
  return Number.isNaN(delta) ? 0 : delta;
@@ -19493,12 +20352,13 @@ function rankHits(hits, config, query) {
19493
20352
  function trimByScoreGap(results, config) {
19494
20353
  if (results.length === 0) return results;
19495
20354
  const threshold = config.ranking.scoreGapThreshold;
19496
- const minScore = config.ranking.minScore;
19497
- if (minScore > 0 && results.length > 0) {
19498
- const sortedScores = results.map((r) => r.pageScore).sort((a, b) => a - b);
19499
- const mid = Math.floor(sortedScores.length / 2);
19500
- const median = sortedScores.length % 2 === 0 ? (sortedScores[mid - 1] + sortedScores[mid]) / 2 : sortedScores[mid];
19501
- if (median < minScore) return [];
20355
+ const minScoreRatio = config.ranking.minScoreRatio;
20356
+ if (minScoreRatio > 0 && results.length > 0) {
20357
+ const topScore = results[0].pageScore;
20358
+ if (Number.isFinite(topScore) && topScore > 0) {
20359
+ const minThreshold = topScore * minScoreRatio;
20360
+ results = results.filter((r) => r.pageScore >= minThreshold);
20361
+ }
19502
20362
  }
19503
20363
  if (threshold > 0 && results.length > 1) {
19504
20364
  for (let i = 1; i < results.length; i++) {
@@ -19568,61 +20428,99 @@ function aggregateByPage(ranked, config) {
19568
20428
  return Number.isNaN(delta) ? 0 : delta;
19569
20429
  });
19570
20430
  }
19571
- function mergePageAndChunkResults(pageHits, rankedChunks, config) {
19572
- if (pageHits.length === 0) return rankedChunks;
19573
- const w = config.search.pageSearchWeight;
19574
- const pageScoreMap = /* @__PURE__ */ new Map();
19575
- for (const ph of pageHits) {
19576
- pageScoreMap.set(ph.url, ph);
19577
- }
19578
- const pagesWithChunks = /* @__PURE__ */ new Set();
19579
- const merged = rankedChunks.map((ranked) => {
19580
- const url = ranked.hit.metadata.url;
19581
- const pageHit = pageScoreMap.get(url);
19582
- if (pageHit) {
19583
- pagesWithChunks.add(url);
19584
- const blended = (1 - w) * ranked.finalScore + w * pageHit.score;
19585
- return {
19586
- hit: ranked.hit,
19587
- finalScore: Number.isFinite(blended) ? blended : ranked.finalScore
19588
- };
20431
/**
 * Score page-level hits and return them ordered by descending final score.
 *
 * For each hit the base score is taken from `hit.score` (non-finite values
 * become -Infinity), then the enabled boosts are added in this order:
 * incoming links, depth, title match, freshness. The sum is multiplied by
 * the page weight returned by `findPageWeight` when that weight is not 1.
 * Pages whose weight is 0 are removed from the result.
 *
 * @param {Array<object>} pageHits - Page hits with `score`, `url`, `title`, etc.
 * @param {object} config - Config; only `config.ranking` is read here.
 * @param {string} [query] - Search query used for the title-match boost.
 * @param {boolean} [debug] - When truthy, attach a `breakdown` to each result.
 * @returns {Array<object>} Ranked page results, highest `finalScore` first.
 */
function rankPageHits(pageHits, config, query, debug) {
  const ranking = config.ranking;
  const queryKey = query ? normalizeForTitleMatch(query) : "";
  const titleWeight = ranking.weights.titleMatch;

  const scoreHit = (hit) => {
    const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
    let total = baseScore;

    let incomingLinkBoost = 0;
    if (ranking.enableIncomingLinkBoost) {
      incomingLinkBoost = Math.log(1 + nonNegativeOrZero(hit.incomingLinks)) * ranking.weights.incomingLinks;
      total += incomingLinkBoost;
    }

    let depthBoost = 0;
    if (ranking.enableDepthBoost) {
      depthBoost = 1 / (1 + nonNegativeOrZero(hit.depth)) * ranking.weights.depth;
      total += depthBoost;
    }

    let titleMatchBoost = 0;
    if (queryKey && titleWeight > 0) {
      const titleKey = normalizeForTitleMatch(hit.title);
      const bothPresent = queryKey.length > 0 && titleKey.length > 0;
      // Either string containing the other counts as a match.
      if (bothPresent && (titleKey.includes(queryKey) || queryKey.includes(titleKey))) {
        titleMatchBoost = titleWeight;
        total += titleMatchBoost;
      }
    }

    let freshnessBoost = 0;
    if (ranking.enableFreshnessBoost) {
      const publishedAt = hit.publishedAt;
      if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
        // 864e5 ms = one day; future timestamps clamp to zero days.
        const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
        const decay = 1 / (1 + nonNegativeOrZero(daysSince) * ranking.freshnessDecayRate);
        freshnessBoost = decay * ranking.weights.freshness;
        total += freshnessBoost;
      }
    }

    const pageWeight = findPageWeight(hit.url, ranking.pageWeights);
    if (pageWeight !== 1) {
      total *= pageWeight;
    }

    const ranked = {
      url: hit.url,
      title: hit.title,
      description: hit.description,
      routeFile: hit.routeFile,
      depth: hit.depth,
      incomingLinks: hit.incomingLinks,
      tags: hit.tags,
      baseScore,
      finalScore: Number.isFinite(total) ? total : Number.NEGATIVE_INFINITY,
      publishedAt: hit.publishedAt
    };
    if (debug) {
      ranked.breakdown = {
        baseScore,
        pageWeight,
        incomingLinkBoost,
        depthBoost,
        titleMatchBoost,
        freshnessBoost
      };
    }
    return ranked;
  };

  const isNotSuppressed = (page) => findPageWeight(page.url, ranking.pageWeights) !== 0;
  const byFinalScoreDesc = (left, right) => {
    const diff = right.finalScore - left.finalScore;
    return Number.isNaN(diff) ? 0 : diff;
  };

  return pageHits.map((hit) => scoreHit(hit)).filter(isNotSuppressed).sort(byFinalScoreDesc);
}
20499
/**
 * Trim an already-sorted list of ranked pages.
 *
 * 1. When `minScoreRatio > 0` and the top score is finite and positive,
 *    drop entries scoring below `topScore * minScoreRatio`.
 * 2. When `scoreGapThreshold > 0`, cut the list at the first entry whose
 *    relative drop from its predecessor is at least the threshold
 *    (only evaluated when the predecessor's score is positive).
 *
 * @param {Array<{finalScore: number}>} results - Pages sorted by descending score.
 * @param {object} config - Reads `ranking.scoreGapThreshold` and `ranking.minScoreRatio`.
 * @returns {Array<{finalScore: number}>} The trimmed list (the input array itself when nothing is trimmed).
 */
function trimPagesByScoreGap(results, config) {
  if (results.length === 0) return results;
  const { scoreGapThreshold, minScoreRatio } = config.ranking;
  let kept = results;
  if (minScoreRatio > 0) {
    const best = kept[0].finalScore;
    if (Number.isFinite(best) && best > 0) {
      const floor = best * minScoreRatio;
      kept = kept.filter((entry) => entry.finalScore >= floor);
    }
  }
  if (!(scoreGapThreshold > 0) || kept.length <= 1) {
    return kept;
  }
  const cutIndex = kept.findIndex((entry, idx) => {
    if (idx === 0) return false;
    const previous = kept[idx - 1].finalScore;
    return previous > 0 && (previous - entry.finalScore) / previous >= scoreGapThreshold;
  });
  return cutIndex === -1 ? kept : kept.slice(0, cutIndex);
}
19626
20524
 
19627
20525
  // src/utils/time.ts
19628
20526
  function nowIso() {
@@ -19631,6 +20529,81 @@ function nowIso() {
19631
20529
  function hrTimeMs(start) {
19632
20530
  return Number(process.hrtime.bigint() - start) / 1e6;
19633
20531
  }
20532
/**
 * Resolve a page URL against an optional base URL.
 *
 * @param {string} pageUrl - Page URL or path.
 * @param {string} [baseUrl] - Base URL; when falsy the page URL is returned as-is.
 * @returns {string} The absolute URL, or `pageUrl` unchanged when there is no
 *   base or the URL constructor rejects the combination.
 */
function resolvePageUrl(pageUrl, baseUrl) {
  let resolved = pageUrl;
  if (baseUrl) {
    try {
      resolved = new URL(pageUrl, baseUrl).href;
    } catch {
      // Invalid base or path: keep the original page URL.
    }
  }
  return resolved;
}
20540
/**
 * Render the `llms.txt` index: a title heading, an optional blockquote
 * description, and a "Pages" list of markdown links.
 *
 * Pages with URL `/llms.txt` or `/llms-full.txt` are excluded. The rest are
 * ordered by ascending depth, then by descending incoming link count.
 *
 * @param {Array<object>} pages - Pages with `url`, `title`, `depth`, `incomingLinks`, optional `description`.
 * @param {object} config - Reads `llmsTxt.title`, `llmsTxt.description`, `project.id`, `project.baseUrl`.
 * @returns {string} File content, ending with a newline.
 */
function generateLlmsTxt(pages, config) {
  const { llmsTxt, project } = config;
  const heading = llmsTxt.title ?? project.id;
  const summary = llmsTxt.description;
  const baseUrl = project.baseUrl;
  const out = [`# ${heading}`];
  if (summary) {
    out.push("", `> ${summary}`);
  }
  const listed = pages
    .filter((page) => !(page.url === "/llms.txt" || page.url === "/llms-full.txt"))
    .sort((a, b) => a.depth !== b.depth ? a.depth - b.depth : b.incomingLinks - a.incomingLinks);
  if (listed.length > 0) {
    out.push("", "## Pages", "");
    listed.forEach((page) => {
      const link = `- [${page.title}](${resolvePageUrl(page.url, baseUrl)})`;
      out.push(page.description ? `${link}: ${page.description}` : link);
    });
  }
  out.push("");
  return out.join("\n");
}
20569
/**
 * Render `llms-full.txt`: a title heading, an optional blockquote
 * description, then every page's trimmed markdown under a linked `##`
 * heading, each section preceded by a `---` rule.
 *
 * Pages with URL `/llms.txt` or `/llms-full.txt` are excluded. The rest are
 * ordered by ascending depth, then by descending incoming link count.
 *
 * @param {Array<object>} pages - Pages with `url`, `title`, `depth`, `incomingLinks`, `markdown`.
 * @param {object} config - Reads `llmsTxt.title`, `llmsTxt.description`, `project.id`, `project.baseUrl`.
 * @returns {string} File content, ending with a newline.
 */
function generateLlmsFullTxt(pages, config) {
  const { llmsTxt, project } = config;
  const heading = llmsTxt.title ?? project.id;
  const summary = llmsTxt.description;
  const baseUrl = project.baseUrl;
  const out = [`# ${heading}`];
  if (summary) {
    out.push("", `> ${summary}`);
  }
  const ordered = pages
    .filter((page) => !(page.url === "/llms.txt" || page.url === "/llms-full.txt"))
    .sort((a, b) => a.depth !== b.depth ? a.depth - b.depth : b.incomingLinks - a.incomingLinks);
  ordered.forEach((page) => {
    const absoluteUrl = resolvePageUrl(page.url, baseUrl);
    out.push("", "---", "", `## [${page.title}](${absoluteUrl})`, "", page.markdown.trim());
  });
  out.push("");
  return out.join("\n");
}
20592
/**
 * Write `llms.txt` (and optionally the full variant) to disk.
 *
 * The output directory is created if missing. When
 * `config.llmsTxt.generateFull` is truthy, a second file is written next to
 * the first with `-full` inserted before the extension.
 *
 * @param {Array<object>} pages - Indexed pages passed to the generators.
 * @param {object} config - Reads `llmsTxt.outputPath` and `llmsTxt.generateFull`.
 * @param {string} cwd - Directory that `outputPath` is resolved against.
 * @param {{info: (message: string) => void}} logger3 - Logger used for progress messages.
 * @returns {Promise<void>}
 */
async function writeLlmsTxt(pages, config, cwd, logger3) {
  const outputPath = path__default.default.resolve(cwd, config.llmsTxt.outputPath);
  const outputDir = path__default.default.dirname(outputPath);
  await fs8__default.default.mkdir(outputDir, { recursive: true });
  const content = generateLlmsTxt(pages, config);
  await fs8__default.default.writeFile(outputPath, content, "utf8");
  logger3.info(`Generated llms.txt at ${config.llmsTxt.outputPath}`);
  if (!config.llmsTxt.generateFull) {
    return;
  }
  let fullPath;
  if (outputPath.endsWith(".txt")) {
    fullPath = outputPath.replace(/\.txt$/, "-full.txt");
  } else {
    // A bare `.txt` replace would be a no-op for other extensions, making
    // fullPath === outputPath and overwriting the file written above.
    const parsed = path__default.default.parse(outputPath);
    fullPath = path__default.default.join(parsed.dir, `${parsed.name}-full${parsed.ext}`);
  }
  const fullContent = generateLlmsFullTxt(pages, config);
  await fs8__default.default.writeFile(fullPath, fullContent, "utf8");
  const relativeFull = path__default.default.relative(cwd, fullPath);
  logger3.info(`Generated llms-full.txt at ${relativeFull}`);
}
19634
20607
 
19635
20608
  // src/indexing/pipeline.ts
19636
20609
  function buildPageSummary(page, maxChars = 3500) {
@@ -19649,16 +20622,33 @@ function buildPageSummary(page, maxChars = 3500) {
19649
20622
  if (joined.length <= maxChars) return joined;
19650
20623
  return joined.slice(0, maxChars).trim();
19651
20624
  }
20625
/**
 * Compute a content hash for a page, used to detect whether the page needs
 * to be re-upserted.
 *
 * The hash covers title, description, sorted keywords, sorted tags,
 * markdown, the stringified `outgoingLinks` value, `publishedAt`,
 * `incomingAnchorText`, sorted outgoing link URLs, and `meta` serialized
 * with keys sorted at every nesting level. Fields are joined with `|` and
 * passed to `sha256`.
 *
 * @param {object} page - Indexed page record.
 * @returns {string} Hash returned by `sha256`.
 */
function buildPageContentHash(page) {
  // An array replacer would act as a key allow-list at EVERY depth and drop
  // nested properties; a function replacer sorts keys without losing them.
  const sortObjectKeys = (_key, value) => {
    if (value === null || typeof value !== "object" || Array.isArray(value)) {
      return value;
    }
    return Object.fromEntries(Object.keys(value).sort().map((key) => [key, value[key]]));
  };
  const parts = [
    page.title,
    page.description ?? "",
    (page.keywords ?? []).slice().sort().join(","),
    page.tags.slice().sort().join(","),
    page.markdown,
    String(page.outgoingLinks),
    String(page.publishedAt ?? ""),
    page.incomingAnchorText ?? "",
    (page.outgoingLinkUrls ?? []).slice().sort().join(","),
    page.meta ? JSON.stringify(page.meta, sortObjectKeys) : ""
  ];
  return sha256(parts.join("|"));
}
19652
20640
  var IndexPipeline = class _IndexPipeline {
19653
20641
  cwd;
19654
20642
  config;
19655
20643
  store;
19656
20644
  logger;
20645
+ hooks;
19657
20646
  constructor(options) {
19658
20647
  this.cwd = options.cwd;
19659
20648
  this.config = options.config;
19660
20649
  this.store = options.store;
19661
20650
  this.logger = options.logger;
20651
+ this.hooks = options.hooks;
19662
20652
  }
19663
20653
  static async create(options = {}) {
19664
20654
  const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
@@ -19668,7 +20658,8 @@ var IndexPipeline = class _IndexPipeline {
19668
20658
  cwd,
19669
20659
  config,
19670
20660
  store,
19671
- logger: options.logger ?? new Logger()
20661
+ logger: options.logger ?? new Logger(),
20662
+ hooks: options.hooks ?? {}
19672
20663
  });
19673
20664
  }
19674
20665
  getConfig() {
@@ -19689,7 +20680,7 @@ var IndexPipeline = class _IndexPipeline {
19689
20680
  const scope = resolveScope(this.config, options.scopeOverride);
19690
20681
  ensureStateDirs(this.cwd, this.config.state.dir);
19691
20682
  const sourceMode = options.sourceOverride ?? this.config.source.mode;
19692
- this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
20683
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-vector)`);
19693
20684
  if (options.force) {
19694
20685
  this.logger.info("Force mode enabled \u2014 full rebuild");
19695
20686
  }
@@ -19697,9 +20688,9 @@ var IndexPipeline = class _IndexPipeline {
19697
20688
  this.logger.info("Dry run \u2014 no writes will be performed");
19698
20689
  }
19699
20690
  const manifestStart = stageStart();
19700
- const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
20691
+ const existingPageHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getPageHashes(scope);
19701
20692
  stageEnd("manifest", manifestStart);
19702
- this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
20693
+ this.logger.debug(`Manifest: ${existingPageHashes.size} existing page hashes loaded`);
19703
20694
  const sourceStart = stageStart();
19704
20695
  this.logger.info(`Loading pages (source: ${sourceMode})...`);
19705
20696
  let sourcePages;
@@ -19776,11 +20767,61 @@ var IndexPipeline = class _IndexPipeline {
19776
20767
  );
19777
20768
  continue;
19778
20769
  }
19779
- extractedPages.push(extracted);
20770
+ if (sourcePage.tags && sourcePage.tags.length > 0) {
20771
+ extracted.tags = [.../* @__PURE__ */ new Set([...extracted.tags, ...sourcePage.tags])];
20772
+ }
20773
+ let accepted;
20774
+ if (this.hooks.transformPage) {
20775
+ const transformed = await this.hooks.transformPage(extracted);
20776
+ if (transformed === null) {
20777
+ this.logger.debug(`Page ${sourcePage.url} skipped by transformPage hook`);
20778
+ continue;
20779
+ }
20780
+ accepted = transformed;
20781
+ } else {
20782
+ accepted = extracted;
20783
+ }
20784
+ extractedPages.push(accepted);
19780
20785
  this.logger.event("page_extracted", {
19781
- url: extracted.url
20786
+ url: accepted.url
19782
20787
  });
19783
20788
  }
20789
+ const customRecords = options.customRecords ?? [];
20790
+ if (customRecords.length > 0) {
20791
+ this.logger.info(`Processing ${customRecords.length} custom record${customRecords.length === 1 ? "" : "s"}...`);
20792
+ for (const record of customRecords) {
20793
+ const normalizedUrl = normalizeUrlPath(record.url);
20794
+ const normalized = normalizeMarkdown(record.content);
20795
+ if (!normalized.trim()) {
20796
+ this.logger.warn(`Custom record ${normalizedUrl} has empty content and was skipped.`);
20797
+ continue;
20798
+ }
20799
+ const urlTags = normalizedUrl.split("/").filter(Boolean).slice(0, 1);
20800
+ const tags = record.tags ? [.../* @__PURE__ */ new Set([...urlTags, ...record.tags])] : urlTags;
20801
+ const extracted = {
20802
+ url: normalizedUrl,
20803
+ title: record.title,
20804
+ markdown: normalized,
20805
+ outgoingLinks: [],
20806
+ noindex: false,
20807
+ tags,
20808
+ weight: record.weight
20809
+ };
20810
+ let accepted;
20811
+ if (this.hooks.transformPage) {
20812
+ const transformed = await this.hooks.transformPage(extracted);
20813
+ if (transformed === null) {
20814
+ this.logger.debug(`Custom record ${normalizedUrl} skipped by transformPage hook`);
20815
+ continue;
20816
+ }
20817
+ accepted = transformed;
20818
+ } else {
20819
+ accepted = extracted;
20820
+ }
20821
+ extractedPages.push(accepted);
20822
+ this.logger.event("page_extracted", { url: accepted.url, custom: true });
20823
+ }
20824
+ }
19784
20825
  extractedPages.sort((a, b) => a.url.localeCompare(b.url));
19785
20826
  const uniquePages = [];
19786
20827
  const seenUrls = /* @__PURE__ */ new Set();
@@ -19813,15 +20854,28 @@ var IndexPipeline = class _IndexPipeline {
19813
20854
  const linkStart = stageStart();
19814
20855
  const pageSet = new Set(indexablePages.map((page) => normalizeUrlPath(page.url)));
19815
20856
  const incomingLinkCount = /* @__PURE__ */ new Map();
20857
+ const incomingAnchorTexts = /* @__PURE__ */ new Map();
19816
20858
  for (const page of indexablePages) {
19817
20859
  incomingLinkCount.set(page.url, incomingLinkCount.get(page.url) ?? 0);
19818
20860
  }
19819
20861
  for (const page of indexablePages) {
19820
- for (const outgoing of page.outgoingLinks) {
20862
+ const seenForCount = /* @__PURE__ */ new Set();
20863
+ const seenForAnchor = /* @__PURE__ */ new Set();
20864
+ for (const { url: outgoing, anchorText } of page.outgoingLinks) {
19821
20865
  if (!pageSet.has(outgoing)) {
19822
20866
  continue;
19823
20867
  }
19824
- incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
20868
+ if (!seenForCount.has(outgoing)) {
20869
+ seenForCount.add(outgoing);
20870
+ incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
20871
+ }
20872
+ if (anchorText && !seenForAnchor.has(outgoing)) {
20873
+ seenForAnchor.add(outgoing);
20874
+ if (!incomingAnchorTexts.has(outgoing)) {
20875
+ incomingAnchorTexts.set(outgoing, /* @__PURE__ */ new Set());
20876
+ }
20877
+ incomingAnchorTexts.get(outgoing).add(anchorText);
20878
+ }
19825
20879
  }
19826
20880
  }
19827
20881
  stageEnd("links", linkStart);
@@ -19840,6 +20894,15 @@ var IndexPipeline = class _IndexPipeline {
19840
20894
  });
19841
20895
  }
19842
20896
  }
20897
+ for (const record of customRecords) {
20898
+ const normalizedUrl = normalizeUrlPath(record.url);
20899
+ if (!precomputedRoutes.has(normalizedUrl)) {
20900
+ precomputedRoutes.set(normalizedUrl, {
20901
+ routeFile: "",
20902
+ routeResolution: "exact"
20903
+ });
20904
+ }
20905
+ }
19843
20906
  for (const page of indexablePages) {
19844
20907
  const routeMatch = precomputedRoutes.get(normalizeUrlPath(page.url)) ?? mapUrlToRoute(page.url, routePatterns);
19845
20908
  if (routeMatch.routeResolution === "best-effort") {
@@ -19857,6 +20920,17 @@ var IndexPipeline = class _IndexPipeline {
19857
20920
  } else {
19858
20921
  routeExact += 1;
19859
20922
  }
20923
+ const anchorSet = incomingAnchorTexts.get(page.url);
20924
+ let incomingAnchorText;
20925
+ if (anchorSet && anchorSet.size > 0) {
20926
+ let joined = "";
20927
+ for (const phrase of anchorSet) {
20928
+ const next2 = joined ? `${joined} ${phrase}` : phrase;
20929
+ if (next2.length > 500) break;
20930
+ joined = next2;
20931
+ }
20932
+ incomingAnchorText = joined || void 0;
20933
+ }
19860
20934
  const indexedPage = {
19861
20935
  url: page.url,
19862
20936
  title: page.title,
@@ -19866,40 +20940,113 @@ var IndexPipeline = class _IndexPipeline {
19866
20940
  generatedAt: nowIso(),
19867
20941
  incomingLinks: incomingLinkCount.get(page.url) ?? 0,
19868
20942
  outgoingLinks: page.outgoingLinks.length,
20943
+ outgoingLinkUrls: page.outgoingLinks.map((l) => typeof l === "string" ? l : l.url),
19869
20944
  depth: getUrlDepth(page.url),
19870
20945
  tags: page.tags,
19871
20946
  markdown: page.markdown,
19872
20947
  description: page.description,
19873
- keywords: page.keywords
20948
+ keywords: page.keywords,
20949
+ publishedAt: page.publishedAt,
20950
+ incomingAnchorText,
20951
+ meta: page.meta
19874
20952
  };
19875
20953
  pages.push(indexedPage);
19876
20954
  this.logger.event("page_indexed", { url: page.url });
19877
20955
  }
20956
+ const pageRecords = pages.map((p) => {
20957
+ const summary = buildPageSummary(p);
20958
+ return {
20959
+ url: p.url,
20960
+ title: p.title,
20961
+ markdown: p.markdown,
20962
+ projectId: scope.projectId,
20963
+ scopeName: scope.scopeName,
20964
+ routeFile: p.routeFile,
20965
+ routeResolution: p.routeResolution,
20966
+ incomingLinks: p.incomingLinks,
20967
+ outgoingLinks: p.outgoingLinks,
20968
+ outgoingLinkUrls: p.outgoingLinkUrls,
20969
+ depth: p.depth,
20970
+ tags: p.tags,
20971
+ indexedAt: p.generatedAt,
20972
+ summary,
20973
+ description: p.description,
20974
+ keywords: p.keywords,
20975
+ contentHash: buildPageContentHash(p),
20976
+ publishedAt: p.publishedAt,
20977
+ meta: p.meta
20978
+ };
20979
+ });
20980
+ const currentPageUrls = new Set(pageRecords.map((r) => r.url));
20981
+ const changedPages = pageRecords.filter(
20982
+ (r) => !existingPageHashes.has(r.url) || existingPageHashes.get(r.url) !== r.contentHash
20983
+ );
20984
+ const deletedPageUrls = [...existingPageHashes.keys()].filter((url) => !currentPageUrls.has(url));
19878
20985
  if (!options.dryRun) {
19879
- const pageRecords = pages.map((p) => {
19880
- const summary = buildPageSummary(p);
19881
- return {
19882
- url: p.url,
19883
- title: p.title,
19884
- markdown: p.markdown,
19885
- projectId: scope.projectId,
19886
- scopeName: scope.scopeName,
19887
- routeFile: p.routeFile,
19888
- routeResolution: p.routeResolution,
19889
- incomingLinks: p.incomingLinks,
19890
- outgoingLinks: p.outgoingLinks,
19891
- depth: p.depth,
19892
- tags: p.tags,
19893
- indexedAt: p.generatedAt,
19894
- summary,
19895
- description: p.description,
19896
- keywords: p.keywords
19897
- };
19898
- });
19899
- await this.store.deletePages(scope);
19900
- await this.store.upsertPages(pageRecords, scope);
20986
+ if (options.force) {
20987
+ await this.store.deletePages(scope);
20988
+ this.logger.info(`Upserting ${pageRecords.length} page summaries...`);
20989
+ const pageDocs = pageRecords.map((r) => ({
20990
+ id: r.url,
20991
+ data: r.summary ?? r.title,
20992
+ metadata: {
20993
+ title: r.title,
20994
+ url: r.url,
20995
+ description: r.description ?? "",
20996
+ keywords: r.keywords ?? [],
20997
+ summary: r.summary ?? "",
20998
+ tags: r.tags,
20999
+ markdown: r.markdown,
21000
+ routeFile: r.routeFile,
21001
+ routeResolution: r.routeResolution,
21002
+ incomingLinks: r.incomingLinks,
21003
+ outgoingLinks: r.outgoingLinks,
21004
+ outgoingLinkUrls: r.outgoingLinkUrls ?? [],
21005
+ depth: r.depth,
21006
+ indexedAt: r.indexedAt,
21007
+ contentHash: r.contentHash ?? "",
21008
+ publishedAt: r.publishedAt ?? null,
21009
+ ...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
21010
+ }
21011
+ }));
21012
+ await this.store.upsertPages(pageDocs, scope);
21013
+ } else {
21014
+ if (changedPages.length > 0) {
21015
+ this.logger.info(`Upserting ${changedPages.length} changed page summaries...`);
21016
+ const pageDocs = changedPages.map((r) => ({
21017
+ id: r.url,
21018
+ data: r.summary ?? r.title,
21019
+ metadata: {
21020
+ title: r.title,
21021
+ url: r.url,
21022
+ description: r.description ?? "",
21023
+ keywords: r.keywords ?? [],
21024
+ summary: r.summary ?? "",
21025
+ tags: r.tags,
21026
+ markdown: r.markdown,
21027
+ routeFile: r.routeFile,
21028
+ routeResolution: r.routeResolution,
21029
+ incomingLinks: r.incomingLinks,
21030
+ outgoingLinks: r.outgoingLinks,
21031
+ outgoingLinkUrls: r.outgoingLinkUrls ?? [],
21032
+ depth: r.depth,
21033
+ indexedAt: r.indexedAt,
21034
+ contentHash: r.contentHash ?? "",
21035
+ publishedAt: r.publishedAt ?? null,
21036
+ ...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
21037
+ }
21038
+ }));
21039
+ await this.store.upsertPages(pageDocs, scope);
21040
+ }
21041
+ if (deletedPageUrls.length > 0) {
21042
+ await this.store.deletePagesByIds(deletedPageUrls, scope);
21043
+ }
21044
+ }
19901
21045
  }
21046
+ const pagesChanged = options.force ? pageRecords.length : changedPages.length;
21047
+ const pagesDeleted = deletedPageUrls.length;
19902
21048
  stageEnd("pages", pagesStart);
21049
+ this.logger.info(`Page changes: ${pagesChanged} changed/new, ${pagesDeleted} deleted, ${pageRecords.length - changedPages.length} unchanged`);
19903
21050
  this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
19904
21051
  const chunkStart = stageStart();
19905
21052
  this.logger.info("Chunking pages...");
@@ -19908,6 +21055,18 @@ var IndexPipeline = class _IndexPipeline {
19908
21055
  if (typeof maxChunks === "number") {
19909
21056
  chunks = chunks.slice(0, maxChunks);
19910
21057
  }
21058
+ if (this.hooks.transformChunk) {
21059
+ const transformed = [];
21060
+ for (const chunk of chunks) {
21061
+ const result = await this.hooks.transformChunk(chunk);
21062
+ if (result === null) {
21063
+ this.logger.debug(`Chunk ${chunk.chunkKey} skipped by transformChunk hook`);
21064
+ continue;
21065
+ }
21066
+ transformed.push(result);
21067
+ }
21068
+ chunks = transformed;
21069
+ }
19911
21070
  for (const chunk of chunks) {
19912
21071
  this.logger.event("chunked", {
19913
21072
  url: chunk.url,
@@ -19920,7 +21079,12 @@ var IndexPipeline = class _IndexPipeline {
19920
21079
  for (const chunk of chunks) {
19921
21080
  currentChunkMap.set(chunk.chunkKey, chunk);
19922
21081
  }
19923
- const changedChunks = chunks.filter((chunk) => {
21082
+ const chunkHashStart = stageStart();
21083
+ const currentChunkKeys = chunks.map((c) => c.chunkKey);
21084
+ const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.fetchContentHashesForKeys(currentChunkKeys, scope);
21085
+ stageEnd("chunk_hashes", chunkHashStart);
21086
+ this.logger.debug(`Fetched ${existingHashes.size} existing chunk hashes for ${currentChunkKeys.length} current keys`);
21087
+ let changedChunks = chunks.filter((chunk) => {
19924
21088
  if (options.force) {
19925
21089
  return true;
19926
21090
  }
@@ -19933,37 +21097,45 @@ var IndexPipeline = class _IndexPipeline {
19933
21097
  }
19934
21098
  return existingHash !== chunk.contentHash;
19935
21099
  });
19936
- const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
21100
+ const existingChunkIds = options.force ? /* @__PURE__ */ new Set() : await this.store.scanChunkIds(scope);
21101
+ const deletes = [...existingChunkIds].filter((chunkKey) => !currentChunkMap.has(chunkKey));
21102
+ if (this.hooks.beforeIndex) {
21103
+ changedChunks = await this.hooks.beforeIndex(changedChunks);
21104
+ }
19937
21105
  this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
19938
21106
  const upsertStart = stageStart();
19939
21107
  let documentsUpserted = 0;
19940
21108
  if (!options.dryRun && changedChunks.length > 0) {
19941
- this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
19942
- const UPSTASH_CONTENT_LIMIT = 4096;
21109
+ this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
19943
21110
  const docs = changedChunks.map((chunk) => {
19944
- const title = chunk.title;
19945
- const sectionTitle = chunk.sectionTitle ?? "";
19946
- const url = chunk.url;
19947
- const tags = chunk.tags.join(",");
19948
- const headingPath = chunk.headingPath.join(" > ");
19949
- const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
19950
- const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
19951
- const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
21111
+ const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
21112
+ if (embeddingText.length > 2e3) {
21113
+ this.logger.warn(
21114
+ `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
21115
+ );
21116
+ }
19952
21117
  return {
19953
21118
  id: chunk.chunkKey,
19954
- content: { title, sectionTitle, text, url, tags, headingPath },
21119
+ data: embeddingText,
19955
21120
  metadata: {
19956
- projectId: scope.projectId,
19957
- scopeName: scope.scopeName,
21121
+ url: chunk.url,
19958
21122
  path: chunk.path,
21123
+ title: chunk.title,
21124
+ sectionTitle: chunk.sectionTitle ?? "",
21125
+ headingPath: chunk.headingPath.join(" > "),
19959
21126
  snippet: chunk.snippet,
21127
+ chunkText: embeddingText,
21128
+ tags: chunk.tags,
19960
21129
  ordinal: chunk.ordinal,
19961
21130
  contentHash: chunk.contentHash,
19962
21131
  depth: chunk.depth,
19963
21132
  incomingLinks: chunk.incomingLinks,
19964
21133
  routeFile: chunk.routeFile,
19965
21134
  description: chunk.description ?? "",
19966
- keywords: (chunk.keywords ?? []).join(",")
21135
+ keywords: chunk.keywords ?? [],
21136
+ publishedAt: chunk.publishedAt ?? null,
21137
+ incomingAnchorText: chunk.incomingAnchorText ?? "",
21138
+ ...chunk.meta && Object.keys(chunk.meta).length > 0 ? { meta: chunk.meta } : {}
19967
21139
  }
19968
21140
  };
19969
21141
  });
@@ -19981,9 +21153,16 @@ var IndexPipeline = class _IndexPipeline {
19981
21153
  } else {
19982
21154
  this.logger.info("No chunks to upsert \u2014 all up to date");
19983
21155
  }
21156
+ if (this.config.llmsTxt.enable && !options.dryRun) {
21157
+ const llmsStart = stageStart();
21158
+ await writeLlmsTxt(pages, this.config, this.cwd, this.logger);
21159
+ stageEnd("llms_txt", llmsStart);
21160
+ }
19984
21161
  this.logger.info("Done.");
19985
- return {
21162
+ const stats = {
19986
21163
  pagesProcessed: pages.length,
21164
+ pagesChanged,
21165
+ pagesDeleted,
19987
21166
  chunksTotal: chunks.length,
19988
21167
  chunksChanged: changedChunks.length,
19989
21168
  documentsUpserted,
@@ -19992,16 +21171,143 @@ var IndexPipeline = class _IndexPipeline {
19992
21171
  routeBestEffort,
19993
21172
  stageTimingsMs
19994
21173
  };
21174
+ if (this.hooks.afterIndex) {
21175
+ await this.hooks.afterIndex(stats);
21176
+ }
21177
+ return stats;
19995
21178
  }
19996
21179
  };
21180
+
21181
+ // src/search/related-pages.ts
21182
/**
 * Structural similarity of two URL paths, based on their shared leading
 * path segments.
 *
 * The score is `2 * sharedPrefixLength / (segmentsA + segmentsB)`, so two
 * identical non-empty paths score 1 and paths that diverge at the first
 * segment score 0. Two root paths (no segments at all) score 1; a root path
 * compared with a non-root path scores 0.
 *
 * @param {string} urlA - First URL path.
 * @param {string} urlB - Second URL path.
 * @returns {number} Similarity in the range [0, 1].
 */
function diceScore(urlA, urlB) {
  const toSegments = (url) => url.split("/").filter((part) => part.length > 0);
  const left = toSegments(urlA);
  const right = toSegments(urlB);
  const total = left.length + right.length;
  if (total === 0) {
    return 1;
  }
  if (left.length === 0 || right.length === 0) {
    return 0;
  }
  // Only a common *prefix* counts: stop at the first differing segment.
  let prefixLength = 0;
  while (
    prefixLength < left.length &&
    prefixLength < right.length &&
    left[prefixLength] === right[prefixLength]
  ) {
    prefixLength += 1;
  }
  return 2 * prefixLength / total;
}
21198
/**
 * Combines the three relatedness signals into one score.
 *
 * A direct link contributes a flat 0.5; structural similarity is weighted
 * 0.3 and semantic similarity 0.2.
 *
 * @param {boolean} isLinked - Whether the pages are linked in either direction.
 * @param {number} dice - Structural (URL path) similarity.
 * @param {number} semantic - Semantic similarity score.
 * @returns {number} Weighted sum of the three signals.
 */
function compositeScore(isLinked, dice, semantic) {
  const linkBonus = isLinked ? 0.5 : 0;
  const structuralPart = 0.3 * dice;
  const semanticPart = 0.2 * semantic;
  return linkBonus + structuralPart + semanticPart;
}
21201
/**
 * Picks the single label that best describes how a candidate page relates
 * to the source page.
 *
 * Precedence: an outgoing link wins over an incoming link, which wins over
 * structural proximity (`dice` strictly greater than 0.4); anything else is
 * labelled as a purely semantic relationship.
 *
 * @param {boolean} isOutgoing - Source page links to the candidate.
 * @param {boolean} isIncoming - Candidate links to the source page.
 * @param {number} dice - Structural (URL path) similarity.
 * @returns {"outgoing_link"|"incoming_link"|"sibling"|"semantic"}
 */
function dominantRelationshipType(isOutgoing, isIncoming, dice) {
  if (isOutgoing) {
    return "outgoing_link";
  }
  if (isIncoming) {
    return "incoming_link";
  }
  return dice > 0.4 ? "sibling" : "semantic";
}
21207
+
21208
+ // src/search/engine.ts
21209
// Validation schema for per-request ranking/search overrides. Every field is
// optional, and the whole object may be omitted.
var rankingOverridesSchema = (() => {
  const { z } = zod;
  // Each factory builds a fresh schema instance per field, as the inline form did.
  const optionalFlag = () => z.boolean().optional();
  const optionalRatio = () => z.number().min(0).max(1).optional();
  const optionalNumber = () => z.number().optional();

  const weights = z.object({
    incomingLinks: optionalNumber(),
    depth: optionalNumber(),
    aggregation: optionalNumber(),
    titleMatch: optionalNumber()
  }).optional();

  const ranking = z.object({
    enableIncomingLinkBoost: optionalFlag(),
    enableDepthBoost: optionalFlag(),
    aggregationCap: z.number().int().positive().optional(),
    aggregationDecay: optionalRatio(),
    minChunkScoreRatio: optionalRatio(),
    minScoreRatio: optionalRatio(),
    scoreGapThreshold: optionalRatio(),
    weights
  }).optional();

  const search = z.object({
    pageSearchWeight: optionalRatio()
  }).optional();

  return z.object({ ranking, search }).optional();
})();
19997
21229
  var requestSchema = zod.z.object({
19998
21230
  q: zod.z.string().trim().min(1),
19999
21231
  topK: zod.z.number().int().positive().max(100).optional(),
20000
21232
  scope: zod.z.string().optional(),
20001
21233
  pathPrefix: zod.z.string().optional(),
20002
21234
  tags: zod.z.array(zod.z.string()).optional(),
20003
- groupBy: zod.z.enum(["page", "chunk"]).optional()
21235
+ filters: zod.z.record(zod.z.string(), zod.z.union([zod.z.string(), zod.z.number(), zod.z.boolean()])).optional(),
21236
+ groupBy: zod.z.enum(["page", "chunk"]).optional(),
21237
+ maxSubResults: zod.z.number().int().positive().max(20).optional(),
21238
+ debug: zod.z.boolean().optional(),
21239
+ rankingOverrides: rankingOverridesSchema
20004
21240
  });
21241
// Default and hard upper bound on the number of pages getSiteStructure()
// collects before reporting the result as truncated.
var MAX_SITE_STRUCTURE_PAGES = 2000;
21242
/**
 * Creates an empty site-structure tree node.
 *
 * The node starts out un-indexed, with a blank title/route file and no
 * children; callers fill those in as pages are discovered.
 *
 * @param {string} url - URL path the node represents.
 * @param {number} depth - Number of path segments from the root.
 * @returns {{url: string, title: string, depth: number, routeFile: string,
 *   isIndexed: boolean, childCount: number, children: Array}} Fresh node.
 */
function makeNode(url, depth) {
  const node = {
    url,
    title: "",
    depth,
    routeFile: "",
    isIndexed: false,
    childCount: 0,
    children: []
  };
  return node;
}
21245
/**
 * Builds a hierarchical page tree from a flat list of indexed pages, using
 * URL path segments as the hierarchy.
 *
 * Intermediate path levels that have no page of their own are still created
 * as nodes (with `isIndexed: false`) so every page hangs off a complete
 * chain of parents. Children are sorted by URL and `childCount` reflects the
 * number of direct children.
 *
 * Node lookups are keyed on the canonical "/seg/seg" form rebuilt from the
 * path segments rather than on the raw normalizeUrlPath() output, so a
 * normalized path that still carries a trailing or repeated slash resolves
 * to its node instead of dereferencing `undefined`.
 *
 * @param {Array<{url: string, title: string, routeFile: string}>} pages -
 *   Indexed pages to place in the tree.
 * @param {string} [pathPrefix] - When given, the subtree rooted at this path
 *   is returned; if no such node exists an empty placeholder node is
 *   returned instead.
 * @returns {object} Root node of the tree (or of the requested subtree).
 */
function buildTree(pages, pathPrefix) {
  const toSegments = (urlPath) => urlPath.split("/").filter(Boolean);
  const toKey = (segments) => `/${segments.join("/")}`;
  const markIndexed = (node, page) => {
    node.title = page.title;
    node.routeFile = page.routeFile;
    node.isIndexed = true;
  };

  const nodeMap = /* @__PURE__ */ new Map();
  const root2 = makeNode("/", 0);
  nodeMap.set("/", root2);

  for (const page of pages) {
    const segments = toSegments(normalizeUrlPath(page.url));
    if (segments.length === 0) {
      markIndexed(root2, page);
      continue;
    }
    // Ensure every ancestor level exists; the last node visited is the page's own.
    let node = root2;
    for (let depth = 1; depth <= segments.length; depth++) {
      const partialUrl = toKey(segments.slice(0, depth));
      let existing = nodeMap.get(partialUrl);
      if (!existing) {
        existing = makeNode(partialUrl, depth);
        nodeMap.set(partialUrl, existing);
      }
      node = existing;
    }
    markIndexed(node, page);
  }

  // Attach each node to its parent; toKey([]) is "/", i.e. the root.
  for (const [url, node] of nodeMap) {
    if (url === "/") continue;
    const parentUrl = toKey(toSegments(url).slice(0, -1));
    const parent = nodeMap.get(parentUrl) ?? root2;
    parent.children.push(node);
  }

  const sortAndCount = (node) => {
    node.children.sort((a, b) => a.url.localeCompare(b.url));
    node.childCount = node.children.length;
    for (const child of node.children) {
      sortAndCount(child);
    }
  };
  sortAndCount(root2);

  if (pathPrefix) {
    const normalizedPrefix = normalizeUrlPath(pathPrefix);
    const prefixSegments = toSegments(normalizedPrefix);
    const subtreeRoot = nodeMap.get(toKey(prefixSegments));
    if (subtreeRoot) {
      return subtreeRoot;
    }
    return makeNode(normalizedPrefix, prefixSegments.length);
  }
  return root2;
}
21294
/**
 * Returns a copy of `base` with per-request `search`, `ranking` and
 * `ranking.weights` overrides layered on top. `base` is not mutated.
 *
 * Override entries whose value is `undefined` are ignored, so an explicitly
 * undefined field cannot erase a configured value (an undefined
 * `minChunkScoreRatio`, for instance, would otherwise turn the chunk score
 * threshold computed from it into NaN).
 *
 * @param {object} base - Full configuration; must contain `search`,
 *   `ranking` and `ranking.weights`.
 * @param {{search?: object, ranking?: object}} overrides - Partial overrides.
 * @returns {object} New configuration object with the overrides applied.
 */
function mergeRankingOverrides(base, overrides) {
  const definedOnly = (source) => Object.fromEntries(
    Object.entries(source ?? {}).filter(([, value]) => value !== undefined)
  );
  // Weights are merged one level deeper, so split them off from the rest.
  const { weights: weightOverrides, ...rankingOverrides } = overrides.ranking ?? {};
  return {
    ...base,
    search: {
      ...base.search,
      ...definedOnly(overrides.search)
    },
    ranking: {
      ...base.ranking,
      ...definedOnly(rankingOverrides),
      weights: {
        ...base.ranking.weights,
        ...definedOnly(weightOverrides)
      }
    }
  };
}
20005
21311
  var SearchEngine = class _SearchEngine {
20006
21312
  cwd;
20007
21313
  config;
@@ -20031,125 +21337,203 @@ var SearchEngine = class _SearchEngine {
20031
21337
  }
20032
21338
  const input = parsed.data;
20033
21339
  const totalStart = process.hrtime.bigint();
21340
+ const effectiveConfig = input.debug && input.rankingOverrides ? mergeRankingOverrides(this.config, input.rankingOverrides) : this.config;
20034
21341
  const resolvedScope = resolveScope(this.config, input.scope);
20035
21342
  const topK = input.topK ?? 10;
21343
+ const maxSubResults = input.maxSubResults ?? 5;
20036
21344
  const groupByPage = (input.groupBy ?? "page") === "page";
20037
- const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
20038
- const filterParts = [];
20039
- if (input.pathPrefix) {
20040
- const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
20041
- filterParts.push(`url GLOB '${prefix}*'`);
20042
- }
20043
- if (input.tags && input.tags.length > 0) {
20044
- for (const tag of input.tags) {
20045
- filterParts.push(`tags GLOB '*${tag}*'`);
21345
+ const queryText = input.q;
21346
+ const pathPrefix = input.pathPrefix ? input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}` : void 0;
21347
+ const filterTags = input.tags && input.tags.length > 0 ? input.tags : void 0;
21348
+ const metaFilterStr = input.filters && Object.keys(input.filters).length > 0 ? buildMetaFilterString(input.filters) : "";
21349
+ const metaFilter = metaFilterStr || void 0;
21350
+ const applyPagePostFilters = (hits) => {
21351
+ let filtered = hits;
21352
+ if (pathPrefix) {
21353
+ filtered = filtered.filter((h) => h.url.startsWith(pathPrefix));
21354
+ }
21355
+ if (filterTags) {
21356
+ filtered = filtered.filter(
21357
+ (h) => filterTags.every((tag) => h.tags.includes(tag))
21358
+ );
20046
21359
  }
20047
- }
20048
- const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
20049
- const useDualSearch = this.config.search.dualSearch && groupByPage;
21360
+ return filtered;
21361
+ };
21362
+ const applyChunkPostFilters = (hits) => {
21363
+ let filtered = hits;
21364
+ if (filterTags) {
21365
+ filtered = filtered.filter(
21366
+ (h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
21367
+ );
21368
+ }
21369
+ return filtered;
21370
+ };
20050
21371
  const searchStart = process.hrtime.bigint();
20051
- let ranked;
20052
- if (useDualSearch) {
20053
- const chunkLimit = Math.max(topK * 10, 100);
20054
- const pageLimit = 20;
20055
- const [pageHits, chunkHits] = await Promise.all([
20056
- this.store.searchPages(
20057
- input.q,
20058
- {
20059
- limit: pageLimit,
20060
- semanticWeight: this.config.search.semanticWeight,
20061
- inputEnrichment: this.config.search.inputEnrichment,
20062
- filter
20063
- },
20064
- resolvedScope
20065
- ),
20066
- this.store.search(
20067
- input.q,
20068
- {
20069
- limit: chunkLimit,
20070
- semanticWeight: this.config.search.semanticWeight,
20071
- inputEnrichment: this.config.search.inputEnrichment,
20072
- reranking: false,
20073
- filter
20074
- },
21372
+ if (groupByPage) {
21373
+ const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
21374
+ const pageLimit = Math.max(topK * 2, 20);
21375
+ const pageHits = await this.store.searchPagesByText(
21376
+ queryText,
21377
+ { limit: pageLimit * fetchMultiplier, filter: metaFilter },
21378
+ resolvedScope
21379
+ );
21380
+ const filteredPages = applyPagePostFilters(pageHits);
21381
+ let rankedPages = rankPageHits(filteredPages, effectiveConfig, input.q, input.debug);
21382
+ rankedPages = trimPagesByScoreGap(rankedPages, effectiveConfig);
21383
+ const topPages = rankedPages.slice(0, topK);
21384
+ const chunkPromises = topPages.map(
21385
+ (page) => this.store.searchChunksByUrl(
21386
+ queryText,
21387
+ page.url,
21388
+ { limit: maxSubResults, filter: metaFilter },
20075
21389
  resolvedScope
20076
- )
20077
- ]);
20078
- const rankedChunks = rankHits(chunkHits, this.config, input.q);
20079
- ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
21390
+ ).then((chunks) => applyChunkPostFilters(chunks))
21391
+ );
21392
+ const allChunks = await Promise.all(chunkPromises);
21393
+ const searchMs = hrTimeMs(searchStart);
21394
+ const results = this.buildPageFirstResults(topPages, allChunks, input.q, input.debug, maxSubResults);
21395
+ return {
21396
+ q: input.q,
21397
+ scope: resolvedScope.scopeName,
21398
+ results,
21399
+ meta: {
21400
+ timingsMs: {
21401
+ search: Math.round(searchMs),
21402
+ total: Math.round(hrTimeMs(totalStart))
21403
+ }
21404
+ }
21405
+ };
20080
21406
  } else {
21407
+ const candidateK = Math.max(50, topK);
21408
+ const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
20081
21409
  const hits = await this.store.search(
20082
- input.q,
20083
- {
20084
- limit: candidateK,
20085
- semanticWeight: this.config.search.semanticWeight,
20086
- inputEnrichment: this.config.search.inputEnrichment,
20087
- reranking: this.config.search.reranking,
20088
- filter
20089
- },
21410
+ queryText,
21411
+ { limit: candidateK * fetchMultiplier, filter: metaFilter },
20090
21412
  resolvedScope
20091
21413
  );
20092
- ranked = rankHits(hits, this.config, input.q);
20093
- }
20094
- const searchMs = hrTimeMs(searchStart);
20095
- const results = this.buildResults(ranked, topK, groupByPage, input.q);
20096
- return {
20097
- q: input.q,
20098
- scope: resolvedScope.scopeName,
20099
- results,
20100
- meta: {
20101
- timingsMs: {
20102
- search: Math.round(searchMs),
20103
- total: Math.round(hrTimeMs(totalStart))
21414
+ let filtered = hits;
21415
+ if (pathPrefix) {
21416
+ filtered = filtered.filter((h) => h.metadata.url.startsWith(pathPrefix));
21417
+ }
21418
+ if (filterTags) {
21419
+ filtered = filtered.filter(
21420
+ (h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
21421
+ );
21422
+ }
21423
+ const ranked = rankHits(filtered, effectiveConfig, input.q, input.debug);
21424
+ const searchMs = hrTimeMs(searchStart);
21425
+ const results = this.buildResults(ranked, topK, false, maxSubResults, input.q, input.debug, effectiveConfig);
21426
+ return {
21427
+ q: input.q,
21428
+ scope: resolvedScope.scopeName,
21429
+ results,
21430
+ meta: {
21431
+ timingsMs: {
21432
+ search: Math.round(searchMs),
21433
+ total: Math.round(hrTimeMs(totalStart))
21434
+ }
20104
21435
  }
21436
+ };
21437
+ }
21438
+ }
21439
+ buildPageFirstResults(rankedPages, allChunks, query, debug, maxSubResults = 5) {
21440
+ return rankedPages.map((page, i) => {
21441
+ const chunks = allChunks[i] ?? [];
21442
+ const bestChunk = chunks[0];
21443
+ const snippet = bestChunk ? query ? queryAwareExcerpt(bestChunk.metadata.chunkText, query) : toSnippet(bestChunk.metadata.chunkText) : page.description || page.title;
21444
+ const result = {
21445
+ url: page.url,
21446
+ title: page.title,
21447
+ sectionTitle: bestChunk?.metadata.sectionTitle || void 0,
21448
+ snippet,
21449
+ chunkText: bestChunk?.metadata.chunkText || void 0,
21450
+ score: Number(page.finalScore.toFixed(6)),
21451
+ routeFile: page.routeFile,
21452
+ chunks: chunks.length > 0 ? chunks.slice(0, maxSubResults).map((c) => ({
21453
+ sectionTitle: c.metadata.sectionTitle || void 0,
21454
+ snippet: query ? queryAwareExcerpt(c.metadata.chunkText, query) : toSnippet(c.metadata.chunkText),
21455
+ chunkText: c.metadata.chunkText || void 0,
21456
+ headingPath: c.metadata.headingPath,
21457
+ score: Number(c.score.toFixed(6))
21458
+ })) : void 0
21459
+ };
21460
+ if (debug && page.breakdown) {
21461
+ result.breakdown = {
21462
+ baseScore: page.breakdown.baseScore,
21463
+ incomingLinkBoost: page.breakdown.incomingLinkBoost,
21464
+ depthBoost: page.breakdown.depthBoost,
21465
+ titleMatchBoost: page.breakdown.titleMatchBoost,
21466
+ freshnessBoost: page.breakdown.freshnessBoost,
21467
+ anchorTextMatchBoost: 0
21468
+ };
20105
21469
  }
20106
- };
21470
+ return result;
21471
+ });
20107
21472
  }
20108
- ensureSnippet(hit) {
21473
+ ensureSnippet(hit, query) {
21474
+ const chunkText = hit.hit.metadata.chunkText;
21475
+ if (query && chunkText) return queryAwareExcerpt(chunkText, query);
20109
21476
  const snippet = hit.hit.metadata.snippet;
20110
21477
  if (snippet && snippet.length >= 30) return snippet;
20111
- const chunkText = hit.hit.metadata.chunkText;
20112
21478
  if (chunkText) return toSnippet(chunkText);
20113
21479
  return snippet || "";
20114
21480
  }
20115
- buildResults(ordered, topK, groupByPage, _query) {
21481
+ buildResults(ordered, topK, groupByPage, maxSubResults, query, debug, config) {
21482
+ const cfg = config ?? this.config;
20116
21483
  if (groupByPage) {
20117
- let pages = aggregateByPage(ordered, this.config);
20118
- pages = trimByScoreGap(pages, this.config);
20119
- const minRatio = this.config.ranking.minChunkScoreRatio;
21484
+ let pages = aggregateByPage(ordered, cfg);
21485
+ pages = trimByScoreGap(pages, cfg);
21486
+ const minRatio = cfg.ranking.minChunkScoreRatio;
20120
21487
  return pages.slice(0, topK).map((page) => {
20121
21488
  const bestScore = page.bestChunk.finalScore;
20122
21489
  const minChunkScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
20123
- const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, 5);
20124
- return {
21490
+ const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, maxSubResults);
21491
+ const result = {
20125
21492
  url: page.url,
20126
21493
  title: page.title,
20127
21494
  sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
20128
- snippet: this.ensureSnippet(page.bestChunk),
21495
+ snippet: this.ensureSnippet(page.bestChunk, query),
21496
+ chunkText: page.bestChunk.hit.metadata.chunkText || void 0,
20129
21497
  score: Number(page.pageScore.toFixed(6)),
20130
21498
  routeFile: page.routeFile,
20131
- chunks: meaningful.length > 1 ? meaningful.map((c) => ({
21499
+ chunks: meaningful.length >= 1 ? meaningful.map((c) => ({
20132
21500
  sectionTitle: c.hit.metadata.sectionTitle || void 0,
20133
- snippet: this.ensureSnippet(c),
21501
+ snippet: this.ensureSnippet(c, query),
21502
+ chunkText: c.hit.metadata.chunkText || void 0,
20134
21503
  headingPath: c.hit.metadata.headingPath,
20135
21504
  score: Number(c.finalScore.toFixed(6))
20136
21505
  })) : void 0
20137
21506
  };
21507
+ if (debug && page.bestChunk.breakdown) {
21508
+ result.breakdown = page.bestChunk.breakdown;
21509
+ }
21510
+ return result;
20138
21511
  });
20139
21512
  } else {
20140
21513
  let filtered = ordered;
20141
- const minScore = this.config.ranking.minScore;
20142
- if (minScore > 0) {
20143
- filtered = ordered.filter((entry) => entry.finalScore >= minScore);
20144
- }
20145
- return filtered.slice(0, topK).map(({ hit, finalScore }) => ({
20146
- url: hit.metadata.url,
20147
- title: hit.metadata.title,
20148
- sectionTitle: hit.metadata.sectionTitle || void 0,
20149
- snippet: this.ensureSnippet({ hit, finalScore }),
20150
- score: Number(finalScore.toFixed(6)),
20151
- routeFile: hit.metadata.routeFile
20152
- }));
21514
+ const minScoreRatio = cfg.ranking.minScoreRatio;
21515
+ if (minScoreRatio > 0 && ordered.length > 0) {
21516
+ const topScore = ordered[0].finalScore;
21517
+ if (Number.isFinite(topScore) && topScore > 0) {
21518
+ const threshold = topScore * minScoreRatio;
21519
+ filtered = ordered.filter((entry) => entry.finalScore >= threshold);
21520
+ }
21521
+ }
21522
+ return filtered.slice(0, topK).map(({ hit, finalScore, breakdown }) => {
21523
+ const result = {
21524
+ url: hit.metadata.url,
21525
+ title: hit.metadata.title,
21526
+ sectionTitle: hit.metadata.sectionTitle || void 0,
21527
+ snippet: this.ensureSnippet({ hit, finalScore }, query),
21528
+ chunkText: hit.metadata.chunkText || void 0,
21529
+ score: Number(finalScore.toFixed(6)),
21530
+ routeFile: hit.metadata.routeFile
21531
+ };
21532
+ if (debug && breakdown) {
21533
+ result.breakdown = breakdown;
21534
+ }
21535
+ return result;
21536
+ });
20153
21537
  }
20154
21538
  }
20155
21539
  async getPage(pathOrUrl, scope) {
@@ -20175,6 +21559,116 @@ var SearchEngine = class _SearchEngine {
20175
21559
  markdown: page.markdown
20176
21560
  };
20177
21561
  }
21562
+ async listPages(opts) {
21563
+ const resolvedScope = resolveScope(this.config, opts?.scope);
21564
+ const pathPrefix = opts?.pathPrefix ? opts.pathPrefix.startsWith("/") ? opts.pathPrefix : `/${opts.pathPrefix}` : void 0;
21565
+ return this.store.listPages(resolvedScope, {
21566
+ cursor: opts?.cursor,
21567
+ limit: opts?.limit,
21568
+ pathPrefix
21569
+ });
21570
+ }
21571
+ async getSiteStructure(opts) {
21572
+ const maxPages = Math.min(opts?.maxPages ?? MAX_SITE_STRUCTURE_PAGES, MAX_SITE_STRUCTURE_PAGES);
21573
+ const allPages = [];
21574
+ let cursor;
21575
+ let truncated = false;
21576
+ do {
21577
+ const result = await this.listPages({
21578
+ pathPrefix: opts?.pathPrefix,
21579
+ scope: opts?.scope,
21580
+ cursor,
21581
+ limit: 200
21582
+ });
21583
+ allPages.push(...result.pages);
21584
+ cursor = result.nextCursor;
21585
+ if (allPages.length >= maxPages) {
21586
+ truncated = allPages.length > maxPages || !!cursor;
21587
+ allPages.length = maxPages;
21588
+ break;
21589
+ }
21590
+ } while (cursor);
21591
+ const root2 = buildTree(allPages, opts?.pathPrefix);
21592
+ return {
21593
+ root: root2,
21594
+ totalPages: allPages.length,
21595
+ truncated
21596
+ };
21597
+ }
21598
+ async getRelatedPages(pathOrUrl, opts) {
21599
+ const resolvedScope = resolveScope(this.config, opts?.scope);
21600
+ const urlPath = this.resolveInputPath(pathOrUrl);
21601
+ const topK = Math.min(opts?.topK ?? 10, 25);
21602
+ const source = await this.store.fetchPageWithVector(urlPath, resolvedScope);
21603
+ if (!source) {
21604
+ throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
21605
+ }
21606
+ const sourceOutgoing = new Set(source.metadata.outgoingLinkUrls ?? []);
21607
+ const semanticHits = await this.store.searchPagesByVector(
21608
+ source.vector,
21609
+ { limit: 50 },
21610
+ resolvedScope
21611
+ );
21612
+ const filteredHits = semanticHits.filter((h) => h.url !== urlPath);
21613
+ const semanticScoreMap = /* @__PURE__ */ new Map();
21614
+ for (const hit of filteredHits) {
21615
+ semanticScoreMap.set(hit.url, hit.score);
21616
+ }
21617
+ const candidateUrls = /* @__PURE__ */ new Set();
21618
+ for (const hit of filteredHits) {
21619
+ candidateUrls.add(hit.url);
21620
+ }
21621
+ for (const url of sourceOutgoing) {
21622
+ if (url !== urlPath) candidateUrls.add(url);
21623
+ }
21624
+ const missingUrls = [...sourceOutgoing].filter(
21625
+ (u) => u !== urlPath && !semanticScoreMap.has(u)
21626
+ );
21627
+ const fetchedPages = missingUrls.length > 0 ? await this.store.fetchPagesBatch(missingUrls, resolvedScope) : [];
21628
+ const metaMap = /* @__PURE__ */ new Map();
21629
+ for (const hit of filteredHits) {
21630
+ metaMap.set(hit.url, { title: hit.title, routeFile: hit.routeFile, outgoingLinkUrls: [] });
21631
+ }
21632
+ for (const p of fetchedPages) {
21633
+ metaMap.set(p.url, { title: p.title, routeFile: p.routeFile, outgoingLinkUrls: p.outgoingLinkUrls });
21634
+ }
21635
+ const semanticUrls = filteredHits.map((h) => h.url);
21636
+ if (semanticUrls.length > 0) {
21637
+ const semanticPageData = await this.store.fetchPagesBatch(semanticUrls, resolvedScope);
21638
+ for (const p of semanticPageData) {
21639
+ const existing = metaMap.get(p.url);
21640
+ if (existing) {
21641
+ existing.outgoingLinkUrls = p.outgoingLinkUrls;
21642
+ }
21643
+ }
21644
+ }
21645
+ const candidates = [];
21646
+ for (const url of candidateUrls) {
21647
+ const meta = metaMap.get(url);
21648
+ if (!meta) continue;
21649
+ const isOutgoing = sourceOutgoing.has(url);
21650
+ const isIncoming = meta.outgoingLinkUrls.includes(urlPath);
21651
+ const isLinked = isOutgoing || isIncoming;
21652
+ const dice = diceScore(urlPath, url);
21653
+ const semantic = semanticScoreMap.get(url) ?? 0;
21654
+ const score = compositeScore(isLinked, dice, semantic);
21655
+ const relationshipType = dominantRelationshipType(isOutgoing, isIncoming, dice);
21656
+ candidates.push({
21657
+ url,
21658
+ title: meta.title,
21659
+ score: Number(score.toFixed(6)),
21660
+ relationshipType,
21661
+ routeFile: meta.routeFile
21662
+ });
21663
+ }
21664
+ candidates.sort((a, b) => b.score - a.score);
21665
+ const results = candidates.slice(0, topK);
21666
+ return {
21667
+ sourceUrl: urlPath,
21668
+ scope: resolvedScope.scopeName,
21669
+ relatedPages: results
21670
+ };
21671
+ }
20178
21672
  async health() {
20179
21673
  return this.store.health();
20180
21674
  }
@@ -20197,14 +21691,40 @@ function createServer(engine) {
20197
21691
  server.registerTool(
20198
21692
  "search",
20199
21693
  {
20200
- description: "Semantic site search powered by Upstash Search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and groupBy.",
21694
+ description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
20201
21695
  inputSchema: {
20202
21696
  query: zod.z.string().min(1),
20203
21697
  scope: zod.z.string().optional(),
20204
21698
  topK: zod.z.number().int().positive().max(100).optional(),
20205
21699
  pathPrefix: zod.z.string().optional(),
20206
21700
  tags: zod.z.array(zod.z.string()).optional(),
20207
- groupBy: zod.z.enum(["page", "chunk"]).optional()
21701
+ filters: zod.z.record(zod.z.string(), zod.z.union([zod.z.string(), zod.z.number(), zod.z.boolean()])).optional(),
21702
+ groupBy: zod.z.enum(["page", "chunk"]).optional(),
21703
+ maxSubResults: zod.z.number().int().positive().max(20).optional()
21704
+ },
21705
+ outputSchema: {
21706
+ q: zod.z.string(),
21707
+ scope: zod.z.string(),
21708
+ results: zod.z.array(zod.z.object({
21709
+ url: zod.z.string(),
21710
+ title: zod.z.string(),
21711
+ sectionTitle: zod.z.string().optional(),
21712
+ snippet: zod.z.string(),
21713
+ score: zod.z.number(),
21714
+ routeFile: zod.z.string(),
21715
+ chunks: zod.z.array(zod.z.object({
21716
+ sectionTitle: zod.z.string().optional(),
21717
+ snippet: zod.z.string(),
21718
+ headingPath: zod.z.array(zod.z.string()),
21719
+ score: zod.z.number()
21720
+ })).optional()
21721
+ })),
21722
+ meta: zod.z.object({
21723
+ timingsMs: zod.z.object({
21724
+ search: zod.z.number(),
21725
+ total: zod.z.number()
21726
+ })
21727
+ })
20208
21728
  }
20209
21729
  },
20210
21730
  async (input) => {
@@ -20214,7 +21734,9 @@ function createServer(engine) {
20214
21734
  scope: input.scope,
20215
21735
  pathPrefix: input.pathPrefix,
20216
21736
  tags: input.tags,
20217
- groupBy: input.groupBy
21737
+ filters: input.filters,
21738
+ groupBy: input.groupBy,
21739
+ maxSubResults: input.maxSubResults
20218
21740
  });
20219
21741
  return {
20220
21742
  content: [
@@ -20222,7 +21744,8 @@ function createServer(engine) {
20222
21744
  type: "text",
20223
21745
  text: JSON.stringify(result, null, 2)
20224
21746
  }
20225
- ]
21747
+ ],
21748
+ structuredContent: result
20226
21749
  };
20227
21750
  }
20228
21751
  );
@@ -20247,8 +21770,134 @@ function createServer(engine) {
20247
21770
  };
20248
21771
  }
20249
21772
  );
21773
+ server.registerTool(
21774
+ "list_pages",
21775
+ {
21776
+ description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
21777
+ inputSchema: {
21778
+ pathPrefix: zod.z.string().optional(),
21779
+ cursor: zod.z.string().optional(),
21780
+ limit: zod.z.number().int().positive().max(200).optional(),
21781
+ scope: zod.z.string().optional()
21782
+ }
21783
+ },
21784
+ async (input) => {
21785
+ const result = await engine.listPages({
21786
+ pathPrefix: input.pathPrefix,
21787
+ cursor: input.cursor,
21788
+ limit: input.limit,
21789
+ scope: input.scope
21790
+ });
21791
+ return {
21792
+ content: [
21793
+ {
21794
+ type: "text",
21795
+ text: JSON.stringify(result, null, 2)
21796
+ }
21797
+ ]
21798
+ };
21799
+ }
21800
+ );
21801
+ server.registerTool(
21802
+ "get_site_structure",
21803
+ {
21804
+ description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
21805
+ inputSchema: {
21806
+ pathPrefix: zod.z.string().optional(),
21807
+ scope: zod.z.string().optional(),
21808
+ maxPages: zod.z.number().int().positive().max(2e3).optional()
21809
+ }
21810
+ },
21811
+ async (input) => {
21812
+ const result = await engine.getSiteStructure({
21813
+ pathPrefix: input.pathPrefix,
21814
+ scope: input.scope,
21815
+ maxPages: input.maxPages
21816
+ });
21817
+ return {
21818
+ content: [
21819
+ {
21820
+ type: "text",
21821
+ text: JSON.stringify(result, null, 2)
21822
+ }
21823
+ ]
21824
+ };
21825
+ }
21826
+ );
21827
+ server.registerTool(
21828
+ "find_source_file",
21829
+ {
21830
+ description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
21831
+ inputSchema: {
21832
+ query: zod.z.string().min(1),
21833
+ scope: zod.z.string().optional()
21834
+ }
21835
+ },
21836
+ async (input) => {
21837
+ const result = await engine.search({
21838
+ q: input.query,
21839
+ topK: 1,
21840
+ scope: input.scope
21841
+ });
21842
+ if (result.results.length === 0) {
21843
+ return {
21844
+ content: [
21845
+ {
21846
+ type: "text",
21847
+ text: JSON.stringify({
21848
+ error: "No matching content found for the given query."
21849
+ })
21850
+ }
21851
+ ]
21852
+ };
21853
+ }
21854
+ const match = result.results[0];
21855
+ const { url, routeFile, sectionTitle, snippet } = match;
21856
+ return {
21857
+ content: [
21858
+ {
21859
+ type: "text",
21860
+ text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
21861
+ }
21862
+ ]
21863
+ };
21864
+ }
21865
+ );
21866
+ server.registerTool(
21867
+ "get_related_pages",
21868
+ {
21869
+ description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
21870
+ inputSchema: {
21871
+ pathOrUrl: zod.z.string().min(1),
21872
+ scope: zod.z.string().optional(),
21873
+ topK: zod.z.number().int().positive().max(25).optional()
21874
+ }
21875
+ },
21876
+ async (input) => {
21877
+ const result = await engine.getRelatedPages(input.pathOrUrl, {
21878
+ topK: input.topK,
21879
+ scope: input.scope
21880
+ });
21881
+ return {
21882
+ content: [
21883
+ {
21884
+ type: "text",
21885
+ text: JSON.stringify(result, null, 2)
21886
+ }
21887
+ ]
21888
+ };
21889
+ }
21890
+ );
20250
21891
  return server;
20251
21892
  }
21893
/**
 * Resolves the API key for the MCP HTTP endpoint.
 *
 * An inline `config.mcp.http.apiKey` wins whenever it is not null/undefined
 * (an empty string is returned as-is). Otherwise, if `apiKeyEnv` names an
 * environment variable, that variable's value is used.
 *
 * @param {{mcp: {http: {apiKey?: string, apiKeyEnv?: string}}}} config
 * @returns {string|undefined} The key, or undefined when none is configured.
 */
function resolveApiKey(config) {
  const { apiKey, apiKeyEnv } = config.mcp.http;
  if (apiKey !== null && apiKey !== undefined) {
    return apiKey;
  }
  if (!apiKeyEnv) {
    return void 0;
  }
  return process.env[apiKeyEnv];
}
21896
/**
 * Compares a caller-supplied API key with the expected key without leaking
 * timing information.
 *
 * Both values are hashed with SHA-256 first so the buffers handed to
 * crypto.timingSafeEqual always have equal length (it throws on a length
 * mismatch) and the key length is not revealed.
 *
 * Non-string input is rejected up front: hashing it would throw, turning a
 * malformed credential into an exception instead of a failed comparison.
 *
 * @param {string} provided - Key presented by the client.
 * @param {string} expected - Configured key.
 * @returns {boolean} True only when both are strings with identical content.
 */
function verifyApiKey(provided, expected) {
  if (typeof provided !== "string" || typeof expected !== "string") {
    return false;
  }
  const a = crypto.createHash("sha256").update(provided).digest();
  const b = crypto.createHash("sha256").update(expected).digest();
  return crypto.timingSafeEqual(a, b);
}
20252
21901
  function redirectConsoleToStderr() {
20253
21902
  console.log = (...args) => {
20254
21903
  process.stderr.write(`[LOG] ${args.map(String).join(" ")}
@@ -20263,7 +21912,22 @@ async function startHttpServer(serverFactory, config, opts) {
20263
21912
  const app = express_js.createMcpExpressApp();
20264
21913
  const port = opts.httpPort ?? config.mcp.http.port;
20265
21914
  const endpointPath = opts.httpPath ?? config.mcp.http.path;
21915
+ const isPublic = config.mcp.access === "public";
21916
+ const host = isPublic ? "0.0.0.0" : "127.0.0.1";
21917
+ const apiKey = isPublic ? resolveApiKey(config) : void 0;
20266
21918
  app.post(endpointPath, async (req, res) => {
21919
+ if (isPublic && apiKey) {
21920
+ const authHeader = req.headers["authorization"];
21921
+ const provided = (authHeader?.startsWith("Bearer ") ? authHeader.slice(7) : void 0) ?? req.headers["x-api-key"] ?? "";
21922
+ if (!provided || !verifyApiKey(provided, apiKey)) {
21923
+ res.status(401).json({
21924
+ jsonrpc: "2.0",
21925
+ error: { code: -32001, message: "Unauthorized" },
21926
+ id: null
21927
+ });
21928
+ return;
21929
+ }
21930
+ }
20267
21931
  const server = serverFactory();
20268
21932
  const transport = new streamableHttp_js.StreamableHTTPServerTransport({
20269
21933
  sessionIdGenerator: void 0
@@ -20313,9 +21977,12 @@ async function startHttpServer(serverFactory, config, opts) {
20313
21977
  );
20314
21978
  });
20315
21979
  await new Promise((resolve, reject) => {
20316
- const instance = app.listen(port, "127.0.0.1", () => {
20317
- process.stderr.write(`SearchSocket MCP HTTP server listening on http://127.0.0.1:${port}${endpointPath}
21980
+ const instance = app.listen(port, host, () => {
21981
+ process.stderr.write(`SearchSocket MCP HTTP server listening on http://${host}:${port}${endpointPath}
20318
21982
  `);
21983
+ if (isPublic) {
21984
+ process.stderr.write("WARNING: Server is in public mode. Ensure HTTPS is configured via a reverse proxy for production use.\n");
21985
+ }
20319
21986
  resolve();
20320
21987
  });
20321
21988
  instance.once("error", reject);
@@ -20330,6 +21997,13 @@ async function runMcpServer(options = {}) {
20330
21997
  cwd: options.cwd,
20331
21998
  configPath: options.configPath
20332
21999
  });
22000
+ if (options.access) config.mcp.access = options.access;
22001
+ if (options.apiKey) config.mcp.http.apiKey = options.apiKey;
22002
+ if (config.mcp.access === "public" && !resolveApiKey(config)) {
22003
+ throw new Error(
22004
+ 'MCP access is "public" but no API key is configured. Pass --api-key or set mcp.http.apiKey / mcp.http.apiKeyEnv in config.'
22005
+ );
22006
+ }
20333
22007
  const resolvedTransport = options.transport ?? config.mcp.transport;
20334
22008
  if (resolvedTransport === "stdio") {
20335
22009
  redirectConsoleToStderr();
@@ -20347,8 +22021,6 @@ async function runMcpServer(options = {}) {
20347
22021
  const stdioTransport = new stdio_js.StdioServerTransport();
20348
22022
  await server.connect(stdioTransport);
20349
22023
  }
20350
-
20351
- // src/sveltekit/handle.ts
20352
22024
  var InMemoryRateLimiter = class {
20353
22025
  constructor(windowMs, max) {
20354
22026
  this.windowMs = windowMs;
@@ -20376,7 +22048,13 @@ function searchsocketHandle(options = {}) {
20376
22048
  let enginePromise = null;
20377
22049
  let configPromise = null;
20378
22050
  let apiPath = options.path;
22051
+ let llmsServePath = null;
22052
+ let serveMarkdownVariants = false;
22053
+ let mcpPath;
22054
+ let mcpApiKey;
22055
+ let mcpEnableJsonResponse = true;
20379
22056
  let rateLimiter = null;
22057
+ let notConfigured = false;
20380
22058
  const getConfig = async () => {
20381
22059
  if (!configPromise) {
20382
22060
  let configP;
@@ -20393,6 +22071,13 @@ function searchsocketHandle(options = {}) {
20393
22071
  }
20394
22072
  configPromise = configP.then((config) => {
20395
22073
  apiPath = apiPath ?? config.api.path;
22074
+ mcpPath = config.mcp.handle.path;
22075
+ mcpApiKey = config.mcp.handle.apiKey;
22076
+ mcpEnableJsonResponse = config.mcp.handle.enableJsonResponse;
22077
+ if (config.llmsTxt.enable) {
22078
+ llmsServePath = "/" + config.llmsTxt.outputPath.replace(/^static\//, "");
22079
+ serveMarkdownVariants = config.llmsTxt.serveMarkdownVariants;
22080
+ }
20396
22081
  if (config.api.rateLimit && !isServerless()) {
20397
22082
  rateLimiter = new InMemoryRateLimiter(config.api.rateLimit.windowMs, config.api.rateLimit.max);
20398
22083
  }
@@ -20402,59 +22087,109 @@ function searchsocketHandle(options = {}) {
20402
22087
  return configPromise;
20403
22088
  };
20404
22089
  const getEngine = async () => {
22090
+ if (notConfigured) {
22091
+ throw new SearchSocketError(
22092
+ "SEARCH_NOT_CONFIGURED",
22093
+ "Search is not configured. Set the required Upstash environment variables to enable search.",
22094
+ 503
22095
+ );
22096
+ }
20405
22097
  if (!enginePromise) {
20406
22098
  const config = await getConfig();
20407
22099
  enginePromise = SearchEngine.create({
20408
22100
  cwd: options.cwd,
20409
22101
  config
22102
+ }).catch((error) => {
22103
+ enginePromise = null;
22104
+ if (error instanceof SearchSocketError && error.code === "VECTOR_BACKEND_UNAVAILABLE") {
22105
+ notConfigured = true;
22106
+ throw new SearchSocketError(
22107
+ "SEARCH_NOT_CONFIGURED",
22108
+ "Search is not configured. Set the required Upstash environment variables to enable search.",
22109
+ 503
22110
+ );
22111
+ }
22112
+ throw error;
20410
22113
  });
20411
22114
  }
20412
22115
  return enginePromise;
20413
22116
  };
20414
22117
  const bodyLimit = options.maxBodyBytes ?? 64 * 1024;
20415
22118
  return async ({ event, resolve }) => {
20416
- if (apiPath && event.url.pathname !== apiPath) {
20417
- return resolve(event);
22119
+ if (apiPath && !isApiPath(event.url.pathname, apiPath) && event.url.pathname !== llmsServePath) {
22120
+ const isMarkdownVariant = event.request.method === "GET" && event.url.pathname.endsWith(".md");
22121
+ if (mcpPath && event.url.pathname === mcpPath) {
22122
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
22123
+ }
22124
+ if (mcpPath) {
22125
+ if (serveMarkdownVariants && isMarkdownVariant) ; else {
22126
+ return resolve(event);
22127
+ }
22128
+ } else {
22129
+ if (configPromise || options.config || options.rawConfig) {
22130
+ await getConfig();
22131
+ if (mcpPath && event.url.pathname === mcpPath) {
22132
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
22133
+ }
22134
+ if (!(serveMarkdownVariants && isMarkdownVariant)) {
22135
+ return resolve(event);
22136
+ }
22137
+ } else {
22138
+ return resolve(event);
22139
+ }
22140
+ }
20418
22141
  }
20419
22142
  const config = await getConfig();
22143
+ if (llmsServePath && event.request.method === "GET" && event.url.pathname === llmsServePath) {
22144
+ const cwd = options.cwd ?? process.cwd();
22145
+ const filePath = path__default.default.resolve(cwd, config.llmsTxt.outputPath);
22146
+ try {
22147
+ const content = await fs8__default.default.readFile(filePath, "utf8");
22148
+ return new Response(content, {
22149
+ status: 200,
22150
+ headers: { "content-type": "text/plain; charset=utf-8" }
22151
+ });
22152
+ } catch {
22153
+ return resolve(event);
22154
+ }
22155
+ }
22156
+ if (serveMarkdownVariants && event.request.method === "GET" && event.url.pathname.endsWith(".md")) {
22157
+ let rawPath;
22158
+ try {
22159
+ rawPath = decodeURIComponent(event.url.pathname.slice(0, -3));
22160
+ } catch {
22161
+ return resolve(event);
22162
+ }
22163
+ const scope = event.url.searchParams?.get("scope") ?? void 0;
22164
+ try {
22165
+ const engine = await getEngine();
22166
+ const page = await engine.getPage(rawPath, scope);
22167
+ return new Response(page.markdown, {
22168
+ status: 200,
22169
+ headers: { "content-type": "text/markdown; charset=utf-8" }
22170
+ });
22171
+ } catch (error) {
22172
+ if (error instanceof SearchSocketError && error.status === 404) {
22173
+ return resolve(event);
22174
+ }
22175
+ throw error;
22176
+ }
22177
+ }
22178
+ if (mcpPath && event.url.pathname === mcpPath) {
22179
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
22180
+ }
20420
22181
  const targetPath = apiPath ?? config.api.path;
20421
- if (event.url.pathname !== targetPath) {
22182
+ if (!isApiPath(event.url.pathname, targetPath)) {
20422
22183
  return resolve(event);
20423
22184
  }
20424
- if (event.request.method === "OPTIONS") {
22185
+ const subPath = event.url.pathname.slice(targetPath.length);
22186
+ const method = event.request.method;
22187
+ if (method === "OPTIONS") {
20425
22188
  return new Response(null, {
20426
22189
  status: 204,
20427
22190
  headers: buildCorsHeaders(event.request, config)
20428
22191
  });
20429
22192
  }
20430
- if (event.request.method !== "POST") {
20431
- return withCors(
20432
- new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Method not allowed", 405))), {
20433
- status: 405,
20434
- headers: {
20435
- "content-type": "application/json"
20436
- }
20437
- }),
20438
- event.request,
20439
- config
20440
- );
20441
- }
20442
- const contentLength = Number(event.request.headers.get("content-length") ?? 0);
20443
- if (contentLength > bodyLimit) {
20444
- return withCors(
20445
- new Response(
20446
- JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Request body too large", 413))),
20447
- {
20448
- status: 413,
20449
- headers: {
20450
- "content-type": "application/json"
20451
- }
20452
- }
20453
- ),
20454
- event.request,
20455
- config
20456
- );
20457
- }
20458
22193
  if (rateLimiter) {
20459
22194
  const ip = event.getClientAddress?.() ?? event.request.headers.get("x-forwarded-for")?.split(",")[0]?.trim() ?? "unknown";
20460
22195
  if (!rateLimiter.check(ip)) {
@@ -20474,39 +22209,32 @@ function searchsocketHandle(options = {}) {
20474
22209
  }
20475
22210
  }
20476
22211
  try {
20477
- let rawBody;
20478
- if (typeof event.request.text === "function") {
20479
- rawBody = await event.request.text();
20480
- } else {
20481
- let parsedFallback;
20482
- try {
20483
- parsedFallback = await event.request.json();
20484
- } catch (error) {
20485
- if (error instanceof SyntaxError) {
20486
- throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
20487
- }
20488
- throw error;
22212
+ if (method === "GET") {
22213
+ if (subPath === "" || subPath === "/") {
22214
+ return await handleGetSearch(event, config, getEngine);
20489
22215
  }
20490
- rawBody = JSON.stringify(parsedFallback);
22216
+ if (subPath === "/health") {
22217
+ return await handleGetHealth(event, config, getEngine);
22218
+ }
22219
+ if (subPath.startsWith("/pages/")) {
22220
+ return await handleGetPage(event, config, getEngine, subPath);
22221
+ }
22222
+ return withCors(
22223
+ new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Not found", 404))), {
22224
+ status: 404,
22225
+ headers: { "content-type": "application/json" }
22226
+ }),
22227
+ event.request,
22228
+ config
22229
+ );
20491
22230
  }
20492
- if (Buffer.byteLength(rawBody, "utf8") > bodyLimit) {
20493
- throw new SearchSocketError("INVALID_REQUEST", "Request body too large", 413);
22231
+ if (method === "POST" && (subPath === "" || subPath === "/")) {
22232
+ return await handlePostSearch(event, config, getEngine, bodyLimit);
20494
22233
  }
20495
- let body;
20496
- try {
20497
- body = JSON.parse(rawBody);
20498
- } catch {
20499
- throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
20500
- }
20501
- const engine = await getEngine();
20502
- const searchRequest = body;
20503
- const result = await engine.search(searchRequest);
20504
22234
  return withCors(
20505
- new Response(JSON.stringify(result), {
20506
- status: 200,
20507
- headers: {
20508
- "content-type": "application/json"
20509
- }
22235
+ new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Method not allowed", 405))), {
22236
+ status: 405,
22237
+ headers: { "content-type": "application/json" }
20510
22238
  }),
20511
22239
  event.request,
20512
22240
  config
@@ -20527,6 +22255,183 @@ function searchsocketHandle(options = {}) {
20527
22255
  }
20528
22256
  };
20529
22257
  }
22258
/**
 * Tell whether a request pathname addresses the search API.
 *
 * A pathname matches when it is exactly `apiPath` or sits underneath it
 * (i.e. begins with `apiPath` followed by a slash). A pathname that merely
 * shares the prefix, such as `/api/searching` for `/api/search`, does not match.
 *
 * @param {string} pathname - Pathname of the incoming request.
 * @param {string} apiPath - Base path the API is mounted on.
 * @returns {boolean} Whether the pathname belongs to the API.
 */
function isApiPath(pathname, apiPath) {
  if (pathname === apiPath) {
    return true;
  }
  return pathname.startsWith(`${apiPath}/`);
}
22261
/**
 * Strictly parse a query-string value as a positive decimal integer.
 *
 * Unlike a bare `Number.parseInt`, this rejects values with trailing
 * characters ("5abc") or a fractional part ("1.5") instead of silently
 * truncating them. Surrounding whitespace and a leading "+" are tolerated.
 *
 * @param {string} raw - Raw query parameter value.
 * @returns {number} The parsed integer (>= 1), or `NaN` when the value is not
 *   a positive safe integer.
 */
function parseStrictPositiveInt(raw) {
  const text = raw.trim();
  if (!/^\+?\d+$/.test(text)) {
    return Number.NaN;
  }
  const value = Number.parseInt(text, 10);
  return Number.isSafeInteger(value) && value >= 1 ? value : Number.NaN;
}

/**
 * Handle `GET <apiPath>`: build a search request from the query string, run
 * it through the engine and return the result as JSON.
 *
 * Recognised query parameters: `q` (required, non-blank), `topK`, `scope`,
 * `pathPrefix`, `groupBy` ("page" or "chunk"), `maxSubResults` (1-20) and
 * repeated `tags`.
 *
 * @param {object} event - Request event; reads `event.url.searchParams` and `event.request`.
 * @param {object} config - Resolved configuration, forwarded to `withCors`.
 * @param {() => Promise<object>} getEngine - Lazily resolves the search engine.
 * @returns {Promise<Response>} 200 JSON response wrapped by `withCors`.
 * @throws {SearchSocketError} `INVALID_REQUEST` (400) when a parameter is
 *   missing or invalid.
 */
async function handleGetSearch(event, config, getEngine) {
  const params = event.url.searchParams;
  const q = params.get("q");
  if (!q || q.trim() === "") {
    throw new SearchSocketError("INVALID_REQUEST", "Missing required query parameter: q", 400);
  }
  const searchRequest = { q };

  const topK = params.get("topK");
  if (topK !== null) {
    const parsed = parseStrictPositiveInt(topK);
    if (Number.isNaN(parsed)) {
      throw new SearchSocketError("INVALID_REQUEST", "topK must be a positive integer", 400);
    }
    searchRequest.topK = parsed;
  }

  const scope = params.get("scope");
  if (scope !== null) {
    searchRequest.scope = scope;
  }

  const pathPrefix = params.get("pathPrefix");
  if (pathPrefix !== null) {
    searchRequest.pathPrefix = pathPrefix;
  }

  // An empty `groupBy=` is treated the same as an absent parameter.
  const groupBy = params.get("groupBy");
  if (groupBy) {
    if (groupBy !== "page" && groupBy !== "chunk") {
      throw new SearchSocketError("INVALID_REQUEST", 'groupBy must be "page" or "chunk"', 400);
    }
    searchRequest.groupBy = groupBy;
  }

  const maxSubResults = params.get("maxSubResults");
  if (maxSubResults !== null) {
    const parsed = parseStrictPositiveInt(maxSubResults);
    if (Number.isNaN(parsed) || parsed > 20) {
      throw new SearchSocketError("INVALID_REQUEST", "maxSubResults must be a positive integer between 1 and 20", 400);
    }
    searchRequest.maxSubResults = parsed;
  }

  const tags = params.getAll("tags");
  if (tags.length > 0) {
    searchRequest.tags = tags;
  }

  // Validation happens before the engine is resolved so bad requests fail
  // without touching the backend.
  const engine = await getEngine();
  const result = await engine.search(searchRequest);
  return withCors(
    new Response(JSON.stringify(result), {
      status: 200,
      headers: { "content-type": "application/json" }
    }),
    event.request,
    config
  );
}
22308
/**
 * Handle `GET <apiPath>/health`: ask the engine for its health report and
 * return it as a JSON response.
 *
 * @param {object} event - Request event; `event.request` is forwarded to `withCors`.
 * @param {object} config - Resolved configuration, forwarded to `withCors`.
 * @param {() => Promise<object>} getEngine - Lazily resolves the search engine.
 * @returns {Promise<Response>} 200 JSON response wrapped by `withCors`.
 */
async function handleGetHealth(event, config, getEngine) {
  const engine = await getEngine();
  const report = await engine.health();
  const response = new Response(JSON.stringify(report), {
    status: 200,
    headers: { "content-type": "application/json" }
  });
  return withCors(response, event.request, config);
}
22320
/**
 * Handle `GET <apiPath>/pages/<path>`: look up a single indexed page and
 * return it as JSON.
 *
 * The page path is whatever follows the `/pages` prefix of `subPath`
 * (leading slash included), percent-decoded. An optional `scope` query
 * parameter is passed through to the engine.
 *
 * @param {object} event - Request event; reads `event.url.searchParams` and `event.request`.
 * @param {object} config - Resolved configuration, forwarded to `withCors`.
 * @param {() => Promise<object>} getEngine - Lazily resolves the search engine.
 * @param {string} subPath - Request path relative to the API base, starting with `/pages/`.
 * @returns {Promise<Response>} 200 JSON response wrapped by `withCors`.
 * @throws {SearchSocketError} `INVALID_REQUEST` (400) when the path is not
 *   valid percent-encoding.
 */
async function handleGetPage(event, config, getEngine, subPath) {
  const prefixLength = "/pages".length;
  const encodedPath = subPath.slice(prefixLength);

  const decodePagePath = () => {
    try {
      return decodeURIComponent(encodedPath);
    } catch {
      throw new SearchSocketError("INVALID_REQUEST", "Malformed page path", 400);
    }
  };
  const pagePath = decodePagePath();

  const scope = event.url.searchParams?.get("scope") ?? void 0;
  const engine = await getEngine();
  const page = await engine.getPage(pagePath, scope);

  const response = new Response(JSON.stringify(page), {
    status: 200,
    headers: { "content-type": "application/json" }
  });
  return withCors(response, event.request, config);
}
22340
/**
 * Handle `POST <apiPath>`: read a JSON search request from the body, run it
 * through the engine and return the result as JSON.
 *
 * The body size is checked twice: first against the declared
 * `content-length` header, then against the byte length of the body that was
 * actually read.
 *
 * @param {object} event - Request event; reads `event.request`.
 * @param {object} config - Resolved configuration, forwarded to `withCors`.
 * @param {() => Promise<object>} getEngine - Lazily resolves the search engine.
 * @param {number} bodyLimit - Maximum accepted body size in bytes.
 * @returns {Promise<Response>} 200 JSON response wrapped by `withCors`.
 * @throws {SearchSocketError} `INVALID_REQUEST` with status 413 when the body
 *   exceeds `bodyLimit`, or 400 when it is not valid JSON.
 */
async function handlePostSearch(event, config, getEngine, bodyLimit) {
  const tooLarge = () => new SearchSocketError("INVALID_REQUEST", "Request body too large", 413);
  const malformed = () => new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);

  const declaredLength = Number(event.request.headers.get("content-length") ?? 0);
  if (declaredLength > bodyLimit) {
    throw tooLarge();
  }

  let rawBody;
  if (typeof event.request.text !== "function") {
    // Request objects without text(): parse via json() and re-serialise so
    // the size check below still sees a string.
    let parsedFallback;
    try {
      parsedFallback = await event.request.json();
    } catch (error) {
      throw error instanceof SyntaxError ? malformed() : error;
    }
    rawBody = JSON.stringify(parsedFallback);
  } else {
    rawBody = await event.request.text();
  }

  if (Buffer.byteLength(rawBody, "utf8") > bodyLimit) {
    throw tooLarge();
  }

  let searchRequest;
  try {
    searchRequest = JSON.parse(rawBody);
  } catch {
    throw malformed();
  }

  const engine = await getEngine();
  const result = await engine.search(searchRequest);
  const response = new Response(JSON.stringify(result), {
    status: 200,
    headers: { "content-type": "application/json" }
  });
  return withCors(response, event.request, config);
}
22381
/**
 * Serve one MCP request through a freshly created server and
 * `WebStandardStreamableHTTPServerTransport`.
 *
 * When `apiKey` is set, the request must carry `Authorization: Bearer <key>`.
 * The token is checked with the file's shared `verifyApiKey` helper (hash
 * both sides, then constant-time compare) rather than a raw length check
 * followed by a byte comparison, so the key's length does not influence how
 * quickly a wrong token is rejected and both MCP entry points compare
 * secrets the same way.
 *
 * @param {object} event - Request event; reads `event.request`.
 * @param {string | undefined} apiKey - Expected bearer token; falsy disables auth.
 * @param {boolean} enableJsonResponse - Passed to the transport; when true the
 *   transport and server are closed as soon as the response is produced.
 * @param {() => Promise<object>} getEngine - Lazily resolves the search engine.
 * @returns {Promise<Response>} The transport's response, a 401 JSON-RPC error
 *   when authentication fails, or a 500 JSON-RPC error when handling throws.
 */
async function handleMcpRequest(event, apiKey, enableJsonResponse, getEngine) {
  if (apiKey) {
    const authHeader = event.request.headers.get("authorization") ?? "";
    const token = authHeader.startsWith("Bearer ") ? authHeader.slice(7) : "";
    if (!token || !verifyApiKey(token, apiKey)) {
      return new Response(
        JSON.stringify({
          jsonrpc: "2.0",
          error: { code: -32001, message: "Unauthorized" },
          id: null
        }),
        { status: 401, headers: { "content-type": "application/json" } }
      );
    }
  }
  const transport = new webStandardStreamableHttp_js.WebStandardStreamableHTTPServerTransport({
    sessionIdGenerator: void 0,
    enableJsonResponse
  });
  let server;
  try {
    const engine = await getEngine();
    server = createServer(engine);
    await server.connect(transport);
    const response = await transport.handleRequest(event.request);
    // Only the JSON-response mode is torn down here; otherwise the transport
    // is left open (presumably because the response is still streaming).
    if (enableJsonResponse) {
      await transport.close();
      await server.close();
    }
    return response;
  } catch (error) {
    // Best-effort cleanup: a failure while closing must not mask the original
    // error that is reported to the client below.
    try {
      await transport.close();
    } catch {
    }
    try {
      await server?.close();
    } catch {
    }
    return new Response(
      JSON.stringify({
        jsonrpc: "2.0",
        error: {
          code: -32603,
          message: error instanceof Error ? error.message : "Internal server error"
        },
        id: null
      }),
      { status: 500, headers: { "content-type": "application/json" } }
    );
  }
}
20530
22435
  function buildCorsHeaders(request, config) {
20531
22436
  const allowOrigins = config.api.cors.allowOrigins;
20532
22437
  if (!allowOrigins || allowOrigins.length === 0) {
@@ -20539,7 +22444,7 @@ function buildCorsHeaders(request, config) {
20539
22444
  }
20540
22445
  return {
20541
22446
  "access-control-allow-origin": allowOrigins.includes("*") ? "*" : origin,
20542
- "access-control-allow-methods": "POST, OPTIONS",
22447
+ "access-control-allow-methods": "GET, POST, OPTIONS",
20543
22448
  "access-control-allow-headers": "content-type"
20544
22449
  };
20545
22450
  }
@@ -20575,9 +22480,6 @@ function shouldRunAutoIndex(options) {
20575
22480
  if (explicit && /^(1|true|yes)$/i.test(explicit)) {
20576
22481
  return true;
20577
22482
  }
20578
- if (process.env.CI && /^(1|true)$/i.test(process.env.CI)) {
20579
- return true;
20580
- }
20581
22483
  return false;
20582
22484
  }
20583
22485
  function searchsocketVitePlugin(options = {}) {
@@ -20602,7 +22504,8 @@ function searchsocketVitePlugin(options = {}) {
20602
22504
  const pipeline = await IndexPipeline.create({
20603
22505
  cwd,
20604
22506
  configPath: options.configPath,
20605
- logger: logger3
22507
+ logger: logger3,
22508
+ hooks: options.hooks
20606
22509
  });
20607
22510
  const stats = await pipeline.run({
20608
22511
  changedOnly: options.changedOnly ?? true,