searchsocket 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/sveltekit.js CHANGED
@@ -1,14 +1,20 @@
1
- import fs from 'fs';
1
+ import { timingSafeEqual, createHash } from 'crypto';
2
+ import fs9 from 'fs/promises';
2
3
  import path from 'path';
4
+ import { WebStandardStreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js';
5
+ import fs from 'fs';
3
6
  import { createJiti } from 'jiti';
4
7
  import { z } from 'zod';
8
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
9
+ import '@modelcontextprotocol/sdk/server/stdio.js';
10
+ import '@modelcontextprotocol/sdk/server/streamableHttp.js';
11
+ import '@modelcontextprotocol/sdk/server/express.js';
5
12
  import { execSync, spawn } from 'child_process';
6
- import { createHash } from 'crypto';
13
+ import { FusionAlgorithm, QueryMode } from '@upstash/vector';
7
14
  import { load } from 'cheerio';
8
15
  import matter from 'gray-matter';
9
16
  import fg from 'fast-glob';
10
17
  import pLimit from 'p-limit';
11
- import fs3 from 'fs/promises';
12
18
  import net from 'net';
13
19
  import { gunzipSync } from 'zlib';
14
20
 
@@ -5009,32 +5015,32 @@ var require_URL = __commonJS({
5009
5015
  else
5010
5016
  return basepath.substring(0, lastslash + 1) + refpath;
5011
5017
  }
5012
- function remove_dot_segments(path13) {
5013
- if (!path13) return path13;
5018
+ function remove_dot_segments(path14) {
5019
+ if (!path14) return path14;
5014
5020
  var output = "";
5015
- while (path13.length > 0) {
5016
- if (path13 === "." || path13 === "..") {
5017
- path13 = "";
5021
+ while (path14.length > 0) {
5022
+ if (path14 === "." || path14 === "..") {
5023
+ path14 = "";
5018
5024
  break;
5019
5025
  }
5020
- var twochars = path13.substring(0, 2);
5021
- var threechars = path13.substring(0, 3);
5022
- var fourchars = path13.substring(0, 4);
5026
+ var twochars = path14.substring(0, 2);
5027
+ var threechars = path14.substring(0, 3);
5028
+ var fourchars = path14.substring(0, 4);
5023
5029
  if (threechars === "../") {
5024
- path13 = path13.substring(3);
5030
+ path14 = path14.substring(3);
5025
5031
  } else if (twochars === "./") {
5026
- path13 = path13.substring(2);
5032
+ path14 = path14.substring(2);
5027
5033
  } else if (threechars === "/./") {
5028
- path13 = "/" + path13.substring(3);
5029
- } else if (twochars === "/." && path13.length === 2) {
5030
- path13 = "/";
5031
- } else if (fourchars === "/../" || threechars === "/.." && path13.length === 3) {
5032
- path13 = "/" + path13.substring(4);
5034
+ path14 = "/" + path14.substring(3);
5035
+ } else if (twochars === "/." && path14.length === 2) {
5036
+ path14 = "/";
5037
+ } else if (fourchars === "/../" || threechars === "/.." && path14.length === 3) {
5038
+ path14 = "/" + path14.substring(4);
5033
5039
  output = output.replace(/\/?[^\/]*$/, "");
5034
5040
  } else {
5035
- var segment = path13.match(/(\/?([^\/]*))/)[0];
5041
+ var segment = path14.match(/(\/?([^\/]*))/)[0];
5036
5042
  output += segment;
5037
- path13 = path13.substring(segment.length);
5043
+ path14 = path14.substring(segment.length);
5038
5044
  }
5039
5045
  }
5040
5046
  return output;
@@ -16630,6 +16636,7 @@ var searchSocketConfigSchema = z.object({
16630
16636
  dropSelectors: z.array(z.string()).optional(),
16631
16637
  ignoreAttr: z.string().optional(),
16632
16638
  noindexAttr: z.string().optional(),
16639
+ imageDescAttr: z.string().optional(),
16633
16640
  respectRobotsNoindex: z.boolean().optional()
16634
16641
  }).optional(),
16635
16642
  transform: z.object({
@@ -16645,35 +16652,48 @@ var searchSocketConfigSchema = z.object({
16645
16652
  headingPathDepth: z.number().int().positive().optional(),
16646
16653
  dontSplitInside: z.array(z.enum(["code", "table", "blockquote"])).optional(),
16647
16654
  prependTitle: z.boolean().optional(),
16648
- pageSummaryChunk: z.boolean().optional()
16655
+ pageSummaryChunk: z.boolean().optional(),
16656
+ weightHeadings: z.boolean().optional()
16649
16657
  }).optional(),
16650
16658
  upstash: z.object({
16651
16659
  url: z.string().url().optional(),
16652
16660
  token: z.string().min(1).optional(),
16653
16661
  urlEnv: z.string().min(1).optional(),
16654
- tokenEnv: z.string().min(1).optional()
16662
+ tokenEnv: z.string().min(1).optional(),
16663
+ namespaces: z.object({
16664
+ pages: z.string().min(1).optional(),
16665
+ chunks: z.string().min(1).optional()
16666
+ }).optional()
16667
+ }).optional(),
16668
+ embedding: z.object({
16669
+ model: z.string().optional(),
16670
+ dimensions: z.number().int().positive().optional(),
16671
+ taskType: z.string().optional(),
16672
+ batchSize: z.number().int().positive().optional()
16655
16673
  }).optional(),
16656
16674
  search: z.object({
16657
- semanticWeight: z.number().min(0).max(1).optional(),
16658
- inputEnrichment: z.boolean().optional(),
16659
- reranking: z.boolean().optional(),
16660
16675
  dualSearch: z.boolean().optional(),
16661
16676
  pageSearchWeight: z.number().min(0).max(1).optional()
16662
16677
  }).optional(),
16663
16678
  ranking: z.object({
16664
16679
  enableIncomingLinkBoost: z.boolean().optional(),
16665
16680
  enableDepthBoost: z.boolean().optional(),
16681
+ enableFreshnessBoost: z.boolean().optional(),
16682
+ freshnessDecayRate: z.number().positive().optional(),
16683
+ enableAnchorTextBoost: z.boolean().optional(),
16666
16684
  pageWeights: z.record(z.string(), z.number().min(0)).optional(),
16667
16685
  aggregationCap: z.number().int().positive().optional(),
16668
16686
  aggregationDecay: z.number().min(0).max(1).optional(),
16669
16687
  minChunkScoreRatio: z.number().min(0).max(1).optional(),
16670
- minScore: z.number().min(0).max(1).optional(),
16688
+ minScoreRatio: z.number().min(0).max(1).optional(),
16671
16689
  scoreGapThreshold: z.number().min(0).max(1).optional(),
16672
16690
  weights: z.object({
16673
16691
  incomingLinks: z.number().optional(),
16674
16692
  depth: z.number().optional(),
16675
16693
  aggregation: z.number().optional(),
16676
- titleMatch: z.number().optional()
16694
+ titleMatch: z.number().optional(),
16695
+ freshness: z.number().optional(),
16696
+ anchorText: z.number().optional()
16677
16697
  }).optional()
16678
16698
  }).optional(),
16679
16699
  api: z.object({
@@ -16688,12 +16708,28 @@ var searchSocketConfigSchema = z.object({
16688
16708
  }).optional(),
16689
16709
  mcp: z.object({
16690
16710
  enable: z.boolean().optional(),
16711
+ access: z.enum(["public", "private"]).optional(),
16691
16712
  transport: z.enum(["stdio", "http"]).optional(),
16692
16713
  http: z.object({
16693
16714
  port: z.number().int().positive().optional(),
16694
- path: z.string().optional()
16715
+ path: z.string().optional(),
16716
+ apiKey: z.string().min(1).optional(),
16717
+ apiKeyEnv: z.string().min(1).optional()
16718
+ }).optional(),
16719
+ handle: z.object({
16720
+ path: z.string().optional(),
16721
+ apiKey: z.string().min(1).optional(),
16722
+ enableJsonResponse: z.boolean().optional()
16695
16723
  }).optional()
16696
16724
  }).optional(),
16725
+ llmsTxt: z.object({
16726
+ enable: z.boolean().optional(),
16727
+ outputPath: z.string().optional(),
16728
+ title: z.string().optional(),
16729
+ description: z.string().optional(),
16730
+ generateFull: z.boolean().optional(),
16731
+ serveMarkdownVariants: z.boolean().optional()
16732
+ }).optional(),
16697
16733
  state: z.object({
16698
16734
  dir: z.string().optional()
16699
16735
  }).optional()
@@ -16732,6 +16768,7 @@ function createDefaultConfig(projectId) {
16732
16768
  dropSelectors: DEFAULT_DROP_SELECTORS,
16733
16769
  ignoreAttr: "data-search-ignore",
16734
16770
  noindexAttr: "data-search-noindex",
16771
+ imageDescAttr: "data-search-description",
16735
16772
  respectRobotsNoindex: true
16736
16773
  },
16737
16774
  transform: {
@@ -16741,39 +16778,52 @@ function createDefaultConfig(projectId) {
16741
16778
  },
16742
16779
  chunking: {
16743
16780
  strategy: "hybrid",
16744
- maxChars: 2200,
16781
+ maxChars: 1500,
16745
16782
  overlapChars: 200,
16746
16783
  minChars: 250,
16747
16784
  headingPathDepth: 3,
16748
16785
  dontSplitInside: ["code", "table", "blockquote"],
16749
16786
  prependTitle: true,
16750
- pageSummaryChunk: true
16787
+ pageSummaryChunk: true,
16788
+ weightHeadings: true
16751
16789
  },
16752
16790
  upstash: {
16753
- urlEnv: "UPSTASH_SEARCH_REST_URL",
16754
- tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
16791
+ urlEnv: "UPSTASH_VECTOR_REST_URL",
16792
+ tokenEnv: "UPSTASH_VECTOR_REST_TOKEN",
16793
+ namespaces: {
16794
+ pages: "pages",
16795
+ chunks: "chunks"
16796
+ }
16797
+ },
16798
+ embedding: {
16799
+ model: "bge-large-en-v1.5",
16800
+ dimensions: 1024,
16801
+ taskType: "RETRIEVAL_DOCUMENT",
16802
+ batchSize: 100
16755
16803
  },
16756
16804
  search: {
16757
- semanticWeight: 0.75,
16758
- inputEnrichment: true,
16759
- reranking: true,
16760
16805
  dualSearch: true,
16761
16806
  pageSearchWeight: 0.3
16762
16807
  },
16763
16808
  ranking: {
16764
16809
  enableIncomingLinkBoost: true,
16765
16810
  enableDepthBoost: true,
16811
+ enableFreshnessBoost: false,
16812
+ freshnessDecayRate: 1e-3,
16813
+ enableAnchorTextBoost: false,
16766
16814
  pageWeights: {},
16767
16815
  aggregationCap: 5,
16768
16816
  aggregationDecay: 0.5,
16769
16817
  minChunkScoreRatio: 0.5,
16770
- minScore: 0.3,
16818
+ minScoreRatio: 0.7,
16771
16819
  scoreGapThreshold: 0.4,
16772
16820
  weights: {
16773
16821
  incomingLinks: 0.05,
16774
16822
  depth: 0.03,
16775
16823
  aggregation: 0.1,
16776
- titleMatch: 0.15
16824
+ titleMatch: 0.15,
16825
+ freshness: 0.1,
16826
+ anchorText: 0.1
16777
16827
  }
16778
16828
  },
16779
16829
  api: {
@@ -16784,12 +16834,23 @@ function createDefaultConfig(projectId) {
16784
16834
  },
16785
16835
  mcp: {
16786
16836
  enable: process.env.NODE_ENV !== "production",
16837
+ access: "private",
16787
16838
  transport: "stdio",
16788
16839
  http: {
16789
16840
  port: 3338,
16790
16841
  path: "/mcp"
16842
+ },
16843
+ handle: {
16844
+ path: "/api/mcp",
16845
+ enableJsonResponse: true
16791
16846
  }
16792
16847
  },
16848
+ llmsTxt: {
16849
+ enable: false,
16850
+ outputPath: "static/llms.txt",
16851
+ generateFull: true,
16852
+ serveMarkdownVariants: false
16853
+ },
16793
16854
  state: {
16794
16855
  dir: ".searchsocket"
16795
16856
  }
@@ -16917,7 +16978,15 @@ ${issues}`
16917
16978
  },
16918
16979
  upstash: {
16919
16980
  ...defaults.upstash,
16920
- ...parsed.upstash
16981
+ ...parsed.upstash,
16982
+ namespaces: {
16983
+ ...defaults.upstash.namespaces,
16984
+ ...parsed.upstash?.namespaces
16985
+ }
16986
+ },
16987
+ embedding: {
16988
+ ...defaults.embedding,
16989
+ ...parsed.embedding
16921
16990
  },
16922
16991
  search: {
16923
16992
  ...defaults.search,
@@ -16954,8 +17023,16 @@ ${issues}`
16954
17023
  http: {
16955
17024
  ...defaults.mcp.http,
16956
17025
  ...parsed.mcp?.http
17026
+ },
17027
+ handle: {
17028
+ ...defaults.mcp.handle,
17029
+ ...parsed.mcp?.handle
16957
17030
  }
16958
17031
  },
17032
+ llmsTxt: {
17033
+ ...defaults.llmsTxt,
17034
+ ...parsed.llmsTxt
17035
+ },
16959
17036
  state: {
16960
17037
  ...defaults.state,
16961
17038
  ...parsed.state
@@ -16975,6 +17052,15 @@ ${issues}`
16975
17052
  maxDepth: 10
16976
17053
  };
16977
17054
  }
17055
+ if (merged.mcp.access === "public") {
17056
+ const resolvedKey = merged.mcp.http.apiKey ?? (merged.mcp.http.apiKeyEnv ? process.env[merged.mcp.http.apiKeyEnv] : void 0);
17057
+ if (!resolvedKey) {
17058
+ throw new SearchSocketError(
17059
+ "CONFIG_MISSING",
17060
+ '`mcp.access` is "public" but no API key is configured. Set `mcp.http.apiKey` or `mcp.http.apiKeyEnv`.'
17061
+ );
17062
+ }
17063
+ }
16978
17064
  if (merged.source.mode === "crawl" && !merged.source.crawl?.baseUrl) {
16979
17065
  throw new SearchSocketError("CONFIG_MISSING", "`source.crawl.baseUrl` is required when source.mode is crawl.");
16980
17066
  }
@@ -17023,13 +17109,84 @@ function normalizeMarkdown(input) {
17023
17109
  function sanitizeScopeName(scopeName) {
17024
17110
  return scopeName.toLowerCase().replace(/[^a-z0-9._-]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 80);
17025
17111
  }
17112
+ function markdownToPlain(markdown) {
17113
+ return markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/[#>*_|\-]/g, " ").replace(/\s+/g, " ").trim();
17114
+ }
17026
17115
  function toSnippet(markdown, maxLen = 220) {
17027
- const plain = markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/[#>*_|\-]/g, " ").replace(/\s+/g, " ").trim();
17116
+ const plain = markdownToPlain(markdown);
17028
17117
  if (plain.length <= maxLen) {
17029
17118
  return plain;
17030
17119
  }
17031
17120
  return `${plain.slice(0, Math.max(0, maxLen - 1)).trim()}\u2026`;
17032
17121
  }
17122
+ function queryAwareExcerpt(markdown, query, maxLen = 220) {
17123
+ const plain = markdownToPlain(markdown);
17124
+ if (plain.length <= maxLen) return plain;
17125
+ const tokens = query.toLowerCase().split(/\s+/).filter((t) => t.length >= 2);
17126
+ if (tokens.length === 0) return toSnippet(markdown, maxLen);
17127
+ const positions = [];
17128
+ for (let ti = 0; ti < tokens.length; ti++) {
17129
+ const escaped = tokens[ti].replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
17130
+ const re = new RegExp(escaped, "gi");
17131
+ let m;
17132
+ while ((m = re.exec(plain)) !== null) {
17133
+ positions.push({ start: m.index, end: m.index + m[0].length, tokenIdx: ti });
17134
+ }
17135
+ }
17136
+ if (positions.length === 0) return toSnippet(markdown, maxLen);
17137
+ positions.sort((a, b) => a.start - b.start);
17138
+ let bestUniqueCount = 0;
17139
+ let bestTotalCount = 0;
17140
+ let bestLeft = 0;
17141
+ let bestRight = 0;
17142
+ let left = 0;
17143
+ const tokenCounts = /* @__PURE__ */ new Map();
17144
+ for (let right = 0; right < positions.length; right++) {
17145
+ tokenCounts.set(positions[right].tokenIdx, (tokenCounts.get(positions[right].tokenIdx) ?? 0) + 1);
17146
+ while (positions[right].end - positions[left].start > maxLen && left < right) {
17147
+ const leftToken = positions[left].tokenIdx;
17148
+ const cnt = tokenCounts.get(leftToken) - 1;
17149
+ if (cnt === 0) tokenCounts.delete(leftToken);
17150
+ else tokenCounts.set(leftToken, cnt);
17151
+ left++;
17152
+ }
17153
+ const uniqueCount = tokenCounts.size;
17154
+ const totalCount = right - left + 1;
17155
+ if (uniqueCount > bestUniqueCount || uniqueCount === bestUniqueCount && totalCount > bestTotalCount) {
17156
+ bestUniqueCount = uniqueCount;
17157
+ bestTotalCount = totalCount;
17158
+ bestLeft = left;
17159
+ bestRight = right;
17160
+ }
17161
+ }
17162
+ const mid = Math.floor((positions[bestLeft].start + positions[bestRight].end) / 2);
17163
+ let start = Math.max(0, mid - Math.floor(maxLen / 2));
17164
+ let end = Math.min(plain.length, start + maxLen);
17165
+ start = Math.max(0, end - maxLen);
17166
+ if (start > 0) {
17167
+ const spaceIdx = plain.lastIndexOf(" ", start);
17168
+ if (spaceIdx > start - 30) {
17169
+ start = spaceIdx + 1;
17170
+ }
17171
+ }
17172
+ if (end < plain.length) {
17173
+ const spaceIdx = plain.indexOf(" ", end);
17174
+ if (spaceIdx !== -1 && spaceIdx < end + 30) {
17175
+ end = spaceIdx;
17176
+ }
17177
+ }
17178
+ let excerpt = plain.slice(start, end);
17179
+ if (excerpt.length > Math.ceil(maxLen * 1.2)) {
17180
+ excerpt = excerpt.slice(0, maxLen);
17181
+ const lastSpace = excerpt.lastIndexOf(" ");
17182
+ if (lastSpace > maxLen * 0.5) {
17183
+ excerpt = excerpt.slice(0, lastSpace);
17184
+ }
17185
+ }
17186
+ const prefix = start > 0 ? "\u2026" : "";
17187
+ const suffix = end < plain.length ? "\u2026" : "";
17188
+ return `${prefix}${excerpt}${suffix}`;
17189
+ }
17033
17190
  function extractFirstParagraph(markdown) {
17034
17191
  const lines = markdown.split("\n");
17035
17192
  let inFence = false;
@@ -17136,162 +17293,342 @@ function joinUrl(baseUrl, route) {
17136
17293
  const routePart = ensureLeadingSlash(route);
17137
17294
  return `${base}${routePart}`;
17138
17295
  }
17139
-
17140
- // src/vector/upstash.ts
17141
- function chunkIndexName(scope) {
17142
- return `${scope.projectId}--${scope.scopeName}`;
17143
- }
17144
- function pageIndexName(scope) {
17145
- return `${scope.projectId}--${scope.scopeName}--pages`;
17146
- }
17147
17296
  var UpstashSearchStore = class {
17148
- client;
17297
+ index;
17298
+ pagesNs;
17299
+ chunksNs;
17149
17300
  constructor(opts) {
17150
- this.client = opts.client;
17151
- }
17152
- chunkIndex(scope) {
17153
- return this.client.index(chunkIndexName(scope));
17154
- }
17155
- pageIndex(scope) {
17156
- return this.client.index(pageIndexName(scope));
17301
+ this.index = opts.index;
17302
+ this.pagesNs = opts.index.namespace(opts.pagesNamespace);
17303
+ this.chunksNs = opts.index.namespace(opts.chunksNamespace);
17157
17304
  }
17158
17305
  async upsertChunks(chunks, scope) {
17159
17306
  if (chunks.length === 0) return;
17160
- const index = this.chunkIndex(scope);
17161
- const BATCH_SIZE = 100;
17307
+ const BATCH_SIZE = 90;
17162
17308
  for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
17163
17309
  const batch = chunks.slice(i, i + BATCH_SIZE);
17164
- await index.upsert(batch);
17165
- }
17166
- }
17167
- async search(query, opts, scope) {
17168
- const index = this.chunkIndex(scope);
17169
- const results = await index.search({
17170
- query,
17171
- limit: opts.limit,
17172
- semanticWeight: opts.semanticWeight,
17173
- inputEnrichment: opts.inputEnrichment,
17174
- reranking: opts.reranking,
17175
- filter: opts.filter
17310
+ await this.chunksNs.upsert(
17311
+ batch.map((c) => ({
17312
+ id: c.id,
17313
+ data: c.data,
17314
+ metadata: {
17315
+ ...c.metadata,
17316
+ projectId: scope.projectId,
17317
+ scopeName: scope.scopeName,
17318
+ type: c.metadata.type || "chunk"
17319
+ }
17320
+ }))
17321
+ );
17322
+ }
17323
+ }
17324
+ async search(data, opts, scope) {
17325
+ const filterParts = [
17326
+ `projectId = '${scope.projectId}'`,
17327
+ `scopeName = '${scope.scopeName}'`
17328
+ ];
17329
+ if (opts.filter) {
17330
+ filterParts.push(opts.filter);
17331
+ }
17332
+ const results = await this.chunksNs.query({
17333
+ data,
17334
+ topK: opts.limit,
17335
+ includeMetadata: true,
17336
+ filter: filterParts.join(" AND "),
17337
+ queryMode: QueryMode.HYBRID,
17338
+ fusionAlgorithm: FusionAlgorithm.DBSF
17339
+ });
17340
+ return results.map((doc) => ({
17341
+ id: String(doc.id),
17342
+ score: doc.score,
17343
+ metadata: {
17344
+ projectId: doc.metadata?.projectId ?? "",
17345
+ scopeName: doc.metadata?.scopeName ?? "",
17346
+ url: doc.metadata?.url ?? "",
17347
+ path: doc.metadata?.path ?? "",
17348
+ title: doc.metadata?.title ?? "",
17349
+ sectionTitle: doc.metadata?.sectionTitle ?? "",
17350
+ headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
17351
+ snippet: doc.metadata?.snippet ?? "",
17352
+ chunkText: doc.metadata?.chunkText ?? "",
17353
+ ordinal: doc.metadata?.ordinal ?? 0,
17354
+ contentHash: doc.metadata?.contentHash ?? "",
17355
+ depth: doc.metadata?.depth ?? 0,
17356
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
17357
+ routeFile: doc.metadata?.routeFile ?? "",
17358
+ tags: doc.metadata?.tags ?? [],
17359
+ description: doc.metadata?.description || void 0,
17360
+ keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
17361
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
17362
+ incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
17363
+ }
17364
+ }));
17365
+ }
17366
+ async searchChunksByUrl(data, url, opts, scope) {
17367
+ const filterParts = [
17368
+ `projectId = '${scope.projectId}'`,
17369
+ `scopeName = '${scope.scopeName}'`,
17370
+ `url = '${url}'`
17371
+ ];
17372
+ if (opts.filter) {
17373
+ filterParts.push(opts.filter);
17374
+ }
17375
+ const results = await this.chunksNs.query({
17376
+ data,
17377
+ topK: opts.limit,
17378
+ includeMetadata: true,
17379
+ filter: filterParts.join(" AND "),
17380
+ queryMode: QueryMode.HYBRID,
17381
+ fusionAlgorithm: FusionAlgorithm.DBSF
17176
17382
  });
17177
17383
  return results.map((doc) => ({
17178
- id: doc.id,
17384
+ id: String(doc.id),
17179
17385
  score: doc.score,
17180
17386
  metadata: {
17181
17387
  projectId: doc.metadata?.projectId ?? "",
17182
17388
  scopeName: doc.metadata?.scopeName ?? "",
17183
- url: doc.content.url,
17389
+ url: doc.metadata?.url ?? "",
17184
17390
  path: doc.metadata?.path ?? "",
17185
- title: doc.content.title,
17186
- sectionTitle: doc.content.sectionTitle,
17187
- headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
17391
+ title: doc.metadata?.title ?? "",
17392
+ sectionTitle: doc.metadata?.sectionTitle ?? "",
17393
+ headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
17188
17394
  snippet: doc.metadata?.snippet ?? "",
17189
- chunkText: doc.content.text,
17395
+ chunkText: doc.metadata?.chunkText ?? "",
17190
17396
  ordinal: doc.metadata?.ordinal ?? 0,
17191
17397
  contentHash: doc.metadata?.contentHash ?? "",
17192
17398
  depth: doc.metadata?.depth ?? 0,
17193
17399
  incomingLinks: doc.metadata?.incomingLinks ?? 0,
17194
17400
  routeFile: doc.metadata?.routeFile ?? "",
17195
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17401
+ tags: doc.metadata?.tags ?? [],
17196
17402
  description: doc.metadata?.description || void 0,
17197
- keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
17403
+ keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
17404
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
17405
+ incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
17198
17406
  }
17199
17407
  }));
17200
17408
  }
17201
- async searchPages(query, opts, scope) {
17202
- const index = this.pageIndex(scope);
17409
+ async searchPagesByText(data, opts, scope) {
17410
+ return this.queryPages({ data }, opts, scope);
17411
+ }
17412
+ async searchPagesByVector(vector, opts, scope) {
17413
+ return this.queryPages({ vector }, opts, scope);
17414
+ }
17415
+ async queryPages(input, opts, scope) {
17416
+ const filterParts = [
17417
+ `projectId = '${scope.projectId}'`,
17418
+ `scopeName = '${scope.scopeName}'`
17419
+ ];
17420
+ if (opts.filter) {
17421
+ filterParts.push(opts.filter);
17422
+ }
17203
17423
  let results;
17204
17424
  try {
17205
- results = await index.search({
17206
- query,
17207
- limit: opts.limit,
17208
- semanticWeight: opts.semanticWeight,
17209
- inputEnrichment: opts.inputEnrichment,
17210
- reranking: true,
17211
- filter: opts.filter
17425
+ results = await this.pagesNs.query({
17426
+ ...input,
17427
+ topK: opts.limit,
17428
+ includeMetadata: true,
17429
+ filter: filterParts.join(" AND "),
17430
+ queryMode: QueryMode.HYBRID,
17431
+ fusionAlgorithm: FusionAlgorithm.DBSF
17212
17432
  });
17213
17433
  } catch {
17214
17434
  return [];
17215
17435
  }
17216
17436
  return results.map((doc) => ({
17217
- id: doc.id,
17437
+ id: String(doc.id),
17218
17438
  score: doc.score,
17219
- title: doc.content.title,
17220
- url: doc.content.url,
17221
- description: doc.content.description ?? "",
17222
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17439
+ title: doc.metadata?.title ?? "",
17440
+ url: doc.metadata?.url ?? "",
17441
+ description: doc.metadata?.description ?? "",
17442
+ tags: doc.metadata?.tags ?? [],
17223
17443
  depth: doc.metadata?.depth ?? 0,
17224
17444
  incomingLinks: doc.metadata?.incomingLinks ?? 0,
17225
- routeFile: doc.metadata?.routeFile ?? ""
17445
+ routeFile: doc.metadata?.routeFile ?? "",
17446
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0
17226
17447
  }));
17227
17448
  }
17228
- async deleteByIds(ids, scope) {
17449
+ async deleteByIds(ids, _scope) {
17229
17450
  if (ids.length === 0) return;
17230
- const index = this.chunkIndex(scope);
17231
- const BATCH_SIZE = 500;
17451
+ const BATCH_SIZE = 90;
17232
17452
  for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17233
17453
  const batch = ids.slice(i, i + BATCH_SIZE);
17234
- await index.delete(batch);
17454
+ await this.chunksNs.delete(batch);
17235
17455
  }
17236
17456
  }
17237
17457
  async deleteScope(scope) {
17238
- try {
17239
- const chunkIdx = this.chunkIndex(scope);
17240
- await chunkIdx.deleteIndex();
17241
- } catch {
17242
- }
17243
- try {
17244
- const pageIdx = this.pageIndex(scope);
17245
- await pageIdx.deleteIndex();
17246
- } catch {
17458
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17459
+ const ids = [];
17460
+ let cursor = "0";
17461
+ try {
17462
+ for (; ; ) {
17463
+ const result = await ns.range({
17464
+ cursor,
17465
+ limit: 100,
17466
+ includeMetadata: true
17467
+ });
17468
+ for (const doc of result.vectors) {
17469
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
17470
+ ids.push(String(doc.id));
17471
+ }
17472
+ }
17473
+ if (!result.nextCursor || result.nextCursor === "0") break;
17474
+ cursor = result.nextCursor;
17475
+ }
17476
+ } catch {
17477
+ }
17478
+ if (ids.length > 0) {
17479
+ const BATCH_SIZE = 90;
17480
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17481
+ const batch = ids.slice(i, i + BATCH_SIZE);
17482
+ await ns.delete(batch);
17483
+ }
17484
+ }
17247
17485
  }
17248
17486
  }
17249
17487
  async listScopes(projectId) {
17250
- const allIndexes = await this.client.listIndexes();
17251
- const prefix = `${projectId}--`;
17252
- const scopeNames = /* @__PURE__ */ new Set();
17253
- for (const name of allIndexes) {
17254
- if (name.startsWith(prefix) && !name.endsWith("--pages")) {
17255
- const scopeName = name.slice(prefix.length);
17256
- scopeNames.add(scopeName);
17257
- }
17258
- }
17259
- const scopes = [];
17260
- for (const scopeName of scopeNames) {
17261
- const scope = {
17262
- projectId,
17263
- scopeName,
17264
- scopeId: `${projectId}:${scopeName}`
17265
- };
17488
+ const scopeMap = /* @__PURE__ */ new Map();
17489
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17490
+ let cursor = "0";
17491
+ try {
17492
+ for (; ; ) {
17493
+ const result = await ns.range({
17494
+ cursor,
17495
+ limit: 100,
17496
+ includeMetadata: true
17497
+ });
17498
+ for (const doc of result.vectors) {
17499
+ if (doc.metadata?.projectId === projectId) {
17500
+ const scopeName = doc.metadata.scopeName ?? "";
17501
+ scopeMap.set(scopeName, (scopeMap.get(scopeName) ?? 0) + 1);
17502
+ }
17503
+ }
17504
+ if (!result.nextCursor || result.nextCursor === "0") break;
17505
+ cursor = result.nextCursor;
17506
+ }
17507
+ } catch {
17508
+ }
17509
+ }
17510
+ return [...scopeMap.entries()].map(([scopeName, count]) => ({
17511
+ projectId,
17512
+ scopeName,
17513
+ lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
17514
+ documentCount: count
17515
+ }));
17516
+ }
17517
+ async getContentHashes(scope) {
17518
+ return this.scanHashes(this.chunksNs, scope);
17519
+ }
17520
+ /**
17521
+ * Fetch content hashes for a specific set of chunk keys using direct fetch()
17522
+ * instead of range(). This avoids potential issues with range() returning
17523
+ * vectors from the wrong namespace on hybrid indexes.
17524
+ */
17525
+ async fetchContentHashesForKeys(keys, scope) {
17526
+ const map = /* @__PURE__ */ new Map();
17527
+ if (keys.length === 0) return map;
17528
+ const BATCH_SIZE = 90;
17529
+ for (let i = 0; i < keys.length; i += BATCH_SIZE) {
17530
+ const batch = keys.slice(i, i + BATCH_SIZE);
17266
17531
  try {
17267
- const info = await this.chunkIndex(scope).info();
17268
- scopes.push({
17269
- projectId,
17270
- scopeName,
17271
- lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
17272
- documentCount: info.documentCount
17532
+ const results = await this.chunksNs.fetch(batch, {
17533
+ includeMetadata: true
17273
17534
  });
17535
+ for (const doc of results) {
17536
+ if (doc && doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
17537
+ map.set(String(doc.id), doc.metadata.contentHash);
17538
+ }
17539
+ }
17274
17540
  } catch {
17275
- scopes.push({
17276
- projectId,
17277
- scopeName,
17278
- lastIndexedAt: "unknown",
17279
- documentCount: 0
17541
+ }
17542
+ }
17543
+ return map;
17544
+ }
17545
+ /**
17546
+ * Scan all IDs in the chunks namespace for this scope.
17547
+ * Used for deletion detection (finding stale chunk keys).
17548
+ */
17549
+ async scanChunkIds(scope) {
17550
+ const ids = /* @__PURE__ */ new Set();
17551
+ let cursor = "0";
17552
+ try {
17553
+ for (; ; ) {
17554
+ const result = await this.chunksNs.range({
17555
+ cursor,
17556
+ limit: 100,
17557
+ includeMetadata: true
17280
17558
  });
17559
+ for (const doc of result.vectors) {
17560
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
17561
+ ids.add(String(doc.id));
17562
+ }
17563
+ }
17564
+ if (!result.nextCursor || result.nextCursor === "0") break;
17565
+ cursor = result.nextCursor;
17281
17566
  }
17567
+ } catch {
17282
17568
  }
17283
- return scopes;
17569
+ return ids;
17284
17570
  }
17285
- async getContentHashes(scope) {
17571
+ async scanHashes(ns, scope) {
17572
+ const map = /* @__PURE__ */ new Map();
17573
+ let cursor = "0";
17574
+ try {
17575
+ for (; ; ) {
17576
+ const result = await ns.range({
17577
+ cursor,
17578
+ limit: 100,
17579
+ includeMetadata: true
17580
+ });
17581
+ for (const doc of result.vectors) {
17582
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
17583
+ map.set(String(doc.id), doc.metadata.contentHash);
17584
+ }
17585
+ }
17586
+ if (!result.nextCursor || result.nextCursor === "0") break;
17587
+ cursor = result.nextCursor;
17588
+ }
17589
+ } catch {
17590
+ }
17591
+ return map;
17592
+ }
17593
+ async listPages(scope, opts) {
17594
+ const cursor = opts?.cursor ?? "0";
17595
+ const limit = opts?.limit ?? 50;
17596
+ try {
17597
+ const result = await this.pagesNs.range({
17598
+ cursor,
17599
+ limit,
17600
+ includeMetadata: true
17601
+ });
17602
+ const pages = result.vectors.filter(
17603
+ (doc) => doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && (!opts?.pathPrefix || (doc.metadata?.url ?? "").startsWith(opts.pathPrefix))
17604
+ ).map((doc) => ({
17605
+ url: doc.metadata?.url ?? "",
17606
+ title: doc.metadata?.title ?? "",
17607
+ description: doc.metadata?.description ?? "",
17608
+ routeFile: doc.metadata?.routeFile ?? ""
17609
+ }));
17610
+ const response = { pages };
17611
+ if (result.nextCursor && result.nextCursor !== "0") {
17612
+ response.nextCursor = result.nextCursor;
17613
+ }
17614
+ return response;
17615
+ } catch {
17616
+ return { pages: [] };
17617
+ }
17618
+ }
17619
+ async getPageHashes(scope) {
17286
17620
  const map = /* @__PURE__ */ new Map();
17287
- const index = this.chunkIndex(scope);
17288
17621
  let cursor = "0";
17289
17622
  try {
17290
17623
  for (; ; ) {
17291
- const result = await index.range({ cursor, limit: 100 });
17292
- for (const doc of result.documents) {
17293
- if (doc.metadata?.contentHash) {
17294
- map.set(doc.id, doc.metadata.contentHash);
17624
+ const result = await this.pagesNs.range({
17625
+ cursor,
17626
+ limit: 100,
17627
+ includeMetadata: true
17628
+ });
17629
+ for (const doc of result.vectors) {
17630
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
17631
+ map.set(String(doc.id), doc.metadata.contentHash);
17295
17632
  }
17296
17633
  }
17297
17634
  if (!result.nextCursor || result.nextCursor === "0") break;
@@ -17301,47 +17638,43 @@ var UpstashSearchStore = class {
17301
17638
  }
17302
17639
  return map;
17303
17640
  }
17641
+ async deletePagesByIds(ids, _scope) {
17642
+ if (ids.length === 0) return;
17643
+ const BATCH_SIZE = 90;
17644
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17645
+ const batch = ids.slice(i, i + BATCH_SIZE);
17646
+ await this.pagesNs.delete(batch);
17647
+ }
17648
+ }
17304
17649
  async upsertPages(pages, scope) {
17305
17650
  if (pages.length === 0) return;
17306
- const index = this.pageIndex(scope);
17307
- const BATCH_SIZE = 50;
17651
+ const BATCH_SIZE = 90;
17308
17652
  for (let i = 0; i < pages.length; i += BATCH_SIZE) {
17309
17653
  const batch = pages.slice(i, i + BATCH_SIZE);
17310
- const docs = batch.map((p) => ({
17311
- id: p.url,
17312
- content: {
17313
- title: p.title,
17314
- url: p.url,
17315
- type: "page",
17316
- description: p.description ?? "",
17317
- keywords: (p.keywords ?? []).join(","),
17318
- summary: p.summary ?? "",
17319
- tags: p.tags.join(",")
17320
- },
17321
- metadata: {
17322
- markdown: p.markdown,
17323
- projectId: p.projectId,
17324
- scopeName: p.scopeName,
17325
- routeFile: p.routeFile,
17326
- routeResolution: p.routeResolution,
17327
- incomingLinks: p.incomingLinks,
17328
- outgoingLinks: p.outgoingLinks,
17329
- depth: p.depth,
17330
- indexedAt: p.indexedAt
17331
- }
17332
- }));
17333
- await index.upsert(docs);
17654
+ await this.pagesNs.upsert(
17655
+ batch.map((p) => ({
17656
+ id: p.id,
17657
+ data: p.data,
17658
+ metadata: {
17659
+ ...p.metadata,
17660
+ projectId: scope.projectId,
17661
+ scopeName: scope.scopeName,
17662
+ type: "page"
17663
+ }
17664
+ }))
17665
+ );
17334
17666
  }
17335
17667
  }
17336
17668
  async getPage(url, scope) {
17337
- const index = this.pageIndex(scope);
17338
17669
  try {
17339
- const results = await index.fetch([url]);
17670
+ const results = await this.pagesNs.fetch([url], {
17671
+ includeMetadata: true
17672
+ });
17340
17673
  const doc = results[0];
17341
- if (!doc) return null;
17674
+ if (!doc || !doc.metadata) return null;
17342
17675
  return {
17343
- url: doc.content.url,
17344
- title: doc.content.title,
17676
+ url: doc.metadata.url,
17677
+ title: doc.metadata.title,
17345
17678
  markdown: doc.metadata.markdown,
17346
17679
  projectId: doc.metadata.projectId,
17347
17680
  scopeName: doc.metadata.scopeName,
@@ -17349,27 +17682,86 @@ var UpstashSearchStore = class {
17349
17682
  routeResolution: doc.metadata.routeResolution,
17350
17683
  incomingLinks: doc.metadata.incomingLinks,
17351
17684
  outgoingLinks: doc.metadata.outgoingLinks,
17685
+ outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? void 0,
17352
17686
  depth: doc.metadata.depth,
17353
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17687
+ tags: doc.metadata.tags ?? [],
17354
17688
  indexedAt: doc.metadata.indexedAt,
17355
- summary: doc.content.summary || void 0,
17356
- description: doc.content.description || void 0,
17357
- keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
17689
+ summary: doc.metadata.summary || void 0,
17690
+ description: doc.metadata.description || void 0,
17691
+ keywords: doc.metadata.keywords?.length ? doc.metadata.keywords : void 0,
17692
+ publishedAt: typeof doc.metadata.publishedAt === "number" ? doc.metadata.publishedAt : void 0
17358
17693
  };
17359
17694
  } catch {
17360
17695
  return null;
17361
17696
  }
17362
17697
  }
17698
+ async fetchPageWithVector(url, scope) {
17699
+ try {
17700
+ const results = await this.pagesNs.fetch([url], {
17701
+ includeMetadata: true,
17702
+ includeVectors: true
17703
+ });
17704
+ const doc = results[0];
17705
+ if (!doc || !doc.metadata || !doc.vector) return null;
17706
+ if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
17707
+ return null;
17708
+ }
17709
+ return { metadata: doc.metadata, vector: doc.vector };
17710
+ } catch {
17711
+ return null;
17712
+ }
17713
+ }
17714
+ async fetchPagesBatch(urls, scope) {
17715
+ if (urls.length === 0) return [];
17716
+ try {
17717
+ const results = await this.pagesNs.fetch(urls, {
17718
+ includeMetadata: true
17719
+ });
17720
+ const out = [];
17721
+ for (const doc of results) {
17722
+ if (!doc || !doc.metadata) continue;
17723
+ if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
17724
+ continue;
17725
+ }
17726
+ out.push({
17727
+ url: doc.metadata.url,
17728
+ title: doc.metadata.title,
17729
+ routeFile: doc.metadata.routeFile,
17730
+ outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? []
17731
+ });
17732
+ }
17733
+ return out;
17734
+ } catch {
17735
+ return [];
17736
+ }
17737
+ }
17363
17738
  async deletePages(scope) {
17739
+ const ids = [];
17740
+ let cursor = "0";
17364
17741
  try {
17365
- const index = this.pageIndex(scope);
17366
- await index.reset();
17742
+ for (; ; ) {
17743
+ const result = await this.pagesNs.range({
17744
+ cursor,
17745
+ limit: 100,
17746
+ includeMetadata: true
17747
+ });
17748
+ for (const doc of result.vectors) {
17749
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
17750
+ ids.push(String(doc.id));
17751
+ }
17752
+ }
17753
+ if (!result.nextCursor || result.nextCursor === "0") break;
17754
+ cursor = result.nextCursor;
17755
+ }
17367
17756
  } catch {
17368
17757
  }
17758
+ if (ids.length > 0) {
17759
+ await this.deletePagesByIds(ids, scope);
17760
+ }
17369
17761
  }
17370
17762
  async health() {
17371
17763
  try {
17372
- await this.client.info();
17764
+ await this.index.info();
17373
17765
  return { ok: true };
17374
17766
  } catch (error) {
17375
17767
  return {
@@ -17379,14 +17771,31 @@ var UpstashSearchStore = class {
17379
17771
  }
17380
17772
  }
17381
17773
  async dropAllIndexes(projectId) {
17382
- const allIndexes = await this.client.listIndexes();
17383
- const prefix = `${projectId}--`;
17384
- for (const name of allIndexes) {
17385
- if (name.startsWith(prefix)) {
17386
- try {
17387
- const index = this.client.index(name);
17388
- await index.deleteIndex();
17389
- } catch {
17774
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17775
+ const ids = [];
17776
+ let cursor = "0";
17777
+ try {
17778
+ for (; ; ) {
17779
+ const result = await ns.range({
17780
+ cursor,
17781
+ limit: 100,
17782
+ includeMetadata: true
17783
+ });
17784
+ for (const doc of result.vectors) {
17785
+ if (doc.metadata?.projectId === projectId) {
17786
+ ids.push(String(doc.id));
17787
+ }
17788
+ }
17789
+ if (!result.nextCursor || result.nextCursor === "0") break;
17790
+ cursor = result.nextCursor;
17791
+ }
17792
+ } catch {
17793
+ }
17794
+ if (ids.length > 0) {
17795
+ const BATCH_SIZE = 90;
17796
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17797
+ const batch = ids.slice(i, i + BATCH_SIZE);
17798
+ await ns.delete(batch);
17390
17799
  }
17391
17800
  }
17392
17801
  }
@@ -17400,12 +17809,16 @@ async function createUpstashStore(config) {
17400
17809
  if (!url || !token) {
17401
17810
  throw new SearchSocketError(
17402
17811
  "VECTOR_BACKEND_UNAVAILABLE",
17403
- `Missing Upstash Search credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
17812
+ `Missing Upstash Vector credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
17404
17813
  );
17405
17814
  }
17406
- const { Search } = await import('@upstash/search');
17407
- const client = new Search({ url, token });
17408
- return new UpstashSearchStore({ client });
17815
+ const { Index } = await import('@upstash/vector');
17816
+ const index = new Index({ url, token });
17817
+ return new UpstashSearchStore({
17818
+ index,
17819
+ pagesNamespace: config.upstash.namespaces.pages,
17820
+ chunksNamespace: config.upstash.namespaces.chunks
17821
+ });
17409
17822
  }
17410
17823
 
17411
17824
  // src/utils/pattern.ts
@@ -17448,29 +17861,65 @@ function nonNegativeOrZero(value) {
17448
17861
/**
 * Canonicalizes text for the title-match comparison: lower-cases it, folds
 * Latin-style accents, drops punctuation/symbols, and collapses whitespace.
 *
 * The previous ASCII-only character class deleted every non-ASCII letter, so
 * titles and queries in other scripts normalized to an empty (or truncated)
 * string and could never match. Letters, combining marks and digits of any
 * script are now kept; ASCII input normalizes exactly as before.
 *
 * @param {string} text - Raw title or query.
 * @returns {string} Normalized text with single spaces and no leading/trailing whitespace.
 */
function normalizeForTitleMatch(text) {
  return text
    .toLowerCase()
    // Decompose, strip the generic combining diacritics (U+0300–U+036F) so
    // "café" and "cafe" compare equal, then recompose everything else.
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "")
    .normalize("NFC")
    // Keep letters, marks and numbers of every script; drop punctuation/symbols.
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}
17451
- function rankHits(hits, config, query) {
17864
+ function rankHits(hits, config, query, debug) {
17452
17865
  const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
17453
17866
  const titleMatchWeight = config.ranking.weights.titleMatch;
17454
17867
  return hits.map((hit) => {
17455
- let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
17868
+ const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
17869
+ let score = baseScore;
17870
+ let incomingLinkBoostValue = 0;
17456
17871
  if (config.ranking.enableIncomingLinkBoost) {
17457
17872
  const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.metadata.incomingLinks));
17458
- score += incomingBoost * config.ranking.weights.incomingLinks;
17873
+ incomingLinkBoostValue = incomingBoost * config.ranking.weights.incomingLinks;
17874
+ score += incomingLinkBoostValue;
17459
17875
  }
17876
+ let depthBoostValue = 0;
17460
17877
  if (config.ranking.enableDepthBoost) {
17461
17878
  const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
17462
- score += depthBoost * config.ranking.weights.depth;
17879
+ depthBoostValue = depthBoost * config.ranking.weights.depth;
17880
+ score += depthBoostValue;
17463
17881
  }
17882
+ let titleMatchBoostValue = 0;
17464
17883
  if (normalizedQuery && titleMatchWeight > 0) {
17465
17884
  const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
17466
17885
  if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
17467
- score += titleMatchWeight;
17886
+ titleMatchBoostValue = titleMatchWeight;
17887
+ score += titleMatchBoostValue;
17468
17888
  }
17469
17889
  }
17470
- return {
17890
+ let freshnessBoostValue = 0;
17891
+ if (config.ranking.enableFreshnessBoost) {
17892
+ const publishedAt = hit.metadata.publishedAt;
17893
+ if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
17894
+ const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
17895
+ const decay = 1 / (1 + nonNegativeOrZero(daysSince) * config.ranking.freshnessDecayRate);
17896
+ freshnessBoostValue = decay * config.ranking.weights.freshness;
17897
+ score += freshnessBoostValue;
17898
+ }
17899
+ }
17900
+ let anchorTextMatchBoostValue = 0;
17901
+ if (config.ranking.enableAnchorTextBoost && normalizedQuery && config.ranking.weights.anchorText > 0) {
17902
+ const normalizedAnchorText = normalizeForTitleMatch(hit.metadata.incomingAnchorText ?? "");
17903
+ if (normalizedAnchorText.length > 0 && normalizedQuery.length > 0 && (normalizedAnchorText.includes(normalizedQuery) || normalizedQuery.includes(normalizedAnchorText))) {
17904
+ anchorTextMatchBoostValue = config.ranking.weights.anchorText;
17905
+ score += anchorTextMatchBoostValue;
17906
+ }
17907
+ }
17908
+ const result = {
17471
17909
  hit,
17472
17910
  finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
17473
17911
  };
17912
+ if (debug) {
17913
+ result.breakdown = {
17914
+ baseScore,
17915
+ incomingLinkBoost: incomingLinkBoostValue,
17916
+ depthBoost: depthBoostValue,
17917
+ titleMatchBoost: titleMatchBoostValue,
17918
+ freshnessBoost: freshnessBoostValue,
17919
+ anchorTextMatchBoost: anchorTextMatchBoostValue
17920
+ };
17921
+ }
17922
+ return result;
17474
17923
  }).sort((a, b) => {
17475
17924
  const delta = b.finalScore - a.finalScore;
17476
17925
  return Number.isNaN(delta) ? 0 : delta;
@@ -17479,12 +17928,13 @@ function rankHits(hits, config, query) {
17479
17928
  function trimByScoreGap(results, config) {
17480
17929
  if (results.length === 0) return results;
17481
17930
  const threshold = config.ranking.scoreGapThreshold;
17482
- const minScore = config.ranking.minScore;
17483
- if (minScore > 0 && results.length > 0) {
17484
- const sortedScores = results.map((r) => r.pageScore).sort((a, b) => a - b);
17485
- const mid = Math.floor(sortedScores.length / 2);
17486
- const median = sortedScores.length % 2 === 0 ? (sortedScores[mid - 1] + sortedScores[mid]) / 2 : sortedScores[mid];
17487
- if (median < minScore) return [];
17931
+ const minScoreRatio = config.ranking.minScoreRatio;
17932
+ if (minScoreRatio > 0 && results.length > 0) {
17933
+ const topScore = results[0].pageScore;
17934
+ if (Number.isFinite(topScore) && topScore > 0) {
17935
+ const minThreshold = topScore * minScoreRatio;
17936
+ results = results.filter((r) => r.pageScore >= minThreshold);
17937
+ }
17488
17938
  }
17489
17939
  if (threshold > 0 && results.length > 1) {
17490
17940
  for (let i = 1; i < results.length; i++) {
@@ -17554,82 +18004,283 @@ function aggregateByPage(ranked, config) {
17554
18004
  return Number.isNaN(delta) ? 0 : delta;
17555
18005
  });
17556
18006
  }
17557
- function mergePageAndChunkResults(pageHits, rankedChunks, config) {
17558
- if (pageHits.length === 0) return rankedChunks;
17559
- const w = config.search.pageSearchWeight;
17560
- const pageScoreMap = /* @__PURE__ */ new Map();
17561
- for (const ph of pageHits) {
17562
- pageScoreMap.set(ph.url, ph);
17563
- }
17564
- const pagesWithChunks = /* @__PURE__ */ new Set();
17565
- const merged = rankedChunks.map((ranked) => {
17566
- const url = ranked.hit.metadata.url;
17567
- const pageHit = pageScoreMap.get(url);
17568
- if (pageHit) {
17569
- pagesWithChunks.add(url);
17570
- const blended = (1 - w) * ranked.finalScore + w * pageHit.score;
17571
- return {
17572
- hit: ranked.hit,
17573
- finalScore: Number.isFinite(blended) ? blended : ranked.finalScore
17574
- };
18007
/**
 * Scores page-level hits and returns them sorted by descending final score.
 *
 * Starting from the hit's own score (non-finite scores count as -Infinity),
 * the enabled boosts are added in order — incoming links, depth, title match,
 * freshness — and the sum is multiplied by the page weight returned by
 * `findPageWeight`. Pages whose weight is 0 are excluded from the result.
 *
 * Compared with the previous implementation, the page weight is looked up
 * once per hit (it used to be computed again in a trailing filter), excluded
 * pages are skipped before any scoring work, and `Date.now()` is read once so
 * every hit in a call is aged against the same instant.
 *
 * @param {Array} pageHits - Hits with `url`, `title`, `description`, `routeFile`, `depth`, `incomingLinks`, `tags`, `score`, optional `publishedAt`.
 * @param {object} config - Reads `config.ranking` (flags, `weights`, `freshnessDecayRate`, `pageWeights`).
 * @param {string} [query] - Search text used for the title-match boost.
 * @param {boolean} [debug] - When truthy, each result carries a `breakdown` of its score parts.
 * @returns {Array} Ranked page results.
 */
function rankPageHits(pageHits, config, query, debug) {
  const { ranking } = config;
  const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
  const titleMatchWeight = ranking.weights.titleMatch;
  const now = Date.now();
  const ranked = [];
  for (const hit of pageHits) {
    const pageWeight = findPageWeight(hit.url, ranking.pageWeights);
    // Weight 0 removes the page from results entirely.
    if (pageWeight === 0) continue;
    const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
    let score = baseScore;
    let incomingLinkBoostValue = 0;
    if (ranking.enableIncomingLinkBoost) {
      const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.incomingLinks));
      incomingLinkBoostValue = incomingBoost * ranking.weights.incomingLinks;
      score += incomingLinkBoostValue;
    }
    let depthBoostValue = 0;
    if (ranking.enableDepthBoost) {
      const depthBoost = 1 / (1 + nonNegativeOrZero(hit.depth));
      depthBoostValue = depthBoost * ranking.weights.depth;
      score += depthBoostValue;
    }
    let titleMatchBoostValue = 0;
    if (normalizedQuery && titleMatchWeight > 0) {
      const normalizedTitle = normalizeForTitleMatch(hit.title);
      // Either string containing the other counts as a match.
      const matches = normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle));
      if (matches) {
        titleMatchBoostValue = titleMatchWeight;
        score += titleMatchBoostValue;
      }
    }
    let freshnessBoostValue = 0;
    if (ranking.enableFreshnessBoost) {
      const publishedAt = hit.publishedAt;
      if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
        // 864e5 ms = one day; future dates are clamped to "published today".
        const daysSince = Math.max(0, (now - publishedAt) / 864e5);
        const decay = 1 / (1 + nonNegativeOrZero(daysSince) * ranking.freshnessDecayRate);
        freshnessBoostValue = decay * ranking.weights.freshness;
        score += freshnessBoostValue;
      }
    }
    if (pageWeight !== 1) {
      score *= pageWeight;
    }
    const result = {
      url: hit.url,
      title: hit.title,
      description: hit.description,
      routeFile: hit.routeFile,
      depth: hit.depth,
      incomingLinks: hit.incomingLinks,
      tags: hit.tags,
      baseScore,
      finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY,
      publishedAt: hit.publishedAt
    };
    if (debug) {
      result.breakdown = {
        baseScore,
        pageWeight,
        incomingLinkBoost: incomingLinkBoostValue,
        depthBoost: depthBoostValue,
        titleMatchBoost: titleMatchBoostValue,
        freshnessBoost: freshnessBoostValue
      };
    }
    ranked.push(result);
  }
  return ranked.sort((a, b) => {
    // -Infinity minus -Infinity is NaN; treat such pairs as equal.
    const delta = b.finalScore - a.finalScore;
    return Number.isNaN(delta) ? 0 : delta;
  });
}
18075
/**
 * Trims a ranked page list (best first) in two passes.
 *
 * 1. When `minScoreRatio` is positive and the top score is finite and
 *    positive, keep only pages scoring at least `topScore * minScoreRatio`.
 * 2. When `scoreGapThreshold` is positive, cut the list at the first page
 *    whose relative drop from its predecessor, `(prev - current) / prev`,
 *    reaches the threshold (only evaluated when `prev` is positive).
 *
 * @param {Array} results - Pages carrying a numeric `finalScore`.
 * @param {object} config - Reads `config.ranking.scoreGapThreshold` and `config.ranking.minScoreRatio`.
 * @returns {Array} The input array itself when nothing is trimmed, otherwise a new array.
 */
function trimPagesByScoreGap(results, config) {
  if (results.length === 0) return results;
  const gapLimit = config.ranking.scoreGapThreshold;
  const ratio = config.ranking.minScoreRatio;
  let kept = results;
  if (ratio > 0) {
    const best = kept[0].finalScore;
    if (Number.isFinite(best) && best > 0) {
      const floor = best * ratio;
      kept = kept.filter((page) => page.finalScore >= floor);
    }
  }
  if (!(gapLimit > 0) || kept.length < 2) return kept;
  const cut = kept.findIndex((page, i) => {
    if (i === 0) return false;
    const prev = kept[i - 1].finalScore;
    return prev > 0 && (prev - page.finalScore) / prev >= gapLimit;
  });
  return cut === -1 ? kept : kept.slice(0, cut);
}
17612
18100
 
17613
- // src/search/engine.ts
17614
- var requestSchema = z.object({
17615
- q: z.string().trim().min(1),
17616
- topK: z.number().int().positive().max(100).optional(),
17617
- scope: z.string().optional(),
17618
- pathPrefix: z.string().optional(),
17619
- tags: z.array(z.string()).optional(),
17620
- groupBy: z.enum(["page", "chunk"]).optional()
17621
- });
17622
- var SearchEngine = class _SearchEngine {
17623
- cwd;
17624
- config;
17625
- store;
17626
- constructor(options) {
17627
- this.cwd = options.cwd;
17628
- this.config = options.config;
17629
- this.store = options.store;
18101
+ // src/search/related-pages.ts
18102
/**
 * Path-similarity score in [0, 1] for two URL paths.
 *
 * Both paths are split on "/" (empty segments ignored). The score is
 * `2 * shared / (lenA + lenB)`, where `shared` counts only the leading
 * segments the two paths have in common — comparison stops at the first
 * mismatch. Two empty paths score 1; one empty path scores 0.
 *
 * @param {string} urlA
 * @param {string} urlB
 * @returns {number}
 */
function diceScore(urlA, urlB) {
  const partsA = urlA.split("/").filter(Boolean);
  const partsB = urlB.split("/").filter(Boolean);
  const total = partsA.length + partsB.length;
  if (total === 0) return 1;
  if (partsA.length === 0 || partsB.length === 0) return 0;
  let prefixLen = 0;
  while (prefixLen < partsA.length && prefixLen < partsB.length && partsA[prefixLen] === partsB[prefixLen]) {
    prefixLen += 1;
  }
  return 2 * prefixLen / total;
}
18118
/**
 * Blends the three relatedness signals into one score: a flat 0.5 when the
 * pages are linked, plus 0.3 × path similarity, plus 0.2 × semantic similarity.
 *
 * @param {boolean} isLinked
 * @param {number} dice
 * @param {number} semantic
 * @returns {number}
 */
function compositeScore(isLinked, dice, semantic) {
  let total = isLinked ? 0.5 : 0;
  total += 0.3 * dice;
  total += 0.2 * semantic;
  return total;
}
18121
/**
 * Picks the single label that best describes how two pages relate.
 * Precedence: outgoing link, then incoming link, then "sibling" when the path
 * similarity exceeds 0.4, otherwise "semantic".
 *
 * @param {boolean} isOutgoing
 * @param {boolean} isIncoming
 * @param {number} dice - Path-similarity score.
 * @returns {string} "outgoing_link" | "incoming_link" | "sibling" | "semantic"
 */
function dominantRelationshipType(isOutgoing, isIncoming, dice) {
  const linkKind = isOutgoing ? "outgoing_link" : isIncoming ? "incoming_link" : null;
  if (linkKind !== null) return linkKind;
  return dice > 0.4 ? "sibling" : "semantic";
}
18127
+
18128
+ // src/utils/structured-meta.ts
18129
// Identifier-shaped keys only: a letter or underscore, then letters, digits or underscores.
var VALID_KEY_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;

/**
 * Tells whether `key` is acceptable as a structured-meta field name.
 *
 * `RegExp.prototype.test` stringifies its argument, so without the type guard
 * `undefined` and `null` would be accepted as the keys "undefined" / "null".
 *
 * @param {unknown} key
 * @returns {boolean} True only for strings matching VALID_KEY_RE.
 */
function validateMetaKey(key) {
  return typeof key === "string" && VALID_KEY_RE.test(key);
}
18133
/**
 * Converts a raw meta string into the value type named by `dataType`.
 *
 * - "number" / "date": the finite numeric value of `content`; anything that
 *   does not parse to a finite number is returned unchanged. Blank strings
 *   are returned unchanged too — `Number("")` is 0, which would otherwise turn
 *   an empty value into 0 (or the Unix epoch for dates).
 * - "boolean": true only for the exact string "true".
 * - "string[]": comma-separated entries, trimmed, with empty entries dropped
 *   (so "a,,b," yields ["a", "b"]); empty content yields [].
 * - anything else: `content` unchanged.
 *
 * @param {string} content - Raw attribute text.
 * @param {string} dataType - "number" | "boolean" | "string[]" | "date" | other.
 * @returns {number|boolean|string[]|string}
 */
function parseMetaValue(content, dataType) {
  switch (dataType) {
    case "number":
    case "date": {
      if (typeof content === "string" && content.trim() === "") return content;
      const parsed = Number(content);
      return Number.isFinite(parsed) ? parsed : content;
    }
    case "boolean":
      return content === "true";
    case "string[]":
      return content ? content.split(",").map((s) => s.trim()).filter(Boolean) : [];
    default:
      return content;
  }
}
18151
/**
 * Doubles every single quote in `s` so the value can be embedded inside a
 * single-quoted literal of a filter expression.
 *
 * @param {string} s
 * @returns {string}
 */
function escapeFilterValue(s) {
  const pieces = s.split("'");
  return pieces.join("''");
}
18154
/**
 * Builds a filter expression over `meta.<key>` fields from a plain
 * key → value map, joining one clause per entry with " AND ".
 *
 * - string values become `meta.key CONTAINS '<escaped value>'`
 * - booleans and finite numbers become `meta.key = <value>`
 *
 * Entries are skipped when the key fails `validateMetaKey` or when the value
 * is anything else (NaN, ±Infinity, null, objects, …): interpolating those
 * raw would produce a malformed filter expression.
 *
 * @param {Object<string, string|number|boolean>} filters
 * @returns {string} The combined expression, or "" when no clause was produced.
 */
function buildMetaFilterString(filters) {
  const clauses = [];
  for (const [key, value] of Object.entries(filters)) {
    if (!validateMetaKey(key)) continue;
    const field = `meta.${key}`;
    if (typeof value === "string") {
      clauses.push(`${field} CONTAINS '${escapeFilterValue(value)}'`);
    } else if (typeof value === "boolean" || (typeof value === "number" && Number.isFinite(value))) {
      clauses.push(`${field} = ${value}`);
    }
  }
  return clauses.join(" AND ");
}
18169
+
18170
+ // src/search/engine.ts
18171
// Zod schema for per-request ranking overrides. The whole object and every
// field inside it are optional. It accepts a `ranking` section (two boost
// toggles, aggregation and score-trimming parameters — the ratio-style ones
// constrained to [0, 1] — and a `weights` sub-object) and a `search` section
// holding `pageSearchWeight` in [0, 1]. It is referenced as the
// `rankingOverrides` field of the request schema.
+ var rankingOverridesSchema = z.object({
18172
+ ranking: z.object({
18173
+ enableIncomingLinkBoost: z.boolean().optional(),
18174
+ enableDepthBoost: z.boolean().optional(),
18175
+ aggregationCap: z.number().int().positive().optional(),
18176
+ aggregationDecay: z.number().min(0).max(1).optional(),
18177
+ minChunkScoreRatio: z.number().min(0).max(1).optional(),
18178
+ minScoreRatio: z.number().min(0).max(1).optional(),
18179
+ scoreGapThreshold: z.number().min(0).max(1).optional(),
18180
+ weights: z.object({
18181
+ incomingLinks: z.number().optional(),
18182
+ depth: z.number().optional(),
18183
+ aggregation: z.number().optional(),
18184
+ titleMatch: z.number().optional()
18185
+ }).optional()
18186
+ }).optional(),
18187
+ search: z.object({
18188
+ pageSearchWeight: z.number().min(0).max(1).optional()
18189
+ }).optional()
18190
+ }).optional();
18191
// Zod schema for a search request. Only `q` is required: it is trimmed and
// must be non-empty. `topK` is a positive integer up to 100, `maxSubResults`
// a positive integer up to 20, `groupBy` is "page" or "chunk", and `filters`
// maps string keys to string / number / boolean values.
// NOTE(review): presumably this is the schema the SearchEngine search code
// parses its input with — that code defaults topK to 10, maxSubResults to 5
// and groupBy to "page", and applies `rankingOverrides` only when `debug` is
// also set; confirm against the parse call, which is not shown here.
+ var requestSchema = z.object({
18192
+ q: z.string().trim().min(1),
18193
+ topK: z.number().int().positive().max(100).optional(),
18194
+ scope: z.string().optional(),
18195
+ pathPrefix: z.string().optional(),
18196
+ tags: z.array(z.string()).optional(),
18197
+ filters: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional(),
18198
+ groupBy: z.enum(["page", "chunk"]).optional(),
18199
+ maxSubResults: z.number().int().positive().max(20).optional(),
18200
+ debug: z.boolean().optional(),
18201
+ rankingOverrides: rankingOverridesSchema
18202
+ });
18203
// Numeric limit of 2000 (2e3).
// NOTE(review): the name suggests a cap on how many pages are loaded when
// building the site-structure tree; its use is outside this excerpt — confirm.
+ var MAX_SITE_STRUCTURE_PAGES = 2e3;
18204
/**
 * Creates a blank site-structure node: not indexed, untitled, no children.
 * Every call returns a fresh object with its own `children` array.
 *
 * @param {string} url - Path this node represents.
 * @param {number} depth - Number of path segments below the root.
 * @returns {{url: string, title: string, depth: number, routeFile: string, isIndexed: boolean, childCount: number, children: Array}}
 */
function makeNode(url, depth) {
  const node = {
    url,
    title: "",
    depth,
    routeFile: "",
    isIndexed: false,
    childCount: 0,
    children: []
  };
  return node;
}
18207
/**
 * Builds a URL-path tree from a flat page list.
 *
 * A node is created for every path prefix of every page (so intermediate
 * paths that are not pages themselves appear with `isIndexed: false`), pages
 * fill in `title` / `routeFile` / `isIndexed` on their node, children are
 * sorted by URL and `childCount` is set on every node.
 *
 * Node keys are always derived as "/" + segments.join("/"). The previous code
 * looked the page node up by the raw normalized path and dereferenced the
 * result unchecked, which throws a TypeError whenever that path is not in
 * exactly this canonical form (for example a trailing slash); the
 * `pathPrefix` lookup had the same mismatch and would miss an existing
 * subtree.
 *
 * @param {Array<{url: string, title: string, routeFile: string}>} pages
 * @param {string} [pathPrefix] - When given, the subtree rooted at this path is
 *   returned; if no such node exists, an empty node for the prefix is returned.
 * @returns {object} The root node ("/") or the requested subtree root.
 */
function buildTree(pages, pathPrefix) {
  const toSegments = (urlPath) => urlPath.split("/").filter(Boolean);
  // Zero segments map to "/", i.e. the root node.
  const toKey = (segments) => `/${segments.join("/")}`;

  const root2 = makeNode("/", 0);
  const nodeMap = /* @__PURE__ */ new Map([["/", root2]]);

  for (const page of pages) {
    const segments = toSegments(normalizeUrlPath(page.url));
    // Make sure every ancestor path (and the page's own path) has a node.
    for (let i = 1; i <= segments.length; i++) {
      const partialUrl = toKey(segments.slice(0, i));
      if (!nodeMap.has(partialUrl)) {
        nodeMap.set(partialUrl, makeNode(partialUrl, i));
      }
    }
    const node = nodeMap.get(toKey(segments));
    node.title = page.title;
    node.routeFile = page.routeFile;
    node.isIndexed = true;
  }

  // Attach each node to its parent; the root has none.
  for (const [url, node] of nodeMap) {
    if (url === "/") continue;
    const parentKey = toKey(toSegments(url).slice(0, -1));
    const parent = nodeMap.get(parentKey) ?? root2;
    parent.children.push(node);
  }

  // Every node lives in nodeMap, so one flat pass replaces a recursive walk.
  for (const node of nodeMap.values()) {
    node.children.sort((a, b) => a.url.localeCompare(b.url));
    node.childCount = node.children.length;
  }

  if (!pathPrefix) return root2;
  const normalizedPrefix = normalizeUrlPath(pathPrefix);
  const prefixSegments = toSegments(normalizedPrefix);
  return nodeMap.get(toKey(prefixSegments)) ?? makeNode(normalizedPrefix, prefixSegments.length);
}
18256
/**
 * Returns a copy of `base` with request-level overrides layered on top.
 *
 * `search` and `ranking` are merged one level deep and `ranking.weights` is
 * merged separately, so overriding a single weight keeps the remaining base
 * weights. `base` itself is not modified; sections that are not overridden
 * are shared by reference with `base`.
 *
 * @param {object} base - Full config; must contain `ranking.weights`.
 * @param {object} overrides - May contain `search`, `ranking` and `ranking.weights`.
 * @returns {object} The merged config.
 */
function mergeRankingOverrides(base, overrides) {
  const search = { ...base.search, ...overrides.search };
  const weights = { ...base.ranking.weights, ...overrides.ranking?.weights };
  const ranking = { ...base.ranking, ...overrides.ranking, weights };
  return { ...base, search, ranking };
}
18273
+ var SearchEngine = class _SearchEngine {
18274
+ cwd;
18275
+ config;
18276
+ store;
18277
+ constructor(options) {
18278
+ this.cwd = options.cwd;
18279
+ this.config = options.config;
18280
+ this.store = options.store;
18281
+ }
18282
+ static async create(options = {}) {
18283
+ const cwd = path.resolve(options.cwd ?? process.cwd());
17633
18284
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
17634
18285
  const store = options.store ?? await createUpstashStore(config);
17635
18286
  return new _SearchEngine({
@@ -17648,125 +18299,203 @@ var SearchEngine = class _SearchEngine {
17648
18299
  }
17649
18300
  const input = parsed.data;
17650
18301
  const totalStart = process.hrtime.bigint();
18302
+ const effectiveConfig = input.debug && input.rankingOverrides ? mergeRankingOverrides(this.config, input.rankingOverrides) : this.config;
17651
18303
  const resolvedScope = resolveScope(this.config, input.scope);
17652
18304
  const topK = input.topK ?? 10;
18305
+ const maxSubResults = input.maxSubResults ?? 5;
17653
18306
  const groupByPage = (input.groupBy ?? "page") === "page";
17654
- const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
17655
- const filterParts = [];
17656
- if (input.pathPrefix) {
17657
- const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
17658
- filterParts.push(`url GLOB '${prefix}*'`);
17659
- }
17660
- if (input.tags && input.tags.length > 0) {
17661
- for (const tag of input.tags) {
17662
- filterParts.push(`tags GLOB '*${tag}*'`);
18307
+ const queryText = input.q;
18308
+ const pathPrefix = input.pathPrefix ? input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}` : void 0;
18309
+ const filterTags = input.tags && input.tags.length > 0 ? input.tags : void 0;
18310
+ const metaFilterStr = input.filters && Object.keys(input.filters).length > 0 ? buildMetaFilterString(input.filters) : "";
18311
+ const metaFilter = metaFilterStr || void 0;
18312
+ const applyPagePostFilters = (hits) => {
18313
+ let filtered = hits;
18314
+ if (pathPrefix) {
18315
+ filtered = filtered.filter((h) => h.url.startsWith(pathPrefix));
18316
+ }
18317
+ if (filterTags) {
18318
+ filtered = filtered.filter(
18319
+ (h) => filterTags.every((tag) => h.tags.includes(tag))
18320
+ );
17663
18321
  }
17664
- }
17665
- const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
17666
- const useDualSearch = this.config.search.dualSearch && groupByPage;
18322
+ return filtered;
18323
+ };
18324
+ const applyChunkPostFilters = (hits) => {
18325
+ let filtered = hits;
18326
+ if (filterTags) {
18327
+ filtered = filtered.filter(
18328
+ (h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
18329
+ );
18330
+ }
18331
+ return filtered;
18332
+ };
17667
18333
  const searchStart = process.hrtime.bigint();
17668
- let ranked;
17669
- if (useDualSearch) {
17670
- const chunkLimit = Math.max(topK * 10, 100);
17671
- const pageLimit = 20;
17672
- const [pageHits, chunkHits] = await Promise.all([
17673
- this.store.searchPages(
17674
- input.q,
17675
- {
17676
- limit: pageLimit,
17677
- semanticWeight: this.config.search.semanticWeight,
17678
- inputEnrichment: this.config.search.inputEnrichment,
17679
- filter
17680
- },
17681
- resolvedScope
17682
- ),
17683
- this.store.search(
17684
- input.q,
17685
- {
17686
- limit: chunkLimit,
17687
- semanticWeight: this.config.search.semanticWeight,
17688
- inputEnrichment: this.config.search.inputEnrichment,
17689
- reranking: false,
17690
- filter
17691
- },
18334
+ if (groupByPage) {
18335
+ const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
18336
+ const pageLimit = Math.max(topK * 2, 20);
18337
+ const pageHits = await this.store.searchPagesByText(
18338
+ queryText,
18339
+ { limit: pageLimit * fetchMultiplier, filter: metaFilter },
18340
+ resolvedScope
18341
+ );
18342
+ const filteredPages = applyPagePostFilters(pageHits);
18343
+ let rankedPages = rankPageHits(filteredPages, effectiveConfig, input.q, input.debug);
18344
+ rankedPages = trimPagesByScoreGap(rankedPages, effectiveConfig);
18345
+ const topPages = rankedPages.slice(0, topK);
18346
+ const chunkPromises = topPages.map(
18347
+ (page) => this.store.searchChunksByUrl(
18348
+ queryText,
18349
+ page.url,
18350
+ { limit: maxSubResults, filter: metaFilter },
17692
18351
  resolvedScope
17693
- )
17694
- ]);
17695
- const rankedChunks = rankHits(chunkHits, this.config, input.q);
17696
- ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
18352
+ ).then((chunks) => applyChunkPostFilters(chunks))
18353
+ );
18354
+ const allChunks = await Promise.all(chunkPromises);
18355
+ const searchMs = hrTimeMs(searchStart);
18356
+ const results = this.buildPageFirstResults(topPages, allChunks, input.q, input.debug, maxSubResults);
18357
+ return {
18358
+ q: input.q,
18359
+ scope: resolvedScope.scopeName,
18360
+ results,
18361
+ meta: {
18362
+ timingsMs: {
18363
+ search: Math.round(searchMs),
18364
+ total: Math.round(hrTimeMs(totalStart))
18365
+ }
18366
+ }
18367
+ };
17697
18368
  } else {
18369
+ const candidateK = Math.max(50, topK);
18370
+ const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
17698
18371
  const hits = await this.store.search(
17699
- input.q,
17700
- {
17701
- limit: candidateK,
17702
- semanticWeight: this.config.search.semanticWeight,
17703
- inputEnrichment: this.config.search.inputEnrichment,
17704
- reranking: this.config.search.reranking,
17705
- filter
17706
- },
18372
+ queryText,
18373
+ { limit: candidateK * fetchMultiplier, filter: metaFilter },
17707
18374
  resolvedScope
17708
18375
  );
17709
- ranked = rankHits(hits, this.config, input.q);
17710
- }
17711
- const searchMs = hrTimeMs(searchStart);
17712
- const results = this.buildResults(ranked, topK, groupByPage, input.q);
17713
- return {
17714
- q: input.q,
17715
- scope: resolvedScope.scopeName,
17716
- results,
17717
- meta: {
17718
- timingsMs: {
17719
- search: Math.round(searchMs),
17720
- total: Math.round(hrTimeMs(totalStart))
18376
+ let filtered = hits;
18377
+ if (pathPrefix) {
18378
+ filtered = filtered.filter((h) => h.metadata.url.startsWith(pathPrefix));
18379
+ }
18380
+ if (filterTags) {
18381
+ filtered = filtered.filter(
18382
+ (h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
18383
+ );
18384
+ }
18385
+ const ranked = rankHits(filtered, effectiveConfig, input.q, input.debug);
18386
+ const searchMs = hrTimeMs(searchStart);
18387
+ const results = this.buildResults(ranked, topK, false, maxSubResults, input.q, input.debug, effectiveConfig);
18388
+ return {
18389
+ q: input.q,
18390
+ scope: resolvedScope.scopeName,
18391
+ results,
18392
+ meta: {
18393
+ timingsMs: {
18394
+ search: Math.round(searchMs),
18395
+ total: Math.round(hrTimeMs(totalStart))
18396
+ }
17721
18397
  }
18398
+ };
18399
+ }
18400
+ }
18401
+ buildPageFirstResults(rankedPages, allChunks, query, debug, maxSubResults = 5) {
18402
+ return rankedPages.map((page, i) => {
18403
+ const chunks = allChunks[i] ?? [];
18404
+ const bestChunk = chunks[0];
18405
+ const snippet = bestChunk ? query ? queryAwareExcerpt(bestChunk.metadata.chunkText, query) : toSnippet(bestChunk.metadata.chunkText) : page.description || page.title;
18406
+ const result = {
18407
+ url: page.url,
18408
+ title: page.title,
18409
+ sectionTitle: bestChunk?.metadata.sectionTitle || void 0,
18410
+ snippet,
18411
+ chunkText: bestChunk?.metadata.chunkText || void 0,
18412
+ score: Number(page.finalScore.toFixed(6)),
18413
+ routeFile: page.routeFile,
18414
+ chunks: chunks.length > 0 ? chunks.slice(0, maxSubResults).map((c) => ({
18415
+ sectionTitle: c.metadata.sectionTitle || void 0,
18416
+ snippet: query ? queryAwareExcerpt(c.metadata.chunkText, query) : toSnippet(c.metadata.chunkText),
18417
+ chunkText: c.metadata.chunkText || void 0,
18418
+ headingPath: c.metadata.headingPath,
18419
+ score: Number(c.score.toFixed(6))
18420
+ })) : void 0
18421
+ };
18422
+ if (debug && page.breakdown) {
18423
+ result.breakdown = {
18424
+ baseScore: page.breakdown.baseScore,
18425
+ incomingLinkBoost: page.breakdown.incomingLinkBoost,
18426
+ depthBoost: page.breakdown.depthBoost,
18427
+ titleMatchBoost: page.breakdown.titleMatchBoost,
18428
+ freshnessBoost: page.breakdown.freshnessBoost,
18429
+ anchorTextMatchBoost: 0
18430
+ };
17722
18431
  }
17723
- };
18432
+ return result;
18433
+ });
17724
18434
  }
17725
- ensureSnippet(hit) {
18435
+ ensureSnippet(hit, query) {
18436
+ const chunkText = hit.hit.metadata.chunkText;
18437
+ if (query && chunkText) return queryAwareExcerpt(chunkText, query);
17726
18438
  const snippet = hit.hit.metadata.snippet;
17727
18439
  if (snippet && snippet.length >= 30) return snippet;
17728
- const chunkText = hit.hit.metadata.chunkText;
17729
18440
  if (chunkText) return toSnippet(chunkText);
17730
18441
  return snippet || "";
17731
18442
  }
17732
- buildResults(ordered, topK, groupByPage, _query) {
18443
+ buildResults(ordered, topK, groupByPage, maxSubResults, query, debug, config) {
18444
+ const cfg = config ?? this.config;
17733
18445
  if (groupByPage) {
17734
- let pages = aggregateByPage(ordered, this.config);
17735
- pages = trimByScoreGap(pages, this.config);
17736
- const minRatio = this.config.ranking.minChunkScoreRatio;
18446
+ let pages = aggregateByPage(ordered, cfg);
18447
+ pages = trimByScoreGap(pages, cfg);
18448
+ const minRatio = cfg.ranking.minChunkScoreRatio;
17737
18449
  return pages.slice(0, topK).map((page) => {
17738
18450
  const bestScore = page.bestChunk.finalScore;
17739
18451
  const minChunkScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
17740
- const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, 5);
17741
- return {
18452
+ const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, maxSubResults);
18453
+ const result = {
17742
18454
  url: page.url,
17743
18455
  title: page.title,
17744
18456
  sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
17745
- snippet: this.ensureSnippet(page.bestChunk),
18457
+ snippet: this.ensureSnippet(page.bestChunk, query),
18458
+ chunkText: page.bestChunk.hit.metadata.chunkText || void 0,
17746
18459
  score: Number(page.pageScore.toFixed(6)),
17747
18460
  routeFile: page.routeFile,
17748
- chunks: meaningful.length > 1 ? meaningful.map((c) => ({
18461
+ chunks: meaningful.length >= 1 ? meaningful.map((c) => ({
17749
18462
  sectionTitle: c.hit.metadata.sectionTitle || void 0,
17750
- snippet: this.ensureSnippet(c),
18463
+ snippet: this.ensureSnippet(c, query),
18464
+ chunkText: c.hit.metadata.chunkText || void 0,
17751
18465
  headingPath: c.hit.metadata.headingPath,
17752
18466
  score: Number(c.finalScore.toFixed(6))
17753
18467
  })) : void 0
17754
18468
  };
18469
+ if (debug && page.bestChunk.breakdown) {
18470
+ result.breakdown = page.bestChunk.breakdown;
18471
+ }
18472
+ return result;
17755
18473
  });
17756
18474
  } else {
17757
18475
  let filtered = ordered;
17758
- const minScore = this.config.ranking.minScore;
17759
- if (minScore > 0) {
17760
- filtered = ordered.filter((entry) => entry.finalScore >= minScore);
17761
- }
17762
- return filtered.slice(0, topK).map(({ hit, finalScore }) => ({
17763
- url: hit.metadata.url,
17764
- title: hit.metadata.title,
17765
- sectionTitle: hit.metadata.sectionTitle || void 0,
17766
- snippet: this.ensureSnippet({ hit, finalScore }),
17767
- score: Number(finalScore.toFixed(6)),
17768
- routeFile: hit.metadata.routeFile
17769
- }));
18476
+ const minScoreRatio = cfg.ranking.minScoreRatio;
18477
+ if (minScoreRatio > 0 && ordered.length > 0) {
18478
+ const topScore = ordered[0].finalScore;
18479
+ if (Number.isFinite(topScore) && topScore > 0) {
18480
+ const threshold = topScore * minScoreRatio;
18481
+ filtered = ordered.filter((entry) => entry.finalScore >= threshold);
18482
+ }
18483
+ }
18484
+ return filtered.slice(0, topK).map(({ hit, finalScore, breakdown }) => {
18485
+ const result = {
18486
+ url: hit.metadata.url,
18487
+ title: hit.metadata.title,
18488
+ sectionTitle: hit.metadata.sectionTitle || void 0,
18489
+ snippet: this.ensureSnippet({ hit, finalScore }, query),
18490
+ chunkText: hit.metadata.chunkText || void 0,
18491
+ score: Number(finalScore.toFixed(6)),
18492
+ routeFile: hit.metadata.routeFile
18493
+ };
18494
+ if (debug && breakdown) {
18495
+ result.breakdown = breakdown;
18496
+ }
18497
+ return result;
18498
+ });
17770
18499
  }
17771
18500
  }
17772
18501
  async getPage(pathOrUrl, scope) {
@@ -17792,6 +18521,116 @@ var SearchEngine = class _SearchEngine {
17792
18521
  markdown: page.markdown
17793
18522
  };
17794
18523
  }
18524
+ async listPages(opts) {
18525
+ const resolvedScope = resolveScope(this.config, opts?.scope);
18526
+ const pathPrefix = opts?.pathPrefix ? opts.pathPrefix.startsWith("/") ? opts.pathPrefix : `/${opts.pathPrefix}` : void 0;
18527
+ return this.store.listPages(resolvedScope, {
18528
+ cursor: opts?.cursor,
18529
+ limit: opts?.limit,
18530
+ pathPrefix
18531
+ });
18532
+ }
18533
+ async getSiteStructure(opts) {
18534
+ const maxPages = Math.min(opts?.maxPages ?? MAX_SITE_STRUCTURE_PAGES, MAX_SITE_STRUCTURE_PAGES);
18535
+ const allPages = [];
18536
+ let cursor;
18537
+ let truncated = false;
18538
+ do {
18539
+ const result = await this.listPages({
18540
+ pathPrefix: opts?.pathPrefix,
18541
+ scope: opts?.scope,
18542
+ cursor,
18543
+ limit: 200
18544
+ });
18545
+ allPages.push(...result.pages);
18546
+ cursor = result.nextCursor;
18547
+ if (allPages.length >= maxPages) {
18548
+ truncated = allPages.length > maxPages || !!cursor;
18549
+ allPages.length = maxPages;
18550
+ break;
18551
+ }
18552
+ } while (cursor);
18553
+ const root2 = buildTree(allPages, opts?.pathPrefix);
18554
+ return {
18555
+ root: root2,
18556
+ totalPages: allPages.length,
18557
+ truncated
18558
+ };
18559
+ }
18560
+ async getRelatedPages(pathOrUrl, opts) {
18561
+ const resolvedScope = resolveScope(this.config, opts?.scope);
18562
+ const urlPath = this.resolveInputPath(pathOrUrl);
18563
+ const topK = Math.min(opts?.topK ?? 10, 25);
18564
+ const source = await this.store.fetchPageWithVector(urlPath, resolvedScope);
18565
+ if (!source) {
18566
+ throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
18567
+ }
18568
+ const sourceOutgoing = new Set(source.metadata.outgoingLinkUrls ?? []);
18569
+ const semanticHits = await this.store.searchPagesByVector(
18570
+ source.vector,
18571
+ { limit: 50 },
18572
+ resolvedScope
18573
+ );
18574
+ const filteredHits = semanticHits.filter((h) => h.url !== urlPath);
18575
+ const semanticScoreMap = /* @__PURE__ */ new Map();
18576
+ for (const hit of filteredHits) {
18577
+ semanticScoreMap.set(hit.url, hit.score);
18578
+ }
18579
+ const candidateUrls = /* @__PURE__ */ new Set();
18580
+ for (const hit of filteredHits) {
18581
+ candidateUrls.add(hit.url);
18582
+ }
18583
+ for (const url of sourceOutgoing) {
18584
+ if (url !== urlPath) candidateUrls.add(url);
18585
+ }
18586
+ const missingUrls = [...sourceOutgoing].filter(
18587
+ (u) => u !== urlPath && !semanticScoreMap.has(u)
18588
+ );
18589
+ const fetchedPages = missingUrls.length > 0 ? await this.store.fetchPagesBatch(missingUrls, resolvedScope) : [];
18590
+ const metaMap = /* @__PURE__ */ new Map();
18591
+ for (const hit of filteredHits) {
18592
+ metaMap.set(hit.url, { title: hit.title, routeFile: hit.routeFile, outgoingLinkUrls: [] });
18593
+ }
18594
+ for (const p of fetchedPages) {
18595
+ metaMap.set(p.url, { title: p.title, routeFile: p.routeFile, outgoingLinkUrls: p.outgoingLinkUrls });
18596
+ }
18597
+ const semanticUrls = filteredHits.map((h) => h.url);
18598
+ if (semanticUrls.length > 0) {
18599
+ const semanticPageData = await this.store.fetchPagesBatch(semanticUrls, resolvedScope);
18600
+ for (const p of semanticPageData) {
18601
+ const existing = metaMap.get(p.url);
18602
+ if (existing) {
18603
+ existing.outgoingLinkUrls = p.outgoingLinkUrls;
18604
+ }
18605
+ }
18606
+ }
18607
+ const candidates = [];
18608
+ for (const url of candidateUrls) {
18609
+ const meta = metaMap.get(url);
18610
+ if (!meta) continue;
18611
+ const isOutgoing = sourceOutgoing.has(url);
18612
+ const isIncoming = meta.outgoingLinkUrls.includes(urlPath);
18613
+ const isLinked = isOutgoing || isIncoming;
18614
+ const dice = diceScore(urlPath, url);
18615
+ const semantic = semanticScoreMap.get(url) ?? 0;
18616
+ const score = compositeScore(isLinked, dice, semantic);
18617
+ const relationshipType = dominantRelationshipType(isOutgoing, isIncoming, dice);
18618
+ candidates.push({
18619
+ url,
18620
+ title: meta.title,
18621
+ score: Number(score.toFixed(6)),
18622
+ relationshipType,
18623
+ routeFile: meta.routeFile
18624
+ });
18625
+ }
18626
+ candidates.sort((a, b) => b.score - a.score);
18627
+ const results = candidates.slice(0, topK);
18628
+ return {
18629
+ sourceUrl: urlPath,
18630
+ scope: resolvedScope.scopeName,
18631
+ relatedPages: results
18632
+ };
18633
+ }
17795
18634
  async health() {
17796
18635
  return this.store.health();
17797
18636
  }
@@ -17807,6 +18646,215 @@ var SearchEngine = class _SearchEngine {
17807
18646
  }
17808
18647
  };
17809
18648
 
18649
+ // src/mcp/server.ts
18650
+ function createServer(engine) {
18651
+ const server = new McpServer({
18652
+ name: "searchsocket-mcp",
18653
+ version: "0.1.0"
18654
+ });
18655
+ server.registerTool(
18656
+ "search",
18657
+ {
18658
+ description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
18659
+ inputSchema: {
18660
+ query: z.string().min(1),
18661
+ scope: z.string().optional(),
18662
+ topK: z.number().int().positive().max(100).optional(),
18663
+ pathPrefix: z.string().optional(),
18664
+ tags: z.array(z.string()).optional(),
18665
+ filters: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional(),
18666
+ groupBy: z.enum(["page", "chunk"]).optional(),
18667
+ maxSubResults: z.number().int().positive().max(20).optional()
18668
+ },
18669
+ outputSchema: {
18670
+ q: z.string(),
18671
+ scope: z.string(),
18672
+ results: z.array(z.object({
18673
+ url: z.string(),
18674
+ title: z.string(),
18675
+ sectionTitle: z.string().optional(),
18676
+ snippet: z.string(),
18677
+ score: z.number(),
18678
+ routeFile: z.string(),
18679
+ chunks: z.array(z.object({
18680
+ sectionTitle: z.string().optional(),
18681
+ snippet: z.string(),
18682
+ headingPath: z.array(z.string()),
18683
+ score: z.number()
18684
+ })).optional()
18685
+ })),
18686
+ meta: z.object({
18687
+ timingsMs: z.object({
18688
+ search: z.number(),
18689
+ total: z.number()
18690
+ })
18691
+ })
18692
+ }
18693
+ },
18694
+ async (input) => {
18695
+ const result = await engine.search({
18696
+ q: input.query,
18697
+ topK: input.topK,
18698
+ scope: input.scope,
18699
+ pathPrefix: input.pathPrefix,
18700
+ tags: input.tags,
18701
+ filters: input.filters,
18702
+ groupBy: input.groupBy,
18703
+ maxSubResults: input.maxSubResults
18704
+ });
18705
+ return {
18706
+ content: [
18707
+ {
18708
+ type: "text",
18709
+ text: JSON.stringify(result, null, 2)
18710
+ }
18711
+ ],
18712
+ structuredContent: result
18713
+ };
18714
+ }
18715
+ );
18716
+ server.registerTool(
18717
+ "get_page",
18718
+ {
18719
+ description: "Fetch indexed markdown for a specific path or URL, including frontmatter and routeFile mapping.",
18720
+ inputSchema: {
18721
+ pathOrUrl: z.string().min(1),
18722
+ scope: z.string().optional()
18723
+ }
18724
+ },
18725
+ async (input) => {
18726
+ const page = await engine.getPage(input.pathOrUrl, input.scope);
18727
+ return {
18728
+ content: [
18729
+ {
18730
+ type: "text",
18731
+ text: JSON.stringify(page, null, 2)
18732
+ }
18733
+ ]
18734
+ };
18735
+ }
18736
+ );
18737
+ server.registerTool(
18738
+ "list_pages",
18739
+ {
18740
+ description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
18741
+ inputSchema: {
18742
+ pathPrefix: z.string().optional(),
18743
+ cursor: z.string().optional(),
18744
+ limit: z.number().int().positive().max(200).optional(),
18745
+ scope: z.string().optional()
18746
+ }
18747
+ },
18748
+ async (input) => {
18749
+ const result = await engine.listPages({
18750
+ pathPrefix: input.pathPrefix,
18751
+ cursor: input.cursor,
18752
+ limit: input.limit,
18753
+ scope: input.scope
18754
+ });
18755
+ return {
18756
+ content: [
18757
+ {
18758
+ type: "text",
18759
+ text: JSON.stringify(result, null, 2)
18760
+ }
18761
+ ]
18762
+ };
18763
+ }
18764
+ );
18765
+ server.registerTool(
18766
+ "get_site_structure",
18767
+ {
18768
+ description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
18769
+ inputSchema: {
18770
+ pathPrefix: z.string().optional(),
18771
+ scope: z.string().optional(),
18772
+ maxPages: z.number().int().positive().max(2e3).optional()
18773
+ }
18774
+ },
18775
+ async (input) => {
18776
+ const result = await engine.getSiteStructure({
18777
+ pathPrefix: input.pathPrefix,
18778
+ scope: input.scope,
18779
+ maxPages: input.maxPages
18780
+ });
18781
+ return {
18782
+ content: [
18783
+ {
18784
+ type: "text",
18785
+ text: JSON.stringify(result, null, 2)
18786
+ }
18787
+ ]
18788
+ };
18789
+ }
18790
+ );
18791
+ server.registerTool(
18792
+ "find_source_file",
18793
+ {
18794
+ description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
18795
+ inputSchema: {
18796
+ query: z.string().min(1),
18797
+ scope: z.string().optional()
18798
+ }
18799
+ },
18800
+ async (input) => {
18801
+ const result = await engine.search({
18802
+ q: input.query,
18803
+ topK: 1,
18804
+ scope: input.scope
18805
+ });
18806
+ if (result.results.length === 0) {
18807
+ return {
18808
+ content: [
18809
+ {
18810
+ type: "text",
18811
+ text: JSON.stringify({
18812
+ error: "No matching content found for the given query."
18813
+ })
18814
+ }
18815
+ ]
18816
+ };
18817
+ }
18818
+ const match = result.results[0];
18819
+ const { url, routeFile, sectionTitle, snippet } = match;
18820
+ return {
18821
+ content: [
18822
+ {
18823
+ type: "text",
18824
+ text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
18825
+ }
18826
+ ]
18827
+ };
18828
+ }
18829
+ );
18830
+ server.registerTool(
18831
+ "get_related_pages",
18832
+ {
18833
+ description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
18834
+ inputSchema: {
18835
+ pathOrUrl: z.string().min(1),
18836
+ scope: z.string().optional(),
18837
+ topK: z.number().int().positive().max(25).optional()
18838
+ }
18839
+ },
18840
+ async (input) => {
18841
+ const result = await engine.getRelatedPages(input.pathOrUrl, {
18842
+ topK: input.topK,
18843
+ scope: input.scope
18844
+ });
18845
+ return {
18846
+ content: [
18847
+ {
18848
+ type: "text",
18849
+ text: JSON.stringify(result, null, 2)
18850
+ }
18851
+ ]
18852
+ };
18853
+ }
18854
+ );
18855
+ return server;
18856
+ }
18857
+
17810
18858
  // src/sveltekit/handle.ts
17811
18859
  var InMemoryRateLimiter = class {
17812
18860
  constructor(windowMs, max) {
@@ -17835,7 +18883,13 @@ function searchsocketHandle(options = {}) {
17835
18883
  let enginePromise = null;
17836
18884
  let configPromise = null;
17837
18885
  let apiPath = options.path;
18886
+ let llmsServePath = null;
18887
+ let serveMarkdownVariants = false;
18888
+ let mcpPath;
18889
+ let mcpApiKey;
18890
+ let mcpEnableJsonResponse = true;
17838
18891
  let rateLimiter = null;
18892
+ let notConfigured = false;
17839
18893
  const getConfig = async () => {
17840
18894
  if (!configPromise) {
17841
18895
  let configP;
@@ -17852,6 +18906,13 @@ function searchsocketHandle(options = {}) {
17852
18906
  }
17853
18907
  configPromise = configP.then((config) => {
17854
18908
  apiPath = apiPath ?? config.api.path;
18909
+ mcpPath = config.mcp.handle.path;
18910
+ mcpApiKey = config.mcp.handle.apiKey;
18911
+ mcpEnableJsonResponse = config.mcp.handle.enableJsonResponse;
18912
+ if (config.llmsTxt.enable) {
18913
+ llmsServePath = "/" + config.llmsTxt.outputPath.replace(/^static\//, "");
18914
+ serveMarkdownVariants = config.llmsTxt.serveMarkdownVariants;
18915
+ }
17855
18916
  if (config.api.rateLimit && !isServerless()) {
17856
18917
  rateLimiter = new InMemoryRateLimiter(config.api.rateLimit.windowMs, config.api.rateLimit.max);
17857
18918
  }
@@ -17861,59 +18922,109 @@ function searchsocketHandle(options = {}) {
17861
18922
  return configPromise;
17862
18923
  };
17863
18924
  const getEngine = async () => {
18925
+ if (notConfigured) {
18926
+ throw new SearchSocketError(
18927
+ "SEARCH_NOT_CONFIGURED",
18928
+ "Search is not configured. Set the required Upstash environment variables to enable search.",
18929
+ 503
18930
+ );
18931
+ }
17864
18932
  if (!enginePromise) {
17865
18933
  const config = await getConfig();
17866
18934
  enginePromise = SearchEngine.create({
17867
18935
  cwd: options.cwd,
17868
18936
  config
18937
+ }).catch((error) => {
18938
+ enginePromise = null;
18939
+ if (error instanceof SearchSocketError && error.code === "VECTOR_BACKEND_UNAVAILABLE") {
18940
+ notConfigured = true;
18941
+ throw new SearchSocketError(
18942
+ "SEARCH_NOT_CONFIGURED",
18943
+ "Search is not configured. Set the required Upstash environment variables to enable search.",
18944
+ 503
18945
+ );
18946
+ }
18947
+ throw error;
17869
18948
  });
17870
18949
  }
17871
18950
  return enginePromise;
17872
18951
  };
17873
18952
  const bodyLimit = options.maxBodyBytes ?? 64 * 1024;
17874
18953
  return async ({ event, resolve }) => {
17875
- if (apiPath && event.url.pathname !== apiPath) {
17876
- return resolve(event);
18954
+ if (apiPath && !isApiPath(event.url.pathname, apiPath) && event.url.pathname !== llmsServePath) {
18955
+ const isMarkdownVariant = event.request.method === "GET" && event.url.pathname.endsWith(".md");
18956
+ if (mcpPath && event.url.pathname === mcpPath) {
18957
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
18958
+ }
18959
+ if (mcpPath) {
18960
+ if (serveMarkdownVariants && isMarkdownVariant) ; else {
18961
+ return resolve(event);
18962
+ }
18963
+ } else {
18964
+ if (configPromise || options.config || options.rawConfig) {
18965
+ await getConfig();
18966
+ if (mcpPath && event.url.pathname === mcpPath) {
18967
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
18968
+ }
18969
+ if (!(serveMarkdownVariants && isMarkdownVariant)) {
18970
+ return resolve(event);
18971
+ }
18972
+ } else {
18973
+ return resolve(event);
18974
+ }
18975
+ }
17877
18976
  }
17878
18977
  const config = await getConfig();
18978
+ if (llmsServePath && event.request.method === "GET" && event.url.pathname === llmsServePath) {
18979
+ const cwd = options.cwd ?? process.cwd();
18980
+ const filePath = path.resolve(cwd, config.llmsTxt.outputPath);
18981
+ try {
18982
+ const content = await fs9.readFile(filePath, "utf8");
18983
+ return new Response(content, {
18984
+ status: 200,
18985
+ headers: { "content-type": "text/plain; charset=utf-8" }
18986
+ });
18987
+ } catch {
18988
+ return resolve(event);
18989
+ }
18990
+ }
18991
+ if (serveMarkdownVariants && event.request.method === "GET" && event.url.pathname.endsWith(".md")) {
18992
+ let rawPath;
18993
+ try {
18994
+ rawPath = decodeURIComponent(event.url.pathname.slice(0, -3));
18995
+ } catch {
18996
+ return resolve(event);
18997
+ }
18998
+ const scope = event.url.searchParams?.get("scope") ?? void 0;
18999
+ try {
19000
+ const engine = await getEngine();
19001
+ const page = await engine.getPage(rawPath, scope);
19002
+ return new Response(page.markdown, {
19003
+ status: 200,
19004
+ headers: { "content-type": "text/markdown; charset=utf-8" }
19005
+ });
19006
+ } catch (error) {
19007
+ if (error instanceof SearchSocketError && error.status === 404) {
19008
+ return resolve(event);
19009
+ }
19010
+ throw error;
19011
+ }
19012
+ }
19013
+ if (mcpPath && event.url.pathname === mcpPath) {
19014
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
19015
+ }
17879
19016
  const targetPath = apiPath ?? config.api.path;
17880
- if (event.url.pathname !== targetPath) {
19017
+ if (!isApiPath(event.url.pathname, targetPath)) {
17881
19018
  return resolve(event);
17882
19019
  }
17883
- if (event.request.method === "OPTIONS") {
19020
+ const subPath = event.url.pathname.slice(targetPath.length);
19021
+ const method = event.request.method;
19022
+ if (method === "OPTIONS") {
17884
19023
  return new Response(null, {
17885
19024
  status: 204,
17886
19025
  headers: buildCorsHeaders(event.request, config)
17887
19026
  });
17888
19027
  }
17889
- if (event.request.method !== "POST") {
17890
- return withCors(
17891
- new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Method not allowed", 405))), {
17892
- status: 405,
17893
- headers: {
17894
- "content-type": "application/json"
17895
- }
17896
- }),
17897
- event.request,
17898
- config
17899
- );
17900
- }
17901
- const contentLength = Number(event.request.headers.get("content-length") ?? 0);
17902
- if (contentLength > bodyLimit) {
17903
- return withCors(
17904
- new Response(
17905
- JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Request body too large", 413))),
17906
- {
17907
- status: 413,
17908
- headers: {
17909
- "content-type": "application/json"
17910
- }
17911
- }
17912
- ),
17913
- event.request,
17914
- config
17915
- );
17916
- }
17917
19028
  if (rateLimiter) {
17918
19029
  const ip = event.getClientAddress?.() ?? event.request.headers.get("x-forwarded-for")?.split(",")[0]?.trim() ?? "unknown";
17919
19030
  if (!rateLimiter.check(ip)) {
@@ -17933,39 +19044,32 @@ function searchsocketHandle(options = {}) {
17933
19044
  }
17934
19045
  }
17935
19046
  try {
17936
- let rawBody;
17937
- if (typeof event.request.text === "function") {
17938
- rawBody = await event.request.text();
17939
- } else {
17940
- let parsedFallback;
17941
- try {
17942
- parsedFallback = await event.request.json();
17943
- } catch (error) {
17944
- if (error instanceof SyntaxError) {
17945
- throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
17946
- }
17947
- throw error;
19047
+ if (method === "GET") {
19048
+ if (subPath === "" || subPath === "/") {
19049
+ return await handleGetSearch(event, config, getEngine);
17948
19050
  }
17949
- rawBody = JSON.stringify(parsedFallback);
17950
- }
17951
- if (Buffer.byteLength(rawBody, "utf8") > bodyLimit) {
17952
- throw new SearchSocketError("INVALID_REQUEST", "Request body too large", 413);
19051
+ if (subPath === "/health") {
19052
+ return await handleGetHealth(event, config, getEngine);
19053
+ }
19054
+ if (subPath.startsWith("/pages/")) {
19055
+ return await handleGetPage(event, config, getEngine, subPath);
19056
+ }
19057
+ return withCors(
19058
+ new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Not found", 404))), {
19059
+ status: 404,
19060
+ headers: { "content-type": "application/json" }
19061
+ }),
19062
+ event.request,
19063
+ config
19064
+ );
17953
19065
  }
17954
- let body;
17955
- try {
17956
- body = JSON.parse(rawBody);
17957
- } catch {
17958
- throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
19066
+ if (method === "POST" && (subPath === "" || subPath === "/")) {
19067
+ return await handlePostSearch(event, config, getEngine, bodyLimit);
17959
19068
  }
17960
- const engine = await getEngine();
17961
- const searchRequest = body;
17962
- const result = await engine.search(searchRequest);
17963
19069
  return withCors(
17964
- new Response(JSON.stringify(result), {
17965
- status: 200,
17966
- headers: {
17967
- "content-type": "application/json"
17968
- }
19070
+ new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Method not allowed", 405))), {
19071
+ status: 405,
19072
+ headers: { "content-type": "application/json" }
17969
19073
  }),
17970
19074
  event.request,
17971
19075
  config
@@ -17986,6 +19090,183 @@ function searchsocketHandle(options = {}) {
17986
19090
  }
17987
19091
  };
17988
19092
  }
19093
/**
 * Reports whether a request pathname addresses the search API.
 *
 * A pathname matches when it is exactly `apiPath` or begins with `apiPath`
 * followed by a slash, so a sibling such as `/api/searching` does not match
 * `/api/search`.
 *
 * @param {string} pathname - Pathname of the incoming request URL.
 * @param {string} apiPath - Base path the API is mounted on.
 * @returns {boolean} True when `pathname` is `apiPath` or lies beneath it.
 */
function isApiPath(pathname, apiPath) {
  if (pathname === apiPath) {
    return true;
  }
  const subPathPrefix = `${apiPath}/`;
  return pathname.startsWith(subPathPrefix);
}
19096
/**
 * Handles `GET <apiPath>?q=...` by translating query-string parameters into a
 * search request, running it through the engine and returning the result as
 * a JSON response passed through `withCors`.
 *
 * Recognised parameters: `q` (required, non-blank), `topK`, `scope`,
 * `pathPrefix`, `groupBy` ("page" | "chunk"), `maxSubResults` (1-20) and
 * repeated `tags`.
 *
 * @param {{ url: URL, request: Request }} event - Request event; only `url.searchParams` and `request` are read.
 * @param {object} config - Resolved config, forwarded to `withCors`.
 * @param {() => Promise<{ search: Function }>} getEngine - Lazily resolves the search engine.
 * @returns {Promise<Response>} 200 JSON response holding the engine's search result.
 * @throws {SearchSocketError} INVALID_REQUEST (400) when a parameter is missing or malformed.
 */
async function handleGetSearch(event, config, getEngine) {
  const params = event.url.searchParams;
  const q = params.get("q");
  if (!q || q.trim() === "") {
    throw new SearchSocketError("INVALID_REQUEST", "Missing required query parameter: q", 400);
  }
  const searchRequest = { q };

  const topK = params.get("topK");
  if (topK !== null) {
    const parsed = parsePositiveIntegerQueryParam(topK);
    if (parsed === void 0) {
      throw new SearchSocketError("INVALID_REQUEST", "topK must be a positive integer", 400);
    }
    searchRequest.topK = parsed;
  }

  const scope = params.get("scope");
  if (scope !== null) searchRequest.scope = scope;

  const pathPrefix = params.get("pathPrefix");
  if (pathPrefix !== null) searchRequest.pathPrefix = pathPrefix;

  // An empty `groupBy=` is treated as "not provided" rather than rejected.
  const groupBy = params.get("groupBy");
  if (groupBy) {
    if (groupBy !== "page" && groupBy !== "chunk") {
      throw new SearchSocketError("INVALID_REQUEST", 'groupBy must be "page" or "chunk"', 400);
    }
    searchRequest.groupBy = groupBy;
  }

  const maxSubResults = params.get("maxSubResults");
  if (maxSubResults !== null) {
    const parsed = parsePositiveIntegerQueryParam(maxSubResults);
    if (parsed === void 0 || parsed > 20) {
      throw new SearchSocketError("INVALID_REQUEST", "maxSubResults must be a positive integer between 1 and 20", 400);
    }
    searchRequest.maxSubResults = parsed;
  }

  const tags = params.getAll("tags");
  if (tags.length > 0) searchRequest.tags = tags;

  const engine = await getEngine();
  const result = await engine.search(searchRequest);
  return withCors(
    new Response(JSON.stringify(result), {
      status: 200,
      headers: { "content-type": "application/json" }
    }),
    event.request,
    config
  );
}

/**
 * Strictly parses a query-string value as a positive integer.
 *
 * `Number.parseInt` silently accepts trailing junk ("5abc" -> 5) and
 * truncates fractions ("2.5" -> 2); this helper only accepts a run of decimal
 * digits (surrounding whitespace ignored) that is a safe integer >= 1.
 *
 * @param {string} raw - Raw parameter value.
 * @returns {number | undefined} The parsed integer, or undefined when invalid.
 */
function parsePositiveIntegerQueryParam(raw) {
  const text = raw.trim();
  if (!/^\d+$/.test(text)) return void 0;
  const value = Number(text);
  return Number.isSafeInteger(value) && value >= 1 ? value : void 0;
}
19143
/**
 * Handles `GET <apiPath>/health`: asks the engine for its health report and
 * returns it as a 200 JSON response passed through `withCors`.
 *
 * @param {{ request: Request }} event - Request event; only `request` is read.
 * @param {object} config - Resolved config, forwarded to `withCors`.
 * @param {() => Promise<{ health: Function }>} getEngine - Lazily resolves the search engine.
 * @returns {Promise<Response>} JSON response carrying whatever `engine.health()` resolved to.
 */
async function handleGetHealth(event, config, getEngine) {
  const engine = await getEngine();
  const healthReport = await engine.health();
  const jsonResponse = new Response(JSON.stringify(healthReport), {
    status: 200,
    headers: { "content-type": "application/json" }
  });
  return withCors(jsonResponse, event.request, config);
}
19155
/**
 * Handles `GET <apiPath>/pages/<path>`: looks a single page up in the engine
 * and returns it as a 200 JSON response passed through `withCors`.
 *
 * The page path is everything after the `/pages` prefix of `subPath`
 * (leading slash kept), percent-decoded. An optional `scope` query parameter
 * is forwarded to the engine; when absent, `undefined` is passed.
 *
 * @param {{ url: URL, request: Request }} event - Request event.
 * @param {object} config - Resolved config, forwarded to `withCors`.
 * @param {() => Promise<{ getPage: Function }>} getEngine - Lazily resolves the search engine.
 * @param {string} subPath - Request path relative to the API base, starting with `/pages/`.
 * @returns {Promise<Response>} JSON response carrying the engine's page result.
 * @throws {SearchSocketError} INVALID_REQUEST (400) when the path is not valid percent-encoding.
 */
async function handleGetPage(event, config, getEngine, subPath) {
  const encodedPagePath = subPath.slice("/pages".length);

  let pagePath;
  try {
    pagePath = decodeURIComponent(encodedPagePath);
  } catch {
    // decodeURIComponent throws URIError on broken escape sequences.
    throw new SearchSocketError("INVALID_REQUEST", "Malformed page path", 400);
  }

  const scope = event.url.searchParams?.get("scope") ?? void 0;
  const engine = await getEngine();
  const page = await engine.getPage(pagePath, scope);

  const jsonResponse = new Response(JSON.stringify(page), {
    status: 200,
    headers: { "content-type": "application/json" }
  });
  return withCors(jsonResponse, event.request, config);
}
19175
/**
 * Handles `POST <apiPath>`: reads a JSON search request from the body, runs
 * it through the engine and returns the result as a 200 JSON response passed
 * through `withCors`.
 *
 * The size limit is enforced twice: first against the declared
 * `content-length` header (cheap early rejection), then against the actual
 * UTF-8 byte length of the body that was read.
 *
 * @param {{ request: Request }} event - Request event.
 * @param {object} config - Resolved config, forwarded to `withCors`.
 * @param {() => Promise<{ search: Function }>} getEngine - Lazily resolves the search engine.
 * @param {number} bodyLimit - Maximum accepted body size in bytes.
 * @returns {Promise<Response>} JSON response carrying the engine's search result.
 * @throws {SearchSocketError} INVALID_REQUEST with status 413 (too large) or 400 (malformed JSON).
 */
async function handlePostSearch(event, config, getEngine, bodyLimit) {
  const { request } = event;

  const declaredLength = Number(request.headers.get("content-length") ?? 0);
  if (declaredLength > bodyLimit) {
    throw new SearchSocketError("INVALID_REQUEST", "Request body too large", 413);
  }

  let rawBody;
  if (typeof request.text !== "function") {
    // Request-like objects without text(): parse via json() and re-serialise
    // so the byte-size check below still applies.
    let parsedFallback;
    try {
      parsedFallback = await request.json();
    } catch (error) {
      if (error instanceof SyntaxError) {
        throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
      }
      throw error;
    }
    rawBody = JSON.stringify(parsedFallback);
  } else {
    rawBody = await request.text();
  }

  const actualBytes = Buffer.byteLength(rawBody, "utf8");
  if (actualBytes > bodyLimit) {
    throw new SearchSocketError("INVALID_REQUEST", "Request body too large", 413);
  }

  let searchRequest;
  try {
    searchRequest = JSON.parse(rawBody);
  } catch {
    throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
  }

  const engine = await getEngine();
  const result = await engine.search(searchRequest);
  const jsonResponse = new Response(JSON.stringify(result), {
    status: 200,
    headers: { "content-type": "application/json" }
  });
  return withCors(jsonResponse, request, config);
}
19216
/**
 * Serves one MCP request over the web-standard streamable HTTP transport.
 *
 * When `apiKey` is set, the request must carry `Authorization: Bearer <key>`;
 * otherwise a JSON-RPC error (code -32001) is returned with HTTP 401 and the
 * engine is never created. A fresh stateless transport and server are built
 * per request. In JSON-response mode both are closed as soon as the response
 * is produced; in streaming mode they are left open for the response stream.
 *
 * @param {{ request: Request }} event - Request event.
 * @param {string | undefined} apiKey - Shared secret; falsy disables authentication.
 * @param {boolean} enableJsonResponse - Forwarded to the transport; also controls eager cleanup.
 * @param {() => Promise<object>} getEngine - Lazily resolves the search engine.
 * @returns {Promise<Response>} The transport's response, or a JSON-RPC error response (401 / 500).
 */
async function handleMcpRequest(event, apiKey, enableJsonResponse, getEngine) {
  if (apiKey) {
    const authHeader = event.request.headers.get("authorization") ?? "";
    // The auth scheme name is case-insensitive (RFC 7235), so accept "bearer" too.
    const token = /^bearer /i.test(authHeader) ? authHeader.slice(7) : "";
    // Compare fixed-length digests: timingSafeEqual requires equal lengths, and
    // a separate length check would reveal the key length through timing.
    const digest = (value) => createHash("sha256").update(value).digest();
    if (!timingSafeEqual(digest(token), digest(apiKey))) {
      return new Response(
        JSON.stringify({
          jsonrpc: "2.0",
          error: { code: -32001, message: "Unauthorized" },
          id: null
        }),
        {
          status: 401,
          headers: { "content-type": "application/json", "www-authenticate": "Bearer" }
        }
      );
    }
  }
  const transport = new WebStandardStreamableHTTPServerTransport({
    sessionIdGenerator: void 0,
    enableJsonResponse
  });
  let server;
  try {
    const engine = await getEngine();
    server = createServer(engine);
    await server.connect(transport);
    const response = await transport.handleRequest(event.request);
    if (enableJsonResponse) {
      await transport.close();
      await server.close();
    }
    return response;
  } catch (error) {
    // Best-effort cleanup: a failure while closing must not mask the original error.
    try {
      await transport.close();
    } catch {
    }
    try {
      await server?.close();
    } catch {
    }
    return new Response(
      JSON.stringify({
        jsonrpc: "2.0",
        error: {
          code: -32603,
          message: error instanceof Error ? error.message : "Internal server error"
        },
        id: null
      }),
      { status: 500, headers: { "content-type": "application/json" } }
    );
  }
}
17989
19270
  function buildCorsHeaders(request, config) {
17990
19271
  const allowOrigins = config.api.cors.allowOrigins;
17991
19272
  if (!allowOrigins || allowOrigins.length === 0) {
@@ -17998,7 +19279,7 @@ function buildCorsHeaders(request, config) {
17998
19279
  }
17999
19280
  return {
18000
19281
  "access-control-allow-origin": allowOrigins.includes("*") ? "*" : origin,
18001
- "access-control-allow-methods": "POST, OPTIONS",
19282
+ "access-control-allow-methods": "GET, POST, OPTIONS",
18002
19283
  "access-control-allow-headers": "content-type"
18003
19284
  };
18004
19285
  }
@@ -18045,6 +19326,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
18045
19326
  if (normalizeText(current.text)) {
18046
19327
  sections.push({
18047
19328
  sectionTitle: current.sectionTitle,
19329
+ headingLevel: current.headingLevel,
18048
19330
  headingPath: current.headingPath,
18049
19331
  text: current.text.trim()
18050
19332
  });
@@ -18063,6 +19345,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
18063
19345
  headingStack.length = level;
18064
19346
  current = {
18065
19347
  sectionTitle: title,
19348
+ headingLevel: level,
18066
19349
  headingPath: headingStack.filter((entry) => Boolean(entry)).slice(0, headingPathDepth),
18067
19350
  text: `${line}
18068
19351
  `
@@ -18198,6 +19481,7 @@ function splitSection(section, config) {
18198
19481
  return [
18199
19482
  {
18200
19483
  sectionTitle: section.sectionTitle,
19484
+ headingLevel: section.headingLevel,
18201
19485
  headingPath: section.headingPath,
18202
19486
  chunkText: text
18203
19487
  }
@@ -18248,6 +19532,7 @@ ${chunk}`;
18248
19532
  }
18249
19533
  return merged.map((chunkText) => ({
18250
19534
  sectionTitle: section.sectionTitle,
19535
+ headingLevel: section.headingLevel,
18251
19536
  headingPath: section.headingPath,
18252
19537
  chunkText
18253
19538
  }));
@@ -18263,6 +19548,18 @@ function buildSummaryChunkText(page) {
18263
19548
  }
18264
19549
  return parts.join("\n\n");
18265
19550
  }
19551
/**
 * Builds the heading-aware title string for a chunk: the page title, an em
 * dash, then the heading trail joined with " > ".
 *
 * Chunks without a section title or without a heading level get no title.
 * When the heading path has more than one entry, the section title is
 * appended to the trail unless it already is the trail's last entry;
 * otherwise the section title alone is used.
 *
 * @param {{ title: string, sectionTitle?: string, headingLevel?: number, headingPath: string[] }} chunk
 * @returns {string | undefined} The title string, or undefined when the chunk is not under a heading.
 */
function buildEmbeddingTitle(chunk) {
  const { title, sectionTitle, headingLevel, headingPath } = chunk;
  if (!sectionTitle || headingLevel === void 0) return void 0;

  const trail = [];
  if (headingPath.length > 1) {
    trail.push(...headingPath);
    const deepestHeading = headingPath[headingPath.length - 1];
    if (deepestHeading !== sectionTitle) {
      trail.push(sectionTitle);
    }
  } else {
    trail.push(sectionTitle);
  }
  return `${title} \u2014 ${trail.join(" > ")}`;
}
18266
19563
  function buildEmbeddingText(chunk, prependTitle) {
18267
19564
  if (!prependTitle) return chunk.chunkText;
18268
19565
  const prefix = chunk.sectionTitle ? `${chunk.title} \u2014 ${chunk.sectionTitle}` : chunk.title;
@@ -18293,10 +19590,14 @@ function chunkPage(page, config, scope) {
18293
19590
  tags: page.tags,
18294
19591
  contentHash: "",
18295
19592
  description: page.description,
18296
- keywords: page.keywords
19593
+ keywords: page.keywords,
19594
+ publishedAt: page.publishedAt,
19595
+ incomingAnchorText: page.incomingAnchorText,
19596
+ meta: page.meta
18297
19597
  };
18298
19598
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
18299
- summaryChunk.contentHash = sha256(normalizeText(embeddingText));
19599
+ const metaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
19600
+ summaryChunk.contentHash = sha256(normalizeText(embeddingText) + metaSuffix);
18300
19601
  chunks.push(summaryChunk);
18301
19602
  }
18302
19603
  const ordinalOffset = config.chunking.pageSummaryChunk ? 1 : 0;
@@ -18313,6 +19614,7 @@ function chunkPage(page, config, scope) {
18313
19614
  path: page.url,
18314
19615
  title: page.title,
18315
19616
  sectionTitle: entry.sectionTitle,
19617
+ headingLevel: entry.headingLevel,
18316
19618
  headingPath: entry.headingPath,
18317
19619
  chunkText: entry.chunkText,
18318
19620
  snippet: toSnippet(entry.chunkText),
@@ -18322,10 +19624,16 @@ function chunkPage(page, config, scope) {
18322
19624
  tags: page.tags,
18323
19625
  contentHash: "",
18324
19626
  description: page.description,
18325
- keywords: page.keywords
19627
+ keywords: page.keywords,
19628
+ publishedAt: page.publishedAt,
19629
+ incomingAnchorText: page.incomingAnchorText,
19630
+ meta: page.meta
18326
19631
  };
18327
19632
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
18328
- chunk.contentHash = sha256(normalizeText(embeddingText));
19633
+ const embeddingTitle = config.chunking.weightHeadings ? buildEmbeddingTitle(chunk) : void 0;
19634
+ const chunkMetaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
19635
+ const hashInput = embeddingTitle ? `${normalizeText(embeddingText)}|title:${embeddingTitle}` : normalizeText(embeddingText);
19636
+ chunk.contentHash = sha256(hashInput + chunkMetaSuffix);
18329
19637
  chunks.push(chunk);
18330
19638
  }
18331
19639
  return chunks;
@@ -19158,6 +20466,69 @@ function gfm(turndownService) {
19158
20466
  }
19159
20467
 
19160
20468
  // src/indexing/extractor.ts
20469
/**
 * Converts a date-like value to a millisecond timestamp.
 *
 * `Date` instances use `getTime()`, strings are parsed with `new Date(...)`,
 * and numbers are passed through unchanged. Anything that does not yield a
 * finite number results in `undefined`.
 *
 * @param {unknown} value - Date, date string, numeric timestamp, or anything else.
 * @returns {number | undefined} Finite timestamp, or undefined when the value is absent or unparseable.
 */
function normalizeDateToMs(value) {
  if (value == null) return void 0;

  let timestamp;
  if (value instanceof Date) {
    timestamp = value.getTime();
  } else {
    switch (typeof value) {
      case "string":
        timestamp = new Date(value).getTime();
        break;
      case "number":
        timestamp = value;
        break;
      default:
        return void 0;
    }
  }
  return Number.isFinite(timestamp) ? timestamp : void 0;
}
20484
// Frontmatter keys probed for a publication date, in priority order.
var FRONTMATTER_DATE_FIELDS = ["date", "publishedAt", "updatedAt", "published_at", "updated_at"];

/**
 * Finds the first frontmatter date field that normalises to a timestamp.
 *
 * Fields are tried in `FRONTMATTER_DATE_FIELDS` order and probing stops at
 * the first usable value.
 *
 * @param {Record<string, unknown>} data - Parsed frontmatter.
 * @returns {number | undefined} Millisecond timestamp, or undefined when no field holds a usable date.
 */
function extractPublishedAtFromFrontmatter(data) {
  let publishedAt;
  FRONTMATTER_DATE_FIELDS.some((field) => {
    publishedAt = normalizeDateToMs(data[field]);
    return publishedAt !== void 0;
  });
  return publishedAt;
}
20492
/**
 * Extracts a publication timestamp from an HTML document.
 *
 * Sources are tried in this order and the first usable date wins:
 *   1. `datePublished` in JSON-LD scripts (top-level object, array entries,
 *      and entries of a top-level `@graph` array);
 *   2. `<meta property="article:published_time">`;
 *   3. `itemprop="datePublished"` on a `<meta>` (content) or `<time>` (datetime);
 *   4. the first `<time datetime>` element.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {number | undefined} Millisecond timestamp, or undefined when no date is found.
 */
function extractPublishedAtFromHtml($) {
  const jsonLdScripts = $('script[type="application/ld+json"]');
  for (let i = 0; i < jsonLdScripts.length; i++) {
    try {
      const raw = $(jsonLdScripts[i]).html();
      if (!raw) continue;
      const parsed = JSON.parse(raw);
      const candidates = [];
      if (Array.isArray(parsed)) {
        candidates.push(...parsed);
      } else if (parsed && typeof parsed === "object") {
        candidates.push(parsed);
        if (Array.isArray(parsed["@graph"])) {
          candidates.push(...parsed["@graph"]);
        }
      }
      for (const candidate of candidates) {
        // JSON-LD arrays may hold null or primitive entries; skip them so one
        // bad entry does not discard the remaining candidates of this script.
        if (candidate === null || typeof candidate !== "object") continue;
        const val = normalizeDateToMs(candidate.datePublished);
        if (val !== void 0) return val;
      }
    } catch {
      // Invalid JSON-LD is ignored; fall through to the next script or source.
    }
  }
  const ogTime = $('meta[property="article:published_time"]').attr("content")?.trim();
  if (ogTime) {
    const val = normalizeDateToMs(ogTime);
    if (val !== void 0) return val;
  }
  const itempropDate = $('meta[itemprop="datePublished"]').attr("content")?.trim() || $('time[itemprop="datePublished"]').attr("datetime")?.trim();
  if (itempropDate) {
    const val = normalizeDateToMs(itempropDate);
    if (val !== void 0) return val;
  }
  const timeEl = $("time[datetime]").first().attr("datetime")?.trim();
  if (timeEl) {
    const val = normalizeDateToMs(timeEl);
    if (val !== void 0) return val;
  }
  return void 0;
}
19161
20532
  function hasTopLevelNoindexComment(markdown) {
19162
20533
  const lines = markdown.split(/\r?\n/);
19163
20534
  let inFence = false;
@@ -19173,6 +20544,97 @@ function hasTopLevelNoindexComment(markdown) {
19173
20544
  }
19174
20545
  return false;
19175
20546
  }
20547
// Alt texts that only name the kind of asset and carry no searchable meaning.
var GARBAGE_ALT_WORDS = /* @__PURE__ */ new Set([
  "image", "photo", "picture", "icon",
  "logo", "banner", "screenshot", "thumbnail",
  "img", "graphic", "illustration", "spacer",
  "pixel", "placeholder", "avatar", "background"
]);

// Matches alt texts that end in an image file extension (optionally followed by a query string).
var IMAGE_EXT_RE = /\.(jpg|jpeg|png|gif|svg|webp|avif|bmp|ico)(\?.*)?$/i;

/**
 * Decides whether an image alt text is descriptive enough to index.
 *
 * After trimming, the text must be at least 5 characters long, must not look
 * like an image file name, and must not be one of the generic words in
 * `GARBAGE_ALT_WORDS` (compared case-insensitively).
 *
 * @param {string} alt - Raw alt attribute value.
 * @returns {boolean} True when the alt text is worth keeping.
 */
function isMeaningfulAlt(alt) {
  const candidate = alt.trim();
  const longEnough = candidate.length >= 5;
  return (
    longEnough &&
    !IMAGE_EXT_RE.test(candidate) &&
    !GARBAGE_ALT_WORDS.has(candidate.toLowerCase())
  );
}
20573
/**
 * Picks the text that should stand in for an image in extracted content.
 *
 * Priority:
 *   1. the image's own `imageDescAttr` attribute;
 *   2. the same attribute on the enclosing `<figure>`;
 *   3. a meaningful alt text combined with the figure caption ("alt — caption");
 *   4. a meaningful alt text alone;
 *   5. the figure caption alone.
 *
 * @param {object} img - Cheerio-style wrapper around the `<img>` element.
 * @param {Function} $ - Cheerio-style selector function (unused here).
 * @param {string} imageDescAttr - Name of the attribute carrying an explicit description.
 * @returns {string | null} Replacement text, or null when nothing usable exists.
 */
function resolveImageText(img, $, imageDescAttr) {
  const ownDescription = img.attr(imageDescAttr)?.trim();
  if (ownDescription) return ownDescription;

  const figure = img.closest("figure");
  const hasFigure = Boolean(figure.length);
  if (hasFigure) {
    const figureDescription = figure.attr(imageDescAttr)?.trim();
    if (figureDescription) return figureDescription;
  }

  const alt = img.attr("alt")?.trim() ?? "";
  const caption = hasFigure ? figure.find("figcaption").first().text().trim() : "";

  if (isMeaningfulAlt(alt)) {
    return caption ? `${alt} \u2014 ${caption}` : alt;
  }
  return caption || null;
}
20594
// Link texts that say nothing about the link target.
var STOP_ANCHORS = /* @__PURE__ */ new Set([
  "here", "click", "click here", "read more", "link", "this", "more"
]);

/**
 * Normalises link text for use as anchor text.
 *
 * Whitespace runs are collapsed to single spaces, the text is trimmed and
 * lower-cased. Texts shorter than 3 characters or listed in `STOP_ANCHORS`
 * become the empty string; anything longer than 100 characters is cut to 100.
 *
 * @param {string} raw - Raw text content of the link.
 * @returns {string} Normalised anchor text, or "" when it is not useful.
 */
function normalizeAnchorText(raw) {
  const text = raw.replace(/\s+/g, " ").trim().toLowerCase();
  const unusable = text.length < 3 || STOP_ANCHORS.has(text);
  // slice() is a no-op for texts of 100 characters or fewer.
  return unusable ? "" : text.slice(0, 100);
}
20610
/**
 * Escapes the characters `&`, `<` and `>` so text can be embedded as HTML
 * element content. Quotes are left untouched.
 *
 * @param {string} text - Plain text.
 * @returns {string} Text with `&`, `<`, `>` replaced by their entities.
 */
function escapeHtml(text) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };
  return text.replace(/[&<>]/g, (ch) => entities[ch]);
}
20613
/**
 * Replaces images inside `root2` with their textual description so the text
 * survives HTML-to-markdown conversion.
 *
 * `<picture>` elements are handled first (described by their first `<img>`),
 * then every remaining `<img>`. An element with resolvable text becomes
 * `<span>text</span>` and the figcaptions of its enclosing `<figure>` are
 * removed; an element without text is removed.
 *
 * @param {object} root2 - Cheerio-style selection to process (mutated in place).
 * @param {Function} $ - Cheerio-style selector function.
 * @param {string} imageDescAttr - Attribute holding an explicit image description.
 * @returns {void}
 */
function preprocessImages(root2, $, imageDescAttr) {
  // Swap `node` for a text span, or drop it when there is nothing to say.
  const substitute = (node, text) => {
    if (!text) {
      node.remove();
      return;
    }
    const enclosingFigure = node.closest("figure");
    if (enclosingFigure.length) enclosingFigure.find("figcaption").remove();
    node.replaceWith(`<span>${escapeHtml(text)}</span>`);
  };

  root2.find("picture").each((_i, el) => {
    const picture = $(el);
    const firstImg = picture.find("img").first();
    const text = firstImg.length ? resolveImageText(firstImg, $, imageDescAttr) : null;
    substitute(picture, text);
  });

  root2.find("img").each((_i, el) => {
    const img = $(el);
    substitute(img, resolveImageText(img, $, imageDescAttr));
  });
}
19176
20638
  function extractFromHtml(url, html, config) {
19177
20639
  const $ = load(html);
19178
20640
  const normalizedUrl = normalizeUrlPath(url);
@@ -19198,6 +20660,20 @@ function extractFromHtml(url, html, config) {
19198
20660
  if (weight === 0) {
19199
20661
  return null;
19200
20662
  }
20663
+ if ($('meta[name="searchsocket:noindex"]').attr("content") === "true") {
20664
+ return null;
20665
+ }
20666
+ const RESERVED_META_KEYS = /* @__PURE__ */ new Set(["noindex", "tags"]);
20667
+ const meta = {};
20668
+ $('meta[name^="searchsocket:"]').each((_i, el) => {
20669
+ const name = $(el).attr("name") ?? "";
20670
+ const key = name.slice("searchsocket:".length);
20671
+ if (!key || RESERVED_META_KEYS.has(key) || !validateMetaKey(key)) return;
20672
+ const content = $(el).attr("content") ?? "";
20673
+ const dataType = $(el).attr("data-type") ?? "string";
20674
+ meta[key] = parseMetaValue(content, dataType);
20675
+ });
20676
+ const componentTags = $('meta[name="searchsocket:tags"]').attr("content")?.trim();
19201
20677
  const description = $("meta[name='description']").attr("content")?.trim() || $("meta[property='og:description']").attr("content")?.trim() || void 0;
19202
20678
  const keywordsRaw = $("meta[name='keywords']").attr("content")?.trim();
19203
20679
  const keywords = keywordsRaw ? keywordsRaw.split(",").map((k) => k.trim()).filter(Boolean) : void 0;
@@ -19209,7 +20685,9 @@ function extractFromHtml(url, html, config) {
19209
20685
  root2.find(selector).remove();
19210
20686
  }
19211
20687
  root2.find(`[${config.extract.ignoreAttr}]`).remove();
20688
+ preprocessImages(root2, $, config.extract.imageDescAttr);
19212
20689
  const outgoingLinks = [];
20690
+ const seenLinkKeys = /* @__PURE__ */ new Set();
19213
20691
  root2.find("a[href]").each((_index, node) => {
19214
20692
  const href = $(node).attr("href");
19215
20693
  if (!href || href.startsWith("#") || href.startsWith("mailto:") || href.startsWith("tel:")) {
@@ -19220,7 +20698,19 @@ function extractFromHtml(url, html, config) {
19220
20698
  if (!["http:", "https:"].includes(parsed.protocol)) {
19221
20699
  return;
19222
20700
  }
19223
- outgoingLinks.push(normalizeUrlPath(parsed.pathname));
20701
+ const url2 = normalizeUrlPath(parsed.pathname);
20702
+ let anchorText = normalizeAnchorText($(node).text());
20703
+ if (!anchorText) {
20704
+ const imgAlt = $(node).find("img").first().attr("alt") ?? "";
20705
+ if (isMeaningfulAlt(imgAlt)) {
20706
+ anchorText = normalizeAnchorText(imgAlt);
20707
+ }
20708
+ }
20709
+ const key = `${url2}|${anchorText}`;
20710
+ if (!seenLinkKeys.has(key)) {
20711
+ seenLinkKeys.add(key);
20712
+ outgoingLinks.push({ url: url2, anchorText });
20713
+ }
19224
20714
  } catch {
19225
20715
  }
19226
20716
  });
@@ -19245,16 +20735,25 @@ function extractFromHtml(url, html, config) {
19245
20735
  return null;
19246
20736
  }
19247
20737
  const tags = normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1);
20738
+ const publishedAt = extractPublishedAtFromHtml($);
20739
+ if (componentTags) {
20740
+ const extraTags = componentTags.split(",").map((t) => t.trim()).filter(Boolean);
20741
+ for (const t of extraTags) {
20742
+ if (!tags.includes(t)) tags.push(t);
20743
+ }
20744
+ }
19248
20745
  return {
19249
20746
  url: normalizeUrlPath(url),
19250
20747
  title,
19251
20748
  markdown,
19252
- outgoingLinks: [...new Set(outgoingLinks)],
20749
+ outgoingLinks,
19253
20750
  noindex: false,
19254
20751
  tags,
19255
20752
  description,
19256
20753
  keywords,
19257
- weight
20754
+ weight,
20755
+ publishedAt,
20756
+ meta: Object.keys(meta).length > 0 ? meta : void 0
19258
20757
  };
19259
20758
  }
19260
20759
  function extractFromMarkdown(url, markdown, title) {
@@ -19275,6 +20774,24 @@ function extractFromMarkdown(url, markdown, title) {
19275
20774
  if (mdWeight === 0) {
19276
20775
  return null;
19277
20776
  }
20777
+ let mdMeta;
20778
+ const rawMeta = searchsocketMeta?.meta;
20779
+ if (rawMeta && typeof rawMeta === "object" && !Array.isArray(rawMeta)) {
20780
+ const metaObj = {};
20781
+ for (const [key, val] of Object.entries(rawMeta)) {
20782
+ if (!validateMetaKey(key)) continue;
20783
+ if (typeof val === "string" || typeof val === "number" || typeof val === "boolean") {
20784
+ metaObj[key] = val;
20785
+ } else if (Array.isArray(val) && val.every((v) => typeof v === "string")) {
20786
+ metaObj[key] = val;
20787
+ } else if (val instanceof Date) {
20788
+ metaObj[key] = val.getTime();
20789
+ }
20790
+ }
20791
+ if (Object.keys(metaObj).length > 0) {
20792
+ mdMeta = metaObj;
20793
+ }
20794
+ }
19278
20795
  const content = parsed.content;
19279
20796
  const normalized = normalizeMarkdown(content);
19280
20797
  if (!normalizeText(normalized)) {
@@ -19289,6 +20806,7 @@ function extractFromMarkdown(url, markdown, title) {
19289
20806
  fmKeywords = frontmatter.keywords.split(",").map((k) => k.trim()).filter(Boolean);
19290
20807
  }
19291
20808
  if (fmKeywords && fmKeywords.length === 0) fmKeywords = void 0;
20809
+ const publishedAt = extractPublishedAtFromFrontmatter(frontmatter);
19292
20810
  return {
19293
20811
  url: normalizeUrlPath(url),
19294
20812
  title: resolvedTitle,
@@ -19298,7 +20816,9 @@ function extractFromMarkdown(url, markdown, title) {
19298
20816
  tags: normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1),
19299
20817
  description: fmDescription,
19300
20818
  keywords: fmKeywords,
19301
- weight: mdWeight
20819
+ weight: mdWeight,
20820
+ publishedAt,
20821
+ meta: mdMeta
19302
20822
  };
19303
20823
  }
19304
20824
  function segmentToRegex(segment) {
@@ -19461,7 +20981,7 @@ async function parseManifest(cwd, outputDir) {
19461
20981
  const manifestPath = path.resolve(cwd, outputDir, "server", "manifest-full.js");
19462
20982
  let content;
19463
20983
  try {
19464
- content = await fs3.readFile(manifestPath, "utf8");
20984
+ content = await fs9.readFile(manifestPath, "utf8");
19465
20985
  } catch {
19466
20986
  throw new SearchSocketError(
19467
20987
  "BUILD_MANIFEST_NOT_FOUND",
@@ -19772,6 +21292,125 @@ function filePathToUrl(filePath, baseDir) {
19772
21292
  const noExt = relative.replace(/\.md$/i, "").replace(/\/index$/i, "");
19773
21293
  return normalizeUrlPath(noExt || "/");
19774
21294
  }
21295
// SvelteKit route files: +page / +layout / +error, optionally with an "@target" suffix.
var ROUTE_FILE_RE = /\+(page|layout|error)(@[^.]+)?\.svelte$/;

/**
 * Tells reusable Svelte components apart from SvelteKit route files.
 *
 * @param {string} filePath - Path of the file.
 * @returns {boolean} True for `.svelte` files that are not `+page`, `+layout` or `+error` route files.
 */
function isSvelteComponentFile(filePath) {
  return filePath.endsWith(".svelte") && !ROUTE_FILE_RE.test(filePath);
}
21300
/**
 * Extracts documentation metadata from Svelte component source.
 *
 * - `description` is the trimmed text of the first `<!-- @component ... -->` comment.
 * - `props` come from the first `let { ... } = $props()` destructuring. Each
 *   entry records the prop name, its type when the annotation (a named
 *   type/interface declared in the same source, or an inline `{ ... }` type)
 *   lists it, and its default value when one is written. Rest elements
 *   (`...rest`) are skipped.
 *
 * @param {string} source - Full text of the `.svelte` file.
 * @returns {{ description: string | undefined, props: Array<{ name: string, type?: string, default?: string }> }}
 */
function extractSvelteComponentMeta(source) {
  const description = source.match(/<!--\s*@component\s*([\s\S]*?)\s*-->/)?.[1]?.trim() || void 0;
  const props = [];

  const propsMatch = source.match(
    /let\s+\{([\s\S]*?)\}\s*(?::\s*([^=;{][\s\S]*?))?\s*=\s*\$props\(\)/
  );
  if (!propsMatch) {
    return { description, props };
  }

  const [, destructureBlock, rawAnnotation] = propsMatch;
  const typeAnnotation = rawAnnotation?.trim();

  // Map of prop name -> type text, when the annotation can be resolved.
  let resolvedTypeMap;
  if (typeAnnotation) {
    if (/^[A-Z]\w*$/.test(typeAnnotation)) {
      resolvedTypeMap = resolveTypeReference(source, typeAnnotation);
    } else if (typeAnnotation.startsWith("{")) {
      resolvedTypeMap = parseInlineTypeAnnotation(typeAnnotation);
    }
  }

  for (const rawEntry of splitDestructureBlock(destructureBlock)) {
    const entry = rawEntry.trim();
    if (entry === "" || entry.startsWith("...")) continue;

    // `name: alias = default` is tried first, then `name = default`.
    const matched = entry.match(/^(\w+)\s*:\s*\w+\s*(?:=\s*([\s\S]+))?$/) ?? entry.match(/^(\w+)\s*=\s*([\s\S]+)$/);
    const propName = matched ? matched[1] : (entry.match(/^(\w+)/)?.[1] ?? entry);
    const defaultValue = matched?.[2]?.trim();

    const prop = { name: propName };
    const propType = resolvedTypeMap?.get(propName);
    if (propType) prop.type = propType;
    if (defaultValue) prop.default = defaultValue;
    props.push(prop);
  }

  return { description, props };
}
21345
/**
 * Splits the inside of a destructuring pattern into its top-level entries.
 *
 * Commas nested inside `{}`, `[]` or `()` do not split, and neither do commas
 * or brackets inside string literals, so a default such as `"a, b"` or `')'`
 * stays intact. Entries are returned untrimmed; a trailing
 * whitespace-only fragment is dropped.
 *
 * @param {string} block - Text between the braces of `let { ... } = $props()`.
 * @returns {string[]} Raw entry strings in source order.
 */
function splitDestructureBlock(block) {
  const entries = [];
  let depth = 0;
  let quote = null; // the quote character of the string literal being scanned, if any
  let escaped = false; // true when the previous character inside a string was a backslash
  let current = "";
  for (const ch of block) {
    if (quote !== null) {
      current += ch;
      if (escaped) {
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === quote) {
        quote = null;
      }
      continue;
    }
    if (ch === '"' || ch === "'" || ch === "`") {
      quote = ch;
      current += ch;
    } else if (ch === "," && depth === 0) {
      entries.push(current);
      current = "";
    } else {
      if (ch === "{" || ch === "[" || ch === "(") {
        depth++;
      } else if (ch === "}" || ch === "]" || ch === ")") {
        depth--;
      }
      current += ch;
    }
  }
  if (current.trim()) entries.push(current);
  return entries;
}
21366
/**
 * Looks up an `interface Name { ... }` or `type Name = { ... }` declaration
 * in the source and parses the members between its braces.
 *
 * The body is found by brace matching from the opening `{` to its balancing
 * `}`, so nested braces inside the declaration are included.
 *
 * @param {string} source - Text to search.
 * @param {string} typeName - Identifier of the type; inserted into a regular expression as-is.
 * @returns {Map<string, string> | undefined} Result of `parseTypeMembers` on the body, or
 *   undefined when the declaration is missing or its braces never balance.
 */
function resolveTypeReference(source, typeName) {
  const startRe = new RegExp(`(?:interface\\s+${typeName}\\s*|type\\s+${typeName}\\s*=\\s*)\\{`);
  const opening = startRe.exec(source);
  if (opening === null) return void 0;

  const bodyStart = opening.index + opening[0].length;
  let depth = 1;
  for (let cursor = bodyStart; cursor < source.length; cursor++) {
    const ch = source[cursor];
    if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      if (depth === 0) {
        // `cursor` is the balancing brace; the body excludes it.
        return parseTypeMembers(source.slice(bodyStart, cursor));
      }
    }
  }
  return void 0;
}
21382
/**
 * Parses an inline object type annotation such as `{ a: string; b?: number }`.
 *
 * One leading `{` and one trailing `}` are stripped (each only if present)
 * and the remainder is handed to `parseTypeMembers`.
 *
 * @param {string} annotation - Annotation text.
 * @returns {Map<string, string>} Member name to type text.
 */
function parseInlineTypeAnnotation(annotation) {
  let inner = annotation;
  if (inner.startsWith("{")) inner = inner.slice(1);
  if (inner.endsWith("}")) inner = inner.slice(0, -1);
  return parseTypeMembers(inner);
}
21386
/**
 * Parses the members of an object type body into a name -> type-text map.
 *
 * Every line is split into members at top-level `;` or `,` separators.
 * Separators nested inside `()`, `[]`, `{}`, `<>` or inside string-literal
 * types do not split, so `Record<string, number>`, `(a: A, b: B) => void`
 * and `{ x: number; y: number }` each stay one type. Members that do not
 * look like `name: type` or `name?: type` are ignored.
 *
 * Lines are processed independently, so a nested object type that spans
 * several lines is not reassembled.
 *
 * @param {string} body - Text between the braces of an interface / object type.
 * @returns {Map<string, string>} Member name to trimmed type text.
 */
function parseTypeMembers(body) {
  const map = new Map();
  for (const line of body.split("\n")) {
    for (const rawMember of splitTypeMembersOnLine(line)) {
      const member = rawMember.trim();
      if (!member) continue;
      const memberMatch = member.match(/^(\w+)\??\s*:\s*(.+)$/);
      if (memberMatch) {
        map.set(memberMatch[1], memberMatch[2].replace(/,\s*$/, "").trim());
      }
    }
  }
  return map;
}

/**
 * Splits one line of a type body at `;` / `,` separators that sit outside
 * any bracket pair and outside string literals.
 *
 * @param {string} line - A single line of the type body.
 * @returns {string[]} Raw (untrimmed) member fragments.
 */
function splitTypeMembersOnLine(line) {
  const members = [];
  let depth = 0;
  let quote = null; // quote character of the string literal being scanned, if any
  let previous = "";
  let current = "";
  for (const ch of line) {
    if (quote !== null) {
      current += ch;
      if (ch === quote && previous !== "\\") quote = null;
    } else if (ch === '"' || ch === "'" || ch === "`") {
      quote = ch;
      current += ch;
    } else if (depth === 0 && (ch === ";" || ch === ",")) {
      members.push(current);
      current = "";
    } else {
      if (ch === "(" || ch === "[" || ch === "{" || ch === "<") {
        depth++;
      } else if (ch === ")" || ch === "]" || ch === "}" || (ch === ">" && previous !== "=")) {
        // The ">" of an arrow "=>" is not a closing angle bracket.
        depth = Math.max(0, depth - 1);
      }
      current += ch;
    }
    previous = ch;
  }
  members.push(current);
  return members;
}
21397
/**
 * Renders component metadata as one line of indexable text:
 * "<Name> component. <description> Props: a (type) default: x, b."
 *
 * @param {string} componentName - Display name of the component.
 * @param {{ description?: string, props: Array<{ name: string, type?: string, default?: string }> }} meta
 * @returns {string} The rendered text, or "" when there is neither a description nor any prop.
 */
function buildComponentMarkdown(componentName, meta) {
  const { description, props } = meta;
  if (!description && props.length === 0) return "";

  const sentences = [`${componentName} component.`];
  if (description) {
    sentences.push(description);
  }
  if (props.length > 0) {
    const described = [];
    for (const { name, type, default: fallback } of props) {
      const typePart = type ? ` (${type})` : "";
      const defaultPart = fallback ? ` default: ${fallback}` : "";
      described.push(`${name}${typePart}${defaultPart}`);
    }
    sentences.push(`Props: ${described.join(", ")}.`);
  }
  return sentences.join(" ");
}
19775
21414
  function normalizeSvelteToMarkdown(source) {
19776
21415
  return source.replace(/<script[\s\S]*?<\/script>/g, "").replace(/<style[\s\S]*?<\/style>/g, "").replace(/<[^>]+>/g, " ").replace(/\{[^}]+\}/g, " ").replace(/\s+/g, " ").trim();
19777
21416
  }
@@ -19790,13 +21429,27 @@ async function loadContentFilesPages(cwd, config, maxPages) {
19790
21429
  const selected = typeof limit === "number" ? files.slice(0, limit) : files;
19791
21430
  const pages = [];
19792
21431
  for (const filePath of selected) {
19793
- const raw = await fs3.readFile(filePath, "utf8");
19794
- const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
21432
+ const raw = await fs9.readFile(filePath, "utf8");
21433
+ let markdown;
21434
+ let tags;
21435
+ if (filePath.endsWith(".md")) {
21436
+ markdown = raw;
21437
+ } else if (isSvelteComponentFile(filePath)) {
21438
+ const componentName = path.basename(filePath, ".svelte");
21439
+ const meta = extractSvelteComponentMeta(raw);
21440
+ const componentMarkdown = buildComponentMarkdown(componentName, meta);
21441
+ const templateContent = normalizeSvelteToMarkdown(raw);
21442
+ markdown = componentMarkdown ? [componentMarkdown, templateContent].filter(Boolean).join("\n\n") : templateContent;
21443
+ tags = ["component"];
21444
+ } else {
21445
+ markdown = normalizeSvelteToMarkdown(raw);
21446
+ }
19795
21447
  pages.push({
19796
21448
  url: filePathToUrl(filePath, baseDir),
19797
21449
  markdown,
19798
21450
  sourcePath: path.relative(cwd, filePath).replace(/\\/g, "/"),
19799
- outgoingLinks: []
21451
+ outgoingLinks: [],
21452
+ ...tags ? { tags } : {}
19800
21453
  });
19801
21454
  }
19802
21455
  return pages;
@@ -19926,7 +21579,7 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
19926
21579
  const selected = typeof limit === "number" ? htmlFiles.slice(0, limit) : htmlFiles;
19927
21580
  const pages = [];
19928
21581
  for (const filePath of selected) {
19929
- const html = await fs3.readFile(filePath, "utf8");
21582
+ const html = await fs9.readFile(filePath, "utf8");
19930
21583
  pages.push({
19931
21584
  url: staticHtmlFileToUrl(filePath, outputDir),
19932
21585
  html,
@@ -19989,7 +21642,7 @@ function isBlockedByRobots(urlPath, rules3) {
19989
21642
  }
19990
21643
  async function loadRobotsTxtFromDir(dir) {
19991
21644
  try {
19992
- const content = await fs3.readFile(path.join(dir, "robots.txt"), "utf8");
21645
+ const content = await fs9.readFile(path.join(dir, "robots.txt"), "utf8");
19993
21646
  return parseRobotsTxt(content);
19994
21647
  } catch {
19995
21648
  return null;
@@ -20006,6 +21659,81 @@ async function fetchRobotsTxt(baseUrl) {
20006
21659
  return null;
20007
21660
  }
20008
21661
  }
21662
/**
 * Resolve a page URL against the project's base URL.
 *
 * @param {string} pageUrl - Page URL or path to resolve.
 * @param {string | undefined} baseUrl - Base URL; when falsy, `pageUrl` is returned untouched.
 * @returns {string} The absolute href, or `pageUrl` itself when there is no
 *   base URL or the `URL` constructor rejects the inputs.
 */
function resolvePageUrl(pageUrl, baseUrl) {
  if (baseUrl) {
    try {
      const absolute = new URL(pageUrl, baseUrl);
      return absolute.href;
    } catch {
      // Unparseable base/page combination: fall through to the raw value.
    }
  }
  return pageUrl;
}
21670
/**
 * Build the text of an `llms.txt` index: a heading, an optional blockquote
 * description, and a "Pages" list with one markdown link per page.
 *
 * Pages whose URL is `/llms.txt` or `/llms-full.txt` are left out. The rest
 * are ordered by ascending `depth`, then by descending `incomingLinks`.
 *
 * @param {Array<object>} pages - Pages; reads `url`, `title`, `description`, `depth`, `incomingLinks`.
 * @param {object} config - Reads `llmsTxt.title`, `llmsTxt.description`, `project.id`, `project.baseUrl`.
 * @returns {string} The document text, terminated by a newline.
 */
function generateLlmsTxt(pages, config) {
  const { llmsTxt, project } = config;
  const heading = llmsTxt.title ?? project.id;
  const summary = llmsTxt.description;
  const origin = project.baseUrl;

  const out = [`# ${heading}`];
  if (summary) {
    out.push("", `> ${summary}`);
  }

  // `filter` already yields a fresh array, so sorting it leaves `pages` untouched.
  const ordered = pages
    .filter((entry) => !(entry.url === "/llms.txt" || entry.url === "/llms-full.txt"))
    .sort((left, right) =>
      left.depth !== right.depth
        ? left.depth - right.depth
        : right.incomingLinks - left.incomingLinks
    );

  if (ordered.length > 0) {
    out.push("", "## Pages", "");
    for (const entry of ordered) {
      const href = resolvePageUrl(entry.url, origin);
      const suffix = entry.description ? `: ${entry.description}` : "";
      out.push(`- [${entry.title}](${href})${suffix}`);
    }
  }

  return `${out.join("\n")}\n`;
}
21699
/**
 * Build the text of the full `llms` document: a heading, an optional
 * blockquote description, then every page's trimmed markdown under a linked
 * `##` heading, with sections separated by `---` rules.
 *
 * Pages whose URL is `/llms.txt` or `/llms-full.txt` are left out. The rest
 * are ordered by ascending `depth`, then by descending `incomingLinks`.
 *
 * @param {Array<object>} pages - Pages; reads `url`, `title`, `markdown`, `depth`, `incomingLinks`.
 * @param {object} config - Reads `llmsTxt.title`, `llmsTxt.description`, `project.id`, `project.baseUrl`.
 * @returns {string} The document text, terminated by a newline.
 */
function generateLlmsFullTxt(pages, config) {
  const { llmsTxt, project } = config;
  const heading = llmsTxt.title ?? project.id;
  const summary = llmsTxt.description;
  const origin = project.baseUrl;

  const out = [`# ${heading}`];
  if (summary) {
    out.push("", `> ${summary}`);
  }

  // `filter` already yields a fresh array, so sorting it leaves `pages` untouched.
  const ordered = pages
    .filter((entry) => !(entry.url === "/llms.txt" || entry.url === "/llms-full.txt"))
    .sort((left, right) =>
      left.depth !== right.depth
        ? left.depth - right.depth
        : right.incomingLinks - left.incomingLinks
    );

  for (const entry of ordered) {
    const href = resolvePageUrl(entry.url, origin);
    out.push("", "---", "", `## [${entry.title}](${href})`, "", entry.markdown.trim());
  }

  return `${out.join("\n")}\n`;
}
21722
/**
 * Write the generated `llms.txt` index to disk and, when
 * `config.llmsTxt.generateFull` is set, a sibling "-full" document as well.
 *
 * The output directory is created recursively if needed.
 *
 * @param {Array<object>} pages - Pages passed through to the generators.
 * @param {object} config - Reads `llmsTxt.outputPath` and `llmsTxt.generateFull`.
 * @param {string} cwd - Directory that `llmsTxt.outputPath` is resolved against.
 * @param {{ info(message: string): void }} logger3 - Receives one info line per file written.
 * @returns {Promise<void>} Settles once all files are written; rejects on any filesystem error.
 */
async function writeLlmsTxt(pages, config, cwd, logger3) {
  const outputPath = path.resolve(cwd, config.llmsTxt.outputPath);
  await fs9.mkdir(path.dirname(outputPath), { recursive: true });
  await fs9.writeFile(outputPath, generateLlmsTxt(pages, config), "utf8");
  logger3.info(`Generated llms.txt at ${config.llmsTxt.outputPath}`);

  if (!config.llmsTxt.generateFull) {
    return;
  }

  // A plain `.replace(/\.txt$/, "-full.txt")` leaves paths without a `.txt`
  // suffix unchanged, which would make the full document overwrite the index
  // written above. Always derive a path that differs from `outputPath`.
  let fullPath;
  if (outputPath.endsWith(".txt")) {
    fullPath = `${outputPath.slice(0, -".txt".length)}-full.txt`;
  } else {
    const { dir, name, ext } = path.parse(outputPath);
    fullPath = path.join(dir, `${name}-full${ext}`);
  }

  await fs9.writeFile(fullPath, generateLlmsFullTxt(pages, config), "utf8");
  logger3.info(`Generated llms-full.txt at ${path.relative(cwd, fullPath)}`);
}
20009
21737
 
20010
21738
  // src/indexing/pipeline.ts
20011
21739
  function buildPageSummary(page, maxChars = 3500) {
@@ -20024,16 +21752,33 @@ function buildPageSummary(page, maxChars = 3500) {
20024
21752
  if (joined.length <= maxChars) return joined;
20025
21753
  return joined.slice(0, maxChars).trim();
20026
21754
  }
21755
/**
 * Compute a change-detection hash over the fields of an indexed page.
 *
 * The title, description, keywords, tags, markdown, outgoing link count,
 * publish date, incoming anchor text, outgoing link URLs and `meta` are
 * joined with `|` and passed to `sha256`. Keywords, tags and link URLs are
 * sorted on copies first, so their order does not affect the result and the
 * page's own arrays are not mutated.
 *
 * @param {object} page - Page record; `title`, `tags`, `markdown` and
 *   `outgoingLinks` are read unconditionally, the other fields are optional.
 * @returns {*} Whatever `sha256` returns for the joined string.
 */
function buildPageContentHash(page) {
  // Serialise `meta` with object keys sorted at every level. An array
  // replacer (`Object.keys(meta).sort()`) is a whitelist that JSON.stringify
  // also applies to nested objects, so nested keys missing from the top level
  // would be dropped and changes to them would go undetected.
  const sortObjectKeys = (_key, value) => {
    if (value === null || typeof value !== "object" || Array.isArray(value)) {
      return value;
    }
    return Object.fromEntries(
      Object.keys(value)
        .sort()
        .map((name) => [name, value[name]])
    );
  };

  const parts = [
    page.title,
    page.description ?? "",
    [...(page.keywords ?? [])].sort().join(","),
    [...page.tags].sort().join(","),
    page.markdown,
    String(page.outgoingLinks),
    String(page.publishedAt ?? ""),
    page.incomingAnchorText ?? "",
    [...(page.outgoingLinkUrls ?? [])].sort().join(","),
    page.meta ? JSON.stringify(page.meta, sortObjectKeys) : "",
  ];
  return sha256(parts.join("|"));
}
20027
21770
  var IndexPipeline = class _IndexPipeline {
20028
21771
  cwd;
20029
21772
  config;
20030
21773
  store;
20031
21774
  logger;
21775
+ hooks;
20032
21776
  constructor(options) {
20033
21777
  this.cwd = options.cwd;
20034
21778
  this.config = options.config;
20035
21779
  this.store = options.store;
20036
21780
  this.logger = options.logger;
21781
+ this.hooks = options.hooks;
20037
21782
  }
20038
21783
  static async create(options = {}) {
20039
21784
  const cwd = path.resolve(options.cwd ?? process.cwd());
@@ -20043,7 +21788,8 @@ var IndexPipeline = class _IndexPipeline {
20043
21788
  cwd,
20044
21789
  config,
20045
21790
  store,
20046
- logger: options.logger ?? new Logger()
21791
+ logger: options.logger ?? new Logger(),
21792
+ hooks: options.hooks ?? {}
20047
21793
  });
20048
21794
  }
20049
21795
  getConfig() {
@@ -20064,7 +21810,7 @@ var IndexPipeline = class _IndexPipeline {
20064
21810
  const scope = resolveScope(this.config, options.scopeOverride);
20065
21811
  ensureStateDirs(this.cwd, this.config.state.dir);
20066
21812
  const sourceMode = options.sourceOverride ?? this.config.source.mode;
20067
- this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
21813
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-vector)`);
20068
21814
  if (options.force) {
20069
21815
  this.logger.info("Force mode enabled \u2014 full rebuild");
20070
21816
  }
@@ -20072,9 +21818,9 @@ var IndexPipeline = class _IndexPipeline {
20072
21818
  this.logger.info("Dry run \u2014 no writes will be performed");
20073
21819
  }
20074
21820
  const manifestStart = stageStart();
20075
- const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
21821
+ const existingPageHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getPageHashes(scope);
20076
21822
  stageEnd("manifest", manifestStart);
20077
- this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
21823
+ this.logger.debug(`Manifest: ${existingPageHashes.size} existing page hashes loaded`);
20078
21824
  const sourceStart = stageStart();
20079
21825
  this.logger.info(`Loading pages (source: ${sourceMode})...`);
20080
21826
  let sourcePages;
@@ -20151,11 +21897,61 @@ var IndexPipeline = class _IndexPipeline {
20151
21897
  );
20152
21898
  continue;
20153
21899
  }
20154
- extractedPages.push(extracted);
21900
+ if (sourcePage.tags && sourcePage.tags.length > 0) {
21901
+ extracted.tags = [.../* @__PURE__ */ new Set([...extracted.tags, ...sourcePage.tags])];
21902
+ }
21903
+ let accepted;
21904
+ if (this.hooks.transformPage) {
21905
+ const transformed = await this.hooks.transformPage(extracted);
21906
+ if (transformed === null) {
21907
+ this.logger.debug(`Page ${sourcePage.url} skipped by transformPage hook`);
21908
+ continue;
21909
+ }
21910
+ accepted = transformed;
21911
+ } else {
21912
+ accepted = extracted;
21913
+ }
21914
+ extractedPages.push(accepted);
20155
21915
  this.logger.event("page_extracted", {
20156
- url: extracted.url
21916
+ url: accepted.url
20157
21917
  });
20158
21918
  }
21919
+ const customRecords = options.customRecords ?? [];
21920
+ if (customRecords.length > 0) {
21921
+ this.logger.info(`Processing ${customRecords.length} custom record${customRecords.length === 1 ? "" : "s"}...`);
21922
+ for (const record of customRecords) {
21923
+ const normalizedUrl = normalizeUrlPath(record.url);
21924
+ const normalized = normalizeMarkdown(record.content);
21925
+ if (!normalized.trim()) {
21926
+ this.logger.warn(`Custom record ${normalizedUrl} has empty content and was skipped.`);
21927
+ continue;
21928
+ }
21929
+ const urlTags = normalizedUrl.split("/").filter(Boolean).slice(0, 1);
21930
+ const tags = record.tags ? [.../* @__PURE__ */ new Set([...urlTags, ...record.tags])] : urlTags;
21931
+ const extracted = {
21932
+ url: normalizedUrl,
21933
+ title: record.title,
21934
+ markdown: normalized,
21935
+ outgoingLinks: [],
21936
+ noindex: false,
21937
+ tags,
21938
+ weight: record.weight
21939
+ };
21940
+ let accepted;
21941
+ if (this.hooks.transformPage) {
21942
+ const transformed = await this.hooks.transformPage(extracted);
21943
+ if (transformed === null) {
21944
+ this.logger.debug(`Custom record ${normalizedUrl} skipped by transformPage hook`);
21945
+ continue;
21946
+ }
21947
+ accepted = transformed;
21948
+ } else {
21949
+ accepted = extracted;
21950
+ }
21951
+ extractedPages.push(accepted);
21952
+ this.logger.event("page_extracted", { url: accepted.url, custom: true });
21953
+ }
21954
+ }
20159
21955
  extractedPages.sort((a, b) => a.url.localeCompare(b.url));
20160
21956
  const uniquePages = [];
20161
21957
  const seenUrls = /* @__PURE__ */ new Set();
@@ -20188,15 +21984,28 @@ var IndexPipeline = class _IndexPipeline {
20188
21984
  const linkStart = stageStart();
20189
21985
  const pageSet = new Set(indexablePages.map((page) => normalizeUrlPath(page.url)));
20190
21986
  const incomingLinkCount = /* @__PURE__ */ new Map();
21987
+ const incomingAnchorTexts = /* @__PURE__ */ new Map();
20191
21988
  for (const page of indexablePages) {
20192
21989
  incomingLinkCount.set(page.url, incomingLinkCount.get(page.url) ?? 0);
20193
21990
  }
20194
21991
  for (const page of indexablePages) {
20195
- for (const outgoing of page.outgoingLinks) {
21992
+ const seenForCount = /* @__PURE__ */ new Set();
21993
+ const seenForAnchor = /* @__PURE__ */ new Set();
21994
+ for (const { url: outgoing, anchorText } of page.outgoingLinks) {
20196
21995
  if (!pageSet.has(outgoing)) {
20197
21996
  continue;
20198
21997
  }
20199
- incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
21998
+ if (!seenForCount.has(outgoing)) {
21999
+ seenForCount.add(outgoing);
22000
+ incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
22001
+ }
22002
+ if (anchorText && !seenForAnchor.has(outgoing)) {
22003
+ seenForAnchor.add(outgoing);
22004
+ if (!incomingAnchorTexts.has(outgoing)) {
22005
+ incomingAnchorTexts.set(outgoing, /* @__PURE__ */ new Set());
22006
+ }
22007
+ incomingAnchorTexts.get(outgoing).add(anchorText);
22008
+ }
20200
22009
  }
20201
22010
  }
20202
22011
  stageEnd("links", linkStart);
@@ -20215,6 +22024,15 @@ var IndexPipeline = class _IndexPipeline {
20215
22024
  });
20216
22025
  }
20217
22026
  }
22027
+ for (const record of customRecords) {
22028
+ const normalizedUrl = normalizeUrlPath(record.url);
22029
+ if (!precomputedRoutes.has(normalizedUrl)) {
22030
+ precomputedRoutes.set(normalizedUrl, {
22031
+ routeFile: "",
22032
+ routeResolution: "exact"
22033
+ });
22034
+ }
22035
+ }
20218
22036
  for (const page of indexablePages) {
20219
22037
  const routeMatch = precomputedRoutes.get(normalizeUrlPath(page.url)) ?? mapUrlToRoute(page.url, routePatterns);
20220
22038
  if (routeMatch.routeResolution === "best-effort") {
@@ -20232,6 +22050,17 @@ var IndexPipeline = class _IndexPipeline {
20232
22050
  } else {
20233
22051
  routeExact += 1;
20234
22052
  }
22053
+ const anchorSet = incomingAnchorTexts.get(page.url);
22054
+ let incomingAnchorText;
22055
+ if (anchorSet && anchorSet.size > 0) {
22056
+ let joined = "";
22057
+ for (const phrase of anchorSet) {
22058
+ const next2 = joined ? `${joined} ${phrase}` : phrase;
22059
+ if (next2.length > 500) break;
22060
+ joined = next2;
22061
+ }
22062
+ incomingAnchorText = joined || void 0;
22063
+ }
20235
22064
  const indexedPage = {
20236
22065
  url: page.url,
20237
22066
  title: page.title,
@@ -20241,40 +22070,113 @@ var IndexPipeline = class _IndexPipeline {
20241
22070
  generatedAt: nowIso(),
20242
22071
  incomingLinks: incomingLinkCount.get(page.url) ?? 0,
20243
22072
  outgoingLinks: page.outgoingLinks.length,
22073
+ outgoingLinkUrls: page.outgoingLinks.map((l) => typeof l === "string" ? l : l.url),
20244
22074
  depth: getUrlDepth(page.url),
20245
22075
  tags: page.tags,
20246
22076
  markdown: page.markdown,
20247
22077
  description: page.description,
20248
- keywords: page.keywords
22078
+ keywords: page.keywords,
22079
+ publishedAt: page.publishedAt,
22080
+ incomingAnchorText,
22081
+ meta: page.meta
20249
22082
  };
20250
22083
  pages.push(indexedPage);
20251
22084
  this.logger.event("page_indexed", { url: page.url });
20252
22085
  }
22086
+ const pageRecords = pages.map((p) => {
22087
+ const summary = buildPageSummary(p);
22088
+ return {
22089
+ url: p.url,
22090
+ title: p.title,
22091
+ markdown: p.markdown,
22092
+ projectId: scope.projectId,
22093
+ scopeName: scope.scopeName,
22094
+ routeFile: p.routeFile,
22095
+ routeResolution: p.routeResolution,
22096
+ incomingLinks: p.incomingLinks,
22097
+ outgoingLinks: p.outgoingLinks,
22098
+ outgoingLinkUrls: p.outgoingLinkUrls,
22099
+ depth: p.depth,
22100
+ tags: p.tags,
22101
+ indexedAt: p.generatedAt,
22102
+ summary,
22103
+ description: p.description,
22104
+ keywords: p.keywords,
22105
+ contentHash: buildPageContentHash(p),
22106
+ publishedAt: p.publishedAt,
22107
+ meta: p.meta
22108
+ };
22109
+ });
22110
+ const currentPageUrls = new Set(pageRecords.map((r) => r.url));
22111
+ const changedPages = pageRecords.filter(
22112
+ (r) => !existingPageHashes.has(r.url) || existingPageHashes.get(r.url) !== r.contentHash
22113
+ );
22114
+ const deletedPageUrls = [...existingPageHashes.keys()].filter((url) => !currentPageUrls.has(url));
20253
22115
  if (!options.dryRun) {
20254
- const pageRecords = pages.map((p) => {
20255
- const summary = buildPageSummary(p);
20256
- return {
20257
- url: p.url,
20258
- title: p.title,
20259
- markdown: p.markdown,
20260
- projectId: scope.projectId,
20261
- scopeName: scope.scopeName,
20262
- routeFile: p.routeFile,
20263
- routeResolution: p.routeResolution,
20264
- incomingLinks: p.incomingLinks,
20265
- outgoingLinks: p.outgoingLinks,
20266
- depth: p.depth,
20267
- tags: p.tags,
20268
- indexedAt: p.generatedAt,
20269
- summary,
20270
- description: p.description,
20271
- keywords: p.keywords
20272
- };
20273
- });
20274
- await this.store.deletePages(scope);
20275
- await this.store.upsertPages(pageRecords, scope);
22116
+ if (options.force) {
22117
+ await this.store.deletePages(scope);
22118
+ this.logger.info(`Upserting ${pageRecords.length} page summaries...`);
22119
+ const pageDocs = pageRecords.map((r) => ({
22120
+ id: r.url,
22121
+ data: r.summary ?? r.title,
22122
+ metadata: {
22123
+ title: r.title,
22124
+ url: r.url,
22125
+ description: r.description ?? "",
22126
+ keywords: r.keywords ?? [],
22127
+ summary: r.summary ?? "",
22128
+ tags: r.tags,
22129
+ markdown: r.markdown,
22130
+ routeFile: r.routeFile,
22131
+ routeResolution: r.routeResolution,
22132
+ incomingLinks: r.incomingLinks,
22133
+ outgoingLinks: r.outgoingLinks,
22134
+ outgoingLinkUrls: r.outgoingLinkUrls ?? [],
22135
+ depth: r.depth,
22136
+ indexedAt: r.indexedAt,
22137
+ contentHash: r.contentHash ?? "",
22138
+ publishedAt: r.publishedAt ?? null,
22139
+ ...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
22140
+ }
22141
+ }));
22142
+ await this.store.upsertPages(pageDocs, scope);
22143
+ } else {
22144
+ if (changedPages.length > 0) {
22145
+ this.logger.info(`Upserting ${changedPages.length} changed page summaries...`);
22146
+ const pageDocs = changedPages.map((r) => ({
22147
+ id: r.url,
22148
+ data: r.summary ?? r.title,
22149
+ metadata: {
22150
+ title: r.title,
22151
+ url: r.url,
22152
+ description: r.description ?? "",
22153
+ keywords: r.keywords ?? [],
22154
+ summary: r.summary ?? "",
22155
+ tags: r.tags,
22156
+ markdown: r.markdown,
22157
+ routeFile: r.routeFile,
22158
+ routeResolution: r.routeResolution,
22159
+ incomingLinks: r.incomingLinks,
22160
+ outgoingLinks: r.outgoingLinks,
22161
+ outgoingLinkUrls: r.outgoingLinkUrls ?? [],
22162
+ depth: r.depth,
22163
+ indexedAt: r.indexedAt,
22164
+ contentHash: r.contentHash ?? "",
22165
+ publishedAt: r.publishedAt ?? null,
22166
+ ...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
22167
+ }
22168
+ }));
22169
+ await this.store.upsertPages(pageDocs, scope);
22170
+ }
22171
+ if (deletedPageUrls.length > 0) {
22172
+ await this.store.deletePagesByIds(deletedPageUrls, scope);
22173
+ }
22174
+ }
20276
22175
  }
22176
+ const pagesChanged = options.force ? pageRecords.length : changedPages.length;
22177
+ const pagesDeleted = deletedPageUrls.length;
20277
22178
  stageEnd("pages", pagesStart);
22179
+ this.logger.info(`Page changes: ${pagesChanged} changed/new, ${pagesDeleted} deleted, ${pageRecords.length - changedPages.length} unchanged`);
20278
22180
  this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
20279
22181
  const chunkStart = stageStart();
20280
22182
  this.logger.info("Chunking pages...");
@@ -20283,6 +22185,18 @@ var IndexPipeline = class _IndexPipeline {
20283
22185
  if (typeof maxChunks === "number") {
20284
22186
  chunks = chunks.slice(0, maxChunks);
20285
22187
  }
22188
+ if (this.hooks.transformChunk) {
22189
+ const transformed = [];
22190
+ for (const chunk of chunks) {
22191
+ const result = await this.hooks.transformChunk(chunk);
22192
+ if (result === null) {
22193
+ this.logger.debug(`Chunk ${chunk.chunkKey} skipped by transformChunk hook`);
22194
+ continue;
22195
+ }
22196
+ transformed.push(result);
22197
+ }
22198
+ chunks = transformed;
22199
+ }
20286
22200
  for (const chunk of chunks) {
20287
22201
  this.logger.event("chunked", {
20288
22202
  url: chunk.url,
@@ -20295,7 +22209,12 @@ var IndexPipeline = class _IndexPipeline {
20295
22209
  for (const chunk of chunks) {
20296
22210
  currentChunkMap.set(chunk.chunkKey, chunk);
20297
22211
  }
20298
- const changedChunks = chunks.filter((chunk) => {
22212
+ const chunkHashStart = stageStart();
22213
+ const currentChunkKeys = chunks.map((c) => c.chunkKey);
22214
+ const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.fetchContentHashesForKeys(currentChunkKeys, scope);
22215
+ stageEnd("chunk_hashes", chunkHashStart);
22216
+ this.logger.debug(`Fetched ${existingHashes.size} existing chunk hashes for ${currentChunkKeys.length} current keys`);
22217
+ let changedChunks = chunks.filter((chunk) => {
20299
22218
  if (options.force) {
20300
22219
  return true;
20301
22220
  }
@@ -20308,37 +22227,45 @@ var IndexPipeline = class _IndexPipeline {
20308
22227
  }
20309
22228
  return existingHash !== chunk.contentHash;
20310
22229
  });
20311
- const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
22230
+ const existingChunkIds = options.force ? /* @__PURE__ */ new Set() : await this.store.scanChunkIds(scope);
22231
+ const deletes = [...existingChunkIds].filter((chunkKey) => !currentChunkMap.has(chunkKey));
22232
+ if (this.hooks.beforeIndex) {
22233
+ changedChunks = await this.hooks.beforeIndex(changedChunks);
22234
+ }
20312
22235
  this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
20313
22236
  const upsertStart = stageStart();
20314
22237
  let documentsUpserted = 0;
20315
22238
  if (!options.dryRun && changedChunks.length > 0) {
20316
- this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
20317
- const UPSTASH_CONTENT_LIMIT = 4096;
22239
+ this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
20318
22240
  const docs = changedChunks.map((chunk) => {
20319
- const title = chunk.title;
20320
- const sectionTitle = chunk.sectionTitle ?? "";
20321
- const url = chunk.url;
20322
- const tags = chunk.tags.join(",");
20323
- const headingPath = chunk.headingPath.join(" > ");
20324
- const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
20325
- const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
20326
- const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
22241
+ const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
22242
+ if (embeddingText.length > 2e3) {
22243
+ this.logger.warn(
22244
+ `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
22245
+ );
22246
+ }
20327
22247
  return {
20328
22248
  id: chunk.chunkKey,
20329
- content: { title, sectionTitle, text, url, tags, headingPath },
22249
+ data: embeddingText,
20330
22250
  metadata: {
20331
- projectId: scope.projectId,
20332
- scopeName: scope.scopeName,
22251
+ url: chunk.url,
20333
22252
  path: chunk.path,
22253
+ title: chunk.title,
22254
+ sectionTitle: chunk.sectionTitle ?? "",
22255
+ headingPath: chunk.headingPath.join(" > "),
20334
22256
  snippet: chunk.snippet,
22257
+ chunkText: embeddingText,
22258
+ tags: chunk.tags,
20335
22259
  ordinal: chunk.ordinal,
20336
22260
  contentHash: chunk.contentHash,
20337
22261
  depth: chunk.depth,
20338
22262
  incomingLinks: chunk.incomingLinks,
20339
22263
  routeFile: chunk.routeFile,
20340
22264
  description: chunk.description ?? "",
20341
- keywords: (chunk.keywords ?? []).join(",")
22265
+ keywords: chunk.keywords ?? [],
22266
+ publishedAt: chunk.publishedAt ?? null,
22267
+ incomingAnchorText: chunk.incomingAnchorText ?? "",
22268
+ ...chunk.meta && Object.keys(chunk.meta).length > 0 ? { meta: chunk.meta } : {}
20342
22269
  }
20343
22270
  };
20344
22271
  });
@@ -20356,9 +22283,16 @@ var IndexPipeline = class _IndexPipeline {
20356
22283
  } else {
20357
22284
  this.logger.info("No chunks to upsert \u2014 all up to date");
20358
22285
  }
22286
+ if (this.config.llmsTxt.enable && !options.dryRun) {
22287
+ const llmsStart = stageStart();
22288
+ await writeLlmsTxt(pages, this.config, this.cwd, this.logger);
22289
+ stageEnd("llms_txt", llmsStart);
22290
+ }
20359
22291
  this.logger.info("Done.");
20360
- return {
22292
+ const stats = {
20361
22293
  pagesProcessed: pages.length,
22294
+ pagesChanged,
22295
+ pagesDeleted,
20362
22296
  chunksTotal: chunks.length,
20363
22297
  chunksChanged: changedChunks.length,
20364
22298
  documentsUpserted,
@@ -20367,6 +22301,10 @@ var IndexPipeline = class _IndexPipeline {
20367
22301
  routeBestEffort,
20368
22302
  stageTimingsMs
20369
22303
  };
22304
+ if (this.hooks.afterIndex) {
22305
+ await this.hooks.afterIndex(stats);
22306
+ }
22307
+ return stats;
20370
22308
  }
20371
22309
  };
20372
22310
 
@@ -20388,9 +22326,6 @@ function shouldRunAutoIndex(options) {
20388
22326
  if (explicit && /^(1|true|yes)$/i.test(explicit)) {
20389
22327
  return true;
20390
22328
  }
20391
- if (process.env.CI && /^(1|true)$/i.test(process.env.CI)) {
20392
- return true;
20393
- }
20394
22329
  return false;
20395
22330
  }
20396
22331
  function searchsocketVitePlugin(options = {}) {
@@ -20415,7 +22350,8 @@ function searchsocketVitePlugin(options = {}) {
20415
22350
  const pipeline = await IndexPipeline.create({
20416
22351
  cwd,
20417
22352
  configPath: options.configPath,
20418
- logger: logger3
22353
+ logger: logger3,
22354
+ hooks: options.hooks
20419
22355
  });
20420
22356
  const stats = await pipeline.run({
20421
22357
  changedOnly: options.changedOnly ?? true,