searchsocket 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3,18 +3,20 @@ import path from 'path';
3
3
  import { createJiti } from 'jiti';
4
4
  import { z } from 'zod';
5
5
  import { execSync, spawn } from 'child_process';
6
- import { createHash } from 'crypto';
6
+ import { FusionAlgorithm, QueryMode } from '@upstash/vector';
7
+ import { timingSafeEqual, createHash } from 'crypto';
7
8
  import { load } from 'cheerio';
8
9
  import matter from 'gray-matter';
9
10
  import fg from 'fast-glob';
10
11
  import pLimit from 'p-limit';
11
- import fs3 from 'fs/promises';
12
+ import fs8 from 'fs/promises';
12
13
  import net from 'net';
13
14
  import { gunzipSync } from 'zlib';
14
15
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
15
16
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
16
17
  import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
17
18
  import { createMcpExpressApp } from '@modelcontextprotocol/sdk/server/express.js';
19
+ import { WebStandardStreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js';
18
20
 
19
21
  var __getOwnPropNames = Object.getOwnPropertyNames;
20
22
  var __commonJS = (cb, mod) => function __require() {
@@ -5013,32 +5015,32 @@ var require_URL = __commonJS({
5013
5015
  else
5014
5016
  return basepath.substring(0, lastslash + 1) + refpath;
5015
5017
  }
5016
- function remove_dot_segments(path13) {
5017
- if (!path13) return path13;
5018
+ function remove_dot_segments(path14) {
5019
+ if (!path14) return path14;
5018
5020
  var output = "";
5019
- while (path13.length > 0) {
5020
- if (path13 === "." || path13 === "..") {
5021
- path13 = "";
5021
+ while (path14.length > 0) {
5022
+ if (path14 === "." || path14 === "..") {
5023
+ path14 = "";
5022
5024
  break;
5023
5025
  }
5024
- var twochars = path13.substring(0, 2);
5025
- var threechars = path13.substring(0, 3);
5026
- var fourchars = path13.substring(0, 4);
5026
+ var twochars = path14.substring(0, 2);
5027
+ var threechars = path14.substring(0, 3);
5028
+ var fourchars = path14.substring(0, 4);
5027
5029
  if (threechars === "../") {
5028
- path13 = path13.substring(3);
5030
+ path14 = path14.substring(3);
5029
5031
  } else if (twochars === "./") {
5030
- path13 = path13.substring(2);
5032
+ path14 = path14.substring(2);
5031
5033
  } else if (threechars === "/./") {
5032
- path13 = "/" + path13.substring(3);
5033
- } else if (twochars === "/." && path13.length === 2) {
5034
- path13 = "/";
5035
- } else if (fourchars === "/../" || threechars === "/.." && path13.length === 3) {
5036
- path13 = "/" + path13.substring(4);
5034
+ path14 = "/" + path14.substring(3);
5035
+ } else if (twochars === "/." && path14.length === 2) {
5036
+ path14 = "/";
5037
+ } else if (fourchars === "/../" || threechars === "/.." && path14.length === 3) {
5038
+ path14 = "/" + path14.substring(4);
5037
5039
  output = output.replace(/\/?[^\/]*$/, "");
5038
5040
  } else {
5039
- var segment = path13.match(/(\/?([^\/]*))/)[0];
5041
+ var segment = path14.match(/(\/?([^\/]*))/)[0];
5040
5042
  output += segment;
5041
- path13 = path13.substring(segment.length);
5043
+ path14 = path14.substring(segment.length);
5042
5044
  }
5043
5045
  }
5044
5046
  return output;
@@ -16634,6 +16636,7 @@ var searchSocketConfigSchema = z.object({
16634
16636
  dropSelectors: z.array(z.string()).optional(),
16635
16637
  ignoreAttr: z.string().optional(),
16636
16638
  noindexAttr: z.string().optional(),
16639
+ imageDescAttr: z.string().optional(),
16637
16640
  respectRobotsNoindex: z.boolean().optional()
16638
16641
  }).optional(),
16639
16642
  transform: z.object({
@@ -16649,35 +16652,48 @@ var searchSocketConfigSchema = z.object({
16649
16652
  headingPathDepth: z.number().int().positive().optional(),
16650
16653
  dontSplitInside: z.array(z.enum(["code", "table", "blockquote"])).optional(),
16651
16654
  prependTitle: z.boolean().optional(),
16652
- pageSummaryChunk: z.boolean().optional()
16655
+ pageSummaryChunk: z.boolean().optional(),
16656
+ weightHeadings: z.boolean().optional()
16653
16657
  }).optional(),
16654
16658
  upstash: z.object({
16655
16659
  url: z.string().url().optional(),
16656
16660
  token: z.string().min(1).optional(),
16657
16661
  urlEnv: z.string().min(1).optional(),
16658
- tokenEnv: z.string().min(1).optional()
16662
+ tokenEnv: z.string().min(1).optional(),
16663
+ namespaces: z.object({
16664
+ pages: z.string().min(1).optional(),
16665
+ chunks: z.string().min(1).optional()
16666
+ }).optional()
16667
+ }).optional(),
16668
+ embedding: z.object({
16669
+ model: z.string().optional(),
16670
+ dimensions: z.number().int().positive().optional(),
16671
+ taskType: z.string().optional(),
16672
+ batchSize: z.number().int().positive().optional()
16659
16673
  }).optional(),
16660
16674
  search: z.object({
16661
- semanticWeight: z.number().min(0).max(1).optional(),
16662
- inputEnrichment: z.boolean().optional(),
16663
- reranking: z.boolean().optional(),
16664
16675
  dualSearch: z.boolean().optional(),
16665
16676
  pageSearchWeight: z.number().min(0).max(1).optional()
16666
16677
  }).optional(),
16667
16678
  ranking: z.object({
16668
16679
  enableIncomingLinkBoost: z.boolean().optional(),
16669
16680
  enableDepthBoost: z.boolean().optional(),
16681
+ enableFreshnessBoost: z.boolean().optional(),
16682
+ freshnessDecayRate: z.number().positive().optional(),
16683
+ enableAnchorTextBoost: z.boolean().optional(),
16670
16684
  pageWeights: z.record(z.string(), z.number().min(0)).optional(),
16671
16685
  aggregationCap: z.number().int().positive().optional(),
16672
16686
  aggregationDecay: z.number().min(0).max(1).optional(),
16673
16687
  minChunkScoreRatio: z.number().min(0).max(1).optional(),
16674
- minScore: z.number().min(0).max(1).optional(),
16688
+ minScoreRatio: z.number().min(0).max(1).optional(),
16675
16689
  scoreGapThreshold: z.number().min(0).max(1).optional(),
16676
16690
  weights: z.object({
16677
16691
  incomingLinks: z.number().optional(),
16678
16692
  depth: z.number().optional(),
16679
16693
  aggregation: z.number().optional(),
16680
- titleMatch: z.number().optional()
16694
+ titleMatch: z.number().optional(),
16695
+ freshness: z.number().optional(),
16696
+ anchorText: z.number().optional()
16681
16697
  }).optional()
16682
16698
  }).optional(),
16683
16699
  api: z.object({
@@ -16692,12 +16708,28 @@ var searchSocketConfigSchema = z.object({
16692
16708
  }).optional(),
16693
16709
  mcp: z.object({
16694
16710
  enable: z.boolean().optional(),
16711
+ access: z.enum(["public", "private"]).optional(),
16695
16712
  transport: z.enum(["stdio", "http"]).optional(),
16696
16713
  http: z.object({
16697
16714
  port: z.number().int().positive().optional(),
16698
- path: z.string().optional()
16715
+ path: z.string().optional(),
16716
+ apiKey: z.string().min(1).optional(),
16717
+ apiKeyEnv: z.string().min(1).optional()
16718
+ }).optional(),
16719
+ handle: z.object({
16720
+ path: z.string().optional(),
16721
+ apiKey: z.string().min(1).optional(),
16722
+ enableJsonResponse: z.boolean().optional()
16699
16723
  }).optional()
16700
16724
  }).optional(),
16725
+ llmsTxt: z.object({
16726
+ enable: z.boolean().optional(),
16727
+ outputPath: z.string().optional(),
16728
+ title: z.string().optional(),
16729
+ description: z.string().optional(),
16730
+ generateFull: z.boolean().optional(),
16731
+ serveMarkdownVariants: z.boolean().optional()
16732
+ }).optional(),
16701
16733
  state: z.object({
16702
16734
  dir: z.string().optional()
16703
16735
  }).optional()
@@ -16736,6 +16768,7 @@ function createDefaultConfig(projectId) {
16736
16768
  dropSelectors: DEFAULT_DROP_SELECTORS,
16737
16769
  ignoreAttr: "data-search-ignore",
16738
16770
  noindexAttr: "data-search-noindex",
16771
+ imageDescAttr: "data-search-description",
16739
16772
  respectRobotsNoindex: true
16740
16773
  },
16741
16774
  transform: {
@@ -16745,39 +16778,52 @@ function createDefaultConfig(projectId) {
16745
16778
  },
16746
16779
  chunking: {
16747
16780
  strategy: "hybrid",
16748
- maxChars: 2200,
16781
+ maxChars: 1500,
16749
16782
  overlapChars: 200,
16750
16783
  minChars: 250,
16751
16784
  headingPathDepth: 3,
16752
16785
  dontSplitInside: ["code", "table", "blockquote"],
16753
16786
  prependTitle: true,
16754
- pageSummaryChunk: true
16787
+ pageSummaryChunk: true,
16788
+ weightHeadings: true
16755
16789
  },
16756
16790
  upstash: {
16757
- urlEnv: "UPSTASH_SEARCH_REST_URL",
16758
- tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
16791
+ urlEnv: "UPSTASH_VECTOR_REST_URL",
16792
+ tokenEnv: "UPSTASH_VECTOR_REST_TOKEN",
16793
+ namespaces: {
16794
+ pages: "pages",
16795
+ chunks: "chunks"
16796
+ }
16797
+ },
16798
+ embedding: {
16799
+ model: "bge-large-en-v1.5",
16800
+ dimensions: 1024,
16801
+ taskType: "RETRIEVAL_DOCUMENT",
16802
+ batchSize: 100
16759
16803
  },
16760
16804
  search: {
16761
- semanticWeight: 0.75,
16762
- inputEnrichment: true,
16763
- reranking: true,
16764
16805
  dualSearch: true,
16765
16806
  pageSearchWeight: 0.3
16766
16807
  },
16767
16808
  ranking: {
16768
16809
  enableIncomingLinkBoost: true,
16769
16810
  enableDepthBoost: true,
16811
+ enableFreshnessBoost: false,
16812
+ freshnessDecayRate: 1e-3,
16813
+ enableAnchorTextBoost: false,
16770
16814
  pageWeights: {},
16771
16815
  aggregationCap: 5,
16772
16816
  aggregationDecay: 0.5,
16773
16817
  minChunkScoreRatio: 0.5,
16774
- minScore: 0.3,
16818
+ minScoreRatio: 0.7,
16775
16819
  scoreGapThreshold: 0.4,
16776
16820
  weights: {
16777
16821
  incomingLinks: 0.05,
16778
16822
  depth: 0.03,
16779
16823
  aggregation: 0.1,
16780
- titleMatch: 0.15
16824
+ titleMatch: 0.15,
16825
+ freshness: 0.1,
16826
+ anchorText: 0.1
16781
16827
  }
16782
16828
  },
16783
16829
  api: {
@@ -16788,12 +16834,23 @@ function createDefaultConfig(projectId) {
16788
16834
  },
16789
16835
  mcp: {
16790
16836
  enable: process.env.NODE_ENV !== "production",
16837
+ access: "private",
16791
16838
  transport: "stdio",
16792
16839
  http: {
16793
16840
  port: 3338,
16794
16841
  path: "/mcp"
16842
+ },
16843
+ handle: {
16844
+ path: "/api/mcp",
16845
+ enableJsonResponse: true
16795
16846
  }
16796
16847
  },
16848
+ llmsTxt: {
16849
+ enable: false,
16850
+ outputPath: "static/llms.txt",
16851
+ generateFull: true,
16852
+ serveMarkdownVariants: false
16853
+ },
16797
16854
  state: {
16798
16855
  dir: ".searchsocket"
16799
16856
  }
@@ -16921,7 +16978,15 @@ ${issues}`
16921
16978
  },
16922
16979
  upstash: {
16923
16980
  ...defaults.upstash,
16924
- ...parsed.upstash
16981
+ ...parsed.upstash,
16982
+ namespaces: {
16983
+ ...defaults.upstash.namespaces,
16984
+ ...parsed.upstash?.namespaces
16985
+ }
16986
+ },
16987
+ embedding: {
16988
+ ...defaults.embedding,
16989
+ ...parsed.embedding
16925
16990
  },
16926
16991
  search: {
16927
16992
  ...defaults.search,
@@ -16958,8 +17023,16 @@ ${issues}`
16958
17023
  http: {
16959
17024
  ...defaults.mcp.http,
16960
17025
  ...parsed.mcp?.http
17026
+ },
17027
+ handle: {
17028
+ ...defaults.mcp.handle,
17029
+ ...parsed.mcp?.handle
16961
17030
  }
16962
17031
  },
17032
+ llmsTxt: {
17033
+ ...defaults.llmsTxt,
17034
+ ...parsed.llmsTxt
17035
+ },
16963
17036
  state: {
16964
17037
  ...defaults.state,
16965
17038
  ...parsed.state
@@ -16979,6 +17052,15 @@ ${issues}`
16979
17052
  maxDepth: 10
16980
17053
  };
16981
17054
  }
17055
+ if (merged.mcp.access === "public") {
17056
+ const resolvedKey = merged.mcp.http.apiKey ?? (merged.mcp.http.apiKeyEnv ? process.env[merged.mcp.http.apiKeyEnv] : void 0);
17057
+ if (!resolvedKey) {
17058
+ throw new SearchSocketError(
17059
+ "CONFIG_MISSING",
17060
+ '`mcp.access` is "public" but no API key is configured. Set `mcp.http.apiKey` or `mcp.http.apiKeyEnv`.'
17061
+ );
17062
+ }
17063
+ }
16982
17064
  if (merged.source.mode === "crawl" && !merged.source.crawl?.baseUrl) {
16983
17065
  throw new SearchSocketError("CONFIG_MISSING", "`source.crawl.baseUrl` is required when source.mode is crawl.");
16984
17066
  }
@@ -17042,13 +17124,84 @@ function normalizeMarkdown(input) {
17042
17124
  function sanitizeScopeName(scopeName) {
17043
17125
  return scopeName.toLowerCase().replace(/[^a-z0-9._-]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 80);
17044
17126
  }
17127
+ function markdownToPlain(markdown) {
17128
+ return markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/[#>*_|\-]/g, " ").replace(/\s+/g, " ").trim();
17129
+ }
17045
17130
  function toSnippet(markdown, maxLen = 220) {
17046
- const plain = markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/[#>*_|\-]/g, " ").replace(/\s+/g, " ").trim();
17131
+ const plain = markdownToPlain(markdown);
17047
17132
  if (plain.length <= maxLen) {
17048
17133
  return plain;
17049
17134
  }
17050
17135
  return `${plain.slice(0, Math.max(0, maxLen - 1)).trim()}\u2026`;
17051
17136
  }
17137
+ function queryAwareExcerpt(markdown, query, maxLen = 220) {
17138
+ const plain = markdownToPlain(markdown);
17139
+ if (plain.length <= maxLen) return plain;
17140
+ const tokens = query.toLowerCase().split(/\s+/).filter((t) => t.length >= 2);
17141
+ if (tokens.length === 0) return toSnippet(markdown, maxLen);
17142
+ const positions = [];
17143
+ for (let ti = 0; ti < tokens.length; ti++) {
17144
+ const escaped = tokens[ti].replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
17145
+ const re = new RegExp(escaped, "gi");
17146
+ let m;
17147
+ while ((m = re.exec(plain)) !== null) {
17148
+ positions.push({ start: m.index, end: m.index + m[0].length, tokenIdx: ti });
17149
+ }
17150
+ }
17151
+ if (positions.length === 0) return toSnippet(markdown, maxLen);
17152
+ positions.sort((a, b) => a.start - b.start);
17153
+ let bestUniqueCount = 0;
17154
+ let bestTotalCount = 0;
17155
+ let bestLeft = 0;
17156
+ let bestRight = 0;
17157
+ let left = 0;
17158
+ const tokenCounts = /* @__PURE__ */ new Map();
17159
+ for (let right = 0; right < positions.length; right++) {
17160
+ tokenCounts.set(positions[right].tokenIdx, (tokenCounts.get(positions[right].tokenIdx) ?? 0) + 1);
17161
+ while (positions[right].end - positions[left].start > maxLen && left < right) {
17162
+ const leftToken = positions[left].tokenIdx;
17163
+ const cnt = tokenCounts.get(leftToken) - 1;
17164
+ if (cnt === 0) tokenCounts.delete(leftToken);
17165
+ else tokenCounts.set(leftToken, cnt);
17166
+ left++;
17167
+ }
17168
+ const uniqueCount = tokenCounts.size;
17169
+ const totalCount = right - left + 1;
17170
+ if (uniqueCount > bestUniqueCount || uniqueCount === bestUniqueCount && totalCount > bestTotalCount) {
17171
+ bestUniqueCount = uniqueCount;
17172
+ bestTotalCount = totalCount;
17173
+ bestLeft = left;
17174
+ bestRight = right;
17175
+ }
17176
+ }
17177
+ const mid = Math.floor((positions[bestLeft].start + positions[bestRight].end) / 2);
17178
+ let start = Math.max(0, mid - Math.floor(maxLen / 2));
17179
+ let end = Math.min(plain.length, start + maxLen);
17180
+ start = Math.max(0, end - maxLen);
17181
+ if (start > 0) {
17182
+ const spaceIdx = plain.lastIndexOf(" ", start);
17183
+ if (spaceIdx > start - 30) {
17184
+ start = spaceIdx + 1;
17185
+ }
17186
+ }
17187
+ if (end < plain.length) {
17188
+ const spaceIdx = plain.indexOf(" ", end);
17189
+ if (spaceIdx !== -1 && spaceIdx < end + 30) {
17190
+ end = spaceIdx;
17191
+ }
17192
+ }
17193
+ let excerpt = plain.slice(start, end);
17194
+ if (excerpt.length > Math.ceil(maxLen * 1.2)) {
17195
+ excerpt = excerpt.slice(0, maxLen);
17196
+ const lastSpace = excerpt.lastIndexOf(" ");
17197
+ if (lastSpace > maxLen * 0.5) {
17198
+ excerpt = excerpt.slice(0, lastSpace);
17199
+ }
17200
+ }
17201
+ const prefix = start > 0 ? "\u2026" : "";
17202
+ const suffix = end < plain.length ? "\u2026" : "";
17203
+ return `${prefix}${excerpt}${suffix}`;
17204
+ }
17052
17205
  function extractFirstParagraph(markdown) {
17053
17206
  const lines = markdown.split("\n");
17054
17207
  let inFence = false;
@@ -17109,162 +17262,288 @@ function ensureStateDirs(cwd, stateDir, scope) {
17109
17262
  fs.mkdirSync(statePath, { recursive: true });
17110
17263
  return { statePath };
17111
17264
  }
17112
-
17113
- // src/vector/upstash.ts
17114
- function chunkIndexName(scope) {
17115
- return `${scope.projectId}--${scope.scopeName}`;
17116
- }
17117
- function pageIndexName(scope) {
17118
- return `${scope.projectId}--${scope.scopeName}--pages`;
17119
- }
17120
17265
  var UpstashSearchStore = class {
17121
- client;
17266
+ index;
17267
+ pagesNs;
17268
+ chunksNs;
17122
17269
  constructor(opts) {
17123
- this.client = opts.client;
17124
- }
17125
- chunkIndex(scope) {
17126
- return this.client.index(chunkIndexName(scope));
17127
- }
17128
- pageIndex(scope) {
17129
- return this.client.index(pageIndexName(scope));
17270
+ this.index = opts.index;
17271
+ this.pagesNs = opts.index.namespace(opts.pagesNamespace);
17272
+ this.chunksNs = opts.index.namespace(opts.chunksNamespace);
17130
17273
  }
17131
17274
  async upsertChunks(chunks, scope) {
17132
17275
  if (chunks.length === 0) return;
17133
- const index = this.chunkIndex(scope);
17134
17276
  const BATCH_SIZE = 100;
17135
17277
  for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
17136
17278
  const batch = chunks.slice(i, i + BATCH_SIZE);
17137
- await index.upsert(batch);
17138
- }
17139
- }
17140
- async search(query, opts, scope) {
17141
- const index = this.chunkIndex(scope);
17142
- const results = await index.search({
17143
- query,
17144
- limit: opts.limit,
17145
- semanticWeight: opts.semanticWeight,
17146
- inputEnrichment: opts.inputEnrichment,
17147
- reranking: opts.reranking,
17148
- filter: opts.filter
17279
+ await this.chunksNs.upsert(
17280
+ batch.map((c) => ({
17281
+ id: c.id,
17282
+ data: c.data,
17283
+ metadata: {
17284
+ ...c.metadata,
17285
+ projectId: scope.projectId,
17286
+ scopeName: scope.scopeName,
17287
+ type: c.metadata.type || "chunk"
17288
+ }
17289
+ }))
17290
+ );
17291
+ }
17292
+ }
17293
+ async search(data, opts, scope) {
17294
+ const filterParts = [
17295
+ `projectId = '${scope.projectId}'`,
17296
+ `scopeName = '${scope.scopeName}'`
17297
+ ];
17298
+ if (opts.filter) {
17299
+ filterParts.push(opts.filter);
17300
+ }
17301
+ const results = await this.chunksNs.query({
17302
+ data,
17303
+ topK: opts.limit,
17304
+ includeMetadata: true,
17305
+ filter: filterParts.join(" AND "),
17306
+ queryMode: QueryMode.HYBRID,
17307
+ fusionAlgorithm: FusionAlgorithm.DBSF
17308
+ });
17309
+ return results.map((doc) => ({
17310
+ id: String(doc.id),
17311
+ score: doc.score,
17312
+ metadata: {
17313
+ projectId: doc.metadata?.projectId ?? "",
17314
+ scopeName: doc.metadata?.scopeName ?? "",
17315
+ url: doc.metadata?.url ?? "",
17316
+ path: doc.metadata?.path ?? "",
17317
+ title: doc.metadata?.title ?? "",
17318
+ sectionTitle: doc.metadata?.sectionTitle ?? "",
17319
+ headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
17320
+ snippet: doc.metadata?.snippet ?? "",
17321
+ chunkText: doc.metadata?.chunkText ?? "",
17322
+ ordinal: doc.metadata?.ordinal ?? 0,
17323
+ contentHash: doc.metadata?.contentHash ?? "",
17324
+ depth: doc.metadata?.depth ?? 0,
17325
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
17326
+ routeFile: doc.metadata?.routeFile ?? "",
17327
+ tags: doc.metadata?.tags ?? [],
17328
+ description: doc.metadata?.description || void 0,
17329
+ keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
17330
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
17331
+ incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
17332
+ }
17333
+ }));
17334
+ }
17335
+ async searchChunksByUrl(data, url, opts, scope) {
17336
+ const filterParts = [
17337
+ `projectId = '${scope.projectId}'`,
17338
+ `scopeName = '${scope.scopeName}'`,
17339
+ `url = '${url}'`
17340
+ ];
17341
+ if (opts.filter) {
17342
+ filterParts.push(opts.filter);
17343
+ }
17344
+ const results = await this.chunksNs.query({
17345
+ data,
17346
+ topK: opts.limit,
17347
+ includeMetadata: true,
17348
+ filter: filterParts.join(" AND "),
17349
+ queryMode: QueryMode.HYBRID,
17350
+ fusionAlgorithm: FusionAlgorithm.DBSF
17149
17351
  });
17150
17352
  return results.map((doc) => ({
17151
- id: doc.id,
17353
+ id: String(doc.id),
17152
17354
  score: doc.score,
17153
17355
  metadata: {
17154
17356
  projectId: doc.metadata?.projectId ?? "",
17155
17357
  scopeName: doc.metadata?.scopeName ?? "",
17156
- url: doc.content.url,
17358
+ url: doc.metadata?.url ?? "",
17157
17359
  path: doc.metadata?.path ?? "",
17158
- title: doc.content.title,
17159
- sectionTitle: doc.content.sectionTitle,
17160
- headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
17360
+ title: doc.metadata?.title ?? "",
17361
+ sectionTitle: doc.metadata?.sectionTitle ?? "",
17362
+ headingPath: doc.metadata?.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : [],
17161
17363
  snippet: doc.metadata?.snippet ?? "",
17162
- chunkText: doc.content.text,
17364
+ chunkText: doc.metadata?.chunkText ?? "",
17163
17365
  ordinal: doc.metadata?.ordinal ?? 0,
17164
17366
  contentHash: doc.metadata?.contentHash ?? "",
17165
17367
  depth: doc.metadata?.depth ?? 0,
17166
17368
  incomingLinks: doc.metadata?.incomingLinks ?? 0,
17167
17369
  routeFile: doc.metadata?.routeFile ?? "",
17168
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17370
+ tags: doc.metadata?.tags ?? [],
17169
17371
  description: doc.metadata?.description || void 0,
17170
- keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
17372
+ keywords: doc.metadata?.keywords?.length ? doc.metadata.keywords : void 0,
17373
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0,
17374
+ incomingAnchorText: doc.metadata?.incomingAnchorText || void 0
17171
17375
  }
17172
17376
  }));
17173
17377
  }
17174
- async searchPages(query, opts, scope) {
17175
- const index = this.pageIndex(scope);
17378
+ async searchPagesByText(data, opts, scope) {
17379
+ return this.queryPages({ data }, opts, scope);
17380
+ }
17381
+ async searchPagesByVector(vector, opts, scope) {
17382
+ return this.queryPages({ vector }, opts, scope);
17383
+ }
17384
+ async queryPages(input, opts, scope) {
17385
+ const filterParts = [
17386
+ `projectId = '${scope.projectId}'`,
17387
+ `scopeName = '${scope.scopeName}'`
17388
+ ];
17389
+ if (opts.filter) {
17390
+ filterParts.push(opts.filter);
17391
+ }
17176
17392
  let results;
17177
17393
  try {
17178
- results = await index.search({
17179
- query,
17180
- limit: opts.limit,
17181
- semanticWeight: opts.semanticWeight,
17182
- inputEnrichment: opts.inputEnrichment,
17183
- reranking: true,
17184
- filter: opts.filter
17394
+ results = await this.pagesNs.query({
17395
+ ...input,
17396
+ topK: opts.limit,
17397
+ includeMetadata: true,
17398
+ filter: filterParts.join(" AND "),
17399
+ queryMode: QueryMode.HYBRID,
17400
+ fusionAlgorithm: FusionAlgorithm.DBSF
17185
17401
  });
17186
17402
  } catch {
17187
17403
  return [];
17188
17404
  }
17189
17405
  return results.map((doc) => ({
17190
- id: doc.id,
17406
+ id: String(doc.id),
17191
17407
  score: doc.score,
17192
- title: doc.content.title,
17193
- url: doc.content.url,
17194
- description: doc.content.description ?? "",
17195
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17408
+ title: doc.metadata?.title ?? "",
17409
+ url: doc.metadata?.url ?? "",
17410
+ description: doc.metadata?.description ?? "",
17411
+ tags: doc.metadata?.tags ?? [],
17196
17412
  depth: doc.metadata?.depth ?? 0,
17197
17413
  incomingLinks: doc.metadata?.incomingLinks ?? 0,
17198
- routeFile: doc.metadata?.routeFile ?? ""
17414
+ routeFile: doc.metadata?.routeFile ?? "",
17415
+ publishedAt: typeof doc.metadata?.publishedAt === "number" ? doc.metadata.publishedAt : void 0
17199
17416
  }));
17200
17417
  }
17201
- async deleteByIds(ids, scope) {
17418
+ async deleteByIds(ids, _scope) {
17202
17419
  if (ids.length === 0) return;
17203
- const index = this.chunkIndex(scope);
17204
- const BATCH_SIZE = 500;
17420
+ const BATCH_SIZE = 100;
17205
17421
  for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17206
17422
  const batch = ids.slice(i, i + BATCH_SIZE);
17207
- await index.delete(batch);
17423
+ await this.chunksNs.delete(batch);
17208
17424
  }
17209
17425
  }
17210
17426
  async deleteScope(scope) {
17211
- try {
17212
- const chunkIdx = this.chunkIndex(scope);
17213
- await chunkIdx.deleteIndex();
17214
- } catch {
17215
- }
17216
- try {
17217
- const pageIdx = this.pageIndex(scope);
17218
- await pageIdx.deleteIndex();
17219
- } catch {
17427
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17428
+ const ids = [];
17429
+ let cursor = "0";
17430
+ try {
17431
+ for (; ; ) {
17432
+ const result = await ns.range({
17433
+ cursor,
17434
+ limit: 100,
17435
+ includeMetadata: true
17436
+ });
17437
+ for (const doc of result.vectors) {
17438
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
17439
+ ids.push(String(doc.id));
17440
+ }
17441
+ }
17442
+ if (!result.nextCursor || result.nextCursor === "0") break;
17443
+ cursor = result.nextCursor;
17444
+ }
17445
+ } catch {
17446
+ }
17447
+ if (ids.length > 0) {
17448
+ const BATCH_SIZE = 100;
17449
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17450
+ const batch = ids.slice(i, i + BATCH_SIZE);
17451
+ await ns.delete(batch);
17452
+ }
17453
+ }
17220
17454
  }
17221
17455
  }
17222
17456
  async listScopes(projectId) {
17223
- const allIndexes = await this.client.listIndexes();
17224
- const prefix = `${projectId}--`;
17225
- const scopeNames = /* @__PURE__ */ new Set();
17226
- for (const name of allIndexes) {
17227
- if (name.startsWith(prefix) && !name.endsWith("--pages")) {
17228
- const scopeName = name.slice(prefix.length);
17229
- scopeNames.add(scopeName);
17230
- }
17231
- }
17232
- const scopes = [];
17233
- for (const scopeName of scopeNames) {
17234
- const scope = {
17235
- projectId,
17236
- scopeName,
17237
- scopeId: `${projectId}:${scopeName}`
17238
- };
17457
+ const scopeMap = /* @__PURE__ */ new Map();
17458
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17459
+ let cursor = "0";
17239
17460
  try {
17240
- const info = await this.chunkIndex(scope).info();
17241
- scopes.push({
17242
- projectId,
17243
- scopeName,
17244
- lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
17245
- documentCount: info.documentCount
17246
- });
17461
+ for (; ; ) {
17462
+ const result = await ns.range({
17463
+ cursor,
17464
+ limit: 100,
17465
+ includeMetadata: true
17466
+ });
17467
+ for (const doc of result.vectors) {
17468
+ if (doc.metadata?.projectId === projectId) {
17469
+ const scopeName = doc.metadata.scopeName ?? "";
17470
+ scopeMap.set(scopeName, (scopeMap.get(scopeName) ?? 0) + 1);
17471
+ }
17472
+ }
17473
+ if (!result.nextCursor || result.nextCursor === "0") break;
17474
+ cursor = result.nextCursor;
17475
+ }
17247
17476
  } catch {
17248
- scopes.push({
17249
- projectId,
17250
- scopeName,
17251
- lastIndexedAt: "unknown",
17252
- documentCount: 0
17253
- });
17254
17477
  }
17255
17478
  }
17256
- return scopes;
17479
+ return [...scopeMap.entries()].map(([scopeName, count]) => ({
17480
+ projectId,
17481
+ scopeName,
17482
+ lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
17483
+ documentCount: count
17484
+ }));
17257
17485
  }
17258
17486
  async getContentHashes(scope) {
17259
17487
  const map = /* @__PURE__ */ new Map();
17260
- const index = this.chunkIndex(scope);
17261
17488
  let cursor = "0";
17262
17489
  try {
17263
17490
  for (; ; ) {
17264
- const result = await index.range({ cursor, limit: 100 });
17265
- for (const doc of result.documents) {
17266
- if (doc.metadata?.contentHash) {
17267
- map.set(doc.id, doc.metadata.contentHash);
17491
+ const result = await this.chunksNs.range({
17492
+ cursor,
17493
+ limit: 100,
17494
+ includeMetadata: true
17495
+ });
17496
+ for (const doc of result.vectors) {
17497
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
17498
+ map.set(String(doc.id), doc.metadata.contentHash);
17499
+ }
17500
+ }
17501
+ if (!result.nextCursor || result.nextCursor === "0") break;
17502
+ cursor = result.nextCursor;
17503
+ }
17504
+ } catch {
17505
+ }
17506
+ return map;
17507
+ }
17508
+ async listPages(scope, opts) {
17509
+ const cursor = opts?.cursor ?? "0";
17510
+ const limit = opts?.limit ?? 50;
17511
+ try {
17512
+ const result = await this.pagesNs.range({
17513
+ cursor,
17514
+ limit,
17515
+ includeMetadata: true
17516
+ });
17517
+ const pages = result.vectors.filter(
17518
+ (doc) => doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && (!opts?.pathPrefix || (doc.metadata?.url ?? "").startsWith(opts.pathPrefix))
17519
+ ).map((doc) => ({
17520
+ url: doc.metadata?.url ?? "",
17521
+ title: doc.metadata?.title ?? "",
17522
+ description: doc.metadata?.description ?? "",
17523
+ routeFile: doc.metadata?.routeFile ?? ""
17524
+ }));
17525
+ const response = { pages };
17526
+ if (result.nextCursor && result.nextCursor !== "0") {
17527
+ response.nextCursor = result.nextCursor;
17528
+ }
17529
+ return response;
17530
+ } catch {
17531
+ return { pages: [] };
17532
+ }
17533
+ }
17534
+ async getPageHashes(scope) {
17535
+ const map = /* @__PURE__ */ new Map();
17536
+ let cursor = "0";
17537
+ try {
17538
+ for (; ; ) {
17539
+ const result = await this.pagesNs.range({
17540
+ cursor,
17541
+ limit: 100,
17542
+ includeMetadata: true
17543
+ });
17544
+ for (const doc of result.vectors) {
17545
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.contentHash) {
17546
+ map.set(String(doc.id), doc.metadata.contentHash);
17268
17547
  }
17269
17548
  }
17270
17549
  if (!result.nextCursor || result.nextCursor === "0") break;
@@ -17274,47 +17553,43 @@ var UpstashSearchStore = class {
17274
17553
  }
17275
17554
  return map;
17276
17555
  }
17556
+ async deletePagesByIds(ids, _scope) {
17557
+ if (ids.length === 0) return;
17558
+ const BATCH_SIZE = 50;
17559
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17560
+ const batch = ids.slice(i, i + BATCH_SIZE);
17561
+ await this.pagesNs.delete(batch);
17562
+ }
17563
+ }
17277
17564
  async upsertPages(pages, scope) {
17278
17565
  if (pages.length === 0) return;
17279
- const index = this.pageIndex(scope);
17280
17566
  const BATCH_SIZE = 50;
17281
17567
  for (let i = 0; i < pages.length; i += BATCH_SIZE) {
17282
17568
  const batch = pages.slice(i, i + BATCH_SIZE);
17283
- const docs = batch.map((p) => ({
17284
- id: p.url,
17285
- content: {
17286
- title: p.title,
17287
- url: p.url,
17288
- type: "page",
17289
- description: p.description ?? "",
17290
- keywords: (p.keywords ?? []).join(","),
17291
- summary: p.summary ?? "",
17292
- tags: p.tags.join(",")
17293
- },
17294
- metadata: {
17295
- markdown: p.markdown,
17296
- projectId: p.projectId,
17297
- scopeName: p.scopeName,
17298
- routeFile: p.routeFile,
17299
- routeResolution: p.routeResolution,
17300
- incomingLinks: p.incomingLinks,
17301
- outgoingLinks: p.outgoingLinks,
17302
- depth: p.depth,
17303
- indexedAt: p.indexedAt
17304
- }
17305
- }));
17306
- await index.upsert(docs);
17569
+ await this.pagesNs.upsert(
17570
+ batch.map((p) => ({
17571
+ id: p.id,
17572
+ data: p.data,
17573
+ metadata: {
17574
+ ...p.metadata,
17575
+ projectId: scope.projectId,
17576
+ scopeName: scope.scopeName,
17577
+ type: "page"
17578
+ }
17579
+ }))
17580
+ );
17307
17581
  }
17308
17582
  }
17309
17583
  async getPage(url, scope) {
17310
- const index = this.pageIndex(scope);
17311
17584
  try {
17312
- const results = await index.fetch([url]);
17585
+ const results = await this.pagesNs.fetch([url], {
17586
+ includeMetadata: true
17587
+ });
17313
17588
  const doc = results[0];
17314
- if (!doc) return null;
17589
+ if (!doc || !doc.metadata) return null;
17315
17590
  return {
17316
- url: doc.content.url,
17317
- title: doc.content.title,
17591
+ url: doc.metadata.url,
17592
+ title: doc.metadata.title,
17318
17593
  markdown: doc.metadata.markdown,
17319
17594
  projectId: doc.metadata.projectId,
17320
17595
  scopeName: doc.metadata.scopeName,
@@ -17322,27 +17597,86 @@ var UpstashSearchStore = class {
17322
17597
  routeResolution: doc.metadata.routeResolution,
17323
17598
  incomingLinks: doc.metadata.incomingLinks,
17324
17599
  outgoingLinks: doc.metadata.outgoingLinks,
17600
+ outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? void 0,
17325
17601
  depth: doc.metadata.depth,
17326
- tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17602
+ tags: doc.metadata.tags ?? [],
17327
17603
  indexedAt: doc.metadata.indexedAt,
17328
- summary: doc.content.summary || void 0,
17329
- description: doc.content.description || void 0,
17330
- keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
17604
+ summary: doc.metadata.summary || void 0,
17605
+ description: doc.metadata.description || void 0,
17606
+ keywords: doc.metadata.keywords?.length ? doc.metadata.keywords : void 0,
17607
+ publishedAt: typeof doc.metadata.publishedAt === "number" ? doc.metadata.publishedAt : void 0
17331
17608
  };
17332
17609
  } catch {
17333
17610
  return null;
17334
17611
  }
17335
17612
  }
17613
+ async fetchPageWithVector(url, scope) {
17614
+ try {
17615
+ const results = await this.pagesNs.fetch([url], {
17616
+ includeMetadata: true,
17617
+ includeVectors: true
17618
+ });
17619
+ const doc = results[0];
17620
+ if (!doc || !doc.metadata || !doc.vector) return null;
17621
+ if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
17622
+ return null;
17623
+ }
17624
+ return { metadata: doc.metadata, vector: doc.vector };
17625
+ } catch {
17626
+ return null;
17627
+ }
17628
+ }
17629
+ async fetchPagesBatch(urls, scope) {
17630
+ if (urls.length === 0) return [];
17631
+ try {
17632
+ const results = await this.pagesNs.fetch(urls, {
17633
+ includeMetadata: true
17634
+ });
17635
+ const out = [];
17636
+ for (const doc of results) {
17637
+ if (!doc || !doc.metadata) continue;
17638
+ if (doc.metadata.projectId !== scope.projectId || doc.metadata.scopeName !== scope.scopeName) {
17639
+ continue;
17640
+ }
17641
+ out.push({
17642
+ url: doc.metadata.url,
17643
+ title: doc.metadata.title,
17644
+ routeFile: doc.metadata.routeFile,
17645
+ outgoingLinkUrls: doc.metadata.outgoingLinkUrls ?? []
17646
+ });
17647
+ }
17648
+ return out;
17649
+ } catch {
17650
+ return [];
17651
+ }
17652
+ }
17336
17653
  async deletePages(scope) {
17654
+ const ids = [];
17655
+ let cursor = "0";
17337
17656
  try {
17338
- const index = this.pageIndex(scope);
17339
- await index.reset();
17657
+ for (; ; ) {
17658
+ const result = await this.pagesNs.range({
17659
+ cursor,
17660
+ limit: 100,
17661
+ includeMetadata: true
17662
+ });
17663
+ for (const doc of result.vectors) {
17664
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName) {
17665
+ ids.push(String(doc.id));
17666
+ }
17667
+ }
17668
+ if (!result.nextCursor || result.nextCursor === "0") break;
17669
+ cursor = result.nextCursor;
17670
+ }
17340
17671
  } catch {
17341
17672
  }
17673
+ if (ids.length > 0) {
17674
+ await this.deletePagesByIds(ids, scope);
17675
+ }
17342
17676
  }
17343
17677
  async health() {
17344
17678
  try {
17345
- await this.client.info();
17679
+ await this.index.info();
17346
17680
  return { ok: true };
17347
17681
  } catch (error) {
17348
17682
  return {
@@ -17352,14 +17686,31 @@ var UpstashSearchStore = class {
17352
17686
  }
17353
17687
  }
17354
17688
  async dropAllIndexes(projectId) {
17355
- const allIndexes = await this.client.listIndexes();
17356
- const prefix = `${projectId}--`;
17357
- for (const name of allIndexes) {
17358
- if (name.startsWith(prefix)) {
17359
- try {
17360
- const index = this.client.index(name);
17361
- await index.deleteIndex();
17362
- } catch {
17689
+ for (const ns of [this.chunksNs, this.pagesNs]) {
17690
+ const ids = [];
17691
+ let cursor = "0";
17692
+ try {
17693
+ for (; ; ) {
17694
+ const result = await ns.range({
17695
+ cursor,
17696
+ limit: 100,
17697
+ includeMetadata: true
17698
+ });
17699
+ for (const doc of result.vectors) {
17700
+ if (doc.metadata?.projectId === projectId) {
17701
+ ids.push(String(doc.id));
17702
+ }
17703
+ }
17704
+ if (!result.nextCursor || result.nextCursor === "0") break;
17705
+ cursor = result.nextCursor;
17706
+ }
17707
+ } catch {
17708
+ }
17709
+ if (ids.length > 0) {
17710
+ const BATCH_SIZE = 100;
17711
+ for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17712
+ const batch = ids.slice(i, i + BATCH_SIZE);
17713
+ await ns.delete(batch);
17363
17714
  }
17364
17715
  }
17365
17716
  }
@@ -17373,12 +17724,16 @@ async function createUpstashStore(config) {
17373
17724
  if (!url || !token) {
17374
17725
  throw new SearchSocketError(
17375
17726
  "VECTOR_BACKEND_UNAVAILABLE",
17376
- `Missing Upstash Search credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
17727
+ `Missing Upstash Vector credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
17377
17728
  );
17378
17729
  }
17379
- const { Search } = await import('@upstash/search');
17380
- const client = new Search({ url, token });
17381
- return new UpstashSearchStore({ client });
17730
+ const { Index } = await import('@upstash/vector');
17731
+ const index = new Index({ url, token });
17732
+ return new UpstashSearchStore({
17733
+ index,
17734
+ pagesNamespace: config.upstash.namespaces.pages,
17735
+ chunksNamespace: config.upstash.namespaces.chunks
17736
+ });
17382
17737
  }
17383
17738
  function sha1(input) {
17384
17739
  return createHash("sha1").update(input).digest("hex");
@@ -17446,6 +17801,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
17446
17801
  if (normalizeText(current.text)) {
17447
17802
  sections.push({
17448
17803
  sectionTitle: current.sectionTitle,
17804
+ headingLevel: current.headingLevel,
17449
17805
  headingPath: current.headingPath,
17450
17806
  text: current.text.trim()
17451
17807
  });
@@ -17464,6 +17820,7 @@ function parseHeadingSections(markdown, headingPathDepth) {
17464
17820
  headingStack.length = level;
17465
17821
  current = {
17466
17822
  sectionTitle: title,
17823
+ headingLevel: level,
17467
17824
  headingPath: headingStack.filter((entry) => Boolean(entry)).slice(0, headingPathDepth),
17468
17825
  text: `${line}
17469
17826
  `
@@ -17599,6 +17956,7 @@ function splitSection(section, config) {
17599
17956
  return [
17600
17957
  {
17601
17958
  sectionTitle: section.sectionTitle,
17959
+ headingLevel: section.headingLevel,
17602
17960
  headingPath: section.headingPath,
17603
17961
  chunkText: text
17604
17962
  }
@@ -17649,6 +18007,7 @@ ${chunk}`;
17649
18007
  }
17650
18008
  return merged.map((chunkText) => ({
17651
18009
  sectionTitle: section.sectionTitle,
18010
+ headingLevel: section.headingLevel,
17652
18011
  headingPath: section.headingPath,
17653
18012
  chunkText
17654
18013
  }));
@@ -17664,6 +18023,18 @@ function buildSummaryChunkText(page) {
17664
18023
  }
17665
18024
  return parts.join("\n\n");
17666
18025
  }
18026
+ function buildEmbeddingTitle(chunk) {
18027
+ if (!chunk.sectionTitle || chunk.headingLevel === void 0) return void 0;
18028
+ if (chunk.headingPath.length > 1) {
18029
+ const path14 = chunk.headingPath.join(" > ");
18030
+ const lastInPath = chunk.headingPath[chunk.headingPath.length - 1];
18031
+ if (lastInPath !== chunk.sectionTitle) {
18032
+ return `${chunk.title} \u2014 ${path14} > ${chunk.sectionTitle}`;
18033
+ }
18034
+ return `${chunk.title} \u2014 ${path14}`;
18035
+ }
18036
+ return `${chunk.title} \u2014 ${chunk.sectionTitle}`;
18037
+ }
17667
18038
  function buildEmbeddingText(chunk, prependTitle) {
17668
18039
  if (!prependTitle) return chunk.chunkText;
17669
18040
  const prefix = chunk.sectionTitle ? `${chunk.title} \u2014 ${chunk.sectionTitle}` : chunk.title;
@@ -17694,10 +18065,14 @@ function chunkPage(page, config, scope) {
17694
18065
  tags: page.tags,
17695
18066
  contentHash: "",
17696
18067
  description: page.description,
17697
- keywords: page.keywords
18068
+ keywords: page.keywords,
18069
+ publishedAt: page.publishedAt,
18070
+ incomingAnchorText: page.incomingAnchorText,
18071
+ meta: page.meta
17698
18072
  };
17699
18073
  const embeddingText = buildEmbeddingText(summaryChunk, config.chunking.prependTitle);
17700
- summaryChunk.contentHash = sha256(normalizeText(embeddingText));
18074
+ const metaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
18075
+ summaryChunk.contentHash = sha256(normalizeText(embeddingText) + metaSuffix);
17701
18076
  chunks.push(summaryChunk);
17702
18077
  }
17703
18078
  const ordinalOffset = config.chunking.pageSummaryChunk ? 1 : 0;
@@ -17714,6 +18089,7 @@ function chunkPage(page, config, scope) {
17714
18089
  path: page.url,
17715
18090
  title: page.title,
17716
18091
  sectionTitle: entry.sectionTitle,
18092
+ headingLevel: entry.headingLevel,
17717
18093
  headingPath: entry.headingPath,
17718
18094
  chunkText: entry.chunkText,
17719
18095
  snippet: toSnippet(entry.chunkText),
@@ -17723,10 +18099,16 @@ function chunkPage(page, config, scope) {
17723
18099
  tags: page.tags,
17724
18100
  contentHash: "",
17725
18101
  description: page.description,
17726
- keywords: page.keywords
18102
+ keywords: page.keywords,
18103
+ publishedAt: page.publishedAt,
18104
+ incomingAnchorText: page.incomingAnchorText,
18105
+ meta: page.meta
17727
18106
  };
17728
18107
  const embeddingText = buildEmbeddingText(chunk, config.chunking.prependTitle);
17729
- chunk.contentHash = sha256(normalizeText(embeddingText));
18108
+ const embeddingTitle = config.chunking.weightHeadings ? buildEmbeddingTitle(chunk) : void 0;
18109
+ const chunkMetaSuffix = page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : "";
18110
+ const hashInput = embeddingTitle ? `${normalizeText(embeddingText)}|title:${embeddingTitle}` : normalizeText(embeddingText);
18111
+ chunk.contentHash = sha256(hashInput + chunkMetaSuffix);
17730
18112
  chunks.push(chunk);
17731
18113
  }
17732
18114
  return chunks;
@@ -18558,7 +18940,112 @@ function gfm(turndownService) {
18558
18940
  ]);
18559
18941
  }
18560
18942
 
18943
+ // src/utils/structured-meta.ts
18944
+ var VALID_KEY_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
18945
+ function validateMetaKey(key) {
18946
+ return VALID_KEY_RE.test(key);
18947
+ }
18948
+ function parseMetaValue(content, dataType) {
18949
+ switch (dataType) {
18950
+ case "number": {
18951
+ const n = Number(content);
18952
+ return Number.isFinite(n) ? n : content;
18953
+ }
18954
+ case "boolean":
18955
+ return content === "true";
18956
+ case "string[]":
18957
+ return content ? content.split(",").map((s) => s.trim()) : [];
18958
+ case "date": {
18959
+ const ms = Number(content);
18960
+ return Number.isFinite(ms) ? ms : content;
18961
+ }
18962
+ default:
18963
+ return content;
18964
+ }
18965
+ }
18966
+ function escapeFilterValue(s) {
18967
+ return s.replace(/'/g, "''");
18968
+ }
18969
+ function buildMetaFilterString(filters) {
18970
+ const clauses = [];
18971
+ for (const [key, value] of Object.entries(filters)) {
18972
+ if (!validateMetaKey(key)) continue;
18973
+ const field = `meta.${key}`;
18974
+ if (typeof value === "string") {
18975
+ clauses.push(`${field} CONTAINS '${escapeFilterValue(value)}'`);
18976
+ } else if (typeof value === "boolean") {
18977
+ clauses.push(`${field} = ${value}`);
18978
+ } else {
18979
+ clauses.push(`${field} = ${value}`);
18980
+ }
18981
+ }
18982
+ return clauses.join(" AND ");
18983
+ }
18984
+
18561
18985
  // src/indexing/extractor.ts
18986
+ function normalizeDateToMs(value) {
18987
+ if (value == null) return void 0;
18988
+ if (value instanceof Date) {
18989
+ const ts = value.getTime();
18990
+ return Number.isFinite(ts) ? ts : void 0;
18991
+ }
18992
+ if (typeof value === "string") {
18993
+ const ts = new Date(value).getTime();
18994
+ return Number.isFinite(ts) ? ts : void 0;
18995
+ }
18996
+ if (typeof value === "number") {
18997
+ return Number.isFinite(value) ? value : void 0;
18998
+ }
18999
+ return void 0;
19000
+ }
19001
+ var FRONTMATTER_DATE_FIELDS = ["date", "publishedAt", "updatedAt", "published_at", "updated_at"];
19002
+ function extractPublishedAtFromFrontmatter(data) {
19003
+ for (const field of FRONTMATTER_DATE_FIELDS) {
19004
+ const val = normalizeDateToMs(data[field]);
19005
+ if (val !== void 0) return val;
19006
+ }
19007
+ return void 0;
19008
+ }
19009
+ function extractPublishedAtFromHtml($) {
19010
+ const jsonLdScripts = $('script[type="application/ld+json"]');
19011
+ for (let i = 0; i < jsonLdScripts.length; i++) {
19012
+ try {
19013
+ const raw = $(jsonLdScripts[i]).html();
19014
+ if (!raw) continue;
19015
+ const parsed = JSON.parse(raw);
19016
+ const candidates = [];
19017
+ if (Array.isArray(parsed)) {
19018
+ candidates.push(...parsed);
19019
+ } else if (parsed && typeof parsed === "object") {
19020
+ candidates.push(parsed);
19021
+ if (Array.isArray(parsed["@graph"])) {
19022
+ candidates.push(...parsed["@graph"]);
19023
+ }
19024
+ }
19025
+ for (const candidate of candidates) {
19026
+ const val = normalizeDateToMs(candidate.datePublished);
19027
+ if (val !== void 0) return val;
19028
+ }
19029
+ } catch {
19030
+ }
19031
+ }
19032
+ const ogTime = $('meta[property="article:published_time"]').attr("content")?.trim();
19033
+ if (ogTime) {
19034
+ const val = normalizeDateToMs(ogTime);
19035
+ if (val !== void 0) return val;
19036
+ }
19037
+ const itempropDate = $('meta[itemprop="datePublished"]').attr("content")?.trim() || $('time[itemprop="datePublished"]').attr("datetime")?.trim();
19038
+ if (itempropDate) {
19039
+ const val = normalizeDateToMs(itempropDate);
19040
+ if (val !== void 0) return val;
19041
+ }
19042
+ const timeEl = $("time[datetime]").first().attr("datetime")?.trim();
19043
+ if (timeEl) {
19044
+ const val = normalizeDateToMs(timeEl);
19045
+ if (val !== void 0) return val;
19046
+ }
19047
+ return void 0;
19048
+ }
18562
19049
  function hasTopLevelNoindexComment(markdown) {
18563
19050
  const lines = markdown.split(/\r?\n/);
18564
19051
  let inFence = false;
@@ -18574,6 +19061,97 @@ function hasTopLevelNoindexComment(markdown) {
18574
19061
  }
18575
19062
  return false;
18576
19063
  }
19064
+ var GARBAGE_ALT_WORDS = /* @__PURE__ */ new Set([
19065
+ "image",
19066
+ "photo",
19067
+ "picture",
19068
+ "icon",
19069
+ "logo",
19070
+ "banner",
19071
+ "screenshot",
19072
+ "thumbnail",
19073
+ "img",
19074
+ "graphic",
19075
+ "illustration",
19076
+ "spacer",
19077
+ "pixel",
19078
+ "placeholder",
19079
+ "avatar",
19080
+ "background"
19081
+ ]);
19082
+ var IMAGE_EXT_RE = /\.(jpg|jpeg|png|gif|svg|webp|avif|bmp|ico)(\?.*)?$/i;
19083
+ function isMeaningfulAlt(alt) {
19084
+ const trimmed = alt.trim();
19085
+ if (!trimmed || trimmed.length < 5) return false;
19086
+ if (IMAGE_EXT_RE.test(trimmed)) return false;
19087
+ if (GARBAGE_ALT_WORDS.has(trimmed.toLowerCase())) return false;
19088
+ return true;
19089
+ }
19090
+ function resolveImageText(img, $, imageDescAttr) {
19091
+ const imgDesc = img.attr(imageDescAttr)?.trim();
19092
+ if (imgDesc) return imgDesc;
19093
+ const figure = img.closest("figure");
19094
+ if (figure.length) {
19095
+ const figDesc = figure.attr(imageDescAttr)?.trim();
19096
+ if (figDesc) return figDesc;
19097
+ }
19098
+ const alt = img.attr("alt")?.trim() ?? "";
19099
+ const caption = figure.length ? figure.find("figcaption").first().text().trim() : "";
19100
+ if (isMeaningfulAlt(alt) && caption) {
19101
+ return `${alt} \u2014 ${caption}`;
19102
+ }
19103
+ if (isMeaningfulAlt(alt)) {
19104
+ return alt;
19105
+ }
19106
+ if (caption) {
19107
+ return caption;
19108
+ }
19109
+ return null;
19110
+ }
19111
+ var STOP_ANCHORS = /* @__PURE__ */ new Set([
19112
+ "here",
19113
+ "click",
19114
+ "click here",
19115
+ "read more",
19116
+ "link",
19117
+ "this",
19118
+ "more"
19119
+ ]);
19120
+ function normalizeAnchorText(raw) {
19121
+ const normalized = raw.replace(/\s+/g, " ").trim().toLowerCase();
19122
+ if (normalized.length < 3) return "";
19123
+ if (STOP_ANCHORS.has(normalized)) return "";
19124
+ if (normalized.length > 100) return normalized.slice(0, 100);
19125
+ return normalized;
19126
+ }
19127
+ function escapeHtml(text) {
19128
+ return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
19129
+ }
19130
+ function preprocessImages(root2, $, imageDescAttr) {
19131
+ root2.find("picture").each((_i, el) => {
19132
+ const picture = $(el);
19133
+ const img = picture.find("img").first();
19134
+ const parentFigure = picture.closest("figure");
19135
+ const text = img.length ? resolveImageText(img, $, imageDescAttr) : null;
19136
+ if (text) {
19137
+ if (parentFigure.length) parentFigure.find("figcaption").remove();
19138
+ picture.replaceWith(`<span>${escapeHtml(text)}</span>`);
19139
+ } else {
19140
+ picture.remove();
19141
+ }
19142
+ });
19143
+ root2.find("img").each((_i, el) => {
19144
+ const img = $(el);
19145
+ const parentFigure = img.closest("figure");
19146
+ const text = resolveImageText(img, $, imageDescAttr);
19147
+ if (text) {
19148
+ if (parentFigure.length) parentFigure.find("figcaption").remove();
19149
+ img.replaceWith(`<span>${escapeHtml(text)}</span>`);
19150
+ } else {
19151
+ img.remove();
19152
+ }
19153
+ });
19154
+ }
18577
19155
  function extractFromHtml(url, html, config) {
18578
19156
  const $ = load(html);
18579
19157
  const normalizedUrl = normalizeUrlPath(url);
@@ -18599,6 +19177,20 @@ function extractFromHtml(url, html, config) {
18599
19177
  if (weight === 0) {
18600
19178
  return null;
18601
19179
  }
19180
+ if ($('meta[name="searchsocket:noindex"]').attr("content") === "true") {
19181
+ return null;
19182
+ }
19183
+ const RESERVED_META_KEYS = /* @__PURE__ */ new Set(["noindex", "tags"]);
19184
+ const meta = {};
19185
+ $('meta[name^="searchsocket:"]').each((_i, el) => {
19186
+ const name = $(el).attr("name") ?? "";
19187
+ const key = name.slice("searchsocket:".length);
19188
+ if (!key || RESERVED_META_KEYS.has(key) || !validateMetaKey(key)) return;
19189
+ const content = $(el).attr("content") ?? "";
19190
+ const dataType = $(el).attr("data-type") ?? "string";
19191
+ meta[key] = parseMetaValue(content, dataType);
19192
+ });
19193
+ const componentTags = $('meta[name="searchsocket:tags"]').attr("content")?.trim();
18602
19194
  const description = $("meta[name='description']").attr("content")?.trim() || $("meta[property='og:description']").attr("content")?.trim() || void 0;
18603
19195
  const keywordsRaw = $("meta[name='keywords']").attr("content")?.trim();
18604
19196
  const keywords = keywordsRaw ? keywordsRaw.split(",").map((k) => k.trim()).filter(Boolean) : void 0;
@@ -18610,7 +19202,9 @@ function extractFromHtml(url, html, config) {
18610
19202
  root2.find(selector).remove();
18611
19203
  }
18612
19204
  root2.find(`[${config.extract.ignoreAttr}]`).remove();
19205
+ preprocessImages(root2, $, config.extract.imageDescAttr);
18613
19206
  const outgoingLinks = [];
19207
+ const seenLinkKeys = /* @__PURE__ */ new Set();
18614
19208
  root2.find("a[href]").each((_index, node) => {
18615
19209
  const href = $(node).attr("href");
18616
19210
  if (!href || href.startsWith("#") || href.startsWith("mailto:") || href.startsWith("tel:")) {
@@ -18621,7 +19215,19 @@ function extractFromHtml(url, html, config) {
18621
19215
  if (!["http:", "https:"].includes(parsed.protocol)) {
18622
19216
  return;
18623
19217
  }
18624
- outgoingLinks.push(normalizeUrlPath(parsed.pathname));
19218
+ const url2 = normalizeUrlPath(parsed.pathname);
19219
+ let anchorText = normalizeAnchorText($(node).text());
19220
+ if (!anchorText) {
19221
+ const imgAlt = $(node).find("img").first().attr("alt") ?? "";
19222
+ if (isMeaningfulAlt(imgAlt)) {
19223
+ anchorText = normalizeAnchorText(imgAlt);
19224
+ }
19225
+ }
19226
+ const key = `${url2}|${anchorText}`;
19227
+ if (!seenLinkKeys.has(key)) {
19228
+ seenLinkKeys.add(key);
19229
+ outgoingLinks.push({ url: url2, anchorText });
19230
+ }
18625
19231
  } catch {
18626
19232
  }
18627
19233
  });
@@ -18646,16 +19252,25 @@ function extractFromHtml(url, html, config) {
18646
19252
  return null;
18647
19253
  }
18648
19254
  const tags = normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1);
19255
+ const publishedAt = extractPublishedAtFromHtml($);
19256
+ if (componentTags) {
19257
+ const extraTags = componentTags.split(",").map((t) => t.trim()).filter(Boolean);
19258
+ for (const t of extraTags) {
19259
+ if (!tags.includes(t)) tags.push(t);
19260
+ }
19261
+ }
18649
19262
  return {
18650
19263
  url: normalizeUrlPath(url),
18651
19264
  title,
18652
19265
  markdown,
18653
- outgoingLinks: [...new Set(outgoingLinks)],
19266
+ outgoingLinks,
18654
19267
  noindex: false,
18655
19268
  tags,
18656
19269
  description,
18657
19270
  keywords,
18658
- weight
19271
+ weight,
19272
+ publishedAt,
19273
+ meta: Object.keys(meta).length > 0 ? meta : void 0
18659
19274
  };
18660
19275
  }
18661
19276
  function extractFromMarkdown(url, markdown, title) {
@@ -18676,6 +19291,24 @@ function extractFromMarkdown(url, markdown, title) {
18676
19291
  if (mdWeight === 0) {
18677
19292
  return null;
18678
19293
  }
19294
+ let mdMeta;
19295
+ const rawMeta = searchsocketMeta?.meta;
19296
+ if (rawMeta && typeof rawMeta === "object" && !Array.isArray(rawMeta)) {
19297
+ const metaObj = {};
19298
+ for (const [key, val] of Object.entries(rawMeta)) {
19299
+ if (!validateMetaKey(key)) continue;
19300
+ if (typeof val === "string" || typeof val === "number" || typeof val === "boolean") {
19301
+ metaObj[key] = val;
19302
+ } else if (Array.isArray(val) && val.every((v) => typeof v === "string")) {
19303
+ metaObj[key] = val;
19304
+ } else if (val instanceof Date) {
19305
+ metaObj[key] = val.getTime();
19306
+ }
19307
+ }
19308
+ if (Object.keys(metaObj).length > 0) {
19309
+ mdMeta = metaObj;
19310
+ }
19311
+ }
18679
19312
  const content = parsed.content;
18680
19313
  const normalized = normalizeMarkdown(content);
18681
19314
  if (!normalizeText(normalized)) {
@@ -18690,6 +19323,7 @@ function extractFromMarkdown(url, markdown, title) {
18690
19323
  fmKeywords = frontmatter.keywords.split(",").map((k) => k.trim()).filter(Boolean);
18691
19324
  }
18692
19325
  if (fmKeywords && fmKeywords.length === 0) fmKeywords = void 0;
19326
+ const publishedAt = extractPublishedAtFromFrontmatter(frontmatter);
18693
19327
  return {
18694
19328
  url: normalizeUrlPath(url),
18695
19329
  title: resolvedTitle,
@@ -18699,7 +19333,9 @@ function extractFromMarkdown(url, markdown, title) {
18699
19333
  tags: normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1),
18700
19334
  description: fmDescription,
18701
19335
  keywords: fmKeywords,
18702
- weight: mdWeight
19336
+ weight: mdWeight,
19337
+ publishedAt,
19338
+ meta: mdMeta
18703
19339
  };
18704
19340
  }
18705
19341
  function segmentToRegex(segment) {
@@ -18894,7 +19530,7 @@ async function parseManifest(cwd, outputDir) {
18894
19530
  const manifestPath = path.resolve(cwd, outputDir, "server", "manifest-full.js");
18895
19531
  let content;
18896
19532
  try {
18897
- content = await fs3.readFile(manifestPath, "utf8");
19533
+ content = await fs8.readFile(manifestPath, "utf8");
18898
19534
  } catch {
18899
19535
  throw new SearchSocketError(
18900
19536
  "BUILD_MANIFEST_NOT_FOUND",
@@ -19205,6 +19841,125 @@ function filePathToUrl(filePath, baseDir) {
19205
19841
  const noExt = relative.replace(/\.md$/i, "").replace(/\/index$/i, "");
19206
19842
  return normalizeUrlPath(noExt || "/");
19207
19843
  }
19844
+ var ROUTE_FILE_RE = /\+(page|layout|error)(@[^.]+)?\.svelte$/;
19845
+ function isSvelteComponentFile(filePath) {
19846
+ if (!filePath.endsWith(".svelte")) return false;
19847
+ return !ROUTE_FILE_RE.test(filePath);
19848
+ }
19849
+ function extractSvelteComponentMeta(source) {
19850
+ const componentMatch = source.match(/<!--\s*@component\s*([\s\S]*?)\s*-->/);
19851
+ const description = componentMatch?.[1]?.trim() || void 0;
19852
+ const propsMatch = source.match(
19853
+ /let\s+\{([\s\S]*?)\}\s*(?::\s*([^=;{][\s\S]*?))?\s*=\s*\$props\(\)/
19854
+ );
19855
+ const props = [];
19856
+ if (propsMatch) {
19857
+ const destructureBlock = propsMatch[1];
19858
+ const typeAnnotation = propsMatch[2]?.trim();
19859
+ let resolvedTypeMap;
19860
+ if (typeAnnotation && /^[A-Z]\w*$/.test(typeAnnotation)) {
19861
+ resolvedTypeMap = resolveTypeReference(source, typeAnnotation);
19862
+ } else if (typeAnnotation && typeAnnotation.startsWith("{")) {
19863
+ resolvedTypeMap = parseInlineTypeAnnotation(typeAnnotation);
19864
+ }
19865
+ const propEntries = splitDestructureBlock(destructureBlock);
19866
+ for (const entry of propEntries) {
19867
+ const trimmed = entry.trim();
19868
+ if (!trimmed || trimmed.startsWith("...")) continue;
19869
+ let propName;
19870
+ let defaultValue;
19871
+ const renameMatch = trimmed.match(/^(\w+)\s*:\s*\w+\s*(?:=\s*([\s\S]+))?$/);
19872
+ if (renameMatch) {
19873
+ propName = renameMatch[1];
19874
+ defaultValue = renameMatch[2]?.trim();
19875
+ } else {
19876
+ const defaultMatch = trimmed.match(/^(\w+)\s*=\s*([\s\S]+)$/);
19877
+ if (defaultMatch) {
19878
+ propName = defaultMatch[1];
19879
+ defaultValue = defaultMatch[2]?.trim();
19880
+ } else {
19881
+ propName = trimmed.match(/^(\w+)/)?.[1] ?? trimmed;
19882
+ }
19883
+ }
19884
+ const propType = resolvedTypeMap?.get(propName);
19885
+ props.push({
19886
+ name: propName,
19887
+ ...propType ? { type: propType } : {},
19888
+ ...defaultValue ? { default: defaultValue } : {}
19889
+ });
19890
+ }
19891
+ }
19892
+ return { description, props };
19893
+ }
19894
+ function splitDestructureBlock(block) {
19895
+ const entries = [];
19896
+ let depth = 0;
19897
+ let current = "";
19898
+ for (const ch of block) {
19899
+ if (ch === "{" || ch === "[" || ch === "(") {
19900
+ depth++;
19901
+ current += ch;
19902
+ } else if (ch === "}" || ch === "]" || ch === ")") {
19903
+ depth--;
19904
+ current += ch;
19905
+ } else if (ch === "," && depth === 0) {
19906
+ entries.push(current);
19907
+ current = "";
19908
+ } else {
19909
+ current += ch;
19910
+ }
19911
+ }
19912
+ if (current.trim()) entries.push(current);
19913
+ return entries;
19914
+ }
19915
+ function resolveTypeReference(source, typeName) {
19916
+ const startRe = new RegExp(`(?:interface\\s+${typeName}\\s*|type\\s+${typeName}\\s*=\\s*)\\{`);
19917
+ const startMatch = source.match(startRe);
19918
+ if (!startMatch || startMatch.index === void 0) return void 0;
19919
+ const bodyStart = startMatch.index + startMatch[0].length;
19920
+ let depth = 1;
19921
+ let i = bodyStart;
19922
+ while (i < source.length && depth > 0) {
19923
+ if (source[i] === "{") depth++;
19924
+ else if (source[i] === "}") depth--;
19925
+ i++;
19926
+ }
19927
+ if (depth !== 0) return void 0;
19928
+ const body = source.slice(bodyStart, i - 1);
19929
+ return parseTypeMembers(body);
19930
+ }
19931
+ function parseInlineTypeAnnotation(annotation) {
19932
+ const inner = annotation.replace(/^\{/, "").replace(/\}$/, "");
19933
+ return parseTypeMembers(inner);
19934
+ }
19935
+ function parseTypeMembers(body) {
19936
+ const map = /* @__PURE__ */ new Map();
19937
+ const members = body.split(/[;\n]/).map((m) => m.trim()).filter(Boolean);
19938
+ for (const member of members) {
19939
+ const memberMatch = member.match(/^(\w+)\??\s*:\s*(.+)$/);
19940
+ if (memberMatch) {
19941
+ map.set(memberMatch[1], memberMatch[2].replace(/,\s*$/, "").trim());
19942
+ }
19943
+ }
19944
+ return map;
19945
+ }
19946
+ function buildComponentMarkdown(componentName, meta) {
19947
+ if (!meta.description && meta.props.length === 0) return "";
19948
+ const parts = [`${componentName} component.`];
19949
+ if (meta.description) {
19950
+ parts.push(meta.description);
19951
+ }
19952
+ if (meta.props.length > 0) {
19953
+ const propEntries = meta.props.map((p) => {
19954
+ let entry = p.name;
19955
+ if (p.type) entry += ` (${p.type})`;
19956
+ if (p.default) entry += ` default: ${p.default}`;
19957
+ return entry;
19958
+ });
19959
+ parts.push(`Props: ${propEntries.join(", ")}.`);
19960
+ }
19961
+ return parts.join(" ");
19962
+ }
19208
19963
  function normalizeSvelteToMarkdown(source) {
19209
19964
  return source.replace(/<script[\s\S]*?<\/script>/g, "").replace(/<style[\s\S]*?<\/style>/g, "").replace(/<[^>]+>/g, " ").replace(/\{[^}]+\}/g, " ").replace(/\s+/g, " ").trim();
19210
19965
  }
@@ -19223,13 +19978,27 @@ async function loadContentFilesPages(cwd, config, maxPages) {
19223
19978
  const selected = typeof limit === "number" ? files.slice(0, limit) : files;
19224
19979
  const pages = [];
19225
19980
  for (const filePath of selected) {
19226
- const raw = await fs3.readFile(filePath, "utf8");
19227
- const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
19981
+ const raw = await fs8.readFile(filePath, "utf8");
19982
+ let markdown;
19983
+ let tags;
19984
+ if (filePath.endsWith(".md")) {
19985
+ markdown = raw;
19986
+ } else if (isSvelteComponentFile(filePath)) {
19987
+ const componentName = path.basename(filePath, ".svelte");
19988
+ const meta = extractSvelteComponentMeta(raw);
19989
+ const componentMarkdown = buildComponentMarkdown(componentName, meta);
19990
+ const templateContent = normalizeSvelteToMarkdown(raw);
19991
+ markdown = componentMarkdown ? [componentMarkdown, templateContent].filter(Boolean).join("\n\n") : templateContent;
19992
+ tags = ["component"];
19993
+ } else {
19994
+ markdown = normalizeSvelteToMarkdown(raw);
19995
+ }
19228
19996
  pages.push({
19229
19997
  url: filePathToUrl(filePath, baseDir),
19230
19998
  markdown,
19231
19999
  sourcePath: path.relative(cwd, filePath).replace(/\\/g, "/"),
19232
- outgoingLinks: []
20000
+ outgoingLinks: [],
20001
+ ...tags ? { tags } : {}
19233
20002
  });
19234
20003
  }
19235
20004
  return pages;
@@ -19359,7 +20128,7 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
19359
20128
  const selected = typeof limit === "number" ? htmlFiles.slice(0, limit) : htmlFiles;
19360
20129
  const pages = [];
19361
20130
  for (const filePath of selected) {
19362
- const html = await fs3.readFile(filePath, "utf8");
20131
+ const html = await fs8.readFile(filePath, "utf8");
19363
20132
  pages.push({
19364
20133
  url: staticHtmlFileToUrl(filePath, outputDir),
19365
20134
  html,
@@ -19422,7 +20191,7 @@ function isBlockedByRobots(urlPath, rules3) {
19422
20191
  }
19423
20192
  async function loadRobotsTxtFromDir(dir) {
19424
20193
  try {
19425
- const content = await fs3.readFile(path.join(dir, "robots.txt"), "utf8");
20194
+ const content = await fs8.readFile(path.join(dir, "robots.txt"), "utf8");
19426
20195
  return parseRobotsTxt(content);
19427
20196
  } catch {
19428
20197
  return null;
@@ -19450,29 +20219,65 @@ function nonNegativeOrZero(value) {
19450
20219
  function normalizeForTitleMatch(text) {
19451
20220
  return text.toLowerCase().replace(/[^a-z0-9\s]/g, "").replace(/\s+/g, " ").trim();
19452
20221
  }
19453
- function rankHits(hits, config, query) {
20222
+ function rankHits(hits, config, query, debug) {
19454
20223
  const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
19455
20224
  const titleMatchWeight = config.ranking.weights.titleMatch;
19456
20225
  return hits.map((hit) => {
19457
- let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
20226
+ const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
20227
+ let score = baseScore;
20228
+ let incomingLinkBoostValue = 0;
19458
20229
  if (config.ranking.enableIncomingLinkBoost) {
19459
20230
  const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.metadata.incomingLinks));
19460
- score += incomingBoost * config.ranking.weights.incomingLinks;
20231
+ incomingLinkBoostValue = incomingBoost * config.ranking.weights.incomingLinks;
20232
+ score += incomingLinkBoostValue;
19461
20233
  }
20234
+ let depthBoostValue = 0;
19462
20235
  if (config.ranking.enableDepthBoost) {
19463
20236
  const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
19464
- score += depthBoost * config.ranking.weights.depth;
20237
+ depthBoostValue = depthBoost * config.ranking.weights.depth;
20238
+ score += depthBoostValue;
19465
20239
  }
20240
+ let titleMatchBoostValue = 0;
19466
20241
  if (normalizedQuery && titleMatchWeight > 0) {
19467
20242
  const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
19468
20243
  if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
19469
- score += titleMatchWeight;
20244
+ titleMatchBoostValue = titleMatchWeight;
20245
+ score += titleMatchBoostValue;
19470
20246
  }
19471
20247
  }
19472
- return {
20248
+ let freshnessBoostValue = 0;
20249
+ if (config.ranking.enableFreshnessBoost) {
20250
+ const publishedAt = hit.metadata.publishedAt;
20251
+ if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
20252
+ const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
20253
+ const decay = 1 / (1 + nonNegativeOrZero(daysSince) * config.ranking.freshnessDecayRate);
20254
+ freshnessBoostValue = decay * config.ranking.weights.freshness;
20255
+ score += freshnessBoostValue;
20256
+ }
20257
+ }
20258
+ let anchorTextMatchBoostValue = 0;
20259
+ if (config.ranking.enableAnchorTextBoost && normalizedQuery && config.ranking.weights.anchorText > 0) {
20260
+ const normalizedAnchorText = normalizeForTitleMatch(hit.metadata.incomingAnchorText ?? "");
20261
+ if (normalizedAnchorText.length > 0 && normalizedQuery.length > 0 && (normalizedAnchorText.includes(normalizedQuery) || normalizedQuery.includes(normalizedAnchorText))) {
20262
+ anchorTextMatchBoostValue = config.ranking.weights.anchorText;
20263
+ score += anchorTextMatchBoostValue;
20264
+ }
20265
+ }
20266
+ const result = {
19473
20267
  hit,
19474
20268
  finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
19475
20269
  };
20270
+ if (debug) {
20271
+ result.breakdown = {
20272
+ baseScore,
20273
+ incomingLinkBoost: incomingLinkBoostValue,
20274
+ depthBoost: depthBoostValue,
20275
+ titleMatchBoost: titleMatchBoostValue,
20276
+ freshnessBoost: freshnessBoostValue,
20277
+ anchorTextMatchBoost: anchorTextMatchBoostValue
20278
+ };
20279
+ }
20280
+ return result;
19476
20281
  }).sort((a, b) => {
19477
20282
  const delta = b.finalScore - a.finalScore;
19478
20283
  return Number.isNaN(delta) ? 0 : delta;
@@ -19481,12 +20286,13 @@ function rankHits(hits, config, query) {
19481
20286
  function trimByScoreGap(results, config) {
19482
20287
  if (results.length === 0) return results;
19483
20288
  const threshold = config.ranking.scoreGapThreshold;
19484
- const minScore = config.ranking.minScore;
19485
- if (minScore > 0 && results.length > 0) {
19486
- const sortedScores = results.map((r) => r.pageScore).sort((a, b) => a - b);
19487
- const mid = Math.floor(sortedScores.length / 2);
19488
- const median = sortedScores.length % 2 === 0 ? (sortedScores[mid - 1] + sortedScores[mid]) / 2 : sortedScores[mid];
19489
- if (median < minScore) return [];
20289
+ const minScoreRatio = config.ranking.minScoreRatio;
20290
+ if (minScoreRatio > 0 && results.length > 0) {
20291
+ const topScore = results[0].pageScore;
20292
+ if (Number.isFinite(topScore) && topScore > 0) {
20293
+ const minThreshold = topScore * minScoreRatio;
20294
+ results = results.filter((r) => r.pageScore >= minThreshold);
20295
+ }
19490
20296
  }
19491
20297
  if (threshold > 0 && results.length > 1) {
19492
20298
  for (let i = 1; i < results.length; i++) {
@@ -19556,61 +20362,99 @@ function aggregateByPage(ranked, config) {
19556
20362
  return Number.isNaN(delta) ? 0 : delta;
19557
20363
  });
19558
20364
  }
19559
- function mergePageAndChunkResults(pageHits, rankedChunks, config) {
19560
- if (pageHits.length === 0) return rankedChunks;
19561
- const w = config.search.pageSearchWeight;
19562
- const pageScoreMap = /* @__PURE__ */ new Map();
19563
- for (const ph of pageHits) {
19564
- pageScoreMap.set(ph.url, ph);
19565
- }
19566
- const pagesWithChunks = /* @__PURE__ */ new Set();
19567
- const merged = rankedChunks.map((ranked) => {
19568
- const url = ranked.hit.metadata.url;
19569
- const pageHit = pageScoreMap.get(url);
19570
- if (pageHit) {
19571
- pagesWithChunks.add(url);
19572
- const blended = (1 - w) * ranked.finalScore + w * pageHit.score;
19573
- return {
19574
- hit: ranked.hit,
19575
- finalScore: Number.isFinite(blended) ? blended : ranked.finalScore
19576
- };
20365
+ function rankPageHits(pageHits, config, query, debug) {
20366
+ const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
20367
+ const titleMatchWeight = config.ranking.weights.titleMatch;
20368
+ return pageHits.map((hit) => {
20369
+ const baseScore = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
20370
+ let score = baseScore;
20371
+ let incomingLinkBoostValue = 0;
20372
+ if (config.ranking.enableIncomingLinkBoost) {
20373
+ const incomingBoost = Math.log(1 + nonNegativeOrZero(hit.incomingLinks));
20374
+ incomingLinkBoostValue = incomingBoost * config.ranking.weights.incomingLinks;
20375
+ score += incomingLinkBoostValue;
19577
20376
  }
19578
- return ranked;
19579
- });
19580
- for (const [url, pageHit] of pageScoreMap) {
19581
- if (pagesWithChunks.has(url)) continue;
19582
- const syntheticScore = pageHit.score * w;
19583
- const syntheticHit = {
19584
- id: `page:${url}`,
19585
- score: pageHit.score,
19586
- metadata: {
19587
- projectId: "",
19588
- scopeName: "",
19589
- url: pageHit.url,
19590
- path: pageHit.url,
19591
- title: pageHit.title,
19592
- sectionTitle: "",
19593
- headingPath: [],
19594
- snippet: pageHit.description || pageHit.title,
19595
- chunkText: pageHit.description || pageHit.title,
19596
- ordinal: 0,
19597
- contentHash: "",
19598
- depth: pageHit.depth,
19599
- incomingLinks: pageHit.incomingLinks,
19600
- routeFile: pageHit.routeFile,
19601
- tags: pageHit.tags
20377
+ let depthBoostValue = 0;
20378
+ if (config.ranking.enableDepthBoost) {
20379
+ const depthBoost = 1 / (1 + nonNegativeOrZero(hit.depth));
20380
+ depthBoostValue = depthBoost * config.ranking.weights.depth;
20381
+ score += depthBoostValue;
20382
+ }
20383
+ let titleMatchBoostValue = 0;
20384
+ if (normalizedQuery && titleMatchWeight > 0) {
20385
+ const normalizedTitle = normalizeForTitleMatch(hit.title);
20386
+ if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
20387
+ titleMatchBoostValue = titleMatchWeight;
20388
+ score += titleMatchBoostValue;
19602
20389
  }
20390
+ }
20391
+ let freshnessBoostValue = 0;
20392
+ if (config.ranking.enableFreshnessBoost) {
20393
+ const publishedAt = hit.publishedAt;
20394
+ if (typeof publishedAt === "number" && Number.isFinite(publishedAt)) {
20395
+ const daysSince = Math.max(0, (Date.now() - publishedAt) / 864e5);
20396
+ const decay = 1 / (1 + nonNegativeOrZero(daysSince) * config.ranking.freshnessDecayRate);
20397
+ freshnessBoostValue = decay * config.ranking.weights.freshness;
20398
+ score += freshnessBoostValue;
20399
+ }
20400
+ }
20401
+ const pageWeight = findPageWeight(hit.url, config.ranking.pageWeights);
20402
+ if (pageWeight !== 1) {
20403
+ score *= pageWeight;
20404
+ }
20405
+ const result = {
20406
+ url: hit.url,
20407
+ title: hit.title,
20408
+ description: hit.description,
20409
+ routeFile: hit.routeFile,
20410
+ depth: hit.depth,
20411
+ incomingLinks: hit.incomingLinks,
20412
+ tags: hit.tags,
20413
+ baseScore,
20414
+ finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY,
20415
+ publishedAt: hit.publishedAt
19603
20416
  };
19604
- merged.push({
19605
- hit: syntheticHit,
19606
- finalScore: Number.isFinite(syntheticScore) ? syntheticScore : 0
19607
- });
19608
- }
19609
- return merged.sort((a, b) => {
20417
+ if (debug) {
20418
+ result.breakdown = {
20419
+ baseScore,
20420
+ pageWeight,
20421
+ incomingLinkBoost: incomingLinkBoostValue,
20422
+ depthBoost: depthBoostValue,
20423
+ titleMatchBoost: titleMatchBoostValue,
20424
+ freshnessBoost: freshnessBoostValue
20425
+ };
20426
+ }
20427
+ return result;
20428
+ }).filter((p) => findPageWeight(p.url, config.ranking.pageWeights) !== 0).sort((a, b) => {
19610
20429
  const delta = b.finalScore - a.finalScore;
19611
20430
  return Number.isNaN(delta) ? 0 : delta;
19612
20431
  });
19613
20432
  }
20433
+ function trimPagesByScoreGap(results, config) {
20434
+ if (results.length === 0) return results;
20435
+ const threshold = config.ranking.scoreGapThreshold;
20436
+ const minScoreRatio = config.ranking.minScoreRatio;
20437
+ if (minScoreRatio > 0 && results.length > 0) {
20438
+ const topScore = results[0].finalScore;
20439
+ if (Number.isFinite(topScore) && topScore > 0) {
20440
+ const minThreshold = topScore * minScoreRatio;
20441
+ results = results.filter((r) => r.finalScore >= minThreshold);
20442
+ }
20443
+ }
20444
+ if (threshold > 0 && results.length > 1) {
20445
+ for (let i = 1; i < results.length; i++) {
20446
+ const prev = results[i - 1].finalScore;
20447
+ const current = results[i].finalScore;
20448
+ if (prev > 0) {
20449
+ const gap = (prev - current) / prev;
20450
+ if (gap >= threshold) {
20451
+ return results.slice(0, i);
20452
+ }
20453
+ }
20454
+ }
20455
+ }
20456
+ return results;
20457
+ }
19614
20458
 
19615
20459
  // src/utils/time.ts
19616
20460
  function nowIso() {
@@ -19619,6 +20463,81 @@ function nowIso() {
19619
20463
  function hrTimeMs(start) {
19620
20464
  return Number(process.hrtime.bigint() - start) / 1e6;
19621
20465
  }
20466
+ function resolvePageUrl(pageUrl, baseUrl) {
20467
+ if (!baseUrl) return pageUrl;
20468
+ try {
20469
+ return new URL(pageUrl, baseUrl).href;
20470
+ } catch {
20471
+ return pageUrl;
20472
+ }
20473
+ }
20474
+ function generateLlmsTxt(pages, config) {
20475
+ const title = config.llmsTxt.title ?? config.project.id;
20476
+ const description = config.llmsTxt.description;
20477
+ const baseUrl = config.project.baseUrl;
20478
+ const lines = [`# ${title}`];
20479
+ if (description) {
20480
+ lines.push("", `> ${description}`);
20481
+ }
20482
+ const filtered = pages.filter(
20483
+ (p) => p.url !== "/llms.txt" && p.url !== "/llms-full.txt"
20484
+ );
20485
+ const sorted = [...filtered].sort((a, b) => {
20486
+ if (a.depth !== b.depth) return a.depth - b.depth;
20487
+ return b.incomingLinks - a.incomingLinks;
20488
+ });
20489
+ if (sorted.length > 0) {
20490
+ lines.push("", "## Pages", "");
20491
+ for (const page of sorted) {
20492
+ const url = resolvePageUrl(page.url, baseUrl);
20493
+ if (page.description) {
20494
+ lines.push(`- [${page.title}](${url}): ${page.description}`);
20495
+ } else {
20496
+ lines.push(`- [${page.title}](${url})`);
20497
+ }
20498
+ }
20499
+ }
20500
+ lines.push("");
20501
+ return lines.join("\n");
20502
+ }
20503
+ function generateLlmsFullTxt(pages, config) {
20504
+ const title = config.llmsTxt.title ?? config.project.id;
20505
+ const description = config.llmsTxt.description;
20506
+ const baseUrl = config.project.baseUrl;
20507
+ const lines = [`# ${title}`];
20508
+ if (description) {
20509
+ lines.push("", `> ${description}`);
20510
+ }
20511
+ const filtered = pages.filter(
20512
+ (p) => p.url !== "/llms.txt" && p.url !== "/llms-full.txt"
20513
+ );
20514
+ const sorted = [...filtered].sort((a, b) => {
20515
+ if (a.depth !== b.depth) return a.depth - b.depth;
20516
+ return b.incomingLinks - a.incomingLinks;
20517
+ });
20518
+ for (const page of sorted) {
20519
+ const url = resolvePageUrl(page.url, baseUrl);
20520
+ lines.push("", "---", "", `## [${page.title}](${url})`, "");
20521
+ lines.push(page.markdown.trim());
20522
+ }
20523
+ lines.push("");
20524
+ return lines.join("\n");
20525
+ }
20526
+ async function writeLlmsTxt(pages, config, cwd, logger3) {
20527
+ const outputPath = path.resolve(cwd, config.llmsTxt.outputPath);
20528
+ const outputDir = path.dirname(outputPath);
20529
+ await fs8.mkdir(outputDir, { recursive: true });
20530
+ const content = generateLlmsTxt(pages, config);
20531
+ await fs8.writeFile(outputPath, content, "utf8");
20532
+ logger3.info(`Generated llms.txt at ${config.llmsTxt.outputPath}`);
20533
+ if (config.llmsTxt.generateFull) {
20534
+ const fullPath = outputPath.replace(/\.txt$/, "-full.txt");
20535
+ const fullContent = generateLlmsFullTxt(pages, config);
20536
+ await fs8.writeFile(fullPath, fullContent, "utf8");
20537
+ const relativeFull = path.relative(cwd, fullPath);
20538
+ logger3.info(`Generated llms-full.txt at ${relativeFull}`);
20539
+ }
20540
+ }
19622
20541
 
19623
20542
  // src/indexing/pipeline.ts
19624
20543
  function buildPageSummary(page, maxChars = 3500) {
@@ -19637,16 +20556,33 @@ function buildPageSummary(page, maxChars = 3500) {
19637
20556
  if (joined.length <= maxChars) return joined;
19638
20557
  return joined.slice(0, maxChars).trim();
19639
20558
  }
20559
+ function buildPageContentHash(page) {
20560
+ const parts = [
20561
+ page.title,
20562
+ page.description ?? "",
20563
+ (page.keywords ?? []).slice().sort().join(","),
20564
+ page.tags.slice().sort().join(","),
20565
+ page.markdown,
20566
+ String(page.outgoingLinks),
20567
+ String(page.publishedAt ?? ""),
20568
+ page.incomingAnchorText ?? "",
20569
+ (page.outgoingLinkUrls ?? []).slice().sort().join(","),
20570
+ page.meta ? JSON.stringify(page.meta, Object.keys(page.meta).sort()) : ""
20571
+ ];
20572
+ return sha256(parts.join("|"));
20573
+ }
19640
20574
  var IndexPipeline = class _IndexPipeline {
19641
20575
  cwd;
19642
20576
  config;
19643
20577
  store;
19644
20578
  logger;
20579
+ hooks;
19645
20580
  constructor(options) {
19646
20581
  this.cwd = options.cwd;
19647
20582
  this.config = options.config;
19648
20583
  this.store = options.store;
19649
20584
  this.logger = options.logger;
20585
+ this.hooks = options.hooks;
19650
20586
  }
19651
20587
  static async create(options = {}) {
19652
20588
  const cwd = path.resolve(options.cwd ?? process.cwd());
@@ -19656,7 +20592,8 @@ var IndexPipeline = class _IndexPipeline {
19656
20592
  cwd,
19657
20593
  config,
19658
20594
  store,
19659
- logger: options.logger ?? new Logger()
20595
+ logger: options.logger ?? new Logger(),
20596
+ hooks: options.hooks ?? {}
19660
20597
  });
19661
20598
  }
19662
20599
  getConfig() {
@@ -19677,7 +20614,7 @@ var IndexPipeline = class _IndexPipeline {
19677
20614
  const scope = resolveScope(this.config, options.scopeOverride);
19678
20615
  ensureStateDirs(this.cwd, this.config.state.dir);
19679
20616
  const sourceMode = options.sourceOverride ?? this.config.source.mode;
19680
- this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
20617
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-vector)`);
19681
20618
  if (options.force) {
19682
20619
  this.logger.info("Force mode enabled \u2014 full rebuild");
19683
20620
  }
@@ -19686,8 +20623,9 @@ var IndexPipeline = class _IndexPipeline {
19686
20623
  }
19687
20624
  const manifestStart = stageStart();
19688
20625
  const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
20626
+ const existingPageHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getPageHashes(scope);
19689
20627
  stageEnd("manifest", manifestStart);
19690
- this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
20628
+ this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes, ${existingPageHashes.size} existing page hashes loaded`);
19691
20629
  const sourceStart = stageStart();
19692
20630
  this.logger.info(`Loading pages (source: ${sourceMode})...`);
19693
20631
  let sourcePages;
@@ -19764,11 +20702,61 @@ var IndexPipeline = class _IndexPipeline {
19764
20702
  );
19765
20703
  continue;
19766
20704
  }
19767
- extractedPages.push(extracted);
20705
+ if (sourcePage.tags && sourcePage.tags.length > 0) {
20706
+ extracted.tags = [.../* @__PURE__ */ new Set([...extracted.tags, ...sourcePage.tags])];
20707
+ }
20708
+ let accepted;
20709
+ if (this.hooks.transformPage) {
20710
+ const transformed = await this.hooks.transformPage(extracted);
20711
+ if (transformed === null) {
20712
+ this.logger.debug(`Page ${sourcePage.url} skipped by transformPage hook`);
20713
+ continue;
20714
+ }
20715
+ accepted = transformed;
20716
+ } else {
20717
+ accepted = extracted;
20718
+ }
20719
+ extractedPages.push(accepted);
19768
20720
  this.logger.event("page_extracted", {
19769
- url: extracted.url
20721
+ url: accepted.url
19770
20722
  });
19771
20723
  }
20724
+ const customRecords = options.customRecords ?? [];
20725
+ if (customRecords.length > 0) {
20726
+ this.logger.info(`Processing ${customRecords.length} custom record${customRecords.length === 1 ? "" : "s"}...`);
20727
+ for (const record of customRecords) {
20728
+ const normalizedUrl = normalizeUrlPath(record.url);
20729
+ const normalized = normalizeMarkdown(record.content);
20730
+ if (!normalized.trim()) {
20731
+ this.logger.warn(`Custom record ${normalizedUrl} has empty content and was skipped.`);
20732
+ continue;
20733
+ }
20734
+ const urlTags = normalizedUrl.split("/").filter(Boolean).slice(0, 1);
20735
+ const tags = record.tags ? [.../* @__PURE__ */ new Set([...urlTags, ...record.tags])] : urlTags;
20736
+ const extracted = {
20737
+ url: normalizedUrl,
20738
+ title: record.title,
20739
+ markdown: normalized,
20740
+ outgoingLinks: [],
20741
+ noindex: false,
20742
+ tags,
20743
+ weight: record.weight
20744
+ };
20745
+ let accepted;
20746
+ if (this.hooks.transformPage) {
20747
+ const transformed = await this.hooks.transformPage(extracted);
20748
+ if (transformed === null) {
20749
+ this.logger.debug(`Custom record ${normalizedUrl} skipped by transformPage hook`);
20750
+ continue;
20751
+ }
20752
+ accepted = transformed;
20753
+ } else {
20754
+ accepted = extracted;
20755
+ }
20756
+ extractedPages.push(accepted);
20757
+ this.logger.event("page_extracted", { url: accepted.url, custom: true });
20758
+ }
20759
+ }
19772
20760
  extractedPages.sort((a, b) => a.url.localeCompare(b.url));
19773
20761
  const uniquePages = [];
19774
20762
  const seenUrls = /* @__PURE__ */ new Set();
@@ -19801,15 +20789,28 @@ var IndexPipeline = class _IndexPipeline {
19801
20789
  const linkStart = stageStart();
19802
20790
  const pageSet = new Set(indexablePages.map((page) => normalizeUrlPath(page.url)));
19803
20791
  const incomingLinkCount = /* @__PURE__ */ new Map();
20792
+ const incomingAnchorTexts = /* @__PURE__ */ new Map();
19804
20793
  for (const page of indexablePages) {
19805
20794
  incomingLinkCount.set(page.url, incomingLinkCount.get(page.url) ?? 0);
19806
20795
  }
19807
20796
  for (const page of indexablePages) {
19808
- for (const outgoing of page.outgoingLinks) {
20797
+ const seenForCount = /* @__PURE__ */ new Set();
20798
+ const seenForAnchor = /* @__PURE__ */ new Set();
20799
+ for (const { url: outgoing, anchorText } of page.outgoingLinks) {
19809
20800
  if (!pageSet.has(outgoing)) {
19810
20801
  continue;
19811
20802
  }
19812
- incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
20803
+ if (!seenForCount.has(outgoing)) {
20804
+ seenForCount.add(outgoing);
20805
+ incomingLinkCount.set(outgoing, (incomingLinkCount.get(outgoing) ?? 0) + 1);
20806
+ }
20807
+ if (anchorText && !seenForAnchor.has(outgoing)) {
20808
+ seenForAnchor.add(outgoing);
20809
+ if (!incomingAnchorTexts.has(outgoing)) {
20810
+ incomingAnchorTexts.set(outgoing, /* @__PURE__ */ new Set());
20811
+ }
20812
+ incomingAnchorTexts.get(outgoing).add(anchorText);
20813
+ }
19813
20814
  }
19814
20815
  }
19815
20816
  stageEnd("links", linkStart);
@@ -19828,6 +20829,15 @@ var IndexPipeline = class _IndexPipeline {
19828
20829
  });
19829
20830
  }
19830
20831
  }
20832
+ for (const record of customRecords) {
20833
+ const normalizedUrl = normalizeUrlPath(record.url);
20834
+ if (!precomputedRoutes.has(normalizedUrl)) {
20835
+ precomputedRoutes.set(normalizedUrl, {
20836
+ routeFile: "",
20837
+ routeResolution: "exact"
20838
+ });
20839
+ }
20840
+ }
19831
20841
  for (const page of indexablePages) {
19832
20842
  const routeMatch = precomputedRoutes.get(normalizeUrlPath(page.url)) ?? mapUrlToRoute(page.url, routePatterns);
19833
20843
  if (routeMatch.routeResolution === "best-effort") {
@@ -19845,6 +20855,17 @@ var IndexPipeline = class _IndexPipeline {
19845
20855
  } else {
19846
20856
  routeExact += 1;
19847
20857
  }
20858
+ const anchorSet = incomingAnchorTexts.get(page.url);
20859
+ let incomingAnchorText;
20860
+ if (anchorSet && anchorSet.size > 0) {
20861
+ let joined = "";
20862
+ for (const phrase of anchorSet) {
20863
+ const next2 = joined ? `${joined} ${phrase}` : phrase;
20864
+ if (next2.length > 500) break;
20865
+ joined = next2;
20866
+ }
20867
+ incomingAnchorText = joined || void 0;
20868
+ }
19848
20869
  const indexedPage = {
19849
20870
  url: page.url,
19850
20871
  title: page.title,
@@ -19854,40 +20875,113 @@ var IndexPipeline = class _IndexPipeline {
19854
20875
  generatedAt: nowIso(),
19855
20876
  incomingLinks: incomingLinkCount.get(page.url) ?? 0,
19856
20877
  outgoingLinks: page.outgoingLinks.length,
20878
+ outgoingLinkUrls: page.outgoingLinks.map((l) => typeof l === "string" ? l : l.url),
19857
20879
  depth: getUrlDepth(page.url),
19858
20880
  tags: page.tags,
19859
20881
  markdown: page.markdown,
19860
20882
  description: page.description,
19861
- keywords: page.keywords
20883
+ keywords: page.keywords,
20884
+ publishedAt: page.publishedAt,
20885
+ incomingAnchorText,
20886
+ meta: page.meta
19862
20887
  };
19863
20888
  pages.push(indexedPage);
19864
20889
  this.logger.event("page_indexed", { url: page.url });
19865
20890
  }
20891
+ const pageRecords = pages.map((p) => {
20892
+ const summary = buildPageSummary(p);
20893
+ return {
20894
+ url: p.url,
20895
+ title: p.title,
20896
+ markdown: p.markdown,
20897
+ projectId: scope.projectId,
20898
+ scopeName: scope.scopeName,
20899
+ routeFile: p.routeFile,
20900
+ routeResolution: p.routeResolution,
20901
+ incomingLinks: p.incomingLinks,
20902
+ outgoingLinks: p.outgoingLinks,
20903
+ outgoingLinkUrls: p.outgoingLinkUrls,
20904
+ depth: p.depth,
20905
+ tags: p.tags,
20906
+ indexedAt: p.generatedAt,
20907
+ summary,
20908
+ description: p.description,
20909
+ keywords: p.keywords,
20910
+ contentHash: buildPageContentHash(p),
20911
+ publishedAt: p.publishedAt,
20912
+ meta: p.meta
20913
+ };
20914
+ });
20915
+ const currentPageUrls = new Set(pageRecords.map((r) => r.url));
20916
+ const changedPages = pageRecords.filter(
20917
+ (r) => !existingPageHashes.has(r.url) || existingPageHashes.get(r.url) !== r.contentHash
20918
+ );
20919
+ const deletedPageUrls = [...existingPageHashes.keys()].filter((url) => !currentPageUrls.has(url));
19866
20920
  if (!options.dryRun) {
19867
- const pageRecords = pages.map((p) => {
19868
- const summary = buildPageSummary(p);
19869
- return {
19870
- url: p.url,
19871
- title: p.title,
19872
- markdown: p.markdown,
19873
- projectId: scope.projectId,
19874
- scopeName: scope.scopeName,
19875
- routeFile: p.routeFile,
19876
- routeResolution: p.routeResolution,
19877
- incomingLinks: p.incomingLinks,
19878
- outgoingLinks: p.outgoingLinks,
19879
- depth: p.depth,
19880
- tags: p.tags,
19881
- indexedAt: p.generatedAt,
19882
- summary,
19883
- description: p.description,
19884
- keywords: p.keywords
19885
- };
19886
- });
19887
- await this.store.deletePages(scope);
19888
- await this.store.upsertPages(pageRecords, scope);
20921
+ if (options.force) {
20922
+ await this.store.deletePages(scope);
20923
+ this.logger.info(`Upserting ${pageRecords.length} page summaries...`);
20924
+ const pageDocs = pageRecords.map((r) => ({
20925
+ id: r.url,
20926
+ data: r.summary ?? r.title,
20927
+ metadata: {
20928
+ title: r.title,
20929
+ url: r.url,
20930
+ description: r.description ?? "",
20931
+ keywords: r.keywords ?? [],
20932
+ summary: r.summary ?? "",
20933
+ tags: r.tags,
20934
+ markdown: r.markdown,
20935
+ routeFile: r.routeFile,
20936
+ routeResolution: r.routeResolution,
20937
+ incomingLinks: r.incomingLinks,
20938
+ outgoingLinks: r.outgoingLinks,
20939
+ outgoingLinkUrls: r.outgoingLinkUrls ?? [],
20940
+ depth: r.depth,
20941
+ indexedAt: r.indexedAt,
20942
+ contentHash: r.contentHash ?? "",
20943
+ publishedAt: r.publishedAt ?? null,
20944
+ ...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
20945
+ }
20946
+ }));
20947
+ await this.store.upsertPages(pageDocs, scope);
20948
+ } else {
20949
+ if (changedPages.length > 0) {
20950
+ this.logger.info(`Upserting ${changedPages.length} changed page summaries...`);
20951
+ const pageDocs = changedPages.map((r) => ({
20952
+ id: r.url,
20953
+ data: r.summary ?? r.title,
20954
+ metadata: {
20955
+ title: r.title,
20956
+ url: r.url,
20957
+ description: r.description ?? "",
20958
+ keywords: r.keywords ?? [],
20959
+ summary: r.summary ?? "",
20960
+ tags: r.tags,
20961
+ markdown: r.markdown,
20962
+ routeFile: r.routeFile,
20963
+ routeResolution: r.routeResolution,
20964
+ incomingLinks: r.incomingLinks,
20965
+ outgoingLinks: r.outgoingLinks,
20966
+ outgoingLinkUrls: r.outgoingLinkUrls ?? [],
20967
+ depth: r.depth,
20968
+ indexedAt: r.indexedAt,
20969
+ contentHash: r.contentHash ?? "",
20970
+ publishedAt: r.publishedAt ?? null,
20971
+ ...r.meta && Object.keys(r.meta).length > 0 ? { meta: r.meta } : {}
20972
+ }
20973
+ }));
20974
+ await this.store.upsertPages(pageDocs, scope);
20975
+ }
20976
+ if (deletedPageUrls.length > 0) {
20977
+ await this.store.deletePagesByIds(deletedPageUrls, scope);
20978
+ }
20979
+ }
19889
20980
  }
20981
+ const pagesChanged = options.force ? pageRecords.length : changedPages.length;
20982
+ const pagesDeleted = deletedPageUrls.length;
19890
20983
  stageEnd("pages", pagesStart);
20984
+ this.logger.info(`Page changes: ${pagesChanged} changed/new, ${pagesDeleted} deleted, ${pageRecords.length - changedPages.length} unchanged`);
19891
20985
  this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
19892
20986
  const chunkStart = stageStart();
19893
20987
  this.logger.info("Chunking pages...");
@@ -19896,6 +20990,18 @@ var IndexPipeline = class _IndexPipeline {
19896
20990
  if (typeof maxChunks === "number") {
19897
20991
  chunks = chunks.slice(0, maxChunks);
19898
20992
  }
20993
+ if (this.hooks.transformChunk) {
20994
+ const transformed = [];
20995
+ for (const chunk of chunks) {
20996
+ const result = await this.hooks.transformChunk(chunk);
20997
+ if (result === null) {
20998
+ this.logger.debug(`Chunk ${chunk.chunkKey} skipped by transformChunk hook`);
20999
+ continue;
21000
+ }
21001
+ transformed.push(result);
21002
+ }
21003
+ chunks = transformed;
21004
+ }
19899
21005
  for (const chunk of chunks) {
19900
21006
  this.logger.event("chunked", {
19901
21007
  url: chunk.url,
@@ -19908,7 +21014,7 @@ var IndexPipeline = class _IndexPipeline {
19908
21014
  for (const chunk of chunks) {
19909
21015
  currentChunkMap.set(chunk.chunkKey, chunk);
19910
21016
  }
19911
- const changedChunks = chunks.filter((chunk) => {
21017
+ let changedChunks = chunks.filter((chunk) => {
19912
21018
  if (options.force) {
19913
21019
  return true;
19914
21020
  }
@@ -19922,36 +21028,43 @@ var IndexPipeline = class _IndexPipeline {
19922
21028
  return existingHash !== chunk.contentHash;
19923
21029
  });
19924
21030
  const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
21031
+ if (this.hooks.beforeIndex) {
21032
+ changedChunks = await this.hooks.beforeIndex(changedChunks);
21033
+ }
19925
21034
  this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
19926
21035
  const upsertStart = stageStart();
19927
21036
  let documentsUpserted = 0;
19928
21037
  if (!options.dryRun && changedChunks.length > 0) {
19929
- this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
19930
- const UPSTASH_CONTENT_LIMIT = 4096;
21038
+ this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
19931
21039
  const docs = changedChunks.map((chunk) => {
19932
- const title = chunk.title;
19933
- const sectionTitle = chunk.sectionTitle ?? "";
19934
- const url = chunk.url;
19935
- const tags = chunk.tags.join(",");
19936
- const headingPath = chunk.headingPath.join(" > ");
19937
- const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
19938
- const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
19939
- const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
21040
+ const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
21041
+ if (embeddingText.length > 2e3) {
21042
+ this.logger.warn(
21043
+ `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
21044
+ );
21045
+ }
19940
21046
  return {
19941
21047
  id: chunk.chunkKey,
19942
- content: { title, sectionTitle, text, url, tags, headingPath },
21048
+ data: embeddingText,
19943
21049
  metadata: {
19944
- projectId: scope.projectId,
19945
- scopeName: scope.scopeName,
21050
+ url: chunk.url,
19946
21051
  path: chunk.path,
21052
+ title: chunk.title,
21053
+ sectionTitle: chunk.sectionTitle ?? "",
21054
+ headingPath: chunk.headingPath.join(" > "),
19947
21055
  snippet: chunk.snippet,
21056
+ chunkText: embeddingText,
21057
+ tags: chunk.tags,
19948
21058
  ordinal: chunk.ordinal,
19949
21059
  contentHash: chunk.contentHash,
19950
21060
  depth: chunk.depth,
19951
21061
  incomingLinks: chunk.incomingLinks,
19952
21062
  routeFile: chunk.routeFile,
19953
21063
  description: chunk.description ?? "",
19954
- keywords: (chunk.keywords ?? []).join(",")
21064
+ keywords: chunk.keywords ?? [],
21065
+ publishedAt: chunk.publishedAt ?? null,
21066
+ incomingAnchorText: chunk.incomingAnchorText ?? "",
21067
+ ...chunk.meta && Object.keys(chunk.meta).length > 0 ? { meta: chunk.meta } : {}
19955
21068
  }
19956
21069
  };
19957
21070
  });
@@ -19969,9 +21082,16 @@ var IndexPipeline = class _IndexPipeline {
19969
21082
  } else {
19970
21083
  this.logger.info("No chunks to upsert \u2014 all up to date");
19971
21084
  }
21085
+ if (this.config.llmsTxt.enable && !options.dryRun) {
21086
+ const llmsStart = stageStart();
21087
+ await writeLlmsTxt(pages, this.config, this.cwd, this.logger);
21088
+ stageEnd("llms_txt", llmsStart);
21089
+ }
19972
21090
  this.logger.info("Done.");
19973
- return {
21091
+ const stats = {
19974
21092
  pagesProcessed: pages.length,
21093
+ pagesChanged,
21094
+ pagesDeleted,
19975
21095
  chunksTotal: chunks.length,
19976
21096
  chunksChanged: changedChunks.length,
19977
21097
  documentsUpserted,
@@ -19980,16 +21100,143 @@ var IndexPipeline = class _IndexPipeline {
19980
21100
  routeBestEffort,
19981
21101
  stageTimingsMs
19982
21102
  };
21103
+ if (this.hooks.afterIndex) {
21104
+ await this.hooks.afterIndex(stats);
21105
+ }
21106
+ return stats;
19983
21107
  }
19984
21108
  };
21109
+
21110
+ // src/search/related-pages.ts
21111
+ function diceScore(urlA, urlB) {
21112
+ const segmentsA = urlA.split("/").filter(Boolean);
21113
+ const segmentsB = urlB.split("/").filter(Boolean);
21114
+ if (segmentsA.length === 0 && segmentsB.length === 0) return 1;
21115
+ if (segmentsA.length === 0 || segmentsB.length === 0) return 0;
21116
+ let shared = 0;
21117
+ const minLen = Math.min(segmentsA.length, segmentsB.length);
21118
+ for (let i = 0; i < minLen; i++) {
21119
+ if (segmentsA[i] === segmentsB[i]) {
21120
+ shared++;
21121
+ } else {
21122
+ break;
21123
+ }
21124
+ }
21125
+ return 2 * shared / (segmentsA.length + segmentsB.length);
21126
+ }
21127
+ function compositeScore(isLinked, dice, semantic) {
21128
+ return (isLinked ? 0.5 : 0) + 0.3 * dice + 0.2 * semantic;
21129
+ }
21130
+ function dominantRelationshipType(isOutgoing, isIncoming, dice) {
21131
+ if (isOutgoing) return "outgoing_link";
21132
+ if (isIncoming) return "incoming_link";
21133
+ if (dice > 0.4) return "sibling";
21134
+ return "semantic";
21135
+ }
21136
+
21137
+ // src/search/engine.ts
21138
+ var rankingOverridesSchema = z.object({
21139
+ ranking: z.object({
21140
+ enableIncomingLinkBoost: z.boolean().optional(),
21141
+ enableDepthBoost: z.boolean().optional(),
21142
+ aggregationCap: z.number().int().positive().optional(),
21143
+ aggregationDecay: z.number().min(0).max(1).optional(),
21144
+ minChunkScoreRatio: z.number().min(0).max(1).optional(),
21145
+ minScoreRatio: z.number().min(0).max(1).optional(),
21146
+ scoreGapThreshold: z.number().min(0).max(1).optional(),
21147
+ weights: z.object({
21148
+ incomingLinks: z.number().optional(),
21149
+ depth: z.number().optional(),
21150
+ aggregation: z.number().optional(),
21151
+ titleMatch: z.number().optional()
21152
+ }).optional()
21153
+ }).optional(),
21154
+ search: z.object({
21155
+ pageSearchWeight: z.number().min(0).max(1).optional()
21156
+ }).optional()
21157
+ }).optional();
19985
21158
  var requestSchema = z.object({
19986
21159
  q: z.string().trim().min(1),
19987
21160
  topK: z.number().int().positive().max(100).optional(),
19988
21161
  scope: z.string().optional(),
19989
21162
  pathPrefix: z.string().optional(),
19990
21163
  tags: z.array(z.string()).optional(),
19991
- groupBy: z.enum(["page", "chunk"]).optional()
21164
+ filters: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional(),
21165
+ groupBy: z.enum(["page", "chunk"]).optional(),
21166
+ maxSubResults: z.number().int().positive().max(20).optional(),
21167
+ debug: z.boolean().optional(),
21168
+ rankingOverrides: rankingOverridesSchema
19992
21169
  });
21170
+ var MAX_SITE_STRUCTURE_PAGES = 2e3;
21171
+ function makeNode(url, depth) {
21172
+ return { url, title: "", depth, routeFile: "", isIndexed: false, childCount: 0, children: [] };
21173
+ }
21174
+ function buildTree(pages, pathPrefix) {
21175
+ const nodeMap = /* @__PURE__ */ new Map();
21176
+ const root2 = makeNode("/", 0);
21177
+ nodeMap.set("/", root2);
21178
+ for (const page of pages) {
21179
+ const normalized = normalizeUrlPath(page.url);
21180
+ const segments = normalized.split("/").filter(Boolean);
21181
+ if (segments.length === 0) {
21182
+ root2.title = page.title;
21183
+ root2.routeFile = page.routeFile;
21184
+ root2.isIndexed = true;
21185
+ continue;
21186
+ }
21187
+ for (let i = 1; i <= segments.length; i++) {
21188
+ const partialUrl = "/" + segments.slice(0, i).join("/");
21189
+ if (!nodeMap.has(partialUrl)) {
21190
+ nodeMap.set(partialUrl, makeNode(partialUrl, i));
21191
+ }
21192
+ }
21193
+ const node = nodeMap.get(normalized);
21194
+ node.title = page.title;
21195
+ node.routeFile = page.routeFile;
21196
+ node.isIndexed = true;
21197
+ }
21198
+ for (const [url, node] of nodeMap) {
21199
+ if (url === "/") continue;
21200
+ const segments = url.split("/").filter(Boolean);
21201
+ const parentUrl = segments.length === 1 ? "/" : "/" + segments.slice(0, -1).join("/");
21202
+ const parent = nodeMap.get(parentUrl) ?? root2;
21203
+ parent.children.push(node);
21204
+ }
21205
+ const sortAndCount = (node) => {
21206
+ node.children.sort((a, b) => a.url.localeCompare(b.url));
21207
+ node.childCount = node.children.length;
21208
+ for (const child of node.children) {
21209
+ sortAndCount(child);
21210
+ }
21211
+ };
21212
+ sortAndCount(root2);
21213
+ if (pathPrefix) {
21214
+ const normalizedPrefix = normalizeUrlPath(pathPrefix);
21215
+ const subtreeRoot = nodeMap.get(normalizedPrefix);
21216
+ if (subtreeRoot) {
21217
+ return subtreeRoot;
21218
+ }
21219
+ return makeNode(normalizedPrefix, normalizedPrefix.split("/").filter(Boolean).length);
21220
+ }
21221
+ return root2;
21222
+ }
21223
+ function mergeRankingOverrides(base, overrides) {
21224
+ return {
21225
+ ...base,
21226
+ search: {
21227
+ ...base.search,
21228
+ ...overrides.search
21229
+ },
21230
+ ranking: {
21231
+ ...base.ranking,
21232
+ ...overrides.ranking,
21233
+ weights: {
21234
+ ...base.ranking.weights,
21235
+ ...overrides.ranking?.weights
21236
+ }
21237
+ }
21238
+ };
21239
+ }
19993
21240
  var SearchEngine = class _SearchEngine {
19994
21241
  cwd;
19995
21242
  config;
@@ -20019,125 +21266,203 @@ var SearchEngine = class _SearchEngine {
20019
21266
  }
20020
21267
  const input = parsed.data;
20021
21268
  const totalStart = process.hrtime.bigint();
21269
+ const effectiveConfig = input.debug && input.rankingOverrides ? mergeRankingOverrides(this.config, input.rankingOverrides) : this.config;
20022
21270
  const resolvedScope = resolveScope(this.config, input.scope);
20023
21271
  const topK = input.topK ?? 10;
21272
+ const maxSubResults = input.maxSubResults ?? 5;
20024
21273
  const groupByPage = (input.groupBy ?? "page") === "page";
20025
- const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
20026
- const filterParts = [];
20027
- if (input.pathPrefix) {
20028
- const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
20029
- filterParts.push(`url GLOB '${prefix}*'`);
20030
- }
20031
- if (input.tags && input.tags.length > 0) {
20032
- for (const tag of input.tags) {
20033
- filterParts.push(`tags GLOB '*${tag}*'`);
21274
+ const queryText = input.q;
21275
+ const pathPrefix = input.pathPrefix ? input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}` : void 0;
21276
+ const filterTags = input.tags && input.tags.length > 0 ? input.tags : void 0;
21277
+ const metaFilterStr = input.filters && Object.keys(input.filters).length > 0 ? buildMetaFilterString(input.filters) : "";
21278
+ const metaFilter = metaFilterStr || void 0;
21279
+ const applyPagePostFilters = (hits) => {
21280
+ let filtered = hits;
21281
+ if (pathPrefix) {
21282
+ filtered = filtered.filter((h) => h.url.startsWith(pathPrefix));
21283
+ }
21284
+ if (filterTags) {
21285
+ filtered = filtered.filter(
21286
+ (h) => filterTags.every((tag) => h.tags.includes(tag))
21287
+ );
20034
21288
  }
20035
- }
20036
- const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
20037
- const useDualSearch = this.config.search.dualSearch && groupByPage;
21289
+ return filtered;
21290
+ };
21291
+ const applyChunkPostFilters = (hits) => {
21292
+ let filtered = hits;
21293
+ if (filterTags) {
21294
+ filtered = filtered.filter(
21295
+ (h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
21296
+ );
21297
+ }
21298
+ return filtered;
21299
+ };
20038
21300
  const searchStart = process.hrtime.bigint();
20039
- let ranked;
20040
- if (useDualSearch) {
20041
- const chunkLimit = Math.max(topK * 10, 100);
20042
- const pageLimit = 20;
20043
- const [pageHits, chunkHits] = await Promise.all([
20044
- this.store.searchPages(
20045
- input.q,
20046
- {
20047
- limit: pageLimit,
20048
- semanticWeight: this.config.search.semanticWeight,
20049
- inputEnrichment: this.config.search.inputEnrichment,
20050
- filter
20051
- },
20052
- resolvedScope
20053
- ),
20054
- this.store.search(
20055
- input.q,
20056
- {
20057
- limit: chunkLimit,
20058
- semanticWeight: this.config.search.semanticWeight,
20059
- inputEnrichment: this.config.search.inputEnrichment,
20060
- reranking: false,
20061
- filter
20062
- },
21301
+ if (groupByPage) {
21302
+ const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
21303
+ const pageLimit = Math.max(topK * 2, 20);
21304
+ const pageHits = await this.store.searchPagesByText(
21305
+ queryText,
21306
+ { limit: pageLimit * fetchMultiplier, filter: metaFilter },
21307
+ resolvedScope
21308
+ );
21309
+ const filteredPages = applyPagePostFilters(pageHits);
21310
+ let rankedPages = rankPageHits(filteredPages, effectiveConfig, input.q, input.debug);
21311
+ rankedPages = trimPagesByScoreGap(rankedPages, effectiveConfig);
21312
+ const topPages = rankedPages.slice(0, topK);
21313
+ const chunkPromises = topPages.map(
21314
+ (page) => this.store.searchChunksByUrl(
21315
+ queryText,
21316
+ page.url,
21317
+ { limit: maxSubResults, filter: metaFilter },
20063
21318
  resolvedScope
20064
- )
20065
- ]);
20066
- const rankedChunks = rankHits(chunkHits, this.config, input.q);
20067
- ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
21319
+ ).then((chunks) => applyChunkPostFilters(chunks))
21320
+ );
21321
+ const allChunks = await Promise.all(chunkPromises);
21322
+ const searchMs = hrTimeMs(searchStart);
21323
+ const results = this.buildPageFirstResults(topPages, allChunks, input.q, input.debug, maxSubResults);
21324
+ return {
21325
+ q: input.q,
21326
+ scope: resolvedScope.scopeName,
21327
+ results,
21328
+ meta: {
21329
+ timingsMs: {
21330
+ search: Math.round(searchMs),
21331
+ total: Math.round(hrTimeMs(totalStart))
21332
+ }
21333
+ }
21334
+ };
20068
21335
  } else {
21336
+ const candidateK = Math.max(50, topK);
21337
+ const fetchMultiplier = pathPrefix || filterTags ? 2 : 1;
20069
21338
  const hits = await this.store.search(
20070
- input.q,
20071
- {
20072
- limit: candidateK,
20073
- semanticWeight: this.config.search.semanticWeight,
20074
- inputEnrichment: this.config.search.inputEnrichment,
20075
- reranking: this.config.search.reranking,
20076
- filter
20077
- },
21339
+ queryText,
21340
+ { limit: candidateK * fetchMultiplier, filter: metaFilter },
20078
21341
  resolvedScope
20079
21342
  );
20080
- ranked = rankHits(hits, this.config, input.q);
20081
- }
20082
- const searchMs = hrTimeMs(searchStart);
20083
- const results = this.buildResults(ranked, topK, groupByPage, input.q);
20084
- return {
20085
- q: input.q,
20086
- scope: resolvedScope.scopeName,
20087
- results,
20088
- meta: {
20089
- timingsMs: {
20090
- search: Math.round(searchMs),
20091
- total: Math.round(hrTimeMs(totalStart))
21343
+ let filtered = hits;
21344
+ if (pathPrefix) {
21345
+ filtered = filtered.filter((h) => h.metadata.url.startsWith(pathPrefix));
21346
+ }
21347
+ if (filterTags) {
21348
+ filtered = filtered.filter(
21349
+ (h) => filterTags.every((tag) => h.metadata.tags.includes(tag))
21350
+ );
21351
+ }
21352
+ const ranked = rankHits(filtered, effectiveConfig, input.q, input.debug);
21353
+ const searchMs = hrTimeMs(searchStart);
21354
+ const results = this.buildResults(ranked, topK, false, maxSubResults, input.q, input.debug, effectiveConfig);
21355
+ return {
21356
+ q: input.q,
21357
+ scope: resolvedScope.scopeName,
21358
+ results,
21359
+ meta: {
21360
+ timingsMs: {
21361
+ search: Math.round(searchMs),
21362
+ total: Math.round(hrTimeMs(totalStart))
21363
+ }
20092
21364
  }
21365
+ };
21366
+ }
21367
+ }
21368
+ buildPageFirstResults(rankedPages, allChunks, query, debug, maxSubResults = 5) {
21369
+ return rankedPages.map((page, i) => {
21370
+ const chunks = allChunks[i] ?? [];
21371
+ const bestChunk = chunks[0];
21372
+ const snippet = bestChunk ? query ? queryAwareExcerpt(bestChunk.metadata.chunkText, query) : toSnippet(bestChunk.metadata.chunkText) : page.description || page.title;
21373
+ const result = {
21374
+ url: page.url,
21375
+ title: page.title,
21376
+ sectionTitle: bestChunk?.metadata.sectionTitle || void 0,
21377
+ snippet,
21378
+ chunkText: bestChunk?.metadata.chunkText || void 0,
21379
+ score: Number(page.finalScore.toFixed(6)),
21380
+ routeFile: page.routeFile,
21381
+ chunks: chunks.length > 0 ? chunks.slice(0, maxSubResults).map((c) => ({
21382
+ sectionTitle: c.metadata.sectionTitle || void 0,
21383
+ snippet: query ? queryAwareExcerpt(c.metadata.chunkText, query) : toSnippet(c.metadata.chunkText),
21384
+ chunkText: c.metadata.chunkText || void 0,
21385
+ headingPath: c.metadata.headingPath,
21386
+ score: Number(c.score.toFixed(6))
21387
+ })) : void 0
21388
+ };
21389
+ if (debug && page.breakdown) {
21390
+ result.breakdown = {
21391
+ baseScore: page.breakdown.baseScore,
21392
+ incomingLinkBoost: page.breakdown.incomingLinkBoost,
21393
+ depthBoost: page.breakdown.depthBoost,
21394
+ titleMatchBoost: page.breakdown.titleMatchBoost,
21395
+ freshnessBoost: page.breakdown.freshnessBoost,
21396
+ anchorTextMatchBoost: 0
21397
+ };
20093
21398
  }
20094
- };
21399
+ return result;
21400
+ });
20095
21401
  }
20096
- ensureSnippet(hit) {
21402
+ ensureSnippet(hit, query) {
21403
+ const chunkText = hit.hit.metadata.chunkText;
21404
+ if (query && chunkText) return queryAwareExcerpt(chunkText, query);
20097
21405
  const snippet = hit.hit.metadata.snippet;
20098
21406
  if (snippet && snippet.length >= 30) return snippet;
20099
- const chunkText = hit.hit.metadata.chunkText;
20100
21407
  if (chunkText) return toSnippet(chunkText);
20101
21408
  return snippet || "";
20102
21409
  }
20103
- buildResults(ordered, topK, groupByPage, _query) {
21410
+ buildResults(ordered, topK, groupByPage, maxSubResults, query, debug, config) {
21411
+ const cfg = config ?? this.config;
20104
21412
  if (groupByPage) {
20105
- let pages = aggregateByPage(ordered, this.config);
20106
- pages = trimByScoreGap(pages, this.config);
20107
- const minRatio = this.config.ranking.minChunkScoreRatio;
21413
+ let pages = aggregateByPage(ordered, cfg);
21414
+ pages = trimByScoreGap(pages, cfg);
21415
+ const minRatio = cfg.ranking.minChunkScoreRatio;
20108
21416
  return pages.slice(0, topK).map((page) => {
20109
21417
  const bestScore = page.bestChunk.finalScore;
20110
21418
  const minChunkScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
20111
- const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, 5);
20112
- return {
21419
+ const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, maxSubResults);
21420
+ const result = {
20113
21421
  url: page.url,
20114
21422
  title: page.title,
20115
21423
  sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
20116
- snippet: this.ensureSnippet(page.bestChunk),
21424
+ snippet: this.ensureSnippet(page.bestChunk, query),
21425
+ chunkText: page.bestChunk.hit.metadata.chunkText || void 0,
20117
21426
  score: Number(page.pageScore.toFixed(6)),
20118
21427
  routeFile: page.routeFile,
20119
- chunks: meaningful.length > 1 ? meaningful.map((c) => ({
21428
+ chunks: meaningful.length >= 1 ? meaningful.map((c) => ({
20120
21429
  sectionTitle: c.hit.metadata.sectionTitle || void 0,
20121
- snippet: this.ensureSnippet(c),
21430
+ snippet: this.ensureSnippet(c, query),
21431
+ chunkText: c.hit.metadata.chunkText || void 0,
20122
21432
  headingPath: c.hit.metadata.headingPath,
20123
21433
  score: Number(c.finalScore.toFixed(6))
20124
21434
  })) : void 0
20125
21435
  };
21436
+ if (debug && page.bestChunk.breakdown) {
21437
+ result.breakdown = page.bestChunk.breakdown;
21438
+ }
21439
+ return result;
20126
21440
  });
20127
21441
  } else {
20128
21442
  let filtered = ordered;
20129
- const minScore = this.config.ranking.minScore;
20130
- if (minScore > 0) {
20131
- filtered = ordered.filter((entry) => entry.finalScore >= minScore);
20132
- }
20133
- return filtered.slice(0, topK).map(({ hit, finalScore }) => ({
20134
- url: hit.metadata.url,
20135
- title: hit.metadata.title,
20136
- sectionTitle: hit.metadata.sectionTitle || void 0,
20137
- snippet: this.ensureSnippet({ hit, finalScore }),
20138
- score: Number(finalScore.toFixed(6)),
20139
- routeFile: hit.metadata.routeFile
20140
- }));
21443
+ const minScoreRatio = cfg.ranking.minScoreRatio;
21444
+ if (minScoreRatio > 0 && ordered.length > 0) {
21445
+ const topScore = ordered[0].finalScore;
21446
+ if (Number.isFinite(topScore) && topScore > 0) {
21447
+ const threshold = topScore * minScoreRatio;
21448
+ filtered = ordered.filter((entry) => entry.finalScore >= threshold);
21449
+ }
21450
+ }
21451
+ return filtered.slice(0, topK).map(({ hit, finalScore, breakdown }) => {
21452
+ const result = {
21453
+ url: hit.metadata.url,
21454
+ title: hit.metadata.title,
21455
+ sectionTitle: hit.metadata.sectionTitle || void 0,
21456
+ snippet: this.ensureSnippet({ hit, finalScore }, query),
21457
+ chunkText: hit.metadata.chunkText || void 0,
21458
+ score: Number(finalScore.toFixed(6)),
21459
+ routeFile: hit.metadata.routeFile
21460
+ };
21461
+ if (debug && breakdown) {
21462
+ result.breakdown = breakdown;
21463
+ }
21464
+ return result;
21465
+ });
20141
21466
  }
20142
21467
  }
20143
21468
  async getPage(pathOrUrl, scope) {
@@ -20163,6 +21488,116 @@ var SearchEngine = class _SearchEngine {
20163
21488
  markdown: page.markdown
20164
21489
  };
20165
21490
  }
21491
+ async listPages(opts) {
21492
+ const resolvedScope = resolveScope(this.config, opts?.scope);
21493
+ const pathPrefix = opts?.pathPrefix ? opts.pathPrefix.startsWith("/") ? opts.pathPrefix : `/${opts.pathPrefix}` : void 0;
21494
+ return this.store.listPages(resolvedScope, {
21495
+ cursor: opts?.cursor,
21496
+ limit: opts?.limit,
21497
+ pathPrefix
21498
+ });
21499
+ }
21500
+ async getSiteStructure(opts) {
21501
+ const maxPages = Math.min(opts?.maxPages ?? MAX_SITE_STRUCTURE_PAGES, MAX_SITE_STRUCTURE_PAGES);
21502
+ const allPages = [];
21503
+ let cursor;
21504
+ let truncated = false;
21505
+ do {
21506
+ const result = await this.listPages({
21507
+ pathPrefix: opts?.pathPrefix,
21508
+ scope: opts?.scope,
21509
+ cursor,
21510
+ limit: 200
21511
+ });
21512
+ allPages.push(...result.pages);
21513
+ cursor = result.nextCursor;
21514
+ if (allPages.length >= maxPages) {
21515
+ truncated = allPages.length > maxPages || !!cursor;
21516
+ allPages.length = maxPages;
21517
+ break;
21518
+ }
21519
+ } while (cursor);
21520
+ const root2 = buildTree(allPages, opts?.pathPrefix);
21521
+ return {
21522
+ root: root2,
21523
+ totalPages: allPages.length,
21524
+ truncated
21525
+ };
21526
+ }
21527
+ async getRelatedPages(pathOrUrl, opts) {
21528
+ const resolvedScope = resolveScope(this.config, opts?.scope);
21529
+ const urlPath = this.resolveInputPath(pathOrUrl);
21530
+ const topK = Math.min(opts?.topK ?? 10, 25);
21531
+ const source = await this.store.fetchPageWithVector(urlPath, resolvedScope);
21532
+ if (!source) {
21533
+ throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
21534
+ }
21535
+ const sourceOutgoing = new Set(source.metadata.outgoingLinkUrls ?? []);
21536
+ const semanticHits = await this.store.searchPagesByVector(
21537
+ source.vector,
21538
+ { limit: 50 },
21539
+ resolvedScope
21540
+ );
21541
+ const filteredHits = semanticHits.filter((h) => h.url !== urlPath);
21542
+ const semanticScoreMap = /* @__PURE__ */ new Map();
21543
+ for (const hit of filteredHits) {
21544
+ semanticScoreMap.set(hit.url, hit.score);
21545
+ }
21546
+ const candidateUrls = /* @__PURE__ */ new Set();
21547
+ for (const hit of filteredHits) {
21548
+ candidateUrls.add(hit.url);
21549
+ }
21550
+ for (const url of sourceOutgoing) {
21551
+ if (url !== urlPath) candidateUrls.add(url);
21552
+ }
21553
+ const missingUrls = [...sourceOutgoing].filter(
21554
+ (u) => u !== urlPath && !semanticScoreMap.has(u)
21555
+ );
21556
+ const fetchedPages = missingUrls.length > 0 ? await this.store.fetchPagesBatch(missingUrls, resolvedScope) : [];
21557
+ const metaMap = /* @__PURE__ */ new Map();
21558
+ for (const hit of filteredHits) {
21559
+ metaMap.set(hit.url, { title: hit.title, routeFile: hit.routeFile, outgoingLinkUrls: [] });
21560
+ }
21561
+ for (const p of fetchedPages) {
21562
+ metaMap.set(p.url, { title: p.title, routeFile: p.routeFile, outgoingLinkUrls: p.outgoingLinkUrls });
21563
+ }
21564
+ const semanticUrls = filteredHits.map((h) => h.url);
21565
+ if (semanticUrls.length > 0) {
21566
+ const semanticPageData = await this.store.fetchPagesBatch(semanticUrls, resolvedScope);
21567
+ for (const p of semanticPageData) {
21568
+ const existing = metaMap.get(p.url);
21569
+ if (existing) {
21570
+ existing.outgoingLinkUrls = p.outgoingLinkUrls;
21571
+ }
21572
+ }
21573
+ }
21574
+ const candidates = [];
21575
+ for (const url of candidateUrls) {
21576
+ const meta = metaMap.get(url);
21577
+ if (!meta) continue;
21578
+ const isOutgoing = sourceOutgoing.has(url);
21579
+ const isIncoming = meta.outgoingLinkUrls.includes(urlPath);
21580
+ const isLinked = isOutgoing || isIncoming;
21581
+ const dice = diceScore(urlPath, url);
21582
+ const semantic = semanticScoreMap.get(url) ?? 0;
21583
+ const score = compositeScore(isLinked, dice, semantic);
21584
+ const relationshipType = dominantRelationshipType(isOutgoing, isIncoming, dice);
21585
+ candidates.push({
21586
+ url,
21587
+ title: meta.title,
21588
+ score: Number(score.toFixed(6)),
21589
+ relationshipType,
21590
+ routeFile: meta.routeFile
21591
+ });
21592
+ }
21593
+ candidates.sort((a, b) => b.score - a.score);
21594
+ const results = candidates.slice(0, topK);
21595
+ return {
21596
+ sourceUrl: urlPath,
21597
+ scope: resolvedScope.scopeName,
21598
+ relatedPages: results
21599
+ };
21600
+ }
20166
21601
  async health() {
20167
21602
  return this.store.health();
20168
21603
  }
@@ -20185,14 +21620,40 @@ function createServer(engine) {
20185
21620
  server.registerTool(
20186
21621
  "search",
20187
21622
  {
20188
- description: "Semantic site search powered by Upstash Search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and groupBy.",
21623
+ description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
20189
21624
  inputSchema: {
20190
21625
  query: z.string().min(1),
20191
21626
  scope: z.string().optional(),
20192
21627
  topK: z.number().int().positive().max(100).optional(),
20193
21628
  pathPrefix: z.string().optional(),
20194
21629
  tags: z.array(z.string()).optional(),
20195
- groupBy: z.enum(["page", "chunk"]).optional()
21630
+ filters: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional(),
21631
+ groupBy: z.enum(["page", "chunk"]).optional(),
21632
+ maxSubResults: z.number().int().positive().max(20).optional()
21633
+ },
21634
+ outputSchema: {
21635
+ q: z.string(),
21636
+ scope: z.string(),
21637
+ results: z.array(z.object({
21638
+ url: z.string(),
21639
+ title: z.string(),
21640
+ sectionTitle: z.string().optional(),
21641
+ snippet: z.string(),
21642
+ score: z.number(),
21643
+ routeFile: z.string(),
21644
+ chunks: z.array(z.object({
21645
+ sectionTitle: z.string().optional(),
21646
+ snippet: z.string(),
21647
+ headingPath: z.array(z.string()),
21648
+ score: z.number()
21649
+ })).optional()
21650
+ })),
21651
+ meta: z.object({
21652
+ timingsMs: z.object({
21653
+ search: z.number(),
21654
+ total: z.number()
21655
+ })
21656
+ })
20196
21657
  }
20197
21658
  },
20198
21659
  async (input) => {
@@ -20202,7 +21663,9 @@ function createServer(engine) {
20202
21663
  scope: input.scope,
20203
21664
  pathPrefix: input.pathPrefix,
20204
21665
  tags: input.tags,
20205
- groupBy: input.groupBy
21666
+ filters: input.filters,
21667
+ groupBy: input.groupBy,
21668
+ maxSubResults: input.maxSubResults
20206
21669
  });
20207
21670
  return {
20208
21671
  content: [
@@ -20210,7 +21673,8 @@ function createServer(engine) {
20210
21673
  type: "text",
20211
21674
  text: JSON.stringify(result, null, 2)
20212
21675
  }
20213
- ]
21676
+ ],
21677
+ structuredContent: result
20214
21678
  };
20215
21679
  }
20216
21680
  );
@@ -20235,8 +21699,134 @@ function createServer(engine) {
20235
21699
  };
20236
21700
  }
20237
21701
  );
21702
+ server.registerTool(
21703
+ "list_pages",
21704
+ {
21705
+ description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
21706
+ inputSchema: {
21707
+ pathPrefix: z.string().optional(),
21708
+ cursor: z.string().optional(),
21709
+ limit: z.number().int().positive().max(200).optional(),
21710
+ scope: z.string().optional()
21711
+ }
21712
+ },
21713
+ async (input) => {
21714
+ const result = await engine.listPages({
21715
+ pathPrefix: input.pathPrefix,
21716
+ cursor: input.cursor,
21717
+ limit: input.limit,
21718
+ scope: input.scope
21719
+ });
21720
+ return {
21721
+ content: [
21722
+ {
21723
+ type: "text",
21724
+ text: JSON.stringify(result, null, 2)
21725
+ }
21726
+ ]
21727
+ };
21728
+ }
21729
+ );
21730
+ server.registerTool(
21731
+ "get_site_structure",
21732
+ {
21733
+ description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
21734
+ inputSchema: {
21735
+ pathPrefix: z.string().optional(),
21736
+ scope: z.string().optional(),
21737
+ maxPages: z.number().int().positive().max(2e3).optional()
21738
+ }
21739
+ },
21740
+ async (input) => {
21741
+ const result = await engine.getSiteStructure({
21742
+ pathPrefix: input.pathPrefix,
21743
+ scope: input.scope,
21744
+ maxPages: input.maxPages
21745
+ });
21746
+ return {
21747
+ content: [
21748
+ {
21749
+ type: "text",
21750
+ text: JSON.stringify(result, null, 2)
21751
+ }
21752
+ ]
21753
+ };
21754
+ }
21755
+ );
21756
+ server.registerTool(
21757
+ "find_source_file",
21758
+ {
21759
+ description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
21760
+ inputSchema: {
21761
+ query: z.string().min(1),
21762
+ scope: z.string().optional()
21763
+ }
21764
+ },
21765
+ async (input) => {
21766
+ const result = await engine.search({
21767
+ q: input.query,
21768
+ topK: 1,
21769
+ scope: input.scope
21770
+ });
21771
+ if (result.results.length === 0) {
21772
+ return {
21773
+ content: [
21774
+ {
21775
+ type: "text",
21776
+ text: JSON.stringify({
21777
+ error: "No matching content found for the given query."
21778
+ })
21779
+ }
21780
+ ]
21781
+ };
21782
+ }
21783
+ const match = result.results[0];
21784
+ const { url, routeFile, sectionTitle, snippet } = match;
21785
+ return {
21786
+ content: [
21787
+ {
21788
+ type: "text",
21789
+ text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
21790
+ }
21791
+ ]
21792
+ };
21793
+ }
21794
+ );
21795
+ server.registerTool(
21796
+ "get_related_pages",
21797
+ {
21798
+ description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
21799
+ inputSchema: {
21800
+ pathOrUrl: z.string().min(1),
21801
+ scope: z.string().optional(),
21802
+ topK: z.number().int().positive().max(25).optional()
21803
+ }
21804
+ },
21805
+ async (input) => {
21806
+ const result = await engine.getRelatedPages(input.pathOrUrl, {
21807
+ topK: input.topK,
21808
+ scope: input.scope
21809
+ });
21810
+ return {
21811
+ content: [
21812
+ {
21813
+ type: "text",
21814
+ text: JSON.stringify(result, null, 2)
21815
+ }
21816
+ ]
21817
+ };
21818
+ }
21819
+ );
20238
21820
  return server;
20239
21821
  }
21822
+ function resolveApiKey(config) {
21823
+ return config.mcp.http.apiKey ?? (config.mcp.http.apiKeyEnv ? process.env[config.mcp.http.apiKeyEnv] : void 0);
21824
+ }
21825
+ function verifyApiKey(provided, expected) {
21826
+ const a = createHash("sha256").update(provided).digest();
21827
+ const b = createHash("sha256").update(expected).digest();
21828
+ return timingSafeEqual(a, b);
21829
+ }
20240
21830
  function redirectConsoleToStderr() {
20241
21831
  console.log = (...args) => {
20242
21832
  process.stderr.write(`[LOG] ${args.map(String).join(" ")}
@@ -20251,7 +21841,22 @@ async function startHttpServer(serverFactory, config, opts) {
20251
21841
  const app = createMcpExpressApp();
20252
21842
  const port = opts.httpPort ?? config.mcp.http.port;
20253
21843
  const endpointPath = opts.httpPath ?? config.mcp.http.path;
21844
+ const isPublic = config.mcp.access === "public";
21845
+ const host = isPublic ? "0.0.0.0" : "127.0.0.1";
21846
+ const apiKey = isPublic ? resolveApiKey(config) : void 0;
20254
21847
  app.post(endpointPath, async (req, res) => {
21848
+ if (isPublic && apiKey) {
21849
+ const authHeader = req.headers["authorization"];
21850
+ const provided = (authHeader?.startsWith("Bearer ") ? authHeader.slice(7) : void 0) ?? req.headers["x-api-key"] ?? "";
21851
+ if (!provided || !verifyApiKey(provided, apiKey)) {
21852
+ res.status(401).json({
21853
+ jsonrpc: "2.0",
21854
+ error: { code: -32001, message: "Unauthorized" },
21855
+ id: null
21856
+ });
21857
+ return;
21858
+ }
21859
+ }
20255
21860
  const server = serverFactory();
20256
21861
  const transport = new StreamableHTTPServerTransport({
20257
21862
  sessionIdGenerator: void 0
@@ -20301,9 +21906,12 @@ async function startHttpServer(serverFactory, config, opts) {
20301
21906
  );
20302
21907
  });
20303
21908
  await new Promise((resolve, reject) => {
20304
- const instance = app.listen(port, "127.0.0.1", () => {
20305
- process.stderr.write(`SearchSocket MCP HTTP server listening on http://127.0.0.1:${port}${endpointPath}
21909
+ const instance = app.listen(port, host, () => {
21910
+ process.stderr.write(`SearchSocket MCP HTTP server listening on http://${host}:${port}${endpointPath}
20306
21911
  `);
21912
+ if (isPublic) {
21913
+ process.stderr.write("WARNING: Server is in public mode. Ensure HTTPS is configured via a reverse proxy for production use.\n");
21914
+ }
20307
21915
  resolve();
20308
21916
  });
20309
21917
  instance.once("error", reject);
@@ -20318,6 +21926,13 @@ async function runMcpServer(options = {}) {
20318
21926
  cwd: options.cwd,
20319
21927
  configPath: options.configPath
20320
21928
  });
21929
+ if (options.access) config.mcp.access = options.access;
21930
+ if (options.apiKey) config.mcp.http.apiKey = options.apiKey;
21931
+ if (config.mcp.access === "public" && !resolveApiKey(config)) {
21932
+ throw new Error(
21933
+ 'MCP access is "public" but no API key is configured. Pass --api-key or set mcp.http.apiKey / mcp.http.apiKeyEnv in config.'
21934
+ );
21935
+ }
20321
21936
  const resolvedTransport = options.transport ?? config.mcp.transport;
20322
21937
  if (resolvedTransport === "stdio") {
20323
21938
  redirectConsoleToStderr();
@@ -20335,8 +21950,6 @@ async function runMcpServer(options = {}) {
20335
21950
  const stdioTransport = new StdioServerTransport();
20336
21951
  await server.connect(stdioTransport);
20337
21952
  }
20338
-
20339
- // src/sveltekit/handle.ts
20340
21953
  var InMemoryRateLimiter = class {
20341
21954
  constructor(windowMs, max) {
20342
21955
  this.windowMs = windowMs;
@@ -20364,7 +21977,13 @@ function searchsocketHandle(options = {}) {
20364
21977
  let enginePromise = null;
20365
21978
  let configPromise = null;
20366
21979
  let apiPath = options.path;
21980
+ let llmsServePath = null;
21981
+ let serveMarkdownVariants = false;
21982
+ let mcpPath;
21983
+ let mcpApiKey;
21984
+ let mcpEnableJsonResponse = true;
20367
21985
  let rateLimiter = null;
21986
+ let notConfigured = false;
20368
21987
  const getConfig = async () => {
20369
21988
  if (!configPromise) {
20370
21989
  let configP;
@@ -20381,6 +22000,13 @@ function searchsocketHandle(options = {}) {
20381
22000
  }
20382
22001
  configPromise = configP.then((config) => {
20383
22002
  apiPath = apiPath ?? config.api.path;
22003
+ mcpPath = config.mcp.handle.path;
22004
+ mcpApiKey = config.mcp.handle.apiKey;
22005
+ mcpEnableJsonResponse = config.mcp.handle.enableJsonResponse;
22006
+ if (config.llmsTxt.enable) {
22007
+ llmsServePath = "/" + config.llmsTxt.outputPath.replace(/^static\//, "");
22008
+ serveMarkdownVariants = config.llmsTxt.serveMarkdownVariants;
22009
+ }
20384
22010
  if (config.api.rateLimit && !isServerless()) {
20385
22011
  rateLimiter = new InMemoryRateLimiter(config.api.rateLimit.windowMs, config.api.rateLimit.max);
20386
22012
  }
@@ -20390,59 +22016,109 @@ function searchsocketHandle(options = {}) {
20390
22016
  return configPromise;
20391
22017
  };
20392
22018
  const getEngine = async () => {
22019
+ if (notConfigured) {
22020
+ throw new SearchSocketError(
22021
+ "SEARCH_NOT_CONFIGURED",
22022
+ "Search is not configured. Set the required Upstash environment variables to enable search.",
22023
+ 503
22024
+ );
22025
+ }
20393
22026
  if (!enginePromise) {
20394
22027
  const config = await getConfig();
20395
22028
  enginePromise = SearchEngine.create({
20396
22029
  cwd: options.cwd,
20397
22030
  config
22031
+ }).catch((error) => {
22032
+ enginePromise = null;
22033
+ if (error instanceof SearchSocketError && error.code === "VECTOR_BACKEND_UNAVAILABLE") {
22034
+ notConfigured = true;
22035
+ throw new SearchSocketError(
22036
+ "SEARCH_NOT_CONFIGURED",
22037
+ "Search is not configured. Set the required Upstash environment variables to enable search.",
22038
+ 503
22039
+ );
22040
+ }
22041
+ throw error;
20398
22042
  });
20399
22043
  }
20400
22044
  return enginePromise;
20401
22045
  };
20402
22046
  const bodyLimit = options.maxBodyBytes ?? 64 * 1024;
20403
22047
  return async ({ event, resolve }) => {
20404
- if (apiPath && event.url.pathname !== apiPath) {
20405
- return resolve(event);
22048
+ if (apiPath && !isApiPath(event.url.pathname, apiPath) && event.url.pathname !== llmsServePath) {
22049
+ const isMarkdownVariant = event.request.method === "GET" && event.url.pathname.endsWith(".md");
22050
+ if (mcpPath && event.url.pathname === mcpPath) {
22051
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
22052
+ }
22053
+ if (mcpPath) {
22054
+ if (serveMarkdownVariants && isMarkdownVariant) ; else {
22055
+ return resolve(event);
22056
+ }
22057
+ } else {
22058
+ if (configPromise || options.config || options.rawConfig) {
22059
+ await getConfig();
22060
+ if (mcpPath && event.url.pathname === mcpPath) {
22061
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
22062
+ }
22063
+ if (!(serveMarkdownVariants && isMarkdownVariant)) {
22064
+ return resolve(event);
22065
+ }
22066
+ } else {
22067
+ return resolve(event);
22068
+ }
22069
+ }
20406
22070
  }
20407
22071
  const config = await getConfig();
22072
+ if (llmsServePath && event.request.method === "GET" && event.url.pathname === llmsServePath) {
22073
+ const cwd = options.cwd ?? process.cwd();
22074
+ const filePath = path.resolve(cwd, config.llmsTxt.outputPath);
22075
+ try {
22076
+ const content = await fs8.readFile(filePath, "utf8");
22077
+ return new Response(content, {
22078
+ status: 200,
22079
+ headers: { "content-type": "text/plain; charset=utf-8" }
22080
+ });
22081
+ } catch {
22082
+ return resolve(event);
22083
+ }
22084
+ }
22085
+ if (serveMarkdownVariants && event.request.method === "GET" && event.url.pathname.endsWith(".md")) {
22086
+ let rawPath;
22087
+ try {
22088
+ rawPath = decodeURIComponent(event.url.pathname.slice(0, -3));
22089
+ } catch {
22090
+ return resolve(event);
22091
+ }
22092
+ const scope = event.url.searchParams?.get("scope") ?? void 0;
22093
+ try {
22094
+ const engine = await getEngine();
22095
+ const page = await engine.getPage(rawPath, scope);
22096
+ return new Response(page.markdown, {
22097
+ status: 200,
22098
+ headers: { "content-type": "text/markdown; charset=utf-8" }
22099
+ });
22100
+ } catch (error) {
22101
+ if (error instanceof SearchSocketError && error.status === 404) {
22102
+ return resolve(event);
22103
+ }
22104
+ throw error;
22105
+ }
22106
+ }
22107
+ if (mcpPath && event.url.pathname === mcpPath) {
22108
+ return handleMcpRequest(event, mcpApiKey, mcpEnableJsonResponse, getEngine);
22109
+ }
20408
22110
  const targetPath = apiPath ?? config.api.path;
20409
- if (event.url.pathname !== targetPath) {
22111
+ if (!isApiPath(event.url.pathname, targetPath)) {
20410
22112
  return resolve(event);
20411
22113
  }
20412
- if (event.request.method === "OPTIONS") {
22114
+ const subPath = event.url.pathname.slice(targetPath.length);
22115
+ const method = event.request.method;
22116
+ if (method === "OPTIONS") {
20413
22117
  return new Response(null, {
20414
22118
  status: 204,
20415
22119
  headers: buildCorsHeaders(event.request, config)
20416
22120
  });
20417
22121
  }
20418
- if (event.request.method !== "POST") {
20419
- return withCors(
20420
- new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Method not allowed", 405))), {
20421
- status: 405,
20422
- headers: {
20423
- "content-type": "application/json"
20424
- }
20425
- }),
20426
- event.request,
20427
- config
20428
- );
20429
- }
20430
- const contentLength = Number(event.request.headers.get("content-length") ?? 0);
20431
- if (contentLength > bodyLimit) {
20432
- return withCors(
20433
- new Response(
20434
- JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Request body too large", 413))),
20435
- {
20436
- status: 413,
20437
- headers: {
20438
- "content-type": "application/json"
20439
- }
20440
- }
20441
- ),
20442
- event.request,
20443
- config
20444
- );
20445
- }
20446
22122
  if (rateLimiter) {
20447
22123
  const ip = event.getClientAddress?.() ?? event.request.headers.get("x-forwarded-for")?.split(",")[0]?.trim() ?? "unknown";
20448
22124
  if (!rateLimiter.check(ip)) {
@@ -20462,39 +22138,32 @@ function searchsocketHandle(options = {}) {
20462
22138
  }
20463
22139
  }
20464
22140
  try {
20465
- let rawBody;
20466
- if (typeof event.request.text === "function") {
20467
- rawBody = await event.request.text();
20468
- } else {
20469
- let parsedFallback;
20470
- try {
20471
- parsedFallback = await event.request.json();
20472
- } catch (error) {
20473
- if (error instanceof SyntaxError) {
20474
- throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
20475
- }
20476
- throw error;
22141
+ if (method === "GET") {
22142
+ if (subPath === "" || subPath === "/") {
22143
+ return await handleGetSearch(event, config, getEngine);
20477
22144
  }
20478
- rawBody = JSON.stringify(parsedFallback);
20479
- }
20480
- if (Buffer.byteLength(rawBody, "utf8") > bodyLimit) {
20481
- throw new SearchSocketError("INVALID_REQUEST", "Request body too large", 413);
22145
+ if (subPath === "/health") {
22146
+ return await handleGetHealth(event, config, getEngine);
22147
+ }
22148
+ if (subPath.startsWith("/pages/")) {
22149
+ return await handleGetPage(event, config, getEngine, subPath);
22150
+ }
22151
+ return withCors(
22152
+ new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Not found", 404))), {
22153
+ status: 404,
22154
+ headers: { "content-type": "application/json" }
22155
+ }),
22156
+ event.request,
22157
+ config
22158
+ );
20482
22159
  }
20483
- let body;
20484
- try {
20485
- body = JSON.parse(rawBody);
20486
- } catch {
20487
- throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
22160
+ if (method === "POST" && (subPath === "" || subPath === "/")) {
22161
+ return await handlePostSearch(event, config, getEngine, bodyLimit);
20488
22162
  }
20489
- const engine = await getEngine();
20490
- const searchRequest = body;
20491
- const result = await engine.search(searchRequest);
20492
22163
  return withCors(
20493
- new Response(JSON.stringify(result), {
20494
- status: 200,
20495
- headers: {
20496
- "content-type": "application/json"
20497
- }
22164
+ new Response(JSON.stringify(toErrorPayload(new SearchSocketError("INVALID_REQUEST", "Method not allowed", 405))), {
22165
+ status: 405,
22166
+ headers: { "content-type": "application/json" }
20498
22167
  }),
20499
22168
  event.request,
20500
22169
  config
@@ -20515,6 +22184,183 @@ function searchsocketHandle(options = {}) {
20515
22184
  }
20516
22185
  };
20517
22186
  }
22187
/**
 * True when `pathname` is the API root itself or any sub-path beneath it
 * (e.g. "/api" matches "/api" and "/api/health", but not "/apix").
 */
function isApiPath(pathname, apiPath) {
  if (pathname === apiPath) {
    return true;
  }
  return pathname.startsWith(`${apiPath}/`);
}
22190
/**
 * Handle `GET <apiPath>?q=...` search requests.
 *
 * Query parameters:
 *   q              (required) search query string; must be non-blank
 *   topK           optional positive integer result count
 *   scope          optional scope filter, passed through verbatim
 *   pathPrefix     optional path-prefix filter, passed through verbatim
 *   groupBy        optional, must be "page" or "chunk"
 *   maxSubResults  optional integer in [1, 20]
 *   tags           optional, repeatable (collected via getAll)
 *
 * Throws SearchSocketError(INVALID_REQUEST, 400) on invalid input.
 * Returns a CORS-wrapped JSON Response with the engine's search result.
 */
async function handleGetSearch(event, config, getEngine) {
  const params = event.url.searchParams;
  const q = params.get("q");
  if (!q || q.trim() === "") {
    throw new SearchSocketError("INVALID_REQUEST", "Missing required query parameter: q", 400);
  }
  const searchRequest = { q };
  const topK = params.get("topK");
  if (topK !== null) {
    // Number() + isInteger rejects fractional or trailing-garbage values
    // (e.g. "1.5", "5abc") that parseInt would silently truncate to an int.
    const parsed = Number(topK);
    if (!Number.isInteger(parsed) || parsed < 1) {
      throw new SearchSocketError("INVALID_REQUEST", "topK must be a positive integer", 400);
    }
    searchRequest.topK = parsed;
  }
  const scope = params.get("scope");
  if (scope !== null) searchRequest.scope = scope;
  const pathPrefix = params.get("pathPrefix");
  if (pathPrefix !== null) searchRequest.pathPrefix = pathPrefix;
  const groupBy = params.get("groupBy");
  if (groupBy) {
    if (groupBy !== "page" && groupBy !== "chunk") {
      throw new SearchSocketError("INVALID_REQUEST", 'groupBy must be "page" or "chunk"', 400);
    }
    searchRequest.groupBy = groupBy;
  }
  const maxSubResults = params.get("maxSubResults");
  if (maxSubResults !== null) {
    const parsed = Number(maxSubResults);
    if (!Number.isInteger(parsed) || parsed < 1 || parsed > 20) {
      throw new SearchSocketError("INVALID_REQUEST", "maxSubResults must be a positive integer between 1 and 20", 400);
    }
    searchRequest.maxSubResults = parsed;
  }
  const tags = params.getAll("tags");
  if (tags.length > 0) searchRequest.tags = tags;
  const engine = await getEngine();
  const result = await engine.search(searchRequest);
  return withCors(
    new Response(JSON.stringify(result), {
      status: 200,
      headers: { "content-type": "application/json" },
    }),
    event.request,
    config
  );
}
22237
/**
 * Serve `GET <apiPath>/health`: report the engine's health status as a
 * CORS-wrapped JSON Response.
 */
async function handleGetHealth(event, config, getEngine) {
  const engine = await getEngine();
  const health = await engine.health();
  const response = new Response(JSON.stringify(health), {
    status: 200,
    headers: { "content-type": "application/json" },
  });
  return withCors(response, event.request, config);
}
22249
/**
 * Serve `GET <apiPath>/pages/<path>`: fetch a single indexed page.
 * Throws SearchSocketError(400) when the URL-encoded page path is malformed;
 * errors from the engine (including 404s) propagate to the caller.
 */
async function handleGetPage(event, config, getEngine, subPath) {
  // Everything after the "/pages" segment (including the leading "/") is the
  // URL-encoded page path.
  const encoded = subPath.slice("/pages".length);
  let pagePath;
  try {
    pagePath = decodeURIComponent(encoded);
  } catch {
    throw new SearchSocketError("INVALID_REQUEST", "Malformed page path", 400);
  }
  const scope = event.url.searchParams?.get("scope") ?? void 0;
  const engine = await getEngine();
  const page = await engine.getPage(pagePath, scope);
  const response = new Response(JSON.stringify(page), {
    status: 200,
    headers: { "content-type": "application/json" },
  });
  return withCors(response, event.request, config);
}
22269
/**
 * Serve `POST <apiPath>`: run a JSON-body search.
 * Enforces `bodyLimit` twice — a cheap check on the declared content-length,
 * then an exact check on the actual UTF-8 byte size of the body read.
 * Throws SearchSocketError 413 (too large) / 400 (malformed JSON).
 */
async function handlePostSearch(event, config, getEngine, bodyLimit) {
  // Fast rejection based on the declared content-length before reading.
  const declaredLength = Number(event.request.headers.get("content-length") ?? 0);
  if (declaredLength > bodyLimit) {
    throw new SearchSocketError("INVALID_REQUEST", "Request body too large", 413);
  }
  let rawBody;
  if (typeof event.request.text === "function") {
    rawBody = await event.request.text();
  } else {
    // Some adapters only expose json(); round-trip through JSON.stringify so
    // the byte-size check below still applies.
    let fallbackParsed;
    try {
      fallbackParsed = await event.request.json();
    } catch (error) {
      if (error instanceof SyntaxError) {
        throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
      }
      throw error;
    }
    rawBody = JSON.stringify(fallbackParsed);
  }
  if (Buffer.byteLength(rawBody, "utf8") > bodyLimit) {
    throw new SearchSocketError("INVALID_REQUEST", "Request body too large", 413);
  }
  let searchRequest;
  try {
    searchRequest = JSON.parse(rawBody);
  } catch {
    throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
  }
  const engine = await getEngine();
  const result = await engine.search(searchRequest);
  const response = new Response(JSON.stringify(result), {
    status: 200,
    headers: { "content-type": "application/json" },
  });
  return withCors(response, event.request, config);
}
22310
/**
 * Handle a request to the MCP endpoint via the web-standard streamable HTTP
 * transport. When `apiKey` is set, requires a matching `Authorization:
 * Bearer <key>` header, compared in constant time.
 * Returns JSON-RPC error responses for auth failures (401, -32001) and
 * unexpected errors (500, -32603).
 */
async function handleMcpRequest(event, apiKey, enableJsonResponse, getEngine) {
  if (apiKey) {
    const header = event.request.headers.get("authorization") ?? "";
    const token = header.startsWith("Bearer ") ? header.slice(7) : "";
    const providedBuf = Buffer.from(token);
    const expectedBuf = Buffer.from(apiKey);
    // timingSafeEqual requires equal-length inputs, so check length first;
    // the comparison itself is constant-time.
    const authorized = providedBuf.length === expectedBuf.length && timingSafeEqual(providedBuf, expectedBuf);
    if (!authorized) {
      const payload = { jsonrpc: "2.0", error: { code: -32001, message: "Unauthorized" }, id: null };
      return new Response(JSON.stringify(payload), {
        status: 401,
        headers: { "content-type": "application/json" },
      });
    }
  }
  const transport = new WebStandardStreamableHTTPServerTransport({
    sessionIdGenerator: void 0,
    enableJsonResponse,
  });
  let server;
  try {
    const engine = await getEngine();
    server = createServer(engine);
    await server.connect(transport);
    const response = await transport.handleRequest(event.request);
    if (enableJsonResponse) {
      // Stateless JSON mode: the response is complete, so tear down now.
      // (In streaming mode the transport must stay open for the response.)
      await transport.close();
      await server.close();
    }
    return response;
  } catch (error) {
    // Best-effort cleanup; teardown failures must not mask the original error.
    try {
      await transport.close();
    } catch {
    }
    try {
      await server?.close();
    } catch {
    }
    const payload = {
      jsonrpc: "2.0",
      error: {
        code: -32603,
        message: error instanceof Error ? error.message : "Internal server error",
      },
      id: null,
    };
    return new Response(JSON.stringify(payload), {
      status: 500,
      headers: { "content-type": "application/json" },
    });
  }
}
20518
22364
  function buildCorsHeaders(request, config) {
20519
22365
  const allowOrigins = config.api.cors.allowOrigins;
20520
22366
  if (!allowOrigins || allowOrigins.length === 0) {
@@ -20527,7 +22373,7 @@ function buildCorsHeaders(request, config) {
20527
22373
  }
20528
22374
  return {
20529
22375
  "access-control-allow-origin": allowOrigins.includes("*") ? "*" : origin,
20530
- "access-control-allow-methods": "POST, OPTIONS",
22376
+ "access-control-allow-methods": "GET, POST, OPTIONS",
20531
22377
  "access-control-allow-headers": "content-type"
20532
22378
  };
20533
22379
  }
@@ -20563,9 +22409,6 @@ function shouldRunAutoIndex(options) {
20563
22409
  if (explicit && /^(1|true|yes)$/i.test(explicit)) {
20564
22410
  return true;
20565
22411
  }
20566
- if (process.env.CI && /^(1|true)$/i.test(process.env.CI)) {
20567
- return true;
20568
- }
20569
22412
  return false;
20570
22413
  }
20571
22414
  function searchsocketVitePlugin(options = {}) {
@@ -20590,7 +22433,8 @@ function searchsocketVitePlugin(options = {}) {
20590
22433
  const pipeline = await IndexPipeline.create({
20591
22434
  cwd,
20592
22435
  configPath: options.configPath,
20593
- logger: logger3
22436
+ logger: logger3,
22437
+ hooks: options.hooks
20594
22438
  });
20595
22439
  const stats = await pipeline.run({
20596
22440
  changedOnly: options.changedOnly ?? true,