searchsocket 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17305,6 +17305,26 @@ function joinUrl(baseUrl, route) {
17305
17305
  const routePart = ensureLeadingSlash(route);
17306
17306
  return `${base}${routePart}`;
17307
17307
  }
17308
/**
 * Rebuild a page's markdown body from its chunks.
 *
 * A chunk's text may begin with a title prefix, either
 * "<pageTitle> — <sectionTitle>\n\n" or "<pageTitle>\n\n". When one of those
 * prefixes is present it is removed so the title is not repeated for every
 * chunk. Each remaining body is trimmed and the bodies are joined with a
 * blank line.
 *
 * @param {Array<{chunkText?: string, sectionTitle?: string}>} chunks
 *   Chunks in the order they should appear; this function does not sort them.
 * @param {string} pageTitle - Title used to build the prefixes to strip.
 * @returns {string} The joined markdown, or "" when there is nothing to join.
 */
function reconstructMarkdownFromChunks(chunks, pageTitle) {
  if (!Array.isArray(chunks) || chunks.length === 0) return "";
  // Depends only on the page title, so build it once instead of per chunk.
  const prefixWithoutSection = `${pageTitle}\n\n`;
  const parts = [];
  for (const chunk of chunks) {
    // Tolerate chunks with missing or non-string text instead of throwing.
    let text = typeof chunk?.chunkText === "string" ? chunk.chunkText : "";
    const sectionTitle = chunk?.sectionTitle;
    const prefixWithSection = `${pageTitle} \u2014 ${sectionTitle}\n\n`;
    if (sectionTitle && text.startsWith(prefixWithSection)) {
      text = text.slice(prefixWithSection.length);
    } else if (text.startsWith(prefixWithoutSection)) {
      text = text.slice(prefixWithoutSection.length);
    }
    const body = text.trim();
    // Skip chunks that are empty once the prefix is removed; otherwise the
    // join below would emit runs of blank separators.
    if (body.length > 0) parts.push(body);
  }
  return parts.join("\n\n");
}
17308
17328
  var UpstashSearchStore = class {
17309
17329
  index;
17310
17330
  pagesNs;
@@ -17684,10 +17704,12 @@ var UpstashSearchStore = class {
17684
17704
  });
17685
17705
  const doc = results[0];
17686
17706
  if (!doc || !doc.metadata) return null;
17707
+ const chunks = await this.getChunksForPage(url, scope);
17708
+ const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
17687
17709
  return {
17688
17710
  url: doc.metadata.url,
17689
17711
  title: doc.metadata.title,
17690
- markdown: doc.metadata.markdown,
17712
+ markdown,
17691
17713
  projectId: doc.metadata.projectId,
17692
17714
  scopeName: doc.metadata.scopeName,
17693
17715
  routeFile: doc.metadata.routeFile,
@@ -17707,6 +17729,37 @@ var UpstashSearchStore = class {
17707
17729
  return null;
17708
17730
  }
17709
17731
  }
17732
+ /**
17733
+ * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
17734
+ * Used to reconstruct full page markdown from chunk content.
17735
+ */
17736
+ async getChunksForPage(url, scope) {
17737
+ const chunks = [];
17738
+ let cursor = "0";
17739
+ try {
17740
+ for (; ; ) {
17741
+ const result = await this.chunksNs.range({
17742
+ cursor,
17743
+ limit: 100,
17744
+ includeMetadata: true
17745
+ });
17746
+ for (const doc of result.vectors) {
17747
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
17748
+ chunks.push({
17749
+ chunkText: doc.metadata.chunkText ?? "",
17750
+ ordinal: doc.metadata.ordinal ?? 0,
17751
+ sectionTitle: doc.metadata.sectionTitle ?? "",
17752
+ headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
17753
+ });
17754
+ }
17755
+ }
17756
+ if (!result.nextCursor || result.nextCursor === "0") break;
17757
+ cursor = result.nextCursor;
17758
+ }
17759
+ } catch {
17760
+ }
17761
+ return chunks.sort((a, b) => a.ordinal - b.ordinal);
17762
+ }
17710
17763
  async fetchPageWithVector(url, scope) {
17711
17764
  try {
17712
17765
  const results = await this.pagesNs.fetch([url], {
@@ -18662,45 +18715,20 @@ var SearchEngine = class _SearchEngine {
18662
18715
  function createServer(engine) {
18663
18716
  const server = new mcp_js.McpServer({
18664
18717
  name: "searchsocket-mcp",
18665
- version: "0.1.0"
18718
+ version: "0.2.0"
18666
18719
  });
18667
18720
  server.registerTool(
18668
18721
  "search",
18669
18722
  {
18670
- description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
18723
+ description: "Searches indexed site content using semantic similarity. Returns ranked results with url, title, snippet, chunkText (full section markdown), score, and routeFile (source file path for editing). Each result includes the best-matching section; set groupBy to 'page' (default) for additional chunk sub-results per page. Use routeFile to locate the source file when editing content. If snippets lack detail, call get_page with the result URL to retrieve the full page markdown.",
18671
18724
  inputSchema: {
18672
- query: zod.z.string().min(1),
18673
- scope: zod.z.string().optional(),
18674
- topK: zod.z.number().int().positive().max(100).optional(),
18675
- pathPrefix: zod.z.string().optional(),
18676
- tags: zod.z.array(zod.z.string()).optional(),
18677
- filters: zod.z.record(zod.z.string(), zod.z.union([zod.z.string(), zod.z.number(), zod.z.boolean()])).optional(),
18678
- groupBy: zod.z.enum(["page", "chunk"]).optional(),
18679
- maxSubResults: zod.z.number().int().positive().max(20).optional()
18680
- },
18681
- outputSchema: {
18682
- q: zod.z.string(),
18683
- scope: zod.z.string(),
18684
- results: zod.z.array(zod.z.object({
18685
- url: zod.z.string(),
18686
- title: zod.z.string(),
18687
- sectionTitle: zod.z.string().optional(),
18688
- snippet: zod.z.string(),
18689
- score: zod.z.number(),
18690
- routeFile: zod.z.string(),
18691
- chunks: zod.z.array(zod.z.object({
18692
- sectionTitle: zod.z.string().optional(),
18693
- snippet: zod.z.string(),
18694
- headingPath: zod.z.array(zod.z.string()),
18695
- score: zod.z.number()
18696
- })).optional()
18697
- })),
18698
- meta: zod.z.object({
18699
- timingsMs: zod.z.object({
18700
- search: zod.z.number(),
18701
- total: zod.z.number()
18702
- })
18703
- })
18725
+ query: zod.z.string().min(1).describe("Search query. Use keywords or natural language, not full sentences."),
18726
+ topK: zod.z.number().int().positive().max(100).optional().describe("Number of results to return (default: 10, max: 100)"),
18727
+ pathPrefix: zod.z.string().optional().describe("Filter results to URLs starting with this prefix (e.g. '/docs')"),
18728
+ tags: zod.z.array(zod.z.string()).optional().describe("Filter results to pages matching all specified tags"),
18729
+ filters: zod.z.record(zod.z.string(), zod.z.union([zod.z.string(), zod.z.number(), zod.z.boolean()])).optional().describe('Filter by structured page metadata (e.g. {"version": 2})'),
18730
+ groupBy: zod.z.enum(["page", "chunk"]).optional().describe("'page' (default) groups chunks by page with sub-results; 'chunk' returns individual chunks"),
18731
+ scope: zod.z.string().optional()
18704
18732
  }
18705
18733
  },
18706
18734
  async (input) => {
@@ -18711,85 +18739,18 @@ function createServer(engine) {
18711
18739
  pathPrefix: input.pathPrefix,
18712
18740
  tags: input.tags,
18713
18741
  filters: input.filters,
18714
- groupBy: input.groupBy,
18715
- maxSubResults: input.maxSubResults
18742
+ groupBy: input.groupBy
18716
18743
  });
18717
- return {
18718
- content: [
18719
- {
18720
- type: "text",
18721
- text: JSON.stringify(result, null, 2)
18722
- }
18723
- ],
18724
- structuredContent: result
18725
- };
18726
- }
18727
- );
18728
- server.registerTool(
18729
- "get_page",
18730
- {
18731
- description: "Fetch indexed markdown for a specific path or URL, including frontmatter and routeFile mapping.",
18732
- inputSchema: {
18733
- pathOrUrl: zod.z.string().min(1),
18734
- scope: zod.z.string().optional()
18735
- }
18736
- },
18737
- async (input) => {
18738
- const page = await engine.getPage(input.pathOrUrl, input.scope);
18739
- return {
18740
- content: [
18741
- {
18742
- type: "text",
18743
- text: JSON.stringify(page, null, 2)
18744
- }
18745
- ]
18746
- };
18747
- }
18748
- );
18749
- server.registerTool(
18750
- "list_pages",
18751
- {
18752
- description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
18753
- inputSchema: {
18754
- pathPrefix: zod.z.string().optional(),
18755
- cursor: zod.z.string().optional(),
18756
- limit: zod.z.number().int().positive().max(200).optional(),
18757
- scope: zod.z.string().optional()
18758
- }
18759
- },
18760
- async (input) => {
18761
- const result = await engine.listPages({
18762
- pathPrefix: input.pathPrefix,
18763
- cursor: input.cursor,
18764
- limit: input.limit,
18765
- scope: input.scope
18766
- });
18767
- return {
18768
- content: [
18769
- {
18770
- type: "text",
18771
- text: JSON.stringify(result, null, 2)
18772
- }
18773
- ]
18774
- };
18775
- }
18776
- );
18777
- server.registerTool(
18778
- "get_site_structure",
18779
- {
18780
- description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
18781
- inputSchema: {
18782
- pathPrefix: zod.z.string().optional(),
18783
- scope: zod.z.string().optional(),
18784
- maxPages: zod.z.number().int().positive().max(2e3).optional()
18744
+ if (result.results.length === 0) {
18745
+ return {
18746
+ content: [
18747
+ {
18748
+ type: "text",
18749
+ text: `No results found for "${input.query}". Try broader keywords or remove filters.`
18750
+ }
18751
+ ]
18752
+ };
18785
18753
  }
18786
- },
18787
- async (input) => {
18788
- const result = await engine.getSiteStructure({
18789
- pathPrefix: input.pathPrefix,
18790
- scope: input.scope,
18791
- maxPages: input.maxPages
18792
- });
18793
18754
  return {
18794
18755
  content: [
18795
18756
  {
@@ -18801,56 +18762,51 @@ function createServer(engine) {
18801
18762
  }
18802
18763
  );
18803
18764
  server.registerTool(
18804
- "find_source_file",
18765
+ "get_page",
18805
18766
  {
18806
- description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
18767
+ description: "Retrieves the full markdown content and metadata for a specific page by its URL path. Use this after search when snippets lack the detail needed to answer a question. Returns reconstructed page markdown, frontmatter (title, routeFile, tags, link counts, indexedAt), and the source file path. Do NOT use this for discovery \u2014 use search first to find relevant pages.",
18807
18768
  inputSchema: {
18808
- query: zod.z.string().min(1),
18769
+ path: zod.z.string().min(1).describe("URL path of the page (e.g. '/docs/auth'). Use a URL from search results."),
18809
18770
  scope: zod.z.string().optional()
18810
18771
  }
18811
18772
  },
18812
18773
  async (input) => {
18813
- const result = await engine.search({
18814
- q: input.query,
18815
- topK: 1,
18816
- scope: input.scope
18817
- });
18818
- if (result.results.length === 0) {
18774
+ try {
18775
+ const page = await engine.getPage(input.path, input.scope);
18819
18776
  return {
18820
18777
  content: [
18821
18778
  {
18822
18779
  type: "text",
18823
- text: JSON.stringify({
18824
- error: "No matching content found for the given query."
18825
- })
18780
+ text: JSON.stringify(page, null, 2)
18781
+ }
18782
+ ]
18783
+ };
18784
+ } catch {
18785
+ const suggestions = await engine.search({ q: input.path, topK: 3, scope: input.scope });
18786
+ const similar = suggestions.results.map((r) => r.url);
18787
+ return {
18788
+ content: [
18789
+ {
18790
+ type: "text",
18791
+ text: similar.length > 0 ? `Page '${input.path}' not found. Similar pages: ${similar.join(", ")}` : `Page '${input.path}' not found. Use search to find the correct URL.`
18826
18792
  }
18827
18793
  ]
18828
18794
  };
18829
18795
  }
18830
- const match = result.results[0];
18831
- const { url, routeFile, sectionTitle, snippet } = match;
18832
- return {
18833
- content: [
18834
- {
18835
- type: "text",
18836
- text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
18837
- }
18838
- ]
18839
- };
18840
18796
  }
18841
18797
  );
18842
18798
  server.registerTool(
18843
18799
  "get_related_pages",
18844
18800
  {
18845
- description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
18801
+ description: "Finds pages related to a specific page using link graph analysis, semantic similarity, and URL structure. Returns related pages with relationship type (outgoing_link, incoming_link, sibling, semantic) and relevance score. Do NOT use this for general search \u2014 use search instead. Use this only when you already have a specific page URL and need to discover connected content.",
18846
18802
  inputSchema: {
18847
- pathOrUrl: zod.z.string().min(1),
18848
- scope: zod.z.string().optional(),
18849
- topK: zod.z.number().int().positive().max(25).optional()
18803
+ path: zod.z.string().min(1).describe("URL path of the source page (e.g. '/docs/auth'). Use a URL from search results."),
18804
+ topK: zod.z.number().int().positive().max(25).optional().describe("Number of related pages to return (default: 10, max: 25)"),
18805
+ scope: zod.z.string().optional()
18850
18806
  }
18851
18807
  },
18852
18808
  async (input) => {
18853
- const result = await engine.getRelatedPages(input.pathOrUrl, {
18809
+ const result = await engine.getRelatedPages(input.path, {
18854
18810
  topK: input.topK,
18855
18811
  scope: input.scope
18856
18812
  });
@@ -22138,7 +22094,6 @@ var IndexPipeline = class _IndexPipeline {
22138
22094
  keywords: r.keywords ?? [],
22139
22095
  summary: r.summary ?? "",
22140
22096
  tags: r.tags,
22141
- markdown: r.markdown,
22142
22097
  routeFile: r.routeFile,
22143
22098
  routeResolution: r.routeResolution,
22144
22099
  incomingLinks: r.incomingLinks,
@@ -22165,7 +22120,6 @@ var IndexPipeline = class _IndexPipeline {
22165
22120
  keywords: r.keywords ?? [],
22166
22121
  summary: r.summary ?? "",
22167
22122
  tags: r.tags,
22168
- markdown: r.markdown,
22169
22123
  routeFile: r.routeFile,
22170
22124
  routeResolution: r.routeResolution,
22171
22125
  incomingLinks: r.incomingLinks,
@@ -22249,6 +22203,7 @@ var IndexPipeline = class _IndexPipeline {
22249
22203
  let documentsUpserted = 0;
22250
22204
  if (!options.dryRun && changedChunks.length > 0) {
22251
22205
  this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
22206
+ const CHUNK_TEXT_MAX_CHARS = 3e4;
22252
22207
  const docs = changedChunks.map((chunk) => {
22253
22208
  const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
22254
22209
  if (embeddingText.length > 2e3) {
@@ -22256,6 +22211,7 @@ var IndexPipeline = class _IndexPipeline {
22256
22211
  `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
22257
22212
  );
22258
22213
  }
22214
+ const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
22259
22215
  return {
22260
22216
  id: chunk.chunkKey,
22261
22217
  data: embeddingText,
@@ -22266,7 +22222,7 @@ var IndexPipeline = class _IndexPipeline {
22266
22222
  sectionTitle: chunk.sectionTitle ?? "",
22267
22223
  headingPath: chunk.headingPath.join(" > "),
22268
22224
  snippet: chunk.snippet,
22269
- chunkText: embeddingText,
22225
+ chunkText: cappedText,
22270
22226
  tags: chunk.tags,
22271
22227
  ordinal: chunk.ordinal,
22272
22228
  contentHash: chunk.contentHash,