npm - searchsocket - Versions diffs - 0.6.2 → 0.7.0 - Mend

searchsocket 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/sveltekit.js CHANGED

@@ -17293,6 +17293,26 @@ function joinUrl(baseUrl, route) {
   const routePart = ensureLeadingSlash(route);
   return `${base}${routePart}`;
 }
+function reconstructMarkdownFromChunks(chunks, pageTitle) {
+  if (chunks.length === 0) return "";
+  const parts = [];
+  for (const chunk of chunks) {
+    let text = chunk.chunkText;
+    const prefixWithSection = `${pageTitle} \u2014 ${chunk.sectionTitle}
+`;
+    const prefixWithoutSection = `${pageTitle}
+`;
+    if (chunk.sectionTitle && text.startsWith(prefixWithSection)) {
+      text = text.slice(prefixWithSection.length);
+    } else if (text.startsWith(prefixWithoutSection)) {
+      text = text.slice(prefixWithoutSection.length);
+    }
+    parts.push(text.trim());
+  }
+  return parts.join("\n\n");
+}
 var UpstashSearchStore = class {
   index;
   pagesNs;
@@ -17672,10 +17692,12 @@ var UpstashSearchStore = class {
       });
       const doc = results[0];
       if (!doc || !doc.metadata) return null;
+      const chunks = await this.getChunksForPage(url, scope);
+      const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
       return {
         url: doc.metadata.url,
         title: doc.metadata.title,
-        markdown: doc.metadata.markdown,
+        markdown,
         projectId: doc.metadata.projectId,
         scopeName: doc.metadata.scopeName,
         routeFile: doc.metadata.routeFile,
@@ -17695,6 +17717,37 @@ var UpstashSearchStore = class {
       return null;
     }
   }
+  /**
+   * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
+   * Used to reconstruct full page markdown from chunk content.
+   */
+  async getChunksForPage(url, scope) {
+    const chunks = [];
+    let cursor = "0";
+    try {
+      for (; ; ) {
+        const result = await this.chunksNs.range({
+          cursor,
+          limit: 100,
+          includeMetadata: true
+        });
+        for (const doc of result.vectors) {
+          if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
+            chunks.push({
+              chunkText: doc.metadata.chunkText ?? "",
+              ordinal: doc.metadata.ordinal ?? 0,
+              sectionTitle: doc.metadata.sectionTitle ?? "",
+              headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
+            });
+          }
+        }
+        if (!result.nextCursor || result.nextCursor === "0") break;
+        cursor = result.nextCursor;
+      }
+    } catch {
+    }
+    return chunks.sort((a, b) => a.ordinal - b.ordinal);
+  }
   async fetchPageWithVector(url, scope) {
     try {
       const results = await this.pagesNs.fetch([url], {
@@ -18650,45 +18703,20 @@ var SearchEngine = class _SearchEngine {
 function createServer(engine) {
   const server = new McpServer({
     name: "searchsocket-mcp",
-    version: "0.1.0"
+    version: "0.2.0"
   });
   server.registerTool(
     "search",
     {
-      description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
+      description: "Searches indexed site content using semantic similarity. Returns ranked results with url, title, snippet, chunkText (full section markdown), score, and routeFile (source file path for editing). Each result includes the best-matching section; set groupBy to 'page' (default) for additional chunk sub-results per page. Use routeFile to locate the source file when editing content. If snippets lack detail, call get_page with the result URL to retrieve the full page markdown.",
       inputSchema: {
-        query: z.string().min(1),
-        scope: z.string().optional(),
-        topK: z.number().int().positive().max(100).optional(),
-        pathPrefix: z.string().optional(),
-        tags: z.array(z.string()).optional(),
-        filters: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional(),
-        groupBy: z.enum(["page", "chunk"]).optional(),
-        maxSubResults: z.number().int().positive().max(20).optional()
-      },
-      outputSchema: {
-        q: z.string(),
-        scope: z.string(),
-        results: z.array(z.object({
-          url: z.string(),
-          title: z.string(),
-          sectionTitle: z.string().optional(),
-          snippet: z.string(),
-          score: z.number(),
-          routeFile: z.string(),
-          chunks: z.array(z.object({
-            sectionTitle: z.string().optional(),
-            snippet: z.string(),
-            headingPath: z.array(z.string()),
-            score: z.number()
-          })).optional()
-        })),
-        meta: z.object({
-          timingsMs: z.object({
-            search: z.number(),
-            total: z.number()
-          })
-        })
+        query: z.string().min(1).describe("Search query. Use keywords or natural language, not full sentences."),
+        topK: z.number().int().positive().max(100).optional().describe("Number of results to return (default: 10, max: 100)"),
+        pathPrefix: z.string().optional().describe("Filter results to URLs starting with this prefix (e.g. '/docs')"),
+        tags: z.array(z.string()).optional().describe("Filter results to pages matching all specified tags"),
+        filters: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional().describe('Filter by structured page metadata (e.g. {"version": 2})'),
+        groupBy: z.enum(["page", "chunk"]).optional().describe("'page' (default) groups chunks by page with sub-results; 'chunk' returns individual chunks"),
+        scope: z.string().optional()
       }
     },
     async (input) => {
@@ -18699,85 +18727,18 @@ function createServer(engine) {
         pathPrefix: input.pathPrefix,
         tags: input.tags,
         filters: input.filters,
-        groupBy: input.groupBy,
-        maxSubResults: input.maxSubResults
+        groupBy: input.groupBy
       });
-      return {
-        content: [
-          {
-            type: "text",
-            text: JSON.stringify(result, null, 2)
-          }
-        ],
-        structuredContent: result
-      };
-    }
-  );
-  server.registerTool(
-    "get_page",
-    {
-      description: "Fetch indexed markdown for a specific path or URL, including frontmatter and routeFile mapping.",
-      inputSchema: {
-        pathOrUrl: z.string().min(1),
-        scope: z.string().optional()
-      }
-    },
-    async (input) => {
-      const page = await engine.getPage(input.pathOrUrl, input.scope);
-      return {
-        content: [
-          {
-            type: "text",
-            text: JSON.stringify(page, null, 2)
-          }
-        ]
-      };
-    }
-  );
-  server.registerTool(
-    "list_pages",
-    {
-      description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
-      inputSchema: {
-        pathPrefix: z.string().optional(),
-        cursor: z.string().optional(),
-        limit: z.number().int().positive().max(200).optional(),
-        scope: z.string().optional()
-      }
-    },
-    async (input) => {
-      const result = await engine.listPages({
-        pathPrefix: input.pathPrefix,
-        cursor: input.cursor,
-        limit: input.limit,
-        scope: input.scope
-      });
-      return {
-        content: [
-          {
-            type: "text",
-            text: JSON.stringify(result, null, 2)
-          }
-        ]
-      };
-    }
-  );
-  server.registerTool(
-    "get_site_structure",
-    {
-      description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
-      inputSchema: {
-        pathPrefix: z.string().optional(),
-        scope: z.string().optional(),
-        maxPages: z.number().int().positive().max(2e3).optional()
+      if (result.results.length === 0) {
+        return {
+          content: [
+            {
+              type: "text",
+              text: `No results found for "${input.query}". Try broader keywords or remove filters.`
+            }
+          ]
+        };
       }
-    },
-    async (input) => {
-      const result = await engine.getSiteStructure({
-        pathPrefix: input.pathPrefix,
-        scope: input.scope,
-        maxPages: input.maxPages
-      });
       return {
         content: [
           {
@@ -18789,56 +18750,51 @@ function createServer(engine) {
     }
   );
   server.registerTool(
-    "find_source_file",
+    "get_page",
     {
-      description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
+      description: "Retrieves the full markdown content and metadata for a specific page by its URL path. Use this after search when snippets lack the detail needed to answer a question. Returns reconstructed page markdown, frontmatter (title, routeFile, tags, link counts, indexedAt), and the source file path. Do NOT use this for discovery \u2014 use search first to find relevant pages.",
       inputSchema: {
-        query: z.string().min(1),
+        path: z.string().min(1).describe("URL path of the page (e.g. '/docs/auth'). Use a URL from search results."),
         scope: z.string().optional()
       }
     },
     async (input) => {
-      const result = await engine.search({
-        q: input.query,
-        topK: 1,
-        scope: input.scope
-      });
-      if (result.results.length === 0) {
+      try {
+        const page = await engine.getPage(input.path, input.scope);
         return {
           content: [
             {
               type: "text",
-              text: JSON.stringify({
-                error: "No matching content found for the given query."
-              })
+              text: JSON.stringify(page, null, 2)
+            }
+          ]
+        };
+      } catch {
+        const suggestions = await engine.search({ q: input.path, topK: 3, scope: input.scope });
+        const similar = suggestions.results.map((r) => r.url);
+        return {
+          content: [
+            {
+              type: "text",
+              text: similar.length > 0 ? `Page '${input.path}' not found. Similar pages: ${similar.join(", ")}` : `Page '${input.path}' not found. Use search to find the correct URL.`
             }
           ]
         };
       }
-      const match = result.results[0];
-      const { url, routeFile, sectionTitle, snippet } = match;
-      return {
-        content: [
-          {
-            type: "text",
-            text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
-          }
-        ]
-      };
     }
   );
   server.registerTool(
     "get_related_pages",
     {
-      description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
+      description: "Finds pages related to a specific page using link graph analysis, semantic similarity, and URL structure. Returns related pages with relationship type (outgoing_link, incoming_link, sibling, semantic) and relevance score. Do NOT use this for general search \u2014 use search instead. Use this only when you already have a specific page URL and need to discover connected content.",
       inputSchema: {
-        pathOrUrl: z.string().min(1),
-        scope: z.string().optional(),
-        topK: z.number().int().positive().max(25).optional()
+        path: z.string().min(1).describe("URL path of the source page (e.g. '/docs/auth'). Use a URL from search results."),
+        topK: z.number().int().positive().max(25).optional().describe("Number of related pages to return (default: 10, max: 25)"),
+        scope: z.string().optional()
       }
     },
     async (input) => {
-      const result = await engine.getRelatedPages(input.pathOrUrl, {
+      const result = await engine.getRelatedPages(input.path, {
         topK: input.topK,
         scope: input.scope
       });
@@ -22126,7 +22082,6 @@ var IndexPipeline = class _IndexPipeline {
             keywords: r.keywords ?? [],
             summary: r.summary ?? "",
             tags: r.tags,
-            markdown: r.markdown,
             routeFile: r.routeFile,
             routeResolution: r.routeResolution,
             incomingLinks: r.incomingLinks,
@@ -22153,7 +22108,6 @@ var IndexPipeline = class _IndexPipeline {
               keywords: r.keywords ?? [],
               summary: r.summary ?? "",
               tags: r.tags,
-              markdown: r.markdown,
               routeFile: r.routeFile,
               routeResolution: r.routeResolution,
               incomingLinks: r.incomingLinks,
@@ -22237,6 +22191,7 @@ var IndexPipeline = class _IndexPipeline {
     let documentsUpserted = 0;
     if (!options.dryRun && changedChunks.length > 0) {
       this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
+      const CHUNK_TEXT_MAX_CHARS = 3e4;
       const docs = changedChunks.map((chunk) => {
         const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
         if (embeddingText.length > 2e3) {
@@ -22244,6 +22199,7 @@ var IndexPipeline = class _IndexPipeline {
             `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
           );
         }
+        const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
         return {
           id: chunk.chunkKey,
           data: embeddingText,
@@ -22254,7 +22210,7 @@ var IndexPipeline = class _IndexPipeline {
             sectionTitle: chunk.sectionTitle ?? "",
             headingPath: chunk.headingPath.join(" > "),
             snippet: chunk.snippet,
-            chunkText: embeddingText,
+            chunkText: cappedText,
             tags: chunk.tags,
             ordinal: chunk.ordinal,
             contentHash: chunk.contentHash,

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "searchsocket",
-  "version": "0.6.2",
+  "version": "0.7.0",
   "description": "Semantic site search and MCP retrieval for SvelteKit static sites",
   "license": "MIT",
   "author": "Greg Priday <greg@siteorigin.com>",