searchsocket 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -17262,6 +17262,26 @@ function ensureStateDirs(cwd, stateDir, scope) {
17262
17262
  fs.mkdirSync(statePath, { recursive: true });
17263
17263
  return { statePath };
17264
17264
  }
17265
/**
 * Rebuild a page's markdown by joining the text of its chunks.
 *
 * For every chunk, a leading title prefix is removed when the chunk text
 * starts with one: either `<pageTitle> — <sectionTitle>` followed by a blank
 * line (only tried when the chunk has a section title), or `<pageTitle>`
 * followed by a blank line. The remaining text is trimmed, and the non-empty
 * pieces are joined with a blank line between them, in the order given.
 *
 * @param {Array<{chunkText?: string, sectionTitle?: string}>} chunks
 *   Chunks in the order they should appear in the output.
 * @param {string} pageTitle Title used to build the prefixes to strip.
 * @returns {string} The joined markdown, or "" when there is nothing to join.
 */
function reconstructMarkdownFromChunks(chunks, pageTitle) {
  if (!chunks || chunks.length === 0) return "";
  // Loop-invariant: the title-only prefix does not depend on the chunk.
  const prefixWithoutSection = `${pageTitle}\n\n`;
  const parts = [];
  for (const chunk of chunks) {
    // Tolerate chunks that carry no text instead of throwing on startsWith.
    let text = String(chunk?.chunkText ?? "");
    const sectionTitle = chunk?.sectionTitle;
    const prefixWithSection = `${pageTitle} \u2014 ${sectionTitle}\n\n`;
    if (sectionTitle && text.startsWith(prefixWithSection)) {
      text = text.slice(prefixWithSection.length);
    } else if (text.startsWith(prefixWithoutSection)) {
      text = text.slice(prefixWithoutSection.length);
    }
    const body = text.trim();
    // Skip chunks that are empty once the prefix is gone, so the join does
    // not produce runs of consecutive blank separators.
    if (body !== "") parts.push(body);
  }
  return parts.join("\n\n");
}
17265
17285
  var UpstashSearchStore = class {
17266
17286
  index;
17267
17287
  pagesNs;
@@ -17641,10 +17661,12 @@ var UpstashSearchStore = class {
17641
17661
  });
17642
17662
  const doc = results[0];
17643
17663
  if (!doc || !doc.metadata) return null;
17664
+ const chunks = await this.getChunksForPage(url, scope);
17665
+ const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
17644
17666
  return {
17645
17667
  url: doc.metadata.url,
17646
17668
  title: doc.metadata.title,
17647
- markdown: doc.metadata.markdown,
17669
+ markdown,
17648
17670
  projectId: doc.metadata.projectId,
17649
17671
  scopeName: doc.metadata.scopeName,
17650
17672
  routeFile: doc.metadata.routeFile,
@@ -17664,6 +17686,37 @@ var UpstashSearchStore = class {
17664
17686
  return null;
17665
17687
  }
17666
17688
  }
17689
+ /**
17690
+ * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
17691
+ * Used to reconstruct full page markdown from chunk content.
17692
+ */
17693
+ async getChunksForPage(url, scope) {
17694
+ const chunks = [];
17695
+ let cursor = "0";
17696
+ try {
17697
+ for (; ; ) {
17698
+ const result = await this.chunksNs.range({
17699
+ cursor,
17700
+ limit: 100,
17701
+ includeMetadata: true
17702
+ });
17703
+ for (const doc of result.vectors) {
17704
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
17705
+ chunks.push({
17706
+ chunkText: doc.metadata.chunkText ?? "",
17707
+ ordinal: doc.metadata.ordinal ?? 0,
17708
+ sectionTitle: doc.metadata.sectionTitle ?? "",
17709
+ headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
17710
+ });
17711
+ }
17712
+ }
17713
+ if (!result.nextCursor || result.nextCursor === "0") break;
17714
+ cursor = result.nextCursor;
17715
+ }
17716
+ } catch {
17717
+ }
17718
+ return chunks.sort((a, b) => a.ordinal - b.ordinal);
17719
+ }
17667
17720
  async fetchPageWithVector(url, scope) {
17668
17721
  try {
17669
17722
  const results = await this.pagesNs.fetch([url], {
@@ -20984,7 +21037,6 @@ var IndexPipeline = class _IndexPipeline {
20984
21037
  keywords: r.keywords ?? [],
20985
21038
  summary: r.summary ?? "",
20986
21039
  tags: r.tags,
20987
- markdown: r.markdown,
20988
21040
  routeFile: r.routeFile,
20989
21041
  routeResolution: r.routeResolution,
20990
21042
  incomingLinks: r.incomingLinks,
@@ -21011,7 +21063,6 @@ var IndexPipeline = class _IndexPipeline {
21011
21063
  keywords: r.keywords ?? [],
21012
21064
  summary: r.summary ?? "",
21013
21065
  tags: r.tags,
21014
- markdown: r.markdown,
21015
21066
  routeFile: r.routeFile,
21016
21067
  routeResolution: r.routeResolution,
21017
21068
  incomingLinks: r.incomingLinks,
@@ -21095,6 +21146,7 @@ var IndexPipeline = class _IndexPipeline {
21095
21146
  let documentsUpserted = 0;
21096
21147
  if (!options.dryRun && changedChunks.length > 0) {
21097
21148
  this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
21149
+ const CHUNK_TEXT_MAX_CHARS = 3e4;
21098
21150
  const docs = changedChunks.map((chunk) => {
21099
21151
  const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
21100
21152
  if (embeddingText.length > 2e3) {
@@ -21102,6 +21154,7 @@ var IndexPipeline = class _IndexPipeline {
21102
21154
  `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
21103
21155
  );
21104
21156
  }
21157
+ const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
21105
21158
  return {
21106
21159
  id: chunk.chunkKey,
21107
21160
  data: embeddingText,
@@ -21112,7 +21165,7 @@ var IndexPipeline = class _IndexPipeline {
21112
21165
  sectionTitle: chunk.sectionTitle ?? "",
21113
21166
  headingPath: chunk.headingPath.join(" > "),
21114
21167
  snippet: chunk.snippet,
21115
- chunkText: embeddingText,
21168
+ chunkText: cappedText,
21116
21169
  tags: chunk.tags,
21117
21170
  ordinal: chunk.ordinal,
21118
21171
  contentHash: chunk.contentHash,
@@ -21674,45 +21727,20 @@ var SearchEngine = class _SearchEngine {
21674
21727
  function createServer(engine) {
21675
21728
  const server = new McpServer({
21676
21729
  name: "searchsocket-mcp",
21677
- version: "0.1.0"
21730
+ version: "0.2.0"
21678
21731
  });
21679
21732
  server.registerTool(
21680
21733
  "search",
21681
21734
  {
21682
- description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
21735
+ description: "Searches indexed site content using semantic similarity. Returns ranked results with url, title, snippet, chunkText (full section markdown), score, and routeFile (source file path for editing). Each result includes the best-matching section; set groupBy to 'page' (default) for additional chunk sub-results per page. Use routeFile to locate the source file when editing content. If snippets lack detail, call get_page with the result URL to retrieve the full page markdown.",
21683
21736
  inputSchema: {
21684
- query: z.string().min(1),
21685
- scope: z.string().optional(),
21686
- topK: z.number().int().positive().max(100).optional(),
21687
- pathPrefix: z.string().optional(),
21688
- tags: z.array(z.string()).optional(),
21689
- filters: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional(),
21690
- groupBy: z.enum(["page", "chunk"]).optional(),
21691
- maxSubResults: z.number().int().positive().max(20).optional()
21692
- },
21693
- outputSchema: {
21694
- q: z.string(),
21695
- scope: z.string(),
21696
- results: z.array(z.object({
21697
- url: z.string(),
21698
- title: z.string(),
21699
- sectionTitle: z.string().optional(),
21700
- snippet: z.string(),
21701
- score: z.number(),
21702
- routeFile: z.string(),
21703
- chunks: z.array(z.object({
21704
- sectionTitle: z.string().optional(),
21705
- snippet: z.string(),
21706
- headingPath: z.array(z.string()),
21707
- score: z.number()
21708
- })).optional()
21709
- })),
21710
- meta: z.object({
21711
- timingsMs: z.object({
21712
- search: z.number(),
21713
- total: z.number()
21714
- })
21715
- })
21737
+ query: z.string().min(1).describe("Search query. Use keywords or natural language, not full sentences."),
21738
+ topK: z.number().int().positive().max(100).optional().describe("Number of results to return (default: 10, max: 100)"),
21739
+ pathPrefix: z.string().optional().describe("Filter results to URLs starting with this prefix (e.g. '/docs')"),
21740
+ tags: z.array(z.string()).optional().describe("Filter results to pages matching all specified tags"),
21741
+ filters: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional().describe('Filter by structured page metadata (e.g. {"version": 2})'),
21742
+ groupBy: z.enum(["page", "chunk"]).optional().describe("'page' (default) groups chunks by page with sub-results; 'chunk' returns individual chunks"),
21743
+ scope: z.string().optional()
21716
21744
  }
21717
21745
  },
21718
21746
  async (input) => {
@@ -21723,85 +21751,18 @@ function createServer(engine) {
21723
21751
  pathPrefix: input.pathPrefix,
21724
21752
  tags: input.tags,
21725
21753
  filters: input.filters,
21726
- groupBy: input.groupBy,
21727
- maxSubResults: input.maxSubResults
21754
+ groupBy: input.groupBy
21728
21755
  });
21729
- return {
21730
- content: [
21731
- {
21732
- type: "text",
21733
- text: JSON.stringify(result, null, 2)
21734
- }
21735
- ],
21736
- structuredContent: result
21737
- };
21738
- }
21739
- );
21740
- server.registerTool(
21741
- "get_page",
21742
- {
21743
- description: "Fetch indexed markdown for a specific path or URL, including frontmatter and routeFile mapping.",
21744
- inputSchema: {
21745
- pathOrUrl: z.string().min(1),
21746
- scope: z.string().optional()
21747
- }
21748
- },
21749
- async (input) => {
21750
- const page = await engine.getPage(input.pathOrUrl, input.scope);
21751
- return {
21752
- content: [
21753
- {
21754
- type: "text",
21755
- text: JSON.stringify(page, null, 2)
21756
- }
21757
- ]
21758
- };
21759
- }
21760
- );
21761
- server.registerTool(
21762
- "list_pages",
21763
- {
21764
- description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
21765
- inputSchema: {
21766
- pathPrefix: z.string().optional(),
21767
- cursor: z.string().optional(),
21768
- limit: z.number().int().positive().max(200).optional(),
21769
- scope: z.string().optional()
21770
- }
21771
- },
21772
- async (input) => {
21773
- const result = await engine.listPages({
21774
- pathPrefix: input.pathPrefix,
21775
- cursor: input.cursor,
21776
- limit: input.limit,
21777
- scope: input.scope
21778
- });
21779
- return {
21780
- content: [
21781
- {
21782
- type: "text",
21783
- text: JSON.stringify(result, null, 2)
21784
- }
21785
- ]
21786
- };
21787
- }
21788
- );
21789
- server.registerTool(
21790
- "get_site_structure",
21791
- {
21792
- description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
21793
- inputSchema: {
21794
- pathPrefix: z.string().optional(),
21795
- scope: z.string().optional(),
21796
- maxPages: z.number().int().positive().max(2e3).optional()
21756
+ if (result.results.length === 0) {
21757
+ return {
21758
+ content: [
21759
+ {
21760
+ type: "text",
21761
+ text: `No results found for "${input.query}". Try broader keywords or remove filters.`
21762
+ }
21763
+ ]
21764
+ };
21797
21765
  }
21798
- },
21799
- async (input) => {
21800
- const result = await engine.getSiteStructure({
21801
- pathPrefix: input.pathPrefix,
21802
- scope: input.scope,
21803
- maxPages: input.maxPages
21804
- });
21805
21766
  return {
21806
21767
  content: [
21807
21768
  {
@@ -21813,56 +21774,51 @@ function createServer(engine) {
21813
21774
  }
21814
21775
  );
21815
21776
  server.registerTool(
21816
- "find_source_file",
21777
+ "get_page",
21817
21778
  {
21818
- description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
21779
+ description: "Retrieves the full markdown content and metadata for a specific page by its URL path. Use this after search when snippets lack the detail needed to answer a question. Returns reconstructed page markdown, frontmatter (title, routeFile, tags, link counts, indexedAt), and the source file path. Do NOT use this for discovery \u2014 use search first to find relevant pages.",
21819
21780
  inputSchema: {
21820
- query: z.string().min(1),
21781
+ path: z.string().min(1).describe("URL path of the page (e.g. '/docs/auth'). Use a URL from search results."),
21821
21782
  scope: z.string().optional()
21822
21783
  }
21823
21784
  },
21824
21785
  async (input) => {
21825
- const result = await engine.search({
21826
- q: input.query,
21827
- topK: 1,
21828
- scope: input.scope
21829
- });
21830
- if (result.results.length === 0) {
21786
+ try {
21787
+ const page = await engine.getPage(input.path, input.scope);
21831
21788
  return {
21832
21789
  content: [
21833
21790
  {
21834
21791
  type: "text",
21835
- text: JSON.stringify({
21836
- error: "No matching content found for the given query."
21837
- })
21792
+ text: JSON.stringify(page, null, 2)
21793
+ }
21794
+ ]
21795
+ };
21796
+ } catch {
21797
+ const suggestions = await engine.search({ q: input.path, topK: 3, scope: input.scope });
21798
+ const similar = suggestions.results.map((r) => r.url);
21799
+ return {
21800
+ content: [
21801
+ {
21802
+ type: "text",
21803
+ text: similar.length > 0 ? `Page '${input.path}' not found. Similar pages: ${similar.join(", ")}` : `Page '${input.path}' not found. Use search to find the correct URL.`
21838
21804
  }
21839
21805
  ]
21840
21806
  };
21841
21807
  }
21842
- const match = result.results[0];
21843
- const { url, routeFile, sectionTitle, snippet } = match;
21844
- return {
21845
- content: [
21846
- {
21847
- type: "text",
21848
- text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
21849
- }
21850
- ]
21851
- };
21852
21808
  }
21853
21809
  );
21854
21810
  server.registerTool(
21855
21811
  "get_related_pages",
21856
21812
  {
21857
- description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
21813
+ description: "Finds pages related to a specific page using link graph analysis, semantic similarity, and URL structure. Returns related pages with relationship type (outgoing_link, incoming_link, sibling, semantic) and relevance score. Do NOT use this for general search \u2014 use search instead. Use this only when you already have a specific page URL and need to discover connected content.",
21858
21814
  inputSchema: {
21859
- pathOrUrl: z.string().min(1),
21860
- scope: z.string().optional(),
21861
- topK: z.number().int().positive().max(25).optional()
21815
+ path: z.string().min(1).describe("URL path of the source page (e.g. '/docs/auth'). Use a URL from search results."),
21816
+ topK: z.number().int().positive().max(25).optional().describe("Number of related pages to return (default: 10, max: 25)"),
21817
+ scope: z.string().optional()
21862
21818
  }
21863
21819
  },
21864
21820
  async (input) => {
21865
- const result = await engine.getRelatedPages(input.pathOrUrl, {
21821
+ const result = await engine.getRelatedPages(input.path, {
21866
21822
  topK: input.topK,
21867
21823
  scope: input.scope
21868
21824
  });