searchsocket 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -17274,6 +17274,26 @@ function ensureStateDirs(cwd, stateDir, scope) {
17274
17274
  fs__default.default.mkdirSync(statePath, { recursive: true });
17275
17275
  return { statePath };
17276
17276
  }
17277
/**
 * Rebuilds a page's markdown by concatenating the text of its chunks.
 *
 * For each chunk, a leading title prefix is removed when present:
 *   - "<pageTitle> — <sectionTitle>" followed by a blank line, when the chunk
 *     has a section title and its text starts with that exact prefix;
 *   - otherwise "<pageTitle>" followed by a blank line, when the text starts
 *     with that.
 * The remaining text is trimmed, and the non-empty pieces are joined with a
 * blank line between them.
 *
 * @param chunks Chunks in the order they should appear in the output. Each
 *   item is read for `chunkText` and `sectionTitle`; a missing `chunkText`
 *   is treated as an empty string.
 * @param pageTitle Page title used to recognise the prefix to strip.
 * @returns The reconstructed markdown, or "" when there is nothing to emit.
 */
function reconstructMarkdownFromChunks(chunks, pageTitle) {
  if (chunks.length === 0) return "";
  // Identical for every chunk, so build it once instead of per iteration.
  const titleOnlyPrefix = `${pageTitle}\n\n`;
  const parts = [];
  for (const chunk of chunks) {
    let text = chunk.chunkText ?? "";
    const sectionPrefix = chunk.sectionTitle
      ? `${pageTitle} \u2014 ${chunk.sectionTitle}\n\n`
      : "";
    if (sectionPrefix && text.startsWith(sectionPrefix)) {
      text = text.slice(sectionPrefix.length);
    } else if (text.startsWith(titleOnlyPrefix)) {
      text = text.slice(titleOnlyPrefix.length);
    }
    const body = text.trim();
    // Skip chunks that contribute nothing; pushing "" would leave runs of
    // blank separators ("a\n\n\n\nb") in the joined markdown.
    if (body) parts.push(body);
  }
  return parts.join("\n\n");
}
17277
17297
  var UpstashSearchStore = class {
17278
17298
  index;
17279
17299
  pagesNs;
@@ -17653,10 +17673,12 @@ var UpstashSearchStore = class {
17653
17673
  });
17654
17674
  const doc = results[0];
17655
17675
  if (!doc || !doc.metadata) return null;
17676
+ const chunks = await this.getChunksForPage(url, scope);
17677
+ const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
17656
17678
  return {
17657
17679
  url: doc.metadata.url,
17658
17680
  title: doc.metadata.title,
17659
- markdown: doc.metadata.markdown,
17681
+ markdown,
17660
17682
  projectId: doc.metadata.projectId,
17661
17683
  scopeName: doc.metadata.scopeName,
17662
17684
  routeFile: doc.metadata.routeFile,
@@ -17676,6 +17698,37 @@ var UpstashSearchStore = class {
17676
17698
  return null;
17677
17699
  }
17678
17700
  }
17701
+ /**
17702
+ * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
17703
+ * Used to reconstruct full page markdown from chunk content.
17704
+ */
17705
+ async getChunksForPage(url, scope) {
17706
+ const chunks = [];
17707
+ let cursor = "0";
17708
+ try {
17709
+ for (; ; ) {
17710
+ const result = await this.chunksNs.range({
17711
+ cursor,
17712
+ limit: 100,
17713
+ includeMetadata: true
17714
+ });
17715
+ for (const doc of result.vectors) {
17716
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
17717
+ chunks.push({
17718
+ chunkText: doc.metadata.chunkText ?? "",
17719
+ ordinal: doc.metadata.ordinal ?? 0,
17720
+ sectionTitle: doc.metadata.sectionTitle ?? "",
17721
+ headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
17722
+ });
17723
+ }
17724
+ }
17725
+ if (!result.nextCursor || result.nextCursor === "0") break;
17726
+ cursor = result.nextCursor;
17727
+ }
17728
+ } catch {
17729
+ }
17730
+ return chunks.sort((a, b) => a.ordinal - b.ordinal);
17731
+ }
17679
17732
  async fetchPageWithVector(url, scope) {
17680
17733
  try {
17681
17734
  const results = await this.pagesNs.fetch([url], {
@@ -20996,7 +21049,6 @@ var IndexPipeline = class _IndexPipeline {
20996
21049
  keywords: r.keywords ?? [],
20997
21050
  summary: r.summary ?? "",
20998
21051
  tags: r.tags,
20999
- markdown: r.markdown,
21000
21052
  routeFile: r.routeFile,
21001
21053
  routeResolution: r.routeResolution,
21002
21054
  incomingLinks: r.incomingLinks,
@@ -21023,7 +21075,6 @@ var IndexPipeline = class _IndexPipeline {
21023
21075
  keywords: r.keywords ?? [],
21024
21076
  summary: r.summary ?? "",
21025
21077
  tags: r.tags,
21026
- markdown: r.markdown,
21027
21078
  routeFile: r.routeFile,
21028
21079
  routeResolution: r.routeResolution,
21029
21080
  incomingLinks: r.incomingLinks,
@@ -21107,6 +21158,7 @@ var IndexPipeline = class _IndexPipeline {
21107
21158
  let documentsUpserted = 0;
21108
21159
  if (!options.dryRun && changedChunks.length > 0) {
21109
21160
  this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
21161
+ const CHUNK_TEXT_MAX_CHARS = 3e4;
21110
21162
  const docs = changedChunks.map((chunk) => {
21111
21163
  const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
21112
21164
  if (embeddingText.length > 2e3) {
@@ -21114,6 +21166,7 @@ var IndexPipeline = class _IndexPipeline {
21114
21166
  `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
21115
21167
  );
21116
21168
  }
21169
+ const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
21117
21170
  return {
21118
21171
  id: chunk.chunkKey,
21119
21172
  data: embeddingText,
@@ -21124,7 +21177,7 @@ var IndexPipeline = class _IndexPipeline {
21124
21177
  sectionTitle: chunk.sectionTitle ?? "",
21125
21178
  headingPath: chunk.headingPath.join(" > "),
21126
21179
  snippet: chunk.snippet,
21127
- chunkText: embeddingText,
21180
+ chunkText: cappedText,
21128
21181
  tags: chunk.tags,
21129
21182
  ordinal: chunk.ordinal,
21130
21183
  contentHash: chunk.contentHash,
@@ -21686,45 +21739,20 @@ var SearchEngine = class _SearchEngine {
21686
21739
  function createServer(engine) {
21687
21740
  const server = new mcp_js.McpServer({
21688
21741
  name: "searchsocket-mcp",
21689
- version: "0.1.0"
21742
+ version: "0.2.0"
21690
21743
  });
21691
21744
  server.registerTool(
21692
21745
  "search",
21693
21746
  {
21694
- description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
21747
+ description: "Searches indexed site content using semantic similarity. Returns ranked results with url, title, snippet, chunkText (full section markdown), score, and routeFile (source file path for editing). Each result includes the best-matching section; set groupBy to 'page' (default) for additional chunk sub-results per page. Use routeFile to locate the source file when editing content. If snippets lack detail, call get_page with the result URL to retrieve the full page markdown.",
21695
21748
  inputSchema: {
21696
- query: zod.z.string().min(1),
21697
- scope: zod.z.string().optional(),
21698
- topK: zod.z.number().int().positive().max(100).optional(),
21699
- pathPrefix: zod.z.string().optional(),
21700
- tags: zod.z.array(zod.z.string()).optional(),
21701
- filters: zod.z.record(zod.z.string(), zod.z.union([zod.z.string(), zod.z.number(), zod.z.boolean()])).optional(),
21702
- groupBy: zod.z.enum(["page", "chunk"]).optional(),
21703
- maxSubResults: zod.z.number().int().positive().max(20).optional()
21704
- },
21705
- outputSchema: {
21706
- q: zod.z.string(),
21707
- scope: zod.z.string(),
21708
- results: zod.z.array(zod.z.object({
21709
- url: zod.z.string(),
21710
- title: zod.z.string(),
21711
- sectionTitle: zod.z.string().optional(),
21712
- snippet: zod.z.string(),
21713
- score: zod.z.number(),
21714
- routeFile: zod.z.string(),
21715
- chunks: zod.z.array(zod.z.object({
21716
- sectionTitle: zod.z.string().optional(),
21717
- snippet: zod.z.string(),
21718
- headingPath: zod.z.array(zod.z.string()),
21719
- score: zod.z.number()
21720
- })).optional()
21721
- })),
21722
- meta: zod.z.object({
21723
- timingsMs: zod.z.object({
21724
- search: zod.z.number(),
21725
- total: zod.z.number()
21726
- })
21727
- })
21749
+ query: zod.z.string().min(1).describe("Search query. Use keywords or natural language, not full sentences."),
21750
+ topK: zod.z.number().int().positive().max(100).optional().describe("Number of results to return (default: 10, max: 100)"),
21751
+ pathPrefix: zod.z.string().optional().describe("Filter results to URLs starting with this prefix (e.g. '/docs')"),
21752
+ tags: zod.z.array(zod.z.string()).optional().describe("Filter results to pages matching all specified tags"),
21753
+ filters: zod.z.record(zod.z.string(), zod.z.union([zod.z.string(), zod.z.number(), zod.z.boolean()])).optional().describe('Filter by structured page metadata (e.g. {"version": 2})'),
21754
+ groupBy: zod.z.enum(["page", "chunk"]).optional().describe("'page' (default) groups chunks by page with sub-results; 'chunk' returns individual chunks"),
21755
+ scope: zod.z.string().optional()
21728
21756
  }
21729
21757
  },
21730
21758
  async (input) => {
@@ -21735,85 +21763,18 @@ function createServer(engine) {
21735
21763
  pathPrefix: input.pathPrefix,
21736
21764
  tags: input.tags,
21737
21765
  filters: input.filters,
21738
- groupBy: input.groupBy,
21739
- maxSubResults: input.maxSubResults
21766
+ groupBy: input.groupBy
21740
21767
  });
21741
- return {
21742
- content: [
21743
- {
21744
- type: "text",
21745
- text: JSON.stringify(result, null, 2)
21746
- }
21747
- ],
21748
- structuredContent: result
21749
- };
21750
- }
21751
- );
21752
- server.registerTool(
21753
- "get_page",
21754
- {
21755
- description: "Fetch indexed markdown for a specific path or URL, including frontmatter and routeFile mapping.",
21756
- inputSchema: {
21757
- pathOrUrl: zod.z.string().min(1),
21758
- scope: zod.z.string().optional()
21759
- }
21760
- },
21761
- async (input) => {
21762
- const page = await engine.getPage(input.pathOrUrl, input.scope);
21763
- return {
21764
- content: [
21765
- {
21766
- type: "text",
21767
- text: JSON.stringify(page, null, 2)
21768
- }
21769
- ]
21770
- };
21771
- }
21772
- );
21773
- server.registerTool(
21774
- "list_pages",
21775
- {
21776
- description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
21777
- inputSchema: {
21778
- pathPrefix: zod.z.string().optional(),
21779
- cursor: zod.z.string().optional(),
21780
- limit: zod.z.number().int().positive().max(200).optional(),
21781
- scope: zod.z.string().optional()
21782
- }
21783
- },
21784
- async (input) => {
21785
- const result = await engine.listPages({
21786
- pathPrefix: input.pathPrefix,
21787
- cursor: input.cursor,
21788
- limit: input.limit,
21789
- scope: input.scope
21790
- });
21791
- return {
21792
- content: [
21793
- {
21794
- type: "text",
21795
- text: JSON.stringify(result, null, 2)
21796
- }
21797
- ]
21798
- };
21799
- }
21800
- );
21801
- server.registerTool(
21802
- "get_site_structure",
21803
- {
21804
- description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
21805
- inputSchema: {
21806
- pathPrefix: zod.z.string().optional(),
21807
- scope: zod.z.string().optional(),
21808
- maxPages: zod.z.number().int().positive().max(2e3).optional()
21768
+ if (result.results.length === 0) {
21769
+ return {
21770
+ content: [
21771
+ {
21772
+ type: "text",
21773
+ text: `No results found for "${input.query}". Try broader keywords or remove filters.`
21774
+ }
21775
+ ]
21776
+ };
21809
21777
  }
21810
- },
21811
- async (input) => {
21812
- const result = await engine.getSiteStructure({
21813
- pathPrefix: input.pathPrefix,
21814
- scope: input.scope,
21815
- maxPages: input.maxPages
21816
- });
21817
21778
  return {
21818
21779
  content: [
21819
21780
  {
@@ -21825,56 +21786,51 @@ function createServer(engine) {
21825
21786
  }
21826
21787
  );
21827
21788
  server.registerTool(
21828
- "find_source_file",
21789
+ "get_page",
21829
21790
  {
21830
- description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
21791
+ description: "Retrieves the full markdown content and metadata for a specific page by its URL path. Use this after search when snippets lack the detail needed to answer a question. Returns reconstructed page markdown, frontmatter (title, routeFile, tags, link counts, indexedAt), and the source file path. Do NOT use this for discovery \u2014 use search first to find relevant pages.",
21831
21792
  inputSchema: {
21832
- query: zod.z.string().min(1),
21793
+ path: zod.z.string().min(1).describe("URL path of the page (e.g. '/docs/auth'). Use a URL from search results."),
21833
21794
  scope: zod.z.string().optional()
21834
21795
  }
21835
21796
  },
21836
21797
  async (input) => {
21837
- const result = await engine.search({
21838
- q: input.query,
21839
- topK: 1,
21840
- scope: input.scope
21841
- });
21842
- if (result.results.length === 0) {
21798
+ try {
21799
+ const page = await engine.getPage(input.path, input.scope);
21843
21800
  return {
21844
21801
  content: [
21845
21802
  {
21846
21803
  type: "text",
21847
- text: JSON.stringify({
21848
- error: "No matching content found for the given query."
21849
- })
21804
+ text: JSON.stringify(page, null, 2)
21805
+ }
21806
+ ]
21807
+ };
21808
+ } catch {
21809
+ const suggestions = await engine.search({ q: input.path, topK: 3, scope: input.scope });
21810
+ const similar = suggestions.results.map((r) => r.url);
21811
+ return {
21812
+ content: [
21813
+ {
21814
+ type: "text",
21815
+ text: similar.length > 0 ? `Page '${input.path}' not found. Similar pages: ${similar.join(", ")}` : `Page '${input.path}' not found. Use search to find the correct URL.`
21850
21816
  }
21851
21817
  ]
21852
21818
  };
21853
21819
  }
21854
- const match = result.results[0];
21855
- const { url, routeFile, sectionTitle, snippet } = match;
21856
- return {
21857
- content: [
21858
- {
21859
- type: "text",
21860
- text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
21861
- }
21862
- ]
21863
- };
21864
21820
  }
21865
21821
  );
21866
21822
  server.registerTool(
21867
21823
  "get_related_pages",
21868
21824
  {
21869
- description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
21825
+ description: "Finds pages related to a specific page using link graph analysis, semantic similarity, and URL structure. Returns related pages with relationship type (outgoing_link, incoming_link, sibling, semantic) and relevance score. Do NOT use this for general search \u2014 use search instead. Use this only when you already have a specific page URL and need to discover connected content.",
21870
21826
  inputSchema: {
21871
- pathOrUrl: zod.z.string().min(1),
21872
- scope: zod.z.string().optional(),
21873
- topK: zod.z.number().int().positive().max(25).optional()
21827
+ path: zod.z.string().min(1).describe("URL path of the source page (e.g. '/docs/auth'). Use a URL from search results."),
21828
+ topK: zod.z.number().int().positive().max(25).optional().describe("Number of related pages to return (default: 10, max: 25)"),
21829
+ scope: zod.z.string().optional()
21874
21830
  }
21875
21831
  },
21876
21832
  async (input) => {
21877
- const result = await engine.getRelatedPages(input.pathOrUrl, {
21833
+ const result = await engine.getRelatedPages(input.path, {
21878
21834
  topK: input.topK,
21879
21835
  scope: input.scope
21880
21836
  });
package/dist/index.d.cts CHANGED
@@ -40,7 +40,6 @@ interface PageVectorMetadata {
40
40
  keywords: string[];
41
41
  summary: string;
42
42
  tags: string[];
43
- markdown: string;
44
43
  routeFile: string;
45
44
  routeResolution: string;
46
45
  incomingLinks: number;
@@ -121,6 +120,16 @@ declare class UpstashSearchStore {
121
120
  metadata: Record<string, unknown>;
122
121
  }>, scope: Scope): Promise<void>;
123
122
  getPage(url: string, scope: Scope): Promise<PageRecord | null>;
123
+ /**
124
+ * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
125
+ * Used to reconstruct full page markdown from chunk content.
126
+ */
127
+ getChunksForPage(url: string, scope: Scope): Promise<Array<{
128
+ chunkText: string;
129
+ ordinal: number;
130
+ sectionTitle: string;
131
+ headingPath: string[];
132
+ }>>;
124
133
  fetchPageWithVector(url: string, scope: Scope): Promise<{
125
134
  metadata: PageVectorMetadata;
126
135
  vector: number[];
package/dist/index.d.ts CHANGED
@@ -40,7 +40,6 @@ interface PageVectorMetadata {
40
40
  keywords: string[];
41
41
  summary: string;
42
42
  tags: string[];
43
- markdown: string;
44
43
  routeFile: string;
45
44
  routeResolution: string;
46
45
  incomingLinks: number;
@@ -121,6 +120,16 @@ declare class UpstashSearchStore {
121
120
  metadata: Record<string, unknown>;
122
121
  }>, scope: Scope): Promise<void>;
123
122
  getPage(url: string, scope: Scope): Promise<PageRecord | null>;
123
+ /**
124
+ * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
125
+ * Used to reconstruct full page markdown from chunk content.
126
+ */
127
+ getChunksForPage(url: string, scope: Scope): Promise<Array<{
128
+ chunkText: string;
129
+ ordinal: number;
130
+ sectionTitle: string;
131
+ headingPath: string[];
132
+ }>>;
124
133
  fetchPageWithVector(url: string, scope: Scope): Promise<{
125
134
  metadata: PageVectorMetadata;
126
135
  vector: number[];