searchsocket 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/sveltekit.js CHANGED
@@ -17293,6 +17293,26 @@ function joinUrl(baseUrl, route) {
17293
17293
  const routePart = ensureLeadingSlash(route);
17294
17294
  return `${base}${routePart}`;
17295
17295
  }
17296
+ function reconstructMarkdownFromChunks(chunks, pageTitle) {
17297
+ if (chunks.length === 0) return "";
17298
+ const parts = [];
17299
+ for (const chunk of chunks) {
17300
+ let text = chunk.chunkText;
17301
+ const prefixWithSection = `${pageTitle} \u2014 ${chunk.sectionTitle}
17302
+
17303
+ `;
17304
+ const prefixWithoutSection = `${pageTitle}
17305
+
17306
+ `;
17307
+ if (chunk.sectionTitle && text.startsWith(prefixWithSection)) {
17308
+ text = text.slice(prefixWithSection.length);
17309
+ } else if (text.startsWith(prefixWithoutSection)) {
17310
+ text = text.slice(prefixWithoutSection.length);
17311
+ }
17312
+ parts.push(text.trim());
17313
+ }
17314
+ return parts.join("\n\n");
17315
+ }
17296
17316
  var UpstashSearchStore = class {
17297
17317
  index;
17298
17318
  pagesNs;
@@ -17672,10 +17692,12 @@ var UpstashSearchStore = class {
17672
17692
  });
17673
17693
  const doc = results[0];
17674
17694
  if (!doc || !doc.metadata) return null;
17695
+ const chunks = await this.getChunksForPage(url, scope);
17696
+ const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
17675
17697
  return {
17676
17698
  url: doc.metadata.url,
17677
17699
  title: doc.metadata.title,
17678
- markdown: doc.metadata.markdown,
17700
+ markdown,
17679
17701
  projectId: doc.metadata.projectId,
17680
17702
  scopeName: doc.metadata.scopeName,
17681
17703
  routeFile: doc.metadata.routeFile,
@@ -17695,6 +17717,37 @@ var UpstashSearchStore = class {
17695
17717
  return null;
17696
17718
  }
17697
17719
  }
17720
+ /**
17721
+ * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
17722
+ * Used to reconstruct full page markdown from chunk content.
17723
+ */
17724
+ async getChunksForPage(url, scope) {
17725
+ const chunks = [];
17726
+ let cursor = "0";
17727
+ try {
17728
+ for (; ; ) {
17729
+ const result = await this.chunksNs.range({
17730
+ cursor,
17731
+ limit: 100,
17732
+ includeMetadata: true
17733
+ });
17734
+ for (const doc of result.vectors) {
17735
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
17736
+ chunks.push({
17737
+ chunkText: doc.metadata.chunkText ?? "",
17738
+ ordinal: doc.metadata.ordinal ?? 0,
17739
+ sectionTitle: doc.metadata.sectionTitle ?? "",
17740
+ headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
17741
+ });
17742
+ }
17743
+ }
17744
+ if (!result.nextCursor || result.nextCursor === "0") break;
17745
+ cursor = result.nextCursor;
17746
+ }
17747
+ } catch {
17748
+ }
17749
+ return chunks.sort((a, b) => a.ordinal - b.ordinal);
17750
+ }
17698
17751
  async fetchPageWithVector(url, scope) {
17699
17752
  try {
17700
17753
  const results = await this.pagesNs.fetch([url], {
@@ -18650,45 +18703,20 @@ var SearchEngine = class _SearchEngine {
18650
18703
  function createServer(engine) {
18651
18704
  const server = new McpServer({
18652
18705
  name: "searchsocket-mcp",
18653
- version: "0.1.0"
18706
+ version: "0.2.0"
18654
18707
  });
18655
18708
  server.registerTool(
18656
18709
  "search",
18657
18710
  {
18658
- description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
18711
+ description: "Searches indexed site content using semantic similarity. Returns ranked results with url, title, snippet, chunkText (full section markdown), score, and routeFile (source file path for editing). Each result includes the best-matching section; set groupBy to 'page' (default) for additional chunk sub-results per page. Use routeFile to locate the source file when editing content. If snippets lack detail, call get_page with the result URL to retrieve the full page markdown.",
18659
18712
  inputSchema: {
18660
- query: z.string().min(1),
18661
- scope: z.string().optional(),
18662
- topK: z.number().int().positive().max(100).optional(),
18663
- pathPrefix: z.string().optional(),
18664
- tags: z.array(z.string()).optional(),
18665
- filters: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional(),
18666
- groupBy: z.enum(["page", "chunk"]).optional(),
18667
- maxSubResults: z.number().int().positive().max(20).optional()
18668
- },
18669
- outputSchema: {
18670
- q: z.string(),
18671
- scope: z.string(),
18672
- results: z.array(z.object({
18673
- url: z.string(),
18674
- title: z.string(),
18675
- sectionTitle: z.string().optional(),
18676
- snippet: z.string(),
18677
- score: z.number(),
18678
- routeFile: z.string(),
18679
- chunks: z.array(z.object({
18680
- sectionTitle: z.string().optional(),
18681
- snippet: z.string(),
18682
- headingPath: z.array(z.string()),
18683
- score: z.number()
18684
- })).optional()
18685
- })),
18686
- meta: z.object({
18687
- timingsMs: z.object({
18688
- search: z.number(),
18689
- total: z.number()
18690
- })
18691
- })
18713
+ query: z.string().min(1).describe("Search query. Use keywords or natural language, not full sentences."),
18714
+ topK: z.number().int().positive().max(100).optional().describe("Number of results to return (default: 10, max: 100)"),
18715
+ pathPrefix: z.string().optional().describe("Filter results to URLs starting with this prefix (e.g. '/docs')"),
18716
+ tags: z.array(z.string()).optional().describe("Filter results to pages matching all specified tags"),
18717
+ filters: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional().describe('Filter by structured page metadata (e.g. {"version": 2})'),
18718
+ groupBy: z.enum(["page", "chunk"]).optional().describe("'page' (default) groups chunks by page with sub-results; 'chunk' returns individual chunks"),
18719
+ scope: z.string().optional()
18692
18720
  }
18693
18721
  },
18694
18722
  async (input) => {
@@ -18699,85 +18727,18 @@ function createServer(engine) {
18699
18727
  pathPrefix: input.pathPrefix,
18700
18728
  tags: input.tags,
18701
18729
  filters: input.filters,
18702
- groupBy: input.groupBy,
18703
- maxSubResults: input.maxSubResults
18730
+ groupBy: input.groupBy
18704
18731
  });
18705
- return {
18706
- content: [
18707
- {
18708
- type: "text",
18709
- text: JSON.stringify(result, null, 2)
18710
- }
18711
- ],
18712
- structuredContent: result
18713
- };
18714
- }
18715
- );
18716
- server.registerTool(
18717
- "get_page",
18718
- {
18719
- description: "Fetch indexed markdown for a specific path or URL, including frontmatter and routeFile mapping.",
18720
- inputSchema: {
18721
- pathOrUrl: z.string().min(1),
18722
- scope: z.string().optional()
18723
- }
18724
- },
18725
- async (input) => {
18726
- const page = await engine.getPage(input.pathOrUrl, input.scope);
18727
- return {
18728
- content: [
18729
- {
18730
- type: "text",
18731
- text: JSON.stringify(page, null, 2)
18732
- }
18733
- ]
18734
- };
18735
- }
18736
- );
18737
- server.registerTool(
18738
- "list_pages",
18739
- {
18740
- description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
18741
- inputSchema: {
18742
- pathPrefix: z.string().optional(),
18743
- cursor: z.string().optional(),
18744
- limit: z.number().int().positive().max(200).optional(),
18745
- scope: z.string().optional()
18746
- }
18747
- },
18748
- async (input) => {
18749
- const result = await engine.listPages({
18750
- pathPrefix: input.pathPrefix,
18751
- cursor: input.cursor,
18752
- limit: input.limit,
18753
- scope: input.scope
18754
- });
18755
- return {
18756
- content: [
18757
- {
18758
- type: "text",
18759
- text: JSON.stringify(result, null, 2)
18760
- }
18761
- ]
18762
- };
18763
- }
18764
- );
18765
- server.registerTool(
18766
- "get_site_structure",
18767
- {
18768
- description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
18769
- inputSchema: {
18770
- pathPrefix: z.string().optional(),
18771
- scope: z.string().optional(),
18772
- maxPages: z.number().int().positive().max(2e3).optional()
18732
+ if (result.results.length === 0) {
18733
+ return {
18734
+ content: [
18735
+ {
18736
+ type: "text",
18737
+ text: `No results found for "${input.query}". Try broader keywords or remove filters.`
18738
+ }
18739
+ ]
18740
+ };
18773
18741
  }
18774
- },
18775
- async (input) => {
18776
- const result = await engine.getSiteStructure({
18777
- pathPrefix: input.pathPrefix,
18778
- scope: input.scope,
18779
- maxPages: input.maxPages
18780
- });
18781
18742
  return {
18782
18743
  content: [
18783
18744
  {
@@ -18789,56 +18750,51 @@ function createServer(engine) {
18789
18750
  }
18790
18751
  );
18791
18752
  server.registerTool(
18792
- "find_source_file",
18753
+ "get_page",
18793
18754
  {
18794
- description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
18755
+ description: "Retrieves the full markdown content and metadata for a specific page by its URL path. Use this after search when snippets lack the detail needed to answer a question. Returns reconstructed page markdown, frontmatter (title, routeFile, tags, link counts, indexedAt), and the source file path. Do NOT use this for discovery \u2014 use search first to find relevant pages.",
18795
18756
  inputSchema: {
18796
- query: z.string().min(1),
18757
+ path: z.string().min(1).describe("URL path of the page (e.g. '/docs/auth'). Use a URL from search results."),
18797
18758
  scope: z.string().optional()
18798
18759
  }
18799
18760
  },
18800
18761
  async (input) => {
18801
- const result = await engine.search({
18802
- q: input.query,
18803
- topK: 1,
18804
- scope: input.scope
18805
- });
18806
- if (result.results.length === 0) {
18762
+ try {
18763
+ const page = await engine.getPage(input.path, input.scope);
18807
18764
  return {
18808
18765
  content: [
18809
18766
  {
18810
18767
  type: "text",
18811
- text: JSON.stringify({
18812
- error: "No matching content found for the given query."
18813
- })
18768
+ text: JSON.stringify(page, null, 2)
18769
+ }
18770
+ ]
18771
+ };
18772
+ } catch {
18773
+ const suggestions = await engine.search({ q: input.path, topK: 3, scope: input.scope });
18774
+ const similar = suggestions.results.map((r) => r.url);
18775
+ return {
18776
+ content: [
18777
+ {
18778
+ type: "text",
18779
+ text: similar.length > 0 ? `Page '${input.path}' not found. Similar pages: ${similar.join(", ")}` : `Page '${input.path}' not found. Use search to find the correct URL.`
18814
18780
  }
18815
18781
  ]
18816
18782
  };
18817
18783
  }
18818
- const match = result.results[0];
18819
- const { url, routeFile, sectionTitle, snippet } = match;
18820
- return {
18821
- content: [
18822
- {
18823
- type: "text",
18824
- text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
18825
- }
18826
- ]
18827
- };
18828
18784
  }
18829
18785
  );
18830
18786
  server.registerTool(
18831
18787
  "get_related_pages",
18832
18788
  {
18833
- description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
18789
+ description: "Finds pages related to a specific page using link graph analysis, semantic similarity, and URL structure. Returns related pages with relationship type (outgoing_link, incoming_link, sibling, semantic) and relevance score. Do NOT use this for general search \u2014 use search instead. Use this only when you already have a specific page URL and need to discover connected content.",
18834
18790
  inputSchema: {
18835
- pathOrUrl: z.string().min(1),
18836
- scope: z.string().optional(),
18837
- topK: z.number().int().positive().max(25).optional()
18791
+ path: z.string().min(1).describe("URL path of the source page (e.g. '/docs/auth'). Use a URL from search results."),
18792
+ topK: z.number().int().positive().max(25).optional().describe("Number of related pages to return (default: 10, max: 25)"),
18793
+ scope: z.string().optional()
18838
18794
  }
18839
18795
  },
18840
18796
  async (input) => {
18841
- const result = await engine.getRelatedPages(input.pathOrUrl, {
18797
+ const result = await engine.getRelatedPages(input.path, {
18842
18798
  topK: input.topK,
18843
18799
  scope: input.scope
18844
18800
  });
@@ -22126,7 +22082,6 @@ var IndexPipeline = class _IndexPipeline {
22126
22082
  keywords: r.keywords ?? [],
22127
22083
  summary: r.summary ?? "",
22128
22084
  tags: r.tags,
22129
- markdown: r.markdown,
22130
22085
  routeFile: r.routeFile,
22131
22086
  routeResolution: r.routeResolution,
22132
22087
  incomingLinks: r.incomingLinks,
@@ -22153,7 +22108,6 @@ var IndexPipeline = class _IndexPipeline {
22153
22108
  keywords: r.keywords ?? [],
22154
22109
  summary: r.summary ?? "",
22155
22110
  tags: r.tags,
22156
- markdown: r.markdown,
22157
22111
  routeFile: r.routeFile,
22158
22112
  routeResolution: r.routeResolution,
22159
22113
  incomingLinks: r.incomingLinks,
@@ -22237,6 +22191,7 @@ var IndexPipeline = class _IndexPipeline {
22237
22191
  let documentsUpserted = 0;
22238
22192
  if (!options.dryRun && changedChunks.length > 0) {
22239
22193
  this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
22194
+ const CHUNK_TEXT_MAX_CHARS = 3e4;
22240
22195
  const docs = changedChunks.map((chunk) => {
22241
22196
  const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
22242
22197
  if (embeddingText.length > 2e3) {
@@ -22244,6 +22199,7 @@ var IndexPipeline = class _IndexPipeline {
22244
22199
  `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
22245
22200
  );
22246
22201
  }
22202
+ const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
22247
22203
  return {
22248
22204
  id: chunk.chunkKey,
22249
22205
  data: embeddingText,
@@ -22254,7 +22210,7 @@ var IndexPipeline = class _IndexPipeline {
22254
22210
  sectionTitle: chunk.sectionTitle ?? "",
22255
22211
  headingPath: chunk.headingPath.join(" > "),
22256
22212
  snippet: chunk.snippet,
22257
- chunkText: embeddingText,
22213
+ chunkText: cappedText,
22258
22214
  tags: chunk.tags,
22259
22215
  ordinal: chunk.ordinal,
22260
22216
  contentHash: chunk.contentHash,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "searchsocket",
3
- "version": "0.6.2",
3
+ "version": "0.7.0",
4
4
  "description": "Semantic site search and MCP retrieval for SvelteKit static sites",
5
5
  "license": "MIT",
6
6
  "author": "Greg Priday <greg@siteorigin.com>",