searchsocket 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -12,7 +12,7 @@ import { Command, Option } from "commander";
12
12
  // package.json
13
13
  var package_default = {
14
14
  name: "searchsocket",
15
- version: "0.6.2",
15
+ version: "0.7.0",
16
16
  description: "Semantic site search and MCP retrieval for SvelteKit static sites",
17
17
  license: "MIT",
18
18
  author: "Greg Priday <greg@siteorigin.com>",
@@ -863,6 +863,26 @@ import path11 from "path";
863
863
 
864
864
  // src/vector/upstash.ts
865
865
  import { QueryMode, FusionAlgorithm } from "@upstash/vector";
866
/**
 * Rebuild a page's markdown body from its chunk texts.
 *
 * A chunk's text may begin with a heading line followed by a blank line:
 * either `<pageTitle> — <sectionTitle>` (checked first, only when the chunk
 * has a section title) or just `<pageTitle>`. When present, that leading
 * heading is removed so the title is not repeated for every chunk.
 * Each remaining text is trimmed; texts that end up empty are dropped so they
 * do not leave stray blank separators in the output.
 *
 * @param {Array<{chunkText?: string, sectionTitle?: string}>} chunks
 *   Chunks in the order they should appear; this function does not re-sort.
 * @param {string} pageTitle Title used to recognise the leading heading.
 * @returns {string} Chunk bodies joined by a blank line, or "" when there is
 *   nothing to join.
 */
function reconstructMarkdownFromChunks(chunks, pageTitle) {
  if (!chunks?.length) return "";
  // Does not depend on the chunk, so build it once instead of per iteration.
  const titleOnlyPrefix = `${pageTitle}\n\n`;
  const parts = [];
  for (const chunk of chunks) {
    // Tolerate chunks without text instead of throwing on `.startsWith`.
    let text = String(chunk.chunkText ?? "");
    const sectionPrefix = `${pageTitle} \u2014 ${chunk.sectionTitle}\n\n`;
    if (chunk.sectionTitle && text.startsWith(sectionPrefix)) {
      text = text.slice(sectionPrefix.length);
    } else if (text.startsWith(titleOnlyPrefix)) {
      text = text.slice(titleOnlyPrefix.length);
    }
    const body = text.trim();
    if (body) parts.push(body);
  }
  return parts.join("\n\n");
}
866
886
  var UpstashSearchStore = class {
867
887
  index;
868
888
  pagesNs;
@@ -1242,10 +1262,12 @@ var UpstashSearchStore = class {
1242
1262
  });
1243
1263
  const doc = results[0];
1244
1264
  if (!doc || !doc.metadata) return null;
1265
+ const chunks = await this.getChunksForPage(url, scope);
1266
+ const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
1245
1267
  return {
1246
1268
  url: doc.metadata.url,
1247
1269
  title: doc.metadata.title,
1248
- markdown: doc.metadata.markdown,
1270
+ markdown,
1249
1271
  projectId: doc.metadata.projectId,
1250
1272
  scopeName: doc.metadata.scopeName,
1251
1273
  routeFile: doc.metadata.routeFile,
@@ -1265,6 +1287,37 @@ var UpstashSearchStore = class {
1265
1287
  return null;
1266
1288
  }
1267
1289
  }
1290
+ /**
1291
+ * Fetch all chunks belonging to a specific page URL, sorted by ordinal.
1292
+ * Used to reconstruct full page markdown from chunk content.
1293
+ */
1294
+ async getChunksForPage(url, scope) {
1295
+ const chunks = [];
1296
+ let cursor = "0";
1297
+ try {
1298
+ for (; ; ) {
1299
+ const result = await this.chunksNs.range({
1300
+ cursor,
1301
+ limit: 100,
1302
+ includeMetadata: true
1303
+ });
1304
+ for (const doc of result.vectors) {
1305
+ if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
1306
+ chunks.push({
1307
+ chunkText: doc.metadata.chunkText ?? "",
1308
+ ordinal: doc.metadata.ordinal ?? 0,
1309
+ sectionTitle: doc.metadata.sectionTitle ?? "",
1310
+ headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
1311
+ });
1312
+ }
1313
+ }
1314
+ if (!result.nextCursor || result.nextCursor === "0") break;
1315
+ cursor = result.nextCursor;
1316
+ }
1317
+ } catch {
1318
+ }
1319
+ return chunks.sort((a, b) => a.ordinal - b.ordinal);
1320
+ }
1268
1321
  async fetchPageWithVector(url, scope) {
1269
1322
  try {
1270
1323
  const results = await this.pagesNs.fetch([url], {
@@ -3737,7 +3790,6 @@ var IndexPipeline = class _IndexPipeline {
3737
3790
  keywords: r.keywords ?? [],
3738
3791
  summary: r.summary ?? "",
3739
3792
  tags: r.tags,
3740
- markdown: r.markdown,
3741
3793
  routeFile: r.routeFile,
3742
3794
  routeResolution: r.routeResolution,
3743
3795
  incomingLinks: r.incomingLinks,
@@ -3764,7 +3816,6 @@ var IndexPipeline = class _IndexPipeline {
3764
3816
  keywords: r.keywords ?? [],
3765
3817
  summary: r.summary ?? "",
3766
3818
  tags: r.tags,
3767
- markdown: r.markdown,
3768
3819
  routeFile: r.routeFile,
3769
3820
  routeResolution: r.routeResolution,
3770
3821
  incomingLinks: r.incomingLinks,
@@ -3848,6 +3899,7 @@ var IndexPipeline = class _IndexPipeline {
3848
3899
  let documentsUpserted = 0;
3849
3900
  if (!options.dryRun && changedChunks.length > 0) {
3850
3901
  this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
3902
+ const CHUNK_TEXT_MAX_CHARS = 3e4;
3851
3903
  const docs = changedChunks.map((chunk) => {
3852
3904
  const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
3853
3905
  if (embeddingText.length > 2e3) {
@@ -3855,6 +3907,7 @@ var IndexPipeline = class _IndexPipeline {
3855
3907
  `Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
3856
3908
  );
3857
3909
  }
3910
+ const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
3858
3911
  return {
3859
3912
  id: chunk.chunkKey,
3860
3913
  data: embeddingText,
@@ -3865,7 +3918,7 @@ var IndexPipeline = class _IndexPipeline {
3865
3918
  sectionTitle: chunk.sectionTitle ?? "",
3866
3919
  headingPath: chunk.headingPath.join(" > "),
3867
3920
  snippet: chunk.snippet,
3868
- chunkText: embeddingText,
3921
+ chunkText: cappedText,
3869
3922
  tags: chunk.tags,
3870
3923
  ordinal: chunk.ordinal,
3871
3924
  contentHash: chunk.contentHash,
@@ -4441,45 +4494,20 @@ var SearchEngine = class _SearchEngine {
4441
4494
  function createServer(engine) {
4442
4495
  const server = new McpServer({
4443
4496
  name: "searchsocket-mcp",
4444
- version: "0.1.0"
4497
+ version: "0.2.0"
4445
4498
  });
4446
4499
  server.registerTool(
4447
4500
  "search",
4448
4501
  {
4449
- description: `Semantic site search powered by Upstash Search. Returns url, title, snippet, chunkText, score, and routeFile per result. chunkText contains the full raw chunk markdown. When groupBy is 'page' (default), each result includes a chunks array with section-level sub-results containing sectionTitle, headingPath, snippet, and score. Supports optional filters for structured metadata (e.g. {"version": 2, "deprecated": false}).`,
4502
+ description: "Searches indexed site content using semantic similarity. Returns ranked results with url, title, snippet, chunkText (full section markdown), score, and routeFile (source file path for editing). Each result includes the best-matching section; set groupBy to 'page' (default) for additional chunk sub-results per page. Use routeFile to locate the source file when editing content. If snippets lack detail, call get_page with the result URL to retrieve the full page markdown.",
4450
4503
  inputSchema: {
4451
- query: z3.string().min(1),
4452
- scope: z3.string().optional(),
4453
- topK: z3.number().int().positive().max(100).optional(),
4454
- pathPrefix: z3.string().optional(),
4455
- tags: z3.array(z3.string()).optional(),
4456
- filters: z3.record(z3.string(), z3.union([z3.string(), z3.number(), z3.boolean()])).optional(),
4457
- groupBy: z3.enum(["page", "chunk"]).optional(),
4458
- maxSubResults: z3.number().int().positive().max(20).optional()
4459
- },
4460
- outputSchema: {
4461
- q: z3.string(),
4462
- scope: z3.string(),
4463
- results: z3.array(z3.object({
4464
- url: z3.string(),
4465
- title: z3.string(),
4466
- sectionTitle: z3.string().optional(),
4467
- snippet: z3.string(),
4468
- score: z3.number(),
4469
- routeFile: z3.string(),
4470
- chunks: z3.array(z3.object({
4471
- sectionTitle: z3.string().optional(),
4472
- snippet: z3.string(),
4473
- headingPath: z3.array(z3.string()),
4474
- score: z3.number()
4475
- })).optional()
4476
- })),
4477
- meta: z3.object({
4478
- timingsMs: z3.object({
4479
- search: z3.number(),
4480
- total: z3.number()
4481
- })
4482
- })
4504
+ query: z3.string().min(1).describe("Search query. Use keywords or natural language, not full sentences."),
4505
+ topK: z3.number().int().positive().max(100).optional().describe("Number of results to return (default: 10, max: 100)"),
4506
+ pathPrefix: z3.string().optional().describe("Filter results to URLs starting with this prefix (e.g. '/docs')"),
4507
+ tags: z3.array(z3.string()).optional().describe("Filter results to pages matching all specified tags"),
4508
+ filters: z3.record(z3.string(), z3.union([z3.string(), z3.number(), z3.boolean()])).optional().describe('Filter by structured page metadata (e.g. {"version": 2})'),
4509
+ groupBy: z3.enum(["page", "chunk"]).optional().describe("'page' (default) groups chunks by page with sub-results; 'chunk' returns individual chunks"),
4510
+ scope: z3.string().optional()
4483
4511
  }
4484
4512
  },
4485
4513
  async (input) => {
@@ -4490,85 +4518,18 @@ function createServer(engine) {
4490
4518
  pathPrefix: input.pathPrefix,
4491
4519
  tags: input.tags,
4492
4520
  filters: input.filters,
4493
- groupBy: input.groupBy,
4494
- maxSubResults: input.maxSubResults
4495
- });
4496
- return {
4497
- content: [
4498
- {
4499
- type: "text",
4500
- text: JSON.stringify(result, null, 2)
4501
- }
4502
- ],
4503
- structuredContent: result
4504
- };
4505
- }
4506
- );
4507
- server.registerTool(
4508
- "get_page",
4509
- {
4510
- description: "Fetch indexed markdown for a specific path or URL, including frontmatter and routeFile mapping.",
4511
- inputSchema: {
4512
- pathOrUrl: z3.string().min(1),
4513
- scope: z3.string().optional()
4514
- }
4515
- },
4516
- async (input) => {
4517
- const page = await engine.getPage(input.pathOrUrl, input.scope);
4518
- return {
4519
- content: [
4520
- {
4521
- type: "text",
4522
- text: JSON.stringify(page, null, 2)
4523
- }
4524
- ]
4525
- };
4526
- }
4527
- );
4528
- server.registerTool(
4529
- "list_pages",
4530
- {
4531
- description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
4532
- inputSchema: {
4533
- pathPrefix: z3.string().optional(),
4534
- cursor: z3.string().optional(),
4535
- limit: z3.number().int().positive().max(200).optional(),
4536
- scope: z3.string().optional()
4537
- }
4538
- },
4539
- async (input) => {
4540
- const result = await engine.listPages({
4541
- pathPrefix: input.pathPrefix,
4542
- cursor: input.cursor,
4543
- limit: input.limit,
4544
- scope: input.scope
4521
+ groupBy: input.groupBy
4545
4522
  });
4546
- return {
4547
- content: [
4548
- {
4549
- type: "text",
4550
- text: JSON.stringify(result, null, 2)
4551
- }
4552
- ]
4553
- };
4554
- }
4555
- );
4556
- server.registerTool(
4557
- "get_site_structure",
4558
- {
4559
- description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
4560
- inputSchema: {
4561
- pathPrefix: z3.string().optional(),
4562
- scope: z3.string().optional(),
4563
- maxPages: z3.number().int().positive().max(2e3).optional()
4523
+ if (result.results.length === 0) {
4524
+ return {
4525
+ content: [
4526
+ {
4527
+ type: "text",
4528
+ text: `No results found for "${input.query}". Try broader keywords or remove filters.`
4529
+ }
4530
+ ]
4531
+ };
4564
4532
  }
4565
- },
4566
- async (input) => {
4567
- const result = await engine.getSiteStructure({
4568
- pathPrefix: input.pathPrefix,
4569
- scope: input.scope,
4570
- maxPages: input.maxPages
4571
- });
4572
4533
  return {
4573
4534
  content: [
4574
4535
  {
@@ -4580,56 +4541,51 @@ function createServer(engine) {
4580
4541
  }
4581
4542
  );
4582
4543
  server.registerTool(
4583
- "find_source_file",
4544
+ "get_page",
4584
4545
  {
4585
- description: "Find the SvelteKit source file for a piece of site content. Use this when you need to locate and edit content on the site. Returns the URL, route file path, section title, and a content snippet.",
4546
+ description: "Retrieves the full markdown content and metadata for a specific page by its URL path. Use this after search when snippets lack the detail needed to answer a question. Returns reconstructed page markdown, frontmatter (title, routeFile, tags, link counts, indexedAt), and the source file path. Do NOT use this for discovery \u2014 use search first to find relevant pages.",
4586
4547
  inputSchema: {
4587
- query: z3.string().min(1),
4548
+ path: z3.string().min(1).describe("URL path of the page (e.g. '/docs/auth'). Use a URL from search results."),
4588
4549
  scope: z3.string().optional()
4589
4550
  }
4590
4551
  },
4591
4552
  async (input) => {
4592
- const result = await engine.search({
4593
- q: input.query,
4594
- topK: 1,
4595
- scope: input.scope
4596
- });
4597
- if (result.results.length === 0) {
4553
+ try {
4554
+ const page = await engine.getPage(input.path, input.scope);
4598
4555
  return {
4599
4556
  content: [
4600
4557
  {
4601
4558
  type: "text",
4602
- text: JSON.stringify({
4603
- error: "No matching content found for the given query."
4604
- })
4559
+ text: JSON.stringify(page, null, 2)
4560
+ }
4561
+ ]
4562
+ };
4563
+ } catch {
4564
+ const suggestions = await engine.search({ q: input.path, topK: 3, scope: input.scope });
4565
+ const similar = suggestions.results.map((r) => r.url);
4566
+ return {
4567
+ content: [
4568
+ {
4569
+ type: "text",
4570
+ text: similar.length > 0 ? `Page '${input.path}' not found. Similar pages: ${similar.join(", ")}` : `Page '${input.path}' not found. Use search to find the correct URL.`
4605
4571
  }
4606
4572
  ]
4607
4573
  };
4608
4574
  }
4609
- const match = result.results[0];
4610
- const { url, routeFile, sectionTitle, snippet } = match;
4611
- return {
4612
- content: [
4613
- {
4614
- type: "text",
4615
- text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
4616
- }
4617
- ]
4618
- };
4619
4575
  }
4620
4576
  );
4621
4577
  server.registerTool(
4622
4578
  "get_related_pages",
4623
4579
  {
4624
- description: "Find pages related to a given URL using link graph, semantic similarity, and structural proximity. Returns related pages ranked by a composite relatedness score. Use this to discover content connected to a known page.",
4580
+ description: "Finds pages related to a specific page using link graph analysis, semantic similarity, and URL structure. Returns related pages with relationship type (outgoing_link, incoming_link, sibling, semantic) and relevance score. Do NOT use this for general search \u2014 use search instead. Use this only when you already have a specific page URL and need to discover connected content.",
4625
4581
  inputSchema: {
4626
- pathOrUrl: z3.string().min(1),
4627
- scope: z3.string().optional(),
4628
- topK: z3.number().int().positive().max(25).optional()
4582
+ path: z3.string().min(1).describe("URL path of the source page (e.g. '/docs/auth'). Use a URL from search results."),
4583
+ topK: z3.number().int().positive().max(25).optional().describe("Number of related pages to return (default: 10, max: 25)"),
4584
+ scope: z3.string().optional()
4629
4585
  }
4630
4586
  },
4631
4587
  async (input) => {
4632
- const result = await engine.getRelatedPages(input.pathOrUrl, {
4588
+ const result = await engine.getRelatedPages(input.path, {
4633
4589
  topK: input.topK,
4634
4590
  scope: input.scope
4635
4591
  });