searchsocket 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +99 -143
- package/dist/index.cjs +98 -142
- package/dist/index.d.cts +10 -1
- package/dist/index.d.ts +10 -1
- package/dist/index.js +98 -142
- package/dist/sveltekit.cjs +98 -142
- package/dist/sveltekit.js +98 -142
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -12,7 +12,7 @@ import { Command, Option } from "commander";
|
|
|
12
12
|
// package.json
|
|
13
13
|
var package_default = {
|
|
14
14
|
name: "searchsocket",
|
|
15
|
-
version: "0.6.2",
|
|
15
|
+
version: "0.7.0",
|
|
16
16
|
description: "Semantic site search and MCP retrieval for SvelteKit static sites",
|
|
17
17
|
license: "MIT",
|
|
18
18
|
author: "Greg Priday <greg@siteorigin.com>",
|
|
@@ -863,6 +863,26 @@ import path11 from "path";
|
|
|
863
863
|
|
|
864
864
|
// src/vector/upstash.ts
|
|
865
865
|
import { QueryMode, FusionAlgorithm } from "@upstash/vector";
|
|
866
|
+
function reconstructMarkdownFromChunks(chunks, pageTitle) {
|
|
867
|
+
if (chunks.length === 0) return "";
|
|
868
|
+
const parts = [];
|
|
869
|
+
for (const chunk of chunks) {
|
|
870
|
+
let text2 = chunk.chunkText;
|
|
871
|
+
const prefixWithSection = `${pageTitle} \u2014 ${chunk.sectionTitle}
|
|
872
|
+
|
|
873
|
+
`;
|
|
874
|
+
const prefixWithoutSection = `${pageTitle}
|
|
875
|
+
|
|
876
|
+
`;
|
|
877
|
+
if (chunk.sectionTitle && text2.startsWith(prefixWithSection)) {
|
|
878
|
+
text2 = text2.slice(prefixWithSection.length);
|
|
879
|
+
} else if (text2.startsWith(prefixWithoutSection)) {
|
|
880
|
+
text2 = text2.slice(prefixWithoutSection.length);
|
|
881
|
+
}
|
|
882
|
+
parts.push(text2.trim());
|
|
883
|
+
}
|
|
884
|
+
return parts.join("\n\n");
|
|
885
|
+
}
|
|
866
886
|
var UpstashSearchStore = class {
|
|
867
887
|
index;
|
|
868
888
|
pagesNs;
|
|
@@ -1242,10 +1262,12 @@ var UpstashSearchStore = class {
|
|
|
1242
1262
|
});
|
|
1243
1263
|
const doc = results[0];
|
|
1244
1264
|
if (!doc || !doc.metadata) return null;
|
|
1265
|
+
const chunks = await this.getChunksForPage(url, scope);
|
|
1266
|
+
const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
|
|
1245
1267
|
return {
|
|
1246
1268
|
url: doc.metadata.url,
|
|
1247
1269
|
title: doc.metadata.title,
|
|
1248
|
-
markdown
|
|
1270
|
+
markdown,
|
|
1249
1271
|
projectId: doc.metadata.projectId,
|
|
1250
1272
|
scopeName: doc.metadata.scopeName,
|
|
1251
1273
|
routeFile: doc.metadata.routeFile,
|
|
@@ -1265,6 +1287,37 @@ var UpstashSearchStore = class {
|
|
|
1265
1287
|
return null;
|
|
1266
1288
|
}
|
|
1267
1289
|
}
|
|
1290
|
+
/**
|
|
1291
|
+
* Fetch all chunks belonging to a specific page URL, sorted by ordinal.
|
|
1292
|
+
* Used to reconstruct full page markdown from chunk content.
|
|
1293
|
+
*/
|
|
1294
|
+
async getChunksForPage(url, scope) {
|
|
1295
|
+
const chunks = [];
|
|
1296
|
+
let cursor = "0";
|
|
1297
|
+
try {
|
|
1298
|
+
for (; ; ) {
|
|
1299
|
+
const result = await this.chunksNs.range({
|
|
1300
|
+
cursor,
|
|
1301
|
+
limit: 100,
|
|
1302
|
+
includeMetadata: true
|
|
1303
|
+
});
|
|
1304
|
+
for (const doc of result.vectors) {
|
|
1305
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
|
|
1306
|
+
chunks.push({
|
|
1307
|
+
chunkText: doc.metadata.chunkText ?? "",
|
|
1308
|
+
ordinal: doc.metadata.ordinal ?? 0,
|
|
1309
|
+
sectionTitle: doc.metadata.sectionTitle ?? "",
|
|
1310
|
+
headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
|
|
1311
|
+
});
|
|
1312
|
+
}
|
|
1313
|
+
}
|
|
1314
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
1315
|
+
cursor = result.nextCursor;
|
|
1316
|
+
}
|
|
1317
|
+
} catch {
|
|
1318
|
+
}
|
|
1319
|
+
return chunks.sort((a, b) => a.ordinal - b.ordinal);
|
|
1320
|
+
}
|
|
1268
1321
|
async fetchPageWithVector(url, scope) {
|
|
1269
1322
|
try {
|
|
1270
1323
|
const results = await this.pagesNs.fetch([url], {
|
|
@@ -3737,7 +3790,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
3737
3790
|
keywords: r.keywords ?? [],
|
|
3738
3791
|
summary: r.summary ?? "",
|
|
3739
3792
|
tags: r.tags,
|
|
3740
|
-
markdown: r.markdown,
|
|
3741
3793
|
routeFile: r.routeFile,
|
|
3742
3794
|
routeResolution: r.routeResolution,
|
|
3743
3795
|
incomingLinks: r.incomingLinks,
|
|
@@ -3764,7 +3816,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
3764
3816
|
keywords: r.keywords ?? [],
|
|
3765
3817
|
summary: r.summary ?? "",
|
|
3766
3818
|
tags: r.tags,
|
|
3767
|
-
markdown: r.markdown,
|
|
3768
3819
|
routeFile: r.routeFile,
|
|
3769
3820
|
routeResolution: r.routeResolution,
|
|
3770
3821
|
incomingLinks: r.incomingLinks,
|
|
@@ -3848,6 +3899,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
3848
3899
|
let documentsUpserted = 0;
|
|
3849
3900
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
3850
3901
|
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
|
|
3902
|
+
const CHUNK_TEXT_MAX_CHARS = 3e4;
|
|
3851
3903
|
const docs = changedChunks.map((chunk) => {
|
|
3852
3904
|
const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
|
|
3853
3905
|
if (embeddingText.length > 2e3) {
|
|
@@ -3855,6 +3907,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
3855
3907
|
`Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
|
|
3856
3908
|
);
|
|
3857
3909
|
}
|
|
3910
|
+
const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
|
|
3858
3911
|
return {
|
|
3859
3912
|
id: chunk.chunkKey,
|
|
3860
3913
|
data: embeddingText,
|
|
@@ -3865,7 +3918,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
3865
3918
|
sectionTitle: chunk.sectionTitle ?? "",
|
|
3866
3919
|
headingPath: chunk.headingPath.join(" > "),
|
|
3867
3920
|
snippet: chunk.snippet,
|
|
3868
|
-
chunkText:
|
|
3921
|
+
chunkText: cappedText,
|
|
3869
3922
|
tags: chunk.tags,
|
|
3870
3923
|
ordinal: chunk.ordinal,
|
|
3871
3924
|
contentHash: chunk.contentHash,
|
|
@@ -4441,45 +4494,20 @@ var SearchEngine = class _SearchEngine {
|
|
|
4441
4494
|
function createServer(engine) {
|
|
4442
4495
|
const server = new McpServer({
|
|
4443
4496
|
name: "searchsocket-mcp",
|
|
4444
|
-
version: "0.
|
|
4497
|
+
version: "0.2.0"
|
|
4445
4498
|
});
|
|
4446
4499
|
server.registerTool(
|
|
4447
4500
|
"search",
|
|
4448
4501
|
{
|
|
4449
|
-
description:
|
|
4502
|
+
description: "Searches indexed site content using semantic similarity. Returns ranked results with url, title, snippet, chunkText (full section markdown), score, and routeFile (source file path for editing). Each result includes the best-matching section; set groupBy to 'page' (default) for additional chunk sub-results per page. Use routeFile to locate the source file when editing content. If snippets lack detail, call get_page with the result URL to retrieve the full page markdown.",
|
|
4450
4503
|
inputSchema: {
|
|
4451
|
-
query: z3.string().min(1),
|
|
4452
|
-
|
|
4453
|
-
|
|
4454
|
-
|
|
4455
|
-
|
|
4456
|
-
|
|
4457
|
-
|
|
4458
|
-
maxSubResults: z3.number().int().positive().max(20).optional()
|
|
4459
|
-
},
|
|
4460
|
-
outputSchema: {
|
|
4461
|
-
q: z3.string(),
|
|
4462
|
-
scope: z3.string(),
|
|
4463
|
-
results: z3.array(z3.object({
|
|
4464
|
-
url: z3.string(),
|
|
4465
|
-
title: z3.string(),
|
|
4466
|
-
sectionTitle: z3.string().optional(),
|
|
4467
|
-
snippet: z3.string(),
|
|
4468
|
-
score: z3.number(),
|
|
4469
|
-
routeFile: z3.string(),
|
|
4470
|
-
chunks: z3.array(z3.object({
|
|
4471
|
-
sectionTitle: z3.string().optional(),
|
|
4472
|
-
snippet: z3.string(),
|
|
4473
|
-
headingPath: z3.array(z3.string()),
|
|
4474
|
-
score: z3.number()
|
|
4475
|
-
})).optional()
|
|
4476
|
-
})),
|
|
4477
|
-
meta: z3.object({
|
|
4478
|
-
timingsMs: z3.object({
|
|
4479
|
-
search: z3.number(),
|
|
4480
|
-
total: z3.number()
|
|
4481
|
-
})
|
|
4482
|
-
})
|
|
4504
|
+
query: z3.string().min(1).describe("Search query. Use keywords or natural language, not full sentences."),
|
|
4505
|
+
topK: z3.number().int().positive().max(100).optional().describe("Number of results to return (default: 10, max: 100)"),
|
|
4506
|
+
pathPrefix: z3.string().optional().describe("Filter results to URLs starting with this prefix (e.g. '/docs')"),
|
|
4507
|
+
tags: z3.array(z3.string()).optional().describe("Filter results to pages matching all specified tags"),
|
|
4508
|
+
filters: z3.record(z3.string(), z3.union([z3.string(), z3.number(), z3.boolean()])).optional().describe('Filter by structured page metadata (e.g. {"version": 2})'),
|
|
4509
|
+
groupBy: z3.enum(["page", "chunk"]).optional().describe("'page' (default) groups chunks by page with sub-results; 'chunk' returns individual chunks"),
|
|
4510
|
+
scope: z3.string().optional()
|
|
4483
4511
|
}
|
|
4484
4512
|
},
|
|
4485
4513
|
async (input) => {
|
|
@@ -4490,85 +4518,18 @@ function createServer(engine) {
|
|
|
4490
4518
|
pathPrefix: input.pathPrefix,
|
|
4491
4519
|
tags: input.tags,
|
|
4492
4520
|
filters: input.filters,
|
|
4493
|
-
groupBy: input.groupBy,
|
|
4494
|
-
maxSubResults: input.maxSubResults
|
|
4495
|
-
});
|
|
4496
|
-
return {
|
|
4497
|
-
content: [
|
|
4498
|
-
{
|
|
4499
|
-
type: "text",
|
|
4500
|
-
text: JSON.stringify(result, null, 2)
|
|
4501
|
-
}
|
|
4502
|
-
],
|
|
4503
|
-
structuredContent: result
|
|
4504
|
-
};
|
|
4505
|
-
}
|
|
4506
|
-
);
|
|
4507
|
-
server.registerTool(
|
|
4508
|
-
"get_page",
|
|
4509
|
-
{
|
|
4510
|
-
description: "Fetch indexed markdown for a specific path or URL, including frontmatter and routeFile mapping.",
|
|
4511
|
-
inputSchema: {
|
|
4512
|
-
pathOrUrl: z3.string().min(1),
|
|
4513
|
-
scope: z3.string().optional()
|
|
4514
|
-
}
|
|
4515
|
-
},
|
|
4516
|
-
async (input) => {
|
|
4517
|
-
const page = await engine.getPage(input.pathOrUrl, input.scope);
|
|
4518
|
-
return {
|
|
4519
|
-
content: [
|
|
4520
|
-
{
|
|
4521
|
-
type: "text",
|
|
4522
|
-
text: JSON.stringify(page, null, 2)
|
|
4523
|
-
}
|
|
4524
|
-
]
|
|
4525
|
-
};
|
|
4526
|
-
}
|
|
4527
|
-
);
|
|
4528
|
-
server.registerTool(
|
|
4529
|
-
"list_pages",
|
|
4530
|
-
{
|
|
4531
|
-
description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
|
|
4532
|
-
inputSchema: {
|
|
4533
|
-
pathPrefix: z3.string().optional(),
|
|
4534
|
-
cursor: z3.string().optional(),
|
|
4535
|
-
limit: z3.number().int().positive().max(200).optional(),
|
|
4536
|
-
scope: z3.string().optional()
|
|
4537
|
-
}
|
|
4538
|
-
},
|
|
4539
|
-
async (input) => {
|
|
4540
|
-
const result = await engine.listPages({
|
|
4541
|
-
pathPrefix: input.pathPrefix,
|
|
4542
|
-
cursor: input.cursor,
|
|
4543
|
-
limit: input.limit,
|
|
4544
|
-
scope: input.scope
|
|
4521
|
+
groupBy: input.groupBy
|
|
4545
4522
|
});
|
|
4546
|
-
|
|
4547
|
-
|
|
4548
|
-
|
|
4549
|
-
|
|
4550
|
-
|
|
4551
|
-
|
|
4552
|
-
|
|
4553
|
-
|
|
4554
|
-
|
|
4555
|
-
);
|
|
4556
|
-
server.registerTool(
|
|
4557
|
-
"get_site_structure",
|
|
4558
|
-
{
|
|
4559
|
-
description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
|
|
4560
|
-
inputSchema: {
|
|
4561
|
-
pathPrefix: z3.string().optional(),
|
|
4562
|
-
scope: z3.string().optional(),
|
|
4563
|
-
maxPages: z3.number().int().positive().max(2e3).optional()
|
|
4523
|
+
if (result.results.length === 0) {
|
|
4524
|
+
return {
|
|
4525
|
+
content: [
|
|
4526
|
+
{
|
|
4527
|
+
type: "text",
|
|
4528
|
+
text: `No results found for "${input.query}". Try broader keywords or remove filters.`
|
|
4529
|
+
}
|
|
4530
|
+
]
|
|
4531
|
+
};
|
|
4564
4532
|
}
|
|
4565
|
-
},
|
|
4566
|
-
async (input) => {
|
|
4567
|
-
const result = await engine.getSiteStructure({
|
|
4568
|
-
pathPrefix: input.pathPrefix,
|
|
4569
|
-
scope: input.scope,
|
|
4570
|
-
maxPages: input.maxPages
|
|
4571
|
-
});
|
|
4572
4533
|
return {
|
|
4573
4534
|
content: [
|
|
4574
4535
|
{
|
|
@@ -4580,56 +4541,51 @@ function createServer(engine) {
|
|
|
4580
4541
|
}
|
|
4581
4542
|
);
|
|
4582
4543
|
server.registerTool(
|
|
4583
|
-
"
|
|
4544
|
+
"get_page",
|
|
4584
4545
|
{
|
|
4585
|
-
description: "
|
|
4546
|
+
description: "Retrieves the full markdown content and metadata for a specific page by its URL path. Use this after search when snippets lack the detail needed to answer a question. Returns reconstructed page markdown, frontmatter (title, routeFile, tags, link counts, indexedAt), and the source file path. Do NOT use this for discovery \u2014 use search first to find relevant pages.",
|
|
4586
4547
|
inputSchema: {
|
|
4587
|
-
|
|
4548
|
+
path: z3.string().min(1).describe("URL path of the page (e.g. '/docs/auth'). Use a URL from search results."),
|
|
4588
4549
|
scope: z3.string().optional()
|
|
4589
4550
|
}
|
|
4590
4551
|
},
|
|
4591
4552
|
async (input) => {
|
|
4592
|
-
|
|
4593
|
-
|
|
4594
|
-
topK: 1,
|
|
4595
|
-
scope: input.scope
|
|
4596
|
-
});
|
|
4597
|
-
if (result.results.length === 0) {
|
|
4553
|
+
try {
|
|
4554
|
+
const page = await engine.getPage(input.path, input.scope);
|
|
4598
4555
|
return {
|
|
4599
4556
|
content: [
|
|
4600
4557
|
{
|
|
4601
4558
|
type: "text",
|
|
4602
|
-
text: JSON.stringify(
|
|
4603
|
-
|
|
4604
|
-
|
|
4559
|
+
text: JSON.stringify(page, null, 2)
|
|
4560
|
+
}
|
|
4561
|
+
]
|
|
4562
|
+
};
|
|
4563
|
+
} catch {
|
|
4564
|
+
const suggestions = await engine.search({ q: input.path, topK: 3, scope: input.scope });
|
|
4565
|
+
const similar = suggestions.results.map((r) => r.url);
|
|
4566
|
+
return {
|
|
4567
|
+
content: [
|
|
4568
|
+
{
|
|
4569
|
+
type: "text",
|
|
4570
|
+
text: similar.length > 0 ? `Page '${input.path}' not found. Similar pages: ${similar.join(", ")}` : `Page '${input.path}' not found. Use search to find the correct URL.`
|
|
4605
4571
|
}
|
|
4606
4572
|
]
|
|
4607
4573
|
};
|
|
4608
4574
|
}
|
|
4609
|
-
const match = result.results[0];
|
|
4610
|
-
const { url, routeFile, sectionTitle, snippet } = match;
|
|
4611
|
-
return {
|
|
4612
|
-
content: [
|
|
4613
|
-
{
|
|
4614
|
-
type: "text",
|
|
4615
|
-
text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
|
|
4616
|
-
}
|
|
4617
|
-
]
|
|
4618
|
-
};
|
|
4619
4575
|
}
|
|
4620
4576
|
);
|
|
4621
4577
|
server.registerTool(
|
|
4622
4578
|
"get_related_pages",
|
|
4623
4579
|
{
|
|
4624
|
-
description: "
|
|
4580
|
+
description: "Finds pages related to a specific page using link graph analysis, semantic similarity, and URL structure. Returns related pages with relationship type (outgoing_link, incoming_link, sibling, semantic) and relevance score. Do NOT use this for general search \u2014 use search instead. Use this only when you already have a specific page URL and need to discover connected content.",
|
|
4625
4581
|
inputSchema: {
|
|
4626
|
-
|
|
4627
|
-
|
|
4628
|
-
|
|
4582
|
+
path: z3.string().min(1).describe("URL path of the source page (e.g. '/docs/auth'). Use a URL from search results."),
|
|
4583
|
+
topK: z3.number().int().positive().max(25).optional().describe("Number of related pages to return (default: 10, max: 25)"),
|
|
4584
|
+
scope: z3.string().optional()
|
|
4629
4585
|
}
|
|
4630
4586
|
},
|
|
4631
4587
|
async (input) => {
|
|
4632
|
-
const result = await engine.getRelatedPages(input.
|
|
4588
|
+
const result = await engine.getRelatedPages(input.path, {
|
|
4633
4589
|
topK: input.topK,
|
|
4634
4590
|
scope: input.scope
|
|
4635
4591
|
});
|