searchsocket 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +99 -143
- package/dist/index.cjs +98 -142
- package/dist/index.d.cts +10 -1
- package/dist/index.d.ts +10 -1
- package/dist/index.js +98 -142
- package/dist/sveltekit.cjs +98 -142
- package/dist/sveltekit.js +98 -142
- package/package.json +1 -1
package/dist/sveltekit.cjs
CHANGED
|
@@ -17305,6 +17305,26 @@ function joinUrl(baseUrl, route) {
|
|
|
17305
17305
|
const routePart = ensureLeadingSlash(route);
|
|
17306
17306
|
return `${base}${routePart}`;
|
|
17307
17307
|
}
|
|
17308
|
+
/**
 * Rebuild a page's markdown body from its chunks.
 *
 * A chunk's text may begin with a title prefix, either
 * "<pageTitle> — <sectionTitle>\n\n" or "<pageTitle>\n\n". When present, that
 * prefix is removed so the page title is not repeated for every chunk.
 * (Presumably the prefix is added at indexing time when titles are prepended
 * to the embedding text — confirm against the indexing pipeline.)
 *
 * Chunks are emitted in the order given; the caller is responsible for sorting.
 *
 * @param {Iterable<{chunkText?: string, sectionTitle?: string}>|null|undefined} chunks
 *   Chunks of a single page. A nullish or empty list yields "".
 * @param {string} pageTitle - Page title used to build the prefixes to strip.
 * @returns {string} Trimmed chunk bodies joined by a blank line. Chunks that
 *   are empty after stripping and trimming are skipped so they do not leave
 *   stray blank separators in the output.
 */
function reconstructMarkdownFromChunks(chunks, pageTitle) {
  if (chunks == null || chunks.length === 0) return "";
  // The title-only prefix does not depend on the chunk, so build it once.
  const prefixWithoutSection = `${pageTitle}\n\n`;
  const parts = [];
  for (const chunk of chunks) {
    // Tolerate chunks with missing text instead of throwing on startsWith().
    let text = String(chunk?.chunkText ?? "");
    const sectionTitle = chunk?.sectionTitle;
    const prefixWithSection = `${pageTitle} \u2014 ${sectionTitle}\n\n`;
    if (sectionTitle && text.startsWith(prefixWithSection)) {
      text = text.slice(prefixWithSection.length);
    } else if (text.startsWith(prefixWithoutSection)) {
      text = text.slice(prefixWithoutSection.length);
    }
    const body = text.trim();
    // Skip chunks with no content; joining empty strings would emit "\n\n\n\n".
    if (body !== "") {
      parts.push(body);
    }
  }
  return parts.join("\n\n");
}
|
|
17308
17328
|
var UpstashSearchStore = class {
|
|
17309
17329
|
index;
|
|
17310
17330
|
pagesNs;
|
|
@@ -17684,10 +17704,12 @@ var UpstashSearchStore = class {
|
|
|
17684
17704
|
});
|
|
17685
17705
|
const doc = results[0];
|
|
17686
17706
|
if (!doc || !doc.metadata) return null;
|
|
17707
|
+
const chunks = await this.getChunksForPage(url, scope);
|
|
17708
|
+
const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
|
|
17687
17709
|
return {
|
|
17688
17710
|
url: doc.metadata.url,
|
|
17689
17711
|
title: doc.metadata.title,
|
|
17690
|
-
markdown
|
|
17712
|
+
markdown,
|
|
17691
17713
|
projectId: doc.metadata.projectId,
|
|
17692
17714
|
scopeName: doc.metadata.scopeName,
|
|
17693
17715
|
routeFile: doc.metadata.routeFile,
|
|
@@ -17707,6 +17729,37 @@ var UpstashSearchStore = class {
|
|
|
17707
17729
|
return null;
|
|
17708
17730
|
}
|
|
17709
17731
|
}
|
|
17732
|
+
/**
|
|
17733
|
+
* Fetch all chunks belonging to a specific page URL, sorted by ordinal.
|
|
17734
|
+
* Used to reconstruct full page markdown from chunk content.
|
|
17735
|
+
*/
|
|
17736
|
+
async getChunksForPage(url, scope) {
|
|
17737
|
+
const chunks = [];
|
|
17738
|
+
let cursor = "0";
|
|
17739
|
+
try {
|
|
17740
|
+
for (; ; ) {
|
|
17741
|
+
const result = await this.chunksNs.range({
|
|
17742
|
+
cursor,
|
|
17743
|
+
limit: 100,
|
|
17744
|
+
includeMetadata: true
|
|
17745
|
+
});
|
|
17746
|
+
for (const doc of result.vectors) {
|
|
17747
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
|
|
17748
|
+
chunks.push({
|
|
17749
|
+
chunkText: doc.metadata.chunkText ?? "",
|
|
17750
|
+
ordinal: doc.metadata.ordinal ?? 0,
|
|
17751
|
+
sectionTitle: doc.metadata.sectionTitle ?? "",
|
|
17752
|
+
headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
|
|
17753
|
+
});
|
|
17754
|
+
}
|
|
17755
|
+
}
|
|
17756
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
17757
|
+
cursor = result.nextCursor;
|
|
17758
|
+
}
|
|
17759
|
+
} catch {
|
|
17760
|
+
}
|
|
17761
|
+
return chunks.sort((a, b) => a.ordinal - b.ordinal);
|
|
17762
|
+
}
|
|
17710
17763
|
async fetchPageWithVector(url, scope) {
|
|
17711
17764
|
try {
|
|
17712
17765
|
const results = await this.pagesNs.fetch([url], {
|
|
@@ -18662,45 +18715,20 @@ var SearchEngine = class _SearchEngine {
|
|
|
18662
18715
|
function createServer(engine) {
|
|
18663
18716
|
const server = new mcp_js.McpServer({
|
|
18664
18717
|
name: "searchsocket-mcp",
|
|
18665
|
-
version: "0.
|
|
18718
|
+
version: "0.2.0"
|
|
18666
18719
|
});
|
|
18667
18720
|
server.registerTool(
|
|
18668
18721
|
"search",
|
|
18669
18722
|
{
|
|
18670
|
-
description:
|
|
18723
|
+
description: "Searches indexed site content using semantic similarity. Returns ranked results with url, title, snippet, chunkText (full section markdown), score, and routeFile (source file path for editing). Each result includes the best-matching section; set groupBy to 'page' (default) for additional chunk sub-results per page. Use routeFile to locate the source file when editing content. If snippets lack detail, call get_page with the result URL to retrieve the full page markdown.",
|
|
18671
18724
|
inputSchema: {
|
|
18672
|
-
query: zod.z.string().min(1),
|
|
18673
|
-
|
|
18674
|
-
|
|
18675
|
-
|
|
18676
|
-
|
|
18677
|
-
|
|
18678
|
-
|
|
18679
|
-
maxSubResults: zod.z.number().int().positive().max(20).optional()
|
|
18680
|
-
},
|
|
18681
|
-
outputSchema: {
|
|
18682
|
-
q: zod.z.string(),
|
|
18683
|
-
scope: zod.z.string(),
|
|
18684
|
-
results: zod.z.array(zod.z.object({
|
|
18685
|
-
url: zod.z.string(),
|
|
18686
|
-
title: zod.z.string(),
|
|
18687
|
-
sectionTitle: zod.z.string().optional(),
|
|
18688
|
-
snippet: zod.z.string(),
|
|
18689
|
-
score: zod.z.number(),
|
|
18690
|
-
routeFile: zod.z.string(),
|
|
18691
|
-
chunks: zod.z.array(zod.z.object({
|
|
18692
|
-
sectionTitle: zod.z.string().optional(),
|
|
18693
|
-
snippet: zod.z.string(),
|
|
18694
|
-
headingPath: zod.z.array(zod.z.string()),
|
|
18695
|
-
score: zod.z.number()
|
|
18696
|
-
})).optional()
|
|
18697
|
-
})),
|
|
18698
|
-
meta: zod.z.object({
|
|
18699
|
-
timingsMs: zod.z.object({
|
|
18700
|
-
search: zod.z.number(),
|
|
18701
|
-
total: zod.z.number()
|
|
18702
|
-
})
|
|
18703
|
-
})
|
|
18725
|
+
query: zod.z.string().min(1).describe("Search query. Use keywords or natural language, not full sentences."),
|
|
18726
|
+
topK: zod.z.number().int().positive().max(100).optional().describe("Number of results to return (default: 10, max: 100)"),
|
|
18727
|
+
pathPrefix: zod.z.string().optional().describe("Filter results to URLs starting with this prefix (e.g. '/docs')"),
|
|
18728
|
+
tags: zod.z.array(zod.z.string()).optional().describe("Filter results to pages matching all specified tags"),
|
|
18729
|
+
filters: zod.z.record(zod.z.string(), zod.z.union([zod.z.string(), zod.z.number(), zod.z.boolean()])).optional().describe('Filter by structured page metadata (e.g. {"version": 2})'),
|
|
18730
|
+
groupBy: zod.z.enum(["page", "chunk"]).optional().describe("'page' (default) groups chunks by page with sub-results; 'chunk' returns individual chunks"),
|
|
18731
|
+
scope: zod.z.string().optional()
|
|
18704
18732
|
}
|
|
18705
18733
|
},
|
|
18706
18734
|
async (input) => {
|
|
@@ -18711,85 +18739,18 @@ function createServer(engine) {
|
|
|
18711
18739
|
pathPrefix: input.pathPrefix,
|
|
18712
18740
|
tags: input.tags,
|
|
18713
18741
|
filters: input.filters,
|
|
18714
|
-
groupBy: input.groupBy
|
|
18715
|
-
maxSubResults: input.maxSubResults
|
|
18742
|
+
groupBy: input.groupBy
|
|
18716
18743
|
});
|
|
18717
|
-
|
|
18718
|
-
|
|
18719
|
-
|
|
18720
|
-
|
|
18721
|
-
|
|
18722
|
-
|
|
18723
|
-
|
|
18724
|
-
|
|
18725
|
-
|
|
18726
|
-
}
|
|
18727
|
-
);
|
|
18728
|
-
server.registerTool(
|
|
18729
|
-
"get_page",
|
|
18730
|
-
{
|
|
18731
|
-
description: "Fetch indexed markdown for a specific path or URL, including frontmatter and routeFile mapping.",
|
|
18732
|
-
inputSchema: {
|
|
18733
|
-
pathOrUrl: zod.z.string().min(1),
|
|
18734
|
-
scope: zod.z.string().optional()
|
|
18735
|
-
}
|
|
18736
|
-
},
|
|
18737
|
-
async (input) => {
|
|
18738
|
-
const page = await engine.getPage(input.pathOrUrl, input.scope);
|
|
18739
|
-
return {
|
|
18740
|
-
content: [
|
|
18741
|
-
{
|
|
18742
|
-
type: "text",
|
|
18743
|
-
text: JSON.stringify(page, null, 2)
|
|
18744
|
-
}
|
|
18745
|
-
]
|
|
18746
|
-
};
|
|
18747
|
-
}
|
|
18748
|
-
);
|
|
18749
|
-
server.registerTool(
|
|
18750
|
-
"list_pages",
|
|
18751
|
-
{
|
|
18752
|
-
description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
|
|
18753
|
-
inputSchema: {
|
|
18754
|
-
pathPrefix: zod.z.string().optional(),
|
|
18755
|
-
cursor: zod.z.string().optional(),
|
|
18756
|
-
limit: zod.z.number().int().positive().max(200).optional(),
|
|
18757
|
-
scope: zod.z.string().optional()
|
|
18758
|
-
}
|
|
18759
|
-
},
|
|
18760
|
-
async (input) => {
|
|
18761
|
-
const result = await engine.listPages({
|
|
18762
|
-
pathPrefix: input.pathPrefix,
|
|
18763
|
-
cursor: input.cursor,
|
|
18764
|
-
limit: input.limit,
|
|
18765
|
-
scope: input.scope
|
|
18766
|
-
});
|
|
18767
|
-
return {
|
|
18768
|
-
content: [
|
|
18769
|
-
{
|
|
18770
|
-
type: "text",
|
|
18771
|
-
text: JSON.stringify(result, null, 2)
|
|
18772
|
-
}
|
|
18773
|
-
]
|
|
18774
|
-
};
|
|
18775
|
-
}
|
|
18776
|
-
);
|
|
18777
|
-
server.registerTool(
|
|
18778
|
-
"get_site_structure",
|
|
18779
|
-
{
|
|
18780
|
-
description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
|
|
18781
|
-
inputSchema: {
|
|
18782
|
-
pathPrefix: zod.z.string().optional(),
|
|
18783
|
-
scope: zod.z.string().optional(),
|
|
18784
|
-
maxPages: zod.z.number().int().positive().max(2e3).optional()
|
|
18744
|
+
if (result.results.length === 0) {
|
|
18745
|
+
return {
|
|
18746
|
+
content: [
|
|
18747
|
+
{
|
|
18748
|
+
type: "text",
|
|
18749
|
+
text: `No results found for "${input.query}". Try broader keywords or remove filters.`
|
|
18750
|
+
}
|
|
18751
|
+
]
|
|
18752
|
+
};
|
|
18785
18753
|
}
|
|
18786
|
-
},
|
|
18787
|
-
async (input) => {
|
|
18788
|
-
const result = await engine.getSiteStructure({
|
|
18789
|
-
pathPrefix: input.pathPrefix,
|
|
18790
|
-
scope: input.scope,
|
|
18791
|
-
maxPages: input.maxPages
|
|
18792
|
-
});
|
|
18793
18754
|
return {
|
|
18794
18755
|
content: [
|
|
18795
18756
|
{
|
|
@@ -18801,56 +18762,51 @@ function createServer(engine) {
|
|
|
18801
18762
|
}
|
|
18802
18763
|
);
|
|
18803
18764
|
server.registerTool(
|
|
18804
|
-
"
|
|
18765
|
+
"get_page",
|
|
18805
18766
|
{
|
|
18806
|
-
description: "
|
|
18767
|
+
description: "Retrieves the full markdown content and metadata for a specific page by its URL path. Use this after search when snippets lack the detail needed to answer a question. Returns reconstructed page markdown, frontmatter (title, routeFile, tags, link counts, indexedAt), and the source file path. Do NOT use this for discovery \u2014 use search first to find relevant pages.",
|
|
18807
18768
|
inputSchema: {
|
|
18808
|
-
|
|
18769
|
+
path: zod.z.string().min(1).describe("URL path of the page (e.g. '/docs/auth'). Use a URL from search results."),
|
|
18809
18770
|
scope: zod.z.string().optional()
|
|
18810
18771
|
}
|
|
18811
18772
|
},
|
|
18812
18773
|
async (input) => {
|
|
18813
|
-
|
|
18814
|
-
|
|
18815
|
-
topK: 1,
|
|
18816
|
-
scope: input.scope
|
|
18817
|
-
});
|
|
18818
|
-
if (result.results.length === 0) {
|
|
18774
|
+
try {
|
|
18775
|
+
const page = await engine.getPage(input.path, input.scope);
|
|
18819
18776
|
return {
|
|
18820
18777
|
content: [
|
|
18821
18778
|
{
|
|
18822
18779
|
type: "text",
|
|
18823
|
-
text: JSON.stringify(
|
|
18824
|
-
|
|
18825
|
-
|
|
18780
|
+
text: JSON.stringify(page, null, 2)
|
|
18781
|
+
}
|
|
18782
|
+
]
|
|
18783
|
+
};
|
|
18784
|
+
} catch {
|
|
18785
|
+
const suggestions = await engine.search({ q: input.path, topK: 3, scope: input.scope });
|
|
18786
|
+
const similar = suggestions.results.map((r) => r.url);
|
|
18787
|
+
return {
|
|
18788
|
+
content: [
|
|
18789
|
+
{
|
|
18790
|
+
type: "text",
|
|
18791
|
+
text: similar.length > 0 ? `Page '${input.path}' not found. Similar pages: ${similar.join(", ")}` : `Page '${input.path}' not found. Use search to find the correct URL.`
|
|
18826
18792
|
}
|
|
18827
18793
|
]
|
|
18828
18794
|
};
|
|
18829
18795
|
}
|
|
18830
|
-
const match = result.results[0];
|
|
18831
|
-
const { url, routeFile, sectionTitle, snippet } = match;
|
|
18832
|
-
return {
|
|
18833
|
-
content: [
|
|
18834
|
-
{
|
|
18835
|
-
type: "text",
|
|
18836
|
-
text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
|
|
18837
|
-
}
|
|
18838
|
-
]
|
|
18839
|
-
};
|
|
18840
18796
|
}
|
|
18841
18797
|
);
|
|
18842
18798
|
server.registerTool(
|
|
18843
18799
|
"get_related_pages",
|
|
18844
18800
|
{
|
|
18845
|
-
description: "
|
|
18801
|
+
description: "Finds pages related to a specific page using link graph analysis, semantic similarity, and URL structure. Returns related pages with relationship type (outgoing_link, incoming_link, sibling, semantic) and relevance score. Do NOT use this for general search \u2014 use search instead. Use this only when you already have a specific page URL and need to discover connected content.",
|
|
18846
18802
|
inputSchema: {
|
|
18847
|
-
|
|
18848
|
-
|
|
18849
|
-
|
|
18803
|
+
path: zod.z.string().min(1).describe("URL path of the source page (e.g. '/docs/auth'). Use a URL from search results."),
|
|
18804
|
+
topK: zod.z.number().int().positive().max(25).optional().describe("Number of related pages to return (default: 10, max: 25)"),
|
|
18805
|
+
scope: zod.z.string().optional()
|
|
18850
18806
|
}
|
|
18851
18807
|
},
|
|
18852
18808
|
async (input) => {
|
|
18853
|
-
const result = await engine.getRelatedPages(input.
|
|
18809
|
+
const result = await engine.getRelatedPages(input.path, {
|
|
18854
18810
|
topK: input.topK,
|
|
18855
18811
|
scope: input.scope
|
|
18856
18812
|
});
|
|
@@ -22138,7 +22094,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22138
22094
|
keywords: r.keywords ?? [],
|
|
22139
22095
|
summary: r.summary ?? "",
|
|
22140
22096
|
tags: r.tags,
|
|
22141
|
-
markdown: r.markdown,
|
|
22142
22097
|
routeFile: r.routeFile,
|
|
22143
22098
|
routeResolution: r.routeResolution,
|
|
22144
22099
|
incomingLinks: r.incomingLinks,
|
|
@@ -22165,7 +22120,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22165
22120
|
keywords: r.keywords ?? [],
|
|
22166
22121
|
summary: r.summary ?? "",
|
|
22167
22122
|
tags: r.tags,
|
|
22168
|
-
markdown: r.markdown,
|
|
22169
22123
|
routeFile: r.routeFile,
|
|
22170
22124
|
routeResolution: r.routeResolution,
|
|
22171
22125
|
incomingLinks: r.incomingLinks,
|
|
@@ -22249,6 +22203,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22249
22203
|
let documentsUpserted = 0;
|
|
22250
22204
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
22251
22205
|
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
|
|
22206
|
+
const CHUNK_TEXT_MAX_CHARS = 3e4;
|
|
22252
22207
|
const docs = changedChunks.map((chunk) => {
|
|
22253
22208
|
const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
|
|
22254
22209
|
if (embeddingText.length > 2e3) {
|
|
@@ -22256,6 +22211,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22256
22211
|
`Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
|
|
22257
22212
|
);
|
|
22258
22213
|
}
|
|
22214
|
+
const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
|
|
22259
22215
|
return {
|
|
22260
22216
|
id: chunk.chunkKey,
|
|
22261
22217
|
data: embeddingText,
|
|
@@ -22266,7 +22222,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22266
22222
|
sectionTitle: chunk.sectionTitle ?? "",
|
|
22267
22223
|
headingPath: chunk.headingPath.join(" > "),
|
|
22268
22224
|
snippet: chunk.snippet,
|
|
22269
|
-
chunkText:
|
|
22225
|
+
chunkText: cappedText,
|
|
22270
22226
|
tags: chunk.tags,
|
|
22271
22227
|
ordinal: chunk.ordinal,
|
|
22272
22228
|
contentHash: chunk.contentHash,
|