searchsocket 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +99 -143
- package/dist/index.cjs +98 -142
- package/dist/index.d.cts +10 -1
- package/dist/index.d.ts +10 -1
- package/dist/index.js +98 -142
- package/dist/sveltekit.cjs +98 -142
- package/dist/sveltekit.js +98 -142
- package/package.json +1 -1
package/dist/sveltekit.js
CHANGED
|
@@ -17293,6 +17293,26 @@ function joinUrl(baseUrl, route) {
|
|
|
17293
17293
|
const routePart = ensureLeadingSlash(route);
|
|
17294
17294
|
return `${base}${routePart}`;
|
|
17295
17295
|
}
|
|
17296
|
+
/**
 * Rebuild a page's markdown by concatenating the text of its chunks.
 *
 * Each chunk's text may begin with a title prefix, either
 * `"<pageTitle> — <sectionTitle>\n\n"` or `"<pageTitle>\n\n"`. That prefix is
 * stripped so the title is not repeated once per chunk in the output.
 *
 * @param {Array<{chunkText?: string, sectionTitle?: string}>} chunks
 *   Chunks in the order they should appear in the output.
 * @param {string} pageTitle Title used to recognise the per-chunk prefix.
 * @returns {string} Chunk bodies joined by a blank line; "" when there is
 *   nothing to join.
 */
function reconstructMarkdownFromChunks(chunks, pageTitle) {
  if (!chunks?.length) return "";
  // The title-only prefix does not depend on the chunk, so build it once.
  const prefixWithoutSection = `${pageTitle}\n\n`;
  const parts = [];
  for (const chunk of chunks) {
    // Tolerate chunks stored without text instead of throwing on startsWith.
    let text = chunk.chunkText ?? "";
    const prefixWithSection = `${pageTitle} \u2014 ${chunk.sectionTitle}\n\n`;
    if (chunk.sectionTitle && text.startsWith(prefixWithSection)) {
      text = text.slice(prefixWithSection.length);
    } else if (text.startsWith(prefixWithoutSection)) {
      text = text.slice(prefixWithoutSection.length);
    }
    const body = text.trim();
    // Skip chunks that carried only the title prefix; pushing "" would leave
    // runs of extra blank lines in the joined markdown.
    if (body.length > 0) parts.push(body);
  }
  return parts.join("\n\n");
}
|
|
17296
17316
|
var UpstashSearchStore = class {
|
|
17297
17317
|
index;
|
|
17298
17318
|
pagesNs;
|
|
@@ -17672,10 +17692,12 @@ var UpstashSearchStore = class {
|
|
|
17672
17692
|
});
|
|
17673
17693
|
const doc = results[0];
|
|
17674
17694
|
if (!doc || !doc.metadata) return null;
|
|
17695
|
+
const chunks = await this.getChunksForPage(url, scope);
|
|
17696
|
+
const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
|
|
17675
17697
|
return {
|
|
17676
17698
|
url: doc.metadata.url,
|
|
17677
17699
|
title: doc.metadata.title,
|
|
17678
|
-
markdown
|
|
17700
|
+
markdown,
|
|
17679
17701
|
projectId: doc.metadata.projectId,
|
|
17680
17702
|
scopeName: doc.metadata.scopeName,
|
|
17681
17703
|
routeFile: doc.metadata.routeFile,
|
|
@@ -17695,6 +17717,37 @@ var UpstashSearchStore = class {
|
|
|
17695
17717
|
return null;
|
|
17696
17718
|
}
|
|
17697
17719
|
}
|
|
17720
|
+
/**
|
|
17721
|
+
* Fetch all chunks belonging to a specific page URL, sorted by ordinal.
|
|
17722
|
+
* Used to reconstruct full page markdown from chunk content.
|
|
17723
|
+
*/
|
|
17724
|
+
async getChunksForPage(url, scope) {
|
|
17725
|
+
const chunks = [];
|
|
17726
|
+
let cursor = "0";
|
|
17727
|
+
try {
|
|
17728
|
+
for (; ; ) {
|
|
17729
|
+
const result = await this.chunksNs.range({
|
|
17730
|
+
cursor,
|
|
17731
|
+
limit: 100,
|
|
17732
|
+
includeMetadata: true
|
|
17733
|
+
});
|
|
17734
|
+
for (const doc of result.vectors) {
|
|
17735
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
|
|
17736
|
+
chunks.push({
|
|
17737
|
+
chunkText: doc.metadata.chunkText ?? "",
|
|
17738
|
+
ordinal: doc.metadata.ordinal ?? 0,
|
|
17739
|
+
sectionTitle: doc.metadata.sectionTitle ?? "",
|
|
17740
|
+
headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
|
|
17741
|
+
});
|
|
17742
|
+
}
|
|
17743
|
+
}
|
|
17744
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
17745
|
+
cursor = result.nextCursor;
|
|
17746
|
+
}
|
|
17747
|
+
} catch {
|
|
17748
|
+
}
|
|
17749
|
+
return chunks.sort((a, b) => a.ordinal - b.ordinal);
|
|
17750
|
+
}
|
|
17698
17751
|
async fetchPageWithVector(url, scope) {
|
|
17699
17752
|
try {
|
|
17700
17753
|
const results = await this.pagesNs.fetch([url], {
|
|
@@ -18650,45 +18703,20 @@ var SearchEngine = class _SearchEngine {
|
|
|
18650
18703
|
function createServer(engine) {
|
|
18651
18704
|
const server = new McpServer({
|
|
18652
18705
|
name: "searchsocket-mcp",
|
|
18653
|
-
version: "0.
|
|
18706
|
+
version: "0.2.0"
|
|
18654
18707
|
});
|
|
18655
18708
|
server.registerTool(
|
|
18656
18709
|
"search",
|
|
18657
18710
|
{
|
|
18658
|
-
description:
|
|
18711
|
+
description: "Searches indexed site content using semantic similarity. Returns ranked results with url, title, snippet, chunkText (full section markdown), score, and routeFile (source file path for editing). Each result includes the best-matching section; set groupBy to 'page' (default) for additional chunk sub-results per page. Use routeFile to locate the source file when editing content. If snippets lack detail, call get_page with the result URL to retrieve the full page markdown.",
|
|
18659
18712
|
inputSchema: {
|
|
18660
|
-
query: z.string().min(1),
|
|
18661
|
-
|
|
18662
|
-
|
|
18663
|
-
|
|
18664
|
-
|
|
18665
|
-
|
|
18666
|
-
|
|
18667
|
-
maxSubResults: z.number().int().positive().max(20).optional()
|
|
18668
|
-
},
|
|
18669
|
-
outputSchema: {
|
|
18670
|
-
q: z.string(),
|
|
18671
|
-
scope: z.string(),
|
|
18672
|
-
results: z.array(z.object({
|
|
18673
|
-
url: z.string(),
|
|
18674
|
-
title: z.string(),
|
|
18675
|
-
sectionTitle: z.string().optional(),
|
|
18676
|
-
snippet: z.string(),
|
|
18677
|
-
score: z.number(),
|
|
18678
|
-
routeFile: z.string(),
|
|
18679
|
-
chunks: z.array(z.object({
|
|
18680
|
-
sectionTitle: z.string().optional(),
|
|
18681
|
-
snippet: z.string(),
|
|
18682
|
-
headingPath: z.array(z.string()),
|
|
18683
|
-
score: z.number()
|
|
18684
|
-
})).optional()
|
|
18685
|
-
})),
|
|
18686
|
-
meta: z.object({
|
|
18687
|
-
timingsMs: z.object({
|
|
18688
|
-
search: z.number(),
|
|
18689
|
-
total: z.number()
|
|
18690
|
-
})
|
|
18691
|
-
})
|
|
18713
|
+
query: z.string().min(1).describe("Search query. Use keywords or natural language, not full sentences."),
|
|
18714
|
+
topK: z.number().int().positive().max(100).optional().describe("Number of results to return (default: 10, max: 100)"),
|
|
18715
|
+
pathPrefix: z.string().optional().describe("Filter results to URLs starting with this prefix (e.g. '/docs')"),
|
|
18716
|
+
tags: z.array(z.string()).optional().describe("Filter results to pages matching all specified tags"),
|
|
18717
|
+
filters: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional().describe('Filter by structured page metadata (e.g. {"version": 2})'),
|
|
18718
|
+
groupBy: z.enum(["page", "chunk"]).optional().describe("'page' (default) groups chunks by page with sub-results; 'chunk' returns individual chunks"),
|
|
18719
|
+
scope: z.string().optional()
|
|
18692
18720
|
}
|
|
18693
18721
|
},
|
|
18694
18722
|
async (input) => {
|
|
@@ -18699,85 +18727,18 @@ function createServer(engine) {
|
|
|
18699
18727
|
pathPrefix: input.pathPrefix,
|
|
18700
18728
|
tags: input.tags,
|
|
18701
18729
|
filters: input.filters,
|
|
18702
|
-
groupBy: input.groupBy
|
|
18703
|
-
maxSubResults: input.maxSubResults
|
|
18730
|
+
groupBy: input.groupBy
|
|
18704
18731
|
});
|
|
18705
|
-
|
|
18706
|
-
|
|
18707
|
-
|
|
18708
|
-
|
|
18709
|
-
|
|
18710
|
-
|
|
18711
|
-
|
|
18712
|
-
|
|
18713
|
-
|
|
18714
|
-
}
|
|
18715
|
-
);
|
|
18716
|
-
server.registerTool(
|
|
18717
|
-
"get_page",
|
|
18718
|
-
{
|
|
18719
|
-
description: "Fetch indexed markdown for a specific path or URL, including frontmatter and routeFile mapping.",
|
|
18720
|
-
inputSchema: {
|
|
18721
|
-
pathOrUrl: z.string().min(1),
|
|
18722
|
-
scope: z.string().optional()
|
|
18723
|
-
}
|
|
18724
|
-
},
|
|
18725
|
-
async (input) => {
|
|
18726
|
-
const page = await engine.getPage(input.pathOrUrl, input.scope);
|
|
18727
|
-
return {
|
|
18728
|
-
content: [
|
|
18729
|
-
{
|
|
18730
|
-
type: "text",
|
|
18731
|
-
text: JSON.stringify(page, null, 2)
|
|
18732
|
-
}
|
|
18733
|
-
]
|
|
18734
|
-
};
|
|
18735
|
-
}
|
|
18736
|
-
);
|
|
18737
|
-
server.registerTool(
|
|
18738
|
-
"list_pages",
|
|
18739
|
-
{
|
|
18740
|
-
description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
|
|
18741
|
-
inputSchema: {
|
|
18742
|
-
pathPrefix: z.string().optional(),
|
|
18743
|
-
cursor: z.string().optional(),
|
|
18744
|
-
limit: z.number().int().positive().max(200).optional(),
|
|
18745
|
-
scope: z.string().optional()
|
|
18746
|
-
}
|
|
18747
|
-
},
|
|
18748
|
-
async (input) => {
|
|
18749
|
-
const result = await engine.listPages({
|
|
18750
|
-
pathPrefix: input.pathPrefix,
|
|
18751
|
-
cursor: input.cursor,
|
|
18752
|
-
limit: input.limit,
|
|
18753
|
-
scope: input.scope
|
|
18754
|
-
});
|
|
18755
|
-
return {
|
|
18756
|
-
content: [
|
|
18757
|
-
{
|
|
18758
|
-
type: "text",
|
|
18759
|
-
text: JSON.stringify(result, null, 2)
|
|
18760
|
-
}
|
|
18761
|
-
]
|
|
18762
|
-
};
|
|
18763
|
-
}
|
|
18764
|
-
);
|
|
18765
|
-
server.registerTool(
|
|
18766
|
-
"get_site_structure",
|
|
18767
|
-
{
|
|
18768
|
-
description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
|
|
18769
|
-
inputSchema: {
|
|
18770
|
-
pathPrefix: z.string().optional(),
|
|
18771
|
-
scope: z.string().optional(),
|
|
18772
|
-
maxPages: z.number().int().positive().max(2e3).optional()
|
|
18732
|
+
if (result.results.length === 0) {
|
|
18733
|
+
return {
|
|
18734
|
+
content: [
|
|
18735
|
+
{
|
|
18736
|
+
type: "text",
|
|
18737
|
+
text: `No results found for "${input.query}". Try broader keywords or remove filters.`
|
|
18738
|
+
}
|
|
18739
|
+
]
|
|
18740
|
+
};
|
|
18773
18741
|
}
|
|
18774
|
-
},
|
|
18775
|
-
async (input) => {
|
|
18776
|
-
const result = await engine.getSiteStructure({
|
|
18777
|
-
pathPrefix: input.pathPrefix,
|
|
18778
|
-
scope: input.scope,
|
|
18779
|
-
maxPages: input.maxPages
|
|
18780
|
-
});
|
|
18781
18742
|
return {
|
|
18782
18743
|
content: [
|
|
18783
18744
|
{
|
|
@@ -18789,56 +18750,51 @@ function createServer(engine) {
|
|
|
18789
18750
|
}
|
|
18790
18751
|
);
|
|
18791
18752
|
server.registerTool(
|
|
18792
|
-
"
|
|
18753
|
+
"get_page",
|
|
18793
18754
|
{
|
|
18794
|
-
description: "
|
|
18755
|
+
description: "Retrieves the full markdown content and metadata for a specific page by its URL path. Use this after search when snippets lack the detail needed to answer a question. Returns reconstructed page markdown, frontmatter (title, routeFile, tags, link counts, indexedAt), and the source file path. Do NOT use this for discovery \u2014 use search first to find relevant pages.",
|
|
18795
18756
|
inputSchema: {
|
|
18796
|
-
|
|
18757
|
+
path: z.string().min(1).describe("URL path of the page (e.g. '/docs/auth'). Use a URL from search results."),
|
|
18797
18758
|
scope: z.string().optional()
|
|
18798
18759
|
}
|
|
18799
18760
|
},
|
|
18800
18761
|
async (input) => {
|
|
18801
|
-
|
|
18802
|
-
|
|
18803
|
-
topK: 1,
|
|
18804
|
-
scope: input.scope
|
|
18805
|
-
});
|
|
18806
|
-
if (result.results.length === 0) {
|
|
18762
|
+
try {
|
|
18763
|
+
const page = await engine.getPage(input.path, input.scope);
|
|
18807
18764
|
return {
|
|
18808
18765
|
content: [
|
|
18809
18766
|
{
|
|
18810
18767
|
type: "text",
|
|
18811
|
-
text: JSON.stringify(
|
|
18812
|
-
|
|
18813
|
-
|
|
18768
|
+
text: JSON.stringify(page, null, 2)
|
|
18769
|
+
}
|
|
18770
|
+
]
|
|
18771
|
+
};
|
|
18772
|
+
} catch {
|
|
18773
|
+
const suggestions = await engine.search({ q: input.path, topK: 3, scope: input.scope });
|
|
18774
|
+
const similar = suggestions.results.map((r) => r.url);
|
|
18775
|
+
return {
|
|
18776
|
+
content: [
|
|
18777
|
+
{
|
|
18778
|
+
type: "text",
|
|
18779
|
+
text: similar.length > 0 ? `Page '${input.path}' not found. Similar pages: ${similar.join(", ")}` : `Page '${input.path}' not found. Use search to find the correct URL.`
|
|
18814
18780
|
}
|
|
18815
18781
|
]
|
|
18816
18782
|
};
|
|
18817
18783
|
}
|
|
18818
|
-
const match = result.results[0];
|
|
18819
|
-
const { url, routeFile, sectionTitle, snippet } = match;
|
|
18820
|
-
return {
|
|
18821
|
-
content: [
|
|
18822
|
-
{
|
|
18823
|
-
type: "text",
|
|
18824
|
-
text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
|
|
18825
|
-
}
|
|
18826
|
-
]
|
|
18827
|
-
};
|
|
18828
18784
|
}
|
|
18829
18785
|
);
|
|
18830
18786
|
server.registerTool(
|
|
18831
18787
|
"get_related_pages",
|
|
18832
18788
|
{
|
|
18833
|
-
description: "
|
|
18789
|
+
description: "Finds pages related to a specific page using link graph analysis, semantic similarity, and URL structure. Returns related pages with relationship type (outgoing_link, incoming_link, sibling, semantic) and relevance score. Do NOT use this for general search \u2014 use search instead. Use this only when you already have a specific page URL and need to discover connected content.",
|
|
18834
18790
|
inputSchema: {
|
|
18835
|
-
|
|
18836
|
-
|
|
18837
|
-
|
|
18791
|
+
path: z.string().min(1).describe("URL path of the source page (e.g. '/docs/auth'). Use a URL from search results."),
|
|
18792
|
+
topK: z.number().int().positive().max(25).optional().describe("Number of related pages to return (default: 10, max: 25)"),
|
|
18793
|
+
scope: z.string().optional()
|
|
18838
18794
|
}
|
|
18839
18795
|
},
|
|
18840
18796
|
async (input) => {
|
|
18841
|
-
const result = await engine.getRelatedPages(input.
|
|
18797
|
+
const result = await engine.getRelatedPages(input.path, {
|
|
18842
18798
|
topK: input.topK,
|
|
18843
18799
|
scope: input.scope
|
|
18844
18800
|
});
|
|
@@ -22126,7 +22082,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22126
22082
|
keywords: r.keywords ?? [],
|
|
22127
22083
|
summary: r.summary ?? "",
|
|
22128
22084
|
tags: r.tags,
|
|
22129
|
-
markdown: r.markdown,
|
|
22130
22085
|
routeFile: r.routeFile,
|
|
22131
22086
|
routeResolution: r.routeResolution,
|
|
22132
22087
|
incomingLinks: r.incomingLinks,
|
|
@@ -22153,7 +22108,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22153
22108
|
keywords: r.keywords ?? [],
|
|
22154
22109
|
summary: r.summary ?? "",
|
|
22155
22110
|
tags: r.tags,
|
|
22156
|
-
markdown: r.markdown,
|
|
22157
22111
|
routeFile: r.routeFile,
|
|
22158
22112
|
routeResolution: r.routeResolution,
|
|
22159
22113
|
incomingLinks: r.incomingLinks,
|
|
@@ -22237,6 +22191,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22237
22191
|
let documentsUpserted = 0;
|
|
22238
22192
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
22239
22193
|
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
|
|
22194
|
+
const CHUNK_TEXT_MAX_CHARS = 3e4;
|
|
22240
22195
|
const docs = changedChunks.map((chunk) => {
|
|
22241
22196
|
const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
|
|
22242
22197
|
if (embeddingText.length > 2e3) {
|
|
@@ -22244,6 +22199,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22244
22199
|
`Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
|
|
22245
22200
|
);
|
|
22246
22201
|
}
|
|
22202
|
+
const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
|
|
22247
22203
|
return {
|
|
22248
22204
|
id: chunk.chunkKey,
|
|
22249
22205
|
data: embeddingText,
|
|
@@ -22254,7 +22210,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
22254
22210
|
sectionTitle: chunk.sectionTitle ?? "",
|
|
22255
22211
|
headingPath: chunk.headingPath.join(" > "),
|
|
22256
22212
|
snippet: chunk.snippet,
|
|
22257
|
-
chunkText:
|
|
22213
|
+
chunkText: cappedText,
|
|
22258
22214
|
tags: chunk.tags,
|
|
22259
22215
|
ordinal: chunk.ordinal,
|
|
22260
22216
|
contentHash: chunk.contentHash,
|