searchsocket 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +99 -143
- package/dist/index.cjs +98 -142
- package/dist/index.d.cts +10 -1
- package/dist/index.d.ts +10 -1
- package/dist/index.js +98 -142
- package/dist/sveltekit.cjs +98 -142
- package/dist/sveltekit.js +98 -142
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -17262,6 +17262,26 @@ function ensureStateDirs(cwd, stateDir, scope) {
|
|
|
17262
17262
|
fs.mkdirSync(statePath, { recursive: true });
|
|
17263
17263
|
return { statePath };
|
|
17264
17264
|
}
|
|
17265
|
+
/**
 * Rebuild a page's markdown body from its stored chunks.
 *
 * A chunk's text may start with a title prefix of the form
 * "<pageTitle> — <sectionTitle>\n\n" or "<pageTitle>\n\n" (presumably added
 * when the chunk text was built for indexing — confirm against the indexer).
 * When such a prefix is present it is removed, the remainder is trimmed, and
 * the non-empty pieces are joined with a blank line between them.
 *
 * @param {Array<{chunkText?: string, sectionTitle?: string}>} chunks
 *   Chunks in the order they should appear in the output.
 * @param {string} pageTitle - Page title used to recognise the prefix.
 * @returns {string} The reassembled markdown, or "" when nothing remains.
 */
function reconstructMarkdownFromChunks(chunks, pageTitle) {
  if (chunks.length === 0) return "";
  // The title-only prefix does not depend on the chunk, so build it once.
  const prefixWithoutSection = `${pageTitle}\n\n`;
  const parts = [];
  for (const chunk of chunks) {
    // Tolerate chunks without usable text instead of throwing on startsWith.
    let text = typeof chunk.chunkText === "string" ? chunk.chunkText : "";
    const prefixWithSection = `${pageTitle} \u2014 ${chunk.sectionTitle}\n\n`;
    if (chunk.sectionTitle && text.startsWith(prefixWithSection)) {
      text = text.slice(prefixWithSection.length);
    } else if (text.startsWith(prefixWithoutSection)) {
      text = text.slice(prefixWithoutSection.length);
    }
    const body = text.trim();
    // Skip chunks that are empty after stripping so the joined output does
    // not accumulate runs of blank lines.
    if (body !== "") {
      parts.push(body);
    }
  }
  return parts.join("\n\n");
}
|
|
17265
17285
|
var UpstashSearchStore = class {
|
|
17266
17286
|
index;
|
|
17267
17287
|
pagesNs;
|
|
@@ -17641,10 +17661,12 @@ var UpstashSearchStore = class {
|
|
|
17641
17661
|
});
|
|
17642
17662
|
const doc = results[0];
|
|
17643
17663
|
if (!doc || !doc.metadata) return null;
|
|
17664
|
+
const chunks = await this.getChunksForPage(url, scope);
|
|
17665
|
+
const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
|
|
17644
17666
|
return {
|
|
17645
17667
|
url: doc.metadata.url,
|
|
17646
17668
|
title: doc.metadata.title,
|
|
17647
|
-
markdown
|
|
17669
|
+
markdown,
|
|
17648
17670
|
projectId: doc.metadata.projectId,
|
|
17649
17671
|
scopeName: doc.metadata.scopeName,
|
|
17650
17672
|
routeFile: doc.metadata.routeFile,
|
|
@@ -17664,6 +17686,37 @@ var UpstashSearchStore = class {
|
|
|
17664
17686
|
return null;
|
|
17665
17687
|
}
|
|
17666
17688
|
}
|
|
17689
|
+
/**
|
|
17690
|
+
* Fetch all chunks belonging to a specific page URL, sorted by ordinal.
|
|
17691
|
+
* Used to reconstruct full page markdown from chunk content.
|
|
17692
|
+
*/
|
|
17693
|
+
async getChunksForPage(url, scope) {
|
|
17694
|
+
const chunks = [];
|
|
17695
|
+
let cursor = "0";
|
|
17696
|
+
try {
|
|
17697
|
+
for (; ; ) {
|
|
17698
|
+
const result = await this.chunksNs.range({
|
|
17699
|
+
cursor,
|
|
17700
|
+
limit: 100,
|
|
17701
|
+
includeMetadata: true
|
|
17702
|
+
});
|
|
17703
|
+
for (const doc of result.vectors) {
|
|
17704
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
|
|
17705
|
+
chunks.push({
|
|
17706
|
+
chunkText: doc.metadata.chunkText ?? "",
|
|
17707
|
+
ordinal: doc.metadata.ordinal ?? 0,
|
|
17708
|
+
sectionTitle: doc.metadata.sectionTitle ?? "",
|
|
17709
|
+
headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
|
|
17710
|
+
});
|
|
17711
|
+
}
|
|
17712
|
+
}
|
|
17713
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
17714
|
+
cursor = result.nextCursor;
|
|
17715
|
+
}
|
|
17716
|
+
} catch {
|
|
17717
|
+
}
|
|
17718
|
+
return chunks.sort((a, b) => a.ordinal - b.ordinal);
|
|
17719
|
+
}
|
|
17667
17720
|
async fetchPageWithVector(url, scope) {
|
|
17668
17721
|
try {
|
|
17669
17722
|
const results = await this.pagesNs.fetch([url], {
|
|
@@ -20984,7 +21037,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20984
21037
|
keywords: r.keywords ?? [],
|
|
20985
21038
|
summary: r.summary ?? "",
|
|
20986
21039
|
tags: r.tags,
|
|
20987
|
-
markdown: r.markdown,
|
|
20988
21040
|
routeFile: r.routeFile,
|
|
20989
21041
|
routeResolution: r.routeResolution,
|
|
20990
21042
|
incomingLinks: r.incomingLinks,
|
|
@@ -21011,7 +21063,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21011
21063
|
keywords: r.keywords ?? [],
|
|
21012
21064
|
summary: r.summary ?? "",
|
|
21013
21065
|
tags: r.tags,
|
|
21014
|
-
markdown: r.markdown,
|
|
21015
21066
|
routeFile: r.routeFile,
|
|
21016
21067
|
routeResolution: r.routeResolution,
|
|
21017
21068
|
incomingLinks: r.incomingLinks,
|
|
@@ -21095,6 +21146,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21095
21146
|
let documentsUpserted = 0;
|
|
21096
21147
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
21097
21148
|
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
|
|
21149
|
+
const CHUNK_TEXT_MAX_CHARS = 3e4;
|
|
21098
21150
|
const docs = changedChunks.map((chunk) => {
|
|
21099
21151
|
const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
|
|
21100
21152
|
if (embeddingText.length > 2e3) {
|
|
@@ -21102,6 +21154,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21102
21154
|
`Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
|
|
21103
21155
|
);
|
|
21104
21156
|
}
|
|
21157
|
+
const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
|
|
21105
21158
|
return {
|
|
21106
21159
|
id: chunk.chunkKey,
|
|
21107
21160
|
data: embeddingText,
|
|
@@ -21112,7 +21165,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21112
21165
|
sectionTitle: chunk.sectionTitle ?? "",
|
|
21113
21166
|
headingPath: chunk.headingPath.join(" > "),
|
|
21114
21167
|
snippet: chunk.snippet,
|
|
21115
|
-
chunkText:
|
|
21168
|
+
chunkText: cappedText,
|
|
21116
21169
|
tags: chunk.tags,
|
|
21117
21170
|
ordinal: chunk.ordinal,
|
|
21118
21171
|
contentHash: chunk.contentHash,
|
|
@@ -21674,45 +21727,20 @@ var SearchEngine = class _SearchEngine {
|
|
|
21674
21727
|
function createServer(engine) {
|
|
21675
21728
|
const server = new McpServer({
|
|
21676
21729
|
name: "searchsocket-mcp",
|
|
21677
|
-
version: "0.
|
|
21730
|
+
version: "0.2.0"
|
|
21678
21731
|
});
|
|
21679
21732
|
server.registerTool(
|
|
21680
21733
|
"search",
|
|
21681
21734
|
{
|
|
21682
|
-
description:
|
|
21735
|
+
description: "Searches indexed site content using semantic similarity. Returns ranked results with url, title, snippet, chunkText (full section markdown), score, and routeFile (source file path for editing). Each result includes the best-matching section; set groupBy to 'page' (default) for additional chunk sub-results per page. Use routeFile to locate the source file when editing content. If snippets lack detail, call get_page with the result URL to retrieve the full page markdown.",
|
|
21683
21736
|
inputSchema: {
|
|
21684
|
-
query: z.string().min(1),
|
|
21685
|
-
|
|
21686
|
-
|
|
21687
|
-
|
|
21688
|
-
|
|
21689
|
-
|
|
21690
|
-
|
|
21691
|
-
maxSubResults: z.number().int().positive().max(20).optional()
|
|
21692
|
-
},
|
|
21693
|
-
outputSchema: {
|
|
21694
|
-
q: z.string(),
|
|
21695
|
-
scope: z.string(),
|
|
21696
|
-
results: z.array(z.object({
|
|
21697
|
-
url: z.string(),
|
|
21698
|
-
title: z.string(),
|
|
21699
|
-
sectionTitle: z.string().optional(),
|
|
21700
|
-
snippet: z.string(),
|
|
21701
|
-
score: z.number(),
|
|
21702
|
-
routeFile: z.string(),
|
|
21703
|
-
chunks: z.array(z.object({
|
|
21704
|
-
sectionTitle: z.string().optional(),
|
|
21705
|
-
snippet: z.string(),
|
|
21706
|
-
headingPath: z.array(z.string()),
|
|
21707
|
-
score: z.number()
|
|
21708
|
-
})).optional()
|
|
21709
|
-
})),
|
|
21710
|
-
meta: z.object({
|
|
21711
|
-
timingsMs: z.object({
|
|
21712
|
-
search: z.number(),
|
|
21713
|
-
total: z.number()
|
|
21714
|
-
})
|
|
21715
|
-
})
|
|
21737
|
+
query: z.string().min(1).describe("Search query. Use keywords or natural language, not full sentences."),
|
|
21738
|
+
topK: z.number().int().positive().max(100).optional().describe("Number of results to return (default: 10, max: 100)"),
|
|
21739
|
+
pathPrefix: z.string().optional().describe("Filter results to URLs starting with this prefix (e.g. '/docs')"),
|
|
21740
|
+
tags: z.array(z.string()).optional().describe("Filter results to pages matching all specified tags"),
|
|
21741
|
+
filters: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional().describe('Filter by structured page metadata (e.g. {"version": 2})'),
|
|
21742
|
+
groupBy: z.enum(["page", "chunk"]).optional().describe("'page' (default) groups chunks by page with sub-results; 'chunk' returns individual chunks"),
|
|
21743
|
+
scope: z.string().optional()
|
|
21716
21744
|
}
|
|
21717
21745
|
},
|
|
21718
21746
|
async (input) => {
|
|
@@ -21723,85 +21751,18 @@ function createServer(engine) {
|
|
|
21723
21751
|
pathPrefix: input.pathPrefix,
|
|
21724
21752
|
tags: input.tags,
|
|
21725
21753
|
filters: input.filters,
|
|
21726
|
-
groupBy: input.groupBy
|
|
21727
|
-
maxSubResults: input.maxSubResults
|
|
21754
|
+
groupBy: input.groupBy
|
|
21728
21755
|
});
|
|
21729
|
-
|
|
21730
|
-
|
|
21731
|
-
|
|
21732
|
-
|
|
21733
|
-
|
|
21734
|
-
|
|
21735
|
-
|
|
21736
|
-
|
|
21737
|
-
|
|
21738
|
-
}
|
|
21739
|
-
);
|
|
21740
|
-
server.registerTool(
|
|
21741
|
-
"get_page",
|
|
21742
|
-
{
|
|
21743
|
-
description: "Fetch indexed markdown for a specific path or URL, including frontmatter and routeFile mapping.",
|
|
21744
|
-
inputSchema: {
|
|
21745
|
-
pathOrUrl: z.string().min(1),
|
|
21746
|
-
scope: z.string().optional()
|
|
21747
|
-
}
|
|
21748
|
-
},
|
|
21749
|
-
async (input) => {
|
|
21750
|
-
const page = await engine.getPage(input.pathOrUrl, input.scope);
|
|
21751
|
-
return {
|
|
21752
|
-
content: [
|
|
21753
|
-
{
|
|
21754
|
-
type: "text",
|
|
21755
|
-
text: JSON.stringify(page, null, 2)
|
|
21756
|
-
}
|
|
21757
|
-
]
|
|
21758
|
-
};
|
|
21759
|
-
}
|
|
21760
|
-
);
|
|
21761
|
-
server.registerTool(
|
|
21762
|
-
"list_pages",
|
|
21763
|
-
{
|
|
21764
|
-
description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
|
|
21765
|
-
inputSchema: {
|
|
21766
|
-
pathPrefix: z.string().optional(),
|
|
21767
|
-
cursor: z.string().optional(),
|
|
21768
|
-
limit: z.number().int().positive().max(200).optional(),
|
|
21769
|
-
scope: z.string().optional()
|
|
21770
|
-
}
|
|
21771
|
-
},
|
|
21772
|
-
async (input) => {
|
|
21773
|
-
const result = await engine.listPages({
|
|
21774
|
-
pathPrefix: input.pathPrefix,
|
|
21775
|
-
cursor: input.cursor,
|
|
21776
|
-
limit: input.limit,
|
|
21777
|
-
scope: input.scope
|
|
21778
|
-
});
|
|
21779
|
-
return {
|
|
21780
|
-
content: [
|
|
21781
|
-
{
|
|
21782
|
-
type: "text",
|
|
21783
|
-
text: JSON.stringify(result, null, 2)
|
|
21784
|
-
}
|
|
21785
|
-
]
|
|
21786
|
-
};
|
|
21787
|
-
}
|
|
21788
|
-
);
|
|
21789
|
-
server.registerTool(
|
|
21790
|
-
"get_site_structure",
|
|
21791
|
-
{
|
|
21792
|
-
description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
|
|
21793
|
-
inputSchema: {
|
|
21794
|
-
pathPrefix: z.string().optional(),
|
|
21795
|
-
scope: z.string().optional(),
|
|
21796
|
-
maxPages: z.number().int().positive().max(2e3).optional()
|
|
21756
|
+
if (result.results.length === 0) {
|
|
21757
|
+
return {
|
|
21758
|
+
content: [
|
|
21759
|
+
{
|
|
21760
|
+
type: "text",
|
|
21761
|
+
text: `No results found for "${input.query}". Try broader keywords or remove filters.`
|
|
21762
|
+
}
|
|
21763
|
+
]
|
|
21764
|
+
};
|
|
21797
21765
|
}
|
|
21798
|
-
},
|
|
21799
|
-
async (input) => {
|
|
21800
|
-
const result = await engine.getSiteStructure({
|
|
21801
|
-
pathPrefix: input.pathPrefix,
|
|
21802
|
-
scope: input.scope,
|
|
21803
|
-
maxPages: input.maxPages
|
|
21804
|
-
});
|
|
21805
21766
|
return {
|
|
21806
21767
|
content: [
|
|
21807
21768
|
{
|
|
@@ -21813,56 +21774,51 @@ function createServer(engine) {
|
|
|
21813
21774
|
}
|
|
21814
21775
|
);
|
|
21815
21776
|
server.registerTool(
|
|
21816
|
-
"
|
|
21777
|
+
"get_page",
|
|
21817
21778
|
{
|
|
21818
|
-
description: "
|
|
21779
|
+
description: "Retrieves the full markdown content and metadata for a specific page by its URL path. Use this after search when snippets lack the detail needed to answer a question. Returns reconstructed page markdown, frontmatter (title, routeFile, tags, link counts, indexedAt), and the source file path. Do NOT use this for discovery \u2014 use search first to find relevant pages.",
|
|
21819
21780
|
inputSchema: {
|
|
21820
|
-
|
|
21781
|
+
path: z.string().min(1).describe("URL path of the page (e.g. '/docs/auth'). Use a URL from search results."),
|
|
21821
21782
|
scope: z.string().optional()
|
|
21822
21783
|
}
|
|
21823
21784
|
},
|
|
21824
21785
|
async (input) => {
|
|
21825
|
-
|
|
21826
|
-
|
|
21827
|
-
topK: 1,
|
|
21828
|
-
scope: input.scope
|
|
21829
|
-
});
|
|
21830
|
-
if (result.results.length === 0) {
|
|
21786
|
+
try {
|
|
21787
|
+
const page = await engine.getPage(input.path, input.scope);
|
|
21831
21788
|
return {
|
|
21832
21789
|
content: [
|
|
21833
21790
|
{
|
|
21834
21791
|
type: "text",
|
|
21835
|
-
text: JSON.stringify(
|
|
21836
|
-
|
|
21837
|
-
|
|
21792
|
+
text: JSON.stringify(page, null, 2)
|
|
21793
|
+
}
|
|
21794
|
+
]
|
|
21795
|
+
};
|
|
21796
|
+
} catch {
|
|
21797
|
+
const suggestions = await engine.search({ q: input.path, topK: 3, scope: input.scope });
|
|
21798
|
+
const similar = suggestions.results.map((r) => r.url);
|
|
21799
|
+
return {
|
|
21800
|
+
content: [
|
|
21801
|
+
{
|
|
21802
|
+
type: "text",
|
|
21803
|
+
text: similar.length > 0 ? `Page '${input.path}' not found. Similar pages: ${similar.join(", ")}` : `Page '${input.path}' not found. Use search to find the correct URL.`
|
|
21838
21804
|
}
|
|
21839
21805
|
]
|
|
21840
21806
|
};
|
|
21841
21807
|
}
|
|
21842
|
-
const match = result.results[0];
|
|
21843
|
-
const { url, routeFile, sectionTitle, snippet } = match;
|
|
21844
|
-
return {
|
|
21845
|
-
content: [
|
|
21846
|
-
{
|
|
21847
|
-
type: "text",
|
|
21848
|
-
text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
|
|
21849
|
-
}
|
|
21850
|
-
]
|
|
21851
|
-
};
|
|
21852
21808
|
}
|
|
21853
21809
|
);
|
|
21854
21810
|
server.registerTool(
|
|
21855
21811
|
"get_related_pages",
|
|
21856
21812
|
{
|
|
21857
|
-
description: "
|
|
21813
|
+
description: "Finds pages related to a specific page using link graph analysis, semantic similarity, and URL structure. Returns related pages with relationship type (outgoing_link, incoming_link, sibling, semantic) and relevance score. Do NOT use this for general search \u2014 use search instead. Use this only when you already have a specific page URL and need to discover connected content.",
|
|
21858
21814
|
inputSchema: {
|
|
21859
|
-
|
|
21860
|
-
|
|
21861
|
-
|
|
21815
|
+
path: z.string().min(1).describe("URL path of the source page (e.g. '/docs/auth'). Use a URL from search results."),
|
|
21816
|
+
topK: z.number().int().positive().max(25).optional().describe("Number of related pages to return (default: 10, max: 25)"),
|
|
21817
|
+
scope: z.string().optional()
|
|
21862
21818
|
}
|
|
21863
21819
|
},
|
|
21864
21820
|
async (input) => {
|
|
21865
|
-
const result = await engine.getRelatedPages(input.
|
|
21821
|
+
const result = await engine.getRelatedPages(input.path, {
|
|
21866
21822
|
topK: input.topK,
|
|
21867
21823
|
scope: input.scope
|
|
21868
21824
|
});
|