searchsocket 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +99 -143
- package/dist/index.cjs +98 -142
- package/dist/index.d.cts +10 -1
- package/dist/index.d.ts +10 -1
- package/dist/index.js +98 -142
- package/dist/sveltekit.cjs +98 -142
- package/dist/sveltekit.js +98 -142
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -17274,6 +17274,26 @@ function ensureStateDirs(cwd, stateDir, scope) {
|
|
|
17274
17274
|
fs__default.default.mkdirSync(statePath, { recursive: true });
|
|
17275
17275
|
return { statePath };
|
|
17276
17276
|
}
|
|
17277
|
+
/**
 * Rebuild a page's markdown body from its chunk records.
 *
 * A chunk's text may start with a title line followed by a blank line:
 * either `<pageTitle> — <sectionTitle>` (tried first, only when the chunk has
 * a section title) or just `<pageTitle>`. When such a prefix is present it is
 * removed; the remainder is trimmed and the pieces are joined with a blank
 * line, in the order the chunks are given.
 *
 * Chunks whose text is missing, not a string, or empty after stripping are
 * skipped, so the result never contains runs of empty separators.
 *
 * @param chunks Chunk records, already in display order; `chunkText` and
 *   `sectionTitle` are read from each.
 * @param pageTitle Page title used to recognise the leading prefix.
 * @returns The joined markdown, or "" when no chunk contributes any text.
 */
function reconstructMarkdownFromChunks(chunks, pageTitle) {
  if (chunks.length === 0) return "";
  // Loop-invariant: the page-only prefix does not depend on the chunk.
  const prefixWithoutSection = `${pageTitle}\n\n`;
  const parts = [];
  for (const chunk of chunks) {
    // Guard against records without usable text instead of throwing on startsWith.
    let text = typeof chunk.chunkText === "string" ? chunk.chunkText : "";
    const prefixWithSection = `${pageTitle} \u2014 ${chunk.sectionTitle}\n\n`;
    if (chunk.sectionTitle && text.startsWith(prefixWithSection)) {
      text = text.slice(prefixWithSection.length);
    } else if (text.startsWith(prefixWithoutSection)) {
      text = text.slice(prefixWithoutSection.length);
    }
    const body = text.trim();
    // An empty piece would only add a stray "\n\n" separator to the output.
    if (body.length > 0) {
      parts.push(body);
    }
  }
  return parts.join("\n\n");
}
|
|
17277
17297
|
var UpstashSearchStore = class {
|
|
17278
17298
|
index;
|
|
17279
17299
|
pagesNs;
|
|
@@ -17653,10 +17673,12 @@ var UpstashSearchStore = class {
|
|
|
17653
17673
|
});
|
|
17654
17674
|
const doc = results[0];
|
|
17655
17675
|
if (!doc || !doc.metadata) return null;
|
|
17676
|
+
const chunks = await this.getChunksForPage(url, scope);
|
|
17677
|
+
const markdown = reconstructMarkdownFromChunks(chunks, doc.metadata.title);
|
|
17656
17678
|
return {
|
|
17657
17679
|
url: doc.metadata.url,
|
|
17658
17680
|
title: doc.metadata.title,
|
|
17659
|
-
markdown
|
|
17681
|
+
markdown,
|
|
17660
17682
|
projectId: doc.metadata.projectId,
|
|
17661
17683
|
scopeName: doc.metadata.scopeName,
|
|
17662
17684
|
routeFile: doc.metadata.routeFile,
|
|
@@ -17676,6 +17698,37 @@ var UpstashSearchStore = class {
|
|
|
17676
17698
|
return null;
|
|
17677
17699
|
}
|
|
17678
17700
|
}
|
|
17701
|
+
/**
|
|
17702
|
+
* Fetch all chunks belonging to a specific page URL, sorted by ordinal.
|
|
17703
|
+
* Used to reconstruct full page markdown from chunk content.
|
|
17704
|
+
*/
|
|
17705
|
+
async getChunksForPage(url, scope) {
|
|
17706
|
+
const chunks = [];
|
|
17707
|
+
let cursor = "0";
|
|
17708
|
+
try {
|
|
17709
|
+
for (; ; ) {
|
|
17710
|
+
const result = await this.chunksNs.range({
|
|
17711
|
+
cursor,
|
|
17712
|
+
limit: 100,
|
|
17713
|
+
includeMetadata: true
|
|
17714
|
+
});
|
|
17715
|
+
for (const doc of result.vectors) {
|
|
17716
|
+
if (doc.metadata?.projectId === scope.projectId && doc.metadata?.scopeName === scope.scopeName && doc.metadata?.url === url) {
|
|
17717
|
+
chunks.push({
|
|
17718
|
+
chunkText: doc.metadata.chunkText ?? "",
|
|
17719
|
+
ordinal: doc.metadata.ordinal ?? 0,
|
|
17720
|
+
sectionTitle: doc.metadata.sectionTitle ?? "",
|
|
17721
|
+
headingPath: doc.metadata.headingPath ? String(doc.metadata.headingPath).split(" > ").filter(Boolean) : []
|
|
17722
|
+
});
|
|
17723
|
+
}
|
|
17724
|
+
}
|
|
17725
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
17726
|
+
cursor = result.nextCursor;
|
|
17727
|
+
}
|
|
17728
|
+
} catch {
|
|
17729
|
+
}
|
|
17730
|
+
return chunks.sort((a, b) => a.ordinal - b.ordinal);
|
|
17731
|
+
}
|
|
17679
17732
|
async fetchPageWithVector(url, scope) {
|
|
17680
17733
|
try {
|
|
17681
17734
|
const results = await this.pagesNs.fetch([url], {
|
|
@@ -20996,7 +21049,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20996
21049
|
keywords: r.keywords ?? [],
|
|
20997
21050
|
summary: r.summary ?? "",
|
|
20998
21051
|
tags: r.tags,
|
|
20999
|
-
markdown: r.markdown,
|
|
21000
21052
|
routeFile: r.routeFile,
|
|
21001
21053
|
routeResolution: r.routeResolution,
|
|
21002
21054
|
incomingLinks: r.incomingLinks,
|
|
@@ -21023,7 +21075,6 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21023
21075
|
keywords: r.keywords ?? [],
|
|
21024
21076
|
summary: r.summary ?? "",
|
|
21025
21077
|
tags: r.tags,
|
|
21026
|
-
markdown: r.markdown,
|
|
21027
21078
|
routeFile: r.routeFile,
|
|
21028
21079
|
routeResolution: r.routeResolution,
|
|
21029
21080
|
incomingLinks: r.incomingLinks,
|
|
@@ -21107,6 +21158,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21107
21158
|
let documentsUpserted = 0;
|
|
21108
21159
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
21109
21160
|
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Vector...`);
|
|
21161
|
+
const CHUNK_TEXT_MAX_CHARS = 3e4;
|
|
21110
21162
|
const docs = changedChunks.map((chunk) => {
|
|
21111
21163
|
const embeddingText = buildEmbeddingText(chunk, this.config.chunking.prependTitle);
|
|
21112
21164
|
if (embeddingText.length > 2e3) {
|
|
@@ -21114,6 +21166,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21114
21166
|
`Chunk ${chunk.chunkKey} text is ${embeddingText.length} chars (~${Math.round(embeddingText.length / 4)} tokens), which may exceed the 512-token model limit and be silently truncated.`
|
|
21115
21167
|
);
|
|
21116
21168
|
}
|
|
21169
|
+
const cappedText = embeddingText.length > CHUNK_TEXT_MAX_CHARS ? embeddingText.slice(0, CHUNK_TEXT_MAX_CHARS) : embeddingText;
|
|
21117
21170
|
return {
|
|
21118
21171
|
id: chunk.chunkKey,
|
|
21119
21172
|
data: embeddingText,
|
|
@@ -21124,7 +21177,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
21124
21177
|
sectionTitle: chunk.sectionTitle ?? "",
|
|
21125
21178
|
headingPath: chunk.headingPath.join(" > "),
|
|
21126
21179
|
snippet: chunk.snippet,
|
|
21127
|
-
chunkText:
|
|
21180
|
+
chunkText: cappedText,
|
|
21128
21181
|
tags: chunk.tags,
|
|
21129
21182
|
ordinal: chunk.ordinal,
|
|
21130
21183
|
contentHash: chunk.contentHash,
|
|
@@ -21686,45 +21739,20 @@ var SearchEngine = class _SearchEngine {
|
|
|
21686
21739
|
function createServer(engine) {
|
|
21687
21740
|
const server = new mcp_js.McpServer({
|
|
21688
21741
|
name: "searchsocket-mcp",
|
|
21689
|
-
version: "0.
|
|
21742
|
+
version: "0.2.0"
|
|
21690
21743
|
});
|
|
21691
21744
|
server.registerTool(
|
|
21692
21745
|
"search",
|
|
21693
21746
|
{
|
|
21694
|
-
description:
|
|
21747
|
+
description: "Searches indexed site content using semantic similarity. Returns ranked results with url, title, snippet, chunkText (full section markdown), score, and routeFile (source file path for editing). Each result includes the best-matching section; set groupBy to 'page' (default) for additional chunk sub-results per page. Use routeFile to locate the source file when editing content. If snippets lack detail, call get_page with the result URL to retrieve the full page markdown.",
|
|
21695
21748
|
inputSchema: {
|
|
21696
|
-
query: zod.z.string().min(1),
|
|
21697
|
-
|
|
21698
|
-
|
|
21699
|
-
|
|
21700
|
-
|
|
21701
|
-
|
|
21702
|
-
|
|
21703
|
-
maxSubResults: zod.z.number().int().positive().max(20).optional()
|
|
21704
|
-
},
|
|
21705
|
-
outputSchema: {
|
|
21706
|
-
q: zod.z.string(),
|
|
21707
|
-
scope: zod.z.string(),
|
|
21708
|
-
results: zod.z.array(zod.z.object({
|
|
21709
|
-
url: zod.z.string(),
|
|
21710
|
-
title: zod.z.string(),
|
|
21711
|
-
sectionTitle: zod.z.string().optional(),
|
|
21712
|
-
snippet: zod.z.string(),
|
|
21713
|
-
score: zod.z.number(),
|
|
21714
|
-
routeFile: zod.z.string(),
|
|
21715
|
-
chunks: zod.z.array(zod.z.object({
|
|
21716
|
-
sectionTitle: zod.z.string().optional(),
|
|
21717
|
-
snippet: zod.z.string(),
|
|
21718
|
-
headingPath: zod.z.array(zod.z.string()),
|
|
21719
|
-
score: zod.z.number()
|
|
21720
|
-
})).optional()
|
|
21721
|
-
})),
|
|
21722
|
-
meta: zod.z.object({
|
|
21723
|
-
timingsMs: zod.z.object({
|
|
21724
|
-
search: zod.z.number(),
|
|
21725
|
-
total: zod.z.number()
|
|
21726
|
-
})
|
|
21727
|
-
})
|
|
21749
|
+
query: zod.z.string().min(1).describe("Search query. Use keywords or natural language, not full sentences."),
|
|
21750
|
+
topK: zod.z.number().int().positive().max(100).optional().describe("Number of results to return (default: 10, max: 100)"),
|
|
21751
|
+
pathPrefix: zod.z.string().optional().describe("Filter results to URLs starting with this prefix (e.g. '/docs')"),
|
|
21752
|
+
tags: zod.z.array(zod.z.string()).optional().describe("Filter results to pages matching all specified tags"),
|
|
21753
|
+
filters: zod.z.record(zod.z.string(), zod.z.union([zod.z.string(), zod.z.number(), zod.z.boolean()])).optional().describe('Filter by structured page metadata (e.g. {"version": 2})'),
|
|
21754
|
+
groupBy: zod.z.enum(["page", "chunk"]).optional().describe("'page' (default) groups chunks by page with sub-results; 'chunk' returns individual chunks"),
|
|
21755
|
+
scope: zod.z.string().optional()
|
|
21728
21756
|
}
|
|
21729
21757
|
},
|
|
21730
21758
|
async (input) => {
|
|
@@ -21735,85 +21763,18 @@ function createServer(engine) {
|
|
|
21735
21763
|
pathPrefix: input.pathPrefix,
|
|
21736
21764
|
tags: input.tags,
|
|
21737
21765
|
filters: input.filters,
|
|
21738
|
-
groupBy: input.groupBy
|
|
21739
|
-
maxSubResults: input.maxSubResults
|
|
21766
|
+
groupBy: input.groupBy
|
|
21740
21767
|
});
|
|
21741
|
-
|
|
21742
|
-
|
|
21743
|
-
|
|
21744
|
-
|
|
21745
|
-
|
|
21746
|
-
|
|
21747
|
-
|
|
21748
|
-
|
|
21749
|
-
|
|
21750
|
-
}
|
|
21751
|
-
);
|
|
21752
|
-
server.registerTool(
|
|
21753
|
-
"get_page",
|
|
21754
|
-
{
|
|
21755
|
-
description: "Fetch indexed markdown for a specific path or URL, including frontmatter and routeFile mapping.",
|
|
21756
|
-
inputSchema: {
|
|
21757
|
-
pathOrUrl: zod.z.string().min(1),
|
|
21758
|
-
scope: zod.z.string().optional()
|
|
21759
|
-
}
|
|
21760
|
-
},
|
|
21761
|
-
async (input) => {
|
|
21762
|
-
const page = await engine.getPage(input.pathOrUrl, input.scope);
|
|
21763
|
-
return {
|
|
21764
|
-
content: [
|
|
21765
|
-
{
|
|
21766
|
-
type: "text",
|
|
21767
|
-
text: JSON.stringify(page, null, 2)
|
|
21768
|
-
}
|
|
21769
|
-
]
|
|
21770
|
-
};
|
|
21771
|
-
}
|
|
21772
|
-
);
|
|
21773
|
-
server.registerTool(
|
|
21774
|
-
"list_pages",
|
|
21775
|
-
{
|
|
21776
|
-
description: "List indexed pages with optional path prefix filtering and cursor-based pagination. Returns url, title, description, and routeFile for each page. Use nextCursor to fetch subsequent pages.",
|
|
21777
|
-
inputSchema: {
|
|
21778
|
-
pathPrefix: zod.z.string().optional(),
|
|
21779
|
-
cursor: zod.z.string().optional(),
|
|
21780
|
-
limit: zod.z.number().int().positive().max(200).optional(),
|
|
21781
|
-
scope: zod.z.string().optional()
|
|
21782
|
-
}
|
|
21783
|
-
},
|
|
21784
|
-
async (input) => {
|
|
21785
|
-
const result = await engine.listPages({
|
|
21786
|
-
pathPrefix: input.pathPrefix,
|
|
21787
|
-
cursor: input.cursor,
|
|
21788
|
-
limit: input.limit,
|
|
21789
|
-
scope: input.scope
|
|
21790
|
-
});
|
|
21791
|
-
return {
|
|
21792
|
-
content: [
|
|
21793
|
-
{
|
|
21794
|
-
type: "text",
|
|
21795
|
-
text: JSON.stringify(result, null, 2)
|
|
21796
|
-
}
|
|
21797
|
-
]
|
|
21798
|
-
};
|
|
21799
|
-
}
|
|
21800
|
-
);
|
|
21801
|
-
server.registerTool(
|
|
21802
|
-
"get_site_structure",
|
|
21803
|
-
{
|
|
21804
|
-
description: "Returns the hierarchical page tree derived from URL paths. Use this to understand site navigation structure, find where pages belong, or scope further operations to a section. Nodes with isIndexed: false are implicit structural parents not directly in the index. Large sites (>2000 pages) return truncated: true.",
|
|
21805
|
-
inputSchema: {
|
|
21806
|
-
pathPrefix: zod.z.string().optional(),
|
|
21807
|
-
scope: zod.z.string().optional(),
|
|
21808
|
-
maxPages: zod.z.number().int().positive().max(2e3).optional()
|
|
21768
|
+
if (result.results.length === 0) {
|
|
21769
|
+
return {
|
|
21770
|
+
content: [
|
|
21771
|
+
{
|
|
21772
|
+
type: "text",
|
|
21773
|
+
text: `No results found for "${input.query}". Try broader keywords or remove filters.`
|
|
21774
|
+
}
|
|
21775
|
+
]
|
|
21776
|
+
};
|
|
21809
21777
|
}
|
|
21810
|
-
},
|
|
21811
|
-
async (input) => {
|
|
21812
|
-
const result = await engine.getSiteStructure({
|
|
21813
|
-
pathPrefix: input.pathPrefix,
|
|
21814
|
-
scope: input.scope,
|
|
21815
|
-
maxPages: input.maxPages
|
|
21816
|
-
});
|
|
21817
21778
|
return {
|
|
21818
21779
|
content: [
|
|
21819
21780
|
{
|
|
@@ -21825,56 +21786,51 @@ function createServer(engine) {
|
|
|
21825
21786
|
}
|
|
21826
21787
|
);
|
|
21827
21788
|
server.registerTool(
|
|
21828
|
-
"
|
|
21789
|
+
"get_page",
|
|
21829
21790
|
{
|
|
21830
|
-
description: "
|
|
21791
|
+
description: "Retrieves the full markdown content and metadata for a specific page by its URL path. Use this after search when snippets lack the detail needed to answer a question. Returns reconstructed page markdown, frontmatter (title, routeFile, tags, link counts, indexedAt), and the source file path. Do NOT use this for discovery \u2014 use search first to find relevant pages.",
|
|
21831
21792
|
inputSchema: {
|
|
21832
|
-
|
|
21793
|
+
path: zod.z.string().min(1).describe("URL path of the page (e.g. '/docs/auth'). Use a URL from search results."),
|
|
21833
21794
|
scope: zod.z.string().optional()
|
|
21834
21795
|
}
|
|
21835
21796
|
},
|
|
21836
21797
|
async (input) => {
|
|
21837
|
-
|
|
21838
|
-
|
|
21839
|
-
topK: 1,
|
|
21840
|
-
scope: input.scope
|
|
21841
|
-
});
|
|
21842
|
-
if (result.results.length === 0) {
|
|
21798
|
+
try {
|
|
21799
|
+
const page = await engine.getPage(input.path, input.scope);
|
|
21843
21800
|
return {
|
|
21844
21801
|
content: [
|
|
21845
21802
|
{
|
|
21846
21803
|
type: "text",
|
|
21847
|
-
text: JSON.stringify(
|
|
21848
|
-
|
|
21849
|
-
|
|
21804
|
+
text: JSON.stringify(page, null, 2)
|
|
21805
|
+
}
|
|
21806
|
+
]
|
|
21807
|
+
};
|
|
21808
|
+
} catch {
|
|
21809
|
+
const suggestions = await engine.search({ q: input.path, topK: 3, scope: input.scope });
|
|
21810
|
+
const similar = suggestions.results.map((r) => r.url);
|
|
21811
|
+
return {
|
|
21812
|
+
content: [
|
|
21813
|
+
{
|
|
21814
|
+
type: "text",
|
|
21815
|
+
text: similar.length > 0 ? `Page '${input.path}' not found. Similar pages: ${similar.join(", ")}` : `Page '${input.path}' not found. Use search to find the correct URL.`
|
|
21850
21816
|
}
|
|
21851
21817
|
]
|
|
21852
21818
|
};
|
|
21853
21819
|
}
|
|
21854
|
-
const match = result.results[0];
|
|
21855
|
-
const { url, routeFile, sectionTitle, snippet } = match;
|
|
21856
|
-
return {
|
|
21857
|
-
content: [
|
|
21858
|
-
{
|
|
21859
|
-
type: "text",
|
|
21860
|
-
text: JSON.stringify({ url, routeFile, sectionTitle, snippet })
|
|
21861
|
-
}
|
|
21862
|
-
]
|
|
21863
|
-
};
|
|
21864
21820
|
}
|
|
21865
21821
|
);
|
|
21866
21822
|
server.registerTool(
|
|
21867
21823
|
"get_related_pages",
|
|
21868
21824
|
{
|
|
21869
|
-
description: "
|
|
21825
|
+
description: "Finds pages related to a specific page using link graph analysis, semantic similarity, and URL structure. Returns related pages with relationship type (outgoing_link, incoming_link, sibling, semantic) and relevance score. Do NOT use this for general search \u2014 use search instead. Use this only when you already have a specific page URL and need to discover connected content.",
|
|
21870
21826
|
inputSchema: {
|
|
21871
|
-
|
|
21872
|
-
|
|
21873
|
-
|
|
21827
|
+
path: zod.z.string().min(1).describe("URL path of the source page (e.g. '/docs/auth'). Use a URL from search results."),
|
|
21828
|
+
topK: zod.z.number().int().positive().max(25).optional().describe("Number of related pages to return (default: 10, max: 25)"),
|
|
21829
|
+
scope: zod.z.string().optional()
|
|
21874
21830
|
}
|
|
21875
21831
|
},
|
|
21876
21832
|
async (input) => {
|
|
21877
|
-
const result = await engine.getRelatedPages(input.
|
|
21833
|
+
const result = await engine.getRelatedPages(input.path, {
|
|
21878
21834
|
topK: input.topK,
|
|
21879
21835
|
scope: input.scope
|
|
21880
21836
|
});
|
package/dist/index.d.cts
CHANGED
|
@@ -40,7 +40,6 @@ interface PageVectorMetadata {
|
|
|
40
40
|
keywords: string[];
|
|
41
41
|
summary: string;
|
|
42
42
|
tags: string[];
|
|
43
|
-
markdown: string;
|
|
44
43
|
routeFile: string;
|
|
45
44
|
routeResolution: string;
|
|
46
45
|
incomingLinks: number;
|
|
@@ -121,6 +120,16 @@ declare class UpstashSearchStore {
|
|
|
121
120
|
metadata: Record<string, unknown>;
|
|
122
121
|
}>, scope: Scope): Promise<void>;
|
|
123
122
|
getPage(url: string, scope: Scope): Promise<PageRecord | null>;
|
|
123
|
+
/**
|
|
124
|
+
* Fetch all chunks belonging to a specific page URL, sorted by ordinal.
|
|
125
|
+
* Used to reconstruct full page markdown from chunk content.
|
|
126
|
+
*/
|
|
127
|
+
getChunksForPage(url: string, scope: Scope): Promise<Array<{
|
|
128
|
+
chunkText: string;
|
|
129
|
+
ordinal: number;
|
|
130
|
+
sectionTitle: string;
|
|
131
|
+
headingPath: string[];
|
|
132
|
+
}>>;
|
|
124
133
|
fetchPageWithVector(url: string, scope: Scope): Promise<{
|
|
125
134
|
metadata: PageVectorMetadata;
|
|
126
135
|
vector: number[];
|
package/dist/index.d.ts
CHANGED
|
@@ -40,7 +40,6 @@ interface PageVectorMetadata {
|
|
|
40
40
|
keywords: string[];
|
|
41
41
|
summary: string;
|
|
42
42
|
tags: string[];
|
|
43
|
-
markdown: string;
|
|
44
43
|
routeFile: string;
|
|
45
44
|
routeResolution: string;
|
|
46
45
|
incomingLinks: number;
|
|
@@ -121,6 +120,16 @@ declare class UpstashSearchStore {
|
|
|
121
120
|
metadata: Record<string, unknown>;
|
|
122
121
|
}>, scope: Scope): Promise<void>;
|
|
123
122
|
getPage(url: string, scope: Scope): Promise<PageRecord | null>;
|
|
123
|
+
/**
|
|
124
|
+
* Fetch all chunks belonging to a specific page URL, sorted by ordinal.
|
|
125
|
+
* Used to reconstruct full page markdown from chunk content.
|
|
126
|
+
*/
|
|
127
|
+
getChunksForPage(url: string, scope: Scope): Promise<Array<{
|
|
128
|
+
chunkText: string;
|
|
129
|
+
ordinal: number;
|
|
130
|
+
sectionTitle: string;
|
|
131
|
+
headingPath: string[];
|
|
132
|
+
}>>;
|
|
124
133
|
fetchPageWithVector(url: string, scope: Scope): Promise<{
|
|
125
134
|
metadata: PageVectorMetadata;
|
|
126
135
|
vector: number[];
|