docshark 0.1.23 → 0.1.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,6 +38,13 @@ export function createApiRouter(deps) {
38
38
  const results = deps.searchEngine.search(q, { library, limit });
39
39
  return json(results);
40
40
  }
41
+ // POST /api/search/batch
42
+ if (method === 'POST' && path === '/search/batch') {
43
+ const body = await request.json();
44
+ const requests = Array.isArray(body?.requests) ? body.requests : [];
45
+ const results = deps.searchEngine.searchMany(requests);
46
+ return json(results);
47
+ }
41
48
  // GET /api/crawls
42
49
  if (method === 'GET' && path === '/crawls') {
43
50
  const libraryId = url.searchParams.get('library_id') || undefined;
package/dist/cli.js CHANGED
@@ -5,7 +5,7 @@ import { startHttpServer } from "./http.js";
5
5
  import { StdioTransport } from "@tmcp/transport-stdio";
6
6
  import { server, db, searchEngine, libraryService } from "./server.js";
7
7
  import { maybeNotifyAboutUpdate, runUpdateCommand } from "./cli-update.js";
8
- import { formatSearchResults } from "./search/format-results.js";
8
+ import { formatBatchSearchResults, formatSearchResults, } from "./search/format-results.js";
9
9
  import { VERSION } from "./version.js";
10
10
  const useColor = process.stdout.isTTY;
11
11
  const color = {
@@ -163,6 +163,24 @@ cli
163
163
  }
164
164
  console.log(`\n${formatSearchResults(query, results)}\n`);
165
165
  });
166
// `search-batch` command: runs every positional query through the search
// engine in a single pass and prints one combined markdown report.
//   -l/--library  applies the same library filter to every query
//   -m/--limit    maximum results per query (default 5)
cli
    .command("search-batch [...queries]", "Search multiple documentation queries")
    .option("-l, --library <name>", "Filter all queries by library")
    .option("-m, --limit <n>", "Max results per query", { default: "5" })
    .action(async (queries, opts) => {
        await maybeNotifyForCommand("search-batch");
        if (!Array.isArray(queries) || queries.length === 0) {
            console.error("\n❌ Please provide at least one query.\n");
            process.exit(1);
        }
        // The flag value is user input: parse it explicitly as base 10 and
        // refuse anything that does not yield a positive integer, instead of
        // letting NaN flow into the search engine.
        const limit = Number.parseInt(String(opts.limit), 10);
        if (!Number.isInteger(limit) || limit < 1) {
            console.error("\n❌ --limit must be a positive integer.\n");
            process.exit(1);
        }
        db.init();
        const results = searchEngine.searchMany(queries.map((query) => ({
            query,
            library: opts.library,
            limit,
        })));
        console.log(`\n${formatBatchSearchResults(results)}\n`);
    });
166
184
  cli
167
185
  .command("list", "List indexed libraries")
168
186
  .alias("l")
package/dist/index.d.ts CHANGED
@@ -2,3 +2,5 @@ export * from "./server.js";
2
2
  export * from "./types.js";
3
3
  export * from "./version.js";
4
4
  export * from "./http.js";
5
+ export * from "./tools/search-docs.js";
6
+ export * from "./tools/search-docs-batch.js";
package/dist/index.js CHANGED
@@ -2,3 +2,5 @@ export * from "./server.js";
2
2
  export * from "./types.js";
3
3
  export * from "./version.js";
4
4
  export * from "./http.js";
5
+ export * from "./tools/search-docs.js";
6
+ export * from "./tools/search-docs-batch.js";
@@ -1,2 +1,3 @@
1
- import type { SearchResult } from './types.js';
1
+ import type { BatchSearchResult, SearchResult } from './types.js';
2
2
  /** Formats the results of a single query as a markdown report headed by the query text. */
  export declare function formatSearchResults(query: string, results: SearchResult[]): string;
3
/** Formats the per-query results of a batch search as one markdown report. */
+ export declare function formatBatchSearchResults(results: BatchSearchResult[]): string;
@@ -5,19 +5,32 @@ function formatReasons(reasons) {
5
5
  }
6
6
  return `**Why this ranked highly:** ${reasons.join(', ')}\n\n`;
7
7
  }
8
- export function formatSearchResults(query, results) {
9
- const formatted = results
8
+ function formatResultBlocks(results) {
9
+ return results
10
10
  .map((result, index) => {
11
11
  let block = `### ${index + 1}. ${result.page_title} — ${result.library_display_name}\n`;
12
12
  block += `**Source:** ${result.page_url}\n`;
13
13
  if (result.heading_context.trim().length > 0) {
14
14
  block += `**Section:** ${result.heading_context}\n`;
15
15
  }
16
- // Sanitize content to prevent prompt injection
17
16
  const sanitizedContent = sanitizeDocContent(result.content);
18
17
  block += `${formatReasons(result.reasons)}${sanitizedContent}`;
19
18
  return block;
20
19
  })
21
20
  .join('\n\n---\n\n');
22
- return `## Results for "${query}"\n\n${formatted}`;
21
+ }
22
+ export function formatSearchResults(query, results) {
23
+ return `## Results for "${query}"\n\n${formatResultBlocks(results)}`;
24
+ }
25
+ export function formatBatchSearchResults(results) {
26
+ const formatted = results
27
+ .map((result, index) => {
28
+ const librarySuffix = result.library ? ` — ${result.library}` : '';
29
+ if (result.results.length === 0) {
30
+ return `### ${index + 1}. ${result.query}${librarySuffix}\n\nNo results found.`;
31
+ }
32
+ return `### ${index + 1}. ${result.query}${librarySuffix}\n\n${formatResultBlocks(result.results)}`;
33
+ })
34
+ .join('\n\n***\n\n');
35
+ return `## Batch Search Results\n\n${formatted}`;
23
36
  }
@@ -2,6 +2,10 @@ import type { SearchPlan } from "./types.js";
2
2
  export declare function normalizeSearchText(value: string): string;
3
3
  /**
   * Turns a raw query string into a SearchPlan via `build`.
   * The private members added in this version (buildDecomposedQueries,
   * shouldDecompose, segmentBySeparators, chunkKeywords) split a long or
   * compound query into smaller sub-queries.
   */
  export declare class QueryPlanner {
4
4
  build(query: string, library?: string): SearchPlan;
5
+ private buildDecomposedQueries;
6
+ private shouldDecompose;
7
+ private segmentBySeparators;
8
+ private chunkKeywords;
5
9
  private detectIntent;
6
10
  private extractVersion;
7
11
  }
@@ -44,18 +44,60 @@ export class QueryPlanner {
44
44
  .map((token) => sanitizeToken(token))
45
45
  .filter(Boolean);
46
46
  const filteredKeywords = Array.from(new Set(rawTokens.filter((token) => token.length > 1 && !STOP_WORDS.has(token))));
47
+ const keywords = filteredKeywords.length > 0
48
+ ? filteredKeywords
49
+ : Array.from(new Set(rawTokens));
47
50
  return {
48
51
  original_query: query,
49
52
  normalized_query: normalizedQuery,
50
53
  intent: this.detectIntent(normalizedQuery),
51
- keywords: filteredKeywords.length > 0
52
- ? filteredKeywords
53
- : Array.from(new Set(rawTokens)),
54
+ keywords,
54
55
  phrases: PHRASE_HINTS.filter((phrase) => normalizedQuery.includes(phrase)),
56
+ decomposed_queries: this.buildDecomposedQueries(normalizedQuery, keywords),
55
57
  requested_library: library,
56
58
  requested_version: this.extractVersion(normalizedQuery),
57
59
  };
58
60
  }
61
+ buildDecomposedQueries(normalizedQuery, keywords) {
62
+ if (!this.shouldDecompose(normalizedQuery, keywords)) {
63
+ return [];
64
+ }
65
+ const segmentedQueries = this.segmentBySeparators(normalizedQuery);
66
+ if (segmentedQueries.length > 1) {
67
+ return segmentedQueries;
68
+ }
69
+ return this.chunkKeywords(keywords);
70
+ }
71
+ shouldDecompose(normalizedQuery, keywords) {
72
+ if (keywords.length < 4) {
73
+ return false;
74
+ }
75
+ if (/[;,]/.test(normalizedQuery) || /\b(and|or|then|plus|with)\b/.test(normalizedQuery)) {
76
+ return true;
77
+ }
78
+ return keywords.length >= 7;
79
+ }
80
+ segmentBySeparators(normalizedQuery) {
81
+ const segments = normalizedQuery
82
+ .split(/(?:,|;|\band\b|\bor\b|\bthen\b|\bplus\b)/g)
83
+ .map((segment) => normalizeSearchText(segment))
84
+ .filter((segment) => segment.split(/\s+/).length >= 2);
85
+ return Array.from(new Set(segments)).slice(0, 4);
86
+ }
87
+ chunkKeywords(keywords) {
88
+ const targetBranches = Math.min(4, Math.ceil(keywords.length / 2));
89
+ const chunkSize = Math.min(3, Math.max(2, Math.ceil(keywords.length / targetBranches)));
90
+ const chunks = [];
91
+ for (let index = 0; index < keywords.length; index += chunkSize) {
92
+ const group = keywords.slice(index, index + chunkSize);
93
+ if (group.length === 1 && chunks.length > 0) {
94
+ chunks[chunks.length - 1] += ` ${group[0]}`;
95
+ continue;
96
+ }
97
+ chunks.push(group.join(" "));
98
+ }
99
+ return Array.from(new Set(chunks)).slice(0, 4);
100
+ }
59
101
  detectIntent(query) {
60
102
  if (query.includes("getting started") ||
61
103
  query.includes("quickstart") ||
@@ -3,12 +3,18 @@ export interface SearchOptions {
3
3
  library?: string;
4
4
  limit?: number;
5
5
  }
6
/**
 * One query inside a batch search call.
 * `library` optionally restricts the query to a library; `limit` is the
 * maximum number of results for this query (SearchEngine.searchMany falls
 * back to 5 when it is omitted).
 */
+ export interface BatchSearchRequest {
7
+ query: string;
8
+ library?: string;
9
+ limit?: number;
10
+ }
6
11
  /**
   * The planner's analysis of one query.
   * `keywords` are the de-duplicated tokens left after stop-word filtering
   * (or all tokens when filtering removes everything); `phrases` are the known
   * phrase hints found in the normalized query; `decomposed_queries` holds up
   * to four smaller sub-queries and is empty when the query is not decomposed.
   */
  export interface SearchPlan {
7
12
  original_query: string;
8
13
  normalized_query: string;
9
14
  intent: SearchIntent;
10
15
  keywords: string[];
11
16
  phrases: string[];
17
+ decomposed_queries: string[];
12
18
  requested_version?: string;
13
19
  requested_library?: string;
14
20
  }
@@ -31,3 +37,9 @@ export interface SearchResult extends SearchCandidate {
31
37
  path_type: string;
32
38
  version_tag: string | null;
33
39
  }
40
/**
 * The outcome of one request in a batch search.
 * Echoes the request's `query` and `library`, reports the `limit` that was
 * actually applied, and carries the hits found for that query.
 */
+ export interface BatchSearchResult {
41
+ query: string;
42
+ library?: string;
43
+ limit: number;
44
+ results: SearchResult[];
45
+ }
package/dist/server.js CHANGED
@@ -5,7 +5,7 @@ import * as v from "valibot";
5
5
  import { tool } from "tmcp/utils";
6
6
  import { Database } from "./storage/db.js";
7
7
  import { SearchEngine } from "./storage/search.js";
8
- import { formatSearchResults } from "./search/format-results.js";
8
+ import { formatBatchSearchResults, formatSearchResults, } from "./search/format-results.js";
9
9
  import { LibraryService } from "./services/library.js";
10
10
  import { JobManager } from "./jobs/manager.js";
11
11
  import { VERSION } from "./version.js";
@@ -55,6 +55,30 @@ server.tool({
55
55
  return tool.text(`❌ Error: ${message}`);
56
56
  }
57
57
  });
58
// Registers the `search_docs_batch` tool: accepts 1-10 search requests, runs
// them through the search engine and answers with one formatted text report.
// Failures are reported as an error text result rather than thrown.
{
    // Shape of a single request; `limit` must be an integer in 1..20 and defaults to 5.
    const batchRequestSchema = v.object({
        query: v.pipe(v.string(), v.description("Search query. Use natural language.")),
        library: v.optional(v.pipe(v.string(), v.description("Filter to a specific library."))),
        limit: v.optional(v.pipe(v.number(), v.integer(), v.minValue(1), v.maxValue(20)), 5),
    });
    server.tool({
        name: "search_docs_batch",
        description: "Run multiple documentation searches in one call. Use this for repeated or decomposed lookups.",
        annotations: {
            readOnlyHint: true,
            idempotentHint: true,
        },
        schema: v.object({
            requests: v.pipe(v.array(batchRequestSchema), v.minLength(1), v.maxLength(10)),
        }),
    }, async ({ requests }) => {
        try {
            return tool.text(formatBatchSearchResults(searchEngine.searchMany(requests)));
        }
        catch (failure) {
            const reason = failure instanceof Error ? failure.message : "Search failed";
            return tool.text(`❌ Error: ${reason}`);
        }
    });
}
58
82
  function requireValue(value, message) {
59
83
  if (value === undefined || value === null || value === "") {
60
84
  throw new Error(message);
@@ -1,11 +1,15 @@
1
1
  import type { Database } from "./db.js";
2
- import type { SearchOptions, SearchResult } from "../search/types.js";
3
- export type { SearchOptions, SearchResult } from "../search/types.js";
2
+ import type { BatchSearchRequest, BatchSearchResult, SearchOptions, SearchResult } from "../search/types.js";
3
+ export type { BatchSearchRequest, BatchSearchResult, SearchOptions, SearchResult, } from "../search/types.js";
4
4
  export declare class SearchEngine {
5
5
  private db;
6
6
  private planner;
7
7
  constructor(db: Database);
8
8
  search(query: string, opts?: SearchOptions): SearchResult[];
9
+ searchMany(requests: BatchSearchRequest[]): BatchSearchResult[];
10
+ private searchWithPlan;
11
+ private expandPlans;
12
+ private searchSingle;
9
13
  private fetchCandidates;
10
14
  private buildFtsQuery;
11
15
  private quoteTerm;
@@ -8,16 +8,98 @@ export class SearchEngine {
8
8
  search(query, opts = {}) {
9
9
  const limit = opts.limit ?? 5;
10
10
  const plan = this.planner.build(query, opts.library);
11
- const ftsQuery = this.buildFtsQuery(plan);
11
+ return this.searchWithPlan(plan, opts.library, limit);
12
+ }
13
+ searchMany(requests) {
14
+ return requests.map((request) => {
15
+ const limit = request.limit ?? 5;
16
+ return {
17
+ query: request.query,
18
+ library: request.library,
19
+ limit,
20
+ results: this.search(request.query, {
21
+ library: request.library,
22
+ limit,
23
+ }),
24
+ };
25
+ });
26
+ }
27
+ searchWithPlan(plan, library, limit) {
28
+ const branchPlans = this.expandPlans(plan, library);
29
+ if (branchPlans.length === 1) {
30
+ return this.searchSingle(branchPlans[0], plan, library, limit);
31
+ }
32
+ const branchLimit = Math.min(Math.max(limit * 2, 6), 12);
33
+ const bestByChunk = new Map();
34
+ for (const [branchIndex, branchPlan] of branchPlans.entries()) {
35
+ const branchResults = this.searchSingle(branchPlan, plan, library, branchLimit);
36
+ for (const branchResult of branchResults) {
37
+ const chunkKey = `${branchResult.page_url}#${branchResult.chunk_index}`;
38
+ const scoreBoost = branchIndex === 0 ? 0 : 0.03;
39
+ const adjustedScore = Number((branchResult.rerank_score + scoreBoost).toFixed(6));
40
+ const existing = bestByChunk.get(chunkKey);
41
+ if (!existing) {
42
+ bestByChunk.set(chunkKey, {
43
+ ...branchResult,
44
+ rerank_score: adjustedScore,
45
+ branch_hits: 1,
46
+ });
47
+ continue;
48
+ }
49
+ existing.branch_hits += 1;
50
+ if (adjustedScore > existing.rerank_score) {
51
+ bestByChunk.set(chunkKey, {
52
+ ...branchResult,
53
+ rerank_score: adjustedScore,
54
+ branch_hits: existing.branch_hits,
55
+ });
56
+ }
57
+ }
58
+ }
59
+ const aggregated = Array.from(bestByChunk.values())
60
+ .map(({ branch_hits, ...result }) => {
61
+ const reasons = [...result.reasons];
62
+ if (branch_hits > 1) {
63
+ reasons.push("matched multiple focused subqueries");
64
+ }
65
+ return {
66
+ ...result,
67
+ rerank_score: Number((result.rerank_score + Math.min((branch_hits - 1) * 0.05, 0.15)).toFixed(6)),
68
+ reasons: Array.from(new Set(reasons)).slice(0, 4),
69
+ };
70
+ })
71
+ .sort((left, right) => {
72
+ if (right.rerank_score !== left.rerank_score) {
73
+ return right.rerank_score - left.rerank_score;
74
+ }
75
+ return left.lexical_score - right.lexical_score;
76
+ });
77
+ return this.collapseDuplicates(plan, aggregated).slice(0, limit);
78
+ }
79
+ expandPlans(plan, library) {
80
+ const plans = [plan];
81
+ const seen = new Set([plan.normalized_query]);
82
+ for (const subquery of plan.decomposed_queries) {
83
+ const subqueryPlan = this.planner.build(subquery, library);
84
+ if (seen.has(subqueryPlan.normalized_query)) {
85
+ continue;
86
+ }
87
+ seen.add(subqueryPlan.normalized_query);
88
+ plans.push(subqueryPlan);
89
+ }
90
+ return plans;
91
+ }
92
+ searchSingle(retrievalPlan, scoringPlan, library, limit) {
93
+ const ftsQuery = this.buildFtsQuery(retrievalPlan);
12
94
  if (!ftsQuery)
13
95
  return [];
14
96
  try {
15
- const candidates = this.fetchCandidates(ftsQuery, opts.library, limit);
97
+ const candidates = this.fetchCandidates(ftsQuery, library, limit);
16
98
  if (candidates.length === 0) {
17
99
  return [];
18
100
  }
19
- const reranked = this.rerank(plan, candidates);
20
- return this.collapseDuplicates(plan, reranked).slice(0, limit);
101
+ const reranked = this.rerank(scoringPlan, candidates);
102
+ return this.collapseDuplicates(scoringPlan, reranked).slice(0, limit);
21
103
  }
22
104
  catch (err) {
23
105
  console.warn(`[DocShark] Search failed:`, err.message);
@@ -0,0 +1,35 @@
1
+ import * as v from 'valibot';
2
+ import type { SearchEngine } from '../storage/search.js';
3
/**
 * Factory for the `search_docs_batch` tool.
 * The returned object pairs a `definition` (tool name, description and a
 * valibot schema accepting 1 to 10 requests, each with a `query`, an optional
 * `library` and a `limit` of 1 to 20 that defaults to 5) with an async
 * `handler` that resolves to text content.
 */
+ export declare function createSearchDocsBatchTool(searchEngine: SearchEngine): {
4
+ definition: {
5
+ name: "search_docs_batch";
6
+ description: string;
7
+ schema: v.ObjectSchema<{
8
+ readonly requests: v.SchemaWithPipe<readonly [v.ArraySchema<v.ObjectSchema<{
9
+ readonly query: v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.DescriptionAction<string, "The search query. Use natural language or specific terms.">]>;
10
+ readonly library: v.OptionalSchema<v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.DescriptionAction<string, "Optional library filter for this query.">]>, undefined>;
11
+ readonly limit: v.OptionalSchema<v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.IntegerAction<number, undefined>, v.MinValueAction<number, 1, undefined>, v.MaxValueAction<number, 20, undefined>, v.DescriptionAction<number, "Max results to return for this query. Default: 5.">]>, 5>;
12
+ }, undefined>, undefined>, v.MinLengthAction<{
13
+ query: string;
14
+ library?: string | undefined;
15
+ limit: number;
16
+ }[], 1, undefined>, v.MaxLengthAction<{
17
+ query: string;
18
+ library?: string | undefined;
19
+ limit: number;
20
+ }[], 10, undefined>]>;
21
+ }, undefined>;
22
+ };
23
+ handler: ({ requests }: {
24
+ requests: Array<{
25
+ query: string;
26
+ library?: string;
27
+ limit?: number;
28
+ }>;
29
+ }) => Promise<{
30
+ content: {
31
+ type: "text";
32
+ text: string;
33
+ }[];
34
+ }>;
35
+ };
@@ -0,0 +1,23 @@
1
+ import * as v from 'valibot';
2
+ import { tool } from 'tmcp/utils';
3
+ import { formatBatchSearchResults } from '../search/format-results.js';
4
/**
 * Creates the `search_docs_batch` tool for the given search engine.
 *
 * @param searchEngine Engine whose `searchMany` executes the batch.
 * @returns `definition` (name, description and valibot input schema: 1 to 10
 *          requests, each with `query`, optional `library` and a `limit` of
 *          1 to 20 defaulting to 5) and `handler`, which runs the batch and
 *          resolves to a formatted text result.
 */
export function createSearchDocsBatchTool(searchEngine) {
    return {
        definition: {
            name: 'search_docs_batch',
            description: 'Run multiple documentation searches in one call. ' +
                'Useful when you need several focused lookups against one library or across a small set of libraries.',
            schema: v.object({
                requests: v.pipe(v.array(v.object({
                    query: v.pipe(v.string(), v.description('The search query. Use natural language or specific terms.')),
                    library: v.optional(v.pipe(v.string(), v.description('Optional library filter for this query.'))),
                    limit: v.optional(v.pipe(v.number(), v.integer(), v.minValue(1), v.maxValue(20), v.description('Max results to return for this query. Default: 5.')), 5),
                })), v.minLength(1), v.maxLength(10)),
            }),
        },
        handler: async ({ requests }) => {
            // Report failures as a text result, as the registered tool of the
            // same name does, instead of letting the exception escape.
            try {
                const results = searchEngine.searchMany(requests);
                return tool.text(formatBatchSearchResults(results));
            }
            catch (err) {
                const message = err instanceof Error ? err.message : 'Search failed';
                return tool.text(`❌ Error: ${message}`);
            }
        },
    };
}
package/dist/version.d.ts CHANGED
@@ -1 +1 @@
1
- export declare const VERSION = "0.1.23";
1
+ export declare const VERSION = "0.1.25";
package/dist/version.js CHANGED
@@ -1,2 +1,2 @@
1
1
  // This file is automatically updated by the version sync script.
2
- export const VERSION = '0.1.23';
2
+ export const VERSION = '0.1.25';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "docshark",
3
- "version": "0.1.23",
3
+ "version": "0.1.25",
4
4
  "description": "🦈 Documentation MCP Server — scrape, index, and search any doc website",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",