docshark 0.1.22 → 0.1.24

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -160,6 +160,16 @@ bun run src/cli.ts start --port 6380
160
160
  bun run src/cli.ts list
161
161
  ```
162
162
 
163
+ ### Tests
164
+
165
+ Run the regression suite before merging or publishing changes:
166
+
167
+ ```bash
168
+ bun test scripts/*.test.ts
169
+ ```
170
+
171
+ The suite covers storage and migrations, library management, extraction, chunking, search, crawl helpers, API routes, and MCP tool wrappers.
172
+
163
173
  ## 🔄 Versioning & Changelog
164
174
 
165
175
  This project uses [Google's Release Please](https://github.com/googleapis/release-please) to automate versioning and changelog generation.
@@ -38,6 +38,13 @@ export function createApiRouter(deps) {
38
38
  const results = deps.searchEngine.search(q, { library, limit });
39
39
  return json(results);
40
40
  }
41
+ // POST /api/search/batch
42
+ if (method === 'POST' && path === '/search/batch') {
43
+ const body = await request.json();
44
+ const requests = Array.isArray(body?.requests) ? body.requests : [];
45
+ const results = deps.searchEngine.searchMany(requests);
46
+ return json(results);
47
+ }
41
48
  // GET /api/crawls
42
49
  if (method === 'GET' && path === '/crawls') {
43
50
  const libraryId = url.searchParams.get('library_id') || undefined;
package/dist/cli.js CHANGED
@@ -5,7 +5,7 @@ import { startHttpServer } from "./http.js";
5
5
  import { StdioTransport } from "@tmcp/transport-stdio";
6
6
  import { server, db, searchEngine, libraryService } from "./server.js";
7
7
  import { maybeNotifyAboutUpdate, runUpdateCommand } from "./cli-update.js";
8
- import { formatSearchResults } from "./search/format-results.js";
8
+ import { formatBatchSearchResults, formatSearchResults, } from "./search/format-results.js";
9
9
  import { VERSION } from "./version.js";
10
10
  const useColor = process.stdout.isTTY;
11
11
  const color = {
@@ -163,6 +163,24 @@ cli
163
163
  }
164
164
  console.log(`\n${formatSearchResults(query, results)}\n`);
165
165
  });
166
+ cli
167
+ .command("search-batch [...queries]", "Search multiple documentation queries")
168
+ .option("-l, --library <name>", "Filter all queries by library")
169
+ .option("-m, --limit <n>", "Max results per query", { default: "5" })
170
+ .action(async (queries, opts) => {
171
+ await maybeNotifyForCommand("search-batch");
172
+ if (!Array.isArray(queries) || queries.length === 0) {
173
+ console.error("\nāŒ Please provide at least one query.\n");
174
+ process.exit(1);
175
+ }
176
+ db.init();
177
+ const results = searchEngine.searchMany(queries.map((query) => ({
178
+ query,
179
+ library: opts.library,
180
+ limit: parseInt(opts.limit),
181
+ })));
182
+ console.log(`\n${formatBatchSearchResults(results)}\n`);
183
+ });
166
184
  cli
167
185
  .command("list", "List indexed libraries")
168
186
  .alias("l")
package/dist/index.d.ts CHANGED
@@ -2,3 +2,5 @@ export * from "./server.js";
2
2
  export * from "./types.js";
3
3
  export * from "./version.js";
4
4
  export * from "./http.js";
5
+ export * from "./tools/search-docs.js";
6
+ export * from "./tools/search-docs-batch.js";
package/dist/index.js CHANGED
@@ -2,3 +2,5 @@ export * from "./server.js";
2
2
  export * from "./types.js";
3
3
  export * from "./version.js";
4
4
  export * from "./http.js";
5
+ export * from "./tools/search-docs.js";
6
+ export * from "./tools/search-docs-batch.js";
@@ -1,2 +1,3 @@
1
- import type { SearchResult } from './types.js';
1
+ import type { BatchSearchResult, SearchResult } from './types.js';
2
2
  export declare function formatSearchResults(query: string, results: SearchResult[]): string;
3
+ export declare function formatBatchSearchResults(results: BatchSearchResult[]): string;
@@ -5,19 +5,32 @@ function formatReasons(reasons) {
5
5
  }
6
6
  return `**Why this ranked highly:** ${reasons.join(', ')}\n\n`;
7
7
  }
8
- export function formatSearchResults(query, results) {
9
- const formatted = results
8
+ function formatResultBlocks(results) {
9
+ return results
10
10
  .map((result, index) => {
11
11
  let block = `### ${index + 1}. ${result.page_title} — ${result.library_display_name}\n`;
12
12
  block += `**Source:** ${result.page_url}\n`;
13
13
  if (result.heading_context.trim().length > 0) {
14
14
  block += `**Section:** ${result.heading_context}\n`;
15
15
  }
16
- // Sanitize content to prevent prompt injection
17
16
  const sanitizedContent = sanitizeDocContent(result.content);
18
17
  block += `${formatReasons(result.reasons)}${sanitizedContent}`;
19
18
  return block;
20
19
  })
21
20
  .join('\n\n---\n\n');
22
- return `## Results for "${query}"\n\n${formatted}`;
21
+ }
22
+ export function formatSearchResults(query, results) {
23
+ return `## Results for "${query}"\n\n${formatResultBlocks(results)}`;
24
+ }
25
+ export function formatBatchSearchResults(results) {
26
+ const formatted = results
27
+ .map((result, index) => {
28
+ const librarySuffix = result.library ? ` — ${result.library}` : '';
29
+ if (result.results.length === 0) {
30
+ return `### ${index + 1}. ${result.query}${librarySuffix}\n\nNo results found.`;
31
+ }
32
+ return `### ${index + 1}. ${result.query}${librarySuffix}\n\n${formatResultBlocks(result.results)}`;
33
+ })
34
+ .join('\n\n***\n\n');
35
+ return `## Batch Search Results\n\n${formatted}`;
23
36
  }
@@ -2,6 +2,10 @@ import type { SearchPlan } from "./types.js";
2
2
  export declare function normalizeSearchText(value: string): string;
3
3
  export declare class QueryPlanner {
4
4
  build(query: string, library?: string): SearchPlan;
5
+ private buildDecomposedQueries;
6
+ private shouldDecompose;
7
+ private segmentBySeparators;
8
+ private chunkKeywords;
5
9
  private detectIntent;
6
10
  private extractVersion;
7
11
  }
@@ -44,18 +44,60 @@ export class QueryPlanner {
44
44
  .map((token) => sanitizeToken(token))
45
45
  .filter(Boolean);
46
46
  const filteredKeywords = Array.from(new Set(rawTokens.filter((token) => token.length > 1 && !STOP_WORDS.has(token))));
47
+ const keywords = filteredKeywords.length > 0
48
+ ? filteredKeywords
49
+ : Array.from(new Set(rawTokens));
47
50
  return {
48
51
  original_query: query,
49
52
  normalized_query: normalizedQuery,
50
53
  intent: this.detectIntent(normalizedQuery),
51
- keywords: filteredKeywords.length > 0
52
- ? filteredKeywords
53
- : Array.from(new Set(rawTokens)),
54
+ keywords,
54
55
  phrases: PHRASE_HINTS.filter((phrase) => normalizedQuery.includes(phrase)),
56
+ decomposed_queries: this.buildDecomposedQueries(normalizedQuery, keywords),
55
57
  requested_library: library,
56
58
  requested_version: this.extractVersion(normalizedQuery),
57
59
  };
58
60
  }
61
+ buildDecomposedQueries(normalizedQuery, keywords) {
62
+ if (!this.shouldDecompose(normalizedQuery, keywords)) {
63
+ return [];
64
+ }
65
+ const segmentedQueries = this.segmentBySeparators(normalizedQuery);
66
+ if (segmentedQueries.length > 1) {
67
+ return segmentedQueries;
68
+ }
69
+ return this.chunkKeywords(keywords);
70
+ }
71
+ shouldDecompose(normalizedQuery, keywords) {
72
+ if (keywords.length < 4) {
73
+ return false;
74
+ }
75
+ if (/[;,]/.test(normalizedQuery) || /\b(and|or|then|plus|with)\b/.test(normalizedQuery)) {
76
+ return true;
77
+ }
78
+ return keywords.length >= 7;
79
+ }
80
+ segmentBySeparators(normalizedQuery) {
81
+ const segments = normalizedQuery
82
+ .split(/(?:,|;|\band\b|\bor\b|\bthen\b|\bplus\b)/g)
83
+ .map((segment) => normalizeSearchText(segment))
84
+ .filter((segment) => segment.split(/\s+/).length >= 2);
85
+ return Array.from(new Set(segments)).slice(0, 4);
86
+ }
87
+ chunkKeywords(keywords) {
88
+ const targetBranches = Math.min(4, Math.ceil(keywords.length / 2));
89
+ const chunkSize = Math.min(3, Math.max(2, Math.ceil(keywords.length / targetBranches)));
90
+ const chunks = [];
91
+ for (let index = 0; index < keywords.length; index += chunkSize) {
92
+ const group = keywords.slice(index, index + chunkSize);
93
+ if (group.length === 1 && chunks.length > 0) {
94
+ chunks[chunks.length - 1] += ` ${group[0]}`;
95
+ continue;
96
+ }
97
+ chunks.push(group.join(" "));
98
+ }
99
+ return Array.from(new Set(chunks)).slice(0, 4);
100
+ }
59
101
  detectIntent(query) {
60
102
  if (query.includes("getting started") ||
61
103
  query.includes("quickstart") ||
@@ -3,12 +3,18 @@ export interface SearchOptions {
3
3
  library?: string;
4
4
  limit?: number;
5
5
  }
6
+ export interface BatchSearchRequest {
7
+ query: string;
8
+ library?: string;
9
+ limit?: number;
10
+ }
6
11
  export interface SearchPlan {
7
12
  original_query: string;
8
13
  normalized_query: string;
9
14
  intent: SearchIntent;
10
15
  keywords: string[];
11
16
  phrases: string[];
17
+ decomposed_queries: string[];
12
18
  requested_version?: string;
13
19
  requested_library?: string;
14
20
  }
@@ -31,3 +37,9 @@ export interface SearchResult extends SearchCandidate {
31
37
  path_type: string;
32
38
  version_tag: string | null;
33
39
  }
40
+ export interface BatchSearchResult {
41
+ query: string;
42
+ library?: string;
43
+ limit: number;
44
+ results: SearchResult[];
45
+ }
package/dist/server.js CHANGED
@@ -5,7 +5,7 @@ import * as v from "valibot";
5
5
  import { tool } from "tmcp/utils";
6
6
  import { Database } from "./storage/db.js";
7
7
  import { SearchEngine } from "./storage/search.js";
8
- import { formatSearchResults } from "./search/format-results.js";
8
+ import { formatBatchSearchResults, formatSearchResults, } from "./search/format-results.js";
9
9
  import { LibraryService } from "./services/library.js";
10
10
  import { JobManager } from "./jobs/manager.js";
11
11
  import { VERSION } from "./version.js";
@@ -55,6 +55,30 @@ server.tool({
55
55
  return tool.text(`āŒ Error: ${message}`);
56
56
  }
57
57
  });
58
+ server.tool({
59
+ name: "search_docs_batch",
60
+ description: "Run multiple documentation searches in one call. Use this for repeated or decomposed lookups.",
61
+ annotations: {
62
+ readOnlyHint: true,
63
+ idempotentHint: true,
64
+ },
65
+ schema: v.object({
66
+ requests: v.pipe(v.array(v.object({
67
+ query: v.pipe(v.string(), v.description("Search query. Use natural language.")),
68
+ library: v.optional(v.pipe(v.string(), v.description("Filter to a specific library."))),
69
+ limit: v.optional(v.pipe(v.number(), v.integer(), v.minValue(1), v.maxValue(20)), 5),
70
+ })), v.minLength(1), v.maxLength(10)),
71
+ }),
72
+ }, async ({ requests }) => {
73
+ try {
74
+ const results = searchEngine.searchMany(requests);
75
+ return tool.text(formatBatchSearchResults(results));
76
+ }
77
+ catch (err) {
78
+ const message = err instanceof Error ? err.message : "Search failed";
79
+ return tool.text(`āŒ Error: ${message}`);
80
+ }
81
+ });
58
82
  function requireValue(value, message) {
59
83
  if (value === undefined || value === null || value === "") {
60
84
  throw new Error(message);
@@ -2,6 +2,8 @@ import { Database as BunDatabase } from "bun:sqlite";
2
2
  import type { Library, Page, CrawlJob } from "../types.js";
3
3
  export declare class Database {
4
4
  private db;
5
+ private hasColumn;
6
+ private ensureColumn;
5
7
  init(): void;
6
8
  /** Expose raw DB for search engine direct queries */
7
9
  raw(): BunDatabase;
@@ -5,6 +5,17 @@ import { mkdirSync } from "fs";
5
5
  import { homedir } from "os";
6
6
  export class Database {
7
7
  db;
8
+ hasColumn(tableName, columnName) {
9
+ const columns = this.db
10
+ .prepare(`PRAGMA table_info(${tableName})`)
11
+ .all();
12
+ return columns.some((column) => column.name === columnName);
13
+ }
14
+ ensureColumn(tableName, columnName, definition) {
15
+ if (!this.hasColumn(tableName, columnName)) {
16
+ this.db.run(`ALTER TABLE ${tableName} ADD COLUMN ${columnName} ${definition}`);
17
+ }
18
+ }
8
19
  init() {
9
20
  const dir = process.env.DOCSHARK_DATA_DIR || resolve(homedir(), ".docshark");
10
21
  mkdirSync(dir, { recursive: true });
@@ -104,6 +115,7 @@ export class Database {
104
115
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
105
116
  )
106
117
  `);
118
+ this.ensureColumn("crawl_jobs", "session_id", "TEXT");
107
119
  }
108
120
  // ──────────────────────────────────────
109
121
  // Library CRUD
@@ -1,11 +1,15 @@
1
1
  import type { Database } from "./db.js";
2
- import type { SearchOptions, SearchResult } from "../search/types.js";
3
- export type { SearchOptions, SearchResult } from "../search/types.js";
2
+ import type { BatchSearchRequest, BatchSearchResult, SearchOptions, SearchResult } from "../search/types.js";
3
+ export type { BatchSearchRequest, BatchSearchResult, SearchOptions, SearchResult, } from "../search/types.js";
4
4
  export declare class SearchEngine {
5
5
  private db;
6
6
  private planner;
7
7
  constructor(db: Database);
8
8
  search(query: string, opts?: SearchOptions): SearchResult[];
9
+ searchMany(requests: BatchSearchRequest[]): BatchSearchResult[];
10
+ private searchWithPlan;
11
+ private expandPlans;
12
+ private searchSingle;
9
13
  private fetchCandidates;
10
14
  private buildFtsQuery;
11
15
  private quoteTerm;
@@ -8,16 +8,98 @@ export class SearchEngine {
8
8
  search(query, opts = {}) {
9
9
  const limit = opts.limit ?? 5;
10
10
  const plan = this.planner.build(query, opts.library);
11
- const ftsQuery = this.buildFtsQuery(plan);
11
+ return this.searchWithPlan(plan, opts.library, limit);
12
+ }
13
+ searchMany(requests) {
14
+ return requests.map((request) => {
15
+ const limit = request.limit ?? 5;
16
+ return {
17
+ query: request.query,
18
+ library: request.library,
19
+ limit,
20
+ results: this.search(request.query, {
21
+ library: request.library,
22
+ limit,
23
+ }),
24
+ };
25
+ });
26
+ }
27
+ searchWithPlan(plan, library, limit) {
28
+ const branchPlans = this.expandPlans(plan, library);
29
+ if (branchPlans.length === 1) {
30
+ return this.searchSingle(branchPlans[0], plan, library, limit);
31
+ }
32
+ const branchLimit = Math.min(Math.max(limit * 2, 6), 12);
33
+ const bestByChunk = new Map();
34
+ for (const [branchIndex, branchPlan] of branchPlans.entries()) {
35
+ const branchResults = this.searchSingle(branchPlan, plan, library, branchLimit);
36
+ for (const branchResult of branchResults) {
37
+ const chunkKey = `${branchResult.page_url}#${branchResult.chunk_index}`;
38
+ const scoreBoost = branchIndex === 0 ? 0 : 0.03;
39
+ const adjustedScore = Number((branchResult.rerank_score + scoreBoost).toFixed(6));
40
+ const existing = bestByChunk.get(chunkKey);
41
+ if (!existing) {
42
+ bestByChunk.set(chunkKey, {
43
+ ...branchResult,
44
+ rerank_score: adjustedScore,
45
+ branch_hits: 1,
46
+ });
47
+ continue;
48
+ }
49
+ existing.branch_hits += 1;
50
+ if (adjustedScore > existing.rerank_score) {
51
+ bestByChunk.set(chunkKey, {
52
+ ...branchResult,
53
+ rerank_score: adjustedScore,
54
+ branch_hits: existing.branch_hits,
55
+ });
56
+ }
57
+ }
58
+ }
59
+ const aggregated = Array.from(bestByChunk.values())
60
+ .map(({ branch_hits, ...result }) => {
61
+ const reasons = [...result.reasons];
62
+ if (branch_hits > 1) {
63
+ reasons.push("matched multiple focused subqueries");
64
+ }
65
+ return {
66
+ ...result,
67
+ rerank_score: Number((result.rerank_score + Math.min((branch_hits - 1) * 0.05, 0.15)).toFixed(6)),
68
+ reasons: Array.from(new Set(reasons)).slice(0, 4),
69
+ };
70
+ })
71
+ .sort((left, right) => {
72
+ if (right.rerank_score !== left.rerank_score) {
73
+ return right.rerank_score - left.rerank_score;
74
+ }
75
+ return left.lexical_score - right.lexical_score;
76
+ });
77
+ return this.collapseDuplicates(plan, aggregated).slice(0, limit);
78
+ }
79
+ expandPlans(plan, library) {
80
+ const plans = [plan];
81
+ const seen = new Set([plan.normalized_query]);
82
+ for (const subquery of plan.decomposed_queries) {
83
+ const subqueryPlan = this.planner.build(subquery, library);
84
+ if (seen.has(subqueryPlan.normalized_query)) {
85
+ continue;
86
+ }
87
+ seen.add(subqueryPlan.normalized_query);
88
+ plans.push(subqueryPlan);
89
+ }
90
+ return plans;
91
+ }
92
+ searchSingle(retrievalPlan, scoringPlan, library, limit) {
93
+ const ftsQuery = this.buildFtsQuery(retrievalPlan);
12
94
  if (!ftsQuery)
13
95
  return [];
14
96
  try {
15
- const candidates = this.fetchCandidates(ftsQuery, opts.library, limit);
97
+ const candidates = this.fetchCandidates(ftsQuery, library, limit);
16
98
  if (candidates.length === 0) {
17
99
  return [];
18
100
  }
19
- const reranked = this.rerank(plan, candidates);
20
- return this.collapseDuplicates(plan, reranked).slice(0, limit);
101
+ const reranked = this.rerank(scoringPlan, candidates);
102
+ return this.collapseDuplicates(scoringPlan, reranked).slice(0, limit);
21
103
  }
22
104
  catch (err) {
23
105
  console.warn(`[DocShark] Search failed:`, err.message);
@@ -0,0 +1,35 @@
1
+ import * as v from 'valibot';
2
+ import type { SearchEngine } from '../storage/search.js';
3
+ export declare function createSearchDocsBatchTool(searchEngine: SearchEngine): {
4
+ definition: {
5
+ name: "search_docs_batch";
6
+ description: string;
7
+ schema: v.ObjectSchema<{
8
+ readonly requests: v.SchemaWithPipe<readonly [v.ArraySchema<v.ObjectSchema<{
9
+ readonly query: v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.DescriptionAction<string, "The search query. Use natural language or specific terms.">]>;
10
+ readonly library: v.OptionalSchema<v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.DescriptionAction<string, "Optional library filter for this query.">]>, undefined>;
11
+ readonly limit: v.OptionalSchema<v.SchemaWithPipe<readonly [v.NumberSchema<undefined>, v.IntegerAction<number, undefined>, v.MinValueAction<number, 1, undefined>, v.MaxValueAction<number, 20, undefined>, v.DescriptionAction<number, "Max results to return for this query. Default: 5.">]>, 5>;
12
+ }, undefined>, undefined>, v.MinLengthAction<{
13
+ query: string;
14
+ library?: string | undefined;
15
+ limit: number;
16
+ }[], 1, undefined>, v.MaxLengthAction<{
17
+ query: string;
18
+ library?: string | undefined;
19
+ limit: number;
20
+ }[], 10, undefined>]>;
21
+ }, undefined>;
22
+ };
23
+ handler: ({ requests }: {
24
+ requests: Array<{
25
+ query: string;
26
+ library?: string;
27
+ limit?: number;
28
+ }>;
29
+ }) => Promise<{
30
+ content: {
31
+ type: "text";
32
+ text: string;
33
+ }[];
34
+ }>;
35
+ };
@@ -0,0 +1,23 @@
1
+ import * as v from 'valibot';
2
+ import { tool } from 'tmcp/utils';
3
+ import { formatBatchSearchResults } from '../search/format-results.js';
4
+ export function createSearchDocsBatchTool(searchEngine) {
5
+ return {
6
+ definition: {
7
+ name: 'search_docs_batch',
8
+ description: 'Run multiple documentation searches in one call. ' +
9
+ 'Useful when you need several focused lookups against one library or across a small set of libraries.',
10
+ schema: v.object({
11
+ requests: v.pipe(v.array(v.object({
12
+ query: v.pipe(v.string(), v.description('The search query. Use natural language or specific terms.')),
13
+ library: v.optional(v.pipe(v.string(), v.description('Optional library filter for this query.'))),
14
+ limit: v.optional(v.pipe(v.number(), v.integer(), v.minValue(1), v.maxValue(20), v.description('Max results to return for this query. Default: 5.')), 5),
15
+ })), v.minLength(1), v.maxLength(10)),
16
+ }),
17
+ },
18
+ handler: async ({ requests }) => {
19
+ const results = searchEngine.searchMany(requests);
20
+ return tool.text(formatBatchSearchResults(results));
21
+ },
22
+ };
23
+ }
package/dist/version.d.ts CHANGED
@@ -1 +1 @@
1
- export declare const VERSION = "0.1.22";
1
+ export declare const VERSION = "0.1.24";
package/dist/version.js CHANGED
@@ -1,2 +1,2 @@
1
1
  // This file is automatically updated by the version sync script.
2
- export const VERSION = '0.1.22';
2
+ export const VERSION = '0.1.24';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "docshark",
3
- "version": "0.1.22",
3
+ "version": "0.1.24",
4
4
  "description": "🦈 Documentation MCP Server — scrape, index, and search any doc website",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -36,6 +36,7 @@
36
36
  "dev": "bun run --watch src/cli.ts start",
37
37
  "cli": "bun run src/cli.ts",
38
38
  "check": "tsc --noEmit",
39
+ "test": "bun test scripts/*.test.ts",
39
40
  "sync:version": "bun run src/scripts/sync-version.ts",
40
41
  "build": "bun run sync:version && rm -rf dist && tsc && chmod +x dist/cli.js",
41
42
  "prepublishOnly": "bun run build",