struth 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,141 @@
1
+ /**
2
+ * MCP retrieval logic — pure functions for keyword search and library listing.
3
+ * No classes — only exported functions with injected dependencies for testability.
4
+ */
5
+
6
+ import { listDocSets, readDocSet } from "../core/storage/index.js";
7
+ import type { DocSetManifest, LibrarySummary, ServedSection } from "../core/types.js";
8
+ import { buildServedSection } from "./fts5-search.js";
9
+
10
/**
 * Tokenize a string into lowercase words.
 *
 * The text is lowercased and split on every run of characters that is not a
 * Unicode letter, combining mark, digit, or underscore. This is a superset of
 * the ASCII `\w` class, so plain ASCII input tokenizes as before while accented
 * and non-Latin words stay intact (an ASCII-only `\W+` split turns "café"
 * into "caf" and drops CJK text entirely).
 *
 * @param text - Raw text to tokenize
 * @returns Lowercased tokens in their original order; empty array when the
 *   input contains no word characters
 */
function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}_]+/u)
    .filter((word) => word.length > 0);
}
19
+
20
/**
 * Count occurrences of query tokens in a target token list.
 *
 * Every query token contributes the number of times it appears in
 * `targetTokens`; a token repeated in the query is counted once per
 * repetition. Comparison is exact (`===`), so callers are expected to pass
 * tokens that were normalized the same way.
 *
 * @param queryTokens - Tokens from the search query
 * @param targetTokens - Tokens from the text being scored
 * @returns Total number of (query token, target token) equal pairs
 */
function countMatches(queryTokens: string[], targetTokens: string[]): number {
  if (queryTokens.length === 0 || targetTokens.length === 0) {
    return 0;
  }

  // Build the target frequency table once so the work is O(q + t) rather than
  // comparing every query token against every target token.
  const frequency = new Map<string, number>();
  for (const token of targetTokens) {
    frequency.set(token, (frequency.get(token) ?? 0) + 1);
  }

  let count = 0;
  for (const queryToken of queryTokens) {
    count += frequency.get(queryToken) ?? 0;
  }
  return count;
}
34
+
35
/**
 * Searches the pages of a manifest for content matching a query.
 *
 * Uses weighted keyword matching: section title 3x, topic tags 2x, and the
 * first 200 body tokens 1x. Pages are read one at a time through
 * `sectionReader`; when a read fails the page is scored with an empty body, so
 * it can still match through its section title or tags. Pages with a score of
 * zero are dropped, the rest are sorted by descending score, and the top
 * `maxSections` are converted with `buildServedSection`.
 *
 * @param query - Search query string
 * @param manifest - Doc set manifest to search within
 * @param sectionReader - Injected function to read condensed markdown from disk
 * @param maxSections - Maximum number of sections to return (default 5); a
 *   zero, negative, or NaN limit yields an empty result
 * @returns Up to `maxSections` served sections, best match first
 */
export async function keywordSearch(
  query: string,
  manifest: DocSetManifest,
  sectionReader: (slug: string) => Promise<string>,
  maxSections = 5,
): Promise<ServedSection[]> {
  // A non-positive limit can never yield results. Bail out before touching the
  // disk, and before a negative value reaches slice(), where it would mean
  // "all but the last n" instead of "none".
  if (!(maxSections > 0)) {
    return [];
  }

  const queryTokens = tokenize(query);
  if (queryTokens.length === 0) {
    return [];
  }

  const scored: Array<{ score: number; title: string; content: string }> = [];

  for (const section of manifest.sections) {
    // Title and tag scores are identical for every page of the section, so
    // compute them once per section rather than once per page.
    const titleScore = countMatches(queryTokens, tokenize(section.name)) * 3;
    const tagTokens = section.topic_tags.flatMap((tag) => tokenize(tag));
    const tagScore = countMatches(queryTokens, tagTokens) * 2;

    for (const page of section.pages) {
      let body: string;
      try {
        body = await sectionReader(page.slug);
      } catch {
        // Best effort: an unreadable page is scored on title/tags alone.
        body = "";
      }

      // Only the first 200 body tokens take part in scoring.
      const bodyScore = countMatches(queryTokens, tokenize(body).slice(0, 200));
      const score = titleScore + tagScore + bodyScore;

      if (score > 0) {
        scored.push({ score, title: section.name, content: body });
      }
    }
  }

  // Sort descending by score
  scored.sort((a, b) => b.score - a.score);

  // Take top N and map to ServedSection
  return scored
    .slice(0, maxSections)
    .map((item) => buildServedSection(item.title, item.content, manifest));
}
103
+
104
/**
 * Builds a LibrarySummary for every indexed doc set.
 *
 * Doc sets whose manifest cannot be read are left out of the result rather
 * than failing the whole listing.
 *
 * @param filter - Optional case-insensitive substring filter on library name;
 *   an empty or missing filter keeps every doc set
 * @returns One summary per readable doc set, in the order `listDocSets` returned them
 */
export async function getLibraries(filter?: string): Promise<LibrarySummary[]> {
  const available = await listDocSets();

  // True when no filter is active or the name contains it (ignoring case).
  const matchesFilter = (name: string): boolean =>
    !filter || name.toLowerCase().includes(filter.toLowerCase());

  const summaries: LibrarySummary[] = [];

  for (const docSet of available) {
    if (!matchesFilter(docSet.name)) {
      continue;
    }

    let manifest: DocSetManifest;
    try {
      manifest = await readDocSet(docSet.name, docSet.version ?? undefined);
    } catch {
      // Unreadable manifest: skip this doc set.
      continue;
    }

    // Sum the condensed word counts of all sections.
    let wordCount = 0;
    for (const section of manifest.sections) {
      wordCount += section.word_count_condensed;
    }

    const summary: LibrarySummary = {
      name: manifest.name,
      version: manifest.version,
      sections: manifest.sections.length,
      pages: manifest.pages.length,
      freshness: manifest.trust.freshness,
      last_updated: manifest.generated_at,
      word_count: wordCount,
    };
    summaries.push(summary);
  }

  return summaries;
}
@@ -0,0 +1,12 @@
1
+ /**
2
+ * MCP-specific schemas — re-exported from core for MCP server consumption.
3
+ * MCP request/response shapes validated at the MCP boundary.
4
+ */
5
+ export {
6
+ GetDocsRequest,
7
+ GetDocsResponse,
8
+ ListLibrariesRequest,
9
+ ListLibrariesResponse,
10
+ LibrarySummary,
11
+ ServedSection,
12
+ } from "../core/schemas.js";
@@ -0,0 +1,293 @@
1
+ /**
2
+ * Struth MCP server.
3
+ *
4
+ * Tools:
5
+ * get_docs — Retrieve documentation sections for a library
6
+ * list_libraries — List all indexed libraries with metadata
7
+ *
8
+ * Handler functions are exported for direct testing.
9
+ */
10
+
11
+ import { readFile } from "node:fs/promises";
12
+ import { join } from "node:path";
13
+ import { z } from "zod";
14
+ import { CLIENT_VERSION, SCHEMA_VERSION } from "../core/constants.js";
15
+ import { listDocSets, readDocSet } from "../core/storage/index.js";
16
+ import { sectionsDir } from "../core/storage/paths.js";
17
+ import type { GetDocsResponse, ServedSection, TrustMetadata } from "../core/types.js";
18
+ import { sendEvent } from "../telemetry/client.js";
19
+ import { crossLibrarySearch, fts5Search } from "./fts5-search.js";
20
+ import { getLibraries, keywordSearch } from "./retrieval.js";
21
+
22
/**
 * Placeholder trust metadata attached to responses that are not tied to a
 * single doc set: cross-library results and empty results.
 *
 * Property order is kept stable because the object is JSON-serialised into
 * tool responses.
 */
const EMPTY_TRUST: TrustMetadata = {
  freshness: "unknown",
  last_checked: null,
  last_changed: null,
  check_interval_hours: 24,
  // No coverage information is available, so every counter is zero.
  coverage: {
    total_discovered: 0,
    successfully_processed: 0,
    skipped: 0,
    skip_reasons: {},
    coverage_ratio: 0,
  },
};
36
+
37
/**
 * Builds the section reader for one doc set.
 *
 * The returned function resolves `<sectionsDir>/<slug>.condensed.md` for the
 * given doc set and reads it as UTF-8 text. The directory is resolved on every
 * call, not when the reader is created, and read errors reject the returned
 * promise.
 *
 * @param name - Doc set name passed to `sectionsDir`
 * @param version - Doc set version passed to `sectionsDir`, or null
 * @returns Async function mapping a page slug to its condensed markdown
 */
function makeSectionReader(name: string, version: string | null) {
  return async function readCondensedSection(slug: string) {
    const baseDir = sectionsDir(name, version);
    const filePath = join(baseDir, `${slug}.condensed.md`);
    return readFile(filePath, "utf-8");
  };
}
47
+
48
/**
 * Cross-library keyword fallback — used when FTS5 is unavailable or returns empty.
 *
 * Runs `keywordSearch` against each doc set in input order and concatenates
 * the results, keeping the first `maxSections` entries. Because results are
 * concatenated rather than re-ranked, earlier doc sets take precedence; once
 * the limit is filled, the remaining doc sets are not searched because their
 * results would be sliced away anyway.
 *
 * @param docSetInputs - Manifest and section reader for each doc set to search
 * @param query - Search query string
 * @param maxSections - Maximum number of sections to return; a zero, negative,
 *   or NaN limit yields an empty result
 * @returns Up to `maxSections` served sections
 */
async function keywordFallback(
  docSetInputs: Array<{
    manifest: Awaited<ReturnType<typeof readDocSet>>;
    sectionReader: (slug: string) => Promise<string>;
  }>,
  query: string,
  maxSections: number,
): Promise<ServedSection[]> {
  // Nothing to collect for a non-positive limit; also keeps a negative value
  // away from slice(), where it would mean "all but the last n".
  if (!(maxSections > 0)) {
    return [];
  }

  const collected: ServedSection[] = [];
  for (const { manifest, sectionReader } of docSetInputs) {
    const sections = await keywordSearch(query, manifest, sectionReader, maxSections);
    collected.push(...sections);

    // Later doc sets can no longer affect the first `maxSections` entries.
    if (collected.length >= maxSections) {
      break;
    }
  }
  return collected.slice(0, maxSections);
}
64
+
65
/**
 * Result shape returned by the tool handlers in this module (MCP
 * CallToolResult). The open index signature is required by the SDK.
 */
type CallToolResult = {
  /** Text blocks that make up the tool output. */
  content: Array<{ type: "text"; text: string }>;
  /** Present and true when the result reports an error. */
  isError?: boolean;
  /** Index signature required by the SDK. */
  [key: string]: unknown;
};
71
+
72
/** Wraps a JSON-serialisable payload as a successful single-text tool result. */
function jsonResult(payload: unknown): CallToolResult {
  return { content: [{ type: "text", text: JSON.stringify(payload) }] };
}

/** Builds an error tool result carrying a single text message. */
function errorResult(message: string): CallToolResult {
  return { content: [{ type: "text", text: message }], isError: true };
}

/** Builds the response envelope used when results are not tied to one library. */
function crossLibraryResult(sections: ServedSection[], query: string): CallToolResult {
  return jsonResult({
    sections,
    trust: EMPTY_TRUST,
    library: "*",
    version: null,
    query,
  });
}

/** Emits the "query" telemetry event without waiting for it to be delivered. */
function reportQuery(library: string, sections: ServedSection[], startedAt: number): void {
  // Deliberately fire-and-forget: the response must not wait on telemetry.
  // `void` marks the un-awaited promise as intentional.
  void sendEvent({
    event: "query",
    library,
    sections_hit: sections.map((s) => s.title),
    latency_ms: Date.now() - startedAt,
    client_version: CLIENT_VERSION,
    timestamp: new Date().toISOString(),
  });
}

/**
 * Handler for the get_docs tool.
 * Exported for direct testing without transport layer.
 *
 * Behaviour:
 * - A blank query returns an error result.
 * - With `library` set, only that doc set is searched. FTS5 search is tried
 *   first; keyword search is used when FTS5 returns nothing or throws. A
 *   `warning` field is added when the manifest's schema version differs from
 *   SCHEMA_VERSION. If the doc set cannot be read, an error result is returned.
 * - Without `library`, all doc sets are searched. When `project_path` is given
 *   and its lockfile resolves to packages that match indexed doc sets by name
 *   (case-insensitive), the search is limited to those; otherwise every doc
 *   set is searched. Doc sets that fail to load are skipped. Only the package
 *   names from the lockfile are used here, not the resolved versions.
 *
 * @param params - Tool arguments; `max_sections` defaults to 5 when omitted
 * @returns Tool result whose single text block is the JSON response or an
 *   error message
 */
export async function handleGetDocs(params: {
  query: string;
  library?: string;
  version?: string;
  project_path?: string;
  max_sections?: number;
}): Promise<CallToolResult> {
  const queryStart = Date.now();
  const maxSections = params.max_sections ?? 5;

  // Validate query is non-empty
  if (!params.query || params.query.trim().length === 0) {
    return errorResult("Error: query must be a non-empty string");
  }

  if (params.library) {
    // Search within a specific library
    let manifest: Awaited<ReturnType<typeof readDocSet>>;
    try {
      manifest = await readDocSet(params.library, params.version);
    } catch {
      return errorResult(
        `Error: library "${params.library}" not found. Use list_libraries to see available libraries.`,
      );
    }

    const reader = makeSectionReader(params.library, params.version ?? null);
    let sections: ServedSection[];
    try {
      sections = await fts5Search(params.query, manifest, reader, maxSections);
      if (sections.length === 0) {
        // FTS5 returned nothing — try keyword fallback
        sections = await keywordSearch(params.query, manifest, reader, maxSections);
      }
    } catch {
      // FTS5 failure — fall back to keyword search
      sections = await keywordSearch(params.query, manifest, reader, maxSections);
    }

    const response: GetDocsResponse & { warning?: string } = {
      sections,
      trust: manifest.trust,
      library: manifest.name,
      version: manifest.version,
      query: params.query,
    };

    // Check schema version mismatch
    if (manifest.schema_version !== SCHEMA_VERSION) {
      response.warning = `Warning: schema version mismatch — doc set has ${manifest.schema_version}, server expects ${SCHEMA_VERSION}. Results may be incomplete.`;
    }

    reportQuery(params.library, sections, queryStart);
    return jsonResult(response);
  }

  // Cross-library search: read each manifest once (avoid double-read via getLibraries)
  const allDocSets = await listDocSets();

  // Auto-detect relevant libraries from lockfile when project_path is provided
  let docSets = allDocSets;
  if (params.project_path) {
    try {
      const { resolveLockfile } = await import("./lockfile.js");
      const versions = await resolveLockfile(params.project_path);
      if (versions && versions.size > 0) {
        const pkgNames = new Set([...versions.keys()].map((k) => k.toLowerCase()));
        const matched = allDocSets.filter((ds) => pkgNames.has(ds.name.toLowerCase()));
        // Only narrow the search when at least one indexed doc set matches.
        if (matched.length > 0) docSets = matched;
      }
    } catch {
      // lockfile resolution failed — fall back to searching all doc sets
    }
  }

  if (docSets.length === 0) {
    // Nothing indexed: return an empty envelope (no telemetry is sent here).
    return crossLibraryResult([], params.query);
  }

  const docSetInputs: Array<{
    name: string;
    version: string | null;
    manifest: Awaited<ReturnType<typeof readDocSet>>;
    sectionReader: (slug: string) => Promise<string>;
  }> = [];

  for (const ds of docSets) {
    try {
      const manifest = await readDocSet(ds.name, ds.version ?? undefined);
      const reader = makeSectionReader(ds.name, ds.version);
      docSetInputs.push({ name: ds.name, version: ds.version, manifest, sectionReader: reader });
    } catch {
      // Skip libraries that fail to read
    }
  }

  let topSections: ServedSection[];
  try {
    topSections = await crossLibrarySearch(params.query, docSetInputs, maxSections);
    if (topSections.length === 0) {
      topSections = await keywordFallback(docSetInputs, params.query, maxSections);
    }
  } catch {
    topSections = await keywordFallback(docSetInputs, params.query, maxSections);
  }

  reportQuery("*", topSections, queryStart);
  return crossLibraryResult(topSections, params.query);
}
238
+
239
/**
 * Handler for the list_libraries tool.
 * Exported for direct testing without transport layer.
 *
 * @param params - Tool arguments; `filter` is forwarded to `getLibraries`
 * @returns Tool result whose single text block is the JSON string
 *   `{"libraries": [...]}`
 */
export async function handleListLibraries(params: {
  filter?: string;
}): Promise<CallToolResult> {
  const payload = { libraries: await getLibraries(params.filter) };
  const text = JSON.stringify(payload);
  return { content: [{ type: "text", text }] };
}
256
+
257
/**
 * Start the MCP server with stdio transport.
 *
 * The SDK modules are imported lazily inside this function. Two tools are
 * registered, `get_docs` and `list_libraries`, each delegating to its exported
 * handler; the server is then connected to a stdio transport.
 */
export async function startMcpServer(): Promise<void> {
  const mcpModule = await import("@modelcontextprotocol/sdk/server/mcp.js");
  const stdioModule = await import("@modelcontextprotocol/sdk/server/stdio.js");

  // NOTE(review): the advertised server version is SCHEMA_VERSION, not
  // CLIENT_VERSION — confirm this is intentional.
  const server = new mcpModule.McpServer({ name: "struth", version: SCHEMA_VERSION });

  // Argument schema for get_docs; max_sections is an integer in 1..20, default 5.
  const getDocsArgs = {
    query: z.string().min(1),
    library: z.string().optional(),
    version: z.string().optional(),
    project_path: z.string().optional(),
    max_sections: z.number().int().min(1).max(20).default(5),
  };
  server.tool(
    "get_docs",
    "Retrieve documentation sections for a library, with weighted keyword search",
    getDocsArgs,
    async (args) => handleGetDocs(args),
  );

  // Argument schema for list_libraries.
  const listLibrariesArgs = {
    filter: z.string().optional(),
  };
  server.tool(
    "list_libraries",
    "List all indexed libraries with metadata",
    listLibrariesArgs,
    async (args) => handleListLibraries(args),
  );

  await server.connect(new stdioModule.StdioServerTransport());
}
@@ -0,0 +1,36 @@
1
+ import type { z } from "zod";
2
+ import { TELEMETRY_ENDPOINT } from "../core/constants.js";
3
+ import type { TelemetryEvent } from "../core/schemas.js";
4
+
5
/**
 * Reports whether telemetry collection is switched on.
 * Opt-in only: the STRUTH_TELEMETRY environment variable must be exactly "on".
 *
 * @returns true only when STRUTH_TELEMETRY equals "on" (case-sensitive)
 */
export function shouldCollect(): boolean {
  const { STRUTH_TELEMETRY: flag } = process.env;
  return flag === "on";
}
12
+
13
/**
 * Send a telemetry event to the ingest endpoint.
 * Fails silently — telemetry must never break the user's workflow.
 *
 * Nothing is sent unless `shouldCollect()` is true and the
 * STRUTH_TELEMETRY_TOKEN environment variable is set. The event is POSTed as
 * JSON to `${TELEMETRY_ENDPOINT}/events` with a bearer token and a 2000 ms
 * timeout. The response status and body are ignored.
 *
 * @param event - Telemetry event to serialise and send
 * @returns Promise that always resolves; network and serialisation errors are swallowed
 */
export async function sendEvent(event: z.infer<typeof TelemetryEvent>): Promise<void> {
  if (!shouldCollect()) return;

  const token = process.env.STRUTH_TELEMETRY_TOKEN;
  if (!token) return;

  try {
    const response = await fetch(`${TELEMETRY_ENDPOINT}/events`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${token}`,
      },
      body: JSON.stringify(event),
      signal: AbortSignal.timeout(2000),
    });
    // The body is never read, so cancel it explicitly; otherwise the
    // underlying connection is only released when the response is
    // garbage-collected.
    await response.body?.cancel();
  } catch {
    // Fail silently — telemetry must never break the user's workflow
  }
}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Telemetry event schema — re-exported from core.
3
+ * Used by both the client (CLI/MCP) and the CF Worker ingest endpoint.
4
+ */
5
+ export { TelemetryEvent } from "../core/schemas.js";