npm - struth - Versions diffs - 1.0.0 - Mend

struth 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/COMPLIANCE.md +41 -0
package/LICENSE +21 -0
package/README.md +135 -0
package/package.json +54 -0
package/src/cli/index.ts +244 -0
package/src/core/constants.ts +32 -0
package/src/core/pipeline/clean.ts +246 -0
package/src/core/pipeline/condense.ts +249 -0
package/src/core/pipeline/discover.ts +448 -0
package/src/core/pipeline/integrity.ts +214 -0
package/src/core/pipeline/organize.ts +184 -0
package/src/core/schemas.ts +204 -0
package/src/core/spawn.ts +22 -0
package/src/core/storage/index.ts +108 -0
package/src/core/storage/paths.ts +40 -0
package/src/core/types.ts +36 -0
package/src/daemon/process.ts +95 -0
package/src/daemon/refresh.ts +254 -0
package/src/mcp/fts5-index.ts +114 -0
package/src/mcp/fts5-search.ts +150 -0
package/src/mcp/lockfile.ts +135 -0
package/src/mcp/retrieval.ts +141 -0
package/src/mcp/schemas.ts +12 -0
package/src/mcp/server.ts +293 -0
package/src/telemetry/client.ts +36 -0
package/src/telemetry/schemas.ts +5 -0

package/src/mcp/retrieval.ts ADDED Viewed

@@ -0,0 +1,141 @@
+/**
+ * MCP retrieval logic — pure functions for keyword search and library listing.
+ * No classes — only exported functions with injected dependencies for testability.
+ */
+import { listDocSets, readDocSet } from "../core/storage/index.js";
+import type { DocSetManifest, LibrarySummary, ServedSection } from "../core/types.js";
+import { buildServedSection } from "./fts5-search.js";
+/**
+ * Tokenize a string into lowercase words.
+ *
+ * Splits on runs of characters that are not Unicode letters, combining marks,
+ * numbers, or underscores. Plain ASCII input tokenizes exactly as it did with
+ * `\W+`; non-ASCII words (e.g. "café") are now kept whole instead of being
+ * cut apart at the first non-ASCII character.
+ *
+ * @param text - Arbitrary input text
+ * @returns Lowercased tokens in input order; an empty array when there are none
+ */
+function tokenize(text: string): string[] {
+	return text
+		.toLowerCase()
+		.split(/[^\p{L}\p{M}\p{N}_]+/u)
+		.filter((w) => w.length > 0);
+}
+/**
+ * Count occurrences of query tokens in a tokenized target.
+ *
+ * Every query token contributes the number of times it appears in
+ * `targetTokens`, so a token repeated in the query is counted once per
+ * repetition. Comparison is exact string equality.
+ *
+ * @param queryTokens - Tokens from the search query
+ * @param targetTokens - Tokens from the text being scored
+ * @returns Total number of (query token, target token) equal pairs
+ */
+function countMatches(queryTokens: string[], targetTokens: string[]): number {
+	if (queryTokens.length === 0 || targetTokens.length === 0) {
+		return 0;
+	}
+	// Index the target once so each query token is a single lookup
+	// instead of a full scan of the target.
+	const frequency = new Map<string, number>();
+	for (const token of targetTokens) {
+		frequency.set(token, (frequency.get(token) ?? 0) + 1);
+	}
+	let count = 0;
+	for (const token of queryTokens) {
+		count += frequency.get(token) ?? 0;
+	}
+	return count;
+}
+/**
+ * Searches sections within a manifest for matching content.
+ * Uses weighted keyword matching: title 3x, tags 2x, body 1x.
+ *
+ * One candidate is produced per page. The title and tag scores come from the
+ * page's section; the body score comes from the first 200 tokens of the page
+ * text returned by `sectionReader`. A page whose text cannot be read is
+ * scored with an empty body rather than failing the whole search. Candidates
+ * with a total score of zero are dropped.
+ *
+ * @param query - Search query string
+ * @param manifest - Doc set manifest to search within
+ * @param sectionReader - Injected function to read condensed markdown from disk
+ * @param maxSections - Maximum number of sections to return (default 5);
+ *   values below 1 (or NaN) yield an empty result
+ * @returns Up to `maxSections` served sections, highest score first
+ */
+export async function keywordSearch(
+	query: string,
+	manifest: DocSetManifest,
+	sectionReader: (slug: string) => Promise<string>,
+	maxSections = 5,
+): Promise<ServedSection[]> {
+	// Negated ">=" so NaN is rejected too. Without this guard a negative limit
+	// would make slice(0, -n) return "all but the last n" results.
+	if (!(maxSections >= 1)) {
+		return [];
+	}
+	const queryTokens = tokenize(query);
+	if (queryTokens.length === 0) {
+		return [];
+	}
+	type ScoredResult = {
+		score: number;
+		title: string;
+		content: string;
+	};
+	const scored: ScoredResult[] = [];
+	for (const section of manifest.sections) {
+		// Title and tag scores depend only on the section, so compute them once
+		// per section instead of once per page.
+		const titleScore = countMatches(queryTokens, tokenize(section.name)) * 3;
+		const tagTokens = section.topic_tags.flatMap((t) => tokenize(t));
+		const tagScore = countMatches(queryTokens, tagTokens) * 2;
+		const sectionScore = titleScore + tagScore;
+		for (const page of section.pages) {
+			let body: string;
+			try {
+				body = await sectionReader(page.slug);
+			} catch {
+				// Best effort: an unreadable page can still match on title/tags
+				body = "";
+			}
+			// First 200 words for scoring
+			const bodyWords = tokenize(body).slice(0, 200);
+			const totalScore = sectionScore + countMatches(queryTokens, bodyWords);
+			if (totalScore > 0) {
+				scored.push({
+					score: totalScore,
+					title: section.name,
+					content: body,
+				});
+			}
+		}
+	}
+	// Sort descending by score
+	scored.sort((a, b) => b.score - a.score);
+	// Take top N and map to ServedSection
+	return scored
+		.slice(0, maxSections)
+		.map((item) => buildServedSection(item.title, item.content, manifest));
+}
+/**
+ * Builds a LibrarySummary for every indexed doc set.
+ *
+ * Doc sets whose manifest cannot be read are left out of the result rather
+ * than failing the whole listing.
+ *
+ * @param filter - Optional case-insensitive substring filter on library name;
+ *   an empty or missing filter keeps every doc set
+ * @returns One summary per readable doc set that passes the filter
+ */
+export async function getLibraries(filter?: string): Promise<LibrarySummary[]> {
+	const needle = filter ? filter.toLowerCase() : undefined;
+	const summaries: LibrarySummary[] = [];
+	for (const entry of await listDocSets()) {
+		const nameMatches = needle === undefined || entry.name.toLowerCase().includes(needle);
+		if (!nameMatches) {
+			continue;
+		}
+		let manifest: DocSetManifest;
+		try {
+			manifest = await readDocSet(entry.name, entry.version ?? undefined);
+		} catch {
+			// Unreadable manifest: skip this doc set
+			continue;
+		}
+		// Sum the condensed word counts over all sections
+		let wordCount = 0;
+		for (const section of manifest.sections) {
+			wordCount += section.word_count_condensed;
+		}
+		const summary: LibrarySummary = {
+			name: manifest.name,
+			version: manifest.version,
+			sections: manifest.sections.length,
+			pages: manifest.pages.length,
+			freshness: manifest.trust.freshness,
+			last_updated: manifest.generated_at,
+			word_count: wordCount,
+		};
+		summaries.push(summary);
+	}
+	return summaries;
+}

package/src/mcp/schemas.ts ADDED Viewed

@@ -0,0 +1,12 @@
+/**
+ * MCP-specific schemas — re-exported from core for MCP server consumption.
+ * MCP request/response shapes validated at the MCP boundary.
+ */
+export {
+	GetDocsRequest,
+	GetDocsResponse,
+	ListLibrariesRequest,
+	ListLibrariesResponse,
+	LibrarySummary,
+	ServedSection,
+} from "../core/schemas.js";

package/src/mcp/server.ts ADDED Viewed

@@ -0,0 +1,293 @@
+/**
+ * Struth MCP server.
+ *
+ * Tools:
+ *   get_docs      — Retrieve documentation sections for a library
+ *   list_libraries — List all indexed libraries with metadata
+ *
+ * Handler functions are exported for direct testing.
+ */
+import { readFile } from "node:fs/promises";
+import { join } from "node:path";
+import { z } from "zod";
+import { CLIENT_VERSION, SCHEMA_VERSION } from "../core/constants.js";
+import { listDocSets, readDocSet } from "../core/storage/index.js";
+import { sectionsDir } from "../core/storage/paths.js";
+import type { GetDocsResponse, ServedSection, TrustMetadata } from "../core/types.js";
+import { sendEvent } from "../telemetry/client.js";
+import { crossLibrarySearch, fts5Search } from "./fts5-search.js";
+import { getLibraries, keywordSearch } from "./retrieval.js";
+/**
+ * Default trust metadata for cross-library or empty responses.
+ *
+ * Used where no single manifest's trust block applies: freshness is reported
+ * as "unknown", both timestamps are null, and every coverage counter is zero.
+ */
+const EMPTY_TRUST: TrustMetadata = {
+	freshness: "unknown",
+	last_checked: null,
+	last_changed: null,
+	check_interval_hours: 24,
+	coverage: {
+		total_discovered: 0,
+		successfully_processed: 0,
+		skipped: 0,
+		skip_reasons: {},
+		coverage_ratio: 0,
+	},
+};
+/**
+ * Build a reader bound to one doc set.
+ *
+ * The returned function resolves `<sectionsDir(name, version)>/<slug>.condensed.md`
+ * and reads it as UTF-8 text. The directory is resolved on every call.
+ *
+ * @param name - Doc set name
+ * @param version - Doc set version, or null
+ * @returns Async function mapping a page slug to its condensed markdown
+ */
+function makeSectionReader(name: string, version: string | null) {
+	async function readCondensed(slug: string) {
+		const filePath = join(sectionsDir(name, version), `${slug}.condensed.md`);
+		return readFile(filePath, "utf-8");
+	}
+	return readCondensed;
+}
+/**
+ * Cross-library keyword fallback — used when FTS5 is unavailable or returns empty.
+ *
+ * Doc sets are searched in the order given and their results are concatenated
+ * in that order; there is no re-ranking across doc sets, so earlier doc sets
+ * fill the result first. Searching stops as soon as `maxSections` sections
+ * have been collected, and each doc set is only asked for the number still
+ * missing, which avoids reading pages whose results would be discarded.
+ *
+ * @param docSetInputs - Manifest and section reader for each doc set to search
+ * @param query - Search query string
+ * @param maxSections - Maximum number of sections to return overall;
+ *   values below 1 (or NaN) yield an empty result
+ * @returns At most `maxSections` served sections
+ */
+async function keywordFallback(
+	docSetInputs: Array<{
+		manifest: Awaited<ReturnType<typeof readDocSet>>;
+		sectionReader: (slug: string) => Promise<string>;
+	}>,
+	query: string,
+	maxSections: number,
+): Promise<ServedSection[]> {
+	const collected: ServedSection[] = [];
+	for (const ds of docSetInputs) {
+		const remaining = maxSections - collected.length;
+		// Negated ">=" so NaN stops the loop as well
+		if (!(remaining >= 1)) {
+			break;
+		}
+		const sections = await keywordSearch(query, ds.manifest, ds.sectionReader, remaining);
+		collected.push(...sections);
+	}
+	return collected;
+}
+/**
+ * MCP CallToolResult shape — index signature required by SDK.
+ *
+ * `content` carries text parts only; both handlers in this file put a single
+ * part in it. `isError` is set to true on the error results and omitted
+ * otherwise.
+ */
+type CallToolResult = {
+	[key: string]: unknown;
+	content: Array<{ type: "text"; text: string }>;
+	isError?: boolean;
+};
+/**
+ * Handler for the get_docs tool.
+ * Exported for direct testing without transport layer.
+ *
+ * With `library` set, searches that one doc set and returns its trust
+ * metadata, plus a `warning` when the manifest's schema version differs from
+ * the server's. Without `library`, searches every doc set — or only those
+ * named in the project's lockfile when `project_path` is given and at least
+ * one matches — and returns the placeholder trust block.
+ *
+ * In both modes FTS5 is tried first; the keyword search runs exactly once
+ * when FTS5 throws or returns no sections.
+ *
+ * @param params - Tool arguments; `max_sections` defaults to 5
+ * @returns A text result holding the JSON-encoded response, or an error
+ *   result (`isError: true`) for an empty query or an unknown library
+ */
+export async function handleGetDocs(params: {
+	query: string;
+	library?: string;
+	version?: string;
+	project_path?: string;
+	max_sections?: number;
+}): Promise<CallToolResult> {
+	const queryStart = Date.now();
+	const maxSections = params.max_sections ?? 5;
+	// Validate query is non-empty
+	if (!params.query || params.query.trim().length === 0) {
+		return {
+			content: [{ type: "text", text: "Error: query must be a non-empty string" }],
+			isError: true,
+		};
+	}
+	if (params.library) {
+		// Search within a specific library
+		let manifest: Awaited<ReturnType<typeof readDocSet>>;
+		try {
+			manifest = await readDocSet(params.library, params.version);
+		} catch {
+			return {
+				content: [
+					{
+						type: "text",
+						text: `Error: library "${params.library}" not found. Use list_libraries to see available libraries.`,
+					},
+				],
+				isError: true,
+			};
+		}
+		// NOTE(review): the reader is keyed by the requested name/version, not by
+		// manifest.name/manifest.version — confirm both resolve to the same
+		// directory when `version` is omitted.
+		const reader = makeSectionReader(params.library, params.version ?? null);
+		let sections: ServedSection[] = [];
+		try {
+			sections = await fts5Search(params.query, manifest, reader, maxSections);
+		} catch {
+			// FTS5 failure — covered by the keyword fallback below
+		}
+		if (sections.length === 0) {
+			// FTS5 failed or returned nothing — keyword fallback, run only once
+			sections = await keywordSearch(params.query, manifest, reader, maxSections);
+		}
+		const response: GetDocsResponse & { warning?: string } = {
+			sections,
+			trust: manifest.trust,
+			library: manifest.name,
+			version: manifest.version,
+			query: params.query,
+		};
+		// Check schema version mismatch
+		if (manifest.schema_version !== SCHEMA_VERSION) {
+			response.warning = `Warning: schema version mismatch — doc set has ${manifest.schema_version}, server expects ${SCHEMA_VERSION}. Results may be incomplete.`;
+		}
+		// Fire-and-forget: the response must not wait on telemetry
+		void sendEvent({
+			event: "query",
+			library: params.library,
+			sections_hit: sections.map((s) => s.title),
+			latency_ms: Date.now() - queryStart,
+			client_version: CLIENT_VERSION,
+			timestamp: new Date().toISOString(),
+		});
+		return {
+			content: [{ type: "text", text: JSON.stringify(response) }],
+		};
+	}
+	// Shared shape for every cross-library response (empty or not)
+	const crossLibraryResult = (sections: ServedSection[]): CallToolResult => ({
+		content: [
+			{
+				type: "text",
+				text: JSON.stringify({
+					sections,
+					trust: EMPTY_TRUST,
+					library: "*",
+					version: null,
+					query: params.query,
+				}),
+			},
+		],
+	});
+	// Cross-library search: read each manifest once (avoid double-read via getLibraries)
+	const allDocSets = await listDocSets();
+	// Auto-detect relevant libraries from lockfile when project_path is provided
+	let docSets = allDocSets;
+	if (params.project_path) {
+		try {
+			const { resolveLockfile } = await import("./lockfile.js");
+			const versions = await resolveLockfile(params.project_path);
+			if (versions && versions.size > 0) {
+				const pkgNames = new Set([...versions.keys()].map((k) => k.toLowerCase()));
+				const matched = allDocSets.filter((ds) => pkgNames.has(ds.name.toLowerCase()));
+				// Only narrow the search when the lockfile names an indexed doc set
+				if (matched.length > 0) docSets = matched;
+			}
+		} catch {
+			// lockfile resolution failed — fall back to searching all doc sets
+		}
+	}
+	if (docSets.length === 0) {
+		// Nothing indexed: answer immediately, without a telemetry event
+		return crossLibraryResult([]);
+	}
+	const docSetInputs: Array<{
+		name: string;
+		version: string | null;
+		manifest: Awaited<ReturnType<typeof readDocSet>>;
+		sectionReader: (slug: string) => Promise<string>;
+	}> = [];
+	for (const ds of docSets) {
+		try {
+			const manifest = await readDocSet(ds.name, ds.version ?? undefined);
+			const reader = makeSectionReader(ds.name, ds.version);
+			docSetInputs.push({ name: ds.name, version: ds.version, manifest, sectionReader: reader });
+		} catch {
+			// Skip libraries that fail to read
+		}
+	}
+	let topSections: ServedSection[] = [];
+	try {
+		topSections = await crossLibrarySearch(params.query, docSetInputs, maxSections);
+	} catch {
+		// FTS5 failure — covered by the keyword fallback below
+	}
+	if (topSections.length === 0) {
+		topSections = await keywordFallback(docSetInputs, params.query, maxSections);
+	}
+	// Fire-and-forget: the response must not wait on telemetry
+	void sendEvent({
+		event: "query",
+		library: "*",
+		sections_hit: topSections.map((s) => s.title),
+		latency_ms: Date.now() - queryStart,
+		client_version: CLIENT_VERSION,
+		timestamp: new Date().toISOString(),
+	});
+	return crossLibraryResult(topSections);
+}
+/**
+ * Handler for the list_libraries tool.
+ * Exported for direct testing without transport layer.
+ *
+ * @param params - Tool arguments; `filter` is passed straight to getLibraries
+ * @returns A single text part containing `{ "libraries": [...] }` as JSON
+ */
+export async function handleListLibraries(params: {
+	filter?: string;
+}): Promise<CallToolResult> {
+	const libraries = await getLibraries(params.filter);
+	const text = JSON.stringify({ libraries });
+	return { content: [{ type: "text", text }] };
+}
+/**
+ * Start the MCP server with stdio transport.
+ *
+ * Loads the MCP SDK lazily, registers the `get_docs` and `list_libraries`
+ * tools with their zod argument shapes, and connects the server to a stdio
+ * transport.
+ */
+export async function startMcpServer(): Promise<void> {
+	const mcpModule = await import("@modelcontextprotocol/sdk/server/mcp.js");
+	const stdioModule = await import("@modelcontextprotocol/sdk/server/stdio.js");
+	const server = new mcpModule.McpServer({ name: "struth", version: SCHEMA_VERSION });
+
+	// Argument shape for get_docs
+	const getDocsShape = {
+		query: z.string().min(1),
+		library: z.string().optional(),
+		version: z.string().optional(),
+		project_path: z.string().optional(),
+		max_sections: z.number().int().min(1).max(20).default(5),
+	};
+	server.tool(
+		"get_docs",
+		"Retrieve documentation sections for a library, with weighted keyword search",
+		getDocsShape,
+		async (params) => handleGetDocs(params),
+	);
+
+	// Argument shape for list_libraries
+	const listLibrariesShape = {
+		filter: z.string().optional(),
+	};
+	server.tool(
+		"list_libraries",
+		"List all indexed libraries with metadata",
+		listLibrariesShape,
+		async (params) => handleListLibraries(params),
+	);
+
+	await server.connect(new stdioModule.StdioServerTransport());
+}

package/src/telemetry/client.ts ADDED Viewed

@@ -0,0 +1,36 @@
+import type { z } from "zod";
+import { TELEMETRY_ENDPOINT } from "../core/constants.js";
+import type { TelemetryEvent } from "../core/schemas.js";
+/**
+ * Report whether telemetry collection is switched on.
+ * Opt-in only: true exactly when the STRUTH_TELEMETRY environment variable
+ * equals "on"; any other value, or no value, means off.
+ */
+export function shouldCollect(): boolean {
+	const flag = process.env.STRUTH_TELEMETRY;
+	return flag === "on";
+}
+/**
+ * Send a telemetry event to the ingest endpoint.
+ * Fails silently — telemetry must never break the user's workflow.
+ *
+ * Does nothing unless telemetry is enabled (see shouldCollect) and
+ * STRUTH_TELEMETRY_TOKEN is set. The request is aborted after 2 seconds.
+ * The response is not inspected; its body is cancelled so the connection is
+ * released promptly instead of waiting for garbage collection.
+ *
+ * @param event - Telemetry event to POST as JSON
+ * @returns Resolves when the attempt finishes, whether or not it succeeded
+ */
+export async function sendEvent(event: z.infer<typeof TelemetryEvent>): Promise<void> {
+	if (!shouldCollect()) return;
+	const token = process.env.STRUTH_TELEMETRY_TOKEN;
+	if (!token) return;
+	try {
+		const response = await fetch(`${TELEMETRY_ENDPOINT}/events`, {
+			method: "POST",
+			headers: {
+				"Content-Type": "application/json",
+				Authorization: `Bearer ${token}`,
+			},
+			body: JSON.stringify(event),
+			signal: AbortSignal.timeout(2000),
+		});
+		// The payload is never read; cancel it so the socket is not held open
+		await response.body?.cancel();
+	} catch {
+		// Fail silently — telemetry must never break the user's workflow
+	}
+}

package/src/telemetry/schemas.ts ADDED Viewed

@@ -0,0 +1,5 @@
+/**
+ * Telemetry event schema — re-exported from core.
+ * Used by both the client (CLI/MCP) and the CF Worker ingest endpoint.
+ */
+export { TelemetryEvent } from "../core/schemas.js";